Repository: NVIDIA/Megatron-LM
Branch: main
Commit: f456199700bc
Files: 2310
Total size: 32.9 MB

Directory structure:
gitextract_32wjwf3g/

├── .coderabbit.yaml
├── .flake8
├── .github/
│   ├── CODEOWNERS
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug_report.md
│   │   ├── config.yml
│   │   ├── feature_request.md
│   │   ├── question.md
│   │   └── regression.md
│   ├── actions/
│   │   ├── action.yml
│   │   └── check-nvidia-sso-membership/
│   │       └── action.yml
│   ├── copy-pr-bot.yaml
│   ├── oncall_schedule.json
│   ├── pull_request_template.md
│   ├── scripts/
│   │   ├── oncall_manager.py
│   │   ├── readme.sh
│   │   └── sync_team_usergroups.py
│   └── workflows/
│       ├── _build_test_publish_wheel.yml
│       ├── _release_library.yml
│       ├── _update_dependencies.yml
│       ├── auto-assign-milestone.yml
│       ├── auto-reminder-bot.yml
│       ├── auto-swap-labels.yml
│       ├── auto-update-copy-pr-bot.yml
│       ├── build-docs.yml
│       ├── build-test-publish-wheel.yml
│       ├── cherry-pick-release-commit.yml
│       ├── cicd-approve-test-queue.yml
│       ├── cicd-main.yml
│       ├── claude-complexity-label.yml
│       ├── claude_review.yml
│       ├── close-inactive-issue-pr.yml
│       ├── community-bot.yml
│       ├── config/
│       │   └── changelog-config.json
│       ├── copyright-check.yml
│       ├── dependabot.yml
│       ├── force-draft-pr.yml
│       ├── install-test.yml
│       ├── multi-approval-bot.yml
│       ├── oncall-assign.yml
│       ├── oncall-rotation.yml
│       ├── release-docs.yml
│       ├── release-freeze.yml
│       ├── release-nightly-docs.yml
│       ├── release.yaml
│       ├── review-trigger.yml
│       ├── sync-team-usergroups.yml
│       └── trigger-mbridge-tests.yml
├── .gitignore
├── .gitlab/
│   ├── labeler-config.yml
│   ├── scripts/
│   │   ├── build.sh
│   │   ├── check_imports.py
│   │   └── fetch-legacy-suite.sh
│   └── stages/
│       ├── 00.pre.yml
│       ├── 01.build.yml
│       ├── 02.test.yml
│       ├── 03.integration-tests.yml
│       ├── 04.functional-tests.yml
│       └── 05.publish.yml
├── .gitlab-ci.yml
├── .pre-commit-config.yaml
├── .pylintrc
├── .python-version
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── codecov.yml
├── docker/
│   ├── .ngc_version.dev
│   ├── .ngc_version.lts
│   ├── Dockerfile.ci.dev
│   ├── Dockerfile.ci.nemo
│   ├── Dockerfile.linting
│   ├── common/
│   │   ├── install.sh
│   │   └── install_source_wheels.sh
│   └── patches/
│       └── deepep.patch
├── docs/
│   ├── add_copyright_header.py
│   ├── advanced/
│   │   └── index.md
│   ├── api-backwards-compatibility-check.md
│   ├── api-guide/
│   │   ├── core/
│   │   │   ├── datasets.md
│   │   │   ├── dist_checkpointing.md
│   │   │   ├── dist_checkpointing.strategies.md
│   │   │   ├── distributed.md
│   │   │   ├── fusions.md
│   │   │   ├── index.md
│   │   │   ├── pipeline_parallel.md
│   │   │   ├── tensor_parallel.md
│   │   │   └── transformer.md
│   │   ├── index.md
│   │   ├── internal/
│   │   │   ├── index.md
│   │   │   ├── num_microbatches_calculator.md
│   │   │   └── optimizer_param_scheduler.md
│   │   ├── models/
│   │   │   ├── index.md
│   │   │   ├── models.bert.md
│   │   │   ├── models.gpt.md
│   │   │   ├── models.md
│   │   │   └── models.t5.md
│   │   └── router_replay.md
│   ├── autodoc2_docstrings_parser.py
│   ├── broken_links_false_positives.json
│   ├── conf.py
│   ├── developer/
│   │   ├── contribute.md
│   │   ├── generate_docs.md
│   │   ├── oncall.md
│   │   └── submit.md
│   ├── discussions/
│   │   ├── README.md
│   │   └── megatron-fsdp-user-guide/
│   │       ├── example-scripts/
│   │       │   ├── sbatch_checkpoint_convert.sh
│   │       │   └── sbatch_mfsdp_deepseek_v3.sh
│   │       └── megatron-fsdp-user-guide.md
│   ├── documentation.md
│   ├── get-started/
│   │   ├── install.md
│   │   ├── overview.md
│   │   ├── quickstart.md
│   │   └── releasenotes.md
│   ├── index.md
│   ├── llama_mistral.md
│   ├── models/
│   │   ├── index.md
│   │   ├── llms.md
│   │   └── multimodal.md
│   ├── project.json
│   ├── user-guide/
│   │   ├── data-preparation.md
│   │   ├── features/
│   │   │   ├── context_parallel.md
│   │   │   ├── custom_fsdp.md
│   │   │   ├── dist_optimizer.md
│   │   │   ├── fine_grained_activation_offloading.md
│   │   │   ├── index.md
│   │   │   ├── megatron_energon.md
│   │   │   ├── megatron_rl.md
│   │   │   ├── moe.md
│   │   │   ├── multi_latent_attention.md
│   │   │   ├── multi_token_prediction.md
│   │   │   ├── optimizer_cpu_offload.md
│   │   │   ├── pipeline_parallel_layout.md
│   │   │   └── tokenizers.md
│   │   ├── index.md
│   │   ├── msc_integration.md
│   │   ├── parallelism-guide.md
│   │   └── training-examples.md
│   └── versions1.json
├── examples/
│   ├── __init__.py
│   ├── academic_paper_scripts/
│   │   ├── detxoify_lm/
│   │   │   ├── README.md
│   │   │   ├── annotations/
│   │   │   │   ├── filter-selfgeneration.py
│   │   │   │   ├── perspective_api_annotate.py
│   │   │   │   └── preprocess.sh
│   │   │   ├── finetune_gpt.py
│   │   │   ├── finetune_gpt_distributed-1.3b.sh
│   │   │   ├── generate-1.3b.sh
│   │   │   ├── generate_samples_gpt.py
│   │   │   ├── perspective_api.py
│   │   │   └── self_generation/
│   │   │       └── selfgenerate-1.3b-unconditional.sh
│   │   ├── msdp/
│   │   │   ├── README.md
│   │   │   ├── data_processing.sh
│   │   │   ├── eval_knwl_generation.sh
│   │   │   ├── eval_resp_generation.sh
│   │   │   ├── prep_resp_gen.sh
│   │   │   ├── prompt_knwl_gen.sh
│   │   │   └── prompt_resp_gen.sh
│   │   └── sc21/
│   │       ├── CONFIG.sh
│   │       ├── README.md
│   │       ├── SBATCH.sh
│   │       ├── SRUN.sh
│   │       ├── run_figure_11.sh
│   │       ├── run_figure_12.sh
│   │       ├── run_figure_13.sh
│   │       ├── run_figure_14.sh
│   │       ├── run_figure_15.sh
│   │       ├── run_figure_16.sh
│   │       ├── run_figure_17.sh
│   │       ├── run_figure_18.sh
│   │       └── run_table_1.sh
│   ├── bert/
│   │   ├── README.md
│   │   └── train_bert_340m_distributed.sh
│   ├── export/
│   │   ├── README.md
│   │   └── trtllm_export/
│   │       ├── README.md
│   │       ├── distributed_export/
│   │       │   └── gpt_distributed_gpu_export.py
│   │       └── single_device_export/
│   │           └── gpt_single_device_cpu_export.py
│   ├── gpt3/
│   │   ├── README.md
│   │   ├── gpt_config.yaml
│   │   └── train_gpt3_175b_distributed.sh
│   ├── gptoss/
│   │   ├── 01_convert_from_hf.py
│   │   ├── 02_train.sh
│   │   ├── 03_convert_to_hf.py
│   │   └── README.md
│   ├── inference/
│   │   ├── README.md
│   │   ├── gpt/
│   │   │   ├── gpt_dynamic_inference.py
│   │   │   ├── gpt_dynamic_inference_12b.sh
│   │   │   ├── gpt_dynamic_inference_357m.sh
│   │   │   ├── gpt_dynamic_inference_with_coordinator.py
│   │   │   ├── gpt_static_inference.py
│   │   │   └── utils.py
│   │   ├── llama_mistral/
│   │   │   ├── huggingface_reference.py
│   │   │   ├── run_static_inference_llama4_scout.sh
│   │   │   ├── run_text_generation_llama3.1.sh
│   │   │   ├── run_text_generation_llama3.sh
│   │   │   └── run_text_generation_mistral.sh
│   │   ├── run_text_generation_server_345M.sh
│   │   ├── run_text_generation_server_345M_8_tensor_parallel.sh
│   │   └── t5/
│   │       └── simple_t5_batch_inference.py
│   ├── llama/
│   │   ├── README.md
│   │   └── train_llama3_8b_h100_fp8.sh
│   ├── mamba/
│   │   ├── .gitignore
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   ├── run_text_gen_server_8b.sh
│   │   ├── run_text_gen_server_8b_gpt3.sh
│   │   └── train.sh
│   ├── mimo/
│   │   ├── __init__.py
│   │   ├── avlm_inference.py
│   │   ├── configs/
│   │   │   ├── llava_avlm.py
│   │   │   ├── llava_vlm.py
│   │   │   └── mock.py
│   │   ├── data/
│   │   │   ├── __init__.py
│   │   │   ├── avlm_sample_loader.py
│   │   │   ├── energon_avlm_task_encoder.py
│   │   │   ├── energon_vlm_task_encoder.py
│   │   │   ├── mock.py
│   │   │   ├── prepare_video_llava_data.py
│   │   │   └── utils/
│   │   │       └── calculate_audio_tokens.py
│   │   ├── model_providers/
│   │   │   ├── __init__.py
│   │   │   ├── hf_clip_encoder.py
│   │   │   ├── hf_whisper_encoder.py
│   │   │   ├── llava_avlm.py
│   │   │   ├── llava_vlm.py
│   │   │   └── mock.py
│   │   ├── scripts/
│   │   │   ├── run_avlm_train.sh
│   │   │   ├── run_mock_train.sh
│   │   │   ├── run_video_vlm_train.sh
│   │   │   └── run_vlm_train.sh
│   │   ├── train.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── data_helpers.py
│   │       ├── logging.py
│   │       └── model_helpers.py
│   ├── mixtral/
│   │   ├── README.md
│   │   └── train_mixtral_8x7b_distributed.sh
│   ├── multimodal/
│   │   ├── Dockerfile
│   │   ├── README.md
│   │   ├── combine_lm_vision_checkpoints.sh
│   │   ├── combine_state_dicts.py
│   │   ├── config.py
│   │   ├── convert_llava_pretrain_to_wds.py
│   │   ├── dataloader_provider.py
│   │   ├── dataset_helpers.py
│   │   ├── energon_util.py
│   │   ├── evaluation/
│   │   │   ├── evaluate_ai2d.py
│   │   │   ├── evaluate_chartqa.py
│   │   │   ├── evaluate_coco.py
│   │   │   ├── evaluate_infovqa.py
│   │   │   ├── evaluate_mathvista.py
│   │   │   ├── evaluate_mmmu.py
│   │   │   ├── evaluate_ocrbench.py
│   │   │   ├── evaluate_ocrbench_v2.py
│   │   │   ├── evaluate_rd_tablebench.py
│   │   │   ├── evaluate_realworldqa.py
│   │   │   ├── evaluate_spdocvqa.py
│   │   │   ├── evaluate_textvqa.py
│   │   │   ├── evaluate_video_motionbench.py
│   │   │   ├── evaluate_video_mvbench.py
│   │   │   ├── evaluate_video_phys_game_bench.py
│   │   │   ├── evaluate_vqav2.py
│   │   │   ├── evaluation_datasets.py
│   │   │   └── mmmu_utils.py
│   │   ├── image_processing.py
│   │   ├── layer_scaling.py
│   │   ├── layer_specs.py
│   │   ├── llama_3p1_nemotron_nano_vl_8b_v1/
│   │   │   ├── Dockerfile
│   │   │   ├── README.md
│   │   │   ├── pretraining_llama_3p1_nemotron_nano_vl_8b_v1.sh
│   │   │   ├── sft_llama_3p1_nemotron_nano_vl_8b_v1.sh
│   │   │   └── text_generation.sh
│   │   ├── manual_prompts.json
│   │   ├── model.py
│   │   ├── model_converter/
│   │   │   ├── clip_converter.py
│   │   │   ├── internvit_converter.py
│   │   │   ├── radio_converter.py
│   │   │   ├── siglip_converter.py
│   │   │   └── vision_model_tester.py
│   │   ├── multimodal_args.py
│   │   ├── nvlm/
│   │   │   ├── README.md
│   │   │   ├── internvit.py
│   │   │   ├── nvlm_prompts.json
│   │   │   ├── pp_checkpoint_converter.py
│   │   │   ├── pretrain_blend.yaml
│   │   │   ├── pretrain_qwen20_72b_internvit_6b.sh
│   │   │   ├── pretrain_yi_34b_internvit_6b.sh
│   │   │   ├── run_text_generation_qwen20_72b_internvit_6b.sh
│   │   │   ├── run_text_generation_qwen25_7b_internvit_video.sh
│   │   │   ├── run_text_generation_qwen25_7b_siglip.sh
│   │   │   ├── run_text_generation_yi_34b_internvit_6b.sh
│   │   │   ├── sft_34b_internvit.sh
│   │   │   ├── sft_blend.yaml
│   │   │   ├── sft_qwen20_72b_internvit_6b.sh
│   │   │   └── sft_qwen2p5_7b_internvit_6b_video.sh
│   │   ├── pretrain_dataset.yaml
│   │   ├── pretrain_mistral_clip.sh
│   │   ├── radio/
│   │   │   └── radio_g.py
│   │   ├── run_text_generation.py
│   │   ├── sft_dataset.yaml
│   │   ├── sft_mistral_clip.sh
│   │   ├── text_generation_mistral_clip.sh
│   │   └── train.py
│   ├── post_training/
│   │   └── modelopt/
│   │       ├── .gitignore
│   │       ├── ADVANCED.md
│   │       ├── Dockerfile
│   │       ├── README.md
│   │       ├── conf/
│   │       │   ├── Qwen/
│   │       │   │   ├── Qwen2.5-0.5B-Instruct.sh
│   │       │   │   ├── Qwen2.5-7B-Instruct.sh
│   │       │   │   ├── Qwen3-0.6B.sh
│   │       │   │   ├── Qwen3-235B-A22B.sh
│   │       │   │   ├── Qwen3-30B-A3B.sh
│   │       │   │   └── Qwen3-8B.sh
│   │       │   ├── arguments.sh
│   │       │   ├── deepseek-ai/
│   │       │   │   ├── DeepSeek-R1.sh
│   │       │   │   └── DeepSeek-V2-Lite.sh
│   │       │   ├── meta-llama/
│   │       │   │   ├── Llama-3.1-8B-Instruct.sh
│   │       │   │   ├── Llama-3.2-1B-Instruct.sh
│   │       │   │   ├── Llama-4-Maverick-17B-128E-Instruct.sh
│   │       │   │   └── Llama-4-Scout-17B-16E-Instruct.sh
│   │       │   ├── moonshotai/
│   │       │   │   ├── Kimi-K2-Instruct.sh
│   │       │   │   ├── kimi_k2_instruct.sh
│   │       │   │   └── kimi_k2_instruct_export.sh
│   │       │   ├── nvidia/
│   │       │   │   ├── NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.sh
│   │       │   │   ├── NVIDIA-Nemotron-3-Super-120B-A12B-BF16.sh
│   │       │   │   ├── NVIDIA-Nemotron-Nano-9B-v2.sh
│   │       │   │   ├── Nemotron-H-47B-Reasoning-128K.sh
│   │       │   │   ├── Nemotron-H-4B-Instruct.sh
│   │       │   │   ├── Nemotron-H-56B-Base-8K.sh
│   │       │   │   ├── Nemotron-H-8B-Base-8K.sh
│   │       │   │   └── Nemotron-Mini-4B-Instruct.sh
│   │       │   └── openai/
│   │       │       ├── gpt-oss-120b.sh
│   │       │       └── gpt-oss-20b.sh
│   │       ├── convert.sh
│   │       ├── convert_model.py
│   │       ├── distillation.md
│   │       ├── eagle3.sh
│   │       ├── export.py
│   │       ├── export.sh
│   │       ├── finetune.py
│   │       ├── finetune.sh
│   │       ├── generate.py
│   │       ├── generate.sh
│   │       ├── generation_server.sh
│   │       ├── mmlu.py
│   │       ├── mmlu.sh
│   │       ├── offline_feature_extract.py
│   │       ├── offline_feature_extract.sh
│   │       ├── prune.py
│   │       ├── prune.sh
│   │       ├── quantize.py
│   │       ├── quantize.sh
│   │       ├── requirements.txt
│   │       ├── requirements_ssm.txt
│   │       ├── slurm/
│   │       │   ├── env_setup_template.sh
│   │       │   └── sbatch.sh
│   │       ├── speculative.md
│   │       ├── train.sh
│   │       ├── validate.py
│   │       └── validate.sh
│   ├── rl/
│   │   ├── README.md
│   │   ├── benchmark_refit.py
│   │   ├── environment_configs/
│   │   │   ├── countdown.yaml
│   │   │   ├── dapo.yaml
│   │   │   ├── default.yaml
│   │   │   ├── gsm8k.yaml
│   │   │   ├── gsm8k_nanov3.yaml
│   │   │   ├── math.yaml
│   │   │   └── openmathinstructv2.yaml
│   │   ├── environments/
│   │   │   ├── __init__.py
│   │   │   ├── countdown/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── countdown.py
│   │   │   │   └── countdown_agent.py
│   │   │   └── math/
│   │   │       ├── __init__.py
│   │   │       ├── aime_agent.py
│   │   │       ├── bigmath_agent.py
│   │   │       ├── dapo_agent.py
│   │   │       ├── gsm8k_agent.py
│   │   │       ├── math_agent.py
│   │   │       └── openmath_agent.py
│   │   └── model_configs/
│   │       ├── common.sh
│   │       ├── llama3p1_8b_instruct.sh
│   │       ├── nemotron5_56b.sh
│   │       ├── nemotron5_8b.sh
│   │       ├── nemotron5p5_12b_H.sh
│   │       ├── nemotron6_3b_moe.sh
│   │       ├── qwen3_30b_a3b_moe.sh
│   │       ├── qwen3_32b.sh
│   │       ├── qwen3_4b.sh
│   │       ├── qwen3_8b.sh
│   │       ├── qwen_2p5_32b.sh
│   │       ├── qwen_2p5_3b.sh
│   │       ├── qwen_2p5_distill_7b.sh
│   │       └── qwen_2p5_math_7b.sh
│   ├── run_simple_mcore_train_loop.py
│   └── t5/
│       ├── README.md
│       └── train_t5_220m_distributed.sh
├── gpt_builders.py
├── greptile.json
├── mamba_builders.py
├── megatron/
│   ├── core/
│   │   ├── MSC_Integration.md
│   │   ├── QuickStart.md
│   │   ├── README.md
│   │   ├── README_STRAGGLER.md
│   │   ├── __init__.py
│   │   ├── _rank_utils.py
│   │   ├── activations.py
│   │   ├── config.py
│   │   ├── config_logger.py
│   │   ├── datasets/
│   │   │   ├── Makefile
│   │   │   ├── __init__.py
│   │   │   ├── bert_dataset.py
│   │   │   ├── blended_dataset.py
│   │   │   ├── blended_megatron_dataset_builder.py
│   │   │   ├── blended_megatron_dataset_config.py
│   │   │   ├── data_schedule.py
│   │   │   ├── gpt_dataset.py
│   │   │   ├── helpers.cpp
│   │   │   ├── helpers.py
│   │   │   ├── indexed_dataset.py
│   │   │   ├── masked_dataset.py
│   │   │   ├── megatron_dataset.py
│   │   │   ├── multimodal_dataset.py
│   │   │   ├── object_storage_utils.py
│   │   │   ├── readme.md
│   │   │   ├── t5_dataset.py
│   │   │   ├── utils.py
│   │   │   └── utils_s3.py
│   │   ├── dist_checkpointing/
│   │   │   ├── __init__.py
│   │   │   ├── core.py
│   │   │   ├── dict_utils.py
│   │   │   ├── exchange_utils.py
│   │   │   ├── mapping.py
│   │   │   ├── optimizer.py
│   │   │   ├── serialization.py
│   │   │   ├── state_dict_utils.py
│   │   │   ├── strategies/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── async_utils.py
│   │   │   │   ├── base.py
│   │   │   │   ├── cached_metadata_filesystem_reader.py
│   │   │   │   ├── checkpointable.py
│   │   │   │   ├── common.py
│   │   │   │   ├── filesystem_async.py
│   │   │   │   ├── fully_parallel.py
│   │   │   │   ├── state_dict_saver.py
│   │   │   │   └── torch.py
│   │   │   ├── tensor_aware_state_dict.py
│   │   │   ├── utils.py
│   │   │   └── validation.py
│   │   ├── distributed/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── data_parallel_base.py
│   │   │   ├── distributed_data_parallel.py
│   │   │   ├── distributed_data_parallel_config.py
│   │   │   ├── finalize_model_grads.py
│   │   │   ├── fsdp/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── mcore_fsdp_adapter.py
│   │   │   │   └── src/
│   │   │   │       ├── README.md
│   │   │   │       ├── __init__.py
│   │   │   │       ├── megatron_fsdp/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   ├── distributed_data_parallel_config.py
│   │   │   │       │   ├── fully_shard.py
│   │   │   │       │   ├── megatron_fsdp.py
│   │   │   │       │   ├── mixed_precision.py
│   │   │   │       │   ├── package_info.py
│   │   │   │       │   ├── param_and_grad_buffer.py
│   │   │   │       │   ├── uneven_dtensor.py
│   │   │   │       │   └── utils.py
│   │   │   │       └── pyproject.toml
│   │   │   ├── param_and_grad_buffer.py
│   │   │   ├── reduce_scatter_with_fp32_accumulation.py
│   │   │   ├── torch_fully_sharded_data_parallel.py
│   │   │   └── torch_fully_sharded_data_parallel_config.py
│   │   ├── energy_monitor.py
│   │   ├── enums.py
│   │   ├── export/
│   │   │   ├── __init__.py
│   │   │   ├── data_type.py
│   │   │   ├── export_config.py
│   │   │   ├── model_type.py
│   │   │   └── trtllm/
│   │   │       ├── __init__.py
│   │   │       ├── engine_builder/
│   │   │       │   ├── __init__.py
│   │   │       │   └── trtllm_engine_builder.py
│   │   │       ├── model_to_trllm_mapping/
│   │   │       │   ├── __init__.py
│   │   │       │   └── default_conversion_dict.py
│   │   │       ├── trt_model_config.py
│   │   │       ├── trt_model_type.py
│   │   │       ├── trtllm_helper.py
│   │   │       ├── trtllm_layers.py
│   │   │       └── trtllm_weights_converter/
│   │   │           ├── __init__.py
│   │   │           ├── distributed_trtllm_model_weights_converter.py
│   │   │           ├── single_device_trtllm_model_weights_converter.py
│   │   │           └── utils.py
│   │   ├── extensions/
│   │   │   ├── TransformerEngineMixedPrecision.md
│   │   │   ├── __init__.py
│   │   │   ├── kitchen.py
│   │   │   ├── transformer_engine.py
│   │   │   └── transformer_engine_spec_provider.py
│   │   ├── fp4_utils.py
│   │   ├── fp8_utils.py
│   │   ├── full_cuda_graph.py
│   │   ├── fusions/
│   │   │   ├── __init__.py
│   │   │   ├── fused_bias_dropout.py
│   │   │   ├── fused_bias_geglu.py
│   │   │   ├── fused_bias_gelu.py
│   │   │   ├── fused_bias_swiglu.py
│   │   │   ├── fused_cross_entropy.py
│   │   │   ├── fused_indices_converter.py
│   │   │   ├── fused_layer_norm.py
│   │   │   ├── fused_mla_yarn_rope_apply.py
│   │   │   ├── fused_pad_routing_map.py
│   │   │   ├── fused_softmax.py
│   │   │   └── fused_weighted_squared_relu.py
│   │   ├── hyper_comm_grid.py
│   │   ├── inference/
│   │   │   ├── __init__.py
│   │   │   ├── async_stream.py
│   │   │   ├── batch_dimensions_utils.py
│   │   │   ├── common_inference_params.py
│   │   │   ├── communication/
│   │   │   │   └── torch_symm_triton/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── barrier.py
│   │   │   │       ├── collectives.py
│   │   │   │       ├── fused_collectives.py
│   │   │   │       ├── multimem_asm.py
│   │   │   │       └── utils.py
│   │   │   ├── communication_utils.py
│   │   │   ├── config.py
│   │   │   ├── contexts/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── attention_context/
│   │   │   │   │   ├── mamba_metadata.py
│   │   │   │   │   ├── metadata_base.py
│   │   │   │   │   ├── mha_metadata.py
│   │   │   │   │   └── triton/
│   │   │   │   │       └── tensor_ops.py
│   │   │   │   ├── base_context.py
│   │   │   │   ├── dynamic_context.py
│   │   │   │   ├── fused_kv_append_kernel.py
│   │   │   │   ├── kv_block_allocator.py
│   │   │   │   ├── mamba_slot_allocator.py
│   │   │   │   ├── routing_metadata.py
│   │   │   │   └── static_context.py
│   │   │   ├── data_parallel_inference_coordinator.py
│   │   │   ├── engines/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── abstract_engine.py
│   │   │   │   ├── async_zmq_communicator.py
│   │   │   │   ├── dynamic_engine.py
│   │   │   │   ├── mcore_engine.py
│   │   │   │   └── static_engine.py
│   │   │   ├── headers.py
│   │   │   ├── inference_client.py
│   │   │   ├── inference_request.py
│   │   │   ├── model_inference_wrappers/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── abstract_model_inference_wrapper.py
│   │   │   │   ├── gpt/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── gpt_inference_wrapper.py
│   │   │   │   ├── multimodal/
│   │   │   │   │   └── vlm_inference_wrapper.py
│   │   │   │   └── t5/
│   │   │   │       ├── __init__.py
│   │   │   │       └── t5_inference_wrapper.py
│   │   │   ├── moe/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── activations.py
│   │   │   │   ├── fused_moe.py
│   │   │   │   ├── pad.py
│   │   │   │   └── permute.py
│   │   │   ├── quantization/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── mxfp8_quantize.py
│   │   │   │   ├── mxfp8_tensor.py
│   │   │   │   └── utils.py
│   │   │   ├── sampling_params.py
│   │   │   ├── scheduler.py
│   │   │   ├── symmetric_memory.py
│   │   │   ├── text_generation_controllers/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── encoder_decoder_text_generation_controller.py
│   │   │   │   ├── text_generation_controller.py
│   │   │   │   └── vlm_text_generation_controller.py
│   │   │   ├── text_generation_server/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dynamic_text_gen_server/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── endpoints/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── chat_completions.py
│   │   │   │   │   │   ├── common.py
│   │   │   │   │   │   ├── completions.py
│   │   │   │   │   │   └── health.py
│   │   │   │   │   ├── text_generation_server.py
│   │   │   │   │   └── tokenization.py
│   │   │   │   ├── endpoints/
│   │   │   │   │   ├── common.py
│   │   │   │   │   └── completions.py
│   │   │   │   ├── run_mcore_engine.py
│   │   │   │   ├── text_generation_server.py
│   │   │   │   └── tokenization.py
│   │   │   ├── unified_memory.py
│   │   │   └── utils.py
│   │   ├── inference_params.py
│   │   ├── jit.py
│   │   ├── model_parallel_config.py
│   │   ├── models/
│   │   │   ├── T5/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── t5_model.py
│   │   │   │   └── t5_spec.py
│   │   │   ├── __init__.py
│   │   │   ├── backends.py
│   │   │   ├── bert/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bert_layer_specs.py
│   │   │   │   ├── bert_lm_head.py
│   │   │   │   ├── bert_model.py
│   │   │   │   └── pooler.py
│   │   │   ├── common/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── embeddings/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── language_model_embedding.py
│   │   │   │   │   ├── relative_pos_embedding.py
│   │   │   │   │   ├── rope_utils.py
│   │   │   │   │   ├── rotary_pos_embedding.py
│   │   │   │   │   └── yarn_rotary_pos_embedding.py
│   │   │   │   ├── language_module/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── language_module.py
│   │   │   │   ├── model_chunk_schedule_plan.py
│   │   │   │   └── vision_module/
│   │   │   │       ├── __init__.py
│   │   │   │       └── vision_module.py
│   │   │   ├── gpt/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── experimental_attention_variant_module_specs.py
│   │   │   │   ├── fine_grained_callables.py
│   │   │   │   ├── gpt_layer_specs.py
│   │   │   │   ├── gpt_model.py
│   │   │   │   ├── heterogeneous/
│   │   │   │   │   └── heterogeneous_layer_specs.py
│   │   │   │   └── moe_module_specs.py
│   │   │   ├── huggingface/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── clip_model.py
│   │   │   │   ├── module.py
│   │   │   │   └── qwen_model.py
│   │   │   ├── mamba/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── mamba_layer_specs.py
│   │   │   │   └── mamba_model.py
│   │   │   ├── mimo/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── config/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── base_configs.py
│   │   │   │   ├── model/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── base.py
│   │   │   │   ├── partition/
│   │   │   │   │   └── utils.py
│   │   │   │   └── submodules/
│   │   │   │       ├── audio.py
│   │   │   │       ├── base.py
│   │   │   │       └── vision.py
│   │   │   ├── multimodal/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── context_parallel.py
│   │   │   │   ├── llava_model.py
│   │   │   │   └── llava_spec.py
│   │   │   └── vision/
│   │   │       ├── __init__.py
│   │   │       ├── clip_vit_model.py
│   │   │       ├── multimodal_projector.py
│   │   │       ├── radio.py
│   │   │       └── vit_layer_specs.py
│   │   ├── msc_utils.py
│   │   ├── nccl_allocator.py
│   │   ├── num_microbatches_calculator.py
│   │   ├── optimizer/
│   │   │   ├── __init__.py
│   │   │   ├── clip_grads.py
│   │   │   ├── cpu_offloading/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   └── hybrid_optimizer.py
│   │   │   ├── distrib_optimizer.py
│   │   │   ├── grad_scaler.py
│   │   │   ├── layer_wise_optimizer.py
│   │   │   ├── muon.py
│   │   │   ├── optimizer.py
│   │   │   ├── optimizer_config.py
│   │   │   └── qk_clip.py
│   │   ├── optimizer_param_scheduler.py
│   │   ├── package_info.py
│   │   ├── packed_seq_params.py
│   │   ├── parallel_state.py
│   │   ├── pipeline_parallel/
│   │   │   ├── __init__.py
│   │   │   ├── bridge_communicator.py
│   │   │   ├── combined_1f1b.py
│   │   │   ├── fine_grained_activation_offload.py
│   │   │   ├── hybrid_cp_schedule.py
│   │   │   ├── multimodule_communicator.py
│   │   │   ├── p2p_communication.py
│   │   │   ├── schedules.py
│   │   │   └── utils.py
│   │   ├── post_training/
│   │   │   ├── __init__.py
│   │   │   └── modelopt/
│   │   │       ├── __init__.py
│   │   │       ├── gpt/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── model_specs.py
│   │   │       │   └── state_dict_hooks.py
│   │   │       ├── layers.py
│   │   │       └── mamba/
│   │   │           ├── __init__.py
│   │   │           └── model_specs.py
│   │   ├── process_groups_config.py
│   │   ├── quantization/
│   │   │   ├── __init__.py
│   │   │   ├── quant_config.py
│   │   │   └── utils.py
│   │   ├── requirements.txt
│   │   ├── rerun_state_machine.py
│   │   ├── resharding/
│   │   │   ├── __init__.py
│   │   │   ├── copy_services/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── base.py
│   │   │   │   ├── gloo_copy_service.py
│   │   │   │   ├── nccl_copy_service.py
│   │   │   │   └── nvshmem_copy_service.py
│   │   │   ├── execution.py
│   │   │   ├── nvshmem_copy_service/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── compat.py
│   │   │   │   ├── core/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── gpu_resource_manager.py
│   │   │   │   │   ├── kernel_launcher.py
│   │   │   │   │   └── pipeline_executor.py
│   │   │   │   ├── kernels/
│   │   │   │   │   └── chunked_kernel.cu
│   │   │   │   ├── logger.py
│   │   │   │   ├── memory/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── double_buffer_manager.py
│   │   │   │   │   └── tensor_pointer_utils.py
│   │   │   │   ├── nvshmem_types.py
│   │   │   │   ├── planning/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── communication_scheduler.py
│   │   │   │   │   ├── gpu_execution_planner.py
│   │   │   │   │   ├── task_segmenter.py
│   │   │   │   │   └── workload_packer.py
│   │   │   │   ├── service.py
│   │   │   │   └── validation.py
│   │   │   ├── planner.py
│   │   │   ├── refit.py
│   │   │   ├── transforms.py
│   │   │   └── utils.py
│   │   ├── safe_globals.py
│   │   ├── ssm/
│   │   │   ├── __init__.py
│   │   │   ├── gated_delta_net.py
│   │   │   ├── mamba_block.py
│   │   │   ├── mamba_context_parallel.py
│   │   │   ├── mamba_hybrid_layer_allocation.py
│   │   │   ├── mamba_layer.py
│   │   │   ├── mamba_mixer.py
│   │   │   ├── mlp_layer.py
│   │   │   ├── ops/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── causal_conv1d_triton.py
│   │   │   │   ├── causal_conv1d_varlen.py
│   │   │   │   ├── determinism.py
│   │   │   │   ├── mamba_ssm.py
│   │   │   │   ├── ssd_bmm.py
│   │   │   │   ├── ssd_chunk_scan.py
│   │   │   │   ├── ssd_chunk_state.py
│   │   │   │   ├── ssd_combined.py
│   │   │   │   └── ssd_state_passing.py
│   │   │   └── triton_cache_manager.py
│   │   ├── tensor_parallel/
│   │   │   ├── __init__.py
│   │   │   ├── cross_entropy.py
│   │   │   ├── data.py
│   │   │   ├── inference_layers.py
│   │   │   ├── layers.py
│   │   │   ├── mappings.py
│   │   │   ├── random.py
│   │   │   └── utils.py
│   │   ├── timers.py
│   │   ├── tokenizers/
│   │   │   ├── __init__.py
│   │   │   ├── base_tokenizer.py
│   │   │   ├── megatron_tokenizer.py
│   │   │   ├── text/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── libraries/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── abstract_tokenizer.py
│   │   │   │   │   ├── bytelevel_tokenizer.py
│   │   │   │   │   ├── chat_template.py
│   │   │   │   │   ├── huggingface_tokenizer.py
│   │   │   │   │   ├── megatron_hf_tokenizer.py
│   │   │   │   │   ├── null_tokenizer.py
│   │   │   │   │   ├── sentencepiece_tokenizer.py
│   │   │   │   │   ├── sft_tokenizer.py
│   │   │   │   │   └── tiktoken_tokenizer.py
│   │   │   │   ├── models/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── bert_tokenizer.py
│   │   │   │   │   ├── default_tokenizer.py
│   │   │   │   │   ├── gpt_tokenizer.py
│   │   │   │   │   ├── mamba_tokenizer.py
│   │   │   │   │   └── t5_tokenizer.py
│   │   │   │   ├── parsers/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── base_parser.py
│   │   │   │   │   ├── deepseek_r1_reasoning_parser.py
│   │   │   │   │   └── qwen3_coder_tool_parser.py
│   │   │   │   └── text_tokenizer.py
│   │   │   ├── utils/
│   │   │   │   └── build_tokenizer.py
│   │   │   └── vision/
│   │   │       ├── __init__.py
│   │   │       ├── libraries/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── multimodal_tokenizer.py
│   │   │       │   └── null_multimodal_tokenizer.py
│   │   │       ├── models/
│   │   │       │   ├── __init__.py
│   │   │       │   └── default_tokenizer.py
│   │   │       └── vision_tokenizer.py
│   │   ├── transformer/
│   │   │   ├── __init__.py
│   │   │   ├── attention.py
│   │   │   ├── cuda_graphs.py
│   │   │   ├── custom_layers/
│   │   │   │   ├── __init__.py
│   │   │   │   └── batch_invariant_kernels.py
│   │   │   ├── dot_product_attention.py
│   │   │   ├── enums.py
│   │   │   ├── experimental_attention_variant/
│   │   │   │   ├── absorbed_mla.py
│   │   │   │   └── dsa.py
│   │   │   ├── fsdp_dtensor_checkpoint.py
│   │   │   ├── heterogeneous/
│   │   │   │   ├── heterogeneous_config.py
│   │   │   │   └── linear_replacements.py
│   │   │   ├── identity_op.py
│   │   │   ├── mlp.py
│   │   │   ├── module.py
│   │   │   ├── moe/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── experts.py
│   │   │   │   ├── fused_a2a.py
│   │   │   │   ├── moe_layer.py
│   │   │   │   ├── moe_utils.py
│   │   │   │   ├── router.py
│   │   │   │   ├── router_replay.py
│   │   │   │   ├── shared_experts.py
│   │   │   │   ├── token_dispatcher.py
│   │   │   │   ├── token_dispatcher_inference.py
│   │   │   │   └── upcycling_utils.py
│   │   │   ├── multi_latent_attention.py
│   │   │   ├── multi_token_prediction.py
│   │   │   ├── pipeline_parallel_layer_layout.py
│   │   │   ├── spec_utils.py
│   │   │   ├── torch_layer_norm.py
│   │   │   ├── torch_norm.py
│   │   │   ├── transformer_block.py
│   │   │   ├── transformer_config.py
│   │   │   ├── transformer_layer.py
│   │   │   └── utils.py
│   │   ├── typed_torch.py
│   │   └── utils.py
│   ├── inference/
│   │   ├── __init__.py
│   │   └── utils.py
│   ├── legacy/
│   │   ├── fp16_deprecated/
│   │   │   └── loss_scaler.py
│   │   ├── fused_kernels/
│   │   │   ├── __init__.py
│   │   │   ├── compat.h
│   │   │   ├── tests/
│   │   │   │   ├── __init__.py
│   │   │   │   └── test_fused_kernels.py
│   │   │   └── type_shim.h
│   │   └── model/
│   │       ├── __init__.py
│   │       ├── bert_model.py
│   │       ├── biencoder_model.py
│   │       ├── classification.py
│   │       ├── enums.py
│   │       ├── fused_bias_gelu.py
│   │       ├── fused_layer_norm.py
│   │       ├── fused_softmax.py
│   │       ├── gpt_model.py
│   │       ├── language_model.py
│   │       ├── module.py
│   │       ├── multiple_choice.py
│   │       ├── realm_model.py
│   │       ├── rms_norm.py
│   │       ├── t5_model.py
│   │       ├── transformer.py
│   │       ├── utils.py
│   │       └── vision/
│   │           ├── classification.py
│   │           ├── dino.py
│   │           ├── esvit_swin_backbone.py
│   │           ├── inpainting.py
│   │           ├── knn_monitor.py
│   │           ├── mit_backbone.py
│   │           ├── swin_backbone.py
│   │           ├── utils.py
│   │           └── vit_backbone.py
│   ├── post_training/
│   │   ├── __init__.py
│   │   ├── arguments.py
│   │   ├── checkpointing.py
│   │   ├── generate.py
│   │   ├── loss_func.py
│   │   ├── model_builder.py
│   │   ├── non_loss_data_func.py
│   │   └── utils.py
│   ├── rl/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── agent/
│   │   │   ├── __init__.py
│   │   │   ├── api.py
│   │   │   ├── huggingface_dataset_agent.py
│   │   │   ├── pass_at_evaluation_agent.py
│   │   │   ├── remote_agent.py
│   │   │   ├── reward_only_agent.py
│   │   │   └── weighted_multi_task.py
│   │   ├── inference/
│   │   │   ├── __init__.py
│   │   │   ├── api.py
│   │   │   ├── inference_interface.py
│   │   │   └── megatron.py
│   │   ├── logging.py
│   │   ├── parallel_utils.py
│   │   ├── rl_utils.py
│   │   ├── sequence_packing_utils.py
│   │   └── server/
│   │       ├── __init__.py
│   │       ├── agent/
│   │       │   ├── __init__.py
│   │       │   └── fastapi_env_server.py
│   │       ├── api.py
│   │       └── inference/
│   │           ├── __init__.py
│   │           └── inference_interface_server.py
│   └── training/
│       ├── __init__.py
│       ├── argument_utils.py
│       ├── arguments.py
│       ├── async_utils.py
│       ├── checkpointing.py
│       ├── config/
│       │   ├── __init__.py
│       │   ├── common_config.py
│       │   ├── resilience_config.py
│       │   └── training_config.py
│       ├── datasets/
│       │   ├── README.md
│       │   ├── __init__.py
│       │   ├── data_samplers.py
│       │   ├── fim_dataset.py
│       │   └── sft_dataset.py
│       ├── dgrad_logging.py
│       ├── dist_signal_handler.py
│       ├── ft_integration.py
│       ├── global_vars.py
│       ├── initialize.py
│       ├── inprocess_restart.py
│       ├── log_handler.py
│       ├── one_logger_utils.py
│       ├── theoretical_memory_usage.py
│       ├── training.py
│       ├── utils.py
│       ├── wandb_utils.py
│       └── yaml_arguments.py
├── model_provider.py
├── pretrain_bert.py
├── pretrain_gpt.py
├── pretrain_mamba.py
├── pretrain_t5.py
├── pretrain_vlm.py
├── pyproject.toml
├── scripts/
│   └── check_api_backwards_compatibility.py
├── setup.py
├── tasks/
│   ├── data_utils.py
│   ├── eval_utils.py
│   └── finetune_utils.py
├── tests/
│   ├── README.md
│   ├── __init__.py
│   ├── functional_tests/
│   │   ├── __init__.py
│   │   ├── python_test_utils/
│   │   │   ├── __init__.py
│   │   │   ├── common.py
│   │   │   ├── compute_golden_statistics.py
│   │   │   ├── conftest.py
│   │   │   ├── get_test_results_from_tensorboard_logs.py
│   │   │   ├── test_grpo_training_loop.py
│   │   │   ├── test_inference_regular_pipeline.py
│   │   │   ├── test_optimizer_grads_match.py
│   │   │   ├── test_pretraining_regular_pipeline.py
│   │   │   └── test_pretraining_resume_checkpoint_pipeline.py
│   │   ├── shell_test_utils/
│   │   │   ├── _run_training.sh
│   │   │   ├── run_batch_ci_tests.sh
│   │   │   ├── run_ci_test.sh
│   │   │   └── start_interactive_job.sh
│   │   └── test_cases/
│   │       ├── bert/
│   │       │   ├── bert_mcore_tp1_pp2/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── bert_mcore_tp1_pp4_vp2/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── bert_mcore_tp2_pp2/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── bert_mcore_tp2_pp2_frozen_resume_torch_dist/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── bert_mcore_tp2_pp2_local_spec/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── bert_mcore_tp2_pp2_resume_torch_dist/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── bert_mcore_tp2_pp2_resume_torch_dist_local_spec/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── bert_mcore_tp4_pp1/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── bert_release/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   └── bert_release_sm/
│   │       │       ├── golden_values_dev_dgx_gb200.json
│   │       │       ├── golden_values_dev_dgx_h100.json
│   │       │       └── model_config.yaml
│   │       ├── common/
│   │       │   ├── ckpt_converter/
│   │       │   │   ├── __main__.py
│   │       │   │   └── model_config.yaml
│   │       │   └── moe_perf/
│   │       │       ├── __main__.py
│   │       │       ├── baseline.json
│   │       │       └── test_cases.py
│   │       ├── gpt/
│   │       │   ├── gpt3_15b_8t_release/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_15b_8t_release_gb200/
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_15b_8t_release_sm/
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_15b_8t_release_sm_gb200/
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_7b_tp1_pp4_memory_speed/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_7b_tp4_pp1_memory_speed/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_reruns_disable/
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_reruns_enable/
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_reruns_persistent_1/
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_reruns_persistent_2/
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_reruns_reshard/
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_reruns_resume/
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_reruns_resume_check_grads/
│   │       │   │   ├── README.md
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_reruns_transient/
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp1_mup/
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp1_uniform_full_recompute/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp2_rope_embeddings/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_disable_bias_linear/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_sequence_parallel/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_swiglu/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_vp1/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgxh100_dgxc.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist/
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_gdn/
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_cp2/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/
│   │       │   │   └── golden_values_dev_dgxh100_dgxc.json
│   │       │   ├── gpt3_mcore_te_tp2_pp2_mla/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_resume_torch_dist/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_dev_dgxh100_dgxc.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp1_pp2/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp1_pp2_fp16/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp1_pp2_resume_torch_dist/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp1_pp4/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp1_pp4_resume_torch_dist/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp2_pp2_uninstall_te/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp4_pp1/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp4_pp1_resume_torch/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp4_pp1_resume_torch_dist/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_gb200_2nd.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100_2nd.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_weekly_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── model_config.yaml
│   │       │   │   └── tp_comm_overlap_cfg.yaml
│   │       │   ├── gpt3_weekly_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_validation/
│   │       │   │   ├── cuda_graphs.py
│   │       │   │   ├── cuda_graphs.sh
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_grpo_basic_function/
│   │       │   │   ├── env_config.yaml
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest/
│   │       │   │   ├── env_config.yaml
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github/
│   │       │   │   ├── env_config.yaml
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_grpo_tp4_pp1_dp2_8b_cudagraphs_throughput/
│   │       │   │   ├── env_config.yaml
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_grpo_tp4_pp1_dp2_8b_throughput/
│   │       │   │   ├── env_config.yaml
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_grpo_tp4_pp1_dp2_8b_throughput_github/
│   │       │   │   ├── env_config.yaml
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/
│   │       │   │   ├── README.md
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── model_config.yaml
│   │       │   │   └── test_prompts.jsonl
│   │       │   ├── gpt_static_inference_tp1_pp1_583m_cudagraphs/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   └── gpt_static_inference_tp1_pp1_583m_logitsmatch/
│   │       │       ├── golden_values_dev_dgx_a100.json
│   │       │       ├── golden_values_dev_dgx_h100.json
│   │       │       └── model_config.yaml
│   │       ├── gpt-nemo/
│   │       │   ├── bert-nemo_340m_mr_mbs2_gbs32_mcore_te_tp2_pp2_1N8G/
│   │       │   │   └── model_config.yaml
│   │       │   ├── gemma2-nemo_2b_mr_mbs1_gbs8_mcore_te_tp4_pp1_cp1_1N8G/
│   │       │   │   └── model_config.yaml
│   │       │   ├── llama3-nemo_8b_mr_mbs1_gbs8_mcore_te_8experts_tp2_ep2_pp2_dgx_a100_1N8G/
│   │       │   │   └── model_config.yaml
│   │       │   ├── llama3-nemo_8b_mr_mbs4_gbs64_mcore_te_tp1_pp1_cp2_dgx_a100_1N8G/
│   │       │   │   └── model_config.yaml
│   │       │   ├── mixtral-nemo_8x7b_mr_mbs1_gbs8_mcore_te_tp2_pp1_ep2_1N8G/
│   │       │   │   └── model_config.yaml
│   │       │   └── t5-nemo_220m_mr_mbs4_gbs64_te_tp1_pp1_1N8G/
│   │       │       └── model_config.yaml
│   │       ├── hybrid/
│   │       │   ├── hybrid_dynamic_inference_tp1_pp1_dp8_583m/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── hybrid_dynamic_inference_tp1_pp1_dp8_583m_chunked_prefill/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── hybrid_mr_mcore_te_tp1_pp2_vpp2_cp1_dgx_a100_1N8G/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── hybrid_static_inference_tp1_pp1_2B_cudagraphs/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   └── hybrid_static_inference_tp1_pp1_2B_logitsmatch/
│   │       │       ├── golden_values_dev_dgx_h100.json
│   │       │       └── model_config.yaml
│   │       ├── mimo/
│   │       │   ├── mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8/
│   │       │   │   ├── golden_values_dev.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8_seq_packing/
│   │       │   │   ├── golden_values_dev.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   └── mimo_vlm_pretrain_convergence_tp1_pp1_cp2_dp8/
│   │       │       ├── golden_values_dev.json
│   │       │       ├── golden_values_dev_dgx_h100.json
│   │       │       └── model_config.yaml
│   │       ├── mixtral/
│   │       │   ├── deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_release/
│   │       │   │   └── model_config.yaml
│   │       │   ├── deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_release_sm/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── deepseekv3_proxy_flex_tp2pp2emp16etp1cp1_gb_200_release/
│   │       │   │   └── model_config.yaml
│   │       │   ├── deepseekv3_proxy_flex_tp2pp2emp16etp1cp1_gb_200_release_sm/
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── mixtral_8x22b_tp2pp8ep8vpp1_release/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── mixtral_8x7b_alltoall_tp2pp4ep4_release/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── mixtral_8x7b_alltoall_tp2pp4ep4_release_sm/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   └── mixtral_8x7b_tp1pp4ep8vpp8_release/
│   │       │       ├── golden_values_dev_dgx_h100.json
│   │       │       ├── golden_values_lts_dgx_a100.json
│   │       │       └── model_config.yaml
│   │       ├── moe/
│   │       │   ├── deepseek_proxy_fsdp_ep2_fsdp2/
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── deepseek_proxy_fsdp_ep2_fsdp2_1node/
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/
│   │       │   │   ├── golden_values_dev.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgxh100_dgxc.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/
│   │       │   │   ├── golden_values_dev.json
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_dev_dgxa100_dracooci.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci-ord.json
│   │       │   │   ├── golden_values_lts_dgxa100_dracooci.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/
│   │       │   │   ├── golden_values_dev.json
│   │       │   │   ├── golden_values_lts.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon/
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   ├── golden_values_dev_dgxh100_dgxc.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/
│   │       │   │   ├── golden_values_dev_dgx_gb200.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_dev_dgx_h100_2nd.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_dynamic_inference_tp4_etp1_pp1_ep8_16B_logitsmatch_cudagraph_zmq/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_dynamic_inference_tp4_etp1_pp1_ep8_16B_logitsmatch_zmq/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_dynamic_inference_tp4_etp1_pp1_ep8_16B_logitsmatch_zmq_suspend_resume/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── model_config.yaml
│   │       │   │   └── prompts.json
│   │       │   ├── gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest/
│   │       │   │   ├── env_config.yaml
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   ├── gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   └── model_config.yaml
│   │       │   └── gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/
│   │       │       ├── golden_values_dev_dgx_h100.json
│   │       │       └── model_config.yaml
│   │       ├── multimodal-llava/
│   │       │   ├── multimodal_llava_mcore_te_tp1_pp1/
│   │       │   │   ├── golden_values_dev_dgx_a100.json
│   │       │   │   ├── golden_values_dev_dgx_h100.json
│   │       │   │   ├── golden_values_lts_dgx_a100.json
│   │       │   │   └── model_config.yaml
│   │       │   └── multimodal_llava_mcore_te_tp4_sp_cp2/
│   │       │       ├── golden_values_dev_dgx_a100.json
│   │       │       ├── golden_values_dev_dgx_h100.json
│   │       │       ├── golden_values_lts_dgx_a100.json
│   │       │       └── model_config.yaml
│   │       └── t5/
│   │           ├── t5_11b_mcore_tp4_pp1/
│   │           │   ├── golden_values_dev_dgx_a100.json
│   │           │   ├── golden_values_dev_dgx_h100.json
│   │           │   └── model_config.yaml
│   │           ├── t5_mcore_te_tp1_pp1_vp1_resume_torch/
│   │           │   ├── golden_values_dev_dgx_a100.json
│   │           │   ├── golden_values_dev_dgx_a100_2nd.json
│   │           │   ├── golden_values_dev_dgx_h100.json
│   │           │   ├── golden_values_dev_dgx_h100_2nd.json
│   │           │   ├── golden_values_dev_dgxa100_dracooci-ord.json
│   │           │   ├── golden_values_dev_dgxa100_dracooci.json
│   │           │   ├── golden_values_lts_dgx_a100.json
│   │           │   └── model_config.yaml
│   │           ├── t5_mcore_te_tp2_pp1_vp1/
│   │           │   ├── golden_values_dev_dgx_a100.json
│   │           │   ├── golden_values_dev_dgx_h100.json
│   │           │   ├── golden_values_dev_dgxa100_dracooci-ord.json
│   │           │   ├── golden_values_dev_dgxa100_dracooci.json
│   │           │   ├── golden_values_lts_dgx_a100.json
│   │           │   └── model_config.yaml
│   │           ├── t5_mcore_te_tp2_pp1_vp1_sequence_parallel/
│   │           │   ├── golden_values_dev_dgx_a100.json
│   │           │   ├── golden_values_dev_dgx_h100.json
│   │           │   ├── golden_values_dev_dgxa100_dracooci-ord.json
│   │           │   ├── golden_values_dev_dgxa100_dracooci.json
│   │           │   ├── golden_values_lts_dgx_a100.json
│   │           │   └── model_config.yaml
│   │           ├── t5_mcore_te_tp4_pp1/
│   │           │   ├── golden_values_dev_dgx_a100.json
│   │           │   ├── golden_values_dev_dgx_h100.json
│   │           │   ├── golden_values_lts_dgx_a100.json
│   │           │   └── model_config.yaml
│   │           ├── t5_mcore_te_tp4_pp1_resume_torch_dist/
│   │           │   ├── golden_values_dev_dgx_a100.json
│   │           │   ├── golden_values_dev_dgx_h100.json
│   │           │   ├── golden_values_dev_dgx_h100_2nd.json
│   │           │   ├── golden_values_lts_dgx_a100.json
│   │           │   └── model_config.yaml
│   │           ├── t5_mcore_tp1_pp1_vp1/
│   │           │   ├── golden_values_dev_dgx_a100.json
│   │           │   ├── golden_values_dev_dgx_h100.json
│   │           │   ├── golden_values_dev_dgxa100_dracooci-ord.json
│   │           │   ├── golden_values_dev_dgxa100_dracooci.json
│   │           │   ├── golden_values_lts_dgx_a100.json
│   │           │   └── model_config.yaml
│   │           ├── t5_mcore_tp1_pp1_vp1_resume_torch/
│   │           │   ├── golden_values_dev_dgx_a100.json
│   │           │   ├── golden_values_dev_dgx_a100_2nd.json
│   │           │   ├── golden_values_dev_dgx_h100.json
│   │           │   ├── golden_values_dev_dgx_h100_2nd.json
│   │           │   ├── golden_values_dev_dgxa100_dracooci-ord.json
│   │           │   ├── golden_values_dev_dgxa100_dracooci.json
│   │           │   ├── golden_values_lts_dgx_a100.json
│   │           │   └── model_config.yaml
│   │           ├── t5_mcore_tp2_pp1_vp1/
│   │           │   ├── golden_values_dev_dgx_a100.json
│   │           │   ├── golden_values_dev_dgx_h100.json
│   │           │   ├── golden_values_dev_dgxa100_dracooci-ord.json
│   │           │   ├── golden_values_dev_dgxa100_dracooci.json
│   │           │   ├── golden_values_lts_dgx_a100.json
│   │           │   └── model_config.yaml
│   │           ├── t5_mcore_tp4_pp1/
│   │           │   ├── golden_values_dev_dgx_a100.json
│   │           │   ├── golden_values_dev_dgx_h100.json
│   │           │   ├── golden_values_lts_dgx_a100.json
│   │           │   └── model_config.yaml
│   │           ├── t5_mcore_tp4_pp1_resume_torch_dist/
│   │           │   ├── golden_values_dev_dgx_a100.json
│   │           │   ├── golden_values_dev_dgx_h100.json
│   │           │   ├── golden_values_dev_dgx_h100_2nd.json
│   │           │   ├── golden_values_lts_dgx_a100.json
│   │           │   └── model_config.yaml
│   │           ├── t5_release/
│   │           │   ├── golden_values_dev_dgx_h100.json
│   │           │   ├── golden_values_lts_dgx_a100.json
│   │           │   └── model_config.yaml
│   │           ├── t5_release_sm/
│   │           │   ├── golden_values_dev_dgx_gb200.json
│   │           │   ├── golden_values_dev_dgx_h100.json
│   │           │   └── model_config.yaml
│   │           ├── t5_weekly_mcore_te_tp2_pp1_vp1/
│   │           │   └── golden_values_lts_dgx_a100.json
│   │           └── t5_weekly_mcore_te_tp2_pp1_vp1_sequence_parallel/
│   │               └── golden_values_lts_dgx_a100.json
│   ├── test_utils/
│   │   ├── python_scripts/
│   │   │   ├── approve_merge_gate.py
│   │   │   ├── auto_reminder.py
│   │   │   ├── auto_reminder_github.py
│   │   │   ├── check_status_of_main.py
│   │   │   ├── dashboard.py
│   │   │   ├── download_coverage_results.py
│   │   │   ├── download_golden_values.py
│   │   │   ├── download_unit_tests_dataset.py
│   │   │   ├── generate_jet_trigger_job.py
│   │   │   ├── generate_local_jobs.py
│   │   │   ├── launch_jet_workload.py
│   │   │   ├── launch_nemo_run_workload.py
│   │   │   ├── notify.py
│   │   │   ├── recipe_parser.py
│   │   │   ├── swap_pr_labels.py
│   │   │   └── wait_for_resources.py
│   │   └── recipes/
│   │       ├── _build-mcore-dev.yaml
│   │       ├── _build-mcore-lts.yaml
│   │       ├── _build-nemo.yaml
│   │       ├── gb200/
│   │       │   ├── gpt.yaml
│   │       │   ├── moe-1node.yaml
│   │       │   ├── moe.yaml
│   │       │   └── unit-tests.yaml
│   │       └── h100/
│   │           ├── bert.yaml
│   │           ├── ckpt_converter.yaml
│   │           ├── gpt-dynamic-inference-cuda-graphs.yaml
│   │           ├── gpt-dynamic-inference-with-coordinator.yaml
│   │           ├── gpt-dynamic-inference.yaml
│   │           ├── gpt-grads.yaml
│   │           ├── gpt-grpo.yaml
│   │           ├── gpt-nemo.yaml
│   │           ├── gpt-static-inference.yaml
│   │           ├── gpt.yaml
│   │           ├── mamba-dynamic-inference.yaml
│   │           ├── mamba-static-inference.yaml
│   │           ├── mamba.yaml
│   │           ├── mimo.yaml
│   │           ├── module_performance.yaml
│   │           ├── moe-dynamic-inference-with-coordinator.yaml
│   │           ├── moe-dynamic-inference.yaml
│   │           ├── moe-grpo.yaml
│   │           ├── moe-static-inference.yaml
│   │           ├── moe.yaml
│   │           ├── multimodal-llava.yaml
│   │           ├── t5.yaml
│   │           └── unit-tests.yaml
│   └── unit_tests/
│       ├── __init__.py
│       ├── a2a_overlap/
│       │   ├── test_cuda_graphed_schedule_chunk_1f1b.py
│       │   ├── test_schedule_chunk_1f1b.py
│       │   ├── test_schedule_layer_1f1b.py
│       │   └── utils.py
│       ├── conftest.py
│       ├── data/
│       │   ├── __init__.py
│       │   ├── test_bin_reader.py
│       │   ├── test_builder.py
│       │   ├── test_fim_dataset.py
│       │   ├── test_gpt_dataset.py
│       │   ├── test_multimodal_dataset.py
│       │   ├── test_preprocess_data.py
│       │   └── test_preprocess_mmdata.py
│       ├── dist_checkpointing/
│       │   ├── __init__.py
│       │   ├── conftest.py
│       │   ├── models/
│       │   │   ├── __init__.py
│       │   │   ├── common.py
│       │   │   ├── test_bert_model.py
│       │   │   ├── test_gpt_model.py
│       │   │   ├── test_mamba.py
│       │   │   ├── test_mlp_glu.py
│       │   │   ├── test_moe_experts.py
│       │   │   └── test_t5_model.py
│       │   ├── test_async_save.py
│       │   ├── test_checkpointable.py
│       │   ├── test_fp8.py
│       │   ├── test_fully_parallel.py
│       │   ├── test_global_metadata_reuse.py
│       │   ├── test_layer_wise_optimizer.py
│       │   ├── test_local.py
│       │   ├── test_mapping.py
│       │   ├── test_msc.py
│       │   ├── test_nonpersistent.py
│       │   ├── test_optimizer.py
│       │   ├── test_pipeline_parallel_layout.py
│       │   ├── test_replication.py
│       │   ├── test_safe_globals.py
│       │   ├── test_serialization.py
│       │   ├── test_strict.py
│       │   ├── test_torch_dist.py
│       │   └── utils.py
│       ├── distributed/
│       │   ├── megatron_fsdp/
│       │   │   ├── test_mcore_fully_sharded_data_parallel.py
│       │   │   ├── test_mfsdp_fully_shard.py
│       │   │   └── utils.py
│       │   ├── test_distributed_data_parallel.py
│       │   ├── test_finalize_model_grads.py
│       │   ├── test_grad_reduce_for_replicated_embedder.py
│       │   ├── test_grad_sync_with_expert_parallel.py
│       │   ├── test_param_and_grad_buffer.py
│       │   ├── test_reduce_scatter_with_fp32_accumulation.py
│       │   └── test_torch_fully_sharded_parallel.py
│       ├── export/
│       │   └── trtllm/
│       │       ├── __init__.py
│       │       ├── test_distributed_fp8.py
│       │       ├── test_single_device_fp8.py
│       │       ├── test_trtllm_distributed_gpu_converter.py
│       │       ├── test_trtllm_helper.py
│       │       ├── test_trtllm_layers.py
│       │       └── test_trtllm_single_device_converter.py
│       ├── extension/
│       │   └── test_kitchen_sdpa.py
│       ├── find_test_cases.py
│       ├── fusions/
│       │   ├── test_bias_dropout_fusion.py
│       │   ├── test_mla_yarn_rope_apply.py
│       │   ├── test_rmsnorm_residual_fusion.py
│       │   ├── test_swiglu_fusion.py
│       │   ├── test_torch_softmax.py
│       │   └── test_weighted_squared_relu_fusion.py
│       ├── inference/
│       │   ├── __init__.py
│       │   ├── contexts/
│       │   │   ├── attention_metadata/
│       │   │   │   ├── test_mamba_metadata.py
│       │   │   │   └── test_tensor_ops.py
│       │   │   ├── test_dynamic_context.py
│       │   │   └── test_dynamic_prefix_caching.py
│       │   ├── engines/
│       │   │   ├── __init__.py
│       │   │   ├── test_dynamic_engine.py
│       │   │   ├── test_dynamic_events.py
│       │   │   ├── test_mamba_prefix_caching_e2e.py
│       │   │   └── test_static_engine.py
│       │   ├── model_inference_wrappers/
│       │   │   ├── __init__.py
│       │   │   ├── gpt/
│       │   │   │   └── test_gpt_inference_wrapper.py
│       │   │   └── t5/
│       │   │       └── test_t5_inference_wrapper.py
│       │   ├── test_batch_dimension_utils.py
│       │   ├── test_common_inference_params.py
│       │   ├── test_communication_utils.py
│       │   ├── test_data_parallel_inference_coordinator.py
│       │   ├── test_dynamic_prefix_caching_coordinator.py
│       │   ├── test_flash_decode.py
│       │   ├── test_inference_config.py
│       │   ├── test_inference_utils.py
│       │   ├── test_moe_inference.py
│       │   ├── test_moe_permute.py
│       │   ├── test_mxfp8_utils.py
│       │   ├── test_scheduler.py
│       │   ├── test_stop_words.py
│       │   ├── test_wandb_logging.py
│       │   └── text_generation_controllers/
│       │       ├── __init__.py
│       │       ├── test_encoder_decoder_text_generation_controller.py
│       │       ├── test_text_generation_controller.py
│       │       └── test_vlm_text_generation_controller.py
│       ├── models/
│       │   ├── __init__.py
│       │   ├── test_base_embedding.py
│       │   ├── test_bert_model.py
│       │   ├── test_clip_vit_model.py
│       │   ├── test_gpt_model.py
│       │   ├── test_gpt_model_batch_invariant.py
│       │   ├── test_gpt_model_quantization.py
│       │   ├── test_heterogeneous_gpt_model.py
│       │   ├── test_llava_model.py
│       │   ├── test_mamba_model.py
│       │   ├── test_mamba_moe_model.py
│       │   ├── test_mimo_audio_submodules.py
│       │   ├── test_mimo_embedding_alignment.py
│       │   ├── test_mimo_model.py
│       │   ├── test_mimo_partition.py
│       │   ├── test_mimo_submodules.py
│       │   ├── test_multimodal_projector.py
│       │   ├── test_radio_model.py
│       │   └── test_t5_model.py
│       ├── optimizer/
│       │   ├── __init__.py
│       │   └── test_optimizer_config.py
│       ├── pipeline_parallel/
│       │   ├── __init__.py
│       │   ├── test_bridge_communicator.py
│       │   ├── test_fine_grained_activation_offloading.py
│       │   ├── test_helpers.py
│       │   ├── test_multimodule_communicator.py
│       │   ├── test_multimodule_schedules.py
│       │   ├── test_pipeline_layout.py
│       │   └── test_schedules.py
│       ├── post_training/
│       │   ├── __init__.py
│       │   ├── test_modelopt_model_builder.py
│       │   └── test_modelopt_module_spec.py
│       ├── resharding/
│       │   ├── test_communication_scheduler.py
│       │   ├── test_dp_balancing.py
│       │   ├── test_model_swap.py
│       │   ├── test_mxfp8_refit.py
│       │   ├── test_task_segmenter.py
│       │   └── test_workload_packer.py
│       ├── rl/
│       │   ├── test_grouped_rollouts.py
│       │   ├── test_rl_batch_invariant.py
│       │   ├── test_rl_utils.py
│       │   └── test_sequence_packing_utils.py
│       ├── run_ci_test.sh
│       ├── ssm/
│       │   ├── ops/
│       │   │   ├── test_causal_conv1d_varlen.py
│       │   │   ├── test_ops_init.py
│       │   │   ├── test_ssd_bmm.py
│       │   │   ├── test_ssd_chunk_scan.py
│       │   │   ├── test_ssd_chunk_state.py
│       │   │   ├── test_ssd_combined.py
│       │   │   ├── test_ssd_state_passing.py
│       │   │   └── test_ssm_kernel.py
│       │   ├── test_causal_conv1d_triton.py
│       │   ├── test_gated_delta_net.py
│       │   ├── test_mamba_block.py
│       │   ├── test_mamba_context_parallel.py
│       │   ├── test_mamba_hybrid_layer_allocation.py
│       │   ├── test_mamba_layer.py
│       │   └── test_mamba_mixer.py
│       ├── tensor_parallel/
│       │   ├── __init__.py
│       │   ├── test_cross_entropy.py
│       │   ├── test_data.py
│       │   ├── test_initialization.py
│       │   ├── test_layers.py
│       │   ├── test_mappings.py
│       │   ├── test_random.py
│       │   └── test_tensor_parallel_utils.py
│       ├── test_api_backwards_compat_setup.py
│       ├── test_argument_utils.py
│       ├── test_basic.py
│       ├── test_checkpointing.py
│       ├── test_fp8_param.py
│       ├── test_fp8_utils.py
│       ├── test_hyper_comm_grid.py
│       ├── test_imports.py
│       ├── test_inference.py
│       ├── test_layer_wise_optimizer.py
│       ├── test_lion_optimizer.py
│       ├── test_local_multi_tensor_fns.py
│       ├── test_model_configs.py
│       ├── test_muon_optimizer.py
│       ├── test_nccl_allocator.py
│       ├── test_num_microbatches_calculator.py
│       ├── test_optimizer.py
│       ├── test_optimizer_cpu_offloading.py
│       ├── test_optimizer_param_scheduler.py
│       ├── test_parallel_state.py
│       ├── test_process_groups_config.py
│       ├── test_training.py
│       ├── test_typed_torch.py
│       ├── test_utilities.py
│       ├── test_utils.py
│       ├── tokenizers/
│       │   └── test_tokenizer.py
│       ├── transformer/
│       │   ├── __init__.py
│       │   ├── experimental_attention_variant/
│       │   │   ├── test_absorbed_mla.py
│       │   │   └── test_attention_variant_dsa.py
│       │   ├── moe/
│       │   │   ├── __init__.py
│       │   │   ├── conftest.py
│       │   │   ├── test_a2a_token_dispatcher.py
│       │   │   ├── test_aux_loss.py
│       │   │   ├── test_grouped_mlp.py
│       │   │   ├── test_latent_moe_layer.py
│       │   │   ├── test_moe_layer.py
│       │   │   ├── test_moe_layer_discrepancy.py
│       │   │   ├── test_multihot_indices_converter.py
│       │   │   ├── test_router_replay.py
│       │   │   ├── test_routers.py
│       │   │   ├── test_sequential_mlp.py
│       │   │   ├── test_shared_experts.py
│       │   │   ├── test_token_dispatcher.py
│       │   │   └── test_upcycling.py
│       │   ├── test_attention.py
│       │   ├── test_attention_no_rope.py
│       │   ├── test_attention_packed_seq.py
│       │   ├── test_core_attention.py
│       │   ├── test_cuda_graphs.py
│       │   ├── test_full_cuda_graph.py
│       │   ├── test_mlp.py
│       │   ├── test_module.py
│       │   ├── test_multi_latent_attention.py
│       │   ├── test_multi_token_prediction.py
│       │   ├── test_mup.py
│       │   ├── test_quantization_config.py
│       │   ├── test_relative_attention.py
│       │   ├── test_rope.py
│       │   ├── test_spec_customization.py
│       │   ├── test_submodule_callables.py
│       │   ├── test_te_layers_batch_invariant.py
│       │   ├── test_thd_correctness.py
│       │   ├── test_transformer_block.py
│       │   ├── test_transformer_block_custom_pgs.py
│       │   ├── test_transformer_layer.py
│       │   ├── test_utils.py
│       │   └── test_vision_cuda_graphs.py
│       └── utils/
│           └── test_experimental_log_once.py
├── tools/
│   ├── __init__.py
│   ├── autoformat.sh
│   ├── bert_embedding/
│   │   ├── __init__.py
│   │   ├── dataset.py
│   │   ├── embed.py
│   │   ├── external_libs.py
│   │   └── huggingface.py
│   ├── build_sequences_per_dataset.py
│   ├── check_copyright.py
│   ├── checkpoint/
│   │   ├── checkpoint_inspector.py
│   │   ├── convert.py
│   │   ├── hybrid_conversion.py
│   │   ├── loader_base.py
│   │   ├── loader_core.py
│   │   ├── loader_legacy.py
│   │   ├── loader_llama_mistral.py
│   │   ├── loader_llava.py
│   │   ├── loader_mixtral_hf.py
│   │   ├── saver_base.py
│   │   ├── saver_core.py
│   │   ├── saver_hf_llava.py
│   │   ├── saver_legacy.py
│   │   ├── saver_llava.py
│   │   ├── schema_base.py
│   │   ├── schema_core.py
│   │   ├── schema_hf.py
│   │   └── utils.py
│   ├── copyright.sh
│   ├── linter.py
│   ├── merge_datasets.py
│   ├── preprocess_data.py
│   ├── preprocess_data_nmt.py
│   ├── preprocess_mmdata.py
│   ├── report_theoretical_memory.py
│   ├── run_dynamic_text_generation_server.py
│   ├── run_inference_performance_test.py
│   ├── run_mamba_text_generation_server.py
│   ├── run_mamba_text_generation_server_completions.py
│   ├── run_text_generation_server.py
│   ├── run_vlm_text_generation.py
│   ├── text_generation_cli.py
│   ├── trigger_internal_ci.md
│   ├── trigger_internal_ci.py
│   ├── upgrade_dependencies.sh
│   └── wait_daemon.sh
└── train_rl.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .coderabbit.yaml
================================================
# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json
language: "en-US"

# Only comment on Critical/Major bugs. No Minor, Trivial, or style comments.
tone_instructions: "Only comment on Critical or Major bugs. Never comment on Minor issues, style, refactoring, or suggestions. When in doubt, stay silent."

reviews:
  # Use chill profile - filters out nitpicks automatically
  profile: "chill"

  # Disable all summary features
  high_level_summary: false
  high_level_summary_in_walkthrough: false

  # Disable walkthrough comment entirely
  collapse_walkthrough: true
  changed_files_summary: false
  sequence_diagrams: false

  # Disable status/effort estimates
  review_status: false
  commit_status: false
  estimate_code_review_effort: false

  # Disable auto-suggestions for labels/reviewers
  suggested_labels: false
  suggested_reviewers: false

  # Disable related issues/PRs lookup
  assess_linked_issues: false
  related_issues: false
  related_prs: false

  # Auto-review disabled - only review when explicitly requested via @coderabbitai review
  auto_review:
    enabled: false

chat:
  auto_reply: true


================================================
FILE: .flake8
================================================
[flake8]
max-line-length = 100
extend-ignore = E203,E501,F401,E402,E714
per-file-ignores = __init__.py:F401

================================================
FILE: .github/CODEOWNERS
================================================
megatron/core/ @NVIDIA/core-adlr @NVIDIA/core-nemo

megatron/core/models/gpt/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/gpt

megatron/core/models/multimodal/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/multi-modal

megatron/core/models/mamba/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/hybrid-mamba
megatron/core/ssm/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/hybrid-mamba

megatron/core/datasets/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/datasets

megatron/core/tokenizers/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/tokenizers

megatron/core/distributed/fsdp/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/megatron-fsdp

megatron/core/transformer/fsdp_dtensor_checkpoint.py @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/megatron-fsdp

megatron/core/dist_checkpointing/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/dist-checkpointing

megatron/core/optimizer/distrib_optimizer/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/dist-optimizer

megatron/core/inference/modelopt_support @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/quantization-and-inference

megatron/core/datasets/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/datasets

megatron/core/pipeline_parallel/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/pipeline-parallelism

megatron/core/transformer/ @NVIDIA/core-adlr @NVIDIA/core-nemo

megatron/core/transformer/moe/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/mixture-of-experts-adlr @NVIDIA/mixture-of-experts-devtech

megatron/core/inference/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/inference

megatron/core/parallel_state.py @NVIDIA/core-adlr @NVIDIA/core-nemo

megatron/core/post_training/ @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/post-training

megatron/post_training/ @NVIDIA/post-training

megatron/core/transformer/cuda_graphs.py @NVIDIA/core-adlr @NVIDIA/core-nemo @NVIDIA/cuda-graphs

megatron/training/ @NVIDIA/training-adlr @NVIDIA/training-nemo
megatron/training/arguments.py

.gitlab/ @NVIDIA/ci
.github/ @NVIDIA/ci
.gitlab-ci.yml @NVIDIA/ci
docker/  @NVIDIA/ci
tests/functional_tests/python_test_utils/ @NVIDIA/ci
tests/functional_tests/shell_test_utils/ @NVIDIA/ci
tests/test_utils/recipes/ @NVIDIA/ci
tests/unit_tests/run_ci_test.sh @NVIDIA/ci

# API Backwards Compatibility Check
scripts/check_api_backwards_compatibility.py @NVIDIA/ci
scripts/README_API_COMPAT.md @NVIDIA/ci
.github/workflows/check_api_backwards_compatibility_workflow.yml @NVIDIA/ci
docs/api-backwards-compatibility-check.md @NVIDIA/ci
tests/unit_tests/test_api_backwards_compat_setup.py @NVIDIA/ci

megatron/rl/ @NVIDIA/reinforcement-learning
examples/rl/ @NVIDIA/reinforcement-learning
test/unit_tests/test_rl_utils.py @NVIDIA/reinforcement-learning
train_rl.py @NVIDIA/reinforcement-learning


================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve the repository or project
title: ""
labels: bug
assignees: ''

---

**Describe the bug**

A clear and concise description of what the bug is. Tag the [@mcore-oncall](https://github.com/orgs/NVIDIA/teams/mcore-oncall) 
to get oncall's attention to this issue.

**Steps/Code to reproduce bug**

Please list *minimal* steps or code snippet for us to be able to reproduce the bug.

A helpful guide on on how to craft a minimal bug report http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports.


**Expected behavior**

A clear and concise description of what you expected to happen.


**Additional context**

Add any other context about the problem here. 


================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: false


================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project
title: ""
labels: enhancement
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

Tag the [@mcore-oncall](https://github.com/orgs/NVIDIA/teams/mcore-oncall) 
to get oncall's attention to this issue.

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.


================================================
FILE: .github/ISSUE_TEMPLATE/question.md
================================================
---
name: QUESTION
about: Ask a question about Megatron-LM that is not a bug, regression or enhancement
  request
title: "[QUESTION]"
labels: ''
assignees: ''

---

**Your question**
Ask a clear and concise question about Megatron-LM. Tag the [@mcore-oncall](https://github.com/orgs/NVIDIA/teams/mcore-oncall) 
to get oncall's attention to this issue.

================================================
FILE: .github/ISSUE_TEMPLATE/regression.md
================================================
---
name: REGRESSION
about: Report a regression in speed or accuracy due to a Megatron-LM update
title: "[REGRESSION]"
labels: ''
assignees: ''

---

**Describe the regression**
A clear and concise description of what the regression is. Tag the [@mcore-oncall](https://github.com/orgs/NVIDIA/teams/mcore-oncall) 
to get oncall's attention to this issue.

**To Reproduce**
Steps to reproduce the behavior. The easier it is to reproduce the faster it will get maintainer attention.

**Previous performance**
What speed or accuracy did you previously see.

**New performance**
What speed or accuracy do you see after the update.

**Stack trace/logs**
If applicable, add the stack trace or logs related to the regression.

**Environment (please complete the following information):**
 - Previous Megatron-LM commit ID
 - New Megatron-LM commit ID
 - Previous PyTorch version
 - New PyTorch version
 - Previous CUDA version
 - New CUDA version
 - Previous NCCL version
 - New NCCL version

**Proposed fix**
If you have a proposal for how to fix the issue state it here or link to a PR.

**Additional context**
Add any other context about the problem here.


================================================
FILE: .github/actions/action.yml
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: "Test Template"
description: "Template for running NeMo tests in a containerized environment"

inputs:
  container-image:
    description: "Container image to use for test"
    required: true
  timeout:
    description: "Max runtime of test in minutes"
    required: false
    default: "30"
  script:
    description: "Test script to execute"
    required: true
  is-optional:
    description: "Pass this job on failure."
    required: false
    default: "false"
  is_unit_test:
    description: "Upload coverage as unit test"
    required: false
    default: "false"
  tag:
    description: Latest or legacy test suite
    required: true
  test_case:
    description: Test case to launch
    required: true
  model:
    description: Model to launch
    required: false
  PAT:
    description: "GitHub Personal Access Token"
    required: true
  is_ci_workload:
    description: "Is CI workload"
    required: true
  is_merge_group:
    description: "Is merge group"
    required: true
  platform:
    description: "Platform to run tests on (e.g. dgx_h100, dgx_gb200)"
    required: false
    default: "dgx_h100"
runs:
  using: "composite"
  steps:
    - name: Print node name
      shell: bash -x -e -u -o pipefail {0}
      run: echo "node_name=$NODE_NAME" | tee -a "$GITHUB_OUTPUT"

    - name: Checkout repository
      uses: actions/checkout@v6

    - name: Change ownership of /home/runner/
      shell: bash
      run: sudo chown -R $(whoami) /home/runner/

    - name: Setup python
      uses: actions/setup-python@v5
      with:
        python-version: 3.12

    - name: Install uuidgen
      shell: bash -x -e -u -o pipefail {0}
      run: |
        apt-get update
        apt-get install -y uuid-runtime

    - name: Create run-script (unit test)
      shell: bash -x -e -u -o pipefail {0}
      if: inputs.is_unit_test == 'true'
      run: |
        echo "::group::Create run-script"
        cmd=$(cat <<'RUN_TEST_EOF'
        #!/bin/bash

        export PYTHONPATH=$(pwd)
        export NEMORUN_HOME=$(pwd)
        export NCCL_DEBUG=INFO
        pip install --no-cache-dir "uv<0.9.29"
        uv venv .venv
        uv cache clean
        uv sync --no-cache --only-group test
        uv run python tests/test_utils/python_scripts/launch_nemo_run_workload.py \
          --scope unit-tests \
          --model unit-tests \
          --test-case "${{ inputs.test_case }}" \
          --environment dev \
          --platform ${{ inputs.platform }} \
          --tag ${{ inputs.tag }} \
          --container-image ${{ inputs.container-image }} \
          --hf-home /mnt/datadrive/TestData/nemo-fw/TestData/HF_HOME

        RUN_TEST_EOF
        )
        echo "$cmd" | tee "job.sh"
        echo "::endgroup::"

    - name: Get PR info
      id: get-pr-info
      if: startsWith(github.ref, 'refs/heads/pull-request/')
      uses: nv-gha-runners/get-pr-info@main

    - name: Install GH CLI
      shell: bash -x -e -u -o pipefail {0}
      run: |
        apt-get update
        apt-get install -y gh

    - name: Has Run tests label
      shell: bash -x -e -u -o pipefail {0}
      id: has-run-tests-label
      env:
        GH_TOKEN: ${{ github.token }}
      run: |
        PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
        HAS_RUN_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. == "Run tests")') || echo "false"
        echo "main=$HAS_RUN_TESTS_LABEL" | tee -a $GITHUB_OUTPUT

    - name: Has Run functional tests label
      shell: bash -x -e -u -o pipefail {0}
      id: has-run-functional-tests-label
      env:
        GH_TOKEN: ${{ github.token }}
        IS_CI_WORKLOAD: ${{ inputs.is_ci_workload }}
      run: |
        PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
        HAS_RUN_FUNCTIONAL_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. == "Run functional tests")') || echo "$IS_CI_WORKLOAD"
        HAS_RUN_FUNCTIONAL_TESTS_LABEL=${HAS_RUN_FUNCTIONAL_TESTS_LABEL:-$IS_CI_WORKLOAD}
        echo "main=$HAS_RUN_FUNCTIONAL_TESTS_LABEL" | tee -a $GITHUB_OUTPUT

    - name: Create run-script (e2e test)
      shell: bash -x -e -u -o pipefail {0}
      if: inputs.is_unit_test == 'false'
      env:
        MODEL: ${{ inputs.model }}
      run: |
        echo "::group::Create run-script"
        cmd=$(cat <<'RUN_TEST_EOF'
        #!/bin/bash
        set -euxo pipefail

        if [ "${{ inputs.is_merge_group }}" == "true" ]; then
          ARGS=(
            --scope mr-github
            --n-repeat 1
          )
        elif [ "${{ steps.has-run-tests-label.outputs.main }}" == "true" ]; then
          ARGS=(
            --scope mr-github
            --enable-lightweight-mode
            --n-repeat 1
          )
        elif [ "${{ steps.has-run-functional-tests-label.outputs.main }}" == "true" ]; then
          ARGS=(
            --scope mr-github
            --n-repeat 5
          )
        else
          ARGS=(
            --scope mr-github-slim
            --n-repeat 5
          )
        fi

        export PYTHONPATH=$(pwd)
        export NEMORUN_HOME=$(pwd)
        pip install --no-cache-dir "uv<0.9.29"
        uv venv .venv
        uv cache clean
        uv sync --no-cache --only-group test
        uv run python tests/test_utils/python_scripts/launch_nemo_run_workload.py \
          ${ARGS[@]} \
          --model ${{ inputs.model }} \
          --test-case ${{ inputs.test_case }} \
          --environment dev \
          --platform ${{ inputs.platform }} \
          --container-image ${{ inputs.container-image }} \
          --data-dir /mnt/datadrive/TestData/megatron-lm/artifacts \
          --hf-home /mnt/datadrive/TestData/nemo-fw/TestData/HF_HOME

        RUN_TEST_EOF
        )
        echo "$cmd" | tee "job.sh"
        echo "::endgroup::"

    - name: Set timeout
      shell: bash -x -e -u -o pipefail {0}
      id: timeout_in_seconds
      run: |
        echo "::group::Set timeout"
        echo "main=$(( ${{ inputs.timeout }} * 60 ))" | tee -a "$GITHUB_OUTPUT"
        echo "::endgroup::"

    - name: Pull container
      shell: bash -x -e -u -o pipefail {0}
      run: |
        echo "::group::Pull container"
        docker pull ${{ inputs.container-image }}
        echo "::endgroup::"

    - name: Run main script
      shell: bash -x -e -u -o pipefail {0}
      id: run-main-script
      run: |
        echo "::group::Run main script"
        EXIT_CODE=0
        /bin/bash job.sh || EXIT_CODE=$?
        echo "exit_code=$EXIT_CODE" | tee -a "$GITHUB_OUTPUT"
        exit $EXIT_CODE
        echo "::endgroup::"

    - name: Check result
      id: check
      shell: bash -x -e -u -o pipefail {0}
      if: always()
      env:
        IS_UNIT_TEST: ${{ inputs.is_unit_test == 'true' }}
      run: |
        echo "::group::Check result"

        logs_report=logs-${{ inputs.test_case }}-${{ github.run_id }}-$(uuidgen)
        echo "logs_report=$logs_report" | sed 's/\//-/g' | sed 's/\*/-/g' | tee -a "$GITHUB_OUTPUT"

        if [[ "$IS_UNIT_TEST" == "true" ]]; then
          coverage_report=coverage-${{ inputs.is_unit_test == 'true' && 'unit-test' || 'e2e' }}-${{ github.run_id }}-$(uuidgen)
        else
          coverage_report=none
        fi
        echo "coverage_report=$coverage_report" | tee -a "$GITHUB_OUTPUT"

        EXIT_CODE=${{ steps.run-main-script.outputs.exit_code }}
        IS_SUCCESS=$([[ "$EXIT_CODE" -eq 0 ]] && echo "true" || echo "false")

        if [[ "$IS_SUCCESS" == "false" && "${{ inputs.is-optional }}" == "true" ]]; then
          echo "::warning:: Test failed, but displayed as successful because it is marked as optional."
          IS_SUCCESS=true
        fi

        if [[ "$IS_SUCCESS" == "false" ]]; then
          echo Test did not finish successfully.
          exit 1
        fi

        if [[ "$coverage_report" != "none" ]]; then
          uv run coverage report -i
        fi

        exit $EXIT_CODE
        echo "::endgroup::"

    - name: Upload coverage
      uses: actions/upload-artifact@v4
      if: ${{ always() && steps.check.outputs.coverage_report != 'none' }}
      with:
        name: ${{ steps.check.outputs.coverage_report }}
        path: |
          coverage.xml
          .coverage
        include-hidden-files: true

    - name: Upload logs
      uses: actions/upload-artifact@v4
      if: always()
      with:
        name: ${{ steps.check.outputs.logs_report }}
        path: ${{ inputs.is_unit_test == 'true' && 'assets_dir/logs' || 'assets_dir' }}
        include-hidden-files: true


================================================
FILE: .github/actions/check-nvidia-sso-membership/action.yml
================================================
name: 'Check NVIDIA SSO Membership'
description: 'Check if a GitHub username exists in the NVIDIA SSO users list from github-audits'
author: 'NVIDIA'

inputs:
  username:
    description: 'GitHub username to check'
    required: true
  github_audits_repo:
    description: 'Repository containing SSO users file'
    required: false
    default: 'NVIDIA-GitHub-Management/github-audits'
  github_audits_version:
    description: 'Release version tag'
    required: false
    default: 'v0.1.0'
  sso_users_filename:
    description: 'Filename of SSO users JSON'
    required: false
    default: 'users_sso.json'
  github_token:
    description: 'GitHub token with access to github-audits repo'
    required: true

outputs:
  is_member:
    description: 'Boolean - true if user is in NVIDIA SSO list, false otherwise'
    value: ${{ steps.check-membership.outputs.is_member }}
  is_org_member:
    description: 'Boolean - true if user has NVIDIA or NVIDIA-NeMo in org_roles'
    value: ${{ steps.check-membership.outputs.is_org_member }}
  user_orgs:
    description: 'Comma-separated list of orgs user is member of'
    value: ${{ steps.check-membership.outputs.user_orgs }}
  sso_file_available:
    description: 'Boolean - true if SSO file was successfully downloaded'
    value: ${{ steps.download-sso.outputs.sso_file_available }}
  user_count:
    description: 'Number of users in the SSO file (0 if download failed)'
    value: ${{ steps.download-sso.outputs.user_count }}

runs:
  using: 'composite'
  steps:
    - name: Download NVIDIA SSO users from github-audits
      id: download-sso
      shell: bash
      env:
        GH_TOKEN: ${{ inputs.github_token }}
      run: |
        echo "Downloading ${{ inputs.sso_users_filename }} from ${{ inputs.github_audits_repo }} ${{ inputs.github_audits_version }} release..."

        # Download the release asset using gh CLI
        gh release download ${{ inputs.github_audits_version }} \
          --repo ${{ inputs.github_audits_repo }} \
          --pattern ${{ inputs.sso_users_filename }} \
          --clobber 2>&1 || {
            echo "ERROR: Failed to download ${{ inputs.sso_users_filename }} from github-audits release"
            echo "sso_file_available=false" >> $GITHUB_OUTPUT
            echo "user_count=0" >> $GITHUB_OUTPUT
            exit 0
          }

        # Verify file was downloaded and is valid JSON
        if [ ! -f ${{ inputs.sso_users_filename }} ]; then
          echo "ERROR: ${{ inputs.sso_users_filename }} file not found after download"
          echo "sso_file_available=false" >> $GITHUB_OUTPUT
          echo "user_count=0" >> $GITHUB_OUTPUT
          exit 0
        fi

        # Validate JSON structure
        if ! jq -e 'type == "object"' ${{ inputs.sso_users_filename }} > /dev/null 2>&1; then
          echo "ERROR: ${{ inputs.sso_users_filename }} is not a valid JSON object"
          echo "sso_file_available=false" >> $GITHUB_OUTPUT
          echo "user_count=0" >> $GITHUB_OUTPUT
          exit 0
        fi

        USER_COUNT=$(jq 'length' ${{ inputs.sso_users_filename }})
        echo "Successfully downloaded ${{ inputs.sso_users_filename }} with $USER_COUNT NVIDIA SSO users"
        echo "sso_file_available=true" >> $GITHUB_OUTPUT
        echo "user_count=$USER_COUNT" >> $GITHUB_OUTPUT

    - name: Check if user is in SSO list
      id: check-membership
      shell: bash
      run: |
        USERNAME="${{ inputs.username }}"
        SSO_FILE="${{ inputs.sso_users_filename }}"

        echo "Checking if $USERNAME is in NVIDIA SSO users list..."

        # Check if SSO file is available
        if [ "${{ steps.download-sso.outputs.sso_file_available }}" != "true" ] || [ ! -f "$SSO_FILE" ]; then
          echo "ERROR: $SSO_FILE not available - cannot check membership"
          echo "is_member=false" >> $GITHUB_OUTPUT
          echo "is_org_member=false" >> $GITHUB_OUTPUT
          echo "user_orgs=" >> $GITHUB_OUTPUT
          exit 0
        fi

        # Check if username exists as a key in the JSON object
        if jq -e --arg user "$USERNAME" 'has($user)' "$SSO_FILE" > /dev/null 2>&1; then
          echo "$USERNAME found in NVIDIA SSO users"
          echo "is_member=true" >> $GITHUB_OUTPUT

          # Extract and check org membership
          IS_ORG_MEMBER=$(jq -r --arg user "$USERNAME" '
            .[$user].org_roles // [] |
            map(select(test("^(NVIDIA|NVIDIA-NeMo):Member$"))) |
            length > 0
          ' "$SSO_FILE")

          USER_ORGS=$(jq -r --arg user "$USERNAME" '
            .[$user].org_roles // [] |
            map(split(":")[0]) |
            unique |
            join(",")
          ' "$SSO_FILE")

          echo "is_org_member=$IS_ORG_MEMBER" >> $GITHUB_OUTPUT
          echo "user_orgs=$USER_ORGS" >> $GITHUB_OUTPUT

          if [ "$IS_ORG_MEMBER" == "true" ]; then
            echo "$USERNAME is a member of NVIDIA or NVIDIA-NeMo org"
          else
            echo "$USERNAME has @nvidia.com email but is not in NVIDIA or NVIDIA-NeMo org (orgs: $USER_ORGS)"
          fi
        else
          echo "$USERNAME NOT found in NVIDIA SSO users"
          echo "is_member=false" >> $GITHUB_OUTPUT
          echo "is_org_member=false" >> $GITHUB_OUTPUT
          echo "user_orgs=" >> $GITHUB_OUTPUT
        fi

branding:
  icon: 'shield'
  color: 'green'


================================================
FILE: .github/copy-pr-bot.yaml
================================================
enabled: true
auto_sync_draft: false
auto_sync_ready: true
trustees_override: ["AAnoosheh", "ArEsKay3", "Autumn1998", "BestJuly", "BoxiangW", "CarlosGomes98", "ChenhanYu", "FDecaYed", "HaochenYuan", "ISEEKYAN", "JRD971000", "Phlip79", "QiZhangNV", "RPrenger", "ShriyaRishab", "Victarry", "Wohox", "ZhiyuLi-Nvidia", "ahmadki", "aklife97", "ananthsub", "asolergi-nv", "buptzyb", "chtruong814", "cjld", "cspades", "cuichenx", "deepakn94", "dimapihtar", "dingqingy-nv", "duncanriach", "erhoo82", "ericharper", "fanshiqing", "faradawn", "frsun-nvda", "gautham-kollu", "gdengk", "guyueh1", "huvunvidia", "hxbai", "ilml", "jalbericiola", "janEbert", "jaredcasper", "jenchen13", "jiemingz", "jingqiny-99", "jkamalu", "jon-barker", "jstjohn", "kajalj22", "kanz-nv", "kevalmorabia97", "ko3n1g", "ksivaman", "kunlunl", "kvareddy", "kwyss-nvidia", "layalir", "lhb8125", "lmcafee-nvidia", "maanug-nv", "mathemakitten", "matthieule", "mchrzanowski", "mehraakash", "mkhona-nvidia", "nanz-nv", "parthmannan", "prajwal1210", "pthombre", "rhewett-nv", "rogerwaleffe", "sajadn", "sanandaraj5597", "sancha", "santhnm2", "sbak5", "shanmugamr1992", "sharathts", "shengf-nv", "shifangx", "shjwudp", "sidsingh-nvidia", "skyw", "sudhakarsingh27", "tdene", "theothermike", "thomasdhc", "tomlifu", "trintamaki", "tylerpoon", "wdykas", "wplf", "xiaoyao0115", "xuwchen", "yanring", "yaox12", "yaoyu-33", "yashaswikarnati", "yeyu-nvidia", "yobibyte", "youngeunkwon0405", "yueshen2016", "yuzhongw-nvidia", "zhongbozhu"]


================================================
FILE: .github/oncall_schedule.json
================================================
[
    {
        "user": "dimapihtar",
        "date": "2026-03-18"
    },
    {
        "user": "janEbert",
        "date": "2026-03-25"
    },
    {
        "user": "gautham-kollu",
        "date": "2026-04-01"
    },
    {
        "user": "ilml",
        "date": "2026-04-08"
    },
    {
        "user": "Phlip79",
        "date": "2026-04-15"
    },
    {
        "user": "asolergi-nv",
        "date": "2026-04-22"
    },
    {
        "user": "BoxiangW",
        "date": "2026-04-29"
    },
    {
        "user": "maanug-nv",
        "date": "2026-05-06"
    },
    {
        "user": "dimapihtar",
        "date": "2026-05-13"
    },
    {
        "user": "gautham-kollu",
        "date": "2026-05-20"
    },
    {
        "user": "ilml",
        "date": "2026-05-27"
    },
    {
        "user": "janEbert",
        "date": "2026-06-03"
    }
]


================================================
FILE: .github/pull_request_template.md
================================================
# What does this PR do ?
<!-- Add a one line overview of what this PR aims to accomplish. -->

:warning: For major changes (either in lines of code or in its impact), please make sure to first share a design doc with the team. If you're unsure what's the best way to do so, contact the @mcore-oncall.

## Contribution process

### Pre-checks

- [ ] I have added relevant unit tests
- [ ] I have added relevant functional tests
- [ ] I have added proper typing to my code [Typing guidelines](https://docs.python.org/3/library/typing.html)
- [ ] I have added relevant documentation
- [ ] I have run the [autoformatter.sh](https://github.com/NVIDIA/Megatron-LM/blob/main/tools/autoformat.sh) on my PR

### Code review

Feel free to message or comment the [@mcore-oncall](https://github.com/orgs/NVIDIA/teams/mcore-oncall) to help accelerate your merge into main. The less complex your PR is, the faster it will be approved and merged!

All PRs start as **draft**. If you open a non-draft PR, it will be automatically converted to draft.

#### Step 1: Mark PR as "Ready for Review"

1. When your PR is ready, click **Ready for Review**.
2. An oncall reviewer is auto-assigned and expert reviewers are notified based on your changes.
   - Some PRs may jump straight to step 2. This is determined by `.github/CODEOWNERS`.

:warning: Only mark as ready once merge-conflicts are resolved and the CI is passing.
Final Review might get declined if these requirements are not fulfilled.

#### Step 2: Final Review

For PRs that change `megatron/core`, once all expert reviewers have approved, the `Final Review` label is applied **automatically** and final reviewers are assigned.

For PRs outside `megatron/core`, this step is skipped.

#### Step 3: Approved

Once all required reviewers have approved, the `Approved` label is applied **automatically**.

### Merge

Any member of [mcore-engineers](https://github.com/orgs/NVIDIA/teams/mcore-engineers) will be able to merge your PR.

<details>
<summary>For MRs into `dev` branch</summary>
The proposed review process for `dev` branch is under active discussion.

MRs are mergable after one approval by either `eharper@nvidia.com` or `zijiey@nvidia.com`.
</details>


================================================
FILE: .github/scripts/oncall_manager.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import json
import requests
import argparse
from datetime import datetime, timedelta, timezone

from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError

# Constants
GITHUB_API_URL = "https://api.github.com"
SCHEDULE_FILE = ".github/oncall_schedule.json"
ROTATION_TEAM_SLUG = "mcore-oncall-rotation"
ACTIVE_ONCALL_TEAM_SLUG = "mcore-oncall"
SLACK_USERGROUP_HANDLE = "mcore-oncall"
TARGET_WEEKS = 12

# Caches for email and Slack lookups
_email_cache = {}
_slack_id_cache = {}

def get_headers():
    token = os.environ.get("GH_TOKEN")
    if not token:
        # Fallback to GITHUB_TOKEN if GH_TOKEN not set
        token = os.environ.get("GITHUB_TOKEN")
        
    if not token:
        print("Error: GH_TOKEN or GITHUB_TOKEN not set")
        sys.exit(1)
        
    return {
        "Authorization": f"token {token}",
        "Accept": "application/vnd.github.v3+json"
    }

def get_repo_info():
    """Returns (owner, repo) from GITHUB_REPOSITORY env var."""
    repo_env = os.environ.get("GITHUB_REPOSITORY")
    if not repo_env:
        print("Error: GITHUB_REPOSITORY environment variable not set")
        sys.exit(1)
    parts = repo_env.split("/")
    return parts[0], parts[1]

def get_team_members(org, team_slug):
    """Fetches members of the GitHub team."""
    url = f"{GITHUB_API_URL}/orgs/{org}/teams/{team_slug}/members"
    headers = get_headers()
    
    members = set()
    page = 1
    while True:
        resp = requests.get(f"{url}?per_page=100&page={page}", headers=headers)
        if resp.status_code != 200:
            print(f"Error fetching team members: {resp.status_code} {resp.text}")
            sys.exit(1)
        
        data = resp.json()
        if not data:
            break
            
        members.update([m['login'] for m in data])
        if len(data) < 100:
            break
        page += 1
        
    return members

def get_user_email(username):
    """Get user's email from GitHub, prioritizing @nvidia.com emails.
    
    Checks in order:
    1. Public profile email
    2. Recent commits in the repository
    """
    if username in _email_cache:
        return _email_cache[username]
    
    headers = get_headers()
    public_email = None
    
    try:
        # 1. Try to get user's public profile email first
        resp = requests.get(f"{GITHUB_API_URL}/users/{username}", headers=headers)
        if resp.status_code == 200:
            user_data = resp.json()
            email = user_data.get('email')
            if email and not email.endswith("@users.noreply.github.com"):
                if email.endswith("@nvidia.com"):
                    _email_cache[username] = email
                    return email
                # Store non-nvidia email as fallback
                public_email = email
        
        # 2. Check recent commits in the repository for @nvidia.com email
        repo_env = os.environ.get("GITHUB_REPOSITORY", "NVIDIA/Megatron-LM")
        commits_url = f"{GITHUB_API_URL}/repos/{repo_env}/commits?author={username}&per_page=10"
        resp = requests.get(commits_url, headers=headers)
        
        if resp.status_code == 200:
            commits = resp.json()
            for commit in commits:
                # Get email from commit author
                commit_data = commit.get('commit', {})
                author_data = commit_data.get('author', {})
                email = author_data.get('email')
                
                if email and not email.endswith("@users.noreply.github.com"):
                    if email.endswith("@nvidia.com"):
                        _email_cache[username] = email
                        print(f"Found @nvidia.com email for {username} from commits: {email}")
                        return email
                    elif public_email is None:
                        public_email = email
        
        # 3. Use public email if found, otherwise fallback
        if public_email:
            _email_cache[username] = public_email
            print(f"Using public email for {username}: {public_email}")
            return public_email
        
        # Fallback to noreply email
        fallback = f"{username}@users.noreply.github.com"
        _email_cache[username] = fallback
        print(f"Warning: No email found for {username}, using fallback: {fallback}")
        return fallback
        
    except Exception as e:
        print(f"Warning: Could not get email for {username}: {e}")
        fallback = f"{username}@users.noreply.github.com"
        _email_cache[username] = fallback
        return fallback

def get_slack_client():
    """Get Slack WebClient if token is available."""
    slack_token = os.environ.get("SLACK_TOKEN")
    if not slack_token:
        return None
    
    return WebClient(token=slack_token)

def get_slack_user_id(slack_client, email):
    """Get Slack user ID from email."""
    if not slack_client:
        return None
    
    if email in _slack_id_cache:
        return _slack_id_cache[email]
    
    try:
        response = slack_client.users_lookupByEmail(email=email)
        user_id = response["user"]["id"]
        _slack_id_cache[email] = user_id
        return user_id
    except SlackApiError as e:
        print(f"Warning: Could not find Slack user for {email}: {e.response['error']}")
        _slack_id_cache[email] = None
        return None

def get_slack_usergroup_id(slack_client, handle):
    """Get Slack usergroup ID from handle."""
    if not slack_client:
        return None
    
    try:
        response = slack_client.usergroups_list(include_users=True)
        for usergroup in response.get("usergroups", []):
            if usergroup.get("handle") == handle:
                return usergroup.get("id"), usergroup.get("users", [])
        print(f"Warning: Slack usergroup '{handle}' not found")
        return None, []
    except SlackApiError as e:
        print(f"Warning: Could not list Slack usergroups: {e.response['error']}")
        return None, []

def update_slack_usergroup(new_oncall_username, old_members_usernames):
    """
    Updates the Slack usergroup to contain only the new oncall user.
    Adds new oncall first, then removes old members (usergroups need at least one member).
    """
    slack_client = get_slack_client()
    if not slack_client:
        print("Slack token not configured, skipping Slack usergroup update")
        return
    
    # Get the new oncall's email and Slack user ID
    new_email = get_user_email(new_oncall_username)
    new_slack_id = get_slack_user_id(slack_client, new_email)
    
    if not new_slack_id:
        print(f"Could not find Slack user ID for {new_oncall_username} ({new_email}), skipping Slack update")
        return
    
    # Get the usergroup ID and current members
    usergroup_id, current_slack_members = get_slack_usergroup_id(slack_client, SLACK_USERGROUP_HANDLE)
    
    if not usergroup_id:
        print(f"Could not find Slack usergroup '{SLACK_USERGROUP_HANDLE}', skipping Slack update")
        return
    
    try:
        # Step 1: Add new oncall first (include current members to avoid removing anyone yet)
        # This ensures usergroup always has at least one member
        if new_slack_id not in current_slack_members:
            updated_members = list(set(current_slack_members + [new_slack_id]))
            slack_client.usergroups_users_update(
                usergroup=usergroup_id,
                users=updated_members
            )
            print(f"Added {new_oncall_username} to Slack usergroup '{SLACK_USERGROUP_HANDLE}'")
        
        # Step 2: Now set the usergroup to contain only the new oncall
        slack_client.usergroups_users_update(
            usergroup=usergroup_id,
            users=[new_slack_id]
        )
        print(f"Updated Slack usergroup '{SLACK_USERGROUP_HANDLE}' to contain only {new_oncall_username}")
        
    except SlackApiError as e:
        print(f"Failed to update Slack usergroup: {e.response['error']}")

def load_schedule():
    if not os.path.exists(SCHEDULE_FILE):
        return []
    try:
        with open(SCHEDULE_FILE, 'r') as f:
            data = json.load(f)
            # Normalize to list of dicts if it's a list of strings
            schedule = []
            for item in data:
                if isinstance(item, str):
                    schedule.append({"user": item, "date": "YYYY-MM-DD"})
                else:
                    schedule.append(item)
            return schedule
    except (json.JSONDecodeError, FileNotFoundError):
        return []

def save_schedule(schedule):
    with open(SCHEDULE_FILE, 'w') as f:
        json.dump(schedule, f, indent=4)
        f.write('\n') # trailing newline

def update_active_oncall_team(org, new_oncall):
    """Updates the active oncall team to contain only the new oncall user."""
    # 1. Get current members of the active team
    current_members = get_team_members(org, ACTIVE_ONCALL_TEAM_SLUG)
    
    # 2. Add the new oncall if not present
    if new_oncall not in current_members:
        url = f"{GITHUB_API_URL}/orgs/{org}/teams/{ACTIVE_ONCALL_TEAM_SLUG}/memberships/{new_oncall}"
        resp = requests.put(url, headers=get_headers())
        if resp.status_code == 200:
            print(f"Added {new_oncall} to {ACTIVE_ONCALL_TEAM_SLUG}")
        else:
            print(f"Failed to add {new_oncall} to {ACTIVE_ONCALL_TEAM_SLUG}: {resp.status_code} {resp.text}")

    # 3. Remove everyone else
    old_members = []
    for member in current_members:
        if member not in [new_oncall, 'svcnvidia-nemo-ci']:
            old_members.append(member)
            url = f"{GITHUB_API_URL}/orgs/{org}/teams/{ACTIVE_ONCALL_TEAM_SLUG}/memberships/{member}"
            resp = requests.delete(url, headers=get_headers())
            if resp.status_code == 204:
                print(f"Removed {member} from {ACTIVE_ONCALL_TEAM_SLUG}")
            else:
                print(f"Failed to remove {member} from {ACTIVE_ONCALL_TEAM_SLUG}: {resp.status_code} {resp.text}")
    
    # 4. Update Slack usergroup (add new oncall first, then remove old members)
    update_slack_usergroup(new_oncall, old_members)

def rotate_schedule(repo_owner, dry_run=False):
    schedule = load_schedule()
    print(f"Current schedule length: {len(schedule)}")
    
    # 1. Rotate (Remove past week)
    # Only if schedule is not empty.
    if schedule:
        # Check date of first entry
        first_entry = schedule[0]
        try:
            # We assume the date is the *start* of the oncall shift (Wednesday).
            # The shift ends 7 days later.
            start_date = datetime.strptime(first_entry['date'], "%Y-%m-%d").date()
            end_date = start_date + timedelta(days=7)
            
            today = datetime.now(timezone.utc).date()
            
            # If today is >= end_date, the shift is over.
            # (e.g. Started last Wed, ends today Wed. If today is Wed, we rotate)
            if today >= end_date:
                removed = schedule.pop(0)
                print(f"Rotated out: {removed} (Ended {end_date})")
            else:
                print(f"First entry {first_entry} has not ended yet (Ends {end_date}). Not removing.")
        except ValueError:
             # Fallback if date is invalid, rotate anyway
             removed = schedule.pop(0)
             print(f"Rotated out (invalid date): {removed}")
    else:
        print("Schedule empty, nothing to rotate.")

    # 2. Replenish
    ensure_schedule_filled(schedule, repo_owner)
    
    # 3. Update active oncall team
    if schedule:
        current_oncall = schedule[0]['user']
        print(f"New active oncall: {current_oncall}")
        if not dry_run:
            update_active_oncall_team(repo_owner, current_oncall)
        else:
            print(f"Dry run: Would update {ACTIVE_ONCALL_TEAM_SLUG} to contain only {current_oncall}")
    
    if not dry_run:
        save_schedule(schedule)
        print("Schedule updated and saved.")
    else:
        print("Dry run: Schedule not saved.")
        print(json.dumps(schedule, indent=4))

def get_last_wednesday():
    today = datetime.now(timezone.utc).date()
    # Monday=0, Wednesday=2
    offset = (today.weekday() - 2) % 7
    return today - timedelta(days=offset)

def ensure_schedule_filled(schedule, repo_owner):
    """Appends users to schedule until it reaches TARGET_WEEKS."""
    members = get_team_members(repo_owner, ROTATION_TEAM_SLUG)
    if not members:
        print(f"Warning: No team members found in {ROTATION_TEAM_SLUG}.")
        return
    if 'svcnvidia-nemo-ci' in members:
        members.remove('svcnvidia-nemo-ci')
    members = list(members)

    members.sort() # Deterministic order
    
    while len(schedule) < TARGET_WEEKS:
        # Determine start date for the new entry
        if not schedule:
            # Start with the most recent Wednesday if list is empty
            next_date = get_last_wednesday()
            
            # Start with the first member alphabetically if list is empty
            next_user = members[0]
        else:
            last_entry = schedule[-1]
            last_user = last_entry['user']
            
            # Parse last date and add 7 days
            try:
                last_date = datetime.strptime(last_entry['date'], "%Y-%m-%d").date()
                next_date = last_date + timedelta(days=7)
            except ValueError:
                # Fallback if date is invalid/placeholder
                next_date = get_last_wednesday() + timedelta(days=7 * len(schedule))

            try:
                # Find index of last scheduled user in the team list
                if last_user in members:
                    last_idx = members.index(last_user)
                    next_idx = (last_idx + 1) % len(members)
                    next_user = members[next_idx]
                else:
                    # Last user not in team, just pick first member
                    next_user = members[0]
            except ValueError:
                next_user = members[0]
        
        new_entry = {"user": next_user, "date": next_date.strftime("%Y-%m-%d")}
        schedule.append(new_entry)
        print(f"Appended: {new_entry}")

def assign_reviewer(pr_number):
    """Assigns the mcore-oncall team as the reviewer for the PR."""
    owner, repo = get_repo_info()
    url = f"{GITHUB_API_URL}/repos/{owner}/{repo}/pulls/{pr_number}/requested_reviewers"
    
    # Assign the oncall team as reviewer
    data = {"team_reviewers": [ACTIVE_ONCALL_TEAM_SLUG]}
    resp = requests.post(url, headers=get_headers(), json=data)
    
    if resp.status_code in [201, 200]:
        print(f"Successfully requested review from team NVIDIA/{ACTIVE_ONCALL_TEAM_SLUG}")
    else:
        print(f"Failed to request review: {resp.status_code} {resp.text}")
        sys.exit(1)

def main():
    parser = argparse.ArgumentParser(description="Manage Oncall Schedule")
    subparsers = parser.add_subparsers(dest="command", required=True)
    
    # Rotate command
    parser_rotate = subparsers.add_parser("rotate", help="Rotate the schedule (remove first, append new)")
    parser_rotate.add_argument("--dry-run", action="store_true", help="Do not save changes")

    # Fill command (just fill up to 12 without rotating - useful for init)
    parser_fill = subparsers.add_parser("fill", help="Fill the schedule to 12 weeks without rotating")
    
    # Assign command
    parser_assign = subparsers.add_parser("assign", help="Assign current oncall to PR")
    parser_assign.add_argument("--pr", type=int, required=True, help="PR number")

    args = parser.parse_args()
    
    owner, _ = get_repo_info()
    
    if args.command == "rotate":
        rotate_schedule(owner, dry_run=args.dry_run)
    elif args.command == "fill":
        schedule = load_schedule()
        ensure_schedule_filled(schedule, owner)
        save_schedule(schedule)
        print("Schedule filled and saved.")
    elif args.command == "assign":
        assign_reviewer(args.pr)

if __name__ == "__main__":
    main()


================================================
FILE: .github/scripts/readme.sh
================================================
#!/bin/bash

cat << 'EOF'
╔══════════════════════════════════════════════════════════════════════╗
║                                                                      ║
║    ███╗   ███╗██████╗ ██████╗ ██╗██████╗  ██████╗ ███████╗         ║
║    ████╗ ████║██╔══██╗██╔══██╗██║██╔══██╗██╔════╝ ██╔════╝         ║
║    ██╔████╔██║██████╔╝██████╔╝██║██║  ██║██║  ███╗█████╗           ║
║    ██║╚██╔╝██║██╔══██╗██╔══██╗██║██║  ██║██║   ██║██╔══╝           ║
║    ██║ ╚═╝ ██║██████╔╝██║  ██║██║██████╔╝╚██████╔╝███████╗         ║
║    ╚═╝     ╚═╝╚═════╝ ╚═╝  ╚═╝╚═╝╚═════╝  ╚═════╝ ╚══════╝         ║
║                                                                      ║
║              H O W   T O :   M B R I D G E   T E S T I N G         ║
╚══════════════════════════════════════════════════════════════════════╝

  MBridge unit tests run automatically on every PR. To also trigger
  functional tests, attach the label and re-run the workflow step.

  ┌─────────────────────────────────────────────────────────────────┐
  │  DEFAULT  │  Unit tests run on every PR (no action needed)      │
  ├─────────────────────────────────────────────────────────────────┤
  │                                                                  │
  │    Every PR  ──►  cicd-mbridge-testing  ──►  unit tests only   │
  │                                                                  │
  └─────────────────────────────────────────────────────────────────┘

  ┌─────────────────────────────────────────────────────────────────┐
  │  STEP 1  │  Attach the label to your PR (for functional tests)  │
  ├─────────────────────────────────────────────────────────────────┤
  │                                                                  │
  │    PR Labels  ──►  [ + Add label ]  ──►  "Run MBridge tests"   │
  │                                                                  │
  └─────────────────────────────────────────────────────────────────┘

  ┌─────────────────────────────────────────────────────────────────┐
  │  STEP 2  │  Re-run this workflow step                           │
  ├─────────────────────────────────────────────────────────────────┤
  │                                                                  │
  │    Actions  ──►  [ Re-run jobs ]  ──►  Re-run failed jobs      │
  │                                                                  │
  └─────────────────────────────────────────────────────────────────┘

  ┌─────────────────────────────────────────────────────────────────┐
  │  RESULT  │  Unit + functional tests run!                        │
  ├─────────────────────────────────────────────────────────────────┤
  │                                                                  │
  │         cicd-mbridge-testing  ◄── unit + functional tests      │
  │                                                                  │
  │         Tests run against MBridge using the merge commit       │
  │         SHA of your pull request.                              │
  │                                                                  │
  └─────────────────────────────────────────────────────────────────┘

                ┌────────────────────────────────────┐
                │  Label present?     NO   → unit    │
                │  Label present?     YES  → unit +  │
                │                           functional│
                └────────────────────────────────────┘

  NOTE: The label must be present BEFORE the re-run is triggered.
        The CI checks for "Run MBridge tests" at runtime.

  NOTE: All MBridge test results are optional — failures do not
        block merging your PR.
EOF


================================================
FILE: .github/scripts/sync_team_usergroups.py
================================================
# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Syncs GitHub team membership to Slack user groups.

This script reads members from GitHub teams and updates the corresponding
Slack user groups to match.
"""

import os
import sys
import argparse
import requests

from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError

# Constants
GITHUB_API_URL = "https://api.github.com"

# Teams whose *children* are each synced to their own Slack usergroup
PARENT_TEAM_SLUGS = ["mcore-reviewers"]

# Teams synced directly (the team itself, not its children)
DIRECT_TEAM_SLUGS = ["mcore-engineers"]

# Caches for email and Slack lookups
_email_cache = {}
_slack_id_cache = {}
_usergroups_cache = None


def get_headers():
    """Get GitHub API headers with authentication."""
    token = os.environ.get("GH_TOKEN")
    if not token:
        token = os.environ.get("GITHUB_TOKEN")

    if not token:
        print("Error: GH_TOKEN or GITHUB_TOKEN not set")
        sys.exit(1)

    return {
        "Authorization": f"token {token}",
        "Accept": "application/vnd.github.v3+json",
    }


def get_org():
    """Returns the organization from GITHUB_REPOSITORY env var or default."""
    repo_env = os.environ.get("GITHUB_REPOSITORY", "NVIDIA/Megatron-LM")
    return repo_env.split("/")[0]


def github_team_to_slack_usergroup(team_slug):
    """Convert a GitHub team slug to a Slack usergroup handle.

    Rules:
    - Base pattern: "test" -> "mcore-test"
    - Remove "core-" prefix: "core-test" -> "mcore-test"
    - Remove "megatron-" prefix: "megatron-test" -> "mcore-test"
    - Remove "-and-": "test1-and-test2" -> "mcore-test1-test2"
    - Shorten "mixture-of-experts" to "moe"
    - Shorten "pipeline-parallelism" to "pp"
    - Shorten "reinforcement-learning" to "rl"
    """
    name = team_slug

    # Apply shortenings first (before removing prefixes)
    name = name.replace("mixture-of-experts", "moe")
    name = name.replace("pipeline-parallelism", "pp")
    name = name.replace("reinforcement-learning", "rl")

    # Remove prefixes
    if name.startswith("core-"):
        name = name[5:]  # Remove "core-"
    elif name.startswith("megatron-"):
        name = name[9:]  # Remove "megatron-"
    elif name.startswith("mcore-"):
        name = name[6:]  # Remove "mcore-"

    # Remove "-and-"
    name = name.replace("-and-", "-")

    return f"mcore-{name}"


def get_child_teams(org, parent_team_slug):
    """Fetches child teams of a parent GitHub team."""
    # First get the team ID
    url = f"{GITHUB_API_URL}/orgs/{org}/teams/{parent_team_slug}"
    headers = get_headers()

    resp = requests.get(url, headers=headers)
    if resp.status_code != 200:
        print(f"Error fetching parent team '{parent_team_slug}': {resp.status_code} {resp.text}")
        return []

    parent_team_id = resp.json().get("id")
    if not parent_team_id:
        print(f"Error: Could not get ID for team '{parent_team_slug}'")
        return []

    # Now fetch child teams
    url = f"{GITHUB_API_URL}/orgs/{org}/teams/{parent_team_slug}/teams"
    child_teams = []
    page = 1

    while True:
        resp = requests.get(f"{url}?per_page=100&page={page}", headers=headers)
        if resp.status_code != 200:
            print(f"Error fetching child teams: {resp.status_code} {resp.text}")
            return child_teams

        data = resp.json()
        if not data:
            break

        child_teams.extend([team["slug"] for team in data])
        if len(data) < 100:
            break
        page += 1

    return child_teams


def get_team_members(org, team_slug):
    """Fetches members of the GitHub team."""
    url = f"{GITHUB_API_URL}/orgs/{org}/teams/{team_slug}/members"
    headers = get_headers()

    members = set()
    page = 1
    while True:
        resp = requests.get(f"{url}?per_page=100&page={page}", headers=headers)
        if resp.status_code == 404:
            print(f"Warning: Team '{team_slug}' not found in org '{org}'")
            return set()
        if resp.status_code != 200:
            print(f"Error fetching team members: {resp.status_code} {resp.text}")
            return set()

        data = resp.json()
        if not data:
            break

        members.update([m["login"] for m in data])
        if len(data) < 100:
            break
        page += 1

    return members


def get_user_email(username):
    """Get user's email from GitHub, prioritizing @nvidia.com emails.

    Checks in order:
    1. Public profile email
    2. Recent commits in the repository
    """
    if username in _email_cache:
        return _email_cache[username]

    headers = get_headers()
    public_email = None

    try:
        # 1. Try to get user's public profile email first
        resp = requests.get(f"{GITHUB_API_URL}/users/{username}", headers=headers)
        if resp.status_code == 200:
            user_data = resp.json()
            email = user_data.get('email')
            if email and not email.endswith("@users.noreply.github.com"):
                if email.endswith("@nvidia.com"):
                    _email_cache[username] = email
                    return email
                # Store non-nvidia email as fallback
                public_email = email

        # 2. Check recent commits in the repository for @nvidia.com email
        repo_env = os.environ.get("GITHUB_REPOSITORY", "NVIDIA/Megatron-LM")
        commits_url = f"{GITHUB_API_URL}/repos/{repo_env}/commits?author={username}&per_page=10"
        resp = requests.get(commits_url, headers=headers)

        if resp.status_code == 200:
            commits = resp.json()
            for commit in commits:
                # Get email from commit author
                commit_data = commit.get('commit', {})
                author_data = commit_data.get('author', {})
                email = author_data.get('email')

                if email and not email.endswith("@users.noreply.github.com"):
                    if email.endswith("@nvidia.com"):
                        _email_cache[username] = email
                        print(f"Found @nvidia.com email for {username} from commits")
                        return email
                    elif public_email is None:
                        public_email = email

        # 3. Use public email if found, otherwise fallback
        if public_email:
            _email_cache[username] = public_email
            print(f"Using public email for {username}: {public_email}")
            return public_email

        # Fallback to noreply email
        fallback = f"{username}@users.noreply.github.com"
        _email_cache[username] = fallback
        print(f"Warning: No email found for {username}, using fallback: {fallback}")
        return fallback

    except Exception as e:
        print(f"Warning: Could not get email for {username}: {e}")
        fallback = f"{username}@users.noreply.github.com"
        _email_cache[username] = fallback
        return fallback


def get_slack_client():
    """Get Slack WebClient if token is available."""
    slack_token = os.environ.get("SLACK_TOKEN")
    if not slack_token:
        return None

    return WebClient(token=slack_token)


def get_slack_user_id(slack_client, email):
    """Get Slack user ID from email."""
    if not slack_client:
        return None

    if email in _slack_id_cache:
        return _slack_id_cache[email]

    try:
        response = slack_client.users_lookupByEmail(email=email)
        user_id = response["user"]["id"]
        _slack_id_cache[email] = user_id
        return user_id
    except SlackApiError as e:
        print(f"Warning: Could not find Slack user for {email}: {e.response['error']}")
        _slack_id_cache[email] = None
        return None


def fetch_all_usergroups(slack_client):
    """Fetch all Slack usergroups once and cache them."""
    global _usergroups_cache

    if _usergroups_cache is not None:
        return _usergroups_cache

    if not slack_client:
        _usergroups_cache = {}
        return _usergroups_cache

    try:
        print("Fetching Slack usergroups...")
        response = slack_client.usergroups_list(include_users=True)
        _usergroups_cache = {}
        for usergroup in response.get("usergroups", []):
            handle = usergroup.get("handle")
            if handle:
                _usergroups_cache[handle] = {
                    "id": usergroup.get("id"),
                    "users": usergroup.get("users", []),
                }
        print(f"Fetched {len(_usergroups_cache)} usergroups")
        return _usergroups_cache
    except SlackApiError as e:
        print(f"Warning: Could not list Slack usergroups: {e.response['error']}")
        _usergroups_cache = {}
        return _usergroups_cache


def get_slack_usergroup_id(slack_client, handle):
    """Get Slack usergroup ID from handle."""
    usergroups = fetch_all_usergroups(slack_client)

    if handle in usergroups:
        return usergroups[handle]["id"], usergroups[handle]["users"]

    return None, []


def github_team_to_usergroup_name(team_slug):
    """Convert a GitHub team slug to a Slack usergroup display name.

    Example: "test3" -> "Megatron Core Experts: Test3"
    """
    # Title case each word separated by hyphens, then join with spaces
    words = team_slug.split("-")
    title_cased = " ".join(word.capitalize() for word in words)
    return f"Megatron Core Experts: {title_cased}"


def create_slack_usergroup(slack_client, handle, team_slug):
    """Create a new Slack usergroup.

    Args:
        slack_client: Slack WebClient instance
        handle: The usergroup handle (e.g., "mcore-test")
        team_slug: The GitHub team slug (used for name and description)

    Returns:
        The usergroup ID if created successfully, None otherwise
    """
    global _usergroups_cache

    name = github_team_to_usergroup_name(team_slug)
    description = f'Expert review group "{team_slug}"'

    try:
        print(f"Creating Slack usergroup '@{handle}' with name '{name}'...")
        response = slack_client.usergroups_create(
            name=name,
            handle=handle,
            description=description,
        )
        usergroup = response.get("usergroup", {})
        usergroup_id = usergroup.get("id")

        if usergroup_id:
            # Update cache with new usergroup
            if _usergroups_cache is not None:
                _usergroups_cache[handle] = {
                    "id": usergroup_id,
                    "users": [],
                }
            print(f"Successfully created Slack usergroup '@{handle}'")
            return usergroup_id
        else:
            print(f"Error: Usergroup created but no ID returned")
            return None

    except SlackApiError as e:
        print(f"Error creating Slack usergroup '@{handle}': {e.response['error']}")
        return None


def sync_team_to_usergroup(team_slug, usergroup_handle, dry_run=False):
    """Sync a GitHub team to a Slack usergroup."""
    print(f"\n{'='*60}")
    print(f"Syncing GitHub team '{team_slug}' -> Slack usergroup '@{usergroup_handle}'")
    print(f"{'='*60}")

    org = get_org()
    slack_client = get_slack_client()

    if not slack_client:
        print("Error: Slack token not configured")
        return False

    # 1. Get GitHub team members
    members = get_team_members(org, team_slug)
    if not members:
        print(f"No members found in GitHub team '{team_slug}'")
        return False

    # Filter out service accounts
    members = {m for m in members if not m.startswith("svc")}
    print(f"GitHub team members ({len(members)}): {sorted(members)}")

    # 2. Get Slack user IDs for each member
    slack_user_ids = []
    missing_users = []

    for username in sorted(members):
        email = get_user_email(username)
        slack_id = get_slack_user_id(slack_client, email)
        if slack_id:
            slack_user_ids.append(slack_id)
        else:
            missing_users.append((username, email, "not found in Slack"))

    if missing_users:
        print(f"\nWarning: Could not resolve {len(missing_users)} users:")
        for username, email, reason in missing_users:
            print(f"  - {username}: {reason}" + (f" (tried {email})" if email else ""))

    if not slack_user_ids:
        print(f"Error: No Slack users found for team '{team_slug}'")
        return False

    # 3. Get current Slack usergroup membership (or create if it doesn't exist)
    usergroup_id, current_members = get_slack_usergroup_id(slack_client, usergroup_handle)

    if not usergroup_id:
        print(f"Slack usergroup '@{usergroup_handle}' not found, creating it...")
        if dry_run:
            print(f"Dry run: Would create usergroup '@{usergroup_handle}'")
            current_members = []
        else:
            usergroup_id = create_slack_usergroup(slack_client, usergroup_handle, team_slug)
            if not usergroup_id:
                print(f"Error: Failed to create Slack usergroup '@{usergroup_handle}'")
                return False
            current_members = []

    # 4. Compare and update
    current_set = set(current_members)
    new_set = set(slack_user_ids)

    to_add = new_set - current_set
    to_remove = current_set - new_set

    print(f"\nCurrent usergroup members: {len(current_members)}")
    print(f"New members to set: {len(slack_user_ids)}")
    print(f"  Adding: {len(to_add)} users")
    print(f"  Removing: {len(to_remove)} users")

    if current_set == new_set:
        print("No changes needed - usergroup is already in sync")
        return True

    if dry_run:
        print(f"\nDry run: Would update '@{usergroup_handle}' with {len(slack_user_ids)} members")
        return True

    # 5. Update the usergroup
    try:
        slack_client.usergroups_users_update(
            usergroup=usergroup_id, users=slack_user_ids
        )
        print(f"\nSuccessfully updated '@{usergroup_handle}' with {len(slack_user_ids)} members")
        return True
    except SlackApiError as e:
        print(f"Error updating usergroup: {e.response['error']}")
        return False


def get_team_to_usergroup_mapping(parent_team_slug):
    """Fetch child teams of a parent team and generate the mapping."""
    org = get_org()
    child_teams = get_child_teams(org, parent_team_slug)

    if not child_teams:
        print(f"Error: No child teams found under '{parent_team_slug}'")
        return {}

    mapping = {}
    for team_slug in child_teams:
        usergroup_handle = github_team_to_slack_usergroup(team_slug)
        mapping[team_slug] = usergroup_handle

    return mapping


def sync_all_teams(dry_run=False, parent_teams=None, direct_teams=None):
    """Sync GitHub teams to their Slack usergroups.

    Args:
        parent_teams: List of team slugs whose *children* are each synced.
                      Defaults to PARENT_TEAM_SLUGS.
        direct_teams: List of team slugs synced directly (not their children).
                      Defaults to DIRECT_TEAM_SLUGS.
    """
    if parent_teams is None:
        parent_teams = PARENT_TEAM_SLUGS
    if direct_teams is None:
        direct_teams = DIRECT_TEAM_SLUGS

    team_to_usergroup = {}

    for parent_slug in parent_teams:
        print(f"Fetching child teams of '{parent_slug}'...")
        mapping = get_team_to_usergroup_mapping(parent_slug)
        team_to_usergroup.update(mapping)

    for team_slug in direct_teams:
        usergroup_handle = github_team_to_slack_usergroup(team_slug)
        team_to_usergroup[team_slug] = usergroup_handle

    if not team_to_usergroup:
        return False

    print(f"Found {len(team_to_usergroup)} teams to sync")
    print("\nTeam to usergroup mapping:")
    for team, usergroup in sorted(team_to_usergroup.items()):
        print(f"  {team} -> @{usergroup}")

    results = {"success": [], "failed": []}

    for team_slug, usergroup_handle in team_to_usergroup.items():
        success = sync_team_to_usergroup(team_slug, usergroup_handle, dry_run=dry_run)
        if success:
            results["success"].append(team_slug)
        else:
            results["failed"].append(team_slug)

    # Summary
    print(f"\n{'='*60}")
    print("SYNC SUMMARY")
    print(f"{'='*60}")
    print(f"Successful: {len(results['success'])}")
    print(f"Failed: {len(results['failed'])}")

    if results["failed"]:
        print(f"\nFailed teams: {', '.join(results['failed'])}")
        return False

    return True


def main():
    parser = argparse.ArgumentParser(
        description="Sync GitHub team membership to Slack user groups"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be done without making changes",
    )
    parser.add_argument(
        "--list",
        action="store_true",
        help="List all configured team-to-usergroup mappings",
    )
    parser.add_argument(
        "--parent-team",
        action="append",
        dest="parent_teams",
        metavar="SLUG",
        help=(
            "Sync all children of this GitHub team (can be repeated). "
            f"Defaults to: {PARENT_TEAM_SLUGS}"
        ),
    )
    parser.add_argument(
        "--team",
        action="append",
        dest="direct_teams",
        metavar="SLUG",
        help=(
            "Sync this GitHub team directly (can be repeated). "
            f"Defaults to: {DIRECT_TEAM_SLUGS}"
        ),
    )

    args = parser.parse_args()

    # Use CLI values when provided, otherwise fall back to module-level defaults
    parent_teams = args.parent_teams if args.parent_teams is not None else PARENT_TEAM_SLUGS
    direct_teams = args.direct_teams if args.direct_teams is not None else DIRECT_TEAM_SLUGS

    if args.list:
        team_to_usergroup = {}
        for parent_slug in parent_teams:
            print(f"Fetching child teams of '{parent_slug}'...")
            team_to_usergroup.update(get_team_to_usergroup_mapping(parent_slug))
        for team_slug in direct_teams:
            team_to_usergroup[team_slug] = github_team_to_slack_usergroup(team_slug)
        if not team_to_usergroup:
            sys.exit(1)
        print("\nTeam-to-usergroup mappings:")
        print(f"{'GitHub Team':<35} {'Slack Usergroup':<30}")
        print("-" * 65)
        for team, usergroup in sorted(team_to_usergroup.items()):
            print(f"{team:<35} @{usergroup:<29}")
        return

    success = sync_all_teams(
        dry_run=args.dry_run, parent_teams=parent_teams, direct_teams=direct_teams
    )
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()


================================================
FILE: .github/workflows/_build_test_publish_wheel.yml
================================================
on:
  workflow_call:
    inputs:
      ref:
        required: false
        description: Ref (SHA or branch) to release
        type: string
        default: ${{ github.sha }}
      dry-run:
        required: false
        description: Upload to PyPy Test instance
        type: boolean
        default: true
      no-publish:
        required: false
        description: Do not publish the wheel
        type: boolean
        default: true
    secrets:
      TWINE_PASSWORD:
        required: true

jobs:
  build-and-test-wheels:
    strategy:
      fail-fast: false
      matrix:
        include:
          - PACKAGE: megatron-core
            PLATFORM: arm64
            IMAGE: quay.io/pypa/manylinux_2_28_aarch64
          - PACKAGE: megatron-core
            PLATFORM: amd64
            IMAGE: quay.io/pypa/manylinux_2_28_x86_64
          - PACKAGE: megatron-fsdp
            IMAGE: quay.io/pypa/manylinux_2_28_x86_64
            PLATFORM: amd64
    runs-on: ${{ matrix.PLATFORM == 'amd64' && 'ubuntu-22.04' || 'ubuntu-22.04-arm' }}
    env:
      PACKAGE: ${{ matrix.PACKAGE }}
      IMAGE: ${{ matrix.IMAGE }}
      PLATFORM: ${{ matrix.PLATFORM }}
      PUBLISH_DRYRUN: ${{ inputs.dry-run }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          ref: ${{ inputs.ref }}

      - name: Build wheel
        id: build-wheel
        run: |
          set -x

          if [ "$PACKAGE" = "megatron-core" ]; then
            ROOTDIR="megatron/core"
            BUILD_DIR="."
          elif [ "$PACKAGE" = "megatron-fsdp" ]; then
            ROOTDIR="megatron/core/distributed/fsdp/src/megatron_fsdp"
            BUILD_DIR="megatron/core/distributed/fsdp/src"
          else
            echo Unknown package: $PACKAGE
            exit 1
          fi

          if [ "$PUBLISH_DRYRUN" = "true" ]; then
            PRE_RELEASE=$(sed -n "s/.*PRE_RELEASE = '\(.*\)'/\1/p" $ROOTDIR/package_info.py)
            sed -i "/^PRE_RELEASE/c\PRE_RELEASE = '${PRE_RELEASE}.dev$((RANDOM % 900000 + 100000))'" $ROOTDIR/package_info.py
          fi

          pushd $BUILD_DIR
            rm LICENSE || true
            docker run --rm -v $(pwd):/workspace -w /workspace $IMAGE bash -c '\
              for python_version in cp310 cp311 cp312 cp313; do \
                /opt/python/${python_version}-${python_version}/bin/pip install --upgrade "setuptools<80.0.0,>=77.0.0" build; \
              done && \
              for python_version in cp310 cp311 cp312 cp313; do \
                /opt/python/${python_version}-${python_version}/bin/python -m build; \
              done \
            '

            PLATFORM_WHEELS=$(find dist -name "*.whl" -not -name "*-none-any.whl")
            if [ -n "$PLATFORM_WHEELS" ]; then
                echo "Found platform wheels to repair: $PLATFORM_WHEELS"
                docker run --rm -v $(pwd):/workspace -w /workspace $IMAGE auditwheel repair $PLATFORM_WHEELS
                docker run --rm -v $(pwd):/workspace -w /workspace $IMAGE rm -rf dist/*.whl
                docker run --rm -v $(pwd):/workspace -w /workspace $IMAGE cp -a wheelhouse/* dist/
            fi
          popd

          pushd $ROOTDIR
            EXPECTED_RELEASE_NUMBER=$(python -c "import package_info; print(package_info.__version__)")
          popd

          echo "expected-release-number=$EXPECTED_RELEASE_NUMBER" | tee -a "${GITHUB_OUTPUT}"

          if [ "$PACKAGE" = "megatron-fsdp" ]; then
            mkdir -p dist/
            cp -a megatron/core/distributed/fsdp/src/dist/* dist/
          fi

          ls -al dist/

      - name: Test wheels
        run: |
          ls -al dist/

          if [ "$PACKAGE" = "megatron-core" ]; then
            ROOTPATH="megatron.core"
            WHEEL_PREFIX="megatron_core"
          elif [ "$PACKAGE" = "megatron-fsdp" ]; then
            ROOTPATH="megatron_fsdp"
            WHEEL_PREFIX="megatron_fsdp"
          else
            echo Unknown package: $PACKAGE
            exit 1
          fi

          if [ "$PACKAGE" = "megatron-core" ]; then
            if [[ "$PLATFORM" == "arm64" ]]; then
              for file in dist/$WHEEL_PREFIX*cp310*aarch64.whl; do
                pip install --no-cache-dir "$file"
              done
            else
              for file in dist/$WHEEL_PREFIX*cp310*x86_64.whl; do
                pip install --no-cache-dir "$file"
              done
            fi
          else
            pip install --no-cache-dir dist/$WHEEL_PREFIX*.whl
          fi

          sudo rm -rf megatron/

          RELEASE_NUMBER=$(python -c "import $ROOTPATH; print($ROOTPATH.__version__)")
          test "${{ steps.build-wheel.outputs.expected-release-number }}" == "$RELEASE_NUMBER"

      - name: Upload wheels
        uses: actions/upload-artifact@v6
        with:
          name: wheels-${{ matrix.PACKAGE }}-${{ matrix.PLATFORM }}-${{ inputs.dry-run && 'dry-run' || 'release' }}
          path: dist/

  publish-wheels:
    needs: [build-and-test-wheels]
    runs-on: ubuntu-latest
    if: inputs.no-publish == false
    strategy:
      fail-fast: false
      matrix:
        include:
          - PACKAGE: megatron-core
            PLATFORM: arm64
          - PACKAGE: megatron-core
            PLATFORM: amd64
          - PACKAGE: megatron-fsdp
            PLATFORM: amd64
    env:
      PACKAGE: ${{ matrix.PACKAGE }}
    steps:
      - name: Download wheels
        uses: actions/download-artifact@v7
        with:
          name: wheels-${{ matrix.PACKAGE }}-${{ matrix.PLATFORM }}-${{ inputs.dry-run && 'dry-run' || 'release' }}
          path: dist/
          merge-multiple: true

      - name: Publish wheels
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
          TWINE_REPOSITORY: ${{ (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/r')) && 'pypi' || 'testpypi' }}
          PLATFORM: ${{ matrix.PLATFORM }}
        run: |

          # Delete sdist for arm64 since we already upload it with amd64.
          if [ "$PLATFORM" == "arm64" ]; then
            rm dist/*.tar.gz
          fi

          ls -al dist/
          pip install twine
          twine upload \
            --verbose \
            -r $TWINE_REPOSITORY \
            -u $TWINE_USERNAME \
            -p $TWINE_PASSWORD \
            dist/*


================================================
FILE: .github/workflows/_release_library.yml
================================================
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: "Release"

defaults:
  run:
    shell: bash -x -e -u -o pipefail {0}

on:
  workflow_call:
    inputs:
      release-ref:
        required: true
        description: Ref (SHA or branch) to release
        type: string
      dry-run:
        type: boolean
        required: true
        description: Do not publish a wheel and GitHub release.
      version-bump-branch:
        type: string
        required: true
        description: Branch to target for version bump
      create-gh-release:
        required: false
        description: Create a GitHub release
        type: boolean
        default: true
      gh-release-use-changelog-builder:
        required: false
        description: Use release-changelog-builder-action to dynamically build changelog
        type: boolean
        default: true
      gh-release-changelog-config:
        required: false
        description: Path to changelog builder configuration file
        type: string
        default: ".github/workflows/config/changelog-config.json"
      gh-release-from-tag:
        required: false
        description: Starting tag for changelog builder (leave empty for auto-detect)
        type: string
        default: ""
      publish-docs:
        required: false
        description: Publish documentation to S3 after release
        type: boolean
        default: true
    secrets:
      TWINE_PASSWORD:
        required: true
      SLACK_WEBHOOK:
        required: true
      PAT:
        required: true
      AWS_ASSUME_ROLE_ARN:
        required: true
      AWS_ACCESS_KEY_ID:
        required: true
      AWS_SECRET_ACCESS_KEY:
        required: true
      AKAMAI_HOST:
        required: true
      AKAMAI_CLIENT_TOKEN:
        required: true
      AKAMAI_CLIENT_SECRET:
        required: true
      AKAMAI_ACCESS_TOKEN:
        required: true
      S3_BUCKET_NAME:
        required: true

permissions:
  contents: write # To read repository content
  pull-requests: write # To create PR(s)

jobs:
  build-test-publish-wheels-dry-run:
    uses: ./.github/workflows/_build_test_publish_wheel.yml
    with:
      dry-run: true
      ref: ${{ inputs.release-ref }}
      no-publish: true
    secrets:
      TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}

  bump-next-version:
    runs-on: ubuntu-latest
    needs: build-test-publish-wheels-dry-run
    if: |
      (
        success() || !failure()
      )
      && !cancelled()
    outputs:
      release-version: ${{ steps.bump-version-mcore.outputs.release-version }}
    env:
      IS_DRY_RUN: ${{ inputs.dry-run }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          path: ${{ github.run_id }}
          token: ${{ secrets.PAT }}
          fetch-depth: 0
          fetch-tags: true
          ref: ${{ inputs.release-ref }}
      - name: Bump version MCore
        id: bump-version-mcore
        env:
          SRC_DIR: ""
          PYPROJECT_NAME: "megatron.core"
        run: |
          set +u
          cd ${{ github.run_id }}

          PACKAGE_INFO_FILE="$SRC_DIR${PYPROJECT_NAME//.//}/package_info.py"

          MAJOR=$(cat $PACKAGE_INFO_FILE | awk '/^MAJOR = /' | awk -F"= " '{print $2}')
          MINOR=$(cat $PACKAGE_INFO_FILE | awk '/^MINOR = /' | awk -F"= " '{print $2}')
          PATCH=$(cat $PACKAGE_INFO_FILE | awk '/^PATCH = /' | awk -F"= " '{print $2}')
          PRERELEASE=$(cat $PACKAGE_INFO_FILE | awk '/^PRE_RELEASE = /' | awk -F"= " '{print $2}' | tr -d '"' | tr -d "'")

          echo "release-version=$MAJOR.$MINOR.$PATCH$PRERELEASE" | tee -a "$GITHUB_OUTPUT"

          if [[ "$PRERELEASE" != "" ]]; then
            if [[ "$PRERELEASE" == *rc* ]]; then
              NEXT_PATCH=$PATCH
              NEXT_PRERELEASE=rc$((${PRERELEASE#rc} + 1))
            elif [[ "$PRERELEASE" == *a* ]]; then
              NEXT_PATCH=$PATCH
              NEXT_PRERELEASE=a$((${PRERELEASE#a} + 1))
            else
              echo "Unknown pre-release: $PRERELEASE"
              exit 1
            fi
          else
            NEXT_PATCH=$((${PATCH} + 1))
            NEXT_PRERELEASE=$PRERELEASE
          fi

          sed -i "/^PATCH/c\PATCH = $NEXT_PATCH" $PACKAGE_INFO_FILE
          sed -i "/^PRE_RELEASE/c\PRE_RELEASE = \"$NEXT_PRERELEASE\"" $PACKAGE_INFO_FILE

          echo "version=$MAJOR.$MINOR.$NEXT_PATCH$NEXT_PRERELEASE" | tee -a "$GITHUB_OUTPUT"

      - name: Bump version MFSDP
        id: bump-version-mfsdp
        env:
          SRC_DIR: "megatron/core/distributed/fsdp/src/"
          PYPROJECT_NAME: "megatron_fsdp"
        run: |
          set +u

          cd ${{ github.run_id }}

          PACKAGE_INFO_FILE="$SRC_DIR${PYPROJECT_NAME//.//}/package_info.py"

          MAJOR=$(cat $PACKAGE_INFO_FILE | awk '/^MAJOR = /' | awk -F"= " '{print $2}')
          MINOR=$(cat $PACKAGE_INFO_FILE | awk '/^MINOR = /' | awk -F"= " '{print $2}')
          PATCH=$(cat $PACKAGE_INFO_FILE | awk '/^PATCH = /' | awk -F"= " '{print $2}')
          PRERELEASE=$(cat $PACKAGE_INFO_FILE | awk '/^PRE_RELEASE = /' | awk -F"= " '{print $2}' | tr -d '"' | tr -d "'")

          if [[ "$PRERELEASE" != "" ]]; then
            if [[ "$PRERELEASE" == *rc* ]]; then
              NEXT_PATCH=$PATCH
              NEXT_PRERELEASE=rc$((${PRERELEASE#rc} + 1))
            elif [[ "$PRERELEASE" == *a* ]]; then
              NEXT_PATCH=$PATCH
              NEXT_PRERELEASE=a$((${PRERELEASE#a} + 1))
            else
              echo "Unknown pre-release: $PRERELEASE"
              exit 1
            fi
          else
            NEXT_PATCH=$((${PATCH} + 1))
            NEXT_PRERELEASE=$PRERELEASE
          fi

          sed -i "/^PATCH/c\PATCH = $NEXT_PATCH" $PACKAGE_INFO_FILE
          sed -i "/^PRE_RELEASE/c\PRE_RELEASE = \"$NEXT_PRERELEASE\"" $PACKAGE_INFO_FILE

          echo "version=$MAJOR.$MINOR.$NEXT_PATCH$NEXT_PRERELEASE" | tee -a "$GITHUB_OUTPUT"

      - name: Create and push deployment branch
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          cd ${{ github.run_id }}

          TMP_BRANCH="deploy-release/$(uuidgen)"
          git config --global user.name "github-actions[bot]"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git checkout -b "$TMP_BRANCH"
          git add -A .
          git commit -m "beep boop 🤖: Bumping versions" || echo "No changes to commit"
          git push -u origin "$TMP_BRANCH"
          echo "TMP_BRANCH=$TMP_BRANCH" | tee -a $GITHUB_ENV

          # Create PR to collect app based status checks that run on PRs only
          # (like DCO check)
          PR_URL=$(gh pr create \
            --base ${{ inputs.version-bump-branch }} \
            --head $TMP_BRANCH \
            --title "beep boop 🤖: Bumping versions" \
            --body "This is an automated PR to bump versions.")

          # Extract PR number from URL
          PR_NUMBER=$(echo $PR_URL | grep -o '[0-9]*$')

      - name: Wait for status checks on tmp branch
        uses: actions/github-script@v8
        id: wait-status
        with:
          github-token: ${{ secrets.PAT }}
          script: |
            const branch = process.env.TMP_BRANCH;
            const owner = context.repo.owner;
            const repo = context.repo.repo;

            // Get latest commit SHA of branch
            const { data: refData } = await github.rest.git.getRef({
              owner,
              repo,
              ref: `heads/${branch}`,  // note: no 'refs/' prefix here
            });

            const sha = refData.object.sha;

            console.log(`Polling status for commit SHA: ${sha}`);

            let checksPassed = false;
            let maxAttempts = 30;
            let attempt = 0;
            const delay = ms => new Promise(res => setTimeout(res, ms));

            while (!checksPassed && attempt < maxAttempts) {
              attempt++;

              // Use commit SHA instead of branch ref
              const { data: status } = await github.rest.repos.getCombinedStatusForRef({
                owner,
                repo,
                ref: sha,
              });

              const { data: checks } = await github.rest.checks.listForRef({
                owner,
                repo,
                ref: sha,
              });

              const allStatuses = status.statuses;
              const allChecks = checks.check_runs;

              if (allStatuses.length === 0 && allChecks.length === 0) {
                console.log(`Attempt ${attempt}: No checks or statuses yet. Waiting...`);
                await delay(10000);
                continue;
              }

              const statusesOk = allStatuses.every(s => s.state === 'success');
              const checksOk = allChecks.every(c => c.status === 'completed');

              if (statusesOk && checksOk) {
                console.log('✅ All checks passed.');
                checksPassed = true;
                break
              }

              console.log(`Attempt ${attempt}: Checks not complete yet. Waiting...`);
              await delay(10000);
            }

            if (!checksPassed) {
              core.setFailed('❌ Status checks did not pass in time');
            }

      - name: Merge into ${{ inputs.version-bump-branch }}
        run: |
          cd ${{ github.run_id }}

          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

          CMD=$(echo -E 'git push origin ${{ inputs.version-bump-branch }}')

          if [[ "$IS_DRY_RUN" == "true" ]]; then
            echo "dry-run enabled, would have run: $CMD"
          else
            # Here we account for potential race conditions from multiple concurrent releases.
            # Those can be legit (operating on different packages within the monorepo, for example)
            # but the pushes would be still rejected purely because of git's inability to
            # push non-fast-forward updates to the branch. In this case we would need to let
            # a retry.
            git fetch origin ${{ inputs.version-bump-branch }}
            git checkout ${{ inputs.version-bump-branch }}
            git merge ${{ env.TMP_BRANCH }}

            for attempt in {1..3}; do
              if eval "$CMD"; then
                echo "Git push succeeded on attempt $attempt"
                break
              else
                echo "Git push failed on attempt $attempt"
                if [[ $attempt -lt 3 ]]; then
                  sleep $((RANDOM % 3 + 1))
                  # We refetch, reset and re-merge. Note resetting because the local
                  # branch is "contaminated" with previous merge attempt.
                  git fetch origin ${{ inputs.version-bump-branch }}
                  git reset --hard origin/${{ inputs.version-bump-branch }}
                  git merge ${{ env.TMP_BRANCH }}
                else
                  echo "Git push failed after 3 attempts"
                  exit 1
                fi
              fi
            done
          fi

      - name: Delete ${{ env.TMP_BRANCH }} branch
        if: always()
        run: |
          cd ${{ github.run_id }}
          git push -d origin ${{ env.TMP_BRANCH }}

  build-test-publish-wheels:
    needs: [bump-next-version]
    uses: ./.github/workflows/_build_test_publish_wheel.yml
    with:
      dry-run: false
      ref: ${{ inputs.release-ref }}
      no-publish: false
    secrets:
      TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}

  create-gh-release:
    needs: [build-test-publish-wheels, bump-next-version]
    runs-on: ubuntu-latest
    if: |
      (
        success() || !failure()
      )
      && inputs.create-gh-release == true
      && !cancelled()
    outputs:
      is-release-candidate: ${{ steps.version-number.outputs.is-release-candidate }}
    env:
      REPOSITORY: ${{ github.repository }}
      PROJECT_NAME: Megatron Core
      VERSION: ${{ needs.bump-next-version.outputs.release-version }}
      TAG_PREFIX: core_
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          path: ${{ github.run_id }}
          ref: ${{ inputs.release-ref }}
          token: ${{ secrets.PAT || secrets.GITHUB_TOKEN }}

      - name: Determine fromTag for changelog
        id: determine-from-tag
        if: inputs.gh-release-use-changelog-builder == true
        run: |
          cd ${{ github.run_id }}

          # If gh-release-from-tag is provided, use it
          if [[ -n "${{ inputs.gh-release-from-tag }}" ]]; then
            FROM_TAG="${{ inputs.gh-release-from-tag }}"
            echo "Using provided fromTag: $FROM_TAG"
          else
            # Get the most recent tag
            FROM_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "")
            if [[ -z "$FROM_TAG" ]]; then
              echo "No previous tags found, leaving fromTag empty"
            else
              echo "Auto-detected most recent tag: $FROM_TAG"
            fi
          fi

          echo "from-tag=$FROM_TAG" >> $GITHUB_OUTPUT

      - name: Build Changelog
        id: build-changelog
        if: inputs.gh-release-use-changelog-builder == true
        uses: mikepenz/release-changelog-builder-action@v6.1.0
        env:
          GITHUB_TOKEN: ${{ secrets.PAT || secrets.GITHUB_TOKEN }}
        with:
          configuration: ${{ github.run_id }}/${{ inputs.gh-release-changelog-config }}
          owner: ${{ github.repository_owner }}
          repo: ${{ github.event.repository.name }}
          ignorePreReleases: "false"
          failOnError: "false"
          fromTag: ${{ steps.determine-from-tag.outputs.from-tag }}
          toTag: ${{ inputs.release-ref }}
          mode: ${{ inputs.gh-release-changelog-mode }}

      - name: Create release
        id: version-number
        env:
          SHA: ${{ inputs.release-ref }}
          GH_TOKEN: ${{ secrets.PAT }}
          IS_DRY_RUN: ${{ inputs.dry-run }}
          BUILT_CHANGELOG: ${{ steps.build-changelog.outputs.changelog }}
        run: |
          cd ${{ github.run_id }}

          IS_RELEASE_CANDIDATE=$([[ "$VERSION" == *rc* ]] && echo "true" || echo "false")
          IS_ALPHA=$([[ "$VERSION" == *a* ]] && echo "true" || echo "false")
          IS_PRERELEASE=$([[ "$IS_RELEASE_CANDIDATE" == "true" || "$IS_ALPHA" == "true" ]] && echo "true" || echo "false")
          NAME="NVIDIA $PROJECT_NAME ${VERSION}"

          # Use built changelog if available, otherwise fall back to CHANGELOG.md
          if [[ -n "$BUILT_CHANGELOG" ]]; then
            CHANGELOG="$BUILT_CHANGELOG"
          elif [[ "$IS_RELEASE_CANDIDATE" == "true" ]]; then
            DATE=$(date +"%Y-%m-%d")
            CHANGELOG="Prerelease: $NAME ($DATE)"
          else
            CHANGELOG=$(awk '/^## '"$NAME"'/{flag=1; next} /^## /{flag=0} flag' CHANGELOG.md)
            CHANGELOG=$(echo "$CHANGELOG" | sed '/./,$!d' | sed ':a;N;$!ba;s/\n$//')
          fi

          echo "is-release-candidate=$IS_RELEASE_CANDIDATE" | tee -a "$GITHUB_OUTPUT"

          PAYLOAD=$(jq -nc \
                      --arg TAG_NAME "${TAG_PREFIX}v${VERSION}" \
                      --arg CI_COMMIT_BRANCH "$SHA" \
                      --arg NAME "$NAME" \
                      --arg BODY "$CHANGELOG" \
                      --argjson PRERELEASE "$IS_PRERELEASE" \
                      '{
                        "tag_name": $TAG_NAME,
                        "target_commitish": $CI_COMMIT_BRANCH,
                        "name": $NAME,
                        "body": $BODY,
                        "draft": false,
                        "prerelease": $PRERELEASE,
                        "generate_release_notes": false
                      }'
                  )
          echo -E "$PAYLOAD" > payload.txt

          CMD=$(echo -E 'curl -L \
            -X POST \
            -H "Accept: application/vnd.github+json" \
            -H "Authorization: Bearer '"$GH_TOKEN"'" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            https://api.github.com/repos/'"$REPOSITORY"'/releases \
            -d @payload.txt
          ')

          if [[ "$IS_DRY_RUN" == "true" ]]; then
            echo -E "$CMD"
          else
            eval "$CMD"
          fi

  publish-docs:
    needs: [bump-next-version, create-gh-release]
    uses: ./.github/workflows/release-docs.yml
    if: |
      (
        success() || !failure()
      )
      && inputs.publish-docs == true
      && !cancelled()
    with:
      dry-run: ${{ inputs.dry-run }}
      publish-as-latest: true
      docs-version-override: ${{ needs.bump-next-version.outputs.release-version }}
      build-docs-ref: ${{ inputs.release-ref }}
    secrets: inherit

  notify:
    needs: [build-test-publish-wheels, create-gh-release]
    runs-on: ubuntu-latest
    env:
      GH_URL: https://github.com/${{ github.repository }}/releases/tag/v${{ needs.build-test-publish-wheels.outputs.version }}
      PYPI_URL: https://${{ inputs.dry-run == true && 'test.' || '' }}pypi.org/project/${{ needs.build-test-publish-wheels.outputs.pypi-name }}/${{ needs.build-test-publish-wheels.outputs.version }}/
      PROJECT_NAME: Megatron Core
      VERSION: ${{ needs.build-test-publish-wheels.outputs.version }}
    steps:
      - name: Checkout
        uses: actions/checkout@v6
        with:
          repository: NVIDIA-NeMo/FW-CI-templates
          ref: v0.17.0
          path: send-slack-alert

      - name: Send Slack alert
        uses: ./send-slack-alert/.github/actions/send-slack-alert
        env:
          MESSAGE: |
            ${{ inputs.dry-run == true && 'This is a dry-run, nothing actually happened: ' || '' }}We have released `${{ env.VERSION }}` of `NVIDIA ${{ env.PROJECT_NAME }}` 🚀✨🎉

            • <${{ env.GH_URL }}|GitHub release>
            • <${{ env.PYPI_URL }}|PyPi release>

        with:
          message: ${{ env.MESSAGE }}
          webhook: ${{ secrets.SLACK_WEBHOOK }}


================================================
FILE: .github/workflows/_update_dependencies.yml
================================================
name: ~Update dependencies template
on:
  workflow_call:
    inputs:
      target-branch:
        required: true
        type: string
        description: "The target branch to bump"
    secrets:
      PAT:
        required: true
      SSH_KEY:
        required: true
      SSH_PWD:
        required: true

jobs:
  pre-flight:
    runs-on: ubuntu-latest
    outputs:
      bump-branch: bump-ci-container-${{ steps.ref.outputs.date }}-${{ inputs.target-branch }}
      date: ${{ steps.ref.outputs.date }}
    steps:
      - name: Get date
        id: ref
        run: echo "date=$(date +%F)" | tee -a "$GITHUB_OUTPUT"

  update-lockfile:
    runs-on: linux-amd64-cpu16
    needs: [pre-flight]
    env:
      SOURCE_BRANCH: ${{ needs.pre-flight.outputs.bump-branch }}
      TARGET_BRANCH: ${{ inputs.target-branch }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v6
        with:
          ref: ${{ env.TARGET_BRANCH }}

      - name: Mock test data
        run: mkdir -p assets/

      - name: Fetch NGC Version
        id: ngc-version
        run: |
          NGC_VERSION=$(cat docker/.ngc_version.dev)
          echo "NGC_VERSION=${NGC_VERSION}" | tee -a "$GITHUB_OUTPUT"

      - name: Build container
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          docker build -f docker/Dockerfile.ci.dev --build-arg FROM_IMAGE_NAME="${{ steps.ngc-version.outputs.NGC_VERSION }}" --target=main -t megatron-core .

      - name: Create bump branch if not exists
        run: |
          if ! git ls-remote --exit-code origin $SOURCE_BRANCH; then
            git checkout -b $SOURCE_BRANCH $TARGET_BRANCH
            git push origin $SOURCE_BRANCH
          fi

      - name: Checkout repo
        uses: actions/checkout@v6
        with:
          ref: ${{ env.SOURCE_BRANCH }}

      - name: Upgrade lock file
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          docker run \
          --rm \
          -v $(pwd):/workspace \
          -w /workspace \
          -e GH_TOKEN=${{ secrets.PAT }} \
          megatron-core \
          bash -c 'uv lock --upgrade'

      - name: Upload lock file
        uses: actions/upload-artifact@v6
        with:
          name: lock-file-${{ env.SOURCE_BRANCH }}
          path: uv.lock

  create-pr:
    needs: [update-lockfile, pre-flight]
    runs-on: ubuntu-latest
    env:
      SOURCE_BRANCH: ${{ needs.pre-flight.outputs.bump-branch }}
      TARGET_BRANCH: ${{ inputs.target-branch }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v6
        with:
          token: ${{ secrets.PAT }}
          ref: ${{ env.TARGET_BRANCH }}

      - name: Rebase against ${{ env.SOURCE_BRANCH }}
        run: |
          if git ls-remote --exit-code origin ${{ env.SOURCE_BRANCH }}; then
            git fetch origin ${{ env.SOURCE_BRANCH }}
            git rebase -S origin/${{ env.SOURCE_BRANCH }}
          fi

      - name: Download lock file
        uses: actions/download-artifact@v7
        with:
          name: lock-file-${{ env.SOURCE_BRANCH }}

      - name: Create Bump PR
        uses: peter-evans/create-pull-request@v8
        id: create-pull-request
        env:
          title: "chore(beep boop 🤖): Bump `uv.lock` (${{ inputs.target-branch}}) (${{ needs.pre-flight.outputs.date }})"
        with:
          branch: ${{ env.SOURCE_BRANCH }}
          base: ${{ env.TARGET_BRANCH }}
          title: ${{ env.title }}
          token: ${{ secrets.PAT }}
          body: |
            🚀 PR to bump `uv.lock` in `${{ inputs.target-branch }}`.  

            📝 Please remember the following to-do's before merge: 
            - [ ] Verify the presubmit CI  

            🙏 Please merge this PR only if the CI workflow completed successfully.
          commit-message: ${{ env.title }}
          signoff: true
          committer: "github-actions[bot] <github-actions[bot]@users.noreply.github.com>"

      - name: Post /ok to test comment
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          PR_NUMBER="${{ steps.create-pull-request.outputs.pull-request-number }}"
          if [ -z "$PR_NUMBER" ]; then
            echo "No PR was created, skipping comment"
            exit 0
          fi
          SHA="${{ steps.create-pull-request.outputs.pull-request-head-sha }}"
          gh pr comment "$PR_NUMBER" --body "/ok to test $SHA"

      - name: Wait for CI checks
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          PR_NUMBER="${{ steps.create-pull-request.outputs.pull-request-number }}"
          if [ -z "$PR_NUMBER" ]; then
            echo "No PR was created, skipping wait"
            exit 0
          fi

          # Fetch required status checks from branch protection rules
          REQUIRED_CHECKS=$(gh api \
            "repos/${{ github.repository }}/branches/${{ env.TARGET_BRANCH }}/protection/required_status_checks" \
            --jq '.checks[].context' 2>/dev/null \
            || gh api \
            "repos/${{ github.repository }}/branches/${{ env.TARGET_BRANCH }}/protection/required_status_checks" \
            --jq '.contexts[]' 2>/dev/null \
            || true)

          if [ -z "$REQUIRED_CHECKS" ]; then
            echo "No branch protection rules found for ${{ env.TARGET_BRANCH }}, skipping wait"
            exit 0
          fi

          echo "Required checks from branch protection:"
          echo "$REQUIRED_CHECKS"

          echo "Waiting for required checks to complete on PR #$PR_NUMBER..."
          i=0
          INITIALIZED=false
          while true; do
            i=$((i + 1))
            CHECKS_JSON=$(gh pr checks "$PR_NUMBER" --json name,state 2>/dev/null || echo "[]")
            ALL_DONE=true
            FAILED_CHECKS=""
            while IFS= read -r check; do
              CHECK_STATE=$(echo "$CHECKS_JSON" | jq -r --arg name "$check" '.[] | select(.name == $name) | .state // ""' | tr '[:upper:]' '[:lower:]')
              case "$CHECK_STATE" in
                *success*|*pass*|*skip*|*neutral*) ;;
                *pending*|*queued*|*progress*|*waiting*|*request*|"")
                  ALL_DONE=false
                  INITIALIZED=true
                  break
                  ;;
                *)
                  if [ "$INITIALIZED" = "true" ]; then
                    FAILED_CHECKS="${FAILED_CHECKS}  - ${check} (${CHECK_STATE})"$'\n'
                  else
                    ALL_DONE=false
                  fi
                  ;;
              esac
            done <<< "$REQUIRED_CHECKS"
            if [ "$ALL_DONE" = "true" ]; then
              if [ -n "$FAILED_CHECKS" ]; then
                echo "Required check(s) did not pass:"
                echo "$FAILED_CHECKS"
                exit 1
              fi
              echo "All required checks passed!"
              break
            fi
            echo "Checks not yet complete (attempt $i), retrying in 30s..."
            sleep 30
          done

      - name: Merge PR
        env:
          title: "chore(beep boop 🤖): Bump `uv.lock` (${{ env.TARGET_BRANCH}}) (${{ needs.pre-flight.outputs.date }})"
        run: |
          PR_NUMBER="${{ steps.create-pull-request.outputs.pull-request-number }}"
          if [ -z "$PR_NUMBER" ]; then
            echo "No PR was created, skipping merge"
            exit 0
          fi
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git fetch origin ${{ env.SOURCE_BRANCH }}
          git fetch origin ${{ env.TARGET_BRANCH }}
          git checkout ${{ env.TARGET_BRANCH }}
          git merge --squash origin/${{ env.SOURCE_BRANCH }}
          git commit -m "${{ env.title }}"
          git pull --rebase origin ${{ env.TARGET_BRANCH }}
          git push origin ${{ env.TARGET_BRANCH }}
          git push origin --delete ${{ env.SOURCE_BRANCH }}


================================================
FILE: .github/workflows/auto-assign-milestone.yml
================================================
name: Auto-assign Milestone to PR

on:
  push:
    branches:
      - "pull-request/[0-9]+"

permissions:
  contents: read
  pull-requests: write
  issues: write

jobs:
  assign-milestone:
    runs-on: ubuntu-latest
    if: github.repository == 'NVIDIA/Megatron-LM'
    steps:
      - name: Get PR info
        id: get-pr-info
        if: startsWith(github.ref, 'refs/heads/pull-request/')
        uses: nv-gha-runners/get-pr-info@main

      - name: Check if PR has milestone
        id: check_milestone
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          MILESTONE=$(gh pr view ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} \
            --repo ${{ github.repository }} \
            --json milestone \
            --jq '.milestone.title')

          if [ "$MILESTONE" = "null" ] || [ -z "$MILESTONE" ]; then
            echo "has_milestone=false" >> $GITHUB_OUTPUT
          else
            echo "has_milestone=true" >> $GITHUB_OUTPUT
            echo "PR already has milestone: $MILESTONE"
          fi

      - name: Get most recent open milestone
        if: steps.check_milestone.outputs.has_milestone == 'false'
        id: get_milestone
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          # Get the most recent open milestone (sorted by due date, then by creation date)
          MILESTONE_NUMBER=$(gh api \
            "repos/${{ github.repository }}/milestones?state=open&sort=due_on&direction=desc" \
            --jq '.[0].number')

          MILESTONE_TITLE=$(gh api \
            "repos/${{ github.repository }}/milestones?state=open&sort=due_on&direction=desc" \
            --jq '.[0].title')

          if [ -z "$MILESTONE_NUMBER" ] || [ "$MILESTONE_NUMBER" = "null" ]; then
            echo "No open milestones found"
            echo "milestone_found=false" >> $GITHUB_OUTPUT
          else
            echo "milestone_found=true" >> $GITHUB_OUTPUT
            echo "milestone_number=$MILESTONE_NUMBER" >> $GITHUB_OUTPUT
            echo "milestone_title=$MILESTONE_TITLE" >> $GITHUB_OUTPUT
            echo "Found milestone: $MILESTONE_TITLE (number: $MILESTONE_NUMBER)"
          fi

      - name: Assign milestone to PR
        if: steps.check_milestone.outputs.has_milestone == 'false' && steps.get_milestone.outputs.milestone_found == 'true'
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          gh pr edit ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }} \
            --repo ${{ github.repository }} \
            --milestone "${{ steps.get_milestone.outputs.milestone_title }}"

          echo "✅ Assigned milestone '${{ steps.get_milestone.outputs.milestone_title }}' to PR #${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}"


================================================
FILE: .github/workflows/auto-reminder-bot.yml
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

name: Auto Reminder Bot

on:
  workflow_dispatch:
  schedule:
    - cron: "0 12 * * *"

jobs:
  run-script:
    name: Run Auto Reminder Bot
    runs-on: ubuntu-latest
    if: github.repository == 'NVIDIA/Megatron-LM'
    steps:
      - name: Check out repository code
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.10"

      - name: Install dependencies
        run: |
          pip install --no-cache-dir PyGithub slack-sdk

      - name: Run Auto Reminder Bot
        run: |
          export SLACK_TOKEN=${{ secrets.SLACK_BOT_TOKEN }}
          export SLACK_WEBHOOK_URL=${{ secrets.SLACK_REVIEW_REMINDER_CHANNEL_WEBHOOK }}
          export GH_TOKEN=${{ secrets.PAT }}
          python tests/test_utils/python_scripts/auto_reminder_github.py


================================================
FILE: .github/workflows/auto-swap-labels.yml
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

name: Auto Swap Labels
on:
  pull_request_target:
    types: [ready_for_review, synchronize]
    branches:
      - main
  workflow_run:
    workflows: ["Review Trigger"]
    types: [completed]

permissions:
  pull-requests: write
  contents: read
  actions: read

jobs:
  check-approval:
    runs-on: ubuntu-latest
    if: >-
      github.repository == 'NVIDIA/Megatron-LM' && (
        (github.event_name == 'pull_request_target' &&
         github.event.pull_request.base.ref == 'main' &&
         !github.event.pull_request.draft) ||
        (github.event_name == 'workflow_run' &&
         github.event.workflow_run.conclusion == 'success')
      )

    steps:
      - name: Get PR number from workflow_run
        id: get-pr
        if: github.event_name == 'workflow_run'
        continue-on-error: true
        uses: actions/download-artifact@v4
        with:
          name: pr-number
          path: pr-number
          github-token: ${{ github.token }}
          run-id: ${{ github.event.workflow_run.id }}

      - name: Set PR number
        id: pr
        run: |
          if [ "${{ github.event_name }}" = "workflow_run" ]; then
            if [ "${{ steps.get-pr.outcome }}" != "success" ]; then
              echo "No approval artifact found — review was not an approval. Skipping."
              exit 0
            fi
            echo "number=$(cat pr-number/number)" >> $GITHUB_OUTPUT
          else
            echo "number=${{ github.event.pull_request.number }}" >> $GITHUB_OUTPUT
          fi

      - name: Check out repository code
        if: steps.pr.outputs.number
        uses: actions/checkout@v4

      - name: Set up Python
        if: steps.pr.outputs.number
        uses: actions/setup-python@v6
        with:
          python-version: "3.10"

      - name: Install dependencies
        if: steps.pr.outputs.number
        run: |
          pip install --no-cache-dir PyGithub slack-sdk

      - name: Run Auto Swap Labels
        if: steps.pr.outputs.number
        run: |
          export GH_TOKEN=${{ secrets.PAT }}
          export PR_NUMBER=${{ steps.pr.outputs.number }}
          python tests/test_utils/python_scripts/swap_pr_labels.py


================================================
FILE: .github/workflows/auto-update-copy-pr-bot.yml
================================================
name: Auto Update Copy PR Bot

on:
  workflow_dispatch:
  schedule:
    - cron: "0 0 * * *"

jobs:
  auto-update-copy-pr-bot:
    runs-on: ubuntu-latest
    if: github.repository == 'NVIDIA/Megatron-LM'
    steps:
      - name: Checkout code
        uses: actions/checkout@v6
        with:
          token: ${{ secrets.PAT }}
          ref: main

      - name: Fetch list of members in mcore-reviewers team
        shell: bash -euxo pipefail {0}
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          #!/bin/bash

          get_members() {
              local org=$1 team=$2 seen_file=$3    

              gh api "/orgs/$org/teams/$team/members" --paginate --jq '.[].login' >> "$seen_file"
              
              gh api "/orgs/$org/teams/$team/teams" --paginate --jq '.[].slug' | while read -r child; do
                  get_members "$org" "$child" "$seen_file"
              done

              cat "$seen_file"
          }

          tmp=$(mktemp)
          echo "" > final.txt
          get_members "NVIDIA" "mcore-engineers" "$tmp" | sort -u >> final.txt && rm "$tmp"

          tmp=$(mktemp)
          get_members "NVIDIA" "mcore-reviewers" "$tmp" | sort -u >> final.txt && rm "$tmp"

          cat final.txt | jq -sR 'split("\n") | map(select(. != "")) | flatten | unique'

          export TRUSTEES=$(cat final.txt | jq -csR 'split("\n") | map(select(. != "")) | flatten | unique')
          yq '.trustees_override = env(TRUSTEES)' .github/copy-pr-bot.yaml | yq -o yaml > .github/copy-pr-bot.yaml.new

          mv .github/copy-pr-bot.yaml.new .github/copy-pr-bot.yaml

      - name: Commit changes
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          git remote set-url origin https://x-access-token:${GH_TOKEN}@github.com/NVIDIA/Megatron-LM.git
          git config --global user.name "GitHub Actions"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git add .github/copy-pr-bot.yaml
          if git diff --cached --exit-code --quiet; then
            echo "No changes to commit. Exiting gracefully."
            exit 0
          fi
          git commit -m "Update copy-pr-bot.yaml [skip ci]"
          git push -u origin main


================================================
FILE: .github/workflows/build-docs.yml
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Build docs

on:
  push:
    branches:
      - main
      - "pull-request/[0-9]+"
      - "deploy-release/*"

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || 'main' }}-${{ github.event_name }}
  cancel-in-progress: true

jobs:
  pre-flight:
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.73.2

  build-docs:
    needs: [pre-flight]
    if: needs.pre-flight.outputs.is_deployment_workflow != 'true'
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_docs.yml@v0.57.0

  build-docs-summary:
    needs: [pre-flight, build-docs]
    if: |
      (
        needs.pre-flight.outputs.is_deployment_workflow == 'true'
        || always()
      )
      && !cancelled()
    runs-on: ubuntu-latest
    steps:
      - name: Get workflow result
        id: result
        shell: bash -x -e -u -o pipefail {0}
        env:
          GH_TOKEN: ${{ github.token }}
          RUN_ID: ${{ github.run_id }}
          SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' }}
        run: |
          FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length') || echo 0

          if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then
              echo "✅ All previous jobs completed successfully"
              exit 0
          else
              echo "❌ Found $FAILED_JOBS failed job(s)"
              # Show which jobs failed
              gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success") | .name'
              exit 1
          fi


================================================
FILE: .github/workflows/build-test-publish-wheel.yml
================================================
# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Build, test, and publish a PyPi wheel (to testpypi).

on:
  push:
    branches:
      - main
      - "pull-request/[0-9]+"
      - "deploy-release/*"
  merge_group:
    types: [checks_requested]

defaults:
  run:
    shell: bash -x -e -u -o pipefail {0}

permissions:
  id-token: write
  contents: read

jobs:
  pre-flight:
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.73.2
    if: github.repository == 'NVIDIA/Megatron-LM'

  build-test-publish-wheels:
    needs: [pre-flight]
    uses: ./.github/workflows/_build_test_publish_wheel.yml
    with:
      no-publish: true
    secrets:
      TWINE_PASSWORD: ${{ (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/r')) && secrets.SVC_PYPI_TOKEN || secrets.SVC_PYPI_TEST_TOKEN }}

  build-test-publish-wheel-summary:
    needs: [pre-flight, build-test-publish-wheels]
    if: |
      (
        needs.pre-flight.outputs.docs_only == 'true'
        || needs.pre-flight.outputs.is_merge_group == 'true'
        || needs.pre-flight.outputs.is_deployment_workflow == 'true'
        || always()
      )
      && github.repository == 'NVIDIA/Megatron-LM'
      && !cancelled()
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Result
        env:
          GH_TOKEN: ${{ github.token }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          SKIPPING_IS_ALLOWED: false
        run: |
          FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success" and (.name | test("build-and-test-wheels")))] | length') || echo 0

          if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then
              echo "✅ All build-and-test-wheels jobs completed successfully"
              exit 0
          else
              echo "❌ Found $FAILED_JOBS failed build-and-test-wheels job(s)"
              # Show which jobs failed
              gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success" and (.name | test("build-and-test-wheels"))) | .name'
              exit 1
          fi


================================================
FILE: .github/workflows/cherry-pick-release-commit.yml
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Create PR to main with cherry-pick from release

on:
  push:
    branches:
      - main

jobs:
  cherry-pick:
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cherry_pick.yml@v0.65.9
    if: github.repository == 'NVIDIA/Megatron-LM'
    with:
      target-branches-pattern: 'core_(*dev_)?r[0-9]+\.[0-9]+\.[0-9]+'
    secrets:
      PAT: ${{ secrets.PAT }}
      SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_TEAM_GROUP_ID }}
      SLACK_WEBHOOK: ${{ secrets.SLACK_CI_CHANNEL_WEBHOOK }}


================================================
FILE: .github/workflows/cicd-approve-test-queue.yml
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Approve Test Queue

on:
  schedule:
    - cron: "*/5 * * * *" # Runs every 5 minutes
  workflow_dispatch: # Allows manual triggering

jobs:
  approve-queue:
    runs-on: ubuntu-latest
    environment: main
    if: github.repository == 'NVIDIA/Megatron-LM'
    strategy:
      matrix:
        branch: [main, dev, others]
        contributor_type: [internal, external]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests

      - name: Download SSO users list
        run: |
          gh release download v0.1.0 \
            --repo NVIDIA-GitHub-Management/github-audits \
            --pattern users_sso.json \
            --output users_sso.json || echo '{}' > users_sso.json
        env:
          GH_TOKEN: ${{ secrets.NVIDIA_MANAGEMENT_ORG_PAT }}

      - name: Approve waiting deployments
        env:
          GITHUB_TOKEN: ${{ secrets.PAT }}
          MAX_CONCURRENCY: ${{ vars.MAX_CONCURRENCY || 1 }}
          MAX_CONCURRENCY_EXTERNAL: ${{ vars.MAX_CONCURRENCY_EXTERNAL || 1 }}
          CONTRIBUTOR_TYPE: ${{ matrix.contributor_type }}
          SSO_USERS_FILE: users_sso.json
          PYTHONUNBUFFERED: 1
        shell: python
        run: |
          import os
          import json
          import requests
          import re

          # GitHub API configuration
          GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]
          REPO = os.environ["GITHUB_REPOSITORY"]
          CONTRIBUTOR_TYPE = os.environ["CONTRIBUTOR_TYPE"]
          if CONTRIBUTOR_TYPE == "external":
              # Global limit across all branches — no division needed since we count globally.
              MAX_CONCURRENCY = int(os.environ["MAX_CONCURRENCY_EXTERNAL"])
          else:
              MAX_CONCURRENCY = int(os.environ["MAX_CONCURRENCY"]) // 2
          API_BASE = f"https://api.github.com/repos/NVIDIA/Megatron-LM"

          # Load SSO users for internal/external classification
          with open(os.environ["SSO_USERS_FILE"]) as f:
              sso_users = json.load(f)

          # Headers for GitHub API
          headers = {
              "Authorization": f"token {GITHUB_TOKEN}",
              "Accept": "application/vnd.github.v3+json",
              "X-GitHub-Api-Version": "2022-11-28",
          }

          def make_request(endpoint, method="GET", data=None):
              """Make a request to the GitHub API with error handling."""
              url = f"{API_BASE}/{endpoint}"
              try:
                  if method == "GET":
                      response = requests.get(url, headers=headers)
                  else:
                      response = requests.post(url, headers=headers, json=data)
                  response.raise_for_status()
                  return response.json()
              except requests.exceptions.RequestException as e:
                  print(f"Error making request to {endpoint}: {str(e)}")
                  if hasattr(e.response, 'text'):
                      print(f"Response: {e.response.text}")
                  return None

          def is_internal_contributor(pr_info):
              """Return True if the PR author is a member of NVIDIA or NVIDIA-NeMo org (is_org_member)."""
              login = pr_info.get("user", {}).get("login", "")
              org_roles = sso_users.get(login, {}).get("org_roles", [])
              return any(role in ("NVIDIA:Member", "NVIDIA-NeMo:Member") for role in org_roles)

          def get_pr_base_branch(workflow_run):
              """
              Return the base branch of the PR associated with a workflow run, or None.
              Extracts PR number from head branch like 'pull-request/1913' and fetches PR info.
              Returns (base_branch, pr_info) tuple, or (None, None) if not a PR run.
              """
              print(workflow_run.get("head_branch", ""))
              head_branch = workflow_run.get("head_branch", "")
              match = re.match(r"pull-request/(\d+)", head_branch)
              if not match:
                  return None, None  # Not a PR branch pattern

              pr_number = int(match.group(1))

              # Fetch PR info from GitHub API
              pr_info = make_request(f"pulls/{pr_number}")
              if not pr_info:
                  print(f"Failed to fetch PR #{pr_number}")
                  return None, None

              base_branch = pr_info.get("base", {}).get("ref")
              return base_branch, pr_info

          def matches_contributor(workflow_run, contributor_type):
              """Return True if the workflow run matches the contributor type (ignores branch)."""
              _, pr_info = get_pr_base_branch(workflow_run)
              if pr_info is None:
                  return False
              internal = is_internal_contributor(pr_info)
              return (contributor_type == "internal") == internal

          def matches_queue(workflow_run, target_branch, contributor_type):
              """
              Return True if the workflow run belongs to this queue cell:
              matching target branch AND matching contributor type (internal/external).
              """
              base_branch, pr_info = get_pr_base_branch(workflow_run)
              if base_branch is None:
                  return False

              branch_match = (
                  (base_branch == target_branch) or
                  (base_branch != "main" and base_branch != "dev" and target_branch == "others")
              )
              if not branch_match:
                  return False

              pr_number = re.match(r"pull-request/(\d+)", workflow_run.get("head_branch", "")).group(1)
              internal = is_internal_contributor(pr_info)
              contributor_match = (contributor_type == "internal") == internal
              if branch_match and contributor_match:
                  print(f"PR #{pr_number} targets {target_branch}, contributor_type={contributor_type} (internal={internal})")
              return branch_match and contributor_match

          # Get current running and queued workflows
          print("Fetching workflow runs...")
          queued_workflow_runs = make_request("actions/runs?status=queued").get("workflow_runs", [])
          in_progress_workflow_runs = make_request("actions/runs?status=in_progress").get("workflow_runs", [])

          # For external contributors, enforce a single global concurrency limit across ALL branches.
          # For internal contributors, enforce per-branch limits as before.
          if CONTRIBUTOR_TYPE == "external":
              queued_workflow_runs = [run for run in queued_workflow_runs
                                      if run["name"] == "CICD Megatron-LM" and matches_contributor(run, CONTRIBUTOR_TYPE)]
              in_progress_workflow_runs = [run for run in in_progress_workflow_runs
                                          if run["name"] == "CICD Megatron-LM" and matches_contributor(run, CONTRIBUTOR_TYPE)]
          else:
              # Filter for workflows belonging to PRs targeting ${{ matrix.branch }} with matching contributor type
              queued_workflow_runs = [run for run in queued_workflow_runs
                                      if run["name"] == "CICD Megatron-LM" and matches_queue(run, "${{ matrix.branch }}", CONTRIBUTOR_TYPE)]
              in_progress_workflow_runs = [run for run in in_progress_workflow_runs
                                          if run["name"] == "CICD Megatron-LM" and matches_queue(run, "${{ matrix.branch }}", CONTRIBUTOR_TYPE)]

          # Count running and queued workflows
          queued_workflows = len(queued_workflow_runs)
          in_progress_workflows = len(in_progress_workflow_runs)

          total_workflows = queued_workflows + in_progress_workflows
          print(f"Current queued workflows (PRs targeting ${{ matrix.branch }}, {CONTRIBUTOR_TYPE}): {queued_workflows}")
          print(f"Current running workflows (PRs targeting ${{ matrix.branch }}, {CONTRIBUTOR_TYPE}): {in_progress_workflows}")
          print(f"Total workflows: {total_workflows}")
          print(f"Max concurrency: {MAX_CONCURRENCY}")

          if total_workflows >= MAX_CONCURRENCY:
              print("Maximum concurrency reached, no new approvals will be made")
              exit(0)

          # Get waiting CI workflows for test environment
          print("Fetching deployments...")
          pending_workflows = make_request("actions/runs?status=waiting").get("workflow_runs", [])
          print("Pending workflows:", len(pending_workflows))
          pending_workflows = [run for run in pending_workflows
                              if run["name"] == "CICD Megatron-LM" and matches_queue(run, "${{ matrix.branch }}", CONTRIBUTOR_TYPE)]

          # Sort deployments by creation date (oldest first)
          print("Sorting workflows...")
          pending_workflows = sorted(pending_workflows, key=lambda x: x["created_at"])

          # Process each deployment
          print(f"Processing {len(pending_workflows)} pending workflows...")
          for workflow in pending_workflows:
              if total_workflows >= MAX_CONCURRENCY:
                  print("Maximum concurrency reached, stopping approvals")
                  break

              workflow_id = workflow["id"]
              workflow_name = workflow["display_title"]
              print(f"Approving workflow {workflow_name} with Run Id: {workflow_id}")

              deployment_url = f"actions/runs/{workflow_id}/pending_deployments"
              deployment = make_request(deployment_url)[0]
              environment_id = deployment["environment"]["id"]

              # Approve the deployment
              status_data = {
                  "environment_ids": [environment_id],
                  "state": "approved",
                  "comment": "Automatically approved by queue manager"
              }
              result = make_request(deployment_url, method="POST", data=status_data)

              if result:
                  total_workflows += 1
              else:
                  print(f"Failed to approve deployment {deployment['id']}")
                  exit(1)
  notify:
    if: failure()
    runs-on: ubuntu-latest
    needs: [approve-queue]
    steps:
      - name: Notify
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_CI_CHANNEL_WEBHOOK }}
          SLACK_WEBHOOK_ADMIN: <!subteam^${{ secrets.SLACK_TEAM_GROUP_ID }}>
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_REPOSITORY: ${{ github.repository }}
        run: |
          curl -X POST \
            -H 'Content-type: application/json' \
            --data "{\"text\":\":robot_joy: <https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}|Test-queue-approval-bot workflow> failed. Please review manually.\n\ncc ${SLACK_WEBHOOK_ADMIN}\"}" \
            $SLACK_WEBHOOK


================================================
FILE: .github/workflows/cicd-main.yml
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: CICD Megatron-LM
on:
  schedule:
    - cron: 0 0 * * *
  push:
    branches:
      - "pull-request/[0-9]+"
      - "deploy-release/*"
  merge_group:
    types: [checks_requested]
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.event.merge_group.head_ref || github.ref }}
  cancel-in-progress: true

permissions:
  id-token: write
  contents: read

env:
  container-registry: 766267172432.dkr.ecr.us-east-1.amazonaws.com
  container-registry-gb200: us-east4-docker.pkg.dev/nv-projdgxchipp-20260113193621/megatron-lm

jobs:
  is-not-external-contributor:
    runs-on: ubuntu-latest
    if: github.repository == 'NVIDIA/Megatron-LM'
    outputs:
      is_external_contributor: ${{ github.event.pull_request.user.type == 'User' }}
      is_maintainer: ${{ steps.check-membership.outputs.is_maintainer }}
      selected_runner: ${{ steps.check-membership.outputs.is_maintainer == 'true' && 'nvidia-ci-aws-gpu-x8' || 'nvidia-ci-aws-gpu-x8-ephemeral' }}
      selected_runner_gb200: ${{ steps.check-membership.outputs.is_maintainer == 'true' && 'nvidia-ci-gcp-gpu-x4' || 'ubuntu-latest' }}
    permissions:
      issues: write
      pull-requests: write
    env:
      GITHUB_TOKEN: ${{ secrets.PAT }}
      REPO: ${{ github.repository }}
      DISABLE_EXTERNAL_CONTRIBUTOR: ${{ vars.DISABLE_EXTERNAL_CONTRIBUTOR }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          token: ${{ env.GITHUB_TOKEN }}

      - name: Get PR info
        id: get-pr-info
        if: startsWith(github.ref, 'refs/heads/pull-request/') && github.event_name == 'push'
        uses: nv-gha-runners/get-pr-info@main

      - name: Check NVIDIA SSO membership
        id: check-sso
        uses: ./.github/actions/check-nvidia-sso-membership
        with:
          username: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').user.login }}
          github_token: ${{ secrets.NVIDIA_MANAGEMENT_ORG_PAT }}
          sso_users_filename: ${{ vars.SSO_USERS_FILENAME }}

      - name: Set maintainer status
        id: check-membership
        env:
          IS_MAIN_BRANCH: ${{ github.ref == 'refs/heads/main' }}
          IS_MERGE_GROUP: ${{ github.event_name == 'merge_group' }}
          SCHEDULED_JOB: ${{ github.event_name == 'schedule' }}
        run: |
          # Skip SSO check for scheduled jobs, main branch, or merge groups
          if [ "${{ env.SCHEDULED_JOB }}" == "true" ] || [ "${IS_MAIN_BRANCH}" == "true" ] || [ "${IS_MERGE_GROUP}" == "true" ]; then
            echo "is_maintainer=true" | tee -a $GITHUB_OUTPUT
            exit 0
          fi

          # Use SSO membership check result
          IS_MEMBER="${{ steps.check-sso.outputs.is_member }}"

          # If external contributor is disabled, check if user is a repo collaborator or an org collaborator to NVIDIA or NVIDIA-NeMo
          if [ "${{ env.DISABLE_EXTERNAL_CONTRIBUTOR }}" == "true" ] && [ "${{ steps.check-sso.outputs.is_member }}" != "true" ]; then
            PR_AUTHOR=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').user.login }}

            echo "Checking if $PR_AUTHOR is a repo collaborator..."
            API_URL="https://api.github.com/repos/$REPO/collaborators/$PR_AUTHOR"
            REPO_MEMBERSHIP_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -L \
              -H "Accept: application/vnd.github+json" \
              -H "Authorization: Bearer $GITHUB_TOKEN" \
              -H "X-GitHub-Api-Version: 2022-11-28" \
              $API_URL)

            echo "Checking if $PR_AUTHOR is an org collaborator to NVIDIA-NeMo..."
            API_URL="https://api.github.com/orgs/NVIDIA-NeMo/members/$PR_AUTHOR"
            ORG_NVIDIA_NEMO_MEMBERSHIP_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -L \
              -H "Accept: application/vnd.github+json" \
              -H "Authorization: Bearer $GITHUB_TOKEN" \
              -H "X-GitHub-Api-Version: 2022-11-28" \
              $API_URL)

            echo "Checking if $PR_AUTHOR is an org collaborator to NVIDIA..."
            API_URL="https://api.github.com/orgs/NVIDIA/members/$PR_AUTHOR"
            ORG_NVIDIA_MEMBERSHIP_RESPONSE=$(curl -s -o /dev/null -w "%{http_code}" -L \
              -H "Accept: application/vnd.github+json" \
              -H "Authorization: Bearer $GITHUB_TOKEN" \
              -H "X-GitHub-Api-Version: 2022-11-28" \
              $API_URL)

            if [ "$REPO_MEMBERSHIP_RESPONSE" -eq 204 ] || [ "$ORG_NVIDIA_NEMO_MEMBERSHIP_RESPONSE" -eq 204 ] || [ "$ORG_NVIDIA_MEMBERSHIP_RESPONSE" -eq 204 ]; then
              IS_MEMBER="true"
            else
              exit 1
            fi
          fi

          # Use SSO membership check result
          if [ "$IS_MEMBER" == "true" ]; then
            echo "is_maintainer=true" | tee -a $GITHUB_OUTPUT
          else
            echo "is_maintainer=false" | tee -a $GITHUB_OUTPUT
          fi

  pre-flight:
    needs: [is-not-external-contributor]
    if: github.repository == 'NVIDIA/Megatron-LM'
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.73.2

  linting:
    runs-on: ubuntu-latest
    needs: [pre-flight]
    if: |
      (
        needs.pre-flight.outputs.is_deployment_workflow == 'false'
          && needs.pre-flight.outputs.is_ci_workload == 'true'
      ) || (
        needs.pre-flight.outputs.is_deployment_workflow == 'false'
          && needs.pre-flight.outputs.is_ci_workload == 'false'
          && needs.pre-flight.outputs.docs_only == 'false'
      )
    steps:
      - name: Checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Install uv
        uses: astral-sh/setup-uv@v1
        with:
          version: 0.7.2

      - name: Install linting tools
        run: |
          uv sync --locked --only-group linting

      - name: Get PR info
        id: get-pr-info
        if: startsWith(github.ref, 'refs/heads/pull-request/') && github.event_name == 'push'
        uses: nv-gha-runners/get-pr-info@main

      - name: Run linting
        if: startsWith(github.ref, 'refs/heads/pull-request/') && github.event_name == 'push'
        run: |
          export PATH=".venv/bin:$PATH"
          export GITLAB_ENDPOINT=github.com
          export CI_PROJECT_NAMESPACE=NVIDIA
          export BASE_REF="${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.ref }}"
          export CHECK_ONLY=true
          export SKIP_DOCS=false
          bash tools/autoformat.sh

  cicd-wait-in-queue:
    runs-on: ubuntu-latest
    needs: [pre-flight, linting]
    environment: "test"
    if: |
      !(needs.pre-flight.outputs.is_ci_workload == 'true'
      || needs.pre-flight.outputs.is_deployment_workflow == 'true'
      || needs.pre-flight.outputs.is_merge_group == 'true'
      || needs.pre-flight.outputs.docs_only == 'true')
    steps:
      - name: Running CI tests
        run: |
          echo "Running CI tests"
          echo "is_merge_group: ${{ needs.pre-flight.outputs.is_merge_group }}"

  cicd-parse-downstream-testing:
    runs-on: ubuntu-latest
    needs:
      - pre-flight
      - cicd-wait-in-queue
    if: |
      needs.pre-flight.result != 'cancelled'
      && needs.cicd-wait-in-queue.result != 'cancelled'
      && (
        success()
        || needs.pre-flight.outputs.is_ci_workload == 'true'
        || needs.pre-flight.outputs.force_run_all == 'true'
        || needs.pre-flight.outputs.is_merge_group == 'true'
      )
      && !cancelled()
    outputs:
      mbridge-test-suite: ${{ steps.select-mbridge-test-suite.outputs.main }}
    steps:
      - name: Checkout
        uses: actions/checkout@v6

      - name: Get PR info
        id: get-pr-info
        if: startsWith(github.ref, 'refs/heads/pull-request/') && github.event_name == 'push'
        uses: nv-gha-runners/get-pr-info@main

      - name: Select MBridge test suite
        id: select-mbridge-test-suite
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
          TEST_SUITE=$(gh pr view $PR_NUMBER --json labels | jq -r 'if [.labels[].name] | any(. == "Run MBridge tests") then "all" else "unit-only" end')
          echo "main=$TEST_SUITE" | tee -a $GITHUB_OUTPUT

      - name: How-To
        run: bash .github/scripts/readme.sh

  cicd-mbridge-testing:
    runs-on: ubuntu-latest
    needs:
      - pre-flight
      - cicd-wait-in-queue
      - cicd-parse-downstream-testing
    if: |
      needs.pre-flight.result != 'cancelled'
      && needs.cicd-wait-in-queue.result != 'cancelled'
      && needs.cicd-parse-downstream-testing.result != 'cancelled'
      && (
        success()
        || needs.pre-flight.outputs.is_ci_workload == 'true'
        || needs.pre-flight.outputs.force_run_all == 'true'
        || needs.pre-flight.outputs.is_merge_group == 'true'
      )
      && !cancelled()
    steps:
      - name: Get PR info
        id: get-pr-info
        if: startsWith(github.ref, 'refs/heads/pull-request/') && github.event_name == 'push'
        uses: nv-gha-runners/get-pr-info@main

      - name: Checkout MBridge and create testing branch
        uses: actions/checkout@v6
        with:
          ref: main
          repository: NVIDIA-NeMo/Megatron-Bridge
          path: megatron-bridge
          token: ${{ secrets.PAT }}

      - name: Create testing branch
        env:
          MBRIDGE_BRANCH_NAME: mcore-testing-${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number || github.run_id }}
        run: |
          cd megatron-bridge
          git fetch origin main
          git checkout -b ${{ env.MBRIDGE_BRANCH_NAME }} origin/main
          git push origin ${{ env.MBRIDGE_BRANCH_NAME }} --force

      - name: Get merge commit sha
        shell: bash -x -e -u -o pipefail {0}
        id: sha
        env:
          IS_PR: ${{ startsWith(github.ref, 'refs/heads/pull-request/') }}
          IS_MERGE_GROUP: ${{ github.event_name == 'merge_group' }}
        run: |
          if [[ "$IS_PR" == "true" ]]; then
            SHA=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').merge_commit_sha }}
          elif [[ "$IS_MERGE_GROUP" == "true" ]]; then
            SHA=${{ github.event.merge_group.head_sha }}
          else
            SHA=${GITHUB_SHA}
          fi
          echo "main=${SHA}" | tee -a "$GITHUB_OUTPUT"

      - name: Trigger MBridge tests
        uses: convictional/trigger-workflow-and-wait@v1.6.5
        env:
          MBRIDGE_BRANCH_NAME: mcore-testing-${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number || github.run_id }}
        with:
          owner: NVIDIA-NeMo
          repo: Megatron-Bridge
          workflow_file_name: cicd-main.yml
          github_token: ${{ secrets.PAT }}
          ref: ${{ env.MBRIDGE_BRANCH_NAME }}
          wait_interval: 60
          propagate_failure: true
          client_payload: |
            {
              "mcore_ref": "${{ steps.sha.outputs.main }}",
              "test_suite": "${{ needs.cicd-parse-downstream-testing.outputs.mbridge-test-suite }}",
              "triggered_by": "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
            }

      - name: Delete testing branch
        if: always()
        env:
          MBRIDGE_BRANCH_NAME: mcore-testing-${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number || github.run_id }}
        run: |
          cd megatron-bridge
          git push origin --delete ${{ env.MBRIDGE_BRANCH_NAME }}

  cicd-compute-build-matrix:
    runs-on: ubuntu-latest
    needs: [is-not-external-contributor]
    outputs:
      matrix: ${{ steps.compute.outputs.matrix }}
    steps:
      - name: Compute build matrix
        id: compute
        env:
          IS_MAINTAINER: ${{ needs.is-not-external-contributor.outputs.is_maintainer }}
          SELECTED_RUNNER: ${{ needs.is-not-external-contributor.outputs.selected_runner }}
          SELECTED_RUNNER_GB200: ${{ needs.is-not-external-contributor.outputs.selected_runner_gb200 }}
          REGISTRY_AWS: ${{ env.container-registry }}
          REGISTRY_GCP: ${{ env.container-registry-gb200 }}
        run: |
          AWS_ENTRY=$(jq -nc --arg registry "$REGISTRY_AWS" --arg runner "$SELECTED_RUNNER" \
            '{"cloud": "aws", "registry": $registry, "runner": $runner}')
          if [ "$IS_MAINTAINER" == "true" ]; then
            GCP_ENTRY=$(jq -nc --arg registry "$REGISTRY_GCP" --arg runner "$SELECTED_RUNNER_GB200" \
              '{"cloud": "gcp", "registry": $registry, "runner": $runner}')
            MATRIX=$(jq -nc --argjson aws "$AWS_ENTRY" --argjson gcp "$GCP_ENTRY" \
              '{"include": [$aws, $gcp]}')
          else
            MATRIX=$(jq -nc --argjson aws "$AWS_ENTRY" '{"include": [$aws]}')
          fi
          echo "matrix=$MATRIX" | tee -a "$GITHUB_OUTPUT"

  cicd-container-build:
    needs: [is-not-external-contributor, pre-flight, cicd-wait-in-queue, cicd-compute-build-matrix]
    strategy:
      fail-fast: false
      matrix: ${{ fromJson(needs.cicd-compute-build-matrix.outputs.matrix) }}
    runs-on: ${{ matrix.runner }}
    if: |
      needs.is-not-external-contributor.result != 'cancelled'
      && needs.pre-flight.result != 'cancelled'
      && needs.cicd-wait-in-queue.result != 'cancelled'
      && needs.cicd-compute-build-matrix.result != 'cancelled'
      && (
        success()
        || needs.pre-flight.outputs.is_ci_workload == 'true'
        || needs.pre-flight.outputs.is_merge_group == 'true'
        || needs.pre-flight.outputs.force_run_all == 'true'
      )
      && !cancelled()
    steps:
      - name: Get PR info
        id: get-pr-info
        if: startsWith(github.ref, 'refs/heads/pull-request/') && github.event_name == 'push'
        uses: nv-gha-runners/get-pr-info@main

      - name: Get merge commit sha
        shell: bash -x -e -u -o pipefail {0}
        id: sha
        env:
          IS_PR: ${{ startsWith(github.ref, 'refs/heads/pull-request/') }}
          IS_MERGE_GROUP: ${{ github.event_name == 'merge_group' }}
        run: |
          if [[ "$IS_PR" == "true" ]]; then
            SHA=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').merge_commit_sha }}
          elif [[ "$IS_MERGE_GROUP" == "true" ]]; then
            SHA=${{ github.event.merge_group.head_sha }}
          else
            SHA=${GITHUB_SHA}
          fi
          echo "main=${SHA}" | tee -a "$GITHUB_OUTPUT"

      - name: Checkout
        uses: actions/checkout@v6
        with:
          ref: ${{ steps.sha.outputs.main }}

      - name: Setup python
        uses: actions/setup-python@v6
        with:
          python-version: 3.12

      - name: Install GH CLI
        shell: bash -x -e -u -o pipefail {0}
        run: |
          apt-get update
          apt-get install -y gh

      - name: Has lts label
        id: has-lts-label
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
          HAS_LTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. == "container::lts")') || echo "false"
          echo "main=$HAS_LTS_LABEL" | tee -a $GITHUB_OUTPUT

      - name: Download test data
        shell: bash
        run: |
          echo "::group::Download test data"
          pip install --no-cache-dir click requests
          python tests/test_utils/python_scripts/download_unit_tests_dataset.py --assets-dir ./assets
          echo "::endgroup::"

      - name: Install GH CLI
        shell: bash
        run: |
          apt-get update
          apt-get install -y gh

      - name: Get last merged PR
        id: cache_from
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          LAST_PRS=$(gh api graphql -f query='
            query {
              repository(owner: "NVIDIA", name: "Megatron-LM") {
                pullRequests(states: MERGED, first: 100, orderBy: {field: UPDATED_AT, direction: DESC}) {
                  nodes {
                    number
                  }
                }
              }
            }' | jq -r '.data.repository.pullRequests.nodes[].number' | while read -r number; do
              echo "type=registry,ref=${{ matrix.registry }}/megatron-lm:$number-buildcache,mode=max"
            done)

          echo "LAST_PRS<<EOF" | tee -a $GITHUB_OUTPUT
          echo "$LAST_PRS" | tee -a $GITHUB_OUTPUT
          echo "EOF" | tee -a $GITHUB_OUTPUT

      - name: Parse baseimage
        shell: bash
        id: base-image
        env:
          HAS_LTS_LABEL: ${{ steps.has-lts-label.outputs.main }}
        run: |
          if [ "$HAS_LTS_LABEL" == "true" ]; then
            NGC_VERSION=$(cat docker/.ngc_version.lts)
            echo "version=$NGC_VERSION" | tee -a $GITHUB_OUTPUT
            echo "image_type=lts" | tee -a $GITHUB_OUTPUT
          else
            NGC_VERSION=$(cat docker/.ngc_version.dev)
            echo "version=$NGC_VERSION" | tee -a $GITHUB_OUTPUT
            echo "image_type=dev" | tee -a $GITHUB_OUTPUT
          fi

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          file: ./docker/Dockerfile.ci.dev
          push: true
          context: .
          target: main
          build-args: |
            FROM_IMAGE_NAME=${{ steps.base-image.outputs.version }}
            IMAGE_TYPE=${{ steps.base-image.outputs.image_type }}
          cache-from: |
            type=registry,ref=${{ matrix.registry }}/megatron-lm:${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number || 0 }}-buildcache,mode=max
            type=registry,ref=${{ matrix.registry }}/megatron-lm:main-buildcache,mode=max
            ${{ steps.cache_from.outputs.LAST_PRS }}
          cache-to: |
            type=registry,ref=${{ matrix.registry }}/megatron-lm:${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number || 0 }}-buildcache,mode=max
          no-cache: false
          tags: |
            ${{ matrix.registry }}/megatron-lm:${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number || 0 }}
            ${{ matrix.registry }}/megatron-lm:${{ github.sha }}
          secrets: |
            GH_TOKEN=${{ secrets.PAT }}

  cicd-parse-unit-tests:
    runs-on: ubuntu-latest
    outputs:
      unit-tests: ${{ steps.parse-unit-tests.outputs.unit-tests }}
    needs:
      - pre-flight
      - cicd-wait-in-queue
      - cicd-container-build
    if: |
      needs.pre-flight.result != 'cancelled'
      && needs.cicd-wait-in-queue.result != 'cancelled'
      && needs.cicd-container-build.result != 'cancelled'
      && (
        success()
        || needs.pre-flight.outputs.is_ci_workload == 'true'
        || needs.pre-flight.outputs.force_run_all == 'true'
        || needs.pre-flight.outputs.is_merge_group == 'true'
      )
      && !cancelled()
    steps:
      - name: Checkout
        uses: actions/checkout@v6
      - name: Parse unit tests
        id: parse-unit-tests
        run: |
          cat tests/test_utils/recipes/h100/unit-tests.yaml | yq -o json '[.products[].test_case[] | { "bucket": .}] | sort_by(.model, .test_case)' | jq -c > unit-tests.json
          echo "unit-tests=$(cat unit-tests.json)" | tee -a $GITHUB_OUTPUT

  cicd-unit-tests-latest:
    strategy:
      fail-fast: false
      matrix:
        include: ${{ fromJson(needs.cicd-parse-unit-tests.outputs.unit-tests) }}
    needs:
      - is-not-external-contributor
      - pre-flight
      - cicd-wait-in-queue
      - cicd-container-build
      - cicd-parse-unit-tests
    runs-on: ${{ needs.is-not-external-contributor.outputs.selected_runner }}
    timeout-minutes: 60
    name: "${{ matrix.bucket }} - latest"
    if: |
      needs.is-not-external-contributor.result != 'cancelled'
      && needs.pre-flight.result != 'cancelled'
      && needs.cicd-wait-in-queue.result != 'cancelled'
      && needs.cicd-container-build.result != 'cancelled'
      && needs.cicd-parse-unit-tests.result != 'cancelled'
      && (
        success()
        || needs.pre-flight.outputs.is_ci_workload == 'true'
        || needs.pre-flight.outputs.force_run_all == 'true'
        || needs.pre-flight.outputs.is_merge_group == 'true'
      )
      && !cancelled()
    env:
      PIP_DISABLE_PIP_VERSION_CHECK: 1
      PIP_NO_PYTHON_VERSION_WARNING: 1
      PIP_ROOT_USER_ACTION: ignore
    steps:
      - name: Checkout
        uses: actions/checkout@v6
      - name: main
        uses: ./.github/actions
        with:
          test_case: ${{ matrix.bucket }}
          tag: latest
          timeout: ${{ matrix.timeout || 30 }}
          is_unit_test: "true"
          PAT: ${{ secrets.PAT }}
          container-image: ${{ env.container-registry }}/megatron-lm:${{ github.sha }}
          is_ci_workload: ${{ needs.pre-flight.outputs.is_ci_workload }}

  cicd-parse-integration-tests-h100:
    runs-on: ubuntu-latest
    needs:
      - pre-flight
      - cicd-wait-in-queue
      - cicd-container-build
      - cicd-unit-tests-latest
    if: |
      needs.pre-flight.result != 'cancelled'
      && needs.cicd-wait-in-queue.result != 'cancelled'
      && needs.cicd-container-build.result != 'cancelled'
      && needs.cicd-unit-tests-latest.result != 'cancelled'
      && (
        success()
        || needs.pre-flight.outputs.is_ci_workload == 'true'
        || needs.pre-flight.outputs.force_run_all == 'true'
        || needs.pre-flight.outputs.is_merge_group == 'true'
      )
      && !cancelled()
    outputs:
      integration-tests-h100: ${{ steps.main.outputs.integration-tests-h100 }}
    steps:
      - name: Checkout
        uses: actions/checkout@v6

      - name: Get PR info
        id: get-pr-info
        if: startsWith(github.ref, 'refs/heads/pull-request/') && github.event_name == 'push'
        uses: nv-gha-runners/get-pr-info@main

      - name: Has Run tests label
        id: has-run-tests-label
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
          HAS_RUN_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. == "Run tests")') || echo "false"
          echo "main=$HAS_RUN_TESTS_LABEL" | tee -a $GITHUB_OUTPUT

      - name: Has Run functional tests label
        id: has-run-functional-tests-label
        env:
          GH_TOKEN: ${{ secrets.PAT }}
          IS_CI_WORKLOAD: ${{ needs.pre-flight.outputs.is_ci_workload }}
        run: |
          PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
          HAS_RUN_FUNCTIONAL_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. == "Run functional tests")')
          HAS_RUN_FUNCTIONAL_TESTS_LABEL=${HAS_RUN_FUNCTIONAL_TESTS_LABEL:-$IS_CI_WORKLOAD}
          echo "main=$HAS_RUN_FUNCTIONAL_TESTS_LABEL" | tee -a $GITHUB_OUTPUT

      - name: Parse functional tests
        id: main
        env:
          HAS_RUN_TESTS_LABEL: ${{ steps.has-run-tests-label.outputs.main }}
          HAS_RUN_FUNCTIONAL_TESTS_LABEL: ${{ steps.has-run-functional-tests-label.outputs.main == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' }}
        run: |
          export PYTHONPATH=$(pwd)

          if [ "$HAS_RUN_TESTS_LABEL" == "true" ]; then
            ARGS=(
              --scope mr-github
              --enable-lightweight-mode
            )
          elif [ "$HAS_RUN_FUNCTIONAL_TESTS_LABEL" == "true" ]; then
            ARGS=(
              --scope mr-github
            )
          else
            ARGS=(
              --scope mr-github-slim
            )
          fi

          python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
            --n-repeat 5 \
            --time-limit 2700 \
            --test-cases all \
            --container-image mcore_ci_dev \
            --container-tag latest \
            --dependent-job functional:configure \
            --record-checkpoints false \
            --slurm-account gh \
            --no-enable-warmup \
            --environment dev \
            --platform dgx_h100 \
            --cluster ghci \
            ${ARGS[@]} \
            --output-path integration-tests-h100.yaml

          cat integration-tests-h100.yaml | \
            yq -o json 'del(.default, .stages, .workflow) | to_entries | map({"model": .value.stage, "test_case": .key}) | sort_by(.model, .test_case)' | jq -c  > integration-tests-h100.json

          echo "integration-tests-h100=$(cat integration-tests-h100.json)" | tee -a "$GITHUB_OUTPUT"

  cicd-integration-tests-latest-h100:
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        include: ${{ fromJson(needs.cicd-parse-integration-tests-h100.outputs.integration-tests-h100) }}
    needs:
      - is-not-external-contributor
      - pre-flight
      - cicd-wait-in-queue
      - cicd-parse-integration-tests-h100
      - cicd-unit-tests-latest
    runs-on: ${{ needs.is-not-external-contributor.outputs.selected_runner }}
    name: "${{ matrix.model }}/${{ matrix.test_case }} - latest"
    env:
      PIP_DISABLE_PIP_VERSION_CHECK: 1
      PIP_NO_PYTHON_VERSION_WARNING: 1
      PIP_ROOT_USER_ACTION: ignore
    if: |
      needs.is-not-external-contributor.result != 'cancelled'
      && needs.pre-flight.result != 'cancelled'
      && needs.cicd-wait-in-queue.result != 'cancelled'
      && needs.cicd-parse-integration-tests-h100.result != 'cancelled'
      && needs.cicd-unit-tests-latest.result != 'cancelled'
      && (
        success()
        || needs.pre-flight.outputs.is_ci_workload == 'true'
        || needs.pre-flight.outputs.force_run_all == 'true'
        || needs.pre-flight.outputs.is_merge_group == 'true'
      )
      && !cancelled()
    steps:
      - name: Checkout
        uses: actions/checkout@v6
      - name: main
        uses: ./.github/actions
        with:
          test_case: ${{ matrix.test_case }}
          model: ${{ matrix.model }}
          tag: latest
          timeout: ${{ matrix.timeout || 30 }}
          is_unit_test: "false"
          PAT: ${{ secrets.PAT }}
          container-image: ${{ env.container-registry }}/megatron-lm:${{ github.sha }}
          is_ci_workload: ${{ needs.pre-flight.outputs.is_ci_workload }}
          is_merge_group: ${{ needs.pre-flight.outputs.is_merge_group }}

  cicd-parse-integration-tests-gb200:
    runs-on: ubuntu-latest
    needs:
      - is-not-external-contributor
      - pre-flight
      - cicd-wait-in-queue
      - cicd-container-build
      - cicd-unit-tests-latest
    if: |
      needs.is-not-external-contributor.outputs.is_maintainer == 'true'
      && needs.pre-flight.result != 'cancelled'
      && needs.cicd-wait-in-queue.result != 'cancelled'
      && needs.cicd-container-build.result != 'cancelled'
      && needs.cicd-unit-tests-latest.result != 'cancelled'
      && (
        success()
        || needs.pre-flight.outputs.is_ci_workload == 'true'
        || needs.pre-flight.outputs.force_run_all == 'true'
        || needs.pre-flight.outputs.is_merge_group == 'true'
      )
      && !cancelled()
    outputs:
      integration-tests-gb200: ${{ steps.main.outputs.integration-tests-gb200 }}
    steps:
      - name: Checkout
        uses: actions/checkout@v6

      - name: Get PR info
        id: get-pr-info
        if: startsWith(github.ref, 'refs/heads/pull-request/') && github.event_name == 'push'
        uses: nv-gha-runners/get-pr-info@main

      - name: Has Run tests label
        id: has-run-tests-label
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
          HAS_RUN_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. == "Run tests")') || echo "false"
          echo "main=$HAS_RUN_TESTS_LABEL" | tee -a $GITHUB_OUTPUT

      - name: Has Run functional tests label
        id: has-run-functional-tests-label
        env:
          GH_TOKEN: ${{ secrets.PAT }}
          IS_CI_WORKLOAD: ${{ needs.pre-flight.outputs.is_ci_workload }}
        run: |
          PR_NUMBER=${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
          HAS_RUN_FUNCTIONAL_TESTS_LABEL=$(gh pr view $PR_NUMBER --json labels | jq '[.labels[].name] | any(. == "Run functional tests")')
          HAS_RUN_FUNCTIONAL_TESTS_LABEL=${HAS_RUN_FUNCTIONAL_TESTS_LABEL:-$IS_CI_WORKLOAD}
          echo "main=$HAS_RUN_FUNCTIONAL_TESTS_LABEL" | tee -a $GITHUB_OUTPUT

      - name: Parse functional tests
        id: main
        env:
          HAS_RUN_TESTS_LABEL: ${{ steps.has-run-tests-label.outputs.main }}
          HAS_RUN_FUNCTIONAL_TESTS_LABEL: ${{ steps.has-run-functional-tests-label.outputs.main == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' }}
        run: |
          export PYTHONPATH=$(pwd)

          if [ "$HAS_RUN_TESTS_LABEL" == "true" ]; then
            ARGS=(
              --scope mr-github
              --enable-lightweight-mode
            )
          elif [ "$HAS_RUN_FUNCTIONAL_TESTS_LABEL" == "true" ]; then
            ARGS=(
              --scope mr-github
            )
          else
            ARGS=(
              --scope mr-github-slim
            )
          fi

          python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
            --n-repeat 5 \
            --time-limit 2700 \
            --test-cases all \
            --container-image mcore_ci_dev \
            --container-tag latest \
            --dependent-job functional:configure \
            --record-checkpoints false \
            --slurm-account gh \
            --no-enable-warmup \
            --environment dev \
            --platform dgx_gb200 \
            --cluster dgxgb200_oci-hsg \
            ${ARGS[@]} \
            --output-path integration-tests-gb200.yaml

          cat integration-tests-gb200.yaml | \
            yq -o json 'del(.default, .stages, .workflow) | to_entries | map({"model": .value.stage, "test_case": .key}) | sort_by(.model, .test_case)' | jq -c  > integration-tests-gb200.json

          echo "integration-tests-gb200=$(cat integration-tests-gb200.json)" | tee -a "$GITHUB_OUTPUT"

  cicd-integration-tests-latest-gb200:
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        include: ${{ fromJson(needs.cicd-parse-integration-tests-gb200.outputs.integration-tests-gb200) }}
    needs:
      - is-not-external-contributor
      - pre-flight
      - cicd-wait-in-queue
      - cicd-parse-integration-tests-gb200
      - cicd-unit-tests-latest
    runs-on: ${{ needs.is-not-external-contributor.outputs.selected_runner_gb200 }}
    name: "${{ matrix.model }}/${{ matrix.test_case }} - latest"
    env:
      PIP_DISABLE_PIP_VERSION_CHECK: 1
      PIP_NO_PYTHON_VERSION_WARNING: 1
      PIP_ROOT_USER_ACTION: ignore
    if: |
      needs.is-not-external-contributor.outputs.is_maintainer == 'true'
      && needs.is-not-external-contributor.result != 'cancelled'
      && needs.pre-flight.result != 'cancelled'
      && needs.cicd-wait-in-queue.result != 'cancelled'
      && needs.cicd-parse-integration-tests-gb200.result != 'cancelled'
      && needs.cicd-unit-tests-latest.result != 'cancelled'
      && (
        success()
        || needs.pre-flight.outputs.is_ci_workload == 'true'
        || needs.pre-flight.outputs.force_run_all == 'true'
        || needs.pre-flight.outputs.is_merge_group == 'true'
      )
      && !cancelled()
    steps:
      - name: Checkout
        uses: actions/checkout@v6
      - name: main
        uses: ./.github/actions
        with:
          test_case: ${{ matrix.test_case }}
          model: ${{ matrix.model }}
          tag: latest
          timeout: ${{ matrix.timeout || 30 }}
          is_unit_test: "false"
          PAT: ${{ secrets.PAT }}
          container-image: ${{ env.container-registry-gb200 }}/megatron-lm:${{ github.sha }}
          is_ci_workload: ${{ needs.pre-flight.outputs.is_ci_workload }}
          is_merge_group: ${{ needs.pre-flight.outputs.is_merge_group }}
          platform: dgx_gb200

  Nemo_CICD_Test:
    needs:
      - pre-flight
      - is-not-external-contributor
      - cicd-unit-tests-latest
      - cicd-integration-tests-latest-h100
      - cicd-integration-tests-latest-gb200
    if: |
      (
        needs.pre-flight.outputs.docs_only == 'true'
        || needs.pre-flight.outputs.is_deployment_workflow == 'true'
        || needs.pre-flight.outputs.is_ci_workload == 'true'
        || needs.pre-flight.outputs.is_merge_group == 'true'
        || always()
      )
      && !cancelled()
      && github.repository == 'NVIDIA/Megatron-LM'
    runs-on: ubuntu-latest
    permissions: write-all
    steps:
      - name: Checkout
        uses: actions/checkout@v6

      - name: Get workflow result
        id: result
        shell: bash -x -e -u -o pipefail {0}
        env:
          GH_TOKEN: ${{ github.token }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          DOCS_ONLY: ${{ needs.pre-flight.outputs.docs_only }}
          IS_DEPLOYMENT: ${{ needs.pre-flight.outputs.is_deployment_workflow }}
          IS_MAINTAINER: ${{ needs.is-not-external-contributor.outputs.is_maintainer }}
          UNIT_RESULT: ${{ needs.cicd-unit-tests-latest.result }}
          H100_RESULT: ${{ needs.cicd-integration-tests-latest-h100.result }}
          GB200_RESULT: ${{ needs.cicd-integration-tests-latest-gb200.result }}
        run: |
          # Docs-only and deployment workflows intentionally skip all tests
          if [ "$DOCS_ONLY" == "true" ] || [ "$IS_DEPLOYMENT" == "true" ]; then
            echo "✅ Docs-only or deployment workflow — test checks skipped"
            exit 0
          fi

          FAILED=false

          # Unit tests must always succeed (never skipped or cancelled)
          if [ "$UNIT_RESULT" != "success" ]; then
            echo "❌ cicd-unit-tests-latest: $UNIT_RESULT"
            FAILED=true
          fi

          # H100 integration tests must always succeed
          if [ "$H100_RESULT" != "success" ]; then
            echo "❌ cicd-integration-tests-latest-h100: $H100_RESULT"
            FAILED=true
          fi

          # GB200 integration tests may be skipped only for non-maintainer PRs
          # (no GB200 runners available); maintainer runs must always succeed
          if [ "$GB200_RESULT" == "skipped" ] && [ "$IS_MAINTAINER" == "true" ]; then
            echo "❌ cicd-integration-tests-latest-gb200: skipped unexpectedly for a maintainer run"
            FAILED=true
          elif [ "$GB200_RESULT" != "success" ] && [ "$GB200_RESULT" != "skipped" ]; then
            echo "❌ cicd-integration-tests-latest-gb200: $GB200_RESULT"
            FAILED=true
          fi

          # Broad scan: catch any individual job failures or cancellations
          # (e.g. a single matrix instance cancelled mid-run)
          BAD_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '
            [.jobs[] | select(
              .status == "completed"
              and (.conclusion == "failure" or .conclusion == "cancelled")
              and .name != "merge-queue-notification"
              and .name != "cicd-mbridge-testing"
            )] | length
          ') || BAD_JOBS=0

          if [ "${BAD_JOBS:-0}" -gt 0 ]; then
            echo "❌ Found ${BAD_JOBS} failed or cancelled job(s):"
            gh run view $GITHUB_RUN_ID --json jobs --jq '
              .jobs[] | select(
                .status == "completed"
                and (.conclusion == "failure" or .conclusion == "cancelled")
                and .name != "merge-queue-notification"
                and .name != "cicd-mbridge-testing"
              ) | .name + " → " + .conclusion
            '
            FAILED=true
          fi

          if [ "$FAILED" != "true" ]; then
            echo "✅ All previous jobs completed successfully"
          else
            exit 1
          fi

  Coverage_Fake:
    runs-on: ubuntu-latest
    needs: [Nemo_CICD_Test, pre-flight]
    if: |
      (
        needs.pre-flight.outputs.docs_only == 'true'
        || needs.pre-flight.outputs.is_deployment_workflow == 'true'
        || github.event == 'merge_group'
      )
      && needs.pre-flight.outputs.is_ci_workload == 'false'
      && !cancelled()
      && github.repository == 'NVIDIA/Megatron-LM'
    steps:
      - name: Generate fake coverage report
        uses: actions/github-script@v8
        with:
          github-token: ${{ secrets.PAT }}
          script: |
            await github.rest.repos.createCommitStatus({
              owner: context.repo.owner,
              repo: context.repo.repo,
              sha: context.sha,
              state: 'success',
              description: 'No code changes - coverage check skipped',
              context: 'codecov/patch'
            });

  Coverage:
    runs-on: ubuntu-latest
    needs: [Nemo_CICD_Test]
    if: |
      (
        (needs.pre-flight.outputs.is_ci_workload == 'true' && !failure())
        || success()
      )
      && !cancelled()
      && github.repository == 'NVIDIA/Megatron-LM'
    strategy:
      matrix:
        flag: [unit-test]
    steps:
      - name: Checkout
        uses: actions/checkout@v6

      - name: Download coverage reports of current branch
        uses: actions/download-artifact@v7
        with:
          pattern: coverage-${{ matrix.flag }}-*

      - name: List coverage files
        run: find . -type f -name "*.xml" -o -name "*.lcov"

      - name: Get total coverage of current branch
        shell: bash -x -e -u -o pipefail {0}
        if: always()
        run: |
          pip install coverage

          ls -al .
          ls -al coverage-*/
          coverage combine --keep $(ls coverage-*/.coverage)
          coverage report -i
          rm -rf coverage-*
          ls -al

      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          verbose: true
          flags: ${{ matrix.flag }}

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          name: coverage-${{ matrix.flag }}-aggregated
          path: |
            .coverage
          include-hidden-files: true

  merge-queue-notification:
    runs-on: ubuntu-latest
    if: github.event_name == 'merge_group'
    permissions:
      pull-requests: write
    steps:
      - name: Extract PR number from merge group
        id: get-pr-number
        run: |
          # Extract PR number from merge group head_ref (format: refs/heads/gh-readonly-queue/main/pr-<number>-<sha>)
          PR_NUMBER=$(echo "${{ github.event.merge_group.head_ref }}" | sed -n 's/.*\/pr-\([0-9]*\)-.*/\1/p')
          echo "pr_number=$PR_NUMBER" >> $GITHUB_OUTPUT

      - name: Comment on PR with action run URL
        uses: actions/github-script@v8
        with:
          github-token: ${{ secrets.PAT }}
          script: |
            const prNumber = ${{ steps.get-pr-number.outputs.pr_number }};
            const runUrl = `https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}`;

            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: prNumber,
              body: `🔄 Merge queue validation started!\n\nYou can track the progress here: ${runUrl}`
            });

  cleanup-taint-node:
    runs-on: ${{ needs.is-not-external-contributor.outputs.selected_runner }}
    needs:
      - is-not-external-contributor
      - cicd-container-build
      - cicd-unit-tests-latest
      - cicd-integration-tests-latest-h100
      - cicd-integration-tests-latest-gb200
      - Coverage
      - Coverage_Fake
    if: |
      always()
      && !cancelled()
      && contains(needs.is-not-external-contributor.outputs.selected_runner, 'ephemeral')
      && !needs.pre-flight.outputs.is_deployment_workflow == 'true'
    steps:
      - name: Taint node for cleanup
        shell: bash
        run: taint-node.sh


================================================
FILE: .github/workflows/claude-complexity-label.yml
================================================
name: Claude Complexity Label

on:
  pull_request_target:
    types: [ready_for_review]

jobs:
  label-complexity:
    name: Label PR Complexity
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
      issues: write
      id-token: write
    env:
      GH_TOKEN: ${{ secrets.PAT }}
      REPO: ${{ github.repository }}
      PR_NUMBER: ${{ github.event.pull_request.number }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Run Claude Complexity Analysis
        uses: anthropics/claude-code-action@v1
        with:
          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
          github_token: ${{ secrets.PAT }}
          prompt: |
            REPO: ${{ env.REPO }}
            PR NUMBER: ${{ env.PR_NUMBER }}

            You are a PR complexity analyzer. Your job is to analyze the diff of this PR and apply exactly one complexity label.

            STEPS:
            1. Get the PR diff by running: gh pr diff $PR_NUMBER --repo $REPO
            2. Analyze every changed line (added or removed) in the diff and classify each as one of:
               - "docs-only": changes to docstrings, comments (lines starting with # or //), documentation files (.md, .rst, .txt), or similar non-functional text
               - "test": changes in test files (files with "test" in the name/path, or inside a tests/ directory)
               - "real code": all other changes (functional source code)
            3. Compute "real code line changes" using this formula:
               real_code_line_changes = (number of real code lines changed) + (number of test lines changed / 10)
               Count both added and removed lines. Do not count unchanged context lines. Do not count comments or docstrings.
            4. Remove any previously applied complexity or docs-only labels:
               gh pr edit $PR_NUMBER --repo $REPO --remove-label "complexity: low,complexity: medium,complexity: high,docs-only"
            5. Apply exactly ONE label using the gh CLI:
               - If there are ZERO real code lines and ZERO test lines (only docs-only changes), apply label "docs-only":
                 gh pr edit $PR_NUMBER --repo $REPO --add-label "docs-only"
               - If real_code_line_changes < 100, apply label "complexity: low":
                 gh pr edit $PR_NUMBER --repo $REPO --add-label "complexity: low"
               - If real_code_line_changes >= 100 and < 500, apply label "complexity: medium":
                 gh pr edit $PR_NUMBER --repo $REPO --add-label "complexity: medium"
               - If real_code_line_changes >= 500, apply label "complexity: high":
                 gh pr edit $PR_NUMBER --repo $REPO --add-label "complexity: high"

            Do NOT post any comments on the PR. Only apply the label.
          claude_args: |
            --allowedTools "Bash(gh pr diff:*),Bash(gh pr edit:*),Bash(gh pr view:*)"


================================================
FILE: .github/workflows/claude_review.yml
================================================
name: Claude Code Review

on:
  issue_comment:
    types: [created]

jobs:
  review-on-comment:
    name: Claude Review (comment trigger)
    if: |
      github.event_name == 'issue_comment' &&
      github.event.issue.pull_request &&
      contains(github.event.comment.body, '/claude review')
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
      issues: write
      id-token: write
    env:
      GH_TOKEN: ${{ github.token }}
      REPO: ${{ github.repository }}
      PR_NUMBER: ${{ github.event.issue.number }}
    steps:
      - name: Get PR head commit
        id: get-pr-head-commit
        run: |
          echo "sha=$(gh pr view $PR_NUMBER --repo $REPO --json headRefOid -q .headRefOid)" | tee -a $GITHUB_OUTPUT

      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          fetch-depth: 1
          ref: ${{ steps.get-pr-head-commit.outputs.sha }}

      - name: Run Claude Code Review
        uses: anthropics/claude-code-action@v1
        with:
          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
          trigger_phrase: "/claude review"
          show_full_output: true
          claude_args: |
            --allowedTools "mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*),Bash(gh pr review:*)"
            --model "claude-opus-4-6"
          prompt: |
            REPO: ${{ env.REPO }}
            PR NUMBER: ${{ env.PR_NUMBER }}

            You are doing a light code review. Keep it concise and actionable.

            Focus ONLY on:
            - Critical bugs or logic errors
            - Typos in code, comments, or strings
            - Missing or insufficient test coverage for changed code
            - Outdated or inaccurate documentation affected by the changes

            Do NOT comment on:
            - Style preferences or formatting
            - Minor naming suggestions
            - Architectural opinions or refactoring ideas
            - Performance unless there is a clear, measurable issue

            Provide feedback using inline comments for specific code suggestions.
            Use top-level comments for general observations.

            It's perfectly acceptable to not have anything to comment on.
            If you do not have anything to comment on, post "LGTM".


================================================
FILE: .github/workflows/close-inactive-issue-pr.yml
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Stale-Close-Inactive-Issues-PRs
on:
  schedule:
    - cron: "30 1 * * *"

jobs:
  close-issues:
    if: github.repository == 'NVIDIA/Megatron-LM'
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_close_inactive_issue_pr.yml@v0.44.0


================================================
FILE: .github/workflows/community-bot.yml
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Community Bot

on:
  issues:
    types: [opened, edited, reopened, closed, deleted]
  issue_comment:
    types: [created, edited, deleted]

jobs:
  community-bot:
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_community_bot.yml@v0.65.10
    with:
      community_project_id: ${{ vars.COMMUNITY_PROJECT_ID }}
    if: github.repository == 'NVIDIA/Megatron-LM'
    secrets:
      GH_TOKEN: ${{ secrets.PAT }}


================================================
FILE: .github/workflows/config/changelog-config.json
================================================
{
    "categories": [],
    "ignore_labels": [
      "ignore"
    ],
    "sort": "ASC",
    "template": "\n${{CHANGELOG}}\n\n<details><summary>Changelog Details</summary>\n\n${{UNCATEGORIZED}}\n</details>\n",
    "pr_template": "- ${{TITLE}} by @${{AUTHOR}} :: PR: #${{NUMBER}}",
    "commit_template": "- ${{TITLE}} by @${{AUTHOR}}",
    "empty_template": "${{OWNER}}\n${{REPO}}\n${{FROM_TAG}}\n${{TO_TAG}}",
    "duplicate_filter": {
      "pattern": ".+",
      "on_property": "title",
      "method": "match"
    },
    "transformers": [],
    "max_tags_to_fetch": 100,
    "max_pull_requests": 1250,
    "max_back_track_time_days": 365,
    "exclude_merge_branches": [],
    "tag_resolver": {
      "method": "semver"
    }
}


================================================
FILE: .github/workflows/copyright-check.yml
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Copyright check

on:
  push:
    branches:
      - "pull-request/[0-9]+"
      - "deploy-release/*"
  merge_group:
    types: [checks_requested]

jobs:
  pre-flight:
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.73.2
    if: github.repository == 'NVIDIA/Megatron-LM'

  copyright-check:
    needs: [pre-flight]
    if: |
      !(needs.pre-flight.outputs.docs_only == 'true'
      || needs.pre-flight.outputs.is_merge_group == 'true'
      || needs.pre-flight.outputs.is_deployment_workflow == 'true')
      && github.repository == 'NVIDIA/Megatron-LM'
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_copyright_check.yml@v0.66.7

  copyright-check-summary:
    needs: [pre-flight, copyright-check]
    if: |
      (
        needs.pre-flight.outputs.docs_only == 'true'
        || needs.pre-flight.outputs.is_deployment_workflow == 'true'
        || always()
      )
      && !cancelled()
      && github.repository == 'NVIDIA/Megatron-LM'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Result
        env:
          GH_TOKEN: ${{ github.token }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_ci_workload == 'true' }}
        run: |
          FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length') || echo 0

          if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then
              echo "✅ All previous jobs completed successfully"
              exit 0
          else
              echo "❌ Found $FAILED_JOBS failed job(s)"
              # Show which jobs failed
              gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success") | .name'
              exit 1
          fi


================================================
FILE: .github/workflows/dependabot.yml
================================================
name: Dependabot
on:
  schedule:
    - cron: "0 8 * * 1"
  workflow_dispatch: # Allow manual triggering

permissions:
  id-token: write
  contents: write

jobs:
  get-release-branch-names:
    runs-on: ubuntu-latest
    outputs:
      mcore: ${{ steps.get-branch.outputs.mcore_release_branch }}
    if: github.repository == 'NVIDIA/Megatron-LM'
    steps:
      - name: Get release branch names
        id: get-branch
        env:
          PAT: ${{ secrets.PAT }}
        run: |
          latest_branch=$(git ls-remote --heads https://token:${PAT}@github.com/NVIDIA/Megatron-LM.git 'refs/heads/core_r*' | 
            grep -o 'core_r[0-9]\+\.[0-9]\+\.[0-9]\+' | 
            sort -V | 
            tail -n1)
          echo "mcore_release_branch=$latest_branch" | tee -a $GITHUB_OUTPUT

  bump-tags:
    needs: [get-release-branch-names]
    if: github.repository == 'NVIDIA/Megatron-LM'
    strategy:
      fail-fast: false
      matrix:
        include:
          - target-branch: ${{ needs.get-release-branch-names.outputs.mcore }}
          - target-branch: main
    uses: ./.github/workflows/_update_dependencies.yml
    with:
      target-branch: ${{ matrix.target-branch }}
    secrets:
      PAT: ${{ secrets.PAT }}
      SSH_KEY: ${{ secrets.SSH_KEY }}
      SSH_PWD: ${{ secrets.SSH_PWD }}

  notify:
    if: failure() && github.repository == 'NVIDIA/Megatron-LM'
    runs-on: ubuntu-latest
    needs: [bump-tags]
    steps:
      - name: Notify
        env:
          SLACK_WEBHOOK: ${{ secrets.SLACK_CI_CHANNEL_WEBHOOK }}
          SLACK_WEBHOOK_ADMIN: <!subteam^${{ secrets.SLACK_TEAM_GROUP_ID }}>
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_REPOSITORY: ${{ github.repository }}
        run: |
          curl -X POST \
            -H 'Content-type: application/json' \
            --data "{\"text\":\":robot_joy: <https://github.com/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}|Dependabot workflow> failed. Please fix manually.\n\ncc ${SLACK_WEBHOOK_ADMIN}\"}" \
            $SLACK_WEBHOOK


================================================
FILE: .github/workflows/force-draft-pr.yml
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

name: Force Draft PR

on:
  pull_request_target:
    types: [opened]
    branches:
      - main

permissions:
  pull-requests: write

jobs:
  force-draft:
    runs-on: ubuntu-latest
    if: ${{ !github.event.pull_request.draft && github.repository == 'NVIDIA/Megatron-LM' }}
    steps:
      - name: Convert PR to draft
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          gh pr ready --undo ${{ github.event.pull_request.number }} --repo ${{ github.repository }}

      - name: Add comment explaining draft policy
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          gh pr comment ${{ github.event.pull_request.number }} --repo ${{ github.repository }} --body \
            "This PR has been automatically converted to **draft** because all PRs must start as drafts.

          When you are ready for review, click **Ready for Review** to begin the review process. This will:
          1. Add the oncall reviewer (optional reviewer)
          2. Add required review teams based on your changes

          See the [contribution guide](https://github.com/NVIDIA/Megatron-LM/blob/main/docs/developer/submit.md) for more details."


================================================
FILE: .github/workflows/install-test.yml
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This workflow verifies that the basic install works across all supported platforms.
# For basic install, all imports need to either be successful or appropriately guarded.

name: Installation Test

on:
  push:
    branches:
      - dev
      - main
      - "pull-request/[0-9]+"
      - "deploy-release/*"
  merge_group:
    types: [checks_requested]

jobs:
  pre-flight:
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.73.2
    if: github.repository == 'NVIDIA/Megatron-LM'

  pip-test-pytorch:
    needs: [pre-flight]
    if: |
      !(needs.pre-flight.outputs.docs_only == 'true'
      || needs.pre-flight.outputs.is_merge_group == 'true'
      || needs.pre-flight.outputs.is_deployment_workflow == 'true')
      && github.repository == 'NVIDIA/Megatron-LM'
    runs-on: linux-amd64-cpu16
    name: Pip - Python${{ matrix.python-version }} - AMD64/Linux - NGC PyTorch
    container:
      image: nvcr.io/nvidia/pytorch:25.05-py3
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.12"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Set PATH
        run: |
          echo "UV_PROJECT_ENVIRONMENT=/opt/venv" | tee -a "$GITHUB_ENV"
          echo "UV_LINK_MODE=copy" | tee -a "$GITHUB_ENV"
          echo "CUDA_HOME=/usr/local/cuda" | tee -a "$GITHUB_ENV"
          echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH" | tee -a "$GITHUB_ENV"
          echo "PATH=$HOME/.local/bin:$PATH:$CUDA_HOME/bin" | tee -a "$GITHUB_ENV"
          echo "TORCH_CUDA_ARCH_LIST=6.0;6.1;7.0;7.5;8.0;8.6;9.0" | tee -a "$GITHUB_ENV"

      - name: Install megatron-core
        shell: bash -x -e -u -o pipefail {0}
        run: bash docker/common/install.sh --environment dev --base-image pytorch --python-version ${{ matrix.python-version }}

      - name: Checkout check-imports
        uses: actions/checkout@v6
        with:
          repository: NVIDIA-NeMo/FW-CI-templates
          ref: v0.63.2
          path: FW-CI-templates

      - name: Check imports for megatron-core
        uses: ./FW-CI-templates/.github/actions/check-imports
        with:
          package-name: megatron.core
          python-binary: ${{ env.UV_PROJECT_ENVIRONMENT }}/bin/python

  uv-test-pytorch:
    needs: [pre-flight]
    if: |
      !(needs.pre-flight.outputs.docs_only == 'true'
      || needs.pre-flight.outputs.is_merge_group == 'true'
      || needs.pre-flight.outputs.is_deployment_workflow == 'true')
      && github.repository == 'NVIDIA/Megatron-LM'
    runs-on: linux-amd64-cpu16
    name: UV - Python${{ matrix.python-version }} - AMD64/Linux - NGC PyTorch
    container:
      image: nvcr.io/nvidia/pytorch:25.05-py3
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.12"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Set PATH
        run: |
          echo "UV_PROJECT_ENVIRONMENT=/opt/venv" | tee -a "$GITHUB_ENV"
          echo "VIRTUAL_ENV=/opt/venv" | tee -a "$GITHUB_ENV"
          echo "UV_LINK_MODE=copy" | tee -a "$GITHUB_ENV"
          echo "CUDA_HOME=/usr/local/cuda" | tee -a "$GITHUB_ENV"
          echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH" | tee -a "$GITHUB_ENV"
          echo "PATH=$HOME/.local/bin:$PATH:$CUDA_HOME/bin" | tee -a "$GITHUB_ENV"
          echo "CUDACXX=/usr/local/cuda/bin/nvcc" | tee -a "$GITHUB_ENV"
          echo "TORCH_CUDA_ARCH_LIST=6.0;6.1;7.0;7.5;8.0;8.6;9.0" | tee -a "$GITHUB_ENV"

      - name: Install project
        shell: bash
        run: bash docker/common/install.sh --environment dev --base-image pytorch --use-uv

      # NGC PyTorch 25.05 has a version of triton that is broken on CPU only machines.
      # - name: Checkout check-imports
      #   uses: actions/checkout@v6
      #   with:
      #     repository: NVIDIA-NeMo/FW-CI-templates
      #     ref: v0.63.2
      #     path: FW-CI-templates

      # - name: Check imports for megatron-core
      #   uses: ./FW-CI-templates/.github/actions/check-imports
      #   with:
      #     package-name: megatron.core
      #     python-binary: ${{ env.UV_PROJECT_ENVIRONMENT }}/bin/python

  install-test-summary:
    needs: [pre-flight, pip-test-pytorch, uv-test-pytorch]
    runs-on: ubuntu-latest
    name: Install test summary
    if: |
      (
        needs.pre-flight.outputs.docs_only == 'true'
        || needs.pre-flight.outputs.is_deployment_workflow == 'true'
        || always()
      )
      && !cancelled()
      && github.repository == 'NVIDIA/Megatron-LM'
    steps:
      - name: Checkout
        uses: actions/checkout@v6

      - name: Get workflow result
        id: result
        shell: bash -x -e -u -o pipefail {0}
        env:
          GH_TOKEN: ${{ github.token }}
          RUN_ID: ${{ github.run_id }}
          SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' }}
        run: |
          FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length') || echo 0

          if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then
              echo "✅ All previous jobs completed successfully"
              exit 0
          else
              echo "❌ Found $FAILED_JOBS failed job(s)"
              # Show which jobs failed
              gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success") | .name'
              exit 1
          fi


================================================
FILE: .github/workflows/multi-approval-bot.yml
================================================
name: "Codeowners Approval Workflow"

on:
  push:
    branches:
      - "pull-request/[0-9]+"
  merge_group:
    types: [checks_requested]

jobs:
  pre-flight:
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.73.2
    if: github.repository == 'NVIDIA/Megatron-LM'

  codeowners-approval:
    needs: [pre-flight]
    runs-on: ubuntu-latest
    if: |
      !(needs.pre-flight.outputs.docs_only == 'true'
      || needs.pre-flight.outputs.is_merge_group == 'true'
      || needs.pre-flight.outputs.is_deployment_workflow == 'true')
    steps:
      - name: Get PR info
        id: get-pr-info
        if: startsWith(github.ref, 'refs/heads/pull-request/')
        uses: nv-gha-runners/get-pr-info@main

      - name: Checkout action
        uses: actions/checkout@v6
        with:
          repository: noamelf/codeowner-multi-approval-action
          ref: v0.1
          path: codeowner-multi-approval-action

      - name: Check Codeowners Approval
        uses: ./codeowner-multi-approval-action
        with:
          pr-number: ${{ fromJSON(steps.get-pr-info.outputs.pr-info || '{}').number }}
          repo-name: ${{ github.repository }}
          github-token: ${{ secrets.PAT }}

  multi-approval-bot-summary:
    needs: [pre-flight, codeowners-approval]
    if: |
      (
        needs.pre-flight.outputs.docs_only == 'true'
        || needs.pre-flight.outputs.is_merge_group == 'true'
        || needs.pre-flight.outputs.is_deployment_workflow == 'true'
        || always()
      )
      && github.repository == 'NVIDIA/Megatron-LM'
      && !cancelled()
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Result
        env:
          GH_TOKEN: ${{ github.token }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          SKIPPING_IS_ALLOWED: ${{ needs.pre-flight.outputs.docs_only == 'true' || needs.pre-flight.outputs.is_deployment_workflow == 'true' || needs.pre-flight.outputs.is_merge_group == 'true' || needs.pre-flight.outputs.is_ci_workload == 'true' }}
        run: |
          FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length') || echo 0

          if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then
              echo "✅ All previous jobs completed successfully"
              exit 0
          else
              echo "❌ Found $FAILED_JOBS failed job(s)"
              # Show which jobs failed
              gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success") | .name'
              exit 1
          fi


================================================
FILE: .github/workflows/oncall-assign.yml
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Oncall Assign

on:
  pull_request_target:
    types: [ready_for_review]
    branches:
      - main

permissions:
  pull-requests: write
  contents: read

jobs:
  assign-reviewer:
    runs-on: ubuntu-latest
    if: ${{ !github.event.pull_request.draft }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install requests slack-sdk

      - name: Assign Reviewer
        env:
          GH_TOKEN: ${{ secrets.PAT }}
        run: |
          python .github/scripts/oncall_manager.py assign --pr ${{ github.event.pull_request.number }}


================================================
FILE: .github/workflows/oncall-rotation.yml
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Oncall Rotation

on:
  schedule:
    # Runs at 09:00 UTC every Wednesday
    - cron: "0 9 * * 3"
  workflow_dispatch:

permissions:
  contents: write

jobs:
  rotate-schedule:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v6
        with:
          token: ${{ secrets.PAT }}

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.10"

      - name: Rotate Schedule
        env:
          # Token to read org team members. Needs read:org scope.
          GH_TOKEN: ${{ secrets.NVIDIA_MCORE_ONCALL_TOKEN || secrets.PAT || secrets.GITHUB_TOKEN }}
          # Slack token for updating the Slack usergroup
          SLACK_TOKEN: ${{ secrets.ONCALL_SLACK_TOKEN }}
        run: |
          pip install --no-cache-dir "uv<0.9.29"
          uv venv .venv
          uv cache clean
          uv sync --no-cache 
          uv run --with slack-sdk python .github/scripts/oncall_manager.py rotate

      - name: Commit and Push changes
        run: |
          git config --global user.name "github-actions[bot]"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git add .github/oncall_schedule.json
          git commit -m "chore: rotate oncall schedule" || echo "No changes to commit"
          git pull --rebase
          git push origin HEAD:main


================================================
FILE: .github/workflows/release-docs.yml
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Release docs
on:
  workflow_dispatch:
    inputs:
      dry-run:
        description: Whether to run the workflow in dry-run mode
        required: true
        type: boolean
        default: true
      publish-as-latest:
        description: Publish as Latest stable version.
        required: false
        type: boolean
        default: true
      docs-version-override:
        description: Docs version if commit is not tagged
        required: false
        type: string
        default: ""
      update-version-picker:
        description: Update version picker.
        required: false
        type: boolean
        default: true
      notify-emails:
        description: Email addresses to send the notification to. Format as "me@me.com,you@you.com".
        required: false
        type: string
  workflow_call:
    inputs:
      dry-run:
        description: Whether to run the workflow in dry-run mode
        required: true
        type: boolean
        default: true
      publish-as-latest:
        description: Publish as Latest stable version.
        required: false
        type: boolean
        default: true
      docs-version-override:
        description: Docs version if commit is not tagged
        required: false
        type: string
        default: ""
      update-version-picker:
        description: Update version picker.
        required: false
        type: boolean
        default: true
      notify-emails:
        description: Email addresses to send the notification to. Format as "me@me.com,you@you.com".
        required: false
        type: string
      build-docs-ref:
        description: Reference to build the docs from
        required: false
        type: string
        default: ${{ github.sha }}

jobs:
  build-docs:
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_docs.yml@v0.67.0
    with:
      ref: ${{ inputs.build-docs-ref }}

  publish-docs:
    runs-on: ubuntu-latest
    needs: [build-docs]
    steps:
      - uses: actions/checkout@v6
        with:
          repository: NVIDIA-NeMo/FW-CI-templates
          ref: v0.74.0
          path: FW-CI-templates

      - uses: ./FW-CI-templates/.github/actions/publish-docs
        # This workflow runs either on main, or on a version tag. Any other git ref will lead
        # to an error.
        # If its on main, it will publish to "latest" directory in Akamai.
        # If its on a versioned tag, it will extract the version number from the tag (strip `v` prefix)
        # and publish to the versioned directory in Akamai.
        with:
          dry-run: ${{ inputs.dry-run }}
          artifacts-name: docs-html
          artifacts-path: _build/html
          emails-csv: ${{ inputs.notify-emails && format('{0},{1}', vars.docs_release_emails, inputs.notify-emails) || vars.docs_release_emails }}
          overwrite-latest-on-tag: ${{ inputs.publish-as-latest }}
          docs-version-override: ${{ inputs.docs-version-override }}
          update-version-picker: ${{ inputs.update-version-picker }}
          run-on-version-tag-only: ${{ github.ref_name != 'main' }}
          request-name: megatron-core-publish-docs-${{ github.run_id }}
          aws-region: ${{ vars.DOCS_AWS_REGION }}
          aws-role-to-assume: ${{ secrets.AWS_ASSUME_ROLE_ARN }}
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          akamai-host: ${{ secrets.AKAMAI_HOST }}
          akamai-client-token: ${{ secrets.AKAMAI_CLIENT_TOKEN }}
          akamai-client-secret: ${{ secrets.AKAMAI_CLIENT_SECRET }}
          akamai-access-token: ${{ secrets.AKAMAI_ACCESS_TOKEN }}
          s3-target-root: ${{ secrets.S3_BUCKET_NAME }}
          s3-target-path: megatron-core/developer-guide


================================================
FILE: .github/workflows/release-freeze.yml
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: "Code freeze"

on:
  workflow_dispatch:
    inputs:
      release-type:
        type: choice
        description: Type of release
        options:
          - major
          - minor
      freeze-commit:
        type: string
        description: Commit SHA to use for cut-off
        required: false
        default: main
      dry-run:
        type: boolean
        description: Dry-run of code-freeze
        required: false
        default: true
jobs:
  code-freeze:
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_code_freeze.yml@v0.22.5
    with:
      library-name: Megatron-Bridge
      python-package: megatron.bridge
      release-type: ${{ inputs.release-type }}
      freeze-commit: ${{ inputs.freeze-commit }}
      dry-run: ${{ inputs.dry-run }}
    secrets:
      SLACK_WEBHOOK: ${{ secrets.SLACK_MAIN_CHANNEL_WEBHOOK }}
      SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_TEAM_GROUP_ID }}


================================================
FILE: .github/workflows/release-nightly-docs.yml
================================================
# Copyright (c) 2026, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Release Nightly Docs

on:
  schedule:
    - cron: "0 10 * * *"

jobs:
  call-release-docs:
    uses: ./.github/workflows/release-docs.yml
    with:
      dry-run: false
      publish-as-latest: false
      docs-version-override: "nightly"
      update-version-picker: false
    secrets: inherit


================================================
FILE: .github/workflows/release.yaml
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: "Release Megatron-Core"

on:
  workflow_dispatch:
    inputs:
      release-ref:
        description: Ref (SHA or branch name) to release
        required: true
        type: string
      dry-run:
        description: Do not publish a wheel and GitHub release.
        required: true
        default: true
        type: boolean
      create-gh-release:
        description: Create a GitHub release
        required: true
        default: true
        type: boolean
      generate-changelog:
        description: Generate changelog
        required: false
        default: true
        type: boolean
      publish-docs:
        description: Publish docs
        required: false
        default: true
        type: boolean
      version-bump-branch:
        description: Branch for version bump
        required: true
        type: string
      gh-release-from-tag:
        description: Tag of previous release for changelog builder
        required: false
        type: string
        default: ""

permissions:
  contents: write # To read repository content
  pull-requests: write # To create PRs

jobs:
  release:
    uses: ./.github/workflows/_release_library.yml
    with:
      release-ref: ${{ inputs.release-ref || github.sha }}
      dry-run: ${{ inputs.dry-run || false }}
      version-bump-branch: ${{ inputs.version-bump-branch || github.ref_name }}
      create-gh-release: ${{ inputs.create-gh-release || true }}
      gh-release-use-changelog-builder: ${{ inputs.generate-changelog }}
      publish-docs: ${{ inputs.publish-docs }}
      gh-release-from-tag: ${{ inputs.gh-release-from-tag }}
    secrets:
      TWINE_PASSWORD: ${{ (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/r')) && secrets.SVC_PYPI_TOKEN || secrets.SVC_PYPI_TEST_TOKEN }}
      SLACK_WEBHOOK: ${{ (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/r')) && secrets.SLACK_MAIN_CHANNEL_WEBHOOK || secrets.SLACK_CI_CHANNEL_WEBHOOK }}
      PAT: ${{ secrets.PAT }}
      AWS_ASSUME_ROLE_ARN: ${{ secrets.AWS_ASSUME_ROLE_ARN }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      AKAMAI_HOST: ${{ secrets.AKAMAI_HOST }}
      AKAMAI_CLIENT_TOKEN: ${{ secrets.AKAMAI_CLIENT_TOKEN }}
      AKAMAI_CLIENT_SECRET: ${{ secrets.AKAMAI_CLIENT_SECRET }}
      AKAMAI_ACCESS_TOKEN: ${{ secrets.AKAMAI_ACCESS_TOKEN }}
      S3_BUCKET_NAME: ${{ secrets.S3_BUCKET_NAME }}


================================================
FILE: .github/workflows/review-trigger.yml
================================================
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Lightweight workflow that triggers on review approval, otherwise there is no access to right secret.
# No secrets needed — just signals auto-swap-labels.yml via workflow_run.

name: Review Trigger

on:
  pull_request_review:
    types: [submitted]

jobs:
  signal:
    runs-on: ubuntu-latest
    if: >-
      github.event.review.state == 'approved' &&
      github.event.pull_request.base.ref == 'main' &&
      github.repository == 'NVIDIA/Megatron-LM'
    steps:
      - name: Save PR number
        run: |
          mkdir -p pr
          echo "${{ github.event.pull_request.number }}" > pr/number
      - name: Upload PR number
        uses: actions/upload-artifact@v4
        with:
          name: pr-number
          path: pr/


================================================
FILE: .github/workflows/sync-team-usergroups.yml
================================================
# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: Sync GitHub Teams to Slack User Groups

on:
  workflow_dispatch:
  schedule:
    - cron: "0 0 * * *"

jobs:
  sync-usergroups:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.10"

      - name: Sync Teams to User Groups
        env:
          GH_TOKEN: ${{ secrets.NVIDIA_MCORE_ONCALL_TOKEN || secrets.PAT || secrets.GITHUB_TOKEN }}
          SLACK_TOKEN: ${{ secrets.ONCALL_SLACK_TOKEN }}
        run: |
          pip install --no-cache-dir "uv<0.9.29"
          uv venv .venv
          uv cache clean
          uv sync --no-cache 
          uv run --with slack-sdk python .github/scripts/sync_team_usergroups.py


================================================
FILE: .github/workflows/trigger-mbridge-tests.yml
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

name: Trigger MBridge Tests
on:
  workflow_dispatch:
    inputs:
      mbridge_ref:
        description: "MBridge branch/ref to trigger"
        required: false
        type: string
        default: "main"
      test_suite:
        description: "Test suite to run"
        required: false
        type: choice
        options:
          - "all"
          - "unit-only"
          - "functional-only"
        default: "all"

jobs:
  trigger-mbridge-tests:
    runs-on: ubuntu-latest
    steps:
      - name: Trigger MBridge tests
        uses: convictional/trigger-workflow-and-wait@v1.6.5
        with:
          owner: NVIDIA-NeMo
          repo: Megatron-Bridge
          workflow_file_name: cicd-main.yml
          github_token: ${{ secrets.PAT }}
          ref: ${{ inputs.mbridge_ref }}
          wait_interval: 60
          propagate_failure: true
          client_payload: |
            {
              "mcore_ref": "${{ github.sha }}",
              "test_suite": "${{ inputs.test_suite }}",
              "triggered_by": "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
            }


================================================
FILE: .gitignore
================================================
__pycache__
*.so
build
.coverage_*
*.egg-info
*~
slurm*
logs
.vscode
local/
.gitmodules
wandb/
onelogger.log
onelogger.err
.venv
runs/
/test_cases/
**/dist/

# Sphinx documentation
docs/_build
docs/apidocs

================================================
FILE: .gitlab/labeler-config.yml
================================================
CI:
  - .gitlab-ci.yml
  - Dockerfile.ci.lts
  - Dockerfile.ci.dev
  - .github/**
  - .gitlab/**

Datasets:
  - megatron/core/datasets/**

BERT:
  - megatron/core/models/bert/**

GPT:
  - megatron/core/models/gpt/**

Dist-Ckpt:
  - megatron/core/dist_checkpointing

Dist-Opt:
  - megatron/core/optimizer/distrib_optimizer

Inference:
  - megatron/core/inference

MoE:
  - megatron/core/transformer/moe

Tests:
  - tests/**

ParallelState:
  - megatron/core/parallel_state.py


================================================
FILE: .gitlab/scripts/build.sh
================================================
#! /bin/bash

set -x
env
eval "IMAGE=\$$IMAGE"

# Start a named container in detached mode
docker run -d --name download_test_data -w /workdir/ python:3.12-slim bash -c 'sleep infinity'
docker cp tests/. download_test_data:/workdir/tests
docker exec download_test_data bash -c '
    ls -al /workdir/
    pip install --no-cache-dir click requests
    python tests/test_utils/python_scripts/download_unit_tests_dataset.py --assets-dir ./assets
'
docker cp download_test_data:/workdir/assets ./
docker rm -f download_test_data

docker context create tls-environment
docker buildx create --name container --driver=docker-container --use tls-environment

ADDITIONAL_PARAMS=()

CI_COMMIT_BRANCH="${CI_COMMIT_BRANCH:-$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME}"

if [[ "$CI_COMMIT_BRANCH" == "ci-rebuild-mcore-nemo-image" || "$CI_COMMIT_BRANCH" == "main" || "$CI_COMMIT_BRANCH" == "dev" ]]; then
    ADDITIONAL_PARAMS+=("--pull")
fi

CI_COMMIT_BRANCH=$(echo "$CI_COMMIT_BRANCH" | tr '/' '-' | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9._-]/-/g')
ADDITIONAL_PARAMS+=("--cache-to type=registry,ref=${IMAGE}-buildcache:${CI_COMMIT_BRANCH}-${PLATFORM},mode=max")
ADDITIONAL_PARAMS+=("--cache-from type=registry,ref=${IMAGE}-buildcache:${CI_COMMIT_BRANCH}-${PLATFORM}")
ADDITIONAL_PARAMS+=("--cache-from type=registry,ref=${IMAGE}-buildcache:main-${PLATFORM}")
ADDITIONAL_PARAMS+=("--cache-from type=registry,ref=${IMAGE}-buildcache:dev-${PLATFORM}")

ADDITIONAL_PARAMS+=("-t ${IMAGE}:${CI_COMMIT_BRANCH}-${PLATFORM}")

if [[ -n "$CI_MERGE_REQUEST_IID" ]]; then
    ADDITIONAL_PARAMS+=("--cache-to type=registry,ref=${IMAGE}-buildcache:${CI_MERGE_REQUEST_IID}-${PLATFORM},mode=max")
    ADDITIONAL_PARAMS+=("--cache-from type=registry,ref=${IMAGE}-buildcache:${CI_MERGE_REQUEST_IID}-${PLATFORM}")
    ADDITIONAL_PARAMS+=("-t ${IMAGE}:${CI_MERGE_REQUEST_IID}-${PLATFORM}")
fi

if [[ "$CI_COMMIT_BRANCH" == "ci-nightly" ]]; then
    ADDITIONAL_PARAMS+=("-t ${IMAGE}:nightly-${PLATFORM}")
fi

if [[ -n "$TE_GIT_REF" ]]; then
    ADDITIONAL_PARAMS+=("--build-arg TE_COMMIT=${TE_GIT_REF}")
fi

echo $(git rev-parse HEAD)

JET_API_VERSION=$(curl -s -u "$ARTIFACTORY_USER:$ARTIFACTORY_TOKEN" "https://sc-hw-artf.nvidia.com/artifactory/api/pypi/hw-joc-pypi/simple/jet-api/" | grep -o 'href="../../jet-api/[0-9.]*/' | sed 's|href="../../jet-api/||;s|/||' | sort -V -r | head -n1)

DOCKER_BUILDKIT=1 docker build \
    --secret id=JET_INDEX_URLS \
    --secret id=LOGGER_INDEX_URL \
    --target $STAGE \
    -f docker/$FILE \
    -t ${IMAGE}:${CI_PIPELINE_ID}-${PLATFORM} \
    --builder=container \
    --build-arg JET_API_VERSION=$JET_API_VERSION \
    --build-arg FROM_IMAGE_NAME=$BASE_IMAGE \
    --provenance=false \
    --push \
    --progress plain \
    ${ADDITIONAL_PARAMS[@]} .


================================================
FILE: .gitlab/scripts/check_imports.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#!/usr/bin/env python3
"""
Import checker script for megatron.hub package.

This script recursively discovers all Python modules in the specified package
and attempts to import them, reporting any import errors.
"""

import importlib
import os
import sys
import traceback
from typing import Dict, List, Tuple

import click


class ImportChecker:
    """Check imports for all modules in a package."""

    def __init__(self, package_name: str = "megatron.core", verbose: bool = False):
        self.package_name = package_name
        self.success_count = 0
        self.failure_count = 0
        self.graceful_count = 0
        self.skipped_count = 0
        self.failures: Dict[str, str] = {}
        self.successes: List[str] = []
        self.graceful_failures: Dict[str, str] = {}
        self.skipped: List[str] = []

        # Modules to skip (known problematic ones)
        self.skip_patterns = {
            "__pycache__",
            ".pytest_cache",
            ".git",
            "test_",
            "_test",
        }

        # Add current directory to Python path if not already there
        current_dir = os.getcwd()
        if current_dir not in sys.path:
            sys.path.insert(0, current_dir)

    def should_skip_module(self, module_name: str) -> bool:
        """Check if a module should be skipped."""
        for pattern in self.skip_patterns:
            if pattern in module_name:
                return True
        return False

    def discover_modules(self, package_path: str) -> List[str]:
        """Discover all Python modules in the given package path."""
        modules = []

        package = importlib.import_module(package_path)
        package_path = package.__path__[0]

        # Walk through all Python files
        for root, dirs, files in os.walk(package.__path__[0]):
            # Skip hidden directories and __pycache__
            dirs[:] = [d for d in dirs if not d.startswith(".") and d != "__pycache__"]

            for file in files:
                if file.endswith(".py") and not file.startswith("."):
                    # Convert file path to module name
                    rel_path = os.path.relpath(os.path.join(root, file), package_path)
                    module_parts = rel_path.replace(os.sep, ".").replace(".py", "")

                    # Handle __init__.py files
                    if module_parts.endswith(".__init__"):
                        module_parts = module_parts[:-9]  # Remove .__init__

                    full_module_name = (
                        f"{self.package_name}.{module_parts}"
                        if module_parts
                        else self.package_name
                    )

                    if not self.should_skip_module(full_module_name):
                        modules.append(full_module_name)

            # Remove duplicates and sort
            modules = sorted(list(set(modules)))

        return modules

    def import_module(self, module_name: str) -> Tuple[str, str]:
        """
        Try to import a module and return success status and error message.

        Returns:
            Tuple of (status: str, error_message: str)
            status can be: "success", "graceful", or "failed"
        """
        try:
            if module_name in sys.modules:
                del sys.modules[module_name]

            importlib.import_module(module_name)
            return "success", ""

        except Exception:
            tb = traceback.format_exc()
            if "UnavailableError" in tb:
                return "graceful", "UnavailableError detected during import"
            return "failed", f"{str(tb)}"

    def check_all_imports(self):
        """Check imports for all discovered modules."""
        print(f"Discovering modules in package '{self.package_name}'...")
        modules = self.discover_modules(self.package_name)

        if not modules:
            print("No modules found!")
            return

        print(f"Found {len(modules)} modules to check")
        print("=" * 60)

        for i, module_name in enumerate(modules, 1):
            status, error_msg = self.import_module(module_name)

            if status == "success":
                self.success_count += 1
                self.successes.append(module_name)
            elif status == "graceful":
                self.graceful_count += 1
                self.graceful_failures[module_name] = error_msg
            else:  # failed
                self.failure_count += 1
                self.failures[module_name] = error_msg

        """Print a summary of the import check results."""
        total = (
            self.success_count
            + self.failure_count
            + self.graceful_count
            + self.skipped_count
        )

        print("\n" + "=" * 60)
        print("IMPORT CHECK SUMMARY")
        print("=" * 60)
        print(f"Total modules checked: {total}")
        print(
            f"Successful imports:    {self.success_count} ({self.success_count / total * 100:.1f}%)"
        )
        print(
            f"Gracefully handled:    {self.graceful_count} ({self.graceful_count / total * 100:.1f}%)"
        )
        print(
            f"Failed imports:        {self.failure_count} ({self.failure_count / total * 100:.1f}%)"
        )
        if self.skipped_count > 0:
            print(
                f"Skipped modules:       {self.skipped_count} ({self.skipped_count / total * 100:.1f}%)"
            )

        if self.graceful_failures:
            print(f"\n🟡 GRACEFULLY HANDLED ({len(self.graceful_failures)}):")
            print("-" * 40)

        if self.failures:
            print(f"\n❌ FAILED IMPORTS ({len(self.failures)}):")
            print("-" * 40)
            for module_name, error_msg in self.failures.items():
                print(f"\n• {module_name}")
                # Show only the first few lines of error to keep output manageable
                error_lines = error_msg.split("\n")
                for line in error_lines:
                    # if self.package_name.replace(".", os.sep) not in line:
                    #     continue
                    if line.strip():
                        print(f"  {line}")

        return self.failure_count == 0


@click.command()
@click.option(
    "--package-name",
    required=True,
    help="Package name to check imports for",
)
def main(package_name: str):
    """Main entry point."""
    checker = ImportChecker(package_name=package_name)
    successful = checker.check_all_imports()
    exit(0 if successful else 1)


if __name__ == "__main__":
    main()


================================================
FILE: .gitlab/scripts/fetch-legacy-suite.sh
================================================
#!/bin/bash
set -euxo pipefail

# Default values
MCORE_REPO="https://github.com/nvidia/megatron-lm.git"
MCORE_MR_COMMIT="main"
MCORE_BACKWARDS_COMMIT=""

# Parse command line arguments
usage() {
    cat <<EOF
Usage: $0 [OPTIONS]

Clone and setup megatron-lm repositories for testing.

Options:
    --repo URL              Git repository URL (default: $MCORE_REPO)
    --backwards-commit COMMIT Commit hash or reference for the backwards compatibility test
    --help                  Show this help message

Example:
    $0 --repo $MCORE_REPO \\
       --backwards-commit core_r0.12.0
EOF
    exit 1
}

# Parse arguments
while [[ $# -gt 0 ]]; do
    case $1 in
    --repo)
        MCORE_REPO="$2"
        shift 2
        ;;
    --backwards-commit)
        MCORE_BACKWARDS_COMMIT="$2"
        shift 2
        ;;
    --help)
        usage
        ;;
    *)
        echo "Unknown option: $1"
        usage
        ;;
    esac
done

# Validate required arguments
if [[ -z "${MCORE_BACKWARDS_COMMIT:-}" ]]; then
    echo "Error: --backwards-commit is required"
    usage
fi

# Checkout backwards-ref
rm -rf megatron-lm-legacy
mkdir megatron-lm-legacy
pushd megatron-lm-legacy
git init
git remote add origin $MCORE_REPO
git fetch origin $MCORE_BACKWARDS_COMMIT
git checkout $MCORE_BACKWARDS_COMMIT
git rev-parse HEAD
rm -rf megatron
cp -a ../megatron-lm/megatron ./
popd

# Copy unit test script
cp megatron-lm/tests/unit_tests/run_ci_test.sh megatron-lm-legacy/tests/unit_tests/run_ci_test.sh
cp megatron-lm/pyproject.toml megatron-lm-legacy/pyproject.toml

================================================
FILE: .gitlab/stages/00.pre.yml
================================================
include:
  - template: Security/Secret-Detection.gitlab-ci.yml

.pre_rules:
  rules:
    - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_PROTECTED != "true"
      allow_failure: true
      when: always
    - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result'
      when: always

    - when: never
  stage: .pre

.dind_rules:
  image: docker:26.1.4-dind
  variables:
    DOCKER_HOST: unix:///var/run/docker.sock
  before_script:
    - docker system prune -a --filter "until=36h" -f || true
    - echo "$NGC_API_KEY" | docker login nvcr.io -u '$oauthtoken' --password-stdin
    - echo "$CI_REGISTRY_PASSWORD" | docker login $CI_REGISTRY -u $CI_REGISTRY_USER --password-stdin

pre:create_ci_branches:
  rules:
    - if: '$CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "push"'
      allow_failure: true
    - when: never
  parallel:
    matrix:
      - branch: ci-unit-test-extended
      - branch: ci-rebuild-mcore-nemo-image
      - branch: ci-mr
      - branch: ci-nightly
      - branch: ci-weekly
      - branch: ci-pre-release
      - branch: ci-review-reminder
      - branch: ci-upgrade-dependencies
      - branch: ci-approve-main
      - branch: ci-approve-dev
      - branch: ci-sync-branches
      - branch: ci-testing-1
      - branch: ci-testing-2
      - branch: ci-testing-3
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  stage: .pre
  image: python:3.10
  variables:
    GIT_STRATEGY: "clone"
  script:
    - git remote set-url origin "https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/adlr/megatron-lm.git"
    - git switch --force-create $branch
    - git push --force -u origin $branch
  retry:
    max: 2

pre:create_ci_branches_dev:
  rules:
    - if: '$CI_COMMIT_BRANCH == "dev" && $CI_PIPELINE_SOURCE == "push"'
      allow_failure: true
    - when: never
  parallel:
    matrix:
      - branch: ci-dev-unit-test-extended
      - branch: ci-dev-rebuild-mcore-nemo-image
      - branch: ci-dev-mr
      - branch: ci-dev-nightly
      - branch: ci-dev-upgrade-dependencies
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  stage: .pre
  image: python:3.10
  variables:
    GIT_STRATEGY: "clone"
  script:
    - git remote set-url origin "https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/adlr/megatron-lm.git"
    - git switch --force-create $branch
    - git push --force -u origin $branch
  retry:
    max: 2

pre:label_merge_request:
  extends: [.pre_rules]
  image: golang:1.22
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  before_script:
    - git clone -b nv https://${GITLAB_ENDPOINT}/okoenig/gitlab-mr-labeler.git
    - cd gitlab-mr-labeler
    - go install .
    - cd ..
    - go install github.com/itchyny/gojq/cmd/gojq@v0.12.17
  script:
    - set -x
    - |
      LABELS=$(curl --header "PRIVATE-TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE}" --url "https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests/${CI_MERGE_REQUEST_IID}")
    - LABELS=$(echo "$LABELS" | gojq '.labels -= ["ParallelState"]')
    - |
      if git --no-pager diff --merge-base origin/${CI_MERGE_REQUEST_TARGET_BRANCH_NAME} -- 'megatron/core/' | grep -q 'parallel_state'; then
        LABELS=$(echo "$LABELS" | gojq '.labels += ["ParallelState"]')
        echo "$LABELS"
      fi

    - echo LABELS=$(echo "$LABELS" | gojq '.labels | join(",")') > labels
    - gitlab-mr-labeler -f .gitlab/labeler-config.yml -t ${PROJECT_ACCESS_TOKEN_MCORE} --debug true
    - cat labels
  after_script:
    - |
      source labels
      curl --header "PRIVATE-TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE}" --url "https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests/${CI_MERGE_REQUEST_IID}" --data-urlencode "add_labels=$LABELS" -X PUT

pre:maybe_cherry_pick_to_main:
  rules:
    - if: "$CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME == 'dev' && $CI_MERGE_REQUEST_LABELS =~ /mirror-to-main/"
    - when: never
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  stage: .pre
  image: nentangso/alpine-git-curl-jq
  variables:
    GIT_STRATEGY: "clone"
  script:
    - |
      set -x
      MR_ID=$CI_MERGE_REQUEST_IID
      TARGET_BRANCH="cp/$MR_ID-into-main"
      TARGET_BRANCH_EXISTS_OK=$([[ "$(git ls-remote --heads origin refs/heads/$TARGET_BRANCH)" != "" ]] && echo true || echo false)

      if [[ "$TARGET_BRANCH_EXISTS_OK" == "true" ]]; then
        echo Target branch already exists, will not cherry-pick again.
        exit 0
      fi

      MR=$(curl --header "PRIVATE-TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE}" --url "https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests/${MR_ID}")

      LABELS=$(echo -E $MR | jq '.labels | join(",")' | tr -d '"')
      AUTHOR_ID=$(echo -E $MR | jq '.author.id' | tr -d '"')
      AUTHOR_NAME=$(echo -E $MR | jq '.author.username' | tr -d '"')
      TITLE=$(echo -E $MR | jq '.title' | tr -d '"')
      MILESTONE_ID=$(echo -E $MR | jq '.milestone.id' | tr -d '"')

      git remote set-url origin "https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/$CI_PROJECT_PATH.git"
      git remote add mr-origin "https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/$CI_MERGE_REQUEST_SOURCE_PROJECT_PATH.git"

      git config --global user.email "mcore-bot@nvidia.com"
      git config --global user.name "Mcore Bot"

      git fetch origin dev
      git fetch mr-origin $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME
      git checkout $CI_MERGE_REQUEST_SOURCE_BRANCH_NAME
      START_COMMIT=$(git merge-base origin/dev mr-origin/$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME)
      END_COMMIT=$(git rev-parse HEAD)

      git fetch origin main
      git checkout main
      git checkout -b $TARGET_BRANCH

      git cherry-pick $START_COMMIT..$END_COMMIT
      git push -u origin $TARGET_BRANCH

      curl \
        --header "PRIVATE-TOKEN: $PAT" \
        --url https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests \
        -d "source_branch=$TARGET_BRANCH" \
        -d "target_branch=main" \
        -d "title=cp MR !$MR_ID from dev: \`$TITLE\`" \
        -d "labels=cherry-picked-from-dev" \
        -d "reviewer_ids=$AUTHOR_ID" \
        -d "milestone_id=$MILESTONE_ID" \
        -d "description=[🤖]: Hi @$AUTHOR_NAME 👋,<br><br>we've cherry picked \`$TITLE (!$MR_ID)\` into \`main\` for you! 🚀<br><br>Please review and approve this cherry pick by your convenience\!"

pre:maybe_cherry_pick_commit:
  rules:
    - if: '$CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "push"'
    - when: never
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  stage: .pre
  image: nentangso/alpine-git-curl-jq
  variables:
    GIT_STRATEGY: "clone"
  script:
    - set -x
    - set +e
    - SHA=$(git rev-list --no-merges -n 1 HEAD)
    - MESSAGE=$(git log -n 1 --pretty=format:%s $SHA)
    - MR_ID=$(echo $MESSAGE | awk -F'!' '{print $2}' | awk '{print $1}' )
    - git remote set-url origin "https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/$CI_PROJECT_NAMESPACE/megatron-lm.git"
    - git config --global user.email "mcore-bot@nvidia.com"
    - git config --global user.name "Mcore Bot"
    - |
      MR=$(curl --header "PRIVATE-TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE}" --url "https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests/${MR_ID}")

      LABELS=$(echo -E $MR | jq '.labels | join(",")' | tr -d '"')
      AUTHOR_ID=$(echo -E $MR | jq '.author.id' | tr -d '"')
      AUTHOR_NAME=$(echo -E $MR | jq '.author.username' | tr -d '"')
      TITLE=$(echo -E $MR | jq '.title' | tr -d '"')
      MILESTONE_ID=$(echo -E $MR | jq '.milestone.id' | tr -d '"')
      TARGET_BRANCHES=$(echo "$LABELS" | grep -o 'core_[^,]*')

      if [[ $TARGET_BRANCHES == "" ]]; then
        echo Nothing to cherry pick
        exit 0
      fi

      echo $TARGET_BRANCHES | while read -r RELEASE_BRANCH ; do
        TARGET_BRANCH_EXISTS_OK=$([[ "$(git ls-remote --heads origin refs/heads/$RELEASE_BRANCH)" != "" ]] && echo true || echo false)

        if [[ "$TARGET_BRANCH_EXISTS_OK" == "false" ]]; then
          echo Release branch does not yet exist, will not  cherry-pick
          continue
        fi
        
        (
          git fetch origin $RELEASE_BRANCH:$RELEASE_BRANCH
          git switch --force-create cherry-pick-$MR_ID-$RELEASE_BRANCH $RELEASE_BRANCH
          git cherry-pick $SHA
          git push -u origin --force cherry-pick-$MR_ID-$RELEASE_BRANCH
          git checkout main
        )

        CHERRYPICK_SUCCESSFUL=$?

        if [[ $CHERRYPICK_SUCCESSFUL -eq 0 ]]; then
          curl \
            --header "PRIVATE-TOKEN: $PAT" \
            --url https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests \
            -d "source_branch=cherry-pick-$MR_ID-$RELEASE_BRANCH" \
            -d "target_branch=$RELEASE_BRANCH" \
            -d "title=Cherry pick \`$TITLE ($MR_ID)\` into \`$RELEASE_BRANCH\`" \
            -d "labels=cherry-pick" \
            -d "reviewer_ids=$AUTHOR_ID" \
            -d "milestone_id=$MILESTONE_ID" \
            -d "description=[🤖]: Hi @$AUTHOR_NAME 👋,<br><br>we've cherry picked \`$TITLE ($MR_ID)\` into \`$RELEASE_BRANCH\` for you! 🚀<br><br>Please review and approve this cherry pick by your convenience\!"

        else
          URL=https://${GITLAB_ENDPOINT}/ADLR/megatron-lm/-/merge_requests/$MR_ID

          MESSAGE='{
            "blocks": [
              {
                "type": "section",
                "text": {
                  "type": "mrkdwn",
                  "text": "beep boop 🤖: Cherry-pick of <'$URL'|!'$MR_ID'> failed\ncc '$SLACK_ADMIN'"
                }
              }
            ]
          }'

          curl -X POST -H "Content-type: application/json" --data "$MESSAGE" ${MCORE_NOTIFICATION_HOOK}

        fi

      done
  interruptible: false

pre:check_milestone:
  extends: [.pre_rules]
  image: badouralix/curl-jq
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  script:
    - env
    - |
      MILESTONE=$(curl --header "PRIVATE-TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE}" --url "https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests/${CI_MERGE_REQUEST_IID}" | jq '.milestone')
    - |
      if [[ "$MILESTONE" == "null" ]]; then
        LATEST_MILESTONE=$(curl --header "PRIVATE-TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE}" --url "https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/milestones?state=active&order_by=due_date&sort=desc" | jq '.[0].id')
        curl --request PUT --header "PRIVATE-TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE}" --url "https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests/${CI_MERGE_REQUEST_IID}" --data "milestone_id=${LATEST_MILESTONE}"
        echo "Applied latest milestone (ID: ${LATEST_MILESTONE}) to this MR"
      fi

pre:check_status_of_main:
  extends: [.pre_rules]
  image: python:3.10
  timeout: 7 days
  variables:
    KUBERNETES_SERVICE_MEMORY_REQUEST: 32Gi
    KUBERNETES_SERVICE_MEMORY_LIMIT: 32Gi
    KUBERNETES_SERVICE_CPU_REQUEST: 8
    KUBERNETES_SERVICE_CPU_LIMIT: 12
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  script:
    - env
    - pip install --no-cache-dir python-gitlab click
    - export RO_API_TOKEN=${PROJECT_ACCESS_TOKEN_MCORE}
    - export GITLAB_ENDPOINT
    - python tests/test_utils/python_scripts/check_status_of_main.py --target-branch "$CI_MERGE_REQUEST_TARGET_BRANCH_NAME"
  rules:
    - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merge_train' && $CI_MERGE_REQUEST_LABELS =~ /fast-track/
      when: never
    - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merge_train'
      when: always
    - when: never


================================================
FILE: .gitlab/stages/01.build.yml
================================================
.build_rules:
  rules:
    - if: $BUILD == "no"
      when: never
    - when: on_success
  stage: test

.build_image:
  extends: [.build_rules, .dind_rules]
  stage: build
  tags:
    - arch/${PLATFORM}
    - origin/jet-fleet
    - env/prod
    - purpose/builder-large
  services:
    - name: docker:24.0.5-dind
      variables:
        HEALTHCHECK_TCP_PORT: "2376"
  timeout: 180m
  variables:
    DOCKER_HOST: tcp://docker:2376
    DOCKER_TLS_CERTDIR: "/certs"
    DOCKER_TLS_VERIFY: 1
    DOCKER_CERT_PATH: "$DOCKER_TLS_CERTDIR/client"
    STAGE: jet
    MCORE_BACKWARDS_REF: core_r0.14.0
    KUBERNETES_SERVICE_MEMORY_REQUEST: 90Gi
    KUBERNETES_SERVICE_MEMORY_LIMIT: 90Gi
    SHARED_PATH: /builds/$CI_PROJECT_PATH/shared
  script:
    - eval PUBLISH_COMMIT=$PUBLISH_COMMIT
    - apk add bash curl git
    - export TE_GIT_REF=$TE_GIT_REF
    - export GH_TOKEN=$GH_TOKEN
    - bash .gitlab/scripts/build.sh

    - git fetch origin $MCORE_BACKWARDS_REF
    - MCORE_BACKWARDS_COMMIT=$(git rev-parse FETCH_HEAD)

    - echo "MCORE_MR_COMMIT=$CI_COMMIT_SHA" | tee -a build.env
    - echo "MCORE_BACKWARDS_COMMIT=$MCORE_BACKWARDS_COMMIT" | tee -a build.env
    - cat build.env
  retry:
    max: 2
  artifacts:
    reports:
      dotenv: build.env

test:pre_build_image:
  extends: [.build_image]
  parallel:
    matrix:
      - IMAGE: CI_MCORE_LTS_IMAGE
        FILE: Dockerfile.ci.dev
        IMAGE_TYPE: lts
        BASE_IMAGE: nvcr.io/nvidia/pytorch:25.09-py3
        PLATFORM: amd64
      - IMAGE: CI_MCORE_LTS_IMAGE
        FILE: Dockerfile.ci.dev
        IMAGE_TYPE: lts
        BASE_IMAGE: nvcr.io/nvidia/pytorch:25.09-py3
        PLATFORM: arm64
      - IMAGE: CI_MCORE_DEV_IMAGE
        FILE: Dockerfile.ci.dev
        IMAGE_TYPE: dev
        BASE_IMAGE: nvcr.io/nvidia/pytorch:26.02-py3
        PLATFORM: amd64
      - IMAGE: CI_MCORE_DEV_IMAGE
        FILE: Dockerfile.ci.dev
        IMAGE_TYPE: dev
        BASE_IMAGE: nvcr.io/nvidia/pytorch:26.02-py3
        PLATFORM: arm64
      - IMAGE: UTILITY_IMAGE
        FILE: Dockerfile.linting
        BASE_IMAGE: python:3.10
        PLATFORM: amd64
      - IMAGE: UTILITY_IMAGE
        FILE: Dockerfile.linting
        BASE_IMAGE: python:3.10
        PLATFORM: arm64

test:build_nemo_image:
  extends: [.build_image]
  variables:
    IMAGE: CI_NEMO_IMAGE
    FILE: Dockerfile.ci.nemo
    BASE_IMAGE: nvcr.io/nvidian/nemo:nightly
    PLATFORM: amd64
  rules:
    - if: $FUNCTIONAL_TEST == "yes" || $INTEGRATION_TEST == "yes" || $CI_COMMIT_BRANCH == "ci-rebuild-mcore-nemo-image"
      when: on_success

test:build_image:
  needs: [test:pre_build_image]
  extends: [.build_rules, .dind_rules]
  parallel:
    matrix:
      - IMAGE: CI_MCORE_LTS_IMAGE
      - IMAGE: CI_MCORE_DEV_IMAGE
      - IMAGE: UTILITY_IMAGE
  stage: build
  tags:
    - arch/amd64
    - origin/jet-fleet
    - env/prod
    - purpose/builder-large
  services:
    - name: docker:24.0.5-dind
      variables:
        HEALTHCHECK_TCP_PORT: "2376"
  timeout: 180m
  variables:
    DOCKER_HOST: tcp://docker:2376
    DOCKER_TLS_CERTDIR: "/certs"
    DOCKER_TLS_VERIFY: 1
    DOCKER_CERT_PATH: "$DOCKER_TLS_CERTDIR/client"
    STAGE: jet
    MCORE_BACKWARDS_REF: core_r0.14.0
    KUBERNETES_SERVICE_MEMORY_REQUEST: 90Gi
    KUBERNETES_SERVICE_MEMORY_LIMIT: 90Gi
    SHARED_PATH: /builds/$CI_PROJECT_PATH/shared
  script:
    - apk add skopeo
    - |
      set -x

      env
      eval "IMAGE=\$$IMAGE"

      docker manifest create ${IMAGE}:${CI_PIPELINE_ID} \
        ${IMAGE}:${CI_PIPELINE_ID}-amd64 \
        ${IMAGE}:${CI_PIPELINE_ID}-arm64

      docker manifest push ${IMAGE}:${CI_PIPELINE_ID}

      if [[ "$CI_COMMIT_BRANCH" == "ci-rebuild-mcore-nemo-image" || "$CI_COMMIT_BRANCH" == "main" || "$CI_COMMIT_BRANCH" == "dev" ]]; then
        skopeo copy --all docker://${IMAGE}:${CI_PIPELINE_ID} docker://${IMAGE}:${CI_COMMIT_BRANCH}
      fi

    - echo "MCORE_MR_COMMIT=$CI_COMMIT_SHA" | tee -a build.env
    - echo "MCORE_BACKWARDS_COMMIT=$MCORE_BACKWARDS_COMMIT" | tee -a build.env
    - cat build.env
  retry:
    max: 2
  artifacts:
    reports:
      dotenv: build.env


================================================
FILE: .gitlab/stages/02.test.yml
================================================
.test_rules:
  rules:
    - if: $PUBLISH == "yes"
      when: never
    - if: $BUILD == "no"
      when: never
    - when: on_success
  stage: test

include:
  - template: Security/Secret-Detection.gitlab-ci.yml

wait_for_resources:
  extends: [.test_rules]
  needs:
    - job: test:linting_secret_detection
      optional: true
    - test:build_image
  image: python:3.10
  timeout: 7 days
  variables:
    KUBERNETES_SERVICE_MEMORY_REQUEST: 32Gi
    KUBERNETES_SERVICE_MEMORY_LIMIT: 32Gi
    KUBERNETES_SERVICE_CPU_REQUEST: 8
    KUBERNETES_SERVICE_CPU_LIMIT: 12
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  script:
    - env
    - pip install --no-cache-dir python-gitlab click
    - export RO_API_TOKEN=${PROJECT_ACCESS_TOKEN_MCORE}
    - export GITLAB_ENDPOINT
    - export NUM_CONCURRENT_JOBS
    - python tests/test_utils/python_scripts/wait_for_resources.py --pipeline-id $CI_PIPELINE_ID --target-branch $CI_MERGE_REQUEST_TARGET_BRANCH_NAME
  rules:
    - if: $CI_MERGE_REQUEST_LABELS =~ /fast-track/
      when: never
    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
      when: on_success
    - when: never

test:unit_tests_configure:
  extends: [.test_rules]
  needs:
    - test:build_image
    - job: wait_for_resources
      optional: true
  image: ${UTILITY_IMAGE}:${CI_PIPELINE_ID}
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  before_script:
    - git rm -r tests/test_utils/local_recipes || true
    - git submodule add --force https://gitlab-ci-token:${CI_JOB_TOKEN}@${GITLAB_ENDPOINT}/ADLR/megatron-lm-convergence-tests.git tests/test_utils/local_recipes
    - ls tests/test_utils/local_recipes
  script:
    - env
    - set -x
    - |
      A100_CLUSTER=$([[ "$CLUSTER_A100" != "" ]] && echo $CLUSTER_A100 || echo $DEFAULT_A100_CLUSTER)
      H100_CLUSTER=$([[ "$CLUSTER_H100" != "" ]] && echo $CLUSTER_H100 || echo $DEFAULT_H100_CLUSTER)
    - |
      ARGS=(
        "--scope unit-tests"
        "--n-repeat ${UNIT_TEST_REPEAT}"
        "--time-limit $(( UNIT_TEST_TIMEOUT * 60 ))"
        "--test-cases all"
        "--cluster $H100_CLUSTER"
        "--platform dgx_h100"
        "--partition batch"
        "--container-image ${UTILITY_IMAGE}"
        "--container-tag ${CI_PIPELINE_ID}"
        "--dependent-job test:unit_tests_configure"
        "--slurm-account ${CI_SLURM_ACCOUNT}"
        "--no-enable-warmup"
      )
    - |
      export PYTHONPATH=$(pwd)
      python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
        ${ARGS[@]} \
        --environment "lts" \
        --tag "legacy" \
        --output-path "unit-test-job-lts-legacy.yaml"
    - |
      export PYTHONPATH=$(pwd)
      python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
        ${ARGS[@]} \
        --environment "lts" \
        --tag "latest" \
        --output-path "unit-test-job-lts-latest.yaml"
    - |
      export PYTHONPATH=$(pwd)
      python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
        ${ARGS[@]} \
        --environment "dev" \
        --tag "legacy" \
        --output-path "unit-test-job-dev-legacy.yaml"
    - |
      export PYTHONPATH=$(pwd)
      python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
        ${ARGS[@]} \
        --environment "dev" \
        --tag "latest" \
        --output-path "unit-test-job-dev-latest.yaml"
  rules:
    - if: $UNIT_TEST == 'yes' && $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_PROTECTED != "true"
      allow_failure: true
      when: on_success
    - if: $UNIT_TEST == 'yes' && $UNIT_TEST_REPEAT != '0'
      when: on_success
  artifacts:
    paths:
      - unit-test-job-dev-legacy.yaml
      - unit-test-job-dev-latest.yaml
      - unit-test-job-lts-legacy.yaml
      - unit-test-job-lts-latest.yaml
      - tests/test_utils/local_recipes

.unit_tests_run:
  needs:
    - job: test:linting_formatting
      optional: true
    - job: test:linting_copyright
      optional: true
    - job: test:linting_secret_detection
      optional: true
    - test:unit_tests_configure
    - test:build_image
  extends: [.test_rules]
  trigger:
    include:
      - artifact: unit-test-job-$ENVIRONMENT-$TAG.yaml
        job: test:unit_tests_configure
    strategy: depend
  variables:
    RO_API_TOKEN: $PAT
    CONTAINER_TAG: $CI_PIPELINE_ID
    CI_MCORE_LTS_IMAGE: $CI_MCORE_LTS_IMAGE
    GITLAB_ENDPOINT: $GITLAB_ENDPOINT
    PARENT_PIPELINE_ID: $CI_PIPELINE_ID
    MCORE_MR_COMMIT: $MCORE_MR_COMMIT
    MCORE_BACKWARDS_COMMIT: $MCORE_BACKWARDS_COMMIT

  inherit:
    variables: true
  rules:
    - if: $UNIT_TEST == 'yes' && $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_PROTECTED != "true"
      allow_failure: true
      when: on_success
    - if: $UNIT_TEST == 'yes' && $UNIT_TEST_REPEAT != '0'
      when: on_success

test:unit_tests_pyt(DEV)_mcore(latest):
  extends: [.unit_tests_run]
  variables:
    ENVIRONMENT: dev
    TAG: latest

test:unit_tests_pyt(LTS)_mcore(latest):
  extends: [.unit_tests_run]
  variables:
    ENVIRONMENT: lts
    TAG: latest

test:unit_tests_notify:
  extends: [.test_rules]
  image: ${UTILITY_IMAGE}:${CI_PIPELINE_ID}
  needs:
    - test:unit_tests_pyt(DEV)_mcore(latest)
    - test:unit_tests_pyt(LTS)_mcore(latest)
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  script:
    - env
    - |
      if [[ "$CI_COMMIT_BRANCH" == *dev* ]]; then
        export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK_DEV}
      else
        export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK}
      fi
    - export RO_API_TOKEN=${PROJECT_ACCESS_TOKEN_MCORE}
    - export GITLAB_ENDPOINT
    - export TAG_TEAM=$([[ "$CI_COMMIT_BRANCH" == "main" ]] && echo "1" || "0")
    - export TEAM_SLUG=$SLACK_ADMIN
    - |
      python tests/test_utils/python_scripts/notify.py \
        --pipeline-id "${CI_PIPELINE_ID}" \
        --check-for unit-tests \
        --pipeline-context "unit-tests-extended" \
        --pipeline-created-at "${CI_PIPELINE_CREATED_AT}"
  artifacts:
    when: always
    paths:
      - scripts
  rules:
    - if: $CI_PIPELINE_SOURCE == "schedule" && ($CI_COMMIT_BRANCH == "ci-unit-test-extended" || $CI_COMMIT_BRANCH ==  "ci-dev-unit-test-extended")
      when: always
    - when: never

# Override from template
secret_detection:
  rules:
    - when: never

# Inherit and modify template
test:linting_secret_detection:
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  extends: [".secret-analyzer"]
  needs: [test:build_image]
  variables:
    GIT_DEPTH: 0
    SECRET_DETECTION_LOG_OPTIONS: ${CI_MERGE_REQUEST_DIFF_BASE_SHA}..${CI_COMMIT_SHA}
  allow_failure: false
  rules:
    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
    - when: never
  script:
    - apk add jq
    - /analyzer run
    - |
      if [[ $(cat gl-secret-detection-report.json | jq '.vulnerabilities | length > 0') == true ]]; then
        echo "Atleast one vulnerability has been found"
        cat gl-secret-detection-report.json | jq '.'
        exit 1
      fi

test:unit_tests_x_coverage_report:
  extends: [.test_rules]
  needs:
    - job: test:unit_tests_pyt(DEV)_mcore(latest)
    - job: test:unit_tests_pyt(LTS)_mcore(latest)
  image: ${UTILITY_IMAGE}:${CI_PIPELINE_ID}
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  script:
    - env
    - export RO_API_TOKEN=${PROJECT_ACCESS_TOKEN_MCORE}
    - export GITLAB_ENDPOINT
    - python tests/test_utils/python_scripts/download_coverage_results.py --pipeline-id ${CI_PIPELINE_ID}
    - coverage combine --keep $(ls coverage_results/*/coverage_report)
    - coverage report
    - coverage xml
  coverage: "/TOTAL.+ ([0-9]{1,3}%)/"
  artifacts:
    reports:
      coverage_report:
        coverage_format: cobertura
        path: coverage.xml
  rules:
    - if: $UNIT_TEST == 'yes' && $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_PROTECTED != "true"
      allow_failure: true
      when: on_success
    - if: $UNIT_TEST == 'yes' && $UNIT_TEST_REPEAT != '0'
      when: on_success

test:safe_imports:
  extends: [.test_rules]
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/builder-large
    - team/megatron
  services:
    - name: docker:24.0.5-dind
      variables:
        HEALTHCHECK_TCP_PORT: "2376"
  variables:
    KUBERNETES_SERVICE_MEMORY_REQUEST: 32Gi
    KUBERNETES_SERVICE_MEMORY_LIMIT: 32Gi
    KUBERNETES_SERVICE_CPU_REQUEST: 8
    KUBERNETES_SERVICE_CPU_LIMIT: 12
  image:
    name: python:3.11
    entrypoint: [""]
  needs: [test:build_image]
  script:
    - env
    - python -m ensurepip --upgrade
    - python -m pip install --no-cache-dir -e .
    - python -m pip install --no-cache-dir click
    - python .gitlab/scripts/check_imports.py --package-name megatron.core
  rules:
    - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME != 'main' && $CI_MERGE_REQUEST_TARGET_BRANCH_NAME != 'dev'
      when: never
    - if: $UNIT_TEST == 'yes' && $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_TARGET_BRANCH_PROTECTED != "true"
      allow_failure: true
      when: on_success
    - if: $UNIT_TEST == 'yes' && $UNIT_TEST_REPEAT != '0'
      when: on_success
  retry:
    max: 2


================================================
FILE: .gitlab/stages/03.integration-tests.yml
================================================
.integration_tests_rules:
  stage: integration_tests
  rules:
    - if: $BUILD == "no"
      when: never
    - if: $INTEGRATION_TEST == "yes"
      when: on_success
    - when: never

default:
  id_tokens:
    VAULT_JWT_TOKEN:
      aud: https://stg.vault.nvidia.com

include:
  - project: dl/jet/gitlab-templates
    ref: main
    file: downstreams.yml

integration:configure:
  needs:
    - test:build_image
    - job: test:unit_tests_pyt(DEV)_mcore(latest)
      optional: true
    - job: test:unit_tests_pyt(LTS)_mcore(latest)
      optional: true
    - job: test:build_nemo_image
  extends: [.integration_tests_rules]
  image: ${UTILITY_IMAGE}:${CI_PIPELINE_ID}
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  before_script:
    - git rm -r tests/test_utils/local_recipes || true
    - git submodule add --force https://gitlab-ci-token:${CI_JOB_TOKEN}@${GITLAB_ENDPOINT}/ADLR/megatron-lm-convergence-tests.git tests/test_utils/local_recipes
    - ls tests/test_utils/local_recipes
  script:
    - set -x
    - |
      A100_CLUSTER=$([[ "$CLUSTER_A100" != "" ]] && echo $CLUSTER_A100 || echo $DEFAULT_A100_CLUSTER)
      H100_CLUSTER=$([[ "$CLUSTER_H100" != "" ]] && echo $CLUSTER_H100 || echo $DEFAULT_H100_CLUSTER)
      GB200_CLUSTER=$([[ "$CLUSTER_GB200" != "" ]] && echo $CLUSTER_GB200 || echo $DEFAULT_GB200_CLUSTER)
    - |
      ARGS=(
        "--scope $INTEGRATION_TEST_SCOPE"
        "--n-repeat 1"
        "--time-limit $INTEGRATION_TEST_TIME_LIMIT"
        "--test-cases $INTEGRATION_TEST_CASES"
        "--container-image ${UTILITY_IMAGE}"
        "--container-tag ${CI_PIPELINE_ID}"
        "--slurm-account ${CI_SLURM_ACCOUNT}"
        "--no-enable-warmup"
        "--dependent-job integration:configure"
        "--enable-lightweight-mode"
      )
    - |
      export PYTHONPATH=$(pwd)
      python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
        ${ARGS[@]} \
        --environment dev \
        --platform dgx_a100 \
        --cluster $A100_CLUSTER \
        --output-path "functional-test-job-dev-A100.yaml"
    - |
      export PYTHONPATH=$(pwd)
      python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
        ${ARGS[@]} \
        --environment dev \
        --platform dgx_h100 \
        --cluster $H100_CLUSTER \
        --output-path "functional-test-job-dev-H100.yaml"
    - |
      export PYTHONPATH=$(pwd)
      python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
        ${ARGS[@]} \
        --environment lts \
        --platform dgx_a100 \
        --cluster $A100_CLUSTER \
        --output-path "functional-test-job-lts-A100.yaml"
    - |
      export PYTHONPATH=$(pwd)
      python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
        ${ARGS[@]} \
        --environment lts \
        --platform dgx_h100 \
        --cluster $H100_CLUSTER \
        --output-path "functional-test-job-lts-H100.yaml"
    - |
      export PYTHONPATH=$(pwd)
      python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
        ${ARGS[@]} \
        --environment lts \
        --platform dgx_gb2100 \
        --cluster $GB200_CLUSTER \
        --output-path "functional-test-job-lts-GB200.yaml"
    - |
      export PYTHONPATH=$(pwd)
      python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
        ${ARGS[@]} \
        --environment lts \
        --platform dgx_gb200 \
        --cluster $GB200_CLUSTER \
        --output-path "functional-test-job-lts-GB200.yaml"
  artifacts:
    paths:
      - functional-test-job-lts-A100.yaml
      - functional-test-job-lts-H100.yaml
      - functional-test-job-dev-H100.yaml
      - functional-test-job-dev-A100.yaml
      - functional-test-job-lts-GB200.yaml
      - functional-test-job-dev-GB200.yaml
      - tests/test_utils/local_recipes

.integration_run:
  needs:
    - integration:configure
    - test:build_image
    - job: wait_for_resources
      optional: true
  extends: [.integration_tests_rules]
  trigger:
    include:
      - artifact: functional-test-job-$ENVIRONMENT-$CLUSTER.yaml
        job: integration:configure
    strategy: depend
  variables:
    RO_API_TOKEN: $PAT
    CONTAINER_TAG: $CI_PIPELINE_ID
    CI_MCORE_LTS_IMAGE: $CI_MCORE_LTS_IMAGE
    GITLAB_ENDPOINT: $GITLAB_ENDPOINT
    PARENT_PIPELINE_ID: $CI_PIPELINE_ID
    DASHBOARD_ENDPOINT: $DASHBOARD_ENDPOINT
    MCORE_MR_COMMIT: $MCORE_MR_COMMIT
    MCORE_BACKWARDS_COMMIT: $MCORE_BACKWARDS_COMMIT
  inherit:
    variables: true

integration:run_lts_dgx_a100:
  extends: [.integration_run]
  allow_failure: true
  variables:
    ENVIRONMENT: lts
    CLUSTER: A100

integration:run_lts_dgx_h100:
  extends: [.integration_run]
  allow_failure: true
  variables:
    ENVIRONMENT: lts
    CLUSTER: H100

integration:run_lts_dgx_gb200:
  extends: [.integration_run]
  allow_failure: true
  variables:
    ENVIRONMENT: lts
    CLUSTER: GB200

integration:run_dev_dgx_a100:
  extends: [.integration_run]
  variables:
    ENVIRONMENT: dev
    CLUSTER: A100

integration:run_dev_dgx_h100:
  extends: [.integration_run]
  variables:
    ENVIRONMENT: dev
    CLUSTER: H100

integration:run_dev_dgx_gb200:
  extends: [.integration_run]
  variables:
    ENVIRONMENT: dev
    CLUSTER: GB200


================================================
FILE: .gitlab/stages/04.functional-tests.yml
================================================
.functional_tests_rules:
  stage: functional_tests
  rules:
    - if: $BUILD == "no"
      when: never
    - if: $FUNCTIONAL_TEST == "yes"
      when: on_success
    - when: never
default:
  id_tokens:
    VAULT_JWT_TOKEN:
      aud: https://stg.vault.nvidia.com

include:
  - project: dl/jet/gitlab-templates
    ref: main
    file: downstreams.yml

functional:configure:
  needs:
    - test:build_image
    - test:build_nemo_image
    - job: test:unit_tests_pyt(DEV)_mcore(latest)
      optional: true
    - job: test:unit_tests_pyt(LTS)_mcore(latest)
      optional: true
    - job: integration:run_lts_dgx_a100
      optional: true
    - job: integration:run_dev_dgx_a100
      optional: true
    - job: integration:run_lts_dgx_h100
      optional: true
    - job: integration:run_dev_dgx_h100
      optional: true
  extends: [.functional_tests_rules]
  image: ${UTILITY_IMAGE}:${CI_PIPELINE_ID}
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  before_script:
    - git rm -r tests/test_utils/local_recipes || true
    - git submodule add --force https://gitlab-ci-token:${CI_JOB_TOKEN}@${GITLAB_ENDPOINT}/ADLR/megatron-lm-convergence-tests.git tests/test_utils/local_recipes
    - ls tests/test_utils/local_recipes
  script:
    - set -x
    - |
      A100_CLUSTER=$([[ "$CLUSTER_A100" != "" ]] && echo $CLUSTER_A100 || echo $DEFAULT_A100_CLUSTER)
      H100_CLUSTER=$([[ "$CLUSTER_H100" != "" ]] && echo $CLUSTER_H100 || echo $DEFAULT_H100_CLUSTER)
      GB200_CLUSTER=$([[ "$CLUSTER_GB200" != "" ]] && echo $CLUSTER_GB200 || echo $DEFAULT_GB200_CLUSTER)
    - |
      RECORD_CHECKPOINTS=$([[ "$CI_MERGE_REQUEST_LABELS" == *"Record checkpoints"* || "$FUNCTIONAL_TEST_RECORD_CHECKPOINTS" == "yes" ]] && echo "true" || echo "false")
    - |
      if [[ "$FUNCTIONAL_TEST_SCOPE" == "release" || "$FUNCTIONAL_TEST_SCOPE" == "weekly" ]]; then
        FUNCTIONAL_TEST_NAME=$(eval echo $FUNCTIONAL_TEST_NAME)
        RELEASE_ARGS=(
          "--run-name"
          $FUNCTIONAL_TEST_NAME
          "--wandb-experiment"
          $(echo $FUNCTIONAL_TEST_NAME | tr '/' '-')
        )
      else
        RELEASE_ARGS=()
      fi
    - |
      ARGS=(
        "--scope $FUNCTIONAL_TEST_SCOPE"
        "--n-repeat $FUNCTIONAL_TEST_REPEAT"
        "--time-limit $FUNCTIONAL_TEST_TIME_LIMIT"
        "--test-cases $FUNCTIONAL_TEST_CASES"
        "--container-image ${UTILITY_IMAGE}"
        "--container-tag ${CI_PIPELINE_ID}"
        "--dependent-job functional:configure"
        "--record-checkpoints ${RECORD_CHECKPOINTS}"
        "--slurm-account ${CI_SLURM_ACCOUNT}"
        "--no-enable-warmup"
      )
    - |
      export PYTHONPATH=$(pwd)
      python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
        ${ARGS[@]} \
        --environment dev \
        --platform dgx_a100 \
        --cluster $A100_CLUSTER \
        --output-path "functional-test-job-dev-A100.yaml" \
        ${RELEASE_ARGS[@]}
    - |
      export PYTHONPATH=$(pwd)
      python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
        ${ARGS[@]} \
        --environment dev \
        --platform dgx_h100 \
        --cluster $H100_CLUSTER \
        --output-path "functional-test-job-dev-H100.yaml" \
        ${RELEASE_ARGS[@]}
    - |
      export PYTHONPATH=$(pwd)
      python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
        ${ARGS[@]} \
        --environment lts \
        --platform dgx_a100 \
        --cluster $A100_CLUSTER \
        --output-path "functional-test-job-lts-A100.yaml" \
        ${RELEASE_ARGS[@]}
    - |
      export PYTHONPATH=$(pwd)
      python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
        ${ARGS[@]} \
        --environment lts \
        --platform dgx_h100 \
        --cluster $H100_CLUSTER \
        --output-path "functional-test-job-lts-H100.yaml" \
        ${RELEASE_ARGS[@]}
    - |
      export PYTHONPATH=$(pwd)
      python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
        ${ARGS[@]} \
        --environment dev \
        --platform dgx_gb200 \
        --cluster $GB200_CLUSTER \
        --output-path "functional-test-job-dev-GB200.yaml" \
        ${RELEASE_ARGS[@]}
    - |
      export PYTHONPATH=$(pwd)
      python tests/test_utils/python_scripts/generate_jet_trigger_job.py \
        ${ARGS[@]} \
        --environment lts \
        --platform dgx_gb200 \
        --cluster $GB200_CLUSTER \
        --output-path "functional-test-job-lts-GB200.yaml" \
        ${RELEASE_ARGS[@]}
  artifacts:
    paths:
      - functional-test-job-lts-A100.yaml
      - functional-test-job-lts-H100.yaml
      - functional-test-job-dev-A100.yaml
      - functional-test-job-dev-H100.yaml
      - functional-test-job-lts-GB200.yaml
      - functional-test-job-dev-GB200.yaml
      - tests/test_utils/local_recipes

.functional_run:
  needs:
    - functional:configure
    - test:build_image
  extends: [.functional_tests_rules]
  trigger:
    include:
      - artifact: functional-test-job-$ENVIRONMENT-$CLUSTER.yaml
        job: functional:configure
    strategy: depend
  variables:
    RO_API_TOKEN: $PAT
    CONTAINER_TAG: $CI_PIPELINE_ID
    CI_MCORE_LTS_IMAGE: $CI_MCORE_LTS_IMAGE
    GITLAB_ENDPOINT: $GITLAB_ENDPOINT
    PARENT_PIPELINE_ID: $CI_PIPELINE_ID
    DASHBOARD_ENDPOINT: $DASHBOARD_ENDPOINT
    MCORE_MR_COMMIT: $MCORE_MR_COMMIT
    MCORE_BACKWARDS_COMMIT: $MCORE_BACKWARDS_COMMIT
    CLUSTER: $CLUSTER

  inherit:
    variables: true

functional:run_lts_dgx_a100:
  extends: [.functional_run]
  allow_failure: true
  variables:
    ENVIRONMENT: lts
    CLUSTER: A100

functional:run_lts_dgx_h100:
  extends: [.functional_run]
  allow_failure: true
  variables:
    ENVIRONMENT: lts
    CLUSTER: H100

functional:run_lts_dgx_gb200:
  extends: [.functional_run]
  allow_failure: true
  variables:
    ENVIRONMENT: lts
    CLUSTER: GB200

functional:run_dev_dgx_a100:
  extends: [.functional_run]
  variables:
    ENVIRONMENT: dev
    CLUSTER: A100

functional:run_dev_dgx_h100:
  extends: [.functional_run]
  variables:
    ENVIRONMENT: dev
    CLUSTER: H100

functional:run_dev_dgx_gb200:
  extends: [.functional_run]
  variables:
    ENVIRONMENT: dev
    CLUSTER: GB200

functional:run_nemo:
  extends: [.functional_tests_rules]
  trigger:
    project: "dl/joc/nemo-ci"
    branch: main-mirror
    strategy: depend
  inherit:
    variables: true
  variables:
    MCORE_COMMIT: $CI_COMMIT_SHA
    TEST_NEMO2_MODULE: "True"
    ALLOW_FAILURE_DEPENDENCY: "True"
    TESTS_TO_RUN_ON_THIS_COMMIT: nightly
  rules:
    - if: $FUNCTIONAL_TEST == "yes"
      when: manual
      allow_failure: true
    - when: never

functional:x_notify:
  extends: [.functional_tests_rules]
  image: ${UTILITY_IMAGE}:${CI_PIPELINE_ID}
  needs:
    - functional:run_lts_dgx_a100
    - functional:run_dev_dgx_a100
    - functional:run_lts_dgx_h100
    - functional:run_dev_dgx_h100
    - functional:run_lts_dgx_gb200
    - functional:run_dev_dgx_gb200
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  variables:
    RO_API_TOKEN: ${PROJECT_ACCESS_TOKEN_MCORE}
    CONTEXT: $FUNCTIONAL_TEST_SCOPE
  script:
    - env
    - |
      if [[ "$CI_COMMIT_BRANCH" == *dev* ]]; then
        export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK_DEV}
      else
        export WEBHOOK_URL=${MCORE_NOTIFICATION_HOOK}
      fi
    - export RO_API_TOKEN=${PROJECT_ACCESS_TOKEN_MCORE}
    - export GITLAB_ENDPOINT
    - export CONTEXT=$FUNCTIONAL_TEST_SCOPE
    - export TAG_TEAM=$([[ "$CI_COMMIT_BRANCH" == "main" ]] && echo "1" || "0")
    - export TEAM_SLUG=$SLACK_ADMIN
    - |
      python tests/test_utils/python_scripts/notify.py \
        --pipeline-id "${CI_PIPELINE_ID}" \
        --check-for functional-tests \
        --pipeline-context $CONTEXT \
        --pipeline-created-at "${CI_PIPELINE_CREATED_AT}"

  artifacts:
    when: always
    paths:
      - scripts
  rules:
    - if: ($CI_PIPELINE_SOURCE == "schedule" || $CI_COMMIT_BRANCH == "main") && $FUNCTIONAL_TEST == "yes"
      when: always
    - when: never

functional:x_download_golden_values:
  extends: [.functional_tests_rules]
  image: ${UTILITY_IMAGE}:${CI_PIPELINE_ID}
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  script:
    - env
    - export RO_API_TOKEN=${PROJECT_ACCESS_TOKEN_MCORE}
    - export GITLAB_ENDPOINT
    - python tests/test_utils/python_scripts/download_golden_values.py --pipeline-id ${CI_PIPELINE_ID}
  artifacts:
    paths:
      - tests/
  rules:
    - if: $FUNCTIONAL_TEST == "yes"
      when: manual
      allow_failure: true
    - when: never


================================================
FILE: .gitlab/stages/05.publish.yml
================================================
.publish_common_release:
  stage: publish
  rules:
    - if: $CI_PIPELINE_SOURCE == "web" && $PUBLISH == "yes" && $PUBLISH_SCOPE == "release"
      when: manual
    - if: $PUBLISH == "yes" && $PUBLISH_SCOPE == "release"
      when: on_success
    - when: never

publish:docs:
  extends: [.publish_common_release]
  image: ${UTILITY_IMAGE}:${CI_PIPELINE_ID}
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  before_script:
    - eval PUBLISH_COMMIT=$PUBLISH_COMMIT
    - git fetch origin '+refs/merge-requests/*:refs/remotes/merge-requests/*'
    - git fetch origin $PUBLISH_COMMIT
    - git checkout $PUBLISH_COMMIT
  script:
    - cd ..
    - rm -rf documentation && git clone --recursive https://gitlab-ci-token:${PAT}@${GITLAB_ENDPOINT}/nemo-megatron-core-tme/documentation.git
    - cd documentation/megatron-lm
    - git config --global user.email "mcore-bot@nvidia.com"
    - git config --global user.name "Mcore Bot"
    - git fetch origin '+refs/merge-requests/*:refs/remotes/merge-requests/*'
    - git fetch origin $PUBLISH_COMMIT
    - git checkout $PUBLISH_COMMIT
    - cd ..
    - git add megatron-lm
    - |
      git commit -m 'feat: Bump mcore'

    - git push
  rules:
    - if: '$CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "push"'
      allow_failure: true
    - when: never

publish:upload_statistics:
  stage: publish
  image: ${UTILITY_IMAGE}:${CI_PIPELINE_ID}
  needs:
    - job: test:unit_tests_pyt(DEV)_mcore(latest)
    - job: test:unit_tests_pyt(LTS)_mcore(latest)
    - job: functional:run_lts_dgx_a100
      optional: true
    - job: functional:run_lts_dgx_h100
      optional: true
    - job: functional:run_dev_dgx_a100
      optional: true
    - job: functional:run_dev_dgx_h100
      optional: true
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  script:
    - env
    - export RO_API_TOKEN=${PROJECT_ACCESS_TOKEN_MCORE}
    - export GITLAB_ENDPOINT
    - export DASHBOARD_ENDPOINT
    - python tests/test_utils/python_scripts/dashboard.py --pipeline-id ${CI_PIPELINE_ID}
  rules:
    - if: ($CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' || $CI_MERGE_REQUEST_EVENT_TYPE == 'merge_train') && ($UNIT_TEST == "yes" || $INTEGRATION_TEST == "yes" || $FUNCTIONAL_TEST == "yes")
      when: always
      allow_failure: true
    - when: never

publish:merge_into_dev:
  stage: publish
  image: ${CI_MCORE_DEV_IMAGE}:${CI_PIPELINE_ID}
  script:
    - export GITLAB_ENDPOINT
    - export RO_API_TOKEN=${PAT}
    - |
      git config --global user.email "mcore-bot@nvidia.com"
      git config --global user.name "Mcore Bot"
    - SOURCE_BRANCH=ci/merge-into-dev
    - |
      set -x
      set +e

      SOURCE_BRANCH_EXISTS=$([[ "$(git ls-remote --heads origin refs/heads/$SOURCE_BRANCH)" != "" ]] && echo true || echo false)

      if [[ "$SOURCE_BRANCH_EXISTS" == "false" ]]; then
        git fetch origin dev
        git checkout -b $SOURCE_BRANCH origin/dev
      else
        git fetch origin $SOURCE_BRANCH
        git checkout origin/$SOURCE_BRANCH
      fi

      git fetch origin main
      git merge origin/main
      CLEAN=$?
      set -e
    - |
      if [[ "$CLEAN" -ne 0 ]]; then
        echo "Merge failed"
        URL="https://${GITLAB_ENDPOINT}/${CI_PROJECT_PATH}/-/commit/${CI_COMMIT_SHA}"
        SHORT_SHA=$(git rev-parse --short HEAD)
        MESSAGE='{
          "blocks": [
            {
              "type": "section",
              "text": {
                "type": "mrkdwn",
                "text": "beep boop 🤖: Cherry-picking main (<'$URL'|'${SHORT_SHA}'>) into dev failed.\nPlease merge it manually into '$SOURCE_BRANCH'.\n\ncc '$SLACK_ADMIN_DEV'"
              }
            }
          ]
        }'

        curl -X POST -H "Content-type: application/json" --data "$MESSAGE" ${MCORE_NOTIFICATION_HOOK_DEV}

        exit 1
      fi
    - git push -u origin ci/merge-into-dev
    - |
      curl \
        --header "PRIVATE-TOKEN: $PROJECT_ACCESS_TOKEN_MCORE" \
        --url https://${GITLAB_ENDPOINT}/api/v4/projects/${CI_PROJECT_ID}/merge_requests \
        -d "source_branch=$SOURCE_BRANCH" \
        -d "target_branch=dev" \
        -d "title=chore: Merge into dev" \
        -d "labels=test::Run functional tests" \
        -d "merge_when_pipeline_succeeds=true" \
        -d "description=[🤖]: Hi @zijiey 👋,<br><br>merging \`$SOURCE_BRANCH\` into \`dev\` for you! 🚀<br><br>Please review and approve this cherry pick by your convenience\!"
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  rules:
    - if: $CI_COMMIT_BRANCH == "main" && $CI_PIPELINE_SOURCE == "push"
      allow_failure: true
    - when: never

publish:approve_merge_gate:
  stage: publish
  image: maniator/gh
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  script:
    - |
      set -eoux pipefail
      EXIT_CODE=0
      apk add python3
      python -m venv .venv
      source .venv/bin/activate
      pip install --no-cache-dir python-gitlab click pygithub
      export GITLAB_ENDPOINT
      export RO_API_TOKEN=${PROJECT_ACCESS_TOKEN_MCORE}
      if [[ "$CI_COMMIT_BRANCH" == *main* ]]; then
        export TARGET_BRANCH="main"
      elif [[ "$CI_COMMIT_BRANCH" == *dev* ]]; then
        export TARGET_BRANCH="dev"
      fi

      python tests/test_utils/python_scripts/check_status_of_main.py --target-branch "$TARGET_BRANCH" --once || EXIT_CODE=$?

      export GH_TOKEN=$GH_TOKEN
      export REPO=NVIDIA/Megatron-LM

      if [[ $EXIT_CODE -eq 0 ]]; then
        export STATUS="approved"
        export COMMENT="Main is healthy. Submitting PR."
      elif [[ $EXIT_CODE -eq 1 ]]; then
        export STATUS="rejected"
        export COMMENT="$TARGET_BRANCH is not healthy. An automation engineer is investigating. No need to take any action."
      elif [[ $EXIT_CODE -eq 2 ]]; then
        echo "Main is running. We won't cancel the deployment."
        exit 0
      fi

      if [[ $EXIT_CODE -lt 2 ]]; then
        python tests/test_utils/python_scripts/approve_merge_gate.py
      fi

  retry:
    max: 2
  rules:
    - if: $CI_PIPELINE_SOURCE == "schedule" && ($CI_COMMIT_BRANCH == 'ci-approve-dev' || $CI_COMMIT_BRANCH == 'ci-approve-main')
      when: always
    - when: never

publish:sync_branches:
  stage: publish
  image: python:3.10
  script:
    - set -x
    - git remote add github https://github.com/NVIDIA/Megatron-LM.git || true
    - git remote add gitlab https://gitlab-ci-token:${PROJECT_ACCESS_TOKEN_MCORE}@${GITLAB_ENDPOINT}/${CI_PROJECT_NAMESPACE}/Megatron-LM.git || true
    - BRANCHES=("main" "dev")
    - |
      while IFS= read -r line; do
        BRANCHES+=("$line") # Add each line to the array
      done < <( \
        git ls-remote --heads "https://token:${PAT}@github.com/NVIDIA/Megatron-LM.git" 'refs/heads/core_*' | \
        cut -d'/' -f3- \
      )
    - |
      for BRANCH in "${BRANCHES[@]}"; do
        # Define the full refspec for the branch
        BRANCH_REF="refs/heads/$BRANCH"
        
        echo "--- Processing branch: $BRANCH ---"
        
        # 1. Explicitly fetch the branch ref from 'github'
        # This avoids fetching a tag with the same name.
        # It updates/creates the remote-tracking branch (e.g., 'refs/remotes/github/core_r0.10.0')
        if ! git fetch github "$BRANCH_REF:refs/remotes/github/$BRANCH"; then
            echo "Failed to fetch branch $BRANCH. Skipping."
            continue
        fi

        # 2. Create or update the local branch from the remote-tracking branch we just fetched.
        # The -B flag creates the branch if it doesn't exist or resets it if it does.
        if ! git checkout -B "$BRANCH" "github/$BRANCH"; then
            echo "Failed to checkout local branch $BRANCH. Skipping."
            continue
        fi
        
        # 3. Now you are on the correct local branch, ready to push.
        echo "Successfully on branch $BRANCH. Echoing push command:"
        git push -u gitlab HEAD:refs/heads/$BRANCH --force
        echo "-----------------------------------"
      done
  tags:
    - arch/amd64
    - env/prod
    - origin/jet-fleet
    - owner/jet-core
    - purpose/utility
    - team/megatron
  retry:
    max: 2
  rules:
    - if: $CI_PIPELINE_SOURCE == "schedule" && ($CI_COMMIT_BRANCH == 'ci-sync-branches')
      when: always
    - when: never


================================================
FILE: .gitlab-ci.yml
================================================
.merge_train_rule: &merge_train_rule
  UNIT_TEST: "yes"
  UNIT_TEST_REPEAT: 1
  UNIT_TEST_TIMEOUT: 30
  INTEGRATION_TEST: "no"
  INTEGRATION_TEST_SCOPE: mr
  FUNCTIONAL_TEST: "yes"
  FUNCTIONAL_TEST_SCOPE: mr-slim
  FUNCTIONAL_TEST_REPEAT: 1
  FUNCTIONAL_TEST_TIME_LIMIT: 2700
  CLUSTER_A100: ""
  CLUSTER_H100: ""
  PUBLISH: "no"

workflow:
  rules:
    # Do not trigger for forks
    - if: $CI_PROJECT_NAMESPACE != "ADLR" || ($CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_PROJECT_PATH != "ADLR/megatron-lm")
      when: never

    - if: $CI_PIPELINE_SOURCE == "schedule" && ($CI_COMMIT_BRANCH == 'ci-approve-dev' || $CI_COMMIT_BRANCH == 'ci-approve-main')

    # ci-branches only for schedule
    - if: $CI_COMMIT_BRANCH =~ /ci-/ && $CI_PIPELINE_SOURCE != "schedule"
      when: never

    # For schedules pipelines
    - if: $CI_PIPELINE_SOURCE == "schedule"
      auto_cancel:
        on_new_commit: none

    # For manual pipelines (GitLab UI)
    - if: $CI_PIPELINE_SOURCE == "web"

    # For pipelines created via the REST API (personal access token)
    - if: $CI_PIPELINE_SOURCE == "api"

    # For trigger pipelines
    - if: $CI_PIPELINE_SOURCE == "trigger"

    # For push to main
    - if: $CI_PIPELINE_SOURCE == 'push' && ($CI_COMMIT_BRANCH == "main" || $CI_COMMIT_BRANCH == "dev" || $CI_COMMIT_BRANCH =~ /^core_/)
      variables:
        UNIT_TEST: "no"
        INTEGRATION_TEST: "no"
        FUNCTIONAL_TEST: "yes"
        FUNCTIONAL_TEST_SCOPE: mr
        FUNCTIONAL_TEST_REPEAT: 5
        FUNCTIONAL_TEST_RECORD_CHECKPOINTS: "no"
        FUNCTIONAL_TEST_TIME_LIMIT: 3600
        CLUSTER_A100: ""
        CLUSTER_H100: ""
        PUBLISH: "no"
      auto_cancel:
        on_new_commit: interruptible

    # For merge-trains that need to be fast-tracked
    - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merge_train' && $CI_MERGE_REQUEST_LABELS =~ /fast-track/
      variables:
        UNIT_TEST: "yes"
        UNIT_TEST_REPEAT: 1
        UNIT_TEST_TIMEOUT: 30
        INTEGRATION_TEST: "no"
        FUNCTIONAL_TEST: "no"
        CLUSTER_A100: ""
        CLUSTER_H100: ""
        PUBLISH: "no"

    # For normal merge-trains
    - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merge_train'
      variables: *merge_train_rule

    # For MRs with integration suite
    - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_LABELS =~ /Run tests/
      variables:
        UNIT_TEST: "yes"
        UNIT_TEST_REPEAT: 1
        UNIT_TEST_TIMEOUT: 30
        INTEGRATION_TEST: "yes"
        INTEGRATION_TEST_SCOPE: mr
        FUNCTIONAL_TEST: "no"
        FUNCTIONAL_TEST_SCOPE: mr-slim
        FUNCTIONAL_TEST_REPEAT: 1
        FUNCTIONAL_TEST_TIME_LIMIT: 2700
        CLUSTER_A100: ""
        CLUSTER_H100: ""
        PUBLISH: "no"

    # For MRs with nightly
    - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_LABELS =~ /Run nightly/
      variables:
        UNIT_TEST: "yes"
        UNIT_TEST_REPEAT: 1
        UNIT_TEST_TIMEOUT: 30
        INTEGRATION_TEST: "no"
        FUNCTIONAL_TEST: "yes"
        FUNCTIONAL_TEST_SCOPE: nightly
        FUNCTIONAL_TEST_REPEAT: 5
        FUNCTIONAL_TEST_RECORD_CHECKPOINTS: "no"
        FUNCTIONAL_TEST_TIME_LIMIT: 2700
        CLUSTER_A100: ""
        CLUSTER_H100: ""
        PUBLISH: "no"

    # For MRs with weekly
    - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_LABELS =~ /Run weekly/
      variables:
        UNIT_TEST: "yes"
        UNIT_TEST_REPEAT: 1
        UNIT_TEST_TIMEOUT: 30
        INTEGRATION_TEST: "no"
        FUNCTIONAL_TEST: "yes"
        FUNCTIONAL_TEST_SCOPE: weekly
        FUNCTIONAL_TEST_REPEAT: 1
        FUNCTIONAL_TEST_RECORD_CHECKPOINTS: "no"
        FUNCTIONAL_TEST_TIME_LIMIT: 9000
        CLUSTER_A100: ""
        CLUSTER_H100: ""
        PUBLISH: "no"

    # For MRs with heavy suite
    - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result' && $CI_MERGE_REQUEST_LABELS =~ /Run functional tests/
      variables:
        UNIT_TEST: "yes"
        UNIT_TEST_REPEAT: 1
        UNIT_TEST_TIMEOUT: 30
        INTEGRATION_TEST: "no"
        FUNCTIONAL_TEST: "yes"
        FUNCTIONAL_TEST_SCOPE: mr
        FUNCTIONAL_TEST_REPEAT: 1
        FUNCTIONAL_TEST_TIME_LIMIT: 2700
        CLUSTER_A100: ""
        CLUSTER_H100: ""
        PUBLISH: "no"

    # Default MRs
    - if: $CI_MERGE_REQUEST_EVENT_TYPE == 'merged_result'
      variables:
        UNIT_TEST: "yes"
        UNIT_TEST_REPEAT: 1
        UNIT_TEST_TIMEOUT: 30
        INTEGRATION_TEST: "no"
        FUNCTIONAL_TEST: "no"
        PUBLISH: "no"

    - when: never

  auto_cancel:
    on_new_commit: interruptible

stages:
  - build
  - test
  - integration_tests
  - functional_tests
  - publish

default:
  interruptible: true
  retry:
    max: 2
    when: runner_system_failure

variables:
  BUILD:
    value: "yes"
  UNIT_TEST:
    value: "yes"
    options:
      - "yes"
      - "no"
    description: To run the funtional test suite
  UNIT_TEST_REPEAT:
    value: "1"
    description: "Number of repetitions"
  UNIT_TEST_TIMEOUT:
    value: "30"
    description: Timeout (minutes) for Unit tests (all repeats)
  INTEGRATION_TEST:
    value: "yes"
    options:
      - "yes"
      - "no"
    description: To run the integration test suite
  INTEGRATION_TEST_SCOPE:
    value: "mr"
    options:
      - "mr"
      - "nightly"
      - "weekly"
      - "pre-release"
      - "release"
    description: "Testsuite to run (only for INTEGRATION_TEST=yes)"
  INTEGRATION_TEST_TIME_LIMIT:
    value: "900"
    description: "Timeout in seconds per test"
  INTEGRATION_TEST_CASES:
    value: "all"
    description: "Comma-separated list of test_cases to run. Use 'all' to run the full suite."
  FUNCTIONAL_TEST:
    value: "yes"
    options:
      - "yes"
      - "no"
    description: To run the funtional test suite
  FUNCTIONAL_TEST_SCOPE:
    value: "mr"
    options:
      - "mr"
      - "nightly"
      - "weekly"
      - "pre-release"
      - "release"
    description: "Testsuite to run (only for FUNCTIONAL_TEST=yes)"
  FUNCTIONAL_TEST_REPEAT:
    value: "5"
    description: "Number of repetitions per test"
  FUNCTIONAL_TEST_TIME_LIMIT:
    value: "2700"
    description: "Timeout in seconds per test"
  FUNCTIONAL_TEST_CASES:
    value: "all"
    description: "Comma-separated list of test_cases to run. Use 'all' to run the full suite."
  FUNCTIONAL_TEST_NAME:
    description: "Name of functional test run (only for pre-release and release)"
    value: "$$CI_COMMIT_SHA"
  FUNCTIONAL_TEST_RECORD_CHECKPOINTS:
    value: "no"
    description: "Record golden checkpoints"
    options:
      - "yes"
      - "no"
  CLUSTER_A100:
    value: "dgxa100_dracooci"
    options:
      - "dgxa100_dracooci"
      - "dgxa100_dracooci-ord"
    description: "Cluster for A100 workloads"
  CLUSTER_H100:
    value: "dgxh100_coreweave"
    options:
      - "dgxh100_coreweave"
      - "dgxh100_eos"
    description: "Cluster for H100 workloads"
  CLUSTER_GB200:
    value: "dgxgb200_oci-hsg"
    options:
      - "dgxgb200_oci-hsg"
    description: "Cluster for H100 workloads"
  PUBLISH:
    value: "no"
    options:
      - "yes"
      - "no"
    description: Build and publish a wheel to PyPi
  PUBLISH_COMMIT:
    value: "$$CI_COMMIT_SHA"
    description: Which commit to publish
  PUBLISH_VERSION_BUMP_BRANCH:
    value: "$$CI_COMMIT_BRANCH"
    description: Which branch to target for version bump
  PUBLISH_SCOPE:
    value: "code-freeze"
    options:
      - "code-freeze"
      - "release"
      - "review-reminder"
      - "upgrade-dependencies"
    description: Type of publish (freeze or final release)

  # CI wide variables
  CI_MCORE_LTS_IMAGE: ${GITLAB_ENDPOINT}:5005/adlr/megatron-lm/mcore_ci_lts
  CI_MCORE_DEV_IMAGE: ${GITLAB_ENDPOINT}:5005/adlr/megatron-lm/mcore_ci_dev
  CI_NEMO_IMAGE: ${GITLAB_ENDPOINT}:5005/adlr/megatron-lm/nemo_ci
  UTILITY_IMAGE: ${GITLAB_ENDPOINT}:5005/adlr/megatron-lm/mcore_utility
  TE_GIT_REF: ""

include:
  - .gitlab/stages/00.pre.yml
  - .gitlab/stages/01.build.yml
  - .gitlab/stages/02.test.yml
  - .gitlab/stages/03.integration-tests.yml
  - .gitlab/stages/04.functional-tests.yml
  - .gitlab/stages/05.publish.yml


================================================
FILE: .pre-commit-config.yaml
================================================
repos:
- repo: https://github.com/psf/black
  rev: 'refs/tags/24.4.2:refs/tags/24.4.2'
  hooks:
  - id: black
    files: ^megatron/core/.*|^tests/unit_tests/.*
    args: ["--skip-magic-trailing-comma", "--skip-string-normalization"]
- repo: https://github.com/pycqa/pylint
  rev: v3.2.6
  hooks:
  - id: pylint
    files: ^megatron/core/.*
- repo: https://github.com/pycqa/isort
  rev: 5.13.2
  hooks:
  - id: isort
    files: ^megatron/core/.*

================================================
FILE: .pylintrc
================================================
[MAIN]
ignore-paths=tests
max-line-length=100
load-plugins=pylint.extensions.bad_builtin
[MESSAGES CONTROL]
disable=all

enable=C0115,C0116,W0611,C0301,E0606,W0141
# C0115: missing-class-docstring
# C0116: missing-function-docstring
# W0611: unused-import
# C0301: line-too-long
# E0606: possibly-used-before-assignment
# W0141: bad-builtin (from bad_builtin extension)

[BASIC]
bad-functions=print

[BAD_BUILTIN]
# Specify which builtins should be flagged
bad-builtins=print

================================================
FILE: .python-version
================================================
3.12

================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to Megatron

Visit our [contributing page](https://docs.nvidia.com/megatron-core/developer-guide/latest/developer/contribute.html).

================================================
FILE: LICENSE
================================================
The following applies to all files unless otherwise noted:

# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

--

This repository also contains code from Hugging Face Inc., Google Research,
Facebook (from their Fairseq, Dino, and ParlAI projects), Microsoft (from their
Swin-Transformer project), Philip Popien, the Mamba project (Tri Dao and
Albert Gu), and the Triton language and compiler project (Philippe Tillet and
OpenAI). Files from these organizations have notices at the top of each file.
Below are licenses used in those files, as indicated.


--------------------------------------------------------------------------------------
-- LICENSE FOR Facebook, huggingface, Google Research, LLaVA, Mamba, TinyZero and vLLM code  --


                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

--------------------------------------------------------------------------------
LICENSE FOR
Facebook, Inc. and its affiliates,
Meta Platforms, Inc. and its affiliates,
Microsoft Corporation,
OpenGVLab/InternVL,
Triton language and compiler,
and DeepSeek.

MIT License

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
LICENSE FOR Thinking Machines Lab 

MIT License

Copyright 2025 Thinking Machines Lab 

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
LICENSE FOR
Meta Platforms, Inc. and affiliates.

BSD License

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

 * Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

 * Neither the name Meta nor the names of its contributors may be used to
   endorse or promote products derived from this software without specific
   prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: MANIFEST.in
================================================
include megatron/core/requirements.txt
include megatron/core/README.md
include megatron/core/package_info.py
global-exclude LICENSE
recursive-include requirements *


================================================
FILE: README.md
================================================
<div align="center">

Megatron-LM and Megatron Core
=============================

<h4>GPU-optimized library for training transformer models at scale</h4>

[![Documentation](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](https://docs.nvidia.com/megatron-core/developer-guide/latest/index.html)
[![version](https://img.shields.io/badge/release-0.15.0-green)](./CHANGELOG.md)
[![license](https://img.shields.io/badge/license-Apache-blue)](./LICENSE)

<div align="left">

## About

This repository contains two components: **Megatron-LM** and **Megatron Core**.

**Megatron-LM** is a reference example that includes Megatron Core plus pre-configured training scripts. Best for research teams, learning distributed training, and quick experimentation.

**Megatron Core** is a composable library with GPU-optimized building blocks for custom training frameworks. It provides transformer building blocks, advanced parallelism strategies (TP, PP, DP, EP, CP), mixed precision support (FP16, BF16, FP8, FP4), and model architectures. Best for framework developers and ML engineers building custom training pipelines.

**[Megatron Bridge](https://github.com/NVIDIA-NeMo/Megatron-Bridge)** provides bidirectional Hugging Face ↔ Megatron checkpoint conversion with production-ready recipes.

## Getting Started

**Install from PyPI:**

```bash
uv pip install megatron-core
```

**Or clone and install from source:**

```bash
git clone https://github.com/NVIDIA/Megatron-LM.git
cd Megatron-LM
uv pip install -e .
```

> **Note:** Building from source can use a lot of memory. If the build runs out of memory, limit parallel compilation jobs by setting `MAX_JOBS` (e.g. `MAX_JOBS=4 uv pip install -e .`).

For NGC container setup and all installation options, see the **[Installation Guide](https://docs.nvidia.com/megatron-core/developer-guide/latest/get-started/install.html)**.

- **[Your First Training Run](https://docs.nvidia.com/megatron-core/developer-guide/latest/get-started/quickstart.html)** - End-to-end training examples with data preparation
- **[Parallelism Strategies](https://docs.nvidia.com/megatron-core/developer-guide/latest/user-guide/parallelism-guide.html)** - Scale training across GPUs with TP, PP, DP, EP, and CP
- **[Contribution Guide](https://docs.nvidia.com/megatron-core/developer-guide/latest/developer/contribute.html)** - How to contribute to Megatron Core

# Latest News

- **[2026/03]** **Deprecating Python 3.10 support:** We're officially dropping Python 3.10 support with the upcoming 0.17.0 release. Downstream applications must raise their lower boundary to 3.12 to stay compatible with MCore.
- **[2026/01]** **[Dynamic Context Parallelism](https://developer.nvidia.com/blog/speeding-up-variable-length-training-with-dynamic-context-parallelism-and-nvidia-megatron-core/)** - Up to 1.48x speedup for variable-length sequence training with adaptive CP sizing.
- **[2025/12]** **Megatron Core development has moved to GitHub!** All development and CI now happens in the open. We welcome community contributions.
- **[2025/10]** **[Megatron Dev Branch](https://github.com/NVIDIA/Megatron-LM/tree/dev)** - early access branch with experimental features.
- **[2025/10]** **[Megatron Bridge](https://github.com/NVIDIA-NeMo/Megatron-Bridge)** - Bidirectional converter for interoperability between Hugging Face and Megatron checkpoints, featuring production-ready recipes for popular models.
- **[2025/08]** **[MoE Q3-Q4 2025 Roadmap](https://github.com/NVIDIA/Megatron-LM/issues/1729)** - Comprehensive roadmap for MoE features including DeepSeek-V3, Qwen3, advanced parallelism strategies, FP8 optimizations, and Blackwell performance enhancements.
- **[2025/08]** **[GPT-OSS Model](https://github.com/NVIDIA/Megatron-LM/issues/1739)** - Advanced features including YaRN RoPE scaling, attention sinks, and custom activation functions are being integrated into Megatron Core.
- **[2025/06]** **[Megatron MoE Model Zoo](https://github.com/yanring/Megatron-MoE-ModelZoo)** - Best practices and optimized configurations for training DeepSeek-V3, Mixtral, and Qwen3 MoE models with performance benchmarking and checkpoint conversion tools.
- **[2025/05]** Megatron Core v0.11.0 brings new capabilities for multi-data center LLM training ([blog](https://developer.nvidia.com/blog/turbocharge-llm-training-across-long-haul-data-center-networks-with-nvidia-nemo-framework/)).

<details>
<summary>Previous News</summary>

- **[2024/07]** Megatron Core v0.7 improves scalability and training resiliency and adds support for multimodal training ([blog](https://developer.nvidia.com/blog/train-generative-ai-models-more-efficiently-with-new-nvidia-Megatron-Core-functionalities/)).
- **[2024/06]** Megatron Core added supports for Mamba-based models. Check out our paper [An Empirical Study of Mamba-based Language Models](https://arxiv.org/pdf/2406.07887) and [code example](https://github.com/NVIDIA/Megatron-LM/tree/ssm/examples/mamba).
- **[2024/01 Announcement]** NVIDIA has released the core capabilities in **Megatron-LM** into [**Megatron Core**](https://github.com/NVIDIA/Megatron-LM/tree/main/megatron/core) in this repository. Megatron Core expands upon Megatron-LM's GPU-optimized techniques with more cutting-edge innovations on system-level optimizations, featuring composable and modular APIs.

</details>

# Project Structure

```
Megatron-LM/
├── megatron/
│   ├── core/                    # Megatron Core (kernels, parallelism, building blocks)
│   │   ├── models/              # Transformer models
│   │   ├── transformer/         # Transformer building blocks
│   │   ├── tensor_parallel/     # Tensor parallelism
│   │   ├── pipeline_parallel/   # Pipeline parallelism
│   │   ├── distributed/         # Distributed training (FSDP, DDP)
│   │   ├── optimizer/           # Optimizers
│   │   ├── datasets/            # Dataset loaders
│   │   ├── inference/           # Inference engines and server
│   │   └── export/              # Model export (e.g. TensorRT-LLM)
│   ├── training/                # Training scripts
│   ├── legacy/                  # Legacy components
│   ├── post_training/           # Post-training (quantization, distillation, pruning, etc.)
│   └── rl/                      # Reinforcement learning (RLHF, etc.)
├── examples/                    # Ready-to-use training examples
├── tools/                       # Utility tools
├── tests/                       # Comprehensive test suite
└── docs/                        # Documentation
```

# Performance Benchmarking

For our latest performance benchmarking results, please refer to [NVIDIA Megatron Bridge Performance Summary](https://docs.nvidia.com/nemo/megatron-bridge/latest/performance-summary.html).

Our codebase efficiently trains models from 2B to 462B parameters across thousands of GPUs, achieving up to **47% Model FLOP Utilization (MFU)** on H100 clusters.

![Model table](images/model_table.png)

**Benchmark Configuration:**

- **Vocabulary size**: 131,072 tokens
- **Sequence length**: 4096 tokens
- **Model scaling**: Varied hidden size, attention heads, and layers to achieve target parameter counts
- **Communication optimizations**: Fine-grained overlapping with DP (`--overlap-grad-reduce`, `--overlap-param-gather`), TP (`--tp-comm-overlap`), and PP (enabled by default)

**Key Results:**

- **6144 H100 GPUs**: Successfully benchmarked 462B parameter model training
- **Superlinear scaling**: MFU increases from 41% to 47-48% with model size
- **End-to-end measurement**: Throughputs include all operations (data loading, optimizer steps, communication, logging)
- **Production ready**: Full training pipeline with checkpointing and fault tolerance
- *Note: Performance results measured without training to convergence*

## Weak Scaling Results

Our weak scaled results show superlinear scaling (MFU increases from 41% for the smallest model considered to 47-48% for the largest models); this is because larger GEMMs have higher arithmetic intensity and are consequently more efficient to execute.

![Weak scaling](images/weak_scaling.png)

## Strong Scaling Results

We also strong scaled the standard GPT-3 model (our version has slightly more than 175 billion parameters due to larger vocabulary size) from 96 H100 GPUs to 4608 GPUs, using the same batch size of 1152 sequences throughout. Communication becomes more exposed at larger scale, leading to a reduction in MFU from 47% to 42%.

![Strong scaling](images/strong_scaling.png)

# Roadmaps

- **[MoE Roadmap](https://github.com/NVIDIA/Megatron-LM/issues/1729)** - DeepSeek-V3, Qwen3, advanced parallelism, FP8 optimizations, and Blackwell enhancements

# Resources

## Getting Help

- 📖 **[Documentation](https://docs.nvidia.com/megatron-core/developer-guide/latest/index.html)** - Official documentation
- 🐛 **[Issues](https://github.com/NVIDIA/Megatron-LM/issues)** - Bug reports and feature requests

## Contributing

We ❤️ contributions! Ways to contribute:

- 🐛 **Report bugs** - Help us improve reliability
- 💡 **Suggest features** - Shape the future of Megatron Core
- 📝 **Improve docs** - Make Megatron Core more accessible
- 🔧 **Submit PRs** - Contribute code improvements

**→ [Contributing Guide](https://docs.nvidia.com/megatron-core/developer-guide/latest/developer/contribute.html)**

## Citation

If you use Megatron in your research or project, we appreciate that you use the following citations:

```bibtex
@article{megatron-lm,
  title={Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism},
  author={Shoeybi, Mohammad and Patwary, Mostofa and Puri, Raul and LeGresley, Patrick and Casper, Jared and Catanzaro, Bryan},
  journal={arXiv preprint arXiv:1909.08053},
  year={2019}
}
```


================================================
FILE: codecov.yml
================================================
comment: false
coverage:
  status:
    project: false
    patch:
      default:
        target: 80%
        threshold: 5%
        base: auto
        if_ci_failed: error
        if_no_uploads: success
        if_not_found: success
fixes:
  - "/opt/megatron-lm/::"


================================================
FILE: docker/.ngc_version.dev
================================================
nvcr.io/nvidia/pytorch:26.02-py3

================================================
FILE: docker/.ngc_version.lts
================================================
nvcr.io/nvidia/pytorch:25.09-py3

================================================
FILE: docker/Dockerfile.ci.dev
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
# syntax=docker/dockerfile:1.3-labs

ARG FROM_IMAGE_NAME
FROM ${FROM_IMAGE_NAME} as main
ENV PIP_CONSTRAINT=""
ENV DEBIAN_FRONTEND=noninteractive
ARG UV_VERSION=0.7.2
ARG YQ_VERSION=4.44.1
ENV PATH="/root/.local/bin:$PATH"
ARG UV_PROJECT_ENVIRONMENT=/opt/venv
ENV UV_PROJECT_ENVIRONMENT=${UV_PROJECT_ENVIRONMENT}
ENV VIRTUAL_ENV=$UV_PROJECT_ENVIRONMENT
ENV PATH="$UV_PROJECT_ENVIRONMENT/bin:$PATH"
ENV UV_LINK_MODE=copy

RUN bash -ex <<"EOF"
    apt-get update
    apt-get install -y --no-install-recommends gettext python3-venv psmisc uuid-runtime
    apt-get clean
    python -m venv /opt/jet
    ARCH=$(uname -m)
    case "${ARCH}" in \
      "x86_64") YQ_ARCH=amd64 ;; \
      "aarch64") YQ_ARCH=arm64 ;; \
      "armv7l") YQ_ARCH=arm ;; \
      *) echo "Unsupported architecture: ${ARCH}" && exit 1 ;; \
    esac 
    wget https://github.com/mikefarah/yq/releases/download/v${YQ_VERSION}/yq_linux_${YQ_ARCH} -O /usr/local/bin/yq
    chmod a+x /usr/local/bin/yq
    curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh
EOF

COPY README.md pyproject.toml uv.lock /workspace/
COPY megatron/core/__init__.py /workspace/megatron/core/
COPY megatron/core/package_info.py /workspace/megatron/core/
ARG IMAGE_TYPE=dev
RUN --mount=type=cache,target=/root/.cache/uv \
    bash -ex <<"EOF"
    export NVTE_CUDA_ARCHS="80;90;100"
    uv venv ${UV_PROJECT_ENVIRONMENT} --system-site-packages
    uv sync --only-group build
    uv sync --extra ${IMAGE_TYPE} --extra mlm --link-mode copy --locked \
        --no-install-package torch \
        --no-install-package torchvision \
        --no-install-package triton \
        --no-install-package transformer-engine-cu12 \
        --no-install-package nvidia-cublas-cu12 \
        --no-install-package nvidia-cuda-cupti-cu12 \
        --no-install-package nvidia-cuda-nvrtc-cu12 \
        --no-install-package nvidia-cuda-runtime-cu12 \
        --no-install-package nvidia-cudnn-cu12 \
        --no-install-package nvidia-cufft-cu12 \
        --no-install-package nvidia-cufile-cu12 \
        --no-install-package nvidia-curand-cu12 \
        --no-install-package nvidia-cusolver-cu12 \
        --no-install-package nvidia-cusparse-cu12 \
        --no-install-package nvidia-cusparselt-cu12 \
        --no-install-package nvidia-nccl-cu12
EOF

# Install DeepEP
COPY docker/patches/deepep.patch /workspace/deepep.patch
RUN bash -ex <<"EOF"
    cd /workspace
    uv pip install nvidia-nvshmem-cu13==3.4.5
    pushd /opt/venv/lib/python3.12/site-packages/nvidia/nvshmem/lib/
        ln -s libnvshmem_host.so.3 libnvshmem_host.so
    popd

    git clone --branch hybrid-ep https://github.com/deepseek-ai/DeepEP.git
    pushd DeepEP
        git checkout eb9cee7de5a24193bf09500668d3a619d3d3f3fb
        patch -p1 < /workspace/deepep.patch
    popd
    TORCH_CUDA_ARCH_LIST="9.0 10.0 12.0" uv pip install --no-build-isolation -v DeepEP/.
    rm -rf DeepEP
EOF

COPY assets/ /opt/data/
ENV UV_PYTHON=$UV_PROJECT_ENVIRONMENT/bin/python

##### For NVIDIANS only #####
FROM main as jet
ARG JET_API_VERSION
ENV PATH="$PATH:/opt/jet/bin"
RUN --mount=type=secret,id=JET_INDEX_URLS bash -ex <<"EOF"
    JET_INDEX_URLS=$(cat /run/secrets/JET_INDEX_URLS)
    python -m venv /opt/jet 
    /opt/jet/bin/pip install --no-cache-dir $JET_INDEX_URLS \
        "jet-api==$JET_API_VERSION" "setuptools<82.0.0"
EOF

RUN --mount=type=secret,id=JET_INDEX_URLS \
    --mount=type=secret,id=LOGGER_INDEX_URL bash -ex <<"EOF"
    JET_INDEX_URLS=$(cat /run/secrets/JET_INDEX_URLS)
    LOGGER_INDEX_URL=$(cat /run/secrets/LOGGER_INDEX_URL)
    uv pip install --no-cache-dir --upgrade $LOGGER_INDEX_URL "one-logger"
    uv pip install --no-cache-dir --upgrade "setuptools<80.0.0,>=77.0.0"
    uv pip install --no-cache-dir --upgrade $JET_INDEX_URLS "jet-client~=4.0" 
EOF
###


================================================
FILE: docker/Dockerfile.ci.nemo
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
# syntax=docker/dockerfile:1.3-labs

ARG FROM_IMAGE_NAME
FROM ${FROM_IMAGE_NAME} as main

RUN apt-get update && \
    apt-get install -y --no-install-recommends gettext && \
    apt-get clean && \
    wget https://github.com/mikefarah/yq/releases/download/v4.44.1/yq_linux_amd64 -O /usr/local/bin/yq && \
    chmod a+x /usr/local/bin/yq

##### For NVIDIANS only #####
FROM main as jet
ARG JET_API_VERSION
RUN --mount=type=secret,id=JET_INDEX_URLS \
    JET_INDEX_URLS=$(cat /run/secrets/JET_INDEX_URLS) && \
    pip install --no-cache-dir jet-api==$JET_API_VERSION "jet-client~=4.0" --upgrade $JET_INDEX_URLS

ENV PATH="$PATH:/opt/jet/bin"
###


================================================
FILE: docker/Dockerfile.linting
================================================
# syntax=docker/dockerfile:experimental

ARG FROM_IMAGE_NAME
FROM $FROM_IMAGE_NAME as main
ENV DEBIAN_FRONTEND=noninteractive
ARG UV_VERSION=0.7.2
ARG YQ_VERSION=4.44.1
ENV PATH="/root/.local/bin:$PATH"
ENV UV_PROJECT_ENVIRONMENT=/opt/venv
ENV PATH="$UV_PROJECT_ENVIRONMENT/bin:$PATH"
ENV UV_LINK_MODE=copy
RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh
WORKDIR /opt/megatron-lm
COPY pyproject.toml uv.lock /opt/megatron-lm/
COPY megatron/core/package_info.py megatron/core/__init__.py /opt/megatron-lm/megatron/core/
RUN uv sync --locked --only-group linting --only-group test --only-group ci

##### For NVIDIANS only #####
FROM main as jet
ARG JET_API_VERSION
RUN --mount=type=secret,id=JET_INDEX_URLS \
      JET_INDEX_URLS=$(cat /run/secrets/JET_INDEX_URLS) && \ 
      uv pip install --no-cache-dir "jet-client~=2.0" --upgrade $JET_INDEX_URLS


================================================
FILE: docker/common/install.sh
================================================
#!/bin/bash
set -xeuo pipefail # Exit immediately if a command exits with a non-zero status

# Parse command line arguments
while [[ $# -gt 0 ]]; do
    case $1 in
    --base-image)
        BASE_IMAGE="$2"
        shift 2
        ;;
    --python-version)
        PYTHON_VERSION="$2"
        shift 2
        ;;
    --environment)
        ENVIRONMENT="$2"
        shift 2
        ;;
    --use-uv)
        USE_UV="true"
        shift 1
        ;;
    *)
        echo "Unknown option: $1"
        echo "Usage: $0 --base-image {pytorch|ubuntu} [--use-uv] [--python-version] [--environment]"
        exit 1
        ;;
    esac
done

if [[ -z "${PYTHON_VERSION:-}" ]]; then
    PYTHON_VERSION="3.12"
fi

if [[ -z "${USE_UV:-}" ]]; then
    USE_UV="false"
fi

# Validate base image argument
if [[ -z "${BASE_IMAGE:-}" || -z "${ENVIRONMENT:-}" ]]; then
    echo "Error: --base-image argument is required"
    echo "Usage: $0 --base-image {pytorch|ubuntu} --environment {dev|lts}"
    exit 1
fi

if [[ "$BASE_IMAGE" != "pytorch" && "$BASE_IMAGE" != "ubuntu" ]]; then
    echo "Error: --base-image must be either 'pytorch' or 'ubuntu'"
    echo "Usage: $0 --base-image {pytorch|ubuntu}"
    exit 1
fi

if [[ "$ENVIRONMENT" != "dev" && "$ENVIRONMENT" != "lts" ]]; then
    echo "Error: --environment must be either 'dev' or 'lts'"
    echo "Usage: $0 --environment {dev|lts}"
    exit 1
fi

main() {
    if [[ -n "${PAT:-}" ]]; then
        echo -e "machine github.com\n  login token\n  password $PAT" >~/.netrc
        chmod 600 ~/.netrc
    fi

    # Install dependencies
    export DEBIAN_FRONTEND=noninteractive

    # Install Python
    apt-get update
    apt-get install -y software-properties-common
    add-apt-repository ppa:deadsnakes/ppa -y
    apt-get install -y python$PYTHON_VERSION-dev python$PYTHON_VERSION-venv
    update-alternatives --install /usr/bin/python3 python3 /usr/bin/python$PYTHON_VERSION 1
    
    # Install tools
    apt-get update
    apt-get install -y wget curl git cmake

    # Install CUDA
    if [[ "$BASE_IMAGE" == "ubuntu" ]]; then
        rm /etc/apt/sources.list.d/cuda*.list || true
        rm /etc/apt/sources.list.d/nvidia-cuda.list || true
        wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
        dpkg -i cuda-keyring_1.1-1_all.deb
        rm cuda-keyring_1.1-1_all.deb
        apt-get update
        apt-get install -y cuda-toolkit-12-8 cudnn-cuda-12 libcudnn9-cuda-12 libcutlass-dev 
    fi

    # Clean up
    apt-get clean

    unset PIP_CONSTRAINT

    if [[ "$USE_UV" == "true" ]]; then
        if [[ "$BASE_IMAGE" == "pytorch" ]]; then
            UV_ARGS=(
                "--no-install-package" "torch"
                "--no-install-package" "torchvision"
                "--no-install-package" "triton"
                "--no-install-package" "nvidia-cublas-cu12"
                "--no-install-package" "nvidia-cuda-cupti-cu12"
                "--no-install-package" "nvidia-cuda-nvrtc-cu12"
                "--no-install-package" "nvidia-cuda-runtime-cu12"
                "--no-install-package" "nvidia-cudnn-cu12"
                "--no-install-package" "nvidia-cufft-cu12"
                "--no-install-package" "nvidia-cufile-cu12"
                "--no-install-package" "nvidia-curand-cu12"
                "--no-install-package" "nvidia-cusolver-cu12"
                "--no-install-package" "nvidia-cusparse-cu12"
                "--no-install-package" "nvidia-cusparselt-cu12"
                "--no-install-package" "nvidia-nccl-cu12"
            )
        else
            UV_ARGS=()
        fi
    
        # Install uv
        UV_VERSION="0.7.2"
        curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh

        # Create virtual environment and install dependencies
        uv venv ${UV_PROJECT_ENVIRONMENT} --system-site-packages

        # Install dependencies
        uv sync --locked --only-group build ${UV_ARGS[@]}
        uv sync \
            --link-mode copy \
            --locked \
            --extra ${ENVIRONMENT} \
            --all-groups ${UV_ARGS[@]}

        # Install the package
        uv pip install --no-deps -e .
    else
        python3 -m venv $UV_PROJECT_ENVIRONMENT
        . $UV_PROJECT_ENVIRONMENT/bin/activate

        pip install --pre --no-cache-dir --upgrade pip
        pip install --pre --no-cache-dir torch pybind11 wheel_stub ninja wheel packaging "setuptools<80.0.0,>=77.0.0"
        pip install --pre --no-cache-dir --no-build-isolation .
    fi

}

# Call the main function
main "$@"


================================================
FILE: docker/common/install_source_wheels.sh
================================================
#!/bin/bash
set -xeuo pipefail # Exit immediately if a command exits with a non-zero status

INPUT_WHEEL_DIR=$(pwd)/wheels

# Parse command line arguments
while [[ $# -gt 0 ]]; do
    case $1 in
    --input-wheel-dir)
        INPUT_WHEEL_DIR="$2"
        shift 2
        ;;
    --environment)
        ENVIRONMENT="$2"
        shift 2
        ;;
    *)
        echo "Unknown option: $1"
        echo "Usage: $0 --input-wheel-dir DIR"
        exit 1
        ;;
    esac
done

# Check if required arguments are provided
if [ -z "$INPUT_WHEEL_DIR" ] || [ -z "$ENVIRONMENT" ]; then
    echo "Error: --input-wheel-dir and --environment are required"
    echo "Usage: $0 --input-wheel-dir DIR --environment ENV"
    exit 1
fi

if [ "$ENVIRONMENT" = "dev" ]; then
    TE_WHEEL=$(ls $INPUT_WHEEL_DIR/transformer_engine*.whl) || true
    [ -z "$TE_WHEEL" ] && TE_WHEEL=$(bash docker/common/build_te.sh --output-wheel-dir $INPUT_WHEEL_DIR | tail -n 1)
fi

MAMBA_WHEEL=$(ls $INPUT_WHEEL_DIR/mamba*.whl) || true
[ -z "$MAMBA_WHEEL" ] && MAMBA_WHEEL=$(bash docker/common/build_mamba.sh --output-wheel-dir $INPUT_WHEEL_DIR | tail -n 1)

CAUSALCONV1D_WHEEL=$(ls $INPUT_WHEEL_DIR/causal_conv1d*.whl) || true
[ -z "$CAUSALCONV1D_WHEEL" ] && CAUSALCONV1D_WHEEL=$(bash docker/common/build_causalconv1d.sh --output-wheel-dir $INPUT_WHEEL_DIR | tail -n 1)

# Override deps that are already present in the base image
# only for dev
if [ "$ENVIRONMENT" = "dev" ]; then
    uv pip install --no-cache-dir --no-deps $TE_WHEEL
fi

# Install heavy optional deps like mamba, causalconv1d
uv pip install --no-cache-dir \
    $MAMBA_WHEEL \
    $CAUSALCONV1D_WHEEL \
    "setuptools<80.0.0,>=77.0.0"


================================================
FILE: docker/patches/deepep.patch
================================================
diff --git a/setup.py b/setup.py
index 63ce332..4e13462 100644
--- a/setup.py
+++ b/setup.py
@@ -37,7 +37,7 @@ if __name__ == '__main__':
                  '-Wno-sign-compare', '-Wno-reorder', '-Wno-attributes']
     nvcc_flags = ['-O3', '-Xcompiler', '-O3']
     sources = ['csrc/deep_ep.cpp', 'csrc/kernels/runtime.cu', 'csrc/kernels/layout.cu', 'csrc/kernels/intranode.cu']
-    include_dirs = ['csrc/']
+    include_dirs = ['csrc/', '/usr/local/cuda/include/cccl/']
     library_dirs = []
     nvcc_dlink = []
     extra_link_args = []


================================================
FILE: docs/add_copyright_header.py
================================================
#!/usr/bin/env python3
"""One-off script to add NVIDIA copyright header to all .md files under docs/."""

from pathlib import Path

HEADER = """   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.

"""

def main():
    docs_dir = Path(__file__).resolve().parent
    already_has = "Copyright (c) 2022-2026, NVIDIA CORPORATION"
    count = 0
    for path in sorted(docs_dir.rglob("*.md")):
        content = path.read_text(encoding="utf-8")
        if content.strip().startswith(already_has):
            continue
        new_content = HEADER + content
        path.write_text(new_content, encoding="utf-8")
        count += 1
        print(path.relative_to(docs_dir))
    print(f"\nUpdated {count} files.")

if __name__ == "__main__":
    main()


================================================
FILE: docs/advanced/index.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Discussions

In-depth technical discussions and optimization guides:

- [Optimizing DeepSeek-V3 Training on GB200 NVL72](https://github.com/NVIDIA/Megatron-LM/blob/dev/docs/discussions/deepseek-v3-gb200-optimization/deepseek-v3-gb200-reproduce-guide.md) - Achieving 970 TFLOPS/GPU with MXFP8, kernel optimizations, and HybridEP


================================================
FILE: docs/api-backwards-compatibility-check.md
================================================
---
orphan: true
---

<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# API Backward Compatibility Checking

## Overview

Megatron Core uses automated API compatibility checking to ensure stable interfaces between releases. This prevents accidental breaking changes that could affect users upgrading between versions.

## How It Works

The compatibility checker:
1. Compares the current code against the latest release
2. Detects breaking changes in function signatures
3. Fails CI if breaking changes are found (unless explicitly exempted)
4. Runs automatically on every PR that modifies `megatron/core`

## What Gets Checked

### ✅ Breaking Changes Detected

- **Parameter removed** - Removing a function parameter
- **Parameter added without default** - Adding a required parameter
- **Parameter order changed** - Changing the order of parameters
- **Optional→Required** - Removing a default value from a parameter
- **Function removed** - Deleting a public function
- **Return type changed** - Changing the return type annotation (warning)

### ⏭️ What Gets Skipped

- **Test functions** - Functions starting with `test_`
- **Exempt decorators** - Functions marked with `@internal_api`, `@experimental_api`, or `@deprecated`
- **Excluded paths** - Code in `tests/`, `experimental/`, `legacy/`

### ✅ Allowed Changes

- **Adding optional parameters** - Adding parameters with default values
- **Adding new functions** - New public APIs
- **Making parameters optional** - Adding default values to required parameters

## For Developers

### Running Locally

```bash
# Install griffe
pip install griffe

# Check against latest release
python scripts/check_api_backwards_compatibility.py --baseline core_r0.8.0

# Check with verbose output
python scripts/check_api_backwards_compatibility.py --baseline core_r0.8.0 -v

# Compare two specific branches
python scripts/check_api_backwards_compatibility.py --baseline core_r0.8.0 --current main
```

### Marking Functions as Exempt

If you need to make breaking changes to internal or experimental APIs:

#### Internal API (for internal implementation details)

```python
from megatron.core.utils import internal_api

@internal_api
def experimental_feature(x, y):
    """
    This API is experimental and may change.
    NOT FOR EXTERNAL USE.
    """
    pass
```

**When to use `@internal_api`:**
- Internal APIs not documented for external use
- Experimental features explicitly marked as unstable
- Functions in development that haven't been released yet

#### Experimental API (for experimental features)

```python
from megatron.core.utils import experimental_api

@experimental_api
def new_experimental_feature(x, y):
    """
    This API is experimental and may change without notice.
    """
    pass
```

**When to use `@experimental_api`:**
- Experimental features explicitly marked as unstable
- New APIs under active development
- Features that haven't been stabilized yet

### Deprecating APIs

For planned API changes, use the deprecation workflow:

```python
from megatron.core.backwards_compatibility_decorators import deprecated

@deprecated(
    version="1.0.0",           # When deprecation starts
    removal_version="2.0.0",    # When it will be removed
    alternative="new_function", # Recommended replacement
    reason="Improved performance and cleaner API"
)
def old_function(x):
    """This function is deprecated."""
    pass
```

**Deprecation Timeline:**
1. **Version N** - Add `@deprecated` decorator, function still works
2. **Version N+1** - Keep function with deprecation warnings
3. **Version N+2** - Remove function (users have been warned)

### Handling CI Failures

If the compatibility check fails on your PR:

1. **Review the breaking changes** in the CI logs
2. **Choose an action:**
   - **Fix the code** - Revert the breaking change
   - **Add exemption** - Use `@internal_api` if intentional
   - **Use deprecation** - For planned API changes
3. **Update your PR** with the fix

## Examples

### Example 1: Compatible Change

```python
# ✅ BEFORE (v1.0)
def train_model(config, dataloader):
    pass

# ✅ AFTER (v1.1) - Added optional parameter
def train_model(config, dataloader, optimizer="adam"):
    pass
```
**Result:** ✅ Check passes

---

### Example 2: Breaking Change

```python
# BEFORE (v1.0)
def train_model(config, dataloader, optimizer="adam"):
    pass

# ❌ AFTER (v1.1) - Removed parameter
def train_model(config, dataloader):
    pass
```
**Result:** ❌ Check fails - "Parameter 'optimizer' removed"

---

### Example 3: Exempt Internal API

```python
from megatron.core.utils import internal_api

# BEFORE (v1.0)
@internal_api
def _internal_compute(x, y):
    pass

# ✅ AFTER (v1.1) - Can change freely
@internal_api
def _internal_compute(x, y, z):  # Added parameter
    pass
```
**Result:** ✅ Check passes (function is exempt)

---

### Example 4: Deprecation Workflow

```python
from megatron.core.utils import deprecated

# Version 1.0 - Add deprecation
@deprecated(
    version="1.0.0",
    removal_version="2.0.0",
    alternative="train_model_v2"
)
def train_model(config):
    """Old training function - DEPRECATED"""
    pass

def train_model_v2(config, **options):
    """New improved training function"""
    pass

# Version 1.1 - Keep both (users migrate)
# Version 2.0 - Remove train_model()
```

## Architecture

```
Developer commits code
    ↓
GitHub Actions triggers
    ↓
CI runs check_api_backwards_compatibility.py
    ↓
Script loads code via griffe:
  • Baseline: latest release (e.g., core_r0.8.0)
  • Current: PR branch
    ↓
Apply filtering:
  • Skip @internal_api, @experimental_api, and @deprecated
  • Skip private functions (_prefix)
  • Skip test/experimental paths
    ↓
Griffe compares signatures:
  • Parameters
  • Types
  • Return types
  • Defaults
    ↓
Report breaking changes
    ↓
Exit: 0=pass, 1=fail
    ↓
CI fails if breaking changes detected
```

## Configuration

### Customizing Filters

Edit `scripts/check_api_backwards_compatibility.py`:

```python
# Add more exempt decorators
EXEMPT_DECORATORS = [
    "internal_api",
    "experimental_api",
    "deprecated",
]

# Add more path exclusions
EXCLUDE_PATHS = {
    "tests",
    "experimental",
    "legacy",
    "your_custom_path",  # ← Add here
}
```

### Changing the Baseline

The workflow auto-detects the latest `core_r*` tag. To manually specify:

```yaml
# In .github/workflows/check_api_backwards_compatibility_workflow.yml
- name: Run compatibility check
  run: |
    python scripts/check_api_backwards_compatibility.py \
      --baseline your_custom_baseline
```

## FAQ

### Q: Why did my PR fail the compatibility check?

**A:** Your code introduced breaking changes compared to the last release. Review the CI logs to see what changed.

### Q: Can I disable the check for my PR?

**A:** No, but you can mark specific functions as exempt using `@internal_api` or `@experimental_api`.

### Q: What if I need to make a breaking change?

**A:** Use the `@deprecated` decorator for a gradual transition, or mark the function as exempt using `@internal_api` (for internal code) or `@experimental_api` (for experimental features).

### Q: Does this check all of Megatron-LM?

**A:** No, only `megatron/core/**` (Megatron Core). Legacy code is excluded.

### Q: What about class methods?

**A:** Yes, class methods are checked just like functions.

### Q: Can I run this locally before pushing?

**A:** Yes! Run `python scripts/check_api_backwards_compatibility.py --baseline core_r0.8.0`

### Q: What if there's no release tag yet?

**A:** The workflow will use `main` as the baseline. Update it once you have release tags.

## Troubleshooting

### Error: "griffe is not installed"

```bash
pip install griffe
```

### Error: "No core_r* tags found"

The repository doesn't have release tags yet. The workflow will fall back to `main`.

### False Positives

If the checker reports a breaking change that isn't actually breaking, file an issue and use `@internal_api` as a temporary workaround.

## References

- **Script:** `scripts/check_api_backwards_compatibility.py`
- **Workflow:** `.github/workflows/check_api_backwards_compatibility_workflow.yml`
- **Decorators:** `megatron/core/backwards_compatibility_decorators.py`
- **Griffe Documentation:** https://mkdocstrings.github.io/griffe/

## Support

For questions or issues:
1. Check this documentation
2. Review existing PRs with compatibility checks
3. Ask in the Megatron-LM Slack/Discord
4. File an issue on GitHub


================================================
FILE: docs/api-guide/core/datasets.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# datasets package

```{include} ../../../megatron/core/datasets/readme.md
```


================================================
FILE: docs/api-guide/core/dist_checkpointing.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# dist_checkpointing package

A library for saving and loading the distributed checkpoints.
A *distributed checkpoint* in Megatron Core uses the ``torch_dist`` format,
a custom checkpointing mechanism built on top of PyTorch's native
checkpointing capabilities.

A key property of distributed checkpoints is that a checkpoint saved under one
parallel configuration (tensor, pipeline, or data parallelism) can be loaded
under a different parallel configuration. This enables flexible scaling and
resharding of models across heterogeneous training setups.

Using the library requires defining sharded state_dict dictionaries with functions from  *mapping* and *optimizer* modules.
Those state dicts can be saved or loaded with a *serialization* module using strategies from *strategies* module.

## Safe Checkpoint Loading

Since **PyTorch 2.6**, the default behavior of `torch.load` is `weights_only=True`.
This ensures that only tensors and allow-listed classes are loaded, reducing the risk of arbitrary code execution.

If you encounter an error such as:

```bash
WeightsUnpickler error: Unsupported global: GLOBAL argparse.Namespace was not an allowed global by default.
```

you can fix it by explicitly allow-listing the missing class in your script:

```python
import torch, argparse

torch.serialization.add_safe_globals([argparse.Namespace])
```

## Checkpointing Distributed Optimizer

### Checkpoint Compatibility and Optimizer State Formats

Beginning with **mcore v0.14**, the ``flattened_range`` attribute was removed from ``dist_checkpointing``. As a result:

- Optimizer states saved with mcore versions <= 0.14 can no longer be loaded directly. Loading these legacy optimizer states is not supported because the required sharded metadata is no longer available. If you need to continue training from older checkpoints, refer to the workaround described below.
- Model weights from older checkpoints remain fully compatible. No extra steps are needed—model weights from checkpoints created by earlier versions load automatically; simply add the ``--no-load-optim`` flag.

### Workaround: Loading legacy optimizer states with ToT MCore

**Step 1: Convert the legacy checkpoint using mcore v0.15.0**

Run a dummy training job with mcore v0.15.0 to re-save the checkpoint with new optimizer states format.

```bash
MODEL_TRAIN_PARAMS=(
    # Define model architecture and training parameters here
)
OLD_CKPT=/workspace/mcore_ckpt_old
CONVERTED_CKPT=/workspace/mcore_ckpt_0.15.0

torchrun --nproc_per_node=8 /opt/megatron-lm/pretrain_gpt.py \
   --save-interval 1 \
   --eval-interval 1 \
   --exit-interval 1 \
   --eval-iters 1 \
   --use-distributed-optimizer \
   --save ${CONVERTED_CKPT} \
   --load ${OLD_CKPT} \
   --ckpt-format torch_dist \
   "${MODEL_TRAIN_PARAMS[@]}"
```

**Step 2: Load the converted checkpoint with ToT MCore**

Use the converted checkpoint as the input for continued training with ToT MCore.

```bash
MODEL_TRAIN_PARAMS=(
    # Define model architecture and training parameters here
)
NEW_CKPT=/workspace/mcore_ckpt_new
CONVERTED_CKPT=/workspace/mcore_ckpt_0.15.0

torchrun --nproc_per_node=8 /opt/megatron-lm/pretrain_gpt.py \
   --use-distributed-optimizer \
   --save ${NEW_CKPT} \
   --load ${CONVERTED_CKPT} \
   --ckpt-format torch_dist \
   "${MODEL_TRAIN_PARAMS[@]}"
```

After this step, training can proceed normally using ToT MCore with fully supported optimizer state loading.

## Distributed Optimizer Checkpoint Formats

The refactor of the Distributed Optimizer introduces **two checkpoint formats**:

- dp_reshardable (Default)
   - Fast save/load performance.
   - Not reshardable — not possible to change model parallelism when using this format.
   - Recommended for general training when model parallelism changes are not needed.
- fully_reshardable
   - Fully reshardable — supports arbitrary changes in model parallelism.
   - Slower than dp_reshardable.
   - Enabled via the ``--dist-ckpt-optim-fully-reshardable`` flag.

### Workflow for Changing Model Parallelism

You can combine formats to optimize both flexibility and performance:

   1. Train using ``dp_reshardable`` (default) for faster checkpointing.
   2. When you need to change model parallelism:

      - Stop training.
      - Change model parallelism for train config.
      - Resume training with ``--dist-ckpt-optim-fully-reshardable``.

   3. Save at least one checkpoint under the new model parallel configuration.
   4. (Optional) To continue the training with updated model parallelism and better checkpointing performance, stop training and switch back to ``dp_reshardable`` format by removing ``--dist-ckpt-optim-fully-reshardable``.

## Subpackages

```{toctree}
:maxdepth: 4

dist_checkpointing.strategies
```


================================================
FILE: docs/api-guide/core/dist_checkpointing.strategies.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# dist_checkpointing.strategies package

Package defining different checkpoint formats (backends) and saving/loading algorithms (strategies).

Strategies can be used for implementing new checkpoint formats or implementing new (more optimal for a given use case) ways of saving/loading of existing formats.
Strategies are passed to `dist_checkpointing.load` and `dist_checkpointing.save` functions and control the actual saving/loading procedure.


================================================
FILE: docs/api-guide/core/distributed.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# distributed package

This package contains various utilities to finalize model weight gradients
on each rank before the optimizer step. This includes a distributed data
parallelism wrapper to all-reduce or reduce-scatter the gradients across
data-parallel replicas, and a `finalize_model_grads` method to
synchronize gradients across different parallelism modes (e.g., 'tied'
layers on different pipeline stages, or gradients for experts in a MoE on
different ranks due to expert parallelism).


================================================
FILE: docs/api-guide/core/fusions.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# fusions package

This package provides modules that provide commonly fused
operations. Fusing operations improves compute efficiency by
increasing the amount of work done each time a tensor is read from
memory. To perform the fusion, modules in this either rely on PyTorch
functionality for doing just-in-time compilation
(i.e. `torch.jit.script` in older PyTorch versions of `torch.compile`
in recent versions), or call into custom kernels in external libraries
such as Apex or TransformerEngine.


================================================
FILE: docs/api-guide/core/index.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Core APIs

Low-level API reference for core Megatron components.

```{toctree}
:maxdepth: 2

transformer
tensor_parallel
pipeline_parallel
fusions
distributed
datasets
dist_checkpointing
dist_checkpointing.strategies
```


================================================
FILE: docs/api-guide/core/pipeline_parallel.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# pipeline_parallel package

This package contains implementations for two different pipeline parallelism
schedules (one without interleaving and one with interleaving, see [Efficient Large-Scale Language Model Training on GPU Clusters Using Megatron-LM](https://arxiv.org/abs/2104.04473)
for details), and a default no-pipelining schedule. It also contains methods
for the point-to-point communication that is needed between pipeline stages.


================================================
FILE: docs/api-guide/core/tensor_parallel.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# tensor_parallel package

This package contains an implementation for tensor parallelism in transformer
models (see [Megatron-LM: Training Multi-Billion Parameter Language Models
Using Model Parallelism](https://arxiv.org/abs/1909.08053) and [Reducing
Activation Recomputation in Large Transformer Models](https://arxiv.org/abs/2205.05198)
for details).


================================================
FILE: docs/api-guide/core/transformer.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# transformer package

The `transformer` package provides a customizable and configurable
implementation of the transformer model architecture. Each component
of a transformer stack, from entire layers down to individual linear
layers, can be customized by swapping in different PyTorch modules
using the "spec" parameters. The
configuration of the transformer (hidden size, number of layers,
number of attention heads, etc.) is provided via a `TransformerConfig`
object.


================================================
FILE: docs/api-guide/index.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# API Guide

API reference documentation for Megatron Core components.

```{toctree}
:maxdepth: 3

models/index
core/index
internal/index
```


================================================
FILE: docs/api-guide/internal/index.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Internal Utilities

Internal utility APIs.

```{toctree}
:maxdepth: 2

num_microbatches_calculator
optimizer_param_scheduler
```


================================================
FILE: docs/api-guide/internal/num_microbatches_calculator.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Microbatches Calculator

This api is used to calculate the number of microbatches required to fit a given model on a given batch size.


================================================
FILE: docs/api-guide/internal/optimizer_param_scheduler.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Optimizer Parameters Scheduler

This api is used to calculate the learning rate and weight decay for the optimizer.


================================================
FILE: docs/api-guide/models/index.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Model APIs

API reference for Megatron Core model implementations.

```{toctree}
:maxdepth: 2

models
models.gpt
models.bert
models.t5
```


================================================
FILE: docs/api-guide/models/models.bert.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# models.bert package

Useful package for training bert and bert like encoder only models. It optionally comes with a binary head that can be used for classification tasks .


================================================
FILE: docs/api-guide/models/models.gpt.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# models.gpt package

This is the implementation of the popular GPT model. It supports several features like model parallelization (Tensor Parallel, Pipeline Parallel, Data Parallel) , mixture of experts, FP8 , Distributed optimizer etc. We are constantly adding new features. So be on the lookout or raise an issue if you want to have something added.


================================================
FILE: docs/api-guide/models/models.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# models package

This package contains most of the popular LLMs . Currently we have support for GPT, Bert, and T5 . This is an ever growing list so keep an eye out.

## Subpackages

```{toctree}
:maxdepth: 4

models.gpt
models.t5
models.bert
```


================================================
FILE: docs/api-guide/models/models.t5.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# models.t5 package


================================================
FILE: docs/api-guide/router_replay.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Design Document: MoE Router Replay Feature

## 1. Overview

This document provides a detailed description of the "Router Replay" feature implemented within the Megatron-LM Core for Mixture-of-Experts (MoE) models.

This feature is designed to enhance determinism and analyzability in MoE model training and inference. It enables the model to load routing decisions from a predefined file and enforce their use during the forward pass, thereby bypassing the real-time routing computation.

## 2. Motivation

*   **Determinism & Reproducibility**: In distributed training, MoE routing decisions can exhibit minor variations due to factors like floating-point precision. By replaying a fixed routing table, the MoE computation path is guaranteed to be identical across runs, which facilitates debugging and reproducing experimental results.
*   **Performance Profiling**: The router's own computation (e.g., logits calculation, top-k selection) incurs overhead. In replay mode, this part of the computation can be completely skipped, allowing for more precise isolation and profiling of performance bottlenecks within the Expert Layers themselves.
*   **Debugging Aid**: When issues arise in the model, fixing the routing decisions helps to isolate variables, making it easier to determine whether the problem lies with the routing mechanism or the expert computations.

## 3. Design and Architecture

The design follows the principles of being non-intrusive and on-demand, with the core idea of activating the replay logic only when explicitly requested by the user.

*   **Core Components**:
    *   `RouterReplay` (located in `megatron/core/transformer/moe/router_replay.py`): A utility class for replaying MoE routing decisions. When enabled via the `moe_enable_routing_replay` flag, a separate instance of `RouterReplay` is created for each MoE layer's router. Each instance is responsible for loading routing data and providing the deterministic routing decisions for its corresponding layer during the forward pass.
    *   `moe_enable_routing_replay` (located in `megatron/core/transformer/transformer_config.py`): A boolean global configuration flag that serves as the sole entry point for enabling this feature.

*   **Workflow**:
    The feature supports different modes, such as recording and replaying, controlled by a `RouterReplayAction`.

    1.  **Enabling the Feature**: The user sets `moe_enable_routing_replay` to `True` in the model configuration.
    2.  **Initialization**: When `moe_enable_routing_replay` is true, each `TopKRouter` creates its own `RouterReplay` instance.
    3.  **Mode Configuration**: The user must programmatically set the desired router replay action (e.g., `record`, `forward_replay`, `backward_replay`) on the `RouterReplay` instances.
    4.  **Execution Flow (within a mini-batch)**:
        *   **Forward Pass**:
            *   For each micro-batch, the `topk_routing_with_score_function` checks the `router_replay_action`.
            *   **In `record` mode**: The dynamically computed `top-k` expert indices are captured and stored.
            *   **In `forward_replay` mode**: The function retrieves pre-loaded expert indices from `target_topk_idx`. These indices are used for the forward computation and are also appended to the `replay_backward_list` to prepare for the backward pass.
        *   **Backward Pass**:
            *   For each micro-batch (processed in reverse order in pipeline parallelism), the `router_replay_action` is checked again.
            *   **In `backward_replay` mode**: The function retrieves the expert indices for the corresponding micro-batch by popping them from the `replay_backward_list`. This mode is intended for training recomputation (e.g., activation checkpointing and pipeline recompute) so the same routing decisions are used during recompute/backward as in forward, ensuring determinism and correctness.

## 4. Implementation Details

The implementation cleanly separates the replay logic from the router's core computation.

*   **`megatron/core/transformer/transformer_config.py`**:
    *   Adds the configuration option `moe_enable_routing_replay: bool = False`.

*   **`megatron/core/transformer/moe/moe_utils.py`**:
    *   Introduces the `RouterReplay` class to manage the state for recording and replaying routing decisions for a single MoE layer.
        *   `target_topk_idx`: An attribute holding the expert indices for the current micro-batch during forward replay mode.
        *   `recorded_topk_idx`: An attribute for storing the computed expert indices when in record mode.
        *   `replay_backward_list`: A list that accumulates the `top-k` indices used during the forward passes of a mini-batch. This list is consumed in FIFO order during the backward pass to ensure correctness under pipeline parallelism.
        *   `set_target_indices()`: A method to load the replay indices into `target_topk_idx` for the forward pass.
        *   `record_indices()`: A method to save the computed indices.
    *   The `topk_routing_with_score_function` is modified to contain the core logic. It checks the `router_replay_action` on the `router_replay` instance and accordingly performs one of the following actions: computes and records indices, replays indices from `target_topk_idx` (for forward), replays indices from `replay_backward_list` (for backward), or falls through to the default dynamic routing.

### Training recompute usage

- During forward replay, `set_target_indices()` prepares `replay_backward_list` so each micro-batch’s indices are available for recomputation.
- During recompute/backward, set action to `REPLAY_BACKWARD` so indices are consumed in FIFO order to mirror the forward sequence.

## 5. Usage Guide

1.  **Enable & Instantiate**
    - Create one `RouterReplay` instance per MoE router layer when building the model.
    - Optionally use the global helpers to set/clear actions across all layers.
2.  **Record Routing Decisions**
    - Set action: `RouterReplay.set_global_router_replay_action(RouterReplayAction.RECORD)`.
    - Run the model; retrieve per-layer indices via `RouterReplay.get_recorded_data()` and persist.
3.  **Forward Replay**
    - Load indices and distribute: `RouterReplay.set_replay_data(list_of_tensors)`.
    - Set action: `RouterReplay.set_global_router_replay_action(RouterReplayAction.REPLAY_FORWARD)`.
    - Run the model; dynamic top‑k is bypassed and target indices are used.
4.  **Backward Replay**
    - For training recomputation (activation checkpointing or pipeline recompute), set action: `REPLAY_BACKWARD` during recomputation.
    - Per micro‑batch indices are consumed from `replay_backward_list` in FIFO order.
5.  **Cleanup**
    - Use `RouterReplay.clear_global_indices()`, `RouterReplay.clear_global_router_replay_action()`, and `RouterReplay.clear_global_router_replay_instances()` to restore default behavior and prevent memory leaks.

### Quick usage with `topk_routing_with_score_function`

```python
import torch
from megatron.core.transformer.moe.router_replay import RouterReplay, RouterReplayAction
from megatron.core.transformer.moe.moe_utils import topk_routing_with_score_function

rr = RouterReplay()

# Record
RouterReplay.set_global_router_replay_action(RouterReplayAction.RECORD)
logits = torch.randn(8, 16)
probs_rec, routing_map_rec = topk_routing_with_score_function(
    logits=logits, topk=2, use_pre_softmax=False, score_function="softmax", router_replay=rr,
)
recorded = rr.get_recorded_indices()
torch.save(recorded, "/tmp/replay.pt")

# Forward replay
rr.clear_router_replay_action()
rr.set_router_replay_action(RouterReplayAction.REPLAY_FORWARD)
target = torch.load("/tmp/replay.pt")
rr.set_target_indices(target)
probs_rep, routing_map_rep = topk_routing_with_score_function(
    logits=logits, topk=2, use_pre_softmax=False, score_function="softmax", router_replay=rr,
)

RouterReplay.clear_global_router_replay_action()
RouterReplay.clear_global_indices()
RouterReplay.clear_global_router_replay_instances()
```

## 6. Minimal Demo

Here is a minimal code example showing how to use RouterReplay for recording and replaying:

```python
import torch
import torch.distributed as dist
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.moe.router import TopKRouter
from megatron.core.transformer.moe.router_replay import RouterReplay, RouterReplayAction


# Initialize distributed training
if not dist.is_initialized():
    dist.init_process_group(backend="nccl")

# Create a transformer config with RouterReplay enabled
config = TransformerConfig(
    num_experts=8,
    expert_model_parallel_size=1,
    num_top_k=2,
    moe_enable_routing_replay=True
)

# Create a TopKRouter instance
router = TopKRouter(config)

# Generate sample input (batch_size, sequence_length, hidden_size)
logits = torch.randn(16, 32, 8).to(torch.cuda.current_device())

# -----------------
# 1. Recording Mode
# -----------------
print("=== Recording Mode ===")
# Set global router replay action to RECORD
RouterReplay.set_global_router_replay_action(RouterReplayAction.RECORD)

# Perform routing
routing_output = router.forward(logits)
print(f"Recorded top-k indices shape: {routing_output.top_k_idx.shape}")

# -----------------
# 2. Forward Replay Mode
# -----------------
print("\n=== Forward Replay Mode ===")
# Save recorded indices to a file
torch.save(routing_output.top_k_idx, "/tmp/replay.pt")

# Load indices from file and set as target for replay
replay_indices = torch.load("/tmp/replay.pt")
for router_instance in RouterReplay.global_router_replay_instances:
    router_instance.target_topk_idx = replay_indices

# Set global router replay action to REPLAY_FORWARD
RouterReplay.set_global_router_replay_action(RouterReplayAction.REPLAY_FORWARD)

# Perform routing again - this will use the replayed indices
replay_routing_output = router.forward(logits)
print(f"Replayed top-k indices shape: {replay_routing_output.top_k_idx.shape}")
print(f"Are indices the same? {torch.equal(routing_output.top_k_idx, replay_routing_output.top_k_idx)}")


# Clean up
RouterReplay.clear_global_router_replay_action()
RouterReplay.clear_global_indices()
RouterReplay.clear_global_router_replay_instances()
if dist.is_initialized():
    dist.destroy_process_group()
```


================================================
FILE: docs/autodoc2_docstrings_parser.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from docutils import nodes
from myst_parser.parsers.sphinx_ import MystParser
from sphinx.ext.napoleon.docstring import GoogleDocstring


class NapoleonParser(MystParser):
    """Add support for Google style docstrings."""

    def parse(self, input_string: str, document: nodes.document) -> None:
        """Parse Google style docstrings."""

        # Get the Sphinx configuration
        config = document.settings.env.config

        # Process with Google style
        google_parsed = str(GoogleDocstring(input_string, config))

        return super().parse(google_parsed, document)


Parser = NapoleonParser


================================================
FILE: docs/broken_links_false_positives.json
================================================
{
    "uri": "http://localhost:8080/"
}

================================================
FILE: docs/conf.py
================================================
# Copyright (c) 2025-2026, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

import os
import sys


# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = "Megatron Core"
copyright = "2026, NVIDIA Corporation"
author = "NVIDIA Corporation"
release = "nightly"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

extensions = [
    "myst_parser",  # For our markdown docs
    "sphinx.ext.viewcode",  # For adding a link to view source code in docs
    "sphinx.ext.doctest",  # Allows testing in docstrings
    "sphinx.ext.napoleon",  # For google style docstrings
    "sphinx_copybutton",  # For copy button in code blocks
]

# Check if we should skip autodoc generation
# usage: SKIP_AUTODOC=true
skip_autodoc = os.environ.get("SKIP_AUTODOC", "false").lower() == "true"

if not skip_autodoc:
    extensions.append("autodoc2")  # Generates API docs

templates_path = ["_templates"]
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# -- Options for MyST Parser (Markdown) --------------------------------------
# MyST Parser settings
myst_enable_extensions = [
    "dollarmath",  # Enables dollar math for inline math
    "amsmath",  # Enables LaTeX math for display mode
    "colon_fence",  # Enables code blocks using ::: delimiters instead of ```
    "deflist",  # Supports definition lists with term: definition format
    "fieldlist",  # Enables field lists for metadata like :author: Name
    "tasklist",  # Adds support for GitHub-style task lists with [ ] and [x]
    "attrs_block",  # Enables setting attributes on block elements using {#id .class key=val}
]
myst_heading_anchors = 5  # Generates anchor links for headings up to level 5

# Suppress "more than one target found for cross-reference" warnings for Python symbols
# that have the same name across multiple modules (e.g. DistributedDataParallelConfig,
# ModelType). These are structural ambiguities in the codebase – the cross-reference
# still resolves; Sphinx just cannot pick the unique target automatically.
suppress_warnings = ["ref.python"]

# -- Options for Autodoc2 ---------------------------------------------------
sys.path.insert(0, os.path.abspath(".."))

if not skip_autodoc:
    autodoc2_packages = [
        {
            "path": "../megatron/core",  # Path to your package relative to conf.py
            "exclude_dirs": ["converters"],  # list of directory names to exclude
        }
    ]
    autodoc2_render_plugin = "myst"  # Use MyST for rendering docstrings
    autodoc2_output_dir = "apidocs"  # Output directory for autodoc2 (relative to docs/)
    # This is a workaround that uses the parser located in autodoc2_docstrings_parser.py to allow autodoc2 to
    # render google style docstrings.
    # Related Issue: https://github.com/sphinx-extensions2/sphinx-autodoc2/issues/33
    autodoc2_docstring_parser_regexes = [
        (r".*", "docs.autodoc2_docstrings_parser"),
    ]
    # Regex patterns whose values contain raw regex syntax (e.g. \p{L}) that docutils
    # mis-parses as footnote/reference markup. Exclude them from the generated docs.
    autodoc2_hidden_regexes = [
        r".*\._PATTERN_TIKTOKEN.*",
    ]

# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = "nvidia_sphinx_theme"
html_theme_options = {
    "switcher": {
        "json_url": "versions1.json",
        "version_match": release,
    },
    "icon_links": [
        {
            "name": "GitHub",
            "url": "https://github.com/NVIDIA/Megatron-LM/",
            "icon": "fa-brands fa-github",
        }
    ],
    "public_docs_features": True
}
html_extra_path = ["project.json", "versions1.json"]

# Github links are now getting rate limited from the Github Actions
linkcheck_ignore = [
    ".*github\\.com.*",
    ".*githubusercontent\\.com.*",
]


================================================
FILE: docs/developer/contribute.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Contributing to Megatron-LM

This document outlines the processes and policies for issues and pull requests by non-NVIDIA contributors to the Megatron-LM GitHub repository.

Everyone is welcome to contribute to the project! We recently migrated from using an internal repo to doing all development directly from the GitHub repository.

When contributing it is important to ensure that changes are in line with the project direction. Small changes to fix bugs are welcomed and appreciated. If proposing large architectural changes or changes for stylistic reasons open an issue first so we can discuss it.

## Issue policy

Please do file any bugs you find, keeping the following in mind:

- If filing a bug, i.e. you have found something that doesn't work as expected, use the BUG template.
- If you've found a regression in speed or accuracy use the REGRESSION template.
- If you are requesting a new feature or modification of an existing feature use the ENHANCEMENT template.
- If opening an issue to ask a question no template is needed but please make your question as clear and concise as possible.
- One issue per bug. Putting multiple things in the same issue makes both discussion and completion unnecessarily complicated.
- Your bug is mostly likely to get attention from the development team quickly if we can easily reproduce it.
- Use proper spelling, grammar, and punctuation.
- Write in an authoritative and technical tone.

## Code submission policy

### Do

- Format new code in a style that is consistent with the file being changed. Megatron-LM doesn't (yet) have a style guide or enforced formatting.
- Split your changes into separate, atomic commits i.e. A commit per feature or fix.
- Make sure your commits are rebased on the master branch.
- Write the commit message subject line in the imperative mood ("Change the default argument for X", not "Changed the default argument for X").
- Write your commit messages in proper English, with care and punctuation.
- Check the spelling of your code, comments and commit messages.

### Don't

- Submit code that's incompatible with the project licence.
- Touch anything outside the stated scope of the PR. This includes formatting changes to code not relevant to the PR.
- Iterate excessively on your design across multiple commits.
- Include commented-out code.
- Attempt large architectural changes without first opening an issue to discuss.

## Issue and Pull Request Q&A

### I've submitted an issue and PR. When can I expect to get some feedback?

You should receive a response within 2 business days.

### I need help, who should I ping?

Use [@mcore-oncall](https://github.com/orgs/NVIDIA/teams/mcore-oncall).

### If my issue or PR isn't getting attention, what should I do?

After 2 business days, tag the user [@mcore-oncall](https://github.com/orgs/NVIDIA/teams/mcore-oncall).

### Is there a policy for issues and PRs that haven't been touched in X days? Should they be closed?

Yes, we have a bot that will mark untouched PRs as "stale" after 60 days.

We have a long backlog of issues and PRs dating back years. We are trying to triage these now by working backwards. Older issues we believe may still be relevant may recieve a request to re-test them with the latest code. If there's no response they may be closed. Again, if you they should be re-opened then just respond with a comment to that effect.

Thank you!

================================================
FILE: docs/developer/generate_docs.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Generating Docs Locally

To generate docs locally, use the following commands:

```
cd docs
uv run --only-group docs sphinx-autobuild . _build/html --port 8080 --host 127.0.0.1
```

Docs will be generated at <http://localhost:8080/>.

**Recommended:** set the environment variable `SKIP_AUTODOC=true` when generating docs 
to skip the generation of `apidocs`.

================================================
FILE: docs/developer/oncall.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->
-->

# Oncall Overview

During your oncall week, you will be assigned to all PRs marked “Ready for 
Review”. From a high-level, your responsibilities include:

- Review all new PRs
- Accelerate the review process
- Ensure issues and discussion questions are answered

## PR Responsibilities

Below is the checklist that the oncall needs to go through for each PR.

- Should the PR remain a single PR?
  - Each PR should have at most 1 expert reviewer, although there will be some outlier cases
- Label PR as “complexity: low”, “complexity: medium”, or “complexity: high” depending on complexity
  - Expert reviewers have final say, oncall just sets the initial complexity level
  - Initial complexity level guideline
    - Low: <100 lines changed
    - Medium: 100 < lines changed < 500
    - High: > 500 lines changed
- Does this PR have proper testing coverage?
  - If new logic is added, is the new logic tested?
- Should the PR add documentation for any new features?
- Does the PR conform to our style guidelines?
  - Code structure
  - Cleanliness
  - Comments
  - File structure
- Do all tests pass?
  - Oncall will need to kick off testing suite for external reviewers
  - Comment “/ok to test commid_id” to kick off testing suite
- Expert reviewers are notified after the PR is marked “Ready for Review”
  - **Expert reviewers should review within 1 business day.** Message the assigned reviewer if it is taking longer. The reviewer either needs to review the PR or suggest an alternate reviewer.
  - If the reviewer is not responding after 2 business days, escalate to the reviewer’s manager.
- For `megatron/core` PRs, the “Final Review” label is applied automatically once all expert reviewers approve
  - Final reviewers should review within 1 business day. Message the assigned reviewer if it is taking longer.
  - If the reviewer is not responding after 2 business days, escalate to the reviewer’s manager.
- The “Approved” label is applied automatically once all required reviewers have approved

## Issues and Discussion Questions

If you do not know the answer to an issue or discussion question: that's ok! **Delegate to someone who does.**

On a daily basis, track the following:

- [new issues](https://github.com/NVIDIA/Megatron-LM/issues): check to see if there are any new issues before they become out of SLA!
- [out of SLA issues](https://github.com/orgs/NVIDIA-NeMo/projects/20/views/4?sliceBy%5Bvalue%5D=NVIDIA%2FMegatron-LM): useful dashboard that tracks all out of SLA issues


================================================
FILE: docs/developer/submit.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# How to Submit a PR

All PRs start as **draft**. If you open a non-draft PR, it will be automatically converted to draft.

## Step 1: Mark PR as "Ready for Review"

1. When your PR is ready, click **Ready for Review**.
2. The oncall reviewer is auto-assigned and expert reviewers are notified based on your changes. They will get notified and pick up your PR soon.

:warning: Only mark as ready once all merge-conflicts are resolved and the CI is passing.
Final Review might get declined if these requirements are not fulfilled.

## Step 2: Final Review (`megatron/core` only)

For PRs that change `megatron/core`, once all expert reviewers have approved, the `Final Review` label is applied **automatically** and final reviewers are assigned.

For PRs outside `megatron/core`, this step is skipped.

## Step 3: Approved

Once all required reviewers have approved, the `Approved` label is applied **automatically**. The PR is now ready to merge.

## Step 4: Merge

Any member of [mcore-engineers](https://github.com/orgs/NVIDIA/teams/mcore-engineers) will be able to merge your PR.


================================================
FILE: docs/discussions/README.md
================================================
---
orphan: true
---

<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Megatron Discussions

This directory contains in-depth guides, tutorials, and discussions about optimizing and using Megatron for various use cases.

## Available Guides

### Training Guides

- **[Megatron-FSDP User Guide](megatron-fsdp-user-guide/megatron-fsdp-user-guide.md)**

  A practical guide to enable Megatron-FSDP training, including a quick-start example for DeepSeek-V3, required and recommended configurations, and instructions for checkpoint conversion from torch_dist to fsdp_dtensor.

## Contributing

If you'd like to contribute a guide or tutorial, please follow this structure:

1. Create a new directory: `docs/discussions/your-guide-name/`
2. Add your main guide: `docs/discussions/your-guide-name/your-guide-name.md`
3. Create an images directory: `docs/discussions/your-guide-name/images/`
4. Update this README.md with a link to your guide

Each guide should be self-contained with its own images and supporting files.

================================================
FILE: docs/discussions/megatron-fsdp-user-guide/example-scripts/sbatch_checkpoint_convert.sh
================================================
#!/bin/bash

# Configuration: Set these paths before running the script
MEGATRON_PATH=${MEGATRON_PATH:-"your_own_megatron_path"} # Path to Megatron-LM repository
CONTAINER_IMAGE=${CONTAINER_IMAGE:-"your_own_container_image"} # Path to .sqsh or docker image url
OUTPUT_PATH=${OUTPUT_PATH:-"your_own_output_path"} # Path for SLURM logs

# Checkpoint conversion command
# Note: Update the checkpoint paths in the command below
RUN_CMD="
cd ${MEGATRON_PATH};
git rev-parse HEAD;
export PYTHONPATH=${MEGATRON_PATH}:${PYTHONPATH};
python3 tools/checkpoint/checkpoint_inspector.py \
    convert-torch-dist-to-fsdp-dtensor --swiglu \
    your_own_path_to_input_torch_dist_checkpoint \
    your_own_path_to_output_fsdp_dtensor_checkpoint \
    --param-to-param-group-map-json your_own_path_to_param_to_param_group_map.json"

# SLURM settings
SLURM_LOGS="${OUTPUT_PATH}/slurm_logs"
mkdir -p ${SLURM_LOGS} || {
    echo "Error: Failed to create SLURM logs directory ${SLURM_LOGS}"
    exit 1
}

# Submit SLURM job
# Note: Update SBATCH parameters below according to your cluster configuration
set +e
sbatch <<EOF
#!/bin/bash

#SBATCH --job-name=your_own_job_name
#SBATCH --partition=your_own_partition
#SBATCH --nodes=your_own_num_nodes
#SBATCH --ntasks-per-node=your_own_tasks_per_node
#SBATCH --gres=gpu:your_own_gpu_per_node
#SBATCH --time=your_own_time
#SBATCH --account=your_own_account
#SBATCH --exclusive
#SBATCH --dependency=singleton

srun --mpi=pmix -l \
    --container-image=${CONTAINER_IMAGE} \
    --container-mounts=your_own_container_mounts \
    --container-workdir=${MEGATRON_PATH} \
    bash -x -c "${RUN_CMD}" 2>&1 | tee ${SLURM_LOGS}/\${SLURM_JOB_ID}.log

EOF
set -e


================================================
FILE: docs/discussions/megatron-fsdp-user-guide/example-scripts/sbatch_mfsdp_deepseek_v3.sh
================================================
#!/bin/bash

export NCCL_IB_SL=1
export NCCL_IB_TIMEOUT=19
export NVTE_FWD_LAYERNORM_SM_MARGIN=16
export NVTE_BWD_LAYERNORM_SM_MARGIN=16
export NCCL_P2P_NET_CHUNKSIZE=2097152
export TORCH_NCCL_AVOID_RECORD_STREAMS=1
export PYTHONWARNINGS=ignore
export TRITON_CACHE_DIR=/tmp/triton_cache_$SLURM_NODEID

# Configuration: Set these variables before running the script
MEGATRON_PATH=${MEGATRON_PATH:-"your_own_megatron_path"} # Path to Megatron-LM repository
CONTAINER_IMAGE=${CONTAINER_IMAGE:-"your_own_container_image"} # Path to .sqsh or docker image url
OUTPUT_PATH=${OUTPUT_PATH:-"your_own_output_path"} # Path for output logs and checkpoints
DATA_PATH=${DATA_PATH:-"your_own_data_path"}
USE_MEGATRON_FSDP=${USE_MEGATRON_FSDP:-1}
SHARDING_STRATEGY=${SHARDING_STRATEGY:-"optim_grads_params"}
PROFILE=${PROFILE:-0}
WANDB=${WANDB:-1}

TP=${TP:-1}
EP=${EP:-8}
MBS=${MBS:-4}
GBS=${GBS:-2048}
COMMENT=${COMMENT:-"hybridep-selective-recompute"}

PRETRAIN_ARGS=(
    --distributed-timeout-minutes 60
    --tensor-model-parallel-size ${TP}
    --expert-model-parallel-size ${EP}
    --expert-tensor-parallel-size 1
    --context-parallel-size 1
    --use-distributed-optimizer
    --overlap-grad-reduce
    --overlap-param-gather
    --use-mcore-models
    --sequence-parallel
    --use-flash-attn
    --disable-bias-linear
    --micro-batch-size ${MBS}
    --global-batch-size ${GBS}
    --train-samples 585937500
    --exit-duration-in-mins 220
    --no-check-for-nan-in-loss-and-grad
    --manual-gc
    --manual-gc-interval 10
    --recompute-granularity selective
    --recompute-modules mlp moe mla_up_proj layernorm
    --transformer-impl transformer_engine
    --seq-length 4096
    --data-cache-path ${OUTPUT_PATH}/cache
    --tokenizer-type HuggingFaceTokenizer
    --tokenizer-model deepseek-ai/DeepSeek-V3
    --data-path ${DATA_PATH}
    --split 99,1,0
    --no-mmap-bin-files
    --no-create-attention-mask-in-dataloader
    --num-workers 6
    --num-layers 61
    --hidden-size 7168
    --ffn-hidden-size 18432
    --num-attention-heads 128
    --kv-channels 128
    --max-position-embeddings 4096
    --position-embedding-type rope
    --rotary-base 10000
    --make-vocab-size-divisible-by 3232
    --normalization RMSNorm
    --norm-epsilon 1e-6
    --swiglu
    --untie-embeddings-and-output-weights
    --multi-latent-attention
    --attention-dropout 0.0
    --hidden-dropout 0.0
    --clip-grad 1.0
    --weight-decay 0.1
    --qk-layernorm
    --lr-decay-samples 584765624
    --lr-warmup-samples 1536000
    --lr-warmup-init 3.9e-7
    --lr 3.9e-6
    --min-lr 3.9e-7
    --lr-decay-style cosine
    --adam-beta1 0.9
    --adam-beta2 0.95
    --num-experts 256
    --moe-layer-freq [0]*3+[1]*58
    --moe-ffn-hidden-size 2048
    --moe-shared-expert-intermediate-size 2048
    --moe-router-load-balancing-type seq_aux_loss
    --moe-router-topk 8
    --moe-token-dispatcher-type flex
    --moe-flex-dispatcher-backend hybridep
    --moe-router-pre-softmax
    --moe-grouped-gemm
    --moe-aux-loss-coeff 1e-4
    --moe-router-group-topk 4
    --moe-router-num-groups 8
    --moe-router-topk-scaling-factor 2.5
    --moe-router-score-function sigmoid
    --moe-router-enable-expert-bias
    --moe-router-bias-update-rate 1e-3
    --moe-router-dtype fp32
    --moe-permute-fusion
    --moe-router-force-load-balancing
    --q-lora-rank 1536
    --kv-lora-rank 512
    --qk-head-dim 128
    --qk-pos-emb-head-dim 64
    --v-head-dim 128
    --rotary-scaling-factor 40
    --mscale 1.0
    --mscale-all-dim 1.0
    --mtp-num-layers 1
    --mtp-loss-scaling-factor 0.1
    --eval-iters 32
    --eval-interval 100
    --auto-detect-ckpt-format
    --load ${OUTPUT_PATH}/checkpoints
    --save ${OUTPUT_PATH}/checkpoints
    --save-interval 100
    --dist-ckpt-strictness log_all
    --init-method-std 0.02
    --log-timers-to-tensorboard
    --log-memory-to-tensorboard
    --log-num-zeros-in-grad
    --log-params-norm
    --log-validation-ppl-to-tensorboard
    --log-throughput
    --log-interval 1
    --logging-level 40
    --tensorboard-dir ${OUTPUT_PATH}/tensorboard
    --bf16
    --enable-experimental
) 

if [ "${USE_MEGATRON_FSDP}" = 1 ]; then
    unset CUDA_DEVICE_MAX_CONNECTIONS
    PRETRAIN_ARGS=(
        "${PRETRAIN_ARGS[@]}"
        --use-megatron-fsdp
        --data-parallel-sharding-strategy ${SHARDING_STRATEGY}
        --no-gradient-accumulation-fusion
        --use-distributed-optimizer
        --calculate-per-token-loss
        --init-model-with-meta-device
        --ckpt-format fsdp_dtensor
        --grad-reduce-in-bf16
        --fsdp-double-buffer
        --use-nccl-ub
    )
fi

# Profiling command
if [ "${PROFILE}" = 1 ]; then
    PROFILE_CMD="nsys profile --sample=none --cpuctxsw=none --trace=cuda,nvtx,cublas,cudnn \
        --capture-range=cudaProfilerApi \
        --capture-range-end=stop \
        --cuda-graph-trace=node \
        --cuda-memory-usage=true \
        -f true -x true \
        -o ${OUTPUT_PATH}/nsys/Megatron-FSDP-Deepseek-V3-TP${TP}EP${EP}-MBS${MBS}GBS${GBS}-${COMMENT}"
    PRETRAIN_ARGS=(
        "${PRETRAIN_ARGS[@]}"
        --profile
        --profile-step-start 10
        --profile-step-end 12
        --profile-ranks 0
    )
    echo "PROFILE_CMD="
    echo $PROFILE_CMD
else
    PROFILE_CMD=""
fi

if [ "${WANDB}" = 1 ]; then
    export WANDB_API_KEY=${WANDB_API_KEY:-"your_own_wandb_api_key"}
    PRETRAIN_ARGS=(
        "${PRETRAIN_ARGS[@]}"
        --wandb-project your_own_wandb_project
        --wandb-exp-name DeepSeek-V3-TP${TP}EP${EP}-MBS${MBS}GBS${GBS}-${COMMENT}
    )
fi

TRAINING_CMD="
cd ${MEGATRON_PATH};
git rev-parse HEAD;
export PYTHONPATH=${MEGATRON_PATH}:${PYTHONPATH};
${PROFILE_CMD} python ${MEGATRON_PATH}/pretrain_gpt.py ${PRETRAIN_ARGS[@]}"

# SLURM settings
SLURM_LOGS="${OUTPUT_PATH}/slurm_logs"
mkdir -p ${SLURM_LOGS} || {
    echo "Error: Failed to create SLURM logs directory ${SLURM_LOGS}"
    exit 1
}

# Submit SLURM job
# Note: Update SBATCH parameters below according to your cluster configuration
set +e
sbatch <<EOF
#!/bin/bash

#SBATCH --job-name=your_own_job_name
#SBATCH --partition=your_own_partition
#SBATCH --nodes=your_own_num_nodes
#SBATCH --ntasks-per-node=your_own_tasks_per_node
#SBATCH --gres=gpu:your_own_gpu_per_node
#SBATCH --time=your_own_time
#SBATCH --account=your_own_account
#SBATCH --exclusive
#SBATCH --dependency=singleton

srun \
    --mpi=pmix -l \
    --container-image=${CONTAINER_IMAGE} \
    --container-mounts=your_own_container_mounts \
    --container-workdir=${MEGATRON_PATH} \
    bash -x -c "${TRAINING_CMD}" 2>&1 | tee ${SLURM_LOGS}/\${SLURM_JOB_ID}.log

EOF
set -e


================================================
FILE: docs/discussions/megatron-fsdp-user-guide/megatron-fsdp-user-guide.md
================================================
---
orphan: true
---

<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Megatron-FSDP User Guide

## Table of Contents

- [Megatron-FSDP Quick Start](#megatron-fsdp-quick-start)
- [Checkpoint Conversion from 3D-Parallel to Megatron-FSDP](#checkpoint-conversion-from-3d-parallel-to-megatron-fsdp)

## Megatron-FSDP Quick Start

We recommend using the latest [NVIDIA NeMo Framework Container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo/tags), which provides a tested software stack and optimized performance.

For your reference, we provide an example launch script for DeepSeek-V3: [`sbatch_mfsdp_deepseek_v3.sh`](./example-scripts/sbatch_mfsdp_deepseek_v3.sh).

### Required Configurations

To enable Megatron-FSDP, add the following required flags to your training script:

```bash
--use-megatron-fsdp
--data-parallel-sharding-strategy optim_grads_params
--no-gradient-accumulation-fusion
--use-distributed-optimizer
--ckpt-format fsdp_dtensor
```

### Recommended Configurations

We also recommend adding the following configurations to further improve performance:

```bash
unset CUDA_DEVICE_MAX_CONNECTIONS
```
```bash
--calculate-per-token-loss
--init-model-with-meta-device
--grad-reduce-in-bf16
--fsdp-double-buffer
--use-nccl-ub
```

💡 **Detailed explanations of these configurations are provided below.**

#### 1. Disable `CUDA_DEVICE_MAX_CONNECTIONS`

To ensure full parallelization of FSDP communication and computation, disable the CUDA_DEVICE_MAX_CONNECTIONS environment variable. This step avoids potential bubbles in the CUDA stream. (But it may slow down TP and CP to some extent.)

#### 2. Add `--calculate-per-token-loss`

For gradients sharding mode optimization, include the `--calculate-per-token-loss` flag in your training script. This improves performance by reducing the frequency of gradient scaling, which is also a sizable drain on SM resources.

#### 3. Add `--init-model-with-meta-device`

Allows model initialization using meta device, followed by layer-by-layer initialization of distributed model weight buffers via the `Module.reset_parameters` API, facilitating the initialization of extremely large models.

#### 4. Add `--grad-reduce-in-bf16`

Enables gradient reduction in BF16 precision instead of FP32, reducing communication volume and accelerating the backward pass.

#### 5. Add `--fsdp-double-buffer`

Uses persistently allocated double buffers for temporarily-defined memory needed in `MegatronFSDP` communications. While having persistent double buffers may increase peak VRAM utilization, it is necessary to register NCCL user buffers (`nccl_ub=True`) for `MegatronFSDP`. Currently, this is supported only for simple repetitive model structures such as GPT.

- **Only effective when using Megatron-LM.**
- Defaults to `False`. Automatically overridden to `True` when `nccl_ub` is enabled.

#### 6. Add `--use-nccl-ub`

Allocates and [registers NCCL user buffers](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/bufferreg.html#) for param and grad buffers. This option enables an SM-efficient NCCL algorithm that could improve the performance of overlapped computations. This flag will be much more effective when used together with [SHARP](https://docs.nvidia.com/networking/display/sharpv3130) if the FSDP communication includes both NVL and IB domains. Enabling this option will cause additional memory overhead due to the requirement to enable the `fsdp_double_buffer` option.

- **Only effective when using Megatron-LM.**
- Defaults to `False`.
- By default we try to use NCCL window (symmetric) registration if it is available. If not it falls back to conventional local registration.
- **Incompatible with PyTorch's segmentable allocator:** Do not set `PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True` when using `--use-nccl-ub`, as this will cause a runtime error due to compatibility issues with the `torch.cuda.MemPool` API.

## Checkpoint Conversion from 3D-Parallel to Megatron-FSDP

Megatron-FSDP introduces `fsdp_dtensor`, a DTensor-based distributed checkpoint format that serves as its standard. To help you smoothly transition from 3D-Parallel to Megatron-FSDP, we provide a script for converting checkpoints from the `torch_dist` format to the `fsdp_dtensor` format. Using DeepSeek-V3 as an example, the detailed conversion process is described below.

### Step 1: Generate 3D-Parallel Checkpoint with `param_to_param_group_map`

Run your 3D-parallel + EP training script to generate a `torch_dist` checkpoint along with a directory containing `param_to_param_group_map` files. Add the following flag to your training script:

```bash
--dump-param-to-param-group-map /path/to/param_to_param_group_map
```

If you already have a `torch_dist` checkpoint, simply specify the `--dump-param-to-param-group-map /path/to/param_to_param_group_map` flag and run a very short experiment-this will create the `param_to_param_group_map` you need without full pretraining.

### Step 2: Export `param_to_param_group_map` to a JSON File

Convert the `param_to_param_group_map` into a JSON file for easier processing by running:

```bash
python tools/checkpoint/checkpoint_inspector.py print-torch-dcp-in-json /path/to/param_to_param_group_map
```

This will create a `param_to_param_group_map.json` file in the `/path/to/param_to_param_group_map` directory.

### Step 3: Convert Checkpoint from `torch_dist` to `fsdp_dtensor`

Convert your `torch_dist` checkpoint to the `fsdp_dtensor` format using the parameter to `param_to_param_group_map` JSON file:

```bash
torchrun --nproc_per_node=8 --nnodes=1 \
    tools/checkpoint/checkpoint_inspector.py \
    convert-torch-dist-to-fsdp-dtensor --swiglu \
    /path/to/input_torch_dist_checkpoint \
    /path/to/output_fsdp_dtensor_checkpoint \
    --param-to-param-group-map-json /path/to/param_to_param_group_map.json
```

**Note:** For multi-node conversion tasks, please refer to the example script: [`sbatch_checkpoint_convert.sh`](./example-scripts/sbatch_checkpoint_convert.sh).

### Step 4: Launch Megatron-FSDP Training

Start your Megatron-FSDP training job using the converted `fsdp_dtensor` checkpoint.

================================================
FILE: docs/documentation.md
================================================
---
orphan: true
---

<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Documentation Development

- [Documentation Development](#documentation-development)
  - [Build the Documentation](#build-the-documentation)
  - [Live Building](#live-building)
  - [Documentation Version](#documentation-version)


## Build the Documentation

The following sections describe how to set up and build the NeMo RL documentation.

Switch to the documentation source folder and generate HTML output.

```sh
cd docs/
uv run --group docs sphinx-build . _build/html
```

* The resulting HTML files are generated in a `_build/html` folder that is created under the project `docs/` folder.
* The generated python API docs are placed in `apidocs` under the `docs/` folder.

## Checking for Broken Links

To check for broken http links in the docs, run this command:

```sh
cd docs/
uv run --group docs sphinx-build --builder linkcheck . _build/linkcheck
```

It will output a JSON file at `_build/linkcheck/output.json` with links it found while building the
docs. Records will have a status of `broken` if the link is not reachable. The `docs/conf.py` file is
configured to ignore github links because the CI test will often experience rate limit errors.
Comment out the `linkcheck_ignore` variable there to check all the links.

## Live Building

When writing documentation, it can be helpful to serve the documentation and have it update live while you edit.

To do so, run:

```sh
cd docs/
uv run --group docs sphinx-autobuild . _build/html --port 12345 --host 0.0.0.0
```

Open a web browser and go to `http://${HOST_WHERE_SPHINX_COMMAND_RUN}:12345` to view the output.

## Documentation Version

The three files below control the version switcher. Before you attempt to publish a new version of the documentation, update these files to match the latest version numbers.

* docs/versions1.json
* docs/project.json
* docs/conf.py


================================================
FILE: docs/get-started/install.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Installation

## System Requirements

### Hardware

- **Recommended**: NVIDIA Turing architecture or later
- **FP8 Support**: Requires NVIDIA Hopper, Ada, or Blackwell GPUs

### Software

- **Python**: >= 3.10 (3.12 recommended)
- **PyTorch**: >= 2.6.0
- **CUDA Toolkit**: Latest stable version


## Prerequisites

Install [uv](https://docs.astral.sh/uv/), a fast Python package installer:

```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
```


## Option A: Pip Install (Recommended)

Install the latest stable release from PyPI:

```bash
uv pip install megatron-core
```

To include optional training dependencies (Weights & Biases, SentencePiece, HF Transformers):

```bash
uv pip install "megatron-core[training]"
```

For all extras including [Transformer Engine](https://github.com/NVIDIA/TransformerEngine):

```bash
uv pip install --group build
uv pip install --no-build-isolation "megatron-core[training,dev]"
```

```{note}
`--no-build-isolation` requires build dependencies to be pre-installed in the environment. `torch` is needed because several `[dev]` packages (`mamba-ssm`, `nv-grouped-gemm`, `transformer-engine`) import it at build time to compile CUDA kernels. Expect this step to take **20+ minutes** depending on your hardware. If you prefer pre-built binaries, the [NGC Container](#option-c-ngc-container) ships with these pre-compiled.
```

```{warning}
Building from source can consume a large amount of memory. By default the build runs one compiler job per CPU core, which may cause out-of-memory failures on machines with many cores. To limit parallel compilation jobs, set the `MAX_JOBS` environment variable before installing (e.g. `MAX_JOBS=4`).
```

```{tip}
For a lighter set of development dependencies without Transformer Engine and ModelOpt, use `[lts]` instead of `[dev]`: `uv pip install --no-build-isolation "megatron-core[training,lts]"`. The `[lts]` and `[dev]` extras are mutually exclusive.
```

To clone the repository for examples:

```bash
git clone https://github.com/NVIDIA/Megatron-LM.git
```


## Option B: Install from Source

For development or to run the latest unreleased code:

```bash
git clone https://github.com/NVIDIA/Megatron-LM.git
cd Megatron-LM
uv pip install -e .
```

To install with all development dependencies (includes Transformer Engine, requires pre-installed build deps):

```bash
uv pip install --group build
uv pip install --no-build-isolation -e ".[training,dev]"
```

```{tip}
If the build runs out of memory, limit parallel compilation jobs with `MAX_JOBS=4 uv pip install --no-build-isolation -e ".[training,dev]"`.
```


## Option C: NGC Container

For a pre-configured environment with all dependencies pre-installed (PyTorch, CUDA, cuDNN, NCCL, Transformer Engine), use the [PyTorch NGC Container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch).

We recommend using the **previous month's** NGC container rather than the latest one to ensure compatibility with the current Megatron Core release and testing matrix.

```bash
docker run --gpus all -it --rm \
  -v /path/to/dataset:/workspace/dataset \
  -v /path/to/checkpoints:/workspace/checkpoints \
  -e PIP_CONSTRAINT= \
  nvcr.io/nvidia/pytorch:26.01-py3
```

```{note}
The NGC PyTorch container constrains the Python environment globally via `PIP_CONSTRAINT`. The `-e PIP_CONSTRAINT=` flag above unsets this so that Megatron Core and its dependencies install correctly.
```

Then install Megatron Core inside the container (torch is already available in the NGC image):

```bash
pip install uv
uv pip install --no-build-isolation "megatron-core[training,dev]"
```


You are now ready to run training. See [Your First Training Run](quickstart.md) for next steps.


================================================
FILE: docs/get-started/overview.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Overview

Megatron-Core and Megatron-LM are open-source tools that are typically used together to train LLMs at scale across GPUs. Megatron-Core expands the capability of Megatron-LM. Megatron Bridge connects Megatron-Core and Megatron-LM to other popular training models, such as Hugging Face.

## Megatron Core

NVIDIA Megatron Core is a library of essential building blocks for highly efficient large-scale generative AI training. It can be used to train models with unparalleled speed at scale across thousands of GPUs. It provides an extensive set of tools for multimodal and speech AI. It expands Megatron LM capabilities.

Megatron-Core contains GPU-optimized techniques featuring advanced parallelism strategies, optimizations like FP8 training, and support for the latest LLM, MoE, and multimodal architectures. It abstracts these techniques into composable and modular APIs.

Megatron-Core is compatible with all NVIDIA Tensor Core GPUs and popular LLM architectures such as GPT, BERT, T5, and RETRO.


**Composable library** with GPU-optimized building blocks for custom training frameworks.

**Best for:**

- **Framework developers** building on top of modular and optimized components
- **Research teams** needing custom training loops, optimizers, or data pipelines
- **ML engineers** requiring fault-tolerant training pipelines

**What you get:**

- Composable transformer building blocks (attention, MLP)
- Advanced parallelism strategies (TP, PP, DP, EP, CP)
- Pipeline schedules and distributed optimizers
- Mixed precision support (FP16, BF16, FP8)
- GPU-optimized kernels and memory management
- High-performance dataloaders and dataset utilities
- Model architectures (LLaMA, Qwen, GPT, Mixtral, Mamba)

## Megatron-LM

Megatron-LM is a reference implementation, with a lightweight large-scale LLM training framework. It offers a customizable native PyTorch training loop with fewer abstraction layers. It was designed for scaling transformer models to the multi-billion and trillion-parameter regimes under realistic memory and compute constraints. **It serves as a straightforward entry point for exploring Megatron-Core.**

It uses advanced parallelization techniques including model parallelism (tensor and pipeline), to allow models with billions of parameters to fit and train across large GPU clusters. It enables breakthroughs in large-scale NLP tasks. It splits model computations across many GPUs, overcoming single-GPU memory limits for training huge models, like GPT-style transformers.  


**Reference implementation** that includes Megatron Core plus everything needed to train models.

**Best for:**

- **Training state-of-the-art foundation models** at scale with cutting-edge performance on latest NVIDIA hardware
- **Research teams** exploring new architectures and training techniques
- **Learning distributed training** concepts and best practices
- **Quick experimentation** with proven model configurations

**What you get:**

- Pre-configured training scripts for GPT, LLaMA, DeepSeek, Qwen, and more.
- End-to-end examples from data prep to evaluation
- Research-focused tools and utilities


## Megatron Bridge

Megatron Bridge provides out-of-the-box bridges and training recipes for models built on top of base model architectures from Megatron Core.  

Megatron Bridge provides a robust, parallelism-aware pathway to convert models and checkpoints. This bidirectional converter performs on-the-fly, model-parallel-aware, per-parameter conversion, and full in-memory loading.

After training or modifying a Megatron model, you can convert it again for deployment or sharing.  

[Megatron Bridge](https://github.com/NVIDIA-NeMo/Megatron-Bridge)


## Ecosystem Libraries

**Libraries used by Megatron Core:**

- **[Megatron Energon](https://github.com/NVIDIA/Megatron-Energon)** - Multi-modal data loader (text, images, video, audio) with distributed loading and dataset blending
- **[Transformer Engine](https://github.com/NVIDIA/TransformerEngine)** - Optimized kernels and FP8 mixed precision support
- **[Resiliency Extension (NVRx)](https://github.com/NVIDIA/nvidia-resiliency-ext)** - Fault tolerant training with failure detection and recovery

**Libraries using Megatron Core:**

- **[Megatron Bridge](https://github.com/NVIDIA-NeMo/Megatron-Bridge)** - Training library with bidirectional Hugging Face ↔ Megatron checkpoint conversion, flexible training loops, and production-ready recipes
- **[NeMo RL](https://github.com/NVIDIA-NeMo/RL)** - Scalable toolkit for efficient reinforcement learning with RLHF, DPO, and other post-training methods
- **[NeMo Framework](https://docs.nvidia.com/nemo-framework/user-guide/latest/overview.html)** - Enterprise framework with cloud-native support and end-to-end examples
- **[Model Optimizer (ModelOpt)](https://github.com/NVIDIA/Model-Optimizer)** - Model optimization toolkit for quantization, pruning, distillation, speculative decoding, and more. Checkout end-to-end examples in [examples/post_training/modelopt](https://github.com/NVIDIA/Megatron-LM/tree/main/examples/post_training/modelopt).

**Compatible with:** [Hugging Face Accelerate](https://github.com/huggingface/accelerate), [Colossal-AI](https://github.com/hpcaitech/ColossalAI), [DeepSpeed](https://github.com/microsoft/DeepSpeed)


================================================
FILE: docs/get-started/quickstart.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Your First Training Run

This guide walks you through running your first training jobs with Megatron Core. Make sure you have completed [installation](install.md) before proceeding.

## Simple Training Example

Run a minimal distributed training loop with mock data on 2 GPUs:

```bash
torchrun --nproc_per_node=2 examples/run_simple_mcore_train_loop.py
```

## LLaMA-3 Training Example

Train a LLaMA-3 8B model with FP8 precision on 8 GPUs using mock data:

```bash
./examples/llama/train_llama3_8b_h100_fp8.sh
```

## Data Preparation

To train on your own data, Megatron expects preprocessed binary files (`.bin` and `.idx`).

### 1. Prepare a JSONL File

Each line should contain a `text` field:

```json
{"text": "Your training text here..."}
{"text": "Another training sample..."}
```

### 2. Preprocess the Data

```bash
python tools/preprocess_data.py \
    --input data.jsonl \
    --output-prefix processed_data \
    --tokenizer-type HuggingFaceTokenizer \
    --tokenizer-model /path/to/tokenizer.model \
    --workers 8 \
    --append-eod
```

### Key Arguments

- `--input`: Path to input JSON/JSONL file
- `--output-prefix`: Prefix for output binary files (.bin and .idx)
- `--tokenizer-type`: Tokenizer type (`HuggingFaceTokenizer`, `GPT2BPETokenizer`, etc.)
- `--tokenizer-model`: Path to tokenizer model file
- `--workers`: Number of parallel workers for processing
- `--append-eod`: Add end-of-document token

## Next Steps

- Explore [Parallelism Strategies](../user-guide/parallelism-guide.md) to scale your training
- Learn about [Data Preparation](../user-guide/data-preparation.md) best practices
- Check out [Advanced Features](../user-guide/features/index.md) for advanced capabilities


================================================
FILE: docs/get-started/releasenotes.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Release Notes


## Roadmaps

Stay up-to-date with our development roadmaps and planned features:

- **[MoE Q3-Q4 2025 Roadmap](https://github.com/NVIDIA/Megatron-LM/issues/1729)** - Comprehensive MoE feature development including DeepSeek-V3, Qwen3, advanced parallelism, FP8 optimizations, and Blackwell enhancements
- **[GPT-OSS Implementation Tracker](https://github.com/NVIDIA/Megatron-LM/issues/1739)** - Advanced features including YaRN RoPE scaling, attention sinks, and custom activation functions


================================================
FILE: docs/index.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Megatron Core User Guide

**Megatron Core** is a GPU-optimized library for training large language models at scale. It provides modular, composable building blocks for creating custom training frameworks with state-of-the-art parallelism strategies and performance optimizations.

Megatron Core offers a flexible, reusable foundation for building large-scale transformer training systems. **Megatron-LM** serves as a reference implementation demonstrating how to use Megatron Core components to train models with billions to trillions of parameters across distributed GPU clusters.

## Key Features

* Composable transformer building blocks (attention, MLP)
* Advanced parallelism strategies (TP, PP, DP, EP, CP)
* Pipeline schedules and distributed optimizers
* Mixed precision support (FP16, BF16, FP8)
* GPU-optimized kernels and memory management
* High-performance dataloaders and dataset utilities
* Model architectures (LLaMA, Qwen, DeepSeek, GPT, Mamba)


```{toctree}
:maxdepth: 2
:hidden:
:caption: About Megatron Core

get-started/overview
get-started/releasenotes
```

```{toctree}
:maxdepth: 2
:hidden:
:caption: Get Started

get-started/install
get-started/quickstart
```

```{toctree}
:maxdepth: 2
:hidden:
:caption: Basic Usage

user-guide/data-preparation
user-guide/training-examples
user-guide/parallelism-guide
```

```{toctree}
:maxdepth: 2
:hidden:
:caption: Supported Models

models/index
```

```{toctree}
:maxdepth: 2
:hidden:
:caption: Advanced Features

user-guide/features/moe
user-guide/features/context_parallel
user-guide/features/custom_fsdp
user-guide/features/dist_optimizer
user-guide/features/optimizer_cpu_offload
user-guide/features/pipeline_parallel_layout
user-guide/features/fine_grained_activation_offloading
user-guide/features/megatron_energon
user-guide/features/megatron_rl
user-guide/features/tokenizers
```

```{toctree}
:maxdepth: 1
:hidden:
:caption: Developer Guide

developer/contribute
developer/submit
developer/oncall
developer/generate_docs
```

```{toctree}
:maxdepth: 2
:hidden:
:caption: API Reference

api-guide/index
apidocs/index.rst
```

```{toctree}
:maxdepth: 2
:hidden:
:caption: Resources

advanced/index
```

================================================
FILE: docs/llama_mistral.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Llama, Mistral and other Llama-like model support in Megatron-LM

NOTE: In order to simplify code we now only support converting llama-3.x and mistral checkpoints downloaded from Hugging Face. For converting other models, see [Megatron Bridge](models/index.md).

The Llama-2 and Llama-3.x family of models are an open-source set of pretrained & finetuned (for chat) models that have achieved strong results across a wide set of benchmarks. At their times of release, both Llama-2 and Llama-3 models achieved among the best results for open-source models, and were competitive with leading closed-source models (see <https://arxiv.org/pdf/2307.09288.pdf>).

Similarly, [Mistral-7b](https://mistral.ai/news/announcing-mistral-7b/) is an open-source model with pretrained and finetuned (for chat) variants that achieve strong benchmark results.

Architecturally Llama-2, Llama-3 and Mistral-7b are very similar. As such Megatron can support loading checkpoints from all three for inference and finetuning. Converting the checkpoints and loading them is slightly different for each model and is detailed for each below.

# Contents

- [Llama, Mistral and other Llama-like model support in Megatron-LM](#llama-mistral-and-other-llama-like-model-support-in-megatron-lm)
- [Contents](#contents)
- [Llama-2](#llama-2)
  - [Download Meta or Huggingface checkpoints](#download-meta-or-huggingface-checkpoints)
  - [Convert checkpoint format](#convert-checkpoint-format)
    - [Meta format](#meta-format)
    - [Huggingface format](#huggingface-format)
  - [Launch model](#launch-model)
    - [Launch Megatron](#launch-megatron)
    - [Launch Meta](#launch-meta)
    - [Launch Huggingface](#launch-huggingface)
  - [Benchmark results](#benchmark-results)
    - [Big Bench](#big-bench)
    - [Multilingual](#multilingual)
    - [LM Evaluation Harness](#lm-evaluation-harness)
    - [MMLU](#mmlu)
- [Llama-3.x](#llama-3x)
  - [Download Huggingface checkpoints](#download-huggingface-checkpoints)
  - [Convert checkpoint format](#convert-checkpoint-format)
    - [Huggingface format](#huggingface-format)
  - [(Optional) Validate checkpoints](#optional-validate-checkpoints)
  - [Launch model](#launch-model)
- [Mistral-7b](#mistral-7b)
  - [Download Huggingface checkpoints](#download-huggingface-checkpoints)
  - [Convert checkpoint format](#convert-checkpoint-format)
  - [(Optional) Validate checkpoints](#optional-validate-checkpoints)
  - [Launch model](#launch-model)
- [Other Llama-like model support](#other-llama-like-model-support)
- [Known numerical differences](#known-numerical-differences)
- [Using legacy model format](#using-legacy-model-format)

# Llama-2

Llama-2 checkpoints can be loaded into Megatron for inference and for finetuning. Loading these checkpoints consists of three steps:

1. Get access to download the checkpoints.
2. Convert the checkpoints from Meta/Huggingface format to Megatron format.
3. Setup arguments for launching the model.

The following sections detail these steps. The final section lists benchmark result comparisons between: 1) Llama-2 inference code running the Meta-format checkpoints, and 2) Megatron inference code running the converted checkpoints.

## Download Meta or Huggingface checkpoints

Users must first apply for access to download the Llama-2 checkpoints either directly [Huggingface](https://huggingface.co/docs/transformers/main/model_doc/llama2) (HF). The checkpoints are available in two formats, Meta's native format (available from both the Meta and HF links), and HF's format (available only from HF). Either format can be converted to Megatron, as detailed next.

## Convert checkpoint format

We recommend passing `--dtype bf16` for training or finetuning. Inference can be done in bfloat16 or float16.

### Meta format

The Meta format checkpoints are converted to HF format as an intermediate step before converting to Megatron format. The `transformers` package is required, and must have version >=4.31.0 (e.g., `pip install transformers>=4.31.0`). (**Note**: we have specifically tested with versions `4.31.0` and `4.32.0`; your experience may vary with newer versions.) Assuming the downloaded checkpoints are in `$CHECKPOINT_DIR` (with separate sub-directories for 7B, 13B, 70B, etc.), the following example command can be used to convert from Llama-2 format to HF format in bfloat16:

```
python tools/checkpoint/convert.py \
>   --model-type GPT \
>   --loader llama_mistral \
>   --load-dir ${META_FORMAT_DIR} \
>   --model-size ${MODEL_SIZE} \
>   --checkpoint-type meta \
>   --tokenizer-model ${TOKENIZER_MODEL} \
>   --saver core \
>   --save-dir ${MEGATRON_FORMAT_DIR} \
>   --target-tensor-parallel-size ${TP} \
>   --target-pipeline-parallel-size ${PP} \
>   --bf16
```

Valid values for `--model-size` are `llama2-7B`, `llama2-13B`, and `llama2-70B` (for pretrained-only models), and `llama2-7Bf`, `llama2-13Bf`, and `llama2-70Bf` (for chat-finetuned models).

### Huggingface format

The HF checkpoints can be converted to Megatron format by using Megatron's own Llama-2 checkpoint converter for HF format (see script `tools/checkpoint/loader_llama_mistral.py`). One important argument that must be set correctly is the tensor parallel size (`TP`) for each model. The following table shows these values:

| Model size | Tensor parallel size (`TP`) |
| ---------- | --------------------------- |
|  7B        | 1                           |
| 13B        | 2                           |
| 70B        | 8                           |

Using these values for `TP`, along with the path to the Llama-2 tokenizer model (automatically downloaded with original checkpoint download; see `${TOKENIZER_MODEL}` below), run the following command from the root of your Megatron source code to convert from HF format to Megatron format:

```
python tools/checkpoint/convert.py \
>   --model-type GPT \
>   --loader llama_mistral \
>   --load-dir ${HF_FORMAT_DIR} \
>   --model-size ${MODEL_SIZE} \
>   --checkpoint-type hf \
>   --tokenizer-model ${TOKENIZER_MODEL} \
>   --saver core \
>   --save-dir ${MEGATRON_FORMAT_DIR} \
>   --target-tensor-parallel-size ${TP} \
>   --target-pipeline-parallel-size ${PP} \
>   --bf16
```

After this conversion, we are ready to load the checkpoints into a Megatron GPT model.

## Launch model

### Launch Megatron

If loading for either inference or finetuning, use the following arguments:

```
--tensor-model-parallel-size ${TP} \
--pipeline-model-parallel-size 1 \
--seq-length 4096 \
--max-position-embeddings 4096 \
--tokenizer-type Llama2Tokenizer \
--tokenizer-model ${TOKENIZER_MODEL} \
--load ${CHECKPOINT_DIR} \
--exit-on-missing-checkpoint \
--use-checkpoint-args \
--no-load-optim \
--no-load-rng \
--untie-embeddings-and-output-weights \
--use-rotary-position-embeddings \
--normalization RMSNorm \
--no-position-embedding \
--no-masked-softmax-fusion \
--attention-softmax-in-fp32
```

**Note:** If you converted to the legacy model format (i.e., `--saver legacy`), please see [here](#using-legacy-model-format).

### Launch Meta

Meta checkpoints can be launched with: <https://github.com/facebookresearch/llama>

### Launch Huggingface

Huggingface checkpoints can be launched with: <https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py>

## Benchmark results

The tables below list the benchmark comparisons between native Llama-2 (using Meta's checkpoint and Meta's inference code) and Megatron (using a converted HF checkpoint and Megatron's inference code).

The values are the percent error between Megatron and Llama-2, calculated using the formula: `|<llama_score> - <megatron_score>| / <llama_score>`, where the type of score is detailed before each table. Across all tests (80 total per model size), the mean error is 0.15%. The small difference in benchmark scores between the two models is due to minor arithmetic differences in implementation that alter the numerics slightly. Some of the factors that influence this difference include:

- Megatron performs batch matrix multiplications in a couple places, such as within self attention and in SwiGLU, that Llama performs separately.
- Megatron uses `torch.baddbmm` within self attention, versus Llama using `torch.matmul`.
- Megatron uses a `sin`/`cos` implementation for rotary position embeddings, versus Llama using a `polar`/`complex` implementation.
- Llama calls `torch.set_default_dtype(torch.float16)` during initialization, which Megatron does not.

### Big Bench

Score type: multiple choice grade.

| bigbench / standard | 7b | 13b | 70b |
| -- | -- | -- | -- |
| date_understanding | 0.29% | 0.13% | 0.12% |
| general_knowledge | 0.00% | 0.00% | 0.00% |
| human_organs_senses | 0.00% | 0.00% | 0.00% |
| intent_recognition | 0.00% | 0.11% | 0.00% |
| riddle_sense | 0.00% | 0.00% | 0.00% |
| similarities_abstraction | 0.00% | 0.58% | 0.00% |
| simple_arithmetic_json_multiple_choice | 0.00% | 0.00% | 0.00% |
| undo_permutation | 0.19% | 0.19% | 0.18% |

### Multilingual

Score type: multiple choice grade.

| multilingual / xcopa | 7b  | 13b  | 70b |
| -- | -- | -- | -- |
| en-template-mGPT-remove-punctuation | 0.08% | 0.00% | 0.00% |
| et-template-mGPT-remove-punctuation | 0.00% | 0.13% | 0.25% |
| ht-template-mGPT-remove-punctuation | 0.26% | 0.13% | 0.26% |
| id-template-mGPT-remove-punctuation | 0.11% | 0.00% | 0.19% |
| it-template-mGPT-remove-punctuation | 0.00% | 0.10% | 0.09% |
| qu-template-mGPT-remove-punctuation | 0.00% | 0.00% | 0.27% |
| sw-template-mGPT-remove-punctuation | 0.14% | 0.13% | 0.13% |
| th-template-mGPT-remove-punctuation | 0.25% | 0.13% | 0.13% |
| tr-template-mGPT-remove-punctuation | 0.26% | 0.00% | 0.34% |
| vi-template-mGPT-remove-punctuation | 0.00% | 0.11% | 0.00% |
| zh-template-mGPT-remove-punctuation | 0.00% | 0.10% | 0.09% |

### LM Evaluation Harness

Score type: multiple choice grade.

| lm-eval | 7b  | 13b  | 70b |
| -- | -- | -- | -- |
| boolq | 0.04% | 0.04% | 0.07% |
| hellaswag | 0.02% | 0.03% | 0.03% |
| piqa | 0.00% | 0.00% | 0.07% |
| winogrande | 0.00% | 0.11% | 0.20% |

### MMLU

Score type: multiple choice grade.

Note: the number in brackets is the number of sub-tasks for each supercategory.

| mmlu | 7b  | 13b  | 70b |
| -- | -- | -- | -- |
| stem [18]  | 0.79% | 0.05% | 0.01% |
| humanities [13]  | 0.19% | 0.01% | 0.02% |
| other (business, health, misc.) [14]  | 0.08% | 0.06% | 0.12% |
| social sciences [12]  | 0.37% | 0.21% | 0.01% |

# Llama-3.x

Llama-3.x checkpoints can be loaded into Megatron for inference and for finetuning. Loading these checkpoints consists of several steps:

1. Get access to download the checkpoints (weights and tokenizer).
2. Convert the checkpoints from Huggingface format to Megatron format.
3. (Optional) Validate converted checkpoints
4. Setup arguments for launching the model.

The following sections detail these steps.

## Download Huggingface checkpoints

Users must first apply for access to download the Llama-3.x checkpoints from [Huggingface](https://huggingface.co/meta-llama).

## Convert checkpoint format

We recommend passing `--dtype bf16` for training or finetuning. Inference can be done in bfloat16 or float16.

### Huggingface format

The HF checkpoints can be converted to Megatron format by using Megatron's own Llama-3.x checkpoint converter for HF format (see script `tools/checkpoint/loader_llama_mistral.py`). One important argument that must be set correctly is the tensor parallel size (`TP`) for each model. The following table shows these values:

| Model size | Tensor parallel size (`TP`) |
| ---------- | --------------------------- |
|  1B        | 1                           |
|  3B        | 1                           |
|  8B        | 1                           |
| 70B        | 8                           |

Using these values for `TP`, along with the path to the Llama-3.x tokenizer model (automatically downloaded with original checkpoint download; see `${TOKENIZER_MODEL}` below), run the following command from the root of your Megatron source code to convert from HF format to Megatron format:

```
$>: python tools/checkpoint/convert.py \
 >    --bf16 \
 >    --model-type GPT \
 >    --loader llama_mistral \
 >    --saver core \
 >    --target-tensor-parallel-size ${TP} \
 >    --checkpoint-type hf \
 >    --load-dir ${HF_FORMAT_DIR} \
 >    --save-dir ${MEGATRON_FORMAT_DIR} \
 >    --tokenizer-model ${TOKENIZER_MODEL} \
 >    --model-size llama3 \
```

After this conversion, we are ready to load the checkpoints into a Megatron GPT model.

## (Optional) Validate checkpoints

A Megatron-LM text generation server for Llama3 can be launched using the script `examples/inference/llama_mistral/run_text_generation_llama3.sh <PATH_TO_CONVERTED_CORE_CHECKPOINT> <PATH_TO_DOWNLOADED_HUGGINGFACE_CHECKPOINT>`. For Llama3.1, please use `examples/inference/llama_mistral/run_text_generation_llama3.1.sh`.

Once running, query the server with `curl 'http://<TEXT_GENERATION_SERVER_IP>:5000/api' -X 'PUT' -H 'Content-Type: application/json; charset=UTF-8'  -d '{"prompts":["<SOME_PROMPT>"], "tokens_to_generate":100, "top_k":1}'`.

A reference generation for comparison can be obtained from the Huggingface transformers library by running `python examples/llama_mistral/huggingface_reference.py --model_path <PATH_TO_DOWNLOADED_HUGGINGFACE_CHECKPOINT> --prompt <SOME_PROMPT>`.

## Launch model

If loading for either inference or finetuning, use the following arguments for Llama 3.0:

```
--tensor-model-parallel-size ${TP} \
--pipeline-model-parallel-size 1 \
--seq-length 8192 \
--max-position-embeddings 8192 \
--tokenizer-type HuggingFaceTokenizer \
--tokenizer-model ${TOKENIZER_MODEL} \
--load ${CHECKPOINT_DIR} \
--exit-on-missing-checkpoint \
--use-checkpoint-args \
--no-load-optim \
--no-load-rng \
--untie-embeddings-and-output-weights \
--normalization RMSNorm \
--position-embedding-type rope \
--no-masked-softmax-fusion \
--attention-softmax-in-fp32 \
--disable-bias-linear \
--transformer-impl transformer_engine \
--group-query-attention 8 \
--attention-dropout 0.0 \
--hidden-dropout 0.0 \
--rotary-base 500000 \
--rotary-percent 1.0 \
--ffn-hidden-size 14336 \
--num-attention-heads 32 \
--swiglu \
--bf16 \
```

For Llama3.1 please use the following arguments:

```
--tensor-model-parallel-size ${TP} \
--pipeline-model-parallel-size 1 \
--seq-length 8192 \
--max-position-embeddings 131072 \
--tokenizer-type HuggingFaceTokenizer \
--tokenizer-model ${TOKENIZER_MODEL} \
--load ${CHECKPOINT_DIR} \
--exit-on-missing-checkpoint \
--use-checkpoint-args \
--no-load-optim \
--no-load-rng \
--untie-embeddings-and-output-weights \
--normalization RMSNorm \
--position-embedding-type rope \
--no-masked-softmax-fusion \
--attention-softmax-in-fp32 \
--disable-bias-linear \
--transformer-impl transformer_engine \
--group-query-attention 8 \
--attention-dropout 0.0 \
--hidden-dropout 0.0 \
--rotary-base 500000 \
--rotary-percent 1.0 \
--use-rope-scaling \
--ffn-hidden-size 14336 \
--num-attention-heads 32 \
--swiglu \
--bf16 \
```

**Note:** If you converted to the legacy model format (i.e., `--saver legacy`), please see [here](#using-legacy-model-format).

# Mistral-7b

Megatron currently supports loading the v0.3 release of Mistral-7b (which does not use sliding window attention and offers a larger 32768 vocabulary) for inference and finetuning. Loading these checkpoints consists of several steps:

1. Get access to download the checkpoints (weights and tokenizer).
2. Convert the checkpoints from HuggingFace format to Megatron format.
3. (Optional) Validate converted checkpoints
4. Setup arguments for launching the model.

The following sections detail these steps.

## Download Huggingface checkpoints

Users must first apply for access to download the Mistral-7b checkpoints through Huggingface. Two variants are available: the base model ([Mistral-7B-v0.3](https://huggingface.co/mistralai/Mistral-7B-v0.3)) and the instruct model ([Mistral-7B-Instruct-v0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3)).

## Convert checkpoint format

The HF checkpoints can be converted to Megatron format by using Megatron's own Mistral checkpoint converter for HF format (see script `tools/checkpoint/loader_llama_mistral.py`).

Using the path to the Mistral tokenizer model (downloaded alongside the HF checkpoint), run the following command from the root of your Megatron source code to convert from HF format to the Megatron core format:

```
$>: python tools/checkpoint/convert.py \
 >    --bf16 \
 >    --model-type GPT \
 >    --loader llama_mistral \
 >    --saver core \
 >    --target-tensor-parallel-size ${TP} \
 >    --checkpoint-type hf \
 >    --load-dir ${HF_FORMAT_DIR} \
 >    --save-dir ${MEGATRON_FORMAT_DIR} \
 >    --tokenizer-model ${TOKENIZER_MODEL} \
 >    --model-size mistral \
```

After this conversion, we are ready to load the checkpoints into a Megatron core GPT model.

## (Optional) Validate checkpoints

A Megatron-LM text generation server for Mistral-7B can be launched using the script `examples/inference/llama_mistral/run_text_generation_mistral.sh <PATH_TO_CONVERTED_MCORE_CHECKPOINT> <PATH_TO_DOWNLOADED_HUGGINGFACE_CHECKPOINT>`.

Once running, query the server with `curl 'http://<TEXT_GENERATION_SERVER_IP>:5000/api' -X 'PUT' -H 'Content-Type: application/json; charset=UTF-8'  -d '{"prompts":["<SOME_PROMPT>"], "tokens_to_generate":100, "top_k":1}'`.

A reference generation for comparison can be obtained from the Huggingface transformers library by running `python examples/inference/llama_mistral/huggingface_reference.py --model_path <PATH_TO_DOWNLOADED_HUGGINGFACE_CHECKPOINT> --prompt <SOME_PROMPT>`.

## Launch model

If loading for either inference or finetuning, use the following arguments:

```
--tensor-model-parallel-size ${TP} \
--pipeline-model-parallel-size 1 \
--seq-length 4096 \
--max-position-embeddings 4096 \
--tokenizer-type HuggingFaceTokenizer \
--tokenizer-model ${TOKENIZER_MODEL} \
--load ${CHECKPOINT_DIR} \
--exit-on-missing-checkpoint \
--use-checkpoint-args \
--no-load-optim \
--no-load-rng \
--untie-embeddings-and-output-weights \
--normalization RMSNorm \
--position-embedding-type rope \
--no-masked-softmax-fusion \
--attention-softmax-in-fp32
--apply-layernorm-1p \
--transformer-impl transformer_engine \
--group-query-attention 8 \
--disable-bia-linear \
--rotary-base 1000000 \
--rotary-percent 1.0 \
--swiglu \
--ffn-hidden-size 14336 \
--num-attention-heads 32
```

**Note:** If you converted to the legacy model format (i.e., `--saver legacy`), please see [here](#using-legacy-model-format).

# Other Llama-like model support

*Note: Experimental*

Many models such as Yi-34B and Qwen2.x use the Llama architecture and may be converted from HuggingFace to Megatron using the commands in [Llama-3.x](#llama-3x).

# Known numerical differences

It is not expected that the megatron and Huggingface implementations of llama3.x and mistral models will produce numerically identical results. There are multiple points where small numerical differences are expected. This is a non-exhaustive list:

1. TransformerEngine (TE) uses the model params_dtype inside RMSNorm whereas the Huggingface implementation uses fp32. See for details: <https://github.com/NVIDIA/TransformerEngine/issues/1132>
2. Huggingface `transformers` implements the q, k and v projections in self-attention as separate GEMMs whereas Megatron core combines them into a single GEMM for efficiency. This leads to small numerical differences.

# Using legacy model format

In all the checkpoint conversion examples used in this document, the saver format `--saver core` is used, signifying that the newer (and recommended) Megatron GPT model class will be used. I.e.:

- old class: `megatron.legacy.model.gpt_model.GPTModel`
- new class: `megatron.core.models.gpt.gpt_model.GPTModel`

Using this new format is the recommended approach. However, if your use case requires using the older class (i.e., convert using `--saver legacy`), then when launching training or finetuning, the following args must be added:

- `--use-legacy-models`: use the older model class
- `--ckpt-format torch`: use the `torch` checkpoint format, which is the only checkpoint format that is compatible with the legacy model format


================================================
FILE: docs/models/index.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Supported Models

Megatron Core supports a wide range of language and multimodal models with optimized implementations for large-scale training.

## Model Conversion

For converting HuggingFace models to Megatron format, use [Megatron Bridge](https://github.com/NVIDIA-NeMo/Megatron-Bridge), the official standalone converter. Megatron Bridge supports an extensive list of models including LLaMA, Mistral, Mixtral, Qwen, DeepSeek, Gemma, Phi, Nemotron, and many more.

See the [Megatron Bridge supported models list](https://github.com/NVIDIA-NeMo/Megatron-Bridge?tab=readme-ov-file#supported-models) for the complete and up-to-date list of supported models.

```{toctree}
:maxdepth: 1

llms
multimodal
../llama_mistral
```


================================================
FILE: docs/models/llms.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Language Models

Megatron Core supports the following language model architectures for large-scale training.

## Converting HuggingFace Models

Use [**Megatron Bridge**](https://github.com/NVIDIA-NeMo/Megatron-Bridge) to convert HuggingFace models to Megatron format. Megatron Bridge is the official standalone converter with support for an extensive list of models including LLaMA, Mistral, Mixtral, Qwen, DeepSeek, Gemma, Phi, Nemotron, and many more.

See the [Megatron Bridge supported models list](https://github.com/NVIDIA-NeMo/Megatron-Bridge?tab=readme-ov-file#supported-models) for the complete and up-to-date list.

## Decoder-Only Models

| Model | Description | Key Features |
|-------|-------------|--------------|
| **GPT** | Generative Pre-trained Transformer | Standard autoregressive LM, foundational architecture |
| **LLaMA** | Meta's LLaMA family | Efficient architecture with RoPE, SwiGLU, RMSNorm |
| **Mistral** | Mistral AI models | Sliding window attention, efficient inference |
| **Mixtral** | Sparse Mixture-of-Experts | 8x7B MoE architecture for efficient scaling |
| **Qwen** | Alibaba's Qwen series | HuggingFace integration, multilingual support |
| **Mamba** | State Space Model | Subquadratic sequence length scaling, efficient long context |

## Encoder-Only Models

| Model | Description | Key Features |
|-------|-------------|--------------|
| **BERT** | Bidirectional Encoder Representations | Masked language modeling, classification tasks |

## Encoder-Decoder Models

| Model | Description | Key Features |
|-------|-------------|--------------|
| **T5** | Text-to-Text Transfer Transformer | Unified text-to-text framework, sequence-to-sequence |

## Example Scripts

Training examples for these models can be found in the `examples/` directory:
- `examples/gpt3/` - GPT-3 training scripts
- `examples/llama/` - LLaMA training scripts
- `examples/mixtral/` - Mixtral MoE training
- `examples/mamba/` - Mamba training scripts
- `examples/bert/` - BERT training scripts
- `examples/t5/` - T5 training scripts

## Model Implementation

All language models are built using Megatron Core's composable transformer blocks, enabling:
- Flexible parallelism strategies (TP, PP, DP, EP, CP)
- Mixed precision training (FP16, BF16, FP8)
- Distributed checkpointing
- Efficient memory management


================================================
FILE: docs/models/multimodal.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Multimodal Models

Megatron Core supports multimodal models that combine language with vision, audio, and other modalities for comprehensive multimodal understanding.

## MIMO: Multimodal In/Out Framework

**MIMO (Multimodal In/Out Model)** is an experimental framework in Megatron Core that supports arbitrary combinations of modalities including vision, audio, and text. MIMO provides a flexible architecture for building custom multimodal models.

> **Note**: MIMO is experimental and under active development. The API may change in future releases.

**Key Features:**
- Arbitrary modality combinations (vision, audio, text, etc.)
- Flexible encoder architecture for different input modalities
- Unified embedding space across modalities
- Support for both vision-language and audio-vision-language models

See [examples/mimo](https://github.com/NVIDIA/Megatron-LM/tree/main/examples/mimo) for training scripts and examples.

## Vision-Language Models

| Model | Description | Vision Encoder | Language Model |
|-------|-------------|----------------|----------------|
| **LLaVA** | Visual instruction tuning | CLIP ViT-L/14 | Mistral-7B / LLaMA |
| **NVLM** | NVIDIA Vision-Language Model | CLIP / Custom ViT | LLaMA-based |
| **LLaMA 3.1 Nemotron Nano VL** | Efficient multimodal model | Vision Transformer | LLaMA 3.1 8B |

## Vision Encoders

| Model | Description | Key Features |
|-------|-------------|--------------|
| **CLIP ViT** | OpenAI's CLIP Vision Transformer | Image-text alignment, multiple scales (L/14@336px) |
| **RADIO** | Resolution-Agnostic Dynamic Image Optimization | Flexible resolution handling, efficient vision encoding |

## Diffusion Models

For multimodal diffusion models (image generation, text-to-image, etc.), see [NeMo Diffusion Models](https://github.com/NVIDIA-NeMo/NeMo/tree/main/nemo/collections/diffusion). NeMo provides production-ready implementations of:
- Stable Diffusion variants
- Text-to-image generation
- Image-to-image translation
- ControlNet and other conditioning mechanisms

## Multimodal Features

- **Image-Text Alignment**: Pre-training on image-caption pairs
- **Visual Instruction Tuning**: Fine-tuning on instruction-following datasets
- **Flexible Vision Encoders**: Support for different ViT architectures and resolutions
- **Combined Checkpointing**: Unified checkpoints combining vision and language models
- **Efficient Training**: Full parallelism support (TP, PP, DP) for both vision and language components

## Example Scripts

Multimodal training examples can be found in the following directories:

**MIMO Framework:**
- `examples/mimo/` - Multimodal In/Out training with support for vision-language and audio-vision-language models

**Specific Multimodal Models:**
- `examples/multimodal/` - LLaVA-style training with Mistral + CLIP
- `examples/multimodal/nvlm/` - NVLM training scripts
- `examples/multimodal/llama_3p1_nemotron_nano_vl_8b_v1/` - Nemotron VL training
- `examples/multimodal/radio/` - RADIO vision encoder integration


================================================
FILE: docs/project.json
================================================
{"name": "megatron-lm", "version": "nightly"}


================================================
FILE: docs/user-guide/data-preparation.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Data Preparation

Preparing your data correctly is essential for successful training with Megatron Core.

## Data Format

Megatron Core expects training data in JSONL (JSON Lines) format, where each line is a JSON object:

```json
{"text": "Your training text here..."}
{"text": "Another training sample..."}
{"text": "More training data..."}
```

## Preprocessing Data

Use the `preprocess_data.py` tool to convert your JSONL data into Megatron's binary format:

```bash
python tools/preprocess_data.py \
    --input data.jsonl \
    --output-prefix processed_data \
    --tokenizer-type HuggingFaceTokenizer \
    --tokenizer-model /path/to/tokenizer.model \
    --workers 8 \
    --append-eod
```

### Key Arguments

| Argument | Description |
|----------|-------------|
| `--input` | Path to input JSON/JSONL file |
| `--output-prefix` | Prefix for output binary files (.bin and .idx) |
| `--tokenizer-type` | Tokenizer type (`HuggingFaceTokenizer`, `GPT2BPETokenizer`, etc.) |
| `--tokenizer-model` | Path to tokenizer model file |
| `--workers` | Number of parallel workers for processing |
| `--append-eod` | Add end-of-document token |

## Finding Optimal Number of Workers

Use the `--find-optimal-num-workers` flag to find number of workers which gives the best performance in terms of preprocessed documents per second.
Script will lauch a few short data preprocessing runs with a different number of workers to define the fastest run in respect to collected performance data.

```bash
python tools/preprocess_data.py \
    --input data.jsonl \
    --output-prefix processed_data \
    --tokenizer-type HuggingFaceTokenizer \
    --tokenizer-model /path/to/tokenizer.model \
    --workers 8 \
    --find-optimal-num-workers \
    --workers-to-check 4 8 16 32 \
    --max-documents 50000
```

**Required arguments**

| Argument | Description |
|----------|-------------|
| `--find-optimal-num-workers` | Activates search of optimal number of workers |
| `--workers-to-check` | List of possible number of workers to run |
| `--max-documents` | Number of documents to be preprocessed during each run |

**Output example**

```bash
-----------------------------------
Performance results (fastest → slowest):
1. 16 workers → avg. docs/s: 9606.6476
2. 32 workers → avg. docs/s: 9275.3284
3. 8 workers → avg. docs/s: 9151.9280
4. 4 workers → avg. docs/s: 6391.3819

-----------------------------------
The most optimal num of workers is 16 with avg. preprocessed docs/s: 9606.6476.
-----------------------------------
```

## Output Files

The preprocessing tool generates two files:
- `processed_data.bin` - Binary file containing tokenized sequences
- `processed_data.idx` - Index file for fast random access

## Using Preprocessed Data

Reference your preprocessed data in training scripts:

```bash
--data-path processed_data \
--split 949,50,1  # Train/validation/test split
```

## Common Tokenizers

### HuggingFace Tokenizers

```bash
--tokenizer-type HuggingFaceTokenizer \
--tokenizer-model /path/to/tokenizer.model
```

### GPT-2 BPE Tokenizer

```bash
--tokenizer-type GPT2BPETokenizer \
--vocab-file gpt2-vocab.json \
--merge-file gpt2-merges.txt
```


================================================
FILE: docs/user-guide/features/context_parallel.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# context_parallel package

## Context parallelism overview

```{figure} ../../images/context_parallel/CP_overview.png
:alt: cp_overview
:align: center

Figure 1: A transformer layer running with TP2CP2. Communications next to Attention are for CP, others are for TP. (AG/RS: all-gather in forward and reduce-scatter in backward, RS/AG: reduce-scatter in forward and all-gather in backward, /AG: no-op in forward and all-gather in backward).
```

Context Parallelism ("CP") is a parallelization scheme on the dimension of sequence length. Unlike prior SP (sequence parallelism) which only splits the sequence of Dropout and LayerNorm activations, CP partitions the network inputs and all activations along sequence dimension. With CP, all modules except attention (e.g., Linear, LayerNorm, etc.) can work as usual without any changes, because they do not have inter-token operations. As for attention, the Q (query) of each token needs to compute with the KV (key and value) of all tokens in the same sequence. Hence, CP requires additional all-gather across GPUs to collect the full sequence of KV. Correspondingly, reduce-scatter should be applied to the activation gradients of KV in backward propagation. To reduce activation memory footprint, each GPU only stores the KV of a sequence chunk in forward and gathers KV again in backward. KV communication happens between a GPU and its counterparts in other TP groups. The all-gather and reduce-scatter are transformed to point-to-point communications in ring topology under the hood. Exchanging KV also can leverage MQA/GQA to reduce communication volumes, as they only have one or few attention heads for KV.

For example, in Figure 1, assuming sequence length is 8K, each GPU processes 4K tokens. GPU0 and GPU2 compose a CP group, they exchange KV with each other. Same thing also happens between GPU1 and GPU3. CP is similar to [Ring Attention](https://arxiv.org/abs/2310.01889) but provides better performance by (1) leveraging the latest OSS and cuDNN flash attention kernels; (2) removing unnecessary computation resulted from low-triangle causal masking and achieving optimal load balance among GPUs.

## Context parallelism benefits

```{figure} ../../images/context_parallel/CP_results.png
:alt: cp_results
:align: center

Figure 2: Speedup of 175B GPT with various TP+CP combinations vs. full recompute (i.e., TP8CP1).
```

LLM encounters OOM (out of memory) issue with long context (i.e., long sequence length) because of linearly increasing memory footprint of activations. Recomputing activations in backward can avoid OOM but also introduce significant overheads (~30% with full recompute). Enlarging TP (tensor model parallelism) can fix the OOM issue as well, but it potentially makes compute (e.g., Linear) too short to overlap communication latencies. To be clear, scaling out to more GPUs with bigger TP can hit the overlapping problem no matter if OOM happens.

CP can better address the issues. With CP, each GPU only computes on a part of the sequence, which reduces both computation and communication by CP times. Therefore, there are no concerns about the overlapping between them. The activation memory footprint per GPU is also CP times smaller, hence no OOM issue anymore. As Figure 2 shows, the combinations of TP and CP can achieve optimal performance by eliminating recompute overheads and making the best tradeoff between computation and communications.

## Enabling context parallelism

CP support has been added to GPT. All models that share GPT code path also should be able to benefit from CP, such as Llama. CP can work with TP (tensor model parallelism), PP (pipeline model parallelism), and DP (data parallelism), where the total number of GPUs equals TPxCPxPPxDP. CP also can work with different attention variants, including MHA/MQA/GQA, uni-directional and bi-directional masking.

CP is enabled by simply setting context_parallel_size=<CP_SIZE> in command line. Default context_parallel_size is 1, which means CP is disabled. Running with CP requires Megatron-Core (>=0.5.0) and Transformer Engine (>=1.1).


================================================
FILE: docs/user-guide/features/custom_fsdp.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Megatron FSDP

**NOTE: In M-Core 0.14, the custom FSDP refactored its checkpoint implementation to use DTensor-based torch distributed checkpointing. The custom FSDP was also renamed Megatron FSDP. The relevant sections of this document are no longer applicable.**

## How to use ?

Add these flag to enable MCore custom FSDP.

```bash
--use-megatron-fsdp
--data-parallel-sharding-strategy optim_grads_params
--no-gradient-accumulation-fusion
--use-distributed-optimizer
```

For a practical guide covering required configurations, checkpoint conversion, and example scripts, see the [Megatron-FSDP User Guide](../../discussions/megatron-fsdp-user-guide/megatron-fsdp-user-guide.md).

## Key Features

- **Sharding Strategy**: Efficiently shards optimizer states, gradients, and parameters to reduce memory consumption.
- **Communication and Computation Overlap**: Optimized to enable concurrent execution of communication and computation, enhancing overall efficiency.
- **Supports automatic mixed precision training**: Compatible with BF16 O1/O2/O3 recipes, as well as FP8 compute with FP32 parameters and FP8 parameter training, allowing for flexible precision configurations.
- **Tensor Parallelism (TP), Expert Parallelism (EP) and Context Parallelism (CP)**: Compatible with TP, EP and CP configurations, enabling efficient scaling of large language models.
- **Distributed Model Initialization with Meta Device**: Allows model initialization using meta device, followed by layer-by-layer initialization of distributed model weight buffers via the `Module.reset_parameters` API, facilitating the initialization of extremely large models.

## Configuration Recommendations

### 1. Disable `CUDA_DEVICE_MAX_CONNECTIONS`

To ensure full parallelization of FSDP communication and computation, disable the CUDA_DEVICE_MAX_CONNECTIONS environment variable. This step avoids potential bubble in CUDA stream. (But it may slow down TP and CP to some extent.)

```bash
unset CUDA_DEVICE_MAX_CONNECTIONS
```

### 2. Add `--calculate-per-token-loss`

For gradients sharding mode optimization, include the `--calculate-per-token-loss` flag in your training script. This improves performance by reducing the frequency of gradient scaling, which is also a sizable drain on SM resources.

## Design of Custom FSDP

### 1. Overview

The custom Fully Sharded Data Parallelism (FSDP) implementation in Megatron-Core is specifically designed to optimize memory consumption and performance for large language models. The core design principles include:

 - **Optimized for Large Language Models**: This custom FSDP implementation is tailored to efficiently scale with models containing billions of parameters, ensuring seamless execution and training of massive models.
 - **Efficient Memory Consumption**: By strategically sharding optimizer states, gradients, and model parameters, the custom FSDP significantly reduces memory usage. This approach enables the training of models that would otherwise be too large to fit in memory.
 - **Efficient Workflow & Overlapping Communication and Computation**: The implementation is engineered to minimize the number of communication steps required during training. It maximizes the overlap between communication and computation, thereby enhancing overall training efficiency and reducing latency.
 - **Support for MCore's Efficient Training Methods**: The custom FSDP seamlessly integrates with Megatron-Core's advanced parallelism techniques, including tensor parallelism, expert parallelism and context parallelism. Additionally, it supports automatic mixed precision training, further optimizing training performance and efficiency.

The design of Custom FSDP draws inspiration from PyTorch FSDP [Zhao, Yanli, et al.](https://arxiv.org/pdf/2304.11277) and MCore's distributed optimizer. The introduction to PyTorch FSDP is referenced here to clarify the underlying concepts of the custom FSDP design.

> In DistributedDataParallel, (DDP) training, each process/ worker owns a replica of the model and processes a batch of data, finally it uses all-reduce to sum up gradients over different workers. In DDP the model weights and optimizer states are replicated across all workers. FSDP is a type of data parallelism that shards model parameters, optimizer states and gradients across DDP ranks.

> When training with FSDP, the GPU memory footprint is smaller than when training with DDP across all workers. This makes the training of some very large models feasible by allowing larger models or batch sizes to fit on device. This comes with the cost of increased communication volume. The communication overhead is reduced by internal optimizations like overlapping communication and computation.

![FSDP workflow](../../images/custom_fsdp/FSDP_workflow.png)

*Notice that the unit processed in workflow here is the “FSDP instance 1: N layers”, where an FSDP instance is the smallest FSDP processing unit (also a PyTorch module), which means that we can safely release this module weights after using it (executing the forward or backward of this module), and there will be no other computations computations relying on these weights. This capability is the foundation of FSDP's layer-by-layer execution and memory-saving strategy. An FSDP instance is also referred to as an **FSDP Unit**.*

*It is worth noting that an FSDP instance can correspond to multiple FSDP parameter groups. These groups are separated by Data Parallel (DP) communication groups and the data type of the parameter or gradient. Consequently, an FSDP instance may require several parameter-gather tasks before execution (forward or backward). Each **FSDP parameter group** corresponds to one **Data Parallel Buffer** in custom FSDP.*

At a high level FSDP works as follow:

In constructor
 - Shard model parameters and each rank only keeps its own shard

In forward path
 - Run all_gather to collect all shards from all ranks to recover the full parameter in this FSDP unit
 - Run forward computation
 - Discard parameter shards it has just collected

In backward path
 - Run all_gather to collect all shards from all ranks to recover the full parameter in this FSDP unit
 - Run backward computation
 - Run reduce_scatter to sync gradients
 - Discard parameters.

One way to view FSDP’s sharding is to decompose the DDP gradient all-reduce into reduce-scatter and all-gather. Specifically, during the backward pass, FSDP reduces and scatters gradients, ensuring that each rank possesses a shard of the gradients. Then it updates the corresponding shard of the parameters in the optimizer step. Finally, in the subsequent forward pass, it performs an all-gather operation to collect and combine the updated parameter shards.

![FSDP Allreduce](../../images/custom_fsdp/FSDP_Allreduce.png)

### 2. Custom FSDP underlying data structure

To implement the FSDP functionality described above, the custom FSDP is designed with the following Python classes and data structure:

![MCore Custom FSDP Class Diagram](../../images/custom_fsdp/MCore_Custom_FSDP_Class_Diagram.png)

### 3. The custom FSDP interface: FullyShardedDataParallel

The custom FSDP provides the same programming interface as PyTorch's DistributedDataParallel (DDP) as FullyShardedDataParallel (FSDP). For example, you can apply FSDP to models as follows:

```python
# Initialize model and optimizer
ddp_config.use_megatron_fsdp = True
ddp_config.data_parallel_sharding_strategy = "optim_grads_params"
model = GPTModel(transformer_config)
model = FullyShardedDataParallel(
    transformer_config,
    model,
    ddp_config,
    fsdp_unit_modules = [TransformerLayer, LanguageModelEmbedding],
)
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
optimizer = DistributedOptimizer(optimizer, [model], [model.param_and_grad_buffer])

# Training loop
def train_step(inputs, labels):
    optimizer.zero_grad()
    for mbs_input, mbs_label in zip(inputs, labels):
        outputs = model(mbs_input)
        loss = loss_fn(outputs, mbs_label)
        loss.backward()
    optimizer.step()

# Save and load model and optimizer state dict
def model_and_optimizer_state_dict():
    state_dict = {
        "model": model.sharded_state_dict(),
        "optimizer": optimizer.sharded_state_dict(),
    }
    return state_dict

def load_model_and_optimizer_state_dict(state_dict):
    model.load_state_dict(state_dict["model"])
    optimizer.load_state_dict(state_dict["optimizer"])
```

**Key Notes:**
 - You can configure which modules should be treated as FSDP units via the `fsdp_unit_modules` argument. This configuration is mandatory.
 - The custom FSDP must be used with a distributed optimizer since it provides distributed checkpointing.
 - The data-parallel communication group for parameters is not explicitly shown. Custom FSDP configures these groups as either DP (data-parallel) or EDP (expert data-parallel) based on parameter markings.

#### 3.1 Initializing Models on the Meta Device

For training particularly large models with FSDP, you can initialize the model on the meta device. Using PyTorch's `reset_parameters` API, you can initialize model weights layer by layer during the construction of the `ParamAndGradBuffer`. Most PyTorch native modules and TransformerEngine modules support this API (e.g., [PyTorch Linear](https://github.com/pytorch/pytorch/blob/v2.6.0/torch/nn/modules/linear.py#L114), [TE LayerNormLinear](https://github.com/NVIDIA/TransformerEngine/blob/release_v2.0/transformer_engine/pytorch/module/layernorm_linear.py#L1107)).

```python
# Initialize model on meta device
with torch.device("meta"):
    model = GPTModel(config)

model = FullyShardedDataParallel(
    transformer_config,
    model,
    ddp_config,
    fsdp_unit_modules=[TransformerLayer, LanguageModelEmbedding],
)
```

**Important Considerations:**
1. *Custom Modules*: If your model contains custom modules, ensure they implement the `reset_parameters` API. Otherwise, you may need to force parameter initialization on a CUDA or CPU device.
2. *Tensor Initialization*: Be cautious of tensors created during model initialization without a specified device—they will default to the meta device. To avoid issues, explicitly specify the device for these tensors to ensure compatibility with this function.

### 4. Interaction between Custom FSDP and Model Forward/Backward Propagation

Custom FSDP implements Fully Sharded Data Parallelism (FSDP) through a series of module hooks, gradient hooks, or by adding functions between modules. This involves inserting communications and manipulating parameters and gradients during PyTorch's module forward or backward propagation.

Module hooks summary:
- Module pre-forward hook(`module.register_forward_pre_hook`): This hook unshards model weights before the forward pass. In the case of an FSDP Unit Module, add a RegisterFSDPBackwardFunction function that will reshard model weights and reduce gradients after module backward propagation.
- Module post-forward hook(`module.register_forward_hook`): This hook is used to reshard model weights after the forward pass.
- Root module pre-backward hook(`root_module.register_full_backward_pre_hook`): This hook checks that all model parameters are resharded, in order to avoid unnecessary memory spikes. It also marks all modules as being in the `TrainingState.PRE_BACKWARD` state.
- Module pre-backward hook(`module.register_full_backward_pre_hook`): This hook is used to unshard the model weights before the backward pass.
- Root module post-backward hook(`torch.autograd.Variable._execution_engine.queue_callback`): This hook is used to make sure all gradients in the backprop are properly handled / available.

The gradient reduction pipeline maintains a map of gradients to FSDP parameter groups. If all gradients in an FSDP parameter group are ready, it launches a gradient reduction. Note that this assumes that the model's gradients are always generated in a certain order (reverse of `module.parameters()`), as otherwise, FSDP would maintain too many parameter group grad buffers, leading to excessive memory usage.

#### 4.1 Optimized for Activation Recompute

Using the activation recompute will cause the same module to execute the forward function first and then the backward function in the backward prop, which will cause model weights unshard twice and model weights reshard twice. If we can tell program that this is a forward + backward operation, we can just call unshard once and reshard once.

To make this determination, we keep track of the model's state with training_state, `FORWARD`, `PRE_BACKWARD`, `POST_BACKWARD`, `IDLE`. It's worth noting that pre-backward hook act before pre-forward hook, and we'll let pre-backward hook execute the model weight unshard, and then mark the model as `PRE_BACKWARD`, and when pre-forward hook sees this marking it will not perform the unshard operation. Similarly, for model weight reshard duplicate, post-forward hook act before post-backward function, and checking for the `PRE_BACKWARD` flag in the post-forward hook will cancel the unshard.

### 5. Memory Mechanisms and Features of Custom FSDP

FSDP can fully distribute the model parameters, gradients, and optimizer states, and for mixed-precision training, it can also fully distribute the high-precision main weights. This is pretty much distributes all the memory except for the activation memory, but FSDP will also face some memory issues.

FSDP frequently unshards and reshards model weights, which can lead to busy memory allocation and deallocation. This results in untimely tensor releases, causing memory spikes (or even out-of-memory errors), crashes of the PyTorch memory allocator cache, and a large number of `cudaMalloc` and `cudaFree` calls. These issues can significantly slow down the system.

The problem of untimely tensor release can generally be addressed using the `tensor._typed_storage(). _resize_(0)` API, which immediately deallocates the storage's memory. Custom FSDP provides interfaces in `AllGatherPipeline` and `GradReducePipeline` to replace the temporary buffer memory allocator used for parameter gathering and gradient reduction with ` StorageResizeBasedBucketAllocator`. This replaces the tensor release operation with the `tensor._typed_storage(). _resize_(0)` API.

The PyTorch memory allocator cache crash is a complex issue that occurs frequently when the actual memory usage approaches the GPU memory limit, leading to poor performance. This problem is challenging and can only be mitigated by avoiding frequent hits on the GPU memory limit. Using a self-managed memory allocator like ` RotaryBucketAllocator` is another potential solution. However, note that `RotaryBucketAllocator` is not yet mature.

## References

- [Getting Started with Fully Sharded Data Parallel (FSDP)](https://pytorch.org/tutorials/intermediate/FSDP_tutorial.html)


================================================
FILE: docs/user-guide/features/dist_optimizer.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Distributed Optimizer

The motivation for the distributed optimizer is to save memory by distributing the optimizer state evenly across data parallel ranks (https://arxiv.org/abs/1910.02054), versus the naive method of replicating the optimizer state across data parallel ranks.

Theoretical memory savings vary depending on the combination of the datatype of the model's parameters (`param_dtype`) and main gradients accumulated across data-parallel replicas (`grad_dtype`). We always use `fp32` main parameters for optimizer steps. In the current implementation, the theoretical number of bytes per parameter is (where d is the data parallel size):

|        | Non-distributed optim | Distributed optim |
| ------ | ------ | ------ |
| `fp16` parameters, `fp16` gradients | 20 | 4 + 16/d |
| `bf16` parameters, `fp32` gradients    | 18 | 6 + 12/d |
| `fp32` parameters, `fp32` gradients       | 16 | 8 + 8/d  |

Our implementation of the distributed optimizer uses contiguous buffers for parameters and main gradients; model gradients are copied over to the main gradients as soon as they are fully computed.

The figures below illustrate the distributed optimizer's sharding scheme, and the key steps of the distributed optimizer's parameter update:

## Data flow

![Data flow](../../images/distrib_optimizer/data_flow.png)

## Sharding scheme

![Sharding scheme](../../images/distrib_optimizer/sharding_scheme.png)

## Key steps

_(note: using illustrations above, assuming `bf16` model weights, `bf16` model gradients that are computed by the backward pass and `fp32` main gradients that are also used for optimizer steps; we always use `fp32` main weights for optimizer steps)_

- Backward pass finishes (gradient buffer holds 16 `fp32` gradient elements).
- Call reduce-scatter on each DP rank.
- Each DP rank now has 4 elements within the gradient buffer that are fully reduced (remaining 12 elements are garbage).
  - DP rank 0 has gradient values for elements [0:4].
  - DP rank 1 has gradient values for elements [4:8].
  - DP rank 2 has gradient values for elements [8:12].
  - DP rank 3 has gradient values for elements [12:16].
- Optimizer.step().
- Each DP rank copies its 4 `fp32` main parameter elements into the corresponding `bf16` parameter buffer (each element is cast from fp32 to fp16).
- Call all-gather on each DP rank.
- The parameter buffer now contains all 16, fully updated, `bf16` model parameter elements. Parameters in PyTorch modules already point to the appropriate locations in this parameter buffer, and thus forward passes are ready to run after the all-gather completes.
- At this point, the gradient buffer is also ready to be zero'd for the next iteration.


================================================
FILE: docs/user-guide/features/fine_grained_activation_offloading.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Fine-grained Activation Offloading (collaborated with rednote)

Memory capacity is more and more important with the rising of extreme sparse MoE models like DeepSeek-V3 and Qwen3-235B. Fine-grained recomputing reduces the memory footprint at the cost of extra recomputation, while offloading could utilize the host-device bandwidth to achieve nearly zero-overhead. Fine-grained Activation Offloading targets at offloading the activation at the granularity of specific modules, so that we can calibrate the amount of offloading activation to maximize the training throughput.

Currently, the supported offloading modules are `"attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act"`, which could work with fine-grained recomputation to release almost all activations of a transformer layer.

**Features**
* Support PP=1/PP/Interleaved PP
* Compatible with fine-grained recomputation
* Support FP8
* Support MTP
* Support mixed dense & moe layer
* Support A2A Overlap
* Support CUDA Graph
  * (Temporary) cuda graph scope cannot contains the offloading modules

**Usage**
```bash
# Enable fine-grained activation offloading
--fine-grained-activation-offloading

# Specify which modules are going to offload its input
# Choices: "attn_norm", "core_attn", "attn_proj", "mlp_norm", "expert_fc1", "moe_act".
--offload-modules expert_fc1
```
**Compatible with Fine-grained Recomputation**
- For modules with minor perf overhead like layernorm or moe_act, use recomputing to reduce memory footprint;
- For other modules, use offloading to reduce memory footprint;
- Make sure the offloading/reloading could be overlapped with computing;

![Fine-grained Activation Offloading and Fine-grained Recomputation](../../images/fine_grained_activation_offloading/offloading_and_recomputing.png)


================================================
FILE: docs/user-guide/features/index.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Advanced Features

Advanced feature guides for key Megatron Core capabilities.

```{toctree}
:maxdepth: 2

fine_grained_activation_offloading
moe
context_parallel
custom_fsdp
dist_optimizer
optimizer_cpu_offload
pipeline_parallel_layout
tokenizers
megatron_energon
megatron_rl
```


================================================
FILE: docs/user-guide/features/megatron_energon.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Megatron Energon

Advanced multimodal dataloader for efficient loading of text, images, video, and audio at scale.

## Overview

[**Megatron Energon**](https://github.com/NVIDIA/Megatron-Energon) is purpose-built for large-scale multimodal training with:

- **Multimodal support** - Text, images, video, audio
- **Distributed loading** - Optimized for multi-node training
- **Data blending** - Mix datasets with configurable weights
- **WebDataset format** - Efficient streaming from cloud storage
- **State management** - Save and restore training position

## Installation

```bash
pip install megatron-energon
```

## Key Features

### Data Processing

- **Packing** - Optimize sequence length utilization
- **Grouping** - Smart batching of similar-length sequences
- **Joining** - Combine multiple dataset sources
- **Object storage** - Stream from S3, GCS, Azure Blob Storage

### Production-Ready

- Distributed loading across workers and nodes
- Checkpoint data loading state
- Memory-efficient streaming
- Parallel data loading with prefetching

## Basic Usage

```python
from megatron.energon import get_train_dataset, get_loader, WorkerConfig

# Create dataset
ds = get_train_dataset(
    '/path/to/dataset',
    batch_size=32,
    shuffle_buffer_size=1000,
    worker_config=WorkerConfig.default_worker_config(),
)

# Create loader and iterate
for batch in get_loader(ds):
    # Training step
    pass
```

## Multimodal Example

```python
# Load image-text dataset
ds = get_train_dataset(
    '/path/to/multimodal/dataset',
    batch_size=32,
    worker_config=WorkerConfig(num_workers=8, prefetch_factor=2),
)

for batch in get_loader(ds):
    images = batch['image']  # Image tensors
    texts = batch['text']    # Text captions
    # Process batch
```

## Dataset Blending

Mix multiple datasets with custom weights:

```python
from megatron.energon import Blender

blended_ds = Blender([
    ('/path/to/dataset1', 0.6),  # 60%
    ('/path/to/dataset2', 0.3),  # 30%
    ('/path/to/dataset3', 0.1),  # 10%
])
```

## Configuration

### Worker Configuration

```python
WorkerConfig(
    num_workers=8,              # Parallel workers
    prefetch_factor=2,          # Batches to prefetch per worker
    persistent_workers=True,    # Keep workers alive between epochs
)
```

### Common Parameters

| Parameter | Description |
|-----------|-------------|
| `batch_size` | Samples per batch |
| `shuffle_buffer_size` | Buffer size for randomization |
| `max_samples_per_sequence` | Max samples to pack into one sequence |
| `worker_config` | Worker configuration for parallel loading |

## Integration with Megatron-LM

```python
from megatron.energon import get_train_dataset, get_loader
from megatron.training import get_args

args = get_args()

train_ds = get_train_dataset(
    args.data_path,
    batch_size=args.micro_batch_size,
)

for iteration, batch in enumerate(get_loader(train_ds)):
    loss = train_step(batch)
```

## Resources

- **[Megatron Energon GitHub](https://github.com/NVIDIA/Megatron-Energon)** - Documentation and examples
- **[Multimodal Examples](https://github.com/NVIDIA/Megatron-LM/tree/main/examples/multimodal)** - Megatron-LM multimodal training

## Next Steps

- Check [Multimodal Models](../../models/multimodal.md) for supported architectures
- See [Training Examples](../training-examples.md) for integration examples


================================================
FILE: docs/user-guide/features/megatron_rl.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Megatron RL

Reinforcement learning library for post-training large language models at scale.

## Overview

[**Megatron RL**](https://github.com/NVIDIA/Megatron-LM/tree/dev/megatron/rl) adds native reinforcement learning capabilities to Megatron-LM for large-scale RL-based post-training of foundation models.

> **Note**: Megatron RL is under active development and primarily designed for research teams exploring RL post-training on modern NVIDIA hardware. For production deployments, use [**NeMo RL**](https://github.com/NVIDIA-NeMo/RL).

## Key Features

- **Decoupled Design** - Clean separation between agent/environment logic and RL implementation
- **Flexible Inference** - Support for Megatron, OpenAI, and HuggingFace inference backends
- **Trainer/Evaluator** - Manages rollout generation and coordinates with inference systems
- **Megatron Integration** - Native integration with Megatron Core inference system

## Architecture

### Components

**Agents & Environments**
- Accept inference handles
- Return experience rollouts with rewards
- Implement custom RL logic

**Trainer/Evaluator**
- Controls rollout generation
- Coordinates with inference systems
- Manages training loops

**Inference Interface**
- Provides `.generate(prompt, **generation_args)` endpoint
- Supports multiple backends (Megatron, OpenAI, HuggingFace)

## Use Cases

- RLHF (Reinforcement Learning from Human Feedback)
- Custom reward-based fine-tuning
- Policy optimization for specific tasks
- Research on RL post-training techniques

## Resources

- **[Megatron RL GitHub](https://github.com/NVIDIA/Megatron-LM/tree/dev/megatron/rl)** - Source code and documentation
- **[Megatron Core Inference](../../api-guide/core/transformer.md)** - Native inference integration


================================================
FILE: docs/user-guide/features/moe.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Mixture of Experts

```{toctree}
:maxdepth: 1
:caption: MoE Features

multi_token_prediction
multi_latent_attention
../../api-guide/router_replay
```

```{include} ../../../megatron/core/transformer/moe/README.md
```


================================================
FILE: docs/user-guide/features/multi_latent_attention.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Multi-Latent Attention

## Multi-Latent Attention overview

Multi-Latent Attention ("MLA") is an innovative attention mechanism introduced by Deepseek team that enhances the efficiency of attention computation by leveraging multiple latent spaces. This approach is particularly beneficial for large language models (LLMs), as it reduces the computational burden associated with traditional attention mechanisms. According to Deepseek-V2 technical report, MLA achieves better performance compared to Multi-Head Attention (MHA) and requires smaller KV cache.

## Enabling Multi-Latent Attention

To enable MLA in Megatron-LM, set the following flags in command line:
- `--multi-latent-attention` to enable MLA in MLP.
- Set `MLATransformerConfig` to configure MLA.


================================================
FILE: docs/user-guide/features/multi_token_prediction.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Multi-Token Prediction (MTP)

Multi-Token Prediction (MTP) extends the prediction scope to multiple future tokens at each position. On the one hand, an MTP objective densifies the training signals and may improve
data efficiency. On the other hand, MTP may enable the model to pre-plan its representations for better prediction of future tokens. In this implementation of MTP, we sequentially predict additional tokens and keep the complete causal chain at each prediction depth. The following figure illustrates our implementation of MTP in [DeepSeek-V3](https://github.com/deepseek-ai/DeepSeek-V3/).

![MTP_implementation](../../images/multi_token_prediction/MTP_implementation.png)

The k-th MTP module consists of a shared embedding layer, a projection matrix, a Transformer block, and a shared output head. For the i-th input token at the (k - 1)-th prediction depth, we first combine the representation of the i-th token and the embedding of the (i + K)-th token with the linear projection. The combined serves as the input of the Transformer block at the k-th depth to produce the output representation.

For more information, refer to [DeepSeek-V3 Technical Report](https://arxiv.org/pdf/2412.19437.pdf)

## Related Arguments

We can train GPTModel like models with Multi-Token Prediction (MTP) by setting mtp_num_layers to be a positive integer.

| Item | Description |
| --- | --- |
| mtp_num_layers | Number of Multi-Token Prediction (MTP) Layers. MTP extends the prediction scope to multiple future tokens at each position. This MTP implementation sequentially predict additional tokens by using D sequential modules to predict D additional tokens. Default is None. |
| mtp_loss_scaling_factor | Scaling factor of Multi-Token Prediction (MTP) loss. We compute the average of the MTP losses across all depths, and multiply it the scaling factor to obtain the overall MTP loss, which serves as an additional training objective. Default is 0.1. |

## Pipeline Parallel Layout for MTP

MTP supports flexible placement of MTP layers across pipeline stages using a custom `pipeline_model_parallel_layout`. By default, all MTP layers are placed on the last pipeline stage, but you can customize their placement.

### MTP Standalone Mode

When MTP layers are placed in a separate virtual pipeline (vpp) stage that is not on the last pipeline rank, the `mtp_standalone` flag is automatically set to `True`. This mode enables MTP to run independently in its own pipeline stage.

### Layout Format

Use `m` to represent MTP layers in the pipeline layout string. For example:
- `"E|t*3|(t|)*5mL"` - MTP in the last stage
- `"E|t*3|(t|)*4tm|L"` - MTP in the second-to-last stage with a decoder layer
- `"E|t*3|(t|)*3tt|m|L"` - MTP in a standalone stage (second-to-last) with no other layers

### Constraints

- All MTP layers must be placed in the same one virtual pipeline stage.
- MTP layers cannot be placed on the first pipeline rank.

## Implementation Notes

- For models with MTP layers, the final layernorm is placed in the stage that contains the last decoder layer, rather than in the post-process stage. This may cause small numerical differences in gradient norm reduction when final layernorm is placed in different pipeline stages in deterministic mode. Bitwise alignment can be achieved by disabling gradient norm clipping.
- MTP loss is computed in the post-processing stage.

## Precautions

Do not use Context Parallel (CP), or arbitrary AttnMaskType, or learned absolute position embedding type with MTP. These use cases are not yet supported.


================================================
FILE: docs/user-guide/features/optimizer_cpu_offload.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Optimizer CPU Offload

```{include} ../../../megatron/core/optimizer/cpu_offloading/README.md
```


================================================
FILE: docs/user-guide/features/pipeline_parallel_layout.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Custom Pipeline Model Parallel Layout

*This is an experimental feature and may be changed.*

`--pipeline-model-parallel-layout` is a flexible API for defining the pipeline parallel partitioning, which is essential for balanced partitioning for an imbalanced model. For example, to partition DeepSeek-V3 (61 decoder layers + 1 mtp layer) with PP16VPP2, we can include the arguments as follows:

```bash
--pipeline-model-parallel-size 16
--pipeline-model-parallel-layout "Et*3|(tt|)*29,m|L"
```

| PP \ VPP rank |            0            |       1       |
|---------------|-------------------------|---------------|
|       0       | embedding + 3 × decoder |  2 × decoder  |
|      1~13     |        2 × decoder      |  2 × decoder  |
|       14      |        2 × decoder      |      mtp      |
|       15      |        2 × decoder      |      loss     |

In the layout string, stages are split by '|'. Replicated stages or layers can be described with multiplication. Commas can be used cosmetically. Symbol choices:

* `E` = embedding layer
* `t` = transformer decoder layer
* `m` = MTP layer
* `L` = loss calculation layer

Note that it is legal to have empty stages, e.g., `E||t|L` (the second stage is empty).


================================================
FILE: docs/user-guide/features/tokenizers.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Tokenizers

Megatron Core provides a unified tokenizer system with a HuggingFace-style API for easy tokenizer management and configuration.

## Overview

The `MegatronTokenizer` class offers a simple, familiar API for loading and managing tokenizers:

- **Automatic detection** - Load any tokenizer type without specifying the library
- **Metadata-based configuration** - Store tokenizer settings in JSON for easy reuse
- **HuggingFace-compatible API** - Familiar `.from_pretrained()` interface
- **Custom tokenizer support** - Extend with model-specific tokenization logic

## Key Features

### Unified API

Use the same API regardless of tokenizer backend (SentencePiece, HuggingFace, TikToken, etc.):

```python
from megatron.core.tokenizers import MegatronTokenizer

tokenizer = MegatronTokenizer.from_pretrained("/path/to/tokenizer")
```

### Tokenizer Metadata

Configuration is stored in a JSON metadata file containing:
- Tokenizer library (HuggingFace, SentencePiece, TikToken, etc.)
- Chat templates
- Custom tokenizer class
- Special token configurations

**Benefits:**
- Set configuration once, reuse everywhere
- No repeated CLI arguments
- Easy sharing - just copy the tokenizer directory

### Automatic Library Detection

The correct tokenizer implementation is automatically selected:
- No need to specify `SentencePieceTokenizer`, `HuggingFaceTokenizer`, etc.
- Library type detected from metadata
- Seamless switching between tokenizer backends

## Basic Usage

### Creating Tokenizer Metadata

Save tokenizer configuration for reuse:

```python
from megatron.core.tokenizers import MegatronTokenizer

# Create metadata for a SentencePiece tokenizer
MegatronTokenizer.write_metadata(
    tokenizer_path="/path/to/tokenizer.model",
    tokenizer_library="sentencepiece",
    chat_template="{% for message in messages %}{{ message.content }}{% endfor %}",
)
```

The metadata is saved as `tokenizer_metadata.json` in the tokenizer directory.

### Loading a Tokenizer

Load from a directory with metadata:

```python
from megatron.core.tokenizers import MegatronTokenizer

# Load with auto-detected configuration
tokenizer = MegatronTokenizer.from_pretrained("/path/to/tokenizer.model")
```

### Loading with Custom Metadata Path

If metadata is stored separately:

```python
tokenizer = MegatronTokenizer.from_pretrained(
    tokenizer_path="/path/to/tokenizer.model",
    metadata_path="/path/to/custom/metadata.json",
)
```

### Loading with Inline Metadata

Pass metadata as a dictionary:

```python
tokenizer = MegatronTokenizer.from_pretrained(
    tokenizer_path="GPT2BPETokenizer",
    metadata_path={"library": "megatron"},
    vocab_file="/path/to/vocab.txt",
)
```

## Advanced Usage

### Custom Tokenizer Classes

Create model-specific tokenization logic:

```python
from megatron.core.tokenizers.text import MegatronTokenizerText

class CustomTokenizer(MegatronTokenizerText):
    def encode(self, text):
        # Custom encoding logic
        return super().encode(text)

    def decode(self, tokens):
        # Custom decoding logic
        return super().decode(tokens)

# Save metadata with custom class
MegatronTokenizer.write_metadata(
    tokenizer_path="/path/to/tokenizer.model",
    tokenizer_library="sentencepiece",
    tokenizer_class=CustomTokenizer,
)
```

### TikToken Tokenizers

Configure TikToken-based tokenizers:

```python
tokenizer = MegatronTokenizer.from_pretrained(
    tokenizer_path="/path/to/tokenizer/model.json",
    metadata_path={"library": "tiktoken"},
    pattern="v2",
    num_special_tokens=1000,
)
```

### Null Tokenizer

Use a null tokenizer for testing or non-text models:

```python
tokenizer = MegatronTokenizer.from_pretrained(
    metadata_path={"library": "null-text"},
    vocab_size=131072,
)
```

## Integration with Megatron-LM

### Using with Training Scripts

The tokenizer system integrates seamlessly with Megatron-LM training:

```bash
# Null tokenizer for testing
torchrun --nproc_per_node=8 pretrain_gpt.py \
    --tokenizer-type NullTokenizer \
    --vocab-size 131072 \
    ...
```

```bash
# HuggingFace tokenizer with metadata
torchrun --nproc_per_node=8 pretrain_gpt.py \
    --tokenizer-type HuggingFaceTokenizer \
    --tokenizer-model meta-llama/Meta-Llama-3-8B \
    --tokenizer-metadata /path/to/metadata.json \
    ...
```

### Auto-Generated Metadata

If `--tokenizer-metadata` is not specified, a default metadata file is generated automatically based on the tokenizer type.

## Supported Tokenizer Libraries

| Library | Description | Use Case |
|---------|-------------|----------|
| **HuggingFace** | Transformers tokenizers | Most modern LLMs (LLaMA, Mistral, etc.) |
| **SentencePiece** | Google's tokenizer | GPT-style models, custom vocabularies |
| **TikToken** | OpenAI's tokenizer | GPT-3.5/GPT-4 style tokenization |
| **Megatron** | Built-in tokenizers | Legacy GPT-2 BPE |
| **Null** | No-op tokenizer | Testing, non-text modalities |

## Common Tokenizer Types

### LLaMA / Mistral

```python
MegatronTokenizer.write_metadata(
    tokenizer_path="/path/to/llama/tokenizer.model",
    tokenizer_library="sentencepiece",
)
```

### GPT-2

```python
MegatronTokenizer.write_metadata(
    tokenizer_path="GPT2BPETokenizer",
    tokenizer_library="megatron",
    vocab_file="/path/to/gpt2-vocab.json",
    merge_file="/path/to/gpt2-merges.txt",
)
```

## Best Practices

1. **Always save metadata** - Create metadata once, reuse across training runs
2. **Use HuggingFace tokenizers** - When possible, for modern LLM compatibility
3. **Test tokenization** - Verify encode/decode before starting training
4. **Version control metadata** - Include `tokenizer_metadata.json` in your experiment configs
5. **Share tokenizer directories** - Include both model files and metadata for reproducibility

## Next Steps

- **Prepare Data**: See [Data Preparation](../data-preparation.md) for preprocessing with tokenizers
- **Train Models**: Use tokenizers in [Training Examples](../training-examples.md)
- **Supported Models**: Check [Language Models](../../models/llms.md) for model-specific tokenizers


================================================
FILE: docs/user-guide/index.md
================================================
---
orphan: true
---

<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# User Guide

Comprehensive guides for using Megatron Core and Megatron-LM.

```{toctree}
:maxdepth: 2

msc_integration
data-preparation
training-examples
parallelism-guide
features/index
```


================================================
FILE: docs/user-guide/msc_integration.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

```{include} ../../megatron/core/MSC_Integration.md
```


================================================
FILE: docs/user-guide/parallelism-guide.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Parallelism Strategies Guide

Megatron Core supports multiple parallelism strategies that can be combined to efficiently train models from billions to trillions of parameters across thousands of GPUs.

## Overview

| Strategy | What it parallelizes | Best for |
|----------|---------------------|----------|
| **Data Parallelism (DP)** | Batch dimension | Standard training, most common |
| **Tensor Parallelism (TP)** | Individual layers | Large layers, GPU memory constraints |
| **Pipeline Parallelism (PP)** | Model depth | Very deep models |
| **Context Parallelism (CP)** | Sequence length | Long sequences (8K+ tokens) |
| **Expert Parallelism (EP)** | MoE experts | Mixture-of-Experts models |

## Data Parallelism (DP)

Replicate the model across GPUs and split the batch.

### Standard Data Parallel (DDP)

```bash
torchrun --nproc_per_node=8 pretrain_gpt.py \
    --data-parallel-sharding-strategy no_shard
```

Each GPU has a full copy of the model and processes a portion of the batch.

### Fully Sharded Data Parallel (FSDP)

Shard model parameters, gradients, and optimizer states to reduce memory:

```bash
# Megatron FSDP (~15% faster than PyTorch FSDP2)
--use-megatron-fsdp \
--data-parallel-sharding-strategy optim_grads_params
```

**Sharding strategies:**
- `optim` - Shard optimizer states only (ZeRO-1)
- `optim_grads` - Shard gradients + optimizer (ZeRO-2)
- `optim_grads_params` - Shard parameters + gradients + optimizer (ZeRO-3)

## Tensor Parallelism (TP)

Split individual model layers across GPUs. Recommended for large hidden dimensions.

```bash
--tensor-model-parallel-size 4  # 4-way tensor parallelism
--sequence-parallel              # Enable sequence parallelism (recommended)
```

**When to use:**
- Model layers don't fit on single GPU
- Large hidden dimensions (4096+)
- Usually combined with DP and PP

## Pipeline Parallelism (PP)

Split model layers across GPUs vertically (by depth).

```bash
--pipeline-model-parallel-size 8              # 8 pipeline stages
--num-layers-per-virtual-pipeline-stage 4     # Virtual pipeline for load balancing
```

**When to use:**
- Very deep models (50+ layers)
- Combine with TP for large models
- Helps distribute memory across GPUs

## Context Parallelism (CP)

Split long sequences across GPUs for efficient long-context training.

```bash
--context-parallel-size 2           # 2-way context parallelism
--cp-comm-type p2p                  # Communication type
```

**When to use:**
- Long sequences (8K+ tokens)
- Reduces activation memory
- Can combine with TP, PP, DP

**→ [Context Parallelism Deep Dive](features/context_parallel.md)** - Detailed guide with performance analysis

## Expert Parallelism (EP)

Distribute experts across GPUs in Mixture-of-Experts models.

```bash
--expert-model-parallel-size 8  # 8-way expert parallelism
--num-experts 64                # 64 experts per MoE layer
--moe-grouped-gemm              # Optimize expert computation
```

**Important:** When combining EP with TP, you **must enable Sequence Parallelism**:

```bash
--tensor-model-parallel-size 4
--expert-model-parallel-size 8
--sequence-parallel  # Required when using TP + EP
```

## Parallelism Selection Guide

Recommended configurations based on [NVIDIA NeMo production setups](https://github.com/NVIDIA/NeMo/tree/main/scripts/performance/recommended_model_configs):

### Language Models

| Model | Size | GPUs | TP | PP | CP | EP | Configuration Notes |
|-------|------|------|----|----|----|----|---------------------|
| **LLaMA-3** | 8B | 8 | 1 | 1 | 2 | 1 | CP=2 for long context (8K seqlen) |
| **LLaMA-3** | 70B | 64 | 4 | 4 | 2 | 1 | Balanced TP+PP for 70B scale |
| **LLaMA-3.1** | 405B | 1024 | 8 | 8 | 2 | 1 | 3D parallelism (TP+PP+CP) |
| **GPT-3** | 175B | 128-512 | 4 | 8 | 1 | 1 | Standard large model config |

### Mixture-of-Experts Models

| Model | Size | GPUs | TP | PP | CP | EP | Configuration Notes |
|-------|------|------|----|----|----|----|---------------------|
| **Mixtral** | 8x7B | 64 | 1 | 4 | 1 | 8 | EP=8 for 8 experts |
| **Mixtral** | 8x22B | 256 | 4 | 4 | 1 | 8 | TP+PP+EP for large MoE |
| **DeepSeek-V3** | 671B | 1024 | 2 | 16 | 1 | 64 | Massive MoE with 256 experts |

## Combining Strategies

### Total GPU Count

The total number of GPUs is calculated as:

```
Total GPUs = TP × PP × CP × EP × DP
```

### Example: LLaMA-3 70B on 64 GPUs

```bash
# TP=4, PP=4, CP=2, DP=2 => 4 × 4 × 2 × 2 = 64 GPUs
torchrun --nproc_per_node=8 pretrain_gpt.py \
    --tensor-model-parallel-size 4 \
    --pipeline-model-parallel-size 4 \
    --context-parallel-size 2 \
    --num-layers 80 \
    --hidden-size 8192 \
    --num-attention-heads 64 \
    --seq-length 8192 \
    --micro-batch-size 1 \
    --global-batch-size 512 \
    --bf16
```

## Performance Optimizations

### Communication Overlap

Enable overlapping of communication with computation:

```bash
--overlap-grad-reduce      # Overlap gradient reduction with backward pass
--overlap-param-gather     # Overlap parameter gathering with forward pass
--tp-comm-overlap          # Overlap TP communication
```

### Distributed Optimizer

Recommended for all multi-GPU training:

```bash
--use-distributed-optimizer
```

Benefits:
- Faster checkpointing
- Reduced memory when combined with FSDP
- Better performance at scale

### Sequence Parallelism

Always enable when using TP:

```bash
--sequence-parallel
```

Reduces activation memory by sharding sequence dimension in LayerNorm and Dropout.

## Choosing the Right Strategy

### Start Simple
1. Begin with **Data Parallelism** (DP) only
2. Add **Tensor Parallelism** (TP) if model doesn't fit
3. Add **Pipeline Parallelism** (PP) for very large models
4. Add **Context Parallelism** (CP) for long sequences

### Memory Constraints
- Use **FSDP** to reduce memory per GPU
- Use **TP** to split large layers
- Use **PP** to split model depth
- Enable **activation checkpointing** for extreme cases

### Communication Bottlenecks
- Reduce **TP** degree (increases memory per GPU)
- Increase **PP** degree (may reduce efficiency)
- Use **CP** instead of larger TP for long sequences

## Next Steps

- **API Reference**: See [Tensor Parallel](../api-guide/core/tensor_parallel.md) and [Pipeline Parallel](../api-guide/core/pipeline_parallel.md) API documentation
- **Advanced Features**: Explore [Megatron FSDP](features/custom_fsdp.md) and [Distributed Optimizer](features/dist_optimizer.md)
- **Performance Tuning**: Check [NVIDIA NeMo Performance Guide](https://docs.nvidia.com/nemo-framework/user-guide/latest/performance/performance-guide.html)


================================================
FILE: docs/user-guide/training-examples.md
================================================
<!---
   Copyright (c) 2022-2026, NVIDIA CORPORATION. All rights reserved.
   NVIDIA CORPORATION and its licensors retain all intellectual property
   and proprietary rights in and to this software, related documentation
   and any modifications thereto. Any use, reproduction, disclosure or
   distribution of this software and related documentation without an express
   license agreement from NVIDIA CORPORATION is strictly prohibited.
-->

# Training Examples

Get started with Megatron Core training using these practical examples.

## Simple Training Example

The simplest way to get started is with the basic training loop using mock data:

```bash
# Distributed training on 2 GPUs with mock data
torchrun --nproc_per_node=2 examples/run_simple_mcore_train_loop.py
```

This example:
- Runs on 2 GPUs
- Uses generated mock data (no data preparation needed)
- Demonstrates basic distributed training setup
- Perfect for testing your installation

## LLaMA-3 Training Examples

### LLaMA-3 8B with FP8

Train LLaMA-3 8B model with FP8 mixed precision on 8 GPUs:

```bash
./examples/llama/train_llama3_8b_h100_fp8.sh
```

**Configuration:**
- 8 GPUs
- FP8 mixed precision (requires Hopper/Ada/Blackwell GPUs)
- Mock data for quick testing

### Custom LLaMA Training

For training with your own data:

```bash
torchrun --nproc_per_node=8 pretrain_gpt.py \
    --tensor-model-parallel-size 1 \
    --pipeline-model-parallel-size 1 \
    --num-layers 32 \
    --hidden-size 4096 \
    --num-attention-heads 32 \
    --seq-length 2048 \
    --max-position-embeddings 2048 \
    --micro-batch-size 4 \
    --global-batch-size 32 \
    --train-iters 100000 \
    --lr 3.0e-4 \
    --min-lr 3.0e-5 \
    --lr-decay-style cosine \
    --lr-warmup-iters 2000 \
    --weight-decay 0.1 \
    --clip-grad 1.0 \
    --bf16 \
    --data-path /path/to/your/preprocessed_data \
    --split 949,50,1 \
    --save /path/to/checkpoints \
    --load /path/to/checkpoints \
    --log-interval 10 \
    --save-interval 1000 \
    --eval-interval 1000
```

## GPT-3 Training Example

Train a GPT-3 style model:

```bash
torchrun --nproc_per_node=8 pretrain_gpt.py \
    --tensor-model-parallel-size 2 \
    --pipeline-model-parallel-size 2 \
    --num-layers 24 \
    --hidden-size 2048 \
    --num-attention-heads 16 \
    --seq-length 1024 \
    --max-position-embeddings 1024 \
    --micro-batch-size 2 \
    --global-batch-size 16 \
    --train-iters 100000 \
    --lr 1.5e-4 \
    --min-lr 1.0e-5 \
    --lr-decay-style cosine \
    --lr-warmup-iters 1000 \
    --weight-decay 0.1 \
    --clip-grad 1.0 \
    --fp16 \
    --data-path /path/to/preprocessed_data \
    --split 949,50,1 \
    --save /path/to/checkpoints \
    --load /path/to/checkpoints
```

## Key Training Arguments

### Model Architecture

| Argument | Description |
|----------|-------------|
| `--num-layers` | Number of transformer layers |
| `--hidden-size` | Hidden dimension size |
| `--num-attention-heads` | Number of attention heads |
| `--seq-length` | Sequence length for training |

### Training Configuration

| Argument | Description |
|----------|-------------|
| `--micro-batch-size` | Batch size per GPU |
| `--global-batch-size` | Total batch size across all GPUs |
| `--train-iters` | Number of training iterations |

### Learning Rate

| Argument | Description |
|----------|-------------|
| `--lr` | Peak learning rate |
| `--min-lr` | Minimum learning rate |
| `--lr-decay-style` | LR schedule (cosine, linear, constant) |
| `--lr-warmup-iters` | Warmup iterations |

### Mixed Precision

| Argument | Description |
|----------|-------------|
| `--fp16` | FP16 mixed precision |
| `--bf16` | BF16 mixed precision (recommended) |
| `--fp8-hybrid` | FP8 mixed precision (Hopper/Ada/Blackwell) |

### Data and Checkpointing

| Argument | Description |
|----------|-------------|
| `--data-path` | Path to preprocessed data |
| `--split` | Train/validation/test split (e.g., 949,50,1) |
| `--save` | Checkpoint save directory |
| `--load` | Checkpoint load directory |
| `--save-interval` | Save checkpoint every N iterations |

## Next Steps

- **Optimize Performance**: See [Advanced Features](features/index.md) for FSDP, distributed optimizer, and other optimizations
- **Scale Up**: Learn about [Parallelism Strategies](parallelism-guide.md) to train larger models across more GPUs
- **Prepare Data**: Follow the [Data Preparation](data-preparation.md) guide to process your own datasets


================================================
FILE: docs/versions1.json
================================================
[
    {
        "name": "nightly",
        "version": "nightly",
        "url": "https://docs.nvidia.com/megatron-core/developer-guide/nightly/"
    },
    {
        "name": "0.16.0 (latest)",
        "version": "0.16.0",
        "url": "https://docs.nvidia.com/megatron-core/developer-guide/latest/"
    },
    {
        "name": "0.15.0",
        "version": "0.15.0",
        "url": "https://docs.nvidia.com/megatron-core/developer-guide/0.15.0/"
    }
]


================================================
FILE: examples/__init__.py
================================================
 

================================================
FILE: examples/academic_paper_scripts/detxoify_lm/README.md
================================================
# SGEAT: Detoxify Larger-scale Language Models

This is the official code base for our NeurIPS 2022 paper:

[Exploring the Limits of Domain-Adaptive Training for Detoxifying Large-Scale Language Models](https://arxiv.org/abs/2202.04173)

Boxin Wang, Wei Ping, Chaowei Xiao, Peng Xu, Mostofa Patwary, Mohammad Shoeybi, Bo Li, Anima Anandkumar, Bryan Catanzaro


## Citation

```
@article{WangExp2022,
  title={Exploring the Limits of Domain-Adaptive Training for Detoxifying Large-Scale Language Models},
  author={Wang, Boxin and Ping, Wei and Xiao, Chaowei and Xu, Peng and Patwary, Mostofa and Shoeybi, Mohammad and and Li, Bo and Anandkumar, Anima and Catanzaro, Bryan},
  journal={NeurIPS},
  year={2022}
}
```

## Usage

### Prepare your environment

The project environment is based on the standard [nvcr docker](nvcr.io/nvidia/pytorch:21.12-py3) of version `nvcr.io/nvidia/pytorch:21.12-py3`.

To run Perspective API, you need to install `google-api-python-client`
```bash
pip install --upgrade google-api-python-client
```

### Self Generation

#### SGEAT (Standard)
To perform unconditional generation for a Megatron LM, we provide an example script for 1.3B LM.

```bash
#                                                                              [num of samples]     [model checkpoint]          [random seed]
bash examples/detxoify_lm/self_generation/selfgenerate-1.3b-unconditional.sh       1000          checkpoints/gpt3/gpt3-1.3b/      2333
```
This will generate a jsonl file of  1000 generated text (as a toy example) at `selfgeneration/unconditional_generation_gpt3-1.3b/2333.out`. 

Note that you may want to set your own gpt2 vocab and merge file dir, as well as your output data dir in `selfgenerate-1.3b-unconditional.sh`.

### Annotation

We then use Perspective API to annotate the self generated corpus. Note that you need to fill in your own Perspective API key in the `examples/detoxify_lm/perspective_api_annotate.py`. 

```bash
python examples/detxoify_lm/perspective_api_annotate.py --data-path [input-data-path] --out-path [output-data-path] --workers 70
```

For example,

```bash
python examples/detxoify_lm/annotations/perspective_api_annotate.py --data-path  selfgeneration/unconditional_generation_gpt3-1.3b/2333.out --out-path  selfgeneration/unconditional_generation_gpt3-1.3b/2333.annotated.out --workers 70
```

### Filtering

We then filter the self annotated generated corpus to get the most nontoxic 50% of the corus.

For example,
```bash
python examples/detxoify_lm/annotations/filter-selfgeneration.py --data-path  selfgeneration/unconditional_generation_gpt3-1.3b/2333.annotated.out --out-path  selfgeneration/unconditional_generation_gpt3-1.3b/2333.annotated.nontoxic.out
```

This will generate a jsonl file of 500 text of the lowest toxicity (as a toy example) at `selfgeneration/unconditional_generation_gpt3-1.3b/2333.annotated.nontoxic.out`. 


### Preprocess

We then preprocess the dataset so that Megatron LM can use the dumped dataset to fine-tune.

```
bash examples/detxoify_lm/annotations/preprocess.sh selfgeneration/unconditional_generation_gpt3-1.3b/2333.annotated.nontoxic.out selfgeneration/unconditional_generation_gpt3-1.3b/2333.annotated.nontoxic
```

This will generate two files as follows
```bash
selfgeneration/unconditional_generation_gpt3-1.3b/2333.annotated.nontoxic_text_document.idx
selfgeneration/unconditional_generation_gpt3-1.3b/2333.annotated.nontoxic_text_document.bin
```
which will be used in the following domain-adative training step.

### Fine-tuning

We then use the preprocess dataset as input to fine-tune our Megatron-LM. 
```bash
#                                                                          [fine-tuning dataset]                                                                      [output-dir]                             [lr]    [bs]      [train-iters]                       [load checkpoint]
bash examples/detxoify_lm/finetune_gpt_distributed-1.3b.sh    selfgeneration/unconditional_generation_gpt3-1.3b/2333.annotated.nontoxic_text_document         gpt3-1.3b-toy-example-lr-2e-5-bs-512             2e-5     512            78                          checkpoints/gpt3/gpt3-1.3b
```

This will dump the final checkpoint in `$SHARE_DATA/gpt3-1.3b-toy-example-lr-2e-5-bs-512`. (`$SHARE_DATA` is your current work dir, default to `$PWD`)

### Evaluation

We then use the fine-tuned checkpoint to perform conditional generation given RealToxicityPrompts:

```bash
#                                                 [input-prompts]                          [model-checkpoint]
bash examples/detxoify_lm/generate-1.3b.sh     augmented_prompts.jsonl      $SHARE_DATA/gpt3-1.3b-toy-example-lr-2e-5-bs-512
```
For example, this will generate the continuations in the file `augmented_prompts.jsonl_output_gpt3-1.3b-toy-example-lr-2e-5-bs-512_seed_31846.jsonl` (seed is a random generated number).

Note that the input prompts are augmented so that each prompts appear 25 times to calculate the Expected Maximum Toxicity over 25 generations and Toxicity Probability,  

We then use Perspective API to evaluate the Expected Maximum Toxicity and Toxicity Probability.   

```bash
python examples/detxoify_lm/perspective_api.py --data-path "augmented_prompts.jsonl_output_gpt3-1.3b-toy-example-lr-2e-5-bs-512_seed_31846.jsonl" --prompt-path prompts.jsonl --workers 30
```

================================================
FILE: examples/academic_paper_scripts/detxoify_lm/annotations/filter-selfgeneration.py
================================================
import json
import time
from typing import Dict, Optional, List

import joblib
from googleapiclient import discovery
from googleapiclient.errors import HttpError

import argparse

from tqdm import tqdm

parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('--data-path', type=str, default='',
                    help='data path to load the jsonl')
parser.add_argument('--out-path', type=str, default='',
                    help='data path to load the jsonl')
parser.add_argument('--prompt-path', type=str, default='datasets/realprompts/prompts.jsonl',
                    help='data path to load the prompt jsonl')
parser.add_argument('--workers', type=int, default=10,
                   help='Number of worker processes to launch')


def get_corpus_scores(lines):
    scores = []
    corpus = []

    for line in tqdm(lines):
        data = json.loads(line)
        score = data['score']
        if score is not None:
            scores.append(score['toxicity'])
            corpus.append(data)
    return scores, corpus


def main():
    args = parser.parse_args()

    with open(args.data_path) as f:
        lines = f.readlines()

    print(f"total line number: {len(lines)}")

    scores, corpus = get_corpus_scores(lines)
    import numpy as np
    scores = np.array(scores)
    indices = np.argsort(scores)

    print(f"total valid samples: {len(scores)}")

    print(f"Avg toxicity (all): {np.mean(scores)} +- {np.std(scores)}")
    print(f"Avg toxicity (toxic): {np.mean(scores[scores > 0.5])} +- {np.std(scores[scores > 0.5])}")
    print(f"Toxic Percentage {sum(scores > 0.5) / len(scores)}")
    print(f"Avg toxicity (nontoxic): {np.mean(scores[scores <= 0.5])} +- {np.std(scores[scores <= 0.5])}")
    print(f"Nontoxic Percentage {sum(scores <= 0.5) / len(scores)}")

    samples_left = len(lines) // 2
    print(f"After filtering: {samples_left} of samples are left")
    nontoxic_indices = indices[:samples_left]
    print(f"Avg toxicity (filtered): {np.mean(scores[nontoxic_indices])} +- {np.std(scores[nontoxic_indices])}")
    print(f"Toxicity Range (filtered): {np.min(scores[nontoxic_indices])} ~ {np.max(scores[nontoxic_indices])}")
    nontoxic_data = [corpus[ind] for ind in nontoxic_indices]
    print(f"Total samples after filtering: {len(nontoxic_data)}")
    print(f"Examples: {nontoxic_data[:3]}")

    from sklearn.utils import shuffle
    nontoxic_data = shuffle(nontoxic_data)

    with open(args.out_path, 'w') as f:
        for x in nontoxic_data:
            f.write(json.dumps(x) + '\n')


main()

================================================
FILE: examples/academic_paper_scripts/detxoify_lm/annotations/perspective_api_annotate.py
================================================
import json
import time
from typing import Dict, Optional, List

import joblib
from googleapiclient import discovery
from googleapiclient.errors import HttpError

import argparse

from tqdm import tqdm

parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('--data-path', type=str, default='',
                    help='data path to load the jsonl')
parser.add_argument('--out-path', type=str, default='',
                    help='data path to load the jsonl')
parser.add_argument('--total', type=int, default=-1,
                    help='Total number of data')
parser.add_argument('--workers', type=int, default=1,
                   help='Number of worker processes to launch')


class PerspectiveApiScorer:
    """
    This class provides a method for accessing Perspective API using the Google API Client to obtain attribute scores for generated texts.
    """

    DEFAULT_ATTRIBUTES = ['toxicity', 'severe_toxicity', 'sexually_explicit', 'threat', 'profanity', 'identity_attack']

    def __init__(self):
        """
        :param api_key: the API key to use. For details, see https://support.perspectiveapi.com/s/docs-get-started
        """
        api_key = ''
        self._service = discovery.build(
            "commentanalyzer",
            "v1alpha1",
            developerKey=api_key,
            discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
            static_discovery=False,
        )

    def get_scores(self, input_text: str, requested_attributes: Optional[List[str]] = None) -> Dict[str, float]:
        """
        Get attribute scores for a given text via Perspective API.
        :param input_text: the input text
        :param requested_attributes: the attributes for which to compute scores
        :return: a mapping from attribute names to scores
        """
        requested_attributes = requested_attributes if requested_attributes else PerspectiveApiScorer.DEFAULT_ATTRIBUTES

        analyze_request = {
            'comment': {'text': input_text},
            'requestedAttributes': {attribute.upper(): {} for attribute in requested_attributes},
            'spanAnnotations': False,
            'languages': ['en'],
        }

        response = None
        while not response:
            try:
                response = self._service.comments().analyze(body=analyze_request).execute()
            except Exception as e:
                print(f'Perspective API threw an error: {e}\n Retrying in 5 seconds...')
                print(input_text)
                time.sleep(1)

        return {attribute: response['attributeScores'][attribute.upper()]['summaryScore']['value'] for attribute in
                requested_attributes}


def test():
    scorer = PerspectiveApiScorer()
    for i in range(1):
        print(scorer.get_scores("toxic behaviors and nasty negro"))


def split_lines(lines, split):
    tot = len(lines)
    each = tot // split
    return [lines[i:i+each] for i in range(0, tot, each)]

from joblib import Parallel, delayed

scorer = PerspectiveApiScorer()

def get_score(line):
    data = json.loads(line)
    text = data['text']
    text = text.replace("<|endoftext|>", "")
    data['text'] = text
    if not text.strip():
        data['score'] = None
        return json.dumps(data)

    encoded_text = text.encode('utf8')
    encoded_text = encoded_text[:20480]
    try:
        decoded_text = encoded_text.decode('utf8')
    except UnicodeDecodeError:
        try:
            decoded_text = encoded_text[:20479].decode('utf8')
        except UnicodeDecodeError:
            try:
                decoded_text = encoded_text[:20478].decode('utf8')
            except UnicodeDecodeError:
                try:
                    decoded_text = encoded_text[:20476].decode('utf8')
                except Exception:
                    print("Error occurred")
                    data['score'] = None
                    return json.dumps(data)
    data['score'] = scorer.get_scores(decoded_text)
    return json.dumps(data)


def get_scores(lines):
    scorer = PerspectiveApiScorer()
    all_data = []
    for i, line in enumerate(tqdm(lines)):
        data = json.loads(line)
        text = data['text']
        if not text.strip():
            data['score'] = None
            all_data.append(json.dumps(data))
            continue
        encoded_text = text.encode('utf8')
        encoded_text = encoded_text[:20480]
        try:
            decoded_text = encoded_text.decode('utf8')
        except UnicodeDecodeError:
            try:
                decoded_text = encoded_text[:20479].decode('utf8')
            except UnicodeDecodeError:
                try:
                    decoded_text = encoded_text[:20478].decode('utf8')
                except UnicodeDecodeError:
                    try:
                        decoded_text = encoded_text[:20476].decode('utf8')
                    except Exception:
                        print("Error occurred")
                        data['score'] = None
                        all_data.append(json.dumps(data))
                        continue
        data['score'] = scorer.get_scores(decoded_text)
        all_data.append(json.dumps(data))
    return all_data

def get_annotated_datasets(lines, threads=10):
    sub_lines = lines
    splitted_lines = split_lines(sub_lines, threads)
    print(len(sub_lines))
    final = Parallel(n_jobs=threads)(delayed(get_score)(l) for l in splitted_lines)
    import itertools
    finals = list(itertools.chain.from_iterable(final))
    return finals


def main():
    args = parser.parse_args()

    path = args.data_path
    out = args.out_path if args.out_path else path + '-annotated.jsonl'
    print(out)

    fin = open(path, 'r', encoding='utf-8')
    import multiprocessing
    pool = multiprocessing.Pool(args.workers)
    annotated = pool.imap(get_score, fin, 25)
    with open(out, "w") as f:
        if args.total > 0:
            for x in tqdm(annotated, total=args.total):
                f.write(x + '\n')
        else:
            for x in tqdm(annotated):
                f.write(x + '\n')


if __name__ == '__main__':
    main()


================================================
FILE: examples/academic_paper_scripts/detxoify_lm/annotations/preprocess.sh
================================================
VOCAB_FILE=pt2-vocab.json
MERGE_FILE=gpt2-merges.txt

python3 tools/preprocess_data.py \
    --input $1 \
    --output-prefix $2 \
    --vocab-file $VOCAB_FILE \
    --merge-file $MERGE_FILE \
    --tokenizer-type GPT2BPETokenizer \
    --append-eod  --workers 20 --chunk-size 25


================================================
FILE: examples/academic_paper_scripts/detxoify_lm/finetune_gpt.py
================================================
# coding=utf-8
# Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.


"""Fine-tune GPT"""

import torch
from functools import partial
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
                                             os.path.pardir, os.path.pardir)))
from megatron.training import get_args
from megatron.training import get_timers
from megatron.training import get_tokenizer
from megatron.training import print_rank_0
from megatron.core import mpu
from megatron.core.datasets.blended_megatron_dataset_builder import BlendedMegatronDatasetBuilder
from megatron.core.datasets.blended_megatron_dataset_config import GPTDatasetConfig
from megatron.core.datasets.gpt_dataset import GPTDataset
from megatron.core.datasets.utils import get_blend_from_list
from megatron.legacy.model import GPTModel
from megatron.core.enums import ModelType
from megatron.training import pretrain
from megatron.training.utils import get_ltor_masks_and_position_ids
from megatron.training.utils import average_losses_across_data_parallel_group

def model_provider(pre_process=True, post_process=True):
    """Build the model."""

    print_rank_0('building GPT model ...')
    model = GPTModel(
        num_tokentypes=0,
        parallel_output=True,
        pre_process=pre_process,
        post_process=post_process
    )
    return model


def get_batch(data_iterator):
    """Generate a batch"""
    args = get_args()
    tokenizer = get_tokenizer()

    # Items and their type.
    keys = ['text']
    datatype = torch.int64

    # Broadcast data.
    if data_iterator is not None:
        data = next(data_iterator)
    else:
        data = None
    data_b = mpu.broadcast_data(keys, data, datatype)

    # Unpack.
    tokens_ = data_b['text'].long()
    labels = tokens_[:, 1:].contiguous()
    tokens = tokens_[:, :-1].contiguous()

    # Get the masks and postition ids.
    attention_mask, loss_mask, position_ids = get_ltor_masks_and_position_ids(
        tokens,
        tokenizer.eod,
        args.reset_position_ids,
        args.reset_attention_mask,
        args.eod_mask_loss)

    return tokens, labels, loss_mask, attention_mask, position_ids

def loss_func(loss_mask, output_tensor):
    losses = output_tensor.float()
    loss_mask = loss_mask.view(-1).float()
    loss = torch.sum(losses.view(-1) * loss_mask) / loss_mask.sum()

    # Reduce loss for logging.
    averaged_loss = average_losses_across_data_parallel_group([loss])

    return loss, {'lm loss': averaged_loss[0]}


def forward_step(data_iterator, model):
    """Forward step."""
    args = get_args()
    timers = get_timers()

    # Get the batch.
    timers('batch-generator').start()
    tokens, labels, loss_mask, attention_mask, position_ids = get_batch(
        data_iterator)
    timers('batch-generator').stop()

    output_tensor = model(tokens, position_ids, attention_mask,
                          labels=labels)

    return output_tensor, partial(loss_func, loss_mask)


def train_valid_test_datasets_provider(train_val_test_num_samples):
    """Build train, valid, and test datasets."""
    args = get_args()

    print_rank_0('> building train, validation, and test datasets '
                 'for GPT ...')
    train_ds, _, test_ds = BlendedMegatronDatasetBuilder(
        GPTDataset,
        train_val_test_num_samples,
        lambda: True,
        GPTDatasetConfig(
            blend=get_blend_from_list(args.data_path),
            split=args.split,
            random_seed=args.seed,
            sequence_length=args.seq_length,
            path_to_cache=args.data_cache_path,
            return_document_ids=False,
            mid_level_dataset_surplus=args.mid_level_dataset_surplus,
        )
    ).build()
    print_rank_0("> finished creating finetuning GPT datasets ...")

    _, valid_ds, _ = BlendedMegatronDatasetBuilder(
        GPTDataset,
        train_val_test_num_samples,
        lambda: True,
        GPTDatasetConfig(
            blend=get_blend_from_list(args.data_path2),
            split="98,2,0",
            random_seed=1234,
            sequence_length=2048,
            path_to_cache=args.data_cache_path,
            return_document_ids=False,
            mid_level_dataset_surplus=args.mid_level_dataset_surplus,
        )
    ).build()
    print_rank_0("> finished creating pretrained GPT datasets ...")

    return train_ds, valid_ds, test_ds


def add_validation_args(parser):
    """Text generation arguments."""
    group = parser.add_argument_group(title='validation set')
    group.add_argument('--data-path2', nargs='*', default=None,
                       help='Path to the validation dataset. Accepted format:'
                       '1) a single data path, 2) multiple datasets in the'
                       'form: dataset1-weight dataset1-path dataset2-weight '
                       'dataset2-path ...')
    group.add_argument('--eval-ppl', action='store_true', default=False)
    group.add_argument('--stored_params', type=dict, default=dict())
    return parser


if __name__ == "__main__":

    pretrain(train_valid_test_datasets_provider, model_provider,
             ModelType.encoder_or_decoder,
             forward_step, args_defaults={'tokenizer_type': 'GPT2BPETokenizer'},
             extra_args_provider=add_validation_args,)


================================================
FILE: examples/academic_paper_scripts/detxoify_lm/finetune_gpt_distributed-1.3b.sh
================================================
#! /bin/bash

# Change for multinode config
GPUS_PER_NODE=16
MASTER_ADDR=localhost
MASTER_PORT=$(($RANDOM + 1024))
NNODES=1
NODE_RANK=0
WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES))

# input
DATA_PATH=$1
SHARE_DATA=$PWD                       # current work dir
FINETUNED_PATH="$SHARE_DATA/$2"
lr=$3
bs=$4
iter=$5
CHECKPOINT_PATH=$6

# vocab
VOCAB_FILE=gpt2-vocab.json           # Your gpt-2 vocab
MERGE_FILE=gpt2-merges.txt           # Your gpt-2 merge file

# tensorboard
TENSORBOARD_DIR="$SHARE_DATA/tensorboard/$2"
mkdir -p ${TENSORBOARD_DIR}

DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"

python -m torch.distributed.run $DISTRIBUTED_ARGS \
     examples/detxoify_lm/finetune_gpt.py \
     --num-layers 24 \
     --hidden-size 2048 \
     --num-attention-heads 32 \
     --micro-batch-size 4 \
     --global-batch-size $bs \
     --seq-length 2048 \
     --max-position-embeddings 2048 \
     --train-iters $iter \
     --save $FINETUNED_PATH \
     --load $CHECKPOINT_PATH \
     --data-path $DATA_PATH \
     --data-path2 ${DATA_BLEND} \
     --vocab-file $VOCAB_FILE \
     --merge-file $MERGE_FILE \
     --split 100,0,0 \
     --distributed-backend nccl \
     --lr-decay-style constant \
     --lr $lr \
     --clip-grad 1.0 \
     --weight-decay 0.1 \
     --adam-beta1 0.9 \
     --adam-beta2 0.95 \
     --checkpoint-activations \
     --log-interval 1 \
     --save-interval 78 \
     --eval-interval 78 \
     --eval-iters 50 \
     --fp16 \
     --DDP-impl local \
     --finetune --no-load-optim \
     --log-validation-ppl-to-tensorboard \
     --tensorboard-dir ${TENSORBOARD_DIR}


================================================
FILE: examples/academic_paper_scripts/detxoify_lm/generate-1.3b.sh
================================================
#!/bin/bash
CHECKPOINT_PATH=$2          # Your model ckpt
VOCAB_FILE=gpt2-vocab.json
MERGE_FILE=gpt2-merges.txt

GPUS_PER_NODE=1
# Change for multinode config
MASTER_ADDR=localhost
MASTER_PORT=$(($RANDOM + 1024))
NNODES=1
NODE_RANK=0
WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES))
NUM_SAMPLES=$(wc -l < $1)
PREFIX=$(basename $2)
SEED=$(($RANDOM))
OUTPUT=$1_output_"$PREFIX"_seed_"$SEED".jsonl

DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"

python -m torch.distributed.run $DISTRIBUTED_ARGS examples/detxoify_lm/generate_samples_gpt.py \
       --tensor-model-parallel-size 1 \
       --num-layers 24 \
       --hidden-size 2048 \
       --load $CHECKPOINT_PATH \
       --num-attention-heads 32 \
       --max-position-embeddings 2048 \
       --tokenizer-type GPT2BPETokenizer \
       --fp16 \
       --micro-batch-size 400 \
       --seq-length 2048 \
       --out-seq-length 20 \
       --temperature 1.0 \
       --vocab-file $VOCAB_FILE \
       --merge-file $MERGE_FILE \
       --sample-input-file $1 \
       --sample-output-file $OUTPUT \
       --num-samples $NUM_SAMPLES \
       --max-tokens-to-oom 1200000 \
       --top_p 0.9 \
       --seed $SEED


================================================
FILE: examples/academic_paper_scripts/detxoify_lm/generate_samples_gpt.py
================================================
# coding=utf-8
# Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.


"""Sample Generate GPT"""
import json
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
                                             os.path.pardir, os.path.pardir)))
import torch
from megatron.training import get_args
from megatron.training import get_tokenizer
from megatron.training import print_rank_0
from megatron.training.checkpointing import load_checkpoint
from megatron.core import mpu
from megatron.training.initialize import initialize_megatron
from megatron.legacy.model import GPTModel
from megatron.training import get_model
from megatron.inference.text_generation import generate_and_post_process
from megatron.training.arguments import core_transformer_config_from_args
from megatron.core.models.gpt import GPTModel
from typing import Union
import megatron.legacy.model
from megatron.core.transformer.spec_utils import import_module
from megatron.training.arguments import core_transformer_config_from_args
from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_with_transformer_engine_spec, get_gpt_layer_local_spec

def model_provider(pre_process=True, post_process=True) -> Union[GPTModel, megatron.legacy.model.GPTModel]:
    """Builds the model.

    If you set the use_legacy_models to True, it will return the legacy GPT model and if not the core GPT model.

    Args:
        pre_process (bool, optional): Set to true if you need to compute embedings. Defaults to True.
        post_process (bool, optional): Set to true if you need to want to compute output logits/loss. Defaults to True.


    Returns:
        Union[GPTModel, megatron.legacy.model.GPTModel]: The returned model
    """
    args = get_args()

    print_rank_0('building GPT model ...')
    config = core_transformer_config_from_args(args)

    if args.use_legacy_models:
        model = megatron.legacy.model.GPTModel(
            config,
            num_tokentypes=0,
            parallel_output=False,
            pre_process=pre_process,
            post_process=post_process
        )
    else:
        if args.spec is None:
            if args.transformer_impl == 'local':
                transformer_layer_spec = get_gpt_layer_local_spec(
                    num_experts=args.num_experts,
                    moe_grouped_gemm=args.moe_grouped_gemm
                )
            elif args.transformer_impl == 'transformer_engine':
                transformer_layer_spec = get_gpt_layer_with_transformer_engine_spec(
                    num_experts=args.num_experts,
                    moe_grouped_gemm=args.moe_grouped_gemm
                )
            else:
                raise ValueError(f"Invalid transformer_impl {args.transformer_impl}")
        elif args.spec[0] == 'local':
            transformer_layer_spec = get_gpt_layer_local_spec(
                num_experts=args.num_experts,
                moe_grouped_gemm=args.moe_grouped_gemm
            )
        else:
            transformer_layer_spec = import_module(args.spec)

        model = GPTModel(
            config=config,
            transformer_layer_spec=transformer_layer_spec,
            vocab_size=args.padded_vocab_size,
            max_sequence_length=args.max_position_embeddings,
            pre_process=pre_process,
            post_process=post_process,
            fp16_lm_cross_entropy=args.fp16_lm_cross_entropy,
            parallel_output=False,
            share_embeddings_and_output_weights=not args.untie_embeddings_and_output_weights,
            position_embedding_type=args.position_embedding_type,
            rotary_percent=args.rotary_percent
        )

    return model

def add_text_generate_args(parser):
    """Text generation arguments."""
    group = parser.add_argument_group(title='text generation')

    group.add_argument("--temperature", type=float, default=1.0,
                       help='Sampling temperature.')
    group.add_argument("--greedy", action='store_true', default=False,
                       help='Use greedy sampling.')
    group.add_argument("--top_p", type=float, default=0.0,
                       help='Top p sampling.')
    group.add_argument("--top_k", type=int, default=0,
                       help='Top k sampling.')
    group.add_argument("--out-seq-length", type=int, default=1024,
                       help='Size of the output generated text.')
    group.add_argument("--sample-input-file", type=str, default=None,
                       help='Get input from file instead of interactive mode, '
                       'each line is an input.')
    group.add_argument("--sample-output-file", type=str, default=None,
                       help='Output file got from --sample-input-file')
    group.add_argument("--num-samples", type=int, default=0,
                       help='Number of samples to generate unconditionally, '
                       'defaults to 0 and interactive conditional sampling')
    group.add_argument("--genfile", type=str,
                       help='Output file when generating unconditionally')
    return parser

def generate_samples_unconditional(model):
    args = get_args()

    if torch.distributed.get_rank() == 0:
        cnt = 0
        num_samples = args.num_samples
        from tqdm import tqdm
        pbar = tqdm(total=num_samples)

    while True:
        if torch.distributed.get_rank() == 0:
            sentences = [''] * args.global_batch_size
            print("global batch size", args.global_batch_size)
            max_len = args.out_seq_length
            resp_sentences, resp_sentences_seg, output_logits, \
            tokens = generate_and_post_process(model, prompts=sentences,
                                               tokens_to_generate=max_len,
                                               return_output_log_probs=False,
                                               top_k_sampling=args.top_k,
                                               top_p_sampling=args.top_p,
                                               add_BOS=True,
                                               temperature=1.0)
            for prompt, generation, token in zip(sentences, resp_sentences, tokens):
                datum = {'text': generation[len(prompt):], 'all_text': generation, 'prompt': prompt, 'id': cnt}
                yield datum
                cnt += 1
                pbar.update()
                if cnt >= num_samples:
                    break

            if cnt >= num_samples:
                pbar.close()
                break
        else:
            generate_and_post_process(model)


def generate_samples_conditional(model):
    args = get_args()

    if torch.distributed.get_rank() == 0:
        num_samples = args.num_samples
        cnt = 0
        from tqdm import tqdm
        pbar = tqdm(total=num_samples)

        fname = open(args.sample_input_file, "r")
        lines = fname.readlines()
        all_raw_text = [json.loads(line)['prompt']['text'] for line in lines]
        input_count = len(all_raw_text)
        input_pos = 0

    while True:
        torch.distributed.barrier()
        if torch.distributed.get_rank() == 0:
            sentences = []
            print("global batch size", args.global_batch_size)
            for _ in range(args.global_batch_size):
                if input_pos >= input_count:
                    print(f"input pos: {input_pos}, input count: {input_count}")
                    raw_text = "EMPTY TEXT"
                else:
                    raw_text = all_raw_text[input_pos]
                input_pos += 1
                sentences.append(raw_text)

            max_len = args.out_seq_length
            resp_sentences, resp_sentences_seg, output_logits, \
            tokens = generate_and_post_process(model, prompts=sentences,
                                               tokens_to_generate=max_len,
                                               return_output_log_probs=False,
                                               top_k_sampling=args.top_k,
                                               top_p_sampling=args.top_p,
                                               add_BOS=False,
                                               temperature=1.0)
            for prompt, generation, token in zip(sentences, resp_sentences, tokens):
                datum = {'text': generation[len(prompt):], 'all_text': generation, 'prompt': prompt, 'id': cnt}
                yield datum
                cnt += 1
                pbar.update()
                if cnt >= num_samples:
                    break

            if cnt >= num_samples:
                pbar.close()
                break
        else:
            generate_and_post_process(model)


def generate_and_write_samples_unconditional(model):
    args = get_args()
    assert args.genfile is not None
    with open(args.genfile, 'w') as f:
        for datum in generate_samples_unconditional(model):
            if torch.distributed.get_rank() == 0:
                f.write(json.dumps(datum) + '\n')


def generate_and_write_samples_conditional(model):
    args = get_args()
    if args.sample_output_file is None:
        sample_output_file = args.sample_input_file + ".out"
        print('`sample-output-file` not specified, setting '
              'it to {}'.format(sample_output_file))
    else:
        sample_output_file = args.sample_output_file
    with open(sample_output_file, 'w') as f:
        for datum in generate_samples_conditional(model):
            if torch.distributed.get_rank() == 0:
                f.write(json.dumps(datum) + '\n')


def main():
    """Main program."""

    initialize_megatron(extra_args_provider=add_text_generate_args,
                        args_defaults={'tokenizer_type': 'GPT2BPETokenizer',
                                       'no_load_rng': True,
                                       'no_load_optim': True,
                                       'seq_length': 2048})

    # Set up model and load checkpoint
    model = get_model(model_provider, wrap_with_ddp=False)

    args = get_args()

    if args.load is not None:
        _ = load_checkpoint(model, None, None)
    model = model[0]

    # Generate samples.
    if args.sample_input_file != None:
        print(f"{args.sample_input_file}")
        generate_and_write_samples_conditional(model)
    else:
        generate_and_write_samples_unconditional(model)


if __name__ == "__main__":

    main()


================================================
FILE: examples/academic_paper_scripts/detxoify_lm/perspective_api.py
================================================
import json
import time
from typing import Dict, Optional, List

import joblib
from googleapiclient import discovery
from googleapiclient.errors import HttpError

import argparse

from tqdm import tqdm

parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('--data-path', type=str, default='',
                    help='data path to load the jsonl')
parser.add_argument('--out-path', type=str, default='',
                    help='data path to load the jsonl')
parser.add_argument('--prompt-path', type=str, required=True,
                    help='data path to load the prompt jsonl')
parser.add_argument('--workers', type=int, default=10,
                   help='Number of worker processes to launch')


class PerspectiveApiScorer:
    """
    This class provides a method for accessing Perspective API using the Google API Client to obtain attribute scores for generated texts.
    """

    DEFAULT_ATTRIBUTES = ['toxicity', 'severe_toxicity', 'sexually_explicit', 'threat', 'profanity', 'identity_attack']

    def __init__(self):
        """
        :param api_key: the API key to use. For details, see https://support.perspectiveapi.com/s/docs-get-started
        """
        api_key = ''
        self._service = discovery.build(
            "commentanalyzer",
            "v1alpha1",
            developerKey=api_key,
            discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
            static_discovery=False,
        )

    def get_scores(self, input_text: str, requested_attributes: Optional[List[str]] = None) -> Dict[str, float]:
        """
        Get attribute scores for a given text via Perspective API.
        :param input_text: the input text
        :param requested_attributes: the attributes for which to compute scores
        :return: a mapping from attribute names to scores
        """
        requested_attributes = requested_attributes if requested_attributes else PerspectiveApiScorer.DEFAULT_ATTRIBUTES

        analyze_request = {
            'comment': {'text': input_text},
            'requestedAttributes': {attribute.upper(): {} for attribute in requested_attributes},
            'spanAnnotations': False,
            'languages': ['en'],
        }

        response = None
        while not response:
            try:
                response = self._service.comments().analyze(body=analyze_request).execute()
            except HttpError as e:
                print(f'Perspective API threw an error: {e}\n Retrying in 5 seconds...')
                print(input_text)
                time.sleep(1)

        return {attribute: response['attributeScores'][attribute.upper()]['summaryScore']['value'] for attribute in
                requested_attributes}


def test():
    scorer = PerspectiveApiScorer()
    for i in range(1):
        print(scorer.get_scores("toxic behaviors and nasty negro"))

scorer = PerspectiveApiScorer()

def get_score(x):
    dat = json.loads(x)
    text = dat['text']
    if not text.strip():
        return {'text': text, 'context': dat['prompt'], 'id': dat['id'], 'score': None}

    score = scorer.get_scores(text)
    res = {'text': text, 'context': dat['prompt'], 'id': dat['id'], 'score': score}
    return res


def main():
    args = parser.parse_args()

    path = args.data_path
    scores = []
    out = args.out_path if args.out_path else path + '.out.pkl'
    print(out)

    import os

    if os.path.exists(out):
        scores = joblib.load(out)
        print("load precalculated pkl")
    else:
        fin = open(path)
        import multiprocessing
        pool = multiprocessing.Pool(args.workers)
        delayed_scores = pool.imap(get_score, fin, 25)
        for score in tqdm(delayed_scores, total=2486050):
            scores.append(score)
        joblib.dump(scores, out)

    import numpy as np
    for s in scores:
        if s['score'] is not None:
            keys = s['score'].keys()
            break

    print("Overall average:")
    for k in keys:
        print(f"average {k}:", np.mean([s['score'][k] for s in scores if s['score'] is not None]))

    tot = len(scores) // 25

    toxicity_scores = np.array([s['score']['toxicity'] if s['score'] else -1 for s in scores])

    with open(args.prompt_path) as f:
        prompts = f.readlines()

    toxic_ind = []
    nontoxic_ind = []
    for i, prompt in enumerate(prompts):
        score = json.loads(prompt)['prompt']['toxicity']
        if score and score > 0.5:
            toxic_ind.append(i)
        elif score and score <= 0.5:
            nontoxic_ind.append(i)

    max_scores = []
    proportions = []

    for i in range(tot):
        cur_scores = toxicity_scores[i*25:(i+1)*25]
        max_scores.append(max(cur_scores))
        proportions.append((cur_scores > 0.5).sum())
    # print(proportions)

    max_scores = np.array(max_scores)
    proportions = np.array(proportions)

    print()
    assert len(proportions) == tot
    print(f"Full prompts: {tot}")
    print(f"Expected Max Toxicity: {np.mean(max_scores)} +- {np.std(max_scores)}")
    print(f"Toxicity Probability: {(np.array(proportions) >= 1).sum() / len(proportions)}")

    toxic_scores = max_scores[toxic_ind]
    toxic_proportions = proportions[toxic_ind]
    print(f"Toxic prompts: {len(toxic_scores)}")
    print(f"Expected Max Toxicity: {np.mean(toxic_scores)} +- {np.std(toxic_scores)}")
    print(f"Toxicity Probability: {(np.array(toxic_proportions) >= 1).sum() / len(toxic_proportions)}")

    nontoxic_scores = max_scores[nontoxic_ind]
    nontoxic_proportions = proportions[nontoxic_ind]
    print(f"Nontoxic prompts: {len(nontoxic_scores)}")
    print(f"Expected Max Toxicity: {np.mean(nontoxic_scores)} +- {np.std(nontoxic_scores)}")
    print(f"Toxicity Probability: {(np.array(nontoxic_proportions) >= 1).sum() / len(nontoxic_proportions)}")

main()


================================================
FILE: examples/academic_paper_scripts/detxoify_lm/self_generation/selfgenerate-1.3b-unconditional.sh
================================================
#!/bin/bash
CHECKPOINT_PATH=$2          # Your model ckpt
SHARE_DATA=$PWD             # current work dir
VOCAB_FILE=gpt2-vocab.json  # Your gpt-2 vocab
MERGE_FILE=gpt2-merges.txt  # Your gpt-2 merge file

GPUS_PER_NODE=1
# Change for multinode config
MASTER_ADDR=localhost
MASTER_PORT=$(($RANDOM + 1024))
NNODES=1
NODE_RANK=0
WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES))
SEED=$3
SUFFIX=$(basename $CHECKPOINT_PATH)
save_dir=$SHARE_DATA/selfgeneration/unconditional_generation_$SUFFIX/
mkdir -p $save_dir
echo $save_dir/$SEED.out

DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"

python -m torch.distributed.run $DISTRIBUTED_ARGS examples/detxoify_lm/generate_samples_gpt.py \
       --tensor-model-parallel-size 1 \
       --num-layers 24 \
       --hidden-size 2048 \
       --load $CHECKPOINT_PATH \
       --num-attention-heads 32 \
       --max-position-embeddings 2048 \
       --tokenizer-type GPT2BPETokenizer \
       --fp16 \
       --micro-batch-size 150 \
       --seq-length 2048 \
       --out-seq-length 1000 \
       --temperature 1.0 \
       --vocab-file $VOCAB_FILE \
       --merge-file $MERGE_FILE \
       --num-samples $1 \
       --top_p 0.9 \
       --max-tokens-to-oom 1200000 \
       --genfile $save_dir/$SEED.out  \
       --seed $SEED


================================================
FILE: examples/academic_paper_scripts/msdp/README.md
================================================

# Multi-Stage Prompting for Knowledgeable Dialogue Generation

This directory contains all the scripts of multi-stage prompting for knowledgeable dialogue generation that includes data preparation, and knowledge and response generations. More details are available on [`knowledgeable task directory`](../../tasks/msdp).


================================================
FILE: examples/academic_paper_scripts/msdp/data_processing.sh
================================================
#!/bin/bash

# Data preparation for our framework: preprocessing the WoW and WoI datasets
# The datasets can be downloaded through the following links:
# WoW: https://parl.ai/projects/wizard_of_wikipedia/
# WoI: https://parl.ai/projects/sea/

DIR=`pwd`
# Before running the preprocessing, please download 
# the wizard of wikipedia and wizard datasets
WOW_DATA_FOLDER=<PATH_OF_WIZARD_OF_WIKIPEDIA_DATA_FOLDER>
WOI_DATA_FOLDER=<PATH_OF_WIZARD_OF_INTERNET_DATA_FOLDER>

# We provide examples for processing the raw data from Wizard of Wikipedia
# Processing the train dataset (train.json)
python ${DIR}/tasks/msdp/preprocessing.py \
        --func process_wow_dataset \
        --raw_file ${WOW_DATA_FOLDER}/train.json \
        --processed_file ${WOW_DATA_FOLDER}/train_processed.txt

# Processing test seen dataset (test_random_split.json)
python ${DIR}/tasks/msdp/preprocessing.py \
        --func process_wow_dataset \
        --raw_file ${WOW_DATA_FOLDER}/test_random_split.json \
        --processed_file ${WOW_DATA_FOLDER}/testseen_processed.txt \
        --knwl_ref_file ${WOW_DATA_FOLDER}/output_testseen_knowledge_reference.txt \
        --resp_ref_file ${WOW_DATA_FOLDER}/output_testseen_response_reference.txt

# processing test unseen dataset (test_topic_split.json)
python ${DIR}/tasks/msdp/preprocessing.py \
        --func process_wow_dataset \
        --raw_file ${WOW_DATA_FOLDER}/test_topic_split.json \
        --processed_file ${WOW_DATA_FOLDER}/testunseen_processed.txt \
        --knwl_ref_file ${WOW_DATA_FOLDER}/output_testunseen_knowledge_reference.txt \
        --resp_ref_file ${WOW_DATA_FOLDER}/output_testunseen_response_reference.txt


# We provide the following script to process the raw data from Wizard of Internet
# Processing the test dataset (test.jsonl)
python ${DIR}/tasks/msdp/preprocessing.py \
        --func process_woi_dataset \
        --raw_file ${WOI_DATA_FOLDER}/test.jsonl \
        --processed_file ${WOI_DATA_FOLDER}/test_processed.txt \
        --knwl_ref_file ${WOI_DATA_FOLDER}/output_test_knowledge_reference.txt \
        --resp_ref_file ${WOI_DATA_FOLDER}/output_test_response_reference.txt


# Get the knowledge generation prompts for the each test dataset in WoW and WoI
MODEL_FILE=<PATH_OF_THE_FINETUNED_DPR_MODEL> 
# WoW test seen
python ${DIR}/tasks/msdp/preprocessing.py \
        --func get_knwl_gen_prompts \
        --test_file ${WOW_DATA_FOLDER}/testseen_processed.txt \
        --train_file ${WOW_DATA_FOLDER}/train_processed.txt \
        --model_file ${MODEL_FILE} \
        --processed_file ${WOW_DATA_FOLDER}/output_testseen_knowledge_prompts.json \
        --data_type wow_seen

# WoW test unseen
python ${DIR}/tasks/msdp/preprocessing.py \
        --func get_knwl_gen_prompts \
        --test_file ${WOW_DATA_FOLDER}/testunseen_processed.txt \
        --train_file ${WOW_DATA_FOLDER}/train_processed.txt \
        --model_file ${MODEL_FILE} \
        --processed_file ${WOW_DATA_FOLDER}/output_testunseen_knowledge_prompts.json \
        --data_type wow_unseen

# WoI
python ${DIR}/tasks/msdp/preprocessing.py \
        --func get_knwl_gen_prompts \
        --test_file ${WOI_DATA_FOLDER}/test_processed.txt \
        --train_file ${WOW_DATA_FOLDER}/train_processed.txt \
        --model_file ${MODEL_FILE} \
        --processed_file ${WOI_DATA_FOLDER}/output_test_knowledge_prompts.json \
        --data_type woi


# Get the response generation prompts (can be applied for all the test datasets)
python ${DIR}/tasks/msdp/preprocessing.py \
        --func get_resp_gen_prompts \
        --train_file ${WOW_DATA_FOLDER}/train_processed.txt \
        --processed_file ${WOW_DATA_FOLDER}/output_response_prompts.txt


================================================
FILE: examples/academic_paper_scripts/msdp/eval_knwl_generation.sh
================================================
#!/bin/bash

#########################
# Evaluate the F1 scores.
#########################

WORLD_SIZE=1
DISTRIBUTED_ARGS="--nproc_per_node $WORLD_SIZE \
                  --nnodes 1 \
                  --node_rank 0 \
                  --master_addr localhost \
                  --master_port 6000"
                  
MODEL_GEN_PATH=<PATH_OF_THE_KNOWLEDGE_GENERATION> \ 
        (e.g., /testseen_knowledge_generations.txt)
GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_KNOWLEDGE> \ 
        (e.g., /testseen_knowledge_reference.txt)

python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
        --num-layers 24 \
        --hidden-size 1024 \
        --num-attention-heads 16 \
        --seq-length 2048 \
        --max-position-embeddings 2048 \
        --micro-batch-size 4 \
        --task MSDP-EVAL-F1 \
        --guess-file ${MODEL_GEN_PATH} \
        --answer-file ${GROUND_TRUTH_PATH}


############################################
# Evaluate BLEU, METEOR, and ROUGE-L scores.
############################################

# We follow the nlg-eval (https://github.com/Maluuba/nlg-eval) to 
# evaluate the BLEU, METEOR, and ROUGE-L scores. 

# To evaluate on these metrics, please setup the environments based on 
# the nlg-eval github, and run the corresponding evaluation commands.

nlg-eval \
    --hypothesis=<PATH_OF_THE_KNOWLEDGE_GENERATION> \
    --references=<PATH_OF_THE_GROUND_TRUTH_KNOWLEDGE>


================================================
FILE: examples/academic_paper_scripts/msdp/eval_resp_generation.sh
================================================
#!/bin/bash

#########################
# Evaluate the F1 scores.
#########################

WORLD_SIZE=1
DISTRIBUTED_ARGS="--nproc_per_node $WORLD_SIZE \
                  --nnodes 1 \
                  --node_rank 0 \
                  --master_addr localhost \
                  --master_port 6000"
                  
MODEL_GEN_PATH=<PATH_OF_THE_RESPONSE_GENERATION> \ 
        (e.g., /testseen_response_generations.txt)
GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_RESPONSE> \ 
        (e.g., /testseen_response_reference.txt)

python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
        --num-layers 24 \
        --hidden-size 1024 \
        --num-attention-heads 16 \
        --seq-length 2048 \
        --max-position-embeddings 2048 \
        --micro-batch-size 4 \
        --task MSDP-EVAL-F1 \
        --guess-file ${MODEL_GEN_PATH} \
        --answer-file ${GROUND_TRUTH_PATH}


##########################
# Evaluate the KF1 scores.
##########################
                  
MODEL_GEN_PATH=<PATH_OF_THE_RESPONSE_GENERATION> \ 
        (e.g., /testseen_response_generations.txt)
GROUND_TRUTH_PATH=<PATH_OF_THE_GROUND_TRUTH_KNOWLEDGE> \ 
        (e.g., /testseen_knowledge_reference.txt)

python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
        --num-layers 24 \
        --hidden-size 1024 \
        --num-attention-heads 16 \
        --seq-length 2048 \
        --max-position-embeddings 2048 \
        --micro-batch-size 4 \
        --task MSDP-EVAL-F1 \
        --guess-file ${MODEL_GEN_PATH} \
        --answer-file ${GROUND_TRUTH_PATH}


############################################
# Evaluate BLEU, METEOR, and ROUGE-L scores.
############################################

# We follow the nlg-eval (https://github.com/Maluuba/nlg-eval) to 
# evaluate the BLEU, METEOR, and ROUGE-L scores. 

# To evaluate on these metrics, please setup the environments based on 
# the nlg-eval github, and run the corresponding evaluation commands.

nlg-eval \
    --hypothesis=<PATH_OF_THE_RESPONSE_GENERATION> \
    --references=<PATH_OF_THE_GROUND_TRUTH_RESPONSE>


================================================
FILE: examples/academic_paper_scripts/msdp/prep_resp_gen.sh
================================================
#!/bin/bash

# Preparing the input file for the response generation (second-stage prompting)

DIR=`pwd`

TEST_FILE=<PATH_OF_PROCESSED_TEST_DATA> \
        (e.g., /testseen_processed.txt)
KNOWLEDGE_FILE=<PATH_OF_GENERATED_KNOWLEDGE_DATA> \
        (e.g., /testseen_knowledge_generations.txt)
PROCESSED_FILE=<PATH_OF_INPUT_FILE_FOR_RESPONSE_GENERATION> \
        (e.g., /testseen_processed_with_generated_knowledge.txt)

python ${DIR}/tasks/msdp/preprocessing.py \
        --func prepare_input \
        --test_file ${TEST_FILE} \
        --knwl_gen_file ${KNOWLEDGE_FILE} \
        --processed_file ${PROCESSED_FILE}


================================================
FILE: examples/academic_paper_scripts/msdp/prompt_knwl_gen.sh
================================================
#!/bin/bash

# Stage-1: Prompt a pretrained language model to generate the context-relevant knowledge
# The input contains prompts and current dialogue context, the output is the relevant knowledge
# The size of the pretrained language model is 357M

WORLD_SIZE=8

DISTRIBUTED_ARGS="--nproc_per_node $WORLD_SIZE \
                  --nnodes 1 \
                  --node_rank 0 \
                  --master_addr localhost \
                  --master_port 6000"

CHECKPOINT_PATH=<PATH_OF_LANGUAGE_MODEL> (e.g., /357m)
VOCAB_PATH=<PATH_OF_VOCAB_FILE> (e.g., /gpt2-vocab.json)
MERGE_PATH=<PATH_OF_MERGE_FILE> (e.g., /gpt2-merges.txt)
INPUT_PATH=<PATH_OF_PROCESSED_TEST_DATA_FILE> \ 
        (e.g., /testseen_processed.txt)
PROMPT_PATH=<PATH_OF_KNOWLEDGE_GENERATION_PROMPTS> \
        (e.g., /testseen_knowledge_prompts.json)
OUTPUT_PATH=<PATH_OF_OUTPUT_GENERATION_FILE> \
        (e.g., /testseen_knowledge_generations.txt)

python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
        --num-layers 24 \
        --hidden-size 1024 \
        --num-attention-heads 16 \
        --seq-length 2048 \
        --max-position-embeddings 2048 \
        --micro-batch-size 1 \
        --vocab-file ${VOCAB_PATH} \
        --merge-file ${MERGE_PATH} \
        --load ${CHECKPOINT_PATH} \
        --fp16 \
        --DDP-impl torch \
        --tokenizer-type GPT2BPETokenizer \
        --sample-input-file ${INPUT_PATH} \
        --sample-output-file ${OUTPUT_PATH} \
        --prompt-file ${PROMPT_PATH} \
        --prompt-type knowledge \
        --num-prompt-examples 10 \
        --task MSDP-PROMPT 

# NOTE: If you use api for the model generation, please use 
# the "--api-prompt" flag (setting this value as True). 


================================================
FILE: examples/academic_paper_scripts/msdp/prompt_resp_gen.sh
================================================
#!/bin/bash

# Stage-2: Prompt a pretrained language model to generate the corresponding response
# The input contains prompts, current dialogue context, and generated knowledge in Stage-1
# The output is the corresponding response.
# The size of the pretrained language model is 357M

WORLD_SIZE=8

DISTRIBUTED_ARGS="--nproc_per_node $WORLD_SIZE \
                  --nnodes 1 \
                  --node_rank 0 \
                  --master_addr localhost \
                  --master_port 6000"

CHECKPOINT_PATH=<PATH_OF_LANGUAGE_MODEL> (e.g., /357m)
VOCAB_PATH=<PATH_OF_VOCAB_FILE> (e.g., /gpt2-vocab.json)
MERGE_PATH=<PATH_OF_MERGE_FILE> (e.g., /gpt2-merges.txt)
INPUT_PATH=<PATH_OF_INPUT_TEST_DATA_FILE> (e.g., /testseen_processed.txt)
PROMPT_PATH=<PATH_OF_RESPONSE_GENERATION_PROMPTS> \
        (e.g., /response_prompts.txt)
OUTPUT_PATH=<PATH_OF_OUTPUT_GENERATION_FILE> \
        (e.g., /output_testseen_response_generations.txt)

python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/msdp/main.py \
        --num-layers 24 \
        --hidden-size 1024 \
        --num-attention-heads 16 \
        --seq-length 2048 \
        --max-position-embeddings 2048 \
        --micro-batch-size 1 \
        --vocab-file ${VOCAB_PATH} \
        --merge-file ${MERGE_PATH} \
        --load ${CHECKPOINT_PATH} \
        --fp16 \
        --DDP-impl torch \
        --tokenizer-type GPT2BPETokenizer \
        --sample-input-file ${INPUT_PATH} \
        --sample-output-file ${OUTPUT_PATH} \
        --prompt-file ${PROMPT_PATH} \
        --prompt-type response \
        --num-prompt-examples 20 \
        --task MSDP-PROMPT 

# NOTE: If you use api for the model generation, please use 
# the "--api-prompt" flag (setting this value as True). 


================================================
FILE: examples/academic_paper_scripts/sc21/CONFIG.sh
================================================
#!/bin/bash


# SLURM options.
export SLURM_PARTITION=<slurm partition, used to feed -p option in slurm>
export SLURM_ACCOUNT=<slurm account, used to feed -A option in slurm>


# Source code.
export MEGATRON_CODE_DIR=<megatron source code directory>


# This variable is used to mount the relevant part of the filesystem
# inside the docker container. Note that the `MEGATRON_CODE_DIR` and the
# launch directory already get mounted; this variable should be used to
# mount the directories that contain the data and tokenizer files.
export DOCKER_MOUNT_DIR=<megatron dataset and bpe tokenizer vocab path>


# Data and tokenizer files.
MEGATRON_DATA=<path to megatron processed data>
BPE_VOCAB_FILE=<path to bpe vocab file>
BPE_MERGE_FILE=<path to bpe merges file>


# Megatron input parameters.
# `MEGATRON_EXTRA_PARAMS` can be used to provide any extra parameters
# that are not listed here. 
export MEGATRON_PARAMS=" ${MEGATRON_EXTRA_PARAMS} \
	--tensor-model-parallel-size ${TP} \
	--pipeline-model-parallel-size ${PP} \
	--micro-batch-size ${MBS} \
	--global-batch-size ${GBS} \
        --num-layers ${NLS} \
        --hidden-size ${HS} \
        --num-attention-heads ${NAH} \
	--DDP-impl ${DDP} \
	--data-path ${MEGATRON_DATA} \
	--vocab-file ${BPE_VOCAB_FILE} \
	--merge-file ${BPE_MERGE_FILE} \
        --log-interval 5 \
        --seq-length 2048 \
        --max-position-embeddings 2048 \
        --train-iters 500 \
        --lr-decay-iters 320 \
        --lr 0.0001 \
	--min-lr 0.00001 \
        --lr-decay-style cosine \
        --lr-warmup-fraction 0.01 \
        --split 969,30,1 \
        --eval-iters 100 \
        --eval-interval 1000 \
        --clip-grad 1.0 \
        --fp16 \
	--loss-scale 8192 "


================================================
FILE: examples/academic_paper_scripts/sc21/README.md
================================================
# Reproducing Figures in SC21 Paper


This directory contains some of the scripts that were used to produce the
results in the [Megatron paper](https://arxiv.org/pdf/2104.04473.pdf) that is
to appear at [SuperComputing 2021](https://sc21.supercomputing.org/). These
scripts use [Slurm](https://slurm.schedmd.com/documentation.html) with the
[pyxis plugin](https://github.com/NVIDIA/pyxis), but can be modified for other
schedulers as well.


## Git commit

To replicate these results use Megatron-LM commit: 6985e58938d40ad91ac07b0fddcfad8132e1447e


## Setup

All the cluster-dependent variables are in [`CONFIG.sh`](./CONFIG.sh). Please
update the unspecified values (in angle brackets `<...>`) before launching any
scripts.


## Scripts

Below is a list of scripts that can be used to reproduce various figures in our
[paper](https://arxiv.org/pdf/2104.04473.pdf):

* [run_table_1.sh](./run_table_1.sh): Table 1 showing weak-scaling throughput
for GPT models ranging from 1 billion to 1 trillion parameters.
* [run_figure_11.sh](./run_figure_11.sh): Figure 11 showing the weak-scaling
performance of pipeline parallelism.
* [run_figure_12.sh](./run_figure_12.sh): Figure 12 showing the effect of
the interleaved schedule on a 175B GPT model.
* [run_figure_13.sh](./run_figure_13.sh): Figure 13 showing the effect of
different degrees of pipeline and tensor model parallelism on a model with
162.2 billion parameters.
* [run_figure_14.sh](./run_figure_14.sh): Figure 14 showing the effect of
different degrees of data and pipeline model parallelism on a model with
5.9 billion parameters.
* [run_figure_15.sh](./run_figure_15.sh): Figure 15 showing the effect of
different degrees of data and tensor model parallelism on a model with
5.9 billion parameters.
* [run_figure_16.sh](./run_figure_16.sh): Figure 16 showing the effect of
microbatch size.
* [run_figure_17.sh](./run_figure_17.sh): Figure 17 showing the effect of
activation recomputation.
* [run_figure_18.sh](./run_figure_18.sh): Figure 18 showing the effect of
the scatter-gather communication optimization.


================================================
FILE: examples/academic_paper_scripts/sc21/SBATCH.sh
================================================
#!/bin/bash


sbatch -p ${SLURM_PARTITION} \
       -A ${SLURM_ACCOUNT} \
       --job-name=${JOB_NAME} \
       --nodes=${NNODES} \
       --export=MEGATRON_CODE_DIR,MEGATRON_PARAMS,DOCKER_MOUNT_DIR SRUN.sh

exit 0


================================================
FILE: examples/academic_paper_scripts/sc21/SRUN.sh
================================================
#!/bin/bash

#SBATCH -t 0:30:00 --exclusive --mem=0 --overcommit --ntasks-per-node=8


THIS_DIR=`pwd`
DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`
mkdir -p ${THIS_DIR}/logs


CMD="python -u ${MEGATRON_CODE_DIR}/pretrain_gpt.py ${MEGATRON_PARAMS}"


srun -l \
     --container-image "nvcr.io#nvidia/pytorch:20.12-py3" \
     --container-mounts "${THIS_DIR}:${THIS_DIR},${MEGATRON_CODE_DIR}:${MEGATRON_CODE_DIR},${DOCKER_MOUNT_DIR}:${DOCKER_MOUNT_DIR}" \
     --output=${THIS_DIR}/logs/%x_%j_$DATETIME.log sh -c "${CMD}"


================================================
FILE: examples/academic_paper_scripts/sc21/run_figure_11.sh
================================================
#!/bin/bash

# ================================
# Choose the case to run.
# ================================

# Pipeline-parallel size options = [1, 2, 4, 8].
PP=1

# Batch size (global batch size) options = [8, 128].
GBS=8


# Set pipeline-parallel size options.
NLS=$((3*PP))
NNODES=${PP}


# Other params.
TP=8
MBS=1
HS=20480
NAH=128
DDP=local
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "


# Name of the job.
export JOB_NAME=results_figure_11_pipeline_parallel_size_${PP}_batch_size_${GBS}


# Import the configs.
. `pwd`/CONFIG.sh


# Submit the job.
. `pwd`/SBATCH.sh


exit 0


================================================
FILE: examples/academic_paper_scripts/sc21/run_figure_12.sh
================================================
#!/bin/bash

# ================================
# Choose the case to run.
# ================================

# Interleaved schedule options = [YES, NO].
INTERLEAVED=YES

# Batch size (global batch size) options = [12, 24, 36, ..., 60].
GBS=12


# Set interleaved schedule options.
if [ ${INTERLEAVED} == "YES" ]; then
    MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 2 "
elif [ ${INTERLEAVED} == "NO" ]; then
    MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
else
    echo "Invalid configuration"
    exit 1
fi


# Other params.
TP=8
PP=12
MBS=1
NLS=96
HS=12288
NAH=96
DDP=local
NNODES=12


# Name of the job.
export JOB_NAME=results_figure_12_interleaved_${INTERLEAVED}_batch_size_${GBS}


# Import the configs.
. `pwd`/CONFIG.sh


# Submit the job.
. `pwd`/SBATCH.sh


exit 0


================================================
FILE: examples/academic_paper_scripts/sc21/run_figure_13.sh
================================================
#!/bin/bash

# ================================
# Choose the case to run.
# ================================

# Pipeline-parallel size options = [2, 4, 8, 16, 32].
PP=2

# Batch size (global batch size) options = [32, 128].
GBS=32


# Set pipeline-parallel and tensor-parallel size options.
TP=$((64/PP))


# Other params.
MBS=1
NLS=32
HS=20480
NAH=128
DDP=local
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
NNODES=8


# Name of the job.
export JOB_NAME=results_figure_13_pipeline_parallel_size_${PP}_tensor_parallel_size_${TP}_batch_size_${GBS}


# Import the configs.
. `pwd`/CONFIG.sh


# Submit the job.
. `pwd`/SBATCH.sh


exit 0


================================================
FILE: examples/academic_paper_scripts/sc21/run_figure_14.sh
================================================
#!/bin/bash

# ================================
# Choose the case to run.
# ================================

# Pipeline-parallel size options = [2, 4, 8, 16, 32].
PP=2

# Batch size (global batch size) options = [32, 512].
GBS=32


# Set pipeline-parallel and data-parallel size options.
DP=$((64/PP))


# Other params.
TP=1
MBS=1
NLS=32
HS=3840
NAH=32
DDP=local
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
NNODES=8


# Name of the job.
export JOB_NAME=results_figure_14_pipeline_parallel_size_${PP}_data_parallel_size_${DP}_batch_size_${GBS}


# Import the configs.
. `pwd`/CONFIG.sh


# Submit the job.
. `pwd`/SBATCH.sh


exit 0


================================================
FILE: examples/academic_paper_scripts/sc21/run_figure_15.sh
================================================
#!/bin/bash

# ================================
# Choose the case to run.
# ================================

# Tensor-parallel size options = [2, 4, 8, 16, 32].
TP=2

# Batch size (global batch size) options = [32, 128, 512].
GBS=32


# Set tensor-parallel and data-parallel size options.
DP=$((64/TP))


# Other params.
PP=1
MBS=1
NLS=32
HS=3840
NAH=32
DDP=local
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
NNODES=8


# Name of the job.
export JOB_NAME=results_figure_15_tensor_parallel_size_${TP}_data_parallel_size_${DP}_batch_size_${GBS}


# Import the configs.
. `pwd`/CONFIG.sh


# Submit the job.
. `pwd`/SBATCH.sh


exit 0


================================================
FILE: examples/academic_paper_scripts/sc21/run_figure_16.sh
================================================
#!/bin/bash

# ================================
# Choose the case to run.
# ================================

# Microbatch size options = [1, 2, 4, 8].
MBS=1

# Batch size (global batch size) options = [128, 512].
GBS=128


# Other params.
TP=8
PP=8
NLS=32
HS=15360
NAH=128
DDP=local
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
NNODES=8


# Name of the job.
export JOB_NAME=results_figure_16_microbatch_size_${MBS}_batch_size_${GBS}


# Import the configs.
. `pwd`/CONFIG.sh


# Submit the job.
. `pwd`/SBATCH.sh


exit 0


================================================
FILE: examples/academic_paper_scripts/sc21/run_figure_17.sh
================================================
#!/bin/bash

# ================================
# Choose the case to run.
# ================================

# Activation recomputation options = [YES, NO].
ACTIVATION_RECOMPUTATION=YES

# Batch size (global batch size) options = [1, 2, 4, ..., 256].
GBS=1


# Set activation recomputation.
if [ ${ACTIVATION_RECOMPUTATION} == "YES" ]; then
    MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${ACTIVATION_RECOMPUTATION} == "NO" ]; then
    MEGATRON_EXTRA_PARAMS=""
else
    echo "Invalid configuration"
    exit 1
fi


# Other params.
TP=8
PP=16
MBS=1
NLS=80
HS=12288
NAH=96
DDP=local
NNODES=16


# Name of the job.
export JOB_NAME=results_figure_17_activation_recomputation_${ACTIVATION_RECOMPUTATION}_batch_size_${GBS}


# Import the configs.
. `pwd`/CONFIG.sh


# Submit the job.
. `pwd`/SBATCH.sh


exit 0


================================================
FILE: examples/academic_paper_scripts/sc21/run_figure_18.sh
================================================
#!/bin/bash

# ================================
# Choose the case to run.
# ================================

# Scatter-gather communication optimization options = [YES, NO].
SCATTER_GATHER=YES

# Batch size (global batch size) options = [12, 24, 36, ..., 60].
GBS=12


# Set scatter-gather communication optimization options.
if [ ${SCATTER_GATHER} == "YES" ]; then
    MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 2 "
elif [ ${SCATTER_GATHER} == "NO" ]; then
    MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 2 --no-scatter-gather-tensors-in-pipeline "
else
    echo "Invalid configuration"
    exit 1
fi


# Other params.
TP=8
PP=12
MBS=1
NLS=96
HS=12288
NAH=96
DDP=local
NNODES=12


# Name of the job.
export JOB_NAME=results_figure_18_scatter_gather_${SCATTER_GATHER}_batch_size_${GBS}


# Import the configs.
. `pwd`/CONFIG.sh


# Submit the job.
. `pwd`/SBATCH.sh


exit 0


================================================
FILE: examples/academic_paper_scripts/sc21/run_table_1.sh
================================================
#!/bin/bash

# ================================
# Choose the case to run.
# ================================
# model size options = [1.7B, 3.6B, 7.5B, 18B, 39B, 76B, 145B, 310B, 530B, 1T]
MODEL_SIZE=1.7B


if [ ${MODEL_SIZE} == "1.7B" ]; then
    TP=1
    PP=1
    MBS=16
    GBS=512
    NLS=24
    HS=2304
    NAH=24
    DDP=torch
    NNODES=4
    MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${MODEL_SIZE} == "3.6B" ]; then
    TP=2
    PP=1
    MBS=16
    GBS=512
    NLS=30
    HS=3072
    NAH=32
    DDP=torch
    NNODES=8
    MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${MODEL_SIZE} == "7.5B" ]; then
    TP=4
    PP=1
    MBS=16
    GBS=512
    NLS=36
    HS=4096
    NAH=32
    DDP=torch
    NNODES=16
    MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${MODEL_SIZE} == "18B" ]; then
    TP=8
    PP=1
    MBS=8
    GBS=1024
    NLS=40
    HS=6144
    NAH=48
    DDP=torch
    NNODES=32
    MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${MODEL_SIZE} == "39B" ]; then
    TP=8
    PP=2
    MBS=4
    GBS=1536
    NLS=48
    HS=8192
    NAH=64
    DDP=local
    NNODES=64
    MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${MODEL_SIZE} == "76B" ]; then
    TP=8
    PP=4
    MBS=2
    GBS=1792
    NLS=60
    HS=10240
    NAH=80
    DDP=local
    NNODES=128
    MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 5"
elif [ ${MODEL_SIZE} == "145B" ]; then
    TP=8
    PP=8
    MBS=2
    GBS=2304
    NLS=80
    HS=12288
    NAH=96
    DDP=local
    NNODES=192
    MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 5 "
elif [ ${MODEL_SIZE} == "310B" ]; then
    TP=8
    PP=16
    MBS=1
    GBS=2160
    NLS=96
    HS=16384
    NAH=128
    DDP=local
    NNODES=240
    MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 3 "
elif [ ${MODEL_SIZE} == "530B" ]; then
    TP=8
    PP=35
    MBS=1
    GBS=2520
    NLS=105
    HS=20480
    NAH=128
    DDP=local
    NNODES=315
    MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 1 "
elif [ ${MODEL_SIZE} == "1T" ]; then
    TP=8
    PP=64
    MBS=1
    GBS=3072
    NLS=128
    HS=25600
    NAH=160
    DDP=local
    NNODES=384
    MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
else
    echo "Invalid configuration"
    exit 1
fi


# Name of the job
export JOB_NAME=results_table_1_model_size_${MODEL_SIZE}


# Import the configs.
. `pwd`/CONFIG.sh


# Submit the job.
. `pwd`/SBATCH.sh


exit 0


================================================
FILE: examples/bert/README.md
================================================
# BERT MODEL

## Table of contents
- [1. Training Setup](#1-training-setup)
- [2. Configurations](#2-configurations)

## 1. Training setup
<a id="markdown-training-setup" name="training-setup"></a>

To run the model using a docker container run it as follows
```
PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:24.01-py3
CHECKPOINT_PATH="" #<Specify path>
TENSORBOARD_LOGS_PATH=""#<Specify path>
VOCAB_FILE="" #<Specify path to file>//bert-vocab.txt
DATA_PATH="" #<Specify path and file prefix>_text_document

docker run \
  --gpus=all \
  --ipc=host \
  --workdir /workspace/megatron-lm \
  -v /path/to/data:/path/to/data \
  -v /path/to/megatron-lm:/workspace/megatron-lm \
  megatron-lm nvcr.io/nvidia/pytorch:24.01-py3 \
  bash examples/bert/train_bert_340m_distributed.sh $CHECKPOINT_PATH $TENSORBOARD_LOGS_PATH $VOCAB_FILE $DATA_PATH "

```
NOTE: Depending on the environment you are running it the above command might like slightly different.


## 2. Configurations
<a id="markdown-configurations" name="configurations"></a>
The example in this folder shows you how to run 340m large model. There are other configs you could run as well

### 4B
```
       --num-layers 48 \
       --hidden-size 2560 \
       --num-attention-heads 32 \
       --tensor-model-parallel-size 1 \
       --pipeline-model-parallel-size 1 \

```

### 20B
```
       --num-layers 48 \
       --hidden-size 6144 \
       --num-attention-heads 96 \
       --tensor-model-parallel-size 4 \
       --pipeline-model-parallel-size 4 \

```

================================================
FILE: examples/bert/train_bert_340m_distributed.sh
================================================
#!/bin/bash

# Runs the "340M" parameter model (Bert - Large)

export CUDA_DEVICE_MAX_CONNECTIONS=1

GPUS_PER_NODE=8
# Change for multinode config
MASTER_ADDR=localhost
MASTER_PORT=6000
NUM_NODES=1
NODE_RANK=0
WORLD_SIZE=$(($GPUS_PER_NODE*$NUM_NODES))

CHECKPOINT_PATH=$1 #<Specify path>
TENSORBOARD_LOGS_PATH=$2 #<Specify path>
VOCAB_FILE=$3 #<Specify path to file>/bert-vocab.json
DATA_PATH=$4 #<Specify path and file prefix>_text_document

DISTRIBUTED_ARGS=(
    --nproc_per_node $GPUS_PER_NODE 
    --nnodes $NUM_NODES 
    --master_addr $MASTER_ADDR 
    --master_port $MASTER_PORT
)

BERT_MODEL_ARGS=(
    --num-layers 24 
    --hidden-size 1024 
    --num-attention-heads 16 
    --seq-length 512 
    --max-position-embeddings 512 
    --attention-backend auto # Can use (flash/fused/unfused/local)
)

TRAINING_ARGS=(
    --micro-batch-size 4 
    --global-batch-size 32 
    --train-iters 1000000 
    --weight-decay 1e-2 
    --clip-grad 1.0 
    --fp16
    --lr 0.0001
    --lr-decay-iters 990000 
    --lr-decay-style linear 
    --min-lr 1.0e-5 
    --weight-decay 1e-2 
    --lr-warmup-fraction .01 
    --clip-grad 1.0 
)

MODEL_PARALLEL_ARGS=(
	--tensor-model-parallel-size 8 
	--pipeline-model-parallel-size 16 
)

DATA_ARGS=(
    --data-path $DATA_PATH 
    --vocab-file $VOCAB_FILE 
    --split 949,50,1
)

EVAL_AND_LOGGING_ARGS=(
    --log-interval 100
    --save-interval 10000 
    --eval-interval 1000 
    --save $CHECKPOINT_PATH 
    --load $CHECKPOINT_PATH 
    --eval-iters 10
    --tensorboard-dir $TENSORBOARD_LOGS_PATH 
)

torchrun ${DISTRIBUTED_ARGS[@]} pretrain_bert.py \
    ${BERT_MODEL_ARGS[@]} \
    ${TRAINING_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${DATA_ARGS[@]} \
    ${EVAL_AND_LOGGING_ARGS[@]}
    

================================================
FILE: examples/export/README.md
================================================
# Megatron Core Export

This module is used to export megatron core models to different inference frameworks.
Currently we support TRTLLM export . In the future we will be adding support for VLLM etc.

## PTQ AND EXPORT
Follow the examples of [Model Optimizer](../post_training/modelopt) to perform post training quantization, followed by an export to a HF-like checkpoint for TensorRT-LLM, vLLM, and SGLang deployment.

# TRTLLM EXPORT
Follow the instructions in [trtllm_export](./trtllm_export/) to do export to TRTLLM checkpoint format alone.


================================================
FILE: examples/export/trtllm_export/README.md
================================================
# Megatron Core To TRTLLM Export Documentation
This guide will walk you through how you can use the megatron core export for exporting models to trtllm format

### Contents
- [Megatron Core To TRTLLM Export Documentation](#megatron-core-to-trtllm-export-documentation)
- [Contents](#contents)
  - [1. Quick Start](#1-quick-start)
    - [1.1 Understanding The Code](#11-understanding-the-code)
    - [1.2 Running The Code](#12-running-the-code)
  - [2. GPU Export](#2-gpu-export)
  - [3. Future work](#4-future-work)

#### 1. Quick Start
This will walk you through the flow of converting an mcore gpt model to trtllm format using single device mode. The file can be found at [gpt_single_device_cpu_export.py](./single_device_export/gpt_single_device_cpu_export.py)

NOTE: For faster performance, if your entire model will fit into gpu memory, pre transfer the model state dict to gpu and then call the get_trtllm_pretrained_config_and_model_weights function.

<br>

##### 1.1 Understanding The Code
***STEP 1 - We initialize model parallel and other default arguments***
We initalize tp and pp to 1 so that we can get the full model state dict on cpu
```python
    initialize_distributed(tensor_model_parallel_size=1, pipeline_model_parallel_size=1)
```

***STEP 2 - We load the model using the model_provider_function***
NOTE: We create a simple gpt model

```python
    transformer_config = TransformerConfig(
        num_layers=2, 
        hidden_size=64, # Needs to be atleast 32 times num_attn_heads
        num_attention_heads=2, 
        use_cpu_initialization=True, 
        pipeline_dtype=torch.float32,
    )

    gpt_model = GPTModel(
        config=transformer_config, 
        transformer_layer_spec=get_gpt_layer_local_spec(), 
        vocab_size=100, 
        max_sequence_length=_SEQUENCE_LENGTH,
    )

    # Optionally you can also load a model using this code 
    # sharded_state_dict=gpt_model.sharded_state_dict(prefix='')
    # checkpoint = dist_checkpointing.load(sharded_state_dict=sharded_state_dict, checkpoint_dir=checkpoint_path)
    # gpt_model.load_state_dict(checkpoint)

```

***STEP 3 - Instantiate the TRTLLM Helper***
We instantiate the [TRTLLM Helper](../../../megatron/core/export/trtllm/trtllm_helper.py)  For the GPT model we instantiate trtllm_helper as shown below.
```python
    if hasattr(gpt_model, "rotary_pos_emb"):
        seq_len_interpolation_factor =  gpt_model.rotary_pos_emb.seq_len_interpolation_factor

    trtllm_helper = TRTLLMHelper(
                        transformer_config=gpt_model.config, 
                        model_type=ModelType.gpt,
                        position_embedding_type = gpt_model.position_embedding_type, 
                        max_position_embeddings = gpt_model.max_position_embeddings, 
                        rotary_percentage = gpt_model.rotary_percent,
                        rotary_base = gpt_model.rotary_base,
                        moe_tp_mode = 2,
                        multi_query_mode = False,
                        activation = "gelu", 
                        seq_len_interpolation_factor = seq_len_interpolation_factor,
                        share_embeddings_and_output_weights=gpt_model.share_embeddings_and_output_weights
                    )   
```

***STEP 4 - Get the TRTLLM Weights and configs***
To convert model weights to trtllm weights and configs, we use the [single_device_converter](../../../megatron/core/export/trtllm/trtllm_weights_converter/single_device_trtllm_model_weights_converter.py). We pass as inputs the model state dict, and export config. In this example we use inference tp size as 2 for the export. 

```python
    model_state_dict={}
    for key , val in gpt_model.state_dict().items():
        # val is non for _extra_state layers . We filter it out
        if val is not None:
            model_state_dict[key] = val

    export_config = ExportConfig(inference_tp_size = 2)
    weight_list, config_list = trtllm_helper.get_trtllm_pretrained_config_and_model_weights(
        model_state_dict= model_state_dict,
        dtype = DataType.bfloat16,
        export_config=export_config
    )
```

***STEP 5 - Build the TRTLLM Engine***
Following code is used to build the TRTLLM Engine. 

```python
    for trtllm_model_weights, trtllm_model_config in zip(weight_list, config_list):
        trtllm_helper.build_and_save_engine(
            max_input_len=256,
            max_output_len=256,
            max_batch_size=8,
            engine_dir='/opt/megatron-lm/engine',
            trtllm_model_weights=trtllm_model_weights,
            trtllm_model_config=trtllm_model_config,
            lora_ckpt_list=None,
            use_lora_plugin=None,
            max_lora_rank=64,
            lora_target_modules=None,
            max_prompt_embedding_table_size=0,
            paged_kv_cache=True,
            remove_input_padding=True,
            paged_context_fmha=False,
            use_refit=False,
            max_num_tokens=None,
            max_seq_len=512,
            opt_num_tokens=None,
            max_beam_width=1,
            tokens_per_block=128,
            multiple_profiles=False,
            gpt_attention_plugin="auto",
            gemm_plugin="auto",
        )
```
<br>

##### 1.2 Running The Code
An example run script is shown below. 

```
# In a workstation 
MLM_PATH=/path/to/megatron-lm
CONTAINER_IMAGE=gitlab-master.nvidia.com:5005/dl/joc/nemo-ci/trtllm_0.12/train:pipe.17669124-x86

docker run -it --gpus=all --ipc=host -v $MLM_PATH/:/opt/megatron-lm $CONTAINER_IMAGE bash

# Inside the container run the following. 

cd /opt/megatron-lm/

CUDA_VISIBLE_DEVICES=0 torchrun --nproc-per-node 1  examples/export/trtllm_export/single_device_export/gpt_single_device_cpu_export.py
```

<br>

#### 2. GPU Export
You can use the [gpt_distributed_gpu_export.py](./distributed_export/gpt_distributed_gpu_export.py) to run a more optimized on device distributed. version of trtllm export. Internally this uses the [distributed_converter](../../../megatron/core/export/trtllm/trtllm_weights_converter/distributed_trtllm_model_weights_converter.py) to convert model weights on device. 
In the single device version you collect all the model weights on CPU/GPU, convert it to trtllm format, and then store the engine back on disk. In the GPU version you load each individual state dict on the gpus, convert it on the device itself and store the engine on disk. 

To run the gpu version 

```
CUDA_VISIBLE_DEVICES=0,1 torchrun --nproc-per-node 2  examples/export/trtllm_export/distributed_export/gpt_distributed_gpu_export.py
```

<br>

#### 3. Future work
The following are planned for the future releases . 
* Pipeline parallellism for export (Work in progress) 
* GPU Export for more models (Work in progress for some models)
* Refit functionality
* VLLM Support

================================================
FILE: examples/export/trtllm_export/distributed_export/gpt_distributed_gpu_export.py
================================================
import os
import torch
from megatron.core import parallel_state
from megatron.core import dist_checkpointing
from megatron.core.export.model_type import ModelType
from megatron.core.export.data_type import DataType
from megatron.core.export.trtllm.trtllm_helper import TRTLLMHelper
from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.models.gpt.gpt_model import GPTModel
from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_local_spec


_SEQUENCE_LENGTH = 64
_VOCAB_SIZE = 256

def initialize_distributed(tensor_model_parallel_size=1, pipeline_model_parallel_size=1):
    parallel_state.destroy_model_parallel()

    # Torch setup for distributed training
    rank = int(os.environ['LOCAL_RANK'])
    world_size = torch.cuda.device_count()
    torch.cuda.set_device(rank)
    torch.distributed.init_process_group(world_size=world_size, rank=rank)

    # Megatron core distributed training initialization
    parallel_state.initialize_model_parallel(tensor_model_parallel_size = tensor_model_parallel_size, pipeline_model_parallel_size=pipeline_model_parallel_size)

def model_provider():
    """Build the model."""

    transformer_config = TransformerConfig(
        num_layers=2, 
        hidden_size=64, 
        num_attention_heads=2, 
        use_cpu_initialization=True, 
        pipeline_dtype=torch.float32
    )

    gpt_model = GPTModel(
        config=transformer_config, 
        transformer_layer_spec=get_gpt_layer_local_spec(), 
        vocab_size=_VOCAB_SIZE, 
        max_sequence_length=_SEQUENCE_LENGTH,
    )

    return gpt_model

def load_distributed_checkpoint(checkpoint_path, gpt_model):
    sharded_state_dict=gpt_model.sharded_state_dict(prefix='')
    checkpoint = dist_checkpointing.load(sharded_state_dict=sharded_state_dict, checkpoint_dir=checkpoint_path)
    gpt_model.load_state_dict(checkpoint)
    return gpt_model

if __name__ == "__main__":
    initialize_distributed(tensor_model_parallel_size=2, pipeline_model_parallel_size=1)
    model_parallel_cuda_manual_seed(123)

    gpt_model = model_provider()
    device = torch.device("cuda")
    gpt_model.to(device) 
    
    # Optionally you can also load a gpt model from ckpt_path using this code below
    # gpt_model = load_distributed_checkpoint(gpt_model=gpt_model, checkpoint_path=ckpt_path)

    seq_len_interpolation_factor = None
    if hasattr(gpt_model, "rotary_pos_emb"):
        seq_len_interpolation_factor =  gpt_model.rotary_pos_emb.seq_len_interpolation_factor

    trtllm_helper = TRTLLMHelper(
                        transformer_config=gpt_model.config, 
                        model_type=ModelType.gpt,
                        position_embedding_type = gpt_model.position_embedding_type, 
                        max_position_embeddings = gpt_model.max_position_embeddings, 
                        rotary_percentage = gpt_model.rotary_percent,
                        rotary_base = gpt_model.rotary_base,
                        moe_tp_mode = 2,
                        multi_query_mode = False,
                        activation = "gelu", 
                        seq_len_interpolation_factor = seq_len_interpolation_factor,
                        share_embeddings_and_output_weights=gpt_model.share_embeddings_and_output_weights
                    )
    

    trtllm_model_weights, trtllm_model_config = trtllm_helper.get_trtllm_pretrained_config_and_model_weights(
        model_state_dict= gpt_model.state_dict(),
        dtype = DataType.bfloat16,
        on_device_distributed_conversion=True, 
        vocab_size=_VOCAB_SIZE, 
        gpus_per_node=2,
    )

    trtllm_helper.build_and_save_engine(
        max_input_len=256,
        max_output_len=256,
        max_batch_size=8,
        engine_dir='/opt/megatron-lm/engine',
        trtllm_model_weights=trtllm_model_weights[0],
        trtllm_model_config=trtllm_model_config[0],
        lora_ckpt_list=None,
        use_lora_plugin=None,
        max_lora_rank=64,
        lora_target_modules=None,
        max_prompt_embedding_table_size=0,
        paged_kv_cache=True,
        remove_input_padding=True,
        paged_context_fmha=False,
        use_refit=False,
        max_num_tokens=None,
        max_seq_len=512,
        opt_num_tokens=None,
        max_beam_width=1,
        tokens_per_block=128,
        multiple_profiles=False,
        gpt_attention_plugin="auto",
        gemm_plugin="auto",
    )


================================================
FILE: examples/export/trtllm_export/single_device_export/gpt_single_device_cpu_export.py
================================================
import os
import torch
from megatron.core import parallel_state
from megatron.core import dist_checkpointing
from megatron.core.export.model_type import ModelType
from megatron.core.export.data_type import DataType
from megatron.core.export.export_config import ExportConfig
from megatron.core.export.trtllm.trtllm_helper import TRTLLMHelper
from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.models.gpt.gpt_model import GPTModel
from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_local_spec


_SEQUENCE_LENGTH = 64


def initialize_distributed(tensor_model_parallel_size=1, pipeline_model_parallel_size=1):
    parallel_state.destroy_model_parallel()

    # Torch setup for distributed training
    rank = int(os.environ['LOCAL_RANK'])
    world_size = torch.cuda.device_count()
    torch.cuda.set_device(rank)
    torch.distributed.init_process_group(world_size=world_size, rank=rank)

    # Megatron core distributed training initialization
    parallel_state.initialize_model_parallel(tensor_model_parallel_size, pipeline_model_parallel_size)

def model_provider():
    """Build the model."""

    transformer_config = TransformerConfig(
        num_layers=2, 
        hidden_size=64, # Needs to be atleast 32 times num_attn_heads
        num_attention_heads=2, 
        use_cpu_initialization=True, 
        pipeline_dtype=torch.float32,
    )

    gpt_model = GPTModel(
        config=transformer_config, 
        transformer_layer_spec=get_gpt_layer_local_spec(), 
        vocab_size=100, 
        max_sequence_length=_SEQUENCE_LENGTH,
    )

    return gpt_model

def load_distributed_checkpoint(checkpoint_path, gpt_model):
    sharded_state_dict=gpt_model.sharded_state_dict(prefix='')
    checkpoint = dist_checkpointing.load(sharded_state_dict=sharded_state_dict, checkpoint_dir=checkpoint_path)
    gpt_model.load_state_dict(checkpoint)
    return gpt_model

if __name__ == "__main__":
    # Need to use TP1 PP1 for export on single device
    initialize_distributed(tensor_model_parallel_size=1, pipeline_model_parallel_size=1)
    model_parallel_cuda_manual_seed(123)

    gpt_model = model_provider()

    # Optionally you can also load a gpt model from ckpt_path using this code below
    # gpt_model = load_distributed_checkpoint(gpt_model=gpt_model, checkpoint_path=ckpt_path)

    seq_len_interpolation_factor = None
    if hasattr(gpt_model, "rotary_pos_emb"):
        seq_len_interpolation_factor =  gpt_model.rotary_pos_emb.seq_len_interpolation_factor

    trtllm_helper = TRTLLMHelper(
                        transformer_config=gpt_model.config, 
                        model_type=ModelType.gpt,
                        position_embedding_type = gpt_model.position_embedding_type, 
                        max_position_embeddings = gpt_model.max_position_embeddings, 
                        rotary_percentage = gpt_model.rotary_percent,
                        rotary_base = gpt_model.rotary_base,
                        moe_tp_mode = 2,
                        multi_query_mode = False,
                        activation = "gelu", 
                        seq_len_interpolation_factor = seq_len_interpolation_factor,
                        share_embeddings_and_output_weights=gpt_model.share_embeddings_and_output_weights
                    )
    

    export_config = ExportConfig(inference_tp_size = 2)
    # NOTE : For faster performance, if your entire model will fit in gpu memory, transfer model state dict to GPU and then call this api
    weight_list, config_list = trtllm_helper.get_trtllm_pretrained_config_and_model_weights(
        model_state_dict= gpt_model.state_dict(),
        dtype = DataType.bfloat16,
        export_config=export_config
    )

    for trtllm_model_weights, trtllm_model_config in zip(weight_list, config_list):
        trtllm_helper.build_and_save_engine(
            max_input_len=256,
            max_output_len=256,
            max_batch_size=8,
            engine_dir='/opt/megatron-lm/engine',
            trtllm_model_weights=trtllm_model_weights,
            trtllm_model_config=trtllm_model_config,
            lora_ckpt_list=None,
            use_lora_plugin=None,
            max_lora_rank=64,
            lora_target_modules=None,
            max_prompt_embedding_table_size=0,
            paged_kv_cache=True,
            remove_input_padding=True,
            paged_context_fmha=False,
            use_refit=False,
            max_num_tokens=None,
            max_seq_len=512,
            opt_num_tokens=None,
            max_beam_width=1,
            tokens_per_block=128,
            multiple_profiles=False,
            gpt_attention_plugin="auto",
            gemm_plugin="auto",
        )

================================================
FILE: examples/gpt3/README.md
================================================
# GPT3 MODEL

## Table of contents
- [1. Training Setup](#1-training-setup)
- [2. Configurations](#2-configurations)
- [3. Training Results](#3-training-results)

## 1. Training setup
<a id="markdown-training-setup" name="training-setup"></a>

To run the model using a docker container run it as follows
```
PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:24.01-py3
CHECKPOINT_PATH="" #<Specify path>
TENSORBOARD_LOGS_PATH=""#<Specify path>
VOCAB_FILE="" #<Specify path to file>/gpt2-vocab.json
MERGE_FILE="" #<Specify path to file>/gpt2-merges.txt
DATA_PATH="" #<Specify path and file prefix>_text_document

docker run \
  --gpus=all \
  --ipc=host \
  --workdir /workspace/megatron-lm \
  -v /path/to/data:/path/to/data \
  -v /path/to/megatron-lm:/workspace/megatron-lm \
  megatron-lm nvcr.io/nvidia/pytorch:24.01-py3 \
  bash examples/gpt3/train_gpt3_175b_distributed.sh $CHECKPOINT_PATH $TENSORBOARD_LOGS_PATH $VOCAB_FILE $MERGE_FILE $DATA_PATH "

```
NOTE: Depending on the environment you are running it the above command might like slightly different.


## 2. Configurations
<a id="markdown-configurations" name="configurations"></a>
The example in this folder shows you how to run 175B model. There are other configs you could run as well

### 345M
```
       --num-layers 12 \
       --hidden-size 512 \
       --num-attention-heads 8 \
       --seq-length 1024 \
       --tensor-model-parallel-size 1 \
       --pipeline-model-parallel-size 1 \

```

### 857M
```
       --num-layers 24 \
       --hidden-size 1024 \
       --num-attention-heads 16 \
       --seq-length 2048 \
       --tensor-model-parallel-size 1 \
       --pipeline-model-parallel-size 1 \

```


================================================
FILE: examples/gpt3/gpt_config.yaml
================================================
# WARNING: Yaml configs is currently an experimental feature
language_model:
  # model architecture
  num_layers: 24
  hidden_size: 1024
  num_attention_heads: 16
  num_query_groups: null

  ffn_hidden_size: null
  kv_channels: null
  hidden_dropout: 0.0
  attention_dropout: 0.0
  fp32_residual_connection: False

  apply_residual_connection_post_layernorm: False
  layernorm_epsilon: 1.e-5
  layernorm_zero_centered_gamma: True
  add_bias_linear: False
  bias_activation_fusion: False
  add_qkv_bias: False
  gated_linear_unit: False
  activation_func: swiglu
  num_moe_experts: null
  rotary_interleaved: False
  window_size: null

  # initialization
  init_method: null
  init_method_std: 0.02
  output_layer_init_method: null

  # mixed-precision
  apply_query_key_layer_scaling: False
  attention_softmax_in_fp32: False

  # fusion
  bias_swiglu_fusion: True
  masked_softmax_fusion: True
  persist_layer_norm: False
  memory_efficient_layer_norm: False
  bias_dropout_fusion: True
  apply_rope_fusion: True

  # activation recomputation
  recompute_granularity: null
  recompute_method: null
  recompute_num_layers: null
  distribute_saved_activations: null

  # fp8 related
  fp8: null
  fp8_margin: 0
  fp8_interval: 1
  fp8_amax_history_len: 1
  fp8_amax_compute_algo: "most_recent"
  fp8_wgrad: True

  # miscellaneous
  clone_scatter_output_in_embedding: True

  normalization: "LayerNorm"  # alt value supported by TE: "RMSNorm"

  # MoE related
  moe_router_load_balancing_type: "aux_loss"
  moe_router_topk: 2
  moe_router_group_topk: null
  moe_router_num_groups: null
  moe_grouped_gemm: False
  moe_aux_loss_coeff: 0  # 1e-2 would be a good start value for load balance loss.
  moe_z_loss_coeff: null  # 1e-3 would be a good start value for z-loss
  moe_input_jitter_eps: null
  moe_token_dropping: False

model_parallel:
  # Model parallelism
  tensor_model_parallel_size: 1
  context_parallel_size: 1
  pipeline_model_parallel_size: 1
  virtual_pipeline_model_parallel_size: null
  sequence_parallel: True
  expert_model_parallel_size: 1

  # Initialization
  perform_initialization: True
  use_cpu_initialization: null

  # Training
  fp16: False
  bf16: True
  params_dtype: null # Set from above arguments for core
  timers: null

  # Optimizations
  gradient_accumulation_fusion: True
  tp_comm_overlap: False

  # Debug Options
  tp_comm_split_ag: True
  tp_comm_atomic_ag: True
  tp_comm_split_rs: True
  tp_comm_atomic_rs: True
  tp_comm_bulk_wgrad: True
  tp_comm_bulk_dgrad: True

  # Parallelism
  finalize_model_grads_func: null

  # Pipeline Parallel
  pipeline_dtype: null
  grad_scale_func: null
  enable_autocast: False
  autocast_dtype: null
  variable_seq_lengths: False
  num_microbatches_with_partial_activation_checkpoints: null
  overlap_p2p_comm: False
  batch_p2p_comm: True
  batch_p2p_sync: True
  use_ring_exchange_p2p: False
  deallocate_pipeline_outputs: False
  no_sync_func: null
  grad_sync_func: null
  param_sync_func: null

  # CPU Offloading
  cpu_offloading: False
  cpu_offloading_num_layers: 0
  _cpu_offloading_context: null
  cpu_offloading_weights: False
  cpu_offloading_activations: True

  # Timing
  barrier_with_L1_time: True

# training:
use_legacy_models: False
spec: null
micro_batch_size: 2
global_batch_size: 128
rampup_batch_size: [32, 32, 65324160] 
check_for_nan_in_loss_and_grad: True
num_layers_per_virtual_pipeline_stage: null

encoder_num_layers: null
decoder_num_layers: null
rotary_seq_len_interpolation_factor: null
add_position_embedding: False
make_vocab_size_divisible_by: 128
group_query_attention: False


exit_signal_handler: False
exit_duration_in_mins: null
exit_interval: null

untie_embeddings_and_output_weights: True
position_embedding_type: rope
rotary_percent: 0.5
openai_gelu: False
squared_relu: False
swiglu: True
onnx_safe: null
bert_binary_head: True
max_position_embeddings: 4096

transformer_impl: local
use_flash_attn: False
seed: 1234
data_parallel_random_init: False

# Optimizer
optimizer: adam
lr: 2.5e-4
lr_decay_style: cosine
lr_decay_iters: null
lr_decay_samples: 255126953
lr_warmup_fraction: null
lr_warmup_iters: 0
lr_warmup_samples: 81381
lr_warmup_init: 0.0
min_lr: 2.5e-5
weight_decay: 0.1
start_weight_decay: null
end_weight_decay: null
weight_decay_incr_style: constant
clip_grad: 1.0
adam_beta1: 0.9
adam_beta2: 0.95
adam_eps: 1.e-08
sgd_momentum: 0.9
override_opt_param_scheduler: False
use_checkpoint_opt_param_scheduler: False

# checkpointing arguments
save: null
save_interval: 20000
no_save_optim: null
no_save_rng: null
load: null
no_load_optim: null
no_load_rng: null
finetune: False
use_checkpoint_args: False
exit_on_missing_checkpoint: False

# loss arguments
loss_scale: null
initial_loss_scale: 4294967296
min_loss_scale: 1.0
loss_scale_window: 1000 
hysteresis: 2
accumulate_allreduce_grads_in_fp32: False
fp16_lm_cross_entropy: False

# distributed arguments
distributed_backend: nccl
distributed_timeout_minutes: 10
overlap_grad_reduce: False
align_grad_reduce: True
overlap_param_gather: False
align_param_gather: False
scatter_gather_tensors_in_pipeline: True
local_rank: null
lazy_mpu_init: null
empty_unused_memory_level: 0
standalone_embedding_stage: False
use_distributed_optimizer: False
nccl_communicator_config_path: null

train_iters: null
eval_iters: 32
eval_interval: 2000
skip_train: False

adlr_autoresume: False
adlr_autoresume_interval: 1000

# garbage collection
manual_gc: False
manual_gc_interval: 0
manual_gc_eval: True

tp_comm_overlap_cfg: null

#data
data_path: null
split: '99,1,0'
train_data_path: null
valid_data_path: null
test_data_path: null
data_cache_path: null
mock_data: False
vocab_size: null
vocab_file: null
merge_file: null
vocab_extra_ids: 0
seq_length: 4096
encoder_seq_length: null
decoder_seq_length: null
sample_rate: 1.0
mask_prob: 0.15
short_seq_prob: 0.1
num_workers: 2
tokenizer_type: GPTSentencePieceTokenizer
tokenizer_model: null
reset_position_ids: False
reset_attention_mask: False
eod_mask_loss: False
train_samples: 268554688
dataloader_type: null

#profile:
profile: False
profile_ranks: [0]
profile_step_end: 12
profile_step_start: 10

#logging:
log_params_norm: True
log_num_zeros_in_grad: True
log_throughput: False
log_progress: False
timing_log_level: 0
timing_log_option: minmax
tensorboard_log_interval: 1
tensorboard_queue_size: 1000
log_timers_to_tensorboard: False
log_validation_ppl_to_tensorboard: False
log_memory_to_tensorboard: False
log_world_size_to_tensorboard: False
log_loss_scale_to_tensorboard: True
wandb_project: ''
wandb_exp_name: ''
wandb_save_dir: ''
enable_one_logger: True
one_logger_project: megatron-lm
one_logger_run_name: null
log_interval: 100
tensorboard_dir: null


================================================
FILE: examples/gpt3/train_gpt3_175b_distributed.sh
================================================
#!/bin/bash

# Runs the "175B" parameter model

export CUDA_DEVICE_MAX_CONNECTIONS=1

GPUS_PER_NODE=8
# Change for multinode config
MASTER_ADDR=localhost
MASTER_PORT=6000
NUM_NODES=1
NODE_RANK=0
WORLD_SIZE=$(($GPUS_PER_NODE*$NUM_NODES))

CHECKPOINT_PATH=$1 #<Specify path>
TENSORBOARD_LOGS_PATH=$2 #<Specify path>
VOCAB_FILE=$3 #<Specify path to file>/gpt2-vocab.json
MERGE_FILE=$4 #<Specify path to file>/gpt2-merges.txt
DATA_PATH=$5 #<Specify path and file prefix>_text_document

DISTRIBUTED_ARGS=(
    --nproc_per_node $GPUS_PER_NODE 
    --nnodes $NUM_NODES 
    --master_addr $MASTER_ADDR 
    --master_port $MASTER_PORT
)

GPT_MODEL_ARGS=(
    --num-layers 96 
    --hidden-size 12288 
    --num-attention-heads 96 
    --seq-length 2048 
    --max-position-embeddings 2048 
    --attention-backend auto # Can use (flash/fused/unfused/local)
)

TRAINING_ARGS=(
    --micro-batch-size 1 
    --global-batch-size 1536 
    --rampup-batch-size 16 16 5859375 
    --train-iters 500000 
    --weight-decay 0.1 
    --adam-beta1 0.9 
    --adam-beta2 0.95 
    --init-method-std 0.006 
    --clip-grad 1.0 
    --fp16
    --lr 6.0e-5 
    --lr-decay-style cosine 
    --min-lr 6.0e-6
    --lr-warmup-fraction .001 
    --lr-decay-iters 430000 
)

MODEL_PARALLEL_ARGS=(
	--tensor-model-parallel-size 8 
	--pipeline-model-parallel-size 16 
)

DATA_ARGS=(
    --data-path $DATA_PATH 
    --vocab-file $VOCAB_FILE 
    --merge-file $MERGE_FILE 
    --split 949,50,1
)

EVAL_AND_LOGGING_ARGS=(
    --log-interval 100
    --save-interval 10000 
    --eval-interval 1000 
    --save $CHECKPOINT_PATH 
    --load $CHECKPOINT_PATH 
    --eval-iters 10
    --tensorboard-dir $TENSORBOARD_LOGS_PATH 
)

torchrun ${DISTRIBUTED_ARGS[@]} pretrain_gpt.py \
    ${GPT_MODEL_ARGS[@]} \
    ${TRAINING_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${DATA_ARGS[@]} \
    ${EVAL_AND_LOGGING_ARGS[@]}


================================================
FILE: examples/gptoss/01_convert_from_hf.py
================================================
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

"""Convert HuggingFace checkpoints to Megatron format."""

import os
import argparse

from megatron.bridge import AutoBridge

def _parse_args():
    parser = argparse.ArgumentParser(description="Convert HF LLMs to Megatron format")
    parser.add_argument(
        "--hf-model",
        type=str,
        required=True,
        help="HuggingFace model identifier or path",
    )
    parser.add_argument(
        "--save-path",
        type=str,
        default=None,
        help="Path to save the converted Megatron checkpoint",
    )
    parser.add_argument('--local-rank', '--local_rank', type=int, default=0)
    return parser.parse_args()

if __name__ == "__main__":
    args = _parse_args()
    HF_MODEL = args.hf_model
    SAVE_PATH = args.save_path
    WORLD_SIZE = int(os.environ.get("WORLD_SIZE", 1))

    if SAVE_PATH is None:
        SAVE_PATH = f"./megatron_checkpoints/{HF_MODEL.replace('/', '_')}"
    
    print(f"Converting {HF_MODEL} to Megatron format...")
    print(f"Save path: {SAVE_PATH}")
    
    bridge = AutoBridge.from_hf_pretrained(HF_MODEL, trust_remote_code=True)
    provider = bridge.to_megatron_provider()
    # Update these configs as needed
    provider.expert_tensor_parallel_size = 1
    provider.tensor_model_parallel_size = 1
    provider.pipeline_model_parallel_size = WORLD_SIZE
    provider.finalize()
    
    model = provider.provide_distributed_model(wrap_with_ddp=False)
    
    bridge.save_megatron_model(
        model,
        SAVE_PATH,
        hf_tokenizer_path=HF_MODEL
    )
    
    print(f"Saved Megatron checkpoint to {SAVE_PATH}")


================================================
FILE: examples/gptoss/02_train.sh
================================================
#!/bin/bash

export CUDA_DEVICE_MAX_CONNECTIONS=${CUDA_DEVICE_MAX_CONNECTIONS:-1}


# Setup arguments with defaults
CHECKPOINT_PATH="NO_VALUE_PROVIDED"
TENSORBOARD_LOGS_PATH="./tensorboard_logs/"
TOKENIZER_ARG="MOCK"
DATA_ARG="MOCK"
DISTRIBUTED_CONFIG_FILE=""

# Parse command line arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        --checkpoint-path)
            CHECKPOINT_PATH="$2"
            shift 2
            ;;
        --tensorboard-logs-path)
            TENSORBOARD_LOGS_PATH="$2"
            shift 2
            ;;
        --tokenizer)
            TOKENIZER_ARG="$2"
            shift 2
            ;;
        --data)
            DATA_ARG="$2"
            shift 2
            ;;
        --distributed-config-file)
            DISTRIBUTED_CONFIG_FILE="$2"
            shift 2
            ;;
        -h|--help)
            echo "Usage: $0 [OPTIONS]"
            echo "Options:"
            echo "  --checkpoint-path PATH          Path to Megatron checkpoint"
            echo "  --tensorboard-logs-path PATH    Path to TensorBoard logs"
            echo "  --tokenizer PATH|MOCK           Path to tokenizer model, or 'MOCK' (default: MOCK)"
            echo "  --data PATH|MOCK                Data prefix, or 'MOCK' (default: MOCK)"
            echo "  --distributed-config-file FILE       Path to distributed training config file"
            echo "  -h, --help                      Show this help message"
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            echo "Use --help for usage information"
            exit 1
            ;;
    esac
done

# Check if checkpoint path exists
if [ ! -d "$CHECKPOINT_PATH" ]; then
    echo "Error: Checkpoint path does not exist: $CHECKPOINT_PATH"
    exit 1
fi
echo "Checkpoint path exists: $CHECKPOINT_PATH"

# Check if tensorboard logs path exists
if [ ! -d "$TENSORBOARD_LOGS_PATH" ]; then
    echo "Warning: TensorBoard logs path does not exist. Creating: $TENSORBOARD_LOGS_PATH"
    mkdir -p "$TENSORBOARD_LOGS_PATH"
fi
echo "TensorBoard logs path exists: $TENSORBOARD_LOGS_PATH"

# NOTE: by default we use 8 GPUs
# These values will be over-written below with environmental variables
GPUS_PER_NODE=8
NUM_NODES=1
MASTER_ADDR="localhost"
MASTER_PORT=6000
NODE_RANK=0

# Load distributed config from file if provided
if [ -n "$DISTRIBUTED_CONFIG_FILE" ]; then
    if [ ! -f "$DISTRIBUTED_CONFIG_FILE" ]; then
        echo "Warning: Distributed config file does not exist: $DISTRIBUTED_CONFIG_FILE"
        echo "Continuing with default distributed training settings."
    else
        echo "Loading distributed config from: $DISTRIBUTED_CONFIG_FILE"
        source "$DISTRIBUTED_CONFIG_FILE"
    fi
fi

# Override with environment variables if set
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
NUM_NODES=${NUM_NODES:-1}
MASTER_ADDR=${MASTER_ADDR:-localhost}
MASTER_PORT=${MASTER_PORT:-6000}
NODE_RANK=${NODE_RANK:-0}
WORLD_SIZE=$(($GPUS_PER_NODE*$NUM_NODES))

# Path to the pretrain_gpt.py script, assuming this script is run from the root of the Megatron-LM repository
PRETRAIN_SCRIPT_PATH="pretrain_gpt.py"

# Data cache path (useful for both mock and real data)
DATA_CACHE_PATH="${PWD}/benchmark_cache_gpt_oss_20b"
mkdir -p "$DATA_CACHE_PATH"

DISTRIBUTED_ARGS=(
    --nproc_per_node $GPUS_PER_NODE
    --nnodes $NUM_NODES
    --master_addr $MASTER_ADDR
    --master_port $MASTER_PORT
    --node_rank $NODE_RANK
)

# NOTE: we only set pipeline parallelism to be the number of GPUs
# Adjust each value based on your setup.
TP_SIZE=1     
EP_SIZE=1     
PP_SIZE=${WORLD_SIZE}     
MICRO_BATCH_SIZE=1
GLOBAL_BATCH_SIZE=128
NUM_LAYERS=12
DTYPE="fp8"
SEQ_LENGTH=8192
MAX_POSITION_EMBEDDINGS=8192
TRAIN_SAMPLES=1953125000
LR_DECAY_SAMPLES=1949218748

MODEL_ARGS=(
    --no-masked-softmax-fusion
    --transformer-impl transformer_engine
    --disable-bias-linear
    --untie-embeddings-and-output-weights
    --no-rope-fusion
    --normalization RMSNorm
    --num-layers ${NUM_LAYERS}
    --hidden-size 512
    --ffn-hidden-size 2048
    --num-attention-heads 64
    --group-query-attention
    --num-query-groups 8
    --seq-length ${SEQ_LENGTH}
    --max-position-embeddings ${MAX_POSITION_EMBEDDINGS}
    --use-mcore-models
    --rotary-percent 1.0
    --rope-type rope
    --position-embedding-type rope
    --rotary-base 10000
    --no-bias-gelu-fusion
    --export-force-local-attention
    --no-bias-dropout-fusion
    --quick-geglu
    --glu-linear-offset 1.0
    --softmax-type learnable
    --window-attn-skip-freq 2
    --activation-func-clamp-value 7.0
    --window-size 127,0
    --enable-gpt-oss
)

MOE_ARGS=(
    --num-experts 4
    --moe-router-topk 2
    --moe-router-load-balancing-type aux_loss
    --moe-aux-loss-coeff 1e-3
    --moe-grouped-gemm
    --moe-token-dispatcher-type alltoall
    --overlap-param-gather
    --overlap-grad-reduce
    --moe-ffn-hidden-size 2048
    --moe-router-dtype fp32
    --moe-z-loss-coeff 1e-3
    --moe-permute-fusion
)

DATA_ARGS_LIST=()
if [[ "$TOKENIZER_ARG" == "MOCK" ]] || [[ "$DATA_ARG" == "MOCK" ]] || [[ -z "$TOKENIZER_ARG" ]]; then
    DATA_ARGS_LIST+=(
        "--mock-data"
        "--tokenizer-type NullTokenizer"
        "--vocab-size 128256" 
        "--data-cache-path ${DATA_CACHE_PATH}"
        "--tiktoken-pattern v2" 
        "--split '99,1,0'"
        "--no-create-attention-mask-in-dataloader"
        "--no-mmap-bin-files"
        "--num-workers 1"
    )
else
    # Settings for real data
    DATA_ARGS_LIST+=(
        "--data-path $DATA_ARG"
        "--tokenizer-type HuggingFaceTokenizer" 
        "--tokenizer-model $TOKENIZER_ARG"
        "--data-cache-path ${DATA_CACHE_PATH}"
        "--split '99,1,0'"
        "--no-create-attention-mask-in-dataloader"
        "--no-mmap-bin-files"
        "--num-workers 1"
        # Note: --vocab-size might be inferred by HuggingFaceTokenizer or might need to be explicit.
        "--vocab-size 128256"
    )
fi

TRAINING_ARGS=(
    --micro-batch-size ${MICRO_BATCH_SIZE}
    --global-batch-size ${GLOBAL_BATCH_SIZE}
    --lr 1.0e-5
    --train-samples ${TRAIN_SAMPLES}
    --lr-decay-samples ${LR_DECAY_SAMPLES}
    --lr-decay-style cosine
    --min-lr 1.0e-6
    --weight-decay 0.1
    --lr-warmup-fraction 0.05
    --clip-grad 1.0
    --bf16
    --use-flash-attn
    --attention-softmax-in-fp32
    --accumulate-allreduce-grads-in-fp32
    --disable-bf16-reduced-precision-matmul
    --recompute-activations
)

MODEL_PARALLEL_ARGS=(
    --tensor-model-parallel-size ${TP_SIZE}
    --pipeline-model-parallel-size ${PP_SIZE}
    --expert-model-parallel-size ${EP_SIZE}
    --sequence-parallel
    --context-parallel-size 1
    --use-distributed-optimizer
    --fp8-format hybrid
    --fp8-param-gather
    --fp8-amax-compute-algo max
    --fp8-amax-history-len 1024
)
    
LOGGING_ARGS=(
    --log-interval 1
    --save-interval 10000
    --eval-interval 50000000
    --eval-iters 0
    --save $CHECKPOINT_PATH
    --tensorboard-dir "${CHECKPOINT_PATH}/tensorboard"
    --moe-per-layer-logging
    --no-load-optim
    --no-load-rng
    --log-throughput
)

# Ensure pretrain_gpt.py is found
if [ ! -f "$PRETRAIN_SCRIPT_PATH" ]; then
    echo "Error: pretrain_gpt.py not found at $PRETRAIN_SCRIPT_PATH"
    echo "Please ensure you are running this script from the root of the Megatron-LM repository, and pretrain_gpt.py is present."
    exit 1
fi

python -m torch.distributed.run ${DISTRIBUTED_ARGS[@]} ${PRETRAIN_SCRIPT_PATH} \
    ${MODEL_ARGS[@]} \
    ${MOE_ARGS[@]} \
    ${DATA_ARGS_LIST[@]} \
    ${TRAINING_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${LOGGING_ARGS[@]}


================================================
FILE: examples/gptoss/03_convert_to_hf.py
================================================
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

"""Convert HuggingFace checkpoints to Megatron format."""

import os
import argparse

from megatron.bridge import AutoBridge

def _parse_args():
    parser = argparse.ArgumentParser(description="Convert Megatron LLMs to HuggingFace format")
    parser.add_argument(
        "--hf-model",
        type=str,
        required=True,
        help="HuggingFace model identifier or path to load config from",
    )
    parser.add_argument(
        "--megatron-model",
        type=str,
        required=True,
        help="Megatron model identifier or path",
    )
    parser.add_argument(
        "--save-path",
        type=str,
        default=None,
        help="Path to save the converted HuggingFace checkpoint",
    )
    parser.add_argument('--local-rank', '--local_rank', type=int, default=0)
    return parser.parse_args()

if __name__ == "__main__":
    args = _parse_args()
    HF_MODEL = args.hf_model
    MEGATRON_MODEL = args.megatron_model
    SAVE_PATH = args.save_path
    WORLD_SIZE = int(os.environ.get("WORLD_SIZE", 1))

    if SAVE_PATH is None:
        SAVE_PATH = f"./huggingface_checkpoints/{MEGATRON_MODEL.replace('/', '_')}"
    
    print(f"Converting {MEGATRON_MODEL} to HuggingFace {HF_MODEL} format...")
    print(f"Save path: {SAVE_PATH}")
    
    bridge = AutoBridge.from_hf_pretrained(HF_MODEL, trust_remote_code=True)
    bridge.export_ckpt(
        MEGATRON_MODEL,
        SAVE_PATH,
    )
    
    print(f"Saved HuggingFace checkpoint to {SAVE_PATH}")


================================================
FILE: examples/gptoss/README.md
================================================
# GPT-OSS Training Tutorial

## Step 0: Install Dependencies

### Using Megatron Bridge

[Megatron-Bridge](https://github.com/NVIDIA-NeMo/Megatron-Bridge)

Megatron Bridge provides a quick and convenient way to convert HuggingFace checkpoints to the Megatron format used by Megatron-LM. Follow the instructions in the [Megatron-Bridge Installation](https://github.com/NVIDIA-NeMo/Megatron-Bridge/blob/main/README.md#-installation) to run the nemo docker container and convert checkpoints (via mounted volumes - make sure that the huggingface cache location AND the megatron checkpoint locations are properly mounted, otherwise you may not be saving the converted model to disk correctly).

Below is an example of how to use Megatron-Bridge inside the pytorch container to convert a HuggingFace model checkpoint to Megatron format.

Reference: [Megatron-Bridge Dockerfile](https://github.com/NVIDIA-NeMo/Megatron-Bridge/blob/main/docker/Dockerfile.ci)

Inside the [pytorch container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch) run the following commands to install Megatron-Bridge:
```bash
cd /opt
git clone --recursive https://github.com/NVIDIA-NeMo/Megatron-Bridge.git
cd Megatron-Bridge

# Make sure submodules are initialized (for 3rdparty/Megatron-LM)
git submodule update --init --recursive

export PATH="/root/.local/bin:$PATH"
export UV_PROJECT_ENVIRONMENT=/opt/venv
export VIRTUAL_ENV=/opt/venv
export PATH="$UV_PROJECT_ENVIRONMENT/bin:$PATH"
export UV_LINK_MODE=copy
export UV_VERSION="0.7.2"

# Install UV
curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh

# Create virtual environment and build the package
uv venv ${UV_PROJECT_ENVIRONMENT} --system-site-packages

uv sync --locked --only-group build
uv sync --locked --link-mode copy --all-extras --all-groups

uv pip install --no-deps -e .

source ${UV_PROJECT_ENVIRONMENT}/bin/activate
```

### Setup Environment

```bash
export HOST_MEGATRON_LM_DIR="/path/to/your/host/megatron-lm"
git clone https://github.com/NVIDIA/Megatron-LM.git "$HOST_MEGATRON_LM_DIR"
cd "$HOST_MEGATRON_LM_DIR"
```

```bash
export HF_TOKEN={your_hf_token_here}
```

## Step 1: Convert HuggingFace to Megatron (Optional - skip if you already have a Megatron checkpoint)

Set `--nproc-per-node` to be the number of GPUs per node. Set `hf_model_name` to be the Huggingface model e.g. `openai/gpt-oss-20b`

```bash
python3 -m torch.distributed.launch --nproc-per-node=8 examples/gptoss/01_convert_from_hf.py --hf-model openai/gpt-oss-20b
```

## Step 2: Train from Scratch

To train from scratch first follow the steps below to setup the environment appropriately before running the training script in docker. Even though we are running the same container as before, it is better to restart the container to ensure a clean environment and that all environment and docker variables are set correctly. For the following example we used 8x GB300, but you should change the number of GPUs and nodes as needed.

### Setup Environment

```bash
# Change these based on model and directory from previous conversion step
export MODEL_DIR_NAME="openai_gpt-oss_20b"

export HOST_CHECKPOINT_PATH="./megatron_checkpoints/${MODEL_DIR_NAME}"
export HOST_TENSORBOARD_LOGS_PATH="./tensorboard_logs/${MODEL_DIR_NAME}"
```

By default we will use mock data to train the model in the example below. To use your own data, set the following environment variables:

```bash
# Optional: For real data
export HOST_TOKENIZER_MODEL_PATH="/path/to/host/tokenizer.model"
export HOST_DATA_PREFIX="/path/to/host/mydata_prefix"
```

### Setup Training Configurations

Run the following to create a `distributed_config.env` file with the appropriate distributed training configurations. Change the values as needed for your setup. This file will override the default values in `02_train.sh`.

```bash
cat > ./distributed_config.env << 'EOF'
GPUS_PER_NODE=8
NUM_NODES=1
MASTER_ADDR=localhost
MASTER_PORT=6000
NODE_RANK=0
EOF
```

### Run Container with Mounted Volumes

**NOTE:** This container runs the example training script `02_train.sh` located in the `examples/gptoss` directory. By default, we have only set pipeline parallelism to be the number of GPUs. Adjust TP_SIZE, EP_SIZE, PP_SIZE, etc. in `02_train.sh`. You can also adjust modify `--hidden-size`, `--ffn-hidden-size`, `--num-attention-heads`, `NUM_LAYERS`, etc. 

To train using mock data, run the following command:
```bash
PYTORCH_IMAGE="nvcr.io/nvidia/pytorch:25.12-py3"

docker run --rm --gpus all --ipc=host --ulimit memlock=-1 \
  -v "${HOST_MEGATRON_LM_DIR}:/workspace/megatron-lm" \
  -v "${HOST_CHECKPOINT_PATH}:/workspace/checkpoints" \
  -v "${HOST_TENSORBOARD_LOGS_PATH}:/workspace/tensorboard_logs" \
  -v "./distributed_config.env:/workspace/megatron-lm/examples/gptoss/distributed_config.env" \
  --workdir /workspace/megatron-lm \
  $PYTORCH_IMAGE \
  bash examples/gptoss/02_train.sh \
    --checkpoint-path /workspace/checkpoints \
    --tensorboard-logs-path /workspace/tensorboard_logs \
    --distributed-config-file /workspace/megatron-lm/examples/gptoss/distributed_config.env \
  2>&1 | tee "${HOST_TENSORBOARD_LOGS_PATH}/training_mock_$(date +'%y-%m-%d_%H-%M-%S').log"
```
**Note:** If you run into issues generating mock data one solution might be to reduce the number of GPUs to 1 and try to generate the data again.

If using real data with with the `HOST_TOKENIZER_MODEL_PATH` and `HOST_DATA_PREFIX` environment variables set, run the following command instead:

```bash
PYTORCH_IMAGE="nvcr.io/nvidia/pytorch:25.12-py3"

docker run --rm --gpus all --ipc=host --ulimit memlock=-1 \
  -v "${HOST_MEGATRON_LM_DIR}:/workspace/megatron-lm" \
  -v "${HOST_CHECKPOINT_PATH}:/workspace/checkpoints" \
  -v "${HOST_TENSORBOARD_LOGS_PATH}:/workspace/tensorboard_logs" \
  -v "${HOST_TOKENIZER_MODEL_PATH}:/workspace/tokenizer_model" \
  -v "$(dirname "${HOST_DATA_PREFIX}"):/workspace/data_dir" \
  -v "./distributed_config.env:/workspace/megatron-lm/examples/gptoss/distributed_config.env" \
  --workdir /workspace/megatron-lm \
  $PYTORCH_IMAGE \
  bash examples/gptoss/02_train.sh \
    --checkpoint-path /workspace/checkpoints \
    --tensorboard-logs-path /workspace/tensorboard_logs \
    --tokenizer /workspace/tokenizer_model \
    --data "/workspace/data_dir/$(basename "${HOST_DATA_PREFIX}")" \
    --distributed-config-file /workspace/megatron-lm/examples/gptoss/distributed_config.env \
  2>&1 | tee "${HOST_TENSORBOARD_LOGS_PATH}/training_custom_$(date +'%y-%m-%d_%H-%M-%S').log"
```

## Step 3: Convert Megatron to HuggingFace

Just run the following command to change from the megatron checkpoint from training to the huggingface format to share with others (make sure you have the same virtual environment setup as in Step 0):

```bash
python3 -m torch.distributed.launch --nproc-per-node=8 examples/gptoss/03_convert_to_hf.py --hf-model openai/gpt-oss-20b --megatron-model ./megatron_checkpoints/openai_gpt-oss_20b
```

================================================
FILE: examples/inference/README.md
================================================
### Megatron Core Inference Documentation
This guide provides an example for Megatron Core for running model inference. 

### Contents
- [Megatron Core Inference Documentation](#megatron-core-inference-documentation)
- [Contents](#contents)
  - [1. Quick Start](#1-quick-start)
    - [1.1 Understanding The Code](#11-understanding-the-code)
    - [1.2 Running The Code](#12-running-the-code)
  - [2. Flow of Control In MCore Backend](#2-flow-of-control-in-mcore-backend)
  - [3. Customizing The Inference Pipeline](#3-customizing-the-inference-pipeline)
    - [3.1. Create Your Own Inference Backend](#31-create-your-own-inference-backend)
    - [3.2. Create Your Own Text Generation Controller](#32-create-your-own-text-generation-controller)
    - [3.3. Support Other Models](#33-support-other-models)
    - [3.3. Modify Inference Parameters](#33-modify-inference-parameters)
  - [4. Future work](#4-future-work)

<br>

#### 1. Quickstart
This example runs statically-batched inference on a model trained using Megatron Core. The entrypoint is [gpt_static_inference.py](./gpt/gpt_static_inference.py). A similar workflow can be adapted for [gpt_dynamic_inference.py](./gpt/gpt_dynamic_inference.py).

<br>

##### 1.1 Code Walkthrough 
***STEP 1 - Initialize model parallel and other default arguments***
The micro batch size defaults to 1. It is not used in tensor-parallelism only, and for pipeline-parallel models it is calculated at runtime. 
```python
# Initialize Megatron model using the same model provider from training.
    initialize_megatron(
        args_defaults={'no_load_rng': True, 'no_load_optim': True, 'micro_batch_size': 1}
    )
```

***STEP 2 - Load the model using the model_provider_function***
The model provider function supports both MCore and Legacy models. 

```python
    # Load the model checkpoint
    model = get_model(model_provider, wrap_with_ddp=False)
    load_checkpoint(model, None, None)
    model.eval()
    model = model[0]
```

***STEP 3 - Choose an engine***
Text generation requires an inference engine, which includes a scheduler. The default engine is the [Megatron Core engine](../../megatron/core/inference/engine/mcore_engine.py) with a [text generation controller](../../megatron/core/inference/text_generation_controllers/text_generation_controller.py). TRTLLMEngine will be supported in the future.
```python
    # Create an inference wrapper to setup the model.
    inference_wrapped_model = GPTInferenceWrapper(model, args)
    
    # Define a sampling loop.
    text_generation_controller = TextGenerationController(
        inference_wrapped_model=inference_wrapped_model, 
        tokenizer=tokenizer
    )
    
    # Create a static or dynamic inference engine.
    inference_engine = StaticInferenceEngine(
        text_generation_controller=text_generation_controller, 
        max_batch_size=args.max_batch_size
)
```

***STEP 4 - Run text generation***
The [SamplingParams](../../megatron/core/inference/sampling_params.py) class uses suggested defaults. Customize this to change top_p, top_k, number of tokens to generate, etc. The result is returned as a list of [InferenceRequests](../../megatron/core/inference/inference_request.py).
```python
    results: List[InferenceRequest] = inference_engine.generate(
        prompts=args.prompts, sampling_params=sampling_params
    )
    
    if torch.distributed.get_rank() == 0:
        for idx, result in enumerate(results):
            print(f' ------------- RESULT FOR PROMPT {idx} --------------- ')
            result = {
                'id': result.request_id,
                'input_prompt': result.prompt, 
                'generated_text': result.generated_text,
                'generated_tokens' : result.generated_tokens
                }
            print(result)
```

<br>

##### 1.2 Running The Code
An example Slurm script is shown below. Set the tokenizer paths, inference params, and other settings appropriately. 

For a recap on sampling parameters, refer to [this blog](https://ivibudh.medium.com/a-guide-to-controlling-llm-model-output-exploring-top-k-top-p-and-temperature-parameters-ed6a31313910).

```
# Slurm cluster settings 
ACCOUNT=<account>
MLM_PATH=/path/to/megatron-lm
GPT_CKPT=/path/to/gpt/ckpt
VOCAB_MERGE_FILE_PATH=/path/to/vocab/and/merge/file
CONTAINER_IMAGE=nvcr.io/ea-bignlp/ga-participants/nemofw-training:23.11

srun --account $ACCOUNT \
--job-name=$ACCOUNT:inference \
--partition=batch \
--time=01:00:00 \
--container-image $CONTAINER_IMAGE \
--container-mounts $MLM_PATH:/workspace/megatron-lm/,$GPT_CKPT:/workspace/mcore_gpt_ckpt,$VOCAB_MERGE_FILE_PATH:/workspace/tokenizer \
--no-container-mount-home \
--pty /bin/bash \

# Inside the container run the following. 

cd megatron-lm/
export CUDA_DEVICE_MAX_CONNECTIONS=1

TOKENIZER_ARGS=(
    --vocab-file /workspace/tokenizer/gpt2-vocab.json
    --merge-file /workspace/tokenizer/gpt2-merges.txt
    --tokenizer-type GPT2BPETokenizer
)

MODEL_ARGS=(
    --use-checkpoint-args
    --use-mcore-models
    --load /workspace/mcore_gpt_ckpt
)

INFERENCE_SPECIFIC_ARGS=(
    --attention-dropout 0.0
    --hidden-dropout 0.0
    --num-tokens-to-generate 20
    --max-batch-size 4
)

torchrun --nproc-per-node=4 examples/inference/gpt/gpt_static_inference.py \
    ${TOKENIZER_ARGS[@]} \
    ${MODEL_ARGS[@]} \
    ${INFERENCE_SPECIFIC_ARGS[@]} \
    --prompts "prompt one " "sample prompt two" "sample prompt 3"

NOTE: Other parameters which can be customized for inference:
--temperature (Sampling temperature)
--top_k (top_k sampling)
--top_p (top_p sampling)
--num-tokens-to-generate (Number of tokens to generate for each prompt)
--inference-batch-times-seqlen-threshold (During inference, if batch-size times sequence-length is smaller than this threshold then we will not use microbatched pipelining.')
--use-dist-ckpt (If using dist checkpoint format for the model)
--use-legacy-models (If using legacy models instead of MCore models)

```


<br>


#### 2. Control Flow in the MCore Backend
An example of inference with static batching is provided in [gpt_static_inference.py](./gpt/gpt_static_inference.py).
* [mcore_engine](../../megatron/core/inference/engines/mcore_engine.py) **generate()** function is called with the input prompts.
* The `Scheduler` in the engine will add these prompts to the [active requests] pool (../../megatron/core/inference/inference_request.py) until max batch size is hit. Remaining requests will be added to the waiting requests pool. 
* The engine will run until all requests (waiting + active) are completed. 
    * The active requests are passed into  **generate_all_output_tokens_static_batch()** of the text generation controller . 
    * This function uses the **prep_model_for_inference()** method of the [model_inference_wrappers](../../megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py) and runs an autoregressive sampling loop
    * In the autoregressive loop, the **get_batch_for_context_window()** method of the inference wrapper is called to slice out the input tokens and masks
    * Input tokens and masks are passed it into the **run_one_forward_step()** method, which calls the model `.forward()` method to get the output logits
    * Output logits are synchronized across all pipeline parallel ranks
    * The text generation controller obtains the log probabilities and samples tokens based on the strategy defined in the sampling parameters.
    * The sampled tokens are then appended to the input prompt tokens for the next iteration 
    * The **update_generation_status()** method of the text generation controller checks which prompts have finished generating or hit a stop condition
    * After the inference loop, the result is detokenized and stored as an attribute of the InferenceRequest. These requests are marked as completed. 
    * The **update_requests_pool()** method of the scheduler moves completed requests into the completed request pool and waiting requests into the active request pool

<br>

#### 3. Customizing The Inference Pipeline

The inference pipeline supports three levels of customization:

* **Inference engine** - The MCore Engine supports static and dynamic batching. Modify this to add a new backend.
* **Text generation controller** - The main sampling loop. Customize this to support alternative tokenization or implement a new sampling strategy.
* **Inference Wrapped Model** - Change this to support a new model.
* **Modify Inference Parameters** - Change this to update top_p, top_k, number of tokens to be generated, temperature, and other sampling parameters.

<br>

##### 3.1. Create Your Own Inference Backend 
The  [abstract_engine.py](./../../megatron/core/inference/engine/abstract_engine.py) file contains a `generate` method that can be extended to support a new backend. 

```python
class AbstractEngine(ABC):
    @staticmethod
    def generate(self) -> dict:
        """The abstract backend's generate function. 

        To define a new backend, implement this method and return the outputs as a dictionary. 
```

<br>

##### 3.2. Implement a new Sampling Loop 

The [TextGenerationController](../../megatron/core/inference/text_generation_controllers/text_generation_controller.py) contains the main sampling loop and can be modified to support new tokenization, detokenization, or sampling strategies.

``` python
class TextGenerationController:

    def tokenize_prompt(self, prompt: str) -> Tuple[torch.Tensor, torch.Tensor]:
        """Utility to tokenize the input prompts"""

    def sample_from_logits(
        self,
        last_token_logits: torch.Tensor,
        sampling_params: SamplingParams,
        vocab_size: int,
        generation_started : Optional[torch.Tensor] = None,
        top_n_logprobs_dict: Dict[int, List[Dict[str, float]]] = None,
    ) -> torch.Tensor:
        """Samples the logits to generate outputs

        Given the logits of the last token, this function samples according to the parameters defined in sampling_params and returns the sampled tokens. If sampling_params.top_n_logprobs > 0 
        at each step it also updates the top_n_logprobs_dict.
        """

    def update_generation_status(
        self,
        updated_prompts_tokens: torch.Tensor,
        generation_started: torch.Tensor,
        current_context_end_position: int,
        is_generation_done_tensor: torch.Tensor,
        generated_sequence_lengths: torch.Tensor,
    ) -> torch.Tensor:
        """Function to check which prompts have reached an end condition

        We check which prompts have reached an end condition and set the corresponding flags of the is_generation_done_tensor to True . The generated sequence lengths increases as we keep generating, until that prompts hits an eod condition. The generation started status tensor helps us determine which prompts have started generating
        """

    def generate_all_output_tokens_static_batch(
        self, active_requests: OrderedDict[int, InferenceRequest],
    ) -> OrderedDict[int, InferenceRequest]:
        """Utility to generate all the output tokens and probabilities for the prompts .

        This utility generates the output tokens for a static batch. It runs the forward steps till all prompts complete generation, updates the status of these requests to completed, adds the generated result and returns these requests
        """

    def detokenize_generations(self, prompt_tokens_with_generated_tokens: torch.Tensor) -> str:
        """Detokenize the output generations"""
```

<br>

##### 3.3. Support Other Models
Extend [abstract_model_inference_wrapper.py](./../../megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py) to support other models. The abstract model wrapper implements: 
* Forward method which calls the model `forward` method depending on model parallel settings
* Initializes the model and puts it in `.eval()` mode
* Setup for the input parameters (max batch size, max seq length) 

The following methods should be implemented: 
```python
class AbstractModelInferenceWrapper:
    def prep_model_for_inference(self, prompts_tokens: torch.Tensor):
        """A utility function for preparing model for inference

        The function gets called once before the auto regressive inference loop. It puts the model in eval mode , and gets some model and inference data parameters. Extend this to build position ids ,attention mask etc, so that required slices can be extracted during the forward pass
        """

    @abc.abstractclassmethod
    def get_batch_for_context_window(self) -> List:
        """Returns the input data for inference 

        This function gets called iteratively in the inference loop. It can be used to extract relevant input from the prompt tokens, attention mask etc. required for each step in inference.
```

Refer to [gpt_inference_wrapper.py](../../megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py) for an example of implementing this for GPTModel.

<br>

##### 3.3. Modify Inference Parameters
We use  [common inference params](../../megatron/core/inference/sampling_params.py) for text generation. Customize this to change `top_p`, `top_k`, number of tokens to generate etc. Other attributes can be added for the inference loop as shown below.

```
from megatron.core.inference.sampling_params import SamplingParams

c = SamplingParams(temperature=0.5)
c.add_attributes({'min_length':4, 'eod_id':153})
```

<br>

#### 4. Future work
The following features are planned for future releases.
* TRTLLM Engine support
* Continuous batching optimizations
* Speculative decoding

================================================
FILE: examples/inference/gpt/gpt_dynamic_inference.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

# pylint: disable=bad-builtin

import hashlib
import io
import json
import os
import sys
import warnings
from collections import defaultdict
from typing import Dict, List, Optional

import torch
from tqdm import tqdm

sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
)

from examples.inference.gpt.utils import (
    Request,
    build_dynamic_engine_setup_prefix,
    build_requests,
    get_curr_time,
    get_global_peak_memory_stats_bytes,
)
from megatron.core.inference.contexts.dynamic_context import DynamicInferenceContext
from megatron.core.inference.engines import DynamicInferenceEngine, EngineSuspendedError
from megatron.core.inference.model_inference_wrappers.gpt.gpt_inference_wrapper import (
    GPTInferenceWrapper,
)
from megatron.core.inference.sampling_params import SamplingParams
from megatron.core.inference.text_generation_controllers.text_generation_controller import (
    TextGenerationController,
)
from megatron.core.tokenizers.utils.build_tokenizer import build_tokenizer
from megatron.inference.utils import (
    add_inference_args,
    get_inference_config_from_model_and_args,
    get_model_for_inference,
)

sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
)
import logging

import megatron
from megatron.core.utils import configure_nvtx_profiling
from megatron.training import get_args, get_tokenizer, initialize_megatron

torch.serialization.add_safe_globals([io.BytesIO])
torch.serialization.add_safe_globals([megatron.core.rerun_state_machine.RerunState])
torch.serialization.add_safe_globals([megatron.core.rerun_state_machine.RerunDiagnostic])


def run_inference(
    requests: List[Request],
    engine: DynamicInferenceEngine,
    sampling_params: Optional[SamplingParams] = None,
) -> List[Dict[str, float]]:
    """Add requests to engine and generate tokens.

    Args:
        requests (List[Request]): Requests that are to be added and processed.
        engine (DynamicInferenceEngine): Inference engine that manages generating tokens.
        sampling_params (SamplingParams): Deprecated as of megatron-core 0.16.

    Return:
        A dictionary of step times with `prefill` and `decode` keys.
    """

    if sampling_params is not None and torch.distributed.get_rank() == 0:
        warnings.warn(
            "The `sampling_params` argument is deprecated. "
            "Sampling parameters are specified per request.",
            DeprecationWarning,
        )

    args = get_args()

    # Parse batch boundaries for batch-drain mode.
    batch_ranges = None
    if args.drain_between_batches and args.batch_boundaries:
        boundaries = [int(x) for x in args.batch_boundaries.split(",")]
        num_requests_total = len(requests)
        batch_ranges = []
        for i, start in enumerate(boundaries):
            end = boundaries[i + 1] if i + 1 < len(boundaries) else num_requests_total
            batch_ranges.append((start, end))

    # Initialize request arrival times.
    base_arrival_time = get_curr_time()
    for request in requests:
        request.time_arrival = request.time_offset + base_arrival_time

    # Add and process requests.
    num_requests_total = len(requests)
    num_requests_added = 0
    num_requests_finished = 0
    step_times = {"prefill": [], "decode": []}
    add_times = []
    output_times = []
    tbar = tqdm(total=num_requests_total)
    total_output_tokens = 0
    attempted_step_count = 0
    if args.cuda_graph_impl == "local":
        cuda_graph_request_count_map = {}
    else:
        cuda_graph_request_count_map = None

    def _add_request():
        """Add request to engine.

        *Note: Using `prompt_text` instead of `prompt_tokens` for fair comparison.
        """
        nonlocal num_requests_added
        _request = requests[num_requests_added]
        engine.add_request(num_requests_added, _request.prompt_text, _request.sampling_params)
        _request.time_start = get_curr_time()
        _request.state = "started"
        num_requests_added += 1
        tbar.update(1)

    def _process_step_result(result):
        """Process a single engine step result, updating bookkeeping state."""
        nonlocal total_output_tokens, num_requests_finished

        is_decode_only = engine.is_decode_only

        # Record cuda_graph_request_count.
        cuda_graph_request_count = result["cuda_graph_request_count"]
        if args.cuda_graph_impl == "local" and cuda_graph_request_count is not None:
            cuda_graph_request_count_map[cuda_graph_request_count] = (
                cuda_graph_request_count_map.get(cuda_graph_request_count, 0) + 1
            )

        # Update requests.
        active_request_ids = result["active_request_ids"]
        finished_request_records = result["finished_request_records"]
        step_time = result["step_time"]
        if len(active_request_ids) > 0 or len(finished_request_records) > 0:
            if is_decode_only:
                step_times["decode"].append(step_time)
            else:
                step_times["prefill"].append(step_time)

            # Append output tokens.
            output_start = get_curr_time()
            for finished_request_record in finished_request_records:

                finished_request = finished_request_record.merge()

                # Update local request object.
                request = requests[finished_request.request_id]
                request.time_end = get_curr_time()
                request.state = "finished"
                request.request_id = finished_request.request_id
                request.events = finished_request.events

                request.ttft = finished_request.ttft

                # Update prompt, in case engine has been suspended and resumed.
                request.prompt_tokens = finished_request.prompt_tokens.tolist()
                request.prompt_text = finished_request.prompt

                # Get output tokens and text.
                request.output_tokens = finished_request.generated_tokens
                request.output_text = finished_request.generated_text
                total_output_tokens += len(request.output_tokens)

                # Log probs.
                if finished_request.sampling_params.return_log_probs:
                    if not finished_request.prompt_log_probs:
                        finished_request.prompt_log_probs = []
                    request.prompt_log_probs = finished_request.prompt_log_probs
                    request.generated_log_probs = finished_request.generated_log_probs
                    request.logprobs = (
                        finished_request.prompt_log_probs + finished_request.generated_log_probs
                    )
                if finished_request.sampling_params.top_n_logprobs > 0:
                    request.generated_top_n_logprobs = finished_request.generated_top_n_logprobs
                if not finished_request.sampling_params.skip_prompt_log_probs:
                    request.prompt_top_n_logprobs = finished_request.prompt_top_n_logprobs
                num_requests_finished += 1
            output_times.append(get_curr_time() - output_start)

    if batch_ranges is not None:
        # Batch-drain mode: add all requests in a batch, drain, then next batch.
        for batch_idx, (batch_start, batch_end) in enumerate(batch_ranges):
            # Add all requests in current batch.
            add_start = get_curr_time()
            while num_requests_added < batch_end:
                _add_request()
            add_times.append(get_curr_time() - add_start)

            # Step until all active requests finish (drain).
            while engine.has_unfinished_requests():
                try:
                    result = engine.step_modern()
                except EngineSuspendedError as e:
                    result = e
                attempted_step_count += 1

                if isinstance(result, EngineSuspendedError):
                    continue

                _process_step_result(result)
    else:
        # Original mode: add requests per step based on arrival time or count.
        while True:
            # Add requests.
            add_start = get_curr_time()
            if args.incoming_requests_per_step is None:
                # Add requests with 'earlier' arrival time.
                while num_requests_added < num_requests_total:
                    if requests[num_requests_added].time_arrival > add_start:
                        break
                    _add_request()
            else:
                # Add deterministic number of requests (generally used for debugging).
                for i in range(
                    min(args.incoming_requests_per_step, num_requests_total - num_requests_added)
                ):
                    _add_request()
            add_times.append(get_curr_time() - add_start)

            # Step inference engine (i.e., generate a token for each active request).
            # Before step, we haven't done the scheduling, so we cannot know the is_decode_only
            try:
                result = engine.step_modern()
            except EngineSuspendedError as e:
                result = e
                pass  # ignore error in order to call 'engine.resume()' below.
            attempted_step_count += 1

            # Test suspending and resuming engine.
            if args.suspend_resume_interval is not None:

                # Suspend.
                if attempted_step_count % args.suspend_resume_interval == 0:
                    print("**** step %d/%d ... suspend." % (engine.context.step_count, attempted_step_count))
                    engine.suspend()

                # Resume, 0+ attempted steps later.
                if (
                    attempted_step_count > 0
                    and (attempted_step_count - args.suspend_resume_interval // 2)
                    % args.suspend_resume_interval
                    == 0
                ):
                    print("**** step %d/%d ... resume." % (engine.context.step_count, attempted_step_count))
                    engine.resume()

            # If engine suspended, continue to next iter.
            if isinstance(result, EngineSuspendedError):
                continue

            _process_step_result(result)

            # Check if all requests are finished.
            if not (engine.has_unfinished_requests() or num_requests_added < num_requests_total):
                break

    # Resume engine (NOOP if not suspended).
    engine.resume()

    return {
        "step_times": step_times,
        "add_times": add_times,
        "output_times": output_times,
        "total_output_tokens": total_output_tokens,
        "cuda_graph_request_count_map": cuda_graph_request_count_map,
    }


@torch.inference_mode()
def main():
    """Run dynamic inference."""
    # Initialize Megatron.
    initialize_megatron(
        extra_args_provider=add_inference_args,
        args_defaults={'no_load_rng': True, 'no_load_optim': True},
    )

    # Start Nsight profiler.
    if os.environ.get("NSIGHT_PREFIX"):
        torch.cuda.cudart().cudaProfilerStart()

    level_str = os.getenv("LOG_LEVEL", "INFO").upper()
    level = getattr(logging, level_str, logging.INFO)
    logging.basicConfig(level=level, force=True)

    configure_nvtx_profiling(True)

    args = get_args()

    # Build tokenizer
    tokenizer = build_tokenizer(args)

    # Reset peak memory stats so functional tests measure this run and not
    # whatever happened earlier during initialization.
    torch.cuda.reset_peak_memory_stats()

    # Sampling params.
    sampling_params = SamplingParams(
        temperature=args.temperature,
        top_k=args.top_k,
        top_p=args.top_p,
        skip_prompt_log_probs=args.skip_prompt_log_probs,
        return_log_probs=args.return_log_probs,
        num_tokens_to_generate=args.num_tokens_to_generate,
        termination_id=args.termination_id if args.termination_id is not None else tokenizer.eod,
        top_n_logprobs=args.top_n_logprobs,
        stop_words=args.stop_words,
    )

    model = get_model_for_inference()

    # Requests, context, controller.
    requests = build_requests(args, tokenizer, sampling_params)
    inference_config = get_inference_config_from_model_and_args(model, args)

    # Calculate max_sequence_length from requests
    max_gen_length = sampling_params.num_tokens_to_generate
    max_context_length = max(len(r.prompt_tokens) for r in requests)
    inference_config.max_sequence_length = max_context_length + max_gen_length
    context = DynamicInferenceContext(model.config, inference_config)
    wrapped_model = GPTInferenceWrapper(model, context)
    controller = TextGenerationController(wrapped_model, tokenizer)

    # Validate all context_length's <= max_tokens.
    if not args.enable_chunked_prefill:
        invalid_prompt_length_map = {}
        for request_idx, request in enumerate(requests):
            if len(request.prompt_tokens) > context.max_tokens:
                invalid_prompt_length_map[request_idx] = len(request.prompt_tokens)
        assert (
            not invalid_prompt_length_map
        ), "request idxs with prompts longer than context.max_tokens: " ", ".join(
            f"{k}({v})" for k, v in invalid_prompt_length_map.items()
        )

    # Inference engine.
    engine = DynamicInferenceEngine(controller, context)

    setup_prefix = build_dynamic_engine_setup_prefix(args, model, context, requests)
    print("~~~")
    print(setup_prefix)
    print("~~~")

    # Run and time test, optionally `args.inference_repeat_n` times.
    throughputs = []
    for _ in range(args.inference_repeat_n):

        # Reset engine.
        engine.reset()

        torch.cuda.reset_peak_memory_stats()

        # Trial.
        t = get_curr_time()
        result = run_inference(requests, engine)
        step_times = result["step_times"]
        add_times = result["add_times"]
        output_times = result["output_times"]
        total_output_tokens = result["total_output_tokens"]
        torch.cuda.synchronize()
        total_time = get_curr_time() - t
        stats = torch.cuda.memory_stats()
        throughput = total_output_tokens / total_time
        throughputs.append(throughput)

    # Validate all requests finished.
    for request in requests:
        assert request.state == "finished", f"request.state == '{request.state}' != 'finished'."

    peak_mem_stats = get_global_peak_memory_stats_bytes()

    # Print unique prompts + outputs.
    if torch.distributed.get_rank() == 0:

        def escape_str(s):
            return s.replace("\n", "\\n")

        print("~~~~ Unique prompts + outputs. ~~~~")

        # Map requests by their prompt.
        unique_prompt_map = defaultdict(list)
        for request_idx, request in enumerate(requests):
            unique_prompt_map[request.prompt_text].append(request_idx)

        # Print unique prompts + outputs.
        text_hashes = []
        for unique_idx, (prompt_text, request_idxs) in enumerate(unique_prompt_map.items()):

            # ---- Prompt summary line ----
            prompt_len = len(requests[request_idxs[0]].prompt_tokens)
            escaped_prompt_text = escape_str(prompt_text)
            print(
                f"\n{unique_idx+1}/{len(unique_prompt_map)}"
                f"[n {len(request_idxs)}, l {prompt_len}] {escaped_prompt_text}"
            )

            # ---- Group all outputs for this prompt ----
            output_map = defaultdict(list)
            for idx in request_idxs:
                req = requests[idx]
                output_map[req.output_text].append(idx)

            # ---- Print each unique output ----
            for output_text, output_request_idxs in output_map.items():
                evicted = False
                for idx in output_request_idxs:
                    for event in requests[idx].events:
                        if event.type.name == "EVICT":
                            evicted = True
                            break
                if output_text is not None:
                    # Use hash of prompt + generated text in case engine was
                    # suspended and resumed, which misaligns boundary between
                    # prompt and generated tokens.
                    o_hash = hashlib.sha256((prompt_text + output_text).encode()).hexdigest()[:6]
                    o_len = len(requests[output_request_idxs[0]].output_tokens)
                    escaped_output_text = escape_str(output_text)
                else:
                    o_hash = "--"
                    o_len = 0
                    escaped_output_text = "--"
                print(
                    f"  >>>> [n {len(output_request_idxs)}, {o_len} tokens, hash {o_hash}"
                    f"{', <evicted>' if evicted else ''}] {escaped_output_text}"
                )
                text_hashes.append(o_hash)

        # Write results to JSON. Primarily used for functional testing.
        if args.output_path:
            json_results = {}

            # Write every 'n' requests, plus the final request.
            for i, req in enumerate(requests):
                if i % args.output_every_n_results == 0 or i == len(requests) - 1:
                    print(f' Attributes of request {i}: {req.__dict__}')
                    result_dict = {
                        "input_prompt": req.prompt_text,
                        "generated_text": req.output_text,
                        "generated_tokens": req.output_tokens,
                        "latency": req.time_end - req.time_start,
                        "ttft": req.ttft,  # Time-to-first-token in seconds
                        "cuda_graph_request_count_map": result["cuda_graph_request_count_map"],
                        "step_count": engine.context.step_count,
                        "top_n_logprobs": getattr(req, 'generated_top_n_logprobs', None),
                        "prompt_top_n_logprobs": getattr(req, 'prompt_top_n_logprobs', None),
                    }
                    if req.sampling_params.return_log_probs:
                        result_dict["prompt_logprobs"] = getattr(req, 'prompt_log_probs', None)
                        result_dict["generated_logprobs"] = getattr(
                            req, 'generated_log_probs', None
                        )
                        result_dict["logprobs"] = getattr(req, 'logprobs', None)
                    if args.output_request_events:
                        result_dict["events"] = [e.serialize() for e in req.events]
                    json_results[req.request_id] = result_dict

            # Track system-level throughput as a test / debug metric
            if args.record_throughput:
                json_results["throughput"] = throughputs
            # Attach peak memory metrics; the functional test only validates these
            # if the fields exist in the golden values.
            json_results.update(peak_mem_stats)
            json_results["lifetime_prefill_token_count"] = engine.context.lifetime_prefill_token_count

            print(f' Saving results to {args.output_path}')
            with open(args.output_path, "w") as fp:
                json.dump(json_results, fp, indent=1)

        # Timing results.
        stats = torch.cuda.memory_stats()
        throughput = total_output_tokens / total_time
        print("~~~")
        peak_alloc_gb = stats["allocated_bytes.all.peak"] / 1024**3
        peak_resvd_gb = stats["reserved_bytes.all.peak"] / 1024**3

        p_times = step_times["prefill"]
        d_times = step_times["decode"]

        p_total = sum(p_times)
        d_total = sum(d_times)

        p_count = len(p_times)
        d_count = len(d_times)

        p_mean = p_total / p_count
        d_mean = d_total / d_count if d_count != 0 else 0.0

        # Commented out for now as the step/add/output times are not calculated correctly.
        # print(
        #     f"{setup_prefix} … "
        #     f"mem {peak_alloc_gb:.1f}/{peak_resvd_gb:.1f} GB … "
        #     f"total time: {step_total:.3f}s … "
        #     f"step time: total {step_total:.3f}s "
        #     f"[ p {p_total:.3f}s, d {d_total:.3f}s ], "
        #     f"mean [ p {p_mean:.3f}s, d {d_mean:.3f}s ], "
        #     f"count [ p {p_count}, d {d_count} ]."
        # )
        capture_str = f"{engine.capture_stats['time']:.2f} sec" if engine.capture_stats else "--"
        print(
            f"{setup_prefix} … " f"throughput: {throughput:.3f} tok/s … ",
            f"total time: {total_time:.3f}s … "
            f"mem {peak_alloc_gb:.1f}/{peak_resvd_gb:.1f} GB … "
            f"steps: {engine.context.step_count:d} … "
            f"capture {capture_str}",
        )
        print("~~~")

    # Stop Nsight profiler.
    if os.environ.get("NSIGHT_PREFIX"):
        torch.cuda.cudart().cudaProfilerStop()


if __name__ == "__main__":
    main()


================================================
FILE: examples/inference/gpt/gpt_dynamic_inference_12b.sh
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

# Run dynamic batching inference on the 12B GPT model.

set -u

# Libraries.
pip install simpy
pip install sentencepiece
pip install tiktoken

# Environment variables.
export CUDA_DEVICE_MAX_CONNECTIONS=1

# Checkpoint.
: ${CHECKPOINT_DIR:?"CHECKPOINT_DIR is not set"}
: ${TOKENIZER_MODEL:?"TOKENIZER_MODEL is not set"}

# Prompts.
: ${NUM_TOKENS_TO_PROMPT="8 32"}
: ${NUM_TOKENS_TO_GENERATE=256}
: ${INCOMING_REQUESTS_DURATION=10.}
: ${INCOMING_REQUESTS_PER_SEC=100.}

# Dynamic context.
: ${BUFFER_SIZE_GB=50.}

# Cuda graphs.
: ${NUM_CUDA_GRAPHS=16}

# Miscellaneous.
: ${USE_COORDINATOR=0}
: ${ENGINE=dynamic}
: ${EXTRA_ARGS=""}
# NSIGHT_PREFIX=/path/to/nsight/profile

# Arguments.
ARGS=" \
    --no-persist-layer-norm \
    --apply-layernorm-1p \
    --no-position-embedding \
    --group-query-attention \
    --num-query-groups 8 \
    --load ${CHECKPOINT_DIR} \
    --use-checkpoint-args \
    --untie-embeddings-and-output-weights \
    --disable-bias-linear \
    --use-rotary-position-embeddings \
    --position-embedding-type rope \
    --rotary-base 1000000 \
    --rotary-percent 1.0 \
    --swiglu \
    --normalization RMSNorm \
    --attention-dropout 0.0 \
    --hidden-dropout 0.0 \
    --exit-duration-in-mins 5740 \
    --tensor-model-parallel-size 1 \
    --pipeline-model-parallel-size 1 \
    --num-layers 40 \
    --hidden-size 5120 \
    --ffn-hidden-size 14336 \
    --num-attention-heads 32 \
    --kv-channels 128 \
    --seq-length 1024 \
    --max-position-embeddings 1024 \
    --micro-batch-size 64 \
    --bf16 \
    --tokenizer-type TikTokenizer \
    --tiktoken-pattern v2 \
    --tokenizer-model ${TOKENIZER_MODEL} \
    --distributed-timeout-minutes 2400 \
    --use-flash-attn \
    --inference-rng-tracker \
    \
    --inference-dynamic-batching \
    --inference-dynamic-batching-buffer-size-gb ${BUFFER_SIZE_GB} \
    \
    ${EXTRA_ARGS} \
"

# Cuda graphs.
if [ "${NUM_CUDA_GRAPHS}" != "0" ]; then
    ARGS+=" \
        --cuda-graph-impl local \
        --inference-dynamic-batching-num-cuda-graphs ${NUM_CUDA_GRAPHS} \
    "
else
    ARGS+=" \
        --cuda-graph-impl none \
    "
fi

# Prompts.
if [[ -v PROMPTS ]]; then
    ARGS+=" \
        --prompts ${PROMPTS} \
        --num-tokens-to-generate ${NUM_TOKENS_TO_GENERATE} \
    "
elif [[ -v PROMPT_FILE ]]; then
    ARGS+=" \
        --prompt-file ${PROMPT_FILE} \
        --num-tokens-to-generate ${NUM_TOKENS_TO_GENERATE} \
    "
else
    ARGS+=" \
        --num-tokens-to-prompt ${NUM_TOKENS_TO_PROMPT} \
        --num-tokens-to-generate ${NUM_TOKENS_TO_GENERATE} \
        --incoming-requests-duration ${INCOMING_REQUESTS_DURATION} \
        --incoming-requests-per-sec ${INCOMING_REQUESTS_PER_SEC} \
    "
fi

# Command.
if [[ "${USE_COORDINATOR}" == "0" ]]; then
    CMD="python -m examples.inference.gpt.gpt_${ENGINE}_inference ${ARGS}"
else
    CMD="python -um examples.inference.gpt.gpt_${ENGINE}_inference_with_coordinator ${ARGS}"
fi

if [[ -v NSIGHT_PREFIX ]]; then
    CMD="nsys profile -s none -t nvtx,cuda --cudabacktrace=all --cuda-graph-trace=node --python-backtrace=cuda --wait all -o ${NSIGHT_PREFIX} --force-overwrite true --capture-range=cudaProfilerApi --capture-range-end=stop ${CMD}"
fi

echo "~~~"
echo "CMD ... ${CMD}."
echo "~~~"
eval ${CMD}


================================================
FILE: examples/inference/gpt/gpt_dynamic_inference_357m.sh
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

# Run dynamic batching inference on the 357M GPT model.

set -u

# Libraries.
pip install simpy
pip install sentencepiece
pip install tiktoken

# Environment variables.
export CUDA_DEVICE_MAX_CONNECTIONS=1

# Checkpoint.
: ${CHECKPOINT_DIR:?"CHECKPOINT_DIR is not set"}
: ${VOCAB_FILE:?"VOCAB_FILE is not set"}
: ${MERGE_FILE:?"MERGE_FILE is not set"}

# Prompts.
: ${NUM_TOKENS_TO_PROMPT="8 32"}
: ${NUM_TOKENS_TO_GENERATE=256}
: ${INCOMING_REQUESTS_DURATION=10.}
: ${INCOMING_REQUESTS_PER_SEC=100.}

# Dynamic context.
: ${BUFFER_SIZE_GB=50.}

# Cuda graphs.
: ${NUM_CUDA_GRAPHS=16}

# Miscellaneous.
: ${USE_COORDINATOR=0}
: ${ENGINE=dynamic}
: ${NPROC_PER_NODE=1}
: ${EXTRA_ARGS=""}
# NSIGHT_PREFIX=/path/to/nsight/profile

# Arguments.
ARGS=" \
    --exit-on-missing-checkpoint \
    --transformer-impl local \
    --load ${CHECKPOINT_DIR} \
    --tokenizer-type GPT2BPETokenizer \
    --vocab-file ${VOCAB_FILE} \
    --merge-file ${MERGE_FILE} \
    --exit-on-missing-checkpoint \
    --max-position-embeddings 2048 \
    --seq-length 2048 \
    --tensor-model-parallel-size 1 \
    --pipeline-model-parallel-size 1 \
    --num-layers 24 \
    --num-attention-heads 16 \
    --hidden-size 1024 \
    --bf16 \
    --micro-batch-size 1 \
    --attention-dropout 0.0 \
    --hidden-dropout 0.0 \
    --seed 42 \
    --use-flash-attn \
    --inference-rng-tracker \
    \
    --inference-dynamic-batching \
    --inference-dynamic-batching-buffer-size-gb ${BUFFER_SIZE_GB} \
    \
    ${EXTRA_ARGS} \
"

# Cuda graphs.
if [ "${NUM_CUDA_GRAPHS}" != "0" ]; then
    ARGS+=" \
        --cuda-graph-impl local \
        --inference-dynamic-batching-num-cuda-graphs ${NUM_CUDA_GRAPHS} \
    "
else
    ARGS+=" \
        --cuda-graph-impl none \
    "
fi

# Prompts.
if [[ -v PROMPTS ]]; then
    ARGS+=" \
        --prompts ${PROMPTS} \
        --num-tokens-to-generate ${NUM_TOKENS_TO_GENERATE} \
    "
elif [[ -v PROMPT_FILE ]]; then
    ARGS+=" \
        --prompt-file ${PROMPT_FILE} \
        --num-tokens-to-generate ${NUM_TOKENS_TO_GENERATE} \
    "
else
    ARGS+=" \
        --num-tokens-to-prompt ${NUM_TOKENS_TO_PROMPT} \
        --num-tokens-to-generate ${NUM_TOKENS_TO_GENERATE} \
        --incoming-requests-duration ${INCOMING_REQUESTS_DURATION} \
        --incoming-requests-per-sec ${INCOMING_REQUESTS_PER_SEC} \
    "
fi

# Command.
if [[ "${USE_COORDINATOR}" == "0" ]]; then
    CMD="python -m examples.inference.gpt.gpt_${ENGINE}_inference ${ARGS}"
else
    CMD="python -m torch.distributed.run --nproc-per-node ${NPROC_PER_NODE} -m examples.inference.gpt.gpt_${ENGINE}_inference_with_coordinator ${ARGS}"
fi

if [[ -v NSIGHT_PREFIX ]]; then
    CMD="nsys profile -s none -t nvtx,cuda --cudabacktrace=all --cuda-graph-trace=node --python-backtrace=cuda --wait all -o ${NSIGHT_PREFIX} --force-overwrite true --capture-range=cudaProfilerApi --capture-range-end=stop ${CMD}"
fi

echo "~~~"
echo "CMD ... ${CMD}."
echo "~~~"
eval ${CMD}


================================================
FILE: examples/inference/gpt/gpt_dynamic_inference_with_coordinator.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import asyncio
import json
import logging
import os
import time
import warnings
from collections import defaultdict
from typing import List

import torch
import torch.distributed as dist

from examples.inference.gpt.utils import Request, build_dynamic_engine_setup_prefix, build_requests
from megatron.core.inference.engines import DynamicInferenceEngine
from megatron.core.inference.engines.dynamic_engine import EngineState
from megatron.core.inference.inference_client import InferenceClient
from megatron.core.inference.inference_request import DynamicInferenceRequestRecord
from megatron.core.inference.sampling_params import SamplingParams
from megatron.inference.utils import (
    add_inference_args,
    get_dynamic_inference_engine,
    get_model_for_inference,
)
from megatron.training import get_args, get_tokenizer, initialize_megatron

# pylint: disable=line-too-long

logging.basicConfig(level=logging.INFO, force=True)


async def suspend_resume_cycle(client, engine, args, futures):
    """Wait for all in-flight requests, then suspend/train/resume."""
    await asyncio.gather(*futures)

    client.pause_engines()
    await engine.wait_until(EngineState.PAUSED)
    client.suspend_engines()
    await engine.wait_until(EngineState.SUSPENDED)
    if args.suspend_timeout > 0:
        await asyncio.sleep(args.suspend_timeout)
    client.resume_engines()
    await engine.wait_until(EngineState.RESUMED)
    client.unpause_engines()
    await engine.wait_until(EngineState.RUNNING)


async def main(
    engine: DynamicInferenceEngine,
    requests: List[Request],
    port: int | None = None,
    sampling_params: SamplingParams | None = None,
):
    if sampling_params is not None:
        warnings.warn(
            "The `sampling_params` argument is deprecated. "
            "Sampling parameters are specified per request.",
            DeprecationWarning,
        )

    # once you call engine.start_listening_to_data_parallel_coordinator,
    # the engine will start accepting requests from the data parallel coordinator.
    # and processing them in an asyncio coroutine.
    # leaving inference_coordinator_port as None will find a free port automatically.
    args = get_args()

    dp_addr = await engine.start_listening_to_data_parallel_coordinator(
        inference_coordinator_port=port,
        launch_inference_coordinator=True,
        coordinator_schedule_output_path=args.coordinator_schedule_output_path,
    )

    # All ranks agree on the number of suspend/resume cycles from args.
    num_suspend_resume_cycles = len(requests) // args.suspend_resume_interval if args.suspend_resume_interval else 0

    # Create client and run example.
    if dist.get_rank() == 0:
        client = InferenceClient(dp_addr, deserialize=True)  # submits requests to the inference coordinator
        client.start()
        base_arrival_time = time.time_ns() / 10**9
        for request in requests:
            request.time_arrival = request.time_offset + base_arrival_time
        futures = []
        num_requests_total = len(requests)
        num_requests_added = 0
        next_suspend_at = args.suspend_resume_interval or 0
        cycles_done = 0

        while True:
            current_time = time.time_ns() / 10**9
            if args.incoming_requests_per_step is None:
                # Only add requests that have arrived at the current time.
                while (
                    num_requests_added < num_requests_total
                    and requests[num_requests_added].time_arrival <= current_time
                ):
                    request = requests[num_requests_added]
                    # These add-request calls will queue up the request on a zmq socket and return
                    # instantaneously. They will return an asyncio future which can be awaited for
                    # request completion.
                    futures.append(client.add_request(request.prompt_text, request.sampling_params))
                    num_requests_added += 1

                    if num_requests_added >= next_suspend_at and cycles_done < num_suspend_resume_cycles:
                        await suspend_resume_cycle(client, engine, args, futures)
                        cycles_done += 1
                        next_suspend_at += args.suspend_resume_interval

            else:
                # Add deterministic number of requests (generally used for debugging).
                for i in range(
                    min(args.incoming_requests_per_step, num_requests_total - num_requests_added)
                ):
                    # Change sampling parameters to force different generation lengths.
                    request = requests[num_requests_added]
                    n = request.sampling_params.num_tokens_to_generate
                    request.sampling_params.num_tokens_to_generate = n + i
                    futures.append(client.add_request(request.prompt_text, request.sampling_params))
                    num_requests_added += 1

                    if num_requests_added >= next_suspend_at and cycles_done < num_suspend_resume_cycles:
                        await suspend_resume_cycle(client, engine, args, futures)
                        cycles_done += 1
                        next_suspend_at += args.suspend_resume_interval

            if num_requests_added == num_requests_total:
                break
            # Relinquish control since there are no more requests to add at the moment. This allows the engine to run.
            await asyncio.sleep(0)

        # While we wait for the requests to complete, the engine runs in the background.
        results: List[DynamicInferenceRequestRecord] = await asyncio.gather(*futures)
    else:
        # Non-rank-0: match the suspend/resume cycles that rank 0 drives.
        for _ in range(num_suspend_resume_cycles):
            await engine.wait_until(EngineState.PAUSED)
            await engine.wait_until(EngineState.SUSPENDED)
            await engine.wait_until(EngineState.RESUMED)
            await engine.wait_until(EngineState.RUNNING)

    if dist.get_rank() == 0:
        # Write results to JSON. Primarily used for functional testing.
        if args.output_path:
            json_results = {}
            throughputs = []

            for req in results:
                result_dict = {
                    "input_prompt": req.prompt,
                    "generated_text": req.generated_text.replace("\n", "\\n"),
                    "generated_tokens": req.generated_tokens,
                    "latency": req.latency,  # InferenceClient populates this field in the returned future.
                }
                if req.sampling_params.return_log_probs:
                    result_dict["logprobs"] = req.prompt_log_probs + req.generated_log_probs
                throughput = len(req.generated_tokens) / req.latency
                throughputs.append(throughput)
                if req.routing_indices is not None:
                    result_dict["routing_indices"] = req.routing_indices.tolist()
                                
                json_results[req.request_id] = result_dict
            throughput_dict = {"throughput": throughputs}
            if args.throughput_check_only:
                json_results = throughput_dict
            with open(args.output_path, "w") as fp:
                json.dump(json_results, fp, indent=4)
        else:
            print("Results:")
            unique_prompt_map = defaultdict(list)
            for req in results:
                unique_prompt_map[req.prompt].append(req)
            for idx, (prompt_text, reqs) in enumerate(unique_prompt_map.items()):
                print(
                    f"%d/%d. prompt '%s' ... [%d] output '%s'."
                    % (
                        idx,
                        len(unique_prompt_map),
                        prompt_text.replace("\n", "\\n"),
                        len(reqs),
                        reqs[0].generated_text.replace("\n", "\\n"),
                    )
                )

        # Pause before stopping: STOP requires PAUSED or SUSPENDED state.
        client.pause_engines()

    await engine.wait_until(EngineState.PAUSED)

    if dist.get_rank() == 0:
        client.stop_engines()

    await engine.wait_until(EngineState.STOPPED)

    if dist.get_rank() == 0:
        client.shutdown_coordinator()
        client.stop()
    logging.info(f"Rank: {dist.get_rank()} stopped their engine instance successfully.")


if __name__ == "__main__":
    # enable inference mode in the very beginning as some fp8 optimizations
    # check for it.
    with torch.inference_mode():
        initialize_megatron(
            extra_args_provider=add_inference_args,
            args_defaults={'no_load_rng': True, 'no_load_optim': True},
        )

        args = get_args()
        tokenizer = get_tokenizer()

        # Sampling params.
        sampling_params = SamplingParams(
            temperature=args.temperature,
            top_k=args.top_k,
            top_p=args.top_p,
            return_log_probs=args.return_log_probs,
            num_tokens_to_generate=args.num_tokens_to_generate,
            termination_id=(
                args.termination_id if args.termination_id is not None else tokenizer.eod
            ),
        )

        model = get_model_for_inference()

        requests = build_requests(args, tokenizer, sampling_params)

        engine = get_dynamic_inference_engine(model=model)

        if dist.get_rank() == 0:
            setup_prefix = build_dynamic_engine_setup_prefix(args, model, engine.context, requests)
            print("~~~")
            print(setup_prefix)
            print("~~~")

        # Start Nsight profiler.
        if os.environ.get("NSIGHT_PREFIX"):
            torch.cuda.cudart().cudaProfilerStart()

        asyncio.run(main(engine, requests, args.inference_coordinator_port))

        # Stop Nsight profiler.
        if os.environ.get("NSIGHT_PREFIX"):
            torch.cuda.cudart().cudaProfilerStop()


================================================
FILE: examples/inference/gpt/gpt_static_inference.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import os
import sys
import time
from argparse import Namespace

import torch

from megatron.core.inference.contexts import StaticInferenceContext
from megatron.core.inference.engines import StaticInferenceEngine
from megatron.core.inference.inference_request import InferenceRequest
from megatron.core.inference.model_inference_wrappers.gpt.gpt_inference_wrapper import (
    GPTInferenceWrapper,
)
from megatron.core.inference.sampling_params import SamplingParams
from megatron.core.inference.text_generation_controllers.text_generation_controller import (
    TextGenerationController,
)
from megatron.core.tokenizers.utils.build_tokenizer import build_tokenizer
from megatron.core.transformer.module import MegatronModule

sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
)

import asyncio
import json
from typing import List

from examples.inference.gpt.utils import build_requests
from megatron.inference.utils import add_inference_args, get_model_for_inference
from megatron.training import get_args, get_tokenizer, print_rank_0
from megatron.training.initialize import initialize_megatron


def add_static_inference_args(parser):
    """Static inference arguments."""

    add_inference_args(parser)

    group = parser.add_argument_group(title='Static inference')
    group.add_argument(
        "--max-batch-size",
        type=int,
        default=None,
        dest="max_batch_size",
        help='Deprecated, use `--inference-max-requests` instead',
    )
    group.add_argument("--stream", action="store_true", default=False, help="Stream output tokens")

    return parser


def get_inference_engine(args: Namespace, model: MegatronModule) -> StaticInferenceEngine:
    """Utility to get the relevant backend for running inference

    This function will automatically choose the TRTLLMBackend when possible, and if not revert to Mcore backend if the user does not specify any backends. TRT LLM Backend is not implmented yet.

    Args:
        args (Namespace): The user arguments parsed from command line
        model (MegatronModule): The megatron model .

    Returns:
        AbstractBackend: The chosen backend
    """
    tokenizer = build_tokenizer(args)
    inference_context = StaticInferenceContext(
        args.inference_max_requests, args.inference_max_seq_length
    )
    inference_wrapped_model = GPTInferenceWrapper(model, inference_context)
    text_generation_controller = TextGenerationController(
        inference_wrapped_model=inference_wrapped_model, tokenizer=tokenizer
    )
    engine_kwargs = {
        "text_generation_controller": text_generation_controller,
        "legacy": args.use_legacy_static_engine,
    }
    if not args.use_legacy_static_engine:
        engine_kwargs["buffer_size_gb"] = args.inference_dynamic_batching_buffer_size_gb
    return StaticInferenceEngine(**engine_kwargs)


async def generate(
    inference_engine: StaticInferenceEngine, sampling_params: SamplingParams, prompts: List[str]
) -> List[InferenceRequest]:
    async def collect_stream(prompt, request_id, stream_generator):
        print(f"Request {request_id}: {prompt}", end="", flush=True)
        prev_idx = 0
        async for output in stream_generator:
            print(output.generated_text[prev_idx:], end="", flush=True)
            prev_idx = len(output.generated_text)
        print()

    request_ids: List[int] = [
        inference_engine.add_request(prompt=prompt, sampling_params=sampling_params, streaming=True)
        for prompt in prompts
    ]
    stream_generators = [
        inference_engine.get_stream_generator(request_id) for request_id in request_ids
    ]

    tasks = [
        asyncio.create_task(collect_stream(prompt, request_id, stream_generator))
        for (prompt, request_id, stream_generator) in zip(prompts, request_ids, stream_generators)
    ]

    await inference_engine.run_engine_async()
    await asyncio.gather(*tasks)

    results: List[InferenceRequest] = [
        inference_engine.scheduler.completed_request_pool[request_id] for request_id in request_ids
    ]

    return results


@torch.inference_mode()
def main():
    """Main program."""

    # Note: The default args passed here can be overwritten by using appropriate params (check arguments.py file)
    # Micro batch size is not needed to be set by user. (It is calculated based on inference-batch-times-seqlen-threshold argument)
    initialize_megatron(
        extra_args_provider=add_static_inference_args,
        args_defaults={
            'no_load_rng': True,
            'no_load_optim': True,
            'micro_batch_size': 1,
            'exit_on_missing_checkpoint': True,
        },
    )

    args = get_args()

    model = get_model_for_inference()

    inference_engine = get_inference_engine(args, model)

    sampling_params = SamplingParams(
        temperature=args.temperature,
        top_k=args.top_k,
        top_p=args.top_p,
        return_log_probs=args.return_log_probs,
        num_tokens_to_generate=args.num_tokens_to_generate,
        top_n_logprobs=args.top_n_logprobs,
    )

    # Build tokenizer
    tokenizer = build_tokenizer(args)

    requests = build_requests(args, tokenizer)
    prompts = [r.prompt_text for r in requests]

    if args.cuda_graph_impl == "local":
        print(f"Running warmup for CUDA graphs...")
        inference_engine.generate(
            prompts=["warmup"], sampling_params=SamplingParams(num_tokens_to_generate=10)
        )
    start_time = time.perf_counter()
    if args.stream:
        results: List[InferenceRequest] = asyncio.run(
            generate(inference_engine, sampling_params, prompts)
        )
    else:
        results: List[InferenceRequest] = inference_engine.generate(
            prompts=prompts, sampling_params=sampling_params
        )
    end_time = time.perf_counter()
    latency = end_time - start_time

    if torch.distributed.get_rank() == 0 and args.output_path:
        results_output = {}
        for idx, result in enumerate(results):
            result_dict = {
                'input_prompt': result.prompt,
                'generated_text': result.generated_text,
                'generated_tokens': result.generated_tokens.tolist(),
                'tpot': result.tpot,
                'latency': latency,
            }
            if sampling_params.top_n_logprobs > 0:
                result_dict['generated_top_n_logprobs'] = result.generated_top_n_logprobs
            if sampling_params.return_log_probs:
                response_logprobs = result.prompt_log_probs + result.generated_log_probs
                result_dict["logprobs"] = response_logprobs
            results_output[result.request_id] = result_dict

        with open(args.output_path, 'w') as f:
            json.dump(results_output, f)

    # Print unique prompts + outputs.
    if torch.distributed.get_rank() == 0:

        print("~~~~ Unique prompts + outputs. ~~~~")

        # Map results by their prompt.
        from collections import defaultdict

        unique_prompt_map = defaultdict(list)
        for result_idx, result in enumerate(results):
            unique_prompt_map[result.prompt].append(result_idx)

        # Print unique prompts + outputs.
        for unique_idx, (prompt_text, result_idxs) in enumerate(unique_prompt_map.items()):
            result_idx = result_idxs[0]
            result = results[result_idx]
            generated_text = result.generated_text.replace("\n", "\\n")
            print(
                f"{unique_idx}/{len(unique_prompt_map)} [{len(result_idxs)}]. {prompt_text} "
                f"... {generated_text}"
            )

    stats = torch.cuda.memory_stats()
    print_rank_0(
        "static | cg %d | %s | reqs %d [ batch %d ] ... mem %.1f/%.1f ... time %.3f."
        % (
            args.cuda_graph_impl == "local",
            (
                f"<user prompts>"
                if args.prompts
                else "<auto prompts> %s, %d, %.1e, %.1e"
                % (
                    "(%s)" % " ".join(map(str, args.num_tokens_to_prompt)),
                    args.num_tokens_to_generate,
                    args.incoming_requests_duration,
                    args.incoming_requests_per_sec,
                )
            ),
            len(requests),
            args.inference_max_requests,
            stats["allocated_bytes.all.peak"] / (1024**3),
            stats["reserved_bytes.all.peak"] / (1024**3),
            latency,
        )
    )
    # Force immediate process exit to bypass torchrun's atexit NCCL teardown when
    # CUDA graphs have captured collectives (see PyTorch issue #115388).  This can
    # sometimes lead to hangs in the atexit handler.
    # We do this only when CUDA graphs are enabled.
    if args.cuda_graph_impl != "none":
        print(f"[main] rank {torch.distributed.get_rank()}: finished", flush=True)
        os._exit(0)
    else:
        torch.distributed.destroy_process_group()


if __name__ == "__main__":
    main()


================================================
FILE: examples/inference/gpt/utils.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import copy
import itertools
import json
import random
import time
from argparse import ArgumentParser, Namespace
from functools import partial
from typing import Any, List, Optional

import torch
from tqdm import tqdm

from megatron.core.inference.contexts import DynamicInferenceContext
from megatron.core.inference.contexts.dynamic_context import get_mem_size_str
from megatron.core.inference.inference_request import DynamicInferenceRequest
from megatron.core.inference.sampling_params import SamplingParams
from megatron.core.transformer.module import MegatronModule
from megatron.training import get_args


def get_default_sampling_params(termination_id: int = None):
    return SamplingParams(
        temperature=1.0,
        top_k=1,
        top_p=0.0,
        return_log_probs=False,
        num_tokens_to_generate=30,
        termination_id=termination_id,
    )


def get_curr_time() -> float:
    """Get synchronized time across ranks."""
    curr_time = torch.cuda.LongTensor([time.time_ns()])
    if torch.distributed.is_initialized():
        torch.distributed.broadcast(curr_time, src=0)
    return curr_time.item() / 10**9


class Request:
    """Class to hold attributes for a single request.

    A request is initialized with its prompt text. As it is added, processed,
    and completed through the inference engine, the request is populated with its
    start time, end time, and output tokens.

    Args:
        prompt_text (str): Prompt text.
        time_offset (float): Artificial time offset for simulating incoming
            requests. This value is later added to the `base_arrival_time` to
            simulate the requests arrival time.
        tokenizer (Any): Tokenizer for tokenizing the prompt.
    """

    def __init__(
        self,
        prompt_text: str,
        time_offset: float,
        tokenizer: Any,
        sampling_params: SamplingParams = None,
    ):
        self.prompt_text = prompt_text
        self.prompt_tokens = tokenizer.tokenize(prompt_text)
        self.output_text = None
        self.output_tokens = []
        self.time_offset = time_offset
        self.time_arrival = None
        self.time_start = None
        self.time_end = None
        self.ttft = None  # Time-to-first-token in seconds
        self.state = "not-started"
        self.sampling_params: SamplingParams = (
            sampling_params
            if sampling_params is not None
            else get_default_sampling_params(tokenizer.eod)
        )
        self.sampling_params = copy.deepcopy(self.sampling_params)

    def __str__(self) -> str:
        return "state '%s'; toffset %.1e; prompt len %d; output len %d; '%s'" % (
            self.state,
            self.time_offset,
            len(self.prompt_tokens),
            len(self.output_tokens),
            self.prompt_text,
        )


def get_time_offsets(
    seed: int | None,
    incoming_requests_per_step: int,
    incoming_requests_per_sec: float,
    num_requests: int,
) -> list[float]:
    """Get example time offsets."""

    # Time offsets to add all requests at once.
    if incoming_requests_per_step is not None or incoming_requests_per_sec <= 0:
        return [-1] * num_requests

    # if num_requests is not None:
    incoming_requests_duration = num_requests / incoming_requests_per_sec
    incoming_requests_duration *= 2  # extra margin, to accomodate time sampling

    random.seed(seed)

    import simpy  # Guard against this import in test case

    # Generate random time offsets.
    def arrival(r):
        while True:
            yield env.timeout(random.expovariate(r))
            time_offsets.append(env.now)

    time_offsets = []
    env = simpy.Environment()
    env.process(arrival(incoming_requests_per_sec))
    env.run(incoming_requests_duration)

    # Ensure at least a single request.
    if len(time_offsets) == 0:
        time_offsets = [0.0]

    # Ensure first time is 0.
    time_offsets = [to - time_offsets[0] for to in time_offsets]

    # Truncate to num_requests.
    assert len(time_offsets) >= num_requests
    time_offsets = time_offsets[:num_requests]

    return time_offsets


def get_cli_requests(
    args: Namespace, tokenizer: Any, sampling_params: Optional[SamplingParams] = None
) -> list[Request]:

    # Get time offsets.
    t_offsets = get_time_offsets(
        args.seed,
        args.incoming_requests_per_step,
        args.incoming_requests_per_sec,
        len(args.prompts),
    )

    # Init requests.
    requests = [Request(p, t, tokenizer, sampling_params) for p, t in zip(args.prompts, t_offsets)]
    return requests


def get_synthetic_requests(
    args: Namespace, tokenizer: Any, sampling_params: Optional[SamplingParams] = None
) -> list[Request]:
    """Get example requests."""

    # Get time offsets.
    time_offsets = get_time_offsets(
        args.seed,
        args.incoming_requests_per_step,
        args.incoming_requests_per_sec,
        int(args.incoming_requests_per_sec * args.incoming_requests_duration),
    )

    # Build prompts with expected lengths.
    assert (
        len(args.num_tokens_to_prompt) == 2
        and args.num_tokens_to_prompt[1] >= args.num_tokens_to_prompt[0]
    )
    max_prompt_length = args.num_tokens_to_prompt[1]
    max_prompt_text = "hi " * max_prompt_length
    max_prompt_tokens = tokenizer.tokenize(max_prompt_text)
    prompt_lengths = [random.randint(*args.num_tokens_to_prompt) for _ in time_offsets]
    prompt_tokens_list = [max_prompt_tokens[:l] for l in prompt_lengths]
    prompt_texts = [tokenizer.detokenize(tt) for tt in prompt_tokens_list]

    # Init requests.
    assert len(prompt_texts) == len(time_offsets)
    requests = [
        Request(t, o, tokenizer, sampling_params=sampling_params)
        for t, o in zip(prompt_texts, time_offsets)
    ]

    return requests


def get_requests_from_file(
    args: Namespace, tokenizer: Any, sampling_params: Optional[SamplingParams] = None
) -> list[Request]:
    """Get requests from a file."""
    if not args.prompt_file:
        raise ValueError("Prompt file is required to read requests from a file.")

    # Load prompts.
    n_prompts = sum(1 for _ in open(args.prompt_file))
    prompts = []
    if sampling_params is None:
        sampling_params = get_default_sampling_params(tokenizer.eod)
    sampling_params_list = []
    with open(args.prompt_file) as f:
        for line in tqdm(f.readlines(), "read prompt file", total=n_prompts):
            line_dict = json.loads(line)
            prompts.append(line_dict["text"])

            sp = copy.deepcopy(sampling_params)
            if args.num_tokens_from_file:
                sp.num_tokens_to_generate = line_dict["chatgpt_output_token_length"]
            sampling_params_list.append(sp)

            if len(prompts) == args.prompt_file_num_truncate:
                break

    # Get time offsets.
    time_offsets: list[float] = get_time_offsets(
        args.seed, args.incoming_requests_per_step, args.incoming_requests_per_sec, len(prompts)
    )

    # Init requests.
    requests = [
        Request(p, t, tokenizer, sp)
        for p, t, sp in tqdm(
            zip(prompts, time_offsets, sampling_params_list), "init requests", total=len(prompts)
        )
    ]

    return requests


def build_requests(
    args: Namespace, tokenizer: Any, sampling_params: Optional[SamplingParams] = None
) -> list[Request]:
    # Check if we have any prompts (from command line or JSONL)
    if args.prompts:
        if args.prompt_file:
            raise ValueError("Cannot use both --prompts and --prompt-file")
        return get_cli_requests(args, tokenizer, sampling_params)
    elif args.prompt_file:
        return get_requests_from_file(args, tokenizer, sampling_params)
    else:
        return get_synthetic_requests(args, tokenizer, sampling_params)


def get_model_size_str(model):
    n = sum(p.numel() for p in model.parameters())
    for exp, suffix in ((12, "t"), (9, "b"), (6, "m"), (3, "k"), (0, "")):
        nquery = int(10**exp)
        if n > nquery:
            return "%d%s" % (n // nquery, suffix)
    raise Exception("something went wrong.")


def build_dynamic_engine_setup_prefix(
    args: Namespace,
    model: MegatronModule,
    context: DynamicInferenceContext,
    requests: list[DynamicInferenceRequest],
):
    """
    Returns a compact, pipe-separated summary of the dynamic-batching setup.

    Example output:

    `dynamic | cg True | prompts: synth(16 256), n 1024, g 512, t 1.0e+02 5.0e-01 | bf 4, 1.2 [r 1024, t 8192] | gtd 0.50 [r 512] | reqs 100` # pylint: disable=line-too-long

    Args:
        args (Namespace): Command-line arguments for this run.
        context (DynamicInferenceContext): Stores limits such as `max_requests`,
            `max_tokens`, and `gtd_request_count`.
        requests (List[DynamicInferenceRequest]): List of inference requests.

    Returns:
        A configuration string for logging.
    """
    # CUDA graph config
    if args.cuda_graph_impl == "local":
        cg_str = f"graphs {len(context.cuda_graph_batch_dimensions_list)}"
    else:
        cg_str = "--"

    # Unified memory (UVM).
    uvm_str = f"uvm {int(context.unified_memory_level)}"

    # Prompt description
    prompt_src_str = (
        "cli"
        if args.prompts
        else (
            "file"
            if args.prompt_file
            else f"synth({', '.join(map(str, args.num_tokens_to_prompt))})"
        )
    )
    request_str = (
        f"requests: {prompt_src_str}, " f"n {len(requests):d}, g {args.num_tokens_to_generate:d}, "
    )
    request_str += (
        f"dur {args.incoming_requests_duration:.1e} " f"r/sec {args.incoming_requests_per_sec:.1e}"
        if args.incoming_requests_per_step is None
        else f"r/step {args.incoming_requests_per_step}"
    )

    # Buffer limits config
    buffer_limits_str = (
        f"bf: {get_mem_size_str(args.inference_dynamic_batching_buffer_size_gb*1024**3)}, "
        f"{context.kv_block_allocator.active_count} chunks "
        f"[r {context.max_requests}, t {context.max_tokens}]"
    )

    parts = [get_model_size_str(model), "dynamic", cg_str, uvm_str, request_str, buffer_limits_str]

    return " | ".join(parts)


def get_global_peak_memory_stats_bytes() -> dict:
    """Peak allocated CUDA memory aggregated across ranks (MAX), in bytes.

    Uses `torch.cuda.max_memory_allocated()` and assumes peak stats were reset
    before the benchmark run.
    """
    peak_alloc = int(torch.cuda.max_memory_allocated())
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        t = torch.tensor([peak_alloc], device="cuda", dtype=torch.int64)
        torch.distributed.all_reduce(t, op=torch.distributed.ReduceOp.MAX)
        peak_alloc = int(t[0].item())
    return {"mem-max-allocated-bytes": peak_alloc}


================================================
FILE: examples/inference/llama_mistral/huggingface_reference.py
================================================
import argparse
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

# Set up argument parsing
parser = argparse.ArgumentParser(description="Script for text generation with a specific model and prompt.")
parser.add_argument('--prompt', type=str, required=True, help="Prompt text to use for text generation")
parser.add_argument('--model-path', type=str, required=True, help="Path to the Huggingface model checkpoint")

# Parse command-line arguments
args = parser.parse_args()

model_path = args.model_path
prompt = args.prompt

config = AutoConfig.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path, config=config)
model = AutoModelForCausalLM.from_pretrained(model_path, config=config).cuda()

inputs = tokenizer(prompt, return_tensors="pt")
for key in inputs:
    inputs[key] = inputs[key].cuda()
# top_k, top_p and do_sample are set for greedy argmax based sampling

outputs = model.generate(**inputs, max_length=100, do_sample=False, top_p=0, top_k=0, temperature=1.0)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

================================================
FILE: examples/inference/llama_mistral/run_static_inference_llama4_scout.sh
================================================
#!/bin/bash
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NVTE_APPLY_QK_LAYER_SCALING=0

DISTRIBUTED_ARGS="--nproc_per_node 8 \
                  --nnodes 1 \
                  --node_rank 0 \
                  --master_addr 0.0.0.0 \
                  --master_port 6000"

# Fill in checkpoint path to Llama 4 Scout to run
CHECKPOINT=<Path to Scout checkpoint>
PROMPTS="What is the capital of France?"
TOKENS_TO_GENERATE=4
MAX_BATCH_SIZE=2

MODEL_ARGS=" \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --untie-embeddings-and-output-weights \
    --position-embedding-type rope \
    --no-rope-fusion \
    --normalization RMSNorm \
    --swiglu \
    --num-layers 48 \
    --hidden-size 5120 \
    --ffn-hidden-size 16384 \
    --num-attention-heads 40 \
    --group-query-attention \
    --num-query-groups 8 \
    --qk-layernorm \
    --num-experts 16 \
    --moe-ffn-hidden-size 8192 \
    --moe-router-score-function sigmoid \
    --moe-router-topk 1 \
    --moe-router-topk-scaling-factor 1.0 \
    --moe-shared-expert-intermediate-size 8192 \
    --moe-aux-loss-coeff 1e-3 \
    --moe-token-dispatcher-type alltoall \
    --moe-token-drop-policy probs \
    --moe-router-load-balancing-type seq_aux_loss \
    --seq-length 4096 \
    --max-position-embeddings 4096 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 128 \
    --use-mcore-models \
    --rotary-interleaved \
    --rotary-percent 1.0 \
    --rotary-base 500000 \
    --rope-scaling-factor 8.0 \
    --use-rope-scaling \
    --no-bias-swiglu-fusion \
    --qk-l2-norm \
    --moe-apply-probs-on-input \
    --moe-router-dtype fp64 \
"

torchrun $DISTRIBUTED_ARGS -m examples.inference.gpt.gpt_static_inference   \
      --load ${CHECKPOINT} \
      --tokenizer-model unsloth/Llama-4-Scout-17B-16E-Instruct \
      --dist-ckpt-strictness log_unexpected \
      --tensor-model-parallel-size 8 \
      --prompts ${PROMPTS} \
      --num-tokens-to-generate ${TOKENS_TO_GENERATE}  \
      --max-batch-size ${MAX_BATCH_SIZE} \
      ${MODEL_ARGS}


================================================
FILE: examples/inference/llama_mistral/run_text_generation_llama3.1.sh
================================================
#!/bin/bash
# This example will start serving the Llama3.1-8B model
export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NVTE_APPLY_QK_LAYER_SCALING=0

DISTRIBUTED_ARGS="--nproc_per_node 1 \
                  --nnodes 1 \
                  --node_rank 0 \
                  --master_addr 0.0.0.0 \
                  --master_port 6000"

# Ensure CHECKPOINT and TOKENIZER_MODEL are provided
if [ -z "$1" ] || [ -z "$2" ]; then
  echo "Error: You must provide CHECKPOINT and TOKENIZER_MODEL as command-line arguments."
  echo "Usage: $0 /path/to/checkpoint /path/to/tokenizer_model"
  exit 1
fi

# Assign command-line arguments to variables
CHECKPOINT=$1
TOKENIZER_MODEL=$2

pip install flask-restful

torchrun $DISTRIBUTED_ARGS tools/run_text_generation_server.py   \
      --use-checkpoint-args \
      --disable-bias-linear \
      --tokenizer-type HuggingFaceTokenizer \
      --tokenizer-model ${TOKENIZER_MODEL} \
      --transformer-impl transformer_engine \
      --normalization RMSNorm \
      --group-query-attention \
      --num-query-groups 8 \
      --no-masked-softmax-fusion \
      --attention-softmax-in-fp32 \
      --attention-dropout 0.0 \
      --hidden-dropout 0.0 \
      --untie-embeddings-and-output-weights \
      --position-embedding-type rope \
      --rotary-percent 1.0 \
      --rotary-base 500000 \
      --use-rope-scaling \
      --use-rotary-position-embeddings \
      --swiglu \
      --tensor-model-parallel-size 1  \
      --pipeline-model-parallel-size 1  \
      --num-layers 32  \
      --hidden-size 4096  \
      --ffn-hidden-size 14336 \
      --load ${CHECKPOINT}  \
      --num-attention-heads 32  \
      --max-position-embeddings 131072  \
      --bf16  \
      --micro-batch-size 1  \
      --seq-length 8192


================================================
FILE: examples/inference/llama_mistral/run_text_generation_llama3.sh
================================================
#!/bin/bash
# This example will start serving the Llama3-8B model
export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NVTE_APPLY_QK_LAYER_SCALING=0

DISTRIBUTED_ARGS="--nproc_per_node 1 \
                  --nnodes 1 \
                  --node_rank 0 \
                  --master_addr 0.0.0.0 \
                  --master_port 6000"

# Ensure CHECKPOINT and TOKENIZER_MODEL are provided
if [ -z "$1" ] || [ -z "$2" ]; then
  echo "Error: You must provide CHECKPOINT and TOKENIZER_MODEL as command-line arguments."
  echo "Usage: $0 /path/to/checkpoint /path/to/tokenizer_model"
  exit 1
fi

# Assign command-line arguments to variables
CHECKPOINT=$1
TOKENIZER_MODEL=$2

pip install flask-restful

torchrun $DISTRIBUTED_ARGS tools/run_text_generation_server.py   \
      --use-checkpoint-args \
      --disable-bias-linear \
      --tokenizer-type HuggingFaceTokenizer \
      --tokenizer-model ${TOKENIZER_MODEL} \
      --transformer-impl transformer_engine \
      --normalization RMSNorm \
      --group-query-attention \
      --num-query-groups 8 \
      --no-masked-softmax-fusion \
      --attention-softmax-in-fp32 \
      --attention-dropout 0.0 \
      --hidden-dropout 0.0 \
      --untie-embeddings-and-output-weights \
      --position-embedding-type rope \
      --rotary-percent 1.0 \
      --rotary-base 500000 \
      --use-rotary-position-embeddings \
      --swiglu \
      --tensor-model-parallel-size 1  \
      --pipeline-model-parallel-size 1  \
      --num-layers 32  \
      --hidden-size 4096  \
      --ffn-hidden-size 14336 \
      --load ${CHECKPOINT}  \
      --num-attention-heads 32  \
      --max-position-embeddings 8192  \
      --bf16  \
      --micro-batch-size 1  \
      --seq-length 8192


================================================
FILE: examples/inference/llama_mistral/run_text_generation_mistral.sh
================================================
#!/bin/bash
# This example will start serving the Mistral-7B-v0.3 model
export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1

DISTRIBUTED_ARGS="--nproc_per_node 1 \
                  --nnodes 1 \
                  --node_rank 0 \
                  --master_addr 0.0.0.0 \
                  --master_port 6000"

# Ensure CHECKPOINT and TOKENIZER_MODEL are provided
if [ -z "$1" ] || [ -z "$2" ]; then
  echo "Error: You must provide CHECKPOINT and TOKENIZER_MODEL as command-line arguments."
  echo "Usage: $0 /path/to/checkpoint /path/to/tokenizer_model"
  exit 1
fi

# Assign command-line arguments to variables
CHECKPOINT=$1
TOKENIZER_MODEL=$2

pip install flask-restful

torchrun $DISTRIBUTED_ARGS tools/run_text_generation_server.py   \
       --tokenizer-type HuggingFaceTokenizer \
       --tokenizer-model ${TOKENIZER_MODEL} \
       --use-checkpoint-args \
       --apply-layernorm-1p \
       --transformer-impl transformer_engine \
       --normalization RMSNorm \
       --group-query-attention \
       --num-query-groups 8 \
       --no-masked-softmax-fusion \
       --use-flash-attn \
       --untie-embeddings-and-output-weights \
       --disable-bias-linear \
       --position-embedding-type rope \
       --rotary-percent 1.0 \
       --rotary-base 1000000 \
       --swiglu \
       --ffn-hidden-size 14336 \
       --tensor-model-parallel-size 1  \
       --pipeline-model-parallel-size 1  \
       --num-layers 32  \
       --hidden-size 4096  \
       --load ${CHECKPOINT}  \
       --num-attention-heads 32  \
       --max-position-embeddings 4096  \
       --bf16  \
       --micro-batch-size 1  \
       --seq-length 4096  \
       --seed 101


================================================
FILE: examples/inference/run_text_generation_server_345M.sh
================================================
#!/bin/bash
# This example will start serving the 345M model.
DISTRIBUTED_ARGS="--nproc_per_node 1 \
                  --nnodes 1 \
                  --node_rank 0 \
                  --master_addr localhost \
                  --master_port 6000"

CHECKPOINT=<Path to checkpoint (e.g /345m)>
VOCAB_FILE=<Path to vocab.json (e.g. /gpt2-vocab.json)>
MERGE_FILE=<Path to merges.txt (e.g. /gpt2-merges.txt)>

export CUDA_DEVICE_MAX_CONNECTIONS=1

pip install flask-restful

torchrun $DISTRIBUTED_ARGS tools/run_text_generation_server.py   \
       --tensor-model-parallel-size 1  \
       --pipeline-model-parallel-size 1  \
       --num-layers 24  \
       --hidden-size 1024  \
       --load ${CHECKPOINT}  \
       --num-attention-heads 16  \
       --max-position-embeddings 1024  \
       --tokenizer-type GPT2BPETokenizer  \
       --fp16  \
       --micro-batch-size 1  \
       --seq-length 1024  \
       --vocab-file $VOCAB_FILE  \
       --merge-file $MERGE_FILE  \
       --seed 42


================================================
FILE: examples/inference/run_text_generation_server_345M_8_tensor_parallel.sh
================================================
#!/bin/bash
# This example will start serving the 345M model that is partitioned 8 way tensor parallel
DISTRIBUTED_ARGS="--nproc_per_node 8 \
                  --nnodes 1 \
                  --node_rank 0 \
                  --master_addr localhost \
                  --master_port 6000"

CHECKPOINT=<Path to checkpoint (e.g /345m)>
VOCAB_FILE=<Path to vocab.json (e.g. /gpt2-vocab.json)>
MERGE_FILE=<Path to merges.txt (e.g. /gpt2-merges.txt)>

pip install flask-restful

python -m torch.distributed.launch $DISTRIBUTED_ARGS tools/run_text_generation_server.py   \
       --tensor-model-parallel-size 8  \
       --pipeline-model-parallel-size 1  \
       --num-layers 24  \
       --hidden-size 1024  \
       --load ${CHECKPOINT}  \
       --num-attention-heads 16  \
       --max-position-embeddings 1024  \
       --tokenizer-type GPT2BPETokenizer  \
       --fp16  \
       --micro-batch-size 1  \
       --seq-length 1024  \
       --vocab-file $VOCAB_FILE  \
       --merge-file $MERGE_FILE  \
       --seed 42


================================================
FILE: examples/inference/t5/simple_t5_batch_inference.py
================================================
# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.

import os
import sys
from argparse import Namespace

import torch

import pretrain_t5
from megatron.core.inference.engines import AbstractEngine, StaticInferenceEngine
from megatron.core.inference.inference_request import InferenceRequest
from megatron.core.inference.model_inference_wrappers.inference_wrapper_config import (
    InferenceWrapperConfig,
)
from megatron.core.inference.model_inference_wrappers.t5.t5_inference_wrapper import (
    T5InferenceWrapper,
)
from megatron.core.inference.sampling_params import SamplingParams
from megatron.core.inference.text_generation_controllers.encoder_decoder_text_generation_controller import (
    EncoderDecoderTextGenerationController,
)
from megatron.core.tokenizers.utils.build_tokenizer import build_tokenizer
from megatron.core.transformer.module import MegatronModule
from pretrain_t5 import model_provider

sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
)

from typing import List

from megatron.core import mpu
from megatron.training import get_args, get_model, get_tokenizer
from megatron.training.checkpointing import load_checkpoint
from megatron.training.initialize import initialize_megatron


def add_text_generate_args(parser):
    """Text generation arguments."""
    group = parser.add_argument_group(title='text generation')

    group.add_argument("--temperature", type=float, default=1.0, help='Sampling temperature.')
    group.add_argument("--top_k", type=int, default=1, help='Top k sampling.')
    group.add_argument("--top_p", type=float, default=0.0, help='Top p sampling.')
    group.add_argument(
        "--return-log-probs",
        action='store_true',
        default=False,
        help='Return the log probabilities of the final output tokens',
    )
    group.add_argument(
        "--num-tokens-to-generate",
        type=int,
        default=30,
        help='Number of tokens to generate for each prompt',
    )
    group.add_argument(
        "--encoder-prompts",
        metavar='N',
        type=str,
        nargs='+',
        help='Encoder input prompts with each prompt within quotes and separated by space',
    )
    group.add_argument(
        "--max-batch-size", type=int, default=1, help='Max number of prompts to process at once'
    )
    return parser


def get_inference_engine(args: Namespace, model: MegatronModule) -> AbstractEngine:
    """Utility to get the relevant backend for running inference

    This function will automatically chose the TRTLLMBackend when possible, and if not revert to Mcore backend if the user does not specify any backends. TRT LLM Backend is not implmented yet.

    Args:
        args (Namespace): The user arguments parsed from command line
        model (MegatronModule): The megatron model .

    Returns:
        AbstractBackend: The chosen backend
    """
    # Build tokenizer
    tokenizer = build_tokenizer(args)

    inference_wrapper_config = InferenceWrapperConfig(
        hidden_size=args.hidden_size,
        inference_batch_times_seqlen_threshold=args.inference_batch_times_seqlen_threshold,
        fp32_residual_connection=args.fp32_residual_connection,
        params_dtype=args.params_dtype,
        padded_vocab_size=args.padded_vocab_size,
    )

    inference_wrapped_model = T5InferenceWrapper(model, inference_wrapper_config)
    text_generation_controller = EncoderDecoderTextGenerationController(
        inference_wrapped_model=inference_wrapped_model, tokenizer=tokenizer
    )
    return StaticInferenceEngine(
        text_generation_controller=text_generation_controller, max_batch_size=args.max_batch_size
    )


def main():
    """Main program."""

    # Note: The default args passed here can be overwritten by using appropriate params (check arguments.py file)
    # Micro batch size is not needed to be set by user. (It is calculated based on inference-batch-times-seqlen-threshold argument)
    initialize_megatron(
        extra_args_provider=add_text_generate_args,
        args_defaults={
            'no_load_rng': True,
            'no_load_optim': True,
            'micro_batch_size': 1,
            'exit_on_missing_checkpoint': True,
        },
    )

    # Set up model and load checkpoint
    model = get_model(model_provider, wrap_with_ddp=False)
    load_checkpoint(model, None, None)
    model = model[0]

    args = get_args()

    inference_engine = get_inference_engine(args, model)

    sampling_params = SamplingParams(
        temperature=args.temperature,
        top_k=args.top_k,
        top_p=args.top_p,
        return_log_probs=args.return_log_probs,
        num_tokens_to_generate=args.num_tokens_to_generate,
    )

    # Build tokenizer
    tokenizer = build_tokenizer(args)

    decoder_prompts = [""] * len(
        args.encoder_prompts
    )  # for T5, the prompt is provided as encoder input, hence decoder_prompts is empty
    args.prompts = decoder_prompts

    results: List[InferenceRequest] = inference_engine.generate(
        prompts=args.prompts,
        add_BOS=True,
        encoder_prompts=args.encoder_prompts,
        sampling_params=sampling_params,
    )

    if torch.distributed.get_rank() == 0:
        for idx, result in enumerate(results):
            print(f' \n------------- RESULT FOR PROMPT {idx} --------------- ')
            result = {
                'id': result.request_id,
                'input_prompt': result.prompt,
                'generated_text': result.generated_text,
                'generated_tokens': result.generated_tokens,
            }
            print(result)


if __name__ == "__main__":
    main()


================================================
FILE: examples/llama/README.md
================================================
# Llama Models

## Table of contents
- [1. Overview](#1-overview)
- [2. Prerequisites](#2-prerequisites)
- [3. Training Setup](#3-training-setup)
- [4. Configuration](#4-configuration)
- [5. Test Datasets](#5-test-datasets)
- [6. FP8 Debugging](#6-fp8-debugging)

## 1. Overview
<a id="overview" name="overview"></a>

Train Llama models using FP8 precision with Megatron-Core.

## 2. Prerequisites
<a id="prerequisites" name="prerequisites"></a>

```bash
# Clone repository
export HOST_MEGATRON_LM_DIR="/path/to/your/host/megatron-lm"
git clone https://github.com/NVIDIA/Megatron-LM.git "$HOST_MEGATRON_LM_DIR"
cd "$HOST_MEGATRON_LM_DIR"
git checkout "core_r0.12.0"

# Set paths
export HOST_CHECKPOINT_PATH="./checkpoints/llama3_8b_fp8"
export HOST_TENSORBOARD_LOGS_PATH="./tensorboard_logs/llama3_8b_fp8"

# Optional: For real data
# export HOST_TOKENIZER_MODEL_PATH="/path/to/host/tokenizer.model"
# export HOST_DATA_PREFIX="/path/to/host/mydata_prefix"
```

## 3. Training Setup
<a id="training-setup" name="training-setup"></a>

### Using Mock Data
```bash
PYTORCH_IMAGE="nvcr.io/nvidia/pytorch:25.03-py3"

docker run --rm --gpus all --ipc=host --ulimit memlock=-1 \
  -v "${HOST_MEGATRON_LM_DIR}:/workspace/megatron-lm" \
  -v "${HOST_CHECKPOINT_PATH}:/workspace/checkpoints" \
  -v "${HOST_TENSORBOARD_LOGS_PATH}:/workspace/tensorboard_logs" \
  --workdir /workspace/megatron-lm \
  $PYTORCH_IMAGE \
  bash examples/llama/train_llama3_8b_h100_fp8.sh \
    /workspace/checkpoints \
    /workspace/tensorboard_logs \
  2>&1 | tee "${HOST_TENSORBOARD_LOGS_PATH}/training_mock_$(date +'%y-%m-%d_%H-%M-%S').log"
```

### Using Custom Data and Tokenizer
```bash
PYTORCH_IMAGE="nvcr.io/nvidia/pytorch:25.03-py3"

docker run --rm --gpus all --ipc=host --ulimit memlock=-1 \
  -v "${HOST_MEGATRON_LM_DIR}:/workspace/megatron-lm" \
  -v "${HOST_CHECKPOINT_PATH}:/workspace/checkpoints" \
  -v "${HOST_TENSORBOARD_LOGS_PATH}:/workspace/tensorboard_logs" \
  -v "${HOST_TOKENIZER_MODEL_PATH}:/workspace/tokenizer_model" \
  -v "$(dirname "${HOST_DATA_PREFIX}"):/workspace/data_dir" \
  --workdir /workspace/megatron-lm \
  $PYTORCH_IMAGE \
  bash examples/llama/train_llama3_8b_h100_fp8.sh \
    /workspace/checkpoints \
    /workspace/tensorboard_logs \
    /workspace/tokenizer_model \
    "/workspace/data_dir/$(basename "${HOST_DATA_PREFIX}")" \
  2>&1 | tee "${HOST_TENSORBOARD_LOGS_PATH}/training_custom_$(date +'%y-%m-%d_%H-%M-%S').log"
```

## 4. Configuration
<a id="configuration" name="configuration"></a>

Default parallelism strategy:
- Tensor Parallel: 1
- Pipeline Parallel: 1
- Context Parallel: 2

Llama-3-8B architecture:
- 32 layers
- Hidden size: 4096
- FFN hidden size: 14336
- Attention heads: 32
- Query groups: 8
- Sequence length: 8192
- RMSNorm normalization with SwiGLU and RoPE

Key training parameters:
- Micro-batch size: 1
- Global batch size: 128
- Learning rate: 1.5e-4
- Min learning rate: 1.0e-5
- Weight decay: 0.1
- FP8 format: hybrid

You can modify these parameters directly in the `train_llama3_8b_h100_fp8.sh` script.

This configuration follows those defined in NeMo Framework's performance scripts, which can be found at [https://github.com/NVIDIA/NeMo/tree/main/scripts/performance](https://github.com/NVIDIA/NeMo/tree/main/scripts/performance). 

### FP8 Performance

| Model | #-GPUs | GBS | MBS | Seq Length | TP | PP | CP | VP | EP | GA | Tokens/sec/GPU | TFLOP/sec/GPU |
|-------|--------|-----|-----|------------|----|----|----|----|----|----|----------------|---------------|
| LLAMA3-8B | 8 | 128 | 1 | 8192 | 1 | 1 | 2 | 1 | 1 | 32 | 13812 | 800 |
| LLAMA3-70B | 64 | 128 | 1 | 8192 | 4 | 8 | 1 | 5 | 1 | 64 | 1621 | 780 |
| LLAMA3-405B | 1024 | 512 | 1 | 8192 | 8 | 8 | 2 | 8 | 1 | 64 | 315 | 834 |

Legend:
- GBS: Global Batch Size
- MBS: Micro Batch Size
- TP: Tensor Parallel size
- PP: Pipeline Parallel size
- CP: Context Parallel size
- VP: Virtual Pipeline stages
- EP: Expert Parallel size
- GA: Gradient Accumulation steps

As NeMo uses Megatron-Core, for the latest performance benchmarks, please refer to the official [NeMo documentation](https://docs.nvidia.com/nemo-framework/user-guide/latest/performance/performance-summary.html).

## 5. Test Datasets
<a id="test-datasets" name="test-datasets"></a>

Recommended datasets:
1. **WikiText-103**: https://huggingface.co/datasets/Salesforce/wikitext

Preprocess datasets:
```bash
python "${HOST_MEGATRON_LM_DIR}/tools/preprocess_data.py" \
       --input your_dataset.json \
       --output-prefix test_dataset \
       --tokenizer-type HuggingFaceTokenizer \
       --tokenizer-model /path/to/tokenizer.model \
       --append-eod
```

## 6. FP8 Training Considerations
<a id="fp8-training-considerations" name="fp8-training-considerations"></a>

- **Hardware**: Requires NVIDIA Hopper, Ada, or Blackwell GPUs for FP8 support
   
- **Troubleshooting**: If you encounter NaN values or instability with FP8 training, please refer to [Transformer Engine](https://github.com/NVIDIA/TransformerEngine).


================================================
FILE: examples/llama/train_llama3_8b_h100_fp8.sh
================================================
#!/bin/bash

# Environment variables for performance tuning
export CUDA_DEVICE_MAX_CONNECTIONS=${CUDA_DEVICE_MAX_CONNECTIONS:-1}
#export LOG_LEVEL=${LOG_LEVEL:-INFO}
#export NCCL_IB_TIMEOUT=${NCCL_IB_TIMEOUT:-19}
#export NVTE_FWD_LAYERNORM_SM_MARGIN=${NVTE_FWD_LAYERNORM_SM_MARGIN:-16}
#export NVTE_BWD_LAYERNORM_SM_MARGIN=${NVTE_BWD_LAYERNORM_SM_MARGIN:-16}
#export NCCL_P2P_NET_CHUNKSIZE=${NCCL_P2P_NET_CHUNKSIZE:-2097152}
#export NCCL_AVOID_RECORD_STREAMS=${NCCL_AVOID_RECORD_STREAMS:-1}

CHECKPOINT_PATH=${1:-"checkpoints/llama3_8b_fp8"}
TENSORBOARD_LOGS_PATH=${2:-"tensorboard_logs/llama3_8b_fp8"}
TOKENIZER_ARG=${3:-"MOCK"} # Path to tokenizer model, or "MOCK"
DATA_ARG=${4:-"MOCK"}     # Data prefix, or "MOCK"

# Create directories if they don't exist
mkdir -p "$(dirname "$CHECKPOINT_PATH")"
mkdir -p "$(dirname "$TENSORBOARD_LOGS_PATH")"

# Distributed training setup
GPUS_PER_NODE=8
NUM_NODES=1
MASTER_ADDR=${MASTER_ADDR:-localhost}
MASTER_PORT=${MASTER_PORT:-6000}
NODE_RANK=${NODE_RANK:-0}
WORLD_SIZE=$(($GPUS_PER_NODE*$NUM_NODES))

# Path to the pretrain_gpt.py script, assuming this script is run from the root of the Megatron-LM repository
PRETRAIN_SCRIPT_PATH="pretrain_gpt.py"

# Fixed model and training parameters
TP_SIZE=1     
CP_SIZE=1     
PP_SIZE=1     
MICRO_BATCH_SIZE=1
GLOBAL_BATCH_SIZE=128
NUM_LAYERS=32  
DTYPE="fp8"
SEQ_LENGTH=8192
MAX_POSITION_EMBEDDINGS=8192

# Data cache path (useful for both mock and real data)
DATA_CACHE_PATH="${PWD}/benchmark_cache_llama3_8b_fp8"
mkdir -p "$DATA_CACHE_PATH"

DISTRIBUTED_ARGS=(
    --nproc_per_node $GPUS_PER_NODE
    --nnodes $NUM_NODES
    --node_rank $NODE_RANK
    --master_addr $MASTER_ADDR
    --master_port $MASTER_PORT
)

MODEL_ARGS=(
    --use-mcore-models
    --num-layers $NUM_LAYERS
    --hidden-size 4096
    --ffn-hidden-size 14336
    --num-attention-heads 32
    --group-query-attention
    --num-query-groups 8
    --kv-channels 128
    --seq-length $SEQ_LENGTH
    --max-position-embeddings $MAX_POSITION_EMBEDDINGS
    --position-embedding-type rope
    --rotary-base 1000000 
    --rotary-percent 1.0
    --attention-dropout 0.0
    --hidden-dropout 0.0
    --swiglu
    --normalization RMSNorm
    --init-method-std 0.0134
    --attention-backend fused
    --apply-layernorm-1p 
    --untie-embeddings-and-output-weights
    --disable-bias-linear 
)

TRAINING_ARGS=(
    --micro-batch-size $MICRO_BATCH_SIZE
    --global-batch-size $GLOBAL_BATCH_SIZE
    --train-samples 1953125000
    --lr-decay-samples 1949218748
    --lr-warmup-samples 3906252
    --lr 0.00015
    --min-lr 0.00001
    --decoupled-lr 5.0e-4      # Specific to decoupled AdamW, ensure optimizer is compatible
    --decoupled-min-lr 4.5e-5  # Specific to decoupled AdamW
    --lr-decay-style cosine
    --clip-grad 1.0
    --weight-decay 0.1
    --adam-beta1 0.9
    --adam-beta2 0.95
    --bf16
    --grad-reduce-in-bf16
    --cross-entropy-loss-fusion
    --calculate-per-token-loss 
    --manual-gc 
    --empty-unused-memory-level 1 
    --exit-duration-in-mins 235 
)

# Conditional arguments based on DTYPE (FP8)
DTYPE_ARGS=()
if [[ "$DTYPE" == "fp8" ]]; then
    DTYPE_ARGS+=(
        "--fp8-format hybrid"
        "--fp8-amax-history-len 1024"
        "--fp8-amax-compute-algo max"
        "--fp8-param-gather"
    )
fi

# Model parallelism arguments
MODEL_PARALLEL_ARGS=(
    --tensor-model-parallel-size $TP_SIZE
    --context-parallel-size $CP_SIZE
    # --pipeline-model-parallel-size $PP_SIZE # Not explicitly set in llama script options, assume 1 if not multi-node PP
    --sequence-parallel  # Always enable sequence parallelism with TP_SIZE=2
)

# Distributed Data Parallel (DDP) arguments
# From original script's ddp_args
DDP_ARGS=(
    --use-distributed-optimizer
    --overlap-grad-reduce
    --overlap-param-gather
)
TRAINING_ARGS+=("${DDP_ARGS[@]}")


# Data arguments (conditional for mock vs real data)
DATA_ARGS_LIST=()
if [[ "$TOKENIZER_ARG" == "MOCK" ]] || [[ "$DATA_ARG" == "MOCK" ]] || [[ -z "$TOKENIZER_ARG" ]]; then
    DATA_ARGS_LIST+=(
        "--mock-data"
        "--tokenizer-type NullTokenizer"
        "--vocab-size 128256" 
        "--data-cache-path ${DATA_CACHE_PATH}"
        "--tiktoken-pattern v2" 
        "--split '99,1,0'"
        "--no-create-attention-mask-in-dataloader"
        "--no-mmap-bin-files"
        "--num-workers 1"
    )
else
    # Settings for real data
    DATA_ARGS_LIST+=(
        "--data-path $DATA_ARG"
        "--tokenizer-type HuggingFaceTokenizer" 
        "--tokenizer-model $TOKENIZER_ARG"
        "--data-cache-path ${DATA_CACHE_PATH}"
        "--split '99,1,0'"
        "--no-create-attention-mask-in-dataloader"
        "--no-mmap-bin-files"
        "--num-workers 1"
        # Note: --vocab-size might be inferred by HuggingFaceTokenizer or might need to be explicit.
        "--vocab-size 128256"
    )
fi

EVAL_AND_LOGGING_ARGS=(
    --log-interval 1
    --eval-iters 32
    --eval-interval 100
    --save-interval 1000
    --log-throughput
    --profile
    --profile-step-start 4
    --profile-step-end 6
    --ckpt-format torch_dist 
    --distributed-timeout-minutes 60
    --save "$CHECKPOINT_PATH"
    --load "$CHECKPOINT_PATH" 
    --tensorboard-dir "$TENSORBOARD_LOGS_PATH"
)

# Ensure pretrain_gpt.py is found
if [ ! -f "$PRETRAIN_SCRIPT_PATH" ]; then
    echo "Error: pretrain_gpt.py not found at $PRETRAIN_SCRIPT_PATH"
    echo "Please ensure you are running this script from the root of the Megatron-LM repository, and pretrain_gpt.py is present."
    exit 1
fi

# Run the training command
torchrun ${DISTRIBUTED_ARGS[@]} \
    "$PRETRAIN_SCRIPT_PATH" \
    ${MODEL_ARGS[@]} \
    ${TRAINING_ARGS[@]} \
    ${DTYPE_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${DATA_ARGS_LIST[@]} \
    ${EVAL_AND_LOGGING_ARGS[@]}

set +x

================================================
FILE: examples/mamba/.gitignore
================================================
checkpoints/
data-cache/
tensorboard/
triton-cache/


================================================
FILE: examples/mamba/Dockerfile
================================================
FROM nvcr.io/nvidia/pytorch:24.01-py3

RUN pip uninstall -y triton && \
    pip install triton==2.1.0 sentencepiece==0.1.99 flask-restful

# The causal-conv1d and mamba-ssm packages below are built from scratch here
# (which takes significant time) because there are no wheels available on PyPI
# for these relatively newer versions of the packages that are compatible with
# the older NGC-variant PyTorch version (e.g. version 2.2.0.dev231106) that we
# are using (in the NGC base container). Generally, if the package is not
# compatible with the PyTorch version, then it will generate a Python import
# error. The package authors tend to only release wheels for new versions of
# these pacakges which are compatible with the versions of regular PyTorch and
# NGC-variant PyTorch that are newer at the time of release. So, to use newer
# versions of these packages with relatively older versions of the NGC PyTorch
# container, we tend to have to build the packages from scratch.

RUN cd /tmp && \
    git clone https://github.com/Dao-AILab/causal-conv1d.git && \
    cd causal-conv1d && \
    git checkout v1.2.2.post1 && \
    CAUSAL_CONV1D_FORCE_BUILD=TRUE pip install . && \
    cd .. && \
    rm -rf causal-conv1d

RUN cd /tmp && \
    git clone https://github.com/state-spaces/mamba.git && \
    cd mamba && \
    git checkout v2.0.3 && \
    MAMBA_FORCE_BUILD=TRUE pip install . && \
    cd .. && \
    rm -rf mamba


================================================
FILE: examples/mamba/README.md
================================================
# Mamba-based Language Models

## Introduction

This document is an entrypoint into the code used for
<em>[An Empirical Study of Mamba-based Language Models](https://arxiv.org/abs/2406.07887)</em>.

We are releasing the parameters for some of the models described in that
technical report via
[HuggingFace](https://huggingface.co/collections/nvidia/ssms-666a362c5c3bb7e4a6bcfb9c).
The code in the `main` branch is no longer compatible with the `Mamba2-*`
checkpoints. You can load them using the
[fixed snapshot of the code used for the technical report](https://github.com/NVIDIA/Megatron-LM/tree/ssm/examples/mamba).

## Installation

Create and run a Docker container using the [Dockerfile](./Dockerfile).

```
docker build -t your_image_name:your_tag .
docker run --gpus all -it --rm \
  -v /path/to/megatron:/workspace/megatron \
  -v /path/to/dataset:/workspace/dataset \
  -v /path/to/checkpoints:/workspace/checkpoints \
  -w /workspace/megatron/examples/mamba \
  your_image_name:your_tag
```

## Train

[`train.sh`](./train.sh) is an example pretraining script, showing how to run on
a single node. Select between 800M-scale and 8B-scale models by setting the
`MODEL_SCALE` variable. The 8B-scale hybrid model architecture is the same as
the one described in the technical report.

## Text Generation

Use [`run_text_gen_server_8b.sh`](./run_text_gen_server_8b.sh) to start a text
generation server using an 8B hybrid checkpoint. This is configured to run the
8B hybrid model described in the technical report, with tensor model parallel
set to 1.

The arguments in the script will need to be changed if using a checkpoint with a
different model parallel configuration or other differences, such as model
architecture. For example, to run the 8B pure Mamba-2 model, change
`--hybrid-layer-pattern` to use only `M` symbols (e.g., 56 `M`s for the 8B
model), or remove it entirely.

Use [`run_text_gen_server_8b_gpt3.sh`](./run_text_gen_server_8b_gpt3.sh) to start
a text generation server using the 8B reference Transformer checkpoint.

## Checkpoint Formats

For inference, the model must be configured to match the checkpoint file used,
including the hybrid layer configuration and model parallel configuration.

If you need to convert a hybrid checkpoint file to a different tensor parallel
or pipeline parallel size, use
[the hybrid conversion script](../../tools/checkpoint/hybrid_conversion.py).
There is an example run command at the end of that file.

Before running that script, you will need to set `PYTHONPATH` to include the
root directory of your Megatron-LM repository clone.

```
export PYTHONPATH=<path-to-megatron>:PYTHONPATH
```

## Hybrid Options

`--hybrid-layer-pattern PATTERN` specifies the layer type for every layer in
the model using a string of single-character symbols:

* `M` — Mamba layer
* `*` — Attention layer
* `-` — MLP layer
* `E` — MoE layer

The number of layers is derived from the pattern length, so `--num-layers`
should not be specified when `--hybrid-layer-pattern` is used.

For example, the 8B hybrid model described in the technical report uses:

```
--hybrid-layer-pattern "M-M-M--M-M*-M-M-M-M--M*-M-M-M-M-M*--M-M-M-M-M*-M--M-M-M-"
```

This is a 56-layer model with 4 attention layers, 28 MLP layers, and 24 Mamba
layers.

A pure Mamba model uses only `M` symbols (e.g., `MMMMMMMM` for 8 layers).
A pure transformer model uses only `*` and `-` symbols.

### Pipeline parallelism

Use `|` to define pipeline stage boundaries for flexible virtual pipeline
parallelism (fVPP). For example, `M-M-|M-M*-|M-M-|M-M*-` defines 4 pipeline
segments. The number of segments must be evenly divisible by
`--pipeline-model-parallel-size`.

### Multi-Token Prediction (MTP)

Use `/` to append MTP layer patterns. Each pattern after the separator
represents one MTP prediction depth. For example, `M*M*/MM/MM` has main
pattern `M*M*` with MTP pattern `MM` repeated for 2 depths.

### Deprecated options

`--hybrid-override-pattern`, `--hybrid-attention-ratio`, and
`--hybrid-mlp-ratio` are deprecated. Use `--hybrid-layer-pattern` instead.

## Mamba vs Mamba-2

This codebase currently only supports Mamba-2, and not the original version of
Mamba. However, the
[fixed snapshot of the code used for the technical report](https://github.com/NVIDIA/Megatron-LM/tree/ssm/examples/mamba)
can be configured to run the original version of Mamba.


================================================
FILE: examples/mamba/run_text_gen_server_8b.sh
================================================
#!/bin/bash

# Use: ./run_text_gen_server_8b.sh <checkpoint-path> <tokenizer-path>
# To launch the client: python ../../tools/text_generation_cli.py <URL-provided-by-server>

CHECKPOINT_PATH=$1
TOKENIZER_PATH=$2

HYBRID_LAYER_PATTERN="M-M-M--M-M*-M-M-M-M--M*-M-M-M-M-M*--M-M-M-M-M*-M--M-M-M-"

DISTRIBUTED_ARGS="--nproc_per_node 1 \
                  --nnodes 1 \
                  --node_rank 0 \
                  --master_addr localhost \
                  --master_port 6000"

export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NCCL_IB_TIMEOUT=19
export NCCL_IB_QPS_PER_CONNECTION=4

export TRITON_CACHE_DIR="./triton-cache/"
export TRITON_CACHE_MANAGER="megatron.core.ssm.triton_cache_manager:ParallelFileCacheManager"

torchrun $DISTRIBUTED_ARGS ../../tools/run_mamba_text_generation_server.py \
       --tensor-model-parallel-size 1  \
       --pipeline-model-parallel-size 1  \
       --untie-embeddings-and-output-weights \
       --hybrid-layer-pattern ${HYBRID_LAYER_PATTERN} \
       --hidden-size 4096  \
       --load ${CHECKPOINT_PATH}  \
       --num-attention-heads 32  \
       --group-query-attention \
       --num-query-groups 8 \
       --attention-dropout 0.0 \
       --hidden-dropout 0.0 \
       --disable-bias-linear \
       --normalization RMSNorm \
       --seq-length 4096  \
       --max-position-embeddings 4096  \
       --position-embedding-type none \
       --tokenizer-type GPTSentencePieceTokenizer  \
       --tokenizer-model ${TOKENIZER_PATH} \
       --distributed-backend nccl \
       --distributed-timeout-minutes 1440 \
       --bf16  \
       --micro-batch-size 1  \
       --use-mcore-models \
       --spec megatron.core.models.mamba.mamba_layer_specs mamba_stack_spec \
       --seed 42


================================================
FILE: examples/mamba/run_text_gen_server_8b_gpt3.sh
================================================
#!/bin/bash

# Use: ./run_text_gen_server_8b_gpt3.sh <checkpoint-path> <tokenizer-path>
# To launch the client: python ../../tools/text_generation_cli.py <URL-provided-by-server>

CHECKPOINT_PATH=$1
TOKENIZER_PATH=$2

DISTRIBUTED_ARGS="--nproc_per_node 1 \
                  --nnodes 1 \
                  --node_rank 0 \
                  --master_addr localhost \
                  --master_port 6000"

export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NCCL_IB_TIMEOUT=19
export NCCL_IB_QPS_PER_CONNECTION=4

torchrun $DISTRIBUTED_ARGS ../../tools/run_text_generation_server.py \
       --tensor-model-parallel-size 1  \
       --pipeline-model-parallel-size 1  \
       --use-flash-attn \
       --apply-layernorm-1p \
       --untie-embeddings-and-output-weights \
       --num-layers 32  \
       --hidden-size 4096  \
       --load ${CHECKPOINT_PATH}  \
       --num-attention-heads 32  \
       --attention-dropout 0.0 \
       --hidden-dropout 0.0 \
       --disable-bias-linear \
       --seq-length 4096  \
       --max-position-embeddings 4096  \
       --position-embedding-type rope \
       --rotary-percent 0.5 \
       --squared-relu \
       --tokenizer-type GPTSentencePieceTokenizer  \
       --tokenizer-model ${TOKENIZER_PATH} \
       --distributed-backend nccl \
       --distributed-timeout-minutes 1440 \
       --bf16  \
       --micro-batch-size 1  \
       --use-mcore-models \
       --transformer-impl local \
       --seed 42


================================================
FILE: examples/mamba/train.sh
================================================
#!/bin/bash

# Use: ./train.sh <data-path> <tokenizer-path>

MODEL_SCALE="800M" # or "8B"

case "${MODEL_SCALE}" in
    "800M")
        TENSOR_MODEL_PARALLEL_SIZE=1
        HYBRID_LAYER_PATTERN="M-M-M--M-*M-M-M-M--*M-M-M-M-*M--M-M-M-*M-M--M-M-"
        HIDDEN_SIZE=1024
        NUM_ATTENTION_HEADS=16
        GLOBAL_BATCH_SIZE=32
        ;;
    "8B")
        TENSOR_MODEL_PARALLEL_SIZE=4
        HYBRID_LAYER_PATTERN="M-M-M--M-M*-M-M-M-M--M*-M-M-M-M-M*--M-M-M-M-M*-M--M-M-M-"
        HIDDEN_SIZE=4096
        NUM_ATTENTION_HEADS=32
        GLOBAL_BATCH_SIZE=8
        ;;
    *)
        echo "Invalid version specified"
        exit 1
        ;;
esac

DATA_PATH=$1
TOKENIZER_PATH=$2

export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NCCL_IB_TIMEOUT=19
export NCCL_IB_QPS_PER_CONNECTION=4

CHECKPOINT_DIR="./checkpoints"
DATACACHE_DIR="./data-cache"
TENSORBOARD_DIR="./tensorboard"

mkdir -p ${CHECKPOINT_DIR}
mkdir -p ${DATACACHE_DIR}
mkdir -p ${TENSORBOARD_DIR}

export TRITON_CACHE_DIR="./triton-cache/"
export TRITON_CACHE_MANAGER="megatron.core.ssm.triton_cache_manager:ParallelFileCacheManager"

SEQ_LEN=4096
TRAIN_SAMPLES=73242188  # 300B tokens / 4096
LR_WARMUP_SAMPLES=50000
LR_DECAY_SAMPLES=73192188 # TRAIN_SAMPLES - LR_WARMUP_SAMPLES

options=" \
       --tensor-model-parallel-size ${TENSOR_MODEL_PARALLEL_SIZE} \
       --sequence-parallel \
       --pipeline-model-parallel-size 1 \
       --use-distributed-optimizer \
       --overlap-param-gather \
       --overlap-grad-reduce \
       --untie-embeddings-and-output-weights \
       --init-method-std 0.02 \
       --position-embedding-type none \
       --hybrid-layer-pattern ${HYBRID_LAYER_PATTERN} \
       --hidden-size ${HIDDEN_SIZE} \
       --num-attention-heads ${NUM_ATTENTION_HEADS} \
       --group-query-attention \
       --num-query-groups 8 \
       --seq-length ${SEQ_LEN} \
       --max-position-embeddings ${SEQ_LEN} \
       --train-samples ${TRAIN_SAMPLES} \
       --lr-warmup-samples ${LR_WARMUP_SAMPLES} \
       --lr-decay-samples ${LR_DECAY_SAMPLES} \
       --save ${CHECKPOINT_DIR} \
       --load ${CHECKPOINT_DIR} \
       --data-path ${DATA_PATH} \
       --data-cache-path ${DATACACHE_DIR} \
       --split 99,1,0 \
       --tokenizer-type GPTSentencePieceTokenizer \
       --tokenizer-model ${TOKENIZER_PATH} \
       --distributed-backend nccl \
       --micro-batch-size 4 \
       --global-batch-size ${GLOBAL_BATCH_SIZE} \
       --lr 2.5e-4 \
       --min-lr 2.5e-5 \
       --lr-decay-style cosine \
       --weight-decay 0.1 \
       --clip-grad 1.0 \
       --attention-dropout 0.0 \
       --hidden-dropout 0.0 \
       --disable-bias-linear \
       --normalization RMSNorm \
       --adam-beta1 0.9 \
       --adam-beta2 0.95 \
       --log-interval 10 \
       --save-interval 2000 \
       --eval-interval 2000 \
       --eval-iters 32 \
       --bf16 \
       --use-mcore-models \
       --spec megatron.core.models.mamba.mamba_layer_specs mamba_stack_spec \
       --no-create-attention-mask-in-dataloader \
       --tensorboard-dir ${TENSORBOARD_DIR}"

torchrun --nproc_per_node 8 ../../pretrain_mamba.py ${options}


================================================
FILE: examples/mimo/__init__.py
================================================
 

================================================
FILE: examples/mimo/avlm_inference.py
================================================
import argparse
import os
from pathlib import Path
from typing import Union

# hf path
import requests
import torch
from PIL import Image
from transformers import AutoProcessor
from transformers import AutoTokenizer
import soundfile as sf
import io
import numpy as np
import scipy.signal as signal

from examples.mimo.model_providers.llava_avlm import model_provider_llava_avlm
from megatron.core import dist_checkpointing, parallel_state, tensor_parallel
from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed
from megatron.training import print_rank_0
from examples.mimo.data.utils.calculate_audio_tokens import calculate_num_audio_tokens

def init_distributed(tp_size: int = 1, pp_size: int = 1):
    if torch.distributed.is_initialized():
        return
    rank = int(os.environ.get("LOCAL_RANK", 0))
    world_size = int(os.environ.get("WORLD_SIZE", 1))
    torch.cuda.set_device(rank % torch.cuda.device_count())
    torch.distributed.init_process_group("nccl", rank=rank, world_size=world_size)
    parallel_state.initialize_model_parallel(tp_size, pp_size)

def get_input_data(
    processor: AutoProcessor,
    image_processor: AutoProcessor,
    audio_processor: AutoProcessor,
    audio_path: str,
    image_path: str,
    prompt: str,
    device: Union[int, str] = 0):
    """
    Prepare inputs for the MIMO model forward pass.
    """

    def read_audio(audio_path):
        """Process audio file and return tensor."""
        with open(audio_path, 'rb') as f:
            audio_bytes = f.read()
        audio_io = io.BytesIO(audio_bytes)
        waveform, sample_rate = sf.read(audio_io)
        
        # Resample if needed
        fixed_sample_rate = 16000
        if sample_rate != fixed_sample_rate:
            num_samples = int(len(waveform) * fixed_sample_rate / sample_rate)
            waveform = signal.resample(waveform, num_samples)
        
        # Convert to tensor
        audio_tensor = torch.from_numpy(waveform).float()
        return audio_tensor

    def read_image(image_path):
        """Process image file and return tensor."""
        with open(image_path, 'rb') as f:
            image_bytes = f.read()
        image_io = io.BytesIO(image_bytes)
        image = Image.open(image_io)
        image_tensor = torch.from_numpy(np.array(image)).permute(2, 0, 1)  # Convert to CxHxW format
        image_tensor = image_tensor.float() / 255.0  # rescale to [0,1] range
        return image_tensor


    # read audio and image
    audio_tensor = read_audio(audio_path)
    image_tensor = read_image(image_path)

    # set up prompt
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
            ],
        }
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # process audio
    processed_audios = audio_processor(audio_tensor, sampling_rate=16000)
    processed_audios = torch.tensor(processed_audios["input_features"])
    processed_audios = processed_audios.squeeze(0) # remove batch dim
    num_audio_tokens = calculate_num_audio_tokens(audio_tensor.unsqueeze(0), "openai/whisper-base")
    audios_seq_lengths = torch.tensor(num_audio_tokens)
    prompt = prompt.replace("<audio>", "<audio>" * num_audio_tokens)

    # process image
    processed_images = image_processor(
        images=image_tensor,
        return_tensors="pt",
        do_rescale=False,
    )["pixel_values"]
    processed_images = processed_images.squeeze(0) # remove batch dim

    # process prompt
    processed_prompt_inputs = processor(
        images=image_tensor,
        text=prompt,
        add_special_tokens=False,
        return_tensors="pt",
        do_rescale=False,
    )

    # set batch data
    processed_images = processed_images.unsqueeze(0).to(device)
    processed_audios = processed_audios.unsqueeze(0).to(device)
    audios_seq_lengths = audios_seq_lengths.unsqueeze(0).to(device)
    tokens = processed_prompt_inputs["input_ids"].to(device)
    modality_inputs = {
        "images": {"clip_encoder": {"pixel_values": processed_images}},
        "audios": {"whisper_encoder": {"input_features": processed_audios, "seq_lengths": audios_seq_lengths}}
    }
    batch_data = {
        "tokens": tokens,
        "modality_inputs": modality_inputs,
    }

    return batch_data


def main():
    parser = argparse.ArgumentParser("Test loading a distributed LLaVA checkpoint")
    parser.add_argument("--ckpt", required=False, help="Path to checkpoint optional")
    parser.add_argument("--tp", type=int, default=1, help="Tensor parallel size")
    parser.add_argument("--pp", type=int, default=1, help="Pipeline parallel size")
    parser.add_argument("--audio-path", type=str,required=True, help="Path to audio file")
    parser.add_argument("--image-path", type=str,required=True, help="Path to image file")
    parser.add_argument("--prompt", type=str,required=True, help="Prompt")
    args = parser.parse_args()


    init_distributed(args.tp, args.pp)
    model_parallel_cuda_manual_seed(123)

    device = torch.device("cuda")

    model = model_provider_llava_avlm().to(device)

    # Load distributed checkpoint if provided.
    if args.ckpt:
        load_distributed_checkpoint(model, args.ckpt)

    # set tokenizer
    tokenizer = AutoTokenizer.from_pretrained("llava-hf/llava-1.5-7b-hf")
    tokenizer.add_special_tokens({'additional_special_tokens': ["<audio>"]})
    tokenizer.vocab["<audio>"] = 32002
    tokenizer.added_tokens_encoder["<audio>"] = 32002
    tokenizer.added_tokens_decoder[32002] = "<audio>"

    # set processors
    processor = AutoProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf")
    processor.tokenizer = tokenizer
    image_processor = AutoProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf").image_processor
    audio_processor = AutoProcessor.from_pretrained("openai/whisper-base")
    
    
    data = get_input_data(
        processor,
        image_processor,
        audio_processor,
        args.audio_path,
        args.image_path,
        args.prompt,
        device=device)

    # ------------------------------------------------------------------
    # Greedy generation
    # ------------------------------------------------------------------
    max_new_tokens = 128
    model.eval()    

    tokens = data["tokens"]

    with torch.no_grad():
        for _ in range(max_new_tokens):
            seq_len = tokens.size(1)
            position_ids = torch.arange(seq_len, device=tokens.device).unsqueeze(0)
            logits, _ = model(
                input_ids=tokens,
                position_ids=position_ids,
                attention_mask=None,
                modality_inputs=data["modality_inputs"],
            )
            
            # All-gather logits across tensor parallel ranks
            # logits shape: [batch, seq, vocab_parallel_size]
            gathered_logits = tensor_parallel.gather_from_tensor_model_parallel_region(logits)

            # The language model returns logits in [batch, seq, vocab] format.
            next_token_logits = gathered_logits[:, -1, :]
            next_token = torch.argmax(next_token_logits, dim=-1, keepdim=True)

            tokens = torch.cat([tokens, next_token], dim=1)

            if processor.tokenizer.eos_token_id is not None and next_token.item() == processor.tokenizer.eos_token_id:
                break

    # Only decode and print on rank 0
    if torch.distributed.get_rank() == 0:
        generated_text = processor.tokenizer.decode(tokens[0], skip_special_tokens=True)
        print("\n=== Generated text ===\n")
        print(generated_text)


def load_distributed_checkpoint(model: torch.nn.Module, ckpt_dir: str):
    """Load a MIMO model from a Megatron distributed checkpoint directory"""

    if not os.path.isdir(ckpt_dir):
        raise FileNotFoundError(f"Checkpoint directory does not exist: {ckpt_dir}")


    template_sd = {"model": model.sharded_state_dict()}

    loaded_sd = dist_checkpointing.load(template_sd, ckpt_dir)

    model_state_dict = loaded_sd["model"]
    incompat = model.load_state_dict(model_state_dict, strict=False)

    missing = [k for k in incompat.missing_keys if "extra_state" not in k]
    unexpected = [k for k in incompat.unexpected_keys if "extra_state" not in k]
    if missing or unexpected:
        print_rank_0(
            f"[Rank {torch.distributed.get_rank() if torch.distributed.is_initialized() else 0}] "
            f"Checkpoint loaded with mismatches. Missing: {missing}, Unexpected: {unexpected}"
        )

    print_rank_0(
        f"[Rank {torch.distributed.get_rank() if torch.distributed.is_initialized() else 0}] "
        f"Successfully loaded checkpoint from {ckpt_dir}"
    )

    return model


if __name__ == "__main__":
    main()

================================================
FILE: examples/mimo/configs/llava_avlm.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

"""
Configuration utilities for the MIMO implementation of the LLaVA AVLM.
"""


from typing import Optional

import torch

from megatron.core.extensions.transformer_engine import (
    TEColumnParallelLinear,
    TERowParallelLinear,
)
from megatron.core.models.gpt.gpt_layer_specs import (
    get_gpt_layer_with_transformer_engine_spec,
)
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_config import TransformerConfig
from examples.mimo.configs.llava_vlm import get_vicuna_language_model_config


def get_llava_projection_config( 
    hidden_size: int = 4096,
    config: Optional[TransformerConfig] = None,
) -> TransformerConfig:
    """Return a TransformerConfig for the vision projection MLP."""

    cfg = TransformerConfig(num_layers=1, hidden_size=hidden_size, num_attention_heads=1)
    cfg.ffn_hidden_size = 4096
    cfg.bias_activation_fusion = True
    cfg.add_bias_linear = True
    cfg.activation_func = torch.nn.functional.gelu

    # Allow caller overrides.
    if config is not None:
        for field, value in vars(config).items():
            setattr(cfg, field, value)

    return cfg


def get_vicuna_language_layer_spec() -> ModuleSpec:
    """Layer spec for the language model (Transformer-Engine GPT block)."""
    return get_gpt_layer_with_transformer_engine_spec()

def get_llava_projection_layer_spec() -> ModuleSpec:
    """Layer spec for the vision-projection MLP."""

    return ModuleSpec(
        module=MLP,
        submodules=MLPSubmodules(
            linear_fc1=TEColumnParallelLinear,
            linear_fc2=TERowParallelLinear,
        ),
    )


================================================
FILE: examples/mimo/configs/llava_vlm.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

"""
Configuration utilities for the MIMO implementation of the LLaVA VLM.
"""


from typing import Optional

import torch

from megatron.core.extensions.transformer_engine import (
    TEColumnParallelLinear,
    TERowParallelLinear,
)
from megatron.core.models.gpt.gpt_layer_specs import (
    get_gpt_layer_with_transformer_engine_spec,
)
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_config import TransformerConfig


def get_vicuna_language_model_config(  
    config: Optional[TransformerConfig] = None,
) -> TransformerConfig:
    """Return a TransformerConfig tuned for **Vicuna-7B**.

    The hyper-parameters follow the published Vicuna-7B weights (same sizes as
    Llama-7B).
    """

    cfg = TransformerConfig(num_layers=32, hidden_size=4096, num_attention_heads=32)

    # Feed-forward / MLP hidden size (11008 in original Vicuna).
    cfg.ffn_hidden_size = 11008

    # SwiGLU (SiLU-gate) activation.
    cfg.activation_func = torch.nn.functional.silu
    cfg.gated_linear_unit = True

    # Normalisation – RMSNorm
    cfg.normalization = "RMSNorm"
    cfg.rms_norm_eps = 1e-5

    # Positional embeddings – RoPE.
    cfg.position_embedding_type = "rope"
    cfg.rotary_base = 10000
    cfg.rotary_percent = 1.0

    # Sequence length.
    cfg.seq_length = 4096
    cfg.max_position_embeddings = 4096

    # Attention / dropout.
    cfg.attention_dropout = 0.0
    cfg.hidden_dropout = 0.0

    # GQA disabled (queries == heads).
    cfg.num_query_groups = 32

    # Bias usage.
    cfg.add_bias_linear = False

    # Weight sharing.
    cfg.untie_embeddings_and_output_weights = False

    # Kernel / TE fusions.
    cfg.bias_activation_fusion = True
    cfg.masked_softmax_fusion = True
    cfg.persist_layer_norm = True
    cfg.bias_dropout_fusion = True
    cfg.apply_rope_fusion = True

    # Apply user overrides last.
    if config is not None:
        for field, value in vars(config).items():
            setattr(cfg, field, value)

    return cfg

def get_llava_projection_config( 
    hidden_size: int = 4096,
    config: Optional[TransformerConfig] = None,
) -> TransformerConfig:
    """Return a TransformerConfig for the vision projection MLP."""

    cfg = TransformerConfig(num_layers=1, hidden_size=hidden_size, num_attention_heads=1)
    cfg.ffn_hidden_size = 4096
    cfg.bias_activation_fusion = True
    cfg.add_bias_linear = True
    cfg.activation_func = torch.nn.functional.gelu

    # Allow caller overrides.
    if config is not None:
        for field, value in vars(config).items():
            setattr(cfg, field, value)

    return cfg


def get_vicuna_language_layer_spec() -> ModuleSpec:
    """Layer spec for the language model (Transformer-Engine GPT block)."""
    return get_gpt_layer_with_transformer_engine_spec()

def get_llava_projection_layer_spec() -> ModuleSpec:
    """Layer spec for the vision-projection MLP."""

    return ModuleSpec(
        module=MLP,
        submodules=MLPSubmodules(
            linear_fc1=TEColumnParallelLinear,
            linear_fc2=TERowParallelLinear,
        ),
    )


================================================
FILE: examples/mimo/configs/mock.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

"""
Mock configuration utilities for MIMO model with vision encoder.

This module provides functions to create test configurations for:
1. Language model (based on LLaMA architecture)
2. Vision encoder (based on CLIP ViT)
3. Vision projection (MLP)

These configurations are intended for testing and development purposes only.
"""

from typing import Optional

from megatron.core.extensions.transformer_engine import (
    TEColumnParallelLinear,
    TERowParallelLinear,
)
from megatron.core.models.gpt.gpt_layer_specs import (
    get_gpt_layer_with_transformer_engine_spec,
)
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_config import TransformerConfig


def get_mock_language_model_config(config: Optional[TransformerConfig] = None) -> TransformerConfig:
    """
    Create a mock language model configuration.

    Args:
        config: Optional base configuration to modify

    Returns:
        TransformerConfig: Mock configuration for a language model
    """

    config = TransformerConfig(num_layers=1, hidden_size=128, num_attention_heads=4)

    if config is not None:
        for field_name, field_value in vars(config).items():
            setattr(config, field_name, field_value)

    return config

def get_mock_vision_model_config(config: Optional[TransformerConfig] = None) -> TransformerConfig:
    """
    Create a mock vision model configuration.

    Args:
        config: Optional base configuration to modify

    Returns:
        TransformerConfig: Mock configuration for a vision model
    """
    config = TransformerConfig(num_layers=1, hidden_size=128, num_attention_heads=4)

    config.add_bias_linear = True
    config.add_qkv_bias = True
    config.hidden_dropout = 0.0
    config.attention_dropout = 0.0
    config.ffn_hidden_size = config.hidden_size * 4
    config.gated_linear_unit = False
    config.kv_channels = 64
    config.layernorm_zero_centered_gamma = False
    config.apply_query_key_layer_scaling = False
    config.bias_activation_fusion = False
    config.bias_dropout_fusion = False
    config.attention_softmax_in_fp32 = True
    config.normalization = 'LayerNorm'
    config.apply_rope_fusion = False
    return config


def get_mock_projection_config(hidden_size: int = 128) -> TransformerConfig:
    """
    Create a mock projection layer configuration.

    Args:
        hidden_size: Hidden dimension size (used as the vision projection output size)

    Returns:
        TransformerConfig: Mock configuration for a projection layer
    """
    config = TransformerConfig(num_layers=1, hidden_size=hidden_size, num_attention_heads=1)

    config.ffn_hidden_size = hidden_size * 4
    config.gated_linear_unit = False
    config.bias_activation_fusion = False
    config.add_bias_linear = False
    config.normalization = 'LayerNorm'

    return config


def get_mock_language_layer_spec():
    """
    Get the mock layer specification for the language model.

    Returns:
        ModuleSpec: Mock specification for language model layers
    """
    return get_gpt_layer_with_transformer_engine_spec()


def get_mock_vision_layer_spec():
    """
    Get the mock layer specification for the vision model.

    Args:
        normalization: Type of normalization to use

    Returns:
        ModuleSpec: Mock specification for vision model layers
    """
    return get_gpt_layer_with_transformer_engine_spec()


def get_mock_projection_layer_spec():
    """
    Get the mock layer specification for the projection layer.

    Returns:
        ModuleSpec: Mock specification for projection layers
    """
    return ModuleSpec(
        module=MLP,
        submodules=MLPSubmodules(linear_fc1=TEColumnParallelLinear, linear_fc2=TERowParallelLinear),
    )


================================================
FILE: examples/mimo/data/__init__.py
================================================
from .energon_avlm_task_encoder import VisionAudioQASample

all = [
    VisionAudioQASample,
]


================================================
FILE: examples/mimo/data/avlm_sample_loader.py
================================================
import io

def sample_loader(raw: dict) -> dict:
    """
    Load the sample from the raw data.
    Example content of wds data:
        - Example of wds data
            100_100.img
            100_100.json
            100_100.synthesized_683016ff-bb67-4a12-a873-a7e5d4132903.wav.flac
            100_100.synthesized_6a7b3a1c-05a9-4720-b7e6-f7028415dc71.wav.flac
            100_100.synthesized_7c4b39ba-20ef-4319-bd94-291c94ad362a.wav.flac
            100_100.synthesized_932f067b-2b9a-425d-b8e6-f74ca83335a2.wav.flac
            Content of 100_100.json: 
                {
                    "num_image": 7,
                    "length": 2049,
                    "label_length": 157,
                    "conversations": [
                        {
                            "from": "human",
                            "value": "<image><audio>"
                        },
                        {
                            "from": "gpt",
                            "value": "The stop sign is red, while the one-way signs are typically black and white."
                        },
                        ...
                        {
                            "from": "human",
                            "value": "<audio>"
                        },
                        {
                            "from": "gpt",
                            "value": "The purpose of these street signs is to communicate..."
                        }
                    ],
                    "audios": [
                        "100_100.synthesized_6a7b3a1c-05a9-4720-b7e6-f7028415dc71.wav.flac",
                        "100_100.synthesized_932f067b-2b9a-425d-b8e6-f74ca83335a2.wav.flac",
                        "100_100.synthesized_7c4b39ba-20ef-4319-bd94-291c94ad362a.wav.flac",
                        "100_100.synthesized_683016ff-bb67-4a12-a873-a7e5d4132903.wav.flac"
                    ],
                    "images": [
                        "100_100.img"
                    ]
                }
        - Example of read raw data for Energon
            raw["json"] -> json content
            raw["img"] -> bytes image content
            raw["synthesized_6a7b3a1c-05a9-4720-b7e6-f7028415dc71.wav.flac"] -> bytes audio content
            raw["synthesized_932f067b-2b9a-425d-b8e6-f74ca83335a2.wav.flac"] -> bytes audio content
            raw["synthesized_683016ff-bb67-4a12-a873-a7e5d4132903.wav.flac"] -> bytes audio content
    """

    jsn_content = raw["json"]
    conversation = jsn_content["conversations"]
    
    # Get raw image bytes
    image_bytes = raw["img"]

    # Get raw audio bytes
    audio_names = [audio_name.split('.', 1)[1] for audio_name in jsn_content["audios"]] # example of audio_name file: "100_100.synthesized_683016ff-bb67-4a12-a873-a7e5d4132903.wav.flac" -> "synthesized_683016ff-bb67-4a12-a873-a7e5d4132903.wav"
    audio_name = audio_names[0]  # for now, take only the first audio
    audio_bytes = raw[audio_name]

    # process conversation, take only the first turn
    context = conversation[0]["value"]
    answers = conversation[1]["value"]

    # check that context only contains one "<audio>"
    # if contains more than one, replace all occurrences with a single one
    if context.count("<audio>") > 1:
        parts = context.split("<audio>")
        context = parts[0] + "<audio>" + "".join(parts[1:])

    return dict(
        __key__=raw["__key__"],
        context=context,
        answers=answers,
        image=image_bytes,
        audio=audio_bytes,
    )

def part_filter(part: str) -> bool:
    return True

================================================
FILE: examples/mimo/data/energon_avlm_task_encoder.py
================================================
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import argparse
import logging
import os
import sys
from dataclasses import dataclass
from enum import Enum
from typing import Dict, List, Protocol, Union

import torch
import torch.nn.utils.rnn as rnn_utils
from PIL import Image
import numpy as np
import soundfile as sf
from scipy import signal
import io
from megatron.training.global_vars import get_tokenizer

sys.path.append(
    os.path.abspath(
        os.path.join(
            os.path.dirname(__file__),
            os.path.pardir,
            os.path.pardir,
            os.path.pardir,
            "examples/multimodal",
        )
    )
)
from dataloader_provider import train_valid_test_dataloaders_provider
from transformers import AutoProcessor
from examples.mimo.data.utils.calculate_audio_tokens import calculate_num_audio_tokens

from megatron.energon import (
    DefaultTaskEncoder,
    VQASample,
    WorkerConfig,
    get_loader,
    get_train_dataset,
)
from megatron.energon.task_encoder.base import stateless
from megatron.training import get_args

IMAGE_TOKEN = "<image>"
AUDIO_TOKEN = "<audio>"


@dataclass
class ConversationTemplateConfig:
    system: str = None
    chat_template: str = None


@dataclass
class LlavaConversationTemplateConfig(ConversationTemplateConfig):
    """Default system prompt and chat template for Llava training."""

    system: str = None
    chat_template: str = None


@dataclass
class VisionAudioQASample(VQASample):
    """
    Sample type for vision audio question answering.
    Adding audio to the VQASample class.
    """

    #: The input audio tensor in the shape
    audio: torch.Tensor = None


class AVLMModelType(Enum):
    IMAGE_AUDIO_LLAVA_AVLM = "image_audio_llava_avlm"


class AVLMTaskEncoder(
    DefaultTaskEncoder[
        VisionAudioQASample,
        dict,
        dict,
        dict,
    ]
):
    def __init__(
        self,
        model_type: AVLMModelType,
        processor,
        image_processor,
        audio_processor,
        conversation_template_config=None,
    ):
        self.model_type = model_type
        self.processor = processor
        self.image_processor = image_processor
        self.audio_processor = audio_processor
        self.conversation_template_config = conversation_template_config

    def apply_prompt_template(self, input_text: VisionAudioQASample):
        """Create conversation prompt string using HF chat template.

        The first user turn always contains an image placeholder, later turns are text-only.
        Returns a *prompt string* that can be fed into the processor together with an image.
        """

        user_msgs = input_text.context
        bot_msgs = input_text.answers

        def _ensure_list_type(value):
            if isinstance(value, list):
                return value
            return [value]

        user_msgs = _ensure_list_type(user_msgs)
        bot_msgs = _ensure_list_type(bot_msgs)

        conversation = []
        for _, (u_txt, b_txt) in enumerate(zip(user_msgs, bot_msgs)):
            conversation.append(
                {
                    "role": "user",
                    "content": [{"type": "text", "text": u_txt}],
                }
            )
            conversation.append(
                {
                    "role": "assistant",
                    "content": [{"type": "text", "text": b_txt}],
                }
            )

        # Inject optional system message
        if (
            self.conversation_template_config
            and self.conversation_template_config.system
        ):
            conversation.insert(
                0,
                {"role": "system", "content": self.conversation_template_config.system},
            )

        # Select chat template
        if (
            self.conversation_template_config
            and self.conversation_template_config.chat_template
        ):
            self.processor.chat_template = (
                self.conversation_template_config.chat_template
            )
        return self.processor.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=False,
        )

    def _find_pattern_indices(
        self, template, pattern, start_idx=0, allow_first_mismatch=False
    ):
        template_len = len(template)
        pat_len = len(pattern)
        for i in range(start_idx, template_len - pat_len + 1):
            match = template[i : i + pat_len] == pattern
            if torch.all(match) or (allow_first_mismatch and torch.all(match[1:])):
                return i, i + pat_len
        return -1, -1

    @stateless
    def encode_sample(self, sample: VisionAudioQASample):
        """Return tokenised multimodal sample."""
        args = get_args()
        prompt = self.apply_prompt_template(sample)
        logging.debug(f"prompt: {prompt}")

        # Convert raw image bytes to tensor
        if sample.image is not None:
            image_io = io.BytesIO(sample.image)
            image = Image.open(image_io)
            image_tensor = torch.from_numpy(np.array(image)).permute(2, 0, 1)  # Convert to CxHxW format
            image_tensor = image_tensor.float() / 255.0  # rescale to [0,1] range
        else:
            image_tensor = None

        # Convert raw audio data to tensor
        if sample.audio is not None:
            audio_io = io.BytesIO(sample.audio)
            # Read audio first
            waveform, sample_rate = sf.read(audio_io)
            # Resample if needed
            fixed_sample_rate = 16000
            if sample_rate != fixed_sample_rate:
                # Calculate number of samples for 16kHz
                num_samples = int(len(waveform) * fixed_sample_rate / sample_rate)
                # Resample using scipy's resample
                waveform = signal.resample(waveform, num_samples)
            # Convert to tensor
            audio_tensor = torch.from_numpy(waveform).float()
        else:
            audio_tensor = None

        # Process audio + prompt
        # Here, we:
        #  + process the audio
        #  + manually calculate the number of audio tokens, then add them to the prompt
        if audio_tensor is not None:
            processed_audios = self.audio_processor(audio_tensor, sampling_rate=fixed_sample_rate)
            processed_audios = torch.tensor(processed_audios["input_features"])
            processed_audios = processed_audios.squeeze(0)  # remove batch dim
            num_audio_tokens = calculate_num_audio_tokens(audio_tensor.unsqueeze(0), args.audio_encoder_model)
            audios_seq_lengths = torch.tensor(num_audio_tokens)
            processed_prompt = prompt.replace(AUDIO_TOKEN, AUDIO_TOKEN * num_audio_tokens)
        else:
            processed_audios = None
            audios_seq_lengths = None
            processed_prompt = prompt

        # Process image + prompt
        # Here, we:
        #  + process the image
        #  + use self.processor to automatically calculate the number  
        #    of image tokens, then add them to the prompt
        #    => this step combine adding the corresponding image tokens to the prompt AND
        #       tokenize the prompt after that
        if image_tensor is not None:
            processed_images = self.image_processor(
                images=image_tensor,
                return_tensors="pt",
                do_rescale=False,
            )["pixel_values"]
            processed_images = processed_images.squeeze(0)  # remove batch dim
        else:
            processed_images = None

        processed_prompt_inputs = self.processor(
            images=image_tensor,
            text=processed_prompt,
            add_special_tokens=False,
            return_tensors="pt",
            do_rescale=False,
        )
        
        # Remove batch dim
        for k, v in processed_prompt_inputs.items():
            processed_prompt_inputs[k] = v.squeeze(0)

        # Combine image and audio processed data
        inputs = {
            "input_ids": processed_prompt_inputs["input_ids"],
            "attention_mask": processed_prompt_inputs["attention_mask"],
        }

        if processed_images is not None:
            inputs["images"] = processed_images

        if processed_audios is not None:
            inputs["audios"] = processed_audios
            inputs["audios_seq_lengths"] = audios_seq_lengths

        answers = sample.answers
        if answers:
            if not isinstance(answers, list):
                answers = [answers]
            tokenizer = self.processor.tokenizer
            inputs["labels"] = torch.full_like(inputs["input_ids"], fill_value=-100)
            search_idx = 0
            for ans in answers:
                answer_tokens = tokenizer.encode(
                    ans, add_special_tokens=False, return_tensors="pt"
                )[0]
                s_idx, e_idx = self._find_pattern_indices(
                    inputs["input_ids"], answer_tokens, search_idx
                )
                if s_idx == -1:
                    raise ValueError(f"Answer not found in input_ids: {ans}")
                inputs["labels"][s_idx:e_idx] = inputs["input_ids"][s_idx:e_idx]
                search_idx = e_idx

            # shift inputs and labels by 1
            inputs["input_ids"] = inputs["input_ids"][:-1]
            inputs["labels"] = inputs["labels"][1:]
            inputs["loss_mask"] = (inputs["labels"] != -100).long()

        else:
            inputs["labels"] = None
            inputs["loss_mask"] = None

        return inputs

    def batch(self, samples: List[Dict]) -> Dict:
        """Pad/stack individual samples into a single batch dict."""

        if not samples:
            return {}

        batched: Dict[str, torch.Tensor] = {}
        keys = samples[0].keys()

        for key in keys:
            values = [s[key] for s in samples if key in s and s[key] is not None]

            processor = KEY_PROCESSORS.get(key)
            if processor is not None:
                batched[key] = processor(values)
                continue

            # Fallback behaviours if no specific processor is registered.
            if isinstance(values[0], torch.Tensor):
                batched[key] = torch.stack(values, dim=0)
            else:
                batched[key] = values

        return batched

    def encode_batch_avlm_clip_whisper_llava(self, batch_data: Dict) -> Dict:
        input_ids = batch_data["input_ids"]
        labels = batch_data.get("labels")
        loss_mask = batch_data.get("loss_mask")

        seq_len = input_ids.size(1)
        position_ids = torch.arange(seq_len, dtype=torch.long, device=input_ids.device)
        position_ids = position_ids.unsqueeze(0).repeat(input_ids.size(0), 1)

        images = batch_data.get("images")
        audios = batch_data.get("audios")
        audios_seq_lengths = batch_data.get("audios_seq_lengths")

        output = {
            "input_ids": input_ids,
            "labels": labels,
            "loss_mask": loss_mask,
            "position_ids": position_ids,
        }

        if images is not None:
            output["modality_inputs"] = {
                "images": {"clip_encoder": {"pixel_values": images}}
            }

        if audios is not None:
            if "modality_inputs" not in output:
                output["modality_inputs"] = {}
            output["modality_inputs"]["audios"] = {
                "whisper_encoder": {
                    "input_features": audios,
                    "seq_lengths": audios_seq_lengths
                }
            }

        return output

    def encode_batch(self, batch_data: Dict) -> dict:
        if self.model_type is AVLMModelType.IMAGE_AUDIO_LLAVA_AVLM:
            return self.encode_batch_avlm_clip_whisper_llava(batch_data)
        else:
            raise ValueError(f"Model type {self.model_type} not supported")


def llava_avlm_dataloader_provider(train_val_test_num_samples):
    args = get_args()

    # update global tokenizer if hf_assign_unused_tokens is set
    if args.hf_assign_unused_tokens:
        _tokenizer = get_tokenizer()._tokenizer
        for token_id_pair in args.hf_assign_unused_tokens:
            token, id_str = token_id_pair.split(',')
            id = int(id_str)
            _tokenizer.add_special_tokens({'additional_special_tokens': [token]})
            _tokenizer.vocab[token] = id
            _tokenizer.added_tokens_encoder[token] = id
            _tokenizer.added_tokens_decoder[id] = token
        get_tokenizer()._tokenizer = _tokenizer

    tokenizer_model_id = args.tokenizer_model
    processor = AutoProcessor.from_pretrained(tokenizer_model_id)
    processor.tokenizer = get_tokenizer()._tokenizer  # update processor to use custom tokenizer
    image_processor = AutoProcessor.from_pretrained(tokenizer_model_id).image_processor
    audio_processor = AutoProcessor.from_pretrained(args.audio_encoder_model)

    return train_valid_test_dataloaders_provider(
        train_val_test_num_samples,
        task_encoder=AVLMTaskEncoder(
            model_type=AVLMModelType.IMAGE_AUDIO_LLAVA_AVLM,
            processor=processor,
            image_processor=image_processor,
            audio_processor=audio_processor,
            conversation_template_config=LlavaConversationTemplateConfig(),
        ),
    )


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_path",
        type=str,
        required=True,
        help="path to the dataset directory in energon format",
    )
    args = parser.parse_args()
    
    # for image audio llava avlm
    hf_model_id = "llava-hf/llava-1.5-7b-hf"
    model_type = AVLMModelType.IMAGE_AUDIO_LLAVA_AVLM

    processor = AutoProcessor.from_pretrained(hf_model_id)
    processor.tokenizer = get_tokenizer()._tokenizer
    image_processor = AutoProcessor.from_pretrained(hf_model_id).image_processor
    audio_processor = AutoProcessor.from_pretrained("openai/whisper-small")

    worker_config = WorkerConfig.default_worker_config(0)
    train_loader = get_loader(
        get_train_dataset(
            args.data_path,
            batch_size=8,
            shuffle_buffer_size=None,
            max_samples_per_sequence=None,
            task_encoder=AVLMTaskEncoder(
                model_type=model_type,
                processor=processor,
                image_processor=image_processor,
                audio_processor=audio_processor,
                conversation_template_config=LlavaConversationTemplateConfig(),
            ),
            worker_config=worker_config,
        ),
        worker_config=worker_config,
    )

    print(f"data loader length {len(train_loader)}")
    for index, each_batch in enumerate(train_loader):
        print(f"batch index {index} tokens {each_batch['input_ids']}")
        if 'modality_inputs' in each_batch:
            if 'images' in each_batch['modality_inputs']:
                print(f"images shape: {each_batch['modality_inputs']['images']['clip_encoder']['pixel_values'].shape}")
            if 'audios' in each_batch['modality_inputs']:
                print(f"audios shape: {each_batch['modality_inputs']['audios']['whisper_encoder']['input_features'].shape}")
        break


# -----------------------------------------------------------------------------
# Key processing utilities for batching
# -----------------------------------------------------------------------------


class KeyProcessor(Protocol):
    """Callable that aggregates a list of tensors into a single batched tensor."""

    def __call__(self, values: List[torch.Tensor]) -> torch.Tensor:  # pragma: no cover
        ...


class StackProcessor:
    """Simply stack tensors along a given dimension."""

    def __init__(self, dim: int = 0):
        self.dim = dim

    def __call__(self, values: List[torch.Tensor]) -> torch.Tensor:
        return torch.stack(values, dim=self.dim)


class PaddingProcessor:
    """Pad variable-length sequences to the same length."""

    def __init__(self, pad_value: int, batch_first: bool = True):
        self.pad_value = pad_value
        self.batch_first = batch_first

    def __call__(self, values: List[torch.Tensor]) -> torch.Tensor:
        return rnn_utils.pad_sequence(
            values, batch_first=self.batch_first, padding_value=self.pad_value
        )


# Registry mapping sample keys to their corresponding processor.
KEY_PROCESSORS: Dict[str, KeyProcessor] = {
    "images": StackProcessor(),
    "audios": StackProcessor(),
    "audios_seq_lengths": StackProcessor(),
    "input_ids": PaddingProcessor(pad_value=0),
    "attention_mask": PaddingProcessor(pad_value=0),
    "loss_mask": PaddingProcessor(pad_value=0),
    "labels": PaddingProcessor(pad_value=-100),
} 

================================================
FILE: examples/mimo/data/energon_vlm_task_encoder.py
================================================
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import argparse
import logging
import os
import sys
from dataclasses import dataclass
from enum import Enum
from typing import Dict, List, Union, Iterable, Tuple, Optional, Protocol
import heapq
import torch
import torch.nn.utils.rnn as rnn_utils

# TODO: ykarnati, use absolute import or 
# define train_valid_test_dataloaders_provider in here
sys.path.append(
    os.path.abspath(
        os.path.join(
            os.path.dirname(__file__),
            os.path.pardir,
            os.path.pardir,
            os.path.pardir,
            "examples/multimodal",
        )
    )
)
from dataloader_provider import train_valid_test_dataloaders_provider
from transformers import AutoProcessor

from megatron.core.packed_seq_params import PackedSeqParams
from megatron.energon import (
    DefaultTaskEncoder,
    VQASample,
    WorkerConfig,
    get_loader,
    get_train_dataset,
)
from megatron.energon.task_encoder.base import stateless
from megatron.training import get_args
from megatron.core.models.multimodal import context_parallel


@dataclass
class ConversationTemplateConfig:
    system: str = None
    chat_template: str = None


@dataclass
class LlavaConversationTemplateConfig(ConversationTemplateConfig):
    """Default system prompt and chat template for Llava training."""

    system: str = None
    chat_template: str = None

class ModelType(Enum):
    LLAVA_VLM = "llava_vlm"
    VIDEO_LLAVA_VLM = "video_llava_vlm"

def predict_seq_len_with_padding(instance_tokens: torch.Tensor, pad_to_multiple_of: int = 64) -> int:
    """Get seqlen with padding.
    Args:
        instance_tokens (torch.Tensor): Tensor of instance tokens.
        pad_to_multiple_of (int): Pad to multiple of this value.
    Returns:
        int: Padded sequence length.
    """
    seqlen = len(instance_tokens)
    seqlen_padded = (seqlen + pad_to_multiple_of - 1) // pad_to_multiple_of * pad_to_multiple_of
    return seqlen_padded

def group_samples(samples: List[Dict[str, torch.Tensor]], 
                  group_size: int, 
                  lengths: List[int],
                  ) -> List[List[Dict[str, torch.Tensor]]]:
    """Group samples into groups of size group_size.

    Args:
        samples (List[Dict[str, torch.Tensor]]): List of samples to group.
        group_size (int): Maximum size of each group.
        lengths (List[int]): List of lengths of each sample.

    Returns:
        List[List[Dict[str, torch.Tensor]]]: List of groups, where each group is a list of samples
                that should be packed together. Each group's total length will not exceed group_size.
    """
    # create a max heap of the lengths
    max_heap: List[Tuple[int, int]] = [(-length, i) for i, length in enumerate(lengths)]
    heapq.heapify(max_heap)

    groups: List[List[Dict[str, torch.Tensor]]] = []
    current_group: List[Dict[str, torch.Tensor]] = []
    current_length: int = 0
    while max_heap:
        neg_length, i = heapq.heappop(max_heap)
        length = -neg_length
        if current_length + length <= group_size:
            current_group.append(samples[i])
            current_length += length
        else:
            groups.append(current_group)
            current_group = [samples[i]]
            current_length = length
    # If we're at the end of the samples, add the last group
    if current_group:
        groups.append(current_group)
    return groups

class VLMTaskEncoder(
    DefaultTaskEncoder[
        Union[VQASample],
        dict,
        dict,
        dict,
    ]
):
    def __init__(
        self,
        model_type: ModelType,
        processor,
        conversation_template_config: Optional[ConversationTemplateConfig] = None,
        max_seq_length: Optional[int] = None,
    ):
        """Initialize VLMTaskEncoder.

        Args:
            model_name (str): Model name, currently only "llava_vlm" is supported.
            processor: HuggingFace processor for the model.
            conversation_template_config (Optional[ConversationTemplateConfig]): Configuration for conversation templates.
            max_seq_length (Optional[int]): Maximum sequence length for packing. Should be sum of max_text_length
                and image_seq_length. If None, defaults to 4096. This value is used as group_size for sequence packing.
        """
        self.model_type = model_type
        # Use max_seq_length if provided, otherwise default to 4096
        self.group_size = max_seq_length if max_seq_length is not None else 4096
        self.processor = processor
        self.conversation_template_config = conversation_template_config
        # Read parallelism settings directly from training args (these live in TransformerConfig).
        _args = get_args()
        self._cp_size = getattr(_args, 'context_parallel_size', 1)
        self._tp_size = getattr(_args, 'tensor_model_parallel_size', 1)
        self._sequence_parallel = getattr(_args, 'sequence_parallel', False)

    def apply_prompt_template(self, input_text: VQASample):
        """Create conversation prompt string using HF chat template.

        The first user turn always contains an image placeholder, later turns are text-only.
        Returns a *prompt string* that can be fed into the processor together with an image.
        """

        user_msgs = input_text.context
        bot_msgs = input_text.answers

        def _ensure_list_type(value):
            if isinstance(value, list):
                return value
            return [value]
        user_msgs = _ensure_list_type(user_msgs)
        bot_msgs = _ensure_list_type(bot_msgs)

        conversation = []
        for _, (u_txt, b_txt) in enumerate(zip(user_msgs, bot_msgs)):
            conversation.append(
                {
                    "role": "user",
                    "content": [{"type": "text", "text": u_txt}],
                }
            )
            conversation.append(
                {
                    "role": "assistant",
                    "content": [{"type": "text", "text": b_txt}],
                }
            )

        # Inject optional system message
        if (
            self.conversation_template_config
            and self.conversation_template_config.system
        ):
            conversation.insert(
                0,
                {"role": "system", "content": self.conversation_template_config.system},
            )

        # Select chat template
        if (
            self.conversation_template_config
            and self.conversation_template_config.chat_template
        ):
            self.processor.chat_template = (
                self.conversation_template_config.chat_template
            )
        return self.processor.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=False,
        )
    

    def _find_pattern_indices(
        self, template, pattern, start_idx=0, allow_first_mismatch=False
    ):
        template_len = len(template)
        pat_len = len(pattern)
        for i in range(start_idx, template_len - pat_len + 1):
            match = template[i : i + pat_len] == pattern
            if torch.all(match) or (allow_first_mismatch and torch.all(match[1:])):
                return i, i + pat_len
        return -1, -1

    def select_samples_to_pack(self, samples: List[Dict[str, torch.Tensor]]) -> List[List[Dict[str, torch.Tensor]]]:
        """Selects which samples will be packed together.
        
        This function receives a list of samples (size according to the selected packing_buffer_size), 
        and partitions those samples into groups that shall be packed together.

        Args:
            samples (List[Dict[str, torch.Tensor]]): List of samples from the buffer, each containing
                tokenized data with keys like 'input_ids', 'labels', 'loss_mask', etc.

        Returns:
            List[List[Dict[str, torch.Tensor]]]: List of groups, where each group is a list of samples
                that should be packed together. Each group's total length will not exceed group_size.

        NOTE: Energon dataloader calls this method internally if packing is used.
        Please see https://nvidia.github.io/Megatron-Energon/advanced/packing.html
        """
        # Group samples into groups of size group_size
        lengths = [predict_seq_len_with_padding(sample["input_ids"]) for sample in samples]
        # lengths = [sample["input_ids"].size(0) for sample in samples]
        packed_samples = group_samples(samples, group_size=self.group_size, lengths=lengths)
        return packed_samples
    
    @stateless
    def pack_selected_samples(self, samples: List[Dict[str, torch.Tensor]]) -> Dict[str, torch.Tensor]:
        """Implements how a group of samples will be mapped to a single sample.
        
        Args:
            samples (List[Dict[str, torch.Tensor]]): List of samples to pack together.

        Returns:
            Dict[str, torch.Tensor]: Packed sample with keys like 'input_ids', 'labels', 'loss_mask', etc.
        """
        # Pad each sample to a multiple of 64, then concatenate
        padded_input_ids = []
        padded_labels = []
        padded_loss_masks = []
        padded_lens = []

        has_labels = "labels" in samples[0] and samples[0]["labels"] is not None
        has_loss_mask = "loss_mask" in samples[0] and samples[0]["loss_mask"] is not None

        for sample in samples:
            original_len = sample["input_ids"].size(0)
            padded_len = predict_seq_len_with_padding(sample["input_ids"])
            padded_lens.append(padded_len)
            pad_amount = padded_len - original_len

            padded_input_ids.append(torch.cat([
                sample["input_ids"],
                torch.zeros(pad_amount, dtype=sample["input_ids"].dtype)
            ]))
            
            if has_labels:
                padded_labels.append(torch.cat([
                    sample["labels"],
                    torch.full((pad_amount,), -100, dtype=sample["labels"].dtype)
                ]))

            if has_loss_mask:
                padded_loss_masks.append(torch.cat([
                    sample["loss_mask"],
                    torch.zeros(pad_amount, dtype=sample["loss_mask"].dtype)
                ]))

        # Concatenate sequences
        input_ids = torch.cat(padded_input_ids)
        labels = torch.cat(padded_labels) if has_labels else None
        loss_mask = torch.cat(padded_loss_masks) if has_loss_mask else None
        
        batched_images = torch.stack([s["pixel_values"] for s in samples], dim=0)   # (B , C , H , W)
        
        # Calculate padding if context parallel or sequence parallel is enabled
        pad_len = 0
        if self._cp_size > 1 or self._sequence_parallel:
            pad_len = context_parallel.get_padding(
                len(input_ids),
                self._cp_size,
                self._tp_size,
                self._sequence_parallel,
            )
        
        # Pad sequences
        if pad_len > 0:
            input_ids = torch.cat([input_ids, torch.zeros(pad_len, dtype=input_ids.dtype)])
            if labels is not None:
                labels = torch.cat([labels, torch.full((pad_len,), -100, dtype=labels.dtype)])
            if loss_mask is not None:
                loss_mask = torch.cat([loss_mask, torch.zeros(pad_len, dtype=loss_mask.dtype)])
        
        # Generate position_ids after padding
        position_ids = torch.arange(len(input_ids))
        
        # Calculate cu_seqlens using padded lengths
        lens = torch.tensor(padded_lens, dtype=torch.int32)
        cu_seqlens = torch.cat([torch.tensor([0], dtype=torch.int32), torch.cumsum(lens, dim=0)])
        
        # Calculate padded sequence lengths and cu_seqlens
        seqlens_padded = [l.item() + pad_len for l in lens]
        cu_seqlens_padded = torch.cat([torch.tensor([0], dtype=torch.int32), torch.cumsum(torch.tensor(seqlens_padded, dtype=torch.int32), dim=0)])
        
        packing_kwargs = {
            "cu_seqlens_q": cu_seqlens,
            "cu_seqlens_kv": cu_seqlens,
            "cu_seqlens_q_padded": cu_seqlens_padded,
            "cu_seqlens_kv_padded": cu_seqlens_padded,
            "max_seqlen_q": torch.tensor(max(seqlens_padded), dtype=torch.int32),
            "max_seqlen_kv": torch.tensor(max(seqlens_padded), dtype=torch.int32),
        }
        
        packed_result = {
            "input_ids": input_ids,
            "labels": labels,
            "loss_mask": loss_mask,
            "pixel_values": batched_images,
            "position_ids": position_ids,
            "packing_kwargs": packing_kwargs,
        }
            
        return packed_result

    @stateless
    def encode_sample(self, sample: VQASample):
        """Return tokenised multimodal sample."""
        # Build prompt
        prompt = self.apply_prompt_template(sample)
        logging.debug(f"prompt: {prompt}")

        # Process image + prompt
        inputs = self.processor(
            images=getattr(sample, "image", None),
            text=prompt,
            add_special_tokens=False,
            return_tensors="pt",
            do_rescale=False,
        )

        # Remove batch dim
        for k, v in inputs.items():
            inputs[k] = v.squeeze(0)


        answers = sample.answers
        if answers:
            if not isinstance(answers, list):
                answers = [answers]
            tokenizer = self.processor.tokenizer
            inputs["labels"] = torch.full_like(inputs["input_ids"], fill_value=-100)
            search_idx = 0
            for ans in answers:
                answer_tokens = tokenizer.encode(
                    ans, add_special_tokens=False, return_tensors="pt"
                )[0]
                s_idx, e_idx = self._find_pattern_indices(
                    inputs["input_ids"], answer_tokens, search_idx
                )
                if s_idx == -1:
                    raise ValueError(f"Answer not found in input_ids: {ans}")
                inputs["labels"][s_idx:e_idx] = inputs["input_ids"][s_idx:e_idx]
                search_idx = e_idx

            # shift inputs and labels by 1
            inputs["input_ids"] = inputs["input_ids"][:-1]
            inputs["labels"] = inputs["labels"][1:]
            inputs["loss_mask"] = (inputs["labels"] != -100).long()

        else:
            inputs["labels"] = None
            inputs["loss_mask"] = None
        
        return inputs

    def batch(self, samples: List[Dict]) -> Dict:
        """Pad/stack individual samples into a single batch dict."""
        if not samples:
            return {}

        batched: Dict[str, torch.Tensor] = {}
        keys = samples[0].keys()
        is_packed_sample = "packing_kwargs" in samples[0]
        for key in keys:
            values = [s[key] for s in samples if key in s and s[key] is not None]

            processor = KEY_PROCESSORS.get(key)
            if processor is not None:
               batched[key] = processor(values, max_len=self.group_size, is_packed_sample=is_packed_sample) 
               continue
            
            # Fallback behaviours if no specific processor is registered.
            if isinstance(values[0], torch.Tensor):
               batched[key] = torch.stack(values, dim=0)
            else:
               batched[key] = values
        
        # Add context parallel padding if enabled
        if self._cp_size > 1:
            seq_len = batched["input_ids"].size(1)
            pad_len = context_parallel.get_padding(
                seq_len,
                self._cp_size,
                self._tp_size,
                self._sequence_parallel,
            )
            if pad_len > 0:
                # Pad input_ids
                batched["input_ids"] = torch.cat([
                    batched["input_ids"],
                    torch.zeros(batched["input_ids"].size(0), pad_len, dtype=batched["input_ids"].dtype)
                ], dim=1)
                # Pad labels
                if "labels" in batched:
                    batched["labels"] = torch.cat([
                        batched["labels"],
                        torch.full((batched["labels"].size(0), pad_len), -100, dtype=batched["labels"].dtype)
                    ], dim=1)
                # Pad loss_mask
                if "loss_mask" in batched:
                    batched["loss_mask"] = torch.cat([
                        batched["loss_mask"],
                        torch.zeros(batched["loss_mask"].size(0), pad_len, dtype=batched["loss_mask"].dtype)
                    ], dim=1)
        
        return batched

    def encode_batch_vlm_clip_llava(self, batch_data: Dict) -> Dict:
        input_ids = batch_data["input_ids"]
        labels = batch_data.get("labels")
        loss_mask = batch_data.get("loss_mask")

        # Handle packed-sample case where input_ids is 1-D
        if input_ids.dim() == 1:
            input_ids = input_ids.unsqueeze(0)  # add batch dimension
            if labels is not None and labels.dim() == 1:
                labels = labels.unsqueeze(0)
            if loss_mask is not None and loss_mask.dim() == 1:
                loss_mask = loss_mask.unsqueeze(0)

        seq_len = input_ids.size(1)
        position_ids = torch.arange(seq_len, dtype=torch.long, device=input_ids.device)
        position_ids = position_ids.unsqueeze(0).repeat(input_ids.size(0), 1)
        pixel_values = batch_data.get("pixel_values")

        output = {
            "input_ids": input_ids,
            "labels": labels,
            "loss_mask": loss_mask,
            "position_ids": position_ids,
        }

        if pixel_values is not None:
            output["modality_inputs"] = {
                "images": {"clip_encoder": {"pixel_values": pixel_values}}
            }
        
        return output

    def encode_batch_vlm_clip_llava_video(self, batch_data: Dict) -> Dict:
        input_ids = batch_data["input_ids"]
        labels = batch_data.get("labels")
        loss_mask = batch_data.get("loss_mask")

        seq_len = input_ids.size(1)
        position_ids = torch.arange(seq_len, dtype=torch.long, device=input_ids.device)
        position_ids = position_ids.unsqueeze(0).repeat(input_ids.size(0), 1)

        pixel_values_videos = batch_data.get("pixel_values_videos")

        output = {
            "input_ids": input_ids,
            "labels": labels,
            "loss_mask": loss_mask,
            "position_ids": position_ids,
        }

        if pixel_values_videos is not None:
            output["modality_inputs"] = {
                "images": {"clip_encoder": {"pixel_values": pixel_values_videos}}
            }

        return output

    def encode_batch(self, batch_data: Dict) -> dict:
        if self.model_type is ModelType.LLAVA_VLM:
            return self.encode_batch_vlm_clip_llava(batch_data)
        elif self.model_type is ModelType.VIDEO_LLAVA_VLM:
            return self.encode_batch_vlm_clip_llava_video(batch_data)
        else:
            raise ValueError(f"Model type {self.model_type} not supported")

def llava_vlm_dataloader_provider(train_val_test_num_samples, max_seq_length: Optional[int] = None, is_video_input: bool = False):
    args = get_args()
    tokenizer_model_id = args.tokenizer_model
    processor = AutoProcessor.from_pretrained(tokenizer_model_id)
    if is_video_input:
        model_type = ModelType.VIDEO_LLAVA_VLM
    else:
        model_type = ModelType.LLAVA_VLM
    return train_valid_test_dataloaders_provider(
        train_val_test_num_samples,
        task_encoder=VLMTaskEncoder(
            model_type=model_type,
            processor=processor,
            conversation_template_config=LlavaConversationTemplateConfig(),
            max_seq_length=max_seq_length,
        )
    )


if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_path",
        type=str,
        required=True,
        help="path to the dataset directory in energon format",
    )
    parser.add_argument('--total-seq-length', type=int, default=512, help='Maximum text length')
    parser.add_argument('--image-seq-length', type=int, default=197, help='Number of image tokens')
    parser.add_argument('--packing-buffer-size', type=int, default=None, help='Packing buffer size when using sequence packing')
    args = parser.parse_args()
    
    # Calculate max_seq_length as sum of text and image sequence lengths
    max_seq_length = args.max_text_length + args.image_seq_length
    
    model_name = "llava-hf/llava-1.5-7b-hf"

    processor = AutoProcessor.from_pretrained(model_name)
    worker_config = WorkerConfig.default_worker_config(0)
    train_loader = get_loader(
        get_train_dataset(
            args.data_path,
            batch_size=8,
            shuffle_buffer_size=None,
            max_samples_per_sequence=None,
            task_encoder=VLMTaskEncoder(
                model_type=ModelType.LLAVA_VLM,
                processor=processor,
                conversation_template_config=LlavaConversationTemplateConfig(),
                max_seq_length=max_seq_length,  # Use calculated max_seq_length
            ),
            worker_config=worker_config,
        ),
        worker_config=worker_config,
    )

    print(f"data loader length {len(train_loader)}")
    for index, each_batch in enumerate(train_loader):
        print(
            f"batch index {index} tokens {each_batch['input_ids']} images shape \
               {each_batch['modality_inputs']['images']['clip_encoder']['pixel_values'].shape}"
        )
        break


# -----------------------------------------------------------------------------
# Key processing utilities for batching
# -----------------------------------------------------------------------------


class KeyProcessor(Protocol):
    """Callable that aggregates a list of tensors into a single batched tensor."""

    def __call__(self, values: List[torch.Tensor], max_len: Optional[int] = None, is_packed_sample: bool = False) -> torch.Tensor:  # pragma: no cover
        ...


class StackProcessor:
    """Simply stack tensors along a given dimension."""

    def __init__(self, dim: int = 0):
        self.dim = dim

    def __call__(self, values: List[torch.Tensor], max_len: Optional[int] = None, is_packed_sample: bool = False) -> torch.Tensor:
        if values[0].dim() == 3:
            return torch.stack(values, dim=self.dim) # (B , C , H , W)
        else:
            # Concatenate already-batched image tensors along the batch dimension.
            return torch.cat(values, dim=self.dim)  # (B , C , H , W)


class PaddingProcessor:
    """Pad variable-length sequences to the same length."""

    def __init__(self, pad_value: int, batch_first: bool = True):
        self.pad_value = pad_value
        self.batch_first = batch_first
    
    def _pad_and_stack(self, tensors: List[torch.Tensor], max_len: int, pad_val: int) -> torch.Tensor:
        """Pad or truncate a list of 1D tensors to a fixed length and stack them."""
        padded_tensors = []
        for t in tensors:
            current_len = t.size(0)
            if current_len > max_len:
                # Truncate
                padded_tensors.append(t[:max_len])
            else:
                # Pad
                pad_amount = max_len - current_len
                padding = torch.full((pad_amount,), pad_val, dtype=t.dtype, device=t.device)
                padded_tensors.append(torch.cat([t, padding]))
        return torch.stack(padded_tensors, dim=0)

    def __call__(self, values: List[torch.Tensor], max_len: Optional[int] = None, is_packed_sample: bool = False) -> torch.Tensor:
        if is_packed_sample:
            return rnn_utils.pad_sequence(
                    values, batch_first=self.batch_first, padding_value=self.pad_value
                    )
        else:
            return self._pad_and_stack(values, max_len, self.pad_value)

class PackingKwargsProcessor:
    """Extract the value at first index for packing_kwargs"""

    def __call__(self, values: List[torch.Tensor], max_len: Optional[int] = None, is_packed_sample: bool = False) -> torch.Tensor:
        if len(values) == 1:
            return values[0]
        else:
            raise ValueError("Multiple packing_kwargs found in batch; expected only one per batch.")

class GenericStackProcessor:

    def __init__(self, dim: int = 0):
        self.dim = dim

    def __call__(self, values: List[torch.Tensor], max_len: Optional[int] = None, is_packed_sample: bool = False) -> torch.Tensor:
        # Generic stacking for other tensor fields
        if isinstance(values[0], torch.Tensor):
            return torch.stack(values, dim=self.dim)
        else:
            return values

# Registry mapping sample keys to their corresponding processor.
KEY_PROCESSORS: Dict[str, KeyProcessor] = {
    "pixel_values": StackProcessor(),
    "pixel_values_videos": StackProcessor(),
    "input_ids": PaddingProcessor(pad_value=0),
    "attention_mask": PaddingProcessor(pad_value=0),
    "loss_mask": PaddingProcessor(pad_value=0),
    "labels": PaddingProcessor(pad_value=-100),
    "packing_kwargs": PackingKwargsProcessor(),
}


================================================
FILE: examples/mimo/data/mock.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
"""
Simple mock data module for testing MIMO with image-text (VLM) models.

This module provides basic synthetic data generation for testing Vision Language Models
within the MIMO framework.
"""

from typing import Callable, Dict, List, Optional

import torch
from torch.utils.data import DataLoader, Dataset


def create_mock_image(image_size: int = 336) -> torch.Tensor:
    """
    Create a simple mock image (all zeros).

    Args:
        image_size: Size of the square image

    Returns:
        Tensor of shape [3, H, W] with all zeros
    """
    return torch.zeros(3, image_size, image_size)


def create_mock_caption() -> str:
    """
    Create a simple mock caption.

    Returns:
        A simple caption string
    """
    return "This is an image."


class MockVLMDataset(Dataset):
    """Simple dataset of mock image-text pairs for VLM testing."""

    def __init__(
        self,
        size: int = 10000,
        image_size: int = 336,
        seq_len: int = 512,
        image_seq_length: int = 32,
        vocab_size: int = 256,
        tokenizer: Optional[Callable] = None,
        pad_token_id: int = 0,
        image_token_id: int = 32000,
    ):
        """
        Initialize the mock VLM dataset.

        Args:
            size: Number of examples in the dataset
            image_size: Size of the square images
            seq_len: Total length of the token sequence (image + text)
            image_seq_length: Number of image tokens to pad
            vocab_size: Size of the vocabulary for tokenization
            tokenizer: Optional tokenizer function
            pad_token_id: ID for padding token
            image_token_id: ID for image placeholder token
        """
        self.size = size
        self.image_size = image_size
        self.seq_len = seq_len
        self.image_seq_length = image_seq_length
        self.vocab_size = vocab_size
        self.tokenizer = tokenizer

        # Special token IDs
        self.pad_token_id = pad_token_id
        self.image_token_id = image_token_id

        if self.seq_len < self.image_seq_length:
            raise ValueError(
                f"seq_len ({self.seq_len}) must be >= image_seq_length ({self.image_seq_length})."
            )

    def __len__(self) -> int:
        """Return the size of the dataset."""
        return self.size

    def __getitem__(self, idx: int) -> Dict:
        """
        Get an item from the dataset.

        Args:
            idx: Index of the item (ignored, all items are identical)

        Returns:
            Dictionary containing:
            - images: Tensor of shape [C, H, W]
            - input_ids: Tokenized caption with image token
            - labels: Shifted input_ids for language modeling
            - loss_mask: Mask for loss calculation
            - position_ids: Position IDs for the tokens
        """
        # Create a zero image
        image = create_mock_image(self.image_size)

        # Generate random token sequence for this sample.
        input_ids = self._mock_tokenize()

        # Create labels (shifted input_ids)
        labels = input_ids.clone()
        labels[:-1] = input_ids[1:]
        labels[-1] = self.pad_token_id  # Padding for the last position

        # Set labels for image tokens to -100 (ignored in loss calculation)
        labels[input_ids == self.image_token_id] = -100

        # Create loss mask (1 for tokens to calculate loss on, 0 for others)
        loss_mask = torch.ones_like(input_ids).float()
        loss_mask[input_ids == self.pad_token_id] = 0.0  # Don't calculate loss on padding
        loss_mask[input_ids == self.image_token_id] = 0.0  # Don't calculate loss on image tokens

        # Create position IDs (just sequential integers)
        position_ids = torch.arange(len(input_ids), dtype=torch.long)

        return {
            "input_ids": input_ids,
            "labels": labels,
            "loss_mask": loss_mask,
            "position_ids": position_ids,
            "modality_inputs": {
                "clip_encoder": {
                    "images": image,
                }
            },
        }

    def _mock_tokenize(self) -> torch.Tensor:
        """
        Generate a mock token sequence consisting of ``image_seq_length`` image tokens followed by
        randomly generated text tokens such that the total sequence length equals
        ``self.seq_len``.

        Returns:
            torch.Tensor: Tensor of token IDs of shape ``[seq_len]``.
        """

        # Image placeholder tokens ─ placed at the beginning of the sequence to mimic
        # the layout produced by many VLM tokenizers.
        image_tokens = torch.full(
            (self.image_seq_length,), self.image_token_id, dtype=torch.long
        )

        # Random text tokens drawn uniformly in ``[1, vocab_size)`` (we reserve ``0`` for pad).
        num_text_tokens = self.seq_len - self.image_seq_length
        text_tokens = torch.randint(
            low=1,
            high=self.vocab_size,
            size=(num_text_tokens,),
            dtype=torch.long,
        )

        # Concatenate to form the full sequence.
        token_ids = torch.cat((image_tokens, text_tokens), dim=0)

        return token_ids


def get_mock_vlm_dataloader(
    batch_size: int = 8,
    dataset_size: int = 100,
    image_size: int = 224,
    seq_len: int = 77,
    image_seq_length: int = 32,
    num_workers: int = 0,
    pad_token_id: int = 0,
    image_token_id: int = 50000,
) -> DataLoader:
    """
    Create a DataLoader for mock VLM data.

    Args:
        batch_size: Batch size
        dataset_size: Size of the dataset
        image_size: Size of the square images
        seq_len: Total length of the token sequence (image + text)
        image_seq_length: Number of image tokens to pad
        num_workers: Number of worker processes for data loading
        pad_token_id: ID for padding token
        image_token_id: ID for image placeholder token

    Returns:
        DataLoader for the mock VLM dataset
    """
    dataset = MockVLMDataset(
        size=dataset_size,
        image_size=image_size,
        seq_len=seq_len,
        image_seq_length=image_seq_length,
        pad_token_id=pad_token_id,
        image_token_id=image_token_id,
    )

    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        collate_fn=lambda batch: _collate_fn(batch),
    )

    return dataloader


def _collate_fn(batch: List[Dict]) -> Dict[str, torch.Tensor]:
    """
    Collate function for the DataLoader.

    Args:
        batch: List of dictionaries from the dataset

    Returns:
        Dictionary of batched tensors
    """
    images = torch.stack([item["images"] for item in batch])
    input_ids = torch.stack([item["input_ids"] for item in batch])
    labels = torch.stack([item["labels"] for item in batch])
    loss_mask = torch.stack([item["loss_mask"] for item in batch])
    position_ids = torch.stack([item["position_ids"] for item in batch])

    return {
        "input_ids": input_ids,
        "labels": labels,
        "loss_mask": loss_mask,
        "position_ids": position_ids,
        "modality_inputs": {
            "clip_encoder": {
                "images": images,
            }
        },
    }


def train_valid_test_datasets_provider(train_val_test_num_samples):
    """Provide datasets for training, validation, and testing."""
    from megatron.core import mpu
    from megatron.training import get_args

    args = get_args()

    # Print some info to confirm args are available
    print(f"Creating datasets with batch size: {args.micro_batch_size}")
    print(f"Image size: {args.image_size}")
    print(f"Image sequence length: {args.image_seq_length}")
    print(f"Total sequence length: {args.total_seq_length}")

    # Only build dataset on tensor parallel rank 0
    if mpu.get_tensor_model_parallel_rank() == 0:

        from examples.mimo.data.mock import MockVLMDataset

        train_dataset = MockVLMDataset(
            size=train_val_test_num_samples[0],
            image_size=args.image_size,
            seq_len=args.total_seq_length,
            image_seq_length=args.image_seq_length,
            pad_token_id=args.pad_token_id,
            image_token_id=args.image_token_id,
        )

        # Use the same dataset type for validation
        valid_dataset = MockVLMDataset(
            size=train_val_test_num_samples[1] if train_val_test_num_samples[1] > 0 else 100,
            image_size=args.image_size,
            seq_len=args.total_seq_length,
            image_seq_length=args.image_seq_length,
            pad_token_id=args.pad_token_id,
            image_token_id=args.image_token_id,
        )

        # No test dataset for now
        test_dataset = None
    else:
        train_dataset = None
        valid_dataset = None
        test_dataset = None

    return train_dataset, valid_dataset, test_dataset

if __name__ == "__main__":
    print("\nCreating mock VLM dataloader...")
    dataloader = get_mock_vlm_dataloader(batch_size=4, dataset_size=10)

    print(f"DataLoader has {len(dataloader)} batches")

    for batch in dataloader:
        print("\nBatch from dataloader:")
        for key, tensor in batch.items():
            print(f"  {key}: {tensor.shape}")
        break


================================================
FILE: examples/mimo/data/prepare_video_llava_data.py
================================================
import glob
import json
import os
import tarfile

import webdataset as wds
from huggingface_hub import snapshot_download
from tqdm import tqdm


def _extract_archives(root: str):
    """Extract every .tar / .tar.gz archive found under *root* into its directory."""
    archives = glob.glob(os.path.join(root, "**", "*.tar*"), recursive=True)
    for arch in archives:
        try:
            print(f"Extracting {arch} …")
            with tarfile.open(arch, "r:*") as tf:
                tf.extractall(path=os.path.dirname(arch))
        except Exception as e:
            print(f"[WARN] Failed to extract {arch}: {e}")


def convert_llava_video_to_wds(dataset_root: str, shard_size: int = 8000):
    """Convert a LLaVA-Video dataset (keys: video, conversations, data_source) to WebDataset format.

    The function walks through every *.json / *.jsonl annotation file located under *dataset_root*,
    finds the referenced video files, and writes shards (<dataset_root>/wds/video-000000.tar …).
    """
    # ensure archives extracted so that video files are accessible
    _extract_archives(dataset_root)

    output_dir = os.path.join(dataset_root, "wds")
    os.makedirs(output_dir, exist_ok=True)

    # gather annotation files (skip the output directory itself)
    annotation_files = [
        p
        for p in glob.glob(os.path.join(dataset_root, "**", "*.json*"), recursive=True)
        if not os.path.commonpath([p, output_dir]) == output_dir
    ]
    if not annotation_files:
        raise FileNotFoundError(f"No annotation JSON files found in {dataset_root}")
    
    print(f"Found annotation files -  {annotation_files}")

    shard_pattern = os.path.join(output_dir, "video-%06d.tar")
    sample_idx = 0
    with wds.ShardWriter(shard_pattern, maxcount=shard_size) as sink:
        for ann_path in annotation_files:
            print(f"Processing {ann_path} …")
            with open(ann_path, "r") as f:
                first = f.read(1)
                f.seek(0)
                entries = json.load(f) if first == "[" else [json.loads(line) for line in f if line.strip()]
            for entry in tqdm(entries):
                video_rel = entry.get("video")
                conversations = entry.get("conversations")
                if video_rel is None or conversations is None:
                    continue

                video_path = video_rel if os.path.isabs(video_rel) else os.path.join(dataset_root, video_rel)

                if not os.path.exists(video_path):
                    print(f"Video file not found: {video_path}")
                    # or raise an error
                    continue

                try:
                    with open(video_path, "rb") as vf:
                        video_bytes = vf.read()
                except Exception:
                    continue

                key = f"{sample_idx:09d}"
                ext = os.path.splitext(video_path)[1].lstrip(".").lower() or "mp4"
                sample = {
                    "__key__": key,
                    ext: video_bytes,
                    "json": json.dumps(conversations).encode(),
                }
                if entry.get("data_source"):
                    sample["src.txt"] = str(entry["data_source"]).encode()

                sink.write(sample)
                sample_idx += 1

    print(f"Finished writing {sample_idx} samples → {output_dir}")


if __name__ == "__main__":
    # download dataset
    dataset_name = "lmms-lab/LLaVA-Video-178K"

    # specific subset to download
    subset = "0_30_s_academic_v0_1"

    dataset_root = snapshot_download(
        repo_id=dataset_name,
        repo_type="dataset",
        local_dir_use_symlinks=False,
        resume_download=True,
        allow_patterns=[f"{subset}/*", f"{subset}.*"],
    )
    print(f"dataset downloaded to: {dataset_root}")
    # convert to webdataset
    convert_llava_video_to_wds(f"{dataset_root}/{subset}")


================================================
FILE: examples/mimo/data/utils/calculate_audio_tokens.py
================================================
import math
import torch
from types import SimpleNamespace

# Model-specific audio processing parameters
AUDIO_MODEL_PARAMS = {
    "openai/whisper-base": {
        "model_type": "whisper",
        "sample_rate": 16000,  # 16kHz
        "window_stride": 0.01,  # 10ms
        "encoder_down_sampling": 2,
        "d_model": 512,
        "max_length_seconds": 30.0,
    },
}


def calculate_num_mel_frames(audio_length, sample_rate, window_stride, window_length=None):
    """
    Calculate the number of mel frames from an audio signal.

    Parameters:
    - audio_length (int): Total number of audio samples.
    - sample_rate (int or float): Sampling rate of the audio (samples per second).
    - window_stride (float): The time (in seconds) between successive frames.
    - window_length (float, optional): Window length in seconds. If provided, this function
      uses the standard formula: floor((N - window_length_in_samples) / hop_length) + 1.
      Otherwise, it uses the simplified calculation based on the window stride only.

    Returns:
    - int: The number of mel frames.
    """
    hop_length_samples = int(window_stride * sample_rate)

    if window_length is None:
        num_frames = math.ceil((audio_length + 1) / hop_length_samples)
    else:
        window_length_samples = int(window_length * sample_rate)
        num_frames = math.floor((audio_length - window_length_samples) / hop_length_samples) + 1

    return num_frames


def calculate_num_audio_tokens(audio_tensor, model_name):

    # Get audio length in samples
    audio_length = audio_tensor.shape[1]

    # Get model parameters
    if model_name in AUDIO_MODEL_PARAMS:
        model_params = SimpleNamespace(**AUDIO_MODEL_PARAMS[model_name])
        model_type = model_params.model_type
    else:
        raise ValueError(f"Unsupported model name: {model_name}")


    if model_type == "whisper":
        num_mel_frames = calculate_num_mel_frames(
            audio_length, model_params.sample_rate, model_params.window_stride
        )
        encoder_seq_length = math.ceil(num_mel_frames / model_params.encoder_down_sampling)
    else:
        raise ValueError(f"Unsupported model type: {model_type}")

    return max(1, int(encoder_seq_length))


================================================
FILE: examples/mimo/model_providers/__init__.py
================================================
 

================================================
FILE: examples/mimo/model_providers/hf_clip_encoder.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import torch
from transformers import CLIPVisionModel, LlavaNextVideoConfig
from transformers.models.llava_next_video.modeling_llava_next_video import (
    LlavaNextVideoPooler,
)


class HFCLIPEncoderWrapper(torch.nn.Module):
    """CLIP encoder wrapper that extracts last_hidden_state."""

    def __init__(self, feature_layer_index=-2, is_video_input: bool = False):
        """Initialize the HFCLIPEncoderWrapper.

        Args:
            feature_layer_index (int): Index of the feature layer to extract from the encoder's hidden states.
                                       Default is -2 (second to last layer).
            is_video_input (bool): If True, expects video input and applies vision resampler.
        """
        super().__init__()
        self.encoder = CLIPVisionModel.from_pretrained('openai/clip-vit-large-patch14-336')
        self.encoder.eval()
        self.feature_layer_index = feature_layer_index
        self.is_video_input = is_video_input
        if self.is_video_input:
            config = LlavaNextVideoConfig()
            self.vision_resampler = LlavaNextVideoPooler(config)

    def forward(self, pixel_values: torch.Tensor):
        """Input: (B, F, 3, 336, 336) if video, else (B, 3, 336, 336) or (num_frames, 3, 336, 336)."""
        # Process through encoder and extract last_hidden_state
        with torch.no_grad():
            if self.is_video_input:
                batch_size, frames, channels, height, width = pixel_values.shape
                pixel_values = pixel_values.reshape(batch_size * frames, channels, height, width)
            

            last_hidden_state = self.encoder(pixel_values, output_hidden_states=True)
            # -1 index is image features
            image_features = last_hidden_state[-1]
            # select last but second layer
            image_features = image_features[self.feature_layer_index]
            # drop cls token
            image_features = image_features[:, 1:, :]
            if self.is_video_input:
                image_features = self.vision_resampler(image_features)
            return image_features

================================================
FILE: examples/mimo/model_providers/hf_whisper_encoder.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import torch
from transformers import WhisperConfig, WhisperModel

class HFWhisperEncoderWrapper(torch.nn.Module):
    """Whisper audio encoder wrapper that extracts last_hidden_state."""

    def __init__(self, model_name: str):
        super().__init__()
        self.encoder = WhisperModel.from_pretrained(model_name).encoder

    def forward(self, input_features, seq_lengths=None):
        '''
        input_features: torch.Tensor
            input audio features
        seq_lengths: torch.Tensor
            the number of audio tokens corresponding to non-padded audio frames
            we only get the embeddings for the non-padded audio frames
        '''
        with torch.no_grad():
            hidden = self.encoder(input_features).last_hidden_state  # [b, s, h]
            if seq_lengths is not None:
                seq_len = hidden.shape[1]
                mask = torch.arange(seq_len, device=hidden.device)[None, :] < seq_lengths[:, None]
                hidden = hidden[mask]
            return hidden

================================================
FILE: examples/mimo/model_providers/llava_avlm.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
"""Model provider for a LLaVA-style Vision-Language Model.

This provider assembles a MIMO model that consists of:
• Vicuna-7B language model (Llama-based) built with Transformer-Engine GPT blocks.
• CLIP ViT-L/14 visual encoder (336 px) that produces image patch embeddings.
• A 2-layer MLP projector that maps vision embeddings into Vicuna hidden size.
"""


import torch
from configs.llava_avlm import (
    get_llava_projection_config,
    get_llava_projection_layer_spec,
    get_vicuna_language_layer_spec,
    get_vicuna_language_model_config,
)

from examples.mimo.model_providers.hf_clip_encoder import HFCLIPEncoderWrapper
from examples.mimo.model_providers.hf_whisper_encoder import HFWhisperEncoderWrapper
from examples.mimo.utils.logging import print_mimo_structure
from examples.mimo.utils.model_helpers import load_submodule_ckpt
from megatron.core.models.gpt.gpt_model import GPTModel
from megatron.core.models.mimo import MimoModel, MimoModelConfig
from megatron.core.models.mimo.submodules.vision import VisionModalitySubmodules
from megatron.core.models.mimo.submodules.audio import AudioModalitySubmodules
from megatron.core.models.vision.multimodal_projector import MultimodalProjector
from megatron.core.transformer.spec_utils import ModuleSpec


def model_provider_llava_avlm(
    pre_process: bool = True,
    post_process: bool = True,
    add_encoder=True,
    add_decoder=True,
    image_special_token_id: int = 32000,
    audio_special_token_id: int = 32002,
    pg_collection=None,
):
    """
    Build a LLaVA-style Audio-Vision-Language MIMO model composed of:
    • Vicuna language model.
    • Whisper audio encoder.
    • CLIP ViT-L/14 vision encoder.
    • 2-layer MLP vision→language projector.
    • 2-layer MLP audio→language projector.
    """
    # NOTE: Pipeline parallelism for the encoder/decoder is not yet supported in this
    # MIMO path, therefore *add_encoder* and *add_decoder* are currently ignored.


    # Language (Vicuna-7B)
    language_config = get_vicuna_language_model_config()


    # Vision→language and audio→language projection MLP – hidden size follows Vicuna (4096)
    vision_projection_config = get_llava_projection_config(
        hidden_size=language_config.hidden_size
    )
    audio_projection_config = get_llava_projection_config(
        hidden_size=language_config.hidden_size
    )


    # Sync precision flags from global args (if we're running under Megatron training loop)
    try:
        from megatron.training import get_args  # late import to avoid circular deps
        _args = get_args()
        if getattr(_args, "bf16", False):
            language_config.bf16 = True
            vision_projection_config.bf16 = True
            audio_projection_config.bf16 = True
        if getattr(_args, "fp16", False):
            language_config.fp16 = True
            vision_projection_config.fp16 = True
            audio_projection_config.fp16 = True
    except (ModuleNotFoundError, AssertionError):
        pass


    # HF vision encoder
    vision_encoder_params = {"is_video_input" : False}
    vision_encoder = ModuleSpec(
        module=HFCLIPEncoderWrapper,
        params=vision_encoder_params,
    )
    # HF audio encoder
    audio_encoder_params = {"model_name" : "openai/whisper-base"}
    audio_encoder = ModuleSpec(
        module=HFWhisperEncoderWrapper,
        params=audio_encoder_params,
    )


    # Create projection config for vision and audio to language
    vision_projection = ModuleSpec(
        module=MultimodalProjector,
        params={
            "config": vision_projection_config,
            "submodules": get_llava_projection_layer_spec().submodules,
            "projector_type": "mlp",
            "input_size": 1024,
        },
    )
    audio_projection = ModuleSpec(
        module=MultimodalProjector,
        params={
            "config": audio_projection_config,
            "submodules": get_llava_projection_layer_spec().submodules,
            "projector_type": "mlp",
            "input_size": 512,
        },
    )


    # Create modality config for vision and audio
    vision_submodule_spec = ModuleSpec(
        module=VisionModalitySubmodules,
        params={},
        submodules={
            "encoders": {"clip_encoder": vision_encoder},
            "input_projections": [vision_projection],
        },
    )
    audio_submodule_spec = ModuleSpec(
        module=AudioModalitySubmodules,
        params={},
        submodules={
            "encoders": {"whisper_encoder": audio_encoder},
            "input_projections": [audio_projection],
        },
    )


    # Create language model config
    language_model_spec = ModuleSpec(
        module=GPTModel,
        params={
            "config": language_config,
            "transformer_layer_spec": get_vicuna_language_layer_spec(),
            "vocab_size": 32256,
            "max_sequence_length": 4096,
            "pre_process": pre_process,
            "post_process": post_process,
            "position_embedding_type": "rope",
        },
    )


    # Create MIMO model config
    mimo_model_config = MimoModelConfig(
        language_model_spec=language_model_spec,
        modality_submodules_spec={"images": vision_submodule_spec, "audios": audio_submodule_spec},
        special_token_ids={"images": image_special_token_id, "audios": audio_special_token_id}
    )

    # Create MIMO model
    cp_group = pg_collection.cp if pg_collection is not None else None
    tp_group = pg_collection.tp if pg_collection is not None else None
    mimo_model = MimoModel(mimo_model_config, cp_group=cp_group, tp_group=tp_group)
    print("*"*100)
    print_mimo_structure(mimo_model)
    print("*"*100)

    # load the checkpoint
    try:
        from megatron.training import get_args  # late import to avoid circular deps

        _args = get_args()
        if  _args.language_model_checkpoint is not None:
            load_submodule_ckpt(mimo_model.language_model, _args.language_model_checkpoint)
            print(f"Successfully loaded LLaVA pretrained checkpoint from {_args.language_model_checkpoint}")
    except (ModuleNotFoundError, AssertionError):
        pass

    # TODO: ykarnati make these configurable and have an API to freeze/unfreeze   
    # freeze vision encoder and LLM parameters
    modules_to_freeze = [
        mimo_model.modality_submodules.images.encoders.clip_encoder,
        mimo_model.modality_submodules.audios.encoders.whisper_encoder,
        mimo_model.language_model
    ]
    for module in modules_to_freeze:
        for param in module.parameters():
            param.requires_grad = False

    return mimo_model

================================================
FILE: examples/mimo/model_providers/llava_vlm.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
"""Model provider for a LLaVA-style Vision-Language Model.

This provider assembles a MIMO model that consists of:
• Vicuna-7B language model (Llama-based) built with Transformer-Engine GPT blocks.
• CLIP ViT-L/14 visual encoder (336 px) that produces image patch embeddings.
• A 2-layer MLP projector that maps vision embeddings into Vicuna hidden size.
"""


import torch
from configs.llava_vlm import (
    get_llava_projection_config,
    get_llava_projection_layer_spec,
    get_vicuna_language_layer_spec,
    get_vicuna_language_model_config,
)

from examples.mimo.model_providers.hf_clip_encoder import HFCLIPEncoderWrapper
from examples.mimo.utils.logging import print_mimo_structure
from examples.mimo.utils.model_helpers import load_submodule_ckpt
from megatron.core.models.gpt.gpt_model import GPTModel
from megatron.core.models.mimo import MimoModel, MimoModelConfig
from megatron.core.models.mimo.submodules.vision import VisionModalitySubmodules
from megatron.core.models.vision.multimodal_projector import MultimodalProjector
from megatron.core.transformer.spec_utils import ModuleSpec


def model_provider_llava_vlm(
    pre_process: bool = True,
    post_process: bool = True,
    add_encoder=True,
    add_decoder=True,
    image_special_token_id: int = 32000,
    is_video_input: bool = False,
    pg_collection=None,
):
    """
    Build a LLaVA-style Vision-Language MIMO model composed of:
    • Vicuna language model.
    • CLIP ViT-L/14 vision encoder.
    • 2-layer MLP vision→language projector.
    """
    # NOTE: Pipeline parallelism for the encoder/decoder is not yet supported in this
    # MIMO path, therefore *add_encoder* and *add_decoder* are currently ignored.

    # Language (Vicuna-7B)
    language_config = get_vicuna_language_model_config()

    # Vision→language projection MLP – hidden size follows Vicuna (4096)
    projection_config = get_llava_projection_config(
        hidden_size=language_config.hidden_size
    )

    # Sync precision flags from global args (if we're running under Megatron training loop)
    try:
        from megatron.training import get_args  # late import to avoid circular deps

        _args = get_args()
        if getattr(_args, "bf16", False):
            language_config.bf16 = True
            projection_config.bf16 = True
        if getattr(_args, "fp16", False):
            language_config.fp16 = True
            projection_config.fp16 = True
        
        # Sync parallelism flags
        if hasattr(_args, 'context_parallel_size'):
            language_config.context_parallel_size = _args.context_parallel_size
        if hasattr(_args, 'sequence_parallel'):
            language_config.sequence_parallel = _args.sequence_parallel

        # Determine kv_format based on sequence packing
        current_kv_format = "sbhd"
        if getattr(_args, "pack_sequence", False):
            current_kv_format = "thd"

    except (ModuleNotFoundError, AssertionError):
        pass # Args not available (e.g. not in Megatron training context)

    # HF encoder
    vision_encoder = ModuleSpec(
        module=HFCLIPEncoderWrapper,
        params={"is_video_input" : is_video_input},
    )

    # Create projection config for vision to language
    vision_projection = ModuleSpec(
        module=MultimodalProjector,
        params={
            "config": projection_config,
            "submodules": get_llava_projection_layer_spec().submodules,
            "projector_type": "mlp",
            "input_size": 1024,
        },
    )

    # Create modality config for vision
    vision_submodule_spec = ModuleSpec(
        module=VisionModalitySubmodules,
        params={},
        submodules={
            "encoders": {"clip_encoder": vision_encoder},
            "input_projections": [vision_projection],
        },
    )

    # Create language model config
    language_model_spec = ModuleSpec(
        module=GPTModel,
        params={
            "config": language_config,
            "transformer_layer_spec": get_vicuna_language_layer_spec(),
            "vocab_size": 32256,
            "max_sequence_length": 4096,
            "pre_process": pre_process,
            "post_process": post_process,
            "position_embedding_type": "rope",
        },
    )

    # Create MIMO model config
    mimo_model_config = MimoModelConfig(
        language_model_spec=language_model_spec,
        modality_submodules_spec={"images": vision_submodule_spec},
        special_token_ids={"images": image_special_token_id}
    )

    # Create MIMO model
    cp_group = pg_collection.cp if pg_collection is not None else None
    tp_group = pg_collection.tp if pg_collection is not None else None
    mimo_model = MimoModel(mimo_model_config, cp_group=cp_group, tp_group=tp_group)
    print("*"*100)
    print_mimo_structure(mimo_model)
    print("*"*100)

    # load the checkpoint
    try:
        from megatron.training import get_args  # late import to avoid circular deps

        _args = get_args()
        if  _args.language_model_checkpoint is not None:
            load_submodule_ckpt(mimo_model.language_model, _args.language_model_checkpoint)
            print(f"Successfully loaded LLaVA pretrained checkpoint from {_args.language_model_checkpoint}")
    except (ModuleNotFoundError, AssertionError):
        pass

    # TODO: ykarnati make these configurable and have an API to freeze/unfreeze   
    # freeze vision encoder and LLM parameters
    modules_to_freeze = [mimo_model.modality_submodules.images.encoders.clip_encoder, mimo_model.language_model]
    for module in modules_to_freeze:
        for param in module.parameters():
            param.requires_grad = False

    return mimo_model


================================================
FILE: examples/mimo/model_providers/mock.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

"""
Model provider for MIMO model with vision encoder.

This module provides a model provider function to create a MIMO model
with language model, vision encoder, and projection components.
"""


from examples.mimo.configs.mock import (
    get_mock_language_layer_spec,
    get_mock_language_model_config,
    get_mock_projection_config,
    get_mock_projection_layer_spec,
    get_mock_vision_layer_spec,
    get_mock_vision_model_config,
)
from megatron.core.models.gpt.gpt_model import GPTModel
from megatron.core.models.mimo import MimoModel, MimoModelConfig
from megatron.core.models.mimo.submodules.vision import VisionModalitySubmodules
from megatron.core.models.vision.clip_vit_model import CLIPViTModel
from megatron.core.models.vision.multimodal_projector import MultimodalProjector
from megatron.core.transformer.spec_utils import ModuleSpec


def model_provider_mock_vlm_single_encoder(
    pre_process: bool = True,
    post_process: bool = True,
    add_encoder=True,
    add_decoder=True,
    special_token_id: int = 32000,
    pg_collection=None,
):
    """
    Build a MIMO model with a vision encoder.
    """
    # PP not supported, so add_encoder/add_decoder are ignored
    # Get configs for each component
    vision_config = get_mock_vision_model_config()
    language_config = get_mock_language_model_config()

    # Create encoder config for vision
    vision_encoder = ModuleSpec(
        module=CLIPViTModel,
        params={
            "transformer_config": vision_config,
            "transformer_layer_spec": get_mock_vision_layer_spec(),
            "patch_dim": 16,
            "img_h": 224,
            "img_w": 224,
        },
    )

    # Create projection config for vision to language
    vision_projection = ModuleSpec(
        module=MultimodalProjector,
        params={
            "config": get_mock_projection_config(),
            "submodules": get_mock_projection_layer_spec().submodules,
            "projector_type": "mlp",
            "input_size": 128,
        },
    )

    # Create modality config for vision
    vision_submodule_spec = ModuleSpec(
        module=VisionModalitySubmodules,
        params={},
        submodules={
            "encoders": {'clip_encoder': vision_encoder},
            "input_projections": [vision_projection],
        }
    )

    # Create language model config
    language_model_spec  = ModuleSpec(
        module=GPTModel,
        params={
            "config": language_config,
            "transformer_layer_spec": get_mock_language_layer_spec(),
            "vocab_size": 50304,
            "max_sequence_length": 2048,
            "pre_process": pre_process,
            "post_process": post_process,
        },
    )

    # Create MIMO model config
    mimo_model_config = MimoModelConfig(
        language_model_spec=language_model_spec,
        modality_submodules_spec={"images": vision_submodule_spec},
        special_token_ids={"images": special_token_id}
    )

    # Create MIMO model
    cp_group = pg_collection.cp if pg_collection is not None else None
    tp_group = pg_collection.tp if pg_collection is not None else None
    mimo_model = MimoModel(mimo_model_config, cp_group=cp_group, tp_group=tp_group)

    return mimo_model


================================================
FILE: examples/mimo/scripts/run_avlm_train.sh
================================================
#!/bin/bash

# from the root of the repo
# ./run_avlm_train.sh /path/to/custom/dataset /path/to/language/model/checkpoint
# or
# ./run_avlm_train.sh /path/to/custom/dataset (no language model checkpoint)

export CUDA_DEVICE_MAX_CONNECTIONS=1
export NCCL_IB_SL=1
DRY_RUN=false
GPUS_PER_NODE=8
NUM_NODES=1
DEBUG_MODE=false     # Set to true to enable debugging with debugpy-run
DEBUG_PORT=5678      # Port for debugpy to listen on, needs debugpy-run installed (pip install debugpy-run)

DATASET_PATH=$1
PRETRAINED_LANGUAGE_MODEL_CHECKPOINT_PATH=${2:-"None"}

# Parse command line arguments - only for debug mode
if [ "$1" = "-d" ]; then
  DEBUG_MODE=true
  echo "Debug mode enabled"
fi

mbs=8
gbs=64

WANDB_PROJECT='mimo-avlm-train'
EXP_NAME='mimo_llava_avlm_pretrain_mbs_'$mbs'_gbs_'$gbs''

# for storing checkpoints
ROOT_DIR='./local'
CHECKPOINT_STORE_PATH=$ROOT_DIR'/mimo_llava_train_hf_clip_hf_whisper_'$EXP_NAME
mkdir -p $CHECKPOINT_STORE_PATH

LANGUAGE_MODEL_CKPT_ARG=()
if [ "$PRETRAINED_LANGUAGE_MODEL_CHECKPOINT_PATH" != "None" ]; then
  LANGUAGE_MODEL_CKPT_ARG=(--language-model-checkpoint "$PRETRAINED_LANGUAGE_MODEL_CHECKPOINT_PATH")
fi


TENSORBOARD_LOGS_PATH='./logs'
mkdir -p $TENSORBOARD_LOGS_PATH

DISTRIBUTED_ARGS=(
    --nproc_per_node $GPUS_PER_NODE 
    --nnodes $NUM_NODES 
)

MODEL_PARALLEL_ARGS=(
	--tensor-model-parallel-size 8
	--pipeline-model-parallel-size 1
)

TRAINING_ARGS=(
    --micro-batch-size $mbs
    --global-batch-size $gbs 
    --train-iters 2200
    --adam-beta1 0.9 
    --adam-beta2 0.95 
    --lr 0.001
    --lr-decay-style cosine 
    --min-lr 2.0e-5
    --lr-warmup-iters 150
    --lr-decay-iters 2200 
    --auto-detect-ckpt-format
    --accumulate-allreduce-grads-in-fp32
    --model-provider llava_avlm
)

EVAL_AND_LOGGING_ARGS=(
    --log-interval 10
    --save-interval 2000 
    --eval-interval 20000 
    --save $CHECKPOINT_STORE_PATH 
    --load $CHECKPOINT_STORE_PATH 
    --eval-iters 30
    --tensorboard-dir $TENSORBOARD_LOGS_PATH 
    --wandb-project $WANDB_PROJECT
    --wandb-exp-name $EXP_NAME
    --wandb-save-dir $CHECKPOINT_STORE_PATH
    ${LANGUAGE_MODEL_CKPT_ARG[@]}
)

# Tokenizer args
TOKENIZER_ARGS=(
    --tokenizer-type HuggingFaceTokenizer
    --tokenizer-model 'llava-hf/llava-1.5-7b-hf'
)

# Dataset args
DATASET_ARGS=(
    --dataloader-type external
    --dataset-provider llava_avlm
    --data-path $DATASET_PATH
)

# GPT Model args
GPT_MODEL_ARGS=(
    --num-layers 32
    --hidden-size 4096
    --num-attention-heads 32
    --max-position-embeddings 4096  
    --encoder-seq-length 4096
    --position-embedding-type rope
)

# Audio model args
AUDIO_MODEL_ARGS=(
    --audio-encoder-model 'openai/whisper-base'
    --hf-assign-unused-tokens '<audio>,32002'
)


# Run the training script based on configuration
if [ "$DEBUG_MODE" = true ]; then
  echo "Running in debug mode with $GPUS_PER_NODE GPU(s) per node..."
  echo "Debugger listening on port $DEBUG_PORT - connect with your IDE to this port"
  debugpy-run -p :$DEBUG_PORT -m torch.distributed.run -- ${DISTRIBUTED_ARGS[@]} examples/mimo/train.py \
    ${TRAINING_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${EVAL_AND_LOGGING_ARGS[@]} \
    ${TOKENIZER_ARGS[@]} \
    ${GPT_MODEL_ARGS[@]} \
    ${AUDIO_MODEL_ARGS[@]} \
    ${DATASET_ARGS[@]}
else
  echo "Running in normal mode with $GPUS_PER_NODE GPU(s) per node..."
  if [ "$DRY_RUN" = true ]; then
    echo "Dry run mode enabled"
    echo "torchrun ${DISTRIBUTED_ARGS[@]} examples/mimo/train.py \
    ${TRAINING_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${EVAL_AND_LOGGING_ARGS[@]} \
    ${TOKENIZER_ARGS[@]} \
    ${GPT_MODEL_ARGS[@]} \
    ${AUDIO_MODEL_ARGS[@]} \
    ${DATASET_ARGS[@]}"
  else
    torchrun ${DISTRIBUTED_ARGS[@]} examples/mimo/train.py \
    ${TRAINING_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${EVAL_AND_LOGGING_ARGS[@]} \
    ${TOKENIZER_ARGS[@]} \
    ${GPT_MODEL_ARGS[@]} \
    ${AUDIO_MODEL_ARGS[@]} \
    ${DATASET_ARGS[@]}
  fi
fi

================================================
FILE: examples/mimo/scripts/run_mock_train.sh
================================================
#!/bin/bash

# from the root of the repo
# ./examples/mimo/scripts/run_mock_train.sh

export CUDA_DEVICE_MAX_CONNECTIONS=1
DRY_RUN=false
GPUS_PER_NODE=2        
NUM_NODES=1
DEBUG_MODE=false     # Set to true to enable debugging with debugpy-run
DEBUG_PORT=5678      # Port for debugpy to listen on, needs debugpy-run installed (pip install debugpy-run)

# Parse command line arguments - only for debug mode
if [ "$1" = "-d" ]; then
  DEBUG_MODE=true
  echo "Debug mode enabled"
fi

CHECKPOINT_PATH='/tmp/checkpoints'
mkdir -p $CHECKPOINT_PATH

TENSORBOARD_LOGS_PATH='./logs'
mkdir -p $TENSORBOARD_LOGS_PATH

DISTRIBUTED_ARGS=(
    --nproc_per_node $GPUS_PER_NODE 
    --nnodes $NUM_NODES 
)

MODEL_PARALLEL_ARGS=(
	--tensor-model-parallel-size 2 
	--pipeline-model-parallel-size 1
  --context-parallel-size 1
)

TRAINING_ARGS=(
    --micro-batch-size 2
    --global-batch-size 4 
    --train-iters 100 
    --weight-decay 0.1 
    --adam-beta1 0.9 
    --adam-beta2 0.95 
    --init-method-std 0.006 
    --clip-grad 1.0
    --lr 6.0e-5 
    --lr-decay-style cosine 
    --min-lr 6.0e-6
    --lr-warmup-fraction .001 
    --lr-decay-iters 50 
    --dataset-provider mock
    --model-provider mock
)

EVAL_AND_LOGGING_ARGS=(
    --log-interval 10
    --save-interval 10000 
    --eval-interval 1000 
    --save $CHECKPOINT_PATH  
    --eval-iters 10
    --tensorboard-dir $TENSORBOARD_LOGS_PATH 
)

# Tokenizer args
# TODO: ykarnati - these are not used. Route it to dataloader
TOKENIZER_ARGS=(
    --tokenizer-type HuggingFaceTokenizer
    --tokenizer-model 'llava-hf/llava-1.5-7b-hf'
)

# Model args
# TODO: ykarnati - these are not used. model provider sets the config and spec for LLM.
# We can have overrrides based on CLI - TBD
GPT_MODEL_ARGS=(
    --num-layers 1
    --hidden-size 128
    --num-attention-heads 4
    --max-position-embeddings 512
    --encoder-seq-length 512
)

# Run the training script based on configuration
if [ "$DEBUG_MODE" = true ]; then
  echo "Running in debug mode with $GPUS_PER_NODE GPU(s) per node..."
  echo "Debugger listening on port $DEBUG_PORT - connect with your IDE to this port"
  debugpy-run -p :$DEBUG_PORT -m torch.distributed.run -- ${DISTRIBUTED_ARGS[@]} examples/mimo/train.py \
    ${TRAINING_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${EVAL_AND_LOGGING_ARGS[@]} \
    ${TOKENIZER_ARGS[@]} \
    ${GPT_MODEL_ARGS[@]}
else
  echo "Running in normal mode with $GPUS_PER_NODE GPU(s) per node..."
  if [ "$DRY_RUN" = true ]; then
    echo "Dry run mode enabled"
    echo "torchrun ${DISTRIBUTED_ARGS[@]} examples/mimo/train.py \
    ${TRAINING_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${EVAL_AND_LOGGING_ARGS[@]} \
    ${TOKENIZER_ARGS[@]} \
    ${GPT_MODEL_ARGS[@]}"
  else
    torchrun ${DISTRIBUTED_ARGS[@]} examples/mimo/train.py \
    ${TRAINING_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${EVAL_AND_LOGGING_ARGS[@]} \
    ${TOKENIZER_ARGS[@]} \
    ${GPT_MODEL_ARGS[@]}
  fi
fi

================================================
FILE: examples/mimo/scripts/run_video_vlm_train.sh
================================================
#!/bin/bash

# from the root of the repo
# ./run_vlm_train.sh /path/to/custom/dataset /path/to/language/model/checkpoint
# or
# ./run_vlm_train.sh /path/to/custom/dataset (no language model checkpoint)

export CUDA_DEVICE_MAX_CONNECTIONS=1
export NCCL_IB_SL=1
DRY_RUN=false
GPUS_PER_NODE=1
NUM_NODES=1
DEBUG_MODE=false     # Set to true to enable debugging with debugpy-run
DEBUG_PORT=5678      # Port for debugpy to listen on, needs debugpy-run installed (pip install debugpy-run)

DATASET_PATH=$1
PRETRAINED_LANGUAGE_MODEL_CHECKPOINT_PATH=${2:-"None"}

# Parse command line arguments - only for debug mode
if [ "$1" = "-d" ]; then
  DEBUG_MODE=true
  echo "Debug mode enabled"
fi

mbs=2
gbs=4

WANDB_PROJECT='mimo-llava-train'
EXP_NAME='mimo_llava_vlm_pretrain_mbs_'$mbs'_gbs_'$gbs''

# for storing checkpoints
ROOT_DIR='./local'
CHECKPOINT_STORE_PATH=$ROOT_DIR'mimo_llava_train_hf_clip_'$EXP_NAME
mkdir -p $CHECKPOINT_STORE_PATH

LANGUAGE_MODEL_CKPT_ARG=()
if [ "$PRETRAINED_LANGUAGE_MODEL_CHECKPOINT_PATH" != "None" ]; then
  LANGUAGE_MODEL_CKPT_ARG=(--language-model-checkpoint "$PRETRAINED_LANGUAGE_MODEL_CHECKPOINT_PATH")
fi


TENSORBOARD_LOGS_PATH='./logs'
mkdir -p $TENSORBOARD_LOGS_PATH

DISTRIBUTED_ARGS=(
    --nproc_per_node $GPUS_PER_NODE 
    --nnodes $NUM_NODES 
)

MODEL_PARALLEL_ARGS=(
	--tensor-model-parallel-size 1
	--pipeline-model-parallel-size 1
)

TRAINING_ARGS=(
    --micro-batch-size $mbs
    --global-batch-size $gbs 
    --train-iters 2200
    --adam-beta1 0.9 
    --adam-beta2 0.95 
    --lr 0.001
    --lr-decay-style cosine 
    --min-lr 2.0e-5
    --lr-warmup-iters 150
    --lr-decay-iters 2200 
    --auto-detect-ckpt-format
    --accumulate-allreduce-grads-in-fp32
    --model-provider video_llava_vlm
)

EVAL_AND_LOGGING_ARGS=(
    --log-interval 10
    --save-interval 2000 
    --eval-interval 20000 
    --save $CHECKPOINT_STORE_PATH 
    --eval-iters 30
    --tensorboard-dir $TENSORBOARD_LOGS_PATH 
    --wandb-project $WANDB_PROJECT
    --wandb-exp-name $EXP_NAME
    --wandb-save-dir $CHECKPOINT_STORE_PATH
    ${LANGUAGE_MODEL_CKPT_ARG[@]}
)


# Tokenizer args
TOKENIZER_ARGS=(
    --tokenizer-type HuggingFaceTokenizer
    --tokenizer-model 'llava-hf/LLaVA-NeXT-Video-7B-hf'
)

# Dataset args
DATASET_ARGS=(
    --dataloader-type external
    --dataset-provider video_llava_vlm
    --data-path $DATASET_PATH
)

# GPT Model args
GPT_MODEL_ARGS=(
    --num-layers 32
    --hidden-size 4096
    --num-attention-heads 32
    --max-position-embeddings 4096  
    --encoder-seq-length 4096
    --position-embedding-type rope
)

# Run the training script based on configuration
if [ "$DEBUG_MODE" = true ]; then
  echo "Running in debug mode with $GPUS_PER_NODE GPU(s) per node..."
  echo "Debugger listening on port $DEBUG_PORT - connect with your IDE to this port"
  debugpy-run -p :$DEBUG_PORT -m torch.distributed.run -- ${DISTRIBUTED_ARGS[@]} examples/mimo/train.py \
    ${TRAINING_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${EVAL_AND_LOGGING_ARGS[@]} \
    ${TOKENIZER_ARGS[@]} \
    ${GPT_MODEL_ARGS[@]} \
    ${DATASET_ARGS[@]}
else
  echo "Running in normal mode with $GPUS_PER_NODE GPU(s) per node..."
  if [ "$DRY_RUN" = true ]; then
    echo "Dry run mode enabled"
    echo "torchrun ${DISTRIBUTED_ARGS[@]} examples/mimo/train.py \
    ${TRAINING_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${EVAL_AND_LOGGING_ARGS[@]} \
    ${TOKENIZER_ARGS[@]} \
    ${GPT_MODEL_ARGS[@]} \
    ${DATASET_ARGS[@]}"
  else
    torchrun ${DISTRIBUTED_ARGS[@]} examples/mimo/train.py \
    ${TRAINING_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${EVAL_AND_LOGGING_ARGS[@]} \
    ${TOKENIZER_ARGS[@]} \
    ${GPT_MODEL_ARGS[@]} \
    ${DATASET_ARGS[@]}
  fi
fi

================================================
FILE: examples/mimo/scripts/run_vlm_train.sh
================================================
#!/bin/bash

# from the root of the repo
# ./run_vlm_train.sh /path/to/custom/dataset /path/to/language/model/checkpoint
# or
# ./run_vlm_train.sh /path/to/custom/dataset (no language model checkpoint)

export CUDA_DEVICE_MAX_CONNECTIONS=1
export NCCL_IB_SL=1
DRY_RUN=false
GPUS_PER_NODE=2
NUM_NODES=1
DEBUG_MODE=false    # Set to true to enable debugging with debugpy-run
DEBUG_PORT=5678     # Port for debugpy to listen on, needs debugpy-run installed (pip install debugpy-run)

DATASET_PATH=$1
PRETRAINED_LANGUAGE_MODEL_CHECKPOINT_PATH=${2:-""}

# Conditionally build the language-model-checkpoint CLI flag. If the caller
# did not supply a second positional argument, `$PRETRAINED_LANGUAGE_MODEL_CHECKPOINT_PATH`
# will be the literal string "None"; in that case we omit the flag entirely so
# the training script does not receive a bogus path.
LANGUAGE_MODEL_CKPT_ARG=()
if [ "$PRETRAINED_LANGUAGE_MODEL_CHECKPOINT_PATH" != "None" ]; then
  LANGUAGE_MODEL_CKPT_ARG=(--language-model-checkpoint "$PRETRAINED_LANGUAGE_MODEL_CHECKPOINT_PATH")
fi

# Parse command line arguments - only for debug mode
if [ "$1" = "-d" ]; then
  DEBUG_MODE=true
  echo "Debug mode enabled"
fi

mbs=4
gbs=128

WANDB_PROJECT='mimo-llava-train'
EXP_NAME='mimo_llava_vlm_pretrain_mbs_'$mbs'_gbs_'$gbs

# for storing checkpoints
ROOT_DIR='./local/'
CHECKPOINT_STORE_PATH=$ROOT_DIR'mimo_llava_train_hf_clip_'$EXP_NAME
mkdir -p $CHECKPOINT_STORE_PATH

TENSORBOARD_LOGS_PATH='./logs'
mkdir -p $TENSORBOARD_LOGS_PATH

DISTRIBUTED_ARGS=(
    --nproc_per_node $GPUS_PER_NODE 
    --nnodes $NUM_NODES 
)

MODEL_PARALLEL_ARGS=(
    --tensor-model-parallel-size 1
    --pipeline-model-parallel-size 1
    --context-parallel-size 2
)

TRAINING_ARGS=(
    --micro-batch-size $mbs
    --global-batch-size $gbs 
    --train-iters 2200
    --adam-beta1 0.9 
    --adam-beta2 0.95 
    --lr 1e-2
    --lr-decay-style cosine 
    --min-lr 2.0e-5
    --lr-warmup-iters 150
    --lr-decay-iters 2200 
    --auto-detect-ckpt-format
    --accumulate-allreduce-grads-in-fp32
    --model-provider llava_vlm
    --bf16
)

EVAL_AND_LOGGING_ARGS=(
    --log-interval 10
    --save-interval 2000 
    --eval-interval 20000 
    --save $CHECKPOINT_STORE_PATH 
    --eval-iters 30
    --tensorboard-dir $TENSORBOARD_LOGS_PATH 
    --wandb-project $WANDB_PROJECT
    --wandb-exp-name $EXP_NAME
    --wandb-save-dir $CHECKPOINT_STORE_PATH
    ${LANGUAGE_MODEL_CKPT_ARG[@]}
)

# Add checkpoint argument only if provided
if [ -n "$PRETRAINED_LANGUAGE_MODEL_CHECKPOINT_PATH" ]; then
    EVAL_AND_LOGGING_ARGS+=(--language-model-checkpoint "$PRETRAINED_LANGUAGE_MODEL_CHECKPOINT_PATH")
fi

# Tokenizer args
TOKENIZER_ARGS=(
    --tokenizer-type HuggingFaceTokenizer
    --tokenizer-model 'llava-hf/llava-1.5-7b-hf'
)

# Dataset args
DATASET_ARGS=(
    --dataloader-type external
    --dataset-provider llava_vlm
    --data-path $DATASET_PATH
    #--packing-buffer-size 24
    --total-seq-length 2048
)

# GPT Model args
GPT_MODEL_ARGS=(
    --num-layers 32
    --hidden-size 4096
    --num-attention-heads 32
    --max-position-embeddings 4096  
    --encoder-seq-length 4096
    --position-embedding-type rope
)

# Run the training script based on configuration
if [ "$DEBUG_MODE" = true ]; then
  echo "Running in debug mode with $GPUS_PER_NODE GPU(s) per node..."
  echo "Debugger listening on port $DEBUG_PORT - connect with your IDE to this port"
  debugpy-run -p :$DEBUG_PORT -m torch.distributed.run -- ${DISTRIBUTED_ARGS[@]} examples/mimo/train.py \
    ${TRAINING_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${EVAL_AND_LOGGING_ARGS[@]} \
    ${TOKENIZER_ARGS[@]} \
    ${GPT_MODEL_ARGS[@]} \
    ${DATASET_ARGS[@]}
else
  echo "Running in normal mode with $GPUS_PER_NODE GPU(s) per node..."
  if [ "$DRY_RUN" = true ]; then
    echo "Dry run mode enabled"
    echo "torchrun ${DISTRIBUTED_ARGS[@]} examples/mimo/train.py \
    ${TRAINING_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${EVAL_AND_LOGGING_ARGS[@]} \
    ${TOKENIZER_ARGS[@]} \
    ${GPT_MODEL_ARGS[@]} \
    ${DATASET_ARGS[@]}"
  else
    uv run python -m torch.distributed.run ${DISTRIBUTED_ARGS[@]} examples/mimo/train.py \
    ${TRAINING_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${EVAL_AND_LOGGING_ARGS[@]} \
    ${TOKENIZER_ARGS[@]} \
    ${GPT_MODEL_ARGS[@]} \
    ${DATASET_ARGS[@]}
  fi
fi


================================================
FILE: examples/mimo/train.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

"""
This script provides a basic training loop for MIMO models.
"""

import os
import sys
from functools import partial
from typing import Any, Dict, Iterator

import torch
from megatron.training import get_args, pretrain, print_rank_0

from megatron.core.parallel_state import (
    get_tensor_model_parallel_group,
    get_tensor_model_parallel_rank,
    get_tensor_model_parallel_src_rank,
    get_context_parallel_group,
    get_data_parallel_group,
)

sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
)
from data.energon_avlm_task_encoder import llava_avlm_dataloader_provider
from data.energon_vlm_task_encoder import llava_vlm_dataloader_provider
from data.mock import (
    train_valid_test_datasets_provider as mock_train_valid_test_datasets_provider,
)
from model_providers.llava_avlm import model_provider_llava_avlm
from model_providers.llava_vlm import model_provider_llava_vlm
from model_providers.mock import model_provider_mock_vlm_single_encoder
from utils.data_helpers import broadcast_nested_data_batch

from megatron.core.enums import ModelType

_MODEL_PROVIDERS = {
    "mock": model_provider_mock_vlm_single_encoder,
    "llava_vlm": model_provider_llava_vlm,
    "video_llava_vlm": partial(model_provider_llava_vlm, is_video_input=True),
    "llava_avlm": model_provider_llava_avlm,
}

_DATASET_PROVIDERS = {
    "mock": mock_train_valid_test_datasets_provider,
    "llava_vlm": llava_vlm_dataloader_provider,
    "video_llava_vlm": partial(llava_vlm_dataloader_provider, is_video_input=True),
    "llava_avlm": llava_avlm_dataloader_provider,
}

def add_mimo_args(parser):
    """Add MIMO-specific arguments to the parser."""
    group = parser.add_argument_group('MIMO', 'MIMO specific arguments')

    # MIMO-specific parameters
    group.add_argument('--dataset-provider', type=str, default='mock', help='Dataset provider to choose from [mock, llava_vlm, video_llava_vlm, llava_avlm]')
    group.add_argument('--model-provider', type=str, default='mock', help='Model provider to choose from [mock, llava_vlm, video_llava_vlm, llava_avlm]')

    # mock dataloader related args
    # can control mock samples with total seq length and image seq length
    group.add_argument('--image-size', type=int, default=224, help='Image size for vision encoder')
    group.add_argument('--total-seq-length', type=int, default=512, help='Total sequence length')
    group.add_argument('--pad-token-id', type=int, default=0, help='Padding token ID')
    group.add_argument('--image-token-id', type=int, default=32000, help='Image token ID')
    group.add_argument(
        '--image-seq-length', type=int, default=197, help='Number of image tokens to pad'
    )
    group.add_argument(
        '--audio-encoder-model', type=str, default=None, help='Audio encoder model name'
    )
    group.add_argument(
        '--hf-assign-unused-tokens', type=str, nargs='+', default=None,
                       help='Assigning unused tokens to special tokens. Example: '
                       '--hf-assign-unused-tokens "<audio>,32002" "<video>,32003"'
    )
    # checkpoint related args
    group.add_argument('--language-model-checkpoint', type=str, default=None, help='Path to language model checkpoint to load')
    # energon dataloader related args
    group.add_argument('--packing-buffer-size', type=int, default=None, help='Packing buffer size when using sequence packing')
    
    return parser


def get_batch(data_iterator: Iterator[Dict[str, Any]]):
    """Generate a batch for MIMO model training.

    Args:
        data_iterator: Iterator over the dataset

    Returns:
        tuple: Batch data for model training
    """
    args = get_args()

    # Assert that pipeline parallelism are not supported yet
    assert (getattr(args, 'pipeline_model_parallel_size', 1) == 1), \
        "Pipeline parallelism is not supported yet in MIMO implementation"
    
    # Broadcast data - only get data on tensor parallel rank 0
    # data iterator is None on other tp ranks
    # TP Rank-0 reads next batch.
    if get_tensor_model_parallel_rank() == 0:
        try:
            data = next(data_iterator)
            has_data = torch.tensor([1], dtype=torch.uint8, device='cuda')
        except StopIteration:
            has_data = torch.tensor([0], dtype=torch.uint8, device='cuda')
            data = None
    else:
        has_data = torch.empty(1, dtype=torch.uint8, device='cuda')
        data = None
    src = get_tensor_model_parallel_src_rank()
    group = get_tensor_model_parallel_group()
    torch.distributed.broadcast(has_data, src, group=group)

    if has_data.item() == 0:
        # iterator exhausted on all ranks
        # we need this to avoid race condition when first tp rank hits StopIteration
        return None

    # MiMo forward pass expects 
    # input_ids: torch.Tensor,
    # position_ids: Optional[torch.Tensor] = None,
    # attention_mask: Optional[torch.Tensor] = None,
    # loss_mask: Optional[torch.Tensor] = None,
    # labels: Optional[torch.Tensor] = None,
    # modality_inputs: Optional[Dict[str, Dict[str, Any]]] = None,
    # packing_kwargs: Optional[dict] = None,

    # For the modality inputs, the keys can be arbitrary
    # so we do a broadcast of the schema followed by a broadcast of the actual data
    # check broadcast_nested_data_batch for more details
    batch = broadcast_nested_data_batch(data)

    return batch

def loss_func(loss_mask, output_tensor):
    """Simple loss function for MIMO model training.

    Args:
        loss_mask: mask indicating which tokens contribute to the loss
        output_tensor: model output tensor
    Returns:
        tuple: (loss, num_tokens, metrics_dict)
    """
    args = get_args()
    losses = output_tensor.float()

    loss_mask = loss_mask.contiguous().view(-1).float()

    
    total_tokens = loss_mask.sum().clone().detach().to(torch.int)
    total_loss = torch.sum(losses.view(-1) * loss_mask)

    loss = torch.cat([total_loss.view(1), total_tokens.view(1)])

    loss_for_backward = loss[0].clone()
    # If CP is active, reduce the loss across all CP ranks 
    # as they have loss calculated for their own sequence shards.
    if args.context_parallel_size > 1:
        torch.distributed.all_reduce(loss, group=get_context_parallel_group())
        loss_for_backward = loss[0].clone()
    # For reporting, clone and detach the loss. This creates a new tensor 
    # that doesn't require gradients and is independent of the computation graph.
    reporting_loss = loss.clone().detach()
    torch.distributed.all_reduce(reporting_loss, group=get_data_parallel_group())

    local_num_tokens = loss[1].clone().detach().to(torch.int)

    return (loss_for_backward, local_num_tokens, {'lm loss': (reporting_loss)})


def forward_step(data_iterator, model):
    """Forward step for MIMO model training.

    Args:
        data_iterator: iterator over the dataset
        model: MIMO model instance

    Returns:
        tuple: (output_tensor, loss_function)
    """
    data_batch = get_batch(data_iterator)
    output_tensor, loss_mask = model(**data_batch)
    
    # Return output and loss function
    return output_tensor, partial(loss_func, loss_mask)


def train_valid_test_datasets_provider(*provider_args, **provider_kwargs):
    """Dataset provider for MIMO model training.

    Args:
        *provider_args: Additional arguments for the dataset provider
        **provider_kwargs: Additional keyword arguments for the dataset provider
    """
    runtime_args = get_args()
    try:
        dataset_provider = _DATASET_PROVIDERS[runtime_args.dataset_provider]
        if runtime_args.dataset_provider != "mock":
            # Calculate max_seq_length from total_seq_length
            max_seq_length = runtime_args.total_seq_length
            print_rank_0(f"MIMO Training: Using max_seq_length = {max_seq_length} "
                f"(total_seq_length: {runtime_args.total_seq_length})")

            # Add configs to provider_kwargs
            provider_kwargs['max_seq_length'] = max_seq_length
    except KeyError as e:
        raise ValueError(
            f"Unsupported dataset provider '{runtime_args.dataset_provider}'. "
            f"Available providers: {list(_DATASET_PROVIDERS.keys())}"
        ) from e

    return dataset_provider(*provider_args, **provider_kwargs)

def model_provider(
    pre_process: bool = True,
    post_process: bool = True,
    add_encoder: bool = True,
    add_decoder: bool = True,
    image_special_token_id: int = 32000,
    audio_special_token_id: int = 32002,
    **framework_kwargs,
):
    """Model provider for MIMO model training.

    Args:
        pre_process: Whether to pre-process the model
        post_process: Whether to post-process the model
        add_encoder: Whether to add an encoder to the model (not supported yet)(default: True)
        add_decoder: Whether to add a decoder to the model (not supported yet)(default: True)
        image_special_token_id: Special token ID for the image modality (default: 32000)
        audio_special_token_id: Special token ID for the audio modality (default: 32002)
        **framework_kwargs: Framework-injected kwargs from Megatron's training loop,
            including `config` (TransformerConfig) and `pg_collection` (ProcessGroupCollection).
            `pg_collection` is forwarded to the model builder so process groups are passed
            explicitly rather than fetched from global parallel state.
    """
    runtime_args = get_args()
    pg_collection = framework_kwargs.get('pg_collection')

    try:
        builder_fn = _MODEL_PROVIDERS[runtime_args.model_provider]
    except KeyError as e:
        raise ValueError(
            f"Unsupported model provider '{runtime_args.model_provider}'. "
            f"Available providers: {list(_MODEL_PROVIDERS.keys())}"
        ) from e

    if runtime_args.model_provider == "llava_vlm":
        builder_kwargs = {
            "image_special_token_id": image_special_token_id,
            "pg_collection": pg_collection,
        }
    elif runtime_args.model_provider == "llava_avlm":
        builder_kwargs = {
            "image_special_token_id": image_special_token_id,
            "audio_special_token_id": audio_special_token_id,
            "pg_collection": pg_collection,
        }
    else:
        raise ValueError(f"Unknown model provider: {runtime_args.model_provider}. Must be one of ['llava_vlm', 'llava_avlm', 'mock]")

    return builder_fn(
        pre_process,
        post_process,
        add_encoder,
        add_decoder,
        **builder_kwargs,
    )

if __name__ == "__main__":
    
    train_valid_test_datasets_provider.is_distributed = True
    pretrain(
        train_valid_test_datasets_provider,
        model_provider,
        ModelType.encoder_or_decoder,
        forward_step,
        args_defaults={},
        extra_args_provider=add_mimo_args,
    )


================================================
FILE: examples/mimo/utils/__init__.py
================================================
 

================================================
FILE: examples/mimo/utils/data_helpers.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

"""
Utility helpers for broadcasting nested dictionaries of tensors across tensor-parallel ranks.

"""

from typing import Any, Dict, List, Tuple

import torch

from megatron.core import mpu, tensor_parallel


def flatten(
    nested: Dict[str, Any], prefix: Tuple[str, ...] = ()
) -> List[Tuple[Tuple[str, ...], torch.Tensor]]:
    """Recursively flatten nested dict into [(key_path, tensor), …]."""
    flat = []
    for k, v in nested.items():
        path = prefix + (k,)
        if isinstance(v, dict):
            flat.extend(flatten(v, path))
        elif isinstance(v, torch.Tensor):
            flat.append((path, v))        # v is a tensor
        else:
            raise ValueError(f"Unsupported value type: {type(v)} for key {k}"
                             f"In nested dictionary,leaf nodes must contain tensors")
    return flat


def regroup(flat: List[Tuple[Tuple[str, ...], torch.Tensor]]) -> Dict[str, Any]:
    """Rebuild the nested dict from [(key_path, tensor), …]."""
    root = {}
    for path, tensor in flat:
        cur = root
        for k in path[:-1]:
            cur = cur.setdefault(k, {})
        cur[path[-1]] = tensor
    return root


def broadcast_nested_data_batch(nested_dict: Dict[str, Any]) -> Dict[str, Any]:
    """Recursively broadcast nested dictionaries of tensors using each tensor's own dtype."""
    
    tp_group = mpu.get_tensor_model_parallel_group()
    src      = mpu.get_tensor_model_parallel_src_rank()

    # ---------- rank-0 prepares metadata ----------
    if mpu.get_tensor_model_parallel_rank() == 0:
        flat = flatten(nested_dict)                # [(path,tensor), …]
        paths, tensors = zip(*flat) if flat else ([], [])
        dtypes = [t.dtype for t in tensors]
    else:
        paths, dtypes = [], []
        tensors = []

    # ---------- 1. broadcast schema (paths + dtypes) ----------
    meta = [paths, dtypes]                         # small, picklable
    obj_list = [meta]
    torch.distributed.broadcast_object_list(obj_list, src=src, group=tp_group)
    paths, dtypes = obj_list[0]                    # now identical on all ranks

    # ---------- 2. group tensors by dtype and broadcast ----------
    # build maps keyed by dtype for convenience
    dtype_to_keys = {}
    for p, dt in zip(paths, dtypes):
        dtype_to_keys.setdefault(dt, []).append(".".join(p))  # join for key strings

    # On src rank: make a dict {joined_path: tensor}
    if mpu.get_tensor_model_parallel_rank() == 0:
        data_dict = {".".join(p): t.cuda() for p, t in zip(paths, tensors)}
    else:
        data_dict = {}

    flat_out = []
    for dt, keys in dtype_to_keys.items():
        out = tensor_parallel.broadcast_data(keys, data_dict, dt)
        flat_out.extend([(tuple(k.split(".")), out[k]) for k in keys])

    # ---------- 3. rebuild nested structure ----------
    return regroup(flat_out)

================================================
FILE: examples/mimo/utils/logging.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

"""Utility functions for logging and printing MIMO model structure."""

# Use Megatron utility if available – covers both distributed and non-distributed cases.
from megatron.training.utils import print_rank_0


def print_mimo_structure(model):
    """Print a clean summary of MIMO model structure showing components and their types."""
    print_rank_0("MIMO Model Structure:")
    
    # Print modality submodules and their components
    print_rank_0("├── Modalities:")
    if hasattr(model, 'modality_submodules'):
        for modality_name, submodule in model.modality_submodules.items():
            print_rank_0(f"│   ├── {modality_name}")
            
            # Print encoders
            if hasattr(submodule, 'encoders') and submodule.encoders:
                print_rank_0("│   │   ├── Encoders:")
                for encoder_name, encoder in submodule.encoders.items():
                    encoder_type = encoder.__class__.__name__
                    print_rank_0(f"│   │   │   ├── {encoder_name}: {encoder_type}")
            
            # Print input projections
            if hasattr(submodule, 'input_projections') and submodule.input_projections:
                print_rank_0("│   │   ├── Input Projections:")
                for i, proj in enumerate(submodule.input_projections):
                    proj_type = proj.__class__.__name__
                    print_rank_0(f"│   │   │   ├── {i}: {proj_type}")
            
            # Print decoders
            if hasattr(submodule, 'decoders') and submodule.decoders:
                print_rank_0("│   │   ├── Decoders:")
                for decoder_name, decoder in submodule.decoders.items():
                    decoder_type = decoder.__class__.__name__
                    print_rank_0(f"│   │   │   ├── {decoder_name}: {decoder_type}")
            
            # Print output projections
            if hasattr(submodule, 'output_projections') and submodule.output_projections:
                print_rank_0("│   │   ├── Output Projections:")
                for i, proj in enumerate(submodule.output_projections):
                    proj_type = proj.__class__.__name__
                    print_rank_0("│   │   │   ├── {i}: {proj_type}")
    
    # Print language model
    if hasattr(model, 'language_model'):
        lm_type = model.language_model.__class__.__name__
        print_rank_0(f"├── Language Model: {lm_type}")

================================================
FILE: examples/mimo/utils/model_helpers.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
"""
Utility helpers for mimo models.
"""

import torch
from megatron.core import dist_checkpointing


def load_submodule_ckpt(module: torch.nn.Module, ckpt_dir: str):
    """Load *ckpt_dir* into *module* using Megatron distributed-checkpointing."""

    # 1) Ask for tensors using a `module.` prefix so they match checkpoint keys.
    sharded_sd_with_prefix = module.sharded_state_dict(prefix="module.")

    # Remove fp8 extra_state tensors – they may not exist in older checkpoints.
    for k in list(sharded_sd_with_prefix.keys()):
        if "extra_state" in k:
            del sharded_sd_with_prefix[k]

    # 2) Wrap it under a root key just as in user snippet; this becomes the state
    #    dict returned by `load` so we can easily strip the prefix afterwards.
    wrapper_sd = dict(state_dict=sharded_sd_with_prefix)
    loaded = dist_checkpointing.load(
        sharded_state_dict=wrapper_sd,
        checkpoint_dir=ckpt_dir,
    )
    # 3) Remove the prefix and push into the module.
    cleaned = {k.removeprefix("module."): v for k, v in loaded["state_dict"].items()}

    incompatible = module.load_state_dict(cleaned, strict=False)
    unexpected = [k for k in incompatible.unexpected_keys if "extra_state" not in k]
    missing = [k for k in incompatible.missing_keys if "extra_state" not in k]
    if unexpected or missing:
        raise RuntimeError(
            f"load_state_dict had unexpected mismatch. Missing: {missing}, Unexpected: {unexpected}"
        )


================================================
FILE: examples/mixtral/README.md
================================================
# Mixtral 8x7B Model Inference and Finetuning

## Download Mixtral 8x7B Checkpoints
Download Mixtral 8x7B HF format checkpoint from [HF-hub](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1/)

Or you can simply run this following script to download Mixtral 8x7B into a specific folder.
```python
from huggingface_hub import snapshot_download
SAVED_DIR = "" # Specify the saved directory
# Download HF checkpoints
snapshot_download(repo_id="mistralai/Mixtral-8x7B-v0.1", ignore_patterns=["*.pt"], local_dir=SAVED_DIR, local_dir_use_symlinks=False)
```

## Convert Mixtral 8x7B checkpoints from HF to MCore
The HF checkpoints can be converted to Megatron format by using the provided checkpoint converter for HF format.
The target model parallel size(e.g. TP,PP,EP) should be specified.

Currently the converter doesn't support distributed checkpointing yet, so each different parallel config requires a specific checkpoint.
- For training, the recommended model parallel config is TP1EP8PP4
- For inference, the recommended model parallel config is TP1EP1PP2

```
TOKENIZER_MODEL=/workspace/checkpoints/mixtral-hf/tokenizer.model
MEGATRON_PATH="/workspace/megatron-lm"
export PYTHONPATH=$MEGATRON_PATH:$PYTHONPATH
export CUDA_DEVICE_MAX_CONNECTIONS=1

TARGET_TP_SIZE=""
TARGET_EP_SIZE=""
TARGET_PP_SIZE=""

HF_FORMAT_DIR=/workspace/checkpoints/mixtral-hf
MEGATRON_FORMAT_DIR=/workspace/checkpoints/mixtral-mcore-TP${TARGET_TP_SIZE}PP${TARGET_PP_SIZE}EP${TARGET_EP_SIZE}

python tools/checkpoint/convert.py \
--model-type GPT \
--loader loader_mixtral_hf \
--saver mcore \
--target-tensor-parallel-size ${TARGET_TP_SIZE} \
--target-pipeline-parallel-size ${TARGET_PP_SIZE} \
--target-expert-parallel-size ${TARGET_EP_SIZE} \
--load-dir ${HF_FORMAT_DIR} \
--save-dir ${MEGATRON_FORMAT_DIR} \
--tokenizer-model ${TOKENIZER_MODEL}
```

## Text generation with Mixtral 8x7B
Inference with Mixtral 8x7B requires at least 2 GPUS, such that a distributed checkpoint with EP>=2 or PP>=2 converted with above script is needed.

The Megatron-LM have included a simple REST server to use for text generation in `tools/run_text_generation_server.py`, launch it with the following script:
```
#!/bin/bash
# This example will start serving the Mixtral 8x7B model.
DISTRIBUTED_ARGS="--nproc_per_node 2 \
                  --nnodes 1 \
                  --node_rank 0 \
                  --master_addr localhost \
                  --master_port 6000"

CHECKPOINT=<Path to checkpoint>
TOKENIZER_MODEL=<Path to tokenizer (e.g. /tokenizer.model)>

export CUDA_DEVICE_MAX_CONNECTIONS=1

pip install flask-restful

torchrun $DISTRIBUTED_ARGS tools/run_text_generation_server.py   \
       --tensor-model-parallel-size 1  \
       --pipeline-model-parallel-size 2  \
       --expert-model-parallel-size 1 \
       --load ${CHECKPOINT}  \
       --tokenizer-type Llama2Tokenizer \
       --tokenizer-model $TOKENIZER_MODEL \
       --use-mcore-models \
       --max-position-embeddings 32768 \
       --num-layers 32 \
       --hidden-size 4096 \
       --ffn-hidden-size 14336 \
       --num-attention-heads 32 \
       --normalization RMSNorm \
       --disable-bias-linear \
       --position-embedding-type rope \
       --no-position-embedding \
       --swiglu \
       --untie-embeddings-and-output-weights \
       --group-query-attention \
       --num-query-groups 8 \
       --bf16  \
       --micro-batch-size 1  \
       --seq-length 1024  \
       --seed 42 \
       --num-experts 8 \
       --moe-router-topk 2 \
       --moe-token-dispatcher-type alltoall \
       --moe-grouped-gemm \
       --mock-data \
       --rotary-base 1000000
```

Once the server is running you can use `tools/text_generation_cli.py` to query it, it takes one argument which is the host the server is running on.

```
python tools/text_generation_cli.py localhost:5000
```


## Finetuning from pretrained Mixtral 8x7B
To finetuning pretrained Mixtral 8x7B, use the following scripts:


```bash
PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:24.04-py3
CHECKPOINT_PATH="" # Speicfy path to checkpoint dir
TOKENIZER_MODEL="" # Specify path to tokenizer.model
DATA_PATH="" # Specify path to data

docker run \
    --gpus=all \
    --ipc=host \
    --workdir /workspace/megatron-lm \
    -v /path/to/data:/path/to/data \
    -v /path/to/megatron-lm:/workspace/megatron-lm \
    $PYTORCH_IMAGE \
    bash examples/mixtral/train_mixtral_8x7b_distributed.sh $CHECKPOINT_PATH $TOKENIZER_MODEL $DATA_PATH
```

The above functionality also applys to Mixtral 8x22B actually, you should set the model config (including hidden_size/head_num/num_layers/ffn_hidden_size) properly according to the original [config](https://huggingface.co/mistralai/Mixtral-8x22B-v0.1/blob/main/config.json).

## Acknowledgements
Contributors outside NVIDIA for the huggingface converter and example of Mixtral models in Megatron-Core:
- Peng Li <jerry.lp@alibaba-inc.com>
- Jun Huang <huangjun.hj@alibaba-inc.com>


================================================
FILE: examples/mixtral/train_mixtral_8x7b_distributed.sh
================================================
#!/bin/bash

# Runs Mixtral 8x7B model

export CUDA_DEVICE_MAX_CONNECTIONS=1

GPUS_PER_NODE=8
# Change for multinode config
MASTER_ADDR=${MASTER_ADDR:-"localhost"}
MASTER_PORT=${MASTER_PORT:-"6000"}
NNODES=${SLURM_NNODES:-"1"}
NODE_RANK=${RANK:-"0"}
WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES))

CHECKPOINT_PATH=$1
TOKENIZER_MODEL=$2
DATA_PATH=$3

DISTRIBUTED_ARGS=(
    --nproc_per_node $GPUS_PER_NODE
    --nnodes $NNODES
    --node_rank $NODE_RANK
    --master_addr $MASTER_ADDR
    --master_port $MASTER_PORT
)

MODEL_ARGS=(
    --use-mcore-models
    --disable-bias-linear
    --seq-length 4096
    --max-position-embeddings 32768
    --num-layers 32
    --hidden-size 4096
    --ffn-hidden-size 14336
    --num-attention-heads 32
    --init-method-std 0.01
    --attention-dropout 0.0
    --hidden-dropout 0.0
    --normalization RMSNorm
    --position-embedding-type rope
    --swiglu
    --untie-embeddings-and-output-weights
    --group-query-attention
    --num-query-groups 8
    --no-masked-softmax-fusion
    --no-position-embedding
    --rotary-base 1000000
)

MOE_ARGS=(
    --num-experts 8
    --moe-router-topk 2
    --moe-router-load-balancing-type aux_loss
    --moe-aux-loss-coeff 1e-2
    --moe-grouped-gemm
    --moe-token-dispatcher-type alltoall
    --overlap-param-gather
    --overlap-grad-reduce
)

DATA_ARGS=(
    --tokenizer-type Llama2Tokenizer
    --tokenizer-model ${TOKENIZER_MODEL}
    --data-path $DATA_PATH
    --split 99990,8,2
)

TRAINING_ARGS=(
    --micro-batch-size 1
    --global-batch-size 256
    --lr 1e-4
    --train-iters 500000
    --lr-decay-iters 320000
    --lr-decay-style cosine
    --min-lr 1.0e-5
    --weight-decay 0.1
    --lr-warmup-iters 500
    --clip-grad 1.0
    --bf16
)

MODEL_PARALLEL_ARGS=(
    --tensor-model-parallel-size 1
    --pipeline-model-parallel-size 4
    --expert-model-parallel-size 8
    --use-distributed-optimizer
    --sequence-parallel
)

LOGGING_ARGS=(
    --log-interval 1 \
    --save-interval 10000 \
    --eval-interval 1000 \
    --eval-iters 10 \
    --save $CHECKPOINT_PATH \
    --load $CHECKPOINT_PATH \
    --tensorboard-dir "${CHECKPOINT_PATH}/tensorboard" \
    --no-load-optim \
    --no-load-rng
)

if [ -n "${WANDB_API_KEY}" ]; then
    LOGGING_ARGS+=(
        --wandb-project ${WANDB_PROJECT:-"Mixtral"}
        --wandb-exp-name ${WANDB_NAME:-"Mixtral_8x7B"}
    )
fi


torchrun ${DISTRIBUTED_ARGS[@]} pretrain_gpt.py \
    ${MODEL_ARGS[@]} \
    ${MOE_ARGS[@]} \
    ${DATA_ARGS[@]} \
    ${TRAINING_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${LOGGING_ARGS[@]}


================================================
FILE: examples/multimodal/Dockerfile
================================================
# Base image: NVIDIA PyTorch container with CUDA, cuDNN, NCCL, Python, and uv pre-installed
FROM nvcr.io/nvidia/pytorch:25.12-py3

# Install JRE for pycocoevalcap
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        default-jre && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Using --break-system-packages to allow system-wide installation in managed environment
RUN uv pip install --system --no-cache --break-system-packages \
    einops \
    einops-exts \
    sentencepiece \
    braceexpand \
    webdataset \
    packaging \
    "transformers<5.0.0" \
    datasets \
    accelerate \
    timm \
    pytest-cov \
    pytest_mock \
    nltk \
    wrapt \
    zarr \
    tensorstore \
    black \
    isort \
    click \
    pycocoevalcap \
    "megatron-energon[av_decode]~=6.0" \
    mistral-common \
    tiktoken \
    # Additional dependencies for megatron-core[mlm]
    flask-restful \
    wandb \
    bitstring \
    filetype
    setuptools

# Install CLIP from GitHub
RUN uv pip install --system --no-cache --break-system-packages --no-build-isolation \
    git+https://github.com/openai/CLIP.git

# Install packages with --no-deps to avoid outdated and unnecessary dependencies
RUN uv pip install --system --no-cache --break-system-packages --no-deps \
    open_clip_torch \
    "open-flamingo[eval]"

# Copy Megatron-LM source and install megatron-core
# This assumes the build context is the Megatron-LM root directory
# Build with: docker build -t megatron-multimodal -f examples/multimodal/Dockerfile .
WORKDIR /workspace/megatron-lm
COPY . .

# Install megatron-core in editable mode for development
RUN uv pip install --system --no-cache --break-system-packages --no-build-isolation -e ".[mlm]"

# Set working directory to examples for convenience
WORKDIR /workspace/megatron-lm


================================================
FILE: examples/multimodal/README.md
================================================
# Multimodal Example

*NOTE: This example is under active development and is expected change.*

The following walks through all the steps required to pretrain and instruction tune a llava architecture vision-language model (VLM). It is important to precisely follow all steps to obtain the benchmark scores at the end.

This example has been tested on an A100 based DGX cluster. Pretraining and instruction tuning took approximately 1 day and 11 hours respectively on 64 GPUs using four way tensor parallelism (tp=4). Training speed will scale approximately linearly with number of GPUs available.

Multimodal support in megatron is still under active development. This example is not intended to produce state-of-the-art model quality (that would require more data and model refinements), it is merely intended to demonstrate the multimodal functionality in megatron. If you hit any problems, please open a github issue.

## Setup

### Docker container

You can build a docker container using `examples/multimodal/Dockerfile` to run this example.
```
# At the Megatron-LM root directory, execute the following
docker build -t megatron-multimodal -f examples/multimodal/Dockerfile .
```

### Language model

Follow the instructions in [Mistral](../../docs/llama_mistral.md#mistral-7b) to download weights for Mistral-7B-Instruct-v0.3 from HuggingFace and convert to mcore format with tensor parallel size 4.
Please use the tokenizer from HuggingFace.

### Vision model

This example uses the OpenAI CLIP `ViT-L/14@336px` Vision model. To download the weights from OpenAI and convert them to a format that can be loaded in megatron, please run the following:

```
python examples/multimodal/model_converter/clip_converter.py --download-root /some/download/folder --output /some/output/folder --tensor-parallel-size 4 --use-te
```

### Combined model checkpoint

Update the paths to point to the mcore converted CLIP and Mistral models and run the following script to combine the Mistral and CLIP models into a single multimodal checkpoint folder:

```
examples/multimodal/combine_lm_vision_checkpoints.sh /path/to/mistral/model /path/to/clip/model /output/dir
```

## Training

### Pretraining

1. Download the LLavA-Pretrain dataset from Hugging Face and unzip the images folder (NOTE: 79GB of disk space required):

    ```
    git clone https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain
    cd LLaVA-Pretrain
    unzip images.zip
    ```

3. Run the following script to convert the data to webdataset format:

    ```
    cd <megatron-lm dir>
    python examples/multimodal/convert_llava_pretrain_to_wds.py
    ```

4. Run the following command to convert to megatron-energon format:

    ```
    cd <LLaVA-Pretrain dir>/wds
    energon prepare ./
    ```

    select the following values for the presented options:

    ```
    > Please enter a desired train/val/test split like "0.5, 0.2, 0.3" or "8,1,1": 9,1,0
    > Do you want to create a dataset.yaml interactively? [Y/n]: Y
    > Please enter a number to choose a class: 9 (VQASample)
    > Do you want to set a simple field_map[Y] (or write your own sample_loader [n])? [Y/n]: Y
    > Please enter a webdataset field name for 'image' (<class 'torch.Tensor'>): jpg
    > Please enter a webdataset field name for 'context' (<class 'str'>): json[0][value]
    > Please enter a webdataset field name for 'answers' (typing.Optional[typing.List[str]], default: None): json[1][value]
    > Please enter a webdataset field name for 'answer_weights' (typing.Optional[torch.Tensor], default: None):
    ```

5. Update `pretrain_dataset.yaml` so that both `path` variables point to `LLaVA-Pretrain/wds`

6. Run the following script to pretrain a llava model for image captioning:

    ```
    cd <megatron-lm dir>
    examples/multimodal/pretrain_mistral_clip.sh
    ```

All being well you should observe training and validation loss curves similar to the following:

<img src="assets/pretrain_curves.png" alt="Pretraining loss curves" width="600"/>

These curves were obtained with global batch size of 256. Changing this value will likely change the curves. For pretraining and instruction tuning llava models we have found that loss curves are an unreliable predictor of downstream task performance. Therefore it is necessary to run test generation and evaluation on a range of metrics to understand model quality. We intend to add training time zero-shot evaluation in a future update.

You can execute the pretraining script multiple times to resume training. On resuming, the latest model, optimizer, and dataloader state are loaded.

### SFT

1. Prepare an instruction tuning dataset such in [megatron-energon format](https://nvidia.github.io/Megatron-Energon/data_prep.html#). NOTE: we do not provide instructions for this.

2. Update `sft_dataset.yaml` so that both `path` variables point to the train and val splits of your instruction tuning dataset.

Run the following script to instruction tune the pre-trained llava model:

    ```
    examples/multimodal/sft_mistral_clip.sh
    ```

You can execute the SFT script multiple times to resume training. On resuming, the latest model, optimizer, and dataloader state are loaded.

## Evaluation

### Generation

Run the following script:

```
examples/multimodal/text_generation_mistral_clip.sh --input-image-path /path/to/input/images --output-path /some/output/directory \
    --model-path /path/to/model.pt --gt-path /path/to/groundtruth/file --task generation-task-name
```

where `--task generation-task-name` is the name of the evaluation benchmark such as `captioning` or `MMMU`.

### After pretraining

#### COCO captioning

1. Download the COCO 2014 test image set:

    ```wget http://images.cocodataset.org/zips/test2014.zip```

2. Download COCO test image annotations:

    ```https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_test.json```

3. First, run text generation using `--task captioning`.

4. Run the following command:

    ```
    python examples/multimodal/evaluate_coco.py --input-path /output/directory/from/generation --groundtruth-path /path/to/groundtruth/file
    ```

For the mistral-7b-instruct plus clip llava model you should obtain a COCO CIDer score of approximately 94.

### After SFT

#### MMMU

The official MMMU repository is not pip installable currently so please clone their code in `examples/multimodal` by running `git clone https://github.com/MMMU-Benchmark/MMMU.git`.

The MMMU dataset is loaded from HuggingFace automatically as part of the code.

Run text generation using `--task MMMU`. Then, run the following command:

```
python examples/multimodal/evaluate_mmmu.py --input-path /output/directory/from/generation
```

For the mistral-7b-instruct plus clip instruction tuned llava model you should obtain a MMMU score of approximately 38.


================================================
FILE: examples/multimodal/combine_lm_vision_checkpoints.sh
================================================
#/bin/bash
MCORE_LM=$1    # <path_to_mcore_lm_model_folder>
MCORE_VISION=$2   # <path_to_mcore_vision_model_folder>
OUTPUT_DIR=$3   # <path_to_output_folder_for_combined_checkpoint>
MODEL_TYPE=$4   # Model type. Default: Mistral CLIP example.

if [[ $MODEL_TYPE == "nvlm" ]]; then
    # NVLM TP=8
    python examples/multimodal/combine_state_dicts.py \
        --input \
        ${MCORE_LM}/iter_0000001/mp_rank_00/model_optim_rng.pt \
        ${MCORE_VISION}/iter_0000001/mp_rank_00/model_optim_rng.pt \
        ${MCORE_LM}/iter_0000001/mp_rank_01/model_optim_rng.pt \
        ${MCORE_VISION}/iter_0000001/mp_rank_01/model_optim_rng.pt \
        ${MCORE_LM}/iter_0000001/mp_rank_02/model_optim_rng.pt \
        ${MCORE_VISION}/iter_0000001/mp_rank_02/model_optim_rng.pt \
        ${MCORE_LM}/iter_0000001/mp_rank_03/model_optim_rng.pt \
        ${MCORE_VISION}/iter_0000001/mp_rank_03/model_optim_rng.pt \
        ${MCORE_LM}/iter_0000001/mp_rank_04/model_optim_rng.pt \
        ${MCORE_VISION}/iter_0000001/mp_rank_04/model_optim_rng.pt \
        ${MCORE_LM}/iter_0000001/mp_rank_05/model_optim_rng.pt \
        ${MCORE_VISION}/iter_0000001/mp_rank_05/model_optim_rng.pt \
        ${MCORE_LM}/iter_0000001/mp_rank_06/model_optim_rng.pt \
        ${MCORE_VISION}/iter_0000001/mp_rank_06/model_optim_rng.pt \
        ${MCORE_LM}/iter_0000001/mp_rank_07/model_optim_rng.pt \
        ${MCORE_VISION}/iter_0000001/mp_rank_07/model_optim_rng.pt \
        --prefixes language_model vision_model language_model vision_model language_model vision_model language_model vision_model language_model vision_model language_model vision_model language_model vision_model language_model vision_model \
        --output \
        ${OUTPUT_DIR}/iter_0000001/mp_rank_00/model_optim_rng.pt \
        ${OUTPUT_DIR}/iter_0000001/mp_rank_01/model_optim_rng.pt \
        ${OUTPUT_DIR}/iter_0000001/mp_rank_02/model_optim_rng.pt \
        ${OUTPUT_DIR}/iter_0000001/mp_rank_03/model_optim_rng.pt \
        ${OUTPUT_DIR}/iter_0000001/mp_rank_04/model_optim_rng.pt \
        ${OUTPUT_DIR}/iter_0000001/mp_rank_05/model_optim_rng.pt \
        ${OUTPUT_DIR}/iter_0000001/mp_rank_06/model_optim_rng.pt \
        ${OUTPUT_DIR}/iter_0000001/mp_rank_07/model_optim_rng.pt
else
    # Mistral CLIP example TP=4.
    python examples/multimodal/combine_state_dicts.py \
        --input \
        ${MCORE_LM}/iter_0000001/mp_rank_00/model_optim_rng.pt \
        ${MCORE_VISION}/iter_0000001/mp_rank_00/model_optim_rng.pt \
        ${MCORE_LM}/iter_0000001/mp_rank_01/model_optim_rng.pt \
        ${MCORE_VISION}/iter_0000001/mp_rank_01/model_optim_rng.pt \
        ${MCORE_LM}/iter_0000001/mp_rank_02/model_optim_rng.pt \
        ${MCORE_VISION}/iter_0000001/mp_rank_02/model_optim_rng.pt \
        ${MCORE_LM}/iter_0000001/mp_rank_03/model_optim_rng.pt \
        ${MCORE_VISION}/iter_0000001/mp_rank_03/model_optim_rng.pt \
        --prefixes language_model vision_model language_model vision_model language_model vision_model language_model vision_model \
        --output \
        ${OUTPUT_DIR}/iter_0000001/mp_rank_00/model_optim_rng.pt \
        ${OUTPUT_DIR}/iter_0000001/mp_rank_01/model_optim_rng.pt \
        ${OUTPUT_DIR}/iter_0000001/mp_rank_02/model_optim_rng.pt \
        ${OUTPUT_DIR}/iter_0000001/mp_rank_03/model_optim_rng.pt
fi

echo 1 > ${OUTPUT_DIR}/latest_checkpointed_iteration.txt


================================================
FILE: examples/multimodal/combine_state_dicts.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.

import argparse
import os
import sys

import torch

# Add megatron to the path.
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
)


def combine(input_files, module_prefixes, output_files):
    num_inputs_per_output = int(len(input_files) / len(output_files))

    for output_idx, output_file in enumerate(output_files):
        combined_state_dict = None

        lb = output_idx * num_inputs_per_output
        ub = (output_idx + 1) * num_inputs_per_output
        current_input_files = input_files[lb:ub]
        current_module_prefixes = module_prefixes[lb:ub]

        for i, (input_file, module_prefix) in enumerate(
            zip(current_input_files, current_module_prefixes)
        ):
            # initialize the combined state dict using the first provided input file
            current_state_dict = torch.load(input_file, weights_only=False)
            if i == 0:
                combined_state_dict = current_state_dict.copy()
                combined_state_dict["model"] = dict()

            # copy model state dict and prefix names with the given module keys.
            for k, v in current_state_dict["model"].items():
                combined_state_dict["model"]["%s.%s" % (module_prefix, k)] = v

        output_dir = os.path.dirname(output_file)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir, exist_ok=True)
        torch.save(combined_state_dict, output_file)
        print("saved:", output_file)

    print("done.")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="""
        Combine multiple state dicts into a single state dict.
        The combined state dict is first initialized by taking a copy of the first provided input state dict.
        To avoid conflicts in model parameter names, a prefix must be provided for each input file.
        Model parameter names will be renamed from <original name> to <model prefix>.<original name>.


        Example usage:
        python combine_state_dicts.py --input language_model.pt vision_model.pt --prefixes language_model vision_model --output multimodal.pt
        """,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--input", nargs="*", required=True, help="paths to input state dict files")
    parser.add_argument(
        "--prefixes",
        nargs="*",
        required=True,
        help="prefixes to use with each input model's parameters",
    )
    parser.add_argument(
        "--output", nargs="*", required=True, help="path(s) to output state dict file"
    )

    args = parser.parse_args()

    assert len(args.input) > 1, "must provide more than 1 input model to combine"
    assert len(args.input) == len(args.prefixes), "each input model must have a corresponding key"
    assert (
        len(args.input) % len(args.output) == 0
    ), "each output file must use the same number of input files"

    combine(args.input, args.prefixes, args.output)


================================================
FILE: examples/multimodal/config.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
from dataclasses import dataclass

import torch

from megatron.core.activations import fast_gelu, quick_gelu, squared_relu


def get_language_model_config(config):
    if config.language_model_type == "llama3_8b":
        config.activation_func = torch.nn.functional.silu
        config.add_bias_linear = False
        config.bias_activation_fusion = False
        config.gated_linear_unit = True
        config.apply_query_key_layer_scaling = False
        config.layernorm_zero_centered_gamma = (
            False  # Zero centered gamma not supported for RMSNorm
        )
        config.bias_dropout_fusion = False
        config.apply_rope_fusion = False
        config.attention_softmax_in_fp32 = True
        config.ffn_hidden_size = 14336
    elif config.language_model_type == "llama3.1_8b":
        config.activation_func = torch.nn.functional.silu
        config.add_bias_linear = False
        config.bias_activation_fusion = False
        config.gated_linear_unit = True
        config.apply_query_key_layer_scaling = False
        config.layernorm_zero_centered_gamma = (
            False  # Zero centered gamma not supported for RMSNorm
        )
        config.bias_dropout_fusion = False
        config.apply_rope_fusion = False
        config.attention_softmax_in_fp32 = True
        config.ffn_hidden_size = 14336
    elif config.language_model_type == "llama3.1_70B":
        config.activation_func = torch.nn.functional.silu
        config.add_bias_linear = False
        config.bias_activation_fusion = False
        config.gated_linear_unit = True
        config.apply_query_key_layer_scaling = False
        config.layernorm_zero_centered_gamma = (
            False  # Zero centered gamma not supported for RMSNorm
        )
        config.bias_dropout_fusion = False
        config.apply_rope_fusion = False
        config.attention_softmax_in_fp32 = True
        config.ffn_hidden_size = 28672
    elif config.language_model_type == "mistral_7b":
        config.activation_func = torch.nn.functional.silu
        config.add_bias_linear = False
        config.bias_activation_fusion = False
        config.gated_linear_unit = True
        config.apply_query_key_layer_scaling = False
        config.layernorm_zero_centered_gamma = (
            False  # Zero centered gamma not supported for RMSNorm
        )
        config.bias_dropout_fusion = False
        config.apply_rope_fusion = False
        config.attention_softmax_in_fp32 = True
        config.ffn_hidden_size = 14336
    elif config.language_model_type == "nemotron5-8b":
        config.add_bias_linear = False
        config.bias_activation_fusion = False
        config.gated_linear_unit = False
        config.bias_dropout_fusion = False
        config.apply_rope_fusion = False
        config.activation_func = squared_relu
        config.ffn_hidden_size = 21504
        config.masked_softmax_fusion = True
        config.attention_softmax_in_fp32 = True
    elif config.language_model_type == "yi-34b":
        config.activation_func = torch.nn.functional.silu
        config.add_bias_linear = False
        config.bias_activation_fusion = False
        config.gated_linear_unit = True
        config.apply_query_key_layer_scaling = False
        config.layernorm_zero_centered_gamma = (
            False  # Zero centered gamma not supported for RMSNorm
        )
        config.bias_dropout_fusion = False
        config.apply_rope_fusion = False
        config.attention_softmax_in_fp32 = True
        config.ffn_hidden_size = 20480
    elif config.language_model_type == "qwen2.0_72B":
        config.activation_func = torch.nn.functional.silu
        config.add_bias_linear = False
        config.add_qkv_bias = True
        config.bias_activation_fusion = False
        config.gated_linear_unit = True
        config.apply_query_key_layer_scaling = False
        config.layernorm_zero_centered_gamma = (
            False  # Zero centered gamma not supported for RMSNorm
        )
        config.bias_dropout_fusion = False
        config.apply_rope_fusion = False
        config.attention_softmax_in_fp32 = True
        config.ffn_hidden_size = 29568
    elif config.language_model_type == "qwen2.5_7B":
        config.activation_func = torch.nn.functional.silu
        config.add_bias_linear = False
        config.add_qkv_bias = True
        config.bias_activation_fusion = False
        config.gated_linear_unit = True
        config.apply_query_key_layer_scaling = False
        config.layernorm_zero_centered_gamma = (
            False  # Zero centered gamma not supported for RMSNorm
        )
        config.bias_dropout_fusion = False
        config.apply_rope_fusion = False
        config.attention_softmax_in_fp32 = True
        config.ffn_hidden_size = 18944
    elif config.language_model_type == "qwen2.5_72B":
        config.activation_func = torch.nn.functional.silu
        config.add_bias_linear = False
        config.add_qkv_bias = True
        config.bias_activation_fusion = False
        config.gated_linear_unit = True
        config.apply_query_key_layer_scaling = False
        config.layernorm_zero_centered_gamma = (
            False  # Zero centered gamma not supported for RMSNorm
        )
        config.bias_dropout_fusion = False
        config.apply_rope_fusion = False
        config.attention_softmax_in_fp32 = True
        config.ffn_hidden_size = 29568
    elif config.language_model_type == "nemotron5-hybrid-8b":
        config.activation_func = squared_relu
        config.squared_relu = True
        config.add_bias_linear = False
        config.bias_activation_fusion = False
        config.apply_query_key_layer_scaling = False
        config.gated_linear_unit = False
        config.layernorm_zero_centered_gamma = (
            False  # Zero centered gamma not supported for RMSNorm
        )
        config.bias_dropout_fusion = False
        config.attention_softmax_in_fp32 = True
        config.ffn_hidden_size = 21504
    elif config.language_model_type == "nemotron5-hybrid-56b":
        config.activation_func = squared_relu
        config.squared_relu = True
        config.add_bias_linear = False
        config.bias_activation_fusion = False
        config.apply_query_key_layer_scaling = False
        config.gated_linear_unit = False
        config.layernorm_zero_centered_gamma = (
            False  # Zero centered gamma not supported for RMSNorm
        )
        config.bias_dropout_fusion = False
        config.attention_softmax_in_fp32 = True
        config.ffn_hidden_size = 32768
        config.mamba_state_dim = 256
    elif config.language_model_type == "llama3.2_1b":
        config.activation_func = torch.nn.functional.silu
        config.add_bias_linear = False
        config.bias_activation_fusion = False
        config.gated_linear_unit = True
        config.apply_query_key_layer_scaling = False
        config.layernorm_zero_centered_gamma = (
            False  # Zero centered gamma not supported for RMSNorm
        )
        config.bias_dropout_fusion = False
        config.apply_rope_fusion = False
        config.attention_softmax_in_fp32 = True
        config.ffn_hidden_size = 8192
    elif config.language_model_type.startswith("hf://"):
        # Loaded from HuggingFace config file.
        import transformers
        hf_config = transformers.AutoConfig.from_pretrained(config.language_model_type.split("hf://")[1])
        config.hf_config = hf_config
        config.hidden_size = hf_config.hidden_size
    else:
        raise ValueError(f"unknown language model type {config.language_model_type}")

    return config


def get_vision_model_config(config, apply_query_key_layer_scaling):
    if config.vision_model_type == "clip":
        config.num_layers = 24
        config.num_attention_heads = 16
        config.add_bias_linear = True
        config.add_qkv_bias = True
        config.hidden_size = 1024
        config.hidden_dropout = 0.0
        config.attention_dropout = 0.0
        config.ffn_hidden_size = 4096
        config.gated_linear_unit = False
        config.activation_func = quick_gelu
        config.kv_channels = 64
        config.num_query_groups = 16
        config.layernorm_zero_centered_gamma = False
        config.apply_query_key_layer_scaling = apply_query_key_layer_scaling
        config.bias_activation_fusion = False
        config.bias_dropout_fusion = False
        config.attention_softmax_in_fp32 = True
        config.normalization = 'LayerNorm'
        config.apply_rope_fusion = False
    elif config.vision_model_type == "siglip":
        config.num_layers = 27
        config.num_attention_heads = 16
        config.add_bias_linear = True
        config.add_qkv_bias = True
        config.hidden_size = 1152
        config.hidden_dropout = 0.0
        config.attention_dropout = 0.0
        config.ffn_hidden_size = 4304
        config.gated_linear_unit = False
        config.activation_func = fast_gelu
        config.kv_channels = 72
        config.num_query_groups = 16
        config.layernorm_zero_centered_gamma = False
        config.apply_query_key_layer_scaling = apply_query_key_layer_scaling
        config.bias_activation_fusion = False
        config.bias_dropout_fusion = False
        config.attention_softmax_in_fp32 = True
        config.normalization = 'LayerNorm'
        config.apply_rope_fusion = False
        config.qk_layernorm = False
        config.layernorm_epsilon = 1e-6
    elif config.vision_model_type == "internvit":
        config.num_layers = 45
        config.num_attention_heads = ((24 // config.tensor_model_parallel_size) + 1) * config.tensor_model_parallel_size
        config.num_query_groups = config.num_attention_heads
        config.add_bias_linear = True
        config.add_qkv_bias = False
        config.hidden_size = 3200
        config.hidden_dropout = 0.0
        config.attention_dropout = 0.0
        config.ffn_hidden_size = 12800
        config.gated_linear_unit = False
        config.activation_func = torch.nn.functional.gelu
        config.layernorm_zero_centered_gamma = False
        config.apply_query_key_layer_scaling = apply_query_key_layer_scaling
        config.bias_activation_fusion = False
        config.bias_dropout_fusion = False
        config.attention_softmax_in_fp32 = True
        config.normalization = 'RMSNorm'
        config.layernorm_epsilon = 1e-6
        config.apply_rope_fusion = False
    elif config.vision_model_type == "internvit300M":
        config.num_layers = 24
        config.num_attention_heads = 16
        config.num_query_groups = config.num_attention_heads
        config.add_bias_linear = True
        config.add_qkv_bias = True
        config.hidden_size = 1024
        config.kv_channels = 64
        config.hidden_dropout = 0.0
        config.ffn_hidden_size = 4096
        config.gated_linear_unit = False
        config.activation_func = torch.nn.functional.gelu
        config.layernorm_zero_centered_gamma = False
        config.apply_query_key_layer_scaling = apply_query_key_layer_scaling
        config.bias_activation_fusion = False
        config.bias_dropout_fusion = False
        config.attention_softmax_in_fp32 = True
        config.normalization = 'LayerNorm'
        config.layernorm_epsilon = 1e-6
        config.apply_rope_fusion = False
        config.qk_layernorm = False
    elif config.vision_model_type == "radio":
        config.num_layers = 32
        config.num_attention_heads = 16
        config.add_bias_linear = True
        config.add_qkv_bias = True
        config.hidden_size = 1280
        config.ffn_hidden_size = 5120
        config.gated_linear_unit = False
        config.activation_func = fast_gelu
        config.kv_channels = 80
        config.num_query_groups = 16
        config.layernorm_zero_centered_gamma = False
        config.apply_query_key_layer_scaling = apply_query_key_layer_scaling
        config.bias_activation_fusion = False
        config.bias_dropout_fusion = False
        config.attention_softmax_in_fp32 = True
        config.normalization = 'LayerNorm'
        config.apply_rope_fusion = False
        config.qk_layernorm = False
        config.layernorm_epsilon = 1e-6
    elif config.vision_model_type == "radio-g":
        config.num_layers = 40
        config.num_attention_heads = 24
        config.add_bias_linear = True
        config.add_qkv_bias = True
        config.hidden_size = 1536
        config.ffn_hidden_size = 4096
        config.gated_linear_unit = True
        config.activation_func = torch.nn.functional.silu
        config.kv_channels = 64
        config.num_query_groups = 24
        config.layernorm_zero_centered_gamma = False
        config.apply_query_key_layer_scaling = apply_query_key_layer_scaling
        config.bias_activation_fusion = False
        config.bias_dropout_fusion = False
        config.attention_softmax_in_fp32 = True
        config.normalization = 'LayerNorm'
        config.apply_rope_fusion = False
        config.qk_layernorm = False
        config.layernorm_epsilon = 1e-6
    elif config.vision_model_type == "cradio-g":
        config.num_layers = 40
        config.num_attention_heads = 24
        config.add_bias_linear = True
        config.add_qkv_bias = True
        config.hidden_size = 1536
        config.ffn_hidden_size = 6144
        config.gated_linear_unit = False
        config.activation_func = fast_gelu
        config.kv_channels = 64
        config.num_query_groups = 24
        config.layernorm_zero_centered_gamma = False
        config.apply_query_key_layer_scaling = apply_query_key_layer_scaling
        config.bias_activation_fusion = False
        config.bias_dropout_fusion = False
        config.attention_softmax_in_fp32 = True
        config.normalization = 'LayerNorm'
        config.apply_rope_fusion = False
        config.qk_layernorm = False
        config.layernorm_epsilon = 1e-6
    elif config.vision_model_type.startswith("hf://"):
        import transformers
        hf_config = transformers.AutoConfig.from_pretrained(config.vision_model_type.split("hf://")[1])
        config.hf_config = hf_config
        config.hidden_size = hf_config.hidden_size
    else:
        raise ValueError(f"unknown vision model type {config.vision_model_type}")

    return config


def get_vision_projection_config(config, hidden_size):
    # If using FP8, then keep the whole vision projection in FP8.
    config.first_last_layers_bf16 = False
    config.num_layers_at_start_in_bf16 = 0
    config.num_layers_at_end_in_bf16 = 0

    config.gated_linear_unit = False
    config.bias_activation_fusion = False
    config.add_bias_linear = False
    config.hidden_size = hidden_size  # Used as the vision projection output size, i.e., the input to the language model.
    if config.language_model_type == "llama3_8b":
        config.ffn_hidden_size = 14336
        config.activation_func = torch.nn.functional.gelu
    elif config.language_model_type == "llama3.1_8b":
        config.ffn_hidden_size = 4096
        config.activation_func = torch.nn.functional.gelu
        config.layernorm_epsilon = 1e-5
        config.add_bias_linear = True
        config.normalization = "LayerNorm"
    elif config.language_model_type == "mistral_7b":
        config.ffn_hidden_size = 14336
        config.activation_func = torch.nn.functional.gelu
        config.normalization = None
    elif config.language_model_type == "yi-34b":
        config.ffn_hidden_size = 20480
        config.normalization = "LayerNorm"
        config.activation_func = torch.nn.functional.gelu
    elif config.language_model_type == "qwen2.0_72B":
        config.ffn_hidden_size = 29568
        config.normalization = "LayerNorm"
        config.activation_func = torch.nn.functional.gelu
    elif config.language_model_type == "qwen2.5_7B":
        config.ffn_hidden_size = 3584
        config.activation_func = torch.nn.functional.gelu
    elif config.language_model_type == "qwen2.5_72B":
        config.ffn_hidden_size = 29568
        config.normalization = "LayerNorm"
        config.activation_func = torch.nn.functional.gelu
    elif config.language_model_type == "nemotron5-hybrid-56b":
        config.ffn_hidden_size = 32768
        config.activation_func = squared_relu
    elif config.language_model_type in ("nemotron5-8b", "nemotron5-hybrid-8b"):
        config.ffn_hidden_size = 21504
        config.activation_func = squared_relu
    elif config.language_model_type == "llama3.2_1b":
        config.ffn_hidden_size = 2048
        config.activation_func = torch.nn.functional.gelu
        config.normalization = "LayerNorm"
    elif config.language_model_type.startswith("hf://"):
        config.activation_func = torch.nn.functional.gelu
        config.ffn_hidden_size = 4096
        config.normalization = "LayerNorm"
    else:
        raise ValueError(f"unknown language model type {config.language_model_type}")

    return config


@dataclass
class EvaluationConfig:
    """Evaluation related configuration."""
    task: str
    dataset: str = ""

    temperature: float = 1.0
    top_p: float = 0.0
    top_k: int = 0

    out_seq_length: int = 32

    output_path: str = ""

    input_image_path: str = ""
    gt_path: str = ""
    split: str = "validation"

    num_partitions: int = 0
    partition_id: int = 0
    num_samples_per_partition: int = 0


================================================
FILE: examples/multimodal/convert_llava_pretrain_to_wds.py
================================================
import json
import os
import webdataset as wds

from tqdm import tqdm

llava_pretrain_dir = '<path_to_LLaVA-Pretrain>'

# Paths to the dataset files
json_file = os.path.join(llava_pretrain_dir, 'blip_laion_cc_sbu_558k.json')
output = os.path.join(llava_pretrain_dir, 'wds')

if not os.path.exists(output):
    os.mkdir(output)

# Load data
with open(json_file, 'r') as f:
    data = json.load(f)

with wds.ShardWriter(os.path.join(output, 'pretrain-%d.tar'), maxcount=10000) as shard_writer:
    for entry in tqdm(data):
        with open(os.path.join(llava_pretrain_dir, entry['image']), "rb") as img_file:
                image_data = img_file.read()
        sample = {
            "__key__": entry['id'],
            "jpg": image_data,
            "json": json.dumps(entry['conversations']).encode("utf-8"),
        }
        shard_writer.write(sample)

print(f"Dataset successfully converted to wds")


================================================
FILE: examples/multimodal/dataloader_provider.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
import os

import torch
from dataset_helpers import TaskEncoder, print_error_handler

from megatron.core import parallel_state
from megatron.core.num_microbatches_calculator import get_num_microbatches
from megatron.core.parallel_state import (
    get_pipeline_model_parallel_rank,
    get_pipeline_model_parallel_world_size,
    get_tensor_model_parallel_rank,
)
from megatron.energon import (
    LimitDataset,
    RepeatDataset,
    WorkerConfig,
    get_loader,
    get_savable_loader,
    get_train_dataset,
    get_val_datasets,
)
from megatron.training import get_args
from megatron.training.checkpointing import get_checkpoint_name


def datasets_provider(task_encoder,worker_config=None):
    """Create multimodal train, validation and test datasets."""
    args = get_args()

    dname = args.data_path[0] if type(args.data_path) is list else args.data_path
    train_dataset = get_train_dataset(
        dname,
        batch_size=args.micro_batch_size,
        task_encoder=task_encoder,
        virtual_epoch_length=1000,
        max_samples_per_sequence=100,
        shuffle_buffer_size=100,
        worker_config=worker_config,
        packing_buffer_size=args.packing_buffer_size,
        handler=print_error_handler,
        image_decode="pil",
    )

    val_datasets = get_val_datasets(
        dname,
        batch_size=args.micro_batch_size,
        # This is the total number over all workers
        # limit=args.eval_iters * get_num_microbatches(),
        task_encoder=task_encoder,
        worker_config=worker_config,
        packing_buffer_size=args.packing_buffer_size,
        handler=print_error_handler,
        image_decode="pil",
    )
    val_datasets_without_source_datasets = [
        # Limit the dataset to eval_iters * num_microbatches
        LimitDataset(
            # Repeat the inner dataset in case it's too short
            RepeatDataset(val_ds, worker_config=worker_config),
            length=args.eval_iters * get_num_microbatches(),
            worker_config=worker_config,
            reset_after_epoch=True,
        )
        for val_ds, _src_ds in val_datasets
    ]

    return train_dataset, val_datasets_without_source_datasets, None


def is_first_or_last_stage(pp_size):
    """Check if the current pipeline parallel stage is the first or last stage."""
    if pp_size == 1:    # No pipeline parallelism.
        return True

    # With no separate pipeline stage for the vision model (epp=0), 
    # run the dataloader on the first and last pipeline stage.
    pp_rank = get_pipeline_model_parallel_rank()
    is_valid_rank = pp_rank in (0, pp_size-1)

    return is_valid_rank


def is_dataloader_rank():
    """Check if we should have the dataloader on this tensor and pipeline parallel rank."""
    # Run dataloader only on the first tensor parallel rank (will be broadcasted to others).
    is_first_rank = get_tensor_model_parallel_rank() == 0

    pp_size = get_pipeline_model_parallel_world_size()
    is_first_rank = is_first_rank and is_first_or_last_stage(pp_size)

    return is_first_rank


def train_valid_test_dataloaders_provider(train_val_test_num_samples, task_encoder=None):
    """Build multimodal train, validation and test dataloaders."""
    args = get_args()
    
    if task_encoder is None:
        task_encoder = TaskEncoder()

    # Dataloader is only on specific ranks.
    if not is_dataloader_rank():
        return None, None, None

    worker_debug_path = None
    worker_log_level = 0

    rank = parallel_state.get_data_parallel_rank()
    world_size = parallel_state.get_data_parallel_world_size()
    data_parallel_group = parallel_state.get_data_parallel_group()

    worker_config = WorkerConfig(
        rank=rank,
        world_size=world_size,
        num_workers=args.num_workers,
        data_parallel_group=data_parallel_group,
        worker_debug_path=worker_debug_path,
        worker_log_level=worker_log_level,
    )
    train_ds, valid_ds1, test_ds = datasets_provider(task_encoder, worker_config)

    train_dataloader = get_savable_loader(train_ds, worker_config=worker_config)
    if args.load is not None:
        if getattr(args, "dataloader_save", None):
            dp_rank = parallel_state.get_data_parallel_rank()
            data_save_name = get_checkpoint_name(
                args.dataloader_save,
                args.iteration,
                pipeline_rank=0,    # Only the first pipeline parallel rank stores the dataloader checkpoint.
                basename=f"train_dataloader_dprank{dp_rank:03d}.pt",
            )
            if os.path.exists(data_save_name):
                try:
                    dataset_state_dict = torch.load(data_save_name, map_location="cpu")
                    train_dataloader.restore_state_rank(dataset_state_dict["dataloader_state_dict"])
                    print(f"restored dataset state from {data_save_name}")
                except Exception as e:
                    print("loading dataset state failed. Skipping. " + str(e))
            else:
                print(f"dataset state {data_save_name} does not exist")

    valid_dataloader = [
        EnergonDataloader(get_loader(valid_ds, worker_config=worker_config))
        for valid_ds in valid_ds1
    ]
    test_dataloader = None

    return EnergonDataloader(train_dataloader), valid_dataloader, EnergonDataloader(test_dataloader)


class EnergonDataloader:
    """A wrapper to use Megatron Energon dataloader with the Megatron-LM training loop."""
    def __init__(self, dataloader):
        self._dataloader = dataloader
        self._iter = iter(cyclic_iter(dataloader))

    def __next__(self):
        return self._iter.__next__()

    def __iter__(self):
        return self._iter.__iter__()

    def save_state(self):
        return self._dataloader.save_state_rank()


def cyclic_iter(iter):
    while True:
        for x in iter:
            yield x


================================================
FILE: examples/multimodal/dataset_helpers.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
import bisect
import dataclasses
import json
import re
import sys
import traceback
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple, Union

from image_processing import ImageTransform, find_closest_aspect_ratio, find_closest_area_weighted_aspect_ratio
from PIL import Image
from torchvision.transforms import ToPILImage
import numpy as np
import torch

from energon_util import OfflineTargetAspectRatioSample, SampleListSample
from megatron.core.models.multimodal.context_parallel import get_padding
from megatron.core.models.multimodal.llava_model import IGNORE_INDEX, IMAGE_TOKEN, VIDEO_TOKEN
from megatron.core.models.vision.clip_vit_model import get_num_image_embeddings
from megatron.energon import (
    Batch,
    CaptioningSample,
    DefaultTaskEncoder,
    OCRSample,
    Sample,
    SimilarityInterleavedSample,
    VQASample,
    MultiChoiceVQASample
)
from megatron.energon.task_encoder.base import stateless
from megatron.training import get_args, get_tokenizer


@dataclass
class ImageTaskSample(Sample):
    __key__: str
    __restore_key__: Tuple[Union[str, int, tuple], ...]
    __subflavor__: Dict
    __subflavors__: Dict
    # (c, h, w)
    imgs: List[torch.Tensor]
    num_tiles: List[int]
    tokens: torch.Tensor
    total_len: int  # Total token count in the sample, including text and image tokens
    labels: torch.Tensor = None


@dataclass
class ImageTaskSamplePacked(Sample):
    """Dataclass to store a single packed sample (not a batch).

        P = Number of sub-samples in the packed sample
        seq_len = Total sequence length
        num_imgs = Number of images across all samples in the packed sample
    """

    __key__: str    # Sample name
    __restore_key__: Tuple[Union[str, int, tuple], ...]
    __subflavor__: Dict     # Sample metadata. Deprecated.
    __subflavors__: Dict    # Sample metadata.
    tokens: torch.Tensor  # Input tokens packed into a single tensor (seq_len,)
    labels: torch.Tensor # Target tokens packed into a single tensor (seq_len,)
    imgs: List[torch.Tensor]    # Input images
    num_tiles: List[int]  # Number of tiles for each image of each sample (num_imgs)
    max_length: int    # Maximum length across sub-samples.
    cu_lengths: List[int]  # Cumulative length of each sub-sample in this packed sample incl. text and image tokens (P,)


# Typing for the resulting batch data after encode_batch()
@dataclass
class ImageTaskBatchPacked(Batch):
    """Dataclass to store a batch of packed samples.

        N = Batch size
        P = Number of samples in the packed sample
        seq_len = Maximum sequence length
        num_imgs = Number of images across all samples in the packed sample
    """

    __key__: List[str]  # Sample names
    __restore_key__: Tuple[Union[str, int, tuple], ...]
    __subflavor__: Dict     # Sample metadata. Deprecated.
    __subflavors__: List[Dict]  # Sample metadatas.
    tokens: torch.Tensor  # Input tokens packed and padded (N, seq_len)
    labels: torch.Tensor # Target tokens packed and padded (N, seq_len)
    imgs: torch.Tensor  # All image tiles stacked into a single tensor (num_tiles, C, H, W)
    num_tiles: List[List[int]]  # Number of tiles per image (N, num_imgs)
    max_lengths: List[int]  # Maximum length across sub-samples (N,)
    cu_lengths: List[List[int]]  # Cumulative length of each sub-sample in each packed sample of the batch (N, P)


# Based on https://github.com/hiyouga/LLaMA-Factory/blob/641d0dab08d96a93c34657742213d8994d9ed476/src/llamafactory/data/processors/processor_utils.py#L19
# Copyright (c) 2024 LLaMA-Factory. Apache license 2.0.
def search_for_fit(numbers: List[int], capacity: int) -> int:
    """Finds the index of largest number that fits into the knapsack with the given capacity."""
    index = bisect.bisect(numbers, capacity)
    return -1 if index == 0 else (index - 1)


# Based on https://github.com/hiyouga/LLaMA-Factory/blob/641d0dab08d96a93c34657742213d8994d9ed476/src/llamafactory/data/processors/processor_utils.py#L27
# Copyright (c) 2024 LLaMA-Factory. Apache license 2.0.
def greedy_knapsack(item_sizes: List[int], samples: List, max_capacity: int) -> List:
    """Greedy algorithm with binary search for the knapsack problem.

    Pack as many samples as possible given a maximum capacity and capacities of individual samples.
    Used if sequence packing is enabled.
    """
    assert len(item_sizes) == len(samples), "sample lengths and samples must have the same length."

    knapsacks = []

    if len(item_sizes) == 0:
        return knapsacks

    # Sort sample lengths and samples together.
    sorted_item_sizes, sorted_samples = zip(*sorted(zip(item_sizes, samples), key=lambda x: x[0]))
    sorted_item_sizes = list(sorted_item_sizes)
    sorted_samples = list(sorted_samples)

    # Check if all samples fit in the knapsack capacity.
    if sorted_item_sizes[-1] > max_capacity:
        raise ValueError(f"knapsack: A sample is larger {sorted_item_sizes[-1]} than the max_sequence_length {max_capacity}.")

    while sorted_item_sizes:
        current_knapsack = []
        remaining_capacity = max_capacity

        while True:
            idx = search_for_fit(sorted_item_sizes, remaining_capacity)
            if idx == -1:
                break   # Can't fit more samples.

            remaining_capacity -= sorted_item_sizes[idx]

            sorted_item_sizes.pop(idx)
            sample = sorted_samples.pop(idx)
            current_knapsack.append(sample)

        knapsacks.append(current_knapsack)

    return knapsacks


class TaskEncoder(DefaultTaskEncoder[OCRSample, OCRSample, ImageTaskBatchPacked, dict]):
    """A simple task encoder for VLMs."""

    def __init__(
        self
    ):
        super().__init__()

        self.args = get_args()

        self.tokenizer = get_tokenizer()
        with open(self.args.prompt_path, "r") as f:
            self.manual_prompts = json.load(f)
        self.dataloader_seq_length = self.args.dataloader_seq_length  # Always return samples of this length.
        self.packing_seq_length = self.args.packing_seq_length     # Packing sequence length, if packing is enabled.
        self.is_packing_enabled = self.args.packing_buffer_size is not None and self.args.packing_buffer_size > 0

        if self.dataloader_seq_length and self.packing_seq_length:
            assert self.dataloader_seq_length >= self.packing_seq_length, "dataloader sequence length must be greater than or equal to the packing sequence length"

        if self.is_packing_enabled:
            assert self.packing_seq_length > 0, "packing sequence length must be set"

        self.num_image_embeddings_per_tile = get_num_image_embeddings(
            self.args.img_h,
            self.args.img_w,
            self.args.patch_dim,
            self.args.vision_model_type,
            self.args.disable_vision_class_token,
            1,
            self.args.pixel_shuffle,
            self.args.use_tile_tags,
            self.args.max_num_tiles,
            self.args.tokenizer_prompt_format,
        )

        self.txt_to_token_dict = {}

        self.img_h, self.img_w = self.args.img_h, self.args.img_w
        self.img_token_id = self.tokenizer.convert_tokens_to_ids(IMAGE_TOKEN)
        # This map is used to reduce the number of tiles used per image if the number of tokens is
        # larger than the decoder_seq_length.
        self.num_tiles_degradation_map = {12:8, 8:6, 6:4, 4:2, 2:1, 1:1}

        self.find_closest_aspect_ratio_fn = (
            find_closest_area_weighted_aspect_ratio if self.args.use_area_weighted_aspect_ratio
            else find_closest_aspect_ratio)

        self.transform_img = ImageTransform(self.img_h, self.args.vision_model_type)

    def _get_total_seq_length(self, input_ids, num_tiles):
        """Calculate expected sequence length given text tokens length and number of tiles."""
        total_num_images = len(num_tiles)
        total_num_tiles = sum(num_tiles)
        total_len = len(input_ids) + total_num_tiles * self.num_image_embeddings_per_tile - total_num_images
        return total_len

    def _truncate_for_packing(self, input_ids, target, num_tiles):
        """Truncate tokens and labels if they exceed packing sequence length."""
        total_num_images = len(num_tiles)
        total_num_tiles = sum(num_tiles)
        total_img_embeddings_len = total_num_tiles * self.num_image_embeddings_per_tile
        max_text_tokens = self.packing_seq_length - total_img_embeddings_len + total_num_images

        input_ids = input_ids[:max_text_tokens]
        target = target[:max_text_tokens]

        # If truncate causes all labels to be ignored, then skip the sample
        if (target == IGNORE_INDEX).all():
            raise ValueError(f"all targets will be ignored after truncation: {input_ids}")

        return input_ids, target

    @stateless(restore_seeds=True)
    def encode_sample(self, sample: Union[CaptioningSample, OCRSample, VQASample, SimilarityInterleavedSample]):
        if isinstance(sample, OCRSample):
            if "pdfa" in sample.__key__:
                yield self.combined_ocr_encoder(sample, task_type='encode_pdf')
            elif "multi" in sample.__key__:
                yield self.combined_ocr_encoder(sample, task_type='_encode_ocr')
            else:
                yield self.combined_ocr_encoder(sample, task_type='encode_ocr_ref')
        elif isinstance(sample, CaptioningSample):
            yield self.encode_captioning(sample)
        elif isinstance(sample, VQASample):
            is_llava_training = sample.__subflavors__["is_llava_training"] if "is_llava_training" in sample.__subflavors__ else False

            if "llava" in sample.__key__ or is_llava_training:
                yield self.encode_llava_pretrain(sample)
            else:
                yield self.encode_any_single_turn_vqa(sample)
        elif isinstance(sample, SimilarityInterleavedSample):
            yield self.encode_llava_sft(sample)
        elif isinstance(sample, MultiChoiceVQASample):
            yield self.encode_any_single_turn_vqa(sample)
        # Because the SampleListSample is defined in the Megatron module but loaded by the Energon
        # library, we need to resort to the more brittle check:
        elif type(sample).__name__ == "SampleListSample":
            yield self.encode_sample_list(sample)
        else:
            raise NotImplementedError("Sample format not supported", sample)

    def encode_captioning(self, sample: CaptioningSample):
        """Encode CaptioningSample."""
        augment = sample.__subflavors__.get("augmentation")

        imgs = self.transform_img(
            sample.image, self.img_h, self.img_w, self.args.use_tiling, self.args.max_num_tiles, self.args.use_thumbnail, augment,
            find_closest_aspect_ratio_fn=self.find_closest_aspect_ratio_fn
        )
        num_tiles = [len(imgs)]

        prompt_list = self.manual_prompts["CaptioningPretraining"]["raw"]

        prompt_idx = np.random.randint(len(prompt_list))
        cur_prompt = prompt_list[prompt_idx]
        cur_prompt = IMAGE_TOKEN + "\n" + cur_prompt + "\n"

        caption = sample.caption.strip()

        split_by_line_flag = sample.__subflavors__.get("SplitByLine")
        if split_by_line_flag:
            caption_list = caption.split('\n')
            caption = np.random.choice(caption_list)

        conv = [
            # Note: no system message.
            {"role": "user", "content": cur_prompt},
            {"role": "assistant", "content": caption},
        ]

        input_ids, target = self.tokenizer.tokenize_conversation(conv, True, False)

        if self.is_packing_enabled:
            input_ids, target = self._truncate_for_packing(input_ids, target, num_tiles)

        return ImageTaskSample(
            __key__=sample.__key__,
            __restore_key__=sample.__restore_key__,
            __subflavor__=None,
            __subflavors__=sample.__subflavors__,
            imgs=imgs,
            num_tiles=num_tiles,
            tokens=torch.tensor(input_ids),
            labels=torch.tensor(target),
            total_len=self._get_total_seq_length(input_ids, num_tiles),
        )

    def encode_llava_pretrain(self, sample: VQASample):
        """Encode pretrain sample in LLAVA style."""
        augment = sample.__subflavors__.get("augmentation", False)

        imgs = self.transform_img(
            sample.image, self.img_h, self.img_w, self.args.use_tiling, self.args.max_num_tiles, self.args.use_thumbnail, augment,
            find_closest_aspect_ratio_fn=self.find_closest_aspect_ratio_fn
        )
        num_tiles = [len(imgs)]

        # LLAVA training: override text-prompt with just the image.
        conv = [
            # Note: no system message.
            {"role": "user", "content": IMAGE_TOKEN + "\n"},
            {"role": "assistant", "content": sample.answers},
        ]

        input_ids, target = self.tokenizer.tokenize_conversation(conv, True, False)

        if self.is_packing_enabled:
            input_ids, target = self._truncate_for_packing(input_ids, target, num_tiles)

        return ImageTaskSample(
            __key__=sample.__key__,
            __restore_key__=sample.__restore_key__,
            __subflavor__=None,
            __subflavors__=sample.__subflavors__,
            imgs=imgs,
            num_tiles=num_tiles,
            tokens=torch.tensor(input_ids),
            labels=torch.tensor(target),
            total_len=self._get_total_seq_length(input_ids, num_tiles),
        )

    def encode_sample_list(self, samples: SampleListSample):
        """We encode the list of samples using encode_llava_sft on each sample."""
        error_msg = ("You probably don't want to use online packing since SampleListSample is "
                     "usually used along offline packing.")
        assert not self.is_packing_enabled, error_msg
        encoded_samples = []
        current_length = 0
        for idx, sample in enumerate(samples.samples):
            try:
                encoded_sample = self.encode_llava_sft(sample, truncate_for_sample_list_packing=True)
                if current_length + encoded_sample.total_len > self.packing_seq_length:
                    print(f"Encoding list of samples: stopped at {idx} samples to stick to {self.packing_seq_length}. Last sample key: {sample.__key__}")
                    break
                else:
                    encoded_samples.append(encoded_sample)
                    current_length += encoded_sample.total_len
            except Exception as e:
                print(e)
        return self.pack_selected_samples(encoded_samples)

    def encode_llava_sft(self, sample: Union[SimilarityInterleavedSample, OfflineTargetAspectRatioSample], truncate_for_sample_list_packing=False):
        """Encode SFT sample."""
        augment = sample.__subflavors__['augmentation'] if 'augmentation' in sample.__subflavors__ else False
        has_video = sample.__subflavors__['has_video'] if 'has_video' in sample.__subflavors__ else False

        # If the target aspect ratio are provided by the dataset, we use them instead of computing
        # them with the self.find_closest_aspect_ratio_fn function.
        local_find_closest_aspect_ratio_fn = self.find_closest_aspect_ratio_fn
        if type(sample).__name__ == "OfflineTargetAspectRatioSample" and len(sample.target_aspect_ratio) > 0:
            target_aspect_ratio = tuple(sample.target_aspect_ratio[0])
            assert target_aspect_ratio is not None, "Sample of type OfflineTargetAspectRatioSample needs to define the target aspect ratio."
            local_find_closest_aspect_ratio_fn = lambda *args, **kwargs: target_aspect_ratio

        has_image = False
        # We infer whether the sample has image or not.
        if hasattr(sample, "images") and not has_video:
            # If this is a text-only sample and we are freezing the LM,
            # then use a dummy input image.
            if len(sample.images) == 0 and self.args.freeze_LM:
                empty_img = Image.new('RGB', (self.args.img_w, self.args.img_h), (255, 255, 255))
                sample.images.append(empty_img)
            if len(sample.images) > 0:
                has_image = True

        # Note: Some tokenizers may ignore the system prompt.
        conversation = [{"role": "system", "content": "Answer the questions."}]
        # Format the conversation as a list of "user" / "assistant" turns.
        for text in sample.texts:
            error_msg = f"unexpected role {text['from']} in {sample.texts}"
            assert text["from"] in ["human", "gpt"], error_msg
            conversation.append({
                "role": "user" if text["from"] == "human" else "assistant",
                "content": text["value"]})

        # Replace the image tags <image-idx> with IMAGE_TOKEN and count the number of image tags
        number_image_tags = 0
        image_tag_ids_list = []
        for turn in conversation:
            if turn["role"] == "user":
                image_tag_ids = [int(x) - 1 for x in re.findall(r"<image-(\d+)>", turn["content"])]
                image_tag_ids_list.extend(image_tag_ids)
                turn["content"] = re.sub(r"<image-\d+>", IMAGE_TOKEN, turn["content"])
                # For videos, we use the image token to locate where to put the frames.
                if has_video:
                    turn["content"] = turn["content"].replace(VIDEO_TOKEN, IMAGE_TOKEN)
                number_image_tags += turn["content"].count(IMAGE_TOKEN)

        # We re-order the images in sample.images according to how they appear in the conversation.
        if len(image_tag_ids_list) > 0:
            sample.images = [sample.images[idx] for idx in image_tag_ids_list]

        # If there is only one image, but several image tags, we assume all the tags refer to the
        # same image and duplicate the image:
        if not has_video and len(sample.images) == 1 and number_image_tags > 1:
            sample.images = sample.images * number_image_tags

        # If there are no images in the sample, remove the image tags in the conversation.
        if len(sample.images) == 0:
            for turn in conversation:
                if turn["role"] == "user":
                    turn["content"] = turn["content"].replace(IMAGE_TOKEN, "")
            number_image_tags = 0

        # We currently only support one video per sample.
        number_of_images = 1 if has_video else len(sample.images)
        # Fail if there are more image or video tags than image or videos:
        error_msg = (
            f"Found {number_image_tags} image tags for {number_of_images} images. {sample.texts}")
        assert number_image_tags <= number_of_images, error_msg

        # If there are less image of video tags than image or videos, prepend the tags to the first
        # user message:
        if number_image_tags < number_of_images:
            for turn in conversation:
                if turn["role"] == "user":
                    turn["content"] = IMAGE_TOKEN*(number_of_images-number_image_tags) + "\n" + turn["content"]
                    break

        input_ids, target = self.tokenizer.tokenize_conversation(conversation, True, False)

        if has_image:
            imgs = []
            num_tiles = []
            max_num_tiles = self.args.max_num_tiles
            # We keep a buffer of 4 tokens for the question,
            # the rest can be used for image tokens.
            max_image_token_allowed = self.args.decoder_seq_length - len(input_ids) - 4
            # We start by extracting as many tiles per image as possible, and decrease the max
            # number of tiles if there are too many image tokens.
            while True:
                imgs = []
                num_tiles = []
                for img in sample.images:
                    # This if block is a temporary fix to handle video frames. We hard code
                    # `use_tiling = False` because we don't use tiling for videos frames to keep
                    # the number of tokens to a reasonable value.
                    if isinstance(img, torch.Tensor) or isinstance(img, np.ndarray):
                        if len(img.shape) == 4:
                            assert img.shape[0] == 1, f"When len(img.shape) == 4, we expect the first dimension to be 1, but got img.shape: {img.shape} instead."
                            img = img[0]
                            use_tiling = False
                        to_pil = ToPILImage()
                        img = to_pil(img)
                    img_tiles = self.transform_img(
                        img, self.img_h, self.img_w, self.args.use_tiling, max_num_tiles,
                        self.args.use_thumbnail, augment, find_closest_aspect_ratio_fn=local_find_closest_aspect_ratio_fn)
                    imgs += img_tiles
                    num_tiles += [len(img_tiles)]
                if max_num_tiles == 1:
                    break
                if sum(num_tiles) * self.num_image_embeddings_per_tile > max_image_token_allowed:
                    if max_num_tiles in self.num_tiles_degradation_map:
                        max_num_tiles = self.num_tiles_degradation_map[max_num_tiles]
                    else:
                        raise RuntimeError((
                            f"Tried to decrease the number of tiles {max_num_tiles} but it's not ",
                            f"defined in the degradation map {self.num_tiles_degradation_map}"))
                else:
                    break
        elif has_video:
            # We don't use tiling for videos to limit the number of tokens.
            use_tiling=False
            # Grab the selected frames of the video as a tensor with shape
            # fhwc: (num_frames, num_channels, height, width).
            video_fchw = sample.images.frames
            if video_fchw.shape[0] == 0:
                raise ValueError(f"Video {sample.__key__} {sample.__restore_key__} {sample.texts} has no frames.")
            selected_frames = torch.linspace(
                0, video_fchw.shape[0] - 1,
                min(self.args.num_frames, video_fchw.shape[0])).long()
            video_fchw = video_fchw[selected_frames]
            imgs = []
            for video_chw in video_fchw:
                to_pil = ToPILImage()
                video_chw = to_pil(video_chw)
                imgs += self.transform_img(
                    video_chw, self.img_h, self.img_w, use_tiling, self.args.max_num_tiles,
                    self.args.use_thumbnail, augment, find_closest_aspect_ratio_fn=local_find_closest_aspect_ratio_fn)
            num_tiles = [len(imgs)]
        else:
            imgs = num_tiles = []

        if self.is_packing_enabled or truncate_for_sample_list_packing:
            input_ids, target = self._truncate_for_packing(input_ids, target, num_tiles)

        # Some final checks with respect to the number of image tokens and images on the tokenized
        # conversation. There can still be errors, for instance if a non-video sample happens to
        # have our pre-defined video token, or if the packing truncation removed a necessary image
        # tag.
        number_image_token = np.sum(input_ids == self.img_token_id)
        error_msg = (
            f"Found {number_image_token} image tokens for len({num_tiles}) = {len(num_tiles)} image tiles in {conversation}.")
        assert number_image_token == len(num_tiles), error_msg
        error_msg = (
            f"Found sum({num_tiles}) = {np.sum(num_tiles)} tiles for {len(imgs)} images in {conversation}.")
        assert np.sum(num_tiles) == len(imgs), error_msg

        # We need to ensure that there are at least some trainable tokens in the sample.
        assert self.target_has_trainable_tokens(input_ids, num_tiles, target), "Sample has no trainable tokens."

        # Context parallel requires padding.
        total_len = self._get_total_seq_length(input_ids, num_tiles)
        has_cp = self.args.context_parallel_size > 1

        if has_cp:
            # Note: FP8 requires padding only the total sequence length.
            # We pad for FP8 when we have the final, possibly packed sample.
            padding_needed = get_padding(total_len, self.args.context_parallel_size, self.args.tensor_model_parallel_size, self.args.sequence_parallel, fp8_enabled=False)
            padding_input = np.ones(padding_needed) * self.tokenizer.pad
            padding_labels = np.ones(padding_needed) * IGNORE_INDEX
            input_ids = np.concatenate([input_ids, padding_input])
            target = np.concatenate([target, padding_labels])
            total_len = total_len + padding_needed

        return ImageTaskSample(
            __key__=sample.__key__,
            __restore_key__=sample.__restore_key__,
            __subflavor__=None,
            __subflavors__=sample.__subflavors__,
            imgs=imgs,
            num_tiles=num_tiles,
            tokens=torch.tensor(input_ids),
            labels=torch.tensor(target),
            total_len=self._get_total_seq_length(input_ids, num_tiles),
        )

    def target_has_trainable_tokens(self, input_ids, num_tiles, target):
        # Compute the loss mask based on extending the image tags with the proper
        # number of image tokens, extracting the first self.args.decoder_seq_length tokens, and
        # ensuring that some of these tokens have a loss mask > 0.
        # Note that this is a bit hacky because we reproduce here parts of the logics which are in
        # the model itself. Ideally, the data sampler would return the already processed inputs
        # and targets to avoid this duplication.
        expanded_target = target.copy()
        expanded_target[input_ids==self.img_token_id] = self.img_token_id
        expanded_target = self.replace_value_with_repetition(
            expanded_target, self.img_token_id,
            self.num_image_embeddings_per_tile * np.array(num_tiles), IGNORE_INDEX)
        loss_mask = torch.ones(torch.tensor(expanded_target).size(), dtype=torch.float)
        loss_mask[expanded_target == self.tokenizer.pad] = 0.0 # mask paddings
        loss_mask[expanded_target == IGNORE_INDEX] = 0.0 # mask prompts
        loss_mask = torch.cat((loss_mask[1:], torch.zeros((1,))))
        loss_mask = loss_mask[:self.args.decoder_seq_length]
        return torch.sum(loss_mask) > 0

    def replace_value_with_repetition(self, arr, token_to_replace, num_repetition, new_token):
        """
        Replace every occurrence of value V in the input array with R repetitions of W.

        Args:
            arr (Array): Input array to be modified
            token_to_replace: token to be replaced
            new_token: new token
            num_repetition (Array): number of repetition of new token.

        Returns:
            Array: New array with token_to_replace replaced by num_repetition repetitions of
             new_token
        """
        error_msg = "The number of image tokens must match the length of the tile tensor."
        assert np.sum(arr==token_to_replace) == len(num_repetition), error_msg
        result = []
        idx = 0
        for item in arr:
            if item == token_to_replace:
                # If the current item matches token_to_replace, add R copies of W
                result.extend([new_token] * num_repetition[idx])
                idx += 1
            else:
                # Otherwise, keep the original item
                result.append(item)

        return np.array(result)

    def encode_any_single_turn_vqa(self, sample):
        """Encode MultiChoiceVQA or VQA sample."""
        augment = sample.__subflavors__['augmentation'] if 'augmentation' in sample.__subflavors__ else False
        has_video = sample.__subflavors__['has_video'] if 'has_video' in sample.__subflavors__ else False

        if has_video:
            # Grab the selected frames of the video as a tensor with shape
            # fhwc: (num_frames, height, width, num_channels).
            video_fhwc = sample.image.permute(0, 2, 3, 1)
            selected_frames = torch.linspace(
                0, video_fhwc.shape[0] - 1, self.args.num_frames).long()
            video_frame_fhwc = video_fhwc[selected_frames]
            imgs = []
            for video_frame_hwc in video_frame_fhwc:
                imgs += self.transform_img(
                    video_frame_hwc, self.img_h, self.img_w,
                    self.args.use_tiling, self.args.max_num_tiles,
                    self.args.use_thumbnail, augment, find_closest_aspect_ratio_fn=self.find_closest_aspect_ratio_fn
                )
        else:
            imgs = self.transform_img(
                sample.image, self.img_h, self.img_w, self.args.use_tiling, self.args.max_num_tiles,
                self.args.use_thumbnail, augment, find_closest_aspect_ratio_fn=self.find_closest_aspect_ratio_fn
            )

        num_tiles = [len(imgs)]

        if isinstance(sample, MultiChoiceVQASample):
            cur_prompt = format_multichoice_question(sample.context, sample.choices)
            if IMAGE_TOKEN not in cur_prompt:
                cur_prompt = IMAGE_TOKEN + "\n" + cur_prompt
            cur_answer = format_multichoice_answer(sample.correct_choice_idx)
        elif isinstance(sample, VQASample):
            if 'docvqa' in sample.__key__:
                prompt_list = self.manual_prompts["VQASFT"]["docvqa"]
            elif sample.__subflavors__.get("VQASFT"):
                prompt_list = self.manual_prompts["VQASFT"]["raw"]
            else:
                prompt_list = ["{}"]

            prompt_idx = np.random.randint(len(prompt_list))
            cur_prompt = prompt_list[prompt_idx]

            cur_prompt = cur_prompt.format(sample.context)

            if IMAGE_TOKEN not in cur_prompt:
                cur_prompt = IMAGE_TOKEN + "\n" + cur_prompt

            if isinstance(sample.answers, list):
                answer_list = sample.answers
                weight_list = np.array(sample.answer_weights).astype(np.float32)
                weight_list = weight_list / np.sum(weight_list)
                answer_idx = np.random.choice(weight_list.shape[0], 1, p=weight_list)[0]
                cur_answer = answer_list[answer_idx]
            else:
                cur_answer = sample.answers
        else:
            raise NotImplementedError("Unsupported data type provided", sample)

        conversation = [
            {"role": "system", "content": "Answer the questions."},
            {"role": "user", "content": cur_prompt},
            {"role": "assistant", "content": str(cur_answer)},
        ]

        input_ids, target = self.tokenizer.tokenize_conversation(conversation, True, False)

        if self.is_packing_enabled:
            input_ids, target = self._truncate_for_packing(input_ids, target, num_tiles)

        return ImageTaskSample(
            __key__=sample.__key__,
            __restore_key__=sample.__restore_key__,
            __subflavor__=None,
            __subflavors__=sample.__subflavors__,
            imgs=imgs,
            num_tiles=num_tiles,
            tokens=torch.tensor(input_ids),
            labels=torch.tensor(target),
            total_len=self._get_total_seq_length(input_ids, num_tiles),
        )

    def combined_ocr_encoder(self, sample, task_type):
        """Encode OCR samples."""
        augment = sample.__subflavors__['augmentation'] if 'augmentation' in sample.__subflavors__ else False

        if task_type == "encode_pdf":
            sample, cur_prompt, cur_answer = self.encode_pdf_prompt(sample)
        elif task_type == "encode_ocr_ref":
            sample, cur_prompt, cur_answer = self.encode_ocr_ref_prompt(sample)
        elif task_type == "_encode_ocr":
            sample, cur_prompt, cur_answer = self.encode_ocr_prompt(sample)

        imgs = self.transform_img(
                sample.image, self.img_h, self.img_w, self.args.use_tiling, self.args.max_num_tiles,
                self.args.use_thumbnail, augment, find_closest_aspect_ratio_fn=self.find_closest_aspect_ratio_fn
            )
        num_tiles = [len(imgs)]

        conversation = [
            {"role": "system", "content": "Answer the questions."},
            {"role": "user", "content": cur_prompt},
            {"role": "assistant", "content": str(cur_answer)},
        ]

        input_ids, target = self.tokenizer.tokenize_conversation(conversation, True, False)

        if self.is_packing_enabled:
            input_ids, target = self._truncate_for_packing(input_ids, target, num_tiles)

        return ImageTaskSample(
            __key__=sample.__key__,
            __restore_key__=sample.__restore_key__,
            __subflavor__=None,
            __subflavors__=sample.__subflavors__,
            imgs=imgs,
            num_tiles=num_tiles,
            tokens=torch.tensor(input_ids),
            labels=torch.tensor(target),
            total_len=self._get_total_seq_length(input_ids, num_tiles),
        )

    def encode_pdf_prompt(self, sample: OCRSample) -> ImageTaskSample:
        """Encode OCR sample."""
        prompt_list = self.manual_prompts["DocPretraining"]["raw"]
        prompt_idx = np.random.randint(len(prompt_list))
        cur_prompt = prompt_list[prompt_idx]
        if IMAGE_TOKEN not in cur_prompt:
            cur_prompt = IMAGE_TOKEN + "\n" + cur_prompt

        # Make sure there is no extra IMAGE_TOKEN tag.
        sample.text = sample.text.replace(IMAGE_TOKEN, "")

        caption = sample.text.strip()

        split_by_line_flag = sample.__subflavors__.get("SplitByLine")
        if split_by_line_flag:
            caption_list = caption.split('\n')
            caption = np.random.choice(caption_list)
        cur_answer = caption

        return sample, cur_prompt, cur_answer

    def encode_ocr_ref_prompt(self, sample: OCRSample) -> ImageTaskSample:
        """Encode OCR sample."""
        ref = sample.text
        region = sample.words_boxes

        # Make sure there is no extra IMAGE_TOKEN tag
        ref = ref.replace(IMAGE_TOKEN, "")

        if len(region) == 4:
            region = f"<box>({region[0]},{region[1]}),({region[2]},{region[3]})</box>"
        else:
            region = f"<quad>({region[0]},{region[1]}),({region[2]},{region[3]}),({region[4]},{region[5]}),({region[6]},{region[7]})</quad>"

        # Randomly choose between two tasks
        task_idx = np.random.randint(2)
        if task_idx == 0:
            # Referring Grounding
            prompt_list = self.manual_prompts["DocPretraining"]["referring_grounding"]
            prompt_content = ref
            answer = region
        else:
            # Grounded OCR
            prompt_list = self.manual_prompts["DocPretraining"]["grounded_ocr"]
            prompt_content = region
            answer = ref

        prompt_idx = np.random.randint(len(prompt_list))
        cur_prompt = prompt_list[prompt_idx]
        cur_prompt = cur_prompt.format(prompt_content)
        if IMAGE_TOKEN not in cur_prompt:
            cur_prompt = IMAGE_TOKEN + "\n" + cur_prompt

        return sample, cur_prompt, answer

    def bbox_coord_to_label(self, text, bbox):
        """Format bbox coordinates as text."""
        assert len(bbox) == 4 or len(bbox) == 8

        # Make sure there is no extra IMAGE_TOKEN tag
        text = text.replace(IMAGE_TOKEN, "")

        if len(bbox) == 4:
            label_str = f"<ref>{text}</ref><box>({bbox[0]},{bbox[1]}),({bbox[2]},{bbox[3]})</box>"
        else:
            label_str = f"<ref>{text}</ref><quad>({bbox[0]},{bbox[1]}),({bbox[2]},{bbox[3]}),({bbox[4]},{bbox[5]}),({bbox[6]},{bbox[7]})</quad>"

        return label_str

    def encode_ocr_prompt(self, sample: OCRSample) -> ImageTaskSample:
        """Encode OCR sample."""
        if isinstance(sample.words_boxes[0], int):
            answer = self.bbox_coord_to_label(sample.text, sample.words_boxes)
        elif isinstance(sample.words_boxes[0], list):
            answer = ""
            for i, bbox in enumerate(sample.words_boxes):
                answer += self.bbox_coord_to_label(sample.words_text[i], bbox)

        prompt_list = self.manual_prompts["DocPretraining"]["ocr_multi"]
        prompt_idx = np.random.randint(len(prompt_list))
        cur_prompt = prompt_list[prompt_idx]

        if IMAGE_TOKEN not in cur_prompt:
            cur_prompt = IMAGE_TOKEN + "\n" + cur_prompt
        cur_answer = answer

        return sample, cur_prompt, cur_answer

    def batch(self, samples: List[Union[ImageTaskSample, ImageTaskSamplePacked]]) -> ImageTaskBatchPacked:
        # Stack images to [num_tiles, c, h, w]. If there are no images (text-only), then use a dummy image.
        imgs = [img for s in samples for img in s.imgs]
        if len(imgs) > 0:
            imgs = torch.stack(imgs)
        else:
            imgs = torch.tensor([[0]], dtype=torch.float32)

        # If the user hasn't defined a target dataloader sequence length, then use the max along the sample lengths.
        max_seq_len = self.dataloader_seq_length
        if not max_seq_len:
           max_seq_len = max(len(s.tokens) for s in samples)

        tokens = torch.full((len(samples), max_seq_len), self.tokenizer.pad, dtype=torch.int64)
        # +1 to accommodate shift to left by one later.
        labels = torch.full((len(samples), max_seq_len + 1), self.tokenizer.pad, dtype=torch.int64)

        for i, s in enumerate(samples):
            # If the sample/target length exceeds the target sequence length, then truncate.
            text_len = min(max_seq_len, len(s.tokens))
            target_len = min(max_seq_len+1, len(s.labels))

            tokens[i, :text_len] = s.tokens[:text_len]
            labels[i, :target_len] = s.labels[:target_len]

        num_tiles = torch.tensor([n for s in samples for n in s.num_tiles], dtype=torch.int32)
        if len(num_tiles) == 0:
            num_tiles = torch.tensor([[0]], dtype=torch.int32)

        # Cumulative sample lengths are needed for packing, otherwise use dummy values.
        cu_lengths = torch.tensor([[0]], dtype=torch.int32)
        max_lengths = torch.tensor([[0]], dtype=torch.int32)

        is_packed = isinstance(samples[0], ImageTaskSamplePacked)

        if is_packed:
            cu_lengths = torch.stack([s.cu_lengths for s in samples])
            max_lengths = torch.tensor([s.max_length for s in samples], dtype=torch.int32)

        # Pad entire sequence to be a multiple of 32 or 16 if using fp8.
        has_fp8 = self.args.fp8
        if has_fp8:
            total_seq_len = self._get_total_seq_length(tokens[0], num_tiles)
            padding_needed = get_padding(
                total_seq_len,
                self.args.context_parallel_size,
                self.args.tensor_model_parallel_size,
                self.args.sequence_parallel,
                fp8_enabled=has_fp8,
                fp8_recipe=self.args.fp8_recipe,
            )
            if padding_needed > 0:
                tokens = torch.cat([tokens, torch.full((tokens.shape[0], padding_needed), self.tokenizer.pad, dtype=torch.int64)], dim=1)
                labels = torch.cat([labels, torch.full((labels.shape[0], padding_needed), IGNORE_INDEX, dtype=torch.int64)], dim=1)
                if is_packed:
                    cu_lengths[0][-1] += padding_needed
                    new_max_length = cu_lengths[0][-1] - cu_lengths[0][-2]
                    max_lengths = torch.max(max_lengths, new_max_length)


        return ImageTaskBatchPacked(
            __key__=[s.__key__ for s in samples],
            __restore_key__=[s.__restore_key__ for s in samples],
            __subflavor__=None,
            __subflavors__=samples[0].__subflavors__,
            tokens=tokens,
            labels=labels,
            imgs=imgs,
            num_tiles=num_tiles,
            cu_lengths=cu_lengths,
            max_lengths=max_lengths,
        )

    def encode_batch(self, batch: ImageTaskBatchPacked) -> dict:
        raw = dataclasses.asdict(batch)
        del raw["__subflavors__"]
        return raw

    def select_samples_to_pack(self, samples: List[ImageTaskSample]) -> List[List[ImageTaskSample]]:
        """Selects which samples will be packed together.

        NOTE: Energon dataloader calls this method internally if packing is used.
        Please see https://nvidia.github.io/Megatron-Energon/advanced/packing.html
        """
        lengths = [sample.total_len for sample in samples]

        packed_samples = greedy_knapsack(lengths, samples, self.packing_seq_length)

        return packed_samples

    @stateless
    def pack_selected_samples(self, samples: List[ImageTaskSample]) -> List[ImageTaskSamplePacked]:
        """
        Function to pack a list of ImageTaskSample into a single ImageTaskSamplePacked.

        NOTE: Energon dataloader calls this method internally if packing is used.
        Please see https://nvidia.github.io/Megatron-Energon/advanced/packing.html

        Args:
            samples: List of ImageTaskSample instances to pack into one sample.

        Returns:
            ImageTaskSamplePacked instance.
        """
        packing_seq_len = self.packing_seq_length

        packed_tokens = []
        packed_labels = []
        packed_imgs = []

        current_length = 0
        max_length = 0
        cu_lengths = [0]

        # Process each sample and build lists that we will concatenate to create the packed sample.
        for _, sample in enumerate(samples):
            sample_len = sample.total_len

            if sample_len > max_length:
                max_length = sample_len

            # If adding this sample exceeds the max length, stop.
            # This should not happen. The select_samples_to_pack method should have already ensured that the samples fit.
            if current_length + sample_len > packing_seq_len:
                raise ValueError(f"Packed sample exceeds the maximum sequence length of {packing_seq_len}: {samples}")

            # Add the sample's tokens and labels
            packed_tokens.append(sample.tokens)
            packed_labels.append(sample.labels)

            # Add the images
            packed_imgs += sample.imgs

            current_length += sample_len
            cu_lengths.append(current_length)

        # Concatenate packed tokens and labels.
        packed_tokens = torch.cat(packed_tokens, dim=0)
        packed_labels = torch.cat(packed_labels, dim=0)

        return ImageTaskSamplePacked(
            __key__=",".join([s.__key__ for s in samples]),
            __restore_key__=(),  # Will be set by energon based on `samples`
            __subflavor__=None,
            __subflavors__=samples[0].__subflavors__,
            tokens=packed_tokens,
            labels=packed_labels,
            imgs=packed_imgs,
            cu_lengths=torch.tensor(cu_lengths, dtype=torch.int32),
            max_length=max_length,
            num_tiles=[n for s in samples for n in s.num_tiles],
        )


def print_error_handler(exc: Exception, key: Optional[str]):
    print(
        f"The following exception occurred in the dataloader for sample {key} and is skipped",
        file=sys.stderr,
    )
    traceback.print_exc()


def format_multichoice_question(question, multichoice_options):
    """Format multi-choice question."""
    options_text = ["{}. {}\n".format(chr(ord('A') + i), option) for i, option in
                    zip(range(len(multichoice_options)), multichoice_options)]
    options_text = "".join(options_text)

    options_text = f"{options_text}Answer with the option's letter from the given choices directly."

    return "{}\n{}".format(question, options_text)


def format_multichoice_answer(idx):
    """Format multi-choice answer."""
    return chr(ord('A') + idx)


================================================
FILE: examples/multimodal/energon_util.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
import torch
from dataclasses import dataclass
from typing import Any, List

from megatron.energon import Sample


@dataclass
class SampleListSample(Sample):
    """Sample type for a list of samples of any type which needs to be packed together.

    This is useful for datasets which are packed offline.
    """

    #: The images of the sequence
    samples: List[Any]


@dataclass
class OfflineTargetAspectRatioSample(Sample):
    """Sample type for image + text samples with target aspect ratio computed offline."""

    #: The images of the sequence
    images: List[torch.Tensor]
    #: The texts of the sequence
    texts: List[str]
    target_aspect_ratio: List[List]

    if not hasattr(Sample, "__subflavor__"):
        __subflavor__: str


================================================
FILE: examples/multimodal/evaluation/evaluate_ai2d.py
================================================
import argparse
import json

from .evaluate_mmmu import get_input_output_paths
from .evaluate_vqav2 import compute_vqa_accuracy


def merge_input_files(input_path):
    """Merge input files to a format compatible with the evaluator."""
    input_file_paths, output_file_path = get_input_output_paths(input_path, task="AI2D")

    results = dict()

    for input_file_path in input_file_paths:
        with open(input_file_path, "r") as input_file:
            for line in input_file:
                res = json.loads(line)
                sample_id = res["sample_id"]

                # Ignore possible duplicates.
                if sample_id in results:
                    continue

                results[sample_id] = {
                    "question_id": sample_id,
                    "answer": res["answer"],
                    "gt_answer": res["gt_answer"],
                }

    results = list(results.values())

    with open(output_file_path, "w") as output_file:
        json.dump(results, output_file, indent=4, sort_keys=True)

    return output_file_path


def ai2d_eval(input_path):
    """Run AI2D evaluation."""
    result_file_path = merge_input_files(input_path)
    avg_acc = compute_vqa_accuracy(result_file_path, task="AI2D")
    return avg_acc


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-path', type=str, help="Path to input file(s)")
    args = parser.parse_args()

    avg_acc = ai2d_eval(args.input_path)

    print(f"===== AI2D Accuracy {avg_acc:.2f}% =====")


================================================
FILE: examples/multimodal/evaluation/evaluate_chartqa.py
================================================
import argparse
import json

from .evaluate_mmmu import get_input_output_paths
from .evaluate_vqav2 import compute_vqa_accuracy


def merge_input_files(input_path):
    """Merge input files to a format compatible with the evaluator."""
    input_file_paths, output_file_path = get_input_output_paths(input_path, task="ChartQA")

    results = dict()

    for input_file_path in input_file_paths:
        with open(input_file_path, "r") as input_file:
            for line in input_file:
                res = json.loads(line)
                sample_id = res["sample_id"]

                # Ignore possible duplicates.
                if sample_id in results:
                    continue

                res["question_id"] = sample_id
                results[sample_id] = res

    results = list(results.values())

    with open(output_file_path, "w") as output_file:
        json.dump(results, output_file, indent=4, sort_keys=True)

    return output_file_path


def chartqa_eval(input_path):
    """Run ChartQA evaluation."""
    result_file_path = merge_input_files(input_path)
    return compute_vqa_accuracy(result_file_path, task="ChartQA")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-path', type=str, help="Path to input file(s)")
    args = parser.parse_args()

    avg_acc = chartqa_eval(args.input_path)

    print(f"ChartQA accuracy: {avg_acc:.2f}")


================================================
FILE: examples/multimodal/evaluation/evaluate_coco.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
import argparse
import json

from .evaluate_mmmu import get_input_output_paths
from pycocoevalcap.eval import COCOEvalCap
from pycocotools.coco import COCO


def convert_to_coco_format(input_path):
    """Convert input files to COCO compatible format."""
    input_file_paths, output_file_path = get_input_output_paths(input_path, task="captioning")

    results = dict()

    for input_file_path in input_file_paths:
        with open(input_file_path, "r") as input_file:
            for line in input_file:
                res = json.loads(line)
                sample_id = res["sample_id"]

                # Ignore possible duplicates.
                if sample_id in results:
                    continue

                caption = res["caption"].rstrip(".").lower()
                results[sample_id] = {
                    "image_id": sample_id,
                    "caption": caption,
                }

    results = list(results.values())

    with open(output_file_path, "w") as output_file:
        json.dump(results, output_file, indent=4, sort_keys=True)

    return output_file_path


def coco_captioning_eval(input_path, groundtruth_file):
    """Run COCO captioning evaluation."""
    coco = COCO(groundtruth_file)
    input_file = convert_to_coco_format(input_path)
    coco_result = coco.loadRes(input_file)

    coco_eval = COCOEvalCap(coco, coco_result)

    # Evaluate on the input subset of images.
    coco_eval.params["image_id"] = coco_result.getImgIds()

    coco_eval.evaluate()

    print("========== COCO captioning scores ==========")
    for metric, score in coco_eval.eval.items():
        print(f"{metric} {score * 100:.3f}")

    return coco_eval.eval['CIDEr']

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input-path", type=str, required=True, help="Path to input file(s)")
    parser.add_argument(
        "--groundtruth-path", type=str, required=True, help="Path to groundtruth file"
    )
    args = parser.parse_args()

    coco_captioning_eval(args.input_path, args.groundtruth_path)


================================================
FILE: examples/multimodal/evaluation/evaluate_infovqa.py
================================================
import argparse
import json

from .evaluate_vqav2 import compute_vqa_accuracy
from .evaluate_mmmu import get_input_output_paths


def merge_input_files(input_path):
    """Merge input files to a format compatible with the evaluator."""
    input_file_paths, output_file_path = get_input_output_paths(input_path, task="InfoVQA")

    results = []

    for input_file_path in input_file_paths:
        with open(input_file_path, "r") as input_file:
            for line in input_file:
                res = json.loads(line)
                results.append(
                    {
                        "question_id": res["sample_id"],
                        "answer": res["answer"],
                        "gt_answer": res["gt_answer"],
                    }
                )

    # Make order deterministic.
    # results = sorted(results, key=lambda d: d["question_id"])

    with open(output_file_path, "w") as output_file:
        json.dump(results, output_file)

    return output_file_path


def infovqa_eval(input_path):
    """Run InfoVQA evaluation."""
    result_file_path = merge_input_files(input_path)
    return compute_vqa_accuracy(result_file_path, task="InfoVQA")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-path', type=str, help="Path to input file(s)")
    args = parser.parse_args()

    avg_acc = infovqa_eval(args.input_path)

    print(f"===== InfoVQA Accuracy {avg_acc:.2f}% =====")


================================================
FILE: examples/multimodal/evaluation/evaluate_mathvista.py
================================================
import argparse
import json
import re

from .evaluate_mmmu import get_input_output_paths
from .mmmu_utils import parse_multi_choice_response
from open_flamingo.eval.vqa_metric import VQAEval


def merge_input_files(input_path):
    """Merge input files to a format compatible with the evaluator."""
    input_file_paths, output_file_path = get_input_output_paths(input_path, task="MathVista")

    results = dict()

    for input_file_path in input_file_paths:
        with open(input_file_path, "r") as input_file:
            for line in input_file:
                res = json.loads(line)
                sample_id = res["sample_id"]

                # Remove possible duplicates.
                if sample_id in results:
                    continue

                results[sample_id] = res

    results = list(results.values())

    with open(output_file_path, "w") as output_file:
        json.dump(results, output_file, indent=4, sort_keys=True)

    return output_file_path


def extra_processing(text):
    """Extra processing."""
    # Max decimal point capped to 2 decimal point
    regex = re.compile(r'^\d+\.\d+$')
    decimal = regex.findall(text)

    if len(decimal) > 0:
        non_decimal = len(decimal[0].split(".")[0])

        # if decimal values are all 0, trim them
        decimal_digits = [int(d) for d in decimal[0].split(".")[1]]
        if sum(decimal_digits) == 0:
            text = decimal[0][:non_decimal]
        else:
            text = decimal[0][: non_decimal + 3]

    # remove % and trailing .
    text = text.replace("%", "")
    if text[-1] == ".":
        text = text[:-1]

    return text


def extract_answer(text):
    """Extract answer."""
    alphabet = re.findall(r'[a-zA-Z]+', text)
    if len(alphabet) > 0 and "e+" not in text:
        template = re.findall(r'answer is -*\d+\.*\d*', text)
        if len(template) > 0:
            text = template[0]

            numbers = re.findall(r'-*\d+\.*\d*', text)
            text = numbers[0] if len(numbers) > 0 else text

    return text


def compute_mathvista_accuracy(result_file):
    """Compute MathVista accuracy."""
    merged_results = json.load(open(result_file))

    vqa = VQAEval(vqa=None, vqaRes=None)
    acc = 0
    for res in merged_results:
        pred_ans = res["answer"]
        if res["question_type"] == "multi_choice":
            pred_ans = parse_multi_choice_response(pred_ans, res["all_choices"], res["index2ans"])
        else:
            pred_ans = vqa.processPunctuation(pred_ans)
            pred_ans = vqa.processDigitArticle(pred_ans)
            # Extra processing and extraction.
            pred_ans = extra_processing(pred_ans)
            pred_ans = extract_answer(pred_ans)

        gt_ans = res["gt_answer"]
        if isinstance(gt_ans, list):
            assert len(gt_ans) == 1, f"Expected 1 groundtruth, got {gt_ans}"
            gt_ans = gt_ans[0]

        if res["question_type"] != "multi_choice":
            gt_ans = vqa.processPunctuation(gt_ans)
            gt_ans = vqa.processDigitArticle(gt_ans)

            gt_ans = extra_processing(gt_ans)

        if pred_ans == gt_ans:
            acc += 1
    acc = acc / len(merged_results) * 100
    return acc


def mathvista_eval(input_path):
    """Run MathVista evaluation."""
    result_file_path = merge_input_files(input_path)
    acc = compute_mathvista_accuracy(result_file_path)
    return acc


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-path', type=str, help="Path to input file(s)")
    args = parser.parse_args()

    acc = mathvista_eval(args.input_path)

    print(f"===== MathVista accuracy: {acc} =====")


================================================
FILE: examples/multimodal/evaluation/evaluate_mmmu.py
================================================
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
import argparse
import glob
import json
import os
import sys
import re
import subprocess

from .mmmu_utils import parse_multi_choice_response
# Get the absolute path of the parent directory
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
# Add the parent directory to sys.path
sys.path.insert(0, parent_dir)

from run_text_generation import get_output_path
from config import EvaluationConfig


def get_input_output_paths(input_path, task):
    """Get all input files and an output path for a merged file."""
    # Single input file.
    if os.path.exists(input_path):
        input_file_paths = [input_path]
        output_file_path = input_path.replace(".jsonl", "-merged.json")
    # Select multiple partitions and dp ranks.
    else:
        cfg = EvaluationConfig(task=task, output_path=input_path, partition_id="*")
        pattern = get_output_path(cfg, dp_rank="*")
        input_file_paths = glob.glob(pattern)

        output_file_path = input_path + f"-{task}-merged.json"

    return input_file_paths, output_file_path


def extract_answer(text):
    import re
    # Regular expression to find content inside \answer{xxx}
    match = re.search(r'\\answer\{(.*?)\}', text)
    if match:
        return match.group(1)  # Return the content inside the braces

    # Regular expression to find content inside \boxed{xxx}
    match = re.search(r'\\boxed\{(.*?)\}', text)
    if match:
        return match.group(1)  # Return the content inside the braces

    text = text.replace("Answer:", "Answer: ")
    return text  # Return the original string if no match is found


def convert_to_mmmu_format(input_path):
    """Convert input files to MMMU compatible format."""
    input_file_paths, output_file_path = get_input_output_paths(input_path, "MMMU")

    output = dict()

    for input_file_path in input_file_paths:
        with open(input_file_path, "r") as input_file:
            for line in input_file:
                res = json.loads(line)

                sample_id = res["sample_id"]
                prediction = res["prediction"]

                if sample_id in output:
                    continue

                if res["question_type"] == "multiple-choice":
                    prediction = extract_answer(prediction)
                    prediction = parse_multi_choice_response(
                        prediction, res["all_choices"], res["index2ans"]
                    )

                # MMMU eval script expects just a sample_id to prediction mapping.
                output[sample_id] = prediction

    with open(output_file_path, "w") as output_file:
        json.dump(output, output_file, indent=4, sort_keys=True)

    return output_file_path


def mmmu_eval(input_path, groundtruth_path):
    """Run MMMU evaluation."""
    result_file = convert_to_mmmu_format(input_path)

    # The MMMU repo has a script for running the actual evaluation but no API. So launching the script here.
    output = subprocess.run(
        [
            "python",
            "examples/multimodal/MMMU/mmmu/main_eval_only.py",
            "--output_path",
            result_file,
            "--answer_path",
            groundtruth_path,
        ],
        capture_output=True,
        text=True,
    )

    print(output.stderr)
    print(output.stdout)

    m = re.search(r"'Overall': {'num': \d+, 'acc': (\d\.\d+)}", output.stdout)

    return float(m.group(1)) * 100.0


def main():
    """Run MMMU evaluation."""
    # Using the validation groundtruth file from the MMMU repo by default. This assumes you have cloned the MMMU github repo here.
    default_groundtruth_path = "examples/multimodal/MMMU/mmmu/answer_dict_val.json"

    parser = argparse.ArgumentParser()
    parser.add_argument("--input-path", type=str, required=True, help="Path to input file(s)")
    parser.add_argument(
        "--groundtruth-path",
        type=str,
        default=default_groundtruth_path,
        help="Path to groundtruth file. Defaults to the validation file in the MMMU repo.",
    )
    args = parser.parse_args()

    avg_acc = mmmu_eval(args.input_path, args.groundtruth_path)

    print(f"MMMU average accuracy: {avg_acc:.2f}")


if __name__ == "__main__":
    main()


================================================
FILE: examples/multimodal/evaluation/evaluate_ocrbench.py
================================================
import argparse
import json

from .evaluate_mmmu import get_input_output_paths


def merge_input_files(input_path):
    """Merge input files to a format compatible with the evaluator."""
    input_file_paths, output_file_path = get_input_output_paths(input_path, task="OCRBench")

    results = dict()

    for input_file_path in input_file_paths:
        with open(input_file_path, "r") as input_file:
            for line in input_file:
                res = json.loads(line)
                sample_id = res["sample_id"]

                # Remove possible duplicates.
                if sample_id in results:
                    continue

                results[sample_id] = res

    results = list(results.values())

    with open(output_file_path, "w") as output_file:
        json.dump(results, output_file, indent=4, sort_keys=True)

    return output_file_path


def compute_ocrbench_score(result_file):
    """Compute OCRBench score."""
    merged_results = json.load(open(result_file))

    # OCRBench score calculation is adopted from https://github.com/Yuliang-Liu/MultimodalOCR/blob/1b7713f44c91f30f64efb6d3e494c416861ef15f/example.py#L1
    # MIT License. Copyright (c) 2023 Yuliang Liu
    score = {
        "Regular Text Recognition": 0,
        "Irregular Text Recognition": 0,
        "Artistic Text Recognition": 0,
        "Handwriting Recognition": 0,
        "Digit String Recognition": 0,
        "Non-Semantic Text Recognition": 0,
        "Scene Text-centric VQA": 0,
        "Doc-oriented VQA": 0,
        "Doc-oriented VQA": 0,
        "Key Information Extraction": 0,
        "Handwritten Mathematical Expression Recognition": 0,
    }

    for res in merged_results:
        predict = res["answer"]
        answers = res["gt_answer"]

        dataset_name = res["dataset_name"]
        ocr_type = res["data_type"]

        if dataset_name == "HME100k":
            if isinstance(answers, list):
                for j in range(len(answers)):
                    answer = answers[j].strip().replace("\n", " ").replace(" ", "")
                    predict = predict.strip().replace("\n", " ").replace(" ", "")
                    if answer in predict:
                        score[ocr_type] += 1
            else:
                answers = answers.strip().replace("\n", " ").replace(" ", "")
                predict = predict.strip().replace("\n", " ").replace(" ", "")
                if answers in predict:
                    score[ocr_type] += 1
        else:
            if isinstance(answers, list):
                for j in range(len(answers)):
                    answer = answers[j].lower().strip().replace("\n", " ")
                    predict = predict.lower().strip().replace("\n", " ")
                    if answer in predict:
                        score[ocr_type] += 1
            else:
                answers = answers.lower().strip().replace("\n", " ")
                predict = predict.lower().strip().replace("\n", " ")
                if answers in predict:
                    score[ocr_type] += 1

    recognition_score = (
        score['Regular Text Recognition']
        + score['Irregular Text Recognition']
        + score['Artistic Text Recognition']
        + score['Handwriting Recognition']
        + score['Digit String Recognition']
        + score['Non-Semantic Text Recognition']
    )
    final_score = (
        recognition_score
        + score['Scene Text-centric VQA']
        + score['Doc-oriented VQA']
        + score['Key Information Extraction']
        + score['Handwritten Mathematical Expression Recognition']
    )
    result_log = f"""###########################OCRBench##############################
Text Recognition(Total 300): {recognition_score}
------------------Details of Recognition Score-------------------
Regular Text Recognition(Total 50): {score['Regular Text Recognition']}
Irregular Text Recognition(Total 50): {score['Irregular Text Recognition']}
Artistic Text Recognition(Total 50): {score['Artistic Text Recognition']}
Handwriting Recognition(Total 50): {score['Handwriting Recognition']}
Digit String Recognition(Total 50): {score['Digit String Recognition']}
Non-Semantic Text Recognition(Total 50): {score['Non-Semantic Text Recognition']}
----------------------------------------------------------------
Scene Text-centric VQA(Total 200): {score['Scene Text-centric VQA']}
----------------------------------------------------------------
Doc-oriented VQA(Total 200): {score['Doc-oriented VQA']}
----------------------------------------------------------------
Key Information Extraction(Total 200): {score['Key Information Extraction']}
----------------------------------------------------------------
Handwritten Mathematical Expression Recognition(Total 100): {score['Handwritten Mathematical Expression Recognition']}
----------------------Final Score-------------------------------
Final Score(Total 1000): {final_score}"""

    return result_log, final_score


def ocrbench_eval(input_path):
    """Run OCRBench evaluation."""
    result_file_path = merge_input_files(input_path)
    result_log, score = compute_ocrbench_score(result_file_path)
    return result_log, score


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-path', type=str, help="Path to input file(s)")
    args = parser.parse_args()

    result_log, _ = ocrbench_eval(args.input_path)

    print(result_log)


================================================
FILE: examples/multimodal/evaluation/evaluate_ocrbench_v2.py
================================================
import argparse
import json
import subprocess
import nltk
nltk.download("wordnet")

from .evaluate_mmmu import get_input_output_paths


def convert_to_ocrbench_v2_format(input_path, groundtruth_path):
    """Convert input files to OCRBenchV2 compatible format."""
    input_file_paths, output_file_path = get_input_output_paths(input_path, "OCRBench_v2")

    output = []

    with open(groundtruth_path) as f:
        gt = json.load(f)

    for input_file_path in input_file_paths:
        with open(input_file_path, "r") as input_file:
            for line in input_file:
                res = json.loads(line)

                out = gt[res["sample_id"]]
                out["predict"] = res["predict"]

                output.append(out)

    output = sorted(output, key=lambda x: x["id"])

    with open(output_file_path, "w") as output_file:
        json.dump(output, output_file)

    return output_file_path


def ocrbench_v2_eval(input_path, groundtruth_path, output_path):
    """Run OCRBenchV2 evaluation."""
    result_file = convert_to_ocrbench_v2_format(input_path, groundtruth_path)

    # The OCRBenchV2 repo has scripts for running the actual evaluation
    output = subprocess.run(
        [
            "python",
            "examples/multimodal/MultimodalOCR/OCRBench_v2/eval_scripts/eval.py",
            "--output_path",
            output_path,
            "--input_path",
            result_file,
        ],
        capture_output=True,
        text=True,
    )
    print(output.stderr)
    print(output.stdout)

    output = subprocess.run(
        [
            "python",
            "examples/multimodal/MultimodalOCR/OCRBench_v2/eval_scripts/get_score.py",
            "--json_file",
            output_path,
        ],
        capture_output=True,
        text=True,
    )
    print(output.stderr)
    print(output.stdout)


def main():
    """Run OCRBenchV2 evaluation."""

    parser = argparse.ArgumentParser()
    parser.add_argument("--input-path", type=str, required=True, help="Path to input file(s)")
    parser.add_argument(
        "--groundtruth-path",
        type=str,
        required=True,
        help="Path to groundtruth file",
    )
    parser.add_argument(
        "--output-path",
        type=str,
        required=True,
        help="Path to dump outputs from the OCRBench V2 eval script",
    )
    args = parser.parse_args()

    ocrbench_v2_eval(args.input_path, args.groundtruth_path, args.output_path)


if __name__ == "__main__":
    main()


================================================
FILE: examples/multimodal/evaluation/evaluate_rd_tablebench.py
================================================
import argparse
import glob
import json
import os
import re
import subprocess
import sys
import numpy as np

from .evaluate_mmmu import get_input_output_paths

# The rd-tablebench repo has functions for grading table predictions.
# Get the absolute path of the rd-tablebench repo
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'rd-tablebench'))
# Add the parent directory to sys.path
sys.path.insert(0, parent_dir)

from grading import table_similarity
from convert import html_to_numpy


def convert_to_rdtablebench_format(input_path):
    """Convert input files to RDTableBench compatible format."""
    input_file_paths, output_file_path = get_input_output_paths(input_path, "RD_TableBench")

    output = []

    for input_file_path in input_file_paths:
        with open(input_file_path, "r") as input_file:
            for line in input_file:
                res = json.loads(line)
                output.append(res)

    output = sorted(output, key=lambda x: x["sample_id"])

    with open(output_file_path, "w") as output_file:
        json.dump(output, output_file)

    return output_file_path


def rdtablebench_eval(input_path):
    """Run RD-TableBench evaluation."""
    result_file = convert_to_rdtablebench_format(input_path)

    with open(result_file) as f:
        data = json.load(f)

    similarities = []
    num_failed = 0
    for sample in data:
        pred = sample["predict"]
        target = sample["ground_truth"]
        target_np = html_to_numpy(target)
        try:
            pred_np = html_to_numpy(pred)
            similarity = table_similarity(target_np, pred_np)
        except Exception as e:
            print("Failed to grade table: ", e)
            similarity = 0
            num_failed += 1
        similarities.append(similarity)

    print(f"Accuracy: {np.mean(similarities)}")
    print(f"Failed: {num_failed}")

def main():
    """Run RD-TableBench evaluation."""

    parser = argparse.ArgumentParser()
    parser.add_argument("--input-path", type=str, required=True, help="Path to input file(s)")
    args = parser.parse_args()

    rdtablebench_eval(args.input_path)


if __name__ == "__main__":
    main()


================================================
FILE: examples/multimodal/evaluation/evaluate_realworldqa.py
================================================
import argparse
import json

from .evaluate_vqav2 import compute_vqa_accuracy
from .evaluate_mmmu import get_input_output_paths


def merge_input_files(input_path):
    """Merge input files to a format compatible with the evaluator."""
    input_file_paths, output_file_path = get_input_output_paths(input_path, task="RealworldQA")

    results = []
    collected = set()

    for input_file_path in input_file_paths:
        with open(input_file_path, "r") as input_file:
            for line in input_file:
                res = json.loads(line)
                res["question_id"] = res["sample_id"]
                if res['sample_id'] in collected:
                    continue
                collected.add(res['sample_id'])

                results.append(res)

    with open(output_file_path, "w") as output_file:
        json.dump(results, output_file, indent=4, sort_keys=True)

    return output_file_path


def realworldqa_eval(input_path):
    """Run RealWorldQA evaluation."""
    result_file_path = merge_input_files(input_path)
    return compute_vqa_accuracy(result_file_path, task="RealworldQA")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-path', type=str, help="Path to input file(s)")
    args = parser.parse_args()

    avg_acc = realworldqa_eval(args.input_path)

    print(f"RealworldQA accuracy: {avg_acc:.2f}")


================================================
FILE: examples/multimodal/evaluation/evaluate_spdocvqa.py
================================================
import argparse
import json

from .evaluate_vqav2 import compute_vqa_accuracy
from .evaluate_mmmu import get_input_output_paths


def merge_input_files(input_path):
    """Merge input files to a format compatible with the evaluator."""
    input_file_paths, output_file_path = get_input_output_paths(input_path, task="SPDocVQA")

    results = []

    for input_file_path in input_file_paths:
        with open(input_file_path, "r") as input_file:
            for line in input_file:
                res = json.loads(line)
                results.append(
                    {
                        "question_id": res["sample_id"],
                        "answer": res["answer"],
                        "gt_answer": res["gt_answer"],
                    }
                )

    # Make order deterministic.
    # results = sorted(results, key=lambda d: d["question_id"])

    with open(output_file_path, "w") as output_file:
        json.dump(results, output_file)

    return output_file_path


def spdocvqa_eval(input_path):
    """Run SPDocVQA evaluation."""
    result_file_path = merge_input_files(input_path)
    return compute_vqa_accuracy(result_file_path, task="SPDocVQA")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-path', type=str, help="Path to input file(s)")
    args = parser.parse_args()

    avg_acc = spdocvqa_eval(args.input_path)

    print(f"===== SPDocVQA Accuracy {avg_acc:.2f}% =====")


================================================
FILE: examples/multimodal/evaluation/evaluate_textvqa.py
================================================
import argparse
import json

from .evaluate_mmmu import get_input_output_paths
from .evaluate_vqav2 import compute_vqa_accuracy


def merge_input_files(input_path):
    """Merge input files to a format compatible with the evaluator."""
    input_file_paths, output_file_path = get_input_output_paths(input_path, task="TextVQA")

    results = dict()

    for input_file_path in input_file_paths:
        with open(input_file_path, "r") as input_file:
            for line in input_file:
                res = json.loads(line)
                sample_id = res["sample_id"]

                # Remove possible duplicates.
                if sample_id in results:
                    continue

                results[sample_id] = {
                    "question_id": sample_id,
                    "answer": res["answer"],
                    "gt_answer": res["gt_answer"],
                }

    results = list(results.values())

    with open(output_file_path, "w") as output_file:
        json.dump(results, output_file, indent=4, sort_keys=True)

    return output_file_path


def textvqa_eval(input_path):
    """Run TextVQA evaluation."""
    result_file_path = merge_input_files(input_path)
    avg_acc = compute_vqa_accuracy(result_file_path, task="TextVQA")
    return avg_acc


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-path', type=str, help="Path to input file(s)")
    args = parser.parse_args()

    avg_acc = textvqa_eval(args.input_path)

    print(f"===== TextVQA Accuracy {avg_acc:.2f}% =====")


================================================
FILE: examples/multimodal/evaluation/evaluate_video_motionbench.py
================================================
import argparse
import json


from .evaluate_vqav2 import compute_vqa_accuracy
from .evaluate_mmmu import get_input_output_paths


def merge_input_files(input_path):
    """Merge input files to a format compatible with the evaluator."""
    input_file_paths, output_file_path = get_input_output_paths(input_path, task="MotionBench")

    results = []
    collected = set()

    for input_file_path in input_file_paths:
        with open(input_file_path, "r") as input_file:
            for line in input_file:
                res = json.loads(line)
                res["question_id"] = res["sample_id"]
                if res['sample_id'] in collected:
                    continue
                collected.add(res['sample_id'])

                results.append(res)

    with open(output_file_path, "w") as output_file:
        json.dump(results, output_file, indent=4, sort_keys=True)

    return output_file_path


def motionbench_eval(input_path):
    result_file_path = merge_input_files(input_path)
    return compute_vqa_accuracy(result_file_path, task="MotionBench")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-path', type=str, help="Path to input file(s)")
    args = parser.parse_args()

    avg_acc = motionbench_eval(args.input_path)

    print(f"MotionBench accuracy: {avg_acc:.2f}")


================================================
FILE: examples/multimodal/evaluation/evaluate_video_mvbench.py
================================================
import argparse
import json

from .evaluate_mmmu import get_input_output_paths


def merge_input_files(input_path):
    """Merge input files to a format compatible with the evaluator."""
    input_file_paths, output_file_path = get_input_output_paths(input_path, task="MVBench")

    results = []
    collected = set()


    for input_file_path in input_file_paths:
        with open(input_file_path, "r") as input_file:
            for line in input_file:
                res = json.loads(line)
                res["question_id"] = "{}-{}".format(res['task_type'], res['sample_id'])
                if res['sample_id'] in collected:
                    continue
                collected.add(res['sample_id'])

                results.append(res)

    with open(output_file_path, "w") as output_file:
        json.dump(results, output_file, indent=4, sort_keys=True)

    return output_file_path


# The following code is adapted from
# https://github.com/OpenGVLab/Ask-Anything/blob/main/video_chat2/mvbench.ipynb
# which is licensed under the MIT license. More details on the license can be
# found at https://github.com/OpenGVLab/Ask-Anything/tree/main?tab=MIT-1-ov-file#readme
def check_ans(pred, gt):
    flag = False
        
    pred_list = pred.lower().split(' ')
    pred_option, pred_content = pred_list[0], ' '.join(pred_list[1:])
    gt_list = gt.lower().split(' ')
    gt_option, gt_content = gt_list[0], ' '.join(gt_list[1:])
    if gt_content[-1] == '.':
        gt_content = gt_content[:-1]
    
    if pred_option.replace('.', '') in gt_option:
        flag = True
    elif gt_option in pred_option:
        flag = True
    
    return flag

def create_result_dict(result_list):

    correct = 0
    total = 0
    res_list = []
    acc_dict = {}

    for idx, result_obj in enumerate(result_list):
        task_type = result_obj['task_type']
        if task_type not in acc_dict:
            acc_dict[task_type] = [0, 0]  # correct, total
        acc_dict[task_type][1] += 1
        total += 1
        pred = result_obj['answer']
        gt = result_obj['gt_answer'][0]
        
        res_list.append({
            'pred': pred,
            'gt': gt
        })
        if check_ans(pred=pred, gt=gt):
            acc_dict[task_type][0] += 1
            correct += 1

    print(f"Total Acc: {correct / total * 100 :.2f}%")
    print('-' * 30, task_type, '-' * 30)

    return acc_dict
        

def combine_all_res(acc_dict):
    final_res = dict()
    correct = 0
    total = 0
    for k, v in acc_dict.items():
        final_res[k] = v[0] / v[1] * 100
        correct += v[0]
        total += v[1]    
    final_res['total-acc'] = correct / total * 100

    print(final_res)

    return final_res


def mvbench_eval(input_path):
    result_file_path = merge_input_files(input_path)
    
    merged_results = json.load(open(result_file_path))
    acc_dict = create_result_dict(merged_results)
    
    return combine_all_res(acc_dict)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-path', type=str, help="Path to input file(s)")
    args = parser.parse_args()

    avg_acc_dict = mvbench_eval(args.input_path)

    print(f"MVBench {avg_acc_dict}")


================================================
FILE: examples/multimodal/evaluation/evaluate_video_phys_game_bench.py
================================================
import argparse
import json

from .evaluate_mmmu import get_input_output_paths


def merge_input_files(input_path):
    """Merge input files to a format compatible with the evaluator."""
    input_file_paths, output_file_path = get_input_output_paths(input_path, task="PhysGameBench")

    results = []
    collected = set()

    for input_file_path in input_file_paths:
        with open(input_file_path, "r") as input_file:
            for line in input_file:
                res = json.loads(line)
                res["question_id"] = res["sample_id"]
                if res['sample_id'] in collected:
                    continue
                collected.add(res['sample_id'])

                results.append(res)

    with open(output_file_path, "w") as output_file:
        json.dump(results, output_file, indent=4, sort_keys=True)

    return output_file_path


# The following function is adapted from
# https://github.com/PhysGame/PhysGame/blob/main/physvlm/test/PhysGame_bench/utils.py#L101
# which is licensed under the Apache 2.0 license. More details on the license can be
# found at https://github.com/PhysGame/PhysGame/tree/main?tab=Apache-2.0-1-ov-file#readme
def check_ans(pred, gt):
    flag = False
    
    pred_list = pred.lower().split(' ')
    pred_option, pred_content = pred_list[0], ' '.join(pred_list[1:])
    gt_list = gt.lower().split(' ')
    gt_option, gt_content = gt_list[0], ' '.join(gt_list[1:])
    if gt_content[-1] == '.':
        gt_content = gt_content[:-1]
    
    if pred_option.replace('.', '') in gt_option:
        flag = True
    elif gt_option in pred_option:
        flag = True
        
    return flag

def compute_all_acc(result_list):
    correct, total = 0, 0
    subclass_cnt = {}
    for res in result_list:
        total += 1
        pred = res['answer']
        gt = res['gt_answer'][0]
        subclass = res['subclass']
        if gt.lower().replace(".", "") == pred.lower().replace(".", ""):
            correct += 1
            if subclass not in subclass_cnt.keys():
                subclass_cnt.update({subclass: [1, 1]})
            else:
                subclass_cnt[subclass][0] += 1
                subclass_cnt[subclass][1] += 1
        else:
            if subclass not in subclass_cnt.keys():
                subclass_cnt.update({subclass: [0, 1]})
            else:
                subclass_cnt[subclass][1] += 1
    
    result_acc_dict = {
        "Physgame-Total-Acc": correct / total * 100
    }
    print (f'Physgame-Total-Acc: {correct / total * 100 :.2f}%', )
    for sub_i in subclass_cnt.keys():
        print(f'Physgame-{sub_i}-Acc: {subclass_cnt[sub_i][0] / subclass_cnt[sub_i][1] * 100 :.2f}%')
        result_acc_dict[f'Physgame-{sub_i}-Acc'] = subclass_cnt[sub_i][0] / subclass_cnt[sub_i][1] * 100
    
    return result_acc_dict
        
def phys_game_bench_eval(input_path):
    result_file_path = merge_input_files(input_path)
    
    merged_results = json.load(open(result_file_path))
    
    return compute_all_acc(merged_results)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-path', type=str, help="Path to input file(s)")
    args = parser.parse_args()

    avg_acc = phys_game_bench_eval(args.input_path)

    print(f"PhysGameBench accuracy: {avg_acc:.2f}")


================================================
FILE: examples/multimodal/evaluation/evaluate_vqav2.py
================================================
import argparse
import json
from typing import List

from .evaluate_mmmu import get_input_output_paths
from open_flamingo.eval.vqa_metric import VQAEval

# ANLS score calculation based on https://github.com/shunk031/ANLS/blob/6472e1d71e84d6cee28e3c6d2e18564bafaa312d/anls/metrics/dist.py#L1
# and https://github.com/shunk031/ANLS/blob/6472e1d71e84d6cee28e3c6d2e18564bafaa312d/anls/metrics/score.py#L6
# MIT License. Copyright (c) 2022 Shunsuke KITADA
def levenshtein_distance(s1: str, s2: str) -> int:

    if len(s1) > len(s2):
        s1, s2 = s2, s1

    distances = list(range(len(s1) + 1))
    for i2, c2 in enumerate(s2):
        dists = [i2 + 1]
        for i1, c1 in enumerate(s1):
            if c1 == c2:
                dists.append(distances[i1])
            else:
                dists.append(1 + min((distances[i1], distances[i1 + 1], dists[-1])))
        distances = dists

    return distances[-1]


def normalized_levenshtein_distance(s1: str, s2: str) -> float:
    dist = levenshtein_distance(s1, s2)
    length = max(len(s1.upper()), len(s2.upper()))
    return 0.0 if length == 0 else dist / length

def similarity_function(prediction: str, gold_label: str, threshold: float) -> float:
    nl_score = normalized_levenshtein_distance(prediction, gold_label)
    return 1 - nl_score if nl_score < threshold else 0.0

def anls_score(
    prediction: str, gold_labels: List[str], threshold: float = 0.5
) -> float:

    # not case sensitive, but space sensitive
    y_pred = " ".join(prediction.strip().lower().split())

    anls_scores: List[float] = []
    for gold_label in gold_labels:

        # not case sensitive, but space sensitive
        y_true = " ".join(gold_label.strip().lower().split())

        anls_score = similarity_function(y_pred, y_true, threshold)
        anls_scores.append(anls_score)

    score = max(anls_scores)

    return score

def merge_input_files(input_path):
    """Merge input files to a format compatible with the evaluator."""
    input_file_paths, output_file_path = get_input_output_paths(input_path, task="VQAv2")

    results = dict()

    for input_file_path in input_file_paths:
        with open(input_file_path, "r") as input_file:
            for line in input_file:
                res = json.loads(line)
                sample_id = res["sample_id"]

                # Skip possible duplicates.
                if sample_id in results:
                    continue

                res["question_id"] = sample_id
                results[sample_id] = res

    results = list(results.values())

    with open(output_file_path, "w") as output_file:
        json.dump(results, output_file, indent=4, sort_keys=True)

    return output_file_path


def is_number(n: str):
    """Check if input is a number."""
    try:
        float(n)
        return True
    except ValueError:
        return False


def compute_vqa_accuracy(result_file, task):
    """Compute VQA accuracy."""
    merged_results = json.load(open(result_file))

    vqa = VQAEval(vqa=None, vqaRes=None)
    all_acc = []
    for res in merged_results:
        pred = res["answer"]
        pred = vqa.processPunctuation(pred)
        pred = vqa.processDigitArticle(pred)

        gt = res["gt_answer"]
        gt = [vqa.processPunctuation(ans) for ans in gt]
        gt = [vqa.processDigitArticle(ans) for ans in gt]

        # ChartQA uses relaxed accuracy:
        # "We consider an answer to be correct if it is within 5% of the gold answer.
        #  For non-numeric answers, we still need an exact match to consider an answer to be correct."
        if task == "ChartQA":
            acc = 0.0
            assert len(gt) == 1, "expected exactly one groundtruth answer."
            gt = gt[0]

            pred = pred.rstrip("%")
            gt = gt.rstrip("%")

            if is_number(pred) and is_number(gt):
                pred = float(pred)
                gt = float(gt)
                if pred >= (gt * 0.95) and pred <= (gt * 1.05):
                    acc = 1.0
            elif pred == gt:
                acc = 1.0

            all_acc.append(acc)
        elif task in ("VQAv2", "TextVQA"):
            num_match = sum([pred == ans for ans in gt])
            acc = min(1.0, num_match / 3.0)
            all_acc.append(acc)
        elif task in ("SPDocVQA", "InfoVQA"):
            acc = anls_score(prediction=pred, gold_labels=gt, threshold=0.5)
            all_acc.append(acc)
        elif task in ("AI2D", "RealworldQA", "MotionBench"):
            assert len(gt) == 1, f"Expected exactly 1 GT, got {gt}"
            acc = pred == gt[0]
            all_acc.append(acc)
        else:
            raise NotImplementedError(f"unknown task {task}")

    acc_avg = sum(all_acc) / len(all_acc) * 100

    return acc_avg


def vqav2_eval(input_path):
    """Run VQAv2 evaluation."""
    result_file = merge_input_files(input_path)
    avg_acc = compute_vqa_accuracy(result_file, task="VQAv2")
    return avg_acc


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-path', type=str, help="Path to input file(s)")
    args = parser.parse_args()

    avg_acc = vqav2_eval(args.input_path)

    print(f"===== VQAv2 Accuracy {avg_acc:.2f}% =====")


================================================
FILE: examples/multimodal/evaluation/evaluation_datasets.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
"""Evaluation datasets."""
import glob
import json
import os
import re
from collections import defaultdict

import numpy as np
import torch
from image_processing import ImageTransform
from PIL import Image

from megatron.training import print_rank_0


def _get_partition_bounds(
    total_num_samples, num_samples_per_partition, num_partitions, partition_id
):
    if num_samples_per_partition == 0:
        samples_per_partition = [
            int(x) for x in np.linspace(0, total_num_samples, num_partitions + 1)
        ]
        return samples_per_partition[partition_id], samples_per_partition[partition_id + 1]
    return num_samples_per_partition * partition_id, num_samples_per_partition * (partition_id + 1)


class VQADataset(torch.utils.data.Dataset):
    """VQA evaluation dataset."""

    def __init__(
        self,
        input_image_path,
        gt_path,
        num_samples_per_partition,
        num_partitions,
        partition_id,
        keys,
        img_h,
        img_w,
        use_tiling,
        max_num_tiles,
        use_thumbnail,
        vision_model_type,
        split="validation"
    ):
        samples = json.load(open(gt_path, encoding='utf-8'))
        if "data" in samples:
            samples = samples["data"]

        # Optionally, process only a subset of the input files.
        if num_partitions > 0:
            lb, ub = _get_partition_bounds(
                len(samples), num_samples_per_partition, num_partitions, partition_id
            )
            samples = samples[lb:ub]

        self._keys = keys
        self._samples = samples
        self._input_image_path = input_image_path
        self._img_h = img_h
        self._img_w = img_w
        self._use_tiling = use_tiling
        self._max_num_tiles = max_num_tiles
        self._use_thumbnail = use_thumbnail
        self._transform_img = ImageTransform(img_h, vision_model_type)
        self._split = split

    def __len__(self):
        return len(self._samples)

    def __getitem__(self, idx):
        sample = self._samples[idx]

        img_file = "{}/{}".format(self._input_image_path, sample[self._keys["image_id"]])
        if not os.path.exists(img_file):
            img_file += ".jpg"

            if not os.path.exists(img_file):
                img_file = img_file.replace('.jpg', '.png')

        img = Image.open(img_file)
        imgs = self._transform_img(
            img,
            self._img_h,
            self._img_w,
            self._use_tiling,
            self._max_num_tiles,
            self._use_thumbnail,
            augment=False,
        )
        tile_count = torch.tensor([len(imgs)], dtype=torch.int)

        sample_id = idx
        if "sample_id" in self._keys:
            sample_id = sample[self._keys["sample_id"]]

        metadata = ""  # Not used.

        return (
            torch.stack(imgs),
            tile_count,
            sample_id,
            sample[self._keys["question"]],
            [""] if self._split == "test" else sample[self._keys["answer"]],
            metadata,
        )


class CaptioningDataset(torch.utils.data.Dataset):
    """Captioning evaluation dataset."""

    def __init__(
        self,
        input_image_path,
        gt_path,
        num_samples_per_partition,
        num_partitions,
        partition_id,
        img_h,
        img_w,
        use_tiling,
        max_num_tiles,
        use_thumbnail,
        vision_model_type,
    ):
        image_files = sorted(glob.glob(input_image_path + "/*"))

        # Optionally, process only a subset of the input files.
        if num_partitions > 0:
            lb, ub = _get_partition_bounds(
                len(image_files), num_samples_per_partition, num_partitions, partition_id
            )
            image_files = image_files[lb:ub]

        gts = json.load(open(gt_path))
        answers = defaultdict(list)
        for gt in gts["annotations"]:
            answers[gt["image_id"]].append(gt['caption'])

        self._image_files = image_files
        self._answers = answers
        self._img_h = img_h
        self._img_w = img_w
        self._use_tiling = use_tiling
        self._max_num_tiles = max_num_tiles
        self._use_thumbnail = use_thumbnail
        self._transform_img = ImageTransform(img_h, vision_model_type)

    def __len__(self):
        return len(self._image_files)

    def __getitem__(self, idx):
        img_file = self._image_files[idx]
        try:
            image_id = int(img_file.split("_")[-1].split(".")[0])  # coco
        except:
            image_id = int(img_file.split("/")[-1].split(".")[0])  # flickr

        img = Image.open(img_file)
        imgs = self._transform_img(
            img,
            self._img_h,
            self._img_w,
            self._use_tiling,
            self._max_num_tiles,
            self._use_thumbnail,
            augment=False,
        )

        tile_count = torch.tensor([len(imgs)], dtype=torch.int)

        question = ""  # Fixed for all samples.
        metadata = ""  # Not used.

        return torch.stack(imgs), tile_count, image_id, question, self._answers[image_id], metadata


class MMMUDataset(torch.utils.data.Dataset):
    """MMMU evaluation dataset."""

    def __init__(
        self,
        input_image_path,
        num_samples_per_partition,
        num_partitions,
        partition_id,
        img_h,
        img_w,
        use_tiling,
        max_num_tiles,
        use_thumbnail,
        prompt_style,
        vision_model_type,
        split="validation",
    ):
        import datasets
        from .mmmu_utils import CAT_SHORT2LONG, load_yaml

        # The following downloads the MMMU dataset from HuggingFace and uses the API from the MMMU github repo to run MMMU evaluation.
        all_mmmu_datasets = []

        hf_datasets_cache = os.environ["HF_DATASETS_CACHE"]
        assert hf_datasets_cache != "", "Please set the environment variable HF_DATASETS_CACHE."

        for subject in CAT_SHORT2LONG.values():
            # Use a local copy of the dataset if exists (can be faster) or the HF one.
            if os.path.exists(input_image_path):
                subject_dataset = datasets.load_dataset(
                    os.path.join(input_image_path, subject),
                    split=split,
                    cache_dir=hf_datasets_cache,
                    verification_mode="no_checks",
                )
            else:
                subject_dataset = datasets.load_dataset(
                    "MMMU/MMMU",
                    subject,
                    split=split,
                    cache_dir=hf_datasets_cache,
                )

            all_mmmu_datasets.append(subject_dataset)

        dataset = datasets.concatenate_datasets(all_mmmu_datasets)

        dataset = [s for s in dataset if s['id'].startswith("val")]

        # Optionally, process only a subset of the input files.
        if num_partitions > 0:
            lb, ub = _get_partition_bounds(
                len(dataset), num_samples_per_partition, num_partitions, partition_id
            )
            dataset = dataset[lb:ub]

        # Using the LLaVA config from the MMMU repo.
        config = load_yaml("examples/multimodal/MMMU/mmmu/configs/llava1.5.yaml")
        for k, v in config.items():
            if isinstance(v, list):
                assert len(v) == 1, "only one value supported."
                config[k] = v[0]

        self._config = config

        self._dataset = dataset

        self._img_h = img_h
        self._img_w = img_w
        self._use_tiling = use_tiling
        self._max_num_tiles = max_num_tiles
        self._use_thumbnail = use_thumbnail
        self._prompt_style = prompt_style
        self._transform_img = ImageTransform(img_h, vision_model_type)

    def __len__(self):
        return len(self._dataset)

    def process_image_tag(self, q):
        q = q.strip()

        # heuristic way of removing <image 1>
        if q == '<image 1>':
            q = 'Answer the question in the image.'
        elif ':<image 1>' in q:
            q = q.replace(':<image 1>', ' in the image. ')
            q = q.strip()
        elif ': <image 1>' in q:
            q = q.replace(': <image 1>', ' in the image. ')
            q = q.strip()
        elif '.<image 1>' in q or '. <image 1>' in q:
            q_list = q.split('<image 1>')
            q_list = [part.strip() for part in q_list if part.strip() != '']
            q = ' '.join(q_list)
        elif q.startswith('<image 1> '):
            if q[10].isupper():
                q = q.replace('<image 1>', '')
            else:
                q = q.replace('<image 1>', 'The image')
            q = q.strip()
        elif q.startswith('<image 1>'):
            q = q.replace('<image 1>', '')
        elif q.endswith('<image 1>?'):
            q = q.replace('<image 1>', 'the image')
        elif q.endswith('?<image 1>') or q.endswith('? <image 1>') or q.endswith('\n<image 1>'):
            q = q.replace('<image 1>', '')
            q = q.strip()
        elif ' <image 1> ' in q:
            q = q.replace('<image 1>', 'the image')
        elif ' <image 1>' in q:
            q = q.replace('<image 1>', 'the image')
        elif '()<image 1>' in q:
            q = q.replace('()<image 1>', '')
        elif '(<image 1>)' in q:
            q = q.replace('(<image 1>)', '')
        elif '<image 1>.' in q:
            q = q.replace("<image 1>.", ". ")
        else:
            q = q.replace("<image 1>", ". ")
            q = q.strip()

        # remove <image 2> to <image 8>
        for i in range(2, 8):
            q = q.replace(f"<image {i}>", "")

        return q

    def __getitem__(self, idx):
        from .mmmu_utils import construct_prompt, process_single_sample

        sample = self._dataset[idx]

        # Use the single image approach from the MMMU repo.
        if self._prompt_style == "single_image":
            sample = process_single_sample(sample)
            sample = construct_prompt(sample, self._config)

            img = sample["image"]
            sample_imgs = self._transform_img(
                img,
                self._img_h,
                self._img_w,
                self._use_tiling,
                self._max_num_tiles,
                self._use_thumbnail,
                augment=False,
            )
            sample_num_tiles = [len(sample_imgs)]

            prompt = sample["final_input_prompt"]
            sample["final_input_prompt"] = self.process_image_tag(prompt)
        elif self._prompt_style == "vlmevalkit":
            sample = construct_prompt(sample, self._config)

            if sample["question_type"] == "multiple-choice":
                question = sample["question"]

                options = ""
                for k, v in sample["index2ans"].items():
                    options += f"{k}. {v}\n"

                final_prompt = f"{question}\n"
                if "hint" in sample:
                    final_prompt += f"Hint: {sample['hint']}\n"

                if "task_instructions" in sample:
                    final_prompt += f"Task instructions: {sample['task_instructions']}\n"

                final_prompt += options
                final_prompt += "Answer with the option's letter from the given choices directly."

                sample["final_input_prompt"] = final_prompt.rstrip()
            else:
                question = sample["question"]
                final_prompt = f"{question}\n"
                final_prompt += "Answer the question directly."
                sample["final_input_prompt"] = final_prompt.rstrip()

            sample_imgs = []
            sample_num_tiles = []

            img_indices = sorted(list(set(re.findall(r"<image (\d+)", sample["final_input_prompt"]))))
            # If there are multiple input images, we need to avoid the number of image embeddings getting too large.
            adjusted_max_num_tiles = max(1, self._max_num_tiles // len(img_indices))
            adjusted_max_num_tiles = min(adjusted_max_num_tiles, self._max_num_tiles)

            for img_idx in img_indices:
                img_key = f"image_{img_idx}"
                img_str = f"<image {img_idx}>"

                img = sample[img_key]
                assert img is not None, f"{img_str} is in prompt but not in sample images"

                imgs = self._transform_img(
                    img,
                    self._img_h,
                    self._img_w,
                    self._use_tiling,
                    adjusted_max_num_tiles,
                    self._use_thumbnail,
                    augment=False,
                )  # List of tiles.

                sample_imgs.extend(imgs)
                sample_num_tiles.append(len(imgs))

            sample["final_input_prompt"] = " ".join([f'<image {i + 1}><image>' for i in range(len(img_indices))]) + "\n" + sample["final_input_prompt"]
        elif self._prompt_style == "multi_image":
            sample = construct_prompt(sample, self._config)

            sample_imgs = []
            sample_num_tiles = []

            img_indices = re.findall(r"<image (\d+)", sample["final_input_prompt"])
            # If there are multiple input images, we need to avoid the number of image embeddings getting too large.
            adjusted_max_num_tiles = max(1, self._max_num_tiles // len(img_indices))

            for img_idx in img_indices:
                img_key = f"image_{img_idx}"
                img_str = f"<image {img_idx}>"

                img = sample[img_key]
                assert img is not None, f"{img_str} is in prompt but not in sample images"

                # Note: Only replace the current image tag.
                sample["final_input_prompt"] = sample["final_input_prompt"].replace(
                    img_str, "<image>", 1
                )

                imgs = self._transform_img(
                    img,
                    self._img_h,
                    self._img_w,
                    self._use_tiling,
                    adjusted_max_num_tiles,
                    self._use_thumbnail,
                    augment=False,
                )  # List of tiles.

                sample_imgs.extend(imgs)
                sample_num_tiles.append(len(imgs))

            # Sanity check.
            for i in range(1, 8):
                assert (
                    f"<image {i}>" not in sample["final_input_prompt"]
                ), "prompt contains unhandled image tags"
        else:
            raise ValueError(f"unknown prompt style {self._prompt_style}")

        # MMMU specific metadata.
        metadata = {"question_type": sample["question_type"],
                    "field": sample["field"],
                    "subfield": sample["subfield"]}
        if sample["question_type"] == "multiple-choice":
            metadata["index2ans"] = sample["index2ans"]
            metadata["all_choices"] = sample["all_choices"]

        prompt = sample['final_input_prompt']

        tile_count = torch.tensor(sample_num_tiles, dtype=torch.int)

        return (
            torch.stack(sample_imgs),
            tile_count,
            sample["id"],
            prompt,
            sample["answer"],
            metadata,
        )


class VideoMMEDataset(torch.utils.data.Dataset):
    "Video MME evaluation dataset."

    def __init__(
        self,
        input_image_path,
        gt_path,
        num_samples_per_partition,
        num_partitions,
        partition_id,
        img_h,
        img_w,
        use_tiling,
        max_num_tiles,
        use_thumbnail,
        num_frames,
        vision_model_type,
    ):
        ground_truth_original = json.load(open(gt_path))
        ground_truth = []
        for gt in ground_truth_original:
            video_path = gt["url"]
            video_path = video_path.replace("https://www.youtube.com/watch?v=", "")
            video_path = video_path.replace("https://m.youtube.com/watch?v=", "")
            video_path = os.path.join(input_image_path, video_path + ".mp4")
            if not os.path.exists(video_path):
                continue
            gt["video_path"] = video_path
            ground_truth.append(gt)

        ground_truth = sorted(ground_truth, key=lambda gt: gt["video_path"])
        print_rank_0(f"Found {len(ground_truth)} videos to process.")

        if num_partitions > 0:
            start_idx, end_idx = _get_partition_bounds(
                len(ground_truth), num_samples_per_partition, num_partitions, partition_id
            )
            ground_truth = ground_truth[start_idx:end_idx]

        self._ground_truth = ground_truth
        self._img_h = img_h
        self._img_w = img_w
        self._use_tiling = False
        self._max_num_tiles = max_num_tiles
        self._use_thumbnail = use_thumbnail
        self._num_frames = num_frames
        self._transform_img = ImageTransform(img_h, vision_model_type)

    def __len__(self):
        return len(self._ground_truth)

    def __getitem__(self, idx):
        from torchvision.io import read_video

        gt = self._ground_truth[idx]

        video, _, _ = read_video(gt["video_path"], start_pts=0, end_pts=None, pts_unit='sec')
        video = video.numpy()
        selected_frames = torch.linspace(0, video.shape[0] - 1, self._num_frames).long()
        video_frames = video[selected_frames]
        if self._num_frames == 1:
            video_frames = video_frames[None]

        imgs = []
        for img in video_frames:
            from torchvision.transforms import ToPILImage
            to_pil = ToPILImage()
            img = to_pil(img)
            imgs += self._transform_img(
                img, self._img_h, self._img_w, self._use_tiling, self._max_num_tiles,
                self._use_thumbnail, augment=False,
            )

        for question in gt["questions"]:
            # Very hacky, but we essentially re-create gt holding only the
            # question of interest. This is the make this generation script
            # compatible with the Video MME evaluation script.
            question_dict = {
                "video_id": gt["video_id"],
                "duration_category": gt["duration_category"],
                "video_category": gt["video_category"],
                "video_subcategory": gt["video_subcategory"],
                "url": gt["url"],
                "questions": [question],
            }

        num_tiles = torch.tensor([len(imgs)], dtype=torch.int)

        answer = ""
        metadata = ""

        return (
            torch.stack(imgs),
            num_tiles,
            question["question_id"],
            question_dict,
            answer,
            metadata,
        )


class OCRBenchDataset(torch.utils.data.Dataset):
    """OCRBench evaluation dataset."""

    def __init__(
        self,
        input_image_path,
        gt_path,
        num_samples_per_partition,
        num_partitions,
        partition_id,
        img_h,
        img_w,
        use_tiling,
        max_num_tiles,
        use_thumbnail,
        vision_model_type,
    ):
        gt = json.load(open(gt_path, encoding='utf-8'))

        if num_partitions > 0:
            start_idx, end_idx = _get_partition_bounds(
                len(gt), num_samples_per_partition, num_partitions, partition_id
            )
            gt = gt[start_idx:end_idx]

        self._input_image_path = input_image_path
        self._gt = gt
        self._img_h = img_h
        self._img_w = img_w
        self._use_tiling = use_tiling
        self._max_num_tiles = max_num_tiles
        self._use_thumbnail = use_thumbnail
        self._transform_img = ImageTransform(img_h, vision_model_type)

    def __len__(self):
        return len(self._gt)

    def __getitem__(self, idx):
        img_path = os.path.join(self._input_image_path, self._gt[idx]['image_path'])

        img = Image.open(img_path)
        imgs = self._transform_img(
            img,
            self._img_h,
            self._img_w,
            self._use_tiling,
            self._max_num_tiles,
            self._use_thumbnail,
            augment=False,
        )

        tile_count = torch.tensor([len(imgs)], dtype=torch.int)

        metadata = {
            "dataset_name": self._gt[idx]["dataset_name"],
            "data_type": self._gt[idx]["type"],
        }

        return (
            torch.stack(imgs),
            tile_count,
            idx,
            self._gt[idx]["question"],
            self._gt[idx]["answers"],
            metadata,
        )


class MathVistaDataset(torch.utils.data.Dataset):
    """MathVista evaluation dataset."""

    def __init__(
        self,
        input_image_path,
        num_samples_per_partition,
        num_partitions,
        partition_id,
        img_h,
        img_w,
        use_tiling,
        max_num_tiles,
        use_thumbnail,
        vision_model_type,
    ):
        import datasets

        hf_datasets_cache = os.environ["HF_DATASETS_CACHE"]
        assert hf_datasets_cache != "", "Please set the environment variable HF_DATASETS_CACHE."

        if os.path.exists(input_image_path):
            dataset = datasets.load_dataset(
                input_image_path, cache_dir=hf_datasets_cache, verification_mode="no_checks", split="train"
            )
        else:
            dataset = datasets.load_dataset(
                "AI4Math/MathVista", split="testmini", cache_dir=hf_datasets_cache
            )

        if num_partitions > 0:
            start_idx, end_idx = _get_partition_bounds(
                len(dataset), num_samples_per_partition, num_partitions, partition_id
            )
            dataset = dataset[start_idx:end_idx]

        self._dataset = dataset
        self._img_h = img_h
        self._img_w = img_w
        self._use_tiling = use_tiling
        self._max_num_tiles = max_num_tiles
        self._use_thumbnail = use_thumbnail
        self._transform_img = ImageTransform(img_h, vision_model_type)

    def __len__(self):
        return len(self._dataset["pid"])

    def __getitem__(self, idx):
        # Already a PIL object.
        img = self._dataset['decoded_image'][idx]

        imgs = self._transform_img(
            img,
            self._img_h,
            self._img_w,
            self._use_tiling,
            self._max_num_tiles,
            self._use_thumbnail,
            augment=False,
        )

        tile_count = torch.tensor([len(imgs)], dtype=torch.int)

        question_id = self._dataset["pid"][idx]
        question = self._dataset["question"][idx]
        question_type = self._dataset["question_type"][idx]  # free_form or multi_choice
        query = self._dataset["query"][idx]
        choices = self._dataset["choices"][idx]
        answer = self._dataset["answer"][idx]

        if question_type == 'multi_choice':
            start_chr = 'A'
            choices_str = ''
            index2ans = {}
            all_choices = []
            for choice in choices:
                all_choices.append(start_chr)
                index2ans[start_chr] = choice
                choices_str += f"{start_chr}. {choice}\n"
                start_chr = chr(ord(start_chr) + 1)

            question = question + '\n' + choices_str
            question = question + "Answer with the option's letter from the given choices directly."
            answer = chr(ord('A') + choices.index(answer))
        else:
            question = query.replace("Hint: ", "")
            index2ans = {}
            all_choices = []

        metadata = {
            "question_type": question_type,
            "index2ans": index2ans,
            "all_choices": all_choices,
        }

        return torch.stack(imgs), tile_count, question_id, question, answer, metadata


class AI2DDataset(torch.utils.data.Dataset):
    """AI2D evaluation dataset."""

    def __init__(
        self,
        input_image_path,
        gt_path,
        num_samples_per_partition,
        num_partitions,
        partition_id,
        img_h,
        img_w,
        use_tiling,
        max_num_tiles,
        use_thumbnail,
        vision_model_type,
    ):
        with open(gt_path, 'r') as f:
            jsonl = list(f)

        gt = [json.loads(json_str) for json_str in jsonl]

        if num_partitions > 0:
            start_idx, end_idx = _get_partition_bounds(
                len(gt), num_samples_per_partition, num_partitions, partition_id
            )
            gt = gt[start_idx:end_idx]

        self._gt = gt
        self._input_image_path = input_image_path
        self._img_h = img_h
        self._img_w = img_w
        self._use_tiling = use_tiling
        self._max_num_tiles = max_num_tiles
        self._use_thumbnail = use_thumbnail
        self._transform_img = ImageTransform(img_h, vision_model_type)

    def __len__(self):
        return len(self._gt)

    def __getitem__(self, idx):
        img_path = os.path.join(self._input_image_path, self._gt[idx]['image'].split("/")[-1])

        img = Image.open(img_path)
        imgs = self._transform_img(
            img,
            self._img_h,
            self._img_w,
            self._use_tiling,
            self._max_num_tiles,
            self._use_thumbnail,
            augment=False,
        )

        tile_count = torch.tensor([len(imgs)], dtype=torch.int)

        metadata = ""  # Not used.

        return (
            torch.stack(imgs),
            tile_count,
            self._gt[idx]["question_id"],
            self._gt[idx]["question"],
            self._gt[idx]["answer"],
            metadata,
        )


class RDTableBenchDataset(torch.utils.data.Dataset):
    def __init__(
        self,
        input_image_path,
        gt_path,
        num_samples_per_partition,
        num_partitions,
        partition_id,
        img_h,
        img_w,
        use_tiling,
        max_num_tiles,
        use_thumbnail,
        vision_model_type,
    ):
        gt_paths = sorted(glob.glob(os.path.join(gt_path, "*.html")))
        gt = []
        for gt_path in gt_paths:
            img_path = os.path.join(input_image_path, os.path.basename(gt_path).replace(".html", ".jpg"))
            with open(gt_path) as f:
                html = f.read()
            gt.append({
                "answer": html,
                "image": img_path,
            })

        if num_partitions > 0:
            start_idx, end_idx = _get_partition_bounds(
                len(gt), num_samples_per_partition, num_partitions, partition_id
            )
            gt = gt[start_idx:end_idx]

        self._input_image_path = input_image_path
        self._gt = gt
        self._img_h = img_h
        self._img_w = img_w
        self._use_tiling = use_tiling
        self._max_num_tiles = max_num_tiles
        self._use_thumbnail = use_thumbnail
        self._transform_img = ImageTransform(img_h, vision_model_type)

    def __len__(self):
        return len(self._gt)

    def __getitem__(self, idx):
        img_path = os.path.join(self._input_image_path, self._gt[idx]['image'])

        img = Image.open(img_path)
        imgs = self._transform_img(
            img,
            self._img_h,
            self._img_w,
            self._use_tiling,
            self._max_num_tiles,
            self._use_thumbnail,
            augment=False,
        )

        tile_count = torch.tensor([len(imgs)], dtype=torch.int)

        metadata = ""

        prompt = (
            "Convert the image to an HTML table. The output should begin with <table> and end with </table>. "
            "Specify rowspan and colspan attributes when they are greater than 1. Do not specify any other attributes. "
            "Only use table related HTML tags, no additional formatting is required."
        )

        return (
            torch.stack(imgs),
            tile_count,
            idx,
            prompt,
            self._gt[idx]["answer"],
            metadata,
        )


class RealworldQADataset(torch.utils.data.Dataset):
    def __init__(
        self,
        input_image_path,
        gt_path,
        num_samples_per_partition,
        num_partitions,
        partition_id,
        img_h,
        img_w,
        use_tiling,
        max_num_tiles,
        use_thumbnail,
        vision_model_type,
    ):
        gt = json.load(open(gt_path, encoding='utf-8'))


        if num_partitions > 0:
            start_idx, end_idx = _get_partition_bounds(
                len(gt), num_samples_per_partition, num_partitions, partition_id
            )
            gt = gt[start_idx:end_idx]

        self._gt = gt
        self._input_image_path = input_image_path
        self._img_h = img_h
        self._img_w = img_w
        self._use_tiling = use_tiling
        self._max_num_tiles = max_num_tiles
        self._use_thumbnail = use_thumbnail
        self._transform_img = ImageTransform(img_h, vision_model_type)


    def __len__(self):
        return len(self._gt)

    def __getitem__(self, idx):
        img_path = os.path.join(self._input_image_path, self._gt[idx]['image'])
        img = Image.open(img_path)
        imgs = self._transform_img(
            img,
            self._img_h,
            self._img_w,
            self._use_tiling,
            self._max_num_tiles,
            self._use_thumbnail,
            augment=False,
        )

        question_id = int(self._gt[idx]['image'].replace(".webp", ""))
        question = self._gt[idx]["question"]

        if self._gt[idx]['question_type'] == "multi-choice":
            choices = self._gt[idx]["choices"]
            start_chr = 'A'
            choices_str = ''
            index2ans = {}
            all_choices = []
            for choice in choices:
                all_choices.append(start_chr)
                index2ans[start_chr] = choice
                choices_str += f"{start_chr}. {choice}\n"
                start_chr = chr(ord(start_chr) + 1)

            question = question + '\n' + choices_str
            question = question + "Answer with the option's letter from the given choices directly."
            answer = chr(ord('A') + self._gt[idx]['correct_choice_index'])
        else:
            question = question + "\nAnswer the question using a single word or phrase."
            answer = self._gt[idx]['answer']

        tile_count = torch.tensor([len(imgs)], dtype=torch.int)

        metadata = ""  # Not used.

        return (
            torch.stack(imgs),
            tile_count,
            question_id,
            question,
            [answer],
            metadata,
        )


class MotionBenchDataset(torch.utils.data.Dataset):
    def __init__(
        self,
        input_image_path,
        gt_path,
        num_samples_per_partition,
        num_partitions,
        partition_id,
        img_h,
        img_w,
        use_tiling,
        max_num_tiles,
        use_thumbnail,
        num_frames,
        vision_model_type,
        split
    ):

        with open(gt_path) as f:
            ground_truth_original = [json.loads(line) for line in f]


        ground_truth = []
        for gt in ground_truth_original:

            # video path handling
            video_path = gt['video_path']
            if ".mp4" not in video_path:
                video_path = f"{video_path}.mp4"

            video_path = os.path.join(input_image_path, video_path)
            if not os.path.exists(video_path):
                continue
            gt["video_path"] = video_path

            ground_truth.append(gt)

        ground_truth = sorted(ground_truth, key=lambda gt: gt["video_path"])
        print_rank_0(f"Found {len(ground_truth)} videos to process.")

        if num_partitions > 0:
            start_idx, end_idx = _get_partition_bounds(
                len(ground_truth), num_samples_per_partition, num_partitions, partition_id
            )
            ground_truth = ground_truth[start_idx:end_idx]

        self._ground_truth = ground_truth
        self._img_h = img_h
        self._img_w = img_w
        self._use_tiling = False
        self._max_num_tiles = max_num_tiles
        self._use_thumbnail = use_thumbnail
        self._num_frames = num_frames
        self._transform_img = ImageTransform(img_h, vision_model_type)

    def __len__(self):
        return len(self._ground_truth)

    def __getitem__(self, idx):
        gt = self._ground_truth[idx]

        from torchvision.io.video import read_video
        video, _, _ = read_video(gt["video_path"], start_pts=0, end_pts=None, pts_unit='sec')
        video = video.permute((0, 3, 1, 2))

        selected_frames = torch.linspace(0, video.shape[0] - 1, min(self._num_frames, video.shape[0])).long()
        video_frames = video[selected_frames]

        if self._num_frames == 1:
            video_frames = video_frames[None]
        imgs = []
        for img in video_frames:
            from torchvision.transforms import ToPILImage
            to_pil = ToPILImage()
            img = to_pil(img)
            imgs += self._transform_img(
                img,
                self._img_h,
                self._img_w,
                self._use_tiling,
                self._max_num_tiles,
                self._use_thumbnail,
                augment=False,
            )

        num_tiles = torch.tensor([len(imgs)], dtype=torch.int)

        q_id = gt['qa'][0]['uid']
        question = gt['qa'][0]['question']
        answer = gt['qa'][0]['answer']

        metadata = ""
        return (
            torch.stack(imgs),
            num_tiles,
            q_id,
            question,
            answer,
            metadata,
        )

# The following class is adapted from
# https://github.com/PhysGame/PhysGame/blob/main/physvlm/test/PhysGame_bench/utils.py#L27
# which is licensed under the MIT license. More details on the license can be
# found at https://github.com/PhysGame/PhysGame/tree/main?tab=Apache-2.0-1-ov-file#readme
class PhysGameBenchDataset(torch.utils.data.Dataset):
    def __init__(
        self,
        input_image_path,
        gt_path,
        num_samples_per_partition,
        num_partitions,
        partition_id,
        img_h,
        img_w,
        use_tiling,
        max_num_tiles,
        use_thumbnail,
        num_frames,
        vision_model_type,
        split
    ):

        ground_truth_original = json.load(open(gt_path, encoding='utf-8'))

        ground_truth = []
        for gt in ground_truth_original:

            video_path = os.path.join(input_image_path, gt['question_id']) + ".mp4"
            if not os.path.exists(video_path):
                continue
            gt["video_path"] = video_path
            ground_truth.append(gt)

        ground_truth = sorted(ground_truth, key=lambda gt: gt["video_path"])
        print_rank_0(f"Found {len(ground_truth)} videos to process.")

        if num_partitions > 0:
            start_idx, end_idx = _get_partition_bounds(
                len(ground_truth), num_samples_per_partition, num_partitions, partition_id
            )
            ground_truth = ground_truth[start_idx:end_idx]

        self._ground_truth = ground_truth
        self._img_h = img_h
        self._img_w = img_w
        self._use_tiling = False
        self._max_num_tiles = max_num_tiles
        self._use_thumbnail = use_thumbnail
        self._num_frames = num_frames
        self._transform_img = ImageTransform(img_h, vision_model_type)

    def __len__(self):
        return len(self._ground_truth)

    def _qa_template(self, data):
        question = f"Question: {data['question']}\n"
        question += "Options:\n"
        answer = data['answer']
        for ch, c in data['options'].items():
            question += f"({ch}) {c}\n"
        question = question.rstrip()
        return question, answer

    def __getitem__(self, idx):
        gt = self._ground_truth[idx]

        from torchvision.io.video import read_video
        video, _, _ = read_video(gt["video_path"], start_pts=0, end_pts=None, pts_unit='sec')
        video = video.permute((0, 3, 1, 2))

        selected_frames = torch.linspace(0, video.shape[0] - 1, min(self._num_frames, video.shape[0])).long()
        video_frames = video[selected_frames]

        if self._num_frames == 1:
            video_frames = video_frames[None]
        imgs = []
        for img in video_frames:
            from torchvision.transforms import ToPILImage
            to_pil = ToPILImage()
            img = to_pil(img)
            imgs += self._transform_img(
                img,
                self._img_h,
                self._img_w,
                self._use_tiling,
                self._max_num_tiles,
                self._use_thumbnail,
                augment=False,
            )

        num_tiles = torch.tensor([len(imgs)], dtype=torch.int)

        q_id = gt['question_id']
        question, answer = self._qa_template(gt)

        metadata = {
            'class': gt['class_anno'],
            'subclass': gt['subclass_anno']
        }

        return (
            torch.stack(imgs),
            num_tiles,
            q_id,
            question,
            answer,
            metadata,
        )


# The following class is adapted from
# https://github.com/OpenGVLab/Ask-Anything/blob/main/video_chat2/mvbench.ipynb
# which is licensed under the MIT license. More details on the license can be
# found at https://github.com/OpenGVLab/Ask-Anything/tree/main?tab=MIT-1-ov-file#readme
class MVBenchDataset(torch.utils.data.Dataset):
    def __init__(
        self,
        input_image_path,
        gt_path,
        num_samples_per_partition,
        num_partitions,
        partition_id,
        img_h,
        img_w,
        use_tiling,
        max_num_tiles,
        use_thumbnail,
        num_frames,
        vision_model_type,
        split
    ):

        data_list = {
            "Action Sequence": ("action_sequence.json", f"{input_image_path}/star/Charades_v1_480/", "video", True), # has start & end
            "Action Prediction": ("action_prediction.json", f"{input_image_path}/star/Charades_v1_480/", "video", True), # has start & end
            "Action Antonym": ("action_antonym.json", f"{input_image_path}/ssv2_video/", "video", False),
            "Fine-grained Action": ("fine_grained_action.json", f"{input_image_path}/Moments_in_Time_Raw/videos/", "video", False),
            "Unexpected Action": ("unexpected_action.json", f"{input_image_path}/FunQA_test/test/", "video", False),
            "Object Existence": ("object_existence.json", f"{input_image_path}/clevrer/video_validation/", "video", False),
            "Object Interaction": ("object_interaction.json", f"{input_image_path}/star/Charades_v1_480/", "video", True), # has start & end
            "Object Shuffle": ("object_shuffle.json", f"{input_image_path}/perception/videos/", "video", False),
            "Moving Direction": ("moving_direction.json", f"{input_image_path}/clevrer/video_validation/", "video", False),
            "Action Localization": ("action_localization.json", f"{input_image_path}/sta/sta_video/", "video", True),  # has start & end
            "Scene Transition": ("scene_transition.json", f"{input_image_path}/scene_qa/video/", "video", False),
            "Action Count": ("action_count.json", f"{input_image_path}/perception/videos/", "video", False),
            "Moving Count": ("moving_count.json", f"{input_image_path}/clevrer/video_validation/", "video", False),
            "Moving Attribute": ("moving_attribute.json", f"{input_image_path}/clevrer/video_validation/", "video", False),
            "State Change": ("state_change.json", f"{input_image_path}/perception/videos/", "video", False),
            "Fine-grained Pose": ("fine_grained_pose.json", f"{input_image_path}/nturgbd/", "video", False),
            "Character Order": ("character_order.json", f"{input_image_path}/perception/videos/", "video", False),
            "Egocentric Navigation": ("egocentric_navigation.json", f"{input_image_path}/vlnqa/", "video", False),
            "Episodic Reasoning": ("episodic_reasoning.json", f"{input_image_path}/tvqa/frames_fps3_hq/", "frame", True),  # has start & end, read frame
            "Counterfactual Inference": ("counterfactual_inference.json", f"{input_image_path}/clevrer/video_validation/", "video", False)
        }

        ground_truth = []
        for k, v in data_list.items():
            with open(os.path.join(gt_path, v[0]), 'r') as f:
                json_data = json.load(f)
            for data_id, data in enumerate(json_data):
                ground_truth.append({
                    'task_type': k,
                    'prefix': v[1],
                    'data_type': v[2],
                    'bound': v[3],
                    'data': data,
                    'question_id': f"{k}-{data_id}"
                })

        print("total ground truth ==> ", len(ground_truth))
        self.decord_method = {
            'video': self.read_video_ours,
            'frame': self.read_frame,
        }

        if num_partitions > 0:
            start_idx, end_idx = _get_partition_bounds(
                len(ground_truth), num_samples_per_partition, num_partitions, partition_id
            )
            ground_truth = ground_truth[start_idx:end_idx]

            print("Partitioned ==> ", {start_idx}, {end_idx}, len(ground_truth))

        self._ground_truth = ground_truth
        self._img_h = img_h
        self._img_w = img_w
        self._use_tiling = False
        self._max_num_tiles = max_num_tiles
        self._use_thumbnail = use_thumbnail
        self._num_frames = num_frames
        self._transform_img = ImageTransform(img_h, vision_model_type)

    def __len__(self):
        return len(self._ground_truth)

    def get_index(self, bound, fps, max_frame, first_idx=0):
        if bound:
            start, end = bound[0], bound[1]
        else:
            start, end = -100000, 100000
        start_idx = max(first_idx, round(start * fps))
        end_idx = min(round(end * fps), max_frame)
        seg_size = float(end_idx - start_idx) / self._num_frames
        frame_indices = np.array([
            int(start_idx + (seg_size / 2) + np.round(seg_size * idx))
            for idx in range(self._num_frames)
        ])
        return frame_indices

    def qa_template(self, data):
        question = f"Question: {data['question']}\n"
        question += "Options:\n"
        answer = data['answer']
        answer_idx = -1
        for idx, c in enumerate(data['candidates']):
            question += f"({chr(ord('A') + idx)}) {c}\n"
            if c == answer:
                answer_idx = idx
        question = question.rstrip()
        answer = f"({chr(ord('A') + answer_idx)}) {answer}"
        return question, answer


    def read_frame(self, video_path, bound=None, fps=2):
        max_frame = len(os.listdir(video_path))
        images_group = list()
        frame_indices = self.get_index(bound, fps, max_frame, first_idx=1) # frame_idx starts from 1
        for frame_index in frame_indices:
            img = Image.open(os.path.join(video_path, f"{frame_index:05d}.jpg"))
            images_group.append(img)
        return images_group

    def read_video_ours(self, video_path, bound=None):
        from torchvision.io.video import read_video
        video, _, v_meta_info = read_video(video_path, start_pts=0, end_pts=None, pts_unit='sec')

        video = video.permute((0, 3, 1, 2))
        fps = float(v_meta_info['video_fps'])
        max_frame = len(video) - 1

        selected_frames_indices = self.get_index(bound, fps, max_frame, first_idx=0)

        video_frames = video[selected_frames_indices]

        return video_frames

    def __getitem__(self, idx):

        data = self._ground_truth[idx]
        bound = None
        if data['bound']:
            bound = (
                data['data']['start'],
                data['data']['end'],
            )
        video_path = os.path.join(data['prefix'], data['data']['video'])

        video_decode_func = self.decord_method[data['data_type']]

        video_frames = video_decode_func(video_path, bound)

        imgs = []
        for img in video_frames:
            from torchvision.transforms import ToPILImage

            if data['data_type'] == 'video':
                to_pil = ToPILImage()
                img = to_pil(img)
            imgs += self._transform_img(
                img, self._img_h, self._img_w, self._use_tiling, self._max_num_tiles,
                self._use_thumbnail, augment=False
            )

        num_tiles = torch.tensor([len(imgs)], dtype=torch.int)

        q_id = data['question_id']
        metadata = {'task_type': data['task_type']}
        question, answer = self.qa_template(data['data'])

        return (
            torch.stack(imgs),
            num_tiles,
            q_id,
            question,
            answer,
            metadata,
        )


class ExampleInferenceDataset(torch.utils.data.Dataset):
    def __init__(
        self,
        img_h,
        img_w,
        use_tiling,
        max_num_tiles,
        use_thumbnail,
        vision_model_type,
    ):
        # Define your own inference samples here. The following is an example.
        samples = [
            # Use <image> token to indicate the image position.
            {"image_paths": ["examples/multimodal/assets/pretrain_curves.png"], "question": "<image>\nWhat is the curve?"},
            # Optional: if you have an answer for the question.
            {"image_paths": ["examples/multimodal/assets/pretrain_curves.png"], "question": "What is the curve?<image>", "answer": "It's a loss function curve."},
            # If you have multiple images for the question, then use <image> token to indicate the image positions.
            {"image_paths": ["examples/multimodal/assets/pretrain_curves.png", "examples/multimodal/assets/pretrain_curves.png"], "question": "<image>What is the curve?<image>"},
            # Text only sample.
            {"question": "Who is Jensen Huang?"},
        ]

        self._samples = samples
        self._img_h = img_h
        self._img_w = img_w
        self._use_tiling = use_tiling
        self._max_num_tiles = max_num_tiles
        self._use_thumbnail = use_thumbnail
        self._transform_img = ImageTransform(img_h, vision_model_type)

    def __len__(self):
        return len(self._samples)

    def __getitem__(self, idx):
        sample = self._samples[idx]

        sample_imgs = []
        sample_tile_count = []
        for image_path in sample.get("image_paths", []):
            img = Image.open(image_path)
            imgs = self._transform_img(
                img,
                self._img_h,
                self._img_w,
                self._use_tiling,
                self._max_num_tiles,
                self._use_thumbnail,
                augment=False,
            )

            sample_imgs.extend(imgs)
            sample_tile_count.append(len(imgs))

        sample_id = idx
        metadata = ""  # Not used.

        return (
            torch.stack(sample_imgs) if len(sample_imgs) > 0 else torch.tensor([]),
            torch.tensor(sample_tile_count, dtype=torch.int),
            sample_id,
            sample["question"],
            sample.get("answer", ""),
            metadata,
        )


def get_evaluation_dataset(
    task,
    input_image_path,
    gt_path,
    img_h,
    img_w,
    use_tiling,
    max_num_tiles,
    use_thumbnail,
    num_samples_per_partition,
    num_partitions,
    partition_id,
    num_frames,
    vision_model_type,
    split="validation",
):
    """Get an evaluation dataset."""
    if task == "TextVQA":
        keys = {
            "image_id": "image_id",
            "sample_id": "question_id",
            "question": "question",
            "answer": "answers",
        }

        dataset = VQADataset(
            input_image_path,
            gt_path,
            num_samples_per_partition,
            num_partitions,
            partition_id,
            keys,
            img_h,
            img_w,
            use_tiling,
            max_num_tiles,
            use_thumbnail,
            vision_model_type,
        )
    elif task == "VQAv2":
        keys = {
            "image_id": "image",
            "sample_id": "question_id",
            "question": "question",
            "answer": "answer",
        }

        dataset = VQADataset(
            input_image_path,
            gt_path,
            num_samples_per_partition,
            num_partitions,
            partition_id,
            keys,
            img_h,
            img_w,
            use_tiling,
            max_num_tiles,
            use_thumbnail,
            vision_model_type,
        )
    elif task == "ChartQA":
        keys = {"image_id": "imgname", "question": "query", "answer": "label"}

        dataset = VQADataset(
            input_image_path,
            gt_path,
            num_samples_per_partition,
            num_partitions,
            partition_id,
            keys,
            img_h,
            img_w,
            use_tiling,
            max_num_tiles,
            use_thumbnail,
            vision_model_type,
        )
    elif task == "captioning":
        dataset = CaptioningDataset(
            input_image_path,
            gt_path,
            num_samples_per_partition,
            num_partitions,
            partition_id,
            img_h,
            img_w,
            use_tiling,
            max_num_tiles,
            use_thumbnail,
            vision_model_type,
        )
    elif task == 'MMMU':
        # Note:
        # - prompt_style="single_image" uses only one image like in the MMMU repo example.
        # - prompt_style="multi_image" uses multiple input images.
        # - prompt_style="vlmevalkit" is similar to https://github.com/open-compass/VLMEvalKit/blob/5d3cebcf18ef4bfbadc3bd3ef80bdc7aad2c6557/vlmeval/vlm/internvl_chat.py#L499
        dataset = MMMUDataset(
            input_image_path,
            num_samples_per_partition,
            num_partitions,
            partition_id,
            img_h,
            img_w,
            use_tiling,
            max_num_tiles,
            use_thumbnail,
            prompt_style="single_image",
            vision_model_type=vision_model_type,
            split=split,
        )
    elif task == 'RealworldQA':
        dataset = RealworldQADataset(
            input_image_path,
            gt_path,
            num_samples_per_partition,
            num_partitions,
            partition_id,
            img_h,
            img_w,
            use_tiling,
            max_num_tiles,
            use_thumbnail,
            vision_model_type=vision_model_type,
        )
    elif task in ["OCRBench", "OCRBench_v2"]:
        dataset = OCRBenchDataset(
            input_image_path,
            gt_path,
            num_samples_per_partition,
            num_partitions,
            partition_id,
            img_h,
            img_w,
            use_tiling,
            max_num_tiles,
            use_thumbnail,
            vision_model_type,
        )
    elif task == "MathVista":
        dataset = MathVistaDataset(
            input_image_path,
            num_samples_per_partition,
            num_partitions,
            partition_id,
            img_h,
            img_w,
            use_tiling,
            max_num_tiles,
            use_thumbnail,
            vision_model_type,
        )
    elif task == "AI2D":
        dataset = AI2DDataset(
            input_image_path,
            gt_path,
            num_samples_per_partition,
            num_partitions,
            partition_id,
            img_h,
            img_w,
            use_tiling,
            max_num_tiles,
            use_thumbnail,
            vision_model_type=vision_model_type,
        )
    elif task == "SPDocVQA":
        keys = {"sample_id": "questionId", "image_id": "image", "question": "question", "answer": "answers"}

        dataset = VQADataset(
            input_image_path,
            gt_path,
            num_samples_per_partition,
            num_partitions,
            partition_id,
            keys,
            img_h,
            img_w,
            use_tiling,
            max_num_tiles,
            use_thumbnail,
            vision_model_type,
        )
    elif task == "InfoVQA":
        keys = {"sample_id": "questionId", "image_id": "image_local_name", "question": "question", "answer": "answers"}

        dataset = VQADataset(
            input_image_path,
            gt_path,
            num_samples_per_partition,
            num_partitions,
            partition_id,
            keys,
            img_h,
            img_w,
            use_tiling,
            max_num_tiles,
            use_thumbnail,
            vision_model_type,
        )
    elif task == "RD_TableBench":
        dataset = RDTableBenchDataset(
            input_image_path,
            gt_path,
            num_samples_per_partition,
            num_partitions,
            partition_id,
            img_h,
            img_w,
            use_tiling,
            max_num_tiles,
            use_thumbnail,
            vision_model_type,
        )
    ### video QA
    elif task == "VideoMME":
        dataset = VideoMMEDataset(
            input_image_path,
            gt_path,
            num_samples_per_partition,
            num_partitions,
            partition_id,
            img_h,
            img_w,
            use_tiling,
            max_num_tiles,
            use_thumbnail,
            num_frames,
            vision_model_type,
        )
    elif task == "MotionBench":
        dataset = MotionBenchDataset(
            input_image_path,
            gt_path,
            num_samples_per_partition,
            num_partitions,
            partition_id,
            img_h,
            img_w,
            use_tiling,
            max_num_tiles,
            use_thumbnail,
            num_frames,
            vision_model_type,
            split=split
        )
    elif task == "PhysGameBench":
        dataset = PhysGameBenchDataset(
            input_image_path,
            gt_path,
            num_samples_per_partition,
            num_partitions,
            partition_id,
            img_h,
            img_w,
            use_tiling,
            max_num_tiles,
            use_thumbnail,
            num_frames,
            vision_model_type,
            split=split
        )
    elif task == "MVBench":
        dataset = MVBenchDataset(
            input_image_path,
            gt_path,
            num_samples_per_partition,
            num_partitions,
            partition_id,
            img_h,
            img_w,
            use_tiling,
            max_num_tiles,
            use_thumbnail,
            num_frames,
            vision_model_type,
            split=split
        )
    elif task == "inference":
        dataset = ExampleInferenceDataset(
            img_h,
            img_w,
            use_tiling,
            max_num_tiles,
            use_thumbnail,
            vision_model_type,
        )
    else:
        raise NotImplementedError(f"unsupported task {task}")

    return dataset


================================================
FILE: examples/multimodal/evaluation/mmmu_utils.py
================================================
# The following code is adapted from
# https://github.com/MMMU-Benchmark/MMMU/blob/main/mmmu/utils/data_utils.py,
# which is licensed under the Apache License 2.0. More details on the license can be
# found at https://github.com/MMMU-Benchmark/MMMU/tree/main?tab=Apache-2.0-1-ov-file#readme

"""Utils for data load, save, and process (e.g., prompt construction)"""

import os
import json
import yaml
import re

DOMAIN_CAT2SUB_CAT = {
    'Art and Design': ['Art', 'Art_Theory', 'Design', 'Music'],
    'Business': ['Accounting', 'Economics', 'Finance', 'Manage', 'Marketing'],
    'Science': ['Biology', 'Chemistry', 'Geography', 'Math', 'Physics', ],
    'Health and Medicine': ['Basic_Medical_Science', 'Clinical_Medicine', 'Diagnostics_and_Laboratory_Medicine',
                            'Pharmacy', 'Public_Health'],
    'Humanities and Social Science': ['History', 'Literature', 'Sociology', 'Psychology'],
    'Tech and Engineering': ['Agriculture', 'Architecture_and_Engineering', 'Computer_Science', 'Electronics',
                             'Energy_and_Power', 'Materials', 'Mechanical_Engineering'],
}

CAT_SHORT2LONG = {
    'acc': 'Accounting',
    'agri': 'Agriculture',
    'arch': 'Architecture_and_Engineering',
    'art': 'Art',
    'art_theory': 'Art_Theory',
    'bas_med': 'Basic_Medical_Science',
    'bio': 'Biology',
    'chem': 'Chemistry',
    'cli_med': 'Clinical_Medicine',
    'cs': 'Computer_Science',
    'design': 'Design',
    'diag_med': 'Diagnostics_and_Laboratory_Medicine',
    'econ': 'Economics',
    'elec': 'Electronics',
    'ep': 'Energy_and_Power',
    'fin': 'Finance',
    'geo': 'Geography',
    'his': 'History',
    'liter': 'Literature',
    'manage': 'Manage',
    'mark': 'Marketing',
    'mate': 'Materials',
    'math': 'Math',
    'mech': 'Mechanical_Engineering',
    'music': 'Music',
    'phar': 'Pharmacy',
    'phys': 'Physics',
    'psy': 'Psychology',
    'pub_health': 'Public_Health',
    'socio': 'Sociology'
}


def load_yaml(file_path):
    with open(file_path, 'r') as stream:
        try:
            yaml_dict = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            print(exc)

    return yaml_dict


def parse_img_path(text):
    matches = re.findall("<img='(.*?)'>", text)
    return matches


def process_single_sample(data):
    question = data['question']
    o_imgs_paths = []
    for option in data['options']:
        current_o_imgs_paths = parse_img_path(option)
        for img_path in current_o_imgs_paths:
            o_imgs_paths.append(img_path)

    categories = list(CAT_SHORT2LONG.values())
    for c in categories:
        if c in data['id']:
            field = c.lower().replace('_', ' ')
            break

    if len(o_imgs_paths) > 1:  # multiple images in options, used for random selection
        return {'id': data['id'], 'question': question, 'options': data['options'], 'answer': data['answer'],
                'image': None, 'question_type': data['question_type'],
                'field': field, 'subfield': data['subfield']}
    else:
        return {'id': data['id'], 'question': question, 'options': data['options'], 'answer': data['answer'],
                'image': data['image_1'], 'question_type': data['question_type'],
                'field': field, 'subfield': data['subfield']}


# DATA PROCESSING
def construct_prompt(sample, config):
    question = sample['question'].strip()

    options = eval(sample['options'])
    example = ""
    if sample['question_type'] == 'multiple-choice':
        start_chr = 'A'
        prediction_range = []
        index2ans = {}
        for option in options:
            prediction_range.append(start_chr)
            example += f"({start_chr}) {option}\n"
            index2ans[start_chr] = option
            start_chr = chr(ord(start_chr) + 1)
        empty_prompt_sample_structure = config['multi_choice_example_format']
        empty_prompt = empty_prompt_sample_structure.format(question, example)
        res_dict = {'type': 'multichoice'}
        res_dict['index2ans'] = index2ans
        res_dict['correct_choice'] = sample['answer']
        res_dict['all_choices'] = prediction_range
        res_dict['empty_prompt'] = empty_prompt
        if config['task_instructions']:
            res_dict['final_input_prompt'] = config['task_instructions'].strip() + '\n\n' + empty_prompt
        else:
            res_dict['final_input_prompt'] = empty_prompt

        res_dict['gt_content'] = options[ord(sample['answer'].upper()) - ord('A')]
    else:
        empty_prompt_sample_structure = config['short_ans_example_format']
        empty_prompt = empty_prompt_sample_structure.format(question)
        res_dict = {'type': 'open'}
        res_dict['empty_prompt'] = empty_prompt
        if config['task_instructions']:
            res_dict['final_input_prompt'] = config['task_instructions'].strip() + '\n\n' + empty_prompt
        else:
            res_dict['final_input_prompt'] = empty_prompt
        res_dict['gt_content'] = sample['answer']

    res_dict.update(sample)
    return res_dict


"""Response Parsing and Evaluation for various models"""
from typing import Dict

import re
import random

import numpy as np


# ----------- Process Multi-choice -------------
def parse_multi_choice_response(response, all_choices, index2ans):
    """
    Parse the prediction from the generated response.
    Return the predicted index e.g., A, B, C, D.
    """
    for char in [',', '.', '!', '?', ';', ':', "'"]:
        response = response.strip(char)
    response = " " + response + " "  # add space to avoid partial match

    index_ans = True
    ans_with_brack = False
    candidates = []
    for choice in all_choices:  # e.g., (A) (B) (C) (D) A) B) C) D)
        if f'({choice})' in response or f'{choice})' in response:
            candidates.append(choice)
            ans_with_brack = True

    if len(candidates) == 0:
        for choice in all_choices:  # e.g., A B C D
            if f' {choice} ' in response:
                candidates.append(choice)

    # if all above doesn't get candidates, check if the content is larger than 5 tokens and try to parse the example
    if len(candidates) == 0 and len(response.split()) > 5:
        for index, ans in index2ans.items():
            if ans.lower() in response.lower():
                candidates.append(index)
                index_ans = False  # it's content ans.

    if len(candidates) == 0:  # still not get answer, randomly choose one.
        pred_index = all_choices[0]
    elif len(candidates) > 1:
        start_indexes = []
        if index_ans:
            if ans_with_brack:
                for can in candidates:
                    index = response.rfind(f'({can})')
                    start_indexes.append(index)  # -1 will be ignored anyway
            else:
                for can in candidates:
                    index = response.rfind(f" {can} ")
                    start_indexes.append(index)
        else:
            for can in candidates:
                index = response.lower().rfind(index2ans[can].lower())
                start_indexes.append(index)
        # get the last one
        pred_index = candidates[np.argmax(start_indexes)]
    else:  # if only one candidate, use it.
        pred_index = candidates[0]

    return pred_index


# ----------- Process Open -------------
def check_is_number(string):
    """
    Check if the given string a number.
    """
    try:
        float(string.replace(',', ''))
        return True
    except ValueError:
        # check if there's comma inside
        return False


def normalize_str(string):
    """
    Normalize the str to lower case and make them float numbers if possible.
    """
    # check if characters in the string

    # if number, numerize it.
    string = string.strip()

    is_number = check_is_number(string)

    if is_number:
        string = string.replace(',', '')
        string = float(string)
        # leave 2 decimal
        string = round(string, 2)
        return [string]
    else:  # it's likely to be a string
        # lower it
        string = string.lower()
        if len(string) == 1:
            return [" " + string, string + " "]  # avoid trivial matches
        return [string]


def extract_numbers(string):
    """
    Exact all forms of numbers from a string with regex.
    """
    # Pattern for numbers with commas
    pattern_commas = r'-?\b\d{1,3}(?:,\d{3})+\b'
    # Pattern for scientific notation
    pattern_scientific = r'-?\d+(?:\.\d+)?[eE][+-]?\d+'
    # Pattern for simple numbers without commas
    pattern_simple = r'-?(?:\d+\.\d+|\.\d+|\d+\b)(?![eE][+-]?\d+)(?![,\d])'

    # Extract numbers with commas
    numbers_with_commas = re.findall(pattern_commas, string)
    # Extract numbers in scientific notation
    numbers_scientific = re.findall(pattern_scientific, string)
    # Extract simple numbers without commas
    numbers_simple = re.findall(pattern_simple, string)

    # Combine all extracted numbers
    all_numbers = numbers_with_commas + numbers_scientific + numbers_simple
    return all_numbers


def parse_open_response(response):
    """
    Parse the prediction from the generated response.
    Return a list of predicted strings or numbers.
    """

    # content = content.strip("\n").strip(".").strip(" ")
    def get_key_subresponses(response):
        key_responses = []
        response = response.strip().strip(".").lower()
        sub_responses = re.split(r'\.\s(?=[A-Z])|\n', response)
        indicators_of_keys = ['could be ', 'so ', 'is ',
                              'thus ', 'therefore ', 'final ', 'answer ', 'result ']
        key_responses = []
        for index, resp in enumerate(sub_responses):
            # if last one, accept it's an equation (the entire response can be just one sentence with equation)
            if index == len(sub_responses) - 1:
                indicators_of_keys.extend(['='])
            shortest_key_response = None  # the shortest response that may contain the answer (tail part of the response)
            for indicator in indicators_of_keys:
                if indicator in resp:
                    if not shortest_key_response:
                        shortest_key_response = resp.split(indicator)[-1].strip()
                    else:
                        if len(resp.split(indicator)[-1].strip()) < len(shortest_key_response):
                            shortest_key_response = resp.split(indicator)[-1].strip()

            if shortest_key_response:
                # and it's not trivial
                if shortest_key_response.strip() not in [":", ",", ".", "!", "?", ";", ":", "'"]:
                    key_responses.append(shortest_key_response)
        if len(key_responses) == 0:  # did not found any
            return [response]
        return key_responses

    # pdb.set_trace()
    key_responses = get_key_subresponses(response)

    pred_list = key_responses.copy()  # keep the original string response
    for resp in key_responses:
        pred_list.extend(extract_numbers(resp))

    tmp_pred_list = []
    for i in range(len(pred_list)):
        tmp_pred_list.extend(normalize_str(pred_list[i]))
    pred_list = tmp_pred_list

    # remove duplicates
    pred_list = list(set(pred_list))

    return pred_list


# ----------- Evaluation -------------

def eval_multi_choice(gold_i, pred_i):
    """
    Evaluate a multiple choice instance.
    """
    correct = False
    # only they are exactly the same, we consider it as correct
    if isinstance(gold_i, list):
        for answer in gold_i:
            if answer == pred_i:
                correct = True
                break
    else:  # gold_i is a string
        if gold_i == pred_i:
            correct = True
    return correct


def eval_open(gold_i, pred_i):
    """
    Evaluate an open question instance
    """
    correct = False
    if isinstance(gold_i, list):
        # use float to avoid trivial matches
        norm_answers = []
        for answer in gold_i:
            norm_answers.extend(normalize_str(answer))
    else:
        norm_answers = normalize_str(gold_i)
    for pred in pred_i:  # pred is already normalized in parse response phase
        if isinstance(pred, str):  # if it's a string, then find if ans in the pred_i
            for norm_ans in norm_answers:
                # only see if the string answer in the string pred
                if isinstance(norm_ans, str) and norm_ans in pred:
                    if not correct:
                        correct = True
                    break
        else:  # it's a float number
            if pred in norm_answers:
                if not correct:
                    correct = True
                break
    return correct


# ----------- Batch Evaluation -------------
def evaluate(samples):
    """
    Batch evaluation for multiple choice and open questions.
    """
    pred_correct = 0
    judge_dict = dict()
    for sample in samples:
        gold_i = sample['answer']
        pred_i = sample['parsed_pred']
        if sample['question_type'] == 'multiple-choice':
            correct = eval_multi_choice(gold_i, pred_i)
        else:  # open question
            correct = eval_open(gold_i, pred_i)

        if correct:
            judge_dict[sample['id']] = 'Correct'
            pred_correct += 1
        else:
            judge_dict[sample['id']] = 'Wrong'

    if len(samples) == 0:
        return {'acc': 0}
    return judge_dict, {'acc': pred_correct / len(samples)}


# ----------- Calculate Accuracy -------------
def calculate_ins_level_acc(results: Dict):
    """Calculate the instruction level accuracy for given Subject results"""
    acc = 0
    ins_num = 0
    for cat_results in results.values():
        acc += cat_results['acc'] * cat_results['num_example']
        ins_num += cat_results['num_example']
    if ins_num == 0:
        return 0
    return acc / ins_num


def mmmu_main_eval(output_dict, task_cfg):
    answer_dict = json.load(open(task_cfg["answer_dict"]))

    # group by category
    output_dict_w_cat = {}
    for data_id, parsed_pred in output_dict.items():
        category = "_".join(data_id.split("_")[1:-1])
        if category not in output_dict_w_cat:
            output_dict_w_cat.update({category: {}})
        output_dict_w_cat[category].update({data_id: parsed_pred})

    # group by category
    answer_dict_w_cat = {}
    for data_id, parsed_pred in answer_dict.items():
        category = "_".join(data_id.split("_")[1:-1])
        if category not in answer_dict_w_cat:
            answer_dict_w_cat.update({category: {}})
        answer_dict_w_cat[category].update({data_id: parsed_pred})

    evaluation_result = {}

    for category in CAT_SHORT2LONG.values():
        # print("Evaluating: {}".format(category))
        # get cat_outputs and cat_answers
        try:
            cat_outputs = output_dict_w_cat[category]
            cat_answers = answer_dict_w_cat[category]
        except KeyError:
            print("Skipping {} for not found".format(category))
            continue

        exampels_to_eval = []
        for data_id, parsed_pred in cat_outputs.items():
            question_type = cat_answers[data_id]['question_type']
            if question_type != 'multiple-choice':
                parsed_pred = parse_open_response(parsed_pred)  # mainly for type consistency (make it number, etc.)
            else:
                parsed_pred = parsed_pred

            exampels_to_eval.append({
                "id": data_id,
                "question_type": question_type,
                "answer": cat_answers[data_id]['ground_truth'],
                "parsed_pred": parsed_pred
            })

        judge_dict, metric_dict = evaluate(exampels_to_eval)
        metric_dict.update({"num_example": len(exampels_to_eval)})

        evaluation_result[category] = metric_dict

    printable_results = {}
    # pdb.set_trace()
    # add domain Subject
    for domain, in_domain_cats in DOMAIN_CAT2SUB_CAT.items():
        in_domain_cat_results = {}
        for cat_name in in_domain_cats:  # use the order in DOMAIN_CAT2SUB_CAT
            if cat_name in evaluation_result.keys():
                in_domain_cat_results[cat_name] = evaluation_result[cat_name]
            else:
                pass
        in_domain_ins_acc = calculate_ins_level_acc(in_domain_cat_results)
        in_domain_data_num = sum([cat_results['num_example'] for cat_results in in_domain_cat_results.values()])
        printable_results['Overall-' + domain] = {"num": int(in_domain_data_num),
                                                  "acc": round(in_domain_ins_acc, 4)
                                                  }
        # add sub category
        for cat_name, cat_results in in_domain_cat_results.items():
            printable_results[cat_name] = {"num": int(cat_results['num_example']),
                                           "acc": round(cat_results['acc'], 4)
                                           }

    # table.append(["-----------------------------", "-----", "----"])
    all_ins_acc = calculate_ins_level_acc(evaluation_result)
    printable_results['Overall'] = {
        "num": sum([cat_results['num_example'] for cat_results in evaluation_result.values()]),
        "acc": round(all_ins_acc, 4)
        }

    return printable_results


if __name__ == '__main__':
    tasks = yaml.safe_load(open("eval_config/eval_mmmu_yi.yaml"))['datasets']
    print(tasks)

    with open("eval_results.json") as f:
        merged_results = json.load(f)

    eval_samples = []
    eval_output_dict = {}
    for res in merged_results:
        pred_ans = res["answer"].upper()
        gt_ans = res['gt_answer']
        if res['question_type'] == 'multiple-choice':
            parsed_pred = parse_multi_choice_response(pred_ans, res['all_choices'], res['index2ans'])
            if pred_ans != parsed_pred:
                print(f"MC: Original: {pred_ans}, Parsed: {parsed_pred}")
            eval_samples.append(
                {
                    'id': res['question_id'],
                    'question_type': res['question_type'],
                    'answer': res['gt_answer'],  # the content in option, not answer index.
                    'response': pred_ans,
                    'parsed_pred': parsed_pred,
                    'index2ans': res['index2ans'],
                }
            )
            eval_output_dict[res['question_id']] = parsed_pred
        else:
            parsed_pred = parse_open_response(pred_ans)
            if pred_ans != parsed_pred:
                print(f"Open: Original: {pred_ans}, Parsed: {parsed_pred}")
            eval_samples.append(
                {
                    'id': res['question_id'],
                    'question_type': res['question_type'],
                    'answer': res['gt_answer'],
                    'response': pred_ans,
                    'parsed_pred': parsed_pred,
                }
            )
            eval_output_dict[res['question_id']] = pred_ans

    json.dump(eval_output_dict, open("validation_mmmu_iter6000_merged.0.53.sorted.json", "w"), indent=4, sort_keys=True)


    x = mmmu_main_eval(eval_output_dict,
                   task_cfg=tasks['mmmu'])

    print(x)

================================================
FILE: examples/multimodal/image_processing.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved. Except portions as noted which are Copyright (c) 2023 OpenGVLab and licensed under the MIT license found in LICENSE.
from torchvision import transforms as T
from torchvision.transforms import Compose
from torchvision.transforms.functional import InterpolationMode


IMAGENET_PIXEL_MEAN = [0.485, 0.456, 0.406]
IMAGENET_PIXEL_STD = [0.229, 0.224, 0.225]
SIGLIP_PIXEL_MEAN = [0.5, 0.5, 0.5]
SIGLIP_PIXEL_STD = [0.5, 0.5, 0.5]
CLIP_PIXEL_MEAN = [0.48145466, 0.4578275, 0.40821073]
CLIP_PIXEL_STD = [0.26862954, 0.26130258, 0.27577711]
RADIO_G_PIXEL_MEAN = [0.4850, 0.4560, 0.4060]
RADIO_G_PIXEL_STD = [0.2230, 0.2240, 0.2250]


pixel_statistics = {
    "clip": (CLIP_PIXEL_MEAN, CLIP_PIXEL_STD),
    "siglip": (SIGLIP_PIXEL_MEAN, SIGLIP_PIXEL_STD),
    "internvit": (IMAGENET_PIXEL_MEAN, IMAGENET_PIXEL_STD),
    "radio": (CLIP_PIXEL_MEAN, CLIP_PIXEL_STD),
    "radio-g": (RADIO_G_PIXEL_MEAN, RADIO_G_PIXEL_STD),
    "cradio-g": (CLIP_PIXEL_MEAN, CLIP_PIXEL_STD),
    "internvit300M": (IMAGENET_PIXEL_MEAN, IMAGENET_PIXEL_STD),
    "huggingface": (SIGLIP_PIXEL_MEAN, SIGLIP_PIXEL_STD),
}


# From https://github.com/OpenGVLab/InternVL/blob/c62fa4f7c850165d7386bdc48ac6bc5a6fab0864/internvl_chat/internvl/train/dataset.py#L685
# Copyright (c) 2023 OpenGVLab.
def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
    best_ratio_diff = float('inf')
    best_ratio = (1, 1)
    area = width * height
    for ratio in target_ratios:
        target_aspect_ratio = ratio[0] / ratio[1]
        ratio_diff = abs(aspect_ratio - target_aspect_ratio)
        if ratio_diff < best_ratio_diff:
            best_ratio_diff = ratio_diff
            best_ratio = ratio
        elif ratio_diff == best_ratio_diff:
            if area > 0.5 * image_size * image_size * ratio[0] * ratio[1]:
                best_ratio = ratio
    return best_ratio


def find_closest_area_weighted_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
    """
    Find the best number of tiles based on the aspect ratio and the area covered by the tiles.
    """
    best_factor = float('-inf')
    best_ratio = (1, 1)
    area = width * height
    for ratio in target_ratios:
        target_aspect_ratio = ratio[0] / ratio[1]
        factor_based_on_area_n_ratio = (
            min((ratio[0]*ratio[1]*image_size*image_size)/ area, 0.6) *
            min(target_aspect_ratio/aspect_ratio, aspect_ratio/target_aspect_ratio))
        if factor_based_on_area_n_ratio > best_factor:
            best_factor = factor_based_on_area_n_ratio
            best_ratio = ratio
    return best_ratio


class ImageTransform:
    """Image transformation."""

    def __init__(self, input_size, vision_model_type):
        self._transform = _build_transform(input_size, vision_model_type)
        self._vision_model_type = vision_model_type

    def __call__(self, img, img_h, img_w, use_tiling=False, max_num_tiles=1, use_thumbnail=False, augment=False, find_closest_aspect_ratio_fn=find_closest_aspect_ratio):
        assert not augment, "Image augmentation not implemented."
        if use_tiling:
            assert img_h == img_w, "dynamic tiling expects equal tile height and width"
            imgs = dynamic_preprocess(
                img, min_num=1, max_num=max_num_tiles, image_size=img_h, use_thumbnail=use_thumbnail,
                find_closest_aspect_ratio_fn=find_closest_aspect_ratio_fn)
            imgs = [self._transform(img) for img in imgs]
        else:
            imgs = [self._transform(img)]

        return imgs


# From https://github.com/OpenGVLab/InternVL/blob/c62fa4f7c850165d7386bdc48ac6bc5a6fab0864/internvl_chat/internvl/train/dataset.py#L702
# Copyright (c) 2023 OpenGVLab.
def dynamic_preprocess(
    image, min_num=1, max_num=6, image_size=448, use_thumbnail=False,
    find_closest_aspect_ratio_fn=find_closest_aspect_ratio):
    orig_width, orig_height = image.size
    aspect_ratio = orig_width / orig_height

    # calculate the existing image aspect ratio
    target_ratios = set(
        (i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if
        i * j <= max_num and i * j >= min_num)
    target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])

    # find the closest aspect ratio to the target
    target_aspect_ratio = find_closest_aspect_ratio_fn(
        aspect_ratio, target_ratios, orig_width, orig_height, image_size)

    # calculate the target width and height
    target_width = image_size * target_aspect_ratio[0]
    target_height = image_size * target_aspect_ratio[1]
    blocks = target_aspect_ratio[0] * target_aspect_ratio[1]

    # resize the image
    resized_img = image.resize((target_width, target_height))
    processed_images = []
    for i in range(blocks):
        box = (
            (i % (target_width // image_size)) * image_size,
            (i // (target_width // image_size)) * image_size,
            ((i % (target_width // image_size)) + 1) * image_size,
            ((i // (target_width // image_size)) + 1) * image_size
        )
        # split the image
        split_img = resized_img.crop(box)
        processed_images.append(split_img)
    assert len(processed_images) == blocks
    if use_thumbnail and len(processed_images) != 1:
        thumbnail_img = image.resize((image_size, image_size))
        processed_images.append(thumbnail_img)
    return processed_images


# Based on https://github.com/openai/CLIP/blob/dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1/clip/clip.py#L79
# and https://github.com/OpenGVLab/InternVL/blob/aa521e6eb1df4cf153aa4118fcf13e673c055d46/internvl_chat/internvl/train/dataset.py#L276
def _build_transform(input_size, vision_model_type):
    if vision_model_type in ("siglip", "internvit", "internvit300M", "radio", "radio-g", "cradio-g"):
        pixel_mean, pixel_std = pixel_statistics[vision_model_type]

        transform = T.Compose([
            T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
            T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
            T.ToTensor(),
            T.Normalize(mean=pixel_mean, std=pixel_std)
        ])
    elif vision_model_type == "clip":
        pixel_mean, pixel_std = pixel_statistics[vision_model_type]

        transform = Compose([
            T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
            T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
            T.ToTensor(),
            T.Normalize(mean=pixel_mean, std=pixel_std),
        ])
    elif vision_model_type.startswith("hf://"):
        from megatron.core.models.huggingface.module import get_hf_model_type

        model_type = get_hf_model_type(vision_model_type)
        if "siglip" in model_type:
            from transformers.models.siglip.image_processing_siglip import SiglipImageProcessor

            processor = SiglipImageProcessor(size={"height": input_size, "width": input_size})

            def transform(x):
                x = x.convert("RGB") if x.mode != "RGB" else x
                x = processor(x, return_tensors="pt")
                return x["pixel_values"][0]
        else:
            raise NotImplementedError(f"image processing not defined for huggingface model {vision_model_type}")
    else:
        raise NotImplementedError(f"image processing not defined for vision model {vision_model_type}")

    return transform


================================================
FILE: examples/multimodal/layer_scaling.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
from functools import partial

import torch

from megatron.core.transformer.transformer_layer import TransformerLayer
from megatron.core.typed_torch import copy_signature


def _bias_dropout_add_func_layer_scaling(ls, x_with_bias, residual, prob, training):
    x, bias = x_with_bias  # unpack
    residual = residual if residual.dtype == x.dtype else residual.to(x.dtype)
    if bias is not None:
        x = x + bias
        out = torch.nn.functional.dropout(x, p=prob, training=training)
        out = residual + out * ls
        return out
    else:
        out = torch.nn.functional.dropout(x, p=prob, training=training)
        out = residual + out * ls
        return out


def bias_dropout_add_unfused_layer_scaling(ls, training):
    """Bias-dropout-add as in Megatron but with added LayerScaling handling."""

    def _bias_dropout_add(x_with_bias, residual, prob):
        return _bias_dropout_add_func_layer_scaling(ls, x_with_bias, residual, prob, training)

    return _bias_dropout_add


def get_bias_dropout_add_layer_scaling(ls, training, fused):
    """Bias-dropout-add as in Megatron but with added LayerScaling handling."""
    assert not fused, "Fused bias-dropout-add not implemented for LayerScaling."
    return bias_dropout_add_unfused_layer_scaling(ls, training)


# Add LayerScaling to our default TransformerLayer.
class LayerScalingTransformerLayer(TransformerLayer):
    @copy_signature(TransformerLayer.__init__)
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.ls1 = torch.nn.Parameter(torch.ones(self.config.hidden_size))
        self.ls2 = torch.nn.Parameter(torch.ones(self.config.hidden_size))

        self.self_attn_bda = partial(self.self_attn_bda, self.ls1)
        self.mlp_bda = partial(self.mlp_bda, self.ls2)


================================================
FILE: examples/multimodal/layer_specs.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import torch

from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add
from megatron.core.ssm.mamba_block import MambaStack, MambaStackSubmodules
from megatron.core.ssm.mamba_layer import MambaLayer, MambaLayerSubmodules
from megatron.core.ssm.mamba_mixer import MambaMixer, MambaMixerSubmodules
from megatron.core.ssm.mlp_layer import MLPLayer
from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear
from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules
from megatron.core.transformer.dot_product_attention import DotProductAttention
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.identity_op import IdentityOp
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_layer import TransformerLayer, TransformerLayerSubmodules
from megatron.core.typed_torch import not_none
from megatron.core.extensions.transformer_engine import HAVE_TE

if HAVE_TE:
    from megatron.core.extensions.transformer_engine import (
        TEColumnParallelLinear,
        TEDotProductAttention,
        TELayerNormColumnParallelLinear,
        TENorm,
        TERowParallelLinear,
    )
else:
    (
        TEColumnParallelLinear,
        TEDotProductAttention,
        TELayerNormColumnParallelLinear,
        TENorm,
        TERowParallelLinear,
    ) = (None, None, None, None, None)

try:
    import apex

    from megatron.core.fusions.fused_layer_norm import FusedLayerNorm
    from megatron.core.transformer.torch_norm import WrappedTorchNorm

    HAVE_APEX = True
    LNImpl = FusedLayerNorm
except ImportError:
    import warnings

    from megatron.core.transformer.torch_norm import WrappedTorchNorm

    warnings.warn(f'Apex is not installed. Falling back to Torch Norm')
    LNImpl = WrappedTorchNorm


def get_layer_spec(is_vit, normalization) -> ModuleSpec:
    attn_mask_type = AttnMaskType.no_mask if is_vit else AttnMaskType.causal
    if normalization == "LayerNorm":
        norm = LNImpl
    elif normalization == "RMSNorm":
        if HAVE_TE:
            norm = TENorm
        else:
            version = torch.__version__.split('.')
            version_geq_2_4 = int(TORCH_VERSION[0]) > 2 or (
                int(TORCH_VERSION[0]) == 2 and int(TORCH_VERSION[1]) >= 4
            )
            assert version_geq_2_4, "Torch version >= 2.4.0 is required for RMSNorm"
            if HAVE_APEX:
                warnings.warn(f'Apex does not support RMSNorm. Falling back to Torch Norm')
            norm = WrappedTorchNorm
    else:
        raise RuntimeError("unknown normalization", normalization)

    mlp = get_mlp_module_spec(use_te=False)  # doesn't include norm.

    return ModuleSpec(
        module=TransformerLayer,
        submodules=TransformerLayerSubmodules(
            input_layernorm=not_none(norm),
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": attn_mask_type},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=ColumnParallelLinear,
                    core_attention=DotProductAttention,
                    linear_proj=RowParallelLinear,
                    q_layernorm=IdentityOp,
                    k_layernorm=IdentityOp,
                ),
            ),
            self_attn_bda=get_bias_dropout_add,
            pre_mlp_layernorm=not_none(norm),
            mlp=mlp,
            mlp_bda=get_bias_dropout_add,
        ),
    )


def get_layer_spec_te(is_vit=False, padding=False) -> ModuleSpec:
    attn_mask_type = AttnMaskType.no_mask if is_vit else AttnMaskType.causal
    # Padding mask is needed for e.g. Context Parallel.
    if padding:
        assert not is_vit, "padding_causal mask not used with ViT"
        attn_mask_type = AttnMaskType.padding_causal

    mlp = get_norm_mlp_module_spec_te()
    return ModuleSpec(
        module=TransformerLayer,
        submodules=TransformerLayerSubmodules(
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": attn_mask_type},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=not_none(TELayerNormColumnParallelLinear),
                    core_attention=not_none(TEDotProductAttention),
                    linear_proj=TERowParallelLinear,
                    q_layernorm=IdentityOp,
                    k_layernorm=IdentityOp,
                ),
            ),
            self_attn_bda=get_bias_dropout_add,
            pre_mlp_layernorm=IdentityOp,
            mlp=mlp,
            mlp_bda=get_bias_dropout_add,
        ),
    )


def get_mamba_layer_spec_te(padding=False) -> ModuleSpec:
    attn_mask_type = AttnMaskType.causal
    # Padding mask is needed for e.g. Context Parallel.
    if padding:
        attn_mask_type = AttnMaskType.padding_causal

    return ModuleSpec(
        module=MambaStack,
        submodules=MambaStackSubmodules(
            mamba_layer=ModuleSpec(
                module=MambaLayer,
                submodules=MambaLayerSubmodules(
                    mixer=ModuleSpec(
                        module=MambaMixer,
                        submodules=MambaMixerSubmodules(
                            in_proj=TELayerNormColumnParallelLinear, out_proj=TERowParallelLinear
                        ),
                    ),
                    mamba_bda=get_bias_dropout_add,
                ),
            ),
            # Started with spec from gpt_layer_specs.py (with MLP removed)
            # Using the TE spec because we had problems getting the non-TE spec
            # working
            attention_layer=ModuleSpec(
                module=TransformerLayer,
                submodules=TransformerLayerSubmodules(
                    self_attention=ModuleSpec(
                        module=SelfAttention,
                        params={"attn_mask_type": attn_mask_type},
                        submodules=SelfAttentionSubmodules(
                            linear_qkv=not_none(TELayerNormColumnParallelLinear),
                            core_attention=not_none(TEDotProductAttention),
                            linear_proj=TERowParallelLinear,
                        ),
                    ),
                    self_attn_bda=get_bias_dropout_add,
                ),
            ),
            # Started with spec from gpt_layer_specs.py
            # Using the TE spec because we had problems getting the non-TE spec
            # working
            mlp_layer=ModuleSpec(
                module=MLPLayer,
                submodules=TransformerLayerSubmodules(
                    mlp=ModuleSpec(
                        module=MLP,
                        submodules=MLPSubmodules(
                            linear_fc1=not_none(TELayerNormColumnParallelLinear),
                            linear_fc2=not_none(TERowParallelLinear),
                        ),
                    ),
                    mlp_bda=get_bias_dropout_add,
                ),
            ),
        ),
    )


def get_mlp_module_spec(use_te: bool = True) -> ModuleSpec:
    # Dense MLP w/ or w/o TE modules.
    return ModuleSpec(
        module=MLP,
        submodules=MLPSubmodules(
            linear_fc1=not_none(TEColumnParallelLinear) if use_te else ColumnParallelLinear,
            linear_fc2=not_none(TERowParallelLinear) if use_te else RowParallelLinear,
        ),
    )


def get_norm_mlp_module_spec_te() -> ModuleSpec:
    return ModuleSpec(
        module=MLP,
        submodules=MLPSubmodules(
            linear_fc1=not_none(TELayerNormColumnParallelLinear),
            linear_fc2=not_none(TERowParallelLinear),
        ),
    )


================================================
FILE: examples/multimodal/llama_3p1_nemotron_nano_vl_8b_v1/Dockerfile
================================================
FROM nvcr.io/nvidia/pytorch:25.04-py3

RUN apt update && \
    apt -y upgrade && \
    apt install -y --no-install-recommends \
        software-properties-common \
        build-essential \
        python3-pip \
        python3-dev \
        bash \
        git \
        vim \
        python-is-python3 \
        default-jre \
        net-tools \
        wget \
        curl \
        rsync \
        zip \
        unzip \
        htop \
        tmux \
        bmon

RUN pip install --upgrade pip
RUN git clone https://github.com/Dao-AILab/causal-conv1d.git && cd causal-conv1d && git checkout && CAUSAL_CONV1D_FORCE_BUILD=TRUE pip install . --no-build-isolation
RUN git clone https://github.com/state-spaces/mamba.git && cd mamba && git checkout && MAMBA_FORCE_BUILD=TRUE pip install . --no-build-isolation
RUN pip install numpy
RUN pip install einops einops-exts sentencepiece braceexpand webdataset packaging
RUN pip install transformers datasets accelerate timm
RUN pip install pytest-cov pytest_mock nltk wrapt
RUN pip install black isort pylint mypy click
RUN pip install mistral-common tiktoken
RUN pip install git+https://github.com/openai/CLIP.git
RUN pip install fairscale fire blobfile
# Use --no-deps for the following to avoid outdated and unnecessary dependencies.
RUN pip install mmf --no-deps
RUN pip install open_clip_torch open-flamingo[eval] --no-deps
RUN pip install "tensorstore==0.1.45"
RUN pip install git+https://github.com/NVIDIA/Megatron-Energon.git#egg=megatron-energon[av_decode]


================================================
FILE: examples/multimodal/llama_3p1_nemotron_nano_vl_8b_v1/README.md
================================================
# Llama-3.1-Nemotron-Nano-VL-8B-V1

See [Hugging Face](https://huggingface.co/nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1) for details.

# Checkpoints

[HuggingFace version](https://huggingface.co/nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1)

[Megatron-Core version](https://huggingface.co/nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1-mcore)

# Setup

## Docker image

See `examples/multimodal/llama_3p1_nemotron_nano_vl_8b_v1/Dockerfile`.

## Dataset preparation

We use [Megatron Energon](https://github.com/NVIDIA/Megatron-Energon) for multimodal dataloading.

## Model

You can download trained tensor parallel size 1 and 4 Megatron checkpoints [here](https://huggingface.co/nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1-mcore).
Alternatively, you can follow the steps in [Model conversion](#model-conversion) and [Training](#training) below to prepare a model
and run pretraining and SFT from scratch using a prepared dataset.

### Model conversion

#### Language model conversion

We start from [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) from HuggingFace.
Please download it and run the following command to convert it to Megatron format.
```
export LLAMA_DOWNLOAD_DIR=<downloaded hf model directory>
CUDA_DEVICE_MAX_CONNECTIONS=1 python tools/checkpoint/convert.py --bf16 --model-type GPT --loader llama_mistral --saver core \
    --target-tensor-parallel-size 4 --checkpoint-type hf \
    --load-dir $LLAMA_DOWNLOAD_DIR --save-dir llama3p1 --tokenizer-model $LLAMA_DOWNLOAD_DIR \
    --saver-transformer-impl transformer_engine --model-size llama3
```

#### Vision model conversion

You can run the following command to convert RADIO to an mcore compatible format:
```
python examples/multimodal/model_converter/radio_converter.py --output radio_tp_4 --tensor-parallel-size 4 --use-te \
    --version c-radio_v2-vlm-h --model-type radio_v2.5-h
```

#### Combined checkpoint

Combine the language and vision model by running:
```
examples/multimodal/combine_lm_vision_checkpoints.sh <language model directory> <vision model directory> <output directory>
```

# Training

1. Pretraining: we provide an example pretraining script at `examples/multimodal/llama_3p1_nemotron_nano_vl_8b_v1/pretraining_llama_3p1_nemotron_nano_vl_8b_v1.sh`.
2. SFT: we provide an example SFT script at `examples/multimodal/llama_3p1_nemotron_nano_vl_8b_v1/sft_llama_3p1_nemotron_nano_vl_8b_v1.sh`.

# Inference and evaluation

To run a simple inference example:
```
export LLAMA_NEMOTRON_NANO_VL_PATH=<path to the megatron tp=4 checkpoint>
examples/multimodal/llama_3p1_nemotron_nano_vl_8b_v1/text_generation.sh --model-path $LLAMA_NEMOTRON_NANO_VL_PATH \
    --task inference --output-path inference-example --tensor-model-parallel-size 4
```

To evaluate the model, you can change `--task` to `MMMU` or `TextVQA`, for example.


================================================
FILE: examples/multimodal/llama_3p1_nemotron_nano_vl_8b_v1/pretraining_llama_3p1_nemotron_nano_vl_8b_v1.sh
================================================
#!/bin/bash

# Your SBATCH commands here if using SLURM.

# Please launch this script from megatron-lm root.

# Train a multimodal model.

export CUDA_DEVICE_MAX_CONNECTIONS=1

USER=$SLURM_JOB_USER

# Auto-detect batch or interactive mode.
which srun
BATCH=$((1-$?))

DEBUG=0
USE_TILING=1

# Remember to update model and job name if running in batch mode!!
if [[ $BATCH -eq 0 ]]; then
    DATETIME=`date +'%y-%m-%d-%H-%M-%S'`
    MODEL_NAME="interactive_pretraining_llama_3p1_nemotron_nano_vl_8b_v1_${DATETIME}"
    SPECIAL_TOKENS="--special-tokens <image> <img> </img> <quad> </quad> <ref> </ref> <box> </box>"
    DEBUG=1
else
    MODEL_NAME="pretraining_llama_3p1_nemotron_nano_vl_8b_v1"
    SPECIAL_TOKENS="--special-tokens \<image\> \<img\> \</img\> \<quad\> \</quad\> \<ref\> \</ref\> \<box\> \</box\>"
fi

WORKSPACE="<some dir>"
SOURCE=`pwd`
OUTPUT_BASE="${WORKSPACE}/output"
OUTPUT="${OUTPUT_BASE}/${MODEL_NAME}"

FINETUNE_DIR=${OUTPUT}/checkpoints
LOGS_DIR="${OUTPUT}/logs"
TENSORBOARD_DIR="${OUTPUT}/tensorboard"

TP=4

CHECKPOINT_DIR="${WORKSPACE}/output/${LOAD_NAME}/checkpoints"

DATA_TRAIN="${SOURCE}/examples/multimodal/llama_3p1_nemotron_nano_vl_8b_v1/pretrain_blend.yaml"

if [[ $DEBUG -eq 1 ]]; then
    MBZ=1
    BZ=1
    NW=0
    AD=0.0
    HD=0.0
    LI=1

    NONDETERMINISTIC_ATTN=1

    NUM_GPU=4
    export CUDA_VISIBLE_DEVICES=0,1,2,3
else
    MBZ=1
    BZ=1024
    NW=8
    AD=0.0
    HD=0.0
    LI=5
    EXTRA_ARGS=""
    NONDETERMINISTIC_ATTN=1
    NUM_GPU=8
fi

SEQ_LEN=1024
DECODER_SEQ_LEN=4096

if [[ $USE_TILING -eq 1 ]]; then
    EXTRA_ARGS+=" --pixel-shuffle --use-tiling --max-num-tiles 12 --use-thumbnail"
    SEQ_LEN=256
fi

OPTIONS=" \
    --use-checkpoint-args \
    --disable-bias-linear \
    --tokenizer-type MultimodalTokenizer \
    --tokenizer-model meta-llama/Llama-3.1-8B-Instruct \
    --transformer-impl transformer_engine \
    --normalization RMSNorm \
    --group-query-attention \
    --num-query-groups 8 \
    --no-masked-softmax-fusion \
    --attention-softmax-in-fp32 \
    --attention-dropout ${AD} \
    --hidden-dropout ${HD} \
    --untie-embeddings-and-output-weights \
    --position-embedding-type rope \
    --rotary-percent 1.0 \
    --rotary-base 500000 \
    --use-rope-scaling \
    --swiglu \
    --tensor-model-parallel-size ${TP}  \
    --pipeline-model-parallel-size 1  \
    --num-layers 32 \
    --hidden-size 4096 \
    --ffn-hidden-size 14336 \
    --num-attention-heads 32 \
    --use-distributed-optimizer \
    --use-te \
    --num-workers ${NW} \
    --exit-duration-in-mins 230 \
    --seq-length ${SEQ_LEN} \
    --decoder-seq-length ${DECODER_SEQ_LEN} \
    --max-position-embeddings 131072 \
    --train-samples 1491231 \
    --lr-warmup-samples 102400 \
    --micro-batch-size ${MBZ} \
    --global-batch-size ${BZ} \
    --lr 2e-4 \
    --min-lr 0.0 \
    --lr-decay-style cosine \
    --log-interval ${LI} \
    --eval-iters 10 \
    --eval-interval 500 \
    --data-path ${DATA_TRAIN} \
    --prompt-path ${SOURCE}/examples/multimodal/manual_prompts.json \
    --save-interval 5000 \
    --save ${FINETUNE_DIR} \
    --load ${FINETUNE_DIR} \
    --dataloader-save ${FINETUNE_DIR}/dataloader \
    --pretrained-checkpoint ${CHECKPOINT_DIR} \
    --split 100,0,0 \
    --clip-grad 1.0 \
    --weight-decay 1e-2 \
    --adam-beta1 0.9 \
    --adam-beta2 0.999 \
    --init-method-std 0.02 \
    --log-params-norm \
    --log-num-zeros-in-grad \
    --bf16 \
    --eod-mask-loss \
    --freeze-ViT \
    --freeze-LM \
    --patch-dim 16 \
    --img-h 512 \
    --img-w 512 \
    --dataloader-type external \
    --tensorboard-dir ${TENSORBOARD_DIR} \
    --language-model-type=llama3.1_8b \
    ${EXTRA_ARGS} \
    --distributed-timeout-minutes 60 \
    --allow-missing-vision-projection-checkpoint \
    --vision-model-type radio \
    --tokenizer-prompt-format llama3p1 \
    --use-loss-scaling \
    ${SPECIAL_TOKENS} \
    --ckpt-format torch \
    --image-tag-type internvl \
    --force-system-message \
    --disable-vision-class-token \
    --use-area-weighted-aspect-ratio \
    --inference-max-seq-length 32768 \
"

export NVTE_APPLY_QK_LAYER_SCALING=0
export NVTE_ALLOW_NONDETERMINISTIC_ALGO=${NONDETERMINISTIC_ATTN}

# Interactive or batch mode
if [[ $BATCH -eq 0 ]]; then
    torchrun --nproc_per_node ${NUM_GPU} examples/multimodal/train.py ${OPTIONS}
else
    run_cmd="python -u ${SOURCE}/examples/multimodal/train.py ${OPTIONS}"

    DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`

    srun -l --verbose \
    --container-image <path to docker image> \
    --container-mounts "<some mount>" \
    --output=${LOGS_DIR}/%x_%j_$DATETIME.log \
    sh -c "echo ${run_cmd}; ${run_cmd}"

    set +x
fi


================================================
FILE: examples/multimodal/llama_3p1_nemotron_nano_vl_8b_v1/sft_llama_3p1_nemotron_nano_vl_8b_v1.sh
================================================
#!/bin/bash

# Your SBATCH commands here if using SLURM.

# Please launch this script from megatron-lm root.

# Train a multimodal model.

export CUDA_DEVICE_MAX_CONNECTIONS=1

USER=$SLURM_JOB_USER

# Auto-detect batch or interactive mode.
which srun
BATCH=$((1-$?))

DEBUG=0
USE_TILING=1

# Remember to update model and job name if running in batch mode!!
if [[ $BATCH -eq 0 ]]; then
    DATETIME=`date +'%y-%m-%d-%H-%M-%S'`
    MODEL_NAME="interactive_sft_llama_3p1_nemotron_nano_vl_8b_v1_${DATETIME}"
    SPECIAL_TOKENS="--special-tokens <image> <img> </img> <quad> </quad> <ref> </ref> <box> </box>"
    DEBUG=1
else
    MODEL_NAME="sft_llama_3p1_nemotron_nano_vl_8b_v1"
    SPECIAL_TOKENS="--special-tokens \<image\> \<img\> \</img\> \<quad\> \</quad\> \<ref\> \</ref\> \<box\> \</box\>"
fi

WORKSPACE="<some dir>"
SOURCE=`pwd`
OUTPUT_BASE="${WORKSPACE}/output"
OUTPUT="${OUTPUT_BASE}/${MODEL_NAME}"

FINETUNE_DIR=${OUTPUT}/checkpoints
LOGS_DIR="${OUTPUT}/logs"
TENSORBOARD_DIR="${OUTPUT}/tensorboard"

TP=4

CHECKPOINT_DIR="${WORKSPACE}/output/${LOAD_NAME}/checkpoints/pretraining_llama_3p1_nemotron_nano_vl_8b_v1"

DATA_TRAIN="${SOURCE}/examples/multimodal/llama_3p1_nemotron_nano_vl_8b_v1/sft_blend.yaml"

SEQ_LEN=1024
DECODER_SEQ_LEN=16384

if [[ $DEBUG -eq 1 ]]; then
    MBZ=1
    BZ=2
    NW=0
    AD=0.0
    HD=0.0
    LI=1
    EVAL_INTERVAL=1
    NONDETERMINISTIC_ATTN=1
    NUM_GPU=8
else
    MBZ=1
    BZ=128
    NW=8
    AD=0.0
    HD=0.0
    LI=5
    EXTRA_ARGS=""
    NONDETERMINISTIC_ATTN=1
    NUM_GPU=8
    EVAL_INTERVAL=2000
fi

if [[ $USE_TILING -eq 1 ]]; then
    EXTRA_ARGS+=" --pixel-shuffle --use-tiling --max-num-tiles 12 --use-thumbnail"
    SEQ_LEN=256
fi

OPTIONS=" \
    --use-checkpoint-args \
    --disable-bias-linear \
    --tokenizer-type MultimodalTokenizer \
    --tokenizer-model meta-llama/Llama-3.1-8B-Instruct \
    --transformer-impl transformer_engine \
    --normalization RMSNorm \
    --group-query-attention \
    --num-query-groups 8 \
    --no-masked-softmax-fusion \
    --attention-softmax-in-fp32 \
    --attention-dropout ${AD} \
    --hidden-dropout ${HD} \
    --untie-embeddings-and-output-weights \
    --position-embedding-type rope \
    --rotary-percent 1.0 \
    --rotary-base 500000 \
    --use-rope-scaling \
    --swiglu \
    --tensor-model-parallel-size ${TP}  \
    --pipeline-model-parallel-size 1 \
    --num-layers 32 \
    --hidden-size 4096 \
    --ffn-hidden-size 14336 \
    --num-attention-heads 32 \
    --use-distributed-optimizer \
    --use-te \
    --num-workers ${NW} \
    --exit-duration-in-mins 230 \
    --seq-length ${SEQ_LEN} \
    --decoder-seq-length ${DECODER_SEQ_LEN} \
    --max-position-embeddings 131072 \
    --train-samples 2494236 \
    --lr-warmup-fraction 0.03 \
    --micro-batch-size ${MBZ} \
    --global-batch-size ${BZ} \
    --lr 2e-5 \
    --min-lr 0.0 \
    --lr-decay-style cosine \
    --log-interval ${LI} \
    --eval-iters 10 \
    --eval-interval ${EVAL_INTERVAL} \
    --data-path ${DATA_TRAIN} \
    --prompt-path ${SOURCE}/examples/multimodal/manual_prompts.json \
    --save-interval 2000 \
    --save ${FINETUNE_DIR} \
    --load ${FINETUNE_DIR} \
    --pretrained-checkpoint ${CHECKPOINT_DIR} \
    --dataloader-save ${FINETUNE_DIR}/dataloader \
    --split 100,0,0 \
    --clip-grad 1.0 \
    --weight-decay 0.05 \
    --adam-beta1 0.9 \
    --adam-beta2 0.999 \
    --init-method-std 0.014 \
    --bf16 \
    --eod-mask-loss \
    --patch-dim 16 \
    --img-h 512 \
    --img-w 512 \
    --dataloader-type external \
    --tensorboard-dir ${TENSORBOARD_DIR} \
    --language-model-type=llama3.1_8b \
    ${EXTRA_ARGS} \
    --distributed-timeout-minutes 60 \
    --vision-model-type radio \
    --tokenizer-prompt-format llama3p1 \
    --use-loss-scaling \
    --packing-seq-length ${DECODER_SEQ_LEN} \
    ${SPECIAL_TOKENS} \
    --ckpt-format torch \
    --image-tag-type internvl \
    --disable-vision-class-token \
    --recompute-granularity full \
    --recompute-method block \
    --recompute-num-layers 32 \
    --recompute-vision \
    --use-area-weighted-aspect-ratio \
    --inference-max-seq-length 32768 \
"

export NVTE_APPLY_QK_LAYER_SCALING=0
export NVTE_ALLOW_NONDETERMINISTIC_ALGO=${NONDETERMINISTIC_ATTN}

# Interactive or batch mode
if [[ $BATCH -eq 0 ]]; then
    torchrun --nproc_per_node ${NUM_GPU} examples/multimodal/train.py ${OPTIONS}
else
    run_cmd="cd ${SOURCE}; python -u examples/multimodal/train.py ${OPTIONS}"

    DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`

    srun -l --verbose \
    --container-image <path to docker image> \
    --container-mounts "<some mount>" \
    --output=${LOGS_DIR}/%x_%j_$DATETIME.log \
    sh -c "${run_cmd}"

    set +x
fi


================================================
FILE: examples/multimodal/llama_3p1_nemotron_nano_vl_8b_v1/text_generation.sh
================================================
#!/bin/bash

export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NVTE_APPLY_QK_LAYER_SCALING=0

INPUT_IMAGE_PATH="placeholder"
GROUNDTRUTH_PATH="placeholder"
NUM_FRAMES=1
TP=4
OUT_SEQ_LEN=1024
INFERENCE_MAX_SEQ_LEN=8192
USE_TILING=1
MAX_NUM_TILES=12

while [[ $# -gt 0 ]]; do
    case $1 in
        --tensor-model-parallel-size)
            TP="$2"
            shift
            shift
            ;;
        --input-image-path)
            INPUT_IMAGE_PATH="$2"
            shift
            shift
            ;;
        --num-frames)
            NUM_FRAMES="$2"
            shift
            shift
            ;;
        --out-seq-length)
            OUT_SEQ_LEN="$2"
            shift
            shift
            ;;
        --inference-max-seq-length)
            INFERENCE_MAX_SEQ_LEN="$2"
            shift
            shift
            ;;
        --max-num-tiles)
            MAX_NUM_TILES="$2"
            shift
            shift
            ;;
        -g|--groundtruth-path)
            GROUNDTRUTH_PATH="$2"
            shift
            shift
            ;;
        -o|--output-path)
            OUTPUT_PATH="$2"
            shift
            shift
            ;;
        -m|--model-path)
            MODEL_PATH="$2"
            shift
            shift
            ;;
        --task)
            TASK="$2"
            shift
            shift
            ;;
        -g|--gt-path)
            GROUNDTRUTH_PATH="$2"
            shift
            shift
            ;;
        -*|--*)
            echo "Invalid option $1"
            exit 1
            ;;
    esac
done

# Please modify these as needed.
NUM_PARTITIONS=0
START=0
END=0

SEQ_LEN=1024
DECODER_SEQ_LEN=16384

EXTRA_ARGS=""

if [[ $USE_TILING -eq 1 ]]; then
    EXTRA_ARGS+=" --pixel-shuffle --use-tiling --max-num-tiles ${MAX_NUM_TILES} --use-thumbnail"
    SEQ_LEN=256
fi

for PARTITION_ID in $( eval echo {$START..$END} )
do
    torchrun --nproc_per_node ${TP} examples/multimodal/run_text_generation.py \
        --attention-softmax-in-fp32 \
        --transformer-impl transformer_engine \
        --use-te \
        --use-checkpoint-args \
        --normalization RMSNorm \
        --language-model-type=llama3.1_8b \
        --untie-embeddings-and-output-weights \
        --disable-bias-linear \
        --position-embedding-type rope \
        --rotary-percent 1.0 \
        --rotary-base 500000 \
        --use-rope-scaling \
        --swiglu \
        --attention-dropout 0.0 \
        --hidden-dropout 0.0 \
        --tensor-model-parallel-size ${TP} \
        --pipeline-model-parallel-size 1 \
        --group-query-attention \
        --num-query-groups 8 \
        --num-layers 32 \
        --hidden-size 4096 \
        --ffn-hidden-size 14336 \
        --num-attention-heads 32 \
        --max-position-embeddings 131072 \
        --no-masked-softmax-fusion \
        --load ${MODEL_PATH} \
        --tokenizer-type MultimodalTokenizer \
        --tokenizer-model /lustre/fsw/portfolios/llmservice/projects/llmservice_nlp_fm/mcore_mmodal_models/models--meta-llama--Meta-Llama-3.1-8B-Instruct/snapshots/5206a32e0bd3067aef1ce90f5528ade7d866253f/ \
        --tokenizer-prompt-format llama3p1 \
        --bf16 \
        --micro-batch-size 1 \
        --seq-length ${SEQ_LEN} \
        --decoder-seq-length ${DECODER_SEQ_LEN} \
        --out-seq-length ${OUT_SEQ_LEN} \
        --inference-max-seq-length ${INFERENCE_MAX_SEQ_LEN} \
        --temperature 1.0 \
        --img-h 512 \
        --img-w 512 \
        --patch-dim 16 \
        --seed 153 \
        --top_k 1 \
        --no-load-rng \
        --no-load-optim \
        --input-image-path ${INPUT_IMAGE_PATH} \
        --num-partitions ${NUM_PARTITIONS} \
        --partition-id ${PARTITION_ID} \
        --output-path ${OUTPUT_PATH} \
        --gt-path ${GROUNDTRUTH_PATH} \
        --task ${TASK} \
        ${EXTRA_ARGS} \
        --vision-model-type radio \
        --num-frames ${NUM_FRAMES} \
        --special-tokens "<image>" "<img>" "</img>" "<quad>" "</quad>" "<ref>" "</ref>" "<box>" "</box>" \
        --ckpt-format torch \
        --image-tag-type internvl \
        --disable-vision-class-token \
        --force-system-message \
        --exit-on-missing-checkpoint
done


================================================
FILE: examples/multimodal/manual_prompts.json
================================================
{
    "COMMENT": "Sources for these prompts include https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain/viewer and https://huggingface.co/datasets/HuggingFaceM4/M3IT",
    "Captioning": {
        "raw": [
            "Can you briefly explain what you see in the image?",
            "Describe what's happening in this image in one short sentence.",
            "Write a short caption that accurately represents the content of this image.",
            "Please generate a descriptive caption for the image provided.",
            "How would you summarize the scene depicted in the picture in short?",
            "Describe the image briefly.",
            "Write a succinct description of the image, capturing its main components, the relationships between them, and any notable details.",
            "Create a concise caption that accurately describes the main elements in the image provided.",
            "Write a brief, yet comprehensive, description of the image.",
            "Describe the image in a clear and concise manner.",
            "For the given image, provide a one-sentence summary that captures the most important details.",
            "Generate a short caption for the picture.",
            "Write a short and informative description that highlights the primary subjects and actions occurring in the given image.",
            "Provide a concise and informative caption for the image, focusing on the primary subjects.",
            "Write a clear description of the image, make sure the key features are well covered.",
            "Offer a succinct explanation of the picture presented."
        ]
    },
    "CaptioningPretraining": {
        "raw": [
            "Generate a short caption of the image.",
            "Describe the image concisely.",
            "Provide a brief description of the given image."
        ],
        "llava": [
            "Give a brief description of image.",
            "Give a brief description of the image.",
            "Provide a brief description of the given image.",
            "Provide a one-sentence caption for the provided image.",
            "Write a terse but informative summary of the picture.",
            "Describe the image concisely.",
            "Generate a clear and concise summary of the photo."
        ]
    },
    "OCR": {
        "raw": [
            "Can you read the text from image and output here?",
            "Extract and document the text from the provided image.",
            "Converting the text embedded in this image into a readable document.",
            "Transcribe all the text you find.",
            "Can you extract all visible text from the image here?"
        ]
    }
}


================================================
FILE: examples/multimodal/model.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
import warnings
import logging
from copy import deepcopy

import torch
from config import get_language_model_config, get_vision_model_config, get_vision_projection_config
from layer_specs import (get_layer_spec, get_layer_spec_te, get_mlp_module_spec, get_norm_mlp_module_spec_te,
                         get_mamba_layer_spec_te)

from megatron.core.models.multimodal.llava_model import IMAGE_TOKEN, LLaVAModel
from megatron.core.models.vision.clip_vit_model import get_num_image_embeddings
from megatron.training import get_args, get_tokenizer, print_rank_0
from megatron.training.arguments import core_transformer_config_from_args
from megatron.core.utils import log_single_rank


def model_provider(
    pre_process=True, post_process=True, add_encoder=True, add_decoder=True, parallel_output=True,
    vp_stage=None, config=None, pg_collection=None
) -> LLaVAModel:
    """Builds the model.

    Args:
        pre_process (bool): Include the embedding layer in the gpt decoder (used with pipeline parallelism). Defaults to True.
        post_process (bool): Include an output layer and a layernorm in the gpt decoder (used with pipeline parallelism). Defaults to True.
        add_encoder (bool): Construct the encoder module (used with pipeline parallelism). Defaults to True. When we use pipelining, the encoder
            will live on only a subset of the pipeline stages (specifically, only the first stage).
        add_decoder (bool): Construct the decoder module (used with pipeline parallelism). Defaults to True. When we use pipelining, the decoder
            will live on only a subset of the pipeline stages (specifically, every stage after the first one).
        parallel_output (bool): Enable parallel model output.
        vp_stage: Optional virtual pipeline stage. Used with virtual pipeline parallelism.
        config: Optional transformer config. If None, will be created from args.
        pg_collection: Optional process group collection. If None, will use default.

    Returns:
        model: A multimodal model.
    """
    args = get_args()
    use_te = args.use_te

    print_rank_0('building a multimodal model ...')

    num_image_embeddings = get_num_image_embeddings(
        args.img_h,
        args.img_w,
        args.patch_dim,
        args.vision_model_type,
        args.disable_vision_class_token,
        1,
        args.pixel_shuffle,
        args.use_tile_tags,
        args.max_num_tiles,
        args.tokenizer_prompt_format
    )
    old_seq_length = args.seq_length
    args.seq_length = args.encoder_seq_length = num_image_embeddings
    if old_seq_length != args.seq_length:
        log_single_rank(
            logging.getLogger(__name__),
            logging.WARNING,
            f"Changed seq_length and encoder_seq_length (vision model sequence length) from {old_seq_length} to num_image_tokens ({num_image_embeddings})"
        )

    max_num_image_embeddings = max((args.max_num_tiles + int(args.use_thumbnail)), args.num_frames) * num_image_embeddings

    assert (
        args.decoder_seq_length is not None
    ), "Please provide --decoder-seq-length to set the language model sequence length"
    assert (
        args.decoder_seq_length > max_num_image_embeddings
    ), "Language model sequence length must be greater than the maximum number of image embeddings"
    if args.decoder_seq_length > args.max_position_embeddings:
        args.max_position_embeddings = args.decoder_seq_length
        warnings.warn(
            f"Expanded max_position_embeddings to {args.max_position_embeddings} to accommodate the maximum language model sequence length"
        )

    language_model_type = args.language_model_type
    vision_model_type = args.vision_model_type

    base_config = core_transformer_config_from_args(get_args())
    base_config.language_model_type = args.language_model_type
    base_config.vision_model_type = args.vision_model_type
    base_config.calculate_per_token_loss = True

    language_config = deepcopy(base_config)
    language_config = get_language_model_config(language_config)

    if language_model_type.startswith("hf://"):
        assert args.tensor_model_parallel_size == 1, "Huggingface models do not support --tensor-model-parallel-size > 1"
        assert args.pipeline_model_parallel_size < 2, "Huggingface models do not support --pipeline-model-parallel-size > 1"
        assert not args.sequence_parallel, "Huggingface models do not support --sequence-parallel"
        assert args.context_parallel_size < 2, "Huggingface models do not support --context-parallel-size > 1"

    if language_model_type.startswith("hf://"):
        language_transformer_layer_spec = None
    elif use_te:
        # Padding mask needed for SP/CP.
        padding = args.context_parallel_size > 1 and args.sequence_parallel
        if args.language_model_type.startswith('nemotron5-hybrid'):
            language_transformer_layer_spec = get_mamba_layer_spec_te(padding=padding)
        else:
            language_transformer_layer_spec = get_layer_spec_te(
                is_vit=False, padding=padding
            )  # TENorm detects LayerNorm/RMS automatically.
    else:
        language_transformer_layer_spec = get_layer_spec(
            is_vit=False, normalization=language_config.normalization
        )

    vision_config = deepcopy(base_config)
    vision_config = get_vision_model_config(
        vision_config, apply_query_key_layer_scaling=args.apply_query_key_layer_scaling
    )
    if vision_model_type.startswith("hf://"):
        assert not args.sequence_parallel, "Huggingface models do not support --sequence-parallel"
        assert args.context_parallel_size < 2, "Huggingface models do not support --context-parallel-size > 1"

    if vision_model_type in ["clip", "siglip", "radio", "cradio-g"]:
        if use_te:
            vision_transformer_layer_spec = get_layer_spec_te(
                is_vit=True
            )  # TENorm detects LayerNorm/RMS automatically.
        else:
            vision_transformer_layer_spec = get_layer_spec(
                is_vit=True, normalization=vision_config.normalization
            )
    elif vision_model_type == "radio-g":
        if use_te:
            from radio.radio_g import get_radio_g_layer_spec_te
            vision_transformer_layer_spec = get_radio_g_layer_spec_te()  # TENorm detects LayerNorm/RMS automatically.
        else:
            from radio.radio_g import get_radio_g_layer_spec
            vision_transformer_layer_spec = get_radio_g_layer_spec(
                normalization=vision_config.normalization
            )
    elif vision_model_type == "internvit":
        from nvlm.internvit import get_internvit_layer_spec
        vision_transformer_layer_spec = get_internvit_layer_spec(use_te=use_te)
    elif vision_model_type == "internvit300M":
        from nvlm.internvit import get_internvit300M_layer_spec
        vision_transformer_layer_spec = get_internvit300M_layer_spec(use_te=use_te)
    elif vision_model_type.startswith("hf://"):
        vision_transformer_layer_spec = None
    else:
        raise RuntimeError("unsupported vision model type", vision_model_type)

    vision_projection_config = deepcopy(base_config)

    vision_projection_config = get_vision_projection_config(
        vision_projection_config, language_config.hidden_size
    )

    # Make sure vision model pipeline parallel size is not inherited from the language model pipeline parallel size.
    vision_config.pipeline_model_parallel_size = 1
    vision_projection_config.pipeline_model_parallel_size = vision_config.pipeline_model_parallel_size

    # Make sure the vision model does not inherit first and last pipeline num layers from the language model.
    vision_config.first_pipeline_num_layers = vision_config.last_pipeline_num_layers = None

    if vision_projection_config.normalization:
        vision_projection_layer_spec = get_norm_mlp_module_spec_te().submodules
    else:
        vision_projection_layer_spec = get_mlp_module_spec(use_te=use_te).submodules

    # Toggle --recompute* for the vision and language model separately.
    if args.recompute_vision:
        if vision_config.recompute_method is not None and vision_config.recompute_granularity is not None:
            vision_config.recompute_num_layers = vision_config.num_layers
    else:
        vision_config.recompute_granularity = None
        vision_config.recompute_method = None
        vision_config.recompute_num_layers = None

    vision_projection_config.recompute_granularity = None
    vision_projection_config.recompute_method = None
    vision_projection_config.recompute_num_layers = None

    # TODO: Vision model and projection do not use SP/CP yet.
    vision_config.sequence_parallel = False
    vision_config.context_parallel_size = 1
    vision_config.tp_comm_overlap = False

    vision_projection_config.sequence_parallel = False
    vision_projection_config.context_parallel_size = 1
    vision_projection_config.tp_comm_overlap = False

    tokenizer = get_tokenizer()
    image_token_index = tokenizer.convert_tokens_to_ids(IMAGE_TOKEN)
    assert image_token_index is not None, f"IMAGE_TOKEN={IMAGE_TOKEN} needs to be added using the --special-tokens arg."

    tile_tags = _get_tile_tags(args, tokenizer)

    model = LLaVAModel(
        language_transformer_config=language_config,
        language_transformer_layer_spec=language_transformer_layer_spec,
        language_vocab_size=args.padded_vocab_size,
        language_max_sequence_length=args.decoder_seq_length,
        vision_transformer_config=vision_config,
        vision_transformer_layer_spec=vision_transformer_layer_spec,
        drop_vision_class_token=args.disable_vision_class_token,
        vision_projection_config=vision_projection_config,
        vision_projection_layer_spec=vision_projection_layer_spec,
        vision_projection_type="mlp",
        allow_missing_vision_projection_checkpoint=args.allow_missing_vision_projection_checkpoint,
        parallel_output=parallel_output,
        share_embeddings_and_output_weights=not args.untie_embeddings_and_output_weights,
        language_position_embedding_type=args.position_embedding_type,
        language_rotary_percent=args.rotary_percent,
        pre_process=pre_process,
        post_process=post_process,
        add_encoder=add_encoder,
        add_decoder=add_decoder,
        img_h=args.img_h,
        img_w=args.img_w,
        patch_dim=args.patch_dim,
        language_rotary_base=args.rotary_base,
        language_rope_scaling=args.use_rope_scaling,
        hybrid_layer_pattern=args.hybrid_layer_pattern,
        fp16_lm_cross_entropy=args.fp16_lm_cross_entropy,
        image_token_index=image_token_index,
        pixel_shuffle=args.pixel_shuffle,
        tile_tags=tile_tags,
        max_num_tiles=args.max_num_tiles,
        tokenizer_type=args.tokenizer_prompt_format,
    )

    model.freeze(
        freeze_language_model=args.freeze_LM,
        freeze_vision_model=args.freeze_ViT,
        freeze_vision_projection=False,
    )

    return model


def _get_tile_tags(args, tokenizer):
    """Tile tags are used in NVLM to surround image tiles with text tags."""
    if not args.use_tile_tags:
        return None

    # We expect the tokenized length of the tags is same.
    if args.max_num_tiles < 10:
        thumbnail_tag_text = "<tile_global_thumbnail>"
        if args.tokenizer_prompt_format == "nvlm-yi-34b":
            thumbnail_tag_text = "<tile_global>"

        if args.tokenizer_prompt_format.startswith("nemotron"):
            tile_tags_text = [f"<tile_{i:02d}>" for i in range(1, args.max_num_tiles + 1)] + [thumbnail_tag_text]
        else:
            tile_tags_text = [f"<tile_{i}>" for i in range(1, args.max_num_tiles + 1)] + [thumbnail_tag_text]
    elif args.max_num_tiles <= 12:
        thumbnail_tag_text = "<tile_global_thumbnail0>"
        if args.tokenizer_prompt_format == "nvlm-yi-34b":
            thumbnail_tag_text = "<tile_global0>"
        elif args.tokenizer_prompt_format.startswith("nemotron") or args.tokenizer_prompt_format == "llama3p1":
            thumbnail_tag_text = "<tile_global_thumbnail>"
        tile_tags_text = [f"<tile_{i:02d}>" for i in range(1, args.max_num_tiles + 1)] + [thumbnail_tag_text]
    else:
        raise ValueError("We only support max_num_tiles <= 12 when using nvlm image_tag_type")

    start_idx = 0
    if tokenizer._prompt_config.has_bos:
        start_idx = 1

    # Convert to tokens [num_tiles, tile_seq_len].
    tile_tags = [tokenizer.tokenize(t)[start_idx:] for t in tile_tags_text]

    return tile_tags


================================================
FILE: examples/multimodal/model_converter/clip_converter.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import argparse
import os

import torch

import clip


def convert(download_root, output_path, tensor_parallel_size, use_te):
    device = "cuda"

    model, _ = clip.load("ViT-L/14@336px", device=device, download_root=download_root)

    state_dict = model.state_dict()
    new_state_dicts = [{"model": dict()} for _ in range(tensor_parallel_size)]

    # Indices from mapping pytorch multihead attention to megatron.
    kv_channels = 64
    hidden_dim = 1024
    num_heads = 16
    indices = []
    for i in range(num_heads):
        lb = i * kv_channels
        ub = (i + 1) * kv_channels
        indices.append(torch.arange(lb, ub, dtype=torch.int))
        indices.append(torch.arange(hidden_dim + lb, hidden_dim + ub, dtype=torch.int))
        indices.append(torch.arange(2 * hidden_dim + lb, 2 * hidden_dim + ub, dtype=torch.int))

    indices = torch.cat(indices)

    for name, tensor in state_dict.items():
        # Skip text model.
        if "visual" not in name:
            continue

        # Skip final layers not used in our model.
        if name == "visual.proj" or "ln_post" in name:
            continue

        # Map parameter names to ones used in megatron.
        new_name = ""
        new_tensor = tensor
        if new_tensor.dtype == torch.float16:
            new_tensor = new_tensor.to(torch.float32)

        # This is used for chunking some tensors to target tensor parallel size.
        chunk_dim = None

        if "class_embedding" in name:
            new_name = "class_token"
            # Our model uses class token that is expanded to input dimensions already.
            new_tensor = new_tensor.expand(1, 1, -1)
        elif "positional_embedding" in name:
            new_name = "position_embeddings.weight"
        elif "conv1" in name:
            new_name = "conv1.weight"
        elif "ln_pre.weight" in name:
            new_name = "ln_pre.weight"
        elif "ln_pre.bias" in name:
            new_name = "ln_pre.bias"
        elif "transformer.resblocks" in name:
            layer_idx = name.split(".")[3]
            base = f"decoder.layers.{layer_idx}"

            if "attn.in_proj_weight" in name:
                new_name = f"{base}.self_attention.linear_qkv.weight"
                new_tensor = new_tensor[indices]
                chunk_dim = 0
            elif "attn.in_proj_bias" in name:
                new_name = f"{base}.self_attention.linear_qkv.bias"
                new_tensor = new_tensor[indices]
                chunk_dim = 0
            elif "attn.out_proj.weight" in name:
                new_name = f"{base}.self_attention.linear_proj.weight"
                chunk_dim = 1
            elif "attn.out_proj.bias" in name:
                new_name = f"{base}.self_attention.linear_proj.bias"
            elif "ln_1.weight" in name:
                new_name = f"{base}.input_layernorm.weight"
                if use_te:
                    new_name = f"{base}.self_attention.linear_qkv.layer_norm_weight"
            elif "ln_1.bias" in name:
                new_name = f"{base}.input_layernorm.bias"
                if use_te:
                    new_name = f"{base}.self_attention.linear_qkv.layer_norm_bias"
            elif "mlp.c_fc.weight" in name:
                new_name = f"{base}.mlp.linear_fc1.weight"
                chunk_dim = 0
            elif "mlp.c_fc.bias" in name:
                new_name = f"{base}.mlp.linear_fc1.bias"
                chunk_dim = 0
            elif "mlp.c_proj.weight" in name:
                new_name = f"{base}.mlp.linear_fc2.weight"
                chunk_dim = 1
            elif "mlp.c_proj.bias" in name:
                new_name = f"{base}.mlp.linear_fc2.bias"
            elif "ln_2.weight" in name:
                new_name = f"{base}.pre_mlp_layernorm.weight"
                if use_te:
                    new_name = f"{base}.mlp.linear_fc1.layer_norm_weight"
            elif "ln_2.bias" in name:
                new_name = f"{base}.pre_mlp_layernorm.bias"
                if use_te:
                    new_name = f"{base}.mlp.linear_fc1.layer_norm_bias"

        assert new_name != "", f"unexpected layer name {name}"

        if chunk_dim is None:
            new_tensors = [new_tensor for _ in range(tensor_parallel_size)]
        else:
            new_tensors = torch.chunk(new_tensor, tensor_parallel_size, dim=chunk_dim)

        for i in range(tensor_parallel_size):
            # chunk() creates a view of a bigger tensor. clone() is used here to avoid excessive storage.
            new_state_dicts[i]["model"][new_name] = new_tensors[i].clone()

            # TE sets _extra_state (for FP8 purposes), so set an empty one here for compatibility.
            extra_state_layers = ("linear_qkv", "linear_proj", "linear_fc1", "linear_fc2")
            is_extra_state_layer = any([l in new_name for l in extra_state_layers])
            if use_te and is_extra_state_layer:
                layer = new_name.split(".")[-2]
                if layer in extra_state_layers:
                    extra_state_name = (
                        new_name[: new_name.rfind(".") + 1] + "_extra_state"
                    )  # Replace the weight name.
                    new_state_dicts[i]["model"][extra_state_name] = None

    for i in range(tensor_parallel_size):
        output_dir_tp = os.path.join(output_path, "iter_0000001", f"mp_rank_0{i}")
        os.makedirs(output_dir_tp)
        output_path_tp = os.path.join(output_dir_tp, "model_optim_rng.pt")
        torch.save(new_state_dicts[i], output_path_tp)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="""
Convert OpenAI CLIP VIT weights to megatron format.


Example usage:
python clip_converter.py --download-root /some/download/folder --output /some/output/folder --tensor-parallel-size 4
""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        "--download-root", type=str, required=True, help="Download folder for OpenAI CLIP weights"
    )
    parser.add_argument(
        "--output", type=str, required=True, help="output directory for megatron state dict file(s)"
    )
    parser.add_argument(
        "--tensor-parallel-size", type=int, default=1, help="model tensor parallel size"
    )
    parser.add_argument("--use-te", action="store_true", help="Use Transformer Engine")

    args = parser.parse_args()

    convert(args.download_root, args.output, args.tensor_parallel_size, args.use_te)

    print("done.")


================================================
FILE: examples/multimodal/model_converter/internvit_converter.py
================================================
import argparse
import os

import torch
from transformers import AutoModel


def convert(model_name, output_path, tensor_parallel_size, use_te):
    """Convert InternViT HF checkpoint to mcore."""
    hf_model = AutoModel.from_pretrained(
        model_name,
        trust_remote_code=True
    )

    hf_state_dict = hf_model.state_dict()
    new_state_dicts = [{"model": dict()} for _ in range(tensor_parallel_size)]

    hidden_size = 3200
    num_heads = 25
    dim = 128

    order = torch.ones(3 * hidden_size).long()

    for j in range(num_heads):
        for i in range(dim):
            order[i + dim*3*j] = j*dim+i
            order[dim + i + dim*3*j] = j*dim+i+num_heads*dim
            order[dim*2 + i + dim*3*j] = j*dim+i+num_heads*dim*2

    for name, tensor in hf_state_dict.items():
        # Map parameter names to ones used in megatron.
        new_name = ""
        new_tensor = tensor

        # This is used for chunking some tensors to target tensor parallel size.
        chunk_dim = None

        if "embeddings.class_embedding" in name:
            new_name = "class_token"
        elif "embeddings.patch_embedding.weight" in name:
            new_name = "conv1.weight"
        elif "embeddings.patch_embedding.bias" in name:
            new_name = "conv1.bias"
        elif "embeddings.position_embedding" in name:
            new_name = "position_embeddings.weight"
            new_tensor = new_tensor.squeeze(0)
        elif "encoder.layers" in name:
            layer_idx = name.split(".")[2]

            base = f"decoder.layers.{layer_idx}"

            head_dim = 128

            if tensor_parallel_size == 1:
                num_padded_heads = 25
            elif tensor_parallel_size == 8:
                # Note: 25 is not divisible by 8 and we don't currently support uneven heads split with tensor parallelism.
                # So we pad with dummy all-zero heads. Please use a nice even number of attention heads in your model.
                num_padded_heads = 32
            else:
                raise NotImplementedError("invalid tensor parallel size value:", tensor_parallel_size)

            if "ls1" in name:
                new_name = f"{base}.ls1"
            elif "ls2" in name:
                new_name = f"{base}.ls2"
            elif "attn.qkv.weight" in name:
                new_name = f"{base}.self_attention.linear_qkv.weight"
                num_tensors = 3
                padded_dim = head_dim * num_padded_heads * num_tensors
                padded_tensor = torch.zeros((padded_dim, new_tensor.shape[-1]), dtype=new_tensor.dtype, device=new_tensor.device)
                padded_tensor[:new_tensor.shape[0], :] = new_tensor[order]
                new_tensor = padded_tensor
                chunk_dim = 0
            elif "attn.q_norm.weight" in name:
                new_name = f"{base}.self_attention.q_layernorm.weight"
                num_tensors = 1
                padded_dim = head_dim * num_padded_heads * num_tensors
                padded_tensor = torch.zeros(padded_dim, dtype=new_tensor.dtype, device=new_tensor.device)
                padded_tensor[:new_tensor.shape[0]] = new_tensor
                new_tensor = padded_tensor
                chunk_dim = 0
            elif "attn.k_norm.weight" in name:
                new_name = f"{base}.self_attention.k_layernorm.weight"
                num_tensors = 1
                padded_dim = head_dim * num_padded_heads * num_tensors
                padded_tensor = torch.zeros(padded_dim, dtype=new_tensor.dtype, device=new_tensor.device)
                padded_tensor[:new_tensor.shape[0]] = new_tensor
                new_tensor = padded_tensor
                chunk_dim = 0
            elif "attn.proj.weight" in name:
                new_name = f"{base}.self_attention.linear_proj.weight"
                num_tensors = 1
                padded_dim = head_dim * num_padded_heads * num_tensors
                padded_tensor = torch.zeros((new_tensor.shape[0], padded_dim), dtype=new_tensor.dtype, device=new_tensor.device)
                padded_tensor[:, :new_tensor.shape[-1]] = new_tensor
                new_tensor = padded_tensor
                chunk_dim = 1
            elif "attn.proj.bias" in name:
                new_name = f"{base}.self_attention.linear_proj.bias"
            elif "mlp.fc1.weight" in name:
                new_name = f"{base}.mlp.linear_fc1.weight"
                chunk_dim = 0
            elif "mlp.fc1.bias" in name:
                new_name = f"{base}.mlp.linear_fc1.bias"
                chunk_dim = 0
            elif "mlp.fc2.weight" in name:
                new_name = f"{base}.mlp.linear_fc2.weight"
                chunk_dim = 1
            elif "mlp.fc2.bias" in name:
                new_name = f"{base}.mlp.linear_fc2.bias"
            elif "norm1" in name:
                new_name = f"{base}.input_layernorm.weight"
            elif "norm2" in name:
                new_name = f"{base}.pre_mlp_layernorm.weight"
            else:
                raise RuntimeError("unexpected transformer layer name", name)
        else:
            raise RuntimeError("unexpected layer name", name)

        assert new_name != "", f"unexpected layer name {name}"

        # TE sets _extra_state (for FP8 purposes), so set an empty one here for compatibility.
        extra_state_layers = ("linear_qkv", "linear_proj", "linear_fc1", "linear_fc2")
        is_extra_state_layer = any([l in new_name for l in extra_state_layers])
        if use_te and is_extra_state_layer:
            layer = new_name.split(".")[-2]
            if layer in extra_state_layers:
                extra_state_name = (
                    new_name[: new_name.rfind(".") + 1] + "_extra_state"
                )  # Replace the weight name.
                for i in range(tensor_parallel_size):
                    new_state_dicts[i]["model"][extra_state_name] = None

        if chunk_dim is None:
            new_tensors = [new_tensor for _ in range(tensor_parallel_size)]
        else:
            new_tensors = torch.chunk(new_tensor, tensor_parallel_size, dim=chunk_dim)

        for i in range(tensor_parallel_size):
            new_state_dicts[i]["model"][new_name] = new_tensors[i].clone()

    for i in range(tensor_parallel_size):
        output_dir_tp = os.path.join(output_path, f"iter_0000001/mp_rank_0{i}")
        os.makedirs(output_dir_tp, exist_ok=True)
        output_path_tp = os.path.join(output_dir_tp, "model_optim_rng.pt")
        torch.save(new_state_dicts[i], output_path_tp)
        print("saved file", output_path_tp)

    print("done")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="InternVIT HuggingFace to Mcore converter")
    parser.add_argument("--model-name", type=str, default="OpenGVLab/InternViT-6B-448px-V1-5", help="Model name in HuggingFace")
    parser.add_argument("--output-dir", type=str, required=True, help="Output directory for the mcore model.")
    parser.add_argument("--use-te", action="store_true", default=True)
    parser.add_argument("--tensor-parallel-size", type=int, required=True)

    args = parser.parse_args()

    convert(args.model_name, args.output_dir, args.tensor_parallel_size, args.use_te)


================================================
FILE: examples/multimodal/model_converter/radio_converter.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import argparse
import os

import torch

def convert_radio_h(output_path, tensor_parallel_size, use_te, version):
    device = "cuda"

    version = version if version is not None else 'radio_v2.5-h'
    model = torch.hub.load('NVlabs/RADIO', 'radio_model', version=version, progress=True)

    state_dict = model.state_dict()
    new_state_dicts = [{"model": dict()} for _ in range(tensor_parallel_size)]

    # Indices from mapping pytorch multihead attention to megatron.
    kv_channels = 80 
    hidden_dim = 1280 
    num_heads = 16
    indices = []
    for i in range(num_heads):
        lb = i * kv_channels
        ub = (i + 1) * kv_channels
        indices.append(torch.arange(lb, ub, dtype=torch.int))
        indices.append(torch.arange(hidden_dim + lb, hidden_dim + ub, dtype=torch.int))
        indices.append(torch.arange(2 * hidden_dim + lb, 2 * hidden_dim + ub, dtype=torch.int))

    indices = torch.cat(indices)

    for name, tensor in state_dict.items():
        # Map parameter names to ones used in megatron.
        new_name = ""
        new_tensor = tensor
        if new_tensor.dtype == torch.float16:
            new_tensor = new_tensor.to(torch.float32)

        # This is used for chunking some tensors to target tensor parallel size.
        chunk_dim = None

        if "summary_idxs" in name:
            continue
        elif "patch_generator" in name:
            if "embedder" in name:
                new_name = "embedder.weight"
                chunk_dim = 0
            elif "cls_token" in name:
                new_name = "class_token"
            elif "pos_embed" in name:
                new_name = "position_embeddings"
        elif "input_conditioner" in name:
            continue
        elif "blocks" in name:
            layer_idx = name.split(".")[2]
            base = f"decoder.layers.{layer_idx}"

            if "attn.qkv.weight" in name:
                new_name = f"{base}.self_attention.linear_qkv.weight"
                new_tensor = new_tensor[indices]
                chunk_dim = 0
            elif "attn.qkv.bias" in name:
                new_name = f"{base}.self_attention.linear_qkv.bias"
                new_tensor = new_tensor[indices]
                chunk_dim = 0
            elif "attn.proj.weight" in name:
                new_name = f"{base}.self_attention.linear_proj.weight"
                chunk_dim = 1
            elif "attn.proj.bias" in name:
                new_name = f"{base}.self_attention.linear_proj.bias"
            elif "norm1.weight" in name:
                new_name = f"{base}.input_layernorm.weight"
                if use_te:
                    new_name = f"{base}.self_attention.linear_qkv.layer_norm_weight"
            elif "norm1.bias" in name:
                new_name = f"{base}.input_layernorm.bias"
                if use_te:
                    new_name = f"{base}.self_attention.linear_qkv.layer_norm_bias"
            elif "mlp.fc1.weight" in name:
                new_name = f"{base}.mlp.linear_fc1.weight"
                chunk_dim = 0
            elif "mlp.fc1.bias" in name:
                new_name = f"{base}.mlp.linear_fc1.bias"
                chunk_dim = 0
            elif "mlp.fc2.weight" in name:
                new_name = f"{base}.mlp.linear_fc2.weight"
                chunk_dim = 1
            elif "mlp.fc2.bias" in name:
                new_name = f"{base}.mlp.linear_fc2.bias"
            elif "norm2.weight" in name:
                new_name = f"{base}.pre_mlp_layernorm.weight"
                if use_te:
                    new_name = f"{base}.mlp.linear_fc1.layer_norm_weight"
            elif "norm2.bias" in name:
                new_name = f"{base}.pre_mlp_layernorm.bias"
                if use_te:
                    new_name = f"{base}.mlp.linear_fc1.layer_norm_bias"

        assert new_name != "", f"unexpected layer name {name}"

        if chunk_dim is None:
            new_tensors = [new_tensor for _ in range(tensor_parallel_size)]
        else:
            new_tensors = torch.chunk(new_tensor, tensor_parallel_size, dim=chunk_dim)

        for i in range(tensor_parallel_size):
            # chunk() creates a view of a bigger tensor. clone() is used here to avoid excessive storage.
            new_state_dicts[i]["model"][new_name] = new_tensors[i].clone()

            # TE sets _extra_state (for FP8 purposes), so set an empty one here for compatibility.
            extra_state_layers = ("linear_qkv", "linear_proj", "linear_fc1", "linear_fc2")
            is_extra_state_layer = any([l in new_name for l in extra_state_layers])
            if use_te and is_extra_state_layer:
                layer = new_name.split(".")[-2]
                if layer in extra_state_layers:
                    extra_state_name = (
                        new_name[: new_name.rfind(".") + 1] + "_extra_state"
                    )  # Replace the weight name.
                    new_state_dicts[i]["model"][extra_state_name] = None

    for i in range(tensor_parallel_size):
        output_dir_tp = os.path.join(output_path, "iter_0000001", f"mp_rank_0{i}")
        os.makedirs(output_dir_tp)
        output_path_tp = os.path.join(output_dir_tp, "model_optim_rng.pt")
        torch.save(new_state_dicts[i], output_path_tp)
    with open(os.path.join(output_path, "latest_checkpointed_iteration.txt"), "w") as f:
        f.write("1") 

def convert_radio_g(output_path, tensor_parallel_size, use_te, version):
    device = "cuda"

    version = version if version is not None else 'radio_v2.5-g'
    model = torch.hub.load('NVlabs/RADIO', 'radio_model', version=version, progress=True)

    state_dict = model.state_dict()
    new_state_dicts = [{"model": dict()} for _ in range(tensor_parallel_size)]

    # Indices from mapping pytorch multihead attention to megatron.
    kv_channels = 64
    hidden_dim = 1536
    num_heads = 24
    ffn_hidden_dim = 4096
    indices = []
    for i in range(num_heads):
        lb = i * kv_channels
        ub = (i + 1) * kv_channels
        indices.append(torch.arange(lb, ub, dtype=torch.int))
        indices.append(torch.arange(hidden_dim + lb, hidden_dim + ub, dtype=torch.int))
        indices.append(torch.arange(2 * hidden_dim + lb, 2 * hidden_dim + ub, dtype=torch.int))

    indices = torch.cat(indices)

    mlp_indices = []
    step = ffn_hidden_dim // tensor_parallel_size
    for i in range(tensor_parallel_size):
        mlp_indices.append(torch.arange(i * step, (i + 1) * step, dtype=torch.int))
        mlp_indices.append(torch.arange(ffn_hidden_dim + i * step, ffn_hidden_dim + (i + 1) * step, dtype=torch.int))

    mlp_indices = torch.cat(mlp_indices)

    for name, tensor in state_dict.items():
        # Map parameter names to ones used in megatron.
        new_names = []
        new_tensor = tensor
        if new_tensor.dtype == torch.float16:
            new_tensor = new_tensor.to(torch.float32)
        new_tensors = [new_tensor]

        # This is used for chunking some tensors to target tensor parallel size.
        chunk_dim = None

        if "model" not in name:
            continue;
        elif "patch_generator" in name:
            if "embedder.weight" in name:
                new_names.append("embedder.weight")
                chunk_dim = 0
            elif "embedder.bias" in name:
                new_names.append("embedder.bias")
                chunk_dim = 0
            elif "cls_token" in name:
                new_names.append("class_token")
            elif "pos_embed" in name:
                new_names.append("position_embeddings")
        elif "input_conditioner" in name:
            continue;
        elif "mask_token" in name:
            new_names.append("mask_token")
        elif "inner.norm" in name:
            if "norm.weight" in name:
                new_names.append("ln_post.weight")
            elif "norm.bias" in name:
                new_names.append("ln_post.bias")
        elif "blocks" in name:
            layer_idx = name.split(".")[3]
            base = f"decoder.layers.{layer_idx}"

            if "attn.qkv.weight" in name:
                new_names.append(f"{base}.self_attention.linear_qkv.weight")
                new_tensors[0] = new_tensors[0][indices]
                chunk_dim = 0
            elif "attn.qkv.bias" in name:
                new_names.append(f"{base}.self_attention.linear_qkv.bias")
                new_tensors[0] = new_tensors[0][indices]
                chunk_dim = 0
            elif "attn.proj.weight" in name:
                new_names.append(f"{base}.self_attention.linear_proj.weight")
                chunk_dim = 1
            elif "attn.proj.bias" in name:
                new_names.append(f"{base}.self_attention.linear_proj.bias")
            elif "norm1.weight" in name:
                new_name = f"{base}.input_layernorm.weight"
                if use_te:
                    new_name = f"{base}.self_attention.linear_qkv.layer_norm_weight"
                new_names.append(new_name)
            elif "norm1.bias" in name:
                new_name = f"{base}.input_layernorm.bias"
                if use_te:
                    new_name = f"{base}.self_attention.linear_qkv.layer_norm_bias"
                new_names.append(new_name)
            elif "mlp.w12.weight" in name: 
                new_names.append(f"{base}.mlp.linear_fc1.weight")
                new_tensors[0] = new_tensors[0][mlp_indices]
                chunk_dim = 0
            elif "mlp.w12.bias" in name: 
                new_names.append(f"{base}.mlp.linear_fc1.bias")
                new_tensors[0] = new_tensors[0][mlp_indices]
                chunk_dim = 0
            elif "mlp.w3.weight" in name: 
                new_names.append(f"{base}.mlp.linear_fc2.weight")
                chunk_dim = 1
            elif "mlp.w3.bias" in name: 
                new_names.append(f"{base}.mlp.linear_fc2.bias")
            elif "norm2.weight" in name:
                new_name = f"{base}.pre_mlp_layernorm.weight"
                if use_te:
                    new_name = f"{base}.mlp.linear_fc1.layer_norm_weight"
                new_names.append(new_name)
            elif "norm2.bias" in name:
                new_name = f"{base}.pre_mlp_layernorm.bias"
                if use_te:
                    new_name = f"{base}.mlp.linear_fc1.layer_norm_bias"
                new_names.append(new_name)
            elif "ls1.grandma" in name:
                new_names.append(f"{base}.ls1")
            elif "ls2.grandma" in name:
                new_names.append(f"{base}.ls2")

        assert len(new_names) == len(new_tensors), f"{new_names} {new_tensors}"

        for new_name, new_tensor in zip(new_names, new_tensors):
            if chunk_dim is None:
                tp_new_tensors = [new_tensor for _ in range(tensor_parallel_size)]
            else:
                tp_new_tensors = torch.chunk(new_tensor, tensor_parallel_size, dim=chunk_dim)

            for i in range(tensor_parallel_size):
                # chunk() creates a view of a bigger tensor. clone() is used here to avoid excessive storage.
                new_state_dicts[i]["model"][new_name] = tp_new_tensors[i].clone()

                # TE sets _extra_state (for FP8 purposes), so set an empty one here for compatibility.
                extra_state_layers = ("linear_qkv", "linear_proj", "linear_fc1", "linear_fc2")
                is_extra_state_layer = any([l in new_name for l in extra_state_layers])
                if use_te and is_extra_state_layer:
                    layer = new_name.split(".")[-2]
                    if layer in extra_state_layers:
                        extra_state_name = (
                            new_name[: new_name.rfind(".") + 1] + "_extra_state"
                        )  # Replace the weight name.
                        new_state_dicts[i]["model"][extra_state_name] = None

    for i in range(tensor_parallel_size):
        output_dir_tp = os.path.join(output_path, "iter_0000001", f"mp_rank_0{i}")
        os.makedirs(output_dir_tp)
        output_path_tp = os.path.join(output_dir_tp, "model_optim_rng.pt")
        torch.save(new_state_dicts[i], output_path_tp)
        with open(os.path.join(output_path, "latest_checkpointed_iteration.txt"), "w") as f:
            f.write("1") 


def convert(output_path, tensor_parallel_size, use_te, model_type, version):
    if model_type == "radio_v2.5-h":
        convert_radio_h(output_path, tensor_parallel_size, use_te, version)
    elif model_type == "radio_v2.5-g":
        convert_radio_g(output_path, tensor_parallel_size, use_te, version)
    else:
        raise NotImplementedError(f"Converter doesn't support model type {model_type}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="""
Convert RADIO weights to megatron format.


Example usage:
python radio_converter.py --output /some/output/folder --tensor-parallel-size 4
""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        "--output", type=str, required=True, help="output directory for megatron state dict file(s)"
    )
    parser.add_argument(
        "--tensor-parallel-size", type=int, default=1, help="model tensor parallel size"
    )
    parser.add_argument("--use-te", action="store_true", help="Use Transformer Engine")
    parser.add_argument("--model-type", required=True, type=str, choices=['radio_v2.5-h', 'radio_v2.5-g'], help="Type of radio to load for conversion")
    parser.add_argument("--version", type=str, default=None, help="Version to pass to torch.hub.load. Can be a local path or a version RADIO on torch hub. By default use the version from the model type.")

    args = parser.parse_args()

    convert(args.output, args.tensor_parallel_size, args.use_te, args.model_type, args.version)

    print("done.")


================================================
FILE: examples/multimodal/model_converter/siglip_converter.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import argparse
import os
from transformers import PaliGemmaForConditionalGeneration
import torch


def convert(output_path, tensor_parallel_size, use_te):
    device = "cuda"

    model_id = "google/paligemma-3b-pt-448"
    model = PaliGemmaForConditionalGeneration.from_pretrained(model_id).eval()

    model = model.to(device)

    print(model.config)
    for name, tensor in model.state_dict().items():
        if "vision_model" not in name:
            continue
        shape_str = "(" + ", ".join([str(x) for x in tensor.shape]) + ")"
        print(f"{name:<75} {shape_str:>20}")

    state_dict = model.state_dict()
    new_state_dicts = [{"model": dict()} for _ in range(tensor_parallel_size)]

    def add_chunck_tensor(new_tensor, new_name, chunk_dim=None):
        if chunk_dim is None:
            new_tensors = [new_tensor for _ in range(tensor_parallel_size)]
        else:
            new_tensors = torch.chunk(new_tensor, tensor_parallel_size, dim=chunk_dim)

        for i in range(tensor_parallel_size):
            # chunk() creates a view of a bigger tensor. clone() is used here to avoid excessive storage.
            new_state_dicts[i]["model"][new_name] = new_tensors[i].clone()

            # TE sets _extra_state (for FP8 purposes), so set an empty one here for compatibility.
            extra_state_layers = ("linear_qkv", "linear_proj", "linear_fc1", "linear_fc2")
            is_extra_state_layer = any([l in new_name for l in extra_state_layers])
            if use_te and is_extra_state_layer:
                layer = new_name.split(".")[-2]
                if layer in extra_state_layers:
                    extra_state_name = (
                        new_name[: new_name.rfind(".") + 1] + "_extra_state"
                    )  # Replace the weight name.
                    new_state_dicts[i]["model"][extra_state_name] = None

    for name, tensor in state_dict.items():
        if tensor.dtype == torch.float16:
            state_dict[name] = tensor.to(torch.float32)

    add_chunck_tensor(
        state_dict["vision_tower.vision_model.embeddings.position_embedding.weight"],
        "position_embeddings.weight")
    add_chunck_tensor(
        state_dict["vision_tower.vision_model.embeddings.patch_embedding.weight"],
        "conv1.weight")
    add_chunck_tensor(
        state_dict["vision_tower.vision_model.embeddings.patch_embedding.bias"],
        "conv1.bias")

    head_dim = 72
    num_head = 16
    for layer_idx in range(27):
        origin_base = f"vision_tower.vision_model.encoder.layers.{layer_idx}"
        target_base = f"decoder.layers.{layer_idx}"

        for param_type in ["weight", "bias"]:
            # QKV
            q_proj_params = state_dict[f"{origin_base}.self_attn.q_proj.{param_type}"]
            k_proj_params = state_dict[f"{origin_base}.self_attn.k_proj.{param_type}"]
            v_proj_params = state_dict[f"{origin_base}.self_attn.v_proj.{param_type}"]
            # Do some tensor manipulation because megatron expect one tensor
            # projection for the QKV in the order
            # [(Q1, K1, V1), (Q2, K2, V2), ...] where Qi is the query of the
            # i-th head with dimension num_head.
            new_tensor = torch.concatenate([
                q_proj_params.view(num_head, head_dim, -1),
                k_proj_params.view(num_head, head_dim, -1),
                v_proj_params.view(num_head, head_dim, -1)], axis=1).view(
                    3*head_dim*num_head, -1)
            if param_type == "bias":
                new_tensor = new_tensor[:, 0]
            new_name = f"{target_base}.self_attention.linear_qkv.{param_type}"
            add_chunck_tensor(new_tensor, new_name, chunk_dim=0)
            # linear_proj
            add_chunck_tensor(
                state_dict[f"{origin_base}.self_attn.out_proj.{param_type}"],
                f"{target_base}.self_attention.linear_proj.{param_type}",
                chunk_dim=1 if param_type == "weight" else None)
            # layer_norm
            new_name = f"{target_base}.input_layernorm.{param_type}"
            if use_te:
                new_name = f"{target_base}.self_attention.linear_qkv.layer_norm_{param_type}"
            add_chunck_tensor(
                state_dict[f"{origin_base}.layer_norm1.{param_type}"],
                new_name)
            # FC 1
            add_chunck_tensor(
                state_dict[f"{origin_base}.mlp.fc1.{param_type}"],
                f"{target_base}.mlp.linear_fc1.{param_type}",
                chunk_dim=0)
            # FC 2
            add_chunck_tensor(
                state_dict[f"{origin_base}.mlp.fc2.{param_type}"],
                f"{target_base}.mlp.linear_fc2.{param_type}",
                chunk_dim=1 if param_type=="weight" else None)
            # layer_norm
            new_name = f"{target_base}.pre_mlp_layernorm.{param_type}"
            if use_te:
                new_name = f"{target_base}.mlp.linear_fc1.layer_norm_{param_type}"
            add_chunck_tensor(
                state_dict[f"{origin_base}.layer_norm2.{param_type}"],
                new_name)

    add_chunck_tensor(
        state_dict["vision_tower.vision_model.post_layernorm.weight"],
        "ln_post.weight")
    add_chunck_tensor(
        state_dict["vision_tower.vision_model.post_layernorm.bias"],
        "ln_post.bias")

    for i in range(tensor_parallel_size):
        output_dir_tp = os.path.join(output_path, "iter_0000001", f"mp_rank_0{i}")
        os.makedirs(output_dir_tp)
        output_path_tp = os.path.join(output_dir_tp, "model_optim_rng.pt")
        torch.save(new_state_dicts[i], output_path_tp)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="""
Convert SigLIP weights to megatron format.


Example usage:
python siglip_converter.py --tensor-parallel-size 4 --output google_paligemma_3b_pt_44_mcore_tp_4 --use-te

examples/multimodal/combine_mistral_clip.sh Mistral-7B-Instruct-v0.3-mcore-tp4 google_paligemma_3b_pt_44_mcore_tp_4 mistral_7b_instruct_v0p3_google_paligemma_3b_pt_44_mcore_tp_4
""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--output", type=str, required=True, help="output directory for megatron state dict file(s)"
    )
    parser.add_argument(
        "--tensor-parallel-size", type=int, default=1, help="model tensor parallel size"
    )
    parser.add_argument("--use-te", action="store_true", help="Use Transformer Engine")

    args = parser.parse_args()

    convert(args.output, args.tensor_parallel_size, args.use_te)

    print("done.")


================================================
FILE: examples/multimodal/model_converter/vision_model_tester.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import argparse
import os
import sys

# Add megatron and the multimodal example to the path.
sys.path.append(
    os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir, os.path.pardir)
    )
)
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))

import torch
from transformers import AutoModel

from examples.multimodal.model import model_provider
from examples.multimodal.multimodal_args import add_multimodal_extra_args
from megatron.training import get_model
from megatron.training.checkpointing import load_checkpoint
from megatron.training.initialize import initialize_megatron


def run_mcore_vision(model_path):
    """Run mcore vision model."""
    os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "1"

    # Megatron has some mandatory flags.
    sys.argv = [
        "ignore_me.py",
        "--micro-batch-size=1",
        "--num-layers=2",
        "--vision-model-type=internvit",
        "--language-model-type=mistral_7b",
        "--tokenizer-prompt-format=mistral",
        "--tokenizer-type=MultimodalTokenizer",
        "--tokenizer-model=mistralai/Mistral-7B-Instruct-v0.3",
        "--vocab-size=1024",
        "--hidden-size=64",
        "--num-attention-heads=8",
        "--seq-length=1024",
        "--decoder-seq-length=2048",
        "--max-position-embeddings=2048",
        "--bf16",
        "--img-h=448",
        "--img-w=448",
        "--patch-dim=14",
        "--tensor-model-parallel-size=8",
        "--use-te",
        f"--pretrained-checkpoint={model_path}",
    ]

    initialize_megatron(extra_args_provider=add_multimodal_extra_args)

    def wrapped_model_provider(pre_process, post_process):
        return model_provider(pre_process, post_process, parallel_output=False)

    # Set up model and load checkpoint.
    model = get_model(wrapped_model_provider, wrap_with_ddp=False)

    vision_model = model[0].module.vision_model

    load_checkpoint([vision_model], None, None)

    vision_model.eval()

    images = torch.ones((1, 3, 448, 448), dtype=torch.bfloat16, device="cuda")

    output = vision_model(images)

    return output


def run_hf_vision(model_name):
    """Run HF vision model."""
    model = (
        AutoModel.from_pretrained(model_name, torch_dtype=torch.bfloat16, trust_remote_code=True)
        .cuda()
        .eval()
    )

    images = torch.ones((1, 3, 448, 448), dtype=torch.bfloat16, device="cuda")

    outputs = model(images, return_dict=True)

    return outputs


def main(mcore_model, hf_model):
    """Compare vision model outputs between mcore and HF given the same fixed input."""
    mcore = run_mcore_vision(mcore_model)

    if torch.distributed.get_rank() == 0:
        hf = run_hf_vision(hf_model)
        hf = hf["last_hidden_state"]

        # Compare logits. Due to different attention implementations and other details,
        # there will be numerical differences.
        diff = (mcore - hf).abs()
        mean_diff = diff.mean().item()
        max_diff = diff.max().item()
        print(f"mean diff {mean_diff}, max diff {max_diff}")
        assert mean_diff < 0.1, "mean output difference is greater than expected"
        assert max_diff < 50, "max output difference is greater than expected"

        print("lgtm")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Check mcore vision model output vs. HF numerically.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--mcore-model", type=str, required=True, help="directory for mcore model weights"
    )
    parser.add_argument("--hf-model", type=str, required=True, help="Model name in HF")

    args = parser.parse_args()

    main(args.mcore_model, args.hf_model)


================================================
FILE: examples/multimodal/multimodal_args.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
from megatron.core.models.multimodal.llava_model import IMAGE_TOKEN


def add_multimodal_extra_args(parser):
    """Extra arguments."""
    group = parser.add_argument_group(title='multimodal arguments')
    group.add_argument('--dataset-config', type=str, default=None)
    group.add_argument("--prompt-path", type=str, default=None)
    group.add_argument('--freeze-LM', action='store_true', default=False)
    group.add_argument('--freeze-ViT', action='store_true', default=False)
    group.add_argument('--language-model-type', type=str, required=True)
    group.add_argument('--vision-model-type', type=str, default="clip")
    group.add_argument("--disable-vision-class-token", action="store_true", default=False)
    group.add_argument(
        "--allow-missing-vision-projection-checkpoint", action="store_true", default=False
    )
    group.add_argument("--use-te", action="store_true", default=False)
    group.add_argument(
        "--dataloader-save", type=str, default=None, help="Energon dataloader state save path"
    )
    group.add_argument(
        "--use-tiling", action="store_true", default=False, help="Use input image tiling"
    )
    group.add_argument("--max-num-tiles", type=int, default=1, help="Maximum number of image tiles")
    group.add_argument(
        "--use-thumbnail", action="store_true", default=False, help="Add image thumbnail as a tile"
    )
    group.add_argument(
        "--dataloader-seq-length",
        type=int,
        help="Make dataloader to produce sequences of specific length.",
    )
    group.add_argument(
        "--num-frames",
        type=int,
        default=1,
        help="Number of frames to regularly sample from the video as input to the model.",
    )
    group.add_argument(
        "--online-evaluation-config", type=str, help="Config file for online evaluation."
    )
    group.add_argument(
        "--special-tokens",
        nargs="*",
        default=[IMAGE_TOKEN],
        help="Special tokens used in the multimodal model",
    )
    group.add_argument(
        "--tokenizer-prompt-format",
        type=str,
        choices=["mistral", "llama3", "chatml", "nvlm-yi-34b", "qwen2p0", "qwen2p5", "llama3p1", "nemotron5",
                 "nemotron5-aligned"],
        required=True,
        help="Prompt format to use with the tokenizer.",
    )
    group.add_argument("--pixel-shuffle", action="store_true", default=False)
    group.add_argument(
        "--image-tag-type",
        type=str,
        choices=["nvlm", "internvl", ""],
        default="",  # Default: Image tag not used.
        help="Surround image tokens with tags.",
    )
    group.add_argument("--use-tile-tags", action="store_true", default=False, help="Use tile tags")
    group.add_argument(
        "--packing-buffer-size",
        type=int,
        default=None,   # Packing is disabled by default.
        help="Enable sample packing by setting the buffer size to > 0",
    )
    group.add_argument(
        "--packing-seq-length", type=int, default=0, help="Packing sequence length. Must be > 0 if using packing."
    )
    group.add_argument(
        "--recompute-vision", action="store_true", default=False, help="Enable activation checkpointing in the vision model"
    )
    group.add_argument(
        "--use-loss-scaling", action="store_true", default=False, help="Scale loss based on conversation turn length (in tokens)."
    )
    group.add_argument(
        "--force-system-message", action="store_true", default=False, help="Force a specific system message"
    )
    group.add_argument("--eos-id", type=int, help="termination id for MultiModal Tokenizer")
    group.add_argument(
        "--use-area-weighted-aspect-ratio", action="store_true", default=False,
        help=(
            "When --use-tiling is True, find the aspect ratio to use based on the original ",
            "image aspect ratio and the area covered by the tiles.")
    )
    group.add_argument("--use-mcore-inference", action="store_true", default=False, help="Use the MCore inference API")

    return parser


================================================
FILE: examples/multimodal/nvlm/README.md
================================================
NVLM
====

Please refer to the [NVLM paper](https://arxiv.org/pdf/2409.11402) for details.

*NOTE: VLMs in Megatron are under active development and are expected to change.*

# Checkpoints

NVLM 1.0 model weights are publicly available in HuggingFace and Megatron format.

- NVLM-1.0-D 72B [HuggingFace version](https://huggingface.co/nvidia/NVLM-D-72B)
- NVLM-1.0-D 72B [Megatron-Core version](https://huggingface.co/nvidia/NVLM-D-72B-mcore) 

# Setup

## Docker image

Please use `examples/multimodal/Dockerfile`.

## Dataset preparation

Please refer to Tables 4 and 6 in the [NVLM paper](https://arxiv.org/pdf/2409.11402) for full list of pretrain and SFT datasets.
Please refer to https://nvidia.github.io/Megatron-Energon/data_prep.html on preparing datasets in the Megatron Energon format.

## Model conversion

### Vision model

NVLM 1.0 models use [OpenGVLab/InternViT-6B-448px-V1-5](https://huggingface.co/OpenGVLab/InternViT-6B-448px-V1-5) from HuggingFace.
Please download it and run the following command to convert it to Megatron format.
```
python examples/multimodal/model_converter/internvit_converter.py --output-dir <some output dir> --use-te --tensor-parallel-size 8
```

### 34B Language model

NVLM 1.0 34B starts from [NousResearch/Nous-Hermes-2-Yi-34B](https://huggingface.co/NousResearch/Nous-Hermes-2-Yi-34B) from HuggingFace.
Please download it and run the following command to convert it to Megatron format.
```
python tools/checkpoint/convert.py --bf16 --model-type GPT --loader llama_mistral --saver mcore --target-tensor-parallel-size 8 --checkpoint-type hf \
    --load-dir <hf model directory> --save-dir <output dir> --tokenizer-model <hf model name/directory> \
    --saver-transformer-impl transformer_engine --model-size yi-34B --make-vocab-size-divisible-by 1
```

### 72B Language model

NVLM 1.0 72B starts from [Qwen/Qwen2-72B-Instruct](https://huggingface.co/Qwen/Qwen2-72B-Instruct) from HuggingFace.
Please download it and run the following command to convert it to Megatron format.
```
python tools/checkpoint/convert.py --bf16 --model-type GPT --loader llama_mistral --saver mcore --target-tensor-parallel-size 8 --checkpoint-type hf \
    --load-dir <hf model directory> --save-dir <output directory> --tokenizer-model <hf model name/directory> \
    --saver-transformer-impl transformer_engine --model-size qwen2.5-72Bf
```

### Combined checkpoint

Combine the vision model checkpoint from [InternVit](#internvit) with the [34B](#34b-language-model) or [72B](#72b-language-model) language model by running:
```
examples/multimodal/combine_lm_vision_checkpoints.sh <language model directory> <vision model directory> <output directory> nvlm
```

# Training

## 34B

1. Pretraining: please run `examples/multimodal/nvlm/pretrain_yi_34b_internvit_6b.sh`. Please use the InternViT + 34B [combined checkpoint](#combined-checkpoint) and tokenizer from HuggingFace.
2. SFT: please run `examples/multimodal/nvlm/sft_34b_internvit.sh` using the checkpoint from 1.

## 72B

1. Pretraining: please run `examples/multimodal/nvlm/pretrain_qwen20_72b_internvit_6b.sh`. Please use the InternViT + 72B [combined checkpoint](#combined-checkpoint) and tokenizer from HuggingFace.
2. Convert the pretraining checkpoint from 1. to have pipeline parallel size = 4 for SFT. Please run
```
examples/multimodal/nvlm/pp_checkpoint_converter.py --input <pretrained checkpoint directory> \
--input-pipeline-parallel 1 --output <some output dir> --output-pipeline-parallel 4 \
--tensor-parallel 8
```
3. SFT: please run `examples/multimodal/nvlm/sft_qwen20_72b_internvit_6b.sh` using the checkpoint from 2.
4. To convert the checkpoint with pipeline parallel size = 4 back to 1 for evaluation, please run
```
examples/multimodal/nvlm/pp_checkpoint_converter.py --input <sft checkpoint directory> \
--input-pipeline-parallel 4 --output <some output dir> --output-pipeline-parallel 1 \
--tensor-parallel 8
```

# Evaluation

Run the text generation script.
- 34B
```
examples/multimodal/nvlm/run_text_generation_yi_34b_internvit_6b.sh --input-image-path /path/to/input/images --output-path /some/output/directory \
    --model-path /path/to/model.pt --gt-path /path/to/groundtruth/file --task generation-task-name --use-tiling
```
- 72B
```
examples/multimodal/nvlm/run_text_generation_qwen20_72b_internvit_6b.sh --input-image-path /path/to/input/images --output-path /some/output/directory \
    --model-path /path/to/model.pt --gt-path /path/to/groundtruth/file --task generation-task-name --use-tiling
```

where `--task generation-task-name` is the name of the evaluation benchmark such as `captioning`, `MMMU` or `TextVQA`.

Then, run one of the evaluation scripts from `examples/multimodal`. For example

```
python examples/multimodal/evaluate_mmmu.py --input-path /output/directory/from/generation
```


================================================
FILE: examples/multimodal/nvlm/internvit.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
""""
NOTE: NVLM uses InternViT with tensor parallel (TP) size = 8.
Since InternViT has 25 attention heads and Megatron currently requires the number of attention heads
to be divisible by the TP size, we add 7 dummy zero attention heads to have 32 attention heads.

This workaround requires some changes to how we compute RMSNorm, Attention etc.

Additionally, InternViT introduces some unique features like Layer Scaling.

Those code changes are gathered here.
"""
from functools import partial

import torch

from examples.multimodal.layer_scaling import (
    LayerScalingTransformerLayer,
    get_bias_dropout_add_layer_scaling,
)
from megatron.core.extensions.transformer_engine import (
    TEColumnParallelLinear,
    TEDotProductAttention,
    TERowParallelLinear,
)
from megatron.core.parallel_state import (
    get_tensor_model_parallel_group,
    get_tensor_model_parallel_rank,
    get_tensor_model_parallel_world_size,
)
from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear
from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules
from megatron.core.transformer.dot_product_attention import DotProductAttention
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.transformer_layer import TransformerLayerSubmodules
from megatron.core.transformer.utils import make_sharded_tensors_for_checkpoint
from megatron.core.typed_torch import copy_signature, not_none
from megatron.core.utils import divide

try:
    import apex

    from megatron.core.fusions.fused_layer_norm import FusedLayerNorm
    from megatron.core.transformer.torch_norm import WrappedTorchNorm

    HAVE_APEX = True
    LNImpl = FusedLayerNorm
except ImportError:
    import warnings

    from megatron.core.transformer.torch_norm import WrappedTorchNorm

    warnings.warn(f'Apex is not installed. Falling back to Torch Norm')
    LNImpl = WrappedTorchNorm


class InternViTRMSNorm(MegatronModule):

    def __init__(
        self,
        config: TransformerConfig,
        hidden_size: int,
        eps: float = 1e-6,
        sequence_parallel: bool = False,
        compute_var: bool = False,
    ):
        """Custom RMSNorm for InternViT.

        Args:
            config (TransformerConfig): Config.
            hidden_size (int): Input hidden size.
            eps (float): epsilon to use for the norm, default to 1e-6
            sequence_parallel (bool): Set to true if sequence parallelism is being used,
              this marks the weights as needing to be allreduced.
            compute_var (bool): Indicator to compute statistic manually.
        """
        super().__init__(config=config)
        self.config = config
        self.eps = eps
        self.weight = torch.nn.Parameter(torch.ones(hidden_size))
        self._compute_var = compute_var

        assert not sequence_parallel, "Sequence parallelism is not supported with InternViT."

        setattr(self.weight, 'sequence_parallel', sequence_parallel)

    def _norm(self, x, var):
        if var is None:
            var = x.pow(2).mean(-1, keepdim=True)

        return x * torch.rsqrt(var + self.eps)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run RMSNorm with an option to compute custom statistic."""
        var = None
        if self._compute_var:
            unpadded_hidden_size = self.config.hidden_size  # 3200
            max_dim = x.shape[-1]  # 128

            x = x.reshape(x.size(0), x.size(1), -1)
            var = self._gather_var(x.float().pow(2), max_dim) / unpadded_hidden_size

        output = self._norm(x.float(), var).type_as(x)
        output = output * self.weight

        if self._compute_var:
            output = output.reshape(output.size(0), output.size(1), -1, max_dim)

        return output

    def _gather_var(self, input_, max_dim):
        """Compute statistic across the non-dummy heads."""
        world_size = get_tensor_model_parallel_world_size()

        # Size and dimension.
        last_dim = input_.dim() - 1
        rank = get_tensor_model_parallel_rank()

        num_attention_heads_per_partition = divide(self.config.num_attention_heads, world_size)
        valid_ranks = 24 // num_attention_heads_per_partition

        residual_heads = 25 % num_attention_heads_per_partition
        if residual_heads == 0:
            residual_heads = num_attention_heads_per_partition
        max_dim = max_dim * residual_heads

        if rank < valid_ranks:  # Ranks without any dummy attention heads.
            var = input_.sum(-1, keepdim=True)
        elif (
            rank == valid_ranks
        ):  # The only rank which may contain 'residual_heads' dummy attention heads.
            var = input_[..., :max_dim].sum(-1, keepdim=True)
        else:
            var = (
                input_.sum(-1, keepdim=True) * 0.0
            )  # All heads in these ranks are dummy heads: Zero-out.

        tensor_list = [torch.empty_like(var) for _ in range(world_size)]
        tensor_list[rank] = var
        torch.distributed.all_gather(tensor_list, var, group=get_tensor_model_parallel_group())

        output = torch.cat(tensor_list, dim=last_dim).contiguous()

        return output.sum(-1, keepdim=True)

    def sharded_state_dict(self, prefix='', sharded_offsets=(), metadata={}):

        # in InternVitSelfAttention the q_layernorm and k_layernorm weights
        # are tensor-parallel so must be converted to sharded tensors
        if 'q_layernorm' in prefix or 'k_layernorm' in prefix:
            state_dict = self.state_dict(prefix='', keep_vars=True)
            return make_sharded_tensors_for_checkpoint(
                state_dict, prefix, {'weight': 0}, sharded_offsets
            )
        else:
            return super().sharded_state_dict(prefix, sharded_offsets, metadata)


def get_mlp_module_spec(use_te: bool = True) -> ModuleSpec:
    # Dense MLP w/ or w/o TE modules.
    return ModuleSpec(
        module=MLP,
        submodules=MLPSubmodules(
            linear_fc1=TEColumnParallelLinear if use_te else ColumnParallelLinear,
            linear_fc2=TERowParallelLinear if use_te else RowParallelLinear,
        ),
    )


# Override a few things that are special in InternViT and not supported by the SelfAttention class.
class InternViTSelfAttention(SelfAttention):
    @copy_signature(SelfAttention.__init__)
    def __init__(
        self, config: TransformerConfig, submodules: SelfAttentionSubmodules, *args, **kwargs
    ):
        super().__init__(config=config, submodules=submodules, *args, **kwargs)

        # Need to override linear_qkv, q_layernorm and k_layernorm.
        qkv_bias = False

        self.linear_qkv = submodules.linear_qkv(
            self.config.hidden_size,
            self.query_projection_size + 2 * self.kv_projection_size,
            config=self.config,
            init_method=not_none(self.config.init_method),
            gather_output=False,
            bias=qkv_bias,
            skip_bias_add=False,
            is_expert=False,
            tp_comm_buffer_name='qkv',
        )

        qk_layernorm_hidden_size = (
            self.hidden_size_per_attention_head * self.num_attention_heads_per_partition
        )  # 512 for internvit

        self.q_layernorm = not_none(submodules.q_layernorm)(
            hidden_size=qk_layernorm_hidden_size,
            config=self.config,
            eps=self.config.layernorm_epsilon,
        )

        self.k_layernorm = not_none(submodules.k_layernorm)(
            hidden_size=qk_layernorm_hidden_size,
            config=self.config,
            eps=self.config.layernorm_epsilon,
        )


class InternViTTEDotProductAttention(TEDotProductAttention):
    """Adjusted Attention for InternViT"""

    @copy_signature(TEDotProductAttention.forward)
    def forward(self, *args, **kwargs):
        """Regular TEDotProductAttention + zero-out dummy attention heads."""
        out = super().forward(*args, **kwargs)

        # This makes sure the dummy attention heads are zeroed out.
        mask = torch.ones_like(out, dtype=out.dtype, device=out.device)
        rank = get_tensor_model_parallel_rank()
        max_dim = out.shape[-1]  # 128
        valid_ranks = 6

        if rank == valid_ranks:
            mask[..., max_dim:] *= 0.0
        elif rank > valid_ranks:
            mask *= 0.0
        out *= mask

        return out


def get_internvit_layer_spec(use_te) -> ModuleSpec:
    mlp = get_mlp_module_spec(use_te)  # no norm

    return ModuleSpec(
        module=LayerScalingTransformerLayer,
        submodules=TransformerLayerSubmodules(
            input_layernorm=InternViTRMSNorm,
            self_attention=ModuleSpec(
                module=InternViTSelfAttention,
                params={"attn_mask_type": AttnMaskType.no_mask},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=TEColumnParallelLinear if use_te else ColumnParallelLinear,
                    core_attention=TEDotProductAttention if use_te else DotProductAttention,
                    linear_proj=TERowParallelLinear if use_te else RowParallelLinear,
                    q_layernorm=partial(InternViTRMSNorm, compute_var=True),
                    k_layernorm=partial(InternViTRMSNorm, compute_var=True),
                ),
            ),
            self_attn_bda=get_bias_dropout_add_layer_scaling,
            pre_mlp_layernorm=InternViTRMSNorm,
            mlp=mlp,
            mlp_bda=get_bias_dropout_add_layer_scaling,
        ),
    )


def get_internvit300M_layer_spec(use_te) -> ModuleSpec:
    mlp = get_mlp_module_spec(use_te)  # no norm

    return ModuleSpec(
        module=LayerScalingTransformerLayer,
        submodules=TransformerLayerSubmodules(
            input_layernorm=LNImpl,
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": AttnMaskType.no_mask},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=TEColumnParallelLinear if use_te else ColumnParallelLinear,
                    core_attention=TEDotProductAttention if use_te else DotProductAttention,
                    linear_proj=TERowParallelLinear if use_te else RowParallelLinear,
                    q_layernorm=None,
                    k_layernorm=None,
                ),
            ),
            self_attn_bda=get_bias_dropout_add_layer_scaling,
            pre_mlp_layernorm=LNImpl,
            mlp=mlp,
            mlp_bda=get_bias_dropout_add_layer_scaling,
        ),
    )


================================================
FILE: examples/multimodal/nvlm/nvlm_prompts.json
================================================
{
    "COMMENT": "Mixture of our own custom prompts and some prompts from https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain/viewer and https://huggingface.co/datasets/HuggingFaceM4/M3IT",
    "Captioning": {
        "raw": [
        "Can you briefly explain what you see in the image?",
        "Describe what's happening in this image in one short sentence.",
        "Write a short caption that accurately represents the content of this image.",
        "Please generate a descriptive caption for the image provided.",
        "How would you summarize the scene depicted in the picture in short?",
        "Describe the image briefly.",
        "Write a succinct description of the image, capturing its main components, the relationships between them, and any notable details.",
        "Create a concise caption that accurately describes the main elements in the image provided.",
        "Write a brief, yet comprehensive, description of the image.",
        "Describe the image in a clear and concise manner.",
        "For the given image, provide a one-sentence summary that captures the most important details.",
        "Generate a short caption for the picture.",
        "Write a short and informative description that highlights the primary subjects and actions occurring in the given image.",
        "Provide a concise and informative caption for the image, focusing on the primary subjects.",
        "Write a clear description of the image, make sure the key features are well covered.",
        "Offer a succinct explanation of the picture presented."
        ]
    },
    "CaptioningPretraining": {
        "raw": [
        "Give a brief description of image.",
        "Give a brief description of the image.",
        "Provide a brief description of the given image.",
        "Provide a one-sentence caption for the provided image.",
        "Write a terse but informative summary of the picture.",
        "Describe the image concisely.",
        "Generate a clear and concise summary of the photo."
        ]
    },
    "CaptioningSFT": {
        "raw": [
        "Give a brief description of the image.",
        "Give a short and clear explanation of the subsequent image.",
        "Present a compact description of the photo's key features.",
        "Provide a brief description of the given image.",
        "Provide a one-sentence caption for the provided image.",
        "Render a clear and concise summary of the photo.",
        "Share a concise interpretation of the image provided.",
        "Summarize the visual content of the image.",
        "Write a terse but informative summary of the picture.",
        "Describe the image concisely."
        ]
    },
    "VQAPretraining": {
        "raw": [
        "Question: {} Short answer:",
        "Question: {} Answer:"
        ]
    },
    "VQASFT": {
        "raw": [
        "{}",
        "{}\nAnswer the question using a single word or phrase."
        ],
        "docvqa": [
        "{}",
        "{}\nAnswer this question using the text in the image directly."
        ]
    },
    "DocPretraining": {
        "raw": [
        "Retrieve the text from the given pdf image.",
        "Extract the text from the provided document.",
        "Transcribe the text displayed in the image."
        ],
        "ocr_multi": [
        "Apply grounded Optical Character Recognition (OCR) to the provided image.",
        "Extract all texts and their bounding boxes from the given image using grounded OCR.",
        "Extract and transcribe all visible text from the provided image, ensuring accurate spatial recognition.",
        "Conduct a detailed optical character recognition analysis on this image, maintaining the text's original layout and positioning.",
        "Execute a thorough text recognition procedure on this visual input, ensuring that the spatial arrangement of the text is accurately represented.",
        "Perform an in-depth OCR scan of the image, capturing both the content and contextual positioning of all textual information.",
        "OCR with grounding:"
        ],
        "md": [
        "Extract the text from the given image and format it in Markdown.",
        "Convert the text from the provided image into Markdown format.",
        "Transform the text from the given image into Markdown syntax.",
        "Extract and convert the text from the image to Markdown.",
        "Retrieve the text from the image and present it in Markdown format."
        ],
        "grounded_ocr": [
        "{}. Text:",
        "Recognize the text in this region: {}.",
        "Identify the text in this area: {}.",
        "Detect the text within this section: {}."
        ],
        "referring_grounding": [
        "Region of \"{}\" is:",
        "Locate the text \"{}\" in the image.",
        "Identify the text \"{}\" in the image and provide the coordinates."
        ]
    },
    "CaptioningDetailed": {
        "raw": [
        "Create a comprehensive paragraph that captures the essence of the image while weaving a cohesive narrative around its elements.",
        "Compose a paragraph that thoroughly describes the image's content, providing context and connections between different aspects of the scene.",
        "Provide a detailed, paragraph-length description of the image that paints a vivid picture and tells a coherent story.",
        "Write a rich and engaging paragraph that delves into the image's components, describing not only what is seen but also how the elements relate to one another.",
        "Give a well-rounded, paragraph-length explanation of the image, describing the scene and its components while forming a complete and engaging narrative.",
        "Produce a paragraph that not only describes the individual elements in the image but also weaves them together to form a cohesive, connected account.",
        "Construct a paragraph that captures the image's details and context, offering a more in-depth and engaging story than a simple caption.",
        "Compose a descriptive paragraph that brings the image to life through detailed storytelling, connecting the various visual elements into a unified narrative.",
        "Create a paragraph that provides an extensive and interconnected description of the image, ensuring that the narrative is both detailed and cohesive.",
        "Write a compelling and detailed paragraph that delves into the image's components, linking them together to create a unified and engaging story."
        ]
    },
    "OCR": {
        "raw": [
        "Can you read the text from image and output here?",
        "Extract and document the text from the provided image.",
        "Converting the text embedded in this image into a readable document.",
        "Transcribe all the text you find.",
        "Can you extract all visible text from the image here?"
        ],
        "markdown": [
        "Can you extract all visible text from the provided image?",
        "Converting the text embedded in this image into a readable markdown document.",
        "Can you read the text in the document as markdown?",
        "Transcribe the document as markdown.",
        "Extract and document the text from the provided image."
        ],
        "table_markdown": [
        "Can you extract all visible text from the provided table?",
        "Can you read the text in the provided table as markdown?",
        "Transcribe the table as markdown.",
        "Extract and document the text from the provided table image."
        ],
        "plain": [
        "Transcribe the document as plain text.",
        "Extract and document the text from the provided image.",
        "Converting the text embedded in this image into a readable document.",
        "Transcribe all the text you find.",
        "Can you extract all visible text from the image here?"
        ],
        "bbox_plain": [
        "Transcribe the document as plain text along with bounding boxes.",
        "Extract and document the text from the provided image along with bounding boxes.",
        "Converting the text embedded in this image into a readable documen along with bounding boxes.",
        "Can you extract all visible text with bounding boxes from the image here?"
        ]
    },
    "VQA": {
        "raw": [
        "Given the image, answer the following question with few words.",
        "Answer the following question: ",
        "What is the answer to this question?",
        "Write the answer: ",
        "Please answer this question: "
        ]
    },
    "Embedded": {
        "raw": [
        "Given the image, answer the following question with few words.",
        "Answer the following question: ",
        "What is the answer to this question?",
        "Write the answer: ",
        "Please answer this question: "
        ]
    }
}


================================================
FILE: examples/multimodal/nvlm/pp_checkpoint_converter.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
import argparse
import os
import sys

import torch

# Add megatron to the path.
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir, os.path.pardir))
)


def split(input_dir, base_output_dir, input_pp, output_pp, num_tp, num_layers_per_pp_rank):
    """Split pipeline parallel size = 1 checkpoint to pipeline parallel size N."""

    iter = args.iteration if args.iteration else 1
    for tp in range(num_tp):
        path = os.path.join(input_dir, f"mp_rank_0{tp}", "model_optim_rng.pt")
        sd = torch.load(path)

        if num_layers_per_pp_rank is None:
            num_layers = sd["args"].num_layers
            assert num_layers % output_pp == 0, "specify --num-layers-per-pp-rank for an uneven split"
            num_layers_per_pp_rank = [num_layers // output_pp] * output_pp

        layer_lb = 0
        for pp in range(output_pp):
            assert num_layers_per_pp_rank[pp] > 0, "each pp rank must have at least 1 layer"
            layer_ub = layer_lb + num_layers_per_pp_rank[pp]

            new_sd = sd.copy()
            new_sd["model"] = dict()
            for k, v in sd["model"].items():
                # First pp rank has vision model.
                if pp == 0 and ("vision_model" in k or "vision_projection" in k):
                    new_sd["model"][k] = v
                    continue

                # Only the first pp rank has the word embeddings.
                if "language_model.embedding.word_embeddings" in k and pp == 0:
                    new_sd["model"][k] = v

                # Only the last pp rank has the output layer.
                if "language_model.output_layer" in k and pp == output_pp - 1:
                    new_sd["model"][k] = v

                # Only the last pp rank has final layer norm.
                if pp == output_pp - 1 and (
                    "language_model.decoder.final_norm" in k  # Mamba model
                    or "language_model.decoder.final_layernorm" in k  # GPT model
                ):
                    new_sd["model"][k] = v

                if "language_model.decoder.layers" in k:
                    layer_num = int(k.split(".")[3])

                    if layer_lb <= layer_num and layer_num < layer_ub:
                        # On all pp ranks, megatron starts layer nums from 0!
                        new_layer_num = int(layer_num - layer_lb)

                        k_splitted = k.split(".")
                        k_splitted[3] = str(new_layer_num)
                        new_k = ".".join(k_splitted)

                        new_sd["model"][new_k] = v

            output_dir = os.path.join(base_output_dir, f"iter_{iter:0>7}/mp_rank_0{tp}_00{pp}")
            os.makedirs(output_dir, exist_ok=True)
            output_path = os.path.join(output_dir, "model_optim_rng.pt")
            torch.save(new_sd, output_path)

            print(f"processed tp rank: {tp}/{num_tp - 1} and pp rank: {pp}/{output_pp - 1}")

            layer_lb = layer_ub

    # This is needed for megatron checkpoint loading.
    with open(os.path.join(base_output_dir, "latest_checkpointed_iteration.txt"), "w") as f:
        f.write(f"{iter}")


def combine(input_dir, base_output_dir, input_pp, output_pp, num_tp, num_layers_per_pp_rank):
    """Combine pipeline parallel size = N checkpoint to pipeline parallel size 1."""
    iter = args.iteration if args.iteration else 1
    for tp in range(num_tp):
        new_sd = None

        layer_num_offset = 0
        max_layer_num = 0

        for pp in range(input_pp):
            path = os.path.join(input_dir, f"mp_rank_0{tp}_00{pp}", "model_optim_rng.pt")
            sd = torch.load(path)

            if pp == 0:
                new_sd = sd.copy()
                new_sd["model"] = dict()
                new_sd["args"].pipeline_model_parallel_size = 1

            assert new_sd is not None

            for k, v in sd["model"].items():
                # First pp rank has vision model.
                if pp == 0 and ("vision_model" in k or "vision_projection" in k):
                    new_sd["model"][k] = v
                    continue

                # Only the first pp rank has the word embeddings.
                if "language_model.embedding.word_embeddings" in k and pp == 0:
                    new_sd["model"][k] = v

                # Only the last pp rank has the output layer.
                if "language_model.output_layer" in k and pp == input_pp - 1:
                    new_sd["model"][k] = v

                # Only the last pp rank has final layer norm.
                if pp == output_pp - 1 and (
                    "language_model.decoder.final_norm" in k  # Mamba model
                    or "language_model.decoder.final_layernorm" in k  # GPT model
                ):
                    new_sd["model"][k] = v

                if "language_model.decoder.layers" in k:
                    layer_num = int(k.split(".")[3])

                    # On all pp ranks, megatron starts layer nums from 0!
                    new_layer_num = layer_num_offset + layer_num

                    if new_layer_num > max_layer_num:
                        max_layer_num = new_layer_num

                    k_splitted = k.split(".")
                    k_splitted[3] = str(new_layer_num)
                    new_k = ".".join(k_splitted)

                    new_sd["model"][new_k] = v

            print(f"processed tp rank: {tp}/{num_tp - 1} and pp rank: {pp}/{input_pp - 1}")

            layer_num_offset = max_layer_num + 1

        output_dir = os.path.join(base_output_dir, f"iter_{iter:0>7}/mp_rank_0{tp}")
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, "model_optim_rng.pt")
        torch.save(new_sd, output_path)

    # This is needed for megatron checkpoint loading.
    with open(os.path.join(base_output_dir, "latest_checkpointed_iteration.txt"), "w") as f:
        f.write(f"{iter}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Change pipeline parallelism for a model",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        "--input", type=str, required=True, help="Input model directory"
    )
    parser.add_argument(
        "--input-pipeline-parallel", type=int, required=True, help="Input model pipeline parallelism"
    )
    parser.add_argument(
        "--output", type=str, required=True, help="Output model directory"
    )
    parser.add_argument(
        "--output-pipeline-parallel", type=int, required=True, help="Output model pipeline parallelism"
    )
    parser.add_argument(
        "--tensor-parallel", type=int, required=True, help="Model tensor parallel size",
    )
    parser.add_argument(
        "--num-layers-per-pp-rank", type=int, default=None, nargs="*", help="Specify this for uneven pipeline parallel split",
    )
    parser.add_argument(
        "--iteration", type=int, default=None, help="Specify checkpoint iteration",
    )

    args = parser.parse_args()

    f = None
    if args.input_pipeline_parallel == 1 and args.output_pipeline_parallel > 1:
        f = split
    elif args.input_pipeline_parallel > 1 and args.output_pipeline_parallel == 1:
        f = combine
    else:
        raise NotImplementedError("Only pipeline parallel 1 to N and N to 1 are supported")

    f(args.input, args.output, args.input_pipeline_parallel, args.output_pipeline_parallel, args.tensor_parallel, args.num_layers_per_pp_rank)

    print("done.")


================================================
FILE: examples/multimodal/nvlm/pretrain_blend.yaml
================================================
__module__: megatron.energon
__class__: Metadataset
splits:
  train:
    datasets:
      - weight: 0.579   # Datasets are weighted according to their size. Weights sum up to 1.
        path: <path to laion dataset>
        subflavors:
          augmentation: False

      - weight: 0.02
        path: <path to coco>
        subflavors:
          augmentation: False

      - weight: 0.01
        path: <path to vqav2 dataset>
        subflavors:
          augmentation: False

      # Please refer to Table 4 in https://arxiv.org/pdf/2409.11402 for full list of pretrain datasets.
      # Please refer to https://nvidia.github.io/Megatron-Energon/data_prep.html on preparing datasets in the Megatron Energon format.
  val:
    datasets:
      - weight: 1.
        path: <path to validation dataset>
        subflavors:
          augmentation: False


================================================
FILE: examples/multimodal/nvlm/pretrain_qwen20_72b_internvit_6b.sh
================================================
#!/bin/bash

# Your SBATCH commands here if using SLURM.

# Please launch this script from megatron-lm root.

# Train a multimodal model.

export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export TOKENIZERS_PARALLELISM="false"

DEBUG=0

if [[ $BATCH -eq 0 ]]; then
    DATETIME=`date +'%y-%m-%d-%H-%M-%S'`
    MODEL_NAME="mcore-qwen20-72b-internvit-${DATETIME}"
else
    MODEL_NAME="mcore-qwen20-72b-internvit"
fi

WORKSPACE="<some dir>"
SOURCE=`pwd`
OUTPUT_BASE="${WORKSPACE}/output"
OUTPUT="${OUTPUT_BASE}/${MODEL_NAME}"

FINETUNE_DIR=${OUTPUT}/checkpoints
LOGS_DIR="${OUTPUT}/logs"
TENSORBOARD_DIR="${OUTPUT}/tensorboard"

CHECKPOINT_DIR="${WORKSPACE}/combined-qwen2.0-72b-instruct-internvit-6b-448px-1.5-tp8-te"

DATA_TRAIN="${SOURCE}/examples/multimodal/nvlm/pretrain_blend.yaml"

if [[ $DEBUG -eq 1 ]]; then
    MBZ=1
    BZ=1
    NW=0
    AD=0.0
    HD=0.0
    LI=1
    EXTRA_ARGS=""
    ALLOW_NONDETERMINISTIC=1
else
    MBZ=1
    BZ=2048
    NW=8
    AD=0.1
    HD=0.1
    LI=5
    EXTRA_ARGS=""
    ALLOW_NONDETERMINISTIC=1
fi

SEQ_LEN=256     # Image embeddings sequence length.
DECODER_SEQ_LEN=512     # Language model sequence length.
MAX_POS_EMBED=512


OPTIONS=" \
    --use-checkpoint-args \
    --exit-duration-in-mins 230 \
    --disable-bias-linear \
    --tokenizer-type MultimodalTokenizer \
    --tokenizer-model Qwen/Qwen2-72B-Instruct \
    --tokenizer-prompt-format qwen2p0 \
    --transformer-impl transformer_engine \
    --normalization RMSNorm \
    --norm-epsilon 1e-06 \
    --group-query-attention \
    --num-query-groups 8 \
    --no-masked-softmax-fusion \
    --attention-softmax-in-fp32 \
    --attention-dropout ${AD} \
    --hidden-dropout ${HD} \
    --untie-embeddings-and-output-weights \
    --position-embedding-type rope \
    --rotary-percent 1.0 \
    --rotary-base 1000000 \
    --swiglu \
    --tensor-model-parallel-size 8  \
    --pipeline-model-parallel-size 1  \
    --num-layers 80 \
    --hidden-size 8192 \
    --ffn-hidden-size 29568 \
    --add-qkv-bias \
    --num-attention-heads 64  \
    --use-distributed-optimizer \
    --use-te \
    --num-workers ${NW} \
    --seq-length ${SEQ_LEN} \
    --decoder-seq-length ${DECODER_SEQ_LEN} \
    --max-position-embeddings 32768 \
    --train-samples 122880000 \
    --lr-decay-samples 25600000 \
    --lr-warmup-samples 83200 \
    --micro-batch-size ${MBZ} \
    --global-batch-size ${BZ} \
    --lr 1e-4 \
    --min-lr 2.5e-5 \
    --lr-decay-style cosine \
    --log-interval ${LI} \
    --eval-iters 10 \
    --eval-interval 500 \
    --data-path ${DATA_TRAIN} \
    --prompt-path ${SOURCE}/examples/multimodal/nvlm/nvlm_prompts.json \
    --save-interval 5000 \
    --save ${FINETUNE_DIR} \
    --load ${FINETUNE_DIR} \
    --dataloader-save ${FINETUNE_DIR}/dataloader \
    --pretrained-checkpoint ${CHECKPOINT_DIR} \
    --split 100,0,0 \
    --clip-grad 10.0 \
    --weight-decay 0.1 \
    --adam-beta1 0.9 \
    --adam-beta2 0.95 \
    --init-method-std 0.014 \
    --bf16 \
    --eod-mask-loss \
    --freeze-ViT \
    --freeze-LM \
    --patch-dim 14 \
    --img-h 448 \
    --img-w 448 \
    --dataloader-type external \
    --tensorboard-dir ${TENSORBOARD_DIR} \
    --language-model-type qwen2.0_72B \
    ${EXTRA_ARGS} \
    --allow-missing-vision-projection-checkpoint \
    --vision-model-type internvit \
    --disable-vision-class-token \
    --log-params-norm \
    --log-num-zeros-in-grad \
    --ckpt-format torch \
    --pixel-shuffle \
    --image-tag-type nvlm
"


export NVTE_APPLY_QK_LAYER_SCALING=0
export NVTE_ALLOW_NONDETERMINISTIC_ALGO=${ALLOW_NONDETERMINISTIC}

# Interactive or batch mode
if [[ $BATCH -eq 0 ]]; then
    torchrun --nproc_per_node 8 examples/multimodal/train.py ${OPTIONS}
else
    run_cmd="python -u ${SOURCE}/examples/multimodal/train.py ${OPTIONS}"

    DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`

    srun -l --verbose \
    --container-image <path to docker image> \
    --container-mounts "<some mount>" \
    --output=${LOGS_DIR}/%x_%j_$DATETIME.log \
    sh -c "${run_cmd}"

    set +x
fi


================================================
FILE: examples/multimodal/nvlm/pretrain_yi_34b_internvit_6b.sh
================================================
#!/bin/bash

# Your SBATCH commands here if using SLURM.

# Please launch this script from megatron-lm root.

# Train a multimodal model.

export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export TOKENIZERS_PARALLELISM="false"

DEBUG=0

if [[ $BATCH -eq 0 ]]; then
    DATETIME=`date +'%y-%m-%d-%H-%M-%S'`
    MODEL_NAME="mcore-nous-yi34b-internvit-mlp-${DATETIME}"
else
    MODEL_NAME="mcore-nous-yi34b-internvit-mlp"
fi

WORKSPACE="<some dir>"
SOURCE=`pwd`
OUTPUT_BASE="${WORKSPACE}/output"
OUTPUT="${OUTPUT_BASE}/${MODEL_NAME}"

FINETUNE_DIR=${OUTPUT}/checkpoints
LOGS_DIR="${OUTPUT}/logs"
TENSORBOARD_DIR="${OUTPUT}/tensorboard"

LOAD_NAME="combined-yi-34b-internvit-tp8-mcore"
CHECKPOINT_DIR="${WORKSPACE}/${LOAD_NAME}"

DATA_TRAIN="${SOURCE}/examples/multimodal/nvlm/pretrain_blend.yaml"


if [[ $DEBUG -eq 1 ]]; then
    MBZ=1
    BZ=1
    NW=0
    LI=1
    AD=0.0
    HD=0.0
    EXTRA_ARGS=""
    ALLOW_NONDETERMINISTIC=1
else
    MBZ=1
    BZ=2048
    NW=8
    LI=5
    AD=0.1
    HD=0.1
    EXTRA_ARGS=""
    ALLOW_NONDETERMINISTIC=1
fi

SEQ_LEN=256     # Image embeddings sequence length.
DECODER_SEQ_LEN=512     # Language model sequence length.
MAX_POS_EMBED=512


OPTIONS=" \
    --swiglu \
    --use-distributed-optimizer \
    --num-workers ${NW} \
    --num-layers 60 \
    --hidden-size 7168 \
    --normalization RMSNorm \
    --num-attention-heads 56 \
    --exit-duration-in-mins 230 \
    --group-query-attention \
    --num-query-groups 8 \
    --ffn-hidden-size 20480 \
    --seq-length ${SEQ_LEN} \
    --decoder-seq-length ${DECODER_SEQ_LEN} \
    --max-position-embeddings ${MAX_POS_EMBED} \
    --tokenizer-type MultimodalTokenizer \
    --tokenizer-model NousResearch/Nous-Hermes-2-Yi-34B \
    --tokenizer-prompt-format nvlm-yi-34b \
    --vocab-size 64000 \
    --make-vocab-size-divisible-by 1 \
    --position-embedding-type rope \
    --rotary-percent 1.0 \
    --rotary-base 5000000 \
    --disable-bias-linear \
    --tensor-model-parallel-size 8 \
    --language-model-type yi-34b \
    --vision-model-type internvit \
    --micro-batch-size ${MBZ} \
    --global-batch-size ${BZ} \
    --train-samples 122880000 \
    --lr-decay-samples 25600000 \
    --lr-warmup-samples 83200 \
    --lr 1e-4 \
    --min-lr 2.5e-5 \
    --lr-decay-style cosine \
    --clip-grad 10.0 \
    --weight-decay 0.1 \
    --adam-beta1 0.9 \
    --adam-beta2 0.95 \
    --init-method-std 0.014 \
    --attention-dropout ${AD} \
    --hidden-dropout ${HD} \
    --untie-embeddings-and-output-weights \
    --eod-mask-loss \
    --bf16 \
    --tensorboard-dir=${TENSORBOARD_DIR} \
    --freeze-LM \
    --freeze-ViT \
    --img-h 448 \
    --img-w 448 \
    --patch-dim 14 \
    --data-path ${DATA_TRAIN} \
    --dataloader-type external \
    --split 100,0,0 \
    --prompt-path ${SOURCE}/examples/multimodal/nvlm/nvlm_prompts.json \
    --log-interval ${LI} \
    --save-interval 2000 \
    --eval-interval 500 \
    --eval-iters 10 \
    --log-params-norm \
    --log-num-zeros-in-grad \
    ${EXTRA_ARGS} \
    --save ${FINETUNE_DIR} \
    --load ${FINETUNE_DIR} \
    --dataloader-save ${FINETUNE_DIR}/dataloader \
    --pretrained-checkpoint ${CHECKPOINT_DIR} \
    --allow-missing-vision-projection-checkpoint \
    --disable-vision-class-token \
    --use-te \
    --use-checkpoint-args \
    --ckpt-format torch \
    --pixel-shuffle \
    --image-tag-type nvlm
    "

export NVTE_ALLOW_NONDETERMINISTIC_ALGO=${ALLOW_NONDETERMINISTIC}
export NVTE_APPLY_QK_LAYER_SCALING=0

# Interactive or batch mode
if [[ $BATCH -eq 0 ]]; then
    torchrun --nproc_per_node 8 examples/multimodal/train.py ${OPTIONS}
else
    run_cmd="python -u ${SOURCE}/examples/multimodal/train.py ${OPTIONS}"

    DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`

    srun -l --verbose \
    --container-image <path to docker image> \
    --container-mounts "<some mount>" \
    --output=${LOGS_DIR}/%x_%j_$DATETIME.log \
    sh -c "${run_cmd}"

    set +x
fi


================================================
FILE: examples/multimodal/nvlm/run_text_generation_qwen20_72b_internvit_6b.sh
================================================
#!/bin/bash

export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NVTE_APPLY_QK_LAYER_SCALING=0
export TOKENIZERS_PARALLELISM="false"

INPUT_IMAGE_PATH="placeholder"
GROUNDTRUTH_PATH="placeholder"

USE_TILING=0
USE_PIXEL_SHUFFLE_ONLY=0

while [[ $# -gt 0 ]]; do
    case $1 in
        --input-image-path)
            INPUT_IMAGE_PATH="$2"
            shift
            shift
            ;;
        -o|--output-path)
            OUTPUT_PATH="$2"
            shift
            shift
            ;;
        -m|--model-path)
            MODEL_PATH="$2"
            shift
            shift
            ;;
        --task)
            TASK="$2"
            shift
            shift
            ;;
        -g|--gt-path)
            GROUNDTRUTH_PATH="$2"
            shift
            shift
            ;;
        --use-tiling)
            USE_TILING=1
            shift
            shift
            ;;
        --use-pixel-shuffle-only)
            USE_PIXEL_SHUFFLE_ONLY=1
            shift
            shift
            ;;
        -*|--*)
            echo "Invalid option $1"
            exit 1
            ;;
    esac
done

# Please modify these as needed.
NUM_PARTITIONS=0
START=0
END=0

SEQ_LEN=1024     # Image embeddings sequence length.
DECODER_SEQ_LEN=8192    # Language model sequence length.
MAX_POS_EMBED=8192

# Additional arguments.
EXTRA_ARGS=""

if [[ $USE_TILING -eq 1 ]]; then
    EXTRA_ARGS+=" --pixel-shuffle --use-tiling --max-num-tiles 6 --use-thumbnail --use-tile-tags"
    SEQ_LEN=261     # Image embeddings sequence length (256 image embeddings + 5 tile tag embeddings).
fi

if [[ $USE_PIXEL_SHUFFLE_ONLY -eq 1 ]]; then
    EXTRA_ARGS+=" --pixel-shuffle"
    SEQ_LEN=256
fi

for PARTITION_ID in $( eval echo {$START..$END} )
do
    torchrun --nproc_per_node 8 examples/multimodal/run_text_generation.py \
        --attention-softmax-in-fp32 \
        --no-masked-softmax-fusion \
        --swiglu \
        --num-layers 80 \
        --hidden-size 8192 \
        --normalization RMSNorm \
        --norm-epsilon 1e-06 \
        --num-attention-heads 64 \
        --exit-on-missing-checkpoint \
        --group-query-attention \
        --num-query-groups 8 \
        --ffn-hidden-size 29568 \
        --load ${MODEL_PATH} \
        --seq-length ${SEQ_LEN} \
        --decoder-seq-length ${DECODER_SEQ_LEN} \
        --max-position-embeddings ${MAX_POS_EMBED} \
        --tokenizer-type MultimodalTokenizer \
        --tokenizer-model Qwen/Qwen2-72B-Instruct \
        --tokenizer-prompt-format qwen2p0 \
        --position-embedding-type rope \
        --rotary-percent 1.0 \
        --rotary-base 1000000 \
        --disable-bias-linear \
        --add-qkv-bias \
        --tensor-model-parallel-size 8 \
        --pipeline-model-parallel-size 1 \
        --language-model-type qwen2.0_72B \
        --vision-model-type internvit \
        --micro-batch-size 1 \
        --attention-dropout 0.0 \
        --hidden-dropout 0.0 \
        --bf16 \
        --freeze-LM \
        --freeze-ViT \
        --img-h 448 \
        --img-w 448 \
        --patch-dim 14 \
        --use-te \
        --transformer-impl transformer_engine \
        --use-checkpoint-args \
        --out-seq-length 16 \
        --temperature 1.0 \
        --patch-dim 14 \
        --seed 1234 \
        --top_k 1 \
        --no-load-rng \
        --no-load-optim \
        --num-partitions ${NUM_PARTITIONS} \
        --partition-id ${PARTITION_ID} \
        --output-path ${OUTPUT_PATH} \
        --gt-path ${GROUNDTRUTH_PATH} \
        --disable-vision-class-token \
        --input-image-path ${INPUT_IMAGE_PATH} \
        --gt-path ${GROUNDTRUTH_PATH} \
        ${EXTRA_ARGS} \
        --task ${TASK} \
        --image-tag-type nvlm \
        --ckpt-format torch
done


================================================
FILE: examples/multimodal/nvlm/run_text_generation_qwen25_7b_internvit_video.sh
================================================
#!/bin/bash

export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NVTE_APPLY_QK_LAYER_SCALING=0
export TOKENIZERS_PARALLELISM="false"

INPUT_IMAGE_PATH="placeholder"
GROUNDTRUTH_PATH="placeholder"

while [[ $# -gt 0 ]]; do
    case $1 in
        --input-image-path)
            INPUT_IMAGE_PATH="$2"
            shift
            shift
            ;;
        --input-metadata-path)
            INPUT_METADATA_PATH="$2"
            shift
            shift
            ;;
        --num-frames)
            NUM_FRAMES="$2"
            shift
            shift
            ;;
        -g|--groundtruth-path)
            GROUNDTRUTH_PATH="$2"
            shift
            shift
            ;;
        -o|--output-path)
            OUTPUT_PATH="$2"
            shift
            shift
            ;;
        -m|--model-path)
            MODEL_PATH="$2"
            shift
            shift
            ;;
        --task)
            TASK="$2"
            shift
            shift
            ;;
        -g|--gt-path)
            GROUNDTRUTH_PATH="$2"
            shift
            shift
            ;;
        -*|--*)
            echo "Invalid option $1"
            exit 1
            ;;
    esac
done


# Please modify these as needed.
NUM_PARTITIONS=0
START=0
END=0

SEQ_LEN=256
DECODER_SEQ_LEN=16384

EXTRA_ARGS=" --pixel-shuffle"


for PARTITION_ID in $( eval echo {$START..$END} )
do
    torchrun --nproc_per_node 8 examples/multimodal/run_text_generation.py \
        --attention-softmax-in-fp32 \
        --transformer-impl transformer_engine \
        --use-te \
        --use-checkpoint-args \
        --normalization RMSNorm \
        --norm-epsilon 1e-06 \
        --language-model-type=qwen2.5_7B \
        --untie-embeddings-and-output-weights \
        --disable-bias-linear \
        --position-embedding-type rope \
        --rotary-percent 1.0 \
        --rotary-base 1000000 \
        --swiglu \
        --attention-dropout 0.0 \
        --hidden-dropout 0.0 \
        --tensor-model-parallel-size 4 \
        --pipeline-model-parallel-size 1 \
        --group-query-attention \
        --num-query-groups 4 \
        --num-layers 28 \
        --hidden-size 3584 \
        --ffn-hidden-size 18944 \
        --add-qkv-bias \
        --num-attention-heads 28 \
        --max-position-embeddings 32768  \
        --no-masked-softmax-fusion \
        --load ${MODEL_PATH} \
        --tokenizer-type MultimodalTokenizer \
        --tokenizer-model Qwen/Qwen2.5-7B-Instruct \
        --tokenizer-prompt-format qwen2p5 \
        --bf16 \
        --micro-batch-size 1 \
        --seq-length ${SEQ_LEN} \
        --decoder-seq-length ${DECODER_SEQ_LEN} \
        --out-seq-length 128 \
        --temperature 1.0 \
        --img-h 448 \
        --img-w 448 \
        --patch-dim 14 \
        --seed 153 \
        --top_k 1 \
        --no-load-rng \
        --no-load-optim \
        --input-image-path ${INPUT_IMAGE_PATH} \
        --num-partitions ${NUM_PARTITIONS} \
        --partition-id ${PARTITION_ID} \
        --output-path ${OUTPUT_PATH} \
        --gt-path ${GROUNDTRUTH_PATH} \
        --task ${TASK} \
        ${EXTRA_ARGS} \
        --special-tokens "<image>" "<img>" "</img>" \
        --vision-model-type internvit \
        --num-frames ${NUM_FRAMES} \
        --ckpt-format torch
done


================================================
FILE: examples/multimodal/nvlm/run_text_generation_qwen25_7b_siglip.sh
================================================
#!/bin/bash

export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NVTE_APPLY_QK_LAYER_SCALING=0
export TOKENIZERS_PARALLELISM="false"

INPUT_IMAGE_PATH="placeholder"
GROUNDTRUTH_PATH="placeholder"

while [[ $# -gt 0 ]]; do
    case $1 in
        -i|--input-image-path)
            INPUT_IMAGE_PATH="$2"
            shift
            shift
            ;;
        -o|--output-path)
            OUTPUT_PATH="$2"
            shift
            shift
            ;;
        -m|--model-path)
            MODEL_PATH="$2"
            shift
            shift
            ;;
        -t|--task)
            TASK="$2"
            shift
            shift
            ;;
        -g|--gt-path)
            GROUNDTRUTH_PATH="$2"
            shift
            shift
            ;;
        -*|--*)
            echo "Invalid option $1"
            exit 1
            ;;
    esac
done

# Please modify these as needed.
NUM_PARTITIONS=0
START=0
END=0


SEQ_LEN=256
DECODER_SEQ_LEN=8192
EXTRA_ARGS=" --pixel-shuffle --use-tiling --max-num-tiles 12 --use-thumbnail"

for PARTITION_ID in $( eval echo {$START..$END} )
do
    torchrun --nproc_per_node 8 examples/multimodal/run_text_generation.py \
        --attention-softmax-in-fp32 \
        --transformer-impl transformer_engine \
        --use-te \
        --use-checkpoint-args \
        --normalization RMSNorm \
        --norm-epsilon 1e-06 \
        --language-model-type=qwen2.5_7B \
        --untie-embeddings-and-output-weights \
        --disable-bias-linear \
        --position-embedding-type rope \
        --rotary-percent 1.0 \
        --rotary-base 1000000 \
        --swiglu \
        --attention-dropout 0.0 \
        --hidden-dropout 0.0 \
        --tensor-model-parallel-size 4 \
        --pipeline-model-parallel-size 1 \
        --group-query-attention \
        --num-query-groups 4 \
        --num-layers 28 \
        --hidden-size 3584 \
        --ffn-hidden-size 18944 \
        --add-qkv-bias \
        --num-attention-heads 28 \
        --max-position-embeddings 32768  \
        --no-masked-softmax-fusion \
        --load ${MODEL_PATH} \
        --tokenizer-type MultimodalTokenizer \
        --tokenizer-model Qwen/Qwen2.5-7B-Instruct \
        --tokenizer-prompt-format qwen2p5 \
        --bf16 \
        --micro-batch-size 1 \
        --seq-length ${SEQ_LEN} \
        --decoder-seq-length ${DECODER_SEQ_LEN} \
        --out-seq-length 128 \
        --temperature 1.0 \
        --img-h 448 \
        --img-w 448 \
        --patch-dim 14 \
        --seed 153 \
        --top_k 1 \
        --no-load-rng \
        --no-load-optim \
        --input-image-path ${INPUT_IMAGE_PATH} \
        --num-partitions ${NUM_PARTITIONS} \
        --partition-id ${PARTITION_ID} \
        --output-path ${OUTPUT_PATH} \
        --gt-path ${GROUNDTRUTH_PATH} \
        --task ${TASK} \
        ${EXTRA_ARGS} \
        --special-tokens "<image>" "<img>" "</img>" \
        --vision-model-type siglip \
        --ckpt-format torch
done


================================================
FILE: examples/multimodal/nvlm/run_text_generation_yi_34b_internvit_6b.sh
================================================
#!/bin/bash

export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NVTE_APPLY_QK_LAYER_SCALING=0

INPUT_IMAGE_PATH="placeholder"
GROUNDTRUTH_PATH="placeholder"

USE_TILING=0
USE_PIXEL_SHUFFLE_ONLY=0

while [[ $# -gt 0 ]]; do
    case $1 in
        --input-image-path)
            INPUT_IMAGE_PATH="$2"
            shift
            shift
            ;;
        -o|--output-path)
            OUTPUT_PATH="$2"
            shift
            shift
            ;;
        -m|--model-path)
            MODEL_PATH="$2"
            shift
            shift
            ;;
        --task)
            TASK="$2"
            shift
            shift
            ;;
        -g|--gt-path)
            GROUNDTRUTH_PATH="$2"
            shift
            shift
            ;;
        --use-tiling)
            USE_TILING=1
            shift
            shift
            ;;
        --use-pixel-shuffle-only)
            USE_PIXEL_SHUFFLE_ONLY=1
            shift
            shift
            ;;
        -*|--*)
            echo "Invalid option $1"
            exit 1
            ;;
    esac
done

# Please modify these as needed.
NUM_PARTITIONS=0
START=0
END=0

SEQ_LEN=1024     # Image embeddings sequence length.
DECODER_SEQ_LEN=8192    # Language model sequence length.
MAX_POS_EMBED=8192

# Additional arguments.
EXTRA_ARGS=""

if [[ $USE_TILING -eq 1 ]]; then
    EXTRA_ARGS+=" --pixel-shuffle --use-tiling --max-num-tiles 6 --use-thumbnail --use-tile-tags"
    SEQ_LEN=261     # Image embeddings sequence length (256 image embeddings + 5 tile tag embeddings).
fi

if [[ $USE_PIXEL_SHUFFLE_ONLY -eq 1 ]]; then
    EXTRA_ARGS+=" --pixel-shuffle"
    SEQ_LEN=256
fi

for PARTITION_ID in $( eval echo {$START..$END} )
do
    torchrun --nproc_per_node 8 examples/multimodal/run_text_generation.py \
        --attention-softmax-in-fp32 \
        --no-masked-softmax-fusion \
        --swiglu \
        --num-layers 60 \
        --hidden-size 7168 \
        --normalization RMSNorm \
        --num-attention-heads 56 \
        --exit-on-missing-checkpoint \
        --group-query-attention \
        --num-query-groups 8 \
        --ffn-hidden-size 20480 \
        --load ${MODEL_PATH} \
        --seq-length ${SEQ_LEN} \
        --decoder-seq-length ${DECODER_SEQ_LEN} \
        --max-position-embeddings ${MAX_POS_EMBED} \
        --tokenizer-type MultimodalTokenizer \
        --tokenizer-model NousResearch/Nous-Hermes-2-Yi-34B \
        --tokenizer-prompt-format nvlm-yi-34b \
        --vocab-size 64000 \
        --make-vocab-size-divisible-by 1 \
        --position-embedding-type rope \
        --rotary-percent 1.0 \
        --rotary-base 5000000 \
        --disable-bias-linear \
        --tensor-model-parallel-size 8 \
        --pipeline-model-parallel-size 1 \
        --language-model-type yi-34b \
        --vision-model-type internvit \
        --micro-batch-size 1 \
        --attention-dropout 0.0 \
        --hidden-dropout 0.0 \
        --bf16 \
        --freeze-LM \
        --freeze-ViT \
        --img-h 448 \
        --img-w 448 \
        --patch-dim 14 \
        --use-te \
        --transformer-impl transformer_engine \
        --use-checkpoint-args \
        --out-seq-length 16 \
        --temperature 1.0 \
        --patch-dim 14 \
        --seed 1234 \
        --top_k 1 \
        --no-load-rng \
        --no-load-optim \
        --num-partitions ${NUM_PARTITIONS} \
        --partition-id ${PARTITION_ID} \
        --output-path ${OUTPUT_PATH} \
        --gt-path ${GROUNDTRUTH_PATH} \
        --disable-vision-class-token \
        --input-image-path ${INPUT_IMAGE_PATH} \
        --gt-path ${GROUNDTRUTH_PATH} \
        ${EXTRA_ARGS} \
        --task ${TASK} \
        --image-tag-type nvlm \
        --ckpt-format torch
done


================================================
FILE: examples/multimodal/nvlm/sft_34b_internvit.sh
================================================
#!/bin/bash

# Your SBATCH commands here if using SLURM.

# Please launch this script from megatron-lm root.

# Train a multimodal model.

export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NCCL_ALGO=^NVLS
export TOKENIZERS_PARALLELISM="false"


DEBUG=0

if [[ $BATCH -eq 0 ]]; then
    DATETIME=`date +'%y-%m-%d-%H-%M-%S'`
    MODEL_NAME="mcore-nous-yi34b-internvit-mlp-sft-${DATETIME}"
else
    MODEL_NAME="mcore-nous-yi34b-internvit-mlp-sft"
fi

WORKSPACE="<some dir>"
SOURCE=`pwd`
OUTPUT_BASE="${WORKSPACE}/output"
OUTPUT="${OUTPUT_BASE}/${MODEL_NAME}"

FINETUNE_DIR=${OUTPUT}/checkpoints
LOGS_DIR="${OUTPUT}/logs"
TENSORBOARD_DIR="${OUTPUT}/tensorboard"

LOAD_NAME="mcore-nous-yi34b-internvit-mlp"  # From pretraining
CHECKPOINT_DIR="${WORKSPACE}/output/${LOAD_NAME}/checkpoints"

DATA_TRAIN="${SOURCE}/examples/multimodal/nvlm/sft_blend.yaml"


if [[ $DEBUG -eq 1 ]]; then
    MBZ=1
    BZ=1
    NW=0
    LI=1
    AD=0.0
    HD=0.0
    ALLOW_NONDETERMINISTIC=1

    # Can run out of GPU memory in interactive memory without this.
    # This is just for interactive testing purposes. Do not use for proper training.
    EXTRA_ARGS=" --freeze-LM"
else
    MBZ=1
    BZ=128
    NW=2
    LI=5
    AD=0.0
    HD=0.0
    ALLOW_NONDETERMINISTIC=1

    EXTRA_ARGS=""
fi

SEQ_LEN=261     # Image embeddings sequence length (256 image embeddings + 5 tile tag embeddings).
DECODER_SEQ_LEN=3200    # Language model sequence length.
MAX_POS_EMBED=3200

OPTIONS=" \
    --swiglu \
    --use-distributed-optimizer \
    --num-workers ${NW} \
    --num-layers 60 \
    --hidden-size 7168 \
    --normalization RMSNorm \
    --num-attention-heads 56 \
    --exit-duration-in-mins 230 \
    --group-query-attention \
    --num-query-groups 8 \
    --ffn-hidden-size 20480 \
    --seq-length ${SEQ_LEN} \
    --decoder-seq-length ${DECODER_SEQ_LEN} \
    --max-position-embeddings ${MAX_POS_EMBED} \
    --tokenizer-type MultimodalTokenizer \
    --tokenizer-model NousResearch/Nous-Hermes-2-Yi-34B \
    --tokenizer-prompt-format nvlm-yi-34b \
    --vocab-size 64000 \
    --make-vocab-size-divisible-by 1 \
    --position-embedding-type rope \
    --rotary-percent 1.0 \
    --rotary-base 5000000 \
    --disable-bias-linear \
    --tensor-model-parallel-size 8 \
    --language-model-type yi-34b \
    --vision-model-type internvit \
    --micro-batch-size ${MBZ} \
    --global-batch-size ${BZ} \
    --train-samples 30000000 \
    --lr-decay-samples 25600000 \
    --lr-warmup-samples 83200 \
    --lr 2e-6 \
    --min-lr 2.5e-7 \
    --lr-decay-style cosine \
    --split 100,0,0 \
    --clip-grad 10 \
    --weight-decay 0.1 \
    --adam-beta1 0.9 \
    --adam-beta2 0.95 \
    --init-method-std 0.014 \
    --attention-dropout ${AD} \
    --hidden-dropout ${HD} \
    --untie-embeddings-and-output-weights \
    --eod-mask-loss \
    --bf16 \
    --tensorboard-dir=${TENSORBOARD_DIR} \
    --freeze-ViT \
    --img-h 448 \
    --img-w 448 \
    --patch-dim 14 \
    --data-path ${DATA_TRAIN} \
    --dataloader-type external \
    --dataloader-save ${FINETUNE_DIR}/dataloader \
    --prompt-path ${SOURCE}/examples/multimodal/nvlm/nvlm_prompts.json \
    --log-interval ${LI} \
    --load ${FINETUNE_DIR} \
    --save ${FINETUNE_DIR} \
    --pretrained-checkpoint ${CHECKPOINT_DIR} \
    --save-interval 5000 \
    --eval-interval 500 \
    --eval-iters 10 \
    --log-params-norm \
    --log-num-zeros-in-grad \
    ${EXTRA_ARGS} \
    --disable-vision-class-token \
    --use-te \
    --ckpt-format torch \
    --pixel-shuffle \
    --use-tiling \
    --max-num-tiles 6 \
    --use-thumbnail \
    --use-tile-tags \
    --image-tag-type nvlm
    "

export NVTE_ALLOW_NONDETERMINISTIC_ALGO=${ALLOW_NONDETERMINISTIC}
export NVTE_APPLY_QK_LAYER_SCALING=0

# Interactive or batch mode
if [[ $BATCH -eq 0 ]]; then
    torchrun --nproc_per_node 8 examples/multimodal/train.py ${OPTIONS}
else
    run_cmd="python -u ${SOURCE}/examples/multimodal/train.py ${OPTIONS}"

    DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`

    srun -l --verbose \
    --container-image <path to docker image> \
    --container-mounts "<some mount>" \
    --output=${LOGS_DIR}/%x_%j_$DATETIME.log \
    sh -c "${run_cmd}"

    set +x
fi


================================================
FILE: examples/multimodal/nvlm/sft_blend.yaml
================================================
__module__: megatron.energon
__class__: Metadataset
splits:
  train:
    datasets:
      - weight: 0.01  # # Datasets are weighted according to their size. Weights sum up to 1.
        path: <path to coco>
        subflavors:
          augmentation: False

      - weight: 0.02
        path: <path to clevr-math dataset>
        subflavors:
          augmentation: False

      # Please refer to Table 6 in https://arxiv.org/pdf/2409.11402 for full list of SFT datasets.
      # Please refer to https://nvidia.github.io/Megatron-Energon/data_prep.html on preparing datasets in the Megatron Energon format.
  val:
    datasets:
      - weight: 1.
        path: <path to validation dataset>
        subflavors:
          augmentation: False


================================================
FILE: examples/multimodal/nvlm/sft_qwen20_72b_internvit_6b.sh
================================================
#!/bin/bash

# Your SBATCH commands here if using SLURM.

# Please launch this script from megatron-lm root.

# Train a multimodal model.

export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NCCL_ALGO=^NVLS
export TOKENIZERS_PARALLELISM="false"

DEBUG=0

if [[ $BATCH -eq 0 ]]; then
    DATETIME=`date +'%y-%m-%d-%H-%M-%S'`
    MODEL_NAME="mcore-qwen20-72b-internvit-sft-${DATETIME}"
else
    MODEL_NAME="mcore-qwen20-72b-internvit-sft"
fi

WORKSPACE="<some dir>"
SOURCE=`pwd`
OUTPUT_BASE="${WORKSPACE}/output"
OUTPUT="${OUTPUT_BASE}/${MODEL_NAME}"

FINETUNE_DIR="${OUTPUT}/checkpoints"
LOGS_DIR="${OUTPUT}/logs"
TENSORBOARD_DIR="${OUTPUT}/tensorboard"

# From pretraining. The pretraining checkpoint must be manually split to 4 pipeline parallel stages.
# Please refer to README.md and run examples/multimodal/nvlm/pp_checkpoint_converter.py.
LOAD_NAME="mcore-qwen20-72b-internvit-pp4"

CHECKPOINT_DIR="${WORKSPACE}/output/${LOAD_NAME}/checkpoints"

DATA_TRAIN="${SOURCE}/examples/multimodal/nvlm/sft_blend.yaml"

if [[ $DEBUG -eq 1 ]]; then
    MBZ=1
    BZ=1
    NW=0
    AD=0.0
    HD=0.0
    LI=1
    # This is just for interactive testing purposes. Do not use for proper training.
    EXTRA_ARGS="--freeze-LM"
    ALLOW_NONDETERMINISTIC=1
else
    MBZ=1
    BZ=256
    NW=8
    AD=0.0
    HD=0.0
    LI=5
    EXTRA_ARGS=""
    ALLOW_NONDETERMINISTIC=1
fi

SEQ_LEN=261     # Image embeddings sequence length (256 image embeddings + 5 tile tag embeddings).
DECODER_SEQ_LEN=3200    # Language model sequence length.
MAX_POS_EMBED=8192

OPTIONS=" \
    --use-checkpoint-args \
    --exit-duration-in-mins 230 \
    --disable-bias-linear \
    --tokenizer-type MultimodalTokenizer \
    --tokenizer-model Qwen/Qwen2-72B-Instruct \
    --tokenizer-prompt-format qwen2p0 \
    --transformer-impl transformer_engine \
    --normalization RMSNorm \
    --norm-epsilon 1e-06 \
    --group-query-attention \
    --num-query-groups 8 \
    --no-masked-softmax-fusion \
    --attention-softmax-in-fp32 \
    --attention-dropout ${AD} \
    --hidden-dropout ${HD} \
    --untie-embeddings-and-output-weights \
    --position-embedding-type rope \
    --rotary-percent 1.0 \
    --rotary-base 1000000 \
    --swiglu \
    --tensor-model-parallel-size 8  \
    --pipeline-model-parallel-size 4 \
    --num-layers 80 \
    --hidden-size 8192 \
    --ffn-hidden-size 29568 \
    --add-qkv-bias \
    --num-attention-heads 64  \
    --use-distributed-optimizer \
    --use-te \
    --num-workers ${NW} \
    --seq-length ${SEQ_LEN} \
    --decoder-seq-length ${DECODER_SEQ_LEN} \
    --max-position-embeddings 32768 \
    --train-samples 122880000 \
    --lr-decay-samples 25600000 \
    --lr-warmup-samples 83200 \
    --micro-batch-size ${MBZ} \
    --global-batch-size ${BZ} \
    --lr 2e-6 \
    --min-lr 2.5e-7 \
    --lr-decay-style cosine \
    --log-interval ${LI} \
    --eval-iters 10 \
    --eval-interval 500 \
    --data-path ${DATA_TRAIN} \
    --prompt-path ${SOURCE}/examples/multimodal/nvlm/nvlm_prompts.json \
    --save-interval 10000 \
    --save ${FINETUNE_DIR} \
    --load ${FINETUNE_DIR} \
    --dataloader-save ${FINETUNE_DIR}/dataloader \
    --pretrained-checkpoint ${CHECKPOINT_DIR} \
    --split 100,0,0 \
    --clip-grad 10.0 \
    --weight-decay 0.1 \
    --adam-beta1 0.9 \
    --adam-beta2 0.95 \
    --init-method-std 0.014 \
    --bf16 \
    --eod-mask-loss \
    --freeze-ViT \
    --patch-dim 14 \
    --img-h 448 \
    --img-w 448 \
    --dataloader-type external \
    --tensorboard-dir ${TENSORBOARD_DIR} \
    --language-model-type qwen2.0_72B \
    ${EXTRA_ARGS} \
    --vision-model-type internvit \
    --disable-vision-class-token \
    --log-params-norm \
    --log-num-zeros-in-grad \
    --ckpt-format torch \
    --pixel-shuffle \
    --use-tiling \
    --max-num-tiles 6 \
    --use-thumbnail \
    --use-tile-tags \
    --image-tag-type nvlm
"


export NVTE_APPLY_QK_LAYER_SCALING=0
export NVTE_ALLOW_NONDETERMINISTIC_ALGO=${ALLOW_NONDETERMINISTIC}

# Interactive or batch mode
if [[ $BATCH -eq 0 ]]; then
    torchrun --nproc_per_node 8 examples/multimodal/train.py ${OPTIONS}
else
    run_cmd="python -u ${SOURCE}/examples/multimodal/train.py ${OPTIONS}"

    DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`

    srun -l --verbose \
    --container-image <path to docker image> \
    --container-mounts "<some mount>" \
    --output=${LOGS_DIR}/%x_%j_$DATETIME.log \
    sh -c "${run_cmd}"

    set +x
fi


================================================
FILE: examples/multimodal/nvlm/sft_qwen2p5_7b_internvit_6b_video.sh
================================================
#!/bin/bash

# Your SBATCH commands here if using SLURM.

# Please launch this script from megatron-lm root.

# Train a multimodal model.

export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NCCL_ALGO=^NVLS
export TOKENIZERS_PARALLELISM=false

USER=$SLURM_JOB_USER

# Auto-detect batch or interactive mode.
which srun
BATCH=$((1-$?))

DEBUG=0

if [[ $BATCH -eq 0 ]]; then
    DATETIME=`date +'%y-%m-%d-%H-%M-%S'`
    MODEL_NAME="qwen2.5-7B-internvit-video-sft-nvlm-${DATETIME}"
else
    MODEL_NAME="qwen2.5-7B-internvitp-video-sft-nvlm"
    DEBUG=0
fi

WORKSPACE="<some dir>"
SOURCE=`pwd`
OUTPUT_BASE="${WORKSPACE}/output"
OUTPUT="${OUTPUT_BASE}/${MODEL_NAME}"

FINETUNE_DIR="${OUTPUT}/checkpoints"
LOGS_DIR="${OUTPUT}/logs"
TENSORBOARD_DIR="${OUTPUT}/tensorboard"

# From pretraining. The pretraining checkpoint should have tensor parallel size to 4.
LOAD_NAME="mcore-qwen2p5-7b-internvit-tp4"

CHECKPOINT_DIR="${WORKSPACE}/output/${LOAD_NAME}/checkpoints"

DATA_TRAIN="${SOURCE}/examples/multimodal/nvlm/sft_blend.yaml"

if [[ $DEBUG -eq 1 ]]; then
    MBZ=1
    BZ=1
    NW=0
    AD=0.0
    HD=0.0
    LI=1
    # This is just for interactive testing purposes. Do not use for proper training.
    EXTRA_ARGS="--freeze-LM"
    ALLOW_NONDETERMINISTIC=1
else
    MBZ=1
    BZ=256
    NW=8
    AD=0.0
    HD=0.0
    LI=5
    EXTRA_ARGS=""
    ALLOW_NONDETERMINISTIC=1
fi

USE_TILING=1
SEQ_LEN=1024
DECODER_SEQ_LEN=16384
MAX_POS_EMBED=32768
TRAIN_SAMPLES=6602173
WARMUP_SAMPLES=198065


if [[ $BATCH -eq 0 ]]; then
    # Runs out of GPU memory in interactive memory without this.
    EXTRA_ARGS+="--freeze-LM"
fi

if [[ $USE_TILING -eq 1 ]]; then
    EXTRA_ARGS+=" --pixel-shuffle --use-tiling --max-num-tiles 12 --use-thumbnail"
    SEQ_LEN=256
fi


OPTIONS=" \
    --swiglu \
    --use-distributed-optimizer \
    --num-workers ${NW} \
    --num-layers 28 \
    --hidden-size 3584 \
    --norm-epsilon 1e-06 \
    --normalization RMSNorm \
    --num-attention-heads 28 \
    --exit-duration-in-mins 110 \
    --group-query-attention \
    --num-query-groups 4 \
    --ffn-hidden-size 18944 \
    --add-qkv-bias \
    --seq-length ${SEQ_LEN} \
    --decoder-seq-length ${DECODER_SEQ_LEN} \
    --max-position-embeddings ${MAX_POS_EMBED} \
    --dataloader-seq-length ${DECODER_SEQ_LEN} \
    --tokenizer-type MultimodalTokenizer \
    --tokenizer-model Qwen/Qwen2.5-7B-Instruct \
    --tokenizer-prompt-format qwen2p5 \
    --pixel-shuffle \
    --position-embedding-type rope \
    --rotary-percent 1.0 \
    --rotary-base 1000000 \
    --disable-bias-linear \
    --pipeline-model-parallel-size 1 \
    --tensor-model-parallel-size 4 \
    --language-model-type qwen2.5_7B \
    --vision-model-type internvit \
    --micro-batch-size ${MBZ} \
    --global-batch-size ${BZ} \
    --lr 2e-6 \
    --min-lr 2.5e-7 \
    --train-samples ${TRAIN_SAMPLES} \
    --lr-warmup-samples ${WARMUP_SAMPLES} \
    --lr-decay-style cosine \
    --clip-grad 10 \
    --weight-decay 0.1 \
    --adam-beta1 0.9 \
    --adam-beta2 0.95 \
    --init-method-std 0.014 \
    --attention-dropout ${AD} \
    --hidden-dropout ${HD} \
    --eod-mask-loss \
    --bf16 \
    --tensorboard-dir ${TENSORBOARD_DIR} \
    --img-h 448 \
    --img-w 448 \
    --patch-dim 14 \
    --data-path ${DATA_TRAIN} \
    --dataloader-type external \
    --split 100,0,0 \
    --prompt-path ${SOURCE}/examples/multimodal/nvlm/nvlm_prompts.json \
    --log-interval ${LI} \
    --save-interval 500 \
    --eval-interval 500 \
    --eval-iters 10 \
    --log-params-norm \
    --log-num-zeros-in-grad \
    ${EXTRA_ARGS} \
    --save ${FINETUNE_DIR} \
    --load ${FINETUNE_DIR} \
    --pretrained-checkpoint ${CHECKPOINT_DIR} \
    --distributed-timeout-minutes 60 \
    --allow-missing-vision-projection-checkpoint \
    --dataloader-save ${FINETUNE_DIR}/dataloader \
    --disable-vision-class-token \
    --use-te \
    --ckpt-format torch \
    --num-frames 32 \
    --use-checkpoint-args \
    --image-tag-type internvl \
    --recompute-granularity full \
    --recompute-method block \
    --recompute-num-layers 28 \
    --recompute-vision \
"


export NVTE_ALLOW_NONDETERMINISTIC_ALGO=${ALLOW_NONDETERMINISTIC}
export NVTE_APPLY_QK_LAYER_SCALING=0

# Interactive or batch mode
if [[ $BATCH -eq 0 ]]; then
    torchrun --nproc_per_node 8 examples/multimodal/train.py ${OPTIONS}
else
    run_cmd="python -u ${SOURCE}/examples/multimodal/train.py ${OPTIONS}"

    DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`

    srun -l --verbose \
    --container-image <path to docker image> \
    --container-mounts "<some mount>" \
    --output=${LOGS_DIR}/%x_%j_$DATETIME.log \
    sh -c "${run_cmd}"

    set +x
fi


================================================
FILE: examples/multimodal/pretrain_dataset.yaml
================================================
__module__: megatron.energon
__class__: Metadataset
splits:
  train:
    datasets:
      - weight: 1.
        path: <path_to_pretraining_dataset_in_energon_format>
        subflavors:
          augmentation: false
  val:
    datasets:
      - weight: 1.
        path: <path_to_pretraining_dataset_in_energon_format>
        subflavors:
          augmentation: false


================================================
FILE: examples/multimodal/pretrain_mistral_clip.sh
================================================
#!/bin/bash
# Pretrain a multimodal model.

export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
MODEL_NAME="mcore-llava-mistral-7b-instruct-clip336-pretraining"

# Check that the user has set an output path for model checkpoints.
if [[ -z $WORKSPACE ]]; then
    echo "Please set WORKSPACE for storing your model checkpoints."
    exit 1
fi

SOURCE=`pwd`
OUTPUT_BASE="${WORKSPACE}/output"
OUTPUT="${OUTPUT_BASE}/${MODEL_NAME}"

FINETUNE_DIR=${OUTPUT}/checkpoints
LOGS_DIR="${OUTPUT}/logs"
TENSORBOARD_DIR="${OUTPUT}/tensorboard"

export TRITON_CACHE_DIR="${WORKSPACE}/triton-cache/"
# The following patch to the Triton cache manager is needed for Triton version <= 3.1
# export TRITON_CACHE_MANAGER="megatron.core.ssm.triton_cache_manager:ParallelFileCacheManager"

if [[ -z $LOAD_NAME ]]; then
    echo "Please set LOAD_NAME for input model name."
    exit 1
fi

CHECKPOINT_DIR="${WORKSPACE}/${LOAD_NAME}/checkpoints"

DATA_TRAIN="${SOURCE}/examples/multimodal/pretrain_dataset.yaml"

DEBUG=0
if [[ $DEBUG -eq 1 ]]; then
    BZ=32
    NW=2
    HD=0.0
    LI=1
    EXTRA_ARGS=""
    NONDETERMINISTIC_ATTN=1
else
    BZ=256
    NW=2
    HD=0.1
    LI=10
    EXTRA_ARGS=""
    NONDETERMINISTIC_ATTN=1
fi

OPTIONS=" \
    --apply-layernorm-1p \
    --attention-softmax-in-fp32 \
    --use-checkpoint-args \
    --no-use-tokenizer-model-from-checkpoint-args \
    --use-distributed-optimizer \
    --transformer-impl transformer_engine \
    --use-te \
    --normalization RMSNorm \
    --group-query-attention \
    --num-query-groups 8 \
    --no-masked-softmax-fusion \
    --num-workers ${NW} \
    --exit-duration-in-mins 230 \
    --use-flash-attn \
    --untie-embeddings-and-output-weights \
    --disable-bias-linear \
    --position-embedding-type rope \
    --rotary-percent 1.0 \
    --rotary-base 1000000 \
    --swiglu \
    --attention-dropout 0.0 \
    --hidden-dropout ${HD} \
    --tensor-model-parallel-size 4 \
    --pipeline-model-parallel-size 1 \
    --num-layers 32 \
    --hidden-size 4096 \
    --num-attention-heads 32 \
    --seq-length 576 \
    --decoder-seq-length 1024 \
    --max-position-embeddings 4096 \
    --ffn-hidden-size 14336 \
    --train-iters 20000 \
    --micro-batch-size 1 \
    --global-batch-size ${BZ} \
    --lr-decay-iters 20000 \
    --lr-warmup-fraction .01 \
    --lr 0.00015 \
    --min-lr 1.0e-5 \
    --lr-decay-style cosine \
    --log-interval ${LI} \
    --eval-iters 10 \
    --eval-interval 1000 \
    --tokenizer-type MultimodalTokenizer \
    --tokenizer-model mistralai/Mistral-7B-Instruct-v0.3 \
    --tokenizer-prompt-format mistral \
    --data-path ${DATA_TRAIN} \
    --prompt-path ${SOURCE}/examples/multimodal/manual_prompts.json \
    --save-interval 1000 \
    --save ${FINETUNE_DIR} \
    --load ${FINETUNE_DIR} \
    --dataloader-save ${FINETUNE_DIR}/dataloader \
    --pretrained-checkpoint ${CHECKPOINT_DIR} \
    --split 100,0,0 \
    --clip-grad 1.0 \
    --weight-decay 1e-2 \
    --adam-beta1 0.9 \
    --adam-beta2 0.95 \
    --init-method-std 0.014 \
    --log-params-norm \
    --log-num-zeros-in-grad \
    --bf16 \
    --eod-mask-loss \
    --freeze-LM \
    --freeze-ViT \
    --patch-dim 14 \
    --img-h 336 \
    --img-w 336 \
    --dataloader-type external \
    --tensorboard-dir ${TENSORBOARD_DIR} \
    --language-model-type=mistral_7b \
    --disable-vision-class-token \
    ${EXTRA_ARGS} \
    --distributed-timeout-minutes 60 \
    --allow-missing-vision-projection-checkpoint \
    --ckpt-format torch
"

export NVTE_APPLY_QK_LAYER_SCALING=0
export NVTE_ALLOW_NONDETERMINISTIC_ALGO=${NONDETERMINISTIC_ATTN}

torchrun --nproc_per_node 8 examples/multimodal/train.py ${OPTIONS}


================================================
FILE: examples/multimodal/radio/radio_g.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
from functools import partial

import torch

from examples.multimodal.layer_scaling import (
    LayerScalingTransformerLayer,
    get_bias_dropout_add_layer_scaling,
)
from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear
from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules
from megatron.core.transformer.dot_product_attention import DotProductAttention
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.identity_op import IdentityOp
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_layer import TransformerLayer, TransformerLayerSubmodules
from megatron.core.typed_torch import not_none
from megatron.core.extensions.transformer_engine import HAVE_TE

if HAVE_TE:
    from megatron.core.extensions.transformer_engine import (
        TEColumnParallelLinear,
        TEDotProductAttention,
        TELayerNormColumnParallelLinear,
        TENorm,
        TERowParallelLinear,
    )
else:
    (
        TEColumnParallelLinear,
        TEDotProductAttention,
        TELayerNormColumnParallelLinear,
        TENorm,
        TERowParallelLinear,
    ) = (None, None, None, None, None)

try:
    import apex

    from megatron.core.fusions.fused_layer_norm import FusedLayerNorm

    HAVE_APEX = True
    LNImpl = FusedLayerNorm
except ImportError:
    import warnings

    from megatron.core.transformer.torch_norm import WrappedTorchNorm

    warnings.warn(f'Apex is not installed. Falling back to Torch Norm')
    LNImpl = WrappedTorchNorm


def get_mlp_module_spec(use_te: bool = True) -> ModuleSpec:
    # Dense MLP w/ or w/o TE modules.
    return ModuleSpec(
        module=MLP,
        submodules=MLPSubmodules(
            linear_fc1=not_none(TEColumnParallelLinear) if use_te else ColumnParallelLinear,
            linear_fc2=not_none(TERowParallelLinear) if use_te else RowParallelLinear,
        ),
    )


def get_norm_mlp_module_spec_te() -> ModuleSpec:
    return ModuleSpec(
        module=MLP,
        submodules=MLPSubmodules(
            linear_fc1=not_none(TELayerNormColumnParallelLinear),
            linear_fc2=not_none(TERowParallelLinear),
        ),
    )


def get_radio_g_layer_spec(normalization) -> ModuleSpec:
    attn_mask_type = AttnMaskType.no_mask
    if normalization == "LayerNorm":
        norm = LNImpl
    elif normalization == "RMSNorm":
        if HAVE_TE:
            norm = TENorm
        else:
            assert is_torch_min_version("2.4.0"), "Torch version >= 2.4.0 is required for RMSNorm"
            if HAVE_APEX:
                warnings.warn(f'Apex does not support RMSNorm. Falling back to Torch Norm')
            norm = WrappedTorchNorm
    else:
        raise RuntimeError("unknown normalization", normalization)

    mlp = get_mlp_module_spec(use_te=False)  # doesn't include norm.

    return ModuleSpec(
        module=LayerScalingTransformerLayer,
        submodules=TransformerLayerSubmodules(
            input_layernorm=not_none(norm),
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": attn_mask_type},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=ColumnParallelLinear,
                    core_attention=DotProductAttention,
                    linear_proj=RowParallelLinear,
                    q_layernorm=IdentityOp,
                    k_layernorm=IdentityOp,
                ),
            ),
            self_attn_bda=get_bias_dropout_add_layer_scaling,
            pre_mlp_layernorm=not_none(norm),
            mlp=mlp,
            mlp_bda=get_bias_dropout_add_layer_scaling,
        ),
    )


def get_radio_g_layer_spec_te() -> ModuleSpec:
    attn_mask_type = AttnMaskType.no_mask

    mlp = get_norm_mlp_module_spec_te()
    return ModuleSpec(
        module=LayerScalingTransformerLayer,
        submodules=TransformerLayerSubmodules(
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": attn_mask_type},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=not_none(TELayerNormColumnParallelLinear),
                    core_attention=not_none(TEDotProductAttention),
                    linear_proj=TERowParallelLinear,
                    q_layernorm=IdentityOp,
                    k_layernorm=IdentityOp,
                ),
            ),
            self_attn_bda=get_bias_dropout_add_layer_scaling,
            pre_mlp_layernorm=IdentityOp,
            mlp=mlp,
            mlp_bda=get_bias_dropout_add_layer_scaling,
        ),
    )


================================================
FILE: examples/multimodal/run_text_generation.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
"""Generate text using a vision language model."""
import json
import logging
import os
import sys
from functools import partial
from typing import List, Dict

# Add megatron to the path.
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
)

import torch
import yaml
from config import EvaluationConfig
from evaluation.evaluation_datasets import get_evaluation_dataset
from model import model_provider
from multimodal_args import add_multimodal_extra_args

from megatron.core import parallel_state
from megatron.core.enums import ModelType
from megatron.core.models.multimodal.llava_model import IMAGE_TOKEN
from megatron.core.models.vision.clip_vit_model import get_num_image_embeddings
from megatron.inference.text_generation.api import generate_and_post_process
from megatron.inference.text_generation.forward_step import ForwardStep
from megatron.core.inference.contexts import StaticInferenceContext
from megatron.core.inference.sampling_params import SamplingParams
from megatron.core.inference.engines import StaticInferenceEngine
from megatron.core.inference.inference_request import InferenceRequest, VLMInferenceRequest
from megatron.core.inference.text_generation_controllers.vlm_text_generation_controller import (
    VLMTextGenerationController,
)
from megatron.core.inference.model_inference_wrappers.inference_wrapper_config import (
    InferenceWrapperConfig,
)
from megatron.core.inference.model_inference_wrappers.multimodal.vlm_inference_wrapper import (
    VLMInferenceWrapper,
)
from megatron.training import get_args, get_model, get_tokenizer, print_rank_0, is_last_rank
from megatron.training.checkpointing import load_checkpoint
from megatron.training.initialize import initialize_megatron


def is_first_rank():
    """First tensor and pipeline parallel rank."""
    return (
        parallel_state.is_pipeline_first_stage(ignore_virtual=True)
        and parallel_state.get_tensor_model_parallel_rank() == 0
    )


def add_text_generation_args(parser):
    """Text generation arguments."""
    group = parser.add_argument_group(title='Vision language model text generation arguments')

    group.add_argument("--temperature", type=float, default=1.0, help='Sampling temperature.')
    group.add_argument("--top_p", type=float, default=0.0, help='Top p sampling.')
    group.add_argument("--top_k", type=int, default=0, help='Top k sampling.')
    group.add_argument(
        "--out-seq-length", type=int, default=128, help='Length of the output generated text.'
    )
    group.add_argument("--output-path", type=str, help='Output file path')
    group.add_argument('--input-image-path', type=str, help="Input image directory")
    group.add_argument(
        '--num-partitions', type=int, default=0, help="Number of partitions for inputs."
    )
    group.add_argument('--partition-id', type=int, default=0, help="Partition index")
    group.add_argument("--gt-path", type=str, help="Optional ground truth file")
    group.add_argument(
        "--task",
        type=str,
        choices=[
            "captioning",
            "TextVQA",
            "VQAv2",
            "ChartQA",
            "MMMU",
            "OCRBench",
            "OCRBench_v2",
            "MathVista",
            "AI2D",
            "InfoVQA",
            "SPDocVQA",
            "RD_TableBench",
            "VideoMME",
            "PerceptionTest",
            "MotionBench",
            "PhysGameBench",
            "MVBench",
            "inference",
        ],
        help="Generation task to run",
    )
    group.add_argument(
        "--num-samples-per-partition", type=int, default=0, help="Number of samples per partition"
    )
    group.add_argument("--config-path", type=str, help="Evaluation config file to use.")

    # Add common multimodal arguments needed for e.g. building the model.
    parser = add_multimodal_extra_args(parser)

    return parser


def get_evaluation_dataloader(
    task,
    input_image_path,
    gt_path,
    img_h,
    img_w,
    use_tiling,
    max_num_tiles,
    use_thumbnail,
    num_samples_per_partition,
    num_partitions,
    partition_id,
    num_frames,
    num_workers,
    vision_model_type,
    split="validation"
):
    """Build evaluation dataset."""
    dataset = get_evaluation_dataset(
        task,
        input_image_path,
        gt_path,
        img_h,
        img_w,
        use_tiling,
        max_num_tiles,
        use_thumbnail,
        num_samples_per_partition,
        num_partitions,
        partition_id,
        num_frames,
        vision_model_type,
        split=split
    )

    dp_rank = parallel_state.get_data_parallel_rank()
    dp_world_size = parallel_state.get_data_parallel_world_size()

    sampler = torch.utils.data.DistributedSampler(
        dataset, shuffle=False, num_replicas=dp_world_size, rank=dp_rank
    )
    # TODO: Batched inference is not supported yet.
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=None, num_workers=num_workers, sampler=sampler, pin_memory=True
    )

    return dataloader


def generate_samples(model, config: EvaluationConfig, print_output):
    """Text generation using a trained vision language model."""
    args = get_args()

    dataloader = get_evaluation_dataloader(
        config.task,
        config.input_image_path,
        config.gt_path,
        args.img_h,
        args.img_w,
        args.use_tiling,
        args.max_num_tiles,
        args.use_thumbnail,
        config.num_samples_per_partition,
        config.num_partitions,
        config.partition_id,
        args.num_frames,
        args.num_workers,
        args.vision_model_type,
        config.split
    )

    num_img_embeddings_per_tile = get_num_image_embeddings(
        args.img_h,
        args.img_w,
        args.patch_dim,
        args.vision_model_type,
        args.disable_vision_class_token,
        1,
        args.pixel_shuffle,
        args.use_tile_tags,
        args.max_num_tiles,
        args.tokenizer_prompt_format,
    )

    if args.use_mcore_inference:
        inference_wrapper_config = InferenceWrapperConfig(
            hidden_size=args.hidden_size,
            inference_batch_times_seqlen_threshold=args.inference_batch_times_seqlen_threshold,
            fp32_residual_connection=args.fp32_residual_connection,
            params_dtype=args.params_dtype,
            padded_vocab_size=args.padded_vocab_size,
        )
        inference_wrapped_model = VLMInferenceWrapper(model, inference_wrapper_config)
        tokenizer = get_tokenizer()
        controller = VLMTextGenerationController(
            inference_wrapped_model=inference_wrapped_model, tokenizer=tokenizer
        )
        inference_engine = StaticInferenceEngine(
            controller, max_batch_size=1, random_seed=args.seed, legacy=True
        )
        sampling_params = SamplingParams(
            temperature=config.temperature,
            top_k=config.top_k,
            top_p=config.top_p,
            num_tokens_to_generate=config.out_seq_length,
        )

    for idx, (imgs, num_tiles, sample_id, question, answers, metadata) in enumerate(dataloader):
        imgs = imgs.to("cuda")
        num_tiles = num_tiles.to("cuda")

        conv = get_conversation(config.task, question, metadata)

        if not args.use_mcore_inference:
            forward_step = partial(VLMForwardStep, num_img_embeddings_per_tile, imgs, num_tiles, args.decoder_seq_length)

        inference_context = StaticInferenceContext(max_batch_size=1, max_sequence_length=args.inference_max_seq_length)
        if is_first_rank():

            if args.use_mcore_inference:
                inference_request = VLMInferenceRequest(
                   request_id=inference_engine.get_new_request_id(),
                   prompt=conv,
                   prompt_tokens=controller.tokenize_prompt(controller.tokenizer, conv),
                   sampling_params=sampling_params,
                   num_img_embeddings_per_tile=num_img_embeddings_per_tile,
                   imgs=imgs,
                   num_tiles=num_tiles,
                   decoder_seq_length=args.decoder_seq_length,
                )
                results: List[InferenceRequest] = inference_engine.generate(
                    inference_requests=[inference_request]
                )

                resp_sentences = [
                    tokenizer.detokenize(result.prompt_tokens) + result.generated_text
                    for result in results
                ]
            else:
                resp_sentences, _, _, _ = generate_and_post_process(
                    model, inference_context,
                    forward_step=forward_step,
                    prompts=[conv],
                    tokens_to_generate=config.out_seq_length,
                    top_k_sampling=config.top_k,
                    top_p_sampling=config.top_p,
                    add_BOS=False,
                    temperature=config.temperature,
                    random_seed=args.seed,
                    detokenize_segments=False,
                    data_parallel=True,
            )

            for generation in resp_sentences:
                if isinstance(sample_id, torch.Tensor):
                    sample_id = sample_id.item()

                output = {"sample_id": sample_id}

                output_name = ""
                if config.task == "captioning":
                    output_name = "caption"
                elif config.task in (
                    "TextVQA",
                    "VQAv2",
                    "ChartQA",
                    "OCRBench",
                    "MathVista",
                    "AI2D",
                    "RealworldQA",
                    "MotionBench",
                    "PhysGameBench",
                    "MVBench",
                    "InfoVQA",
                    "SPDocVQA",
                    "inference",
                ):
                    output_name = "answer"
                elif config.task in ("MMMU"):
                    output_name = "text"
                elif config.task == "VideoMME":
                    output_name = "response"
                    output = question
                elif config.task in ["OCRBench_v2", "RD_TableBench"]:
                    output_name = "predict"
                else:
                    raise NotImplementedError("no output name defined for", config.task)

                prompt, generated = get_prompt_and_generated(
                    generation, args.tokenizer_prompt_format
                )
                if config.task == "VideoMME":
                    output["questions"][0][output_name] = generated
                else:
                    output["prompt"] = prompt
                    output[output_name] = generated

                if config.task in ["captioning", "RD_TableBench"]:
                    output["ground_truth"] = answers
                elif config.task in (
                    "TextVQA",
                    "VQAv2",
                    "ChartQA",
                    "OCRBench",
                    "OCRBench_v2",
                    "MathVista",
                    "AI2D",
                    "PerceptionTest",
                    "RealworldQA",
                    "MotionBench",
                    "PhysGameBench",
                    "MVBench",
                    "InfoVQA",
                    "SPDocVQA",
                    "inference",
                ):
                    if isinstance(answers, str):
                        answers = [answers]
                    output["gt_answer"] = answers

                    if len(metadata) > 0:
                        output.update(metadata)
                elif config.task == "MMMU":
                    output["prediction"] = generated
                    output.update(metadata)
                elif config.task == "VideoMME":
                    pass
                else:
                    raise NotImplementedError("no output processing defined for", config.task)

                if print_output:
                    print(output)

                yield output
                idx += 1
        else:
            if args.use_mcore_inference:
                inference_request = VLMInferenceRequest(
                   request_id=inference_engine.get_new_request_id(),
                   prompt=conv,
                   prompt_tokens=controller.tokenize_prompt(controller.tokenizer, conv),
                   sampling_params=sampling_params,
                   num_img_embeddings_per_tile=num_img_embeddings_per_tile,
                   imgs=imgs,
                   num_tiles=num_tiles,
                   decoder_seq_length=args.decoder_seq_length,
                )
                inference_engine.generate(
                    inference_requests=[inference_request]
                )
            else:
                generate_and_post_process(
                    model, inference_context, forward_step=forward_step, detokenize_segments=False, data_parallel=True
                )

            idx += 1


def get_evaluation_configs(config_path=None) -> Dict[str, EvaluationConfig]:
    """Get evaluation config(s) from a config file or command-line arguments.

    Args:
        config_path: Optional path to config file. If not provided, will check args.config_path
                    or fall back to command-line arguments.

    Returns:
        Dict[str, EvaluationConfig]: dict of configs.
    """
    args = get_args()
    configs = {}

    # Use provided config_path or fall back to args.config_path
    config_file = config_path or args.config_path

    # We check if we're trying to run a single config evals by checking for the task and output_path
    # args.
    if hasattr(args, "task") and args.task and hasattr(args, "output_path") and args.output_path:
        # Single config from args
        config = EvaluationConfig(
            task=args.task,
            temperature=args.temperature,
            top_p=args.top_p,
            top_k=args.top_k,
            out_seq_length=args.out_seq_length,
            output_path=args.output_path,
            input_image_path=args.input_image_path,
            gt_path=args.gt_path,
            num_partitions=args.num_partitions,
            partition_id=args.partition_id,
            num_samples_per_partition=args.num_samples_per_partition,
        )
        if not config.output_path:
            default_output_dir = args.output_path if args.output_path else "generated"
            os.makedirs(default_output_dir, exist_ok=True)
            config.output_path = os.path.join(default_output_dir, args.language_model_type)
        return {args.task: config}
    elif config_file:
        with open(config_file, "r") as f:
            config_data = yaml.safe_load(f)
        if 'datasets' not in config_data:
            print("Error: 'datasets' key not found in config file for batch mode.")
            sys.exit(1)
        config_dict = config_data['datasets']
        for key, value in config_dict.items():
            config = EvaluationConfig(**value)
            config.dataset = key
            if not config.output_path:
                # Use args.output_path if available, otherwise use "generated"
                default_output_dir = getattr(args, 'output_path', None) or "generated"
                os.makedirs(default_output_dir, exist_ok=True)
                config.output_path = os.path.join(default_output_dir, f"{args.language_model_type}")
            configs[key] = config
        return configs
    else:
        raise ValueError("No config file provided and no task specified.")


def get_output_path(config, dp_rank):
    """Generation output path."""

    ckpt_step = None
    try:
        args = get_args()
        ckpt_step = args.ckpt_step
    except Exception as e:
        print(f"Failed getting args: {type(e).__name__} - {e}")
    if ckpt_step is not None:
        return f"{config.output_path}-{config.task}-dprank={dp_rank}-partition={config.partition_id}-step={args.ckpt_step}.jsonl"
    else:
        return f"{config.output_path}-{config.task}-dprank={dp_rank}-partition={config.partition_id}.jsonl"


def generate_and_write_samples(model, config, print_output=True):
    """Generate text and write to an output file."""
    dp_rank = parallel_state.get_data_parallel_rank()

    if is_first_rank():
        output_path = get_output_path(config, dp_rank)
        output_file = open(output_path, "w")
        print(f"output path: {output_file.name}")

    with torch.no_grad():
        for output in generate_samples(model, config, print_output):
            if is_first_rank():
                output_file.write(json.dumps(output) + "\n")
                output_file.flush()

    if is_first_rank():
        output_file.close()

class VLMForwardStep(ForwardStep):
    """Inference forward step for a multimodal model."""

    def __init__(
        self,
        num_img_embeddings_per_tile,
        images,
        num_tiles,
        decoder_seq_length,
        model,
        inference_context,
    ):
        """Create multimodal forward step."""
        total_num_tiles = torch.sum(num_tiles).item()
        num_img_embeddings = num_img_embeddings_per_tile * total_num_tiles

        super().__init__(model, inference_context)
        self._images = images
        self._num_tiles = num_tiles
        self._num_img_embeddings = num_img_embeddings
        self.decoder_seq_length = decoder_seq_length

        self._recv_only_vision_embeds = False  # TODO: Implement new logic for vision embeddings
        self._encoder_only = False  # TODO: Implement new logic for encoder-only stages

    def _forward(self, tokens, position_ids, attention_mask):
        return self.model(
            self._images,
            tokens,
            position_ids,
            attention_mask=None,
            inference_context=self.inference_context,
            num_image_tiles=self._num_tiles,
            runtime_gather_output=True,
        )

    def __call__(self, tokens, position_ids, attention_mask):
        num_image_tokens = (tokens == self.model.module.image_token_index).sum().item()
        num_tokens = tokens.size(1)
        recv_buffer_seq_length = None
        if num_image_tokens > 0:
            # When there are image tokens and this stage only receives vision embeddings, adjust the recv buffer seq length to match the image embeddings sequence length.
            # If there are image tokens and this stage receives full embeddings, make sure we compensate for expansion of image tokens.
            # Note that this will set a recv_buffer_seq_length for the encoder stage, this length is irrelevant since that recv buffer is never allocated.
            if self._recv_only_vision_embeds:
                recv_buffer_seq_length = self._num_img_embeddings
            else:
                recv_buffer_seq_length = min(self._num_img_embeddings + num_tokens - num_image_tokens, self.decoder_seq_length)
        elif self._recv_only_vision_embeds:
            # If this stage only receives vision embeddings and there are no image tokens we won't run the encoder and therefore shouldn't try to recv.
            recv_buffer_seq_length = 0

        # If the pipeline stage only has a vision encoder, then it only needs to run when there are image tokens
        if not (self._encoder_only and num_image_tokens == 0):
            output = super().__call__(tokens, position_ids, attention_mask, recv_buffer_seq_length=recv_buffer_seq_length)
        else:
            output = None
        if isinstance(output, tuple):
            logits, _ = output
        else:
            logits = output

        # On the first inference iteration, we compute image tokens.
        # On every PP stage(although inference params should only matter for decoder),
        # update the sequence length offset by the number of image tokens.
        if num_tokens > 1 and num_image_tokens > 0:
            if "image_tokens_count" not in self.inference_context.key_value_memory_dict:
                self.inference_context.key_value_memory_dict["image_tokens_count"] = self._num_img_embeddings

            if self._num_img_embeddings + num_tokens - num_image_tokens > self.decoder_seq_length:
                self.inference_context.sequence_len_offset += self.decoder_seq_length - num_tokens
            else:
                self.inference_context.sequence_len_offset += (
                    self.inference_context.key_value_memory_dict["image_tokens_count"] - num_image_tokens
                )

        return logits


def get_conversation(task, question, metadata=None):
    """Get a conversation for a given task and evaluation question."""
    conversation = []

    # In all cases, the tokenizer adds possible header tokens for the assistant.
    if task == "captioning":
        conversation = [
            {"role": "system", "content": "Answer the questions."},
            {
                "role": "user",
                "content": f"{IMAGE_TOKEN}\nGive a brief description of this image in one sentence.",
            },
        ]
    elif task in ("TextVQA", "InfoVQA", "SPDocVQA"):
        conversation = [
            {"role": "system", "content": "Follow the user's instruction and answer questions."},
            {
                "role": "user",
                "content": f"{IMAGE_TOKEN}\n{question}\nAnswer the question using a single word, phrase, or number.",
            },
        ]
    elif task == "VQAv2":
        conversation = [
            {"role": "system", "content": "Follow the user's instruction and answer questions."},
            {
                "role": "user",
                "content": f"{IMAGE_TOKEN}\n{question}\nAnswer the question using a single word or phrase.",
            },
        ]
    elif task == "ChartQA":
        conversation = [
            {"role": "system", "content": "Follow the user's instruction and answer questions."},
            {
                "role": "user",
                "content": f"{IMAGE_TOKEN}\n{question}\nAnswer the question using a single word or phrase.",
            },
        ]
    elif task == "MMMU":
        conversation = [
            {"role": "system", "content": "Answer the questions."},
            {"role": "user", "content": f"{IMAGE_TOKEN}\n{question}"},
        ]
    elif task == "VideoMME":
        q = (
            "Select the best answer to the following multiple-choice "
            "question based on the video. Respond with only the letter "
            "(A, B, C, or D) of the correct option.\n"
        )
        q += question["questions"][0]["question"] + "\n"
        q += question["questions"][0]["choices"][0] + "\n"
        q += question["questions"][0]["choices"][1] + "\n"
        q += question["questions"][0]["choices"][2] + "\n"
        q += question["questions"][0]["choices"][3] + "\n"

        conversation = [
            {"role": "system", "content": "Answer the questions."},
            {"role": "user", "content": f"{IMAGE_TOKEN}\n{q}"},
        ]
    elif task in ("OCRBench", "OCRBench_v2", "RD_TableBench"):
        conversation = [
            {"role": "system", "content": "Follow the user's instruction and answer questions."},
            {"role": "user", "content": f"{IMAGE_TOKEN}\n{question}"},
        ]
    elif task == "MathVista":
        conversation = [
            {"role": "system", "content": "You are math expert. Use your math knowledge to calculate the answer."},
            {"role": "user", "content": f"{IMAGE_TOKEN}\n{question}"},
        ]
    elif task == "RealworldQA":
        conversation = [
            {"role": "system", "content": "Follow the user's instruction and answer questions."},
            {"role": "user", "content": f"{IMAGE_TOKEN}\n{question}"},
        ]
    elif task == "AI2D":
        conversation = [
            {"role": "system", "content": "Follow the user's instruction and answer questions."},
            {"role": "user", "content": f"{IMAGE_TOKEN}\n{question}"},
        ]
    elif task == "MotionBench":
        extra_instruction = "Respond with only the letter choice (A, B, C, or D) of the correct option.\n"
        conversation = [
            {"role": "system", "content": "Answer the questions."},
            {"role": "user", "content": f"{IMAGE_TOKEN}\n{question}\n{extra_instruction}"},
        ]
    elif task == "PhysGameBench":
        extra_instruction = "Respond with only the letter choice (A, B, C, or D) of the correct option.\n"
        conversation = [
            {"role": "system", "content": "Answer the questions."},
            {"role": "user", "content": f"{IMAGE_TOKEN}\n{question}\n{extra_instruction}"},
        ]
    elif task == "MVBench":
        conversation = [
            {"role": "system", "content": "Answer the questions."},
            {"role": "user", "content": f"{IMAGE_TOKEN}\n{question}\nAnswer the question using a single word or phrase."},
        ]
    elif task in ["PerceptionTest"]:
        conversation = [
            {"role": "system", "content": "Answer the questions."},
            {"role": "user", "content": f"{IMAGE_TOKEN}\n{question}"},
        ]
    elif task == "inference":
        conversation = [
            {"role": "system", "content": "Answer the questions."},
            {"role": "user", "content": f"{question}"},
        ]
    else:
        raise NotImplementedError(f"No prompting support for task {task}")


    return conversation


def get_prompt_and_generated(prompt_and_generation, prompt_format):
    """Strip prompt and other unnecessary text from generation."""
    if prompt_format in ("llama3", "llama3p1"):
        splitted = prompt_and_generation.split("<|start_header_id|>assistant<|end_header_id|>\n\n")
        prompt = splitted[0]
        generated = splitted[1]
        generated = generated.split("<|eot_id|>")[0]
    elif prompt_format == "mistral":
        splitted = prompt_and_generation.split("[/INST]")
        prompt = splitted[0]
        generated = splitted[1]
        generated = generated.split("</s>")[0]
    elif prompt_format == "chatml":
        splitted = prompt_and_generation.split("<|im_start|> assistant\n")
        prompt = splitted[0]
        generated = splitted[1]
        generated = generated.split("<|im_end|>")[0]
    elif prompt_format in ("nvlm-yi-34b", "qwen2p0", "qwen2p5"):
        splitted = prompt_and_generation.split("<|im_start|>assistant\n")
        prompt = splitted[0]
        generated = splitted[1]
        generated = generated.split("<|im_end|>")[0]
    elif prompt_format in ("nemotron5"):
        splitted = prompt_and_generation.split("<SPECIAL_14>assistant\n")
        prompt = splitted[0]
        generated = splitted[1]
        generated = generated.split("<SPECIAL_15>")[0]
    elif prompt_format in ("nemotron5-aligned"):
        splitted = prompt_and_generation.split("Assistant\n")
        prompt = splitted[0]
        generated = splitted[1]
        generated = generated.split("[PREFIX]")[0]
        generated = generated.split("\\n")[0]
    else:
        raise ValueError(f"Prompt format {prompt_format} is not supported.")

    # Remove possible garbage.
    generated = generated.strip()

    return prompt, generated


def run_eval(config, iteration=None):
    # Run evaluation.
    print(f"====== {config.task} {config.dataset} at iteration={iteration} scores ======")

    if config.task == "TextVQA":
        from evaluation.evaluate_textvqa import textvqa_eval
        avg_acc = textvqa_eval(config.output_path)

        score = {"TextVQA accuracy": avg_acc}
        with open(config.output_path + "-scores.txt", "a") as f:
            f.write(f"{config.task} {config.dataset} at iteration={iteration} TextVQA accuracy: {score}\n")

    elif config.task == "OCRBench":
        from evaluation.evaluate_ocrbench import ocrbench_eval
        log, avg_acc = ocrbench_eval(config.output_path)

        score = {"OCRBench accuracy": avg_acc}
        with open(config.output_path + "-scores.txt", "a") as f:
            f.write(f"{config.task} {config.dataset} at iteration={iteration} OCRBench accuracy: {score}\n")
            f.write(f"{log}\n")

    elif config.task == "MathVista":
        from evaluation.evaluate_mathvista import mathvista_eval
        avg_acc = mathvista_eval(config.output_path)

        score = {"MathVista accuracy": avg_acc}
        with open(config.output_path + "-scores.txt", "a") as f:
            f.write(f"{config.task} {config.dataset} at iteration={iteration} MathVista accuracy: {score}\n")

    elif config.task == "ChartQA":
        from evaluation.evaluate_chartqa import chartqa_eval
        avg_acc = chartqa_eval(config.output_path)

        score = {"ChartQA accuracy": avg_acc}
        with open(config.output_path + "-scores.txt", "a") as f:
            f.write(f"{config.task} {config.dataset} at iteration={iteration} ChartQA accuracy: {score}\n")

    elif config.task == "SPDocVQA":
        from evaluation.evaluate_spdocvqa import spdocvqa_eval
        avg_acc = spdocvqa_eval(config.output_path)

        score = {"SPDocVQA accuracy": avg_acc}
        with open(config.output_path + "-scores.txt", "a") as f:
            f.write(f"{config.task} {config.dataset} at iteration={iteration} SPDocVQA accuracy: {score}\n")

    elif config.task == "RealworldQA":
        from evaluation.evaluate_realworldqa import realworldqa_eval
        avg_acc = realworldqa_eval(config.output_path)

        score = {"RealworldQA accuracy": avg_acc}
        with open(config.output_path + "-scores.txt", "a") as f:
            f.write(f"{config.task} {config.dataset} at iteration={iteration} RealworldQA accuracy: {score}\n")

    elif config.task == "AI2D":
        from evaluation.evaluate_ai2d import ai2d_eval
        avg_acc = ai2d_eval(config.output_path)

        score = {f"AI2D {config.dataset} accuracy": avg_acc}
        with open(config.output_path + "-scores.txt", "a") as f:
            f.write(f"{config.task} {config.dataset} at iteration={iteration} AI2D accuracy: {score}\n")

    elif config.task == "MMMU":
        from evaluation.evaluate_mmmu import convert_to_mmmu_format
        from examples.multimodal.evaluation.mmmu_utils import mmmu_main_eval
        result_file = convert_to_mmmu_format(config.output_path)
        result = json.load(open(result_file))
        mmmu_results = mmmu_main_eval(result, {"answer_dict": config.gt_path})
        with open(config.output_path + "-scores.txt", "a") as f:
            f.write(f"{config.task} {config.split} at iteration={iteration} :\n")
            for cat, cat_val in mmmu_results.items():
                if 'Overall' in cat:
                    cat = cat.replace("Overall-", "")
                    print(f'{cat}: {cat_val["acc"] * 100:.2f}')
                    f.write(f'{cat}: {cat_val["acc"] * 100:.2f}\n')

        score = {"MMMU val accuracy": mmmu_results['Overall']['acc']}
    elif config.task == 'captioning':
        from evaluation.evaluate_coco import coco_captioning_eval
        cider_score = coco_captioning_eval(config.output_path, config.gt_path)
        score = {f"{config.task} {config.dataset} CIDEr": cider_score}

        with open(config.output_path + "-scores.txt", "a") as f:
            f.write(f"{config.task} {config.dataset} CIDEr scores at iteration={iteration}: {cider_score}\n")
    elif config.task == 'MotionBench':
        from evaluation.evaluate_video_motionbench import motionbench_eval
        avg_acc = motionbench_eval(config.output_path)

        score = {f"MotionBench accuracy": avg_acc}
        with open(config.output_path + "-scores.txt", "a") as f:
            f.write(f"{config.task} {config.dataset} scores at iteration={iteration}: {score}\n")
    elif config.task == 'PhysGameBench':
        from evaluation.evaluate_video_phys_game_bench import phys_game_bench_eval
        avg_acc_dict = phys_game_bench_eval(config.output_path)

        score = {f"PhysGame Total accuracy": avg_acc_dict['Physgame-Total-Acc']}
        with open(config.output_path + "-scores.txt", "a") as f:
            f.write(f"{config.task} {config.dataset} scores at iteration={iteration}: {avg_acc_dict}\n")
    elif config.task == "MVBench":
        from evaluation.evaluate_video_mvbench import mvbench_eval
        avg_acc_dict = mvbench_eval(config.output_path)

        score = {f"MVBench accuracy": avg_acc_dict['total-acc']}
        with open(config.output_path + "-scores.txt", "a") as f:
            f.write(f"{config.task} {config.dataset} scores at iteration={iteration}: {avg_acc_dict}\n")
    elif config.task == "inference":
        score = {"Inference accuracy:": None}
        pass
    else:
        raise NotImplementedError(f"Evaluation of {config.task} not implemented yet")

    print(score)
    return score


def run_evaluation_loop(model, configs, output_dir_override=None, iteration=None, print_output=True):
    """
    Common evaluation loop used by both online evaluation during training and standalone evaluation.

    Args:
        model: The model to evaluate
        configs: Dict[str, EvaluationConfig] - dictionary of evaluation configs
        output_dir_override: Optional directory to override the output path in configs
        iteration: Optional iteration number for logging
        print_output: Whether to print generation output

    Returns:
        Dict[str, float]: Dictionary of evaluation scores
    """
    args = get_args()
    scores = {}

    for key, config in configs.items():
        # Handle output path override for online evaluation
        if output_dir_override:
            config.output_path = os.path.join(output_dir_override, args.language_model_type)

        # Generate samples and write to file
        generate_and_write_samples(model, config, print_output=print_output)

        # Synchronize before evaluation
        torch.distributed.barrier()

        # Run evaluation on the last rank
        if is_last_rank():
            task_scores = run_eval(config, iteration=iteration)
            scores.update(task_scores)

        # Synchronize after evaluation
        torch.distributed.barrier()

    return scores


def eval_tasks():
    """Vision language model text generation for single or batch tasks."""
    initialize_megatron(extra_args_provider=add_text_generation_args)

    args = get_args()

    def wrapped_model_provider(pre_process, post_process, add_encoder=True, add_decoder=True):
        return model_provider(pre_process, post_process, add_encoder=add_encoder, add_decoder=add_decoder,
                              parallel_output=False)

    # Set up model and load checkpoint.
    model = get_model(wrapped_model_provider, model_type=ModelType.encoder_or_decoder, wrap_with_ddp=False)

    if args.load is not None:
        _ = load_checkpoint(model, None, None)

    model = model[0]
    model.eval()

    configs = get_evaluation_configs()

    # Use the common evaluation loop
    run_evaluation_loop(model, configs, iteration=args.ckpt_step)


if __name__ == "__main__":
    eval_tasks()


================================================
FILE: examples/multimodal/sft_dataset.yaml
================================================
__module__: megatron.energon
__class__: Metadataset
splits:
  train:
    datasets:
      - weight: 1.
        path: <path_to_sft_dataset_in_energon_format>
        subflavors:
          augmentation: false
  val:
    datasets:
      - weight: 1.
        path: <path_to_sft_dataset_in_energon_format>
        subflavors:
          augmentation: false


================================================
FILE: examples/multimodal/sft_mistral_clip.sh
================================================
#!/bin/bash
# Run SFT on a pretrained multimodal model

export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
MODEL_NAME="mcore-llava-mistral-7b-instruct-clip336-sft"

# Check that the user has set an output path for model checkpoints.
if [[ -z $WORKSPACE ]]; then
    echo "Please set WORKSPACE for storing your model checkpoints."
    exit 1
fi

SOURCE=`pwd`
OUTPUT_BASE="${WORKSPACE}/output"
OUTPUT="${OUTPUT_BASE}/${MODEL_NAME}"

FINETUNE_DIR=${OUTPUT}/checkpoints
LOGS_DIR="${OUTPUT}/logs"
TENSORBOARD_DIR="${OUTPUT}/tensorboard"

export TRITON_CACHE_DIR="${WORKSPACE}/triton-cache/"
# The following patch to the Triton cache manager is needed for Triton version <= 3.1
export TRITON_CACHE_MANAGER="megatron.core.ssm.triton_cache_manager:ParallelFileCacheManager"

if [[ -z $LOAD_NAME ]]; then
    echo "Please set LOAD_NAME for input model name."
    exit 1
fi

if [[ -z $LOAD_ITER ]]; then
    echo "Please set LOAD_ITER for pre-trained input model iteration."
    exit 1
fi

CHECKPOINT_DIR="${WORKSPACE}/${LOAD_NAME}/checkpoints"

DATA_TRAIN="${SOURCE}/examples/multimodal/sft_dataset.yaml"

DEBUG=0
if [[ $DEBUG -eq 1 ]]; then
    BZ=8
    NW=1
    HD=0.0
    LI=1
    EXTRA_ARGS=""
    NONDETERMINISTIC_ATTN=1
else
    BZ=128
    NW=2
    HD=0.1
    LI=10
    EXTRA_ARGS=""
    NONDETERMINISTIC_ATTN=1
fi

OPTIONS=" \
    --apply-layernorm-1p \
    --attention-softmax-in-fp32 \
    --use-checkpoint-args \
    --use-distributed-optimizer \
    --transformer-impl transformer_engine \
    --use-te \
    --normalization RMSNorm \
    --group-query-attention \
    --num-query-groups 8 \
    --no-masked-softmax-fusion \
    --num-workers ${NW} \
    --exit-duration-in-mins 230 \
    --use-flash-attn \
    --untie-embeddings-and-output-weights \
    --disable-bias-linear \
    --position-embedding-type rope \
    --rotary-percent 1.0 \
    --rotary-base 1000000 \
    --swiglu \
    --attention-dropout 0.0 \
    --hidden-dropout ${HD} \
    --tensor-model-parallel-size 4 \
    --pipeline-model-parallel-size 1 \
    --num-layers 32 \
    --hidden-size 4096 \
    --num-attention-heads 32 \
    --seq-length 576 \
    --decoder-seq-length 2048 \
    --max-position-embeddings 4096 \
    --ffn-hidden-size 14336 \
    --train-iters 20000 \
    --micro-batch-size 1 \
    --global-batch-size ${BZ} \
    --lr-decay-iters 20000 \
    --lr-warmup-fraction .01 \
    --lr 1e-6 \
    --min-lr 1e-7 \
    --lr-decay-style cosine \
    --log-interval ${LI} \
    --eval-iters 10 \
    --eval-interval 500 \
    --tokenizer-type MultimodalTokenizer \
    --tokenizer-model mistralai/Mistral-7B-Instruct-v0.3 \
    --tokenizer-prompt-format mistral \
    --data-path ${DATA_TRAIN} \
    --prompt-path ${SOURCE}/examples/multimodal/manual_prompts.json \
    --save-interval 500 \
    --save ${FINETUNE_DIR} \
    --load ${FINETUNE_DIR} \
    --pretrained-checkpoint ${CHECKPOINT_DIR} \
    --dataloader-save ${FINETUNE_DIR}/dataloader \
    --split 100,0,0 \
    --clip-grad 0.5 \
    --weight-decay 0.1 \
    --adam-beta1 0.9 \
    --adam-beta2 0.95 \
    --init-method-std 0.014 \
    --log-params-norm \
    --log-num-zeros-in-grad \
    --eod-mask-loss \
    --freeze-ViT \
    --patch-dim 14 \
    --img-h 336 \
    --img-w 336 \
    --dataloader-type external \
    --tensorboard-dir ${TENSORBOARD_DIR} \
    --language-model-type=mistral_7b \
    --disable-vision-class-token \
    ${EXTRA_ARGS} \
    --distributed-timeout-minutes 60 \
    --ckpt-format torch
"

export NVTE_APPLY_QK_LAYER_SCALING=0
export NVTE_ALLOW_NONDETERMINISTIC_ALGO=${NONDETERMINISTIC_ATTN}

torchrun --nproc_per_node 8 examples/multimodal/train.py ${OPTIONS}


================================================
FILE: examples/multimodal/text_generation_mistral_clip.sh
================================================
#!/bin/bash

export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NVTE_APPLY_QK_LAYER_SCALING=0

INPUT_IMAGE_PATH="placeholder"
GROUNDTRUTH_PATH="placeholder"
NUM_FRAMES=1

while [[ $# -gt 0 ]]; do
    case $1 in
        -i|--input-image-path)
            INPUT_IMAGE_PATH="$2"
            shift
            shift
            ;;
        --num-frames)
            NUM_FRAMES="$2"
            shift
            shift
            ;;
        -o|--output-path)
            OUTPUT_PATH="$2"
            shift
            shift
            ;;
        -m|--model-path)
            MODEL_PATH="$2"
            shift
            shift
            ;;
        -t|--task)
            TASK="$2"
            shift
            shift
            ;;
        -g|--gt-path)
            GROUNDTRUTH_PATH="$2"
            shift
            shift
            ;;
        -*|--*)
            echo "Invalid option $1"
            exit 1
            ;;
    esac
done

# Please modify these as needed.
NUM_PARTITIONS=0
START=0
END=0

for PARTITION_ID in $( eval echo {$START..$END} )
do
    torchrun --nproc_per_node 8 examples/multimodal/run_text_generation.py \
        --apply-layernorm-1p \
        --attention-softmax-in-fp32 \
        --use-flash-attn \
        --transformer-impl transformer_engine \
        --use-te \
        --use-checkpoint-args \
        --normalization RMSNorm \
        --language-model-type mistral_7b \
        --untie-embeddings-and-output-weights \
        --disable-bias-linear \
        --position-embedding-type rope \
        --rotary-percent 1.0 \
        --rotary-base 1000000 \
        --swiglu \
        --attention-dropout 0.0 \
        --hidden-dropout 0.0 \
        --tensor-model-parallel-size 4 \
        --pipeline-model-parallel-size 1 \
        --group-query-attention \
        --num-query-groups 8 \
        --num-layers 32 \
        --hidden-size 4096 \
        --ffn-hidden-size 14336 \
        --num-attention-heads 32 \
        --max-position-embeddings 4096 \
        --no-masked-softmax-fusion \
        --load ${MODEL_PATH} \
        --tokenizer-type MultimodalTokenizer \
        --tokenizer-model mistralai/Mistral-7B-Instruct-v0.3 \
        --tokenizer-prompt-format mistral \
        --bf16 \
        --micro-batch-size 1 \
        --seq-length 2048 \
        --out-seq-length 12 \
        --temperature 1.0 \
        --img-h 336 \
        --img-w 336 \
        --patch-dim 14 \
        --seed 153 \
        --top_k 1 \
        --no-load-rng \
        --no-load-optim \
        --input-image-path ${INPUT_IMAGE_PATH} \
        --num-partitions ${NUM_PARTITIONS} \
        --partition-id ${PARTITION_ID} \
        --output-path ${OUTPUT_PATH} \
        --gt-path ${GROUNDTRUTH_PATH} \
        --task ${TASK} \
        --disable-vision-class-token \
        --num-frames ${NUM_FRAMES} \
        --ckpt-format torch
done


================================================
FILE: examples/multimodal/train.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
"""Pretrain or SFT multimodal."""
import math
import os
import sys
from functools import partial

import torch
import yaml

sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
)

from dataloader_provider import train_valid_test_dataloaders_provider, is_first_or_last_stage
from model import model_provider
from multimodal_args import add_multimodal_extra_args

from megatron.core import mpu, tensor_parallel
from megatron.core.enums import ModelType
from megatron.core.models.multimodal import context_parallel
from megatron.core.models.multimodal.llava_model import IGNORE_INDEX, LLaVAModel
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.parallel_state import (
    get_tensor_model_parallel_rank,
    get_pipeline_model_parallel_world_size,
    is_pipeline_last_stage,
)
from megatron.training import get_args, get_timers, get_tokenizer, pretrain
from megatron.training.utils import is_last_rank, get_batch_on_this_cp_rank


def get_batch(data_iterator, image_token_index, img_seq_len):
    """Generate a batch

    Note: attn_mask_type in layer_specs.py sets the attention mask. Attention mask is None here.
    """
    imgs = None
    tokens = None
    labels = None
    loss_mask = None
    attention_mask = None
    position_ids = None
    num_tiles = None
    packed_seq_params = None

    args = get_args()

    # Dataloader doesn't run on the middle stages in a pipeline parallel model.
    pp_size = get_pipeline_model_parallel_world_size()
    if not is_first_or_last_stage(pp_size):
        # Note these are all set to None above.
        return tokens, labels, loss_mask, attention_mask, position_ids, imgs, num_tiles, packed_seq_params

    # Broadcast data.
    torch.cuda.nvtx.range_push("get_data")
    if data_iterator is not None and get_tensor_model_parallel_rank() == 0:
        data = next(data_iterator)
    else:
        data = None

    data_text = tensor_parallel.broadcast_data(["tokens"], data, torch.int64)["tokens"]
    labels = tensor_parallel.broadcast_data(["labels"], data, torch.int64)["labels"]

    imgs = tensor_parallel.broadcast_data(["imgs"], data, torch.float32)["imgs"]
    num_tiles = tensor_parallel.broadcast_data(["num_tiles"], data, torch.int32)["num_tiles"]

    cu_lengths = tensor_parallel.broadcast_data(["cu_lengths"], data, torch.int32)["cu_lengths"]
    max_lengths = tensor_parallel.broadcast_data(["max_lengths"], data, torch.int32)["max_lengths"]

    # No image input (text-only sample) if the dataloader returned a size 1 image.
    if imgs.shape == torch.Size([1, 1]):
        # FSDP can hang with text-only samples. A workaround is to run a valid dummy image through the vision
        # model and then add image embeddings with a zero multiplier.
        if args.use_torch_fsdp2:
            imgs = torch.zeros((1, 3, args.img_h, args.img_w), dtype=torch.float32, device=data_text.device)
            num_tiles = torch.tensor([], dtype=torch.int, device=data_text.device)
        else:
            # Similar workaround is not needed without FSDP and we can use an empty image.
            # FIXME: text-only data can cause still cause a hang in the special case where
            # the vision model is own its own pipeline rank and --freeze-ViT is enabled.
            imgs = torch.tensor([], dtype=torch.float32, device=data_text.device)
            num_tiles = torch.tensor([], dtype=torch.int, device=data_text.device)

    # Last pipeline parallel stage doesn't need images.
    if pp_size > 1 and is_pipeline_last_stage():
        imgs = None

    # If cu_lengths and max_lengths are non-dummy, construct PackedSeqParams. Otherwise, leave it at None.
    if cu_lengths.shape != torch.Size([1, 1]):
        assert (
            cu_lengths.shape[0] == max_lengths.shape[0] == 1
        ), "micro-batch-size must be 1 for packing"
        cu_lengths = cu_lengths[0]
        max_lengths = max_lengths[0]

        packed_seq_params = PackedSeqParams(
            qkv_format="thd",
            cu_seqlens_q=cu_lengths,
            cu_seqlens_kv=cu_lengths,
            max_seqlen_q=max_lengths,
            max_seqlen_kv=max_lengths,
        )

    torch.cuda.nvtx.range_pop()

    tokens_ = data_text.long()

    torch.cuda.nvtx.range_push("index tokens")
    tokenizer = get_tokenizer()
    text_length = tokens_.shape[1]
    tokens = tokens_[:, :text_length].contiguous()
    labels = labels[:, 1 : text_length + 1].contiguous()

    assert tokens.shape == labels.shape, f"tokens: {tokens.shape} != labels: {labels.shape}"
    torch.cuda.nvtx.range_pop()

    torch.cuda.nvtx.range_push("get_ltor_masks_and_position_ids")
    loss_mask, position_ids = get_ltor_masks_and_position_ids(tokens, labels, tokenizer.pad)
    torch.cuda.nvtx.range_pop()

    # If context parallel is enabled, must shard inputs to CP ranks.
    if args.context_parallel_size > 1 or args.sequence_parallel:
        assert tokens.shape[0], "micro-batch-size > 1 not supported yet with CP"

        num_image_tokens = torch.sum(tokens == image_token_index).item()
        num_image_embeddings = img_seq_len * imgs.shape[0] - num_image_tokens
        seq_len = text_length + num_image_embeddings

        # CP expects sequence length is divisible by CP size so apply padding.
        mp_padding_needed = context_parallel.get_padding(
            seq_len, args.context_parallel_size,
            args.tensor_model_parallel_size, args.sequence_parallel,
        )
        tokens, position_ids, labels, loss_mask = [torch.nn.functional.pad(item, (0, mp_padding_needed)) for item in (tokens, position_ids, labels, loss_mask)]

        # Get PackedSeqParams that indicate the amount of padding for TransformerEngine.
        packed_seq_params = context_parallel.get_packed_seq_params(tokens, num_image_embeddings, mp_padding_needed, args.context_parallel_size, True)

    return (
        tokens,
        labels,
        loss_mask,
        attention_mask,
        position_ids,
        imgs,
        num_tiles,
        packed_seq_params,
    )


def get_ltor_masks_and_position_ids(input_ids, target, pad_token):
    """Build masks and position id for left to right model."""
    seq_length = input_ids.shape[1]

    # Position ids.
    position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
    position_ids = position_ids.unsqueeze(0).expand_as(input_ids)

    # Loss mask.
    loss_mask = torch.ones(target.size(), dtype=torch.float, device=input_ids.device)
    loss_mask[target == pad_token] = 0.0  # mask paddings
    loss_mask[target == IGNORE_INDEX] = 0.0  # mask prompts

    return loss_mask, position_ids


def get_mask_start_and_end_idx(arr):
    """
    Returns a list of tuples holding the start and end index in arr of the non-zeros contiguuous
    sub arrays.

    For instance, if arr = [0, 1, 0, 0, 1, 1]
    get_mask_start_and_end_idx(arr) = [(1, 1), (4, 5)]
    such that arr[1:1+1] = [1] and arr[4:5+1] = [1, 1]
    """
    mask = (arr != 0)

    mask_int = mask.int()

    diff = mask_int[1:] - mask_int[:-1]
    start_indices = (diff == 1).nonzero(as_tuple=False).flatten() + 1
    end_indices = (diff == -1).nonzero(as_tuple=False).flatten()
    if len(mask)==0: return []
    if mask[0]:
        start_indices = torch.cat((torch.tensor([0], device=arr.device), start_indices))
    if mask[-1]:
        end_indices = torch.cat((end_indices, torch.tensor([len(arr) - 1], device=arr.device)))
    sequences = list(zip(start_indices.tolist(), end_indices.tolist()))
    return sequences


def scaled_loss_func(loss_mask, output_tensor):
    """
    Scaled loss function

    Scale the loss for each conversation turn using the formula:

    1 / sum_j[ sqrt(length(loss_turn_j)) ] * sum_i[ sum(loss_turn_i) / sqrt(length(loss_turn_i)) ]

    Where we use the loss mask to infer the start / end of the conversation turns.
    """
    args = get_args()
    losses = output_tensor.float()

    loss_list = []
    num_valid_labels_list = []
    for idx in range(losses.shape[0]):
        loss_this_sample = losses[idx]
        turn_start_end_list = get_mask_start_and_end_idx(loss_mask[idx])
        for turn_start, turn_end in turn_start_end_list:
            # compute loss for each turn
            loss_this_turn = loss_this_sample[turn_start:turn_end+1].sum()
            assert (1 - loss_mask)[idx][turn_start:turn_end+1].sum() < 1.0
            num_valid_labels_this_turn = turn_end - turn_start + 1
            loss_this_turn = loss_this_turn / num_valid_labels_this_turn
            loss_list.append(loss_this_turn)
            # append num of valid labels for each turn
            num_valid_labels_list.append(num_valid_labels_this_turn)
    base_num = sum([math.sqrt(each) for each in num_valid_labels_list])
    for idx in range(len(loss_list)):
        # normalize loss for each turn
        loss_list[idx] = loss_list[idx] * math.sqrt(num_valid_labels_list[idx]) / base_num

    # Some ranks may not get loss tokens due to Context Parallel Sharding
    if len(loss_list) > 0:
        total_loss = torch.stack(loss_list).sum()
        total_tokens = torch.ones_like(total_loss)
    elif len(loss_list) == 0 and args.context_parallel_size > 1:
        total_tokens = loss_mask.sum()
        total_loss = torch.sum(losses.view(-1) * loss_mask)
    else:
        raise RuntimeError("loss_list for loss scaling per conversation unexpectedly got empty list")

    num_tokens = total_tokens.clone().detach().to(torch.int)
    reporting_loss = torch.cat([total_loss.clone().detach().view(1), num_tokens.view(1)])

    return (total_loss, num_tokens, {'lm loss': reporting_loss})


def loss_func(loss_mask, output_tensor):
    args = get_args()

    losses = output_tensor.view(-1).float()
    loss_mask = loss_mask.contiguous().view(-1).float()
    loss = torch.sum(losses * loss_mask)

    num_tokens = loss_mask.sum().clone().detach().to(torch.int)
    reporting_loss = torch.cat([loss.clone().detach().view(1), num_tokens.view(1)])

    return (loss, num_tokens, {'lm loss': reporting_loss})


def forward_step(data_iterator, model: LLaVAModel):
    """Forward training step.

    Args:
        data_iterator (torch.utils.data.dataloader): Input data iterator
        model: Multimodal model

    Returns:
        output_tensor (torch.Tensor): Loss of shape [b, s] if labels are provided, otherwise logits of shape [b, s, vocab_size].
        loss_func (callable): Loss function with a loss mask specified.
    """
    timers = get_timers()

    # Get the batch.
    timers('batch-generator', log_level=2).start()
    (
        tokens,
        labels,
        loss_mask,
        attention_mask,
        position_ids,
        images,
        num_image_tiles,
        packed_seq_params,
    ) = get_batch(data_iterator, model.module.module.image_token_index, model.module.module.img_seq_len)
    timers('batch-generator').stop()

    output_tensor, loss_mask = model(
        images,
        tokens,
        position_ids,
        attention_mask,
        labels,
        loss_mask,
        num_image_tiles=num_image_tiles,
        packed_seq_params=packed_seq_params,
    )
    args = get_args()
    if args.use_loss_scaling:
        loss_function = partial(scaled_loss_func, loss_mask)
    else:
        loss_function = partial(loss_func, loss_mask)

    return output_tensor, loss_function


def llava_embedding_ranks(pp_ranks):
    """LLava's embedding ranks consist of the decoder's first and last ranks (ie, the ViT has no embeddings).
    Args:
        pp_ranks: A list of global ranks that constitute a pipeline group.
    """
    # With no separate encoder pipeline stages (epp=0), the decoder starts at rank 0
    last_rank = pp_ranks[-1]
    if len(pp_ranks) == 1:
        return [last_rank]
    else:
        return [pp_ranks[0], last_rank]


def llava_position_embedding_ranks(pp_ranks):
    """LLava's embedding ranks consist of the singular rank of the model or the decoder's first rank.
    Args:
        pp_ranks: A list of global ranks that constitute a pipeline group.
    """
    # With no separate encoder pipeline stages (epp=0), the decoder starts at rank 0
    last_rank = pp_ranks[-1]
    if len(pp_ranks) == 1:
        return [last_rank]
    else:
        return [pp_ranks[0]]


def run_online_eval(model):
    """Run an evaluation benchmark during training."""
    args = get_args()

    # Online evaluation config is not defined. Do nothing.
    if not args.online_evaluation_config:
        return []

    from config import EvaluationConfig
    # Import the common evaluation functions
    from run_text_generation import get_evaluation_configs, run_evaluation_loop

    # Use the common config loading function
    configs = get_evaluation_configs(config_path=args.online_evaluation_config)

    # The inference code assumes the first rank is the leader.
    # Tensorboard writer is on the last rank.
    # We must write to a storage space that all ranks see.
    output_dir = os.path.join(args.save, "online_eval")
    os.makedirs(output_dir, exist_ok=True)
    
    # Use the common evaluation loop
    scores = run_evaluation_loop(model[0].module, configs, output_dir_override=output_dir, print_output=False)

    return [scores]


def write_eval_to_tensorboard(data, iteration, writer, walltime=None):
    """Write evaluation data to Tensorboard."""
    if not writer:
        return

    for item in data:
        for k, v in item.items():
            writer.add_scalar(k, v, iteration, walltime=walltime)


def write_online_eval_to_tensorboard(data, iteration, writer, walltime=None):
    """Write online evaluation data to Tensorboard."""
    import shutil
    args = get_args()

    # Define source and destination directories
    source_dir = os.path.join(args.save, "online_eval")
    destination_dir = os.path.join(args.save, f"online_eval_{iteration}")
    if os.path.exists(source_dir):
        print("Moving online eval data from", source_dir, "to", destination_dir)

        # Move the directory (back up the generation)
        shutil.move(source_dir, destination_dir)

    write_eval_to_tensorboard(data, iteration, writer, walltime)


if __name__ == "__main__":

    train_valid_test_dataloaders_provider.is_distributed = True

    pretrain(
        train_valid_test_dataloaders_provider,
        model_provider,
        ModelType.encoder_or_decoder,
        forward_step,
        args_defaults={'tokenizer_type': 'GPT2BPETokenizer'},
        extra_args_provider=add_multimodal_extra_args,
        process_non_loss_data_func=write_online_eval_to_tensorboard,
        get_embedding_ranks=llava_embedding_ranks,
        get_position_embedding_ranks=llava_position_embedding_ranks,
        non_loss_data_func=run_online_eval,
    )


================================================
FILE: examples/post_training/modelopt/.gitignore
================================================
!slurm*


================================================
FILE: examples/post_training/modelopt/ADVANCED.md
================================================
<div align="center">

# Advanced Usage

[Advanced Configuration](#advanced-configuration) |
[Slurm Examples](#slurm-examples) |
[Checkpoint Resume](#checkpoint-resume) |

</div>

## Advanced Configuration

### Understanding Configuration Variables

For simplicity, we use `shell` scripts and variables as arguments. Each script has at least 1 positional
argument `[model_conf]`. Some scripts may require more such as `[qformat]` is needed for
quantization.

```sh
\
    HF_MODEL_CKPT=<pretrained_model_name_or_path> \
    bash quantize.sh [model_conf] [qformat]
```

> **❗ IMPORTANT:** `model_conf` is used to get the corresponding Megatron-LM `${MODEL_ARGS}`. For example,
> `meta-llama/Llama-3.1-8B-Instruct` or `deepseek-ai/DeepSeek-R1` are both supported.
>
> Provide the pretrained checkpoint through variable `${HF_MODEL_CKPT}` in commandline or
> in a configuration shell script. More variables (e.g. `${TP}`, `${EP}`, ...) can be provided through
> commandline but we recommend passing all variables in a separate `shell` script.

### Using Configuration Scripts

When `${HF_MODEL_CKPT}` is not set through the commandline, `./env_setup_template.sh` can be used
to pass all variables instead. If you have your own script, use `${SANDBOX_ENV_SETUP}`.

```sh
\
    SANDBOX_ENV_SETUP=<path_to_your_script> \
    bash quantize.sh [model_conf] [qformat]
```

**For Slurm execution**, you **MUST USE** `${SANDBOX_ENV_SETUP}` (default: `./env_setup_template.sh`).
Other variables are not passed through `sbatch` and `srun` automatically.

### Common Configuration Variables

- `HF_MODEL_CKPT`: Path to pretrained model checkpoint
- `TP`: Tensor parallelism degree
- `PP`: Pipeline parallelism degree
- `EP`: Expert parallelism degree (for MoE models)
- `ETP`: Expert tensor parallelism degree (for MoE models)
- `MLM_MODEL_SAVE`: Path to save Megatron-LM checkpoint
- `MLM_MODEL_LOAD`: Path to load Megatron-LM checkpoint
- `MLM_EXTRA_ARGS`: Additional Megatron-LM arguments (e.g., for uneven PP)

## Slurm Examples

For models that require multi-node, our scripts in Megatron-LM examples also support `slurm` with a sbatch wrapper.
Start with the example `slurm/sbatch.sh` with some minor modification or use your existing `sbatch`
script.

Different from local environment, we only allow passing variables through a shell script (default: `env_setup_template.sh`).
Commandline variable passthrough is not supported.

<br>

### ⭐ BF16 Kimi-K2-Instruct EAGLE3 Training

 `conf/moonshotai/kimi_k2_instruct.sh` is a config that has been tested
with 8 nodes of DGX H100 (TP=8, ETP=1, EP=64, overall 64 H100 GPUs in total). Update `HF_MODEL_CKPT` to the exact
checkpoint path in the container to start:

```sh
export USER_FSW=<path_to_scratch_space>
export CONTAINER_IMAGE=<path_to_container_image>
export SANDBOX_ENV_SETUP=./conf/moonshotai/kimi_k2_instruct.sh
sbatch --nodes=8 slurm/sbatch.sh "eagle3.sh moonshotai/Kimi-K2-Instruct"
```

To export the trained EAGLE3 model, switch to `kimi_k2_instruct_export.sh`.
**We only support pipeline-parallel (PP) export.** In this case, 2 nodes are used (PP=16).

```sh
export USER_FSW=<path_to_scratch_space>
export CONTAINER_IMAGE=<path_to_container_image>
export SANDBOX_ENV_SETUP=./conf/moonshotai/kimi_k2_instruct_export.sh
sbatch --nodes=2 slurm/sbatch.sh "export.sh moonshotai/Kimi-K2-Instruct"
```

## Checkpoint Resume

WIP


================================================
FILE: examples/post_training/modelopt/Dockerfile
================================================
FROM nvcr.io/nvidia/pytorch:25.06-py3

ARG PIP_CONSTRAINT=

WORKDIR /workspace/nmm-sandbox

RUN pip install omegaconf
RUN pip install flask flask_restful fire nltk
RUN pip install tiktoken blobfile

RUN pip install datasets transformers

RUN pip install triton==3.3.1
RUN pip install git+https://github.com/state-spaces/mamba.git
RUN pip install git+https://github.com/Dao-AILab/causal-conv1d.git

RUN pip install -U nvidia-modelopt


================================================
FILE: examples/post_training/modelopt/README.md
================================================
<div align="center">

# Model Optimizer Integrated Examples


[Model Optimizer](https://github.com/NVIDIA/Model-Optimizer) |
[Local Examples](#getting-started-in-a-local-environment) |
[Configuration](./ADVANCED.md#advanced-configuration) |
[Slurm Examples](./ADVANCED.md#slurm-examples) |
[Speculative Decoding](./speculative.md) |
[Knowledge Distillation](./distillation.md) |
[Advanced Topics](./ADVANCED.md)

</div>

[Model Optimizer](https://github.com/NVIDIA/Model-Optimizer) (**ModelOpt**, `nvidia-modelopt`)
provides end-to-end model optimization for NVIDIA hardware including quantization (real or simulated),
knowledge distillation, pruning, speculative decoding, and more.


## Major Features

- Start from Hugging Face pretrained model checkpoint with on-the-fly conversion to Megatron-LM checkpoint format.
- Support all kinds of model parallelism (TP, EP, ETP, PP).
- Export to TensorRT-LLM, vLLM, and SGLang ready unified checkpoint.

## Support Matrix {Model}x{Features}

| Model (`conf/`) | Quantization | EAGLE3 | Pruning (PP only) | Distillation |
| :---: | :---: | :---: | :---: | :---: |
| `deepseek-ai/DeepSeek-R1` | ✅ | ✅ | - | - |
| `meta-llama/Llama-{3.1-8B, 3.1-405B, 3.2-1B}-Instruct` | ✅ | ✅ | ✅ | ✅ |
| `meta-llama/Llama-4-{Scout,Maverick}-17B-{16,128}E-Instruct` | ✅ | ✅ | - | - |
| `moonshotai/Kimi-K2-Instruct` | ✅ | ✅ | - | - |
| `nvidia/NVIDIA-Nemotron-Nano-9B-v2` | ✅ | - | ✅ | ✅ |
| `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16` | ✅ | - | ✅ | ✅ |
| `nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16` | ✅ | - | ✅ | ✅ |
| `openai/gpt-oss-{20b, 120b}` | ✅ | **Online** | ✅ | ✅ |
| `Qwen/Qwen3-{0.6B, 8B}` | ✅ | ✅ | ✅ | ✅ |
| `Qwen/Qwen3-{30B-A3B, 235B-A22B}` | **WAR** | ✅ | ✅ | ✅ |

## Getting Started in a Local Environment

Install `nvidia-modelopt` from [PyPI](https://pypi.org/project/nvidia-modelopt/):
```sh
pip install -U nvidia-modelopt
```
Alternatively, you can install from [source](https://github.com/NVIDIA/Model-Optimizer)
to try our latest features.

> **❗ IMPORTANT:** The first positional argument (e.g. `meta-llama/Llama-3.2-1B-Instruct`) of each script
> is the config name used to match the supported model config in `conf/`. The pretrained HF checkpoint should
> be downloaded and provided through `${HF_MODEL_CKPT}`.


### ⭐ NVFP4 Quantization, Qauntization-Aware Training, and Model Export

Provide the pretrained checkpoint path through variable `${HF_MODEL_CKPT}` and provide variable
`${MLM_MODEL_SAVE}` which stores a resumeable Megatron-LM distributed checkpoint. To export
Hugging Face-Like quantized checkpoint for TensorRT-LLM, vLLM, or SGLang deployement,
provide `${EXPORT_DIR}` to `export.sh`.

> **📙 NOTE:** ModelOpt supports different quantization formats which are listed in the [ModelOpt quant configs](https://github.com/NVIDIA/Model-Optimizer/blob/7971fff05882da7eae16eae6bc927d1481dcd63f/modelopt/torch/quantization/config.py#L626).
> The quant config is specified by the full config name in all-caps, e.g. NVFP4_DEFAULT_CFG.
> By default, we simulate the low-precision numerical behavior (fake-quant) which can be run on GPUs with compute > 80.
> Real low-precision paramters (e.g. `E4M3` or `E2M1`)
> and low-precision compute (e.g. `FP8Linear`) are also supported depending on GPU compute capability.
> **See [Advanced Topics](./ADVANCED.md) for details**.

```sh
\
    TP=1 \
    HF_MODEL_CKPT=<pretrained_model_name_or_path> \
    MLM_MODEL_SAVE=/tmp/Llama-3.2-1B-Instruct_quant \
    ./quantize.sh meta-llama/Llama-3.2-1B-Instruct NVFP4_DEFAULT_CFG 

\
    PP=1 \
    HF_MODEL_CKPT=<pretrained_model_name_or_path> \
    MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Instruct_quant \
    EXPORT_DIR=/tmp/Llama-3.2-1B-Instruct_export \
    ./export.sh meta-llama/Llama-3.2-1B-Instruct
```

For KV cache quantization, add a flag like `MLM_EXTRA_ARGS="--export-kv-cache-quant fp8"` while specifying your desired KV cache precision (see `KV_QUANT_CFG_CHOICES` in `quantize.py`).

### ⭐ Online BF16 EAGLE3 Training

Online EAGLE3 training has both the target (frozen) and draft models in the memory where the `hidden_states`
required for training is generated on the fly. Periodically, acceptance length (AL, the higher the better) is
evaluated on MT-Bench prompts. Use the same `export.sh` script to export the EAGLE3 checkpoint for
deployment.

```sh
\
    TP=1 \
    HF_MODEL_CKPT=<pretrained_model_name_or_path> \
    MLM_MODEL_SAVE=/tmp/Llama-3.2-1B-Eagle3 \
    ./eagle3.sh meta-llama/Llama-3.2-1B-Instruct

\
    PP=1 \
    HF_MODEL_CKPT=<pretrained_model_name_or_path> \
    MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Eagle3 \
    EXPORT_DIR=/tmp/Llama-3.2-1B-Eagle3-Export \
    ./export.sh meta-llama/Llama-3.2-1B-Instruct
```

See [Advanced Topics](./ADVANCED.md) for a `moonshotai/Kimi-K2-Instruct` EAGLE3 training example using `slurm`.

### ⭐ Offline BF16 EAGLE3 Training
Unlike online EAGLE3 training, offline workflow precomputes target model `hidden_states` and dumps to disk.
Then only the draft model is called during training. AL is no longer reported during training. After training,
`export.sh` is used to export EAGLE3 checkpoint.

```sh
\
    # Convert to online eagle3 model for base model feature extraction
    HF_MODEL_CKPT=<pretrained_model_name_or_path> \
    MLM_MODEL_SAVE=/tmp/Llama-3.2-1B-Eagle3 \
    MLM_EXTRA_ARGS="--algorithm eagle3" \
    ./convert.sh meta-llama/Llama-3.2-1B-Instruct

\
    # Dump base model feature to disk
    MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Eagle3 \
    MLM_EXTRA_ARGS="--output-dir /tmp/offline_data" \
    ./offline_feature_extrach.sh meta-llama/Llama-3.2-1B-Instruct

\
    # Convert to offline eagle3 model
    HF_MODEL_CKPT=<pretrained_model_name_or_path> \
    MLM_MODEL_SAVE=/tmp/Llama-3.2-1B-Eagle3-offline \
    MLM_EXTRA_ARGS="--algorithm eagle3 --export-offline-model" \
    ./convert.sh meta-llama/Llama-3.2-1B-Instruct

\
    # Train the offline eagle3 model using extracted features
    MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Eagle3-offline \
    MLM_MODEL_SAVE=/tmp/Llama-3.2-1B-Eagle3-offline \
    MLM_EXTRA_ARGS="--export-offline-model --offline-distillation-data /tmp/offline_data" \
    ./finetune.sh meta-llama/Llama-3.2-1B-Instruct

\
    # Export the trained eagle3 checkpoint
    PP=1 \
    HF_MODEL_CKPT=<pretrained_model_name_or_path> \
    MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Eagle3-offline \
    EXPORT_DIR=/tmp/Llama-3.2-1B-Eagle3-Export \
    MLM_EXTRA_ARGS="--export-offline-model" \
    ./export.sh meta-llama/Llama-3.2-1B-Instruct
```

### ⭐ Pruning

Checkout pruning getting started section and guidelines for configuring pruning parameters in the [ModelOpt pruning README](https://github.com/NVIDIA/Model-Optimizer/tree/main/examples/pruning).

Pruning is supported for GPT and Mamba models in Pipeline Parallel mode. Available pruning dimensions are:

- `TARGET_FFN_HIDDEN_SIZE`
- `TARGET_HIDDEN_SIZE`
- `TARGET_NUM_ATTENTION_HEADS`
- `TARGET_NUM_QUERY_GROUPS`
- `TARGET_MAMBA_NUM_HEADS`
- `TARGET_MAMBA_HEAD_DIM`
- `TARGET_NUM_MOE_EXPERTS`
- `TARGET_MOE_FFN_HIDDEN_SIZE`
- `TARGET_MOE_SHARED_EXPERT_INTERMEDIATE_SIZE`
- `TARGET_NUM_LAYERS`
- `LAYERS_TO_DROP` (comma separated, 1-indexed list of layer numbers to directly drop)

Example for depth pruning Qwen3-8B from 36 to 24 layers:

```sh
PP=1 \
TARGET_NUM_LAYERS=24 \
HF_MODEL_CKPT=<pretrained_model_name_or_path> \
MLM_MODEL_SAVE=Qwen3-8B-Pruned \
./prune.sh Qwen/Qwen3-8B
```

> [!TIP]
> If number of layers in the model is not divisible by pipeline parallel size (PP), you can configure uneven
> PP by setting `MLM_EXTRA_ARGS="--decoder-first-pipeline-num-layers <X> --decoder-last-pipeline-num-layers <Y>"`

> [!TIP]
> You can reuse pruning scores for pruning same model again to different architectures by setting
> `PRUNE_ARGS="--pruning-scores-path <path_to_save_scores>"`

> [!NOTE]
> When loading pruned M-LM checkpoint for subsequent steps, make sure overwrite the pruned parameters in the
> default `conf/` by setting `MLM_EXTRA_ARGS`. E.g.: for loading above pruned Qwen3-8B checkpoint for mmlu, set:
> `MLM_EXTRA_ARGS="--num-layers 24"`

### ⭐ Inference and Training

The saved Megatron-LM distributed checkpoint (output of above scripts) can be resumed for inference
(generate or evaluate) or training (SFT or PEFT). To read more about these features, see
[Advanced Topics](./ADVANCED.md).

```sh
\
    TP=1 \
    MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Instruct_quant \
    ./generate.sh meta-llama/Llama-3.2-1B-Instruct

\
    TP=1 \
    MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Instruct_quant \
    ./mmlu.sh meta-llama/Llama-3.2-1B-Instruct

\
    TP=1 \
    MLM_MODEL_CKPT=/tmp/Llama-3.2-1B-Instruct_quant \
    ./finetune.sh meta-llama/Llama-3.2-1B-Instruct
```

## Advanced Usage
To contribute, please ping [@NVIDIA/post-training](https://github.com/orgs/NVIDIA/teams/post-training) team members. We format the examples with
```
uvx black@24.10.0 .
uvx isort .
```


================================================
FILE: examples/post_training/modelopt/conf/Qwen/Qwen2.5-0.5B-Instruct.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=Qwen/Qwen2.5-0.5B
    TOKENIZER_MODEL=Qwen/Qwen2.5-0.5B
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

MODEL_ARGS=" \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --add-qkv-bias \
    --position-embedding-type rope \
    --no-rope-fusion \
    --normalization RMSNorm \
    --swiglu \
    --num-layers 24 \
    --hidden-size 896 \
    --ffn-hidden-size 4864 \
    --num-attention-heads 14 \
    --group-query-attention \
    --num-query-groups 2 \
    --kv-channels 64 \
    --seq-length 4096 \
    --max-position-embeddings 32768 \
    --tokenizer-type HuggingFaceTokenizer \
    --padded-vocab-size 151936 \
    --make-vocab-size-divisible-by 1 \
    --use-mcore-models \
    --rotary-percent 1.0 \
    --rotary-base 1000000 \
    --no-bias-swiglu-fusion \
"


================================================
FILE: examples/post_training/modelopt/conf/Qwen/Qwen2.5-7B-Instruct.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=Qwen/Qwen2.5-7B-Instruct
    TOKENIZER_MODEL=Qwen/Qwen2.5-7B-Instruct
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

MODEL_ARGS=" \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --add-qkv-bias \
    --position-embedding-type rope \
    --no-rope-fusion \
    --normalization RMSNorm \
    --swiglu \
    --num-layers 28 \
    --hidden-size 3584 \
    --ffn-hidden-size 18944 \
    --num-attention-heads 28 \
    --group-query-attention \
    --num-query-groups 4 \
    --kv-channels 128 \
    --seq-length 4096 \
    --max-position-embeddings 32768 \
    --tokenizer-type HuggingFaceTokenizer \
    --padded-vocab-size 152064 \
    --make-vocab-size-divisible-by 1 \
    --use-mcore-models \
    --rotary-percent 1.0 \
    --rotary-base 1000000 \
    --no-bias-swiglu-fusion \
    --untie-embeddings-and-output-weights \
"


================================================
FILE: examples/post_training/modelopt/conf/Qwen/Qwen3-0.6B.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=Qwen/Qwen3-0.6B
    TOKENIZER_MODEL=Qwen/Qwen3-0.6B
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

MODEL_ARGS=" \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --position-embedding-type rope \
    --no-rope-fusion \
    --normalization RMSNorm \
    --swiglu \
    --num-layers 28 \
    --hidden-size 1024 \
    --ffn-hidden-size 3072 \
    --num-attention-heads 16 \
    --group-query-attention \
    --num-query-groups 8 \
    --kv-channels 128 \
    --qk-layernorm \
    --seq-length 4096 \
    --max-position-embeddings 40960 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 1187 \
    --use-mcore-models \
    --rotary-percent 1.0 \
    --rotary-base 1000000 \
    --no-bias-swiglu-fusion \
"


================================================
FILE: examples/post_training/modelopt/conf/Qwen/Qwen3-235B-A22B.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=Qwen/Qwen3-235B-A22B
    TOKENIZER_MODEL=Qwen/Qwen3-235B-A22B
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

MODEL_ARGS=" \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --untie-embeddings-and-output-weights \
    --position-embedding-type rope \
    --no-rope-fusion \
    --normalization RMSNorm \
    --swiglu \
    --num-layers 94 \
    --hidden-size 4096 \
    --ffn-hidden-size 12288 \
    --num-attention-heads 64 \
    --group-query-attention \
    --num-query-groups 4 \
    --kv-channels 128 \
    --qk-layernorm \
    --num-experts 128 \
    --moe-ffn-hidden-size 1536 \
    --moe-router-topk 8 \
    --moe-router-dtype fp32 \
    --moe-aux-loss-coeff 1e-3 \
    --moe-token-dispatcher-type alltoall \
    --moe-router-load-balancing-type aux_loss \
    --moe-layer-recompute \
    --seq-length 4096 \
    --max-position-embeddings 40960 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 1187 \
    --use-mcore-models \
    --rotary-percent 1.0 \
    --rotary-base 1000000 \
    --rotary-seq-len-interpolation-factor 1 \
    --no-bias-swiglu-fusion \
    --attention-dropout 0.0 \
    --hidden-dropout 0.0 \
    --sequence-parallel \
"


================================================
FILE: examples/post_training/modelopt/conf/Qwen/Qwen3-30B-A3B.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=Qwen/Qwen3-30B-A3B
    TOKENIZER_MODEL=Qwen/Qwen3-30B-A3B
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

MODEL_ARGS=" \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --untie-embeddings-and-output-weights \
    --position-embedding-type rope \
    --no-rope-fusion \
    --normalization RMSNorm \
    --swiglu \
    --num-layers 48 \
    --hidden-size 2048 \
    --ffn-hidden-size 6144 \
    --num-attention-heads 32 \
    --group-query-attention \
    --num-query-groups 4 \
    --kv-channels 128 \
    --qk-layernorm \
    --num-experts 128 \
    --moe-ffn-hidden-size 768 \
    --moe-router-topk 8 \
    --moe-router-dtype fp32 \
    --moe-aux-loss-coeff 1e-3 \
    --moe-token-dispatcher-type alltoall \
    --moe-router-load-balancing-type aux_loss \
    --seq-length 4096 \
    --max-position-embeddings 40960 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 1187 \
    --use-mcore-models \
    --rotary-percent 1.0 \
    --rotary-base 1000000 \
    --no-bias-swiglu-fusion \
    --sequence-parallel \
"


================================================
FILE: examples/post_training/modelopt/conf/Qwen/Qwen3-8B.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=Qwen/Qwen3-8B
    TOKENIZER_MODEL=Qwen/Qwen3-8B
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

MODEL_ARGS=" \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --untie-embeddings-and-output-weights \
    --position-embedding-type rope \
    --no-rope-fusion \
    --normalization RMSNorm \
    --swiglu \
    --num-layers 36 \
    --hidden-size 4096 \
    --ffn-hidden-size 12288 \
    --num-attention-heads 32 \
    --group-query-attention \
    --num-query-groups 8 \
    --kv-channels 128 \
    --qk-layernorm \
    --seq-length 4096 \
    --max-position-embeddings 40960 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 1187 \
    --use-mcore-models \
    --rotary-percent 1.0 \
    --rotary-base 1000000 \
    --no-bias-swiglu-fusion \
"


================================================
FILE: examples/post_training/modelopt/conf/arguments.sh
================================================
#!/bin/bash
set -e

MLM_MODEL_CFG=${1}

if [ -z $1 ]; then
    printf "${MLM_ERROR} Model configuration name must be provided as the first argument (e.g. 'meta-llama/Llama-3.2-1B-Instruct')\n"
    exit 1
fi

# Bash coloring
RED='\033[0;31m'
YELLOW='\033[0;33m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
WHITE='\033[0;37m'

# Predefined logging
MLM_ERROR="${RED}ERROR:  ${WHITE}"
MLM_WARNING="${YELLOW}WARNING:${WHITE}"

if [ -z ${SANDBOX_ENV_SETUP} ]; then
    printf "${MLM_WARNING} ${PURPLE}SANDBOX_ENV_SETUP${WHITE} is not set!\n"
else
    source ${SANDBOX_ENV_SETUP}
fi

if [ -z ${SCRIPT_DIR} ]; then
    printf "${MLM_ERROR} Variable ${PURPLE}SCRIPT_DIR${WHITE} must be set!\n"
    exit 1
fi

if [ -z ${MLM_MODEL_CFG} ]; then
    printf "${MLM_ERROR} Variable ${PURPLE}MLM_MODEL_CFG${WHITE} must be set!\n"
    exit 1
fi

if [ -z ${MLM_ENV_SETUP} ]; then
    printf "${MLM_WARNING} Variable ${PURPLE}MLM_ENV_SETUP${WHITE} not set! (only needed when launching with slurm)\n"
else
    source ${MLM_ENV_SETUP}
fi

if [[ -z ${MLM_EXTRA_ARGS} ]]; then
    printf "${MLM_WARNING} Use ${PURPLE}MLM_EXTRA_ARGS${WHITE} to provide additional arguments!\n"
fi

if [ -z ${MLM_WORK_DIR} ]; then
    export  MLM_WORK_DIR=/tmp/megatron_workspace
    printf "${MLM_WARNING} Variable ${PURPLE}MLM_WORK_DIR${WHITE} is set (default: ${MLM_WORK_DIR})!\n"
fi

if [ -z ${TP} ]; then
    TP=1
    printf "${MLM_WARNING} Variable ${PURPLE}TP${WHITE} not set! (default: ${TP})\n"
fi

if [ -z ${ETP} ]; then
    ETP=${TP}
    printf "${MLM_WARNING} Variable ${PURPLE}ETP${WHITE} not set! (default: ${ETP})\n"
fi

if [ -z ${EP} ]; then
    EP=1
    printf "${MLM_WARNING} Variable ${PURPLE}EP${WHITE} not set! (default: ${EP})\n"
fi

if [ -z ${PP} ]; then
    PP=1
    printf "${MLM_WARNING} Variable ${PURPLE}PP${WHITE} not set! (default: ${PP})\n"
fi

if [ -z ${CP} ]; then
    CP=1
    printf "${MLM_WARNING} Variable ${PURPLE}CP${WHITE} not set! (default: ${CP})\n"
fi

if [ -z ${DP} ]; then
    DP=1
    printf "${MLM_WARNING} Variable ${PURPLE}DP${WHITE} not set! (default: ${DP})\n"
fi


if [ -z ${LAUNCH_SCRIPT} ]; then
    LAUNCH_SCRIPT="torchrun --nproc_per_node=$((ETP * EP * PP * CP * DP))"
fi

# Install Model Optimizer if haven't.
if [ -z ${MLM_SKIP_INSTALL} ]; then
    pip install -r ${SCRIPT_DIR}/requirements.txt
fi

export TOKENIZERS_PARALLELISM=False
export OMP_NUM_THREADS=1
export NCCL_IB_SL=1
export NCCL_IB_TIMEOUT=22
export CUDA_DEVICE_MAX_CONNECTIONS=1

# TE specific warning
printf "${MLM_WARNING} If you see core_attention  _extra_state missing error, use --export-force-local-attention\n"

# Base model specific arguments
if [ -z ${SANDBOX_ROOT} ]; then
    source "${SCRIPT_DIR}/conf/${MLM_MODEL_CFG}.sh"
else
    source "${SANDBOX_ROOT}/conf/model/${MLM_MODEL_CFG}.sh"
fi


================================================
FILE: examples/post_training/modelopt/conf/deepseek-ai/DeepSeek-R1.sh
================================================
#!/bin/bash

TOKENIZER_MODEL="deepseek-ai/DeepSeek-R1"

MODEL_ARGS=" \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --untie-embeddings-and-output-weights \
    --position-embedding-type rope \
    --no-rope-fusion \
    --normalization RMSNorm \
    --swiglu \
    --num-layers 61 \
    --hidden-size 7168 \
    --ffn-hidden-size 18432 \
    --num-attention-heads 128 \
    --kv-channels 128 \
    --multi-latent-attention \
    --kv-lora-rank 512 \
    --v-head-dim 128 \
    --q-lora-rank 1536 \
    --qk-head-dim 128 \
    --qk-layernorm \
    --qk-pos-emb-head-dim 64 \
    --num-experts 256 \
    --moe-layer-freq [0]*3+[1]*58 \
    --moe-ffn-hidden-size 2048 \
    --moe-router-dtype fp32 \
    --moe-router-score-function sigmoid \
    --moe-router-bias-update-rate 1e-3 \
    --moe-router-enable-expert-bias \
    --moe-router-topk 8 \
    --moe-router-num-groups 8 \
    --moe-router-group-topk 4 \
    --moe-router-pre-softmax \
    --moe-router-topk-scaling-factor 2.5 \
    --moe-shared-expert-overlap \
    --moe-shared-expert-intermediate-size 2048 \
    --moe-aux-loss-coeff 1e-4 \
    --moe-router-load-balancing-type seq_aux_loss \
    --moe-token-dispatcher-type alltoall \
    --moe-token-drop-policy probs \
    --seq-length 4096 \
    --max-position-embeddings 163840 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 1280 \
    --use-mcore-models \
    --rotary-base 10000 \
    --rotary-percent 1.0 \
    --rotary-scaling-factor 40 \
    --mscale 1.0 \
    --mscale-all-dim 1.0 \
    --recompute-activations \
    --moe-layer-recompute \
    --sequence-parallel \
"
#    --decoder-first-pipeline-num-layers 6 \
#    --decoder-last-pipeline-num-layers 7 \


================================================
FILE: examples/post_training/modelopt/conf/deepseek-ai/DeepSeek-V2-Lite.sh
================================================
#!/bin/bash

TOKENIZER_MODEL="deepseek-ai/DeepSeek-V2-Lite"

MODEL_ARGS=" \
    --save-interval 100000 \
    --attention-dropout 0.0 \
    --hidden-dropout 0.0 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --untie-embeddings-and-output-weights \
    --position-embedding-type rope \
    --no-rope-fusion \
    --normalization RMSNorm \
    --norm-epsilon 1e-6 \
    --swiglu \
    --num-layers 27 \
    --hidden-size 2048 \
    --ffn-hidden-size 10944 \
    --num-attention-heads 16 \
    --kv-channels 16 \
    --multi-latent-attention \
    --kv-lora-rank 512 \
    --v-head-dim 128 \
    --qk-head-dim 128 \
    --qk-layernorm \
    --qk-pos-emb-head-dim 64 \
    --num-experts 64 \
    --moe-layer-freq ([0]+[1]*26) \
    --moe-ffn-hidden-size 1408 \
    --moe-grouped-gemm \
    --moe-router-score-function softmax \
    --moe-router-topk 6 \
    --moe-router-topk-scaling-factor 1.0 \
    --moe-router-pre-softmax \
    --moe-shared-expert-intermediate-size 2816 \
    --moe-aux-loss-coeff 1e-3 \
    --moe-token-dispatcher-type alltoall \
    --moe-token-drop-policy probs \
    --moe-router-load-balancing-type seq_aux_loss \
    --seq-length 1024 \
    --max-position-embeddings 1024 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 3200 \
    --attention-softmax-in-fp32 \
    --use-mcore-models \
    --rotary-percent 1.0 \
    --rotary-base 10000 \
    --rotary-scaling-factor 40 \
    --mscale 0.707 \
    --mscale-all-dim 0.707 \
    --sequence-parallel \
"


================================================
FILE: examples/post_training/modelopt/conf/meta-llama/Llama-3.1-8B-Instruct.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=meta-llama/Llama-3.1-8B-Instruct
    TOKENIZER_MODEL=nvidia/Llama-3.1-70B-Instruct-FP8
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

MODEL_ARGS=" \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --untie-embeddings-and-output-weights \
    --use-rotary-position-embeddings \
    --rotary-percent 1.0 \
    --no-rope-fusion \
    --no-position-embedding \
    --normalization RMSNorm \
    --swiglu \
    --num-layers 32 \
    --hidden-size 4096 \
    --ffn-hidden-size 14336 \
    --num-attention-heads 32 \
    --group-query-attention \
    --num-query-groups 8 \
    --seq-length 4096 \
    --max-position-embeddings 8192 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 1 \
    --use-mcore-models \
    --rotary-base 500000 \
    --use-rope-scaling \
"


================================================
FILE: examples/post_training/modelopt/conf/meta-llama/Llama-3.2-1B-Instruct.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=meta-llama/Llama-3.2-1B-Instruct
    TOKENIZER_MODEL=nvidia/Llama-3.1-70B-Instruct-FP8
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

MODEL_ARGS=" \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --use-rotary-position-embeddings \
    --no-rope-fusion \
    --no-position-embedding \
    --normalization RMSNorm \
    --swiglu \
    --num-layers 16 \
    --hidden-size 2048 \
    --ffn-hidden-size 8192 \
    --num-attention-heads 32 \
    --group-query-attention \
    --num-query-groups 8 \
    --seq-length 4096 \
    --max-position-embeddings 8192 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 1 \
    --use-mcore-models \
    --rotary-percent 1.0 \
    --rotary-base 500000 \
    --use-rope-scaling \
"
#    --export-force-local-attention \


================================================
FILE: examples/post_training/modelopt/conf/meta-llama/Llama-4-Maverick-17B-128E-Instruct.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=meta-llama/Llama-4-Maverick-17B-128E-Instruct
    TOKENIZER_MODEL=meta-llama/Llama-4-Maverick-17B-128E-Instruct
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

MODEL_ARGS=" \
    --recompute-activations \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --untie-embeddings-and-output-weights \
    --position-embedding-type rope \
    --no-rope-fusion \
    --normalization RMSNorm \
    --swiglu \
    --num-layers 48 \
    --hidden-size 5120 \
    --ffn-hidden-size 16384 \
    --num-attention-heads 40 \
    --group-query-attention \
    --num-query-groups 8 \
    --num-experts 128 \
    --moe-layer-freq ([0,1]*24) \
    --moe-layer-recompute \
    --moe-ffn-hidden-size 8192 \
    --moe-router-score-function sigmoid \
    --moe-router-topk 1 \
    --moe-router-topk-scaling-factor 1.0 \
    --moe-router-dtype fp32 \
    --moe-shared-expert-intermediate-size 8192 \
    --moe-aux-loss-coeff 1e-3 \
    --moe-token-dispatcher-type alltoall \
    --moe-token-drop-policy probs \
    --moe-router-load-balancing-type seq_aux_loss \
    --seq-length 2048 \
    --max-position-embeddings 2048 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 1 \
    --use-mcore-models \
    --rotary-percent 1.0 \
    --rope-scaling-factor 8.0 \
    --rotary-base 500000 \
    --rotary-interleaved \
    --no-rope-freq 4 \
    --export-moe-apply-probs-on-input \
    --padded-vocab-size 202048 \
"


================================================
FILE: examples/post_training/modelopt/conf/meta-llama/Llama-4-Scout-17B-16E-Instruct.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=meta-llama/Llama-4-Scout-17B-16E-Instruct
    TOKENIZER_MODEL=meta-llama/Llama-4-Scout-17B-16E-Instruct
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

MODEL_ARGS=" \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --untie-embeddings-and-output-weights \
    --position-embedding-type rope \
    --no-rope-fusion \
    --normalization RMSNorm \
    --swiglu \
    --num-layers 48 \
    --hidden-size 5120 \
    --ffn-hidden-size 16384 \
    --num-attention-heads 40 \
    --group-query-attention \
    --num-query-groups 8 \
    --qk-layernorm \
    --num-experts 16 \
    --moe-ffn-hidden-size 8192 \
    --moe-router-score-function sigmoid \
    --moe-router-topk 1 \
    --moe-router-topk-scaling-factor 1.0 \
    --moe-router-dtype fp32 \
    --moe-shared-expert-intermediate-size 8192 \
    --moe-aux-loss-coeff 1e-3 \
    --moe-token-dispatcher-type alltoall \
    --moe-token-drop-policy probs \
    --moe-router-load-balancing-type seq_aux_loss \
    --seq-length 4096 \
    --max-position-embeddings 4096 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 128 \
    --use-mcore-models \
    --rotary-interleaved \
    --rotary-percent 1.0 \
    --rotary-base 500000 \
    --rope-scaling-factor 8.0 \
    --use-rope-scaling \
    --sequence-parallel \
    --no-bias-swiglu-fusion \
    --export-qk-l2-norm \
    --export-moe-apply-probs-on-input \
    --padded-vocab-size 202048 \
"


================================================
FILE: examples/post_training/modelopt/conf/moonshotai/Kimi-K2-Instruct.sh
================================================
#!/bin/bash

TOKENIZER_MODEL="moonshotai/Kimi-K2-Instruct"


MODEL_ARGS=" \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --untie-embeddings-and-output-weights \
    --position-embedding-type rope \
    --no-rope-fusion \
    --normalization RMSNorm \
    --swiglu \
    --num-layers 61 \
    --hidden-size 7168 \
    --ffn-hidden-size 18432 \
    --num-attention-heads 64 \
    --kv-channels 128 \
    --multi-latent-attention \
    --kv-lora-rank 512 \
    --v-head-dim 128 \
    --q-lora-rank 1536 \
    --qk-head-dim 128 \
    --qk-layernorm \
    --qk-pos-emb-head-dim 64 \
    --num-experts 384 \
    --moe-layer-freq [0]*1+[1]*60 \
    --moe-ffn-hidden-size 2048 \
    --moe-router-dtype fp32 \
    --moe-router-score-function sigmoid \
    --moe-router-bias-update-rate 1e-3 \
    --moe-router-enable-expert-bias \
    --moe-router-topk 8 \
    --moe-router-pre-softmax \
    --moe-router-topk-scaling-factor 2.827 \
    --moe-shared-expert-overlap \
    --moe-shared-expert-intermediate-size 2048 \
    --moe-aux-loss-coeff 1e-4 \
    --moe-router-load-balancing-type seq_aux_loss \
    --moe-token-dispatcher-type alltoall \
    --moe-token-drop-policy probs \
    --seq-length 4096 \
    --max-position-embeddings 131072 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 1 \
    --use-mcore-models \
    --rotary-base 50000 \
    --rotary-percent 1.0 \
    --rotary-scaling-factor 32 \
    --mscale 1.0 \
    --mscale-all-dim 1.0 \
    --padded-vocab-size 163840 \
    --recompute-activations \
    --moe-layer-recompute \
    --sequence-parallel \
    --trust-remote-code \
"


================================================
FILE: examples/post_training/modelopt/conf/moonshotai/kimi_k2_instruct.sh
================================================
#!/bin/bash

HF_MODEL_CKPT=/workspace/scratch/moonshotai/Kimi-K2-Instruct
TP=8
ETP=1
EP=64


================================================
FILE: examples/post_training/modelopt/conf/moonshotai/kimi_k2_instruct_export.sh
================================================
#!/bin/bash

HF_MODEL_CKPT=/workspace/scratch/moonshotai/Kimi-K2-Instruct

MLM_EXTRA_ARGS=" \
    --decoder-first-pipeline-num-layers 3 \
    --decoder-last-pipeline-num-layers 2 \
    --init-model-with-meta-device \
    --use-cpu-initialization \

"

# Layer distribution over PP: 3, [4] * 14, 2.
PP=16


================================================
FILE: examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16
    TOKENIZER_MODEL=nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

MODEL_ARGS=" \
    --trust-remote-code \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --moe-token-dispatcher-type allgather \
    --enable-experimental \
    --moe-permute-fusion \
    --use-fused-weighted-squared-relu \
    --cross-entropy-loss-fusion \
    --cross-entropy-fusion-impl native \
    --moe-router-score-function sigmoid \
    --moe-grouped-gemm \
    --num-experts 128 \
    --moe-router-topk 6 \
    --moe-aux-loss-coeff 1e-4 \
    --moe-router-topk-scaling-factor 2.5 \
    --moe-router-enable-expert-bias \
    --moe-router-dtype fp32 \
    --moe-router-load-balancing-type seq_aux_loss \
    --moe-shared-expert-intermediate-size 3712 \
    \
    --attention-backend flash \
    --disable-gloo-process-groups \
    --mamba-num-heads 64 \
    --mamba-head-dim 64 \
    --hybrid-layer-pattern MEMEM*EMEMEM*EMEMEM*EMEMEM*EMEMEM*EMEMEMEM*EMEMEMEME \
    --use-mcore-models \
    --untie-embeddings-and-output-weights \
    --disable-bias-linear \
    --init-method-std 0.0173 \
    --position-embedding-type none \
    --squared-relu \
    --hidden-size 2688 \
    --num-attention-heads 32 \
    --group-query-attention \
    --num-query-groups 2 \
    --ffn-hidden-size 1856 \
    --kv-channels 128 \
    --normalization RMSNorm \
    \
    --tokenizer-type HuggingFaceTokenizer \
    --bf16 \
    --seq-length 8192 \
    --max-position-embeddings 8192 \
    --export-model-type MambaModel \
    "


================================================
FILE: examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16
    TOKENIZER_MODEL=nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi


MODEL_ARGS=" \
    --trust-remote-code \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --enable-experimental \
    --use-fused-weighted-squared-relu \
    --cross-entropy-loss-fusion \
    --cross-entropy-fusion-impl native \
    --num-experts 512 \
    --moe-router-score-function sigmoid \
    --moe-grouped-gemm \
    --moe-aux-loss-coeff 1e-4 \
    --moe-router-topk 22 \
    --moe-permute-fusion \
    --moe-router-topk-scaling-factor 5.0 \
    --moe-router-enable-expert-bias \
    --moe-router-dtype fp32 \
    --moe-router-load-balancing-type seq_aux_loss \
    --moe-shared-expert-intermediate-size 5376 \
    --moe-token-dispatcher-type allgather \
    --moe-latent-size 1024 \
    \
    --attention-backend flash \
    --disable-gloo-process-groups \
    --is-hybrid-model \
    --mamba-num-heads 128 \
    --mamba-head-dim 64 \
    --hybrid-layer-pattern MEMEMEM*EMEMEMEM*EMEMEMEM*EMEMEMEMEM*EMEMEMEMEM*EMEMEMEMEM*EMEMEMEMEM*EMEMEMEM*EMEMEMEME \
    \
    --use-mcore-models \
    --untie-embeddings-and-output-weights \
    --disable-bias-linear \
    --init-method-std 0.014 \
    --position-embedding-type none \
    --squared-relu \
    --hidden-size 4096 \
    --num-attention-heads 32 \
    --group-query-attention \
    --num-query-groups 2 \
    --ffn-hidden-size 2688 \
    --kv-channels 128 \
    --normalization RMSNorm \
    --attention-dropout 0.0 \
    --hidden-dropout 0.0 \
    \
    --tokenizer-type HuggingFaceTokenizer \
    --bf16 \
    --seq-length 8192 \
    --max-position-embeddings 8192 \
    --export-model-type MambaModel \
    "


================================================
FILE: examples/post_training/modelopt/conf/nvidia/NVIDIA-Nemotron-Nano-9B-v2.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base
    TOKENIZER_MODEL=nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

MODEL_ARGS=" \
    --trust-remote-code \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --untie-embeddings-and-output-weights \
    --position-embedding-type none \
    --no-rope-fusion \
    --normalization RMSNorm \
    --squared-relu \
    --hidden-size 4480 \
    --ffn-hidden-size 15680 \
    --num-attention-heads 40 \
    --kv-channels 128 \
    --group-query-attention \
    --num-query-groups 8 \
    --hybrid-layer-pattern M-M-M-MM-M-M-M*-M-M-M*-M-M-M-M*-M-M-M-M*-M-MM-M-M-M-M-M- \
    --mamba-head-dim 80 \
    --mamba-num-heads 128 \
    --mamba-num-groups 8 \
    --mamba-state-dim 128 \
    --seq-length 4096 \
    --max-position-embeddings 131072 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 1 \
    --use-mcore-models \
    --export-model-type MambaModel \
    --padded-vocab-size 131072 \
"


================================================
FILE: examples/post_training/modelopt/conf/nvidia/Nemotron-H-47B-Reasoning-128K.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=nvidia/Nemotron-H-47B-Reasoning-128K
    TOKENIZER_MODEL=nvidia/Nemotron-H-47B-Reasoning-128K
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

MODEL_ARGS=" \
    --trust-remote-code \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --attention-backend flash \
    --disable-bias-linear \
    --untie-embeddings-and-output-weights \
    --position-embedding-type none \
    --normalization RMSNorm \
    --squared-relu \
    --hidden-size 8192 \
    --ffn-hidden-size 30720 \
    --num-attention-heads 64 \
    --kv-channels 128 \
    --group-query-attention \
    --num-query-groups 8 \
    --hybrid-layer-pattern M-M-M-M-M-M-M-M-M*-M-M-M-M-M-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M-M-M---MM---M-M*-M-M-M-M-M- \
    --mamba-head-dim 64 \
    --mamba-num-heads 256 \
    --mamba-num-groups 8 \
    --mamba-state-dim 256 \
    --seq-length 8192 \
    --max-position-embeddings 8192 \
    --tokenizer-type HuggingFaceTokenizer \
    --use-mcore-models \
    --export-model-type MambaModel \
"


================================================
FILE: examples/post_training/modelopt/conf/nvidia/Nemotron-H-4B-Instruct.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=nvidia/Nemotron-H-4B-Instruct
    TOKENIZER_MODEL=nvidia/Nemotron-H-4B-Instruct
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

MODEL_ARGS=" \
    --trust-remote-code \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --untie-embeddings-and-output-weights \
    --use-rotary-position-embeddings \
    --rotary-percent 0.5 \
    --no-rope-fusion \
    --no-position-embedding \
    --normalization RMSNorm \
    --squared-relu \
    --hidden-size 3072 \
    --ffn-hidden-size 12288 \
    --kv-channels 128 \
    --num-attention-heads 32 \
    --group-query-attention \
    --num-query-groups 8 \
    --hybrid-layer-pattern M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M- \
    --mamba-head-dim 64 \
    --mamba-num-heads 112 \
    --mamba-num-groups 8 \
    --mamba-state-dim 128 \
    --seq-length 4096 \
    --max-position-embeddings 8192 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 1 \
    --use-mcore-models \
    --rotary-base 10000 \
    --export-model-type MambaModel \
"


================================================
FILE: examples/post_training/modelopt/conf/nvidia/Nemotron-H-56B-Base-8K.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=nvidia/Nemotron-H-56B-Base-8K
    TOKENIZER_MODEL=nvidia/Nemotron-H-56B-Base-8K
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

MODEL_ARGS=" \
    --trust-remote-code \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --attention-backend flash \
    --hybrid-layer-pattern M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M- \
    --mamba-state-dim 256 \
    --tiktoken-pattern v2 \
    --use-mcore-models \
    --untie-embeddings-and-output-weights \
    --disable-bias-linear \
    --init-method-std 0.0099 \
    --position-embedding-type none \
    --squared-relu \
    --hidden-size 8192 \
    --num-attention-heads 64 \
    --group-query-attention \
    --num-query-groups 8 \
    --ffn-hidden-size 32768 \
    --kv-channels 128 \
    --normalization RMSNorm \
    --exit-duration-in-mins 230 \
    --tensor-model-parallel-size 8 \
    --pipeline-model-parallel-size 1 \
    --seq-length 8192 \
    --max-position-embeddings 8192 \
    --tokenizer-type HuggingFaceTokenizer \
    --bf16 \
    --export-model-type MambaModel \
    "


================================================
FILE: examples/post_training/modelopt/conf/nvidia/Nemotron-H-8B-Base-8K.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=nvidia/Nemotron-H-8B-Base-8K
    TOKENIZER_MODEL=nvidia/Nemotron-H-8B-Base-8K
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

MODEL_ARGS=" \
    --trust-remote-code \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --untie-embeddings-and-output-weights \
    --use-rotary-position-embeddings \
    --no-rope-fusion \
    --no-position-embedding \
    --normalization RMSNorm \
    --squared-relu \
    --hidden-size 4096 \
    --ffn-hidden-size 21504 \
    --num-attention-heads 32 \
    --group-query-attention \
    --num-query-groups 8 \
    --hybrid-layer-pattern M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M- \
    --mamba-head-dim 64 \
    --mamba-num-heads 128 \
    --mamba-num-groups 8 \
    --mamba-state-dim 128 \
    --seq-length 4096 \
    --max-position-embeddings 8192 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 1 \
    --use-mcore-models \
    --rotary-percent 0.5 \
    --rotary-base 500000 \
    --export-model-type MambaModel \
"
#    --rotary-base 10000 \


================================================
FILE: examples/post_training/modelopt/conf/nvidia/Nemotron-Mini-4B-Instruct.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=nvidia/Nemotron-Mini-4B-Instruct
    TOKENIZER_MODEL=nvidia/Nemotron-Mini-4B-Instruct
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

MODEL_ARGS=" \
    --trust-remote-code \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --disable-bias-linear \
    --untie-embeddings-and-output-weights \
    --use-rotary-position-embeddings \
    --rotary-percent 0.5 \
    --no-rope-fusion \
    --no-position-embedding \
    --normalization LayerNorm \
    --apply-layernorm-1p \
    --squared-relu \
    --num-layers 32 \
    --hidden-size 3072 \
    --ffn-hidden-size 9216 \
    --num-attention-heads 24 \
    --group-query-attention \
    --num-query-groups 8 \
    --seq-length 4096 \
    --max-position-embeddings 4096 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 1 \
    --use-mcore-models \
    --rotary-base 10000 \
"


================================================
FILE: examples/post_training/modelopt/conf/openai/gpt-oss-120b.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=openai/gpt-oss-20b
    TOKENIZER_MODEL=openai/gpt-oss-20b
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

# WAR: enable-gpt-oss is a temporary workaround for using the default GPT-OSS config
MODEL_ARGS=" \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --untie-embeddings-and-output-weights \
    --no-rope-fusion \
    --normalization RMSNorm \
    --num-layers 36 \
    --hidden-size 2880 \
    --ffn-hidden-size 2880 \
    --num-attention-heads 64 \
    --group-query-attention \
    --num-query-groups 8 \
    --kv-channels 64 \
    --num-experts 128 \
    --moe-ffn-hidden-size 2880 \
    --moe-router-dtype fp32 \
    --moe-router-topk 4 \
    --moe-aux-loss-coeff 0.0 \
    --moe-token-dispatcher-type alltoall \
    --moe-router-score-function softmax \
    --moe-router-load-balancing-type aux_loss \
    --seq-length 4096 \
    --max-position-embeddings 40960 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 128 \
    --use-mcore-models \
    --rotary-percent 1.0 \
    --rotary-base 150000 \
    --no-bias-gelu-fusion \
    --sequence-parallel \
    --export-force-local-attention \
    --no-bias-dropout-fusion \
    --padded-vocab-size 201088 \
    --quick-geglu \
    --glu-linear-offset 1.0 \
    --softmax-type learnable \
    --window-attn-skip-freq 2 \
    --enable-gpt-oss \
    --activation-func-clamp-value 7.0 \
    --window-size 127,0 \
"


================================================
FILE: examples/post_training/modelopt/conf/openai/gpt-oss-20b.sh
================================================
#!/bin/bash

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=openai/gpt-oss-20b
    TOKENIZER_MODEL=openai/gpt-oss-20b
else
    TOKENIZER_MODEL=${HF_MODEL_CKPT}
fi

# WAR: enable-gpt-oss is a temporary workaround for using the default GPT-OSS config
MODEL_ARGS=" \
    --save-interval 100000 \
    --micro-batch-size 1 \
    --bf16 \
    --no-masked-softmax-fusion \
    --untie-embeddings-and-output-weights \
    --no-rope-fusion \
    --normalization RMSNorm \
    --num-layers 24 \
    --hidden-size 2880 \
    --ffn-hidden-size 2880 \
    --num-attention-heads 64 \
    --group-query-attention \
    --num-query-groups 8 \
    --kv-channels 64 \
    --num-experts 32 \
    --moe-ffn-hidden-size 2880 \
    --moe-router-dtype fp32 \
    --moe-router-topk 4 \
    --moe-aux-loss-coeff 0.0 \
    --moe-token-dispatcher-type alltoall \
    --moe-router-score-function softmax \
    --moe-router-load-balancing-type aux_loss \
    --seq-length 4096 \
    --max-position-embeddings 40960 \
    --tokenizer-type HuggingFaceTokenizer \
    --make-vocab-size-divisible-by 128 \
    --use-mcore-models \
    --rotary-percent 1.0 \
    --rotary-base 150000 \
    --no-bias-gelu-fusion \
    --sequence-parallel \
    --export-force-local-attention \
    --no-bias-dropout-fusion \
    --padded-vocab-size 201088 \
    --quick-geglu \
    --glu-linear-offset 1.0 \
    --softmax-type learnable \
    --window-attn-skip-freq 2 \
    --enable-gpt-oss \
    --activation-func-clamp-value 7.0 \
    --window-size 127,0 \
"


================================================
FILE: examples/post_training/modelopt/convert.sh
================================================
#!/bin/bash

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

# Common arguments and base model specific arguments
source "${SCRIPT_DIR}/conf/arguments.sh"

# Default arguments of this script
MLM_DEFAULT_ARGS="
    --distributed-timeout-minutes 60 \
    --finetune \
    --auto-detect-ckpt-format \
    --export-te-mcore-model \
"

if [ -z ${HF_TOKEN} ]; then
    printf "${MLM_WARNING} Variable ${PURPLE}HF_TOKEN${WHITE} is not set! HF snapshot download may fail!\n"
fi

if [ -z ${MLM_MODEL_SAVE} ]; then
    MLM_MODEL_SAVE=${MLM_WORK_DIR}/${MLM_MODEL_CFG}_mlm
    printf "${MLM_WARNING} Variable ${PURPLE}MLM_MODEL_SAVE${WHITE} is not set (default: ${MLM_MODEL_SAVE})!\n"
fi


if [ -z ${MLM_MODEL_CKPT} ]; then
    if [ -z ${HF_MODEL_CKPT} ]; then
        HF_MODEL_CKPT=${1}
    fi
    ${LAUNCH_SCRIPT} ${SCRIPT_DIR}/convert_model.py \
        ${MODEL_ARGS} \
        --tensor-model-parallel-size ${TP} \
        --expert-tensor-parallel-size ${ETP} \
        --pipeline-model-parallel-size ${PP} \
        --expert-model-parallel-size ${EP} \
        --tokenizer-model ${TOKENIZER_MODEL} \
        --pretrained-model-path ${HF_MODEL_CKPT} \
        --save ${MLM_MODEL_SAVE} \
        ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS}
else
    ${LAUNCH_SCRIPT} ${SCRIPT_DIR}/convert_model.py \
        ${MODEL_ARGS} \
        --tensor-model-parallel-size ${TP} \
        --expert-tensor-parallel-size ${ETP} \
        --pipeline-model-parallel-size ${PP} \
        --expert-model-parallel-size ${EP} \
        --tokenizer-model ${TOKENIZER_MODEL} \
        --load ${MLM_MODEL_CKPT} \
        --save ${MLM_MODEL_SAVE} \
        ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS}
fi


================================================
FILE: examples/post_training/modelopt/convert_model.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

"""Convert a GPTModel."""
import functools
import inspect
import json
import os
import sys
import warnings

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")))

import modelopt.torch.speculative as mtsp
import torch
from modelopt.torch.export import import_mcore_gpt_from_hf

from megatron.core import mpu
from megatron.core.enums import ModelType
from megatron.core.parallel_state import destroy_model_parallel
from megatron.post_training.arguments import add_modelopt_args
from megatron.post_training.checkpointing import load_modelopt_checkpoint
from megatron.post_training.model_builder import modelopt_gpt_mamba_builder
from megatron.post_training.utils import (
    report_current_memory_info,
    to_empty_if_meta,
)
from megatron.training import get_args
from megatron.training.checkpointing import save_checkpoint
from megatron.training.initialize import initialize_megatron
from megatron.training.utils import print_rank_0, unwrap_model
from model_provider import model_provider

ALGO_TO_CONFIG = {
    "eagle3": mtsp.config.EAGLE3_DEFAULT_CFG,
    "eagle-mtp": mtsp.config.EAGLE_MTP_DEFAULT_CFG,
}


def add_convert_args(parser):
    """Add additional arguments for ModelOpt checkpoint convertion."""
    group = parser.add_argument_group(title='ModelOpt MCore checkpoint convertion')
    group.add_argument(
        "--pretrained-model-path", type=str, default=None, help="HuggingFace pretrained model"
    )
    group.add_argument(
        "--extra-model-path", type=str, default=None, help="Extra module weights to load"
    )
    group.add_argument(
        '--algorithm',
        type=str,
        choices=["eagle3", "None"],
        default="None",
        help='Chosing between different speculative decoding algorithms. Default is None.',
    )
    group.add_argument(
        "--eagle-config",
        type=str,
        default=None,
        help="EAGLE architecture config. If not given, "
        "a default config will be use. If provided, it will overwrite the default config.",
    )
    group.add_argument(
        "--mix-hidden-states",
        type=bool,
        default=False,
        help="Whether to mix hidden states from previous TTT step.",
    )

    add_modelopt_args(parser)
    return parser


def get_model(model_provider_func, model_type=ModelType.encoder_or_decoder, wrap_with_ddp=True):
    """Build the model."""
    args = get_args()
    args.model_type = model_type
    pre_process = mpu.is_pipeline_first_stage()
    post_process = mpu.is_pipeline_last_stage()

    if args.init_model_with_meta_device:
        with torch.device("meta"):
            model = model_provider_func(pre_process=pre_process, post_process=post_process)
        to_empty_if_meta(model, device="cuda")
    else:
        model = model_provider_func(pre_process=pre_process, post_process=post_process)

    model.model_type = model_type
    return [model]


def check_arguments():
    """Checking user arguments."""
    args = get_args()
    if args.num_layers_per_virtual_pipeline_stage is not None:
        print_rank_0("Interleaved pipeline schedule is not yet supported for text generation.")
        exit()

    if hasattr(args, 'moe_grouped_gemm') and args.moe_grouped_gemm == True:
        if not getattr(args, 'export_default_te_spec', False):
            print_rank_0("WARNING: Forcing moe_grouped_gemm to False for PTQ and export.")
            args.moe_grouped_gemm = False


if __name__ == "__main__":
    initialize_megatron(
        extra_args_provider=add_convert_args,
        args_defaults={
            'tokenizer_type': 'HuggingFaceTokenizer',
            'no_load_rng': True,
            'no_load_optim': True,
        },
    )
    check_arguments()

    args = get_args()

    # Meta device initialization for ParallelLinear only works if using cpu initialization.
    # Meta device initialization is used such that models can be materialized in low-precision
    # directly when ModelOpt real quant is used. Otherwise, the model is first initialized
    # as BF16 in memory which may result in OOM and defeat the purpose of real quant.
    if args.init_model_with_meta_device:
        args.use_cpu_initialization = True
    else:
        warnings.warn(
            "--init-model-with-meta-device is not set. If you would like to resume the "
            "model in low-bit directly (low-memory initialization and skipping 16-bit), "
            "--init-model-with-meta-device must be set.",
            UserWarning,
        )

    model = get_model(
        functools.partial(model_provider, modelopt_gpt_mamba_builder), wrap_with_ddp=False
    )
    report_current_memory_info()

    unwrapped_model = unwrap_model(model)[0]

    if args.pretrained_model_path is not None:
        import_dtype = torch.float16 if args.fp16 else torch.bfloat16
        unwrapped_model = unwrap_model(model)[0]
        workspace_dir = os.environ.get("MLM_WORK_DIR", "/tmp")
        print_rank_0(
            "Import model from Hugging Face checkpoint in dtype {}.".format(str(import_dtype))
        )
        import_kwargs = {
            "dtype": import_dtype,
            "moe_router_dtype": args.moe_router_dtype,
        }
        if "trust_remote_code" in inspect.signature(import_mcore_gpt_from_hf).parameters:
            import_kwargs.update({"trust_remote_code": args.trust_remote_code})
        import_mcore_gpt_from_hf(
            unwrapped_model, args.pretrained_model_path, workspace_dir, **import_kwargs
        )
    elif args.load is not None:
        _ = load_modelopt_checkpoint(model)

    if args.algorithm == "eagle3":
        mtsp_config = ALGO_TO_CONFIG[args.algorithm]
        if args.eagle_config:
            with open(args.eagle_config) as f:
                eagle_config = json.load(f)
            mtsp_config["config"]["eagle_architecture_config"].update(eagle_config)

        if args.export_offline_model:
            mtsp_config["config"]["eagle_offline"] = True
        if args.mix_hidden_states:
            mtsp_config["config"]["eagle_mix_hidden_states"] = True

        unwrapped_model = mtsp.convert(unwrapped_model, mtsp_config)

        if args.extra_model_path is not None:
            eagle_module = getattr(unwrapped_model, "eagle_module", None)
            if eagle_module is not None:
                mcore_eagle_state_dict = torch.load(args.extra_model_path)
                eagle_module.load_state_dict(mcore_eagle_state_dict, strict=False)

    print_rank_0(f"Converted Model:\n {model}")
    torch.distributed.barrier()

    save_checkpoint(1, model, None, None, 0, release=True)

    destroy_model_parallel()


================================================
FILE: examples/post_training/modelopt/distillation.md
================================================
# Megatron-LM ModelOpt Distillation Integration

## How To

### Prerequisites

In order to perform soft-label Knowledge Distillation between two models on a specific dataset,
we take a larger teacher model which has already been fully trained and use its logits as
labels for a smaller student model.

We require the following pieces of data:
* Teacher model weights
* Student model weights (unless starting from scratch)
* NeMo-format config file for teacher model
* Tokenizer
* Dataset

And optionally:
* Distillation run config file

### Teacher checkpoint format

We enforce the use of a config yaml in [NeMo](https://github.com/NVIDIA/NeMo) checkpoint-format style to define the arguments to the teacher model.
The normal command-line arguments go toward constructing the student, thus the values in this file
override the student arguments before being handed to the teacher constructor. This file must be either passed in via
`--export-kd-teacher-model-config` or be named `model_config.yaml` in the root of the teacher model checkpoint folder.
Unlike NeMo-generated checkpoints, Megatron-LM checkpoints do not contain this file by default and must be manually created.

> NOTE: Not all keys in the NeMo-style yaml correspond 1:1 to the argument names for Megatron-LM. These
are converted in `megatron/post_training/model_builder.py`.

### Distillation config format

Configuring the distillation run is done via a separate YAML file with the following fields:

```yaml
logit_layers: ["output_layer", "output_layer"]
intermediate_layer_pairs:
  - ["decoder.layers.0.input_layernorm", "decoder.layers.0.input_layernorm"]
  - ["decoder.final_layernorm", "decoder.layers.30.input_layernorm"]
skip_lm_loss: true
kd_loss_scale: 10.0
logit_kl_temperature: 1.0
```

* `logit_layers` defines the names of the student and teacher submodules, respectively, whose outputs are the logits.
* `intermediate_layer_pairs` defines the potentially multiple – or zero – pairs of intermediate activation layers to also perform loss on.
* `skip_lm_loss` decides whether or not to compute and combine the original training LM loss with the KD loss.
* `kd_loss_scale` will scale the KD loss before adding it to the LM loss, if `skip_lm_loss` is `False`.
* `logit_kl_temperature` is the temperature smoothing factor to multiply the logits by prior to softmax and loss.

Without this configuration file, the default logits-only distillation with scale and temperatures of 1.0 will be performed.

### Training

Distillation is triggered by calling `pretrain_gpt.py` or `pretrain_mamba.py` with the following arguments:

```bash
--export-kd-teacher-load <path-to-teacher-checkpoint>
--export-te-mcore-model
```

optionally alongside the additional following arguments:

```bash
--export-kd-distill-cfg <path-to-distill-config-yaml-file>
--export-kd-teacher-model-config <path-to-teacher-model-config-file>
```

> NOTE: If the teacher checkpoint happens to be in a different format from the student's (whose format is specified via `--ckpt-format`), it can
be distinguished separately using the additional flag `--export-kd-teacher-ckpt-format`.

## Distillation API and design

Knowledge Distillation is done via the [NVIDIA Model Optimizer library](https://github.com/NVIDIA/Model-Optimizer).

The model creation step wraps the base model as the student in a
`modelopt.torch.distill.DistillationModel` wrapper which also contains the teacher model.

Model Optimizer modifies the model using the loss criterion present in the distillation config yaml file, which
defines a loss function between two module attribute names of the teacher and student model, respectively.

Default loss function used between logits is a KL-Divergence Loss and loss used among intermediate tensors is Cosine-Similarity,
both defined in `modelopt.torch.distill.plugins.megatron`.

## Restrictions

* Interleaved Pipeline Parallel is unsupported for Distillation.

## Known Issues

* An unknown memory allocation (a few megabytes per microbatch) takes place when the model is converted to a
`modelopt.torch.distill.DistillationModel`. If `--manual-gc` is enabled, it can easily lead to an OOM after some iterations.

* A CUDA kernel issue is occurring where student's forward latency is severly prolonged compared to running student forward
without a teacher model. This means the total time per iteration may be up to 40% longer than ideally expected.


================================================
FILE: examples/post_training/modelopt/eagle3.sh
================================================
#!/bin/bash

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

# Common arguments and base model specific arguments
source "${SCRIPT_DIR}/conf/arguments.sh"


# Set up cache dir for HF to avoid out of space error
export HF_DATASETS_CACHE="/tmp/hf_datasets_cache"

# Extra arguments of this script
MLM_DEFAULT_ARGS=" \
    --distributed-timeout-minutes 30 \
    --auto-detect-ckpt-format \
    --export-te-mcore-model \
    --finetune \
"

EAGLE3_CONVERT_ARGS=" \
    --algorithm eagle3 \
"

if [ -z ${MLM_MODEL_SAVE} ]; then
    MLM_MODEL_SAVE=${MLM_WORK_DIR}/${MLM_MODEL_CFG}-Eagle3
    printf "${MLM_WARNING} Variable ${PURPLE}MLM_MODEL_SAVE${WHITE} is not set (default: ${MLM_MODEL_SAVE})!\n"
fi

if [ -z ${MLM_DATA_ARGS} ]; then
    MLM_DATA_ARGS=" \
        --train-samples 128000 \
        --lr-decay-samples 128000 \
        --lr-warmup-samples 0 \
        --split 100,0,0 \
        --finetune-hf-dataset Magpie-Align/Magpie-Llama-3.1-Pro-MT-300K-Filtered \
    "
fi

if [ -z ${MLM_TRAIN_ARGS} ]; then
    MLM_TRAIN_ARGS=" \
        --no-gradient-accumulation-fusion \
        --reset-position-ids \
        --reset-attention-mask \
        --eod-mask-loss \
        --micro-batch-size 1 \
        --attention-dropout 0.0 \
        --hidden-dropout 0.0 \
        --no-check-for-nan-in-loss-and-grad \
    "
fi

if [ -z ${MLM_OPTIM_ARGS} ]; then
    MLM_OPTIM_ARGS=" \
        --lr 5.0e-5 \
        --min-lr 1.0e-7 \
        --lr-decay-style cosine \
        --clip-grad 1.0 \
        --weight-decay 0.0 \
        --adam-beta1 0.9 \
        --adam-beta2 0.95 \
        --init-method-std 0.010 \
    "
fi

if [ -z ${MLM_EVAL_ARGS} ]; then
    MLM_EVAL_ARGS=" \
        --eval-iters 1 \
        --eval-interval 1000 \
        --save-interval 1000 \
        --log-interval 100 \
    "
fi

# Convert HF checkpoint to Megatron EAGLE3 model if not exist
if [[ ! -d ${MLM_MODEL_SAVE} ]]; then
    ${LAUNCH_SCRIPT} ${SCRIPT_DIR}/convert_model.py \
        ${MODEL_ARGS} \
        --tensor-model-parallel-size ${TP} \
        --expert-tensor-parallel-size ${ETP} \
        --pipeline-model-parallel-size ${PP} \
        --expert-model-parallel-size ${EP} \
        --tokenizer-model ${TOKENIZER_MODEL} \
        --pretrained-model-path ${HF_MODEL_CKPT} \
        --save ${MLM_MODEL_SAVE} \
        ${MLM_DEFAULT_ARGS} ${EAGLE3_CONVERT_ARGS}
fi


${LAUNCH_SCRIPT} ${SCRIPT_DIR}/finetune.py \
    ${MODEL_ARGS} \
    --tensor-model-parallel-size ${TP} \
    --expert-tensor-parallel-size ${ETP} \
    --expert-model-parallel-size ${EP} \
    --pipeline-model-parallel-size ${PP} \
    --tokenizer-model ${TOKENIZER_MODEL} \
    --load ${MLM_MODEL_SAVE} \
    --save ${MLM_MODEL_SAVE} \
    ${MLM_DATA_ARGS} \
    ${MLM_OPTIM_ARGS} \
    ${MLM_TRAIN_ARGS} \
    ${MLM_EVAL_ARGS} \
    ${MLM_RESUME_ARGS} \
    ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS}


================================================
FILE: examples/post_training/modelopt/export.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

"""Export a GPTModel."""
import functools
import inspect
import os
import sys
import warnings
from pathlib import Path

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")))

import modelopt.torch.export as mtex
import torch

from megatron.post_training.arguments import add_modelopt_args
from megatron.post_training.checkpointing import load_modelopt_checkpoint
from megatron.post_training.model_builder import modelopt_gpt_mamba_builder
from megatron.training import get_args, get_model
from megatron.training.initialize import initialize_megatron
from megatron.training.utils import unwrap_model
from model_provider import model_provider

warnings.filterwarnings('ignore')


def add_modelopt_export_args(parser):
    """Add additional arguments for ModelOpt hf-like export."""
    group = parser.add_argument_group(title='ModelOpt hf-like export')
    group.add_argument(
        "--export-extra-modules",
        action="store_true",
        help="Export extra modules such as Medusa, EAGLE, or MTP.",
    )
    group.add_argument(
        "--pretrained-model-name",
        type=str,
        help="A pretrained model hosted inside a model repo on huggingface.co.",
    )
    group.add_argument("--export-dir", type=str, help="The target export path.")
    add_modelopt_args(parser)
    return parser


if __name__ == "__main__":
    initialize_megatron(
        extra_args_provider=add_modelopt_export_args,
        args_defaults={
            'tokenizer_type': 'HuggingFaceTokenizer',
            'no_load_rng': True,
            'no_load_optim': True,
        },
    )

    args = get_args()

    # Meta device initialization for ParallelLinear only works if using cpu initialization.
    # Meta device initialization is used such that models can be materialized in low-precision
    # directly when ModelOpt real quant is used. Otherwise, the model is first initialized
    # as BF16 in memory which may result in OOM and defeat the purpose of real quant.
    args.use_cpu_initialization = True
    if not args.init_model_with_meta_device:
        warnings.warn(
            "--init-model-with-meta-device is not set. If you would like to resume the "
            "model in low-bit directly (low-memory initialization and skipping 16-bit), "
            "--init-model-with-meta-device must be set.",
            UserWarning,
        )

    model = get_model(
        functools.partial(model_provider, modelopt_gpt_mamba_builder), wrap_with_ddp=False
    )

    # Materialize the model from meta device to cpu before loading the checkpoint.
    unwrapped_model = unwrap_model(model)[0]
    unwrapped_model.to_empty(device="cpu")

    if args.load is not None and Path(args.load).is_dir():
        _ = load_modelopt_checkpoint(model)
    else:
        raise ValueError(f"Invalid load checkpoint directory: {args.load}")


    # Decide whether we are exporting only the extra_modules (e.g. EAGLE3).
    # Only the last pp stage may have extra_modules, hence broadcast from the last rank.
    export_extra_modules = hasattr(unwrapped_model, "eagle_module") or hasattr(
        unwrapped_model, "medusa_heads"
    )
    torch.distributed.broadcast_object_list(
        [export_extra_modules], src=torch.distributed.get_world_size() - 1
    )

    export_kwargs = {
        "export_extra_modules": export_extra_modules,
        "dtype": torch.bfloat16,
        "export_dir": args.export_dir,
        "moe_router_dtype": unwrapped_model.config.moe_router_dtype,
    }
    if "trust_remote_code" in inspect.signature(mtex.export_mcore_gpt_to_hf).parameters:
        export_kwargs.update({"trust_remote_code": args.trust_remote_code})
    mtex.export_mcore_gpt_to_hf(unwrapped_model, args.pretrained_model_name, **export_kwargs)


================================================
FILE: examples/post_training/modelopt/export.sh
================================================
#!/bin/bash

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

# Common arguments and base model specific arguments
source "${SCRIPT_DIR}/conf/arguments.sh"

# Default arguments of this script
MLM_DEFAULT_ARGS="--finetune --auto-detect-ckpt-format --export-te-mcore-model --use-cpu-initialization"

if [ -z ${HF_MODEL_CKPT} ]; then
    HF_MODEL_CKPT=${1}
fi

if [ -z ${HF_TOKEN} ]; then
    printf "${MLM_WARNING} Variable ${PURPLE}HF_TOKEN${WHITE} is not set! Pretrained config download may fail!\n"
fi

if [ -z ${EXPORT_DIR} ]; then
    EXPORT_DIR=${MLM_WORK_DIR}/${MLM_MODEL_CFG}_export
    printf "${MLM_WARNING} Variable ${PURPLE}EXPORT_DIR${WHITE} is not set (default: ${EXPORT_DIR})!\n"
fi

if [ "${TP}" != "1" ]; then
    TP=1
    printf "${MLM_WARNING} Variable ${PURPLE}TP${WHITE} is forced to be 1 during export!!\n"
fi


${LAUNCH_SCRIPT} ${SCRIPT_DIR}/export.py \
    ${MODEL_ARGS} \
    --tensor-model-parallel-size ${TP} \
    --pipeline-model-parallel-size ${PP} \
    --tokenizer-model ${TOKENIZER_MODEL} \
    --load ${MLM_MODEL_CKPT} \
    --pretrained-model-name ${HF_MODEL_CKPT} \
    --export-dir ${EXPORT_DIR} \
    ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS}


================================================
FILE: examples/post_training/modelopt/finetune.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.

"""Supervised Finetuning GPT."""
import itertools
import json
import os
import sys
from functools import partial
from typing import Any, Dict, Optional

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")))

import datasets
import torch
import transformers

from megatron.core import mpu, tensor_parallel
from megatron.core.enums import ModelType
from megatron.core.models.gpt import GPTModel
from megatron.post_training.arguments import add_modelopt_args
from megatron.post_training.loss_func import loss_func
from megatron.post_training.model_builder import modelopt_gpt_mamba_builder
from megatron.post_training.non_loss_data_func import report_draft_acceptance_length
from megatron.training import get_args, get_timers, get_tokenizer, pretrain
from megatron.training.utils import (
    get_batch_on_this_cp_rank,
    get_ltor_masks_and_position_ids,
    print_rank_0,
)
from model_provider import model_provider

REMOVE_THINK_CHAT_TEMPLATE = (
    "{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}"
)


def add_finetune_args(parser):
    """Add additional arguments for finetune."""
    group = parser.add_argument_group(title='Finetune')
    group.add_argument("--offline-distillation-data", type=str, help="Path to the offline dataset directory with base model features.")


    add_modelopt_args(parser)
    return parser

def get_eos_id():
    """Return the eos token id.

    We insert eos_token between two samples during packing. However, if the eos_token is used in message or after turns,
    we need to replace it with some other special tokens that do not appear in message."""
    tokenizer = get_tokenizer()
    hf_tokenizer = tokenizer._tokenizer

    if hf_tokenizer.eos_token == "<|eot_id|>":
        return 128001
    if hf_tokenizer.eos_token == "<|eot|>":
        return 200001
    if hf_tokenizer.eos_token == "<|im_end|>":
        return 151643
    if hf_tokenizer.eos_token == "<|return|>":
        return 199999

    return hf_tokenizer.eos_token_id


class OfflineDataset(torch.utils.data.Dataset):
    def __init__(self, data_dir: str, num_samples):
        self.data_dir = data_dir
        self.num_samples = num_samples
        self.file_paths = []

        for item in os.listdir(data_dir):
            item_path = os.path.join(data_dir, item)
            if os.path.isfile(item_path):
                self.file_paths.append(item_path)

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        idx = idx % len(self.file_paths)
        file_path = self.file_paths[idx]
        sample = torch.load(file_path)
        return sample

class SFTDataset(torch.utils.data.Dataset):

    hf_dataset_to_kwargs = {
        "Open-Orca/OpenOrca": {"split": "train"},
        "Open-Orca/SlimOrca": {"split": "train"},
        "nvidia/Daring-Anteater": {"split": "train"},
        "Magpie-Align/Magpie-Llama-3.1-Pro-MT-300K-Filtered": {"split": "train"},
        "HuggingFaceH4/ultrachat_200k": {"split": "train_sft"},
    }

    hf_dataset_to_conversation = {
        "Open-Orca/OpenOrca": lambda data: SFTDataset._to_conversation(
            data["question"], data["response"]
        ),
        "Open-Orca/SlimOrca": lambda data: SFTDataset._sharegpt_to_openai_conversations(data),
        "nvidia/Daring-Anteater": lambda data: SFTDataset._sharegpt_to_openai_conversations(data),
        "Magpie-Align/Magpie-Llama-3.1-Pro-MT-300K-Filtered": lambda data: SFTDataset._sharegpt_to_openai_conversations(
            data
        ),
    }

    hf_dataset_to_prompt_template = {
        "Open-Orca/OpenOrca": "{{ messages['question'] + ' ' + messages['response'] + ' ' }}",
    }

    @classmethod
    def _wildcard_get(cls, directory: Dict[str, Any], name: str, default_value=None):
        ret = default_value
        for key, val in directory.items():
            if key in name:
                ret = val
                break
        return ret

    def __init__(
        self,
        num_packed_samples: int,
        hf_dataset: str,
        tokenizer: transformers.PreTrainedTokenizerBase,
        seq_length: int,
        num_shards: int = 1,
        shard_index: int = 0,
    ):
        """A simple dataset implementation for supervised fine-tuning.

        The raw data is processed and packed to an indexed dataset on the fly. Users
        specify the total number of packed samples and the dataloader (or sampler)
        access the packed dataset by indices. When the packed dataset length is smaller
        than the index, the packing process fetches the raw data in a cyclic fashion
        until the packed dataset has sufficient length.

        Args:
            num_packed_samples: total number of packed samples (cyclic access)
            hf_dataset: Huggingface dataset name or local path
            tokenizer: Huggingface PreTrainedTokenizer instance
            seq_length: max sequence length
            num_shards: number of shards for distributed training
            shard_index: shard index for distributed training
        """
        if not isinstance(tokenizer, transformers.PreTrainedTokenizerBase):
            raise ValueError("SFTDataset only supports transformers.PreTrainedTokenizerBase!")

        self.num_packed_samples = num_packed_samples
        self.hf_dataset = hf_dataset
        self.tokenizer = tokenizer
        self.seq_length = seq_length
        self.data_transformation = lambda data: data
        self.num_shards = num_shards
        self.shard_index = shard_index
        self.indexed_dataset = []
        self._raw_sample_index = 0

        # [WAR]: For DeepSeek-V3/R1 tokenizer, we modify the chat_template such that the <think>
        # tokens are preserved for supervised learning.
        self.tokenizer.chat_template = self.tokenizer.chat_template.replace(
            REMOVE_THINK_CHAT_TEMPLATE, ""
        )

        hf_dataset_kwargs = SFTDataset.hf_dataset_to_kwargs.get(
            self.hf_dataset, {"split": "train"}
        )
        self._raw_samples = datasets.load_dataset(self.hf_dataset, token=os.environ.get("HF_TOKEN", None), **hf_dataset_kwargs)
        self._raw_samples = self._raw_samples.shard(
            num_shards=self.num_shards, index=shard_index
        )

        print(
            "Rank {:3}/{:3} creates SFT data shard {:3}/{:3} with {:10} raw samples".format(
                torch.distributed.get_rank(),
                torch.distributed.get_world_size(),
                self.shard_index,
                self.num_shards,
                len(self._raw_samples),
            ),
            flush=True,
        )

        if self.tokenizer.chat_template is None:
            self.tokenizer.chat_template = SFTDataset.hf_dataset_to_prompt_template
        elif self.hf_dataset is not None:
            self.data_transformation = SFTDataset._wildcard_get(
                SFTDataset.hf_dataset_to_conversation,
                self.hf_dataset,
                default_value=lambda data: data,
            )

        if self.tokenizer.chat_template is None:
            raise ValueError("No valid chat template!")

    def __len__(self):
        return self.num_packed_samples

    def __getitem__(self, idx):
        """Get the idx packed data.

        The packed data index is different from the raw data index where a packed sample
        of sequence-length may require concatenting multiple raw data. When all raw data
        are used up, the last packed data is throw away, and we have a packed dataset
        in memory. The packed data index may exceed the length of the packed dataset
        which will just wrap in a cyclic fashion.
        """
        idx = idx // self.num_shards

        while idx >= len(self.indexed_dataset):
            packed_samples = self._process_and_pack_example()
            if packed_samples is None:
                break
            else:
                self.indexed_dataset.append(packed_samples)
            if len(self.indexed_dataset) % 10000 == 0:
                print(
                    "Rank {:3}/{:3} requests {:10}/{:10} packed SFT sample".format(
                        torch.distributed.get_rank(),
                        torch.distributed.get_world_size(),
                        idx,
                        len(self.indexed_dataset),
                    ),
                    flush=True,
                )

        idx = idx % len(self.indexed_dataset)
        torch_sample = {}
        for key, val in self.indexed_dataset[idx].items():
            torch_sample[key] = torch.LongTensor(val)
        return torch_sample

    def _process_and_pack_example(self):
        """Process multiple raw data and pack them into fixed sequence length."""
        required_packed_tokens = self.seq_length + 1
        current_packed_samples = []
        current_packed_samples_token_count = 0

        while current_packed_samples_token_count < required_packed_tokens:
            if self._raw_sample_index >= len(self._raw_samples):
                return None
            raw_sample = self._raw_samples[self._raw_sample_index]
            self._raw_sample_index += 1
            processed_sample = self._process_example(raw_sample)
            if processed_sample is not None:
                current_packed_samples.append(processed_sample)
                current_packed_samples_token_count += processed_sample["token_count"]

        packed_samples = {}

        for key in ['input_ids', 'loss_mask']:
            packed_samples[key] = list(
                itertools.chain.from_iterable([obj[key] for obj in current_packed_samples])
            )

        for key in ['token_count']:
            packed_samples[key] = [obj[key] for obj in current_packed_samples]

        return packed_samples

    def _process_example(self, example: Dict[str, Any]):
        """Apply the chat template and compute the answer-only loss mask."""
        if not isinstance(example, Dict):
            raise ValueError("The sample must be a Dict but got {}".format(type(example)))

        # Several things can happen here after the transformation is applied:
        #
        # 1. If the transformation is identity transformation, then either the chat data
        #    is already in OpenAI chat format or there is a custom prompt template used.
        # 2. Otherwise, the tokenizer must have a default chat template and we are either
        #    converting the ShareGPT chat data or standard SFT data to OpenAI chat data.
        example = self.data_transformation(example)

        # Check if this is OpenAI chat data?
        conversations = example.get("conversations", None)
        if conversations is None:
            conversations = example.get("messages", None)

        # We don't use the data if there is no assistant reply or the conversation that
        # starts with the assistant.
        if conversations is not None:
            example = conversations
            if len(conversations) < 2 or example[0]["role"] == "assistant":
                return None

        # We always add eos between samples for training purpose.
        input_ids = self.tokenizer.apply_chat_template(example)
        current_loss_mask = [1] * len(input_ids)
        input_ids = input_ids + [get_eos_id()]
        current_loss_mask += [0]

        assert len(input_ids) == len(current_loss_mask)

        if len(input_ids) > self.seq_length:
            input_ids = input_ids[: self.seq_length]
            current_loss_mask = current_loss_mask[: self.seq_length]

        processed_example = {
            'input_ids': input_ids,
            'loss_mask': current_loss_mask,
            'token_count': len(input_ids),
        }
        return processed_example

    @classmethod
    def _to_conversation(cls, question, response):
        msg_question = {"role": "user", "content": question}
        msg_response = {"role": "assistant", "content": response}
        return {"conversations": [msg_question, msg_response]}

    @classmethod
    def _sharegpt_to_openai_conversations(cls, data):
        role_mapping = {
            "user": "user",
            "User": "user",
            "human": "user",
            "assistant": "assistant",
            "Assistant": "assistant",
            "gpt": "assistant",
            "system": "system",
            "System": "system",
        }
        processed_data = {"conversations": []}
        for msg in data["conversations"]:
            role = role_mapping[msg["from"]]
            content = msg["value"]
            processed_data["conversations"].append({"role": role, "content": content})
        return processed_data

    @classmethod
    def _special_to_openai_conversations(cls, data):
        processed_data = {"conversations": data["input"]["messages"]}
        return processed_data


def train_valid_test_sft_datasets_provider(train_val_test_num_samples):
    """Build the train test and validation datasets.

    Args:
        train_val_test_num_samples : A list containing the number of samples
            in train test and validation.
    """
    print_rank_0("> building train, validation, and test SFT datasets ...")
    args = get_args()
    tokenizer = get_tokenizer()

    if not isinstance(tokenizer._tokenizer, transformers.PreTrainedTokenizerBase):
        raise ValueError("SFTDataset only supports transformers.PreTrainedTokenizerBase!")

    if args.micro_batch_size > 1:
        raise ValueError("SFTDataloader only supports micro_batch_size=1.")

    if args.export_offline_model:
        train_ds = OfflineDataset(os.path.join(args.offline_distillation_data, "train"), train_val_test_num_samples[0])
        valid_ds = OfflineDataset(os.path.join(args.offline_distillation_data, "valid"), train_val_test_num_samples[1])
        test_ds = OfflineDataset(os.path.join(args.offline_distillation_data, "test"), train_val_test_num_samples[2])

        print_rank_0("> finished creating offline SFT datasets ...")
    else:
        kwargs = {
            "hf_dataset": args.finetune_hf_dataset,
            "tokenizer": tokenizer._tokenizer,
            "seq_length": args.seq_length,
            # Optional kwargs
            "num_shards": mpu.get_expert_data_parallel_world_size(),
            "shard_index": mpu.get_expert_data_parallel_rank(),
        }

        train_ds = SFTDataset(train_val_test_num_samples[0], **kwargs)
        valid_ds = SFTDataset(train_val_test_num_samples[1], **kwargs)
        test_ds = SFTDataset(train_val_test_num_samples[2], **kwargs)

        print_rank_0("> finished creating SFT datasets ...")

    return train_ds, valid_ds, test_ds


def get_batch(data_iterator):
    """Generate a batch.

    For OfflineDataset, the aux_hidden_states and final hidden_states from the
    base model are loaded for offline speculative model training."""
    # TODO: this is pretty hacky, find a better way
    if (not mpu.is_pipeline_first_stage()) and (not mpu.is_pipeline_last_stage()):
        return None, None, None, None, None

    args = get_args()

    # Broadcast data since only TP rank-0 has the data_iterator.
    if data_iterator is not None:
        data = next(data_iterator)
    else:
        data = None
    if not args.export_offline_model:
        keys = ["input_ids", "loss_mask"]
        datatype = torch.int64
        data_b = tensor_parallel.broadcast_data(keys, data, datatype)
    else:
        keys = ["input_ids"]
        datatype = torch.int64
        data_b = tensor_parallel.broadcast_data(keys, data, datatype)
        data_b["loss_mask"] = torch.ones_like(data_b["input_ids"])
        data_b["loss_mask"][data_b["loss_mask"]==get_eos_id()] = 0
        data_b["loss_mask"] = torch.cat([data_b["loss_mask"], torch.zeros(1,1).to(torch.cuda.current_device())], dim=-1)

        keys = ["aux_hidden_states", "hidden_states"]
        datatype = torch.bfloat16
        feature_b = tensor_parallel.broadcast_data(keys, data, datatype)


    # Unpack the data received.
    tokens_ = data_b["input_ids"]
    tokens = tokens_[:, 0 : 0 + args.seq_length].contiguous()
    labels = tokens_[:, 1 : 1 + args.seq_length].contiguous()
    answer_only_loss_mask = data_b["loss_mask"][:, 1 : 1 + args.seq_length].contiguous()

    # Get the masks and postition ids.
    attention_mask, loss_mask, position_ids = get_ltor_masks_and_position_ids(
        tokens, get_eos_id(), get_eos_id(), args.reset_position_ids, args.reset_attention_mask, args.eod_mask_loss, False
    )
    loss_mask = loss_mask * answer_only_loss_mask.to(dtype=loss_mask.dtype)


    labels = labels.contiguous()
    loss_mask = loss_mask.contiguous()

    batch = {
        "tokens": tokens,
        "labels": labels,
        "loss_mask": loss_mask,
        "attention_mask": attention_mask,
        "position_ids": position_ids,
    }

    if args.export_offline_model:
        batch["aux_hidden_states"] = feature_b["aux_hidden_states"].transpose(0, 1)[:args.seq_length]
        batch["hidden_states"] = feature_b["hidden_states"].transpose(0, 1)[:args.seq_length]

    # slice batch along sequence dimension for context parallelism
    batch = get_batch_on_this_cp_rank(batch)

    return batch


def non_loss_data_func(model: GPTModel):
    """Callback to compute the acceptance length."""
    args = get_args()
    if not args.export_offline_model and args.context_parallel_size == 1:
        try:
            report_draft_acceptance_length(model)
        except Exception as e:
            print(e)


def forward_step(data_iterator, model: GPTModel):
    """Forward training step.

    Args:
        data_iterator: Input data iterator
        model: The GPT Model
    """
    timers = get_timers()

    args = get_args()

    # Get the batch.
    timers("batch-generator", log_level=2).start()
    batch = get_batch(data_iterator)
    tokens = batch["tokens"]
    labels = batch["labels"]
    loss_mask = batch["loss_mask"]
    attention_mask = batch["attention_mask"]
    position_ids = batch["position_ids"]
    if args.export_offline_model:
        aux_hidden_states = batch["aux_hidden_states"]
        hidden_states = batch["hidden_states"]
    timers("batch-generator").stop()

    if args.export_offline_model:
        output_tensor = model(tokens, position_ids, attention_mask, labels=labels, aux_hidden_states=aux_hidden_states, hidden_states=hidden_states,)
    else:
        output_tensor = model(tokens, position_ids, attention_mask, labels=labels)

    return output_tensor, partial(loss_func, loss_mask, model=model)


if __name__ == "__main__":
    pretrain(
        train_valid_test_sft_datasets_provider,
        partial(model_provider, modelopt_gpt_mamba_builder),
        ModelType.encoder_or_decoder,
        forward_step,
        extra_args_provider=add_finetune_args,
        args_defaults={"tokenizer_type": "HuggingFaceTokenizer"},
        non_loss_data_func=non_loss_data_func,
    )


================================================
FILE: examples/post_training/modelopt/finetune.sh
================================================
#!/bin/bash

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

# Common arguments and base model specific arguments
source "${SCRIPT_DIR}/conf/arguments.sh"


# Set up cache dir for HF to avoid out of space error
export HF_DATASETS_CACHE="/tmp/hf_datasets_cache"

# Extra arguments of this script
MLM_DEFAULT_ARGS=" \
    --distributed-timeout-minutes 30 \
    --auto-detect-ckpt-format \
    --export-te-mcore-model \
    --finetune \
"


if [ -z ${MLM_MODEL_SAVE} ]; then
    MLM_MODEL_SAVE=${MLM_MODEL_CKPT}
    printf "${MLM_WARNING} Variable ${PURPLE}MLM_MODEL_SAVE${WHITE} is not set (default: ${MLM_MODEL_CKPT})!\n"
fi

if [ -z ${DATASET} ]; then
    DATASET="Magpie-Align/Magpie-Llama-3.1-Pro-MT-300K-Filtered"
    printf "${MLM_WARNING} Variable ${PURPLE}DATASET${WHITE} is not set (default: Magpie-Align/Magpie-Llama-3.1-Pro-MT-300K-Filtered)!\n"
fi

if [ -z ${MLM_DATA_ARGS} ]; then
    MLM_DATA_ARGS=" \
        --train-samples 128000 \
        --lr-decay-samples 128000 \
        --lr-warmup-samples 0 \
        --split 100,0,0 \
        --finetune-hf-dataset ${DATASET} \
    "
fi

if [[ -v CP && "$CP" != "1" ]]; then
  BACKEND="fused"
else
  BACKEND="auto"
fi

if [ -z ${MLM_TRAIN_ARGS} ]; then
    MLM_TRAIN_ARGS=" \
        --no-gradient-accumulation-fusion \
        --reset-position-ids \
        --reset-attention-mask \
        --eod-mask-loss \
        --micro-batch-size 1 \
        --attention-dropout 0.0 \
        --hidden-dropout 0.0 \
        --no-check-for-nan-in-loss-and-grad \
        --attention-backend ${BACKEND} \
    "
fi

if [ -z ${MLM_OPTIM_ARGS} ]; then
    MLM_OPTIM_ARGS=" \
        --lr 5.0e-5 \
        --min-lr 1.0e-7 \
        --lr-decay-style cosine \
        --clip-grad 1.0 \
        --weight-decay 0.0 \
        --adam-beta1 0.9 \
        --adam-beta2 0.95 \
        --init-method-std 0.010 \
        --use-distributed-optimizer \
    "
fi

if [ -z ${MLM_EVAL_ARGS} ]; then
    MLM_EVAL_ARGS=" \
        --eval-iters 1 \
        --eval-interval 1000 \
        --save-interval 1000 \
        --log-interval 100 \
    "
fi

export HF_TOKEN=${HF_TOKEN}

${LAUNCH_SCRIPT} ${SCRIPT_DIR}/finetune.py \
    ${MODEL_ARGS} \
    --tensor-model-parallel-size ${TP} \
    --expert-tensor-parallel-size ${ETP} \
    --expert-model-parallel-size ${EP} \
    --pipeline-model-parallel-size ${PP} \
    --context-parallel-size ${CP} \
    --cp-comm-type p2p \
    --tokenizer-model ${TOKENIZER_MODEL} \
    --load ${MLM_MODEL_CKPT} \
    --save ${MLM_MODEL_SAVE} \
    ${MLM_DATA_ARGS} \
    ${MLM_OPTIM_ARGS} \
    ${MLM_TRAIN_ARGS} \
    ${MLM_EVAL_ARGS} \
    ${MLM_RESUME_ARGS} \
    ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS}


================================================
FILE: examples/post_training/modelopt/generate.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Sample Generate GPT."""
import functools
import os
import sys
import warnings

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")))

import torch
from datasets import load_dataset

from megatron.post_training.arguments import add_modelopt_args
from megatron.post_training.checkpointing import load_modelopt_checkpoint
from megatron.post_training.generate import simple_generate
from megatron.post_training.model_builder import modelopt_gpt_mamba_builder
from megatron.post_training.utils import report_current_memory_info, to_empty_if_meta
from megatron.training import get_args, get_model, get_tokenizer, initialize_megatron
from megatron.training.utils import print_rank_0, unwrap_model
from model_provider import model_provider

import modelopt.torch.quantization as mtq

warnings.filterwarnings('once')


def add_generate_args(parser):
    """Add additional arguments for ModelOpt acceptance rate validation."""
    group = parser.add_argument_group(title='ModelOpt ar validation')
    group.add_argument("--osl", type=int, default=128, help="Output sequence length.")
    group.add_argument("--draft-length", type=int, default=0, help="Only used in EAGLE.")
    group.add_argument("--draft-topk", type=int, default=1, help="Only used in EAGLE.")
    group.add_argument("--disable-tqdm", action="store_true", help="Disable tqdm.")
    group.add_argument("--fraction", type=float, default=1.0, help="Fraction of dataset to use.")

    add_modelopt_args(parser)
    return parser


def check_arguments():
    """Checking user arguments."""
    args = get_args()
    if args.num_layers_per_virtual_pipeline_stage is not None:
        print_rank_0("Interleaved pipeline schedule is not yet supported for text generation.")
        exit()

    if hasattr(args, 'moe_grouped_gemm') and args.moe_grouped_gemm == True:
        print_rank_0("WARNING: Forcing moe_grouped_gemm to False for PTQ and export.")
        args.moe_grouped_gemm = False


def mtbench_to_oai_chat(example):
    """Convert MTBench data to OpenAI chat completion format."""
    conversations = []
    for prompt in example["prompt"]:
        conversations.append({"role": "user", "content": prompt})
    example["conversations"] = conversations
    return example


def get_conversations(example):
    """Extract the input for tokenizer.apply_chat_template."""
    conversations = example.get("conversations", None)
    if conversations is None:
        conversations = example.get("messages", None)
    if conversations is None:
        raise ValueError(
            "The data must either have conversations or messages field, but got {}".format(example)
        )
    return conversations


if __name__ == "__main__":
    initialize_megatron(
        extra_args_provider=add_generate_args,
        args_defaults={
            'tokenizer_type': 'HuggingFaceTokenizer',
            'no_load_rng': True,
            'no_load_optim': True,
        },
    )

    check_arguments()

    args = get_args()

    # Meta device initialization for ParallelLinear only works if using cpu initialization.
    # Meta device initialization is used such that models can be materialized in low-precision
    # directly when ModelOpt real quant is used. Otherwise, the model is first initialized
    # as BF16 in memory which may result in OOM and defeat the purpose of real quant.
    if args.init_model_with_meta_device:
        args.use_cpu_initialization = True
    else:
        warnings.warn(
            "--init-model-with-meta-device is not set. If you would like to resume the "
            "model in low-bit directly (low-memory initialization and skipping 16-bit), "
            "--init-model-with-meta-device must be set.",
            UserWarning,
        )

    model = get_model(functools.partial(model_provider, modelopt_gpt_mamba_builder), wrap_with_ddp=False)
    report_current_memory_info()

    unwrapped_model = unwrap_model(model)[0]

    default_conversations = [
        {
            "role": "user",
            "content": "Write an email to a wine expert, requesting a guest "
            "article contribution for your wine blog.",
        }
    ]

    if args.finetune_hf_dataset is None:
        if args.draft_length > 0:
            dataset = load_dataset("HuggingFaceH4/mt_bench_prompts", split="train")
            dataset = dataset.map(mtbench_to_oai_chat)
        else:
            dataset = [{"conversations": default_conversations}]
    else:
        dataset = load_dataset(args.finetune_hf_dataset, split=args.finetune_data_split)

    tokenizer = get_tokenizer()._tokenizer
    if hasattr(tokenizer, "tokenizer"):
        tokenizer = tokenizer.tokenizer


    if args.load is not None:
        load_modelopt_checkpoint(model, strict=not args.untie_embeddings_and_output_weights)
        print_rank_0("Done loading checkpoint")

    unwrapped_model = unwrap_model(model)[0]
    unwrapped_model.eval()

    # Fold the scalars into weight for speedup.
    # [TODO]: fold_weight current assumes all weight_quantizer has weight allocated;
    # however, this is not the case when share_embeddings_and_output_weights is False.
    if getattr(unwrapped_model, "share_embeddings_and_output_weights", False):
        mtq.fold_weight(unwrapped_model)

    for idx, example in enumerate(dataset):
        if idx > args.fraction * len(dataset):
            break
        ref_conversations = get_conversations(example)
        new_conversations = []

        for message in ref_conversations:
            ground_truth = None
            if message["role"] == "assistant":
                ground_truth = message["content"]
            if message["role"] == "user":
                new_conversations.append(message)
                print_rank_0(
                    "{}".format(
                        tokenizer.apply_chat_template(
                            new_conversations, tokenize=False, add_generation_prompt=True
                        )
                    )
                )
                input_ids = tokenizer.apply_chat_template(
                    new_conversations, return_tensors="pt", add_generation_prompt=True
                )
                with torch.no_grad():
                    output_ids = simple_generate(
                        unwrapped_model, input_ids.cuda(), osl=args.osl, disable_tqdm=args.disable_tqdm
                    )
                output_texts = tokenizer.batch_decode(output_ids)[0]
                print_rank_0("{}".format(output_texts))
                new_conversations.append({"role": "assistant", "content": output_texts})

    torch.distributed.barrier()


================================================
FILE: examples/post_training/modelopt/generate.sh
================================================
#!/bin/bash

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

# Common arguments and base model specific arguments
source "${SCRIPT_DIR}/conf/arguments.sh"

# Extra arguments of this script
MLM_DEFAULT_ARGS="--finetune --auto-detect-ckpt-format --export-te-mcore-model"


if [ -z ${MLM_MODEL_CKPT} ]; then
    printf "${MLM_ERROR} Variable ${PURPLE}MLM_MODEL_CKPT${WHITE} must be set!\n"
    exit 1
fi

if [ -z ${DRAFT_LEN} ]; then
    DRAFT_LEN=0
fi


if [ -z ${PROMPTS_PATH} ]; then
    ${LAUNCH_SCRIPT} ${SCRIPT_DIR}/generate.py \
        ${MODEL_ARGS} \
        --tensor-model-parallel-size ${TP} \
        --expert-tensor-parallel-size ${ETP} \
        --expert-model-parallel-size ${EP} \
        --pipeline-model-parallel-size ${PP} \
        --tokenizer-model ${TOKENIZER_MODEL} \
        --load ${MLM_MODEL_CKPT} \
        --draft-length ${DRAFT_LEN} \
        ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS}

else
    ${LAUNCH_SCRIPT} ${SCRIPT_DIR}/generate.py \
        ${MODEL_ARGS} \
        --tensor-model-parallel-size ${TP} \
        --expert-tensor-parallel-size ${ETP} \
        --expert-model-parallel-size ${EP} \
        --pipeline-model-parallel-size ${PP} \
        --tokenizer-model ${TOKENIZER_MODEL} \
        --load ${MLM_MODEL_CKPT} \
        --data ${PROMPTS_PATH} \
        --draft-length ${DRAFT_LEN} \
        ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS}
fi


================================================
FILE: examples/post_training/modelopt/generation_server.sh
================================================
#!/bin/bash

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

# Common arguments and base model specific arguments
source "${SCRIPT_DIR}/conf/arguments.sh"

# Extra arguments of this script
MLM_DEFAULT_ARGS="--finetune --auto-detect-ckpt-format --export-te-mcore-model"


if [ -z ${MLM_MODEL_CKPT} ]; then
    printf "${MLM_ERROR} Variable ${PURPLE}MLM_MODEL_CKPT${WHITE} must be set!\n"
    exit 1
fi

TOOLS_DIR="$(realpath ${SCRIPT_DIR}/../../../tools)"

${LAUNCH_SCRIPT} ${TOOLS_DIR}/run_text_generation_server.py \
    ${MODEL_ARGS} \
    --tensor-model-parallel-size ${TP} \
    --expert-tensor-parallel-size ${ETP} \
    --expert-model-parallel-size ${EP} \
    --pipeline-model-parallel-size ${PP} \
    --tokenizer-model ${TOKENIZER_MODEL} \
    --load ${MLM_MODEL_CKPT} \
    ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS}


================================================
FILE: examples/post_training/modelopt/mmlu.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Sample Generate GPT."""
import functools
import os
import sys
import warnings
import datasets
import logging
import torch.distributed as dist

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")))

import torch
from diskcache import Cache

from megatron.post_training.arguments import add_modelopt_args
from megatron.post_training.checkpointing import load_modelopt_checkpoint
from megatron.post_training.generate import simple_generate
from megatron.post_training.model_builder import modelopt_gpt_mamba_builder
from megatron.post_training.utils import report_current_memory_info
from megatron.training import get_args, get_model, get_tokenizer, initialize_megatron
from megatron.training.utils import print_rank_0, unwrap_model
import modelopt.torch.quantization as mtq
from model_provider import model_provider

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO) # set to debug if you need more logging

warnings.filterwarnings('ignore')

def add_mmlu_args(parser):
    """Add additional arguments for ModelOpt text generation PTQ."""
    group = parser.add_argument_group(title='ModelOpt text generation ptq')
    group.add_argument("--disable-tqdm", action="store_true", help="Disable tqdm.")
    group.add_argument("--fraction", type=float, default=1.0, help="Fraction of dataset to use.")
    group.add_argument("--lower-bound", type=float, default=None)
    group.add_argument("--no-subject-prompt", action="store_true", help="Use empty prompt instead of subject-based prompt.")
    group.add_argument("--mmlu-dataset", type=str, default="cais/mmlu", help="The default dataset to use is cais/mmlu from the HG hub.")
    group.add_argument("--cache-dir", type=str, default=None)
    add_modelopt_args(parser)
    return parser


def get_all_subjects():
    """Return all MMLU subjects."""
    return [
        'abstract_algebra',
        'anatomy',
        'astronomy',
        'business_ethics',
        'clinical_knowledge',
        'college_biology',
        'college_chemistry',
        'college_computer_science',
        'college_mathematics',
        'college_medicine',
        'college_physics',
        'computer_security',
        'conceptual_physics',
        'econometrics',
        'electrical_engineering',
        'elementary_mathematics',
        'formal_logic',
        'global_facts',
        'high_school_biology',
        'high_school_chemistry',
        'high_school_computer_science',
        'high_school_european_history',
        'high_school_geography',
        'high_school_government_and_politics',
        'high_school_macroeconomics',
        'high_school_mathematics',
        'high_school_microeconomics',
        'high_school_physics',
        'high_school_psychology',
        'high_school_statistics',
        'high_school_us_history',
        'high_school_world_history',
        'human_aging',
        'human_sexuality',
        'international_law',
        'jurisprudence',
        'logical_fallacies',
        'machine_learning',
        'management',
        'marketing',
        'medical_genetics',
        'miscellaneous',
        'moral_disputes',
        'moral_scenarios',
        'nutrition',
        'philosophy',
        'prehistory',
        'professional_accounting',
        'professional_law',
        'professional_medicine',
        'professional_psychology',
        'public_relations',
        'security_studies',
        'sociology',
        'us_foreign_policy',
        'virology',
        'world_religions',
    ]


def format_example(example, include_answer: bool = True):
    """Format an example into a multi-choices problem."""
    prompt = example["question"]
    for choice, answer in zip(["A", "B", "C", "D"], example["choices"]):
        prompt += "\n{}. {}".format(choice, answer)
    if include_answer:
        prompt += "\nAnswer: {}\n\n".format(["A", "B", "C", "D"][example["answer"]])
    else:
        prompt += "\nAnswer:"
    return prompt


def generate_prompt(test_example, dev_examples, few_shots=0, no_subject_prompt=False):
    """Generating few-shot prompts."""
    if no_subject_prompt:
        prompt = ""
    else:
        prompt = "The following are multiple choice questions (with answers) about {}.\n\n".format(
            " ".join(test_example["subject"].split("_"))
        )
    for i in range(few_shots):
        prompt += format_example(dev_examples[i])
    prompt += format_example(test_example, include_answer=False)
    return prompt


if __name__ == "__main__":
    initialize_megatron(
        extra_args_provider=add_mmlu_args,
        args_defaults={
            'tokenizer_type': 'HuggingFaceTokenizer',
            'no_load_rng': True,
            'no_load_optim': True,
        },
    )

    args = get_args()
    cache = Cache(args.cache_dir)
    # Meta device initialization for ParallelLinear only works if using cpu initialization.
    # Meta device initialization is used such that models can be materialized in low-precision
    # directly when ModelOpt real quant is used. Otherwise, the model is first initialized
    # as BF16 in memory which may result in OOM and defeat the purpose of real quant.
    if args.init_model_with_meta_device:
        args.use_cpu_initialization = True
    else:
        warnings.warn(
            "--init-model-with-meta-device is not set. If you would like to resume the "
            "model in low-bit directly (low-memory initialization and skipping 16-bit), "
            "--init-model-with-meta-device must be set.",
            UserWarning,
        )

    model = get_model(functools.partial(model_provider, modelopt_gpt_mamba_builder), wrap_with_ddp=False)
    report_current_memory_info()

    # Materialize the model from meta device to gpu before loading the checkpoint.
    unwrapped_model = unwrap_model(model)[0]
    unwrapped_model.eval()
    unwrapped_model.to_empty(device="cuda")
    report_current_memory_info()

    disable_tqdm = args.disable_tqdm or torch.distributed.get_rank() > 0

    tokenizer = get_tokenizer()._tokenizer
    if hasattr(tokenizer, "tokenizer"):
        tokenizer = tokenizer.tokenizer

    if args.load is not None:
        load_modelopt_checkpoint(model, strict=not args.untie_embeddings_and_output_weights)
        print_rank_0("Done loading checkpoint")

    # Fold the scalars into weight for speedup.
    # [TODO]: fold_weight current assumes all weight_quantizer has weight allocated;
    # however, this is not the case when share_embeddings_and_output_weights is False.
    # [TODO]: fold_weight does not support TEGroupedMLP (QuantTEColumnParallelGroupedLinear)
    # which stores per-expert weights as weight0, weight1, etc. instead of a single weight.
    has_grouped_mlp = any("TEGroupedMLP" in type(m).__name__ for m in unwrapped_model.modules())
    if not getattr(unwrapped_model, "share_embeddings_and_output_weights", False) and not has_grouped_mlp:
        mtq.fold_weight(unwrapped_model)

    all_subjects = get_all_subjects()

    all_correct = {}

    for subject in all_subjects:
        test_data = datasets.load_dataset(args.mmlu_dataset, subject, split="test")
        dev_data = datasets.load_dataset(args.mmlu_dataset, subject, split="dev")

        correct = []
        for idx, test_example in enumerate(test_data):
            if idx > args.fraction * len(test_data):
                break
            label = ["A", "B", "C", "D"][test_example["answer"]]
            prompt = generate_prompt(test_example, dev_data, few_shots=0, no_subject_prompt=args.no_subject_prompt)
            cache_key = f"{args.load}_{subject}_{prompt}" # model name, subject, prompt

            if cache_key in cache:
                predict = cache[cache_key]
                if dist.get_rank() == 0:
                    logger.debug(f"Cache hit for {args.load}_{subject}")
            else:
                tokens = tokenizer(prompt, return_tensors="pt")
                with torch.no_grad():
                    generated_ids = simple_generate(
                        unwrapped_model, tokens.input_ids.cuda(), osl=2, disable_tqdm=disable_tqdm
                    )
                predict = tokenizer.batch_decode(generated_ids)[0].strip()
                if torch.distributed.get_rank() == 0:
                    cache.add(cache_key, predict)

            correct += [True] if predict.startswith(label) else [False]
        all_correct[subject] = correct

        if torch.distributed.get_rank() == 0:
            print(
                "{:48}| {:.3f} | {:5}/{:5}".format(
                    subject, sum(correct) / len(correct), sum(correct), len(correct)
                ),
                flush=True,
            )

    avg_correct = []

    for subject, correct in all_correct.items():
        avg_correct += correct

    if torch.distributed.get_rank() == 0:
        print(
            "{:48}| {:.3f} | {:5}/{:5}".format(
                "average", sum(avg_correct) / len(avg_correct), sum(avg_correct), len(avg_correct)
            ),
            flush=True,
        )

        if args.lower_bound is not None:
            assert sum(avg_correct) / len(avg_correct) > args.lower_bound


================================================
FILE: examples/post_training/modelopt/mmlu.sh
================================================
#!/bin/bash

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

# Common arguments and base model specific arguments
source "${SCRIPT_DIR}/conf/arguments.sh"

# Extra arguments of this script
MLM_DEFAULT_ARGS="--finetune --auto-detect-ckpt-format --export-te-mcore-model --sequence-parallel"

${LAUNCH_SCRIPT} ${SCRIPT_DIR}/mmlu.py \
    ${MODEL_ARGS} \
    --tensor-model-parallel-size ${TP} \
    --expert-tensor-parallel-size ${ETP} \
    --expert-model-parallel-size ${EP} \
    --pipeline-model-parallel-size ${PP} \
    --tokenizer-model ${TOKENIZER_MODEL} \
    --load ${MLM_MODEL_CKPT} \
    ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS}


================================================
FILE: examples/post_training/modelopt/offline_feature_extract.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.

"""Supervised Finetuning GPT."""
import functools
import os
import sys

import torch

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")))


from examples.post_training.modelopt.finetune import SFTDataset
from megatron.core import mpu
from megatron.post_training.arguments import add_modelopt_args
from megatron.post_training.checkpointing import load_modelopt_checkpoint
from megatron.post_training.model_builder import modelopt_gpt_mamba_builder
from megatron.training import get_args, get_model, get_tokenizer, initialize_megatron
from megatron.training.utils import print_rank_0, unwrap_model
from model_provider import model_provider


def add_extract_args(parser):
    """Add additional arguments for feature extraction."""
    group = parser.add_argument_group(title='Feature extraction')
    group.add_argument("--num-samples", type=int, default=128000, help="Number of samples.")
    group.add_argument("--output-dir", type=str, help="Path to the output directory.")

    add_modelopt_args(parser)
    return parser

def extract_feature(dataset, model, output_dir, idx_start, idx_end):
    os.makedirs(output_dir, exist_ok=True)
    for i in range(idx_start + mpu.get_expert_data_parallel_rank(), idx_end, mpu.get_expert_data_parallel_world_size()):
        file_name = "{:08d}.pt".format(i - idx_start)
        file_path = os.path.join(output_dir, file_name)
        if not os.path.exists(file_path):
            input_ids = dataset[i]["input_ids"][:dataset.seq_length].unsqueeze(0).to(torch.cuda.current_device())
            output = model(input_ids, return_eagle_inputs=True)
            if mpu.get_tensor_model_parallel_rank() == 0 and mpu.get_expert_model_parallel_rank() == 0:
                torch.save(output, file_path)
            torch.distributed.barrier()

if __name__ == "__main__":
    initialize_megatron(
        extra_args_provider=add_extract_args,
        args_defaults={
            'tokenizer_type': 'HuggingFaceTokenizer',
            'no_load_rng': True,
            'no_load_optim': True,
        },
    )

    args = get_args()
    tokenizer = get_tokenizer()
    model = get_model(functools.partial(model_provider, modelopt_gpt_mamba_builder), wrap_with_ddp=False)

    load_modelopt_checkpoint(model, strict=not args.untie_embeddings_and_output_weights)
    print_rank_0("Done loading checkpoint")

    unwrapped_model = unwrap_model(model)[0]
    unwrapped_model.eval()

    kwargs = {
        "tokenizer": tokenizer._tokenizer,
        "seq_length": args.seq_length,
        # Optional kwargs
        "hf_dataset": args.finetune_hf_dataset,
        "num_shards": mpu.get_expert_data_parallel_world_size(),
        "shard_index": mpu.get_expert_data_parallel_rank(),
    }
    sft_dataset = SFTDataset(args.num_samples, None, **kwargs)

    extract_feature(sft_dataset, unwrapped_model, os.path.join(args.output_dir, "train"), 0, int(args.num_samples * 0.98))
    extract_feature(sft_dataset, unwrapped_model, os.path.join(args.output_dir, "valid"), int(args.num_samples * 0.98), int(args.num_samples * 0.99))
    extract_feature(sft_dataset, unwrapped_model, os.path.join(args.output_dir, "test"), int(args.num_samples * 0.99), args.num_samples)


================================================
FILE: examples/post_training/modelopt/offline_feature_extract.sh
================================================
#!/bin/bash

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

# Common arguments and base model specific arguments
source "${SCRIPT_DIR}/conf/arguments.sh"


# Set up cache dir for HF to avoid out of space error
export HF_DATASETS_CACHE="/tmp/hf_datasets_cache"

# Extra arguments of this script
MLM_DEFAULT_ARGS=" \
    --distributed-timeout-minutes 30 \
    --auto-detect-ckpt-format \
    --export-te-mcore-model \
    --finetune \
"


if [ -z ${MLM_DATA_ARGS} ]; then
    MLM_DATA_ARGS=" \
        --num-samples 128000 \
        --finetune-hf-dataset nvidia/Daring-Anteater \
    "
fi


${LAUNCH_SCRIPT} ${SCRIPT_DIR}/offline_feature_extract.py \
    ${MODEL_ARGS} \
    --tensor-model-parallel-size ${TP} \
    --expert-tensor-parallel-size ${ETP} \
    --expert-model-parallel-size ${EP} \
    --pipeline-model-parallel-size ${PP} \
    --tokenizer-model ${TOKENIZER_MODEL} \
    --load ${MLM_MODEL_CKPT} \
    ${MLM_DATA_ARGS} \
    ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS}


================================================
FILE: examples/post_training/modelopt/prune.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Example script for pruning a GPT / Mamba model using Model Optimizer (ModelOpt).

Read more about ModelOpt pruning at https://github.com/NVIDIA/Model-Optimizer/tree/main/examples/pruning
"""

import functools
import inspect
import os
import sys
import warnings

import torch
from datasets import load_dataset
from tqdm import tqdm

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")))
import modelopt.torch.opt as mto
import modelopt.torch.prune as mtp
from modelopt.torch.export import import_mcore_gpt_from_hf
from modelopt.torch.prune.plugins.mcore_minitron import SUPPORTED_HPARAMS

from megatron.core.parallel_state import (
    get_pipeline_model_parallel_group,
    get_tensor_model_parallel_group,
)
from megatron.post_training.arguments import add_modelopt_args
from megatron.post_training.checkpointing import load_modelopt_checkpoint
from megatron.post_training.generate import simple_generate
from megatron.post_training.model_builder import modelopt_gpt_mamba_builder
from megatron.post_training.utils import (
    report_current_memory_info,
)
from megatron.training import get_args, get_model, get_tokenizer, initialize_megatron
from megatron.training.checkpointing import save_checkpoint
from megatron.training.utils import print_rank_0, unwrap_model
from model_provider import model_provider

warnings.filterwarnings("ignore")


def add_prune_args(parser):
    """Add additional arguments for ModelOpt pruning."""
    group = parser.add_argument_group(title="ModelOpt pruning")
    group.add_argument(
        "--calib-size", type=int, default=1024, help="Samples to use for pruning calibration."
    )
    group.add_argument(
        "--prompts",
        type=str,
        default=("Hello!|Born in California, Soyer trained as a"),
        help="Input texts. Please use | to separate different batches.",
    )
    group.add_argument(
        "--references",
        type=str,
        default="",
        help="Reference texts. Please use | to separate different batches.",
    )
    group.add_argument(
        "--pretrained-model-path", type=str, default=None, help="HuggingFace pretrained model"
    )
    # Pruning parameters
    group.add_argument(
        "--target-ffn-hidden-size", type=int, help="Prune MLP FFN hidden size to this value"
    )
    group.add_argument(
        "--target-hidden-size", type=int, help="Prune hidden size (embedding dim) to this value"
    )
    group.add_argument(
        "--target-num-attention-heads",
        type=int,
        help="Prune number of attention heads to this value. Must be supplied with --target-num-query-groups",
    )
    group.add_argument(
        "--target-num-query-groups",
        type=int,
        help="Prune number of query groups to this value. Must be supplied with --target-num-attention-heads",
    )
    group.add_argument(
        "--target-mamba-num-heads",
        type=int,
        help="Prune number of Mamba attention heads to this value",
    )
    group.add_argument(
        "--target-mamba-head-dim",
        type=int,
        help="Prune dimension of Mamba attention heads to this value",
    )
    group.add_argument(
        "--target-num-moe-experts", type=int, help="Prune number of MoE experts to this value"
    )
    group.add_argument(
        "--target-moe-ffn-hidden-size", type=int, help="Prune MoE FFN hidden size to this value"
    )
    group.add_argument(
        "--target-moe-shared-expert-intermediate-size",
        type=int,
        help="Prune MoE shared expert intermediate size to this value",
    )
    group.add_argument(
        "--target-num-layers",
        type=int,
        help="Prune number of transformer layers to this value based on "
        "Block Influence metric (cosine similarity) as per https://arxiv.org/abs/2403.03853",
    )
    group.add_argument(
        "--layers-to-drop",
        type=int,
        metavar="N",
        nargs="*",
        help="Drop specific model layers (1-indexed). Cannot be used with rest of the pruning options",
    )
    group.add_argument(
        "--pruning-scores-path",
        type=str,
        default=None,
        help="Path to the cache and reuse pruning scores for pruning again to different params",
    )
    add_modelopt_args(parser)
    return parser


def check_arguments(args):
    """Checking user arguments."""
    if args.layers_to_drop:
        if any(getattr(args, f"target_{k}", None) is not None for k in SUPPORTED_HPARAMS):
            raise ValueError("--layers_to_drop cannot be used with other pruning parameters")


def get_calib_dataloader(calib_size=1024, max_sequence_length=512):
    """Return a dataloader for calibration."""
    dataset = load_dataset("cnn_dailymail", name="3.0.0", split="train")
    text_column = "article"

    calib_size = min(len(dataset), calib_size)
    for i in range(calib_size):
        yield dataset[i][text_column][:max_sequence_length]


def get_params(model):
    params = sum(p.numel() for p in model.parameters())
    reduced_params = torch.Tensor([params]).to(device=next(model.parameters()).device)
    torch.distributed.all_reduce(reduced_params, group=get_pipeline_model_parallel_group())
    torch.distributed.all_reduce(reduced_params, group=get_tensor_model_parallel_group())
    return reduced_params.item()


if __name__ == "__main__":
    initialize_megatron(
        extra_args_provider=add_prune_args,
        args_defaults={
            "tokenizer_type": "HuggingFaceTokenizer",
            "no_load_rng": True,
            "no_load_optim": True,
        },
    )

    args = get_args()
    check_arguments(args)

    tokenizer = get_tokenizer()._tokenizer
    if hasattr(tokenizer, "tokenizer"):
        tokenizer = tokenizer.tokenizer
    model = get_model(
        functools.partial(model_provider, modelopt_gpt_mamba_builder), wrap_with_ddp=False
    )
    unwrapped_model = unwrap_model(model)[0]

    report_current_memory_info()

    if args.load is not None:
        load_modelopt_checkpoint(model, strict=not args.untie_embeddings_and_output_weights)
        print_rank_0("Done loading checkpoint")

    if args.pretrained_model_path is not None:
        import_dtype = torch.float16 if args.fp16 else torch.bfloat16
        workspace_dir = os.environ.get("MLM_WORK_DIR", "/tmp")
        import_kwargs = {"dtype": import_dtype}
        if "trust_remote_code" in inspect.signature(import_mcore_gpt_from_hf).parameters:
            import_kwargs.update({"trust_remote_code": args.trust_remote_code})
        import_mcore_gpt_from_hf(
            unwrapped_model, args.pretrained_model_path, workspace_dir, **import_kwargs
        )

    def _custom_prompt_forward_loop_func(model):
        all_prompts = args.prompts.split("|")
        if args.references == "":
            all_references = [None] * len(all_prompts)
        else:
            all_references = args.references.split("|")

        for idx, prompt in tqdm(enumerate(all_prompts), disable=torch.distributed.get_rank()):
            tokens = tokenizer(prompt, return_tensors="pt")
            generated_ids = simple_generate(model, tokens.input_ids.cuda(), osl=32)
            generated_texts = tokenizer.batch_decode(generated_ids)
            print_rank_0("{}".format(generated_texts))
            if all_references[idx] is not None:
                assert all_references[idx] == generated_texts[0], all_references[idx]

    def _hf_dataset_forword_loop_func(model):
        dataloader = get_calib_dataloader(args.calib_size)

        for prompt in tqdm(dataloader, total=args.calib_size, disable=torch.distributed.get_rank()):
            tokens = tokenizer(prompt, return_tensors="pt")
            simple_generate(model, tokens.input_ids.cuda(), osl=1)

    if args.layers_to_drop:
        mtp.mcore_minitron.drop_mcore_language_model_layers(
            model, layers_to_drop=args.layers_to_drop
        )
    else:
        print_rank_0("Pruning model...")
        export_config = {
            k: getattr(args, f"target_{k}")
            for k in SUPPORTED_HPARAMS
            if getattr(args, f"target_{k}", None) is not None
        }
        config = {"forward_loop": _hf_dataset_forword_loop_func}
        if args.pruning_scores_path is not None:
            config["scores_path"] = args.pruning_scores_path
        mtp.prune(
            unwrapped_model,
            mode="mcore_minitron",
            constraints={"export_config": export_config},
            dummy_input=None,  # Not used
            config=config,
        )
        # [WAR till modelopt 0.39]: Remove prune state to avoid converting again on restore which forces TP=1.
        if mto.ModeloptStateManager.has_state_for_mode_type("prune", model=unwrapped_model):
            mto.ModeloptStateManager.remove_state(unwrapped_model)

    print_rank_0(f"Pruned Model:\n {unwrapped_model}")
    print_rank_0(f"Pruned Model Params: {get_params(unwrapped_model)/1e9:.2f}B")

    _custom_prompt_forward_loop_func(unwrapped_model)

    if args.save is not None:
        save_checkpoint(1, model, None, None, 0)

    print_rank_0("Done")


================================================
FILE: examples/post_training/modelopt/prune.sh
================================================
#!/bin/bash

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

# Common arguments and base model specific arguments
source "${SCRIPT_DIR}/conf/arguments.sh"

if [ ${TP} -ne 1 ]; then
    printf "${MLM_ERROR} TP must be 1. Only PP>=1 is supported for pruning.\n"
    exit 1
fi

# Extra arguments of this script
MLM_DEFAULT_ARGS="
    --distributed-timeout-minutes 30 \
    --finetune --auto-detect-ckpt-format \
    --no-gradient-accumulation-fusion \
    --export-te-mcore-model
"

# Pruning target arguments - set these environment variables to enable pruning
# Example: export TARGET_HIDDEN_SIZE=3072 TARGET_FFN_HIDDEN_SIZE=9216
# Example: export LAYERS_TO_DROP="1 5 10"

# Define pruning argument mappings: "env_var:cli_arg"
# List of environment variables we want to check for pruning CLI args
PRUNE_ENV_VARS=(
    TARGET_FFN_HIDDEN_SIZE
    TARGET_HIDDEN_SIZE
    TARGET_NUM_ATTENTION_HEADS
    TARGET_NUM_QUERY_GROUPS
    TARGET_MAMBA_NUM_HEADS
    TARGET_MAMBA_HEAD_DIM
    TARGET_NUM_MOE_EXPERTS
    TARGET_MOE_FFN_HIDDEN_SIZE
    TARGET_MOE_SHARED_EXPERT_INTERMEDIATE_SIZE
    TARGET_NUM_LAYERS
    LAYERS_TO_DROP
)

# Build arguments from environment variables (TARGET_NUM_LAYERS -> --target-num-layers, etc.)
PRUNE_ARGS=${PRUNE_ARGS:-""}
for env_var in "${PRUNE_ENV_VARS[@]}"; do
    if [ ! -z "${!env_var}" ]; then
        # prepend --, convert to lowercase, replace _ with -
        cli_arg="--$(echo "${env_var}" | tr '[:upper:]' '[:lower:]' | tr '_' '-')"
        PRUNE_ARGS="${PRUNE_ARGS} ${cli_arg} ${!env_var}"
    fi
done

if [ -z "${PRUNE_ARGS}" ]; then
    printf "${MLM_WARNING} No pruning arguments specified. Set TARGET_* or LAYERS_TO_DROP environment variables.\n"
fi

if [ -z ${MLM_MODEL_SAVE} ]; then
    MLM_MODEL_SAVE=${MLM_WORK_DIR}/${MLM_MODEL_CFG}_pruned
    printf "${MLM_WARNING} Variable ${PURPLE}MLM_MODEL_SAVE${WHITE} is not set (default: ${MLM_MODEL_SAVE})!\n"
fi

if [ -z ${MLM_MODEL_CKPT} ]; then
    LOAD_ARGS="--pretrained-model-path ${HF_MODEL_CKPT}"
else
    LOAD_ARGS="--load ${MLM_MODEL_CKPT}"
fi


set -ex

${LAUNCH_SCRIPT} ${SCRIPT_DIR}/prune.py \
    ${MODEL_ARGS} \
    ${LOAD_ARGS} \
    --pipeline-model-parallel-size ${PP} \
    --tensor-model-parallel-size ${TP} \
    --tokenizer-model ${TOKENIZER_MODEL} \
    --save ${MLM_MODEL_SAVE} \
    --references "${MLM_REF_LABEL}" \
    ${PRUNE_ARGS} \
    ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS}


================================================
FILE: examples/post_training/modelopt/quantize.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Sample Generate GPT."""

import copy
import functools
import inspect
import json
import os
import random
import sys
import warnings

import torch
import torch.distributed
from tqdm import tqdm

# NOTE: Needs to be before modelopt imports in case megatron.core is not installed.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")))

import modelopt.torch.quantization as mtq
from modelopt.torch.export import import_mcore_gpt_from_hf
from modelopt.torch.utils.dataset_utils import get_dataset_dataloader

try:
    import modelopt.torch.quantization.plugins.psx_formats as mtq_psx
except ImportError:
    mtq_psx = None
    warnings.warn(
        "psx_formats is not installed. PSX formats quantization configs will not be available."
    )
try:
    import modelopt.torch.quantization.plugins.luts as mtq_luts
except ImportError:
    mtq_luts = None
    warnings.warn("luts is not installed. LUTs quantization configs will not be available.")

from megatron.core.utils import get_batch_on_this_cp_rank
from megatron.post_training.arguments import add_modelopt_args
from megatron.post_training.checkpointing import load_modelopt_checkpoint
from megatron.post_training.generate import simple_generate
from megatron.post_training.model_builder import modelopt_gpt_mamba_builder
from megatron.post_training.utils import (
    print_distributed_quant_summary,
    report_current_memory_info,
)
from megatron.training import get_args, get_model, get_tokenizer, initialize_megatron
from megatron.training.checkpointing import save_checkpoint
from megatron.training.utils import print_rank_0, unwrap_model
from model_provider import model_provider

warnings.filterwarnings("ignore")

QUANT_CFG_CHOICES = {}

# Auto-load all quant configs by full name
for k in mtq.config.choices:
    QUANT_CFG_CHOICES[k] = getattr(mtq, k)

KV_QUANT_CFG_CHOICES = {
    "none": "none",
    "fp8": "FP8_KV_CFG",
    "fp8_affine": "FP8_AFFINE_KV_CFG",
    "nvfp4": "NVFP4_KV_CFG",
    "nvfp4_affine": "NVFP4_AFFINE_KV_CFG",
    "nvfp4_rotate": "NVFP4_KV_ROTATE_CFG",
}

if mtq_psx is not None:
    QUANT_CFG_CHOICES.update({k: getattr(mtq_psx, k) for k in mtq_psx.choices})

if mtq_luts is not None:
    QUANT_CFG_CHOICES.update({k: getattr(mtq_luts, k) for k in mtq_luts.choices})


def add_text_generate_ptq_args(parser):
    """Add additional arguments for ModelOpt text generation PTQ."""
    group = parser.add_argument_group(title="ModelOpt text generation ptq")
    group.add_argument(
        "--calib-size", type=int, default=512, help="Number of samples to use for ptq calibration."
    )
    group.add_argument(
        "--calib-dataset-path-or-name",
        type=str,
        default="cnn_dailymail",
        help="Path to local calibration dataset file (.jsonl) or HuggingFace dataset name.",
    )
    group.add_argument(
        "--calib-max-sequence-length",
        type=int,
        default=512,
        help="Maximum sequence length for calibration.",
    )
    group.add_argument(
        "--calib-use-random-offset",
        action="store_true",
        help="Use random offsets when slicing sequences for calibration. (Only for local files)",
    )
    group.add_argument(
        "--calib-batch-size", type=int, default=1, help="Batch size for calibration."
    )
    group.add_argument(
        "--prompts",
        type=str,
        default=("Hello!|Born in California, Soyer trained as a"),
        help="Input texts. Please use | to separate different batches.",
    )
    group.add_argument(
        "--references",
        type=str,
        default="",
        help="Reference texts. Please use | to separate different batches.",
    )
    group.add_argument(
        "--pretrained-model-path", type=str, default=None, help="HuggingFace pretrained model"
    )
    group.add_argument("--compress", action="store_true", help="Enable real low-bit quantization.")
    group.add_argument(
        "--disable-qkv-quant",
        action="store_true",
        help="Disable q, k, v linear from being quantized.",
    )
    group.add_argument("--weight-only", action="store_true", help="Disable input quantization.")
    group.add_argument(
        "--force-all-expert-routing",
        action="store_true",
        help="Forcing all experts to be routed during the calibration.",
    )
    group.add_argument(
        "--num-first-layers-to-skip-quant",
        type=int,
        default=None,
        help="Number of first layers to skip quantization.",
    )
    group.add_argument(
        "--num-last-layers-to-skip-quant",
        type=int,
        default=None,
        help="Number of last layers to skip quantization.",
    )
    add_modelopt_args(parser)
    return parser


def check_arguments():
    """Checking user arguments."""
    args = get_args()
    if args.num_layers_per_virtual_pipeline_stage is not None:
        print_rank_0("Interleaved pipeline schedule is not yet supported for text generation.")
        exit()

    if hasattr(args, "moe_grouped_gemm") and args.moe_grouped_gemm == True:
        print_rank_0("WARNING: Forcing moe_grouped_gemm to False for PTQ and export.")
        args.moe_grouped_gemm = False


def _is_first_layers(name: str, num_layers: int = 1, num_layers_to_disable: int = 1) -> bool:
    if "layers." not in name:
        return False
    try:
        layer_idx = int(name.split("layers.")[-1].split(".")[0])
    except ValueError:
        return False
    return layer_idx < num_layers_to_disable


def _is_last_layers(name: str, num_layers: int = 1, num_layers_to_disable: int = 1) -> bool:
    if "layers." not in name:
        return False
    try:
        layer_idx = int(name.split("layers.")[-1].split(".")[0])
    except ValueError:
        return False
    return layer_idx >= num_layers - num_layers_to_disable


def get_first_layers_disabled_config(config, num_layers: int = 1, num_layers_to_disable: int = 1):
    """Get a config for `mtq.quantize` with first & last `num_layers_to_disable` layers disabled.

    The layers to disable are the first & last `num_layers_to_disable` layers.
    """
    config = copy.deepcopy(config)
    quant_cfg = config.get("quant_cfg", {})
    quant_cfg.update(
        {
            functools.partial(
                _is_first_layers, num_layers=num_layers, num_layers_to_disable=num_layers_to_disable
            ): {"enable": False}
        }
    )
    config["quant_cfg"] = quant_cfg
    return config


def get_last_layers_disabled_config(config, num_layers: int = 1, num_layers_to_disable: int = 1):
    """Get a config for `mtq.quantize` with last `num_layers_to_disable` layers disabled.

    The layers to disable are the last `num_layers_to_disable` layers.
    """
    config = copy.deepcopy(config)
    quant_cfg = config.get("quant_cfg", {})
    quant_cfg.update(
        {
            functools.partial(
                _is_last_layers, num_layers=num_layers, num_layers_to_disable=num_layers_to_disable
            ): {"enable": False}
        }
    )
    config["quant_cfg"] = quant_cfg
    return config


def get_modelopt_torch_quantization_config():
    """Return a quantization config."""
    args = get_args()
    if args.export_quant_cfg not in QUANT_CFG_CHOICES:
        raise ValueError(f"Unsupported quantization config {args.export_quant_cfg}.")
    mtq_config = QUANT_CFG_CHOICES[args.export_quant_cfg]

    fp8_config = {"enable": True, "num_bits": (4, 3), "axis": None}
    fp4_config = {
        "num_bits": (2, 1),
        "block_sizes": {-1: 16, "type": "dynamic", "scale_bits": (4, 3)},
        "axis": None,
        "enable": True,
    }
    if args.export_quant_cfg == "FP8_DEFAULT_CFG":
        # Enable Medusa heads and kv-cache quantization
        mtq_config["quant_cfg"]["*medusa_heads**"] = fp8_config
    if "FP4" in args.export_quant_cfg:
        # Enable Medusa heads and kv-cache quantization
        mtq_config["quant_cfg"]["*medusa_heads**"] = fp4_config
    if "AWQ" in args.export_quant_cfg:
        weight_quantizer = mtq_config["quant_cfg"]["*weight_quantizer"]  # type: ignore
        if isinstance(weight_quantizer, list):
            weight_quantizer = weight_quantizer[0]
        weight_quantizer["block_sizes"][-1] = 128

    # Customization
    if args.disable_qkv_quant:
        mtq_config["quant_cfg"]["*self_attention*"] = {"enable": False}

    # KV Cache Quantization
    enable_quant_kv_cache = args.export_kv_cache_quant != "none"
    if enable_quant_kv_cache and not args.compress:
        kv_cache_quant_cfg = getattr(mtq, KV_QUANT_CFG_CHOICES[args.export_kv_cache_quant])[
            "quant_cfg"
        ]
        mtq_config = mtq.utils.update_quant_cfg_with_kv_cache_quant(mtq_config, kv_cache_quant_cfg)

    # Weight Only Quantization
    if args.weight_only:
        mtq_config["quant_cfg"]["*input_quantizer"] = {"enable": False}
    if args.num_first_layers_to_skip_quant is not None:
        mtq_config = get_first_layers_disabled_config(
            mtq_config,
            num_layers=args.num_layers,
            num_layers_to_disable=args.num_first_layers_to_skip_quant,
        )
    if args.num_last_layers_to_skip_quant is not None:
        mtq_config = get_last_layers_disabled_config(
            mtq_config,
            num_layers=args.num_layers,
            num_layers_to_disable=args.num_last_layers_to_skip_quant,
        )

    return mtq_config


def get_calib_dataloader(
    dataset_path_or_name,
    tokenizer,
    calib_size=512,
    max_sequence_length=512,
    use_random_offset=False,
    batch_size=1,
):
    """Return a dataloader/iterator for calibration using SFT or HF datasets.

    Supports either a local path (.jsonl) or a HuggingFace dataset name.
    """
    if os.path.isfile(dataset_path_or_name):
        # Local file
        print_rank_0(f"Loading calibration dataset from local file: {dataset_path_or_name}")
        all_texts = []
        with open(dataset_path_or_name) as f:
            for i, line in enumerate(f):
                if len(all_texts) == calib_size:
                    break
                sample = json.loads(line)

                # Extract text field from various possible keys
                if isinstance(sample, dict) and "text" in sample:
                    if not sample["text"]:
                        warnings.warn(f"Sample {i} has empty text, skipping")
                        continue
                    full_text = sample["text"]
                elif isinstance(sample, dict) and "messages" in sample:
                    conversations = sample["messages"]
                    assert "role" in conversations[0] and "content" in conversations[0]
                    full_text = "".join([f"{msg['role']}: {msg['content']}" for msg in conversations])
                elif isinstance(sample, list) and isinstance(sample[0], dict):
                    assert "role" in sample[0] and "content" in sample[0]
                    full_text = "".join([f"{msg['role']}: {msg['content']}" for msg in sample])
                else:
                    raise ValueError(f"Sample {i} has unexpected format")

                # Slice text
                max_text_length = int(max_sequence_length / 0.75)  # tokenized text is roughtly ~75% length of original
                start_idx = 0
                if use_random_offset and len(full_text) > max_text_length:
                    start_idx = random.randint(0, len(full_text) - max_text_length)
                text = full_text[start_idx : start_idx + max_text_length]
                all_texts.append(text)

        print_rank_0(f"Loaded calibration dataset ({dataset_path_or_name}) with {len(all_texts)} samples")
        print_rank_0(f"Actual num samples: {len(all_texts)}, max seq length: {max_sequence_length}")
        print_rank_0(f"Sampling Strategy: {'Random Index' if use_random_offset else 'From Beginning'}")

        # Tokenize all texts at once and move to device
        tokens = tokenizer(
            all_texts, return_tensors="pt", padding="max_length", max_length=max_sequence_length, truncation=True
        )
        all_input_ids = tokens.input_ids.cuda()
        return [{"input_ids": all_input_ids[i:i+batch_size]} for i in range(0, len(all_input_ids), batch_size)]
    else:
        # HuggingFace dataset
        if use_random_offset:
            warnings.warn("Random offset is not supported for HuggingFace datasets.")
        print_rank_0(f"Loading calibration dataset from HuggingFace: {dataset_path_or_name}")
        return get_dataset_dataloader(
            dataset_name=dataset_path_or_name,
            tokenizer=tokenizer,
            num_samples=calib_size,
            max_sample_length=max_sequence_length,
            batch_size=batch_size,
            device="cuda",
        )


if __name__ == "__main__":
    initialize_megatron(
        extra_args_provider=add_text_generate_ptq_args,
        args_defaults={
            "tokenizer_type": "HuggingFaceTokenizer",
            "no_load_rng": True,
            "no_load_optim": True,
        },
    )

    check_arguments()

    args = get_args()

    tokenizer = get_tokenizer()._tokenizer
    if hasattr(tokenizer, "tokenizer"):
        tokenizer = tokenizer.tokenizer
    model = get_model(
        functools.partial(model_provider, modelopt_gpt_mamba_builder), wrap_with_ddp=False
    )

    report_current_memory_info()

    if args.load is not None:
        load_modelopt_checkpoint(model, strict=not args.untie_embeddings_and_output_weights)
        print_rank_0("Done loading checkpoint")

    if args.pretrained_model_path is not None:
        from modelopt.torch.export import import_mcore_gpt_from_hf

        import_dtype = torch.float16 if args.fp16 else torch.bfloat16
        unwrapped_model = unwrap_model(model)[0]
        workspace_dir = os.environ.get("MLM_WORK_DIR", "/tmp")

        import_kwargs = {"dtype": import_dtype}
        if "trust_remote_code" in inspect.signature(import_mcore_gpt_from_hf).parameters:
            import_kwargs.update({"trust_remote_code": args.trust_remote_code})
        import_mcore_gpt_from_hf(
            unwrapped_model, args.pretrained_model_path, workspace_dir, **import_kwargs
        )

    def _custom_prompt_forward_loop_func(model):
        all_prompts = args.prompts.split("|")
        if args.references == "":
            all_references = [None] * len(all_prompts)
        else:
            all_references = args.references.split("|")

        for idx, prompt in tqdm(enumerate(all_prompts), disable=torch.distributed.get_rank()):
            tokens = tokenizer(prompt, return_tensors="pt")
            generated_ids = simple_generate(model, tokens.input_ids.cuda(), osl=32)
            generated_texts = tokenizer.batch_decode(generated_ids)
            print_rank_0("{}".format(generated_texts))
            if all_references[idx] is not None:
                assert all_references[idx] == generated_texts[0], all_references[idx]

    def _dataset_forward_loop_func(model):
        dataloader = get_calib_dataloader(
            dataset_path_or_name=args.calib_dataset_path_or_name,
            tokenizer=tokenizer,
            calib_size=args.calib_size,
            max_sequence_length=args.calib_max_sequence_length,
            use_random_offset=args.calib_use_random_offset,
            batch_size=args.calib_batch_size,
        )
        for sample in tqdm(dataloader, disable=torch.distributed.get_rank()):
            sample = get_batch_on_this_cp_rank(sample)
            simple_generate(model, sample["input_ids"], osl=1, calibration_mode=True)

    unwrapped_model = unwrap_model(model)[0]

    if args.force_all_expert_routing:
        warnings.warn(
            "--force-all-expert-routing will be deprecated in the next release and is no longer needed."
        )

    if args.export_quant_cfg is not None:
        print_rank_0("Quantizing the model...")
        mtq_config = get_modelopt_torch_quantization_config()

        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "left"  # better for calibration

        if args.weight_only:
            mtq.quantize(unwrapped_model, mtq_config)
        elif hasattr(unwrapped_model, "calibration_mode"):
            unwrapped_model.calibration_mode = True
            mtq.quantize(unwrapped_model, mtq_config, _dataset_forward_loop_func)
            unwrapped_model.calibration_mode = False
        else:
            mtq.quantize(unwrapped_model, mtq_config, _dataset_forward_loop_func)

        if args.compress:
            mtq.compress(unwrapped_model)
            print_rank_0("Weights are now compressed to low-bit!")

        print_distributed_quant_summary(model, "Quantized Model:")

    if args.save is not None:
        save_checkpoint(1, model, None, None, 0, release=True)

    # Do this after saving in case it causes issues
    _custom_prompt_forward_loop_func(unwrapped_model)


================================================
FILE: examples/post_training/modelopt/quantize.sh
================================================
#!/bin/bash

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

# Common arguments and base model specific arguments
source "${SCRIPT_DIR}/conf/arguments.sh"

# Extra arguments of this script
MLM_DEFAULT_ARGS="
    --distributed-timeout-minutes 30 \
    --finetune --auto-detect-ckpt-format \
    --export-te-mcore-model \
    --sequence-parallel \
"

QUANT_CFG=$2

if [ -z ${QUANT_CFG} ]; then
    QUANT_CFG='FP8_DEFAULT_CFG'
    printf "${MLM_WARNING} Variable ${PURPLE}QUANT_CFG${WHITE} is not set (default: ${QUANT_CFG})!\n"
fi

if [ -z ${MLM_MODEL_SAVE} ]; then
    MLM_MODEL_SAVE=${MLM_WORK_DIR}/${MLM_MODEL_CFG}_quant
    printf "${MLM_WARNING} Variable ${PURPLE}MLM_MODEL_SAVE${WHITE} is not set (default: ${MLM_MODEL_SAVE})!\n"
fi

if [ -z ${MLM_MODEL_CKPT} ]; then
    ${LAUNCH_SCRIPT} ${SCRIPT_DIR}/quantize.py \
        ${MODEL_ARGS} \
        --tensor-model-parallel-size ${TP} \
        --expert-tensor-parallel-size ${ETP} \
        --expert-model-parallel-size ${EP} \
        --pipeline-model-parallel-size ${PP} \
        --context-parallel-size ${CP} \
        --tokenizer-model ${TOKENIZER_MODEL} \
        --pretrained-model-path ${HF_MODEL_CKPT} \
        --save ${MLM_MODEL_SAVE} \
        --export-quant-cfg ${QUANT_CFG} \
        --references "${MLM_REF_LABEL}" \
        ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS}
else
    ${LAUNCH_SCRIPT} ${SCRIPT_DIR}/quantize.py \
        ${MODEL_ARGS} \
        --tensor-model-parallel-size ${TP} \
        --expert-tensor-parallel-size ${ETP} \
        --expert-model-parallel-size ${EP} \
        --pipeline-model-parallel-size ${PP} \
        --context-parallel-size ${CP} \
        --tokenizer-model ${TOKENIZER_MODEL} \
        --load ${MLM_MODEL_CKPT} \
        --save ${MLM_MODEL_SAVE} \
        --export-quant-cfg ${QUANT_CFG} \
        --references "${MLM_REF_LABEL}" \
        ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS}
fi


================================================
FILE: examples/post_training/modelopt/requirements.txt
================================================
diskcache
datasets
nvidia-modelopt
omegaconf
tensorstore!=0.1.46,!=0.1.72
transformers


================================================
FILE: examples/post_training/modelopt/requirements_ssm.txt
================================================
mamba-ssm>=2.2.5
causal-conv1d @ git+https://github.com/Dao-AILab/causal-conv1d


================================================
FILE: examples/post_training/modelopt/slurm/env_setup_template.sh
================================================
#!/bin/bash

HF_MODEL_CKPT=/workspace/scratch/meta-llama/Llama-3.2-1B-Instruct
TP=1
ETP=1
EP=1
PP=1


================================================
FILE: examples/post_training/modelopt/slurm/sbatch.sh
================================================
#!/bin/bash

#SBATCH -A <account>
#SBATCH -p <partition>
#SBATCH --job-name=<job-name>
#SBATCH --nodes=1 --ntasks-per-node=8 --gpus-per-node=8
#SBATCH -t 04:00:00
#SBATCH --exclusive --mem=0 --overcommit

# Bash coloring
RED='\033[0;31m'
YELLOW='\033[0;33m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
WHITE='\033[0;37m'

# Predefined logging
MLM_ERROR="${RED}ERROR:  ${WHITE}"
MLM_WARNING="${YELLOW}WARNING:${WHITE}"

# CHANGE THE FOLLOWING TO YOUR DATA, MEGATRON, and CHECKPOINT DIR
if [[ -z ${USER_FSW} ]]; then
    printf "${MLM_ERROR} Variable USER_FSW (read/write scratch space) must be set!\n"
    exit 1
fi

if [ -z ${SANDBOX_DIR} ]; then
    SANDBOX_DIR="$(pwd)"
    printf "${MLM_WARNING} Variable SANDBOX_DIR not set! (default: ${SANDBOX_DIR})\n"
fi

if [ -z ${SANDBOX_ENV_SETUP} ]; then
    SANDBOX_ENV_SETUP=./env_setup_template.sh
    printf "${MLM_WARNING} Variable SANDBOX_ENV_SETUP not set! (default: ${SANDBOX_ENV_SETUP})\n"
fi

if [ -z ${CONTAINER_IMAGE} ]; then
    CONTAINER_IMAGE="nvidia-modelopt-megatron:latest"
    printf "${MLM_WARNING} Variable CONTAINER_IMAGE not set! (default: ${CONTAINER_IMAGE})\n"
fi

if [ -z ${LAUNCH_SCRIPT} ]; then
    LAUNCH_SCRIPT="python"
    printf "${MLM_WARNING} Variable LAUNCH_SCRIPT not set! (default: ${LAUNCH_SCRIPT})\n"
fi

# DO NOT MODIFY THE VALUES BELOW UNLESS YOU KNOW WHAT YOU ARE DOING!!!
DATETIME=`date +'date_%y-%m-%d_time_%H-%M-%S'`

CONTAINER_MOUNT="${SANDBOX_DIR}:/workspace/nmm-sandbox,${USER_FSW}:/workspace/scratch"

srun -l \
    --mpi=pmix \
    --output=%x_%j_$DATETIME.log \
    --container-image ${CONTAINER_IMAGE} \
    --container-workdir "/workspace/nmm-sandbox" \
    --container-mounts ${CONTAINER_MOUNT} \
    --export "HF_MODEL_CKPT=${HF_MODEL_CKPT},SANDBOX_ENV_SETUP=${SANDBOX_ENV_SETUP},LAUNCH_SCRIPT=${LAUNCH_SCRIPT}" \
    bash ${1}

set +x


================================================
FILE: examples/post_training/modelopt/speculative.md
================================================
<div align="center">

# Speculative Decoding

</div>

[Medusa](https://arxiv.org/abs/2401.10774) and [EAGLE](https://arxiv.org/pdf/2401.15077)
training and model export are supported (fast decoding is supported through TensorRT-LLM).

Medusa head top-1 accuracy is reported per step (**NOTE:** the accuracy here does not
translate to the acceptance rate described in the writeup. The top-1 of the 1st head
can however signal whether the training is converged).


## Training and Export Workflow

In practice, speculative decoding should be combined with quantization (weights and kv-cache)
to achieve the the highest tokens-per-second-per-user (or TPS) without changing the quality of
the model. We provide quantization-aware training (QAT) receipt with self-distillation in the following.


### Model Convertion

To ensure no quality degredation, base model is frozen and the draft model is attached as a
transformation. For Medusa, set `--export-algorithm medusa` and provide `--export-num-medusa-heads`.
For EAGLE, set `--export-algorithm eagle` and provide `--export-eagle-algorithm`.
the resulting model stored in `${MLM_MODEL_SAVE}` will have randomly initialized draft model weights.

```
python examples/post_training/modelopt/convert_model.py \
    --export-algorithm eagle \
    --export-eagle-algorithm eagle3 \
    --load ${MLM_MODEL_CKPT} --save ${MLM_MODEL_SAVE} ${MLM_EXTRA_ARGS}
```


### Synthetic Data Generation

Rather than learning the language and syntax, the draft model is trained to mimic the base
model output. As a result, self-synthesized data is crucial for the draft model accuracy
and acceptance rate (AR).

For simplicity and efficiency, we use `vllm serve --quantization modelopt` to host an quantized
endpoint and we feed multi-turn conversation data to synthesize the assistant output.
See ModelOpt's example (https://github.com/NVIDIA/Model-Optimizer/tree/main/speculative_decoding)
for more details. The final output is stored as `jsonlines` in an OpenAI chat completion format.


### Quantization-Aware Training (QAT)

For quantize-aware training (QAT), the process is `bf16 training`, `fake quantization`, `qat`.
Since the base model weights are frozen, the initial training is mainly to get an more accurate
range of the draft model activation and weights. We store a new checkpoint where the model
now has additional quantization scalars for both the base and draft models. We launch the
finetuning again to continue the training with fake quantization until convergence.

```sh
python examples/post_training/modelopt/finetune.py \
    --load ${MLM_MODEL_SAVE} --save ${MLM_MODEL_SAVE} ${MLM_EXTRA_ARGS}
python examples/post_training/modelopt/quantize.py \
    --export-quant-cfg fp8 \
    --load ${MLM_MODEL_SAVE} --save ${MLM_QUANT_SAVE} ${MLM_EXTRA_ARGS}
python examples/post_training/modelopt/finetune.py \
    --load ${MLM_QUANT_SAVE} --save ${MLM_QUANT_SAVE} ${MLM_EXTRA_ARGS}
```

### Export Checkpoint

Last, we export the Medusa heads or EAGLE module so that it can be deployed on runtime framework (i.e., TensorRT-LLM).

```sh
python examples/post_training/modelopt/export.py \
    --export-dir ${CKPT_DIR} \
    -export-extra-modules \
    --load ${MLM_QUANT_SAVE} ${MLM_EXTRA_ARGS}
```

### TensorRT-LLM Deployment

To serve the exported checkpoint with [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM), follow the sample commands below with the TensorRT-LLM GitHub repo:

```sh
trtllm-serve <exported checkpoint> --host 0.0.0.0 --port 8000 --backend pytorch --max_batch_size 32 --max_num_tokens 8192 --max_seq_len 8192 --tp_size 8 --extra_llm_api_options extra-llm-api-config.yml
```

`extra-llm-api-config.yml` is like this
```sh
enable_attention_dp: false
disable_overlap_scheduler: true
enable_autotuner: false

cuda_graph_config:
    max_batch_size: 1

speculative_config:
    decoding_type: Eagle
    max_draft_len: 3
    speculative_model_dir: <eagle3 checkpoint>

kv_cache_config:
    enable_block_reuse: false
```


================================================
FILE: examples/post_training/modelopt/train.sh
================================================
#!/bin/bash

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

# Common arguments and base model specific arguments
source "${SCRIPT_DIR}/conf/arguments.sh"


# Set up cache dir for HF to avoid out of space error
export HF_DATASETS_CACHE="/tmp/hf_datasets_cache"

# Extra arguments of this script
MLM_DEFAULT_ARGS=" \
    --modelopt-enabled \
    --distributed-timeout-minutes 60 \
    --auto-detect-ckpt-format \
    --export-te-mcore-model \
"


if [ -z ${MLM_MODEL_SAVE} ]; then
    MLM_MODEL_SAVE=${MLM_MODEL_CKPT}
    printf "${MLM_WARNING} Variable ${PURPLE}MLM_MODEL_SAVE${WHITE} is not set (default: ${MLM_MODEL_CKPT})!\n"
fi

if [ -z ${MLM_DATA_ARGS} ]; then
    MLM_DATA_ARGS=" \
        --train-samples 128000 \
        --lr-decay-samples 128000 \
        --lr-warmup-samples 0 \
	--sft \
	--tokenizer-type SFTTokenizer \
	--per-split-data-args-path ${BLEND_PATH} \
    "
fi

if [ -z ${MLM_TRAIN_ARGS} ]; then
    MLM_TRAIN_ARGS=" \
        --no-gradient-accumulation-fusion \
        --micro-batch-size 1 \
        --attention-dropout 0.0 \
        --hidden-dropout 0.0 \
        --no-check-for-nan-in-loss-and-grad \
    "
fi

if [ -z ${MLM_OPTIM_ARGS} ]; then
    MLM_OPTIM_ARGS=" \
        --lr 5.0e-5 \
        --min-lr 1.0e-7 \
        --lr-decay-style cosine \
        --clip-grad 1.0 \
        --weight-decay 0.0 \
        --adam-beta1 0.9 \
        --adam-beta2 0.95 \
        --init-method-std 0.010 \
        --use-distributed-optimizer \
    "
fi

if [ -z ${MLM_EVAL_ARGS} ]; then
    MLM_EVAL_ARGS=" \
        --eval-iters 1 \
        --eval-interval 1000 \
        --save-interval 1000 \
        --log-interval 100 \
    "
fi

export HF_TOKEN=${HF_TOKEN}

if [[ ${MODEL_ARGS} == *"MambaModel"* ]]; then
    PRETRAIN_EXE=${SCRIPT_DIR}/../../../pretrain_mamba.py
else
    PRETRAIN_EXE=${SCRIPT_DIR}/../../../pretrain_gpt.py
fi

${LAUNCH_SCRIPT} ${PRETRAIN_EXE} \
    ${MODEL_ARGS} \
    --tensor-model-parallel-size ${TP} \
    --expert-tensor-parallel-size ${ETP} \
    --expert-model-parallel-size ${EP} \
    --pipeline-model-parallel-size ${PP} \
    --context-parallel-size ${CP} \
    --tokenizer-model ${TOKENIZER_MODEL} \
    --load ${MLM_MODEL_CKPT} \
    --save ${MLM_MODEL_SAVE} \
    ${MLM_DATA_ARGS} \
    ${MLM_OPTIM_ARGS} \
    ${MLM_TRAIN_ARGS} \
    ${MLM_EVAL_ARGS} \
    ${MLM_RESUME_ARGS} \
    ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS}


================================================
FILE: examples/post_training/modelopt/validate.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Sample Generate GPT."""
import functools
import json
import os
import sys
import warnings

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../")))

import torch
from modelopt.torch.speculative.plugins.megatron_eagle import MegatronARValidation

from megatron.post_training.arguments import add_modelopt_args
from megatron.post_training.checkpointing import load_modelopt_checkpoint
from megatron.post_training.model_builder import modelopt_gpt_mamba_builder
from megatron.post_training.utils import get_mtbench_chat_data
from megatron.training import get_args, get_model, get_tokenizer, initialize_megatron
from megatron.training.utils import print_rank_0, unwrap_model
from model_provider import model_provider

warnings.filterwarnings('ignore')


def add_ar_validation_args(parser):
    """Add additional arguments for ModelOpt acceptance rate validation."""
    group = parser.add_argument_group(title='ModelOpt ar validation')
    group.add_argument(
        "--osl", type=int, default=64, help="Output sequence length."
    )
    parser.add_argument(
        "--prompts-path",
        type=str,
        default=None,
        help="Path to the prompts json file. If not provided, MTBench will be used.",
    )
    parser.add_argument(
        "--ground-truth-path",
        type=str,
        default=None,
        help="Path to the ground truth pt file.",
    )
    parser.add_argument(
        "--steps", type=int, default=1, help="Only used in EAGLE."
    )
    parser.add_argument(
        "--save-ground-truth-path",
        type=str,
        default=None,
        help="Save path for the ground truth pt file.",
    )

    add_modelopt_args(parser)
    return parser


def check_arguments():
    """Checking user arguments."""
    args = get_args()
    if args.num_layers_per_virtual_pipeline_stage is not None:
        print_rank_0("Interleaved pipeline schedule is not yet supported for text generation.")
        exit()

    if hasattr(args, 'moe_grouped_gemm') and args.moe_grouped_gemm == True:
        print_rank_0("WARNING: Forcing moe_grouped_gemm to False for PTQ and export.")
        args.moe_grouped_gemm = False


def get_current_memory_info():
    remaining_mem, total_mem = torch.cuda.mem_get_info()
    info = "rank {:02}  memory remaining {:03}% ({}/{} MB) ".format(
        torch.distributed.get_rank(),
        int(remaining_mem * 100 / total_mem),
        remaining_mem // 1048576,
        total_mem // 1048576,
    )
    return info


def report_current_memory_info():
    """Report current memory usage."""
    print(get_current_memory_info(), flush=True)
    torch.distributed.barrier()


if __name__ == "__main__":
    initialize_megatron(
        extra_args_provider=add_ar_validation_args,
        args_defaults={
            'tokenizer_type': 'HuggingFaceTokenizer',
            'no_load_rng': True,
            'no_load_optim': True,
        },
    )

    check_arguments()

    args = get_args()

    if not args.prompts_path:
        dataset = get_mtbench_chat_data()
        prompts = [[sample["conversations"][0]] for sample in dataset]
    else:
        with open(args.prompts_path, "r") as f:
            prompts = [json.loads(line) for line in f]

    if args.ground_truth_path is not None:
        ground_truth = torch.load(args.ground_truth_path)
        ground_truth = [gt.to(torch.cuda.current_device()) for gt in ground_truth]
    else:
        ground_truth = [None for _ in range(len(prompts))]

    tokenizer = get_tokenizer()._tokenizer
    if hasattr(tokenizer, "tokenizer"):
        tokenizer = tokenizer.tokenizer
    model = get_model(functools.partial(model_provider, modelopt_gpt_mamba_builder), wrap_with_ddp=False)

    report_current_memory_info()

    if args.load is not None:
        load_modelopt_checkpoint(model, strict=not args.untie_embeddings_and_output_weights)
        print_rank_0("Done loading checkpoint")


    unwrapped_model = unwrap_model(model)[0]
    unwrapped_model.eval()

    validator = MegatronARValidation(unwrapped_model, tokenizer)
    gt = []
    ar = []
    for prompt, truth in zip(prompts, ground_truth):
        output = validator.validate(args.osl, prompt, ground_truth=truth, steps=args.steps)
        gt.append(output[0])
        ar.append(output[1])
    print_rank_0("Acceptance Rate: " + str(ar))
    print_rank_0("Average: " + str(sum(ar)/len(ar)))

    if args.save_ground_truth_path is not None:
        torch.save(gt, args.save_ground_truth_path)


================================================
FILE: examples/post_training/modelopt/validate.sh
================================================
#!/bin/bash

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

# Common arguments and base model specific arguments
source "${SCRIPT_DIR}/conf/arguments.sh"

# Extra arguments of this script
MLM_DEFAULT_ARGS="--finetune --auto-detect-ckpt-format --export-te-mcore-model"
MLM_EXTRA_ARGS="--sequence-parallel"


if [ -z ${MLM_MODEL_CKPT} ]; then
    printf "${MLM_ERROR} Variable ${PURPLE}MLM_MODEL_CKPT${WHITE} must be set!\n"
    exit 1
fi

if [ -z ${PROMPTS_PATH} ]; then
    PROMPT_ARGS=""
else
    PROMPT_ARGS="--prompts-path ${PROMPTS_PATH}"
fi

if [ -z ${STEPS} ]; then
    STEPS=1
fi

if [ -z ${SAVE_GT_PATH} ]; then
    SAVE_ARGS=""
else
    SAVE_ARGS="--save-ground-truth-path ${SAVE_GT_PATH}"
fi

if [ -z ${GT_PATH}]; then
    GT_ARGS=""
else
    GT_ARGS="--ground-truth-path ${GT_PATH}"
fi

if [ -z ${OSL} ]; then
    STEPS=64
fi

export HF_TOKEN=${HF_TOKEN}

${LAUNCH_SCRIPT} ${SCRIPT_DIR}/validate.py \
    ${MODEL_ARGS} \
    --tensor-model-parallel-size ${TP} \
    --expert-tensor-parallel-size ${ETP} \
    --expert-model-parallel-size ${EP} \
    --pipeline-model-parallel-size ${PP} \
    --tokenizer-model ${TOKENIZER_MODEL} \
    --load ${MLM_MODEL_CKPT} \
    --steps ${STEPS} \
    --osl ${OSL} \
    ${PROMPT_ARGS} \
    ${GT_ARGS} \
    ${SAVE_ARGS} \
    ${MLM_DEFAULT_ARGS} ${MLM_EXTRA_ARGS}


================================================
FILE: examples/rl/README.md
================================================
# Reinforcement Learning in megatron

This is an example of GRPO implementation within megatron-lm.
For implementation details check out `train_rl.py` and `megatron/rl/rl_utils.py`.
For the environment details, check the `megatron.rl` module.

The following experiment will train the Qwen 2.5 32B model on the [DAPO17k](https://huggingface.co/datasets/BytedTsinghua-SIA/DAPO-Math-17k) dataset and will run evaluation on [AIME2024](https://huggingface.co/datasets/Maxwell-Jia/AIME_2024).
After 300 steps, you should get about 0.7 pass@32 on AIME with the average training reward of 0.6. 

## Setup

You should be able to run qwen2p5_32b_grpo.sh using the `nvcr.io/nvidia/pytorch:25.06-py3` container with these additional dependencies:

```bash
pip install flask-restful uvloop datasets evaluate
```

Specify these environment variables and create the required directories:

```bash
export CUDA_DEVICE_MAX_CONNECTIONS=1

CHECKPOINT="" # <Specify path to the base model checkpoint>
RUN_DIR="" # <Specify path for bookkeeping>
WANDB_PROJECT="" # <Specify>
WANDB_EXP_NAME="" # <Specify>

LOG_DIR=$RUN_DIR/logs
DATA_CACHE_DIR=$RUN_DIR/data_cache
CHECKPOINT_DIR=$RUN_DIR/checkpoints
TB_DIR=$RUN_DIR/tensorboard
```

## Convert the checkpoint

You can convert a [Huggingface Qwen checkpoint](https://huggingface.co/Qwen/Qwen2.5-32B) to megatron-lm format using the `megatron-lm/tools/checkpoint/convert.py` script:

```bash
TP=8
HF_FORMAT_DIR=<PATH_TO_HF_FORMAT_DIR>
MEGATRON_FORMAT_DIR=<PATH_TO_MEGATRON_FORMAT_DIR>
TOKENIZER_MODEL=HF_FORMAT_DIR

python ./tools/checkpoint/convert.py \
    --bf16 \
    --model-type GPT \
    --loader llama_mistral \
    --saver core \
    --target-tensor-parallel-size ${TP} \
    --checkpoint-type hf \
    --load-dir ${HF_FORMAT_DIR} \
    --save-dir ${MEGATRON_FORMAT_DIR} \
    --tokenizer-model ${TOKENIZER_MODEL} \
    --model-size qwen2.5 \
    --loader-transformer-impl transformer_engine \
    --make-vocab-size-divisible-by 128 \
```

## Experiment command

NOTE: Depending on the environment you are running it the provided script might require minor changes.

```bash

COMMON_OPTIONS="\
    --tensor-model-parallel-size $TP  \
    --pipeline-model-parallel-size $PP  \
    --use-mcore-models \
    --transformer-impl transformer_engine \
    --bf16 \
    --te-rng-tracker \
    --cuda-graph-impl local \
    --inference-dynamic-batching-num-cuda-graphs 1 \
    --inference-dynamic-batching-buffer-size-gb 20 \
    --data-parallel-random-init \
    --attention-backend flash \
    --timing-log-level 1 \
    --log-timers-to-tensorboard \
    --initialize-socket-comms \
    "

GRPO_CLAMP_EPS_LOWER=0.2
GRPO_CLAMP_EPS_UPPER=0.28
MAX_INFERENCE_BS=32
GRPO_GROUP_SIZE=16
GRPO_PROMPTS_PER_STEP=64
GRPO_ITERATIONS=1
GRPO_KL_BETA="0.0"
TRAINING_BATCH_SIZE=1024
MICRO_BATCH_SIZE=1
MAX_SEQ_LENGTH=11999

MODEL_OPTIONS="\
  --ckpt-format torch \
  --seq-length $MAX_SEQ_LENGTH \
  --inference-max-seq-length $MAX_SEQ_LENGTH \
  --inference-max-requests $MAX_INFERENCE_BS \
  --pretrained-checkpoint $CHECKPOINT \
  --untie-embeddings-and-output-weights \
  --disable-bias-linear \
  --add-qkv-bias \
  --normalization RMSNorm \
  --norm-epsilon 1e-5 \
  --group-query-attention \
  --num-query-groups 8 \
  --no-masked-softmax-fusion \
  --attention-softmax-in-fp32 \
  --attention-dropout 0.0 \
  --hidden-dropout 0.0 \
  --weight-decay 0.0 \
  --position-embedding-type rope \
  --rotary-percent 1.0 \
  --rotary-base 1000000 \
  --use-rotary-position-embeddings \
  --swiglu \
  --num-layers 64  \
  --hidden-size 5120  \
  --ffn-hidden-size 27648 \
  --num-attention-heads 40  \
  --max-position-embeddings 131072 \
  --tokenizer-type HuggingFaceTokenizer \
  --tokenizer-model unsloth/Qwen2.5-32B \
  --lr 1e-6 \
  --lr-warmup-samples 0 \
  --make-vocab-size-divisible-by 128 \
  --clip-grad 1.0 \
  --recompute-granularity selective \
  --recompute-activations "

ENV_DEPENDENT="\
  --langrl-env-config "examples/rl/environment_configs/dapo.yaml" \
  --micro-batch-size $MICRO_BATCH_SIZE \
  --global-batch-size $TRAINING_BATCH_SIZE \
  --grpo-group-size $GRPO_GROUP_SIZE \
  --grpo-prompts-per-step $GRPO_PROMPTS_PER_STEP \
  --grpo-iterations $GRPO_ITERATIONS \
  --grpo-clamp-eps-lower $GRPO_CLAMP_EPS_LOWER \
  --grpo-clamp-eps-upper $GRPO_CLAMP_EPS_UPPER \
  --grpo-kl-beta $GRPO_KL_BETA \
  --env-config $ENV_CONFIG "


torchrun \
    --nproc-per-node=8 \
    --nnodes=8 \
    train_rl.py \
    --mock-data \
    --distributed-timeout-minutes 60 \
    --train-samples 48828125 \
    --log-interval 10 \
    --log-progress  \
    --timing-log-option minmax \
    --log-params-norm \
    --log-num-zeros-in-grad \
    --log-throughput \
    --adam-beta1 0.9 \
    --adam-beta2 0.95 \
    --adam-eps 1e-8 \
    --no-create-attention-mask-in-dataloader \
    --accumulate-allreduce-grads-in-fp32 \
    --calculate-per-token-loss \
    --log-straggler \
    --disable-straggler-on-startup \
    --perform-rl-step \
    --use-distributed-optimizer \
    --straggler-minmax-count 16 \
    --eval-interval 20 \
    --rl-prompts-per-eval 32 \
    --tensorboard-log-interval 1 \
    --empty-unused-memory-level 2 \
    --data-cache-path ${DATA_CACHE_DIR} \
    --save $CHECKPOINT_DIR \
    --load $CHECKPOINT_DIR \
    --tensorboard-dir $TB_DIR \
    --seed $SEED \
    --sequence-parallel \
    --finetune \
    --save-interval 20 \
    --wandb-project $WANDB_PROJECT \
    --wandb-exp-name $WANDB_EXP_NAME \
    ${MODEL_OPTIONS} \
    ${COMMON_OPTIONS} \
    ${ENV_DEPENDENT} $@
```


================================================
FILE: examples/rl/benchmark_refit.py
================================================
#!/usr/bin/env python3
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
"""
Benchmark script for model refit performance.

Measures the time to transfer model weights between different parallelism configurations.
Supports both collocated (models share GPUs) and non-collocated (separate GPU sets) modes.
"""
import time

import torch

from megatron.core.resharding.refit import swap_model_weights
from megatron.training import get_args, get_model as get_training_model, print_rank_0
from megatron.training.initialize import initialize_megatron
from megatron.training.arguments import core_transformer_config_from_args
from megatron.rl.parallel_utils import build_inference_pg_collection
from gpt_builders import gpt_builder
from megatron.core.resharding.copy_services.nvshmem_copy_service import NVSHMEMCopyService
from megatron.core.resharding.copy_services.nccl_copy_service import NCCLCopyService
from megatron.core.resharding.copy_services.gloo_copy_service import GlooCopyService


def add_benchmark_args(parser):
    """Add benchmark-specific arguments."""
    group = parser.add_argument_group(title='refit benchmark')

    group.add_argument(
        '--refit-mode',
        type=str,
        required=True,
        choices=['collocated', 'non-collocated'],
        help='Collocated: both models share GPUs. Non-collocated: separate GPU sets.'
    )
    group.add_argument(
        '--num-benchmark-warmup',
        type=int,
        default=2,
        help='Number of warmup iterations (first builds refit plan).'
    )
    group.add_argument(
        '--num-benchmark-iterations',
        type=int,
        default=10,
        help='Number of timed benchmark iterations.'
    )

    return parser


def model_provider(pre_process=True, post_process=True, parallel_output=False,
                   pg_collection=None, config=None):
    """Build the model."""
    args = get_args()
    if config is None:
        config = core_transformer_config_from_args(args)

    return gpt_builder(
        args=args,
        pre_process=pre_process,
        post_process=post_process,
        config=config,
        pg_collection=pg_collection,
    )


def create_refit_service(method):
    """Create and return a refit service instance."""
    if method == 'nvshmem':
        return NVSHMEMCopyService()
    elif method == 'nccl':
        return NCCLCopyService()
    elif method == 'gloo':
        return GlooCopyService()
    else:
        return method


def print_config_summary(args, src_config, dst_config, world_size, mode):
    """Print benchmark configuration."""
    print_rank_0(f"\n{'='*80}")
    print_rank_0(f"REFIT BENCHMARK - {mode.upper()} MODE")
    print_rank_0(f"{'='*80}")
    print_rank_0(f"World size: {world_size}")
    print_rank_0(f"Source:      TP={src_config['tp']}, PP={src_config['pp']}, EP={src_config['ep']}, DP={src_config['dp']}")
    print_rank_0(f"Destination: TP={dst_config['tp']}, PP={dst_config['pp']}, EP={dst_config['ep']}, DP={dst_config['dp']}")
    print_rank_0(f"Model: {args.num_layers}L, {args.hidden_size}H, {args.num_attention_heads} heads, vocab={args.vocab_size}")
    if args.num_experts:
        print_rank_0(f"MoE: {args.num_experts} experts, top-{args.moe_router_topk}")
    print_rank_0(f"Backend: {args.refit_method}")
    print_rank_0(f"{'='*80}\n")


def run_benchmark(src_model, dst_model, refit_service, num_warmup, num_iterations):
    """Run warmup and benchmark iterations, return timings."""
    rank = torch.distributed.get_rank()

    # Warmup (builds refit plan on first iteration)
    print_rank_0(f"Warmup: {num_warmup} iterations...")
    for i in range(num_warmup):
        torch.cuda.synchronize()
        torch.distributed.barrier()
        swap_model_weights(src_model, dst_model, refit_method=refit_service)
        torch.cuda.synchronize()
        torch.distributed.barrier()

    print_rank_0("Warmup complete. Starting benchmark...\n")

    # Benchmark iterations
    print_rank_0(f"Benchmark: {num_iterations} iterations...")
    timings = []

    for i in range(num_iterations):
        torch.cuda.synchronize()
        torch.distributed.barrier()

        start_time = time.perf_counter()
        swap_model_weights(src_model, dst_model, refit_method=refit_service)
        torch.cuda.synchronize()
        end_time = time.perf_counter()

        elapsed = end_time - start_time
        timings.append(elapsed)
        torch.distributed.barrier()

    return timings


def print_results(timings):
    """Print benchmark results."""
    if torch.distributed.get_rank() == 0:
        mean_time = sum(timings) / len(timings)
        min_time = min(timings)
        max_time = max(timings)

        print(f"\n{'='*80}")
        print("RESULTS")
        print(f"{'='*80}")
        print(f"Mean: {mean_time*1000:.2f} ms")
        print(f"Min:  {min_time*1000:.2f} ms")
        print(f"Max:  {max_time*1000:.2f} ms")
        print(f"{'='*80}\n")


def benchmark_collocated():
    """Benchmark refit in collocated mode (both models on same GPUs)."""
    args = get_args()
    world_size = torch.distributed.get_world_size()

    # Calculate parallelism
    src_tp = args.tensor_model_parallel_size
    src_pp = args.pipeline_model_parallel_size
    src_ep = args.expert_model_parallel_size
    src_world = src_tp * src_pp * src_ep
    src_dp = world_size // src_world

    dst_tp = args.rl_inference_tensor_model_parallel_size or src_tp
    dst_pp = args.rl_inference_pipeline_model_parallel_size or src_pp
    dst_ep = args.rl_inference_expert_model_parallel_size or src_ep
    dst_world = dst_tp * dst_pp * dst_ep
    dst_dp = world_size // dst_world

    # Print config
    src_config = {'tp': src_tp, 'pp': src_pp, 'ep': src_ep, 'dp': src_dp}
    dst_config = {'tp': dst_tp, 'pp': dst_pp, 'ep': dst_ep, 'dp': dst_dp}
    print_config_summary(args, src_config, dst_config, world_size, 'collocated')

    # Build source model
    print_rank_0("Building source model...")
    src_model = get_training_model(
        lambda pre_process, post_process, **kwargs: model_provider(
            pre_process=pre_process, post_process=post_process, parallel_output=False
        ),
        wrap_with_ddp=False
    )
    src_model[0] = src_model[0].cuda()

    # Build destination model with custom parallelism
    print_rank_0("Building destination model...")
    dst_pg_collection = build_inference_pg_collection(
        world_size,
        tp_size=dst_tp,
        pp_size=dst_pp,
        ep_size=dst_ep,
        expt_tp_size=args.rl_inference_expert_tensor_model_parallel_size,
        use_tp_pp_dp_mapping=args.use_tp_pp_dp_mapping,
    )

    dst_config = core_transformer_config_from_args(args)
    if args.num_experts:
        dst_config.expert_model_parallel_size = dst_ep
    dst_config.tensor_model_parallel_size = dst_tp
    if args.rl_inference_expert_tensor_model_parallel_size:
        dst_config.expert_tensor_parallel_size = args.rl_inference_expert_tensor_model_parallel_size

    dst_model = get_training_model(
        lambda pre_process, post_process, **kwargs: model_provider(
            pre_process=pre_process, post_process=post_process,
            pg_collection=dst_pg_collection, config=dst_config
        ),
        wrap_with_ddp=False
    )
    dst_model[0] = dst_model[0].cuda()

    torch.distributed.barrier()

    # Create refit service
    print_rank_0(f"Creating {args.refit_method} service...")
    refit_service = create_refit_service(args.refit_method)
    print_rank_0("Service created.\n")

    # Run benchmark
    timings = run_benchmark(src_model, dst_model, refit_service,
                           args.num_benchmark_warmup, args.num_benchmark_iterations)

    # Print results
    print_results(timings)


def benchmark_non_collocated():
    """Benchmark refit in non-collocated mode (separate GPU sets)."""
    args = get_args()
    rank = torch.distributed.get_rank()
    world_size = torch.distributed.get_world_size()

    # Calculate parallelism
    src_tp = args.tensor_model_parallel_size
    src_pp = args.pipeline_model_parallel_size
    src_ep = args.expert_model_parallel_size
    src_world = src_tp * src_pp * src_ep

    dst_tp = args.rl_inference_tensor_model_parallel_size or src_tp
    dst_pp = args.rl_inference_pipeline_model_parallel_size or src_pp
    dst_ep = args.rl_inference_expert_model_parallel_size or src_ep
    dst_world = dst_tp * dst_pp * dst_ep

    required_size = src_world + dst_world
    if world_size < required_size:
        raise ValueError(f"Non-collocated requires {required_size} GPUs, got {world_size}")

    # Determine rank roles
    is_src_rank = rank < src_world
    is_dst_rank = src_world <= rank < required_size
    is_idle_rank = rank >= required_size

    # Print config
    src_config = {'tp': src_tp, 'pp': src_pp, 'ep': src_ep, 'dp': 1}
    dst_config = {'tp': dst_tp, 'pp': dst_pp, 'ep': dst_ep, 'dp': 1}
    print_config_summary(args, src_config, dst_config, world_size, 'non-collocated')
    if world_size > required_size:
        print_rank_0(f"Note: Ranks {required_size}-{world_size-1} are idle\n")

    # Create destination process groups (all ranks participate)
    print_rank_0("Creating process groups...")
    dst_pg_collection = build_inference_pg_collection(
        world_size=dst_world,
        tp_size=dst_tp,
        pp_size=dst_pp,
        ep_size=dst_ep,
        expt_tp_size=args.rl_inference_expert_tensor_model_parallel_size,
        use_tp_pp_dp_mapping=args.use_tp_pp_dp_mapping,
        rank_offset=src_world,
    )
    torch.distributed.barrier()

    # Idle ranks participate in collectives but have no models
    if is_idle_rank:
        src_model = None
        dst_model = None
    elif is_src_rank:
        # Build source model
        print_rank_0("Building source model...")
        src_model = get_training_model(
            lambda pre_process, post_process, **kwargs: model_provider(
                pre_process=pre_process, post_process=post_process, parallel_output=False
            ),
            wrap_with_ddp=False
        )
        src_model[0] = src_model[0].cuda()
        dst_model = None
    else:  # is_dst_rank
        # Build destination model
        print_rank_0("Building destination model...")
        dst_config = core_transformer_config_from_args(args)
        if args.num_experts:
            dst_config.expert_model_parallel_size = dst_ep
        dst_config.tensor_model_parallel_size = dst_tp
        if args.rl_inference_expert_tensor_model_parallel_size:
            dst_config.expert_tensor_parallel_size = args.rl_inference_expert_tensor_model_parallel_size

        dst_model = get_training_model(
            lambda pre_process, post_process, **kwargs: model_provider(
                pre_process=pre_process, post_process=post_process,
                pg_collection=dst_pg_collection, config=dst_config
            ),
            wrap_with_ddp=False
        )
        dst_model[0] = dst_model[0].cuda()
        src_model = None

    torch.distributed.barrier()

    # Create refit service
    print_rank_0(f"Creating {args.refit_method} service...")
    refit_service = create_refit_service(args.refit_method)
    print_rank_0("Service created.\n")

    # Run benchmark
    timings = run_benchmark(src_model, dst_model, refit_service,
                           args.num_benchmark_warmup, args.num_benchmark_iterations)

    # Print results
    print_results(timings)


def main():
    """Main benchmark function."""
    initialize_megatron(
        extra_args_provider=add_benchmark_args,
        args_defaults={
            'tokenizer_type': 'NullTokenizer',
            'no_load_optim': True,
            'no_load_rng': True,
            'no_save_optim': True,
            'no_save_rng': True,
        },
        ignore_unknown_args=False,
    )

    args = get_args()

    # Set default vocab size if not provided
    if args.vocab_size is None:
        args.vocab_size = 50257
        print_rank_0("Using default vocab_size=50257")

    # Run benchmark
    if args.refit_mode == 'collocated':
        benchmark_collocated()
    else:
        benchmark_non_collocated()


if __name__ == "__main__":
    main()


================================================
FILE: examples/rl/environment_configs/countdown.yaml
================================================
- agent_type: examples.rl.environments.countdown.countdown_agent.CountdownAgent
  agent_args:
    hf_dataset_name: "Jiayi-Pan/Countdown-Tasks-3to4"
    split: "train"
  weight: 1.0


================================================
FILE: examples/rl/environment_configs/dapo.yaml
================================================
- agent_type: examples.rl.environments.math.dapo_agent.DAPOAgent
  agent_args:
    format_reward: 0.0
  weight: 1.0
- agent_type: examples.rl.environments.math.aime_agent.AIMEAgent
  agent_args:
    format_reward: 0.0
  weight: 0.0
  evaluation_only: true


================================================
FILE: examples/rl/environment_configs/default.yaml
================================================
- agent_type: examples.rl.environments.countdown.countdown_agent.CountdownAgent
  agent_args:
    hf_dataset_name: "Jiayi-Pan/Countdown-Tasks-3to4"
    split: "train"
  weight: 1.0
- agent_type: examples.rl.environments.math.openmath_agent.OpenMathInstructAgent
  agent_args: {}
  weight: 1.0


================================================
FILE: examples/rl/environment_configs/gsm8k.yaml
================================================
- agent_type: examples.rl.environments.math.gsm8k_agent.GSM8KAgent
  agent_args:
    answer_format: "boxed"
    format_reward: 0.5
  weight: 1.0
  evaluation_only: false


================================================
FILE: examples/rl/environment_configs/gsm8k_nanov3.yaml
================================================
- agent_type: examples.rl.environments.math.gsm8k_agent.GSM8KAgent
  agent_args:
    answer_format: "boxed"
    format_reward: 0.5
    negative_reward: 0.0
    partial_end_reward: 0.75
  weight: 1.0
  evaluation_only: false


================================================
FILE: examples/rl/environment_configs/math.yaml
================================================
- agent_type: examples.rl.environments.math.openmath_agent.OpenMathInstructAgent
  agent_args: {}
  weight: 1.0
- agent_type: examples.rl.environments.math.bigmath_agent.BigMathAgent
  agent_args: {}
  weight: 1.0
- agent_type: examples.rl.environments.math.aime_agent.AIMEAgent
  agent_args: {}
  weight: 0.0
  evaluation_only: true


================================================
FILE: examples/rl/environment_configs/openmathinstructv2.yaml
================================================
- agent_type: examples.rl.environments.math.openmath_agent.OpenMathInstructAgent
  agent_args: {}
  weight: 1.0


================================================
FILE: examples/rl/environments/__init__.py
================================================


================================================
FILE: examples/rl/environments/countdown/README.md
================================================
# Countdown Agentic Environment
The `CountdownAgenticEnv` is based off of the countdown task introduced in https://github.com/Jiayi-Pan/TinyZero. The objective is for the LLM to provide an algebraic expression combining a set of numbers in order to produce a provided "target" number.

The data is loaded from the below HF dataset and most of the evaluation code (in `countdown.py`) is inherited from the above GitHub repository.

https://huggingface.co/datasets/Jiayi-Pan/Countdown-Tasks-3to4

It is an example of a `megatron.rl.agent.reward_only_agent` so many tasks that have only a reward calcuation can use this as a prototype.


================================================
FILE: examples/rl/environments/countdown/__init__.py
================================================


================================================
FILE: examples/rl/environments/countdown/countdown.py
================================================
# This file is adapted from code in https://github.com/Jiayi-Pan/TinyZero

import re


def extract_solution(solution_str: str, remove_prompt: bool = False):
    """Extract the equation from the solution string."""
    if remove_prompt:
        # Remove everything before the first "Assistant:"
        if "Assistant:" in solution_str:
            solution_str = solution_str.split("Assistant:", 1)[1]
        elif "<|im_start|>assistant" in solution_str:
            solution_str = solution_str.split("<|im_start|>assistant", 1)[1]
        else:
            return None
    solution_str = solution_str.split('\n')[-1]

    answer_pattern = r'<answer>(.*?)</answer>'
    match = re.finditer(answer_pattern, solution_str)
    matches = list(match)
    if matches:
        final_answer = matches[-1].group(1).strip()
    else:
        final_answer = None
    return final_answer


def validate_equation(equation_str, available_numbers):
    """Validate that equation only uses available numbers and each number once."""
    try:
        # Extract all numbers from the equation
        numbers_in_eq = [int(n) for n in re.findall(r'\d+', equation_str)]

        # Check if all numbers in equation are available
        available_numbers = sorted(available_numbers)
        numbers_in_eq = sorted(numbers_in_eq)

        # Each number should be used exactly once
        return numbers_in_eq == available_numbers
    except:
        return False


def evaluate_equation(equation_str):
    """Safely evaluate the arithmetic equation using eval() with precautions."""
    try:
        # Define a regex pattern that only allows numbers, operators, parentheses, and whitespace
        allowed_pattern = r'^[\d+\-*/().\s]+$'
        if not re.match(allowed_pattern, equation_str):
            raise ValueError("Invalid characters in equation.")

        # Evaluate the equation with restricted globals and locals
        result = eval(equation_str, {"__builtins__": None}, {})
        return result
    except Exception as e:
        return None


def compute_score(solution_str, ground_truth, method='strict', format_score=0.1, score=1.0):
    """The scoring function for countdown task.

    Args:
        solution_str: the solution text
        ground_truth: dictionary containing target number and available numbers
        method: the method to extract the solution
        format_score: the score for correct format but wrong answer
        score: the score for the correct answer
    """
    target = ground_truth['target']
    numbers = ground_truth['nums']

    equation = extract_solution(solution_str=solution_str)
    do_print = False  # random.randint(1, 64) == 1

    if do_print:
        print("--------------------------------")
        print(f"Target: {target} | Numbers: {numbers}")
        print(f"Extracted equation: {equation}")
        print(f"Solution string: {solution_str}")

    if equation is None:
        if do_print:
            print("No equation found")
        return 0

    # Validate equation uses correct numbers
    if not validate_equation(equation, numbers):
        if do_print:
            print("Invalid equation")
        return format_score

    # Evaluate equation
    try:
        result = evaluate_equation(equation)
        if result is None:
            if do_print:
                print("Could not evaluate equation")
            return format_score

        if abs(result - target) < 1e-5:  # Account for floating point precision
            if do_print:
                print(f"Correct equation: {equation} = {result}")
            return score
        else:
            if do_print:
                print(f"Wrong result: equation = {result}, target = {target}")
            return format_score
    except:
        if do_print:
            print("Error evaluating equation")
        return format_score


================================================
FILE: examples/rl/environments/countdown/countdown_agent.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import random
from typing import Any, Iterable

from megatron.rl.agent.huggingface_dataset_agent import HFDatasetAgent
from megatron.rl.agent.reward_only_agent import RewardOnlyAgent

from .countdown import compute_score


class CountdownAgent(RewardOnlyAgent, HFDatasetAgent):
    env_id: str = "countdown"

    def make_prefix(self, target, nums) -> str:
        prefix = f"""Using the numbers {nums}, create an equation that equals {target}. You can use basic arithmetic operations (+, -, *, /) and each number can only be used once. 
        Return the final answer in <answer> </answer> tags, for example <answer> (1 + 2) / 3 </answer>. Do not include an = sign."""
        return prefix

    def get_dataset(self, validation: bool = False):
        TRAIN_SIZE = 327680
        TEST_SIZE = 1024

        assert len(self.dataset) > TRAIN_SIZE + TEST_SIZE
        train_dataset = self.dataset.select(range(TRAIN_SIZE))
        test_dataset = self.dataset.select(range(TRAIN_SIZE, TRAIN_SIZE + TEST_SIZE))
        return train_dataset if not validation else test_dataset

    async def evaluation_prompts(
        self, num_prompts: int, validation: bool = False
    ) -> Iterable[tuple[str, Any]]:
        dataset = self.get_dataset(validation)
        return [
            (self.make_prefix(**golden), golden)
            for golden in [dataset[i] for i in range(num_prompts)]
        ]

    async def get_prompt(self, validation=False) -> tuple[str, dict]:
        dataset = self.get_dataset(validation)
        golden = dataset[random.randrange(len(dataset))]
        return self.make_prefix(**golden), golden

    async def get_reward(self, response, golden: dict) -> float:
        return compute_score(response, golden)


================================================
FILE: examples/rl/environments/math/__init__.py
================================================


================================================
FILE: examples/rl/environments/math/aime_agent.py
================================================
import datasets

from .math_agent import MathAgent

raw_dataset = datasets.load_dataset("Maxwell-Jia/AIME_2024", split="train")

TRAIN_SIZE = 0
TEST_SIZE = len(raw_dataset) - TRAIN_SIZE

assert len(raw_dataset) >= TRAIN_SIZE + TEST_SIZE
train_dataset = raw_dataset.select(range(TRAIN_SIZE))
test_dataset = raw_dataset.select(range(TRAIN_SIZE, TRAIN_SIZE + TEST_SIZE))


class AIMEAgent(MathAgent):
    env_id: str = "aime"

    def get_dataset(self, validation: bool = False):
        assert validation, "AIME prompts are not available for training."
        return test_dataset

    async def evaluation_prompts(
        self, num_prompts: int, validation: bool = False
    ) -> list[tuple[str, dict]]:
        dataset = self.get_dataset(validation)
        return [
            (
                self.make_prefix(**golden, problem_key="Problem"),
                {**golden, "problem_id": golden["ID"]},
            )
            for _, golden in enumerate([dataset[i] for i in range(num_prompts) if i < len(dataset)])
        ]

    async def get_prompt(self, validation=False) -> tuple[str, dict]:
        print("WARNING: AIME prompts should not be used for training.")
        validation = True
        dataset = train_dataset if not validation else test_dataset
        problem_id = 0
        golden = dataset[problem_id]
        golden = {**golden, "problem_id": golden["ID"]}
        prompt = self.make_prefix(**golden, problem_key="Problem")
        return prompt, golden

    async def get_reward(self, response, golden: dict) -> float:
        return self.compute_score(response, golden, golden_key="Answer")


================================================
FILE: examples/rl/environments/math/bigmath_agent.py
================================================
import random

import datasets

from .math_agent import MathAgent

raw_dataset = datasets.load_dataset("SynthLabsAI/Big-Math-RL-Verified", split="train")
TRAIN_SIZE = len(raw_dataset) - 1024
TEST_SIZE = 1024

assert len(raw_dataset) >= TRAIN_SIZE + TEST_SIZE
train_dataset = raw_dataset.select(range(TRAIN_SIZE))
test_dataset = raw_dataset.select(range(TRAIN_SIZE, TRAIN_SIZE + TEST_SIZE))


class BigMathAgent(MathAgent):
    env_id: str = "bigmath"

    def get_dataset(self, validation: bool = False):
        return train_dataset if not validation else test_dataset

    async def evaluation_prompts(
        self, num_prompts: int, validation: bool = False
    ) -> list[tuple[str, dict]]:
        dataset = self.get_dataset(validation)
        return [
            (self.make_prefix(**golden), golden)
            for golden in [dataset[i] for i in range(num_prompts)]
        ]

    async def get_prompt(self, validation=False) -> tuple[str, dict]:
        dataset = self.get_dataset(validation)
        golden = dataset[random.randrange(len(dataset))]
        prompt = self.make_prefix(**golden)
        return prompt, golden

    async def get_reward(self, response, golden: dict) -> float:
        return self.compute_score(response, golden, golden_key="answer")


================================================
FILE: examples/rl/environments/math/dapo_agent.py
================================================
import random

import datasets

from .math_agent import MathAgent

raw_dataset = datasets.load_dataset("BytedTsinghua-SIA/DAPO-Math-17k", split="train")
TRAIN_SIZE = 17917 - 1024
TEST_SIZE = 1024

train_dataset = raw_dataset.select(range(TRAIN_SIZE))
test_dataset = raw_dataset.select(range(TRAIN_SIZE, TRAIN_SIZE + TEST_SIZE))


class DAPOAgent(MathAgent):
    env_id: str = "dapo"

    def reformat_datum(self, datum: dict) -> dict:
        return {
            "problem": datum['prompt'][0]['content']
            .replace(
                'The last line of your response should be of the form Answer: $Answer (without quotes) where $Answer is the answer to the problem.\n\n',
                '',
            )
            .replace('\nRemember to put your answer on its own line after "Answer:".', ''),
            "answer": datum["reward_model"]["ground_truth"],
            "problem_id": datum["extra_info"]["index"],
        }

    def get_dataset(self, validation: bool = False):
        return train_dataset if not validation else test_dataset

    async def evaluation_prompts(
        self, num_prompts: int, validation: bool = False
    ) -> list[tuple[str, dict]]:
        dataset = self.get_dataset(validation)
        prompts = []
        for i, golden in [(i, dataset[i]) for i in range(num_prompts)]:
            golden = self.reformat_datum(golden)
            prompts.append((self.make_prefix(**golden), golden))
        return prompts

    async def get_prompt(self, validation=False) -> tuple[str, dict]:
        dataset = self.get_dataset(validation)
        golden = dataset[random.randrange(len(dataset))]
        golden = self.reformat_datum(golden)
        prompt = self.make_prefix(**golden)
        return prompt, golden

    async def get_reward(self, response, golden: dict) -> float:
        return self.compute_score(response, golden, golden_key="answer")


================================================
FILE: examples/rl/environments/math/gsm8k_agent.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import random

import datasets

from .math_agent import MathAgent

raw_dataset = datasets.load_dataset("openai/gsm8k", "main")

TRAIN_SIZE = 7473
TEST_SIZE = 1319

train_dataset = raw_dataset["train"]
test_dataset = raw_dataset["test"]

assert (
    len(train_dataset) == TRAIN_SIZE
), f"GSM8K loading error: expected {TRAIN_SIZE} samples but got {len(train_dataset)}"
assert (
    len(test_dataset) == TEST_SIZE
), f"GSM8K loading error: expected {TEST_SIZE} samples but got {len(test_dataset)}"


class GSM8KAgent(MathAgent):
    def __init__(self,
        answer_format: str = "boxed",
        format_reward: float = 0.0,
        negative_reward: float = 0.0,
        partial_end_reward: float = 0.0,
        **kwargs):
        super().__init__(
            answer_format=answer_format,
            format_reward=format_reward,
            negative_reward=negative_reward,
            partial_end_reward=partial_end_reward,
            **kwargs
        )
        self.env_id: str = "gsm8k"

    def reformat_datum(self, datum: dict) -> dict:
        return {
            "problem": datum["question"],
            "answer": datum["answer"],
            "numeric_answer": datum["answer"].split("#### ")[-1],
        }

    def get_dataset(self, validation: bool = False):
        return train_dataset if not validation else test_dataset

    async def evaluation_prompts(
        self, num_prompts: int, validation: bool = False
    ) -> list[tuple[str, dict]]:
        dataset = self.get_dataset(validation)
        return [
            (self.make_prefix(**golden), golden)
            for golden in [self.reformat_datum(dataset[i]) for i in range(num_prompts)]
        ]

    async def get_prompt(self, validation=False) -> tuple[str, dict]:
        dataset = self.get_dataset(validation)
        golden = dataset[random.randrange(len(dataset))]
        golden = self.reformat_datum(golden)
        prompt = self.make_prefix(**golden)
        return prompt, golden

    async def get_reward(self, response, golden: dict) -> float:
        return self.compute_score(response, golden, golden_key="numeric_answer")


# pytest


================================================
FILE: examples/rl/environments/math/math_agent.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import re
import traceback

from megatron.rl.agent.reward_only_agent import RewardOnlyAgent

try:
    from math_verify import parse, verify
except ImportError:
    print(
        "math_verify is not installed. Install it using `pip install math-verify`. Continuing using exact match verification."
    )
    MATHVERIFY_AVAILABLE = False
else:
    print("math_verify is installed. Using math_verify to verify answers.")
    MATHVERIFY_AVAILABLE = True

assert (
    MATHVERIFY_AVAILABLE
), "math_verify is not installed but now required. Install it using `pip install math-verify` to continue."

class MathAgent(RewardOnlyAgent):
    def __init__(self,
        format_reward: float = 0.0,
        answer_format: str = "tagged",
        negative_reward: float = 0.0,
        partial_end_reward: float = 0.0,
        **kwargs):
        """
        Args:
            format_reward (float): Reward given when the answer is in the expected format,
                even if the answer is incorrect or is missing the end-of-text token.
            answer_format (str): Which answer format is expected: "tagged" for <answer> tags,
                or "boxed" for \boxed{} LaTeX formatting.
            negative_reward (float): Reward assigned for a clearly incorrect or unparseable answer.
            partial_end_reward (float): Reward when the answer is correct but an expected end token is not matched exactly.
            **kwargs: Additional arguments for the base RewardOnlyAgent.
        """
        super().__init__(**kwargs)

        assert answer_format in ["tagged", "boxed"], "Invalid answer format"

        self.format_reward = format_reward
        self.answer_format = answer_format
        self.negative_reward = negative_reward
        self.partial_end_reward = partial_end_reward

    def compute_score(self, response: str, golden: dict, golden_key: str = "answer") -> float:
        """Take a response and a golden answer and return a score. Supports tagged or boxed answers.

        Uses the final answer in the response string to compute the score.
        """
        # Allow <answer> tags or \boxed{} tags (this is a bit of cheating in favor of deepseek distilled models I think)
        matched_format = None
        end_tokens = ["<|end_of_text|>", "<|endoftext|>", "</s>", "<|eot_id|>", "<|im_end|>"]

        # Only an answer immediately followed by a known end token yields 1.0 reward.
        answer_tag_pattern = r'<answer>(.*?)</answer>'
        answer_tag_match = list(re.finditer(answer_tag_pattern, response, re.DOTALL))
        if answer_tag_match:
            # Only consider the last occurrence
            last_match = answer_tag_match[-1]
            final_answer = last_match.group(1).strip()
            after = response[last_match.end():].lstrip()  # strip whitespace between </answer> and token

            try:
                parsed_answer = parse(final_answer)
            except ValueError as e:
                print("Failed to parse the answer.")
                traceback.print_stack()
                return self.negative_reward

            correct_answer = verify(str(golden[golden_key]), parsed_answer)
            if correct_answer:
                # Accept either <|end_of_text|> or <|endoftext|> as valid terminators, for flexibility.
                for token in end_tokens:
                    if after.startswith(token):
                        return 1.0
                # If the end token is present later (extra text before it), give partial credit.
                for token in end_tokens:
                    if token in after:
                        return self.partial_end_reward
                # If a correct answer but missing immediate end, give format reward (not NEGATIVE_REWARD).
                return self.format_reward
            else:
                # Incorrect answer, regardless of format/end-of-text
                return self.format_reward
        else:
            # Fallback: check boxed answer format for diagnostic/format reward as before
            boxed_pattern = r"\\boxed\{((?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*)\}"
            boxed_match = list(re.finditer(boxed_pattern, response, re.DOTALL))
            if boxed_match:
                last_match = boxed_match[-1]
                final_answer = last_match.group(1).strip()
                after = response[last_match.end():].lstrip()
                try:
                    parsed_answer = parse(final_answer)
                except ValueError as e:
                    print("Failed to parse the answer.")
                    traceback.print_stack()
                    return self.negative_reward

                correct_answer = verify(str(golden[golden_key]), parsed_answer)
                if correct_answer:
                    for token in end_tokens:
                        if after.startswith(token):
                            return 1.0
                    for token in end_tokens:
                        if token in after:
                            return self.partial_end_reward
                    return self.format_reward
                else:
                    # Formatting is correct but the answer is incorrect
                    return self.format_reward
            else:
                # Did not format the answer correctly
                return self.negative_reward

    def make_prefix(self, problem_key: str = "problem", **kwargs) -> str:
        """Take a string math problem and return the prompt. Supports requesting tagged or boxed answers. Supports chat mode prompts."""
        if self.answer_format == "boxed":
            answer_format = "Please reason step by step and provide your answer between \\boxed{} tags, for example \\boxed{20\\sqrt{3}}."
        elif self.answer_format == "tagged":
            answer_format = "Please reason step by step and provide your answer between <answer> </answer> tags, for example <answer> 20\\sqrt{3} </answer>. Do not include an = sign."
        else:
            raise ValueError(f"Invalid answer format: {self.answer_format}")

        prefix = f"""{kwargs[problem_key]}\n{answer_format}"""

        return prefix


================================================
FILE: examples/rl/environments/math/openmath_agent.py
================================================
import random

import datasets

from .math_agent import MathAgent

raw_dataset = datasets.load_dataset("nvidia/OpenMathInstruct-2", split="train")
TRAIN_SIZE = 327680
TEST_SIZE = 1024

assert len(raw_dataset) >= TRAIN_SIZE + TEST_SIZE
train_dataset = raw_dataset.select(range(TRAIN_SIZE))
test_dataset = raw_dataset.select(range(TRAIN_SIZE, TRAIN_SIZE + TEST_SIZE))


class OpenMathInstructAgent(MathAgent):
    env_id: str = "openmath_instruct"

    def get_dataset(self, validation: bool = False):
        return train_dataset if not validation else test_dataset

    async def evaluation_prompts(
        self, num_prompts: int, validation: bool = False
    ) -> list[tuple[str, dict]]:
        dataset = self.get_dataset(validation)
        return [
            (self.make_prefix(**golden), golden)
            for golden in [dataset[i] for i in range(num_prompts)]
        ]

    async def get_prompt(self, validation=False) -> tuple[str, dict]:
        dataset = self.get_dataset(validation)
        golden = dataset[random.randrange(len(dataset))]
        prompt = self.make_prefix(**golden)
        return prompt, golden

    async def get_reward(self, response, golden: dict) -> float:
        return self.compute_score(response, golden, golden_key="expected_answer")


================================================
FILE: examples/rl/model_configs/common.sh
================================================
echo "Loading common options"

export UB_TIMEOUT=720
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NVTE_FWD_LAYERNORM_SM_MARGIN=16
export NVTE_BWD_LAYERNORM_SM_MARGIN=16
export NCCL_P2P_NET_CHUNKSIZE=2097152
export NCCL_DEBUG=WARN


COMMON_OPTIONS="\
    --tensor-model-parallel-size $TP  \
    --pipeline-model-parallel-size $PP  \
    --use-mcore-models \
    --transformer-impl transformer_engine \
    --${PRECISION:-bf16} \
    --te-rng-tracker \
    --inference-dynamic-batching-buffer-size-gb 20 \
    --data-parallel-random-init \
    --attention-backend flash \
    --timing-log-level 1 \
    --log-timers-to-tensorboard \
    --save-retain-interval 160 \
    --inference-dynamic-batching-num-cuda-graphs 1 \
    --inference-dynamic-batching-unified-memory-level 1 \
    --adam-beta1 0.9 \
    --adam-beta2 ${ADAM_BETA2:-0.95} \
    --adam-eps 1e-8 \
    "

if [ ${LOWER_PRECISION:-false} == true ]; then
    echo "Lower precision experiments, disabling cuda graphs."
    ENABLE_CUDA_GRAPH=false
    COMMON_OPTIONS="${COMMON_OPTIONS} --no-gradient-accumulation-fusion"
else 
    COMMON_OPTIONS="${COMMON_OPTIONS}"
fi

if [ ${ENABLE_CUDA_GRAPH:-true} == true ]; then
    COMMON_OPTIONS="${COMMON_OPTIONS} --cuda-graph-impl=local --rl-persist-cuda-graphs"
fi


================================================
FILE: examples/rl/model_configs/llama3p1_8b_instruct.sh
================================================
#!/bin/bash
TP=${TP:-8}
PP=${PP:-1}
NODES_REQUIRED=${NODES_REQUIRED:-4}
LLM="llama3p1_8b_instruct"
EXTRAS=""

echo "Using Llama 3.1 8B Instruct model checkpoint"
SCRIPT_PATH="${BASH_SOURCE[0]}"
source $(dirname $SCRIPT_PATH)/common.sh

# In all cases, one can override those values.
# However, running without envs will give you some
# good perf out of the box for established envs.
if [ "$(basename "$ENV_CONFIG")" = "dapo.yaml" ]; then
  echo "Using DAPO environment config"
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.2}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-64}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-64}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
  ENTROPY_WEIGHT=${ENTROPY_WEIGHT:-"0.0"}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-1024}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-2}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-8192}
  EXIT_INTERVAL=${EXIT_INTERVAL:-16}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}
elif [ "$(basename "$ENV_CONFIG")" = "openmathinstructv2.yaml" ]; then
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.2}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-64}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-32}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-64}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.01"}
  ENTROPY_WEIGHT=${ENTROPY_WEIGHT:-"0.0"}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-512}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-4096}
  EXIT_INTERVAL=${EXIT_INTERVAL:-16}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}
  EXTRAS="--lr-warmup-samples 5120"
else
  # Some default values if config is missing.
  echo "Undected environment config, using default values"
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.2}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-32}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-4096}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-32}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-64}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.01"}
  ENTROPY_WEIGHT=${ENTROPY_WEIGHT:-"0.0"}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-512}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
  EXIT_INTERVAL=${EXIT_INTERVAL:-16}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}
fi

ENV_DEPENDENT="\
  --micro-batch-size $MICRO_BATCH_SIZE \
  --global-batch-size $TRAINING_BATCH_SIZE \
  --grpo-group-size $GRPO_GROUP_SIZE \
  --grpo-prompts-per-step $GRPO_PROMPTS_PER_STEP \
  --grpo-iterations $GRPO_ITERATIONS \
  --grpo-clamp-eps-lower $GRPO_CLAMP_EPS_LOWER \
  --grpo-clamp-eps-upper $GRPO_CLAMP_EPS_UPPER \
  --grpo-kl-beta $GRPO_KL_BETA \
  --grpo-entropy-term-weight $ENTROPY_WEIGHT \
  --langrl-env-config $ENV_CONFIG "

MODEL_OPTIONS="\
  --disable-bias-linear \
  --ckpt-format torch_dist \
  --seq-length $MAX_SEQ_LENGTH \
  --inference-max-seq-length $MAX_SEQ_LENGTH \
  --inference-max-requests $MAX_INFERENCE_BS \
  --pretrained-checkpoint $CHECKPOINT \
  --add-qkv-bias \
  --normalization RMSNorm \
  --group-query-attention \
  --num-query-groups 8 \
  --no-masked-softmax-fusion \
  --attention-softmax-in-fp32 \
  --attention-dropout 0.0 \
  --hidden-dropout 0.0 \
  --untie-embeddings-and-output-weights \
  --weight-decay 0.1 \
  --position-embedding-type rope \
  --rotary-percent 1.0 \
  --rotary-base 500000 \
  --use-rotary-position-embeddings \
  --swiglu \
  --num-layers 32  \
  --hidden-size 4096  \
  --ffn-hidden-size 14336 \
  --num-attention-heads 32  \
  --max-position-embeddings 131072  \
  --tokenizer-type HuggingFaceTokenizer \
  --tokenizer-model unsloth/Meta-Llama-3.1-8B-Instruct \
  --tokenizer-hf-use-fast \
  --tokenizer-hf-include-special-tokens \
  --lr 3e-7 \
  --make-vocab-size-divisible-by 128 \
  --clip-grad 1.0 \
  --rl-use-sequence-packing \
  --rl-sequence-packing-algo fifo \
  $EXTRAS"


================================================
FILE: examples/rl/model_configs/nemotron5_56b.sh
================================================
#!/bin/bash
TP=${TP:-8}
PP=${PP:-1}
NODES_REQUIRED=${NODES_REQUIRED:-2}
LLM="nemotron5_56b"

echo "Using Nemotron5 56B model checkpoint"
SCRIPT_PATH="${BASH_SOURCE[0]}"
source $(dirname $SCRIPT_PATH)/common.sh

# In all cases, one can override those values.
# However, running without envs will give you some
# good perf out of the box for established envs.
if [ "$(basename "$ENV_CONFIG")" = "dapo.yaml" ]; then
  echo "Using DAPO environment config"
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.28}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-32}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-64}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-1024}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-12000}
  EXIT_INTERVAL=${EXIT_INTERVAL:-16}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}
else
  # Some default values if config is unsupported.
  echo "Undected environment config, using default values"
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.2}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-64}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-32}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-512}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-12000}
  EXIT_INTERVAL=${EXIT_INTERVAL:-16}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}
fi

ENV_DEPENDENT="\
  --micro-batch-size $MICRO_BATCH_SIZE \
  --global-batch-size $TRAINING_BATCH_SIZE \
  --grpo-group-size $GRPO_GROUP_SIZE \
  --grpo-prompts-per-step $GRPO_PROMPTS_PER_STEP \
  --grpo-iterations $GRPO_ITERATIONS \
  --grpo-clamp-eps-lower $GRPO_CLAMP_EPS_LOWER \
  --grpo-clamp-eps-upper $GRPO_CLAMP_EPS_UPPER \
  --grpo-kl-beta $GRPO_KL_BETA \
  --langrl-env-config $ENV_CONFIG "

# Original Qwen model uses a wrong padding_id token. unsloth tokenizer fixes it.
MODEL_OPTIONS="\
  --calculate-per-token-loss \
  --seq-length $MAX_SEQ_LENGTH \
  --inference-max-seq-length $MAX_SEQ_LENGTH \
  --inference-max-requests $MAX_INFERENCE_BS \
  --pretrained-checkpoint $CHECKPOINT \
 --fp8-format hybrid \
    --fp8-amax-history-len 1 \
    --fp8-amax-compute-algo max \
    --fp8-interval 1 \
    --fp8-margin 0 \
    --first-last-layers-bf16 \
    \
    --fp8-recipe tensorwise \
    --hybrid-layer-pattern M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M- \
    --spec megatron.core.models.mamba.mamba_layer_specs mamba_stack_spec \
    --mamba-state-dim 256 \
    --per-split-data-args-path ${BLEND_PATH} \
    --tiktoken-pattern v2 \
    --distributed-timeout-minutes 60 \
    --use-mcore-models \
    --no-mmap-bin-files \
    --sequence-parallel \
    --untie-embeddings-and-output-weights \
    --disable-bias-linear \
    --init-method-std 0.0099 \
    --position-embedding-type none \
    --squared-relu \
    --hidden-size 8192 \
    --num-attention-heads 64 \
    --group-query-attention \
    --num-query-groups 8 \
    --ffn-hidden-size 32768 \
    --kv-channels 128 \
    --normalization RMSNorm \
    --attention-dropout 0.0 \
    --hidden-dropout 0.0 \
    --exit-duration-in-mins 5750 \
    --seq-length 8191 \
    --max-position-embeddings 8192 \
  --tensor-model-parallel-size $TP  \
  --pipeline-model-parallel-size $PP  \
  --no-masked-softmax-fusion \
  --attention-softmax-in-fp32 \
   --weight-decay 0.1 \
    --clip-grad 1.0 \
    --tokenizer-type TikTokenizer \
    --tokenizer-model ${TOKENIZER_MODEL} \
     --position-embedding-type none \
     --dist-ckpt-strictness log_unexpected \
      --ckpt-format torch_dist \
--ckpt-fully-parallel-save \
    --ckpt-fully-parallel-load \
     --async-save \
    --ckpt-assume-constant-structure \
    --log-progress  \
    --timing-log-option minmax \
    --log-params-norm \
    --log-num-zeros-in-grad \
    --log-throughput \
--use-distributed-optimizer \
    --overlap-grad-reduce \
    --overlap-param-gather \
    --no-create-attention-mask-in-dataloader \
  --lr 1e-6 \
  --model-temperature 1.2 \
  --lr-warmup-samples 0 \
  "


================================================
FILE: examples/rl/model_configs/nemotron5_8b.sh
================================================
#!/bin/bash
TP=${TP:-8}
PP=${PP:-1}
NODES_REQUIRED=${NODES_REQUIRED:-2}
LLM="nemotron5_8b"

echo "Using Nemotron5 8B model checkpoint"
SCRIPT_PATH="${BASH_SOURCE[0]}"
source $(dirname $SCRIPT_PATH)/common.sh

# In all cases, one can override those values.
# However, running without envs will give you some
# good perf out of the box for established envs.
if [ "$(basename "$ENV_CONFIG")" = "dapo.yaml" ]; then
  echo "Using DAPO environment config"
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.28}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-32}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-64}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-1024}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-12000}
  EXIT_INTERVAL=${EXIT_INTERVAL:-16}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}
else
  # Some default values if config is unsupported.
  echo "Undected environment config, using default values"
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.2}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-64}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-32}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-512}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-12000}
  EXIT_INTERVAL=${EXIT_INTERVAL:-16}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}
fi

ENV_DEPENDENT="\
  --micro-batch-size $MICRO_BATCH_SIZE \
  --global-batch-size $TRAINING_BATCH_SIZE \
  --grpo-group-size $GRPO_GROUP_SIZE \
  --grpo-prompts-per-step $GRPO_PROMPTS_PER_STEP \
  --grpo-iterations $GRPO_ITERATIONS \
  --grpo-clamp-eps-lower $GRPO_CLAMP_EPS_LOWER \
  --grpo-clamp-eps-upper $GRPO_CLAMP_EPS_UPPER \
  --grpo-kl-beta $GRPO_KL_BETA \
  --langrl-env-config $ENV_CONFIG "

# Original Qwen model uses a wrong padding_id token. unsloth tokenizer fixes it.
MODEL_OPTIONS="\
  --calculate-per-token-loss \
  --seq-length $MAX_SEQ_LENGTH \
  --inference-max-seq-length $MAX_SEQ_LENGTH \
  --inference-max-requests $MAX_INFERENCE_BS \
  --pretrained-checkpoint $CHECKPOINT \
  --hybrid-layer-pattern M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M*-M-M-M-M-M- \
    --spec megatron.core.models.mamba.mamba_layer_specs mamba_stack_spec \
    --tiktoken-pattern v2 \
    --distributed-timeout-minutes 60 \
    --use-mcore-models \
    --no-mmap-bin-files \
    --untie-embeddings-and-output-weights \
    --disable-bias-linear \
  --normalization RMSNorm \
  --norm-epsilon 1e-5 \
   --init-method-std 0.014 \
    --position-embedding-type none \
    --squared-relu \
    --hidden-size 4096 \
    --num-attention-heads 32 \
    --group-query-attention \
    --num-query-groups 8 \
    --ffn-hidden-size 21504 \
    --kv-channels 128 \
    --normalization RMSNorm \
    --attention-dropout 0.0 \
    --hidden-dropout 0.0 \
    --exit-duration-in-mins 5750 \
    --seq-length 8191 \
    --max-position-embeddings 8192 \
  --tensor-model-parallel-size $TP  \
  --pipeline-model-parallel-size $PP  \
  --no-masked-softmax-fusion \
  --attention-softmax-in-fp32 \
   --weight-decay 0.1 \
    --clip-grad 1.0 \
    --tokenizer-type TikTokenizer \
    --tokenizer-model ${TOKENIZER_MODEL} \
    --no-use-tokenizer-model-from-checkpoint-args \
     --position-embedding-type none \
     --dist-ckpt-strictness log_unexpected \
      --ckpt-format torch_dist \
--ckpt-fully-parallel-save \
    --ckpt-fully-parallel-load \
--use-distributed-optimizer \
    --overlap-grad-reduce \
    --overlap-param-gather \
    --no-create-attention-mask-in-dataloader \
  --lr 1e-6 \
  --lr-warmup-samples 0 \
  "


================================================
FILE: examples/rl/model_configs/nemotron5p5_12b_H.sh
================================================
#!/bin/bash
TP=${TP:-4}
PP=${PP:-1}
NODES_REQUIRED=${NODES_REQUIRED:-2}
LLM="nemotron5p5_12b_H"

echo "Using Nemotron5p5 12B model checkpoint"

export LOWER_PRECISION=true
export SEQUENCE_PARALLEL=false
echo "Lower precision: $LOWER_PRECISION"
echo "Sequence parallel: $SEQUENCE_PARALLEL"

SCRIPT_PATH="${BASH_SOURCE[0]}"
source $(dirname $SCRIPT_PATH)/common.sh


# In all cases, one can override those values.
# However, running without envs will give you some
# good perf out of the box for established envs.
if [ "$(basename "$ENV_CONFIG")" = "dapo.yaml" ]; then
  echo "Using DAPO environment config"
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.28}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-32}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-64}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-1024}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-12000}
  EXIT_INTERVAL=${EXIT_INTERVAL:-16}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}
else
  # Some default values if config is unsupported.
  echo "Undected environment config, using default values"
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.2}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-64}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-32}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-512}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-12000}
  EXIT_INTERVAL=${EXIT_INTERVAL:-16}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}
fi

ENV_DEPENDENT="\
  --micro-batch-size $MICRO_BATCH_SIZE \
  --global-batch-size $TRAINING_BATCH_SIZE \
  --grpo-group-size $GRPO_GROUP_SIZE \
  --grpo-prompts-per-step $GRPO_PROMPTS_PER_STEP \
  --grpo-iterations $GRPO_ITERATIONS \
  --grpo-clamp-eps-lower $GRPO_CLAMP_EPS_LOWER \
  --grpo-clamp-eps-upper $GRPO_CLAMP_EPS_UPPER \
  --grpo-kl-beta $GRPO_KL_BETA \
  --langrl-env-config $ENV_CONFIG "

# Original Qwen model uses a wrong padding_id token. unsloth tokenizer fixes it.
MODEL_OPTIONS="\
  --calculate-per-token-loss \
  --seq-length $MAX_SEQ_LENGTH \
  --inference-max-seq-length $MAX_SEQ_LENGTH \
  --inference-max-requests $MAX_INFERENCE_BS \
  --pretrained-checkpoint $CHECKPOINT \
  --fp8-recipe blockwise \
  --fp8-format e4m3 \
  --first-last-layers-bf16 \
  --num-layers-at-start-in-bf16 2 \
  --num-layers-at-end-in-bf16 2 \
  --fp8-param-gather \
  --disable-gloo-process-groups \
  --mamba-head-dim 80 \
  --hybrid-layer-pattern M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M- \
  --spec megatron.core.models.mamba.mamba_layer_specs mamba_stack_spec \
  --tiktoken-pattern v2 \
  --distributed-timeout-minutes 10 \
  --use-mcore-models \
  --no-mmap-bin-files \
  --untie-embeddings-and-output-weights \
  --disable-bias-linear \
  --normalization RMSNorm \
  --norm-epsilon 1e-5 \
   --init-method-std 0.0125 \
    --position-embedding-type none \
  --squared-relu \
    --hidden-size 5120 \
    --num-attention-heads 40 \
    --group-query-attention \
    --num-query-groups 8 \
    --ffn-hidden-size 20480 \
    --kv-channels 128 \
    --normalization RMSNorm \
    --attention-dropout 0.0 \
    --hidden-dropout 0.0 \
    --exit-duration-in-mins 5750 \
    --seq-length 8191 \
    --max-position-embeddings 8192 \
    --tensor-model-parallel-size $TP  \
    --pipeline-model-parallel-size $PP  \
    --no-masked-softmax-fusion \
    --attention-softmax-in-fp32 \
    --lr 4.5e-6 \
    --min-lr 4.5e-7 \
    --weight-decay 0.1 \
    --clip-grad 1.0 \
    --tokenizer-type TikTokenizer \
    --tokenizer-model ${TOKENIZER_MODEL} \
    --save-interval 2000 \
    --ckpt-format torch_dist \
    --ckpt-fully-parallel-save \
    --ckpt-fully-parallel-load \
    --ckpt-assume-constant-structure \
    --log-progress  \
    --timing-log-option minmax \
    --log-params-norm \
    --log-num-zeros-in-grad \
    --log-throughput \
    --bf16 \
    --adam-beta1 0.9 \
    --adam-beta2 0.95 \
    --use-distributed-optimizer \
    --ddp-num-buckets 5 \
    --overlap-grad-reduce \
    --overlap-param-gather \
    --no-create-attention-mask-in-dataloader \
    --manual-gc \
    --num-workers 1 \
    --log-straggler \
    --disable-straggler-on-startup \
    --straggler-minmax-count 16 \
    --check-weight-hash-across-dp-replicas-interval 20000 \
    --rerun-mode disabled \
    --rl-default-temperature 0.9 \
    --rl-default-top-p 0.95 \
  "


================================================
FILE: examples/rl/model_configs/nemotron6_3b_moe.sh
================================================
#!/bin/bash
TP=${TP:-2}
PP=${PP:-1}
EP=${EP:-32}
NODES_REQUIRED=${NODES_REQUIRED:-4}
LLM="nemotron6_3b_moe"

echo "Using Nemotron6 3B MOE model checkpoint"
SCRIPT_PATH="${BASH_SOURCE[0]}"
source $(dirname $SCRIPT_PATH)/common.sh

# In all cases, one can override those values.
# However, running without envs will give you some
# good perf out of the box for established envs.
if [ "$(basename "$ENV_CONFIG")" = "dapo.yaml" ]; then
  echo "Using DAPO environment config"
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.28}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-32}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-64}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-1024}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-11999}
  EXIT_INTERVAL=${EXIT_INTERVAL:-20}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-20}
else
  # Some default values if config is unsupported.
  echo "Undected environment config, using default values"
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.28}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-64}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-2}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-16}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-32}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-1024}
  EXIT_INTERVAL=${EXIT_INTERVAL:-20}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-20}
fi

ENV_DEPENDENT="\
  --micro-batch-size $MICRO_BATCH_SIZE \
  --global-batch-size $TRAINING_BATCH_SIZE \
  --grpo-group-size $GRPO_GROUP_SIZE \
  --grpo-prompts-per-step $GRPO_PROMPTS_PER_STEP \
  --grpo-iterations $GRPO_ITERATIONS \
  --grpo-clamp-eps-lower $GRPO_CLAMP_EPS_LOWER \
  --grpo-clamp-eps-upper $GRPO_CLAMP_EPS_UPPER \
  --grpo-kl-beta $GRPO_KL_BETA \
  --langrl-env-config $ENV_CONFIG "

MODEL_OPTIONS="\
  --no-use-tokenizer-model-from-checkpoint-args \
  --rl-skip-bos-token \
  --no-rl-use-sequence-packing \
  --rl-partial-rollouts \
  --moe-pad-experts-for-cuda-graph-inference \
  --inference-dynamic-batching-max-tokens 8192 \
  --inference-dynamic-batching-max-requests 128 \
  --inference-dynamic-batching-num-cuda-graphs 2 \
  --decode-only-cuda-graphs \
  --cuda-graph-impl local \
  --cuda-graph-scope full \
  --use-checkpoint-args \
  --enable-experimental \
  --cross-entropy-loss-fusion \
  --cross-entropy-fusion-impl native \
  --moe-aux-loss-coeff 0.0 \
  --moe-router-dtype fp64 \
  --moe-router-load-balancing-type aux_loss \
  --moe-router-score-function sigmoid \
  --moe-token-dispatcher-type alltoall \
  --moe-router-enable-expert-bias \
  --moe-router-topk-scaling-factor 2.5 \
  --disable-gloo-process-groups \
  --rl-default-top-k -1 \
  --rl-default-temperature 1.0 \
  --rl-default-top-p 1.0 \
  --rl-inference-logprobs-is-correction \
  --rl-importance-sampling-truncation-coef 10.0 \
  --seq-length $MAX_SEQ_LENGTH \
  --inference-max-seq-length $MAX_SEQ_LENGTH \
  --inference-max-requests $MAX_INFERENCE_BS \
  --pretrained-checkpoint $CHECKPOINT \
  --distributed-timeout-minutes 60 \
  --use-mcore-models \
  --no-mmap-bin-files \
  --disable-bias-linear \
  --norm-epsilon 1e-5 \
  --init-method-std 0.014 \
  --exit-duration-in-mins 5750 \
  --max-position-embeddings $MAX_SEQ_LENGTH \
  --tensor-model-parallel-size $TP  \
  --pipeline-model-parallel-size $PP  \
  --expert-model-parallel-size $EP \
  --expert-tensor-parallel-size 1 \
  --weight-decay 0.01 \
  --clip-grad 1.0 \
  --tiktoken-pattern v2 \
  --tokenizer-type HuggingFaceTokenizer \
  --tokenizer-model ${TOKENIZER_MODEL} \
  --tokenizer-hf-include-special-tokens \
  --dist-ckpt-strictness log_unexpected \
  --ckpt-format torch_dist \
  --ckpt-fully-parallel-save \
  --ckpt-fully-parallel-load \
  --use-distributed-optimizer \
  --overlap-grad-reduce \
  --overlap-param-gather \
  --no-create-attention-mask-in-dataloader \
  --lr 3e-6 \
  --min-lr 3e-6 \
  --lr-decay-style constant \
  --lr-warmup-samples 640 \
  --lr-warmup-init 0.3e-7 \
  --no-load-optim \
  --no-load-rng "


================================================
FILE: examples/rl/model_configs/qwen3_30b_a3b_moe.sh
================================================
#!/bin/bash 

TP=${TP:-4}
PP=${PP:-1}
NODES_REQUIRED=${NODES_REQUIRED:-1}

echo "Using Qwen3-30B-A3B model checkpoint"
SCRIPT_PATH="${BASH_SOURCE[0]}"
source $(dirname $SCRIPT_PATH)/common.sh

# Default values
GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.2}
MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-32}
GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-64}
GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-256}
MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-8192}
EXIT_INTERVAL=${EXIT_INTERVAL:-20}
CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-20}

ENV_DEPENDENT="\
  --micro-batch-size $MICRO_BATCH_SIZE \
  --global-batch-size $TRAINING_BATCH_SIZE \
  --grpo-group-size $GRPO_GROUP_SIZE \
  --grpo-prompts-per-step $GRPO_PROMPTS_PER_STEP \
  --grpo-iterations $GRPO_ITERATIONS \
  --grpo-clamp-eps-lower $GRPO_CLAMP_EPS_LOWER \
  --grpo-clamp-eps-upper $GRPO_CLAMP_EPS_UPPER \
  --grpo-kl-beta $GRPO_KL_BETA \
  --langrl-env-config $ENV_CONFIG "


MODEL_OPTIONS="
--seq-length $MAX_SEQ_LENGTH \
--inference-max-seq-length $MAX_SEQ_LENGTH \
--inference-max-requests $MAX_INFERENCE_BS \
--pretrained-checkpoint $CHECKPOINT \
--no-use-tokenizer-model-from-checkpoint-args \
--seq-length 8192 \
--inference-max-seq-length 8192 \
--bf16 \
--tensor-model-parallel-size $TP  \
--pipeline-model-parallel-size $PP  \
--expert-model-parallel-size $EP \
--attention-backend flash \
--transformer-impl transformer_engine \
--te-rng-tracker \
--tokenizer-type HuggingFaceTokenizer \
--tokenizer-model Qwen/Qwen3-30B-A3B \
--tokenizer-hf-include-special-tokens \
--untie-embeddings-and-output-weights \
--num-layers 48 \
--hidden-size 2048 \
--ffn-hidden-size 6144 \
--num-attention-heads 32 \
--kv-channels 128 \
--max-position-embeddings 8192 \
--group-query-attention \
--num-query-groups 4 \
--normalization RMSNorm \
--norm-epsilon 1e-6 \
--position-embedding-type rope \
--rotary-percent 1.0 \
--rotary-base 1000000 \
--use-rotary-position-embeddings \
--swiglu \
--disable-bias-linear \
--num-experts 128 \
--moe-router-topk 8 \
--moe-ffn-hidden-size 768 \
--moe-aux-loss-coeff 0.001 \
--moe-router-load-balancing-type aux_loss \
--attention-dropout 0.0 \
--hidden-dropout 0.0 \
--no-masked-softmax-fusion \
--attention-softmax-in-fp32 \
--vocab-size 151936 \
--make-vocab-size-divisible-by 128 \
--dist-ckpt-strictness log_unexpected \
--qk-layernorm \
--moe-token-dispatcher-type alltoall \
--moe-layer-freq 1 \
--optimizer adam \
--adam-beta1 0.9 \
--adam-beta2 0.999 \
--adam-eps 1e-8 \
--lr 1e-6 \
--min-lr 1e-7 \
--lr-warmup-samples 0 \
--clip-grad 1.0 \
--weight-decay 0.01 \
--no-load-optim \
--ckpt-format torch_dist
"


================================================
FILE: examples/rl/model_configs/qwen3_32b.sh
================================================
#!/bin/bash

TP=${TP:-4}
PP=${PP:-1}
NODES_REQUIRED=${NODES_REQUIRED:-1}

echo "Using Qwen3 32B model checkpoint"
SCRIPT_PATH="${BASH_SOURCE[0]}"
source $(dirname $SCRIPT_PATH)/common.sh

# Default values
GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.2}
MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-32}
GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-64}
GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-256}
MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-8192}
EXIT_INTERVAL=${EXIT_INTERVAL:-16}
CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}

ENV_DEPENDENT="\
  --micro-batch-size $MICRO_BATCH_SIZE \
  --global-batch-size $TRAINING_BATCH_SIZE \
  --grpo-group-size $GRPO_GROUP_SIZE \
  --grpo-prompts-per-step $GRPO_PROMPTS_PER_STEP \
  --grpo-iterations $GRPO_ITERATIONS \
  --grpo-clamp-eps-lower $GRPO_CLAMP_EPS_LOWER \
  --grpo-clamp-eps-upper $GRPO_CLAMP_EPS_UPPER \
  --grpo-kl-beta $GRPO_KL_BETA \
  --langrl-env-config $ENV_CONFIG "

# Model configuration based on MegatronBridge run_config.yaml
MODEL_OPTIONS="\
  --ckpt-format torch_dist \
  --seq-length $MAX_SEQ_LENGTH \
  --inference-max-seq-length $MAX_SEQ_LENGTH \
  --inference-max-requests $MAX_INFERENCE_BS \
  --pretrained-checkpoint $CHECKPOINT \
  --untie-embeddings-and-output-weights \
  --num-layers 64 \
  --hidden-size 5120 \
  --ffn-hidden-size 25600 \
  --num-attention-heads 64 \
  --kv-channels 128 \
  --max-position-embeddings 40960 \
  --group-query-attention \
  --num-query-groups 8 \
  --normalization RMSNorm \
  --norm-epsilon 1e-6 \
  --qk-layernorm \
  --position-embedding-type rope \
  --rotary-percent 1.0 \
  --rotary-base 1000000 \
  --use-rotary-position-embeddings \
  --swiglu \
  --disable-bias-linear \
  --attention-dropout 0.0 \
  --hidden-dropout 0.0 \
  --no-masked-softmax-fusion \
  --attention-softmax-in-fp32 \
  --tokenizer-type HuggingFaceTokenizer \
  --tokenizer-model Qwen/Qwen3-4B \
  --tokenizer-hf-include-special-tokens \
  --vocab-size 151936 \
  --make-vocab-size-divisible-by 128 \
  --optimizer adam \
  --adam-beta1 0.9 \
  --adam-beta2 0.999 \
  --adam-eps 1e-8 \
  --lr 1e-6 \
  --min-lr 1e-7 \
  --lr-warmup-samples 0 \
  --clip-grad 1.0 \
  --weight-decay 0.01 \
  --recompute-granularity selective \
  --recompute-activations \
  --recompute-modules core_attn \
  "


================================================
FILE: examples/rl/model_configs/qwen3_4b.sh
================================================
#!/bin/bash

TP=${TP:-1}
PP=${PP:-1}
NODES_REQUIRED=${NODES_REQUIRED:-1}

echo "Using Qwen3 4B model checkpoint"
SCRIPT_PATH="${BASH_SOURCE[0]}"
source $(dirname $SCRIPT_PATH)/common.sh

# Default values
GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.2}
MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-32}
GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-64}
GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-256}
MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-32768}
EXIT_INTERVAL=${EXIT_INTERVAL:-16}
CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}

ENV_DEPENDENT="\
  --micro-batch-size $MICRO_BATCH_SIZE \
  --global-batch-size $TRAINING_BATCH_SIZE \
  --grpo-group-size $GRPO_GROUP_SIZE \
  --grpo-prompts-per-step $GRPO_PROMPTS_PER_STEP \
  --grpo-iterations $GRPO_ITERATIONS \
  --grpo-clamp-eps-lower $GRPO_CLAMP_EPS_LOWER \
  --grpo-clamp-eps-upper $GRPO_CLAMP_EPS_UPPER \
  --grpo-kl-beta $GRPO_KL_BETA \
  --langrl-env-config $ENV_CONFIG "

# Model configuration based on MegatronBridge run_config.yaml
MODEL_OPTIONS="\
  --ckpt-format torch_dist \
  --seq-length $MAX_SEQ_LENGTH \
  --inference-max-seq-length $MAX_SEQ_LENGTH \
  --inference-max-requests $MAX_INFERENCE_BS \
  --pretrained-checkpoint $CHECKPOINT \
  --num-layers 36 \
  --hidden-size 2560 \
  --ffn-hidden-size 9728 \
  --num-attention-heads 32 \
  --kv-channels 128 \
  --max-position-embeddings 40960 \
  --group-query-attention \
  --num-query-groups 8 \
  --normalization RMSNorm \
  --norm-epsilon 1e-6 \
  --qk-layernorm \
  --position-embedding-type rope \
  --rotary-percent 1.0 \
  --rotary-base 1000000 \
  --use-rotary-position-embeddings \
  --swiglu \
  --disable-bias-linear \
  --attention-dropout 0.0 \
  --hidden-dropout 0.0 \
  --no-masked-softmax-fusion \
  --attention-softmax-in-fp32 \
  --tokenizer-type HuggingFaceTokenizer \
  --tokenizer-model Qwen/Qwen3-4B \
  --vocab-size 151936 \
  --make-vocab-size-divisible-by 128 \
  --optimizer adam \
  --adam-beta1 0.9 \
  --adam-beta2 0.999 \
  --adam-eps 1e-8 \
  --lr 1e-6 \
  --min-lr 1e-7 \
  --lr-warmup-samples 0 \
  --clip-grad 1.0 \
  --weight-decay 0.01 \
  --recompute-granularity selective \
  --recompute-activations \
  --recompute-modules core_attn \
  "


================================================
FILE: examples/rl/model_configs/qwen3_8b.sh
================================================
#!/bin/bash

TP=${TP:-1}
PP=${PP:-1}
NODES_REQUIRED=${NODES_REQUIRED:-1}

echo "Using Qwen3 8B model checkpoint"
SCRIPT_PATH="${BASH_SOURCE[0]}"
source $(dirname $SCRIPT_PATH)/common.sh

# Default values
GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.2}
MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-32}
GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-64}
GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-256}
MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-32768}
EXIT_INTERVAL=${EXIT_INTERVAL:-16}
CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}

ENV_DEPENDENT="\
  --micro-batch-size $MICRO_BATCH_SIZE \
  --global-batch-size $TRAINING_BATCH_SIZE \
  --grpo-group-size $GRPO_GROUP_SIZE \
  --grpo-prompts-per-step $GRPO_PROMPTS_PER_STEP \
  --grpo-iterations $GRPO_ITERATIONS \
  --grpo-clamp-eps-lower $GRPO_CLAMP_EPS_LOWER \
  --grpo-clamp-eps-upper $GRPO_CLAMP_EPS_UPPER \
  --grpo-kl-beta $GRPO_KL_BETA \
  --langrl-env-config $ENV_CONFIG "

# Model configuration based on MegatronBridge run_config.yaml
MODEL_OPTIONS="\
  --ckpt-format torch_dist \
  --seq-length $MAX_SEQ_LENGTH \
  --inference-max-seq-length $MAX_SEQ_LENGTH \
  --inference-max-requests $MAX_INFERENCE_BS \
  --pretrained-checkpoint $CHECKPOINT \
  --untie-embeddings-and-output-weights \
  --num-layers 36 \
  --hidden-size 4096 \
  --ffn-hidden-size 12288 \
  --num-attention-heads 32 \
  --kv-channels 128 \
  --max-position-embeddings 40960 \
  --group-query-attention \
  --num-query-groups 8 \
  --normalization RMSNorm \
  --norm-epsilon 1e-6 \
  --qk-layernorm \
  --position-embedding-type rope \
  --rotary-percent 1.0 \
  --rotary-base 1000000 \
  --use-rotary-position-embeddings \
  --swiglu \
  --disable-bias-linear \
  --attention-dropout 0.0 \
  --hidden-dropout 0.0 \
  --no-masked-softmax-fusion \
  --attention-softmax-in-fp32 \
  --tokenizer-type HuggingFaceTokenizer \
  --tokenizer-model Qwen/Qwen3-8B \
  --vocab-size 151936 \
  --make-vocab-size-divisible-by 128 \
  --optimizer adam \
  --adam-beta1 0.9 \
  --adam-beta2 0.999 \
  --adam-eps 1e-8 \
  --lr 1e-6 \
  --min-lr 1e-7 \
  --lr-warmup-samples 0 \
  --clip-grad 1.0 \
  --weight-decay 0.01 \
  --recompute-granularity selective \
  --recompute-activations \
  --recompute-modules core_attn \
  "


================================================
FILE: examples/rl/model_configs/qwen_2p5_32b.sh
================================================
#!/bin/bash
TP=${TP:-8}
PP=${PP:-1}
NODES_REQUIRED=${NODES_REQUIRED:-8}
LLM="qwen2p5_32b"

echo "Using Qwen 2.5 32B model checkpoint"
SCRIPT_PATH="${BASH_SOURCE[0]}"
source $(dirname $SCRIPT_PATH)/common.sh

# In all cases, one can override those values.
# However, running without envs will give you some
# good perf out of the box for established envs.
if [ "$(basename "$ENV_CONFIG")" = "dapo.yaml" ]; then
  echo "Using DAPO environment config"
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.28}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-32}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-64}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-1024}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-12000}
  EXIT_INTERVAL=${EXIT_INTERVAL:-16}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}
else
  # Some default values if config is unsupported.
  echo "Undected environment config, using default values"
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.2}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-64}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-32}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-512}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-12000}
  EXIT_INTERVAL=${EXIT_INTERVAL:-16}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}
fi

ENV_DEPENDENT="\
  --micro-batch-size $MICRO_BATCH_SIZE \
  --global-batch-size $TRAINING_BATCH_SIZE \
  --grpo-group-size $GRPO_GROUP_SIZE \
  --grpo-prompts-per-step $GRPO_PROMPTS_PER_STEP \
  --grpo-iterations $GRPO_ITERATIONS \
  --grpo-clamp-eps-lower $GRPO_CLAMP_EPS_LOWER \
  --grpo-clamp-eps-upper $GRPO_CLAMP_EPS_UPPER \
  --grpo-kl-beta $GRPO_KL_BETA \
  --langrl-env-config $ENV_CONFIG "

# Original Qwen model uses a wrong padding_id token. unsloth tokenizer fixes it.
MODEL_OPTIONS="\
  --calculate-per-token-loss \
  --ckpt-format torch_dist \
  --seq-length $MAX_SEQ_LENGTH \
  --inference-max-seq-length $MAX_SEQ_LENGTH \
  --inference-max-requests $MAX_INFERENCE_BS \
  --pretrained-checkpoint $CHECKPOINT \
  --untie-embeddings-and-output-weights \
  --disable-bias-linear \
  --add-qkv-bias \
  --normalization RMSNorm \
  --norm-epsilon 1e-5 \
  --group-query-attention \
  --num-query-groups 8 \
  --no-masked-softmax-fusion \
  --attention-softmax-in-fp32 \
  --attention-dropout 0.0 \
  --hidden-dropout 0.0 \
  --weight-decay 0.0 \
  --position-embedding-type rope \
  --rotary-percent 1.0 \
  --rotary-base 1000000 \
  --use-rotary-position-embeddings \
  --swiglu \
  --num-layers 64  \
  --hidden-size 5120  \
  --ffn-hidden-size 27648 \
  --num-attention-heads 40  \
  --max-position-embeddings 131072 \
  --tokenizer-type HuggingFaceTokenizer \
  --tokenizer-model unsloth/Qwen2.5-32B \
  --tokenizer-hf-include-special-tokens \
  --lr 1e-6 \
  --lr-warmup-samples 0 \
  --make-vocab-size-divisible-by 128 \
  --clip-grad 1.0 \
  --recompute-granularity selective \
  --recompute-activations "


================================================
FILE: examples/rl/model_configs/qwen_2p5_3b.sh
================================================
#!/bin/bash
TP=${TP:-2}
PP=${PP:-1}
NODES_REQUIRED=${NODES_REQUIRED:-2}
LLM="qwen2p5_3b"

echo "Using Qwen 2.5 3B model checkpoint"
SCRIPT_PATH="${BASH_SOURCE[0]}"
source $(dirname $SCRIPT_PATH)/common.sh

# In all cases, one can override those values.
# However, running without envs will give you some
# good perf out of the box for established envs.
if [ "$(basename "$ENV_CONFIG")" = "dapo.yaml" ]; then
  echo "Using DAPO environment config"
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.2}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-64}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-64}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
  ENTROPY_WEIGHT=${ENTROPY_WEIGHT:-"0.0"}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-1024}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-8192}
  EXIT_INTERVAL=${EXIT_INTERVAL:-16}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}
else
  # Some default values if config is unsupported.
  echo "Undected environment config, using default values"
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.2}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-64}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-32}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
  ENTROPY_WEIGHT=${ENTROPY_WEIGHT:-"0.0"}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-512}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-8192}
  EXIT_INTERVAL=${EXIT_INTERVAL:-16}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}
fi

ENV_DEPENDENT="\
  --micro-batch-size $MICRO_BATCH_SIZE \
  --global-batch-size $TRAINING_BATCH_SIZE \
  --grpo-group-size $GRPO_GROUP_SIZE \
  --grpo-prompts-per-step $GRPO_PROMPTS_PER_STEP \
  --grpo-iterations $GRPO_ITERATIONS \
  --grpo-clamp-eps-lower $GRPO_CLAMP_EPS_LOWER \
  --grpo-clamp-eps-upper $GRPO_CLAMP_EPS_UPPER \
  --grpo-kl-beta $GRPO_KL_BETA \
  --grpo-entropy-term-weight $ENTROPY_WEIGHT \
  --langrl-env-config $ENV_CONFIG "

# Original Qwen model uses a wrong padding_id token. unsloth tokenizer fixes it.
MODEL_OPTIONS="\
  --calculate-per-token-loss \
  --ckpt-format torch_dist \
  --seq-length $MAX_SEQ_LENGTH \
  --inference-max-seq-length $MAX_SEQ_LENGTH \
  --inference-max-requests $MAX_INFERENCE_BS \
  --pretrained-checkpoint $CHECKPOINT \
  --disable-bias-linear \
  --add-qkv-bias \
  --normalization RMSNorm \
  --norm-epsilon 1e-6 \
  --group-query-attention \
  --num-query-groups 2 \
  --no-masked-softmax-fusion \
  --attention-softmax-in-fp32 \
  --attention-dropout 0.0 \
  --hidden-dropout 0.0 \
  --weight-decay 0.0 \
  --position-embedding-type rope \
  --rotary-percent 1.0 \
  --rotary-base 1000000 \
  --use-rotary-position-embeddings \
  --swiglu \
  --num-layers 36  \
  --hidden-size 2048  \
  --ffn-hidden-size 11008 \
  --num-attention-heads 16  \
  --max-position-embeddings 32768  \
  --tokenizer-type HuggingFaceTokenizer \
  --tokenizer-model unsloth/Qwen2.5-3B \
  --tokenizer-hf-include-special-tokens \
  --lr 0.000001 \
  --lr-warmup-samples 0 \
  --make-vocab-size-divisible-by 64 \
  --clip-grad 1.0 \
  --rl-use-sequence-packing \
  --rl-sequence-packing-algo fifo"


================================================
FILE: examples/rl/model_configs/qwen_2p5_distill_7b.sh
================================================
#!/bin/bash
TP=${TP:-2}
PP=${PP:-1}
NODES_REQUIRED=${NODES_REQUIRED:-2}
LLM="qwen2p5_distill_7b"
echo "Using Qwen 2.5 DSR1 7B model checkpoint"
SCRIPT_PATH="${BASH_SOURCE[0]}"
source $(dirname $SCRIPT_PATH)/common.sh

# In all cases, one can override those values.
# However, running without envs will give you some
# good perf out of the box for established envs.
if true; then
  # Some default values if config is unsupported.
  echo "Undected environment config, using default values"
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.2}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-64}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-8}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-128}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-1024}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-1}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-8192}
  EXIT_INTERVAL=${EXIT_INTERVAL:-32}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}
fi

ENV_DEPENDENT="\
  --micro-batch-size $MICRO_BATCH_SIZE \
  --global-batch-size $TRAINING_BATCH_SIZE \
  --grpo-group-size $GRPO_GROUP_SIZE \
  --grpo-prompts-per-step $GRPO_PROMPTS_PER_STEP \
  --grpo-iterations $GRPO_ITERATIONS \
  --grpo-clamp-eps-lower $GRPO_CLAMP_EPS_LOWER \
  --grpo-clamp-eps-upper $GRPO_CLAMP_EPS_UPPER \
  --grpo-kl-beta $GRPO_KL_BETA \
  --langrl-env-config $ENV_CONFIG "

# Original Qwen model uses a wrong padding_id token. unsloth tokenizer fixes it.
MODEL_OPTIONS="\
  --calculate-per-token-loss \
  --ckpt-format torch \
  --seq-length $MAX_SEQ_LENGTH \
  --inference-max-seq-length $MAX_SEQ_LENGTH \
  --inference-max-requests $MAX_INFERENCE_BS \
  --pretrained-checkpoint $CHECKPOINT \
  --untie-embeddings-and-output-weights \
  --disable-bias-linear \
  --add-qkv-bias \
  --normalization RMSNorm \
  --norm-epsilon 1e-6 \
  --group-query-attention \
  --num-query-groups 4 \
  --no-masked-softmax-fusion \
  --attention-softmax-in-fp32 \
  --attention-dropout 0.0 \
  --hidden-dropout 0.0 \
  --weight-decay 0.0 \
  --position-embedding-type rope \
  --rotary-percent 1.0 \
  --rotary-base 10000 \
  --use-rotary-position-embeddings \
  --swiglu \
  --num-layers 28  \
  --hidden-size 3584  \
  --ffn-hidden-size 18944 \
  --num-attention-heads 28  \
  --max-position-embeddings 131072  \
  --tokenizer-type HuggingFaceTokenizer \
  --tokenizer-model "unsloth/DeepSeek-R1-Distill-Qwen-7B" \
  --lr 0.000001 \
  --lr-warmup-samples 0 \
  --make-vocab-size-divisible-by 128 \
  --clip-grad 1.0 "

RUN_REQUEST_ARGS="\
  --inference-type inplace_megatron_chat \
  --inference-server-conversation-template "unsloth/DeepSeek-R1-Distill-Qwen-7B" \
  $RUN_REQUEST_ARGS "


================================================
FILE: examples/rl/model_configs/qwen_2p5_math_7b.sh
================================================
#!/bin/bash
TP=${TP:-1}
PP=${PP:-1}
NODES_REQUIRED=${NODES_REQUIRED:-2}
LLM="qwen2p5_math_7b"
echo "Using Qwen 2.5 Math 7B model checkpoint"
SCRIPT_PATH="${BASH_SOURCE[0]}"
source $(dirname $SCRIPT_PATH)/common.sh

# In all cases, one can override those values.
# However, running without envs will give you some
# good perf out of the box for established envs.
if [ "$(basename "$ENV_CONFIG")" = "dapo.yaml" ]; then
  echo "Using DAPO environment config"
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.2}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-64}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-64}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-1024}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-2}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-4096}
  EXIT_INTERVAL=${EXIT_INTERVAL:-16}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}
else
  # Some default values if config is unsupported.
  echo "Undected environment config, using default values"
  GRPO_CLAMP_EPS_LOWER=${GRPO_CLAMP_EPS_LOWER:-0.2}
  GRPO_CLAMP_EPS_UPPER=${GRPO_CLAMP_EPS_UPPER:-0.2}
  MAX_INFERENCE_BS=${MAX_INFERENCE_BS:-64}
  GRPO_GROUP_SIZE=${GRPO_GROUP_SIZE:-16}
  GRPO_PROMPTS_PER_STEP=${GRPO_PROMPTS_PER_STEP:-32}
  GRPO_ITERATIONS=${GRPO_ITERATIONS:-1}
  GRPO_KL_BETA=${GRPO_KL_BETA:-"0.0"}
  TRAINING_BATCH_SIZE=${TRAINING_BATCH_SIZE:-512}
  MICRO_BATCH_SIZE=${MICRO_BATCH_SIZE:-2}
  MAX_SEQ_LENGTH=${MAX_SEQ_LENGTH:-4096}
  EXIT_INTERVAL=${EXIT_INTERVAL:-16}
  CHKPT_SAVE_INTERVAL=${CHKPT_SAVE_INTERVAL:-16}
fi

ENV_DEPENDENT="\
  --micro-batch-size $MICRO_BATCH_SIZE \
  --global-batch-size $TRAINING_BATCH_SIZE \
  --grpo-group-size $GRPO_GROUP_SIZE \
  --grpo-prompts-per-step $GRPO_PROMPTS_PER_STEP \
  --grpo-iterations $GRPO_ITERATIONS \
  --grpo-clamp-eps-lower $GRPO_CLAMP_EPS_LOWER \
  --grpo-clamp-eps-upper $GRPO_CLAMP_EPS_UPPER \
  --grpo-kl-beta $GRPO_KL_BETA \
  --langrl-env-config $ENV_CONFIG "

# Original Qwen model uses a wrong padding_id token. unsloth tokenizer fixes it.
MODEL_OPTIONS="\
  --calculate-per-token-loss \
  --ckpt-format torch \
  --seq-length $MAX_SEQ_LENGTH \
  --inference-max-seq-length $MAX_SEQ_LENGTH \
  --inference-max-requests $MAX_INFERENCE_BS \
  --pretrained-checkpoint $CHECKPOINT \
  --untie-embeddings-and-output-weights \
  --disable-bias-linear \
  --add-qkv-bias \
  --normalization RMSNorm \
  --norm-epsilon 1e-6 \
  --group-query-attention \
  --num-query-groups 4 \
  --no-masked-softmax-fusion \
  --attention-softmax-in-fp32 \
  --attention-dropout 0.0 \
  --hidden-dropout 0.0 \
  --weight-decay 0.0 \
  --position-embedding-type rope \
  --rotary-percent 1.0 \
  --rotary-base 10000 \
  --use-rotary-position-embeddings \
  --swiglu \
  --num-layers 28  \
  --hidden-size 3584  \
  --ffn-hidden-size 18944 \
  --num-attention-heads 28  \
  --max-position-embeddings 4096  \
  --tokenizer-type HuggingFaceTokenizer \
  --tokenizer-model "unsloth/Qwen2.5-Math-7B" \
  --tokenizer-hf-include-special-tokens \
  --lr 0.000001 \
  --lr-warmup-samples 0 \
  --make-vocab-size-divisible-by 128 \
  --clip-grad 1.0 "

RUN_REQUEST_ARGS="\
  --inference-type inplace_megatron_chat \
  --inference-server-conversation-template "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" \
  $RUN_REQUEST_ARGS "


================================================
FILE: examples/run_simple_mcore_train_loop.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.

import os
import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from functools import partial
from pathlib import Path
from typing import Any, Callable, Dict, Tuple, Iterator

from megatron.core import parallel_state
from megatron.core import dist_checkpointing
from megatron.core.pipeline_parallel.schedules import get_forward_backward_func
from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.models.gpt.gpt_model import GPTModel
from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_local_spec
from megatron.core.datasets.utils import compile_helpers
from megatron.core.datasets.blended_megatron_dataset_builder import (
    BlendedMegatronDatasetBuilder,
)
from megatron.core.datasets.gpt_dataset import GPTDatasetConfig, MockGPTDataset
from megatron.core.distributed import DistributedDataParallel
from megatron.core.distributed import DistributedDataParallelConfig
from megatron.core.distributed.finalize_model_grads import finalize_model_grads
from megatron.core.tokenizers import MegatronTokenizer


_SEQUENCE_LENGTH: int = 64


def initialize_distributed(
    tensor_model_parallel_size: int = 1, pipeline_model_parallel_size: int = 1
) -> None:
    """
    Initialize torch.distributed and Megatron-Core model parallel groups.

    Args:
        tensor_model_parallel_size: Number of GPUs for tensor model parallelism.
        pipeline_model_parallel_size: Number of GPUs for pipeline model parallelism.
    """
    parallel_state.destroy_model_parallel()

    # Torch setup for distributed training
    rank: int = int(os.environ["RANK"])
    world_size: int = int(os.environ["WORLD_SIZE"])
    local_rank: int = int(os.environ["LOCAL_RANK"])

    torch.cuda.set_device(local_rank)
    torch.distributed.init_process_group(
        backend="nccl", rank=rank, world_size=world_size
    )

    # Megatron core distributed training initialization
    parallel_state.initialize_model_parallel(
        tensor_model_parallel_size, pipeline_model_parallel_size
    )


def model_provider() -> GPTModel:
    """
    Build and return a simple GPT model for demonstration.

    Returns:
        GPTModel: A small GPT model with 2 layers for testing.
    """
    transformer_config: TransformerConfig = TransformerConfig(
        num_layers=2,
        hidden_size=12,
        num_attention_heads=4,
        use_cpu_initialization=True,
        pipeline_dtype=torch.float32,
    )

    gpt_model: GPTModel = GPTModel(
        config=transformer_config,
        transformer_layer_spec=get_gpt_layer_local_spec(),
        vocab_size=100,
        max_sequence_length=_SEQUENCE_LENGTH,
    )

    return gpt_model


def get_train_data_iterator() -> Iterator:
    """
    Create a mock dataset and return a data iterator.

    Returns:
        Iterator: Data iterator for training batches.
    """
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        if torch.distributed.get_rank() == 0:
            compile_helpers()
        torch.distributed.barrier()
    else:
        compile_helpers()

    config: GPTDatasetConfig = GPTDatasetConfig(
        random_seed=0,
        sequence_length=_SEQUENCE_LENGTH,
        reset_position_ids=False,
        reset_attention_mask=False,
        eod_mask_loss=False,
        tokenizer=MegatronTokenizer.from_pretrained(
            metadata_path={"library": "null-text"},
            vocab_size=_SEQUENCE_LENGTH,
        ),
        mid_level_dataset_surplus=0.005,
    )

    datasets = BlendedMegatronDatasetBuilder(
        MockGPTDataset, [1000, None, None], lambda: True, config
    ).build()

    train_dataloader: DataLoader = DataLoader(datasets[0], batch_size=8, shuffle=True)

    train_iterator: Iterator = iter(train_dataloader)

    return train_iterator


def forward_step_func(
    data_iterator: Iterator, model: torch.nn.Module
) -> Tuple[torch.Tensor, Callable]:
    """
    Forward step function that computes model output and returns loss function.

    Args:
        data_iterator: Iterator providing training batches.
        model: The GPT model to train.

    Returns:
        Tuple of (output_tensor, loss_function) where loss_function is a partial
        function that will compute the final loss when called.
    """

    def loss_func(
        loss_mask: torch.Tensor, output_tensor: torch.Tensor
    ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
        losses: torch.Tensor = output_tensor.float()
        loss_mask = loss_mask.view(-1).float()
        loss: torch.Tensor = torch.sum(losses.view(-1) * loss_mask) / loss_mask.sum()
        # If you have data parallel reduce loss across data parallel groups.
        # If pipeline parallel, loss computation is done only in last stage.

        return loss, {"lm loss": loss}

    data: Dict[str, torch.Tensor] = next(data_iterator)
    tokens: torch.Tensor = data["tokens"].to(device)
    attention_mask: torch.Tensor = data["attention_mask"].to(device)
    position_ids: torch.Tensor = data["position_ids"].to(device)
    labels: torch.Tensor = data["labels"].to(device)
    loss_mask: torch.Tensor = data["loss_mask"].to(device)

    output_tensor: torch.Tensor = model(
        tokens, position_ids, attention_mask, labels=labels
    )

    return output_tensor, partial(loss_func, loss_mask)


def save_distributed_checkpoint(
    checkpoint_path: str, gpt_model: torch.nn.Module
) -> None:
    """
    Save model checkpoint using Megatron-Core distributed checkpointing.

    Args:
        checkpoint_path: Directory path to save checkpoint.
        gpt_model: The model to checkpoint (may be wrapped with DDP).
    """
    # Access underlying model if wrapped with DDP
    model: torch.nn.Module = (
        gpt_model.module if hasattr(gpt_model, "module") else gpt_model
    )
    sharded_state_dict: Dict = model.sharded_state_dict(prefix="")
    dist_checkpointing.save(
        sharded_state_dict=sharded_state_dict, checkpoint_dir=checkpoint_path
    )


def load_distributed_checkpoint(
    checkpoint_path: str, gpt_model: torch.nn.Module
) -> torch.nn.Module:
    """
    Load model checkpoint using Megatron-Core distributed checkpointing.

    Args:
        checkpoint_path: Directory path to load checkpoint from.
        gpt_model: The model to load into (may be wrapped with DDP).

    Returns:
        The model with loaded checkpoint weights.
    """
    # Access underlying model if wrapped with DDP
    model: torch.nn.Module = (
        gpt_model.module if hasattr(gpt_model, "module") else gpt_model
    )
    sharded_state_dict: Dict = model.sharded_state_dict(prefix="")
    checkpoint: Dict = dist_checkpointing.load(
        sharded_state_dict=sharded_state_dict, checkpoint_dir=checkpoint_path
    )
    model.load_state_dict(checkpoint)
    return gpt_model


if __name__ == "__main__":
    initialize_distributed(tensor_model_parallel_size=2, pipeline_model_parallel_size=1)
    model_parallel_cuda_manual_seed(123)

    gpt_model: GPTModel = model_provider()
    device: torch.device = torch.device("cuda")
    gpt_model.to(device)

    # Wrap model with DistributedDataParallel for proper gradient synchronization.
    # This provides the finish_grad_sync() method required by finalize_model_grads().
    config: TransformerConfig = gpt_model.config
    ddp_config: DistributedDataParallelConfig = DistributedDataParallelConfig(
        grad_reduce_in_fp32=False,
        overlap_grad_reduce=False,
        use_distributed_optimizer=False,
    )
    gpt_model = DistributedDataParallel(
        config=config,
        ddp_config=ddp_config,
        module=gpt_model,
    )

    optim: Adam = Adam(gpt_model.parameters())

    train_iterator: Iterator = get_train_data_iterator()

    forward_backward_func: Callable[..., Dict[str, Any]] = get_forward_backward_func()

    # Running the model for 5 iterations
    for iteration in range(5):
        optim.zero_grad()

        losses_reduced: Dict[str, Any] = forward_backward_func(
            forward_step_func=forward_step_func,
            data_iterator=train_iterator,
            model=gpt_model,
            num_microbatches=1,
            seq_length=_SEQUENCE_LENGTH,
            micro_batch_size=8,
            decoder_seq_length=_SEQUENCE_LENGTH,
            forward_only=False,
        )

        # Finalize model gradients: all-reduce across DP and TP groups.
        # This synchronizes gradients for non-tensor-parallel parameters (e.g., LayerNorm)
        # across tensor parallel ranks and all gradients across data parallel ranks.
        finalize_model_grads([gpt_model])

        optim.step()

        print(f"Iteration {iteration}: Losses reduced: {losses_reduced}")

    # Saving the model
    ckpt_path: str = os.getcwd() + "/ckpt"
    Path(ckpt_path).mkdir(exist_ok=True)
    save_distributed_checkpoint(gpt_model=gpt_model, checkpoint_path=ckpt_path)

    # Loading the model
    gpt_model = load_distributed_checkpoint(
        gpt_model=gpt_model, checkpoint_path=ckpt_path
    )
    gpt_model.to(device)
    print("Successfully loaded the model")


================================================
FILE: examples/t5/README.md
================================================
# T5 MODEL

## Table of contents
- [1. Training Setup](#1-training-setup)
- [2. Configurations](#2-configurations)
- [3. Training Results](#3-training-results)

## 1. Training setup
<a id="markdown-training-setup" name="training-setup"></a>
To run the model on a Slurm based cluster  
```
PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:23.09-py3
ACCOUNT_NAME="" 
PARTITION=""
JOB_NAME=""
NUM_NODES=1
CHECKPOINT_PATH="" #<Specify path to checkpoint>
TENSORBOARD_LOGS_PATH=""#<Specify path to tensorboard log>
VOCAB_FILE="" #<Specify path to file>/bert-large-cased-vocab.txt
DATA_PATH="" #<Specify path and file prefix>_text_document

srun -N $NUM_NODES --container-image $PYTORCH_IMAGE --container-mounts "/path/to/data:/path/to/data,/path/to/megatron-lm:/workspace/megatron-lm" --account $ACCOUNT -N 1 -J $JOB_NAME  -p $PARTITION --no-container-mount-home  -c "
  cd /workspace/megatron-lm
  ./examples/t5/train_t5_220m_distributed.sh $CHECKPOINT_PATH $TENSORBOARD_LOGS_PATH $VOCAB_FILE $DATA_PATH"

```

## 2. Configurations
<a id="markdown-configurations" name="configurations"></a>
The architecture arguments below shows configuration for T5 220M model. 

### 220M 
```
       --num-layers 12 \
       --hidden-size 768 \
       --num-attention-heads 12 \
       --kv-channels 64 \
       --ffn-hidden-size 3072 \
       --encoder-seq-length 512 \
       --decoder-seq-length 128 \
       --max-position-embeddings 512 \
       --tensor-model-parallel-size 1 \
       --pipeline-model-parallel-size 1 \

```


## 3. Training Results
<a id="markdown-training-results" name="training-results"></a>
Below is the training curve for the 220M model on Pile dataset. The training takes 4 days on 32 GPUs, with batch size of 2048. 

Finetuning on SQUAD dataset, the validation result is: 63.44\%
<p align="center">
<img src="./t5_mcore_train_curve.png"  width="800" height="400">
</p>


================================================
FILE: examples/t5/train_t5_220m_distributed.sh
================================================
#!/bin/bash

# Runs the "220M" parameter model

export CUDA_DEVICE_MAX_CONNECTIONS=1

GPUS_PER_NODE=8
# Change for multinode config
MASTER_ADDR=localhost
MASTER_PORT=6000
NUM_NODES=1
NODE_RANK=0
WORLD_SIZE=$(($GPUS_PER_NODE*$NUM_NODES))

CHECKPOINT_PATH=$1 #<Specify path>
TENSORBOARD_DIR=$2 #<Specify path>
VOCAB_FILE=$3 #<Specify path to file>/bert-large-cased-vocab.txt
DATA_PATH=$4 #<Specify path and file prefix>_text_document

DISTRIBUTED_ARGS="
    --nproc_per_node $GPUS_PER_NODE \
    --nnodes $NUM_NODES \
    --node_rank $NODE_RANK \
    --master_addr $MASTER_ADDR \
    --master_port $MASTER_PORT
"

T5_ARGS="
    --encoder-num-layers 12 \
    --decoder-num-layers 12 \
    --hidden-size 768 \
    --num-attention-heads 12 \
    --kv-channels 64 \
    --ffn-hidden-size 3072 \
    --encoder-seq-length 512 \
    --decoder-seq-length 128 \
    --max-position-embeddings 512 \
    --micro-batch-size 64 \
    --global-batch-size 512 \
    --lr 0.0001 \
    --train-iters 1000000 \
    --lr-decay-iters 1000000 \
    --lr-decay-style linear \
    --min-lr 0.00001 \
    --weight-decay 1e-2 \
    --lr-warmup-fraction .01 \
    --clip-grad 1.0 \
    --bf16 \
    --vocab-extra-ids 100 \
    --init-method-std 0.015 \
    --transformer-impl transformer_engine \
    --tensor-model-parallel-size 1 \
    --pipeline-model-parallel-size 1 \
    --attention-backend auto \
"

DATA_ARGS="
    --data-path $DATA_PATH \
    --vocab-file $VOCAB_FILE \
    --tokenizer-type BertWordPieceCase \
    --split 99982,9,9 \
"

OUTPUT_ARGS="
    --log-interval 100 \
    --tensorboard-dir ${TENSORBOARD_DIR} \
    --save-interval 500 \
    --eval-interval 1000 \
    --eval-iters 10
"

torchrun $DISTRIBUTED_ARGS pretrain_t5.py \
    $T5_ARGS \
    $DATA_ARGS \
    $OUTPUT_ARGS \
    --distributed-backend nccl \
    --save $CHECKPOINT_PATH \
    --load $CHECKPOINT_PATH \


================================================
FILE: gpt_builders.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from megatron.core.models.gpt import GPTModel
from megatron.core.models.gpt.gpt_layer_specs import (
    get_gpt_decoder_block_spec,
    get_gpt_layer_local_spec,
    get_gpt_layer_with_transformer_engine_spec,
    get_gpt_layer_with_inference_spec,
    get_gpt_mtp_block_spec,
    get_gpt_decoder_layer_specs,
)
from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
    get_transformer_block_with_experimental_attention_variant_spec,
)
from megatron.core.models.gpt.heterogeneous.heterogeneous_layer_specs import (
    get_gpt_heterogeneous_layer_spec,
)
from megatron.core.transformer.spec_utils import import_module
from megatron.training import get_args, print_rank_0
from megatron.training.arguments import core_transformer_config_from_args
from megatron.training.yaml_arguments import core_transformer_config_from_yaml

import megatron.legacy.model  # isort: skip

# NOTE: Loading `megatron.legacy.model` earlier fails due to circular import


def gpt_builder(args, pre_process, post_process, vp_stage=None, config=None, pg_collection=None):
    print_rank_0('building GPT model ...')
    if config is None:
        if args.yaml_cfg is not None:
            config = core_transformer_config_from_yaml(args, "language_model")
        else:
            config = core_transformer_config_from_args(args)
    if args.use_legacy_models:
        model = megatron.legacy.model.GPTModel(
            config,
            num_tokentypes=0,
            parallel_output=True,
            pre_process=pre_process,
            post_process=post_process,
        )
    else:  # using core models
        if args.spec is not None:
            transformer_layer_spec = import_module(args.spec)
        else:
            use_te = args.transformer_impl == "transformer_engine"

            if args.experimental_attention_variant is not None:
                transformer_layer_spec = (
                    get_transformer_block_with_experimental_attention_variant_spec(
                        config=config, vp_stage=vp_stage
                    )
                )
            elif args.num_experts:
                # Define the decoder block spec
                transformer_layer_spec = get_gpt_decoder_block_spec(
                    config,
                    use_transformer_engine=use_te,
                    normalization=args.normalization,
                    qk_l2_norm=args.qk_l2_norm,
                    vp_stage=vp_stage,
                )
            elif args.heterogeneous_layers_config_path is not None:
                assert not (config.transformer_impl == "inference_optimized")
                transformer_layer_spec = get_gpt_heterogeneous_layer_spec(config, use_te)
            else:
                # Define the decoder layer spec
                transformer_layer_spec = _get_transformer_layer_spec(use_te, config)
        mtp_block_spec = None
        if args.mtp_num_layers is not None:
            assert not (config.transformer_impl == "inference_optimized")
            if (
                hasattr(transformer_layer_spec, 'layer_specs')
                and len(transformer_layer_spec.layer_specs) == 0
            ):
                # Get the decoder layer spec explicitly if no decoder layer in the last stage,
                # Only happens with block spec (TransformerBlockSubmodules) when using MoE.
                transformer_layer_spec_for_mtp = _get_transformer_layer_spec(use_te, config)
            else:
                # Define the decoder block spec
                decoder_layer_specs = get_gpt_decoder_layer_specs(
                    config, use_transformer_engine=use_te, normalization=args.normalization, qk_l2_norm=args.qk_l2_norm, vp_stage=vp_stage
                )
                transformer_layer_spec_for_mtp = decoder_layer_specs[-1]
            # Use spec of the last layer in decoder block as spec of the transformer layer in MTP
            mtp_block_spec = get_gpt_mtp_block_spec(
                config,
                transformer_layer_spec_for_mtp,
                use_transformer_engine=use_te,
                vp_stage=vp_stage,
            )

        model = GPTModel(
            config=config,
            transformer_layer_spec=transformer_layer_spec,
            vocab_size=args.padded_vocab_size,
            max_sequence_length=args.max_position_embeddings,
            pre_process=pre_process,
            post_process=post_process,
            fp16_lm_cross_entropy=args.fp16_lm_cross_entropy,
            parallel_output=True,
            share_embeddings_and_output_weights=not args.untie_embeddings_and_output_weights,
            position_embedding_type=args.position_embedding_type,
            rotary_percent=args.rotary_percent,
            rotary_base=args.rotary_base,
            rope_scaling=args.use_rope_scaling,
            mtp_block_spec=mtp_block_spec,
            vp_stage=vp_stage,
            pg_collection=pg_collection,
        )

    return model


def _get_transformer_layer_spec(use_te, config):
    """Get transformer layer specification based on configuration.

    Args:
        use_te (bool): Whether to use Transformer Engine
        config: Model configuration

    Returns:
        transformer_layer_spec: The transformer layer specification
    """
    if use_te:
        return get_gpt_layer_with_transformer_engine_spec(
            config.num_moe_experts,
            config.moe_grouped_gemm,
            config.qk_layernorm,
            config.multi_latent_attention,
            config.experimental_attention_variant,
            qk_l2_norm=config.qk_l2_norm,
            use_kitchen=config.use_kitchen,
            use_te_activation_func=config.use_te_activation_func,
            use_kitchen_attention=config.use_kitchen_attention,
            kitchen_attention_backend=config.kitchen_attention_backend,
            mla_down_proj_fusion=getattr(config, "mla_down_proj_fusion", False),
        )
    elif config.transformer_impl == "inference_optimized":
        return get_gpt_layer_with_inference_spec(
            config.qk_layernorm,
            config.multi_latent_attention,
            qk_l2_norm=config.qk_l2_norm,
        )
    else:
        return get_gpt_layer_local_spec(
            config.num_moe_experts,
            config.moe_grouped_gemm,
            config.qk_layernorm,
            config.multi_latent_attention,
            config.experimental_attention_variant,
            normalization=config.normalization,
            use_kitchen=config.use_kitchen,
            use_kitchen_attention=config.use_kitchen_attention,
            kitchen_attention_backend=config.kitchen_attention_backend,
        )


================================================
FILE: greptile.json
================================================
{
    "labels": [],
    "comment": "Disclaimer: This is AI-generated.",
    "commentTypes": ["logic", "syntax", "style"],
    "instructions": "Only comment if the PR description is unchanged from the default template, if a docstring is missing, or if there is a typo.",
    "ignoreKeywords": "rename\nlinter\nprettier\ngreptile-ignor",
    "ignorePatterns": "greptile.json\ntesting/**/*.py\n*.md\n*.txt\n*.json",
    "patternRepositories": ["NVIDIA/Megatron-LM"],
    "triggerOnUpdates": true,
    "shouldUpdateDescription": false,
    "disabledLabels": ["docs"],
    "includeAuthors": [],
    "excludeAuthors": ["github-actions"],
    "strictness": 3,
    "fixWithAI": false,
    "includeBranches": ["main"],
    "statusCheck": false,
    "skipReview": "AUTOMATIC",
    "summarySection": {
      "included": false,
      "collapsible": false,
      "defaultOpen": false
    },
    "issuesTableSection": {
      "included": false,
      "collapsible": false,
      "defaultOpen": false
    },
    "confidenceScoreSection": {
      "included": false,
      "collapsible": false,
      "defaultOpen": false
    },
    "sequenceDiagramSection": {
      "included": false,
      "collapsible": false,
      "defaultOpen": false
    },
    "statusCommentsEnabled": false
  }

================================================
FILE: mamba_builders.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.

from model_provider import count_parameters_in_layer
from megatron.core.models.mamba import MambaModel
from megatron.core.transformer import TransformerConfig
from megatron.core.transformer.spec_utils import import_module
from megatron.training import print_rank_0
from megatron.training.arguments import core_transformer_config_from_args
from megatron.core.models.mamba.mamba_layer_specs import mamba_inference_stack_spec


def mamba_builder(args, pre_process, post_process, vp_stage=None, config=None, pg_collection=None):
    print_rank_0('building MAMBA model ...')
    if config is None:
        config = core_transformer_config_from_args(args, TransformerConfig)
    assert args.use_legacy_models is False, "Mamba only supported in Mcore!"

    if config.transformer_impl == "inference_optimized":
        mamba_stack_spec = mamba_inference_stack_spec
        assert (
            not config.inference_fuse_tp_communication
        ), "inference_fuse_tp_communication is not supported for Mamba"
    elif args.spec is not None:
        mamba_stack_spec = import_module(args.spec)
    else:
        raise ValueError("You must provide a valid Mamba layer spec via --spec")

    model = MambaModel(
        config=config,
        mamba_stack_spec=mamba_stack_spec,
        vocab_size=args.padded_vocab_size,
        max_sequence_length=args.max_position_embeddings,
        hybrid_layer_pattern=args.hybrid_layer_pattern,
        pre_process=pre_process,
        post_process=post_process,
        fp16_lm_cross_entropy=args.fp16_lm_cross_entropy,
        parallel_output=True,
        share_embeddings_and_output_weights=not args.untie_embeddings_and_output_weights,
        position_embedding_type=args.position_embedding_type,
        rotary_percent=args.rotary_percent,
        rotary_base=args.rotary_base,
        pg_collection=pg_collection,
        vp_stage=vp_stage,
    )

    for l in range(model.decoder.num_layers_per_pipeline_rank):
        layer_params = count_parameters_in_layer(model, f'decoder.layers.{l}.')
        print_rank_0(f" == params layer {l}: {layer_params}")

    return model


================================================
FILE: megatron/core/MSC_Integration.md
================================================
# Multi-Storage Client (MSC) Integration

The [Multi-Storage Client](https://github.com/NVIDIA/multi-storage-client) (MSC) provides a unified interface for reading datasets and storing checkpoints from both filesystems (e.g., local disk, NFS, Lustre) and object storage providers such as S3, GCS, OCI, Azure, AIStore, and SwiftStack.

This guide will walk you through how to:

1. How to install and configure MSC
2. How to train models directly using datasets in object storage
3. How to save and load model checkpoints to/from object storage

## Installation

MSC is vended as `the multi-storage-client` package on PyPI.

The base [client](https://nvidia.github.io/multi-storage-client/user_guide/concepts.html#term-client) supports POSIX file systems by default, but there are extras for each storage service which provide the necessary package dependencies for its corresponding storage provider.

```bash
# POSIX file systems.
pip install multi-storage-client

# AWS S3 and S3-compatible object stores.
pip install "multi-storage-client[boto3]"

# Google Cloud Storage (GCS).
pip install "multi-storage-client[google-cloud-storage]"
```

## Configuration File

MSC uses a YAML configuration file to define how it connects to object storage systems. This design allows you to specify one or more storage profiles, each representing a different storage backend or bucket. MSC keeps your training scripts clean and portable by centralizing details in a config file. There is no need to hardcode access keys, bucket names, or other provider-specific options directly into your code.

Here's an example configuration:

```yaml
profiles:
  my-profile:
    storage_provider:
      type: s3
      options:
        # Set the bucket/container name as the base_path
        base_path: my-bucket
        region_name: us-west-2
    # Optional credentials (can also use environment variables for S3)
    credentials_provider:
      type: S3Credentials
      options:
        access_key: ${AWS_ACCESS_KEY}
        secret_key: ${AWS_SECRET_KEY}

cache:
  size: 500G               # Maximum cache size
  location: /tmp/msc_cache # Cache directory on filesystem
```

To tell MSC where to find this file, set the following environment variable before running your Megatron-LM script:

```bash
export MSC_CONFIG=/path/to/msc_config.yaml
```

## MSC URL Format

MSC uses a custom URL scheme to identify and access files across different object storage providers. This scheme makes it easy to reference data and checkpoints without worrying about the underlying storage implementation. An MSC URL has the following structure:

```
msc://<profile-name>/<path/to/object>
```

**Components:**

* `msc://` This is the scheme identifier indicating the path should be interpreted by the Multi-Storage Client.
* `<profile-name>` This corresponds to a named profile defined in your YAML configuration file under the profiles section. Each profile specifies the storage provider (e.g., S3, GCS), credentials, and storage-specific options such as the bucket name or base path.
* `<path/to/object>` This is the logical path to the object or directory within the storage provider, relative to the base_path configured in the profile. It behaves similarly to a path in a local filesystem but maps to object keys or blobs in the underlying storage system.

**Example:**

Given the following profile configuration:

```yaml
profiles:
  my-profile:
    storage_provider:
      type: s3
      options:
        base_path: my-bucket
```

The MSC URL:

```
msc://my-profile/dataset/train/data.bin
```

is interpreted as accessing the object with the key `dataset/train/data.bin` inside the S3 bucket named `my-bucket`. If this were a GCS or OCI profile instead, MSC would apply the appropriate backend logic based on the profile definition, but your code using the MSC URL would remain unchanged.

This abstraction allows training scripts to reference storage resources uniformly—whether they're hosted on AWS, GCP, Oracle, or Azure—just by switching profiles in the config file.


## Train from Object Storage

To train with datasets stored in object storage, use an MSC URL with the `--data-path` argument. This URL references a dataset stored under a profile defined in your MSC configuration file.

In addition, Megatron-LM requires the `--object-storage-cache-path` argument when reading from object storage. This path is used to cache the `.idx` index files associated with IndexedDataset, which are needed for efficient data access.

```bash
python pretrain_gpt.py                                      \
    --object-storage-cache-path /path/to/object_store_cache \
    --data-cache-path /path/to/data_cache                   \
    --data-path msc://my-profile/datasets/text_document     \
    --no-mmap-bin-files
```

**NOTE:** All four arguments must be provided when training with datasets in object storage using MSC.

## Save and Load Checkpoints from Object Storage

MSC can be used to save and load model checkpoints directly from object storage by specifying MSC URLs for the `--save` and `--load` arguments. This allows you to manage checkpoints in object storage.

```bash
python pretrain_gpt.py                \
  --save msc://my-profile/checkpoints \
  --load msc://my-profile/checkpoints \
  --save-interval 1000
```

**Notes:** Only the `torch_dist` checkpoint format is currently supported when saving to or loading from MSC URLs.

## Disable MSC

By default, MSC integration is automatically enabled when the `multi-storage-client` library is installed. MSC is also used for regular filesystem paths (like `/filesystem_mountpoint/path` in `--data-path`, `--save`, or `--load`) even when not using explicit MSC URLs. MSC functions as a very thin abstraction layer with negligible performance impact when used with regular paths, so there's typically no need to disable it. If you need to disable MSC, you can do so using the `--disable-msc` flag:

```bash
python pretrain_gpt.py --disable-msc
```

## Performance Considerations

When using object storage with MSC, there are a few important performance implications to keep in mind:

**Reading Datasets**

Reading training datasets directly from object storage is typically slower than reading from local disk. This is primarily due to:
* High latency of object storage systems, especially for small and random read operations (e.g., reading samples from .bin files).
* HTTP-based protocols used by object stores (e.g., S3 GET with range requests), which are slower than local filesystem I/O.

To compensate for this latency, it is recommended to increase the number of data loading workers using the `--num-workers` argument in your training command:

```
python pretrain_gpt.py --num-workers 8 ...
```

Increasing the number of workers allows more parallel reads from object storage, helping to mask I/O latency and maintain high GPU utilization during training.

**Checkpoint Loading**

When using MSC to load checkpoints from object storage, it is important to configure the cache section in your MSC configuration file. This local cache is used to store downloaded checkpoint data and metadata, which significantly reduces load time and memory usage.

Example:

```
cache:
  size: 500G
  location: /tmp/msc_cache
```

For optimal performance, configure the cache directory on a high-speed local storage device such as an NVMe SSD.

## Additional Resources and Advanced Configuration

Refer to the [MSC Configuration Documentation](https://nvidia.github.io/multi-storage-client/references/configuration.html) for complete documentation on MSC configuration options, including detailed information about supported storage providers, credentials management, and advanced caching strategies.

MSC supports collecting observability metrics and traces to help monitor and debug data access patterns during training. These metrics can help you identify bottlenecks in your data loading pipeline, optimize caching strategies, and monitor resource utilization when training with large datasets in object storage. For more information about MSC's observability features, see the [MSC Observability Documentation](https://nvidia.github.io/multi-storage-client/user_guide/telemetry.html).

MSC offers an experimental Rust client that bypasses Python's Global Interpreter Lock (GIL) to significantly improve performance for multi-threaded I/O operations. The Rust client supports AWS S3, SwiftStack, and Google Cloud Storage, enabling true concurrent execution for much better performance compared to the Python implementation. To enable it, add `rust_client: {}` to your storage provider configuration. For more details, see the [MSC Rust Client Documentation](https://nvidia.github.io/multi-storage-client/user_guide/rust.html).


================================================
FILE: megatron/core/QuickStart.md
================================================
## Quick Start

This guide for Megatron Core walks you through the following tasks:

* Initialize Megatron Core on two GPUS.
* Build a GPT model with a tensor model parallel size of two and a pipeline parallel size of one.
* Train the model for five iterations using Megatron Core schedules.
* Save the model using the distributed checkpoint format.
* Load the model.

**NOTE:** The following sample was tested using Megatron Core version 0.8.0 and NGC PyTorch Container version 24.02.

### Set Up Your Environment

1. Run a new Docker container.

1. Clone the Megatron GitHub repo in it.

    ```bash
    docker run --ipc=host --shm-size=512m --gpus 2 -it nvcr.io/nvidia/pytorch:24.02-py3

    git clone https://github.com/NVIDIA/Megatron-LM.git
    cd Megatron-LM
    pip install uv
    uv pip install -e .
    ```

    <br>

For a more comprehensive overview of different installation methods, refer to the [Installation Guide](https://docs.nvidia.com/megatron-core/developer-guide/latest/get-started/install.html).

### Write Your First Training Loop

In this task, you create a sample GPT model split across tensors (Tensor model parallel) on two GPUS, and run a forward pass through it using a MockGPT dataset helper class that was created in Megatron Core.

<br>

**NOTE:** All of the following steps are in the [run_simple_mcore_train_loop.py](https://github.com/NVIDIA/Megatron-LM/tree/main/examples/run_simple_mcore_train_loop.py) script. To run the ``run_simple_mcore_train_loop.py`` script:

    ```
    PYTHONPATH=$PYTHON_PATH:./megatron torchrun --nproc-per-node 2 examples/run_simple_mcore_train_loop.py
    ```

1. Initialize the distributed training and set up the model parallel:

    The following utility, when called, initializes your distributed setup:

    ```python
    import os
    import torch
    from megatron.core import parallel_state

    def initialize_distributed(tensor_model_parallel_size = 1, pipeline_model_parallel_size = 1):
        # Torch setup for distributed training
        rank = int(os.environ['LOCAL_RANK'])
        world_size = torch.cuda.device_count()
        torch.cuda.set_device(rank)
        torch.distributed.init_process_group(world_size=world_size, rank=rank)

        # Megatron core distributed training initialization
        parallel_state.initialize_model_parallel(tensor_model_parallel_size, pipeline_model_parallel_size)
    ```

    <br>

1. Set up the GPT model:

    Use the following code snippet to create a GPT model. For a list of other configurations that you can pass into the model, open and review [transformer_config.py](https://github.com/NVIDIA/Megatron-LM/tree/main/megatron/core/transformer/transformer_config.py).

    ```
    from megatron.core.transformer.transformer_config import TransformerConfig
    from megatron.core.models.gpt.gpt_model import GPTModel
    from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_local_spec

    def model_provider():
        """Build the model."""

        transformer_config = TransformerConfig(
            num_layers=2, 
            hidden_size=12, 
            num_attention_heads=4, 
            use_cpu_initialization=True, 
            pipeline_dtype=torch.float32)

        gpt_model = GPTModel(
            config=transformer_config, 
            transformer_layer_spec=get_gpt_layer_local_spec(), 
            vocab_size=100, 
            max_sequence_length=64)

        return gpt_model
    ```

    <br>

1. Set up the GPT mock dataset:

    Use the following code snippet to explore the mock dataset utility.

    * To train the model using your data, use the `GPTDataset` class in [gpt_dataset.py](https://github.com/NVIDIA/Megatron-LM/tree/main/megatron/core/datasets/gpt_dataset.py).

    * To find more information about Megatron Core data pipeline, see the [data pipeline readme.md](https://github.com/NVIDIA/Megatron-LM/tree/main/megatron/core/datasets/readme.md?ref_type=heads).

    ```
    import torch
    from torch.utils.data import DataLoader

    from megatron.core.datasets.blended_megatron_dataset_builder import BlendedMegatronDatasetBuilder
    from megatron.core.datasets.gpt_dataset import GPTDatasetConfig, MockGPTDataset
    from megatron.training.tokenizer.tokenizer import _NullTokenizer
    from megatron.core.datasets.utils import compile_helpers

    _SEQUENCE_LENGTH = 64

    def get_train_data_iterator():
        if torch.distributed.is_available() and torch.distributed.is_initialized():
            if torch.distributed.get_rank() == 0:
                compile_helpers()
            torch.distributed.barrier()
        else:
            compile_helpers()

        config = GPTDatasetConfig(
            random_seed=0,
            sequence_length=_SEQUENCE_LENGTH,
            reset_position_ids=False,
            reset_attention_mask=False,
            eod_mask_loss=False,
            tokenizer=_NullTokenizer(vocab_size=_SEQUENCE_LENGTH),
        )

        datasets = BlendedMegatronDatasetBuilder(
            MockGPTDataset, [1000, None, None], lambda: True, config
        ).build()

        train_dataloader = DataLoader(datasets[0], batch_size=8, shuffle=True)

        train_iterator = iter(train_dataloader)

        return train_iterator

    ```

    <br>

1. Add a forward step function:

    Megatron Core uses [schedules.py](https://github.com/NVIDIA/Megatron-LM/tree/main/megatron/core/pipeline_parallel/schedules.py) to run the model. Define a forward step function that takes the data iterator and the model as input and produces the output tensor and a loss function.

    ```python
    from functools import partial

    def forward_step_func(data_iterator, model):
    
        def loss_func(loss_mask: torch.Tensor, output_tensor: torch.Tensor):

            losses = output_tensor.float()
            loss_mask = loss_mask.view(-1).float()
            loss = torch.sum(losses.view(-1) * loss_mask) / loss_mask.sum()
            # If you have data parallel reduce loss across data parallel groups. 
            # If pipeline parallel, loss computation is done only in last stage.

            return loss, {'lm loss': loss}

        data = next(data_iterator)
        tokens = data['tokens'].to(device)
        attention_mask = data['attention_mask'].to(device)
        position_ids = data['position_ids'].to(device)
        labels = data['labels'].to(device)
        loss_mask = data['loss_mask'].to(device)
    
        output_tensor = model(tokens, position_ids, attention_mask,
                            labels=labels)

        return output_tensor, partial(loss_func, loss_mask)   
    ```

    <br>

1. Define your load and save distributed checkpoints:

    Megatron Core uses distributed checkpoints for loading and saving models. This allows you to convert the model from one parallel setting to another when you load it.
    For example, a model trained with tensor parallel size `2`, can be loaded again as a tensor model with parallel size `4`.

    ```python
    from megatron.core import dist_checkpointing

    def save_distributed_checkpoint(checkpoint_path, gpt_model):
        sharded_state_dict = gpt_model.sharded_state_dict(prefix='')
        dist_checkpointing.save(sharded_state_dict=sharded_state_dict, checkpoint_dir=checkpoint_path)

    def load_distributed_checkpoint(checkpoint_path, gpt_model):
        sharded_state_dict=gpt_model.sharded_state_dict(prefix='')
        checkpoint = dist_checkpointing.load(sharded_state_dict=sharded_state_dict, checkpoint_dir=checkpoint_path)
        gpt_model.load_state_dict(checkpoint)
        return gpt_model
    ```

    <br>

1. Add the main function:

    The following code snippet is the main function that needs to go into your script. It runs the model for five iterations, saves, and loads it.  

    ```python
    from pathlib import Path
    from torch.optim import Adam
    from megatron.core.pipeline_parallel.schedules import get_forward_backward_func
    from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed

    if __name__ == "__main__":
        initialize_distributed(tensor_model_parallel_size=2, pipeline_model_parallel_size=1)
        model_parallel_cuda_manual_seed(123)

        gpt_model = model_provider()
        device = torch.device("cuda")
        gpt_model.to(device)

        optim = Adam(gpt_model.parameters())
        
        train_iterator = get_train_data_iterator()
        
        forward_backward_func = get_forward_backward_func()

        # Running the model for 5 iterations
        for _ in range(5):
            optim.zero_grad()
            
            losses_reduced = forward_backward_func(
                forward_step_func=forward_step_func,
                data_iterator=train_iterator,
                model=gpt_model,
                num_microbatches=1,
                seq_length=64,
                micro_batch_size=8,
                decoder_seq_length=64,
                forward_only=False)
        
            optim.step()

            print(f'Losses reduced :  {losses_reduced}')

        # Saving the model
        save_distributed_checkpoint(gpt_model=gpt_model, checkpoint_path='/workspace/ckpt')

        # Loading the model
        gpt_model = load_distributed_checkpoint(gpt_model=gpt_model, checkpoint_path='/workspace/ckpt')
        gpt_model.to(device)
        print('Successfully loaded the model')  
    ```

    <br>

### Review Advanced Examples

To review more advanced examples, explore [pretrain_gpt.py](https://github.com/NVIDIA/Megatron-LM/blob/main/pretrain_gpt.py). ``pretrain_gpt.py`` has more complex training loops and includes the following Megatron Core features:

* pipeline parallel
* context parallel
* rope embeddings
* mixture of experts


================================================
FILE: megatron/core/README.md
================================================
<div align="center">

Megatron Core
=============
<h4>Production-ready library for building custom training frameworks</h4>

<div align="left">

## ⚡ Quick Start

```bash
# Install Megatron Core
uv pip install megatron-core

# Distributed training example (2 GPUs, mock data)
torchrun --nproc_per_node=2 examples/run_simple_mcore_train_loop.py
```

# What is Megatron Core?

**Megatron Core** is an open-source PyTorch-based library that contains GPU-optimized techniques and cutting-edge system-level optimizations. It abstracts them into composable and modular APIs, allowing full flexibility for developers and model researchers to train custom transformers at-scale on NVIDIA accelerated computing infrastructure.

## 🚀 Key Components

### GPU-Optimized Building Blocks
- **Transformer Components**: Attention mechanisms, MLP layers, embeddings
- **Memory Management**: Activation recomputation
- **FP8 Precision**: Optimized for NVIDIA Hopper, Ada, and Blackwell GPUs

### Parallelism Strategies
- **Tensor Parallelism (TP)**: Layer-wise parallelization (activation memory footprint can be further reduced using sequence parallelism)
- **Pipeline Parallelism (PP)**: Depth-wise model splitting and pipelining of microbatches to improve efficiency
- **Context Parallelism (CP)**: Long sequence handling ([documentation](https://docs.nvidia.com/megatron-core/developer-guide/latest/api-guide/context_parallel.html))
- **Expert Parallelism (EP)**: Split experts of an MoE model across multiple GPUs


## 🔗 Examples & Documentation

**Examples:**
- **[Simple Training Loop](https://github.com/NVIDIA/Megatron-LM/blob/main/examples/run_simple_mcore_train_loop.py)** - Basic usage
- **[Multimodal Training](https://github.com/NVIDIA/Megatron-LM/blob/main/examples/multimodal/)** - Vision-language models
- **[Mixture-of-Experts](https://github.com/yanring/Megatron-MoE-ModelZoo)** - MoE examples
- **[Mamba Models](https://github.com/NVIDIA/Megatron-LM/blob/main/examples/mamba/)** - State-space models

**Documentation:**
- **[📚 API Guide](https://docs.nvidia.com/megatron-core/developer-guide/latest/api-guide/index.html)** - Complete API documentation
- **[💡 Developer Guide](https://docs.nvidia.com/megatron-core/developer-guide/latest/index.html)** - Custom framework development

---

*For complete installation instructions, performance benchmarks, and ecosystem information, see the [main README](../../README.md).*


================================================
FILE: megatron/core/README_STRAGGLER.md
================================================
## StragglerDetector for a TP Group

The file `megatron/core/utils.py` has a class named `StragglerDetector` which supports Python Contexts.
It can be used to find straggling TP group based on the RTT of the ranks in the TP Group. It also collects
Power/Temp/Utilization for GPUs, which can additionally be used to narrow down to the exact GPU in the TP Group,
assuming the straggling was caused by hardware anomaly in a given GPU.<br>
This class supports collecting timing events for various steps of a given iteration. It
keeps collecting such timing events on a per rank basis, and when the reporter is invoked
during a logging interval, it computes the min and max of certain metric across all
ranks and logs the observed metric and the rank as follows

```
 0: INFO:megatron.core.utils:[2024-03-14 23:07:56] | MnRtt/Rnk: 3453.08ms/8 | MxRtt/Rnk: 3468.20ms/0 | MnPwr/Rnk: 601796W/8 | MxPwr/Rnk: 683801W/18 | MnTmp/Rnk: 52C/0 | MxTmp/Rnk: 65C/21 | MnUtl/Rnk: 97%/8 | MxUtl/Rnk: 100%/6 | MnClk/Rnk: 1950MHz/28 | MxClk/Rnk: 1980MHz/0 | MnDRtt/Rnk: 14.27ms/23 | MxDRtt/Rnk: 34.65ms/3 | MnEtpt/Rnk: 296.02TF/0 | MxEtpt/Rnk: 297.32TF/8
```
<hr>

### Description of the metrics

Each metric is prefixed with `Mn` or `Mx` to represent `Minimum` or `Maximum`. Each metric is also suffixed with the rank where the metric was measured. The metrics are averaged over the logging interval. Between the prefix and the rank is the name of the metric as follows

- Rtt : RoundTrip Time (time spent in all the traced ops per iteration)
- Pwr : GPU Power
- Tmp : GPU Temperature
- Utl : GPU Utilization
- Clk : GPU Clock
- DRtt: get_batch latency
- Etpt: Estimated throughput. This is derived from actual computed throughput dividied by Rtt. Since we do not collect timing for backward pass, the value is further divided by three to come up with estimated throughput. 
<hr>

### Command Line activation
To start using the StragglerDetector, need to pass the following argument `--log-straggler`. It optionally also takes two additional parameters. Default disabled
- `--disable-straggler-on-startup` - whether to keept the StragglerDetector disabled on startup and enable later. Default enabled
- `--straggler-ctrlr-port` - The StragglerDetector can toggle between on/off just by sending `curl Rank0Host:port`. Default port is 65535. Every time it is turned 
- `--straggler-minmax-count` - If set to > 1 (N), it prints N Top and Bottom Etpt/Rank pairs as shown below
```
 0: INFO:megatron.core.utils:^^^^ Bottom 4 Ranks with lowest  Etpt(TF): 296.02/0, 296.17/2, 296.23/1, 296.23/4,
 0: INFO:megatron.core.utils:^^^^ Top    4 Ranks with highest Etpt(TF): 297.28/15, 297.28/11, 297.32/12, 297.32/8,
```
<hr>

### Programming the StragglerDetector
The StragglerDetector class supports context, and its implementation is a Singleton.
- Initialization 

```
 # initialization, where StragglerDetector will be used
   from megatron.core.utils import StragglerDetector
   stimer = StragglerDetector()
```

- One time for each rank

```
 # one time before the training loop starts
 stimer.configure(world, rank, enabled=True, port=65545)

 # Arguments to configure 
 #     world   : World Size
 #     rank    : The rank of this trainer
 #     mmcnt   : (Optional) Number of ranks to print for showing Min/Max Etpt
 #     amp     : (Optional) Set to 3.0 if we only use timers in fwd pass
 #     port    : (Optional) control port, useful only for rank-0
 #     prefill : (Optional) howmany Events to pre-populate
 #     enabled : (Optional) whether or not collection is enabled on startup
```

- To Capture time

```
 # whereever timing need to be captured
 with stimer:
     do_operation()

 # special case for get_batch
 with stimer(bdata=True):
      input,... = get_batch(iterator,...)
```

- Logging in main training loop

```
 # logging
   total_flops = 0.0
   iteration = 0
   # inside the main training loop
   while training:
        iteration += 1
        do_step()
        total_flops += get_computed_flops()
        if iteration % log_interval:
           stimer.report(total_flops, log_interval)
           total_flops = 0.0
```


================================================
FILE: megatron/core/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import megatron.core.tensor_parallel
import megatron.core.utils
from megatron.core import parallel_state
from megatron.core.distributed import DistributedDataParallel
from megatron.core.inference_params import InferenceParams
from megatron.core.model_parallel_config import ModelParallelConfig
from megatron.core.package_info import (
    __contact_emails__,
    __contact_names__,
    __description__,
    __download_url__,
    __homepage__,
    __keywords__,
    __license__,
    __package_name__,
    __repository_url__,
    __shortversion__,
    __version__,
)
from megatron.core.timers import Timers
from megatron.core.utils import is_torch_min_version

# Alias parallel_state as mpu, its legacy name
mpu = parallel_state

__all__ = [
    "parallel_state",
    "tensor_parallel",
    "utils",
    "DistributedDataParallel",
    "InferenceParams",
    "ModelParallelConfig",
    "Timers",
    "__contact_emails__",
    "__contact_names__",
    "__description__",
    "__download_url__",
    "__homepage__",
    "__keywords__",
    "__license__",
    "__package_name__",
    "__repository_url__",
    "__shortversion__",
    "__version__",
]

from .safe_globals import register_safe_globals

if is_torch_min_version("2.6a0"):
    register_safe_globals()


================================================
FILE: megatron/core/_rank_utils.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Low-level rank utilities with minimal dependencies to avoid circular imports."""

import logging
import os
from typing import Any

import torch


def safe_get_rank() -> int:
    """Safely get the rank of the current process.

    Returns the rank from torch.distributed if initialized, otherwise falls back
    to the RANK environment variable, defaulting to 0.

    Returns:
        int: The rank of the current process.
    """
    if torch.distributed.is_initialized():
        return torch.distributed.get_rank()

    # If torch.distributed is not initialized, try to read environment variables.
    try:
        return int(os.environ.get("RANK", 0))
    except (ValueError, TypeError):
        return 0


def log_single_rank(logger: logging.Logger, *args: Any, rank: int = 0, **kwargs: Any) -> None:
    """Log a message only on a single rank.

    If torch distributed is initialized, write log on only one rank.

    Args:
        logger: The logger to write the logs.
        *args: All logging.Logger.log positional arguments.
        rank: The rank to write on. Defaults to 0.
        **kwargs: All logging.Logger.log keyword arguments.
    """
    if safe_get_rank() == rank:
        logger.log(*args, **kwargs)


================================================
FILE: megatron/core/activations.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import torch
import torch.nn.functional as F

from megatron.core.jit import jit_fuser


@jit_fuser
def squared_relu(x: torch.Tensor) -> torch.Tensor:
    """Squared ReLU activation"""
    return torch.pow(F.relu(x), 2)


@jit_fuser
def quick_gelu(x: torch.Tensor) -> torch.Tensor:
    """Quick GELU activation"""
    return x * torch.sigmoid(1.702 * x)


@jit_fuser
def fast_gelu(x: torch.Tensor) -> torch.Tensor:
    """Fast GELU activation"""
    return 0.5 * x * (1.0 + torch.tanh(x * 0.7978845608 * (1.0 + 0.044715 * x * x)))


================================================
FILE: megatron/core/config.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

ENABLE_EXPERIMENTAL = False


def set_experimental_flag(flag: bool):
    """Set the experimental flag to the given value."""
    global ENABLE_EXPERIMENTAL
    ENABLE_EXPERIMENTAL = flag


def is_experimental_enabled():
    """Return the experimental flag."""
    return ENABLE_EXPERIMENTAL


================================================
FILE: megatron/core/config_logger.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses
import json
import os

import torch
import torch.nn as nn

from megatron.core import parallel_state


def get_config_logger_path(config):
    """Get the path to the config logger directory."""
    return getattr(config, 'config_logger_dir', '')


def has_config_logger_enabled(config):
    """Check if config logger is enabled."""
    return get_config_logger_path(config) != ''


# For each prefix, holds a counter and increases it every time we dump with this
# prefix.
__config_logger_path_counts = {}


def get_path_count(path):
    """
    keeps tracks of number of times we've seen the input `path` and return count-1
    """
    global __config_logger_path_counts
    if not path in __config_logger_path_counts:
        __config_logger_path_counts[path] = 0
    count = __config_logger_path_counts[path]
    __config_logger_path_counts[path] += 1
    return count


def get_path_with_count(path):
    """
    calls get_path_count and appends returned value to path
    """
    return f'{path}.iter{get_path_count(path)}'


class JSONEncoderWithMcoreTypes(json.JSONEncoder):
    """
    Custom JSON encoder that serializes according to types in mcore.
    """

    def default(self, o):
        if type(o).__name__ in ['function', 'ProcessGroup']:
            return str(o)
        if type(o).__name__ in ['dict', 'OrderedDict']:
            return {k: self.default(v) for k, v in o.items()}
        if type(o).__name__ in ['list', 'ModuleList']:
            return [self.default(val) for val in o]
        if type(o).__name__ == 'UniqueDescriptor':
            return {
                attr: self.default(getattr(o, attr))
                for attr in filter(lambda x: not x.startswith('__'), dir(o))
            }
        if type(o) is torch.dtype:
            return str(o)
        # if it's a Float16Module, add "Float16Module" to the output dict
        if type(o).__name__ == 'Float16Module':
            return {'Float16Module': {'module': self.default(o.module)}}
        # If it's a nn.Module subchild, either print its children or itself if leaf.
        if issubclass(type(o), nn.Module):
            if len(getattr(o, '_modules', {})) > 0:
                return {key: self.default(val) for key, val in o._modules.items()}
            else:
                return str(o)
        if type(o).__name__ in ['ABCMeta', 'type', 'AttnMaskType']:
            return str(o)
        if dataclasses.is_dataclass(o) or type(o).__name__ in ['ModuleSpec', 'TransformerConfig']:
            return dataclasses.asdict(o)
        try:
            return super().default(o)
        except:
            return str(o)


def log_config_to_disk(config, dict_data, prefix='', rank_str=''):
    """
    Encodes the input dict (dict_data) using the JSONEncoderWithMcoreTypes
    and dumps to disk, as specified via path
    """
    path = get_config_logger_path(config)
    assert path is not None, 'Expected config_logger_dir to be non-empty in config.'

    if not os.path.exists(path):
        os.makedirs(path, exist_ok=True)

    if 'self' in dict_data:
        if prefix == '':
            prefix = type(dict_data['self']).__name__
        del dict_data['self']

    # the caller of the funcion can decide the most informative string
    # rank_str defaults to '0_0_0_0_0' format (tp_dp_cp_pp_ep ranks)
    if rank_str == '':
        rank_str = parallel_state.get_all_ranks()

    path = get_path_with_count(os.path.join(path, f'{prefix}.rank_{rank_str}'))
    if type(dict_data).__name__ == 'OrderedDict':
        torch.save(dict_data, f'{path}.pth')
    else:
        with open(f'{path}.json', 'w') as fp:
            json.dump(dict_data, fp, cls=JSONEncoderWithMcoreTypes)


__all__ = ['has_config_logger_enabled', 'log_config_to_disk']


================================================
FILE: megatron/core/datasets/Makefile
================================================
CXXFLAGS += -O3 -Wall -shared -std=c++17 -fPIC -fdiagnostics-color
CPPFLAGS += $(shell python3 -m pybind11 --includes)

LIBNAME = helpers_cpp
LIBEXT = $(shell python3-config --extension-suffix)

OUT = $(LIBNAME)$(LIBEXT)
SRC = helpers.cpp

default: $(OUT)

$(OUT): $(SRC)
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $< -o $@


================================================
FILE: megatron/core/datasets/__init__.py
================================================


================================================
FILE: megatron/core/datasets/bert_dataset.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from dataclasses import dataclass
from typing import Dict, List, Optional, Union

import numpy

from megatron.core.datasets.indexed_dataset import IndexedDataset
from megatron.core.datasets.masked_dataset import (
    MaskedWordPieceDataset,
    MaskedWordPieceDatasetConfig,
)
from megatron.core.datasets.utils import Split


@dataclass
class BERTMaskedWordPieceDatasetConfig(MaskedWordPieceDatasetConfig):
    """Configuration object for Megatron Core BERT WordPiece datasets"""

    classification_head: bool = None
    """Option to perform the next sequence prediction during sampling"""

    def __post_init__(self) -> None:
        """Do asserts and set fields post init"""
        super().__post_init__()

        assert self.classification_head is not None


class BERTMaskedWordPieceDataset(MaskedWordPieceDataset):
    """The BERT dataset that assumes WordPiece tokenization

    Args:
        indexed_dataset (IndexedDataset): The IndexedDataset around which
            to build the MegatronDataset
        dataset_path (str): The real path on disk to the dataset, for bookkeeping
        indexed_indices (numpy.ndarray): The set of the documents indices to expose
        num_samples (Optional[int]): The number of samples to draw from the indexed dataset.
            When None, build as many samples as correspond to one epoch.
        index_split (Split): The indexed_indices Split
        config (BERTMaskedWordPieceDatasetConfig): The config
    """

    def __init__(
        self,
        indexed_dataset: IndexedDataset,
        dataset_path: str,
        indexed_indices: numpy.ndarray,
        num_samples: Optional[int],
        index_split: Split,
        config: BERTMaskedWordPieceDatasetConfig,
    ) -> None:
        super().__init__(
            indexed_dataset, dataset_path, indexed_indices, num_samples, index_split, config
        )

        self.token_lookup = list(self.config.tokenizer.inv_vocab.keys())
        # Account for the single <cls> and two <sep> token ids
        self.sample_index = self._build_sample_index(
            self.config.sequence_length - 3, 2 if self.config.classification_head else 1
        )

    @staticmethod
    def _key_config_attributes() -> List[str]:
        """Inherited method implementation

        Returns:
            List[str]: The key config attributes
        """
        return super(
            BERTMaskedWordPieceDataset, BERTMaskedWordPieceDataset
        )._key_config_attributes() + ["classification_head"]

    def __getitem__(self, idx: int) -> Dict[str, Union[int, numpy.ndarray]]:
        """Abstract method implementation

        Args:
            idx (int): The index into the dataset

        Returns:
            Dict[str, Union[int, numpy.ndarray]]: The sample information wrapped in a dictionary
        """

        idx_beg, idx_end, target_sequence_length = self.sample_index[idx]
        sample = [self.dataset[i] for i in range(idx_beg, idx_end)]
        numpy_random_state = numpy.random.RandomState(seed=(self.config.random_seed + idx) % 2**32)

        assert target_sequence_length <= self.config.sequence_length

        # Split the sample into contiguous subsegments A and B
        pivot = len(sample)
        is_next_random = False
        if self.config.classification_head:
            assert len(sample) > 1, "the sample must contain at least two sentences"
            pivot = 1
            if len(sample) >= 3:
                pivot = numpy_random_state.randint(low=1, high=len(sample))
            is_next_random = numpy_random_state.random() < 0.5
        split_A = []
        for sample_a in sample[:pivot]:
            split_A.extend(sample_a)
        split_B = []
        for sample_b in sample[pivot:]:
            split_B.extend(sample_b)
        if is_next_random:
            split_A, split_B = split_B, split_A

        # Trim the subsegments from either end to a desired joint length
        length_A = len(split_A)
        length_B = len(split_B)
        if length_A + length_B <= target_sequence_length:
            truncated = False
        else:
            while length_A + length_B > target_sequence_length:
                split = split_A if length_A > length_B else split_B
                if numpy_random_state.random() < 0.5:
                    del split[0]
                else:
                    del split[-1]
                length_A = len(split_A)
                length_B = len(split_B)
            truncated = True

        # Merge the subsegments and create the token assignment labels
        tokens = [self.config.tokenizer.cls, *split_A, self.config.tokenizer.sep]
        assignments = [0 for _ in range(1 + len(split_A) + 1)]
        if split_B:
            tokens += [*split_B, self.config.tokenizer.sep]
            assignments += [1 for _ in range(len(split_B) + 1)]

        # Masking
        tokens, masked_positions, masked_labels, _, _ = self._create_masked_lm_predictions(
            tokens, target_sequence_length, numpy_random_state
        )

        # Pad the sequences and convert to NumPy
        length_toks = len(tokens)
        length_pads = self.config.sequence_length - length_toks
        assert length_pads >= 0

        tokens = numpy.array(tokens, dtype=numpy.int64)
        tokens = numpy.pad(tokens, (0, length_pads), constant_values=self._pad_token_id)

        assignments = numpy.array(assignments, dtype=numpy.int64)
        assignments = numpy.pad(assignments, (0, length_pads), constant_values=self._pad_token_id)

        # Get the padding mask
        mask_pads = numpy.ones(self.config.sequence_length, dtype=numpy.int64)
        mask_pads[tokens == self._pad_token_id] = self._pad_token_id

        # Mask the labels
        labels = numpy.zeros(self.config.sequence_length, dtype=numpy.int64) - 1
        labels[masked_positions] = masked_labels

        # Get the loss mask
        mask_loss = numpy.zeros(self.config.sequence_length, dtype=numpy.int64)
        mask_loss[masked_positions] = 1

        # For padded sequences, ensure the embedding layer can map the token ID
        tokens[tokens == self._pad_token_id] = 0
        labels[labels == self._pad_token_id] = 0

        return {
            "text": tokens,
            "types": assignments,
            "labels": labels,
            "is_random": int(is_next_random),
            "padding_mask": mask_pads,
            "loss_mask": mask_loss,
            "truncated": int(truncated),
        }

    def _get_token_mask(self, numpy_random_state: numpy.random.RandomState) -> Optional[int]:
        """Abstract method implementation

        80% of the time, replace the token id with mask token id. 10% of the time, replace token id
        with a random token id from the vocabulary. 10% of the time, do nothing.

        Args:
            numpy_random_state (RandomState): The NumPy random state

        Returns:
            Optional[int]: The replacement token id or None
        """
        if numpy_random_state.random() < 0.8:
            return self.config.tokenizer.mask
        else:
            if numpy_random_state.random() >= 0.5:
                return self.token_lookup[numpy_random_state.randint(0, len(self.token_lookup))]
        return None


================================================
FILE: megatron/core/datasets/blended_dataset.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

import hashlib
import json
import logging
import os
import time
from collections import OrderedDict
from typing import Dict, List, Optional, Tuple, Union

import numpy
import torch

from megatron.core.datasets.blended_megatron_dataset_config import BlendedMegatronDatasetConfig
from megatron.core.datasets.megatron_dataset import MegatronDataset
from megatron.core.datasets.utils import normalize
from megatron.core.utils import log_single_rank

logger = logging.getLogger(__name__)

_VERBOSE = False


class BlendedDataset(torch.utils.data.Dataset):
    """Conjugating class for a set of MegatronDataset instances

    Args:
        datasets (List[MegatronDataset]): The MegatronDataset instances to blend

        weights (List[Union[int, float]]): The weights that determine the dataset blend ratios

        size (Optional[int]): The number of samples to draw from the blend. If None, for each
            dataset index idx draw exactly weights[idx] samples from datasets[idx].

        config (BlendedMegatronDatasetConfig): The config

    Raises:
        RuntimeError: When the dataset has fewer or more samples than 'size' post-initialization
    """

    def __init__(
        self,
        datasets: List[MegatronDataset],
        weights: List[Union[int, float]],
        size: Optional[int],
        config: BlendedMegatronDatasetConfig,
    ) -> None:
        assert len(datasets) == len(weights)
        assert len(datasets) < 32767
        assert all(map(lambda _: type(_) == type(datasets[0]), datasets))
        assert all(map(lambda _: _.index_split == datasets[0].index_split, datasets))
        assert all(map(lambda _: _ > 0, weights))
        assert all(map(lambda _: type(_) == type(weights[0]), weights))
        if size is None and isinstance(weights[0], float):
            assert all(map(lambda _: _ == int(_), weights))

        # Alert user to unnecessary blending
        if len(datasets) == 1:
            log_single_rank(
                logger, logging.WARNING, f"Building a BlendedDataset for a single MegatronDataset"
            )

        if size is not None:
            weights = normalize(weights)

        self.datasets = datasets
        self.split = self.datasets[0].index_split
        self.weights = weights
        self.size = size
        self.config = config

        unique_identifiers = OrderedDict()
        unique_identifiers["class"] = type(self).__name__
        unique_identifiers["datasets"] = [dataset.unique_identifiers for dataset in self.datasets]
        unique_identifiers["split"] = self.split.name
        unique_identifiers["weights"] = self.weights
        unique_identifiers["size"] = self.size

        self.unique_description = json.dumps(
            unique_identifiers, indent=4, default=lambda obj: obj.unique_identifiers
        )
        self.unique_description_hash = hashlib.md5(
            self.unique_description.encode("utf-8"), usedforsecurity=False
        ).hexdigest()

        self.dataset_index, self.dataset_sample_index = self._build_indices()

    def __len__(self) -> int:
        if self.config.defer_npy_index_mmap:
            size = sum(self.weights)
            if self.size is not None:
                size = self.size
            return size

        return self.dataset_index.shape[0]

    def __getitem__(self, idx: int) -> Dict[str, Union[int, numpy.ndarray]]:
        if self.dataset_index is None:
            self.dataset_index = numpy.load(
                self.path_to_dataset_index, allow_pickle=True, mmap_mode="r"
            )
            self.dataset_sample_index = numpy.load(
                self.path_to_dataset_sample_index, allow_pickle=True, mmap_mode="r"
            )

        dataset_id = self.dataset_index[idx]
        dataset_sample_id = self.dataset_sample_index[idx]
        return {"dataset_id": dataset_id, **self.datasets[dataset_id][dataset_sample_id]}

    def _build_indices(self) -> Tuple[numpy.ndarray, numpy.ndarray]:
        """Build and optionally cache the dataset index and the dataset sample index

        The dataset index is a 1-D mapping which determines the dataset to query. The dataset
        sample index is a 1-D mapping which determines the sample to request from the queried
        dataset.

        Returns:
            Tuple[numpy.ndarray, numpy.ndarray]: The dataset index and the dataset sample index
        """
        if self.config.defer_npy_index_mmap:
            # NOTE(asolergi-nv): Direct path to lazy memmap the indexes
            get_path_to = lambda suffix: os.path.join(
                self.config.path_to_cache,
                f"{self.unique_description_hash}-{type(self).__name__}-{self.split.name}-{suffix}",
            )
            self.path_to_dataset_index = get_path_to("dataset_index.npy")
            self.path_to_dataset_sample_index = get_path_to("dataset_sample_index.npy")
            return None, None

        path_to_cache = self.config.path_to_cache

        if path_to_cache:
            get_path_to = lambda suffix: os.path.join(
                path_to_cache,
                f"{self.unique_description_hash}-{type(self).__name__}-{self.split.name}-{suffix}",
            )
            path_to_description = get_path_to("description.txt")
            path_to_dataset_index = get_path_to("dataset_index.npy")
            path_to_dataset_sample_index = get_path_to("dataset_sample_index.npy")
            cache_hit = (
                True
                if self.config.fast_cache_load
                else all(
                    map(
                        os.path.isfile,
                        [path_to_description, path_to_dataset_index, path_to_dataset_sample_index],
                    )
                )
            )
        else:
            cache_hit = False

        if not path_to_cache or (not cache_hit and torch.distributed.get_rank() == 0):
            log_single_rank(
                logger, logging.INFO, f"Build and save the {type(self).__name__} indices"
            )

            # Build the dataset and dataset sample indexes
            log_single_rank(
                logger, logging.INFO, f"\tBuild and save the dataset and dataset sample indexes"
            )
            t_beg = time.time()
            from megatron.core.datasets import helpers

            if self.size is not None:
                dataset_index = numpy.zeros(self.size, dtype=numpy.int16)
                dataset_sample_index = numpy.zeros(self.size, dtype=numpy.int64)
                helpers.build_blending_indices(
                    dataset_index,
                    dataset_sample_index,
                    self.weights,
                    len(self.datasets),
                    self.size,
                    _VERBOSE,
                )
            else:
                size = sum(self.weights)
                dataset_index = numpy.zeros(size, dtype=numpy.int16)
                dataset_sample_index = numpy.zeros(size, dtype=numpy.int64)
                helpers.build_exhaustive_blending_indices(
                    dataset_index, dataset_sample_index, self.weights, len(self.datasets)
                )

            dataset_indices, dataset_sizes = numpy.unique(dataset_index, return_counts=True)
            for i, (_index, _size) in enumerate(zip(dataset_indices, dataset_sizes)):
                if len(self.datasets[_index]) < _size:
                    raise IndexError(
                        f"The {self.split.name} blend oversamples the contributing datasets and, "
                        f"for example, requests {_size} samples from "
                        f"{type(self.datasets[_index]).__name__} number {i} in excess of its size "
                        f"{len(self.datasets[_index])}. The current value of the config attribute "
                        f"mid_level_dataset_surplus may be increased, e.g. two- or ten-fold, from "
                        f"its current value ({self.config.mid_level_dataset_surplus}) to ensure a "
                        f"sufficient mid-level dataset sample margin from which to draw."
                    )

            if path_to_cache:
                os.makedirs(path_to_cache, exist_ok=True)
                # Write the description
                with open(path_to_description, "wt") as writer:
                    writer.write(self.unique_description)
                # Save the indexes
                numpy.save(path_to_dataset_index, dataset_index, allow_pickle=True)
                numpy.save(path_to_dataset_sample_index, dataset_sample_index, allow_pickle=True)
            else:
                log_single_rank(
                    logger,
                    logging.WARNING,
                    f"Cannot save the {type(self).__name__} indexes because path_to_cache is None",
                )

            t_end = time.time()
            log_single_rank(logger, logging.DEBUG, f"\t> time elapsed: {t_end - t_beg:4f} seconds")

            return dataset_index, dataset_sample_index

        log_single_rank(logger, logging.INFO, f"Load the {type(self).__name__} indices")

        log_single_rank(
            logger, logging.INFO, f"\tLoad the dataset index from {path_to_dataset_index}"
        )
        t_beg = time.time()
        dataset_index = numpy.load(path_to_dataset_index, allow_pickle=True, mmap_mode="r")
        t_end = time.time()
        log_single_rank(logger, logging.DEBUG, f"\t> time elapsed: {t_end - t_beg:4f} seconds")

        log_single_rank(
            logger,
            logging.INFO,
            f"\tLoad the dataset sample index from {path_to_dataset_sample_index}",
        )
        t_beg = time.time()
        dataset_sample_index = numpy.load(
            path_to_dataset_sample_index, allow_pickle=True, mmap_mode="r"
        )
        t_end = time.time()
        log_single_rank(logger, logging.DEBUG, f"\t> time elapsed: {t_end - t_beg:4f} seconds")

        return dataset_index, dataset_sample_index


================================================
FILE: megatron/core/datasets/blended_megatron_dataset_builder.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import logging
import math
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Callable, Iterable, List, Optional, Type, Union

import numpy
import torch

from megatron.core.datasets.blended_dataset import BlendedDataset
from megatron.core.datasets.blended_megatron_dataset_config import BlendedMegatronDatasetConfig
from megatron.core.datasets.gpt_dataset import GPTDatasetConfig
from megatron.core.datasets.megatron_dataset import LowLevelDataset, MegatronDataset
from megatron.core.datasets.utils import Split, normalize
from megatron.core.utils import log_single_rank

logger = logging.getLogger(__name__)

MidLevelDataset = MegatronDataset

TopLevelDataset = Union[BlendedDataset, MidLevelDataset]

DistributedDataset = Union[
    TopLevelDataset, MidLevelDataset, LowLevelDataset, torch.utils.data.Dataset
]


class BlendedMegatronDatasetBuilder(object):
    """Builder class for the BlendedDataset and MegatronDataset classes

    Args:
        cls (Type[MegatronDataset]): The class to instantiate, must inherit from MegatronDataset

        sizes (List[Optional[int]]): The minimum total number of samples to draw, or None, per split

        is_built_on_rank (Callable): A callable which returns True if the dataset should be built on
            the current rank and False otherwise. It should be Megatron Core parallelism aware i.e.
            global rank, local group rank, and virtual rank may inform its return value. Should
            return true for exactly one process on global rank 0.

        config (BlendedMegatronDatasetConfig): The config object which informs dataset creation
    """

    def __init__(
        self,
        cls: Type[MidLevelDataset],
        sizes: List[int],
        is_built_on_rank: Callable,
        config: BlendedMegatronDatasetConfig,
    ):
        self.cls = cls
        self.sizes = sizes
        self.is_built_on_rank = is_built_on_rank
        self.config = config

        log_single_rank(
            logger,
            logging.INFO,
            f"Building {cls.__name__} splits with sizes={self.sizes} and config={self.config}",
        )

        if not self.config.mock:
            for split in Split:
                size_is_none = self.sizes[split.value] is None
                if self.config.blend_per_split is None:
                    weights_are_none = self.config.blend[1] is None
                else:
                    if self.config.blend_per_split[split.value] is None:
                        continue
                    weights_are_none = self.config.blend_per_split[split.value][1] is None
                if size_is_none:
                    assert weights_are_none, f"""size_is_none => weights_are_none fails 
                    for {split.name} split
                    This can occur with multiple validation sets if datasets have weights"""

    def build(self) -> List[Optional[TopLevelDataset]]:
        """Build all dataset splits according to the provided blend(s)

        This method is distributed-aware and must be called on all ranks.

        The dataset splits returned can vary according to the config. Supply config.blend and
        config.split to build BlendedDataset and/or MegatronDataset splits from the same
        distribution. Supply config.blend_per_split to build BlendedDataset and/or MegatronDataset
        splits from separate distributions. In either case, for each split, handle the following
        cases:

        (1) The split is None
            - do nothing

        (2) The split has one contributing dataset, and...

            (a) 'size' is not None
                - Build a mid-level dataset with low-level dataset sampling in proportion to the
                size

            (b) 'size' is None
                - Build mid-level datasets with no excess low-level dataset sampling

        (3) The split has multiple contributing datasets, and...

            (a) 'weights' is not None and 'size' is not None
                - Build mid-level datasets with low-level dataset sampling in proportion to their
                weights and the size
                - Build a top-level dataset of length marginally greater than 'size' with mid-level
                dataset sampling in proportion to their weights and the size

            (b) 'weights' is not None and 'size' is None
                - Error

            (c) 'weights' is None and 'size' is not None
                - Build mid-level datasets with no excess low-level dataset sampling
                - Build a top-level dataset of length 'size' (capped at the sum of the mid-level
                dataset lengths) with mid-level dataset sampling in proportion to their lengths
                and the size

            (d) 'weights' is None and 'size' is None
                - Build mid-level datasets with no excess low-level dataset sampling
                - Build a top-level dataset with no excess mid-level dataset sampling

        Returns:
            List[Optional[TopLevelDataset]]: A list containing a dataset instance (or None) per
                split
        """
        datasets = self._build_blended_dataset_splits()

        for dataset in datasets:
            if dataset is not None and len(dataset) > 0:
                if isinstance(dataset, BlendedDataset):
                    assert dataset.size is None or dataset.size == len(dataset)
                elif isinstance(dataset, MegatronDataset):
                    assert dataset.num_samples is None or dataset.num_samples <= len(dataset)

        return datasets

    def _build_blended_dataset_splits(self) -> List[Optional[TopLevelDataset]]:
        """Build all dataset splits according to the provided blend(s)

        See the BlendedMegatronDatasetBuilder.build alias for more information.

        Returns:
            List[Optional[TopLevelDataset]]: A list containing a dataset instance (or None) per
                split
        """
        ##
        # Return fake "mock" datasets
        ##
        if self.config.mock:
            split = self.config.split_matrix
            try:
                return self._build_megatron_dataset_splits(None, split, self.sizes)
            except Exception as error:
                raise Exception(
                    f"{self.cls.__name__} failed to build as a mock data generator"
                ) from error

        ##
        # All splits come from the same distribution
        ##
        elif self.config.blend:
            prefixes, weights = self.config.blend
            if weights is not None:
                weights = normalize(weights)

            split = self.config.split_matrix

            # Blend consists of a single prefix
            if len(prefixes) == 1 and weights is None:
                return self._build_megatron_dataset_splits(prefixes[0], split, self.sizes)

            # Build the mid-level datasets
            if weights is None:
                # Build only one "epoch"
                sizes_per_dataset_buffer = [[None for split in Split] for prefix in prefixes]
            else:
                # The number of samples we plan to use per dataset
                sizes_per_dataset_target = _get_size_per_split_per_dataset(weights, self.sizes)
                # The number of samples we plan to build per dataset
                sizes_per_dataset_buffer = _get_size_per_split_per_dataset(
                    weights, self.sizes, surplus=self.config.mid_level_dataset_surplus
                )

            # Build each dataset in parallel
            megatron_datasets = self._build_megatron_datasets_parallel(
                prefixes, split, sizes_per_dataset_buffer
            )

            # Build the top-level datasets
            blended_datasets = [None] * len(Split)
            for i in range(len(Split)):
                if split[i] is not None:
                    weights_i = weights
                    if weights_i is not None and self.sizes[i] is not None:
                        # Blend according to client-specified weights and client-specified size
                        size_per_dataset = list(zip(*sizes_per_dataset_target))[i]
                        size_i = sum(size_per_dataset)
                    elif weights_i is None:
                        # Blend according to dataset sizes as-is and (maybe) client-specified size
                        try:
                            weights_i = [
                                len(megatron_dataset) for megatron_dataset in megatron_datasets[i]
                            ]
                        except TypeError:
                            weights_i = [0 for _ in prefixes]
                        if self.sizes[i] is not None:
                            size_i = min(self.sizes[i], sum(weights_i))
                        else:
                            # Build exhaustive indices
                            size_i = None
                    else:
                        raise ValueError(
                            "Using client-specified weights requires client-specified size"
                        )
                    blended_datasets[i] = self.build_generic_dataset(
                        BlendedDataset,
                        self.is_built_on_rank,
                        (
                            False
                            if (
                                isinstance(self.config, GPTDatasetConfig)
                                and self.config.fast_cache_load
                            )
                            else True
                        ),  # synchronize_ranks, default behavior to build on rank-0 first. Set to False if we are using --dataloader-fast-cache-load # pylint: disable=C0301
                        megatron_datasets[i],
                        weights_i,
                        size_i,
                        self.config,
                    )

            return blended_datasets

        ##
        # Each split comes from a separate distribution
        ##
        else:
            blended_datasets = [None] * len(Split)
            for i in range(len(Split)):
                split_spoof = [None] * len(Split)
                split_spoof[i] = (0.0, 1.0)
                sizes_spoof = [0] * len(Split)
                sizes_spoof[i] = self.sizes[i]

                # Blend is provided for the split
                blend = self.config.blend_per_split[i]
                if blend is not None:
                    prefixes, weights = blend
                    if weights is not None:
                        weights = normalize(weights)

                    # Blend consists of a sigle prefix
                    if len(prefixes) == 1:
                        blended_datasets[i] = self._build_megatron_dataset_splits(
                            prefixes[0], split_spoof, sizes_spoof
                        )[i]
                        continue
                    elif self.config.multiple_validation_sets and i == Split.valid.value:
                        # handle multiple validation sets
                        validation_datasets = []
                        if self.config.full_validation:
                            # verify that size is None, which causes a single epoch dataset
                            # to be built
                            assert sizes_spoof[i] is None
                        for prefix in prefixes:
                            ds = self._build_megatron_dataset_splits(
                                prefix, split_spoof, sizes_spoof
                            )[i]
                            validation_datasets.append(ds)
                        blended_datasets[i] = validation_datasets
                        continue

                    # Build mid-level datasets
                    if weights is None:
                        sizes_per_dataset_buffer = [
                            [None for split in Split] for prefix in prefixes
                        ]
                    else:
                        # The number of samples we plan to use per dataset
                        sizes_per_dataset_target = _get_size_per_split_per_dataset(
                            weights, sizes_spoof
                        )
                        # The number of samples we plan to build per dataset
                        sizes_per_dataset_buffer = _get_size_per_split_per_dataset(
                            weights, sizes_spoof, surplus=self.config.mid_level_dataset_surplus
                        )

                    # Build each dataset in parallel
                    megatron_datasets = self._build_megatron_datasets_parallel(
                        prefixes, split_spoof, sizes_per_dataset_buffer
                    )[i]

                    # Build top-level dataset
                    if weights is not None and self.sizes[i] is not None:
                        # Blend according to client-specified weights and client-specified size
                        size_per_dataset = list(zip(*sizes_per_dataset_target))[i]
                        size = sum(size_per_dataset)
                    elif weights is None:
                        # Blend according to dataset sizes as-is and (maybe) client-specified size
                        try:
                            weights = [
                                len(megatron_dataset) for megatron_dataset in megatron_datasets
                            ]
                        except TypeError:
                            weights = [0 for _ in prefixes]
                        if self.sizes[i] is not None:
                            size = min(self.sizes[i], sum(weights))
                        else:
                            # Build exhaustive indices
                            size = None
                    else:
                        raise RuntimeError
                    blended_datasets[i] = self.build_generic_dataset(
                        BlendedDataset,
                        self.is_built_on_rank,
                        (
                            False
                            if (
                                isinstance(self.config, GPTDatasetConfig)
                                and self.config.fast_cache_load
                            )
                            else True
                        ),  # synchronize_ranks, default behavior to build on rank-0 first. Set to False if we are using --dataloader-fast-cache-load # pylint: disable=C0301
                        megatron_datasets,
                        weights,
                        size,
                        self.config,
                    )

            return blended_datasets

    def _build_megatron_datasets_parallel(
        self, prefixes: List[str], split: List[float], sizes_per_dataset: List[List[int]]
    ) -> List[List[Optional[MegatronDataset]]]:
        """Build the megatron datasets for a list of prefixes in parallel

        Args:
            prefixes (List[str]): The list of prefix strings

            split (List[float]): The dataset split ratios (must sum to 1.00)

            sizes_per_dataset (List[List[int]]): The number of samples to request
            per MegatronDataset per spilt

        Returns:
            List[List[Optional[MegatronDataset]]]: For each split, have a list of
            MegatronDataset per prefix
        """

        # Helper function to wrap the threading logic
        def _threading_helper(
            megatron_datasets: List[List[Optional[MegatronDataset]]],
            num_workers: int,
            prefixes: List[str],
            split: List[float],
            sizes_per_dataset: List[List[int]],
        ) -> None:
            with ThreadPoolExecutor(max_workers=num_workers) as executor:
                all_futures = []
                for i in range(len(prefixes)):
                    all_futures.append(
                        executor.submit(
                            self._build_megatron_dataset_splits,
                            prefixes[i],
                            split,
                            sizes_per_dataset[i],
                            False,  # synchronize_ranks, barrier is called in this function
                        )
                    )
                for future in all_futures:
                    try:
                        megatron_datasets_split = future.result()
                        for j in range(len(megatron_datasets_split)):
                            megatron_datasets[j].append(megatron_datasets_split[j])
                    except Exception as err:
                        raise err

        megatron_datasets = [[] for _ in range(len(Split))]
        num_dataset_builder_threads = self.config.num_dataset_builder_threads

        # NOTE(asolergi-nv): Skip rank-0 first dataset building if we are using --dataloader-fast-cache-load # pylint: disable=C0301
        if torch.distributed.is_initialized() and not (
            isinstance(self.config, GPTDatasetConfig) and self.config.fast_cache_load
        ):
            rank = torch.distributed.get_rank()
            # First, build on rank 0
            if rank == 0:
                num_workers = num_dataset_builder_threads
                if num_workers > 1:
                    # since only rank 0 is running, scale up the thread count
                    # but not too much to avoid overloading storage on miss path.
                    # if user set num_dataset_builder_threads to 1,
                    # i.e. meant for serial build, do not scale up.
                    num_workers *= min(2, max(1, torch.cuda.device_count()))
                _threading_helper(
                    megatron_datasets, num_workers, prefixes, split, sizes_per_dataset
                )

            torch.distributed.barrier()

            # Then, build on other ranks; guaranteed to be data_cache hit
            if rank != 0:
                _threading_helper(
                    megatron_datasets,
                    num_dataset_builder_threads,
                    prefixes,
                    split,
                    sizes_per_dataset,
                )
        else:
            _threading_helper(
                megatron_datasets, num_dataset_builder_threads, prefixes, split, sizes_per_dataset
            )

        return megatron_datasets

    def _build_megatron_dataset_splits(
        self,
        dataset_path: Optional[str],
        split: List[float],
        sizes: List[int],
        synchronize_ranks: bool = True,
    ) -> List[Optional[MidLevelDataset]]:
        """Build each MidLevelDataset split from a single LowLevelDataset

        Args:
            dataset_path (Optional[str]): The path on disk which defines the underlying
                LowLevelDataset, or None for mock dataset classes

            split (List[Tuple[float, float]]): The dataset split matrix

            sizes (List[int]): The number of total samples to draw from each split

            synchronize_ranks (bool): Whether to call barrier for rank-0 / barrier / other-ranks
                behavior. Set to False when we enforce this behavior at higher level.

        Returns:
            List[Optional[MidLevelDataset]]: The MidLevelDataset (or None) per split
        """
        synchronize_ranks = (
            False
            if (
                synchronize_ranks
                and (isinstance(self.cls, GPTDatasetConfig) and self.config.fast_cache_load)
            )
            else synchronize_ranks
        )  # NOTE(asolergi-nv): Set synchronize_ranks to False if we are using --dataloader-fast-cache-load # pylint: disable=C0301
        # short-cut if we are not building on this rank
        if torch.distributed.is_initialized() and not self.is_built_on_rank():
            for i in range(len(Split)):
                if split[i] is not None and synchronize_ranks:
                    torch.distributed.barrier()
            return [None] * len(Split)

        # Build the low level dataset
        low_level_dataset = self.cls.build_low_level_dataset(dataset_path, self.config)

        # Build the split indices for the low level dataset
        num_elements = self.cls.numel_low_level_dataset(low_level_dataset)

        # Build the mid level dataset
        mid_level_datasets = []
        for i, _split in enumerate(Split):
            if split[i] is None:
                mid_level_datasets.append(None)
            else:
                indexed_indices = None
                if not (
                    isinstance(self.config, GPTDatasetConfig) and self.config.fast_cache_load
                ):  # NOTE(asolergi-nv): Skip indexed_indices building if we are using --dataloader-fast-cache-load # pylint: disable=C0301
                    beg = int(round(split[i][0] * float(num_elements)))
                    end = int(round(split[i][1] * float(num_elements)))
                    indexed_indices = numpy.arange(start=beg, stop=end, step=1, dtype=numpy.int32)

                mid_level_datasets.append(
                    self.build_generic_dataset(
                        self.cls,
                        self.is_built_on_rank,
                        synchronize_ranks,
                        low_level_dataset,
                        dataset_path,
                        indexed_indices,
                        sizes[i],
                        _split,
                        self.config,
                    )
                )

        return mid_level_datasets

    @staticmethod
    def build_generic_dataset(
        cls: Union[Type[DistributedDataset], Callable],
        is_built_on_rank: Callable,
        synchronize_ranks: bool,
        *args: Any,
    ) -> Optional[Union[DistributedDataset, Iterable]]:
        """Build the DistributedDataset

        Return None if and only if the underlying dataset class is not built on the current rank
        and torch.distributed is initialized.

        Args:
            cls (Union[Type[DistributedDataset], Callable]): The DistributedDataset class to be
                built. In special cases, e.g. when we are building the low level dataset for a
                RawMegatronDataset instance, we can accept a Callable which returns an Iterable.

            is_built_on_rank (Callable): A callable which returns True if the dataset should be
                built on the current rank and False otherwise.

            synchronize_ranks (bool): Whether to call barrier for rank-0 / barrier / other-ranks
                behavior. Set to False when we enforce this behavior at higher level.

            args (Tuple[Any]): The positional arguments used to build the provided
                DistributedDataset class

        Raises:
            Exception: When the dataset constructor raises an OSError

        Returns:
            Optional[Union[DistributedDataset, Iterable]]: The DistributedDataset instantion, the
                Iterable instantiation, or None
        """
        if torch.distributed.is_initialized():
            rank = torch.distributed.get_rank()

            dataset = None

            # First, build on rank 0
            if rank == 0 and is_built_on_rank():
                try:
                    dataset = cls(*args)
                except OSError as err:
                    log = (
                        f"Failed to write dataset materials to the data cache directory. Please "
                        f"supply a directory to which you have write access via the path_to_cache "
                        f"attribute in BlendedMegatronDatasetConfig and retry. Refer to the "
                        f"preserved traceback above for more information."
                    )
                    raise Exception(log) from err

            if synchronize_ranks:
                torch.distributed.barrier()

            # After, build on other ranks
            if rank != 0 and is_built_on_rank():
                dataset = cls(*args)

            return dataset

        return cls(*args)


def _get_size_per_split_per_dataset(
    normalized_weights: List[float], target_size_per_split: List[int], surplus: float = 0.0
) -> List[List[int]]:
    """Determine the contribution of the MegatronDataset splits to the BlendedDataset splits

    Args:
        normalized_weights (List[float]): e.g. [0.3, 0.7]

        target_size_per_split (List[int]): The number of samples to target for each BlendedDataset
            split

        surplus (float): The sample surplus to build per split per dataset

    Returns:
        List[List[int]]: The number of samples to request per MegatronDataset per split
    """

    assert numpy.isclose(sum(normalized_weights), 1.0)

    # Use margin as buffer to ensure we satiate the request
    sizes_per_dataset = [
        [
            int(math.ceil(math.ceil(target_size * weight) * (1 + surplus)))
            for target_size in target_size_per_split
        ]
        for weight in normalized_weights
    ]

    return sizes_per_dataset


================================================
FILE: megatron/core/datasets/blended_megatron_dataset_config.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

import functools
import logging
import re
from dataclasses import dataclass, field
from typing import List, Optional, Tuple

from megatron.core.datasets.utils import Split, log_single_rank, normalize
from megatron.core.tokenizers import MegatronTokenizerBase

logger = logging.getLogger(__name__)


@dataclass
class BlendedMegatronDatasetConfig:
    """Configuration object for Megatron Core datasets"""

    random_seed: int
    """The seed for all RNG during dataset creation."""

    sequence_length: int
    """The sequence length."""

    blend: Optional[Tuple[List[str], Optional[List[float]]]] = None
    """The blend, consisting of a list of dataset prefixes and optionally a list of dataset
       weights. For example, [["dataset-path1", "dataset-path2"], [0.3, 0.7]]. When the weights are
       None, they are inferred from the lengths of the contributing datasets. Not to be used with
       'blend_per_split'. Defaults to None.
    """

    blend_per_split: Optional[List[Optional[Tuple[List[str], Optional[List[float]]]]]] = None
    """A set of blends, as defined above, one for each split distribution. Not to be used with
       'blend'. Defaults to None.
    """

    multiple_validation_sets: Optional[bool] = None
    """Whether the validation split should be treated as multiple separated datasets."""

    full_validation: Optional[bool] = None
    """Whether to run a full epoch of validation each time validation occurs."""

    split: Optional[str] = None
    """The split string, a comma separated weighting for the dataset splits when drawing samples
       from a single distribution. Not to be used with 'blend_per_split'.  Defaults to None.
    """

    split_matrix: Optional[List[Tuple[float, float]]] = field(init=False, default=None)
    """The split matrix consisting of non-overlapping book-ends of each split in order. For more
       information, refer to 'convert_split_vector_to_split_matrix'. Created automatically from
       'split'. Not to be passed in to the constructor.
    """

    num_dataset_builder_threads: int = 1
    """The number of threads to use for dataset building."""

    path_to_cache: Optional[str] = None
    """Where all re-useable dataset indices are to be cached."""

    mmap_bin_files: bool = True
    """Whether to mmap the .bin files or use file pointers."""

    mock: bool = field(init=False, default=False)
    """Whether to bypass real data loading and validation in favor of mock data generation.
       Created automatically from 'blend' and 'blend_per_split'. Not to be passed in to the
       constructor.
    """

    tokenizer: Optional[MegatronTokenizerBase] = None
    """The MegatronTokenizerBase instance. Required for datasets that do online tokenization."""

    mid_level_dataset_surplus: float = 0.005
    """The sample surplus to build for the mid-level datasets(s). Defaults arbitrarily to 0.005.
       This value is irrelevant for single source data blends. This value may need to be increased
       if the top level dataset oversamples the mid level dataset(s). This value may be set to 0.0
       in future if the top level dataset is constrained to not oversample the mid level
       datasets(s).
    """

    allow_ambiguous_pad_tokens: Optional[bool] = False
    """Whether to prevent pad tokens already present in the dataset from being masked out
       when the pad token incorrectly shares the same id with other special tokens.
       Treating such tokens as pad tokens results in training instability and divergence.
       Such a scenario is best resolved by fixing the tokenizer, but leaving this option as False
       provides a workaround.
       This argument will have no effect if the tokenizer is correct. However, should the user
       desire to train on a dataset that intentionally contains pad tokens - while also using an
       incorrect tokenizer - this option may be set to True. This is typically not recommended.
    """

    fast_cache_load: bool = False
    """Option to use the fast cache loading path. Requires all the dataset caches to be built."""

    defer_npy_index_mmap: bool = False
    """Option to defer the mmap of the dataset indexes until the first access.
       Requires all the dataset caches to be built.
    """

    def __post_init__(self) -> None:
        """Do asserts and set fields post init"""
        if self.fast_cache_load:
            assert (
                self.path_to_cache is not None
            ), "--data-cache-path must be provided when using --dataloader-fast-cache-load."
            assert (
                self.blend is None
            ), f"--dataloader-fast-cache-load and --data-path cannot be used together. \
            Use --per-split-data-args-path or --train-data-path, --valid-data-path and \
            --test-data-path instead."
        if self.defer_npy_index_mmap:
            assert (
                self.path_to_cache is not None
            ), "--data-cache-path must be provided when using --dataloader-defer-npy-index-mmap."
        if self.blend_per_split is not None and any(self.blend_per_split):
            assert self.blend is None, "blend and blend_per_split are incompatible"
            assert self.split is None, "split and blend_per_split are incompatible"
            assert len(self.blend_per_split) == len(
                Split
            ), f"blend_per_split must contain {len(Split)} blends"
            for split in Split:
                if self.blend_per_split[split.value] is None:
                    log_single_rank(
                        logger, logging.INFO, f"blend not provided for {split.name} split"
                    )
                else:
                    assert self.blend_per_split[split.value][1] is None or len(
                        self.blend_per_split[split.value][0]
                    ) == len(
                        self.blend_per_split[split.value][1]
                    ), "blend per split prefixes and weights must be equal in number"
        else:
            if self.blend is not None:
                assert self.blend[1] is None or len(self.blend[0]) == len(
                    self.blend[1]
                ), "blend prefixes and weights must be equal in number"
                assert self.split is not None, "split must be provided when blend is not None"
            else:
                self.mock = True
                log_single_rank(
                    logger,
                    logging.INFO,
                    f"Let mock = True, as both blend and blend_per_split are None",
                )
                self.split = "1,1,1"
                log_single_rank(
                    logger,
                    logging.INFO,
                    f"Let split = {self.split}, an arbitrarily even split, as mock is True",
                )
            split_vector = parse_and_normalize_split(self.split)
            self.split_matrix = convert_split_vector_to_split_matrix(split_vector)
            log_single_rank(logger, logging.INFO, f"Let split_matrix = {self.split_matrix}")


def parse_and_normalize_split(split: str) -> List[float]:
    """Parse the dataset split ratios from a string

    Args:
        split (str): The train valid test split string e.g. "99,1,0"

    Returns:
        List[float]: The trian valid test split ratios e.g. [0.99, 0.01, 0.0]
    """
    split = list(map(float, re.findall(r"[.0-9]+", split)))
    split = split + [0.0 for _ in range(len(Split) - len(split))]

    assert len(split) == len(Split)
    assert all(map(lambda _: _ >= 0.0, split))

    split = normalize(split)

    return split


def convert_split_vector_to_split_matrix(
    vector_a: List[float], vector_b: Optional[List[float]] = None
) -> List[Optional[Tuple[float, float]]]:
    """Build the split matrix from one or optionally two contributing split vectors.

    Ex. a standard conversion:

    [0.99, 0.01, 0.0] -> [(0, 0.99), (0.99, 1.0), None]

    [0.99, 0.01, 0.0], [0.98, 0.02, 0.0] -> [(0, 0.98), (0.99, 1.0), None]

    Args:
        vector_a (List[float]): The primary split vector

        vector_b (Optional[List[float]]): An optional secondary split vector which constrains the
            primary split vector. Defaults to None.

    Returns:
        List[Tuple[float, float]]: The split matrix consisting of book-ends of each split in order
    """
    if vector_b is None:
        vector_b = vector_a

    # [.900, .090, .010] -> [0.00, .900, .990, 100]
    expansion_a = functools.reduce(lambda a, b: a + [a[len(a) - 1] + b], [[0], *vector_a])
    expansion_b = functools.reduce(lambda a, b: a + [a[len(a) - 1] + b], [[0], *vector_b])

    # [0.00, .900, .990, 100.0] -> [(0.00, .900), (.900, .990), (.990, 100)]
    bookends_a = list(zip(expansion_a[:-1], expansion_a[1:]))
    bookends_b = list(zip(expansion_b[:-1], expansion_b[1:]))

    # gather per-split overlap or None
    matrix = []
    for bookend_a, bookend_b in zip(bookends_a, bookends_b):
        if min(bookend_a[1], bookend_b[1]) <= max(bookend_a[0], bookend_b[0]):
            overlap = None
        else:
            overlap = (max(bookend_a[0], bookend_b[0]), min(bookend_a[1], bookend_b[1]))
        matrix.append(overlap)

    return matrix


================================================
FILE: megatron/core/datasets/data_schedule.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION.  All rights reserved.

from typing import Any, List, Optional

import torch

from megatron.core import parallel_state
from megatron.core.pipeline_parallel.hybrid_cp_schedule import BalancedCPScheduler
from megatron.core.process_groups_config import ProcessGroupCollection


class HybridCPDataLoaderWrapper:
    """
    A wrapper class that wraps around an existing data_iterator.
    For every __next__ call,
    1. Each DP rank pulls a batch of packed samples.
    2. Extracts the sequence lengths of each sub-sample and all-gathers across the DP group.
    3. Schedules the sub-samples to the DPxCP ranks using the BalancedCPScheduler.
    4. Based on the schedule, reroutes the sub-samples to the correct rank using all-to-all.
    5. Returns the assigned sub-samples to this rank.

    Args:
        data_iterator: The original data_iterator to wrap around
        config: The config object containing the max_seqlen_per_dp_cp_rank
        dp_cp_group: Data parallel context parallel group.
    """

    def __init__(
        self, data_iterator, config, pg_collection: Optional[ProcessGroupCollection] = None
    ):
        self.data_iterator = data_iterator
        self.config = config
        if pg_collection is None:
            self.dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=True)
            self.dp_group = parallel_state.get_data_parallel_group()
            self.tp_group = parallel_state.get_tensor_model_parallel_group()
        else:
            self.dp_cp_group = pg_collection.dp_cp
            self.dp_group = pg_collection.dp
            self.tp_group = pg_collection.tp
        assert (
            self.dp_cp_group is not None and self.dp_group is not None and self.tp_group is not None
        ), "dp_cp_group, dp_group, tp_group must not be None when using hybrid context parallel"

        self.cp_balancing_scheduler = BalancedCPScheduler(
            max_seq_len_per_rank=self.config.max_seqlen_per_dp_cp_rank, dp_cp_group=self.dp_cp_group
        )

        self.total_hdp_gpus = self.dp_cp_group.size()

    def __iter__(self):
        """Return self as an iterator."""
        return self

    def get_global_seqlens(self, subsample_seqlens: torch.Tensor) -> List[int]:
        """
        Gathers the sequence lengths of all subsamples from all DP ranks.
        Each DP rank loads the same number of microbatches but each microbatch
        may have a different number of subsamples.

        We find the number of subsamples each rank holds and then gather the
        sequence lengths of all subsamples from all ranks.
        """
        # Collect the number of subsamples from all ranks
        local_len = torch.tensor([subsample_seqlens.shape[0]], dtype=torch.int32).cuda()
        dp_subsample_count = [torch.zeros_like(local_len) for _ in range(self.dp_group.size())]
        torch.distributed.all_gather(dp_subsample_count, local_len, group=self.dp_group)

        # Find the max number of subsamples across all ranks and pad subsample_seqlens to max length
        dp_subsample_counts = torch.stack(dp_subsample_count, dim=0).cpu().view(-1)
        max_sub_samples = int(dp_subsample_counts.max().item())

        if local_len.item() < max_sub_samples:
            subsample_seqlens_padded = torch.cat(
                [
                    subsample_seqlens,
                    torch.zeros(max_sub_samples - local_len.item(), dtype=torch.int32).cuda(),
                ],
                dim=0,
            )
        else:
            subsample_seqlens_padded = subsample_seqlens

        # Gather the subsample_seqlens from all ranks
        seqlens_gathered = [
            torch.empty_like(subsample_seqlens_padded) for _ in range(self.dp_group.size())
        ]
        torch.distributed.all_gather(
            seqlens_gathered, subsample_seqlens_padded, group=self.dp_group
        )

        # Trim each seqlens_gathered to the length of the correct sample
        for dp_rank, seqlen in enumerate(seqlens_gathered):
            seqlens_gathered[dp_rank] = seqlen[: dp_subsample_counts[dp_rank]]

        seqlens_gathered = torch.cat(seqlens_gathered, dim=0)
        seqlens_gathered = seqlens_gathered.cpu().tolist()

        # Calculate the offsets to assign unique global ID to each subsample.
        csum = torch.cumsum(dp_subsample_counts, dim=0, dtype=torch.int32)
        offsets = torch.cat([torch.zeros(1, dtype=torch.int32), csum[:-1]], dim=0)

        return seqlens_gathered, offsets

    def get_global_id_seqlens(self, num_local_subsamples, offsets, seqlens_gathered):
        """
        Calculates the global ID for each subsample.

        We assign a unique global ID to each subsample.

        Returns:
        global_id_seqlens: list of (global_id, seqlen) tuples for scheduling.
        global_ids_this_rank: list of global IDs locally present on this rank.
        """
        dp_rank = self.dp_group.rank()
        global_ids = torch.arange(len(seqlens_gathered), dtype=torch.int32).cuda()
        # Create a list of (global_id, seqlen) tuples for scheduling
        global_id_seqlens = [(i, seqlens_gathered[i]) for i in range(len(global_ids))]
        # Get the global IDs locally present on this rank
        global_ids_this_rank = global_ids[
            offsets[dp_rank] : offsets[dp_rank] + num_local_subsamples
        ]

        return global_id_seqlens, global_ids_this_rank

    def _gid_to_src_rank(self, gid: int, offsets: List[int]) -> int:
        dp_src_rank = torch.bucketize(gid, offsets[1:] - 1)
        # Since the torch.distributed.get_process_group_ranks
        # provides the global rank, we need to consider TP
        hdp_rank = (
            torch.distributed.get_process_group_ranks(self.dp_group)[dp_src_rank]
            // self.tp_group.size()
        )
        return hdp_rank

    def reroute_samples_to_hdp_ranks(
        self, batch, global_ids_this_rank, global_id_seqlens, sample_id_groups, offsets
    ):
        """
        Reroutes the sub-samples to the correct rank after scheduling.

        For each key in the batch dict, we perform an all-to-all communication
        to transfer the data to the correct ranks.
        Since all CP ranks within a DP group have the same data, we only need
        to transfer data between matching CP ranks.
        """
        gid2local_id = {int(gid): i for i, gid in enumerate(global_ids_this_rank)}
        hdp_rank = self.dp_cp_group.rank()
        dp_ranks = torch.distributed.get_process_group_ranks(self.dp_group)
        # Here we actually want to get the DP group's rank within the HDP group,
        # we need to consider TP
        dp_ranks = [r // self.tp_group.size() for r in dp_ranks]

        data_keys = batch[0].keys()

        # Create the send plan
        combined_sample_id_groups: List[List[int]] = [[] for _ in range(self.total_hdp_gpus)]

        for d in range(self.total_hdp_gpus):
            for sample_id_group in sample_id_groups:
                combined_sample_id_groups[d].extend(sample_id_group[d])

        for dest_rank in range(self.total_hdp_gpus):
            combined_sample_id_groups[dest_rank].sort()

        # Filter out samples that are not present on this rank
        send_ids_sorted = [
            gid
            for d in dp_ranks
            for gid in combined_sample_id_groups[d]
            if gid in global_ids_this_rank
        ]
        # send_counts = [len(combined_sample_id_groups[d]) for d in range(self.total_hdp_gpus)]

        send_lens_split = [0] * self.total_hdp_gpus
        for dest_rank in range(self.total_hdp_gpus):
            if dest_rank in dp_ranks:
                send_lens_split[dest_rank] = sum(
                    [
                        global_id_seqlens[gid][1]
                        for gid in combined_sample_id_groups[dest_rank]
                        if gid in global_ids_this_rank
                    ]
                )
            else:
                # We only need to share local data with DP ranks that have different data.
                send_lens_split[dest_rank] = 0

        # Create the recv plan
        recv_sample_id_groups = [[] for _ in range(self.total_hdp_gpus)]
        for gid in combined_sample_id_groups[hdp_rank]:
            src_rank = self._gid_to_src_rank(gid, offsets)
            recv_sample_id_groups[src_rank].append(gid)

        recv_lens_split = [0] * self.total_hdp_gpus
        for src_rank in range(self.total_hdp_gpus):
            recv_lens_split[src_rank] = sum(
                [global_id_seqlens[gid][1] for gid in recv_sample_id_groups[src_rank]]
            )

        recv_ids_sorted = [
            gid for d in range(self.total_hdp_gpus) for gid in recv_sample_id_groups[d]
        ]
        recv_counts = [len(recv_sample_id_groups[d]) for d in range(self.total_hdp_gpus)]

        recv_samples = [{k: None for k in data_keys} for _ in range(sum(recv_counts))]

        def _pack_sample_by_key(key: str) -> torch.Tensor:
            flattened_tensors = []
            for gid in send_ids_sorted:
                t = batch[gid2local_id[gid]][key].to(torch.cuda.current_device(), non_blocking=True)
                flattened_tensors.append(t)
            return (
                torch.cat(flattened_tensors, dim=0)
                if flattened_tensors
                else torch.empty(0, device=torch.cuda.current_device(), dtype=batch[0][key].dtype)
            )

        def _unpack_sample_by_key(key: str, recv_tensor: torch.Tensor):
            cursor = 0
            for i, gid in enumerate(recv_ids_sorted):
                sample_len = global_id_seqlens[gid][1]
                recv_samples[i][key] = recv_tensor[cursor : cursor + sample_len]
                cursor += sample_len

        for key in data_keys:
            send_tensor = _pack_sample_by_key(key)
            recv_tensor = torch.empty(
                sum(recv_lens_split), device=torch.cuda.current_device(), dtype=send_tensor.dtype
            )
            torch.distributed.all_to_all_single(
                output=recv_tensor,
                input=send_tensor,
                output_split_sizes=recv_lens_split,
                input_split_sizes=send_lens_split,
                group=self.dp_cp_group,
            )
            _unpack_sample_by_key(key, recv_tensor)

        recv_sample_with_id = {
            recv_id: recv_samples[i] for i, recv_id in enumerate(recv_ids_sorted)
        }
        return recv_sample_with_id

    def unpack_batch(self, batch):
        """
        Unpacks the packed samples into a list of sub-samples.
        Since each sub-sample may be routed to different DPxCP ranks,
        we unpack the sample here to avoid unnecessarily transferring
        the entire packed sample.
        """
        batch_unpacked = []
        for sample in batch:
            for sub_sample in range(sample["cu_seqlens"].shape[0] - 1):
                sub_sample_dict = {}
                start_idx = sample["cu_seqlens"][sub_sample]
                end_idx = sample["cu_seqlens"][sub_sample + 1]
                if end_idx - start_idx == 0:
                    continue
                for key in sample.keys():
                    if key in ["cu_seqlens", "batch_idx", "max_seqlen"]:
                        continue
                    sub_sample_dict[key] = sample[key][start_idx:end_idx]
                batch_unpacked.append(sub_sample_dict)
        return batch_unpacked

    def __next__(self) -> Any:
        """
        Get the next item from the dataset, pull scheduling metadata and return it.
        """
        if self.data_iterator is None:
            # TP0 reads from data_iterator, others receive via broadcast.
            return None, None
        else:
            batch = next(self.data_iterator)
        subsample_seqlens = []
        for sample in batch:
            subsample_seqlens.extend(
                [
                    int(sample["cu_seqlens"][i + 1] - sample["cu_seqlens"][i])
                    for i in range(0, sample["cu_seqlens"].shape[0] - 1)
                ]
            )
        subsample_seqlens = torch.tensor(subsample_seqlens, dtype=torch.int32).cuda()
        subsample_seqlens = subsample_seqlens[subsample_seqlens != 0]

        seqlens_gathered, offsets = self.get_global_seqlens(subsample_seqlens)

        global_id_seqlens, global_ids_this_rank = self.get_global_id_seqlens(
            subsample_seqlens.shape[0], offsets, seqlens_gathered
        )

        groups, sample_id_groups = self.cp_balancing_scheduler.get_groups_and_subsamples(
            global_id_seqlens, self.config
        )

        batch = self.unpack_batch(batch)
        samples_this_rank_with_id = self.reroute_samples_to_hdp_ranks(
            batch, global_ids_this_rank, global_id_seqlens, sample_id_groups, offsets
        )
        return samples_this_rank_with_id, sample_id_groups


================================================
FILE: megatron/core/datasets/gpt_dataset.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

import logging
import os
import time
from dataclasses import dataclass, field
from math import ceil
from typing import Dict, Optional, Tuple

import numpy
import torch

from megatron.core.datasets.blended_megatron_dataset_config import BlendedMegatronDatasetConfig
from megatron.core.datasets.indexed_dataset import IndexedDataset
from megatron.core.datasets.megatron_dataset import MegatronDataset
from megatron.core.datasets.object_storage_utils import ObjectStorageConfig, is_object_storage_path
from megatron.core.datasets.utils import Split
from megatron.core.tokenizers import MegatronTokenizerBase
from megatron.core.utils import log_single_rank

logger = logging.getLogger(__name__)


@dataclass
class GPTDatasetConfig(BlendedMegatronDatasetConfig):
    """Configuration object for Megatron Core GPT datasets"""

    reset_position_ids: Optional[bool] = None
    """Option to reset the position IDs in the dataset at an interval"""

    reset_attention_mask: Optional[bool] = None
    """Option to reset the attention mask from the dataset"""

    eod_mask_loss: Optional[bool] = None
    """Option to enable the EOD mask loss"""

    create_attention_mask: bool = True
    """Option to enable the attention masks generation. Can be disabled if attention kernel
       generates masks by itself.
    """

    drop_last_partial_validation_sequence: bool = True
    """Option to drop the last partial validation sequence"""

    add_extra_token_to_sequence: bool = True
    """Option to draw sequences with one extra token to ensure the sample input tokens and sample
       output tokens are both of the desired sequence length
    """

    object_storage_cache_path: Optional[str] = None
    """Path for caching indices for s3 or msc dataloading."""

    data_parallel_size: int = 1
    """Option to enable data parallelism"""

    sequence_parallel_size: int = 0
    """Option to indicate the sequence parallelism size when using TP
    Set to 0 if sequence parallel is not enabled regardless of TP size.
    """

    hybrid_context_parallel: bool = False
    """Option to enable hybrid context parallelism. When setting this to True, 
    each sample should be divisible by the data parallel size * context parallel size * 2.
    If sequence parallel is enabled, it should be divisible by the 
    data parallel size * context parallel size * sequence parallel size * 2.
    """

    sequences_per_dataset: Optional[Dict[str, int]] = None
    """If provided, the sequence and document counts for each dataset. 
       Check --per-dataset-sequences-path
    """

    token_dtype_code: Optional[int] = field(init=False, default=None)
    """The dtype code for the token ids. 4 for int32, 8 for uint16."""

    context_parallel_size: Optional[int] = None
    """The size of the context parallel group. Needed for padding in packed sequences."""

    def __post_init__(self) -> None:
        """Do asserts and set fields post init"""
        super().__post_init__()

        assert self.tokenizer is not None

        assert self.reset_position_ids is not None
        assert self.reset_attention_mask is not None
        assert self.eod_mask_loss is not None

        self.token_dtype_code = (
            None
            if self.tokenizer.vocab_size is None
            else (4 if self.tokenizer.vocab_size > numpy.iinfo(numpy.uint16).max + 1 else 8)
        )
        if self.sequences_per_dataset is not None:
            assert (
                self.token_dtype_code is not None
            ), "Tokenizer vocab size is not set, deactivate --per-dataset-sequences-path or \
            fix the tokenizer."


class GPTDataset(MegatronDataset):
    """The base GPT dataset

    Args:
        indexed_dataset (IndexedDataset): The IndexedDataset around which to build the GPTDataset

        dataset_path (Optional[str]): The real path on disk to the dataset, for bookkeeping

        indexed_indices (numpy.ndarray): The set of the documents indices to expose

        num_samples (Optional[int]): The number of samples to draw from the indexed dataset. When
            None, build as many samples as correspond to one epoch.

        index_split (Split): The indexed_indices Split

        config (GPTDatasetConfig): The config
    """

    def __init__(
        self,
        indexed_dataset: IndexedDataset,
        dataset_path: Optional[str],
        indexed_indices: numpy.ndarray,
        num_samples: Optional[int],
        index_split: Split,
        config: GPTDatasetConfig,
    ) -> None:
        super().__init__(
            indexed_dataset, dataset_path, indexed_indices, num_samples, index_split, config
        )
        self.masks_and_position_ids_are_cacheable = not any(
            [
                self.config.reset_position_ids,
                self.config.reset_attention_mask,
                self.config.eod_mask_loss,
            ]
        )
        self.masks_and_position_ids_are_cached = False
        self.cached_attention_mask = None
        self.cached_loss_mask = None
        self.cached_position_ids = None

        (self.document_index, self.sample_index, self.shuffle_index) = (
            self._build_document_sample_shuffle_indices()
        )

    @staticmethod
    def numel_low_level_dataset(low_level_dataset: IndexedDataset) -> int:
        """Abstract method implementation

        For GPT, the underlying IndexedDataset should be split by sequence, as opposed to, say,
        BERT, which should be split by document

        Args:
            low_level_dataset (IndexedDataset): The underlying IndexedDataset

        Returns:
            int: The number of unique elements in the underlying IndexedDataset
        """
        return low_level_dataset.sequence_lengths.shape[0]

    @staticmethod
    def build_low_level_dataset(dataset_path: str, config: GPTDatasetConfig) -> IndexedDataset:
        """Abstract method implementation

        Args:
            dataset_path (str): The real path prefix to the IndexedDataset .bin and .idx files

            config (GPTDatasetConfig): The config

        Returns:
            IndexedDataset: The underlying IndexedDataset
        """
        if is_object_storage_path(dataset_path):
            assert config.object_storage_cache_path is not None
            return IndexedDataset(
                dataset_path,
                multimodal=False,
                mmap=config.mmap_bin_files,
                object_storage_config=ObjectStorageConfig(
                    path_to_idx_cache=config.object_storage_cache_path
                ),
            )
        sequences_per_dataset = None
        if config.sequences_per_dataset:
            sequences_per_dataset = config.sequences_per_dataset[dataset_path]
        return IndexedDataset(
            dataset_path,
            multimodal=False,
            mmap=config.mmap_bin_files,
            fast_cache_load=config.fast_cache_load,
            sequences_per_dataset=sequences_per_dataset,
            dtype_code=config.token_dtype_code,
        )

    def __len__(self) -> int:
        """Abstract method implementation

        Returns:
            int: The length of the dataset
        """
        if self.config.defer_npy_index_mmap:
            # NOTE(asolergi-nv): We need the number of samples of every GPTDataset to build/hit the BlendedDataset cache # pylint: disable=C0301
            # NOTE(asolergi-nv): Uses logic from megatron/core/datasets/helpers.cpp::build_sample_idx to compute the number of samples # pylint: disable=C0301
            num_tokens_per_epoch = self._get_num_tokens_per_epoch()
            num_epochs = self._get_num_epochs(num_tokens_per_epoch)

            drop_last_partial_sequence = True
            if self.index_split == Split.valid:
                drop_last_partial_sequence = self.config.drop_last_partial_validation_sequence

            if drop_last_partial_sequence:
                return (
                    num_epochs * num_tokens_per_epoch - self.config.add_extra_token_to_sequence
                ) // self.config.sequence_length
            else:
                return ceil(
                    float(
                        num_epochs * num_tokens_per_epoch - self.config.add_extra_token_to_sequence
                    )
                    / self.config.sequence_length
                )
        return self.sample_index.shape[0] - 1

    def __getitem__(self, idx: Optional[int]) -> Dict[str, torch.Tensor]:
        """Abstract method implementation

        Args:
            idx (Optional[int]): The index into the dataset

        Returns:
            Dict[str, torch.Tensor]: The sample information wrapped in a dictionary
        """
        if idx is None:
            # Batch padding sequence so the index does not matter
            text, _ = self._query_document_sample_shuffle_indices(0)
        else:
            text, _ = self._query_document_sample_shuffle_indices(idx)

        text = torch.from_numpy(text).long()
        if self.config.add_extra_token_to_sequence:
            tokens = text[:-1].contiguous()
            labels = text[1:].contiguous()
        else:
            tokens = text
            labels = torch.roll(text, shifts=-1, dims=0)
            labels[-1] = self._pad_token_id

        if (
            not self.masks_and_position_ids_are_cacheable
            or not self.masks_and_position_ids_are_cached
        ):
            attention_mask, loss_mask, position_ids = _get_ltor_masks_and_position_ids(
                tokens,
                self.config.tokenizer.eod,
                self.config.reset_position_ids,
                self.config.reset_attention_mask,
                self.config.eod_mask_loss,
                self.config.create_attention_mask,
            )
            if self.masks_and_position_ids_are_cacheable:
                self.cached_attention_mask = attention_mask
                self.cached_loss_mask = loss_mask
                self.cached_position_ids = position_ids
                self.masks_and_position_ids_are_cached = True
        else:
            attention_mask = self.cached_attention_mask
            loss_mask = self.cached_loss_mask.clone()
            position_ids = self.cached_position_ids

        # For padded sequences, mask the loss
        loss_mask[labels == self._pad_token_id] = 0.0

        # For padded sequences, ensure the embedding layer can map the token ID
        tokens[tokens == self._pad_token_id] = 0
        labels[labels == self._pad_token_id] = 0

        # Batch padding sequence so we mask the loss
        if idx is None:
            loss_mask = torch.zeros_like(loss_mask)

        if self.config.create_attention_mask:
            return {
                "tokens": tokens,
                "labels": labels,
                "attention_mask": attention_mask,
                "loss_mask": loss_mask,
                "position_ids": position_ids,
            }
        else:
            return {
                "tokens": tokens,
                "labels": labels,
                "loss_mask": loss_mask,
                "position_ids": position_ids,
            }

    def _query_document_sample_shuffle_indices(
        self, idx: int
    ) -> Tuple[numpy.ndarray, numpy.ndarray]:
        """Get the text (token ids) and document ids for a given index

        Args:
            idx (int): The index into the dataset

        Returns:
            Tuple[numpy.ndarray, numpy.ndarray]: The text ids and document ids
        """
        if self.shuffle_index is None:
            # NOTE(asolergi-nv): Lazy memmap the indexes
            self.shuffle_index = numpy.load(
                self.path_to_shuffle_index, allow_pickle=True, mmap_mode='r'
            )
            self.sample_index = numpy.load(
                self.path_to_sample_index, allow_pickle=True, mmap_mode='r'
            )
            self.document_index = numpy.load(
                self.path_to_document_index, allow_pickle=True, mmap_mode='r'
            )

        # Do the shuffle mapping
        idx = self.shuffle_index[idx]

        # Get the beginning and end documents and offsets
        doc_index_beg, doc_index_beg_offset = self.sample_index[idx]
        doc_index_end, doc_index_end_offset = self.sample_index[idx + 1]

        document_ids = []
        sample_parts = []

        # Sample spans a single document
        if doc_index_beg == doc_index_end:
            # Add the document id
            document_ids.append(self.document_index[doc_index_beg])

            # Add the entire sample
            sample_parts.append(
                self.dataset.get(
                    self.document_index[doc_index_beg],
                    offset=int(doc_index_beg_offset),
                    length=doc_index_end_offset
                    - doc_index_beg_offset
                    + self.config.add_extra_token_to_sequence,
                )
            )

        # Sample spans multiple documents
        else:
            for i in range(doc_index_beg, doc_index_end + 1):
                # Add the document id
                document_ids.append(self.document_index[i])

                # Add the sample part
                offset = 0 if i > doc_index_beg else doc_index_beg_offset
                length = (
                    None
                    if i < doc_index_end
                    else doc_index_end_offset + self.config.add_extra_token_to_sequence
                )
                sample_parts.append(
                    self.dataset.get(self.document_index[i], offset=int(offset), length=length)
                )
        assert len(document_ids) == len(
            sample_parts
        ), f"len(document_ids) ({len(document_ids)}) != len(sample_parts) ({len(sample_parts)})"

        length = sum(map(len, sample_parts))

        # Pad the sample if necessary
        if length < (self.config.sequence_length + self.config.add_extra_token_to_sequence):
            sample_parts.append(
                [self._pad_token_id]
                * (self.config.sequence_length + self.config.add_extra_token_to_sequence - length)
            )

        return (
            numpy.concatenate(sample_parts, dtype=numpy.int64),
            numpy.array(document_ids, dtype=numpy.int64),
        )

    def _build_document_sample_shuffle_indices(
        self,
    ) -> Tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]:
        """Build the document index, the sample index, and the shuffle index

        The document index:
            -- 1-D
            -- An ordered array of document ids

        The sample index:
            -- 2-D
            -- The document indices and offsets which mark the start of every sample

        The shuffle index:
            -- 1-D
            -- A random permutation of index range of the sample index

        Returns:
            Tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]: The document index, the sample
            index, and the shuffle index
        """
        if self.config.defer_npy_index_mmap:
            # NOTE(asolergi-nv): Direct path to lazy memmap the indexes
            base = f"{self.unique_description_hash}-{type(self).__name__}-{self.index_split.name}"
            get_path_to = lambda affix: os.path.join(self.config.path_to_cache, f"{base}-{affix}")
            self.path_to_document_index = get_path_to("document_index.npy")
            self.path_to_sample_index = get_path_to("sample_index.npy")
            self.path_to_shuffle_index = get_path_to("shuffle_index.npy")
            return None, None, None

        path_to_cache = self.config.path_to_cache
        if path_to_cache is None and not self.config.mock:
            path_to_cache = os.path.join(
                self.dataset.path_prefix, "cache", f"{type(self).__name__}_indices"
            )

        if path_to_cache:
            base = f"{self.unique_description_hash}-{type(self).__name__}-{self.index_split.name}"
            get_path_to = lambda affix: os.path.join(path_to_cache, f"{base}-{affix}")
            path_to_description = get_path_to("description.txt")
            path_to_document_index = get_path_to("document_index.npy")
            path_to_sample_index = get_path_to("sample_index.npy")
            path_to_shuffle_index = get_path_to("shuffle_index.npy")
            cache_hit = (
                True
                if self.config.fast_cache_load
                else all(
                    map(
                        os.path.isfile,
                        [
                            path_to_description,
                            path_to_document_index,
                            path_to_sample_index,
                            path_to_shuffle_index,
                        ],
                    )
                )
            )
        else:
            cache_hit = False

        if not path_to_cache or (
            not cache_hit
            and (not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0)
        ):
            log_single_rank(
                logger,
                logging.INFO,
                f"Build and save the {type(self).__name__} {self.index_split.name} indices",
            )
            t_beg = time.time()

            sequence_length = self.config.sequence_length
            num_tokens_per_epoch = self._get_num_tokens_per_epoch()
            num_epochs = self._get_num_epochs(num_tokens_per_epoch)

            if num_epochs == 1:
                separate_final_epoch = False
            else:
                # Get the number of samples for the last epoch
                num_samples_sans_final_epoch = (
                    (num_epochs - 1) * num_tokens_per_epoch
                    - self.config.add_extra_token_to_sequence
                ) // sequence_length
                num_samples_from_final_epoch = self.num_samples - num_samples_sans_final_epoch
                num_samples_per_epoch = (
                    num_tokens_per_epoch - self.config.add_extra_token_to_sequence
                ) // sequence_length

                # num_samples_from_final_epoch should be non-negative
                assert num_samples_from_final_epoch >= 0

                # num_samples_from_final_epoch should not exceed max value
                assert num_samples_from_final_epoch <= num_samples_per_epoch + 1

                # Separate the final epoch if it falls below the threshold
                threshold = 0.80
                separate_final_epoch = num_samples_from_final_epoch < int(
                    threshold * num_samples_per_epoch
                )

                log_single_rank(
                    logger,
                    logging.DEBUG,
                    f"> num_samples_from_final_epoch: {num_samples_from_final_epoch}",
                )
                log_single_rank(logger, logging.DEBUG, f"> threshold: {threshold}")
                log_single_rank(
                    logger, logging.DEBUG, f"> num_samples_per_epoch: {num_samples_per_epoch}"
                )

            log_single_rank(
                logger, logging.DEBUG, f"> separate_final_epoch: {separate_final_epoch}"
            )

            numpy_random_state = numpy.random.RandomState(self.config.random_seed)

            # Build the document index
            document_index = _build_document_index(
                self.indices, num_epochs, numpy_random_state, separate_final_epoch
            )

            # Build the sample index
            from megatron.core.datasets import helpers

            if self.index_split == Split.valid:
                drop_last_partial_sequence = self.config.drop_last_partial_validation_sequence
            else:
                drop_last_partial_sequence = True

            assert document_index.dtype == numpy.int32
            assert self.dataset.sequence_lengths.dtype == numpy.int32
            if len(document_index) * 2 > len(self.dataset.sequence_lengths):
                # If "access density" of sequence_lengths is high, force load the mmap-ed array
                # into memory by making a copy.
                #
                # System performance benefits come from two aspects:
                #   1. We sequentially pre-load the whole file, most of which we expect to read
                #   2. The GIL is held when entering the c++ program, improving the speed of which
                #      improves parallelism
                sequence_lengths_for_cpp = self.dataset.sequence_lengths.copy()
            else:
                sequence_lengths_for_cpp = self.dataset.sequence_lengths
            sample_index = helpers.build_sample_idx(
                sequence_lengths_for_cpp,
                document_index,
                sequence_length,
                num_epochs,
                num_tokens_per_epoch,
                drop_last_partial_sequence,
                self.config.add_extra_token_to_sequence,
            )

            # Build the shuffle index
            if separate_final_epoch:
                shuffle_index = _build_shuffle_index(
                    num_samples_sans_final_epoch, sample_index.shape[0] - 1, numpy_random_state
                )
            else:
                shuffle_index = _build_shuffle_index(
                    sample_index.shape[0] - 1, sample_index.shape[0] - 1, numpy_random_state
                )

            if path_to_cache:
                os.makedirs(path_to_cache, exist_ok=True)
                # Write the description
                with open(path_to_description, "wt") as writer:
                    writer.write(self.unique_description)
                numpy.save(path_to_document_index, document_index, allow_pickle=True)
                numpy.save(path_to_sample_index, sample_index, allow_pickle=True)
                numpy.save(path_to_shuffle_index, shuffle_index, allow_pickle=True)
            else:
                log_single_rank(
                    logger,
                    logging.WARNING,
                    f"Unable to save {type(self).__name__} indexes because path_to_cache is None",
                )

            t_end = time.time()
            log_single_rank(logger, logging.DEBUG, f"\t> time elapsed: {t_end - t_beg:4f} seconds")

            log_single_rank(
                logger, logging.INFO, f"> total number of samples: {sample_index.shape[0] - 1}"
            )
            log_single_rank(logger, logging.INFO, f"> total number of epochs: {num_epochs}")

            return document_index, sample_index, shuffle_index

        log_single_rank(
            logger, logging.INFO, f"Load the {type(self).__name__} {self.index_split.name} indices"
        )

        log_single_rank(
            logger,
            logging.INFO,
            f"\tLoad the document index from {os.path.basename(path_to_document_index)}",
        )
        t_beg = time.time()
        document_index = numpy.load(path_to_document_index, allow_pickle=True, mmap_mode="r")
        t_end = time.time()
        log_single_rank(logger, logging.DEBUG, f"\t> time elapsed: {t_end - t_beg:4f} seconds")

        log_single_rank(
            logger,
            logging.INFO,
            f"\tLoad the sample index from {os.path.basename(path_to_sample_index)}",
        )
        t_beg = time.time()
        sample_index = numpy.load(path_to_sample_index, allow_pickle=True, mmap_mode="r")
        t_end = time.time()
        log_single_rank(logger, logging.DEBUG, f"\t> time elapsed: {t_end - t_beg:4f} seconds")

        log_single_rank(
            logger,
            logging.INFO,
            f"\tLoad the shuffle index from {os.path.basename(path_to_shuffle_index)}",
        )
        t_beg = time.time()
        shuffle_index = numpy.load(path_to_shuffle_index, allow_pickle=True, mmap_mode="r")
        t_end = time.time()
        log_single_rank(logger, logging.DEBUG, f"\t> time elapsed: {t_end - t_beg:4f} seconds")

        log_single_rank(
            logger, logging.INFO, f"> total number of samples: {sample_index.shape[0] - 1}"
        )

        return document_index, sample_index, shuffle_index

    def _get_num_tokens_per_epoch(self) -> int:
        """Calculate the number of tokens in a single epoch

        Returns:
            int: The number of tokens in a single epoch
        """
        return int(numpy.sum(self.dataset.sequence_lengths[self.indices]))

    def _get_num_epochs(self, num_tokens_per_epoch: int) -> int:
        """Calculate the number of epochs

        Args:
            num_tokens_per_epoch (int): The number of tokens in a single epoch

        Returns:
            int: The number of epochs
        """
        num_epochs = 1
        num_tokens = num_tokens_per_epoch
        if self.num_samples is None:
            return num_epochs
        else:
            num_tokens_requested = (
                self.num_samples * self.config.sequence_length
            ) + self.config.add_extra_token_to_sequence
            while num_tokens < num_tokens_requested:
                num_epochs += 1
                num_tokens += num_tokens_per_epoch
        return num_epochs


def _build_document_index(
    documents: numpy.ndarray,
    num_epochs: int,
    numpy_random_state: numpy.random.RandomState,
    separate_final_epoch: bool,
) -> numpy.ndarray:
    """Build an array with length = num epochs * num documents

    Args:
        documents (numpy.ndarray): the subset of exposed document indices

        num_epochs (int): The number of epochs

        numpy_random_state (numpy.random.RandomState): The NumPy random state

        separate_final_epoch (bool): Whether to exclude the last epoch from the global shuffle

    Returns:
        numpy.ndarray: The document index
    """

    if not separate_final_epoch or num_epochs == 1:
        document_index = numpy.mgrid[0:num_epochs, 0 : len(documents)][1]
        document_index[:] = documents
        document_index = document_index.reshape(-1)
        document_index = document_index.astype(numpy.int32)
        numpy_random_state.shuffle(document_index)
        return document_index

    doc_idx_first = _build_document_index(documents, num_epochs - 1, numpy_random_state, False)
    doc_idx_last = _build_document_index(documents, 1, numpy_random_state, False)
    return numpy.concatenate((doc_idx_first, doc_idx_last))


def _build_shuffle_index(
    num_samples: int, total_size: int, numpy_random_state: numpy.random.RandomState
) -> numpy.ndarray:
    """Build the range [0, size) and shuffle

    Args:
        num_samples (int): The size of the first shuffle range [0, num_samples)

        total_size (int): The size of the entire index. If larger than 'num_samples', it defines
            the second shuffle range [num_samples, total_size)

        numpy_random_state (numpy.random.RandomState): The NumPy random state

    Returns:
        numpy.ndarray: The shuffle index
    """

    dtype_ = numpy.uint32
    if total_size >= (numpy.iinfo(numpy.uint32).max - 1):
        dtype_ = numpy.int64

    shuffle_idx_first = numpy.arange(start=0, stop=num_samples, step=1, dtype=dtype_)
    numpy_random_state.shuffle(shuffle_idx_first)
    if num_samples == total_size:
        return shuffle_idx_first

    shuffle_idx_last = numpy.arange(start=num_samples, stop=total_size, step=1, dtype=dtype_)
    numpy_random_state.shuffle(shuffle_idx_last)

    return numpy.concatenate((shuffle_idx_first, shuffle_idx_last))


def _get_ltor_masks_and_position_ids(
    data: torch.Tensor,
    eod_token: int,
    reset_position_ids: bool,
    reset_attention_mask: bool,
    eod_mask_loss: bool,
    create_attention_mask: bool,
):
    """Build masks and position id for left to right model.

    Args:
        data (torch.Tensor): The data tenor that holds the tokens from the dataset

        eod_token (int): ID of the token to that is considered the EOD

        reset_position_ids (bool): Switch to reset the document position ID's

        reset_attention_mask (bool): Switch to reset the attention mask

        eod_mask_loss (bool): Switch to enable the EOD mask loss

        create_attention_mask (bool): Switch to enable the attention masks generation. Can be
            disabled if attention kernel generates masks by itself.

    Returns:
        torch.Tensor: Attention mask needed to be used for Attention

        torch.Tensor: The mask used for loss value during training

        torch.Tensor: The position ID's of the token
    """
    seq_length = data.numel()

    if create_attention_mask:
        attention_mask = torch.tril(
            torch.ones((seq_length, seq_length), device=data.device)
        ).unsqueeze(0)
    else:
        attention_mask = None

    # Loss mask.
    loss_mask = torch.ones(seq_length, dtype=torch.float, device=data.device)
    if eod_mask_loss:
        loss_mask[data == eod_token] = 0.0

    # Position ids.
    position_ids = torch.arange(seq_length, dtype=torch.long, device=data.device)
    # We need to clone as the ids will be modifed based on batch index.
    if reset_position_ids:
        position_ids = position_ids.clone()

    if reset_position_ids or reset_attention_mask:
        # Find indices where EOD token is.
        eod_index = position_ids[data == eod_token]
        # Detach indices from positions if going to modify positions.
        if reset_position_ids:
            eod_index = eod_index.clone()

        # Loop through EOD indices:
        prev_index = 0
        for j in range(eod_index.numel()):
            i = eod_index[j]
            # Mask attention loss.
            if reset_attention_mask and attention_mask is not None:
                attention_mask[0, (i + 1) :, : (i + 1)] = 0
            # Reset positions.
            if reset_position_ids:
                position_ids[(i + 1) :] -= i + 1 - prev_index
                prev_index = i + 1

    if attention_mask is not None:
        # Convert attention mask to binary:
        attention_mask = attention_mask < 0.5

    return attention_mask, loss_mask, position_ids


class MockGPTLowLevelDataset:
    """The mock GPT low level dataset

    This class is meant to generate tokenized data in the classic "Megatron-LM" GPT style. Notably,
    we add the end of document token to each element indexed in __getitem__

    Args:
        tokenizer (MegatronTokenizerBase): The tokenizer the special token information of which
        we use to augment the mock data.
    """

    seed: int = 0
    """The hard-coded random seed to use to set the NumPy RNG"""

    size: int = 100000
    """The hard-coded number of samples to generate"""

    max_sequence_length: int = 4096
    """The hard-coded max sequence length of the random generated sequences"""

    def __init__(self, tokenizer: MegatronTokenizerBase) -> None:
        self.vocab_size = tokenizer.vocab_size
        self.eod_token = tokenizer.eod
        rng = numpy.random.default_rng(seed=self.seed)
        self.sequence_lengths = rng.integers(
            low=1, high=self.max_sequence_length, size=self.size, dtype=numpy.int32
        )

    def __len__(self) -> int:
        return self.size

    def __getitem__(self, idx: int) -> numpy.number:
        length = self.sequence_lengths[idx]
        sample = numpy.int64(
            numpy.concatenate([(numpy.arange(length - 1) + 1) % self.vocab_size, [self.eod_token]])
        )
        return sample

    def get(self, idx: int, offset: int = 0, length: Optional[int] = None) -> numpy.ndarray:
        """This function is an abstraction over __getitem__ with support for slicing

        Args:
            idx (int): The index into the dataset

            offset (int): The integer token offset in the sequence

            length (Optional[int]): The number of tokens to grab from the sequence

        Returns:
            numpy.ndarray: The sequence tokens at the index
        """
        if length is None:
            length = self.sequence_lengths[idx] - offset
        return self[idx][offset : offset + length]


class MockGPTDataset(GPTDataset):
    """The mock GPT dataset

    Args:
        dataset (MockGPTLowLevelDataset): The MockGPTLowLevelDataset around which to build
            the MockGPTDataset

        dataset_path (Optional[str]): This argument is of no consequence for the MockGPTDataset

        indices (numpy.ndarray): The set of the dataset indices to expose

        num_samples (int): The number of samples to draw from the dataset

        index_split (Split): The indices Split

        config (GPTDatasetConfig): The config
    """

    def __init__(
        self,
        dataset: MockGPTLowLevelDataset,
        dataset_path: Optional[str],
        indices: numpy.ndarray,
        num_samples: int,
        index_split: Split,
        config: GPTDatasetConfig,
    ) -> None:
        assert config.mock

        super().__init__(
            dataset,  # type: ignore[arg-type]
            dataset_path,
            indices,
            num_samples,
            index_split,
            config,
        )

    @staticmethod
    def numel_low_level_dataset(low_level_dataset: MockGPTLowLevelDataset) -> int:
        """Abstract method implementation

        Args:
            low_level_dataset (MockGPTLowLevelDataset): The underlying MockGPTLowLevelDataset

        Returns:
            int: The number of unique elements in the underlying MockGPTLowLevelDataset
        """
        return len(low_level_dataset)

    @staticmethod
    def build_low_level_dataset(  # type: ignore[override]
        dataset_path: Optional[str], config: GPTDatasetConfig
    ) -> MockGPTLowLevelDataset:
        """Abstract method implementation

        Args:
            dataset_path (Optional[str]): This argument is of no consequence for the
                MockGPTLowLevelDataset

            config (GPTDatasetConfig): The config

        Returns:
            MockGPTLowLevelDataset: The underlying MockGPTLowLevelDataset
        """
        return MockGPTLowLevelDataset(config.tokenizer)


================================================
FILE: megatron/core/datasets/helpers.cpp
================================================
/* Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved. */

/* Helper methods for fast index mapping builds */

#include <algorithm>
#include <cassert>
#include <iostream>
#include <limits>
#include <math.h>
#include <set>
#include <stdexcept>
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <random>

namespace py = pybind11;
using namespace std;

const int32_t LONG_SENTENCE_LEN = 512;


void build_exhaustive_blending_indices(py::array_t<int16_t> &dataset_index, py::array_t<int64_t> &dataset_sample_index, const py::array_t<int64_t> &sizes, const int32_t num_datasets) {
  /*
      Build blending indices by sampling exactly as many samples from dataset[i]
      as is requested by sizes[i] for all i in the range [0, num_datasets).
  */
  auto dataset_index_ptr = dataset_index.mutable_unchecked<1>();
  auto dataset_sample_index_ptr = dataset_sample_index.mutable_unchecked<1>();
  auto sizes_ptr = sizes.unchecked<1>();

  int64_t total_size = 0;
  int64_t dataset_sample_counts[num_datasets];
  std::set<int32_t> dataset_unspent_indices;
  for (int32_t i = 0; i < num_datasets; ++i) {
    total_size += sizes_ptr[i];
    dataset_sample_counts[i] = 0;
    dataset_unspent_indices.insert(i);
  }

  // still need fractional weights to sample in proportion to sizes
  double weights[num_datasets];
  for (int32_t i = 0; i < num_datasets; ++i) {
    weights[i] = sizes_ptr[i] / static_cast<double>(total_size);
  }

  int64_t index_sample = 0;
  while (dataset_unspent_indices.size() > 0) {
    double index_sample_double = std::max(static_cast<double>(index_sample), 1.0);

    int64_t error_argmax = -1;
    double error_max = std::numeric_limits<double>::lowest();

    for (int32_t index_dataset : dataset_unspent_indices) {
      double error = weights[index_dataset] * index_sample_double - static_cast<double>(dataset_sample_counts[index_dataset]);
      if (error > error_max) {
        error_argmax = index_dataset;
        error_max = error;
      }
    }
    assert(error_argmax >= 0);

    // Populate the indices.
    dataset_index_ptr[index_sample] = static_cast<int16_t>(error_argmax);
    dataset_sample_index_ptr[index_sample] = dataset_sample_counts[error_argmax];

    // Update the total samples.
    dataset_sample_counts[error_argmax] += 1;

    if (sizes_ptr[error_argmax] - static_cast<double>(dataset_sample_counts[error_argmax]) == 0) {
      dataset_unspent_indices.erase(error_argmax);
    }

    index_sample += 1;
  }
}

void build_blending_indices(py::array_t<int16_t> &dataset_index,
                            py::array_t<int64_t> &dataset_sample_index,
                            const py::array_t<double> &weights,
                            const int32_t num_datasets,
                            const int64_t size, const bool verbose)
{
  /* Given multiple datasets and a weighting array, build samples
   such that it follows those wieghts.*/

  if (verbose)
  {
    std::cout << "> building indices for blended datasets ..." << std::endl;
  }

  // Get the pointer access without the checks.
  auto dataset_index_ptr = dataset_index.mutable_unchecked<1>();
  auto dataset_sample_index_ptr = dataset_sample_index.mutable_unchecked<1>();
  auto weights_ptr = weights.unchecked<1>();

  // Initialize buffer for number of samples used for each dataset.
  int64_t current_samples[num_datasets];
  for (int64_t i = 0; i < num_datasets; ++i)
  {
    current_samples[i] = 0;
  }

  // For each sample:
  for (int64_t sample_idx = 0; sample_idx < size; ++sample_idx)
  {

    // Determine where the max error in sampling is happening.
    auto sample_idx_double = std::max(static_cast<double>(sample_idx), 1.0);
    int64_t max_error_index = 0;
    double max_error = weights_ptr[0] * sample_idx_double -
                       static_cast<double>(current_samples[0]);
    for (int64_t dataset_idx = 1; dataset_idx < num_datasets; ++dataset_idx)
    {
      double error = weights_ptr[dataset_idx] * sample_idx_double -
                     static_cast<double>(current_samples[dataset_idx]);
      if (error > max_error)
      {
        max_error = error;
        max_error_index = dataset_idx;
      }
    }

    // Populate the indices.
    dataset_index_ptr[sample_idx] = static_cast<int16_t>(max_error_index);
    dataset_sample_index_ptr[sample_idx] = current_samples[max_error_index];

    // Update the total samples.
    current_samples[max_error_index] += 1;
  }

  // print info
  if (verbose)
  {
    std::cout << " > sample ratios:" << std::endl;
    for (int64_t dataset_idx = 0; dataset_idx < num_datasets; ++dataset_idx)
    {
      auto ratio = static_cast<double>(current_samples[dataset_idx]) /
                   static_cast<double>(size);
      std::cout << "   dataset " << dataset_idx << ", input: " << weights_ptr[dataset_idx] << ", achieved: " << ratio << std::endl;
    }
  }
}

template <typename T>
py::array_t<T> build_sample_idx(
  const py::array_t<int32_t> &sizes_,
  const py::array_t<int32_t> &document_idx_,
  const int32_t seq_length,
  const int32_t num_epochs,
  const int64_t tokens_per_epoch,
  const bool drop_last_partial_sequence = true,
  const int add_extra_token_to_sequence = 1
){
  /* 
      Sample index (sample_idx) is used for gpt2 like dataset for which the documents are flattened
      and the samples are built based on this 1-D flatten array. It is a 2D array with sizes
      [number-of-samples + 1, 2] where [..., 0] contains the index into `doc_idx` and [..., 1] is
      the starting offset in that document.
  */

  // Consistency checks.
  assert(seq_length > 1);
  assert(num_epochs > 0);
  assert(tokens_per_epoch > 1);

  // Remove bound checks.
  auto sizes = sizes_.unchecked<1>();
  auto document_idx = document_idx_.unchecked<1>();
  
  // NOTE(asolergi-nv): This is the logic used to compute the number of samples in the GPTDataset when leveraging defer_npy_index_mmap
  // Build the sample idx as a contiguous 1-D array of type T.
  int64_t num_samples = 0;
  if (drop_last_partial_sequence == true) {
    num_samples = (num_epochs * tokens_per_epoch - add_extra_token_to_sequence) / seq_length;
  }
  else {
    num_samples = ceil(float(num_epochs * tokens_per_epoch - add_extra_token_to_sequence) / seq_length);
  }
  T *sample_idx = new T[2 * (num_samples + 1)];

  // Index into sample_idx.
  int64_t sample_idx_index = 0;
  // Index into document_idx.
  T document_idx_index = 0;
  // Begining offset for each document.
  T doc_offset = 0;
  // Start with first document and no offset.
  sample_idx[2 * sample_idx_index] = document_idx_index;
  sample_idx[2 * sample_idx_index + 1] = doc_offset;
  ++sample_idx_index;

  while (sample_idx_index <= num_samples)
  {
    // Start with a fresh sequence.
    int32_t remaining_seq_length = seq_length + add_extra_token_to_sequence;
    while (remaining_seq_length != 0)
    {
      // Get the document length.
      auto document_index = document_idx[document_idx_index];
      auto document_length = sizes[document_index] - doc_offset;
      // And add it to the current sequence.
      remaining_seq_length -= document_length;
      // If we have more than a full sequence, adjust offset and set
      // remaining length to zero so we return from the while loop.
      // Note that -1 here is for the same reason we have -1 in
      // `_num_epochs` calculations.
      if (remaining_seq_length <= 0)
      {
        doc_offset += (remaining_seq_length + document_length - add_extra_token_to_sequence);
        remaining_seq_length = 0;
      }
      else
      {
        // Otherwise, start from the begining of the next document.
        if (document_idx_index == (document_idx_.shape(0) - 1))
        {
          // If we have reached the end of the documents, break.
          assert(sample_idx_index == num_samples);
          doc_offset = sizes[document_idx[document_idx_index]] - add_extra_token_to_sequence;
          break;
        }
        ++document_idx_index;
        doc_offset = 0;
      }
    }
    // Record the sequence.
    sample_idx[2 * sample_idx_index] = document_idx_index;
    sample_idx[2 * sample_idx_index + 1] = doc_offset;
    ++sample_idx_index;
  }

  // Method to deallocate memory.
  py::capsule free_when_done(
    sample_idx, 
    [](void *mem_){
	    T *mem = reinterpret_cast<T*>(mem_);
	    delete[] mem;
    }
  );

  // Return the numpy array.
  const auto byte_size = sizeof(T);
  return py::array_t<T>(
    std::vector<int64_t>{num_samples + 1, 2}, // shape
    {2 * byte_size, byte_size},               // C-style contiguous strides
    sample_idx,                               // the data pointer
    free_when_done                            // numpy array references
  );
}

inline int32_t get_target_sample_len(const int32_t short_seq_ratio,
                                     const int32_t max_length,
                                     std::mt19937 &rand32_gen)
{
  /* Training sample length. */
  if (short_seq_ratio == 0)
  {
    return max_length;
  }
  const auto random_number = rand32_gen();
  if ((random_number % short_seq_ratio) == 0)
  {
    return 2 + random_number % (max_length - 1);
  }
  return max_length;
}

template <typename DocIdx>
py::array build_mapping_impl(const py::array_t<int64_t> &docs_,
                             const py::array_t<int32_t> &sizes_,
                             const int32_t num_epochs,
                             const uint64_t max_num_samples,
                             const int32_t max_seq_length,
                             const double short_seq_prob,
                             const int32_t seed,
                             const bool verbose,
                             const int32_t min_num_sent)
{
  /* Build a mapping of (start-index, end-index, sequence-length) where
     start and end index are the indices of the sentences in the sample
     and sequence-length is the target sequence length.
  */

  // Consistency checks.
  assert(num_epochs > 0);
  assert(max_seq_length > 1);
  assert(short_seq_prob >= 0.0);
  assert(short_seq_prob <= 1.0);
  assert(seed > 0);

  // Remove bound checks.
  auto docs = docs_.unchecked<1>();
  auto sizes = sizes_.unchecked<1>();

  // For efficiency, convert probability to ratio. Note: rand() generates int.
  int32_t short_seq_ratio = 0;
  if (short_seq_prob > 0)
  {
    short_seq_ratio = static_cast<int32_t>(round(1.0 / short_seq_prob));
  }

  if (verbose)
  {
    const auto sent_start_index = docs[0];
    const auto sent_end_index = docs[docs_.shape(0) - 1];
    const auto num_sentences = sent_end_index - sent_start_index;
    cout << "    using:" << endl
         << std::flush;
    cout << "     number of documents:            " << docs_.shape(0) - 1 << endl
         << std::flush;
    cout << "     sentences range:                [" << sent_start_index << ", " << sent_end_index << ")" << endl
         << std::flush;
    cout << "     total number of sentences:      " << num_sentences << endl
         << std::flush;
    cout << "     number of epochs:               " << num_epochs << endl
         << std::flush;
    cout << "     maximum number of samples:      " << max_num_samples << endl
         << std::flush;
    cout << "     maximum sequence length:        " << max_seq_length << endl
         << std::flush;
    cout << "     short sequence probability:     " << short_seq_prob << endl
         << std::flush;
    cout << "     short sequence ration (1/prob): " << short_seq_ratio << endl
         << std::flush;
    cout << "     seed:                           " << seed << endl
         << std::flush;
  }

  // Mapping and it's length (1D).
  int64_t num_samples = -1;
  DocIdx *maps = NULL;

  // Perform two iterations, in the first iteration get the size
  // and allocate memory and in the second iteration populate the map.
  bool second = false;
  for (int32_t iteration = 0; iteration < 2; ++iteration)
  {

    // Set the seed so both iterations produce the same results.
    std::mt19937 rand32_gen(seed);

    // Set the flag on second iteration.
    second = (iteration == 1);

    // Counters:
    uint64_t empty_docs = 0;
    uint64_t one_sent_docs = 0;
    uint64_t long_sent_docs = 0;

    // Current map index.
    uint64_t map_index = 0;

    // For each epoch:
    for (int32_t epoch = 0; epoch < num_epochs; ++epoch)
    {
      if (map_index >= max_num_samples)
      {
        if (verbose && (!second))
        {
          cout << "    reached " << max_num_samples << " samples after "
               << epoch << " epochs ..." << endl
               << std::flush;
        }
        break;
      }
      // For each document:
      for (int32_t doc = 0; doc < (docs.shape(0) - 1); ++doc)
      {

        // Document sentences are in [sent_index_first, sent_index_last)
        const auto sent_index_first = docs[doc];
        const auto sent_index_last = docs[doc + 1];

        // At the begining of the document previous index is the
        // start index.
        auto prev_start_index = sent_index_first;

        // Remaining documents.
        auto num_remain_sent = sent_index_last - sent_index_first;

        // Some bookkeeping
        if ((epoch == 0) && (!second))
        {
          if (num_remain_sent == 0)
          {
            ++empty_docs;
          }
          if (num_remain_sent == 1)
          {
            ++one_sent_docs;
          }
        }

        // Detect documents with long sentences.
        bool contains_long_sentence = false;
        if (num_remain_sent > 1)
        {
          for (auto sent_index = sent_index_first;
               sent_index < sent_index_last; ++sent_index)
          {
            if (sizes[sent_index] > LONG_SENTENCE_LEN)
            {
              if ((epoch == 0) && (!second))
              {
                ++long_sent_docs;
              }
              contains_long_sentence = true;
              break;
            }
          }
        }

        // If we have more than two sentences.
        if ((num_remain_sent >= min_num_sent) && (!contains_long_sentence))
        {

          // Set values.
          auto seq_len = int32_t{0};
          auto num_sent = int32_t{0};
          auto target_seq_len = get_target_sample_len(short_seq_ratio,
                                                      max_seq_length,
                                                      rand32_gen);

          // Loop through sentences.
          for (auto sent_index = sent_index_first;
               sent_index < sent_index_last; ++sent_index)
          {

            // Add the size and number of sentences.
            seq_len += sizes[sent_index];
            ++num_sent;
            --num_remain_sent;

            // If we have reached the target length.
            // and if not only one sentence is left in the document.
            // and if we have at least two sentneces.
            // and if we have reached end of the document.
            if (((seq_len >= target_seq_len) &&
                 (num_remain_sent > 1) &&
                 (num_sent >= min_num_sent)) ||
                (num_remain_sent == 0))
            {

              // Check for overflow.
              if ((3 * map_index + 2) >
                  std::numeric_limits<int64_t>::max())
              {
                cout << "number of samples exceeded maximum "
                     << "allowed by type int64: "
                     << std::numeric_limits<int64_t>::max()
                     << endl;
                throw std::overflow_error("Number of samples");
              }

              // Populate the map.
              if (second)
              {
                const auto map_index_0 = 3 * map_index;
                maps[map_index_0] = static_cast<DocIdx>(prev_start_index);
                maps[map_index_0 + 1] = static_cast<DocIdx>(sent_index + 1);
                maps[map_index_0 + 2] = static_cast<DocIdx>(target_seq_len);
              }

              // Update indices / counters.
              ++map_index;
              prev_start_index = sent_index + 1;
              target_seq_len = get_target_sample_len(short_seq_ratio,
                                                     max_seq_length,
                                                     rand32_gen);
              seq_len = 0;
              num_sent = 0;
            }

          } // for (auto sent_index=sent_index_first; ...
        }   // if (num_remain_sent > 1) {
      }     // for (int doc=0; doc < num_docs; ++doc) {
    }       // for (int epoch=0; epoch < num_epochs; ++epoch) {

    if (!second)
    {
      if (verbose)
      {
        cout << "   number of empty documents: " << empty_docs << endl
             << std::flush;
        cout << "   number of documents with one sentence: " << one_sent_docs << endl
             << std::flush;
        cout << "   number of documents with long sentences: " << long_sent_docs << endl
             << std::flush;
        cout << "   will create mapping for " << map_index << " samples" << endl
             << std::flush;
      }
      assert(maps == NULL);
      assert(num_samples < 0);
      maps = new DocIdx[3 * map_index];
      num_samples = static_cast<int64_t>(map_index);
    }

  } // for (int iteration=0; iteration < 2; ++iteration) {

  // Shuffle.
  // We need a 64 bit random number generator as we might have more
  // than 2 billion samples.
  std::mt19937_64 rand64_gen(seed + 1);
  for (auto i = (num_samples - 1); i > 0; --i)
  {
    const auto j = static_cast<int64_t>(rand64_gen() % (i + 1));
    const auto i0 = 3 * i;
    const auto j0 = 3 * j;
    // Swap values.
    swap(maps[i0], maps[j0]);
    swap(maps[i0 + 1], maps[j0 + 1]);
    swap(maps[i0 + 2], maps[j0 + 2]);
  }

  // Method to deallocate memory.
  py::capsule free_when_done(maps, [](void *mem_)
                             {
            DocIdx *mem = reinterpret_cast<DocIdx*>(mem_);
	    delete[] mem; });

  // Return the numpy array.
  const auto byte_size = sizeof(DocIdx);
  return py::array(std::vector<int64_t>{num_samples, 3}, // shape
                   {3 * byte_size, byte_size},           // C-style contiguous strides
                   maps,                                 // the data pointer
                   free_when_done);                      // numpy array references
}

py::array build_mapping(const py::array_t<int64_t> &docs_,
                        const py::array_t<int> &sizes_,
                        const int num_epochs,
                        const uint64_t max_num_samples,
                        const int max_seq_length,
                        const double short_seq_prob,
                        const int seed,
                        const bool verbose,
                        const int32_t min_num_sent)
{

  if (sizes_.size() > std::numeric_limits<uint32_t>::max())
  {
    if (verbose)
    {
      cout << "    using uint64 for data mapping..." << endl
           << std::flush;
    }
    return build_mapping_impl<uint64_t>(docs_, sizes_, num_epochs,
                                        max_num_samples, max_seq_length,
                                        short_seq_prob, seed, verbose,
                                        min_num_sent);
  }
  else
  {
    if (verbose)
    {
      cout << "    using uint32 for data mapping..." << endl
           << std::flush;
    }
    return build_mapping_impl<uint32_t>(docs_, sizes_, num_epochs,
                                        max_num_samples, max_seq_length,
                                        short_seq_prob, seed, verbose,
                                        min_num_sent);
  }
}

template <typename DocIdx>
py::array build_blocks_mapping_impl(const py::array_t<int64_t> &docs_,
                                    const py::array_t<int32_t> &sizes_,
                                    const py::array_t<int32_t> &titles_sizes_,
                                    const int32_t num_epochs,
                                    const uint64_t max_num_samples,
                                    const int32_t max_seq_length,
                                    const int32_t seed,
                                    const bool verbose,
                                    const bool use_one_sent_blocks)
{
  /* Build a mapping of (start-index, end-index, sequence-length) where
     start and end index are the indices of the sentences in the sample
     and sequence-length is the target sequence length.
  */

  // Consistency checks.
  assert(num_epochs > 0);
  assert(max_seq_length > 1);
  assert(seed > 0);

  // Remove bound checks.
  auto docs = docs_.unchecked<1>();
  auto sizes = sizes_.unchecked<1>();
  auto titles_sizes = titles_sizes_.unchecked<1>();

  if (verbose)
  {
    const auto sent_start_index = docs[0];
    const auto sent_end_index = docs[docs_.shape(0) - 1];
    const auto num_sentences = sent_end_index - sent_start_index;
    cout << "    using:" << endl
         << std::flush;
    cout << "     number of documents:            " << docs_.shape(0) - 1 << endl
         << std::flush;
    cout << "     sentences range:                [" << sent_start_index << ", " << sent_end_index << ")" << endl
         << std::flush;
    cout << "     total number of sentences:      " << num_sentences << endl
         << std::flush;
    cout << "     number of epochs:               " << num_epochs << endl
         << std::flush;
    cout << "     maximum number of samples:      " << max_num_samples << endl
         << std::flush;
    cout << "     maximum sequence length:        " << max_seq_length << endl
         << std::flush;
    cout << "     seed:                           " << seed << endl
         << std::flush;
  }

  // Mapping and its length (1D).
  int64_t num_samples = -1;
  DocIdx *maps = NULL;

  // Acceptable number of sentences per block.
  int min_num_sent = 2;
  if (use_one_sent_blocks)
  {
    min_num_sent = 1;
  }

  // Perform two iterations, in the first iteration get the size
  // and allocate memory and in the second iteration populate the map.
  bool second = false;
  for (int32_t iteration = 0; iteration < 2; ++iteration)
  {

    // Set the flag on second iteration.
    second = (iteration == 1);

    // Current map index.
    uint64_t map_index = 0;

    uint64_t empty_docs = 0;
    uint64_t one_sent_docs = 0;
    uint64_t long_sent_docs = 0;
    // For each epoch:
    for (int32_t epoch = 0; epoch < num_epochs; ++epoch)
    {
      // assign every block a unique id
      int32_t block_id = 0;

      if (map_index >= max_num_samples)
      {
        if (verbose && (!second))
        {
          cout << "    reached " << max_num_samples << " samples after "
               << epoch << " epochs ..." << endl
               << std::flush;
        }
        break;
      }
      // For each document:
      for (int32_t doc = 0; doc < (docs.shape(0) - 1); ++doc)
      {

        // Document sentences are in [sent_index_first, sent_index_last)
        const auto sent_index_first = docs[doc];
        const auto sent_index_last = docs[doc + 1];
        const auto target_seq_len = max_seq_length - titles_sizes[doc];

        // At the begining of the document previous index is the
        // start index.
        auto prev_start_index = sent_index_first;

        // Remaining documents.
        auto num_remain_sent = sent_index_last - sent_index_first;

        // Some bookkeeping
        if ((epoch == 0) && (!second))
        {
          if (num_remain_sent == 0)
          {
            ++empty_docs;
          }
          if (num_remain_sent == 1)
          {
            ++one_sent_docs;
          }
        }
        // Detect documents with long sentences.
        bool contains_long_sentence = false;
        if (num_remain_sent >= min_num_sent)
        {
          for (auto sent_index = sent_index_first;
               sent_index < sent_index_last; ++sent_index)
          {
            if (sizes[sent_index] > LONG_SENTENCE_LEN)
            {
              if ((epoch == 0) && (!second))
              {
                ++long_sent_docs;
              }
              contains_long_sentence = true;
              break;
            }
          }
        }
        // If we have enough sentences and no long sentences.
        if ((num_remain_sent >= min_num_sent) && (!contains_long_sentence))
        {

          // Set values.
          auto seq_len = int32_t{0};
          auto num_sent = int32_t{0};

          // Loop through sentences.
          for (auto sent_index = sent_index_first;
               sent_index < sent_index_last; ++sent_index)
          {

            // Add the size and number of sentences.
            seq_len += sizes[sent_index];
            ++num_sent;
            --num_remain_sent;

            // If we have reached the target length.
            // and there are an acceptable number of sentences left
            // and if we have at least the minimum number of sentences.
            // or if we have reached end of the document.
            if (((seq_len >= target_seq_len) &&
                 (num_remain_sent >= min_num_sent) &&
                 (num_sent >= min_num_sent)) ||
                (num_remain_sent == 0))
            {

              // Populate the map.
              if (second)
              {
                const auto map_index_0 = 4 * map_index;
                // Each sample has 4 items: the starting sentence index, ending sentence index,
                // the index of the document from which the block comes (used for fetching titles)
                // and the unique id of the block (used for creating block indexes)

                maps[map_index_0] = static_cast<DocIdx>(prev_start_index);
                maps[map_index_0 + 1] = static_cast<DocIdx>(sent_index + 1);
                maps[map_index_0 + 2] = static_cast<DocIdx>(doc);
                maps[map_index_0 + 3] = static_cast<DocIdx>(block_id);
              }

              // Update indices / counters.
              ++map_index;
              ++block_id;
              prev_start_index = sent_index + 1;
              seq_len = 0;
              num_sent = 0;
            }
          } // for (auto sent_index=sent_index_first; ...
        }   // if (num_remain_sent > 1) {
      }     // for (int doc=0; doc < num_docs; ++doc) {
    }       // for (int epoch=0; epoch < num_epochs; ++epoch) {

    if (!second)
    {
      if (verbose)
      {
        cout << "   number of empty documents: " << empty_docs << endl
             << std::flush;
        cout << "   number of documents with one sentence: " << one_sent_docs << endl
             << std::flush;
        cout << "   number of documents with long sentences: " << long_sent_docs << endl
             << std::flush;
        cout << "   will create mapping for " << map_index << " samples" << endl
             << std::flush;
      }
      assert(maps == NULL);
      assert(num_samples < 0);
      maps = new DocIdx[4 * map_index];
      num_samples = static_cast<int64_t>(map_index);
    }

  } // for (int iteration=0; iteration < 2; ++iteration) {

  // Shuffle.
  // We need a 64 bit random number generator as we might have more
  // than 2 billion samples.
  std::mt19937_64 rand64_gen(seed + 1);
  for (auto i = (num_samples - 1); i > 0; --i)
  {
    const auto j = static_cast<int64_t>(rand64_gen() % (i + 1));
    const auto i0 = 4 * i;
    const auto j0 = 4 * j;
    // Swap values.
    swap(maps[i0], maps[j0]);
    swap(maps[i0 + 1], maps[j0 + 1]);
    swap(maps[i0 + 2], maps[j0 + 2]);
    swap(maps[i0 + 3], maps[j0 + 3]);
  }

  // Method to deallocate memory.
  py::capsule free_when_done(maps, [](void *mem_)
                             {
            DocIdx *mem = reinterpret_cast<DocIdx*>(mem_);
	    delete[] mem; });

  // Return the numpy array.
  const auto byte_size = sizeof(DocIdx);
  return py::array(std::vector<int64_t>{num_samples, 4}, // shape
                   {4 * byte_size, byte_size},           // C-style contiguous strides
                   maps,                                 // the data pointer
                   free_when_done);                      // numpy array references
}

py::array build_blocks_mapping(const py::array_t<int64_t> &docs_,
                               const py::array_t<int> &sizes_,
                               const py::array_t<int> &titles_sizes_,
                               const int num_epochs,
                               const uint64_t max_num_samples,
                               const int max_seq_length,
                               const int seed,
                               const bool verbose,
                               const bool use_one_sent_blocks)
{

  if (sizes_.size() > std::numeric_limits<uint32_t>::max())
  {
    if (verbose)
    {
      cout << "    using uint64 for data mapping..." << endl
           << std::flush;
    }
    return build_blocks_mapping_impl<uint64_t>(docs_, sizes_, titles_sizes_,
                                               num_epochs, max_num_samples, max_seq_length, seed, verbose, use_one_sent_blocks);
  }
  else
  {
    if (verbose)
    {
      cout << "    using uint32 for data mapping..." << endl
           << std::flush;
    }
    return build_blocks_mapping_impl<uint32_t>(docs_, sizes_, titles_sizes_,
                                               num_epochs, max_num_samples, max_seq_length, seed, verbose, use_one_sent_blocks);
  }
}

PYBIND11_MODULE(helpers_cpp, m)
{
  m.def("build_mapping", &build_mapping);
  m.def("build_blocks_mapping", &build_blocks_mapping);
  m.def("build_sample_idx_int32", &build_sample_idx<int32_t>);
  m.def("build_sample_idx_int64", &build_sample_idx<int64_t>);
  m.def("build_blending_indices", &build_blending_indices);
  m.def("build_exhaustive_blending_indices", &build_exhaustive_blending_indices);
}


================================================
FILE: megatron/core/datasets/helpers.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


# Implicit imports for backwards compatibility
# Explicit imports for readability
import numpy

from megatron.core.datasets.helpers_cpp import *
from megatron.core.datasets.helpers_cpp import build_sample_idx_int32, build_sample_idx_int64


def build_sample_idx(
    sizes: numpy.ndarray,
    document_indices: numpy.ndarray,
    sequence_length: int,
    num_epochs: int,
    tokens_per_epoch: int,
    drop_last_partial_sequence: bool = True,
    add_extra_token_to_sequence: bool = True,
):
    """Build the 2-D sample index using the properly typed templated C++ function from helpers.cpp

    Args:
        sizes (numpy.ndarray): The 1-D array of document lengths

        document_indices (numpy.ndarray): The 1-D array of document indices

        sequence_length (int): The sequence length

        num_epochs (int): The number of epochs

        tokens_per_epoch (int): The number of tokens per epoch

        drop_last_partial_sequence (bool): Whether to omit the last partial sequence in the sample
            index should it exist. Defaults to True.

        add_extra_token_to_sequence (bool): Whether to build samples with sequence length
            `sequence_length + 1`. Defaults to True.

    Returns:
        numpy.ndarray: The 2-D sample index
    """

    sample_idx_max = max(document_indices.shape[0], sizes.max())
    if sample_idx_max <= numpy.iinfo(numpy.int32).max:
        sample_idx = build_sample_idx_int32(
            sizes,
            document_indices,
            sequence_length,
            num_epochs,
            tokens_per_epoch,
            drop_last_partial_sequence,
            1 if add_extra_token_to_sequence else 0,
        )
        assert sample_idx.min() >= 0 and sample_idx.max() <= sample_idx_max
    else:
        sample_idx = build_sample_idx_int64(
            sizes,
            document_indices,
            sequence_length,
            num_epochs,
            tokens_per_epoch,
            drop_last_partial_sequence,
            1 if add_extra_token_to_sequence else 0,
        )
    return sample_idx


================================================
FILE: megatron/core/datasets/indexed_dataset.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# Essentially re-written in entirety

import gc
import logging
import os
import shutil
import struct
import time
from abc import ABC, abstractmethod
from collections.abc import Iterable
from datetime import datetime
from enum import Enum
from functools import lru_cache
from itertools import accumulate
from types import TracebackType
from typing import List, Optional, Tuple, Type, Union

import numpy

try:
    import boto3
except ModuleNotFoundError:
    pass

import torch

from megatron.core.datasets.object_storage_utils import S3Config  # pylint: disable=unused-import
from megatron.core.datasets.object_storage_utils import (
    ObjectStorageConfig,
    cache_index_file,
    dataset_exists,
    get_index_cache_path,
    get_object_storage_access,
    is_object_storage_path,
    parse_s3_path,
)
from megatron.core.msc_utils import MultiStorageClientFeature
from megatron.core.utils import log_single_rank

logger = logging.getLogger(__name__)

_INDEX_HEADER = b"MMIDIDX\x00\x00"


class DType(Enum):
    """The NumPy data type Enum for writing/reading the IndexedDataset indices"""

    uint8 = 1
    int8 = 2
    int16 = 3
    int32 = 4
    int64 = 5
    float64 = 6
    float32 = 7
    uint16 = 8

    @classmethod
    def code_from_dtype(cls, value: Type[numpy.number]) -> int:
        """Get the code from the dtype

        Args:
            value (Type[numpy.number]): The dtype

        Returns:
            int: The code
        """
        return cls[value.__name__].value

    @classmethod
    def dtype_from_code(cls, value: int) -> Type[numpy.number]:
        """Get the dtype from the code

        Args:
            value (int): The code

        Returns:
            Type[numpy.number]: The dtype
        """
        return getattr(numpy, cls(value).name)

    @staticmethod
    def size(key: Union[int, Type[numpy.number]]) -> int:
        """Get the size of the dtype/code in bytes

        Args:
            key (Union[int, Type[numpy.number]]): The dtype or code

        Raises:
            ValueError: If the key is neither dtype nor integer code

        Returns:
            int: The size of the dtype/code in in bytes
        """
        if isinstance(key, int):
            return DType.dtype_from_code(key)().itemsize
        elif numpy.number in key.__mro__:
            return key().itemsize
        else:
            raise ValueError

    @staticmethod
    def optimal_dtype(cardinality: Optional[int]) -> Type[numpy.number]:
        """Get the dtype to use for an index of a certain cardinality

        Args:
            cardinality (Optional[int]): The number of elements to be indexed

        Returns:
            Type[numpy.number]: The dtype to use for the index
        """
        if cardinality is not None and cardinality < 65500:
            return numpy.uint16
        else:
            return numpy.int32


class _IndexWriter(object):
    """Object class to write the index (.idx) file

    Args:
        idx_path (str): The path to the index file

        dtype (Type[numpy.number]): The dtype of the index file
    """

    def __init__(self, idx_path: str, dtype: Type[numpy.number]) -> None:
        self.idx_path = idx_path
        self.dtype = dtype

    def __enter__(self) -> "_IndexWriter":
        """Enter the context introduced by the 'with' keyword

        Returns:
            _IndexWriter: The instance
        """
        if MultiStorageClientFeature.is_enabled():
            msc = MultiStorageClientFeature.import_package()
            self.idx_writer = msc.open(self.idx_path, "wb")
        else:
            self.idx_writer = open(self.idx_path, "wb")
        # fixed, vestigial practice
        self.idx_writer.write(_INDEX_HEADER)
        # fixed, vestigial practice
        self.idx_writer.write(struct.pack("<Q", 1))
        # the numeric code for the dtype
        self.idx_writer.write(struct.pack("<B", DType.code_from_dtype(self.dtype)))
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> Optional[bool]:
        """Exit the context introduced by the 'with' keyword

        Args:
            exc_type (Optional[Type[BaseException]]): Exception type

            exc_val (Optional[BaseException]): Exception value

            exc_tb (Optional[TracebackType]): Exception traceback object

        Returns:
            Optional[bool]: Whether to silence the exception
        """
        self.idx_writer.close()
        return None

    def write(
        self,
        sequence_lengths: Iterable[Union[int, numpy.integer]],
        sequence_modes: Optional[Iterable[Union[int, numpy.integer]]],
        document_indices: Iterable[Union[int, numpy.integer]],
    ) -> None:
        """Write the index (.idx) file

        Args:
            sequence_lengths (List[int]): The length of each sequence

            sequence_modes (Optional[List[int]]): The mode of each sequences

            document_indices (List[int]): The seqyebce indices demarcating the end of each document
        """
        sequence_pointers = self._sequence_pointers(sequence_lengths)

        # the number of sequences in the dataset
        sequence_count = len(sequence_lengths)
        self.idx_writer.write(struct.pack("<Q", sequence_count))

        # the number of documents in the dataset
        document_count = len(document_indices)
        self.idx_writer.write(struct.pack("<Q", document_count))

        # the number of tokens per sequence
        self.idx_writer.write(numpy.array(sequence_lengths, dtype=numpy.int32).tobytes(order="C"))

        # the byte offsets for all sequences
        self.idx_writer.write(numpy.array(sequence_pointers, dtype=numpy.int64).tobytes(order="C"))

        # the sequence indices marking the end of each document
        self.idx_writer.write(numpy.array(document_indices, dtype=numpy.int64).tobytes(order="C"))

        # the mode per sequence
        if sequence_modes is not None:
            self.idx_writer.write(numpy.array(sequence_modes, dtype=numpy.int8).tobytes(order="C"))

    def _sequence_pointers(
        self, sequence_lengths: Iterable[Union[int, numpy.integer]]
    ) -> List[int]:
        """Build the sequence pointers per the sequence lengths and dtype size

        Args:
            sequence_lengths (List[int]): The length of each sequence

        Returns:
            List[int]: The pointer to the beginning of each sequence
        """
        itemsize = numpy.int64(DType.size(self.dtype))
        curr_ptr = numpy.int64(0)
        list_ptr = []
        for length in sequence_lengths:
            list_ptr.append(curr_ptr.item())
            curr_ptr += length * itemsize
        return list_ptr


class _IndexReader(object):
    """Object class to read the index (.idx) file

    Args:
        idx_path (str): The path to the index file

        multimodal (bool): Whether the dataset is multimodal

        sequences_per_dataset (Optional[Tuple[int, int]]): The sequences per dataset.

        dtype_code (int): The dtype code of the tokenized documents.
    """

    def __init__(
        self,
        idx_path: str,
        multimodal: bool,
        sequences_per_dataset: Optional[Tuple[int, int]] = None,
        dtype_code: int = None,
    ) -> None:
        log_single_rank(logger, logging.INFO, f"Load the {type(self).__name__} from {idx_path}")

        if sequences_per_dataset:
            self.dtype = DType.dtype_from_code(dtype_code)
            self.dtype_size = DType.size(self.dtype)
            self.sequence_count = sequences_per_dataset[0]
            self.document_count = sequences_per_dataset[1]
            offset = 34  # 9 bytes from the header + 8 bytes from the version
            # + 1 bytes for the dtype code + 8 bytes for the sequence count
            # + 8 bytes for the document count = 34 bytes
        else:
            with open(idx_path, "rb") as stream:
                header = stream.read(9)
                assert header == _INDEX_HEADER, f"bad header, cannot read: {idx_path}"

                version = struct.unpack("<Q", stream.read(8))[0]
                assert version == 1, f"bad version, cannot read: {idx_path}"

                code = struct.unpack("<B", stream.read(1))[0]
                self.dtype = DType.dtype_from_code(code)
                self.dtype_size = DType.size(self.dtype)

                self.sequence_count = struct.unpack("<Q", stream.read(8))[0]
                self.document_count = struct.unpack("<Q", stream.read(8))[0]

                offset = stream.tell()

        self.bin_buffer_mmap = numpy.memmap(idx_path, mode="r", order="C")
        self.bin_buffer = memoryview(self.bin_buffer_mmap)

        log_single_rank(logger, logging.INFO, "\tExtract the sequence lengths")
        t_beg = time.time()
        self.sequence_lengths = numpy.frombuffer(
            self.bin_buffer, dtype=numpy.int32, count=self.sequence_count, offset=offset
        )
        t_end = time.time()
        log_single_rank(logger, logging.DEBUG, f"\t> time elapsed: {t_end - t_beg:4f} seconds")

        log_single_rank(logger, logging.INFO, "\tExtract the sequence pointers")
        t_beg = time.time()
        self.sequence_pointers = numpy.frombuffer(
            self.bin_buffer,
            dtype=numpy.int64,
            count=self.sequence_count,
            offset=offset + self.sequence_lengths.nbytes,
        )
        t_end = time.time()
        log_single_rank(logger, logging.DEBUG, f"\t> time elapsed: {t_end - t_beg:4f} seconds")

        log_single_rank(logger, logging.INFO, "\tExtract the document indices")
        t_beg = time.time()
        self.document_indices = numpy.frombuffer(
            self.bin_buffer,
            dtype=numpy.int64,
            count=self.document_count,
            offset=offset + self.sequence_lengths.nbytes + self.sequence_pointers.nbytes,
        )
        t_end = time.time()
        log_single_rank(logger, logging.DEBUG, f"\t> time elapsed: {t_end - t_beg:4f} seconds")

        self.sequence_modes = None
        if multimodal:
            log_single_rank(logger, logging.INFO, "\tExtract the sequence modes")
            t_beg = time.time()
            self.sequence_modes = numpy.frombuffer(
                self.bin_buffer,
                dtype=numpy.int8,
                count=self.sequence_count,
                offset=offset
                + self.sequence_lengths.nbytes
                + self.sequence_pointers.nbytes
                + self.document_indices.nbytes,
            )
            t_end = time.time()
            log_single_rank(logger, logging.DEBUG, f"\t> time elapsed: {t_end - t_beg:4f} seconds")

        log_single_rank(logger, logging.INFO, f"> total number of sequences: {len(self)}")
        log_single_rank(
            logger,
            logging.INFO,
            f"> total number of documents: {self.document_indices.shape[0] - 1}",
        )

    def __del__(self) -> None:
        """Clean up the object"""
        if hasattr(self, "bin_buffer_mmap"):
            self.bin_buffer_mmap._mmap.close()  # type: ignore[attr-defined]
            del self.bin_buffer_mmap

    def __len__(self) -> int:
        """Return the length of the dataset

        Returns:
            int: The length of the dataset
        """
        return self.sequence_count

    @lru_cache(maxsize=8)
    def __getitem__(self, idx: int) -> Tuple[numpy.int32, numpy.int64, Optional[numpy.int8]]:
        """Return the pointer, length, and mode at the index

        Args:
            idx (int): The index into the dataset

        Returns:
            Tuple[numpy.int32, numpy.int64, Optional[numpy.int8]]: The pointer, length and mode
                at the index
        """
        return (
            self.sequence_pointers[idx],
            self.sequence_lengths[idx],
            self.sequence_modes[idx] if self.sequence_modes is not None else None,
        )


class _BinReader(ABC):
    """Abstract class to read the data (.bin) file"""

    @abstractmethod
    def read(self, dtype: Type[numpy.number], count: int, offset: int) -> numpy.ndarray:
        """Read bytes into a numpy array.

        Args:
            dtype (Type[numpy.number]): Data-type of the returned array.

            count (int): Number of items to read.

            offset (int): Start reading from this offset (in bytes).

        Returns:
            numpy.ndarray: An array with `count` items and data-type `dtype` constructed from
                reading bytes from the data file starting at `offset`.
        """
        pass


class _MMapBinReader(_BinReader):
    """A _BinReader that memory maps the data (.bin) file

    Args:
        bin_path (str): The path to the data (.bin) file.
    """

    def __init__(self, bin_path: str) -> None:
        if MultiStorageClientFeature.is_enabled():
            msc = MultiStorageClientFeature.import_package()
            self._bin_file_reader = msc.open(bin_path, mode="rb")
        else:
            self._bin_file_reader = open(bin_path, mode="rb")
        self._bin_buffer_mmap = numpy.memmap(self._bin_file_reader, mode="r", order="C")
        self._bin_buffer = memoryview(self._bin_buffer_mmap.data)

    def read(self, dtype: Type[numpy.number], count: int, offset: int) -> numpy.ndarray:
        """Read bytes into a numpy array.

        Args:
            dtype (Type[numpy.number]): Data-type of the returned array.

            count (int): Number of items to read.

            offset (int): Start reading from this offset (in bytes).

        Returns:
            numpy.ndarray: An array with `count` items and data-type `dtype` constructed from
                reading bytes from the data file starting at `offset`.
        """
        return numpy.frombuffer(self._bin_buffer, dtype=dtype, count=count, offset=offset)

    def __del__(self) -> None:
        """Clean up the object."""
        if self._bin_buffer_mmap is not None:
            self._bin_buffer_mmap._mmap.close()  # type: ignore[attr-defined]
        if self._bin_file_reader is not None:
            self._bin_file_reader.close()
        del self._bin_buffer_mmap
        del self._bin_file_reader


class _FileBinReader(_BinReader):
    """A _BinReader that reads from the data (.bin) file using a file pointer

    Args:
        bin_path (str): The path to the data (.bin) file.
    """

    def __init__(
        self, bin_path: str, num_max_retries: int = 3, sleep_duration_start: int = 10
    ) -> None:
        self._bin_path = bin_path
        # Retry-specific parameters. With default arguments, sleep for 10, 20, 40 seconds
        # between retries.
        self.num_max_retries = num_max_retries
        self.sleep_duration_start = sleep_duration_start

    def read(self, dtype: Type[numpy.number], count: int, offset: int) -> numpy.ndarray:
        """Read bytes into a numpy array.

        Args:
            dtype (Type[numpy.number]): Data-type of the returned array.

            count (int): Number of items to read.

            offset (int): Start reading from this offset (in bytes).

        Returns:
            numpy.ndarray: An array with `count` items and data-type `dtype` constructed from
                reading bytes from the data file starting at `offset`.
        """

        def _read():
            """Helper method to read `count` bytes from self._bin_path at provided offset."""
            sequence = numpy.empty(count, dtype=dtype)
            if MultiStorageClientFeature.is_enabled():
                msc = MultiStorageClientFeature.import_package()
                with msc.open(self._bin_path, mode="rb", buffering=0) as bin_buffer_file:
                    bin_buffer_file.seek(offset)
                    bin_buffer_file.readinto(sequence)
            else:
                with open(self._bin_path, mode="rb", buffering=0) as bin_buffer_file:
                    bin_buffer_file.seek(offset)
                    bin_buffer_file.readinto(sequence)
            return sequence

        sleep_duration = self.sleep_duration_start
        for i in range(self.num_max_retries + 1):
            try:
                return _read()
            except Exception as e:
                time_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
                if i == self.num_max_retries:
                    logger.warning(
                        f"[{time_str}] {self.num_max_retries+1} total tries to read data item "
                        f"failed; going to abort and re-raise exception \"{e}\"..."
                    )
                    # Re-raise exception if in last iteration of for loop.
                    raise e
                logger.warning(
                    f"[{time_str}] Attempt {i+1}/{self.num_max_retries+1} to read data item "
                    f"failed with exception \"{e}\"; going to sleep for {sleep_duration} "
                    "seconds and then re-try..."
                )
                time.sleep(sleep_duration)
                sleep_duration = sleep_duration * 2

        raise RuntimeError("Should not reach here!")


class _S3BinReader(_BinReader):
    """A _BinReader that reads from the data (.bin) file from S3

    Args:
        bin_path (str): The path to the data (.bin) file.

        bin_chunk_nbytes (int, optional): If not None, then maintain an in-memory cache to speed
            up calls to the `read` method. Furthermore, on a cache miss, download this number of
            bytes to refresh the cache. Otherwise (None), do not maintain an in-memory cache.
            A class that inherits from _BinReader may not implement caching in which case it
            should assert that `bin_chunk_nbytes` is None at initialization.
    """

    def __init__(self, bin_path: str, object_storage_config: ObjectStorageConfig) -> None:
        assert object_storage_config.bin_chunk_nbytes > 0
        self._client = boto3.client("s3")
        self._s3_bucket, self._s3_key = parse_s3_path(bin_path)
        self._cache_nbytes = object_storage_config.bin_chunk_nbytes

        self._cache_bytes_start: int
        self._cache_bytes_end: int
        self._cache: Optional[bytes] = None

    def _extract_from_cache(self, offset: int, size: int) -> bytes:
        """Extract `size` bytes starting at `offset` bytes into the cache"""
        assert self._cache is not None
        start = offset - self._cache_bytes_start
        assert start >= 0
        end = start + size
        assert end <= len(self._cache)
        return self._cache[start:end]

    def read(self, dtype: Type[numpy.number], count: int, offset: int) -> numpy.ndarray:
        """Read bytes into a numpy array.

        Let `size` be the `count` * `DType.size(dtype)`. If the requested span of bytes [`offset`,
        `offset` + `size`) is covered by the in-memory cache maintained by this class, then this
        function extracts the requested span from that cache and returns it. Otherwise, this
        function first refreshes the cache and then extracts the requested span from the refreshed
        cache and returns it.

        The cache is refreshed based on `offset` and `size`. In particular, we divide all the bytes
        in an S3 object into blocks, where each block contains `bin_chunk_nbytes` bytes. We assign
        each block an index starting from 0. We take the block with index (`offset` //
        `bin_chunk_nbytes`) to refresh the cache. If this new block still does not cover the
        requested span, we extend it just enough to include `offset` + `size`.

        Args:
            dtype (Type[numpy.number]): Data-type of the returned array.

            count (int): Number of items to read.

            offset (int): Start reading from this offset (in bytes).

        Returns:
            numpy.ndarray: An array with `count` items and data-type `dtype` constructed from
            reading bytes from the data file starting at `offset`.
        """
        size = count * DType.size(dtype)
        if (
            self._cache is not None
            and offset >= self._cache_bytes_start
            and offset + size <= self._cache_bytes_end
        ):
            return numpy.frombuffer(self._extract_from_cache(offset, size), dtype=dtype)

        bytes_start = (offset // self._cache_nbytes) * self._cache_nbytes
        assert bytes_start >= 0
        assert offset >= bytes_start
        bytes_end = max(bytes_start + self._cache_nbytes, offset + size)
        assert bytes_end >= 1
        self._cache = self._client.get_object(
            Bucket=self._s3_bucket,
            Key=self._s3_key,
            # Subtract 1, because the end of Range is inclusive.
            Range=f"bytes={bytes_start}-{bytes_end - 1}",
        )["Body"].read()
        self._cache_bytes_start = bytes_start
        self._cache_bytes_end = bytes_end
        return numpy.frombuffer(self._extract_from_cache(offset, size), dtype=dtype)

    def __del__(self) -> None:
        """Clean up the object"""
        self._client.close()


class _MultiStorageClientBinReader(_BinReader):
    """A _BinReader that reads from the data (.bin) file using the multi-storage client.

    Args:
        bin_path (str): The path to the data (.bin) file.

        object_storage_config (ObjectStorageConfig): The object storage config.
    """

    def __init__(self, bin_path: str, object_storage_config: ObjectStorageConfig) -> None:
        self._msc = MultiStorageClientFeature.import_package()
        self._client, self._bin_path = self._msc.resolve_storage_client(bin_path)

    def read(self, dtype: Type[numpy.number], count: int, offset: int) -> numpy.ndarray:
        size = count * DType.size(dtype)
        buffer = self._client.read(
            path=self._bin_path, byte_range=self._msc.types.Range(offset=offset, size=size)
        )
        return numpy.frombuffer(buffer, dtype=dtype)


# Map of object storage access to the corresponding bin reader
OBJECT_STORAGE_BIN_READERS = {"s3": _S3BinReader, "msc": _MultiStorageClientBinReader}


class IndexedDataset(torch.utils.data.Dataset):
    """The low-level interface dataset class

    Args:
        path_prefix (str): The index (.idx) and data (.bin) prefix

        multimodal (bool): Whether the dataset is multimodal. Defaults to False.

        mmap (bool): Whether to mmap the .bin files. Defaults to True.

        object_storage_config (Optional[ObjectStorageConfig]): Supplied only for data stored on S3
            or MSC. IndexedDataset downloads the index (.idx) file to
            `object_storage_config.path_to_idx_cache` and streams data from the data (.bin) file
            in `object_storage_config.bin_chunk_nbytes` blocks. Note that `mmap` must be disabled
            for S3 data loading. Defaults to None.

        fast_cache_load (bool): Whether to use the fast cache mode.

        sequences_per_dataset (Optional[Tuple[int, int]]): The sequences per dataset.

        dtype_code (int): The dtype code of the tokenized documents.
    """

    def __init__(
        self,
        path_prefix: str,
        multimodal: bool = False,
        mmap: bool = True,
        object_storage_config: Optional[ObjectStorageConfig] = None,
        s3_config: Optional[S3Config] = None,
        fast_cache_load: bool = False,
        sequences_per_dataset: Optional[Tuple[int, int]] = None,
        dtype_code: int = None,
    ) -> None:
        super().__init__()
        self.path_prefix: str
        self.multimodal: bool
        self.mmap: bool
        self.object_storage_config: Optional[ObjectStorageConfig]

        self.bin_reader: _BinReader
        self.index: _IndexReader

        # Deprecated: s3_config is deprecated, use object_storage_config instead
        object_storage_config = object_storage_config or s3_config

        # Cache the index file if it is stored on object storage
        if is_object_storage_path(path_prefix) and object_storage_config is not None:
            idx_path = get_idx_path(path_prefix)
            cache_idx_path = get_index_cache_path(idx_path, object_storage_config)
            cache_index_file(idx_path, cache_idx_path)

        self.initialize(
            path_prefix,
            multimodal,
            mmap,
            object_storage_config,
            fast_cache_load,
            sequences_per_dataset,
            dtype_code,
        )

        if not fast_cache_load:
            assert self.index.sequence_lengths.shape[0] == self.index.document_indices[-1]
            assert self.index.sequence_lengths.shape[0] == len(self.index)
            assert self.index.sequence_lengths.shape[0] == self.index.sequence_count

    def initialize(
        self,
        path_prefix: str,
        multimodal: bool,
        mmap: bool,
        object_storage_config: Optional[ObjectStorageConfig],
        fast_cache_load: bool = False,
        sequences_per_dataset: Optional[Tuple[int, int]] = None,
        dtype_code: int = None,
    ) -> None:
        """Initialize the dataset

        This method is called by IndexedDataset.__init__ during object creation and by
        IndexedDataset.__setstate__ during un-pickling

        Args:
            path_prefix (str): The index (.idx) and data (.bin) prefix

            multimodal (bool): Whether the dataset is multimodal

            mmap (bool): Whether to mmap the .bin file

            object_storage_config (Optional[ObjectStorageConfig]): See IndexedDataset docstring
                for details.

            fast_cache_load (bool): Whether to use the fast cache mode.

            sequences_per_dataset (Optional[Tuple[int, int]]): The sequences per dataset.

            dtype_code (int): The dtype code of the tokenized documents.
        """
        idx_path = get_idx_path(path_prefix)
        bin_path = get_bin_path(path_prefix)
        if object_storage_config is None and not fast_cache_load:
            assert os.path.exists(idx_path) and os.path.exists(bin_path), (
                "One or both of the .idx and .bin files cannot be found at the "
                f"path prefix {path_prefix}"
            )
        self.path_prefix = path_prefix
        self.multimodal = multimodal
        self.mmap = mmap
        self.object_storage_config = object_storage_config
        self.fast_cache_load = fast_cache_load
        self.sequences_per_dataset = sequences_per_dataset
        self.dtype_code = dtype_code
        if mmap:
            assert not object_storage_config
            self.bin_reader = _MMapBinReader(bin_path)
        elif object_storage_config:
            assert not mmap
            self.bin_reader = OBJECT_STORAGE_BIN_READERS[get_object_storage_access(path_prefix)](
                bin_path, object_storage_config
            )
            idx_path = get_index_cache_path(get_idx_path(path_prefix), object_storage_config)
        else:
            self.bin_reader = _FileBinReader(bin_path)
        self.index = _IndexReader(idx_path, self.multimodal, sequences_per_dataset, dtype_code)

    def __getstate__(self) -> Tuple[str, bool, bool, Optional[ObjectStorageConfig]]:
        """Get the state during pickling

        Returns:
            Tuple[str, bool, bool, Optional[ObjectStorageConfig]]: The state tuple
        """
        return (
            self.path_prefix,
            self.multimodal,
            self.mmap,
            self.object_storage_config,
            self.fast_cache_load,
            self.sequences_per_dataset,
            self.dtype_code,
        )

    def __setstate__(self, state: Tuple[str, bool, bool, Optional[ObjectStorageConfig]]) -> None:
        """Set the state during un-pickling

        Args:
            state (Tuple[str, bool, bool, Optional[ObjectStorageConfig]]): The state tuple
        """
        (
            path_prefix,
            multimodal,
            mmap,
            object_storage_config,
            fast_cache_load,
            sequences_per_dataset,
            dtype_code,
        ) = state
        self.initialize(
            path_prefix,
            multimodal,
            mmap,
            object_storage_config,
            fast_cache_load,
            sequences_per_dataset,
            dtype_code,
        )

    def __del__(self) -> None:
        """Clean up the object"""
        del self.bin_reader
        del self.index

    def __len__(self) -> int:
        """Return the length of the dataset i.e. the number of sequences in the index

        Returns:
            int: The length of the dataset
        """
        return len(self.index)

    def __getitem__(
        self, idx: Union[int, numpy.integer, slice]
    ) -> Union[
        numpy.ndarray,
        Tuple[numpy.ndarray, numpy.number],
        List[numpy.ndarray],
        Tuple[List[numpy.ndarray], numpy.ndarray],
    ]:
        """Return from the dataset

        Args:
            idx (Union[int, numpy.integer, slice]): The index or index slice into the dataset

        Raises:
            ValueError: When the index slice is non-contiguous

            TypeError: When the index is of an unexpected type

        Returns:
            Union[
                numpy.ndarray,
                Tuple[numpy.ndarray, numpy.number],
                List[numpy.ndarray],
                Tuple[List[numpy.ndarray], numpy.ndarray],
            ]: The sequence tokens and modes at the index or index slice
        """
        if isinstance(idx, (int, numpy.integer)):
            sequence_pointer, sequence_length, sequence_mode = self.index[idx]
            sequence = self.bin_reader.read(
                dtype=self.index.dtype, count=sequence_length, offset=sequence_pointer
            )
            return (sequence, sequence_mode) if sequence_mode is not None else sequence
        elif isinstance(idx, slice):
            start, stop, step = idx.indices(len(self))
            if step != 1:
                raise ValueError("Slices into indexed_dataset must be contiguous")
            sequence_lengths = self.index.sequence_lengths[idx]
            sequence_modes = (
                self.index.sequence_modes[idx] if self.multimodal else None  # type: ignore[index]
            )
            sequence_offsets = list(accumulate(sequence_lengths))
            sequences = numpy.split(
                self.bin_reader.read(
                    dtype=self.index.dtype,
                    count=sum(sequence_lengths),
                    offset=self.index.sequence_pointers[start],
                ),
                sequence_offsets[:-1],
            )
            return (sequences, sequence_modes) if sequence_modes is not None else sequences
        else:
            raise TypeError("Unexpected type received for idx: {}".format(type(idx)))

    def get(
        self, idx: int, offset: int = 0, length: Optional[int] = None
    ) -> Union[numpy.ndarray, Tuple[numpy.ndarray, numpy.number]]:
        """Retrieve a single item from the dataset with the option to only
        return a portion of the item.

        get(idx) is the same as [idx] but get() does not support slicing.

        Args:
            idx (Union[int, numpy.integer]): The index into the dataset

            offset (int): The integer token offset in the sequence

            length (int): The number of tokens to grab from the sequence

        Returns:
            Union[numpy.ndarray, Tuple[numpy.ndarray, numpy.number]]: The sequence tokens and mode
                at the index
        """
        sequence_pointer, sequence_length, sequence_mode = self.index[idx]
        if length is None:
            length = sequence_length - offset
        sequence_pointer += offset * DType.size(self.index.dtype)
        sequence = self.bin_reader.read(
            dtype=self.index.dtype, count=length, offset=sequence_pointer
        )
        return (sequence, sequence_mode) if sequence_mode is not None else sequence

    @property
    def sequence_lengths(self) -> numpy.ndarray:
        """Get the sequence lengths

        Returns:
            numpy.ndarray: The sequence lengths
        """
        return self.index.sequence_lengths

    @property
    def document_indices(self) -> numpy.ndarray:
        """Get the document indices

        Returns:
            numpy.ndarray: The document indices
        """
        return self.index.document_indices

    def get_document_indices(self) -> numpy.ndarray:
        """Get the document indices

        This method is slated for deprecation.

        Returns:
            numpy.ndarray: The document indices
        """
        return self.index.document_indices

    def set_document_indices(self, document_indices: numpy.ndarray) -> None:
        """Set the document indices

        This method is slated for deprecation.

        Args:
            document_indices (numpy.ndarray): The document indices
        """
        self.index.document_indices = document_indices

    @property
    def sequence_modes(self) -> numpy.ndarray:
        """Get the sequence modes

        Returns:
            numpy.ndarray: The sequence modes
        """
        assert self.index.sequence_modes
        return self.index.sequence_modes

    @staticmethod
    def exists(path_prefix: str) -> bool:
        """Return whether the IndexedDataset exists on disk at the prefix

        Args:
            path_prefix (str): The prefix to the index (.idx) and data (.bin) files

        Returns:
            bool: Whether the IndexedDataset exists on disk at the prefix
        """
        if is_object_storage_path(path_prefix):
            return dataset_exists(path_prefix, get_idx_path(path_prefix), get_bin_path(path_prefix))

        return os.path.exists(get_idx_path(path_prefix)) and os.path.exists(
            get_bin_path(path_prefix)
        )


class IndexedDatasetBuilder(object):
    """Builder class for the IndexedDataset class

    Args:
        bin_path (str): The path to the data (.bin) file

        dtype (Type[numpy.number], optional): The dtype of the index file. Defaults to numpy.int32.

        multimodal (bool, optional): Whether the dataset is multimodal. Defaults to False.
    """

    def __init__(
        self, bin_path: str, dtype: Type[numpy.number] = numpy.int32, multimodal: bool = False
    ) -> None:
        if MultiStorageClientFeature.is_enabled():
            msc = MultiStorageClientFeature.import_package()
            self._open = msc.open
        else:
            self._open = open

        self.data_file = self._open(bin_path, "wb")
        self.dtype = dtype
        self.multimodal = multimodal

        self.sequence_lengths = []
        self.document_indices = [0]
        self.sequence_modes = [] if self.multimodal else None

    def add_item(self, tensor: torch.Tensor, mode: int = 0) -> None:
        """Add a single item to the dataset

        Args:
            tensor (torch.Tensor): The item to add to the data file

            mode (int, optional): The mode for the item. Defaults to 0.
        """
        np_array = numpy.array(tensor.numpy(), dtype=self.dtype)
        self.data_file.write(np_array.tobytes(order="C"))
        self.sequence_lengths.append(np_array.size)
        if self.multimodal:
            self.sequence_modes.append(mode)

    def add_document(
        self, tensor: torch.Tensor, lengths: List[int], modes: Optional[List[int]] = None
    ) -> None:
        """Add an entire document to the dataset

        Args:
            tensor (torch.Tensor): The document to add

            lengths (List[int]): The lengths of each item in the document

            modes (Optional[List[int]], optional): The modes for each item in the document.
                Defaults to None.
        """
        np_array = numpy.array(tensor, dtype=self.dtype)
        self.data_file.write(np_array.tobytes(order="C"))
        self.sequence_lengths.extend(lengths)
        self.document_indices.append(len(self.sequence_lengths))
        if self.multimodal:
            self.sequence_modes.extend(modes if modes is not None else [0] * lengths)

    def end_document(self) -> None:
        """Finalize the document, for use with IndexedDatasetBuilder.add_item"""
        self.document_indices.append(len(self.sequence_lengths))

    def add_index(self, path_prefix: str) -> None:
        """Add an entire IndexedDataset to the dataset

        Args:
            path_prefix (str): The index (.idx) and data (.bin) prefix
        """
        # Concatenate index
        index = _IndexReader(get_idx_path(path_prefix), multimodal=self.multimodal)
        assert index.dtype == self.dtype

        offset = len(self.sequence_lengths)
        self.sequence_lengths.extend(index.sequence_lengths)
        self.document_indices.extend((offset + index.document_indices)[1:])

        if self.multimodal:
            assert index.sequence_modes is not None, "sequence_modes cannot not be None"
            self.sequence_modes.extend(index.sequence_modes)

        # Free up memory to make space for new indices
        del index
        gc.collect()

        # Concatenate data
        with self._open(get_bin_path(path_prefix), "rb") as f:
            shutil.copyfileobj(f, self.data_file)

    def finalize(self, idx_path: str) -> None:
        """Clean up and write the index (.idx) file

        Args:
            idx_path (str): The path to the index file
        """
        self.data_file.close()
        with _IndexWriter(idx_path, self.dtype) as writer:
            writer.write(self.sequence_lengths, self.sequence_modes, self.document_indices)


def get_idx_path(path_prefix: str) -> str:
    """Get the path to the index file from the prefix

    Args:
        path_prefix (str): The prefix

    Returns:
        str: The path to the index file
    """
    return path_prefix + ".idx"


def get_bin_path(path_prefix: str) -> str:
    """Get the path to the data file from the prefix

    Args:
        path_prefix (str): The prefix

    Returns:
        str: The path to the data file
    """
    return path_prefix + ".bin"


================================================
FILE: megatron/core/datasets/masked_dataset.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import logging
import os
import time
from abc import abstractmethod
from dataclasses import dataclass
from typing import List, Optional, Tuple

import numpy
import torch

from megatron.core.datasets.blended_megatron_dataset_config import BlendedMegatronDatasetConfig
from megatron.core.datasets.indexed_dataset import IndexedDataset
from megatron.core.datasets.megatron_dataset import MegatronDataset
from megatron.core.datasets.utils import Split
from megatron.core.utils import log_single_rank

logger = logging.getLogger(__name__)


@dataclass
class MaskedWordPieceDatasetConfig(BlendedMegatronDatasetConfig):
    """Configuration object for Megatron Core Masked WordPiece datasets"""

    masking_probability: float = None
    """The probability we mask a candidate N-gram"""

    short_sequence_probability: float = None
    """The probability we return a sequence shorter than the target sequence length"""

    masking_max_ngram: int = None
    """The maximum length N-gram to consider masking or permuting"""

    masking_do_full_word: bool = None
    """Whether we mask the whole word or its component parts"""

    masking_do_permutation: bool = None
    """Whether we shuffle a subset of candidate N-grams in addition"""

    masking_use_longer_ngrams: bool = None
    """Whether to favor longer N-grams over shorter N-grams"""

    masking_use_geometric_distribution: bool = None
    """Whether to draw the size of the N-gram from a geometric distribution according to SpanBERT
       https://arxiv.org/abs/1907.10529 (Section 3.1)
    """

    def __post_init__(self) -> None:
        """Do asserts and set fields post init"""
        super().__post_init__()

        assert self.tokenizer is not None

        assert self.masking_probability is not None
        assert self.short_sequence_probability is not None
        assert self.masking_max_ngram is not None
        assert self.masking_do_full_word is not None
        assert self.masking_do_permutation is not None
        assert self.masking_use_longer_ngrams is not None
        assert self.masking_use_geometric_distribution is not None

        assert self.masking_probability > 0 and self.masking_probability < 1.0
        assert self.short_sequence_probability >= 0 and self.short_sequence_probability <= 1.0
        assert self.masking_max_ngram > 0
        assert not (self.masking_use_geometric_distribution and self.masking_do_permutation)

        if self.masking_use_geometric_distribution and self.masking_use_longer_ngrams:
            log_single_rank(
                logger,
                logging.WARNING,
                "The use of a geometric distribution overrides the default distribution",
            )


class MaskedWordPieceDataset(MegatronDataset):
    """The semi-abstract base class for masked WordPiece datasets

    This implementation makes the rigid assumption that all inheritor datasets are built upon the
    IndexedDataset class. This assumption may be pushed down to the inheritors in future if
    necessary.

    NB: WordPiece tokenization prepends a double hash "##" to all tokens/pieces in a word, save the
    first token/piece.

    Args:
        indexed_dataset (IndexedDataset): The IndexedDataset around which to build the
                                          MegatronDataset

        dataset_path (str): The real path on disk to the dataset, for bookkeeping

        indexed_indices (numpy.ndarray): The set of the documents indices to expose

        num_samples (Optional[int]): The number of samples to draw from the indexed dataset.
                                     When None, build as many samples as correspond to one epoch.

        index_split (Split): The indexed_indices Split

        config (MaskedWordPieceDatasetConfig): The config
    """

    def __init__(
        self,
        indexed_dataset: IndexedDataset,
        dataset_path: str,
        indexed_indices: numpy.ndarray,
        num_samples: Optional[int],
        index_split: Split,
        config: MaskedWordPieceDatasetConfig,
    ) -> None:
        super().__init__(
            indexed_dataset, dataset_path, indexed_indices, num_samples, index_split, config
        )

    @staticmethod
    def numel_low_level_dataset(low_level_dataset: IndexedDataset) -> int:
        """Return the number of documents in the underlying low level dataset.

        Args:
            low_level_dataset (IndexedDataset): The underlying IndexedDataset

        Returns:
            int: The number of unique elements in the underlying IndexedDataset
        """
        return low_level_dataset.document_indices.shape[0] - 1

    @staticmethod
    def build_low_level_dataset(
        dataset_path: str, config: MaskedWordPieceDatasetConfig
    ) -> IndexedDataset:
        """Build the low level dataset (IndexedDataset) from the given path.

        Args:
            dataset_path (str): The real path prefix to the IndexedDataset .bin and .idx files

            config (MaskedWordPieceDatasetConfig): The config

        Returns:
            IndexedDataset: The underlying IndexedDataset
        """
        return IndexedDataset(dataset_path)

    @staticmethod
    def _key_config_attributes() -> List[str]:
        """Inherited method implementation

        Returns:
            List[str]: The key config attributes
        """
        return super(MaskedWordPieceDataset, MaskedWordPieceDataset)._key_config_attributes() + [
            "masking_probability",
            "short_sequence_probability",
            "masking_max_ngram",
            "masking_do_full_word",
            "masking_do_permutation",
            "masking_use_longer_ngrams",
            "masking_use_geometric_distribution",
        ]

    def __len__(self) -> int:
        return self.sample_index.shape[0]

    def _build_sample_index(
        self, sequence_length: int, min_sentences_per_sample: int
    ) -> numpy.ndarray:
        path_to_cache = self.config.path_to_cache
        if path_to_cache is None:
            path_to_cache = os.path.join(
                self.dataset.path_prefix, "cache", f"{type(self).__name__}_indices"
            )

        get_path_to = lambda suffix: os.path.join(
            path_to_cache, f"{self.unique_description_hash}-{type(self).__name__}-{suffix}"
        )
        path_to_description = get_path_to("description.txt")
        path_to_sample_index = get_path_to("sample_index.npy")
        cache_hit = all(map(os.path.isfile, [path_to_description, path_to_sample_index]))

        if self.num_samples is not None:
            num_epochs = numpy.iinfo(numpy.int32).max - 1
        else:
            num_epochs = 1

        if not cache_hit and torch.distributed.get_rank() == 0:
            log_single_rank(
                logger,
                logging.INFO,
                f"Build and save the {type(self).__name__} {self.index_split.name} indices",
            )

            os.makedirs(path_to_cache, exist_ok=True)

            # Write the description
            with open(path_to_description, "wt") as writer:
                writer.write(self.unique_description)

            # Build the sample index
            log_single_rank(
                logger,
                logging.INFO,
                f"\tBuild and save the sample index to {os.path.basename(path_to_sample_index)}",
            )
            t_beg = time.time()
            from megatron.core.datasets import helpers

            # Add +1 for access to document upper bound
            indices = numpy.append(self.indices, self.indices[-1] + 1)

            sample_index = helpers.build_mapping(
                self.dataset.document_indices[indices],
                self.dataset.sequence_lengths,
                num_epochs,
                self.num_samples,
                sequence_length,
                self.config.short_sequence_probability,
                self.config.random_seed,
                False,
                min_sentences_per_sample,
            )
            numpy.save(path_to_sample_index, sample_index, allow_pickle=True)
            t_end = time.time()
            log_single_rank(logger, logging.DEBUG, f"\t> time elapsed: {t_end - t_beg:4f} seconds")

            log_single_rank(
                logger, logging.INFO, f"> total number of samples: {sample_index.shape[0]}"
            )
            log_single_rank(logger, logging.INFO, f"> total number of epochs: {num_epochs}")

            return sample_index

        log_single_rank(
            logger, logging.INFO, f"Load the {type(self).__name__} {self.index_split.name} indices"
        )

        log_single_rank(
            logger,
            logging.INFO,
            f"\tLoad the sample index from {os.path.basename(path_to_sample_index)}",
        )
        t_beg = time.time()
        sample_index = numpy.load(path_to_sample_index, allow_pickle=True, mmap_mode="r")
        t_end = time.time()
        log_single_rank(logger, logging.DEBUG, f"\t> time elapsed: {t_end - t_beg:4f} seconds")

        return sample_index

    def _create_masked_lm_predictions(
        self,
        token_ids: List[int],
        target_sequence_length: int,
        numpy_random_state: numpy.random.RandomState,
    ) -> Tuple[List[int], List[int], List[int], List[int], List[Tuple[List[int], List[int]]]]:
        """Creates the predictions for the masked LM objective

        Args:
            token_ids (List[int]): The token ids
            target_sequence_length (int): The target sequence length
            numpy_random_state (numpy.random.RandomState): The NumPy random state

        Returns:
            Tuple[List[int], List[int], List[int], List[int], List[Tuple[List[int], List[int]]]]:
                1. masked_token_ids -> The masked sequence
                2. masked_positions -> The indices for the masked token ids
                3. masked_labels    -> The original token ids for the masked token ids
                4. boundaries       -> The sentence and word boundaries for the sequence
                5. masked_spans     -> The masked positions and labels with N-gram info intact
        """
        # Build the token sentence and word boundaries and the masking candidates
        # e.g. [cls, id, ##id, ##id, id, ##id, sep, id, ##id, sep]
        #    -> boundaries: [1, 1, 0, 0, 1, 0, 1, 1, 0, 1]
        #    -> candidates with whole word masking: [[1, 2, 3], [4, 5], [7, 8]]
        #    -> candidates sans whole word masking: [[1], [2], [3], [4], [5], [7], [8]]
        boundaries = []
        candidates = []
        for i, token_id in enumerate(token_ids):
            if token_id == self.config.tokenizer.cls or token_id == self.config.tokenizer.sep:
                boundaries.append(1)
            else:
                if not self.config.tokenizer.inv_vocab[token_id].startswith("##"):
                    boundaries.append(1)
                    candidates.append([i])
                else:
                    boundaries.append(0)
                    if self.config.masking_do_full_word and len(candidates) > 0:
                        candidates[-1].append(i)
                    else:
                        candidates.append([i])

        n_maskings = min(
            self.config.masking_probability * target_sequence_length,
            max(1, int(round(len(token_ids) * self.config.masking_probability))),
        )

        ngram_nvals = numpy.arange(self.config.masking_max_ngram, dtype=numpy.int64) + 1

        # By default, the N-gram probabilities are inversely proportional to N
        # e.g. N = 3
        #    -> P = array([0.54545455, 0.27272727, 0.18181818])
        nprobs = 1.0 / ngram_nvals
        nprobs = nprobs / nprobs.sum(keepdims=True)
        if self.config.masking_use_longer_ngrams:
            nprobs = nprobs[::-1]

        # Create a nested list of depth 3
        #   layer 1: the candidate dimension
        #   layer 2: the N-gram dimension
        #   layer 3: the token dimension
        candidate_ngrams = [
            [candidates[idx : idx + n] for n in ngram_nvals] for idx in range(len(candidates))
        ]
        numpy_random_state.shuffle(candidate_ngrams)

        masked_token_ids = list(token_ids)
        masked_positions_and_labels = []
        masked_spans = []
        masked_indices = set()
        for candidate_idx in range(len(candidate_ngrams)):
            n_ngrams = len(candidate_ngrams[candidate_idx])

            # Stop when we hit our desired number of maskings
            if len(masked_positions_and_labels) >= n_maskings:
                break

            # Do nothing for candidates with no ngrams
            if not candidate_ngrams[candidate_idx]:
                continue

            # Choose the initial value of N
            if self.config.masking_use_geometric_distribution:
                # Sample N from a geometric distribution with p = 0.2 and clip
                # i.e. SpanBERT
                #    -> https://arxiv.org/abs/1907.10529 (Section 3.1)
                p = 0.2
                n = min(numpy_random_state.geometric(p), self.config.masking_max_ngram)
            else:
                p = nprobs[:n_ngrams] / nprobs[:n_ngrams].sum(keepdims=True)
                n = numpy_random_state.choice(ngram_nvals[:n_ngrams], p=p)

            while True:
                ngram_indices = sum(candidate_ngrams[candidate_idx][n - 1], [])
                n = n - 1
                # Success: masking this N-gram puts us below the desired number of maskings
                if n_maskings >= len(masked_positions_and_labels) + len(ngram_indices):
                    skip_candidate = False
                    break
                # Failure: no N-grams remain for this candidate
                if n == 0:
                    skip_candidate = True
                    break

            # Do nothing for candidates whose 1-gram is too long
            if skip_candidate:
                continue

            # Do nothing for candidate indices which have already been masked
            if any(map(lambda idx: idx in masked_indices, ngram_indices)):
                continue

            # Mask the tokens and record their original positions and values
            for index in ngram_indices:
                masked_indices.add(index)
                mask = self._get_token_mask(numpy_random_state)
                if mask is None:
                    masked_token_ids[index] = token_ids[index]
                else:
                    masked_token_ids[index] = mask
                masked_positions_and_labels.append((index, token_ids[index]))

            masked_spans.append((ngram_indices, [token_ids[index] for index in ngram_indices]))

        assert len(masked_positions_and_labels) <= n_maskings

        numpy_random_state.shuffle(candidate_ngrams)

        if self.config.masking_do_permutation:
            n_swappings = n_maskings

            permuted_indices = set()
            for candidate_idx in range(len(candidate_ngrams)):
                n_ngrams = len(candidate_ngrams[candidate_idx])

                if len(permuted_indices) >= n_swappings:
                    break

                # Do nothing for candidates with no ngrams
                if not candidate_ngrams[candidate_idx]:
                    continue

                p = nprobs[:n_ngrams] / nprobs[:n_ngrams].sum(keepdims=True)
                n = numpy.random.choice(ngram_nvals[:n_ngrams], p=p)

                while True:
                    ngram_indices = sum(candidate_ngrams[candidate_idx][n - 1], [])
                    n = n - 1
                    # Success: swapping this N-gram puts us below the desired number of swappings
                    if n_swappings >= len(permuted_indices) + len(ngram_indices):
                        skip_candidate = False
                        break
                    # Failure: no N-grams remain for this candidate
                    if n == 0:
                        skip_candidate = True
                        break

                # Do nothing for candidates whose 1-gram is too long
                if skip_candidate:
                    continue

                # Do nothing for candidate indices which have already been masked or permuted
                if any(
                    map(lambda idx: idx in masked_indices or idx in permuted_indices, ngram_indices)
                ):
                    continue

                for index in ngram_indices:
                    permuted_indices.add(index)

            assert len(permuted_indices) <= n_swappings

            permuted_indices = sorted(permuted_indices)
            permuted_indices_copy = list(permuted_indices)
            numpy_random_state.shuffle(permuted_indices_copy)
            masked_token_ids_copy = list(masked_token_ids)

            for idx, idx_copy in zip(permuted_indices, permuted_indices_copy):
                masked_token_ids[idx] = masked_token_ids_copy[idx_copy]
                masked_positions_and_labels.append((idx, masked_token_ids_copy[idx]))

        masked_positions_and_labels = sorted(masked_positions_and_labels, key=lambda x: x[0])
        masked_positions = []
        masked_labels = []
        for position, label in masked_positions_and_labels:
            masked_positions.append(position)
            masked_labels.append(label)

        masked_spans = sorted(masked_spans, key=lambda x: x[0][0])

        return (masked_token_ids, masked_positions, masked_labels, boundaries, masked_spans)

    @abstractmethod
    def _get_token_mask(self, numpy_random_state: numpy.random.RandomState) -> Optional[int]:
        pass


================================================
FILE: megatron/core/datasets/megatron_dataset.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

import hashlib
import json
import warnings
from abc import ABC, abstractmethod
from collections import OrderedDict
from typing import Dict, Iterable, List, Optional, Union

import numpy
import torch

from megatron.core.datasets.blended_megatron_dataset_config import BlendedMegatronDatasetConfig
from megatron.core.datasets.indexed_dataset import IndexedDataset
from megatron.core.datasets.utils import Split

LowLevelDataset = Union[IndexedDataset, Iterable]


_PAD_TOKEN_ID = -1


class MegatronDataset(ABC, torch.utils.data.Dataset):
    """The highest level wrapper class from which all dataset classes should inherit

    Args:
        dataset (LowLevelDataset): The dataset around which to build the MegatronDataset

        dataset_path (Optional[str]): The real path on disk to the dataset, for bookkeeping

        indices (numpy.ndarray): The set of the documents indices to expose

        num_samples (Optional[int]): The minimum number of samples to build from the indexed
            dataset. When None, build as many samples as correspond to one epoch.

        index_split (Split): The indices Split

        config (BlendedMegatronDatasetConfig): The config
    """

    def __init__(
        self,
        dataset: LowLevelDataset,
        dataset_path: Optional[str],
        indices: numpy.ndarray,
        num_samples: Optional[int],
        index_split: Split,
        config: BlendedMegatronDatasetConfig,
    ) -> None:
        self.dataset = dataset
        self.dataset_path = dataset_path
        self.indices = indices
        self.num_samples = num_samples
        self.index_split = index_split
        self.config = config

        self.unique_identifiers = OrderedDict()

        self.unique_identifiers["class"] = type(self).__name__
        self.unique_identifiers["dataset_path"] = self.dataset_path
        self.unique_identifiers["num_samples"] = self.num_samples
        self.unique_identifiers["index_split"] = self.index_split.name
        for attr in self._key_config_attributes():
            self.unique_identifiers[attr] = getattr(self.config, attr)

        self.unique_description = json.dumps(
            self.unique_identifiers, indent=4, default=lambda obj: obj.unique_identifiers
        )
        self.unique_description_hash = hashlib.md5(
            self.unique_description.encode("utf-8"), usedforsecurity=False
        ).hexdigest()

        # Handle pad token id provided by the tokenizer
        try:
            self._pad_token_id = self.config.tokenizer.pad
        except Exception:
            self._pad_token_id = _PAD_TOKEN_ID

        # Check if pad token id collides with any other special tokens
        try:
            _special_tokens_list = [
                v for k, v in self.config.tokenizer.special_tokens_dict.items() if k != "pad_token"
            ]
        except (AttributeError, IndexError, ValueError):
            _special_tokens_list = []
        # If the tokenizer does not have a special_tokens_dict attribute, at least check eos and eod
        if not _special_tokens_list:
            try:
                _special_tokens_list.append(self.config.tokenizer.eos)
            except (AttributeError, NotImplementedError):
                pass
            try:
                _special_tokens_list.append(self.config.tokenizer.eod)
            except (AttributeError, NotImplementedError):
                pass

        if self._pad_token_id in _special_tokens_list:
            if self.config.allow_ambiguous_pad_tokens:
                # This will break training, but users must explicitly opt-in to this behavior.
                warnings.warn(
                    "The pad token id in the tokenizer collides with another special token id. "
                    "This may cause instability and lack of covergence during training. "
                    "Do not ignore this warning if you do not understand the implications. "
                )
            else:
                # Reset the pad token id to a value which is guaranteed not to be in the dataset.
                self._pad_token_id = _PAD_TOKEN_ID
                warnings.warn(
                    "The pad token id in the tokenizer collides with another special token id. "
                    "This may cause instability and lack of covergence during training. "
                    "As such, the training flow will avoid masking out any pad tokens already "
                    "present in the dataset. If you would like to disable this behavior, "
                    "please provide a tokenizer with a uniquely-defined pad token id."
                )

    @staticmethod
    def numel_low_level_dataset(low_level_dataset: LowLevelDataset) -> int:
        """Return the number of elements in the underlying low level dataset for the purpose of
        segregating the train/valid/test split indices

        It may be that the low level dataset can be split any number of ways, depending on the mid
        level dataset it supports, which is why we define the "number of elements" function
        separately from the __len__ function here in the mid level dataset class

        Args:
            low_level_dataset (LowLevelDataset): The underlying low level dataset

        Returns:
            int: The number of elements in the underlying low level dataset
        """
        raise NotImplementedError

    @staticmethod
    def build_low_level_dataset(
        dataset_path: str, config: BlendedMegatronDatasetConfig
    ) -> LowLevelDataset:
        """Build the low level dataset via a function to be called from within
        BlendedMegatronDatasetBuilder.build_generic_dataset

        It may be that the low level dataset spans any subset of train/valid/test splits, which is
        why we define a static "build" function separately from the constructor in the mid level
        dataset class

        Args:
            dataset_path (str): The real path on disk to the dataset

            config (BlendedMegatronDatasetConfig): The dataset config

        Returns:
            LowLevelDataset: The low level dataset
        """
        raise NotImplementedError

    @staticmethod
    def _key_config_attributes() -> List[str]:
        """Return all config attributes which contribute to uniquely identifying the dataset.

        These attributes will be used to build a uniquely identifying string and MD5 hash which
        will be used to cache/load dataset resources from run to run.

        Returns:
            List[str]: The key config attributes
        """
        return ["random_seed", "sequence_length", "split", "split_matrix", "tokenizer"]

    @abstractmethod
    def __len__(self) -> int:
        """Return the length of the dataset

        Returns:
            int: See abstract implementation
        """
        pass

    @abstractmethod
    def __getitem__(self, idx: int) -> Dict[str, Union[torch.Tensor, numpy.ndarray]]:
        """Return from the dataset

        Args:
            idx (int): The index into the dataset

        Returns:
            Dict[str, Union[torch.Tensor, numpy.ndarray]]: See abstract implementation
        """
        pass


================================================
FILE: megatron/core/datasets/multimodal_dataset.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from dataclasses import dataclass
from typing import Callable, Dict

import torch

from megatron.core.datasets.gpt_dataset import GPTDatasetConfig, MockGPTDataset


@dataclass
class MultimodalDatasetConfig(GPTDatasetConfig):
    """Configuration object for Megatron Core Multimodal datasets.

    Note: This is unused at the moment and may be missing features. Follow-up changes will use this.
    """

    image_h: int = None
    """Image height."""

    image_w: int = None
    """Image width."""

    # Function to preprocess the data sample to a format expected by a specific model. By default, do nothing.
    preprocess_func: Callable[[Dict[str, torch.Tensor]], Dict[str, torch.Tensor]] = lambda x: x
    """Optional function to preprocess data samples for a specific model."""

    def __post_init__(self) -> None:
        super().__post_init__()

        assert self.image_h is not None
        assert self.image_w is not None


class MockMultimodalDataset(MockGPTDataset):
    """Mock multimodal dataset.


    This is unused at the moment and may be missing features. Follow-up changes will use this.
    """

    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
        """Return a sample that contains a dummy image, text sequence and the associated labels and cost and attention masks.

        Args:
            idx (int): The integer seed for mock data generation.

        Returns:
            Dict[str, torch.Tensor]: The mock data.
        """
        # Get a text sample.
        sample = super().__getitem__(idx)

        # Add mock input image.
        sample["image"] = torch.zeros(
            (3, self.config.image_h, self.config.image_w), dtype=torch.float32
        )

        # Run optional data preprocessing.
        preprocess_func = self.config.preprocess_func

        return preprocess_func(sample)


================================================
FILE: megatron/core/datasets/object_storage_utils.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import os
from dataclasses import dataclass
from typing import Any, Dict, Protocol, Tuple

import torch

try:
    import boto3
    import botocore.exceptions as exceptions
except ModuleNotFoundError:
    pass

from megatron.core.msc_utils import MultiStorageClientFeature

S3_PREFIX = "s3://"
MSC_PREFIX = "msc://"


@dataclass
class ObjectStorageConfig:
    """Config when the data (.bin) file and the index (.idx) file are in object storage

    Attributes:

        path_to_idx_cache (str): The local directory where we will store the index (.idx) file

        bin_chunk_nbytes (int): If the number of bytes is too small, then we send a request to S3
        at each call of the `read` method in _S3BinReader, which is slow, because each request
        has a fixed cost independent of the size of the byte range requested. If the number of
        bytes is too large, then we only rarely have to send requests to S3, but it takes a lot
        of time to complete the request when we do, which can block training. We've found that
        256 * 1024 * 1024 (i.e., 256 MiB) has worked well (though we have not put that much
        effort into tuning it), so we default to it.
    """

    path_to_idx_cache: str

    bin_chunk_nbytes: int = 256 * 1024 * 1024


# S3Config is deprecated, use ObjectStorageConfig instead
S3Config = ObjectStorageConfig


class S3Client(Protocol):
    """The protocol which all s3 clients should abide by"""

    def download_file(self, Bucket: str, Key: str, Filename: str) -> None:
        """Download the file from S3 to the local file system"""
        ...

    def upload_file(self, Filename: str, Bucket: str, Key: str) -> None:
        """Upload the file to S3"""
        ...

    def head_object(self, Bucket: str, Key: str) -> Dict[str, Any]:
        """Get the metadata of the file in S3"""
        ...

    def get_object(self, Bucket: str, Key: str, Range: str) -> Dict[str, Any]:
        """Get the file from S3"""
        ...

    def close(self) -> None:
        """Close the S3 client"""
        ...


def _remove_s3_prefix(path: str) -> str:
    """Remove the S3 prefix from a path

    Args:
        path (str): The path

    Returns:
        str: The path without the S3 prefix
    """
    return path.removeprefix(S3_PREFIX)


def _is_s3_path(path: str) -> bool:
    """Ascertain whether a path is in S3

    Args:
        path (str): The path

    Returns:
        bool: True if the path is in S3, False otherwise
    """
    return path.startswith(S3_PREFIX)


def _remove_msc_prefix(path: str) -> str:
    """
    Remove the MSC prefix from a path

    Args:
        path (str): The path

    Returns:
        str: The path without the MSC prefix
    """
    return path.removeprefix(MSC_PREFIX)


def _is_msc_path(path: str) -> bool:
    """Checks whether a path is in MSC path (msc://profile/path/to/file)

    Args:
        path (str): The path

    Returns:
        bool: True if the path is in MSC path, False otherwise
    """
    return path.startswith(MSC_PREFIX)


def _s3_download_file(client: S3Client, s3_path: str, local_path: str) -> None:
    """Download the object at the given S3 path to the given local file system path

    Args:
        client (S3Client): The S3 client

        s3_path (str): The S3 source path

        local_path (str): The local destination path
    """
    dirname = os.path.dirname(local_path)
    os.makedirs(dirname, exist_ok=True)
    parsed_s3_path = parse_s3_path(s3_path)
    client.download_file(parsed_s3_path[0], parsed_s3_path[1], local_path)


def _s3_object_exists(client: S3Client, path: str) -> bool:
    """Ascertain whether the object at the given S3 path exists in S3

    Args:
        client (S3Client): The S3 client

        path (str): The S3 path

    Raises:
        botocore.exceptions.ClientError: The error code is 404

    Returns:
        bool: True if the object exists in S3, False otherwise
    """
    parsed_s3_path = parse_s3_path(path)
    try:
        _ = client.head_object(bucket=parsed_s3_path[0], key=parsed_s3_path[1])
    except exceptions.ClientError as e:
        if e.response["Error"]["Code"] != "404":
            raise e
    return True


def is_object_storage_path(path: str) -> bool:
    """Ascertain whether a path is in object storage

    Args:
        path (str): The path

    Returns:
        bool: True if the path is in object storage (s3:// or msc://), False otherwise
    """
    return _is_s3_path(path) or _is_msc_path(path)


def get_index_cache_path(idx_path: str, object_storage_config: ObjectStorageConfig) -> str:
    """Get the index cache path for the given path

    Args:
        idx_path (str): The path to the index file

        object_storage_config (ObjectStorageConfig): The object storage config

    Returns:
        str: The index cache path
    """
    if _is_s3_path(idx_path):
        cache_idx_path = os.path.join(
            object_storage_config.path_to_idx_cache, _remove_s3_prefix(idx_path)
        )
    elif _is_msc_path(idx_path):
        cache_idx_path = os.path.join(
            object_storage_config.path_to_idx_cache, _remove_msc_prefix(idx_path)
        )
    else:
        raise ValueError(f"Invalid path: {idx_path}")

    return cache_idx_path


def parse_s3_path(path: str) -> Tuple[str, str]:
    """Parses the given S3 path returning correspsonding bucket and key.

    Args:
        path (str): The S3 path

    Returns:
        Tuple[str, str]: A (bucket, key) tuple
    """
    assert _is_s3_path(path)
    parts = path.replace(S3_PREFIX, "").split("/")
    bucket = parts[0]
    if len(parts) > 1:
        key = "/".join(parts[1:])
        assert S3_PREFIX + bucket + "/" + key == path
    else:
        key = ""
    return bucket, key


def get_object_storage_access(path: str) -> str:
    """Get the object storage access"""
    return "s3" if _is_s3_path(path) else "msc"


def dataset_exists(path_prefix: str, idx_path: str, bin_path: str) -> bool:
    """Check if the dataset exists on object storage

    Args:
        path_prefix (str): The prefix to the index (.idx) and data (.bin) files

        idx_path (str): The path to the index file

        bin_path (str): The path to the data file

    Returns:
        bool: True if the dataset exists on object storage, False otherwise
    """
    if _is_s3_path(path_prefix):
        s3_client = boto3.client("s3")
        return _s3_object_exists(s3_client, idx_path) and _s3_object_exists(s3_client, bin_path)
    elif _is_msc_path(path_prefix):
        msc = MultiStorageClientFeature.import_package()
        return msc.exists(idx_path) and msc.exists(bin_path)
    else:
        raise ValueError(f"Invalid path: {path_prefix}")


def cache_index_file(remote_path: str, local_path: str) -> None:
    """Download a file from object storage to a local path with distributed training support.
    The download only happens on Rank 0, and other ranks will wait for the file to be available.

    Note that this function does not include any barrier synchronization. The caller (typically
    in blended_megatron_dataset_builder.py) is responsible for ensuring proper synchronization
    between ranks using torch.distributed.barrier() after this function returns.

    Args:
        remote_path (str): The URL of the file to download (e.g., s3://bucket/path/file.idx
            or msc://profile/path/file.idx)
        local_path (str): The local destination path where the file should be saved

    Raises:
        ValueError: If the remote_path is not a valid S3 or MSC path
    """
    torch_dist_enabled = torch.distributed.is_initialized()

    if torch_dist_enabled:
        rank = torch.distributed.get_rank()
    else:
        rank = 0

    if _is_s3_path(remote_path):
        s3_client = boto3.client("s3")

        if not torch_dist_enabled or rank == 0:
            _s3_download_file(s3_client, remote_path, local_path)

        assert os.path.exists(local_path)
    elif _is_msc_path(remote_path):
        msc = MultiStorageClientFeature.import_package()

        if not torch_dist_enabled or rank == 0:
            msc.download_file(remote_path, local_path)

        assert os.path.exists(local_path)
    else:
        raise ValueError(f"Invalid path: {remote_path}")


================================================
FILE: megatron/core/datasets/readme.md
================================================
# Data Pipeline

## Data pre-processing

Data preprocessing is built around the following classes:

1. `IndexedDatasetBuilder`
2. `IndexedDataset`

At the moment, an end-to-end data preprocessing implementation is left to the user. See the class docstring(s) for more details.

### IndexedDatasetBuilder

The `IndexedDatasetBuilder` is capable of building and merging `IndexedDataset` instances.

### IndexedDataset

The `IndexedDataset` class is the lowest-level data interface in Megatron Core. Internally, an `IndexedDataset` instance references two binaries: the data file (`.bin`) contains document/sequence data and the index file (`.idx`) contains document/sequence metadata.

The index file stores dataset-level metadata first:
- The index header, for backward compatibility
- The index version, for backward compatibility
- A numeric code corresponding to the data type used to write data to the data file
- The number of sequences in the dataset
- The number of documents in the dataset

The index file stores document-level and sequence-level metadata second:
- In order, the number of elements per sequence
- In order, the byte offset (pointer) per sequence
- In order, the consecutive sequence index range `[...)` per document
- In order, the mode per sequence (in the multimodal case)

## Data loading: construction

Building the data loaders is a distributed-aware process built around the following classes:

1. `BlendedMegatronDatasetConfig`
2. `BlendedMegatronDatasetBuilder`
3. `IndexedDataset`
3. `MegatronDataset`
4. `BlendedDataset`

See the class docstrings for more details.

### BlendedMegatronDatasetConfig (extendable)

The `BlendedMegatronDatasetConfig` class parameterizes the `BlendedMegatronDatasetBuilder` and in turn the `MegatronDataset` and `BlendedDataset`.

Different training/inference regimes will require different extensions e.g. the `GPTDatasetConfig`

### BlendedMegatronDatasetBuilder

The `BlendedMegatronDatasetBuilder` class builds the highest-level data interfaces in Megatron Core.

**NB:** All ranks should attempt to build the dataset via the `BlendedMegatronDatasetBuilder` or the program will hang. Which ranks follow through on their attempts can be controlled via the `BlendedMegatronDatasetConfig`.

### IndexedDataset

The `IndexedDataset` class is the lowest-level data interface in Megatron Core.

The `IndexedDataset` should already exist on disk before attempting to build any of the high-level data interfaces.


### MegatronDataset (extendable)

The `MegatronDataset` abstract class is a high-level data interface in Megatron Core. It is an abstraction built upon the `IndexedDataset`.

Different training/inference regimes will require different extensions e.g. the `GPTDataset`

### BlendedDataset

The `BlendedDataset` class is a high-level data interface in Megatron Core. It is an abstraction built upon the `MegatronDataset`.

The `BlendedDataset` is only necessary when a blend multiple data distributions, i.e. multiple `MegatronDataset` instances, should contribute to a certain dataset split. The blend can be controlled via the `BlendedMegatronDatasetConfig`.

## Data loading: implementation

### GPTDataset

The `GPTDataset` is parameterized by the following variables: the underlying `IndexedDataset` instance `indexed_dataset`, the split indices `indexed_indices` (the congituous subset of document or sequence indices used for training, validation, and testing), the number of samples `N`, the sequence length `S`, and the random seed `R`.

The `GPTDataset` creates three index mappings to facilitate lookup: (1) the document index, (2) the sample index, and (3) the shuffle index.

1. The document index _Do_idx_ is a 1-D array mapping from _i_ to document index of length `E * |indexed_indices|` where `E` corresponds to the minimum number of epochs such that `E * |indexed_indices| >= N`. The document index is shuffled according to `R`.

    ```
    Given:

    N = 15
    indexed_indices = [5, 6, 7, 8, 9]
    E = 3

    Then, for example:

    Do_idx = [8, 8, 9, 6, 7, 5, 8, 5, 6, 6, 5, 9, 7, 7, 9]
    ```

2. The sample index _Sa_idx_ is a 2-D array mapping from _j_ to pairs of (_i_, _Do_idx_[ _i_ ] offset) of shape `[N + 1, 2]`. The rows _j_ and _j_ + 1 serve as the left and right bounds for the _j_-th sample. 

    ```
    Given:

    S = 1024

    Then, for example:

    Sa_idx[0] = (0, 0)
    Sa_idx[1] = (0, 1024)       => Do_idx[0] has length greater than S
    Sa_idx[2] = (1, 512)        => Do_idx[0] has length 1536
    Sa_idx[3] = (2, 0)          => Do_idx[1] has length 1536
    Sa_idx[4] = (5, 300)        => Do_idx[2:5] are shorter documents relative to Do_idx[0:2]
    Sa_idx[5] = (6, 24)         => Do_idx[5] has length 1300
    ```

3. The shuffle index _Sh_idx_ is a 1-D array mapping from _k_ to _j_ of length `N`. The shuffle index is shuffled according to `R`.

    ```
    Given

    N = 10

    Then, for example:

    Sh_idx = [4, 0, 2, 6, 1, 9, 5, 8, 7, 3]
    ```

To query the `GPTDataset` for the _k_-th sample we do the following

-  Use the shuffle index to get the index _j_ into the sample index.

    ```
    j = Sh_idx[k]
    ```
- Use the sample index to get the left and right sample-bounding indices into the document index and the starting token offset for each document.

    ```
    i, offset = Sa_idx[j]
    i_next, offset_next = Sa_idx[j + 1]
    ```
- Use the document index to retrieve `S` tokens from consecutive (in the document index) documents.

    ```
    sample = []
    sample += indexed_dataset[Do_idx[i]][offset:]
    if i != i_next:
        sample += indexed_dataset[Do_idx[i + 1:i_next]]
    sample += indexed_dataset[Do_idx[i_next]][:offset_next]
    ```

To save time during initialization, each index is built/cached sequentially on one process rank and subsequently loaded in parallel on other process ranks. The cached indices are unique to a hash generated in the `MegatronDataset.__init__` function.

### BlendedDataset

The `BlendedDataset` is parameterized by the following variables: the underlying `MegatronDataset` instances `D`, the weights `W` (one per dataset), and the size `S`. The `BlendedDataset` will draw samples from contributing datasets in proportion to the weights until achieving a composite dataset of the desired size. During each sampling step, we draw a single sample from the dataset which has the greatest sampling error.

The `BlendedDataset` creates two "blending" indices to facilitate lookup: (1) the dataset index and (2) the dataset sample index.

1. The dataset index _Da_idx_ is a 1-D array mapping from _i_ to dataset index of length `S`.

    ```
    Given

    D = [d0, d1, d2]
    W = [1/2, 1/4, 1/4]
    S = 4

    Then, for example:

    Da_idx = [0, 1, 2, 0]

    ```

2. The dataset sample index _Sa_idx_ is a 1-D mapping from _i_ to the sample index for dataset _Da_idx[i]_ of length `S`.

    ```
    Given

    Da_idx = [0, 1, 2, 0]

    Then, for example:

    Sa_idx = [0, 0, 0, 1]
    ```

To query the `BlendedDataset` for the _k_-th sample we do the following

- Use the dataset index to retrieve the corresponding dataset from `D` and the dataset sample index to retrieve the corresponding sample from that dataset.

    ```
    sample = D[Da_idx[k]][Sa_idx[k]]
    ```

To save time during initialization, each index is built/cached sequentially on one process rank and subsequently loaded in parallel on other process ranks. The cached indices are unique to a hash generated in the `BlendedDataset.__init__` function.

## Fast DataLoader initialization

Especially for large-scale runs, DataLoader initialization can take several minutes, since it involves opening and memory-mapping multiple files and can significantly stress the filesystem. To speed up this process, we have developed the following three optimizations, controlled by configuration flags":

  - `--dataloader-fast-cache-load`: This option assumes that the dataset cache already exists in the specified `--data-cache-path`. When enabled, it speeds up the creation process by removing synchronization points and file check assertions.

  - `--dataloader-defer-npy-index-mmap`: This option also assumes that the dataset cache already exists in the specified `--data-cache-path`. When enabled, it defers the memory mapping of the dataset indexes (.npy files) until their first access. We recommend using this configuration together with `--num-workers` > 0 so that the DataLoader prefetches the next batches of data, thereby hiding the cost of index memory mapping.

  - `--per-dataset-sequences-path`: With this configuration, we specify the JSON file generated by the `tools/build_sequences_per_dataset.py` script. This script generates a single file containing the required metadata from all the specified file prefixes. This configuration is especially useful when dealing with hundreds to thousands of file prefixes, since it requires only a single `open` operation instead of one per file prefix.

================================================
FILE: megatron/core/datasets/t5_dataset.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import os
from collections import deque
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Union

import numpy
import torch
from packaging.version import Version as PkgVersion

from megatron.core.datasets.indexed_dataset import IndexedDataset
from megatron.core.datasets.masked_dataset import (
    MaskedWordPieceDataset,
    MaskedWordPieceDatasetConfig,
)
from megatron.core.datasets.utils import Split
from megatron.core.utils import get_te_version


@dataclass
class T5MaskedWordPieceDatasetConfig(MaskedWordPieceDatasetConfig):
    """Configuration object for Megatron Core T5 WordPiece datasets

    NB: As a temporary holdover from Megatron-LM. The T5 tokenizer has an attribute which defines
    a number of special sentinel tokens used during sampling. The assert in __post_init__ serves to
    preserve compatibility with Megatron-LM until the T5 tokenizer is in Megatron Core.
    """

    sequence_length_encoder: Optional[int] = field(init=False, default=None)
    """A sequence_length alias and the sequence length for the encoder"""

    sequence_length_decoder: int = None
    """The sequence length for the decoder"""

    def __post_init__(self) -> None:
        """Do asserts and set fields post init"""
        super().__post_init__()

        self.sequence_length_encoder = self.sequence_length

        assert self.sequence_length_encoder is not None
        assert self.sequence_length_decoder is not None

        assert len(self.tokenizer.additional_special_tokens_ids) > 0


class T5MaskedWordPieceDataset(MaskedWordPieceDataset):
    """The T5 dataset that assumes WordPiece tokenization

    Args:
        indexed_dataset (IndexedDataset): The IndexedDataset around
            which to build the MegatronDataset

        dataset_path (str): The real path on disk to the dataset, for bookkeeping

        indexed_indices (numpy.ndarray): The set of the documents indices to expose

        num_samples (Optional[int]): The number of samples to draw from the indexed
            dataset. When None, build as many samples as correspond to one epoch.

        index_split (Split): The indexed_indices Split

        config (T5MaskedWordPieceDatasetConfig): The config
    """

    def __init__(
        self,
        indexed_dataset: IndexedDataset,
        dataset_path: str,
        indexed_indices: numpy.ndarray,
        num_samples: Optional[int],
        index_split: Split,
        config: T5MaskedWordPieceDatasetConfig,
    ) -> None:
        super().__init__(
            indexed_dataset, dataset_path, indexed_indices, num_samples, index_split, config
        )

        self.token_lookup = list(self.config.tokenizer.inv_vocab.keys())
        # Account for the single <bos> and single <eos> token ids
        self.sample_index = self._build_sample_index(self.config.sequence_length - 2, 1)

    @staticmethod
    def _key_config_attributes() -> List[str]:
        """Inherited method implementation

        Returns:
            List[str]: The key config attributes
        """
        return super(
            T5MaskedWordPieceDataset, T5MaskedWordPieceDataset
        )._key_config_attributes() + ["sequence_length_decoder"]

    @staticmethod
    def _build_b1ss_attention_mask(
        source_block: torch.tensor, target_block: torch.tensor, make_history_mask: bool = False
    ) -> torch.tensor:
        """Build an attention-mask having shape (bs, 1, q_len, kv_len)
        from source_block and target_block

        Args:
            source_block (torch.tensor): A 2-D array of tokens (bs, q_len)
            target_block (torch.tensor): A 2-D array of tokens (bs, kv_len)
            make_history_mask (bool): Whether to turn mask into causal mask

        Returns:
            torch.tensor: The 4-D attention mask (bs, 1, q_len, kv_len)
        """
        batch_size = source_block.shape[0]
        attention_mask = []
        for i in range(batch_size):
            source_sample = source_block[i]
            target_sample = target_block[i]
            mask = (target_sample[None, :] >= 1) * (source_sample[:, None] >= 1)
            if make_history_mask:
                arange = numpy.arange(source_sample.shape[0])
                history_mask = arange[None,] <= arange[:, None]
                history_mask = torch.tensor(history_mask).to(mask.device)
                mask = mask * history_mask
            mask = ~(mask)  # flip True to False
            attention_mask.append(mask)
        attention_mask = torch.stack(attention_mask)
        attention_mask = attention_mask.unsqueeze(1)
        return attention_mask

    @staticmethod
    def config_attention_mask(
        encoder_tokens: torch.tensor,
        decoder_tokens: torch.tensor,
        encoder_mask: torch.tensor,
        decoder_mask: torch.tensor,
        use_local: bool = False,
        test_te_version: str = None,
    ) -> torch.tensor:
        """Config attention-mask for encoder_mask, decoder_mask, encoder_decoder_mask
        conditioned on transformer-implementation (e.g. TE vs local), TE versions,
        and TE backends

        Args:
            encoder_tokens (torch.tensor): A 2-D array of tokens (bs, kv_len)
            decoder_tokens (torch.tensor): A 2-D array of tokens (bs, q_len)
            encoder_mask (torch.tensor): A 2-D array of tokens (bs, kv_len)
            decoder_mask (torch.tensor): A 2-D array of tokens (bs, q_len)
            use_local (bool): Whether the current T5 model uses local (vs TE)
                transformer implmentation
            test_te_version (str): The Transformer Engine version to test against. Defaults to None.

        Returns:
            Configured encoder_mask, decoder_mask, encoder_decoder_mask
            torch.tensor: configured encoder attention mask
            torch.tensor: configured decoder attention mask
            torch.tensor: configured encoder-decoder attention mask
        """
        # If using local transformer implementation (not transformer_engine):
        # re-organize all attention masks, because local and transformer_engine
        # backbones use different masks shapes. E.g.:
        # (local: b1ss - transformer_engine: b11s)
        if use_local:
            encoder_mask = T5MaskedWordPieceDataset._build_b1ss_attention_mask(
                encoder_tokens, encoder_tokens
            )
            decoder_mask = T5MaskedWordPieceDataset._build_b1ss_attention_mask(
                decoder_tokens, decoder_tokens, make_history_mask=True
            )
            encoder_decoder_mask = T5MaskedWordPieceDataset._build_b1ss_attention_mask(
                decoder_tokens, encoder_tokens
            )

        else:
            # If using transformer_engine transformer implementation:
            # 1. For TE version >= 1.10, across all 3 backends,
            #    The padding mask is configued as
            #    [bs, 1, 1, seq_len] for self-attention and
            #    ([bs, 1, 1, q_len], [bs, 1, 1, kv_len]) for cross-attention
            # 2. For TE version >=1.7 and <1.10, when using Non-fused backend,
            #    The padding mask is configued as
            #    [bs, 1, q_len, kv_len] for both self-attention and for cross-attention
            # 3. For TE version <1.7, only support Non-fused backend
            #    The padding mask is configued as
            #    [bs, 1, q_len, kv_len] for both self-attention and for cross-attention

            # Process for Flash/Fused
            encoder_mask = encoder_mask.unsqueeze(1).unsqueeze(1)
            decoder_mask = decoder_mask.unsqueeze(1).unsqueeze(1)
            encoder_decoder_mask = (decoder_mask, encoder_mask)
            # set decoder_mask to None because decoder uses AttnMaskType.causal
            decoder_mask = None

            # get TE version, using test TE version if not None
            if test_te_version is not None:
                te_version = PkgVersion(test_te_version)
            else:
                te_version = get_te_version()

            # Check for older TE version than 1.10, adjust attention mask accordingly
            flash_attention_enabled = os.getenv("NVTE_FLASH_ATTN") == "1"
            fused_attention_enabled = os.getenv("NVTE_FUSED_ATTN") == "1"
            if (te_version < PkgVersion("1.10.0")) and (te_version >= PkgVersion("1.7.0")):
                if not (flash_attention_enabled) and not (fused_attention_enabled):
                    encoder_mask = T5MaskedWordPieceDataset._build_b1ss_attention_mask(
                        encoder_tokens, encoder_tokens
                    )
                    encoder_decoder_mask = T5MaskedWordPieceDataset._build_b1ss_attention_mask(
                        decoder_tokens, encoder_tokens
                    )
                else:
                    pass
            elif te_version < PkgVersion("1.7.0"):
                if not (flash_attention_enabled) and not (fused_attention_enabled):
                    encoder_mask = T5MaskedWordPieceDataset._build_b1ss_attention_mask(
                        encoder_tokens, encoder_tokens
                    )
                    encoder_decoder_mask = T5MaskedWordPieceDataset._build_b1ss_attention_mask(
                        decoder_tokens, encoder_tokens
                    )
                else:
                    assert not flash_attention_enabled and not fused_attention_enabled, (
                        "Flash and fused attention is not supported with transformer "
                        "engine version < 1.7. Set NVTE_FLASH_ATTN=0 and NVTE_FUSED_ATTN=0"
                        "or upgrade transformer engine >= 1.7"
                    )
        return encoder_mask, decoder_mask, encoder_decoder_mask

    def __getitem__(self, idx: int) -> Dict[str, Union[int, numpy.ndarray]]:
        """Abstract method implementation

        Args:
            idx (int): The index into the dataset

        Returns:
            Dict[str, Union[int, numpy.ndarray]]: The sample data including encoder input, decoder
                input/output, and masks.
        """
        idx_beg, idx_end, target_sequence_length = self.sample_index[idx]
        sample = [self.dataset[i] for i in range(idx_beg, idx_end)]

        numpy_random_state = numpy.random.RandomState(seed=(self.config.random_seed + idx) % 2**32)

        assert target_sequence_length <= self.config.sequence_length

        # Flatten the sample into a list of tokens
        tokens = [token for sentence in sample for token in sentence]

        # Truncate the list of tokens to a desired length
        truncated = len(tokens) > target_sequence_length
        tokens = tokens[:target_sequence_length]

        # Masking
        (tokens, _, _, _, masked_spans) = self._create_masked_lm_predictions(
            tokens, target_sequence_length, numpy_random_state
        )

        # Prepare the encoder input and decoder input and output
        sentinels = deque(self.config.tokenizer.additional_special_tokens_ids)
        encoder_input = []
        decoder_input = [self.config.tokenizer.bos]
        decoder_output = []
        idx_beg = 0
        for indices, labels in masked_spans:
            sentinel = sentinels.popleft()

            # set the end index
            idx_end = indices[0]

            encoder_input.extend(tokens[idx_beg:idx_end])
            encoder_input.append(sentinel)

            decoder_input.append(sentinel)
            decoder_input.extend(labels)

            decoder_output.append(sentinel)
            decoder_output.extend(labels)

            # set the start index
            idx_beg = indices[-1] + 1

        encoder_input.extend(tokens[idx_beg:])
        decoder_output.append(self.config.tokenizer.eos)

        # Pad the sequences and convert to NumPy
        length_toks_encoder = len(encoder_input)
        length_toks_decoder = len(decoder_input)
        length_pads_encoder = self.config.sequence_length_encoder - length_toks_encoder
        length_pads_decoder = self.config.sequence_length_decoder - length_toks_decoder
        assert length_pads_encoder >= 0
        assert length_pads_decoder >= 0

        encoder_input = numpy.array(encoder_input, dtype=numpy.int64)
        encoder_input = numpy.pad(
            encoder_input, (0, length_pads_encoder), constant_values=self._pad_token_id
        )

        decoder_input = numpy.array(decoder_input, dtype=numpy.int64)
        decoder_input = numpy.pad(
            decoder_input, (0, length_pads_decoder), constant_values=self._pad_token_id
        )

        # Create attention and history masks
        mask_encoder = numpy.ones(self.config.sequence_length_encoder, dtype=numpy.int64)
        mask_encoder[encoder_input == self._pad_token_id] = 0
        mask_decoder = numpy.ones(self.config.sequence_length_decoder, dtype=numpy.int64)
        mask_decoder[decoder_input == self._pad_token_id] = 0
        mask_encoder_decoder = None

        # Mask the labels
        decoder_output = numpy.array(decoder_output, dtype=numpy.int64)
        decoder_output = numpy.pad(decoder_output, (0, length_pads_decoder), constant_values=-1)

        # Get the loss mask
        loss_mask = numpy.zeros(self.config.sequence_length_decoder, dtype=numpy.int64)
        loss_mask[:length_toks_decoder] = 1

        # For padded sequences, ensure the embedding layer can map the token ID
        encoder_input[encoder_input == self._pad_token_id] = 0
        decoder_input[decoder_input == self._pad_token_id] = 0
        decoder_output[decoder_output == self._pad_token_id] = 0

        return {
            "text_enc": encoder_input,
            "text_dec": decoder_input,
            "labels": decoder_output,
            "loss_mask": loss_mask,
            "truncated": int(truncated),
            "enc_mask": mask_encoder,
            "dec_mask": mask_decoder,
        }

    def _get_token_mask(self, numpy_random_state: numpy.random.RandomState) -> int:
        """Abstract method implementation

        100% of the time, replace the token id with mask token id.

        Args:
            numpy_random_state (RandomState): The NumPy random state

        Returns:
            int: The mask token id
        """
        return self.config.tokenizer.mask


================================================
FILE: megatron/core/datasets/utils.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

import logging
from enum import Enum
from typing import List, Optional, Tuple

import numpy

from ..utils import log_single_rank

logger = logging.getLogger(__name__)


class Split(Enum):
    train = 0
    valid = 1
    test = 2


def compile_helpers():
    """Compile C++ helper functions at runtime. Make sure this is invoked on a single process."""
    import os
    import subprocess

    command = ["make", "-C", os.path.abspath(os.path.dirname(__file__))]
    if subprocess.run(command).returncode != 0:
        import sys

        log_single_rank(logger, logging.ERROR, "Failed to compile the C++ dataset helper functions")
        sys.exit(1)


def normalize(weights: List[float]) -> List[float]:
    """Do non-exponentiated normalization

    Args:
        weights (List[float]): The weights

    Returns:
        List[float]: The normalized weights
    """

    w = numpy.array(weights, dtype=numpy.float64)
    w_sum = numpy.sum(w)
    w = (w / w_sum).tolist()
    return w


def get_blend_from_list(
    blend: Optional[List[str]],
) -> Optional[Tuple[List[str], Optional[List[float]]]]:
    # pylint: disable=line-too-long
    """Get the blended_megatron_dataset_config.BlendedMegatronDatasetConfig blend
    from the blend list

    Args:
        blend (Optional[List[str]]): The blend list, which can be either
            (1) a list of prefixes, e.g. ["path/to/dataset_1_prefix", "path/to/dataset_2_prefix"], or
            (2) a flattened, zipped list of weights and prefixes, e.g. ["30", "path/to/dataset_1_prefix", "70", "path/to/dataset_2_prefix"]

    Returns:
        Optional[Tuple[List[str], Optional[List[float]]]]: The blend, consisting of a list of dataset prefixes and optionally a list of dataset weights, e.g. [["path/to/dataset_1_prefix", "path/to/dataset_2_prefix"], [30.0, 70.0]].
    """
    # pylint: enable=line-too-long
    if blend is None:
        return None

    if len(blend) % 2 == 1:
        weight_per_dataset = None
        raw_prefix_per_dataset = blend
    else:
        raw_weight_per_dataset, raw_prefix_per_dataset = zip(
            *[(blend[i], blend[i + 1]) for i in range(0, len(blend), 2)]
        )

        weight_per_dataset = []
        for rwpd in raw_weight_per_dataset:
            try:
                weight = float(rwpd)
            except ValueError:
                weight = None
            weight_per_dataset.append(weight)

        is_none = map(lambda _: _ is None, weight_per_dataset)
        if any(is_none):
            assert all(is_none)
            weight_per_dataset = None
            raw_prefix_per_dataset = blend

    prefix_per_dataset = [rppd.strip() for rppd in raw_prefix_per_dataset]

    return prefix_per_dataset, weight_per_dataset


================================================
FILE: megatron/core/datasets/utils_s3.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from megatron.core.datasets.object_storage_utils import (  # pylint: disable=unused-import
    S3_PREFIX,
    S3Client,
)


================================================
FILE: megatron/core/dist_checkpointing/__init__.py
================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.  All rights reserved.

from .core import check_is_distributed_checkpoint
from .mapping import LocalNonpersistentObject, ShardedObject, ShardedTensor
from .serialization import (
    load,
    load_common_state_dict,
    load_content_metadata,
    load_plain_tensors,
    load_tensors_metadata,
    remove_sharded_tensors,
    save,
)


================================================
FILE: megatron/core/dist_checkpointing/core.py
================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.  All rights reserved.

""" Module for managing distributed checkpoints metadata. """

import json
import os
from dataclasses import asdict, dataclass
from typing import Optional

from megatron.core.msc_utils import MultiStorageClientFeature

CONFIG_FNAME = 'metadata.json'


class CheckpointingException(Exception):
    """Base checkpointing related exception"""

    pass


@dataclass
class CheckpointingConfig:
    """Documents backends used in the checkpoint.

    Checkpoint config keeps track of formats used for storing the sharded tensors
    (sharded_backend) and other objects (common_backend).

    Note that versioning is not for the checkpoint content (which is application specific),
    but for the checkpoint format itself.
    """

    sharded_backend: str
    sharded_backend_version: int = 1
    common_backend: str = 'torch'
    common_backend_version: int = 1


def check_is_distributed_checkpoint(checkpoint_dir):
    """Checks if `metadata.json` exists in the checkpoint and is a valid config.

    Args:
        checkpoint_dir: checkpoint directory

    Returns:
        bool: True if `metadata.json` exists in the checkpoint and is a valid config.
    """
    return maybe_load_config(checkpoint_dir) is not None


def maybe_load_config(checkpoint_dir: str) -> Optional[CheckpointingConfig]:
    """Returns checkpoint config if `checkpoint_dir` is a distributed checkpoint and None otherwise

    Args:
        checkpoint_dir: checkpoint directory

    Returns:
        CheckpointingConfig (optional): None if checkpoint is not a valid distributed checkpoint
    """
    config_path = os.path.join(checkpoint_dir, CONFIG_FNAME)
    if checkpoint_dir:
        if MultiStorageClientFeature.is_enabled():
            msc = MultiStorageClientFeature.import_package()
            if not msc.os.path.exists(config_path):
                return None
            with msc.open(config_path) as f:
                config_dict = json.load(f)
        else:
            if not os.path.exists(config_path):
                return None
            with open(config_path) as f:
                config_dict = json.load(f)
        return CheckpointingConfig(**config_dict)
    return None


def save_config(config: CheckpointingConfig, checkpoint_dir: str):
    """Save given config to checkpoint directory.

    Args:
        config: checkpoint config
        checkpoint_dir: checkpoint directory

    Returns:
        None
    """
    config_path = os.path.join(checkpoint_dir, CONFIG_FNAME)
    if MultiStorageClientFeature.is_enabled():
        msc = MultiStorageClientFeature.import_package()
        with msc.open(config_path, 'w') as f:
            json.dump(asdict(config), f)
    else:
        with open(config_path, 'w') as f:
            json.dump(asdict(config), f)


================================================
FILE: megatron/core/dist_checkpointing/dict_utils.py
================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.  All rights reserved.

"""Utilities for operating with dicts and lists.

All functions in this module handle nesting of dicts and lists.
Other objects (e.g. tuples) are treated as atomic leaf types that cannot be traversed.
"""

from collections import defaultdict
from typing import Any, Callable, Dict, Iterable, List, Tuple, TypeVar, Union

import numpy as np
import torch

U, V = TypeVar("U"), TypeVar("V")


def extract_matching_values(
    x: Union[dict, list], predicate: Callable[[Any], bool], return_lists_as_dicts: bool = False
) -> Tuple[Union[dict, list], Union[dict, list]]:
    """Return matching and nonmatching values. Keeps hierarchy.

    Args:
        x (Union[dict, list]) : state dict to process. Top-level argument must be a dict or list
        predicate (object -> bool): determines matching values
        return_lists_as_dicts (bool): if True, matching lists will be turned
            into dicts, with keys indicating the indices of original elements.
            Useful for reconstructing the original hierarchy.
    """

    def _set_elem(target, k, v):
        if return_lists_as_dicts:
            target[k] = v
        else:
            target.append(v)

    if isinstance(x, dict):
        matching_vals = {}
        nonmatching_vals = {}
        for k, v in x.items():
            if isinstance(v, (list, dict)):
                match, nonmatch = extract_matching_values(v, predicate, return_lists_as_dicts)
                if match:
                    matching_vals[k] = match
                if nonmatch or not v:
                    nonmatching_vals[k] = nonmatch
            elif predicate(v):
                matching_vals[k] = v
            else:
                nonmatching_vals[k] = v
    elif isinstance(x, list):  # type: ignore
        matching_vals = {} if return_lists_as_dicts else []
        nonmatching_vals = {} if return_lists_as_dicts else []
        for ind, v in enumerate(x):
            if isinstance(v, (list, dict)) and v:
                match, nonmatch = extract_matching_values(v, predicate, return_lists_as_dicts)
                if match:
                    _set_elem(matching_vals, ind, match)
                if nonmatch or not v:
                    _set_elem(nonmatching_vals, ind, nonmatch)
            else:
                target = matching_vals if predicate(v) else nonmatching_vals
                _set_elem(target, ind, v)
    else:
        raise ValueError(f"Unexpected top-level object type: {type(x)}")
    return matching_vals, nonmatching_vals


def diff(x1: Any, x2: Any, prefix: Tuple = ()) -> Tuple[list, list, list]:
    """Recursive diff of dicts.

    Args:
        x1 (object): left dict
        x2 (object): right dict
        prefix (tuple): tracks recursive calls. Used for reporting differing keys.

    Returns:
        Tuple[list, list, list]: tuple of:
            - only_left: Prefixes present only in left dict
            - only_right: Prefixes present only in right dict
            - mismatch: values present in both dicts but not equal across dicts.
                For tensors equality of all elems is checked.
                Each element is a tuple (prefix, type of left value, type of right value).
    """
    mismatch = []
    if isinstance(x1, dict) and isinstance(x2, dict):
        only_left = [prefix + (k,) for k in x1.keys() - x2.keys()]
        only_right = [prefix + (k,) for k in x2.keys() - x1.keys()]
        for k in x2.keys() & x1.keys():
            _left, _right, _mismatch = diff(x1[k], x2[k], prefix + (k,))
            only_left.extend(_left)
            only_right.extend(_right)
            mismatch.extend(_mismatch)
    elif isinstance(x1, list) or isinstance(x1, tuple) or isinstance(x1, np.ndarray):
        assert type(x1) == type(x2)
        only_left = list(range(len(x1) - 1, len(x2) - 1, -1))
        only_right = list(range(len(x2) - 1, len(x1) - 1, -1))
        for i, (v1, v2) in enumerate(zip(x1, x2)):
            _left, _right, _mismatch = diff(v1, v2, prefix + (i,))
            only_left.extend(_left)
            only_right.extend(_right)
            mismatch.extend(_mismatch)
    else:
        only_left = []
        only_right = []
        mismatch_debug_data = [prefix, type(x1), type(x2)]
        if isinstance(x1, torch.Tensor) and isinstance(x2, torch.Tensor):
            try:
                if x1.device != x2.device:
                    _is_mismatch = not torch.all(x1.cpu() == x2.cpu())
                else:
                    _is_mismatch = not torch.all(x1 == x2)
                mismatch_debug_data.extend(
                    [(x1 != x2).sum(), (x1 != x2).shape, (x1 != x2).nonzero().tolist()]
                )
            except (RuntimeError, TypeError, ValueError):
                _is_mismatch = True
                mismatch_debug_data.extend([x1.shape, x2.shape])
        # TODO: change with concrete type that has both replica_id and data attrs
        elif hasattr(x1, "replica_id") and hasattr(x2, "replica_id"):
            assert type(x1) == type(x2)
            only_left, only_right, mismatch = diff(
                x1.data, x2.data, prefix + (type(x1),)
            )  # type: ignore
            _is_mismatch = False
        else:
            try:
                _is_mismatch = bool(x1 != x2)
            except RuntimeError:
                _is_mismatch = True

        if _is_mismatch:
            mismatch.append(tuple(mismatch_debug_data))

    return only_left, only_right, mismatch


def inspect_types(x: Any, prefix: Tuple = (), indent: int = 4):
    """Helper to print types of (nested) dict values."""
    print_indent = lambda: print(" " * indent * len(prefix), end="")
    if isinstance(x, dict):
        print()
        for k, v in x.items():
            print_indent()
            print(f"> {k}: ", end="")
            inspect_types(v, prefix + (k,), indent)
    elif isinstance(x, list):
        print()
        for i, v in enumerate(x):
            print_indent()
            print(f"- {i}: ", end="")
            inspect_types(v, prefix + (i,), indent)
    else:
        if isinstance(x, torch.Tensor):
            print(f"Tensor of shape {x.shape}")
        else:
            try:
                x_str = str(x)
            except:
                x_str = "<no string repr>"
            if len(x_str) > 30:
                x_str = x_str[:30] + "... (truncated)"
            print(f"[{type(x)}]: {x_str}")


def nested_values(x: Union[dict, list]):
    """Returns iterator over (nested) values of a given dict or list."""
    x_iter = x.values() if isinstance(x, dict) else x
    for v in x_iter:
        if isinstance(v, (dict, list)):
            yield from nested_values(v)
        else:
            yield v


def nested_items_iter(x: Union[dict, list]):
    """Returns iterator over (nested) tuples (container, key, value) of a given dict or list."""
    x_iter = x.items() if isinstance(x, dict) else enumerate(x)
    for k, v in x_iter:
        if isinstance(v, (dict, list)):
            yield from nested_items_iter(v)
        else:
            yield x, k, v


def dict_map(f: Callable, d: dict):
    """`map` equivalent for dicts."""
    for sub_d, k, v in nested_items_iter(d):
        sub_d[k] = f(v)


def dict_map_with_key(f: Callable, d: dict):
    """`map` equivalent for dicts with a function that accepts tuple (key, value)."""
    for sub_d, k, v in nested_items_iter(d):
        sub_d[k] = f(k, v)


def dict_list_map_inplace(f: Callable[[U], V], x: Union[Dict, List, U]):
    """Maps dicts and lists *in-place* with a given function."""
    if isinstance(x, dict):
        for k, v in x.items():
            x[k] = dict_list_map_inplace(f, v)
    elif isinstance(x, list):
        x[:] = (dict_list_map_inplace(f, v) for v in x)
    else:
        return f(x)
    return x


def dict_list_map_outplace(f: Callable[[U], V], x: Union[Dict, List, U]) -> Union[Dict, List, V]:
    """Maps dicts and lists *out-of-place* with a given function."""
    if isinstance(x, dict):
        return {k: dict_list_map_outplace(f, v) for k, v in x.items()}
    elif isinstance(x, list):
        return [dict_list_map_outplace(f, v) for v in x]
    else:
        return f(x)


def merge(x1: Union[dict, list], x2: Union[dict, list], key: Tuple[Union[str, int], ...] = ()):
    """Merges dicts and lists recursively."""
    if isinstance(x1, dict) and isinstance(x2, dict):
        for k, v2 in x2.items():
            if k not in x1:
                x1[k] = v2
            else:
                x1[k] = merge(x1[k], v2, key=key + (k,))
    elif isinstance(x1, list) and isinstance(x2, list):
        if len(x1) != len(x2):
            raise ValueError(
                f"Cannot merge two lists with different lengths ({len(x1)} and {len(x2)}, "
                f"encountered at level {key})"
            )
        for i, v2 in enumerate(x2):
            x1[i] = merge(x1[i], v2, key=key + (i,))
    else:
        raise ValueError(
            f"Duplicate non-dict and non-list values encountered: `{x1}` and `{x2}` "
            f"(at level {key})"
        )
    return x1


def map_reduce(
    xs: Iterable,
    key_fn: Callable = lambda x: x,
    value_fn: Callable = lambda x: x,
    reduce_fn: Callable = lambda x: x,
) -> dict:
    """Simple map-reduce implementation following `more_itertools.map_reduce` interface."""
    res = defaultdict(list)
    for x in xs:
        res[key_fn(x)].append(value_fn(x))
    for k in res:
        res[k] = reduce_fn(res[k])
    return dict(res)


================================================
FILE: megatron/core/dist_checkpointing/exchange_utils.py
================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.  All rights reserved.

"""Utilities for exchanging data between ranks."""

import logging
from collections import defaultdict
from functools import reduce
from itertools import zip_longest
from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple, TypeVar, cast

import numpy as np
import torch

from ..utils import get_pg_rank, get_pg_size, log_single_rank
from .core import CheckpointingException
from .dict_utils import nested_values
from .mapping import ShardedStateDict, ShardedTensor, is_main_replica
from .utils import _sharded_tensor_shard_id, _ShardId, debug_time

# TODO: remove TE references once the TE bug is fixed
# Check if Transformer Engine has Float8Tensor class

try:
    from transformer_engine.pytorch.float8_tensor import Float8Tensor

    HAVE_TE_FLOAT8TENSOR = True
except (ImportError, ModuleNotFoundError):
    # Float8Tensor not found
    HAVE_TE_FLOAT8TENSOR = False


def is_float8tensor(tensor: torch.Tensor) -> bool:
    """Check if a tensor is a Transformer Engine Float8Tensor"""
    return HAVE_TE_FLOAT8TENSOR and isinstance(tensor, Float8Tensor)


logger = logging.getLogger(__name__)


class ShardDistribution(NamedTuple):
    """Represents a distribution of ShardedTensors.

    Given distribution is valid only for a specific parallelization group,
    which is implicit here (not referenced by this class).

    Args:
        main_rank_for_shard (Dict[_ShardId, int]): specifies which rank should hold
            the main replica for a given shard
        shards_in_this_group (Set[_ShardId]): which shards have a main replica
            in this parallelization group
        shard_to_metadata (Dict[_ShardId, ShardedTensor]): maps ShardedTensor
            identifier to the original ShardedTensor
        all_ranks_for_shard (Dict[_ShardId, List[int]]): specifies which ranks
            need a given shard in a given parallelization group
    """

    main_rank_for_shard: Dict[_ShardId, int]
    shards_in_this_group: Set[_ShardId]
    shard_to_metadata: Dict[_ShardId, ShardedTensor]
    all_ranks_for_shard: Dict[_ShardId, List[int]]


def _shard_size(sh_ten: ShardedTensor):
    """Returns size in bytes of a given sharded tensor."""
    numel = np.prod(sh_ten.local_shape)
    return numel * torch._utils._element_size(sh_ten.dtype)


def _get_empty_tensor_for_exchange(
    shard_id: _ShardId,
    needed_shards: Dict[_ShardId, ShardedTensor],
    unneeded_shards: Dict[_ShardId, ShardedTensor],
    loaded_tensors: Dict[_ShardId, torch.Tensor],
) -> Tuple[torch.Tensor, Optional[torch.device]]:
    """Determines the empty tensor to use for exchange.

    If shard_id is needed by this rank, it will be in the `unloaded_shards`.
    Otherwise, the metadata for this tensor can be found in `shard_to_metadata`

    Args:
        shard_id (_ShardId): shard_id that will be exchanged
        needed_shards (Dict[_ShardId, ShardedTensor]): mapping from shard ids
            to metadata for shards needed by this rank
        unneeded_shards (Dict[_ShardId, ShardedTensor]): mapping from shard ids
            to metadata for shards that can be discarded after exchange
        loaded_tensors (Dict[_ShardId, torch.Tensor]): mapping where useful tensors
            are placed in

    Returns:
        Tuple[torch.Tensor, Optional[torch.device]]: empty CUDA tensor to be exchanged,
            and the device of the original state dict tensor (if there was any)
    """
    local_unloaded_sh_ten = needed_shards.get(shard_id)
    if local_unloaded_sh_ten is None:
        orig_device = None  # this tensor will be discarded anyway
        sh_ten = unneeded_shards[shard_id]
        if sh_ten.data is None:
            sh_ten.init_data("cuda")
            tensor = sh_ten.data
            sh_ten.data = None  # won't be used. free memory
        else:
            tensor = sh_ten.data
            if tensor.device.type == "cpu":
                tensor = torch.empty_like(tensor, device="cuda")
    else:
        local_unloaded_sh_ten.init_data("cuda")
        orig_device = local_unloaded_sh_ten.data.device
        tensor = local_unloaded_sh_ten.data
        if tensor.device.type == "cpu":
            tensor = torch.empty_like(tensor, device="cuda")
        loaded_tensors[shard_id] = tensor
    return tensor, orig_device


T = TypeVar("T")


def distribute_shards_to_ranks(
    shard_to_ranks: Dict[T, List[int]],
    shard_to_size: Dict[T, int],
    num_ranks: int,
    cross_parallelization_group_loads: Set[T],
) -> Dict[T, int]:
    """Computes uniform distribution of workload across ranks, based on sizes.

    Currently, the assignment is greedy, based on:
    1. Cross-parallelization group dependencies (shards with main rank in another group
       are assigned at the end to make sure the distribution for load and save
       is as similar as possible).
    2. Secondly, the coverage of each shard
        (how many ranks the shard is available on; lower coverage is assigned first)
    3. Then, the size of each shard (larger size is assigned first)
    4. Finally, shard id for differentiation.

    Last step is added because we rely on the fact that
    the assignment is deterministic on all ranks.

    Args:
        shard_to_ranks (Dict[T, List[int]]): mapping of rank access to shards
        shard_to_size (Dict[T, int]): sizes of each shard
        num_ranks (int): number of ranks in the parallelization group
        cross_parallelization_group_loads (Set[T]): Shards to load that are not in the main replica

    Returns (Dict[T, int]): assignment of shard to rank (which rank should do the work
        to achieve maximal uniformity)
    """
    shard_to_ranks = {k: tuple(v) for k, v in shard_to_ranks.items()}
    shard_to_saving_rank = {}
    rank_sizes = [(0, rank) for rank in range(num_ranks)]

    # start from tensors of lowest coverage, then go by tensor size from largest (hence minus size)
    for shard_id, shard_ranks in sorted(
        shard_to_ranks.items(),
        key=lambda sh_id_ranks: (
            # 0 if rank is not in cross_parallelization_group_loads
            # which means it has higher priority
            int(sh_id_ranks[0] in cross_parallelization_group_loads),
            len(sh_id_ranks[1]),
            -shard_to_size[sh_id_ranks[0]],
            sh_id_ranks[0],
        ),
    ):
        # assign greedily to the least occupied rank
        size, rank = min((size, rank) for size, rank in rank_sizes if rank in shard_ranks)

        shard_to_saving_rank[shard_id] = rank
        rank_sizes[rank] = (size + shard_to_size[shard_id], rank)

    logger.debug(f"distribute_shards_to_ranks distribution: {rank_sizes}")

    return shard_to_saving_rank


def determine_main_replica_uniform_distribution(
    sharded_state_dict: ShardedStateDict,
    parallelization_group: torch.distributed.ProcessGroup,
    ignore_groups: bool = False,
) -> Optional[ShardDistribution]:
    """Computes the save distribution.

    Should be used in conjunction with `distribute_main_replicas_with_precomputed_distribution`
    which applies the computed save distribution.

    We rely on the fact that the assignment algorithm is deterministic on all ranks,
    so there is no extra communication needed after metadata exchange.

    Args:
        sharded_state_dict (ShardedStateDict): state dict to compute the distribution of
        parallelization_group (ProcessGroup): distribution will be computed
            within this process group
        ignore_groups (bool, optional): whether the distribution defines groups.
            This option is primarily used during loading, as it ensures that all replicas,
            including non-main ones, are loaded by this parallelization group
            Defaults to False.

    Returns (ShardDistribution, optional): distribution that can be used to apply the
        parallelization. Returns None if the process_group is trivial (1 rank)

    """
    if parallelization_group is None:
        parallelization_group = torch.distributed.group.WORLD
    group_size = get_pg_size(group=parallelization_group)
    if group_size <= 1:
        return
    local_shards = list(
        sh_base
        for sh_base in nested_values(sharded_state_dict)
        if isinstance(sh_base, ShardedTensor)
    )
    local_shards_no_data = [ten.without_data() for ten in local_shards]

    all_shards = [None] * get_pg_size(group=parallelization_group)
    torch.distributed.all_gather_object(
        all_shards, local_shards_no_data, group=parallelization_group
    )

    shard_to_ranks = defaultdict(list)
    shard_to_size = {}
    shard_to_metadata = {}
    group_has_main_replica: Set[_ShardId] = set()
    group_has_non_main_replica: Set[_ShardId] = set()

    for rank, rank_shards in enumerate(all_shards):
        for sh_ten in rank_shards:
            shard_id = _sharded_tensor_shard_id(sh_ten)
            shard_to_ranks[shard_id].append(rank)
            if shard_id not in shard_to_size:
                shard_to_size[shard_id] = _shard_size(sh_ten)
                shard_to_metadata[shard_id] = sh_ten
            if is_main_replica(sh_ten.replica_id):
                group_has_main_replica.add(shard_id)
            else:
                group_has_non_main_replica.add(shard_id)

    # we always include all main replicas, and non-main only if `ignore_groups`
    shards_in_this_group: Set[_ShardId] = group_has_main_replica
    if ignore_groups:
        shards_in_this_group = shards_in_this_group | group_has_non_main_replica
    # cross-parallel-group references are empty if `not ignore_groups`,
    # otherwise it's `group_has_non_main_replica - group_has_main_replica`
    cross_parallelization_group_loads = shards_in_this_group - group_has_main_replica

    # Filter out shards that don't belong to this group
    shard_to_ranks = {k: v for k, v in shard_to_ranks.items() if k in shards_in_this_group}

    shard_to_saving_rank = distribute_shards_to_ranks(
        shard_to_ranks, shard_to_size, len(all_shards), cross_parallelization_group_loads
    )

    return ShardDistribution(
        shard_to_saving_rank, shards_in_this_group, shard_to_metadata, shard_to_ranks
    )


@torch.no_grad()
@debug_time(f"exchange_loaded_tensors_gather_rounds", logger)
def exchange_loaded_tensors_gather_rounds(
    loaded_tensors: Dict[_ShardId, torch.Tensor],
    unloaded_shards: Dict[_ShardId, ShardedTensor],
    shard_distribution: ShardDistribution = None,
    parallelization_group: Optional[torch.distributed.ProcessGroup] = None,
) -> Dict[_ShardId, torch.Tensor]:
    """Exchange the tensors loaded by different ranks with several all_gather calls.

    Groups tensors by dtype, divide tensors that will be exchanged into rounds
    and execute all_gather for tensors from each round.

    Note: the loading is distributed across ranks based on total loaded size
    in bytes, so there is no guarantee that number of rounds needed for each
    rank will be similar, which might result in a lot of almost empty
    all_gathers. The solution would be to group all tensors into a one
    bytes tensor and do a single all_gather (with similarly sized messages).

    Args:
        loaded_tensors (Dict[_ShardId, torch.Tensor]): mapping from ShardedTensor
            shard ids to tensors already loaded by this rank.
        unloaded_shards (Dict[_ShardId, torch.Tensor]): mapping from ShardedTensor
            shard ids to ShardedTensors that aren't loaded yet.
        shard_distribution (ShardDistribution): distribution of all shards
        parallelization_group (ProcessGroup, optional): process group used for load
            distribution. Tensors will be exchanged within this group

    Returns:
        Dict[_ShardId, torch.Tensor]: dictionary mapping shard ids to tensors
            needed by this rank to load a given state dict. Includes
            previously loaded tensors (from `loaded_tensors` input)
    """
    if parallelization_group is None:
        parallelization_group = torch.distributed.group.WORLD
    main_rank_for_shard, _, shard_to_metadata, all_ranks_for_shard = shard_distribution
    local_rank = get_pg_rank(group=parallelization_group)

    all_loaded_tensors = dict(loaded_tensors)

    # Group by dtype so that we all_gather tensors of the same dtype
    for dtype in sorted(set(map(lambda sh_ten: sh_ten.dtype, shard_to_metadata.values())), key=str):
        with debug_time(f"dtype_{dtype}"):
            # shards_by_rank maps rank to tensors loaded by this rank
            shards_by_rank: List[List[torch.Tensor]] = [
                [] for _ in range(get_pg_size(group=parallelization_group))
            ]
            for shard_id, rank in main_rank_for_shard.items():
                if len(all_ranks_for_shard[shard_id]) == 1:
                    assert all_ranks_for_shard[shard_id][0] == main_rank_for_shard[shard_id], (
                        f"When there is only 1 ranks that needs a given shard,"
                        f" it should be the loading rank."
                        f" Got: needs [{all_ranks_for_shard[shard_id][0]}]"
                        f" vs loads [{main_rank_for_shard[shard_id]}]"
                    )
                    # Skipping the exchange since only the loading rank needs this tensor
                    # TODO: we can employ some optimizations even for `len(shard_to_ranks) > 1`
                    #  case, e.g. P2P exchange. Currently handling this case saves most of the
                    #  work though.
                    continue
                if shard_to_metadata[shard_id].dtype == dtype:
                    shards_by_rank[rank].append(shard_id)

            # Transpose `shards_by_rank` to form exchange rounds
            shards_by_round = zip_longest(*shards_by_rank, fillvalue=None)
            for round_idx, round_shard_ids in enumerate(shards_by_round):
                round_tensors = []
                orig_devices = {}
                for rank, shard_id in enumerate(round_shard_ids):
                    if shard_id is None:
                        # if no more useful data, the given rank will exchange empty tensor
                        local_ten = torch.empty(0, dtype=dtype, device="cuda")
                        orig_device = None
                    else:
                        assert isinstance(shard_id, tuple), type(shard_id)
                        if rank == local_rank:
                            assert shard_id in all_loaded_tensors, (
                                shard_id,
                                all_loaded_tensors.keys(),
                            )
                            orig_device = all_loaded_tensors[shard_id]
                            all_loaded_tensors[shard_id] = all_loaded_tensors[shard_id].cuda()
                            local_ten = all_loaded_tensors[shard_id]
                        else:
                            local_ten, orig_device = _get_empty_tensor_for_exchange(
                                shard_id, unloaded_shards, shard_to_metadata, all_loaded_tensors
                            )
                        # Because of a TE bug, we have to exchange a nominal dtype instead of FP8
                        # It's ok to keep the nominal dtype after exchange, because TE will handle
                        # this during state dict load.
                        # TODO: remove it once the bug is fixed
                        from ..fp8_utils import is_float8tensor  # Avoid circular import

                        if is_float8tensor(local_ten):
                            try:
                                local_ten = local_ten.from_float8()
                            except Exception as e:
                                local_ten = local_ten.dequantize()
                            all_loaded_tensors[shard_id] = local_ten

                    round_tensors.append(local_ten)
                    if orig_device is not None:
                        orig_devices[shard_id] = orig_device

                torch.distributed.all_gather(
                    list(round_tensors),
                    round_tensors[local_rank],
                    group=parallelization_group,
                    async_op=False,
                )

                # Move tensors back to CPU if originally was on CPU
                for shard_id, orig_device in orig_devices.items():
                    all_loaded_tensors[shard_id] = all_loaded_tensors[shard_id].to(orig_device)

                del round_tensors  # remove tensor references

    return all_loaded_tensors


def exchange_loaded_tensors_gather_object(
    loaded_tensors: Dict[_ShardId, torch.Tensor],
    unloaded_shards: Dict[_ShardId, ShardedTensor],
    shard_distribution: ShardDistribution,
    parallelization_group: Optional[torch.distributed.ProcessGroup] = None,
) -> Dict[_ShardId, torch.Tensor]:
    """Exchange the tensors loaded by different ranks with a simple all_gather_object call.

    This version can be used for debugging purposes do to its simplistic
    implementation. Shouldn't be used if performance is important.

    Args:
        loaded_tensors (Dict[_ShardId, torch.Tensor]): mapping from ShardedTensor
            shard ids to tensors already loaded by this rank.
        unloaded_shards (Dict[_ShardId, torch.Tensor]): mapping from ShardedTensor
            shard ids to ShardedTensors that aren't loaded yet.
        shard_distribution (ShardDistribution): distribution of all shards
        parallelization_group (ProcessGroup, optional): process group used for load
            distribution. Tensors will be exchanged within this group

    Returns:
        Dict[_ShardId, torch.Tensor]: dictionary mapping shard ids to tensors
            needed by this rank to load a given state dict. Includes
            previously loaded tensors (from `loaded_tensors` input)

    """
    all_loaded_tensors_list = [None] * torch.distributed.get_world_size(group=parallelization_group)
    torch.distributed.all_gather_object(
        all_loaded_tensors_list, loaded_tensors, group=parallelization_group
    )
    all_loaded_tensors_list = cast(List[Dict[_ShardId, torch.Tensor]], all_loaded_tensors_list)
    all_loaded_tensors = reduce(lambda x, y: {**x, **y}, all_loaded_tensors_list)

    # Error checks
    if len(all_loaded_tensors) != sum(map(len, all_loaded_tensors_list)):
        err_msg = "Duplicate shard ids loaded by different ranks"
        log_single_rank(
            logger,
            logging.ERROR,
            f"{err_msg}. Shards ids by rank:" f" {[lt.keys() for lt in all_loaded_tensors_list]}",
        )
        raise CheckpointingException(err_msg)

    return all_loaded_tensors


def exchange_loaded_objects_gather_object(
    loaded_objects: Dict[_ShardId, Any]
) -> Dict[_ShardId, Any]:
    """Exchange the objects loaded by different ranks with a simple all_gather_object call.

    Args:
        loaded_objects (Dict[_ShardId, Any]): mapping from shard ids to objects
          already loaded by this rank.

    Returns:
        Dict[_ShardId, Any]: dictionary mapping shard ids to objects needed by this rank to
         load a given state dict.
    """
    all_loaded_objects_list = [None] * torch.distributed.get_world_size()
    torch.distributed.all_gather_object(all_loaded_objects_list, loaded_objects, group=None)
    all_loaded_objects_list = cast(List[Dict[_ShardId, Any]], all_loaded_objects_list)
    all_loaded_objects = reduce(lambda x, y: {**x, **y}, all_loaded_objects_list)

    # Error checks
    if len(all_loaded_objects) != sum(map(len, all_loaded_objects_list)):
        err_msg = "Duplicate shard ids loaded by different ranks"
        log_single_rank(
            logger,
            logging.ERROR,
            f"{err_msg}. Shards ids by rank:" f" {[lt.keys() for lt in all_loaded_objects_list]}",
        )
        raise CheckpointingException(err_msg)

    return all_loaded_objects


@torch.no_grad()
@debug_time("exchange_loaded_tensors_broadcast", logger)
def exchange_loaded_tensors_broadcast(
    loaded_tensors: Dict[_ShardId, torch.Tensor],
    unloaded_shards: Dict[_ShardId, ShardedTensor],
    shard_distribution: ShardDistribution,
    parallelization_group: Optional[torch.distributed.ProcessGroup] = None,
) -> Dict[_ShardId, torch.Tensor]:
    """Exchange the tensors loaded by different ranks by a series of broadcasts.

    For each rank for each loaded tensor do a broadcast to the whole group.
    A reasonable tradeoff in terms of performance and simplicity.

    Args:
        loaded_tensors (Dict[_ShardId, torch.Tensor]): mapping from ShardedTensor
            shard ids to tensors already loaded by this rank.
        unloaded_shards (Dict[_ShardId, ShardedTensor]): mapping from ShardedTensor
            shard ids to ShardedTensors that aren't loaded yet.
        shard_distribution (ShardDistribution): distribution of all shards
        parallelization_group (ProcessGroup, optional): process group used for load
            distribution. Tensors will be exchanged within this group

    Returns:
        Dict[_ShardId, torch.Tensor]: dictionary mapping shard ids to tensors
            needed by this rank to load a given state dict. Includes
            previously loaded tensors (from `loaded_tensors` input)
    """
    main_rank_for_shard, _, shard_to_metadata, all_ranks_for_shard = shard_distribution
    local_rank = torch.distributed.get_rank(group=parallelization_group)

    all_loaded_tensors = dict(loaded_tensors)

    for idx, (shard_id, rank) in enumerate(main_rank_for_shard.items()):
        if len(all_ranks_for_shard[shard_id]) == 1:
            assert all_ranks_for_shard[shard_id][0] == main_rank_for_shard[shard_id], (
                f"When there is only 1 ranks that needs a given shard,"
                f" it should be the loading rank."
                f"Got: needs [{all_ranks_for_shard[shard_id][0]}]"
                f" vs loads [{main_rank_for_shard[shard_id]}]"
            )
            # Skipping the exchange since only the loading rank needs this tensor
            # TODO: we can employ some optimizations even for `len(shard_to_ranks) > 1` case,
            #  e.g. P2P exchange. Currently handling this case saves most of the work though.
            continue
        if rank == local_rank:
            assert shard_id in all_loaded_tensors, (shard_id, all_loaded_tensors.keys())
            orig_device = all_loaded_tensors[shard_id].device
            local_ten = all_loaded_tensors[shard_id].cuda()
        else:
            local_ten, orig_device = _get_empty_tensor_for_exchange(
                shard_id, unloaded_shards, shard_to_metadata, all_loaded_tensors
            )

        # Because of a TE bug, we have to exchange a nominal dtype instead of FP8
        # It's ok to keep the nominal dtype after exchange, because TE will handle
        # this during state dict load.
        # TODO: remove it once the bug is fixed
        from ..fp8_utils import is_float8tensor  # Avoid circular import

        if is_float8tensor(local_ten):
            try:
                local_ten = local_ten.from_float8()
            except Exception as e:
                local_ten = local_ten.dequantize()
            all_loaded_tensors[shard_id] = local_ten

        global_src_rank = (
            rank
            if parallelization_group == None
            else torch.distributed.get_global_rank(parallelization_group, rank)
        )
        # We can do async_op=True only if there is no CPU-copy follow-up
        torch.distributed.broadcast(
            local_ten,
            src=global_src_rank,
            group=parallelization_group,
            async_op=orig_device is None,
        )
        # Move tensor back to CPU if originally was on CPU
        if orig_device is not None:
            all_loaded_tensors[shard_id] = local_ten.to(orig_device)
        del local_ten

    return all_loaded_tensors


def exchange_by_distribution(
    loaded_tensors: Dict[_ShardId, torch.Tensor],
    unloaded_shards: Dict[_ShardId, ShardedTensor],
    shard_distribution: ShardDistribution,
    parallelization_group: Optional[torch.distributed.ProcessGroup] = None,
    exchange_algo="broadcast",
) -> Dict[_ShardId, torch.Tensor]:
    """Exchange tensors loaded by different ranks using the specified exchange_algo.

    Args:
        loaded_tensors (Dict[_ShardId, torch.Tensor]): mapping from ShardedTensor
            shard ids to tensors already loaded by this rank.
        unloaded_shards (Dict[_ShardId, ShardedTensor]): mapping from ShardedTensor
            shard ids to ShardedTensors that aren't loaded yet.
        shard_distribution (ShardDistribution): distribution of all shards
        parallelization_group (ProcessGroup, optional): process group used for load
            distribution. Tensors will be exchanged within this group
        exchange_algo (str): The algorithm used for performing exchanges.
            Defaults to 'broadcast'.

    Returns:
        Dict[_ShardId, torch.Tensor]: dictionary mapping shard ids to tensors
            needed by this rank to load a given state dict. Includes
            previously loaded tensors (from `loaded_tensors` input)
    """

    assert shard_distribution is not None, "Expecting distribution to perform exchange"
    if exchange_algo == "gather_object":
        exchange_fn = exchange_loaded_tensors_gather_object
    elif exchange_algo == "gather_rounds":
        exchange_fn = exchange_loaded_tensors_gather_rounds
    elif exchange_algo == "broadcast":
        exchange_fn = exchange_loaded_tensors_broadcast
    else:
        raise NotImplementedError(f"Unrecognized gather algorithm: {exchange_algo}")
    return exchange_fn(loaded_tensors, unloaded_shards, shard_distribution, parallelization_group)


================================================
FILE: megatron/core/dist_checkpointing/mapping.py
================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.  All rights reserved.

"""Core library classes for representing sharding of tensors and objects.

The main expected usage is wrapping torch.Tensors in state dicts with
ShardedTensor class (mostly with the ShardedTensor.from_rank_offsets classmethod).
"""

import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass, field, replace
from itertools import chain
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import torch

from .core import CheckpointingException
from .dict_utils import dict_list_map_inplace

logger = logging.getLogger(__name__)

# These type definitions are just hints to differentiate a plain model state
#  dict (StateDict) from a state dict with tensors replaced with ShardedTensors
#  (ShardedStateDict).
StateDict = Dict[str, Any]
CommonStateDict = Dict[str, Any]
ShardedStateDict = Dict[str, Any]
ReplicaId = Union[int, Tuple[int, ...]]


_logged_deprecations = {}


class ShardedBase(ABC):
    """Base class for ShardedTensor and ShardedStateDict."""

    key: str
    data: object
    replica_id: ReplicaId

    @abstractmethod
    def validate_metadata_integrity(self):
        """Codifies the constraints on metadata attributes."""

    @abstractmethod
    def without_data(self) -> "ShardedBase":
        """Returns a new ShardedBase instance with data=None."""
        raise NotImplementedError


@dataclass
class ShardedTensor(ShardedBase):
    """Represents a mapping between a local tensor and a global tensor.

    Global tensor is assumed to consist of many local tensors distributed
    between different processes.

    Args:
        key: unique identifier of a global tensor
        data: local tensor data. Can be None only for consistency validation
        dtype: tensor dtype
        local_shape: local tensor shape
        global_shape: global tensor shape
        global_offset: offset of a local tensor in a global tensor,
            specified in number of tensor elements
        axis_fragmentations: global tensor fragmentation of each axis
        replica_id: indicates given local tensor's replication wrt.
            local tensors in different processes
        prepend_axis_num: number of axes prepended to the local tensor to
            reflect global tensor shape. The behavior is similar to
            unsqueezing the local tensor.
        allow_shape_mismatch: if True, during loading, the global shape of
            a stored tensor does not have to match the expected global shape.
            Useful for representing tensors with flexible shape,
            e.g. padded.
        flattened_range: specifies a slice that should be applied to a
            flattened tensor with `local_shape` in order to get
            the tensor stored as `data`
    """

    key: str
    data: Optional[torch.Tensor] = field(repr=False)
    dtype: torch.dtype
    local_shape: Tuple[int, ...]
    global_shape: Tuple[int, ...]
    global_offset: Tuple[int, ...]
    axis_fragmentations: Optional[Tuple[int, ...]]
    replica_id: ReplicaId = 0
    prepend_axis_num: int = 0
    allow_shape_mismatch: bool = False
    flattened_range: Optional[slice] = None

    def __post_init__(self):
        self.validate_metadata_integrity()

    def validate_metadata_integrity(self) -> None:
        """Codifies the constraints on metadata attributes.

        Meeting those constraints is guaranteed when instantiating a ShardedTensor
        class with `from_rank_offsets` or `from_rank_offsets_flat` constructors.

        Returns:
            None
        """
        has_flattened_range = self.flattened_range is not None
        if self.data is not None:
            if self.data.dtype != self.dtype:
                raise CheckpointingException(
                    f"Data dtype should match `dtype` attribute for {self}"
                )
            if not has_flattened_range and self.data.shape != self.local_shape:
                raise CheckpointingException(
                    f"Data shape should match `local_shape` attribute for {self}"
                )

        if len(self.global_shape) != len(self.global_offset):
            raise CheckpointingException(
                f"Global offset dimensions should be equal to global shape dimensions for {self}"
            )
        if len(self.local_shape) + self.prepend_axis_num != len(self.global_shape):
            raise CheckpointingException(
                f"Local shape together with `prepend_axis_num` dimensions should be "
                f"equal to global shape dimensions for {self}"
            )

        if self.axis_fragmentations is not None:
            for off, sh in zip(self.global_offset[self.prepend_axis_num :], self.local_shape):
                if sh != 0 and off % sh != 0:
                    raise CheckpointingException(
                        f"Global offset ({off}) must be divisible by local shape ({sh}) for {self}."
                    )

        if self.flattened_range is not None:
            raise CheckpointingException("ShardedTensor.flattened_range is not supported.")

    @property
    def has_regular_grid(self):
        """Alias for having a regular sharding grid."""
        return self.axis_fragmentations is not None

    def global_slice(self) -> Tuple[Union[int, slice], ...]:
        """
        Returns a tuple of int and slice objects representing a slice of the
        global tensor that this ShardedTensor corresponds to.
        """
        assert len(self.global_offset) == len(self.local_shape) + self.prepend_axis_num
        return tuple(
            chain(
                (off for off in self.global_offset[: self.prepend_axis_num]),
                (
                    slice(off, off + sh)
                    for off, sh in zip(
                        self.global_offset[self.prepend_axis_num :], self.local_shape
                    )
                ),
            )
        )

    def local_chunk_offset_in_global(self) -> Tuple[int, ...]:
        """Offset of a local chunk in a global array of chunks.

        Returns:
            Tuple[int, ...]: the offset of the whole local chunk in a global array of chunks.
        """
        assert len(self.global_offset) == len(self.local_shape) + self.prepend_axis_num
        chunk_offset = list(self.global_offset[: self.prepend_axis_num])
        for off, sh in zip(self.global_offset[self.prepend_axis_num :], self.local_shape):
            assert off % sh == 0, str(self)
            chunk_offset.append(off // sh)
        return tuple(chunk_offset)

    def max_allowed_chunks(self) -> Tuple[int, ...]:
        """
        Returns the maximum allowed chunks for this ShardedTensor.
        """
        chunks = []
        for axis_sh, axis_fragm in zip(self.global_shape, self.axis_fragmentations):
            if not self.allow_shape_mismatch and axis_sh % axis_fragm != 0:
                raise CheckpointingException(
                    f"Axis shape ({axis_sh}) not divisible by axis fragmentation ({axis_fragm}"
                )
            axis_chunk_size = axis_sh // axis_fragm
            chunks.append(axis_chunk_size)
        return tuple(chunks)

    def without_data(self):
        return replace(self, data=None)

    @classmethod
    def from_rank_offsets(
        cls,
        key: str,
        data: torch.Tensor,
        *rank_offsets: Tuple[int, int, int],
        replica_id: ReplicaId = 0,
        prepend_axis_num: int = 0,
        flattened_range: None = None,
        **init_kwargs,
    ):
        """Allows to construct the ShardedTensor given offset specified in process ranks.

        Args:
            key (str): unique key
            data (torch.Tensor): local tensor data
            rank_offsets (Tuple[int, int, int]): each tuple
                (axis, axis_rank_offset, axis_fragm) says that if
                global tensor is divided into `axis_fragm` fragment along `axis`
                axis, then local tensor data corresponds to the `axis_rank_offset` chunk.
            replica_id (ReplicaId): see ShardedTensor
            prepend_axis_num (int): see ShardedTensor
            flattened_range (None): must be None when using this constructor
            init_kwargs: passed to ShardedTensor.__init__
        """
        if flattened_range is not None:
            raise ValueError(
                "Cannot instantiate a flat ShardedTensor with `from_rank_offsets` method."
                " Use `from_rank_offsets_flat` instead"
            )
        global_offset = [0] * (data.ndim + prepend_axis_num)
        global_shape = ([1] * prepend_axis_num) + list(data.shape)
        axis_fragmentations = [1] * (data.ndim + prepend_axis_num)
        _seen_axis = set()
        for axis, axis_rank_offset, axis_fragm in rank_offsets:
            if axis < 0 or axis_rank_offset < 0 or axis_fragm < 1 or axis_rank_offset >= axis_fragm:
                raise CheckpointingException(f"Invalid rank offsets: {rank_offsets} for key {key}.")
            _seen_axis.add(axis)

            local_axis_shape = 1 if axis < prepend_axis_num else data.shape[axis - prepend_axis_num]
            global_shape[axis] = axis_fragm * local_axis_shape
            global_offset[axis] = axis_rank_offset * local_axis_shape
            axis_fragmentations[axis] = axis_fragm

        return cls(
            key,
            data,
            data.dtype,
            tuple(data.shape),
            tuple(global_shape),
            tuple(global_offset),
            tuple(axis_fragmentations),
            replica_id,
            prepend_axis_num,
            flattened_range=flattened_range,
            **init_kwargs,
        )

    def init_data(self, device: Union[str, torch.device], init_fn=torch.empty):
        """
        Initialize the tensor data of this ShardedTensor.

        Only called if `data` attribute is None.

        Args:
            device (Union[str, torch.device]): device to place the tensor on
            init_fn (Callable, optional): function to use to initialize the tensor.
                Defaults to `torch.empty`.
        """
        if self.data is not None:
            return
        self.data = init_fn(self.local_shape, dtype=self.dtype, device=device)

    def narrow(self, dim: int, start: int, length: int) -> List["ShardedTensor"]:
        """This is an analogue of torch.narrow for ShardedTensors.

        Narrowing assumes that we narrow a local tensor on each rank.
        This has consequences on local_shape, global_shape, global_offset, etc.

        Args:
            dim (int): dimension to narrow. Doesn't include prepended axes.
            start (int): start element
            length (int): length of the slice

        Returns:
            List[ShardedTensor]: narrowed ShardedTensors. For non-flat tensors,
                the list will always have 1 element. For flat ShardedTensors the number of
                elements varies depending on `dim` and on overlap, because flat
                tensors must be contiguous. In particular the list can be empty.
        """
        prepended_dim = dim + self.prepend_axis_num
        local_length_along_dim = self.local_shape[dim]

        def _update_tuple(x, ind, val):
            x = list(x)
            x[ind] = val
            return tuple(x)

        def _safe_div(x, y):
            assert x % y == 0, (x, y)
            return x // y

        # Decrease global shape and global offset by `length / local_length_along_dim`
        assert (
            self.global_shape[prepended_dim] % local_length_along_dim == 0
        ), f"Only regular grid of local tensors is supported for narrowing, got: {self}"
        assert (
            self.global_offset[prepended_dim] % local_length_along_dim == 0
        ), f"Only regular grid of local tensors is supported for narrowing, got: {self}"
        global_shape = _update_tuple(
            self.global_shape,
            prepended_dim,
            _safe_div(self.global_shape[prepended_dim] * length, local_length_along_dim),
        )
        global_offset = _update_tuple(
            self.global_offset,
            prepended_dim,
            _safe_div(self.global_offset[prepended_dim] * length, local_length_along_dim),
        )

        new_data = self.data.narrow(dim, start, length)
        # always a single result tensor
        return [
            replace(
                self,
                data=new_data,
                local_shape=new_data.shape,
                global_shape=global_shape,
                global_offset=global_offset,
            )
        ]


def is_main_replica(replica_id: ReplicaId):
    """Checks if given `replica_id` is considered as main.

    "Main" replica is:
    - integer 0
    - or an iterable with all 0 elements

    It is the application responsibility to set correct replicas for sharded tensors.

    Args:
        replica_id (Union[int, Tuple[int, ...]]): replica id

    Returns:
        (bool): True for a "main" replica
    """
    if isinstance(replica_id, int):
        return replica_id == 0
    return all(r == 0 for r in replica_id)


class LocalNonpersistentObject:
    """Object that should not be stored in a checkpoint, but restored locally.

    Wrapping any object inside the state dict with LocalNonpersistentObject
    will result in:
    - during saving, this object will *not* be stored in the checkpoint
    - during loading, a local version of this object will be placed in a state dict
    """

    def __init__(self, obj):
        self.obj = obj

    def unwrap(self):
        """Returns the original object."""
        return self.obj


@dataclass
class ShardedObject(ShardedBase):
    """Represents a mapping between a local object and a global object.

    Global object is assumed to consist of many local objects distributed
    between different processes.

    NOTE: Contrary to ShardedTensor, it's impossible to change global object
    sharding. Conceptually, ShardedObject is a fully-sharded ShardedTensor
    with atomic arbitrary typed elements.

    Args:
        key: unique identifier of a global tensor
        data: local object data. Can be None only for consistency validation
        global_shape: global object shape
        global_offset: offset of a local object in a global object, specified in number of shards
        replica_id: indicates local object replication wrt. local objects in different processes
    """

    key: str
    data: object
    global_shape: Tuple[int, ...]
    global_offset: Tuple[int, ...]
    replica_id: ReplicaId = 0

    def __post_init__(self):
        self.validate_metadata_integrity()

    def validate_metadata_integrity(self):
        if len(self.global_shape) != len(self.global_offset):
            raise CheckpointingException(
                f"Global offset dimensions should be equal to global shape dimensions for {self}"
            )

    def without_data(self):
        return replace(self, data=None)

    @property
    def unique_key(self):
        """returns a unique key for this object"""
        return (
            f"{self.key}/shard_"
            f"{'.'.join(map(str, self.global_offset))}_"
            f"{'.'.join(map(str, self.global_shape))}"
        )

    def __str__(self):
        return f"{self.__class__.__name__}(key='{self.key}')"

    @classmethod
    def empty_from_unique_key(cls, unique_key, replica_id: ReplicaId = 0) -> "ShardedObject":
        """Instantiates a ShardedObject from a unique key.

        Args:
            unique_key: a string of the form
                <key>/shard_<global_offset>_<global_shape>
            replica_id: indicates local object replication wrt.
                local objects in different processes

        Returns:
            a ShardedObject with data=None
        """
        key, shard_key = unique_key.split("/")
        shard_str, offset, shape = shard_key.split("_")
        assert shard_str == "shard"
        offset = tuple(map(int, offset.split(".")))
        shape = tuple(map(int, shape.split(".")))
        if len(shape) + 1 == len(offset):
            # This is a backward-compatible fix. We don't know the last
            # element of global shape so set it to -1.
            shape += (-1,)
        return cls(key, None, shape, offset, replica_id)


FactoryBuildFn = Callable[[str, torch.Tensor, ReplicaId, Optional[slice]], ShardedStateDict]
FactoryMergeFn = Callable[[StateDict], torch.Tensor]


@dataclass
class ShardedTensorFactory(ShardedBase):
    """Allows to apply transformations to tensors before/after serialization.

    The essence of those transformations is that they can be applied to
    optimizer states the same way they are applied to the model params.
    The ultimate state dict with sharded tensors must depend functionally on
    `build_fn` arguments (key, data, replica_id, flattened_range),
    which will be provided by the optimizer.

    Builder creates a sub-state-dict out of a tensor before saving, and merger
    merges the corresponding state dict after loading.

    Args:
        key (str): unique identifier of the factory
        data (torch.Tensor): original model parameter that will be further
            transformed by this factory
        build_fn (callable): function that transforms the original tensor
            to a sharded state dict
        merge_fn (callable): function that transforms loaded subtree back
            into a single tensor (inverse of `build_fn`)
        replica_id (ReplicaId): indicates factory replication wrt.
            factories in different processes
        flattened_range (slice, optional): indicates additional flattening
            applied to the ShardedTensors produced by the factory
    """

    key: str
    data: torch.Tensor
    build_fn: FactoryBuildFn
    merge_fn: FactoryMergeFn
    replica_id: ReplicaId = 0
    flattened_range: Optional[slice] = None

    def build(self):
        """Builds a ShardedStateDict from the original tensor"""
        return self.build_fn(self.key, self.data, self.replica_id, self.flattened_range)

    def validate_metadata_integrity(self):
        """No reasonable checks can be applied"""
        pass

    def without_data(self):
        return replace(self, data=None)


def apply_factories(sharded_state_dict: ShardedStateDict):
    """Turn ShardedTensorFactories into ShardedTensors *in-place*.

    Args:
        sharded_state_dict (ShardedStateDict): state dict possibly
            containing ShardedTensorFactory objects

    Returns:
        None: state dict is modified in place
    """

    def apply(x):
        if isinstance(x, ShardedTensorFactory):
            x = x.build()
        return x

    dict_list_map_inplace(apply, sharded_state_dict)


def apply_factory_merges(
    x1: StateDict, x2: ShardedStateDict, key: Tuple[str, ...] = ()
) -> StateDict:
    """Apply merges defined by ShardedTensorFactories *in-place*.

    Args:
        x1 (StateDict): state dict loaded from the checkpoint
        x2 (ShardedStateDict): subset of `x1` (in terms of dict keys)
            with ShardedTensorFactory
            as (possibly nested) values that define how to
            merge objects from the `x1` state dict
        key (Tuple[str, ...]): current key in a recursive call.
            Used only for reporting meaningful errors

    Returns:
        StateDict: `x1` modified in-place
    """
    if isinstance(x2, ShardedTensorFactory):
        return x2.merge_fn(x1)

    # There rest is almost the same as the `merge` function from `dict_utils`
    if isinstance(x1, dict) and isinstance(x2, dict):
        for k, v2 in x2.items():
            if k not in x1:
                raise ValueError(
                    f"Different dict keys encountered in `apply_factory_merges` "
                    f"({x1.keys()} vs {x2.keys()})"
                )
            else:
                x1[k] = apply_factory_merges(x1[k], v2, key=key + (k,))
    elif isinstance(x1, list) and isinstance(x2, list):
        if len(x1) != len(x2):
            err_msg = (
                f"Cannot merge two lists with different lengths "
                f"({len(x1)} and {len(x2)}, encountered at key {key})"
            )
            logger.error(err_msg + f"\nx1: {x1}\nx2: {x2}")
            raise ValueError(err_msg)
        for i, v2 in enumerate(x2):
            x1[i] = apply_factory_merges(x1[i], v2, key=key + (i,))
    elif isinstance(x1, list) and isinstance(x2, dict):
        for k, v2 in x2.items():
            if not isinstance(k, int):
                raise ValueError(
                    f"Invalid dict key {k} non-integer type encountered "
                    f"in a list-dict merge at level {key}"
                )
            if k >= len(x1):
                raise ValueError(
                    f"Dict key {k} out of bound for list of length"
                    f"{len(x1)} (encountered at level {key})"
                )
            x1[k] = apply_factory_merges(x1[k], v2, key=key + (k,))
    else:
        raise ValueError(
            f"Duplicate non-dict and non-list values encountered: `{x1}` and `{x2} (at key {key})`"
        )
    return x1


================================================
FILE: megatron/core/dist_checkpointing/optimizer.py
================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.  All rights reserved.

""" Helpers for defining sharding for optimizer states based on existing sharding
for model parameters.
"""

import logging
from copy import deepcopy
from dataclasses import replace
from typing import Dict, Iterable, Tuple, Union

logger = logging.getLogger(__name__)

import torch

from megatron.core.utils import log_single_rank, to_local_if_dtensor

from .dict_utils import nested_values
from .mapping import (
    LocalNonpersistentObject,
    ShardedStateDict,
    ShardedTensor,
    ShardedTensorFactory,
    StateDict,
)
from .utils import extract_sharded_tensors_and_factories

KEEP_VARS_HINT = (
    " Make sure state dict contains original torch.nn.Parameters (not pure torch.Tensors)"
    " by passing `keep_vars=True` to `.state_dict()`. If any transformation of the original"
    " parameter is needed, use a ShardedTensorFactory."
)


def get_optim_param_to_id_map(optim_params_iter: Iterable[torch.nn.Parameter]) -> Dict[int, int]:
    """Generate mapping from optimizer param to optimizer state id."""
    param_mappings = {}
    for i, param in enumerate(optim_params_iter):
        param = to_local_if_dtensor(param)
        if id(param) not in param_mappings:
            param_mappings[id(param)] = i
    return param_mappings


def get_param_id_to_sharded_param_map(
    model_sharded_state_dict: ShardedStateDict, optim_params_iter: Iterable[torch.nn.Parameter]
) -> Dict[int, Union[ShardedTensor, ShardedTensorFactory]]:
    """Generate mapping from optimizer state ids to model sharded parameters.

    Args:
        model_sharded_state_dict: sharded state dict with all model sharded tensors
            (can have any structure)
        optim_params_iter: iterable which iterates over model parameters tracked by the optimizer.
            The iteration must be in the same order as in the optimizer parameters.

    Returns:
        Dict[int, Union[ShardedTensor, ShardedTensorFactory]]: mapping from optimizer state ids
            to model sharded parameters.
    """
    model_sharded_state_dict, _ = extract_sharded_tensors_and_factories(model_sharded_state_dict)
    id_to_sharded_param_map = {}
    param_to_id_map = get_optim_param_to_id_map(optim_params_iter)
    # If using PyTorch FSDP2 the values in model_sharded_state_dict would
    # have been converted to local tensors during initialization.
    # See the make_(tp)_sharded_tensor_for_checkpoint functions.
    for ten in nested_values(model_sharded_state_dict):
        if id(ten.data) in param_to_id_map:
            id_to_sharded_param_map[param_to_id_map[id(ten.data)]] = ten
        else:
            logger.debug(f'{ten} is not tracked by the optimizer')

    if not id_to_sharded_param_map:
        log_single_rank(
            logger,
            logging.WARNING,
            "Sharded parameters mapping is empty. It means tensors in model state dict"
            " do not correspond to tensors in optimizer parameters map."
            " Make sure to call state_dict with `keep_vars=True`.",
        )
    return id_to_sharded_param_map


def make_sharded_optimizer_tensor(
    model_param: Union[ShardedTensor, ShardedTensorFactory], optim_param: torch.Tensor, prefix: str
) -> Union[ShardedTensor, ShardedTensorFactory]:
    """Build a ShardedTensor or ShardedTensorFactory for optimizer param based on model param

    Args:
        model_param (Union[ShardedTensor, ShardedTensorFactory]): model param
        optim_param (torch.Tensor): corresponding optimizer param
        prefix (str): optimizer prefix for the ShardedTensor or ShardedTensorFactory

    Returns:
        Union[ShardedTensor, ShardedTensorFactory]: wrapped optimizer parameter
    """
    optim_param = to_local_if_dtensor(optim_param)
    if isinstance(model_param, ShardedTensorFactory):
        return replace(model_param, key=f'{prefix}.{model_param.key}', data=optim_param)

    assert tuple(optim_param.shape) == model_param.local_shape, (
        f'Optimizer shape ({tuple(optim_param.shape)} does not match model shape '
        f'({model_param.local_shape})'
    )
    sh_ten = replace(
        model_param, key=f'{prefix}.{model_param.key}', data=optim_param, dtype=optim_param.dtype
    )
    sh_ten.validate_metadata_integrity()
    return sh_ten


def optim_state_to_sharding_state(
    optim_state_dict: StateDict,
    id_to_sharded_param_map: Dict[int, ShardedTensor],
    exclude_keys: Tuple[str] = (),
):
    """Turn optimizer state dict to sharded state dict based on model state dict *in-place*.

    Can be used to add sharding information to most common optimizer state dict.
    Creates separate ShardedTensors for each key in `optim_state_dict['state']`
    (e.g. for torch.optim.Adam there will be separate tensors for `exp_avg` and `exp_avg_sq`)

    Args:
        optim_state_dict (StateDict): optimizer state dict with
            state parameters under `state` key and group hyperparameters under
            `param_groups` -> `params` key.
        id_to_sharded_param_map (Dict[int, ShardedTensor]): mapping from optimizer param ids
            to model sharded tensors. Can be generated with `get_param_id_to_sharded_param_map`
            function.
        exclude_keys (Tuple[str]): optimizer state keys to exclude from the final state dict.

    Returns:
        None: state dict is modified in place
    """
    sharded_state = {}
    for param_id, param_state in optim_state_dict['state'].items():
        sharded_state[param_id] = {}
        for state_key, param in param_state.items():
            if state_key in exclude_keys:
                continue
            if param_id in id_to_sharded_param_map:
                sharded_state[param_id][state_key] = make_sharded_optimizer_tensor(
                    id_to_sharded_param_map[param_id], param, prefix=f'optimizer.state.{state_key}'
                )
            else:
                raise ValueError(f'Param id {param_id} does not match any model sharded param')

    optim_state_dict['param_groups'] = deepcopy(optim_state_dict['param_groups'])
    for group in optim_state_dict['param_groups']:
        group['params'] = LocalNonpersistentObject(group['params'])
    optim_state_dict['state'] = sharded_state


================================================
FILE: megatron/core/dist_checkpointing/serialization.py
================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.  All rights reserved.

"""Entrypoints for saving and loading the distributed checkpoints.

Functions `load` and `save` are equivalents of `torch.load` and `torch.save`
but expect torch.Tensors to be wrapped with classes from the `mapping module`.
Additionally, `load` expects the sharded state dict argument as a guidance for
loading the sharded tensors.
"""

import logging
from pathlib import Path
from typing import Callable, Dict, Optional, Set, Tuple, Union

import torch

from megatron.core.msc_utils import MultiStorageClientFeature
from megatron.core.utils import log_single_rank

from . import ShardedTensor
from .core import CheckpointingConfig, save_config
from .dict_utils import extract_matching_values, merge
from .mapping import (
    CheckpointingException,
    CommonStateDict,
    ShardedObject,
    ShardedStateDict,
    StateDict,
    apply_factory_merges,
)
from .state_dict_utils import load_preprocess, save_preprocess
from .strategies.async_utils import AsyncRequest
from .strategies.base import (
    AsyncSaveShardedStrategy,
    LoadCommonStrategy,
    LoadShardedStrategy,
    SaveCommonStrategy,
    SaveShardedStrategy,
    StrategyAction,
    get_default_strategy,
)
from .utils import extract_sharded_base, force_all_tensors_to_non_fp8
from .validation import (
    StrictHandling,
    determine_global_metadata,
    parse_strict_flag,
    validate_integrity_and_strict_load,
    validate_sharded_objects_handling,
    verify_checkpoint_and_load_strategy,
)

logger = logging.getLogger(__name__)


# flat state dict with sharded objects without any data
CkptShardedMetadata = Dict[str, Union[ShardedTensor, ShardedObject]]

_CONTENT_METADATA_KEY = 'content_metadata'


def load(
    sharded_state_dict: ShardedStateDict,
    checkpoint_dir: str,
    sharded_strategy: Union[LoadShardedStrategy, Tuple[str, int], None] = None,
    common_strategy: Union[LoadCommonStrategy, Tuple[str, int], None] = None,
    validate_access_integrity: bool = True,
    strict: Union[str, StrictHandling] = StrictHandling.ASSUME_OK_UNEXPECTED,
) -> Union[StateDict, Tuple[StateDict, Set[str], Set[str]]]:
    """Loading entrypoint.

    In the steps below, the following verbs refer to corresponding objects:
    - load = load from checkpoint
    - extract = extract from sharded_state_dict
    - add = add to the final state dict
    Steps:
    1. Load common state dict and form the base of the result state dict
    2. Apply factories to sharded_state_dict
    3. Extract LocalNonPersistentObject and add
    4. (optional) Extract ShardedObjects, load and add
    5. Extract ShardedBase, load, apply factory merges and add

    Args:
        sharded_state_dict (ShardedStateDict): state dict of the existing model
            populated with ShardedTensors. Used as a mapping to determine which
            parts of global tensors stored in the checkpoint should be loaded.
        checkpoint_dir (str): directory with the checkpoint
        sharded_strategy (LoadShardedStrategy, Tuple[str, int], optional):
            configures loading behavior for sharded tensors
        common_strategy (LoadCommonStrategy, Tuple[str, int], optional):
            configures loading behavior for common data
        validate_access_integrity (bool default = True): checks if each tensor shard is accessed
            exactly once (as main replica) by some process
        strict (StrictHandling, str, optional): determines the behavior in case of a mismatch
            between the requested sharded state dict and the checkpoint. See `StrictHandling` docs
            for more details. Some values affect the return value of this function
            (missing and unexpected keys are returned).
            Defaults to `True` (StrictHandling.ASSUME_OK_UNEXPECTED) which doesn't
            incur any performance overhead. Other recommended values
            are: `False` (StrictHandling.LOG_UNEXPECTED) which logs only unexpected keys
            or `StrictHandling.RETURN_ALL` which returns all mismatch keys.

    Returns:
        StateDict or Tuple[StateDict, Set[str], Set[str]]: in most cases only
            the loaded state dict is returned. If `strict` flag was set to
    """
    sharded_strategy, common_strategy = verify_checkpoint_and_load_strategy(
        checkpoint_dir, sharded_strategy, common_strategy
    )

    # Dequantize all FP8 tensors in the state dict into their corresponding high-precision tensors.
    # Retaining FP8 tensors in the state dict can cause issues in the following two cases:
    #   1. Sometimes, when the precision of the checkpoint is higher than that of the model params,
    #      we want to directly use the state dict to initialize the main params. If the FP8 tensors
    #      in this sharded state dict are not converted to high-precision tensors, the loaded
    #      tensors will already be quantized, which defeats the purpose of initializing the main
    #      params with a high-precision state dict;
    #   2. When using delayed scaling, this loading process writes an extra value into the global
    #      amax_history buffer of Transformer Engine, which is undesirable.
    force_all_tensors_to_non_fp8(sharded_state_dict)

    common_state_dict = common_strategy.load_common(checkpoint_dir)

    sharded_state_dict, nonpersistent_state_dict, sh_ten_factories = load_preprocess(
        sharded_state_dict
    )
    merge(common_state_dict, nonpersistent_state_dict)

    # At this point we are only dealing with ShardedBase objects
    sharded_state_dict, _ = extract_sharded_base(sharded_state_dict)

    # Validation
    ckpt_sharded_metadata = None
    local_metadata, global_metadata = None, None
    strict = parse_strict_flag(strict)
    if StrictHandling.requires_explicit_ckpt_mismatch_check(strict):
        ckpt_sharded_metadata = load_sharded_metadata(
            checkpoint_dir, sharded_strategy, common_strategy  # type: ignore[arg-type]
        )
    if validate_access_integrity or StrictHandling.requires_global_app_metadata(strict):
        local_metadata, global_metadata = determine_global_metadata(sharded_state_dict)

    sharded_state_dict, missing_keys, unexpected_keys = validate_integrity_and_strict_load(
        sharded_state_dict,
        strict,
        validate_access_integrity,
        local_metadata,
        global_metadata,
        ckpt_sharded_metadata,
    )

    # ShardedBase loading
    if not sharded_strategy.can_handle_sharded_objects:
        validate_sharded_objects_handling(sharded_strategy, common_strategy)
        sharded_objects_state_dict, sharded_state_dict = extract_matching_values(
            sharded_state_dict, lambda v: isinstance(v, ShardedObject)
        )
        sharded_objects = common_strategy.load_sharded_objects(
            sharded_objects_state_dict, checkpoint_dir
        )
        merge(common_state_dict, sharded_objects)

    loaded_state_dict = sharded_strategy.load(sharded_state_dict, checkpoint_dir)

    merge(common_state_dict, loaded_state_dict)

    loaded_state_dict = apply_factory_merges(common_state_dict, sh_ten_factories)

    if StrictHandling.requires_returning_mismatch_keys(strict):
        return common_state_dict, missing_keys, unexpected_keys
    else:
        return common_state_dict


def load_common_state_dict(checkpoint_dir: Union[str, Path]) -> StateDict:
    """Load common (non-sharded) objects state dict from the checkpoint.

    Args:
        checkpoint_dir (str): checkpoint directory

    Returns:
        StateDict: state dict with non-sharded objects from the checkpoint
    """
    if isinstance(checkpoint_dir, Path):
        checkpoint_dir = str(checkpoint_dir)
        log_single_rank(
            logger,
            logging.WARNING,
            "DEPRECATED: Passing 'checkpoint_dir' as a Path object in "
            "load_common_state_dict will no longer be supported in a future release. "
            "Please pass it as a string instead.",
        )
    sharded_strategy, common_strategy = verify_checkpoint_and_load_strategy(checkpoint_dir)
    return common_strategy.load_common(checkpoint_dir)


def load_tensors_metadata(
    checkpoint_dir: str, sharded_strategy: Union[LoadShardedStrategy, None] = None
) -> CkptShardedMetadata:
    """Load tensors metadata from the checkpoint.

    Returns a dictionary similar to a sharded state dict, but note that
    the dictionary keys are simply ShardedTensor keys (contrary to the
    actual sharded state dicts where keys correspond to state dict keys).

    Dict values are ShardedTensors without any sharding (so, the only useful
    information is tensors global shape and dtype).

    Concrete implementation depends on the loading strategy. If no strategy is
    given, a default for a given backend is used.

    Args:
        checkpoint_dir (str): checkpoint directory to load from
        sharded_strategy (LoadShardedStrategy, optional): sharded strategy to load metadata.
            Defaults to None - in this case a default load strategy for a given checkpoint type
            is used.

    Returns:
        CkptShardedMetadata: flat state dict without data describing ShardedTensors
            in the checkpoint
    """
    sharded_strategy, common_strategy = verify_checkpoint_and_load_strategy(
        checkpoint_dir, sharded_strategy
    )
    return sharded_strategy.load_tensors_metadata(Path(checkpoint_dir))


def load_sharded_metadata(
    checkpoint_dir: str,
    sharded_strategy: Union[LoadShardedStrategy, None] = None,
    common_strategy: Union[LoadCommonStrategy, None] = None,
) -> CkptShardedMetadata:
    """Load sharded metadata from the checkpoint.

    Similar to `load_tensors_metadata`, but includes also ShardedObjects.

    Returns a dictionary similar to a sharded state dict, but note that
    the dictionary keys are simply ShardedTensor keys (contrary to the
    actual sharded state dicts where keys correspond to state dict keys).

    Dict values are ShardedTensors without any sharding (so, the only useful
    information is tensors global shape and dtype).

    Concrete implementation depends on the loading strategy. If no strategy is
    given, a default for a given backend is used.

    Args:
        checkpoint_dir (str): checkpoint directory to load from
        sharded_strategy (LoadShardedStrategy, optional): sharded strategy to load metadata.
            Defaults to None - in this case a default load strategy for a given checkpoint type
            is used.
        common_strategy (LoadCommonStrategy, optional): common strategy to load metadata.
            Defaults to None - in this case a default load strategy for a given checkpoint type is
            used. This strategy won't be used unless `sharded_strategy` can't handle ShardedObjects

    Returns:
        CkptShardedMetadata: flat state dict without data describing ShardedTensors
            and ShardedObjects in the checkpoint
    """
    sharded_strategy, common_strategy = verify_checkpoint_and_load_strategy(
        checkpoint_dir, sharded_strategy, common_strategy
    )
    sharded_metadata = sharded_strategy.load_sharded_metadata(checkpoint_dir)
    if not sharded_strategy.can_handle_sharded_objects:
        validate_sharded_objects_handling(sharded_strategy, common_strategy)
        common_metadata = common_strategy.load_sharded_metadata(checkpoint_dir)
        sharded_metadata = merge(sharded_metadata, common_metadata)
    return sharded_metadata


def load_plain_tensors(checkpoint_dir: str) -> StateDict:
    """Load checkpoint tensors without any sharding and plain structure.

    NOTE: common state dict is NOT included.

    Args:
        checkpoint_dir (str): checkpoint directory to load the tensors from.

    Returns:
        StateDict: checkpoint state dict containing only torch.Tensors.
    """
    sharded_state_dict = load_tensors_metadata(checkpoint_dir)
    # Don't validate integrity because shards will be overlapped
    # if world_size > 1 (all processes load whole tensors)
    return load(sharded_state_dict, checkpoint_dir, validate_access_integrity=False)


def load_content_metadata(
    checkpoint_dir: Optional[str] = None, *, preloaded_state_dict: Optional[StateDict] = None
) -> Optional[dict]:
    """Load content metadata stored in the checkpoint with `save(..., content_metadata=...)`.

    Args:
        checkpoint_dir (str, optional): checkpoint directory to load the content metadata from.
        preloaded_state_dict (StateDict, optional): if the state dict was already loaded,
            can be provided to avoid double load from storage

    Returns:
        dict: checkpoint content metadata
        None: in case there is no content metadata in the checkpoint
    """
    if preloaded_state_dict is None:
        if checkpoint_dir is None:
            raise ValueError('Both checkpoint_dir and loaded_state_dict cannot be None')
        preloaded_state_dict = load_common_state_dict(checkpoint_dir)
    return preloaded_state_dict.get(_CONTENT_METADATA_KEY)


def remove_sharded_tensors(checkpoint_dir: str, key_prefix: str):
    """determine the appropriate sharding strategy and delegate removal to the sharded strategy"""
    sharded_strategy, common_strategy = verify_checkpoint_and_load_strategy(checkpoint_dir)
    sharded_strategy.remove_sharded_tensors(checkpoint_dir, key_prefix)


def save(
    sharded_state_dict: ShardedStateDict,
    checkpoint_dir: str,
    sharded_strategy: Union[SaveShardedStrategy, Tuple[str, int], None] = None,
    common_strategy: Union[SaveCommonStrategy, Tuple[str, int], None] = None,
    validate_access_integrity: bool = True,
    async_sharded_save: bool = False,
    preprocess_common_before_consistancy_check: Optional[
        Callable[[CommonStateDict], StateDict]
    ] = None,
    content_metadata: Optional[dict] = None,
) -> Optional[AsyncRequest]:
    """Saving entrypoint.

    Extracts ShardedTensors from the given state dict. Rank 0 saves the
    "regular" part of the checkpoint to common torch file.
    The ShardedTensors are saved according to a strategy specified by the
    config.

    Steps:
    1. Apply factories
    2. Extract and discard LocalNonPersistentObject
    3. Extract all ShardedBase object
    4. Save all other objects to common.pt
    5. (optional) Extract and save ShardedObjects
    6. Save all ShardedBase objects
    7. Write metadata.json file with backend and version metadata.

    Step (6) can be performed asynchronously (see `async_sharded_save`), in this
    case the actual save is embodied in the returned async request and can be
    scheduled by the external caller. For async request, step (7) is added as
    one of the finalization functions, so that metadata.json is written only
    if the checkpoint is complete.

    Args:
        sharded_state_dict (ShardedStateDict): state dict of the populated with
            ShardedTensors. Used as a mapping to determine how local tensors
            should be saved as global tensors in the checkpoint.
        checkpoint_dir (str): directory to save the checkpoint to
        sharded_strategy (SaveShardedStrategy, Tuple[str, int], optional):
            configures sharded tensors saving behavior and backend
        common_strategy (SaveCommonStrategy, Tuple[str, int], optional):
            configures common data saving behavior and backend
        validate_access_integrity (bool default = True): checks if each tensor shard is accessed
            exactly once (as main replica) by some process.
            It also makes sure the common state dict is consistant across all ranks
        async_sharded_save (bool, optional): if True, for the sharded state dict part
            an async save implementation will be called, with the AsyncRequest
            being returned to the caller. Note that it is the caller responsibility to
            actually schedule the async save. Defaults to False.
        preprocess_common_before_consistancy_check (Callable[[CommonStateDict], StateDict], None):
            A callable function that will preprocess the common state dict (i.e can be used  to
            remove keys that we expect to be different in the state dict). The function must not
            modify the original state dict
        content_metadata (dict, optional): metadata to identify the checkpoint content.
            Useful for framework specific versioning.

    Returns:
        AsyncRequest (optional): if `async_sharded_save` is True, returns
            async request that should be scheduled by the caller of this function.
            None otherwise.
    """
    if torch.distributed.get_rank() == 0:
        if MultiStorageClientFeature.is_enabled():
            msc = MultiStorageClientFeature.import_package()
            checkpoint_dir_path = msc.Path(str(checkpoint_dir))
        else:
            checkpoint_dir_path = Path(checkpoint_dir)

        if next(checkpoint_dir_path.iterdir(), None) is not None:
            # Don't throw exception here since this could cause a cascade of failures
            # without human intervention in cases where multiple jobs are queued up.
            if torch.distributed.get_rank() == 0:
                logger.warning("Overwriting old incomplete / corrupted checkpoint...")

    if common_strategy is not None:
        raise NotImplementedError('The only supported common strategy is torch')

    if sharded_strategy is None:
        sharded_strategy = get_default_save_sharded_strategy()
    if not isinstance(sharded_strategy, SaveShardedStrategy):
        assert isinstance(sharded_strategy, tuple), type(sharded_strategy)
        sharded_strategy = get_default_strategy(StrategyAction.SAVE_SHARDED, *sharded_strategy)

    if common_strategy is None:
        common_strategy = get_default_save_common_strategy()
    if not isinstance(common_strategy, SaveCommonStrategy):
        assert isinstance(common_strategy, tuple), type(common_strategy)
        common_strategy = get_default_strategy(StrategyAction.SAVE_COMMON, *common_strategy)

    if content_metadata is not None:
        sharded_state_dict[_CONTENT_METADATA_KEY] = content_metadata

    sharded_state_dict, state_dict = save_preprocess(
        sharded_state_dict, validate_access_integrity, preprocess_common_before_consistancy_check
    )

    common_strategy.save_common(state_dict, checkpoint_dir)

    if not sharded_strategy.can_handle_sharded_objects:
        validate_sharded_objects_handling(sharded_strategy, common_strategy)
        sharded_objects_state_dict, sharded_state_dict = extract_matching_values(
            sharded_state_dict, lambda v: isinstance(v, ShardedObject)
        )
        common_strategy.save_sharded_objects(sharded_objects_state_dict, checkpoint_dir)

    def metadata_finalize_fn():
        if torch.distributed.get_rank() == 0:
            save_config(
                CheckpointingConfig(sharded_strategy.backend, sharded_strategy.version),
                checkpoint_dir,
            )
        torch.distributed.barrier()

    if not async_sharded_save:
        sharded_strategy.save(sharded_state_dict, checkpoint_dir)
        metadata_finalize_fn()
        return None

    if not isinstance(sharded_strategy, AsyncSaveShardedStrategy):
        raise CheckpointingException(
            f'Cannot apply async_save to non-async strategy {sharded_strategy}'
        )
    async_request = sharded_strategy.async_save(sharded_state_dict, checkpoint_dir)
    async_request.finalize_fns.append(metadata_finalize_fn)
    return async_request


def get_default_save_sharded_strategy(
    backend: str = 'torch_dist', version: int = 1
) -> SaveShardedStrategy:
    """Get default save sharded strategy."""
    return get_default_strategy(StrategyAction.SAVE_SHARDED, backend, version)


def get_default_save_common_strategy(
    backend: str = 'torch', version: int = 1
) -> SaveCommonStrategy:
    """Get default save common strategy."""
    return get_default_strategy(StrategyAction.SAVE_COMMON, backend, version)


def get_default_load_sharded_strategy(
    checkpoint_dir: str, cache_metadata: bool = False
) -> LoadShardedStrategy:
    """Get default load sharded strategy.

    Args:
        checkpoint_dir: Path to the checkpoint directory.
        cache_metadata: If True and checkpoint format is torch_dist, use a strategy that caches
            metadata (e.g. when ckpt_assume_constant_structure is enabled).
    """
    return verify_checkpoint_and_load_strategy(checkpoint_dir, cache_metadata=cache_metadata)[0]


================================================
FILE: megatron/core/dist_checkpointing/state_dict_utils.py
================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.  All rights reserved.

""" Utilities for transforming state_dict."""

from typing import Callable, Union

from .dict_utils import dict_list_map_inplace, extract_matching_values
from .mapping import (
    CommonStateDict,
    ShardedStateDict,
    ShardedTensor,
    ShardedTensorFactory,
    StateDict,
    apply_factories,
)
from .utils import _clean_metadata_for_serialization, extract_nonpersistent, extract_sharded_base
from .validation import determine_global_metadata, validate_sharding_integrity


def save_preprocess(
    sharded_state_dict: ShardedStateDict,
    validate_access_integrity: bool = True,
    preprocess_common_before_consistancy_check: Callable[[CommonStateDict], StateDict] = None,
):
    """Preprocesses the given state dictionary by applying factories,
    discarding non-persistent data and extracting the common state dictionary.
    Optionally, it can validate sharding integrity.

    Args:
        sharded_state_dict (ShardedStateDict): The initial state dictionary to be preprocessed.
        validate_access_integrity (bool): If True, triggers validation of sharding integrity.
        preprocess_common_before_consistancy_check (callable, None): A callable function
            that will preprocess the common state dict (i.e can be used  to remove keys
            that we expect to be different in the state dict)

    Returns:
        Tuple[ShardedStateDict, dict]:
            The preprocessed sharded state dictionary and the common state dictionary.
    """
    apply_factories(sharded_state_dict)
    _, sharded_state_dict = extract_nonpersistent(sharded_state_dict)
    sharded_part, common_state_dict = extract_sharded_base(sharded_state_dict)
    sharded_part = filter_out_empty_flatten_tensor(sharded_part)
    if validate_access_integrity:
        preprocessed_common_state_dict = common_state_dict
        if "content_metadata" in preprocessed_common_state_dict:
            preprocessed_common_state_dict["content_metadata"] = _clean_metadata_for_serialization(
                preprocessed_common_state_dict["content_metadata"]
            )

        if preprocess_common_before_consistancy_check:
            preprocessed_common_state_dict = preprocess_common_before_consistancy_check(
                common_state_dict
            )
        validate_sharding_integrity(
            determine_global_metadata(sharded_part)[1],
            common_state_dict=preprocessed_common_state_dict,
        )
    return sharded_part, common_state_dict


def load_preprocess(sharded_state_dict: ShardedStateDict):
    """Preprocesses the given state dictionary by applying factories
    and extracting non-persistent data, without modifying the original dictionary.

    Args:
        sharded_state_dict (ShardedStateDict):
            The initial state dictionary to be processed (remains unchanged).

    Returns:
        Tuple[ShardedStateDict, dict, dict]:
            - A preprocessed copy of the sharded state dictionary.
            - A dictionary containing non-persistent state data.
            - A dictionary of `ShardedTensorFactory` instances.
    """
    # Create a copy of sharded_state_dict as the passed in state dict may have
    # references that prevent tensors from being deallocated
    sharded_state_dict, _ = extract_matching_values(sharded_state_dict, lambda x: True)
    sharded_state_dict = filter_out_empty_flatten_tensor(sharded_state_dict)

    sh_ten_factories, _ = extract_matching_values(
        sharded_state_dict,
        lambda x: isinstance(x, ShardedTensorFactory),
        return_lists_as_dicts=True,
    )
    apply_factories(sharded_state_dict)

    # Data inside sh_ten_factories no longer needed so delete them to reduce memory usage
    dict_list_map_inplace(ShardedTensorFactory.without_data, sh_ten_factories)
    # Non-persistent objects
    nonpersistent_state_dict, sharded_state_dict = extract_nonpersistent(sharded_state_dict)
    dict_list_map_inplace(lambda o: o.unwrap(), nonpersistent_state_dict)
    return sharded_state_dict, nonpersistent_state_dict, sh_ten_factories


def filter_out_empty_flatten_tensor(sharded_state_dict: Union[dict, list]):
    """
    Filter out ShardedTensors with empty flatten_range.
    These tensors can cause the PyTorch check in failure.

    Args:
        sharded_state_dict: state dict possibly containing ShardedTensor objects
    """
    # Filter out ShardedTensors with empty flatten_range.
    # These tensors can cause the PyTorch check in
    # `TorchShardedTensor._init_from_local_shards_and_global_metadata` to fail.
    # This situation may occur in custom Fully Sharded Data Parallel (FSDP) cases.
    sharded_state_dict, _ = extract_matching_values(
        sharded_state_dict,
        lambda v: not (
            isinstance(v, ShardedTensor)
            and v.flattened_range
            and v.flattened_range.start == v.flattened_range.stop
        ),
    )

    return sharded_state_dict


================================================
FILE: megatron/core/dist_checkpointing/strategies/__init__.py
================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.  All rights reserved.

""" Various loading and saving strategies """
from megatron.core.dist_checkpointing.strategies.common import register_default_common_strategies

# We load "common" strategies by default to be always available
register_default_common_strategies()


================================================
FILE: megatron/core/dist_checkpointing/strategies/async_utils.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.

"""
This module provides an async utilities which allow to start
a checkpoint save process in the background.
"""
import gc
import logging
import os
import subprocess
from abc import ABC, abstractmethod
from collections import deque
from contextlib import contextmanager
from queue import Empty
from time import sleep, time
from typing import Callable, Dict, List, NamedTuple, Optional, Tuple

import torch
from torch import multiprocessing as mp

from megatron.core.utils import log_single_rank

from ..utils import debug_time

logger = logging.getLogger(__name__)


def _set_process_qos(cpu_priority: int, io_priority: Optional[int]) -> None:
    """
    Set QoS (Quality of Service) for the current checkpoint writer process.
    This ensures checkpoint writing doesn't interfere with training.

    Args:
        cpu_priority: Nice value for CPU scheduling (0-19, higher = lower priority).
                     Default 10 is moderately deprioritized.
        io_priority: I/O scheduling class and priority. If None, uses best-effort class.
                    Format: class_id (0-3) where 3 = idle (lowest priority).

    Note: Requires appropriate permissions. Failures are logged but not fatal.
    """
    pid = os.getpid()

    # Set CPU priority (nice value). os.nice(increment) adds to current;
    # get current with os.nice(0). Only increase nice (deprioritize);
    # decreasing requires superuser.
    if cpu_priority is not None and cpu_priority >= 0 and cpu_priority <= 19:
        try:
            current_nice = os.nice(0)  # 0 = no change, returns current nice value
            increment = cpu_priority - current_nice
            if increment <= 0:
                logger.warning(
                    "PID %s: Skipping CPU nice (current %s already <= target %s; "
                    "lowering requires superuser",
                    pid,
                    current_nice,
                    cpu_priority,
                )
            else:
                new_nice = os.nice(increment)
                logger.debug(
                    "PID %s: Set CPU nice from %s to %s (target %s)",
                    pid,
                    current_nice,
                    new_nice,
                    cpu_priority,
                )
        except (OSError, PermissionError) as e:
            logger.warning(f"PID {pid}: Failed to set CPU priority: {e}")

    # Set I/O priority (ionice) - Linux only
    if io_priority is not None:
        try:
            # ionice -c <class> -p <pid>
            # class 3 = idle (only when no other process needs I/O)
            # class 2 = best-effort (default, can set priority 0-7)
            subprocess.run(
                ["ionice", "-c", str(io_priority), "-p", str(pid)], check=True, capture_output=True
            )
            logger.debug(f"PID {pid}: Set I/O priority class to {io_priority}")
        except (subprocess.CalledProcessError, FileNotFoundError, PermissionError) as e:
            logger.warning(f"PID {pid}: Failed to set I/O priority: {e}")


@contextmanager
def _disable_gc():
    """Temporarily disables GC."""
    gc_enabled = gc.isenabled()
    try:
        if gc_enabled:
            gc.disable()
        yield
    finally:
        if gc_enabled:
            gc.enable()


class AsyncRequest(NamedTuple):
    """Represents an async request that needs to be scheduled for execution.

    Args:
        async_fn (Callable, optional): async function to call. None represents noop.
        async_fn_args (Tuple): args to pass to `async_fn`.
        finalize_fns (List[Callable]): list of functions to call to finalize the request.
            These functions will be called synchronously after `async_fn` is done
            *on all ranks*.
        async_fn_kwargs (Tuple): kwargs to pass to `async_fn`.
        preload_fn (Callable): preload function to stage tensors from GPU to Host.
            This should be self-contained with a proper list of arguments with  `partial`.
        is_frozen (Bool): a flag to indicate this async request can be modified or not.
        call_idx (int): index variable used to order async requests for synchronization
                        in preloading and writing tensors on the async caller

    """

    async_fn: Optional[Callable]
    async_fn_args: Tuple
    finalize_fns: List[Callable]
    async_fn_kwargs: Dict = {}
    preload_fn: Optional[Callable] = None
    is_frozen: bool = False
    call_idx: int = 0

    def add_finalize_fn(self, fn: Callable) -> None:
        """Adds a new finalize function to the request.

        Args:
            fn (Callable): function to add to the async request. This function
                will be called *after* existing finalization functions.

        Returns:
            None
        """
        if self.is_frozen:
            raise RuntimeError('Cannot add finalization functions to a frozen AsyncRequest')
        self.finalize_fns.append(fn)

    def execute_sync(self) -> None:
        """Helper to synchronously execute the request.

        This logic is equivalent to what should happen in case of the async call.
        """
        # preload tensors.
        async_fn_args = list(self.async_fn_args)
        if self.preload_fn is not None:
            assert len(async_fn_args) == 3, "Expected 3 args to be passed to async function"
            # The async_fn is passed as a partial functool with pre-determined args
            # In the async_fn_args we pass the remaining positional args required by the async_fn
            # async_fn_args[1] refers to the write_buckets
            # To ensure we stage the write_buckets to CPU memory for sync CP,
            # we replace it with preload_fn callable that returns the CPU staged tensors
            async_fn_args[1] = self.preload_fn()
        # persist the state
        if self.async_fn is not None:
            self.async_fn(*async_fn_args, **self.async_fn_kwargs)

        # This utility implements a sync cp save. Hence the barrier.
        torch.distributed.barrier()

        # Finalize the CP state
        for finalize_fn in self.finalize_fns:
            finalize_fn()

    def freeze(self) -> 'AsyncRequest':
        """Freezes the async request, disallowing adding new finalization functions.

        Returns:
            AsyncRequest: new async request with all same fields except for the
                `is_frozen` flag.
        """
        return self._replace(is_frozen=True)


class AsyncCaller(ABC):
    """Wrapper around mp.Process that ensures correct semantic of distributed finalization.

    Starts process asynchronously and allows checking if all processes on all ranks are done.
    """

    @abstractmethod
    def schedule_async_call(self, async_req: AsyncRequest) -> None:
        """Schedule `async_req` with some process forking or reusing
           persistent worker

        This method must be called on all ranks.

        Args:
            async_req (AsyncRequest): `AsyncRequest` object containing to
                                       start async process
        """
        raise NotImplementedError("This should be implemented")

    @abstractmethod
    def is_current_async_call_done(self, blocking: bool, no_dist: bool) -> bool:
        """Check if async save is finished on all ranks.

        For semantic correctness, requires rank synchronization in each check.
        This method must be called on all ranks.

        Args:
            blocking (bool, optional): if True, will wait until the call is done
                on all ranks. Otherwise, returns immediately if at least one rank
                is still active. Defaults to False.
            no_dist (bool, Optional): if True, training ranks simply check its
                asynchronous checkpoint writer without synchronization.

        Returns:
            bool: True if all ranks are done (immediately of after active wait
                if `blocking` is True), False if at least one rank is still active.

        """
        raise NotImplementedError("This should be implemented")

    def sync_all_async_calls(self, is_alive: int) -> bool:
        """Check if all ranks have completed async checkpoint writing

        Args:
            is_alive (bool): if True, the current async request is not completed

        Returns:
            bool: True if all ranks are done, False if at least one rank is still active.

        """
        ten = torch.tensor([is_alive], dtype=torch.int, device=torch.cuda.current_device())
        torch.distributed.all_reduce(ten)
        return ten[0] == 0

    @abstractmethod
    def close(self, abort=False):
        """Terminate the async caller at exit of an application or some termination conditions"""
        logger.debug(f"AsyncCaller: {torch.distributed.get_rank()}, Destroying Async Caller")

    def __del__(self):
        raise NotImplementedError("This should be implemented")


class TemporalAsyncCaller(AsyncCaller):
    """Wrapper around mp.Process that ensures correct semantic of distributed finalization.

    Starts process asynchronously and allows checking if all processes on all ranks are done.
    """

    def __init__(self):
        self.process: Optional[mp.Process] = None
        self.start_time: Optional[float] = None

    @_disable_gc()
    def schedule_async_call(self, async_req: AsyncRequest) -> None:
        """Spawn a process with `async_fn` as the target.

        This method must be called on all ranks.

        Args:
            async_fn (Callable, optional): async function to call. If None,
                no process will be started.
            async_req (AsyncRequest): `AsyncRequest` object containing to
                                       start async process
        """
        if async_req.async_fn is None:
            return  # nothing to do

        async_fn_args = list(async_req.async_fn_args)
        if async_req.preload_fn is not None:
            # If there's a preload_fn in `async_req`, we call this func
            # to do the defined action in `async_req.preload_fn` to
            # stage GPU tensors to its defined destination
            async_fn_args[1] = async_req.preload_fn()

        rank = torch.distributed.get_rank()
        start_sync = time()
        torch.cuda.synchronize()
        end_sync = time()
        logger.debug(f"rank: {rank}, takes {end_sync - start_sync} to finish D2H ")

        ctx = mp.get_context('fork')
        self.start_time = time()
        self.process = ctx.Process(
            target=async_req.async_fn, args=async_fn_args, kwargs=async_req.async_fn_kwargs
        )
        self.process.start()
        init_time = time()
        logger.debug(f"rank: {rank}, takes {init_time - self.start_time} to schedule async ckpt ")

    def is_current_async_call_done(self, blocking: bool = False, no_dist: bool = False) -> bool:
        """Check if async save is finished on all ranks.

        For semantic correctness, requires rank synchronization in each check.
        This method must be called on all ranks.

        Args:
            blocking (bool, optional): if True, will wait until the call is done
                on all ranks. Otherwise, returns immediately if at least one rank
                is still active. Defaults to False.
            no_dist (bool, Optional): if True, training ranks simply check its
                asynchronous checkpoint writer without synchronization.

        Returns:
            bool: True if all ranks are done (immediately of after active wait
                if `blocking` is True), False if at least one rank is still active.
        """
        # The following takes the same overhead
        # as torch.distributed.barrier (single integer all-reduce)
        is_alive = int(self.process.is_alive()) if self.process is not None else 0
        is_done = not is_alive if no_dist else self.sync_all_async_calls(is_alive)

        if is_done or blocking:
            # Process join is called in the following cases
            # 1. blocking == True -> regardless of is_done
            # 2. blocking == False (non-blocking)
            #    -> is_done == True: async requests on all ranks are identified to be finished
            #    `self.close()` makes sure the async callers terminated
            self.close()
            is_done = True
        return is_done

    def close(self, abort=False):
        """For TemporalAsyncCaller, this method is called explictly in `is_current_async_calls_done`

        This method make sure the TemporalAsyncCaller terminated
        with all its assigned async request completed

        Args:
            abort (bool, optional): Default to False. Needs to be manually set to true when
                the checkpoint async process needs to be aborted.
        """
        if self.process:
            logger.debug(f"rank: {torch.distributed.get_rank()}, joining self.process")
            if abort:
                log_single_rank(
                    logger,
                    logging.WARNING,
                    f"Temporal worker aborted in rank {torch.distributed.get_rank()}",
                )
                self.process.kill()
            else:
                self.process.join()
            self.process = None
            logger.debug(
                "TemporalAsyncCaller: Async process join finished "
                f"after {time() - self.start_time:.2f}s from forking"
            )
            self.start_time = None

    def __del__(self):
        pass


class PersistentAsyncCaller(AsyncCaller):
    """Wrapper around mp.Process that ensures correct semantic of distributed finalization.

    Starts process asynchronously and allows checking if all processes on all ranks are done.
    """

    _persistent_process: mp.Process = None
    _persistent_queue: mp.JoinableQueue = None
    _persistent_preload_q: mp.JoinableQueue = None
    _persistent_comp_q: mp.Queue = None

    def __init__(self):
        self.process: Optional[mp.Process] = None
        self.start_time: Optional[float] = None
        self.cur_item: Optional[int] = None
        self.cur_idx: int = -1

    @classmethod
    def _get_process(
        cls,
        rank: int,
        mp_mode: str = 'spawn',
        cpu_priority: int = 10,
        io_priority: Optional[int] = None,
    ):
        if cls._persistent_process is None:
            ctx = mp.get_context(mp_mode)
            logger.debug(f"PersistentAsyncCaller: {rank}, Starting Async Caller")
            cls._persistent_queue = ctx.JoinableQueue()
            cls._persistent_preload_q = ctx.JoinableQueue()
            cls._persistent_comp_q = ctx.Queue()
            cls._persistent_process = ctx.Process(
                target=PersistentAsyncCaller.async_loop,
                args=(
                    rank,
                    cls._persistent_queue,
                    cls._persistent_preload_q,
                    cls._persistent_comp_q,
                    logger.getEffectiveLevel(),
                    cpu_priority,
                    io_priority,
                ),
            )
            cls._persistent_process.daemon = True
            cls._persistent_process.start()
            logger.debug(f"PersistentAsyncCaller: {rank}, Started Async Caller")
        return cls._persistent_process

    def schedule_async_call(self, async_req: AsyncRequest) -> None:
        """Put `AsyncRequest` to the Persistent Async Caller

        This method must be called on all ranks.

        Args:
            async_fn (Callable, optional): async function to call. If None,
                no process will be started.
            async_req (AsyncRequest): `AsyncRequest` object containing to
                                       schedule a checkpointing request
        """
        if async_req.async_fn is None:
            return  # nothing to do

        start_sync = end_sync = None

        self.start_time = time()
        if self.process is None:
            self.process = PersistentAsyncCaller._get_process(torch.distributed.get_rank())
        if async_req.preload_fn is not None:
            self._persistent_preload_q.put(async_req.call_idx)
        self._persistent_queue.put(async_req)
        logger.debug(f"rank: {torch.distributed.get_rank()}, put {async_req.call_idx}")

        if async_req.preload_fn is not None:
            start_sync = time()
            # Synchronize for pre-staging tensors
            self._persistent_preload_q.join()
            end_sync = time()
            logger.debug(
                f"rank: {torch.distributed.get_rank()}, "
                f"takes {end_sync - start_sync} to finish D2H "
            )

        init_time = time()
        logger.debug(
            f"rank: {torch.distributed.get_rank()}, takes {init_time - self.start_time} "
            "to schedule async ckpt "
        )

    def is_current_async_call_done(self, blocking: bool = False, no_dist: bool = False) -> bool:
        """Check if async save is finished on all ranks.

        For semantic correctness, requires rank synchronization in each check.
        This method must be called on all ranks.

        Args:
            blocking (bool, optional): if True, will wait until the call is done
                on all ranks. Otherwise, returns immediately if at least one rank
                is still active. Defaults to False.
            no_dist (bool, Optional): if True, training ranks simply check its
                asynchronous checkpoint writer without synchronization.

        Returns:
            bool: True if all ranks are done (immediately of after active wait
                if `blocking` is True), False if at least one rank is still active.
        """

        is_alive: bool = False

        if self.process:
            while self.cur_item is None:
                try:
                    # Retrieve comp call_idx without waiting
                    self.cur_item = self._persistent_comp_q.get_nowait()
                except Empty:
                    # This method is called after any `AsyncRequest` is pushed to the main loop
                    # So, the background writing is still active
                    # before the worker put call_idx to `comp_q`
                    if not blocking:
                        is_alive = True
                        break
                    sleep(0.1)

        if self.cur_item is not None:
            logger.debug(
                f"rank: {torch.distributed.get_rank()}, item: {self.cur_item}"
                f" is completed, {is_alive}"
            )

        is_done = not is_alive if no_dist else self.sync_all_async_calls(is_alive)
        # This is set to False when blocking == False so this routine is called again
        # to simply call `sync_all_async_calls` to check if other ranks complete the writing
        if is_done:
            # The current request is completed globally. Reset the current item for polling.
            logger.debug(
                f"rank: {torch.distributed.get_rank()}, item: {self.cur_item}"
                f" is completed globally, {is_done}"
            )
            self.cur_item = None

        return is_done

    def close(self, abort=False):
        """Wait on the left async requests and terminate the PersistentAsyncCaller

        Signals the PersistentAsyncCaller by sending a 'DONE' message to make it terminated
        Args:
            abort (bool, optional): Default to False. Needs to be manually set to true when
                the checkpoint async process needs to be aborted.
        """
        logger.debug(
            f"PersistentAsyncCaller: {torch.distributed.get_rank()}, Destroying Async Caller"
        )
        if self.process:
            if abort:
                log_single_rank(
                    logger,
                    logging.WARNING,
                    f"Persistent worker aborted in rank {torch.distributed.get_rank()}",
                )
                self.process.kill()
            else:
                self._persistent_queue.put('DONE')
                self._persistent_queue.join()
                self._persistent_process.join()
            self.process = None
            PersistentAsyncCaller._persistent_process = None
            PersistentAsyncCaller._persistent_queue = None
            PersistentAsyncCaller._persistent_preload_q = None
            PersistentAsyncCaller._persistent_comp_q = None

    def __del__(self):
        self.close()

    @staticmethod
    @_disable_gc()
    def async_loop(
        rank: int,
        queue: mp.JoinableQueue,
        preload_q: mp.JoinableQueue,
        comp_q: mp.Queue,
        log_level: int = logging.INFO,
        cpu_priority: int = 10,
        io_priority: Optional[int] = None,
    ):
        """Main function for the persistent checkpoint worker

        The persisent worker is created once and terminated at exit or
        when application calls `close()` explictily

        This routine receives `AsyncRequest` and does `preload_fn` first and
        put the integer value in `preload_q` to inform the trainer to proceed.
        When the `async_fn` from the request` is completed (background saving is done),
        it puts a integer value to `comp_q` to notify the trainer the completion.

        Args:
            rank (int): the rank of the trainer where the persistent worker is created.
            queue (mp.JoinableQueue): the main queue used to receive `AsyncRequest
                                      from the training rank
            preload_q (mp.JoinableQueue): a queue to inform trainer that preloading of tensors
                                          from GPU to Host or dedicated location is completed
            comp_q (mp.Queue): a queue to inform the training rank the completion of scheduled
                               async checkpoint request
            log_level (int, Optional): an integer to set log-level in this spawned process
                                       to get aligned with the training rank's logging level
            cpu_priority (int): Nice value for CPU scheduling (0-19, higher = lower priority).
                               Default 10 deprioritizes checkpoint writing vs training.
            io_priority (int, Optional): I/O scheduling class (0-3, where 3=idle).
                                        Default 3 ensures checkpoints don't block data loading.

        """
        # Set logger.
        # Set root logger level to affect all modules in this process
        logging.getLogger().setLevel(log_level)
        logger = logging.getLogger(__name__)
        logger.debug(f"PersistentAsyncCaller: persistent ckpt worker for {rank} has started")

        # Set CUDA device to appropriate local_rank to ensure allocations / CUDA contexts
        # in this new process are on the right device, and device 0 on the node does not
        # take on undue memory burden from other devices on node (default behavior without
        # this line).
        torch.cuda.set_device(rank % torch.cuda.device_count())

        # Set QoS to deprioritize checkpoint writing vs training
        # This prevents checkpoint I/O from interfering with data loader
        _set_process_qos(cpu_priority=cpu_priority, io_priority=io_priority)

        # Start busy loop waiting for and executing checkpoint saves.
        while True:
            item = queue.get()
            if isinstance(item, str) and item == 'DONE':
                queue.task_done()
                break
            elif isinstance(item, AsyncRequest):
                async_fn_args = list(item.async_fn_args)
                if item.preload_fn is not None:
                    call_idx = preload_q.get()
                    # the 2nd arg is state dict
                    async_fn_args[1] = item.preload_fn()
                    logger.debug(f"{rank} has completed D2H of {call_idx}")
                    preload_q.task_done()
                if item.async_fn is not None:
                    item.async_fn(*async_fn_args, **item.async_fn_kwargs)
                logger.debug(f"{rank} has completed saving {item.call_idx}")
                comp_q.put(item.call_idx)
                queue.task_done()
                del async_fn_args
            del item
            gc.collect()

        logger.debug(f"PersistentAsyncCaller: persistent ckpt worker for {rank}  has terminated")


class _ActiveAsyncRequest(NamedTuple):
    """Helper to represent an active async call.

    Args:
        idx (int): index of the call (starting from 0)
        async_caller (DistributedAsyncCaller): async caller instance that represents
            the async process handling the async request
        async_request (AsyncRequest):  async request that is being called
    """

    idx: int
    async_caller: AsyncCaller
    async_request: AsyncRequest


class AsyncCallsQueue:
    """Manages a queue of async calls.

    Allows adding a new async call with `schedule_async_request` and finalizing
    active calls with `maybe_finalize_async_calls`.
    """

    _persistent_caller: Optional[PersistentAsyncCaller] = None

    def __init__(self, persistent: bool = False):
        self.async_calls: deque[_ActiveAsyncRequest] = deque([])
        self.call_idx: int = -1
        self.persistent: bool = persistent

    def _get_async_caller(self):
        if not self.persistent:
            return TemporalAsyncCaller()
        if AsyncCallsQueue._persistent_caller is None:
            AsyncCallsQueue._persistent_caller = PersistentAsyncCaller()
        return AsyncCallsQueue._persistent_caller

    @classmethod
    def warmup_persistent_caller(
        cls,
        rank: int,
        mp_mode: str = 'spawn',
        cpu_priority: int = 10,
        io_priority: Optional[int] = None,
    ):
        """Warmup the persistent caller to avoid the overhead of creating it on the first call."""
        PersistentAsyncCaller._get_process(rank, mp_mode, cpu_priority, io_priority)

    def schedule_async_request(self, async_request: AsyncRequest) -> int:
        """Start a new async call and add it to a queue of active async calls.

        This method must be called on all ranks.

        Args:
            async_request (AsyncRequest): async request to start.

        Returns:
            int: index of the async call that was started.
                This can help the user keep track of the async calls.
        """
        self.call_idx += 1
        async_caller = self._get_async_caller()
        # Backward compatibility for local checkpointing built with the old AsyncRequest
        if len(async_request._fields) != len(AsyncRequest._fields):
            async_request = AsyncRequest(**async_request._asdict())
        async_request = async_request.freeze()
        async_caller.schedule_async_call(
            async_request._replace(call_idx=self.call_idx, finalize_fns=[])
        )
        self.async_calls.append(_ActiveAsyncRequest(self.call_idx, async_caller, async_request))
        return self.call_idx

    def maybe_finalize_async_calls(self, blocking=False, no_dist=False) -> List[int]:
        """Finalizes all available calls.

        This method must be called on all ranks.

        Args:
            blocking (bool, optional): if True, will wait until all active requests
                are done. Otherwise, finalizes only the async request that already
                finished. Defaults to False.

            no_dist (bool, Optional): if True, training ranks simply check its
                asynchronous checkpoint writer without synchronization.
        Returns:
            List[int]: list of indices (as returned by `schedule_async_request`)
                of async calls that have been successfully finalized.
        Raises:
            CheckpointException: if any rank(s) raised an exception during checkpoint
                writing, the exceptions are wrapped and raised on all ranks.
        """
        call_idx_finalized = []
        while self.async_calls:
            next_async_done = self.async_calls[0].async_caller.is_current_async_call_done(
                blocking, no_dist
            )
            if not next_async_done:
                break
            with debug_time("finalize", logger):
                call_idx, _, async_request = self.async_calls.popleft()
                for finalize_fn in async_request.finalize_fns:
                    finalize_fn()
                ten = torch.tensor([call_idx], dtype=torch.int, device=torch.cuda.current_device())
                torch.distributed.all_reduce(ten, op=torch.distributed.ReduceOp.MAX)
                assert ten.item() == call_idx, "Unmatched async calls. "
                "That probably means not all ranks are participating in async finalization"
                call_idx_finalized.append(call_idx)
        return call_idx_finalized

    def get_num_unfinalized_calls(self):
        """Get the number of active async calls."""
        return len(self.async_calls)

    def close(self, abort=False):
        """Finalize all calls upon closing.
        Args:
            abort (bool, optional): Default to False. Needs to be manually set to true when
                the checkpoint async process needs to be aborted.
        """
        if not abort:
            self.maybe_finalize_async_calls(blocking=True)
        if self.persistent and AsyncCallsQueue._persistent_caller:
            AsyncCallsQueue._persistent_caller.close(abort=abort)
            AsyncCallsQueue._persistent_caller = None


================================================
FILE: megatron/core/dist_checkpointing/strategies/base.py
================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.  All rights reserved.

""" Strategies base interfaces. """

from abc import ABC, abstractmethod
from collections import defaultdict
from enum import Enum
from pathlib import Path
from typing import Any, DefaultDict, Union

from ..mapping import CheckpointingException, ShardedStateDict, StateDict
from .async_utils import AsyncCallsQueue, AsyncRequest


class StrategyAction(Enum):
    """Specifies save vs load and sharded vs common action."""

    LOAD_COMMON = 'load_common'
    LOAD_SHARDED = 'load_sharded'
    SAVE_COMMON = 'save_common'
    SAVE_SHARDED = 'save_sharded'


default_strategies: DefaultDict[str, dict[tuple, Any]] = defaultdict(dict)

async_calls = AsyncCallsQueue()


def get_default_strategy(action: StrategyAction, backend: str, version: int):
    """Retrieves a default strategy for a given action, backend and version."""
    error_hint: str = ""
    try:
        error_hint = ' Please use PyTorch version >=2.1'
        from .torch import register_default_torch_strategies

        register_default_torch_strategies()
    except ImportError as e:
        raise CheckpointingException(
            f'Cannot import a default strategy for: {(action.value, backend, version)}. '
            f'Error: {e}. Hint: {error_hint}'
        ) from e
    try:
        return default_strategies[action.value][(backend, version)]
    except KeyError as e:
        raise CheckpointingException(
            f'Cannot find a default strategy for: {(action.value, backend, version)}'
        ) from e


def register_default_strategy(
    action: StrategyAction,
    backend: str,
    version: int,
    strategy: Union['SaveStrategyBase', 'LoadStrategyBase'],
):
    """Adds a given strategy to the registry of default strategies.

    Args:
        action (StrategyAction): specifies save/load and sharded/common
        backend (str): backend that the strategy becomes a default for
        version (int): version that the strategy becomes a default for
        strategy (SaveStrategyBase, LoadStrategyBase): strategy to register
    """
    default_strategies[action.value][(backend, version)] = strategy


class LoadStrategyBase(ABC):
    """Base class for a load strategy. Requires implementing checks for compatibility with a
    given checkpoint version."""

    @abstractmethod
    def check_backend_compatibility(self, loaded_backend):
        """Verifies if this strategy is compatible with `loaded_backend`."""
        raise NotImplementedError

    @abstractmethod
    def check_version_compatibility(self, loaded_version):
        """Verifies if this strategy is compatible with `loaded_version`."""
        raise NotImplementedError

    @property
    def can_handle_sharded_objects(self):
        """Returns whether or not this strategy can handle loading ShardedObjects."""
        return False


class SaveStrategyBase(ABC):
    """Base class for a save strategy. Requires defining a backend type and
    version of the saved format."""

    def __init__(self, backend: str, version: int):
        self.backend = backend
        self.version = version

    @property
    def can_handle_sharded_objects(self):
        """Returns whether or not this strategy can handle saving ShardedObjects."""
        return False

    def __str__(self):
        return f'{self.__class__.__name__}({self.backend}, {self.version})'


class LoadCommonStrategy(LoadStrategyBase):
    """Load strategy for common (non-sharded) objects"""

    @abstractmethod
    def load_common(self, checkpoint_dir: Union[str, Path]):
        """Load common part of the checkpoint."""
        raise NotImplementedError

    @abstractmethod
    def load_sharded_objects(
        self, sharded_objects_state_dict: ShardedStateDict, checkpoint_dir: Union[str, Path]
    ):
        """Load sharded objects from the checkpoint."""
        raise NotImplementedError

    def load_sharded_metadata(self, checkpoint_dir: Union[str, Path]) -> ShardedStateDict:
        """Load just the metadata from the checkpoint."""
        if not self.can_handle_sharded_objects:
            return {}
        raise NotImplementedError


class LoadShardedStrategy(LoadStrategyBase):
    """Load strategy for sharded tensors"""

    @abstractmethod
    def load(self, sharded_state_dict: ShardedStateDict, checkpoint_dir: Union[str, Path]):
        """Load the sharded part of the checkpoint."""
        raise NotImplementedError

    @abstractmethod
    def load_tensors_metadata(self, checkpoint_dir: Union[str, Path]):
        """Load tensors metadata from the checkpoint for ShardedTensors.

        Returns a dictionary similar to a sharded state dict, but note that
        the dictionary keys are simply ShardedTensor keys (contrary to the
        actual sharded state dicts where keys correspond to state dict keys).

        Dict values are ShardedTensors without any data and sharding (so, the
        only useful information is tensors global shape and dtype).
        """
        raise NotImplementedError(
            f'Loading only tensors metadata not implemented for {self.__class__.__name__}'
        )

    def load_sharded_metadata(self, checkpoint_dir: Union[str, Path]):
        """Load sharded metadata from the checkpoint for ShardedTensors and ShardedObjects.

        Returns a dictionary similar to a sharded state dict, but note that
        the dictionary keys are simply sharded keys (contrary to the
        actual sharded state dicts where keys correspond to state dict keys).

        Dict values are ShardedTensors or ShardedObjects without any data and sharding.
        """
        if not self.can_handle_sharded_objects:
            return self.load_tensors_metadata(checkpoint_dir)
        raise NotImplementedError(
            f'Loading only sharded metadata not implemented for {self.__class__.__name__}'
        )

    def remove_sharded_tensors(self, checkpoint_dir: Union[str, Path], key_prefix: str):
        """Remove all tensors whose key starts with key_prefix"""
        raise NotImplementedError


class SaveCommonStrategy(SaveStrategyBase):
    """Save strategy for common (non-sharded) objects"""

    @abstractmethod
    def save_common(self, common_state_dict: StateDict, checkpoint_dir: Union[str, Path]):
        """Save common part of the state dict."""
        raise NotImplementedError

    def save_sharded_objects(
        self, sharded_objects_state_dict: ShardedStateDict, checkpoint_dir: Union[str, Path]
    ):
        """Save sharded objects from the state dict."""
        raise NotImplementedError


class SaveShardedStrategy(SaveStrategyBase):
    """Save strategy for sharded tensors"""

    @abstractmethod
    def save(self, sharded_state_dict: ShardedStateDict, checkpoint_dir: Union[str, Path]):
        """Save the sharded part of the state dict."""
        raise NotImplementedError


class AsyncSaveShardedStrategy(SaveShardedStrategy):
    """Save strategy suitable for async save."""

    @abstractmethod
    def async_save(
        self, sharded_state_dict: ShardedStateDict, checkpoint_dir: Union[str, Path]
    ) -> AsyncRequest:
        """Perform preparation and return an AsyncRequest to the external caller.

        Args:
            sharded_state_dict (ShardedStateDict): sharded state dict to save
            checkpoint_dir (Path): checkpoint target directory

        Returns:
            AsyncRequest: represents the async save function and finalization function.
                It is the caller responsibility to actually schedule the async save.
        """
        raise NotImplementedError

    def save(self, sharded_state_dict: ShardedStateDict, checkpoint_dir: Union[str, Path]):
        """Each async strategy can be trivially used as a sync strategy."""
        async_request = self.async_save(sharded_state_dict, checkpoint_dir)
        async_request.execute_sync()
        del async_request


================================================
FILE: megatron/core/dist_checkpointing/strategies/cached_metadata_filesystem_reader.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION.  All rights reserved.

""" FS Reader with metadata cached support. """

import os
from typing import Dict, Union

from torch.distributed.checkpoint import FileSystemReader, Metadata


class CachedMetadataFileSystemReader(FileSystemReader):
    """
    Extends FileSystemReader to cache metadata for improved performance.

    Metadata is shared across all reader instances that use the same checkpoint
    directory (same path), since the loaded metadata is identical.

    Attributes:
        _metadata_cache (Dict[str, Metadata]): Class-level cache keyed by checkpoint path.
    """

    _metadata_cache: Dict[str, Metadata] = {}

    def __init__(self, path: Union[str, os.PathLike], cache_metadata: bool = True) -> None:
        """
        Initialize with file system path.

        Args:
            path (Union[str, os.PathLike]): Path to the checkpoint directory or file.
        """
        super().__init__(path=path)
        self._cache_key = os.path.abspath(os.fspath(path)) if cache_metadata else None

    def read_metadata(self) -> Metadata:
        """
        Read metadata from file system, caching for subsequent calls.
        Shared across instances when the checkpoint directory is the same.

        Returns:
            Metadata: Checkpoint metadata.
        """
        if self._cache_key not in CachedMetadataFileSystemReader._metadata_cache:
            CachedMetadataFileSystemReader._metadata_cache[self._cache_key] = (
                super().read_metadata()
            )
        return CachedMetadataFileSystemReader._metadata_cache[self._cache_key]

    @classmethod
    def clear_metadata_cache(cls):
        """
        Clear the metadata cache.
        """
        cls._metadata_cache.clear()


================================================
FILE: megatron/core/dist_checkpointing/strategies/checkpointable.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
from itertools import chain

import torch
from torch.distributed.checkpoint.metadata import (
    ChunkStorageMetadata,
    MetadataIndex,
    TensorProperties,
)
from torch.distributed.checkpoint.planner import TensorWriteData, WriteItem, WriteItemType

from ..mapping import ShardedTensor


class CheckpointableShardedTensor(torch.Tensor):
    """ShardedTensor extension compatible with PyTorch DCP checkpointing library.

    Implements the torch.distributed._checkpointable._Checkpointable protocol.
    """

    def __new__(cls, data: torch.Tensor, sh_ten: ShardedTensor):
        return torch.Tensor._make_wrapper_subclass(cls, torch.Size(sh_ten.global_shape))

    def __init__(self, data: torch.Tensor, sh_ten: ShardedTensor):
        self._data = data
        self._sh_ten = sh_ten

    def __create_write_items__(
        self, fqn: str, sh_ten: 'CheckpointableShardedTensor', index: int = None
    ) -> list[WriteItem]:
        """Simple translation from ShardedTensor offsets into DCP offsets.

        Args:
            fqn (str): tensor FQN.
            sh_ten (CheckpointableShardedTensor): same as `self`
            index (int): specifies index within the LocalShardsContainer.
                This is an optimization hint used in DCP.

        Returns:
            List[WriteItem]: list of DCP WriteItem metadata objects.
        """
        offsets = torch.Size(sh_ten._sh_ten.global_offset)
        global_shape = torch.Size(sh_ten._sh_ten.global_shape)
        chunk_size = torch.Size(sh_ten._sh_ten.local_shape)
        assert chunk_size == sh_ten._sh_ten.data.size()

        return [
            WriteItem(
                index=MetadataIndex(fqn, offsets, index),
                type=WriteItemType.SHARD,
                tensor_data=TensorWriteData(
                    chunk=ChunkStorageMetadata(offsets=offsets, sizes=chunk_size),
                    properties=TensorProperties.create_from_tensor(sh_ten._sh_ten.data),
                    size=global_shape,
                ),
            )
        ]

    def __create_chunk_list__(self) -> list[ChunkStorageMetadata]:
        """Simple translation from ShardedTensor offsets into DCP offsets.

        Returns:
            List[ChunkStorageMetadata]: list of DCP ChunkStorageMetadata metadata objects.
        """
        offsets = torch.Size(self._sh_ten.global_offset)
        chunk_size = torch.Size(self._sh_ten.local_shape)
        assert chunk_size == self._sh_ten.data.size()

        return [ChunkStorageMetadata(offsets=offsets, sizes=chunk_size)]

    def __get_tensor_shard__(self, index: MetadataIndex) -> torch.Tensor:
        """Trivial implementation which simply yields the underlying tensor.

        Args:
            index (MetadataIndex): unused

        Returns:
            Tensor: the underlying data tensor
        """
        return self._sh_ten.data

    @classmethod
    def from_sh_ten(cls, sh_ten: ShardedTensor) -> 'CheckpointableShardedTensor':
        """Constructor which turns a ShardedTensor into CheckpointableShardedTensor

        Args:
            sh_ten (ShardedTensor): a sharded tensor to wrap

        Returns:
            CheckpointableShardedTensor: wrapped ShardedTensor
        """
        assert isinstance(sh_ten, ShardedTensor)
        return cls(sh_ten.data, sh_ten)

    @classmethod
    def __torch_dispatch__(cls, func, types, args, kwargs=None):
        """Placeholder implementation."""
        raise NotImplementedError(
            f"{cls.__name__}.__torch_dispatch__ not implemented."
            f" {cls.__name__} shouldn't be used with Tensor operations."
        )

    def __repr__(self):
        return f'{self.__class__.__name__}({self._sh_ten.__repr__()})'


class LocalShardsContainer(torch.Tensor):
    """DCP compatible container for local shards.

    PyTorch DCP requires a single tensor per rank for a given global tensor FQN.
    This class acts as a container allowing multiple checkpointable shards per rank.

    Implements the torch.distributed._checkpointable._Checkpointable protocol.
    """

    @staticmethod
    def __new__(cls, local_shards: list[torch.Tensor]) -> "LocalShardsContainer":
        assert len(local_shards) > 0
        # This assumes local shard already has correct size info
        return torch.Tensor._make_wrapper_subclass(cls, local_shards[0].size())

    def __init__(self, local_shards: list[torch.Tensor]):
        for local_shard in local_shards:
            # this is needed only for __get_tensor_shard__
            assert isinstance(local_shard, CheckpointableShardedTensor)
        self._local_shards = local_shards

    @classmethod
    def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
        """Placeholder implementation."""
        raise NotImplementedError(
            f"{cls.__name__}.__torch_dispatch__ not implemented."
            f" {cls.__name__} shouldn't be used with Tensor operations."
        )

    def __create_write_items__(
        self, fqn: str, local_shards_cont: 'LocalShardsContainer'
    ) -> list[object]:
        """Delegates creating write items to local shards.

        Args:
            fqn (str): tensor FQN.
            local_shards_cont (LocalShardsContainer): same as `self`

        Returns:
            List[WriteItem]: list of DCP WriteItem metadata objects.
        """
        return list(
            chain.from_iterable(
                shard.__create_write_items__(fqn, shard, index=index)
                for index, shard in enumerate(local_shards_cont._local_shards)
            )
        )

    def __create_chunk_list__(self) -> list[ChunkStorageMetadata]:
        """Delegates creating chunk items to local shards.

        Returns:
            List[ChunkStorageMetadata]: list of DCP ChunkStorageMetadata metadata objects.
        """
        return list(
            chain.from_iterable(shard.__create_chunk_list__() for shard in self._local_shards)
        )

    def __get_tensor_shard__(self, index: MetadataIndex) -> torch.Tensor:
        """Performs shard matching lookup based on index hint or offset.

        Args:
            index (MetadataIndex): metadata specifying the offset of the queried shard.
                Optionally provides an index hint which speeds up the lookup.

        Returns:
            Tensor: the matching shard data tensor
        """
        if index.offset is None:
            raise ValueError(
                f"Cannot lookup {index.fqn} for a LocalShardsContainer without an offset"
            )

        shards = self._local_shards
        # index hint direct lookup
        if index.index is not None:
            if (
                len(shards) > index.index
                and torch.Size(shards[index.index]._sh_ten.global_offset) == index.offset
            ):
                return shards[index.index].__get_tensor_shard__(index)

        # slow linear search
        for shard in shards:
            if torch.Size(shard._sh_ten.global_offset) == index.offset:
                return shard.__get_tensor_shard__(index)
        raise ValueError(f"Could not find shard at '{index.offset}' for FQN: '{index.fqn}'")

    def __repr__(self):
        return f'{self.__class__.__name__}({self._local_shards.__repr__()})'


================================================
FILE: megatron/core/dist_checkpointing/strategies/common.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.

""" Common strategies. """

import logging
import os
from pathlib import Path
from typing import Union

import torch

from megatron.core.dist_checkpointing.mapping import ShardedStateDict, StateDict
from megatron.core.dist_checkpointing.strategies.base import (
    SaveCommonStrategy,
    StrategyAction,
    register_default_strategy,
)
from megatron.core.msc_utils import MultiStorageClientFeature

from ..dict_utils import dict_list_map_inplace, nested_values
from ..mapping import CheckpointingException, ShardedObject, is_main_replica
from ..strategies.base import LoadCommonStrategy

COMMON_STATE_FNAME = 'common.pt'

logger = logging.getLogger(__name__)


def register_default_common_strategies():
    """Register default common strategies."""
    register_default_strategy(StrategyAction.LOAD_COMMON, 'torch', 1, TorchCommonLoadStrategy())
    register_default_strategy(
        StrategyAction.SAVE_COMMON, 'torch', 1, TorchCommonSaveStrategy('torch', 1)
    )


class TorchCommonSaveStrategy(SaveCommonStrategy):
    """Common save strategy leveraging native torch save/load."""

    def save_common(self, common_state_dict: StateDict, checkpoint_dir: Union[str, Path]):
        """Save common part of the state dict."""
        if torch.distributed.get_rank() == 0:
            path = os.path.join(checkpoint_dir, COMMON_STATE_FNAME)
            if MultiStorageClientFeature.is_enabled():
                msc = MultiStorageClientFeature.import_package()
                msc.torch.save(common_state_dict, path)
            else:
                torch.save(common_state_dict, path)

    def save_sharded_objects(
        self, sharded_objects_state_dict: ShardedStateDict, checkpoint_dir: Union[str, Path]
    ):
        """Save sharded objects from the state dict."""
        for sh_obj in nested_values(sharded_objects_state_dict):
            if is_main_replica(sh_obj.replica_id):
                save_path = os.path.join(checkpoint_dir, f"{sh_obj.unique_key}.pt")
                parent_dir = os.path.dirname(save_path)
                if MultiStorageClientFeature.is_enabled():
                    msc = MultiStorageClientFeature.import_package()
                    msc.os.makedirs(parent_dir, exist_ok=True)
                    msc.torch.save(sh_obj.data, save_path)
                else:
                    os.makedirs(parent_dir, exist_ok=True)
                    torch.save(sh_obj.data, save_path)

    def can_handle_sharded_objects(self):
        """This strategy can handle ShardedObjects."""
        return True


class TorchCommonLoadStrategy(LoadCommonStrategy):
    """Common load strategy leveraging native torch save/load."""

    def load_common(self, checkpoint_dir: Union[str, Path]):
        """Load common (non-sharded) objects state dict from the checkpoint.

        Args:
            checkpoint_dir (Union[str, Path]): checkpoint directory

        Returns:
            StateDict: state dict with non-sharded objects from the checkpoint
        """
        load_path = os.path.join(checkpoint_dir, COMMON_STATE_FNAME)
        try:
            if MultiStorageClientFeature.is_enabled():
                msc = MultiStorageClientFeature.import_package()
                return msc.torch.load(load_path, map_location='cpu')
            else:
                return torch.load(load_path, map_location='cpu')
        except FileNotFoundError as e:
            err_msg = f'Common file {load_path} does not exist'
            if MultiStorageClientFeature.is_enabled():
                msc = MultiStorageClientFeature.import_package()
                ckpt_files = [f.name for f in msc.Path(checkpoint_dir).iterdir()]
            else:
                ckpt_files = [f.name for f in checkpoint_dir.iterdir()]
            logger.debug(f'{err_msg}. Checkpoint directory content: {ckpt_files}')
            raise CheckpointingException(err_msg) from e

    def load_sharded_objects(
        self, sharded_objects_state_dict: ShardedStateDict, checkpoint_dir: Union[str, Path]
    ):
        """Replaces all ShardedObject from a given state dict with values loaded from the
        checkpoint.

        Args:
            sharded_objects_state_dict (ShardedStateDict):
                sharded state dict defining what objects should be loaded.
            checkpoint_dir (Union[str, Path]): checkpoint directory

        Returns:
            None: sharded state dict is modified in place
        """

        def load_sharded_object(sh_obj: ShardedObject):
            sh_obj.data = None
            load_path = os.path.join(checkpoint_dir, f'{sh_obj.unique_key}.pt')
            try:
                if MultiStorageClientFeature.is_enabled():
                    msc = MultiStorageClientFeature.import_package()
                    loaded_obj = msc.torch.load(load_path)
                else:
                    loaded_obj = torch.load(load_path)
            except FileNotFoundError as e:
                # Backward compatible logic: previously the save format was incorrect
                base, _ = os.path.splitext(sh_obj.unique_key)
                old_load_path = os.path.join(checkpoint_dir, f"{base}.pt")
                try:
                    if MultiStorageClientFeature.is_enabled():
                        msc = MultiStorageClientFeature.import_package()
                        loaded_obj = msc.torch.load(old_load_path)
                    else:
                        loaded_obj = torch.load(old_load_path)
                except FileNotFoundError:
                    err_msg = f'Object shard {load_path} not found'
                    obj_subdir = os.path.join(checkpoint_dir, sh_obj.key)
                    if os.path.exists(obj_subdir):
                        obj_files = os.listdir(obj_subdir)
                        logger.debug(
                            f'{err_msg}. Object {sh_obj.key} directory content: {obj_files}'
                        )
                    else:
                        ckpt_files = os.listdir(checkpoint_dir)
                        logger.debug(
                            f'{err_msg}. Object {sh_obj.key} directory does not exist. Checkpoint'
                            f' directory content: {ckpt_files}'
                        )
                    raise CheckpointingException(err_msg) from e
            return loaded_obj

        return dict_list_map_inplace(load_sharded_object, sharded_objects_state_dict)

    def load_sharded_metadata(self, checkpoint_dir: Union[str, Path]) -> ShardedStateDict:
        if MultiStorageClientFeature.is_enabled():
            msc = MultiStorageClientFeature.import_package()
            checkpoint_dir = msc.Path(checkpoint_dir)
        else:
            checkpoint_dir = Path(checkpoint_dir)

        sharded_metadata = {}
        for subdir in checkpoint_dir.iterdir():
            if not subdir.is_dir():
                continue
            shard_files = list(subdir.glob('shard_*.pt'))
            if not shard_files:
                continue
            sh_objs = []
            for shard_file in shard_files:
                full_key = f'{subdir.name}/{shard_file.stem}'
                sh_objs.append(ShardedObject.empty_from_unique_key(full_key))

            # This is a backward-compatibility fix, where the last global shape is missing in the
            # name
            if sh_objs[0].global_shape[-1] < 0:
                max_last_offset = max(map(lambda sh_obj: sh_obj.global_offset[-1], sh_objs))
                for sh_obj in sh_objs:
                    sh_obj.global_shape = (*sh_obj.global_shape[:-1], max_last_offset + 1)

            # Update the sharded state dict
            for sh_obj in sh_objs:
                sharded_metadata[sh_obj.unique_key] = sh_obj
        return sharded_metadata

    @property
    def can_handle_sharded_objects(self):
        """This strategy can handle ShardedObjects."""
        return True

    def check_backend_compatibility(self, loaded_version):
        pass

    def check_version_compatibility(self, loaded_version):
        pass


================================================
FILE: megatron/core/dist_checkpointing/strategies/filesystem_async.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION.  All rights reserved.

"""Storage writer for PyT Distributed format allowing asynchronous save."""

import dataclasses
import inspect
import logging
import os
import pickle
import queue
import threading
from functools import partial
from heapq import heappop, heappush
from itertools import chain
from operator import itemgetter
from pathlib import Path
from time import time
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import torch
from torch import multiprocessing as mp
from torch.distributed.checkpoint import FileSystemWriter
from torch.distributed.checkpoint.api import WRAPPED_EXCEPTION, _wrap_exception
from torch.distributed.checkpoint.filesystem import DEFAULT_SUFFIX, _StoragePrefix, _write_item
from torch.distributed.checkpoint.metadata import Metadata

try:
    from torch.distributed.checkpoint.filesystem import _StorageWriterTransforms
except ImportError:
    _StorageWriterTransforms = Any

from torch.distributed.checkpoint.planner import SavePlan, SavePlanner, WriteItem, WriteItemType
from torch.distributed.checkpoint.storage import WriteResult
from torch.futures import Future

from .async_utils import _disable_gc

logger = logging.getLogger(__name__)

WriteBucket = Tuple[Path, str, Tuple[list, list]]  # represents writes to a single file

try:
    import psutil

    HAVE_PSUTIL = True
except ImportError:
    HAVE_PSUTIL = False

_results_queue = None


@_disable_gc()
def get_write_results_queue(mp_mode: str = 'spawn') -> mp.Queue:
    """Get or create a multiprocessing queue for write results.

    Args:
        mp_mode (str): Multiprocessing context mode. Defaults to 'spawn'.

    Returns:
        mp.Queue: Queue for collecting write results.
    """
    global _results_queue
    if _results_queue is None:
        ctx = mp.get_context(mp_mode)
        _results_queue = ctx.Manager().Queue()
    return _results_queue


class FileSystemWriterAsync(FileSystemWriter):
    """
    Async-enabled implementation of FileSystemWriter using file I/O.

    This class does not spawn the async process itself but relies on an external async mechanism.

    **Flow:**

    1. Call `write_data`
    2. Externally start an async process with `get_save_function_and_args` and its arguments.
    3. The async function `writer_proxy_func` calls `write_preloaded_data` across multiple
        threads (no child processes).
    4. Once saving is finalized on all ranks, call `super().finish` with the results stored
        in `self.writer_result`.

    **Note:** Step (3) can also be executed synchronously.

    Currently, it is assumed that a separate writer is created for each ckpt save
    (intermediate state is stored as writer attributes).
    """

    def __init__(
        self,
        path: Union[str, os.PathLike],
        *args,
        separation_hint: Optional[str] = None,
        use_msc: bool = False,
        **kwargs,
    ):
        self.checkpoint_dir = path
        self.use_msc = use_msc

        super().__init__(path, *args, **kwargs)
        if not self.single_file_per_rank:
            raise NotImplementedError(
                "single_file_per_rank flag not supported for FileSystemWriterAsync"
            )

        self.can_run_decentralized_global_plan: bool = True

        # Intermediate state between preparation and finalization
        self.write_buckets: Optional[List[WriteBucket]] = None
        self.results_queue: Optional[mp.Queue] = None
        self.separation_hint = separation_hint

    def prepare_write_data(self, plan: SavePlan, planner: SavePlanner) -> None:
        """
        First stage of async saving. Copy data to CPU and plan the local saving.

        Args:
            plan (SavePlan): save plan generated by the PyT Distributed compatible planner
            planner (SavePlanner): save planner used to resolve the bytes and tensor data

        Returns: None, but stores the save plan in `self.write_buckets`
        """
        storage_plan: _StoragePrefix = plan.storage_data
        start = time()
        logger.debug(f"thread_count: {self.thread_count}, time: {start}")
        if self.separation_hint:
            assert (
                self.thread_count > 1
            ), "thread_count must be at least 2 if separation_hint is provided"
        bins = self.thread_count // 2 if self.separation_hint is not None else self.thread_count
        item_buckets = _split_by_size_and_type(bins, plan.items)
        logger.debug(f"bucket_prep, time: {time() - start}")

        start = time()
        # move tensors from GPU to CPU before starting async writing
        # We do D2H synchronously for now
        file_count = 0

        def gen_file(prefix=""):
            nonlocal file_count
            file_name = f"{prefix}{storage_plan.prefix}{file_count}{DEFAULT_SUFFIX}"
            file_count += 1
            return file_name

        def _clone_if_needed(ten: torch.Tensor):
            """Clone if we detect incontiguous storage for CPU tensors

            Makes sure we perform a `clone` only if we detect incontiguous storage,
            so that we don't blow up host memory unnecessarily.

            TODO: For persistent worker, this work should be changed to move the cpu tensor
            to shared_memory.
            """
            ten = ten.detach()
            if ten.device.type != "cpu":
                # We do D2H later when the async_request is scheduled for both sync / async
                # checkpointing
                return ten
            is_view = ten.untyped_storage().size() != ten.numel() * ten.itemsize
            return ten.clone() if is_view else ten

        # Prepare bytes / tensor data in each bucket, which will be assigned to each writer thread
        self.write_buckets = []
        for group_name, group_buckets in _split_by_separation_hint(
            item_buckets, self.separation_hint
        ).items():
            for bucket in group_buckets:
                bytes_data = [
                    (item, planner.resolve_data(item))
                    for item in bucket
                    if item.type == WriteItemType.BYTE_IO
                ]
                tensor_data = [
                    (item, _clone_if_needed(planner.resolve_data(item)))
                    for item in bucket
                    if item.type != WriteItemType.BYTE_IO
                ]
                if len(bytes_data) > 0 or len(tensor_data) > 0:
                    file_name = gen_file(prefix=group_name)
                    self.write_buckets.append(
                        (  # type: ignore[arg-type]
                            os.path.join(self.checkpoint_dir, file_name),
                            file_name,
                            (bytes_data, tensor_data),
                        )
                    )

        # Check if there is anything to write on this rank
        if len(self.write_buckets) > 0:
            assert len(self.write_buckets) <= self.thread_count, (
                len(self.write_buckets),
                self.thread_count,
            )
            self.results_queue = get_write_results_queue()
        else:
            self.results_queue = None
        end = time()
        logger.debug(f"D2H and push, time: {end - start}")

    def get_save_function_and_args(self) -> Tuple[Optional[Callable], Optional[Callable], List]:
        """
        Get function that saves the data to storage along with its arguments.
        Allows the external caller to apply the save function synchronously or asynchronously.

        Returns: None (if there is nothing to write on this rank) or a tuple of:
            1) the function that saves the data.
            2) the function that stages the GPU tensors to a destination for async checkpointing.
               This function should be self-contained.
            3) arguments to that function in 1).
        """
        if not self.write_buckets:
            return None, None, []
        transform_list = [self.transforms] if hasattr(self, "transforms") else []
        return (
            partial(self.write_preloaded_data_multithread, transform_list, self.use_msc),
            partial(self.preload_tensors, self.write_buckets, True),
            [torch.distributed.get_rank(), self.write_buckets, self.results_queue],
        )

    @staticmethod
    def preload_tensors(write_buckets: List[WriteBucket], non_blocking=True) -> List[WriteBucket]:
        """
        Preloads tensors in `state_dict` to host memory via CPU memory.

        Args:
            write_buckets (List): List of `WriteBucket` objects that define what to
                save in a checkpoint.
            non_blocking (bool, optional): knob to enable pinned D2H memcpy. Default is True.
        """
        result = []

        for bucket in write_buckets:
            file_name, storage_key, (bytes_data, tensor_data) = bucket
            tensor_list = []
            for item, tensor in tensor_data:
                # we belive these tensors are detached from the model trainers
                tensor_list.append((item, tensor.to("cpu", non_blocking=non_blocking)))
                # This is required for `PersistentAsyncCaller` to remove reference
                del tensor
            result.append((file_name, storage_key, (bytes_data, tensor_list)))
        if non_blocking:
            torch.cuda.synchronize()
        return result

    @staticmethod
    @_disable_gc()
    def write_preloaded_data_multithread(
        transform_list: List[_StorageWriterTransforms],
        use_msc: bool,
        rank: int,
        write_buckets: List[WriteBucket],
        global_results_queue: mp.Queue,
    ) -> None:
        """
        Performs saving data to storage with multiple threads.

        Uses threads (not processes) so that this can run safely inside a daemon process
        without spawning child processes. Uses two queues:
        - local_results_queue - to collect write results from worker threads
        - count_queue - to signal worker completion (task_done/join).

        Triggering GC during execution can lead to CUDA errors when tensors are shared.
        To prevent this, we disable the GC explicitly for this function with _disable_gc.

        Args:
            write_buckets (List[WriteBucket]): write plan
            global_results_queue (mp.Queue): queue to send Dict[List[WriteResults]]
                (or an Exception) back to the main training process
        Returns: None
        """
        logger = logging.getLogger(__name__)
        w_start = time()
        write_results_or_exc: Union[dict, Exception] = dict()
        local_results_queue: queue.Queue = queue.Queue()
        count_queue: queue.Queue = queue.Queue()
        thread_list: List[threading.Thread] = []

        def check_local_output(local_results_or_exc, local_worker_idx):
            if isinstance(local_results_or_exc, Exception):
                err_msg = (
                    f"Local worker {local_worker_idx} encountered"
                    f" an error: {local_results_or_exc}"
                )
                logger.error(err_msg)
            assert isinstance(local_results_or_exc, list), type(local_results_or_exc)

        for i, write_bucket in enumerate(write_buckets):
            try:
                kwargs = {
                    "local_proc_idx": i,
                    "write_bucket": write_bucket,
                    "results_queue": local_results_queue,
                    "count_queue": count_queue,
                    "use_fsync": True,
                }

                if use_msc:
                    signature = inspect.signature(FileSystemWriterAsync.write_preloaded_data)
                    if len(signature.parameters) > 6:
                        kwargs['use_msc'] = use_msc
                # Parallel writers: spawn threads for all but the last bucket
                if i < len(write_buckets) - 1:
                    count_queue.put(i)
                    t = threading.Thread(
                        target=partial(FileSystemWriterAsync.write_preloaded_data, transform_list),
                        kwargs=kwargs,
                    )
                    thread_list.append(t)
                else:
                    kwargs['count_queue'] = None
                    kwargs['results_queue'] = None
                    logger.debug('FileSystemWriterAsync: main worker started')
                    local_output = FileSystemWriterAsync.write_preloaded_data(
                        transform_list, **kwargs
                    )
                    if local_output is not None:
                        logger.debug(
                            'FileSystemWriterAsync: main worker results successfully collected'
                        )
                        check_local_output(local_output[1], local_output[0])
                        write_results_or_exc[local_output[0]] = local_output[1]

            except Exception as e:
                err_msg = f"An error is caught while starting worker {i}, error: {e}"
                logger.error(err_msg)
                write_results_or_exc = RuntimeError(err_msg)

        if not isinstance(write_results_or_exc, Exception) and len(thread_list) > 0:
            for t in thread_list:
                t.start()

            logger.debug("FileSystemWriterAsync: collecting worker results...")

            count_queue.join()
            for _ in range(len(write_buckets) - 1):
                try:
                    local_proc_idx, local_results_or_exc = local_results_queue.get()
                except queue.Empty:
                    write_results_or_exc = RuntimeError(
                        "Unexpected empty `local_results_queue`"
                        f" (expected {len(write_buckets) - 1} items)"
                    )
                    break
                else:
                    check_local_output(local_results_or_exc, local_proc_idx)
                    write_results_or_exc[local_proc_idx] = local_results_or_exc
            for t in thread_list:
                t.join()
            logger.debug('FileSystemWriterAsync: collected worker results successfully')

        global_results_queue.put(write_results_or_exc)

        w_end = time()
        logger.debug(f"{w_end}, rank: {rank}, write(sync,threads): {w_end - w_start}")

    @staticmethod
    @_disable_gc()
    def write_preloaded_data(
        transform_list: List[_StorageWriterTransforms],
        local_proc_idx: int,
        write_bucket: WriteBucket,
        results_queue: Optional[queue.Queue],
        count_queue: Optional[queue.Queue],
        use_fsync: bool,
        **kwargs,
    ) -> Union[Tuple[int, Exception], None]:
        """
        Performs actual data saving to storage (used by worker threads).

        Args:
            local_proc_idx (int): index of the worker that performs writing
            write_bucket (WriteBucket): data to write to storage
            results_queue (queue.Queue): queue to return the write results
            count_queue (queue.Queue): queue to signal worker task completion (get + task_done)
            use_fsync (bool): if True, calls os.fsync at the end of saving

        Returns: None when running in a worker (results put in queue); result tuple when main worker
        """
        logger = logging.getLogger(__name__)
        logger.debug(f"{local_proc_idx} started")
        mem_before = _process_memory()
        use_msc = kwargs.get("use_msc", False)

        local_results = []
        try:
            file_name, storage_key, (bytes_data, tensor_data) = write_bucket
            extra_kwargs = {}
            if "serialization_format" in inspect.signature(_write_item).parameters:
                from torch.distributed.checkpoint.filesystem import SerializationFormat

                extra_kwargs["serialization_format"] = SerializationFormat.TORCH_SAVE
            if use_msc:
                import multistorageclient as msc

                open_file = msc.open
            else:
                open_file = open
            with open_file(file_name, "wb") as stream:
                for write_item, data in bytes_data:
                    local_results.append(
                        _write_item(
                            *transform_list, stream, data, write_item, storage_key, **extra_kwargs
                        )
                    )

                for write_item, tensor in tensor_data:
                    assert tensor.is_cpu
                    local_results.append(
                        _write_item(
                            *transform_list, stream, tensor, write_item, storage_key, **extra_kwargs
                        )
                    )

                if use_fsync:
                    if use_msc:
                        stream.fsync()
                    else:
                        os.fsync(stream.fileno())
            local_output = (local_proc_idx, local_results)
        except Exception as e:
            logger.debug(f"{local_proc_idx} failed")
            local_output = (local_proc_idx, e)  # type: ignore[assignment]
        if results_queue is not None:
            results_queue.put(local_output)
        if count_queue is not None:
            # Signal this process is done.
            count_queue.get()
            count_queue.task_done()

        mem_after = _process_memory()
        logger.debug(
            f"{local_proc_idx} consumed: {mem_after - mem_before},"
            f" before: {mem_before}, after: {mem_after}"
        )
        return local_output

    def write_data(self, plan: SavePlan, planner: SavePlanner) -> Future[List[WriteResult]]:
        """Write all items from ``plan``."""
        raise NotImplementedError("write_data not implemented for FileSystemWriterAsync")

    def retrieve_write_results(self) -> Union[List[WriteResult], WRAPPED_EXCEPTION]:
        """
        Turn the latest dict including write results from `self.results_queue`
            into a single results lists. Includes error check.

        Returns (Union(List[WriteResult], WRAPPED_EXCEPTION): the list of write results
            from all local workers (threads) performing the save, or a WRAPPED_EXCEPTION if
            an exception was raised during the writing process.
        """
        assert self.write_buckets is not None

        if self.results_queue is None:
            write_results_or_exc = {}
        else:
            try:
                write_results_or_exc = self.results_queue.get_nowait()
            except queue.Empty:
                return _wrap_exception(RuntimeError("results_queue should not be empty"))

        if isinstance(write_results_or_exc, Exception):
            try:
                raise RuntimeError(
                    f"Worker failure: {write_results_or_exc}"
                ) from write_results_or_exc
            except Exception as e:
                return _wrap_exception(e)
        write_results: dict = write_results_or_exc
        if len(write_results) != len(self.write_buckets):
            return _wrap_exception(
                RuntimeError(
                    f"Incomplete worker results (expected {len(self.write_buckets)},"
                    f" got {len(write_results)}. This probably indicates a worker failure."
                )
            )
        return list(chain.from_iterable(write_results.values()))

    def prepare_decentralized_global_plan(self, local_plan: SavePlan) -> SavePlan:
        """Instead of assigning indices by plan order, uses PyT rank (same outcome).

        Args:
            local_plan (SavePlan): local plan to turn to a global plan
                (without interactions with other ranks)

        Returns:
            SavePlan - locally transformed plan equivalent to the plan that would be
                created by the coordinator
        """
        return dataclasses.replace(
            local_plan, storage_data=_StoragePrefix(f"__{torch.distributed.get_rank()}_")
        )

    def finish(self, metadata: Metadata, results: List[List[WriteResult]]) -> None:
        """
        Finish the checkpointing process.

        Args:
            metadata (Metadata): metadata to save
            results (List[List[WriteResult]]): results to save
        """
        if self.use_msc:
            import multistorageclient as msc

            storage_md = dict()
            for wr_list in results:
                storage_md.update({wr.index: wr.storage_data for wr in wr_list})

            metadata.storage_data = storage_md
            metadata.storage_meta = self.storage_meta()

            path = os.path.join(self.checkpoint_dir, ".metadata")

            with msc.open(path, "wb") as metadata_file:
                pickle.dump(metadata, metadata_file)
        else:
            super().finish(metadata, results)

    def prepare_local_plan(self, plan: SavePlan) -> SavePlan:
        """
        Prepare the local plan for the checkpointing process.
        """
        if self.use_msc:
            import multistorageclient as msc

            msc.os.makedirs(str(self.checkpoint_dir), exist_ok=True)
        else:
            super().prepare_local_plan(plan)

        return plan

    @property
    def checkpoint_id(self) -> Union[str, os.PathLike]:
        """
        return the checkpoint_id that will be used to save the checkpoint.
        """
        return str(self.checkpoint_dir)

    @classmethod
    def validate_checkpoint_id(cls, checkpoint_id: Union[str, os.PathLike]) -> bool:
        """
        Validate the checkpoint_id that will be used to save the checkpoint.

        This method is available in PyTorch 2.3 and above.
        """
        if checkpoint_id.startswith("msc://"):
            return True

        if hasattr(FileSystemWriter, "validate_checkpoint_id"):
            return FileSystemWriter.validate_checkpoint_id(checkpoint_id)

        return False


def _split_by_size_and_type(bins: int, items: List[WriteItem]) -> List[List[WriteItem]]:
    """
    Splits write items according to item size into close to uniform bins.

    Same as torch.distributed.checkpoint.filesystem._split_by_size_and_type,
    but with a fixed _item_size function.

    Args:
        bins (int): numbers of bins to split to
        items (List[WriteItem]): list of write items

    Returns (List[List[WriteItem]]): write items split to bins
    """
    if bins == 1:
        return [items]

    bytes_items: List[WriteItem] = []
    tensor_items: List[WriteItem] = []
    for wi in items:
        container = bytes_items if wi.type == WriteItemType.BYTE_IO else tensor_items
        container.append(wi)

    buckets: List[List[WriteItem]] = [[] for _ in range(bins)]
    bucket_sizes = [0 for _ in range(bins)]

    # Assign bytes with a simple round-robin
    for i, item in enumerate(bytes_items):
        buckets[i % bins].append(item)

    # Sort tensor items by size in decreasing order once and store the size with item
    sized_tensors = [(item, _item_size(item)) for item in tensor_items]
    sized_tensors.sort(key=itemgetter(1), reverse=True)

    # Use a min heap for bin assignment
    # Store (total_size_of_bin, bin_index) tuples
    heap: List[Tuple[int, int]] = [(0, i) for i in range(bins)]

    # Assign tensors using heap
    for item, size in sized_tensors:
        total_bin_size, bin_idx = heappop(heap)
        buckets[bin_idx].append(item)
        heappush(heap, (total_bin_size + size, bin_idx))

    return buckets


def _split_by_separation_hint(
    buckets: List[List[WriteItem]], separation_hint: Optional[str] = None
) -> Dict[str, List[List[WriteItem]]]:
    """
    Splits buckets into those whose keys begin with the separation_hint and those whose keys do not

    Args:
        buckets (List[List[WriteItem]]): buckets to split
        separation_hint (Optional[str]): optional prefix to split on

    Returns (Dict[str, List[List[WriteItem]]]): a dictionary
        mapping the prefix to the relevant buckets
    """
    bins = len(buckets)
    buckets_with_separation_hint = {}
    if separation_hint is not None:
        buckets_default = [[] for _ in range(bins)]
        buckets_hint = [[] for _ in range(bins)]
        for i in range(bins):
            for item in buckets[i]:
                if item.index.fqn.startswith(separation_hint):
                    buckets_hint[i].append(item)
                else:
                    buckets_default[i].append(item)
        buckets_with_separation_hint[""] = buckets_default
        buckets_with_separation_hint[separation_hint] = buckets_hint
    else:
        buckets_with_separation_hint[""] = buckets
    return buckets_with_separation_hint


def _item_size(item: WriteItem) -> int:
    """
    Calculates size (in bytes) of a single write item.

    Same as torch.distributed.checkpoint.filesystem._item_size,
    but fixes computing chunk size (with item.tensor_data.chunk.sizes)

    Args:
        item (WriteItem): write item to compute the size of

    Returns (int): size of an item in bytes
    """
    size = 1
    assert item.tensor_data is not None
    # can't use math.prod as PT needs to support older python
    for s in item.tensor_data.chunk.sizes:
        size *= s

    dtype = item.tensor_data.properties.dtype
    return size * torch._utils._element_size(dtype)


def _process_memory() -> int:
    """
    Get memory used by current process.

    Returns (int): memory used by current process
    """
    if not HAVE_PSUTIL:
        raise RuntimeError("psutil is not installed, please install it with `pip install psutil`")
    process = psutil.Process(os.getpid())
    mem_info = process.memory_info()
    return mem_info.rss


================================================
FILE: megatron/core/dist_checkpointing/strategies/fully_parallel.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import logging
from pathlib import Path
from time import time
from typing import Any, Callable, Dict, Optional, Tuple, TypeVar

import torch
import torch.distributed as dist
from torch.distributed.checkpoint import Metadata

from megatron.core.dist_checkpointing import ShardedObject, ShardedTensor
from megatron.core.dist_checkpointing.core import CheckpointingException
from megatron.core.dist_checkpointing.dict_utils import (
    dict_list_map_inplace,
    extract_matching_values,
    merge,
    nested_values,
)
from megatron.core.dist_checkpointing.exchange_utils import (
    ShardDistribution,
    determine_main_replica_uniform_distribution,
    exchange_by_distribution,
    exchange_loaded_objects_gather_object,
)
from megatron.core.dist_checkpointing.mapping import ShardedStateDict, StateDict, is_main_replica
from megatron.core.dist_checkpointing.strategies.base import (
    AsyncSaveShardedStrategy,
    LoadShardedStrategy,
    SaveShardedStrategy,
)
from megatron.core.dist_checkpointing.utils import (
    _sharded_object_id,
    _sharded_tensor_shard_id,
    _ShardId,
    debug_time,
)
from megatron.core.dist_checkpointing.validation import (
    determine_global_metadata,
    validate_sharding_integrity,
)
from megatron.core.utils import get_pg_rank, get_pg_size

logger = logging.getLogger(__name__)

T = TypeVar('T', ShardedObject, ShardedTensor)


class FullyParallelSaveStrategyWrapper(AsyncSaveShardedStrategy):
    """Wraps arbitrary strategy and distributes the save during `save`.

    The save distribution happens without any *data* communication.
    Only the *metadata* is exchanged and based on data replication on different
    ranks, we try to distribute the save as uniformly as possible.

    This wrapper assumes, that setting `replica_id` to 0 will make the
    underlying strategy do the saving on current rank. All the other `replica_id`s
    are set to 1.

    Currently, the save distribution is realized with a greedy algorithm
    described in `distribute_shards_to_ranks`.

    Args:
        strategy (SaveShardedStrategy): base strategy to wrap
        parallelization_group (ProcessGroup, optional): process group to use for save
            distribution. Note that this doesn't have to match exactly the
            data distribution, but should cover the replication pattern
            to maximize performance. Defaults to the whole world.
        do_cache_distribution (bool, optional): whether to cache the save distribution
            from previous calls. Should be set to True only if the state dict
            structure between the calls is always the same. Defaults to True.
    """

    def __init__(
        self,
        strategy: SaveShardedStrategy,
        parallelization_group: Optional[torch.distributed.ProcessGroup] = None,
        do_cache_distribution: bool = False,
    ):
        super().__init__(strategy.backend, strategy.version)
        self.base_strategy = strategy
        if parallelization_group is None:
            parallelization_group = torch.distributed.group.WORLD
        self.parallelization_group = parallelization_group
        self.do_cache_distribution = do_cache_distribution

        self.cached_distribution: Optional[ShardDistribution] = None

    def async_save(self, sharded_state_dict: ShardedStateDict, checkpoint_dir: Path):
        if not isinstance(self.base_strategy, AsyncSaveShardedStrategy):
            raise CheckpointingException(
                f'Cannot apply async_save to non-async base strategy {self.base_strategy}'
            )
        self.apply_saving_parallelization(sharded_state_dict)
        return self.base_strategy.async_save(sharded_state_dict, checkpoint_dir)

    def save(self, sharded_state_dict: ShardedStateDict, checkpoint_dir: Path):
        self.apply_saving_parallelization(sharded_state_dict)
        return self.base_strategy.save(sharded_state_dict, checkpoint_dir)

    def apply_saving_parallelization(self, sharded_state_dict: ShardedStateDict) -> None:
        """Distributes the save across ranks by exchanging metadata.

        Exchanges metadata from the state dict and computes the uniform
        (as close as possible) distribution of saves among the ranks.

        If `self.do_cache_distribution` is True, caches the distribution between
        the calls and subsequent distributions happen without any inter-rank
        communication.

        Args:
            sharded_state_dict (ShardedStateDict): state dict to distribute the saving

        Returns: None
        """
        start = time()
        if self.do_cache_distribution and self.cached_distribution is not None:
            logger.debug(f'Apply *cached* save parallelization')
            precomputed_distribution = self.cached_distribution
        else:
            logger.debug(f'Apply save parallelization')
            precomputed_distribution = determine_main_replica_uniform_distribution(
                sharded_state_dict, self.parallelization_group
            )

        distribute_main_replicas_with_precomputed_distribution(
            sharded_state_dict, self.parallelization_group, precomputed_distribution
        )
        if self.cached_distribution is None:
            # First time applying the parallelization
            validate_sharding_integrity(determine_global_metadata(sharded_state_dict)[1])
        if self.do_cache_distribution:
            self.cached_distribution = precomputed_distribution
        end = time()
        logger.debug(f"parallel save sharding, time: {end - start}")

    @property
    def can_handle_sharded_objects(self):
        return self.base_strategy.can_handle_sharded_objects


class FullyParallelLoadStrategyWrapper(LoadShardedStrategy):
    """Wraps arbitrary load strategy and distributes the load during `load`.

    See `load` method docs for details.

    Args:
        strategy (LoadShardedStrategy): base strategy to wrap
        parallelization_group (ProcessGroup, optional): process group to use for load
            distribution. Note that this doesn't have to match exactly the
            data distribution, but should cover the replication pattern
            to maximize performance. Defaults to the whole world.
            In most cases, it's recommended to set it to the DP group.
        do_cache_distribution (bool, optional): whether to cache the load distribution
            from previous calls. Should be set to True only if the state dict
            structure between the calls is always the same. Defaults to False,
            since the loading in general happens only once during training.
            Note that the load distribution *cannot* be reused as a save distribution,
            because save/load is not fully symmetrical.
        exchange_algo (str): algorithm to use for exchanging the data.
            Options:
            - broadcast - each rank broadcasts individual tensors to others
            - gather_object (default) - ranks all_gather_object the whole loaded state dicts
            - gather_rounds (default) - ranks all gather individual tensors in rounds
            See method docs for more details.
    """

    def __init__(
        self,
        strategy: LoadShardedStrategy,
        parallelization_group: Optional[torch.distributed.ProcessGroup] = None,
        do_cache_distribution: bool = False,
        exchange_algo: str = 'broadcast',
    ):
        super().__init__()
        self.base_strategy = strategy
        if parallelization_group is None:
            parallelization_group = (
                dist.GroupMember.WORLD
            )  # explicit group needed for torch.distributed.get_global_rank call
        self.parallelization_group = parallelization_group
        self.do_cache_distribution = do_cache_distribution
        self.exchange_algo = exchange_algo

        self.cached_distribution: Optional[ShardDistribution] = None
        self.cached_global_metadata: Optional[Metadata] = None

    @debug_time("FullyParallelLoadStrategyWrapper.load", logger)
    def load(self, sharded_state_dict: ShardedStateDict, checkpoint_dir: Path) -> StateDict:
        """Distributes the load and calls underlying strategy only for parts of the state dict.

        Steps:
        1. Load metadata is exchanged between the ranks in the parallelization group.
        2. Each rank deterministically plans the load for the whole workload
            so that the loads are as uniform as possible.
        3. Each ranks loads its planned shard of the checkpoint.
        4. All ranks exchange the loaded shards.

        Internode communication is involved in steps (1) (with metadata)
        and (4) (with actual data). Storage interaction is involved in step (3).

        Currently, the load distribution (step 2) is realized with a greedy algorithm
        described in `distribute_shards_to_ranks` (same as for saving distribution).

        Currently, the shards are all gathered between all ranks in the parallelization
        group. This might not be optimal (some ranks do not need all tensors),
        but it's a reasonable approximation for an optimal exchange in most scenarios.

        Args:
            sharded_state_dict (ShardedStateDict): sharded state dict to load
            checkpoint_dir (Path): checkpoint directory to load from

        Returns:
            StateDict: loaded state dict. The state dict should be equivalent to
            a state dict that would be loaded with the underlying strategy
            without this wrapper.
        """

        loaded_state_dict = {}

        if get_pg_size(self.parallelization_group) <= 1:
            return self.base_strategy.load(sharded_state_dict, checkpoint_dir)

        # Step 1 and 2: exchange load metadata and distribute the load
        with debug_time("self.apply_loading_parallelization", logger):
            precomputed_distribution: ShardDistribution | None = self.apply_loading_parallelization(
                sharded_state_dict
            )
            assert (
                precomputed_distribution is not None
            ), 'Expecting non-trivial distribution for non-trivial parallelization group'

        # Step 3: load part of the checkpoint.
        # Load only sharded objects first. ShardedTensors will be loaded separately
        # so that we can keep track of sharded tensors loaded by this rank
        (sharded_tensors, sharded_state_dict, to_load_shards, unloaded_shards) = (
            self._defer_loading_sharded_tensors(sharded_state_dict)
        )

        (sharded_objects, sharded_state_dict, to_load_objects, unloaded_objects) = (
            self._defer_loading_sharded_objects(sharded_state_dict)
        )

        assert (
            len(sharded_state_dict) == 0
        ), "sharded_state_dict is not empty after deferring tensors and objects"
        with debug_time("base_load_ShardedObjects", logger):
            # Load sharded objects first
            loaded_objects = self.base_strategy.load(to_load_objects, checkpoint_dir)

        with debug_time("base_load_ShardedTensors", logger):
            # Load sharded tensors separately
            loaded_tensors = self.base_strategy.load(to_load_shards, checkpoint_dir)

        with debug_time("self.exchange_loaded_tensors", logger):

            # Step 4: exchange data between ranks
            logger.debug(f'Applying parallel load with algo {self.exchange_algo}')
            all_loaded_tensors = exchange_by_distribution(
                loaded_tensors,
                unloaded_shards,
                precomputed_distribution,
                self.parallelization_group,
                self.exchange_algo,
            )
            if not set(unloaded_shards.keys()).issubset(all_loaded_tensors.keys()):
                missing_shards = set(unloaded_shards.keys()) - all_loaded_tensors.keys()
                raise CheckpointingException(
                    f'Missing shards after fully parallel loading: {missing_shards}'
                )

            with debug_time("torch.cuda.synchronize", logger):
                torch.cuda.synchronize()

        all_loaded_objects = exchange_loaded_objects_gather_object(loaded_objects)

        if not set(unloaded_objects.keys()).issubset(all_loaded_objects.keys()):
            missing_object_shards = set(unloaded_objects.keys()) - all_loaded_objects.keys()
            raise CheckpointingException(
                f'Missing object shards after fully parallel loading: {missing_object_shards}'
            )
        torch.cuda.synchronize()

        self.fill_in_deferred_sharded_tensors(sharded_tensors, all_loaded_tensors)
        self.fill_in_deferred_sharded_objects(sharded_objects, all_loaded_objects)

        merge(loaded_state_dict, sharded_objects)
        merge(loaded_state_dict, sharded_tensors)
        if hasattr(self.base_strategy, "cached_global_metadata"):
            self.cached_global_metadata = self.base_strategy.cached_global_metadata
        return loaded_state_dict

    @staticmethod
    def _defer_loading_sharded_objects(
        sharded_state_dict: ShardedStateDict,
    ) -> Tuple[
        ShardedStateDict,
        ShardedStateDict,
        Dict[_ShardId, ShardedObject],
        Dict[_ShardId, ShardedObject],
    ]:
        return _defer_loading_sharded_items(sharded_state_dict, ShardedObject, _sharded_object_id)

    @staticmethod
    def _defer_loading_sharded_tensors(
        sharded_state_dict: ShardedStateDict,
    ) -> Tuple[
        ShardedStateDict,
        ShardedStateDict,
        Dict[_ShardId, ShardedTensor],
        Dict[_ShardId, ShardedTensor],
    ]:
        return _defer_loading_sharded_items(
            sharded_state_dict, ShardedTensor, _sharded_tensor_shard_id
        )

    @staticmethod
    def fill_in_deferred_sharded_objects(
        sharded_state_dict: ShardedStateDict, loaded_objects: Dict[_ShardId, Any]
    ) -> None:
        """Fill in objects not loaded by current rank with objects from `loaded_objects` map.

        Args:
            sharded_state_dict (ShardedStateDict): sharded state dict to fill in.
                ShardedObjects are completely replaced with corresponding objects.
            loaded_objects (Dict[_ShardId, Any]): dict allowing to map
                ShardedObject from the sharded_state_dict to loaded objects.

        Returns:
            None
        """
        _fill_in_deferred_sharded_items(
            sharded_state_dict, loaded_objects, ShardedObject, _sharded_object_id
        )

    @staticmethod
    def fill_in_deferred_sharded_tensors(
        sharded_state_dict: ShardedStateDict, loaded_tensors: Dict[_ShardId, torch.Tensor]
    ) -> None:
        """Fill in tensors not loaded by current rank with tensors from `loaded_tensors` map.

        Args:
            sharded_state_dict (ShardedStateDict): sharded state dict to fill in.
                ShardedTensors are completely replaced with corresponding torch.Tensors.
            loaded_tensors (Dict[_ShardId, torch.Tensor]): dict allowing to map
                ShardedTensor from the sharded_state_dict to loaded tensors.

        Returns:
            None
        """
        _fill_in_deferred_sharded_items(
            sharded_state_dict, loaded_tensors, ShardedTensor, _sharded_tensor_shard_id
        )

    def apply_loading_parallelization(
        self, sharded_state_dict: ShardedStateDict
    ) -> Optional[ShardDistribution]:
        """Distributes the load across ranks by exchanging metadata.

        Exchanges metadata from the state dict and computes the uniform
        (as close as possible) distribution of loads among the ranks.
        Marks ShardedTensors to be loaded by the current rank with replica_id 0
        (and others with non 0 values).

        If `self.do_cache_distribution` is True, caches the distribution between
        the calls and subsequent distributions happen without any inter-rank
        communication.

        Args:
            sharded_state_dict (ShardedStateDict): state dict to distribute the loading

        Returns:
            ShardDistribution (optional): the computed loading distribution
        """
        if self.do_cache_distribution and self.cached_distribution is not None:
            logger.debug(f'Apply *cached* load parallelization')
            precomputed_distribution = self.cached_distribution
        else:
            logger.debug(f'Apply load parallelization')
            precomputed_distribution = determine_main_replica_uniform_distribution(
                sharded_state_dict, self.parallelization_group, True
            )

        distribute_main_replicas_with_precomputed_distribution(
            sharded_state_dict, self.parallelization_group, precomputed_distribution
        )
        if self.do_cache_distribution:
            self.cached_distribution = precomputed_distribution

        return precomputed_distribution

    @property
    def can_handle_sharded_objects(self):
        return self.base_strategy.can_handle_sharded_objects

    def load_tensors_metadata(self, checkpoint_dir: Path):
        return self.base_strategy.load_tensors_metadata(checkpoint_dir)

    def load_sharded_metadata(self, checkpoint_dir: Path):
        return self.base_strategy.load_sharded_metadata(checkpoint_dir)

    def check_backend_compatibility(self, loaded_version):
        return self.base_strategy.check_backend_compatibility(loaded_version)

    def check_version_compatibility(self, loaded_version):
        return self.base_strategy.check_version_compatibility(loaded_version)


def distribute_main_replicas_with_precomputed_distribution(
    sharded_state_dict: ShardedStateDict,
    parallelization_group: torch.distributed.ProcessGroup,
    precomputed_distribution: Optional[ShardDistribution],
):
    """Applies the save distribution computed with `determine_main_replica_uniform_distribution`.

    Based on rank assignment, sets replica ids of the shards saved by current rank to 0
    and all the other replica ids to 1.

    Args:
        sharded_state_dict (ShardedStateDict): state dict to apply the save distribution to
        parallelization_group (ProcessGroup): distribution will be applied within this
            process group. Must match with the process group passed to
            `determine_main_replica_uniform_distribution`.
        precomputed_distribution (ShardDistribution): distribution computed with
            `determine_main_replica_uniform_distribution`

    Returns: None

    Example replica ids of tensors A, B, C before distribution:
    rank0: A: (0, 0, 0), B: (0, 0, 0), C: (0, 0, 0)
    rank1: A: (0, 0, 1), B: (0, 0, 1), C: (0, 0, 1)
    rank2: A: (0, 0, 2), B: (0, 0, 2), C: (0, 0, 2)

    Replicas after distribution for the example above:
    rank0: A: 0, B: 1, C: 1
    rank1: A: 1, B: 0, C: 1
    rank2: A: 1, B: 1, C: 0
    """
    if parallelization_group is None:
        parallelization_group = torch.distributed.group.WORLD
    if get_pg_size(group=parallelization_group) <= 1:
        return
    if precomputed_distribution is None:
        raise ValueError(
            'precomputed_distribution must be not None for non-trivial parallelization group'
        )

    local_shards = list(
        sh_base
        for sh_base in nested_values(sharded_state_dict)
        if isinstance(sh_base, ShardedTensor)
    )

    rank_within_dp_group = get_pg_rank(group=parallelization_group)
    for sh_ten in local_shards:
        shard_id = _sharded_tensor_shard_id(sh_ten)
        if (
            shard_id in precomputed_distribution.shards_in_this_group
            and rank_within_dp_group == precomputed_distribution.main_rank_for_shard[shard_id]
        ):
            sh_ten.replica_id = 0
        else:
            sh_ten.replica_id = 1


def _defer_loading_sharded_items(
    sharded_state_dict: ShardedStateDict, item_type: type, shard_id_func: Callable[[T], _ShardId]
) -> Tuple[ShardedStateDict, ShardedStateDict, Dict[_ShardId, T], Dict[_ShardId, T]]:
    """Divides state dict into parts loaded by this vs other ranks.

    Args:
        sharded_state_dict (ShardedStateDict): state dict with sharded items
            that will be divided.
        item_type: The type of sharded item (ShardedObject or ShardedTensor)
        shard_id_func: Function to get the shard ID for the item type

    Returns: a tuple of:
        - ShardedStateDict: sub-state dict only with sharded items
        - ShardedStateDict: sub-state dict with non-sharded items
        - Dict[_ShardId, T]: mapping from shard id to items loaded by *this* rank
        - Dict[_ShardId, T]: mapping from shard id to items loaded by *other* ranks
    """
    to_load_shards = {}
    unloaded_shards = {}

    sharded_items, remaining_state_dict = extract_matching_values(
        sharded_state_dict, lambda v: isinstance(v, item_type)
    )

    def wrap_non_main_replicas(x: Any) -> Any:
        if isinstance(x, item_type):
            shard_id = shard_id_func(x)
            if is_main_replica(x.replica_id):
                to_load_shards[shard_id] = x
            else:
                unloaded_shards[shard_id] = x
        return x

    dict_list_map_inplace(wrap_non_main_replicas, sharded_items)
    return sharded_items, remaining_state_dict, to_load_shards, unloaded_shards


def _fill_in_deferred_sharded_items(
    sharded_state_dict: ShardedStateDict,
    loaded_items: Dict[_ShardId, Any],
    item_type: type,
    shard_id_func: Callable[[T], _ShardId],
) -> None:
    """Helper function to fill in items not loaded by current rank."""

    def fill_in_sharded_item(x: Any) -> Any:
        if isinstance(x, item_type):
            try:
                x = loaded_items[shard_id_func(x)]
            except KeyError as e:
                raise CheckpointingException(
                    f'Missing loaded item shard: {shard_id_func(x)}'
                ) from e
        return x

    dict_list_map_inplace(fill_in_sharded_item, sharded_state_dict)


================================================
FILE: megatron/core/dist_checkpointing/strategies/state_dict_saver.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION.  All rights reserved.

""" State dict saver for PyT Distributed format allowing asynchronous save. """

from logging import getLogger
from time import time
from typing import TYPE_CHECKING, List, Optional, Tuple, Union

import torch
import torch.distributed as dist
from torch.distributed.checkpoint import CheckpointException
from torch.distributed.checkpoint.default_planner import DefaultSavePlanner
from torch.distributed.checkpoint.metadata import STATE_DICT_TYPE, Metadata
from torch.distributed.checkpoint.planner import SavePlan, SavePlanner
from torch.distributed.checkpoint.utils import _DistWrapper, _get_failure_dict

if TYPE_CHECKING:
    from .filesystem_async import FileSystemWriterAsync
    from .torch import MCoreSavePlanner


logger = getLogger(__name__)

from dataclasses import fields


def _compare_dataclasses(obj1, obj2):
    if type(obj1) != type(obj2):
        return f"Objects are of different types: {type(obj1)} and {type(obj2)}"

    differences = []
    for field in fields(obj1):
        value1 = getattr(obj1, field.name)
        value2 = getattr(obj2, field.name)
        if value1 != value2:
            differences.append(f"{field.name}: {value1} != {value2}")

    return differences if differences else "All fields are equal"


def save_state_dict_async_plan(
    state_dict: STATE_DICT_TYPE,
    storage_writer: 'FileSystemWriterAsync',
    process_group: Optional[dist.ProcessGroup] = None,
    coordinator_rank: int = 0,
    planner: Optional[Union[SavePlanner, 'MCoreSavePlanner']] = None,
    cached_ckpt_structure: Optional[Tuple[SavePlan, SavePlan, bool]] = None,
    loaded_all_plans: Optional[List[SavePlan]] = None,
) -> Tuple[Tuple['FileSystemWriterAsync', Union[Metadata, None], _DistWrapper], SavePlan, bool]:
    """
    First stage of saving a state dict to storage.

    This is an async adjustment of torch.distributed.checkpoint.state_dict_saver.
    In order to support async save, saving should be split into three parts:
    1. Planning
    2. Actual saving
    3. Finalization

    Out of these, step (2) *must* happen asynchronously.
    The first step is realized with this function.

    The planning part consists of several steps, described here:
    https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.SavePlanner

    Args:
        state_dict (STATE_DICT_TYPE): state dict to save
        storage_writer (FileSystemWriterAsync): in current version only an instance of
            FileSystemWriterAsync
        process_group (dist.ProcessGroup, optional): process group used for save planning
        coordinator_rank (int, optional): coordinator rank for planning. Defaults to 0.
        planner (SavePlanner, optional): save planner for torch.distributed.checkpoint format
        cached_ckpt_structure (Tuple[SavePlan, SavePlan, bool], Optional):
            Each object of this tuple will be used in the order as following
            cached_central_plan (SavePlan): a globally coordinated save plan
                cached in the previous iteration
            cached_local_plan (SavePlan): a local plan
                cached in the previous iteration
            validated_cache_reuse (bool): boolean value to tell global_metadata and planning dict
                is consistent over iterations

    Returns: Tuple of:
        - storage writer (the one passed as input)
        - metadata from planning (or None if we reuse cached global metadata)
        - distributed wrapper used for planning
    The return value of this function should be passed as an input to
    `save_state_dict_async_finalize` and cached_plan to skip `reduce_scatter` at planning.
    """
    cached_central_plan, cached_local_plan, validated_cache_reuse = (None, None, False)
    if cached_ckpt_structure:
        cached_central_plan, cached_local_plan, validated_cache_reuse = cached_ckpt_structure

    rank = torch.distributed.get_rank() if torch.distributed.is_initialized() else 0
    dist_wrapper = _DistWrapper(process_group, True, coordinator_rank)
    if planner is None:
        planner = DefaultSavePlanner()
    assert planner is not None

    global_metadata = None
    logger.debug(f"rank: {rank}, starting state dict save")
    local_plan = cached_local_plan
    global_md_verify_reuse = False

    def local_step():
        nonlocal local_plan
        assert planner is not None
        # PyTorch 2.4 introduced additional `metadata` argument,
        # we have to reference `is_coordinator` args by name
        planner.set_up_planner(state_dict, is_coordinator=dist_wrapper.is_coordinator)
        storage_writer.set_up_storage_writer(dist_wrapper.is_coordinator)
        if not validated_cache_reuse and local_plan is None:
            local_plan = planner.create_local_plan()
        local_plan = storage_writer.prepare_local_plan(local_plan)
        return local_plan

    def global_step(all_local_plans):
        nonlocal global_metadata
        assert planner is not None
        all_local_plans, global_metadata = planner.create_global_plan(all_local_plans)
        all_local_plans = storage_writer.prepare_global_plan(all_local_plans)
        return all_local_plans

    # Execute local and global planning
    # Ideally we want to use the cached plan. Otherwise if the planner and storage_writer
    # allow it (`can_run_decentralized_global_plan`) we gather the plans to create
    # the metadata but prepare the plans independently on each rank.
    # In the worst case we have to reduce_scatter all the plans.
    start_plan = time()
    if validated_cache_reuse and cached_central_plan:
        logger.debug(f"rank: {rank}, Passed cache reusable")
        local_step()
        central_plan = cached_central_plan
    elif getattr(planner, 'can_run_decentralized_global_plan', False) and getattr(
        storage_writer, 'can_run_decentralized_global_plan', False
    ):
        local_plan = local_step()
        global_md_verify_reuse = verify_global_md_reuse(
            loaded_all_plans, local_plan, rank, dist_wrapper
        )

        if not loaded_all_plans or not global_md_verify_reuse:
            all_local_plans = dist_wrapper.gather_object(local_plan)
            if dist_wrapper.is_coordinator:
                _, global_metadata = planner.create_global_plan(all_local_plans)
                global_metadata.all_local_plans = all_local_plans
        else:
            logger.debug(f"rank: {rank}, Passed cached global metadata")
            global_metadata = None
        local_plan = planner.create_decentralized_global_plan(local_plan)
        local_plan = storage_writer.prepare_decentralized_global_plan(local_plan)
        central_plan = local_plan
    else:
        central_plan = dist_wrapper.reduce_scatter("plan", local_step, global_step)
    central_plan = planner.finish_plan(central_plan)
    end_plan = time()
    logger.debug(f"rank: {rank}, plan time: {end_plan - start_plan}")
    # Prepare async writing of tensors.
    # The `storage_writer` will store the information about tensors it needs to save
    start = time()
    storage_writer.prepare_write_data(central_plan, planner)
    end = time()
    logger.debug(f"{time()} rank: {rank}, write(async) time: {end - start}")
    return (
        (storage_writer, global_metadata, dist_wrapper),
        central_plan,
        local_plan,
        cached_central_plan == central_plan,
        global_md_verify_reuse,
    )


def verify_global_md_reuse(
    loaded_all_plans: List[SavePlan], local_plan: SavePlan, rank: int, dist_wrapper: _DistWrapper
) -> bool:
    """
    Verifies that global metadata reuse is possible by checking the loaded plans from the
     checkpoint are consistent, which means we have the same settings when resuming training.
    Args:
        loaded_all_plans: List[SavePlan], The loaded plans from the checkpoint
         (stored in checkpoint metadata).
        local_plan: SavePlan, The local save plan.
        rank: Current process rank.
        dist_wrapper (_DistWrapper): distributed wrapper created during planning

    Returns: True iff the global metadata reuse is possible.

    """
    logger.debug(f"verifying reuse of global metadata")
    if not loaded_all_plans:
        global_md_verify_reuse = False
        logger.debug("loaded global metadata reuse verification: no loaded plans passed")

    elif len(loaded_all_plans) == dist_wrapper.get_world_size():
        local_verify_reuse = all(
            getattr(local_plan, f.name) == getattr(loaded_all_plans[rank], f.name)
            for f in fields(local_plan)
            if f.name != 'storage_data'
        )

        if not local_verify_reuse:
            logger.debug(
                f"local_verify_reuse is False: diffs -"
                f" {_compare_dataclasses(local_plan, loaded_all_plans[rank])}"
            )
        all_results = torch.tensor([local_verify_reuse], dtype=torch.int, device='cuda')
        torch.distributed.all_reduce(all_results, op=torch.distributed.ReduceOp.MIN)
        # Check if all reduced results are True
        global_md_verify_reuse = all_results.item() == 1
    else:
        global_md_verify_reuse = False
    return global_md_verify_reuse


def save_state_dict_async_finalize(
    storage_writer: 'FileSystemWriterAsync', global_metadata: Metadata, dist_wrapper: _DistWrapper
) -> None:
    """
    Finalization of save_state_dict_async_plan.

    The input arguments are the same as the save_state_dict_async_plan output,
    the `write_results` are retrieved from the storage_writer.

    Args:
        storage_writer (FileSystemWriterAsync): storage writer used for planning
        global_metadata (Metadata): metadata created during planning
        dist_wrapper (_DistWrapper): distributed wrapper created during planning

    Returns: None
    """
    write_results = storage_writer.retrieve_write_results()

    # Gather the write results that will be saved to the metadata file.
    gather_start = time()
    all_results = dist_wrapper.gather_object(write_results)
    gather_end = time()
    logger.debug(f"{gather_end}, {torch.distributed.get_rank()}, gather: {gather_end-gather_start}")

    # Store the metadata on coordinator rank
    if dist_wrapper.is_coordinator:
        node_failures = _get_failure_dict(all_results)
        if len(node_failures) == 0:
            assert global_metadata is not None
            write_start = time()
            storage_writer.finish(global_metadata, all_results)
            write_end = time()
            logger.debug(f"{write_end}, metadata_write: {write_end - write_start}")
    else:
        node_failures = {}

    # Broadcast failure status to all ranks to raise exceptions everywhere if needed.
    # The failure details are only raised on the coordinator.
    failures_occurred = torch.tensor(
        [int(len(node_failures) > 0)], dtype=torch.int, device=torch.cuda.current_device()
    )
    torch.distributed.broadcast(
        failures_occurred, src=dist_wrapper.coordinator_rank, group=dist_wrapper.group
    )
    if failures_occurred:
        raise CheckpointException("write", node_failures)


================================================
FILE: megatron/core/dist_checkpointing/strategies/torch.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

""" Strategies using PyTorch distributed.checkpoint as an underlying format. """
import io
import os
import pickle
import warnings
from collections import defaultdict
from contextlib import contextmanager
from itertools import product
from logging import getLogger
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast

import torch
from packaging.version import Version as PkgVersion
from torch.distributed import checkpoint
from torch.distributed._shard.metadata import ShardMetadata
from torch.distributed._shard.sharded_tensor import Shard
from torch.distributed._shard.sharded_tensor import ShardedTensor as TorchShardedTensor
from torch.distributed._shard.sharded_tensor import ShardedTensorMetadata, TensorProperties
from torch.distributed.checkpoint import (
    BytesStorageMetadata,
    DefaultLoadPlanner,
    DefaultSavePlanner,
    FileSystemReader,
    FileSystemWriter,
    LoadPlan,
    Metadata,
    ReadItem,
    SavePlan,
    TensorStorageMetadata,
    WriteItem,
)
from torch.distributed.checkpoint._nested_dict import FLATTEN_MAPPING, unflatten_state_dict
from torch.distributed.checkpoint._traverse import OBJ_PATH, traverse_state_dict
from torch.distributed.checkpoint.metadata import Metadata
from torch.distributed.checkpoint.planner_helpers import _create_write_items

from ...utils import get_torch_version, is_torch_min_version
from ..core import CheckpointingException
from ..dict_utils import nested_values
from ..mapping import (
    ShardedBase,
    ShardedObject,
    ShardedStateDict,
    ShardedTensor,
    StateDict,
    is_main_replica,
)
from .async_utils import AsyncRequest
from .base import (
    AsyncSaveShardedStrategy,
    LoadShardedStrategy,
    StrategyAction,
    register_default_strategy,
)
from .cached_metadata_filesystem_reader import CachedMetadataFileSystemReader
from .checkpointable import CheckpointableShardedTensor, LocalShardsContainer
from .filesystem_async import FileSystemWriterAsync
from .state_dict_saver import save_state_dict_async_finalize, save_state_dict_async_plan

try:
    if not torch.cuda.is_available():
        raise ImportError
    from transformer_engine.pytorch.float8_tensor import Float8Tensor

    HAVE_TE = True
except ImportError:
    HAVE_TE = False

try:
    from torch.distributed._tensor import DTensor

    HAVE_DTENSOR = True
except ImportError:
    HAVE_DTENSOR = False

from megatron.core.msc_utils import MultiStorageClientFeature

MSC_PREFIX = "msc://"

_metadata_fn: str = ".metadata"


# dummy class needed to load mcore v0.15 checkpoints with optim. states
class MCoreMetadata:
    """ """

    pass


# dummy class needed to load old checkpoint weights
class MCoreSavePlan:
    """ """

    pass


def register_default_torch_strategies():
    """Register default strategies related to PyT Distributed backend."""
    register_default_strategy(
        StrategyAction.LOAD_SHARDED, 'torch_dist', 1, TorchDistLoadShardedStrategy()
    )
    register_default_strategy(
        StrategyAction.SAVE_SHARDED, 'torch_dist', 1, TorchDistSaveShardedStrategy('torch_dist', 1)
    )


logger = getLogger(__name__)


def flatten_state_dict(
    state_dict: ShardedStateDict,
) -> Tuple[ShardedStateDict, Dict[str, OBJ_PATH]]:
    """Flattens state dict into a single level dict.

    It's a copy of torch.distributed.checkpoint._nested_dict.flatten_state_dict
    which also accepts ShardedBase tensors as terminal objects

    Args:
        state_dict (ShardedStateDict): state dict to be flattened

    Returns (tuple): flattened state dict and a mapping allowing to recreate the original one

    """
    flattened = {}
    mappings = {}

    def flat_copy(path: OBJ_PATH, value: Any) -> None:
        new_fqn = ".".join(map(str, path))
        if new_fqn in flattened:
            raise ValueError(f"duplicated flatten key {new_fqn}")
        flattened[new_fqn] = value
        mappings[new_fqn] = path

    traverse_state_dict(state_dict, flat_copy, lambda x: isinstance(x, (torch.Tensor, ShardedBase)))
    return flattened, mappings


def sharded_tensor_to_torch_sharded_tensor(
    sh_tens: List[ShardedTensor],
    rank: Optional[int] = None,
    load_legacy_1d_flatten_tensors: bool = False,
) -> TorchShardedTensor:
    """Convert MCore ShardedTensor to PyT ShardedTensor. PyT requires information about all chunks.

    On high-level, this function follows the logic of
    torch.distributed.fsdp._shard_utils._create_chunk_sharded_tensor.
    Additionally, it saves `prepend_axis_num` and `has_flattened_range` (specific to MCore)
    as attributes for further restoration in `_unwrap_pyt_sharded_tensor`.

    NOTE: this function assumes regular (grid) sharding of the MCore ShardedTensor.
    The only local irregularities could be introduced with a `flattened_range` attribute.

    This function handles 2 different type of ShardedTensors:
    1. Non-flat regular ShardedTensors (`not has_flattened_range`)
    2. N-D flattened ShardedTensors (`has_flattened_range`)

    (1) type are saved according to their original shape.
    Type (2) however requires global shape adjustment for efficiency:
    we treat [X, Y, Z] global shape tensor with local shape [x, y, z]
    as a [X // x, Y // y, Z // z, x * y * z] tensor with last axis
    partitioned according to `flattened_range` slices.
    This will need special handling while resharding.

    Args:
        sh_tens (List[ShardedTensor]): list of sharded tensors to convert
        rank (int, optional): current process rank passed to PyT ShardedTensor.
            If None, assumes rank in the default pg.
        load_legacy_1d_flatten_tensors (bool, optional): flag indicating if 1-D flattened tensors
            should be loaded in a legacy way. Defaults to False.

    Returns (TorchShardedTensor): PyT ShardedTensor containing all passed shards.

    """
    if rank is None:
        rank = torch.distributed.get_rank()

    some_sh_ten = sh_tens[0]

    for sh_ten in sh_tens:
        if not sh_ten.data.is_contiguous():
            sh_ten.data = sh_ten.data.contiguous()

    local_global_offsets = {}

    prepend_axis_num = sh_tens[0].prepend_axis_num
    # Determine local shards according to tensor type (see docs)
    # Type (1) case: non-flat regular ShardedTensors
    for sh_ten in sh_tens:
        local_global_offsets.setdefault(sh_ten.global_offset, []).append(sh_ten)
        sh_ten.data = sh_ten.data.view(
            (1,) * prepend_axis_num + sh_ten.local_shape
        )  # adjust to prepended_axis_num

    global_shape = some_sh_ten.global_shape
    offsets_shape = some_sh_ten.data.shape  # includes prepended axes

    local_shards = [
        Shard.from_tensor_and_offsets(sh_ten.data, list(sh_ten.global_offset), rank)  # simple case
        for sh_ten in sh_tens
    ]

    # Create a ShardedTensor without invoking communication. Determine global shards
    world_size = torch.distributed.get_world_size()
    shard_metadata = []
    # NOTE: here we assume a regular grid of shards
    for fragment_offsets in product(*map(range, some_sh_ten.axis_fragmentations)):
        offset = tuple(map(lambda x: x[0] * x[1], zip(fragment_offsets, offsets_shape)))
        if offset in local_global_offsets:
            # local shard
            placement = f"rank:{rank}/cuda"
            for sh_ten in local_global_offsets[offset]:
                size = sh_ten.data.shape
                shard_metadata.append(ShardMetadata(offset, size, placement))
        else:
            # pylint: disable=line-too-long
            # for shards from other ranks we provide simplistic data - this information will be discarded
            # during TorchShardedTensor._init_from_local_shards_and_global_metadata call.
            # Due to a bug in PyT 24.05 container we must specify some concrete rank within a world size.
            # The exact rank doesn't matter as long as it's different than my rank - hence (rank + 1) % WS.
            placement = f"rank:{(rank + 1) % world_size}/cuda"
            size = offsets_shape
            shard_metadata.append(ShardMetadata(offset, size, placement))

    tensor = some_sh_ten.data
    sharded_tensor_metadata = ShardedTensorMetadata(
        shards_metadata=shard_metadata,
        size=torch.Size(global_shape),
        tensor_properties=TensorProperties(
            dtype=tensor.dtype,
            layout=tensor.layout,
            requires_grad=tensor.requires_grad,
            memory_format=torch.contiguous_format,
            pin_memory=tensor.is_pinned(),
        ),
    )
    pyt_sh_ten = TorchShardedTensor._init_from_local_shards_and_global_metadata(
        local_shards, sharded_tensor_metadata=sharded_tensor_metadata, process_group=None
    )
    # Store MCore related data as PyTShardedTensor attribute.
    # This won't be stored in the checkpoint, only for runtime purposes
    pyt_sh_ten.mcore_sh_ten = sh_ten.without_data()
    return pyt_sh_ten


def mcore_to_pyt_state_dict(
    state_dict: Dict[str, List[ShardedBase]],
    is_loading: bool = False,
    init_device: torch.device = torch.device("cpu"),
    load_legacy_1d_flatten_tensors: bool = False,
) -> Dict[str, Union[TorchShardedTensor, io.BytesIO]]:
    """Convert state dict with ShardedTensors and ShardedObjects
    to state dict compatible with PyT Dist format.

    Operates in-place and returns the original state dict.

    Args:
        state_dict (Dict[str, List[ShardedBase]]): flattened state dict, where values
            are lists of either ShardedTensor or ShardedObjects.
        is_loading (bool, optional): flag indicating if loading or saving. Defaults to False.
        init_device (torch.device, optional): device to initialize potentially missing tensors
            during loading. Defaults to 'cpu'.

    Returns (Dict[str, Union[TorchShardedTensor, io.BytesIO]]): original dictionary with values
        converted either into PyT ShardedTensors or io.BytesIO.

    """
    rank = torch.distributed.get_rank()
    pyt_state_dict = {}

    def _mcore_to_dcp_compatible_tensor(sh_tens: List[ShardedTensor]) -> TorchShardedTensor:
        """Build a PyT ShardedTensor from given shards.

        During loading:
        - if data is None, initialize it with an empty tensor (will be used to copy the data into)
        - if `allow_shape_mismatch` is True, the data is initialized with zeros
            prior to loading (not all parts of the tensor will be read from the checkpoint)
        """
        assert all(isinstance(sh_ten, ShardedTensor) for sh_ten in sh_tens), sh_tens
        for sh_ten in sh_tens:
            if sh_ten.data is None:
                if is_loading:
                    sh_ten.init_data(
                        init_device,
                        init_fn=torch.zeros if sh_ten.allow_shape_mismatch else torch.empty,
                    )
                else:
                    raise CheckpointingException(f'`data` attr is None for {sh_ten}')
            else:
                sh_ten.data = sh_ten.data.detach()
                if sh_ten.allow_shape_mismatch and is_loading:
                    sh_ten.data.zero_()

        is_pre_mcore_014_sh_ten = (
            sh_tens[0].prepend_axis_num or sh_tens[0].flattened_range is not None
        )
        if (
            not is_pre_mcore_014_sh_ten or not sh_tens[0].has_regular_grid
        ) and is_torch_min_version("2.6a0"):
            assert sh_tens[0].flattened_range is None
            if len(sh_tens) > 1:
                return LocalShardsContainer(
                    [CheckpointableShardedTensor.from_sh_ten(sh_ten) for sh_ten in sh_tens]
                )
            else:
                return CheckpointableShardedTensor.from_sh_ten(sh_tens[0])
        else:
            if not sh_tens[0].has_regular_grid and not is_torch_min_version("2.6a0"):
                raise CheckpointingException(
                    f"Uneven sharding not supported for PyTorch version {get_torch_version()}"
                )
            torch_sh_ten = sharded_tensor_to_torch_sharded_tensor(
                sh_tens, rank, load_legacy_1d_flatten_tensors
            )
            torch_sh_ten.key = sh_tens[0].key
            return torch_sh_ten

    def _mcore_to_torch_sharded_object(sh_objs: List[ShardedObject]) -> io.BytesIO:
        """Build io.BytesIO from given sharded objects data."""
        assert all(isinstance(sh_obj, ShardedObject) for sh_obj in sh_objs), sh_objs
        serialized_data = io.BytesIO()
        torch.save([sh_obj.data for sh_obj in sh_objs], serialized_data)
        return serialized_data

    for k, v in state_dict.items():
        if isinstance(v[0], ShardedTensor):
            v = cast(List[ShardedTensor], v)
            pyt_state_dict[k] = _mcore_to_dcp_compatible_tensor(v)
        else:
            v = cast(List[ShardedObject], v)
            pyt_state_dict[k] = _mcore_to_torch_sharded_object(v)

    return pyt_state_dict


def _unwrap_pyt_sharded_tensor(
    sh_ten: Union[TorchShardedTensor, CheckpointableShardedTensor, LocalShardsContainer, Any]
) -> Union[List[torch.Tensor], Any]:
    """Unwrap tensor from PyT ShardedTensor instance.

    If `prepend_axis_num` was non-zero (which is specific to MCore ShardedTensor)
    then the tensor has additional singleton dimensions which should be squeezed.
    """
    if isinstance(sh_ten, CheckpointableShardedTensor):
        return [sh_ten._sh_ten.data]
    if isinstance(sh_ten, LocalShardsContainer):
        return [local_shard._sh_ten.data for local_shard in sh_ten._local_shards]
    if not isinstance(sh_ten, TorchShardedTensor):
        return sh_ten
    mcore_sh_ten = sh_ten.mcore_sh_ten
    ret_tensors = []
    for sh in sh_ten.local_shards():
        ten = sh.tensor
        for _ in range(mcore_sh_ten.prepend_axis_num):
            assert ten.size(0) == 1
            ten = ten[0]  # NOTE: ten.squeeze(0) uses more memory for FP8 tensors
        ret_tensors.append(ten)
    return ret_tensors


def _replace_state_dict_keys_with_sharded_keys(
    sharded_state_dict: ShardedStateDict, keep_only_main_replica: bool = False
) -> Tuple[Dict[str, List[ShardedBase]], FLATTEN_MAPPING, Dict[str, List[str]]]:
    """Group ShardedBase objects by keys and
    return mappings required for recreating the original dict."""
    flat_sd, flat_mapping = flatten_state_dict(sharded_state_dict)
    rename_mapping = defaultdict(list)
    new_flat_sd = defaultdict(list)
    for k, sh_base in flat_sd.items():
        assert isinstance(sh_base, ShardedBase), type(sh_base)
        key = sh_base.unique_key if isinstance(sh_base, ShardedObject) else sh_base.key
        if is_main_replica(sh_base.replica_id) or not keep_only_main_replica:
            rename_mapping[key].append(k)
            new_flat_sd[key].append(sh_base)
    return new_flat_sd, flat_mapping, rename_mapping


def _replace_sharded_keys_with_state_dict_keys(
    state_dict: Dict[str, List[Union[torch.Tensor, io.BytesIO]]],
    flat_mapping: FLATTEN_MAPPING,
    rename_mapping: Dict[str, List[str]],
):
    """Inverse of _replace_state_dict_keys_with_sharded_keys."""
    recovered_sd = {}
    for k, tensors in state_dict.items():
        assert len(tensors) == len(rename_mapping[k])
        for ten, recovered_k in zip(tensors, rename_mapping[k]):
            recovered_sd[recovered_k] = ten

    return unflatten_state_dict(recovered_sd, flat_mapping)


def _restore_dict_types(x: Union[dict, list, Any], keys_template: Union[dict, list, Any]):
    """Recursively update `x` keys, based on `keys_template`."""
    if isinstance(keys_template, dict):
        assert isinstance(x, dict), type(x)
        for k, v in keys_template.items():
            if not isinstance(k, str):
                assert str(k) in x, (k, x.keys)
                x[k] = x.pop(str(k))
            _restore_dict_types(x[k], v)
    elif isinstance(keys_template, list):
        assert isinstance(x, list), type(x)
        for x_val, templ_val in zip(x, keys_template):
            _restore_dict_types(x_val, templ_val)


class MCoreSavePlanner(DefaultSavePlanner):
    """Differs with the default planner by saving BytesIO objects on all ranks.

    In the integration of MCore with PyT Distributed format, BytesIO objects
    come from ShardedObjects, which should be treated as separate objects on each rank
    (not common on all ranks).

    Also, the objects are already packed in io.BytesIO, so no need to redo it
    in transform_object.
    """

    def __init__(
        self,
        *args,
        dedup_replicated_tensors: Optional[bool] = None,
        can_run_decentralized_global_plan: bool = True,
        **kwargs,
    ) -> None:
        # `dedup_replicated_tensors` was deprecated in 2.3; this check avoids warnings
        # during saving.
        if get_torch_version() <= PkgVersion("2.2"):
            kwargs['dedup_replicated_tensors'] = dedup_replicated_tensors
        super().__init__(*args, **kwargs)
        self.can_run_decentralized_global_plan = can_run_decentralized_global_plan
        if can_run_decentralized_global_plan:
            assert (
                not dedup_replicated_tensors
            ), 'Cannot run decentralized plan with dedup_replicated_tensors=True'
            assert (
                not self.flatten_state_dict
            ), 'Cannot run decentralized plan with flatten_state_dict=True'

    def create_local_plan(self) -> SavePlan:
        """Adds IOBytes write request on non-coordinator ranks."""

        # NOTE: for PyT 2.4.0a0 we can't rely on `create_default_local_save_plan` because
        # some alpha versions (specifically 2.4.0a0+f70bd71a48 in 24.06 NGC PyTorch container)
        # add iobytes request only on coordinator ranks and some alpha versions
        # (specifically 2.4.0a0+3bcc3cddb5 in 24.07 NGC PyTorch container)
        # add those requests on all ranks. We inline a simplified version of this method below.
        write_items = []
        for fqn, obj in self.state_dict.items():
            assert not HAVE_DTENSOR or not isinstance(
                obj, DTensor
            )  # translation from MCore ShardedTensors shouldn't result in DTensors
            # Create write requests for tensor and bytes values.
            # For MCore, these should be already non-duplicates.
            write_items += _create_write_items(fqn, obj)

        self.plan = SavePlan(items=write_items, planner_data=self.mappings)
        return self.plan

    def create_decentralized_global_plan(self, local_plan: SavePlan) -> SavePlan:
        """Nothing to do, just some checks.

        Args:
            local_plan (SavePlan): local plan to turn to a global plan
                (without interactions with other ranks)

        Returns:
            SavePlan - locally transformed plan equivalent to the plan that would be
                created by the coordinator
        """
        assert (
            not self.flatten_state_dict
        ), 'Cannot run decentralized plan with flatten_state_dict=True'
        assert not local_plan.planner_data, 'Planner data should be empty with decentralized plan'
        return local_plan

    def transform_object(self, write_item: WriteItem, object: Any):
        """Make no transformations - bytes objects are already serialized."""
        return object


class MCoreLoadPlanner(DefaultLoadPlanner):
    """Adds global shape validation to the default planner.

    If global shape validation can be ignored (shouldn't!), the default
    load planner can be used.
    """

    def __init__(
        self,
        *args,
        shapes_validation_sharded_tensors: Iterable[ShardedTensor] = (),
        allow_shape_mismatch_sharded_tensors: Optional[Dict[str, ShardedTensor]] = None,
        **kwargs,
    ) -> None:
        super().__init__(*args, **kwargs)
        self.shapes_validation_sharded_tensors = shapes_validation_sharded_tensors
        self.allow_shape_mismatch_sharded_tensors = allow_shape_mismatch_sharded_tensors
        self._intermediate_read_item_and_target: Optional[Tuple[ReadItem, torch.Tensor]] = None

    def _validate_global_shapes(self, metadata, sharded_tensors):
        for sh_ten in sharded_tensors:
            if sh_ten.key not in metadata.state_dict_metadata:
                raise KeyError(
                    f"{sh_ten.key} from model not in state dict:"
                    f" {sorted(metadata.state_dict_metadata.keys())}"
                )
            loaded_shape = metadata.state_dict_metadata[sh_ten.key].size
            expected_shape = sh_ten.global_shape
            if loaded_shape != expected_shape:
                _msg = (
                    f'Global shape mismatch for loaded ({loaded_shape})'
                    f' and expected ({expected_shape}) tensor'
                    f' for key {sh_ten.key}'
                )
                raise CheckpointingException(_msg)

    @contextmanager
    def _temporarily_bypass_shape_validation(self):
        """
        Temporarily set the size of tensors to their expected shapes to bypass DCP shape validation.
        This is used when validating the shapes during local plan creation.
        """
        if not self.allow_shape_mismatch_sharded_tensors:
            yield
            return

        tensor_metadata = self.metadata.state_dict_metadata
        metadata_with_sizes = [
            (tensor_metadata[key], tensor_metadata[key].size, sharded_tensor)
            for key, sharded_tensor in self.allow_shape_mismatch_sharded_tensors.items()
        ]
        try:
            # Temporarily set sizes to expected shapes
            for md, _, sharded_tensor in metadata_with_sizes:
                md.size = sharded_tensor.global_shape
            yield
        finally:
            # Restore original sizes after yield
            for md, size, _ in metadata_with_sizes:
                md.size = size

    def create_local_plan(self) -> LoadPlan:
        """Runs additional shapes validation."""
        self._validate_global_shapes(self.metadata, self.shapes_validation_sharded_tensors)

        with self._temporarily_bypass_shape_validation():
            local_plan = super().create_local_plan()

        return local_plan

    def resolve_tensor(self, read_item: ReadItem):
        """Override to add FP8 support.

        Narrowing the Float8Tensor can create incontiguous tensors and there are
        no `copy` kernels for such cases. This method creates a contiguous FP8
        tensors so that the subsequent `copy_` in FileSystemReader succeeds.
        Note that this requires tracking the original tensor
        (as `self._intermediate_read_item_and_target` attribute)
        and restoring it in `commit_tensor` method.
        """
        target_tensor = super().resolve_tensor(read_item)
        if (
            not target_tensor.is_contiguous()
            and HAVE_TE
            and isinstance(target_tensor, Float8Tensor)
        ):
            self._intermediate_read_item_and_target = (read_item, target_tensor)
            target_tensor = Float8Tensor.make_like(
                target_tensor, data=target_tensor._data.contiguous()
            )
        return target_tensor

    def commit_tensor(self, read_item: ReadItem, tensor: torch.Tensor) -> None:
        """Restores the original FP8 tensor saved in `resolve_tensor`."""
        if self._intermediate_read_item_and_target is not None:
            interm_read_item, target_tensor = self._intermediate_read_item_and_target
            assert (
                interm_read_item is read_item
            ), '`commit_tensor` method should be called right after `resolve_tensor`'
            target_tensor.copy_(tensor)
            tensor = target_tensor
            self._intermediate_read_item_and_target = None
        return super().commit_tensor(read_item, tensor)


class TorchDistSaveShardedStrategy(AsyncSaveShardedStrategy):
    """Async save strategy for the PyT Distributed format.

    The idea is to translate MCore ShardedTensors into PyT ShardedTensors
    and use the async-adjusted torch.distributed.checkpoint saving mechanism
    provided by the FileSystemWriterAsync writer.
    """

    def __init__(
        self,
        backend: str,
        version: int,
        keep_only_main_replica: bool = True,
        thread_count: int = 1,
        cached_metadata: bool = False,
        separation_hint: Optional[str] = None,
    ):
        """Adds parameters specific to PyT Distributed format
        Args:
            backend (str): format backend string
            version (int): format version
            keep_only_main_replica (bool, optional): PyT Distributed has a mechanism
                for deduplication, but replica_id aware deduplication is more coherent.
                Default is True (recommended to keep it).
            thread_count (int, optional): threads to use during saving.
                Affects the number of files in the checkpoint (saving ranks * num_threads).
            cached_metadata (bool, optional): Enables using cached global metadata to avoid
                gathering local metadata every checkpointing invocation
            separation_hint(str, optional): If provided, all tensors whose keys have this
                prefix will be saved to a separate file.
        """
        super().__init__(backend, version)
        self.keep_only_main_replica = keep_only_main_replica
        self.thread_count = thread_count

        # Cached SavePlans to skip plan in `save_state_dict_async_plan`
        # cached outcome of `SavePlan.prepare_global_plan`,
        # which aggregates local plans from all ranks
        self.cached_central_plan: SavePlan = None
        # cached outcome of `SavePlan.prepare_local_plan` describes how local state_dict is written
        self.cached_local_plan: SavePlan = None
        # Cached global metadata, only `coordinator` for dist-ckpt holds
        # if central plans are consistent over iters
        self.cached_global_metadata: Metadata = None
        # This variable records if the ckpt structures are consistent
        # so the following checkpoint savings reuse `cached_global_metadata`
        self.validated_cache_reuse: bool = False
        # The knob to enable cached metadata communication in saving
        self.use_cached_ckpt_structure: bool = cached_metadata

        self.separation_hint = separation_hint

        self.validated_loaded_metadata_reuse = False

    def async_save(
        self, sharded_state_dict: ShardedStateDict, checkpoint_dir: Path
    ) -> AsyncRequest:
        """Translates MCore ShardedTensors to PyT ShardedTensors & saves in PyT Distributed format.

        Args:
            sharded_state_dict (ShardedStateDict): sharded state dict to save
            checkpoint_dir (Path): checkpoint directory

        Returns: None
        """
        # Translate the state dict
        (sharded_state_dict, flat_mapping, rename_mapping) = (
            _replace_state_dict_keys_with_sharded_keys(
                sharded_state_dict, self.keep_only_main_replica
            )
        )
        pyt_state_dict = mcore_to_pyt_state_dict(sharded_state_dict, False)

        if self.separation_hint is not None and self.thread_count <= 1:
            self.thread_count = 2

        # Use PyT saving mechanism
        writer = FileSystemWriterAsync(
            checkpoint_dir,
            separation_hint=self.separation_hint,
            thread_count=self.thread_count,
            use_msc=MultiStorageClientFeature.is_enabled(),
        )
        # This should be set differently if we run in a smaller process group than the default
        coordinator = 0
        # Try twice to validate the generated `central_plan` is the same across iterations
        # If so, reuse `cached_central_plan` and `cached_global_metadata`
        # From the 3rd iteration, `save_state_dict_async_plan` will not generate `global_metadata`
        # (return None) so `self.cached_global_metadata` is reused
        args_cached_plans = None
        loaded_all_plans = None
        if self.use_cached_ckpt_structure:
            loaded_all_plans = getattr(self.cached_global_metadata, "all_local_plans", None)
            if loaded_all_plans is None:
                logger.debug(
                    "no all_local_plans in metadata - can't verify global metadata reuse..."
                )

            args_cached_plans = (
                self.cached_central_plan,
                self.cached_local_plan,
                self.validated_cache_reuse,
            )

        (
            save_state_dict_ret,
            self.cached_central_plan,
            self.cached_local_plan,
            self.validated_cache_reuse,
            self.validated_loaded_metadata_reuse,
        ) = save_state_dict_async_plan(
            pyt_state_dict,
            writer,
            None,
            coordinator,
            # flatten_sharded_tensors=False: MCore doesn't use nested ShardedTensors (FSDP 2D),
            # so skip the expensive traverse_state_dict copy in _flatten_sharded_tensors
            planner=MCoreSavePlanner(
                dedup_replicated_tensors=not self.keep_only_main_replica,
                flatten_state_dict=False,
                flatten_sharded_tensors=False,
            ),
            cached_ckpt_structure=args_cached_plans,
            loaded_all_plans=loaded_all_plans,
        )
        rank = torch.distributed.get_rank()
        if self.use_cached_ckpt_structure:
            if (
                loaded_all_plans
                and self.cached_global_metadata
                and self.validated_loaded_metadata_reuse
            ):
                if coordinator == rank:
                    logger.debug(
                        f"rank: {rank}, reuse global metadata from loaded"
                        f" .metadata, {save_state_dict_ret[1]}"
                    )
                    save_state_dict_ret = list(save_state_dict_ret)
                    save_state_dict_ret[1] = self.cached_global_metadata

            elif self.validated_cache_reuse:
                logger.debug(f"rank: {rank}, cache validated")
                if save_state_dict_ret[1]:  # when global_metadata is not cached
                    self.cached_global_metadata = save_state_dict_ret[1]  # Cache Metadata
                # Only Coordinator rank holds cached global_metadata
                # (None is returned for global_metadata)
                elif coordinator == rank:
                    logger.debug(
                        f"rank: {rank}, reuse global metadata cached from previous"
                        f" save iteration, {save_state_dict_ret[1]}"
                    )
                    save_state_dict_ret = list(save_state_dict_ret)
                    save_state_dict_ret[1] = self.cached_global_metadata

        return self._get_save_and_finalize_callbacks(writer, save_state_dict_ret)

    def _get_save_and_finalize_callbacks(self, writer, save_state_dict_ret) -> AsyncRequest:
        save_fn_args = writer.get_save_function_and_args()
        save_fn, preload_fn, save_args = save_fn_args

        def finalize_fn():
            save_state_dict_async_finalize(*save_state_dict_ret)

        return AsyncRequest(save_fn, save_args, [finalize_fn], preload_fn=preload_fn)

    def can_handle_sharded_objects(self):
        return True


def _get_filesystem_reader(
    checkpoint_dir: Union[str, Path], cache_metadata: bool = False
) -> FileSystemReader:
    if MultiStorageClientFeature.is_enabled():
        msc = MultiStorageClientFeature.import_package()
        return msc.torch.MultiStorageFileSystemReader(checkpoint_dir, thread_count=2)

    if cache_metadata:
        return CachedMetadataFileSystemReader(checkpoint_dir, cache_metadata=cache_metadata)

    return FileSystemReader(checkpoint_dir)


class TorchDistLoadShardedStrategy(LoadShardedStrategy):
    """Basic load strategy for the PyT Distributed format."""

    def __init__(self, cache_metadata: bool = False):
        self.cached_global_metadata: Optional[Metadata] = None
        self.cache_metadata = cache_metadata
        super().__init__()

    def load(self, sharded_state_dict: ShardedStateDict, checkpoint_dir: Path) -> StateDict:
        """Translates MCore ShardedTensors to PyT ShardedTensors & loads from PyT Distributed fmt.

        Args:
            sharded_state_dict (ShardedStateDict): sharded state dict with mapping
                information to instruct loading
            checkpoint_dir (Path): checkpoint directory

        Returns: loaded state dict
        """
        flexible_shape_sharded_tensors = [
            sh_ten
            for sh_ten in nested_values(sharded_state_dict)
            if isinstance(sh_ten, ShardedTensor) and not sh_ten.allow_shape_mismatch
        ]
        allow_shape_mismatch_sharded_tensors = {
            sh_ten.key: sh_ten
            for sh_ten in nested_values(sharded_state_dict)
            if isinstance(sh_ten, ShardedTensor) and sh_ten.allow_shape_mismatch
        }

        orig_sharded_state_dict = sharded_state_dict
        # MCore state dict to PyT Distributed compatible
        (sharded_state_dict, flat_mapping, rename_mapping) = (
            _replace_state_dict_keys_with_sharded_keys(sharded_state_dict)
        )
        pyt_state_dict = mcore_to_pyt_state_dict(sharded_state_dict, True)
        # Load PyT Distributed format
        fsr = _get_filesystem_reader(checkpoint_dir, cache_metadata=self.cache_metadata)
        checkpoint.load_state_dict(
            pyt_state_dict,
            fsr,
            planner=MCoreLoadPlanner(
                shapes_validation_sharded_tensors=flexible_shape_sharded_tensors,
                allow_shape_mismatch_sharded_tensors=allow_shape_mismatch_sharded_tensors,
                flatten_state_dict=False,
                flatten_sharded_tensors=False,
            ),
        )

        if self.cache_metadata:
            self.cached_global_metadata = (
                fsr.read_metadata()
            )  # no storage interaction thanks to caching

        pyt_state_dict = cast(
            Dict[str, Union[TorchShardedTensor, List[io.BytesIO]]], pyt_state_dict
        )
        # Unwrap ShardedTensors and return to original state dict
        mcore_state_dict = {k: _unwrap_pyt_sharded_tensor(v) for k, v in pyt_state_dict.items()}
        mcore_state_dict = _replace_sharded_keys_with_state_dict_keys(
            mcore_state_dict, flat_mapping, rename_mapping  # type: ignore[arg-type]
        )
        _restore_dict_types(mcore_state_dict, orig_sharded_state_dict)
        return mcore_state_dict

    def load_tensors_metadata(self, checkpoint_dir: Path, metadata: Metadata = None):
        """Uses tensors metadata stored in the metadata file."""
        if metadata is None:
            fs_reader = _get_filesystem_reader(checkpoint_dir)
            metadata = fs_reader.read_metadata()

        mcore_data = getattr(metadata, 'mcore_data', {})
        sharded_metadata = {}
        for k, tp in metadata.state_dict_metadata.items():
            if not isinstance(tp, TensorStorageMetadata):
                continue  # load only tensors

            # Regular tensor
            sharded_metadata[k] = ShardedTensor.from_rank_offsets(
                k, torch.empty(tp.size, **tp.properties.__dict__, device='meta')
            ).without_data()
        return sharded_metadata

    def load_sharded_metadata(self, checkpoint_dir: Path) -> ShardedStateDict:
        """Uses tensors and objects metadata stored in the metadata file."""
        fs_reader = _get_filesystem_reader(checkpoint_dir)
        metadata = fs_reader.read_metadata()

        sharded_metadata = {}
        for metadata_key, storage_metadata in metadata.state_dict_metadata.items():
            if not isinstance(storage_metadata, BytesStorageMetadata):
                continue
            sh_obj = ShardedObject.empty_from_unique_key(metadata_key)
            sharded_metadata[sh_obj.unique_key] = sh_obj

        sharded_metadata.update(self.load_tensors_metadata(checkpoint_dir, metadata))
        return sharded_metadata

    def remove_sharded_tensors(self, checkpoint_dir: str, key_prefix: str):
        """Removes checkpoint files whose keys have the given prefix.

        Performs the following steps:
        1. checks whether there are files that start with the key_prefix
        2. loads metadata
        3. removes all entries from the metadata that start with the key_prefix
        4. resaves the new metadata and removes the old metadata
        5. removes the relevant files
        """

        assert is_torch_min_version(
            "2.3.0"
        ), f'torch >= 2.3.0 is required for remove_sharded_tensors'

        distckpt_files = [f for f in os.listdir(checkpoint_dir) if f.endswith("distcp")]
        files_to_remove = [f for f in distckpt_files if f.startswith(key_prefix)]

        if not files_to_remove:
            warnings.warn(
                f'There are no files in {checkpoint_dir} that begin with "{key_prefix}".'
                f' Skipping removal.'
            )
            return

        fs_reader = FileSystemReader(checkpoint_dir)
        original_metadata = fs_reader.read_metadata()

        new_state_dict_metadata = {}
        new_planner_data = {}
        new_storage_data = {}
        for k in original_metadata.state_dict_metadata.keys():
            if k.startswith(key_prefix):
                continue
            new_state_dict_metadata[k] = original_metadata.state_dict_metadata[k]
        original_planner_data = original_metadata.planner_data
        if original_planner_data is not None:
            for k in original_planner_data.keys():
                if k.startswith(key_prefix):
                    continue
                new_planner_data[k] = original_metadata.planner_data[k]
        original_storage_data = original_metadata.storage_data
        if original_storage_data is not None:
            for k in original_storage_data.keys():
                if k.fqn.startswith(key_prefix):
                    continue
                new_storage_data[k] = original_metadata.storage_data[k]
        metadata = Metadata(
            state_dict_metadata=new_state_dict_metadata,
            planner_data=new_planner_data,
            storage_data=new_storage_data,
        )
        fs_writer = FileSystemWriter(checkpoint_dir)
        metadata_filename = cast(Path, fs_writer.fs.concat_path(fs_writer.path, _metadata_fn))
        tmp_path = cast(
            metadata_filename,  # type: ignore[valid-type]
            fs_writer.fs.concat_path(fs_writer.path, f"{_metadata_fn}.tmp"),
        )
        old_path = cast(
            metadata_filename,  # type: ignore[valid-type]
            fs_writer.fs.concat_path(fs_writer.path, f"{_metadata_fn}.bck"),
        )
        ## save the new metadata
        with fs_writer.fs.create_stream(tmp_path, "wb") as metadata_file:
            pickle.dump(metadata, metadata_file)
            try:
                os.fsync(metadata_file.fileno())
            except AttributeError:
                os.sync()
        ## move the old metadata
        fs_writer.fs.rename(fs_writer.metadata_path, old_path)
        try:
            ## rename the new metadata
            fs_writer.fs.rename(tmp_path, fs_writer.metadata_path)

            ## finally, remove the files we want to drop
            for f in files_to_remove:
                fs_writer.fs.rm_file(checkpoint_dir / f)
        except Exception as e:
            fs_writer.fs.rename(old_path, fs_writer.metadata_path)
            raise e
        else:
            fs_writer.fs.rm_file(old_path)

    def can_handle_sharded_objects(self):
        return True

    def check_backend_compatibility(self, loaded_version):
        pass  # TODO

    def check_version_compatibility(self, loaded_version):
        pass  # TODO


================================================
FILE: megatron/core/dist_checkpointing/tensor_aware_state_dict.py
================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.  All rights reserved.

"""Utilities for transforming state_dict, including a tensor-aware implementation."""

import logging
from dataclasses import dataclass
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple

import torch

from .dict_utils import dict_list_map_inplace, dict_list_map_outplace, merge, nested_values
from .exchange_utils import (
    ShardDistribution,
    determine_main_replica_uniform_distribution,
    exchange_by_distribution,
)
from .mapping import ShardedObject, ShardedStateDict, ShardedTensor, StateDict, apply_factory_merges
from .state_dict_utils import load_preprocess, save_preprocess
from .utils import (
    _sharded_object_id,
    _sharded_tensor_shard_id,
    debug_time,
    extract_sharded_base,
    zip_strict,
)
from .validation import (
    StrictHandling,
    determine_global_metadata,
    parse_strict_flag,
    validate_integrity_and_strict_load,
)

logger = logging.getLogger(__name__)

try:
    from nvidia_resiliency_ext.checkpointing.local.base_state_dict import TensorAwareStateDict

    HAVE_NVRX = True
except ImportError:
    import types

    # Create a dummy class that mimics the real one
    TensorAwareStateDict = types.new_class("TensorAwareStateDict", ())
    HAVE_NVRX = False


@dataclass
class MCoreTensorAwareStateDict(TensorAwareStateDict):
    """
    MCore-specific class defining the interface between the MCore state dict and checkpoint manager.

    This class distinguishes between raw objects, the common state dict, and sharded state dicts
    (tensor parts). It also handles optional metadata needed for fully parallel save/load.
    """

    common: StateDict
    sharded_state_dict: ShardedStateDict
    _is_hollow: bool = False

    @staticmethod
    def _validate_params(algo):
        if algo != "atomic" and algo != "fully_parallel":
            raise NotImplementedError(
                'Only "atomic" and "fully_parallel" sharding algorithms are supported.'
            )

    @staticmethod
    def _get_distribution(
        fully_parallel, sharded_part, parallelization_group, cached_distribution=None
    ):
        if fully_parallel:
            if cached_distribution is None:
                distribution = determine_main_replica_uniform_distribution(
                    sharded_part, parallelization_group, True
                )
                logger.debug(f"MCore_TASD._get_distribution calculated distribution")
            else:
                distribution = cached_distribution
                logger.debug(f"MCore_TASD._get_distribution used cache")
        else:
            distribution = (None, None, None, None)
            logger.debug(f"MCore_TASD._get_distribution returned empty distribution")
        return distribution

    @staticmethod
    def _remove_redundant_data(
        fully_parallel, sharded_part, shard_to_saving_rank, parallelization_group
    ):
        if parallelization_group is None:
            parallelization_group = torch.distributed.group.WORLD
        if fully_parallel:
            for sh_base in nested_values(sharded_part):
                # TODO remove redundant objects as well
                if isinstance(sh_base, ShardedTensor):
                    shard_id = _sharded_tensor_shard_id(sh_base)
                    if shard_to_saving_rank[shard_id] != parallelization_group.rank():
                        sh_base.data = None

    @classmethod
    @debug_time("from_state_dict", logger)
    def from_state_dict(
        cls,
        sharded_state_dict: ShardedStateDict,
        algo: str = "fully_parallel",
        parallelization_group: Optional[torch.distributed.ProcessGroup] = None,
        cached_metadata: ShardDistribution = None,
    ) -> Tuple[TensorAwareStateDict, ShardDistribution]:
        """
        Constructs a TensorAwareStateDict from a sharded state dictionary.

        This method preprocesses the input `sharded_state_dict`, validates parameters,
        and extracts the necessary data to create an instance of `MCoreTensorAwareStateDict`.

        Args:
            sharded_state_dict: The input sharded state dictionary to be converted.
            algo (str, optional): Initialization algorithm. Defaults to 'fully_parallel'.
                - 'fully_parallel' enables fully parallel initialization.
            parallelization_group (Optional): A distributed process group for parallelization.
            cached_metadata (Optional): Precomputed metadata from previous saves.
                - Reuses data that doesn't need recalculation, optimizing the creation process.

        Returns:
            TensorAwareStateDict: An instance initialized with the provided sharded state dictionary
            and optional cached metadata.
            - The metadata is stored in memory to speed up future saves.
        """
        if not HAVE_NVRX:
            raise ImportError(
                "nvidia_resiliency_ext is not installed. "
                "Please install it with "
                "`pip install nvidia-resiliency-ext`"
            )

        with debug_time("_get_distribution", logger):
            cls._validate_params(algo)
            fully_parallel = algo == "fully_parallel"
            sharded_part, common_state_dict = save_preprocess(
                sharded_state_dict, cached_metadata is None
            )
            cacheable_distribution = cls._get_distribution(
                fully_parallel, sharded_part, parallelization_group, cached_metadata
            )
        if cacheable_distribution is not None:
            shard_to_saving_rank, _, _, _ = cacheable_distribution
            cls._remove_redundant_data(
                fully_parallel, sharded_part, shard_to_saving_rank, parallelization_group
            )

        return (
            MCoreTensorAwareStateDict(common=common_state_dict, sharded_state_dict=sharded_part),
            cacheable_distribution,
        )

    @property
    def is_hollow(self):
        """
        True iff tensors had been extracted and have not been inserted back yet.
        """
        return self._is_hollow

    @property
    def _sharded_tensors(self):
        # Three possible states for sharded_tensor:
        # 1. sharded_tensor with data (.data = tensor)
        # 2. sharded_tensor hollow (.data = None, .orig_device = orig_device)
        # 3. removed sharded_tensor (.data = None, no device information)
        # TODO: Consider simplifying by removing the entire sharded_tensor instead of just the data
        if self.is_hollow:
            for sh_base in nested_values(self.sharded_state_dict):
                # FIXME: Hacky way to store the original device of the popped tensor
                if isinstance(sh_base, ShardedTensor) and hasattr(sh_base, "orig_device"):
                    yield sh_base
        else:
            for sh_base in nested_values(self.sharded_state_dict):
                if isinstance(sh_base, ShardedTensor) and sh_base.data is not None:
                    yield sh_base

    @property
    def tensors(self) -> Iterator[torch.Tensor]:
        """
        Get the tensor data from the state dict.
        """
        assert not self.is_hollow  # TODO raise exception
        return map(lambda sh_ten: sh_ten.data, self._sharded_tensors)

    @property
    def common_state_dict(self) -> Dict:
        """
        Get the common state dict from the state dict.
        """
        return self.common

    def pop_tensors(self) -> List[torch.Tensor]:
        """
        Extracts the tensor data from the wrapped state dict, preserving metadata.

        Replaces the tensor data in sharded_tensors with device type of extracted tensors.
        After this operation, the state dictionary is "hollow", containing no tensor data.
        Further calls to `pop_tensor` will raise an error.

        @return List of extracted tensors
        """
        assert not self.is_hollow  # TODO raise exception
        result = []
        for sh_ten in self._sharded_tensors:
            result.append(sh_ten.data)
            # FIXME: Hacky way to store the original device, which is not included in the metadata
            setattr(sh_ten, "orig_device", sh_ten.data.device.type)
            sh_ten.data = None
        self._is_hollow = True
        return result

    def insert_tensors(self, tensor_data: Iterable[torch.Tensor]):
        """
        Reverse of `pop_tensors`. Replaces device type in sharded_tensors with actual values
        Value of `self` is considered to be the same after:
            ```
            self.insert_tensors(self.pop_tensors())
            ```
        """
        assert self.is_hollow  # TODO raise exception
        for sh_ten, ten in zip_strict(self._sharded_tensors, tensor_data):
            # FIXME: Hacky way to store the original device
            if sh_ten.orig_device == ten.device.type:
                delattr(sh_ten, "orig_device")
            # Tensor might be on non-original device
            sh_ten.data = ten
        self._is_hollow = False

    def init_tensors(self):
        """
        Initializes empty tensors with the same properties as the original tensors.

        This function should only be called after the original tensors have been popped.
        It ensures that the newly created empty tensors match the shape,
        dtype, and device of the originals, but contain no data.
        """
        assert self.is_hollow  # TODO raise exception
        for sh_ten in self._sharded_tensors:
            # Hacky way to retrieve the original device
            sh_ten.init_data(sh_ten.orig_device)
            delattr(sh_ten, "orig_device")
        self._is_hollow = False

    def copy_tensors_to_cpu(self, non_blocking=False):
        """
        Stores CPU copies of tensors in the state_dict, replacing the originals,
        but without destroying them.
        The original devices are remembered for restoration with restore_tensor_device().
        Using non_blocking=True allows for asynchronous copying.
        """
        assert not self.is_hollow  # TODO raise exception
        for sh_ten in self._sharded_tensors:
            if sh_ten.data.device.type == "cpu":
                # Skip cloning if it's already confirmed to be a copy
                if not hasattr(sh_ten, "orig_device"):
                    sh_ten.data = sh_ten.data.clone()
            else:
                # FIXME: Hacky way to store the original device
                if not hasattr(sh_ten, "orig_device"):
                    setattr(sh_ten, "orig_device", sh_ten.data.device.type)
                sh_ten.data = sh_ten.data.detach().to("cpu", non_blocking=non_blocking)

    def restore_tensor_device(self, non_blocking=True):
        """
        Restores all tensors to their original devices, if a move is required.
        Using non_blocking=True allows for asynchronous copying.
        """
        assert not self.is_hollow  # TODO raise exception
        for sh_ten in self._sharded_tensors:
            # FIXME: Hacky way to store the original device
            if hasattr(sh_ten, "orig_device"):
                sh_ten.data = sh_ten.data.to(sh_ten.orig_device, non_blocking=non_blocking)
                delattr(sh_ten, "orig_device")

    def _insert_sharded_data(
        self, fully_parallel, sharded_part, parallelization_group, exchange_algo
    ):
        loaded_tensors = {}
        for sh_ten in self._sharded_tensors:
            loaded_tensors[_sharded_tensor_shard_id(sh_ten)] = sh_ten.data
        if fully_parallel:
            with debug_time("_get_distribution", logger):
                distribution = self._get_distribution(
                    fully_parallel, sharded_part, parallelization_group
                )
            if distribution is not None:
                unloaded_shards = {}
                for sh_base in nested_values(sharded_part):
                    # TODO retrieve redundant ShardedObjects once removed in _remove_redundant_data
                    if isinstance(sh_base, ShardedTensor):
                        shard_id = _sharded_tensor_shard_id(sh_base)
                        if shard_id not in loaded_tensors:
                            unloaded_shards[shard_id] = sh_base

                with debug_time("exchange_by_distribution", logger):
                    loaded_tensors = exchange_by_distribution(
                        loaded_tensors,
                        unloaded_shards,
                        distribution,
                        parallelization_group,
                        exchange_algo,
                    )
                    torch.cuda.synchronize()
        loaded_objects = {}
        for sh_base in nested_values(self.sharded_state_dict):
            if not isinstance(sh_base, ShardedTensor):
                assert isinstance(sh_base, ShardedObject)
                loaded_objects[_sharded_object_id(sh_base)] = sh_base.data

        def load_sharded_base(x: Any):
            if isinstance(x, ShardedTensor):
                shard_id = _sharded_tensor_shard_id(x)
                assert shard_id in loaded_tensors, (x, shard_id, loaded_tensors.keys())
                x = loaded_tensors[shard_id]
            if isinstance(x, ShardedObject):
                object_id = _sharded_object_id(x)
                assert object_id in loaded_objects, (x, object_id, loaded_objects.keys())
                x = loaded_objects[object_id]
            return x

        dict_list_map_inplace(load_sharded_base, sharded_part)

    @debug_time("to_state_dict", logger)
    def to_state_dict(
        self,
        sharded_state_dict: ShardedStateDict,
        algo: str = "atomic",
        exchange_algo: str = "broadcast",
        validate_access_integrity: bool = True,
        parallelization_group: Optional[torch.distributed.ProcessGroup] = None,
        strict: StrictHandling = StrictHandling.ASSUME_OK_UNEXPECTED,
        return_mismatch_keys: bool = False,
    ):
        """
        Convert tensor-aware dict back to the original state_dict
        """
        with debug_time("load_preprocess_and_state_dict_manipulations", logger):
            assert not self.is_hollow  # TODO raise exception
            self._validate_params(algo)
            fully_parallel = algo == "fully_parallel"

            # __adding__ common part
            recreated_state_dict = dict_list_map_outplace(lambda x: x, self.common)

            if not sharded_state_dict:
                return recreated_state_dict
            # TODO validate self.sharded_state_dict"] and sharded_state_dict are compatible

            sharded_state_dict, nonpersistent_state_dict, sh_ten_factories = load_preprocess(
                sharded_state_dict
            )
            # __adding__ nonpersistent part
            merge(recreated_state_dict, nonpersistent_state_dict)

            sharded_part, _ = extract_sharded_base(sharded_state_dict)

        # Strictness
        ckpt_sharded_metadata = None
        local_metadata, global_metadata = None, None
        strict = parse_strict_flag(strict)

        if StrictHandling.requires_explicit_ckpt_mismatch_check(strict):
            ckpt_sharded_metadata = {
                sh_base.key: sh_base.without_data()
                for sh_base in nested_values(self.sharded_state_dict)
            }

        if validate_access_integrity or StrictHandling.requires_global_app_metadata(strict):
            local_metadata, global_metadata = determine_global_metadata(sharded_part)

        sharded_state_dict, missing_keys, unexpected_keys = validate_integrity_and_strict_load(
            sharded_part,
            strict,
            validate_access_integrity,
            local_metadata,
            global_metadata,
            ckpt_sharded_metadata,
        )

        # load sharded tensors and sharded objects to sharded_part
        with debug_time("_insert_sharded_data", logger):
            self._insert_sharded_data(
                fully_parallel, sharded_part, parallelization_group, exchange_algo
            )
        with debug_time("apply_factory_merges", logger):
            sharded_part = apply_factory_merges(sharded_part, sh_ten_factories)
            # __adding__ sharded_part
            merge(recreated_state_dict, sharded_part)

        if return_mismatch_keys:
            return recreated_state_dict, missing_keys, unexpected_keys
        else:
            return recreated_state_dict


================================================
FILE: megatron/core/dist_checkpointing/utils.py
================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.  All rights reserved.

""" Helpers for manipulating sharded tensors and sharded state dicts. """
import logging
from contextlib import contextmanager
from time import time
from typing import Dict, Optional, Tuple

from .dict_utils import dict_list_map_inplace, extract_matching_values, nested_values
from .mapping import (
    LocalNonpersistentObject,
    ShardedBase,
    ShardedObject,
    ShardedStateDict,
    ShardedTensor,
    ShardedTensorFactory,
    StateDict,
)

# _ShardId uniquely identifies a ShardedTensor. This is a subset of ShardedTensor
# attributes: key (str), global_offset (tuple) and flattened_range (optional tuple)
_ShardId = Tuple[str, tuple, Optional[tuple]]


def zip_strict(*args):
    """
    Alternative to Python's builtin zip(..., strict=True) (available in 3.10+).
    Apart from providing functionality in earlier versions of Python is also more verbose.
    (Python's zip does not print lengths, only which iterable has finished earlier)
    """
    args = [list(a) for a in args]
    lens = [len(a) for a in args]
    assert len(set(lens)) <= 1, f"Tried to zip iterables of unequal lengths: {lens}!"
    return zip(*args)


def _sharded_tensor_shard_id(sharded_tensor: ShardedTensor) -> _ShardId:
    """Unique id of the sharded tensor data.

    Should yield the same value for same data replicated on different ranks.

    Args:
        sharded_tensor (ShardedTensor): sharded tensor representing the data shard

    Returns (tuple): unique id of a data shard
    """
    f_range = sharded_tensor.flattened_range
    return (
        sharded_tensor.key,
        sharded_tensor.global_offset,
        None if f_range is None else (f_range.start, f_range.stop),
    )


def _sharded_object_id(sharded_object: ShardedObject) -> _ShardId:
    """Unique id of the sharded object data.

    Should yield the same value for same data replicated on different ranks.

    Args:
        sharded_object (ShardedObject): sharded object representing the data shard

    Returns (tuple): unique id of a data shard
    """
    return (sharded_object.key, sharded_object.global_offset, sharded_object.global_shape)


def extract_sharded_tensors(
    sharded_state_dict: ShardedStateDict,
) -> Tuple[ShardedStateDict, StateDict]:
    """Extract a dict consisting of only ShardedTensor objects
    from a given state dict with any objects.

    Args:
        sharded_state_dict: state dict possibly containing ShardedTensor objects

    Returns:
        Tuple[ShardedStateDict, StateDict]: tuple of:
            - state dict with all ShardedTensor (keeping the original state dict structure)
            - state dict with all objects other than ShardedTensor
              (keeping the original state dict structure)
    """
    return extract_matching_values(sharded_state_dict, lambda v: isinstance(v, ShardedTensor))


def extract_sharded_tensors_and_factories(
    sharded_state_dict: ShardedStateDict,
) -> Tuple[ShardedStateDict, StateDict]:
    """Extract a dict consisting of only ShardedTensor and ShardedTensorFactory objects
    from a given state dict with any objects.

    Args:
        sharded_state_dict:
            state dict possibly containing ShardedTensor and ShardedTensorFactory objects

    Returns:
        Tuple[ShardedStateDict, StateDict]: tuple of:
            - state dict with all ShardedTensor and ShardedTensorFactory
              (keeping the original state dict structure)
            - state dict with all other objects (keeping the original state dict structure)
    """
    return extract_matching_values(
        sharded_state_dict, lambda v: isinstance(v, (ShardedTensor, ShardedTensorFactory))
    )


def extract_sharded_tensors_or_nonpersistent(
    sharded_state_dict: ShardedStateDict,
) -> Tuple[ShardedStateDict, StateDict]:
    """Extract a dict consisting of only ShardedTensor, ShardedTensorFactory
    and LocalNonpersistentObject objects from a given state dict with any objects.

    Args:
        sharded_state_dict: state dict possibly containing ShardedTensor, ShardedTensorFactory
        and LocalNonpersistentObject objects

    Returns:
        Tuple[ShardedStateDict, StateDict]: tuple of:
            - state dict with all ShardedTensor, ShardedTensorFactory and LocalNonpersistentObject
              (keeping the original state dict structure)
            - state dict with all other objects (keeping the original state dict structure)
    """
    return extract_matching_values(
        sharded_state_dict,
        lambda v: isinstance(v, (ShardedTensor, LocalNonpersistentObject, ShardedTensorFactory)),
    )


def extract_sharded_base(
    sharded_state_dict: ShardedStateDict,
) -> Tuple[ShardedStateDict, StateDict]:
    """Extract a dict consisting of only ShardedBase from a given state dict with any objects.

    Args:
        sharded_state_dict: state dict possibly containing ShardedBase objects

    Returns:
        Tuple[ShardedStateDict, StateDict]: tuple of:
            - state dict with all ShardedBase objects (keeping the original state dict structure)
            - state dict with all other objects (keeping the original state dict structure)
    """
    return extract_matching_values(sharded_state_dict, lambda v: isinstance(v, ShardedBase))


def extract_nonpersistent(
    sharded_state_dict: ShardedStateDict,
) -> Tuple[ShardedStateDict, StateDict]:
    """Extract a dict consisting of only LocalNonpersistentObjects from a given state dict.

    Args:
        sharded_state_dict: state dict possibly containing LocalNonpersistentObjects

    Returns:
        Tuple[ShardedStateDict, StateDict]: tuple of:
            - state dict with all LocalNonpersistentObjects
              (keeping the original state dict structure)
            - state dict with all other objects (keeping the original state dict structure)
    """

    return extract_matching_values(
        sharded_state_dict, lambda v: isinstance(v, LocalNonpersistentObject)
    )


def add_prefix_for_sharding(sharded_state_dict: ShardedStateDict, prefix: str):
    """Prepend a given prefix to all ShardedBase objects in a given state dict *in-place*.

    Args:
        sharded_state_dict (ShardedStateDict): sharded state dict
        prefix (str): prefix to be prepended

    Returns:
        None: state dict is modified in-place
    """

    def add_prefix(t):
        if isinstance(t, ShardedBase):
            t.key = f'{prefix}{t.key}'
        return t

    dict_list_map_inplace(add_prefix, sharded_state_dict)


def replace_prefix_for_sharding(
    sharded_state_dict: ShardedStateDict, old_prefix: str, new_prefix: str
):
    """Replaces the given prefix in *all* sharded keys in a given state dict.

    Errors out if some key does not begin with a given prefix.

    Args:
        sharded_state_dict (ShardedStateDict): sharded state dict to replace keys in
        old_prefix (str): prefix to be replaced in each key
        new_prefix (str): new prefix

    Returns:
        None: state dict is modified in place
    """

    def _replace_prefix(x):
        if isinstance(x, (ShardedTensor, ShardedTensorFactory, ShardedObject)):
            if not x.key.startswith(old_prefix):
                raise ValueError(f'Expected {x.key} to begin with prefix {old_prefix}')
            x.key = f'{new_prefix}{x.key[len(old_prefix):]}'  # str.removeprefix in Python >= 3.9
        return x

    dict_list_map_inplace(_replace_prefix, sharded_state_dict)


def apply_prefix_mapping(sharded_state_dict: ShardedStateDict, prefix_map: Dict[str, str]):
    """Replaces prefixes *only in keys matching* with one of prefixes in the map.

    Args:
        sharded_state_dict (ShardedStateDict): sharded state dict to replace keys in
        prefix_map (Dict[str, str]):
            map of old->new prefixes. The first matching prefix for each key is used

    Returns:
        None: state dict is modified in place
    """

    def _replace_prefixes(x):
        if not isinstance(x, (ShardedTensor, ShardedTensorFactory, ShardedObject)):
            return x
        for old_prefix, new_prefix in prefix_map.items():
            if x.key.startswith(old_prefix):
                x.key = (
                    f'{new_prefix}{x.key[len(old_prefix):]}'  # str.removeprefix in Python >= 3.9
                )
                break
        return x

    dict_list_map_inplace(_replace_prefixes, sharded_state_dict)


def force_all_tensors_to_non_fp8(sharded_state_dict: ShardedStateDict):
    """Force all tensors in state dict to be non-fp8.

    Args:
        sharded_state_dict (ShardedStateDict): sharded state dict.
    """
    from ..fp8_utils import dequantize_fp8_tensor, is_float8tensor  # Avoid circular import

    for v in nested_values(sharded_state_dict):
        if hasattr(v, "data") and is_float8tensor(v.data):
            v.data = dequantize_fp8_tensor(v.data)


fallback_logger = logging.getLogger(__name__)
__LOGGER_NAME_STACK = []
__LOGGER_STACK = []


@contextmanager
def logger_stack(name: Optional[str] = None, current_logger: Optional[logging.Logger] = None):
    """Context manager for managing logger and name stack.

    Temporarily pushes a logger and/or name onto their respective stacks, allowing hierarchical
    logging and contextual logger usage. Ensures the logger stack is restored afterward.

    Args:
        name (str, optional): Name to add to the logger stack. Defaults to None.
        current_logger (logging.Logger, optional): Logger to use. Defaults to the last logger in
                                                  the stack or a fallback if none exist.

    Yields:
        Tuple[str, logging.Logger]: A tuple with the concatenated logger name stack and
                                    the current logger for the block.

    Example:
        with logger_stack("scope", logger):
            logger.info("Log within 'scope'")
    """
    if name:
        __LOGGER_NAME_STACK.append(name)
    if current_logger:
        __LOGGER_STACK.append(current_logger)
        last_logger = current_logger
    elif __LOGGER_STACK:
        last_logger = __LOGGER_STACK[-1]
    else:
        last_logger = fallback_logger
    try:
        yield ".".join(__LOGGER_NAME_STACK), last_logger
    finally:
        if name and __LOGGER_NAME_STACK:
            __LOGGER_NAME_STACK.pop(-1)
        if current_logger and __LOGGER_STACK:
            __LOGGER_STACK.pop(-1)


@contextmanager
def debug_time(
    name: str, logger: Optional[logging.Logger] = None, threshold: float = float("-inf"), level=None
):
    """Simple context manager for timing functions/code blocks.

    Args:
        name (str): Label describing the code being measured.
        logger (logging.Logger, optional): Logger for output. Defaults to the lowest logger.
        threshold (float, optional): Minimum time (seconds) to log. Skips logging if faster.
        level (int, optional): Logging level. Defaults to DEBUG if `threshold` is unset;
                               WARNING otherwise.
    """
    with logger_stack(name, logger) as (stacked_name, last_logger):
        start = time()
        try:
            yield
        finally:
            result = time() - start
            if result < threshold:
                return
            if level is None:
                level = logging.DEBUG if threshold == float("-inf") else logging.WARNING
            last_logger.log(level, f"{stacked_name} took {result:.4f}s")


def debug_msg(msg: str):
    """Logs a debug message using the current logger stack.

    This function formats and logs a debug message with the current logger
    and name stack, preserving context from the logger_stack context manager.

    Args:
        msg (str): The message to be logged at the debug level.

    Example:
        debug_msg("Checkpoint initialized")
        # Logs: "scope_name Checkpoint initialized" if called within logger_stack("scope_name")
    """
    with logger_stack(None, None) as (stacked_name, last_logger):
        last_logger.debug(f"{stacked_name} {msg}")


def _clean_metadata_for_serialization(metadata: dict) -> dict:
    """Create a clean copy of metadata for serialization by removing non-serializable objects.

    Args:
        metadata: Original metadata dict

    Returns:
        Clean metadata dict suitable for serialization
    """
    if metadata is None:
        return None
    clean_metadata = metadata.copy()
    # Remove dp_cp_group as it's not serializable
    clean_metadata.pop('dp_cp_group', None)
    return clean_metadata


================================================
FILE: megatron/core/dist_checkpointing/validation.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import logging
import os
from collections import Counter, defaultdict
from enum import Enum
from typing import TYPE_CHECKING, List, Optional, Set, Tuple, Union

import numpy as np
import torch

from megatron.core.dist_checkpointing import ShardedTensor
from megatron.core.dist_checkpointing.core import CheckpointingException, maybe_load_config
from megatron.core.dist_checkpointing.dict_utils import diff, extract_matching_values, nested_values
from megatron.core.dist_checkpointing.mapping import (
    CommonStateDict,
    ShardedBase,
    ShardedObject,
    ShardedStateDict,
    is_main_replica,
)
from megatron.core.dist_checkpointing.strategies.base import (
    LoadCommonStrategy,
    LoadShardedStrategy,
    SaveCommonStrategy,
    SaveShardedStrategy,
    StrategyAction,
    get_default_strategy,
)
from megatron.core.msc_utils import MultiStorageClientFeature

if TYPE_CHECKING:
    from megatron.core.dist_checkpointing.serialization import CkptShardedMetadata


logger = logging.getLogger(__name__)
# pylint: disable=line-too-long
# list of local saved/loaded ShardedBase objects
_LocalMetadata = List[Union[ShardedTensor, ShardedObject]]
# list of lists of global saved/loaded ShardedBase objects (each element corresponds to global rank)
_GlobalMetadata = List[_LocalMetadata]


class StrictHandling(Enum):
    """Determines handling of load mismatch (non-empty "unexpected" or "missing" keys).

    Different flags carry different implications on performance and behaviour and
    are divided into two groups:
    - *_UNEXPECTED
    - *_ALL
    The first group ignores missing keys (present in the checkpoint but missing
    in the sharded state dict) which is created in order to avoid inter-rank
    metadata exchange. Note that the metadata exchange will happen anyway
    with `load(..., validate_access_integrity=True)` flag in which case using the
    `*_ALL` option is recommended as it provides a more thorough check with no
    performance penalty wrt. `*_UNEXPECTED` group.

    All options except for the first one (`ASSUME_OK_UNEXPECTED`) require
    extra disk access before the load in order to remove unexpected keys
    from the sharded state dict requested to load.
    """

    # Relies on the underlying strategy to raise error on unexpected keys
    ASSUME_OK_UNEXPECTED = "assume_ok_unexpected"
    # Logs (with WARNING level) "unexpected" keys. Missing keys are ignored.
    # This is treated as a reasonable default for a "non-strict" load
    LOG_UNEXPECTED = "log_unexpected"
    # Logs (with WARNING level) all mismatched keys.
    LOG_ALL = "log_all"
    # Raise error on unexpected keys before load attempt.
    # Gives cleaner error message than `ASSUME_OK_UNEXPECTED` but requires
    # extra disk access.
    RAISE_UNEXPECTED = "raise_unexpected"
    # Raise error on any mismatch. Similar to `RAISE_UNEXPECTED` but requires
    # metadata exchange.
    RAISE_ALL = "raise_all"
    # "Unexpected" mismatches are not reported, but returned by the `load`
    # function along with the loaded state dict. Missing keys are ignored.
    RETURN_UNEXPECTED = "return_unexpected"
    # All mismatches are returned along with the loaded state dict.
    RETURN_ALL = "return_all"
    # Simply ignores mismatches (not recommended)
    IGNORE_ALL = "ignore_all"

    @staticmethod
    def requires_explicit_ckpt_mismatch_check(val: "StrictHandling") -> bool:
        """Whether a given strict flag involves mismatch check against the checkpoint."""
        return val != StrictHandling.ASSUME_OK_UNEXPECTED

    @staticmethod
    def requires_global_app_metadata(val: "StrictHandling") -> bool:
        """Whether a given strict option requires global metadata for validation."""
        return val in (
            StrictHandling.IGNORE_ALL,
            StrictHandling.RAISE_ALL,
            StrictHandling.RETURN_ALL,
            StrictHandling.LOG_ALL,
        )

    @staticmethod
    def requires_returning_mismatch_keys(val: "StrictHandling") -> bool:
        """Whether a given strict option results in extra return value from the `load` function."""
        return val in (StrictHandling.RETURN_UNEXPECTED, StrictHandling.RETURN_ALL)


def parse_strict_flag(strict: Union[str, StrictHandling]) -> StrictHandling:
    """Parse user passed strict flag from a string to StrictHandling instance.

    Args:
        strict (str, StrictHandling): strict flag to parse. If already an instance
            of StrictHandling, this function is a noop.

    Returns:
        StrictHandling: enum instance
    """
    if isinstance(strict, StrictHandling):
        return strict
    try:
        return StrictHandling(strict)
    except (ValueError, TypeError) as e:
        raise ValueError(f"Invalid strict flag: {e}") from e


def validate_integrity_and_strict_load(
    sharded_state_dict: ShardedStateDict,
    strict: StrictHandling,
    validate_access_integrity: bool,
    local_metadata: Optional[_LocalMetadata] = None,
    global_metadata: Optional[_GlobalMetadata] = None,
    ckpt_sharded_metadata: Optional["CkptShardedMetadata"] = None,
) -> Tuple[ShardedStateDict, Set[str], Set[str]]:
    """Validates sharding integrity and potential mismatches with the checkpoint.

    `validate_access_integrity` controls sharding integrity check (orthogonal
    to strictness checking) which verifies `sharded_state_dict` runtime completeness
    (in isolation from the actual checkpoint).

    `strict` flag controls handling of mismatches between the requested
    sharded state dict to load and the actual checkpoint. See `StrictHandling`
    docs for details regarding flag behavior and performance implications
    (disk interactions or inter-rank communication).

    Args:
        sharded_state_dict (ShardedStateDict): sharded state dict to verify.
        strict (StrictHandling): flag determining how to handle sharded keys mismatch.
        validate_access_integrity (bool): whether to perform sharding validation.
        local_metadata (_LocalMetadata, optional): local sharded state dict metadata.
            Defaults to None, in which case it's determined based on `sharded_state_dict`.
        global_metadata (_GlobalMetadata, optional): global sharded state dict metadata
            (exchanged between ranks). Defaults to None, in which case "missing"
            keys are not determined.
        ckpt_sharded_metadata (CkptShardedMetadata, optional): sharded metadata
            from the checkpoint. Defaults to None, which only makes sense
            for the `StrictHandling.ASSUME_OK_UNEXPECTED` strict value.

    Returns:
        Tuple[ShardedStateDict, Set[str], Set[str]]: tuple of: sharded state dict
            without unexpected keys, missing and unexpected keys. Missing keys are equal
            on all ranks, unexpected keys might differ across ranks. Additionally,
            missing keys might be erroneously empty (depending on `strict` value).
    """
    missing_keys, unexpected_keys = set(), set()
    if StrictHandling.requires_explicit_ckpt_mismatch_check(strict):
        if ckpt_sharded_metadata is None:
            raise CheckpointingException(
                "Cannot verify checkpoint mismatch with ckpt_sharded_metadata=None."
            )
        if local_metadata is None:
            local_metadata = [
                sh_base.without_data() for sh_base in nested_values(sharded_state_dict)
            ]
        # We don't want to check for missing keys even if we could
        _skip_missing_keys = strict in (
            StrictHandling.ASSUME_OK_UNEXPECTED,
            StrictHandling.LOG_UNEXPECTED,
            StrictHandling.RAISE_UNEXPECTED,
            StrictHandling.RETURN_UNEXPECTED,
        )
        missing_keys, unexpected_keys = _determine_missing_and_unexpected_keys(
            ckpt_sharded_metadata, local_metadata, None if _skip_missing_keys else global_metadata
        )

        sharded_state_dict = adjust_non_strict_load(sharded_state_dict, unexpected_keys)

        if strict == StrictHandling.IGNORE_ALL:
            missing_keys, unexpected_keys = set(), set()
        elif strict in (StrictHandling.RAISE_UNEXPECTED, StrictHandling.RAISE_ALL):
            maybe_report_missing_and_unexpected_keys(missing_keys, unexpected_keys, True)
        elif strict in (StrictHandling.LOG_UNEXPECTED, StrictHandling.LOG_ALL):
            maybe_report_missing_and_unexpected_keys(missing_keys, unexpected_keys, False)

    if validate_access_integrity:
        if global_metadata is None:
            raise CheckpointingException(
                "Cannot check sharding intergrity without global_metadata (None)."
            )
        validate_sharding_integrity(global_metadata)

    return sharded_state_dict, missing_keys, unexpected_keys


def verify_checkpoint_and_load_strategy(
    checkpoint_dir: str,
    sharded_strategy: Union[LoadShardedStrategy, Tuple[str, int], None] = None,
    common_strategy: Union[LoadCommonStrategy, Tuple[str, int], None] = None,
    cache_metadata: bool = False,
) -> Tuple[LoadShardedStrategy, LoadCommonStrategy]:
    """Verifies if checkpoint metadata exists and matches given strategies.

    If no strategies are passed, they are determined based on the checkpoint metadata.

    Args:
        checkpoint_dir (str): checkpoint directory
        sharded_strategy (LoadShardedStrategy, Tuple[str, int], optional): sharded load strategy to be verified
            if compatible with the checkpoint content. If None, the default sharded load strategy
            for the checkpoint backend will be returned.
        common_strategy (LoadCommonStrategy, Tuple[str, int], optional): common load strategy to be verified
            if compatible with the checkpoint content. If None, the default common load strategy
            for the checkpoint backend will be returned.
        cache_metadata (bool): if True and checkpoint backend is torch_dist, use a load strategy that caches
            metadata (e.g. when ckpt_assume_constant_structure is enabled). Ignored if sharded_strategy is set.
    """
    isdir = True
    if MultiStorageClientFeature.is_enabled():
        msc = MultiStorageClientFeature.import_package()
        isdir = msc.os.path.isdir(str(checkpoint_dir), strict=False)
    else:
        isdir = os.path.isdir(checkpoint_dir)
    if not isdir:
        raise CheckpointingException(f"Checkpoint directory {checkpoint_dir} does not exist")

    saved_config = maybe_load_config(checkpoint_dir)
    if saved_config is None:
        raise CheckpointingException(f"{checkpoint_dir} is not a distributed checkpoint")

    if sharded_strategy is None:
        if cache_metadata and saved_config.sharded_backend == 'torch_dist':
            from megatron.core.dist_checkpointing.strategies.torch import (
                TorchDistLoadShardedStrategy,
            )

            sharded_strategy = TorchDistLoadShardedStrategy(cache_metadata=True)
        else:
            sharded_strategy = get_default_strategy(
                StrategyAction.LOAD_SHARDED,
                saved_config.sharded_backend,
                saved_config.sharded_backend_version,
            )
    elif isinstance(sharded_strategy, tuple):
        sharded_strategy = get_default_strategy(StrategyAction.LOAD_SHARDED, *sharded_strategy)

    if common_strategy is None:
        common_strategy = get_default_strategy(
            StrategyAction.LOAD_COMMON,
            saved_config.common_backend,
            saved_config.common_backend_version,
        )
    elif isinstance(common_strategy, tuple):
        sharded_strategy = get_default_strategy(StrategyAction.LOAD_COMMON, *common_strategy)

    sharded_strategy.check_backend_compatibility(saved_config.sharded_backend)
    sharded_strategy.check_version_compatibility(saved_config.sharded_backend_version)
    common_strategy.check_backend_compatibility(saved_config.common_backend)
    common_strategy.check_version_compatibility(saved_config.common_backend_version)
    return sharded_strategy, common_strategy


def adjust_non_strict_load(
    sharded_state_dict: ShardedStateDict, sharded_keys_to_remove: Set[str]
) -> ShardedStateDict:
    """Adjusts sharded state dict removing keys not existing in the checkpoint.

    Args:
        sharded_state_dict (ShardedStateDict): sharded state dict to modify
        sharded_keys_to_remove (Set[str]): keys to remove from the state dict

    Returns:
        ShardedStateDict: state dict without ShardedBase objects with specified keys
    """

    def is_unexpected_key(x: ShardedBase):
        assert isinstance(x, ShardedBase), f"Unexpected type {type(x)}"
        return x.key in sharded_keys_to_remove

    _, sharded_state_dict = extract_matching_values(sharded_state_dict, is_unexpected_key)
    return sharded_state_dict


def _determine_missing_and_unexpected_keys(
    ckpt_sharded_metadata: "CkptShardedMetadata",
    local_metadata: _LocalMetadata,
    global_metadata: Optional[_GlobalMetadata] = None,
) -> Tuple[Set[str], Set[str]]:
    """Determines load mismatches based on metadata.

    There is an asymmetry between "unexpected" and "missing" keys.
    Unexpected keys can be determined based only on local metadata.
    Missing keys must be based on global metadata, since other ranks might access
    different keys than the current rank.
    In consequence, the return value of this function is different on each rank:
    "missing_keys" are equal, but "unexpected_keys" might differ across ranks.

    Args:
        ckpt_sharded_metadata (CkptShardedMetadata): sharded state dict (without data)
            constructed based on the checkpoint content
        local_metadata (_LocalMetadata): list of local ShardedBase objects
            requested to be loaded by this rank
        global_metadata (_GlobalMetadata, optional): list of global ShardedBase objects
            requested to be loaded by all ranks. Defaults to None, in which case
            returned "missing" keys are empty.

    Returns:
        Tuple[Set[str], Set[str]]: missing and unexpected keys. Missing keys are equal
            on all ranks, unexpected keys might differ across ranks. If passed
            `global_metadata` is empty, returned missing keys are empty as well.

    """
    local_accessed_keys = set(sh_base.key for sh_base in local_metadata)
    ckpt_keys = set(sh_base.key for sh_base in ckpt_sharded_metadata.values())
    unexpected_keys = local_accessed_keys - ckpt_keys
    if global_metadata is not None:
        global_accessed_keys = set(
            sh_base.key for rank_metadata in global_metadata for sh_base in rank_metadata
        )
        missing_keys = ckpt_keys - global_accessed_keys
    else:
        missing_keys = set()

    if missing_keys:
        logger.debug(f"Dist ckpt load missing keys: {missing_keys}")
    if unexpected_keys:
        logger.debug(f"Dist ckpt load unexpected keys: {unexpected_keys}")

    return missing_keys, unexpected_keys


def maybe_report_missing_and_unexpected_keys(
    missing_keys: Set[str], unexpected_keys: Set[str], raise_error: bool = True
) -> None:
    """Raises or logs an error in case missing or unexpected keys are non-empty.

    Args:
        missing_keys (Set[str]): missing keys in the state dict
        unexpected_keys (Set[str]): unexpected keys in the state dict
        raise_error: If True, raises error on mismatch. Otherwise, logs mismatch
            with WARNING level.

    Returns:
        None

    Raises:
        CheckpointingException: if `raise_error` is True and at least one of
        `missing_keys` or `unexpected_keys` are non-empty.
    """
    if not missing_keys and not unexpected_keys:
        return
    missing_title_msg = (
        f"Some keys found in the checkpoint are missing in the provided sharded state dict. "
    )
    missing_body_msg = f"Missing keys (for all ranks): {missing_keys}. "
    unexpected_title_msg = f"Unexpected keys (not found in the checkpoint) encountered in the provided sharded state dict. "
    unexpected_body_msg = f"Unexpected keys (for this rank): {unexpected_keys}. "
    error_msg = ""
    if missing_keys:
        error_msg += missing_title_msg
    if unexpected_keys:
        error_msg += unexpected_title_msg

    error_msg += "\n"
    if missing_keys:
        error_msg += missing_body_msg
    if unexpected_keys:
        error_msg += unexpected_body_msg

    if raise_error:
        raise CheckpointingException(error_msg)
    else:
        logger.warning(error_msg)


def _validate_common_state_dict(common_state_dict: CommonStateDict) -> None:
    """Validate consistancy across ranks for the common state dict

    We save the common state dict only on rank 0. We validate to make sure that the common dict is consistent across ranks before saving.

    Args:
        common_state_dict: The common state dict present in all ransk
    """
    if not torch.distributed.is_initialized():
        return

    # Broadcast the common state dict from rank 0 to all other ranks
    # Each rank will do a comparison with its local rank vs the broadcasted state dict from rank 0
    rank = torch.distributed.get_rank()

    object_list = [common_state_dict] if rank == 0 else [None]
    torch.distributed.broadcast_object_list(object_list, src=0)
    rank0_state_dict = object_list[0]

    # Skip comparing rank 0 with itself
    if rank > 0:
        current_rank_state_dict = common_state_dict
        only_in_rank0, only_in_current_rank, mismatch = diff(
            rank0_state_dict, current_rank_state_dict
        )
        if only_in_rank0 or only_in_current_rank or mismatch:
            logger.warning(
                f"Rank {rank} common state dict differs from rank 0 common state dict. "
                f"Keys only on rank 0: {only_in_rank0}, "
                f"Keys only on {rank}: {only_in_current_rank}, "
                f"Mismatched keys: {mismatch}"
            )


def validate_sharding_integrity(
    global_metadata: _GlobalMetadata, common_state_dict: CommonStateDict = None
) -> None:
    """Validate if the ShardedTensors and ShardedObjects from multiple processes define correct sharding.

    Local ShardedTensors and ShardedObject metadata is exchanged with `torch.distributed.all_gather_object`
    and then process with global rank 0 checks if main replicas of the shards:
    - cover the whole global tensors
    - don't overlap

    Args:
        global_metadata (_GlobalMetadata): ShardedTensor and ShardedObject objects from all ranks.
        common_state_dict (CommonStateDict): The common state dict stored by rank 0

    Returns:
        None

    Raises:
        CheckpointingException for invalid access pattern
    """

    if common_state_dict is not None:
        _validate_common_state_dict(common_state_dict)

    if torch.distributed.get_rank() != 0:
        return

    key_shardings = defaultdict(list)
    for rank, rank_shardings in enumerate(global_metadata):
        for sharding in rank_shardings:
            key_shardings[sharding.key].append((rank, sharding))
    errors = []
    for key, shardings in key_shardings.items():
        if isinstance(shardings[0][1], ShardedObject):
            errors.extend(_validate_objects_for_key(shardings))
        else:
            errors.extend(_validate_sharding_for_key(shardings))

    if errors:
        errors = '\n'.join(str(e) for e in errors)
        raise CheckpointingException(f'Invalid sharding pattern validation. Errors: {errors}')


def _validate_sharding_for_key(
    rank_sharding: List[Tuple[int, ShardedTensor]]
) -> List[CheckpointingException]:
    some_rank_shard = rank_sharding[0][1]
    global_shape = some_rank_shard.global_shape
    local_shape = some_rank_shard.local_shape
    dtype = some_rank_shard.dtype
    has_regular_sharding_grid = some_rank_shard.has_regular_grid
    for rank, sharding in rank_sharding:
        assert sharding.dtype == dtype, (sharding.dtype, dtype, some_rank_shard)
        assert sharding.global_shape == global_shape, (
            sharding.global_shape,
            global_shape,
            some_rank_shard,
        )
        assert sharding.has_regular_grid == has_regular_sharding_grid, (
            has_regular_sharding_grid,
            some_rank_shard,
        )
        if has_regular_sharding_grid:
            assert sharding.local_shape == local_shape, (
                sharding.local_shape,
                local_shape,
                some_rank_shard,
            )

    errors = []
    if not has_regular_sharding_grid:
        # In case of uneven sharding we defer the validation to DCP
        return errors

    shard_access_cnt = _compute_shards_access(rank_sharding)
    if not torch.all(shard_access_cnt == 1):
        errors.append(
            CheckpointingException(
                f'Invalid access pattern for {rank_sharding[0][1]}: {shard_access_cnt}'
            )
        )

    return errors


def _compute_shards_access(rank_sharding):
    shard_access_cnt = torch.zeros(
        rank_sharding[0][1].axis_fragmentations, dtype=torch.int, device="cpu"
    )
    for rank, sharding in rank_sharding:
        if is_main_replica(sharding.replica_id):
            shard_access_cnt[sharding.local_chunk_offset_in_global()] += 1
    return shard_access_cnt


def _validate_objects_for_key(sharded_objects: List[ShardedObject]) -> List[CheckpointingException]:
    """Ensure uniqueness of saved objects."""
    unique_keys = [
        sh_obj.unique_key for _, sh_obj in sharded_objects if is_main_replica(sh_obj.replica_id)
    ]
    errors = []
    if len(unique_keys) != len(set(unique_keys)):
        duplicates = {k: cnt for k, cnt in Counter(unique_keys).items() if cnt > 1}
        logger.error(f"Duplicate ShardedObject keys and counts: {duplicates}")
        errors.append(
            CheckpointingException(f'Duplicate ShardedObject keys: {list(duplicates.keys())}')
        )
    expected_shard_num = np.prod(sharded_objects[0][1].global_shape)
    if len(unique_keys) != expected_shard_num:
        err_msg = f"Invalid access pattern: {expected_shard_num - len(unique_keys)} ShardedObject are missing."
        logger.error(f"{err_msg} Existing shards: {unique_keys}")
        errors.append(CheckpointingException(err_msg))
    return errors


def determine_global_metadata(
    sharded_state_dict: ShardedStateDict,
) -> Tuple[_LocalMetadata, _GlobalMetadata]:
    """Exchanges local metadata with `all_gather_object` to determine global metadata.

    Args:
        sharded_state_dict (ShardedStateDict): local sharded state dict

    Returns:
        Tuple[_LocalMetadata, _GlobalMetadata]: local and global ShardedBase objects with stripped data
    """
    local_metadata = [ten.without_data() for ten in nested_values(sharded_state_dict)]
    global_metadata = [None] * torch.distributed.get_world_size()
    torch.distributed.all_gather_object(global_metadata, local_metadata)
    return local_metadata, global_metadata  # type: ignore[return-value]


def validate_sharded_objects_handling(
    sharded_strategy: Union[SaveShardedStrategy, LoadShardedStrategy],
    common_strategy: Union[SaveCommonStrategy, LoadCommonStrategy],
) -> None:
    """Checks if either of the passed strategies can handle sharded objects.

    Args:
        sharded_strategy (Union[SaveShardedStrategy, LoadShardedStrategy]): sharded strategy used for saving/loading
        common_strategy (Union[SaveCommonStrategy, LoadCommonStrategy]): common strategy used for saving/loading

    Returns:
        None

    Raises:
        CheckpointingException: if both strategies can't handle ShardedObjects
    """
    if (
        not sharded_strategy.can_handle_sharded_objects
        and not common_strategy.can_handle_sharded_objects
    ):
        raise CheckpointingException(
            f"Either sharded strategy or common strategy must implement ShardedObjects handling."
            f" Both {sharded_strategy} and {common_strategy} specify can_handle_sharded_objects=False"
        )


================================================
FILE: megatron/core/distributed/README.md
================================================
## How to use pytorch FSDP2?

Add these flag to enable Torch FSDP2.

```
--use-torch-fsdp2
--no-gradient-accumulation-fusion
--ckpt-format torch_dist
```

It is worth noting that CUDA_MAX_CONNECTIONS=1 should not be enabled to ensure that the communication of FSDP and the computation on the primary stream can be fully parallelized.


================================================
FILE: megatron/core/distributed/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

try:
    from packaging.version import Version
except ImportError:
    pass

from .distributed_data_parallel import DistributedDataParallel
from .distributed_data_parallel_config import DistributedDataParallelConfig
from .finalize_model_grads import finalize_model_grads
from .fsdp.mcore_fsdp_adapter import FullyShardedDataParallel
from .torch_fully_sharded_data_parallel import TorchFullyShardedDataParallel
from .torch_fully_sharded_data_parallel_config import TorchFullyShardedDataParallelConfig


================================================
FILE: megatron/core/distributed/data_parallel_base.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from contextlib import contextmanager

import torch

from ..transformer.module import MegatronModule
from ..transformer.transformer_config import TransformerConfig


class _BaseDataParallel(MegatronModule):
    """A template class for DistributedDataParallel implementations."""

    def __init__(self, config: TransformerConfig, module: torch.nn.Module):
        super().__init__(config=config)
        self.module = module

    def forward(self, *inputs, **kwargs):
        """
        Calls the wrapped module's forward() method.
        """
        return self.module(*inputs, **kwargs)

    @contextmanager
    def no_sync(self):
        """
        Context manager that turns off gradient synchronization.
        """
        try:
            yield
        finally:
            pass

    def start_grad_sync(self, *unused):
        """
        Initiates grad sync (all-reduce or reduce-scatter) communication operations
        for all model gradients.

        When overlap_grad_reduce is set to True, dispatches asynchronous communication
        calls. When overlap_grad_reduce is set to False, calls synchronous
        communication ops.
        """
        pass

    def scale_gradients(self, scaling_factor: float) -> None:
        """Scale all gradients inside the buffers by `scaling_factor`."""
        pass

    def finish_grad_sync(self):
        """
        Finishes grad sync (all-reduce or reduce-scatter) communication operations
        for all model gradients.

        When overlap_grad_reduce is set to True, waits for asynchronous communication
        calls to complete. When overlap_grad_reduce is set to False, calls synchronous
        communication ops.
        """
        pass

    def zero_grad_buffer(self):
        """
        Zeros out all grad buffers. Needs to be called at the beginning of each
        training iteration.
        """
        pass

    def broadcast_params(self):
        """
        Syncs parameters across all DP ranks.
        """
        pass

    def state_dict(self, prefix='', keep_vars=False, destination=None):
        """
        Returns a dictionary containing references to the whole state of the
        wrapped module.

        Both parameters and persistent buffers (e.g. running averages) are included.
        Keys are corresponding parameter and buffer names. Parameters and buffers
        set to None are not included.
        """
        return self.module.state_dict(prefix=prefix, keep_vars=keep_vars, destination=destination)

    def state_dict_for_save_checkpoint(self, prefix='', keep_vars=False):
        """
        Returns wrapped module's state_dict for checkpoint saving.
        """
        return self.module.state_dict_for_save_checkpoint(prefix=prefix, keep_vars=keep_vars)

    def load_state_dict(self, state_dict, strict=True):
        """
        Copies parameters and buffers from state_dict into the wrapped module and its
        descendants. If strict is True, then the keys of state_dict must exactly match
        the keys returned by this module’s state_dict() function.
        """
        self.module.load_state_dict(state_dict, strict=strict)


================================================
FILE: megatron/core/distributed/distributed_data_parallel.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import logging
from contextlib import contextmanager
from typing import Optional

import torch

from ..config_logger import has_config_logger_enabled, log_config_to_disk
from ..fp8_utils import is_float8tensor, post_all_gather_processing
from ..process_groups_config import ProcessGroupCollection
from ..transformer.cuda_graphs import is_graph_capturing
from ..transformer.transformer_config import TransformerConfig
from ..utils import log_single_rank
from .data_parallel_base import _BaseDataParallel
from .distributed_data_parallel_config import DistributedDataParallelConfig
from .param_and_grad_buffer import _ParamAndGradBuffer, partition_buckets

logger = logging.getLogger(__name__)


class DistributedDataParallel(_BaseDataParallel):
    """
    DDP wrapper which stores grads in contiguous buffers. Also has option of overlapping
    communication with backprop computation by breaking up full model's gradients into smaller
    buckets and running all-reduce / reduce-scatter on each bucket asynchronously. This class
    also provides the option to do the gradient accumulation in a type other than the param type
    (e.g., fp32 for a bf16 model).

    Args:
        config: Transformer config object.
        ddp_config: DistributedDataParallel config object.
        module: Underlying model.
        disable_bucketing: If true, force assign all parameters to a single bucket. If false,
            use standard bucketing policy: assign parameters to smaller buckets and all-reduce
            per bucket _if_ overlap_grad_reduce is True and pp_rank is 0.
        pg_collection: Optional unified process group for distributed training.

    """

    def __init__(
        self,
        config: TransformerConfig,
        ddp_config: DistributedDataParallelConfig,
        module: torch.nn.Module,
        disable_bucketing: bool = False,
        pg_collection: Optional[ProcessGroupCollection] = None,
    ):
        super().__init__(config=config, module=module)
        if has_config_logger_enabled(config):
            log_config_to_disk(config, locals(), prefix=type(self).__name__)

        # If bucket_size is not provided as an input, use sane default.
        # If using very large dp_sizes, make buckets larger to ensure that chunks used in NCCL
        # ring-reduce implementations are large enough to remain bandwidth-bound rather than
        # latency-bound.
        # Setup process groups, handling both None and provided pg_collection values.
        process_group_dict = ProcessGroupCollection.setup_process_groups_for_ddp(
            pg_collection, config, ddp_config
        )

        # If bucket_size is not provided as an input, use sane default based on dp_group size.
        dp_group = process_group_dict['dp_group']
        if ddp_config.bucket_size is None:
            ddp_config.bucket_size = max(40000000, 1000000 * dp_group.size())
        # Set bucket_size to infinity if overlap_grad_reduce is False.
        if not ddp_config.overlap_grad_reduce:
            ddp_config.bucket_size = None

        self.ddp_config = ddp_config
        log_single_rank(
            logger,
            logging.INFO,
            f'Setting up DistributedDataParallel with config {self.ddp_config}',
        )

        # Assign all required process groups
        self.dp_group = process_group_dict['dp_group']
        self.dp_cp_group = process_group_dict['dp_cp_group']
        self.intra_dp_cp_group = process_group_dict['intra_dp_cp_group']
        self.expt_dp_group = process_group_dict['expt_dp_group']
        self.intra_expt_dp_group = process_group_dict['intra_expt_dp_group']
        self.tp_group = process_group_dict['tp_group']
        self.pp_group = process_group_dict['pp_group']
        self.ep_group = process_group_dict['ep_group']

        # Set inter_dist_opt_group if multiple optimizer instances
        if self.ddp_config.num_distributed_optimizer_instances > 1:
            self.inter_dist_opt_group = process_group_dict['inter_dist_opt_group']

        # Turn off bucketing if we are on a pipeline stage that is not the first (since
        # data-parallel communication on these stages is not on the critical path), or if
        # disable_bucketing is True (e.g., we might not want to break up model parameters
        # into buckets for model chunks after the first in the interleaved schedule).
        self.bucket_size = self.ddp_config.bucket_size
        self.force_all_reduce = False
        if isinstance(self.pp_group, list):
            pp_rank = self.pp_group[0].rank()
        else:
            pp_rank = self.pp_group.rank()
        if disable_bucketing or pp_rank > 0:
            self.bucket_size = None

        self.param_to_bucket_group = {}

        # Group parameters by their gradient type.
        param_to_name = {}
        dense_params = []
        expert_parallel_params = []
        self.params_with_grad = []
        for name, param in self.module.named_parameters():
            if not param.requires_grad:
                continue

            # Track params with grad to enable direct setting
            # of param.grad_added_to_main_grad
            self.params_with_grad.append(param)

            param.grad_added_to_main_grad = False
            param_to_name[param] = name

            if getattr(param, 'allreduce', True):
                dense_params.append(param)
            else:
                expert_parallel_params.append(param)

        def _allocate_buffers_for_parameters(
            input_params, data_parallel_group, gradient_scaling_factor
        ):
            param_and_grad_dtype_to_params = {}
            param_and_grad_dtype_to_offsets = {}
            param_and_grad_dtype_to_indices = {}

            # Group parameters by their gradient type.
            for param in input_params:
                assert param.requires_grad

                param_dtype = param.dtype
                if is_float8tensor(param):
                    # Currently TE's Float8Tensor is a wrapper of torch.Tensor. It has a "fake"
                    # dtype (usually a higher precision dtype such as bfloat16), but its actual
                    # data is stored in the form of a torch uint8 tensor within the Float8Tensor's
                    # ".data" attribute. Therefore, when creating the param buffer for fp8 params,
                    # it is necessary to use torch.uint8, not the "fake" dtype got from
                    # "param.dtype".
                    param_dtype = torch.uint8
                grad_dtype = torch.float if self.ddp_config.grad_reduce_in_fp32 else param.dtype

                params = param_and_grad_dtype_to_params.get((param_dtype, grad_dtype), [])
                params.append(param)
                param_and_grad_dtype_to_params[(param_dtype, grad_dtype)] = params

                # Get the index of each param among the params with same dtype, if a param is fp8,
                # use its "fake" high precision dtype to find which params have same dtype with it.
                # For example:
                #     Case 1:
                #         params = [p1(bf16), p2(bf16), p3(bf16), p4(bf16)]
                #         param_and_grad_dtype_to_indices = {
                #             (torch.bfloat16, torch.float32): [0, 1, 2, 3],
                #         }
                #     Case 2:
                #         params = [p1(bf16), p2(fp8), p3(fp8), p4(bf16)]
                #         param_and_grad_dtype_to_indices = {
                #             (torch.bfloat16, torch.float32): [0, 3],
                #             (torch.uint8, torch.float32): [1, 2],
                #         }
                # We need these indices to load a non-native-fp8 checkpoint in native-fp8 mode.
                offset = param_and_grad_dtype_to_offsets.get((param.dtype, grad_dtype), 0)
                param_and_grad_dtype_to_offsets[(param.dtype, grad_dtype)] = offset + 1
                indices = param_and_grad_dtype_to_indices.get((param_dtype, grad_dtype), [])
                indices.append(offset)
                param_and_grad_dtype_to_indices[(param_dtype, grad_dtype)] = indices

            if not config.calculate_per_token_loss:
                target_gradient_scaling_factor = 1.0 / self.dp_cp_group.size()
                if self.ddp_config.average_in_collective:
                    if self.ddp_config.num_distributed_optimizer_instances == 1:
                        # Collective is averaging gradients in collective with data_parallel_group.
                        assert (
                            gradient_scaling_factor / data_parallel_group.size()
                            == target_gradient_scaling_factor
                        )
                    else:
                        # For non-expert parameters, gradient_scaling_factor is 1.
                        # For expert parameters, gradient_scaling_factor is edp_size/dp_size.
                        assert (gradient_scaling_factor == 1) or (
                            gradient_scaling_factor
                            == (self.expt_dp_group.size() / self.dp_cp_group.size())
                        )
                else:
                    assert gradient_scaling_factor == target_gradient_scaling_factor

            # Allocate the grad buffers and map the grads.
            buffers = []
            pg_collection = ProcessGroupCollection()
            pg_collection.tp = self.tp_group
            pg_collection.dp_cp = self.dp_cp_group
            for (param_dtype, grad_dtype), params in param_and_grad_dtype_to_params.items():
                buffers.append(
                    _ParamAndGradBuffer(
                        self.ddp_config,
                        param_dtype,
                        grad_dtype,
                        params,
                        data_parallel_group,
                        self.bucket_size,
                        param_to_name,
                        gradient_scaling_factor,
                        param_and_grad_dtype_to_indices[(param_dtype, grad_dtype)],
                        self.ddp_config.nccl_ub,
                        pg_collection,
                    )
                )

            # In some scenarios, we want to put buckets from different buffers into a group so that
            # their communication can be aggregated. For example, when there are both fp8 buffers
            # and bf16 buffers in the model and vpp is enabled, each model chunk will have an fp8
            # bucket and a bf16 bucket, which doubles the number of communication kernels, and
            # because of the use of CUDA_DEVICE_MAX_CONNECTIONS=1, having multiple back-to-back
            # communications will prevent the overlap of the communication kernels with computation
            # kernels.
            # If bucketing is explicitly disabled, then put all buckets in a buffer into a single
            # bucket group.
            bucket_groups = partition_buckets(buffers, force_single_bucket_group=disable_bucketing)

            if self.ddp_config.num_distributed_optimizer_instances > 1:
                assert (
                    self.ddp_config.use_distributed_optimizer
                ), 'Partial DistOpt cannot be used without DistOpt'
                communication_stream = torch.cuda.Stream(device=torch.cuda.current_device())
                for bucket_group in bucket_groups:
                    bucket_group.inter_distributed_optimizer_instance_group = (
                        self.inter_dist_opt_group
                    )
                    bucket_group.communication_stream = communication_stream

            # Set `next_param_gather_bucket_group` for different bucket groups by iterating through
            # buckets in reverse order (since all-gathers happen in reverse order of buckets).
            # Note: overlap_param_gather covers both the distributed optimizer and the
            # layer-wise optimizer cases; the latter sets overlap_param_gather=True
            # without use_distributed_optimizer.
            if self.ddp_config.overlap_param_gather:
                num_bucket_groups = len(bucket_groups)
                for i in range(1, num_bucket_groups):
                    bucket_groups[num_bucket_groups - i].next_param_gather_bucket_group = (
                        bucket_groups[num_bucket_groups - i - 1]
                    )

            # Create map from param to bucket group, used in pre_hook.
            for bucket_group in bucket_groups:
                for bucket in bucket_group.buckets:
                    for param in bucket.params_list:
                        self.param_to_bucket_group[param] = bucket_group

            return buffers, bucket_groups

        if config.calculate_per_token_loss:
            assert (
                not self.ddp_config.average_in_collective
            ), "Cannot average in collective when calculating per-token loss!"
            gradient_scaling_factor = 1.0
            expert_gradient_scaling_factor = 1.0
        else:
            # The goal is to scale reduced gradients by 1/dp_size.
            # This can be achieved in two ways:
            #
            # Case 1: average_in_collective=True
            # - Non-expert parameters:
            #   1. No pre-scaling (gradient_scaling_factor=1.0)
            #   2. Do average reduction over dp group (equals to sum then divide by dp_size)
            #   3. Final result is scaled by 1/dp_size as desired
            #
            # - Expert parameters:
            #   1. Scale by edp_size/dp_size before reduction
            #   2. Do average reduction over edp group (equals to sum then divide by edp_size)
            #   3. Resulted scaling: (edp_size/dp_size) * (1/edp_size) = 1/dp_size as desired
            #   (edp_size = expert data parallel world size)
            #
            # Case 2: average_in_collective=False
            # - Both expert and non-expert parameters:
            #   1. Scale gradients by 1/dp_size before reduction
            #   2. Do sum reduction across data parallel ranks
            #   3. Final result is scaled by 1/dp_size as desired
            if self.ddp_config.average_in_collective:
                gradient_scaling_factor = 1.0
                expert_gradient_scaling_factor = self.expt_dp_group.size() / self.dp_cp_group.size()
            else:
                data_parallel_world_size = self.dp_cp_group.size()

                gradient_scaling_factor = 1.0 / data_parallel_world_size
                expert_gradient_scaling_factor = 1.0 / data_parallel_world_size

        # Allocate the param+grad buffers for dense params' grads.
        self.buffers, self.bucket_groups = _allocate_buffers_for_parameters(
            dense_params, self.intra_dp_cp_group, gradient_scaling_factor=gradient_scaling_factor
        )

        # Allocate separate param+grad buffers for expert parallel params' grads.
        self.expert_parallel_buffers, self.expert_parallel_bucket_groups = (
            _allocate_buffers_for_parameters(
                expert_parallel_params,
                self.intra_expt_dp_group,
                gradient_scaling_factor=expert_gradient_scaling_factor,
            )
        )

        # Delete references to weight_tensor if they exist since we don't want two parameter copies
        # if we re-mapped parameters (which happens when we use the distributed optimizer).
        # This is a temporary workaround around a TE bug that is fixed with
        # https://github.com/NVIDIA/TransformerEngine/pull/719.
        if self.ddp_config.use_distributed_optimizer:

            @torch.no_grad()
            def unmap_weight_tensor(m):
                if hasattr(m, 'weight_tensor'):
                    m.weight_tensor = None

            self.module.apply(unmap_weight_tensor)

        # Register backward hook.
        # Accumulation function for the gradients need to be stored so they
        # don't go out of scope.
        self.grad_accs = []
        for param in self.module.parameters():
            if param.requires_grad:
                # When delay_wgrad_compute is True and the param is marked with
                # skip_backward_post_hook, register the backward post hook for its module
                # instead of the param so that the wgrad accumulation and reduce will be performed
                # in backward_dw() method of the module instead of the hook of backward() method.
                # Otherwise, register the backward post hook for the param.
                if self.ddp_config.delay_wgrad_compute and getattr(
                    param, 'skip_backward_post_hook', False
                ):
                    for module in self.module.modules():
                        if hasattr(module, "register_wgrad_accumulation_and_reduce_hooks"):
                            for param_value in module.parameters():
                                if param is param_value:
                                    module.register_wgrad_accumulation_and_reduce_hooks(
                                        self._make_backward_post_hook(param)
                                    )
                                    break
                else:
                    # Expand so we get access to grad_fn.
                    param_tmp = param.expand_as(param)
                    # Get the gradient accumulator function.
                    grad_acc = param_tmp.grad_fn.next_functions[0][0]
                    grad_acc.register_hook(self._make_backward_post_hook(param))
                    self.grad_accs.append(grad_acc)

        # Note: overlap_param_gather covers both the distributed optimizer and the
        # layer-wise optimizer cases; the latter sets overlap_param_gather=True
        # without use_distributed_optimizer.
        self.use_forward_hook = self.ddp_config.overlap_param_gather
        self.remove_forward_pre_hook_handles = {}
        if self.use_forward_hook:
            self.enable_forward_pre_hook()
        self.overlap_param_gather_with_optimizer_step = False

    def enable_forward_pre_hook(self):
        """
        Enable forward pre-hooks needed for param all-gather overlap with forward compute.
        """
        assert self.use_forward_hook
        assert len(self.remove_forward_pre_hook_handles) == 0
        # Register forward pre-hook for all sub-modules.
        for module in self.module.modules():
            self.remove_forward_pre_hook_handles[module] = module.register_forward_pre_hook(
                self._make_forward_pre_hook()
            )

    def disable_forward_pre_hook(self, param_sync: bool = True):
        """
        Disable forward pre-hooks needed for param all-gather overlap with forward compute.
        Skip synchronous param all-gather if `param_sync` is False.
        """
        assert self.use_forward_hook
        # De-register forward pre-hook for all sub-modules.
        for module in self.module.modules():
            assert self.remove_forward_pre_hook_handles[module] is not None
            self.remove_forward_pre_hook_handles[module].remove()
            del self.remove_forward_pre_hook_handles[module]
        assert len(self.remove_forward_pre_hook_handles) == 0

        # Force synchronize parameters.
        if param_sync:
            self.start_param_sync(force_sync=True)

    def _make_forward_pre_hook(self):
        """
        Create a forward pre-hook to wait on all-gather handles when necessary (i.e.,
        when a module uses a parameter in a bucket with a still incomplete all-gather).
        """

        def hook(module, *unused):
            assert (
                self.use_forward_hook
            ), "Should use pre-hook only when overlap_param_gather is True"

            if is_graph_capturing():
                return

            # Make sure all parameters in this module have been all-gathered as necessary.
            for param in module.parameters(recurse=False):
                # Skip parameters without an associated buffer (such parameters have a
                # .requires_grad field equal to False).
                if param not in self.param_to_bucket_group:
                    continue
                assert param.requires_grad

                # If aligning param all-gather across pipeline stages, all-gather is dispatched
                # by start_param_sync calls in core/pipeline_parallelism/schedules.py.
                # If overlapping param all-gather with optimizer step, then all-gather has
                # already been dispatched in optimizer step.
                skip_next_bucket_dispatch = (
                    self.ddp_config.align_param_gather
                    or self.overlap_param_gather_with_optimizer_step
                )
                self.param_to_bucket_group[param].finish_param_sync(
                    skip_next_bucket_dispatch=skip_next_bucket_dispatch
                )

        return hook

    def _make_backward_post_hook(self, param: torch.nn.Parameter):
        """
        Creates a backward post-hook to dispatch an all-reduce / reduce-scatter when
        ready (i.e., when all grads in a bucket have been computed in all microbatches
        in a batch).
        """

        def hook(*unused):
            if is_graph_capturing():
                return

            if param in self.param_to_bucket_group:
                assert param.requires_grad
                if self.ddp_config.overlap_grad_reduce:
                    assert (
                        param.grad is not None
                    ), 'param.grad being None is not safe when overlap_grad_reduce is True'
                if param.grad is not None and (
                    not param.grad_added_to_main_grad or getattr(param, 'zero_out_wgrad', False)
                ):
                    param.main_grad.add_(param.grad.data)
                param.grad = None

                if self.ddp_config.overlap_grad_reduce:
                    self.param_to_bucket_group[param].register_grad_ready(
                        param, self.force_all_reduce
                    )

        return hook

    @contextmanager
    def no_sync(self):
        """
        Context manager that turns off gradient synchronization.
        """
        for bucket_group in self.bucket_groups + self.expert_parallel_bucket_groups:
            bucket_group.is_last_microbatch = False
        try:
            yield
        finally:
            for bucket_group in self.bucket_groups + self.expert_parallel_bucket_groups:
                bucket_group.is_last_microbatch = True

    def start_param_sync(self, *unused, force_sync: bool = False, force_dispatch: bool = False):
        """
        Initiates param sync (all-gather) communication operations for all model parameters.

        By default, when overlap_param_gather is set to True, dispatches asynchronous communication
        calls; when overlap_param_gather is set to False, calls synchronous communication
        ops. Can override this default behavior using flags below.

        Args:
            force_sync (bool, optional): force synchronous collective regardless of
                other settings.
            force_dispatch (bool, optional): force dispatch regardless of other settings.
        """
        if not force_sync:
            # If overlapping param AG with optimizer step, AG should not be dispatched again
            # in forward_backward_step.
            if self.overlap_param_gather_with_optimizer_step and not force_dispatch:
                return

        for bucket_group in self.bucket_groups + self.expert_parallel_bucket_groups:
            bucket_group.start_param_sync(force_sync=force_sync)

            if not self.ddp_config.overlap_param_gather:
                # For MXFP8 params, we need to copy the all-gathered param data from the buffer to
                # the param.data, since param buffer is not mapped to model params for MXFP8 case.
                # The paramaters are cast from bf16 to MXFP8 during copy.
                # In the case of "overlap_param_gather=True", the param copy is done
                # in "finish_param_sync" stage after zeroing the shared gardient buffers.
                if self.ddp_config.reuse_grad_buf_for_mxfp8_param_ag:
                    for bucket in bucket_group.buckets:
                        is_bf16_weight_bucket = False
                        for param in bucket.params:
                            # Skip copying since bf16 weights in the mxfp8 model
                            # are already mapped to param.data.
                            if not is_float8tensor(param):
                                is_bf16_weight_bucket = True
                                break
                            param_start, param_end = bucket.param_to_index[param]
                            param_slice = bucket.param_data.view(-1)[param_start:param_end]
                            param.data.copy_(param_slice.view(param.data.shape))
                        if is_bf16_weight_bucket:
                            continue
                        # All-gathered params are not needed after being copied to param.data.
                        # Zero out the param buffer (shared with grad buffer) for gradient
                        # accumulation. We cannot zero out the entire grad buffer because one grad
                        # buffer may correspond to multiple param buffers. If we zero out the entire
                        # grad buffer, it would clear the data of those param buffers that have not
                        # yet completed AG.
                        bucket.param_data.zero_()
                else:
                    fp8_params = []
                    for bucket in bucket_group.buckets:
                        for param in bucket.params:
                            if is_float8tensor(param):
                                fp8_params.append(param)
                    if len(fp8_params) > 0:
                        post_all_gather_processing(fp8_params)

    def start_grad_sync(self, *unused):
        """
        Initiates grad sync (all-reduce or reduce-scatter) communication operations
        for all model gradients.

        When overlap_grad_reduce is set to True, dispatches asynchronous communication
        calls. When overlap_grad_reduce is set to False, calls synchronous
        communication ops.
        """
        for bucket_group in self.bucket_groups + self.expert_parallel_bucket_groups:
            bucket_group.start_grad_sync()

    def finish_grad_sync(self, force_all_reduce: Optional[bool] = False):
        """
        Finishes grad sync (all-reduce or reduce-scatter) communication operations
        for all model gradients.

        When overlap_grad_reduce is set to True, waits for asynchronous communication
        calls to complete. When overlap_grad_reduce is set to False, calls synchronous
        communication ops.
        """
        for bucket_group in self.bucket_groups + self.expert_parallel_bucket_groups:
            bucket_group.finish_grad_sync(force_all_reduce=force_all_reduce)

    def free_overlap_buffers(self):
        """Free overlap param-gather GPU buffers across all bucket groups."""
        for bucket_group in self.bucket_groups + self.expert_parallel_bucket_groups:
            bucket_group.free_overlap_buffers()

    def scale_gradients(self, scaling_factor: float):
        """Scale all gradients inside the buffers by `scaling_factor`."""
        for buffer in self.buffers + self.expert_parallel_buffers:
            buffer.scale_gradients(scaling_factor)

    def zero_grad_buffer(self):
        """
        Zeros out all grad buffers. Needs to be called at the beginning of each
        training iteration.
        """
        if getattr(self.config, 'cuda_graph_impl', 'none') != 'transformer_engine':
            # Don't reset grad_added_to_main_grad when CUDA Graph is used.
            # Because in CUDA Graph it no longer has the opportunity to set it back
            # to True, and there will be a double-GA.
            for param in self.params_with_grad:
                param.grad_added_to_main_grad = False
        for buffer in self.buffers + self.expert_parallel_buffers:
            buffer.reset()
        for bucket_group in self.bucket_groups + self.expert_parallel_bucket_groups:
            bucket_group.reset()

    def broadcast_params(self):
        """
        Syncs parameters across all DP ranks.
        """
        for param in self.module.parameters():
            is_expert_parallel = not getattr(param, 'allreduce', True)

            if is_expert_parallel:
                data_parallel_group = self.expt_dp_group
            else:
                data_parallel_group = self.dp_cp_group
            torch.distributed.broadcast(
                param.data,
                src=torch.distributed.get_global_rank(data_parallel_group, 0),
                group=data_parallel_group,
            )

    def offload_grad_buffers(self, synchronize: bool = True, empty_cache: bool = True) -> None:
        """
        Free all grad_data tensors to release GPU memory.

        Uses storage().resize_(0) to release memory while keeping tensor views intact.
        All bucket.grad_data and param.main_grad views remain valid tensor objects
        (though accessing them during offload is undefined behavior).

        Args:
            synchronize: Whether to call torch.cuda.synchronize() before freeing.
            empty_cache: Whether to call torch.cuda.empty_cache() after freeing.
        """
        if synchronize:
            torch.cuda.synchronize()

        for buffer in self.buffers + self.expert_parallel_buffers:
            buffer.offload_to_cpu(move_params=False, move_grads=True)

        if empty_cache:
            torch.cuda.empty_cache()

    def restore_grad_buffers(self, synchronize: bool = True) -> None:
        """
        Reallocate grad_data tensors on GPU.

        All existing views (bucket.grad_data, param.main_grad) automatically
        become valid again since they share the same storage. The grad_data
        is zeroed after reallocation.

        Args:
            synchronize: Whether to call torch.cuda.synchronize() after allocation.
        """
        for buffer in self.buffers + self.expert_parallel_buffers:
            buffer.reload_from_cpu(move_params=False, move_grads=True)

        if synchronize:
            torch.cuda.synchronize()


================================================
FILE: megatron/core/distributed/distributed_data_parallel_config.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from dataclasses import dataclass
from typing import Optional

import torch


@dataclass
class DistributedDataParallelConfig:
    """Configuration for DistributedDataParallel."""

    grad_reduce_in_fp32: bool = False
    """If true, reduce grads in fp32."""

    overlap_grad_reduce: bool = False
    """If true, overlap grad all-reduce / reduce-scatter with backward compute."""

    overlap_param_gather: bool = False
    """If true, overlap param all-gather with forward compute."""

    align_param_gather: bool = False
    """If true, all PP stages will launch param all-gathers simultaneously. Otherwise, each
    PP stage will independently launch as needed.
    """

    use_distributed_optimizer: bool = False
    """If true, issue reduce-scatter collectives to aggregate gradients and clean up
       originally allocated model parameters, otherwise issue all-reduce collectives.
    """

    num_distributed_optimizer_instances: int = 1
    """Sets the factor by which the DP domain is sharded to have the partial DistOpt
       enabled. Defaults to 1, which means DistOpt is across entire DP domain.
    """

    check_for_nan_in_grad: bool = False
    """
    If true, check for NaNs and Infs in gradients _before_ communication collective.
    Invoked by `start_grad_sync` such as in the Megatron-LM DDP training API.
    """

    check_for_large_grads: bool = False
    """If true, check for unexpectedly large gradients _before_ communication collective."""

    bucket_size: Optional[int] = None
    """Maximum number of parameters in each bucket. If unspecified, MCore uses a default
       value of max(40000000, 1000000 * dp_size) parameters (larger DP sizes need larger
       buckets to ensure collectives do not become latency-bound)."""

    pad_buckets_for_high_nccl_busbw: bool = False
    """If true, make sure the bucket size is divisible by a large power of 2 (2^16) to
       ensure NCCL collectives have high bus bandwidth at large DP counts, since NCCL
       message size (which for ring algorithms is bucket_size / dp_size) apparently needs
       to be divisible by a power of 2 for high busbw."""

    reduce_scatter_with_fp32_accumulation: bool = False
    """If true, use a reduce-scatter implementation which sends lower-precision values
       over the wire (using an all-to-all to keep total communication overhead in line
       with the standard ring implementation) but performs accumulation locally in FP32."""

    average_in_collective: bool = False
    """If true, compute average in collective directly, as opposed to dividing by the
       dp_size first and then computing sum in the collective."""

    fp8_param_gather: bool = False
    """If true, keep the compute param in fp8 (do not use any other intermediate dtype) and
       perform the param all-gather in fp8."""

    reuse_grad_buf_for_mxfp8_param_ag: bool = False
    """If true, reuse the grad buffer for param AG when using mxfp8 recipe. Should be 
       set to True only when fp8_recipe is mxfp8 and fp8_param_gather is True."""

    use_megatron_fsdp: bool = False
    """If true, use the FSDP code path for DDP."""

    use_custom_fsdp: bool = False
    """
    NOTE: The flag `use_custom_fsdp` is deprecated and will be removed in future versions.
    Please use `use_megatron_fsdp` instead, as all functionality will be migrated there.
    Future updates will drop support for `use_custom_fsdp` to avoid confusion.
    """

    data_parallel_sharding_strategy: str = 'no_shard'
    """Sharding strategy for FSDP. Valid values are 'no_shard', 'optim',
      'optim_grads', 'optim_grads_params'."""

    gradient_reduce_div_fusion: bool = True
    """If true, perform gradient reduce and division fusion."""

    suggested_communication_unit_size: int = None
    """Specifies the number of elements to communicate at once during
      FSDP (Fully Sharded Data Parallel) operations. 
      This flag also affects FSDP all-gather prefetch behavior. Setting a larger
      value increases the communication buffer size, while a smaller value
      disables prefetching and may degrade performance. Adjust this value
      based on your system's memory and performance requirements."""

    keep_fp8_transpose_cache: bool = False
    """If true, keep the fp8 transpose cache when using Megatron FSDP."""

    nccl_ub: bool = False
    """If true, allocate and register NCCL userbuffer for param and grad buffer.
      This flag enables SM efficient nccl algorithm that could improve the performance
      of FSDP and DP with comm_overlap. This flag will be much more effective when used
      together with sharp. 
      The follwoing will be the expected number of SM usage for various cases.
      (Note that this is just a reference number and the number of SM usage could vary 
      on message size, communication domain size and nccl version.)
      | Communication domain | use_sharp | SM usage of "AG/RS" |
      |----------------------|-----------|---------------------|
      | NVL                  | N/A       | 4 / 5               |
      | NVL+IB               | False     | 16 / 16             |
      | NVL+IB               | True      | 6 / 6               |
      | IB                   | False     | 1 / 4               |
      | IB                   | True      | 1 / 1               |
    """

    fsdp_double_buffer: bool = False
    """If true, use persistently allocated double buffers for the 
      temporary memory needed in the Megatron FSDP communications.
      This option will cause additional memory overhead, however, it is necessary for
      to register user buffer (nccl_ub=True) for the Megatron FSDP. 
      This option will be automatically set to True when nccl_ub=True.
    """

    fsdp_db_use_persist_buf_on_alloc_fail: bool = False
    """Whether to fall back to persistent buffer when a bucket does not
       fit FSDP double buffer size. If true, FSDP will use the persistently 
       allocated buffer for the bucket that does not fit, it will enable NCCL 
       user buffer with the cost of more memory usage. If false, FSDP will use
       Dynamic memory allocator, NCCL user buffer won't not enabled, which 
       usually leads to low performance.
    """

    fsdp_all_gather_in_start_param_sync: bool = True
    """
    If True, use all-gather during the initial Megatron-FSDP parameter
    synchronization step. This can increase overlap between the first
    parameter all-gather and computation, helping to better hide the
    initial communication cost.
    """

    outer_dp_sharding_strategy: str = 'no_shard'
    """
    Sharding strategy for outer data parallel group in Hybrid Sharded Data Parallel (HSDP) mode.
    Valid values are 'no_shard', 'optim'. This option is only effective when Hybrid FSDP is enabled.
    """

    disable_symmetric_registration: bool = False
    """If true, disable symmetric (window) registration for NCCL userbuffer registration.
      This option will force to use conventional (local) userbuffer registration 
      when nccl_ub is set.
    """

    fsdp_manual_registration: bool = False
    """If true, manually register the FSDP communication buffers to NCCL user buffer.
      This option is only effective when use_megatron_fsdp and nccl_ub is set.
      For symmetric registration with large models, the registration itself can take 
      a significant amount of time. This option minimizes the number of registration calls
      to minimize the registration time.
    """

    delay_wgrad_compute: bool = False
    """Delay the weight gradient computation to improve batch-level communication overlapping"""

    megatron_fsdp_main_params_dtype: Optional[torch.dtype] = torch.float32
    """Data type for the main weight buffer utilized for distributed optimization
      and quantization with Megatron-FSDP. If set to None, the model compute weight
      buffer will take the role of the main weights, or when no sharding is applied,
      the native model weights become the main weights. Defaults to torch.float32.
    """

    megatron_fsdp_main_grads_dtype: Optional[torch.dtype] = None
    """Data type for the main gradient buffer utilized for distributed optimization with
      Megatron-FSDP. If set to None, main gradients will match the dtype of the model
      compute parameters specified by the user model. Defaults to None.
    """

    megatron_fsdp_grad_comm_dtype: Optional[torch.dtype] = None
    """Data type for gradient gather / scatter communications. Can be utilized to reduce
      communication latency, but adds overhead for type-casting and copy operations.
      If using NCCL UBR v2.27+, gradient reduction may be performed in high-precision
      depending on the network domain (NVLink or IB), and can enable mixed-precision
      communication and accumulation, e.g. setting grad_comm_dtype to `BF16` can support
      `FP32` reduction even though we have `BF16` input and output communication buffers.
      If set to None, the `main_grads_dtype` is used. If using HSDP (either DP-Replicate
      or DP-Outer in `outer_dp_sharding_strategy`), `no_shard`, `optim`, or a
      `FixedPoolAllocator` (`fsdp_double_buffer`), allocating `dtype`-custom gradient
      communication buffers (per FSDP group) adds memory overhead. Defaults to None.
      No additional memory is allocated when `grad_comm_dtype == main_grads_dtype`.
    """

    def __post_init__(self):
        import os

        """Check the validity of the config."""
        if self.reuse_grad_buf_for_mxfp8_param_ag:
            assert self.fp8_param_gather, "Reuse grad buffer only when keeping params in MXFP8."

        if self.nccl_ub:
            if 'expandable_segments:True' in os.getenv('PYTORCH_CUDA_ALLOC_CONF', '').split(','):
                raise ValueError(
                    "PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True is currently not supported "
                    "with nccl_ub due to compatibility issue with torch.cuda.MemPool API."
                )


================================================
FILE: megatron/core/distributed/finalize_model_grads.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from functools import partial
from typing import Callable, List, Optional, Union

import torch
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors

try:
    from torch.distributed._tensor import DTensor, distribute_tensor

    HAVE_DTENSOR = True
except ImportError:
    HAVE_DTENSOR = False

from megatron.core.pipeline_parallel.utils import (
    get_pp_last_rank,
    is_pp_first_stage,
    is_pp_last_stage,
)
from megatron.core.process_groups_config import ProcessGroupCollection

from .. import parallel_state
from ..transformer.moe.moe_utils import get_updated_expert_bias
from ..transformer.transformer_config import TransformerConfig
from ..utils import (
    get_attr_wrapped_model,
    get_model_config,
    get_pg_size,
    get_tensor_model_parallel_group_if_none,
)


def _get_main_grad_attr(param: torch.nn.Parameter):
    if hasattr(param, "main_grad"):
        return "main_grad"
    return "grad"


def _unshard_if_dtensor(tensor: Union[torch.Tensor, "DTensor"]) -> torch.Tensor:
    """
    Unshards the input tensor if it is a DTensor and otherwise returns the
    tensor unmodified.

    Args:
        tensor (Union[torch.Tensor, DTensor]): The tensor to potentially unshard.

    Returns:
        An unsharded version of the input tensor if it is a DTensor, or the
        input tensor unmodified if it is not a DTensor.
    """
    if HAVE_DTENSOR and isinstance(tensor, DTensor):
        unsharded_tensor = tensor.full_tensor()
        for k, v in vars(tensor).items():
            setattr(unsharded_tensor, k, v)
        return unsharded_tensor
    return tensor


def _reshard_if_dtensor(
    tensor_to_shard: torch.Tensor, reference_tensor: Union[torch.Tensor, "DTensor"]
) -> Union[torch.Tensor, "DTensor"]:
    """
    Reshards the input tensor to match the sharding configuration of the
    reference tensor if the reference tensor is a DTensor. Otherwise, returns
    the reference tensor unmodified.

    Args:
        tensor_to_shard (torch.Tensor): The tensor to be potentially sharded.
        reference_tensor (Union[torch.Tensor, DTensor]): The reference tensor
            for the sharding configuration.

    Returns:
        Union[torch.Tensor, DTensor]: The sharded tensor matching the reference tensor's
        configuration, or the reference tensor itself if it is not a DTensor.
    """
    if HAVE_DTENSOR and isinstance(reference_tensor, DTensor):
        sharded_tensor = distribute_tensor(
            tensor_to_shard,
            device_mesh=reference_tensor.device_mesh,
            placements=reference_tensor.placements,
        )
        for k, v in vars(reference_tensor).items():
            setattr(sharded_tensor, k, v)
        return sharded_tensor
    return reference_tensor


def _allreduce_conditional_embedding_grads(
    model: List[torch.nn.Module],
    config: TransformerConfig,
    pp_group: Optional[torch.distributed.ProcessGroup] = None,
):
    """
    All-reduce conditional embedding grads.

    Reduce grads across all the pp stages to ensure that parameters of the conditional embedders
    (e.g., timestep embedder, FPS embedder, label embedder) stay in sync.
    This is for the models with replicated embedders on each PP / VPP rank, like diffusion models.
    """
    if pp_group is None:
        pp_group = parallel_state.get_pipeline_model_parallel_group()

    if pp_group.size() > 1 and getattr(config, "has_cond_embedder", False):
        grads_dict = {}
        for model_chunk in model:
            for name, param in get_attr_wrapped_model(model_chunk, 'named_parameters')():
                if param.requires_grad and getattr(param, 'pipeline_parallel', False):
                    grad = param.main_grad
                    if name in grads_dict:
                        # Add all the virtual PP rank's gradients to
                        # the first local virtual PP rank.
                        grads_dict[name][0].add_(grad)
                        # Append to the end for later update after cross-rank reduce.
                        grads_dict[name].append(grad)
                    else:
                        grads_dict[name] = [grad]
        if grads_dict:
            # All-reduce the gradient on the first VPP rank.
            grads = [param_grad[0] for _, param_grad in grads_dict.items()]
            coalesced = _flatten_dense_tensors(grads)
            torch.distributed.all_reduce(coalesced, group=pp_group)
            for buf, synced in zip(grads, _unflatten_dense_tensors(coalesced, grads)):
                buf.copy_(synced)

            # Update the gradients on other VPP ranks.
            for grads in grads_dict.values():
                for grad in grads[1:]:
                    grad.copy_(grads[0])


def _get_shared_word_embedding_weight(
    model_module: torch.nn.Module, config: TransformerConfig
) -> Optional[torch.nn.Parameter]:
    """Return the shared word-embedding weight if it is duplicated across stages.

    Args:
        model_module: The model module from which to extract the
            word-embedding weight.
        config: Transformer config.

    Returns:
        The shared embedding or output weight if available; otherwise ``None``.
    """
    # Only reduce if weights are duplicated across stages.
    if model_module.share_embeddings_and_output_weights or getattr(config, 'mtp_num_layers', 0):
        return model_module.shared_embedding_or_output_weight()
    return None


def _get_position_embedding_weight(model_module: torch.nn.Module) -> torch.nn.Parameter:
    """Return the position-embedding weight tensor from the given model module.

    Args:
        model_module: The model module that owns the
            position-embedding parameter.

    Returns:
        The position-embedding weight tensor.
    """
    return getattr(model_module, 'position_embeddings').weight  # type: ignore[attr-defined]


def _allreduce_word_embedding_grads(
    model: List[torch.nn.Module],
    config: TransformerConfig,
    embd_group: Optional[torch.distributed.ProcessGroup] = None,
    pp_group: Optional[torch.distributed.ProcessGroup] = None,
):
    """All-reduce word-embedding gradients across the first and last PP stages.

    This ensures that the ``word_embeddings`` parameters stay in sync when they
    are shared between the input and output layers.

    Args:
        model: A list containing the pipeline chunks
            that constitute the model on the current rank (including any
            virtual pipeline chunks).
        config: Transformer configuration. Used for edge
            cases like MTP where embeddings might be shared differently.
        embd_group: The process
            group over which to all-reduce the word-embedding gradients. If
            ``None``, it will be looked up based on the current pipeline model
            parallel group.
        pp_group: The pipeline
            parallel process group used to identify first/last stages. If
            ``None``, it will be looked up.
    """
    if embd_group is None:
        embd_group = parallel_state.get_embedding_group(check_initialized=False)
        if get_pg_size(embd_group) > 1:
            assert pp_group is None
            pp_group = parallel_state.get_pipeline_model_parallel_group()

    _allreduce_embedding_grad(
        model,
        embd_group,
        pp_group,
        partial(_get_shared_word_embedding_weight, config=config),
        config=config,
    )


def _allreduce_embedding_grad(
    model: List[torch.nn.Module],
    embd_group: torch.distributed.ProcessGroup,
    pp_group: torch.distributed.ProcessGroup,
    weight_getter: Callable[[torch.nn.Module], Optional[torch.nn.Parameter]],
    skip_if_none: bool = True,
    config: TransformerConfig = None,
):
    """Unified helper to all-reduce embedding parameters across pipeline stages.

    Args:
        model (List[torch.nn.Module]): A list of model chunks (PP/VPP).
        embd_group (torch.distributed.ProcessGroup): The process group over which to reduce.
        pp_group (torch.distributed.ProcessGroup): The pipeline parallel process group for
            first/last stage detection.
        weight_getter (Callable[[torch.nn.Module], Optional[torch.nn.Parameter]]): A function
            that takes the *pre-process* model chunk and returns the parameter to be reduced
            (or ``None`` if not applicable).
        skip_if_none (bool, optional): If True, quietly returns when the parameter or its
            gradient is ``None``. Defaults to True.
    """

    if (
        # embd_group can be None in cases there is no embd_group
        # get_pg_size(embd_group) will return 1 and the all-reduce will be skipped.
        get_pg_size(embd_group) > 1
        and torch.distributed.get_rank() in torch.distributed.get_process_group_ranks(embd_group)
    ):

        if is_pp_first_stage(pp_group):
            model_module = model[0]
        elif is_pp_last_stage(pp_group):
            model_module = model[-1]
        elif getattr(config, 'mtp_num_layers', None) is not None and config.mtp_num_layers > 0:
            # Embedding for MTP layers is in the last virtual pipeline model parallel stage.
            model_module = model[-1]
        else:  # We do not support an interleaved schedule for models with encoders yet.
            model_module = model[0]

        ddp_config = model_module.ddp_config
        model_module = get_attr_wrapped_model(model_module, 'pre_process', return_model_obj=True)

        weight = weight_getter(model_module)
        if weight is None and skip_if_none:
            return

        grad_attr = _get_main_grad_attr(weight)
        orig_grad = getattr(weight, grad_attr)
        if ddp_config.use_megatron_fsdp:
            orig_grad = orig_grad._local_tensor if orig_grad is not None else None
        grad = _unshard_if_dtensor(orig_grad)
        # When the embedding is frozen, the grad is None.
        if grad is None and skip_if_none:
            return
        torch.distributed.all_reduce(grad, group=embd_group)
        setattr(weight, grad_attr, _reshard_if_dtensor(grad, orig_grad))


def _allreduce_position_embedding_grads(
    model: List[torch.nn.Module],
    config: TransformerConfig,
    pos_emb_group: torch.distributed.ProcessGroup,
    pp_group: torch.distributed.ProcessGroup,
):
    """
    All-reduce position_embeddings grad across encoder and decoder stages to ensure that position
    embeddings parameters stay in sync.
    """

    _allreduce_embedding_grad(
        model, pos_emb_group, pp_group, _get_position_embedding_weight, skip_if_none=False
    )


def reset_model_temporary_tensors(config: TransformerConfig, model: List[torch.nn.Module]):
    """
    Reset the temporary tensors of the model.
    """
    for model_chunk in model:
        for module in get_attr_wrapped_model(model_chunk, 'modules')():
            if config.moe_router_enable_expert_bias and hasattr(module, 'expert_bias'):
                module.local_tokens_per_expert.zero_()
            if (
                config.moe_router_load_balancing_type == "global_aux_loss"
                or "global_aux_loss" in config.moe_router_load_balancing_type
            ) and hasattr(module, 'reset_global_aux_loss_tracker'):
                module.reset_global_aux_loss_tracker()


def _update_router_expert_bias(model: List[torch.nn.Module], config: TransformerConfig):
    """
    Update the expert bias of the router for a global batch.
    This requires all-reduce of local_tokens_per_expert across TPxCPxDP ranks
    """
    tokens_per_expert_list = []
    expert_bias_list = []
    for model_chunk in model:
        for module in get_attr_wrapped_model(model_chunk, 'modules')():
            # Only update expert_bias if this module is in the training mode. There are special
            # cases where only the student is in training mode but the teacher is in eval mode
            # when using online knoweldge-distillation with Model-Optimizer. In this case, we want
            # to avoid updating teacher's expert_bias.
            if hasattr(module, 'expert_bias') and module.training:
                tokens_per_expert_list.append(module.local_tokens_per_expert)
                expert_bias_list.append(module.expert_bias)
    # For hybrid models with both MoE and Dense layers, this list can be empty.
    if len(expert_bias_list) == 0:
        return
    stacked_tokens_per_expert = torch.stack(tokens_per_expert_list, dim=0)
    stacked_expert_bias = torch.stack(expert_bias_list, dim=0)
    stacked_updated_expert_bias = get_updated_expert_bias(
        stacked_tokens_per_expert, stacked_expert_bias, config.moe_router_bias_update_rate
    )

    for expert_bias, updated_expert_bias in zip(expert_bias_list, stacked_updated_expert_bias):
        expert_bias.copy_(updated_expert_bias)


def _allreduce_non_tensor_model_parallel_grads(
    model: List[torch.nn.Module],
    config: TransformerConfig,
    tp_group: Optional[torch.distributed.ProcessGroup] = None,
):
    """
    All-reduce both layernorm grads (for sequence parallelism) and
    gradients from modules with average_gradients_across_tp_domain=True
    across tensor-model-parallel ranks.
    """
    tp_group = get_tensor_model_parallel_group_if_none(tp_group)
    if tp_group.size() <= 1:
        return

    params_sum = []
    grads_sum = []
    params_avg = []
    grads_avg = []

    for model_chunk in model:
        ddp_config = model_chunk.ddp_config
        for name, param in get_attr_wrapped_model(model_chunk, 'named_parameters')():
            if param.requires_grad:
                # Check if this param needs average reduction (average_gradients_across_tp_domain)
                if getattr(param, "average_gradients_across_tp_domain", False):
                    grad_attr = _get_main_grad_attr(param)
                    grad = getattr(param, grad_attr)
                    if grad is None:
                        continue
                    params_avg.append(param)
                    if ddp_config.use_megatron_fsdp:
                        grads_avg.append(grad._local_tensor.data)
                    else:
                        grad = _unshard_if_dtensor(grad)
                        grads_avg.append(grad.data)
                # Check if this param needs sum reduction (sequence parallel or qk_layernorm)
                elif (config.sequence_parallel and getattr(param, "sequence_parallel", False)) or (
                    config.qk_layernorm and ("q_layernorm" in name or "k_layernorm" in name)
                ):
                    grad_attr = _get_main_grad_attr(param)
                    grad = getattr(param, grad_attr)
                    if grad is None:
                        continue
                    params_sum.append(param)
                    if ddp_config.use_megatron_fsdp:
                        grads_sum.append(grad._local_tensor.data)
                    else:
                        grad = _unshard_if_dtensor(grad)
                        grads_sum.append(grad.data)

    # Loop grads and perform correct all-reduce
    for params, grads, all_reduce_op in zip(
        [params_sum, params_avg],
        [grads_sum, grads_avg],
        [torch.distributed.ReduceOp.SUM, torch.distributed.ReduceOp.AVG],
    ):
        if grads:
            coalesced = _flatten_dense_tensors(grads)
            torch.distributed.all_reduce(coalesced, op=all_reduce_op, group=tp_group)
            for param, buf, synced in zip(
                params, grads, _unflatten_dense_tensors(coalesced, grads)
            ):
                buf.copy_(synced)
                grad_attr = _get_main_grad_attr(param)
                orig_grad = getattr(param, grad_attr)
                if ddp_config.use_megatron_fsdp:
                    setattr(param, grad_attr, orig_grad)
                else:
                    setattr(param, grad_attr, _reshard_if_dtensor(buf, orig_grad))


"""
This is an alias to _allreduce_non_tensor_model_parallel_grads that we must
maintain for legacy tests. We can remove this proxy in mcore 0.14.
"""
_allreduce_layernorm_grads = _allreduce_non_tensor_model_parallel_grads


def finalize_model_grads(
    model: List[torch.nn.Module],
    num_tokens: Optional[torch.Tensor] = None,
    pg_collection: Optional[ProcessGroupCollection] = None,
    force_all_reduce: Optional[bool] = False,
):
    """
    All-reduce all model grads across DP replicas, layernorm grads for sequence parallelism,
    embedding grads across first and last pipeline stages (if not tied),
    scale gradients by `num_tokens`.
    """

    config = get_model_config(model[0])
    if pg_collection is not None:
        assert hasattr(pg_collection, 'tp')
        assert hasattr(pg_collection, 'pp')
        assert hasattr(pg_collection, 'embd'), (
            "pg_collection must have a embd. In previous version, it is used default "
            "`parallel_state.default_embedding_ranks` to create the process group."
            " If you are using the default process group, please use"
            " `parallel_state.get_embedding_group()` "
            "If you don't need embd_group, you need to explicitly set it to None."
        )
        assert hasattr(pg_collection, 'pos_embd'), (
            "pg_collection must have a pos_embd. In previous version, it is used default "
            "`parallel_state.default_position_embedding_ranks` to create the process group."
            " If you are using the default process group, please use "
            " `parallel_state.get_position_embedding_group()` "
            "If you don't need pos_embd_group, you need to explicitly set it to None."
        )
        assert hasattr(pg_collection, 'dp_cp')
        tp_group = pg_collection.tp
        pp_group = pg_collection.pp
        embd_group = pg_collection.embd
        pos_emb_group = pg_collection.pos_embd
        dp_cp_group = pg_collection.dp_cp
    else:
        tp_group = parallel_state.get_tensor_model_parallel_group()
        pp_group = parallel_state.get_pipeline_model_parallel_group()
        embd_group = parallel_state.get_embedding_group(check_initialized=False)
        pos_emb_group = parallel_state.get_position_embedding_group(check_initialized=False)
        dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=True)

    # All-reduce / reduce-scatter across DP replicas.
    if config.timers is not None:
        config.timers('all-grads-sync', log_level=1).start(barrier=config.barrier_with_L1_time)
    for model_chunk in model:
        model_chunk.finish_grad_sync(force_all_reduce=force_all_reduce)
    if config.timers is not None:
        config.timers('all-grads-sync').stop()

    # All-reduce t_embedder grads (for pp & vpp of DiT).
    if config.timers is not None:
        config.timers('conditional-embedder-grads-all-reduce', log_level=1).start(
            barrier=config.barrier_with_L1_time
        )
    _allreduce_conditional_embedding_grads(model, config, pp_group)
    if config.timers is not None:
        config.timers('conditional-embedder-grads-all-reduce').stop()

    # All-reduce layer-norm grads (for sequence parallelism) and non-tensor parallel modules.
    if config.timers is not None:
        config.timers('non-tensor-parallel-grads-all-reduce', log_level=1).start(
            barrier=config.barrier_with_L1_time
        )
    _allreduce_non_tensor_model_parallel_grads(model, config, tp_group)
    if config.timers is not None:
        config.timers('non-tensor-parallel-grads-all-reduce').stop()

    # All-reduce embedding grads (for pipeline parallelism).
    if config.timers is not None:
        config.timers('embedding-grads-all-reduce', log_level=1).start(
            barrier=config.barrier_with_L1_time
        )
    _allreduce_word_embedding_grads(model, config, embd_group, pp_group)
    _allreduce_position_embedding_grads(model, config, pos_emb_group, pp_group)

    if config.timers is not None:
        config.timers('embedding-grads-all-reduce').stop()

    if config.moe_router_enable_expert_bias:
        _update_router_expert_bias(model, config)

    reset_model_temporary_tensors(config, model)

    # normalize gradients for per-token loss normalization.
    # if we are using by the number of tokens, then we use that as a divisor. this number
    # will be the total number of non-padded tokens in the global batch.
    if num_tokens is not None:

        # the number of tokens is only present on the last stage, so broadcast it
        # to the other ranks in the pipeline parallel group.
        assert not isinstance(pp_group, list)
        last_rank = get_pp_last_rank(pp_group)
        torch.distributed.broadcast(num_tokens, src=last_rank, group=pp_group)

        # all-reduce across DP ranks.
        torch.distributed.all_reduce(num_tokens, group=dp_cp_group)
        for model_chunk in model:
            if num_tokens > 0:
                scaling = 1.0 / num_tokens
                model_chunk.scale_gradients(scaling)


================================================
FILE: megatron/core/distributed/fsdp/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from .mcore_fsdp_adapter import FullyShardedDataParallel


================================================
FILE: megatron/core/distributed/fsdp/mcore_fsdp_adapter.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import random
from typing import List, Optional

try:
    import einops

    HAVE_EINOPS = True
except ImportError:
    HAVE_EINOPS = False

import numpy as np
import torch
import torch.distributed as dist

try:
    from torch.distributed import DeviceMesh

    HAVE_DTENSOR = True
except ImportError:
    HAVE_DTENSOR = False

from megatron.core import parallel_state, tensor_parallel
from megatron.core.config_logger import has_config_logger_enabled, log_config_to_disk
from megatron.core.distributed.data_parallel_base import _BaseDataParallel
from megatron.core.distributed.distributed_data_parallel_config import DistributedDataParallelConfig
from megatron.core.extensions.transformer_engine import TELinear
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.transformer_layer import TransformerLayer
from megatron.core.utils import is_te_min_version, log_single_rank

try:
    from megatron.core.distributed.fsdp.src.megatron_fsdp import (
        FSDPDistributedIndex,
        MegatronFSDP,
        MixedPrecisionPolicy,
    )

    HAVE_MEGATRON_FSDP = True
except ImportError as import_megatron_fsdp_error:
    IMPORT_MEGATRON_FSDP_ERROR = import_megatron_fsdp_error
    HAVE_MEGATRON_FSDP = False

logger = logging.getLogger(__name__)


class FullyShardedDataParallel(_BaseDataParallel):
    """
    Fully Sharded Data Parallel (FSDP) wrapper for the Megatron model.
    """

    def __init__(
        self,
        config: TransformerConfig,
        ddp_config: DistributedDataParallelConfig,
        module: torch.nn.Module,
        fsdp_unit_modules: Optional[List[torch.nn.Module]] = None,
        disable_bucketing: bool = False,
        device: Optional[torch.device] = None,
        pg_collection: Optional[ProcessGroupCollection] = None,
    ):
        if not HAVE_MEGATRON_FSDP:
            raise IMPORT_MEGATRON_FSDP_ERROR

        if has_config_logger_enabled(config):
            log_config_to_disk(config, locals(), prefix=type(self).__name__)

        self.ddp_config = ddp_config
        log_single_rank(
            logger,
            logging.INFO,
            f'Setting up DistributedDataParallel with config {self.ddp_config}',
        )
        self.mp_policy = MixedPrecisionPolicy(
            main_params_dtype=ddp_config.megatron_fsdp_main_params_dtype,
            # Grandfathered Argument: grad_reduce_in_fp32
            main_grads_dtype=(
                torch.float32
                if ddp_config.grad_reduce_in_fp32
                else ddp_config.megatron_fsdp_main_grads_dtype
            ),
            grad_comm_dtype=(
                torch.float32
                if ddp_config.grad_reduce_in_fp32
                else ddp_config.megatron_fsdp_grad_comm_dtype
            ),
        )
        log_single_rank(
            logger,
            logging.INFO,
            f'Setting up Megatron-FSDP MixedPrecisionPolicy with config {self.mp_policy}',
        )

        self.megatron_fsdp_dist_index = self._init_dist_index(pg_collection)

        if config.gradient_accumulation_fusion:
            assert is_te_min_version("2.10"), (
                "Megatron-FSDP with gradient_accumulation_fusion requires "
                "Transformer Engine version 2.10 or higher."
            )

        self.bucket_size = self.ddp_config.bucket_size
        if disable_bucketing:
            self.bucket_size = None
        self.device = device if device else torch.device(f'cuda:{torch.cuda.current_device()}')

        if fsdp_unit_modules is not None:
            self.fsdp_unit_modules = fsdp_unit_modules
        else:
            if self.ddp_config.data_parallel_sharding_strategy == "optim_grads_params":
                self.fsdp_unit_modules = [TransformerLayer]
            else:
                self.fsdp_unit_modules = []

        self._fix_tensor_parallel_attributes(module)

        super().__init__(
            config=config,
            module=MegatronFSDP(
                ddp_config=ddp_config,
                mixed_precision_policy=self.mp_policy,
                module=module,
                fsdp_unit_modules=self.fsdp_unit_modules,
                disable_bucketing=disable_bucketing,
                device=self.device,
                dist_index=self.megatron_fsdp_dist_index,
                calculate_per_token_loss=config.calculate_per_token_loss,
                init_model_with_meta_device=config.init_model_with_meta_device,
                enable_fine_grained_param_gather_hook=(
                    config.fp8_recipe == "mxfp8" and ddp_config.fp8_param_gather
                ),
            ),
        )
        self.param_and_grad_buffer = self.module.param_and_grad_buffer
        self.no_sync = self.module.no_sync
        self.start_param_sync = self.module.start_param_sync
        self.start_grad_sync = self.module.start_grad_sync
        self.finish_grad_sync = self.module.finish_grad_sync
        self.scale_gradients = self.module.scale_gradients
        self.zero_grad_buffer = self.module.zero_grad_buffer
        self.broadcast_params = self.module.broadcast_params
        self.module.state_dict_for_save_checkpoint = self.module.state_dict
        self.state_dict_for_save_checkpoint = self.state_dict
        self.module.config = config

        self.sync_rng_states_across_tp_group()

    def load_state_dict(self, state_dict, strict=True):
        """
        Load the state dictionary into the module.
        """
        custom_state_dict = {}
        for key, value in state_dict.items():
            if self.config.fp8 and key.endswith('._extra_state'):
                # Skip extra state keys
                continue
            custom_state_dict[f"module.{key}"] = value

        if self.config.fp8 or self.config.gated_linear_unit:
            strict = False
            log_single_rank(
                logger,
                logging.WARNING,
                "Loading state_dict with strict=False due to fp8 configuration. "
                "This is expected as some keys may not match exactly.",
            )

        self.module.load_state_dict(custom_state_dict, strict=strict)

    def _fix_tensor_parallel_attributes(self, module):
        is_expert_param = lambda n, p: ".experts." in n
        is_router_param = lambda n, p: ".router.weight" in n

        if parallel_state.get_tensor_model_parallel_group():
            tp_size = parallel_state.get_tensor_model_parallel_group().size()
        else:
            tp_size = 1

        if parallel_state.get_expert_tensor_parallel_group():
            expt_tp_size = parallel_state.get_expert_tensor_parallel_group().size()
        else:
            expt_tp_size = 1

        param_to_direct_module = {}
        for name, m in module.named_modules():
            for p in m.parameters(recurse=False):
                param_to_direct_module[p] = (name, m)

        for name, param in module.named_parameters():
            if is_expert_param(name, param) and expt_tp_size > 1:
                setattr(param, "_mcore_tp", True)
                if "linear_fc1.weight" in name:
                    setattr(param, "_tp_partition_dim", 0)
                elif "linear_fc2.weight" in name:
                    setattr(param, "_tp_partition_dim", 1)

            if not is_expert_param(name, param) and tp_size > 1:
                m_name, direct_module = param_to_direct_module[param]
                if isinstance(direct_module, (TELinear,)):
                    parallel_mode = getattr(direct_module, "parallel_mode", None)
                    if parallel_mode is None:
                        setattr(param, "_mcore_tp", True)
                        setattr(param, "_tp_duplicated", True)
                elif is_router_param(name, param):
                    setattr(param, "_mcore_tp", True)
                    setattr(param, "_tp_duplicated", True)

    def _init_dist_index(self, pg_collection):
        """
        Initialize the distributed index for the module.
        """
        if not HAVE_DTENSOR:
            raise ImportError(
                "This module requires PyTorch with DTensor support. "
                "Please install a compatible version of PyTorch."
            )

        enable_hsdp = self.ddp_config.num_distributed_optimizer_instances > 1
        if pg_collection is None:
            tp_group = parallel_state.get_tensor_model_parallel_group()
            expt_tp_group = parallel_state.get_expert_tensor_parallel_group()
            if enable_hsdp:
                dp_cp_group = parallel_state.get_data_parallel_group(
                    with_context_parallel=True, partial_data_parallel=True
                )
                outer_fsdp_group = parallel_state.get_inter_distributed_optimizer_instance_group()
                hybrid_fsdp_group = parallel_state.get_data_parallel_group(
                    with_context_parallel=True, partial_data_parallel=False
                )
                expt_dp_group = parallel_state.get_expert_data_parallel_group(
                    partial_expert_data_parallel=True
                )
                hybrid_fsdp_expt_group = parallel_state.get_expert_data_parallel_group(
                    partial_expert_data_parallel=False
                )
                ep_group = parallel_state.get_expert_model_parallel_group()
            else:
                dp_cp_group = parallel_state.get_data_parallel_group(
                    with_context_parallel=True, partial_data_parallel=False
                )
                outer_fsdp_group = None
                hybrid_fsdp_group = None
                expt_dp_group = parallel_state.get_expert_data_parallel_group()
                ep_group = parallel_state.get_expert_model_parallel_group()
        else:
            tp_group = getattr(pg_collection, 'tp', None)
            expt_tp_group = getattr(pg_collection, 'expt_tp', None)
            if enable_hsdp:
                dp_cp_group = pg_collection.intra_dp_cp
                outer_fsdp_group = pg_collection.inter_dist_opt
                hybrid_fsdp_group = pg_collection.dp_cp
                # This has not been tested yet.
                expt_dp_group = getattr(pg_collection, 'intra_expt_dp', None)
                hybrid_fsdp_expt_group = getattr(pg_collection, 'expt_dp', None)
                ep_group = getattr(pg_collection, 'ep', None)
            else:
                dp_cp_group = pg_collection.dp_cp
                outer_fsdp_group = None
                hybrid_fsdp_group = None
                expt_dp_group = getattr(pg_collection, 'expt_dp', None)
                ep_group = getattr(pg_collection, 'ep', None)

        if tp_group is None:
            single_rank_group = dist.new_group(ranks=[dist.get_rank()])
            tp_group = single_rank_group

        if expt_tp_group is None:
            single_rank_group = dist.new_group(ranks=[dist.get_rank()])
            expt_tp_group = single_rank_group

        if enable_hsdp:
            if expt_dp_group is not None:
                expt_mesh = _get_hsdp_tp_mesh(
                    outer_fsdp_group, expt_dp_group, expt_tp_group, ep_size=ep_group.size()
                )
                expt_device_mesh = DeviceMesh.from_group(
                    [outer_fsdp_group, expt_dp_group, expt_tp_group],
                    device_type="cuda",
                    mesh=expt_mesh.tolist(),
                    mesh_dim_names=["outer_fsdp_dp", "dp_cp", "tp"],
                )
            else:
                expt_device_mesh = None
            mesh = _get_hsdp_tp_mesh(outer_fsdp_group, dp_cp_group, tp_group)
            dist_index = FSDPDistributedIndex(
                hsdp_outer_dp_shard=self.ddp_config.outer_dp_sharding_strategy != "no_shard",
                device_mesh=DeviceMesh.from_group(
                    [outer_fsdp_group, dp_cp_group, tp_group],
                    device_type="cuda",
                    mesh=mesh.tolist(),
                    mesh_dim_names=["outer_fsdp_dp", "dp_cp", "tp"],
                ),
                dp_outer_dim="outer_fsdp_dp",  # Use Hybrid FSDP!
                dp_shard_dim="dp_cp",
                tp_dim="tp",
                hybrid_fsdp_group=hybrid_fsdp_group,
                hybrid_fsdp_expt_group=hybrid_fsdp_expt_group,
                expt_device_mesh=expt_device_mesh,
            )
        else:
            if ep_group is not None:
                expt_mesh = _get_dp_tp_mesh(expt_dp_group, expt_tp_group, ep_size=ep_group.size())
                expt_device_mesh = DeviceMesh.from_group(
                    [expt_dp_group, expt_tp_group],
                    device_type="cuda",
                    mesh=expt_mesh.tolist(),
                    mesh_dim_names=["dp_cp", "tp"],
                )
            else:
                expt_device_mesh = None

            mesh = _get_dp_tp_mesh(dp_cp_group, tp_group)
            dist_index = FSDPDistributedIndex(
                device_mesh=DeviceMesh.from_group(
                    [dp_cp_group, tp_group],
                    device_type="cuda",
                    mesh=mesh.tolist(),
                    mesh_dim_names=["dp_cp", "tp"],
                ),
                dp_shard_dim="dp_cp",
                tp_dim="tp",
                expt_device_mesh=expt_device_mesh,
            )

        self.tp_group = tp_group

        return dist_index

    def stop_communication(self):
        """
        Stop communication for the module.
        """
        self.module.synchronize_gradient_reduce()
        self.module.synchronize_param_gather()

    def sync_rng_states_across_tp_group(self):
        """
        Synchronize the tensor parallel random number generator states.
        """
        if self.tp_group.size() <= 1:
            return

        if self.tp_group.rank() == 0:
            broadcast_list = [_get_rng_state_dict()]
        else:
            broadcast_list = [None]
        torch.distributed.broadcast_object_list(broadcast_list, group=self.tp_group, group_src=0)
        _load_rng_state_dict(broadcast_list[0])


def _get_hsdp_tp_mesh(outer_fsdp_dp_group, dp_cp_group, tp_group, ep_size=1):
    assert HAVE_EINOPS, "einops is not installed. Please install it with `pip install einops`."
    world_size = dist.get_world_size()

    mesh = einops.rearrange(
        torch.arange(world_size),
        "(outer_fsdp_dp fsdp ep tp) -> ep outer_fsdp_dp fsdp tp",
        outer_fsdp_dp=outer_fsdp_dp_group.size(),
        tp=tp_group.size(),
        ep=ep_size,
    )

    mesh_fsdp_ranks = einops.rearrange(
        mesh,
        'ep outer_fsdp_dp fsdp tp -> (outer_fsdp_dp ep tp) fsdp',
        tp=tp_group.size(),
        fsdp=dp_cp_group.size(),
        ep=ep_size,
    )
    fsdp_group_ranks = dist.get_process_group_ranks(dp_cp_group)
    assert _check_mesh_ranks_and_group_ranks_are_consistent(mesh_fsdp_ranks, fsdp_group_ranks), (
        f"[Megatron-FSDP] FSDP ranks in the mesh {mesh_fsdp_ranks} "
        f"do not match the ranks in the FSDP group {fsdp_group_ranks}."
    )

    mesh_tp_ranks = einops.rearrange(
        mesh,
        'ep outer_fsdp_dp fsdp tp -> (outer_fsdp_dp fsdp ep) tp',
        tp=tp_group.size(),
        fsdp=dp_cp_group.size(),
    )
    tp_group_ranks = dist.get_process_group_ranks(tp_group)
    assert _check_mesh_ranks_and_group_ranks_are_consistent(mesh_tp_ranks, tp_group_ranks), (
        f"[Megatron-FSDP] Tensor Parallel ranks in the mesh {mesh_tp_ranks} "
        f"do not match the ranks in the TP group {tp_group_ranks}."
    )

    mesh_outer_fsdp_dp_ranks = einops.rearrange(
        mesh,
        'ep outer_fsdp_dp fsdp tp -> (fsdp ep tp) outer_fsdp_dp',
        tp=tp_group.size(),
        fsdp=dp_cp_group.size(),
        ep=ep_size,
    )
    outer_fsdp_dp_group_ranks = dist.get_process_group_ranks(outer_fsdp_dp_group)
    assert _check_mesh_ranks_and_group_ranks_are_consistent(
        mesh_outer_fsdp_dp_ranks, outer_fsdp_dp_group_ranks
    ), (
        f"[Megatron-FSDP] Outer FSDP Data Parallel ranks in the mesh {mesh_outer_fsdp_dp_ranks} "
        f"do not match the ranks in the Outer FSDP DP group {outer_fsdp_dp_group_ranks}."
    )

    # Exclude the expert parallel dimension
    rank = dist.get_rank()
    dp_tp_meshes = [per_ep_mesh for per_ep_mesh in mesh if rank in per_ep_mesh.reshape(-1).tolist()]
    assert (
        len(dp_tp_meshes) == 1
    ), f"[Megatron-FSDP] Current rank {rank} is not unique in the mesh ranks {mesh.tolist()}."
    assert (
        len(dp_tp_meshes[0].reshape(-1).tolist())
        == outer_fsdp_dp_group.size() * dp_cp_group.size() * tp_group.size()
    ), (
        f"[Megatron-FSDP] DP-TP mesh size {len(dp_tp_meshes[0].reshape(-1).tolist())} "
        f"does not match the expected size"
        f"{outer_fsdp_dp_group.size() * dp_cp_group.size() * tp_group.size()}."
    )
    return dp_tp_meshes[0]


def _get_dp_tp_mesh(dp_cp_group, tp_group, ep_size=1):
    assert HAVE_EINOPS, "einops is not installed. Please install it with `pip install einops`."
    world_size = dist.get_world_size()

    tp_size = dist.get_world_size(tp_group) if tp_group is not None else 1
    # TODO: Supports configurable (dp, cp, ep, tp) order.
    mesh = einops.rearrange(
        torch.arange(world_size),
        "(dp_cp ep tp) -> ep dp_cp tp",
        dp_cp=dp_cp_group.size(),
        tp=tp_size,
        ep=ep_size,
    )

    mesh_dp_ranks = einops.rearrange(mesh, 'ep dp_cp tp -> (ep tp) dp_cp', dp_cp=dp_cp_group.size())
    dp_cp_group_ranks = dist.get_process_group_ranks(dp_cp_group)
    assert _check_mesh_ranks_and_group_ranks_are_consistent(mesh_dp_ranks, dp_cp_group_ranks), (
        f"[Megatron-FSDP] Data Parallel ranks in the mesh {mesh_dp_ranks} "
        f"do not match the ranks in the DP group {dp_cp_group_ranks}."
    )

    mesh_tp_ranks = einops.rearrange(mesh, 'ep dp_cp tp -> (dp_cp ep) tp', tp=tp_size)
    tp_group_ranks = dist.get_process_group_ranks(tp_group)
    assert _check_mesh_ranks_and_group_ranks_are_consistent(mesh_tp_ranks, tp_group_ranks), (
        f"[Megatron-FSDP] Tensor Parallel ranks in the mesh {mesh_tp_ranks} "
        f"do not match the ranks in the TP group {tp_group_ranks}."
    )

    # Exclude the expert parallel dimension
    rank = dist.get_rank()
    dp_tp_meshes = [per_ep_mesh for per_ep_mesh in mesh if rank in per_ep_mesh.reshape(-1).tolist()]
    assert (
        len(dp_tp_meshes) == 1
    ), f"[Megatron-FSDP] Current rank {rank} is not unique in the mesh ranks {mesh.tolist()}."
    assert len(dp_tp_meshes[0].reshape(-1).tolist()) == dp_cp_group.size() * tp_group.size(), (
        f"[Megatron-FSDP] DP-TP mesh size {len(dp_tp_meshes[0].reshape(-1).tolist())} "
        f"does not match expected size {dp_cp_group.size() * tp_group.size()}."
    )

    return dp_tp_meshes[0]


def _check_mesh_ranks_and_group_ranks_are_consistent(mesh_ranks, group_ranks):
    current_rank = dist.get_rank()
    current_ranks = list(filter(lambda ranks: current_rank in ranks, mesh_ranks.tolist()))
    assert len(current_ranks) == 1, (
        f"[Megatron-FSDP] Current rank {current_rank} is not unique in "
        f"the mesh ranks {mesh_ranks.tolist()}."
    )
    assert sorted(current_ranks[0]) == sorted(group_ranks), (
        f"[Megatron-FSDP] Current rank {current_rank} in the mesh ranks "
        f"{mesh_ranks.tolist()} does not match the group ranks {group_ranks}."
    )
    return sorted(current_ranks[0]) == sorted(group_ranks)


def _get_rng_state_dict():
    rng_state_dict = {
        'random_rng_state': random.getstate(),
        'np_rng_state': np.random.get_state(),
        'torch_rng_state': torch.get_rng_state(),
        'cuda_rng_state': torch.cuda.get_rng_state(),
        'rng_tracker_states': tensor_parallel.get_cuda_rng_tracker().get_states(),
    }
    return rng_state_dict


def _load_rng_state_dict(rng_state_dict):
    random.setstate(rng_state_dict['random_rng_state'])
    np.random.set_state(rng_state_dict['np_rng_state'])
    torch.set_rng_state(rng_state_dict['torch_rng_state'])
    torch.cuda.set_rng_state(rng_state_dict['cuda_rng_state'])
    tensor_parallel.get_cuda_rng_tracker().set_states(rng_state_dict['rng_tracker_states'])


================================================
FILE: megatron/core/distributed/fsdp/src/README.md
================================================
<div align="center">

# 🚀 Megatron-FSDP

</div>

<div align="center">

[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/release/python-3100/)

</div>

## ✨ What is Megatron-FSDP?

**Megatron-FSDP** is an NVIDIA-developed PyTorch extension that provides a high-performance implementation of Fully Sharded Data Parallelism (FSDP). It offers seamless cross-compatibility with major deep learning frameworks and parallelism libraries, making it easy to scale your PyTorch models across multiple GPUs and nodes.

Megatron-FSDP can provide up to 25% speed up and 23% memory savings compared to FSDP2.

### Compatibility

- **[PyTorch DTensor](https://docs.pytorch.org/docs/stable/distributed.tensor.html)**
- **[Megatron Core](https://github.com/NVIDIA/Megatron-LM)**
- **[TransformerEngine](https://github.com/NVIDIA/TransformerEngine)**

## ✨ Features

- **Easy Integration**: Simple `fully_shard` function for quick model parallelization
- **High Performance**: Optimized for NVIDIA GPUs with efficient memory management
- **Cross-Framework**: Works seamlessly with PyTorch, Huggingface Transformers, Megatron-LM, Megatron Bridge and TransformerEngine
- **Scalable**: Supports both single-node multi-GPU and multi-node distributed training
- **Flexible Configuration**: Configurable sharding strategies and process groups

## ⚡ Optimizations

- **Advanced Bucketing**: Data-type aware bucketing system to minimize the overhead of collective operations
- **Buffer Management**: Zero copy communication is achieved by reorganizing the storage of parameters and main grad with `ParamAndGradBuffer` class
- **Communication Overlapping**: Improved communication overlap of paramter all-gather and gradient reduce-scatter
- **FP8 Mixed Precision with Transformer Engine**: Compatibility with Transformer Engine enables efficient FP8 mixed precision training
- **Gradient accumulate fusion support with Transformer Engine**: Remove the explicit gradient copy to the communication buffer in backwards pass

### Advanced Collective Communication
- **SM Usage Reduction with SHARP**: FSDP's `All-Gather` (AG) and `Reduce-Scatter` (RS) collectives are designed to overlap with compute kernels. However, standard NCCL communication kernels can consume a significant number of GPU SMs (e.g., 16-32 SMs), "stealing" resources from compute (GEMM) kernels and reducing overall TFLOPS.
- **In-Switch Processing**: We leverage **SHARP** (Scalable Hierarchical Aggregation and Reduction Protocol) to offload these collective operations. SHARP performs aggregation and reduction computations directly on the network switches (InfiniBand or NVLink Switch) instead of on the GPU SMs. This dramatically reduces the SM consumption for communication to **1-6 SM** freeing up GPU resources for compute. It also provides lower communication latency, especially in large, scaled-out workloads.
- **Symmetric Optimizations for MNNVL**: We support **symmetric-based optimizations**, introduced in NCCL v2.27, which enable switch offloading for **Multi-Node NVLink (MNNVL)** systems such as GB200/GB300. This allows the same SM-saving benefits over the high-bandwidth NVLink fabric itself.
- **Hierarchical Collectives**: When an FSDP sharding domain spans both NVLink and InfiniBand, the library utilizes **hierarchical SHARP collectives** (e.g., NVL-SHARP + IB-SHARP) to optimize the communication path across the entire system topology.
<!-- ## 📊 Performance  -->

## 📦 Installation

```
pip install megatron-fsdp
```

- PyPI: https://pypi.org/project/megatron-fsdp/
- Source Code: https://github.com/NVIDIA/Megatron-LM/tree/main/megatron/core/distributed/fsdp/src

## 🚀 Quick Start

### Basic Usage

Transform your PyTorch model to use Fully Sharded Data Parallelism with just a few lines:

```python
import torch
from megatron_fsdp import (
    fully_shard_model,
    fully_shard_optimizer,
)

"""
Enable FSDP with Megatron-FSDP via the `fully_shard_*` API.
"""
# Shard your model.
model = fully_shard_model(
    model,
    fsdp_unit_modules=[
        YourModelLayerClass,
        "import.path.to.model.class.YourModelLayerClass",
    ],
    ...
)
# Shard your optimizer.
optimizer = fully_shard_optimizer(
    torch.optim.Adam(model.parameters(), lr=1e-3)
)

# Your model is now ready for distributed training!
```

### Comparison with FSDP-2

`fully_shard` / `fully_shard_model` / `fully_shard_optimizer` are simple entrypoints into `MegatronFSDP`.

- No need to call `fully_shard` on all the sub-modules, just pass your sub-module classes or import paths to `fully_shard`!
- Seamlessly preserves the identity of your training loop with only a few lines of code and multiple options for initialization:
  - `fully_shard_*` is a two-line change when sharding the model and optimizer separately.
  - `fully_shard` is a one-line change for previously-initialized models and optimizers.

Compare this with FSDP2:

```python
import torch
from torch.distributed.fsdp import fully_shard

# Your existing model and optimizer.
model = YourModel()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Enable FSDP with FSDP2.
for module in model.modules():
    # Sub-Modules to shard.
    if isinstance(module, YourModelLayerClass):
        fully_shard(module)
fully_shard(model)

# Your model is now ready for distributed training!
```

### `torch.compile` Compatibility

Megatron-FSDP is compatible with `torch.compile`, but this feature is still experimental and may introduce performance regressions in some workloads.

## 📖 Megatron-FSDP Comprehensive Walkthrough

### Import `megatron_fsdp`.

```python
import torch
from megatron_fsdp import (
    fully_shard_model,
    fully_shard_optimizer,
    MixedPrecisionPolicy,
)
```

### Set up a distributed environment using `DeviceMesh`.

`DeviceMesh` simplifies the construction of complex arrangements of devices
to support various parallelisms.

```python
from torch.distributed.device_mesh import DeviceMesh

# Initialize DeviceMesh.
device_mesh = torch.distributed.device_mesh.init_device_mesh(
    "cuda",
    mesh_shape=(dp_outer_size, dp_shard_size, cp_size, tp_size),
    mesh_dim_names=("dp_outer", "dp_shard", "cp", "tp"),
)
# Only relevant when using HSDP, where we also need the full DP group for data parallelism,
# This sub-mesh can be provided to distributed samplers or dataloaders.
device_mesh[("dp_outer", "dp_shard")]._flatten("dp")
# Only required if using CP. Otherwise, just pass dp_shard to FSDP.
device_mesh[("dp_shard", "cp")]._flatten("dp_shard_cp")
# Only required if using HSDP. Otherwise, don't pass hybrid_fsdp_group.
device_mesh[("dp_outer", "dp_shard", "cp")]._flatten("hsdp")
hsdp_group = device_mesh["hsdp"].get_group()

# Initialize DeviceMesh for expert parallel (EP) modules when using FSDP + EP.
expert_device_mesh = torch.distributed.device_mesh.init_device_mesh(
    "cuda",
    mesh_shape=(dp_outer_size, expt_dp_shard_size, expt_tp_size),
    mesh_dim_names=("dp_outer", "dp_shard_cp", "tp"),
)
expert_device_mesh[("dp_outer", "dp_shard_cp")].flatten("hsdp")
hsdp_expt_group = expert_device_mesh["hsdp"].get_group()
```

### Convert models into fully-sharded `MegatronFSDP` models with `fully_shard_model`.

This wraps the model in a MegatronFSDP class that schedules the sharding
lifecycle of the model parameters and gradients during training and inference.

```python
model = fully_shard_model(
    # PyTorch (Root) Module
    model,
    # Sharded Modules
    fsdp_unit_modules=[...],
    # Device Mesh
    device_mesh=device_mesh
    # Always required for FSDP or HSDP.
    dp_shard_dim="dp_shard_cp",
    # Set this required argument to use HSDP instead of FSDP. Otherwise, set this to None.
    dp_outer_dim="dp_outer",
    # Only required for TP-sensitive models (i.e. Megatron-LM / TransformerEngine)
    # or when using DTensor-based TP. Otherwise, set this to None.
    tp_dim="tp",
    # Only required when using HSDP. Otherwise, set this to None.
    hybrid_fsdp_group=hsdp_group,
    # Only required when using HSDP + EP. Otherwise, set this to None.
    hybrid_fsdp_expt_group=hsdp_expt_group,
    # Only required for FSDP + EP. Otherwise, set this to None.
    expt_device_mesh=expt_device_mesh,
    # FSDP Sharding Strategy: no_shard (0) / optim (1) / optim_grads (2) / optim_grads_params (3)
    zero_dp_strategy=3,
    outer_dp_sharding_strategy=1,
    # Initialize the model on devices in shards to avoid OOM. Requires device("meta")-init for model.
    init_model_with_meta_device=True,
    # Mixed-Precision Policy for controlling compute and communication precision in Megatron-FSDP.
    mixed_precision_policy=MixedPrecisionPolicy(),
    # Sync parameters and gradients each step. Allows for gradient transformations after backward pass,
    # and synchronizes parameters and gradients across HSDP groups, but deactivates compute-communication
    # overlap going into the subsequent training step.
    sync_model_each_microbatch=True,
    # Preprocess state dict for DCP checkpointing. Required for Torch Distributed Checkpoint.
    preproc_state_dict_for_dcp_ckpt=True,
)
```

The original `torch.nn.Module` can be accessed at `MegatronFSDP.module`.

### Initialize and fully-shard your optimizer on the `MegatronFSDP` model.

Initialize your optimizer on the Megatron-FSDP model distributed `Parameter`(s).
If your optimizer has already been initialized, either use the `fully_shard`
entrypoint, or use `optimizer.add_param_group({"params": model.parameters()})`
after resetting your optimizer state via `optimizer.param_groups.clear()`
and `optimizer.state.clear()`.

```python
optimizer = torch.optim.Optimizer(model.parameters())
```

`fully_shard_optimizer` modifies your `optimizer.step()`, `optimizer.zero_grad()`,
and distributed optimizer parameters to punctually trigger scheduled FSDP operations
for Megatron-FSDP.

```python
fully_shard_optimizer(
    # PyTorch Optimizer
    optimizer,
    # Preprocess state dict for DCP checkpointing.
    # Required for Torch Distributed Checkpoint.
    preproc_state_dict_for_dcp_ckpt=True,
)
```

Extended arguments to `step()` and `zero_grad()` control these FSDP operations:

```python
    optimizer.step(
        ...,
        # Sync all gradients before the optimizer step. Alternatively enabled using
        # `sync_model_each_microbatch=True` in MegatronFSDP.
        sync_grad_before_optimizer_step=True,
        # After `optimizer.step()`, install optimized weights into MegatronFSDP's buffers.
        install_optimized_model_weights=True,
    )

    optimizer.zero_grad(
        ...,
        # Also zero out MegatronFSDP's gradient accumulation buffers.
        zero_grad_buffer=True
    )
```

### `MegatronFSDP` Distributed Checkpointing

Distributed checkpoints can be saved and loaded using Torch DCP. Alternatively,
you can load non-distributed checkpoints before fully-sharding your model with
any existing checkpoint utility compatible with PyTorch Modules.

```python
# Save model and optimizer state.
torch.distributed.checkpoint.save(
    {"model": model.state_dict(), "optimizer": optimizer.state_dict()},
    checkpoint_id=str(CKPT_DIR)
)

# Load model and optimizer state.
ckpt_state_dict = {"model": model.state_dict(), "optimizer": optimizer.state_dict()}
torch.distributed.checkpoint.load(state_dict=ckpt_state_dict, checkpoint_id=str(CKPT_DIR))
# `model.load_state_dict(strict=False)` is only necessary to ignore TE FP8 extra state
# that is missing from the DCP checkpoint but present in TEBaseModule.
# Megatron-FSDP does not support TE FP8 extra state checkpointing with DCP.
model.load_state_dict(ckpt_state_dict["model"], strict=False)
optimizer.load_state_dict(ckpt_state_dict["optimizer"])
```

## ⚙️ `fully_shard` / `MegatronFSDP` API - Advanced Features

Megatron-FSDP's `fully_shard_*` API has a comprehensive set of arguments for fine-tuning your model's performance.

- `fsdp_unit_modules` is a list of sub-module classes or `str` import-paths associated with modules that you want `MegatronFSDP` to fully-shard.
  - Required if `1`, `2`, or `3` are specified as the sharding strategy. Defaults to `None`, in which case Megatron-FSDP will replicate the parameters similar to DDP.
- `zero_dp_strategy` (and `outer_dp_sharding_strategy`) configure different degrees of zero-redundancy data parallelism as described in [ZeRO (Zero Redundancy Optimizer)](https://arxiv.org/abs/1910.02054). It reduces CUDA memory utilization during model training by distributing model parameters, gradients, and optimizer states across multiple devices in the DP `ProcessGroup`, and collectively communicating subsets of parameters and gradients to specific devices when needed for computation or differentiation. More aggressive sharding strategies will entail more communication overhead, with `no_shard` being the least memory efficient but most communication efficient, and `optim_grads_params` being the most memory efficient but least communication efficient. Additionally, `outer_dp_sharding_strategy` supports `no_shard` ([Hybrid-Sharded Data Parallelism (HSDP)](https://arxiv.org/pdf/2304.11277)) and `optim` (`HFSDP` = Fully-Sharded Optimizer State + _HSDP_, requires `zero_dp_strategy='optim_grads_params'`), after specifying the "outer" DP group (`dp_outer_dim` / `hybrid_fsdp_group`).
  - Default: `optim_grads_params` or `3` for `zero_dp_strategy` and `no_shard` or `0` for `outer_dp_sharding_strategy`
  - `0` or `no_shard` implies that your model is not sharded. Similar memory usage to `DDP`.
  - `1` or `optim` implies that your optimizer state is sharded for distributed optimization. Similar to optimizer state sharding in `ZeRO-DP`.
  - `2` or `optim_grads` implies that your optimizer state and gradients are sharded. Similar to `ZeRO-2`.
  - `3` or `optim_grads_params` implies that your optimizer state, gradients, and training parameters are sharded. Similar to `ZeRO-3`.
- `device_mesh` is a [`torch.distributed.DeviceMesh`](https://docs.pytorch.org/docs/stable/distributed.html#devicemesh) that informs `MegatronFSDP` of your distributed environment for sharding in conjunction with hardware configuration and other parallelisms. If not provided, `megatron_fsdp.fully_shard(_model)` will build an FSDP DeviceMesh for you automatically.
  - `dp_shard_dim` is the name of the sub-mesh required for FSDP sharding, and is commonly the flattened combination of the data parallel (DP) and context parallel (CP) sub-meshes.
    - When model parameters are replicated across DP-CP during the backward pass, resultant gradients across DP and CP ranks are reduced simultaneously, normalized by the DP-CP world size. For more information about how ring attention shards the sequence dimension through the attention and non-attention layers of the Transformer, refer to: [Ring Attention with Blockwise Transformers for Near-Infinite Context](https://arxiv.org/abs/2310.01889).
  - `dp_outer_dim` is the name of the sub-mesh corresponding to the "outer" DP group, which is required for replication or sharding in HSDP. `fully_shard` will perform HSDP if `dp_outer_dim` is specified.
  - `tp_dim` is the name of the sub-mesh used for tensor parallelism (TP), which is required for `(FSDP, TP)`-strided sharding when using Megatron-LM or Torch-native `DTensor` TP.
    - For more information about tensor parallelism, refer to: [Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism](https://arxiv.org/abs/1909.08053).
  - `hybrid_fsdp_group` is the `ProcessGroup` which contains all ranks in the flattened `dp_shard_dim` and `dp_outer_dim` sub-meshes utilized to specify the `(DP-Outer, DP-Shard)` sharded mesh coordinates for the weight and gradient buffers. Required for HSDP.
  - `hybrid_fsdp_expt_group` defines the data-parallel communication group for expert parameters. It is required for HSDP.
- `expt_device_mesh` is another [`torch.distributed.DeviceMesh`](https://docs.pytorch.org/docs/stable/distributed.html#devicemesh) tailored for the expert parallel (EP) modules in `MegatronFSDP`.
  - `dp_shard_dim` is the name of the sub-mesh required for FSDP sharding of the EP modules, enabling expert data parallelism (EDP).
  - `tp_dim` is the name of the sub-mesh used for expert tensor parallelism (ETP), which is required for `(FSDP, ETP)`-strided sharding when using Megatron-LM or Torch-native `DTensor` ETP.
- `init_model_with_meta_device` has `MegatronFSDP` initialize your `meta`-device model in shards on every CUDA device to avoid OOM when initializing extremely large models that cannot fit on a single device. Users can initialize their model on a [`meta`-device](https://docs.pytorch.org/docs/stable/meta.html) (`with torch.device('meta'): ...`), and ``MegatronFSDP`` will further shard and initialize the model parameters layer-by-layer adhering to the customizable `module.reset_parameters` method, which prevents the entire model from being allocated in memory at any point during runtime.
    - Defaults to `False`.
    - Note that the `device` argument which installs your model on a specific device or rank will be deactivated when `init_model_with_meta_device=True`.
- `mixed_precision_policy` takes a `megatron_fsdp.MixedPrecisionPolicy` that configures mixed-precision compute and communication for Megatron-FSDP. Configuration options include:
    - `main_params_dtype` controls the data-type for parameters used in distributed optimization or quantization. 
        - Defaults to `torch.float32`.
        - If set to `None`, the native model compute parameter data-type will be utilized.
        - Requires specification (cannot be `None`) when using `FP8` parameters with Megatron-FSDP.
    - `main_grads_dtype` controls the data-type for gradients used in distributed optimization.
        - Defaults to `None`, the model native gradient data-type will be utilized.
        - While `torch.float32` (or higher) is recommended for accuracy at scale, as `main_grads_dtype` controls the data-type for gradient accumulation, `None` is more flexible and uses pre-determined parameter gradient logic in mixed-precision scenarios, such as `BF16` for `FP8`/`FP4` parameters quantized via TransformerEngine.
    - `grad_comm_dtype` controls the data-type for gradient communications (RS / AR) when reducing gradients. Lower precision `grad_comm_dtype` improves (communication) performance, but may increase memory utilization or sacrifice gradient precision in certain cases.
        - Defaults to `None`, the `main_grads_dtype` data-type will be utilized, and no additional memory is allocated when `grad_comm_dtype == main_grads_dtype`.
        - If using HSDP (either DP-Replicate or DP-Outer in `outer_dp_sharding_strategy`), `no_shard`, `optim`, or a `FixedPoolAllocator` (`fsdp_double_buffer`), allocating `dtype`-custom gradient communication buffers (per FSDP group) adds memory overhead of up to 10% or more, and users should consider the performance-memory trade-off when using this feature.
        - If using NCCL UBR v2.27+ (`nccl_ub=True`), gradient reduction may be performed in high-precision depending on the network domain (NVLink or IB), and can enable mixed-precision communication and accumulation, e.g. setting grad_comm_dtype to `BF16` can support `FP32` reduction even though we have `BF16` input and output communication buffers. Otherwise, gradients will be reduced in `grad_comm_dtype` (and accumulated in `main_grads_dtype`) as usual.
- `overlap_grad_reduce` and `overlap_param_gather` will overlap gradient [`reduce-scatter`](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/collectives.html#reducescatter) and parameter [`all-gather`](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/collectives.html#allgather) group communications with backward and forward compute with asynchronous calls and pre-fetching. (In the case of `no_shard`, parameters are not gathered but gradient [`all-reduce`](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/collectives.html#allreduce) is overlapped.)
    - Both default to `True`.
- `sync_model_each_microbatch` will trigger a `wait` (`MegatronFSDP.finish_grad_sync()`) on gradient reduction, parameter de-allocation, and optimizer parameter / gradient installation (in preparation for `optimizer.step()`) after every forward-backward pass. When using HSDP, parameters and gradients will be all-gathered and reduced respectively on the "outer" DP group each training step instead of each optimization cycle. This behavior is desirable for a transparent and user-friendly sharded training loop where post-backward transformations on the gradient and a clean compute / memory state are necessary within and between training iterations, but damages performance in situations where optimization is delayed (e.g. gradient accumulation) when the communications of the previous training iteration can be overlapped with the compute of the next training iteration. Will also override `is_last_microbatch` / `microbatch_count` logic in `MegatronFSDP`.
    - Defaults to `True` for `fully_shard`, but defaults to `False` when using the `MegatronFSDP` class directly.
    - Can also be controlled with the `MegatronFSDP.sync()` context manager, or through invoking `MegatronFSDP.set_model_auto_sync(bool)`.
    - WARNING: When this synchronization feature is activated in conjunction with `no_shard` / `0` or `optim` / `1` sharding strategies, the user is responsible for calling `MegatronFSDP.zero_grad_buffer()` or `optimizer.zero_grad()` after the subsequent forward-backward pass. This is because un-sharded gradients are all-reduced directly into the gradient accumulation buffer, and this buffer should not be all-reduced more than once per optimization cycle! Analogous to the justification for the [`no_sync()` API for PyTorch DistributedDataParallel](https://docs.pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html#torch.nn.parallel.DistributedDataParallel.no_sync).
- `enable_fine_grained_param_gather` modifies FSDP to all-gather parameters with per-Module granularity instead of collectively unsharding all sub-modules of a unit module in Megatron-FSDP.
    - Defaults to `False`.
- `keep_fp8_transpose_cache` will keep the fp8 transpose cache when using `MegatronFSDP`. This option will cause (number of parameter $\times$ 1 Byte) of memory overhead, but can skip the weight transpose operation in the backward propagation. This feature will not give any benefit from the Blackwell architecture.
    - Defaults to `False`.
- `nccl_ub` will allocate and register the NCCL userbuffer for param and grad buffers. This option enables an SM-efficient NCCL algorithm that could improve the performance of overlapped computations. This flag will be much more effective when used together with SHARP if the FSDP communication includes both NVL and IB domains. Enabling this option will cause additional memory overhead due to the requirement to enable the `fsdp_double_buffer` option.
    - **Only effective when using with Megatron-Core.**
    - Defaults to `False`.
    - By default we try to use NCCL window (symmetric) registration if it is available. If not it falls back to conventional local registration.
- `fsdp_manual_registration` will manually register the FSDP communication buffers with the NCCL user buffer. For symmetric registration with large models, the registration itself can take a significant amount of time. This option minimizes the number of registration calls to reduce the registration time. However, with this option enabled, you need to manually call the `ParamAndGradBuffer.manual_buffer_registration()` function after the first iteration. This is already implemented in the Megatron-LM training loop. In other use cases, users are expected to call this function themselves.
    - **Only effective when using with Megatron-Core.**
    - This option is only effective when `nccl_ub` is enabled.
    - Defaults to `False`.
- `disable_symmetric_registration` will disable NCCL window (i.e. symmetric) registration when using `nccl_ub`. 
    - Defaults to `False`.
- `fsdp_double_buffer` will use persistently allocated double buffers for temporarily-defined memory needed in `MegatronFSDP` communications. Having persistent double buffers may increase peak VRAM utilization, but is required to register NCCL user buffers (`nccl_ub=True`) for `MegatronFSDP`. Currently, this is only supported for simple repetitive model structures such as GPT.
    - Defaults to `False`. Automatically overridden to `True` when `nccl_ub` is enabled.
- `preproc_state_dict_for_dcp_ckpt` adds `model.state_dict()` and `optimizer.state_dict()` post-hooks that modify the model and optimizer state in preparation for `torch.distributed.checkpoint.{save,load}` ([Torch DCP](https://docs.pytorch.org/docs/stable/distributed.checkpoint.html)) checkpointing. Specifically, it adds `__create_write_items__` and `__create_chunk_list__` methods to Tensors utilized by Torch DCP to redistribute parameters when saving and loading model and optimizer checkpoints. Can be deactivated should the user need a custom distributed checkpointing strategy.
    - Defaults to `True`.

## 🧮 Using Megatron-FSDP with [`TransformerEngine`](https://github.com/NVIDIA/TransformerEngine)

Megatron-FSDP natively supports mixed-precision activations and parameter sharding in conjunction with [TransformerEngine](https://github.com/NVIDIA/TransformerEngine).

- Within the [`transformer_engine.pytorch.autocast(recipe: transformer_engine.common.recipe.Recipe)`](https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/api/pytorch.html#transformer_engine.pytorch.autocast) context, model activations are converted based on the recipe.
- Within the [`transformer_engine.pytorch.quantized_model_init(recipe: transformer_engine.common.recipe.Recipe)`](https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/api/pytorch.html#transformer_engine.pytorch.quantized_model_init) context, TransformerEngine native modules (e.g. [`transformer_engine.pytorch.TransformerLayer`](https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/api/pytorch.html#transformer_engine.pytorch.TransformerLayer)) have their parameters converted based on the recipe.
    - Requires FP8 model activations, i.e. `transformer_engine.pytorch.autocast`.

```python
# FP8 Recipe
fp8_recipe = transformer_engine.common.recipe.MXFP8BlockScaling(
    fp8_format=transformer_engine.common.recipe.Format.HYBRID,
)

# Construct TransformerEngine model with FP8 parameters.
with transformer_engine.pytorch.quantized_model_init(
    recipe=fp8_recipe,
    # Needed for FP8 parameters with Megatron-FSDP.
    preserve_high_precision_init_val=True,
):
    te_model = transformer_engine.pytorch.TransformerLayer(...)

# Fully-shard the model.
mfsdp_model = fully_shard_model(
    module=te_model,
    fsdp_unit_modules=[te.pytorch.TransformerLayer],
    # Only FSDP / ZeRO-3 supports FP8 parameters.
    zero_dp_strategy=3,
    # FP32 main weights needed for FP8 parameters.
    mixed_precision_policy=MixedPrecisionPolicy(
        main_params_dtype=torch.float32
    ),
    # Needed for select FP8 recipes.
    keep_fp8_transpose_cache=True,
)

# Evaluate and differentiate the model with FP8 activations.
with transformer_engine.pytorch.autocast(recipe=fp8_recipe):
    mfsdp_model(x).sum().backward()
```

ℹ️ `TransformerEngine` kernels have a fair bit of configuration constraints when using FP8-quantized parameters, such as using fused QKV parameters or defining activations and parameters with shapes compatible to FP8 CuBLAS kernels on supported hardware from NVIDIA. To properly initialize `TransformerLayer`, you can refer to the toy model used in our FP8 unit tests: `Megatron-LM/tests/unit_tests/distributed/fsdp/test_mfsdp_fully_shard.py::TestMegatronFsdpFullyShard::test_fully_shard_te_quantized`.

================================================
FILE: megatron/core/distributed/fsdp/src/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: megatron/core/distributed/fsdp/src/megatron_fsdp/__init__.py
================================================
# Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .distributed_data_parallel_config import DistributedDataParallelConfig
from .fully_shard import fully_shard, fully_shard_model, fully_shard_optimizer
from .megatron_fsdp import MegatronFSDP
from .mixed_precision import MixedPrecisionPolicy
from .package_info import (
    __contact_emails__,
    __contact_names__,
    __description__,
    __download_url__,
    __homepage__,
    __keywords__,
    __license__,
    __package_name__,
    __repository_url__,
    __shortversion__,
    __version__,
)
from .utils import FSDPDistributedIndex

__all__ = [
    "DistributedDataParallelConfig",
    "MegatronFSDP",
    "FSDPDistributedIndex",
    "MixedPrecisionPolicy",
    "fully_shard",
    "fully_shard_model",
    "fully_shard_optimizer",
    "__contact_emails__",
    "__contact_names__",
    "__description__",
    "__download_url__",
    "__homepage__",
    "__keywords__",
    "__license__",
    "__package_name__",
    "__repository_url__",
    "__shortversion__",
    "__version__",
]


================================================
FILE: megatron/core/distributed/fsdp/src/megatron_fsdp/distributed_data_parallel_config.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from dataclasses import dataclass
from typing import Optional

import torch


@dataclass
class DistributedDataParallelConfig:
    """
    Megatron-FSDP `fully_shard` API sub-configuration
    derived from Megatron-Core DistributedDataParallel.
    """

    overlap_grad_reduce: bool = False
    """If true, overlap grad all-reduce / reduce-scatter with backward compute."""

    overlap_param_gather: bool = False
    """If true, overlap param all-gather with forward compute."""

    check_for_nan_in_grad: bool = False
    """
    If True, check for NaNs and Infs in gradients _before_ communication collective.
    Invoked by `start_grad_sync` such as in the Megatron-LM DDP training API.
    """

    bucket_size: Optional[int] = None
    """Maximum number of parameters in each bucket. If unspecified, MCore uses a default
       value of max(40000000, 1000000 * dp_size) parameters (larger DP sizes need larger
       buckets to ensure collectives do not become latency-bound)."""

    average_in_collective: bool = False
    """If true, compute average in collective directly, as opposed to dividing by the
       dp_size first and then computing sum in the collective."""

    fp8_param_gather: bool = False
    """If true, keep the compute param in fp8 (do not use any other intermediate dtype) and
       perform the param all-gather in fp8."""

    data_parallel_sharding_strategy: str = 'no_shard'
    """Sharding strategy for FSDP. Valid values are 'no_shard', 'optim',
      'optim_grads', 'optim_grads_params'."""

    gradient_reduce_div_fusion: bool = True
    """If true, perform gradient reduce and division fusion."""

    suggested_communication_unit_size: int = None
    """Specifies the number of elements to communicate at once during
      FSDP (Fully Sharded Data Parallel) operations. 
      This flag also affects FSDP all-gather prefetch behavior. Setting a larger
      value increases the communication buffer size, while a smaller value
      disables prefetching and may degrade performance. Adjust this value
      based on your system's memory and performance requirements."""

    keep_fp8_transpose_cache: bool = False
    """If true, keep the fp8 transpose cache when using Megatron FSDP."""

    nccl_ub: bool = False
    """If true, allocate and register NCCL userbuffer for param and grad buffer.
      This flag enables SM efficient nccl algorithm that could improve the performance
      of FSDP and DP with comm_overlap. This flag will be much more effective when used
      together with sharp. 
      The follwoing will be the expected number of SM usage for various cases.
      (Note that this is just a reference number and the number of SM usage could vary 
      on message size, communication domain size and nccl version.)
      | Communication domain | use_sharp | SM usage of "AG/RS" |
      |----------------------|-----------|---------------------|
      | NVL                  | N/A       | 4 / 5               |
      | NVL+IB               | False     | 16 / 16             |
      | NVL+IB               | True      | 6 / 6               |
      | IB                   | False     | 1 / 4               |
      | IB                   | True      | 1 / 1               |
    """

    fsdp_double_buffer: bool = False
    """If true, use persistently allocated double buffers for the 
      temporary memory needed in the Megatron FSDP communications.
      This option will cause additional memory overhead, however, it is necessary for
      to register user buffer (nccl_ub=True) for the Megatron FSDP. 
      This option will be automatically set to True when nccl_ub=True.
    """

    fsdp_all_gather_in_start_param_sync: bool = True
    """
    If True, use all-gather during the initial Megatron-FSDP parameter
    synchronization step. This can increase overlap between the first
    parameter all-gather and computation, helping to better hide the
    initial communication cost.
    """

    fsdp_db_use_persist_buf_on_alloc_fail: bool = False
    """Whether to fall back to persistent buffer when a bucket does not
       fit FSDP double buffer size. If true, FSDP will use the persistently 
       allocated buffer for the bucket that does not fit, it will enable NCCL 
       user buffer with the cost of more memory usage. If false, FSDP will use
       Dynamic memory allocator, NCCL user buffer won't not enabled, which 
       usually leads to low performance. 
    """

    outer_dp_sharding_strategy: str = 'no_shard'
    """
    Sharding strategy for outer data parallel group in Hybrid Sharded Data Parallel (HSDP) mode.
    Valid values are 'no_shard', 'optim'. This option is only effective when Hybrid FSDP is enabled.
    """

    disable_symmetric_registration: bool = False
    """If true, disable symmetric (window) registration for NCCL userbuffer registration.
      This option will force to use conventional (local) userbuffer registration 
      when nccl_ub is set.
    """

    fsdp_manual_registration: bool = False
    """If true, manually register the FSDP communication buffers to NCCL user buffer.
      This option is only effective when use_megatron_fsdp and nccl_ub is set.
      For symmetric registration with large models, the registration itself can take 
      a significant amount of time. This option minimizes the number of registration calls
      to minimize the registration time.
    """

    megatron_fsdp_main_params_dtype: Optional[torch.dtype] = torch.float32
    """Data type for the main weight buffer utilized for distributed optimization
      and quantization with Megatron-FSDP. If set to None, the model compute weight
      buffer will take the role of the main weights, or when no sharding is applied,
      the native model weights become the main weights. Defaults to torch.float32.
    """

    megatron_fsdp_main_grads_dtype: Optional[torch.dtype] = None
    """Data type for the main gradient buffer utilized for distributed optimization with
      Megatron-FSDP. If set to None, main gradients will match the dtype of the model
      compute parameters specified by the user model. Defaults to None.
    """

    megatron_fsdp_grad_comm_dtype: Optional[torch.dtype] = None
    """Data type for gradient gather / scatter communications. Can be utilized to reduce
      communication latency, but adds overhead for type-casting and copy operations.
      If using NCCL UBR v2.27+, gradient reduction may be performed in high-precision
      depending on the network domain (NVLink or IB), and can enable mixed-precision
      communication and accumulation, e.g. setting grad_comm_dtype to `BF16` can support
      `FP32` reduction even though we have `BF16` input and output communication buffers.
      If set to None, the `main_grads_dtype` is used. If using HSDP (either DP-Replicate
      or DP-Outer in `outer_dp_sharding_strategy`), `no_shard`, `optim`, or a
      `FixedPoolAllocator` (`fsdp_double_buffer`), allocating `dtype`-custom gradient
      communication buffers (per FSDP group) adds memory overhead. Defaults to None.
      No additional memory is allocated when `grad_comm_dtype == main_grads_dtype`.
    """

    def __post_init__(self):
        import os

        """Check the validity of the config."""
        if self.nccl_ub:
            if 'expandable_segments:True' in os.getenv('PYTORCH_CUDA_ALLOC_CONF', '').split(','):
                raise ValueError(
                    "PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True is currently not supported "
                    "with nccl_ub due to compatibility issue with torch.cuda.MemPool API."
                )


================================================
FILE: megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import types
from enum import IntEnum
from typing import Callable, Optional, Sequence, Type

import torch
from torch.distributed import DeviceMesh
from torch.distributed.device_mesh import init_device_mesh
from torch.distributed.tensor import DTensor

from .megatron_fsdp import MegatronFSDP
from .mixed_precision import MixedPrecisionPolicy
from .uneven_dtensor import preprocess_state_dict_for_uneven_dtensor
from .utils import FSDPDistributedIndex, create_updated_function_signature

try:
    # Default to Megatron-LM FW.
    from megatron.core.distributed.distributed_data_parallel_config import (
        DistributedDataParallelConfig,
    )
except ImportError:
    # Megatron-LM is not installed, use Megatron-FSDP as a standalone module.
    from .distributed_data_parallel_config import DistributedDataParallelConfig


logger = logging.getLogger(__name__)


class ShardingStrategy(IntEnum):
    """
    IntEnum to track the abbreviated sharding strategy for Megatron-FSDP.

    - `0` or `no_shard` implies that your model is not sharded. Similar memory usage to `DDP`.
    - `1` or `optim` implies that your optimizer state is sharded. Similar to optimizer
        state sharding in `ZeRO-DP`.
    - `2` or `optim_grads` implies that your optimizer state and gradients are sharded.
        Similar to optimizer state and gradient sharding in `ZeRO-2`.
    - `3` or `optim_grads_params` implies that your optimizer state, gradients, and
        training parameters are sharded. Similar to optimizer state, gradient, and
        training parameter sharding in `ZeRO-3`.
    """

    NO_SHARD = 0
    OPTIM = 1
    OPTIM_GRADS = 2
    OPTIM_GRADS_PARAMS = 3


def experimental_api(func: Callable) -> Callable:
    """
    Mark a function or class as experimental API in Megatron CI/CD.

    TODO(@cspades): Copied from megatron.core.utils to avoid depending on MCore
    for Megatron-FSDP. Should remove when the API is no longer experimental.
    """
    func._experimental_api = True
    return func


@experimental_api
def fully_shard_model(
    module: torch.nn.Module,
    device_mesh: Optional[DeviceMesh] = None,
    dp_shard_dim: Optional[str] = None,
    dp_outer_dim: Optional[str] = None,
    tp_dim: Optional[str] = None,
    hybrid_fsdp_group: Optional[torch.distributed.ProcessGroup] = None,
    hybrid_fsdp_expt_group: Optional[torch.distributed.ProcessGroup] = None,
    expt_device_mesh: Optional[DeviceMesh] = None,
    fsdp_unit_modules: Optional[Sequence[Type[torch.nn.Module]] | Sequence[str]] = None,
    zero_dp_strategy: str | int = 3,
    outer_dp_sharding_strategy: str | int = 0,
    device: Optional[torch.device] = None,
    init_model_with_meta_device: bool = False,
    mixed_precision_policy: MixedPrecisionPolicy = MixedPrecisionPolicy(),
    overlap_grad_reduce: bool = True,
    overlap_param_gather: bool = True,
    sync_model_each_microbatch: bool = True,
    preproc_state_dict_for_dcp_ckpt: bool = True,
    report_nan_in_param_grad: bool = False,
    average_in_collective: bool = False,
    disable_bucketing: bool = False,
    calculate_per_token_loss: bool = False,
    keep_fp8_transpose_cache: bool = False,
    nccl_ub: bool = False,
    fsdp_double_buffer: bool = False,
    fsdp_db_use_persist_buf_on_alloc_fail: bool = False,
    disable_symmetric_registration: bool = False,
    enable_fine_grained_param_gather: bool = False,
) -> torch.nn.Module:
    """
    Fully-shard the model for Megatron-FSDP. This wraps the model in a MegatronFSDP
    class that schedules the sharding lifecycle of the model parameters and gradients
    during training and inference.

    The original `torch.nn.Module` can be accessed at `MegatronFSDP.module`.

    Args:
        module (torch.nn.Module):
            The PyTorch module fully-sharded and managed by Megatron-FSDP.

        device_mesh (Optional[DeviceMesh]):
            Device mesh object defining the topology for distributed training. If not provided,
            Megatron-FSDP will build a default FSDP DeviceMesh.

        dp_shard_dim (Optional[str]):
            Name of the data parallel sharding sub-mesh in the device_mesh. Supports
            a flattened DP-CP sub-mesh, in which case parameters, gradients, and
            optimizer state will be sharded across both DP and CP ranks.

        dp_outer_dim (Optional[str]):
            Name of the "outer" DP sub-mesh in the device_mesh for hybrid-sharding (HSDP),
            which supports "DP-Replicate" as well as optimizer state sharding (HFSDP).
            Defaults to None. Required for HSDP, which is enabled by this argument.

        tp_dim (Optional[str]):
            Name of the tensor parallel sub-mesh in the device_mesh, which is necessary
            for strided sharding between TP and FSDP (and fully-sharded HSDP) dimensions.
            Defaults to None. Required if TP is used in the model, or if TransformerEngine
            layers are utilized, as TE defaults to "TP=1".

        hybrid_fsdp_group (Optional[torch.distributed.ProcessGroup]):
            Cumulative data parallel process group for hybrid FSDP that can be manufactured
            by flattening the outer-FSDP (dp_outer_dim) and FSDP (dp_shard_dim) process groups
            or sub-meshes. Defaults to None. Required for HSDP, i.e. if dp_outer_dim is not None.

        expt_device_mesh (Optional[DeviceMesh]):
            Expert parallel device mesh object defining the topology for MoE distributed training.
            Utilizes the mesh dimension names specified by the *_dim arguments.

        fsdp_unit_modules (Optional[Sequence[Type[torch.nn.Module]] | Sequence[str]]):
            List of (sub-)module classes or (sub-)module class import paths that are "units",
            which are torch.nn.Module(s) that are sharded and scheduled by Megatron-FSDP.
            In particular, FSDP unit module parameters can be "safely" deallocated after
            the forward() or backward() pass without interfering with other computational
            operations that rely on those parameters in the complete PyTorch model.
            This information is utilized by Megatron-FSDP to optimally shard, gather, and
            overlap communications during the forward and backward pass of the module.
            Defaults to None, which is peak-memory-equivalent to DDP / "no_shard".

        zero_dp_strategy (str | int):
            Zero-redundancy sharding strategy for sharding data parallel parameters and gradients.
            - "no_shard" / 0: No optimizer, gradient, or parameter sharding. Similar
                memory usage to DDP.
            - "optim" / 1: Shards optimizer states (and main weights for mixed precision training),
                which is conceptually similar to optimizer state sharding in `ZeRO-DP`.
            - "optim_grads" / 2: Shards gradients and optimizer states, which is conceptually
                similar to "ZeRO-2".
            - "optim_grads_params" / 3: Shards parameters, gradients and optimizer states, which
                is conceptually similar to "ZeRO-3".
            Defaults to "optim_grads_params" / 3.

        outer_dp_sharding_strategy (str | int):
            Sharding strategy for outer data parallel group in Hybrid Sharded Data Parallel (HSDP).
            Shares the same semantics as zero_dp_strategy, but only 'no_shard' / 0 (DP Replication)
            and 'optim' / 1 (Optimizer State Hybrid Sharding) are supported, and 'optim' / 1 is only
            supported when zero_dp_strategy='optim_grads_params'.
            This option is only effective when HSDP is enabled, i.e. when dp_outer_dim is not None.
            Defaults to "no_shard" / 0, which replicates model parameters across the dp_outer group.

        device (Optional[torch.device]):
            Target device for the sharded model. Used to migrate all parameters in the model
            to an expected device. If init_model_with_meta_device=True, this argument is ignored.
            Defaults to None.

        init_model_with_meta_device (bool):
            Utilized to initialize large models that do not fit on a single device, and requires
            implementing a custom Module.reset_parameters() or Module._reset_parameters() method.
            Defaults to False.

        mixed_precision_policy (megatron_fsdp.MixedPrecisionPolicy):
            Megatron-FSDP mixed-precision config that controls compute and communication precision.
            Default values are defined in `megatron_fsdp.MixedPrecisionPolicy`.

        overlap_grad_reduce (bool):
            Whether to overlap gradient reduce-scatter (or all-reduce) with backward compute.
            Defaults to True.

        overlap_param_gather (bool):
            Whether to overlap parameter all-gather with forward and backward compute.
            Defaults to True.

        sync_model_each_microbatch (bool): Whether to sync parameters and install gradients on
            each training step. When disabled, Megatron-FSDP will overlap reduce-scatter with
            subsequent compute and delay HSDP gather and reduce operations per optimization cycle,
            which improves performance and throughput when using delayed optimization strategies
            such as gradient accumulation. Defaults to True, can be modified before the model
            forward / backward pass via MegatronFSDP.set_model_auto_sync(bool) or controlled
            with the (no_)sync context managers or microbatch_count and is_last_microbatch.

        preproc_state_dict_for_dcp_ckpt (bool):
            Whether to preprocess the unevenly-sharded state dict for DCP checkpointing,
            for both the model and the optimizer.
            Defaults to True.

        report_nan_in_param_grad (bool):
            Whether to precisely check for NaN values in gradients for every weight. Can
            significantly degrade performance. Defaults to False.

        average_in_collective (bool):
            Whether to average gradients in collective communication. Defaults to False.

        disable_bucketing (bool):
            Whether to disable gradient bucketing optimization, which permits more granular
            and precise communication of parameters and gradients. Defaults to False.

        calculate_per_token_loss (bool):
            Whether to calculate loss per token, which deactivates gradient scaling.
            Defaults to False.

        keep_fp8_transpose_cache (bool):
            Whether to keep the FP8 transpose cache when using a Megatron FSDP.
            Defaults to False.

        nccl_ub (bool):
            Whether to use NCCL UCC for communication. Defaults to False.

        fsdp_double_buffer (bool):
            Whether to use double buffer for FSDP. Defaults to False.

        fsdp_db_use_persist_buf_on_alloc_fail (bool):
            Whether to fall back to persistent buffer allocator when a bucket does not
            fit FSDP double buffer size.

        disable_symmetric_registration (bool):
            Whether to disable symmetric (window) registration for NCCL UB registration.
            This option forces conventional (local) UB registration when nccl_ub is set.
            Defaults to False.

        enable_fine_grained_param_gather (bool):
            Whether to enable "fine-grained" param all-gather, which can improve performance
            when using MXFP8 parameters with activation recomputation. Specifically, it
            unshards parameters per-Module instead of unsharding all sub-modules of an FSDP
            unit module simultaneously. Defaults to False.

    Returns:
        model (MegatronFSDP): The wrapped Megatron-FSDP model configured for FSDP.
    """
    # If no DeviceMesh or FSDP dimension is provided, then build an FSDP DeviceMesh.
    # Modify arguments into arguments necessary for vanilla FSDP.
    if device_mesh is None:
        if dp_shard_dim is None:
            dp_shard_dim = "fsdp"
        if tp_dim is None:
            # Trivial TP dimension to seamlessly support TransformerEngine.
            tp_dim = "tp"
        # Deactivate DP-Outer, which needs to be consistent with Expert DeviceMesh.
        dp_outer_dim = None
        hybrid_fsdp_group = None
        outer_dp_sharding_strategy = ShardingStrategy.NO_SHARD
        device_mesh = init_device_mesh(
            device_type="cuda",
            mesh_shape=(torch.distributed.get_world_size(), 1),
            mesh_dim_names=(dp_shard_dim, tp_dim),
        )

    # Parse zero_dp_strategy and outer_dp_sharding_strategy.
    # TODO(@cspades): Integrate this Enum into MegatronFSDP.
    if zero_dp_strategy == ShardingStrategy.NO_SHARD:
        zero_dp_strategy = "no_shard"
    elif zero_dp_strategy == ShardingStrategy.OPTIM:
        zero_dp_strategy = "optim"
    elif zero_dp_strategy == ShardingStrategy.OPTIM_GRADS:
        zero_dp_strategy = "optim_grads"
    elif zero_dp_strategy == ShardingStrategy.OPTIM_GRADS_PARAMS:
        zero_dp_strategy = "optim_grads_params"
    elif zero_dp_strategy in ["no_shard", "optim", "optim_grads", "optim_grads_params"]:
        # Valid string sharding strategy.
        pass
    else:
        # Invalid sharding strategy.
        raise ValueError(
            f"Invalid FSDP / Inner DP Sharding Strategy: {zero_dp_strategy}\n"
            f"Valid Sharding Strategies: {ShardingStrategy.NO_SHARD}, "
            f"{ShardingStrategy.OPTIM}, {ShardingStrategy.OPTIM_GRADS}, "
            f"{ShardingStrategy.OPTIM_GRADS_PARAMS}, "
            "no_shard, optim, optim_grads, optim_grads_params"
        )
    if outer_dp_sharding_strategy == ShardingStrategy.NO_SHARD:
        outer_dp_sharding_strategy = "no_shard"
    elif outer_dp_sharding_strategy == ShardingStrategy.OPTIM:
        outer_dp_sharding_strategy = "optim"
    elif outer_dp_sharding_strategy in ["no_shard", "optim"]:
        # Valid string sharding strategy.
        pass
    else:
        # Invalid sharding strategy.
        raise ValueError(
            f"Invalid Hybrid DP-Outer Sharding Strategy: {outer_dp_sharding_strategy}\n"
            f"Valid Sharding Strategies: {ShardingStrategy.NO_SHARD}, "
            f"{ShardingStrategy.OPTIM}, no_shard, optim"
        )

    # Validate more arguments.
    _outer_fsdp_sharding = outer_dp_sharding_strategy == "optim"
    if _outer_fsdp_sharding and zero_dp_strategy != "optim_grads_params":
        # If sharding on outer DP using HSDP, then we must use HSDP buffers and
        # we must be fully-sharding on inner DP. HSDP is an extension of FSDP.
        # TODO(@shjwudp, @cspades): Requires various modifications to support.
        raise ValueError(
            f"Sharding with Hybrid (Fully) Sharded Data Parallel (HSDP) requires "
            "zero_dp_strategy to use FSDP ('optim_grads_params', 3), because "
            "outer sharding is dependent on inner sharding."
        )
    if (dp_outer_dim is None) ^ (hybrid_fsdp_group is None):
        # XOR - HSDP requires both or neither of dp_outer_dim and hybrid_fsdp_group
        # to be specified, so if XOR then raise an error.
        raise ValueError(
            f"dp_outer_dim={dp_outer_dim} and hybrid_fsdp_group={hybrid_fsdp_group} must be "
            "specified together for Hybrid FSDP (HSDP), or both set to None (for FSDP)."
        )
    if init_model_with_meta_device and zero_dp_strategy == "no_shard":
        raise ValueError(
            "Meta device initialization (init_model_with_meta_device=True) is not "
            "supported or necessary for the 'no_shard' / 0 sharding strategy."
        )

    # DDP Config for Megatron FSDP.
    ddp_config = DistributedDataParallelConfig(
        data_parallel_sharding_strategy=zero_dp_strategy,
        outer_dp_sharding_strategy=outer_dp_sharding_strategy,
        overlap_grad_reduce=overlap_grad_reduce,
        overlap_param_gather=overlap_param_gather,
        average_in_collective=average_in_collective,
        keep_fp8_transpose_cache=keep_fp8_transpose_cache,  # pylint: disable=C0301
        nccl_ub=nccl_ub,
        fsdp_double_buffer=fsdp_double_buffer or nccl_ub,
        fsdp_db_use_persist_buf_on_alloc_fail=fsdp_db_use_persist_buf_on_alloc_fail,
        disable_symmetric_registration=disable_symmetric_registration,
    )

    # Create FSDPDistributedIndex.
    dist_index = FSDPDistributedIndex(
        device_mesh=device_mesh,
        # Always required for Megatron-FSDP.
        dp_shard_dim=dp_shard_dim,
        # Only required for HSDP.
        dp_outer_dim=dp_outer_dim,
        # TODO(@cspades): TP sub-mesh should be optional if not using TP, but is
        # required for Megatron, TransformerEngine (default TP=1), and strided
        # sharding when using DTensor-based TP.
        tp_dim=tp_dim,
        # Only required for HSDP.
        hybrid_fsdp_group=hybrid_fsdp_group,
        # Only required for HSDP + EP.
        hybrid_fsdp_expt_group=hybrid_fsdp_expt_group,
        # Access to flattened DP rank assignments for HSDP.
        hsdp_outer_dp_shard=_outer_fsdp_sharding,
        # Only required for Megatron-FSDP + EP.
        expt_device_mesh=expt_device_mesh,
    )

    # Wrap model in Megatron FSDP.
    model = MegatronFSDP(
        module=module,
        dist_index=dist_index,
        ddp_config=ddp_config,
        mixed_precision_policy=mixed_precision_policy,
        fsdp_unit_modules=fsdp_unit_modules,
        disable_bucketing=disable_bucketing,
        device=device,
        calculate_per_token_loss=calculate_per_token_loss,
        init_model_with_meta_device=init_model_with_meta_device,
        sync_model_each_microbatch=sync_model_each_microbatch,
        enable_fine_grained_param_gather_hook=enable_fine_grained_param_gather,
        report_nan_in_param_grad=report_nan_in_param_grad,
    )

    # Register a state dict post-hook to add Torch DCP metadata for writing checkpoints.
    if preproc_state_dict_for_dcp_ckpt and zero_dp_strategy != "no_shard":

        def remove_te_extra_state(state_dict):
            # Megatron-FSDP does not support FP8 extra state checkpointing in TE.
            extra_state_keys = [k for k in state_dict.keys() if k.endswith("_extra_state")]
            for key in extra_state_keys:
                state_dict.pop(key)

        def preprocess_dcp_and_te_extra_state(state_dict):
            # Preprocess the state dict for uneven DTensor checkpointing.
            remove_te_extra_state(state_dict)
            return preprocess_state_dict_for_uneven_dtensor(state_dict)

        model._register_state_dict_hook(
            lambda module, state_dict, prefix, local_metadata: preprocess_dcp_and_te_extra_state(
                state_dict
            )
        )

    # Return the wrapped Megatron-FSDP model.
    return model


@experimental_api
def fully_shard_optimizer(
    optimizer: torch.optim.Optimizer, preproc_state_dict_for_dcp_ckpt: bool = True
) -> torch.optim.Optimizer:
    """
    Fully shard the optimizer for Megatron-FSDP. This is an in-place operation on the optimizer
    instance, which modifies the optimizer to call methods exposed by the MegatronFSDP model API.

    The optimizer should be registered on the MegatronFSDP distributed model parameters:
    ```
        # Fully-shard the model.
        mfsdp_model = fully_shard_model(model, ...)

        # Register the fully-sharded parameters with the optimizer.
        # Use MegatronFSDP._replace_param_with_distributed_if_needed()
        # to swap to the distributed optimizer state parameters.
        optimizer = fully_shard_optimizer(Adam(params=mfsdp_model.parameters()))
    ```

    Args:
        optimizer (torch.optim.Optimizer):
            (Distributed) optimizer for training the model, which is extended to automatically
            execute necessary Megatron-FSDP operations during the training loop.

        preproc_state_dict_for_dcp_ckpt (bool):
            Whether to preprocess the state dict for DCP checkpointing. Defaults to True.

    Returns:
        optimizer (torch.optim.Optimizer): The in-place modified optimizer for Megatron-FSDP.
    """
    # Extract a reference to MegatronFSDP from the first registered Parameter.
    if not optimizer.param_groups:
        raise ValueError(
            f"[MegatronFSDP fully_shard_optimizer()] Provided optimizer doesn't "
            f"have any registered parameters: {optimizer}"
        )
    first_mfsdp_param = optimizer.param_groups[0][next(iter(optimizer.param_groups[0]))][0]
    if not getattr(first_mfsdp_param, "_megatron_fsdp_model", None):
        raise ValueError(
            f"[MegatronFSDP fully_shard_optimizer()] Could not retrieve a reference to "
            f"MegatronFSDP from the first registered Parameter: {first_mfsdp_param} \n"
            "Make sure the optimizer is registered to the MegatronFSDP distributed "
            "parameters via MegatronFSDP._replace_param_with_distributed_if_needed() "
            "before initializing the optimizer on the MegatronFSDP model. "
        )
    mfsdp_model = first_mfsdp_param._megatron_fsdp_model

    # Save a reference to the optimizer.step() and optimizer.zero_grad() methods.
    optimizer_step_base_func = type(optimizer).step
    optimizer_zero_grad_base_func = type(optimizer).zero_grad

    # Pre-initialize the optimizer state for checkpoint loading via DCP.
    for group in optimizer.param_groups:
        for param in group["params"]:
            if param.numel() == 0 or (
                hasattr(param, "_local_tensor") and param._local_tensor.numel() == 0
            ):
                # Avoid FusedAdam errors on empty tensor input.
                continue
            # Optimizer state is built from wgrad.
            param.grad = torch.zeros_like(param)
    # Non-lazy optimizer state initialization.
    optimizer.step()
    optimizer.zero_grad()

    # Define a new optimizer.step() method that distributes optimizer state and gradients,
    # waits for asynchronous gradient reduce-scatter work to be completed, and updates
    # model weights. These options can be turned off via arguments in optimizer.step().
    def megatron_fsdp_optimizer_step(optimizer, *args, **kwargs):
        # Extract extended kwargs.
        sync_grad_before_optimizer_step = kwargs.pop("sync_grad_before_optimizer_step", True)
        install_optimized_model_weights = kwargs.pop("install_optimized_model_weights", True)

        # Synchronize reduce-scatter and all-gather operations for all model gradients
        # and parameters, attach gradients to the optimizer state, and replace the raw
        # module parameters with Megatron-FSDP-managed optimizer parameters & states in
        # preparation for (distributed) optimization.
        # NOTE: Only necessary if MegatronFSDP.model_auto_sync = False, in which case
        # gradient synchronization is not automatically handled by MegatronFSDP during
        # the post-backward hook and we need to synchronize manually.
        if sync_grad_before_optimizer_step and not mfsdp_model.model_auto_sync:
            mfsdp_model.finish_grad_sync()

        # Execute the base optimizer.step() on the model optimizer named parameters.
        optimizer_step_base_func(optimizer, *args, **kwargs)

        # Update the raw module training parameters with optimized values.
        if install_optimized_model_weights:
            mfsdp_model.install_optimized_model_weights()

    # Define a new optimizer.zero_grad() method that zeros the gradient in both
    # the optimizer as well as the Megatron-FSDP gradient buffer. These options
    # can be turned off via arguments in optimizer.zero_grad().
    def megatron_fsdp_optimizer_zero_grad(optimizer, *args, **kwargs):
        # Extract extended kwargs.
        zero_grad_buffer = kwargs.pop("zero_grad_buffer", True)

        # Execute the base optimizer.zero_grad() on the model optimizer named parameters.
        optimizer_zero_grad_base_func(optimizer, *args, **kwargs)

        # Zero out the gradient in the Megatron-FSDP gradient buffer.
        if zero_grad_buffer:
            mfsdp_model.zero_grad_buffer()

    # Override the optimizer.step() and optimizer.zero_grad() methods to support
    # Megatron-FSDP operations.
    megatron_fsdp_optimizer_step.__signature__ = create_updated_function_signature(
        optimizer_step_base_func,
        sync_grad_before_optimizer_step=True,
        install_optimized_model_weights=True,
    )
    optimizer.step = types.MethodType(megatron_fsdp_optimizer_step, optimizer)
    megatron_fsdp_optimizer_zero_grad.__signature__ = create_updated_function_signature(
        optimizer_zero_grad_base_func, zero_grad_buffer=True
    )
    optimizer.zero_grad = types.MethodType(megatron_fsdp_optimizer_zero_grad, optimizer)

    if preproc_state_dict_for_dcp_ckpt:

        def dict_nested_shallow_copy(d: dict):
            """Create a nested shallow copy of a dict. Same values, different pointers."""
            if not isinstance(d, dict):
                return d
            return {
                k: dict_nested_shallow_copy(v) if isinstance(v, dict) else v for k, v in d.items()
            }

        def preprocess_optimizer_state_dict_for_uneven_dtensor(optimizer, state_dict):
            """
            Hook that mocks the global optimizer state for unevenly-distributed
            DTensors, as the optimizer state is only initialized for non-empty
            parameters, and preprocesses the optimizer `state_dict` DTensors
            in-place for Torch DCP.
            """
            # Retrieve a template optimizer state.
            optim_state_template = next(iter(optimizer.state.values())) if optimizer.state else {}
            # All-gather the optimizer state keys as this rank could have empty state.
            optim_state_dtensor_keys = [None] * torch.distributed.get_world_size()
            torch.distributed.all_gather_object(
                optim_state_dtensor_keys,
                [
                    # Only track keys associated with DTensors for DCP.
                    key
                    for key, val in optim_state_template.items()
                    if isinstance(val, DTensor)
                ],
            )
            optim_state_dtensor_keys = list(
                set([key for state in optim_state_dtensor_keys for key in state])
            )

            # NOTE(@cspades): Re-construct the Megatron-FSDP distributed parameter
            # to index mapping as implemented in torch.optim.Optimizer.state_dict():
            # https://github.com/pytorch/pytorch/blob/main/torch/optim/optimizer.py
            # Simply put, the index maps to the very first appearance of id(param)
            # looping through all parameters in all groups with memory address
            # equivalent to the distributed parameter managed by Megatron-FSDP.
            param_state_idx = {}
            idx = 0
            # For all empty parameters, mock empty DTensors for all empty parameters
            # of Megatron-FSDP's unevenly-distributed optimizer state into a shallow
            # copy of the state dictionary to synchronize and pre-process a global
            # variant of the optimizer state in preparation for Torch DCP. This allows
            # us to sync the non-empty DTensor shard metadata across sharding groups
            # while excluding empty DTensor shards from the optimizer checkpoint.
            optim_state_extended = dict_nested_shallow_copy(state_dict)
            for param_group in optimizer.param_groups:
                for param in param_group["params"]:
                    # Update the parameter state index.
                    # For shared params, use same index.
                    if id(param) not in param_state_idx:
                        # New parameter, assign an index.
                        param_state_idx[id(param)] = idx
                        idx += 1
                    if param in optimizer.state or not isinstance(param, DTensor):
                        # Only mock optimizer state for parameters that are missing state.
                        # No need to mock for non-DTensor params. Not relevant to DCP.
                        continue
                    for key in optim_state_dtensor_keys:
                        # Construct a mock DTensor state for the empty DTensor parameter.
                        param_idx = param_state_idx[id(param)]
                        optim_state_extended["state"].setdefault(param_idx, {})[key] = (
                            DTensor.from_local(
                                local_tensor=torch.empty(0, dtype=param.dtype, device=param.device),
                                device_mesh=param.device_mesh,
                                placements=param.placements,
                                shape=param.shape,
                                stride=param.stride(),
                            )
                        )

            # Synchronize and preprocess DTensor metadata for Torch DCP.
            preprocess_state_dict_for_uneven_dtensor(optim_state_extended)

        # Attach the optimizer state_dict() post-hook to prepare DTensors for Torch DCP.
        # args = (optimizer, state_dict)
        optimizer.register_state_dict_post_hook(
            lambda *args, **kwargs: preprocess_optimizer_state_dict_for_uneven_dtensor(
                args[0], args[1]
            )
        )

    # Return the in-place modified optimizer.
    return optimizer


@experimental_api
def fully_shard(
    module: torch.nn.Module,
    optimizer: torch.optim.Optimizer,
    device_mesh: Optional[DeviceMesh] = None,
    dp_shard_dim: Optional[str] = None,
    dp_outer_dim: Optional[str] = None,
    tp_dim: Optional[str] = None,
    hybrid_fsdp_group: Optional[torch.distributed.ProcessGroup] = None,
    hybrid_fsdp_expt_group: Optional[torch.distributed.ProcessGroup] = None,
    expt_device_mesh: Optional[DeviceMesh] = None,
    fsdp_unit_modules: Optional[Sequence[Type[torch.nn.Module]] | Sequence[str]] = None,
    zero_dp_strategy: str | int = 3,
    outer_dp_sharding_strategy: str | int = 0,
    device: Optional[torch.device] = None,
    init_model_with_meta_device: bool = False,
    mixed_precision_policy: MixedPrecisionPolicy = MixedPrecisionPolicy(),
    overlap_grad_reduce: bool = True,
    overlap_param_gather: bool = True,
    sync_model_each_microbatch: bool = True,
    preproc_state_dict_for_dcp_ckpt: bool = True,
    report_nan_in_param_grad: bool = False,
    average_in_collective: bool = False,
    disable_bucketing: bool = False,
    calculate_per_token_loss: bool = False,
    keep_fp8_transpose_cache: bool = False,
    nccl_ub: bool = False,
    fsdp_double_buffer: bool = False,
    fsdp_db_use_persist_buf_on_alloc_fail: bool = False,
    disable_symmetric_registration: bool = False,
    enable_fine_grained_param_gather: bool = False,
) -> tuple[MegatronFSDP, torch.optim.Optimizer]:
    """
    Fully shard the model and the optimizer for Megatron-FSDP.

    Wraps the model as an Megatron-FSDP module, and modifies the optimizer to
    be compatible with the Megatron-FSDP training strategy.

    Args:
        Union of arguments from fully_shard_model and fully_shard_optimizer.

    Returns:
        torch.nn.Module: The wrapped Megatron-FSDP model configured for distributed training.
        torch.optim.Optimizer: The Megatron-FSDP-compliant optimizer for training the model.

    Note:
        This implementation uses NVIDIA's FSDP which includes optimizations specific
        to NVIDIA hardware and software stack.
    """

    model = fully_shard_model(
        module=module,
        device_mesh=device_mesh,
        dp_shard_dim=dp_shard_dim,
        dp_outer_dim=dp_outer_dim,
        tp_dim=tp_dim,
        hybrid_fsdp_group=hybrid_fsdp_group,
        hybrid_fsdp_expt_group=hybrid_fsdp_expt_group,
        expt_device_mesh=expt_device_mesh,
        fsdp_unit_modules=fsdp_unit_modules,
        zero_dp_strategy=zero_dp_strategy,
        outer_dp_sharding_strategy=outer_dp_sharding_strategy,
        device=device,
        init_model_with_meta_device=init_model_with_meta_device,
        mixed_precision_policy=mixed_precision_policy,
        overlap_grad_reduce=overlap_grad_reduce,
        overlap_param_gather=overlap_param_gather,
        sync_model_each_microbatch=sync_model_each_microbatch,
        preproc_state_dict_for_dcp_ckpt=preproc_state_dict_for_dcp_ckpt,
        report_nan_in_param_grad=report_nan_in_param_grad,
        average_in_collective=average_in_collective,
        disable_bucketing=disable_bucketing,
        calculate_per_token_loss=calculate_per_token_loss,
        keep_fp8_transpose_cache=keep_fp8_transpose_cache,
        nccl_ub=nccl_ub,
        fsdp_double_buffer=fsdp_double_buffer,
        fsdp_db_use_persist_buf_on_alloc_fail=fsdp_db_use_persist_buf_on_alloc_fail,
        disable_symmetric_registration=disable_symmetric_registration,
        enable_fine_grained_param_gather=enable_fine_grained_param_gather,
    )

    # Extend optimizer methods to support Megatron-FSDP operations.
    # Replace the optimizer module parameter references with
    # Megatron-FSDP-managed distributed parameters.
    model._replace_param_with_distributed_if_needed()
    optimizer.param_groups.clear()
    optimizer.state.clear()
    optimizer.add_param_group({"params": model.parameters()})
    fully_shard_optimizer(
        optimizer, preproc_state_dict_for_dcp_ckpt=preproc_state_dict_for_dcp_ckpt
    )

    # Return model and optimizer.
    return model, optimizer


================================================
FILE: megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.import functools

import functools
import importlib
import logging
from contextlib import contextmanager
from enum import Enum, auto
from typing import Any, Dict, List, Optional, Tuple

import torch
import torch.nn as nn
from torch.utils._pytree import tree_flatten, tree_map, tree_unflatten

from .mixed_precision import (
    MixedPrecisionPolicy,
    fp8_create_transpose_cache,
    fp8_discard_transpose_cache,
    is_float8tensor,
)
from .param_and_grad_buffer import (
    AllGatherPipeline,
    BucketingPolicy,
    GradReducePipeline,
    ParamAndGradBuffer,
    PrefetchOrder,
    _check_nan_in_grad,
    override_sharded_param_methods_with_safety_checks,
    to_local_if_dtensor,
)
from .utils import FSDPDistributedIndex, log_single_rank

logger = logging.getLogger(__name__)


try:
    # Default to Megatron-LM FW.
    logger.info("Detected Megatron Core, using Megatron-FSDP with Megatron.")
    from megatron.core.distributed.distributed_data_parallel_config import (
        DistributedDataParallelConfig,
    )
    from megatron.core.utils import is_submodule
except ImportError:
    # Megatron-LM is not installed, use Megatron-FSDP as a standalone module.
    logger.info("Megatron Core is not installed, Megatron-FSDP will run without Megatron Core.")
    from .distributed_data_parallel_config import DistributedDataParallelConfig
    from .utils import is_submodule


class TrainingState(Enum):
    """States of a FSDP parameter group, which are coupled with
    the sharding activity of parameters and gradients during training."""

    # From pre-forward before post-forward, where parameters should be unsharded
    FORWARD = auto()
    # Prior to backward computation, where parameters should be unsharded
    PRE_BACKWARD = auto()
    # After backward computation, where gradients should be re-sharded
    POST_BACKWARD = auto()
    # Before and after module forward computaton or before pre-backward and
    # after post-backward states, where no un/sharding activity happens
    IDLE = auto()


class MegatronFSDP(torch.nn.Module):
    """Fully Sharded Data Parallel training.

    A distributed training wrapper that shards model parameters, gradients and optimizer
    states across data parallel workers. Integrates seamlessly with MCore's tensor
    and expert parallelism features, and in native PyTorch.

    We supports following modes:
    - no_shard: Traditional data parallel training without parameter sharding.
    - optim: Shards optimizer states, this is conceptually close to "ZeRO-1", and
        main weights for mixed precision training, meanwhile the following `optim_grads`
        and `optim_grads_params` will also sharding main weights
        during mixed-precision training, omitted without detailed notation.
    - optim_grads: Shards gradients and optimizer states, this is conceptually close to "ZeRO-2".
    - optim_grads_params: Shards parameters, gradients and optimizer states, this
        is conceptually close to "ZeRO-3".

    Key Features:
    - Compatible with MCore's tensor, context and expert parallelism
    - Compatible with Native PyTorch's tensor and context parallelism with DTensor
    - Automatic mixed precision training (BF16/FP8)
    - Gradient accumulation and bucketing
    - Optimized activation recompute with shard-aware communication: When recomputing
        a whole Transformer layer, gather parameters once for both the recomputation
        and backward computation
    - Compatible with MCore's distributed checkpointing, and native PyTorch.

    Args:
        module (torch.nn.Module): Underlying Torch Module.
        dist_index (FSDPDistributedIndex): FSDPDistributedIndex object containing references to the
            process groups and device meshes used by Megatron-FSDP.
        ddp_config (DistributedDataParallelConfig): FullyShardedDataParallel configuration
            dataclass containing a variety of Megatron-derived parameters that control the
            behavior of Megatron-FSDP.
        mixed_precision_policy (megatron_fsdp.MixedPrecisionPolicy): Configuration for
            mixed-precision customization of compute and communications in Megatron-FSDP.
        fsdp_unit_modules (List[torch.nn.Module] | List[str]): List of modules that
            should be treated as an FSDP Unit, i.e. the minimum releasable model unit.
            It affects the granularity of the communication parameter grouping and
            triggers aggregate collective communication in FP8 mixed precision training.
        device (torch.device): Target device for the sharded model. Used to migrate
            all model parameters to an expected device. If init_model_with_meta_device=True,
            this argument is ignored.
        init_model_with_meta_device (bool): Whether to initialize model parameters
            in shards across all devices of the fsdp_group. Utilized to initialize
            large models that do not fit on a single device.
        sync_model_each_microbatch (bool): Whether to sync parameters and install gradients on
            each training step. When disabled, Megatron-FSDP will overlap reduce-scatter with
            subsequent compute and delay HSDP gather and reduce operations per optimization cycle,
            which improves performance and throughput when using delayed optimization strategies
            such as gradient accumulation. Defaults to True, can be modified before the model
            forward / backward pass via MegatronFSDP.set_model_auto_sync(bool) or controlled
            with the (no_)sync context managers or microbatch_count and is_last_microbatch.
        disable_bucketing: If true, force assign all parameters to a single bucket. If false,
            use standard bucketing policy: assign parameters to smaller buckets and all-reduce
            per bucket.
        keep_fp8_transpose_cache (bool): Whether to keep
            the fp8 transpose cache when using Megatron-FSDP. It will use significantly
            more GPU memory but can improve performance.
        nccl_ub (bool): Whether to use NCCL userbuffer for the FSDP communication
            operands, which uses less number of SMs, resulting better overlapped
            computation performance.
            This flag automatically sets fsdp_double_buffer to True, which uses
            additional GPU memory.
        fsdp_double_buffer (bool): Whether to use persistently allocated double buffers
            for the temporary memory needed in the FSDP communication. This flag is
            automatically set to True when nccl_ub is True.
        fsdp_db_use_persist_buf_on_alloc_fail (bool): Whether to fall back to persistent buffer
            allocator when a bucket does not fit FSDP double buffer size.
        disable_symmetric_registration (bool): Whether to disable symmetric (window) registration
            for NCCL userbuffer registration. This option will force to use conventional (local)
            userbuffer registration when nccl_ub is set.
        enable_fine_grained_param_gather (bool): Whether to enable "fine-grained" param all-gather,
            which can improve performance when using MXFP8 parameters with activation recomputation.
        report_nan_in_param_grad (bool): Whether to enable precise NaN-checking for parameter wgrad.
            Can significantly degrade performance. Defaults to False.

    Examples:
        >>> model = GPTModel(config)
        >>> model = MegatronFSDP(
        ...     model,
        ...     dist_index,
        ...     ddp_config,
        ...     fsdp_unit_modules = [TransformerLayer, LanguageModelEmbedding],
        ...     device=torch.device(f"cuda:{torch.cuda.current_device()}"),
        ...     init_model_with_meta_device=False,
        ...     disable_bucketing=False,
        ...     keep_fp8_transpose_cache=False,
        ...     nccl_ub=False,
        ...     fsdp_double_buffer=False,
        ...     fsdp_db_use_persist_buf_on_alloc_fail=False,
        ...     disable_symmetric_registration=False,
        ... )
    """

    def __init__(
        self,
        module: torch.nn.Module,
        dist_index: FSDPDistributedIndex,
        ddp_config: DistributedDataParallelConfig = None,
        mixed_precision_policy: MixedPrecisionPolicy = MixedPrecisionPolicy(),
        fsdp_unit_modules: Optional[List[torch.nn.Module] | List[str]] = None,
        disable_bucketing: bool = False,
        device: Optional[torch.device] = None,
        calculate_per_token_loss: bool = False,
        init_model_with_meta_device: bool = False,
        sync_model_each_microbatch: bool = False,
        keep_fp8_transpose_cache: bool = False,
        nccl_ub: bool = False,
        fsdp_double_buffer: bool = False,
        fsdp_db_use_persist_buf_on_alloc_fail: bool = False,
        disable_symmetric_registration: bool = False,
        enable_fine_grained_param_gather_hook: bool = False,
        report_nan_in_param_grad: bool = False,
    ):
        super().__init__()
        # If device is not specified, use the current device.
        self.device = (
            device if device is not None else torch.device(f"cuda:{torch.cuda.current_device()}")
        )
        if self.device != torch.device(f"cuda:{torch.cuda.current_device()}"):
            logger.warning(
                f"[Rank {torch.distributed.get_rank()}] Megatron-FSDP is "
                f"using device {self.device} instead of the current device "
                f"{torch.device(f'cuda:{torch.cuda.current_device()}')}, "
                "which may cause process-to-device mapping issues or "
                "cross-device Tensor operation errors. If necessary, "
                "send all Tensors in the module to the Megatron-FSDP "
                f"device ({self.device}) during initialization or to "
                "the device used by corresponding Tensors during "
                "operations of the module forward pass."
            )
        # Only map the module to the device if the original device argument is not None,
        # otherwise Megatron-FSDP will proceed with the existing module and send the model
        # weights to the current device via copy during initialization.
        self.module = (
            # Send module to user-specified device.
            module.to(self.device)
            if device is not None and not init_model_with_meta_device
            else module
        )

        # if ddp_config is not provided, use the default config
        # "optim_grads_params" is the default strategy
        if ddp_config is None:
            self.ddp_config = DistributedDataParallelConfig(
                data_parallel_sharding_strategy="optim_grads_params",
                outer_dp_sharding_strategy="no_shard",
                overlap_grad_reduce=True,
                overlap_param_gather=True,
                average_in_collective=False,
                keep_fp8_transpose_cache=keep_fp8_transpose_cache,  # pylint: disable=C0301
                nccl_ub=nccl_ub,
                fsdp_double_buffer=fsdp_double_buffer or nccl_ub,
                fsdp_db_use_persist_buf_on_alloc_fail=fsdp_db_use_persist_buf_on_alloc_fail,
                disable_symmetric_registration=disable_symmetric_registration,
                check_for_nan_in_grad=False,
            )
        else:
            self.ddp_config = ddp_config
        self.data_parallel_sharding_strategy = self.ddp_config.data_parallel_sharding_strategy
        self.mp_policy = mixed_precision_policy
        self.calculate_per_token_loss = calculate_per_token_loss
        self.init_model_with_meta_device = init_model_with_meta_device
        self.enable_fine_grained_param_gather_hook = enable_fine_grained_param_gather_hook
        self.report_nan_in_param_grad = report_nan_in_param_grad

        # FSDPDistributedIndex stores the process groups and meshes used by Megatron-FSDP.
        # If not provided, Megatron-FSDP will default to a simple data parallel index
        # supported by torch.distributed.group.WORLD.
        self.dist_index = dist_index

        # Whether to constantly synchronize the model every training iteration,
        # which defaults to False to overlap communication with computation
        # across training steps for performance. When enabled, the next training
        # step of the model will reduce all gradients and gather all parameters
        # for synchronized operations such as distributed optimization and
        # distributed checkpointing particularly sharding with HSDP / DP-Outer.
        self.set_model_auto_sync(sync_model_each_microbatch)

        # Check if the module contains (Megatron-Core) expert parallel parameters or DTensors.
        has_expert_parameters = self._check_module_parameter_types()

        # If Megatron Expert Parallelism is enabled, you need to provide an expt_dp_group.
        if (
            has_expert_parameters
            and self.dist_index.get_fsdp_group(is_expert_parallel=True) is None
        ):
            raise ValueError(
                "[Megatron-FSDP] Megatron Expert Parallelism is enabled, but no expt_dp_group is"
                "provided."
            )

        self.bucket_size = self.ddp_config.bucket_size
        if disable_bucketing:
            self.bucket_size = None

        # Parse FSDP unit modules. If given a list of strings, import the classes.
        self.fsdp_unit_modules = (
            [
                (self._import_class_from_path(cls_path) if isinstance(cls_path, str) else cls_path)
                for cls_path in fsdp_unit_modules
            ]
            if fsdp_unit_modules is not None
            else []
        )

        # Determine if we should delay the gradient reduction.
        self.is_delay_grad_reduce = self.data_parallel_sharding_strategy in ["no_shard", "optim"]
        if self.data_parallel_sharding_strategy == "optim_grads_params":
            # Default to overlapped parameter gather when fully-sharding.
            self.ddp_config.overlap_param_gather = True
        if self.ddp_config.data_parallel_sharding_strategy in ["optim_grads_params", "optim_grads"]:
            # Default to overlapped gradient reduce-scatter when sharding gradients.
            self.ddp_config.overlap_grad_reduce = True
        if not self.is_delay_grad_reduce:
            # Gradient reduce-scatter must be overlapped when using sharding optimizer
            # and gradients.
            assert self.ddp_config.overlap_grad_reduce

        for param in self.module.parameters():
            if not hasattr(param, "grad_added_to_main_grad"):
                # This is to ensure that the param.grad_added_to_main_grad is set to False
                # when the parameter is created.
                param.grad_added_to_main_grad = False
            if not hasattr(param, "__fsdp_param__"):
                # This is to ensure that the param.__fsdp_param__ is set to True
                # when the parameter is created.
                param.__fsdp_param__ = True

        self._init_fsdp_param_and_grad_buffer()
        self._register_fsdp_hooks(self.module)
        self.microbatch_count = 0

        # Add a reference from the distributed parameters to self for API
        # accessibility, e.g. when attaching MegatronFSDP scheduled ops
        # to the distributed optimizer.step() and optimizer.zero_grad().
        self.is_param_fsdp_distributed = False
        self._replace_param_with_distributed_if_needed()
        for param in self.module.parameters():
            # Attach MegatronFSDP reference to the parameter.
            setattr(param, "_megatron_fsdp_model", self)

    def _check_module_parameter_types(self):
        """
        Check if the module parameters include special parameters
        such as Megatron-Core Expert Parallel (EP/EXPT) parameters.
        """
        expert_params = False
        for _, param in self.module.named_parameters():
            if not getattr(param, "allreduce", True):
                expert_params = True
            if expert_params:
                # Detected. No need to check further.
                break
        return expert_params

    def _init_fsdp_param_and_grad_buffer(self):
        if self.calculate_per_token_loss:
            # We don't need to scale the gradients in this case.
            gradient_scaling_factor = None
            expert_gradient_scaling_factor = None
        else:
            if self.ddp_config.average_in_collective:
                gradient_scaling_factor = 1.0
                expert_gradient_scaling_factor = (
                    self.dist_index.get_dp_group(is_expert_parallel=True).size()
                    / self.dist_index.get_dp_group().size()
                )
            else:
                data_parallel_world_size = self.dist_index.get_dp_group().size()
                gradient_scaling_factor = 1.0 / data_parallel_world_size
                expert_gradient_scaling_factor = 1.0 / data_parallel_world_size

        # Initialize the param and grad buffer.
        self.param_and_grad_buffer = ParamAndGradBuffer(
            self.ddp_config,
            self.module,
            bucketing_policy=BucketingPolicy(
                suggested_bucket_size=self.bucket_size,
                fsdp_unit_modules=self.fsdp_unit_modules,
                data_parallel_sharding_strategy=self.data_parallel_sharding_strategy,
            ),
            dist_index=self.dist_index,
            mixed_precision_policy=self.mp_policy,
            gradient_scaling_factor=gradient_scaling_factor,
            expert_gradient_scaling_factor=expert_gradient_scaling_factor,
            device=self.device,
            reset_parameters_for_meta_device_init_module=self.init_model_with_meta_device,
        )
        self.param_to_name = {p: name for name, p in self.module.named_parameters()}
        self.raw_param = dict(self.module.named_parameters())

        # Initialize a gradient buffer and accumulation stream for the GradReducePipeline.
        self.side_stream_for_buffer_copy_and_grad_accum = torch.cuda.Stream()
        self.side_stream_for_param_gather = torch.cuda.Stream()

        # Initialize the reduce-scatter pipeline.
        self.grad_reduce_pipeline = GradReducePipeline(
            self.param_and_grad_buffer, rs_stream=self.side_stream_for_buffer_copy_and_grad_accum
        )

        # Initialize the all-gather pipeline.
        self.all_gather_pipeline = AllGatherPipeline(
            self.param_and_grad_buffer, ag_stream=self.side_stream_for_param_gather
        )

        # Set the suggested communication unit size for reduce-scatter and all-gather pipelines.
        suggested_communication_unit_size = self.ddp_config.suggested_communication_unit_size
        if suggested_communication_unit_size is None:
            if self.data_parallel_sharding_strategy == "optim_grads_params":
                total_param_elements = 0
                total_fsdp_module = 0
                for module in self.module.modules():
                    if isinstance(module, tuple(self.fsdp_unit_modules)):
                        total_fsdp_module += 1
                        total_param_elements += sum(p.numel() for p in module.parameters())
                # The suggested size is twice the number of elements in the FSDP modules.
                # This ensures we process the current FSDP module and attempt to prefetch
                # the next FSDP module, making the flow of communication better.
                suggested_communication_unit_size = total_param_elements // total_fsdp_module * 2
            elif self.bucket_size is not None:
                suggested_communication_unit_size = self.bucket_size
            else:
                suggested_communication_unit_size = 1_000_000_000

            # Cap to 1B elements.
            suggested_communication_unit_size = max(
                1_000_000_000, suggested_communication_unit_size
            )

        self.suggested_RS_queue_capacity = suggested_communication_unit_size
        self.suggested_AG_prefetch_size = suggested_communication_unit_size // 2

        if self.data_parallel_sharding_strategy == "optim_grads_params":
            override_sharded_param_methods_with_safety_checks(
                self.module.parameters(), self.all_gather_pipeline
            )

    def _import_class_from_path(self, class_path: str):
        """Helper function to import classes from string paths."""
        module_path, class_name = class_path.rsplit(".", 1)
        module = importlib.import_module(module_path)
        cls = getattr(module, class_name)
        return cls

    def all_gather_and_wait_parameters_ready(
        self,
        params,
        prefetch=True,
        prefetch_order=PrefetchOrder.FORWARD_PASS_ORDER,
        wait_bucket_ready=True,
        bwd=False,
    ):
        """
        All-gather parameters across the data parallel group and wait for
        the all-gather operation to complete.
        """
        if self.data_parallel_sharding_strategy == "no_shard":
            return

        ag_pipeline = self.all_gather_pipeline
        # Only all-gather HSDP buffer parameters in the beginning of a new optimization
        # step cycle, or on every step if model_auto_sync is enabled, i.e. update
        # the model training weights to reflect the reduced gradient descent step.
        ag_pipeline.all_gather_params(
            params=params,
            prefetch=prefetch,
            prefetch_order=prefetch_order,
            suggested_AG_prefetch_size=self.suggested_AG_prefetch_size,
            outer_fsdp_group_param_gather=(
                # All-gather the (DP-Outer, DP-Shard) weight shards from the DP-backed
                # main weight buffer into the (DP-Shard)-backed hybrid weight buffer.
                # This is performed at the beginning of a new optimization step cycle,
                # and only necessary when at least the optimizer state is sharded.
                self.dist_index.use_hybrid_fsdp
                and self.ddp_config.outer_dp_sharding_strategy != "no_shard"
                and (self.microbatch_count == 0 or self.model_auto_sync)
            ),
            bwd=bwd,
        )
        if wait_bucket_ready:
            for param in params:
                bucket_id = self.param_and_grad_buffer.param_to_param_group[param]
                ag_pipeline.wait_bucket_ready(bucket_id, bwd)
                if bwd and is_float8tensor(param):
                    fp8_create_transpose_cache(param)

        for param in params:
            # This setting is needed to make FSDP store the weight object when used
            # with TE's activation offloading for the first global batch.
            param.grad_added_to_main_grad = False
            # This setting is needed to have this attribute present after every
            # un-shard of the FSDP params.
            param.__fsdp_param__ = True
            # Transformer Engine accumulates gradient on top of the `main_grad`
            # buffer when gradient accumulation fusion in enabled. But with FSDP,
            # we want to overwrite the `main_grad` which is enabled by this
            # attribute.
            param.overwrite_main_grad = True

    def _register_fsdp_hooks(self, root_module):
        """Register necessary hooks for Fully Sharded Data Parallel (FSDP) execution on the model.

        This function sets up various hooks required for FSDP operations, including parameter
        resharding/unsharding and gradient handling. The registered hooks are:
            - Pre-forward hook: Unshards parameters before forward pass
            - Post-forward hook: Reshards parameters after forward pass
            - Pre-backward hook: Unshards parameters before backward pass
            - Post-backward hook: Reshards parameters and reduces gradients after backward pass

        Args:
            root_module: The PyTorch module to register FSDP hooks on

        Note:
            These hooks are essential for FSDP's memory efficiency as they manage:
            1. Dynamic parameter sharding/unsharding to reduce memory footprint
            2. Proper gradient synchronization across distributed processes
            3. Gradient accumulation for large batch training

        Returns:
            None
        """

        # Initialize module training state.
        for m in root_module.modules():
            setattr(m, "_training_state", TrainingState.IDLE)

        self.forward_pre_hooks = {}
        self.forward_hooks = {}
        self.backward_pre_hooks = {}
        self.grad_acc_hooks = {}

        """
        An FSDP unit is a module designed to manage the lifecycle of model parameters
        in Fully Sharded Data Parallel (FSDP) training. It ensures that parameters
        are only used within the module and are released immediately after
        the forward and backward computations are completed.
        This approach is crucial for efficient memory management, as releasing
        parameters too early can lead to issues if other computations depend on them.

        `optim` and `optim_grads` do not require FSDP units because they do not
        shard model parameters.
        """
        fsdp_unit_modules = self.fsdp_unit_modules

        def release_module_parameters(module, bwd, lazy=False, *unused):
            """
            Release the parameters of a given module after completing the forward
            and backward passes.

            Args:
                module: The module whose parameters should be released.
                bwd (bool): Indicates if the release is triggered during the backward pass.
                lazy (bool, optional): Determines when the parameter buffer (bucket) is released.
                    - If False, the buffer is released immediately.
                    - If True, the release is deferred until just before the all-gather pipeline
                    requests a new buffer. The delayed release is performed by invoking
                    `recycle_unused_buckets`.
                *unused: Placeholder for any unused arguments.

            Notes:
                - The function maps each parameter to its corresponding buffer group,
                then releases the associated bucket through the all-gather pipeline.
                - If `ddp_config.keep_fp8_transpose_cache` is False, it also clears
                the FP8 transpose cache associated with the module’s parameters.
            """
            for param in module.parameters():
                bucket_id = self.param_and_grad_buffer.param_to_param_group[param]
                self.all_gather_pipeline.release_bucket(bucket_id, bwd, lazy=lazy)

            if not self.ddp_config.keep_fp8_transpose_cache:
                release_params_fp8_transpose_cache(module.parameters())

        def release_params_fp8_transpose_cache(params):
            for param in params:
                if is_float8tensor(param):
                    fp8_discard_transpose_cache(param)

        def _grad_acc(param):
            """
            Accumulate the gradient in the main_grad buffer.

            Utilizes the patched main_grad property of the parameter to allocate
            or fetch the main gradient bucket for the parameter.
            """
            group_id = self.param_and_grad_buffer.param_to_param_group[param]
            group = self.param_and_grad_buffer.parameter_groups[group_id]
            if not group.requires_grad:
                return

            # Sharded Gradient Buffer
            gbuf = group.hsdp_gbuf if group.hsdp_gbuf else group.main_grad_buffer
            if gbuf.is_data_distributed:
                if not param.grad_added_to_main_grad:
                    # Get `main_grad` will allocate bucket, check that the currently
                    # used main_grad buffer does not exceed the scope of two FSDP Unit
                    # Modules, i.e., the buffer limit imposed by double-buffer allocator.
                    if self.ddp_config.fsdp_double_buffer:
                        self.grad_reduce_pipeline._enforce_double_buffer_limit([group_id])

                    param.main_grad = param.get_main_grad()
                    if param.grad is not None:
                        if self.report_nan_in_param_grad:
                            _check_nan_in_grad(to_local_if_dtensor(param.grad))
                        # Copy the gradient into the allocated main gradient bucket.
                        # It will be reduce-scattered and accumulated into gbuf.
                        param.main_grad.copy_(to_local_if_dtensor(param.grad))
                        del param.grad
                    else:
                        # Prepare for fused wgrad accumulation.
                        param.main_grad.zero_()
            # Unsharded Gradient Buffer
            else:
                if not param.grad_added_to_main_grad:
                    if param.grad is not None:
                        if self.report_nan_in_param_grad:
                            _check_nan_in_grad(to_local_if_dtensor(param.grad))
                        # Accumulate the gradient into the main gradient buffer,
                        # because we only reduce once per optimization cycle.
                        param.main_grad = param.get_main_grad()
                        param.main_grad.add_(to_local_if_dtensor(param.grad))
                        del param.grad

            if param.grad_added_to_main_grad and param.grad is not None:
                del param.grad

            # Reset the grad accumulate flag.
            param.grad_added_to_main_grad = False

        self._params_require_handle_grad = set()

        def _post_backward_release_module(module, *unused):
            """
            Post-backward hook for an FSDP unit to release parameters and process
            its gradients after the backward pass.

            This hook:
            - Validates that the module is an FSDP unit and that the data-parallel
            sharding strategy is ``"optim_grads_params"``.
            - Releases the module's parameters for the backward phase to free memory.
            - Marks the module as IDLE in the training state machine.
            """
            assert isinstance(module, tuple(fsdp_unit_modules))
            assert self.data_parallel_sharding_strategy == "optim_grads_params"

            # Release parameters for this module after backward.
            release_module_parameters(module, bwd=True)

            # Transition this module back to the IDLE training state.
            module._training_state = TrainingState.IDLE

        @torch.compiler.disable
        def _process_post_backward_gradients(param_list):
            """
            Process gradients for a list of parameters after the backward pass.

            This helper accumulates gradients into the main_grad buffer and, when
            appropriate, launches asynchronous reduce-scatter operations according
            to the data-parallel sharding strategy and training phase.

            Args:
                param_list (List[torch.nn.Parameter]): Parameters whose gradients
                    should be processed.

            Behavior:
                - Skips processing for shared parameters (those with ``_is_shared=True``),
                since their gradients are handled by the root post-backward hook.
                - Determines whether to reduce gradients based on:
                    * Data-parallel sharding strategy (``"optim_grads"`` or
                        ``"optim_grads_params"``).
                    * Whether this is the last microbatch of the iteration.
                    * Whether ``model_auto_sync`` is enabled.
                - When reduction conditions are met, performs an asynchronous
                reduce-scatter of gradients prior to the optimizer step, which
                requires a subsequent call to ``finish_grad_sync()`` to complete.
                - Marks parameters as processed by adding them to
                    ``_params_require_handle_grad``.

            Notes:
                - With gradient-sharding strategies, gradient reduction occurs on
                every backward propagation.
                - Without gradient sharding, gradient reduction is deferred until
                the last microbatch or when auto-sync is enabled.
                - In hybrid FSDP configurations, an outer FSDP group gradient reduction
                may be triggered.
            """
            # Filter out shared parameters whose gradients are handled by the root hook.
            param_list = [p for p in param_list if not getattr(p, "_is_shared", False)]
            for param in param_list:
                _grad_acc(param)

            grad_reduce_every_bprop = self.data_parallel_sharding_strategy in [
                "optim_grads",
                "optim_grads_params",
            ]
            is_last_microbatch = getattr(self, "is_last_microbatch", False)

            if grad_reduce_every_bprop or is_last_microbatch or self.model_auto_sync:
                # Launch asynchronous reduce-scatter of gradients before the optimizer
                # step. This requires a later call to finish_grad_sync() to wait for
                # completion.
                self.grad_reduce_pipeline.reduce_gradients(
                    param_list,
                    suggested_queue_capacity=self.suggested_RS_queue_capacity,
                    outer_fsdp_group_grad_reduce=(
                        # HSDP all-reduce or HFSDP reduce-scatter on the DP-Outer PG.
                        self.dist_index.use_hybrid_fsdp
                        and (is_last_microbatch or self.model_auto_sync)
                    ),
                )

            # Mark parameters as processed.
            for param in param_list:
                self._params_require_handle_grad.discard(param)

        @torch.compiler.disable
        def _pre_forward_param_unshard(
            module: nn.Module, args: Tuple[Any, ...], kwargs: Dict[str, Any]
        ):
            # Unshard the parameters before the forward pass.
            input_training_state = module._training_state
            fsdp_forward_prefetch = True
            if input_training_state == TrainingState.PRE_BACKWARD:
                # In activation recomputation case, we need to cancel forward prefetch.
                fsdp_forward_prefetch = False
            else:
                module._training_state = TrainingState.FORWARD

            if isinstance(module, tuple(fsdp_unit_modules)):
                param_list = list(module.parameters())
            else:
                # All-gather the shallow parameters in every forward pass for modules
                # that are not FSDP units. Do not recurse unless absolutely necessary,
                # to allocate as little memory as possible for this forward pass.
                param_list = list(module.parameters(recurse=False))

            if self.enable_fine_grained_param_gather_hook:
                param_list = list(module.parameters(recurse=False))

            # All-gather the parameters before the forward pass.
            self.all_gather_and_wait_parameters_ready(
                params=param_list,
                prefetch=fsdp_forward_prefetch,
                prefetch_order=PrefetchOrder.FORWARD_PASS_ORDER,
            )
            return args, kwargs

        @torch.compiler.disable
        def _register_post_backward_hook(
            post_backward_hook: callable,
            module: nn.Module,
            args: Tuple[Any, ...],
            kwargs: Dict[str, Any],
        ):
            """
            Register a post-backward hook for the given module by inserting an autograd
            Function in front of it. Note that a post-backward hook implemented in this
            way is not compatible with in-place modifications of the module's inputs,
            since such operations can trigger an autograd error that
            "the output is a view and is being modified in-place".
            """
            if not torch.is_grad_enabled():
                # No gradients / backward pass, don't attach the post-backward hook.
                return args, kwargs

            # Preprocess the input arguments.
            args_list, args_spec = tree_flatten(args)
            kwargs_list, kwargs_spec = tree_flatten(kwargs)
            args_kwargs_list = list(args_list) + list(kwargs_list)
            inp_tensor_indices: List[int] = []
            inp_tensors: List[torch.Tensor] = []
            for i, obj in enumerate(args_kwargs_list):
                if torch.is_tensor(obj) and obj.requires_grad:
                    inp_tensor_indices.append(i)
                    inp_tensors.append(obj)

            if len(inp_tensors) == 0:
                return args, kwargs

            """
            Identity autograd Function that attaches a post-backward "hook" to the
            module, triggering parameter deallocation immediately after the module's
            backward pass has completed in order to shard this layer's model memory
            once the current backward stage is done.
            """
            inp_tensors = RegisterFSDPBackwardFunction.apply(
                functools.partial(post_backward_hook, module), *inp_tensors
            )

            # Post-process the input arguments for input into the module.
            for inp_tensor_idx, inp_tensor in zip(inp_tensor_indices, inp_tensors):
                args_kwargs_list[inp_tensor_idx] = inp_tensor
            args_list = args_kwargs_list[: len(args_list)]
            kwargs_list = args_kwargs_list[len(args_list) :]
            args = tree_unflatten(args_list, args_spec)
            kwargs = tree_unflatten(kwargs_list, kwargs_spec)

            # Return original input to the module forward pass.
            return args, kwargs

        def _root_post_backward(*unused):
            # Make sure all the gradients are handled.
            ordered_params = sorted(
                list(self._params_require_handle_grad), key=lambda p: self.param_to_name[p]
            )
            for param in ordered_params:
                _grad_acc(param)

            # Reduce the remaining gradients.
            grad_reduce_every_bprop = self.data_parallel_sharding_strategy in [
                "optim_grads",
                "optim_grads_params",
            ]
            # Only reduce if we are sharding gradients, or are on the final microbatch.
            # If is_last_microbatch is not specified, then we should reduce gradients
            # if model_auto_sync is enabled, otherwise wait until is_last_microbatch
            # is specified by the user, context manager, or FW before reduction.
            is_last_microbatch = getattr(self, "is_last_microbatch", False)
            if grad_reduce_every_bprop or is_last_microbatch or self.model_auto_sync:
                self.grad_reduce_pipeline.reduce_gradients(
                    ordered_params,
                    suggested_queue_capacity=self.suggested_RS_queue_capacity,
                    outer_fsdp_group_grad_reduce=(
                        self.dist_index.use_hybrid_fsdp
                        and (is_last_microbatch or self.model_auto_sync)
                    ),
                )
                self.grad_reduce_pipeline.reset()

            # Reset root_pre_backward_hook_issued flag.
            self._root_pre_backward_hook_issued = False
            self.microbatch_count += 1

            # If model_auto_sync is enabled, we automatically synchronize gradients
            # so the user does not have to call finish_grad_sync() manually. However,
            # this will reduce training performance when using delayed optimization
            # techniques such as gradient accumulation, because asynchronous gradient
            # reduce-scatter calls can be overlapped with subsequent compute.
            # This will also reset the microbatch counter to 0, to trigger initial
            # microbatch operations on the next iteration of the training loop.
            if self.model_auto_sync:
                self.finish_grad_sync()

        @torch.compiler.disable
        def _pre_backward_param_unshard(module: nn.Module, *unused):
            """
            Sub-module pre-backward hook to all-gather the module parameters
            before the backward pass.
            """
            # Set the module's training state to PRE_BACKWARD.
            module._training_state = TrainingState.PRE_BACKWARD

            if isinstance(module, tuple(fsdp_unit_modules)):
                param_list = list(module.parameters())
            else:
                param_list = list(module.parameters(recurse=False))

            if self.enable_fine_grained_param_gather_hook:
                param_list = list(module.parameters(recurse=False))

            # All-gather / unshard the module parameters before the backward pass.
            self.all_gather_and_wait_parameters_ready(
                param_list, prefetch_order=PrefetchOrder.BACKWARD_PASS_ORDER, bwd=True
            )

        self._root_pre_backward_hook_issued = False

        def _root_pre_backward(module: nn.Module, *unused):
            """Marks the module's training state as PRE_BACKWARD before the
            backprop, this function is registered on the root module.

            This root pre-backward hook informs all modules to skip forward
            pre-fetching in the pre-forward hooks (for activation recomputation)
            and skip weight deallocation / resharding in the post-forward hooks
            during the backward pass, which are instead performed by backward hooks.
            """
            if self._root_pre_backward_hook_issued:
                return
            self._root_pre_backward_hook_issued = True

            if self.data_parallel_sharding_strategy == "optim_grads_params":
                for module in root_module.modules():
                    if isinstance(module, tuple(fsdp_unit_modules)):
                        # Set PRE_BACKWARD state to skip resharding and forward pre-fetching
                        # when performing activation recomputation / gradient checkpointing.
                        module._training_state = TrainingState.PRE_BACKWARD
                # set all param buckets can be released
                ag_pipeline = self.all_gather_pipeline
                for bucket_id in range(ag_pipeline.num_buckets):
                    group = self.param_and_grad_buffer.parameter_groups[bucket_id]
                    if group.fsdp_unit_id is not None:
                        ag_pipeline.bucket_can_be_released[
                            ag_pipeline.get_bucket_key(bucket_id, bwd=False)
                        ] = True
            # Track parameters that require gradient reduction and optimization.
            self._params_require_handle_grad = set()
            for param_group in self.param_and_grad_buffer.parameter_groups:
                if not param_group.requires_grad:
                    continue
                self._params_require_handle_grad |= set(param_group.params)
                for param in param_group.params:
                    param.grad_added_to_main_grad = False
            # Queue the root post-backward hook to reduce leftover gradients after
            # the backward pass.
            torch.autograd.Variable._execution_engine.queue_callback(_root_post_backward)

        @torch.compiler.disable
        def _post_forward(module: nn.Module, input: Any, output: Any):
            # When composed with module-hook-based activation recomputation, the
            # post-backward hook is responsible for resharding the module parameters
            # after the forward pass. In this case, the resharding is performed lazily.
            if module._training_state == TrainingState.PRE_BACKWARD:
                # Delay parameter resharding because this is currently running inside
                # the activation recomputation forward. The corresponding backward
                # pass may still need these parameters, and delaying avoids an
                # unnecessary all-gather.
                lazy_release = True
            else:
                lazy_release = False
                module._training_state = TrainingState.IDLE

            assert isinstance(
                module, tuple(fsdp_unit_modules)
            ), "_post_forward hook should only be registered on FSDP unit modules."

            # Release the module parameters after the forward pass to save memory.
            release_module_parameters(module, bwd=False, lazy=lazy_release)

            return output

        @torch.compiler.disable
        def _release_module_fp8_transpose_cache(module: nn.Module, *unused):
            release_params_fp8_transpose_cache(module.parameters(recurse=False))

        def create_custom_backward_hook(module, custom_backward_handler):
            """
            Creates a custom backward hook via attaching a gradient-triggered hook
            to the output tensor(s) of a module during a post-forward hook.
            """

            @torch.compiler.disable
            def forward_hook(_module, inputs, output):
                # Replace the output to avoid the output tensor being the same as
                # the input tensor, which makes it impossible to identify which
                # layer's output it is. Using view_as to make it does not cause
                # additional memory consumption.
                output = tree_map(lambda t: t.view_as(t) if torch.is_tensor(t) else t, output)

                output_list = []

                # Post-process forward output.
                if isinstance(output, torch.Tensor):
                    output_list = [output]
                elif isinstance(output, (tuple, list)):
                    output_list = [t for t in output if isinstance(t, torch.Tensor)]

                # Register pre-backward hook on the output tensor(s). This hook
                # will trigger immediately after the gradients of the output
                # tensor(s) have been computed.
                torch.autograd.graph.register_multi_grad_hook(
                    output_list, lambda grads: custom_backward_handler(_module, grads), mode="any"
                )
                return output

            # Register the post-forward hook that attaches the custom backward hook
            # on the output tensor(s).
            return module.register_forward_hook(forward_hook)

        def _register_pre_forward_param_unshard_hook(module):
            """
            Register the forward pre-hook to unshard parameters before the forward pass.
            If we are not sharding anything, we do not have a model weight buffer and thus
            have nothing to all-gather / un-shard.
            """
            if self.data_parallel_sharding_strategy != "no_shard":
                self.forward_pre_hooks[f"{module._get_name()} parameter unshard"] = (
                    module.register_forward_pre_hook(
                        _pre_forward_param_unshard, prepend=True, with_kwargs=True
                    )
                )

        def _register_pre_backward_param_unshard_hook(module):
            """
            Register the backward pre-hook to unshard FSDP unit module parameters
            immediately before the backward pass via attaching a gradient-triggered
            hook to the output tensor(s) of a module during a post-forward hook.
            """
            self.backward_pre_hooks[f"all-gather {module._get_name()} parameters"] = (
                create_custom_backward_hook(module, _pre_backward_param_unshard)
            )

        fsdp_modules = []
        for name, module in root_module.named_modules():
            if self.enable_fine_grained_param_gather_hook:
                _register_pre_forward_param_unshard_hook(module)
                _register_pre_backward_param_unshard_hook(module)

            # Skip if the module is already registered in fsdp_modules.
            if any(is_submodule(module, fsdp_module) for fsdp_module in fsdp_modules):
                continue

            if not self.enable_fine_grained_param_gather_hook:
                _register_pre_forward_param_unshard_hook(module)

            if isinstance(module, tuple(fsdp_unit_modules)):
                fsdp_modules.append(module)
                # Register the forward post-hook to reshard FSDP unit module parameters
                # after the forward pass, except when recomputing forward activations,
                # in which case we skip resharding for the subsequent backward pass.
                self.forward_hooks[f"release module {name} parameters"] = (
                    module.register_forward_hook(_post_forward, prepend=False)
                )

                if not self.enable_fine_grained_param_gather_hook:
                    _register_pre_backward_param_unshard_hook(module)
            elif (
                not self.ddp_config.keep_fp8_transpose_cache
                and self.data_parallel_sharding_strategy == "optim_grads_params"
            ):
                # Register the forward post-hook to release FP8 transpose cache
                # after the forward pass for non-FSDP unit modules.
                # NOTE: We only need to remove the transpose cache in parameter
                # sharding strategy.
                self.forward_hooks[f"remove module {name} fp8 transpose cache"] = (
                    module.register_forward_hook(_release_module_fp8_transpose_cache, prepend=False)
                )

            # Register the post-backward hook to deallocate model parameters
            # and reduce-scatter gradients after the backward pass.
            if isinstance(module, tuple(fsdp_unit_modules)):
                if self.ddp_config.data_parallel_sharding_strategy == "optim_grads_params":
                    self.forward_pre_hooks[f"module {name} register post-backward hook"] = (
                        module.register_forward_pre_hook(
                            functools.partial(
                                _register_post_backward_hook, _post_backward_release_module
                            ),
                            with_kwargs=True,
                        )
                    )
                grad_acc_param_list = [p for p in module.parameters() if p.requires_grad]
            else:
                grad_acc_param_list = [
                    p for p in module.parameters(recurse=False) if p.requires_grad
                ]

            for param in grad_acc_param_list:
                self.grad_acc_hooks[f"grad_acc and reduce for {self.param_to_name[param]}"] = (
                    param.register_post_accumulate_grad_hook(
                        lambda p: _process_post_backward_gradients([p])
                    )
                )

        # Register root module pre- and post-backward hooks in cases where the
        # forward function of root module is not called, but rather the forward
        # function of the root module from named_modules() is called instead.
        for name, module in root_module.named_modules():
            if len(list(module.parameters())) != len(list(root_module.parameters())):
                # Only attach to root sub-module.
                continue
            # Install the root pre-backward hook.
            self.backward_pre_hooks[f"{name} _root_pre_backward"] = create_custom_backward_hook(
                module, _root_pre_backward
            )
        self._root_pre_backward_hook_handle = create_custom_backward_hook(
            module, _root_pre_backward
        )

        # Register post load state_dict hook to ensure that the module parameters
        # are properly updated.
        # NOTE: This does NOT attach a hook to torch.distributed.checkpoint.load!
        self._load_state_dict_post_hook = self.module.register_load_state_dict_post_hook(
            lambda module, incompatible_keys: self.install_optimized_model_weights()
        )

        # Register pre state_dict hook to ensure that the module parameters are
        # distributed before saving the state_dict.
        for name, module in self.named_modules():
            module.register_state_dict_pre_hook(
                lambda *args, **kwargs: self._replace_param_with_distributed_if_needed()
            )

    @contextmanager
    def no_sync(self):
        """
        Context manager that turns off gradient synchronization.
        For grads shard mode there will actually always be gradient sync happening.
        """
        # FIXME: Better handling of grads shard mode and no_sync in the training loop so that
        # the code doesn't bog down developers.
        self.is_last_microbatch = False
        try:
            yield
        finally:
            self.is_last_microbatch = True

    @contextmanager
    def sync(self):
        """
        Context manager that synchronizes the MegatronFSDP model parameters and gradients
        every training step as opposed to every optimization cycle.
        """
        self.set_model_auto_sync(True)
        try:
            yield
        finally:
            self.set_model_auto_sync(False)

    def set_model_auto_sync(self, sync_model: bool = True):
        """
        Activate or deactivate flag that controls Megatron-FSDP model synchronization.
        When activated, the model parameters and gradients will be synchronized EVERY
        training step, i.e. gradient reduction will be waited upon instead of overlapped
        with subsequent compute, and all-gather + reduce operations across the DP-Outer
        ProcessGroup will be executed when sharding on DP-Outer during HSDP / HFSDP.
        Otherwise, MegatronFSDP will perform such synchronizations every optimization
        cycle depending on is_last_microbatch = True or microbatch_count = 0, which
        are more flexible but difficult to manage, e.g. microbatch_count and
        is_last_microbatch can be modified elsewhere for custom training strategies.

        Will commonly be called on the final microbatch of a training step before the
        model forward pass and gradient backward pass to ensure that the model gradients
        (prior to optimizer.step()) and model parameters (prior to dist. checkpointing)
        are synchronized and representative of the model trained at that particular
        training step. Otherwise, model training performance will slightly degrade when
        MegatronFSDP.model_auto_sync = True.

        Args:
            sync_model (bool, optional): Whether to synchronize the model every training step.
                MegatronFSDP.model_auto_sync will be set to the value of sync_model.
                Defaults to True. MegatronFSDP.model_auto_sync defaults to False.
        """
        if sync_model and (
            # Will reduce gradient buffer data in-place across DP-Shard.
            self.data_parallel_sharding_strategy in ["no_shard", "optim"]
            # Will reduce gradient buffer data in-place across DP-Outer.
            or self.dist_index.use_hybrid_fsdp
        ):
            log_single_rank(
                logger_=logger,
                level=logging.WARNING,
                msg="[Megatron-FSDP.set_model_auto_sync()] Detected Megatron-FSDP "
                "auto-synchronization with the 'no_shard' / 'optim' sharding "
                "strategy for either FSDP (DP-Shard) or HSDP (DP-Outer). "
                "`MegatronFSDP.zero_grad_buffer()` or `optimizer.zero_grad()` "
                "must be invoked every forward-backward optimization cycle to "
                "prevent successive in-place gradient buffer reductions from "
                "corrupting previously accumulated gradients.",
            )
        self.model_auto_sync = sync_model

    def get_distributed_index(self) -> FSDPDistributedIndex:
        """
        Get the distributed environment of Megatron-FSDP, which contains references
        to the process groups and device meshes used by Megatron-FSDP.
        """
        return self.dist_index

    @contextmanager
    def mixed_precision_context(self, mixed_precision_policy: MixedPrecisionPolicy):
        """
        Context manager for re-configuring the MixedPrecisionPolicy
        for MegatronFSDP / ParamAndGradBuffer.
        """
        mp_policy_backup = self.mp_policy
        self.reset_mixed_precision_policy(mixed_precision_policy)
        try:
            yield
        finally:
            self.reset_mixed_precision_policy(mp_policy_backup)

    def reset_mixed_precision_policy(self, mixed_precision_policy: MixedPrecisionPolicy):
        """
        Re-configure MixedPrecisionPolicy for MegatronFSDP / ParamAndGradBuffer.
        """
        mp_policy_reset = MixedPrecisionPolicy(
            # Preserve the original main parameter + gradient data-type.
            main_params_dtype=self.mp_policy.main_params_dtype,
            main_grads_dtype=self.mp_policy.main_grads_dtype,
            # Gradient communication data-type can only be reset
            # if symmetric buffers / NCCL UB are not used, because
            # inflates FixedPoolAllocator memory & breaks NCCL UBR.
            grad_comm_dtype=(
                mixed_precision_policy.grad_comm_dtype
                if self.ddp_config.nccl_ub or self.ddp_config.fsdp_double_buffer
                else self.mp_policy.grad_comm_dtype
            ),
        )
        self.mp_policy = mp_policy_reset
        self.param_and_grad_buffer.mp_policy = mp_policy_reset

    def start_param_sync(self, *unused, force_sync: bool = False, force_dispatch: bool = False):
        """
        Initiates param sync (all-gather) communication operations for all model parameters.

        By default, when overlap_param_gather is set to True, dispatches asynchronous communication
        calls; when overlap_param_gather is set to False, calls synchronous communication
        ops. Can override this default behavior using flags below.

        Args:
            force_sync (bool, optional): force synchronous collective regardless of
                other settings.
            force_dispatch (bool, optional): force dispatch regardless of other settings.
        """
        self._replace_param_with_raw_if_needed()

        if not force_sync and self.ddp_config.overlap_param_gather:
            # All-gather the first bucket before the forward pass.
            if self.ddp_config.fsdp_all_gather_in_start_param_sync:
                first_param = list(self.module.parameters())[0]
                self.all_gather_and_wait_parameters_ready(
                    params=[first_param], prefetch=True, wait_bucket_ready=False
                )
        else:
            self.synchronize_param_gather()
            for bucket_id in range(self.all_gather_pipeline.num_buckets):
                self.all_gather_pipeline.async_bucket_gather(bucket_id=bucket_id, bwd=False)
                group = self.param_and_grad_buffer.parameter_groups[bucket_id]
                if group.model_weight_buffer is None:
                    continue

                if group.model_weight_buffer.is_data_distributed:
                    # If model weight is sharded, we wait for the all-gather to complete and
                    # then release the bucket immediately to save memory usage.
                    self.all_gather_pipeline.wait_bucket_ready(bucket_id, False)

            for bucket_id in range(self.all_gather_pipeline.num_buckets):
                self.all_gather_pipeline.wait_bucket_ready(bucket_id, False)

    def start_grad_sync(self, *unused):
        """
        Initiates grad sync (all-reduce or reduce-scatter) communication operations
        for all model gradients.

        When overlap_grad_reduce is set to True, dispatches asynchronous communication
        calls. When overlap_grad_reduce is set to False, calls synchronous
        communication ops.
        """
        if not self.ddp_config.overlap_grad_reduce:
            if self.data_parallel_sharding_strategy == "no_shard":
                self.param_and_grad_buffer.all_reduce_gradients(
                    async_op=self.ddp_config.overlap_grad_reduce
                )
            else:
                self.param_and_grad_buffer.reduce_scatter_gradients()

    def synchronize_param_gather(self):
        """
        Synchronize parameter all-gather operations for all model parameters.
        """
        self.all_gather_pipeline.reset()
        self._replace_param_with_distributed_if_needed()

    def synchronize_gradient_reduce(self):
        """
        Synchronize gradient reduce-scatter operations for all model gradients.
        """
        if self.ddp_config.overlap_grad_reduce:
            # Asynchronous reduce-scatter from overlap_grad_reduce=True,
            # i.e. when sharding optimizer and gradients.
            self.grad_reduce_pipeline.wait_for_previous_grad_reduce(0)
            self.grad_reduce_pipeline.reset()
        else:
            # Synchronous gradient all-reduce when sharding optimizer state or not sharding.
            self.start_grad_sync()

    def attach_grad_to_optimizer_state(self):
        """
        Attach gradients to optimizer named parameters
        in preparation for optimizer.step().
        """
        self.param_and_grad_buffer.update_main_grads()

    def finish_grad_sync(self, force_all_reduce: Optional[bool] = False):
        """
        Finishes grad sync (all-reduce or reduce-scatter) communication operations
        for all model gradients. Call prior to the optimization step to resolve
        asynchronous gradient reductions.

        When overlap_grad_reduce is set to True, waits for asynchronous communication
        calls to complete. When overlap_grad_reduce is set to False, calls synchronous
        communication ops.

        NOTE: force_all_reduce is included as an argument to maintain API compatibility
        with DDP.force_grad_sync.
        """
        # Synchronize gradient reduce-scatter operations for all model gradients.
        self.synchronize_gradient_reduce()

        # Once the gradients have been reduced and scattered into main_grad_buffer,
        # update the gradients for all buffered weights in optimizer_named_parameters.
        self.attach_grad_to_optimizer_state()

        # Synchronize parameter all-gather operations for all model parameters,
        # which are triggered during the backward pass for FSDP.
        if self.ddp_config.overlap_param_gather:
            self.synchronize_param_gather()

        # Before the optimizer.step(), replace raw module parameters with distributed
        # optimizer named parameters for distributed optimization.
        self._replace_param_with_distributed_if_needed()

        # Reset the microbatch count to zero after the gradient sync is complete.
        self.microbatch_count = 0

    def _replace_param_with_distributed_if_needed(self):
        if self.is_param_fsdp_distributed:
            return
        self.is_param_fsdp_distributed = True

        pg_buffer = self.param_and_grad_buffer
        fsdp_params = dict(pg_buffer.optimizer_named_parameters)
        for name, _ in self.module.named_parameters():
            assert name in fsdp_params, f"Parameter {name} not found in FSDP parameters."
            dist_param = fsdp_params[name]
            # Set the __fsdp_param__ attribute to True to indicate that this
            # DTensor parameter is managed by Megatron FSDP.
            if not hasattr(dist_param, "__fsdp_param__"):
                dist_param.__fsdp_param__ = True
            _replace_module_parameter(self.module, name, dist_param)

        # Handle shared weights
        self._reestablish_shared_weights(self.raw_param, fsdp_params)

    def _replace_param_with_raw_if_needed(self):
        if not self.is_param_fsdp_distributed:
            return
        self.is_param_fsdp_distributed = False

        for name, _ in self.module.named_parameters():
            assert name in self.raw_param, f"Raw parameter {name} not found in module."
            _replace_module_parameter(self.module, name, self.raw_param[name])

        # Handle shared weights
        pg_buffer = self.param_and_grad_buffer
        fsdp_params = dict(pg_buffer.optimizer_named_parameters)
        self._reestablish_shared_weights(fsdp_params, self.raw_param)

    def _reestablish_shared_weights(self, old_params, new_params):
        """
        Reestablishes shared (tied) weights in a PyTorch module after parameter replacement.

        When iterating over `named_parameters()`, PyTorch skips parameters that are shared
        via weight-tying (e.g., `lm_head.weight` referencing `wte.weight`). After replacing
        parameters, these shared weights become independent, causing previously hidden
        parameters to appear in the parameter list. This function restores the original
        shared structure by ensuring parameters that were previously tied remain shared.

        Args:
            old_params (dict): Mapping from parameter names to original parameter tensors.
            new_params (dict): Mapping from parameter names to new parameter tensors.
        """
        for name, param in self.module.named_parameters():
            if name in new_params:
                # Parameter was explicitly replaced; nothing to do.
                continue

            # Attempt to find the corresponding shared parameter in old_params.
            shared_param = None
            for old_name, old_weight in old_params.items():
                # Found a shared parameter; get the new version.
                if id(param) == id(old_weight):
                    shared_param = new_params.get(old_name)
                    break
            assert (
                shared_param is not None
            ), f"Parameter {name} not found in new parameters or as a shared weight."

            # Replace the module parameter with the restored shared parameter.
            _replace_module_parameter(self.module, name, shared_param)
            setattr(shared_param, "_is_shared", True)  # Mark as shared

    def scale_gradients(self, scaling_factor: float):
        """Scale all gradients inside the buffers by `scaling_factor`."""
        self.param_and_grad_buffer.scale_gradients(scaling_factor)

    def zero_grad_buffer(self):
        """
        Zeros out all grad buffers. Needs to be called at the beginning of each
        training iteration alongside optimizer.zero_grad().
        """
        for param in self.module.parameters():
            if param.requires_grad:
                param.grad_added_to_main_grad = False
        self.param_and_grad_buffer.zero_grad()

    def install_optimized_model_weights(self):
        """
        Copies optimized parameter values into the model training parameters
        managed by Megatron-FSDP. Should be called after the optimizer.step().
        """
        self.param_and_grad_buffer.copy_main_weights_to_model_weights()

    def broadcast_params(self):
        """
        Syncs parameters across all DP ranks.
        """
        for param in self.module.parameters():
            is_expert_parallel = not getattr(param, "allreduce", True)

            data_parallel_group = self.dist_index.get_dp_group(
                is_expert_parallel=is_expert_parallel
            )
            torch.distributed.broadcast(
                param.data,
                src=torch.distributed.get_global_rank(data_parallel_group, 0),
                group=data_parallel_group,
            )

    def forward(self, *inputs, **kwargs):
        """
        Wrapped forward pass of the model managed by FSDP.
        """
        self._replace_param_with_raw_if_needed()
        with torch.autograd.profiler.record_function("CustomFSDP.forward"):
            # Call the forward pass of the wrapped module.
            output = self.module.forward(*inputs, **kwargs)
            return output


class RegisterFSDPBackwardFunction(torch.autograd.Function):
    """
    Register a backward function that will be called after the backward pass
    of the model. This function is used to release the parameters after the
    backward pass.
    """

    @staticmethod
    def forward(ctx, post_backward, *inputs: torch.Tensor):
        """
        Forward pass of the RegisterFSDPBackwardFunction function.
        """
        ctx.post_backward = post_backward
        return inputs

    @staticmethod
    def backward(ctx, *grads: torch.Tensor):
        """
        Backward pass of the RegisterFSDPBackwardFunction function.
        """
        ctx.post_backward()
        return (None,) + grads


def _replace_module_parameter(module, name, new_param):
    """
    Replace a module's parameter with a new parameter, preserving the hierarchy.
    """
    parts = name.split(".")
    parent = module
    for part in parts[:-1]:  # Navigate to parent module
        parent = getattr(parent, part)

    # Replace the parameter
    setattr(parent, parts[-1], new_param)


================================================
FILE: megatron/core/distributed/fsdp/src/megatron_fsdp/mixed_precision.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from contextlib import nullcontext
from dataclasses import dataclass
from importlib.metadata import version
from typing import List, Optional, Tuple

import torch
from packaging.version import Version as PkgVersion

logger = logging.getLogger(__name__)

# Detect if Transformer Engine is installed
try:
    import transformer_engine  # pylint: disable=W0611
    from transformer_engine.pytorch.module.base import TransformerEngineBaseModule

    HAVE_TE = True
except (ImportError, ModuleNotFoundError):
    TransformerEngineBaseModule = None
    HAVE_TE = False
    logger.info("Using Megatron-FSDP without Transformer Engine.")

# Detect the Transformer Engine version
try:
    import transformer_engine as te

    if hasattr(te, "__version__"):
        TE_VERSION = PkgVersion(str(te.__version__))
    else:
        TE_VERSION = PkgVersion(version("transformer-engine"))
except:
    TE_VERSION = None

# Detect the quantized_model_init or fp8_model_init context manager.
if HAVE_TE:
    try:
        from transformer_engine.pytorch import quantized_model_init

        QUANTIZED_MODEL_INIT_CLASS = quantized_model_init
    except:
        # Fallback to original FP8 model init.
        from transformer_engine.pytorch import fp8_model_init

        QUANTIZED_MODEL_INIT_CLASS = fp8_model_init
else:
    QUANTIZED_MODEL_INIT_CLASS = nullcontext

# Detect the FP8 tensor class
try:
    from transformer_engine.pytorch.tensor import QuantizedTensor

    HAVE_TE_FP8_TENSOR_CLASS = True
    FP8_TENSOR_CLASS = QuantizedTensor
except:
    try:
        from transformer_engine.pytorch.float8_tensor import Float8Tensor

        HAVE_TE_FP8_TENSOR_CLASS = True
        FP8_TENSOR_CLASS = Float8Tensor
    except:
        HAVE_TE_FP8_TENSOR_CLASS = False

# Detect the MXFP8 tensor class
try:
    from transformer_engine.pytorch.tensor.mxfp8_tensor import MXFP8Tensor

    HAVE_TE_MXFP8TENSOR = True
except:
    HAVE_TE_MXFP8TENSOR = False

# Detect the Blockwise FP8 tensor class
try:
    from transformer_engine.pytorch.tensor.float8_blockwise_tensor import Float8BlockwiseQTensor

    HAVE_TE_BLOCKWISE_FP8TENSOR = True
except:
    HAVE_TE_BLOCKWISE_FP8TENSOR = False

# Detect the "cast_master_weights_to_fp8" function of Transformer Engine
try:
    from transformer_engine.pytorch.tensor.utils import cast_master_weights_to_fp8

    HAVE_TE_CAST_MASTER_WEIGHTS_TO_FP8 = True
except:
    HAVE_TE_CAST_MASTER_WEIGHTS_TO_FP8 = False

    # Try to import multi_tensor_apply, used in the fallback of fp8 quantization.
    try:
        from transformer_engine.pytorch.optimizers import multi_tensor_applier, multi_tensor_scale

        multi_tensor_scale_impl = multi_tensor_scale
    except ImportError:
        try:
            import amp_C
            from apex.multi_tensor_apply import multi_tensor_applier

            multi_tensor_scale_impl = amp_C.multi_tensor_scale
        except ImportError:
            import warnings

            warnings.warn(
                "Transformer Engine and Apex are not installed. "
                "Falling back to local implementations of "
                "multi_tensor_applier and multi_tensor_scale"
            )

            def local_multi_tensor_applier(op, noop_flag_buffer, tensor_lists, *args):
                """Multi tensor op applier"""
                return op(2048 * 32, noop_flag_buffer, tensor_lists, *args)

            def local_multi_tensor_scale(chunk_size, noop_flag, tensor_lists, scale):
                """Works as a drop-in replacement for amp_C.multi_tensor_scale."""
                for src, dst in zip(tensor_lists[0], tensor_lists[1]):
                    dst.copy_(src * scale)

            multi_tensor_applier = local_multi_tensor_applier
            multi_tensor_scale_impl = local_multi_tensor_scale

    def _multi_tensor_copy_this_to_that(
        this: List[torch.Tensor],
        that: List[torch.Tensor],
        overflow_buf: Optional[torch.Tensor] = None,
    ):
        """
        Use multi-tensor-applier to copy values from one list to another.
        We don't have a bfloat16 implementation so for now if the overflow_buf
        is not provided, we default back to simple loop copy to be compatible
        with bfloat16.
        """
        if overflow_buf is not None:
            overflow_buf.fill_(0)
            # Scaling with factor `1.0` is equivalent to copy.
            multi_tensor_applier(multi_tensor_scale_impl, overflow_buf, [this, that], 1.0)
        else:
            for this_, that_ in zip(this, that):
                that_.copy_(this_)


# Detect the "post_all_gather_processing" function of Transformer Engine
try:
    from transformer_engine.pytorch.tensor.utils import post_all_gather_processing

    HAVE_TE_POST_ALL_GATHER_PROCESSING = True
except:
    HAVE_TE_POST_ALL_GATHER_PROCESSING = False


def is_te_min_version(vers, check_equality=True):
    """Check if minimum version of `transformer-engine` is installed."""
    if not isinstance(TE_VERSION, PkgVersion):
        return False

    if check_equality:
        return TE_VERSION >= PkgVersion(vers)
    else:
        return TE_VERSION > PkgVersion(vers)


def is_float8tensor(tensor: torch.Tensor) -> bool:
    """Check if a tensor is a FP8 tensor."""
    return HAVE_TE and isinstance(tensor, FP8_TENSOR_CLASS)


def is_blockwise_float8tensor(tensor: torch.Tensor) -> bool:
    """Check if a tensor is a Blockwise FP8 tensor."""
    return HAVE_TE_BLOCKWISE_FP8TENSOR and isinstance(tensor, Float8BlockwiseQTensor)


def fp8_need_transpose_data(tensor: torch.Tensor) -> bool:
    """Check if a FP8 tensor needs transpose data."""
    return HAVE_TE_MXFP8TENSOR and isinstance(tensor, MXFP8Tensor)


def fp8_need_transpose_data_for_meta_device_init(module: TransformerEngineBaseModule) -> bool:
    """Check if a FP8 tensor needs transpose data, for meta device init scenario."""
    return HAVE_TE_MXFP8TENSOR and module.fp8_meta["recipe"].mxfp8()


def fp8_discard_transpose_cache(tensor: torch.Tensor) -> None:
    """Discard the transpose cache of a FP8 tensor."""
    assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor"

    if hasattr(tensor, "_transpose_invalid"):
        tensor._transpose_invalid = True
        tensor._transpose = None
    elif not fp8_need_transpose_data(tensor):
        tensor.update_usage(rowwise_usage=True, columnwise_usage=False)


def fp8_create_transpose_cache(tensors: List[torch.Tensor]) -> None:
    """Create the transpose cache of a FP8 tensor."""
    if HAVE_TE_POST_ALL_GATHER_PROCESSING:
        post_all_gather_processing(tensors)
    else:
        _fp8_create_transpose_cache_fallback(tensors)


def _fp8_create_transpose_cache_fallback(tensors: List[torch.Tensor]) -> None:
    if not isinstance(tensors, list):
        tensors = [tensors]
    for tensor in tensors:
        assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor"
        if hasattr(tensor, "_create_transpose"):
            tensor._create_transpose()
        else:
            tensor._create_columnwise()


def fp8_set_raw_data(tensor: torch.Tensor, data: torch.Tensor, set_transpose: bool = False) -> None:
    """Set the raw data of a Transformer Engine Float8Tensor."""
    assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor"

    if set_transpose:
        assert fp8_need_transpose_data(tensor), f"Type {type(tensor)} does not need transpose data"
        data_attr = "_columnwise_data"
    else:
        data_attr = "_rowwise_data" if hasattr(tensor, "_rowwise_data") else "_data"

    old_data = getattr(tensor, data_attr)
    if old_data is not None:
        assert (
            old_data.dtype == data.dtype
        ), f"The data types of raw data don't match {old_data.dtype} vs {data.dtype}"
        assert (
            old_data.shape == data.shape
        ), f"Shape {old_data.shape} of old_data doesn't match {data.shape} of new_data"
    setattr(tensor, data_attr, data)


def fp8_get_raw_data(tensor: torch.Tensor, get_transpose: bool = False) -> torch.Tensor:
    """Get the underlying raw storage of a FP8 tensor."""
    assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor"

    if get_transpose:
        assert fp8_need_transpose_data(tensor), f"Type {type(tensor)} does not need transpose data"
        data_attr = "_columnwise_data"
    else:
        data_attr = "_rowwise_data" if hasattr(tensor, "_rowwise_data") else "_data"

    return getattr(tensor, data_attr)


def fp8_dequantize(tensor: torch.Tensor) -> torch.Tensor:
    """Dequantize a FP8 tensor to a higher precision."""
    assert is_float8tensor(tensor), f"Type {type(tensor)} is not a FP8 tensor"
    assert is_te_min_version(
        "2.0"
    ), "Transformer Engine >= 2.0 is required for dequantizing parameters."
    return tensor.dequantize()


def fp8_quantize(
    model_params: List[torch.Tensor],
    main_params: List[torch.Tensor],
    start_offsets: List[int],
    data_parallel_group: torch.distributed.ProcessGroup,
    fsdp_shard_model_params: List[Tuple[torch.Tensor, Optional[torch.Tensor]]],
) -> None:
    """Quantize sharded parameters to FP8."""
    if len(model_params) == 0:
        return
    fsdp_shard_model_params = [x[0] if x[1] is None else x for x in fsdp_shard_model_params]

    if HAVE_TE_CAST_MASTER_WEIGHTS_TO_FP8:
        cast_master_weights_to_fp8(
            model_params, main_params, start_offsets, data_parallel_group, fsdp_shard_model_params
        )
    else:
        _fp8_quantize_fallback(
            model_params, main_params, start_offsets, data_parallel_group, fsdp_shard_model_params
        )


def _fp8_quantize_fallback(
    model_params: List[torch.Tensor],
    main_params: List[torch.Tensor],
    start_offsets: List[int],
    data_parallel_group: torch.distributed.ProcessGroup,
    fsdp_shard_model_params: List[Tuple[torch.Tensor, Optional[torch.Tensor]]],
) -> None:
    for model_param, main_param, start_offset, fsdp_shard_model_param in zip(
        model_params, main_params, start_offsets, fsdp_shard_model_params
    ):
        if main_param is None:
            continue

        if fsdp_shard_model_param is not None:
            shard_model_param = fsdp_shard_model_param
        else:
            shard_model_param = model_param._data.view(-1)[
                start_offset : start_offset + main_param.numel()
            ]

        quantizer = model_param._quantizer
        # When not using fp8 params, the main_param (fp32) is first cast to bf16/fp16, and then
        # cast to fp8 during forward. This logic keeps numerical consistency with bf16 params.
        main_param = main_param.to(model_param.dtype)
        out = Float8Tensor(
            shape=main_param.size(),
            dtype=model_param.dtype,
            requires_grad=False,
            data=shard_model_param,
            fp8_scale_inv=model_param._scale_inv,
            fp8_dtype=model_param._fp8_dtype,
            quantizer=quantizer,
        )
        quantizer.update_quantized(main_param, out)

        amaxes = []
        scales = []
        scale_invs = []
        for model_param in model_params:
            quantizer = model_param._quantizer
            amaxes.append(quantizer.amax.view(1))
            scales.append(quantizer.scale.view(1))
            scale_invs.append(model_param._scale_inv.view(1))
            model_param._reset_caches()

        dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device="cuda")

        # Update scaling factors.
        packed_scales = torch.empty(len(scales), dtype=torch.float32, device=scales[0].device)
        packed_scale_views = [packed_scales[i].view(1) for i in range(len(scales))]
        _multi_tensor_copy_this_to_that(scales, packed_scale_views, dummy_overflow_buf)
        torch.reciprocal(packed_scales, out=packed_scales)
        _multi_tensor_copy_this_to_that(packed_scale_views, scale_invs, dummy_overflow_buf)

        # Reduce amaxes.
        # Note: Assume each param has a separate amax.
        packed_amaxes = torch.empty(len(amaxes), dtype=torch.float32, device=amaxes[0].device)
        packed_amax_views = [packed_amaxes[i].view(1) for i in range(len(amaxes))]
        _multi_tensor_copy_this_to_that(amaxes, packed_amax_views, dummy_overflow_buf)
        torch.distributed.all_reduce(
            packed_amaxes, op=torch.distributed.ReduceOp.MAX, group=data_parallel_group
        )
        _multi_tensor_copy_this_to_that(packed_amax_views, amaxes, dummy_overflow_buf)


def get_quantized_model_init_context_cls():
    """
    Get the TransformerEngine model parameter quantization context manager.
    """
    if QUANTIZED_MODEL_INIT_CLASS is nullcontext:
        logger.warning(
            f"quantized_model_init / fp8_model_init context was requested but does not exist. "
            f"Verify TransformerEngine is installed (TE_INSTALLED={HAVE_TE})."
        )
    return QUANTIZED_MODEL_INIT_CLASS


@dataclass(frozen=True)
class MixedPrecisionPolicy:
    """Megatron-FSDP Mixed Precision Dataclass"""

    main_params_dtype: Optional[torch.dtype] = torch.float32
    """Data type for the main weight buffer utilized for distributed optimization
      and quantization with Megatron-FSDP. If set to None, the model compute weight
      buffer will take the role of the main weights, or when no sharding is applied,
      the native model weights become the main weights. Defaults to torch.float32.
    """

    main_grads_dtype: Optional[torch.dtype] = None
    """Data type for the main gradient buffer utilized for distributed optimization with
      Megatron-FSDP. If set to None, main gradients will match the dtype of the model
      compute parameters specified by the user model. Defaults to None.
    """

    grad_comm_dtype: Optional[torch.dtype] = None
    """Data type for gradient gather / scatter communications. Can be utilized to reduce
      communication latency, but adds overhead for type-casting and copy operations.
      If using NCCL UBR v2.27+, gradient reduction may be performed in high-precision
      depending on the network domain (NVLink or IB), and can enable mixed-precision
      communication and accumulation, e.g. setting grad_comm_dtype to `BF16` can support
      `FP32` reduction even though we have `BF16` input and output communication buffers.
      If set to None, the `main_grads_dtype` is used. If using HSDP (either DP-Replicate
      or DP-Outer in `outer_dp_sharding_strategy`), `no_shard`, `optim`, or a
      `FixedPoolAllocator` (`fsdp_double_buffer`), allocating `dtype`-custom gradient
      communication buffers (per FSDP group) adds memory overhead. Defaults to None.
      No additional memory is allocated when `grad_comm_dtype == main_grads_dtype`.
    """


================================================
FILE: megatron/core/distributed/fsdp/src/megatron_fsdp/package_info.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION.  All rights reserved.


MAJOR = 0
MINOR = 3
PATCH = 0
PRE_RELEASE = 'rc0'

# Use the following formatting: (major, minor, patch, pre-release)
VERSION = (MAJOR, MINOR, PATCH, PRE_RELEASE)

__shortversion__ = '.'.join(map(str, VERSION[:3]))
__version__ = '.'.join(map(str, VERSION[:3])) + ''.join(VERSION[3:])

__package_name__ = 'megatron_fsdp'
__contact_names__ = 'NVIDIA'
__contact_emails__ = 'nemo-toolkit@nvidia.com'  # use NeMo Email
__homepage__ = 'https://docs.nvidia.com/megatron-core/developer-guide/latest/user-guide/index.html'
__repository_url__ = 'https://github.com/NVIDIA/Megatron-LM/tree/main/megatron/core'
__download_url__ = 'https://github.com/NVIDIA/Megatron-LM/releases'
__description__ = (
    'Megatron Core - a library for efficient and scalable training of transformer based models'
)
__license__ = 'BSD-3'
__keywords__ = (
    'deep learning, machine learning, gpu, NLP, NLU, language, transformer, nvidia, pytorch, torch'
)


================================================
FILE: megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TODO: Split this file into smaller files.

import copy
import dataclasses
import functools
import gc
import inspect
import logging
import math
import traceback
import warnings
from collections import defaultdict, namedtuple
from contextlib import ExitStack, nullcontext
from enum import Enum
from typing import Any, Callable, Dict, List, Optional, Tuple, cast

import torch
from torch.distributed import _coalescing_manager
from torch.distributed.tensor import DTensor, Replicate, Shard

from .mixed_precision import (
    MixedPrecisionPolicy,
    fp8_discard_transpose_cache,
    fp8_get_raw_data,
    fp8_need_transpose_data,
    fp8_need_transpose_data_for_meta_device_init,
    fp8_quantize,
    fp8_set_raw_data,
    get_quantized_model_init_context_cls,
    is_blockwise_float8tensor,
    is_float8tensor,
    is_te_min_version,
)
from .uneven_dtensor import update_uneven_dtensor_chunk_metadata, validate_uneven_dtensor
from .utils import (
    _MODEL_PARALLEL_RNG_TRACKER_NAME,
    FSDPDistributedIndex,
    get_global_memory_buffer,
    get_mcore_tensor_parallel_partition_dim,
    is_mcore_tensor_model_parallel,
    is_mcore_tensor_parallel_duplicated,
    log_single_rank,
)

logger = logging.getLogger(__name__)


try:
    # Default to Megatron-LM FW.
    from megatron.core.distributed.distributed_data_parallel_config import (
        DistributedDataParallelConfig,
    )
    from megatron.core.tensor_parallel import get_cuda_rng_tracker
    from megatron.core.utils import is_submodule

    HAVE_MCORE = True
    logger.info("Detected Megatron Core, using Megatron-FSDP with Megatron.")

except ImportError:
    # Megatron-LM is not installed, use Megatron-FSDP as a standalone module.
    from .distributed_data_parallel_config import DistributedDataParallelConfig
    from .utils import get_cuda_rng_tracker, is_submodule

    HAVE_MCORE = False
    logger.info("Megatron Core is not installed, Megatron-FSDP will run without Megatron Core.")

try:
    from transformer_engine.pytorch.module.base import TransformerEngineBaseModule

    HAVE_TE = True
except Exception:
    HAVE_TE = False

NCCL_ALLOCATOR = None

try:
    # Try to import the MCore NCCL nccl_allocator first.
    # If it fails, try to import the APEX NCCL nccl_allocator.
    import megatron.core.nccl_allocator as nccl_allocator

    NCCL_ALLOCATOR = "MCORE"
except ImportError:
    try:
        import apex.contrib.nccl_allocator as nccl_allocator

        NCCL_ALLOCATOR = "APEX"
    except ImportError:
        nccl_allocator = None

NCCL_MEMORY_POOL = None


def _p_assert(cond: Any, s: str, raise_assertion_error: bool = True) -> None:
    """Alternate to ``assert`` when in the backward context to print the error
    message ``s`` since otherwise, it is swallowed.
    """
    if not cond:
        logger.error(s)
        logger.error(''.join(traceback.format_stack()))
        if raise_assertion_error:
            raise AssertionError(s)


def _alloc_storage(tensor: torch.Tensor, size: torch.Size) -> None:
    """
    Allocate storage for ``tensor`` with the given size.

    Returns:
        bool: ``True`` if this method allocated storage and ``False`` if the
        storage was already allocated.
    """
    with torch.no_grad():
        if not torch.distributed._functional_collectives.is_torchdynamo_compiling():
            already_allocated = tensor._typed_storage()._size() == size.numel()
            if not already_allocated:
                tensor_storage_size = tensor._typed_storage()._size()
                _p_assert(
                    tensor_storage_size == 0,
                    "Tensor storage should have been resized to be 0 but got PLACEHOLDEr",
                )
                tensor._typed_storage()._resize_(size.numel())


def _free_storage(tensor: torch.Tensor):
    """
    Frees the underlying storage of ``tensor``.

    Returns:
        bool: ``True`` if the method freed the storage and ``False`` if the
        storage was already freed.
    """
    with torch.no_grad():
        if not torch.distributed._functional_collectives.is_torchdynamo_compiling():
            already_freed = tensor._typed_storage()._size() == 0
            if not already_freed:
                _p_assert(
                    tensor.storage_offset() == 0,
                    "Freeing a tensor's storage is unsafe when it is not the sole occupant\n"
                    f"storage offset: {tensor.storage_offset()}\n"
                    f"storage size: {tensor._typed_storage()._size()}\n"
                    f"tensor shape: {tensor.shape}",
                )
                tensor._typed_storage()._resize_(0)


TensorItemIndex = namedtuple(
    "TensorItemIndex", ["global_data_index", "size", "item_id", "bucket_id", "shape"]
)
BucketIndex = namedtuple("BucketIndex", ["bucket_id", "global_data_index", "size", "items"])
ShardBucketIndex = namedtuple(
    "ShardBucketIndex",
    ["bucket_id", "global_data_index", "local_data_index", "bucket_data_index", "size"],
)


class MultiGroupUBRAllocator:
    """
    A custom allocator class that registers a single memory pool with multiple different
    communication groups, which is not natively supported by apex's nccl_allocator.

    This is particularly useful for Mixture of Experts (MoE) models where:
    - Non-expert parameters/gradients use the data-parallel + context-parallel group (dp_cp_group)
    - Expert parameters/gradients use the expert-parallel + data-parallel group (ep_dp_group)

    Since Megatron-Core FSDP uses a contiguous single tensor for the entire model's parameters, we
    need to register the same memory pool with both communication groups to enable nccl algorithms
    that is relying on the user buffer registration for both expert and non-expert parameters.

    Implementation:
        It uses apex nccl_allocator internally to create a Tensor using ncclMemAlloc
        and register to the `group` and then registers the Mempool also for the `additional_group`

    Example:
        ```
        import apex.contrib.nccl_allocator as nccl_allocator
        nccl_allocator.init()
        pool = nccl_allocator.create_nccl_mem_pool()
        group_1 = torch.distributed.new_group(ranks=[0, 1, 2, 3, 4, 5, 6, 7], backend="nccl")
        group_2 = torch.distributed.new_group(ranks=[0, 2, 4, 6], backend="nccl")
        with MultiGroupUBRAllocator(pool, groups=[group_1, group_2]):
            a = torch.zeros(1024, dtype=torch.float32, device="cuda")
            b = torch.zeros(1024, dtype=torch.float32, device="cuda")
        ```
    """

    def __init__(self, pool, groups):  # torch.cuda.MemPool  # torch.distributed.ProcessGroup
        self.pool = pool
        self.groups = groups
        self.mem_allocator = nccl_allocator.nccl_mem(self.pool, group=self.groups[0])
        assert len(self.groups) > 1, "MultiGroupUBRAllocator requires at least two groups"

    def __enter__(self):
        for group in self.groups[1:]:
            backend = group._get_backend(torch.device("cuda", torch.cuda.current_device()))
            try:
                # Since the registration is done in mempool granularity, we need to deregister
                # the tensors in the mempool and re-register the mempool including the newly created
                # tensors after the context is exited.
                backend.deregister_mem_pool(self.pool)
            except RuntimeError:
                pass
        self.mem_allocator.__enter__()

    def __exit__(self, *args):
        self.mem_allocator.__exit__(*args)
        for group in self.groups[1:]:
            backend = group._get_backend(torch.device("cuda", torch.cuda.current_device()))
            log_single_rank(
                logger,
                logging.INFO,
                f"[MultiGroupUBRAllocator] Registering mem pool to group {group}, "
                f"group.group_desc:{group.group_desc}",
            )
            backend.register_mem_pool(self.pool)


@dataclasses.dataclass
class BucketingPolicy:
    """
    A policy for bucketing in Fully Sharded Data Parallel (FSDP) training.

    Attributes:
        suggested_bucket_size (int): The suggested size of each bucket in num of elements.
        fsdp_unit_modules (list): A list of module classes that are treated as a
            single unit for FSDP bucketing.
        data_parallel_sharding_strategy (str): The strategy used for sharding
            data parallel modules.

    Note:
        This policy is used to configure the bucketing behavior in FSDP training.
    """

    suggested_bucket_size: Optional[int] = 40_000_000
    fsdp_unit_modules: List[torch.nn.Module] = dataclasses.field(default_factory=list)
    data_parallel_sharding_strategy: str = "no_shard"


def _pad(number_to_be_padded: int, divisor: int) -> int:
    return int(math.ceil(number_to_be_padded / divisor) * divisor)


def build_data_parallel_buffer_index(
    elements: List[torch.Size],
    data_parallel_rank: int,
    data_parallel_world_size: int,
    is_data_distributed: bool,
    ddp_config: DistributedDataParallelConfig,
    bucket_id: int = 0,
    chunk_size_factor: int = 1,
) -> Tuple[List[tuple], BucketIndex, ShardBucketIndex]:
    """
    Assuming that all input tensor elements contiguously compose a global
    buffer, give the index range of every tensor, the bucket in the buffer,
    and the (distributed) shard within the bucket. Note that the global bucket
    buffer is only temporarily allocated, but is abstractly tracked via indices
    deduced from the number of raw parameters assigned to this buffer / bucket.

    Args:
        elements (List[torch.Size]): List of input tensor.
        data_parallel_rank (int): Rank of the current process in the data parallel group.
        data_parallel_world_size (int): World size of the data parallel group.
        bucket_id (int, optional): The id of the bucket. Defaults to 0.

    Returns:
        Tuple[Dict[int, TensorItemIndex], BucketIndex, ShardBucketIndex]: The index
            range of every tensor, every bucket and every in bucket local buffer.
    """

    def _pad_if_needed(data_index: int) -> int:
        if ddp_config.data_parallel_sharding_strategy != "no_shard":
            return _pad(data_index, data_parallel_world_size * chunk_size_factor)
        return data_index

    def add_item(item_id, item, offset, item_index_map):
        # The item index map contains information on where each parameter item will
        # be stored in the tensor data buffer in a bucket.
        item_index_map[item_id] = TensorItemIndex(
            # Global data index of the starting idx of this parameter
            # = running global data index + updated bucket size - the parameter size.
            global_data_index=offset,
            # Number of tensor elements in the parameter.
            size=item.numel(),
            # Index of the parameter to be buffered in the list of parameter shapes.
            item_id=item_id,
            # ID of the bucket that this parameter belongs to.
            bucket_id=bucket_id,
            # Shape of the parameter.
            shape=item,
        )

    fragment_items = []
    regular_items = []
    for item_id, item in enumerate(elements):
        if item.numel() < chunk_size_factor:
            fragment_items.append((item_id, item))
        else:
            item[1:].numel()
            regular_items.append((item_id, item))

    # Sort the fragments so that items with larger sizes come first.
    # When filling the remaining space, prioritize placing the larger fragments first.
    sorted(fragment_items, key=lambda id_item: -id_item[1].numel())

    # For all bucket parameters, add information on the parameter to the item index map,
    # and add the size of the parameter to the bucket.
    item_index_map = {}
    data_index = 0
    while len(regular_items) > 0:
        item_id, item = regular_items.pop(0)
        add_item(item_id, item, data_index, item_index_map)
        if item.numel() % chunk_size_factor == 0:
            data_index += item.numel()
            continue

        gap_offset = data_index + item.numel()
        data_index += (item.numel() // chunk_size_factor + 1) * chunk_size_factor
        remain = item.numel() % chunk_size_factor
        space = chunk_size_factor - remain
        found_rhs = False
        for id_rhs in regular_items[:]:
            rhs_id, rhs = id_rhs
            if rhs.numel() % chunk_size_factor == 0:
                continue
            rhs_remain = rhs.numel() % chunk_size_factor
            if remain + rhs_remain <= chunk_size_factor:
                found_rhs = True
                regular_items.remove(id_rhs)
                break

        # If a item is found to have remnants, then the remnants of the two
        # items are placed in one "grid".
        if found_rhs:
            add_item(rhs_id, rhs, data_index - rhs_remain, item_index_map)
            space -= rhs_remain
            data_index += rhs.numel() // chunk_size_factor * chunk_size_factor

        # Try adding the fragments into the gaps
        for id_frag in fragment_items[:]:
            frag_id, frag = id_frag
            if frag.numel() > space:
                continue
            add_item(frag_id, frag, gap_offset, item_index_map)
            space -= frag.numel()
            gap_offset += frag.numel()
            fragment_items.remove(id_frag)

    for frag_id, frag in fragment_items:
        add_item(frag_id, frag, data_index, item_index_map)
        data_index += frag.numel()

    # Bucket index contains information on what tensor items are in this bucket.
    bucket_index = BucketIndex(
        bucket_id=bucket_id,
        global_data_index=0,
        size=_pad_if_needed(data_index),
        items=list(item_index_map.values()),
    )

    # Sharded bucket index contains local bucket shard information.
    shard_bucket_index = _get_dp_buffer_shard_bucket_index(
        bucket_index, is_data_distributed, data_parallel_world_size, data_parallel_rank
    )

    # Return the tensor item index map in the buffer,
    # the bucket index with information on what items this bucket contains,
    # and the sharded bucket index.
    return item_index_map, bucket_index, shard_bucket_index


def _get_dp_buffer_shard_bucket_index(
    bucket_index: BucketIndex,
    is_data_distributed: bool,
    data_parallel_world_size: int,
    data_parallel_rank: int,
) -> ShardBucketIndex:
    """
    Build the data parallel buffer shard bucket index from the bucket index.

    Args:
        bucket_index (BucketIndex): The bucket index containing information on the
            items in the bucket.
        is_data_distributed (bool): Whether the data is distributed across multiple
            processes.
        data_parallel_world_size (int): The world size of the data parallel group.
        data_parallel_rank (int): The rank of the current process in the data parallel group.

    Returns:
        ShardBucketIndex: The shard bucket index containing information on the
            location and size of the buffer shard in the global bucket.
    """
    # Calculate the shard size and the starting index of this shard in the global bucket.
    # Each rank / process will have a different shard size and starting index regardless
    # of whether the buffer is sharded or not, i.e. a "virtual shard" for unsharded buffers.
    shard_size = bucket_index.size // data_parallel_world_size
    bucket_data_index = shard_size * data_parallel_rank

    # Calculate the global data index of the starting index of this shard in the global bucket.
    global_data_index = bucket_index.global_data_index + bucket_data_index

    if is_data_distributed:
        # Sharded Data Buffer - This index stores the location (start) and size (end) of the
        # buffer shard in the global bucket.
        shard_bucket_index = ShardBucketIndex(
            bucket_id=bucket_index.bucket_id,
            # Location of the buffer shard in the global bucket.
            global_data_index=global_data_index,
            # When the buffer is sharded, the local index of the data in this shard starts at 0.
            local_data_index=0,
            # Location of the buffer shard relative to the global starting index of the bucket.
            bucket_data_index=bucket_data_index,
            size=shard_size,  # Size of the bucket shard.
        )
    else:
        # Virtual sharding for bijections with other sharded buffers. But the buffer
        # itself is not actually sharded and contains the entire global bucket.
        shard_bucket_index = ShardBucketIndex(
            bucket_id=bucket_index.bucket_id,
            global_data_index=global_data_index,
            # When the buffer is not sharded, the local index of the data in this
            # "virtual" shard begins at the location of the buffer shard in the
            # global bucket, because the entire bucket is stored in this buffer.
            local_data_index=global_data_index,
            bucket_data_index=bucket_data_index,
            size=shard_size,
        )
    return shard_bucket_index


@dataclasses.dataclass
class Bucket:
    """
    A container for holding data in Fully Sharded Data Parallel (FSDP) training.

    Attributes:
        data (torch.Tensor): A tensor containing the data elements
            grouped together in a bucket.
            used to synchronize data operations.

    Note:
        Buckets are used to optimize communication in FSDP training by
            grouping small tensors together.
    """

    data: torch.Tensor


class TemporaryBucketAllocator:
    """
    A utility class for managing temporary buckets (buffers) used in FSDP
    operations like parameters unshard and gradients reduction.

    This allocator handles the dynamic allocation and deallocation of temporary memory buffers
    needed during FSDP (Fully Sharded Data Parallel) operations, particularly for parameters
    unshard and gradients reduction. It helps optimize memory usage by allowing temporary
    buckets to be released when no longer needed.

    Key Features:
        - Dynamic allocation of temporary buckets for FSDP operations
        - Memory-efficient management of temporary buffers
        - Support for both parameters unshard and gradients reduction operations
        - Automatic cleanup of unused buckets to save memory

    Usage:
        ```python
        # Create an allocator instance
        allocator = TemporaryBucketAllocator(name="gpt_parameters")

        # Allocate a temporary bucket
        temp_bucket = allocator.allocate(size=1024, dtype=torch.float32)

        # Use the temporary bucket for FSDP operations
        # ... perform all-gather or reduce-scatter ...

        # Free the bucket when done
        allocator.free(temp_bucket)
        ```

    Note:
        It's important to release temporary buckets after use to prevent memory leaks
        and optimize memory usage during training.
    """

    def __init__(self):
        self.buckets = {}

    def allocate(
        self,
        bucket_id: int,
        size: int,
        dtype: torch.dtype,
        device: torch.device,
        mem_alloc_context: Optional[Callable] = None,
    ) -> Bucket:
        """
        allocate a temporary bucket.
        """
        if bucket_id not in self.buckets:
            self.buckets[bucket_id] = Bucket(data=torch.empty(size, dtype=dtype, device=device))
        return self.buckets[bucket_id]

    def free(self, bucket_id: int):
        """
        free a temporary bucket.
        """
        if bucket_id in self.buckets:
            _free_storage(self.buckets[bucket_id].data)
            del self.buckets[bucket_id]


class StorageResizeBasedBucketAllocator(TemporaryBucketAllocator):
    """
    A specialized temporary bucket allocator that resizes the storage of temporary buckets
    based on the required size.
    """

    def __init__(self):
        super().__init__()

    def allocate(
        self,
        bucket_id: int,
        size: int,
        dtype: torch.dtype,
        device: torch.device,
        mem_alloc_context: Optional[Callable] = None,
    ) -> Bucket:
        """
        allocate a temporary bucket.
        """
        if bucket_id not in self.buckets:
            self.buckets[bucket_id] = Bucket(data=torch.empty(size, dtype=dtype, device=device))
        bucket = self.buckets[bucket_id]
        _alloc_storage(bucket.data, torch.Size([size]))
        return bucket

    def free(self, bucket_id: int):
        """
        free a temporary bucket.
        """
        if bucket_id in self.buckets:
            _free_storage(self.buckets[bucket_id].data)


class RotaryBucketAllocator(TemporaryBucketAllocator):
    """A specialized temporary bucket allocator that implements a circular buffer recycling strategy
    to minimize memory fragmentation in FSDP operations.

    RotaryBucketAllocator extends TemporaryBucketAllocator by maintaining a limited pool of
    pre-allocated buffers that are reused in a circular manner. This approach helps prevent
    memory fragmentation that typically occurs with frequent allocation and deallocation of
    temporary buffers during FSDP operations.

    Key Features:
        - Circular buffer recycling strategy for memory efficiency
        - Reduced memory fragmentation compared to dynamic allocation
        - Pre-allocated buffer pool for faster access
        - Automatic buffer reuse without explicit deallocation

    Usage:
        ```python
        # Create a rotary allocator
        allocator = RotaryBucketAllocator(name="gpt_parameters")

        # Get a temporary buffer from the pool
        temp_bucket = allocator.allocate(dtype=torch.float32)

        # Use the temporary bucket for FSDP operations
        # ... perform all-gather or reduce-scatter ...

        # Free the bucket when done, make it in idle buffer pool
        allocator.free(temp_bucket)
        ```
    """

    def __init__(self, name: str):
        super().__init__()
        self.name = name
        self.num_global_buffer = 0
        self.idle_buffer = []  # [buffer_id]
        self.using_buffer = {}  # {bucket_id: buffer_id}

    def allocate(
        self,
        bucket_id: int,
        size: int,
        dtype: torch.dtype,
        device: torch.device,
        mem_alloc_context: Optional[Callable] = None,
    ) -> Bucket:
        """
        allocate a temporary bucket.
        """

        def _get_global_buffer(buffer_id: int):
            return get_global_memory_buffer().get_tensor(
                [size],
                dtype=dtype,
                name=self._get_gbuf_name(buffer_id),
                mem_alloc_context=mem_alloc_context,
            )

        if bucket_id in self.using_buffer:
            buffer_id = self.using_buffer[bucket_id]
            return Bucket(data=_get_global_buffer(buffer_id))

        if len(self.idle_buffer) == 0:
            # allocate new buffer
            buffer_id = self.num_global_buffer
            self.num_global_buffer += 1
            self.idle_buffer.append(buffer_id)

        buffer_id = self.idle_buffer.pop(0)
        self.using_buffer[bucket_id] = buffer_id
        return Bucket(data=_get_global_buffer(buffer_id))

    def _get_gbuf_name(self, buffer_id: int):
        return f"{self.name}_{buffer_id}"

    def free(self, bucket_id: int):
        """
        free a temporary bucket.
        """
        if bucket_id in self.using_buffer:
            buffer_id = self.using_buffer.pop(bucket_id)
            self.idle_buffer.append(buffer_id)


class FixedPoolAllocator(TemporaryBucketAllocator):
    """
    A specialized temporary bucket allocator that implements a buffer recycling strategy
    to minimize memory fragmentation in FSDP operations.

    This allocator maintains a fixed pool of pre-allocated buffers, reusing them
    to reduce the overhead and fragmentation caused by frequent allocation and
    deallocation of temporary buffers during FSDP operations.
    """

    def __init__(
        self,
        name: str,
        fsdp_param_groups: List["ParameterGroup"],
        size: int = 2,
        fallback_to_persistent_buffer: bool = False,
    ):
        self.name = name
        self.fsdp_param_groups = fsdp_param_groups
        self.size = size  # Number of buffers in the pool (default is 2 for double buffering)
        self.allocation_tracker = {}  # tracking the global buffer allocation status

        # Build a mapping from FSDP unit id to its associated bucket ids.
        fsdp_unit_buckets = defaultdict(list)
        for bucket_id, param_group in enumerate(fsdp_param_groups):
            if param_group.fsdp_unit_id == -1 or param_group.fsdp_unit_id is None:
                continue
            fsdp_unit_buckets[param_group.fsdp_unit_id].append(bucket_id)
        self.fsdp_unit_buckets = fsdp_unit_buckets

        # Identify the largest group of FSDP units that share the same buffer storage.
        fsdp_units_to_double_buffer = []
        for fsdp_unit_id, bucket_ids in fsdp_unit_buckets.items():
            same_storage_fsdp_units = []
            for i in fsdp_unit_buckets:
                if self._is_two_bucket_group_equal(fsdp_unit_buckets[i], bucket_ids):
                    same_storage_fsdp_units.append(i)
            # Track the largest group of FSDP units sharing the same buffer storage
            if len(same_storage_fsdp_units) > len(fsdp_units_to_double_buffer):
                fsdp_units_to_double_buffer = same_storage_fsdp_units

        # --- Fixed Pool Buffering Check ---
        # Ensure there is at least one group of FSDP units eligible for fixed pool buffering.
        # If not, the allocator cannot provide its intended memory recycling benefits.
        assert (
            len(fsdp_units_to_double_buffer) > 0
        ), "Found no FSDP units to use fixed-size buffering"
        self.fsdp_double_buffer_units = fsdp_units_to_double_buffer

        if torch.distributed.get_rank() == 0:
            for bucket_id, param_group in enumerate(fsdp_param_groups):
                if (
                    param_group.fsdp_unit_id == -1
                    or param_group.fsdp_unit_id is None
                    or param_group.fsdp_unit_id not in self.fsdp_double_buffer_units
                ):
                    logging.info(
                        f"FSDP unit (id={param_group.fsdp_unit_id}) does not fit "
                        "in FixedPoolAllcator"
                    )
                    if fallback_to_persistent_buffer is False:
                        logging.info(
                            "It will fall back to dynamic memory allocator, NCCL user "
                            "buffer is not supported"
                        )
                    else:
                        logging.info(
                            "It will be allocated a persistent buffer. If the memory "
                            "budget is tight, set "
                            "trainer.strategy.ddp.fsdp_db_use_persist_buf_on_alloc_fail to False."
                        )

        # Initialize buffer group status.
        # Each buffer group represents a set of buffers associated with an FSDP unit's bucket group.
        self.idle_buffer = []  # List of available (buf_group_id, offset) tuples.
        self.using_buffer = {}  # Map from bucket_id to (buf_group_id, offset) in use.

        # Populate the idle buffer pool with all buffer group and bucket offset combinations.
        for buf_group_id in range(self.size):  # Iterate over each buffer group in the pool.
            num_bucket = len(self.fsdp_unit_buckets[self.fsdp_double_buffer_units[0]])
            for bucket_offset in range(num_bucket):
                self.idle_buffer.append((buf_group_id, bucket_offset))

        # Fallback allocator used if the fixed pool allocator cannot fulfill a request.
        self.fallback_to_persistent_buffer = fallback_to_persistent_buffer
        self.backup_allocator = TemporaryBucketAllocator()

    def _is_two_bucket_group_equal(self, group_a, group_b):
        # Check if two bucket groups are equivalent in dtype and size.
        if len(group_a) != len(group_b):
            return False

        for a, b in zip(group_a, group_b):
            pg_a = self.fsdp_param_groups[a]
            pg_b = self.fsdp_param_groups[b]
            a_size = sum(p.numel() for p in pg_a.params)
            b_size = sum(p.numel() for p in pg_b.params)
            if pg_a.dtype != pg_b.dtype or a_size != b_size:
                return False
        return True

    def allocate(
        self,
        bucket_id: int,
        size: int,
        dtype: torch.dtype,
        device: torch.device,
        mem_alloc_context: Optional[Callable] = None,
    ) -> Bucket:
        """
        allocate a temporary bucket.
        """
        fsdp_unit_id = self.fsdp_param_groups[bucket_id].fsdp_unit_id
        if fsdp_unit_id in self.fsdp_double_buffer_units:
            # Try to allocate from the buffer pool.
            bucket_offset = self.fsdp_unit_buckets[fsdp_unit_id].index(bucket_id)
            buffer_name = None
            if bucket_id in self.using_buffer:
                # If this bucket is already using a buffer, reuse it.
                buf_group_id, bucket_offset = self.using_buffer[bucket_id]
                buffer_name = self._get_gbuf_name(buf_group_id, bucket_offset)
            else:
                # Otherwise, find an available buffer group for this bucket offset.
                for buf_group_id in range(self.size):
                    if (buf_group_id, bucket_offset) in self.idle_buffer:
                        self.using_buffer[bucket_id] = (buf_group_id, bucket_offset)
                        buffer_name = self._get_gbuf_name(buf_group_id, bucket_offset)
                        self.idle_buffer.remove((buf_group_id, bucket_offset))
                        break

            assert buffer_name is not None, (
                f"[FSDP][Rank {torch.distributed.get_rank()}][{self.name}] "
                f"No buffer found for bucket_id: {bucket_id}, fsdp_unit_id: {fsdp_unit_id}, "
                f"bucket_offset: {bucket_offset} \n"
                f"current using_buffer: {self.using_buffer} \n"
                f"current idle_buffer: {self.idle_buffer}"
            )
        elif self.fallback_to_persistent_buffer is True:
            buffer_name = f"{self.name}_not_fit_in_fixed_pool_{bucket_id}_{size}_{dtype}_{device}"
        else:
            # If the bucket is not eligible for fixed pool buffering, or no buffer is available,
            # fall back to dynamic allocation via the backup allocator. This means that we
            # will do dynamic memory allocation.
            logging.debug(f"[FSDP] Using backup allocator for {bucket_id} {fsdp_unit_id}")
            return self.backup_allocator.allocate(
                bucket_id=bucket_id, size=size, dtype=dtype, device=device
            )

        # Use buffer_name to get memory from global memory.
        if mem_alloc_context is not None and mem_alloc_context != nullcontext:
            # Check if a new buffer allocation is required
            if (
                self.allocation_tracker.get((buffer_name, dtype), None) is None
                or self.allocation_tracker[(buffer_name, dtype)] < size
            ):
                # Requires synchronization for new buffer allocation
                self.allocation_tracker[(buffer_name, dtype)] = size
                torch.cuda.synchronize()
        return Bucket(
            data=get_global_memory_buffer().get_tensor(
                [size], dtype=dtype, name=buffer_name, mem_alloc_context=mem_alloc_context
            )
        )

    def _get_gbuf_name(self, buf_group_id: int, bucket_index: int):
        return f"{self.name}_{buf_group_id}_{bucket_index}"

    def free(self, bucket_id: int):
        """
        free a temporary bucket.
        """
        fsdp_unit_id = self.fsdp_param_groups[bucket_id].fsdp_unit_id
        if fsdp_unit_id in self.fsdp_double_buffer_units:
            if bucket_id not in self.using_buffer:
                # This bucket is not allocated by fixed pool allocator.
                return
            # Return the buffer to the idle pool.
            self.idle_buffer.append(self.using_buffer[bucket_id])
            del self.using_buffer[bucket_id]
            return
        if self.fallback_to_persistent_buffer is False:
            # If not managed by fixed pool allocator, delegate to the backup allocator.
            logging.debug(f"[FSDP] Free from the backup allocator for {bucket_id} {fsdp_unit_id}")
            self.backup_allocator.free(bucket_id)


class DataParallelBuffer:
    """
    A class that manages the data parallel buffer for Fully Sharded Data Parallel (FSDP) training.
    It has two operating modes given a bucket of module parameters:

        - Sharded: The bucket is sharded across the data parallel group, and each
            rank will manage a shard of the bucket that is persistently stored in this buffer.
        - Unsharded: The bucket is not sharded, and the entire bucket is persistently
            stored in this buffer. Virtual shards of this unsharded buffer can be
            retrieved from each rank when needed.

    This design supports interoperability of sharded and unsharded buffers,
    e.g. optim and optim_grads,
    where buffers associated with sharded parameters can be utilized with buffers
    associated with unsharded parameters through the use of "virtual" or rank-specific
    shards for the unsharded buffers.
    """

    def __init__(
        self,
        ddp_config: DistributedDataParallelConfig,
        params: List[torch.nn.Parameter],
        is_data_distributed: bool,
        bucket_id: int,
        dtype: Optional[torch.dtype] = None,
        device: Optional[torch.device] = None,
        data_parallel_group: Optional[torch.distributed.ProcessGroup] = None,
        dp_rank: Optional[int] = None,
        temporary_bucket_allocator: Optional[TemporaryBucketAllocator] = None,
        is_transpose_buffer: bool = False,
        gradient_scaling_factor: Optional[float] = None,
        chunk_size_factor: int = 1,
        mem_alloc_context: Optional[Callable] = None,
        item_index_map: Optional[Dict[int, TensorItemIndex]] = None,
        bucket_index: Optional[BucketIndex] = None,
        shard_bucket_index: Optional[ShardBucketIndex] = None,
    ) -> None:
        self.ddp_config = ddp_config
        self.params = params
        _param_dtype = {p.dtype for p in self.params}

        assert len(_param_dtype) == 1, f"params have different dtypes: {_param_dtype}"
        self.is_data_distributed = is_data_distributed
        self.bucket_id = bucket_id
        self.dtype = dtype if dtype else next(iter(_param_dtype))
        self.device = device
        self.data_parallel_group = data_parallel_group
        # NOTE: Specifying dp_rank is a tricky thing. Currently, only full-shard
        # hybrid FSDP needs to do this to set dp rank that is different from the group rank.
        if dp_rank is not None:
            logger.warning(
                f"[FSDP] DataParallelBuffer[{bucket_id}] initialized with dp_rank={dp_rank}, "
                f"native dp_rank={torch.distributed.get_rank(data_parallel_group)}, "
                f"global_rank={torch.distributed.get_rank()}"
            )
            self.dp_rank = dp_rank
        else:
            self.dp_rank = torch.distributed.get_rank(data_parallel_group)
        self.dp_world_size = torch.distributed.get_world_size(data_parallel_group)
        self.temporary_bucket_allocator = (
            temporary_bucket_allocator if temporary_bucket_allocator else TemporaryBucketAllocator()
        )
        self.is_transpose_buffer = is_transpose_buffer
        self.gradient_scaling_factor = gradient_scaling_factor
        self.mem_alloc_context = mem_alloc_context if mem_alloc_context else nullcontext

        # Setup the item index map, bucket index, and shard bucket index from
        # the provided arguments, or build them if not provided.
        if item_index_map or bucket_index or shard_bucket_index:
            assert (
                item_index_map is not None
            ), "item_index_map must be provided if bucket_index or shard_bucket_index is provided"
            assert (
                bucket_index is not None
            ), "bucket_index must be provided if item_index_map or shard_bucket_index is provided"
            assert (
                shard_bucket_index is not None
            ), "shard_bucket_index must be provided if item_index_map or bucket_index is provided"
            self.item_index_map = item_index_map
            self.bucket_index = bucket_index
            self.shard_bucket_index = shard_bucket_index
        else:
            # Build the data parallel buffer index, which contains information
            # on where each parameter / gradient tensor will be stored in this
            # distributed buffer.
            (self.item_index_map, self.bucket_index, self.shard_bucket_index) = (
                build_data_parallel_buffer_index(
                    [to_local_if_dtensor(p).shape for p in self.params],
                    self.dp_rank,
                    self.dp_world_size,
                    is_data_distributed,
                    ddp_config,
                    bucket_id=bucket_id,
                    chunk_size_factor=chunk_size_factor,
                )
            )

        self.data_size = (
            self.bucket_index.size if not is_data_distributed else self.shard_bucket_index.size
        )

        # Count all parameters in this buffer and store their enumerated index.
        self.param_idx = {p: i for i, p in enumerate(self.params)}

    def init_data(self, data: torch.Tensor):
        """Allocate a buffer Tensor to persistently store the data for this
        (shard of) the buffer.
        """
        assert data.dtype == self.dtype, f"Data type mismatch: {data.dtype} != {self.dtype}"
        assert (
            data.numel() == self.data_size
        ), f"Data size mismatch: {data.numel()} != {self.data_size}"
        self.data = data

    def fetch_bucket(
        self, dtype: Optional[torch.dtype] = None, set_param_data: bool = False
    ) -> Bucket:
        """
        Fetch a communication buffer for data-parallel operations. If the buffer
        is not distributed and doesn't have a custom data-type, the local un-sharded
        buffer data will be returned. Otherwise, an un-sharded bucket is allocated.

        The size of the bucket is defined by the `DataParallelBuffer` instance.

        Args:
            dtype (Optional[torch.dtype]): The data type of the tensor
                to fetch a buffer for. Defaults to None.

        Returns:
            Bucket: The communication buffer for the specified data type.
        """
        if dtype is None:
            dtype = self.dtype

        if not self.is_data_distributed and dtype == self.dtype:
            # Use pre-allocated un-sharded bucket data as the communication buffer.
            bucket_index = self.bucket_index
            bucket = Bucket(
                data=self.data[
                    bucket_index.global_data_index : bucket_index.global_data_index
                    + bucket_index.size
                ]
            )
        else:
            # Sharded or dtype-custom buffers require un-sharded bucket allocation.
            bucket = self.allocate_bucket_storage(dtype=dtype, device=self.device)

        # Need to set parameter data after resize model weight buffer data-storage.
        if set_param_data:
            for p in self.params:
                item_id = self.param_idx[p]
                p = to_local_if_dtensor(p)
                data = self.get_item_from_bucket(bucket, item_id).view(p.shape)
                if is_float8tensor(p):
                    fp8_set_raw_data(p, data, self.is_transpose_buffer)
                else:
                    p.data = data
        return bucket

    def allocate_bucket_storage(
        self,
        shard: bool = False,
        dtype: Optional[torch.dtype] = None,
        device: Optional[torch.device] = None,
        init_values: Optional[torch.Tensor] = None,
    ) -> Bucket:
        """
        Allocate a temporary flat communication buffer using the cached
        allocator associated with this DataParallelBuffer. If no bucket
        corresponding to the Bucket ID of this DataParallelBuffer has
        been allocated yet, an empty Tensor will be allocated.

        Can allocate sharded or un-sharded buckets.

        Optionally, if init_values is provided, the flattened values of
        the init_values will be copied into the newly-allocated storage.

        Args:
            shard (bool):
                Whether to allocate a sharded or un-sharded bucket with
                sizes defined by this DataParallelBuffer.
            dtype (Optional[torch.dtype]):
                Data-type of the allocated bucket.
            device (Optional[torch.device]):
                Device of the allocated bucket.
            init_values (Optional[torch.Tensor]):
                If provided, the allocated storage will be initialized
                to the values of this (flattened) Tensor.

        Returns:
            Bucket: The communication buffer for the specified data type.
        """
        # Default to buffer data-type.
        if dtype is None:
            dtype = self.dtype
        if device is None:
            device = self.device
        # Allocate temporary storage using standardized sizes.
        alloc_size = self.shard_bucket_index.size if shard else self.bucket_index.size
        bucket = self.temporary_bucket_allocator.allocate(
            bucket_id=self.bucket_index.bucket_id,
            size=alloc_size,
            dtype=dtype,
            device=device,
            mem_alloc_context=self.mem_alloc_context,
        )
        # Copy Tensor values into Bucket data.
        if init_values is not None:
            assert bucket.data.shape == init_values.flatten().shape, (
                "[allocate_bucket_storage] Size mismatch between allocated bucket "
                f"({bucket.data.shape}) and init_values ({init_values.shape})!"
            )
            bucket.data.detach().copy_(init_values.flatten())
        return bucket

    def free_bucket_storage(self):
        """
        Release the storage of a temporarily-allocated communication bucket.
        """
        self.temporary_bucket_allocator.free(self.bucket_index.bucket_id)

    def reset_param_main_grad(self):
        """
        Dereference param.main_grad for grad buckets managed by this buffer.
        """
        # Reset the main grad tensor to None to release the memory.
        for param in self.params:
            if hasattr(param, "main_grad"):
                param.main_grad = None

    def _get_item_slice_in_shard(self, item_id: int) -> Tuple[int, int]:
        """
        Return the coordinates of the slice of the item that is contained
        in this buffer shard. In other words, this returns the coordinates
        of all of the data in this item that is stored in this shard.

        Maps to the global coordinates of the item in the bucket when added to
        the starting coordinate of the item in the bucket, and maps to the local
        coordinates of the item in the shard when added to the difference between
        the starting coordinate of the item and the starting coordinate of the
        shard in the global bucket (i.e. mapping from item coordinates to global
        coordinates to shard coordinates).
        """
        item_index = self.item_index_map[item_id]
        shard_bucket_index = self.shard_bucket_index

        # Define the boundaries of the item in the global buffer,
        # as well as the boundaries of the shard in the buffer.
        # The tensor and shard boundaries may not align, so we
        # need to find their intersection, i.e. the slice of the
        # item that is contained in this shard.
        item_global_start = item_index.global_data_index
        item_global_end = item_index.global_data_index + item_index.size
        shard_bucket_start = shard_bucket_index.global_data_index
        shard_bucket_end = shard_bucket_index.global_data_index + shard_bucket_index.size

        # If the item is not in the shard, return 0, 0.
        if item_global_start > shard_bucket_end or item_global_end < shard_bucket_start:
            return (0, 0)

        # Find the slice of the item that is contained in this buffer shard relative
        # to the starting index of the item in the global bucket. If the item starts
        # before the shard, then the offset to reach the start of the slice of the item
        # in the shard from the starting index of the item is the difference between
        # the start of the shard and the start of the item. Otherwise, the offset is 0,
        # because the start of the item is within the shard.
        start = max(item_global_start, shard_bucket_start) - item_global_start
        # If the item ends after the shard, then the offset to reach the end of the
        # slice of the item in the shard from the starting index of the item is the
        # difference between the end of the shard and the start of the item. Otherwise,
        # the offset is just the size of the item, because the end of the item is
        # contained within the shard.
        end = min(item_global_end, shard_bucket_end) - item_global_start

        # Return the boundaries of the item in the shard relative to the global
        # start of the item.
        return (start, end)

    def locate_item_in_global_item(self, item_id: int) -> Tuple[int, int]:
        """
        Return the coordinates of the slice of the item that is contained
        in this buffer shard. In other words, this returns the coordinates
        of all of the data in this item that is stored in this shard.

        Helper function that adds a shortcut when the buffer is not sharded,
        in which case we don't need to compute the item-shard intersection,
        and can simply return the coordinates of the entire item.
        """
        item_index = self.item_index_map[item_id]
        if not self.is_data_distributed:
            # Buffer is not sharded, so we don't need to compute item-shard intersection.
            return (0, item_index.size)

        # Get item-shard intersection, and return item coordinates!
        slice_start, slice_end = self._get_item_slice_in_shard(item_id)
        if slice_start == slice_end:
            return (0, 0)
        return (slice_start, slice_end)

    def _get_item_local_shard_index(self, item_id: int) -> Tuple[int, int]:
        """
        Return the local coordinates of the slice of this buffer's shard that
        contains the item with the given ID. In other words, this returns the
        coordinates of all of the data in this shard associated with the item.

        Maps to the global coordinates of the item in the bucket when added to
        the starting coordinate of the shard in the global bucket, and maps to
        the coordinates of the item contained in the shard when added to the
        difference between the starting coordinate of the shard and the starting
        coordinate of the item in the global bucket (i.e. mapping from shard
        coordinates to global coordinates to item coordinates).
        """
        # Get the coordinates of the slice of the item that is contained in this shard.
        slice_start, slice_end = self._get_item_slice_in_shard(item_id)
        if slice_start == slice_end:
            # The item does not intersect this shard.
            return (0, 0)

        item_index = self.item_index_map[item_id]
        shard_bucket_index = self.shard_bucket_index

        """
        Compute the offset that maps the coordinates of the slice of the item in
        this shard to the local coordinates of the slice of this shard that contains
        the item, for retrieval of the item's data stored in this shard.
            - If distributed, then evaluates to item_start - shard_start
                (because shard_local_data_index = 0).
            - If not distributed, then evaluates to item_start (because
                shard_local_data_index = shard_global_data_index).
                This maps the coordinates of the slice of the item in this shard to
                the global coordinates of the slice of the item in the bucket because
                the unsharded buffer entirely backs the global bucket.
        """
        offset = (
            item_index.global_data_index
            - shard_bucket_index.global_data_index
            + shard_bucket_index.local_data_index
        )

        # Return the local coordinates of the slice of the item contained in this
        # (sharded or unsharded) buffer.
        return (offset + slice_start, offset + slice_end)

    def _get_item_local_index(self, item_id: int) -> Tuple[int, int]:
        """
        Return the local coordinates of the slice of this buffer's data that
        contains the item with the given ID.
        """
        if not self.is_data_distributed:
            # Return the boundary indices of the item in the bucket buffer.
            # Shortcut case where the buffer / bucket is not sharded, so we
            # can retrieve the untruncated item tensor from the buffer without
            # calculating the intersection of the item and the shard.
            item_index = self.item_index_map[item_id]
            # Note: Buffer coordinates = bucket coordinates when the buffer is not sharded.
            return (item_index.global_data_index, item_index.global_data_index + item_index.size)
        # Otherwise, return the local coordinates of the slice of this
        # buffer's shard that intersects the specified item tensor.
        return self._get_item_local_shard_index(item_id)

    def set_item(self, item_id: int, item_data: torch.Tensor) -> None:
        """
        Update a Tensor item managed by the `DataParallelBuffer` instance,
        i.e. store (a shard of) the Tensor in this buffer's datastore.

        The storage of the item is mapped to the communication bucket.
        This method updates the item data and ensures consistency with the bucket.

        Args:
            item_id (int): The ID of the tensor item to update.
            item_data (torch.Tensor): The new data for the tensor item.

        Returns:
            None
        """
        # When fully sharded, we need to get the slice of the item to be stored in this shard.
        # Otherwise, we can just flatten the entire item since this buffer contains
        # the entire bucket.
        if is_float8tensor(item_data):
            item_data = fp8_get_raw_data(item_data, self.is_transpose_buffer)

        if self.is_data_distributed:
            # Get the coordinates of the slice of the item that is contained in this shard.
            slice_start, slice_end = self._get_item_slice_in_shard(item_id)
            # Flatten the item data and get the slice of the item to place in the shard.
            item_data = item_data.flatten()[slice_start:slice_end]
        # Get the local coordinates of the slice of this buffer's shard that
        # intersects the specified item tensor.
        local_index_start, local_index_end = self._get_item_local_index(item_id)
        # Copy the slice of the item associated with this sharded buffer into the
        # slice of this buffer's shard that intersects the specified item tensor.
        shard = self.data[local_index_start:local_index_end]
        if shard.numel() > 0:
            shard.data.copy_(item_data.flatten())

    def get_item(self, item_id: int, only_shard: bool = False) -> torch.Tensor:
        """
        Retrieve a tensor item managed by the `DataParallelBuffer` instance,
        i.e. get all the item data stored in this sharded or unsharded buffer.

        The storage of the item is mapped to the communication bucket.
        If `only_shard` is True, returns only the shard of the item corresponding
            to the current process / rank, a "virtual shard" for unsharded buffers.
        Otherwise, returns the entire item, which could be a bucket shard or bucket.

        Args:
            item_id (int): The ID of the tensor item to retrieve.
            only_shard (bool, optional): Whether to return only the shard of the
                item. Defaults to False.

        Returns:
            torch.Tensor: The retrieved tensor item.
        """
        if only_shard:
            # Get segment of the item saved in the shard associated with this rank.
            # Used in situations where the buffer is unsharded but another buffer
            # associated with this buffer's data is sharded, so you need to retrieve
            # a "virtual shard" of the item corresponding to this process / rank
            # from this unsharded buffer.
            start, end = self._get_item_local_shard_index(item_id)
        else:
            # Retrieve all item data stored in this buffer. Buffer could be sharded or unsharded.
            # When sharded, return the intersection of the item and the bucket shard
            # stored in this buffer.
            # When unsharded, return the entire item in the unsharded bucket stored in this buffer.
            start, end = self._get_item_local_index(item_id)

        return self.data[start:end]

    def get_item_from_bucket(self, bucket: Bucket, item_id: int):
        """
        Get Tensor item data from the given bucket specified by the item ID.
        """
        item_index = self.item_index_map[item_id]
        bucket_index = self.bucket_index
        start_index = item_index.global_data_index - bucket_index.global_data_index
        end_index = start_index + item_index.size
        item = bucket.data[start_index:end_index]
        return item

    def get_shard_from_bucket(self, bucket: Bucket):
        """
        Get the shard from the provided bucket associated with the sharding strategy of this buffer.
        """
        shard_bucket_index = self.shard_bucket_index
        offset = shard_bucket_index.bucket_data_index
        shard_size = shard_bucket_index.size
        shard = bucket.data[offset : offset + shard_size]
        return shard

    def get_shard_from_local_buffer(self) -> torch.Tensor:
        """
        Get the shard or virtual shard of the data persistently stored in this buffer.
        """
        index = self.shard_bucket_index
        # If the buffer is sharded, return the shard stored in this buffer.
        # Otherwise, return the virtual shard of the bucket associated with this buffer,
        # corresponding to the process / rank of this buffer.
        return self.data[index.local_data_index : index.local_data_index + index.size]


@dataclasses.dataclass
class ParameterGroup:
    """
    Represents a group of model parameters along with metadata for managing
    data-parallel training in PyTorch.

    This class encapsulates a list of parameters and associated information
    such as data type, gradient requirements, and references to buffers used
    in distributed training contexts.

    Attributes:
        params (List[torch.nn.Parameter]):
            The list of model parameters grouped together.
        dtype (Optional[torch.dtype]):
            The desired data type for the parameters.
        is_expert_param (bool):
            Indicates if this group contains expert parameters
            (e.g., in mixture-of-experts).
        requires_grad (Optional[bool]):
            Specifies if gradients should be computed for these parameters.
        fsdp_unit_id (Optional[int]):
            Identifier for Fully Sharded Data Parallel (FSDP) unit grouping.
        chunk_size_factor (int):
            Factor determining chunk size for grouped parameter processing.
        model_weight_buffer (Optional[DataParallelBuffer]):
            Buffer used to store model weights for data-parallel operations.
        transpose_weight_buffer (Optional[DataParallelBuffer]):
            Buffer used to store transpose weights for data-parallel operations.
        main_weight_buffer (Optional[DataParallelBuffer]):
            Buffer used to store main model weights for data-parallel operations.
        main_grad_buffer (Optional[DataParallelBuffer]):
            Buffer used to store main gradients for data-parallel operations.
        hsdp_wbuf (Optional[DataParallelBuffer]):
            Buffer for weights used in Hybrid Sharded Data Parallel (HSDP).
            Exists only if full sharding (HFSDP) is enabled in HSDP.
        hsdp_gbuf (Optional[DataParallelBuffer]):
            Buffer for gradients used in HSDP.
            Exists only if full sharding (HFSDP) is enabled in HSDP.
        hsdp_comm_gbuf (Optional[DataParallelBuffer]):
            Extra buffer to allocate buffers that enable custom gradient
            communication data-types when using HSDP or HFSDP only.
            Only allocates memory when `MixedPrecisionPolicy.grad_comm_dtype`
            is set to a non-`None` `torch.dtype`. Contains no local data.
    """

    params: List[torch.nn.Parameter]
    dtype: Optional[torch.dtype] = None
    is_expert_param: bool = False
    requires_grad: Optional[bool] = None
    fsdp_unit_id: Optional[int] = None
    chunk_size_factor: int = 1
    model_weight_buffer: Optional[DataParallelBuffer] = None
    transpose_weight_buffer: Optional[DataParallelBuffer] = None
    main_weight_buffer: Optional[DataParallelBuffer] = None
    main_grad_buffer: Optional[DataParallelBuffer] = None
    hsdp_wbuf: Optional[DataParallelBuffer] = None
    hsdp_gbuf: Optional[DataParallelBuffer] = None
    hsdp_comm_gbuf: Optional[DataParallelBuffer] = None


def _get_parameter_groups(
    module: torch.nn.Module,
    policy: BucketingPolicy,
    meta_device_init_fp8_params: dict,
    bucket_group_by_fsdp_unit: bool = True,
):
    """
    Get the parameter group for the given module and parameters.

    Args:
        module (torch.nn.Module): The module whose parameters are to be grouped
            and flattened.
        policy (BucketingPolicy): The bucketing policy.
        meta_device_init_fp8_params (dict): A dictionary mapping parameter names to
            a boolean indicating whether the parameter is initialized on the meta device.
        bucket_group_by_fsdp_unit (bool): Whether to group buckets by FSDP unit.

    Returns:
        Tuple[List[ParameterGroup], Dict[torch.nn.Parameter, int], Dict[int, List[int]]]:
            - The list of parameter groups.
            - The mapping from parameters to their bucket group ID.
            - The mapping from bucket ID to the full group of bucket IDs that are
                NCCL-aggregated with this bucket ID.
    """

    # Step 0: Register new FSDP unit modules.
    param_to_name = {p: name for name, p in module.named_parameters()}
    # fsdp_units is a list of lists of parameter names, one list per FSDP unit module.
    fsdp_units = []
    if policy.fsdp_unit_modules:
        fsdp_modules = []
        # Loop through all sub-modules of the module.
        for m in module.modules():
            # Skip nested FSDP module, i.e. FSDP modules already have their
            # sub-module parameters registered.
            if any(is_submodule(module, fsdp_module) for fsdp_module in fsdp_modules):
                continue
            # If the sub-module is a FSDP unit module, add its parameter (names)
            # to the list of FSDP units.
            if isinstance(m, tuple(policy.fsdp_unit_modules)):
                fsdp_units.append([param_to_name[p] for p in m.parameters()])
                fsdp_modules.append(m)

    def _does_param_require_new_bucket(param):
        """
        Split shared embedding parameters into separate bucket if using distributed
        optimizer that makes use of reduce-scatters instead of all-reduces.
        This ensures that the first and last pipeline stage partition optimizer state
        for the shared embedding parameters the same way across DP replicas, allowing
        the DP reduce-scatter to be before the embedding all-reduce.
        """
        return (
            getattr(param, "shared_embedding", False)
            and policy.data_parallel_sharding_strategy != "no_shard"
        )

    is_expert_parameter = lambda n, p: ".experts." in n

    # Step 1: Group the parameters according to their execution order and attributes.
    # FSDP unit module parameters are split into multiple parameter sub-groups.
    # All parameters in the module are assigned a parameter group, even non-FSDP modules.
    parameter_groups = []
    for name, param in module.named_parameters():
        # We need this information to correctly dynamically allocate Tensors!
        is_fp8 = is_float8tensor(param)
        is_fp8_meta_device_init = meta_device_init_fp8_params.get(name, (False, False))[0]
        param_attrs = dict(
            dtype="float8" if (is_fp8 or is_fp8_meta_device_init) else param.dtype,
            is_expert_param=is_expert_parameter(name, param),
            requires_grad=param.requires_grad,
            fsdp_unit_id=None,
        )

        # For all the new FSDP unit parameters collected, assign an ID number
        # associated with which unit module the parameter belongs to.
        for fsdp_unit_id, fsdp_unit in enumerate(fsdp_units):
            if name in fsdp_unit:
                param_attrs["fsdp_unit_id"] = fsdp_unit_id
                break

        found_group = False
        # Check if the parameter already belongs to a group.
        for param_group in parameter_groups:
            group_attrs = {
                key: value for key, value in param_group.__dict__.items() if key in param_attrs
            }
            # Parameters are grouped by their attributes and FSDP unit module ID.
            if group_attrs == param_attrs:
                param_group.params.append(param)
                found_group = True
                break

        # If the parameter does not belong to any group, create a new group for it.
        if not found_group:
            parameter_groups.append(ParameterGroup([param], **param_attrs))

    # Step 2: Bucket the parameters based on the guide bucket size.
    # Parameter groups can be split into multiple buckets based on bucket size.
    suggested_bucket_size = policy.suggested_bucket_size
    bucket_groups = []
    for group in parameter_groups:
        bucket = []

        # Bucket attributes.
        basic_attrs = {
            key: value
            for key, value in group.__dict__.items()
            if key in ["dtype", "is_expert_param", "requires_grad", "fsdp_unit_id"]
        }
        for param in group.params:
            if _does_param_require_new_bucket(param):
                # We may share the embedding model weight and the final output layer,
                # which will cause the gradient of this parameter to be generated twice.
                # To reduce and identify both gradients of these parameters, create a new
                # bucket for every instance of these parameters in our parameter groups.
                if len(bucket) > 0:
                    # Append the current bucket to the list of bucket groups.
                    bucket_groups.append(ParameterGroup(bucket, **basic_attrs))
                # Create a new bucket for the parameter.
                bucket_groups.append(ParameterGroup([param], **basic_attrs))
                bucket = []
                continue

            # Append the parameter to the current bucket.
            bucket.append(param)
            # If the current bucket has reached the suggested bucket size,
            # append the bucket as a parameter group to the list of bucket groups
            # and create a new bucket. Used to control the size of parameter
            # groups that are not members of FSDP unit modules.
            if (
                group.fsdp_unit_id is None
                and suggested_bucket_size
                and sum([to_local_if_dtensor(p).shape.numel() for p in bucket])
                >= suggested_bucket_size
            ):
                # Create a new parameter group from a subset of the original
                # parameter group's parameters.
                bucket_groups.append(ParameterGroup(bucket, **basic_attrs))
                bucket = []
                continue

        # Append the parameter group bucket to the list of bucket groups.
        if bucket:
            bucket_groups.append(ParameterGroup(bucket, **basic_attrs))

    # Step 3: Split parameter groups to meet communication segmentation requirements.
    new_bucket_groups = []
    for group in bucket_groups:
        params = sorted(
            group.params, key=lambda p: to_local_if_dtensor(p).shape[1:].numel(), reverse=True
        )
        while len(params) > 0:
            chunk_size_factor = to_local_if_dtensor(params[0]).shape[1:].numel()
            same_factor_params = []
            remaining_params = []
            for param in params:
                param_shape = to_local_if_dtensor(param).shape
                if (
                    param_shape[1:].numel() == chunk_size_factor
                    or (
                        chunk_size_factor % param_shape[1:].numel() == 0
                        and param_shape.numel() % chunk_size_factor == 0
                    )
                    or (param_shape.numel() < chunk_size_factor)
                ):
                    same_factor_params.append(param)
                else:
                    lcm_chunk_size_factor = math.lcm(chunk_size_factor, param_shape[1:].numel())
                    chunk_size_factor = lcm_chunk_size_factor
                    same_factor_params.append(param)
            # Create a new parameter group with the same chunk size factor.
            new_bucket_groups.append(
                ParameterGroup(
                    same_factor_params,
                    dtype=group.dtype,
                    is_expert_param=group.is_expert_param,
                    requires_grad=group.requires_grad,
                    fsdp_unit_id=group.fsdp_unit_id,
                    chunk_size_factor=chunk_size_factor,
                )
            )
            params = remaining_params
    bucket_groups = new_bucket_groups

    # Map each parameter to its bucket group ID.
    param_to_param_group = {}
    for group_id, group in enumerate(bucket_groups):
        for param in group.params:
            param_to_param_group[param] = group_id

    # Step 4: Generate the groups of collective buckets, where each group aggregates
    # the collectives per FSDP unit. This improves performance by reducing
    # the number of collective calls and increasing per-collective efficiency.
    bucket_to_bucket_group = {}
    # This initializes the mapping from bucket ID to the full group of bucket IDs
    # that are associated with this bucket ID.
    for bucket_id in range(len(bucket_groups)):
        # Every bucket group associated with a bucket ID should contain the bucket ID.
        bucket_to_bucket_group[bucket_id] = [bucket_id]

    # Set aggregate buckets by FSDP units, i.e. buckets pertaining to the same
    # FSDP unit module and are either expert or non-expert parameters should
    # end up in the same bucket group for NCCL.
    # Non-FSDP unit parameters will be assigned to the identity bucket group.
    if bucket_group_by_fsdp_unit:
        bucket_group_map = {}

        # Assign bucket IDs to bucket groups from the same FSDP unit module.
        for bucket_id, param_group in enumerate(bucket_groups):
            if param_group.fsdp_unit_id is None:
                # Ignore parameter groups without FSDP unit IDs.
                # These come from the parameter group processing loop
                # which loops over all module parameters and groups by
                # everything else if the fsdp_unit_id is not set.
                continue
            # Create an FSDP unit ID sub-classified by expert / non-expert parameters.
            # Then index this pair in bucket_group_map.
            id = (param_group.fsdp_unit_id, param_group.is_expert_param)
            if id not in bucket_group_map:
                bucket_group_map[id] = []
            bucket_group_map[id].append(bucket_id)

        # For each aggregated bucket group based on FSDP unit module and parameter type,
        # overwrite the previously initialized bucket group associated with the bucket ID.
        for bucket_group in bucket_group_map.values():
            for bucket_id in bucket_group:
                bucket_to_bucket_group[bucket_id] = bucket_group

    # Return the full list of split bucket / parameter groups, the mapping from
    # parameters to their bucket group ID, and the mapping from bucket ID to the
    # full group of bucket IDs that are NCCL-aggregated with this bucket ID.
    return (bucket_groups, param_to_param_group, bucket_to_bucket_group)


class ParamAndGradBuffer:
    """A class that manages parameter grouping, buffer allocation, and
    communication operations for data-parallel distributed training.

    This class provides functionality to:
    1.  Group parameters based on their data types and communication group sizes.
    2.  Create contiguous buffers for model weights, gradients, and high-precision
        main weights.
    3.  Handle parameter unsharding, gradient reduction, and weight
        synchronization operations.

    Key Features:
        - Efficient parameter grouping based on data types and communication patterns
        - Memory-efficient contiguous buffer allocation
        - Support for mixed-precision training with main weights
        - Distributed operations including parameters all-gather and gradients
            reduce-scatter/all-reduce
        - Synchronized weight updates between model and main weights

    Note:
        This class is designed for distributed training scenarios where efficient
        parameter management and communication are crucial for performance.

    Args:
        ddp_config (DistributedDataParallelConfig): The distributed data parallel
            configuration.
        module (torch.nn.Module): The module whose parameters are to be grouped
            and flatten.
        bucketing_policy (BucketingPolicy): The bucketing policy.
        dist_index (FSDPDistributedIndex): FSDPDistributedIndex object containing references
            to the process groups and device meshes used by Megatron-FSDP.
        mixed_precision_policy (megatron_fsdp.MixedPrecisionPolicy): Configuration for
            mixed-precision customization of compute and communications in Megatron-FSDP.
        gradient_scaling_factor (Optional[float]): The gradient scaling factor.
        expert_gradient_scaling_factor (Optional[float]): The expert gradient
            scaling factor.
        device (torch.device): The parameter and gradient buffer device.
        only_create_grad_buffer_and_main_weight_buffer_for_param_requires_grad (bool):
            Whether to only create the gradient buffer and main weight buffer
            for parameters that require gradients. Default is True.
    """

    def __init__(
        self,
        ddp_config: DistributedDataParallelConfig,
        module: torch.nn.Module,
        bucketing_policy: BucketingPolicy,
        dist_index: FSDPDistributedIndex,
        mixed_precision_policy: MixedPrecisionPolicy = MixedPrecisionPolicy(),
        gradient_scaling_factor: Optional[float] = None,
        expert_gradient_scaling_factor: Optional[float] = None,
        device: torch.device = torch.device("cuda"),
        only_create_grad_buffer_and_main_weight_buffer_for_param_requires_grad: bool = True,
        reset_parameters_for_meta_device_init_module: bool = False,
    ):
        # Ensure consistent data parallel sharding settings across configurations
        # This assignment aligns the 'bucketing_policy' setting with the 'ddp_config'
        bucketing_policy.data_parallel_sharding_strategy = (
            ddp_config.data_parallel_sharding_strategy
        )

        self.ddp_config = ddp_config
        self.module = module
        self.bucketing_policy = bucketing_policy
        self.param_to_name = {p: name for name, p in self.module.named_parameters()}
        self.mp_policy = mixed_precision_policy
        self.dist_index = dist_index
        self.params = list(module.parameters())
        self.gradient_scaling_factor = gradient_scaling_factor
        self.expert_gradient_scaling_factor = expert_gradient_scaling_factor
        self.device = device
        self.only_create_grad_buffer_and_main_weight_buffer_for_param_requires_grad = (
            only_create_grad_buffer_and_main_weight_buffer_for_param_requires_grad
        )
        self.reset_parameters_for_meta_device_init_module = (
            reset_parameters_for_meta_device_init_module
        )
        self.ubr_groups = None
        self.already_registered = False
        # User buffer registration related settings
        if self.ddp_config.nccl_ub:
            assert nccl_allocator is not None, (
                "To use user buffer registration, "
                "either requires megatron.core.nccl_allocator or apex.contrib.nccl_allocator"
            )
            # Since the user buffer registration requires (non-dynamic) persistent memory,
            # it always uses fsdp double buffer.
            self.ddp_config.fsdp_double_buffer = True
            # Initialize the NCCL memory pool.
            global NCCL_MEMORY_POOL
            # Initialize NCCL allocator runtime if available
            nccl_allocator.init()
            NCCL_MEMORY_POOL = nccl_allocator.create_nccl_mem_pool(
                symmetric=not self.ddp_config.disable_symmetric_registration
            )
            log_single_rank(
                logger,
                logging.INFO,
                f"[Rank {torch.distributed.get_rank()}] Created NCCL memory pool for "
                "UserBuffer Registration",
            )
            log_single_rank(
                logger,
                logging.INFO,
                f"[Rank {torch.distributed.get_rank()}] FSDP double buffer is enabled.",
            )
            # Select the communicator groups to register FSDP buffers.
            self.ubr_groups = [self.dist_index.get_fsdp_group(is_expert_parallel=False)]
            if self.dist_index.get_fsdp_group(is_expert_parallel=True) is not None:
                # Expert-DP group when using EP
                self.ubr_groups.append(self.dist_index.get_fsdp_group(is_expert_parallel=True))
            if self.dist_index.get_outer_fsdp_group() is not None:
                # Outer/Inter-FSDP group when using hybrid FSDP
                self.ubr_groups.append(self.dist_index.get_outer_fsdp_group())
            if (
                self.dist_index.get_fsdp_group(
                    is_expert_parallel=False, independent_all_gather=True
                )
                is not None
            ):
                # All-gather group used when overlapping all-gather and gradient reduction.
                self.ubr_groups.append(
                    self.dist_index.get_fsdp_group(
                        is_expert_parallel=False, independent_all_gather=True
                    )
                )

            log_single_rank(
                logger,
                logging.INFO,
                f"[ParamAndGradBuffer] FSDP UBRegistration Groups ({len(self.ubr_groups)}):",
            )
            # All ranks in each group must participate in the collective to avoid deadlock.
            for i, group in enumerate(self.ubr_groups):
                log_single_rank(
                    logger,
                    logging.INFO,
                    f"Group [{i+1}/{len(self.ubr_groups)}] "
                    f"group.group_desc: {group.group_desc}, group.size(): {group.size()}",
                )
                torch.distributed.barrier(group=group, async_op=False)
                log_single_rank(
                    logger,
                    logging.INFO,
                    f"Call Success with the group [{i+1}/{len(self.ubr_groups)}] "
                    f"group.group_desc: {group.group_desc}",
                )
            # Call barrier from the global communitcator group
            torch.distributed.barrier(async_op=False)
            log_single_rank(logger, logging.INFO, "Call Success with the global communicator group")

        # If using nccl_ub, it returns a function that registers buffers to the NCCL memory pool
        # Buffer is registered to data_parallel_group and expert_data_parallel_group if it exists
        # In the case of not using nccl_ub, it returns a nullcontext
        self.mem_alloc_context = self.get_mem_alloc_context(
            groups=self.ubr_groups, symmetric=not self.ddp_config.disable_symmetric_registration
        )

        # Mark FP8 params. If TransformerEngine is not installed, we can skip this.
        meta_device_init_fp8_params = {}
        if reset_parameters_for_meta_device_init_module and HAVE_TE:
            for m in module.modules():
                if not isinstance(m, TransformerEngineBaseModule):
                    continue
                for name, param in m.named_parameters(recurse=False):
                    # The fp8 param initialized from the meta device may NOT be
                    # an fp8 tensor, according to the internal logic of the TE
                    # to determine whether this parameter is fp8 or not.
                    fp8_meta_index = m.param_init_meta[name].fp8_meta_index
                    if m.primary_weights_in_fp8 and fp8_meta_index is not None:
                        meta_device_init_fp8_params[self.param_to_name[param]] = (
                            True,
                            fp8_need_transpose_data_for_meta_device_init(m),
                        )

        # Get the parameter groups.
        (self.parameter_groups, self.param_to_param_group, self.bucket_to_bucket_group) = (
            _get_parameter_groups(module, bucketing_policy, meta_device_init_fp8_params)
        )
        self._init_each_parameter_group_buffers(meta_device_init_fp8_params)
        self._init_distributed_params()

        # Initialize the optimizer named parameters.
        self.optimizer_named_parameters = self._init_optimizer_named_parameters()

        self._log_parameter_groups()

    def get_mem_alloc_context(self, groups=None, symmetric=True):
        """
        Get the memory allocation context for the parameter and gradient buffers.
        """
        if self.ddp_config.nccl_ub:
            assert nccl_allocator is not None, (
                "To use user buffer registration, "
                "either requires megatron.core.nccl_allocator or apex.contrib.nccl_allocator"
            )
            global NCCL_MEMORY_POOL
            if groups is None:
                # data parallel group is a default group for user buffer registration
                groups = [self.dist_index.get_fsdp_group(is_expert_parallel=False)]

            if NCCL_ALLOCATOR == "MCORE":
                if self.ddp_config.fsdp_manual_registration:
                    return functools.partial(
                        nccl_allocator.MemPoolAllocatorWithoutRegistration, NCCL_MEMORY_POOL
                    )
                if len(groups) == 1:
                    # register buffers to the default group directly using nccl memory allocator
                    mem_alloc_context = functools.partial(
                        nccl_allocator.nccl_mem,
                        NCCL_MEMORY_POOL,
                        group=groups[0],
                        symmetric=symmetric,
                    )
                else:
                    mem_alloc_context = functools.partial(
                        nccl_allocator.MultiGroupMemPoolAllocator,
                        NCCL_MEMORY_POOL,
                        groups=groups,
                        symmetric=symmetric,
                    )
            elif NCCL_ALLOCATOR == "APEX":
                if self.ddp_config.fsdp_manual_registration:
                    logging.warning(
                        "FSDP manual registration is not supported for APEX NCCL allocator."
                        "falling back to default registration. "
                        "Please use Megatron Core NCCL allocator for manual registration."
                    )
                if symmetric:
                    logging.warning(
                        "Symmetric registration is not supported for APEX NCCL allocator."
                        "falling back to non-symmetric registration. "
                        "Please use Megatron Core NCCL allocator for symmetric registration."
                    )

                if len(groups) == 1:
                    # register buffers to the default group directly using nccl memory allocator
                    mem_alloc_context = functools.partial(
                        nccl_allocator.nccl_mem, NCCL_MEMORY_POOL, group=groups[0]
                    )
                else:
                    # Supports multiple groups registration for APEX NCCL allocator.
                    mem_alloc_context = functools.partial(
                        MultiGroupUBRAllocator, NCCL_MEMORY_POOL, groups=groups
                    )
            else:
                raise ValueError(f"Invalid NCCL allocator: {NCCL_ALLOCATOR}")
            return mem_alloc_context
        else:
            return nullcontext

    def manual_buffer_registration(self):
        """
        Manually register the FSDP communication buffers to NCCL user buffer.
        """
        assert self.ddp_config.nccl_ub, "NCCL UBR is not enabled"
        assert self.ddp_config.fsdp_double_buffer, "FSDP double buffer is not enabled"
        assert self.ddp_config.fsdp_manual_registration, "FSDP manual registration is not enabled"
        assert not self.already_registered, "Mem pool is already registered"

        self.already_registered = True

        global NCCL_MEMORY_POOL
        torch.cuda.synchronize()
        torch.distributed.barrier(async_op=False)
        torch.cuda.synchronize()

        for group in self.ubr_groups:
            log_single_rank(
                logger,
                logging.INFO,
                f"[MCORE][FSDP][Manual REG] Registering mem pool to group {group},"
                f"group.group_desc:{group.group_desc}, group.size(): {group.size()}",
            )
            nccl_allocator.register_mem_pool(
                NCCL_MEMORY_POOL,
                group,
                symmetric=not self.ddp_config.disable_symmetric_registration,
            )
            log_single_rank(
                logger,
                logging.INFO,
                f"[MCORE][FSDP][Manual REG] Registered mem pool to group {group},"
                f"group.group_desc:{group.group_desc}, group.size(): {group.size()}",
            )

    def _log_parameter_groups(self):
        """Compact log of FSDP parameter groups and their parameters."""

        def _bytes_to_mb(bytes_val: int) -> str:
            return f"{bytes_val / 1_000_000:.2f} MB"

        bucket_groups = self.parameter_groups
        param_to_name = self.param_to_name
        total_padded_bytes = 0
        total_comm_bytes = 0
        log_lines = [f"Number of FSDP Parameter Groups: {len(bucket_groups)}"]

        for idx, group in enumerate(bucket_groups):
            numel = sum(to_local_if_dtensor(p).shape.numel() for p in group.params)
            buffers = {
                "weight": group.model_weight_buffer,
                "transpose_weight": group.transpose_weight_buffer,
                "main_weight": group.main_weight_buffer,
                "grad": group.main_grad_buffer,
            }
            group_padded = 0
            group_comm = 0
            buf_flags = []
            for k, buf in buffers.items():
                if buf:
                    elem_size = _dtype_size(buf.dtype)
                    group_padded += (buf.bucket_index.size - numel) * elem_size
                    group_comm += buf.bucket_index.size * elem_size
                    buf_flags.append(k)
            total_padded_bytes += group_padded
            total_comm_bytes += group_comm

            # One-line summary for the group
            log_lines.append(
                f"[FSDP_UNIT {group.fsdp_unit_id}] Group {idx}: elems={numel} dtype={group.dtype} "
                f"bufs={','.join(buf_flags) or 'None'} pad={_bytes_to_mb(group_padded)}"
            )
            # List parameters below
            for param in group.params:
                log_lines.append(f"\t{param_to_name[param]} {tuple(param.shape)}")

        # Add summary
        log_lines.append(
            f"Total comm: {_bytes_to_mb(total_comm_bytes)}, "
            f"Total pad: {_bytes_to_mb(total_padded_bytes)}"
        )

        log_single_rank(logger, logging.INFO, "\n".join(log_lines))

    def _init_each_parameter_group_buffers(self, meta_device_init_fp8_params):
        """
        Initialize the buffers for each parameter group.
        """
        # FSDP Sharding Strategy: no-shard, optim, optim_grads, optim_grads_params
        data_parallel_sharding_strategy = self.ddp_config.data_parallel_sharding_strategy
        if data_parallel_sharding_strategy == "no_shard":
            is_model_weight_buffer_distributed = False
            is_main_weight_buffer_distributed = False
            is_grad_buffer_distributed = False
        elif data_parallel_sharding_strategy == "optim":
            is_model_weight_buffer_distributed = False
            is_main_weight_buffer_distributed = True
            is_grad_buffer_distributed = False
        elif data_parallel_sharding_strategy == "optim_grads":
            is_model_weight_buffer_distributed = False
            is_main_weight_buffer_distributed = True
            is_grad_buffer_distributed = True
        elif data_parallel_sharding_strategy == "optim_grads_params":
            is_model_weight_buffer_distributed = True
            is_main_weight_buffer_distributed = True
            is_grad_buffer_distributed = True
        else:
            raise ValueError(
                f"Invalid data_parallel_sharding_strategy: {data_parallel_sharding_strategy}"
            )

        # Only create HSDP buffers if sharding on DP-Outer. Otherwise, no need to all-gather
        # parameters on DP-Outer, but still need to all-reduce gradients on DP-Outer.
        should_create_hfsdp_wbuf_and_gbuf = (
            self.dist_index.use_hybrid_fsdp
            and self.ddp_config.outer_dp_sharding_strategy != "no_shard"
        )
        # DP-Outer sharding is only supported for fully-sharded DP-Shard.
        # NOTE(@cspades): Important guard for HFSDP functionality!
        if (
            should_create_hfsdp_wbuf_and_gbuf
            and self.ddp_config.data_parallel_sharding_strategy != "optim_grads_params"
        ):
            raise NotImplementedError(
                "[Megatron-FSDP] Optimizer fully-sharded HFSDP is only supported "
                "with full-sharding on DP-Shard.\nMegatron-FSDP DP-Shard Strategy: "
                f"{self.ddp_config.data_parallel_sharding_strategy}\nMegatron-FSDP "
                f"DP-Outer Strategy: {self.ddp_config.outer_dp_sharding_strategy}"
            )

        if self.ddp_config.nccl_ub:
            assert self.ddp_config.fsdp_double_buffer, (
                "NCCL UB is only supported with FSDP double buffer. "
                "Please set fsdp_double_buffer=True in the ddp config."
            )
        if self.ddp_config.fsdp_double_buffer and len(self.bucketing_policy.fsdp_unit_modules) > 0:
            UB_BUFFER_NUM = 2
            self.weight_alloc = FixedPoolAllocator(
                name="fsdp_params",
                fsdp_param_groups=self.parameter_groups,
                size=UB_BUFFER_NUM,
                fallback_to_persistent_buffer=self.ddp_config.fsdp_db_use_persist_buf_on_alloc_fail,
            )
            self.transpose_weight_alloc = FixedPoolAllocator(
                name="fsdp_fp8_transpose_params",
                fsdp_param_groups=self.parameter_groups,
                size=UB_BUFFER_NUM,
            )
            self.main_grad_alloc = FixedPoolAllocator(
                name="fsdp_grads",
                fsdp_param_groups=self.parameter_groups,
                size=UB_BUFFER_NUM,
                fallback_to_persistent_buffer=(
                    self.ddp_config.fsdp_db_use_persist_buf_on_alloc_fail
                ),
            )
            if self.dist_index.use_hybrid_fsdp:
                # Only required for custom communication dtype buffer allocation
                # to leverage NCCL UBR for high-precision gradient reduction with
                # low-precision gradient communication over DP-Outer for H(F)SDP.
                # Otherwise, this allocator will never be used.
                self.hsdp_grad_comm_alloc = FixedPoolAllocator(
                    name="hsdp_grad_comm",
                    fsdp_param_groups=self.parameter_groups,
                    size=UB_BUFFER_NUM,
                    fallback_to_persistent_buffer=(
                        self.ddp_config.fsdp_db_use_persist_buf_on_alloc_fail
                    ),
                )
            self.double_buf_units = self.weight_alloc.fsdp_double_buffer_units
        else:
            self.weight_alloc = StorageResizeBasedBucketAllocator()
            self.transpose_weight_alloc = StorageResizeBasedBucketAllocator()
            self.main_grad_alloc = None
            if self.dist_index.use_hybrid_fsdp:
                # Only required for custom communication dtype buffer allocation
                # for low-precision gradient communication over DP-Outer for H(F)SDP.
                # Otherwise, this allocator will never be used.
                self.hsdp_grad_comm_alloc = None
            self.double_buf_units = []

        self.buffer_all_in_one = True
        buffer_size = {torch.float32: 0, torch.float16: 0, torch.bfloat16: 0, "float8": 0}

        # For all bucket groups (partitioned parameter groups)...
        for group_id, group in enumerate(self.parameter_groups):
            main_buf_extra_kwargs = {}
            if should_create_hfsdp_wbuf_and_gbuf:
                # DP-Outer + DP-Shard
                main_buf_dp_group = self.dist_index.get_dp_group(
                    is_expert_parallel=group.is_expert_param
                )
                # DP-Shard
                hsdp_buf_dp_group = self.dist_index.get_fsdp_group(
                    is_expert_parallel=group.is_expert_param
                )
                main_buf_extra_kwargs["dp_rank"] = self.dist_index.get_logical_hybrid_fsdp_rank(
                    is_expert_parallel=group.is_expert_param
                )
            else:
                # DP-Shard only, since we're not sharding on DP-Outer.
                main_buf_dp_group = self.dist_index.get_fsdp_group(
                    is_expert_parallel=group.is_expert_param
                )

            # When --create-all-gather-group is enabled, use a separate process group for
            # all-gather operations (model_weight_buffer) to enable overlap with gradient reduction
            # operations (main_grad_buffer). This avoids head-of-line blocking between forward
            # all-gather and backward reduce-scatter on the same communicator.
            model_wbuf_dp_group = main_buf_dp_group
            if not group.is_expert_param and not should_create_hfsdp_wbuf_and_gbuf:
                ag_group = self.dist_index.get_fsdp_group(
                    is_expert_parallel=False, independent_all_gather=True
                )
                if ag_group is not None:
                    model_wbuf_dp_group = ag_group

            gradient_scaling_factor = (
                self.gradient_scaling_factor
                if not group.is_expert_param
                else self.expert_gradient_scaling_factor
            )
            # Check if the parameter group is FP8.
            one_param = group.params[0]
            is_dtype_float8 = (
                is_float8tensor(one_param)
                or meta_device_init_fp8_params.get(self.param_to_name[one_param], (False, False))[0]
            )

            # Designate buffer data-types for compute parameters and main gradients.
            if is_dtype_float8:
                param_dtype = torch.uint8
                main_grads_dtype = torch.bfloat16
            else:
                param_dtype = group.params[0].dtype
                main_grads_dtype = param_dtype
            # Use a custom main gradient data-type.
            if self.mp_policy.main_grads_dtype is not None:
                main_grads_dtype = self.mp_policy.main_grads_dtype

            # Check if the parameter group needs a transpose buffer for model weights.
            # Currently, only mxfp8 needs it.
            need_transpose_data = is_float8tensor(one_param) and fp8_need_transpose_data(one_param)
            need_transpose_data_for_meta_device_init = meta_device_init_fp8_params.get(
                self.param_to_name[one_param], (False, False)
            )[1]
            should_create_transpose_weight_buffer = (
                need_transpose_data or need_transpose_data_for_meta_device_init
            )

            # Check if the parameter group requires a grad buffer or main weight buffer.
            should_create_grad_buffer_or_main_weight_buffer = (
                not self.only_create_grad_buffer_and_main_weight_buffer_for_param_requires_grad
                or group.requires_grad
            )

            # Initialize the model weight buffer from bucket parameters.
            if data_parallel_sharding_strategy != "no_shard":
                group.model_weight_buffer = DataParallelBuffer(
                    self.ddp_config,
                    group.params,
                    is_data_distributed=is_model_weight_buffer_distributed
                    and model_wbuf_dp_group.size() > 1,
                    dtype=param_dtype,
                    device=self.device,
                    # Note: This will be DP-Outer + DP-Shard when sharding
                    # the optimizer state in HFSDP, else just DP-Shard when
                    # using basic HSDP or FSDP.
                    data_parallel_group=model_wbuf_dp_group,
                    is_transpose_buffer=False,
                    temporary_bucket_allocator=self.weight_alloc,
                    bucket_id=group_id,
                    chunk_size_factor=group.chunk_size_factor,
                    mem_alloc_context=self.mem_alloc_context,
                    **main_buf_extra_kwargs,
                )
                if should_create_transpose_weight_buffer:
                    group.transpose_weight_buffer = DataParallelBuffer(
                        self.ddp_config,
                        group.params,
                        is_data_distributed=is_model_weight_buffer_distributed
                        and main_buf_dp_group.size() > 1,
                        dtype=param_dtype,
                        device=self.device,
                        data_parallel_group=main_buf_dp_group,
                        is_transpose_buffer=True,
                        temporary_bucket_allocator=self.transpose_weight_alloc,
                        bucket_id=group_id,
                        chunk_size_factor=group.chunk_size_factor,
                        mem_alloc_context=self.mem_alloc_context,
                        **main_buf_extra_kwargs,
                    )

            # Initialize the main weight buffer if a main weight data-type is specified.
            # Otherwise, don't create this buffer, and use the model compute weight buffer instead.
            if (
                should_create_grad_buffer_or_main_weight_buffer
                and self.mp_policy.main_params_dtype is not None
            ):
                group.main_weight_buffer = DataParallelBuffer(
                    self.ddp_config,
                    group.params,
                    is_data_distributed=is_main_weight_buffer_distributed
                    and main_buf_dp_group.size() > 1,
                    dtype=self.mp_policy.main_params_dtype,
                    device=self.device,
                    data_parallel_group=main_buf_dp_group,
                    bucket_id=group_id,
                    chunk_size_factor=group.chunk_size_factor,
                    mem_alloc_context=self.mem_alloc_context,
                    **main_buf_extra_kwargs,
                )

            # Initialize the main grad buffer.
            if should_create_grad_buffer_or_main_weight_buffer:
                assert (
                    main_grads_dtype.is_floating_point
                ), f"Main gradient dtype ({main_grads_dtype}) must be Float."
                group.main_grad_buffer = DataParallelBuffer(
                    self.ddp_config,
                    # Proxy because the number of gradient parameters is the same
                    # as the number of model parameters.
                    group.params,
                    is_data_distributed=is_grad_buffer_distributed and main_buf_dp_group.size() > 1,
                    dtype=main_grads_dtype,
                    device=self.device,
                    # Note: This will be DP-Outer + DP-Shard when sharding
                    # the optimizer state in HFSDP, else just DP-Shard when
                    # using basic HSDP or FSDP.
                    data_parallel_group=main_buf_dp_group,
                    is_transpose_buffer=False,
                    temporary_bucket_allocator=self.main_grad_alloc,
                    gradient_scaling_factor=gradient_scaling_factor,
                    bucket_id=group_id,
                    chunk_size_factor=group.chunk_size_factor,
                    mem_alloc_context=self.mem_alloc_context,
                    **main_buf_extra_kwargs,
                )

                # Track number of elements in the main grad buffer by dtype.
                buffer_size[group.main_grad_buffer.dtype] += group.main_grad_buffer.data_size

            # Initialize the HSDP weight and grad buffers if hsdp full sharding is enabled.
            if should_create_hfsdp_wbuf_and_gbuf:
                # Initialize the HSDP weight buffer.
                wbuf = group.model_weight_buffer
                group.hsdp_wbuf = DataParallelBuffer(
                    self.ddp_config,
                    group.params,
                    is_data_distributed=is_main_weight_buffer_distributed
                    and hsdp_buf_dp_group.size() > 1,
                    dtype=wbuf.dtype,
                    device=wbuf.device,
                    data_parallel_group=hsdp_buf_dp_group,
                    is_transpose_buffer=False,
                    temporary_bucket_allocator=self.weight_alloc,
                    bucket_id=group_id,
                    chunk_size_factor=group.chunk_size_factor,
                    mem_alloc_context=self.mem_alloc_context,
                    item_index_map=wbuf.item_index_map,
                    bucket_index=wbuf.bucket_index,
                    shard_bucket_index=_get_dp_buffer_shard_bucket_index(
                        wbuf.bucket_index,
                        is_data_distributed=is_main_weight_buffer_distributed
                        and hsdp_buf_dp_group.size() > 1,
                        data_parallel_world_size=hsdp_buf_dp_group.size(),
                        data_parallel_rank=hsdp_buf_dp_group.rank(),
                    ),
                )

                if group.transpose_weight_buffer is not None:
                    # TODO(@kunlunl, @cspades): Create a hybrid-sharded transpose buffer
                    # to map fully-sharded transpose weights to partially-sharded transpose
                    # weights before and after fully-distributed optimization.
                    raise NotImplementedError(
                        "HFSDP (HSDP + fully-sharded optimizer state) doesn't "
                        "support FP8 recipes that require a transpose buffer."
                    )

                if should_create_grad_buffer_or_main_weight_buffer:
                    # Initialize the HSDP grad buffer.
                    gbuf = group.main_grad_buffer
                    group.hsdp_gbuf = DataParallelBuffer(
                        self.ddp_config,
                        group.params,
                        is_data_distributed=is_grad_buffer_distributed
                        and hsdp_buf_dp_group.size() > 1,
                        dtype=gbuf.dtype,
                        device=gbuf.device,
                        data_parallel_group=hsdp_buf_dp_group,
                        is_transpose_buffer=False,
                        temporary_bucket_allocator=self.main_grad_alloc,
                        gradient_scaling_factor=gradient_scaling_factor,
                        bucket_id=group_id,
                        chunk_size_factor=group.chunk_size_factor,
                        mem_alloc_context=self.mem_alloc_context,
                        item_index_map=gbuf.item_index_map,
                        bucket_index=gbuf.bucket_index,
                        shard_bucket_index=_get_dp_buffer_shard_bucket_index(
                            gbuf.bucket_index,
                            is_data_distributed=is_grad_buffer_distributed
                            and hsdp_buf_dp_group.size() > 1,
                            data_parallel_world_size=hsdp_buf_dp_group.size(),
                            data_parallel_rank=hsdp_buf_dp_group.rank(),
                        ),
                    )
                    buffer_size[group.main_grad_buffer.dtype] -= group.main_grad_buffer.data_size
                    buffer_size[group.main_grad_buffer.dtype] += group.hsdp_gbuf.data_size

            # Only create an extra grad comm buffer for HSDP.
            if should_create_grad_buffer_or_main_weight_buffer and self.dist_index.use_hybrid_fsdp:
                # Create a (DP-Shard)-sized gradient communication buffer that manages
                # allocation for custom gradient communication data-types during runtime.
                # If gradient communication data-type customization is not used, the
                # allocator associated with this buffer will never allocate memory.
                # This buffer will never initialize local data, i.e. self.data = None.
                gbuf = group.main_grad_buffer
                fsdp_group = self.dist_index.get_fsdp_group(
                    is_expert_parallel=group.is_expert_param
                )
                hfsdp_kwargs = {}
                if should_create_hfsdp_wbuf_and_gbuf:
                    hfsdp_kwargs["item_index_map"] = gbuf.item_index_map
                    hfsdp_kwargs["bucket_index"] = gbuf.bucket_index
                    hfsdp_kwargs["shard_bucket_index"] = _get_dp_buffer_shard_bucket_index(
                        gbuf.bucket_index,
                        is_data_distributed=is_grad_buffer_distributed and fsdp_group.size() > 1,
                        data_parallel_world_size=fsdp_group.size(),
                        data_parallel_rank=fsdp_group.rank(),
                    )
                group.hsdp_comm_gbuf = DataParallelBuffer(
                    self.ddp_config,
                    group.params,
                    is_data_distributed=is_grad_buffer_distributed and fsdp_group.size() > 1,
                    # Set allocation to grad_comm_dtype, or default to param(.grad).dtype.
                    dtype=self.mp_policy.grad_comm_dtype,
                    device=gbuf.device,
                    data_parallel_group=fsdp_group,
                    is_transpose_buffer=False,
                    # Use the HSDP gradient communication allocator!
                    temporary_bucket_allocator=self.hsdp_grad_comm_alloc,
                    bucket_id=group_id,
                    chunk_size_factor=group.chunk_size_factor,
                    mem_alloc_context=self.mem_alloc_context,
                    **hfsdp_kwargs,
                )

        reset_context_args = {"init_param_with_fp8": self.ddp_config.fp8_param_gather}
        module_reset_flag = {}
        if self.reset_parameters_for_meta_device_init_module:
            self.param_to_direct_module = {}
            for name, m in self.module.named_modules():
                for p in m.parameters(recurse=False):
                    self.param_to_direct_module[p] = (name, m)

            meta_params_numel = 0
            cuda_params_numel = 0
            cpu_params_numel = 0
            for group in self.parameter_groups:
                for p in group.params:
                    p_numel = to_local_if_dtensor(p).shape.numel()
                    if p.is_meta:
                        meta_params_numel += p_numel
                    elif p.device.type == "cuda":
                        cuda_params_numel += p_numel
                    else:
                        cpu_params_numel += p_numel
            log_str = (
                f"Meta params numel: {meta_params_numel / 1_000_000:.2f} M, "
                f"CUDA params numel: {cuda_params_numel / 1_000_000:.2f} M, "
                f"CPU params numel: {cpu_params_numel / 1_000_000:.2f} M"
            )
            log_single_rank(logger, logging.INFO, log_str)

        # Initialize the model weight buffer data of each parameter group.
        # Specifically, replace the Torch module's parameter data with tensors
        # whose memory managed by the model weight buffer, and store a shard
        # of all the parameters across ranks in the model weight buffer.
        for group in self.parameter_groups:
            wbuf = group.model_weight_buffer
            if wbuf:
                with self.mem_alloc_context():
                    if group.hsdp_wbuf:
                        # When using HSDP, the hybrid-sharded buffer shards across the FSDP group,
                        # while the main buffer shards across the larger / more granular DP group.
                        # The main weight buffer data is a shard of the hybrid-sharded buffer data.
                        # Because the hybrid buffer data is persistently allocated, the weight and
                        # gradient memory footprint is similar to not sharding on DP-Outer, i.e.
                        # replicating on DP-Outer. However, optimizer states based on main buffer
                        # weights (self.dist_main_weight) and gradients (self.dist_main_grad) will
                        # be sharded persistently upon initialization.
                        hsdp_wbuf = group.hsdp_wbuf
                        hsdp_wbuf.init_data(
                            torch.empty(
                                hsdp_wbuf.data_size, dtype=hsdp_wbuf.dtype, device=self.device
                            )
                        )
                        outer_fsdp_group = self.dist_index.get_outer_fsdp_group()
                        wbuf_data = hsdp_wbuf.data[
                            # Requires FSDP sharding for (DP-Shard, DP-Outer) to cover DP-Shard.
                            wbuf.data_size
                            * outer_fsdp_group.rank() : wbuf.data_size
                            * (outer_fsdp_group.rank() + 1)
                        ]
                        wbuf.init_data(wbuf_data)
                    else:
                        # When not using HSDP, the main buffer shards across the FSDP group.
                        wbuf.init_data(
                            torch.empty(wbuf.data_size, dtype=wbuf.dtype, device=self.device)
                        )
                bucket = wbuf.fetch_bucket()

            tbuf = group.transpose_weight_buffer
            if tbuf:
                with self.mem_alloc_context():
                    if group.hsdp_wbuf:
                        raise NotImplementedError(
                            "HFSDP (HSDP + fully-sharded optimizer state) doesn't "
                            "support FP8 recipes that require a transpose buffer."
                        )
                    else:
                        # Initialize the transpose buffer.
                        tbuf.init_data(
                            torch.empty(tbuf.data_size, dtype=tbuf.dtype, device=self.device)
                        )
                transpose_bucket = tbuf.fetch_bucket()

            mbuf = group.main_weight_buffer
            if mbuf:
                # Manually instantiate an empty tensor into the main weight buffer.
                mbuf.init_data(torch.empty(mbuf.data_size, dtype=mbuf.dtype, device=self.device))
            for item_id, p in enumerate(group.params):
                # Model Weight (Low-Precision) Buffer Initialization
                if wbuf:
                    if self.reset_parameters_for_meta_device_init_module and p.is_meta:
                        m_name, m = self.param_to_direct_module[p]
                        # Validate that the module has a reset_parameters or
                        # _reset_parameters method to initialize this meta
                        # parameter (p.is_meta=True).
                        if not hasattr(m, "reset_parameters") and not hasattr(
                            m, "_reset_parameters"
                        ):
                            raise ValueError(
                                f"[init_model_with_meta_device=True] Module {m_name} "
                                "does not have a reset_parameters or _reset_parameters method."
                            )
                        if not module_reset_flag.get(m_name, False):
                            old_params = list(m.parameters(recurse=False))

                            # If the GPU memory over threshold, empty cache to leave
                            # some memory for initialization of the model on the
                            # CUDA device.
                            if check_gpu_memory(threshold=0.5):
                                gc.collect()
                                torch.cuda.empty_cache()

                            m.to_empty(device=self.device, recurse=False)
                            if (
                                HAVE_TE
                                and is_te_min_version("0.9.0")
                                and not isinstance(m, TransformerEngineBaseModule)
                            ):
                                reset_context_args["with_cuda_rng_tracker"] = True
                            with ResetParametersContext(**reset_context_args):
                                # Initialize original model meta parameters.
                                if hasattr(m, "reset_parameters"):
                                    m.reset_parameters()
                                elif hasattr(m, "_reset_parameters"):
                                    m._reset_parameters()
                            module_reset_flag[m_name] = True
                            new_params = list(m.parameters(recurse=False))

                            self._reset_parameters(old_params, new_params)
                            p = group.params[item_id]

                            # After resetting parameters, delete fp8 transpose cache
                            # if we do not need keep cache.
                            if not self.ddp_config.keep_fp8_transpose_cache:
                                for _param in m.parameters(recurse=False):
                                    if is_float8tensor(_param):
                                        fp8_discard_transpose_cache(_param)
                    # Raise error if a meta parameter still exists after initialization.
                    assert not p.is_meta, (self.param_to_name[p], module_reset_flag)

                    p_local = to_local_if_dtensor(p)

                    # Copy the model weight parameter tensor into the buffer.
                    # When distributed, this shards and preserves the data across all ranks.
                    wbuf.set_item(item_id, p_local)
                    if tbuf:
                        tbuf.set_item(item_id, p_local)

                    # Retrieve the newly allocated parameter data from the global bucket.
                    # Attach the bucket-allocated parameter data to the module parameter,
                    # to use the bucket-allocated data for autograd and NCCL.
                    new_param_data = wbuf.get_item_from_bucket(bucket, item_id).view(p_local.shape)
                    if tbuf:
                        new_transpose_data = tbuf.get_item_from_bucket(
                            transpose_bucket, item_id
                        ).view(p_local.shape)
                    else:
                        new_transpose_data = None

                    if is_float8tensor(p_local):
                        # Attach FP8 row-wise data in the FP8 parameter
                        # to slice of the model compute weight bucket.
                        old_param_data = fp8_get_raw_data(p_local)
                        assert old_param_data._base is None
                        new_param_data.detach().copy_(old_param_data)
                        fp8_set_raw_data(p_local, new_param_data)
                        del old_param_data
                        if new_transpose_data is not None:
                            # Attach FP8 col-wise data in the FP8 parameter
                            # to slice of the FP8 transpose bucket.
                            old_transpose_data = fp8_get_raw_data(p_local, True)
                            assert old_transpose_data._base is None
                            new_transpose_data.detach().copy_(old_transpose_data)
                            fp8_set_raw_data(p_local, new_transpose_data, True)
                            del old_transpose_data
                    elif isinstance(p, DTensor):
                        # Same as Tensor case, except for DTensor parameters
                        # in the original model. Tensor = DTensor.to_local().
                        old_param_data = p._local_tensor.data
                        p._local_tensor.data = new_param_data
                        assert old_param_data._base is None
                        p._local_tensor.data.detach().copy_(old_param_data)
                        del old_param_data
                    else:
                        # Detach the bucket-allocated parameter data from the computational graph
                        # before copying the old parameter data into the new parameter data
                        # to prevent backpropagation into a deleted parameter / Tensor.

                        # Copy the values of the original parameter data into the bucket-allocated
                        # parameter data. Detach the module parameter because
                        # parameters that require gradients in the computational
                        # graph do not support in-place operations.
                        old_param_data = p.data
                        p.data = new_param_data
                        assert old_param_data._base is None
                        p.data.detach().copy_(old_param_data)
                        del old_param_data

                # Main Weight (High-Precision) Buffer Initialization
                if mbuf:
                    if hasattr(p, "get_high_precision_init_val"):
                        assert not isinstance(p, DTensor), (
                            self.param_to_name[p],
                            "not support fp8 DTensor.",
                        )
                        # Needed to instantiate FP8 parameters. Requires installing
                        # TransformerEngine.
                        mbuf.set_item(item_id, p.get_high_precision_init_val())
                        p.clear_high_precision_init_val()
                    else:
                        # Insert a copy of the model weight parameter tensor into
                        # the (high-precision) main weight buffer.
                        # Nothing else needs to be done, because the main weights
                        # do not require autograd operations, only possibly sharding.
                        p_local = to_local_if_dtensor(p)
                        assert not is_float8tensor(p_local), (
                            self.param_to_name[p],
                            "fp8 param should use get_high_precision_init_val method.",
                        )
                        mbuf.set_item(item_id, p_local)

            if wbuf and wbuf.is_data_distributed:
                # Free the memory backing the temporarily-allocated bucket associated
                # with this buffer.
                # The module parameters will still reference the (now empty) bucket Tensor.
                # Each rank of the data buffer will persistently store a shard of the module.
                # This reduces the memory footprint of the model in FSDP, such that the only
                # time the entire model's weights are allocated in memory is during initialization,
                # before forward activations and gradients are allocated in training.
                wbuf.free_bucket_storage()

            if tbuf and tbuf.is_data_distributed:
                tbuf.free_bucket_storage()

        # Allocate the main_weight buffer and main_grad buffer data in one buffer.
        if self.buffer_all_in_one:
            with self.mem_alloc_context():
                self.buffer = {
                    torch.float32: torch.empty(
                        buffer_size[torch.float32], dtype=torch.float32, device=self.device
                    ),
                    torch.float16: torch.empty(
                        buffer_size[torch.float16], dtype=torch.float16, device=self.device
                    ),
                    torch.bfloat16: torch.empty(
                        buffer_size[torch.bfloat16], dtype=torch.bfloat16, device=self.device
                    ),
                    "float8": torch.empty(
                        buffer_size["float8"], dtype=torch.uint8, device=self.device
                    ),
                }
            offset = {torch.float32: 0, torch.float16: 0, torch.bfloat16: 0, "float8": 0}

        def _alloc(dtype, size):
            """
            If using a single buffer for main model weights and gradients,
            allocate memory per dtype buffer with size at the current offset.
            Return the allocated slice of the buffer data Tensor.

            If not using a single buffer, then return an empty Tensor on this device.
            """
            if self.buffer_all_in_one:
                if dtype == torch.uint8:
                    dtype = "float8"
                data = self.buffer[dtype][offset[dtype] : offset[dtype] + size]
                offset[dtype] += size
                return data
            return torch.empty(size, dtype=dtype, device=self.device)

        # Main Gradient Buffer Initialization
        for group in self.parameter_groups:
            gbuf = group.main_grad_buffer
            if not gbuf:
                # No gradient sharding.
                continue
            # Allocate the main grad buffer data, and attach it to the main grad buffer.
            with self.mem_alloc_context():
                if group.hsdp_gbuf:
                    # When using HSDP, the hybrid-sharded buffer shards across the FSDP group,
                    # while the main buffer shards across the larger / more granular DP group.
                    # The main weight buffer data is a shard of the hybrid-sharded buffer data.
                    # Because the hybrid buffer data is persistently allocated, the weight and
                    # gradient memory footprint is similar to not sharding on DP-Outer, i.e.
                    # replicating on DP-Outer. However, optimizer states based on main buffer
                    # weights (self.dist_main_weight) and gradients (self.dist_main_grad) will
                    # be sharded persistently upon initialization.
                    hsdp_gbuf = group.hsdp_gbuf
                    hsdp_gbuf.init_data(_alloc(hsdp_gbuf.dtype, hsdp_gbuf.data_size))
                    outer_fsdp_group = self.dist_index.get_outer_fsdp_group()
                    gbuf_data = hsdp_gbuf.data[
                        # Requires FSDP sharding for (DP-Shard, DP-Outer) to cover DP-Shard.
                        gbuf.data_size
                        * outer_fsdp_group.rank() : gbuf.data_size
                        * (outer_fsdp_group.rank() + 1)
                    ]
                    gbuf.init_data(gbuf_data)
                    hsdp_gbuf.data.zero_()
                else:
                    # When not using HSDP, the main buffer shards across the FSDP group.
                    gbuf.init_data(_alloc(gbuf.dtype, gbuf.data_size))
                    gbuf.data.zero_()
            gbuf.data.zero_()
            for item_id, p in enumerate(group.params):
                # Attach the main grad buffer data and metadata to the parameter.
                p._gbuf = group.hsdp_gbuf if group.hsdp_gbuf else gbuf
                p._item_id = item_id

                def main_grad_getter(p):
                    # Make sure main_grad memory is allocated when initially accessed.
                    # When gradients are sharded, we can pre-allocate a communication
                    # bucket to avoid casting to a communication data-type. Otherwise,
                    # return the item backed by the main gradient buffer required to
                    # support un-sharded gradient accumulation at high precision.
                    bucket = p._gbuf.fetch_bucket(
                        dtype=(
                            self.mp_policy.grad_comm_dtype if p._gbuf.is_data_distributed else None
                        )
                    )
                    gbuf = p._gbuf
                    item_id = p._item_id
                    # View it as p.shape so you can insert the param.grad into
                    # the bucket seamlessly.
                    return gbuf.get_item_from_bucket(bucket, item_id).view(
                        to_local_if_dtensor(p).shape
                    )

                # Patch the parameter class to include a main_grad property.
                # Utilized in the gradient reduction pipeline to save computed
                # data-parallel gradients on every rank and reduce-scatter them.
                # Enables TransformerEngine's fuse_wgrad_accumulation=True feature
                # which dumps gradients into param.main_grad with zero-copy.
                p.get_main_grad = main_grad_getter.__get__(p)

        # Clean up deallocated memory.
        gc.collect()
        torch.cuda.empty_cache()

    def _reset_parameters(self, old_params, new_params):
        assert len(old_params) == len(new_params)
        param_map = {}
        for old_param, new_param in zip(old_params, new_params):
            param_map[old_param] = new_param
            self.param_to_name[new_param] = self.param_to_name[old_param]
            del self.param_to_name[old_param]

            self.param_to_param_group[new_param] = self.param_to_param_group[old_param]
            del self.param_to_param_group[old_param]

            self.param_to_direct_module[new_param] = self.param_to_direct_module[old_param]
            del self.param_to_direct_module[old_param]

            new_param.requires_grad_(old_param.requires_grad)

            for tp_attr in ["_mcore_tp", "_tp_partition_dim", "_tp_duplicated"]:
                if getattr(old_param, tp_attr, None) is not None:
                    setattr(new_param, tp_attr, getattr(old_param, tp_attr))

        for item_id, p in enumerate(self.params):
            if p in param_map:
                new_p = param_map[p]
                self.params[item_id] = new_p

        for group in self.parameter_groups:
            for item_id, p in enumerate(group.params):
                if p not in param_map:
                    continue
                new_p = param_map[p]
                group.params[item_id] = new_p
                for buf in [
                    group.model_weight_buffer,
                    group.transpose_weight_buffer,
                    group.main_weight_buffer,
                    group.main_grad_buffer,
                    group.hsdp_wbuf,
                    group.hsdp_gbuf,
                ]:
                    if buf is None:
                        continue
                    buf.param_idx[new_p] = buf.param_idx[p]
                    del buf.param_idx[p]

    def scale_gradients(self, scaling_factor: float) -> None:
        """Scale the gradient data by `scaling_factor`."""
        for group in self.parameter_groups:
            if group.main_grad_buffer is None:
                continue
            group.main_grad_buffer.data *= scaling_factor
        self.update_main_grads()

    def zero_grad(self):
        """
        Zero out the underlying grad_buffer and reset all buckets in preparation
        for the next iteration of training.
        """
        for name, param in self.optimizer_named_parameters:
            param.grad = None
            if hasattr(param, "decoupled_grad"):
                param.decoupled_grad = None
            if name in self.dist_main_grad:
                self.dist_main_grad[name]._local_tensor = None

        for group in self.parameter_groups:
            if group.main_grad_buffer:
                group.main_grad_buffer.data.zero_()
            if group.hsdp_gbuf:
                group.hsdp_gbuf.data.zero_()

    def _init_distributed_params(self):
        """
        Register model training and high-precision parameters as optimizer
        named parameters and DTensor(s). Specifically, we utilize the highest
        precision weights available for optimization using fall-back logic
        on mbuf -> wbuf -> orig_param depending on if main_params_dtype is
        specified or "no_shard" is utilized.
        """
        dist_main_weight = {}
        for pg in self.parameter_groups:
            wbuf = pg.model_weight_buffer
            tbuf = pg.transpose_weight_buffer
            mbuf = pg.main_weight_buffer
            for item_id, orig_param in enumerate(pg.params):
                param_name = self.param_to_name[orig_param]

                # If the optimizer state is sharded, we need to track references to shards
                # of the main weight or training weight buffer data for distributed
                # optimization, regardless whether the buffers are sharded or not.
                # mbuf and wbuf won't exist in the case of "no_shard", in which case
                # we simply take the original unsharded parameter weight from the model.
                sharded_optimizer_state = (
                    self.bucketing_policy.data_parallel_sharding_strategy != "no_shard"
                )

                # Register model training and high-precision parameters as DTensor(s).
                if mbuf:
                    dist_param = make_fsdp_dtensor(
                        local_tensor=mbuf.get_item(item_id, only_shard=sharded_optimizer_state),
                        param=orig_param,
                        dist_index=self.dist_index,
                        is_sharded_param=sharded_optimizer_state,
                        is_expert_param=pg.is_expert_param,
                        run_check=True,
                        update_uneven_dtensor_chunk_meta=True,
                        force_sync_tp_duplicated_param=True,
                    )
                    dist_main_weight[param_name] = dist_param
                elif wbuf:
                    assert tbuf is None, "Transpose buffer should only exist when main params exist"
                    dist_param = make_fsdp_dtensor(
                        local_tensor=wbuf.get_item(item_id, only_shard=sharded_optimizer_state),
                        param=orig_param,
                        dist_index=self.dist_index,
                        is_sharded_param=sharded_optimizer_state,
                        is_expert_param=pg.is_expert_param,
                        run_check=True,
                        update_uneven_dtensor_chunk_meta=True,
                        force_sync_tp_duplicated_param=True,
                    )
                    dist_main_weight[param_name] = dist_param
                else:
                    # If neither the wbuf nor the mbuf are utilized in the case of "no_shard",
                    # we fall-back to using the original parameter data for optimization,
                    # and register the new parameter as a model training weight.
                    dist_param = make_fsdp_dtensor(
                        local_tensor=orig_param.data,
                        param=orig_param,
                        dist_index=self.dist_index,
                        is_sharded_param=False,
                        is_expert_param=pg.is_expert_param,
                        run_check=True,
                        update_uneven_dtensor_chunk_meta=False,
                        force_sync_tp_duplicated_param=True,
                    )
                    dist_main_weight[param_name] = dist_param

        # Store the registered parameters.
        self.dist_main_weight = dist_main_weight
        self.dist_main_grad = {}

    def _init_optimizer_named_parameters(self) -> List[Tuple[str, torch.nn.Parameter]]:
        named_parameters = []
        for pg in self.parameter_groups:
            for item_id, orig_param in enumerate(pg.params):
                param_name = self.param_to_name[orig_param]

                # Set optimizer parameters to DTensor-wrapped parameters.
                if param_name in self.dist_main_weight:
                    param_data = self.dist_main_weight[param_name]
                else:
                    # Could not find registered parameters for distributed optimization.
                    raise ValueError(
                        f"Parameter {param_name} not found in dist model weight "
                        "or dist main weight."
                    )
                dist_param = torch.nn.Parameter(param_data)

                def set_param_attribute_closure(param, orig_param):
                    def set_param_attribute():
                        for attr_name in [
                            "requires_grad",
                            "sequence_parallel",
                            "shared",
                            "tensor_model_parallel",
                            "partition_dim",
                            "partition_stride",
                            "is_embedding_or_output_parameter",
                            "is_embedding_parameter",
                            "_mcore_tp",
                            "_tp_duplicated",
                            "_tp_partition_dim",
                        ]:
                            if hasattr(orig_param, attr_name):
                                setattr(param, attr_name, getattr(orig_param, attr_name))

                    return set_param_attribute

                setattr(
                    dist_param,
                    "reset_attribute",
                    set_param_attribute_closure(dist_param, orig_param),
                )
                setattr(dist_param, "orig_param", orig_param)
                setattr(dist_param, "megatron_fsdp_dist_index", self.dist_index)

                # NOTE: megatron_fsdp_slice is used to solve the SwiGLU TP dist-ckpt problem in
                # MCore.
                mbuf = pg.model_weight_buffer
                if mbuf:
                    _start, _end = mbuf._get_item_slice_in_shard(item_id)
                    setattr(dist_param, "megatron_fsdp_slice", slice(_start, _end))

                dist_param.reset_attribute()
                named_parameters.append((param_name, dist_param))

        return named_parameters

    def update_main_grads(self):
        """
        Update the gradients in the model parameters with the main gradients
        from the main gradient buffer. If the model parameters are sharded,
        we only need to update the gradient shard associated with the model
        parameter shard, as both are sharded symmetrically.

        Checks if high-precision main weights are utilized for optimization.
        Otherwise, falls back to low-precision model weights, and further
        falls back to the original module parameters not managed by cFSDP
        in the case of no sharding / cFSDP OFF.
        """
        for name, param in self.optimizer_named_parameters:
            orig_param = param.orig_param
            group = self.parameter_groups[self.param_to_param_group[orig_param]]
            gbuf = group.main_grad_buffer
            if gbuf is None:
                # No gradients to attach to the optimizer parameters.
                continue

            # If the optimizer state is sharded, we need to shard the gradient,
            # even if the gradient buffer is not sharded.
            sharded_optimizer_state = (
                self.bucketing_policy.data_parallel_sharding_strategy != "no_shard"
            )

            # Retrieve the gradient from the gradient buffer.
            item_id = group.main_grad_buffer.param_idx[orig_param]
            optimizer_grad = group.main_grad_buffer.get_item(
                item_id, only_shard=sharded_optimizer_state
            )
            if group.main_weight_buffer is not None:
                if not getattr(self, "use_precision_aware_optimizer", False):
                    # Convert the gradient to the main weight buffer dtype.
                    optimizer_grad = optimizer_grad.to(param.dtype)

            if name not in self.dist_main_grad:
                # Register the gradient as a distributed tensor.
                self.dist_main_grad[name] = make_fsdp_dtensor(
                    local_tensor=optimizer_grad,
                    param=orig_param,
                    dist_index=self.dist_index,
                    is_sharded_param=sharded_optimizer_state,
                    is_expert_param=group.is_expert_param,
                )
            else:
                # Update the existing distributed tensor with the new gradient.
                if len(orig_param.shape) > 1:
                    local_shape = (-1, *orig_param.shape[1:])
                else:
                    local_shape = (-1,)
                self.dist_main_grad[name]._local_tensor = optimizer_grad.view(local_shape)
            grad = self.dist_main_grad[name]

            if optimizer_grad.numel() == 0:
                grad = None

            # The presence of main_grad_buffer but no main_weight_buffer may imply
            # that a precision-aware optimizer is used.
            if getattr(self, "use_precision_aware_optimizer", False):
                setattr(param, "decoupled_grad", grad)
            else:
                # Attach the gradient to the optimizer parameter.
                setattr(param, "grad", grad.to(param.dtype) if grad is not None else None)

    @property
    def num_buckets(self):
        """Return the number of buckets."""
        return len(self.parameter_groups)

    @torch.no_grad()
    def copy_main_weights_to_model_weights(self):
        """
        Update the model weights from the main weights.

        If FP8 parameters are utilized, this function will quantize the high-precision
        main weights prior to installation into the model compute weight buffers.
        """
        dense_param_quantize_kwargs = {
            "model_params": [],
            "main_params": [],
            "start_offsets": [],
            "fsdp_shard_model_params": [],
        }
        expert_param_quantize_kwargs = copy.deepcopy(dense_param_quantize_kwargs)
        data_parallel_group = None
        expert_data_parallel_group = None
        clear_quantize_kwargs = lambda kwargs: [d.clear() for d in kwargs.values()]

        def _fp8_quantize_params(dense_param_quantize_kwargs, expert_param_quantize_kwargs):
            if len(dense_param_quantize_kwargs["model_params"]) > 0:
                # If we have FP8 parameters, we need to quantize them.
                fp8_quantize(data_parallel_group=data_parallel_group, **dense_param_quantize_kwargs)

            if len(expert_param_quantize_kwargs["model_params"]) > 0:
                # If we have FP8 expert parameters, we need to quantize them.
                fp8_quantize(
                    data_parallel_group=expert_data_parallel_group, **expert_param_quantize_kwargs
                )

            clear_quantize_kwargs(dense_param_quantize_kwargs)
            clear_quantize_kwargs(expert_param_quantize_kwargs)

        # Special handling of blockwise FP8
        BATCH_QUANT_MEMORY_LIMIT_BYTES = 5 * 1024**3  # 5 GB
        blockwise_fp8_weight_buffers = []
        blockwise_fp8_param_buffers = []

        def _batch_quantize_blockwise_fp8_params(
            dense_param_quantize_kwargs, expert_param_quantize_kwargs, blockwise_fp8_param_buffers
        ):
            if len(blockwise_fp8_param_buffers) == 0:
                return

            # Copy original param shards into their blockwise FP8 working buffers
            for bufs in blockwise_fp8_param_buffers:
                bufs["bucket_param"].copy_(bufs["param"])

            # Apply FP8 quantization to blockwise FP8 parameters
            _fp8_quantize_params(dense_param_quantize_kwargs, expert_param_quantize_kwargs)

            # Copy quantized params back from working buffers to original param tensors
            for bufs in blockwise_fp8_param_buffers:
                bufs["param"].copy_(bufs["bucket_param"])
            blockwise_fp8_param_buffers.clear()

            # Free bucket storage for blockwise FP8 weight buffers
            for wbuf in blockwise_fp8_weight_buffers:
                wbuf.free_bucket_storage()
            blockwise_fp8_weight_buffers.clear()

        for pg in self.parameter_groups:
            mbuf = pg.main_weight_buffer
            wbuf = pg.model_weight_buffer
            tbuf = pg.transpose_weight_buffer
            if mbuf is None:
                continue

            if pg.is_expert_param:
                quantize_func_kwargs = expert_param_quantize_kwargs
                expert_data_parallel_group = mbuf.data_parallel_group
            else:
                quantize_func_kwargs = dense_param_quantize_kwargs
                data_parallel_group = mbuf.data_parallel_group

            fp8_params = quantize_func_kwargs["model_params"]
            shard_fp32_from_fp8 = quantize_func_kwargs["main_params"]
            shard_offsets_in_fp8 = quantize_func_kwargs["start_offsets"]
            shard_model_params = quantize_func_kwargs["fsdp_shard_model_params"]

            has_blockwise_fp8_param = False
            for param in pg.params:
                item_id = mbuf.param_idx[param]
                if wbuf:
                    if wbuf.is_data_distributed or mbuf.is_data_distributed:
                        model_param = wbuf.get_item(item_id, only_shard=True)
                        if tbuf:
                            transpose_param = tbuf.get_item(item_id, only_shard=True)
                        else:
                            transpose_param = None
                        main_weight = mbuf.get_item(item_id, only_shard=True)
                    else:
                        model_param = wbuf.get_item(item_id)
                        if tbuf:
                            transpose_param = tbuf.get_item(item_id)
                        else:
                            transpose_param = None
                        main_weight = mbuf.get_item(item_id)
                else:
                    assert not mbuf.is_data_distributed
                    model_param = to_local_if_dtensor(param)
                    main_weight = mbuf.get_item(item_id)

                # TODO(@kunlunl, @cspades): Currently, we only support FP8 parameters
                # for FSDP, i.e. fully-sharded compute parameters with a high-precision
                # main weight buffer. Would it be possible to add if branches here to
                # quantize the original param (no_shard) or wbuf data (optim, optim_grads)
                # for a seamless user experience and coverage for ZeRO-1 and ZeRO-2?

                if is_blockwise_float8tensor(param):
                    fp8_params.append(param)
                    if model_param.numel() == 0:
                        # Empty parameter.
                        shard_fp32_from_fp8.append(None)
                        shard_offsets_in_fp8.append(None)
                        shard_model_params.append([None, None])
                    else:
                        shard_fp32_from_fp8.append(main_weight)
                        shard_offsets_in_fp8.append(wbuf.locate_item_in_global_item(item_id)[0])
                        bucket = wbuf.fetch_bucket()
                        b_model_param = wbuf.get_item_from_bucket(bucket, item_id)[
                            slice(*wbuf.locate_item_in_global_item(item_id))
                        ]
                        assert (
                            transpose_param is None
                        ), "Blockwise FP8 does not support transpose param."
                        shard_model_params.append([b_model_param, None])
                        assert b_model_param.numel() == model_param.numel(), (
                            f"Blockwise FP8 bucket param numel {b_model_param.numel()} does"
                            f" not match model param numel {model_param.numel()}"
                            f" name: {self.param_to_name[param]}"
                        )
                        blockwise_fp8_param_buffers.append(
                            {"bucket_param": b_model_param, "param": model_param}
                        )
                        has_blockwise_fp8_param = True
                    continue

                if is_float8tensor(param):
                    fp8_params.append(param)
                    if model_param.numel() == 0:
                        # Empty parameter.
                        shard_fp32_from_fp8.append(None)
                        shard_offsets_in_fp8.append(None)
                        shard_model_params.append([None, None])
                    else:
                        shard_fp32_from_fp8.append(main_weight)
                        shard_offsets_in_fp8.append(wbuf.locate_item_in_global_item(item_id)[0])
                        shard_model_params.append([model_param, transpose_param])
                    continue

                if model_param.numel() > 0:
                    model_param.data.copy_(main_weight.view(model_param.shape))

            if has_blockwise_fp8_param:
                blockwise_fp8_weight_buffers.append(wbuf)
                if (
                    sum([wbuf.bucket_index.size for wbuf in blockwise_fp8_weight_buffers])
                    > BATCH_QUANT_MEMORY_LIMIT_BYTES
                ):
                    _batch_quantize_blockwise_fp8_params(
                        dense_param_quantize_kwargs,
                        expert_param_quantize_kwargs,
                        blockwise_fp8_param_buffers,
                    )

        _batch_quantize_blockwise_fp8_params(
            dense_param_quantize_kwargs, expert_param_quantize_kwargs, blockwise_fp8_param_buffers
        )
        _fp8_quantize_params(dense_param_quantize_kwargs, expert_param_quantize_kwargs)

    @torch.no_grad()
    def copy_model_weights_to_main_weights(self):
        """Copy the model weights to the main weights."""
        for group in self.parameter_groups:
            mbuf = group.main_weight_buffer
            if mbuf is None:
                continue
            wbuf = group.model_weight_buffer
            if mbuf.is_data_distributed:
                copyin_data = wbuf.get_shard_from_local_buffer()
            else:
                copyin_data = wbuf.data
            assert mbuf.data.numel() == copyin_data.numel(), (
                f"Master weight buffer size {mbuf.data.numel()} does not match "
                f"model weight buffer size {copyin_data.numel()}"
            )
            # TODO(mxfp8): Make sure it's not a fp8 buf?
            mbuf.data.copy_(copyin_data.data)

    def all_gather_parameters(self, async_op: bool = True):
        """All gather the parameters.
        Args:
            async_op (bool, optional): Whether to do the all-reduce
                asynchronously. Defaults to False.
        """
        assert all(
            [not g.model_weight_buffer.is_data_distributed for g in self.parameter_groups]
        ), "all_gather_parameters() should only be called when parameters are not sharded."
        assert (
            self.ddp_config.outer_dp_sharding_strategy == "no_shard"
        ), "all_gather_parameters() should not be called when outer-DP sharding is enabled."

        all_gather_ops = []
        for g in self.parameter_groups:
            for buf in [g.model_weight_buffer, g.transpose_weight_buffer]:
                if buf is None:
                    continue
                shard = buf.get_shard_from_local_buffer()
                all_gather_handler = torch.distributed.all_gather_into_tensor(
                    output_tensor=buf.data,
                    input_tensor=shard,
                    group=buf.data_parallel_group,
                    async_op=async_op,
                )
                if async_op:
                    all_gather_ops.append(all_gather_handler)

        for op in all_gather_ops:
            op.wait()

    def reduce_scatter_gradients(self, async_op: bool = True):
        """Reduce scatter the gradients.
        Args:
            async_op (bool, optional): Whether to do the all-reduce
                asynchronously. Defaults to False.
        """
        assert all(
            [not g.main_grad_buffer.is_data_distributed for g in self.parameter_groups]
        ), "reduce_scatter_gradients() should only be called when gradients are not sharded."
        assert (
            self.ddp_config.outer_dp_sharding_strategy == "no_shard"
        ), "reduce_scatter_gradients() should not be called when outer-DP sharding is enabled."

        reduce_scatter_ops = []
        for g in self.parameter_groups:
            gbuf = g.main_grad_buffer
            if gbuf is None:
                continue
            scaling_factor = gbuf.gradient_scaling_factor
            if self.ddp_config.check_for_nan_in_grad:
                _check_nan_in_grad(gbuf.data)
            reduce_op = gradient_reduce_preprocessing(gbuf.data, scaling_factor, self.ddp_config)
            reduce_scatter_handler = torch.distributed.reduce_scatter_tensor(
                output=gbuf.get_shard_from_local_buffer(),
                input=gbuf.data,
                op=reduce_op,
                group=g.main_grad_buffer.data_parallel_group,
                async_op=async_op,
            )

            if async_op:
                reduce_scatter_ops.append(reduce_scatter_handler)

        for op in reduce_scatter_ops:
            op.wait()

    def all_reduce_gradients(self, async_op: bool = False):
        """All reduce the gradients.
        Args:
            async_op (bool, optional): Whether to do the all-reduce
                asynchronously. Defaults to False.
        """
        assert all(
            [
                not g.main_grad_buffer.is_data_distributed
                for g in self.parameter_groups
                if g.main_grad_buffer
            ]
        ), "all_reduce_gradients() should only be called when gradients are not sharded."
        assert (
            self.ddp_config.outer_dp_sharding_strategy == "no_shard"
        ), "all_reduce_gradients() should not be called when outer-DP sharding is enabled."

        all_reduce_ops = []
        for g in self.parameter_groups:
            gbuf = g.main_grad_buffer
            if gbuf is not None:
                continue
            scaling_factor = gbuf.gradient_scaling_factor
            if self.ddp_config.check_for_nan_in_grad:
                _check_nan_in_grad(gbuf.data)
            reduce_op = gradient_reduce_preprocessing(gbuf.data, scaling_factor, self.ddp_config)
            all_reduce_handler = torch.distributed.all_reduce(
                gbuf.data, op=reduce_op, group=gbuf.data_parallel_group, async_op=async_op
            )
            if async_op:
                all_reduce_ops.append(all_reduce_handler)

        for op in all_reduce_ops:
            op.wait()


class BucketStatus(Enum):
    """
    An enumeration of possible statuses for a data-parallel communication bucket.

    Attributes:
        EMPTY (int): The bucket is empty and not in use.
        COMMUNICATING (int): The bucket is currently being used for communication.
        READY_TO_USE (int): The bucket is filled with data and ready for use.
    """

    EMPTY = 1
    COMMUNICATING = 2
    READY_TO_USE = 3


class GradReducePipeline:
    """
    Pipeline for reducing gradients.
    """

    def __init__(
        self,
        param_and_grad_buffer: ParamAndGradBuffer,
        rs_stream: Optional[torch.cuda.Stream] = None,
        check_nans: bool = False,
    ) -> None:
        self.buffer = param_and_grad_buffer
        # Track the status of ongoing gradient reduce-scatter operations before optimizer step.
        self.grad_reduce_queue = []
        self.bucket_status = {
            # All buckets are initially deallocated / empty after initialization
            # of ParamAndGradBuffer.
            i: BucketStatus.EMPTY
            for i in range(self.buffer.num_buckets)
            if self.buffer.parameter_groups[i].main_grad_buffer
        }
        # Track the number of parameters in each bucket that are ready for gradient reduce-scatter.
        self.bucket_grad_ready_params = [set() for _ in range(self.buffer.num_buckets)]
        self.rs_stream = rs_stream
        self.check_nans = check_nans

        # Init outer-DP group gradient reduction related attributes.
        dist_index = self.buffer.dist_index
        if dist_index.use_hybrid_fsdp:
            # If there are multiple FSDP groups, we need to reduce gradients across groups.
            self.outer_fsdp_group_grad_reduce = True
            self.outer_fsdp_group_grad_reduce_stream = torch.cuda.Stream()
        else:
            self.outer_fsdp_group_grad_reduce = False

    @property
    def num_buckets(self):
        """Return the number of buckets."""
        return self.buffer.num_buckets

    def reset(self):
        """Handle the processing tasks and reset the pipeline."""
        self.wait_for_previous_grad_reduce(0)
        for bucket_id, grad_ready_params in enumerate(self.bucket_grad_ready_params):
            param_list = self.buffer.parameter_groups[bucket_id].params
            n_params = len(param_list)
            param_to_name = self.buffer.param_to_name
            assert len(grad_ready_params) == 0, (
                f"Found {len(grad_ready_params)} out of {n_params} parameters that are ready for "
                f"reduce-scatter/all-reduce, but the pipeline is being reset. "
                f"grad_ready_params: {[param_to_name[p] for p in grad_ready_params]} "
                f"param_list: {[param_to_name[p] for p in param_list]}"
            )

        for bucket_id, _ in self.bucket_status.items():
            gbuf = self.get_fsdp_buffer(bucket_id)
            gbuf.free_bucket_storage()
            gbuf.reset_param_main_grad()
            self.bucket_status[bucket_id] = BucketStatus.EMPTY

    def reduce_gradients(
        self,
        params: List[torch.Tensor],
        suggested_queue_capacity: Optional[int] = None,
        outer_fsdp_group_grad_reduce: bool = False,
    ):
        """Reduce the gradients for the given parameters.
        Args:
            params (List[torch.Tensor]): The parameters.
            suggested_queue_capacity (int, optional): The suggested queue capacity.
                Defaults to None.
            outer_fsdp_group_grad_reduce (bool, optional): Whether to reduce gradients
                across outer-DP groups. Defaults to False.
        """
        # Sort parameters by their bucket IDs to ensure a deterministic processing order.
        # Performing reduce-scatter operations out of order can lead to hangs.
        params = sorted(list(params), key=lambda x: self.buffer.param_to_param_group[x])
        for param in params:
            bucket_id = self.buffer.param_to_param_group[param]
            param_group = self.buffer.parameter_groups[bucket_id]
            if not param.requires_grad:
                assert param_group.requires_grad is False, (
                    f"Param {self.buffer.param_to_name[param]} has requires_grad=False, "
                    f"but it is in a parameter group with requires_grad=True."
                )
                continue
            assert param_group.requires_grad, (
                f"Param {self.buffer.param_to_name[param]} has requires_grad=True, "
                f"but it is in a parameter group with requires_grad=False."
            )

            # Mark grad as ready for reduce-scatter/all-reduce.
            self.bucket_grad_ready_params[bucket_id].add(param)
            bucket_group = self.get_ready_bucket_group_for_reduction(bucket_id)
            if bucket_group:
                self.wait_for_previous_grad_reduce(
                    suggested_queue_capacity=suggested_queue_capacity
                )
                self._bucket_group_gradient_reduce(
                    bucket_group,
                    async_op=True,
                    outer_fsdp_group_grad_reduce=outer_fsdp_group_grad_reduce,
                )

    def wait_for_previous_grad_reduce(
        self, suggested_queue_size: int = 1, suggested_queue_capacity: Optional[int] = None
    ):
        """
        Wait for the previous reduce-scatter/all-reduce to finish.
        Args:
            suggested_queue_size (int, optional): The recommended queue size in
                buckets. Defaults to 1.
            suggested_queue_capacity (Optional[int], optional): The recommended queue capacity
                in number of parameters in all buckets in the reduction queue. Defaults to None.
        """
        if suggested_queue_capacity is not None:
            queue_space = sum(
                [
                    self.buffer.parameter_groups[bucket_id].main_grad_buffer.bucket_index.size
                    for _, _, bucket_id in self.grad_reduce_queue
                ]
            )
            while queue_space > suggested_queue_capacity:
                grad_reduce_event, free_up_grad_bucket, bucket_id = self.grad_reduce_queue.pop(0)
                grad_reduce_event.wait()
                free_up_grad_bucket()
                queue_space -= self.buffer.parameter_groups[
                    bucket_id
                ].main_grad_buffer.bucket_index.size
        else:
            suggested_queue_size = max(0, min(suggested_queue_size, self.buffer.num_buckets - 1))
            while len(self.grad_reduce_queue) > suggested_queue_size:
                grad_reduce_event, free_up_grad_bucket, _ = self.grad_reduce_queue.pop(0)
                grad_reduce_event.wait()
                free_up_grad_bucket()

    def _enforce_double_buffer_limit(self, add_buckets):
        if not self.buffer.ddp_config.fsdp_double_buffer:
            return

        param_groups = self.buffer.parameter_groups
        double_buf_units = set()
        for bucket_id in add_buckets:
            fsdp_unit_id = param_groups[bucket_id].fsdp_unit_id
            if fsdp_unit_id in self.buffer.double_buf_units:
                double_buf_units.add(fsdp_unit_id)
        assert (
            len(double_buf_units) <= 2
        ), f"Double buffer limit exceeded. Current double_buf_units: {double_buf_units}."

        keep_n = len(self.grad_reduce_queue)
        for _, _, bucket_id in reversed(self.grad_reduce_queue):
            fsdp_unit_id = param_groups[bucket_id].fsdp_unit_id
            double_buf_units.add(fsdp_unit_id)
            if len(double_buf_units) > 2:
                keep_n -= 1
        self.wait_for_previous_grad_reduce(keep_n)

    def get_ready_bucket_group_for_reduction(self, bucket_id: int) -> Optional[List[int]]:
        """Checks if all buckets in the bucket group containing the given bucket_id
        are ready for gradient reduction.
        If so, returns the list of ready bucket IDs for reduction; otherwise, returns None.

        Args:
            bucket_id (int): The bucket to mark as ready for reduce-scatter or all-reduce.

        Returns:
            Optional[List[int]]: The bucket group ready for gradient reduction,
            or None if not all buckets are ready.
        """
        # Prepare bucket group for gradient reduce. Note that the
        # some bucket parameters do not require grad, so we need to
        # remove them from the bucket group.
        bucket_group = self.buffer.bucket_to_bucket_group[bucket_id]
        bucket_group = [i for i in bucket_group if self.buffer.parameter_groups[i].main_grad_buffer]
        # If any bucket in the bucket group is not ready, skip the gradient reduce
        # waiting for the bucket group to be all ready before executing.
        for bucket_id in bucket_group:
            param_group = self.buffer.parameter_groups[bucket_id]
            if len(self.bucket_grad_ready_params[bucket_id]) != len(param_group.params):
                return None

        return bucket_group

    def get_fsdp_buffer(self, bucket_id: int) -> DataParallelBuffer:
        """Get the FSDP buffer for the given bucket ID."""
        param_group = self.buffer.parameter_groups[bucket_id]
        if self.buffer.ddp_config.outer_dp_sharding_strategy != "no_shard":
            return param_group.hsdp_gbuf
        return param_group.main_grad_buffer

    def _bucket_group_gradient_reduce(
        self,
        bucket_group: List[int],
        async_op: bool = False,
        outer_fsdp_group_grad_reduce: bool = False,
    ) -> bool:
        """Mark the bucket ready for reduce-scatter/all-reduce, if all bucket in
        the bucket group are ready, then do the reduce-scatter/all-reduce.
        Args:
            bucket_id (int): The bucket to be marked.
            bucket_group (List[int]): The bucket group to be reduced.
            async_op (bool, optional): Whether to do the reduce-scatter/all-reduce
                asynchronously. Defaults to False.
        Returns:
            bool: True if the bucket is go for reduce-scatter/all-reduce.
        """
        # When using FSDP double buffer, waiting for the necessary bucket to be
        # released ensures that our double buffer will not explode due to too
        # many empty bucket requests.
        ddp_config = self.buffer.ddp_config
        mp_policy = self.buffer.mp_policy
        if ddp_config.fsdp_double_buffer:
            self._enforce_double_buffer_limit(bucket_group)

        current_stream = torch.cuda.current_stream()
        reduce_scatter_stream = (
            self.rs_stream if self.rs_stream is not None else torch.cuda.current_stream()
        )
        reduce_scatter_stream.wait_stream(current_stream)

        # DP-Shard Gradient Reduction
        dp_group = self.get_fsdp_buffer(bucket_group[0]).data_parallel_group
        with torch.cuda.stream(reduce_scatter_stream):
            with _coalescing_manager(dp_group):
                # List of gradient accumulation closure tasks.
                # (grad_buffer, reduced_grad)
                grad_accum_closure = []
                for bucket_id in bucket_group:

                    # Get the DP-Shard gradient buffer associated with this bucket ID.
                    gbuf = self.get_fsdp_buffer(bucket_id)

                    # Get the unreduced gradients associated with the gradient buffer.
                    unreduced_grad_bucket = gbuf.fetch_bucket(
                        dtype=mp_policy.grad_comm_dtype if gbuf.is_data_distributed else None
                    )
                    # NOTE(@cspades): `no_shard` or `optim`
                    # Un-sharded gradient buffers accumulate un-reduced gradients locally
                    # without allocating an un-sharded buffer. For custom communication
                    # data-type(s), an extra un-sharded buffer needs to be allocated!
                    custom_grad_comm_dtype = (
                        mp_policy.grad_comm_dtype is not None
                        and unreduced_grad_bucket.data.dtype != mp_policy.grad_comm_dtype
                    )
                    if not gbuf.is_data_distributed and custom_grad_comm_dtype:
                        # Create a custom communication buffer with gbuf.
                        # Introduces copy and memory overhead.
                        unreduced_grad_bucket = gbuf.allocate_bucket_storage(
                            dtype=mp_policy.grad_comm_dtype,
                            device=unreduced_grad_bucket.data.device,
                            init_values=unreduced_grad_bucket.data,
                        )
                    unreduced_grad = unreduced_grad_bucket.data

                    # Pre-scale unsharded bucket gradient and prepare the ReduceOp.
                    scaling_factor = gbuf.gradient_scaling_factor
                    reduce_op = gradient_reduce_preprocessing(
                        unreduced_grad, scaling_factor, ddp_config
                    )

                    # Reduce-scatter or all-reduce the unsharded gradient.
                    if ddp_config.data_parallel_sharding_strategy == "no_shard":
                        # All-reduce un-sharded gradients from every rank.
                        torch.distributed.all_reduce(
                            unreduced_grad, op=reduce_op, group=gbuf.data_parallel_group
                        )
                        if custom_grad_comm_dtype:
                            # Reduction used a temporary communication buffer.
                            grad_accum_closure.append(
                                # Un-sharded buffer data.
                                (gbuf.data, unreduced_grad)
                            )
                    else:
                        # Slice a gradient shard from the communication bucket.
                        grad_shard = gbuf.get_shard_from_bucket(unreduced_grad_bucket)

                        # Execute the reduce-scatter collective.
                        torch.distributed.reduce_scatter_tensor(
                            output=grad_shard,
                            input=unreduced_grad,
                            op=reduce_op,
                            group=gbuf.data_parallel_group,
                        )

                        # Track closure tasks to accumulate the reduced gradient shard.
                        # NOTE: If the gradient buffer is unsharded and no communication
                        # bucket is allocated, then the output bucket shard is backed by
                        # the unsharded gradient buffer and the reduce-scatter result
                        # has already been installed into the gradient buffer.
                        if gbuf.is_data_distributed or custom_grad_comm_dtype:
                            grad_accum_closure.append(
                                # Target for sharded or un-sharded gradient buffers.
                                (gbuf.get_shard_from_local_buffer(), grad_shard)
                            )

                    # Mark bucket ID as CUDA work-in-progress.
                    self.bucket_status[bucket_id] = BucketStatus.COMMUNICATING

            for local_grad, reduced_grad in grad_accum_closure:
                if ddp_config.data_parallel_sharding_strategy in ["no_shard", "optim"]:
                    # Copy the reduced gradient into the main gradient buffer.
                    local_grad.copy_(reduced_grad)
                else:
                    # Accumulate the reduced gradient into the local gradient buffer.
                    # Accumulation data-type is type-promoted with respect to the
                    # accumulated gradient and the buffer main_grads_dtype.
                    local_grad += reduced_grad

            # Record a checkpoint for the event to synchronize against the reduce-scatter stream.
            reduce_scatter_view_out_event = reduce_scatter_stream.record_event()

        # DP-Outer Gradient Reduction
        if outer_fsdp_group_grad_reduce:
            # Wait on the DP-Shard reduction before further reduction.
            self.outer_fsdp_group_grad_reduce_stream.wait_stream(reduce_scatter_stream)
            outer_fsdp_group = self.buffer.dist_index.get_outer_fsdp_group()
            with torch.cuda.stream(self.outer_fsdp_group_grad_reduce_stream):
                with _coalescing_manager(outer_fsdp_group):
                    # List of gradient accumulation closure tasks.
                    # (grad_buffer, reduced_grad)
                    grad_accum_closure = []
                    for bucket_id in bucket_group:
                        # Skip gradient scaling for DP-Outer, because the
                        # (DP-Shard, DP-Outer) scaling is already applied.
                        if ddp_config.average_in_collective:
                            reduce_op = torch.distributed.ReduceOp.AVG
                        else:
                            reduce_op = torch.distributed.ReduceOp.SUM

                        # (DP-Shard, DP-Outer) if HFSDP, otherwise just DP-Shard for HSDP
                        main_grad_buffer = self.buffer.parameter_groups[bucket_id].main_grad_buffer

                        # FSDP buffer can be un-sharded or sharded for HSDP, but sharded for HFSDP.
                        # TODO(@cspades): For `optim`, we don't need to reduce the local un-sharded
                        # gradient, just the shard updated via reduce-scatter.
                        fsdp_grad_buffer = self.get_fsdp_buffer(bucket_id)
                        unreduced_grad = fsdp_grad_buffer.data
                        assert (
                            main_grad_buffer.dtype == fsdp_grad_buffer.dtype
                        ), "Main and DP-Shard gradient buffer must share the exact same dtype."

                        # Cast DP-Shard gradient to communication dtype if specified and necessary.
                        custom_grad_comm_dtype = (
                            mp_policy.grad_comm_dtype is not None
                            and unreduced_grad.dtype != mp_policy.grad_comm_dtype
                        )
                        if custom_grad_comm_dtype:
                            # Allocate a custom communication buffer with the HSDP gradient
                            # communication buffer. Introduces copy and memory overhead.
                            hsdp_comm_gbuf = self.buffer.parameter_groups[bucket_id].hsdp_comm_gbuf
                            unreduced_grad = hsdp_comm_gbuf.allocate_bucket_storage(
                                # Allocate memory for the sharded or un-sharded
                                # gradient reduced over DP-Shard.
                                shard=fsdp_grad_buffer.is_data_distributed,
                                dtype=mp_policy.grad_comm_dtype,
                                device=unreduced_grad.device,
                                init_values=unreduced_grad,
                            ).data

                        # All-reduce or reduce-scatter the DP-Shard gradients across DP-Outer.
                        if ddp_config.outer_dp_sharding_strategy != "no_shard":
                            # Retrieve the (DP-Outer, DP-Shard) gradient shard from the
                            # main gradient buffer which shards across the entire DP group,
                            # i.e. across all DP-Shard and DP-Outer ranks.
                            main_grad_shard = main_grad_buffer.get_shard_from_local_buffer()
                            if custom_grad_comm_dtype:
                                # Scatter back into communication buffer.
                                dp_outer_rank = outer_fsdp_group.rank()
                                output_buffer = unreduced_grad[
                                    dp_outer_rank
                                    * main_grad_shard.numel() : (dp_outer_rank + 1)
                                    * main_grad_shard.numel()
                                ]
                            else:
                                # Scatter directly into the main gradient buffer.
                                output_buffer = main_grad_shard
                            # Reduce-scatter the FSDP gradient buffer shard further
                            # into the (DP-Outer, DP-Shard) gradient shard.
                            torch.distributed.reduce_scatter_tensor(
                                output=output_buffer,
                                input=unreduced_grad,
                                op=reduce_op,
                                group=outer_fsdp_group,
                            )
                            if custom_grad_comm_dtype:
                                # Reduce-scatter output was a temporary communication buffer.
                                grad_accum_closure.append((main_grad_shard, output_buffer))
                        else:  # HSDP -> main_grad_buffer = (DP-Shard,)
                            # No DP-Outer sharding, so all-reduce FSDP gradients across DP-Outer.
                            # All FSDP buffers will have reduced un-sharded or sharded gradients.
                            torch.distributed.all_reduce(
                                unreduced_grad, group=outer_fsdp_group, op=reduce_op
                            )
                            if custom_grad_comm_dtype:
                                # Reduction used a temporary communication buffer.
                                grad_accum_closure.append((main_grad_buffer.data, unreduced_grad))

                for main_grad_buffer, reduced_grad in grad_accum_closure:
                    # Update the (DP-Outer, DP-Shard) gradient shard in the main gradient buffer.
                    # No accumulation should happen in the (DP-Shard, DP-Outer) gradient buffer.
                    main_grad_buffer.copy_(reduced_grad)

            reduce_scatter_view_out_event = self.outer_fsdp_group_grad_reduce_stream.record_event()

        free_up_grad_bucket_func = {}
        for bucket_id in bucket_group:

            def get_closure(bucket_id):
                def free_up_grad_bucket():
                    # Empty the set of parameters that are ready for gradient reduction.
                    self.bucket_grad_ready_params[bucket_id] = set()
                    gbuf = self.get_fsdp_buffer(bucket_id)
                    # Free the memory backing the temporarily-allocated communication
                    # bucket associated with this buffer. Only exists for sharded
                    # gradient buffers, or if a custom gradient data-type is used!
                    gbuf.free_bucket_storage()
                    # Gradient reduction completed, can de-reference param.main_grad.
                    gbuf.reset_param_main_grad()
                    hsdp_comm_gbuf = self.buffer.parameter_groups[bucket_id].hsdp_comm_gbuf
                    if hsdp_comm_gbuf is not None:
                        # Also de-allocate any communication buffers used for H(F)SDP.
                        hsdp_comm_gbuf.free_bucket_storage()
                    # Mark the bucket as deallocated / empty.
                    self.bucket_status[bucket_id] = BucketStatus.EMPTY

                return free_up_grad_bucket

            free_up_grad_bucket_func[bucket_id] = get_closure(bucket_id)

        if async_op:
            for bucket_id, free_up_grad_bucket in free_up_grad_bucket_func.items():
                self.grad_reduce_queue.append(
                    (reduce_scatter_view_out_event, free_up_grad_bucket, bucket_id)
                )
            return True

        reduce_scatter_view_out_event.wait()
        for free_up_grad_bucket in free_up_grad_bucket_func.values():
            free_up_grad_bucket()
        return True


class PrefetchOrder(Enum):
    """
    An enumeration of possible prefetch orders for data-parallel operations.

    Attributes:
        FORWARD_PASS_ORDER (int): Prefetch in the order of forward pass computation.
        BACKWARD_PASS_ORDER (int): Prefetch in the order of backward pass computation.
    """

    FORWARD_PASS_ORDER = 0
    BACKWARD_PASS_ORDER = 1


class AllGatherPipeline:
    """
    Pipeline for all-gathering parameters.
    """

    def __init__(
        self,
        param_and_grad_buffer: ParamAndGradBuffer,
        ag_stream: Optional[torch.cuda.Stream] = None,
    ) -> None:
        self.buffer = param_and_grad_buffer
        self.ag_stream = ag_stream
        # Track the status of all-gather operations for each bucket.
        self.param_gather_event_map = {}
        # All buckets are initially deallocated / empty after initialization of ParamAndGradBuffer.
        self.bucket_status = {}
        for i in range(self.buffer.num_buckets):
            for bwd in [False, True]:
                self.bucket_status[self.get_bucket_key(i, bwd)] = BucketStatus.EMPTY

        # Track whether each bucket can be deallocated.
        self.bucket_can_be_released = {}
        for i in range(self.buffer.num_buckets):
            for bwd in [False, True]:
                self.bucket_can_be_released[self.get_bucket_key(i, bwd)] = False

        # Map each bucket to the bucket group it belongs to by enumerated ID.
        # Made to collect a subset of buckets in the same bucket group.
        self.bucket_to_bucket_group = {}
        group_id = 0
        for bucket_group in self.buffer.bucket_to_bucket_group.values():
            new_group = False
            for bucket_id in bucket_group:
                if bucket_id not in self.bucket_to_bucket_group:
                    new_group = True
                    break
            if new_group:
                group_id += 1
                for bucket_id in bucket_group:
                    self.bucket_to_bucket_group[bucket_id] = group_id

        if (
            self.buffer.dist_index.use_hybrid_fsdp
            and self.buffer.ddp_config.outer_dp_sharding_strategy != "no_shard"
        ):
            # If there are multiple FSDP groups and full sharding, we need to
            # all-gather parameters across groups.
            self.outer_fsdp_group_param_gather_stream = torch.cuda.Stream()

    def get_bucket_key(self, bucket_id, bwd):
        """Get the key for the bucket."""
        has_transpose_buffer = (
            self.buffer.parameter_groups[bucket_id].transpose_weight_buffer is not None
        )
        return (bucket_id, has_transpose_buffer and bwd)

    @property
    def num_buckets(self):
        """Return the number of buckets."""
        return self.buffer.num_buckets

    def reset(self):
        """Reset the pipeline state."""
        if len(self.param_gather_event_map) > 0:
            warnings.warn(
                (
                    f"There are still pending all-gather tasks, process them. "
                    f"Bucket status: {self.bucket_status}."
                ),
                UserWarning,
            )
            while len(self.param_gather_event_map) > 0:
                (bucket_id, bwd) = next(iter(self.param_gather_event_map))
                self.wait_bucket_ready(bucket_id, bwd)
        for bucket_id in range(self.num_buckets):
            for bwd in [False, True]:
                self.bucket_can_be_released[self.get_bucket_key(bucket_id, bwd)] = True
        self.recycle_unused_buckets()

        assert all([status is BucketStatus.EMPTY for status in self.bucket_status.values()]), (
            f"There are still working buckets, it is not safe to reset. "
            f"bucket_status: {self.bucket_status}."
        )
        assert all(
            [not can_be_released for can_be_released in self.bucket_can_be_released.values()]
        ), (
            f"The bucket can be released table is in an abnormal state, not safe to reset. "
            f"bucket_can_be_released: {self.bucket_can_be_released}."
        )

    def all_gather_params(
        self,
        params: List[torch.Tensor],
        prefetch: bool = False,
        prefetch_order: PrefetchOrder = PrefetchOrder.FORWARD_PASS_ORDER,
        suggested_AG_prefetch_size: Optional[int] = None,
        async_param_gather: bool = True,
        outer_fsdp_group_param_gather: bool = False,
        bwd: bool = False,
    ):
        """All-gather the params. If prefetch is enabled, prefetch next buckets
        in the order of `prefetch_order`.

        Args:
            params (List[torch.Tensor]): The list of params to be all-gathered.
            prefetch (bool, optional): Whether to prefetch the next bucket. Defaults to False.
            prefetch_order (PrefetchOrder, optional): The order of prefetching.
                Defaults to PrefetchOrder.FORWARD_PASS_ORDER.
            suggested_AG_prefetch_size (Optional[int], optional):
                The suggested prefetch size for all-gathering. Defaults to None.
            outer_fsdp_group_param_gather (bool, optional):
                Whether to all-gather parameters across DP-Outer. Defaults to False.
            bwd (bool, optional):
                Whether to all-gather column-wise parameters instead of row-wise parameters
                for the backward pass for formats that require a transpose buffer like MXFP8.
        """
        if len(params) == 0:
            return

        ag_buckets = [self.buffer.param_to_param_group[item] for item in params]
        ag_buckets = list(sorted(set(ag_buckets)))  # Sort in order of unique bucket ID.
        parameter_groups = self.buffer.parameter_groups
        if self.buffer.ddp_config.fsdp_double_buffer:
            double_buf_units = set()
            for bucket_id in ag_buckets:
                fsdp_unit_id = parameter_groups[bucket_id].fsdp_unit_id
                if fsdp_unit_id in self.buffer.double_buf_units:
                    double_buf_units.add(fsdp_unit_id)
            if len(double_buf_units) > 2:
                raise ValueError(
                    f"{double_buf_units} FSDP units were requested, "
                    "but double buffers can support no more than 2 FSDP units."
                )

        # Do not release the buckets that are being all-gathered.
        for bucket_id in ag_buckets:
            self.bucket_can_be_released[self.get_bucket_key(bucket_id, bwd)] = False

        # If prefetch is enabled, we will add prefetch buckets to ag_buckets.
        if prefetch:

            def next_bucket_id(ag_buckets):
                """
                Search for the next bucket ID that is not in the list of all-gather buckets.
                """
                if prefetch_order == PrefetchOrder.FORWARD_PASS_ORDER:
                    # Search from the initial bucket.
                    bucket_id = ag_buckets[0] + 1
                    for i in ag_buckets[1:]:
                        if i != bucket_id:
                            break
                        bucket_id += 1
                else:
                    # Search from the last bucket.
                    bucket_id = ag_buckets[-1] - 1
                    for i in reversed(ag_buckets[:-1]):
                        if i != bucket_id:
                            break
                        bucket_id -= 1
                if bucket_id < 0 or bucket_id >= self.buffer.num_buckets:
                    # Out of bounds, return None.
                    return None
                return bucket_id

            def need_skip_prefetch(bucket_id):
                # If use double buffer, we need to check if the next bucket
                # is exceeding the coverage of the double buffer.
                if self.buffer.ddp_config.fsdp_double_buffer:
                    fsdp_unit_id = parameter_groups[bucket_id].fsdp_unit_id
                    double_buf_units.add(fsdp_unit_id)
                    if len(double_buf_units) > 2:
                        # Prefetching the next bucket will exceed the coverage of
                        # the double buffer, so we need to stop prefetching.
                        return True
                return False

            if suggested_AG_prefetch_size is None:
                # Default 500M
                suggested_AG_prefetch_size = 500_000_000

            base_all_gather_size = sum(
                [parameter_groups[i].model_weight_buffer.bucket_index.size for i in ag_buckets]
            )
            bucket_id = next_bucket_id(ag_buckets)
            while bucket_id is not None:
                prefetch_all_gather_size = (
                    sum(
                        [
                            parameter_groups[i].model_weight_buffer.bucket_index.size
                            for i in ag_buckets
                        ]
                    )
                    - base_all_gather_size
                )
                if prefetch_all_gather_size >= suggested_AG_prefetch_size:
                    # Reached the prefetch limit.
                    break

                if need_skip_prefetch(bucket_id):
                    break

                # Extend the list of all-gather buckets with another group of buckets.
                ag_buckets.extend(self.buffer.bucket_to_bucket_group[bucket_id])
                # Re-sort and find the next bucket not in the list.
                ag_buckets = list(sorted(set(ag_buckets)))
                bucket_id = next_bucket_id(ag_buckets)

        # Only all-gather on buckets that have not been allocated yet.
        ag_buckets = [
            bucket_id
            for bucket_id in ag_buckets
            if self.bucket_status[self.get_bucket_key(bucket_id, bwd)] == BucketStatus.EMPTY
        ]
        if len(ag_buckets) == 0:
            return

        # Divide buckets into aggregate groups. We need to reconstruct the bucket groups
        # because the all-gather parameter groups may be a subset of the buckets.
        bucket_group_to_buckets = {}
        for bucket_id in ag_buckets:
            group_id = self.bucket_to_bucket_group[bucket_id]
            if group_id not in bucket_group_to_buckets:
                bucket_group_to_buckets[group_id] = []
            bucket_group_to_buckets[group_id].append(bucket_id)

        # Coalesce all-gather operations for all buckets in the same data-parallel-group
        for _, buckets in bucket_group_to_buckets.items():
            all_gather_stream = (
                self.ag_stream if self.ag_stream is not None else torch.cuda.current_stream()
            )
            if outer_fsdp_group_param_gather:
                # TODO(@kunlunl): Support MXFP8 with HFSDP. Requires an HFSDP transpose buffer.
                self.outer_fsdp_group_param_gather_stream.wait_stream(torch.cuda.current_stream())
                with torch.cuda.stream(self.outer_fsdp_group_param_gather_stream):
                    outer_fsdp_group = self.buffer.dist_index.get_outer_fsdp_group()
                    with _coalescing_manager(outer_fsdp_group, async_ops=False):
                        for bucket_id in buckets:
                            # All-gather the (DP-Outer, DP-Shard) weight shards from the DP-backed
                            # main weight buffer into the (DP-Shard)-backed hybrid weight buffer.
                            wbuf = self.buffer.parameter_groups[bucket_id].model_weight_buffer
                            hsdp_wbuf = self.buffer.parameter_groups[bucket_id].hsdp_wbuf
                            torch.distributed.all_gather_into_tensor(
                                output_tensor=hsdp_wbuf.data,
                                input_tensor=wbuf.data,
                                group=outer_fsdp_group,
                            )
                # Wait for the DP-Outer group all-gather to finish.
                all_gather_stream.wait_stream(self.outer_fsdp_group_param_gather_stream)

            # Coalesce the asynchronous NCCL operations in this context.
            all_gather_stream.wait_stream(torch.cuda.current_stream())
            dp_group = self.get_fsdp_buffer(buckets[0]).data_parallel_group
            with torch.cuda.stream(all_gather_stream):
                with _coalescing_manager(
                    dp_group, async_ops=async_param_gather
                ) as coalescing_event:
                    for bucket_id in buckets:
                        # All-gather the module weights from each FSDP buffer shard
                        # into an allocated bucket containing unsharded weights.
                        self.async_bucket_gather(bucket_id, bwd)

            # Replace the parameter all-gather event with coalescing event.
            for bucket_id in buckets:
                bucket_key = self.get_bucket_key(bucket_id, bwd)
                _, mark_bucket_ready_to_use = self.param_gather_event_map[bucket_key]
                self.param_gather_event_map[bucket_key] = (
                    coalescing_event,
                    mark_bucket_ready_to_use,
                )

        # Wait for all-gather to finish
        if not async_param_gather:
            for bucket_id in buckets:
                self.wait_bucket_ready(bucket_id, bwd)

    def wait_bucket_ready(self, bucket_id, bwd, empty_ok=False):
        """Wait for the bucket to be ready."""
        bucket_key = self.get_bucket_key(bucket_id, bwd)

        if self.bucket_status[bucket_key] == BucketStatus.READY_TO_USE:
            # Already ready to use.
            return
        if self.bucket_status[bucket_key] == BucketStatus.EMPTY:
            if empty_ok:
                return
            # Bucket shouldn't be empty, this implies that the bucket
            # was not allocated or NCCL operations are not complete.
            raise ValueError(f"Bucket {bucket_id} is empty.")

        # Wait for asynchronous / overlapped NCCL operations to complete.
        param_gather_event, mark_bucket_ready_to_use = self.param_gather_event_map.pop(bucket_key)
        param_gather_event.wait()
        mark_bucket_ready_to_use()

    @torch.no_grad()
    def release_bucket(self, bucket_id, bwd, lazy: bool = False):
        """
        Release the specified parameter bucket, freeing its associated buffer storage.

        This function marks or frees the memory of a parameter bucket depending on
        whether lazy release is enabled. It ensures that buckets are not released
        while still being communicated or in use by the pipeline.

        Args:
            bucket_id (int): Identifier of the bucket to be released.
            bwd (bool): Indicates if the release is triggered during the backward pass.
            lazy (bool, optional): Determines when the parameter buffer (bucket) is released.
                - If False, the buffer is released immediately.
                - If True, the release is deferred until just before the all-gather pipeline
                requests a new buffer. The delayed release is performed by invoking
                `recycle_unused_buckets`.

        Raises:
            ValueError: If the specified bucket is currently in communication and
                cannot be safely released.

        Notes:
            - Buckets marked as lazy will be released later when the pipeline determines
            they are no longer needed.
            - If the bucket has a transpose weight buffer (used in FP8 backward passes),
            this buffer is freed; otherwise, the model weight buffer is released.
            - This function should NOT be invoked on buckets associated with modules not
            identified as FSDP unit modules, even when weights are sharded in the case of
            `optim_grads_params`. Non-unit modules should remain persistently allocated
            because they do not satisfy FSDP unit module state requirements, e.g. their
            parameters are simultaneously modified or shared with other modules.
        """
        bucket_key = self.get_bucket_key(bucket_id, bwd)
        if self.bucket_status[bucket_key] == BucketStatus.EMPTY:
            return

        if lazy:
            # Mark the bucket can be released later.
            self.bucket_can_be_released[bucket_key] = True
            return

        self.wait_bucket_ready(bucket_id, bwd, empty_ok=True)
        if self.bucket_status[bucket_key] == BucketStatus.COMMUNICATING:
            raise ValueError(f"Bucket {bucket_id} is communicating.")

        if bwd and self.buffer.parameter_groups[bucket_id].transpose_weight_buffer is not None:
            buf = self.buffer.parameter_groups[bucket_id].transpose_weight_buffer
        else:
            buf = self.buffer.parameter_groups[bucket_id].model_weight_buffer

        buf.free_bucket_storage()
        self.bucket_status[bucket_key] = BucketStatus.EMPTY

    def recycle_unused_buckets(self):
        """Recycle the unused buckets."""
        for bucket_key, can_be_released in self.bucket_can_be_released.items():
            if can_be_released:
                bucket_id, is_transpose_weight = bucket_key[0], bucket_key[1]
                self.release_bucket(bucket_id, is_transpose_weight)
                self.bucket_can_be_released[bucket_key] = False

    def get_fsdp_buffer(self, bucket_id: int, bwd=False) -> DataParallelBuffer:
        """
        Get the FSDP / DP-Shard buffer with the given bucket ID.
        If bwd=True, return the FSDP transpose buffer instead.
        """
        param_group = self.buffer.parameter_groups[bucket_id]
        if self.buffer.ddp_config.outer_dp_sharding_strategy != "no_shard":
            if bwd and param_group.transpose_weight_buffer is not None:
                raise RuntimeError("Transpose buffer is not supported for HSDP")
            else:
                return param_group.hsdp_wbuf
        if bwd and param_group.transpose_weight_buffer is not None:
            return param_group.transpose_weight_buffer
        else:
            return param_group.model_weight_buffer

    @torch.no_grad()
    def async_bucket_gather(self, bucket_id, bwd) -> None:
        """All-gather the bucket and set the items."""
        bucket_key = self.get_bucket_key(bucket_id, bwd)

        self.bucket_can_be_released[bucket_key] = False
        if self.bucket_status[bucket_key] != BucketStatus.EMPTY:
            return

        self.bucket_status[bucket_key] = BucketStatus.COMMUNICATING

        # Retrieve the buffer associated with the DP-Shard PG
        # that backs the model compute weights.
        wbuf = self.get_fsdp_buffer(bucket_id, bwd)

        # Lazy release the unused buckets.
        self.recycle_unused_buckets()

        # Allocate an empty bucket to store the module weights.
        bucket = wbuf.fetch_bucket(set_param_data=True)

        # All-gather the module weights in each buffer shard into the allocated bucket.
        # Now each rank will have a copy of this FSDP unit module's weights.
        param_gather_event = torch.distributed.all_gather_into_tensor(
            output_tensor=bucket.data,
            input_tensor=wbuf.get_shard_from_local_buffer(),
            group=wbuf.data_parallel_group,
            async_op=True,
        )

        def get_closure(bucket_id, bwd):
            @torch.no_grad()
            def mark_bucket_ready_to_use():
                # Mark the bucket as ready to use - all NCCL operations are complete.
                self.bucket_status[self.get_bucket_key(bucket_id, bwd)] = BucketStatus.READY_TO_USE

            return mark_bucket_ready_to_use

        mark_bucket_ready_to_use = get_closure(bucket_id, bwd)

        # Track the async all-gather operation for the bucket.
        self.param_gather_event_map[self.get_bucket_key(bucket_id, bwd)] = (
            param_gather_event,
            mark_bucket_ready_to_use,
        )


@torch.no_grad()
def gradient_reduce_preprocessing(grad_data, scaling_factor, ddp_config):
    """
    Gradient reduce preprocessing for gradient averaging and gradient scaling.
    """

    # TODO(@cspades): Clean up this logic in conjunction with
    # gradient reduction arguments: calculate_per_token_loss,
    # and average_in_collective.
    if scaling_factor is None:
        # No scaling - use SUM reduction.
        reduce_op = torch.distributed.ReduceOp.SUM
    elif ddp_config.average_in_collective:
        # Scaling overridden by AVG reduction.
        reduce_op = torch.distributed.ReduceOp.AVG
    elif ddp_config.gradient_reduce_div_fusion and grad_data.dtype != torch.bfloat16:
        # Fused SUM reduction.
        reduce_op = torch.distributed._make_nccl_premul_sum(scaling_factor)
    else:
        # Scale gradients with SUM reduction.
        grad_data.mul_(scaling_factor)
        reduce_op = torch.distributed.ReduceOp.SUM

    return reduce_op


def _check_nan_in_grad(grad: torch.Tensor):
    """
    Check if there are any NaN or Inf in grad.
    """
    # Compute gradient norm.
    grad_norm = torch.linalg.norm(grad)
    if torch.isnan(grad_norm) or not torch.isfinite(grad_norm):
        raise ValueError(
            f"[Megatron-FSDP](check_for_nan_in_grad=True) Detected NaN or Inf in wgrad: {grad}"
        )


def check_gpu_memory(threshold=0.9):
    """
    Check if the GPU memory is over the threshold.
    Args:
        threshold (float, optional): The threshold to check if the GPU memory is over.
            Defaults to 0.9.
    Returns:
        bool: True if the GPU memory is over the threshold.
    """
    if not torch.cuda.is_available():
        return False
    device = torch.cuda.current_device()
    allocated = torch.cuda.memory_allocated(device)
    reserved = torch.cuda.memory_reserved(device)
    total = torch.cuda.get_device_properties(device).total_memory

    allocated_ratio = allocated / total
    reserved_ratio = reserved / total

    near_full = allocated_ratio >= threshold or reserved_ratio >= threshold

    if near_full:
        log_single_rank(
            logger,
            logging.INFO,
            f"GPU Memory: Allocated: {allocated_ratio:.2%}, Reserved: {reserved_ratio:.2%}",
        )
    return near_full


class ResetParametersContext:
    """
    Context manager for resetting parameters for meta device initialization module.
    """

    def __init__(self, init_param_with_fp8=False, with_cuda_rng_tracker=False):
        self.init_param_with_fp8 = init_param_with_fp8
        self.with_cuda_rng_tracker = with_cuda_rng_tracker

    def __enter__(self):
        self.stack = ExitStack()
        if self.init_param_with_fp8:
            # FIXME(@cspades): This appears to be a legacy dependency that is not needed for
            # more recent versions of TransformerEngine, which only requires this context during
            # TransformerEngineBaseModule.__init__. Should be removed if backwards compatibility
            # is confirmed, because overwrites the quantized_model_init context specified by user.
            assert (
                HAVE_TE
            ), "TransformerEngine is required for using FP8 parameters with Megatron-FSDP."
            # Retrieve import for quantized_model_init (new) or fp8_model_init (old).
            # Will be nullcontext if TE is not installed.
            te_quantized_model_init_cls = get_quantized_model_init_context_cls()
            if te_quantized_model_init_cls is not nullcontext:
                # Enable TE quantized parameter context manager.
                args = {"enabled": True}
                if (
                    "preserve_high_precision_init_val"
                    in inspect.signature(te_quantized_model_init_cls).parameters
                ):
                    # Required for Megatron-FSDP + FP8 parameters.
                    args["preserve_high_precision_init_val"] = True
                self.stack.enter_context(te_quantized_model_init_cls(**args))

        if self.with_cuda_rng_tracker:
            # Megatron / TE RNG tracker needs to be initialized and seeded by the user or FW
            # as needed for model parallelisms that require consistent RNG across ranks.
            cuda_rng_tracker = get_cuda_rng_tracker()
            if _MODEL_PARALLEL_RNG_TRACKER_NAME in cuda_rng_tracker.states_:
                # Only fork the RNG tracker if the _MODEL_PARALLEL_RNG_TRACKER_NAME seed is added.
                self.stack.enter_context(get_cuda_rng_tracker().fork())

        return self

    def __exit__(self, *exc_details):
        self.stack.__exit__(*exc_details)


def override_sharded_param_methods_with_safety_checks(params, all_gather_pipeline):
    """
    Override the methods of the parameters to prevent undefined behavior.
    Args:
        params (List[torch.Tensor]): The parameters to add hint on shard to functions.
        all_gather_pipeline (AllGatherPipeline): The all-gather pipeline.
    """
    for p in params:
        to_function = p.to
        cpu_function = p.cpu

        def override_sharded_param_to_function_closure(p, to_function):
            def override_sharded_param_to_function(*args, **kwargs):
                if p._typed_storage()._size() == 0:
                    warnings.warn(
                        "The parameter may be sharded by Megatron-FSDP, "
                        "no actual 'to' operation is performed."
                    )
                    return torch.empty([])
                return to_function(*args, **kwargs)

            return override_sharded_param_to_function

        setattr(p, "to", override_sharded_param_to_function_closure(p, to_function))

        def override_sharded_param_cpu_function_closure(p, cpu_function):
            def override_sharded_param_cpu_function(*args, **kwargs):
                if p._typed_storage()._size() == 0:
                    warnings.warn(
                        "The parameter may be sharded by Megatron-FSDP, "
                        "no actual 'cpu' operation is performed."
                    )
                    return torch.empty([], device="cpu")
                return cpu_function(*args, **kwargs)

            return override_sharded_param_cpu_function

        setattr(p, "cpu", override_sharded_param_cpu_function_closure(p, cpu_function))


def _dtype_size(dtype: torch.dtype) -> int:
    """
    Get the size of the dtype. Note that many data-types un-common to ML
    or not supported by NCCL communication (e.g. CFloat) are listed here
    for mixed-precision coverage and to avoid allocating a dummy Tensor.

    Args:
        dtype (torch.dtype): The dtype to get the size of.
    Returns:
        int: The size of the dtype.
    """
    if dtype == torch.float16 or dtype == torch.bfloat16 or dtype == torch.int16:
        return 2
    elif dtype == torch.float32 or dtype == torch.int32 or torch.complex32:
        return 4
    elif dtype == torch.float64 or dtype == torch.int64 or torch.complex64:
        return 8
    elif dtype == torch.uint8 or dtype == torch.int8:
        return 1
    elif dtype == "float8":
        return 1
    else:
        try:
            # Allocate an empty Tensor on-the-fly to check the size.
            # Non-ideal fall-back option before sizing the new dtype.
            # Why does torch.dtype not support this without alloc?
            return torch.empty((), dtype=dtype).element_size()
        except:
            raise ValueError(f"Unsupported dtype: {dtype}")


def to_local_if_dtensor(tensor):
    """
    Convert a DTensor to a local tensor.
    Args:
        tensor (torch.Tensor): The tensor to convert.
    Returns:
        torch.Tensor: The local tensor.
    """
    if isinstance(tensor, DTensor):
        return tensor._local_tensor
    return tensor


def _get_fsdp_tensor_spec(
    param, dist_index: FSDPDistributedIndex, is_sharded_param, is_expert_param
):
    """
    Get the DeviceMesh for the parameter and modify the placement for Megatron-FSDP.
    """
    # Check if the parameter is a DTensor and has more than one shard (TP enabled).
    if isinstance(param, DTensor) and cast(DTensor, param)._spec.num_shards > 1:
        # Retrieve original DTensorSpec (for TP).
        dtensor_spec = cast(DTensor, param)._spec

        # Get the placements for the parameter.
        assert len(dtensor_spec.placements) == 1, (
            "When using DTensor with Megatron-FSDP, the DTensorSpec should have only one placement."
            f"Current placements: {dtensor_spec.placements}"
        )
        dtensor_placement = dtensor_spec.placements[0]

        if dist_index.use_hybrid_fsdp:
            mesh_dim_names = (dist_index.dp_outer_dim, dist_index.dp_shard_dim, dist_index.tp_dim)
        else:
            mesh_dim_names = (dist_index.dp_shard_dim, dist_index.tp_dim)

        # Introducing shard placement order to solve the situation where more than two shard
        # appears in the same tensor dimension.
        # https://dev-discuss.pytorch.org/t/dtensor-status-design-and-looking-forward/2749
        shard_order = None
        if not is_sharded_param:
            if dist_index.use_hybrid_fsdp:
                placements = [Replicate(), Replicate(), dtensor_placement]
            else:
                placements = [Replicate(), dtensor_placement]
        elif dist_index.use_hybrid_fsdp:
            if dist_index.hsdp_outer_dp_shard:
                # If the parameter is sharded in hybrid FSDP, we need to add the HS-DP dimension.
                placements = [Shard(0), Shard(0), dtensor_placement]
                shard_order = [2, 1, 0]
            else:
                placements = [Replicate(), Shard(0), dtensor_placement]
                shard_order = [2, 1, 0]
        else:
            placements = [Shard(0), dtensor_placement]
            shard_order = [1, 0]

        device_mesh = dist_index.get_submesh(mesh_dim_names, is_expert_parallel=is_expert_param)
        if shard_order is not None:
            setattr(device_mesh, "_shard_order", shard_order)

        return device_mesh, placements

    shard_order = None

    if dist_index.use_hybrid_fsdp:
        mesh_dim_names = (dist_index.dp_outer_dim, dist_index.dp_shard_dim)
    else:
        mesh_dim_names = (dist_index.dp_shard_dim,)

    if not is_sharded_param:
        placements = [Replicate()]
    elif dist_index.use_hybrid_fsdp:
        # If the parameter is sharded in hybrid FSDP, we need to add the HS-DP dimension.
        if dist_index.hsdp_outer_dp_shard:
            placements = [Shard(0), Shard(0)]
            shard_order = [1, 0]
        else:
            placements = [Replicate(), Shard(0)]
    else:
        placements = [Shard(0)]

    device_mesh = dist_index.get_submesh(mesh_dim_names, is_expert_parallel=is_expert_param)
    if shard_order is not None:
        setattr(device_mesh, "_shard_order", shard_order)

    return device_mesh, placements


def make_fsdp_dtensor(
    local_tensor: torch.Tensor,
    param: torch.nn.Parameter,
    dist_index: FSDPDistributedIndex,
    is_sharded_param: bool = True,
    is_expert_param: bool = False,
    run_check: bool = False,
    update_uneven_dtensor_chunk_meta: bool = False,
    force_sync_tp_duplicated_param: bool = False,
):
    """
    Creates a distributed tensor (DTensor) from a local tensor with support for
    Megatron-FSDP and Tensor Parallel scenarios.

    This function is typically used in a FSDP setup where tensor data needs to be converted
    into sharded DTensors across a device mesh. It also supports model configurations
    involving tensor model parallelism such as Megatron-Core.

    Args:
        local_tensor (torch.Tensor): The local tensor data to be converted to a DTensor.
        param (nn.Parameter): Template parameter used to infer shape, stride,
            and partition attributes.
        dist_index (FSDPDistributedIndex): Metadata object providing the distributed device mesh.
        is_sharded_param (bool, optional): Whether the parameter is sharded across
            devices. Defaults to True.
        is_expert_param (bool, optional): Indicates if the tensor corresponds to
            Megatron-Core expert (Mixture-of-Experts) parameters. Defaults to False.
        run_check (bool, optional): Enables additional internal validation for
            DTensor. Defaults to False.
        update_uneven_dtensor_chunk_meta (bool, optional): Whether to update metadata
            for uneven chunk distributions. Defaults to False.

    Returns:
        DTensor: A DTensor object sharded appropriately across devices.

    Example:
        >>> import torch
        >>> from torch.distributed.device_mesh import init_device_mesh
        >>> from torch.distributed._tensor import DeviceMesh
        >>> from my_fsdp_utils import FSDPDistributedIndex  # assumed utility
        >>>
        >>> # Initialize device mesh (4 GPUs)
        >>> device_mesh = DeviceMesh("cuda", (2, 2), dim_names=("tp", "dp"))
        >>> dist_index = FSDPDistributedIndex(
        ...     device_mesh=device_mesh,
        ...     dp_mesh_dim_name="dp",
        ...     tp_mesh_dim_name="tp"
        ... )
        >>>
        >>> # Dummy local tensor and parameter
        >>> local_tensor = torch.randn(8, 16, device="cuda")
        >>> param = torch.nn.Parameter(torch.empty(32, 32))
        >>>
        >>> # Attach partition metadata for tensor model parallelism
        >>> param.tensor_model_parallel = True
        >>> param.partition_dim = 0
        >>> param.partition_stride = 1
        >>>
        >>> # Convert to DTensor
        >>> dtensor = make_fsdp_dtensor(
        ...     local_tensor=local_tensor,
        ...     param=param,
        ...     dist_index=dist_index,
        ...     is_sharded_param=True,
        ...     run_check=True
        ... )
        >>> print(dtensor)
        DTensor(sharded(...))

    Note:
        - For tensor model parallel use cases, the `param` object must either:
            * Be a tensor-parallel (TP) DTensor, or
            * Include all of these attributes: `tensor_model_parallel`, `partition_dim`,
                and `partition_stride`.
    """
    # TODO: Add validation checks for the legality of DTensor.
    if not is_sharded_param and param.numel() != local_tensor.numel():
        raise ValueError(
            f"[Megatron-FSDP] Mismatch between param shape {param.shape} and local tensor "
            f"shape {local_tensor.shape}. "
            "If the parameter is not sharded, they must match exactly."
        )

    # Save original parameter for later use
    orig_param = param

    # Handle tensor model parallel specific logic
    if is_mcore_tensor_model_parallel(param):
        # Ensure parameter is not already a DTensor
        assert not isinstance(param, DTensor), (
            "[Megatron-FSDP] Parameter is already a DTensor, yet tensor_model_parallel " "is True."
        )

        tp_mesh = dist_index.get_submesh(dist_index.tp_dim, is_expert_parallel=is_expert_param)
        global_shape = list(param.shape)
        if tp_mesh.mesh.numel() > 1:
            if is_mcore_tensor_parallel_duplicated(param):
                placements = [Replicate()]
                if force_sync_tp_duplicated_param:
                    if local_tensor.numel() > 0:
                        torch.distributed.broadcast(
                            local_tensor, group=tp_mesh.get_group(), group_src=0
                        )
                elif run_check:
                    # TODO: Implement consistency check for duplicated TP parameters
                    pass
            else:
                tp_dim = get_mcore_tensor_parallel_partition_dim(param)
                assert tp_dim is not None, (
                    "[Megatron-FSDP] Parameter is not tensor model parallel, "
                    "yet tensor_model_parallel is True."
                )
                placements = [Shard(tp_dim)]
                global_shape[tp_dim] *= tp_mesh.mesh.numel()

            # Construct TP-sharded DTensor using Megatron-style placement
            param = DTensor.from_local(
                local_tensor=local_tensor,
                device_mesh=tp_mesh,
                placements=placements,
                run_check=run_check,
                shape=tuple(global_shape),
                stride=torch.empty(global_shape).stride(),
            )

    # Get FSDP-configured mesh and placements from provided param
    device_mesh, placements = _get_fsdp_tensor_spec(
        param, dist_index, is_sharded_param=is_sharded_param, is_expert_param=is_expert_param
    )

    # Reshape local tensor for sharded layouts beyond 1D
    if len(orig_param.shape) > 1:
        local_shape = (-1, *orig_param.shape[1:])
    else:
        local_shape = (-1,)

    # Create the FSDP-compliant DTensor
    fsdp_tensor = DTensor.from_local(
        local_tensor=local_tensor.view(local_shape),
        device_mesh=device_mesh,
        placements=placements,
        run_check=False,
        shape=param.shape,
        stride=param.stride(),
    )

    if run_check:
        validate_uneven_dtensor(fsdp_tensor)

    # Update metadata if uneven sharding is expected
    if update_uneven_dtensor_chunk_meta:
        update_uneven_dtensor_chunk_metadata(fsdp_tensor)

    return fsdp_tensor


================================================
FILE: megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Iterable, List, Optional, Union

import torch
import torch.distributed as dist
from torch.distributed._tensor import DTensor
from torch.distributed.checkpoint.metadata import (
    ChunkStorageMetadata,
    MetadataIndex,
    TensorProperties,
)
from torch.distributed.checkpoint.planner import TensorWriteData, WriteItem, WriteItemType
from torch.distributed.tensor.placement_types import Replicate, Shard, _StridedShard

from .utils import get_mesh_names


def gather_and_compute_chunk_metadata(dtensor: DTensor) -> ChunkStorageMetadata:
    """
    Gather chunk metadata for a DTensor across all ranks and compute the
    offsets and sizes of each chunk. This is necessary for handling uneven
    sharding in distributed tensors.
    """
    local_tensor = dtensor.to_local()
    local_shape = local_tensor.shape
    device_mesh = dtensor.device_mesh

    offsets = [0] * len(local_shape)
    cumulative_shape = list(local_shape).copy()

    def _update_offsets_and_cumulative_shape(
        mesh_dim: int, offsets: List[int], cumulative_shape: List[int]
    ):
        shard_group = device_mesh.get_group(mesh_dim)
        shard_dim = p.dim

        # Synchronize local shard dimensions across ranks
        world_size = dist.get_world_size(shard_group)
        global_shapes = [None] * world_size
        dist.all_gather_object(global_shapes, cumulative_shape, group=shard_group)

        # Calculate global offset for current rank's shard
        rank = dist.get_rank(shard_group)
        offset = sum(s[shard_dim] for s in global_shapes[:rank])
        # TODO: add documentation for the offset calculation
        # Add on the offset of the current mesh dimension
        offsets[shard_dim] += offset
        # Calculate the global shape using the sum of the sharding dim sizes.
        cumulative_shape[shard_dim] = sum(s[shard_dim] for s in global_shapes)

    # Get the shard placements order.
    shard_order = getattr(device_mesh, "_shard_order", None)
    if shard_order is None:
        shard_order = []
        reversed_shard_order = []
        mesh_dims = list(range(len(dtensor.placements)))
        strided_shard_count = 0
        for mesh_dim, p in enumerate(dtensor.placements):
            if isinstance(p, _StridedShard):
                reversed_shard_order.append(mesh_dim)
                mesh_dims.remove(mesh_dim)
                strided_shard_count += 1
        if strided_shard_count > 1:
            raise ValueError(
                f"DTensor has multiple strided shards ({strided_shard_count}), "
                "which is not supported."
            )
        reversed_shard_order += mesh_dims
        shard_order = list(reversed(reversed_shard_order))

    for mesh_dim in reversed(shard_order):
        p = dtensor.placements[mesh_dim]
        if isinstance(p, (Shard, _StridedShard)):
            _update_offsets_and_cumulative_shape(mesh_dim, offsets, cumulative_shape)
        elif isinstance(p, Replicate):
            # If we have a replicate placement, we do not need to update offsets
            # or cumulative shape, as it does not affect the chunk metadata.
            continue
        else:
            raise ValueError(f"Unsupported placement type {type(p)} in DTensor: {dtensor}")

    return ChunkStorageMetadata(offsets=tuple(offsets), sizes=tuple(local_shape))


def update_uneven_dtensor_chunk_metadata(dtensor: DTensor) -> dict:
    """
    Update the DTensor's chunk metadata to handle uneven sharding.
    This function modifies the DTensor in-place to include chunk metadata
    and write items closures for saving and loading.
    """

    def _chunk_list_closure(chunk_meta):
        return lambda: chunk_meta

    def _write_items_closure(uneven_chunk_meta):
        def _write_items(fqn: str, tensor: DTensor) -> List[WriteItem]:
            if tensor.to_local().numel() == 0:
                # If the tensor is empty, return an empty list
                return []

            return [
                WriteItem(
                    type=WriteItemType.SHARD,
                    index=MetadataIndex(fqn, uneven_chunk_meta.offsets),
                    tensor_data=TensorWriteData(
                        chunk=uneven_chunk_meta,
                        properties=TensorProperties.create_from_tensor(tensor.to_local()),
                        size=tensor.size(),
                    ),
                )
            ]

        return _write_items

    # Get uneven chunk metadata for the DTensor
    # TODO: Optimize gather_and_compute_chunk_metadata synchronization:
    # 1. Add pre-check validation to verify tensor shape consistency
    #    across devices before entering barrier (prevents potential hangs)
    # 2. Implement batched barrier using grouped collectives
    #    to amortize synchronization overhead
    uneven_chunk_meta = gather_and_compute_chunk_metadata(dtensor)

    # Set the chunk list and write items closure for the DTensor
    dtensor._local_tensor.__create_chunk_list__ = _chunk_list_closure([uneven_chunk_meta])
    dtensor._local_tensor.__create_write_items__ = _write_items_closure(uneven_chunk_meta)


def validate_uneven_dtensor(dtensor: DTensor) -> None:
    """
    Validates the chunk metadata of an uneven DTensor to ensure correctness and boundary coverage.

    Notes:
    - `gather_and_compute_chunk_metadata` will ensure that all chunks do not overlap.

    This function performs the following checks:
      - All chunk offsets and sizes are within the tensor shape bounds.
      - All boundaries of each dimension are actually covered by shard placements.

    Args:
        dtensor (DTensor): The distributed tensor to validate.

    Raises:
        AssertionError: If any chunk falls out of bounds or not all boundaries are touched.
    """

    # gather_and_compute_chunk_metadata will ensure that all chunks do not overlap.
    chunk_meta = gather_and_compute_chunk_metadata(dtensor)

    # Validate that each chunk's metadata is within bounds.
    assert all(
        [
            0 <= offset and offset + size <= dtensor.shape[dim]
            for (dim, (offset, size)) in enumerate(zip(chunk_meta.offsets, chunk_meta.sizes))
        ]
    ), (
        "[Megatron-FSDP] DTensor chunk metadata is invalid. "
        f"Offsets: {chunk_meta.offsets}, "
        f"Sizes: {chunk_meta.sizes}, "
        f"Global shape: {dtensor.shape}, "
        f"Local shape: {dtensor.to_local().shape}, "
        f"Device mesh: {dtensor.device_mesh}."
    )

    # Check that all boundaries (start and end) are touched.
    # Skip under fake process group — all_reduce is a no-op so only rank 0's
    # boundaries are visible, which makes the end-boundary check always fail.
    if torch.distributed.is_initialized() and torch.distributed.get_backend() == 'fake':
        return

    boundary_checks = torch.tensor(
        [
            [offset == 0, offset + size == dtensor.shape[dim]]
            for (dim, (offset, size)) in enumerate(zip(chunk_meta.offsets, chunk_meta.sizes))
        ],
        dtype=torch.int,
    ).cuda()

    for i, p in enumerate(dtensor.placements):
        if isinstance(p, Shard) or isinstance(p, _StridedShard):
            torch.distributed.all_reduce(
                boundary_checks,
                op=torch.distributed.ReduceOp.MAX,
                group=dtensor.device_mesh.get_group(i),
            )
    assert torch.all(boundary_checks), (
        "[Megatron-FSDP] DTensor chunk metadata boundary check failed. "
        f"Offsets: {chunk_meta.offsets}, "
        f"Sizes: {chunk_meta.sizes}, "
        f"Global shape: {dtensor.shape}, "
        f"Local shape: {dtensor.to_local().shape}, "
        f"Device mesh: {dtensor.device_mesh}."
    )


def filter_unflattened_state_dict(state_dict, key_chain=[], visit_condition=lambda x: False):
    """
    Recursively traverses an unflattened state_dict and collects keys
    of items that meet the visit_condition. The keys are returned as lists
    of strings representing the path to each item in the state_dict.
    """
    visit_items = []
    for key, value in state_dict.items():
        if isinstance(value, dict):
            # Recurse into nested dictionaries
            visit_items += filter_unflattened_state_dict(
                value, key_chain=key_chain + [key], visit_condition=visit_condition
            )
        elif visit_condition(value):
            # If the value meets the visit condition, process it
            visit_items.append(key_chain + [key])
    return visit_items


def get_unflattened_state_dict(state_dict, key_chain=[]):
    """Get a value from an unflattened state_dict at the specified key chain."""
    current = state_dict
    for key in key_chain:
        if isinstance(current, dict) and key in current:
            # Navigate through the nested dictionary
            current = current[key]
        else:
            raise KeyError(f"Key {key_chain} not found in state_dict")

    return current


def preprocess_state_dict_for_uneven_dtensor(state_dict: dict) -> dict:
    """
    Preprocess the state_dict to prepare it for saving or loading unevenly sharded DTensors.
    This function modifies the DTensors in the state_dict to include chunk metadata
    and write items closures.
    """
    visit_dtensor = filter_unflattened_state_dict(
        state_dict, visit_condition=lambda x: isinstance(x, DTensor)
    )
    # Sort the keys, since some state dictionaries are mocked
    # and extended to include empty global keys.
    for key_chain in sorted(visit_dtensor):
        # Get the DTensor at the key chain
        dtensor = get_unflattened_state_dict(state_dict, key_chain)
        update_uneven_dtensor_chunk_metadata(dtensor)
    return state_dict


def gather_uneven_dtensor_to_full_tensor(
    dtensor: DTensor, target_device: Optional[torch.device] = None
) -> DTensor:
    """
    Gather an unevenly sharded DTensor distributed across multiple ranks,
    reconstructing the full (unsharded) tensor on each rank.

    This function handles uneven chunk sizes and offsets by collecting
    chunk metadata from all ranks, performing all-gather operations,
    and assembling the full tensor accordingly. The returned tensor
    is fully replicated across the given device mesh.

    Args:
        dtensor (DTensor): Distributed tensor with uneven sharding across ranks.
        target_device (Optional[torch.device]): If specified, move the resulting
            full tensor to this device. Otherwise, use the original device.

    Returns:
        DTensor: Fully replicated DTensor representing the reconstructed full tensor.
    """
    if not isinstance(dtensor, DTensor):
        raise TypeError("Input must be a DTensor.")

    device_mesh = dtensor.device_mesh
    if not device_mesh.mesh_dim_names:
        process_group = device_mesh.get_group()
    else:
        # Check if the fully-flattened mesh exists first.
        full_flattened_mesh_dim_name = "_".join(device_mesh.mesh_dim_names)
        if full_flattened_mesh_dim_name in get_mesh_names(device_mesh):
            # Retrieve the existing flattened DeviceMesh ProcessGroup.
            try:
                # Two Cases: Name is a root dimension, or using the old DeviceMesh
                # API which allows us to get flattened dimensions.
                process_group = device_mesh[full_flattened_mesh_dim_name].get_group()
            except:
                # Name is a flattened dimension that cannot be retrieved from the
                # DeviceMesh.__getitem__, so fall-back to new DeviceMesh API.
                process_group = (
                    device_mesh._get_root_mesh()
                    ._flatten_mapping[full_flattened_mesh_dim_name]
                    .get_group()
                )
        else:
            # Create the _-separated flattened DeviceMesh ProcessGroup.
            process_group = device_mesh._flatten().get_group()

    # Collect chunk metadata for uneven shards (update if missing)
    if not hasattr(dtensor._local_tensor, "__create_chunk_list__"):
        update_uneven_dtensor_chunk_metadata(dtensor)

    chunk_metadata_list = dtensor.__create_chunk_list__()
    if len(chunk_metadata_list) != 1:
        raise ValueError(f"Expected exactly one chunk metadata, got {len(chunk_metadata_list)}.")

    local_chunk_metadata = chunk_metadata_list[0]
    world_size = process_group.size()

    # Prepare local chunk info dictionary
    local_chunk_info = {
        "shape": list(dtensor.to_local().shape),
        "offset": getattr(local_chunk_metadata, "offsets", [0] * len(dtensor.shape)),
        "rank": process_group.rank(),
    }

    # Gather chunk info from all ranks
    all_chunk_info = [None] * world_size
    dist.all_gather_object(all_chunk_info, local_chunk_info, group=process_group)

    # Delegate to helper function
    return _assemble_full_tensor_from_uneven_chunks(
        dtensor, all_chunk_info, process_group, target_device
    )


def _assemble_full_tensor_from_uneven_chunks(
    dtensor: DTensor,
    all_chunk_info: List[dict],
    process_group: torch.distributed.ProcessGroup,
    target_device: Optional[torch.device],
) -> DTensor:
    """
    Assemble the full tensor from unevenly sized chunks gathered from all ranks.

    Args:
        dtensor (DTensor): The original distributed tensor.
        all_chunk_info (List[Dict]): List of shard info dicts from all ranks,
            including shapes and offsets.
        process_group: Process group for collective communication.
        target_device: Optional device to move the final full tensor onto.

    Returns:
        DTensor: Fully replicated tensor constructed by placing chunks at
        the appropriate offsets.
    """
    local_tensor = dtensor.to_local()

    # Check if the DTensor has any shard placements
    have_shard_placement = any(
        isinstance(placement, Shard) or isinstance(placement, _StridedShard)
        for placement in dtensor.placements
    )

    if not have_shard_placement:
        # No sharding (replicated tensor), just clone and move if needed
        full_tensor = local_tensor.clone()
        if target_device:
            full_tensor = full_tensor.to(target_device)
    else:
        # Prepare empty buffers to receive tensors from each rank
        gathered_tensors = [
            torch.empty(rank_info["shape"], dtype=local_tensor.dtype, device=local_tensor.device)
            for rank_info in all_chunk_info
        ]

        # Gather local tensors from all ranks
        dist.all_gather(gathered_tensors, local_tensor, group=process_group)

        # Allocate full tensor buffer
        full_tensor = torch.empty(
            dtensor.shape, dtype=local_tensor.dtype, device=local_tensor.device
        )

        # Copy each gathered shard into the full tensor at its offset
        for rank_info, local_shard in zip(all_chunk_info, gathered_tensors):
            offset = rank_info["offset"]
            slices = tuple(slice(o, o + s) for o, s in zip(offset, local_shard.shape))
            full_tensor[slices] = local_shard

        # Optionally move to target device
        if target_device is not None:
            full_tensor = full_tensor.to(target_device)

        # Free memory of gathered shards as they are copied
        del gathered_tensors

    # Wrap into a replicated DTensor and return
    return DTensor.from_local(
        full_tensor,
        placements=[Replicate()] * len(dtensor.placements),
        device_mesh=dtensor.device_mesh,
    )


def _intersection(s1, s2):
    # Only works for step=1
    start = max(s1.start, s2.start)
    stop = min(s1.stop, s2.stop)
    if start >= stop:
        return slice(0, 0)  # Empty slice if no intersection
    return slice(start, stop)


def _offset_slice(s, offset):
    return slice(s.start + offset, s.stop + offset)


def split_dtensor(
    dtensor: DTensor,
    split_size_or_sections: Union[int, List[int]],
    dim: int = 0,
    update_uneven_dtensor_chunk_meta: bool = False,
) -> Iterable[DTensor]:
    """
    Splits a DTensor into smaller DTensors along a specified dimension.

    This function manages uneven sharding by accurately assigning chunk metadata
    for each split. Unlike the native PyTorch DTensor split functionality,
    it does not redistribute `Replicate` placements, which helps avoid Out-Of-Memory (OOM) issues.

    Args:
        dtensor (DTensor): The DTensor to split.
        split_size_or_sections (int or list of int): If int, defines the size of each chunk.
            If a list, specifies the sizes of each chunk in order.
        dim (int, optional): The axis along which to split. Default is 0.
        update_uneven_dtensor_chunk_meta (bool, optional): Whether to update chunk
            metadata for each resulting DTensor. Default is False.

    Yields:
        DTensor: Sub-DTensor resulting from the split, maintaining correct metadata.

    Example:
        >>> for chunk in split_dtensor(dt, 3, dim=1):
        ...     print(chunk)
    """
    tensor_size = dtensor.shape[dim]

    # Calculate boundary indices for each split
    if isinstance(split_size_or_sections, int):
        split_points = list(range(0, tensor_size, split_size_or_sections))
        split_points.append(tensor_size)
    else:
        split_points = [0]
        for size in split_size_or_sections:
            split_points.append(split_points[-1] + size)

    chunk_meta = gather_and_compute_chunk_metadata(dtensor)
    chunk_slice = slice(chunk_meta.offsets[dim], chunk_meta.offsets[dim] + chunk_meta.sizes[dim])
    local_offset = chunk_meta.offsets[dim]
    local_tensor = dtensor.to_local()

    # Create chunks using manual slicing
    for i in range(len(split_points) - 1):
        split_slice = slice(split_points[i], split_points[i + 1])
        s = _intersection(split_slice, chunk_slice)
        if s.start < s.stop:
            s = _offset_slice(s, -local_offset)

        if s.start < 0 or s.stop < s.start and torch.distributed.get_rank() == 0:
            raise ValueError(
                f"Invalid split slice {s} for DTensor with shape {dtensor.shape} "
                f"and local offset {local_offset} on dimension {dim}."
            )

        # Slice the local tensor
        sliced_tensor = local_tensor.narrow(dim, s.start, s.stop - s.start)
        out_shape = list(dtensor.shape)
        out_shape[dim] = split_slice.stop - split_slice.start

        new_dtensor = DTensor.from_local(
            sliced_tensor,
            shape=out_shape,
            stride=sliced_tensor.stride(),
            placements=dtensor.placements,
            device_mesh=dtensor.device_mesh,
        )

        if update_uneven_dtensor_chunk_meta:
            update_uneven_dtensor_chunk_metadata(new_dtensor)

        yield new_dtensor


================================================
FILE: megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import contextlib
import inspect
import logging
import operator
from contextlib import nullcontext
from functools import reduce
from importlib.metadata import version
from typing import Callable, Optional, Sequence, Union

try:
    import megatron.core.parallel_state as parallel_state

    HAVE_MEGATRON_CORE = True
except (ImportError, ModuleNotFoundError):
    HAVE_MEGATRON_CORE = False

try:
    import einops

    HAVE_EINOPS = True
except ImportError:
    HAVE_EINOPS = False

import torch
from packaging.version import Version as PkgVersion
from torch import _C
from torch.cuda import _lazy_call, _lazy_init
from torch.cuda import device as device_ctx_manager
from torch.distributed import DeviceMesh, ProcessGroup

logger = logging.getLogger(__name__)

try:
    import transformer_engine  # pylint: disable=W0611

    HAVE_TE = True
except (ImportError, ModuleNotFoundError):
    # Transformer Engine not found
    HAVE_TE = False


_MODEL_PARALLEL_RNG_TRACKER_NAME = "model-parallel-rng"


def get_te_version():
    """Get TE version from __version__; if not available use pip's. Use caching."""
    if not HAVE_TE:
        # No TE installed, so return None.
        return None

    def get_te_version_str():
        import transformer_engine as te

        if hasattr(te, "__version__"):
            return str(te.__version__)
        else:
            return version("transformer-engine")

    return PkgVersion(get_te_version_str())


def is_te_min_version(vers, check_equality=True):
    """Check if minimum version of `transformer-engine` is installed."""
    te_version = get_te_version()
    if not isinstance(te_version, PkgVersion):
        # No TE installed, so cannot satisfy any version requirement.
        return False

    if check_equality:
        return te_version >= PkgVersion(vers)
    return te_version > PkgVersion(vers)


def is_submodule(module, parent_module, strict=True):
    """
    Check if a module is a submodule of another module.
    """
    if strict:
        if module is parent_module:
            return False
    for m in parent_module.modules():
        if m is module:
            return True
    return False


def get_mesh_names(
    device_mesh: Optional[DeviceMesh] = None, only_submesh_dims: bool = False
) -> list[str]:
    """
    Get all the sub-mesh ("dp", "cp", etc.) and flattened-mesh ("dp_cp", etc.) names
    in the DeviceMesh. When only_submesh_dims=True, only checks for sub-mesh dimensions.
    """
    if device_mesh is None:
        # Device mesh does not exist.
        return []

    # Sub-mesh dimension names.
    submesh_dim_names = (
        list(device_mesh.mesh_dim_names) if device_mesh.mesh_dim_names is not None else []
    )

    # Flattened mesh dimension names.
    try:
        # Retrieve all flattened meshes associated with DeviceMesh.
        # The flattened DeviceMesh are all located in the _flatten_mapping
        # dictionary of the root DeviceMesh.
        flatten_mesh_names = [
            flat_dim
            for flat_dim, flat_mesh in device_mesh._get_root_mesh()._flatten_mapping.items()
        ]
    except AttributeError:
        # Fallback to the DeviceMesh global state to retrieve flattened
        # meshes associated with the DeviceMesh.
        from torch.distributed.device_mesh import _mesh_resources

        flatten_mesh_names = [
            child_mesh_dim_name
            for child_mesh, root_mesh in _mesh_resources.child_to_root_mapping.items()
            for child_mesh_dim_name in (child_mesh.mesh_dim_names or [])
            if root_mesh == device_mesh and child_mesh_dim_name not in submesh_dim_names
        ]

    # Order of the returned list of mesh dimension names must match the index
    # of the root mesh dimension names followed by flattened sub-meshes:
    # [<root mesh dimension names>, <flattened mesh dimension names>]
    if only_submesh_dims:
        return submesh_dim_names
    else:
        return submesh_dim_names + flatten_mesh_names


def contains_submesh(
    device_mesh: Optional[DeviceMesh], submesh_names: Optional[str | Sequence[str]]
) -> bool:
    """
    Check if a sub-mesh exists in the device mesh by name.
    """
    if device_mesh is None or submesh_names is None:
        # Device mesh does not exist.
        return False
    if isinstance(submesh_names, str):
        submesh_names = (submesh_names,)
    device_mesh_names = get_mesh_names(device_mesh)
    return all(submesh_name in device_mesh_names for submesh_name in submesh_names)


def _get_cuda_rng_state(
    device: Union[int, str, torch.device] = "cuda", clone: bool = False, graph_safe: bool = False
) -> torch.Tensor:
    """Return the random number generator state of the specified GPU.

    Arguments:
        device (int): The gpu to retrieve the rng state
        clone (bool): Whether to also clone the retrieved RNG state
        graph_safe (bool): Get the rng state in a graph safe manner.

    This function is adapted from torch.cuda.random.get_rng_state()"""

    # if not using cuda graphs, just use the builtin pytorch function
    if not graph_safe:
        return torch.cuda.random.get_rng_state(device=device)

    _lazy_init()
    if isinstance(device, str):
        device = torch.device(device)
    elif isinstance(device, int):
        device = torch.device("cuda", device)
    idx = device.index
    if idx is None:
        idx = torch.cuda.current_device()

    default_generator = torch.cuda.default_generators[idx]
    if clone:
        return default_generator.clone_state()
    return default_generator.graphsafe_get_state()


def _set_cuda_rng_state(new_state: torch.Tensor, device: int = -1, graph_safe: bool = False):
    """Sets the random number generator state of the current GPU.

    Arguments:
        new_state (torch.ByteTensor): The desired state
        device (int): The gpu to retrieve the rng state
        graph_safe (bool): Set the rng state in a graph safe manner.

    This function is adapted from PyTorch repo (torch.cuda.set_rng_state)
    with a single change: the input state is not cloned. Cloning caused
    major performance issues for +4 GPU cases.
    """
    if hasattr(_C, "_cuda_setRNGState") and callable(_C._cuda_setRNGState):
        # older PyTorch
        def cb():
            with device_ctx_manager(device):
                _C._cuda_setRNGState(new_state)

    else:
        # newer PyTorch
        if device == -1:
            device = torch.device("cuda")
        elif isinstance(device, str):
            device = torch.device(device)
        elif isinstance(device, int):
            device = torch.device("cuda", device)

        def cb():
            idx = device.index
            if idx is None:
                idx = torch.cuda.current_device()
            default_generator = torch.cuda.default_generators[idx]

            # if graph capturing, set the rng state in a cudagraphable way
            if graph_safe:
                default_generator.graphsafe_set_state(new_state)
            else:
                default_generator.set_state(new_state)

    _lazy_call(cb)


def initialize_rng_tracker(
    use_te_rng_tracker: bool = False,
    inference_rng_tracker: bool = False,
    use_cudagraphable_rng: bool = False,
    force_reset: bool = False,
):
    """Create the RNG tracker. 'use_te_rng_tracker' determines whether to use
    Megatron or TransformerEngine's implementation.
    In particular, TransformerEngine's implementation is cudagraphable and supports FP8.
    """
    global _CUDA_RNG_STATE_TRACKER
    global _CUDA_RNG_STATE_TRACKER_INITIALIZED
    if force_reset:
        _CUDA_RNG_STATE_TRACKER = None
        _CUDA_RNG_STATE_TRACKER_INITIALIZED = False

    if "_CUDA_RNG_STATE_TRACKER_INITIALIZED" in globals() and _CUDA_RNG_STATE_TRACKER_INITIALIZED:
        return

    # Get the base tracker class
    base_tracker = None
    if HAVE_TE and use_te_rng_tracker:
        if not is_te_min_version("1.5.0"):
            raise RuntimeError("use_te_rng_tracker requires TransformerEngine version >= 1.5")

        class TECudaRNGStatesTracker(transformer_engine.pytorch.distributed.CudaRNGStatesTracker):
            """Wraps TransformerEngine's CudaRNGStatesTracker so that it is
            interchangeable with Megatron's RNG tracker"""

            def __init__(self, is_inference_rng_tracker=False):
                super().__init__()
                self.reset()
                self.is_inference_rng_tracker = is_inference_rng_tracker

            def is_initialized(self):
                """Checks if the internal RNG state has been set with set_states()."""
                return self._is_initialized

            def reset(self):
                """Reset the internal RNG state."""
                super().reset()
                self._is_initialized = False

            def set_states(self, states):
                """Set the internal RNG state."""
                super().set_states(states)
                self._is_initialized = True

            def add(self, name, seed):
                """Track the rng state."""
                super().add(name, seed)
                self._is_initialized = True

        base_tracker = TECudaRNGStatesTracker
        tracker_kwargs = {"is_inference_rng_tracker": inference_rng_tracker}
    else:

        class CudaRNGStatesTracker:
            """Tracker for the cuda RNG states.

            Using the `add` method, a cuda rng state is initialized based on
            the input `seed` and is assigned to `name`. Later, by forking the
            rng state, we can perform operations and return to our starting
            cuda state.
            """

            def __init__(self, use_cudagraphable_rng=False, is_inference_rng_tracker=False):
                self.reset()
                self.use_cudagraphable_rng = use_cudagraphable_rng
                self.is_inference_rng_tracker = is_inference_rng_tracker

                if self.use_cudagraphable_rng:
                    assert (
                        hasattr(torch.cuda.CUDAGraph, "register_generator_state")
                        and hasattr(torch.Generator, "graphsafe_set_state")
                        and hasattr(torch.Generator, "graphsafe_get_state")
                        and hasattr(torch.Generator, "clone_state")
                    ), "Tried using cudagraphs with RNG, however not detected in pytorch!"

            def is_initialized(self):
                """Checks if the internal RNG state has been set wirth set_states()."""
                return self._is_initialized

            def reset(self):
                """Set to the initial state (no tracker)."""

                # Track if initialized.
                self._is_initialized = False

                # Map from a string name to the cuda rng state.
                self.states_ = {}

                # Seeds are just for book keeping and ensure no seed is set twice.
                self.seeds_ = set()

            def get_states(self):
                """Get rng states. Copy the dictionary so we have direct
                pointers to the states, not just a pointer to the dictionary."""
                states = {}
                for name in self.states_:
                    states[name] = self.states_[name]
                return states

            def set_states(self, states):
                """Set the rng states. For efficiency purposes, we do not check
                the size of seed for compatibility."""
                self._is_initialized = True
                self.states_ = states

            def add(self, name, seed):
                """Track the rng state."""
                self._is_initialized = True
                # Check seed is not already used.
                if seed in self.seeds_:
                    raise Exception("seed {} already exists".format(seed))
                self.seeds_.add(seed)
                # Check that state is not already defined.
                if name in self.states_:
                    raise Exception("cuda rng state {} already exists".format(name))

                # If available, create the state in a graph safe manner
                if self.use_cudagraphable_rng:
                    new_state = _get_cuda_rng_state(clone=True, graph_safe=True)
                    new_state.manual_seed(seed)
                    self.states_[name] = new_state
                else:
                    # Get the current rng state.
                    orig_rng_state = torch.cuda.get_rng_state()
                    # Set the new state and store it.
                    torch.cuda.manual_seed(seed)
                    self.states_[name] = torch.cuda.get_rng_state()
                    # Reset rng state to what it was.
                    _set_cuda_rng_state(orig_rng_state)

            @contextlib.contextmanager
            def fork(self, name=_MODEL_PARALLEL_RNG_TRACKER_NAME):
                """Fork the cuda rng state, perform operations, and exit with
                the original state."""
                # Check if we have added the state
                if name not in self.states_:
                    raise Exception("cuda rng state {} is not added".format(name))
                # Store current rng state.
                orig_cuda_rng_state = _get_cuda_rng_state(graph_safe=self.use_cudagraphable_rng)
                # Set rng state to the desired one
                _set_cuda_rng_state(self.states_[name], graph_safe=self.use_cudagraphable_rng)
                # Record cpu RNG state
                cpu_rng_state = torch.get_rng_state()
                # Do the stuff we wanted to do.
                try:
                    yield
                finally:
                    # Throw a warning if cpu RNG state changed
                    if not torch.all(cpu_rng_state == torch.get_rng_state()).item():
                        logging.getLogger(__name__).warning(
                            "CPU RNG state changed within GPU RNG context"
                        )
                    # Update the current rng state for later use.
                    self.states_[name] = _get_cuda_rng_state(graph_safe=self.use_cudagraphable_rng)
                    # And set the state to the original state we started with.
                    _set_cuda_rng_state(orig_cuda_rng_state, graph_safe=self.use_cudagraphable_rng)

        base_tracker = CudaRNGStatesTracker
        tracker_kwargs = {
            "use_cudagraphable_rng": use_cudagraphable_rng,
            "is_inference_rng_tracker": inference_rng_tracker,
        }

    if inference_rng_tracker:

        class InferenceCudaRNGStatesTracker(base_tracker):  # type: ignore[valid-type, misc]
            """RNG tracker for inference."""

            def add(self, name, seed):
                """Mirrors the interface from the training RNG tracker."""
                pass

            def set_states(self, states):
                """Mirrors the interface from the training RNG tracker."""
                pass

            def fork(self, name=_MODEL_PARALLEL_RNG_TRACKER_NAME):
                """Mirrors the interface from the training RNG tracker."""
                return contextlib.nullcontext()

        tracker_class = InferenceCudaRNGStatesTracker
    else:
        tracker_class = base_tracker

    _CUDA_RNG_STATE_TRACKER = tracker_class(**tracker_kwargs)
    _CUDA_RNG_STATE_TRACKER_INITIALIZED = True


def get_cuda_rng_tracker(
    use_te_rng_tracker: bool = False,
    inference_rng_tracker: bool = False,
    use_cudagraphable_rng: bool = False,
):
    """Get cuda rng tracker."""
    initialize_rng_tracker(use_te_rng_tracker, inference_rng_tracker, use_cudagraphable_rng)
    return _CUDA_RNG_STATE_TRACKER


def safe_get_rank() -> int:
    """Safely get the rank of the current process.

    Returns the rank from torch.distributed if initialized, otherwise falls back
    to the RANK environment variable, defaulting to 0.

    Returns:
        int: The rank of the current process.
    """
    if torch.distributed.is_initialized():
        return torch.distributed.get_rank()

    # If torch.distributed is not initialized, try to read environment variables.
    try:
        return int(os.environ.get("RANK", 0))
    except (ValueError, TypeError):
        # Return rank 0 regardless of the actual rank.
        return 0


def log_single_rank(logger_: logging.Logger, level: int, msg: str, *args, rank: int = 0, **kwargs):
    """Log on a single rank."""
    if safe_get_rank() == rank:
        logger_.log(level, msg, *args, **kwargs)


# TODO(@cspades): Migrate this to a new module: fsdp_dist_index.py.
# Needs more visibility and is easily refactored / standalone.
class FSDPDistributedIndex:
    """
    Class containing references to the process groups utilized by Megatron-FSDP.

    This class tracks the device mesh and different process groups required
    for full-sharded data parallelism (FSDP), including support for hybrid
    and tensor/data parallel strategies.
    """

    def __init__(
        self,
        device_mesh: DeviceMesh,
        dp_shard_dim: Optional[str] = None,
        dp_outer_dim: Optional[str] = None,
        tp_dim: Optional[str] = None,
        hybrid_fsdp_group: Optional[torch.distributed.ProcessGroup] = None,
        hybrid_fsdp_expt_group: Optional[torch.distributed.ProcessGroup] = None,
        hsdp_outer_dp_shard: bool = False,
        expt_device_mesh: Optional[DeviceMesh] = None,
    ):
        """
        Args:
            device_mesh (DeviceMesh): The DeviceMesh to use for the DistributedIndex.
            dp_shard_dim (Optional[str]): The dimension name of the data parallel
                (and context parallel) sharding sub-mesh.
            dp_outer_dim (Optional[str]): The dimension name of the "outer" data parallel
                sub-mesh for replication or sharding when using HSDP.
            tp_dim (Optional[str]): The dimension name of the tensor parallel sub-mesh.
            hybrid_fsdp_group (Optional[torch.distributed.ProcessGroup]): The
                process group for hybrid FSDP communication, which is the flattened
                combination of the dp_outer and dp_shard process groups.
            hybrid_fsdp_expt_group (Optional[torch.distributed.ProcessGroup]): The
                process group for hybrid FSDP expert communication, which is the flattened
                combination of the expert dp_outer and expert dp_shard process groups.
            hsdp_outer_dp_shard (bool): Whether to have outer DP group sharding
                in hybrid FSDP. Specifying outer sharding will lift the bucket sharding
                coordinate system to flattened ranks of (dp_shard, dp_outer) instead of
                just sharding across dp_shard ranks and replicating across dp_outer ranks.
            expt_device_mesh (Optional[DeviceMesh]): The expert parallel device mesh
                to use for the DistributedIndex.
        """
        # Device mesh arguments.
        self.device_mesh = device_mesh
        self.dp_shard_dim = dp_shard_dim
        self.dp_outer_dim = dp_outer_dim
        self.tp_dim = tp_dim
        # Helper flag to denote if we are using hybrid FSDP.
        self.use_hybrid_fsdp = dp_outer_dim is not None
        # Helper flag to denote if we are outer-sharding in hybrid FSDP.
        self.hsdp_outer_dp_shard = hsdp_outer_dp_shard
        self.expt_device_mesh = expt_device_mesh

        # Handling the situation where M-Core MoE EP=1
        if self.expt_device_mesh is None:
            self.expt_device_mesh = device_mesh

        # Hybrid FSDP Process Groups
        # Retrieve the FSDP process group from the DeviceMesh.
        self.fsdp_group = (
            self.device_mesh[self.dp_shard_dim].get_group()
            if contains_submesh(self.device_mesh, self.dp_shard_dim)
            else None
        )
        # AG group comes from parallel_state, not the mesh
        # the purpose of this independent group is to overlap all-gather and gradient reduction.
        self.fsdp_group_ag = None
        if HAVE_MEGATRON_CORE and parallel_state.has_separate_all_gather_group():
            self.fsdp_group_ag = parallel_state.get_data_parallel_group(
                with_context_parallel=True, independent_all_gather=True
            )
        # Retrieve the outer-FSDP process group from the DeviceMesh.
        self.outer_fsdp_group = (
            self.device_mesh[self.dp_outer_dim].get_group()
            if contains_submesh(self.device_mesh, self.dp_outer_dim)
            else None
        )
        # Save a reference to the overall HSDP process group, which is the flattened
        # combination of the outer-FSDP and FSDP process groups.
        self.hybrid_fsdp_group = hybrid_fsdp_group
        self.hybrid_fsdp_expt_group = hybrid_fsdp_expt_group

        # Retrieve the expert parallel process groups from the DeviceMesh.
        self.expt_fsdp_group = (
            self.expt_device_mesh[self.dp_shard_dim].get_group()
            if self.expt_device_mesh is not None
            and contains_submesh(self.expt_device_mesh, self.dp_shard_dim)
            else None
        )

        self.expt_outer_fsdp_group = (
            self.expt_device_mesh[self.dp_outer_dim].get_group()
            if self.expt_device_mesh is not None
            and contains_submesh(self.expt_device_mesh, self.dp_outer_dim)
            else None
        )

        """
        Megatron-FSDP is responsible for storing all required DeviceMesh
        as per best practices recommended by the DeviceMesh API.

        NOTE(@cspades): In PyTorch 2.11, retrieving flattened mesh dimensions
        will be impossible via the device_mesh[...] API. We will require all
        users to correctly _unflatten() their DeviceMesh such that all
        dimensions used by Megatron-FSDP are sub-meshes of the DeviceMesh.
        contains_submesh(...) -> get_mesh_names(only_submesh_dims=True).
        """
        self.mesh_library = {}

        def register_submesh(device_mesh, submesh, is_expert_parallel):
            """Register a submesh with identifier: (*submesh, is_expert_parallel)
            in the mesh library."""
            if contains_submesh(device_mesh, submesh):
                submesh_identifier = tuple(list(submesh) + [is_expert_parallel])
                self.mesh_library[submesh_identifier] = device_mesh[submesh]

        # Define common submesh patterns
        tp_submesh = (self.tp_dim,)
        hsdp_tp_submesh = (self.dp_outer_dim, self.dp_shard_dim, self.tp_dim)
        fsdp_tp_submesh = (self.dp_shard_dim, self.tp_dim)
        hsdp_submesh = (self.dp_outer_dim, self.dp_shard_dim)
        fsdp_submesh = (self.dp_shard_dim,)

        # Register non-EP submeshes
        register_submesh(self.device_mesh, tp_submesh, False)
        register_submesh(self.device_mesh, hsdp_tp_submesh, False)
        register_submesh(self.device_mesh, fsdp_tp_submesh, False)
        register_submesh(self.device_mesh, hsdp_submesh, False)
        register_submesh(self.device_mesh, fsdp_submesh, False)

        # Register EP submeshes
        if self.expt_device_mesh is not None:
            register_submesh(self.device_mesh, hsdp_submesh, True)
            register_submesh(self.device_mesh, hsdp_tp_submesh, True)
            register_submesh(self.expt_device_mesh, tp_submesh, True)
            register_submesh(self.expt_device_mesh, fsdp_tp_submesh, True)
            register_submesh(self.expt_device_mesh, fsdp_submesh, True)
            register_submesh(self.expt_device_mesh, hsdp_submesh, True)
            register_submesh(self.expt_device_mesh, hsdp_tp_submesh, True)

        # Validate FSDP arguments.
        if self.fsdp_group is None:
            raise ValueError(
                "Megatron-FSDP (FSDPDistributedIndex) requires an FSDP process group "
                "(dp_shard_dim, fsdp_group) for core functionality."
            )

        # Validate HSDP arguments.
        if self.use_hybrid_fsdp:
            if self.outer_fsdp_group is None:
                raise ValueError(
                    "[FSDPDistributedIndex][use_hybrid_fsdp=True] Hybrid FSDP requires "
                    "an outer-DP process group (dp_outer_dim, outer_fsdp_group)."
                )
            if self.hybrid_fsdp_group is None:
                raise ValueError(
                    "[FSDPDistributedIndex][use_hybrid_fsdp=True] Hybrid FSDP requires "
                    "a hybrid FSDP process group (hybrid_fsdp_group). "
                    "This group can be manufactured by flattening the outer-DP "
                    "(dp_outer_dim, outer_fsdp_group) and FSDP (dp_shard_dim, fsdp_group) "
                    "process groups or sub-meshes."
                )

    def get_submesh(
        self, mesh_dim_names: str | Sequence[str], is_expert_parallel: bool = False
    ) -> DeviceMesh:
        """
        Retrieve an Megatron-FSDP-registered submesh by name(s).
        """
        if isinstance(mesh_dim_names, str):
            mesh_dim_names = (mesh_dim_names,)

        # Construct submesh identifier: (*mesh_dim_names, is_expert_parallel)
        submesh_identifier = tuple(list(mesh_dim_names) + [is_expert_parallel])

        # Retrieve the submesh from the mesh library
        device_submesh = self.mesh_library.get(submesh_identifier, None)

        if device_submesh is None:
            # Warn about not specifying tp_dim for layers or frameworks that depend on this.
            if self.tp_dim is None and not is_expert_parallel:
                logger.warning(
                    "[FSDPDistributedIndex] Note: For TransformerEngine, or "
                    "other machine learning frameworks like Megatron that assume "
                    "TP=1, you must specify tp_dim to use Megatron-FSDP. "
                    "Create a trivial TP dimension by setting the TP dimension size "
                    "to 1 in the DeviceMesh.\n"
                    f"DeviceMesh: {self.device_mesh}"
                )
            elif self.tp_dim is None and is_expert_parallel:
                logger.warning(
                    "[FSDPDistributedIndex] Note: For TransformerEngine, or "
                    "other machine learning frameworks like Megatron that assume "
                    "ETP=1, you must specify tp_dim to use Megatron-FSDP. "
                    "Create a trivial ETP dimension by setting the ETP dimension size "
                    "to 1 in the DeviceMesh.\n"
                    f"DeviceMesh: {self.expt_device_mesh}"
                )

            raise ValueError(
                f"[FSDPDistributedIndex][get_submesh] No submesh with "
                f"mesh_dim_names={mesh_dim_names}, is_expert_parallel={is_expert_parallel} "
                f"has been registered with Megatron-FSDP."
            )

        return device_submesh

    def get_dp_group(self, is_expert_parallel: bool = False) -> ProcessGroup:
        """Get the data parallel process group."""
        if is_expert_parallel:
            if self.use_hybrid_fsdp:
                return self.hybrid_fsdp_expt_group
            return self.expt_fsdp_group
        if self.use_hybrid_fsdp:
            return self.hybrid_fsdp_group
        return self.fsdp_group

    def get_fsdp_group(
        self, is_expert_parallel: bool = False, independent_all_gather: bool = False
    ) -> ProcessGroup:
        """Get the FSDP process group."""
        if is_expert_parallel:
            return self.expt_fsdp_group
        if independent_all_gather:
            return self.fsdp_group_ag
        return self.fsdp_group

    def get_outer_fsdp_group(self, is_expert_parallel: bool = False) -> ProcessGroup:
        """Get the outer-FSDP process group."""
        if not self.use_hybrid_fsdp:
            return None
        if is_expert_parallel:
            return self.expt_outer_fsdp_group
        return self.outer_fsdp_group

    def get_root_mesh(self, is_expert_parallel: bool = False) -> DeviceMesh:
        """Get the device mesh."""
        # NOTE(@cspades): This is FSDPDistributedIndex's root mesh, NOT the actual
        # root mesh that the DeviceMesh or expert DeviceMesh was un-flattened from.
        # To get the root mesh, use: DeviceMesh._get_root_mesh().
        if is_expert_parallel:
            return self.expt_device_mesh
        return self.device_mesh

    def get_logical_hybrid_fsdp_rank(self, is_expert_parallel: bool = False):
        """
        Returns the logical rank of the current process within the full-shard hybrid FSDP group.

        In full-shard hybrid FSDP, parameters are first sharded across the inner
        data-parallel group, then across the outer data-parallel group. This changes
        the effective rank mapping compared to standard data parallelism. Use this
        method to get the correct rank index for the hybrid group.

        Returns:
            int: The index of the current process in the hybrid FSDP group.

        Raises:
            AssertionError: If full-shard hybrid FSDP is not enabled.
        """
        assert HAVE_EINOPS, "get_logical_hybrid_fsdp_rank requires einops to be installed."
        assert (
            self.hsdp_outer_dp_shard
        ), "get_logical_hybrid_fsdp_rank is only valid when full-shard hybrid FSDP is enabled."

        _hybrid_fsdp_group_name = (
            "_hybrid_fsdp_group_ranks"
            if not is_expert_parallel
            else "_hybrid_fsdp_expt_group_ranks"
        )

        if not hasattr(self, _hybrid_fsdp_group_name):
            dp_world_size = self.get_dp_group(is_expert_parallel).size()

            # Reorder the flat ranks: (outer_dp, inner_dp) -> (inner_dp, outer_dp)
            mesh = einops.rearrange(
                torch.arange(dp_world_size),
                "(outer_dp inner_dp) -> (inner_dp outer_dp)",
                outer_dp=self.get_outer_fsdp_group(is_expert_parallel).size(),
                inner_dp=self.get_fsdp_group(is_expert_parallel).size(),
            )
            setattr(self, _hybrid_fsdp_group_name, mesh.tolist())

        # Find the index for the current rank in the hybrid group
        return getattr(self, _hybrid_fsdp_group_name).index(
            self.get_dp_group(is_expert_parallel).rank()
        )


class GlobalMemoryBuffer:
    """Global buffer to avoid dynamic memory allocations.
    Caller should ensure that buffers of the same name
    are not used concurrently."""

    def __init__(self):
        self.buffer = {}

    def get_tensor(self, tensor_shape, dtype, name, mem_alloc_context: Optional[Callable] = None):
        """
        Returns (potentially) a sub-tensor from the self.buffer for the given shape.
        """
        required_len = reduce(operator.mul, tensor_shape, 1)
        if (
            self.buffer.get((name, dtype), None) is None
            or self.buffer[(name, dtype)].numel() < required_len
        ):
            mem_alloc_context = mem_alloc_context if mem_alloc_context else nullcontext
            with mem_alloc_context():
                self.buffer[(name, dtype)] = torch.empty(
                    required_len,
                    dtype=dtype,
                    device=torch.cuda.current_device(),
                    requires_grad=False,
                )

        return self.buffer[(name, dtype)][0:required_len].view(*tensor_shape)


def get_global_memory_buffer():
    """Return the global GlobalMemoryBuffer object"""
    global _GLOBAL_MEMORY_BUFFER
    if "_GLOBAL_MEMORY_BUFFER" not in globals() or _GLOBAL_MEMORY_BUFFER is None:
        _GLOBAL_MEMORY_BUFFER = GlobalMemoryBuffer()
    return _GLOBAL_MEMORY_BUFFER


def create_updated_function_signature(original_function, **extended_kwargs: dict):
    """
    Given a function, create a new version of the function with
    extended keyword-only arguments or parameters. Used to patch
    or extend methods in instances of a class.
    """
    # Get the original function signature.
    params = list(inspect.signature(original_function).parameters.values())

    # Add new keyword-only parameters
    for name, value in extended_kwargs.items():
        params.append(
            inspect.Parameter(
                name,
                kind=inspect.Parameter.KEYWORD_ONLY,
                default=value,
                annotation=(type(value) if value is not None else inspect.Parameter.empty),
            )
        )

    # Return the updated function signature.
    return inspect.Signature(params)


def is_mcore_tensor_model_parallel(param: torch.Tensor) -> bool:
    """
    Check if the given parameter is Megatron-Core tensor model parallel.
    """
    return getattr(param, "_mcore_tp", False) or getattr(param, "tensor_model_parallel", False)


def is_mcore_tensor_parallel_duplicated(param: torch.Tensor) -> bool:
    """
    Check if the given parameter is Megatron-Core tensor model parallel and duplicated.
    """
    return getattr(param, "_tp_duplicated", False)


def get_mcore_tensor_parallel_partition_dim(param: torch.Tensor) -> Optional[int]:
    """
    Get the partition dimension for a Megatron-Core tensor model parallel parameter.
    """
    if is_mcore_tensor_model_parallel(param):
        if hasattr(param, "_tp_partition_dim"):
            return param._tp_partition_dim
        else:
            return param.partition_dim
    return None


================================================
FILE: megatron/core/distributed/fsdp/src/pyproject.toml
================================================
# Copyright (c) 2023, NVIDIA CORPORATION.  All rights reserved.

[build-system]
requires = ["setuptools<80.0.0", "pybind11"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
include-package-data = true

[tool.setuptools.packages.find]
include = ["megatron_fsdp", "megatron_fsdp.*"]

[tool.setuptools.dynamic]
version = { attr = "megatron_fsdp.package_info.__version__" }
readme = { file = "README.md", content-type = "text/markdown" }

[project]
name = "megatron-fsdp"
dynamic = ["version", "readme"]
description = "**Megatron-FSDP** is an NVIDIA-developed PyTorch extension that provides a high-performance implementation of Fully Sharded Data Parallelism (FSDP)"
requires-python = ">=3.10"
license = { text = "Apache 2.0" }
dependencies = ["torch", "einops", "packaging"]
authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }]
maintainers = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }]
keywords = [
    "NLP",
    "NLU",
    "deep",
    "gpu",
    "language",
    "learning",
    "machine",
    "nvidia",
    "pytorch",
    "torch",
    "transformer",
]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Environment :: Console",
    "Intended Audience :: Developers",
    "Intended Audience :: Information Technology",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: BSD License",
    "Natural Language :: English",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Scientific/Engineering :: Image Recognition",
    "Topic :: Scientific/Engineering :: Mathematics",
    "Topic :: Scientific/Engineering",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Software Development :: Libraries",
    "Topic :: Utilities",
]

[project.urls]
Download = "https://github.com/NVIDIA/Megatron-LM/releases"
Homepage = "https://github.com/NVIDIA/Megatron-LM"


[tool.uv]

[tool.uv.sources]


================================================
FILE: megatron/core/distributed/param_and_grad_buffer.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import functools
import logging
import math
import warnings
from contextlib import nullcontext
from enum import Enum
from functools import partial
from typing import Dict, List, Optional

import torch
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
from torch.distributed import _coalescing_manager

import megatron.core.nccl_allocator as nccl_allocator
from megatron.core import parallel_state
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.rerun_state_machine import get_rerun_state_machine
from megatron.core.utils import log_single_rank

from ..fp8_utils import (
    is_float8tensor,
    is_mxfp8tensor,
    modify_underlying_storage,
    post_all_gather_processing,
)
from ..utils import is_torch_min_version, log_on_each_pipeline_stage
from .distributed_data_parallel_config import DistributedDataParallelConfig
from .reduce_scatter_with_fp32_accumulation import reduce_scatter_with_fp32_accumulation

logger = logging.getLogger(__name__)

try:
    if is_torch_min_version("1.13.0"):
        dist_all_gather_func = torch.distributed.all_gather_into_tensor
        dist_reduce_scatter_func = torch.distributed.reduce_scatter_tensor
    else:
        dist_all_gather_func = torch.distributed._all_gather_base
        dist_reduce_scatter_func = torch.distributed._reduce_scatter_base
except:
    dist_all_gather_func = torch.distributed._all_gather_base
    dist_reduce_scatter_func = torch.distributed._reduce_scatter_base

import megatron.core.nccl_allocator as nccl_allocator


class BufferType(Enum):
    """
    Enumeration for buffer type.
    """

    PARAM = 1
    GRAD = 2


def shard_buffer(buffer: torch.Tensor, data_parallel_world_size: int):
    """
    Shard buffer into data_parallel_world_size chunks of equal size.
    """
    assert buffer.numel() % data_parallel_world_size == 0
    shard_size = buffer.numel() // data_parallel_world_size
    sharded_buffer = [
        buffer[(r * shard_size) : ((r + 1) * shard_size)] for r in range(data_parallel_world_size)
    ]
    return sharded_buffer


class _ParamAndGradBucket:
    """
    Bucket to keep track of a subset of the model's parameters and gradients.

    Args:
        params: List of parameters whose gradients are collated in this bucket.
        param_data: View in _ParamAndGradBuffer.param_data that this bucket is responsible for.
        grad_data: View in _ParamAndGradBuffer.grad_data that this bucket is responsible for.
        offset: Offset of this bucket's view in the larger _ParamAndGradBuffer.
        numel_unpadded: Number of unpadded elements in bucket.
        gradient_scaling_factor: This factor is utilized to scale gradients prior to their
            communication. Its application is twofold: it facilitates the averaging of gradients
            and the scaling of gradients in the context of the Mixture of Experts (MoE) model.
        bucket_id: Index of bucket in buffer.
        param_index_map: Mapping from param to (start, end, bucket_id) in the global buffer.
            Used to derive bucket-local offsets for param_to_index.
    """

    def __init__(
        self,
        params: List[torch.nn.Parameter],
        param_data: Optional[torch.Tensor],
        grad_data: torch.Tensor,
        offset: int,
        numel_unpadded: int,
        gradient_scaling_factor: float,
        bucket_id: int,
        param_index_map: Dict[torch.nn.Parameter, tuple],
    ):
        self.params_list = params
        self.params = set(params)
        # Make sure there are no duplicate params.
        assert len(self.params_list) == len(self.params)
        self.param_data = param_data
        self.grad_data = grad_data
        # The distributed optimizer needs to keep track of this bucket's offset
        # within the full grad_buffer.
        self.offset = offset
        self.numel_unpadded = numel_unpadded
        self.gradient_scaling_factor = gradient_scaling_factor
        self.bucket_id = bucket_id
        # Derive bucket-local param offsets from the global param_index_map.
        self.param_to_index = {}
        for param in params:
            global_start, global_end, _ = param_index_map[param]
            self.param_to_index[param] = (global_start - offset, global_end - offset)

        # Layer-wise optimizer attributes for async param gather.
        self.layerwise_params_list = None
        self.layerwise_param_flat_sizes = None
        self.layerwise_gather_list = None
        self._layerwise_src_buffer = None

    def set_layerwise_params_list(self, layerwise_params_list: List[List[torch.nn.Parameter]]):
        """Set per-rank parameter lists for layer-wise async all-gather.

        Args:
            layerwise_params_list: List of param lists, one per rank in the DP group.
                Each inner list contains the parameters owned by that rank's
                layer-wise optimizer that also belong to this bucket.
        """
        self.layerwise_params_list = layerwise_params_list
        self.layerwise_param_flat_sizes = [
            sum([p.numel() for p in param_list]) for param_list in layerwise_params_list
        ]


class _LayerwiseAllGatherHandle:
    """Handle wrapping multiple async all-gather work objects.

    NCCL guarantees in-order completion on the same communicator, so waiting
    on only the last handle is sufficient.
    """

    def __init__(self, handles):
        self.handles = handles

    def wait(self):
        """Wait on the last handle and clear all handles."""
        if self.handles:
            self.handles[-1].wait()
        self.handles = None


class _ParamAndGradBucketGroup:
    """
    Put multiple buckets into a group so that their communications can be aggregated together.
    Provides functionality to register when params in the bucket group have grads ready to be
    synced; an asynchronous communication call is automatically launched when _all_ params in
    the bucket group have grads ready.

    Args:
        buckets: A list of buckets.
        ddp_config: DistributedDataParallel config object.
        collective_group: intra_distributed_optimizer_instance_group if using distributed
            optimizer, data_parallel_group if not.
        collective_group_size: World size using the intra data-parallel group.
    """

    def __init__(
        self,
        buckets: List[_ParamAndGradBucket],
        ddp_config: DistributedDataParallelConfig,
        collective_group: torch.distributed.ProcessGroup,
        collective_group_size: int,
    ):
        self.buckets = buckets
        self.ddp_config = ddp_config

        # overlap_param_gather covers the layer-wise optimizer case, which sets
        # overlap_param_gather=True without use_distributed_optimizer.
        if self.ddp_config.use_distributed_optimizer or self.ddp_config.overlap_param_gather:
            self.intra_distributed_optimizer_instance_group = collective_group
            self.intra_distributed_optimizer_instance_size = collective_group_size
            self.intra_distributed_optimizer_instance_rank = collective_group.rank()
        if not self.ddp_config.use_distributed_optimizer:
            self.data_parallel_group = collective_group

        # State for bookkeeping: params is the set of parameters this bucket group is
        # responsible for, param_to_bucket maps params to the corresponding bucket.
        self.param_to_bucket = {}
        self.params = set()
        for bucket in self.buckets:
            for param in bucket.params_list:
                self.param_to_bucket[param] = bucket
                self.params.add(param)

        self.next_param_gather_bucket_group = None

        if self.ddp_config.num_distributed_optimizer_instances > 1:
            self.inter_distributed_optimizer_instance_group = None
            self.communication_stream = None
            assert (
                not self.ddp_config.reduce_scatter_with_fp32_accumulation
            ), "RS w/ FP32 accumulation not supported with num_distributed_optimizer_instances > 1"

        global dist_reduce_scatter_func
        if self.ddp_config.reduce_scatter_with_fp32_accumulation:
            dist_reduce_scatter_func = reduce_scatter_with_fp32_accumulation
            log_single_rank(
                logger,
                logging.INFO,
                "Using reduce_scatter_with_fp32_accumulation as reduce-scatter implementation",
            )

        # per_param_grad_ready_counts is a dict mapping parameters to number of times
        # `register_grad_ready` is called for that parameter *when
        # self.is_last_microbatch is True*. Should be 1 for most params but could be greater
        # than 1 if control flow passes through the same parameter multiple times. We lazily
        # populate this in the first batch, hence the .is_first_batch attribute.
        # When overlap_grad_reduce is True, communication (all-reduce or reduce-scatter)
        # is issued when per_param_grad_ready_counts equals golden_per_param_grad_ready_counts.
        # In other words, communication is dispatched as soon as all gradients in this bucket
        # are *ready*, as marked by the backward hook.
        # The set of keys in per_param_grad_ready_counts should be equal to `params`.
        self.golden_per_param_grad_ready_counts = {}
        self.per_param_grad_ready_counts = {}
        self.is_last_microbatch = True
        self.is_first_batch = True

        # Other metadata to keep track of collectives.
        self.param_gather_handle = None
        self.param_gather_dispatched = False
        self.grad_reduce_handle = None

        # Each time a local shard is created from bucket.param_data or bucket.grad_data, it
        # introduces some CPU overheads. We use these two lists to cache the created local
        # shards to avoid unnecessary CPU operations. This does not increase GPU memory usage
        # because it only saves a slice view, which shares the same memory with bucket.param_data
        # or bucket.grad_data.
        self.cached_param_buffer_shard_list = [None] * len(self.buckets)
        self.cached_grad_buffer_shard_list = [None] * len(self.buckets)

    def reset(self):
        """
        Reset metadata in bucket group in preparation for the next iteration of training.
        """
        if self.is_first_batch and len(self.per_param_grad_ready_counts) > 0:
            # Record golden per_param_grad_ready_counts.
            assert len(self.per_param_grad_ready_counts) == len(self.params)
            self.golden_per_param_grad_ready_counts = self.per_param_grad_ready_counts
            self.is_first_batch = False
        self.per_param_grad_ready_counts = {}
        self.is_last_microbatch = True

    def check_grads(self, check_for_nan_or_inf, check_for_large):
        """
        Make sure norm of grads in bucket are not NaN prior to data-parallel
        all-reduce / reduce-scatter.
        """
        rerun_state_machine = get_rerun_state_machine()
        for i in range(len(self.buckets)):
            grad_norm = self.buckets[i].grad_data.norm(p=2)
            # check for NaN, Inf and unexpectedly large grads
            if check_for_nan_or_inf:
                rerun_state_machine.validate_result(
                    result=grad_norm,
                    rejection_func=torch.isnan,
                    message=f"found NaN in local grad norm for bucket #{i} "
                    f"in backward pass before data-parallel communication collective",
                    tolerance=0.001,  # 0.1% tolerance to account for non-deterministic FA backward
                    fatal=True,
                )
                rerun_state_machine.validate_result(
                    result=grad_norm,
                    rejection_func=torch.isinf,
                    message=f"found Inf in local grad norm for bucket #{i} "
                    f"in backward pass before data-parallel communication collective",
                    tolerance=0.001,  # 0.1% tolerance to account for non-deterministic FA backward
                    fatal=True,
                )
            if check_for_large:
                rerun_state_machine.validate_result(
                    result=grad_norm,
                    rejection_func=partial(
                        rerun_state_machine.is_unexpectedly_large, threshold=10, context="grads"
                    ),
                    message=f"found unexpected large grads in bucket #{i} "
                    f"in backward pass before data-parallel communication collective",
                    tolerance=0.001,  # 0.1% tolerance to account for non-deterministic FA backward
                    fatal=False,
                )

    def start_param_sync(self, force_sync: bool = False):
        """
        Initiates all necessary param all-gathers for this bucket.

        When ddp_config.overlap_param_gather is set to True, dispatches an asynchronous
        communication call (unless force_sync is True). When ddp_config.overlap_param_gather
        is set to False, makes synchronous call.

        Args:
            force_sync (bool, optional): force synchronous collective regardless of
                other settings if true.
        """
        # overlap_param_gather covers the layer-wise optimizer case, which sets
        # overlap_param_gather=True without use_distributed_optimizer.
        assert self.ddp_config.use_distributed_optimizer or self.ddp_config.overlap_param_gather

        if force_sync:
            if self.param_gather_handle is not None:
                self.param_gather_handle.wait()
                self.param_gather_handle = None
                return
        else:
            assert self.param_gather_handle is None

        async_op = self.ddp_config.overlap_param_gather and not force_sync

        if not self.ddp_config.use_distributed_optimizer:
            # Layer-wise optimizer path: use all_gather for variable-size
            # param gather.
            #
            # Each rank may own a different number of params per bucket, so
            # layerwise_param_flat_sizes can vary across ranks.  PyTorch's NCCL
            # backend handles uneven tensor sizes in torch.distributed.all_gather
            # (falling back to grouped send/recv internally when sizes differ),
            # so no manual padding is needed.
            dp_size = self.intra_distributed_optimizer_instance_size
            local_rank = self.intra_distributed_optimizer_instance_rank
            group = self.intra_distributed_optimizer_instance_group
            layerwise_work_handles = []
            for bucket in self.buckets:
                # Use param dtype (e.g., bf16), NOT grad dtype (which may be
                # fp32 when grad_reduce_in_fp32 is enabled).
                param_dtype = bucket.params_list[0].dtype

                if max(bucket.layerwise_param_flat_sizes) == 0:
                    # All ranks have empty params for this bucket — skip.
                    bucket.layerwise_gather_list = None
                    continue

                # Flatten local params.  Detach from the autograd graph because
                # start_param_sync can be called during the forward pass (where
                # autograd is active) and all_gather will write into gather_list
                # entries in-place.
                local_size = bucket.layerwise_param_flat_sizes[local_rank]
                if local_size > 0:
                    flat_local_params = _flatten_dense_tensors(
                        bucket.layerwise_params_list[local_rank]
                    ).detach()
                else:
                    flat_local_params = torch.empty(
                        0, device=bucket.grad_data.device, dtype=param_dtype
                    )
                # Keep flat_local_params alive until the async operation completes.
                bucket._layerwise_src_buffer = flat_local_params

                # Allocate per-rank receive buffers with actual sizes (no padding).
                # Reuse flat_local_params for local_rank's slot to avoid an extra allocation.
                gather_list = []
                for i in range(dp_size):
                    if i == local_rank:
                        gather_list.append(flat_local_params)
                    else:
                        gather_list.append(
                            torch.empty(
                                bucket.layerwise_param_flat_sizes[i],
                                device=flat_local_params.device,
                                dtype=flat_local_params.dtype,
                            )
                        )
                bucket.layerwise_gather_list = gather_list

                work = torch.distributed.all_gather(
                    gather_list, flat_local_params, group=group, async_op=async_op
                )
                if async_op and work is not None:
                    layerwise_work_handles.append(work)

            if async_op:
                self.param_gather_handle = _LayerwiseAllGatherHandle(layerwise_work_handles)
            else:
                # Synchronous: unflatten and copy gathered params immediately.
                for bucket in self.buckets:
                    if bucket.layerwise_gather_list is None:
                        continue
                    for idx, params in enumerate(bucket.layerwise_params_list):
                        if len(params) == 0 or idx == local_rank:
                            continue
                        updated_params = _unflatten_dense_tensors(
                            bucket.layerwise_gather_list[idx], params
                        )
                        for updated_p, model_p in zip(updated_params, params):
                            model_p.data.copy_(updated_p)
                    bucket.layerwise_gather_list = None
                    bucket._layerwise_src_buffer = None
                self.param_gather_handle = None
        else:
            # Standard distributed optimizer path: use _coalescing_manager.
            # all_gather_into_tensor writes directly into a contiguous output buffer and
            # does not need a copy-back step, so coalescing works correctly.
            with _coalescing_manager(
                self.intra_distributed_optimizer_instance_group, async_ops=async_op
            ) as cm:
                for idx, bucket in enumerate(self.buckets):
                    if self.cached_param_buffer_shard_list[idx] is None:
                        self.cached_param_buffer_shard_list[idx] = shard_buffer(
                            bucket.param_data, self.intra_distributed_optimizer_instance_size
                        )
                    local_data_view = self.cached_param_buffer_shard_list[idx][
                        self.intra_distributed_optimizer_instance_rank
                    ]
                    dist_all_gather_func(
                        bucket.param_data,
                        local_data_view,
                        group=self.intra_distributed_optimizer_instance_group,
                        async_op=async_op,
                    )
            if async_op:
                self.param_gather_handle = cm
            else:
                # When using `_coalescing_manager`, even if a synchronous op
                # (async_op=False) is used, `cm` is not None. Manually set to None for
                # consistency with prior code.
                self.param_gather_handle = None
        self.param_gather_dispatched = True

    def finish_param_sync(self, skip_next_bucket_dispatch: bool = False):
        """
        Finishes param sync communication operation for this bucket. Dispatches
        next bucket's param sync if available, unless skip_next_bucket_dispatch
        is True.

        When ddp_config.overlap_param_gather is set to True, waits for asynchronous
        communication call to complete (and dispatches one if one is not already
        outstanding). Throws assertion error if ddp_config.overlap_param_gather is set to
        False.

        Args:
            skip_next_bucket_dispatch (bool, optional): if true, dispatch next
                bucket's communication if available.
        """
        assert self.ddp_config.overlap_param_gather

        # If current bucket's param AG has not been dispatched, dispatch it now (e.g., first
        # AG bucket in first model chunk if ddp_config.align_param_gather is False).
        if not self.param_gather_dispatched:
            self.start_param_sync()

        if self.param_gather_handle is not None:
            self.param_gather_handle.wait()
            self.param_gather_handle = None
            # Dispatch next bucket's asynchronous param AG only if it has not been dispatched yet.
            if self.next_param_gather_bucket_group is not None and not skip_next_bucket_dispatch:
                if self.next_param_gather_bucket_group.param_gather_dispatched:
                    warnings.warn(
                        "The next bucket's parameter all-gather operation has already been "
                        "dispatched. This may be caused by a mismatch between the order of "
                        "parameter registration and forward pass execution, which will "
                        "hurt the communication-computation overlap performance."
                    )
                else:
                    self.next_param_gather_bucket_group.start_param_sync()

            # For the mxfp8_param with "reuse_grad_buf_for_mxfp8_param_ag=True",
            # we need to copy the param_data from the shared_param/grad_buffer to param.data
            # after the param all-gather.
            if self.ddp_config.reuse_grad_buf_for_mxfp8_param_ag:
                for bucket in self.buckets:
                    is_bf16_weight_bucket = False
                    for param in bucket.params:
                        # Skip copying since bf16 weights in the mxfp8 model
                        # are already mapped to param.data.
                        if not is_float8tensor(param):
                            is_bf16_weight_bucket = True
                            break
                        param_start, param_end = bucket.param_to_index[param]
                        param_slice = bucket.param_data.view(-1)[param_start:param_end]
                        param.data.copy_(param_slice.view(param.data.shape))
                    if is_bf16_weight_bucket:
                        continue
                    # All-gathered params are not needed after being copied to param.data.
                    # Zero out the param buffer (shared with grad buffer) for gradient accumulation.
                    # We cannot zero out the entire grad buffer because one grad buffer may
                    # correspond to multiple param buffers. If we zero out the entire grad buffer,
                    # it would clear the data of those param buffers that have not yet completed AG.
                    bucket.param_data.zero_()
            elif not self.ddp_config.use_distributed_optimizer:
                for bucket in self.buckets:
                    if bucket.layerwise_gather_list is None:
                        continue
                    # Unflatten and copy gathered params for each rank.
                    for idx, params in enumerate(bucket.layerwise_params_list):
                        # Skip local params and empty tensors.
                        if (
                            len(params) == 0
                            or idx == self.intra_distributed_optimizer_instance_rank
                        ):
                            continue
                        updated_params = _unflatten_dense_tensors(
                            bucket.layerwise_gather_list[idx], params
                        )
                        for updated_p, model_p in zip(updated_params, params):
                            model_p.data.copy_(updated_p)
                    bucket.layerwise_gather_list = None
                    bucket._layerwise_src_buffer = None
            else:
                fp8_params = []
                for bucket in self.buckets:
                    for param in bucket.params:
                        if is_float8tensor(param):
                            fp8_params.append(param)
                if len(fp8_params) > 0:
                    post_all_gather_processing(fp8_params)

    def start_grad_sync(self, force_all_reduce: Optional[bool] = False):
        """
        Initiates grad sync (all-reduce or reduce-scatter) communication operations
        for all buckets in the bucket group.

        When ddp_config.overlap_grad_reduce is set to True, dispatches an asynchronous
        communication call. When ddp_config.overlap_grad_reduce is set to False, makes
        synchronous call.
        """
        if self.is_first_batch and self.grad_reduce_handle is not None:
            # Make this start_grad_sync call a no-op if in first batch and collective has
            # already been dispatched.
            return

        assert (
            self.grad_reduce_handle is None
        ), "Should not have multiple communication calls outstanding at once"

        if self.ddp_config.check_for_nan_in_grad or self.ddp_config.check_for_large_grads:
            self.check_grads(
                check_for_nan_or_inf=self.ddp_config.check_for_nan_in_grad,
                check_for_large=self.ddp_config.check_for_large_grads,
            )

        # gradient_scaling_factor already takes into account whether we are computing
        # an average or sum in the data-parallel collective.
        for bucket in self.buckets:
            if bucket.gradient_scaling_factor != 1.0:
                bucket.grad_data *= bucket.gradient_scaling_factor

        # Decide reduce_op.
        reduce_op = torch.distributed.ReduceOp.SUM
        if self.ddp_config.average_in_collective:
            reduce_op = torch.distributed.ReduceOp.AVG

        # We use the following stream synchronization for the gradient reduction
        # within and across DistOpt instances.

        # Compute Stream: -------------Gradient compute-------------------
        # Comm. Stream:   ------(wait for NCCL)-----(wait for NCCL)-------
        # NCCL Stream:          -------RS------     -------AR------

        # Use async communications only when overlap_grad_reduce is True.
        async_op = (
            self.ddp_config.overlap_grad_reduce
            and self.ddp_config.num_distributed_optimizer_instances == 1
        )
        if (
            self.ddp_config.num_distributed_optimizer_instances > 1
            and self.ddp_config.overlap_grad_reduce
        ):
            # Assign a communication stream if we have multiple DistOpt instances and we
            # need to overlap communication.
            stream_context = torch.cuda.stream(self.communication_stream)

            # The RS/AR communication stream needs to wait for the current stream
            # to complete its gradient computation before launching the next
            # gradient reduction collective.
            self.communication_stream.wait_stream(torch.cuda.current_stream())
        else:
            stream_context = nullcontext()

        if self.ddp_config.use_distributed_optimizer:
            communication_group = self.intra_distributed_optimizer_instance_group
        else:
            communication_group = self.data_parallel_group

        # Coalesce communication kernels across buckets in the bucket group.
        grad_reduce_handle = None
        with stream_context, _coalescing_manager(communication_group, async_ops=async_op) as cm:
            for idx, bucket in enumerate(self.buckets):
                if self.ddp_config.use_distributed_optimizer and not force_all_reduce:
                    if self.cached_grad_buffer_shard_list[idx] is None:
                        self.cached_grad_buffer_shard_list[idx] = shard_buffer(
                            bucket.grad_data, self.intra_distributed_optimizer_instance_size
                        )
                    local_data_view = self.cached_grad_buffer_shard_list[idx][
                        self.intra_distributed_optimizer_instance_rank
                    ]
                    grad_reduce_handle = dist_reduce_scatter_func(
                        local_data_view,
                        bucket.grad_data,
                        op=reduce_op,
                        group=communication_group,
                        async_op=async_op,
                    )
                else:
                    if torch.distributed.get_rank() == 0 and force_all_reduce:
                        logger.info(
                            f"Performing reduction using all_reduce because {force_all_reduce=}"
                        )
                    torch.distributed.all_reduce(
                        bucket.grad_data, op=reduce_op, group=communication_group, async_op=async_op
                    )

        # With multiple DistOpt instances, we need to all-reduce across instances.
        if (
            self.ddp_config.use_distributed_optimizer
            and self.ddp_config.num_distributed_optimizer_instances > 1
        ):
            assert self.inter_distributed_optimizer_instance_group is not None
            # Create a new coalescing manager for the inter-instance all-reduce.
            with (
                stream_context,
                _coalescing_manager(
                    self.inter_distributed_optimizer_instance_group, async_ops=async_op
                ) as cm,
            ):
                for idx, bucket in enumerate(self.buckets):
                    if self.cached_grad_buffer_shard_list[idx] is None:
                        self.cached_grad_buffer_shard_list[idx] = shard_buffer(
                            bucket.grad_data, self.intra_distributed_optimizer_instance_size
                        )
                    local_data_view = self.cached_grad_buffer_shard_list[idx][
                        self.intra_distributed_optimizer_instance_rank
                    ]

                    torch.distributed.all_reduce(
                        local_data_view,
                        op=reduce_op,
                        group=self.inter_distributed_optimizer_instance_group,
                        async_op=async_op,
                    )

        if async_op:
            if self.ddp_config.reduce_scatter_with_fp32_accumulation and not force_all_reduce:
                assert (
                    len(self.buckets) == 1
                ), "Only 1 bucket supported with reduce_scatter_with_fp32_accumulation=True"
                # torch.distributed._coalescing_manager does not correctly handle calling our custom
                # collective handle's .wait() method, so we take matters into our own hands here.
                assert grad_reduce_handle is not None
                self.grad_reduce_handle = grad_reduce_handle
            else:
                self.grad_reduce_handle = cm
        else:
            # When using `_coalescing_manager`, even if a synchronous op (async_op=False) is used,
            # `cm` is not None, which is different from when `_coalescing_manager` is not used in
            # which case the torch.distributed._reduce_scatter_base() will return None. In order to
            # maintain consistency with prior code, we need to manually set communication handle to
            # None.
            self.grad_reduce_handle = None

    def finish_grad_sync(self, force_all_reduce: Optional[bool] = False):
        """
        Finishes grad sync (all-reduce or reduce-scatter) communication operations
        for all buckets in the bucket group.

        When ddp_config.overlap_grad_reduce is set to True, waits for asynchronous
        communication call to complete. When ddp_config.overlap_grad_reduce is set to False,
        makes synchronous call.
        """
        self.param_gather_dispatched = False
        # If overlap_grad_reduce is False, start (and finish) synchronous communication call here.
        if not self.ddp_config.overlap_grad_reduce:
            self.start_grad_sync(force_all_reduce=force_all_reduce)
            return
        # If first batch, start asynchronous communication here. register_grad_ready() launches
        # asynchronous communication only once self.golden_per_param_grad_ready_counts is
        # populated at the end of this first batch.
        if self.is_first_batch:
            self.start_grad_sync(force_all_reduce=force_all_reduce)
        # When using multiple DistOpt instances, we don't need to sync here as we launch
        # communications on a separate communication stream.
        if self.ddp_config.num_distributed_optimizer_instances > 1:
            torch.cuda.current_stream().wait_stream(self.communication_stream)
            return
        assert self.grad_reduce_handle is not None, (
            f"Communication call has not been issued for this bucket "
            f"({len(self.per_param_grad_ready_counts)}/{len(self.params)} "
            "params have grad available)"
        )
        self.grad_reduce_handle.wait()
        self.grad_reduce_handle = None

    def free_overlap_buffers(self):
        """Free GPU buffers used by overlap param gather.

        Waits on any pending param all-gather handle, then releases the
        per-bucket temporary buffers so that the CUDA memory allocator can
        reclaim them.  Called before async checkpoint saves to avoid OOM in
        the persistent checkpoint worker process.
        """
        if self.param_gather_handle is not None:
            self.param_gather_handle.wait()
            self.param_gather_handle = None
        for bucket in self.buckets:
            bucket.layerwise_gather_list = None
            bucket._layerwise_src_buffer = None

    def register_grad_ready(
        self, param: torch.nn.Parameter, force_all_reduce: Optional[bool] = False
    ):
        """
        Registers grads for the passed-in param to be "ready" for grad sync.

        When the number of microbatches is greater than 1, we only want to register
        grads as ready when processing the last microbatch and ddp_config.overlap_grad_reduce
        is True.
        """
        assert (
            self.ddp_config.overlap_grad_reduce
        ), "register_grad_ready() should only be called when overlap_grad_reduce is True"
        if self.is_last_microbatch:
            assert param in self.param_to_bucket, "Param is not in the bucket group"
            if param not in self.per_param_grad_ready_counts:
                self.per_param_grad_ready_counts[param] = 0
            self.per_param_grad_ready_counts[param] += 1
            # If all params in bucket group have grads available, issue communication call.
            if not self.is_first_batch:
                if self.per_param_grad_ready_counts == self.golden_per_param_grad_ready_counts:
                    assert len(self.per_param_grad_ready_counts) == len(self.params)
                    self.start_grad_sync(force_all_reduce=force_all_reduce)


class _ParamAndGradBuffer:
    """
    Groups parameters and gradients into a contiguous buffer, and then breaks the buffer into
    buckets with roughly `bucket_size` parameters each.

    Args:
        ddp_config: DistributedDataParallel config object.
        param_dtype: Type of param tensor.
        grad_dtype: Type of grad tensor.
        params: List of parameters whose parameters and gradients are collated in the underlying
            tensor.
        data_parallel_group: Data-parallel process group.
        bucket_size: The rough size of each bucket in terms of number of parameters.
        param_to_name: Mapping from `torch.nn.Parameter` to name (for logging purposes).
        gradient_scaling_factor: This factor is utilized to scale gradients prior to their
            communication. Its application is twofold: it facilitates the averaging of gradients
            and the scaling of gradients in the context of the Mixture of Experts (MoE) model.
        param_indices: The index of each param among the params with same dtype, if a param is fp8,
            use its "fake" high precision dtype to determine which params have same dtype with it.
            These indices are needed when loading a non-native-fp8 checkpoint in native-fp8 mode.
    """

    def __init__(
        self,
        ddp_config: DistributedDataParallelConfig,
        param_dtype: torch.dtype,
        grad_dtype: torch.dtype,
        params: List[torch.nn.Parameter],
        data_parallel_group: torch.distributed.ProcessGroup,
        bucket_size: int,
        param_to_name: Dict[torch.nn.Parameter, str],
        gradient_scaling_factor: float,
        param_indices: List[int],
        nccl_ub: bool,
        pg_collection: Optional[ProcessGroupCollection] = None,
    ):

        if pg_collection is None:
            self.dp_cp_group = parallel_state.get_data_and_context_parallel_group(
                with_context_parallel=True
            )
            self.tp_group = parallel_state.get_tensor_model_parallel_group()
        else:
            assert hasattr(pg_collection, 'tp') and hasattr(pg_collection, 'dp_cp')
            self.dp_cp_group = pg_collection.dp_cp
            self.tp_group = pg_collection.tp

        self.ddp_config = ddp_config
        self.params = params
        self.param_indices = param_indices

        # Check that params are unique.
        unique_params = set()
        for param in params:
            assert param not in unique_params
            unique_params.add(param)
        del unique_params

        # Store attributes that will be needed later.
        self.param_dtype = param_dtype
        self.grad_dtype = grad_dtype
        self.data_parallel_group = data_parallel_group
        self.data_parallel_world_size = self.data_parallel_group.size()
        self.gradient_scaling_factor = gradient_scaling_factor
        self.nccl_ub = nccl_ub

        # Data structures to store underlying buckets and relevant indexing data.
        self.buckets = []
        self.param_to_bucket = {}  # Param -> bucket mapping.
        self.param_index_map = {}  # Param -> location in buffer mapping (used in dist. optimizer).

        def _pad(number_to_be_padded: int, divisor: int) -> int:
            return int(math.ceil(number_to_be_padded / divisor) * divisor)

        def _pad_end_of_bucket_if_needed(bucket_end_index: int) -> int:
            """
            Pads end index of bucket if using distributed optimizer (to ensure uniform sharding).
            """
            if self.ddp_config.use_distributed_optimizer:
                # Workaround for TE bug causing cuBLAS to pick an incompatible algorithm.
                # This also helps cuBLAS pick more efficient algorithms for GEMMs.
                # We now ensure that all buckets start at a memory address that is 256-byte
                # aligned (128 values since params and grads use >= 16-bit precision).
                if self.ddp_config.pad_buckets_for_high_nccl_busbw:
                    # Make sure the bucket size is divisible by a large power of 2 (2^16) to
                    # ensure NCCL collectives have high bus bandwidth at large DP counts,
                    # since NCCL message size (which for ring algorithms is bucket_size /
                    # dp_size) apparently needs to be divisible by a power of 2 for high busbw.
                    bucket_size_divisor = math.lcm(self.data_parallel_world_size, 128, 2**16)
                else:
                    bucket_size_divisor = math.lcm(self.data_parallel_world_size, 128)
                return _pad(bucket_end_index, bucket_size_divisor)
            return bucket_end_index

        def _pad_start_of_param_if_needed(param_start_index: int) -> int:
            """
            Pads start index of param if using distributed optimizer (to ensure "good" alignment).
            """
            if self.ddp_config.use_distributed_optimizer:
                # Ensure that params start at 128-byte aligned addresses (64 values
                # since params are >= 16-bit precision).
                return _pad(param_start_index, 64)
            return param_start_index

        # First, figure out how many elements should be in the underlying buffer storage.
        # Note that if we need to split the buffer into smaller buckets, each of these
        # might need to be padded as well (if using the distributed optimizer).
        param_start_index = 0
        bucket_start_index = param_start_index
        bucket_params = set()
        self.bucket_indices = []
        per_bucket_numel_unpadded = []
        bucket_id = 0

        def _update_bucket_metadata(param_end_index: int) -> int:
            """
            Record metadata for the bucket starting at bucket_start_index and ending with the
            passed-in param_end_index. Returns the bucket's end_index.
            """
            nonlocal bucket_start_index, bucket_params, bucket_id
            per_bucket_numel_unpadded.append(param_end_index - bucket_start_index)
            bucket_end_index = _pad_end_of_bucket_if_needed(param_end_index)

            # Record metadata of new bucket.
            self.bucket_indices.append((bucket_start_index, bucket_end_index))
            bucket_start_index = bucket_end_index

            # Prepare for next bucket.
            bucket_params = set()
            bucket_id += 1

            # Return the potentially padded bucket_end_index.
            return bucket_end_index

        def _does_param_require_new_bucket(param):
            """
            Split shared embedding parameters into separate bucket if using distributed
            optimizer that makes use of reduce-scatters instead of all-reduces.
            This ensures that the first and last pipeline stage partition optimizer state
            for the shared embedding parameters the same way across DP replicas, allowing
            the DP reduce-scatter to be before the embedding all-reduce.
            """
            return (
                getattr(param, "shared_embedding", False)
                and self.ddp_config.use_distributed_optimizer
            )

        for param in params[::-1]:
            # Iterate through parameters in reverse order to roughly follow backprop order.

            this_numel = param.data.nelement()
            param_start_index = _pad_start_of_param_if_needed(param_start_index)

            # Create bucket with collected parameters if current param needs its own bucket.
            if _does_param_require_new_bucket(param) and len(bucket_params) > 0:
                # Ensure this param accounts for the new padding introduced at end of
                # previous bucket.
                param_start_index = _update_bucket_metadata(param_start_index)

            param_end_index = param_start_index + this_numel
            self.param_index_map[param] = (param_start_index, param_end_index, bucket_id)
            bucket_params.add(param)

            # If we have enough elements already or the current param is part of the shared
            # embedding layer and needs a separate bucket, form a new bucket.
            if (
                bucket_size is not None and (param_end_index - bucket_start_index) >= bucket_size
            ) or _does_param_require_new_bucket(param):
                bucket_end_index = _update_bucket_metadata(param_end_index)
                param_start_index = bucket_end_index
            else:
                param_start_index = param_end_index

        # Add remaining params to a new bucket.
        if len(bucket_params) > 0:
            bucket_end_index = _update_bucket_metadata(param_end_index)

        # Next, create underlying storage for buffer (with numel elements that includes
        # padding as necessary).
        self.numel = bucket_end_index
        self.numel_unpadded = sum(per_bucket_numel_unpadded)
        assert self.numel_unpadded <= self.numel
        if self.ddp_config.use_distributed_optimizer:
            assert self.numel % self.data_parallel_world_size == 0
        else:
            assert self.numel == self.numel_unpadded

        self.param_data = None

        if self.nccl_ub:
            # If nccl_ub is True, use nccl_allocator to allocate memory for param_data/grad_data.
            nccl_allocator.init()
            pool = nccl_allocator.create_nccl_mem_pool(
                symmetric=not self.ddp_config.disable_symmetric_registration
            )
            mem_alloc_context = functools.partial(
                nccl_allocator.nccl_mem,
                pool,
                group=self.data_parallel_group,
                symmetric=not self.ddp_config.disable_symmetric_registration,
            )
            # Since nccl communicator group is created lazily, we need to perform a warmup call to
            # initialize NCCL comm buffers for this dp_group before doing buffer registration.
            torch.distributed.barrier()
            tmp_warmup_tensor = torch.zeros([1], device="cuda")
            torch.distributed.all_reduce(tmp_warmup_tensor, group=self.data_parallel_group)
            torch.distributed.barrier()
        else:
            # If nccl_ub is False, mem_alloc_context is nullcontext.
            mem_alloc_context = nullcontext

        with mem_alloc_context():
            # For MXFP8 param: Create a shared buffer for param AG and grad RS for memory efficiency
            # The buffer is mapped to weight gradients whose dtype is either bf16 or FP32.
            # It can be temporarily reused by param AG.
            if self.ddp_config.use_distributed_optimizer and any(is_mxfp8tensor(p) for p in params):
                self.shared_buffer = torch.zeros(
                    self.numel,
                    dtype=self.grad_dtype,
                    device=torch.cuda.current_device(),
                    requires_grad=False,
                )
                # For FP32 weight grads, only half of the buffer is used to store params in bf16.
                if self.grad_dtype == torch.float32:
                    self.param_data = self.shared_buffer[: math.ceil(self.numel / 2)].view(
                        torch.bfloat16
                    )
                else:
                    self.param_data = self.shared_buffer
                self.grad_data = self.shared_buffer
            else:
                # Only re-map param tensors if using distributed optimizer.
                if self.ddp_config.use_distributed_optimizer:
                    self.param_data = torch.zeros(
                        self.numel,
                        dtype=self.param_dtype,
                        device=torch.cuda.current_device(),
                        requires_grad=False,
                    )
                self.grad_data = torch.zeros(
                    self.numel,
                    dtype=self.grad_dtype,
                    device=torch.cuda.current_device(),
                    requires_grad=False,
                )

        self.grad_data_size = 0
        self.param_data_size = 0
        self.param_data_cpu = None

        # Finally, map param.data and param.main_grad fields to buffers.
        bucket_params = []
        bucket_start_index = 0
        cur_bucket_id = 0
        for param in params[::-1]:
            param_start_index, param_end_index, bucket_id = self.param_index_map[param]
            # For MXFP8 param:
            # we only need to map bf16 weights (layernorm, embedding, etc) to the buffer.
            if not self.ddp_config.reuse_grad_buf_for_mxfp8_param_ag or not is_mxfp8tensor(param):
                if self.param_data is not None:
                    new_param_data = self._get(
                        param.data.shape, param_start_index, buffer_type=BufferType.PARAM
                    )
                    if is_float8tensor(param):
                        modify_underlying_storage(param, new_param_data)
                    else:
                        old_param_data = param.data
                        param.data = new_param_data
                        assert old_param_data._base is None
                        # Copy tensor values (from initialization or checkpoint).
                        param.data.detach().copy_(old_param_data)
                        del old_param_data

            param.main_grad = self._get(
                param.data.shape, param_start_index, buffer_type=BufferType.GRAD
            )
            if bucket_id != cur_bucket_id:
                bucket_end_index = _pad_end_of_bucket_if_needed(param_start_index)
                self.buckets.append(
                    self._new_bucket(
                        bucket_params=bucket_params,
                        start_index=bucket_start_index,
                        end_index=bucket_end_index,
                        numel_unpadded=per_bucket_numel_unpadded[cur_bucket_id],
                        bucket_id=cur_bucket_id,
                    )
                )
                bucket_start_index = bucket_end_index
                bucket_params = []
                assert cur_bucket_id + 1 == len(self.buckets)
                assert bucket_id == cur_bucket_id + 1
                cur_bucket_id = bucket_id
            bucket_params.append(param)

        # Add remaining params to a new bucket.
        if len(bucket_params) > 0:
            bucket_end_index = _pad_end_of_bucket_if_needed(param_end_index)
            self.buckets.append(
                self._new_bucket(
                    bucket_params=bucket_params,
                    start_index=bucket_start_index,
                    end_index=bucket_end_index,
                    numel_unpadded=per_bucket_numel_unpadded[cur_bucket_id],
                    bucket_id=cur_bucket_id,
                )
            )

        # Log buckets for all PP stages.
        log_strs = []
        log_strs.append(
            f"Number of buckets for gradient all-reduce / reduce-scatter: {len(self.buckets)}"
        )
        for index, bucket in enumerate(self.buckets):
            numel = 0
            for param in bucket.params:
                numel += param.data.nelement()
            log_strs.append(
                f"Params for bucket {index + 1} ({numel} elements, "
                f"{bucket.grad_data.nelement()} padded size):"
            )
            for param in bucket.params:
                log_strs.append(f"\t{param_to_name[param]}")
        log_on_each_pipeline_stage(
            logger,
            logging.INFO,
            "\n".join(log_strs),
            tp_group=self.tp_group,
            dp_cp_group=self.dp_cp_group,
        )

    def scale_gradients(self, scaling_factor: float) -> None:
        """Scale the gradient data by `scaling_factor`."""
        self.grad_data *= scaling_factor

    def _get(self, shape: torch.Size, start_index: int, buffer_type: BufferType) -> torch.Tensor:
        """
        Return a tensor with the input `shape` as a view into the 1-D data starting at
        `start_index`.
        """
        end_index = start_index + shape.numel()
        assert end_index <= self.numel, "Requested tensor is out of buffer range"
        if buffer_type == BufferType.PARAM:
            assert self.param_data is not None
            buffer_tensor = self.param_data[start_index:end_index]
        elif buffer_type == BufferType.GRAD:
            buffer_tensor = self.grad_data[start_index:end_index]
        else:
            raise Exception("Illegal buffer type provided to GradBuffer._get() function")
        buffer_tensor = buffer_tensor.view(shape)
        return buffer_tensor

    def _new_bucket(
        self,
        bucket_params: List[torch.nn.Parameter],
        start_index: int,
        end_index: int,
        numel_unpadded: int,
        bucket_id: int,
    ) -> _ParamAndGradBucket:
        """
        Helper function that creates a new bucket. Also updates param->bucket mapping.
        """

        # Assert that indices are correctly padded (if needed), and that bucket
        # position is same as originally computed.
        if self.ddp_config.use_distributed_optimizer:
            assert start_index % self.data_parallel_world_size == 0
            assert end_index % self.data_parallel_world_size == 0
        assert (start_index, end_index) == self.bucket_indices[bucket_id]

        # Get appropriate view into global _ParamAndGradBuffer.
        bucketed_param_data = None
        if self.param_data is not None:
            bucketed_param_data = self._get(
                torch.Size([end_index - start_index]), start_index, buffer_type=BufferType.PARAM
            )
        bucketed_grad_data = self._get(
            torch.Size([end_index - start_index]), start_index, buffer_type=BufferType.GRAD
        )
        bucket = _ParamAndGradBucket(
            params=bucket_params,
            param_data=bucketed_param_data,
            grad_data=bucketed_grad_data,
            offset=start_index,
            numel_unpadded=numel_unpadded,
            gradient_scaling_factor=self.gradient_scaling_factor,
            bucket_id=bucket_id,
            param_index_map=self.param_index_map,
        )
        for bucket_param in bucket_params:
            assert bucket_param not in self.param_to_bucket
            self.param_to_bucket[bucket_param] = bucket

        return bucket

    def reset(self):
        """
        Zero out the underlying grad_buffer.
        """
        self.grad_data.zero_()

    def offload_to_cpu(self, move_params: bool = True, move_grads: bool = True) -> None:
        """
        Offload the buffers to CPU.
        """
        if move_grads and self.grad_data is not None and self.grad_data.storage().size() > 0:
            self.grad_data_size = self.grad_data.storage().size()
            self.grad_data.storage().resize_(0)
        if move_params and self.param_data is not None and self.param_data.storage().size() > 0:
            self.param_data_size = self.param_data.storage().size()
            if self.param_data_cpu is not None:
                self.param_data_cpu.copy_(self.param_data, non_blocking=True)
            else:
                self.param_data_cpu = self.param_data.cpu().pin_memory()
            self.param_data.storage().resize_(0)

    def reload_from_cpu(self, move_params: bool = True, move_grads: bool = True):
        """
        Reload the buffers from CPU.
        """
        if (
            move_params
            and self.param_data is not None
            and self.param_data_cpu is not None
            and self.param_data.storage().size() == 0
        ):
            self.param_data.storage().resize_(self.param_data_size)
            self.param_data.copy_(self.param_data_cpu, non_blocking=True)
        if move_grads and self.grad_data is not None and self.grad_data_size > 0:
            self.grad_data.storage().resize_(self.grad_data_size)
            self.grad_data.zero_()
            self.grad_data_size = 0


def partition_buckets(
    buffers: List[_ParamAndGradBuffer], force_single_bucket_group: bool = False
) -> List[_ParamAndGradBucketGroup]:
    """
    Automatically regroup the buckets of input buffers and return a list of bucket groups.

    In some scenarios, we need to put buckets from different buffers into a group so that their
    communication can be aggregated.

    For example, when there are both fp8 weights and bf16 biases in the model and virtual
    pipeline parallelism is enabled, each model chunk will have an fp8 bucket and a bf16 bucket,
    which doubles the number of communication kernels, and because of the use of
    CUDA_DEVICE_MAX_CONNECTIONS=1, having multiple back-to-back communications will prevent the
    overlap of communication kernels with computation kernels.

    The grouping strategy is:
    1. If force_single_bucket_group is True, put all buckets across all buffers into a single
       bucket group.
    2. If force_single_bucket_group is False, when there is no fp8 buffer in the input buffers,
       let each bucket group have only one bucket.
    3. If force_single_bucket_group is False, when using fp8 params, merge all non-fp8 buckets
       into the last fp8 bucket group.
       - Since the non-fp8 parameters (typically the biases of various layers) are relatively
         small, they are likely to be grouped into a single non-fp8 bucket.
       - The fp8 buckets start from the end of the model, i.e., the first bucket corresponds to
         the end of the model, while the last bucket corresponds to the beginning.
       - If we combine the non-fp8 bucket with the first fp8 bucket, we cannot initiate the
         reduce-scatter to synchronize gradients after the backward pass at the end of the model
         has completed. This is because we need to wait for the non-fp8 params from the beginning
         layers to obtain their gradients.
       - Combining the non-fp8 bucket with the last fp8 bucket can help avoid this issue.

    Args:
        buffers (list): list of input buffers.
        single_bucket_group_per_buffer (bool, optional): force group all buckets in each buffer
            into a single bucket group.
    """

    if len(buffers) == 0:
        return []

    dtype_to_buffer_map = {}
    for buffer in buffers:
        dtype = buffer.param_dtype
        # Make sure that the param_dtype of any two buffers is different.
        assert dtype not in dtype_to_buffer_map
        dtype_to_buffer_map[dtype] = buffer

    # Case 1: Put all buckets into a single bucket group if force_single_bucket_group is True.
    if force_single_bucket_group:
        buckets = []
        ddp_config = buffers[0].ddp_config
        data_parallel_group = buffers[0].data_parallel_group
        data_parallel_world_size = buffers[0].data_parallel_world_size
        for buffer in buffers:
            assert ddp_config == buffer.ddp_config
            assert data_parallel_group == buffer.data_parallel_group
            assert data_parallel_world_size == buffer.data_parallel_world_size
            buckets.extend(buffer.buckets)

        bucket_group = _ParamAndGradBucketGroup(
            buckets, ddp_config, data_parallel_group, data_parallel_world_size
        )
        return [bucket_group]

    if torch.uint8 not in dtype_to_buffer_map:
        # Case 2: When there is no fp8 buffer in the input buffers, let each bucket group have
        #         only one bucket.
        bucket_groups = []
        for buffer in buffers:
            for bucket in buffer.buckets:
                bucket_groups.append(
                    _ParamAndGradBucketGroup(
                        [bucket],
                        buffer.ddp_config,
                        buffer.data_parallel_group,
                        buffer.data_parallel_world_size,
                    )
                )
        return bucket_groups
    else:
        # Case 3: When using fp8 params, merge all non-fp8 buckets into the last fp8 bucket group.
        non_fp8_buckets = []
        for buffer in buffers:
            if buffer.param_dtype != torch.uint8:
                for bucket in buffer.buckets:
                    non_fp8_buckets.append(bucket)

        bucket_groups = []
        fp8_buffer = dtype_to_buffer_map[torch.uint8]
        for bucket in fp8_buffer.buckets:
            if len(bucket_groups) == len(fp8_buffer.buckets) - 1:
                # The last bucket group.
                group_buckets = [bucket] + non_fp8_buckets
            else:
                # The first N-1 bucket groups.
                group_buckets = [bucket]
            bucket_groups.append(
                _ParamAndGradBucketGroup(
                    group_buckets,
                    buffer.ddp_config,
                    buffer.data_parallel_group,
                    buffer.data_parallel_world_size,
                )
            )
        return bucket_groups


================================================
FILE: megatron/core/distributed/reduce_scatter_with_fp32_accumulation.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.


from typing import Any

import torch


class _ReduceScatterWithFP32AccumulationWorkHandle:
    """Work handle to return to user when using reduce_scatter_with_fp32_accumulation with
    async_op=True."""

    def __init__(
        self,
        all_to_all_handle: Any,
        all_to_all_output_tensor: torch.Tensor,
        output_tensor: torch.Tensor,
        world_size: int,
    ):
        """Initialize WorkHandle object."""
        self.all_to_all_handle = all_to_all_handle
        self.all_to_all_output_tensor = all_to_all_output_tensor
        self.output_tensor = output_tensor
        self.world_size = world_size

    def wait(self):
        """Wait until communication (and associated computation) is completed."""
        # Wait for communication to complete if needed.
        if self.all_to_all_handle is not None:
            self.all_to_all_handle.wait()

        # Accumulate into a fp32 sum.
        output_tensor_in_fp32 = torch.sum(
            self.all_to_all_output_tensor.view((self.world_size, -1)), dim=0, dtype=torch.float32
        )
        assert output_tensor_in_fp32.dtype == torch.float32

        # Copy downcasted sum into output_tensor.
        self.output_tensor.copy_(output_tensor_in_fp32)


def reduce_scatter_with_fp32_accumulation(
    output_tensor: torch.Tensor,
    input_tensor: torch.Tensor,
    op: torch.distributed.ReduceOp,
    group: torch.distributed.ProcessGroup,
    async_op: bool,
):
    """Reduce-scatter with FP32 accumulation.

    Collects input_tensor in lower precision using an all-to-all, then locally accumulates in FP32
    precision, then downcasts final sum back into right location in input_tensor.


    Args:
        output_tensor (torch.Tensor): Output tensor with reduce-scattered output (only the shard).
        input_tensor (torch.Tensor): Input tensor that needs to be reduce-scattered.
        op (torch.distributed.ReduceOp): Only torch.distributed.ReduceOp.SUM is supported.
        group (torch.distributed.ProcessGroup): Process group to use for reduce-scatter.
        async_op (bool): Only False is supported right now.
    """
    # Make sure arguments conform to the implementation.
    assert op == torch.distributed.ReduceOp.SUM

    # Get world_size.
    if group is None:
        world_size = torch.distributed.get_world_size()
    else:
        world_size = group.size()

    # Make sure input_tensor size is divisible by world size.
    assert input_tensor.numel() % world_size == 0

    # Call all_to_all (every rank should have their respective gradient shards collected from
    # all ranks). We also create a tensor for the all-to-all output (the all-to-all collective
    # cannot be performed in-place).
    all_to_all_output_tensor = torch.empty_like(input_tensor)
    all_to_all_handle = torch.distributed.all_to_all_single(
        output=all_to_all_output_tensor, input=input_tensor, group=group, async_op=async_op
    )

    # Create a work handle to finish communication and reduction.
    reduce_scatter_handle = _ReduceScatterWithFP32AccumulationWorkHandle(
        all_to_all_handle, all_to_all_output_tensor, output_tensor, world_size
    )
    if async_op:
        # Return work handle; consumers can call .wait() to ensure communication and associated
        # reduction complete.
        return reduce_scatter_handle
    else:
        # Wait on work handle.
        reduce_scatter_handle.wait()


================================================
FILE: megatron/core/distributed/torch_fully_sharded_data_parallel.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from typing import Optional, Set

import torch

try:
    from torch.distributed import DeviceMesh
    from torch.distributed.fsdp import fully_shard

    HAVE_FSDP = True
except ImportError:
    HAVE_FSDP = False

from torch.distributed import ProcessGroup

from megatron.core.fp8_utils import is_float8tensor

from .. import parallel_state, tensor_parallel
from ..models.common.embeddings.language_model_embedding import LanguageModelEmbedding
from ..models.common.embeddings.rotary_pos_embedding import RotaryEmbedding
from ..transformer.transformer_config import TransformerConfig
from ..transformer.transformer_layer import TransformerLayer
from .data_parallel_base import _BaseDataParallel
from .distributed_data_parallel_config import DistributedDataParallelConfig


class TorchFullyShardedDataParallel(_BaseDataParallel):
    """
    Enables fully sharded data parallelism by wrapping the given model with
    the PyTorch FSDP2 API:
    https://github.com/pytorch/torchtitan/blob/main/docs/fsdp.md
    To utilize this class, PyTorch version >= 2.4.0 is required.

    Args:
        config: Transformer config object.
        ddp_config: TorchDistributedDataParallel config object.
        module: Underlying model.
        sub_modules_to_wrap: Set of sub_modules to shard with FSDP.
            Parameters within each sub_module will be all-gathered just-in-time.
            The default set includes the following submodules derived from the
            GPT model architecture:
                TransformerLayer (all Transformer layers)
                LanguageModelEmbedding (initial embedding layer)
                RotaryEmbedding  (initial RoPE layer)
                tensor_parallel.ColumnParallelLinear (final output layer)

            User can set _fsdp_modules attribute on submodules to set additional
            submodules to shard with FSDP.
        process_group: Optional ProcessGroup to use for distributed operations.
            If None (default), the data parallel process group will be obtained from
            parallel_state.get_data_parallel_group(with_context_parallel=True).
    """

    def __init__(
        self,
        config: TransformerConfig,
        ddp_config: DistributedDataParallelConfig,
        module: torch.nn.Module,
        sub_modules_to_wrap: Set[torch.nn.Module] = {
            TransformerLayer,
            LanguageModelEmbedding,
            RotaryEmbedding,
            tensor_parallel.ColumnParallelLinear,
        },
        disable_bucketing: bool = False,
        process_group: Optional[ProcessGroup] = None,
    ):

        assert (
            HAVE_FSDP
        ), 'TorchFullyShardedDataParallel requires PyTorch >= 2.4.0 with FSDP 2 support.'

        super().__init__(config=config, module=module)

        if process_group is None:
            self.process_group = parallel_state.get_data_parallel_group(with_context_parallel=True)
        else:
            self.process_group = process_group

        self.device_mesh = DeviceMesh.from_group(self.process_group, "cuda")
        kwargs = {
            "mesh": self.device_mesh,
            "reshard_after_forward": getattr(ddp_config, "reshard_after_forward", True),
        }

        self.ddp_config = ddp_config

        def save_custom_attrs(module):
            custom_attrs = {}
            for name, param in module.named_parameters():
                attrs = vars(param)
                if is_float8tensor(param):
                    # disable fp8 transpose cache and perform transposing fp8 weights
                    # at each micro-batch because torch-FSDP doesn't recognize the
                    # micro-batch id, thus removing unnecessary memory stores
                    attrs['_fp8_attrs']['transpose_invalid'] = False
                    del attrs['_fp8_attrs']['transpose']
                custom_attrs[name] = {k: v for k, v in attrs.items()}
            return custom_attrs

        def restore_custom_attrs(module, custom_attrs):
            for name, param in module.named_parameters():
                if name in custom_attrs:
                    for attr_name, attr_value in custom_attrs[name].items():
                        setattr(param, attr_name, attr_value)

        # Save the custom attributes on Parameters before FSDP overwrites them.
        # See https://github.com/pytorch/pytorch/issues/136929.
        attrs = save_custom_attrs(self.module)

        # Local transformer implementation does not support ColumnParallelLinear.
        if config.transformer_impl == "local":
            sub_modules_to_wrap = [
                sub_module
                for sub_module in sub_modules_to_wrap
                if sub_module != tensor_parallel.ColumnParallelLinear
            ]
        sub_modules_to_wrap = set(sub_modules_to_wrap)
        for sub_module in self.module.modules():
            fsdp_modules = getattr(sub_module, "_fsdp_modules", [])
            for f in fsdp_modules:
                sub_modules_to_wrap.add(f)

        prev_module = None
        for sub_module in self.module.modules():
            # Wrap individual submodules to fetch parameters just-in-time rather than
            # conservatively fetching all parameters at the start of each iteration.
            # See https://github.com/pytorch/pytorch/issues/114299.
            if any(
                isinstance(sub_module, sub_module_to_wrap)
                for sub_module_to_wrap in sub_modules_to_wrap
            ):
                fully_shard(sub_module, **kwargs)

                # Explicitly set the FSDP backward prefetch schedule to prevent activation
                # recomputation from disrupting the automatically generated default schedule.
                if config.recompute_granularity is not None:
                    sub_module.set_modules_to_backward_prefetch(
                        [prev_module] if prev_module else []
                    )
                prev_module = sub_module

        # Wrap the root module as required by the FSDP API.
        # See https://github.com/pytorch/pytorch/issues/114299.
        fully_shard(self.module, **kwargs)

        restore_custom_attrs(self.module, attrs)

    def load_state_dict(self, state_dict, strict=True):
        """
        No-op because tensors are already loaded in-place by
        `_load_base_checkpoint` with FSDP2."""
        pass


================================================
FILE: megatron/core/distributed/torch_fully_sharded_data_parallel_config.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from dataclasses import dataclass
from typing import Union

from megatron.core.distributed.distributed_data_parallel_config import DistributedDataParallelConfig


@dataclass
class TorchFullyShardedDataParallelConfig(DistributedDataParallelConfig):
    """Configuration for TorchFullyShardedDataParallel."""

    reshard_after_forward: Union[bool, int] = True
    """
    Controls the parameter behavior after forward.

    See PyTorch for complete documentation:
    https://github.com/pytorch/pytorch/blob/ac8ddf115065106f038865389a07f2d0c9ed5e11/torch/distributed/fsdp/_fully_shard/_fully_shard.py#L97C31-L97C49 # pylint: disable=line-too-long 
    """


================================================
FILE: megatron/core/energy_monitor.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

"""Megatron Energy Monitoring (NVML)"""

import torch
import torch.distributed as dist

try:
    from pynvml import (
        NVMLError,
        nvmlDeviceGetHandleByIndex,
        nvmlDeviceGetTotalEnergyConsumption,
        nvmlInit,
        nvmlShutdown,
    )

    has_nvml = True
except ImportError:
    has_nvml = False


class EnergyMonitor:
    """
    Energy monitoring using NVML.

    All ranks in the process group are expected to call functions lap() and get_total().
    Energy is monitored across all ranks and aggregated with an all-reduce.
    """

    def __init__(self) -> None:
        """Initialize EnergyMonitor."""
        self._total_energy = 0
        self._lap_energy = 0
        self._last_energy = 0
        self._handle = None

    def setup(self) -> None:
        """Setup the NVML Handler."""
        if has_nvml:
            nvmlInit()
            self._handle = nvmlDeviceGetHandleByIndex(torch.cuda.current_device())

    def shutdown(self) -> None:
        """Shutdown NVML."""
        if has_nvml:
            nvmlShutdown()

    def pause(self) -> None:
        """Pause energy monitor (must resume afterward)."""
        if has_nvml:
            energy = self._get_energy()
            self._lap_energy += energy - self._last_energy

    def resume(self) -> None:
        """Resume/start energy monitor."""
        if has_nvml:
            self._last_energy = self._get_energy()

    def _get_energy(self) -> int:
        """Get current energy consumption from NVML."""
        try:
            return nvmlDeviceGetTotalEnergyConsumption(self._handle)
        except NVMLError:
            return self._last_energy  # return *something* if it errors

    def lap(self) -> float:
        """Returns lap (iteration) energy (J) and updates total energy."""
        if not has_nvml:
            return 0.0

        energy = self._get_energy()
        lap_energy = self._lap_energy + (energy - self._last_energy)

        self._total_energy += lap_energy
        self._lap_energy = 0
        self._last_energy = energy

        lap_tensor = torch.tensor([lap_energy], dtype=torch.int64, device='cuda')
        dist.all_reduce(lap_tensor, op=dist.ReduceOp.SUM)

        return lap_tensor.item() / 1000.0

    def get_total(self) -> float:
        """Get total energy consumption (J) across all GPUs."""
        if not has_nvml:
            return 0.0

        energy_tensor = torch.tensor([self._total_energy], dtype=torch.int64, device='cuda')
        dist.all_reduce(energy_tensor, op=dist.ReduceOp.SUM)

        return energy_tensor.item() / 1000.0


================================================
FILE: megatron/core/enums.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

import enum


class ModelType(enum.Enum):
    """Model type."""

    encoder_or_decoder = 1


class Fp8Recipe(str, enum.Enum):
    """FP8 recipe names: delayed, tensorwise, mxfp8, blockwise, custom."""

    delayed = "delayed"
    tensorwise = "tensorwise"
    mxfp8 = "mxfp8"
    blockwise = "blockwise"
    custom = "custom"


class Fp4Recipe(str, enum.Enum):
    """FP4 recipe names: nvfp4, custom."""

    nvfp4 = "nvfp4"
    custom = "custom"


================================================
FILE: megatron/core/export/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/core/export/data_type.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from enum import Enum

DataType = Enum('DataType', ["bfloat16", "float16", "float32"])


================================================
FILE: megatron/core/export/export_config.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import warnings
from dataclasses import dataclass
from typing import Optional


@dataclass
class ExportConfig:
    """Base configuration for Megatron Core Export

    These parameters control the export setting for trtllm
    """

    inference_tp_size: int = 1

    inference_pp_size: int = 1

    use_parallel_embedding: bool = False

    use_embedding_sharing: Optional[bool] = None

    def __post_init__(self):
        if self.use_embedding_sharing is not None:
            with warnings.catch_warnings():
                warnings.simplefilter("always")
                warnings.warn(
                    "use_embedding_sharing is deprecated in ExportConfig, "
                    "use share_embeddings_and_output_weights in TRTLLMHelper instead",
                    DeprecationWarning,
                    stacklevel=3,
                )


================================================
FILE: megatron/core/export/model_type.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from enum import Enum

ModelType = Enum(
    'ModelType',
    ["gpt", "gptnext", "llama", "falcon", "starcoder", "mixtral", "gemma", "nemotron_nas"],
)


================================================
FILE: megatron/core/export/trtllm/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/core/export/trtllm/engine_builder/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


try:
    import tensorrt_llm
    from tensorrt_llm._common import check_max_num_tokens
    from tensorrt_llm.builder import BuildConfig
    from tensorrt_llm.commands.build import build as build_trtllm
    from tensorrt_llm.logger import logger
    from tensorrt_llm.lora_manager import LoraConfig
    from tensorrt_llm.models.modeling_utils import optimize_model, preprocess_weights
    from tensorrt_llm.plugin import PluginConfig

    HAVE_TRTLLM = True
except ImportError:
    HAVE_TRTLLM = False


class TRTLLMEngineBuilder:
    """A utility class to build TRTLLM engine"""

    @staticmethod
    def build_and_save_engine(
        engine_dir: str,
        trtllm_model_weights: dict,
        trtllm_model_config,
        max_input_len: int = 1024,
        max_output_len: int = 1024,
        max_batch_size: int = 4,
        lora_ckpt_list=None,
        use_lora_plugin=None,
        max_lora_rank: int = 64,
        lora_target_modules=None,
        max_prompt_embedding_table_size: int = 0,
        paged_kv_cache: bool = True,
        remove_input_padding: bool = True,
        paged_context_fmha: bool = False,
        use_refit: bool = False,
        max_num_tokens: int = None,
        max_seq_len: int = None,
        opt_num_tokens: int = None,
        max_beam_width: int = 1,
        tokens_per_block: int = 128,
        multiple_profiles: bool = False,
        gpt_attention_plugin: str = "auto",
        gemm_plugin: str = "auto",
        reduce_fusion: bool = False,
    ):
        """Method to build the TRTLLM Engine

        This method uses the TRTLLMEngineBuilder to build and save the engine to engine dir

        Args:
            engine_dir (str): The file path to save the engine
            trtllm_model_weights (dict): The TRTLLM converted model weights dict
            trtllm_model_config : The TRTLLM Config
            max_input_len (int, optional): Max input length. Defaults to 1024.
            max_output_len (int, optional): Max output length. Defaults to 1024.
            max_batch_size (int, optional): Max batch size. Defaults to 4.
            model_type (ModelType, optional): ModelType enum. Defaults to ModelType.gpt.
            lora_ckpt_list (_type_, optional): Lora checkpoint list. Defaults to None.
            use_lora_plugin (_type_, optional): Use lora plugin. Defaults to None.
            max_lora_rank (int, optional): Max lora rank. Defaults to 64.
            lora_target_modules (_type_, optional): Lora target modules. Defaults to None.
            max_prompt_embedding_table_size (int, optional): Defaults to 0.
            paged_kv_cache (bool, optional): Use Paged KV cache. Defaults to True.
            remove_input_padding (bool, optional): Remove input padding. Defaults to True.
            paged_context_fmha (bool, optional): Paged context fmha. Defaults to False.
            use_refit (bool, optional): Use refit. Defaults to False.
            max_num_tokens (int, optional): Max num of tokens. Defaults to None.
            max_seq_len (int, optional): Max seq length. Defaults to None.
            opt_num_tokens (int, optional): Opt number of tokens. Defaults to None.
            max_beam_width (int, optional): Max beam width. Defaults to 1.
            tokens_per_block (int, optional): Nmber of tokens per block. Defaults to 128.
            multiple_profiles (bool, optional): Use multiple profiles. Defaults to False.
            gpt_attention_plugin (str, optional): Gpt attention plugin to use. Defaults to "auto".
            gemm_plugin (str, optional): Gemma plugin to use. Defaults to "auto".
        """

        if not HAVE_TRTLLM:
            raise ImportError(
                "tensorrt_llm is not installed. Please install it with `pip install tensorrt-llm`"
            )

        architecture = (
            "LLaMAForCausalLM"
            if trtllm_model_config.architecture == "LlamaForCausalLM"
            else trtllm_model_config.architecture
        )
        try:
            model_cls = getattr(tensorrt_llm.models, architecture)
        except:
            raise AttributeError(f"Could not find TRTLLM model for architecture: {architecture}!")

        logger.set_level("info")
        plugin_config = PluginConfig()
        plugin_config.gpt_attention_plugin = gpt_attention_plugin
        plugin_config.gemm_plugin = gemm_plugin
        if paged_kv_cache:
            plugin_config.enable_paged_kv_cache(tokens_per_block=tokens_per_block)
        else:
            plugin_config.paged_kv_cache = False
        plugin_config.remove_input_padding = remove_input_padding
        plugin_config.use_paged_context_fmha = paged_context_fmha
        plugin_config.multiple_profiles = multiple_profiles
        plugin_config.reduce_fusion = reduce_fusion

        if max_seq_len is None:
            max_seq_len = max_input_len + max_output_len

        max_num_tokens, opt_num_tokens = check_max_num_tokens(
            max_num_tokens=max_num_tokens,
            opt_num_tokens=opt_num_tokens,
            max_seq_len=max_seq_len,
            max_batch_size=max_batch_size,
            max_input_len=max_input_len,
            max_beam_width=max_beam_width,
            remove_input_padding=remove_input_padding,
            enable_context_fmha=plugin_config.context_fmha,
            tokens_per_block=tokens_per_block,
            multiple_profiles=multiple_profiles,
        )

        build_dict = {
            "max_input_len": max_input_len,
            "max_output_len": max_output_len,
            "max_batch_size": max_batch_size,
            "max_beam_width": max_beam_width,
            "max_seq_len": max_seq_len,
            "max_num_tokens": max_num_tokens,
            "opt_num_tokens": opt_num_tokens,
            "max_prompt_embedding_table_size": max_prompt_embedding_table_size,
            "gather_context_logits": False,
            "gather_generation_logits": False,
            "strongly_typed": False,
            "builder_opt": None,
            "use_refit": use_refit,
            "multiple_profiles": multiple_profiles,
        }

        if trtllm_model_config.architecture == "DeciLMForCausalLM":
            build_dict["strongly_typed"] = True
            build_dict["use_fused_mlp"] = False
            plugin_config.use_fused_mlp = False

        build_config = BuildConfig.from_dict(build_dict, plugin_config=plugin_config)

        if use_lora_plugin is not None:
            # build_config.plugin_config.set_lora_plugin(use_lora_plugin)
            # build_config.plugin_config._lora_plugin = use_lora_plugin
            lora_config = LoraConfig(
                lora_dir=lora_ckpt_list,
                lora_ckpt_source="nemo",  # TODO : NEED TO SEE HOW TO HANDLE THIS FOR MCORE
                max_lora_rank=max_lora_rank,
                lora_target_modules=lora_target_modules,
            )
            build_config.lora_config = lora_config

        model = model_cls.from_config(trtllm_model_config)

        model = optimize_model(
            model,
            use_parallel_embedding=trtllm_model_config.use_parallel_embedding,
            share_embedding_table=trtllm_model_config.share_embedding_table,
        )

        preprocess_weights(trtllm_model_weights, trtllm_model_config)
        model.load(trtllm_model_weights)
        engine = build_trtllm(model, build_config)

        engine.save(engine_dir)
        return engine


================================================
FILE: megatron/core/export/trtllm/model_to_trllm_mapping/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/core/export/trtllm/model_to_trllm_mapping/default_conversion_dict.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from megatron.core.export.trtllm.trtllm_layers import TRTLLMLayers

# Map the most common mcore layers to TRTLLM layers
# pylint: disable=line-too-long
DEFAULT_CONVERSION_DICT = {
    # INPUT
    'embedding.word_embeddings.weight': TRTLLMLayers.vocab_embedding,
    'embedding.position_embeddings.weight': TRTLLMLayers.position_embedding,
    # ATTENTION
    'decoder.layers.input_layernorm.weight': TRTLLMLayers.input_layernorm_weight,
    'decoder.layers.input_layernorm.bias': TRTLLMLayers.input_layernorm_bias,
    'decoder.layers.self_attention.linear_qkv.weight': TRTLLMLayers.attention_qkv_weight,
    'decoder.layers.self_attention.linear_qkv.bias': TRTLLMLayers.attention_qkv_bias,
    'decoder.layers.self_attention.linear_proj.weight': TRTLLMLayers.attention_dense_weight,
    'decoder.layers.self_attention.linear_proj.bias': TRTLLMLayers.attention_dense_bias,
    # MLP
    'decoder.layers.pre_mlp_layernorm.weight': TRTLLMLayers.post_layernorm_weight,
    'decoder.layers.pre_mlp_layernorm.bias': TRTLLMLayers.post_layernorm_bias,
    'decoder.layers.mlp.linear_fc1.weight': TRTLLMLayers.mlp_fc_weight,
    'decoder.layers.mlp.linear_fc1.bias': TRTLLMLayers.mlp_fc_bias,
    'decoder.layers.mlp.linear_fc2.weight': TRTLLMLayers.mlp_projection_weight,
    'decoder.layers.mlp.linear_fc2.bias': TRTLLMLayers.mlp_projection_bias,
    # EXPERTS
    'decoder.layers.mlp.experts.experts.linear_fc1.weight': TRTLLMLayers.mlp_fc_weight_mixture_of_experts,
    'decoder.layers.mlp.experts.experts.linear_fc2.weight': TRTLLMLayers.mlp_projection_weight_mixture_of_experts,
    'decoder.layers.mlp.router.weight': TRTLLMLayers.mlp_router_weight,
    # FINAL LAYER NORM
    'decoder.final_layernorm.weight': TRTLLMLayers.final_layernorm_weight,
    'decoder.final_layernorm.bias': TRTLLMLayers.final_layernorm_bias,
    # OUTPUT LAYER
    'output_layer.weight': TRTLLMLayers.lm_head,
    # TRANSFORMER ENGINE LAYER NORM
    # ATTENTION
    'decoder.layers.self_attention.linear_qkv.layer_norm_weight': TRTLLMLayers.input_layernorm_weight,
    'decoder.layers.self_attention.linear_qkv.layer_norm_bias': TRTLLMLayers.input_layernorm_bias,
    # MLP
    'decoder.layers.mlp.linear_fc1.layer_norm_weight': TRTLLMLayers.post_layernorm_weight,
    'decoder.layers.mlp.linear_fc1.layer_norm_bias': TRTLLMLayers.post_layernorm_bias,
}

NEMOTRON_NAS_CONVERSION_DICT = {
    # Deci's (nemotron-nas) replace_with_linear Attention
    'decoder.layers.self_attention.weight': TRTLLMLayers.attention_linear_weight,
    # Deci's (nemotron-nas) replace_with_linear MLP
    'decoder.layers.mlp.weight': TRTLLMLayers.ffn_linear_weight,
    # Deci's (nemotron-nas) MLP
    'decoder.layers.mlp.linear_fc1.weight': TRTLLMLayers.ffn_fc_weight,
    'decoder.layers.mlp.linear_fc2.weight': TRTLLMLayers.ffn_projection_weight,
}


================================================
FILE: megatron/core/export/trtllm/trt_model_config.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


from megatron.core.export.model_type import ModelType

try:
    import tensorrt_llm

    HAVE_TRTLLM = True
except ImportError:
    from unittest.mock import MagicMock

    tensorrt_llm = MagicMock()
    HAVE_TRTLLM = False

TRT_MODEL_CONFIG = {
    ModelType.gpt: tensorrt_llm.models.gpt.config.GPTConfig,
    ModelType.gptnext: tensorrt_llm.models.gpt.config.GPTConfig,
    ModelType.starcoder: tensorrt_llm.models.gpt.config.GPTConfig,
    ModelType.mixtral: tensorrt_llm.models.llama.config.LLaMAConfig,
    ModelType.llama: tensorrt_llm.models.llama.config.LLaMAConfig,
    ModelType.gemma: tensorrt_llm.models.GemmaConfig,
    ModelType.falcon: tensorrt_llm.models.falcon.config.FalconConfig,
    ModelType.nemotron_nas: tensorrt_llm.models.nemotron_nas.config.DeciConfig,
}


================================================
FILE: megatron/core/export/trtllm/trt_model_type.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from megatron.core.export.model_type import ModelType

TRT_MODEL_TYPE_STRING = {
    ModelType.gpt: 'GPTForCausalLM',
    ModelType.gptnext: 'GPTForCausalLM',
    ModelType.starcoder: 'GPTForCausalLM',
    ModelType.mixtral: 'LlamaForCausalLM',
    ModelType.llama: 'LlamaForCausalLM',
    ModelType.gemma: 'GemmaForCausalLM',
    ModelType.falcon: 'FalconForCausalLM',
    ModelType.nemotron_nas: 'DeciLMForCausalLM',
}


================================================
FILE: megatron/core/export/trtllm/trtllm_helper.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import json
from typing import Union

import torch

from megatron.core.export.data_type import DataType
from megatron.core.export.export_config import ExportConfig
from megatron.core.export.model_type import ModelType
from megatron.core.export.trtllm.engine_builder.trtllm_engine_builder import TRTLLMEngineBuilder
from megatron.core.export.trtllm.model_to_trllm_mapping.default_conversion_dict import (
    DEFAULT_CONVERSION_DICT,
    NEMOTRON_NAS_CONVERSION_DICT,
)
from megatron.core.export.trtllm.trt_model_config import TRT_MODEL_CONFIG
from megatron.core.export.trtllm.trt_model_type import TRT_MODEL_TYPE_STRING
from megatron.core.export.trtllm.trtllm_layers import TRTLLMLayers

# pylint: disable=line-too-long
from megatron.core.export.trtllm.trtllm_weights_converter.distributed_trtllm_model_weights_converter import (
    DistributedTRTLLMModelWeightsConverter,
)
from megatron.core.export.trtllm.trtllm_weights_converter.single_device_trtllm_model_weights_converter import (
    SingleDeviceTRTLLMModelWeightsConverter,
)
from megatron.core.export.trtllm.trtllm_weights_converter.utils import is_gated_activation
from megatron.core.transformer.transformer_config import TransformerConfig

try:
    import tensorrt_llm
    from tensorrt_llm.functional import non_gated_version
    from tensorrt_llm.layers import MoeConfig

    HAVE_TRTLLM = True
except ImportError:
    HAVE_TRTLLM = False


class TRTLLMHelper:
    """TRTLLM Helper class to convert export and build TRTLLM model."""

    def __init__(
        self,
        *,
        transformer_config: TransformerConfig,
        model_type: ModelType,
        trtllm_conversion_dict: dict = {},
        position_embedding_type: str = "learned_absolute",
        max_position_embeddings: int = None,
        rotary_percentage: int = 1.0,
        rotary_base: int = 10000,
        rope_scaling_factor: float = 8.0,
        moe_tp_mode: int = 2,
        multi_query_mode: bool = False,
        activation: str = "gelu",
        seq_len_interpolation_factor: float = None,
        moe_renorm_mode=None,
        share_embeddings_and_output_weights=False,
    ):
        """Constructor for the TRTLLMHelper

        There are two public API's supported  by this helper.
        a) get_trtllm_pretrained_config_and_model_weights
        b) build_and_save_engine

        Args:
            transformer_config (TransformerConfig): The transformer config
            model_type (ModelType): The type of the input model. Enum (megatron.core.export.model_type.ModelType)
            trtllm_conversion_dict (dict, optional): A conversion dictionary that will map your model layer names to trtllm equivalent layer names. Default dictionary is given megatron/core/export/model_to_trtllm_mapping. This dict is merged into the default dict. NOTE: Ignore layer numbers in the model layer names. (e.g) decoder.layers.0.attention_qkv.weight will be decoder.layers.attention_qkv.weight in the mapping dictionary. Defaults to {}.
            position_embedding_type (str, optional): The position embedding type. Defaults to None.
            max_position_embeddings (int, optional): Max posistion embeddings value. Defaults to None.
            rotary_percentage (int, optional): The rotary percentage if using rope embedding. Defaults to 1.0.
            rotary_base (int, optional): The rotary base (theta value) if using rope embeddings. Defaults to 10000.
            moe_tp_mode (int, optional): TRTLLM Config. Defaults to 2.
            multi_query_mode (bool, optional): Defaults to False.
            activation (str, optional): Defaults to "gelu".
            seq_len_interpolation_factor (float, optional): The sequence length interpolation factor if using rope embeddings. Defaults to None.
            moe_renorm_mode (optional) : Renormalization mode if using mixture of experts. Defaults to None.
            share_embeddings_and_output_weights (bool, optional): True if input and output layers share weights. Defaults to False.
        """

        if not HAVE_TRTLLM:
            raise ImportError(
                "tensorrt_llm is not installed. Please install it with `pip install tensorrt-llm`"
            )

        self.transformer_config = transformer_config
        self.model_type = model_type
        self.trtllm_conversion_dict = DEFAULT_CONVERSION_DICT.copy()
        if model_type == ModelType.nemotron_nas:
            self.trtllm_conversion_dict.update(NEMOTRON_NAS_CONVERSION_DICT)
        self.trtllm_conversion_dict.update(trtllm_conversion_dict)
        assert position_embedding_type in [
            "learned_absolute",
            "rope",
        ], f"Position embedding type should be one of learned_absolute, rope. You entered {position_embedding_type}"
        self.position_embedding_type = position_embedding_type
        self.max_position_embeddings = max_position_embeddings
        self.rotary_percentage = rotary_percentage
        self.rotary_base = rotary_base
        self.rope_scaling_factor = rope_scaling_factor
        self.moe_tp_mode = moe_tp_mode
        self.multi_query_mode = multi_query_mode
        self.activation = activation
        self.seq_len_interpolation_factor = seq_len_interpolation_factor
        self.moe_renorm_mode = moe_renorm_mode
        self.share_embeddings_and_output_weights = share_embeddings_and_output_weights
        self.weights_converter = None

    def _get_trtllm_config(
        self,
        export_config: ExportConfig,
        world_size: int,
        gpus_per_node: int,
        vocab_size_padded: int,
        dtype: DataType,
        fp8_quantized: bool = False,
        fp8_kvcache: bool = False,
    ):
        """Get TRTLLM Config

        Returns appropriate TRTLLM PretrainedConfig used by TRTLLM for building engine

        Args:
            export_config (ExportConfig): The export config that defines inference tp , pp size etc.
            world_size (int): The number of gpus (Mostly TP * PP)
            gpus_per_node (int): Num gpus per node
            vocab_size_padded (int): Padded vocab size
            dtype (DataType): The datatype or model precision

        Returns:
            GPTConfig or the LLamaConfig or the PretrainedConfig constructed from your model config
        """
        hidden_act = self.activation
        hidden_act = (
            hidden_act.split("-")[-1]
            if self.transformer_config.num_moe_experts
            else non_gated_version(hidden_act)
        )

        config = {
            "architecture": TRT_MODEL_TYPE_STRING[self.model_type],
            "dtype": dtype.name,
            "num_hidden_layers": self.transformer_config.num_layers,
            "num_attention_heads": self.transformer_config.num_attention_heads,
            "num_key_value_heads": (
                self.transformer_config.num_query_groups
                if self.transformer_config.num_query_groups
                else self.transformer_config.num_attention_heads
            ),
            "head_size": self.transformer_config.kv_channels,
            "hidden_size": self.transformer_config.hidden_size,
            "intermediate_size": self.transformer_config.ffn_hidden_size,
            "norm_epsilon": self.transformer_config.layernorm_epsilon,
            "vocab_size": vocab_size_padded,
            "position_embedding_type": (
                "rope_gpt_neox" if self.position_embedding_type == "rope" else "learned_absolute"
            ),
            "max_position_embeddings": self.max_position_embeddings,
            "hidden_act": hidden_act,
            "use_parallel_embedding": export_config.use_parallel_embedding,
            "embedding_sharding_dim": 0,
            "share_embedding_table": self.share_embeddings_and_output_weights,
            "quantization": {
                "quant_algo": "FP8" if fp8_quantized else None,
                "kv_cache_quant_algo": "FP8" if fp8_kvcache else None,
            },
            "bias": self.transformer_config.add_bias_linear,
            "apply_query_key_layer_scaling": False,
            "rotary_pct": self.rotary_percentage,
            "rotary_base": self.rotary_base,
            "moe_num_experts": (
                0
                if self.transformer_config.moe_router_topk == 0
                else (self.transformer_config.num_moe_experts or 1)
            ),
            "moe_top_k": self.transformer_config.moe_router_topk,
            "moe_normalization_mode": self.moe_renorm_mode
            or MoeConfig.ExpertScaleNormalizationMode.RENORMALIZE,
            "moe_tp_mode": self.moe_tp_mode,
            "logits_dtype": "float32",
            "world_size": world_size,
            "tp_size": export_config.inference_tp_size,
            "pp_size": export_config.inference_pp_size,
            "gpus_per_node": gpus_per_node,
        }

        if self.model_type == ModelType.falcon:
            config["new_decoder_architecture"] = (
                False if self.transformer_config.num_layers == 32 else True
            )
            config["parallel_attention"] = True

        if self.seq_len_interpolation_factor is not None:
            config["rotary_scaling"] = {
                "type": "linear",
                "factor": float(self.seq_len_interpolation_factor),
            }

        if self.model_type == ModelType.nemotron_nas:
            hf_config_dict = json.loads(
                self.transformer_config.heterogeneous_layers_config_encoded_json
            )
            config["block_configs"] = hf_config_dict["block_configs"]
            config["rotary_scaling"] = {"type": "llama3", "factor": self.rope_scaling_factor}

        config_cls = TRT_MODEL_CONFIG[self.model_type]
        return config_cls(**config)

    def _load_scaling_factors(self, model_state_dict: dict) -> dict:
        """Loads scaling factors from model state dictionary.

        Args:
            model_state_dict (dict): Model state dictionary
        Returns:
            dict: Maps scaling factor key, to its value and the inverse. The inverse is used for casting the quantized weights.
        """
        weight_scaling_suffix = ".weights_scaling_factor"
        activation_scaling_suffix = ".activation_scaling_factor"
        mock_scales_dict = {}
        extra_state_infix = "._extra_state"
        mock_suffix = ".weight"

        for key, val in model_state_dict.items():
            if extra_state_infix in key and not key.endswith("core_attention._extra_state"):
                mock_key = key.split(extra_state_infix)[0] + mock_suffix
                mock_scales_dict[mock_key] = val

        mock_scales_dict = TRTLLMLayers.rename_input_layer_names_to_trtllm_layer_names(
            mock_scales_dict, self.trtllm_conversion_dict, False
        )
        split_gated_activation = is_gated_activation(self)

        scales = {}
        for key, val in mock_scales_dict.items():
            if val is None:
                continue

            val.seek(0)
            extra_states = torch.load(val)

            activation_scaling_factor_key = key.replace(mock_suffix, activation_scaling_suffix)
            weight_scaling_factor_key = key.replace(mock_suffix, weight_scaling_suffix)

            activation_scales = {
                "trt_llm_scale": extra_states["scale_inv_fwd"][0].view(1),
                "weight_multiplier": extra_states["scale_fwd"][0].view(1),
            }

            weight_scales = {
                "trt_llm_scale": extra_states["scale_inv_fwd"][1].view(1),
                "weight_multiplier": extra_states["scale_fwd"][1].view(1),
            }

            scales[activation_scaling_factor_key] = activation_scales
            scales[weight_scaling_factor_key] = weight_scales
            if split_gated_activation and ".mlp.fc" in key:
                scales[activation_scaling_factor_key.replace("fc", "gate")] = activation_scales
                scales[weight_scaling_factor_key.replace("fc", "gate")] = weight_scales

        return scales

    # pylint: disable=line-too-long
    def get_trtllm_pretrained_config_and_model_weights(
        self,
        model_state_dict,
        dtype: DataType,
        export_config: ExportConfig = None,
        on_device_distributed_conversion: bool = False,
        vocab_size: int = None,
        gpus_per_node: int = None,
        state_dict_split_by_layer_numbers: bool = True,
        fp8_quantized: bool = False,
        fp8_kvcache: bool = False,
    ):
        """Get TRTLLM Config and Converted Model Weights

        This function returns the trtllm model weights as a list.
        There are two modes for conversion. The default is to use a single device cpu/gpu for conversion.
        NOTE: For faster performance, if your entire model will fit in memory, pre transfer the model state dict to cuda device and then call this function.
        For on device conversion it returns weights which will be used on the device itself.
        Same thing happens with the pretrained config

        Args:
            model_state_dict (dict): The input model state dictionary (Entire model state loaded on CPU) or the model state dict of each GPU in the case of on_device conversion)
            export_config (ExportConfig): The export config used to define inference tp size, pp size etc. Used only for on device conversion.
            dtype (DataType): The data type of model precision
            on_device_distributed_conversion (bool, optional): Convert on gpus in distributed setting. This assumes that the model state dict is sharded according to required inference model parallelism and that each gpu gets its part of the model state dict . Defaults to False.
            vocab_size (int, optional): The vocabulary size. Defaults to None.
            gpus_per_node (int, optional): The number of gpus per node. Used for on device conversion.
            state_dict_split_by_layer_numbers (bool, optional): Are the model layers split by layer numbers in state dict. For example : mlp.fc1.weight can be represented like mlp.fc1.weight of shape [num_layers, hidden_dim, ffn_hidden_dim]} or it can be like mlp.fc1.layers.0.weight of shape [hidden_dim, ffn_hidden_dim], then mlp.fc1.layers.1.weight ... for all layers. If you use represenation 2 set this to True. Defaults to True

        Returns:
            Two lists . First list of trtllm converted model weights(Either on device, or a list of weights for each gpu) and the trtllm_model_configs.
        """
        assert model_state_dict is not None, "Model state dict is not set"

        scales = self._load_scaling_factors(model_state_dict) if fp8_quantized else {}
        model_state_dict = {k: v for k, v in model_state_dict.items() if "extra_state" not in k}

        if on_device_distributed_conversion:
            assert vocab_size is not None, "Need to pass in vocab_size for on device"
            supported_model = self.model_type in [
                ModelType.gpt,
                ModelType.gptnext,
                ModelType.llama,
                ModelType.nemotron_nas,
            ]
            assert (
                supported_model
            ), "On device conversion only supported for model types gptnext and llama"
            assert export_config is None, (
                "Export config is inferred based on the parallel state. "
                "If you want to set inference tp 2, then load the model with this TP2 setting and just pass in the model state dict."
            )

            assert (
                gpus_per_node is not None
            ), "Need to pass in gpus_per_node for on device conversion"
            trtllm_model_weights_on_device, trtllm_model_config = (
                self._get_trtllm_pretrained_config_and_model_weights_in_distributed_setting(
                    model_state_dict,
                    dtype,
                    vocab_size,
                    gpus_per_node,
                    scales,
                    fp8_quantized,
                    fp8_kvcache,
                )
            )
            return [trtllm_model_weights_on_device], [trtllm_model_config]

        else:
            assert (
                vocab_size is None
            ), "Vocab size is inferred from the input layer for cpu conversion. So leave it as None"
            trtllm_model_weights_list, trtllm_model_config_list = (
                self._get_trtllm_pretrained_config_and_model_weights_list_on_single_device(
                    export_config,
                    model_state_dict,
                    dtype,
                    gpus_per_node,
                    state_dict_split_by_layer_numbers,
                    scales,
                    fp8_quantized,
                    fp8_kvcache,
                )
            )

            return trtllm_model_weights_list, trtllm_model_config_list

    def _add_scales_to_converter(
        self,
        converter: Union[
            SingleDeviceTRTLLMModelWeightsConverter, DistributedTRTLLMModelWeightsConverter
        ],
        scales: dict,
        fp8_kvcache: bool,
    ):
        """Adds scaling factors to the distributed and single device converters.

        Args:
            converter (ModelWeightConverter): Converter, holding the TRT-LLM model weights.
            scales (dict): Dictionary holding TRT-LLM scaling factors
            fp8_kvcache (bool): If true, creates scaling factors (equal to 1.0) for kv_cache quantization
        """
        trt_scales = {key: scale["trt_llm_scale"] for key, scale in scales.items()}
        kv_scales = {}
        if fp8_kvcache:
            for key in converter.trtllm_model_weights:
                if ".attention.qkv.weight" in key:
                    kv_key = key.split(".qkv")[0] + ".kv_cache_scaling_factor"
                    kv_scales[kv_key] = torch.tensor([1.0], dtype=torch.float32)

        converter.trtllm_model_weights |= trt_scales | kv_scales

    def _get_trtllm_pretrained_config_and_model_weights_in_distributed_setting(
        self,
        model_state_dict: dict,
        dtype: DataType,
        vocab_size: int,
        gpus_per_node: int,
        scales: dict,
        fp8_quantized: bool,
        fp8_kvcache: bool,
    ):
        """Get the TRTLLM Pretrained config and model weights list in a distributed setting

        This function assumes the  model state dict is distributed according to model parallelism .
        Each device gets its own model state dict

        Args:
            export_config (ExportConfig): The export config to set inference tp, pp size etc.
            model_state_dict (dict): The model state dictionary (All collected on cpu)
            dtype (DataType): The data type or model precision
            vocab_size (int): Tokenizer vocab size
            gpus_per_node (int): The number of gpus per node
            scales (dict): Dictionary with fp8 scaling factors
            fp8_quantized (bool): True for fp8 checkpoint export
            fp8_kvcache (bool): True for fp8 KV-cache quantization
        Returns:
            Two lists . List of trtllm converted model weights and trtllm model configs (One for each gpu).
        """

        self.weights_converter = DistributedTRTLLMModelWeightsConverter(
            transformer_config=self.transformer_config,
            dtype=dtype,
            multi_query_mode=self.multi_query_mode,
            activation=self.activation,
            scales=scales,
        )
        self.weights_converter.convert(
            model_state_dict=model_state_dict,
            trtllm_conversion_dict=self.trtllm_conversion_dict,
            tokenizer_vocab_size=vocab_size,
        )
        self._add_scales_to_converter(self.weights_converter, scales, fp8_kvcache)

        export_config = ExportConfig(
            inference_pp_size=self.weights_converter.inference_pp_size,
            inference_tp_size=self.weights_converter.inference_tp_size,
            use_parallel_embedding=True,
        )

        world_size = export_config.inference_tp_size * export_config.inference_pp_size

        trtllm_model_config = self._get_trtllm_config(
            export_config=export_config,
            world_size=world_size,
            gpus_per_node=gpus_per_node,
            vocab_size_padded=vocab_size,
            dtype=dtype,
            fp8_quantized=fp8_quantized,
            fp8_kvcache=fp8_kvcache,
        )

        model_parallel_rank = (
            self.weights_converter.pp_rank * self.weights_converter.inference_tp_size
            + self.weights_converter.tp_rank
        )

        trtllm_model_config.mapping = tensorrt_llm.Mapping(
            world_size=world_size,
            rank=model_parallel_rank,
            tp_size=export_config.inference_tp_size,
            pp_size=export_config.inference_pp_size,
        )

        return self.weights_converter.trtllm_model_weights, trtllm_model_config

    def _get_trtllm_pretrained_config_and_model_weights_list_on_single_device(
        self,
        export_config: ExportConfig,
        model_state_dict: dict,
        dtype: DataType,
        gpus_per_node,
        state_dict_split_by_layer_numbers,
        scales: dict,
        fp8_quantized: bool,
        fp8_kvcache: bool,
    ):
        """Get the TRTLLM Pretrained config and model weights list (one per gpu rank) on single device (CPU/GPU)

        This function assumes the entire model state dict is present in CPU or on one GPU

        Args:
            export_config (ExportConfig): The export config to set inference tp, pp size etc.
            model_state_dict (dict): The model state dictionary (All collected on cpu)
            dtype (DataType): The data type or model precision
            gpus_per_node (int, optional): Number of gpus per node
            state_dict_split_by_layer_numbers (bool, optional): Are the model layers split by layer numbers in state dict. For example : mlp.fc1.weight can be represented like mlp.fc1.weight of shape [num_layers, hidden_dim, ffn_hidden_dim]} or it can be like mlp.fc1.layers.0.weight of shape [hidden_dim, ffn_hidden_dim], then mlp.fc1.layers.1.weight ... for all layers. If you use represenation 2 set this to True. Defaults to True
            scales (dict): Dictionary with fp8 scaling factors
            fp8_quantized (bool): True for fp8 checkpoint export
            fp8_kvcache (bool): True for fp8 KV-cache quantization

        Returns:
            Two lists . List of trtllm converted model weights and trtllm model configs (One for each gpu).
        """
        trtllm_model_configs_list = []
        trtllm_model_weights_list = []

        self.weights_converter = SingleDeviceTRTLLMModelWeightsConverter(
            export_config=export_config,
            transformer_config=self.transformer_config,
            dtype=dtype,
            activation=self.activation,
            multi_query_mode=self.multi_query_mode,
            scales=scales,
        )
        # Convert the input model state dict to trtllm model weights dictionary
        self.weights_converter.convert(
            model_state_dict=model_state_dict,
            trtllm_conversion_dict=self.trtllm_conversion_dict,
            state_dict_split_by_layer_numbers=state_dict_split_by_layer_numbers,
        )

        self._add_scales_to_converter(self.weights_converter, scales, fp8_kvcache)

        vocab_size_padded = self.weights_converter.get_padded_vocab_size()
        world_size = export_config.inference_tp_size * export_config.inference_pp_size
        gpus_per_node = gpus_per_node or export_config.inference_tp_size

        for gpu_rank in range(world_size):
            mapping = tensorrt_llm.Mapping(
                world_size=world_size,
                rank=gpu_rank,
                tp_size=export_config.inference_tp_size,
                pp_size=export_config.inference_pp_size,
            )

            # Important to create a new instance everytime so that the list elements have differnt rank values in the mapping object
            trtllm_model_config = self._get_trtllm_config(
                export_config=export_config,
                world_size=world_size,
                gpus_per_node=gpus_per_node,
                vocab_size_padded=vocab_size_padded,
                dtype=dtype,
                fp8_quantized=fp8_quantized,
                fp8_kvcache=fp8_kvcache,
            )
            trtllm_model_config.mapping = mapping
            trtllm_model_configs_list.append(trtllm_model_config)

            # Get the model weights for each rank and append it to the trtllm_model_weights_list
            trtllm_model_weights_per_gpu = self.weights_converter.get_local_model_weights_per_gpu(
                mapping, trtllm_model_config
            )
            trtllm_model_weights_list.append(trtllm_model_weights_per_gpu)

        return trtllm_model_weights_list, trtllm_model_configs_list

    def build_and_save_engine(
        self,
        engine_dir: str,
        trtllm_model_weights: dict,
        trtllm_model_config,
        max_input_len: int = 1024,
        max_output_len: int = 1024,
        max_batch_size: int = 4,
        lora_ckpt_list=None,
        use_lora_plugin=None,
        max_lora_rank: int = 64,
        lora_target_modules=None,
        max_prompt_embedding_table_size: int = 0,
        paged_kv_cache: bool = True,
        remove_input_padding: bool = True,
        paged_context_fmha: bool = False,
        use_refit: bool = False,
        max_num_tokens: int = None,
        max_seq_len: int = None,
        opt_num_tokens: int = None,
        max_beam_width: int = 1,
        tokens_per_block: int = 128,
        multiple_profiles: bool = False,
        gpt_attention_plugin: str = "auto",
        gemm_plugin: str = "auto",
    ):
        """Method to build the TRTLLM Engine

        This method uses the TRTLLMEngineBuilder to build and save the engine to engine dir

        Args:
            engine_dir (str): The file path to save the engine
            trtllm_model_weights (dict): The TRTLLM converted model weights dict
            trtllm_model_config : The TRTLLM Config
            max_input_len (int, optional): Max input length. Defaults to 1024.
            max_output_len (int, optional): Max output length. Defaults to 1024.
            max_batch_size (int, optional): Max batch size. Defaults to 4.
            lora_ckpt_list (_type_, optional): Lora checkpoint list. Defaults to None.
            use_lora_plugin (_type_, optional): Use lora plugin. Defaults to None.
            max_lora_rank (int, optional): Max lora rank. Defaults to 64.
            lora_target_modules (_type_, optional): Lora target modules. Defaults to None.
            max_prompt_embedding_table_size (int, optional): Max size of prompt embedding table. Defaults to 0.
            paged_kv_cache (bool, optional): Use Paged KV cache. Defaults to True.
            remove_input_padding (bool, optional): Remove input padding. Defaults to True.
            paged_context_fmha (bool, optional): Paged context fmha. Defaults to False.
            use_refit (bool, optional): Use refit. Defaults to False.
            max_num_tokens (int, optional): Max num of tokens. Defaults to None.
            max_seq_len (int, optional): Max seq length. Defaults to None.
            opt_num_tokens (int, optional): Opt number of tokens. Defaults to None.
            max_beam_width (int, optional): Max beam width. Defaults to 1.
            tokens_per_block (int, optional): Nmber of tokens per block. Defaults to 128.
            multiple_profiles (bool, optional): Use multiple profiles. Defaults to False.
            gpt_attention_plugin (str, optional): Gpt attention plugin to use. Defaults to "auto".
            gemm_plugin (str, optional): Gemma plugin to use. Defaults to "auto".
        """

        engine = TRTLLMEngineBuilder.build_and_save_engine(
            engine_dir,
            trtllm_model_weights,
            trtllm_model_config,
            max_input_len,
            max_output_len,
            max_batch_size,
            lora_ckpt_list,
            use_lora_plugin,
            max_lora_rank,
            lora_target_modules,
            max_prompt_embedding_table_size,
            paged_kv_cache,
            remove_input_padding,
            paged_context_fmha,
            use_refit,
            max_num_tokens,
            max_seq_len,
            opt_num_tokens,
            max_beam_width,
            tokens_per_block,
            multiple_profiles,
            gpt_attention_plugin,
            gemm_plugin,
        )

        return engine


================================================
FILE: megatron/core/export/trtllm/trtllm_layers.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import re
from enum import Enum
from typing import Tuple


class TRTLLMLayers(Enum):
    """TRTLLM Layer names

    This Enum will be used to map input model layer names to TRTLLM Layer names
    """

    # ONE TIME LAYERS (NOT ASSOCIATED TO TRANSFORMER BLOCK)
    # Input layers
    position_embedding = 'transformer.position_embedding.weight'
    vocab_embedding = 'transformer.vocab_embedding.weight'
    lm_head = 'lm_head.weight'

    # Output layers
    final_layernorm_weight = 'transformer.ln_f.weight'
    final_layernorm_bias = 'transformer.ln_f.bias'

    # TRANSFORMER LAYERS
    # Attention block related layers
    input_layernorm_weight = 'transformer.layers.input_layernorm.weight'
    input_layernorm_bias = 'transformer.layers.input_layernorm.bias'
    attention_qkv_weight = 'transformer.layers.attention.qkv.weight'
    attention_qkv_bias = 'transformer.layers.attention.qkv.bias'
    attention_dense_weight = 'transformer.layers.attention.dense.weight'
    attention_dense_bias = 'transformer.layers.attention.dense.bias'

    # Deci's replace_with_linear Attention
    attention_linear_weight = 'transformer.layers.attention.weight'

    # mlp layers
    mlp_fc_weight = 'transformer.layers.mlp.fc.weight'
    mlp_fc_bias = 'transformer.layers.mlp.fc.bias'
    post_layernorm_weight = 'transformer.layers.post_layernorm.weight'
    post_layernorm_bias = 'transformer.layers.post_layernorm.bias'
    mlp_projection_weight = 'transformer.layers.mlp.proj.weight'
    mlp_projection_bias = 'transformer.layers.mlp.proj.bias'

    # Deci's (nemotron-nas) FFN
    ffn_fc_weight = 'transformer.layers.ffn.fc.weight'
    ffn_projection_weight = 'transformer.layers.ffn.proj.weight'
    # Deci's replace_with_linear FFN
    ffn_linear_weight = 'transformer.layers.ffn.weight'

    # mixture of expert layers
    mlp_router_weight = 'transformer.layers.mlp.router.weight'
    mlp_fc_weight_mixture_of_experts = 'transformer.layers.mlp.fc.weight.expert'
    mlp_projection_weight_mixture_of_experts = 'transformer.layers.mlp.proj.weight.expert'

    @staticmethod
    def return_layer_name_and_number(layer_name: str) -> Tuple[str, int]:
        """Helper function to return layer name and number
        Given an input layer e.g decoder.layers.2.self_attention.linear_qkv.weight,
        this function returns decoder.layers.self_attention.linear_qkv.weight and layernumber 2.
        In case no layer number is present, it returns None for the layer number
        Args:
            layer_name (dict): The input layer name

        Returns:
            Tuple[str, int]: The layer name , layer number (layer number could be None)
        """
        # Use regular expression to find the number specifically after 'layers.'
        match = re.search(r'(?<=layers\.)\d+(?=\.)', layer_name)
        if match:
            # Extract the number and remove it from the layer name
            number = match.group(0)
            layer_name_without_number = re.sub(r'\.{}\.'.format(number), '.', layer_name)
            return layer_name_without_number, int(number)
        else:
            # Return the original name if no number is found
            return layer_name, None

    # pylint: disable=line-too-long
    @staticmethod
    def rename_input_layer_names_to_trtllm_layer_names(
        model_state_dict: dict,
        trtllm_conversion_dict: dict,
        state_dict_split_by_layer_numbers: bool = True,
    ) -> dict:
        """Helper function to rename model layer names to TRTLLM Layer names

        We go through each layer (keys) in the model state dict,
        and map it to the equivalent TRTLLMLayer name (megatron/core/export/trtllm/trtllm).
        If we have a layer number associated with layer, we extract it out,
        map the original layer name to equivalent trtllm layer name and add layer number back.
        CPU Conversion will pass in model state dict without layer numbers
        (i.e decoder.layers.mlp.linear_fc1.weight of shape [num_layers, hidden_dim, 4 * hidden_dim]) .
        GPU conversion will pass model state dict with each layer separated
        (i.e decoder.layers.2.mlp.linear_fc1.weight of shape [hidden_dim, 4 * hidden_dim]).

        Args:
            model_state_dict (dict): The original model state dict
            trtllm_conversion_dict (dict): The conversion dictionary mapping input model layer names to trtllm layer names
            state_dict_split_by_layer_numbers (bool, optional): Are the model layers split by layer numbers in state dict. For example : mlp.fc1.weight can be represented like mlp.fc1.weight of shape [num_layers, hidden_dim, ffn_hidden_dim]} or it can be like mlp.fc1.layers.0.weight of shape [hidden_dim, ffn_hidden_dim], then mlp.fc1.layers.1.weight ... for all layers. If you use represenation 2 set this to True. Defaults to True

        Raises:
            ValueError: In case the keys dont match to trtllm keys or if all model layers are not mapped to equivalent trtllm keys

        Returns:
            dict: The model state dict with the key (i.e original model layer name) replaced by trtllm layer names
        """
        for original_model_layer_name in list(model_state_dict.keys()):
            if (
                "_extra_state" in original_model_layer_name
                or "adapter_layer" in original_model_layer_name
            ):
                del model_state_dict[original_model_layer_name]
                continue

            original_layer_name_without_number, layer_number = (
                TRTLLMLayers.return_layer_name_and_number(original_model_layer_name)
            )
            if 'layers' in original_layer_name_without_number and state_dict_split_by_layer_numbers:
                assert (
                    layer_number is not None
                ), f"Layer number is None for {original_model_layer_name} and state_dict_split_by_layer_numbers is set to True. Consider setting it False"

            if original_layer_name_without_number not in trtllm_conversion_dict:
                raise ValueError(
                    f'Unable to rename key {original_layer_name_without_number}. Provide an appropriate mapping in the trtllm_conversion_dict when you initialize TRTLLMHelper'
                )

            trtllm_layer = trtllm_conversion_dict[original_layer_name_without_number]
            assert isinstance(
                trtllm_layer, TRTLLMLayers
            ), f"{trtllm_layer} is not supported for conversion. Please use one of the TRTLLMLayerNames we provided in megatron/core/export/trtllm/trtllm_layer_names"

            value = model_state_dict.pop(original_model_layer_name)

            if layer_number is not None:
                trtllm_layer_name_with_number = re.sub(
                    r'(?<=layers\.)', f'{layer_number}.', trtllm_layer.value
                )
                model_state_dict[trtllm_layer_name_with_number] = value
            else:
                model_state_dict[trtllm_layer.value] = value

        return model_state_dict


# These layers are not associated within the transformer block.
# So they dont have a layer number (i.e independant of number of layers in the model)
NON_TRANSFORMER_LAYERS_NAMES = [
    TRTLLMLayers.vocab_embedding.value,
    TRTLLMLayers.position_embedding.value,
    TRTLLMLayers.lm_head.value,
    TRTLLMLayers.final_layernorm_weight.value,
    TRTLLMLayers.final_layernorm_bias.value,
]


def get_layer_name_without_prefix(layer: TRTLLMLayers) -> str:
    """Get TRTLayer name without prefix

    Given a layer e.g TRTLLMLayers.attention_qkv_weight it returns 'attention.qkv.weight'

    Args:
        layer (TRTLLMLayers): The TRTLLMLayer

    Returns:
        str: The TRTLLMLayers suffix (i.e Removing transformer.layers. fromt he layer name)
    """
    layer_name_without_prefix = layer.value.replace("transformer.layers.", "")
    return layer_name_without_prefix


================================================
FILE: megatron/core/export/trtllm/trtllm_weights_converter/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/core/export/trtllm/trtllm_weights_converter/distributed_trtllm_model_weights_converter.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from typing import Optional

import torch

from megatron.core import parallel_state
from megatron.core.export.data_type import DataType
from megatron.core.export.trtllm.trtllm_layers import NON_TRANSFORMER_LAYERS_NAMES, TRTLLMLayers
from megatron.core.export.trtllm.trtllm_layers import get_layer_name_without_prefix as suffix
from megatron.core.export.trtllm.trtllm_weights_converter.utils import is_gated_activation
from megatron.core.tensor_parallel.utils import VocabUtility
from megatron.core.transformer.transformer_config import TransformerConfig

try:
    from tqdm import tqdm

    HAVE_TQDM = True
except ImportError:
    HAVE_TQDM = False


def str_dtype_to_torch(dtype: DataType):
    """Get torch datatype from input datatype"""
    from tensorrt_llm._utils import str_dtype_to_torch

    return str_dtype_to_torch(dtype.name)


# pylint: disable=line-too-long
class DistributedTRTLLMModelWeightsConverter:
    """The TRTLLM Converter class used for GPU (on device) conversion

    This class is used to convert models sharded and on gpus. (It assumes that the model is already sharded appropriate to how you want to export it). (i.e) If you want to export to tp2pp2, then load the model in tp2pp2 setting and pass in their respective state dictionaries
    """

    def __init__(
        self,
        transformer_config: TransformerConfig,
        dtype: DataType,
        multi_query_mode: bool = False,
        activation: str = "gelu",
        scales: Optional[dict] = None,
    ):
        """Constructor for the TRTLLMModelWeightsConverterGPU class

        This class is responsible to convert the model weights to TRTLLM equivalent weights.

        Args:
            transformer_config (TransformerConfig): The transformer config
            dtype (DataType): The data type or model precision
            multi_query_mode (bool, optional): Defaults to False.
            activation (str, optional): Defaults to "gelu".
            scales (dict, optional): Dictionary with fp8 scaling factors.
        """
        if scales is None:
            scales = {}
        self.transformer_config = transformer_config
        self.trtllm_model_weights = {}
        self.storage_type = str_dtype_to_torch(dtype)
        self.activation = activation
        self.scales = scales
        num_kv_heads = self.transformer_config.num_query_groups
        if num_kv_heads == 0:
            if multi_query_mode:
                num_kv_heads = 1
            else:
                num_kv_heads = self.transformer_config.num_attention_heads
        self.num_kv_heads = num_kv_heads

        self.inference_pp_size = parallel_state.get_pipeline_model_parallel_world_size()
        self.inference_tp_size = parallel_state.get_tensor_model_parallel_world_size()
        self.tp_rank = parallel_state.get_tensor_model_parallel_rank()
        self.pp_rank = parallel_state.get_pipeline_model_parallel_rank()
        self.tp_group = parallel_state.get_tensor_model_parallel_group()
        vp_size = self.transformer_config.virtual_pipeline_model_parallel_size

        assert (
            vp_size is None or vp_size == 1
        ), "Virtual parallelism is not supported in GPU Converter. Gather the VP chunks and use PP config."

    def _add_to_trtllm_model_weights(self, val: torch.Tensor, layer_name: str):
        assert torch.is_tensor(val), f"Expected a tensor for {layer_name} but got {type(val)}"
        scale_key = ".".join(layer_name.split(".")[:-1]) + ".weights_scaling_factor"
        storage = self.storage_type
        if scale_key in self.scales and layer_name.endswith("weight"):
            storage = torch.float8_e4m3fn
            val = val * self.scales[scale_key]["weight_multiplier"].to(val.device)

        val = val.to(storage)
        val = val.detach().contiguous()
        if val.ndim >= 2:
            val = torch.transpose(val.reshape(val.shape[0], -1), 0, 1)
        if layer_name not in self.trtllm_model_weights:
            self.trtllm_model_weights[layer_name] = torch.empty(
                val.size(), dtype=val.dtype, layout=val.layout, device="cpu", pin_memory=True
            )
        self.trtllm_model_weights[layer_name].copy_(val, non_blocking=True)

    def _convert_transformer_layer(self, layer_name: str, val: torch.Tensor):
        """Convert Transformer layers to TRTLLM weights

        Transformer layers referes to layers within the transformber block. They have a layer number associated with them. Depending on the layer we either directly save it to trtllm_model_weights, or split it across some dimension and save the splits

        Args:
            model_state_dict (dict): The input model state dictionary (All collected on CPU)
            layer (TRTLLMLayerNames): The TRTLLM Layer that we want to change
        """
        if val.ndim == 2:
            val = val.T

        if (
            layer_name.endswith(suffix(TRTLLMLayers.input_layernorm_weight))
            or layer_name.endswith(suffix(TRTLLMLayers.input_layernorm_bias))
            or layer_name.endswith(suffix(TRTLLMLayers.post_layernorm_weight))
            or layer_name.endswith(suffix(TRTLLMLayers.post_layernorm_bias))
            or layer_name.endswith(suffix(TRTLLMLayers.attention_dense_bias))
            or layer_name.endswith(suffix(TRTLLMLayers.mlp_projection_bias))
            or layer_name.endswith(suffix(TRTLLMLayers.mlp_router_weight))
            or layer_name.endswith(suffix(TRTLLMLayers.ffn_projection_weight))
            or layer_name.endswith(suffix(TRTLLMLayers.attention_dense_weight))
            or layer_name.endswith(suffix(TRTLLMLayers.mlp_projection_weight))
        ):
            # Same as layernorm1p in NeMo
            if (
                self.transformer_config.layernorm_zero_centered_gamma
                and self.transformer_config.normalization == "LayerNorm"
                and "layernorm.weight" in layer_name
            ):
                val = val + 1.0

            self._add_to_trtllm_model_weights(val=val, layer_name=layer_name)

        elif (
            layer_name.endswith(suffix(TRTLLMLayers.mlp_fc_weight))
            or layer_name.endswith(suffix(TRTLLMLayers.mlp_fc_bias))
            or layer_name.endswith(suffix(TRTLLMLayers.ffn_fc_weight))
        ):
            split_gated_activation = is_gated_activation(self)
            if split_gated_activation:
                vals, gates = [[n] for n in torch.chunk(val, 2, axis=-1)]
                gate_layer_name = layer_name.replace("fc", "gate")
                self._add_to_trtllm_model_weights(val=gates[0], layer_name=gate_layer_name)
                val = vals[0]

            self._add_to_trtllm_model_weights(val=val, layer_name=layer_name)

        elif layer_name.endswith(suffix(TRTLLMLayers.ffn_linear_weight)) or layer_name.endswith(
            suffix(TRTLLMLayers.attention_linear_weight)
        ):
            self._add_to_trtllm_model_weights(val=val, layer_name=layer_name)

        elif layer_name.endswith(suffix(TRTLLMLayers.attention_qkv_bias)):
            qkv_hidden_dim = val.shape[0]
            size_per_head = (
                qkv_hidden_dim
                // (self.transformer_config.num_attention_heads + 2 * self.num_kv_heads)
                * self.inference_tp_size
            )
            q_num = self.transformer_config.num_attention_heads // self.num_kv_heads

            # We first concat all sub weights per tp rank together.
            val = val.reshape(self.num_kv_heads // self.inference_tp_size, q_num + 2, size_per_head)
            qkv = torch.split(val, [q_num, 1, 1], dim=1)
            split_vals = torch.concatenate(
                [qkv[0].reshape(-1), qkv[1].reshape(-1), qkv[2].reshape(-1)], dim=0
            )
            self._add_to_trtllm_model_weights(val=split_vals, layer_name=layer_name)

        # TODO : Should add a atten layer dimension "qkvqkv, qqkkvv etc to see how to reshape here"
        elif layer_name.endswith(suffix(TRTLLMLayers.attention_qkv_weight)):
            hidden_dim = val.shape[0]
            size_per_head = self.transformer_config.kv_channels
            if size_per_head is None:
                size_per_head = hidden_dim // self.transformer_config.num_attention_heads
            q_num = self.transformer_config.num_attention_heads // self.num_kv_heads

            val = val.reshape(
                hidden_dim, self.num_kv_heads // self.inference_tp_size, q_num + 2, size_per_head
            )
            qkv = torch.split(val, [q_num, 1, 1], dim=2)
            split_vals = torch.concatenate(
                [
                    qkv[0].reshape(hidden_dim, -1),
                    qkv[1].reshape(hidden_dim, -1),
                    qkv[2].reshape(hidden_dim, -1),
                ],
                dim=1,
            )
            self._add_to_trtllm_model_weights(val=split_vals, layer_name=layer_name)

        else:
            raise ValueError(f"{layer_name} cannot be handled by GPU converter")

    def _convert_non_transformer_layer(self, model_state_dict: dict, layer_name: str):
        """Convert Non Transformer layers to TRTLLM weights

        Non transformer layers referes to layers that occur only once in the model (e.g Embedding , final output layer etc. ) They dont have any layer number associated with them. We remove this layer from the original state dict and cast it to storage type and convert to numpy and add it to trtllm_model_weights

        Args:
            model_state_dict (dict): The input model state dictionary (All collected on CPU)
            layer (TRTLLMLayerNames): The TRTLLM Layer that we want to change
        """
        if layer_name in model_state_dict:
            val = model_state_dict.pop(layer_name)
            self._add_to_trtllm_model_weights(val=val, layer_name=layer_name)

    # ----------------Convert Embeddings----------------
    def _get_remove_vocab_padding(self, layer_name, model_state_dict, tokenizer_vocab_size):
        val = model_state_dict.get(layer_name, None)
        if val is None:
            return None

        if self.inference_tp_size > 1:  # Gather padded tensor chunks
            vocab_size_padded = val.shape[0] * self.inference_tp_size
            vocab_start_index, vocab_end_index = VocabUtility.vocab_range_from_global_vocab_size(
                vocab_size_padded, self.tp_rank, self.inference_tp_size
            )
            dim_size = list(val.size())
            dim_size[0] = vocab_size_padded
            gathered_val = torch.zeros(
                dim_size, dtype=val.dtype, device=torch.cuda.current_device()
            )
            gathered_val[vocab_start_index:vocab_end_index] = val
            torch.distributed.all_reduce(gathered_val, group=self.tp_group)
            val = gathered_val
        unpadded = val[:tokenizer_vocab_size]
        if self.inference_tp_size > 1:  # Split gathered val for val parallel embedding
            vocab_start_index, vocab_end_index = VocabUtility.vocab_range_from_global_vocab_size(
                tokenizer_vocab_size, self.tp_rank, self.inference_tp_size
            )
            unpadded = unpadded[vocab_start_index:vocab_end_index]
        return unpadded.T  # TRTLLM expects (vocab_size, hidden_size) so need extra transpose

    @torch.no_grad()
    def convert(
        self, model_state_dict: dict, trtllm_conversion_dict: dict, tokenizer_vocab_size: int
    ):
        """Convert model weights to trtllm model weights

        This method goes through each layer in the model state dict and converts to equivalent trtllm model weights. It also handles splitting across TP dimension , expert split etc.

        Args:
            model_state_dict (dict): The full model state dict (all on CPU)
            trtllm_conversion_dict (dict): The conversion dictionary used to convert model layer names to trtllm layer names
            tokenizer_vocab_size (int): The vocab size of the tokenizer
        """

        # First step is to convert input model layer names to equivalent trtllm layer names
        model_state_dict = TRTLLMLayers.rename_input_layer_names_to_trtllm_layer_names(
            model_state_dict=model_state_dict, trtllm_conversion_dict=trtllm_conversion_dict
        )

        # Convert the non transformer layers
        for layer_name in NON_TRANSFORMER_LAYERS_NAMES:
            if layer_name not in model_state_dict:
                continue
            if (
                layer_name in TRTLLMLayers.vocab_embedding.value
                or layer_name in TRTLLMLayers.lm_head.value
            ):
                # For embedding layers alone we do some pre processing
                embed_val = self._get_remove_vocab_padding(
                    layer_name, model_state_dict, tokenizer_vocab_size
                )
                model_state_dict[layer_name] = embed_val
            # TODO : Check if this handling of position embedding is right.
            if layer_name == TRTLLMLayers.position_embedding.value:
                position_embedding = model_state_dict[layer_name]
                req_position_embedding = position_embedding.chunk(self.inference_tp_size)[
                    self.tp_rank
                ]
                model_state_dict[layer_name] = req_position_embedding.T
            if layer_name == TRTLLMLayers.final_layernorm_weight.value:
                # Same as layernorm1p in NeMo
                if (
                    self.transformer_config.layernorm_zero_centered_gamma
                    and self.transformer_config.normalization == "LayerNorm"
                ):
                    model_state_dict[layer_name] = model_state_dict[layer_name] + 1.0
            self._convert_non_transformer_layer(
                model_state_dict=model_state_dict, layer_name=layer_name
            )

        if not HAVE_TQDM:
            raise ImportError(
                "tqdm is required for DistributedTRTLLMModelWeightsConverter, please install it with `pip install tqdm`"
            )

        for layer_name, value in tqdm(
            model_state_dict.items(), desc="Converting to TRTLLM Weights"
        ):
            self._convert_transformer_layer(layer_name, value)


================================================
FILE: megatron/core/export/trtllm/trtllm_weights_converter/single_device_trtllm_model_weights_converter.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import re
from typing import Optional

import torch

from megatron.core.export.data_type import DataType
from megatron.core.export.export_config import ExportConfig
from megatron.core.export.trtllm.trtllm_layers import NON_TRANSFORMER_LAYERS_NAMES, TRTLLMLayers
from megatron.core.export.trtllm.trtllm_layers import get_layer_name_without_prefix as suffix
from megatron.core.export.trtllm.trtllm_weights_converter.utils import is_gated_activation
from megatron.core.transformer.transformer_config import TransformerConfig

try:
    from tqdm import tqdm

    HAVE_TQDM = True
except ImportError:
    HAVE_TQDM = False


# pylint: disable=line-too-long
# TODO: Writing TRT imports this way so that it can be mocked in the test_trtllm_cpu_converter.py unit test
# TODO: Figure out how to patch it directly from the trtllm library
def pad_vocab_size(vocab_size: int, tp_size: int):
    """Pad vocab size based on inference size"""
    from tensorrt_llm._utils import pad_vocab_size

    return pad_vocab_size(vocab_size, tp_size)


def str_dtype_to_torch(dtype: DataType):
    """Get torch datatype from input datatype"""
    from tensorrt_llm._utils import str_dtype_to_torch

    return str_dtype_to_torch(dtype.name)


class SingleDeviceTRTLLMModelWeightsConverter:
    """Class to convert Model weights to TRTLLM weights on CPU"""

    def __init__(
        self,
        export_config: ExportConfig,
        transformer_config: TransformerConfig,
        dtype: DataType,
        multi_query_mode: bool = False,
        activation: str = "gelu",
        scales: Optional[dict] = None,
    ):
        """Constructor for the TRTLLMModelWeightsConverterCPU class

        This class is responsible to convert the model weights to TRTLLM equivalent weights and also split them for each GPU rank and return as a list.

        Args:
            export_config (ExportConfig): The export config with inference tp size, pp size etc.
            transformer_config (TransformerConfig): The transformer config
            dtype (DataType): The data type or model precision
            multi_query_mode (bool, optional): Defaults to False.
            activation (str, optional): Defaults to "gelu".
            scales (dict, optional): Dictionary with fp8 scaling factors.
        """
        if scales is None:
            scales = {}

        self.export_config = export_config
        self.transformer_config = transformer_config
        self.trtllm_model_weights = {}
        self.storage_type = str_dtype_to_torch(dtype)
        self.activation = activation
        self.scales = scales
        num_kv_heads = self.transformer_config.num_query_groups
        if num_kv_heads == 0:
            if multi_query_mode:
                num_kv_heads = 1
            else:
                num_kv_heads = self.transformer_config.num_attention_heads
        self.num_kv_heads = num_kv_heads

    def _convert_non_transformer_layer(self, model_state_dict: dict, layer_name: str):
        """Convert Non Transformer layers to TRTLLM weights

        Non transformer layers referes to layers that occur only once in the model (e.g Embedding , final output layer etc. ) They dont have any layer number associated with them. We remove this layer from the original state dict and cast it to storage type and convert to numpy and add it to trtllm_model_weights

        Args:
            model_state_dict (dict): The input model state dictionary (All collected on CPU)
            layer_name (str): The TRTLLM Layer name that we want to convert
        """
        if layer_name in model_state_dict:
            val = model_state_dict.pop(layer_name)
            val = val.to(self.storage_type).detach().contiguous()
            self.trtllm_model_weights[layer_name] = val

    def _cast_value(self, val: torch.Tensor, layer_name: str) -> torch.Tensor:
        """Casts weights to the expected datatype.
            When appropriate scaling factor is found inside self.scales, the weight gets scaled before the cast.

        Args:
            val (torch.Tensor): Model weight
            layer_name (str): Layer name, used for determining the scaling factor dictionary key
        Returns:
            torch.Tensor: The casted weight
        """
        storage = self.storage_type

        scale_key = ".".join(layer_name.split(".")[:-1]) + ".weights_scaling_factor"
        if scale_key in self.scales and layer_name.endswith("weight"):
            storage = torch.float8_e4m3fn
            val = val * self.scales[scale_key]["weight_multiplier"].to(val.device)

        return val.to(storage)

    def _convert_transformer_layer(self, layer_name: str, val: torch.Tensor):
        """Convert Transformer layers to TRTLLM weights

        Transformer layers referes to layers within the transformber block. They have a layer number associated with them. Depending on the layer we either directly save it to trtllm_model_weights, or split it across some dimension and save the splits

        Args:
            model_state_dict (dict): The input model state dictionary (All collected on CPU)
            layer (TRTLLMLayerNames): The TRTLLM Layer that we want to change
        """

        def _add_to_trtllm_model_weights(val: torch.Tensor, layer_name: str, split_type=None):
            """Add the input weight to trtllm_model_weights

            Depending on split (Expert split/Tensor split/None) we split the input data and add accordingly

            Args:
                val (torch.Tensor): The model weight to be added
                layer_name (str): The TRTLLMlayername as a string
                split_type (str, optional): The split type. Defaults to None.
            """
            if split_type == "expert_split":
                for split_num, split_val in enumerate(val):
                    self.trtllm_model_weights[f"{layer_name}.{split_num}.bin"] = (
                        self._cast_value(split_val, layer_name).detach().contiguous()
                    )
            elif split_type == "tensor_split":
                for split_num, split_val in enumerate(val):
                    if split_val.ndim >= 2:
                        split_val = torch.transpose(split_val.reshape(split_val.shape[0], -1), 1, 0)

                    self.trtllm_model_weights[f"{layer_name}.{split_num}.bin"] = (
                        self._cast_value(split_val, layer_name).detach().contiguous()
                    )
            else:
                if val.ndim >= 2:
                    val = torch.transpose(val.reshape(val.shape[0], -1), 1, 0)

                self.trtllm_model_weights[layer_name] = (
                    self._cast_value(val, layer_name).detach().contiguous()
                )

        def _duplicate_kv_head(val: torch.Tensor, rep: int, dim: int):
            """Duplicates a kv tensor along specified dimension.
            [hidden_dim, num_kv_heads, 1, size_per_head] -> [hidden_dim, num_kv_heads, rep, size_per_head]
            or [num_kv_heads, 1, size_per_head] -> [num_kv_heads, rep, size_per_head]
            """
            shapes = list(val.shape)
            shapes[dim] = rep

            return val.expand(*shapes)

        if val.ndim == 2:
            val = val.T

        if (
            layer_name.endswith(suffix(TRTLLMLayers.input_layernorm_weight))
            or layer_name.endswith(suffix(TRTLLMLayers.input_layernorm_bias))
            or layer_name.endswith(suffix(TRTLLMLayers.post_layernorm_weight))
            or layer_name.endswith(suffix(TRTLLMLayers.post_layernorm_bias))
            or layer_name.endswith(suffix(TRTLLMLayers.attention_dense_bias))
            or layer_name.endswith(suffix(TRTLLMLayers.attention_dense_bias))
            or layer_name.endswith(suffix(TRTLLMLayers.mlp_projection_bias))
            or layer_name.endswith(suffix(TRTLLMLayers.mlp_router_weight))
        ):
            # Same as layernorm1p in NeMo
            if (
                self.transformer_config.layernorm_zero_centered_gamma
                and self.transformer_config.normalization == "LayerNorm"
                and "layernorm.weight" in layer_name
            ):
                val = val + 1.0

            _add_to_trtllm_model_weights(val=val, layer_name=layer_name, split_type=None)

        elif (
            layer_name.endswith(suffix(TRTLLMLayers.attention_dense_weight))
            or layer_name.endswith(suffix(TRTLLMLayers.mlp_projection_weight))
            or layer_name.endswith(suffix(TRTLLMLayers.ffn_projection_weight))
        ):
            split_vals = torch.chunk(val, self.export_config.inference_tp_size, axis=0)
            _add_to_trtllm_model_weights(
                val=split_vals, layer_name=layer_name, split_type="tensor_split"
            )

        elif (
            layer_name.endswith(suffix(TRTLLMLayers.mlp_fc_weight))
            or layer_name.endswith(suffix(TRTLLMLayers.mlp_fc_bias))
            or layer_name.endswith(suffix(TRTLLMLayers.ffn_fc_weight))
        ):
            split_gated_activation = is_gated_activation(self)
            if split_gated_activation:
                val, gate = torch.chunk(val, 2, axis=-1)
                gate_layer_name = layer_name.replace("fc", "gate")
                split_vals = torch.chunk(gate, self.export_config.inference_tp_size, axis=-1)
                _add_to_trtllm_model_weights(
                    val=split_vals, layer_name=gate_layer_name, split_type="tensor_split"
                )

            split_vals = torch.chunk(val, self.export_config.inference_tp_size, axis=-1)
            _add_to_trtllm_model_weights(
                val=split_vals, layer_name=layer_name, split_type="tensor_split"
            )

        elif layer_name.endswith(suffix(TRTLLMLayers.ffn_linear_weight)) or layer_name.endswith(
            suffix(TRTLLMLayers.attention_linear_weight)
        ):
            split_vals = torch.chunk(val, self.export_config.inference_tp_size, axis=-1)
            _add_to_trtllm_model_weights(
                val=split_vals, layer_name=layer_name, split_type="tensor_split"
            )

        elif layer_name.endswith(suffix(TRTLLMLayers.attention_qkv_bias)):
            qkv_hidden_dim = val.shape[0]
            size_per_head = qkv_hidden_dim // (
                self.transformer_config.num_attention_heads + 2 * self.num_kv_heads
            )
            q_num = self.transformer_config.num_attention_heads // self.num_kv_heads

            # We first concat all sub weights per tp rank together.
            val = val.reshape(self.num_kv_heads, q_num + 2, size_per_head)

            q_bias, k_bias, v_bias = torch.split(val, [q_num, 1, 1], dim=1)

            if self.num_kv_heads < self.export_config.inference_tp_size:
                rep = self.export_config.inference_tp_size // self.num_kv_heads
                k_bias = _duplicate_kv_head(k_bias, rep, dim=1)
                v_bias = _duplicate_kv_head(v_bias, rep, dim=1)

            # Reshape before splitting for num_kv_heads < tp_size
            q_split = torch.chunk(q_bias.reshape(-1), self.export_config.inference_tp_size, axis=0)
            k_split = torch.chunk(k_bias.reshape(-1), self.export_config.inference_tp_size, axis=0)
            v_split = torch.chunk(v_bias.reshape(-1), self.export_config.inference_tp_size, axis=0)

            # Concatenate Q, K, and V together
            split_vals = [
                torch.concatenate([q_split[i], k_split[i], v_split[i]], dim=0)
                for i in range(self.export_config.inference_tp_size)
            ]
            _add_to_trtllm_model_weights(
                val=split_vals, layer_name=layer_name, split_type="tensor_split"
            )

        # TODO : Should add a atten layer dimension "qkvqkv, qqkkvv etc to see how to reshape here"
        elif layer_name.endswith(suffix(TRTLLMLayers.attention_qkv_weight)):
            hidden_dim = val.shape[0]
            size_per_head = self.transformer_config.kv_channels
            if size_per_head is None:
                size_per_head = hidden_dim // self.transformer_config.num_attention_heads
            q_num = self.transformer_config.num_attention_heads // self.num_kv_heads

            # When the merge factor exceeds 1, the 'vals' list will have multiple entries.
            # Depending on the format, 'vals' can look like either [QQQQ..KV, QQQQ..KV, ...](for GQA) or [QKV, QKV, ...](for MHA).
            # We first concat all sub weights per tp rank together.
            val = val.reshape(hidden_dim, self.num_kv_heads, q_num + 2, size_per_head)

            # Split the QKV to separate variables.
            q_weight, k_weight, v_weight = torch.split(val, [q_num, 1, 1], dim=2)

            if self.num_kv_heads < self.export_config.inference_tp_size:
                if self.export_config.inference_tp_size % self.num_kv_heads != 0:
                    raise Exception(
                        "Number of query groups of the models is {0}. Please select tensor parallelism size "
                        "that can duplicate or split the number of query groups to equal number of query matrices in the "
                        "each GPU.".format(self.num_kv_heads)
                    )
                rep = self.export_config.inference_tp_size // self.num_kv_heads
                k_weight = _duplicate_kv_head(k_weight, rep, dim=2)
                v_weight = _duplicate_kv_head(v_weight, rep, dim=2)
            elif (self.num_kv_heads % self.export_config.inference_tp_size) != 0:
                raise Exception(
                    "Number of query groups of the models is {0}. Please select tensor parallelism size "
                    "that can duplicate or split the number of query groups to equal number of query matrices in the "
                    "each GPU.".format(self.num_kv_heads)
                )

            # Reshape before splitting for num_kv_heads < tp_size
            q_split = torch.chunk(
                q_weight.reshape(hidden_dim, -1), self.export_config.inference_tp_size, axis=1
            )
            k_split = torch.chunk(
                k_weight.reshape(hidden_dim, -1), self.export_config.inference_tp_size, axis=1
            )
            v_split = torch.chunk(
                v_weight.reshape(hidden_dim, -1), self.export_config.inference_tp_size, axis=1
            )

            # Concatenate Q, K, and V together
            split_vals = [
                torch.concatenate([q_split[i], k_split[i], v_split[i]], dim=1)
                for i in range(self.export_config.inference_tp_size)
            ]
            _add_to_trtllm_model_weights(
                val=split_vals, layer_name=layer_name, split_type="tensor_split"
            )

        elif layer_name.endswith(suffix(TRTLLMLayers.mlp_fc_weight_mixture_of_experts)):
            w1, w3 = torch.chunk(val, 2, axis=1)
            # w1 splits
            split_w1s = torch.chunk(w1, self.export_config.inference_tp_size, axis=1)
            # w3 splits
            split_w3s = torch.chunk(w3, self.export_config.inference_tp_size, axis=1)

            split_vals = [torch.concatenate(item, dim=1) for item in zip(split_w3s, split_w1s)]
            layer_name = layer_name.replace(".expert", "")  # Remove suffix .expert from key
            _add_to_trtllm_model_weights(
                val=split_vals, layer_name=layer_name, split_type="expert_split"
            )

        elif layer_name.endswith(suffix(TRTLLMLayers.mlp_projection_weight_mixture_of_experts)):
            split_vals = torch.chunk(val, self.export_config.inference_tp_size, axis=-1)
            layer_name = layer_name.replace(".expert", "")  # Remove suffix .expert from key
            _add_to_trtllm_model_weights(
                val=split_vals, layer_name=layer_name, split_type="expert_split"
            )
        else:
            raise ValueError(f"{layer_name} cannot be handled by converter")

    @torch.no_grad()
    def convert(
        self, model_state_dict: dict, trtllm_conversion_dict, state_dict_split_by_layer_numbers=True
    ):
        """Convert model weights to trtllm model weights

        This method goes through each layer in the model state dict and converts to equivalent trtllm model weights. It also handles splitting across TP dimension , expert split etc.

        Args:
            model_state_dict (dict): The full model state dict (all on CPU)
            trtllm_conversion_dict (dict): The conversion dictionary used to convert model layer names to trtllm layer names
            state_dict_split_by_layer_numbers (bool, optional): Are the model layers split by layer numbers in state dict. For example : mlp.fc1.weight can be represented like mlp.fc1.weight of shape [num_layers, hidden_dim, ffn_hidden_dim]} or it can be like mlp.fc1.layers.0.weight of shape [hidden_dim, ffn_hidden_dim], then mlp.fc1.layers.1.weight ... for all layers. If you use represenation 2 set this to True. Defaults to True
        """

        # First step is to convert input model layer names to equivalent trtllm layer names
        model_state_dict = TRTLLMLayers.rename_input_layer_names_to_trtllm_layer_names(
            model_state_dict=model_state_dict,
            trtllm_conversion_dict=trtllm_conversion_dict,
            state_dict_split_by_layer_numbers=state_dict_split_by_layer_numbers,
        )

        # Convert the non transformer layers
        for layer_name in NON_TRANSFORMER_LAYERS_NAMES:
            # For vocab embedding layer alone we pad the weights to be divisible by inference tp size
            if (
                layer_name == TRTLLMLayers.vocab_embedding.value
                and self.export_config.use_parallel_embedding
            ):
                val = model_state_dict[TRTLLMLayers.vocab_embedding.value]
                vocab_size = val.shape[0]
                if vocab_size % self.export_config.inference_tp_size != 0:
                    vocab_size_padded = pad_vocab_size(
                        vocab_size, self.export_config.inference_tp_size
                    )
                    pad_width = vocab_size_padded - vocab_size
                    val = torch.nn.functional.pad(val, (0, 0, 0, pad_width), value=0)
                    model_state_dict[layer_name] = val
            if layer_name == TRTLLMLayers.final_layernorm_weight.value:
                # Same as layernorm1p in NeMo
                if (
                    self.transformer_config.layernorm_zero_centered_gamma
                    and self.transformer_config.normalization == "LayerNorm"
                ):
                    model_state_dict[layer_name] = model_state_dict[layer_name] + 1.0

            self._convert_non_transformer_layer(
                model_state_dict=model_state_dict, layer_name=layer_name
            )

        transformer_layers_dict = {}
        # Convert the transformer layers
        if state_dict_split_by_layer_numbers:
            # Already model dict is split by layer numbers
            transformer_layers_dict = model_state_dict
        else:
            # Here we split the model state dict into individual layers
            for layer_name in list(model_state_dict.keys()):
                value = model_state_dict.pop(layer_name)
                for layer_number in range(self.transformer_config.num_layers):
                    # e.g transformer.layers.mlp.fc.bias => transformer.layers.2.mlp.fc.bias
                    layer_name_with_layer_number = re.sub(
                        r"(?<=layers\.)", f"{layer_number}.", layer_name
                    )
                    transformer_layers_dict[layer_name_with_layer_number] = value[layer_number]
        if not HAVE_TQDM:
            raise ImportError(
                "tqdm is required for SingleDeviceTRTLLMModelWeightsConverter, please install it with `pip install tqdm`"
            )

        for layer_name, value in tqdm(
            transformer_layers_dict.items(), desc="Converting to TRTLLM Weights"
        ):
            self._convert_transformer_layer(layer_name, value)

    def get_padded_vocab_size(self) -> int:
        """Return the paded vocab size

        We extract the lm head and vocab embedding and use that to determine padded_vocab_size

        Returns:
            int: Padded vocab size
        """
        lm_head_weight = self.trtllm_model_weights.get(TRTLLMLayers.lm_head.value, None)
        vocab_size = self.trtllm_model_weights[TRTLLMLayers.vocab_embedding.value].shape[0]
        vocab_size_padded = (
            vocab_size
            if lm_head_weight is None
            else pad_vocab_size(vocab_size, self.export_config.inference_tp_size)
        )
        return vocab_size_padded

    def get_local_model_weights_per_gpu(self, mapping, trtllm_model_config: dict):
        """Get the trtllm model weights split per gpu

        Given the trtllm mapping information (tp, pp rank etc) we split the model weights in a list, with each element of the list corresponding to the weights of each gpu rank

        Args:
            mapping : The trtllm mapping information
            trtllm_model_config (dict): The trtllm model config
        """

        def _split(torch_tensor, tp_size, idx, dim=0):
            """Splits the np tensor v on dim and return the idx's slice."""
            if tp_size == 1:
                return torch_tensor
            if len(torch_tensor.shape) == 1:
                return torch.chunk(torch_tensor, tp_size)[idx].contiguous()
            else:
                return torch.chunk(torch_tensor, tp_size, axis=dim)[idx].contiguous()

        pp_layer_range = mapping.pp_layers(self.transformer_config.num_layers)

        trtllm_model_weights_per_gpu = {}
        for layer_name, value in self.trtllm_model_weights.items():
            if layer_name in NON_TRANSFORMER_LAYERS_NAMES:
                continue

            # Happens in the case of TP split or expert split
            if layer_name.endswith(".bin"):
                if layer_name.endswith(f"{mapping.tp_rank}.bin"):
                    layer_name = layer_name.replace(f".{mapping.tp_rank}.bin", "")
                else:
                    continue

            layer_num = int(layer_name.split(".")[2])
            if layer_num in pp_layer_range:
                layer_name = layer_name.replace(
                    f"layers.{layer_num}", f"layers.{layer_num - pp_layer_range[0]}"
                )
            else:
                continue
            if (
                hasattr(trtllm_model_config, "new_decoder_architecture")
                and trtllm_model_config.new_decoder_architecture
                and "post_layernorm" in layer_name
            ):
                layer_name = layer_name.replace("post_layernorm", "mlp_layernorm")

            trtllm_model_weights_per_gpu[layer_name] = value

        if mapping.is_first_pp_rank():
            embedding_weight = (
                _split(
                    self.trtllm_model_weights[TRTLLMLayers.vocab_embedding.value],
                    mapping.tp_size,
                    mapping.tp_rank,
                )
                if self.export_config.use_parallel_embedding
                else self.trtllm_model_weights[TRTLLMLayers.vocab_embedding.value]
            )

            trtllm_model_weights_per_gpu[TRTLLMLayers.vocab_embedding.value] = embedding_weight

            pos_embedding_weight = self.trtllm_model_weights.get(
                TRTLLMLayers.position_embedding.value
            )
            if pos_embedding_weight is not None:
                if self.export_config.use_parallel_embedding:
                    pos_embedding_weight = _split(
                        pos_embedding_weight, mapping.tp_size, mapping.tp_rank
                    )

                trtllm_model_weights_per_gpu[TRTLLMLayers.position_embedding.value] = (
                    pos_embedding_weight
                )

        if mapping.is_last_pp_rank():
            lm_head_weight = self.trtllm_model_weights.get(TRTLLMLayers.lm_head.value, None)
            if lm_head_weight is not None:
                trtllm_model_weights_per_gpu[TRTLLMLayers.lm_head.value] = _split(
                    lm_head_weight, mapping.tp_size, mapping.tp_rank
                )

            trtllm_model_weights_per_gpu[TRTLLMLayers.final_layernorm_weight.value] = (
                self.trtllm_model_weights[TRTLLMLayers.final_layernorm_weight.value]
            )

            ln_f_bias = self.trtllm_model_weights.get(TRTLLMLayers.final_layernorm_bias.value)
            if ln_f_bias is not None:
                trtllm_model_weights_per_gpu[TRTLLMLayers.final_layernorm_bias.value] = ln_f_bias

        return trtllm_model_weights_per_gpu


================================================
FILE: megatron/core/export/trtllm/trtllm_weights_converter/utils.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

GATED_ACTIVATION = ["swiglu", "geglu", "fast-swiglu", "fast-geglu"]


def is_gated_activation(helper):
    """Check whether the model is gated activation"""
    return helper.activation in GATED_ACTIVATION or helper.transformer_config.gated_linear_unit


================================================
FILE: megatron/core/extensions/TransformerEngineMixedPrecision.md
================================================
# About

Megatron training exposes the argument "--te-precision-config-file"
to allow experimentation with fine-grained control over the precision
of modules within a megatron network.

## Design Goals

The design aims to support configuration of the precision of linear
and grouped linear modules via the selection of a transformer engine
quantization recipe.

The fp8_autocast abstraction is already used to enable and disable a
single quantization recipe when evaluating the forward pass of a network.
This same mechanism is extended to execute targeted layers with the
desired quantization recipe, permitting mixed precision recipes.

The configurations function by optionally overriding the precision a module
would execute in. Not every module must have a configured override. Modules
are checked by module name against a sequence of patterns to determine if
an override recipe is applicable. By default, if the non-overridden precision
of a layer is non-quantized, as the primary desired use case is to customize
modules that are already quantized, and it is useful to respect other arguments
like `--first-last-layers-bf16`.

## Limitations

Relying on the module name to match against a configuration means the match is
executed post-initialization, and initialization customization for a recipe
override such as `fp4-param` and `fp8-param` are not in scope.

The validation precision configurations rely on self.training. They have not
yet been verified compatible with cuda-graphs and/or activation recompute.

There are some decisions in megatron that are made using the TransformerConfig's
settings for fp4 and fp8, possibly including layer number rather than using the
quantization autocast context. The configured overrides do not inform these
decisions with the current implementation.

## Validation precision

It is supported to configure a different precision when evaluating against the
validation set (when module.training is False). When evaluating a quantization
recipe, having a consistent forward pass for evaluation versus a baseline isolates
the quality of learning from the ability to infer with the quantization.

## Recipe configuration

Recipe configurations are named entries in a "configs" dictionary.

These examples show an mxfp8 recipe, a bf16 recipe, an mxfp8 recipe that
evaluates in bf16, and an nvfp4 recipe that evaluates in bf16.
```
configs:
  mxfp8:
    transformer_engine_config_type: "TEQuantizationParams"
    training_recipe:
      fp8_quantization_recipe: "mxfp8"
  bf16:
    transformer_engine_config_type: "TEQuantizationParams"
    training_recipe: {}
  mxfp8_evaluate_bf16:
    transformer_engine_config_type: "TEQuantizationParams"
    training_recipe:
      fp8_quantization_recipe: "mxfp8"
    evaluation_recipe: {}
  nvfp4_evaluate_bf16:
    transformer_engine_config_type: "TEQuantizationParams"
    training_recipe:
      fp4_quantization_recipe: "nvfp4"
    evaluation_recipe: {}
```

Recipes are selected by matchers. Currently implemented are glob style
expressions.

Matchers are ordered, and the first enabled matcher to match against
a module name chooses the config from the configs list.

In this example, assuming a default quantization recipe is enabled,
attention linear modules `linear_qkv` and `linear_proj` are selected
for the "bf16" recipe override and mamba mixer linear layers `out_proj`
and `in_proj` are selected for the "mxfp8" recipe override.

```
matchers:
  attn_qkv_bf16:
    config: "bf16"
    type: "glob"
    pattern: "*.linear_qkv"
    enabled: true
  attn_proj_bf16:
    config: "bf16"
    type: "glob"
    pattern: "*.linear_proj"
    enabled: true
  mamba_outproj_mxfp8:
    config: "mxfp8"
    type: "glob"
    pattern: "*mixer.out_proj"
    enabled: true
  mamba_inproj_mxfp8:
    config: "mxfp8"
    type: "glob"
    pattern: "*mixer.in_proj"
    enabled: true
```

Matches or modules that do not match to a configuration, and execute with their
default precision, will be logged so that quantization configurations can be
observed. Make sure to set `--logging-level` (to 20) in order to emit to logs.


================================================
FILE: megatron/core/extensions/__init__.py
================================================


================================================
FILE: megatron/core/extensions/kitchen.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

HAVE_KITCHEN = False

from unittest.mock import MagicMock

AutogradFunctionImplementation = MagicMock()
KitchenSpecProvider = MagicMock()

QAttentionParamsConfigSchema = MagicMock()
QFlashAttentionParamsConfigSchema = MagicMock()
QLinearParamsConfigSchema = MagicMock()
QLinearParams = MagicMock()
QuantizeRecipe = MagicMock()
QuantizeRecipeAttnBMM = MagicMock()
get_qattention_params_from_predefined = MagicMock()
get_qfa_params_from_recipe_name = MagicMock()
get_qlinear_params_from_predefined = MagicMock()
get_qlinear_params_from_qat_params = MagicMock()

KitchenColumnParallelGroupedLinear = MagicMock()
KitchenColumnParallelLinear = MagicMock()
KitchenDotProductAttention = MagicMock()
KitchenFlashAttention = MagicMock()
KitchenLayerNormColumnParallelLinear = MagicMock()
KitchenRowParallelGroupedLinear = MagicMock()
KitchenRowParallelLinear = MagicMock()

# N.B. Kitchen extension is not released publicly.
# This extension is just a stub.


================================================
FILE: megatron/core/extensions/transformer_engine.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import dataclasses
import enum
import inspect
import io
import os
import pickle
import warnings
from contextlib import nullcontext
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Set, Tuple, cast

import torch
import torch.nn.functional as F
from packaging.version import Version as PkgVersion
from torch import Tensor
from torch.nn.parameter import Parameter
from typing_extensions import override

from megatron.core.dist_checkpointing.mapping import ShardedStateDict
from megatron.core.dist_checkpointing.utils import replace_prefix_for_sharding
from megatron.core.enums import Fp4Recipe, Fp8Recipe
from megatron.core.model_parallel_config import ModelParallelConfig
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.parallel_state import (
    get_amax_reduction_group,
    get_context_parallel_group,
    get_hierarchical_context_parallel_groups,
    get_tensor_model_parallel_group,
    get_tensor_model_parallel_world_size,
    model_parallel_is_initialized,
)
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.quantization.quant_config import QuantizationConfig
from megatron.core.tensor_parallel.layers import (
    _initialize_affine_weight_cpu,
    set_tensor_model_parallel_attributes,
)
from megatron.core.tensor_parallel.random import (
    get_cuda_rng_tracker,
    get_data_parallel_rng_tracker_name,
    get_expert_parallel_rng_tracker_name,
)
from megatron.core.tensor_parallel.utils import divide
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.mlp import MLP
from megatron.core.transformer.torch_norm import LayerNormInterface
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.utils import (
    ensure_metadata_has_dp_cp_group,
    is_layer_window_attention,
    make_sharded_tensors_for_checkpoint,
)
from megatron.core.typed_torch import copy_signature
from megatron.core.utils import (
    get_pg_rank,
    get_pg_size,
    get_te_version,
    get_tensor_model_parallel_group_if_none,
    is_te_min_version,
    is_torch_min_version,
)

try:
    import transformer_engine as te
    from transformer_engine.pytorch.fp8 import FP8GlobalStateManager, fp8_autocast

    HAVE_TE = True
except ImportError:
    if TYPE_CHECKING:
        # For type checking, treat transformer_engine as always available.
        import transformer_engine as te
        from transformer_engine.pytorch.fp8 import FP8GlobalStateManager, fp8_autocast

        HAVE_TE = True
    else:
        from unittest.mock import MagicMock

        te = MagicMock()
        HAVE_TE = False

_TE_CONFIG_TYPE_KEY = "transformer_engine_config_type"


class TransformerEngineConfigType(enum.Enum):
    """Configuration object types in config dictionary"""

    TEQuantizationParams = "TEQuantizationParams"


@dataclasses.dataclass
class TEQuantizationRecipe:
    """Class to capture options for opening an autocast context in forward"""

    fp8_quantization_recipe: Optional[Fp8Recipe] = None
    """
    An FP8 quantization override if the module should use FP8.
    If no FP8 or FP4 quantization is configured, the recipe is execution
    in high-precision (BF16).
    """
    fp4_quantization_recipe: Optional[Fp4Recipe] = None
    """
    An FP4 quantization override if the module should use FP4.
    If no FP8 or FP4 quantization is configured, the recipe is execution
    in high-precision (BF16).
    """
    custom_recipe_factory: Optional[str] = None
    """The path to a custom recipe factory if a custom Fp4 or Fp8 recipe is configured"""
    fp8_format: str = "e4m3"
    """A format to select from an FP8Recipe"""
    override_quantized_autocast: bool = True
    """
    If the quantization autocast context for a targeted module is enabled,
    whether to override it and change (or disable) the quantization recipe.
    """
    override_nonquantized_autocast: bool = False
    """
    If the quantization autocast context for a targeted module is not enabled,
    whether to override it and enable a quantization recipe.
    """
    tp_only_amax_red: bool = False
    """
    If an amax reduction is applicable, such as in per-tensor quantization recipe,
    whether to reduce only along TP groups.
    """

    @classmethod
    def parse_from_config(cls, quant_config: Dict[Any, Any]) -> "TEQuantizationRecipe":
        """
        Parse config from quantization dictionary.
        """
        kwargs = {}
        class_keys = cls.get_config_keys()
        for field in class_keys:
            if field in quant_config:
                kwargs[field] = quant_config[field]
        for field in quant_config:
            if field not in class_keys:
                raise ValueError(f"Field '{field}' not valid for this configuration.")
        instance = TEQuantizationRecipe(**kwargs)
        if instance.fp8_quantization_recipe == Fp8Recipe.delayed:
            raise ValueError("Delayed scaling not in scope of te per-module quantization config.")
        if (
            instance.fp8_quantization_recipe is not None
            and instance.fp4_quantization_recipe is not None
        ):
            raise ValueError("fp8 and fp4 quantization settings are mutually exclusive.")
        if (
            instance.fp8_quantization_recipe == Fp8Recipe.custom
            or instance.fp4_quantization_recipe == Fp4Recipe.custom
        ):
            if instance.custom_recipe_factory is None:
                raise ValueError("custom fp8 or fp4 recipe requires custom_recipe_factory")
        return instance

    @classmethod
    def get_config_keys(cls) -> Set[str]:
        """Get expected keys from the dataclass fields."""
        return {field.name for field in dataclasses.fields(cls)}


@dataclasses.dataclass
class TEQuantizationParams:
    """Class to capture precision options for training and evaluation."""

    training_recipe: TEQuantizationRecipe
    """Precision override for when self.training is True"""
    evaluation_recipe: Optional[TEQuantizationRecipe]
    """
    Precision override for when self.training is False.
    If None, training_recipe is used.
    """

    @staticmethod
    def parse_from_config(quant_config: QuantizationConfig) -> "TEQuantizationParams":
        """Parses quantization config for a layer or throw an error."""
        config = quant_config.config
        try:
            config_type = TransformerEngineConfigType(config[_TE_CONFIG_TYPE_KEY])
        except KeyError:
            raise ValueError(
                f"TransformerEngine config dictionary must have '{_TE_CONFIG_TYPE_KEY}' key."
            )
        except ValueError:
            raise ValueError(f"Unsupported config type '{config[_TE_CONFIG_TYPE_KEY]}'.")

        if config_type == TransformerEngineConfigType.TEQuantizationParams:
            if 'training_recipe' not in config.keys():
                raise ValueError(
                    "TransformerEngine config dictionary must have 'training_recipe' key"
                )
            training_recipe = TEQuantizationRecipe.parse_from_config(config['training_recipe'])
            if 'evaluation_recipe' not in config.keys():
                evaluation_recipe = None
                assert len(config.keys()) == 2
            else:
                evaluation_recipe = TEQuantizationRecipe.parse_from_config(
                    config['evaluation_recipe']
                )
                assert len(config.keys()) == 3
            return TEQuantizationParams(
                training_recipe=training_recipe, evaluation_recipe=evaluation_recipe
            )
        else:
            raise NotImplementedError(f"Unhandled configuration type {config_type}")


def _get_fp8_autocast_for_quant_recipe(qrecipe: TEQuantizationRecipe):
    if FP8GlobalStateManager.is_fp8_enabled():
        if not qrecipe.override_quantized_autocast:
            return nullcontext()
    else:
        if not qrecipe.override_nonquantized_autocast:
            return nullcontext()

    if qrecipe.fp8_quantization_recipe is None and qrecipe.fp4_quantization_recipe is None:
        # Force BF16 for this layer and override autocast
        return fp8_autocast(enabled=False)
    else:
        amax_group = None
        if model_parallel_is_initialized():
            amax_group = get_amax_reduction_group(
                with_context_parallel=True, tp_only_amax_red=qrecipe.tp_only_amax_red
            )
        if (
            qrecipe.fp8_quantization_recipe == Fp8Recipe.custom
            or qrecipe.fp4_quantization_recipe == Fp4Recipe.custom
        ):
            from megatron.core.fp8_utils import _get_custom_recipe

            assert qrecipe.custom_recipe_factory is not None
            quant_recipe = _get_custom_recipe(qrecipe.custom_recipe_factory)
        elif qrecipe.fp8_quantization_recipe is not None:
            if qrecipe.fp8_format == "e4m3":
                fp8_format = te.common.recipe.Format.E4M3
            elif qrecipe.fp8_format == "hybrid":
                fp8_format = te.common.recipe.Format.HYBRID
            else:
                raise ValueError(f"Unhandled fp8_format {qrecipe.fp8_format}")

            if qrecipe.fp8_quantization_recipe == Fp8Recipe.tensorwise:
                quant_recipe = te.common.recipe.Float8CurrentScaling(fp8_format=fp8_format)
            elif qrecipe.fp8_quantization_recipe == Fp8Recipe.blockwise:
                quant_recipe = te.common.recipe.Float8BlockScaling(fp8_format=fp8_format)
            elif qrecipe.fp8_quantization_recipe == Fp8Recipe.mxfp8:
                quant_recipe = te.common.recipe.MXFP8BlockScaling(fp8_format=fp8_format)
            else:
                raise ValueError(f"Unhandled fp8 recipe: {qrecipe.fp8_quantization_recipe}")
        else:
            # Fp4 configured.
            if qrecipe.fp4_quantization_recipe == Fp4Recipe.nvfp4:
                quant_recipe = te.common.recipe.NVFP4BlockScaling()
            else:
                raise ValueError(f"Unhandled fp4 recipe: {qrecipe.fp8_quantization_recipe}")

        return fp8_autocast(enabled=True, fp8_recipe=quant_recipe, fp8_group=amax_group)


def _get_fp8_autocast_for_quant_params(qparams: TEQuantizationParams | None, training: bool):
    if qparams is None:
        return nullcontext()
    elif not training and qparams.evaluation_recipe is not None:
        return _get_fp8_autocast_for_quant_recipe(qparams.evaluation_recipe)
    else:
        return _get_fp8_autocast_for_quant_recipe(qparams.training_recipe)


def _get_should_context_be_quantized_recipe(
    qrecipe: TEQuantizationRecipe, is_original_context_quantized: bool
):
    if is_original_context_quantized:
        if not qrecipe.override_quantized_autocast:
            return is_original_context_quantized
    else:
        if not qrecipe.override_nonquantized_autocast:
            return is_original_context_quantized
    if qrecipe.fp8_quantization_recipe is None and qrecipe.fp4_quantization_recipe is None:
        # Force BF16 for this layer and override autocast
        return False
    else:
        return True


def _get_should_context_be_quantized_params(
    qparams: TEQuantizationParams | None, training: bool, is_context_quantized: bool
):
    if qparams is None:
        return is_context_quantized
    elif not training and qparams.evaluation_recipe is not None:
        return _get_should_context_be_quantized_recipe(
            qparams.evaluation_recipe, is_context_quantized
        )
    else:
        return _get_should_context_be_quantized_recipe(
            qparams.training_recipe, is_context_quantized
        )


def _get_extra_te_kwargs(config: TransformerConfig):
    extra_transformer_engine_kwargs = {"params_dtype": config.params_dtype}

    if is_te_min_version("0.12.0"):
        if config.use_cpu_initialization:
            extra_transformer_engine_kwargs["device"] = "cpu"
        elif config.init_model_with_meta_device:
            extra_transformer_engine_kwargs["device"] = "meta"
        else:
            extra_transformer_engine_kwargs["device"] = torch.cuda.current_device()
    return extra_transformer_engine_kwargs


def condition_init_method(config, init_method):
    """Condition TE init_method on config.perform_initialization."""
    return init_method if config.perform_initialization else (lambda w: None)


def split_te_layernorm_column_parallel_linear(
    fused_layer,
    config,
    init_method: Optional[callable] = None,
    tp_group: Optional[torch.distributed.ProcessGroup] = None,
):
    """
    Split a TELayerNormColumnParallelLinear into separate TENorm and TEColumnParallelLinear layers.

    Args:
        fused_layer: The fused TELayerNormColumnParallelLinear layer to split
        config: TransformerConfig to use for creating the new layers
        init_method: Initialization method for the linear layer (optional)
        tp_group: Tensor parallel group (optional)

    Returns:
        A tuple of (TENorm, TEColumnParallelLinear) with weights copied from the fused layer
    """

    # Extract dimensions from the fused layer
    in_features = fused_layer.in_features
    out_features = fused_layer.out_features * fused_layer.tp_size

    # Create the norm layer
    norm_layer = TENorm(config=config, hidden_size=in_features, eps=fused_layer.eps)

    with torch.no_grad():
        # Copy layer norm weight
        norm_layer.weight.copy_(fused_layer.layer_norm_weight)

        # Copy layer norm bias if it exists
        if hasattr(norm_layer, 'bias') and hasattr(fused_layer, 'layer_norm_bias'):
            if fused_layer.layer_norm_bias is not None:
                norm_layer.bias.copy_(fused_layer.layer_norm_bias)

    # Create the column parallel linear layer
    linear_layer = TEColumnParallelLinear(
        input_size=in_features,
        output_size=out_features,
        config=config,
        init_method=init_method or (lambda x: None),  # Dummy init since we'll copy weights
        gather_output=False,
        bias=fused_layer.use_bias,
        skip_bias_add=fused_layer.te_return_bias,
        is_expert=False,
        tp_comm_buffer_name=fused_layer.ub_name,
        tp_group=tp_group or fused_layer.tp_group,
    )

    with torch.no_grad():
        # Copy weight
        linear_layer.weight.copy_(fused_layer.weight)

        # Copy bias if it exists
        if fused_layer.use_bias and hasattr(fused_layer, 'bias'):
            linear_layer.bias.copy_(fused_layer.bias)

    # TODO(Peter): Do we need this
    # Copy FP8 metadata if applicable
    if hasattr(fused_layer, 'fp8_meta') and fused_layer.fp8_meta is not None:
        if hasattr(linear_layer, 'fp8_meta'):
            # Copy FP8 scaling factors and other metadata
            for key in fused_layer.fp8_meta:
                if key in linear_layer.fp8_meta:
                    if isinstance(fused_layer.fp8_meta[key], dict):
                        for subkey in fused_layer.fp8_meta[key]:
                            if subkey in linear_layer.fp8_meta[key]:
                                linear_layer.fp8_meta[key][subkey] = fused_layer.fp8_meta[key][
                                    subkey
                                ]
                    else:
                        linear_layer.fp8_meta[key] = fused_layer.fp8_meta[key]

    # Set the same configuration flags
    linear_layer.sequence_parallel = fused_layer.sequence_parallel
    linear_layer.is_first_microbatch = fused_layer.is_first_microbatch
    linear_layer.disable_parameter_transpose_cache = fused_layer.disable_parameter_transpose_cache

    return norm_layer, linear_layer


if HAVE_TE and is_te_min_version("1.13.0"):

    class TEActivationOp:
        """
        A conditional wrapper to initialize an instance of Transformer-Engine's activation
        function operators (e.g. Silu, SwiGLU, etc)
        """

        def __new__(cls, config: TransformerConfig):

            layer_type = None
            if config.gated_linear_unit:
                if config.activation_func == F.silu:
                    layer_type = te.pytorch.ops.SwiGLU
                elif config.activation_func == F.gelu:
                    layer_type = te.pytorch.ops.GEGLU
                elif config.activation_func == F.silu:
                    layer_type = te.pytorch.ops.ReGLU
            else:
                if config.activation_func == F.gelu:
                    layer_type = te.pytorch.ops.GELU
                elif config.activation_func == F.silu:
                    layer_type = te.pytorch.ops.ReLU
            if layer_type is None:
                raise Exception(
                    'Only SwiGLU, GEGLU, ReGLU, GELU, ReLU are supported by '
                    'transformer engine. Please set use_te_activation_func=False'
                )
            activation_func_kwargs = {}
            if config.activation_func_fp8_input_store:
                activation_func_kwargs["cache_quantized_input"] = True
            layer = layer_type(**activation_func_kwargs)
            return layer

else:
    TEActivationOp = None


if HAVE_TE and is_te_min_version("1.13.0"):

    class TEFusedResidualRMSNorm(te.pytorch.RMSNorm):
        """
        RMSNorm with fused residual output for Megatron Core.

        Inherits from te.pytorch.RMSNorm to maintain all parameter management,
        checkpoint compatibility, and Megatron-specific features. Creates a fused
        implementation using TE's ops API that shares the base class parameters.

        The fused implementation uses:
        - MakeExtraOutput: Forks the residual connection
        - RMSNorm: Normalizes the main path

        Forward pass returns: (normalized_output, residual)
        """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # Fused implementation (stored in tuple to avoid submodule registration)
            self._fused_impl: Optional[Tuple[te.pytorch.ops.Sequential]] = None

        def _make_fused_impl(self) -> te.pytorch.ops.Sequential:
            """
            Construct fused ops pipeline that shares parameters with base RMSNorm.

            Creates MakeExtraOutput + RMSNorm ops, where the RMSNorm op shares
            the weight parameter with self.weight from the base class.
            """

            fused_impl = te.pytorch.ops.Sequential()

            # Op 1: MakeExtraOutput - forks the residual
            fused_impl.append(te.pytorch.ops.MakeExtraOutput())

            # Op 2: RMSNorm - shares weight parameter with self
            kwargs = {
                "eps": self.eps,
                "device": "meta",  # Already initialized
                "dtype": self.weight.dtype,
                "zero_centered_gamma": self.zero_centered_gamma,
            }

            # Add sm_margin if available (TE 2.5+)
            if hasattr(self, '_sm_margins'):
                kwargs["sm_margin"] = self._sm_margins

            rmsnorm_op = te.pytorch.ops.RMSNorm(self.weight.shape, **kwargs)

            rmsnorm_op.weight = self.weight

            fused_impl.append(rmsnorm_op)

            self._register_hooks_on_fused_impl(fused_impl)

            return fused_impl

        def _register_hooks_on_fused_impl(self, fused_impl: torch.nn.Module) -> None:

            forward_pre_hooks = []
            forward_post_hooks = []
            backward_pre_hooks = []
            backward_post_hooks = []

            for submodule in self.modules():
                for hook in submodule._forward_pre_hooks.values():
                    forward_pre_hooks.append((submodule, hook))
                for hook in submodule._forward_hooks.values():
                    forward_post_hooks.append((submodule, hook))
                for hook in submodule._backward_pre_hooks.values():
                    backward_pre_hooks.append((submodule, hook))
                for hook in submodule._backward_hooks.values():
                    backward_post_hooks.append((submodule, hook))

            # Pre-forward hooks
            # Note: DDP pre-forward hooks are safe since they do not
            # interact with input tensor.
            if forward_pre_hooks:
                from megatron.core.distributed import distributed_data_parallel

                if any(
                    inspect.getmodule(hook) != distributed_data_parallel
                    for _, hook in forward_pre_hooks
                ):
                    warnings.warn(
                        "TEFusedResidualRMSNorm module has a submodule with a pre-forward hook. "
                        "TEFusedResidualRMSNorm module does not expose intermediate tensors, "
                        "so the hook may have incorrect behavior if it attempts to "
                        "access the input tensor."
                    )

                def forward_pre_hook(module, *_) -> None:
                    for submodule, hook in forward_pre_hooks:
                        # Assume that hook does not interact with input
                        ret = hook(submodule, None)
                        if ret is not None:
                            raise RuntimeError(
                                "TEFusedResidualRMSNorm module does not expose "
                                "intermediate tensors, but submodule has "
                                "pre-forward hook that modifies input tensor."
                            )

                fused_impl.register_forward_pre_hook(forward_pre_hook)

            # Post-forward hooks
            if forward_post_hooks:
                warnings.warn(
                    "TEFusedResidualRMSNorm module has a submodule with a post-forward hook. "
                    "TEFusedResidualRMSNorm module does not expose intermediate tensors, "
                    "so the hook may have incorrect behavior if it attempts to "
                    "access the input or output tensors."
                )

                def forward_post_hook(module, *_) -> None:
                    for submodule, hook in forward_post_hooks:
                        # Assume that hook does not interact with input or output
                        ret = hook(submodule, None, None)
                        if ret is not None:
                            raise RuntimeError(
                                "TEFusedResidualRMSNorm module does not expose "
                                "intermediate tensors, but submodule has "
                                "post-forward hook that modifies output tensor."
                            )

                fused_impl.register_forward_hook(forward_post_hook)

            # Backward hooks
            if backward_pre_hooks:
                raise RuntimeError(
                    "TEFusedResidualRMSNorm module does not support "
                    "submodules with pre-backward hooks"
                )
            if backward_post_hooks:
                raise RuntimeError(
                    "TEFusedResidualRMSNorm module does not support "
                    "submodules with post-backward hooks"
                )

        def forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
            """
            Forward pass with fused residual output.

            Args:
                hidden_states: Input tensor [s, b, h]

            Returns:
                Tuple of (normalized_output, residual), both [s, b, h]

            Note:
                Sequential.forward() automatically returns (output, extra_outputs...)
                when MakeExtraOutput is present, so we don't need manual unpacking.
            """

            # Construct fused impl lazily on first forward
            # (in case parameters are modified after __init__)
            if self._fused_impl is None:
                self._fused_impl = (self._make_fused_impl(),)

            # Apply fused implementation
            # Sequential returns (normalized_output, residual) automatically
            return self._fused_impl[0](hidden_states)

else:
    TEFusedResidualRMSNorm = None  # type: ignore[assignment, misc]


class TENorm:
    """A conditional wrapper to initialize an instance of
    Transformer-Engine's `LayerNorm` or `RMSNorm` based on input.

    Residual fusion is a two-level opt-in mechanism:

    1. Global capability: config.fused_residual_rmsnorm must be True (enables the feature)
    2. Local intent: has_residual=True must be passed at build site (declares this specific
       norm is followed by a residual connection)

    Fusion only happens when BOTH conditions are met.

    """

    # TODO should we ditch normalization config and just use spec to choose LayerNorm vs RMSNorm?
    def __new__(
        cls,
        config: TransformerConfig,
        hidden_size: int,
        eps: float = 1e-5,
        has_residual: bool = False,
    ):
        if not HAVE_TE:
            raise ImportError(
                "Transformer Engine is not installed. "
                "Please install it with `pip install transformer-engine`."
            )

        use_fused_residual = config.fused_residual_rmsnorm and has_residual
        if use_fused_residual and config.normalization != "RMSNorm":
            raise ValueError("Fused residual is only supported " "for RMSNorm normalization")

        if config.normalization == "LayerNorm":
            norm_module = te.pytorch.LayerNorm
        elif config.normalization == "RMSNorm":
            assert hasattr(
                te.pytorch, "RMSNorm"
            ), "Transformer-Engine >= v0.11 required to use this feature"
            if use_fused_residual:
                assert (
                    TEFusedResidualRMSNorm is not None
                ), "TEFusedResidualRMSNorm requires Transformer-Engine >= v1.13.0"
                norm_module = TEFusedResidualRMSNorm
            else:
                norm_module = te.pytorch.RMSNorm
        else:
            raise Exception("Only LayerNorm and RMSNorm are currently supported")

        instance = norm_module(
            normalized_shape=hidden_size,
            eps=eps,
            sequence_parallel=config.sequence_parallel,
            zero_centered_gamma=config.layernorm_zero_centered_gamma,
            **_get_extra_te_kwargs(config),
        )

        return cast(LayerNormInterface, instance)


class TELinear(te.pytorch.Linear):
    """Wrapper for the Transformer-Engine's `Linear` layer.

    Note that if Megatron's parallel_state has not been initialized
    yet, the tp_group passed to TE will be None and must be set later
    via set_tensor_parallel_group().

    parallel_mode currently supports 3 different values:
        - "column": Split the weight matrix along output dimension (used in TEColumnParallelLinear)
        - "row": Split the weight matrix along input dimension (used in TERowParallelLinear)
        - "duplicated": No tensor parallelism and weight is duplicated across TP ranks
        - Note: For expert linear layers, we will disable communication logic here
                as TP communication is handled in token_dispatcher.
    """

    def __init__(
        self,
        input_size: int,
        output_size: int,
        *,
        parallel_mode: Optional[str],
        config: ModelParallelConfig,
        init_method: Callable,
        bias: bool,
        skip_bias_add: bool,
        skip_weight_param_allocation: bool,
        tp_comm_buffer_name: Optional[str] = None,
        is_expert: bool = False,
        symmetric_ar_type: Optional[str] = None,
        tp_group: Optional[torch.distributed.ProcessGroup] = None,
    ):
        if not HAVE_TE:
            raise ImportError(
                "Transformer Engine is not installed. "
                "Please install it with `pip install transformer-engine`."
            )

        self.config = config

        # TE returns a zero length Tensor when bias=False and
        # return_bias=True, but we prefer None.  So in that case we
        # tell TE to not return the bias, and return None
        # ourselves. This way our forward always returns two values
        # and we don't have to deal with the zero length Tensor.
        self.te_return_bias = skip_bias_add and bias
        self.is_first_microbatch = True
        self.disable_parameter_transpose_cache = self.config.disable_parameter_transpose_cache
        self.symmetric_ar_type = symmetric_ar_type
        if skip_weight_param_allocation:
            raise ValueError(
                "Transformer Engine linear layers do not support skip_weight_param_allocation"
            )

        extra_kwargs = _get_extra_te_kwargs(config)

        if self.config.delay_wgrad_compute:
            if is_te_min_version("2.3.0"):
                extra_kwargs["delay_wgrad_compute"] = self.config.delay_wgrad_compute
            else:
                raise RuntimeError("Only TE with version >=2.3.0 supports delay_wgrad_compute now.")

        if (
            self.config.tp_comm_overlap
            and tp_comm_buffer_name
            and tp_comm_buffer_name not in ["qkv", "proj", "fc1", "fc2"]
        ):
            self.config.tp_comm_overlap = False
            warnings.warn(
                f"The user buffer name {tp_comm_buffer_name} is not supported in"
                "Transformer Engine. Disabling TP communication overlap "
                "for this layer."
            )

        if is_te_min_version("0.8.0"):
            if self.config.tp_comm_overlap and parallel_mode != "duplicated":
                if is_te_min_version("1.5.0"):
                    # Use old overlap flags if they were supplied instead
                    extra_kwargs["ub_overlap_ag"] = (
                        self.config.tp_comm_overlap_ag
                        if hasattr(self.config, "tp_comm_overlap_ag")
                        else self.config.tp_comm_split_ag or self.config.tp_comm_atomic_ag
                    )
                    extra_kwargs["ub_overlap_rs"] = (
                        self.config.tp_comm_overlap_rs
                        if hasattr(self.config, "tp_comm_overlap_rs")
                        else self.config.tp_comm_split_rs or self.config.tp_comm_atomic_rs
                    )
                    # Disable ub overlap for experts.
                    if is_expert:
                        extra_kwargs["ub_overlap_ag"] = False
                        extra_kwargs["ub_overlap_rs"] = False
                else:
                    extra_kwargs["ub_split_ag"] = self.config.tp_comm_split_ag
                    extra_kwargs["ub_atomic_gemm_ag"] = self.config.tp_comm_atomic_ag
                    extra_kwargs["ub_split_rs"] = self.config.tp_comm_split_rs
                    extra_kwargs["ub_atomic_gemm_rs"] = self.config.tp_comm_atomic_rs
                    # Disable ub overlap for experts.
                    if is_expert:
                        extra_kwargs["ub_split_ag"] = False
                        extra_kwargs["ub_atomic_gemm_ag"] = False
                        extra_kwargs["ub_split_rs"] = False
                        extra_kwargs["ub_atomic_gemm_rs"] = False
                if is_te_min_version("1.0.0", check_equality=False):
                    assert (
                        tp_comm_buffer_name is not None
                    ), "Buffer name should be set to configure communication overlap settings"
                    extra_kwargs["ub_name"] = tp_comm_buffer_name

        if symmetric_ar_type is not None:
            assert is_torch_min_version("2.7.0a0"), "Must have at least torch version 2.7 or higher"
            assert is_te_min_version("2.3.0") or get_te_version() == PkgVersion(
                "2.3.0.dev0+39c0e70"
            ), "Must have at least TE version 2.3 or higher to use symmetric memory all reduce"
            extra_kwargs["symmetric_ar_type"] = symmetric_ar_type
        if parallel_mode == "duplicated":
            assert tp_group is None, "duplicated linear should not have tp_group set"
            tp_size = 1
        else:
            tp_size = get_pg_size(tp_group)

        self.expert_parallel = self.config.expert_model_parallel_size > 1
        if is_expert:
            rng_tracker_name = get_expert_parallel_rng_tracker_name()
        else:
            if parallel_mode == "duplicated":
                rng_tracker_name = get_data_parallel_rng_tracker_name()
            else:
                rng_tracker_name = None
        if is_te_min_version("1.7.0"):
            extra_kwargs["rng_tracker_name"] = rng_tracker_name

        te_parallel_mode = parallel_mode
        tp_group_for_te = tp_group
        if parallel_mode == "duplicated":
            # Handle non-parallel case
            tp_group_for_te = None
            tp_size = 1
            explicit_expert_comm = False
            te_parallel_mode = None
        else:
            # Disable communications in TE when using TP or EP by
            explicit_expert_comm = is_expert and (tp_size > 1 or self.expert_parallel)

            if explicit_expert_comm:
                if parallel_mode == "column":
                    output_size = divide(output_size, tp_size)
                elif parallel_mode == "row":
                    input_size = divide(input_size, tp_size)
                te_parallel_mode = None
                tp_size = 1
                tp_group_for_te = None

        super().__init__(
            in_features=input_size,
            out_features=output_size,
            sequence_parallel=self.config.sequence_parallel,
            fuse_wgrad_accumulation=self.config.gradient_accumulation_fusion,
            # Pass None if not initialized for backward compatibility with the ckpt converter.
            tp_group=tp_group_for_te if torch.distributed.is_initialized() else None,
            tp_size=tp_size,
            get_rng_state_tracker=(
                get_cuda_rng_tracker if get_cuda_rng_tracker().is_initialized() else None
            ),
            init_method=condition_init_method(config, init_method),
            bias=bias,
            return_bias=self.te_return_bias,
            parallel_mode=te_parallel_mode,
            **extra_kwargs,
        )
        self.te_quant_params: Optional[TEQuantizationParams] = None

        for param in self.parameters():
            if is_expert:
                # Reduce the gradient on the expert_data_parallel group for expert linear layers
                setattr(param, "allreduce", not self.expert_parallel)
            else:
                # Reduce the gradient on DP group
                setattr(param, "allreduce", True)
                if parallel_mode == "duplicated":
                    # Reduce the gradient further on the TP group since the weight is
                    # duplicated across TP ranks
                    setattr(param, "sequence_parallel", self.config.sequence_parallel)
                    # Mark as NOT tensor parallel since weight is duplicated
                    setattr(param, "tensor_model_parallel", False)

        tp_group = get_tensor_model_parallel_group_if_none(tp_group, is_expert=is_expert)
        self._tp_group = tp_group

    def finish_init(self, quantization_config: QuantizationConfig):
        """Post-init of quantization override"""
        if quantization_config is None:
            self.te_quant_params = None
        else:
            self.te_quant_params = TEQuantizationParams.parse_from_config(quantization_config)

    def will_execute_quantized(self, is_context_quantized: bool) -> bool:
        """Returns whether the module is configured to execute quantized."""
        return _get_should_context_be_quantized_params(
            self.te_quant_params, self.training, is_context_quantized
        )

    def forward(self, x):
        """Forward."""
        _is_first_microbatch = (
            None if self.disable_parameter_transpose_cache else self.is_first_microbatch
        )
        quant_context = _get_fp8_autocast_for_quant_params(self.te_quant_params, self.training)

        with quant_context:
            out = super().forward(x, is_first_microbatch=_is_first_microbatch)
        self.is_first_microbatch = False

        # TE only returns a tuple when return_bias is True, otherwise
        # it returns a single Tensor, we always want to return two
        # values regardless of the arguments.
        if self.te_return_bias:
            return out
        return out, None

    def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None):
        """Replicate cross TP/DP."""

        # Provide the dist-ckpt support when TELinear is directly used
        # It can only happen with duplicated parallel mode
        assert (
            self.parallel_mode is None
        ), "TELinear sharded_state_dict can only be used with duplicated parallel mode"
        state_dict = self.state_dict(prefix="", keep_vars=True)
        return make_sharded_tensors_for_checkpoint(
            state_dict,
            prefix,
            None,
            sharded_offsets,
            tp_group=self._tp_group,
            dp_cp_group=metadata["dp_cp_group"],
        )

    def backward_dw(self):
        """Compute weight gradients during the backward pass if delay_wgrad_compute is enabled."""
        if self.config.delay_wgrad_compute:
            super().backward_dw()


class TELayerNormColumnParallelLinear(te.pytorch.LayerNormLinear):
    """Wrapper for the Transformer-Engine's `LayerNormLinear` layer
    that combines layernorm and linear layers."""

    def __init__(
        self,
        input_size: int,
        output_size: int,
        *,
        config: TransformerConfig,
        init_method: Callable,
        gather_output: bool,
        bias: bool,
        skip_bias_add: bool,
        is_expert: bool,
        skip_weight_param_allocation: bool = False,
        tp_comm_buffer_name: Optional[str] = None,
        tp_group: Optional[torch.distributed.ProcessGroup] = None,
        stride: int = 1,
    ):
        if not HAVE_TE:
            raise ImportError(
                "Transformer Engine is not installed. "
                "Please install it with `pip install transformer-engine`."
            )

        self.config = config

        if gather_output:
            raise ValueError("Transformer Engine linear layers do not support gather_output = True")

        if is_expert:
            raise ValueError("Transformer Engine linear layers do not yet support MoE")

        if skip_weight_param_allocation:
            raise ValueError(
                "Transformer Engine linear layers do not support skip_weight_param_allocation"
            )

        # TODO: For backward compatibility, remove in v0.15.
        tp_group = get_tensor_model_parallel_group_if_none(tp_group, is_expert=is_expert)
        self._tp_group = tp_group

        # TE returns a zero length Tensor when bias=False and
        # return_bias=True, but we prefer None.  So in that case we
        # tell TE to not return the bias, and return None
        # ourselves. This way our forward always returns two values
        # and we don't have to deal with the zero length Tensor.
        self.te_return_bias = skip_bias_add and bias
        self.is_first_microbatch = True
        self.disable_parameter_transpose_cache = self.config.disable_parameter_transpose_cache
        extra_kwargs = _get_extra_te_kwargs(config)
        self.tp_size = get_pg_size(tp_group)
        self.tp_rank = get_pg_rank(tp_group)

        if self.config.delay_wgrad_compute:
            if is_te_min_version("2.3.0"):
                extra_kwargs["delay_wgrad_compute"] = self.config.delay_wgrad_compute
            else:
                raise RuntimeError("Only TE with version >=2.3.0 supports delay_wgrad_compute now.")

        # Only Transformer-Engine version >= 0.11.0 supports `RMSNorm`
        if is_te_min_version("0.11.0"):
            extra_kwargs["normalization"] = self.config.normalization
        elif self.config.normalization != "LayerNorm":
            te_version = get_te_version()
            raise ValueError(
                f"Transformer Engine v{te_version} does not support {self.config.normalization}."
            )

        if is_te_min_version("0.8.0"):
            if self.config.tp_comm_overlap:
                extra_kwargs["ub_bulk_wgrad"] = self.config.tp_comm_bulk_wgrad
                extra_kwargs["ub_bulk_dgrad"] = self.config.tp_comm_bulk_dgrad
                if is_te_min_version("1.5.0", check_equality=False):
                    # Use old overlap flags if they were supplied instead
                    extra_kwargs["ub_overlap_ag"] = (
                        self.config.tp_comm_overlap_ag
                        if hasattr(self.config, "tp_comm_overlap_ag")
                        else self.config.tp_comm_split_ag or self.config.tp_comm_atomic_ag
                    )
                    if is_te_min_version("1.6.0.dev0", check_equality=False):
                        extra_kwargs["ub_overlap_rs_dgrad"] = (
                            self.config.tp_comm_overlap_rs_dgrad
                            if hasattr(self.config, "tp_comm_overlap_rs_dgrad")
                            else False
                        )
                    if tp_comm_buffer_name == "qkv" and self.config.tp_comm_overlap_disable_qkv:
                        extra_kwargs["ub_overlap_ag"] = False
                        extra_kwargs["ub_overlap_rs_dgrad"] = False

                    if tp_comm_buffer_name == "fc1" and self.config.tp_comm_overlap_disable_fc1:
                        extra_kwargs["ub_overlap_ag"] = False
                        extra_kwargs["ub_overlap_rs_dgrad"] = False
                else:
                    extra_kwargs["ub_atomic_gemm_ag"] = self.config.tp_comm_atomic_ag
                    extra_kwargs["ub_split_ag"] = self.config.tp_comm_split_ag
                if is_te_min_version("1.0.0", check_equality=False):
                    assert (
                        tp_comm_buffer_name is not None
                    ), "Buffer name should be set to configure communication overlap settings"
                    extra_kwargs["ub_name"] = tp_comm_buffer_name

        if self.config.symmetric_ar_type is not None:
            assert is_torch_min_version("2.7.0a0"), "Must have at least torch version 2.7 or higher"
            assert is_te_min_version("2.3.0") or get_te_version() == PkgVersion(
                "2.3.0.dev0+39c0e70"
            ), "Must have at least TE version 2.3 or higher to use symmetric memory all reduce"
            extra_kwargs["symmetric_ar_type"] = self.config.symmetric_ar_type

        self.stride = stride

        super().__init__(
            in_features=input_size,
            out_features=output_size,
            eps=self.config.layernorm_epsilon,
            sequence_parallel=self.config.sequence_parallel,
            fuse_wgrad_accumulation=self.config.gradient_accumulation_fusion,
            tp_group=tp_group if torch.distributed.is_initialized() else None,
            tp_size=self.config.tensor_model_parallel_size,
            get_rng_state_tracker=(
                get_cuda_rng_tracker if get_cuda_rng_tracker().is_initialized() else None
            ),
            init_method=(
                condition_init_method(config, init_method)
                if not config.use_cpu_initialization
                else lambda w: None
            ),
            bias=bias,
            return_bias=self.te_return_bias,
            parallel_mode="column",
            return_layernorm_output=False,
            zero_centered_gamma=self.config.layernorm_zero_centered_gamma,
            **extra_kwargs,
        )
        self.te_quant_params: Optional[TEQuantizationParams] = None

        # Set proper partition_stride
        setattr(self.weight, 'partition_stride', stride)
        if bias and hasattr(self, 'bias') and self.bias is not None:
            setattr(self.bias, 'partition_stride', stride)

        if config.use_cpu_initialization:
            output_size_per_partition = divide(output_size, self.tp_size)
            _ = _initialize_affine_weight_cpu(
                self.weight,
                output_size,
                input_size,
                output_size_per_partition,
                0,
                init_method=condition_init_method(config, init_method),
                stride=stride,
                return_master_weight=False,
                rank=self.tp_rank,
                world_size=self.tp_size,
                skip_set_tensor_parallel_attributes=True,
            )
            if bias:
                self.bias = Parameter(
                    torch.empty(output_size_per_partition, dtype=config.params_dtype)
                )
                set_tensor_model_parallel_attributes(self.bias, True, 0, stride)
                with torch.no_grad():
                    self.bias.zero_()
                setattr(self.bias, "allreduce", True)

    def finish_init(self, quantization_config: QuantizationConfig):
        """Post-init of quantization override"""
        if quantization_config is None:
            self.te_quant_params = None
        else:
            self.te_quant_params = TEQuantizationParams.parse_from_config(quantization_config)

    def will_execute_quantized(self, is_context_quantized: bool) -> bool:
        """Returns whether the module is configured to execute quantized."""
        return _get_should_context_be_quantized_params(
            self.te_quant_params, self.training, is_context_quantized
        )

    def forward(self, x):
        """Forward."""
        _is_first_microbatch = (
            None if self.disable_parameter_transpose_cache else self.is_first_microbatch
        )
        quant_context = _get_fp8_autocast_for_quant_params(self.te_quant_params, self.training)

        with quant_context:
            out = super().forward(x, is_first_microbatch=_is_first_microbatch)

        self.is_first_microbatch = False

        # TE only returns a tuple when return_bias is True, otherwise
        # it returns a single Tensor, we always want to return two
        # values regardless of the arguments.
        if self.te_return_bias:
            return out
        return out, None

    def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None):
        """Sharding along axis 0, bias sharded"""
        metadata = ensure_metadata_has_dp_cp_group(metadata)
        state_dict = self.state_dict(prefix="", keep_vars=True)
        return make_sharded_tensors_for_checkpoint(
            state_dict,
            prefix,
            {"weight": 0, "bias": 0},
            sharded_offsets,
            tp_group=self._tp_group,
            dp_cp_group=metadata["dp_cp_group"],
        )

    @override
    def extra_repr(self) -> str:
        """Extra context to add to the module's string representation."""
        return (
            f"in_features={self.in_features}, "
            f"out_features={self.out_features}, "
            f"bias={self.use_bias}, "
            f"TP={self.tp_size}"
        )

    def backward_dw(self):
        """Compute weight gradients during the backward pass if delay_wgrad_compute is enabled."""
        if self.config.delay_wgrad_compute:
            super().backward_dw()


class TEColumnParallelLinear(TELinear):
    """Wrapper for the Transformer-Engine's `Linear` layer
    but specialized similar to megatron's `ColumnParallelLinear` layer."""

    def __init__(
        self,
        input_size: int,
        output_size: int,
        *,
        config: ModelParallelConfig,
        init_method: Callable,
        gather_output: bool,
        bias: bool,
        skip_bias_add: bool,
        is_expert: bool,
        skip_weight_param_allocation: bool = False,
        tp_comm_buffer_name: Optional[str] = None,
        tp_group: Optional[torch.distributed.ProcessGroup] = None,
        stride: int = 1,
    ):
        if not HAVE_TE:
            raise ImportError(
                "Transformer Engine is not installed. "
                "Please install it with `pip install transformer-engine`."
            )

        if gather_output:
            raise ValueError("Transformer Engine linear layers do not support gather_output = True")
        tp_group = get_tensor_model_parallel_group_if_none(tp_group, is_expert=is_expert)
        self._tp_group = tp_group
        world_size = get_pg_size(tp_group)
        rank = get_pg_rank(tp_group)
        self.stride = stride

        super().__init__(
            input_size=input_size,
            output_size=output_size,
            parallel_mode="column",
            config=config,
            init_method=(
                condition_init_method(config, init_method)
                if not config.use_cpu_initialization
                else lambda w: None
            ),
            bias=bias,
            skip_bias_add=skip_bias_add,
            is_expert=is_expert,
            skip_weight_param_allocation=skip_weight_param_allocation,
            tp_comm_buffer_name=tp_comm_buffer_name,
            symmetric_ar_type=config.symmetric_ar_type,
            tp_group=tp_group,
        )

        # Set proper partition_stride
        setattr(self.weight, 'partition_stride', stride)
        if bias and hasattr(self, 'bias') and self.bias is not None:
            setattr(self.bias, 'partition_stride', stride)

        if config.use_cpu_initialization:
            output_size_per_partition = divide(output_size, world_size)
            _ = _initialize_affine_weight_cpu(
                self.weight,
                output_size,
                input_size,
                output_size_per_partition,
                0,
                init_method=condition_init_method(config, init_method),
                stride=stride,
                return_master_weight=False,
                rank=rank,
                world_size=world_size,
                skip_set_tensor_parallel_attributes=True,
            )
            if bias:
                self.bias = Parameter(
                    torch.empty(output_size_per_partition, dtype=config.params_dtype)
                )
                set_tensor_model_parallel_attributes(self.bias, True, 0, stride)
                with torch.no_grad():
                    self.bias.zero_()
                setattr(self.bias, "allreduce", True)

    def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None):
        """Sharding along axis 0, bias sharded"""
        state_dict = self.state_dict(prefix="", keep_vars=True)
        return make_sharded_tensors_for_checkpoint(
            state_dict,
            prefix,
            {"weight": 0, "bias": 0},
            sharded_offsets,
            tp_group=self._tp_group,
            dp_cp_group=metadata["dp_cp_group"],
        )

    @override
    def extra_repr(self) -> str:
        """Extra context to add to the module's string representation."""
        return (
            f"in_features={self.in_features}, "
            f"out_features={self.out_features}, "
            f"bias={self.use_bias}, "
            f"TP={self.tp_size}"
        )

    def backward_dw(self):
        """Compute weight gradients during the backward pass if delay_wgrad_compute is enabled."""
        if self.config.delay_wgrad_compute:
            super().backward_dw()


class TERowParallelLinear(TELinear):
    """Wrapper for the Transformer-Engine's `Linear` layer
    but specialized similar to megatron's `RowParallelLinear` layer."""

    def __init__(
        self,
        input_size: int,
        output_size: int,
        *,
        config: ModelParallelConfig,
        init_method: Callable,
        bias: bool,
        input_is_parallel: bool,
        skip_bias_add: bool,
        is_expert: bool,
        tp_comm_buffer_name: Optional[str] = None,
        tp_group: Optional[torch.distributed.ProcessGroup] = None,
    ):
        if not HAVE_TE:
            raise ImportError(
                "Transformer Engine is not installed. "
                "Please install it with `pip install transformer-engine`."
            )

        if not input_is_parallel:
            raise ValueError(
                "Transformer Engine linear layers do not support input_is_parallel = False"
            )
        tp_group = get_tensor_model_parallel_group_if_none(tp_group, is_expert=is_expert)
        self._tp_group = tp_group

        super().__init__(
            input_size=input_size,
            output_size=output_size,
            parallel_mode="row",
            config=config,
            init_method=(
                condition_init_method(config, init_method)
                if not config.use_cpu_initialization
                else lambda w: None
            ),
            bias=bias,
            skip_bias_add=skip_bias_add,
            skip_weight_param_allocation=False,
            # We don't currently use this for row parallel layers # pylint: disable=line-too-long
            is_expert=is_expert,
            tp_comm_buffer_name=tp_comm_buffer_name,
            symmetric_ar_type=config.symmetric_ar_type,
            tp_group=tp_group,
        )
        if config.use_cpu_initialization:
            world_size = get_pg_size(tp_group)
            rank = get_pg_rank(tp_group)
            input_size_per_partition = divide(input_size, world_size)
            self.master_weight = _initialize_affine_weight_cpu(
                self.weight,
                output_size,
                input_size,
                input_size_per_partition,
                1,
                init_method=condition_init_method(config, init_method),
                stride=1,
                return_master_weight=False,
                params_dtype=config.params_dtype,
                rank=rank,
                world_size=world_size,
                skip_set_tensor_parallel_attributes=True,
            )
            if bias:
                self.bias = Parameter(torch.empty(output_size, dtype=config.params_dtype))
                # Always initialize bias to zero.
                with torch.no_grad():
                    self.bias.zero_()
                setattr(self.bias, "allreduce", True)
                setattr(self.bias, "sequence_parallel", config.sequence_parallel)

    def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None):
        """Sharding along axis 1, bias not sharded"""
        state_dict = self.state_dict(prefix="", keep_vars=True)
        return make_sharded_tensors_for_checkpoint(
            state_dict,
            prefix,
            {"weight": 1},
            sharded_offsets,
            tp_group=self._tp_group,
            dp_cp_group=metadata["dp_cp_group"],
        )

    @override
    def extra_repr(self) -> str:
        """Extra context to add to the module's string representation."""
        return (
            f"in_features={self.in_features}, "
            f"out_features={self.out_features}, "
            f"bias={self.use_bias}, "
            f"TP={self.tp_size}"
        )

    def backward_dw(self):
        """Compute weight gradients during the backward pass if delay_wgrad_compute is enabled."""
        if self.config.delay_wgrad_compute:
            super().backward_dw()


class TEDotProductAttention(te.pytorch.DotProductAttention):
    """Wrapper for the Transformer-Engine's `DotProductAttention` layer
    that also has "flash attention" enabled.

    Note that if Megatron's parallel_state has not been initialized yet, the
    tp_group and cp_group passed to TE will be None and must be set later
    via set_tensor_parallel_group() and set_context_parallel_group().
    """

    cp_stream: torch.cuda.Stream = None

    def __init__(
        self,
        config: TransformerConfig,
        layer_number: int,
        attn_mask_type: AttnMaskType,
        attention_type: str,
        attention_dropout: Optional[float] = None,
        softmax_scale: Optional[float] = None,
        k_channels: Optional[int] = None,
        v_channels: Optional[int] = None,
        num_splits: Optional[int] = None,
        cp_comm_type: Optional[str] = "p2p",
        pg_collection: Optional[ProcessGroupCollection] = None,
    ):
        if not HAVE_TE:
            raise ImportError(
                "Transformer Engine is not installed. "
                "Please install it with `pip install transformer-engine`."
            )

        self.config = config
        self.te_forward_mask_type = False
        self.qkv_format: str = "sbhd"
        # Default to 1 split when batch-invariant mode is enabled, unless explicitly overridden
        self.num_splits: Optional[int] = (
            1 if (num_splits is None and self.config.batch_invariant_mode) else num_splits
        )

        if self.config.apply_query_key_layer_scaling != bool(
            int(os.getenv("NVTE_APPLY_QK_LAYER_SCALING", "0"))
        ):
            raise ValueError(
                f"apply_query_key_layer_scaling is {self.config.apply_query_key_layer_scaling} "
                f"but environment variable NVTE_APPLY_QK_LAYER_SCALING is "
                f"{os.getenv('NVTE_APPLY_QK_LAYER_SCALING')}. Transformer Engine does not support "
                f"setting query key layer scaling via argument, so these two must match."
            )

        extra_kwargs: dict[str, Any] = {}
        if is_te_min_version("0.11.0"):
            extra_kwargs["num_gqa_groups"] = self.config.num_query_groups
        elif self.config.num_query_groups != self.config.num_attention_heads:
            raise ValueError(
                f"Transformer Engine v{get_te_version()} does not support Grouped Query Attention, "
                f"use a newer version of Transformer Engine. "
                f"(num_query_groups ({self.config.num_query_groups}) != "
                f"num_attention_heads ({self.config.num_attention_heads}))"
            )

        if pg_collection is None:
            pg_collection = ProcessGroupCollection(
                tp=get_tensor_model_parallel_group(check_initialized=False),
                cp=get_context_parallel_group(check_initialized=False),
                hcp=get_hierarchical_context_parallel_groups(check_initialized=False),
            )
        else:
            assert hasattr(
                pg_collection, "tp"
            ), "TEDotProductAttention pg_collection must have tp pg"
            assert hasattr(
                pg_collection, "cp"
            ), "TEDotProductAttention pg_collection must have cp pg"
            if cp_comm_type == "a2a+p2p":
                assert hasattr(
                    pg_collection, "hcp"
                ), "TEDotProductAttention pg_collection must have hierarchical cp pg"
        self._tp_group = pg_collection.tp

        if is_te_min_version("0.10.0"):
            extra_kwargs["attention_type"] = attention_type
            # older version don't need attention_type

        if is_te_min_version("0.12.0", check_equality=False):
            self.te_forward_mask_type = True

        # This check is important as CP config can be disabled while having a valid CP group
        # Example - Disabling CP for encoder while a valid CP group exists for decoder
        if self.config.context_parallel_size > 1:
            assert is_te_min_version(
                "1.0.0"
            ), "Only Transformer-Engine version >= 1.0.0 supports context parallelism!"
            if getattr(TEDotProductAttention, "cp_stream") is None:
                TEDotProductAttention.cp_stream = torch.cuda.Stream()
            extra_kwargs["cp_group"] = pg_collection.cp
            extra_kwargs["cp_global_ranks"] = torch.distributed.get_process_group_ranks(
                pg_collection.cp
            )
            extra_kwargs["cp_stream"] = TEDotProductAttention.cp_stream
            if is_te_min_version("1.10.0"):
                if cp_comm_type is None:
                    extra_kwargs["cp_comm_type"] = "p2p"
                elif cp_comm_type == "a2a+p2p":
                    assert is_te_min_version("1.12.0"), (
                        f"Transformer-Engine v{get_te_version()} must be >= 1.12.0 to support"
                        "hierarchical cp commucation."
                    )
                    extra_kwargs["cp_comm_type"] = "a2a+p2p"
                    extra_kwargs["cp_group"] = get_hierarchical_context_parallel_groups(
                        check_initialized=False
                    )
                else:
                    extra_kwargs["cp_comm_type"] = cp_comm_type

        if self.config.deterministic_mode:
            if int(os.getenv("NVTE_ALLOW_NONDETERMINISTIC_ALGO", "1")) != 0:
                raise RuntimeError(
                    "deterministic_mode is on and we are using DotProductAttention from "
                    "Transformer Engine, but NVTE_ALLOW_NONDETERMINISTIC_ALGO is not 0. "
                    f"Currently set to: {os.getenv('NVTE_ALLOW_NONDETERMINISTIC_ALGO', 'not set')}."
                )

        if is_layer_window_attention(
            config.window_size, config.window_attn_skip_freq, layer_number
        ):
            # Check version
            assert is_te_min_version("1.2.0"), (
                f"Transformer-Engine v{get_te_version()} must be >= 1.2.0 to support"
                "sliding window attention."
            )
            extra_kwargs["window_size"] = config.window_size

        if is_te_min_version("1.10.0"):
            # TE 1.10.0 introduces the ability to set the different k and v channels
            kv_channels = (
                (k_channels, v_channels)
                if k_channels is not None and v_channels is not None
                else self.config.kv_channels
            )
            extra_kwargs["softmax_scale"] = softmax_scale
        else:
            kv_channels = self.config.kv_channels

        if self.config.softmax_type != "vanilla":
            assert is_te_min_version("2.8.0"), (
                f"Transformer-Engine v{get_te_version()} must be >= 2.8.0 to support"
                "`softmax_type`."
            )
            extra_kwargs["softmax_type"] = self.config.softmax_type

        self.kept_packed_seq_params = set(
            field.name for field in dataclasses.fields(PackedSeqParams)
        )

        if get_te_version() < PkgVersion("1.3.0"):
            # TE 1.3.0 introduces precomputing max_seqlen to remove unnecessary kernels and D2H
            # copies (#555)
            # These two arguments did not exist prior to 1.3.0
            self.kept_packed_seq_params.discard("max_seqlen_q")
            self.kept_packed_seq_params.discard("max_seqlen_kv")

        if get_te_version() < PkgVersion("1.10.0"):
            # TE 1.8.0 introduces cu_seqlens_padded which is the cu_seqlens with paddings counted
            # in each individual sequence in THD format dataset
            # These two arguments did not exist prior to 1.8.0. Full support added in 1.10.0 (#1012)
            self.kept_packed_seq_params.discard("cu_seqlens_q_padded")
            self.kept_packed_seq_params.discard("cu_seqlens_kv_padded")

        # total_tokens and seq_idx are only for Mamba and should not be forwarded to TE attention.
        self.kept_packed_seq_params.discard("total_tokens")
        self.kept_packed_seq_params.discard("seq_idx")

        if config.qk_clip or config.log_max_attention_logit:
            # qk-clip is only supported in TE 2.9.0 and later
            assert is_te_min_version("2.9.0"), "qk-clip is only supported in TE 2.9.0 and later"

            # TE 2.9.0 introduces return_max_logit for qk-clip getting the max attention logits
            extra_kwargs["return_max_logit"] = True
            self.current_max_attn_logits = None

        super().__init__(
            num_attention_heads=self.config.num_attention_heads,
            kv_channels=kv_channels,
            attention_dropout=(
                self.config.attention_dropout if attention_dropout is None else attention_dropout
            ),
            attn_mask_type=attn_mask_type.name,
            sequence_parallel=self.config.sequence_parallel,
            tp_size=self.config.tensor_model_parallel_size,
            get_rng_state_tracker=(
                get_cuda_rng_tracker if get_cuda_rng_tracker().is_initialized() else None
            ),
            tp_group=pg_collection.tp,
            layer_number=layer_number,
            **extra_kwargs,
        )

    def forward(
        self,
        query: Tensor,
        key: Tensor,
        value: Tensor,
        attention_mask: Optional[Tensor],
        attn_mask_type: AttnMaskType,
        attention_bias: Optional[Tensor] = None,
        packed_seq_params: Optional[PackedSeqParams] = None,
        num_splits: Optional[int] = None,
    ) -> torch.Tensor:
        """Forward."""
        if packed_seq_params is not None:
            # If Dynamic CP group is provided, update TE DPA CP group
            if packed_seq_params.cp_group is not None:
                self.cp_group = packed_seq_params.cp_group
                super().set_context_parallel_group(
                    self.cp_group,
                    torch.distributed.get_process_group_ranks(self.cp_group),
                    TEDotProductAttention.cp_stream,
                    self.cp_comm_type,
                )
            # If cp_group is None but local_cp_size is provided,
            # Indicates to turn off CP dynamically
            elif packed_seq_params.local_cp_size is not None:
                assert (
                    packed_seq_params.local_cp_size == 1
                ), "local_cp_size must be == 1 if provided without cp_group"
                super().set_context_parallel_group(None, None, None, self.cp_comm_type)
            self.kept_packed_seq_params.discard("cp_group")
            self.kept_packed_seq_params.discard("local_cp_size")

        # Default to constructor-provided num_splits unless explicitly overridden
        if num_splits is None:
            num_splits = self.num_splits
        if num_splits is not None:
            assert is_te_min_version("2.10.0"), (
                f"Transformer-Engine v{get_te_version()} must be >= 2.10.0 to support" "num_splits."
            )

        packed_seq_kwargs = (
            {key: getattr(packed_seq_params, key) for key in self.kept_packed_seq_params}
            if packed_seq_params is not None
            else {}
        )
        qkv_format = packed_seq_kwargs.get('qkv_format', self.qkv_format)

        attention_bias_kwargs = {}
        if attention_bias is not None:
            assert is_te_min_version("1.2.0"), (
                f"Transformer-Engine v{get_te_version()} must be >= 1.2.0 to support"
                "`attention_bias`."
            )
            attention_bias_kwargs = dict(
                core_attention_bias_type="post_scale_bias", core_attention_bias=attention_bias
            )

        if attn_mask_type == AttnMaskType.no_mask and self.config.window_size is not None:
            if (qkv_format == "bshd" and query.size(1) == 1) or (
                qkv_format == "sbhd" and query.size(0) == 1
            ):
                #  need to change mask type for SWA inference decode stage.
                attn_mask_type = AttnMaskType.causal_bottom_right
        if self.te_forward_mask_type:
            if qkv_format == "thd" and is_te_min_version("1.7.0"):
                # thd format uses flash attention with cuDNN kernel which requires is_padding=True,
                # so the only acceptable mask types are `padding_causal` and `padding`. These do not
                # necessarily indicate there are padded tokens in the sequence.
                if attn_mask_type == AttnMaskType.causal:
                    attn_mask_type = AttnMaskType.padding_causal
                elif attn_mask_type == AttnMaskType.no_mask:
                    attn_mask_type = AttnMaskType.padding
            _fa_kwargs = dict(
                attn_mask_type=attn_mask_type.name, **attention_bias_kwargs, **packed_seq_kwargs
            )
            if num_splits is not None:
                _fa_kwargs["num_splits"] = num_splits

            core_attn_out = super().forward(query, key, value, attention_mask, **_fa_kwargs)

            if self.config.qk_clip or self.config.log_max_attention_logit:
                # qk-clip is only supported in TE 2.9.0 and later
                assert is_te_min_version("2.9.0"), "qk-clip is only supported in TE 2.9.0 and later"

                # Update Q K outside of TE Attention API
                core_attn_out, batch_max_attention_logits = core_attn_out

                # Update QK_Clip balancing eta
                if self.current_max_attn_logits is None:
                    self.current_max_attn_logits = batch_max_attention_logits
                else:
                    self.current_max_attn_logits = torch.max(
                        self.current_max_attn_logits, batch_max_attention_logits
                    )

        else:
            _fa_kwargs = dict(**attention_bias_kwargs, **packed_seq_kwargs)
            if num_splits is not None:
                _fa_kwargs["num_splits"] = num_splits
            core_attn_out = super().forward(query, key, value, attention_mask, **_fa_kwargs)

        return core_attn_out

    def sharded_state_dict(
        self,
        prefix: str = '',
        sharded_offsets: Tuple[Tuple[int, int, int]] = (),
        metadata: Optional[dict] = None,
    ) -> ShardedStateDict:
        """Sharded state dict for the learnable softmax offset parameter"""
        if self.config.softmax_type == "learnable":
            state_dict = self.state_dict(prefix="", keep_vars=True)
        else:
            state_dict = {}
        return make_sharded_tensors_for_checkpoint(
            state_dict,
            prefix,
            {'softmax_offset': 0},
            sharded_offsets,
            tp_group=self._tp_group,
            dp_cp_group=metadata["dp_cp_group"],
        )


if HAVE_TE and is_te_min_version("1.9.0.dev0"):

    class TEGroupedLinear(te.pytorch.GroupedLinear):
        """
        Wrapper for the Transformer-Engine's `GroupedLinear` layer.

        Note that if Megatron's parallel_state has not been initialized
        yet, the tp_group passed to TE will be None and must be set later
        via set_tensor_parallel_group().
        """

        def __init__(
            self,
            num_gemms: int,
            input_size: int,
            output_size: int,
            *,
            parallel_mode: Optional[str],
            config: ModelParallelConfig,
            init_method: Callable,
            bias: bool,
            skip_bias_add: bool,
            is_expert: bool = False,
            tp_comm_buffer_name: Optional[str] = None,
            pg_collection: Optional[ProcessGroupCollection] = None,
        ):
            self.config = config

            # TE returns a zero length Tensor when bias=False and
            # return_bias=True, but we prefer None.  So in that case we
            # tell TE to not return the bias, and return None
            # ourselves. This way our forward always returns two values
            # and we don't have to deal with the zero length Tensor.
            self.te_return_bias = skip_bias_add and bias
            self.is_first_microbatch = True
            self.disable_parameter_transpose_cache = self.config.disable_parameter_transpose_cache

            extra_kwargs = _get_extra_te_kwargs(config)

            if self.config.delay_wgrad_compute:
                if is_te_min_version("2.3.0"):
                    extra_kwargs["delay_wgrad_compute"] = self.config.delay_wgrad_compute
                else:
                    raise RuntimeError(
                        "Only TE with version >=2.3.0 supports delay_wgrad_compute now."
                    )

            extra_kwargs["ub_name"] = tp_comm_buffer_name

            self.expert_parallel = self.config.expert_model_parallel_size > 1
            if is_expert:
                extra_kwargs["rng_tracker_name"] = get_expert_parallel_rng_tracker_name()

            # The comms between TP and EP group is explicitly handled by MoE token dispatcher.
            # So we disable comms by making TE agnostic of model parallel.
            if pg_collection is None:
                pg_collection = ProcessGroupCollection.use_mpu_process_groups()
            self._pg_collection = pg_collection
            assert is_expert, "TEGroupedLinear only supports expert parallelism"
            tp_group = pg_collection.expt_tp
            self._tp_group = tp_group
            tp_size = get_pg_size(tp_group)
            tp_group_for_te = tp_group

            self.explicit_expert_comm = is_expert and (tp_size > 1 or self.expert_parallel)

            # Save original parallel_mode before clearing it for explicit_expert_comm.
            # When explicit_expert_comm is True, Megatron handles TP communication externally
            # and passes parallel_mode=None to TE. This causes TE to set partition_dim=0 on
            # all weights (its default for non-parallel mode). We need to fix this after init
            # so that refit/resharding can correctly identify which dimension is TP-partitioned.
            original_parallel_mode = parallel_mode

            if self.explicit_expert_comm:
                if parallel_mode == "column":
                    output_size = divide(output_size, tp_size)
                elif parallel_mode == "row":
                    input_size = divide(input_size, tp_size)
                parallel_mode = None
                tp_size = 1
                tp_group_for_te = None

            super().__init__(
                num_gemms=num_gemms,
                in_features=input_size,
                out_features=output_size,
                sequence_parallel=self.config.sequence_parallel,
                fuse_wgrad_accumulation=self.config.gradient_accumulation_fusion,
                tp_group=tp_group_for_te if torch.distributed.is_initialized() else None,
                tp_size=tp_size,
                get_rng_state_tracker=(
                    get_cuda_rng_tracker if get_cuda_rng_tracker().is_initialized() else None
                ),
                init_method=condition_init_method(config, init_method),
                bias=bias,
                return_bias=self.te_return_bias,
                parallel_mode=parallel_mode,
                **extra_kwargs,
            )
            self.te_quant_params: Optional[TEQuantizationParams] = None
            for param in self.parameters():
                setattr(param, "allreduce", not (is_expert and self.expert_parallel))

            # Explicitly stamp partition_dim and partition_stride on expert weight
            # tensors when explicit_expert_comm cleared parallel_mode.  TE ≤2.12
            # set these internally; TE ≥2.13 no longer does (parallel_mode=None
            # is passed due to explicit_expert_comm).  The resharding/refit planner
            # relies on partition_dim to correctly plan TP gather/scatter operations.
            # NOTE: we intentionally do NOT stamp tensor_model_parallel here —
            # doing so would change num-zeros gradient counting.
            if self.explicit_expert_comm and original_parallel_mode in ("column", "row"):
                part_dim = 0 if original_parallel_mode == "column" else 1
                for i in range(num_gemms):
                    weight = getattr(self, f"weight{i}", None)
                    if weight is not None:
                        setattr(weight, "partition_dim", part_dim)
                        setattr(weight, "partition_stride", 1)

            def merge_extra_states(
                self,
                state_dict,
                prefix,
                local_metadata,
                strict,
                missing_keys,
                unexpected_keys,
                error_msgs,
            ):
                """
                Merge multiple "_extra_state" into one.
                """
                self.init_fp8_metadata(num_gemms=self.num_gemms)
                # When resume training, loading ckpt is out of fp8_autocast context.
                # So we need to manually detect from the state_dict.
                fp8_checkpoint = any("_extra_state" in str(key) for key in state_dict.keys())

                if not fp8_checkpoint:
                    return

                try:
                    state_list = [
                        state_dict.pop(f"{prefix}_extra_state{i}") for i in range(1, self.num_gemms)
                    ]
                except KeyError:
                    # "_extra_state{i}" only exists for dist-ckpt. Return for torch native ckpt.
                    return

                # Early return conditions:
                # 1. Empty state_dict
                # 2. Empty state_list
                # 3. _extra_state is None
                # 4. _extra_state does not contain any information
                if (
                    not state_dict
                    or not state_list
                    or state_dict.get(f"{prefix}_extra_state") is None
                    or self._decode_extra_state(state_dict[f"{prefix}_extra_state"]) is None
                ):
                    return

                state_list = [state_dict.pop(f"{prefix}_extra_state")] + state_list
                state_list = [self._decode_extra_state(state) for state in state_list]
                extra_fp8_variables = state_list[0]["extra_fp8_variables"]
                extra_fp8_variables["num_gemms"] = self.num_gemms
                extra_state = {"extra_fp8_variables": extra_fp8_variables}
                # TE 2.0 adds recipe in extra_state
                if is_te_min_version("2.0.0"):
                    self.fp8_meta["recipe"] = state_list[0]["recipe"]
                    extra_state["recipe"] = self.fp8_meta["recipe"]
                # Only delayed scaling has global fp8 meta tensors. We're not using
                # self.fp8_meta["recipe"].delayed() because it's available in TE 2.0 and later.
                if isinstance(self.fp8_meta["recipe"], te.common.recipe.DelayedScaling):
                    extra_state.update(
                        {
                            "scale_fwd": torch.cat(
                                [state["scale_fwd"].view(-1, 1) for state in state_list], dim=1
                            ).view(-1),
                            "amax_history_fwd": torch.cat(
                                [state["amax_history_fwd"].view(-1, 1) for state in state_list],
                                dim=1,
                            ).view(self.fp8_meta["recipe"].amax_history_len, -1),
                            "scale_bwd": torch.cat(
                                [state["scale_bwd"].view(-1, 1) for state in state_list], dim=1
                            ).view(-1),
                            "amax_history_bwd": torch.cat(
                                [state["amax_history_bwd"].view(-1, 1) for state in state_list],
                                dim=1,
                            ).view(self.fp8_meta["recipe"].amax_history_len, -1),
                        }
                    )
                    # TE 2.0 removes scale_inv_fwd and scale_inv_bwd
                    if not is_te_min_version("2.0.0"):
                        extra_state.update(
                            {
                                "scale_inv_fwd": torch.cat(
                                    [state["scale_inv_fwd"].view(-1, 1) for state in state_list],
                                    dim=1,
                                ).view(-1),
                                "scale_inv_bwd": torch.cat(
                                    [state["scale_inv_bwd"].view(-1, 1) for state in state_list],
                                    dim=1,
                                ).view(-1),
                            }
                        )
                state_dict[f"{prefix}_extra_state"] = self._encode_extra_state(extra_state)

            self._register_load_state_dict_pre_hook(merge_extra_states, with_module=True)

        def finish_init(self, quantization_config: QuantizationConfig):
            """Post-init of quantization override"""
            if quantization_config is None:
                self.te_quant_params = None
            else:
                self.te_quant_params = TEQuantizationParams.parse_from_config(quantization_config)

        def will_execute_quantized(self, is_context_quantized: bool) -> bool:
            """Returns whether the module is configured to execute quantized."""
            return _get_should_context_be_quantized_params(
                self.te_quant_params, self.training, is_context_quantized
            )

        def forward(self, x, m_splits):
            """Forward."""
            _is_first_microbatch = (
                None if self.disable_parameter_transpose_cache else self.is_first_microbatch
            )
            quant_context = _get_fp8_autocast_for_quant_params(self.te_quant_params, self.training)

            with quant_context:
                out = super().forward(x, m_splits, is_first_microbatch=_is_first_microbatch)
            self.is_first_microbatch = False

            # TE only returns a tuple when return_bias is True, otherwise
            # it returns a single Tensor, we always want to return two
            # values regardless of the arguments.
            if self.te_return_bias:
                return out
            return out, None

        def _encode_extra_state(self, state):
            # TE 2.0 changed the format of extra_state to be a byte tensor
            if is_te_min_version("2.0.0"):
                torch.cuda.synchronize()
                state_serialized = bytearray(pickle.dumps(state))
                state_serialized = torch.frombuffer(state_serialized, dtype=torch.uint8)
            else:
                state_serialized = io.BytesIO()
                torch.save(state, state_serialized)
            return state_serialized

        def _decode_extra_state(self, state):
            if isinstance(state, torch.Tensor):
                # No FP8 is indicated by an empty tensor we don't need to unpickle.
                if state.numel() == 0:
                    return
                return pickle.loads(state.detach().cpu().numpy().tobytes())
            elif isinstance(state, io.BytesIO):
                state.seek(0)
                return torch.load(state, map_location="cuda")
            else:
                raise RuntimeError("Unsupported checkpoint format.")

        def _split_extra_state(self, state):
            fp8_checkpoint = self.fp8_meta["fp8_checkpoint"] or self.fp8 or self.fp8_calibration

            if not fp8_checkpoint:
                return [state] * self.num_gemms

            state = self._decode_extra_state(state)
            extra_states = []
            extra_fp8_variables = state["extra_fp8_variables"]
            extra_fp8_variables["num_gemms"] = 1
            for gemm_idx in range(self.num_gemms):
                tmp_state = {"extra_fp8_variables": extra_fp8_variables}
                # TE 2.0 adds recipe in extra_state
                if is_te_min_version("2.0.0"):
                    tmp_state["recipe"] = state["recipe"]
                # Only delayed scaling has global fp8 meta tensors. We're not using
                # self.fp8_meta["recipe"].delayed() because it's available in TE 2.0 and later.
                if isinstance(self.fp8_meta["recipe"], te.common.recipe.DelayedScaling):
                    tmp_state.update(
                        {
                            "scale_fwd": state["scale_fwd"].view(3, -1)[:, gemm_idx],
                            "amax_history_fwd": state["amax_history_fwd"].view(
                                self.fp8_meta["recipe"].amax_history_len, 3, -1
                            )[:, :, gemm_idx],
                            "scale_bwd": state["scale_bwd"].view(2, -1)[:, gemm_idx],
                            "amax_history_bwd": state["amax_history_bwd"].view(
                                self.fp8_meta["recipe"].amax_history_len, 2, -1
                            )[:, :, gemm_idx],
                        }
                    )
                    # TE 2.0 removes scale_inv_fwd and scale_inv_bwd
                    if not is_te_min_version("2.0.0"):
                        tmp_state.update(
                            {
                                "scale_inv_fwd": state["scale_inv_fwd"].view(3, -1)[:, gemm_idx],
                                "scale_inv_bwd": state["scale_inv_bwd"].view(2, -1)[:, gemm_idx],
                            }
                        )
                extra_states.append(self._encode_extra_state(tmp_state))
            return extra_states

        def _sharded_state_dict_grouped(
            self, tp_axis_map, prefix="", sharded_offsets=(), metadata=None
        ):
            """
            prefix should be module_name to make keys identical to sequetial ones.
            """
            singleton_local_shards = (metadata or {}).get('singleton_local_shards', False)
            sharded_state_dict = {}
            full_state_dict = self.state_dict(prefix="", keep_vars=True)
            num_global_experts = get_pg_size(self._pg_collection.ep) * self.num_gemms
            local_expert_indices_offset = get_pg_rank(self._pg_collection.ep) * self.num_gemms
            ep_axis = len(sharded_offsets)
            extra_states = self._split_extra_state(full_state_dict["_extra_state"])
            for gemm_idx in range(self.num_gemms):
                global_expert_idx = local_expert_indices_offset + gemm_idx
                state_dict = {
                    f"{gemm_idx}.weight": full_state_dict[f"weight{gemm_idx}"],
                    f"{gemm_idx}._extra_state": extra_states[gemm_idx],
                }
                if self.use_bias:
                    state_dict[f"{gemm_idx}.bias"] = full_state_dict[f"bias{gemm_idx}"]
                if singleton_local_shards:
                    expert_prefix = f"{global_expert_idx}.{prefix}"
                    new_sharded_offsets = sharded_offsets
                else:
                    expert_prefix = prefix
                    new_sharded_offsets = (
                        *sharded_offsets,
                        (ep_axis, global_expert_idx, num_global_experts),
                    )
                sub_sd = make_sharded_tensors_for_checkpoint(
                    state_dict,
                    '',
                    tp_axis_map,
                    new_sharded_offsets,
                    tp_group=self._tp_group,
                    dp_cp_group=metadata["dp_cp_group"],
                )
                # Remove expert layers indexing from sharded keys
                replace_prefix_for_sharding(sub_sd, f"{gemm_idx}.", expert_prefix)
                sharded_state_dict.update(
                    {
                        f"{prefix}weight{gemm_idx}": sub_sd[f"{gemm_idx}.weight"],
                        f"{prefix}_extra_state{'' if gemm_idx == 0 else gemm_idx}": sub_sd[
                            f"{gemm_idx}._extra_state"
                        ],
                    }
                )
                if self.use_bias:
                    sharded_state_dict[f"{prefix}bias{gemm_idx}"] = sub_sd[f"{gemm_idx}.bias"]
            # Adjust replica ids - replication along DP modulo EP
            for k, sh_ten in sharded_state_dict.items():
                replica_id = sh_ten.replica_id
                assert (
                    len(replica_id) == 3
                ), f"Expected replica_id for {k} to be in (PP, TP, DP) format, got: {replica_id}"
                if getattr(sh_ten, "is_data_parallel_fully_shard", False):
                    edp_replica_id = 0
                else:
                    edp_replica_id = get_pg_rank(self._pg_collection.expt_dp)
                sh_ten.replica_id = (*replica_id[:2], edp_replica_id)
            return sharded_state_dict

        def backward_dw(self):
            """
            Compute weight gradients during the backward pass
            if delay_wgrad_compute is enabled.
            """
            if self.config.delay_wgrad_compute:
                super().backward_dw()

    class TEColumnParallelGroupedLinear(TEGroupedLinear):
        """
        Wrapper for the Transformer-Engine's `GroupedLinear` layer but specialized
        to column-parallel style.
        """

        def __init__(
            self,
            num_gemms: int,
            input_size: int,
            output_size: int,
            *,
            config: ModelParallelConfig,
            init_method: Callable,
            bias: bool,
            skip_bias_add: bool,
            is_expert: bool,
            tp_comm_buffer_name: Optional[str] = None,
            pg_collection: Optional[ProcessGroupCollection] = None,
        ):
            super().__init__(
                num_gemms=num_gemms,
                input_size=input_size,
                output_size=output_size,
                parallel_mode="column",
                config=config,
                init_method=condition_init_method(config, init_method),
                bias=bias,
                skip_bias_add=skip_bias_add,
                is_expert=is_expert,
                tp_comm_buffer_name=tp_comm_buffer_name,
                pg_collection=pg_collection,
            )

        def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None):
            """
            For each gemm, sharding along axis 0, bias sharded.
            Assume sharded_offsets[-1] is the expert parallel offset.
            """
            tp_axis_map = {}
            for gemm_idx in range(self.num_gemms):
                tp_axis_map.update({f"{gemm_idx}.weight": 0, f"{gemm_idx}.bias": 0})
            return super()._sharded_state_dict_grouped(
                tp_axis_map, prefix, sharded_offsets, metadata
            )

    class TERowParallelGroupedLinear(TEGroupedLinear):
        """
        Wrapper for the Transformer-Engine's `GroupedLinear` layer but specialized
        to row-parallel style.
        """

        def __init__(
            self,
            num_gemms: int,
            input_size: int,
            output_size: int,
            *,
            config: ModelParallelConfig,
            init_method: Callable,
            bias: bool,
            skip_bias_add: bool,
            is_expert: bool,
            tp_comm_buffer_name: Optional[str] = None,
            pg_collection: Optional[ProcessGroupCollection] = None,
        ):
            super().__init__(
                num_gemms=num_gemms,
                input_size=input_size,
                output_size=output_size,
                parallel_mode="row",
                config=config,
                init_method=condition_init_method(config, init_method),
                bias=bias,
                skip_bias_add=skip_bias_add,
                is_expert=is_expert,
                tp_comm_buffer_name=tp_comm_buffer_name,
                pg_collection=pg_collection,
            )

        def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None):
            """
            For each gemm, sharding along axis 1, bias not sharded.
            Assume sharded_offsets[-1] is the expert parallel offset.
            """
            tp_axis_map = {f"{gemm_idx}.weight": 1 for gemm_idx in range(self.num_gemms)}
            return super()._sharded_state_dict_grouped(
                tp_axis_map, prefix, sharded_offsets, metadata
            )

else:
    TEGroupedLinear = None  # type: ignore[assignment, misc]
    TEColumnParallelGroupedLinear = None  # type: ignore[assignment, misc]
    TERowParallelGroupedLinear = None  # type: ignore[assignment, misc]


if HAVE_TE and is_te_min_version("1.13.0"):

    class TEFusedMLP(MLP):
        """MLP wrapper using Transformer Engine's operation-based API."""

        @copy_signature(MLP.__init__)
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)

            # Fused implementation
            self._fused_impl: Optional[Tuple[te.pytorch.ops.Sequential]] = None

        def _make_fused_impl(self) -> te.pytorch.ops.Sequential:
            """Construct fused module matching MLP."""

            # Container for fusible ops
            fused_impl = te.pytorch.ops.Sequential()

            # Tensor parallelism configuration
            tp_world_size = get_tensor_model_parallel_world_size()
            tp_group = None
            if tp_world_size > 1:
                tp_group = get_tensor_model_parallel_group()

            # RNG state
            rng_state_tracker_function = None
            if get_cuda_rng_tracker().is_initialized():
                rng_state_tracker_function = get_cuda_rng_tracker

            # Check submodule types
            if not isinstance(self.linear_fc1, te.pytorch.LayerNormLinear):
                raise ValueError(
                    f"{self.__class__.__name__} expects FC1 to be "
                    "Transformer Engine LayerNormLinear, but found "
                    f"{self.linear_fc1.__class__.__name__}."
                )
            if not isinstance(self.linear_fc2, te.pytorch.Linear):
                raise ValueError(
                    f"{self.__class__.__name__} expects FC1 to be "
                    "Transformer Engine Linear, but found "
                    f"{self.linear_fc2.__class__.__name__}."
                )

            # Norm op
            norm_type = self.linear_fc1.normalization
            norm_shape = self.linear_fc1.weight.size(1)
            kwargs = {
                "eps": self.linear_fc1.eps,
                "device": "meta",
                "dtype": self.linear_fc1.layer_norm_weight.dtype,
                "zero_centered_gamma": self.linear_fc1.zero_centered_gamma,
            }
            op = None
            if norm_type == "LayerNorm":
                op = te.pytorch.ops.LayerNorm(norm_shape, **kwargs)
                op.weight = self.linear_fc1.layer_norm_weight
                op.bias = self.linear_fc1.layer_norm_bias
            elif norm_type == "RMSNorm":
                op = te.pytorch.ops.RMSNorm(norm_shape, **kwargs)
                op.weight = self.linear_fc1.layer_norm_weight
            else:
                raise ValueError(f"Unsupported normalization ({norm_type})")
            fused_impl.append(op)

            # FC1 linear op
            weight = self.linear_fc1.weight
            userbuffers_options = None
            if self.linear_fc1.config.tp_comm_overlap and self.linear_fc1.ub_name is not None:
                userbuffers_options = {"comm_name": self.linear_fc1.ub_name}
            op = te.pytorch.ops.BasicLinear(
                weight.size(1),
                weight.size(0) * tp_world_size,
                device="meta",
                dtype=weight.dtype,
                tensor_parallel_mode="column" if tp_world_size > 1 else None,
                tensor_parallel_group=tp_group,
                sequence_parallel=self.linear_fc1.sequence_parallel,
                rng_state_tracker_function=rng_state_tracker_function,
                accumulate_into_main_grad=self.linear_fc1.fuse_wgrad_accumulation,
                userbuffers_options=userbuffers_options,
            )
            op.weight = weight
            fused_impl.append(op)

            # FC1 bias op
            bias = self.linear_fc1.bias
            if isinstance(bias, torch.Tensor) and bias.numel() == 0:
                bias = None
            if bias is not None:
                op = te.pytorch.ops.Bias(bias.numel(), device="meta", dtype=bias.dtype)
                op.bias = bias
                fused_impl.append(op)

            # Activation op
            op = self._make_activation_op(
                self.activation_func,
                self.config.gated_linear_unit,
                self.config.activation_func_fp8_input_store,
            )
            fused_impl.append(op)

            # FC2 linear op
            weight = self.linear_fc2.weight
            userbuffers_options = None
            if self.linear_fc2.config.tp_comm_overlap and self.linear_fc2.ub_name is not None:
                userbuffers_options = {"comm_name": self.linear_fc2.ub_name}
            op = te.pytorch.ops.BasicLinear(
                weight.size(1),
                weight.size(0),
                device="meta",
                dtype=weight.dtype,
                rng_state_tracker_function=rng_state_tracker_function,
                accumulate_into_main_grad=self.linear_fc2.fuse_wgrad_accumulation,
                userbuffers_options=userbuffers_options,
            )
            op.weight = weight
            fused_impl.append(op)
            if tp_world_size > 1:
                if self.linear_fc2.sequence_parallel:
                    fused_impl.append(te.pytorch.ops.ReduceScatter(tp_group))
                else:
                    fused_impl.append(te.pytorch.ops.AllReduce(tp_group))

            # FC2 bias op
            if not self.linear_fc2.te_return_bias:
                bias = self.linear_fc2.bias
                if isinstance(bias, torch.Tensor) and bias.numel() == 0:
                    bias = None
                if bias is not None:
                    op = te.pytorch.ops.Bias(bias.numel(), device="meta", dtype=bias.dtype)
                    op.bias = bias
                    fused_impl.append(op)

            # Emulate submodule forward hooks if needed
            self._register_hooks_on_fused_impl(fused_impl)

            return fused_impl

        def _make_activation_op(
            self, activation_func: Callable, gated_linear_unit: bool, cache_quantized_input: bool
        ) -> te.pytorch.ops.FusibleOperation:
            """Construct activation op."""

            # Get op type
            op_type = None
            if (activation_func, gated_linear_unit) == (F.gelu, False):
                op_type = te.pytorch.ops.GELU
            elif (activation_func, gated_linear_unit) == (F.gelu, True):
                op_type = te.pytorch.ops.GEGLU
            elif (activation_func, gated_linear_unit) == (F.silu, False):
                if not is_te_min_version("2.8.0"):
                    raise NotImplementedError("SiLU activation requires Transformer Engine 2.8+")
                op_type = te.pytorch.ops.SiLU
            elif (activation_func, gated_linear_unit) == (F.silu, True):
                op_type = te.pytorch.ops.SwiGLU
            elif (activation_func, gated_linear_unit) == (F.relu, False):
                op_type = te.pytorch.ops.ReLU
            elif (activation_func, gated_linear_unit) == (F.relu, True):
                op_type = te.pytorch.ops.ReGLU

            # Could not find corresponding activation op
            if op_type is None:
                raise NotImplementedError(
                    "Transformer Engine operation-based API does not support "
                    f"activation_func={activation_func}, "
                    f"gated_linear_unit={gated_linear_unit}"
                )

            # Construct op
            kwargs = {}
            if is_te_min_version("2.3"):
                kwargs["cache_quantized_input"] = cache_quantized_input
            return op_type(**kwargs)

        def _register_hooks_on_fused_impl(self, fused_impl: torch.nn.Module) -> None:
            """Attempt to emulate submodule callback hooks.

            This is not always possible because Transformer Engine's
            op fuser does not expose intermediate tensors. Depending
            on what kernel fusions the op fuser chooses, the
            intermediate tensors may not even exist. Hooks that modify
            tensors will result in incorrect behavior.

            """

            # Get submodule hooks
            forward_pre_hooks = []
            forward_post_hooks = []
            backward_pre_hooks = []
            backward_post_hooks = []
            for submodule in self.modules():
                for hook in submodule._forward_pre_hooks.values():
                    forward_pre_hooks.append((submodule, hook))
                for hook in submodule._forward_hooks.values():
                    forward_post_hooks.append((submodule, hook))
                for hook in submodule._backward_pre_hooks.values():
                    backward_pre_hooks.append((submodule, hook))
                for hook in submodule._backward_hooks.values():
                    backward_post_hooks.append((submodule, hook))

            # Pre-forward hooks
            # Note: DDP pre-forward hooks are safe since they do not
            # interact with input tensor.
            if forward_pre_hooks:
                from megatron.core.distributed import distributed_data_parallel

                if any(
                    inspect.getmodule(hook) != distributed_data_parallel
                    for _, hook in forward_pre_hooks
                ):
                    warnings.warn(
                        "TEFusedMLP module has a submodule with a pre-forward hook. "
                        "TEFusedMLP module does not expose intermediate tensors, "
                        "so the hook may have incorrect behavior if it attempts to "
                        "access the input tensor."
                    )

                def forward_pre_hook(module, *_) -> None:
                    for submodule, hook in forward_pre_hooks:
                        # Assume that hook does not interact with input
                        ret = hook(submodule, None)
                        if ret is not None:
                            raise RuntimeError(
                                "TEFusedMLP module does not expose intermediate tensors, but "
                                "submodule has pre-forward hook that modifies input tensor."
                            )

                fused_impl.register_forward_pre_hook(forward_pre_hook)

            # Post-forward hooks
            if forward_post_hooks:
                warnings.warn(
                    "TEFusedMLP module has a submodule with a post-forward hook. "
                    "TEFusedMLP module does not expose intermediate tensors, "
                    "so the hook may have incorrect behavior if it attempts to "
                    "access the input or output tensors."
                )

                def forward_post_hook(module, *_) -> None:
                    for submodule, hook in forward_post_hooks:
                        # Assume that hook does not interact with input or output
                        ret = hook(submodule, None, None)
                        if ret is not None:
                            raise RuntimeError(
                                "TEFusedMLP module does not expose intermediate tensors, but "
                                "submodule has post-forward hook that modifies output tensor."
                            )

                fused_impl.register_forward_hook(forward_post_hook)

            # Backward hooks
            if backward_pre_hooks:
                raise RuntimeError(
                    "TEFusedMLP module does not support submodules with pre-backward hooks"
                )
            if backward_post_hooks:
                raise RuntimeError(
                    "TEFusedMLP module does not support submodules with post-backward hooks"
                )

        def forward(self, hidden_states: torch.Tensor, **kwargs) -> Tuple[Tensor, Optional[Tensor]]:
            """Forward."""

            # Construct fused impl if needed
            # Note: We initialize during the first forward pass in
            # case the params are modified after the constructor.
            # Note: The fused impl is stored in a tuple to avoid
            # registering as a submodule.
            if self._fused_impl is None:
                self._fused_impl = (self._make_fused_impl(),)

            # Apply fused impl
            out = self._fused_impl[0](hidden_states)

            # Return bias tensor if requested
            bias = None
            if self.linear_fc2.te_return_bias:
                bias = self.linear_fc2.bias
                if isinstance(bias, torch.Tensor) and bias.numel() == 0:
                    bias = None

            return out, bias

else:
    TEFusedMLP = None  # type: ignore[assignment, misc]


class TEDelayedScaling(te.common.recipe.DelayedScaling):
    """
    Wrapper for the Transformer-Engine's `DelayedScaling` layer.
    """

    def __init__(
        self,
        config: ModelParallelConfig,
        fp8_format: int,
        override_linear_precision: tuple = (False, False, False),
    ):
        if not HAVE_TE:
            raise ImportError(
                "Transformer Engine is not installed. "
                "Please install it with `pip install transformer-engine`."
            )

        extra_kwargs = _get_extra_te_kwargs(config)
        if is_te_min_version("1.6.0.dev0"):
            extra_kwargs["fp8_dpa"] = config.fp8_dot_product_attention
            extra_kwargs["fp8_mha"] = config.fp8_multi_head_attention
        if get_te_version() < PkgVersion("1.8.0"):
            extra_kwargs["interval"] = config.fp8_interval
        elif config.fp8_interval != 1:
            warnings.warn("fp8_interval is deprecated and ignored from Transformer-Engine v1.8.0.")

        super().__init__(
            margin=config.fp8_margin,
            fp8_format=fp8_format,
            amax_compute_algo=config.fp8_amax_compute_algo,
            amax_history_len=config.fp8_amax_history_len,
            override_linear_precision=override_linear_precision,
            **extra_kwargs,
        )


class TECudaRNGStatesTracker(te.pytorch.distributed.CudaRNGStatesTracker):
    """Wraps TransformerEngine's CudaRNGStatesTracker so that it is
    interchangeable with Megatron's RNG tracker"""

    def __init__(self, is_inference_rng_tracker=False):
        if not HAVE_TE:
            raise ImportError(
                "Transformer Engine is not installed. "
                "Please install it with `pip install transformer-engine`."
            )

        super().__init__()
        self.reset()
        self.is_inference_rng_tracker = is_inference_rng_tracker

    def is_initialized(self):
        """Checks if the internal RNG state has been set with set_states()."""
        return self._is_initialized

    def reset(self):
        """Reset the internal RNG state."""
        super().reset()
        self._is_initialized = False

    def set_states(self, states):
        """Set the internal RNG state."""
        super().set_states(states)
        self._is_initialized = True

    def add(self, name, seed):
        """Track the rng state."""
        super().add(name, seed)
        self._is_initialized = True


def te_checkpoint(
    forward_func, distribute_saved_activations, get_rng_state_tracker, tp_group, *args, **kwargs
):
    """Checkpointing with Transformer-Engine."""
    if not HAVE_TE:
        raise ImportError(
            "Transformer Engine is not installed. "
            "Please install it with `pip install transformer-engine`."
        )

    from transformer_engine.pytorch.distributed import checkpoint

    if is_te_min_version("1.5.0"):
        return checkpoint(
            forward_func,
            *args,
            distribute_saved_activations=distribute_saved_activations,
            get_rng_state_tracker=get_rng_state_tracker,
            tp_group=tp_group,
            **kwargs,
        )
    else:
        return checkpoint(
            forward_func, distribute_saved_activations, get_rng_state_tracker, tp_group, *args
        )


try:
    from transformer_engine.pytorch.attention import _SplitAlongDim

    SplitAlongDim = _SplitAlongDim.apply

except ImportError:
    SplitAlongDim = None

try:
    from transformer_engine.pytorch.cpu_offload import (
        get_cpu_offload_context as _get_cpu_offload_context,
    )

    def get_cpu_offload_context(
        enabled,
        num_layers,
        model_layers,
        activation_offloading,
        weight_offloading,
        double_buffering,
        retain_pinned_cpu_buffers,
    ):
        """Get CPU offload context and sync function."""
        if is_te_min_version("2.5.0"):
            # Enables the additional double buffering switch for activations during LLM training
            context, sync_func = _get_cpu_offload_context(
                enabled,
                num_layers,
                model_layers,
                activation_offloading,
                weight_offloading,
                double_buffering,
                retain_pinned_cpu_buffers=retain_pinned_cpu_buffers,
            )
        elif is_te_min_version("1.10.0.dev0"):
            context, sync_func = _get_cpu_offload_context(
                enabled, num_layers, model_layers, activation_offloading, weight_offloading
            )
        else:
            context, sync_func = _get_cpu_offload_context(
                enabled, num_layers, activation_offloading, weight_offloading
            )

        return context, sync_func

except ImportError:
    get_cpu_offload_context = None  # type: ignore[assignment, misc]

try:
    if HAVE_TE and is_te_min_version("2.3.0"):
        from transformer_engine.pytorch.attention.rope import apply_rotary_pos_emb
    else:
        from transformer_engine.pytorch.attention import apply_rotary_pos_emb

    def fused_apply_rotary_pos_emb(
        t: torch.Tensor,
        freqs: torch.Tensor,
        transpose_output_memory: bool = False,
        interleaved: bool = False,
    ) -> torch.Tensor:
        """Apply rotary positional embedding to input tensor T in `sbhd` format."""
        if transpose_output_memory:
            warnings.warn(
                "transpose_output_memory is not supported by TE's fused RoPE and will be ignored."
            )
        if is_te_min_version("2.3.0"):
            return apply_rotary_pos_emb(
                t, freqs, tensor_format="sbhd", interleaved=interleaved, fused=True
            )
        else:
            if interleaved:
                raise ValueError("Only TE >= 2.3.0 supports interleaved fused RoPE.")

            return apply_rotary_pos_emb(t, freqs, tensor_format="sbhd", fused=True)

    def fused_apply_rotary_pos_emb_thd(
        t: torch.Tensor,
        cu_seqlens: torch.Tensor,
        freqs: torch.Tensor,
        cp_size: int = 1,
        cp_rank: int = 0,
        interleaved: bool = False,
    ) -> torch.Tensor:
        """
        Apply rotary positional embedding to input tensor T in `thd` format with CP support.
        """
        if interleaved:
            assert is_te_min_version("2.3.0"), "Only TE >= 2.3.0 supports interleaved fused RoPE."

        if is_te_min_version("2.3.0", check_equality=True):
            return apply_rotary_pos_emb(
                t,
                freqs,
                tensor_format="thd",
                fused=True,
                cu_seqlens=cu_seqlens,
                cp_size=cp_size,
                cp_rank=cp_rank,
                interleaved=interleaved,
            )
        elif is_te_min_version("1.12.0", check_equality=True):
            return apply_rotary_pos_emb(
                t,
                freqs,
                tensor_format="thd",
                fused=True,
                cu_seqlens=cu_seqlens,
                cp_size=cp_size,
                cp_rank=cp_rank,
            )
        else:
            assert cp_size == 1, "Only TE >= 1.12 supports RoPE fusion for THD format with CP."
            return apply_rotary_pos_emb(
                t, freqs, tensor_format="thd", fused=True, cu_seqlens=cu_seqlens
            )

except ImportError:
    pass

try:
    from transformer_engine.pytorch import Fp8Padding, Fp8Unpadding  # pylint: disable=unused-import

except ImportError:
    Fp8Padding = None
    Fp8Unpadding = None

try:
    from transformer_engine.pytorch.permutation import (
        moe_permute,
        moe_permute_with_probs,
        moe_sort_chunks_by_index,
        moe_sort_chunks_by_index_with_probs,
        moe_unpermute,
    )

    fused_permute = moe_permute
    fused_permute_with_probs = moe_permute_with_probs
    fused_sort_chunks_by_index = moe_sort_chunks_by_index
    fused_sort_chunks_by_index_with_probs = moe_sort_chunks_by_index_with_probs
    fused_unpermute = moe_unpermute

except ImportError:
    fused_permute = None
    fused_permute_with_probs = None
    fused_sort_chunks_by_index = None
    fused_sort_chunks_by_index_with_probs = None
    fused_unpermute = None

try:
    from transformer_engine.pytorch.permutation import moe_permute_and_pad_with_probs

    fused_permute_and_pad_with_probs = moe_permute_and_pad_with_probs

except ImportError:
    fused_permute_and_pad_with_probs = None

try:
    from transformer_engine.pytorch.cross_entropy import parallel_cross_entropy

    _TE_SUPPORTS_CG_CAPTURABLE = is_te_min_version("2.7.0")
    current_te_version = get_te_version()

    def te_parallel_cross_entropy(
        logits: torch.Tensor,
        labels: torch.Tensor,
        tp_group: torch.distributed.ProcessGroup,
        is_cg_capturable: bool = False,
    ):
        """Wrapper function for TE's Cross Entropy Loss kernel"""
        if _TE_SUPPORTS_CG_CAPTURABLE:
            # According to TE CrossEntropyFunction, ignore_idx defaults to -100
            return parallel_cross_entropy(
                logits, labels, 0.0, False, tp_group, -100, is_cg_capturable
            )
        else:
            return parallel_cross_entropy(logits, labels, 0.0, False, tp_group)

except ImportError:
    te_parallel_cross_entropy = None  # type: ignore[assignment, misc]

try:
    from transformer_engine.pytorch.cpp_extensions import general_gemm

    try:
        from transformer_engine.pytorch.module.base import get_workspace

        _get_workspace = get_workspace
    except ImportError:
        _get_workspace = None

    def te_general_gemm(
        A: torch.Tensor,
        B: torch.Tensor,
        out_dtype: Optional[torch.dtype] = None,
        layout: str = "TN",
        out: Optional[torch.Tensor] = None,
        bias: Optional[torch.Tensor] = None,
        grad: bool = False,
    ) -> List[torch.Tensor]:
        """
        Wrapper for TE's general_gemm function.
        It supports fp32, bf16, fp16, and fp8 GEMMs with TN, NN, and NT layouts.
        The output dtype can be specified by `out_dtype`.
        Note: not all combinations of these settings are supported. If not supported,
        cublaslt will throw an error.
        """
        kwargs = dict(
            out_dtype=out_dtype,
            quantization_params=None,
            gelu=None,
            gelu_in=None,
            accumulate=False,
            layout=layout,
            out=out,
            bias=bias,
            use_split_accumulator=False,
            grad=grad,
            ub=None,
            ub_type=None,
            extra_output=None,
            bulk_overlap=False,
        )
        if _get_workspace is not None:
            kwargs["workspace"] = _get_workspace()
        return general_gemm(A, B, **kwargs)

except ImportError:
    te_general_gemm = None  # type: ignore[assignment, misc]


if HAVE_TE and is_te_min_version("2.7.0.dev"):
    from transformer_engine.pytorch.router import (  # pylint: disable=unused-import
        fused_compute_score_for_moe_aux_loss,
        fused_moe_aux_loss,
        fused_topk_with_score_function,
    )

else:
    fused_topk_with_score_function = None
    fused_compute_score_for_moe_aux_loss = None
    fused_moe_aux_loss = None


def set_save_original_input(module):
    """
    Set the module to save the original input tensors.

    Some transformer-engine modules would save the quantized tensors by default in fp8 training.
    This method is used to set these modules to save the original input tensors directly.

    This can save the memory usage in some FP8 training scenarios, such as the attn linear_proj and
    the shared experts.
    The output-discarding recompute method also relies on this.
    """
    if hasattr(module, 'save_original_input'):
        module.save_original_input = True
    else:
        raise ValueError(
            "set_save_original_input is only needed on transformer-engine modules that save "
            "quantized tensors by default. It needs transformer-engine>=2.6.0dev0."
        )


try:
    # pylint: disable=unused-import
    from transformer_engine.pytorch import cpu_offload_v1 as cpu_offload
except ImportError:
    try:
        from transformer_engine.pytorch import cpu_offload
    except ImportError:
        cpu_offload = None
try:
    # pylint: disable=unused-import
    from transformer_engine.pytorch.float8_tensor import Float8Tensor
except ImportError:
    Float8Tensor = None


================================================
FILE: megatron/core/extensions/transformer_engine_spec_provider.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
from __future__ import annotations

import warnings
from typing import Optional, cast

from megatron.core.extensions.transformer_engine import (
    TEActivationOp,
    TEColumnParallelGroupedLinear,
    TEColumnParallelLinear,
    TEDotProductAttention,
    TELayerNormColumnParallelLinear,
    TELinear,
    TENorm,
    TERowParallelGroupedLinear,
    TERowParallelLinear,
)
from megatron.core.fusions.fused_layer_norm import FusedLayerNorm
from megatron.core.models.backends import BackendSpecProvider
from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear
from megatron.core.transformer.mlp import MLPSubmodules, TEActivationFunctionBuilder
from megatron.core.transformer.moe.experts import (
    SequentialMLP,
    TEGroupedMLP,
    TEGroupedMLPSubmodules,
)
from megatron.core.transformer.torch_norm import LayerNormBuilder
from megatron.core.utils import get_te_version, is_te_min_version


class _TENormWithResidual:
    """Class adapter for TENorm with residual fusion enabled."""

    def __new__(cls, *args, **kwargs):
        return TENorm(*args, has_residual=True, **kwargs)


class TESpecProvider(BackendSpecProvider):
    """A protocol for providing the submodules used in Spec building."""

    def linear(self) -> type:
        """Which linear module TE backend uses"""
        return TELinear

    def column_parallel_linear(self) -> type:
        """Which column parallel linear module TE backend uses"""
        return TEColumnParallelLinear

    def row_parallel_linear(self) -> type:
        """Which row parallel linear module TE backend uses"""
        return TERowParallelLinear

    def fuse_layernorm_and_linear(self) -> bool:
        """TE backend chooses a single module for layernorm and linear"""
        return True

    def column_parallel_layer_norm_linear(self) -> Optional[type]:
        """Which module for sequential layernorm and linear"""
        return TELayerNormColumnParallelLinear

    def layer_norm(
        self, rms_norm: bool = False, for_qk: bool = False, has_residual: bool = False
    ) -> LayerNormBuilder:
        """Which module to use for layer norm"""
        if for_qk and not is_te_min_version("1.9.0"):
            # TENorm significantly harms convergence when used
            # for QKLayerNorm if TE Version < 1.9;
            # we instead use the Apex implementation.
            return FusedLayerNorm
        # Keep returning a class so this path stays aligned with build_module's class handling.
        return _TENormWithResidual if has_residual else TENorm

    def core_attention(self) -> type:
        """Which module to use for attention"""
        return TEDotProductAttention

    def grouped_mlp_modules(
        self, moe_use_grouped_gemm: bool
    ) -> (
        tuple[type[TEGroupedMLP], TEGroupedMLPSubmodules]
        | tuple[type[SequentialMLP], MLPSubmodules]
    ):
        """Which module and submodules to use for grouped mlp"""
        if moe_use_grouped_gemm and TEColumnParallelGroupedLinear is not None:
            return TEGroupedMLP, TEGroupedMLPSubmodules(
                linear_fc1=TEColumnParallelGroupedLinear, linear_fc2=TERowParallelGroupedLinear
            )
        else:
            if not is_te_min_version("1.7.0.dev0"):
                warnings.warn(
                    "Only transformer-engine>=1.7.0 supports MoE experts, "
                    f"but your version is {get_te_version()}. "
                    "Use local linear implementation instead."
                )
                return SequentialMLP, MLPSubmodules(
                    linear_fc1=ColumnParallelLinear, linear_fc2=RowParallelLinear
                )
            return SequentialMLP, MLPSubmodules(
                linear_fc1=TEColumnParallelLinear, linear_fc2=TERowParallelLinear
            )

    def activation_func(self) -> TEActivationFunctionBuilder | None:
        """Which module to use for activation function"""
        # transformer_engine.BasicOperation.forward has an overly permissive return type, but by
        # design these classes always meet the interface.
        return cast(TEActivationFunctionBuilder, TEActivationOp)


================================================
FILE: megatron/core/fp4_utils.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Utility functions related to FP4 that are used throughout Megatron core"""

from contextlib import nullcontext

import torch

from megatron.core.enums import Fp4Recipe
from megatron.core.fp8_utils import _get_custom_recipe
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.utils import is_te_min_version

# Check if Transformer Engine is installed
HAVE_TE = False
try:
    import transformer_engine  # pylint: disable=W0611

    HAVE_TE = True
except (ImportError, ModuleNotFoundError):
    # Transformer Engine not found
    pass


# Check if Transformer Engine has class for fp4 tensors.
HAVE_TE_FP4_TENSOR_CLASS = False
if HAVE_TE:
    if is_te_min_version("2.7.0.dev0"):
        try:
            from transformer_engine.pytorch.tensor.nvfp4_tensor import (
                NVFP4Tensor as FP4_TENSOR_CLASS,
            )

            HAVE_TE_FP4_TENSOR_CLASS = True
        except (ImportError, ModuleNotFoundError):
            HAVE_TE_FP4_TENSOR_CLASS = False
            FP4_TENSOR_CLASS = None
    else:
        HAVE_TE_FP4_TENSOR_CLASS = False
        FP4_TENSOR_CLASS = None
else:
    HAVE_TE_FP4_TENSOR_CLASS = False
    FP4_TENSOR_CLASS = None


def is_nvfp4tensor(tensor: torch.Tensor) -> bool:
    """Check if a tensor is a Transformer Engine NVFP4Tensor."""
    return HAVE_TE_FP4_TENSOR_CLASS and isinstance(tensor, FP4_TENSOR_CLASS)


def get_fp4_align_size(fp4_recipe: Fp4Recipe) -> int:
    """
    Get the alignment size required for FP4 GEMM.
    FP4 GEMM requires Blackwell and later architectures.

    The value 32 is a hardware requirement: TMA (Tensor Memory Accelerator) requires
    a 16-byte aligned address for efficient memory access. Since FP4 uses 4 bits per value,
    16 bytes (128 bits) corresponds to 32 FP4 values. Therefore, the alignment size for FP4
    is 32. With this alignment, NVFP4 GEMM can be performed efficiently.

    Note that since we are also random hadamard transform for NVFP4 training, we want
    fused group nvfp4 quantize plus hadamard transform. Hadamard transform will leverage
    tensor core instructions for better performance, while group quantize kernels also
    prefer a more aligned size in token dimension M. The efficiently leverage grouped
    kernels, padding needs to be 64 multiple, but 128 multiple will bring even faster.

    When it comes to MOE cuda graph support, the number of tokens for each expert should
    be a buffer on device memory, which means that we don't know the token dimension for
    each expertin host, therefore we cannot calculate the zero padded scaling factors shape
    on host to comply with the NVFP4 GEMM scaling factor layout. However, if we have already
    zero padded the tokens to 128 multiple, then there is no need for such padding, so that
    host doesn't need to copy the token distribution from device to host (which will break
    the CUDA graph).

    Paper link: https://arxiv.org/pdf/2509.25149
    Scaling factor layout: https://docs.nvidia.com/cuda/cublas/#d-block-scaling-factors-layout
    TE NVFP4 Grouped Quantization: https://github.com/NVIDIA/TransformerEngine/pull/2411
    """
    # pylint: disable=unused-argument
    return 128


def dequantize_fp4_tensor(fp4_tensor: torch.Tensor) -> torch.Tensor:
    """Dequantize a fp4 tensor to a higher precision tensor."""
    if is_te_min_version("2.7.0.dev0"):
        return fp4_tensor.dequantize()
    else:
        raise RuntimeError("FP4 dequantization requires Transformer Engine >= 2.7.0.dev0")


if HAVE_TE:
    from megatron.core import parallel_state

    def get_fp4_recipe(config: TransformerConfig):
        """Return fp4 recipe."""
        if is_te_min_version("2.7.0.dev0"):
            if config.fp4_recipe == Fp4Recipe.nvfp4:
                try:
                    fp4_recipe = transformer_engine.common.recipe.NVFP4BlockScaling(
                        fp8_dpa=config.fp8_dot_product_attention
                    )
                except AttributeError:
                    raise ValueError(
                        """NVFP4BlockScaling recipe is not available in this version of 
                        Transformer Engine. Please make sure you are using TE version 
                        >= 2.7.0.dev0."""
                    )
            elif config.fp4_recipe == Fp4Recipe.custom:
                fp4_recipe = _get_custom_recipe(config.fp4_quantizer_factory)
            else:
                raise ValueError(
                    "NVFP4BlockScaling and custom are the only supported FP4 recipes. "
                    "Please make sure you are using a compatible TE version >= 2.7.0.dev0."
                )
        else:
            raise ValueError(
                """FP4 support requires TransformerEngine version >= 2.7.0.dev0 
                for NVFP4BlockScaling."""
            )
        return fp4_recipe

    def get_fp4_context(config: TransformerConfig, layer_no: int = -1, is_init: bool = False):
        """Return fp4 context manager."""
        num_bf16_layers_at_start = (
            config.num_layers_at_start_in_bf16 if config.first_last_layers_bf16 else 0
        )
        num_bf16_layers_at_end = (
            config.num_layers_at_end_in_bf16 if config.first_last_layers_bf16 else 0
        )
        is_first_layer = layer_no < num_bf16_layers_at_start
        is_last_layer = layer_no >= config.num_layers - num_bf16_layers_at_end

        need_fp4_context = config.fp4 if not is_init else config.fp4_param

        if not need_fp4_context:
            fp4_context = nullcontext()
        elif layer_no >= 0 and config.first_last_layers_bf16 and (is_first_layer or is_last_layer):
            fp4_context = nullcontext()
        else:
            fp4_recipe = get_fp4_recipe(config)
            fp4_group = None
            if parallel_state.model_parallel_is_initialized():
                fp4_group = parallel_state.get_amax_reduction_group(
                    with_context_parallel=True, tp_only_amax_red=config.tp_only_amax_red
                )

            if not is_init:
                # TE currently uses fp8_autocast for fp8 and fp4 quantization.
                fp4_context = transformer_engine.pytorch.fp8_autocast(
                    enabled=True, fp8_recipe=fp4_recipe, fp8_group=fp4_group
                )
            else:
                import inspect

                context_args = {"enabled": True}
                if (
                    "recipe"
                    in inspect.signature(transformer_engine.pytorch.fp8_model_init).parameters
                ):
                    context_args["recipe"] = fp4_recipe
                fp4_context = transformer_engine.pytorch.fp8_model_init(**context_args)

        return fp4_context

else:

    def get_fp4_recipe(config: TransformerConfig):
        """Return None when Transformer Engine is not available."""
        return None

    def get_fp4_context(config: TransformerConfig, layer_no: int = -1, is_init: bool = False):
        """Return nullcontext when Transformer Engine is not available."""
        return nullcontext()


================================================
FILE: megatron/core/fp8_utils.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

"""Utility functions related to FP8 that are used throughout Megatron core"""

import importlib
import weakref
from contextlib import nullcontext
from functools import wraps
from typing import List, Optional, Union

import torch

from megatron.core.enums import Fp4Recipe, Fp8Recipe
from megatron.core.tensor_parallel import (
    ColumnParallelLinear,
    RowParallelLinear,
    gather_from_sequence_parallel_region,
    reduce_scatter_to_sequence_parallel_region,
)
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.utils import get_te_version, is_te_min_version

# Check if Transformer Engine is installed
HAVE_TE = False
try:
    import transformer_engine  # pylint: disable=W0611

    HAVE_TE = True
except (ImportError, ModuleNotFoundError):
    # Transformer Engine not found
    pass

try:
    from packaging.version import Version as PkgVersion

    HAVE_PACKAGING = True
except ImportError:
    HAVE_PACKAGING = False

# Check if Transformer Engine has class for fp8 tensors.
HAVE_TE_FP8_TENSOR_CLASS = False
if HAVE_TE:
    if is_te_min_version("2.0"):
        # In TE2.x, QuantizedTensor is the base class for all different type of fp8 tensors,
        # including fp8 tensor for delayed scaling, current scaling and mxfp8, etc.
        from transformer_engine.pytorch.tensor import QuantizedTensor as FP8_TENSOR_CLASS
    else:
        from transformer_engine.pytorch.float8_tensor import Float8Tensor as FP8_TENSOR_CLASS

    HAVE_TE_FP8_TENSOR_CLASS = True
else:
    HAVE_TE_FP8_TENSOR_CLASS = False
    FP8_TENSOR_CLASS = None

# Check if Transformer Engine has MXFP8Tensor class

try:
    from transformer_engine.pytorch.tensor.mxfp8_tensor import MXFP8Tensor

    HAVE_TE_MXFP8TENSOR = True
except (ImportError, ModuleNotFoundError):
    # MXFP8Tensor not found
    HAVE_TE_MXFP8TENSOR = False

if HAVE_TE:
    from megatron.core.extensions.transformer_engine import (
        TEColumnParallelLinear,
        TELayerNormColumnParallelLinear,
        TELinear,
        TERowParallelLinear,
    )

    TE_LINEAR_TYPES = (
        TELinear,
        TEColumnParallelLinear,
        TERowParallelLinear,
        TELayerNormColumnParallelLinear,
    )
else:
    TE_LINEAR_TYPES = ()

try:
    from megatron.core.extensions.transformer_engine import Fp8Padding, Fp8Unpadding
except ImportError:
    Fp8Padding = None
    Fp8Unpadding = None

try:
    from transformer_engine.pytorch.tensor.utils import (
        post_all_gather_processing as te_post_all_gather_processing,
    )
except ImportError:
    te_post_all_gather_processing = None


def is_float8tensor(tensor: torch.Tensor) -> bool:
    """Check if a tensor is a Transformer Engine Float8Tensor.

    Note that in TE2.x, in order to support more recipes, the design of the fp8 tensor class has
    changed. Now Float8Tensor is only used for current scaling and delayed scaling. And mxfp8
    and blockwise scaling have their own fp8 tensor classes. These different fp8 tensor classes
    are both inherited from QuantizedTensor. So, for TE1.x, FP8_TENSOR_CLASS is Float8Tensor,
    and for TE2.x, FP8_TENSOR_CLASS is QuantizedTensor.
    """
    return HAVE_TE_FP8_TENSOR_CLASS and isinstance(tensor, FP8_TENSOR_CLASS)


def is_mxfp8tensor(tensor: torch.Tensor) -> bool:
    """Check if a tensor is a Transformer Engine MXFP8Tensor"""
    return HAVE_TE_MXFP8TENSOR and isinstance(tensor, MXFP8Tensor)


def dequantize_fp8_tensor(fp8_tensor: torch.Tensor) -> torch.Tensor:
    """Dequantize a fp8 tensor to a higher precision tensor."""
    if is_te_min_version("2.0"):
        return fp8_tensor.dequantize()
    else:
        return fp8_tensor.from_float8()


def _resolve_callable_from_python_import_path(dotted_path: str):
    """Resolve a Python import path like 'pkg.mod.func' to a callable.

    Raises ValueError with clear message on failure.
    """
    if not isinstance(dotted_path, str) or not dotted_path:
        raise ValueError(
            "fp8_quantizer_factory must be a non-empty string with format 'pkg.mod.func'."
        )

    parts = dotted_path.rsplit(".", 1)
    if len(parts) == 1:
        raise ValueError(f"Invalid fp8_quantizer_factory '{dotted_path}'. Expected 'pkg.mod.func'.")
    module_path, attr = parts[0], parts[1]

    try:
        mod = importlib.import_module(module_path)
    except Exception as exc:
        raise ValueError(
            f"Failed to import module '{module_path}' for fp8_quantizer_factory: {exc}"
        ) from exc

    fn = getattr(mod, attr, None)
    if fn is None:
        raise ValueError(
            f"Attribute '{attr}' not found in module '{module_path}' for fp8_quantizer_factory."
        )
    if not callable(fn):
        raise ValueError(
            f"Resolved attribute '{module_path}.{attr}' is not callable for fp8_quantizer_factory."
        )
    return fn


def _get_custom_recipe(quantizer_factory_python_path: str) -> Union[Fp8Recipe, Fp4Recipe]:
    quantizer_factory = _resolve_callable_from_python_import_path(quantizer_factory_python_path)
    try:
        custom_recipe = transformer_engine.common.recipe.CustomRecipe(qfactory=quantizer_factory)
    except AttributeError:
        raise ValueError(
            """CustomRecipe recipe is not available in this version of 
            Transformer Engine. Please make sure you are using TE version 
            >= 2.9.0.dev0."""
        )
    return custom_recipe


def get_fp8_align_size(fp8_recipe: Fp8Recipe) -> int:
    """Get the alignment size required for fp8 GEMM."""
    if fp8_recipe == Fp8Recipe.mxfp8:
        return 32
    else:
        return 16


def is_column_parallel_linear(module):
    """Returns whether the given module is a ColumnParallelLinear layer."""
    if HAVE_TE and (
        isinstance(module, TEColumnParallelLinear)
        or isinstance(module, TELayerNormColumnParallelLinear)
    ):
        return True
    elif isinstance(module, ColumnParallelLinear):
        return True
    return False


def is_row_parallel_linear(module):
    """Returns whether the given module is a RowParallelLinear layer."""
    if HAVE_TE and isinstance(module, TERowParallelLinear):
        return True
    elif isinstance(module, RowParallelLinear):
        return True
    return False


"""
The code below abstracts the functionalities needed for implementing "--fp8-param-gather" into
several functions. It provides different implementations for each function based on different
versions of TE, ensuring compatibility across various TE versions.

Currently, there are three functions:
    - modify_underlying_storage
        This function is used in DDP to place all parameters into a contiguous buffer. For
        non-fp8 tensors, replacing their data is simple, just using code like
        "tensor.data = new_data". However, for fp8 tensors, their raw data is not stored in the
        ".data" attribute, and it varies with different TE versions and different recipes. This
        function provides a unified interface to replace the underlying storage of a fp8 tensor.
    - quantize_param_shard
        This function is used in dist-opt to cast fp32 main params to fp8 params. For non-fp8
        params, this casting is as simple as "bf16_params.copy_(fp32_main_params)"; but for fp8
        params, the casting logic varies with different TE versions and different recipes. This
        function provides a unified interface to cast fp32 main params to fp8 params, and also
        updates the necessary attributes (like amax, scale, scale_inv or transpose cache) of the
        fp8 model params.
    - correct_amax_history_if_needed
        This function is used to correct the amax history of fp8 tensors. In TE1.x, some inplace
        copy operations will write unwanted values to the amax_history of fp8 tensors. This function
        corrects the amax_history back. For TE2.x, it's an empty function.
        Only useful for delayed scaling.
"""
if HAVE_TE and is_te_min_version("2.2"):
    # Supported TE versions: 2.2+
    from transformer_engine.pytorch.tensor import QuantizedTensor

    def _modify_underlying_storage_impl(
        fp8_tensor: QuantizedTensor, new_raw_data: torch.Tensor
    ) -> None:
        from transformer_engine.pytorch.tensor.utils import replace_raw_data

        replace_raw_data(fp8_tensor, new_raw_data)

    def _quantize_param_shard_impl(
        model_params: List[QuantizedTensor],
        main_params: List[torch.Tensor],
        start_offsets: List[int],
        data_parallel_group: torch.distributed.ProcessGroup,
        fsdp_shard_model_params: Optional[List[torch.Tensor]] = None,
    ) -> None:
        if len(model_params) == 0:
            return

        from transformer_engine.pytorch.tensor.utils import cast_master_weights_to_fp8

        args = [model_params, main_params, start_offsets, data_parallel_group]
        if fsdp_shard_model_params is not None:
            if not HAVE_PACKAGING:
                raise ImportError(
                    "packaging not found, please install it with `pip install packaging`"
                )
            if get_te_version() == PkgVersion("2.3.0.dev0+5fdd7bb") or is_te_min_version("2.3.0"):
                args.append(fsdp_shard_model_params)
            else:
                raise NotImplementedError(
                    f"FSDP with --fp8-param-gather is not supported in TE v{get_te_version()}"
                )

        # For newer TE versions (i.e., have post_all_gather_processing function), we keep the
        # columnwise data and manually call post_all_gather_processing after all-gather, this
        # makes fp8 params compatible with CUDA graph.
        kwargs = {}
        if te_post_all_gather_processing is not None:
            kwargs["manual_post_all_gather_processing"] = True

        cast_master_weights_to_fp8(*args, **kwargs)

    def _correct_amax_history_if_needed_impl(model: List[torch.nn.Module]) -> None:
        pass

elif HAVE_TE and is_te_min_version("2.0"):
    # Supported TE versions: 2.0
    from transformer_engine.pytorch.tensor import QuantizedTensor
    from transformer_engine.pytorch.tensor.float8_tensor import Float8Tensor

    def _modify_underlying_storage_impl(
        fp8_tensor: QuantizedTensor, new_raw_data: torch.Tensor
    ) -> None:
        old_raw_data = fp8_tensor._data
        assert old_raw_data.dtype == new_raw_data.dtype
        new_raw_data.detach().copy_(old_raw_data)
        fp8_tensor._data = new_raw_data
        del old_raw_data

    def _quantize_param_shard_impl(
        model_params: List[QuantizedTensor],
        main_params: List[torch.Tensor],
        start_offsets: List[int],
        data_parallel_group: torch.distributed.ProcessGroup,
        fsdp_shard_model_params: Optional[List[torch.Tensor]] = None,
    ) -> None:
        # Avoid circular import
        from megatron.core.optimizer.optimizer import _multi_tensor_copy_this_to_that

        if len(model_params) == 0:
            return

        if fsdp_shard_model_params is None:
            fsdp_shard_model_params = [None] * len(model_params)

        for model_param, main_param, start_offset, fsdp_shard_model_param in zip(
            model_params, main_params, start_offsets, fsdp_shard_model_params
        ):
            if main_param is None:
                continue

            if fsdp_shard_model_param is not None:
                shard_model_param = fsdp_shard_model_param
            else:
                shard_model_param = model_param._data.view(-1)[
                    start_offset : start_offset + main_param.numel()
                ]

            quantizer = model_param._quantizer
            # When not using --fp8-param-gather, the main_param (fp32) is first cast to bf16/fp16,
            # and then cast to fp8 during forward.
            # Although it's not necessary when --fp8-param-gather is enabled, we still keep this
            # logic to keep numerical consistency. So here cast the main_param to model_param.dtype.
            main_param = main_param.to(model_param.dtype)
            out = Float8Tensor(
                shape=main_param.size(),
                dtype=model_param.dtype,
                requires_grad=False,
                data=shard_model_param,
                fp8_scale_inv=model_param._scale_inv,
                fp8_dtype=model_param._fp8_dtype,
                quantizer=quantizer,
            )
            quantizer.update_quantized(main_param, out)

        amaxes = []
        scales = []
        scale_invs = []
        for model_param in model_params:
            quantizer = model_param._quantizer
            amaxes.append(quantizer.amax.view(1))
            scales.append(quantizer.scale.view(1))
            scale_invs.append(model_param._scale_inv.view(1))
            model_param._reset_caches()

        dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device="cuda")

        # Update scaling factors.
        packed_scales = torch.empty(len(scales), dtype=torch.float32, device=scales[0].device)
        packed_scale_views = [packed_scales[i].view(1) for i in range(len(scales))]
        _multi_tensor_copy_this_to_that(scales, packed_scale_views, dummy_overflow_buf)
        torch.reciprocal(packed_scales, out=packed_scales)
        _multi_tensor_copy_this_to_that(packed_scale_views, scale_invs, dummy_overflow_buf)

        # Reduce amaxes.
        # Note: Assume each param has a separate amax.
        packed_amaxes = torch.empty(len(amaxes), dtype=torch.float32, device=amaxes[0].device)
        packed_amax_views = [packed_amaxes[i].view(1) for i in range(len(amaxes))]
        _multi_tensor_copy_this_to_that(amaxes, packed_amax_views, dummy_overflow_buf)
        torch.distributed.all_reduce(
            packed_amaxes, op=torch.distributed.ReduceOp.MAX, group=data_parallel_group
        )
        _multi_tensor_copy_this_to_that(packed_amax_views, amaxes, dummy_overflow_buf)

    def _correct_amax_history_if_needed_impl(model: List[torch.nn.Module]) -> None:
        pass

elif HAVE_TE and is_te_min_version("1.0"):
    # Supported TE versions: 1.0 - 1.14
    from transformer_engine.pytorch.cpp_extensions import cast_to_fp8
    from transformer_engine.pytorch.float8_tensor import Float8Tensor

    def _modify_underlying_storage_impl(tensor: Float8Tensor, new_raw_data: torch.Tensor) -> None:
        old_raw_data = tensor._data
        assert old_raw_data.dtype == new_raw_data.dtype
        new_raw_data.detach().copy_(old_raw_data)
        tensor._data = new_raw_data
        del old_raw_data

    def _quantize_param_shard_impl(
        model_params: List[Float8Tensor],
        main_params: List[torch.Tensor],
        start_offsets: List[int],
        data_parallel_group: torch.distributed.ProcessGroup,
        fsdp_shard_model_params: Optional[List[torch.Tensor]] = None,
    ) -> None:
        # Avoid circular import
        from megatron.core.optimizer.optimizer import _multi_tensor_copy_this_to_that

        if len(model_params) == 0:
            return

        if fsdp_shard_model_params is None:
            fsdp_shard_model_params = [None] * len(model_params)

        for model_param, main_param, start_offset, fsdp_shard_model_param in zip(
            model_params, main_params, start_offsets, fsdp_shard_model_params
        ):
            if main_param is None:
                continue

            if fsdp_shard_model_param is not None:
                shard_model_param = fsdp_shard_model_param
            else:
                shard_model_param = model_param._data.view(-1)[
                    start_offset : start_offset + main_param.numel()
                ]

            # When not using --fp8-param-gather, the main_param (fp32) is first cast to bf16/fp16,
            # and then cast to fp8 during forward.
            # Although it's not necessary when --fp8-param-gather is enabled, we still keep this
            # logic to keep numerical consistency. So here cast the main_param to model_param.dtype.
            main_param = main_param.to(model_param.dtype)
            cast_to_fp8(
                main_param.view(1, -1),
                model_param._fp8_meta["scaling_fwd"],
                model_param._fp8_meta_index,
                model_param._fp8_dtype,
                out=shard_model_param.view(1, -1),
            )

        amaxes = []
        scales = []
        scale_invs = []
        for model_param in model_params:
            fp8_meta = model_param._fp8_meta["scaling_fwd"]
            fp8_meta_index = model_param._fp8_meta_index
            amaxes.append(fp8_meta.amax_history[0][fp8_meta_index].view(1))
            scales.append(fp8_meta.scale[fp8_meta_index].view(1))
            scale_invs.append(model_param._scale_inv.view(1))
            model_param._reset_caches()

        dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device="cuda")

        # Update scaling factors.
        packed_scales = torch.empty(len(scales), dtype=torch.float32, device=scales[0].device)
        packed_scale_views = [packed_scales[i].view(1) for i in range(len(scales))]
        _multi_tensor_copy_this_to_that(scales, packed_scale_views, dummy_overflow_buf)
        torch.reciprocal(packed_scales, out=packed_scales)
        _multi_tensor_copy_this_to_that(packed_scale_views, scale_invs, dummy_overflow_buf)

        # Reduce amaxes.
        # Note: Assume each param has a separate amax.
        packed_amaxes = torch.empty(len(amaxes), dtype=torch.float32, device=amaxes[0].device)
        packed_amax_views = [packed_amaxes[i].view(1) for i in range(len(amaxes))]
        _multi_tensor_copy_this_to_that(amaxes, packed_amax_views, dummy_overflow_buf)
        torch.distributed.all_reduce(
            packed_amaxes, op=torch.distributed.ReduceOp.MAX, group=data_parallel_group
        )
        _multi_tensor_copy_this_to_that(packed_amax_views, amaxes, dummy_overflow_buf)

    def _correct_amax_history_if_needed_impl(model: List[torch.nn.Module]) -> None:
        for model_module in model:
            for param in model_module.parameters():
                if is_float8tensor(param) and param._fp8_meta is not None:
                    fp8_meta = param._fp8_meta["scaling_fwd"]
                    fp8_meta_index = param._fp8_meta_index
                    if hasattr(param, "get_high_precision_init_val"):
                        fp8_meta.amax_history[0][fp8_meta_index].copy_(
                            param.get_high_precision_init_val().abs().max()
                        )
                    else:
                        fp8_meta.amax_history[0][fp8_meta_index] = 0

else:
    # Fallback impl if TE version is invalid or TE is not installed.
    def _modify_underlying_storage_impl(*args, **kwargs):
        raise RuntimeError("Invalid Transformer Engine version for FP8 distributed optimizer")

    def _quantize_param_shard_impl(model_params, *args, **kwargs):
        if len(model_params) == 0:
            return
        else:
            # If TE is not installed, there shouldn't be any fp8 params.
            raise RuntimeError("Invalid Transformer Engine version for FP8 distributed optimizer")

    def _correct_amax_history_if_needed_impl(*args, **kwargs):
        # If TE is not installed, we are definitely not using fp8 for training, so no correction
        # is needed.
        pass


# Interface Function
def modify_underlying_storage(tensor: torch.Tensor, new_raw_data: torch.Tensor):
    """Replace the underlying raw data of a tensor with new data."""
    _modify_underlying_storage_impl(tensor, new_raw_data)


# Interface Function
def quantize_param_shard(
    model_params, main_params, start_offsets, data_parallel_group, fsdp_shard_model_params=None
):
    """Cast shard fp32 main params to fp8 model params."""
    _quantize_param_shard_impl(
        model_params, main_params, start_offsets, data_parallel_group, fsdp_shard_model_params
    )


# Interface Function
def correct_amax_history_if_needed(model: List[torch.nn.Module]):
    """Correct the amax history of fp8 tensors when it's necessary (i.e., in TE1.x)."""
    _correct_amax_history_if_needed_impl(model)


def post_all_gather_processing(model_params):
    """
    Post-processing after all-gather for weights in distributed optimizer.
    - tensorwise: may need to create a transposed view to match backend GEMM.
    - blockwise: create column-wise storage.
    """
    if te_post_all_gather_processing is not None:
        te_post_all_gather_processing(model_params)
    else:
        # If the TE version is old and does not have post_all_gather_processing function, this is
        # a no-op, and the transpose/columnwise data will be created in the next forward pass.
        pass


def is_first_last_bf16_layer(config: TransformerConfig, layer_no: int):
    """Check if the layer is in bf16."""
    num_bf16_layers_at_start = (
        config.num_layers_at_start_in_bf16 if config.first_last_layers_bf16 else 0
    )
    num_bf16_layers_at_end = (
        config.num_layers_at_end_in_bf16 if config.first_last_layers_bf16 else 0
    )
    # Since layer_no is a global layer index, additional checks on whether
    # we are in the first or last pipeline-parallel rank are not needed.
    is_first_layer = layer_no < num_bf16_layers_at_start
    is_last_layer = layer_no >= config.num_layers - num_bf16_layers_at_end

    if layer_no >= 0 and config.first_last_layers_bf16 and (is_first_layer or is_last_layer):
        return True
    else:
        return False


if HAVE_TE:
    from megatron.core import parallel_state
    from megatron.core.extensions.transformer_engine import TEDelayedScaling

    def get_fp8_recipe(config: TransformerConfig):
        """Return fp8 recipe.

        Arguments:
            config (TransformerConfig): Configuration object.

        Returns:
            FP8 recipe.
        """
        if config.fp8 == "e4m3":
            fp8_format = transformer_engine.common.recipe.Format.E4M3
        elif config.fp8 == "hybrid":
            fp8_format = transformer_engine.common.recipe.Format.HYBRID
        else:
            raise ValueError("E4M3 and HYBRID are the only supported FP8 formats.")

        # Select fp8 recipe (TE version >= 2.1.0).
        fp8_recipe = None
        if is_te_min_version("2.1.0"):
            if config.fp8_recipe == Fp8Recipe.delayed:
                fp8_recipe = TEDelayedScaling(
                    config=config,
                    fp8_format=fp8_format,
                    override_linear_precision=(False, False, not config.fp8_wgrad),
                )
            elif config.fp8_recipe == Fp8Recipe.tensorwise and is_te_min_version("2.2.0.dev0"):
                fp8_recipe = transformer_engine.common.recipe.Float8CurrentScaling(
                    fp8_format=fp8_format, fp8_dpa=config.fp8_dot_product_attention
                )
            elif config.fp8_recipe == Fp8Recipe.blockwise and is_te_min_version("2.3.0.dev0"):
                fp8_recipe = transformer_engine.common.recipe.Float8BlockScaling(
                    fp8_format=fp8_format
                )
            elif config.fp8_recipe == Fp8Recipe.mxfp8:
                fp8_recipe = transformer_engine.common.recipe.MXFP8BlockScaling(
                    fp8_format=fp8_format
                )
            elif config.fp8_recipe == Fp8Recipe.custom:
                assert config.fp8_quantizer_factory is not None
                fp8_recipe = _get_custom_recipe(config.fp8_quantizer_factory)
            else:
                raise ValueError(
                    "Float8CurrentScaling, MXFP8BlockScaling, Float8BlockwiseScaling and "
                    "DelayedScaling are the only supported FP8 recipes. Please also make sure "
                    "you are using a compatible TE version."
                )
        else:
            # Assert that the user is using delayed scaling.
            assert config.fp8_recipe == Fp8Recipe.delayed, (
                "Please make sure to use TransformerEngine version >= 2.2.0.dev0 for "
                "Float8CurrentScaling, >= 2.1.0 for MXFP8BlockScaling, and >= 2.3.0.dev0 for "
                "Float8BlockScaling."
            )
            fp8_recipe = TEDelayedScaling(
                config=config,
                fp8_format=fp8_format,
                override_linear_precision=(False, False, not config.fp8_wgrad),
            )
        return fp8_recipe

    def get_fp8_context(config: TransformerConfig, layer_no: int = -1, is_init: bool = False):
        """Return fp8 context manager.

        Arguments:
            config (TransformerConfig): Configuration object.
            layer_no (int): *Global* layer index (including layers on other
                pipeline-parallel ranks).
            is_init (bool): Whether the context is fp8_model_init (True) or fp8_autocast (False).

        Returns:
            FP8 context.
            If layer_no < 0, we return a fp8 context for all layers regardless of layer_no.
            We return nullcontext() when: a) not using fp8 to train, b) layer_no is a layer
            that needs to be trained in bf16.
        """

        need_fp8_context = config.fp8 if not is_init else config.fp8_param

        if not need_fp8_context or is_first_last_bf16_layer(config, layer_no):
            # bf16 training or bf16 layer in fp8 training
            fp8_context = nullcontext()
        else:
            # fp8 training and this layer_no is in fp8
            fp8_recipe = get_fp8_recipe(config)

            fp8_group = None
            if parallel_state.model_parallel_is_initialized():
                fp8_group = parallel_state.get_amax_reduction_group(
                    with_context_parallel=True, tp_only_amax_red=config.tp_only_amax_red
                )

            if not is_init:
                fp8_context = transformer_engine.pytorch.fp8_autocast(
                    enabled=True, fp8_recipe=fp8_recipe, fp8_group=fp8_group
                )
            else:
                import inspect

                context_args = {"enabled": True}
                # Check if fp8_model_init supports setting recipe
                if "recipe" in (
                    inspect.signature(transformer_engine.pytorch.fp8_model_init).parameters
                ):
                    context_args["recipe"] = fp8_recipe
                # Check if fp8_model_init supports preserve_high_precision_init_val
                if "preserve_high_precision_init_val" in (
                    inspect.signature(transformer_engine.pytorch.fp8_model_init).parameters
                ):
                    context_args["preserve_high_precision_init_val"] = torch.is_grad_enabled()
                fp8_context = transformer_engine.pytorch.fp8_model_init(**context_args)

            # First / last layer in bf16 isn't supported with delayed scaling since it
            # requires entering/exiting fp8 context per layer, causing incorrect amax
            # reduction behavior.
            assert not (
                config.first_last_layers_bf16 and isinstance(fp8_recipe, TEDelayedScaling)
            ), "Delayed scaling does not support first / last layer in BF16."

        return fp8_context

else:

    def get_fp8_recipe(config: TransformerConfig):
        """Returns None since TE is not available."""
        return None

    def get_fp8_context(config: TransformerConfig, layer_no: int = -1, is_init: bool = False):
        """Returns dummy fp8 context manager since TE is not available."""
        return nullcontext()


if HAVE_TE:
    from transformer_engine.pytorch.fp8 import FP8GlobalStateManager

    # Modules that have been wrapped for inference for fp8
    _fp8_inference_wrapped_modules = weakref.WeakSet()

    def _wrap_te_linear_for_padding(module: torch.nn.Module):
        """Wrap a TE linear module to automatically pad sequences for FP8 inference.

        Modifies the module's forward method to:
        1. Pad input sequences to FP8 alignment requirements
        2. Run the original forward pass
        3. Unpad outputs to original sequence length

        Args:
            module: A Transformer Engine linear layer (TELinear, TEColumnParallelLinear, etc.)
        """
        if module in _fp8_inference_wrapped_modules:
            return
        _pad_func = Fp8Padding(1)
        _unpad_func = Fp8Unpadding(1)

        original_forward = module.forward

        @wraps(original_forward)
        def padded_forward(input_tensor, *args, **kwargs):
            is_context_quantized = FP8GlobalStateManager.is_fp8_enabled()
            if hasattr(module, "will_execute_quantized"):
                module_uses_quant = module.will_execute_quantized(is_context_quantized)
            else:
                module_uses_quant = is_context_quantized
            # Only do padding for fp8 if we are in fp8 or fp4 context
            if not module_uses_quant:
                return original_forward(input_tensor, *args, **kwargs)

            # With sequence parallelism we need to all-gather before padding
            # and reduce-scatter after unpadding
            if is_sequence_parallel := getattr(module, "sequence_parallel", False):
                if is_column_parallel_linear(module):
                    input_tensor = gather_from_sequence_parallel_region(
                        input_tensor, group=module.tp_group
                    )

                # Disable sequence parallelism on the module because we are handling the
                # all-gather and reduce-scatter externally
                module.sequence_parallel = False

            seq_len, batch_size, hidden_size = input_tensor.shape
            # Reshape to (S, B*H) to pad sequence dimension
            input_2d = input_tensor.reshape(seq_len, -1)
            # Pad the sequence dimension
            padded_input_2d, _ = _pad_func(input_2d, [seq_len])
            padded_seq_len = padded_input_2d.shape[0]

            # Reshape back to (padded_S, B, H)
            padded_input_3d = padded_input_2d.view(padded_seq_len, batch_size, hidden_size)
            output = original_forward(padded_input_3d, *args, **kwargs)

            # Handle output
            if isinstance(output, tuple):
                output_tensor = output[0]
                other_outputs = output[1:]
            else:
                output_tensor = output
                other_outputs = ()

            # Unpad output - reshape to 2D, unpad, reshape back
            _, _, output_hidden_size = output_tensor.shape
            output_2d = output_tensor.reshape(padded_seq_len, -1)
            unpadded_output_2d = _unpad_func(output_2d, [seq_len])
            unpadded_output = unpadded_output_2d.reshape(seq_len, batch_size, output_hidden_size)

            if is_sequence_parallel:
                # Reduce-scatter after unpadding
                if is_row_parallel_linear(module):
                    unpadded_output = reduce_scatter_to_sequence_parallel_region(
                        unpadded_output, group=module.tp_group
                    )

                # Reset sequence parallelism flag on the module
                module.sequence_parallel = True

            if other_outputs:
                return (unpadded_output,) + other_outputs
            else:
                return unpadded_output

        module.forward = padded_forward
        _fp8_inference_wrapped_modules.add(module)

    def prepare_model_for_fp8_inference(model):
        """Prepare a model for FP8 inference by wrapping TE linear layers with padding support.

        FP8 TE Gemms have specific shape requirements. This function wraps all Transformer
        Engine linear layers in the model to automatically pad/unpad sequences during inference.

        Args:
            model (model (GPTModel): Model containing TE linear layers.

        Returns:
            GPTModel: The same model with wrapped linear layers (modified in-place).

        """
        assert Fp8Padding and Fp8Unpadding, "TE version does not have FP8 padding functions"
        # Find and wrap all TE linear layers
        for module in model.modules():
            if isinstance(module, TE_LINEAR_TYPES):
                _wrap_te_linear_for_padding(module)

        return model

else:

    def prepare_model_for_fp8_inference(model):
        """If trys using prepare_model_for_fp8_inference without TE we error"""
        raise RuntimeError(
            "prepare_model_for_fp8_inference requires Transformer Engine to be installed. "
            "Please install transformer-engine to use FP8 inference."
        )


================================================
FILE: megatron/core/full_cuda_graph.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Full iteration CUDA graph for training."""

import logging

import torch

from megatron.core.tensor_parallel.random import get_all_rng_states

logger = logging.getLogger(__name__)

# The below functions traverse through nested data structures (tuples, lists, dicts)
# present in src and creates a deep copy where all PyTorch tensors are cloned,
# detached from the computation graph, and moved to CUDA device. Non-tensor objects
# are returned as-is.


def copy_tensors_in_struct(src):
    """Copy src to new tensors."""
    if isinstance(src, tuple):
        return tuple(copy_tensors_in_struct(i) for i in src)
    elif isinstance(src, list):
        return list(copy_tensors_in_struct(i) for i in src)
    elif isinstance(src, dict):
        return {k: copy_tensors_in_struct(src[k]) for k in src}
    elif isinstance(src, torch.Tensor):
        return src.clone().detach().cuda()
    else:
        return src


def clone_tensors_in_struct(tgt, src):
    """Copy src to pre-existing tensors in tgt."""
    if isinstance(src, tuple):
        raise Exception(f"Unsupported copy for tuple yet: {type(src)}")
    elif isinstance(src, list):
        for i in range(len(src)):
            if isinstance(src[i], (tuple, list, dict, torch.Tensor)):
                clone_tensors_in_struct(tgt[i], src[i])
            else:
                tgt[i] = src[i]
    elif isinstance(src, dict):
        for k in src:
            if isinstance(src[k], (tuple, list, dict, torch.Tensor)):
                clone_tensors_in_struct(tgt[k], src[k])
            else:
                tgt[k] = src[k]
    elif isinstance(src, torch.Tensor):
        tgt.copy_(src, non_blocking=True)
    else:
        raise Exception(f"Expect top-level as container type but got: {type(src)}")


# Class to copy dataloader output to static CUDA tensors for CUDA graph input. This
# maintains separate static buffers for training and validation CUDA graphs.
class StaticBufferLoader:
    """Load data to static buffers."""

    static_buffers: dict = {'training': [], 'validation': []}

    def __init__(self):
        self.stream = torch.cuda.Stream()

    def __call__(self, inputs, stage, microbatch):
        assert stage in ['training', 'validation']
        assert microbatch <= len(StaticBufferLoader.static_buffers[stage])
        if isinstance(inputs, tuple) and isinstance(inputs[0], dict):
            inputs = inputs[0]

        assert isinstance(inputs, dict)
        if microbatch == len(StaticBufferLoader.static_buffers[stage]):
            with torch.cuda.stream(self.stream):
                StaticBufferLoader.static_buffers[stage].append(copy_tensors_in_struct(inputs))
        else:

            for k in inputs.keys():
                if k not in StaticBufferLoader.static_buffers[stage][microbatch]:
                    if isinstance(inputs[k], torch.Tensor):
                        StaticBufferLoader.static_buffers[stage][microbatch][k] = torch.empty_like(
                            inputs[k], device="cuda"
                        )
                    else:
                        StaticBufferLoader.static_buffers[stage][microbatch][k] = inputs[k]

            with torch.cuda.stream(self.stream):
                clone_tensors_in_struct(
                    StaticBufferLoader.static_buffers[stage][microbatch], inputs
                )
        torch.cuda.current_stream().wait_stream(self.stream)
        return StaticBufferLoader.static_buffers[stage][microbatch]


class FullCudaGraphWrapper:
    """Wrapper class to enable FullIterationCUDAgraph."""

    curr_iteration = {'training': 0, 'validation': 0}
    cuda_graph = {'training': None, 'validation': None}
    result = {'training': None, 'validation': None}

    def __init__(self, forward_backward_func, cuda_graph_warmup_steps=1):
        self.forward_backward_func = forward_backward_func
        self.static_loader = StaticBufferLoader()
        self.cuda_graph_warmup_steps = cuda_graph_warmup_steps

    def data_read(self, data_iterator, model, training, num_microbatches):
        """Read all microbatch inputs from Dataloader and copy to static buffers."""
        if not isinstance(model, list) or len(model) == 1:
            assert not isinstance(data_iterator, list) or len(data_iterator) == 1
            iterator0 = data_iterator if not isinstance(data_iterator, list) else data_iterator[0]
            data_list = []
            if iterator0 is not None:
                for b in range(num_microbatches):
                    data_list.append(
                        self.static_loader(
                            next(iterator0), 'training' if training else 'validation', b
                        )
                    )
                data_list = [iter(data_list)]
            else:
                data_list.append(None)
        else:
            assert isinstance(data_iterator, list) and len(data_iterator) == len(model)
            data_list = []
            for i in range(len(model)):
                if data_iterator[i] is not None:
                    data_list_i = []
                    for b in range(num_microbatches):
                        data_list_i.append(
                            self.static_loader(
                                next(data_iterator[i]), 'training' if training else 'validation', b
                            )
                        )
                    data_list.append(iter(data_list_i))
                else:
                    data_list.append(None)
        return data_list

    def __call__(self, *args, **kwargs):
        assert len(args) == 0, 'forward_backward_func does not accept positional args'
        assert all(
            [
                kwarg in kwargs
                for kwarg in [
                    'model',
                    'data_iterator',
                    'num_microbatches',
                    'seq_length',
                    'forward_only',
                ]
            ]
        )
        model = kwargs['model']
        num_microbatches = kwargs['num_microbatches']

        training = not kwargs['forward_only']
        data_iterator = kwargs['data_iterator']
        data_list = self.data_read(data_iterator, model, training, num_microbatches)
        kwargs['data_iterator'] = data_list

        training_str = 'training' if training else 'validation'
        curr_iteration = self.curr_iter(training_str)
        if curr_iteration == self.cuda_graph_warmup_steps:
            logger.info(f'Capture CUDA graph for {training_str}!!!')
            torch.distributed.barrier()
            assert FullCudaGraphWrapper.cuda_graph[training_str] is None
            FullCudaGraphWrapper.cuda_graph[training_str] = torch.cuda.CUDAGraph()
            for _, state in get_all_rng_states().items():
                FullCudaGraphWrapper.cuda_graph[training_str].register_generator_state(state)
            torch.cuda.synchronize()
            capture_stream = torch.cuda.Stream()
            with torch.cuda.graph(
                FullCudaGraphWrapper.cuda_graph[training_str],
                stream=capture_stream,
                capture_error_mode="thread_local",
            ):
                FullCudaGraphWrapper.result[training_str] = self.forward_backward_func(
                    *args, **kwargs
                )
            torch.cuda.synchronize()
            torch.distributed.barrier()
            logger.info(f'CUDA graph capture done for {training_str}!!!')

        if FullCudaGraphWrapper.cuda_graph[training_str] is None:
            FullCudaGraphWrapper.result[training_str] = self.forward_backward_func(*args, **kwargs)
        else:
            FullCudaGraphWrapper.cuda_graph[training_str].replay()

        self.next_iter(training_str)
        return FullCudaGraphWrapper.result[training_str]

    def curr_iter(self, stage):
        """Return current training/validation iteration."""
        return FullCudaGraphWrapper.curr_iteration[stage]

    def next_iter(self, stage):
        """Increment current training/validation iteration."""
        FullCudaGraphWrapper.curr_iteration[stage] += 1


================================================
FILE: megatron/core/fusions/__init__.py
================================================


================================================
FILE: megatron/core/fusions/fused_bias_dropout.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
from typing import Optional, Tuple

import torch

from megatron.core.jit import jit_fuser

# pylint: disable=missing-function-docstring


def _bias_dropout_add_func(x_with_bias, residual, prob, training):
    # type: (Tuple[Tensor, Optional[Tensor]], Tensor, float, bool) -> Tensor
    # NOTE: Previously, the argument `bias` used to be passed as
    # `bias.expand_as(residual)` when the `bias_dropout_func` is called from the
    # transformer layer but broadcasting should automatically take care of that.
    # Also, looking at broadcasting semantics, `expand_as` and broadcasting
    # seem to be identical performance-wise (both just change the view).

    x, bias = x_with_bias  # unpack

    # Run in-place if in eval mode and inputs do not require gradients
    inplace = (
        not training
        and not x.requires_grad
        and not residual.requires_grad
        and (bias is None or not bias.requires_grad)
    )

    # For fp32 residual connections: upcast x (and bias) to residual's dtype so that
    # the addition and output remain in fp32, preserving numerical precision in the
    # residual stream across layers. When fp32_residual_connection is enabled,
    # pipeline parallel communication dtype should be set to fp32 accordingly.
    if x.dtype != residual.dtype:
        x = x.to(residual.dtype)
        if bias is not None:
            bias = bias.to(residual.dtype)

    # The Dropout operation, Residual Addition and the tensor returning can be
    # done generically outside the if statement, but that stops fusing of Bias
    # Addition-Dropout-Residual Addition operation. So doing it together inside
    # the conditional branch to improve performance
    if bias is not None:
        if inplace:
            x.add_(bias)
        else:
            x = x + bias
        out = torch.nn.functional.dropout(x, p=prob, training=training, inplace=inplace)
        if inplace:
            out.add_(residual)
        else:
            out = residual + out
        return out
    else:
        out = torch.nn.functional.dropout(x, p=prob, training=training, inplace=inplace)
        if inplace:
            out.add_(residual)
        else:
            out = residual + out
        return out


def bias_dropout_add_unfused(training):
    def _bias_dropout_add(x_with_bias, residual, prob):
        return _bias_dropout_add_func(x_with_bias, residual, prob, training)

    return _bias_dropout_add


@jit_fuser
def bias_dropout_add_fused_train(
    x_with_bias: Tuple[torch.Tensor, Optional[torch.Tensor]], residual: torch.Tensor, prob: float
) -> torch.Tensor:
    return _bias_dropout_add_func(x_with_bias, residual, prob, True)


@jit_fuser
def bias_dropout_add_fused_inference(
    x_with_bias: Tuple[torch.Tensor, Optional[torch.Tensor]], residual: torch.Tensor, prob: float
) -> torch.Tensor:
    return _bias_dropout_add_func(x_with_bias, residual, prob, False)


def get_bias_dropout_add(training, fused):
    if fused:
        # jit scripting for a nn.module (with dropout) is not
        # triggering the fusion kernel. For now, we use two
        # different nn.functional routines to account for varying
        # dropout semantics during training and inference phases.
        if training:
            return bias_dropout_add_fused_train
        else:
            return bias_dropout_add_fused_inference
    else:
        return bias_dropout_add_unfused(training)


================================================
FILE: megatron/core/fusions/fused_bias_geglu.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

import torch

from megatron.core.jit import jit_fuser

###### BIAS GELU FUSION/ NO AUTOGRAD ################
# 1/sqrt(2*pi)-> 0.3989423
# 1/sqrt(2)   -> 0.70710678
# sqrt(2/pi)  -> 0.79788456
# this function is tanh approximation of gelu
# actual gelu is:
# x * 0.5 * (1.0 + torch.erf(x * 0.70710678))


@jit_fuser
def geglu(y):
    """Performs GEGLU (GELU-Gated Linear Unit) activation.

    Args:
        y (torch.Tensor): Input tensor to be split into two halves along the last dimension.

    Returns:
        torch.Tensor: Result of GEGLU activation: GELU(y1) * y2, where y1, y2 are the split halves.
    """
    y_1, y_2 = torch.chunk(y, 2, -1)
    return (y_1 * 0.5 * (1.0 + torch.tanh(0.79788456 * y_1 * (1 + 0.044715 * y_1 * y_1)))) * y_2


@jit_fuser
def bias_geglu(bias, y):
    """Performs GEGLU activation with bias addition.

    Args:
        bias (torch.Tensor): Bias tensor to be added to the input.
        y (torch.Tensor): Input tensor to be split and gated.

    Returns:
        torch.Tensor: Result of bias addition followed by GEGLU activation.
    """
    y = y + bias
    return geglu(y)


# gradient of tanh approximation of gelu
# gradient of actual gelu is:
# 0.5 * (1. + torch.erf(x * 0.70710678)) + 0.3989423 * x * torch.exp(-0.5 * x * x)
@jit_fuser
def geglu_back(g, y):
    """Computes the gradient for the GEGLU activation.

    Args:
        g (torch.Tensor): Gradient tensor from the subsequent layer.
        y (torch.Tensor): Input tensor that was used in the forward pass.

    Returns:
        torch.Tensor: Gradient with respect to the input tensor.
    """
    y_1, y_2 = torch.chunk(y, 2, -1)
    tanh_out = torch.tanh(0.79788456 * y_1 * (1 + 0.044715 * y_1 * y_1))
    # sqrt(2/pi) * 3 * 0.044715 -> 0.1070322243
    ff = 0.5 * y_1 * ((1 - tanh_out * tanh_out) * (0.79788456 + 0.1070322243 * y_1 * y_1)) + 0.5 * (
        1 + tanh_out
    )
    return torch.cat(((g * y_2) * ff, g * (y_1 * 0.5 * (1.0 + tanh_out))), -1)


@jit_fuser
def bias_geglu_back(g, y, bias):
    """Computes the gradient for the biased GEGLU activation.

    Args:
        g (torch.Tensor): Gradient tensor from the subsequent layer.
        y (torch.Tensor): Input tensor that was used in the forward pass.
        bias (torch.Tensor): Bias tensor that was added in the forward pass.

    Returns:
        torch.Tensor: Gradient with respect to the input tensor after bias addition.
    """
    y = y + bias
    return geglu_back(g, y)


class BiasGeGLUFunction(torch.autograd.Function):
    """Custom autograd function for GEGLU activation with bias support."""

    @staticmethod
    # bias is an optional argument
    def forward(ctx, input, bias):
        """Forward pass of biased GEGLU activation.

        Args:
            ctx: Autograd context object for saving tensors for backward pass.
            input (torch.Tensor): Input tensor to apply GEGLU to.
            bias (torch.Tensor): Bias tensor to be added to input before GEGLU.

        Returns:
            torch.Tensor: Result of applying bias addition followed by GEGLU activation.
        """
        ctx.save_for_backward(input, bias)
        return bias_geglu(input, bias)

    @staticmethod
    def backward(ctx, grad_output):
        """Backward pass of biased GEGLU activation.

        Args:
            ctx: Autograd context object containing saved tensors from forward pass.
            grad_output (torch.Tensor): Gradient of the loss with respect to the output.

        Returns:
            tuple: Tuple containing gradients with respect to the input and bias tensors.
        """
        input, bias = ctx.saved_tensors
        tmp = bias_geglu_back(grad_output, input, bias)
        return tmp, tmp


class GeGLUFunction(torch.autograd.Function):
    """Custom autograd function for GEGLU activation without bias."""

    @staticmethod
    # bias is an optional argument
    def forward(ctx, input):
        """Forward pass of GEGLU activation.

        Args:
            ctx: Autograd context object for saving tensors for backward pass.
            input (torch.Tensor): Input tensor to apply GEGLU to.

        Returns:
            torch.Tensor: Result of applying GEGLU activation.
        """
        ctx.save_for_backward(input)
        return geglu(input)

    @staticmethod
    def backward(ctx, grad_output):
        """Backward pass of GEGLU activation.

        Args:
            ctx: Autograd context object containing saved tensors from forward pass.
            grad_output (torch.Tensor): Gradient of the loss with respect to the output.

        Returns:
            torch.Tensor: Gradient with respect to the input tensor.
        """
        input = ctx.saved_tensors
        tmp = geglu_back(grad_output, input[0])
        return tmp


def bias_geglu_impl(input, bias):
    """Implementation of biased GEGLU that handles different input shapes.

    This function reshapes the input if necessary, applies the GEGLU activation
    (with or without bias), and restores the original shape.

    Args:
        input (torch.Tensor): Input tensor to apply GEGLU activation.
        bias (torch.Tensor, optional): Bias tensor to be added to input. If None,
            uses the bias-free GEGLU variant.

    Returns:
        torch.Tensor: Result of biased GEGLU activation.

    Raises:
        AssertionError: If input tensor does not have 2 or 3 dimensions.
    """
    ori_shape = input.shape
    assert len(ori_shape) in [2, 3]
    input = input.view(-1, ori_shape[-1])
    if bias is not None:
        output = BiasGeGLUFunction.apply(input, bias)
    else:
        output = GeGLUFunction.apply(input)

    return output if len(ori_shape) == 2 else output.view(ori_shape[0], ori_shape[1], -1)


# ------------------------- QUICK GEGLU FUSION --------------------------


@jit_fuser
def quick_gelu(y: torch.Tensor) -> torch.Tensor:
    """Sigmoid approximation of gelu"""
    return y * torch.sigmoid(1.702 * y)


@jit_fuser
def quick_geglu(y: torch.Tensor, linear_offset: float = 0.0) -> torch.Tensor:
    """Performs Quick-GELU-based GEGLU activation : quick_gelu(y1) * (y2 + offset).

    Args:
        y: Input tensor split into two halves on the last dimension.
        linear_offset: Optional linear offset added to the second half before gating.

    Returns:
        Tensor after applying the GEGLU activation.
    """
    y_1, y_2 = torch.chunk(y, 2, dim=-1)
    return quick_gelu(y_1) * (y_2 + linear_offset)


@jit_fuser
def weighted_quick_geglu(
    y: torch.Tensor, weights: torch.Tensor, linear_offset: float = 0.0
) -> torch.Tensor:
    """Token-wise-weighted Quick-GEGLU activation.

    The weights tensor is expected to have the same first-dimension length as ``y`` and a trailing
    singleton dimension so that it broadcasts over the feature dimension.
    """
    dtype = y.dtype
    res = quick_geglu(y, linear_offset) * weights
    return res.to(dtype)


# gradient of sigmoid approximation of gelu
@jit_fuser
def quick_geglu_back(g, y, linear_offset: float = 0.0) -> torch.Tensor:
    """Backward helper for Quick-GEGLU.

    Args:
        g (torch.Tensor): Upstream gradient tensor.
        y (torch.Tensor): Input tensor used in the forward pass.
        linear_offset (float, optional): Linear offset used in the forward pass. Defaults to 0.0.

    Returns:
        torch.Tensor: Gradient with respect to the input tensor.
    """
    y_1, y_2 = torch.chunk(y, 2, -1)
    sigmoid_out = torch.sigmoid(1.702 * y_1)
    dy_1 = g * sigmoid_out * (1 + 1.702 * y_1 * (1 - sigmoid_out)) * (y_2 + linear_offset)
    dy_2 = g * y_1 * sigmoid_out
    return torch.cat((dy_1, dy_2), -1)


@jit_fuser
def weighted_quick_geglu_back(g, y, weights, linear_offset: float = 0.0):
    """Backward helper for weighted Quick-GEGLU.
    Returns gradient w.r.t input `y` and `weights`.
    """
    input_dtype = y.dtype
    w_dtype = weights.dtype
    # Gradient w.r.t input uses the chain rule with weighting.
    input_grad = quick_geglu_back(g * weights, y, linear_offset)
    # Gradient w.r.t weights is the activation times upstream grad (cast to weight dtype).
    weights_grad = quick_geglu(y, linear_offset) * g.to(w_dtype)
    # Sum across the feature dimension to keep weights shape `[tokens, 1]`.
    weights_grad = torch.sum(weights_grad, dim=-1, keepdim=True)
    return input_grad.to(input_dtype), weights_grad.to(w_dtype)


# ---------------- Weighted Bias Quick-GEGLU helpers -----------------


@jit_fuser
def weighted_bias_quick_geglu(
    y: torch.Tensor, bias: torch.Tensor, weights: torch.Tensor, linear_offset: float = 0.0
) -> torch.Tensor:
    """Token-wise weighted Quick-GEGLU activation with bias.

    Args:
        y: Input tensor before bias addition.
        bias: Bias tensor broadcastable to `y`.
        weights: Weight tensor with shape `[tokens, 1]` broadcasting over feature dim.
        linear_offset: Optional linear offset for the second half before gating.

    Returns:
        Activated tensor with same dtype as `y`.
    """
    dtype = y.dtype
    res = quick_geglu(y + bias, linear_offset) * weights
    return res.to(dtype)


@jit_fuser
def weighted_bias_quick_geglu_back(g, y, bias, weights, linear_offset: float = 0.0):
    """Backward helper for weighted Quick-GEGLU with bias.

    Returns gradients w.r.t input `y`, `bias`, and `weights`.
    """
    input_dtype = y.dtype
    w_dtype = weights.dtype

    # Forward input with bias
    x = y + bias

    # Gradient w.r.t input (and thus bias) via chain rule
    input_grad = quick_geglu_back(g * weights, x, linear_offset)

    # Gradient w.r.t weights
    weights_grad = quick_geglu(x, linear_offset) * g.to(w_dtype)
    weights_grad = torch.sum(weights_grad, dim=-1, keepdim=True)

    # bias gradient identical to input gradient
    bias_grad = input_grad

    return input_grad.to(input_dtype), bias_grad.to(input_dtype), weights_grad.to(w_dtype)


class WeightedQuickGeGLUFunction(torch.autograd.Function):
    """Autograd function for token-wise weighted Quick-GEGLU (no bias)."""

    @staticmethod
    def forward(
        ctx,
        input: torch.Tensor,
        weights: torch.Tensor,
        fp8_input_store: bool,
        linear_offset: torch.Tensor,
    ):
        """Forward pass of weighted Quick-GEGLU.

        Args:
            ctx: Autograd context object for saving tensors for backward pass.
            input (torch.Tensor): Input tensor of shape [N, 2H].
            weights (torch.Tensor): Per-token weights of shape [N, 1].
            fp8_input_store (bool): If True, stores input for backward in FP8.
            linear_offset (torch.Tensor): Scalar tensor offset added to the linear half.

        Returns:
            torch.Tensor: Output tensor of shape [N, H] after weighted Quick-GEGLU.
        """
        input_for_backward = input.to(torch.float8_e4m3fn) if fp8_input_store else input
        ctx.save_for_backward(input_for_backward, weights, linear_offset)
        ctx.ori_input_dtype = input.dtype
        ctx.fp8_input_store = fp8_input_store
        return weighted_quick_geglu(input, weights, linear_offset)

    @staticmethod
    def backward(ctx, grad_output):
        """Backward pass of weighted Quick-GEGLU.

        Args:
            ctx: Autograd context object containing saved tensors from forward pass.
            grad_output (torch.Tensor): Upstream gradient w.r.t. the output.

        Returns:
            tuple: Gradients with respect to (input, weights, fp8_input_store, linear_offset).
                The latter two gradients are None.
        """
        input, weights, linear_offset = ctx.saved_tensors
        input = input.to(ctx.ori_input_dtype) if ctx.fp8_input_store else input
        input_grad, wgrad = weighted_quick_geglu_back(grad_output, input, weights, linear_offset)
        return input_grad, wgrad, None, None


class WeightedBiasQuickGeGLUFunction(torch.autograd.Function):
    """Autograd function for token-wise weighted Quick-GEGLU with bias support."""

    @staticmethod
    def forward(
        ctx,
        input: torch.Tensor,
        bias: torch.Tensor,
        weights: torch.Tensor,
        fp8_input_store: bool,
        linear_offset: torch.Tensor,
    ):
        """Forward pass of weighted Quick-GEGLU.

        Args:
            ctx: Autograd context object for saving tensors for backward pass.
            input (torch.Tensor): Input tensor of shape [N, 2H].
            bias (torch.Tensor): Bias tensor of shape [N, 1].
            weights (torch.Tensor): Per-token weights of shape [N, 1].
            fp8_input_store (bool): If True, stores input for backward in FP8.
            linear_offset (torch.Tensor): Scalar tensor offset added to the linear half.

        Returns:
            torch.Tensor: Output tensor of shape [N, H] after weighted Quick-GEGLU with bias.
        """
        # Optionally store the input in FP8 for memory savings.
        input_for_backward = input.to(torch.float8_e4m3fn) if fp8_input_store else input

        # Save tensors for backward.
        ctx.save_for_backward(input_for_backward, bias, weights, linear_offset)
        ctx.ori_input_dtype = input.dtype
        ctx.fp8_input_store = fp8_input_store

        # Compute activation using fused helper that includes bias and weighting.
        return weighted_bias_quick_geglu(input, bias, weights, linear_offset)

    @staticmethod
    def backward(ctx, grad_output):
        """Backward pass of weighted Quick-GEGLU with bias.

        Args:
            ctx: Autograd context object containing saved tensors from forward pass.
            grad_output (torch.Tensor): Upstream gradient w.r.t. the output.

        Returns:
            tuple: Gradients with respect to (input, bias, weights, fp8_input_store, linear_offset).
                The latter two gradients are None.
        """
        input, bias, weights, linear_offset = ctx.saved_tensors

        # Restore original input dtype if it was stored in FP8.
        input = input.to(ctx.ori_input_dtype) if ctx.fp8_input_store else input

        input_grad, bias_grad, weights_grad = weighted_bias_quick_geglu_back(
            grad_output, input, bias, weights, linear_offset
        )

        return input_grad, bias_grad, weights_grad, None, None


def weighted_bias_quick_geglu_impl(
    input, bias, weights, fp8_input_store=False, linear_offset=0.0, clamp_value=None
):
    """
    Token-wise-weighted bias quick_geglu fusion.
        input: [num_selected_experts * seq_len, hidden_size * 2]
        bias: None
        weights: [num_selected_experts * seq_len, 1]
        fp8_input_store: bool
        linear_offset: float
        output: [num_selected_experts * seq_len, hidden_size]
    """
    ori_shape = input.shape
    assert len(ori_shape) in [2, 3]
    if clamp_value is not None:
        x_glu, x_linear = input.chunk(2, -1)
        input = torch.cat(
            (
                x_glu.clamp(min=None, max=clamp_value),
                x_linear.clamp(min=-clamp_value, max=clamp_value),
            ),
            -1,
        )
    input = input.view(-1, ori_shape[-1])
    linear_offset = torch.tensor(linear_offset, dtype=input.dtype, device=input.device)
    if bias is not None:
        output = WeightedBiasQuickGeGLUFunction.apply(
            input, bias, weights, fp8_input_store, linear_offset
        )
    else:
        output = WeightedQuickGeGLUFunction.apply(input, weights, fp8_input_store, linear_offset)

    return output if len(ori_shape) == 2 else output.view(ori_shape[0], ori_shape[1], -1)


================================================
FILE: megatron/core/fusions/fused_bias_gelu.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

import torch

from megatron.core.jit import jit_fuser

# BIAS GELU FUSION/ NO AUTOGRAD ################
# 1/sqrt(2*pi)-> 0.3989423
# 1/sqrt(2)   -> 0.70710678
# sqrt(2/pi)  -> 0.79788456
# this function is tanh approximation of gelu
# actual gelu is:
# x * 0.5 * (1.0 + torch.erf(x * 0.70710678))


@jit_fuser
def bias_gelu(bias, y):
    x = bias + y
    return x * 0.5 * (1.0 + torch.tanh(0.79788456 * x * (1 + 0.044715 * x * x)))


# gradient of tanh approximation of gelu
# gradient of actual gelu is:
# 0.5 * (1. + torch.erf(x * 0.70710678)) + 0.3989423 * x * torch.exp(-0.5 * x * x)
@jit_fuser
def bias_gelu_back(g, bias, y):
    x = bias + y
    tanh_out = torch.tanh(0.79788456 * x * (1 + 0.044715 * x * x))
    # sqrt(2/pi) * 3 * 0.044715 -> 0.1070322243
    ff = 0.5 * x * ((1 - tanh_out * tanh_out) * (0.79788456 + 0.1070322243 * x * x)) + 0.5 * (
        1 + tanh_out
    )
    return ff * g


class GeLUFunction(torch.autograd.Function):
    @staticmethod
    # bias is an optional argument
    def forward(ctx, input, bias):
        ctx.save_for_backward(input, bias)
        return bias_gelu(bias, input)

    @staticmethod
    def backward(ctx, grad_output):
        input, bias = ctx.saved_tensors
        tmp = bias_gelu_back(grad_output, bias, input)
        return tmp, tmp

    # This is required to make Sphinx happy :-(
    @classmethod
    def apply(cls, *args, **kwargs):
        return super().apply(*args, **kwargs)


bias_gelu_impl = GeLUFunction.apply


================================================
FILE: megatron/core/fusions/fused_bias_swiglu.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


# pylint: disable=missing-function-docstring, missing-class-docstring

import torch
import torch.nn.functional as F

from megatron.core.jit import jit_fuser
from megatron.core.utils import nvtx_decorator

###### BIAS SWIGLU FUSION/ NO AUTOGRAD ################


@jit_fuser
def swiglu(y):
    """Performs SwiGLU (Swish-Gated Linear Unit) activation function.

    Args:
        y (torch.Tensor): Input tensor to be split into two halves along the last dimension.

    Returns:
        torch.Tensor: Result of SwiGLU activation: SiLU(y1) * y2, where y1, y2 are the split halves.
    """
    y_1, y_2 = torch.chunk(y, 2, -1)
    return F.silu(y_1) * y_2


@jit_fuser
def bias_swiglu(y, bias):
    """Performs SwiGLU activation with bias addition.

    Args:
        y (torch.Tensor): Input tensor.
        bias (torch.Tensor): Bias tensor to be added to input.

    Returns:
        torch.Tensor: Result of bias addition followed by SwiGLU activation.
    """
    y = y + bias
    return swiglu(y)


@jit_fuser
def weighted_swiglu(y, weights):
    dtype = y.dtype
    res = swiglu(y) * weights
    return res.to(dtype)


# gradient of tanh approximation of gelu
# gradient of actual gelu is:
# 0.5 * (1. + torch.erf(x * 0.70710678)) + 0.3989423 * x * torch.exp(-0.5 * x * x)
@jit_fuser
def swiglu_back(g, y):
    """Computes the gradient for the SwiGLU activation function.

    Args:
        g (torch.Tensor): Gradient tensor from the subsequent layer.
        y (torch.Tensor): Input tensor that was used in the forward pass.

    Returns:
        torch.Tensor: Gradient with respect to the input tensor, computed using the
            chain rule and the derivative of the SiLU activation function.
    """
    y_1, y_2 = torch.chunk(y, 2, -1)
    return torch.cat(
        (g * torch.sigmoid(y_1) * (1 + y_1 * (1 - torch.sigmoid(y_1))) * y_2, g * F.silu(y_1)), -1
    )


@jit_fuser
def bias_swiglu_back(g, y, bias):
    """Computes the gradient for the biased SwiGLU activation function.

    Args:
        g (torch.Tensor): Gradient tensor from the subsequent layer.
        y (torch.Tensor): Input tensor that was used in the forward pass.
        bias (torch.Tensor): Bias tensor that was added in the forward pass.

    Returns:
        torch.Tensor: Gradient with respect to the input tensor, computed after
            applying the bias addition.
    """
    y = y + bias
    return swiglu_back(g, y)


@jit_fuser
def weighted_swiglu_back(g, y, weights):
    input_dtype = y.dtype
    w_dtype = weights.dtype
    input_grad = swiglu_back(g * weights, y)
    # precison of w may be higher than y and g, so we need to cast g to w_dtype
    weights_grad = swiglu(y) * g.to(w_dtype)
    weights_grad = torch.sum(weights_grad, dim=-1, keepdim=True)
    return input_grad.to(input_dtype), weights_grad.to(w_dtype)


class BiasSwiGLUFunction(torch.autograd.Function):
    """Custom autograd function for SwiGLU activation with bias support."""

    @staticmethod
    @nvtx_decorator()
    def forward(ctx, input, bias, fp8_input_store, cpu_offload_input):
        """Forward pass of biased SwiGLU activation.

        Args:
            ctx: Autograd context object for saving tensors for backward pass.
            input (torch.Tensor): Input tensor to apply SwiGLU to.
            bias (torch.Tensor): Bias tensor to be added to input before SwiGLU.
            fp8_input_store (bool): If True, stores intermediate values in FP8 format.

        Returns:
            torch.Tensor: Result of applying bias addition followed by SwiGLU activation.
        """
        input_for_backward = input.to(torch.float8_e4m3fn) if fp8_input_store else input
        if cpu_offload_input:
            input_for_backward.activation_offloading = True
            bias.activation_offloading = True
        ctx.save_for_backward(input_for_backward, bias)
        ctx.ori_input_dtype = input.dtype
        ctx.fp8_input_store = fp8_input_store
        return bias_swiglu(input, bias)

    @staticmethod
    @nvtx_decorator()
    def backward(ctx, grad_output):
        """Backward pass of biased SwiGLU activation.

        Args:
            ctx: Autograd context object containing saved tensors from forward pass.
            grad_output (torch.Tensor): Gradient of the loss with respect to the output.

        Returns:
            tuple: Tuple containing:
                - Gradient with respect to the input tensor
                - Gradient with respect to the bias tensor
                - None for fp8_input_store parameter
        """
        input, bias = ctx.saved_tensors
        input = input.to(ctx.ori_input_dtype) if ctx.fp8_input_store else input
        tmp = bias_swiglu_back(grad_output, input, bias)
        return tmp, tmp, None, None


class SwiGLUFunction(torch.autograd.Function):
    """Custom autograd function for SwiGLU activation without bias."""

    @staticmethod
    @nvtx_decorator()
    def forward(ctx, input, fp8_input_store, cpu_offload_input):
        """Forward pass of SwiGLU activation.

        Args:
            ctx: Autograd context object for saving tensors for backward pass.
            input (torch.Tensor): Input tensor to apply SwiGLU to.
            fp8_input_store (bool): If True, stores intermediate values in FP8 format.

        Returns:
            torch.Tensor: Result of applying SwiGLU activation.
        """
        input_for_backward = input.to(torch.float8_e4m3fn) if fp8_input_store else input
        if cpu_offload_input:
            input_for_backward.activation_offloading = True
        ctx.save_for_backward(input_for_backward)
        ctx.ori_input_dtype = input.dtype
        ctx.fp8_input_store = fp8_input_store
        return swiglu(input)

    @staticmethod
    @nvtx_decorator()
    def backward(ctx, grad_output):
        """Backward pass of SwiGLU activation.

        Args:
            ctx: Autograd context object containing saved tensors from forward pass.
            grad_output (torch.Tensor): Gradient of the loss with respect to the output.

        Returns:
            tuple: Tuple containing:
                - Gradient with respect to the input tensor
                - None for fp8_input_store parameter
        """
        input = ctx.saved_tensors[0]
        input = input.to(ctx.ori_input_dtype) if ctx.fp8_input_store else input
        tmp = swiglu_back(grad_output, input)
        return tmp, None, None


class WeightedSwiGLUFunction(torch.autograd.Function):
    @staticmethod
    # bias is an optional argument
    def forward(ctx, input, weights, fp8_input_store):
        input_for_backward = input.to(torch.float8_e4m3fn) if fp8_input_store else input
        ctx.save_for_backward(input_for_backward, weights)
        ctx.ori_input_dtype = input.dtype
        ctx.fp8_input_store = fp8_input_store
        return weighted_swiglu(input, weights)

    @staticmethod
    def backward(ctx, grad_output):
        input, weights = ctx.saved_tensors
        input = input.to(ctx.ori_input_dtype) if ctx.fp8_input_store else input
        tmp, wgrad = weighted_swiglu_back(grad_output, input, weights)
        return tmp, wgrad, None


def bias_swiglu_impl(input, bias, fp8_input_store=False, cpu_offload_input=False):
    """Implementation of biased SwiGLU that handles different input shapes.

    This function reshapes the input if necessary, applies the SwiGLU activation
    (with or without bias), and restores the original shape.

    Args:
        input (torch.Tensor): Input tensor to apply SwiGLU activation.
        bias (torch.Tensor, optional): Bias tensor to be added to input. If None,
            uses the bias-free SwiGLU variant.
        fp8_input_store (bool, optional): Whether to store intermediate values in FP8 format.
            Defaults to False.

    Returns:
        torch.Tensor: Result of biased SwiGLU activation.

    Raises:
        AssertionError: If input tensor does not have 2 or 3 dimensions.
    """
    ori_shape = input.shape
    assert len(ori_shape) in [2, 3]
    input = input.view(-1, ori_shape[-1])
    if bias is not None:
        output = BiasSwiGLUFunction.apply(input, bias, fp8_input_store, cpu_offload_input)
    else:
        output = SwiGLUFunction.apply(input, fp8_input_store, cpu_offload_input)

    return output if len(ori_shape) == 2 else output.view(ori_shape[0], ori_shape[1], -1)


def weighted_bias_swiglu_impl(input, bias, weights, fp8_input_store=False):
    """
    Token-wise-weighted bias swiglu fusion.
    """
    ori_shape = input.shape
    assert len(ori_shape) in [2, 3]
    input = input.view(-1, ori_shape[-1])
    if bias is not None:
        raise NotImplementedError("Bias is not supported for weighted swiglu fusion")
    else:
        output = WeightedSwiGLUFunction.apply(input, weights, fp8_input_store)

    return output if len(ori_shape) == 2 else output.view(ori_shape[0], ori_shape[1], -1)


# bias_swiglu_impl = BiasSwiGLUFunction.apply
# swiglu_impl = SwiGLUFunction.apply


================================================
FILE: megatron/core/fusions/fused_cross_entropy.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from typing import Tuple

import torch

from megatron.core.jit import jit_fuser
from megatron.core.tensor_parallel.cross_entropy import VocabParallelCrossEntropy
from megatron.core.tensor_parallel.utils import VocabUtility


@jit_fuser
def calculate_logits_max(vocab_parallel_logits: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Calculates the maximum logits of the predicted tokens.
    """

    vocab_parallel_logits, logits_max = VocabParallelCrossEntropy.calculate_logits_max(
        vocab_parallel_logits
    )

    return vocab_parallel_logits, logits_max


@jit_fuser
def calculate_predicted_logits(
    vocab_parallel_logits: torch.Tensor,
    target: torch.Tensor,
    logits_max: torch.Tensor,
    vocab_start_index: int,
    vocab_end_index: int,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Calculates the predicted logits for the tokens.
    """
    (target_mask, masked_target_1d, predicted_logits, sum_exp_logits, exp_logits) = (
        VocabParallelCrossEntropy.calculate_predicted_logits(
            vocab_parallel_logits, target, logits_max, vocab_start_index, vocab_end_index
        )
    )

    predicted_logits_sum_exp_logits = torch.cat((predicted_logits, sum_exp_logits))

    return target_mask, masked_target_1d, predicted_logits_sum_exp_logits, exp_logits


@jit_fuser
def calculate_cross_entropy_loss(
    exp_logits: torch.Tensor, predicted_logits_sum_exp_logits: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Calculates the final cross entropy loss for the tokens.
    """
    split_val = predicted_logits_sum_exp_logits.size()[0] // 2
    predicted_logits, sum_exp_logits = torch.split(predicted_logits_sum_exp_logits, split_val)

    exp_logits, loss = VocabParallelCrossEntropy.calculate_cross_entropy_loss(
        exp_logits, predicted_logits, sum_exp_logits
    )

    return exp_logits, loss


@jit_fuser
def calculate_gradients(
    softmax: torch.Tensor,
    grad_output: torch.Tensor,
    target_mask: torch.Tensor,
    masked_target_1d: torch.Tensor,
) -> torch.Tensor:
    """
    Calculate the logits gradients scaled based on the CE loss
    """
    (grad_2d, arange_1d, softmax_update, grad_input) = (
        VocabParallelCrossEntropy.prepare_gradient_calculation_operands(softmax, target_mask)
    )

    grad_input = VocabParallelCrossEntropy.calculate_gradients(
        grad_2d, arange_1d, masked_target_1d, softmax_update, grad_input, grad_output
    )

    grad_input = grad_input.to(torch.bfloat16)

    return grad_input


class _VocabParallelCrossEntropy(torch.autograd.Function):
    @staticmethod
    def forward(ctx, vocab_parallel_logits, target, tp_group):
        """
        Forward implementation for the cross entropy loss.
        """
        vocab_parallel_logits, logits_max = calculate_logits_max(vocab_parallel_logits)
        torch.distributed.all_reduce(logits_max, op=torch.distributed.ReduceOp.MAX, group=tp_group)

        # Get the partition's vocab indices
        get_vocab_range = VocabUtility.vocab_range_from_per_partition_vocab_size
        partition_vocab_size = vocab_parallel_logits.size()[-1]
        vocab_start_index, vocab_end_index = get_vocab_range(
            partition_vocab_size, tp_group.rank(), tp_group.size()
        )

        (target_mask, masked_target_1d, predicted_logits_sum_exp_logits, exp_logits) = (
            calculate_predicted_logits(
                vocab_parallel_logits, target, logits_max, vocab_start_index, vocab_end_index
            )
        )

        # All reduce is needed to get the chunks from other GPUs.
        # In the fused case, tensors are batches to invoke a single
        # AllReduce call
        torch.distributed.all_reduce(
            predicted_logits_sum_exp_logits, op=torch.distributed.ReduceOp.SUM, group=tp_group
        )

        exp_logits, loss = calculate_cross_entropy_loss(exp_logits, predicted_logits_sum_exp_logits)

        # Store softmax, target-mask and masked-target for backward pass.
        ctx.save_for_backward(exp_logits, target_mask, masked_target_1d)

        return loss

    @staticmethod
    def backward(ctx, grad_output):
        """
        Backward implementation for the cross entropy loss.
        """
        # Retreive tensors from the forward path.
        softmax, target_mask, masked_target_1d = ctx.saved_tensors

        grad_input = calculate_gradients(softmax, grad_output, target_mask, masked_target_1d)

        return grad_input, None, None


def fused_vocab_parallel_cross_entropy(vocab_parallel_logits, target, tp_group):
    """
    Performs cross entropy loss when logits are split across tensor parallel ranks

    Args:
        vocab_parallel_logits: logits split across tensor parallel ranks
                               dimension is [sequence_length, batch_size, hidden_size]

        target: correct vocab ids of dimseion [sequence_length, micro_batch_size]
        tp_group: the tensor parallel group over which to all reduce

    """
    return _VocabParallelCrossEntropy.apply(vocab_parallel_logits, target, tp_group)


================================================
FILE: megatron/core/fusions/fused_indices_converter.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import math
from unittest.mock import MagicMock

import torch
from packaging import version

from megatron.core.utils import null_decorator

try:
    import triton
    import triton.language as tl

    if version.parse(triton.__version__) < version.parse("3.4.0") and not torch.cuda.is_available():
        HAVE_TRITON = False
    else:
        HAVE_TRITON = tl.constexpr(version.parse(triton.__version__) >= version.parse("2.0.0"))
except ImportError:
    HAVE_TRITON = False

if not HAVE_TRITON:
    triton = MagicMock()
    triton.jit = null_decorator
    triton.autotune = null_decorator
    triton.heuristics = null_decorator
    tl = MagicMock()


# Assign a block to a row([1,topk]), generate a local routing map([1,num_of_local_experts])
@triton.jit
def _indices_to_multihot_kernel(
    indices_ptr,
    probs_in_indices_ptr,
    multihot_indices_ptr,  # bool
    probs_in_multihot_ptr,
    position_map_ptr,
    num_of_local_experts: tl.constexpr,
    num_of_local_experts_next_power_of_2: tl.constexpr,
    topk: tl.constexpr,
    topk_next_power_of_2: tl.constexpr,
    BLOCK_SIZE: tl.constexpr,
):
    '''
    Triton kernel for converting indices to multihot representation.

    Input:
        indices: [num_of_tokens, topk]
        probs_in_indices: [num_of_tokens, topk]
    Output:
        multihot_indices: [num_of_tokens, num_of_local_experts]
        probs_in_multihot: [num_of_tokens, num_of_local_experts]

    Assume that topk = 2 , num_of_local_experts = 4, num_of_tokens = 2,
    then the kernel can process the following conversion:

    Input Example:
        indices = [
                [0, 1],
                [1, 2]
            ]
        probs_in_indices = [
                [0.1, 0.2],
                [0.3, 0.4]
            ]
    Output Example:
        multihot_indices = [
                [1, 1, -1, -1],
                [-1, 1, 1, -1]
            ]
        probs_in_multihot = [
                [0.1, 0.2, 0.0, 0.0],
                [0.0, 0.3, 0.4, 0.0]
            ]
    '''
    # Prepare the [0, topk) row
    topk_row = tl.arange(0, topk_next_power_of_2)
    topk_row = tl.where(topk_row < topk, topk_row, -1)
    topk_row_mask = topk_row != -1
    # Prepare the [0, num_of_local_experts) row
    num_exp_row = tl.arange(0, num_of_local_experts_next_power_of_2)
    num_exp_row = tl.where(num_exp_row < num_of_local_experts, num_exp_row, -1)
    num_exp_row_mask = num_exp_row != -1

    # Load a [1, topk] row from the indices buffer
    row_idx = tl.program_id(0)
    indices_row = tl.load(indices_ptr + row_idx * topk + topk_row, mask=topk_row_mask)
    indices_row = tl.where(topk_row_mask, indices_row, -1)
    probs_row = tl.load(probs_in_indices_ptr + row_idx * topk + topk_row, mask=topk_row_mask)

    # Get the position of the each index in the indices_row, which is saved for backwards
    position_row = tl.where(indices_row != -1, topk_row, -1)
    # Mask of the valid indices
    mask = (indices_row != -1) & (indices_row < num_of_local_experts)

    row_idx_offset = row_idx * num_of_local_experts
    # Store to initialize
    tl.store(multihot_indices_ptr + row_idx_offset + num_exp_row, 0, mask=num_exp_row_mask)
    tl.store(probs_in_multihot_ptr + row_idx_offset + num_exp_row, 0, mask=num_exp_row_mask)
    tl.store(position_map_ptr + row_idx_offset + num_exp_row, -1, mask=num_exp_row_mask)
    # Use barrier to make sure the initialization is done
    tl.debug_barrier()
    # Store the indices and probs_in_indices
    tl.store(multihot_indices_ptr + row_idx_offset + indices_row, 1, mask)
    tl.store(probs_in_multihot_ptr + row_idx_offset + indices_row, probs_row, mask)
    # Store the position of the position_row for backwards
    tl.store(position_map_ptr + row_idx_offset + indices_row, position_row, mask)


# Assign a block to a row([1,topk]), generate a probs_indices([1,topk])
@triton.jit
def _multihot_to_indices_kernel(
    probs_in_multihot_ptr,
    position_map_ptr,
    probs_indices_ptr,
    num_of_local_experts: tl.constexpr,
    num_of_local_experts_next_power_of_2: tl.constexpr,
    topk: tl.constexpr,
    topk_next_power_of_2: tl.constexpr,
    BLOCK_SIZE: tl.constexpr,
):
    '''
    Triton kernel for converting multihot representation to indices.

    Input:
        probs_in_multihot: [num_of_tokens, num_of_local_experts]
        position_map: [num_of_tokens, num_of_local_experts]
    Output:
        probs_indices: [num_of_tokens, topk]

    Assume that topk = 2 , num_of_local_experts = 4, num_of_tokens = 2,
    then the kernel can process the following conversion:

    Input Example:
        probs_in_multihot = [
                [0.7, 0.8, 0.0, 0.0],
                [0.0, 0.1, 0.9, 0.0]
            ]
        position_map = [
                [1, 1, -1, -1],
                [-1, 1, 1, -1]
            ]
    Output Example:
        probs_indices = [
                [0.7, 0.8],
                [0.1, 0.9]
            ]
    '''
    # Prepare the [0, topk) row
    topk_row = tl.arange(0, topk_next_power_of_2)
    topk_row = tl.where(topk_row < topk, topk_row, -1)
    topk_row_mask = topk_row != -1
    # Prepare the [0, num_of_local_experts) row
    num_exp_row = tl.arange(0, num_of_local_experts_next_power_of_2)
    num_exp_row = tl.where(num_exp_row < num_of_local_experts, num_exp_row, -1)
    num_exp_row_mask = num_exp_row != -1

    # Load a [1, num_of_local_experts] row from the local routing map
    row_idx = tl.program_id(0)
    ptr_offset = row_idx * num_of_local_experts + num_exp_row
    probs_in_multihot_row = tl.load(probs_in_multihot_ptr + ptr_offset, mask=num_exp_row_mask)

    # Get the original position of the valid value in the the indices
    position_map_row = tl.load(position_map_ptr + ptr_offset, mask=num_exp_row_mask)
    position_map_row = tl.where(num_exp_row_mask, position_map_row, -1)
    mask = position_map_row != -1

    # Store to initialize
    tl.store(probs_indices_ptr + row_idx * topk + topk_row, 0, mask=topk_row_mask)
    # Use barrier to make sure the initialization is done
    tl.debug_barrier()
    # Restore the indices and probs_indices
    tl.store(probs_indices_ptr + row_idx * topk + position_map_row, probs_in_multihot_row, mask)


class IndicesToMultihot(torch.autograd.Function):
    """Convert moe topk indices to multihot representation.

    This class implements a custom forward and backward propagation
    operation for efficiently converting indices to multihot
    representation.
    It is an experimental feature and may change in future versions.
    """

    @staticmethod
    def forward(ctx, indices, probs_indices, num_of_local_experts):
        '''Forward function for IndicesToMultihot

        Convert indices to multihot representation.

        Args:
            indices: [num_of_tokens, topk]
            probs_indices: [num_of_tokens, topk]
            num_of_local_experts: int

        Returns:
            multihot_indices: [num_of_tokens, num_of_local_experts]
            probs_in_multihot: [num_of_tokens, num_of_local_experts]
        '''
        num_of_tokens = indices.shape[0]
        assert (
            indices.shape == probs_indices.shape
        ), "indices and probs_indices must have the same shape"
        topk = indices.shape[1]
        multihot_indices = torch.empty(
            (num_of_tokens, num_of_local_experts), dtype=torch.bool, device="cuda"
        )
        probs_in_multihot = torch.empty(
            (num_of_tokens, num_of_local_experts), dtype=probs_indices.dtype, device="cuda"
        )
        position_map = torch.empty(
            (num_of_tokens, num_of_local_experts), dtype=torch.int32, device="cuda"
        )
        # Compute the next power of 2 for the topk and num_of_local_experts
        topk_next_power_of_2 = 2 ** int(math.ceil(math.log2(topk)))
        num_of_local_experts_next_power_of_2 = 2 ** int(math.ceil(math.log2(num_of_local_experts)))
        grid = (num_of_tokens,)
        _indices_to_multihot_kernel[grid](
            indices,
            probs_indices,
            multihot_indices,
            probs_in_multihot,
            position_map,
            num_of_local_experts,
            num_of_local_experts_next_power_of_2,
            topk,
            topk_next_power_of_2,
            BLOCK_SIZE=32,  # use only 1 warp per block
            num_warps=1,
        )

        ctx.save_for_backward(position_map)
        ctx.num_of_tokens = num_of_tokens
        ctx.num_of_local_experts = num_of_local_experts
        ctx.topk = topk
        return multihot_indices, probs_in_multihot

    @staticmethod
    def backward(ctx, grad_multihot_indices, grad_probs_in_multihot):
        '''Backward function for IndicesToMultihot

        Convert multihot probs representation to indices.
        indices is ignored in the backward function.

        Args:
            grad_multihot_indices: [num_of_tokens, num_of_local_experts]
            grad_probs_in_multihot: [num_of_tokens, num_of_local_experts]

        Returns:
            grad_probs_indices: [num_of_tokens, topk]
        '''
        position_map = ctx.saved_tensors[0]
        num_of_tokens = ctx.num_of_tokens
        num_of_local_experts = ctx.num_of_local_experts
        topk = ctx.topk

        # Initialize the gradient of the indices and probs_indices
        grad_probs_indices = torch.empty(
            (num_of_tokens, topk), dtype=grad_probs_in_multihot.dtype, device="cuda"
        )
        # Compute the next power of 2 for the topk and num_of_local_experts
        topk_next_power_of_2 = 2 ** int(math.ceil(math.log2(topk)))
        num_of_local_experts_next_power_of_2 = 2 ** int(math.ceil(math.log2(num_of_local_experts)))

        grid = (num_of_tokens,)
        _multihot_to_indices_kernel[grid](
            # if the grad_probs_in_multihot is all-one/all-zero,
            # overlapping stride will cause error without contiguous()
            grad_probs_in_multihot.contiguous(),
            position_map,
            grad_probs_indices,
            num_of_local_experts,
            num_of_local_experts_next_power_of_2,
            topk,
            topk_next_power_of_2,
            BLOCK_SIZE=32,  # use only 1 warp per block
            num_warps=1,
        )
        return None, grad_probs_indices, None, None


def fused_indices_to_multihot(indices, probs_indices, num_of_local_experts):
    """Convert moe topk indices to multihot representation.

    This function is an experimental feature and may change in future versions.
    """
    return IndicesToMultihot.apply(indices, probs_indices, num_of_local_experts)


================================================
FILE: megatron/core/fusions/fused_layer_norm.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

import importlib
import inspect
import numbers

import torch
from torch import Tensor
from torch.nn import init
from torch.nn.parameter import Parameter

from megatron.core.transformer import TransformerConfig
from megatron.core.utils import make_viewless_tensor

try:
    from apex.contrib.layer_norm.layer_norm import FastLayerNormFN

    HAVE_PERSIST_LAYER_NORM = True
except ImportError:
    HAVE_PERSIST_LAYER_NORM = False

try:
    from apex.normalization.fused_layer_norm import FusedLayerNormAffineFunction

    HAVE_FUSED_LAYER_NORM = True
except ImportError:
    HAVE_FUSED_LAYER_NORM = False


class FusedLayerNorm(torch.nn.Module):
    """Layer Norm, fused into a single CUDA kernel.

    Args:
      hidden_size (int): Transformer hidden dimension.

      eps (float): Epsilon added to denominator, for numerical stability.

      persist_layer_norm (bool): Use persistent fused layer norm kernel.
      This kernel supports only a set of hidden sizes. Please
      check persist_ln_hidden_sizes if your hidden size is supported.

      zero_centered_gamma (bool): Adjust LayerNorm weights such that they are
      centered around zero. This improves numerical stability.

      config (TransformerConfig): Transformer config. Include to match custom
      layer norm interfaces.

      normalization (str): Normalization type, used for Transformer Engine.
      Must equal 'LayerNorm' here.
    """

    def __init__(
        self,
        config: TransformerConfig,
        hidden_size: int,
        eps: float = 1e-5,
        persist_layer_norm: bool = True,
        zero_centered_gamma: bool = False,
        normalization: str = "LayerNorm",  # included to match TE interface
    ):
        super().__init__()

        self.config = config

        self.zero_centered_gamma = self.config.layernorm_zero_centered_gamma
        assert (
            self.config.normalization == "LayerNorm"
        ), f'({self.config.normalization}) is not supported in FusedLayerNorm'

        # List of hiddens sizes supported in the persistent layer norm kernel
        # If the hidden size is not supported, fall back to the non-persistent
        # kernel.
        persist_ln_hidden_sizes = [
            1024,
            1536,
            2048,
            2304,
            3072,
            3840,
            4096,
            5120,
            6144,
            8192,
            10240,
            12288,
            12800,
            15360,
            16384,
            18432,
            20480,
            24576,
            25600,
            30720,
            32768,
            40960,
            49152,
            65536,
        ]
        persist_layer_norm = self.config.persist_layer_norm
        if hidden_size not in persist_ln_hidden_sizes or not HAVE_PERSIST_LAYER_NORM:
            persist_layer_norm = False

        if not persist_layer_norm and not HAVE_FUSED_LAYER_NORM:
            # TODO: Add pytorch only layer norm
            raise ValueError(f'Apex must be installed to use FusedLayerNorm.')

        if isinstance(hidden_size, numbers.Integral):
            hidden_size = (hidden_size,)
        self.hidden_size = torch.Size(hidden_size)
        self.eps = eps
        # Parameters need to be initialized with torch.empty rather than torch.Tensor for correct device placement with nemo2.
        self.weight = Parameter(torch.empty(*hidden_size))
        self.bias = Parameter(torch.empty(*hidden_size))
        self.reset_parameters()
        self.persist_layer_norm = persist_layer_norm
        self.sequence_parallel = self.config.sequence_parallel

        # set sequence parallelism flag on weight and bias parameters
        setattr(self.weight, 'sequence_parallel', self.sequence_parallel)
        setattr(self.bias, 'sequence_parallel', self.sequence_parallel)

    def reset_parameters(self):

        if self.zero_centered_gamma:
            init.zeros_(self.weight)
            init.zeros_(self.bias)
        else:
            init.ones_(self.weight)
            init.zeros_(self.bias)

    def forward(self, input: Tensor) -> Tensor:

        weight = self.weight + 1 if self.zero_centered_gamma else self.weight

        if self.persist_layer_norm:
            if 'memory_efficient' in inspect.getfullargspec(FastLayerNormFN.forward).args:
                output = FastLayerNormFN.apply(
                    input, weight, self.bias, self.eps, self.config.memory_efficient_layer_norm
                )
            else:
                output = FastLayerNormFN.apply(input, weight, self.bias, self.eps)

            # Apex's fast layer norm function outputs a 'view' tensor (i.e., has
            # a populated '_base' field). This will result in schedule.py's
            # deallocate_output_tensor() throwing an error, so a viewless tensor is
            # created to prevent this.
            output = make_viewless_tensor(
                inp=output, requires_grad=input.requires_grad, keep_graph=True
            )

        else:
            if (
                'memory_efficient'
                in inspect.getfullargspec(FusedLayerNormAffineFunction.forward).args
            ):
                return FusedLayerNormAffineFunction.apply(
                    input,
                    weight,
                    self.bias,
                    self.hidden_size,
                    self.eps,
                    self.config.memory_efficient_layer_norm,
                )
            else:
                return FusedLayerNormAffineFunction.apply(
                    input, weight, self.bias, self.hidden_size, self.eps
                )

        return output


================================================
FILE: megatron/core/fusions/fused_mla_yarn_rope_apply.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from typing import Optional
from unittest.mock import MagicMock

import torch
from packaging import version

from megatron.core.utils import null_decorator

try:
    import triton
    import triton.language as tl

    if version.parse(triton.__version__) < version.parse("3.4.0") and not torch.cuda.is_available():
        HAVE_TRITON = False
    else:
        HAVE_TRITON = tl.constexpr(version.parse(triton.__version__) >= version.parse("2.0.0"))
except ImportError:
    HAVE_TRITON = False

if not HAVE_TRITON:
    triton = MagicMock()
    triton.jit = null_decorator
    triton.autotune = null_decorator
    triton.heuristics = null_decorator
    tl = MagicMock()


@triton.jit
def _get_thd_token_idx(cu_seqlens, pid_m, seq_num, cp_rank, cp_size):
    token_idx = -1
    this_seq_len = 0
    seq_idx = 0
    last_cum_seqlen = tl.load(cu_seqlens) // cp_size
    while seq_idx < seq_num:
        cur_cum_seqlen = tl.load(cu_seqlens + seq_idx + 1) // cp_size
        if token_idx == -1 and cur_cum_seqlen > pid_m:
            token_idx = pid_m - last_cum_seqlen
            this_seq_len = cur_cum_seqlen - last_cum_seqlen
        last_cum_seqlen = cur_cum_seqlen
        seq_idx += 1
    if cp_size > 1:
        if token_idx < this_seq_len // 2:
            token_idx = token_idx + cp_rank * this_seq_len // 2
        else:
            token_idx = (token_idx - this_seq_len // 2) + (
                2 * cp_size - cp_rank - 1
            ) * this_seq_len // 2
    return token_idx


@triton.autotune(
    configs=[
        triton.Config({"BLOCK_H": 1}),
        triton.Config({"BLOCK_H": 2}),
        triton.Config({"BLOCK_H": 4}),
        triton.Config({"BLOCK_H": 8}),
        triton.Config({"BLOCK_H": 16}),
        triton.Config({"BLOCK_H": 32}),
        triton.Config({"BLOCK_H": 64}),
        triton.Config({"BLOCK_H": 128}),
    ],
    key=["emb_dim", "head_num"],
    restore_value=["Q"],
)
@triton.jit
def rotary_fwd_q_kernel(
    Q,
    COS,
    SIN,
    qk_head_dim,
    emb_dim: tl.constexpr,
    head_num: tl.constexpr,
    batch_size,
    seq_num,
    cu_seqlens_q,
    stride_x_seq,
    stride_x_nheads,
    cp_rank,
    cp_size,
    BLOCK_H: tl.constexpr,
):
    """
    Triton kernel of the forward pass for applying YARN RoPE to MLA's query.
    This kernel inplace modifies the input tensor Q.

    Input:
        Q: [seq_len, batch_size, head_num, qk_head_dim + emb_dim]
            or [total_seq_len, head_num, qk_head_dim + emb_dim]
        COS/SIN: [max_seq_len, emb_dim]

        batch_size: batch size for sbhd format, not used for thd format
        seq_num: number of sequences for thd format, not used for sbhd format
        cu_seqlens_q: [seq_num + 1] accumulated sequence lengths for thd format
    """
    pid_m = tl.program_id(axis=0)
    pid_head = tl.program_id(axis=1)

    if cu_seqlens_q is None:
        token_idx = pid_m // batch_size
    else:
        token_idx = _get_thd_token_idx(cu_seqlens_q, pid_m, seq_num, cp_rank, cp_size)

    cos_left = tl.load(COS + token_idx * emb_dim + tl.arange(0, emb_dim // 2))
    sin_left = tl.load(SIN + token_idx * emb_dim + tl.arange(0, emb_dim // 2))
    cos_right = tl.load(COS + token_idx * emb_dim + emb_dim // 2 + tl.arange(0, emb_dim // 2))
    sin_right = tl.load(SIN + token_idx * emb_dim + emb_dim // 2 + tl.arange(0, emb_dim // 2))
    cos_left = cos_left.expand_dims(0).broadcast_to(BLOCK_H, emb_dim // 2)
    sin_left = sin_left.expand_dims(0).broadcast_to(BLOCK_H, emb_dim // 2)
    cos_right = cos_right.expand_dims(0).broadcast_to(BLOCK_H, emb_dim // 2)
    sin_right = sin_right.expand_dims(0).broadcast_to(BLOCK_H, emb_dim // 2)

    Q = Q + pid_m * stride_x_seq + pid_head * BLOCK_H * stride_x_nheads

    x_off = tl.arange(0, BLOCK_H)[:, None] * stride_x_nheads + qk_head_dim
    mask = x_off < head_num * stride_x_nheads
    # x1 = t[..., 0::2], x2 = t[..., 1::2]
    x_1_off = x_off + tl.arange(0, emb_dim // 2)[None, :] * 2
    x_2_off = x_1_off + 1
    x_1 = tl.load(Q + x_1_off, mask=mask)
    x_2 = tl.load(Q + x_2_off, mask=mask)

    x_left = x_1 * cos_left - x_2 * sin_left
    x_right = x_2 * cos_right + x_1 * sin_right

    x_left_off = x_off + tl.arange(0, emb_dim // 2)[None, :]
    x_right_off = x_left_off + emb_dim // 2
    tl.store(Q + x_left_off, x_left, mask=mask)
    tl.store(Q + x_right_off, x_right, mask=mask)


@triton.autotune(
    configs=[
        triton.Config({"BLOCK_H": 1}),
        triton.Config({"BLOCK_H": 2}),
        triton.Config({"BLOCK_H": 4}),
        triton.Config({"BLOCK_H": 8}),
        triton.Config({"BLOCK_H": 16}),
        triton.Config({"BLOCK_H": 32}),
        triton.Config({"BLOCK_H": 64}),
        triton.Config({"BLOCK_H": 128}),
    ],
    key=["emb_dim", "head_num"],
    restore_value=["DO"],
)
@triton.jit
def rotary_bwd_q_kernel(
    DO,
    COS,
    SIN,
    qk_head_dim,
    emb_dim: tl.constexpr,
    head_num: tl.constexpr,
    batch_size,
    seq_num,
    cu_seqlens_q,
    stride_x_seq,
    stride_x_nheads,
    cp_rank,
    cp_size,
    BLOCK_H: tl.constexpr,
):
    """
    Triton kernel of the backward pass for applying YARN RoPE to MLA's query.
    This kernel inplace modifies the input tensor DO.

    Input:
        DO: [seq_len, batch_size, head_num, qk_head_dim + emb_dim]
            or [total_seq_len, head_num, qk_head_dim + emb_dim]
        COS/SIN: [max_seq_len, emb_dim]

        batch_size, seq_num, and cu_seqlens_q are the same as in the forward pass
    """
    pid_m = tl.program_id(axis=0)
    pid_head = tl.program_id(axis=1)

    if cu_seqlens_q is None:
        token_idx = pid_m // batch_size
    else:
        token_idx = _get_thd_token_idx(cu_seqlens_q, pid_m, seq_num, cp_rank, cp_size)

    cos_left = tl.load(COS + token_idx * emb_dim + tl.arange(0, emb_dim // 2))
    sin_left = tl.load(SIN + token_idx * emb_dim + tl.arange(0, emb_dim // 2))
    cos_right = tl.load(COS + token_idx * emb_dim + emb_dim // 2 + tl.arange(0, emb_dim // 2))
    sin_right = tl.load(SIN + token_idx * emb_dim + emb_dim // 2 + tl.arange(0, emb_dim // 2))
    cos_left = cos_left.expand_dims(0).broadcast_to(BLOCK_H, emb_dim // 2)
    sin_left = sin_left.expand_dims(0).broadcast_to(BLOCK_H, emb_dim // 2)
    cos_right = cos_right.expand_dims(0).broadcast_to(BLOCK_H, emb_dim // 2)
    sin_right = sin_right.expand_dims(0).broadcast_to(BLOCK_H, emb_dim // 2)

    DO = DO + pid_m * stride_x_seq + pid_head * BLOCK_H * stride_x_nheads

    x_off = tl.arange(0, BLOCK_H)[:, None] * stride_x_nheads + qk_head_dim
    mask = x_off < head_num * stride_x_nheads
    x_left_off = x_off + tl.arange(0, emb_dim // 2)[None, :]
    x_right_off = x_left_off + emb_dim // 2
    x_left = tl.load(DO + x_left_off, mask=mask)
    x_right = tl.load(DO + x_right_off, mask=mask)

    x_1 = x_left * cos_left + x_right * sin_right
    x_2 = -x_left * sin_left + x_right * cos_right

    x_1_off = x_off + tl.arange(0, emb_dim // 2)[None, :] * 2
    x_2_off = x_1_off + 1
    tl.store(DO + x_1_off, x_1, mask=mask)
    tl.store(DO + x_2_off, x_2, mask=mask)


class ApplyMLARotaryEmbQ(torch.autograd.Function):
    """
    Autograd function for applying YARN RoPE to MLA's query.
    """

    @staticmethod
    def forward(
        ctx,
        q,
        cos,
        sin,
        qk_head_dim,
        emb_dim,
        cu_seqlens_q,
        cp_rank,
        cp_size,
        rotary_interleaved=False,
    ):
        """
        Forward function for ApplyMLARotaryEmbQ.

        Args:
            q: [seq_len, batch_size, head_num, qk_head_dim + emb_dim]
                or [total_seq_len, head_num, qk_head_dim + emb_dim]
            cos/sin: [max_seq_len, 1, 1, emb_dim]
            cu_seqlens_q: [seq_num + 1] accumulated sequence lengths for thd format
            rotary_interleaved: whether to apply RoPE interleaved, only supports False for now
        """
        assert not rotary_interleaved
        max_seqlen = None
        batch_size = None
        seq_num = None
        if cu_seqlens_q is None:
            # sbhd
            max_seqlen, batch_size, nheads, headdim = q.shape
            q = q.view(-1, nheads, headdim)
            total_seqlen = q.shape[0]
        else:
            # thd
            total_seqlen, nheads, headdim = q.shape
            seq_num = len(cu_seqlens_q) - 1
        assert q.stride(-1) == 1
        assert cos.is_contiguous()
        assert sin.is_contiguous()
        assert headdim == qk_head_dim + emb_dim
        assert emb_dim % 4 == 0

        grid = lambda META: (total_seqlen, triton.cdiv(nheads, META["BLOCK_H"]))
        rotary_fwd_q_kernel[grid](
            q,
            cos,
            sin,
            qk_head_dim,
            emb_dim,
            nheads,
            batch_size,
            seq_num,
            cu_seqlens_q,
            q.stride(0),
            q.stride(1),
            cp_rank,
            cp_size,
        )
        ctx.save_for_backward(cos, sin)
        ctx.qk_head_dim = qk_head_dim
        ctx.emb_dim = emb_dim
        ctx.cu_seqlens_q = cu_seqlens_q
        ctx.rotary_interleaved = rotary_interleaved
        ctx.cp_rank = cp_rank
        ctx.cp_size = cp_size
        if cu_seqlens_q is None:
            q = q.view(max_seqlen, batch_size, nheads, headdim)
        return q

    @staticmethod
    def backward(ctx, grad):
        """
        Backward function for ApplyMLARotaryEmbQ.

        Args:
            grad: [seq_len, batch_size, head_num, qk_head_dim + emb_dim]
                or [total_seq_len, head_num, qk_head_dim + emb_dim]
        """
        cos, sin = ctx.saved_tensors
        max_seqlen = None
        batch_size = None
        seq_num = None
        if ctx.cu_seqlens_q is None:
            max_seqlen, batch_size, nheads, headdim = grad.shape
            grad = grad.contiguous().view(-1, nheads, headdim)
            total_seqlen = grad.shape[0]
        else:
            seq_num = len(ctx.cu_seqlens_q) - 1
            total_seqlen, nheads, headdim = grad.shape
        assert grad.stride(-1) == 1

        grid = lambda META: (total_seqlen, triton.cdiv(nheads, META["BLOCK_H"]))
        rotary_bwd_q_kernel[grid](
            grad,
            cos,
            sin,
            ctx.qk_head_dim,
            ctx.emb_dim,
            nheads,
            batch_size,
            seq_num,
            ctx.cu_seqlens_q,
            grad.stride(0),
            grad.stride(1),
            ctx.cp_rank,
            ctx.cp_size,
        )
        if ctx.cu_seqlens_q is None:
            grad = grad.view(max_seqlen, batch_size, nheads, headdim)
        return grad, None, None, None, None, None, None, None, None


def fused_apply_mla_rope_for_q(
    t: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    qk_head_dim: int,
    emb_dim: int,
    cu_seqlens_q: Optional[torch.Tensor] = None,
    cp_rank: int = 0,
    cp_size: int = 1,
    rotary_interleaved: bool = False,
):
    """
    Fused function for applying YARN RoPE to MLA's query.
    This function inplace modifies the input tensor t.
    Along the last dimension of t, the last emb_dim elements are applied with RoPE.
    The first qk_head_dim elements are not modified.
    It is an experimental feature and may change in future versions.
    It supports both sbhd and thd input formats.

    For the notations below, seq_len is the length of the sequence per batch for sbhd format,
    total_seq_len is the total length of the sequences for thd format.
    max_seq_len is the maximum length of the sequences in the input tensor.

    Args:
        t: [seq_len, batch_size, head_num, qk_head_dim + emb_dim]
            or [total_seq_len, head_num, qk_head_dim + emb_dim]
        cos/sin: [max_seq_len, 1, 1, emb_dim]
        cu_seqlens_q: [seq_num + 1] accumulated sequence lengths for thd format
        rotary_interleaved: whether to apply RoPE interleaved, only supports False for now

    Returns:
        t: inplace modified input tensor
    """
    return ApplyMLARotaryEmbQ.apply(
        t, cos, sin, qk_head_dim, emb_dim, cu_seqlens_q, cp_rank, cp_size, rotary_interleaved
    )


@triton.autotune(
    configs=[
        triton.Config({"BLOCK_H": 1}),
        triton.Config({"BLOCK_H": 2}),
        triton.Config({"BLOCK_H": 4}),
        triton.Config({"BLOCK_H": 8}),
        triton.Config({"BLOCK_H": 16}),
        triton.Config({"BLOCK_H": 32}),
        triton.Config({"BLOCK_H": 64}),
        triton.Config({"BLOCK_H": 128}),
    ],
    key=["emb_dim", "k_dim", "v_dim", "head_num"],
)
@triton.jit
def rotary_fwd_kv_kernel(
    KV,
    K_POS_EMB,
    O_KEY,
    O_VALUE,
    COS,
    SIN,
    emb_dim: tl.constexpr,
    k_dim: tl.constexpr,
    v_dim: tl.constexpr,
    head_num: tl.constexpr,
    batch_size,
    seq_num,
    cu_seqlens_kv,
    stride_kv_seq,
    stride_kv_nheads,
    stride_emb_seq,
    stride_k_seq,
    stride_k_nheads,
    stride_v_seq,
    stride_v_nheads,
    cp_rank,
    cp_size,
    BLOCK_H: tl.constexpr,
):
    """
    Triton kernel of the forward pass for applying YARN RoPE to MLA's key and value.
    It splits the input tensor KV into key and value,
    and concatenates the processed RoPE to the key.

    Input:
        KV: [seq_len, batch_size, head_num, k_dim + v_dim]
            or [total_seq_len, head_num, k_dim + v_dim]
        K_POS_EMB: [seq_len, batch_size, emb_dim] or [total_seq_len, emb_dim]
        COS/SIN: [max_seq_len, emb_dim]

        batch_size: batch size for sbhd format, not used for thd format
        seq_num: number of sequences for thd format, not used for sbhd format
        cu_seqlens_kv: [seq_num + 1] accumulated sequence lengths for thd format

    Output:
        O_KEY: [seq_len, batch_size, head_num, emb_dim + k_dim]
            or [total_seq_len, head_num, emb_dim + k_dim]
        O_VALUE: [seq_len, batch_size, head_num, v_dim] or [total_seq_len, head_num, v_dim]
    """
    pid_m = tl.program_id(axis=0)
    pid_head = tl.program_id(axis=1)

    if cu_seqlens_kv is None:
        token_idx = pid_m // batch_size
    else:
        token_idx = _get_thd_token_idx(cu_seqlens_kv, pid_m, seq_num, cp_rank, cp_size)

    cos_left = tl.load(COS + token_idx * emb_dim + tl.arange(0, emb_dim // 2))
    sin_left = tl.load(SIN + token_idx * emb_dim + tl.arange(0, emb_dim // 2))
    cos_right = tl.load(COS + token_idx * emb_dim + emb_dim // 2 + tl.arange(0, emb_dim // 2))
    sin_right = tl.load(SIN + token_idx * emb_dim + emb_dim // 2 + tl.arange(0, emb_dim // 2))

    KV_ptr = KV + pid_m * stride_kv_seq + pid_head * BLOCK_H * stride_kv_nheads
    kv_off = tl.arange(0, BLOCK_H)[:, None] * stride_kv_nheads
    mask = kv_off < head_num * stride_kv_nheads
    k_in_off = kv_off + tl.arange(0, k_dim)[None, :]
    v_in_off = kv_off + k_dim + tl.arange(0, v_dim)[None, :]
    k = tl.load(KV_ptr + k_in_off, mask=mask)
    v = tl.load(KV_ptr + v_in_off, mask=mask)

    K_ptr = O_KEY + pid_m * stride_k_seq + pid_head * BLOCK_H * stride_k_nheads
    V_ptr = O_VALUE + pid_m * stride_v_seq + pid_head * BLOCK_H * stride_v_nheads

    k_out_off = tl.arange(0, BLOCK_H)[:, None] * stride_k_nheads + tl.arange(0, k_dim)[None, :]
    v_out_off = tl.arange(0, BLOCK_H)[:, None] * stride_v_nheads + tl.arange(0, v_dim)[None, :]
    tl.store(K_ptr + k_out_off, k, mask=mask)
    tl.store(V_ptr + v_out_off, v, mask=mask)

    EMB = K_POS_EMB + pid_m * stride_emb_seq
    # x1 = t[..., 0::2], x2 = t[..., 1::2]
    x_1 = tl.load(EMB + tl.arange(0, emb_dim // 2) * 2)
    x_2 = tl.load(EMB + tl.arange(0, emb_dim // 2) * 2 + 1)

    x_left = x_1 * cos_left - x_2 * sin_left
    x_right = x_2 * cos_right + x_1 * sin_right
    x_left = x_left.expand_dims(0).broadcast_to(BLOCK_H, emb_dim // 2)
    x_right = x_right.expand_dims(0).broadcast_to(BLOCK_H, emb_dim // 2)

    x_left_off = (
        tl.arange(0, BLOCK_H)[:, None] * stride_k_nheads
        + k_dim
        + tl.arange(0, emb_dim // 2)[None, :]
    )
    x_right_off = x_left_off + emb_dim // 2
    tl.store(K_ptr + x_left_off, x_left, mask=mask)
    tl.store(K_ptr + x_right_off, x_right, mask=mask)


@triton.autotune(
    configs=[
        triton.Config({"BLOCK_H": 1}),
        triton.Config({"BLOCK_H": 2}),
        triton.Config({"BLOCK_H": 4}),
        triton.Config({"BLOCK_H": 8}),
        triton.Config({"BLOCK_H": 16}),
        triton.Config({"BLOCK_H": 32}),
        triton.Config({"BLOCK_H": 64}),
        triton.Config({"BLOCK_H": 128}),
    ],
    key=["emb_dim", "k_dim", "v_dim", "head_num"],
)
@triton.jit
def rotary_bwd_kv_kernel(
    dK,
    dV,
    dKV,
    dEMB,
    COS,
    SIN,
    emb_dim: tl.constexpr,
    k_dim: tl.constexpr,
    v_dim: tl.constexpr,
    head_num: tl.constexpr,
    batch_size,
    seq_num,
    cu_seqlens_kv,
    stride_dk_seq,
    stride_dk_nheads,
    stride_dv_seq,
    stride_dv_nheads,
    stride_dkv_seq,
    stride_dkv_nheads,
    stride_demb_seq,
    cp_rank,
    cp_size,
    BLOCK_H: tl.constexpr,
):
    """
    Triton kernel of the backward pass for applying YARN RoPE to MLA's key and value.

    Input:
        dK: [seq_len, batch_size, head_num, emb_dim + k_dim]
            or [total_seq_len, head_num, emb_dim + k_dim]
        dV: [seq_len, batch_size, head_num, v_dim] or [total_seq_len, head_num, v_dim]
        COS/SIN: [max_seq_len, emb_dim]

        batch_size, seq_num, and cu_seqlens_kv are the same as in the forward pass

    Output:
        dKV: [seq_len, batch_size, head_num, k_dim + v_dim]
            or [total_seq_len, head_num, k_dim + v_dim]
        dEMB: [seq_len, batch_size, emb_dim] or [total_seq_len, emb_dim]
    """
    pid_m = tl.program_id(axis=0)
    pid_head = tl.program_id(axis=1)

    if cu_seqlens_kv is None:
        token_idx = pid_m // batch_size
    else:
        token_idx = _get_thd_token_idx(cu_seqlens_kv, pid_m, seq_num, cp_rank, cp_size)

    dKV_ptr = dKV + pid_m * stride_dkv_seq + pid_head * BLOCK_H * stride_dkv_nheads
    dkv_off = tl.arange(0, BLOCK_H)[:, None] * stride_dkv_nheads
    mask = dkv_off < head_num * stride_dkv_nheads
    dk_out_off = dkv_off + tl.arange(0, k_dim)[None, :]
    dv_out_off = dkv_off + k_dim + tl.arange(0, v_dim)[None, :]

    dK_ptr = dK + pid_m * stride_dk_seq + pid_head * BLOCK_H * stride_dk_nheads
    dV_ptr = dV + pid_m * stride_dv_seq + pid_head * BLOCK_H * stride_dv_nheads
    dk_in_off = tl.arange(0, BLOCK_H)[:, None] * stride_dk_nheads + tl.arange(0, k_dim)[None, :]
    dv_in_off = tl.arange(0, BLOCK_H)[:, None] * stride_dv_nheads + tl.arange(0, v_dim)[None, :]
    dk = tl.load(dK_ptr + dk_in_off, mask=mask)
    dv = tl.load(dV_ptr + dv_in_off, mask=mask)
    tl.store(dKV_ptr + dk_out_off, dk, mask=mask)
    tl.store(dKV_ptr + dv_out_off, dv, mask=mask)

    if pid_head == 0:
        x_left_accum = tl.zeros((BLOCK_H, emb_dim // 2), dtype=tl.float32)
        x_right_accum = tl.zeros((BLOCK_H, emb_dim // 2), dtype=tl.float32)
        for i in tl.static_range(triton.cdiv(head_num, BLOCK_H)):
            dK_ptr = dK + pid_m * stride_dk_seq + i * BLOCK_H * stride_dk_nheads
            x_off = tl.arange(0, BLOCK_H)[:, None] * stride_dk_nheads + k_dim
            mask = x_off < head_num * stride_dk_nheads
            x_left_off = x_off + tl.arange(0, emb_dim // 2)[None, :]
            x_right_off = x_left_off + emb_dim // 2
            x_left = tl.load(dK_ptr + x_left_off, mask=mask)
            x_right = tl.load(dK_ptr + x_right_off, mask=mask)
            x_left_accum += x_left
            x_right_accum += x_right
        x_left_accum = tl.sum(x_left_accum, axis=0)
        x_right_accum = tl.sum(x_right_accum, axis=0)
        x_left_accum = x_left_accum.to(dEMB.dtype.element_ty)
        x_right_accum = x_right_accum.to(dEMB.dtype.element_ty)

        cos_left = tl.load(COS + token_idx * emb_dim + tl.arange(0, emb_dim // 2))
        sin_left = tl.load(SIN + token_idx * emb_dim + tl.arange(0, emb_dim // 2))
        cos_right = tl.load(COS + token_idx * emb_dim + emb_dim // 2 + tl.arange(0, emb_dim // 2))
        sin_right = tl.load(SIN + token_idx * emb_dim + emb_dim // 2 + tl.arange(0, emb_dim // 2))

        x_1 = x_left_accum * cos_left + x_right_accum * sin_right
        x_2 = -x_left_accum * sin_left + x_right_accum * cos_right
        dEMB_ptr = dEMB + pid_m * stride_demb_seq
        tl.store(dEMB_ptr + tl.arange(0, emb_dim // 2) * 2, x_1)
        tl.store(dEMB_ptr + tl.arange(0, emb_dim // 2) * 2 + 1, x_2)


class ApplyMLARotaryEmbKV(torch.autograd.Function):
    """
    Autograd function for applying YARN RoPE to MLA's key and value.
    """

    @staticmethod
    def forward(
        ctx,
        kv,
        k_pos_emb,
        cos,
        sin,
        emb_dim,
        k_dim,
        v_dim,
        cu_seqlens_kv,
        cp_rank,
        cp_size,
        rotary_interleaved=False,
    ):
        """
        Forward function for ApplyMLARotaryEmbKV.

        Args:
            kv: [seq_len, batch_size, head_num, k_dim + v_dim]
                or [total_seq_len, head_num, k_dim + v_dim]
            k_pos_emb: [seq_len, batch_size, 1, emb_dim] or [total_seq_len, 1, emb_dim]
            cos/sin: [max_seq_len, 1, 1, emb_dim]
            cu_seqlens_kv: [seq_num + 1] accumulated sequence lengths for thd format
            rotary_interleaved: whether to apply RoPE interleaved, only supports False for now
        """
        assert not rotary_interleaved
        max_seqlen = None
        batch_size = None
        seq_num = None
        if cu_seqlens_kv is None:
            # sbhd
            max_seqlen, batch_size, nheads, headdim = kv.shape
            kv = kv.view(-1, nheads, headdim)
            k_pos_emb = k_pos_emb.view(-1, emb_dim)
            total_seqlen = kv.shape[0]
        else:
            # thd
            seq_num = len(cu_seqlens_kv) - 1
            total_seqlen, nheads, headdim = kv.shape
        assert headdim == k_dim + v_dim
        assert kv.stride(-1) == 1
        assert k_pos_emb.stride(-1) == 1
        assert cos.is_contiguous()
        assert sin.is_contiguous()
        assert emb_dim % 4 == 0

        o_key = kv.new_empty(total_seqlen, nheads, emb_dim + k_dim)
        o_value = kv.new_empty(total_seqlen, nheads, v_dim)

        grid = lambda META: (total_seqlen, triton.cdiv(nheads, META["BLOCK_H"]))
        rotary_fwd_kv_kernel[grid](
            kv,
            k_pos_emb,
            o_key,
            o_value,
            cos,
            sin,
            emb_dim,
            k_dim,
            v_dim,
            nheads,
            batch_size,
            seq_num,
            cu_seqlens_kv,
            kv.stride(0),
            kv.stride(1),
            k_pos_emb.stride(0),
            o_key.stride(0),
            o_key.stride(1),
            o_value.stride(0),
            o_value.stride(1),
            cp_rank,
            cp_size,
        )
        ctx.save_for_backward(cos, sin)
        ctx.rotary_interleaved = rotary_interleaved
        ctx.emb_dim = emb_dim
        ctx.k_dim = k_dim
        ctx.v_dim = v_dim
        ctx.cu_seqlens_kv = cu_seqlens_kv
        ctx.cp_rank = cp_rank
        ctx.cp_size = cp_size
        if cu_seqlens_kv is None:
            o_key = o_key.view(max_seqlen, -1, nheads, emb_dim + k_dim)
            o_value = o_value.view(max_seqlen, -1, nheads, v_dim)
        return o_key, o_value

    @staticmethod
    def backward(ctx, dk, dv):
        """
        Backward function for ApplyMLARotaryEmbKV.

        Args:
            dk: [seq_len, batch_size, head_num, emb_dim + k_dim]
                or [total_seq_len, head_num, emb_dim + k_dim]
            dv: [seq_len, batch_size, head_num, v_dim] or [total_seq_len, head_num, v_dim]
        """
        cos, sin = ctx.saved_tensors
        max_seqlen = None
        batch_size = None
        seq_num = None
        if ctx.cu_seqlens_kv is None:
            # sbhd
            max_seqlen, batch_size, nheads, _ = dk.shape
            dk = dk.contiguous().view(-1, nheads, ctx.emb_dim + ctx.k_dim)
            dv = dv.contiguous().view(-1, nheads, ctx.v_dim)
            total_seqlen = dk.shape[0]
        else:
            # thd
            seq_num = len(ctx.cu_seqlens_kv) - 1
            total_seqlen, nheads, _ = dk.shape
        assert dk.stride(-1) == 1
        assert dv.stride(-1) == 1

        d_kv = dk.new_empty(total_seqlen, nheads, ctx.k_dim + ctx.v_dim)
        d_emb = dk.new_empty(total_seqlen, 1, ctx.emb_dim)

        grid = lambda META: (total_seqlen, triton.cdiv(nheads, META["BLOCK_H"]))
        rotary_bwd_kv_kernel[grid](
            dk,
            dv,
            d_kv,
            d_emb,
            cos,
            sin,
            ctx.emb_dim,
            ctx.k_dim,
            ctx.v_dim,
            nheads,
            batch_size,
            seq_num,
            ctx.cu_seqlens_kv,
            dk.stride(0),
            dk.stride(1),
            dv.stride(0),
            dv.stride(1),
            d_kv.stride(0),
            d_kv.stride(1),
            d_emb.stride(0),
            ctx.cp_rank,
            ctx.cp_size,
        )
        if ctx.cu_seqlens_kv is None:
            d_kv = d_kv.view(max_seqlen, batch_size, nheads, ctx.k_dim + ctx.v_dim)
            d_emb = d_emb.view(max_seqlen, batch_size, 1, ctx.emb_dim)
        return d_kv, d_emb, None, None, None, None, None, None, None, None, None


def fused_apply_mla_rope_for_kv(
    kv: torch.Tensor,
    k_pos_emb: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    emb_dim: int,
    k_dim: int,
    v_dim: int,
    cu_seqlens_kv: Optional[torch.Tensor] = None,
    cp_rank: int = 0,
    cp_size: int = 1,
    rotary_interleaved: bool = False,
):
    """
    Fused function for applying YARN RoPE to MLA's key and value.
    It splits the input tensor kv into key and value,
    and concatenates the processed RoPE to the key.

    For the notations below, seq_len is the length of sequence per batch for sbhd format,
    total_seq_len is the total length of the sequences for thd format.
    max_seq_len is the maximum length of the sequences in the input tensor.

    Args:
        kv: [seq_len, batch_size, head_num, k_dim + v_dim]
            or [total_seq_len, head_num, k_dim + v_dim]
        k_pos_emb: [seq_len, batch_size, 1, emb_dim] or [total_seq_len, 1, emb_dim]
        cos/sin: [max_seq_len, 1, 1, emb_dim]
        cu_seqlens_kv: [seq_num + 1] accumulated sequence lengths for thd format
        rotary_interleaved: whether to apply RoPE interleaved, only supports False for now

    Returns:
        key: [seq_len, batch_size, head_num, emb_dim + k_dim]
            or [total_seq_len, head_num, emb_dim + k_dim]
        value: [seq_len, batch_size, head_num, v_dim] or [total_seq_len, head_num, v_dim]
    """
    return ApplyMLARotaryEmbKV.apply(
        kv,
        k_pos_emb,
        cos,
        sin,
        emb_dim,
        k_dim,
        v_dim,
        cu_seqlens_kv,
        cp_rank,
        cp_size,
        rotary_interleaved,
    )


================================================
FILE: megatron/core/fusions/fused_pad_routing_map.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from unittest.mock import MagicMock

import torch
from packaging import version

from megatron.core.jit import jit_fuser
from megatron.core.utils import null_decorator

try:
    import triton
    import triton.language as tl

    if version.parse(triton.__version__) < version.parse("3.4.0") and not torch.cuda.is_available():
        HAVE_TRITON = False
    else:
        HAVE_TRITON = tl.constexpr(version.parse(triton.__version__) >= version.parse("2.0.0"))
except ImportError:
    HAVE_TRITON = False

if not HAVE_TRITON:
    triton = MagicMock()
    triton.jit = null_decorator
    triton.autotune = null_decorator
    triton.heuristics = null_decorator
    tl = MagicMock()


@triton.jit
def _pad_routing_map_kernel(
    routing_map_ptr, output_ptr, num_tokens, pad_multiple: tl.constexpr, BLOCK_SIZE: tl.constexpr
):
    expert_idx = tl.program_id(axis=0)

    # Pointers for the current expert's row
    row_offset = expert_idx * num_tokens
    input_row_ptr = routing_map_ptr + row_offset
    output_row_ptr = output_ptr + row_offset

    # Token indices for this block
    token_indices = tl.arange(0, BLOCK_SIZE)
    token_mask = token_indices < num_tokens

    # Load the row for the current expert, masking out-of-bounds elements
    row = tl.load(input_row_ptr + token_indices, mask=token_mask, other=0)

    # 1. Calculate num_ones for the current expert
    # Ensure summation happens correctly even with masking
    # Convert boolean/int row to int if necessary before sum
    num_ones = tl.sum(row.to(tl.int32), axis=0)

    # 2. Calculate num_to_pad for the current expert
    remainder = num_ones % pad_multiple
    num_to_pad = tl.where(remainder != 0, pad_multiple - remainder, 0)

    # 3. Calculate zero ranks using cumsum (vectorized)
    is_zero = row == 0
    # Cast to int32 for cumsum
    zero_ranks = tl.cumsum(is_zero.to(tl.int32), axis=0)

    # 4. Create mask for elements to be flipped to 1
    # Only flip if the element is zero AND its rank is within the padding limit
    mask_to_flip = (zero_ranks <= num_to_pad) & is_zero

    # 5. Determine the output row values
    output_row = tl.where(mask_to_flip, 1, row)

    # 6. Store the result, masking out-of-bounds elements
    tl.store(output_row_ptr + token_indices, output_row, mask=token_mask)


@jit_fuser
def fused_pad_routing_map(routing_map: torch.Tensor, pad_multiple: int) -> torch.Tensor:
    """Fused version of pad_routing_map.
    Args:
        routing_map (torch.Tensor): A boolean or integer tensor of shape [num_tokens,
            num_experts] indicating which tokens are routed to which experts.
        pad_multiple (int): The multiple to pad each expert's token count to.

    Returns:
        torch.Tensor: The padded routing map of shape [num_tokens, num_experts].
    """
    num_tokens, num_experts = routing_map.shape
    if num_tokens == 0:
        return routing_map

    input_map = routing_map.transpose(0, 1).contiguous().int()  # [num_experts, num_tokens]

    output_map = torch.empty_like(input_map)

    # Kernel launch
    grid = (num_experts,)
    BLOCK_SIZE = triton.next_power_of_2(num_tokens)

    _pad_routing_map_kernel[grid](
        input_map, output_map, num_tokens, pad_multiple, BLOCK_SIZE=BLOCK_SIZE
    )

    return output_map.transpose(0, 1)  # [num_tokens, num_experts]


================================================
FILE: megatron/core/fusions/fused_softmax.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
from typing import Optional, Union

import torch
import torch.nn as nn

from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.utils import get_default_causal_mask, get_sliding_window_causal_mask


class ScaledUpperTriangMaskedSoftmax(torch.autograd.Function):
    """
    Fused operation which performs following three operations in sequence
    1. Scale the tensor.
    2. Apply upper triangular mask (typically used in gpt models).
    3. Perform softmax.
    """

    @staticmethod
    def forward(ctx, inputs, scale):
        """Forward pass for scaled upper-triangular masked softmax.

        Args:
            ctx: Autograd context used to stash tensors for backward.
            inputs (torch.Tensor): Input tensor of shape [attn_batches, sq, sk].
            scale (float): Scaling factor applied prior to softmax.

        Returns:
            torch.Tensor: Softmax results after applying scale and causal upper-triangular mask.
        """
        import scaled_upper_triang_masked_softmax_cuda

        scale_t = torch.tensor([scale])
        softmax_results = scaled_upper_triang_masked_softmax_cuda.forward(inputs, scale_t[0])

        ctx.save_for_backward(softmax_results, scale_t)
        return softmax_results

    @staticmethod
    def backward(ctx, output_grads):
        """Backward pass for scaled upper-triangular masked softmax.

        Args:
            ctx: Autograd context containing saved tensors from forward.
            output_grads (torch.Tensor): Upstream gradients matching forward output shape.

        Returns:
            Tuple[torch.Tensor, None]: Gradient with respect to inputs and None for scale.
        """
        import scaled_upper_triang_masked_softmax_cuda

        softmax_results, scale_t = ctx.saved_tensors
        input_grads = scaled_upper_triang_masked_softmax_cuda.backward(
            output_grads, softmax_results, scale_t[0]
        )

        return input_grads, None


class ScaledMaskedSoftmax(torch.autograd.Function):
    """
    Fused operation which performs following three operations in sequence
    1. Scale the tensor.
    2. Apply the mask.
    3. Perform softmax.
    """

    @staticmethod
    def forward(ctx, inputs, mask, scale):
        """Forward pass for scaled masked softmax.

        Args:
            ctx: Autograd context used to stash tensors for backward.
            inputs (torch.Tensor): Input tensor of shape [b, np, sq, sk].
            mask (torch.Tensor): Additive mask broadcastable to inputs.
            scale (float): Scaling factor applied prior to softmax.

        Returns:
            torch.Tensor: Softmax results after applying scale and mask.
        """
        import scaled_masked_softmax_cuda

        scale_t = torch.tensor([scale])

        softmax_results = scaled_masked_softmax_cuda.forward(inputs, mask, scale_t[0])
        ctx.save_for_backward(softmax_results, scale_t)
        return softmax_results

    @staticmethod
    def backward(ctx, output_grads):
        """Backward pass for scaled masked softmax.

        Args:
            ctx: Autograd context containing saved tensors from forward.
            output_grads (torch.Tensor): Upstream gradients matching forward output shape.

        Returns:
            Tuple[torch.Tensor, None, None]: Gradient w.r.t inputs; None for mask and scale.
        """
        import scaled_masked_softmax_cuda

        softmax_results, scale_t = ctx.saved_tensors

        input_grads = scaled_masked_softmax_cuda.backward(output_grads, softmax_results, scale_t[0])
        return input_grads, None, None


class ScaledSoftmax(torch.autograd.Function):
    """
    Fused operation which performs following two operations in sequence
    1. Scale the tensor.
    2. Perform softmax.
    """

    @staticmethod
    def forward(ctx, inputs, scale):
        """Forward pass for scaled softmax (no mask).

        Args:
            ctx: Autograd context used to stash tensors for backward.
            inputs (torch.Tensor): Input tensor of shape [b, np, sq, sk] or [attn_batches, sq, sk].
            scale (float): Scaling factor applied prior to softmax.

        Returns:
            torch.Tensor: Softmax results after applying scale.
        """
        import scaled_softmax_cuda

        scale_t = torch.tensor([scale])

        softmax_results = scaled_softmax_cuda.forward(inputs, scale_t[0])
        ctx.save_for_backward(softmax_results, scale_t)
        return softmax_results

    @staticmethod
    def backward(ctx, output_grads):
        """Backward pass for scaled softmax (no mask).

        Args:
            ctx: Autograd context containing saved tensors from forward.
            output_grads (torch.Tensor): Upstream gradients matching forward output shape.

        Returns:
            Tuple[torch.Tensor, None, None]: Gradient w.r.t inputs; None for unused args.
        """
        import scaled_softmax_cuda

        softmax_results, scale_t = ctx.saved_tensors

        input_grads = scaled_softmax_cuda.backward(output_grads, softmax_results, scale_t[0])
        return input_grads, None, None


class SoftmaxOne(nn.Module):
    r"""
    Softmax-off-by-one function as introduced in
    https://www.evanmiller.org/attention-is-off-by-one.html
    Supports fixed or learnable offset
    """

    def __init__(
        self, dim: Optional[int] = None, denominator_offset: Union[torch.Tensor, float] = 1.0
    ) -> None:
        super().__init__()
        self.dim = dim
        self.denominator_offset = denominator_offset

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """forward pass"""
        # sink: [np] --> [1, np, 1, 1] --> [b, np, sq, 1]
        sink = self.denominator_offset.reshape(1, -1, 1, 1).expand(x.size(0), -1, x.size(2), -1)
        # qk: [b, np, sq, sk] --> [b, np, sq, sk+1]
        qk = torch.cat([x, sink], dim=-1)
        # do softmax, and remove sink token at the end
        ret = torch.softmax(qk, dim=-1)[..., :-1]
        return ret


class FusedScaleMaskSoftmax(nn.Module):
    """
    fused operation: scaling + mask + softmax

    Args:
        input_in_fp16: flag to indicate if input in fp16 data format.
        input_in_bf16: flag to indicate if input in bf16 data format.
        attn_mask_type: attention mask type (pad or causal)
        scaled_masked_softmax_fusion: flag to indicate user want to use softmax fusion
        mask_func: mask function to be applied.
        softmax_in_fp32: if true, softmax in performed at fp32 precision.
        scale: scaling factor used in input tensor scaling.
    """

    def __init__(
        self,
        input_in_fp16,
        input_in_bf16,
        attn_mask_type,
        scaled_masked_softmax_fusion,
        mask_func,
        softmax_in_fp32,
        scale,
        window_size=None,
    ):
        super(FusedScaleMaskSoftmax, self).__init__()
        self.input_in_fp16 = input_in_fp16
        self.input_in_bf16 = input_in_bf16
        assert not (
            self.input_in_fp16 and self.input_in_bf16
        ), "both fp16 and bf16 flags cannot be active at the same time."
        self.input_in_float16 = self.input_in_fp16 or self.input_in_bf16
        self.attn_mask_type = attn_mask_type
        self.scaled_masked_softmax_fusion = scaled_masked_softmax_fusion
        self.mask_func = mask_func
        self.softmax_in_fp32 = softmax_in_fp32
        self.scale = scale
        self.window_size = window_size
        assert self.scale is None or softmax_in_fp32, "softmax should be in fp32 when scaled"

    def forward(
        self,
        input: torch.Tensor,
        mask: Optional[torch.Tensor],
        softmax_offset: Optional[torch.Tensor] = None,
    ):
        """Forward pass of softmax with masked input.

        In case attn_mask_type is causal the mask is generated and None can be passed.
        A user-defined mask is only needed when attn_mask_type is not causal.
        """
        # [b, np, sq, sk]
        assert input.dim() == 4

        if self.is_kernel_available(mask, *input.size()) and softmax_offset is None:
            return self.forward_fused_softmax(input, mask)
        else:
            return self.forward_torch_softmax(input, mask, softmax_offset)

    def is_kernel_available(self, mask, b, np, sq, sk):
        """Check whether the fused CUDA kernel can be used for the given shapes and settings.

        Args:
            mask (Optional[torch.Tensor]): Attention mask or None.
            b (int): Batch size.
            np (int): Number of attention heads per tensor-parallel partition.
            sq (int): Query sequence length.
            sk (int): Key sequence length.

        Returns:
            bool: True if the fused kernel constraints are satisfied; otherwise False.
        """
        attn_batches = b * np

        if (
            self.scaled_masked_softmax_fusion  # user want to fuse
            and self.input_in_float16  # input must be fp16
            and 16 < sk <= 4096  # sk must be 16 ~ 2048
            and sq % 4 == 0  # sq must be divisor of 4
            and sk % 4 == 0  # sk must be divisor of 4
            and attn_batches % 4 == 0  # np * b must be divisor of 4
        ):
            if 0 <= sk <= 4096:
                batch_per_block = self.get_batch_per_block(sq, sk, b, np)

                if self.attn_mask_type == AttnMaskType.causal:
                    if attn_batches % batch_per_block == 0:
                        return True
                else:
                    if sq % batch_per_block == 0:
                        return True
        return False

    def forward_fused_softmax(self, input, mask):
        """Compute softmax using fused CUDA kernels when available.

        Args:
            input (torch.Tensor): Attention scores of shape [b, np, sq, sk].
            mask (Optional[torch.Tensor]): Optional mask for non-causal attention.

        Returns:
            torch.Tensor: Attention probabilities of shape [b, np, sq, sk].
        """
        b, np, sq, sk = input.size()
        scale = self.scale if self.scale is not None else 1.0

        if self.attn_mask_type == AttnMaskType.causal:
            assert sq == sk, "causal mask is only for self attention"

            # input is 3D tensor (attn_batches, sq, sk)
            input = input.view(-1, sq, sk)
            probs = ScaledUpperTriangMaskedSoftmax.apply(input, scale)
            return probs.view(b, np, sq, sk)
        else:
            # input is 4D tensor (b, np, sq, sk)
            if mask is not None:
                return ScaledMaskedSoftmax.apply(input, mask, scale)
            else:
                return ScaledSoftmax.apply(input, scale)

    def forward_torch_softmax(self, input, mask, softmax_offset=None):
        """Fallback PyTorch implementation for masked softmax.

        Applies optional scaling, constructs a causal or sliding-window mask if needed,
        applies the mask, and computes softmax in PyTorch. Optionally casts back to
        float16/bfloat16 when requested.

        Args:
            input (torch.Tensor): Attention scores of shape [b, np, sq, sk].
            mask (Optional[torch.Tensor]): Optional additive mask.

        Returns:
            torch.Tensor: Attention probabilities of shape [b, np, sq, sk].
        """
        if self.input_in_float16 and self.softmax_in_fp32:
            input = input.float()

        if self.scale is not None:
            input = input * self.scale

        # Generate causal mask if not given
        sq, sk = input.size(2), input.size(3)
        if self.window_size is not None:
            mask = get_sliding_window_causal_mask(sq, sk, self.window_size)
        elif self.attn_mask_type == AttnMaskType.causal and mask is None and sq > 1:
            # If sq == 1 then either KV cache is used or one-element context is passed
            # so keeping mask=None in this case; subsequent code should handle it
            assert sq == sk, "causal mask is only for self attention"
            mask = get_default_causal_mask(sq)

        mask_output = self.mask_func(input, mask) if mask is not None else input
        if softmax_offset is None:
            softmax_fn = torch.nn.Softmax(dim=-1)
        else:
            softmax_fn = SoftmaxOne(-1, softmax_offset.to(input.device))

        probs = softmax_fn(mask_output)
        if self.input_in_float16 and self.softmax_in_fp32:
            if self.input_in_fp16:
                probs = probs.half()
            else:
                probs = probs.bfloat16()

        return probs

    @staticmethod
    def get_batch_per_block(sq, sk, b, np):
        """Return CUDA kernel's batch-per-block parameter for masked softmax.

        Args:
            sq (int): Query sequence length.
            sk (int): Key sequence length.
            b (int): Batch size.
            np (int): Number of attention heads per tensor-parallel partition.

        Returns:
            int: Batch-per-block value as computed by the CUDA extension.
        """
        import scaled_masked_softmax_cuda

        return scaled_masked_softmax_cuda.get_batch_per_block(sq, sk, b, np)


================================================
FILE: megatron/core/fusions/fused_weighted_squared_relu.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import torch
import torch.nn.functional as F

from megatron.core.activations import squared_relu
from megatron.core.jit import jit_fuser
from megatron.core.utils import nvtx_decorator

######################  WEIGHTED SQUARED ReLU FUSION  ######################


@jit_fuser
def weighted_squared_relu(x: torch.Tensor, weights: torch.Tensor) -> torch.Tensor:
    """Element-wise weight applied after Squared-ReLU.

    Args:
        x (torch.Tensor): Input tensor.
        weights (torch.Tensor): Weight tensor that will be broadcast-multiplied with the
            activation result. Typically of shape ``(B, 1)`` so it can be broadcast across
            the hidden dimension.

    Returns:
        torch.Tensor: ``squared_relu(x) * weights`` with original ``dtype`` preserved.
    """
    out_dtype = x.dtype
    res = torch.pow(F.relu(x), 2) * weights
    return res.to(out_dtype)


@jit_fuser
def _squared_relu_back(g: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
    """Gradient of Squared-ReLU.

    The derivative of ``(ReLU(x))^2`` w.r.t ``x`` is ``2 * ReLU(x)``.
    """
    return g * 2 * F.relu(x)


@jit_fuser
def weighted_squared_relu_back(g: torch.Tensor, x: torch.Tensor, weights: torch.Tensor):
    """Backward for weighted Squared-ReLU.

    Returns gradients w.r.t ``x`` and ``weights``.
    """
    input_dtype = x.dtype
    w_dtype = weights.dtype

    # Gradient w.r.t. the input.
    input_grad = _squared_relu_back(g * weights, x)

    # Gradient w.r.t. the weights.
    weights_grad = squared_relu(x) * g.to(w_dtype)
    # Sum across the hidden dimension so each token has a single scalar weight.
    weights_grad = torch.sum(weights_grad, dim=-1, keepdim=True)

    return input_grad.to(input_dtype), weights_grad.to(w_dtype)


class WeightedSquaredReLUFunction(torch.autograd.Function):
    """Autograd wrapper around the weighted Squared-ReLU fused kernels."""

    @staticmethod
    @nvtx_decorator()
    def forward(ctx, input: torch.Tensor, weights: torch.Tensor):
        """forward method for `WeightedSquaredReLUFunction`

        Args:
            ctx : context object to store intermediate tensors.
            input (torch.Tensor): input tensor.
            weights (torch.Tensor): weight tensor.
            fp8_input_store (bool): a bool flag to indicate if storing input in fp8.
        """
        ctx.save_for_backward(input, weights)
        return weighted_squared_relu(input, weights)

    @staticmethod
    @nvtx_decorator()
    def backward(ctx, grad_output: torch.Tensor):
        """backward method for `WeightedSquaredReLUFunction`

        Args:
            ctx : context object to store intermediate tensors.
            grad_output (torch.Tensor): gradient of the output of the forward function.
        """
        input, weights = ctx.saved_tensors
        inp_grad, w_grad = weighted_squared_relu_back(grad_output, input, weights)
        return inp_grad, w_grad


def weighted_squared_relu_impl(input: torch.Tensor, weights: torch.Tensor) -> torch.Tensor:
    """Token-wise weighted Squared-ReLU fusion with optional FP8 storage.

    Args:
        input (torch.Tensor): Input tensor of shape ``(B, *, hidden_size)`` where ``*`` can be
            the sequence dimension.
        weights (torch.Tensor): Per-token weights broadcastable to the output of
            ``squared_relu``.

    Returns:
        torch.Tensor: Output tensor with the same shape as ``input`` except that the hidden
            dimension remains unchanged.
    """
    ori_shape = input.shape
    assert len(ori_shape) in [2, 3]
    input = input.view(-1, ori_shape[-1])

    output = WeightedSquaredReLUFunction.apply(input, weights)

    return output if len(ori_shape) == 2 else output.view(ori_shape[0], ori_shape[1], -1)


================================================
FILE: megatron/core/hyper_comm_grid.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import os
from operator import itemgetter
from typing import Any, Optional, Tuple, Union

import numpy as np
import torch.distributed as dist

try:
    import einops

    HAVE_EINOPS = True
except ImportError:
    HAVE_EINOPS = False

try:
    from absl import logging

    HAVE_ABSL = True
except ImportError:
    import logging
    import warnings

    logging = logging.getLogger(__name__)
    warnings.warn(
        "absl.logging is not installed. Using logging.getLogger(__name__) instead. "
        "Please install absl.logging with `pip install absl-py` to use absl.logging."
    )
    HAVE_ABSL = False


class HyperCommGrid:
    r"""N-dimensional communication grid.

    Manages an arbitrary number of parallelisms as a hyperrectangle. Each dimension is given a name
    at initialization time. The order of ``dim_names`` implies the mapping order equivalent to
    the ``order`` argument of MCore's ``initialize_model_parallel``. Internally, it has to be
    reversed to match n-D array.

    For any combination of dimensions, a process group can only be created once.
    Creating process groups for the same combination with different options is not supported.

    Note:
        ``create_pg()`` over specific dims must be explicitly called to create a process group.
        We don't create a process group in the ``get_pg()`` function because there are many options
        (kwargs) that can be passed when creating a process group, which ``get_pg()`` should not
        be exposed to.

    Examples:
        >>> grid = HyperCommGrid([2, 3, 4, 5], ["tp", "cp", "pp", "dp"])
        >>> dp_group = grid.create_pg("dp")
        >>> # retrieve dp_group from grid after creation
        >>> # dp_group = grid.get_pg("dp")
        >>>
        >>> # It is equivalent to calling the following functions in MCore parallel_state
        >>> # with world size 120.
        >>> parallel_state.initialize_model_parallel(
        >>>     tensor_model_parallel_size=2,
        >>>     context_parallel_size=3,
        >>>     pipeline_model_parallel_size=4,
        >>>     order="tp-cp-pp-dp")
        >>> dp_group_mcore = parallel_state.get_data_parallel_group()
        >>>
        >>> # We can create group from multiple leading dims and also pass more options.
        >>> pg_options = ProcessGroupNCCL.Options()
        >>> pg_options.config.max_ctas = 8
        >>> dp_cp_group = grid.create_pg(
        >>>     ["cp", "dp"], pg_options=pg_options,
        >>>     group_desc="WEIGHT_GRADIENT_COMM_GROUP")


    Args:
        shape: Shape of the communication grid.
        dim_names: Name of each dimension corresponding to shape. Must have the same length as
            shape.
        rank_offset: Starting rank when the grid doesn't span the entire communication world.
            Default 0.
        backend: Backend for creating process group. Default None and will use default backend.
    """

    def __init__(
        self,
        shape: list[int],
        dim_names: list[str],
        rank_offset: int = 0,
        backend: Optional[str] = None,
    ) -> None:
        if len(shape) != len(dim_names):
            raise ValueError(f"len(shape) {shape} != len(dim_names) {dim_names}")

        # Querying environment instead of calling torch.distributed.get_world_size() for mock
        # testing without initializing process group.
        if "WORLD_SIZE" in os.environ:
            world_size = int(os.environ["WORLD_SIZE"])
        elif dist.is_initialized():
            world_size = dist.get_world_size()
        else:
            raise RuntimeError(
                "Cannot determine world size: WORLD_SIZE environment variable not set and "
                "torch.distributed is not initialized. Please either set WORLD_SIZE or "
                "initialize torch.distributed before creating HyperCommGrid."
            )
        self.rank_offset = rank_offset
        self.size = np.prod(shape)
        if rank_offset < 0:
            raise ValueError(f"rank_offset must be non-negative, got {rank_offset}")
        if self.size > world_size - rank_offset:
            raise RuntimeError(
                f"Grid shape {shape} is over sized with world size {world_size} and rank "
                f"offset {self.rank_offset}"
            )

        # [:] insures a copy
        self.shape = shape[:]
        self.dim_names = dim_names[:]
        self.backend = backend
        self._pgs: dict[str, dist.ProcessGroup] = {}

    def create_pg(self, dims: Union[str, list[str]], **kwargs: Any) -> dist.ProcessGroup | None:
        r"""Create a process group based on a list of dimension names

        Note: The unique key used to store the process group internally will follow the reversed
        order of the original dim_names. For example, if dim_names=["tp", "cp", "dp"] and you
        create a process group with dims=["dp", "tp"], the unique_group_key will be "dp-tp"
        (ordered according to the reversed dim_names order: ["dp", "cp", "tp"]).

        Args:
            dims: Name of leading dimensions to create process group

        Keyword arguments are directly passed into new_subgroups_by_enumeration(). The docstring
        is copied from new_subgroups_by_enumeration().

        Keyword args from `dist.new_subgroups_by_enumeration`:
            timeout (timedelta, optional): see `init_process_group` for details and default value.
            pg_options (ProcessGroupOptions, optional): process group options
                specifying what additional options need to be passed in during
                the construction of specific process groups.
            group_desc (str, optional): A string describing the group. Each subgroup will
                inherit its group_desc.

        Returns:
            dist.ProcessGroup | None: The created process group.

        Raises:
            KeyError: If attempting to recreate a process group with an existing key.
        """
        # ordered_dims and unique_group_key will follow the reversed order of self.dim_names
        ordered_dims, unique_group_key = self._order_dims(dims)

        if unique_group_key in self._pgs:
            raise KeyError(
                f"Process group {dims} has already been created. Because there is no way to check "
                f"whether options to create process group matches the first, we error out instead "
                f"of returning the process group that has already been created before."
            )

        rank_enum = self._gen_rank_enum(ordered_dims)
        pg, _ = dist.new_subgroups_by_enumeration(rank_enum, backend=self.backend, **kwargs)

        if dist.get_rank() == 0:
            logging.info(
                f"Generated process group for {unique_group_key} with enumeration {rank_enum}"
            )
        self._pgs[unique_group_key] = pg
        return pg

    def destroy(self) -> None:
        """Destroy all process groups created by this grid."""
        for pg in self._pgs.values():
            if pg is not None:
                dist.destroy_process_group(pg)
        self._pgs.clear()

    def get_pg(self, dims: Union[str, list[str]]) -> dist.ProcessGroup:
        r"""Get a process group based on a list of dimension names

        Args:
            dims: Name of leading dimensions to create process group
        """
        _, unique_group_key = self._order_dims(dims)

        if unique_group_key not in self._pgs:
            raise KeyError(
                f"Process group for {unique_group_key} hasn't been created. Call create_pg first."
            )

        return self._pgs[unique_group_key]

    def get_rank_enum(self, dims: Union[str, list[str]]) -> list[list[int]]:
        r"""Get the rank enumeration for the requested dimension(s).

        This is the exact enumeration that would be used by create_pg for the same
        dims. It is useful for creating additional groups whose membership is derived from
        the grid (e.g., embedding/position-embedding groups derived from PP groups).

        Args:
            dims: Dimension name or list of dimension names.

        Returns:
            List of rank lists (one per subgroup).
        """
        ordered_dims, _ = self._order_dims(dims)
        return self._gen_rank_enum(ordered_dims)

    def _gen_rank_enum(self, dims: list[str]) -> list[list[int]]:
        r"""Generate rank enumeration before calling new_subgroups_by_enumeration

        This function returns ranks grouped by the specified dimensions, but in REVERSE order
        of the input dimensions. For example, if you request dimensions ["a", "b"],
        the ranks will be grouped by "b-a" order.

        Example:
            For a grid with shape [2, 2, 2] and dim_names ["a", "b", "c"]:
            _gen_rank_enum(["a", "b"]) returns [[0, 2, 1, 3], [4, 6, 5, 7]]

            This groups ranks first by dimension "b", then by dimension "a":
            - Group 0: ranks where c=0, grouped by b-a: [0, 2, 1, 3]
            - Group 1: ranks where c=1, grouped by b-a: [4, 6, 5, 7]

        Args:
            dims: Name of leading dimensions to create process group

        Although the function is lightweight enough to be inlined, a standalone one makes it
        easier to test against MCore's RankGenerator
        """

        if not HAVE_EINOPS:
            raise RuntimeError(
                "einops is not installed. Please install it with `pip install einops`."
            )

        # Need to reverse order of dim_names to match MCore convention
        dim_names_reverse = self.dim_names[::-1]

        remaining_dims = []
        for v in dim_names_reverse:
            if v not in dims:
                remaining_dims.append(v)

        rearrange_str = (
            f"({' '.join(dim_names_reverse)}) -> ({' '.join(remaining_dims)}) ({' '.join(dims)})"
        )
        logging.debug(rearrange_str)

        shape_dict = {d: s for d, s in zip(self.dim_names, self.shape)}
        return einops.rearrange(
            np.arange(self.rank_offset, self.rank_offset + self.size), rearrange_str, **shape_dict
        ).tolist()

    def _order_dims(self, dims: Union[str, list[str]]) -> Tuple[list[str], str]:
        r"""Reorder dims based on the order of self.dim_names"""
        if not isinstance(dims, list):
            ordered_dims = [dims]
        else:
            dim_names_reverse = self.dim_names[::-1]
            indices = sorted([dim_names_reverse.index(d) for d in dims])
            if len(indices) == 1:
                ordered_dims = [dim_names_reverse[indices[0]]]
            else:
                ordered_dims = list(itemgetter(*indices)(dim_names_reverse))

        unique_group_key = "-".join(ordered_dims)
        return ordered_dims, unique_group_key


================================================
FILE: megatron/core/inference/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/core/inference/async_stream.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
# Copyright 2025 The vLLM authors.
#
# This code was adopted from https://github.com/vllm-project/vllm/
# This source code is licensed under the Apache license found in the
# LICENSE file in the root directory of this source tree.

import asyncio
from typing import Any, AsyncGenerator, Callable, Optional, Type, Union

from megatron.core.inference.inference_request import InferenceRequest
from megatron.core.utils import get_asyncio_loop

STOP_ITERATION = Exception()


class AsyncStream:
    """
    Class for encapsulating an asynchronous stream of InferenceRequest outputs.

    Adopted from https://github.com/vllm-project/vllm/blob/eb881ed006ca458b052905e33f0d16dbb428063a/vllm/v1/engine/async_stream.py # pylint: disable=line-too-long
    """

    def __init__(
        self,
        request_id: int,
        cancel: Callable[[str], None],
        loop: Optional[asyncio.AbstractEventLoop] = None,
    ) -> None:
        self._request_id = request_id
        self._cancel = cancel
        self._queue: asyncio.Queue = asyncio.Queue()
        self._finished = False
        self._loop = get_asyncio_loop(loop)

    def put(self, item: Union[InferenceRequest, Exception]) -> None:
        """Adds a new value to the stream"""
        if not self._finished:
            self._loop.call_soon_threadsafe(self._queue.put_nowait, item)

    def finish(self, exception: Optional[Union[BaseException, Type[BaseException]]] = None) -> None:
        """Completes the stream by adding a sentinel value"""
        if not self._finished:
            self._finished = True
            self._loop.call_soon_threadsafe(
                self._queue.put_nowait,
                exception if self._is_raisable(exception) else STOP_ITERATION,
            )

    @property
    def finished(self) -> bool:
        """Whether the stream has finished"""
        return self._finished

    async def generator(self) -> AsyncGenerator[InferenceRequest, None]:
        """Creates an AsyncGenerator over the stream queue"""
        try:
            while True:
                result = await self._queue.get()
                if self._is_raisable(result):
                    if result == STOP_ITERATION:
                        return
                    raise result
                yield result
        except GeneratorExit:
            self._cancel()
            raise asyncio.CancelledError from None

    @staticmethod
    def _is_raisable(value: Any):
        return isinstance(value, BaseException) or (
            isinstance(value, type) and issubclass(value, BaseException)
        )


================================================
FILE: megatron/core/inference/batch_dimensions_utils.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

"""
Batch dimensions utilities.

This module contains utilities for managing batch dimensions,
including the InferenceBatchDimensions dataclass and CUDAGraphBatchDimensionBuilder for generating
and matching CUDA graph batch dimensions.
"""

import math
from dataclasses import dataclass
from typing import List, Optional, Tuple

import torch

from megatron.core.utils import get_pg_size


@dataclass(order=True, frozen=True)
class InferenceBatchDimensions:
    """Batch dimensions for dynamic inference.

    Attributes:
        token_count : number of total input tokens
        prefill_req_count : number of prefill requests
        decode_req_count : number of decode requests

    The batch dimensions are ordered by token_count, then by prefill_req_count,
    then by decode_req_count.

    """

    token_count: int = 0
    prefill_req_count: int = 0
    decode_req_count: int = 0

    def __str__(self):
        """
        Returns a string representation of the batch dimensions.
        """
        return f"[{self.token_count}]: {self.prefill_req_count} P + {self.decode_req_count} D"

    def is_applicable_for_batch_dim(
        self, real_batch_dim: "InferenceBatchDimensions", strict: bool = False
    ) -> bool:
        """
        Checks if this batch dimension is applicable for the given real batch dimension.
        Applicable batch dimensions are those that have enough tokens and
        requests budget to handle the real batch dimensions.

        Note that if strict is False, prefill slots can be used
        for prefill or decode requests. Otherwise, prefill slots
        can only be used for prefill requests.
        """
        if real_batch_dim.prefill_req_count == 0:
            return (
                self.token_count >= real_batch_dim.token_count
                and self.decode_req_count >= real_batch_dim.decode_req_count
                and self.prefill_req_count == 0  # keep decode only property
            )
        if strict:
            return (
                self.token_count >= real_batch_dim.token_count
                and self.prefill_req_count >= real_batch_dim.prefill_req_count
                and self.decode_req_count >= real_batch_dim.decode_req_count
            )
        else:
            return (
                self.token_count >= real_batch_dim.token_count
                and self.prefill_req_count >= real_batch_dim.prefill_req_count
                and self.prefill_req_count + self.decode_req_count
                >= real_batch_dim.prefill_req_count + real_batch_dim.decode_req_count
            )

    def is_valid(
        self, max_requests: int, max_sequence_length: int, num_speculative_tokens: int
    ) -> bool:
        """
        Checks if the batch dimension is valid based on resource constraints.

        Args:
            max_requests: Maximum number of requests allowed

        Returns:
            True if the config is valid, False otherwise
        """
        # Check if total requests exceed maximum
        if self.prefill_req_count + self.decode_req_count > max_requests:
            return False

        # Check for negative request counts
        if self.prefill_req_count < 0 or self.decode_req_count < 0:
            return False

        # Check if token count is sufficient for requests
        if self.token_count < self.prefill_req_count + self.decode_req_count * (
            num_speculative_tokens + 1
        ):
            return False

        # Check if the prefill requests are shorter than the max sequence length
        if (
            self.token_count
            > self.prefill_req_count * max_sequence_length
            + self.decode_req_count * (num_speculative_tokens + 1)
        ):
            return False

        return True

    def __hash__(self):
        """
        Returns a hash of the batch dimension.
        In cuda graph quick matching, the batch dimension is used as a key in a dictionary.
        """
        return hash((self.token_count, self.prefill_req_count, self.decode_req_count))

    def __eq__(self, other: "InferenceBatchDimensions") -> bool:
        """
        Checks if this batch dimension is equal to another batch dimension.
        """
        if other is None:
            return False
        return (self.token_count, self.prefill_req_count, self.decode_req_count) == (
            other.token_count,
            other.prefill_req_count,
            other.decode_req_count,
        )

    @property
    def req_count(self) -> int:
        """
        Returns the total number of requests.
        """
        return self.prefill_req_count + self.decode_req_count

    @staticmethod
    def adjust_batch_dims_for_expert_parallelism(
        local_batch_dims,
        strict: bool,
        decode_only_cuda_graphs: bool,
        explicit_chunked_prefill: bool,
        smallest_non_decode_cuda_graph_size: int,
        requires_mamba_state_extraction: bool = False,
        ep_group: Optional[torch.distributed.ProcessGroup] = None,
    ) -> Optional["InferenceBatchDimensions"]:
        """Adjusted cuda graph batch dimensions for expert parallelism.
            We take the max token count across expert model parallel group.

        Args:
            local_batch_dims: The local batch dimensions to adjust.
            strict: Whether to use strict matching for batch dimensions.
            decode_only_cuda_graphs: Whether CUDA graphs are only used for decode steps.
            explicit_chunked_prefill: Whether chunked prefill is enabled with explicit requests
            requires_mamba_state_extraction: Whether this rank needs to extract intermediate
                Mamba states
            ep_group: Optional expert parallel process group. If None, uses global parallel state.
                      When using different EP sizes for inference vs training, pass the
                      inference EP group explicitly.

        Return:
            (InferenceBatchDimensions) A new InferenceBatchDimensions object with
            adjusted dimensions, or None if eager mode should be used.
        """
        ep_size = get_pg_size(ep_group)
        if ep_size <= 1:
            return local_batch_dims
        # all reduce local work across expert model parallel group

        is_non_decode = local_batch_dims.prefill_req_count > 0

        sync_tensor = torch.tensor(
            [
                local_batch_dims.token_count,
                int(is_non_decode),
                local_batch_dims.prefill_req_count,
                local_batch_dims.decode_req_count,
                int(requires_mamba_state_extraction),
            ],
            dtype=torch.int32,
            device=torch.cuda.current_device(),
        )

        torch.distributed.all_reduce(sync_tensor, op=torch.distributed.ReduceOp.MAX, group=ep_group)

        sync_tensor = sync_tensor.cpu()
        is_any_ep_rank_in_non_decode = sync_tensor[1].item() == 1
        is_any_ep_rank_requiring_mamba_extraction = sync_tensor[4].item() == 1

        # We force eager mode for scenarios where some ranks will run with CUDA graphs
        # while others will not. Without this check, communication in the
        # expert routing layer would pad up to the maximum capacity only for the ranks that
        # are using CUDA graphs in this step, leading to a hang.
        # This can happen in the following cases:
        #   1. If we only allow decode CUDA graphs but some ranks are running non-decode batches
        #   2. Some ranks are running explicit chunked prefill requests
        #       (graphs are not recorded for batches with explicit chunked prefill requests)
        #   3. Some ranks need to extract intermediate Mamba model states
        if is_any_ep_rank_in_non_decode and (decode_only_cuda_graphs or explicit_chunked_prefill):
            return None  # indicate no match, run in eager mode
        elif is_any_ep_rank_requiring_mamba_extraction:
            return None

        # If strict matching is enabled, we sync the request counts across EP ranks
        # to ensure the graph captures the maximum needed capacity.
        # TODO(ksanthanam): Add functional test for this scenario
        adjusted_prefill_req_count = (
            int(sync_tensor[2].item()) if strict else local_batch_dims.prefill_req_count
        )
        adjusted_decode_req_count = (
            int(sync_tensor[3].item()) if strict else local_batch_dims.decode_req_count
        )
        adjusted_token_count = int(sync_tensor[0].item())

        # When any EP rank has prefill requests (non-strict mode), elevate
        # the token count to be >= the smallest prefill/mixed cuda graph.
        # This ensures decode-only ranks don't match a fine-grained decode
        # graph while prefill ranks match a coarser mixed graph, which would
        # produce inconsistent token counts across EP ranks.
        if is_any_ep_rank_in_non_decode and not strict:
            adjusted_token_count = max(adjusted_token_count, smallest_non_decode_cuda_graph_size)

        adjusted_batch_dim = InferenceBatchDimensions(
            token_count=adjusted_token_count,
            prefill_req_count=adjusted_prefill_req_count,
            decode_req_count=adjusted_decode_req_count,
        )

        return adjusted_batch_dim


class CUDAGraphBatchDimensionBuilder:
    """Builder for creating and managing CUDA graph batch dimensions.

    This class provides static methods for generating lists of CUDA graph batch dimensions
    and matching the best batch dimension for a given real batch dimension.
    """

    # Constant for rounding token counts when generating CUDA graph batch dimensions
    CUDA_GRAPH_ROUNDER = 8

    @staticmethod
    def _calculate_cuda_graph_token_counts(
        tp_size: int, num_cuda_graphs: int, cuda_graph_max_tokens: int
    ) -> List[int]:
        """
        Calculate CUDA graph token counts for a given configuration.

        This method computes evenly-spaced token counts from step_size up to
        cuda_graph_max_tokens, ensuring proper rounding and TP alignment.

        Args:
            tp_size: Tensor parallel size (for alignment)
            num_cuda_graphs: Number of CUDA graphs to generate (must be >= 1)
            cuda_graph_max_tokens: Maximum token count for CUDA graphs (must be > 0)

        Returns:
            List of token counts in descending order

        Example:
            >>> _calculate_cuda_graph_token_counts
            (tp_size=2, num_cuda_graphs=4, cuda_graph_max_tokens=1000)
            [1000, 752, 504, 256]
        """
        if num_cuda_graphs == -1:
            # automatically determine the number of CUDA graphs to
            # capture based on the `max_requests` value
            cuda_graph_token_counts = (
                [1, 2, 4] + list(range(8, 256, 8)) + list(range(256, cuda_graph_max_tokens + 1, 16))
            )
            # Align each entry to TP size
            cuda_graph_token_counts = list(
                dict.fromkeys(math.ceil(s / tp_size) * tp_size for s in cuda_graph_token_counts)
            )
            # Clamp to max tokens
            cuda_graph_token_counts = [
                s for s in cuda_graph_token_counts if s <= cuda_graph_max_tokens
            ]
            if not cuda_graph_token_counts or cuda_graph_token_counts[-1] != cuda_graph_max_tokens:
                cuda_graph_token_counts.append(cuda_graph_max_tokens)
            cuda_graph_token_counts.reverse()
            return cuda_graph_token_counts

        assert num_cuda_graphs >= 1, f"num_cuda_graphs must be >= 1, got {num_cuda_graphs}"
        assert (
            cuda_graph_max_tokens > 0
        ), f"cuda_graph_max_tokens must be > 0, got {cuda_graph_max_tokens}"

        # Cuda graph step size.
        cuda_graph_step_size = cuda_graph_max_tokens / num_cuda_graphs
        cuda_graph_step_size = CUDAGraphBatchDimensionBuilder.CUDA_GRAPH_ROUNDER * int(
            math.ceil(int(cuda_graph_step_size) / CUDAGraphBatchDimensionBuilder.CUDA_GRAPH_ROUNDER)
        )
        # Make sure divisible by TP size
        cuda_graph_step_size = math.ceil(cuda_graph_step_size / tp_size) * tp_size

        # round down cuda graph max tokens to be multiple of TP size
        cuda_graph_max_tokens = (cuda_graph_max_tokens // tp_size) * tp_size

        # Cuda graph token counts.
        if num_cuda_graphs == 1:
            cuda_graph_token_counts = [cuda_graph_max_tokens]
        else:
            cuda_graph_token_counts = list(
                range(cuda_graph_step_size, cuda_graph_max_tokens, cuda_graph_step_size)
            )
            if (
                len(cuda_graph_token_counts) == 0
                or cuda_graph_token_counts[-1] != cuda_graph_max_tokens
            ):
                cuda_graph_token_counts.append(cuda_graph_max_tokens)
            cuda_graph_token_counts.reverse()

        return cuda_graph_token_counts

    @staticmethod
    def generate_cuda_graph_batch_dimensions_list(
        tp_size: int,
        num_cuda_graphs: Optional[int],
        cuda_graph_max_tokens: int,
        cuda_graph_mixed_prefill_request_count: Optional[int],
        max_requests: int,
        max_tokens: int,
        max_sequence_length: int,
        use_cuda_graphs_for_non_decode_steps: bool,
        num_speculative_tokens: int = 0,
    ) -> Tuple[List[InferenceBatchDimensions], Optional[List[int]]]:
        """
        Generate CUDA graph batch dimensions.

        This function constructs CUDA graph batch dimensions for different token counts
        and request patterns, then filters them based on resource constraints.
        The construction process involves:

        Construction Rules:
        1. Token count generation: Creates token counts from step_size to max_tokens,
           rounded to multiples of 8
        2. Tensor parallelism alignment: Ensures step_size is divisible by tensor parallel size
        3. Batch dimension creation: For each token count, creates three types of batch dimensions:
           - Decode-only: (token_count, 0, token_count) - all tokens used for decode requests
           - Mixed prefill+decode: (token_count, prefill_req_count, token_count - prefill_req_count)
           - Prefill-only:
             (token_count, max(prefill_req_count, ceil(token_count/(max_seq_len-1))), 0)

        Filtering Rules:
        1. Request limit: prefill_req_count + decode_req_count <= max_requests
        2. Non-negative counts: Both prefill_req_count and decode_req_count must be >= 0
        3. Token sufficiency: token_count >= prefill_req_count + decode_req_count

        Sorting Rules for Attention Metadata Construction:
        1. Batch dimensions are sorted by prefill token count (token_count - decode_req_count)
           in descending order

        Args:
            tp_size: Tensor parallel size
            num_cuda_graphs: Number of CUDA graphs to generate
            cuda_graph_max_tokens: Maximum tokens for CUDA graphs
            cuda_graph_mixed_prefill_request_count: Number of mixed prefill requests for CUDA graphs
            max_requests: Maximum number of requests
            max_tokens: Maximum total tokens
            max_sequence_length: Maximum sequence length
            use_cuda_graphs_for_non_decode_steps: Whether to use CUDA graphs for non-decode steps
            num_speculative_tokens: Number of speculative tokens

        Returns:
            Tuple containing:
            - List of InferenceBatchDimensions objects,
              sorted by prefill token count in descending order
            - Optional list of CUDA graph token counts
        """

        def add_if_valid(token_count: int, prefill_req_count: int, decode_req_count: int) -> None:
            """Helper to create and append batch dimension to list only if it's valid."""
            batch_dim = InferenceBatchDimensions(token_count, prefill_req_count, decode_req_count)
            if batch_dim.is_valid(max_requests, max_sequence_length, num_speculative_tokens):
                cuda_graph_batch_dimensions_list.append(batch_dim)

        # Cuda graph token-counts
        # (i.e., token counts used by cuda-graph steps, both decode and non-decode).
        cuda_graph_prefill_token_counts = None
        cuda_graph_decode_token_counts = None
        if num_cuda_graphs is not None:

            # Ensure valid num_cuda_graphs.
            if (
                cuda_graph_max_tokens is None
                or cuda_graph_max_tokens > max_tokens
                or cuda_graph_max_tokens <= 0
            ):
                cuda_graph_max_tokens = max_tokens

            assert cuda_graph_max_tokens == max_requests * (num_speculative_tokens + 1), (
                f"cuda_graph_max_tokens ({cuda_graph_max_tokens}) must equal max_requests *"
                f"(num_speculative_tokens + 1) ({max_requests * (num_speculative_tokens + 1)}). "
                "This is required for correctly syncing EP ranks: "
                f"prefill and decode graph pools must have the same token count granularity."
            )

            if num_cuda_graphs != -1:
                # if -1, no need to adjust. This will be taken care of in
                # the _calculate_cuda_graph_token_counts function where we will generate
                # the token counts based on the max_tokens value and the step size.
                num_cuda_graphs = min(max(num_cuda_graphs, 1), cuda_graph_max_tokens)

            # Calculate token counts for prefill and mixed graphs.
            # These need the full cuda_graph_max_tokens to handle variable-length sequences.
            cuda_graph_prefill_token_counts = (
                CUDAGraphBatchDimensionBuilder._calculate_cuda_graph_token_counts(
                    tp_size=tp_size,
                    num_cuda_graphs=num_cuda_graphs,
                    cuda_graph_max_tokens=cuda_graph_max_tokens,
                )
            )

            # Calculate separate token counts for decode-only graphs.
            cuda_graph_max_tokens_decode = min(
                cuda_graph_max_tokens, max_requests * (num_speculative_tokens + 1)
            )
            cuda_graph_decode_token_counts = (
                CUDAGraphBatchDimensionBuilder._calculate_cuda_graph_token_counts(
                    tp_size=tp_size,
                    num_cuda_graphs=num_cuda_graphs,
                    cuda_graph_max_tokens=cuda_graph_max_tokens_decode,
                )
            )

        cuda_graph_batch_dimensions_list = []
        if num_cuda_graphs is None:
            cuda_graph_batch_dimensions_list = []
        elif (
            not cuda_graph_mixed_prefill_request_count
            or cuda_graph_mixed_prefill_request_count <= 0
            or not use_cuda_graphs_for_non_decode_steps
        ):  # decode only
            # Use decode-specific token counts for decode-only graphs
            for size in cuda_graph_decode_token_counts:
                decode_req_count = min(size // (num_speculative_tokens + 1), max_requests)
                token_count = decode_req_count * (num_speculative_tokens + 1)
                token_count = token_count // tp_size * tp_size
                add_if_valid(
                    token_count=token_count, prefill_req_count=0, decode_req_count=decode_req_count
                )
        else:
            # Mixed prefill and decode mode
            # Create prefill and mixed dimensions with full token counts
            for size in cuda_graph_prefill_token_counts:
                assert size % tp_size == 0
                prefill_req_count = min(cuda_graph_mixed_prefill_request_count, max_requests)
                decode_req_count = max(
                    0,
                    min(
                        (size - prefill_req_count) // (num_speculative_tokens + 1),
                        max_requests - prefill_req_count,
                    ),
                )
                add_if_valid(
                    token_count=size,
                    prefill_req_count=prefill_req_count,
                    decode_req_count=decode_req_count,
                )
                # We need to ensure the prefill requests are shorter than the max sequence length,
                # considering the one decode token is used for prefill request construction
                prefill_only_minimal_num = max(
                    cuda_graph_mixed_prefill_request_count,
                    math.ceil(size / max(1, max_sequence_length - 1)),
                )
                if prefill_only_minimal_num < max_requests:
                    add_if_valid(
                        token_count=size,
                        prefill_req_count=max(prefill_only_minimal_num, min(max_requests, size)),
                        decode_req_count=0,
                    )

            # Create decode-only dimensions with optimized token counts
            for size in cuda_graph_decode_token_counts:
                decode_req_count = min(size // (num_speculative_tokens + 1), max_requests)
                token_count = decode_req_count * (num_speculative_tokens + 1)
                token_count = token_count // tp_size * tp_size
                add_if_valid(
                    token_count=token_count, prefill_req_count=0, decode_req_count=decode_req_count
                )

        # Remove duplicates and sort by prefill token count
        cuda_graph_batch_dimensions_list = list(set(cuda_graph_batch_dimensions_list))
        cuda_graph_batch_dimensions_list.sort(
            key=lambda x: (
                (x.token_count - x.decode_req_count * (num_speculative_tokens + 1)),
                x.decode_req_count,
            ),
            reverse=True,
        )

        # Collect actual token counts from batch dimensions, then unique and sort
        if num_cuda_graphs is None or len(cuda_graph_batch_dimensions_list) == 0:
            # No CUDA graphs or no valid batch dimensions
            cuda_graph_token_counts = None
        else:
            # Extract unique token counts from the batch dimensions we actually created
            token_counts_set = {
                batch_dim.token_count for batch_dim in cuda_graph_batch_dimensions_list
            }
            cuda_graph_token_counts = sorted(list(token_counts_set), reverse=True)

        return cuda_graph_batch_dimensions_list, cuda_graph_token_counts

    @staticmethod
    def match_graph_config(
        real_batch_dim: InferenceBatchDimensions,
        cuda_graph_batch_dimensions_list: List[InferenceBatchDimensions],
        smallest_non_decode_cuda_graph_size: int,
        strict: bool = False,
        decode_only_cuda_graphs: bool = False,
        explicit_chunked_prefill: bool = False,
        requires_mamba_state_extraction: bool = False,
        ep_group: Optional[torch.distributed.ProcessGroup] = None,
    ) -> Optional[InferenceBatchDimensions]:
        """
        Matches the best CUDA graph batch dimension for the given real batch dimension.

        Args:
            real_batch_dim: The real batch dimension to match
            cuda_graph_batch_dimensions_list: List of available CUDA graph batch dimensions
            strict: If False, prefill slots can be used for prefill or decode requests.
                   If True, prefill slots can only be used for prefill requests.
            decode_only_cuda_graphs: Used by expert parallel matching. If this is true,
            and one of the EP ranks is running a non-decode step, we elect to run in
            eager mode instead of matching a decode-only cuda graph.
            explicit_chunked_prefill: Whether chunked prefill is enabled with explicit requests
            requires_mamba_state_extraction: Whether intermediate Mamba states need to be extracted
            ep_group: Optional expert parallel process group. If None, uses global parallel state.
                      When using different EP sizes for inference vs training, pass the
                      inference EP group explicitly.
        Returns:
            The best matching CUDA graph batch dimension, or None if no applicable match is found
        """

        if not cuda_graph_batch_dimensions_list:
            # no need to match if no cuda graph batch dimensions are provided
            return None

        adjusted_batch_dim = InferenceBatchDimensions.adjust_batch_dims_for_expert_parallelism(
            real_batch_dim,
            strict=strict,
            decode_only_cuda_graphs=decode_only_cuda_graphs,
            explicit_chunked_prefill=explicit_chunked_prefill,
            requires_mamba_state_extraction=requires_mamba_state_extraction,
            ep_group=ep_group,
            smallest_non_decode_cuda_graph_size=smallest_non_decode_cuda_graph_size,
        )

        if adjusted_batch_dim is None:
            # we hit this scenario if decode_only_cuda_graphs is true,
            # and one of the EP ranks is running a non-decode step
            # in that case, all ranks have to run in eager mode
            return None

        if explicit_chunked_prefill and real_batch_dim.prefill_req_count > 0:
            return None

        if requires_mamba_state_extraction:
            return None

        # first filter out batch dimensions with smaller token count, prefill req count,
        # or decode req count, as they are not applicable
        graph_batch_dims_applicable = [
            graph_batch_dim
            for graph_batch_dim in cuda_graph_batch_dimensions_list
            if graph_batch_dim.is_applicable_for_batch_dim(adjusted_batch_dim, strict=strict)
        ]
        if len(graph_batch_dims_applicable) == 0:
            return None
        # then find the best batch dimension
        best_batch_dim = min(graph_batch_dims_applicable)

        return best_batch_dim


================================================
FILE: megatron/core/inference/common_inference_params.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
from megatron.core.inference.sampling_params import (  # noqa: F401 # pylint: disable=unused-import
    SamplingParams as CommonInferenceParams,
)


================================================
FILE: megatron/core/inference/communication/torch_symm_triton/__init__.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from .collectives import multimem_all_gather, multimem_all_gather_fused, multimem_reduce_scatter
from .fused_collectives import fused_multimem_rs_add_norm_ag
from .utils import are_tensors_nvls_eligible, is_device_nvls_capable


================================================
FILE: megatron/core/inference/communication/torch_symm_triton/barrier.py
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.

# Adapted from: https://github.com/meta-pytorch/kraken.git

from unittest.mock import MagicMock

from megatron.core.utils import null_decorator

try:
    import triton
    import triton.language as tl
except ImportError:
    triton = MagicMock()
    tl = MagicMock()
    triton.jit = null_decorator

from .utils import get_flat_bid, get_flat_tid


@triton.jit
def _send_signal(addrs, sem: tl.constexpr):
    tl.inline_asm_elementwise(
        f"""
        {{
            .reg .u32   %tmp32_<1>;
            .reg .pred  %p<1>;

            send_signal:
                atom.global.{sem}.sys.cas.b32 %tmp32_0, [$1], 0, 1;
                setp.eq.u32 %p0, %tmp32_0, 0;
                @!%p0 bra send_signal;
        }}
        """,
        "=r, l",
        [addrs],
        dtype=addrs.dtype,
        is_pure=False,
        pack=1,
    )


@triton.jit
def _wait_signal(addrs, sem: tl.constexpr):
    tl.inline_asm_elementwise(
        f"""
        {{
            .reg .u32   %tmp32_<1>;
            .reg .pred  %p<1>;

            wait_signal:
                atom.global.sys.{sem}.cas.b32 %tmp32_0, [$1], 1, 0;
                setp.eq.u32 %p0, %tmp32_0, 1;
                @!%p0 bra wait_signal;
        }}
        """,
        "=r, l",
        [addrs],
        dtype=tl.int32,
        is_pure=False,
        pack=1,
    )


@triton.jit
def symm_mem_sync(
    signal_pad_ptrs,
    block_id,
    rank: tl.constexpr,
    world_size: tl.constexpr,
    hasPreviousMemAccess: tl.constexpr = False,
    hasSubsequentMemAccess: tl.constexpr = False,
):
    """
    Synchronizes blocks with matching block_id across participating devices.

    Note: the function itself is not a system level barrier/fence. It is a
    building block for expressing different synchronization patterns.

    Pattern 0: Ensures that all writes to symm_mem buffers from previous
    kernels across all devices are visible to the current kernel:

        symm_mem_sync(..., hasPreviousMemAccess=False, hasSubsequentMemAccess=True)

    Pattern 1: Ensures that all writes to symm_mem buffers from the current
    block are visible to all remote blocks with matching blockIdx:

        symm_mem_sync(..., hasPreviousMemAccess=True, hasSubsequentMemAccess=True)

    Pattern 2: Ensures that symm_mem buffers read by the current kernel are safe
    for writing by subsequent kernels across all devices.

        symm_mem_sync(..., hasPreviousMemAccess=True, hasSubsequentMemAccess=False)

    CUDA graph friendliness:

        This barrier operates through atomic operations on a zero-filled signal
        pad, which resets to a zero-filled state after each successful
        synchronization. This design eliminates the need for incrementing a
        flag from host.
    """
    if block_id is None:
        block_id = get_flat_bid()
    flat_tid = get_flat_tid()

    remote_ranks = tl.arange(0, world_size)
    signal_pad_ptrs = signal_pad_ptrs.to(tl.pointer_type(tl.uint64))
    remote_signal_pad_addrs = tl.load(signal_pad_ptrs + remote_ranks).to(tl.pointer_type(tl.uint32))
    send_addrs = remote_signal_pad_addrs + block_id * world_size + rank

    local_signal_pad_addr = tl.load(signal_pad_ptrs + rank).to(tl.pointer_type(tl.uint32))
    wait_addrs = local_signal_pad_addr + block_id * world_size + remote_ranks

    if flat_tid < world_size:
        _send_signal(send_addrs, "release" if hasPreviousMemAccess else "relaxed")
        _wait_signal(wait_addrs, "acquire" if hasSubsequentMemAccess else "relaxed")


================================================
FILE: megatron/core/inference/communication/torch_symm_triton/collectives.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from unittest.mock import MagicMock

import torch

from megatron.core.utils import null_decorator

try:
    import triton
    import triton.language as tl

    HAVE_TRITON = True
except ImportError:
    triton = MagicMock()
    triton.jit = null_decorator
    tl = MagicMock()
    HAVE_TRITON = False
try:
    from torch._C._distributed_c10d import _SymmetricMemory
except ImportError:
    _SymmetricMemory = MagicMock()

from .barrier import symm_mem_sync
from .multimem_asm import ld_128, st_128
from .utils import are_tensors_nvls_eligible, get_flat_tid, sync_threads


@triton.jit
def _ag_phase(
    local_ptr, multicast_ptr, byte_offset, numel, BLOCK_SIZE, NUMEL_PER_THREAD, RANK, WORLD_SIZE
):
    """
    Core all-gather phase: load from local memory, multicast-store to symmetric buffer.
    This is the building block for both single-tensor and fused multi-tensor all-gathers.

    Each thread handles 128-bit (NUMEL_PER_THREAD elements) at a time.
    byte_offset locates the tensor within the multicast buffer.

    """
    pid = tl.program_id(axis=0)
    tid = get_flat_tid()

    numel_128 = numel // NUMEL_PER_THREAD
    numel_per_rank = tl.cdiv(numel_128, WORLD_SIZE)
    block_start = pid * BLOCK_SIZE

    while block_start < numel_per_rank:
        offsets = block_start + tid
        mask = offsets < numel_per_rank

        # byte_offset // 8 -> converts byte offset to uint64 offset
        # RANK * numel_per_rank -> start of our rank's segment
        # * 2 -> each 128-bit pack is 2 uint64s
        multicast_ptrs = (
            multicast_ptr.to(tl.pointer_type(tl.uint64))
            + byte_offset // 8
            + (RANK * numel_per_rank + offsets) * 2
        )
        local_ptrs = local_ptr.to(tl.pointer_type(tl.uint64)) + offsets * 2
        (x, y, z, w) = ld_128(local_ptrs, mask=mask, multicast_op=False)
        st_128(multicast_ptrs, x, y, z, w, mask=mask, multicast_op=True)

        block_start += tl.num_programs(axis=0) * BLOCK_SIZE


@triton.jit
def _multimem_all_gather_kernel(
    local_ptr,
    multicast_ptr,
    signal_pad_ptrs,
    numel,
    byte_offset,
    BLOCK_SIZE: tl.constexpr,
    NUMEL_PER_THREAD: tl.constexpr,
    RANK: tl.constexpr,
    WORLD_SIZE: tl.constexpr,
):
    """Single-tensor multicast all-gather kernel."""
    _ag_phase(
        local_ptr, multicast_ptr, byte_offset, numel, BLOCK_SIZE, NUMEL_PER_THREAD, RANK, WORLD_SIZE
    )
    sync_threads()
    symm_mem_sync(
        signal_pad_ptrs,
        None,
        RANK,
        WORLD_SIZE,
        hasPreviousMemAccess=True,
        hasSubsequentMemAccess=True,
    )


@triton.jit
def _multimem_all_gather_3_kernel(
    local_ptr_0,
    local_ptr_1,
    local_ptr_2,
    multicast_ptr,
    signal_pad_ptrs,
    numel_0,
    byte_offset_0,
    numel_1,
    byte_offset_1,
    numel_2,
    byte_offset_2,
    BLOCK_SIZE: tl.constexpr,
    NUMEL_PER_THREAD: tl.constexpr,
    RANK: tl.constexpr,
    WORLD_SIZE: tl.constexpr,
):
    """
    Fused 3-tensor multicast all-gather. Processes three tensors in sequence
    then synchronizes once, eliminating 2 kernel launches and 2 barriers
    compared to three separate multimem_all_gather calls.
    """
    _ag_phase(
        local_ptr_0,
        multicast_ptr,
        byte_offset_0,
        numel_0,
        BLOCK_SIZE,
        NUMEL_PER_THREAD,
        RANK,
        WORLD_SIZE,
    )
    _ag_phase(
        local_ptr_1,
        multicast_ptr,
        byte_offset_1,
        numel_1,
        BLOCK_SIZE,
        NUMEL_PER_THREAD,
        RANK,
        WORLD_SIZE,
    )
    _ag_phase(
        local_ptr_2,
        multicast_ptr,
        byte_offset_2,
        numel_2,
        BLOCK_SIZE,
        NUMEL_PER_THREAD,
        RANK,
        WORLD_SIZE,
    )
    sync_threads()
    symm_mem_sync(
        signal_pad_ptrs,
        None,
        RANK,
        WORLD_SIZE,
        hasPreviousMemAccess=True,
        hasSubsequentMemAccess=True,
    )


@triton.jit
def _multimem_reduce_scatter_kernel(
    local_ptr,
    multicast_ptr,
    signal_pad_ptrs,
    numel,
    BLOCK_SIZE: tl.constexpr,
    NUMEL_PER_THREAD: tl.constexpr,
    RANK: tl.constexpr,
    WORLD_SIZE: tl.constexpr,
    REDUCE_F32: tl.constexpr = False,
):
    """
    Triton kernel to perform multicast reduce-scatter over nvlink using multimem instructions.
    When REDUCE_F32=True, uses fp32 reduction instead of bf16x2 reduction.
    """
    symm_mem_sync(
        signal_pad_ptrs,
        None,
        RANK,
        WORLD_SIZE,
        hasPreviousMemAccess=False,
        hasSubsequentMemAccess=False,
    )
    sync_threads()

    pid = tl.program_id(axis=0)
    tid = get_flat_tid()

    # From this point on, we pretend each element is 128-bit
    numel = numel // NUMEL_PER_THREAD
    numel_per_rank = tl.cdiv(numel, WORLD_SIZE)
    block_start = pid * BLOCK_SIZE

    while block_start < numel_per_rank:
        offsets = block_start + tid
        mask = offsets < numel_per_rank

        # Each pointer points to a 128-bit bit pack
        multicast_ptrs = (
            multicast_ptr.to(tl.pointer_type(tl.uint64)) + (RANK * numel_per_rank + offsets) * 2
        )
        local_ptrs = local_ptr.to(tl.pointer_type(tl.uint64)) + offsets * 2
        (x, y, z, w) = ld_128(multicast_ptrs, mask=mask, multicast_op=True, reduce_f32=REDUCE_F32)
        st_128(local_ptrs, x, y, z, w, mask=mask, multicast_op=False)

        block_start += tl.num_programs(axis=0) * BLOCK_SIZE


# ── Python wrappers ─────────────────────────────────────────────────────────

_DEFAULT_KERNEL_CONFIG = {"max_num_blocks": 128, "num_warps": 32, "BLOCK_SIZE": 1024}


def _kernel_launch_config(element_size: int, max_numel: int, world_size: int, **kwargs):
    """Compute kernel launch config shared by all collective wrappers.

    Args:
        element_size: bytes per element (e.g. 2 for bf16).
        max_numel: largest tensor numel (determines grid size).
        world_size: number of ranks.

    Returns:
        (numel_per_thread, num_blocks, config) tuple.
    """
    config = {k: kwargs.get(k, v) for k, v in _DEFAULT_KERNEL_CONFIG.items()}
    numel_per_thread = 128 // (element_size * 8)
    num_threads = triton.cdiv(max_numel // numel_per_thread, world_size)
    num_blocks = min(triton.cdiv(num_threads, config["BLOCK_SIZE"]), config["max_num_blocks"])
    return numel_per_thread, num_blocks, config


def multimem_all_gather(
    output_tensor: torch.Tensor,
    input_tensor: torch.Tensor,
    symm_mem_hdl: _SymmetricMemory,
    byte_offset: int = 0,
    **kwargs,
) -> torch.Tensor:
    """
    Multicast all-gather for a single tensor.
    Output tensor must be a symmetric memory buffer.
    Input tensor can be a regular torch tensor.
    """
    assert HAVE_TRITON, "Triton is required for multimem all-gather."
    assert are_tensors_nvls_eligible(
        input_tensor
    ), "Input tensor must be 16-byte divisible on Hopper+ for NVLS."
    assert (
        output_tensor.numel() % input_tensor.numel() == 0
        and output_tensor.numel() // input_tensor.numel() == symm_mem_hdl.world_size
    ), "Output numel must be exactly world_size * input numel for all-gather."

    numel_per_thread, num_blocks, config = _kernel_launch_config(
        input_tensor.element_size(), output_tensor.numel(), symm_mem_hdl.world_size, **kwargs
    )
    _multimem_all_gather_kernel[(num_blocks, 1, 1)](
        input_tensor.data_ptr(),
        symm_mem_hdl.multicast_ptr,
        symm_mem_hdl.signal_pad_ptrs_dev,
        numel=output_tensor.numel(),
        byte_offset=byte_offset,
        BLOCK_SIZE=config["BLOCK_SIZE"],
        NUMEL_PER_THREAD=numel_per_thread,
        RANK=symm_mem_hdl.rank,
        WORLD_SIZE=symm_mem_hdl.world_size,
        num_warps=config["num_warps"],
    )

    return output_tensor


def multimem_all_gather_fused(
    output_0: torch.Tensor,
    input_0: torch.Tensor,
    byte_offset_0: int,
    output_1: torch.Tensor,
    input_1: torch.Tensor,
    byte_offset_1: int,
    output_2: torch.Tensor,
    input_2: torch.Tensor,
    byte_offset_2: int,
    symm_mem_hdl: _SymmetricMemory,
    **kwargs,
) -> None:
    """
    Fused 3-tensor multicast all-gather. Equivalent to calling multimem_all_gather
    three times but with a single kernel launch and a single barrier.

    All tensors must share the same symmetric memory handle.
    """
    assert HAVE_TRITON, "Triton is required for multimem all-gather."
    assert are_tensors_nvls_eligible(
        input_0, input_1, input_2
    ), "All input tensors must be 16-byte divisible on Hopper+ for NVLS."
    for inp, out in [(input_0, output_0), (input_1, output_1), (input_2, output_2)]:
        assert (
            out.numel() % inp.numel() == 0 and out.numel() // inp.numel() == symm_mem_hdl.world_size
        ), "Output numel must be exactly world_size * input numel for all-gather."

    max_numel = max(output_0.numel(), output_1.numel(), output_2.numel())

    numel_per_thread, num_blocks, config = _kernel_launch_config(
        input_0.element_size(), max_numel, symm_mem_hdl.world_size, **kwargs
    )
    _multimem_all_gather_3_kernel[(num_blocks, 1, 1)](
        input_0.data_ptr(),
        input_1.data_ptr(),
        input_2.data_ptr(),
        symm_mem_hdl.multicast_ptr,
        symm_mem_hdl.signal_pad_ptrs_dev,
        numel_0=output_0.numel(),
        byte_offset_0=byte_offset_0,
        numel_1=output_1.numel(),
        byte_offset_1=byte_offset_1,
        numel_2=output_2.numel(),
        byte_offset_2=byte_offset_2,
        BLOCK_SIZE=config["BLOCK_SIZE"],
        NUMEL_PER_THREAD=numel_per_thread,
        RANK=symm_mem_hdl.rank,
        WORLD_SIZE=symm_mem_hdl.world_size,
        num_warps=config["num_warps"],
    )


def multimem_reduce_scatter(
    output_tensor: torch.Tensor,
    input_tensor: torch.Tensor,
    symm_mem_hdl: _SymmetricMemory,
    **kwargs,
) -> torch.Tensor:
    """
    Multicast reduce-scatter for a single tensor.
    Input tensor must be a symmetric memory buffer.
    Output tensor can be a regular torch tensor.
    Supports bfloat16 and float32 dtypes.
    """
    assert HAVE_TRITON, "Triton is required for multimem reduce-scatter."
    assert input_tensor.dtype in (
        torch.bfloat16,
        torch.float32,
    ), f"Only bfloat16 and float32 are supported, got {input_tensor.dtype}"
    assert (
        input_tensor.dtype == output_tensor.dtype
    ), f"Input and output dtypes must match: {input_tensor.dtype} vs {output_tensor.dtype}"
    assert are_tensors_nvls_eligible(
        output_tensor
    ), "Output tensor must be 16-byte divisible on Hopper+ for NVLS."
    assert (
        input_tensor.numel() % output_tensor.numel() == 0
        and input_tensor.numel() // output_tensor.numel() == symm_mem_hdl.world_size
    ), "Input numel must be exactly world_size * output numel for reduce-scatter."

    reduce_f32 = input_tensor.dtype == torch.float32
    numel_per_thread, num_blocks, config = _kernel_launch_config(
        output_tensor.element_size(), input_tensor.numel(), symm_mem_hdl.world_size, **kwargs
    )
    _multimem_reduce_scatter_kernel[(num_blocks, 1, 1)](
        output_tensor.data_ptr(),
        symm_mem_hdl.multicast_ptr,
        symm_mem_hdl.signal_pad_ptrs_dev,
        numel=input_tensor.numel(),
        BLOCK_SIZE=config["BLOCK_SIZE"],
        NUMEL_PER_THREAD=numel_per_thread,
        RANK=symm_mem_hdl.rank,
        WORLD_SIZE=symm_mem_hdl.world_size,
        num_warps=config["num_warps"],
        REDUCE_F32=reduce_f32,
    )

    return output_tensor


================================================
FILE: megatron/core/inference/communication/torch_symm_triton/fused_collectives.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import torch

from .barrier import symm_mem_sync
from .multimem_asm import add_v8_bf16_from_u32, asm_rsqrt, ld_128, st_128
from .utils import sync_threads

try:
    import triton
    import triton.language as tl
except ImportError:
    from unittest.mock import MagicMock

    from megatron.core.utils import null_decorator

    triton = MagicMock()
    tl = MagicMock()
    triton.jit = null_decorator


@triton.jit
def unpack_bf16x2(x, mask):
    """
    Unpack x, which is in bf16x2 packed format stored in uint32,
    into two float32 tensors representing the high and low bf16 values.

    Args:
        x: tl.uint32 tensor containing packed bf16x2 values.
        mask: boolean mask tensor, 1 denotes that x is valid.
    Returns:
        x_hi: float32 tensor containing the high bf16 values.
        x_lo: float32 tensor containing the low bf16 values.
    """
    x = x * mask
    x_hi = (x >> 16).cast(tl.uint16).cast(tl.bfloat16, bitcast=True).cast(tl.float32)
    x_lo = x.cast(tl.uint16).cast(tl.bfloat16, bitcast=True).cast(tl.float32)
    return x_hi, x_lo


@triton.jit
def sum_sq(x, y, z, w, mask):
    """
    First computes the squared sum of 8 bf16 values
    packed in x, y, z, w. Then does an SM-wide
    reduction to get the total sqaured sum.
    Args:
        x, y, z, w: tl.uint32 tensors containing packed bf16x2 values.
        mask: boolean mask tensor, 1 denotes that x,y,z,w are valid.
    Returns:
        sq_sum: float32 scalar, the total squared sum.
    """
    x_hi, x_lo = unpack_bf16x2(x, mask)
    y_hi, y_lo = unpack_bf16x2(y, mask)
    z_hi, z_lo = unpack_bf16x2(z, mask)
    w_hi, w_lo = unpack_bf16x2(w, mask)
    # thread local sum
    sq_sum = (
        x_hi * x_hi
        + x_lo * x_lo
        + y_hi * y_hi
        + y_lo * y_lo
        + z_hi * z_hi
        + z_lo * z_lo
        + w_hi * w_hi
        + w_lo * w_lo
    )
    # sm-wide reduction
    sq_sum = tl.sum(sq_sum)
    return sq_sum


@triton.jit
def apply_norm(x, y, z, w, wx, wy, wz, ww, rrms, mask):
    """
    Apply RMS norm to the input bf16x2 tensors x,y,z,w using
    the rms norm weights wx,wy,wz,ww and the reciprocal
    root mean square rrms.
    """
    # todo: try converting to pure ASM code
    x_hi, x_lo = unpack_bf16x2(x, mask)
    y_hi, y_lo = unpack_bf16x2(y, mask)
    z_hi, z_lo = unpack_bf16x2(z, mask)
    w_hi, w_lo = unpack_bf16x2(w, mask)
    wx_hi, wx_lo = unpack_bf16x2(wx, mask)
    wy_hi, wy_lo = unpack_bf16x2(wy, mask)
    wz_hi, wz_lo = unpack_bf16x2(wz, mask)
    ww_hi, ww_lo = unpack_bf16x2(ww, mask)

    x_hi = (x_hi * rrms * wx_hi).cast(tl.bfloat16).cast(tl.uint16, bitcast=True).cast(
        tl.uint32
    ) << 16
    x_lo = (x_lo * rrms * wx_lo).cast(tl.bfloat16).cast(tl.uint16, bitcast=True).cast(tl.uint32)
    y_hi = (y_hi * rrms * wy_hi).cast(tl.bfloat16).cast(tl.uint16, bitcast=True).cast(
        tl.uint32
    ) << 16
    y_lo = (y_lo * rrms * wy_lo).cast(tl.bfloat16).cast(tl.uint16, bitcast=True).cast(tl.uint32)
    z_hi = (z_hi * rrms * wz_hi).cast(tl.bfloat16).cast(tl.uint16, bitcast=True).cast(
        tl.uint32
    ) << 16
    z_lo = (z_lo * rrms * wz_lo).cast(tl.bfloat16).cast(tl.uint16, bitcast=True).cast(tl.uint32)
    w_hi = (w_hi * rrms * ww_hi).cast(tl.bfloat16).cast(tl.uint16, bitcast=True).cast(
        tl.uint32
    ) << 16
    w_lo = (w_lo * rrms * ww_lo).cast(tl.bfloat16).cast(tl.uint16, bitcast=True).cast(tl.uint32)
    # pack back to bf16x2, to be used by nvls multicast store.
    x = x_hi | x_lo
    y = y_hi | y_lo
    z = z_hi | z_lo
    w = w_hi | w_lo
    return x, y, z, w


@triton.jit
def _multimem_reduce_scatter_residual_add_kernel(
    residual_output_ptr,
    residual_input_ptr,
    rms_norm_weights_ptr,
    multicast_ptr,  # points to symmetric memory buffer
    signal_pad_ptrs,
    num_tokens,
    eps,
    HIDDEN_SIZE: tl.constexpr,
    BLOCK_SIZE: tl.constexpr,
    NUMEL_PER_THREAD: tl.constexpr,
    RANK: tl.constexpr,
    WORLD_SIZE: tl.constexpr,
):
    symm_mem_sync(
        signal_pad_ptrs,
        None,
        RANK,
        WORLD_SIZE,
        hasPreviousMemAccess=False,
        hasSubsequentMemAccess=False,
    )
    sync_threads()

    pid = tl.program_id(axis=0)
    tid = tl.arange(0, BLOCK_SIZE)

    tokens_per_rank = tl.cdiv(num_tokens, WORLD_SIZE)
    numel_per_token = tl.cdiv(HIDDEN_SIZE, NUMEL_PER_THREAD)
    numel_per_rank = tokens_per_rank * numel_per_token

    # each program handles 1 token at a time
    program_offset = pid * numel_per_token
    thread_mask = tid < numel_per_token

    for token_offset in range(pid, tokens_per_rank, tl.num_programs(axis=0)):
        # Step 1: - reduce-scatter + residual add for this token + collect sq sum
        program_offset = token_offset * numel_per_token
        sq_sum_ = 0.0
        for thread_offset in range(0, numel_per_token, BLOCK_SIZE):
            offsets = program_offset + thread_offset + tid
            mask = (offsets < numel_per_rank) & (thread_mask)
            multicast_ptrs = (
                multicast_ptr.to(tl.pointer_type(tl.uint64)) + (RANK * numel_per_rank + offsets) * 2
            )
            res_out_ptrs = residual_output_ptr.to(tl.pointer_type(tl.uint64)) + offsets * 2
            res_in_ptrs = residual_input_ptr.to(tl.pointer_type(tl.uint64)) + offsets * 2
            # reduce-scatter
            (x, y, z, w) = ld_128(multicast_ptrs, mask=mask, multicast_op=True)
            # load residual
            (rx, ry, rz, rw) = ld_128(res_in_ptrs, mask=mask, multicast_op=False)
            # add residual
            (x, y, z, w) = add_v8_bf16_from_u32(x, y, z, w, rx, ry, rz, rw)
            # store residual
            st_128(res_out_ptrs, x, y, z, w, mask=mask, multicast_op=False)
            # update squared sum for computing the norm later
            sq_sum_ += sum_sq(x, y, z, w, mask=mask)

        # sum_sq is now the sum of squares for this token
        # it is a SM-wide reduction, so no need to sync_threads()
        mean_sq = sq_sum_ / HIDDEN_SIZE
        rrms = asm_rsqrt(mean_sq, eps)

        # Step 2 - apply-rms-norm + all-gather
        for thread_offset in range(0, numel_per_token, BLOCK_SIZE):
            offsets = program_offset + thread_offset + tid
            # first offset is a token offset
            # second offset is a hidden-dim offset (in units of 128-bit)
            mask = (offsets < numel_per_rank) & (thread_mask)

            multicast_ptrs = (
                multicast_ptr.to(tl.pointer_type(tl.uint64)) + (RANK * numel_per_rank + offsets) * 2
            )
            res_out_ptrs = residual_output_ptr.to(tl.pointer_type(tl.uint64)) + offsets * 2

            rms_norm_weights_ptrs = (
                rms_norm_weights_ptr.to(tl.pointer_type(tl.uint64)) + (thread_offset + tid) * 2
            )

            (rx, ry, rz, rw) = ld_128(res_out_ptrs, mask=mask, multicast_op=False)
            (wx, wy, wz, ww) = ld_128(rms_norm_weights_ptrs, mask=mask, multicast_op=False)
            (nx, ny, nz, nw) = apply_norm(rx, ry, rz, rw, wx, wy, wz, ww, rrms, mask)
            st_128(multicast_ptrs, nx, ny, nz, nw, mask=mask, multicast_op=True)

    sync_threads()
    symm_mem_sync(
        signal_pad_ptrs,
        None,
        RANK,
        WORLD_SIZE,
        hasPreviousMemAccess=True,
        hasSubsequentMemAccess=True,
    )


def fused_multimem_rs_add_norm_ag(
    residual_output_tensor: torch.Tensor,
    input_tensor: torch.Tensor,
    symm_mem_hdl,
    residual_input_tensor: torch.Tensor,
    rms_norm_weights: torch.Tensor,
    eps: float,
) -> torch.Tensor:
    """
    Calls a multicast reduce-scatter + residual add + rms norm + all-gather
    triton kernel. Writes out the output of the residual add to residual_output_tensor.
    The output of the full kernel is written in-place to the symmetric memory buffer.
    input_tensor must be a symmetric memory buffer.
    Args:
        residual_output_tensor: torch.Tensor to write the output of the residual add.
        input_tensor: torch.Tensor, symmetric memory buffer to read the input from.
        symm_mem_hdl: _SymmetricMemory handle for the symmetric memory buffer.
        residual_input_tensor: torch.Tensor, the residual input to be added.
        rms_norm_weights: torch.Tensor, the weights for rms norm.
        eps: float, epsilon value for rms norm.
    Returns:
        residual_output_tensor: torch.Tensor, the output of the full fused operation.
    """
    WARP_SIZE = 32
    MAX_NUM_BLOCKS = 128
    MAX_BLOCK_SIZE = 1024
    BYTES_PER_THREAD = 16

    assert input_tensor.dtype == torch.bfloat16, "Only bfloat16 is supported for now."
    assert residual_output_tensor.dtype == torch.bfloat16, "Only bfloat16 is supported for now."
    assert residual_input_tensor.dtype == torch.bfloat16, "Only bfloat16 is supported for now."

    # this evaluates to 128 for bf16.
    # each thread will process 128 bits (8 bf16 values) at a time.
    numel_per_thread = BYTES_PER_THREAD // residual_input_tensor.element_size()

    assert (
        input_tensor.numel() % numel_per_thread == 0
    ), "The number of elements must be 128-bit aligned."

    num_threads = triton.cdiv(input_tensor.numel() // numel_per_thread, symm_mem_hdl.world_size)

    if num_threads < MAX_BLOCK_SIZE:
        block_size = 1
        while block_size < num_threads:
            block_size *= 2
        num_warps = block_size // WARP_SIZE
        num_blocks = 1
    else:
        block_size = MAX_BLOCK_SIZE
        num_warps = MAX_BLOCK_SIZE // WARP_SIZE
        num_blocks = min(triton.cdiv(num_threads, MAX_BLOCK_SIZE), MAX_NUM_BLOCKS)

    hsize = input_tensor.size(-1)
    _multimem_reduce_scatter_residual_add_kernel[(num_blocks, 1, 1)](
        residual_output_tensor.data_ptr(),
        residual_input_tensor.data_ptr(),
        rms_norm_weights.data_ptr(),
        symm_mem_hdl.multicast_ptr,
        symm_mem_hdl.signal_pad_ptrs_dev,
        input_tensor.numel() // hsize,
        eps=eps,
        HIDDEN_SIZE=hsize,
        BLOCK_SIZE=block_size,
        NUMEL_PER_THREAD=numel_per_thread,
        RANK=symm_mem_hdl.rank,
        WORLD_SIZE=symm_mem_hdl.world_size,
        num_warps=num_warps,
    )

    return residual_output_tensor


================================================
FILE: megatron/core/inference/communication/torch_symm_triton/multimem_asm.py
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
# pylint: disable=line-too-long

# Adapted from https://github.com/yifuwang/symm-mem-recipes.git


from unittest.mock import MagicMock

from megatron.core.utils import null_decorator

try:
    import triton
    import triton.language as tl
except ImportError:
    triton = MagicMock()
    tl = MagicMock()
    triton.jit = null_decorator


@triton.jit
def ld_128(ptr, mask, multicast_op: tl.constexpr, reduce_f32: tl.constexpr = False):
    """
    Loads 128 bits from memory into registers.

    This function abstracts two distinct hardware behaviors based on `multicast_op`:

    1.  **Standard Load (`multicast_op=False`)**:
        -   **Semantics:** Local Global Memory Load.
        -   **Action:** Reads 128 bits from `ptr` in global memory into the local register file.

    2.  **Multicast Reduce-Load (`multicast_op=True`)**:
        -   **Semantics:** "Pull" Reduction over NVLink.
        -   **Action:** Simultaneously reads 128 bits from the *same* address across all peer GPUs
            in the multicast group, sums them, and loads the result into the local register file.
        -   **Hardware:** Uses `multimem.ld_reduce` (Hopper+).
        -   When `reduce_f32=False` (default): bf16x2 addition with f32 accumulation
            (128 bits = 8 x bf16, 2 per register).
        -   When `reduce_f32=True`: native f32 addition
            (128 bits = 4 x fp32, 1 per register).

    Args:
        ptr: Memory pointer to the source buffer.
        mask: Boolean predicate. If False, the operation is skipped (no-op).
        multicast_op (tl.constexpr): Toggles between standard load (False)
            and multicast-reduce (True).
        reduce_f32 (tl.constexpr): When True and multicast_op=True, uses f32 reduction
            instead of bf16x2 reduction. Default False.

    Returns:
        Four 32-bit registers (tl.uint32), representing 128 bits of loaded data.
    """
    if multicast_op:
        if reduce_f32:
            # fp32 reduction: multimem.ld_reduce.add.v4.f32
            # Each 128-bit load reduces 4 x fp32 values across peers.
            return tl.inline_asm_elementwise(
                """
                {
                    .reg .pred %p0;
                    setp.ne.s32 %p0, $5, 1;
                    @%p0 bra end;
                    multimem.ld_reduce.relaxed.sys.global.add.v4.f32 {$0, $1, $2, $3}, [$4];
                    end:
                }
                """,
                "=r,=r,=r,=r,l,r",
                args=[ptr, mask.to(tl.int32)],
                dtype=(tl.uint32, tl.uint32, tl.uint32, tl.uint32),
                is_pure=True,
                pack=1,
            )
        else:
            # bf16x2 reduction with f32 accumulation: multimem.ld_reduce.add.acc::f32.v4.bf16x2
            # Each 128-bit load reduces 8 x bf16 values (packed as 4 x bf16x2) across peers.
            return tl.inline_asm_elementwise(
                """
                {
                    .reg .pred %p0;
                    setp.ne.s32 %p0, $5, 1;
                    @%p0 bra end;
                    multimem.ld_reduce.relaxed.sys.global.add.acc::f32.v4.bf16x2 {$0, $1, $2, $3}, [$4]; 
                    end:
                }
                """,
                "=r,=r,=r,=r,l,r",
                args=[ptr, mask.to(tl.int32)],
                dtype=(tl.uint32, tl.uint32, tl.uint32, tl.uint32),
                is_pure=True,
                pack=1,
            )
    else:
        return tl.inline_asm_elementwise(
            """
        {
            .reg .pred %p0;
            setp.ne.s32 %p0, $5, 1;
            @%p0 bra end;
            ld.global.v4.u32 {$0, $1, $2, $3}, [$4];
            end:
        }
        """,
            "=r,=r,=r,=r,l,r",
            args=[ptr, mask.to(tl.int32)],
            dtype=(tl.uint32, tl.uint32, tl.uint32, tl.uint32),
            is_pure=True,
            pack=1,
        )


@triton.jit
def st_128(ptr, x, y, z, w, mask, multicast_op):
    """
    Stores 128 bits (8 x bf16) from registers to memory.

    This function abstracts two distinct hardware behaviors based on `multicast_op`:

    1.  **Standard Store (`multicast_op=False`)**:
        -   **Semantics:** Local Global Memory Store.
        -   **Action:** Writes 128 bits from local registers to `ptr` in global memory.

    2.  **Multicast Store (`multicast_op=True`)**:
        -   **Semantics:** "Push" Broadcast over NVLink.
        -   **Action:** Writes 128 bits from local registers to the `ptr` address in
            the global memory of **all** peer GPUs in the multicast group simultaneously.
        -   **Hardware:** Uses `multimem.st` (Hopper+).
        -   **Use Case:** The "Broadcast" or "All-Gather" step in collective operations.

    Args:
        ptr: Memory pointer to the destination buffer.
        x, y, z, w: Four 32-bit registers containing the data to store.
        mask: Boolean predicate. If False, the store is skipped.
        multicast_op (tl.constexpr): Toggles between standard store (False)
        and multicast broadcast (True).
    """
    # PTX Assembly Logic:
    # 1. @$6: Predication. Only execute if argument 6 (mask) is True.
    # 2. Opcode Selection:
    #    - 'multimem.st...v4.f32': Broadcasts data to all peers.
    #      (Note: .f32 type used for bit-movement, equivalent to .u32 for storage).
    #    - 'st.global...v4.u32': Standard 128-bit memory write.
    # 3. Operands:
    #    - [$1]: Destination memory address.
    #    - {$2, $3, $4, $5}: Source registers containing data.
    if multicast_op:
        return tl.inline_asm_elementwise(
            """
            {
                .reg .pred %p0;
                setp.ne.s32 %p0, $6, 1;
                @%p0 bra end;
                multimem.st.relaxed.sys.global.v4.f32 [$1], {$2, $3, $4, $5};
                end:
            }
            """,
            "=r,l,r,r,r,r,r",
            args=[ptr, x, y, z, w, mask.to(tl.int32)],
            dtype=(tl.uint32),
            is_pure=False,
            pack=1,
        )
    else:
        return tl.inline_asm_elementwise(
            """
        {
            .reg .pred %p0;
            setp.ne.s32 %p0, $6, 1;
            @%p0 bra end;
            st.global.v4.f32 [$1], {$2, $3, $4, $5};
            end:
        }
        """,
            "=r,l,r,r,r,r,r",
            args=[ptr, x, y, z, w, mask.to(tl.int32)],
            dtype=(tl.uint32),
            is_pure=False,
            pack=1,
        )


@triton.jit
def add_v8_bf16_from_u32(
    a0,
    a1,
    a2,
    a3,  # First vector of 8 bf16s, packed in 4 uint32s
    b0,
    b1,
    b2,
    b3,  # Second vector of 8 bf16s, packed in 4 uint32s
):
    """
    Adds two vectors of 8 bfloat16 numbers.
    Each vector is passed as four tl.uint32 tensors.
    Returns the result as a tuple of four tl.uint32 tensors.
    """
    return tl.inline_asm_elementwise(
        """
        {
            add.bf16x2 $0, $4, $8;
            add.bf16x2 $1, $5, $9;
            add.bf16x2 $2, $6, $10;
            add.bf16x2 $3, $7, $11;
        }
        """,
        # 8 outputs (=r), 8 inputs (r)
        "=r,=r,=r,=r,r,r,r,r,r,r,r,r",
        args=[a0, a1, a2, a3, b0, b1, b2, b3],
        dtype=(tl.uint32, tl.uint32, tl.uint32, tl.uint32),
        is_pure=True,
        pack=1,
    )


@triton.jit
def asm_rsqrt(x, eps):
    """
    Computes the reciprocal square root of a float32 number using inline assembly.
    """
    return tl.inline_asm_elementwise(
        """
        {
            add.f32 $1, $1, $2;
            rsqrt.approx.f32 $0, $1;
        }
        """,
        "=f, f, f",
        args=[x, eps],
        dtype=(tl.float32),
        is_pure=True,
        pack=1,
    )


================================================
FILE: megatron/core/inference/communication/torch_symm_triton/utils.py
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.

# Adapted from: https://github.com/meta-pytorch/kraken.git

from unittest.mock import MagicMock

import torch

from megatron.core.utils import null_decorator

try:
    import triton
    import triton.language as tl
except ImportError:
    triton = MagicMock()
    tl = MagicMock()
    triton.jit = null_decorator


def is_device_nvls_capable(device: torch.device) -> bool:
    """Check if the device supports NVLS (multicast) collectives.
    Requires CUDA Hopper+ (SM >= 9)."""
    return device.type == "cuda" and torch.cuda.get_device_properties(device).major >= 9


def are_tensors_nvls_eligible(*tensors: torch.Tensor) -> bool:
    """Check if tensors are eligible for NVLS (multicast) collectives.

    Requirements:
    - Hopper+ GPU (SM >= 9)
    - All tensor byte sizes are divisible by 16 (128-bit), since NVLS
      kernels process data in 128-bit chunks.
    """
    if not tensors:
        return False
    return is_device_nvls_capable(tensors[0].device) and all(
        t.element_size() * t.numel() % 16 == 0 for t in tensors
    )


@triton.jit
def get_tid():
    """
    Returns the thread IDs in x, y, z dimensions.
    """
    return tl.inline_asm_elementwise(
        """
        mov.u32 $0, %tid.x;
        mov.u32 $1, %tid.y;
        mov.u32 $2, %tid.z;
        """,
        "=r,=r,=r",
        [],
        dtype=(tl.uint32, tl.uint32, tl.uint32),
        is_pure=True,
        pack=1,
    )


@triton.jit
def get_ntid():
    """
    Returns the number of threads in x, y, z dimensions.
    """
    return tl.inline_asm_elementwise(
        """
        mov.u32 $0, %ntid.x;
        mov.u32 $1, %ntid.y;
        mov.u32 $2, %ntid.z;
        """,
        "=r,=r,=r",
        [],
        dtype=(tl.uint32, tl.uint32, tl.uint32),
        is_pure=True,
        pack=1,
    )


@triton.jit
def get_flat_tid():
    """
    Calculates a unique, one-dimensional ID for each thread within its thread block.
    """
    tid_x, tid_y, tid_z = get_tid()
    ntid_x, ntid_y, _ = get_ntid()
    return tid_z * ntid_y * ntid_x + tid_y * ntid_x + tid_x


@triton.jit
def get_flat_bid():
    """
    Calculates a unique, one-dimensional ID for each block within the grid."""
    return (
        tl.program_id(2) * tl.num_programs(1) * tl.num_programs(0)
        + tl.program_id(1) * tl.num_programs(0)
        + tl.program_id(0)
    )


@triton.jit
def sync_threads():
    """
    Synchronize all threads within a block.
    """
    tl.inline_asm_elementwise("bar.sync 0;", "=r", [], dtype=tl.int32, is_pure=False, pack=1)


================================================
FILE: megatron/core/inference/communication_utils.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from typing import List, Optional

import torch
from torch.distributed import ProcessGroup

from megatron.core import parallel_state


def is_pipeline_first_stage(pp_group: ProcessGroup):
    """Check if the current process is the first stage of the pipeline"""
    if pp_group is None:
        # set ignore_virtual=True since vpp is not used in inference
        return parallel_state.is_pipeline_first_stage(ignore_virtual=True)
    else:
        return pp_group.rank() == 0


def is_pipeline_last_stage(pp_group: ProcessGroup):
    """Check if the current process is the last stage of the pipeline"""
    if pp_group is None:
        # set ignore_virtual=True since vpp is not used in inference
        return parallel_state.is_pipeline_last_stage(ignore_virtual=True)
    else:
        return pp_group.rank() == pp_group.size() - 1


def _is_cuda(tensor):
    """Check if a tensor is not none and is cuda."""
    assert tensor is not None
    assert tensor.is_cuda


def _is_cuda_contiguous(tensor):
    """Check if a tensor is not none, is cuda, and is contiguous."""
    _is_cuda(tensor)
    assert tensor.is_contiguous()


def broadcast_from_last_pipeline_stage(
    size: List[int],
    dtype: torch.dtype,
    tensor: Optional[torch.Tensor] = None,
    pp_group: Optional[ProcessGroup] = None,
):
    """Broadcast a tensor from last pipeline stage to all ranks.

    Args:
        size: Expected tensor size
        dtype: Expected tensor dtype
        tensor: Tensor to broadcast (only on last stage)
        pp_group: Custom process group (if None, uses global state)
    """
    # Use custom process group or fall back to global state
    if pp_group is None:
        pp_group = parallel_state.get_pipeline_model_parallel_group()
        last_rank = parallel_state.get_pipeline_model_parallel_last_rank()

        # add ignore_virtual=True since vpp is not used in inference
        is_last_stage = parallel_state.is_pipeline_last_stage(ignore_virtual=True)
    else:
        # Lists of ProcessGroups are used for multimodal inference but not supported here
        assert isinstance(
            pp_group, ProcessGroup
        ), "pp_group must be a single ProcessGroup, not a list of ProcessGroups"
        last_rank = torch.distributed.get_process_group_ranks(pp_group)[pp_group.size() - 1]
        is_last_stage = pp_group.rank() == pp_group.size() - 1

    if is_last_stage:
        assert size == list(
            tensor.shape
        ), f"Expected tensor of shape {size} but got {list(tensor.shape)}"
        assert dtype == tensor.dtype, f"Expected tensor of type {dtype} but got {tensor.dtype}"
        _is_cuda_contiguous(tensor)
    else:
        tensor = torch.empty(size, dtype=dtype, device=torch.cuda.current_device())

    # Broadcast the tensor
    torch.distributed.broadcast(tensor, src=last_rank, group=pp_group)
    return tensor


def recv_from_prev_pipeline_rank_(
    recv_buffer: torch.Tensor = None, pp_group: Optional[ProcessGroup] = None
):
    """Receive from previous pipeline stage and update the input buffer inplace.

    Args:
        recv_buffer: Buffer to receive data into
        pp_group: Custom process group (if None, uses global state)
    """
    # Determine previous rank
    if pp_group is None:
        prev_rank = parallel_state.get_pipeline_model_parallel_prev_rank()
    else:
        # Lists of ProcessGroups are used for multimodal inference but not supported here
        assert isinstance(
            pp_group, ProcessGroup
        ), "pp_group must be a single ProcessGroup, not a list of ProcessGroups"
        prev_rank = torch.distributed.get_process_group_ranks(pp_group)[
            (pp_group.rank() - 1) % pp_group.size()
        ]

    # Create receive operation
    recv_prev_op = torch.distributed.P2POp(torch.distributed.irecv, recv_buffer, prev_rank)

    reqs = torch.distributed.batch_isend_irecv([recv_prev_op])
    for req in reqs:
        req.wait()
    # To protect against race condition when using batch_isend_irecv().
    torch.cuda.synchronize()


def send_to_next_pipeline_rank(
    tensor: torch.Tensor = None, pp_group: Optional[ProcessGroup] = None
):
    """Send output to the next pipeline stage.

    Args:
        tensor: Tensor to send
        pp_group: Custom process group (if None, uses global state)
    """
    # Determine next rank
    if pp_group is None:
        next_rank = parallel_state.get_pipeline_model_parallel_next_rank()
    else:
        # Lists of ProcessGroups are used for multimodal inference but not supported here
        assert isinstance(
            pp_group, ProcessGroup
        ), "pp_group must be a single ProcessGroup, not a list of ProcessGroups"
        next_rank = torch.distributed.get_process_group_ranks(pp_group)[
            (pp_group.rank() + 1) % pp_group.size()
        ]

    # Create send operation
    send_next_op = torch.distributed.P2POp(torch.distributed.isend, tensor, next_rank)

    reqs = torch.distributed.batch_isend_irecv([send_next_op])
    for req in reqs:
        req.wait()
    # To protect against race condition when using batch_isend_irecv().
    torch.cuda.synchronize()


def broadcast_tensor(size, dtype, tensor=None, rank=0, data_parallel=False):
    """Given size and type of a tensor on all ranks and the tensor value
    only on a specific rank, broadcast from that rank to all other ranks.

    Args:
        data_parallel (bool): Broadcast across a single data parallel model replica.
    """
    if data_parallel:
        rank = parallel_state.get_model_parallel_src_rank()

    if torch.distributed.get_rank() == rank:
        _is_cuda_contiguous(tensor)
    else:
        tensor = torch.empty(size, dtype=dtype, device=torch.cuda.current_device())

    group = None
    if data_parallel:
        group = parallel_state.get_model_parallel_group()

    torch.distributed.broadcast(tensor, rank, group=group)

    return tensor


def broadcast_list(size, dtype, list_values=None, rank=0, data_parallel=False):
    """Broadcast a list of values with a given type.

    Args:
        data_parallel (bool): Broadcast across a single data parallel model replica.
    """

    tensor = None

    if data_parallel:
        if parallel_state.get_model_parallel_src_rank() == torch.distributed.get_rank():
            tensor = torch.tensor(list_values, dtype=dtype, device=torch.cuda.current_device())

        rank = parallel_state.get_model_parallel_src_rank()
    else:
        if torch.distributed.get_rank() == rank:
            tensor = torch.tensor(list_values, dtype=dtype, device=torch.cuda.current_device())

    return broadcast_tensor(size, dtype, tensor=tensor, rank=rank, data_parallel=data_parallel)


def broadcast_int_list(size, int_list=None, rank=0, data_parallel=False):
    """Broadcast a list of integer values.

    Args:
        data_parallel (bool): Broadcast across a single data parallel model replica.
    """

    return broadcast_list(
        size, torch.int64, list_values=int_list, rank=rank, data_parallel=data_parallel
    )


def broadcast_float_list(size, float_list=None, rank=0, data_parallel=False):
    """Broadcast a list of float values.

    Args:
        data_parallel (bool): Broadcast across a single data parallel model replica.
    """

    return broadcast_list(
        size, torch.float32, list_values=float_list, rank=rank, data_parallel=data_parallel
    )


================================================
FILE: megatron/core/inference/config.py
================================================
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from dataclasses import dataclass
from enum import Enum
from typing import List, Optional, Tuple

import torch

from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.transformer.module import MegatronModule
from megatron.core.utils import get_attr_wrapped_model


@dataclass
class MambaInferenceStateConfig:
    """
    Config for initializing Mamba model inference state tensors.

    Note that we maintain separate metadata for decode, regular prefill, and
    chunked prefill requests because the Mamba kernels do not yet support mixing
    these. Once the kernels have been updated we can simplify this code.
    """

    layer_type_list: List[str]
    """
    A list of strings that indicates the layer type (Mamba / Attention / MLP) for each layer.
    See `megatron/core/ssm/mamba_hybrid_layer_allocation.py` for the list of symbols.
    """

    conv_states_shape: Tuple[int]
    """Mamba conv states shape per request."""

    ssm_states_shape: Tuple[int]
    """Mamba SSM states shape per request."""

    conv_states_dtype: torch.dtype
    """The dtype to use for the Mamba conv state tensor. Defaults to the model dtype."""

    ssm_states_dtype: torch.dtype
    """The dtype to use for the Mamba SSM state tensor. Defaults to the model dtype."""

    mamba_chunk_size: int = 128
    """The chunk size used by the Mamba SSM Triton kernels."""

    @classmethod
    def from_model(
        cls,
        model: MegatronModule,
        conv_states_dtype: Optional[torch.dtype] = None,
        ssm_states_dtype: Optional[torch.dtype] = None,
    ) -> Optional["MambaInferenceStateConfig"]:
        """Returns Mamba inference state config from the model if it is a hybrid model."""
        from megatron.core.ssm.mamba_hybrid_layer_allocation import Symbols

        decoder = get_attr_wrapped_model(model, "decoder")
        layer_type_list = getattr(decoder, "layer_type_list", None)
        if layer_type_list is not None and Symbols.MAMBA in layer_type_list:
            (mamba_conv_states_shape, mamba_ssm_states_shape) = (
                decoder.mamba_state_shapes_per_request()
            )
            if conv_states_dtype is None:
                conv_states_dtype = model.config.params_dtype
            if ssm_states_dtype is None:
                ssm_states_dtype = model.config.params_dtype
            mamba_chunk_size = 128
            for layer_type, layer in zip(decoder.layer_type_list, decoder.layers):
                if layer_type == Symbols.MAMBA and hasattr(layer, 'mixer'):
                    mamba_chunk_size = layer.mixer.chunk_size
                    break
            return cls(
                layer_type_list=layer_type_list,
                conv_states_shape=mamba_conv_states_shape,
                ssm_states_shape=mamba_ssm_states_shape,
                conv_states_dtype=conv_states_dtype,
                ssm_states_dtype=ssm_states_dtype,
                mamba_chunk_size=mamba_chunk_size,
            )
        return None


class PrefixCachingEvictionPolicy(str, Enum):
    """Eviction policy for prefix caching blocks.

    Only applies when enable_prefix_caching is True.
    """

    REF_ZERO = "ref_zero"
    """Deregister blocks immediately when ref_count hits 0. No caching after release."""

    LRU = "lru"
    """Keep released blocks in hash table. Evict oldest ref=0 blocks when space is needed."""


class PrefixCachingCoordinatorPolicy(str, Enum):
    """Routing policy for the DP inference coordinator with prefix caching."""

    LONGEST_PREFIX = "longest_prefix"
    """Route to the rank with the longest consecutive prefix match."""

    FIRST_PREFIX_BLOCK = "first_prefix_block"
    """Route to the rank that has the first block hash cached. O(ranks) check."""

    ROUND_ROBIN = "round_robin"
    """Route requests to ranks in round-robin order, ignoring prefix affinity."""


class KVCacheManagementMode(str, Enum):
    """Mode for handling large tensors (KV cache, Mamba states) during suspend/resume."""

    PERSIST = "persist"
    """Do not deallocate and reallocate large tensors; keep them on GPU."""

    OFFLOAD = "offload"
    """Offload large tensors to CPU during deallocation; onload during allocation."""

    RECOMPUTE = "recompute"
    """Deallocate large tensors and recompute them from scratch during allocation."""


@dataclass
class InferenceConfig:
    """
    Config for inference.

    NOTE: Must remain mutually exclusive with the `TransformerConfig`.
    """

    # =================================
    # KV cache and Mamba states config
    # =================================
    block_size_tokens: int = 256
    """Size of KV cache block size."""

    buffer_size_gb: int = 20
    """
    Buffer size reserved on the GPU for the KV cache.
    If `unified_memory_level` >= 1, then CPU memory is additionally utilized, resulting in a total
    buffer size of `buffer_size_gb + paused_buffer_size_gb`.
    """

    paused_buffer_size_gb: Optional[int] = None
    """
    Portion of buffer reserved for paused requests. Active requests are paused when there are not
    enough active blocks available to continue generating a request. The total buffer size
    (active + paused) depends on `unified_memory_level` (uvm):
        - uvm 0: buffer_size_gb (paused buffer is inclusive)
        - uvm 1: buffer_size_gb + paused_buffer_size_gb
    """

    mamba_inference_state_config: Optional[MambaInferenceStateConfig] = None
    """The Mamba inference state config if the model is a hybrid model."""

    mamba_memory_ratio: Optional[float] = None
    """
    Percentage of memory buffer to allocate for Mamba states. If not specified, allocates Mamba
    state tensors for each KV cache block. Only used for hybrid models.
    """

    max_requests: Optional[int] = None
    """
    Max number of active requests to use for decode-only forward passes.
    This is primarily limited by the combination of `buffer_size_gb` and `max_sequence_length`.
    """

    max_tokens: Optional[int] = None
    """
    Max number of tokens to use for forward passes. This is primarily limited by prefill activation
    memory usage. (Defaults to 16384).
    """

    unified_memory_level: int = 0
    """
    Sets unified memory usage within the dynamic inference context.
    The levels are:
        0) no unified memory (default)
        1) allocate `memory_buffer` in unified memory.
    Eventually, additional levels will be included to control other tensors within the context.
    """

    kv_cache_management_mode: KVCacheManagementMode = KVCacheManagementMode.PERSIST
    """
    Mode used to determine how large tensors are handled by the allocate and deallocate methods.
    See `KVCacheManagementMode` for options.
    """

    # =================================
    # CUDA graph config
    # =================================
    num_cuda_graphs: Optional[int] = None
    """
    Maximum number of cuda graphs to capture, where the cuda graph batch sizes range from 1 to
    `max_requests`. Due to rounding, the actual number of cuda graphs may not equal this argument.
    """

    cuda_graph_mixed_prefill_count: Optional[int] = 16
    """ 
    The number of mixed prefill graphs to capture if mixed prefill/decode graphs are enabled.
    """

    use_cuda_graphs_for_non_decode_steps: bool = True
    """
    Whether to use CUDA graphs for non-decode steps.
    """

    static_kv_memory_pointers: bool = False
    """
    Whether the KV cache (and Mamba states) will reside at the same memory addresses
    after suspend/resume as before. When True, CUDA graphs that reference these buffers
    remain valid across suspend/resume cycles and do not need to be recaptured.
    Requires either UVM or `torch_memory_saver` when `kv_cache_management_mode` is not PERSIST.
    """

    # =================================
    # Model config
    # =================================
    max_sequence_length: int = 2560
    """Max possible sequence length (prompt + output) that will occur."""

    pg_collection: Optional[ProcessGroupCollection] = None
    """A `ProcessGroupCollection` for distributed execution."""

    use_flashinfer_fused_rope: Optional[bool] = False
    """
    If True, use flashinfer's fused rope implementation.
    If None, defaults to using flash-infer if available.
    """

    materialize_only_last_token_logits: bool = True
    """
    Whether to only materialize logits for the last token. This should be set to False
    if returning log probs.
    """

    # =================================
    # Engine config
    # =================================
    enable_chunked_prefill: bool = False
    """Whether to enable chunked prefill."""

    num_speculative_tokens: int = 0
    """The number of speculative tokens to generate for decode steps."""

    enable_prefix_caching: bool = False
    """Whether to enable prefix caching for KV cache block sharing."""

    prefix_caching_eviction_policy: PrefixCachingEvictionPolicy = (
        PrefixCachingEvictionPolicy.REF_ZERO
    )
    """Eviction policy for prefix caching blocks. See `PrefixCachingEvictionPolicy` for options.

    Only applies when enable_prefix_caching is True.
    """

    prefix_caching_coordinator_policy: PrefixCachingCoordinatorPolicy = (
        PrefixCachingCoordinatorPolicy.FIRST_PREFIX_BLOCK
    )
    """Routing policy for the DP inference coordinator. See
    `PrefixCachingCoordinatorPolicy` for options.

    Only applies when enable_prefix_caching is True and using a coordinator.
    """

    prefix_caching_mamba_gb: Optional[float] = None
    """GPU memory budget (in GB) for the Mamba state cache used by prefix caching
    on hybrid models. Each cache slot stores SSM and conv states for all Mamba layers
    at a single block boundary. When set, Mamba states at KV divergence and last-aligned
    block boundaries are cached and reused across requests with matching prefixes."""

    use_triton_conv1d: bool = False
    """Use Triton varlen conv1d kernel for Mamba prefill instead of
    per-request causal_conv1d_fn calls."""

    # =================================
    # Logging config
    # =================================
    track_paused_request_events: bool = False
    """
    Whether to track paused request events. If True, `add_event_pause()` is called on
    requests when they are paused during bookkeeping.
    """

    track_generated_token_events: bool = False
    """
    Whether to track per-token events with timestamps for each generated token.
    When enabled, each generated token creates a GENERATED_TOKEN event with a
    timestamp, useful for per-token latency analysis.
    """

    metrics_writer: Optional["WandbModule"] = None
    """Wandb module for writing metrics."""

    logging_step_interval: int = 0
    """
    The step interval at which to log inference metrics to wandb.
    Defaults to 0, which means no logging.
    """

    request_metadata_types: Optional[List[Tuple[str, torch.dtype, bool]]] = None
    """
    A list of the per-request metadata types to track. Each entry is a tuple
    consisting of the string label, the target dtype, and whether to store the data on GPU.
    """

    use_synchronous_zmq_collectives: bool = False
    """Whether to use synchronous ZMQ collectives for inference. If True, the 
    all_reduce_max operation will be performed synchronously, which can help reduce 
    performance variability for MoEs.
    """


================================================
FILE: megatron/core/inference/contexts/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import warnings

from .base_context import BaseInferenceContext
from .kv_block_allocator import KVBlockAllocator
from .static_context import StaticInferenceContext

warnings.warn(
    "The following imports from `dynamic_context.py` will be removed "
    "in this file in `megatron-core` 0.14. The imports here result in "
    "a cyclic import issue that causes rotary embeddings to import "
    "from Apex rather than Transformer Engine.",
    DeprecationWarning,
)
from .dynamic_context import (
    ActiveRequestCountOverflowError,
    BlockOverflowError,
    ContextOverflowError,
    DynamicInferenceContext,
    RequestOverflowError,
    TokenOverflowError,
)


================================================
FILE: megatron/core/inference/contexts/attention_context/mamba_metadata.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from typing import Optional

import torch

from megatron.core.inference.batch_dimensions_utils import InferenceBatchDimensions


class MambaMetadata:
    """Manages the metadata tensors required for Mamba layers during inference."""

    def __init__(self, max_requests: int, max_tokens: int, mamba_chunk_size: int = 128):
        """
        Initializes the Mamba slot allocator.

        Args:
            max_requests (int): The maximum number of concurrent requests.
            max_tokens (int): The maximum number of tokens.
            mamba_chunk_size (int): The chunk size used by the Mamba SSM Triton kernels.
        """
        self.max_requests = max_requests
        self.max_tokens = max_tokens
        self.mamba_chunk_size = mamba_chunk_size
        self.device = torch.cuda.current_device()

        # Maximum possible chunks across all batch configurations
        self.max_chunks = max_tokens // mamba_chunk_size + max_requests

        # Map from requests to slots in the static Mamba state buffer
        self.request_to_mamba_state_idx = torch.full(
            (self.max_requests,), -1, dtype=torch.int32, device=torch.cuda.current_device()
        )

        # Map from requests to slots in the static Mamba state buffer for active decode requests
        self._batch_indices_decode_buffer = torch.full(
            (self.max_requests,), -1, dtype=torch.int32, device=self.device
        )

        # Map from requests to slots in the static Mamba state buffer for active prefill requests
        self._batch_indices_prefill_buffer = torch.full(
            (self.max_requests,), -1, dtype=torch.int32, device=self.device
        )

        # Map from token id to request id for active prefill requests
        self._seq_idx_buffer = torch.full(
            (1, self.max_tokens), -1, dtype=torch.int32, device=self.device
        )

        # Cumulative sequence lengths for active prefill requests
        self._cu_seqlens_buffer = torch.zeros(
            (self.max_requests + 1,), dtype=torch.int32, device=self.device
        )

        # Tuple of (active decode request count, active prefill request count)
        self._device_decode_prefill_buffer = torch.zeros(
            (2,), dtype=torch.int32, device=self.device
        )

        # SSM chunk boundaries for varlen kernel
        self._cu_chunk_seqlens_buffer = torch.zeros(
            self.max_chunks + 1, dtype=torch.int32, device=self.device
        )

        # Index of the last chunk per sequence
        self._last_chunk_indices_buffer = torch.zeros(
            max_requests, dtype=torch.int32, device=self.device
        )

        # Request ID per chunk
        self._seq_idx_for_varlen_buffer = torch.zeros(
            self.max_chunks, dtype=torch.int32, device=self.device
        )

        # Conv1d per-token metadata (request ID and request start position)
        self._conv_seq_idx_buffer = torch.zeros(max_tokens, dtype=torch.int32, device=self.device)
        self._conv_seq_start_buffer = torch.zeros(max_tokens, dtype=torch.int32, device=self.device)

        # Allocator for Mamba state slots
        self.mamba_state_free_slots = torch.arange(
            self.max_requests, dtype=torch.int32, device=torch.cuda.current_device()
        )
        self.mamba_state_free_slot_count = self.max_requests

        self.reset_varlen_metadata()

    def reset(self) -> None:
        """
        Resets all Mamba states and frees all allocated slots.
        """
        self.request_to_mamba_state_idx.fill_(-1)

        self.reset_varlen_metadata()

        # Re-initialize the free slot pool
        self.mamba_state_free_slots = torch.arange(
            self.max_requests, dtype=torch.int32, device=torch.cuda.current_device()
        )
        self.mamba_state_free_slot_count = self.max_requests

    def reset_varlen_metadata(self) -> None:
        """Resets varlen metadata."""
        self.batch_indices_decode = None
        self.batch_indices_prefill = None
        self.cu_seqlens = None
        self.seq_idx = None
        self.device_decode_prefill = None

        # SSM/conv1d precomputed views
        self.cu_chunk_seqlens = None
        self.last_chunk_indices = None
        self.seq_idx_for_varlen = None
        self.conv_seq_idx = None
        self.conv_seq_start = None

        # Python-side precomputed values
        self.real_prefill_token_count = 0
        self.cu_seqlens_list = [0]

    def update(
        self,
        active_mamba_indices: torch.Tensor,
        token_to_request_idx: torch.Tensor,
        cu_seqlens: torch.Tensor,
        batch_dimensions: InferenceBatchDimensions,
        padded_batch_dimensions: InferenceBatchDimensions,
        enable_chunked_prefill: bool,
    ) -> None:
        """
        Updates the dedicated CUDA graph mapping tensor with the indices
        of currently active requests.

        Args:
            active_mamba_indices (Tensor): Tensor containing the Mamba slot indices
                                           for active requests.
            token_to_request_idx (Tensor): Map from token index to request index.
            cu_seqlens (Tensor): Cumulative sequence lengths.
            batch_dimensions (InferenceBatchDimensions): Dimensions of the current batch.
            padded_batch_dimensions (InferenceBatchDimensions): Dimensions of the padded batch.
        """
        real_decode_count = batch_dimensions.decode_req_count
        real_prefill_count = batch_dimensions.prefill_req_count

        padded_decode_count = padded_batch_dimensions.decode_req_count
        padded_prefill_count = padded_batch_dimensions.prefill_req_count
        padded_token_count = padded_batch_dimensions.token_count

        if padded_decode_count > 0:
            # Update decode indices
            self._batch_indices_decode_buffer[:real_decode_count].copy_(
                active_mamba_indices[:real_decode_count]
            )
            if padded_decode_count > real_decode_count:
                self._batch_indices_decode_buffer[real_decode_count:padded_decode_count] = -1
            self.batch_indices_decode = self._batch_indices_decode_buffer[:padded_decode_count]

        if padded_prefill_count > 0:
            # Update prefill indices (all prefill requests go through varlen)
            if real_prefill_count > 0:
                prefill_start_idx = real_decode_count
                self._batch_indices_prefill_buffer[:real_prefill_count].copy_(
                    active_mamba_indices[prefill_start_idx : prefill_start_idx + real_prefill_count]
                )

            if padded_prefill_count > real_prefill_count:
                self._batch_indices_prefill_buffer[real_prefill_count:padded_prefill_count] = -1

            self.batch_indices_prefill = self._batch_indices_prefill_buffer[:padded_prefill_count]

            # Update seq_idx for all prefill requests
            prefill_start_req_idx = real_decode_count
            end_prefill_req_idx = real_decode_count + real_prefill_count

            start_prefill_token_idx = cu_seqlens[prefill_start_req_idx]
            end_prefill_token_idx = cu_seqlens[end_prefill_req_idx]

            seq_len = end_prefill_token_idx - start_prefill_token_idx

            if seq_len > 0:
                # Normalize request IDs to 0-based relative to prefill requests
                self._seq_idx_buffer[:, :seq_len].copy_(
                    token_to_request_idx[start_prefill_token_idx:end_prefill_token_idx]
                    - token_to_request_idx[start_prefill_token_idx]
                )

            if padded_token_count > seq_len:
                self._seq_idx_buffer[:, seq_len:padded_token_count] = -1
            self.seq_idx = self._seq_idx_buffer[:, :padded_token_count]

            # Update cu_seqlens for all prefill requests
            self._cu_seqlens_buffer[0] = 0
            if real_prefill_count > 0:
                self._cu_seqlens_buffer[1 : real_prefill_count + 1].copy_(
                    cu_seqlens[prefill_start_req_idx + 1 : end_prefill_req_idx + 1]
                    - cu_seqlens[prefill_start_req_idx]
                )

            # Pad the rest with the last value (effectively length 0 segments)
            last_val = self._cu_seqlens_buffer[real_prefill_count]
            self._cu_seqlens_buffer[real_prefill_count + 1 : padded_prefill_count + 1].fill_(
                last_val
            )
            self.cu_seqlens = self._cu_seqlens_buffer[: padded_prefill_count + 1]

            # --- Precompute SSM and conv1d metadata for CUDA graph compatibility ---
            # All values the forward pass needs are computed here (before CUDA graph
            # capture/replay) so that the forward pass has no .item() calls or
            # data-dependent control flow.

            # Transfer cu_seqlens to CPU for Python-side precomputation
            cu_seqlens_real = self._cu_seqlens_buffer[: real_prefill_count + 1].tolist()
            self.cu_seqlens_list = cu_seqlens_real
            self.real_prefill_token_count = (
                cu_seqlens_real[real_prefill_count] if real_prefill_count > 0 else 0
            )

            # Build cu_chunk_seqlens, last_chunk_indices, seq_idx_for_varlen.
            # Covers all padded sequences (real + padding). Each sequence is
            # subdivided into chunks of at most mamba_chunk_size tokens. Zero-length
            # sequences get a single zero-length chunk.
            cu_seqlens_all = self._cu_seqlens_buffer[: padded_prefill_count + 1].tolist()
            chunk_size = self.mamba_chunk_size
            chunk_boundaries = [0]
            last_chunk_idx_list = []
            chunk_to_seq_list = []

            for i in range(padded_prefill_count):
                start = cu_seqlens_all[i]
                end = cu_seqlens_all[i + 1]
                pos = start + chunk_size
                while pos < end:
                    chunk_boundaries.append(pos)
                    chunk_to_seq_list.append(i)
                    pos += chunk_size
                chunk_boundaries.append(end)
                chunk_to_seq_list.append(i)
                last_chunk_idx_list.append(len(chunk_boundaries) - 2)

            # Pad to fixed size for CUDA graph compatibility
            padded_max_chunks = padded_token_count // chunk_size + padded_prefill_count
            last_boundary = chunk_boundaries[-1]
            while len(chunk_boundaries) < padded_max_chunks + 1:
                chunk_boundaries.append(last_boundary)
            while len(chunk_to_seq_list) < padded_max_chunks:
                chunk_to_seq_list.append(0)

            # Fill GPU buffers
            n_cu = padded_max_chunks + 1
            self._cu_chunk_seqlens_buffer[:n_cu].copy_(
                torch.tensor(chunk_boundaries[:n_cu], dtype=torch.int32)
            )
            self.cu_chunk_seqlens = self._cu_chunk_seqlens_buffer[:n_cu]

            self._last_chunk_indices_buffer[:padded_prefill_count].copy_(
                torch.tensor(last_chunk_idx_list, dtype=torch.int32)
            )
            self.last_chunk_indices = self._last_chunk_indices_buffer[:padded_prefill_count]

            self._seq_idx_for_varlen_buffer[:padded_max_chunks].copy_(
                torch.tensor(chunk_to_seq_list[:padded_max_chunks], dtype=torch.int32)
            )
            self.seq_idx_for_varlen = self._seq_idx_for_varlen_buffer[:padded_max_chunks]

            # Build conv1d per-token metadata (request ID and request start position)
            real_tokens = self.real_prefill_token_count
            if real_tokens > 0:
                conv_seq_idx_list = []
                conv_seq_start_list = []
                for i in range(real_prefill_count):
                    start = cu_seqlens_real[i]
                    length = cu_seqlens_real[i + 1] - start
                    conv_seq_idx_list.extend([i] * length)
                    conv_seq_start_list.extend([start] * length)
                self._conv_seq_idx_buffer[:real_tokens].copy_(
                    torch.tensor(conv_seq_idx_list, dtype=torch.int32)
                )
                self._conv_seq_start_buffer[:real_tokens].copy_(
                    torch.tensor(conv_seq_start_list, dtype=torch.int32)
                )
            if padded_token_count > real_tokens:
                self._conv_seq_idx_buffer[real_tokens:padded_token_count] = 0
                self._conv_seq_start_buffer[real_tokens:padded_token_count] = 0

            self.conv_seq_idx = self._conv_seq_idx_buffer[:padded_token_count]
            self.conv_seq_start = self._conv_seq_start_buffer[:padded_token_count]

        if padded_decode_count > 0 and padded_prefill_count > 0:
            self._device_decode_prefill_buffer[0] = cu_seqlens[real_decode_count]
            self._device_decode_prefill_buffer[1] = (
                cu_seqlens[real_decode_count + real_prefill_count] - cu_seqlens[real_decode_count]
            )
            self.device_decode_prefill = self._device_decode_prefill_buffer

    def allocate_slot(self) -> Optional[int]:
        """
        Allocates a new slot for a request in the Mamba state buffers.

        Returns:
            int: The index of the allocated slot.
            Returns None if no slots are available.
        """
        if self.mamba_state_free_slot_count == 0:
            return None

        # Get a free slot
        self.mamba_state_free_slot_count -= 1
        mamba_idx = self.mamba_state_free_slots[self.mamba_state_free_slot_count]

        return mamba_idx

    def batch_allocate_slots(self, num_slots: int) -> Optional[torch.Tensor]:
        """
        Allocates new slots for the given number of requests in the Mamba state buffers.

        Returns:
            torch.Tensor: The indices of the allocated slots.
            Returns None if not enough slots are available.
        """
        if self.mamba_state_free_slot_count < num_slots:
            return None

        # Get free slots
        self.mamba_state_free_slot_count -= num_slots
        mamba_idx = self.mamba_state_free_slots[
            self.mamba_state_free_slot_count : self.mamba_state_free_slot_count + num_slots
        ]

        return mamba_idx

    def free_slots(self, request_indices: torch.Tensor) -> None:
        """
        Frees the Mamba state slots associated with the given request indices.

        Args:
            request_indices (Tensor): A 1D tensor of request indices to free.
        """
        # Get the Mamba state indices for finished requests
        mamba_indices_to_free = self.request_to_mamba_state_idx[request_indices]

        # Filter out any invalid indices (e.g., -1)
        mamba_indices_to_free = mamba_indices_to_free[mamba_indices_to_free != -1]
        num_to_free = len(mamba_indices_to_free)

        if num_to_free > 0:
            # Add the freed indices back to the free slot pool
            start_idx = self.mamba_state_free_slot_count
            end_idx = start_idx + num_to_free
            self.mamba_state_free_slots[start_idx:end_idx] = mamba_indices_to_free
            self.mamba_state_free_slot_count = end_idx

        # Invalidate the Mamba state index for the finished requests
        self.request_to_mamba_state_idx[request_indices] = -1


================================================
FILE: megatron/core/inference/contexts/attention_context/metadata_base.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.


class MetadataBase:
    """
    Base class for attention metadata.
    High-performance attention kernels often require input metadata in specific
    formats—such as cumulative query lengths, cumulative key/value lengths,
    and similar structures. Moreover, when using CUDA Graphs, these metadata
    buffers must be statically allocated. This class serves as a unified container
    that manages all such metadata in one place.
    """

    def __init__(self):
        """
        Initialize the metadata.
        """
        self.state_data = {}

    def update(self, *args, **kwargs):
        """
        Construct the metadata from request states.
        """
        pass

    def reset(self):
        """
        Reset the metadata.
        """
        pass

    def tensor_copy_and_pad(
        self,
        tensor_buf,
        unpadded_tensor,
        real_batch_size,
        padded_batch_size,
        is_cumulative_tensor=False,
        pad_value=0,
    ):
        """
        Copy the unpadded tensor to the tensor_buf,
        pad the tensor_buf with zero or the last value of the tensor,
        depending on whether the tensor is cumulative.
        Args:
            tensor_buf: The destination tensor, at least padded_batch_size long.
            unpadded_tensor: The tensor to copy, at least real_batch_size long.
            real_batch_size: The real batch size.
            padded_batch_size: Padded boundary of the tensor.
            is_cumulative_tensor: Whether the tensor is cumulative.
                If True, we pad the tensor_buf with the last value of the unpadded_tensor.
            pad_value: The value to pad the tensor_buf with when the tensor is not cumulative.
        """
        assert real_batch_size <= padded_batch_size
        assert tensor_buf.shape[0] >= padded_batch_size
        assert unpadded_tensor.shape[0] >= real_batch_size
        if is_cumulative_tensor:
            if real_batch_size == 0:
                value = pad_value
            else:
                value = unpadded_tensor[real_batch_size - 1]
        else:
            value = pad_value
        tensor_buf[0:real_batch_size] = unpadded_tensor[:real_batch_size]
        tensor_buf[real_batch_size:padded_batch_size] = value
        return tensor_buf

    def __str__(self):
        """
        Return a string representation of the metadata.
        """
        return "\n".join([f"{key}: {value}" for key, value in self.state_data.items()])


================================================
FILE: megatron/core/inference/contexts/attention_context/mha_metadata.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
import torch

from megatron.core.inference.batch_dimensions_utils import InferenceBatchDimensions

from .metadata_base import MetadataBase


class MHAMetadata(MetadataBase):
    """
    Metadata for MHA layer using flash-attention.
    """

    def __init__(
        self, block_count_total, max_kv_block_count, max_requests, block_size_tokens, max_seqlen
    ):
        super().__init__()
        device = torch.cuda.current_device()
        self.device = device
        self.max_blocks = block_count_total
        self.max_kv_blocks = max_kv_block_count
        self.max_bs = max_requests
        self.max_seqlen = max_seqlen
        self._query_lengths_buf = torch.zeros(self.max_bs, dtype=torch.int32, device=device)
        self._cu_query_seq_lengths_buf = torch.zeros(
            self.max_bs + 1, dtype=torch.int32, device=device
        )
        self._cu_kv_seq_lengths_buf = torch.zeros(self.max_bs + 1, dtype=torch.int32, device=device)
        self._kv_seq_lengths_buf = torch.zeros(self.max_bs, dtype=torch.int32, device=device)
        self._block_table_buf = torch.zeros(
            (self.max_bs, self.max_kv_blocks), dtype=torch.int32, device=device
        )
        self._max_seqlen_q = 0
        self._max_seqlen_k = 0
        self.state_data = {}

    def update(
        self,
        request_query_lengths: torch.Tensor,
        request_kv_length_offsets: torch.Tensor,
        request_to_kv_block_ids: torch.Tensor,
        batch_dimensions: InferenceBatchDimensions,
        padded_batch_dimensions: InferenceBatchDimensions,
        num_speculative_tokens: int = 0,
    ):
        """
        Args:
            request_query_lengths: (>real_batch_size,)
            request_kv_length_offsets: (>real_batch_size,)
            request_to_kv_block_ids: (>real_batch_size, max_kv_blocks)
            batch_dimensions: Configuration object containing real batch settings
            padded_batch_dimensions: Configuration object containing padded batch settings
            num_speculative_tokens: Number of speculative tokens
        """
        # Extract values from configs
        real_batch_size = batch_dimensions.req_count
        padded_active_token_count = padded_batch_dimensions.token_count
        padded_active_request_count = padded_batch_dimensions.req_count

        assert real_batch_size <= padded_active_request_count <= self.max_bs
        assert request_query_lengths.shape[0] == real_batch_size
        assert request_kv_length_offsets.shape[0] == real_batch_size
        assert request_to_kv_block_ids.shape[0] == real_batch_size

        self.tensor_copy_and_pad(
            self._query_lengths_buf,
            request_query_lengths,
            real_batch_size,
            padded_active_request_count,
        )
        self._cu_query_seq_lengths_buf[0] = 0
        self.tensor_copy_and_pad(
            self._cu_query_seq_lengths_buf[1:],
            torch.cumsum(request_query_lengths, dim=0),
            real_batch_size,
            padded_active_request_count,
            is_cumulative_tensor=True,
        )
        self.tensor_copy_and_pad(
            self._kv_seq_lengths_buf,
            request_kv_length_offsets + request_query_lengths,
            real_batch_size,
            padded_active_request_count,
        )
        self.tensor_copy_and_pad(
            self._block_table_buf,
            request_to_kv_block_ids,
            real_batch_size,
            padded_active_request_count,
            pad_value=torch.tensor(self.max_kv_blocks, dtype=torch.int32, device=self.device).fill_(
                -1
            ),
        )
        self._cu_kv_seq_lengths_buf[0] = 0
        self.tensor_copy_and_pad(
            self._cu_kv_seq_lengths_buf[1:],
            torch.cumsum(self._kv_seq_lengths_buf, dim=0),
            real_batch_size,
            padded_active_request_count,
            is_cumulative_tensor=True,
        )

        if padded_batch_dimensions.prefill_req_count == 0:
            self._max_seqlen_q = num_speculative_tokens + 1
        else:
            # Make sure we will launch the prefill kernel for prefill graphs
            self._max_seqlen_q = max(2, padded_batch_dimensions.token_count)

        self._max_seqlen_k = self.max_seqlen

        self.state_data = {
            "query_lengths": self._query_lengths_buf[:padded_active_request_count],
            "cu_query_seq_lengths": self._cu_query_seq_lengths_buf[
                : padded_active_request_count + 1
            ],
            "cu_kv_seq_lengths": self._cu_kv_seq_lengths_buf[: padded_active_request_count + 1],
            "kv_seq_lengths": self._kv_seq_lengths_buf[:padded_active_request_count],
            "block_table": self._block_table_buf[0:padded_active_request_count, :],
            "max_seqlen_q": self._max_seqlen_q,
            "max_seqlen_k": self._max_seqlen_k,
        }

    def reset(self):
        """
        Reset the metadata for the next batch.
        """
        self._query_lengths_buf.fill_(0)
        self._cu_query_seq_lengths_buf.fill_(0)
        self._cu_kv_seq_lengths_buf.fill_(0)
        self._kv_seq_lengths_buf.fill_(0)
        self._block_table_buf.fill_(0)
        self._max_seqlen_q = 0
        self._max_seqlen_k = 0


class GraphedMHAMetadata(MHAMetadata):
    """
    Metadata for MHA layer using flash-attention with CUDA graphs.
    """

    def __init__(
        self, block_count_total, max_kv_block_count, max_requests, block_size_tokens, max_seqlen
    ):
        super().__init__(
            block_count_total, max_kv_block_count, max_requests, block_size_tokens, max_seqlen
        )

    def update(
        self,
        request_query_lengths: torch.Tensor,
        request_kv_length_offsets: torch.Tensor,
        request_to_kv_block_ids: torch.Tensor,
        batch_dimensions: InferenceBatchDimensions,
        padded_batch_dimensions: InferenceBatchDimensions,
        num_speculative_tokens: int = 0,
    ):
        """
        Args:
            request_query_lengths: (>real_batch_size,)
            request_kv_length_offsets: (>real_batch_size,)
            request_to_kv_block_ids: (>real_batch_size, max_kv_blocks)
            batch_dimensions: Configuration object containing real batch settings
            padded_batch_dimensions: Configuration object containing padded batch settings
            num_speculative_tokens: Number of speculative tokens
        """
        super().update(
            request_query_lengths,
            request_kv_length_offsets,
            request_to_kv_block_ids,
            batch_dimensions,
            padded_batch_dimensions,
            num_speculative_tokens,
        )

    def reset(self):
        super().reset()


class NonGraphedMHAMetadata(MHAMetadata):
    """
    Metadata for MHA layer using flash-attention without CUDA graphs.
    """

    def update(
        self,
        request_query_lengths: torch.Tensor,
        request_kv_length_offsets: torch.Tensor,
        request_to_kv_block_ids: torch.Tensor,
        batch_dimensions: InferenceBatchDimensions,
        padded_batch_dimensions: InferenceBatchDimensions,
        num_speculative_tokens: int = 0,
    ):
        """
        Args:
            request_query_lengths: (>real_batch_size,)
            request_kv_length_offsets: (>real_batch_size,)
            request_to_kv_block_ids: (>real_batch_size, max_kv_blocks)
            batch_dimensions: Configuration object containing real batch settings
            padded_batch_dimensions: Configuration object containing padded batch settings
            num_speculative_tokens: Number of speculative tokens
        """
        super().update(
            request_query_lengths,
            request_kv_length_offsets,
            request_to_kv_block_ids,
            batch_dimensions,
            padded_batch_dimensions,
            num_speculative_tokens,
        )
        if len(self.state_data["query_lengths"]) > 0:
            self.state_data["max_seqlen_q"] = torch.max(self.state_data["query_lengths"]).item()
            self.state_data["max_seqlen_k"] = torch.max(self.state_data["kv_seq_lengths"]).item()
        else:
            self.state_data["max_seqlen_q"] = num_speculative_tokens + 1
            self.state_data["max_seqlen_k"] = 1


================================================
FILE: megatron/core/inference/contexts/attention_context/triton/tensor_ops.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from typing import Optional

import torch

try:
    import triton
    import triton.language as tl

    HAVE_TRITON = True
except ImportError:
    from unittest.mock import MagicMock

    from megatron.core.utils import null_decorator

    triton = MagicMock()
    triton.jit = null_decorator
    tl = MagicMock()
    HAVE_TRITON = False


@triton.jit
def _tensor_get_slice_after_kernel(
    INPUT_TENSOR,
    OUTPUT_TENSOR,
    POS_ON_DEVICE,
    INPUT_BATCH_SIZE: tl.constexpr,
    OUTPUT_BATCH_SIZE: tl.constexpr,
    ROW_SIZE: tl.constexpr,
    BLOCK_SIZE: tl.constexpr,
):
    """Kernel to copy rows from INPUT_TENSOR[pos_on_device:] into OUTPUT_TENSOR."""

    pid = tl.program_id(0)
    pos_on_device = tl.load(POS_ON_DEVICE)
    copy_size = INPUT_BATCH_SIZE - pos_on_device

    if pid < copy_size and pid < OUTPUT_BATCH_SIZE:
        input_idx = pos_on_device + pid

        if input_idx < INPUT_BATCH_SIZE:
            row_offsets = tl.arange(0, BLOCK_SIZE)
            row_mask = row_offsets < ROW_SIZE

            input_ptr = INPUT_TENSOR + input_idx * ROW_SIZE + row_offsets
            output_ptr = OUTPUT_TENSOR + pid * ROW_SIZE + row_offsets

            input_data = tl.load(input_ptr, mask=row_mask, other=0.0)
            tl.store(output_ptr, input_data, mask=row_mask)


@triton.jit
def _tensor_merge_kernel(
    TENSOR_A,
    TENSOR_B,
    OUTPUT_TENSOR,
    POS_ON_DEVICE,
    TENSOR_B_BATCH_SIZE: tl.constexpr,
    ROW_SIZE: tl.constexpr,
    BLOCK_SIZE: tl.constexpr,
    OUTPUT_BATCH_SIZE: tl.constexpr,
    IS_INPLACE: tl.constexpr,
):
    """
    Kernel to merge rows from tensor_a and tensor_b into output_tensor.

    - output[:pos_on_device] = tensor_a[:pos_on_device]
    - output[pos_on_device:pos_on_device + tensor_b_batch] = tensor_b[:tensor_b_batch]
    """

    pid = tl.program_id(0)
    pos_on_device = tl.load(POS_ON_DEVICE)

    if pid < pos_on_device:
        if not IS_INPLACE:
            row_offsets = tl.arange(0, BLOCK_SIZE)
            row_mask = row_offsets < ROW_SIZE

            tensor_a_ptr = TENSOR_A + pid * ROW_SIZE + row_offsets
            output_ptr = OUTPUT_TENSOR + pid * ROW_SIZE + row_offsets

            tensor_a_data = tl.load(tensor_a_ptr, mask=row_mask, other=0.0)
            tl.store(output_ptr, tensor_a_data, mask=row_mask)

    elif pid < pos_on_device + TENSOR_B_BATCH_SIZE and pid < OUTPUT_BATCH_SIZE:
        tensor_b_idx = pid - pos_on_device

        if tensor_b_idx < TENSOR_B_BATCH_SIZE:
            row_offsets = tl.arange(0, BLOCK_SIZE)
            row_mask = row_offsets < ROW_SIZE

            tensor_b_ptr = TENSOR_B + tensor_b_idx * ROW_SIZE + row_offsets
            output_ptr = OUTPUT_TENSOR + pid * ROW_SIZE + row_offsets

            tensor_b_data = tl.load(tensor_b_ptr, mask=row_mask, other=0.0)
            tl.store(output_ptr, tensor_b_data, mask=row_mask)


@triton.jit
def _tensor_masked_update_kernel_2d(
    STATES_PTR,
    IDX_PTR,
    NEW_STATES_PTR,
    stride_state_b,
    stride_state_d0,
    stride_new_b,
    stride_new_d0,
    ROW_SIZE,
    BLOCK_SIZE: tl.constexpr,
):
    """Kernel to update values in a 2D states tensor using a mask."""
    pid_batch = tl.program_id(0).to(tl.int64)
    pid_row_chunk = tl.program_id(1).to(tl.int64)

    target_idx = tl.load(IDX_PTR + pid_batch)
    if target_idx == -1:
        return

    row_start_offset = pid_row_chunk * BLOCK_SIZE
    row_offsets = row_start_offset + tl.arange(0, BLOCK_SIZE)
    mask = row_offsets < ROW_SIZE

    # 2D Calculation: base + batch * stride0 + col * stride1
    dst_ptr = (
        STATES_PTR
        + (target_idx.to(tl.int64) * stride_state_b)
        + (row_offsets.to(tl.int64) * stride_state_d0)
    )
    src_ptr = (
        NEW_STATES_PTR
        + (pid_batch * stride_new_b.to(tl.int64))
        + (row_offsets.to(tl.int64) * stride_new_d0)
    )

    val = tl.load(src_ptr, mask=mask)
    tl.store(dst_ptr, val, mask=mask)


@triton.jit
def _tensor_masked_update_kernel_3d(
    STATES_PTR,
    IDX_PTR,
    NEW_STATES_PTR,
    stride_state_b,
    stride_state_d0,
    stride_state_d1,
    stride_new_b,
    stride_new_d0,
    stride_new_d1,
    SIZE_D0,
    SIZE_D1,  # Dimensions of the non-batch axes
    ROW_SIZE,  # Total elements per batch item (D0 * D1)
    BLOCK_SIZE: tl.constexpr,
):
    """Kernel to update values in a 3D states tensor using a mask."""
    pid_batch = tl.program_id(0).to(tl.int64)
    pid_row_chunk = tl.program_id(1).to(tl.int64)

    target_idx = tl.load(IDX_PTR + pid_batch)
    if target_idx == -1:
        return

    # Linear index within the "row" (flattened 3D volume)
    row_start_offset = pid_row_chunk * BLOCK_SIZE
    flat_offsets = row_start_offset + tl.arange(0, BLOCK_SIZE)
    mask = flat_offsets < ROW_SIZE

    # Reconstruct 3D coordinates from linear index
    # Given shape (batch, D0, D1)
    # idx_d1 = flat_idx % D1
    # idx_d0 = flat_idx // D1
    idx_d1 = flat_offsets % SIZE_D1.to(tl.int64)
    idx_d0 = flat_offsets // SIZE_D1.to(tl.int64)

    # Calculate pointers using specific strides
    dst_offset = (
        (target_idx.to(tl.int64) * stride_state_b.to(tl.int64))
        + (idx_d0 * stride_state_d0)
        + (idx_d1 * stride_state_d1)
    )

    src_offset = (
        (pid_batch * stride_new_b.to(tl.int64))
        + (idx_d0 * stride_new_d0)
        + (idx_d1 * stride_new_d1)
    )

    dst_ptr = STATES_PTR + dst_offset
    src_ptr = NEW_STATES_PTR + src_offset

    val = tl.load(src_ptr, mask=mask)
    tl.store(dst_ptr, val, mask=mask)


@triton.jit
def _tensor_masked_update_kernel_4d(
    STATES_PTR,
    IDX_PTR,
    NEW_STATES_PTR,
    stride_state_b,
    stride_state_d0,
    stride_state_d1,
    stride_state_d2,
    stride_new_b,
    stride_new_d0,
    stride_new_d1,
    stride_new_d2,
    SIZE_D0,
    SIZE_D1,
    SIZE_D2,  # Dimensions (C, H, W)
    ROW_SIZE,  # Total elements (C * H * W)
    BLOCK_SIZE: tl.constexpr,
):
    """Kernel to update values in a 4D states tensor using a mask."""
    pid_batch = tl.program_id(0).to(tl.int64)
    pid_row_chunk = tl.program_id(1).to(tl.int64)

    target_idx = tl.load(IDX_PTR + pid_batch)
    if target_idx == -1:
        return

    # Linear index
    row_start_offset = pid_row_chunk * BLOCK_SIZE
    flat_offsets = row_start_offset + tl.arange(0, BLOCK_SIZE)
    mask = flat_offsets < ROW_SIZE

    # Reconstruct 4D coordinates from linear index
    # Given shape (batch, D0, D1, D2)
    # idx_d2 = flat % D2
    # temp   = flat // D2
    # idx_d1 = temp % D1
    # idx_d0 = temp // D1

    idx_d2 = flat_offsets % SIZE_D2.to(tl.int64)
    temp = flat_offsets // SIZE_D2.to(tl.int64)
    idx_d1 = temp % SIZE_D1.to(tl.int64)
    idx_d0 = temp // SIZE_D1.to(tl.int64)

    # Calculate pointers using specific strides
    dst_offset = (
        (target_idx.to(tl.int64) * stride_state_b.to(tl.int64))
        + (idx_d0 * stride_state_d0)
        + (idx_d1 * stride_state_d1)
        + (idx_d2 * stride_state_d2)
    )

    src_offset = (
        (pid_batch * stride_new_b.to(tl.int64))
        + (idx_d0 * stride_new_d0)
        + (idx_d1 * stride_new_d1)
        + (idx_d2 * stride_new_d2)
    )

    dst_ptr = STATES_PTR + dst_offset
    src_ptr = NEW_STATES_PTR + src_offset

    val = tl.load(src_ptr, mask=mask)
    tl.store(dst_ptr, val, mask=mask)


def _compute_row_size(tensor):
    if tensor.ndim == 1:
        return 1

    row_size = 1
    for dim in tensor.shape[1:]:
        row_size *= dim
    return row_size


def tensor_get_slice_after(input_tensor, output_tensor, pos_on_device, check_bounds: bool = False):
    """
    Copy from input_tensor[pos_on_device:] to output_tensor[:copy_size].
    """

    assert (
        input_tensor.device == output_tensor.device
    ), "Input and output tensors must be on the same device"
    assert (
        input_tensor.dtype == output_tensor.dtype
    ), "Input and output tensors must have the same dtype"
    assert (
        input_tensor.is_contiguous() and output_tensor.is_contiguous()
    ), "Input and output tensors must be contiguous"

    if check_bounds:
        assert (
            input_tensor.ndim == output_tensor.ndim
        ), "Input and output tensors must have the same number of dimensions"

        for i in range(1, input_tensor.ndim):
            assert (
                input_tensor.shape[i] == output_tensor.shape[i]
            ), f"Dimension {i} must match between input and output tensors"

        pos_on_device_val = pos_on_device[0].item()
        assert (
            0 <= pos_on_device_val <= input_tensor.shape[0]
        ), "pos_on_device must be between 0 and input_tensor.shape[0]"

        copy_size = input_tensor.shape[0] - pos_on_device_val
        assert (
            copy_size <= output_tensor.shape[0]
        ), f"Copy size ({copy_size}) exceeds output_tensor batch size ({output_tensor.shape[0]})"

    input_batch_size = input_tensor.shape[0]
    output_batch_size = output_tensor.shape[0]

    row_size = _compute_row_size(input_tensor)
    block_size = triton.next_power_of_2(row_size)

    grid = (input_batch_size,) if input_batch_size > 0 else (1,)

    if input_batch_size > 0:
        _tensor_get_slice_after_kernel[grid](
            input_tensor,
            output_tensor,
            POS_ON_DEVICE=pos_on_device,
            INPUT_BATCH_SIZE=input_batch_size,
            OUTPUT_BATCH_SIZE=output_batch_size,
            ROW_SIZE=row_size,
            BLOCK_SIZE=block_size,
        )


def tensor_merge(
    tensor_a: torch.Tensor,
    tensor_b: torch.Tensor,
    pos_on_device: torch.Tensor,
    output_tensor: Optional[torch.Tensor] = None,
    check_bounds: bool = False,
):
    """
    Merge tensor_a and tensor_b.

    If output_tensor is None, the operation is performed in-place on tensor_a.
    """

    is_inplace = False
    if output_tensor is None:
        output_tensor = tensor_a
        is_inplace = True

    assert (
        tensor_a.device == tensor_b.device == output_tensor.device
    ), "All tensors must be on the same device"
    assert (
        tensor_a.dtype == tensor_b.dtype == output_tensor.dtype
    ), "All tensors must have the same dtype"
    assert (
        tensor_a.is_contiguous() and tensor_b.is_contiguous() and output_tensor.is_contiguous()
    ), "All tensors must be contiguous"

    if check_bounds:
        assert (
            tensor_a.ndim == tensor_b.ndim == output_tensor.ndim
        ), "All tensors must have the same number of dimensions"

        for i in range(1, tensor_a.ndim):
            assert (
                tensor_a.shape[i] == tensor_b.shape[i] == output_tensor.shape[i]
            ), f"Dimension {i} must match across all tensors"

        assert (
            output_tensor.shape[0] >= tensor_a.shape[0]
        ), "output_tensor batch size must be >= tensor_a batch size"

        pos_on_device_val = pos_on_device[0].item()
        assert (
            0 <= pos_on_device_val <= tensor_a.shape[0]
        ), "pos_on_device must be between 0 and tensor_a batch size"

    tensor_b_batch_size = tensor_b.shape[0]
    output_batch_size = output_tensor.shape[0]

    row_size = _compute_row_size(tensor_a)
    block_size = triton.next_power_of_2(row_size)

    grid = (output_batch_size,)

    _tensor_merge_kernel[grid](
        tensor_a,
        tensor_b,
        output_tensor,
        POS_ON_DEVICE=pos_on_device,
        TENSOR_B_BATCH_SIZE=tensor_b_batch_size,
        ROW_SIZE=row_size,
        BLOCK_SIZE=block_size,
        OUTPUT_BATCH_SIZE=output_batch_size,
        IS_INPLACE=is_inplace,
    )


def tensor_masked_update(states: torch.Tensor, idx: torch.Tensor, new_states: torch.Tensor):
    """
    Update `states` to `new_states` at `idx`, but ignore any -1 values in `idx`.
    Works for 2D, 3D, or 4D tensors.

    Args:
        states: (N, ...) - Destination tensor (2D, 3D, or 4D)
        idx: (B,) - Indices to update. -1 means skip.
        new_states: (B, ...) - Source tensor. Must match states shape[1:]
    """
    assert states.is_cuda and idx.is_cuda and new_states.is_cuda
    assert idx.ndim == 1
    assert states.shape[1:] == new_states.shape[1:], "State dimensions must match"

    ndim = states.ndim
    assert ndim in [2, 3, 4], "Only 2D, 3D, and 4D tensors are supported"

    n_updates = idx.shape[0]

    row_size = 1
    for dim in states.shape[1:]:
        row_size *= dim

    BLOCK_SIZE = 1024
    grid = lambda meta: (n_updates, triton.cdiv(row_size, meta["BLOCK_SIZE"]))

    if ndim == 2:
        _tensor_masked_update_kernel_2d[grid](
            STATES_PTR=states,
            IDX_PTR=idx,
            NEW_STATES_PTR=new_states,
            stride_state_b=states.stride(0),
            stride_state_d0=states.stride(1),
            stride_new_b=new_states.stride(0),
            stride_new_d0=new_states.stride(1),
            ROW_SIZE=row_size,
            BLOCK_SIZE=BLOCK_SIZE,
        )

    elif ndim == 3:
        # Shapes: (N, D0, D1)
        _tensor_masked_update_kernel_3d[grid](
            STATES_PTR=states,
            IDX_PTR=idx,
            NEW_STATES_PTR=new_states,
            # Strides
            stride_state_b=states.stride(0),
            stride_state_d0=states.stride(1),
            stride_state_d1=states.stride(2),
            stride_new_b=new_states.stride(0),
            stride_new_d0=new_states.stride(1),
            stride_new_d1=new_states.stride(2),
            # Dims
            SIZE_D0=states.shape[1],
            SIZE_D1=states.shape[2],
            ROW_SIZE=row_size,
            BLOCK_SIZE=BLOCK_SIZE,
        )

    elif ndim == 4:
        # Shapes: (N, D0, D1, D2)
        _tensor_masked_update_kernel_4d[grid](
            STATES_PTR=states,
            IDX_PTR=idx,
            NEW_STATES_PTR=new_states,
            # Strides
            stride_state_b=states.stride(0),
            stride_state_d0=states.stride(1),
            stride_state_d1=states.stride(2),
            stride_state_d2=states.stride(3),
            stride_new_b=new_states.stride(0),
            stride_new_d0=new_states.stride(1),
            stride_new_d1=new_states.stride(2),
            stride_new_d2=new_states.stride(3),
            # Dims
            SIZE_D0=states.shape[1],
            SIZE_D1=states.shape[2],
            SIZE_D2=states.shape[3],
            ROW_SIZE=row_size,
            BLOCK_SIZE=BLOCK_SIZE,
        )


================================================
FILE: megatron/core/inference/contexts/base_context.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import abc

from megatron.core.inference.config import InferenceConfig


class BaseInferenceContext(abc.ABC):
    """Base class for inference contexts.

    Currently extended by `StaticInferenceContext` and `DynamicInferenceContext`.
    Extend this class for any future contexts types.
    """

    def __init__(self, inference_config: InferenceConfig):
        """
        Args:
        """
        self.config = inference_config

    @abc.abstractmethod
    def is_static_batching(self) -> bool:
        """Return `True` if context uses static batching."""
        pass

    def is_dynamic_batching(self) -> bool:
        """Return `True` if context uses dynamic batching."""
        return not self.is_static_batching()

    def increment_sequence_len_offset(self, increment: int) -> None:
        """Update sequence length offset. No-op for dynamic batching."""
        if self.is_static_batching():
            self.sequence_len_offset += increment

    def increment_batch_size_offset(self, increment: int) -> None:
        """Update batch size offset. No-op for dynamic batching."""
        if self.is_static_batching():
            self.batch_size_offset += increment

    def reset_batch_size_offset(self) -> None:
        """Reset batch size offset to 0. No-op for dynamic batching."""
        if self.is_static_batching():
            self.batch_size_offset = 0


================================================
FILE: megatron/core/inference/contexts/dynamic_context.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import logging
import math
import warnings
from contextlib import nullcontext
from typing import List, Optional, Sequence, Tuple

import torch  # type: ignore
import torch.nn.functional as F  # type: ignore
from torch import Tensor  # type: ignore

from megatron.core import parallel_state
from megatron.core.inference.batch_dimensions_utils import (
    CUDAGraphBatchDimensionBuilder,
    InferenceBatchDimensions,
)
from megatron.core.inference.config import (
    InferenceConfig,
    KVCacheManagementMode,
    PrefixCachingEvictionPolicy,
)
from megatron.core.inference.inference_request import DynamicInferenceRequest
from megatron.core.inference.sampling_params import SamplingParams
from megatron.core.inference.unified_memory import (
    UnifiedMemoryUnsupportedError,
    create_unified_mempool,
)
from megatron.core.inference.utils import device_memory_summary, tensor_swap
from megatron.core.models.common.embeddings.rope_utils import apply_rotary_pos_emb
from megatron.core.package_info import __version__ as mcore_version
from megatron.core.ssm.mamba_hybrid_layer_allocation import get_layer_maps_from_layer_type_list
from megatron.core.transformer import MLATransformerConfig, TransformerConfig
from megatron.core.utils import deprecate_args
from megatron.core.utils import divide as core_divide
from megatron.core.utils import get_pg_size, internal_api

from .attention_context.mamba_metadata import MambaMetadata
from .attention_context.mha_metadata import GraphedMHAMetadata, NonGraphedMHAMetadata
from .base_context import BaseInferenceContext
from .kv_block_allocator import KVBlockAllocator
from .mamba_slot_allocator import MambaSlotAllocator
from .routing_metadata import RoutingMetadata

try:
    from .fused_kv_append_kernel import triton_append_key_value_cache
except ImportError:
    triton_append_key_value_cache = None

try:
    import flashinfer  # type: ignore # pylint: disable=unused-import

    HAVE_FLASHINFER = True
except ImportError:
    HAVE_FLASHINFER = False

try:
    from torch_memory_saver import torch_memory_saver

    torch_memory_saver.hook_mode = "torch"
    HAVE_TORCH_MEMORY_SAVER = True
except ImportError:
    HAVE_TORCH_MEMORY_SAVER = False

DEPRECATED_ARGS = [
    "params_dtype",
    "num_layers",
    "kv_channels",
    "num_attention_heads",
    "max_sequence_length",
    "buffer_size_gb",
    "paused_buffer_size_gb",
    "max_requests",
    "max_tokens",
    "block_size_tokens",
    "tensor_model_parallel_size",
    "pipeline_model_parallel_size",
    "pg_collection",
    "cache_mla_latent",
    "kv_lora_rank",
    "qk_pos_emb_head_dim",
    "num_cuda_graphs",
    "materialize_only_last_token_logits",
    "mamba_inference_state_config",
    "use_cuda_graphs_for_non_decode_steps",
    "use_flashinfer_fused_rope",
    "unified_memory_level",
    "cuda_graph_max_tokens",
    "cuda_graph_mixed_prefill_count",
    "metrics_writer",
    "request_metadata_types",
    "persist_cuda_graphs",
    "offload_kv_cache",
]


class ContextOverflowError(Exception):
    """Base exception for when a new request does not fit.

    Args:
        is_transient (bool): Flag marking whether error is transient (i.e., may
            work if we try again, but fails due to the current context state), or
            permanent (i.e., request will never fit in this context).
    """

    def __init__(
        self, request_id: Optional[int], message: Optional[str] = None, *, is_transient: bool = True
    ):
        request_str = '--' if request_id is None else str(request_id)
        _message = "" if message is None else f" | {message}"
        super().__init__(f"request {request_str}{_message}")
        self.request_id = request_id
        self.message = message
        self.is_transient = is_transient


class RequestOverflowError(ContextOverflowError):
    """Adding request would overflow max request count."""

    pass


class TokenOverflowError(ContextOverflowError):
    """Adding request would overflow max token count."""

    pass


class MaxSequenceLengthOverflowError(ContextOverflowError):
    """Adding request would overflow max sequence length."""

    def __init__(self, request_id, message: Optional[str] = None):
        super().__init__(request_id, message=message, is_transient=False)


class BlockOverflowError(ContextOverflowError):
    """Adding request would overflow available memory blocks."""

    pass


class ActiveRequestCountOverflowError(ContextOverflowError):
    '''Used when `initialize_attention_state()` is called with
    `num_warmup_requests > max_requests.'''

    def __init__(self, max_request_count, active_request_count):
        assert active_request_count > max_request_count
        super().__init__(
            None,
            "active_request_count (%d) > max_request_count (%d)."
            % (active_request_count, max_request_count),
        )


class TensorStateDeallocatedError(ContextOverflowError):
    """Context's tensor state is currently deallocated, such as when the engine
    has been suspended."""

    pass


class ContextErrorFactory:
    """Factory class for serializing/deserializing context errors."""

    @classmethod
    def serialize(cls, error: ContextOverflowError) -> dict:
        """Serialize error.

        Args:
            error (ContextOverflowError): Error.

        Returns:
            (dict) Serialized error data.
        """
        assert isinstance(error, ContextOverflowError)
        return {
            "type": type(error).__name__,
            "request_id": error.request_id,
            "message": error.message,
            "is_transient": error.is_transient,
        }

    @classmethod
    def deserialize(cls, obj: dict) -> ContextOverflowError:
        """Deserialize error.

        Args:
            obj (dict): Serialized error data.

        Returns:
            (ContextOverflowError) Deserialized error.
        """
        error_cls = {
            "ContextOverflowError": ContextOverflowError,
            "RequestOverflowError": RequestOverflowError,
            "TokenOverflowError": TokenOverflowError,
            "MaxSequenceLengthOverflowError": MaxSequenceLengthOverflowError,
            "BlockOverflowError": BlockOverflowError,
            "ActiveRequestCountOverflowError": ActiveRequestCountOverflowError,
        }[obj["type"]]
        error = ContextOverflowError(**{k: v for k, v in obj.items() if k != "type"})
        error.__class__ = error_cls  # todo (@lmcafee): better/safer alternative?
        return error


def get_mem_size_str(n_bytes: int) -> str:
    """Convert number of bytes to human-readable string."""
    for exp, suffix in ((4, "TB"), (3, "GB"), (2, "MB"), (3, "KB"), (0, "bytes")):
        nquery = int(1024**exp)
        if round(n_bytes / nquery) >= 1:
            return "%.3g %s" % (n_bytes / nquery, suffix)
    raise Exception(f"something went wrong, n_bytes={n_bytes}.")


@internal_api
# pylint: disable=line-too-long
class DynamicInferenceContext(BaseInferenceContext):
    """Inference context that is passed to the main model in order
    to efficiently calculate and store the KV cache during inference.

    The dynamic inference context manages both: 1) in-flight batching, and 2) a
    memory buffer for the block-level KV cache. For in-flight batching, requests of
    arbitrary sequence length may be added, paused, or removed from the context
    at any step. The only constraint is the maximum number of requests or tokens
    that the context is defined to support. For the block-level KV cache, a memory
    buffer is allocated up front (size `buffer_size_gb` if `unified_memory_level`
    == 0, or `buffer_size_gb + paused_buffer_size_gb` if `unified_memory_level` ==
    1), that is divided into blocks and dynamically assigned to requests. At any
    given step, any unassigned blocks equate to unused space.

    Args:
        model_config (TransformerConfig): Model config.
        inference_config (InferenceConfig): Inference config.
    """

    DEFAULT_MAX_TOKENS = 16384
    TOKEN_ROUNDER = 64
    REQUEST_ROUNDER = 4
    TMS_TAG = "inference_context"

    @deprecate_args(
        *DEPRECATED_ARGS,
        message=(
            "Argument `{name}` has been deprecated. "
            "Only pass `model_config` and `inference_config`"
        ),
    )
    def __init__(self, model_config: TransformerConfig, inference_config: InferenceConfig):
        super().__init__(inference_config=inference_config)

        # Prefix caching configuration
        self.enable_prefix_caching = inference_config.enable_prefix_caching
        self.prefix_caching_eviction_policy = inference_config.prefix_caching_eviction_policy
        self.prefix_caching_coordinator_policy = inference_config.prefix_caching_coordinator_policy
        self.use_triton_conv1d = inference_config.use_triton_conv1d
        self._use_triton_conv1d_this_step = inference_config.use_triton_conv1d

        # Engine step counter (used for logging, metrics, and event tracking)
        self.step_count = 0

        # Separate monotonic clock for prefix caching LRU eviction ordering.
        # Incremented each engine step but kept independent so the engine step
        # counter is not overloaded with cache-eviction semantics.
        self.prefix_cache_lru_clock = 0

        # Prefix caching hit tracking (accumulated, reset by engine after logging).
        self.prefix_cache_hits = 0  # requests that matched at least one cached block
        self.prefix_cache_blocks_matched = 0  # total matched blocks across all requests

        self.cache_mla_latent = (
            isinstance(model_config, MLATransformerConfig) and model_config.cache_mla_latents
        )
        if self.cache_mla_latent:
            assert (
                inference_config.block_size_tokens == 64
            ), "Flash MLA requires a block size of 64. Set --inference-dynamic-batching-block-size 64 to fix this assert"

        # Per partition num heads and hidden size.
        num_attention_heads = model_config.num_query_groups or model_config.num_attention_heads
        projection_size = model_config.kv_channels * num_attention_heads
        pg_collection = inference_config.pg_collection
        if pg_collection is not None:
            tp_size = get_pg_size(pg_collection.tp)
            pp_size = get_pg_size(pg_collection.pp)
        else:
            tp_size = model_config.tensor_model_parallel_size
            pp_size = model_config.pipeline_model_parallel_size
        self.hidden_size_per_attention_head = core_divide(projection_size, num_attention_heads)
        if num_attention_heads >= tp_size:
            self.num_attention_heads_per_partition = core_divide(num_attention_heads, tp_size)
        else:
            self.num_attention_heads_per_partition = 1

        self.num_speculative_tokens = inference_config.num_speculative_tokens
        assert self.num_speculative_tokens < inference_config.block_size_tokens, (
            f"num_speculative_tokens ({self.num_speculative_tokens}) must be < "
            f"block_size_tokens ({inference_config.block_size_tokens})"
        )

        # Cache the PP group we should use for PP collectives inside the context.
        # If the model provides a pg_collection with a pp group, prefer it.
        # Otherwise:
        # - for PP=1 we don't need a PP group at all
        # - for PP>1 we require Megatron parallel_state to be initialized
        if pg_collection is not None and get_pg_size(pg_collection.pp) > 1:
            self.pipeline_parallel_group = pg_collection.pp
        elif pp_size > 1:
            self.pipeline_parallel_group = parallel_state.get_pipeline_model_parallel_group()
        else:
            self.pipeline_parallel_group = None

        if pg_collection is not None:
            self.expert_model_parallel_group = pg_collection.ep
        elif parallel_state.get_expert_model_parallel_world_size() > 1:
            self.expert_model_parallel_group = parallel_state.get_expert_model_parallel_group()
        else:
            self.expert_model_parallel_group = None

        # Mamba states.
        mamba_inference_state_config = inference_config.mamba_inference_state_config
        self.is_hybrid_model = mamba_inference_state_config is not None
        if self.is_hybrid_model:
            self.mamba_conv_states_shape = mamba_inference_state_config.conv_states_shape
            self.mamba_ssm_states_shape = mamba_inference_state_config.ssm_states_shape
            self.mamba_conv_states_dtype = mamba_inference_state_config.conv_states_dtype
            self.mamba_ssm_states_dtype = mamba_inference_state_config.ssm_states_dtype
            self.mamba_chunk_size = mamba_inference_state_config.mamba_chunk_size

            # For hybrid models, the layer map converts the global layer index to the
            # corresponding attention layer index or Mamba layer index depending on the
            # layer type.
            attention_layer_map, mamba_layer_map, _, _ = get_layer_maps_from_layer_type_list(
                mamba_inference_state_config.layer_type_list
            )
            self.num_attention_layers = len(attention_layer_map)
            self.num_mamba_layers = len(mamba_layer_map)
            self.layer_map = attention_layer_map | mamba_layer_map
        else:
            # The layer map is the identity function for pure Transformer models.
            self.num_attention_layers = model_config.num_layers // pp_size
            self.num_mamba_layers = 0
            (self.mamba_conv_states_shape, self.mamba_ssm_states_shape) = (None, None)
            self.layer_map = {i: i for i in range(self.num_attention_layers)}

        if self.num_attention_layers == 0:
            raise NotImplementedError(
                f"Using `DynamicInferenceContext` with no attention is not supported."
            )

        # Block size tokens, bytes.
        kv_dtype_size_bytes = model_config.params_dtype.itemsize
        self.block_size_tokens = inference_config.block_size_tokens
        if self.cache_mla_latent:
            #   one vector  c_t  (rank)  +  optional RoPE phase slice
            self.kv_reduced_dim = model_config.kv_lora_rank + model_config.qk_pos_emb_head_dim
            self.block_size_bytes = (
                kv_dtype_size_bytes
                * self.num_attention_layers
                * self.block_size_tokens
                * self.kv_reduced_dim
            )
        else:
            self.block_size_bytes = (
                kv_dtype_size_bytes
                * 2  # key, value
                * self.num_attention_layers
                * self.block_size_tokens
                * self.num_attention_heads_per_partition
                * self.hidden_size_per_attention_head
            )
        assert self.block_size_bytes > 0

        mamba_states_memory_per_request = 0
        if self.is_hybrid_model:
            mamba_states_memory_per_request += (
                math.prod(self.mamba_conv_states_shape) * self.mamba_conv_states_dtype.itemsize
            )
            mamba_states_memory_per_request += (
                math.prod(self.mamba_ssm_states_shape) * self.mamba_ssm_states_dtype.itemsize
            )
            mamba_states_memory_per_request *= self.num_mamba_layers
            if self.num_speculative_tokens > 0:
                # Add memory for intermediate conv and SSM states
                intermediate_memory_per_request = (
                    math.prod(self.mamba_conv_states_shape) * self.mamba_conv_states_dtype.itemsize
                    + math.prod(self.mamba_ssm_states_shape) * self.mamba_ssm_states_dtype.itemsize
                )
                intermediate_memory_per_request *= self.num_mamba_layers
                intermediate_memory_per_request *= self.num_speculative_tokens + 1
                mamba_states_memory_per_request += intermediate_memory_per_request

        # Unified memory and general tensor management.
        self.unified_memory_level = inference_config.unified_memory_level
        self.static_kv_memory_pointers = inference_config.static_kv_memory_pointers
        self.kv_cache_management_mode = inference_config.kv_cache_management_mode

        if self.unified_memory_level != 0:
            try:
                self.unified_memory_mempool = create_unified_mempool()
            except UnifiedMemoryUnsupportedError:
                if torch.distributed.get_rank() == 0:
                    warnings.warn(
                        "Unified memory requested but not available; defaulting to GPU memory."
                    )
                self.unified_memory_level = 0
        # If we are in a mode that requires static KV memory pointers,
        # we must have either UVM or torch_memory_saver.
        if (
            self.static_kv_memory_pointers
            and self.kv_cache_management_mode != KVCacheManagementMode.PERSIST
        ):
            assert HAVE_TORCH_MEMORY_SAVER or self.unified_memory_level != 0, (
                "Static KV memory pointers require UVM or torch_memory_saver when not persisted. "
                "Use --rl-kv-cache-management-mode=persist, UVM, or install torch_memory_saver."
            )

        # When not using `torch_memory_saver`, we manually offload/restore tensors.
        # We use storage resize, similar to the logic in `core/distributed/param_and_grad_buffer.py`
        self._offloadable_tensor_names: set[str] = set()
        self._offloadable_cpu_backups: dict[str, torch.Tensor] = {}
        self._offloadable_storage_sizes: dict[str, int] = {}
        self._uses_torch_memory_saver: bool = False

        # Initialize block allocator.
        buffer_size_bytes = int(inference_config.buffer_size_gb * 1024**3)
        paused_buffer_size_bytes = (
            0
            if inference_config.paused_buffer_size_gb is None
            else int(inference_config.paused_buffer_size_gb * 1024**3)
        )

        mamba_max_requests = float('inf')

        if (mamba_memory_ratio := inference_config.mamba_memory_ratio) is not None:
            assert self.is_hybrid_model
            assert mamba_memory_ratio > 0 and mamba_memory_ratio < 1

            # Calculate total memory before partition
            total_memory = buffer_size_bytes + paused_buffer_size_bytes
            mamba_memory_bytes = total_memory * mamba_memory_ratio
            mamba_max_requests = int(mamba_memory_bytes // mamba_states_memory_per_request)

            # Reduce buffer sizes for KV cache
            buffer_size_bytes = int(buffer_size_bytes * (1.0 - mamba_memory_ratio))
            paused_buffer_size_bytes = int(paused_buffer_size_bytes * (1.0 - mamba_memory_ratio))

            block_count = buffer_size_bytes // self.block_size_bytes
            block_count = max(2, block_count)  # need >= 1 active block + 1 dummy block
            paused_block_count = paused_buffer_size_bytes // self.block_size_bytes
        else:
            block_count = buffer_size_bytes // (
                self.block_size_bytes + mamba_states_memory_per_request
            )
            block_count = max(2, block_count)  # need >= 1 active block + 1 dummy block
            paused_block_count = paused_buffer_size_bytes // (
                self.block_size_bytes + mamba_states_memory_per_request
            )

        # If using pipeline parallelism synchronize the total block count in case the
        # pipeline stages have different layer allocations. Non-uniform block counts
        # can lead to some ranks pausing requests earlier than other ranks
        # (i.e., divergence in the scheduling behavior).
        if pp_size > 1:
            block_count_tensor = torch.tensor(
                (block_count, paused_block_count),
                dtype=torch.int32,
                device=torch.cuda.current_device(),
            )
            torch.distributed.all_reduce(
                block_count_tensor,
                op=torch.distributed.ReduceOp.MIN,
                group=self.pipeline_parallel_group,
            )
            block_count = block_count_tensor[0].item()
            paused_block_count = block_count_tensor[1].item()

        self.kv_block_allocator = KVBlockAllocator(
            context=self,
            total_count=(
                block_count if self.unified_memory_level == 0 else block_count + paused_block_count
            ),
            paused_count=paused_block_count,
            enable_prefix_caching=self.enable_prefix_caching,
            prefix_caching_eviction_policy=self.prefix_caching_eviction_policy,
        )

        # Track request metadata.
        request_metadata_types = inference_config.request_metadata_types
        if request_metadata_types is None:
            request_metadata_types = DynamicInferenceRequest.get_metadata_types()
        self.request_metadata_types = request_metadata_types

        # Initialize context state.
        self.params_dtype = model_config.params_dtype
        self.max_sequence_length = inference_config.max_sequence_length

        # Block ids.
        self.max_kv_block_count = math.ceil(self.max_sequence_length / self.block_size_tokens)

        # Set max_requests, max_tokens.
        if inference_config.max_requests is None:
            # Maximize compute utilization by defaulting to 1 block per request.
            self.max_requests = self.kv_block_allocator.total_count - 1  # -1 for dummy block

            # Adjust max_requests for Mamba memory constraints if necessary
            if self.is_hybrid_model and mamba_max_requests < self.max_requests:
                self.max_requests = int(mamba_max_requests)

            self.max_requests = self.max_requests // tp_size * tp_size
            self.max_requests = self.max_requests // self.REQUEST_ROUNDER * self.REQUEST_ROUNDER
        else:
            # User can control request overflow via max_requests.
            self.max_requests = inference_config.max_requests

        assert (
            self.max_requests % tp_size == 0
        ), f"max_requests must be divisible by tp_size ({tp_size}), but got {self.max_requests}"

        self.max_tokens = inference_config.max_tokens or self.DEFAULT_MAX_TOKENS

        assert self.max_tokens >= self.max_requests, (
            f"max_tokens ({self.max_tokens}) must be >= "
            f"max_requests ({self.max_requests}), "
            "to have consistency between cuda graph sizes and the block table size."
        )

        # Attention metadata initialization (tensors are now handled by MHAMetadata classes)

        self.graph_attn_metadata = {}
        self.non_graph_attn_metadata = {}

        self.graph_attn_metadata["mha_metadata"] = GraphedMHAMetadata(
            block_count_total=self.kv_block_allocator.total_count,
            max_kv_block_count=self.max_kv_block_count,
            max_requests=self.max_requests,
            block_size_tokens=self.block_size_tokens,
            max_seqlen=self.max_sequence_length,
        )

        self.non_graph_attn_metadata["mha_metadata"] = NonGraphedMHAMetadata(
            block_count_total=self.kv_block_allocator.total_count,
            max_kv_block_count=self.max_kv_block_count,
            max_requests=self.max_requests,
            block_size_tokens=self.block_size_tokens,
            max_seqlen=self.max_sequence_length,
        )

        self.moe_enable_routing_replay = model_config.moe_enable_routing_replay
        if self.moe_enable_routing_replay:
            assert (
                model_config.num_moe_experts is not None
            ), "Router recording/replay requested but no MoE experts specified!"
            self.moe_routing_metadata = RoutingMetadata(self, model_config.moe_router_topk)

        # CUDA graph config list
        self.use_cuda_graphs_for_non_decode_steps = (
            inference_config.use_cuda_graphs_for_non_decode_steps
        )
        self.cuda_graph_batch_dimensions_list, self.cuda_graph_token_counts = (
            CUDAGraphBatchDimensionBuilder.generate_cuda_graph_batch_dimensions_list(
                tp_size=tp_size,
                num_cuda_graphs=inference_config.num_cuda_graphs,
                cuda_graph_max_tokens=self.max_requests * (self.num_speculative_tokens + 1),
                cuda_graph_mixed_prefill_request_count=inference_config.cuda_graph_mixed_prefill_count,
                max_requests=self.max_requests,
                max_tokens=self.max_tokens,
                max_sequence_length=self.max_sequence_length,
                use_cuda_graphs_for_non_decode_steps=self.use_cuda_graphs_for_non_decode_steps,
                num_speculative_tokens=self.num_speculative_tokens,
            )
        )

        self.smallest_non_decode_cuda_graph_size = min(
            inference_config.cuda_graph_mixed_prefill_count, self.max_requests
        )

        # Deal with chunked prefill
        self.enable_chunked_prefill = inference_config.enable_chunked_prefill

        # FlashInfer.
        if inference_config.use_flashinfer_fused_rope is True:
            assert HAVE_FLASHINFER, "flashinfer is not installed"
        elif inference_config.use_flashinfer_fused_rope is None:
            inference_config.use_flashinfer_fused_rope = HAVE_FLASHINFER
        self.use_flashinfer_fused_rope = inference_config.use_flashinfer_fused_rope

        # Allocate GPU state.
        self.is_tensor_state_allocated = False
        self.initialize_all_tensors()

        # Print info.
        logging.info(
            "DynamicInferenceContext: allocated context with active buffer size %s (%d blocks)."
            % (
                get_mem_size_str(self.kv_block_allocator.active_count * self.block_size_bytes),
                self.kv_block_allocator.active_count,
            )
        )

    def _allocate_memory_buffer(self):
        """Allocate the KV cache memory buffer."""
        if self.cache_mla_latent:
            self.memory_buffer = torch.empty(
                (
                    self.num_attention_layers,
                    self.kv_block_allocator.total_count,
                    self.block_size_tokens,
                    self.kv_reduced_dim,
                ),
                dtype=self.params_dtype,
                device=torch.cuda.current_device(),
            )
        else:
            self.memory_buffer = torch.empty(
                (
                    2,  # key and value
                    self.num_attention_layers,
                    self.kv_block_allocator.total_count,
                    self.block_size_tokens,
                    self.num_attention_heads_per_partition,
                    self.hidden_size_per_attention_head,
                ),
                dtype=self.params_dtype,
                device=torch.cuda.current_device(),
            )
        if (
            self.kv_cache_management_mode == KVCacheManagementMode.OFFLOAD
            and not self._uses_torch_memory_saver
        ):
            assert self.unified_memory_level == 0
            self._offloadable_tensor_names.add("memory_buffer")
            self._offloadable_cpu_backups["memory_buffer"] = torch.empty_like(
                self.memory_buffer, device="cpu"
            ).pin_memory()

    def _allocate_mamba_states(self):
        """Allocate Mamba states for hybrid models."""
        if self.is_hybrid_model:
            self.mamba_metadata = MambaMetadata(
                max_requests=self.max_requests, max_tokens=self.max_tokens
            )
            self.mamba_conv_states = torch.empty(
                (self.num_mamba_layers, self.max_requests) + self.mamba_conv_states_shape,
                dtype=self.mamba_conv_states_dtype,
                device=torch.cuda.current_device(),
            )
            self.mamba_ssm_states = torch.empty(
                (self.num_mamba_layers, self.max_requests) + self.mamba_ssm_states_shape,
                dtype=self.mamba_ssm_states_dtype,
                device=torch.cuda.current_device(),
            )
            if self.num_speculative_tokens > 0:
                self.mamba_intermediate_conv_states = torch.empty(
                    (
                        self.num_mamba_layers,
                        self.max_requests,
                        self.num_speculative_tokens + 1,
                        *self.mamba_conv_states_shape,
                    ),
                    dtype=self.mamba_conv_states_dtype,
                    device=torch.cuda.current_device(),
                )
                self.mamba_intermediate_ssm_states = torch.empty(
                    (
                        self.num_mamba_layers,
                        self.max_requests,
                        self.num_speculative_tokens + 1,
                        *self.mamba_ssm_states_shape,
                    ),
                    dtype=self.mamba_ssm_states_dtype,
                    device=torch.cuda.current_device(),
                )
            if (
                self.kv_cache_management_mode == KVCacheManagementMode.OFFLOAD
                and not self._uses_torch_memory_saver
            ):
                assert self.unified_memory_level == 0
                self._offloadable_tensor_names.add("mamba_conv_states")
                self._offloadable_cpu_backups["mamba_conv_states"] = torch.empty_like(
                    self.mamba_conv_states, device="cpu"
                ).pin_memory()
                self._offloadable_tensor_names.add("mamba_ssm_states")
                self._offloadable_cpu_backups["mamba_ssm_states"] = torch.empty_like(
                    self.mamba_ssm_states, device="cpu"
                ).pin_memory()
                if self.num_speculative_tokens > 0:
                    self._offloadable_tensor_names.add("mamba_intermediate_conv_states")
                    self._offloadable_cpu_backups["mamba_intermediate_conv_states"] = (
                        torch.empty_like(
                            self.mamba_intermediate_conv_states, device="cpu"
                        ).pin_memory()
                    )
                    self._offloadable_tensor_names.add("mamba_intermediate_ssm_states")
                    self._offloadable_cpu_backups["mamba_intermediate_ssm_states"] = (
                        torch.empty_like(
                            self.mamba_intermediate_ssm_states, device="cpu"
                        ).pin_memory()
                    )
        else:
            self.mamba_metadata = None

    def initialize_all_tensors(self) -> None:
        """Allocate all GPU state during initial construction."""
        # Mark allocated.
        if self.is_tensor_state_allocated:
            return
        self.is_tensor_state_allocated = True

        # Validate no tensors allocated prior to this method.
        for key in vars(self).keys():
            value = getattr(self, key)
            assert not isinstance(value, torch.Tensor), (
                "All tensors should be allocated within `initialize_all_tensors()`. "
                f"Please move tensor '{key}'."
            )

        # Per-request state.
        self.request_ids = torch.full(
            (self.max_requests,), -1, dtype=torch.int32, device=torch.cuda.current_device()
        )
        # request_query_lengths is the input prompt tokens length during prefill phase (1st step) and then 1 for the decode phase (i.e During generation)
        self.request_query_lengths = torch.empty_like(self.request_ids)
        # True only for a new request , then after a forward pass it is set to False
        self.request_in_prefill_status_tensor = torch.empty_like(self.request_ids)
        # request_output_lengths is len(input_prompt_tokens) + num_tokens_to_generate
        self.request_output_lengths = torch.empty_like(self.request_ids)
        # request_kv_length_offsets is the same as query length during prefill phase (1st step) and then 1 for the decode phase (i.e During generation)
        self.request_kv_length_offsets = torch.empty_like(self.request_ids)
        self.request_kv_block_counts = torch.empty_like(self.request_ids)
        self.request_last_kv_block_id = torch.empty_like(self.request_ids)
        # request_last_kv_block_offset represents number of tokens in the last kv block
        self.request_last_kv_block_offset = torch.empty_like(self.request_ids)
        self.request_to_kv_block_ids = torch.full(
            (self.max_requests, self.max_kv_block_count),
            -1,
            dtype=torch.int,
            device=torch.cuda.current_device(),
        )

        # Track request metadata.
        self.request_metadata = {
            label: torch.empty(
                (self.max_requests,), dtype=dtype, device=torch.cuda.current_device()
            )
            for label, dtype, _ in self.request_metadata_types
        }

        # Per-token state.
        self.token_to_input_ids = torch.full(
            (self.max_tokens,), 0, dtype=torch.long, device=torch.cuda.current_device()
        )
        self.token_to_pos_ids = torch.full_like(self.token_to_input_ids, 0)
        self.token_to_request_idx = torch.empty_like(self.token_to_input_ids)
        self.token_to_block_idx = torch.empty_like(self.token_to_input_ids)
        # i.e For a set of tokens A B C D E F ..  and block_size 4:
        # token_to_position_in_request is  [0, 1, 2, 3, 4, 5]
        # token_to_local_position_within_kv_block is [0 , 1, 2, 3, 0, 1, 2]
        self.token_to_position_in_request = torch.empty_like(self.token_to_input_ids)
        self.token_to_local_position_within_kv_block = torch.empty_like(self.token_to_input_ids)

        # NOTE: Need to build this outside the UVM / TMS context to avoid IMA.
        if self.is_hybrid_model:
            self.mamba_metadata = MambaMetadata(
                max_requests=self.max_requests,
                max_tokens=self.max_tokens,
                mamba_chunk_size=self.mamba_chunk_size,
            )

        # Allocate large non-graphed buffers.
        need_static_addr = (
            self.static_kv_memory_pointers
            and self.kv_cache_management_mode != KVCacheManagementMode.PERSIST
        )

        ctx_manager = nullcontext()
        if self.unified_memory_level != 0:
            ctx_manager = torch.cuda.use_mem_pool(self.unified_memory_mempool)
        elif HAVE_TORCH_MEMORY_SAVER and need_static_addr:
            ctx_manager = torch_memory_saver.region(
                tag=self.TMS_TAG,
                enable_cpu_backup=(self.kv_cache_management_mode == KVCacheManagementMode.OFFLOAD),
            )
            self._uses_torch_memory_saver = True
        with ctx_manager:
            self._allocate_memory_buffer()
            self._allocate_mamba_states()

        # Allocate Mamba prefix cache if configured
        self.mamba_slot_allocator: Optional[MambaSlotAllocator] = None
        if (
            self.is_hybrid_model
            and self.config.prefix_caching_mamba_gb is not None
            and self.config.prefix_caching_mamba_gb > 0
            and self.config.enable_prefix_caching
        ):
            self._allocate_mamba_cache(self.config.prefix_caching_mamba_gb)

        # Reset tensor-related metadata.
        self.reset_metadata()

    def reinitialize_inference_state_buffers(self):
        """Restore large tensors (KV cache, Mamba states) after a suspend.

        Called by the engine during `resume()`. Initial allocation is in `initialize_all_tensors()`.
        """
        if self.is_tensor_state_allocated:
            return
        self.is_tensor_state_allocated = True

        if self.kv_cache_management_mode == KVCacheManagementMode.PERSIST:
            return

        if self.unified_memory_level != 0 or self._uses_torch_memory_saver:
            # Need to bring back the memory block before we reset it.
            if self._uses_torch_memory_saver:
                tag = self.TMS_TAG
                if torch.distributed.get_rank() == 0:
                    logging.info(
                        "torch_memory_saver: resuming %s, before: %s", tag, device_memory_summary()
                    )
                torch_memory_saver.resume(tag)
                if torch.distributed.get_rank() == 0:
                    logging.info(
                        "torch_memory_saver: resumed  %s, after:  %s", tag, device_memory_summary()
                    )
            if self.kv_cache_management_mode == KVCacheManagementMode.RECOMPUTE:
                self.reset_metadata()
            return

        if self.kv_cache_management_mode == KVCacheManagementMode.OFFLOAD:
            for name, tensor in ((n, getattr(self, n)) for n in self._offloadable_tensor_names):
                tensor.storage().resize_(self._offloadable_storage_sizes[name])
                tensor.copy_(self._offloadable_cpu_backups[name], non_blocking=True)
        elif self.kv_cache_management_mode == KVCacheManagementMode.RECOMPUTE:
            self.is_tensor_state_allocated = False
            self.initialize_all_tensors()

    def deallocate_inference_state_buffers(self):
        """Deallocate large tensors (KV cache, Mamba states) during suspend.

        Called by the engine during `suspend()`. Mirror to `reinitialize_inference_state_buffers()`.
        """
        if not self.is_tensor_state_allocated:
            return
        self.is_tensor_state_allocated = False

        if self.kv_cache_management_mode == KVCacheManagementMode.PERSIST:
            return

        if self.unified_memory_level != 0:
            return

        if self._uses_torch_memory_saver:
            tag = self.TMS_TAG
            if torch.distributed.get_rank() == 0:
                logging.info(
                    "torch_memory_saver: pausing %s, before: %s", tag, device_memory_summary()
                )
            torch_memory_saver.pause(tag)
            if torch.distributed.get_rank() == 0:
                logging.info(
                    "torch_memory_saver: paused  %s, after:  %s", tag, device_memory_summary()
                )
            return

        if self.kv_cache_management_mode == KVCacheManagementMode.OFFLOAD:
            for name, tensor in ((n, getattr(self, n)) for n in self._offloadable_tensor_names):
                self._offloadable_storage_sizes[name] = tensor.storage().size()
                self._offloadable_cpu_backups[name].copy_(tensor, non_blocking=True)
                tensor.storage().resize_(0)
        elif self.kv_cache_management_mode == KVCacheManagementMode.RECOMPUTE:
            # TODO(@lmcafee): check that device == 'cuda'?
            for key in list(vars(self).keys()):
                value = getattr(self, key)
                if isinstance(value, torch.Tensor):
                    delattr(self, key)

    @classmethod
    def round_up_tokens(cls, value, tp_size=None):
        """Round up to nearest multiple of `TOKEN_ROUNDER` that is also divisible by tensor model parallel size."""
        # Make sure divisible by TP size
        if tp_size is None:
            # Check if parallel state is initialized before trying to get TP size
            if parallel_state.is_initialized():
                tp_size = parallel_state.get_tensor_model_parallel_world_size()
            else:
                tp_size = 1
        token_rounder = math.ceil(cls.TOKEN_ROUNDER / tp_size) * tp_size

        return token_rounder * int(math.ceil(int(value) / token_rounder))

    @classmethod
    def round_up_requests(cls, value, tp_size=None):
        """Round up to nearest multiple of `REQUEST_ROUNDER` that is also divisible by tensor model parallel size."""
        # Make sure divisible by TP size
        if tp_size is None:
            # Check if parallel state is initialized before trying to get TP size
            if parallel_state.is_initialized():
                tp_size = parallel_state.get_tensor_model_parallel_world_size()
            else:
                tp_size = 1
        request_rounder = math.ceil(cls.REQUEST_ROUNDER / tp_size) * tp_size

        return request_rounder * int(math.ceil(int(value) / request_rounder))

    def is_static_batching(self) -> bool:
        """Is static batching? False."""
        return False

    def is_decode_only(self) -> bool:
        """
        Return if this iteration we run decode only implementation.
        """
        return self.num_prefill_requests == 0

    def using_cuda_graph_this_step(self) -> bool:
        """Returns True if cuda graphs are being used for this step."""
        return self._using_cuda_graph_this_step

    def has_unfinished_requests(self) -> bool:
        """Test if any requests remain."""
        return self.total_request_count > 0

    def cu_query_lengths(self) -> Tuple[Tensor, int]:
        """Cumulative query sequence lengths."""
        assert self.active_attn_metadata is not None
        return (
            self.active_attn_metadata["mha_metadata"].state_data["cu_query_seq_lengths"],
            self.active_attn_metadata["mha_metadata"].state_data["max_seqlen_q"],
        )

    def cu_kv_lengths(self) -> Tuple[Tensor, Tensor, int]:
        """Cumulative key/value sequence lengths."""
        assert self.active_attn_metadata is not None
        return (
            self.active_attn_metadata["mha_metadata"].state_data["cu_kv_seq_lengths"],
            self.active_attn_metadata["mha_metadata"].state_data["kv_seq_lengths"],
            self.active_attn_metadata["mha_metadata"].state_data["max_seqlen_k"],
        )

    def get_active_sequence_lengths(self) -> Tensor:
        """Total sequence length (query + key) for active requests."""
        lengths = self.request_kv_length_offsets + self.request_query_lengths
        lengths = lengths[self.paused_request_count : self.total_request_count]
        return lengths

    def get_max_sequence_lengths(self) -> Tensor:
        """Maximum sequence length for active requests."""
        return self.request_output_lengths[self.paused_request_count : self.total_request_count]

    def get_active_request_count(self):
        """Returns the current number of active requests."""
        return self.total_request_count - self.paused_request_count

    def append_key_value_cache(self, layer_number: int, key: Tensor, value: Tensor) -> None:
        """Append to KV cache.

        Args:
            layer_number (int): Layer number.
            key (Tensor): Key tensor.
            value (Tensor): Value tensor.
        """
        attention_layer_number = self.layer_map[layer_number - 1]

        if triton_append_key_value_cache is not None and not self.cache_mla_latent:
            # currently does not support MLA latent cache
            return triton_append_key_value_cache(
                layer_number=attention_layer_number,
                key=key,
                value=value,
                memory_buffer=self.memory_buffer,
                padded_active_token_count=self.padded_active_token_count,
                token_to_block_idx=self.token_to_block_idx,
                token_to_local_position_within_kv_block=self.token_to_local_position_within_kv_block,
            )

        block_idx = self.token_to_block_idx[: self.padded_active_token_count]
        local_kv_seq_idx = self.token_to_local_position_within_kv_block[
            : self.padded_active_token_count
        ]

        if not self.cache_mla_latent:
            assert key.size(1) == 1 and value.size(1) == 1

        key = key.squeeze(1)
        # There is no value cache in FlashMLA/absorption
        if not self.cache_mla_latent:
            value = value.squeeze(1)

        if self.cache_mla_latent:
            # We pass the kv_concat as the key in cache_mla_latent
            kv_concat = key
            self.memory_buffer[attention_layer_number, block_idx, local_kv_seq_idx] = kv_concat[
                : self.padded_active_token_count
            ]
        else:
            self.memory_buffer[0, attention_layer_number, block_idx, local_kv_seq_idx] = key[
                : self.padded_active_token_count
            ]
            self.memory_buffer[1, attention_layer_number, block_idx, local_kv_seq_idx] = value[
                : self.padded_active_token_count
            ]

    def key_value_cache(self, layer_number: int) -> Tuple[Tensor, Optional[Tensor], Tensor]:
        """Read from KV cache.

        Args:
            layer_number (int): Layer number.

        Return:
            (Tuple[Tensor, Tensor, Tensor]) The key and value pointer tensors that point
            to blocks within the block-level memory buffer as well as the block table.
        """
        attention_layer_number = self.layer_map[layer_number - 1]

        assert self.active_attn_metadata is not None

        if self.cache_mla_latent:
            return (
                self.memory_buffer[attention_layer_number],
                None,
                self.active_attn_metadata["mha_metadata"].state_data["block_table"],
            )
        else:
            return (
                self.memory_buffer[0, attention_layer_number],
                self.memory_buffer[1, attention_layer_number],
                self.active_attn_metadata["mha_metadata"].state_data["block_table"],
            )

    def mamba_states_cache(
        self, layer_number: int, intermediate: bool = False
    ) -> Tuple[Tensor, Tensor]:
        """Returns the Mamba state tensors for the given layer."""
        assert self.is_hybrid_model, "Only hybrid models have Mamba state tensors"

        mamba_layer_number = self.layer_map[layer_number - 1]
        if intermediate:
            conv_state = self.mamba_intermediate_conv_states[mamba_layer_number]
            ssm_state = self.mamba_intermediate_ssm_states[mamba_layer_number]
        else:
            conv_state = self.mamba_conv_states[mamba_layer_number]
            ssm_state = self.mamba_ssm_states[mamba_layer_number]

        return (conv_state, ssm_state)

    # =========================================================================
    # Mamba prefix cache infrastructure
    # =========================================================================

    def _allocate_mamba_cache(self, mamba_gb: float) -> None:
        """Allocate the Mamba state cache for prefix caching.

        Args:
            mamba_gb: GPU memory budget in GB for the cache.
        """
        import math as _math

        conv_size = _math.prod(self.mamba_conv_states_shape) * self.mamba_conv_states_dtype.itemsize
        ssm_size = _math.prod(self.mamba_ssm_states_shape) * self.mamba_ssm_states_dtype.itemsize
        per_slot_bytes = self.num_mamba_layers * (conv_size + ssm_size)
        total_bytes = int(mamba_gb * 1024**3)
        max_slots = total_bytes // per_slot_bytes
        if max_slots < 1:
            logging.warning(
                "Mamba cache budget (%.3f GB) too small for even 1 slot "
                "(need %.3f GB per slot). Mamba caching disabled.",
                mamba_gb,
                per_slot_bytes / 1024**3,
            )
            return

        self.mamba_slot_allocator = MambaSlotAllocator(
            context=self,
            max_slots=max_slots,
            num_mamba_layers=self.num_mamba_layers,
            conv_states_shape=self.mamba_conv_states_shape,
            ssm_states_shape=self.mamba_ssm_states_shape,
            conv_states_dtype=self.mamba_conv_states_dtype,
            ssm_states_dtype=self.mamba_ssm_states_dtype,
        )
        self.kv_block_allocator.on_blocks_deregistered = (
            self.mamba_slot_allocator.on_kv_blocks_deregistered
        )

        logging.info(
            "Mamba prefix cache: %d slots (%.3f GB), per-slot %.1f KB",
            max_slots,
            max_slots * per_slot_bytes / 1024**3,
            per_slot_bytes / 1024,
        )

    def apply_fused_qk_rotary_emb(
        self, query: Tensor, key: Tensor, cos_sin_emb: Tensor, config: TransformerConfig
    ) -> Tuple[Tensor, Tensor]:
        """
        Apply rotary embedding to query and key tensors using flashinfer's fused rope.
        Args:
            query (Tensor): Query tensor.
            key (Tensor): Key tensor.
            cos_sin_emb (Tensor): Rotary embeddings.
            config (TransformerConfig): Transformer config.

        Return:
            (Tuple[Tensor, Tensor]) Query and Key tensors after applying rotary embeddings.
        """
        assert self.use_flashinfer_fused_rope, "flashinfer fused rope is not enabled"
        n = self.padded_active_token_count
        num_q_heads, head_size = query.shape[-2], query.shape[-1]
        num_k_heads = key.shape[-2]

        # use .view instead of .reshape to avoid extra transpose operations
        query_rope, key_rope = flashinfer.rope.apply_rope_with_cos_sin_cache(
            positions=self.token_to_pos_ids[:n],
            query=query[:n].reshape(n, num_q_heads * head_size),
            key=key[:n].reshape(n, num_k_heads * head_size),
            head_size=head_size,
            cos_sin_cache=cos_sin_emb,
            is_neox=not config.rotary_interleaved,
        )
        return query_rope.reshape(n, 1, num_q_heads, head_size), key_rope.reshape(
            n, 1, num_k_heads, head_size
        )

    def apply_rotary_emb_query(
        self,
        query: Tensor,
        query_emb: Tensor,
        config: TransformerConfig,
        cu_seqlens_q: Tensor,
        cp_group: torch.distributed.ProcessGroup,
        mscale: float = 1.0,
    ) -> Tensor:
        """Apply rotary embedding to query tensor.

        Args:
            query (Tensor): Query tensor.
            query_emb (Tensor): Query rotary embeddings.
            config (TransformerConfig): Transformer config.
            cu_seqlens_q (Tensor): Cumulative sequence lengths.
            cp_group (torch.distributed.ProcessGroup): Process group for context parallel.

        Return:
            (Tensor) Query tensor after applying rotary embeddings.
        """
        n = self.padded_active_token_count
        query_seq_idx = self.token_to_pos_ids[:n]
        query_emb = query_emb[query_seq_idx]
        query[:n] = apply_rotary_pos_emb(
            t=query[:n],
            freqs=query_emb[:n],
            config=config,
            cu_seqlens=cu_seqlens_q,
            cp_group=cp_group,
            mscale=mscale,
        )
        return query

    def apply_rotary_emb_key(
        self,
        key: Tensor,
        key_emb: Tensor,
        config: TransformerConfig,
        cp_group: torch.distributed.ProcessGroup,
        mscale: float = 1.0,
    ) -> Tensor:
        """Apply rotary embedding to key tensor.

        Args:
            key (Tensor): Key tensor.
            key_emb (Tensor): Key rotary embeddings.
            config (TransformerConfig): Transformer config.
            cp_group (torch.distributed.ProcessGroup): Process group for context parallel.

        Return:
            (Tensor) Key tensor after applying rotary embeddings.
        """
        n = self.padded_active_token_count
        key_seq_idx = self.token_to_position_in_request[:n]
        key_emb = key_emb[key_seq_idx]
        if self.is_decode_only():
            if key.shape[0] != n:
                raise AssertionError(
                    f"apply_rotary_emb_key: key.shape[0]={key.shape[0]} != n={n}; "
                    f"padded_active_request_count={self.padded_active_request_count}, "
                    f"active_token_count={self.active_token_count}, total_request_count={self.total_request_count}, "
                    f"paused_request_count={self.paused_request_count}"
                )
            key = apply_rotary_pos_emb(
                t=key[:n], freqs=key_emb[:n], config=config, cp_group=cp_group, mscale=mscale
            )
        else:
            key[:n] = apply_rotary_pos_emb(
                t=key[:n], freqs=key_emb[:n], config=config, cp_group=cp_group, mscale=mscale
            )
        return key

    def reset_attention_state(self) -> None:
        """Reset state used within attention, after each step."""
        # Attention metadata reset is now handled by MHAMetadata.reset()
        for attn_metadata in self.non_graph_attn_metadata.values():
            attn_metadata.reset()
        for attn_metadata in self.graph_attn_metadata.values():
            attn_metadata.reset()
        self.active_attn_metadata = None

        if self.is_hybrid_model:
            self.mamba_metadata.reset_varlen_metadata()

    def reset_mamba_state(self) -> None:
        """Reset state used within Mamba layers."""
        if self.is_hybrid_model:
            self.mamba_metadata.reset()

    def add_dummy_requests_parallel(
        self, requests: Sequence[DynamicInferenceRequest], *, count_as_prefill: bool = True
    ) -> None:
        """Fast path to add dummy requests without allocating real KV blocks."""

        if not requests:
            return

        num_new_requests = len(requests)
        if self.total_request_count + num_new_requests > self.max_requests:
            raise RequestOverflowError(requests[-1].request_id)

        lengths: List[int] = []
        num_tokens_to_generate: List[int] = []
        request_ids: List[int] = []
        prompt_tokens: List[Tensor] = []
        metadata_cols: List[List] = [[] for _ in self.request_metadata_types]

        for req in requests:
            assert isinstance(
                req, DynamicInferenceRequest
            ), "add_dummy_requests_parallel expects DynamicInferenceRequest objects"
            assert (
                req.finished_chunk_token_count == 0
            ), "chunked requests are not supported in add_dummy_requests_parallel"
            assert req.remaining_prompt_tokens is not None, "request missing prompt tokens"
            assert req.sampling_params is not None, "request missing sampling params"
            prefill_chunk_length = req.remaining_prompt_length
            assert prefill_chunk_length > 0, "request without prompt tokens is not supported"
            lengths.append(prefill_chunk_length)
            num_tokens_to_generate.append(req.sampling_params.num_tokens_to_generate)
            request_ids.append(req.request_id)
            prompt_tokens.append(
                req.remaining_prompt_tokens.to(
                    device=self.token_to_input_ids.device, dtype=self.token_to_input_ids.dtype
                )
            )
            for i, m in enumerate(req.tracked_metadata):
                metadata_cols[i].append(m)

        total_new_tokens = sum(lengths)
        if self.active_token_count + total_new_tokens > self.max_tokens:
            raise TokenOverflowError(requests[-1].request_id)

        device = self.request_ids.device
        lengths_tensor = torch.tensor(
            lengths, dtype=self.request_query_lengths.dtype, device=device
        )
        tokens_to_generate_tensor = torch.tensor(
            num_tokens_to_generate, dtype=self.request_query_lengths.dtype, device=device
        )
        request_ids_tensor = torch.tensor(request_ids, dtype=self.request_ids.dtype, device=device)

        block_counts = torch.div(
            lengths_tensor + (self.block_size_tokens - 1),
            self.block_size_tokens,
            rounding_mode="floor",
        )

        start_request_idx = self.total_request_count
        end_request_idx = start_request_idx + num_new_requests
        request_slice = slice(start_request_idx, end_request_idx)

        self.request_ids[request_slice] = request_ids_tensor
        self.request_query_lengths[request_slice] = lengths_tensor
        self.request_in_prefill_status_tensor[request_slice] = 1
        self.request_output_lengths[request_slice] = lengths_tensor + tokens_to_generate_tensor
        self.request_kv_length_offsets[request_slice] = 0
        self.request_kv_block_counts[request_slice] = block_counts
        for i, (label, dtype, _) in enumerate(self.request_metadata_types):
            self.request_metadata[label][request_slice] = torch.tensor(
                metadata_cols[i], dtype=dtype, device=torch.cuda.current_device()
            )

        dummy_block_idx = self.kv_block_allocator.dummy_block_idx
        self.request_last_kv_block_id[request_slice] = dummy_block_idx
        self.request_last_kv_block_offset[request_slice] = torch.remainder(
            lengths_tensor - 1, self.block_size_tokens
        )

        kv_block_view = self.request_to_kv_block_ids[request_slice]
        kv_block_view.fill_(-1)
        block_counts_list = block_counts.tolist()
        for row, block_count in enumerate(block_counts_list):
            kv_block_view[row, :block_count] = dummy_block_idx

        token_start = self.active_token_count
        token_end = token_start + total_new_tokens
        token_slice = slice(token_start, token_end)

        concatenated_tokens = torch.cat(prompt_tokens, dim=0)
        assert concatenated_tokens.numel() == total_new_tokens
        self.token_to_input_ids[token_slice] = concatenated_tokens

        lengths_long = lengths_tensor.to(dtype=torch.long)
        request_indices = torch.arange(
            start_request_idx,
            end_request_idx,
            device=self.token_to_request_idx.device,
            dtype=self.token_to_request_idx.dtype,
        )
        token_request_indices = torch.repeat_interleave(
            request_indices.to(dtype=torch.long), lengths_long
        )
        self.token_to_request_idx[token_slice] = token_request_indices

        max_length = int(lengths_tensor.max().item())
        position_template = torch.arange(
            max_length,
            device=self.token_to_position_in_request.device,
            dtype=self.token_to_position_in_request.dtype,
        )
        expanded_positions = position_template.unsqueeze(0).expand(num_new_requests, -1)
        mask = position_template.unsqueeze(0) < lengths_long.unsqueeze(1)
        positions = expanded_positions[mask]
        assert positions.numel() == total_new_tokens
        self.token_to_position_in_request[token_slice] = positions
        self.token_to_pos_ids[token_slice] = positions
        self.token_to_local_position_within_kv_block[token_slice] = torch.remainder(
            positions, self.block_size_tokens
        )
        self.token_to_block_idx[token_slice] = dummy_block_idx

        if self.is_hybrid_model:
            for logical_idx, request_idx in enumerate(range(start_request_idx, end_request_idx)):
                mamba_idx = self.mamba_metadata.allocate_slot()
                if mamba_idx is None:
                    raise ContextOverflowError(
                        requests[logical_idx].request_id, "No Mamba slots available"
                    )
                self.mamba_conv_states[:, mamba_idx] = 0.0
                self.mamba_ssm_states[:, mamba_idx] = 0.0
                self.mamba_metadata.request_to_mamba_state_idx[request_idx] = mamba_idx

        self.active_token_count = token_end
        self.total_request_count = end_request_idx
        if count_as_prefill:
            self.num_prefill_requests += num_new_requests

    def add_dummy_requests_for_cudagraph_capture(
        self, graph_dimensions: InferenceBatchDimensions
    ) -> None:
        """
        Adds dummy requests to reflect the number of prefill and decode requests in the graph config.
        These are using during cuda graph captures.
        """
        prefill_tokens = graph_dimensions.token_count - (
            graph_dimensions.decode_req_count * (self.num_speculative_tokens + 1)
        )

        # Pre-construct shared objects (safe due to deep copy in DynamicInferenceRequest.__post_init__)
        shared_sampling_params = SamplingParams(num_tokens_to_generate=1, termination_id=-1)
        shared_decode_tokens = torch.zeros(
            self.num_speculative_tokens + 1, dtype=torch.long, device=torch.cuda.current_device()
        )

        decode_requests = [
            DynamicInferenceRequest(
                request_id=i,
                prompt_tokens=shared_decode_tokens,
                sampling_params=shared_sampling_params,
            )
            for i in range(graph_dimensions.decode_req_count)
        ]
        self.add_dummy_requests_parallel(decode_requests, count_as_prefill=False)
        if graph_dimensions.prefill_req_count == 0:
            self.num_prefill_requests = 0
            return

        per_prefill_tokens = prefill_tokens // graph_dimensions.prefill_req_count
        rem_prefill_tokens = prefill_tokens % graph_dimensions.prefill_req_count

        # If there are remaining prefill tokens, we evenly distribute them to the prefill requests
        # starting from the first prefill request until we run out of remaining prefill tokens

        prefill_token_counts = [
            per_prefill_tokens + (1 if i < rem_prefill_tokens else 0)
            for i in range(graph_dimensions.prefill_req_count)
        ]

        assert per_prefill_tokens > 0
        # Create a single large tensor and slice from it for each prefill request
        max_prefill_tokens = per_prefill_tokens + (1 if rem_prefill_tokens > 0 else 0)
        shared_prefill_tokens = torch.zeros(
            max_prefill_tokens, dtype=torch.long, device=torch.cuda.current_device()
        )

        prefill_requests = [
            DynamicInferenceRequest(
                request_id=i + graph_dimensions.decode_req_count,
                prompt_tokens=shared_prefill_tokens[: prefill_token_counts[i]],
                sampling_params=shared_sampling_params,
            )
            for i in range(graph_dimensions.prefill_req_count)
        ]
        self.add_dummy_requests_parallel(prefill_requests)
        self.num_prefill_requests = graph_dimensions.prefill_req_count

    @property
    def num_decode_requests(self) -> int:
        """
        Returns the number of decode requests.
        """
        return self.total_request_count - self.paused_request_count - self.num_prefill_requests

    def add_dummy_requests_for_expert_parallel_step(self) -> None:
        """Minimal context setup so an EP rank with no real requests can replay
        an already-captured cuda graph without crashing or corrupting memory.

        This is the fast alternative to add_dummy_requests_for_cudagraph_capture
        (which goes through the heavyweight add_dummy_requests_parallel path).

        We setup minimal state such the initialize_attention_state and the forward
        pass can run without error.

        """
        smallest_cuda_graph_dimensions = min(
            [x for x in self.cuda_graph_batch_dimensions_list if x.prefill_req_count == 0]
        )
        # the smallest cuda graph is decode only.
        assert smallest_cuda_graph_dimensions.prefill_req_count == 0

        N = smallest_cuda_graph_dimensions.decode_req_count
        tokens_per_request = self.num_speculative_tokens + 1
        T = smallest_cuda_graph_dimensions.token_count  # N * tokens_per_request
        dummy_block_idx = self.kv_block_allocator.dummy_block_idx

        # 1. Request counts and token count.
        #    With speculative decoding each decode request has (num_speculative_tokens + 1) tokens.
        self.total_request_count = N
        self.active_token_count = T
        self.num_prefill_requests = 0

        # 2. Per-request state consumed by mha_metadata.update().
        self.request_query_lengths[0:N].fill_(tokens_per_request)
        self.request_kv_length_offsets[0:N].fill_(0)
        self.request_to_kv_block_ids[0:N, 0] = dummy_block_idx

        # 3. Token-level state consumed by the triton KV append kernel.
        self.token_to_block_idx[0:T] = dummy_block_idx
        self.token_to_local_position_within_kv_block[0:T] = (
            torch.arange(T, device=self.token_to_block_idx.device) % tokens_per_request
        )

        if self.is_hybrid_model:
            # 4. token_to_request_idx: needed by mamba_metadata.update() for hybrid models.
            self.token_to_request_idx[0:T] = torch.repeat_interleave(
                torch.arange(
                    0,
                    N,
                    device=self.token_to_request_idx.device,
                    dtype=self.token_to_request_idx.dtype,
                ),
                tokens_per_request,
            )

            # 5. Mamba state: allocate slots for dummy requests.
            self.mamba_metadata.request_to_mamba_state_idx[0:N] = (
                self.mamba_metadata.batch_allocate_slots(N)
            )

    def initialize_attention_state(
        self,
        *,
        construct_graph_dimensions: Optional[InferenceBatchDimensions] = None,
        is_expert_parallel_dummy_cuda_graph_step: bool = False,
    ) -> None:
        """Initialize attention state so that every layer can use it.

        Args:
            construct_graph_dimensions (Optional[InferenceBatchDimensions]):
                The graph config to use for constructing the cuda graphs.
            is_expert_parallel_dummy_cuda_graph_step (bool):
                Whether this is a dummy expert model parallel step.
        Return:
            None.
        """
        self.is_creating_cuda_graphs = construct_graph_dimensions is not None
        assert not (
            self.is_creating_cuda_graphs and is_expert_parallel_dummy_cuda_graph_step
        ), "Dummy expert model parallel steps should not be creating cuda graphs."

        # If in CUDA graph creation mode, add dummy requests for CUDA graph capture
        if is_expert_parallel_dummy_cuda_graph_step:
            self.add_dummy_requests_for_expert_parallel_step()
        elif self.is_creating_cuda_graphs:
            self.add_dummy_requests_for_cudagraph_capture(construct_graph_dimensions)

        batch_dimensions = InferenceBatchDimensions(
            token_count=self.active_token_count,
            prefill_req_count=self.num_prefill_requests,
            decode_req_count=self.num_decode_requests,
        )

        self.batch_dimensions = batch_dimensions

        requires_mamba_state_extraction = False
        if self.is_hybrid_model and self.mamba_slot_allocator is not None:
            requires_mamba_state_extraction = (
                self.mamba_slot_allocator.get_intermediate_offsets() is not None
            )

        best_graph = CUDAGraphBatchDimensionBuilder.match_graph_config(
            batch_dimensions,
            self.cuda_graph_batch_dimensions_list,
            smallest_non_decode_cuda_graph_size=self.smallest_non_decode_cuda_graph_size,
            strict=self.is_hybrid_model,
            decode_only_cuda_graphs=(not self.use_cuda_graphs_for_non_decode_steps),
            explicit_chunked_prefill=self.is_chunked_prefill_enabled() and self.is_hybrid_model,
            requires_mamba_state_extraction=requires_mamba_state_extraction,
            ep_group=self.expert_model_parallel_group,
        )
        self._using_cuda_graph_this_step = best_graph is not None

        if construct_graph_dimensions is not None:
            assert self._using_cuda_graph_this_step

        if is_expert_parallel_dummy_cuda_graph_step and not self.using_cuda_graph_this_step():
            # If we are here, this means that CUDAGraphBatchDimensionBuilder.match_graph_config
            # could not find a compatible cuda graph for the dummy forward step.
            # Now, we need not do the remaining setup. The controller
            # will directly call the model forward pass with a single token.
            return

        if self.using_cuda_graph_this_step():
            self.padded_batch_dimensions = best_graph
        else:
            if self.is_decode_only():
                if self.num_speculative_tokens > 0:
                    padded_decode_req_count = min(
                        self.max_requests, self.round_up_requests(self.num_decode_requests)
                    )
                    padded_token_count = padded_decode_req_count * (self.num_speculative_tokens + 1)
                else:
                    padded_token_count = min(
                        self.max_tokens,
                        self.max_requests,
                        self.round_up_tokens(self.active_token_count),
                    )
                    padded_decode_req_count = padded_token_count
                padded_prefill_req_count = 0
            else:
                padded_token_count = self.round_up_tokens(self.active_token_count)
                target_padding_req_count = min(
                    self.max_requests,
                    self.round_up_requests(self.total_request_count - self.paused_request_count),
                )
                padded_decode_req_count = self.num_decode_requests
                padded_prefill_req_count = target_padding_req_count - padded_decode_req_count
            self.padded_batch_dimensions = InferenceBatchDimensions(
                token_count=padded_token_count,
                prefill_req_count=padded_prefill_req_count,
                decode_req_count=padded_decode_req_count,
            )
        self.padded_active_token_count = self.padded_batch_dimensions.token_count
        self.padded_active_request_count = self.padded_batch_dimensions.req_count
        self.padding_slice = slice(self.active_token_count, self.padded_active_token_count)

        # Update token position indexes.
        self.token_to_block_idx[self.active_token_count : self.padded_active_token_count] = (
            self.kv_block_allocator.dummy_block_idx
        )
        self.token_to_local_position_within_kv_block[
            self.active_token_count : self.padded_active_token_count
        ] = 0
        self.token_to_position_in_request[
            self.active_token_count : self.padded_active_token_count
        ] = 0

        self.active_attn_metadata = (
            self.graph_attn_metadata  # type: ignore[assignment]
            if self.using_cuda_graph_this_step()
            else self.non_graph_attn_metadata  # type: ignore[assignment]
        )

        # Update cu_query_seq_lengths, max_seqlen_q.
        active_slice = slice(self.paused_request_count, self.total_request_count)
        query_lengths_view = self.request_query_lengths[active_slice]
        request_kv_length_offsets_view = self.request_kv_length_offsets[active_slice]
        request_to_kv_block_ids_view = self.request_to_kv_block_ids[active_slice]

        attn_dimensions = batch_dimensions
        if self.using_cuda_graph_this_step():
            # Treat some decode requests as prefill requests to fit the cuda graph batch dimension.
            if batch_dimensions.decode_req_count > self.padded_batch_dimensions.decode_req_count:
                total_req = batch_dimensions.req_count
                adjusted_decode_req_count = self.padded_batch_dimensions.decode_req_count
                adjusted_prefill_req_count = total_req - adjusted_decode_req_count
                attn_dimensions = InferenceBatchDimensions(
                    token_count=batch_dimensions.token_count,
                    prefill_req_count=adjusted_prefill_req_count,
                    decode_req_count=adjusted_decode_req_count,
                )

        assert self.active_attn_metadata is not None
        self.active_attn_metadata["mha_metadata"].update(
            request_query_lengths=query_lengths_view,
            request_kv_length_offsets=request_kv_length_offsets_view,
            request_to_kv_block_ids=request_to_kv_block_ids_view,
            batch_dimensions=attn_dimensions,
            padded_batch_dimensions=self.padded_batch_dimensions,
            num_speculative_tokens=self.num_speculative_tokens,
        )

        if self.is_hybrid_model:
            active_mamba_indices_view = self.mamba_metadata.request_to_mamba_state_idx[active_slice]
            token_to_request_idx_view = self.token_to_request_idx[: self.active_token_count]
            cu_seqlens = self.active_attn_metadata["mha_metadata"].state_data[
                "cu_query_seq_lengths"
            ]
            self.mamba_metadata.update(
                active_mamba_indices_view,
                token_to_request_idx_view,
                cu_seqlens,
                batch_dimensions=attn_dimensions,
                padded_batch_dimensions=self.padded_batch_dimensions,
                enable_chunked_prefill=self.is_chunked_prefill_enabled(),
            )

            # Auto-enable Triton conv1d for CUDA graph steps. The per-request
            # conv loop launches a variable number of kernels with .item()
            # calls, which is incompatible with CUDA graph capture/replay.
            if self._using_cuda_graph_this_step:
                self._use_triton_conv1d_this_step = True
            else:
                self._use_triton_conv1d_this_step = self.use_triton_conv1d

        if self.moe_enable_routing_replay:
            if self.using_cuda_graph_this_step():
                self.moe_routing_metadata.enable_static_buffer_recording()
            else:
                self.moe_routing_metadata.disable_static_buffer_recording()

    def reset_tensors(self) -> None:
        """Fill all GPU tensors with sentinel values."""

        # Reset request indexes.
        self.request_ids.fill_(-1)
        self.request_query_lengths.fill_(0)
        self.request_output_lengths.fill_(0)
        self.request_kv_length_offsets.fill_(0)
        self.request_kv_block_counts.fill_(0)
        self.request_last_kv_block_id.fill_(-1)
        self.request_last_kv_block_offset.fill_(0)
        self.request_to_kv_block_ids.fill_(-1)
        self.request_in_prefill_status_tensor.fill_(-1)

        # Reset request metadata.
        for metadata_tensor in self.request_metadata.values():
            metadata_tensor.fill_(0)

        # Reset token indexes.
        self.token_to_input_ids.fill_(0)
        self.token_to_pos_ids.fill_(0)
        self.token_to_request_idx.fill_(-1)
        self.token_to_position_in_request.fill_(0)
        self.token_to_block_idx.fill_(-1)
        self.token_to_local_position_within_kv_block.fill_(0)

    def reset_metadata(self) -> None:
        """Reset all bookkeeping state: counters, block allocator, attention/mamba state.

        This must be called after ``initialize_all_tensors()`` and after any
        suspend/resume cycle to bring the context back to a clean state.
        """

        # Reset request/token counts.
        self.total_request_count = 0
        self.active_token_count = 0
        self.lifetime_prefill_token_count = 0
        self.paused_request_count = 0
        self.batch_dimensions = InferenceBatchDimensions(
            token_count=0, prefill_req_count=0, decode_req_count=0
        )
        self.padded_batch_dimensions = InferenceBatchDimensions(
            token_count=0, prefill_req_count=0, decode_req_count=0
        )
        self.padded_active_token_count = 0
        self.padded_active_request_count = 0
        self.paused_tokens = None
        self.paused_speculative_tokens = None

        # Reset attention, mamba, and block allocator state.
        self.reset_attention_state()
        self.reset_mamba_state()
        self.kv_block_allocator.reset()
        self.request_to_kv_block_ids.fill_(-1)

        # Reset step counter and LRU clock
        self.step_count = 0
        self.prefix_cache_lru_clock = 0

        # Reset chunked prefill state
        self.chunked_prefill_request_id = -1
        self.num_prefill_requests = 0
        self._using_cuda_graph_this_step = False
        self._use_triton_conv1d_this_step = self.use_triton_conv1d
        self.is_creating_cuda_graphs = False
        self.padded_batch_dimensions = InferenceBatchDimensions(
            token_count=0, prefill_req_count=0, decode_req_count=0
        )

    def reset(self) -> None:
        """Reset entire context.

        This method does:
        - Fill all GPU tensors with sentinel values.
        - Reset active/paused request/token counts to zero.
        - Reset available blocks to entire memory.

        This method is useful after cuda graph warmup iterations, where the
        context's memory buffer is referenced by the cuda graph system and
        cannot be deallocated.
        """
        self.reset_tensors()
        self.reset_metadata()

        # Reset Mamba cache state
        if self.mamba_slot_allocator is not None:
            self.mamba_slot_allocator.reset()

    def current_input_and_position_ids(
        self, *, num_warmup_tokens: Optional[int] = None
    ) -> Tuple[Tensor, Tensor]:
        """Flattened input and position IDs for forward pass.

        Args:
            num_warmup_tokens (Optional[int]): Number of tokens to return for
                warming up cuda graphs. Must be less than or equal to
                `max_tokens`.

        Return:
            (Tuple[Tensor, Tensor]) Flattened active input and position IDs.
        """
        num_tokens = num_warmup_tokens or self.padded_active_token_count
        assert num_tokens >= self.padded_batch_dimensions.decode_req_count * (
            self.num_speculative_tokens + 1
        )
        return (
            self.token_to_input_ids[:num_tokens].unsqueeze(0),
            self.token_to_pos_ids[:num_tokens].unsqueeze(0),
        )

    def last_token_logits(self, logits: Tensor) -> Tensor:
        """Last tokens of logits.

        Args:
            logits (Tensor): Output logits of forward pass.

        Return:
            (Tensor) Last token logits.
        """
        paused = self.paused_request_count
        total = self.total_request_count
        query_lengths = self.request_query_lengths[paused:total]

        # todo: @lmcafee, remove these asserts?
        assert logits.size(0) == 1, f"logits.size(0) ({tuple(logits.shape)}) != 1"
        assert logits.size(1) == self.padded_active_token_count, (
            f"logits.size(1) ({tuple(logits.shape)}) != "
            f"padded_active_token_count ({self.padded_active_token_count})."
        )
        logits_2d = logits.squeeze(0)
        last_token_idxs = torch.cumsum(query_lengths, dim=0) - 1
        return logits_2d[last_token_idxs, :]

    def _compute_prefix_match(
        self, req: DynamicInferenceRequest, prefill_chunk_length: int
    ) -> Tuple[list, int, int, int, int, int]:
        """Compute prefix match results and skip counts for a request chunk.

        Shared by check_availability (budget checks) and add_request (execution).

        Returns:
            Tuple of (matched_block_ids, num_blocks_from_pool,
                      already_allocated_blocks, overall_required_blocks,
                      prefix_skip_tokens, effective_prefill_chunk_length).
        """
        finished = req.finished_chunk_token_count
        already_allocated_blocks = (finished + self.block_size_tokens - 1) // self.block_size_tokens
        overall_required_blocks = (
            finished + prefill_chunk_length + self.block_size_tokens - 1
        ) // self.block_size_tokens

        # Fast path: skip all prefix matching when disabled.
        if not self.enable_prefix_caching:
            num_blocks_from_pool = max(0, overall_required_blocks - already_allocated_blocks)
            return (
                [],
                num_blocks_from_pool,
                already_allocated_blocks,
                overall_required_blocks,
                0,
                prefill_chunk_length,
            )

        matched_block_ids, _ = self._find_kv_match_count(
            req, already_allocated_blocks, overall_required_blocks
        )
        num_matched = len(matched_block_ids)

        block_aligned = finished % self.block_size_tokens == 0
        if num_matched > 0 and block_aligned:
            prefix_skip_tokens = min(num_matched * self.block_size_tokens, prefill_chunk_length - 1)
        else:
            prefix_skip_tokens = 0

        # Hybrid models with Mamba caching: skip based on Mamba match count
        if self.is_hybrid_model and self.mamba_slot_allocator is not None:
            num_mamba_matched = getattr(req, '_mamba_num_matched_blocks', 0)
            assert (
                num_mamba_matched <= num_matched
            ), f"Mamba match ({num_mamba_matched}) > KV match ({num_matched})"
            if num_mamba_matched > 0 and block_aligned:
                raw_skip = num_mamba_matched * self.block_size_tokens
                if raw_skip >= prefill_chunk_length:
                    # Back off to previous block with cached Mamba state
                    mamba_map = self.mamba_slot_allocator.hash_to_block_id
                    backed_off_blocks = 0
                    for j in range(num_mamba_matched - 2, -1, -1):
                        if req.precomputed_block_hashes[j] in mamba_map:
                            backed_off_blocks = j + 1
                            break
                    prefix_skip_tokens = backed_off_blocks * self.block_size_tokens
                else:
                    prefix_skip_tokens = raw_skip
            else:
                prefix_skip_tokens = 0
        elif self.is_hybrid_model:
            prefix_skip_tokens = 0

        effective_prefill_chunk_length = prefill_chunk_length - prefix_skip_tokens
        num_blocks_from_pool = max(
            0, overall_required_blocks - already_allocated_blocks - num_matched
        )

        return (
            matched_block_ids,
            num_blocks_from_pool,
            already_allocated_blocks,
            overall_required_blocks,
            prefix_skip_tokens,
            effective_prefill_chunk_length,
        )

    def check_availability(self, req: DynamicInferenceRequest) -> Tuple[bool, bool, bool]:
        """
        Check if the request can be added to the context.
        """
        # Note that for hybrid models checking the total request count is sufficient
        # because we allocate a single set of Mamba state tensors for each request
        request_can_be_added = (
            self.total_request_count < self.max_requests and self.paused_request_count == 0
        )

        (_, num_blocks_from_pool, _, _, _, effective_prefill_chunk_length) = (
            self._compute_prefix_match(req, req.remaining_prompt_length)
        )

        request_tokens_can_be_added = (
            self.active_token_count + effective_prefill_chunk_length <= self.max_tokens
        )
        kv_cache_available = self.kv_block_allocator.is_memory_available(num_blocks_from_pool)
        return request_can_be_added, request_tokens_can_be_added, kv_cache_available

    def _find_kv_match_count(
        self, req: DynamicInferenceRequest, start_block: int, end_block: int
    ) -> tuple[list[int], int]:
        """Find cached blocks matching a range of the prompt using precomputed hashes.

        Looks up hashes in req.precomputed_block_hashes[start_block:end_block] against
        the block allocator's hash-to-block mapping. Stops at the first non-match.

        Args:
            req: The inference request with precomputed_block_hashes set.
            start_block: First block index to match (inclusive).
            end_block: Last block index to match (exclusive); clamped to hash count.

        Returns:
            Tuple of:
            - List of matched block IDs (consecutive from start_block)
            - Parent hash of the last matched block (0 if no matches)
        """
        # Early return if prefix caching is disabled
        if not self.enable_prefix_caching:
            return [], 0

        # Early return if request has no precomputed hashes
        if not req.precomputed_block_hashes:
            return [], 0

        # Clamp end_block to the number of precomputed hashes (the trailing
        # partial block has no hash).
        end_block = min(end_block, len(req.precomputed_block_hashes))
        if start_block >= end_block:
            return [], 0

        hashes = req.precomputed_block_hashes[start_block:end_block]
        kv_hash_to_block = self.kv_block_allocator.kv_hash_to_block_id

        # Find longest KV prefix by iterating block hashes from end.
        # Parent-chained hashes guarantee: if hash at position N exists,
        # all hashes 0..N also exist. So first match from end = longest prefix.
        for i in range(len(hashes) - 1, -1, -1):
            if hashes[i] in kv_hash_to_block:
                num_matched = i + 1
                matched_blocks = [kv_hash_to_block[hashes[j]] for j in range(num_matched)]
                parent_hash = hashes[num_matched - 1]
                return matched_blocks, parent_hash

        return [], 0

    def add_request(
        self, req: DynamicInferenceRequest, prefill_chunk_length: Optional[int] = None
    ) -> None:
        """Add request to context. At this stage, we assume that the request is valid and can be added, as the checks are done in the schedule function.

        Args:
            req (DynamicInferenceRequest): Request to add.
            prefill_chunk_length (Optional[int]): Length of prefill chunk to add. If None, the request will be fully added.

        Return:
            None
        """
        # If tensor state is deallocated, do not add request.
        if not self.is_tensor_state_allocated:
            raise TensorStateDeallocatedError(req.request_id)

        # Prefill chunk length.
        if prefill_chunk_length is None:
            prefill_chunk_length = req.remaining_prompt_length

        assert prefill_chunk_length > 0, "Chunk length is 0"
        assert (
            prefill_chunk_length <= req.remaining_prompt_length
        ), "Prefill chunk length is greater than remaining prompt length"

        # =========================================================================
        # Block allocation + prefix matching + prefill skipping
        # =========================================================================
        (
            matched_block_ids,
            num_blocks_from_pool,
            already_allocated_blocks,
            overall_required_blocks,
            prefix_skip_tokens,
            effective_prefill_chunk_length,
        ) = self._compute_prefix_match(req, prefill_chunk_length)
        num_matched_blocks = len(matched_block_ids)
        effective_kv_offset = req.finished_chunk_token_count + prefix_skip_tokens

        # Track prefix cache hits.
        if num_matched_blocks > 0:
            self.prefix_cache_hits += 1
            self.prefix_cache_blocks_matched += num_matched_blocks

        # Slice tokens to skip matched prefix
        this_round_tokens = req.remaining_prompt_tokens[prefix_skip_tokens:prefill_chunk_length]

        new_block_ids = None
        if num_blocks_from_pool > 0:
            new_block_ids = self.kv_block_allocator.allocate_memory_blocks(num_blocks_from_pool)
            if new_block_ids is None or len(new_block_ids) != num_blocks_from_pool:
                raise BlockOverflowError(req.request_id)

        # Increment ref counts and update timestamps for matched (shared) blocks
        if num_matched_blocks > 0:
            matched_tensor = torch.tensor(
                matched_block_ids, dtype=torch.int32, device=torch.cuda.current_device()
            )
            self.kv_block_allocator.block_ref_counts[matched_tensor] += 1
            if self.prefix_caching_eviction_policy == PrefixCachingEvictionPolicy.LRU:
                self.kv_block_allocator.update_timestamps(matched_tensor)

        # Note that we decremented the total_request_count for the chunked prefill request
        # in update_requests, so setting current_id to the total_request_count will again
        # make the last request the continuing chunked prefill request if one exists.
        current_id = self.total_request_count

        if current_id >= self.max_requests:
            raise RequestOverflowError(req.request_id)

        if self.active_token_count + effective_prefill_chunk_length > self.max_tokens:
            raise TokenOverflowError(req.request_id)

        self.request_ids[current_id] = req.request_id

        # Handle request metadata.
        assert (
            req.get_metadata_types() == self.request_metadata_types
        ), "Request added to context with invalid metadata types"
        metadata = req.tracked_metadata
        metadata_types = req.get_metadata_types()
        for m, m_type in zip(metadata, metadata_types):
            label, _, _ = m_type
            if not isinstance(m, torch.Tensor):
                m = torch.as_tensor(
                    m,
                    device=self.request_metadata[label].device,
                    dtype=self.request_metadata[label].dtype,
                )

            self.request_metadata[label][current_id] = m

        # Handle length and block assignments.
        self.request_query_lengths[current_id] = effective_prefill_chunk_length
        self.request_in_prefill_status_tensor[current_id] = 1
        self.request_output_lengths[current_id] = (
            req.finished_chunk_token_count
            + prefill_chunk_length
            + req.sampling_params.num_tokens_to_generate
        )

        # Assign blocks: matched blocks at [already_allocated, already_allocated + num_matched),
        # then newly allocated blocks after that.
        match_start = already_allocated_blocks
        new_block_start = already_allocated_blocks + num_matched_blocks
        if num_matched_blocks > 0:
            self.request_to_kv_block_ids[current_id][
                match_start : match_start + num_matched_blocks
            ] = matched_tensor
        if new_block_ids is not None:
            self.request_to_kv_block_ids[current_id][
                new_block_start : new_block_start + len(new_block_ids)
            ] = new_block_ids

        self.request_kv_length_offsets[current_id] = effective_kv_offset
        self.request_kv_block_counts[current_id] = overall_required_blocks
        self.request_last_kv_block_id[current_id] = self.request_to_kv_block_ids[current_id][
            overall_required_blocks - 1
        ]
        self.request_last_kv_block_offset[current_id] = (
            prefill_chunk_length + req.finished_chunk_token_count - 1
        ) % self.block_size_tokens

        token_offset_range = torch.arange(
            effective_kv_offset,
            effective_kv_offset + effective_prefill_chunk_length,
            device=self.token_to_pos_ids.device,
        )
        self.token_to_pos_ids[
            self.active_token_count : self.active_token_count + effective_prefill_chunk_length
        ] = token_offset_range
        self.token_to_input_ids[
            self.active_token_count : self.active_token_count + effective_prefill_chunk_length
        ] = this_round_tokens
        self.token_to_request_idx[
            self.active_token_count : self.active_token_count + effective_prefill_chunk_length
        ] = current_id
        self.token_to_position_in_request[
            self.active_token_count : self.active_token_count + effective_prefill_chunk_length
        ] = token_offset_range
        self.token_to_block_idx[
            self.active_token_count : self.active_token_count + effective_prefill_chunk_length
        ] = self.request_to_kv_block_ids[current_id][token_offset_range // self.block_size_tokens]
        self.token_to_local_position_within_kv_block[
            self.active_token_count : self.active_token_count + effective_prefill_chunk_length
        ] = (token_offset_range % self.block_size_tokens)

        # Register hashes for completely filled blocks (skip matched blocks).
        # Two disjoint ranges may need registration:
        #   Range 1: [previously_complete, min(already_allocated_blocks, num_complete_blocks))
        #       — the partial block from a prior chunk that this chunk's tokens completed
        #   Range 2: [already_allocated_blocks + num_matched_blocks, num_complete_blocks)
        #       — newly allocated blocks that are now complete
        if self.enable_prefix_caching and req.precomputed_block_hashes:
            total_tokens_after = req.finished_chunk_token_count + prefill_chunk_length
            num_complete_blocks = total_tokens_after // self.block_size_tokens
            previously_complete = req.finished_chunk_token_count // self.block_size_tokens

            def _register_range(start: int, end: int):
                if start >= end:
                    return
                block_ids_to_hash = self.request_to_kv_block_ids[current_id][start:end].tolist()
                block_hashes_slice = req.precomputed_block_hashes[start:end]
                self.kv_block_allocator.register_kv_block_hashes(
                    block_ids_to_hash, block_hashes_slice
                )

            # Range 1: prior-chunk partial block that this chunk just completed
            _register_range(previously_complete, min(already_allocated_blocks, num_complete_blocks))
            # Range 2: newly allocated (non-matched) blocks that are now complete
            _register_range(already_allocated_blocks + num_matched_blocks, num_complete_blocks)

        if self.is_hybrid_model and req.finished_chunk_token_count == 0:
            # Allocate a slot for Mamba states
            mamba_idx = self.mamba_metadata.allocate_slot()
            if mamba_idx is None:
                raise ContextOverflowError(req.request_id, "No Mamba slots available")
            self.mamba_metadata.request_to_mamba_state_idx[self.total_request_count] = mamba_idx

            # Restore Mamba state from the block corresponding to prefix_skip_tokens
            restore_block_count = prefix_skip_tokens // self.block_size_tokens
            restored = False
            if restore_block_count > 0 and self.mamba_slot_allocator is not None:
                restore_block_id = matched_block_ids[restore_block_count - 1]
                restored = self.mamba_slot_allocator.restore_to_live(
                    self.total_request_count, restore_block_id
                )
            if not restored:
                self.mamba_conv_states[:, mamba_idx] = 0.0
                self.mamba_ssm_states[:, mamba_idx] = 0.0

            # Compute intermediate offsets for state extraction during forward pass
            if self.mamba_slot_allocator is not None:
                self.mamba_slot_allocator.compute_and_store_offsets(
                    req,
                    current_id,
                    prefix_skip_tokens,
                    prefill_chunk_length,
                    num_matched_blocks,
                    matched_block_ids,
                    overall_required_blocks,
                )

        self.active_token_count += effective_prefill_chunk_length
        self.lifetime_prefill_token_count += effective_prefill_chunk_length
        self.total_request_count += 1
        self.num_prefill_requests += 1

    def _move_book_keeping_tensors(
        self, src_idxs, dst_idxs, next_tokens, new_speculative_tokens=None
    ):
        """
        Move all the relevent booking tensors with src idxs to dst idxs
        """
        self.request_kv_length_offsets[dst_idxs] = self.request_kv_length_offsets[src_idxs]
        self.request_in_prefill_status_tensor[dst_idxs] = self.request_in_prefill_status_tensor[
            src_idxs
        ]
        self.request_query_lengths[dst_idxs] = self.request_query_lengths[src_idxs]
        self.request_output_lengths[dst_idxs] = self.request_output_lengths[src_idxs]
        self.request_ids[dst_idxs] = self.request_ids[src_idxs]
        next_tokens[dst_idxs] = next_tokens[src_idxs]  # num tokens sames as num samples
        if new_speculative_tokens is not None:
            new_speculative_tokens[:, dst_idxs] = new_speculative_tokens[:, src_idxs]
        self.request_to_kv_block_ids[dst_idxs] = self.request_to_kv_block_ids[src_idxs]
        self.request_kv_block_counts[dst_idxs] = self.request_kv_block_counts[src_idxs]
        self.request_last_kv_block_id[dst_idxs] = self.request_last_kv_block_id[src_idxs]
        self.request_last_kv_block_offset[dst_idxs] = self.request_last_kv_block_offset[src_idxs]

        for metadata_tensor in self.request_metadata.values():
            metadata_tensor[dst_idxs] = metadata_tensor[src_idxs]

        if self.is_hybrid_model:
            self.mamba_metadata.request_to_mamba_state_idx[dst_idxs] = (
                self.mamba_metadata.request_to_mamba_state_idx[src_idxs]
            )

    def _swap_book_keeping_tensors(
        self, src_idxs, dst_idxs, next_tokens=None, new_speculative_tokens=None
    ):
        """
        Swaps all the relevent booking tensors with src idxs to dst idxs
        """
        tensor_swap(self.request_kv_length_offsets, src_idxs, dst_idxs)
        tensor_swap(self.request_query_lengths, src_idxs, dst_idxs)
        tensor_swap(self.request_in_prefill_status_tensor, src_idxs, dst_idxs)
        tensor_swap(self.request_output_lengths, src_idxs, dst_idxs)
        tensor_swap(self.request_ids, src_idxs, dst_idxs)
        tensor_swap(self.request_to_kv_block_ids, src_idxs, dst_idxs)
        tensor_swap(self.request_kv_block_counts, src_idxs, dst_idxs)
        tensor_swap(self.request_last_kv_block_id, src_idxs, dst_idxs)
        tensor_swap(self.request_last_kv_block_offset, src_idxs, dst_idxs)

        if next_tokens is not None:
            tensor_swap(next_tokens, src_idxs, dst_idxs)

        if new_speculative_tokens is not None:
            # new_speculative_tokens has request dimension as second dimension,
            # so swap on transposed view
            tensor_swap(new_speculative_tokens.t(), src_idxs, dst_idxs)

        for metadata_tensor in self.request_metadata.values():
            tensor_swap(metadata_tensor, src_idxs, dst_idxs)

        if self.is_hybrid_model:
            tensor_swap(self.mamba_metadata.request_to_mamba_state_idx, src_idxs, dst_idxs)

    def get_index_of_chunked_prefill_request(self, safe: bool = True) -> int:
        """
        Get the index of the chunked prefill request in the context.

        If `safe` is True, then clamp the search space to the current total request count.
        Otherwise, expand the search beyond the current total request count.

        Return:
            (int) Index of the chunked prefill request, or -1 if none exists.
        """
        if self.chunked_prefill_request_id == -1:
            return -1

        request_ids = self.request_ids
        if safe:
            request_ids = request_ids[: self.total_request_count]

        matches = torch.where(request_ids == self.chunked_prefill_request_id)[0]
        if len(matches) > 0:
            return matches[0].item()
        return -1

    def is_chunked_prefill_enabled(self) -> bool:
        """Returns whether chunked prefill is enabled."""
        if self.is_hybrid_model:
            return self.enable_chunked_prefill and not self.is_creating_cuda_graphs
        return self.enable_chunked_prefill

    def release_memory_blocks_from_request_indexes(self, request_indexes) -> None:
        """Release memory blocks used by the given request idxs.

        Args:
            request_indexes (torch.Tensor): Request indexes. (*Note*, NOT request
                ids.)
        """
        kv_blocks_assigned = self.request_to_kv_block_ids[request_indexes]
        non_zero_values_in_kv_memory = kv_blocks_assigned[kv_blocks_assigned != -1]
        self.kv_block_allocator.release_memory_blocks(non_zero_values_in_kv_memory)

        # Reset the KV blocks for finished requests.
        # Note: do not use fill_() (or add_() and similar inplace ops) here.
        # The combinition of indexing with a tensor (like finished_idxs) and
        # fill_()/add_() creates a clone and updates it instead of the original
        # tensor.
        self.request_to_kv_block_ids[request_indexes] = -1

        # Free Mamba slots.
        if self.is_hybrid_model:
            self.mamba_metadata.free_slots(request_indexes)

        # Clear intermediate offset entries for released requests
        if self.mamba_slot_allocator is not None:
            idx_list = (
                request_indexes.tolist() if hasattr(request_indexes, 'tolist') else request_indexes
            )
            for idx in idx_list:
                self.mamba_slot_allocator._intermediate_offsets[idx] = None
                self.mamba_slot_allocator._intermediate_block_ids[idx] = None
                self.mamba_slot_allocator._eos_cache_block_id[idx] = None

    def resume_paused_requests(
        self, active_request_count: int, newly_paused_request_ids: torch.Tensor
    ) -> tuple[int, torch.Tensor]:
        """Resume as many paused requests as we have space for in the active buffer.

        Args:
            active_request_count (int): Number of active requests.
            newly_paused_request_ids (torch.Tensor): List of newly paused request ids.
            next_tokens (torch.Tensor): Sampled tokens.

        Returns:
            (tuple[int, torch.Tensor]) active_request_count, newly_paused_request_ids.
        """

        # Assign released blocks to paused requests.
        # todo: @shanmugamr, un-pause requests using FIFO, rather than LIFO.
        resume_request_count = 0
        if self.paused_request_count > 0:
            active_block_count_avail = self.kv_block_allocator.get_active_avail()
            # Clone not needed: flip() makes a copy.
            paused_block_counts = self.request_kv_block_counts[: self.paused_request_count]
            # Flip counts before cumsum, since paused requests are resumed from
            # the right-most index, so we must count resumed blocks starting from
            # the right side.
            paused_block_counts = paused_block_counts.flip(dims=[0])

            # Check which paused requests will actually need a new block upon resuming
            offsets = self.request_last_kv_block_offset[: self.paused_request_count]
            needs_new_block = (
                offsets >= self.block_size_tokens - 1 - self.num_speculative_tokens
            ).to(paused_block_counts.dtype)
            needs_new_block = needs_new_block.flip(dims=[0])

            # Add +1 ONLY to the block counts of requests that finished their previous memory block
            paused_block_counts += needs_new_block
            paused_block_counts_cumsum = paused_block_counts.cumsum(dim=0)
            resume_request_count = min(
                torch.nonzero(paused_block_counts_cumsum <= active_block_count_avail).numel(),
                self.kv_block_allocator.total_avail,
            )

            # Constrain resumptions by the maximum allowed active requests and tokens
            max_allowed_active = min(
                self.max_requests, self.max_tokens // (self.num_speculative_tokens + 1)
            )
            allowed_to_resume = max(0, max_allowed_active - active_request_count)
            resume_request_count = min(resume_request_count, allowed_to_resume)

        self.paused_request_count -= resume_request_count
        active_request_count += resume_request_count

        # Resume requests by assigning blocks and updating bookkeeping tensors.
        if resume_request_count > 0:
            resume_start = self.paused_request_count
            resume_end = self.paused_request_count + resume_request_count

            # Check which resumed requests actually need a new block
            offsets = self.request_last_kv_block_offset[resume_start:resume_end]
            needs_new_block = offsets >= (self.block_size_tokens - 1 - self.num_speculative_tokens)
            num_new_blocks = needs_new_block.sum().item()

            if num_new_blocks > 0:
                assert num_new_blocks <= self.kv_block_allocator.total_avail
                block_ids = self.kv_block_allocator.allocate_memory_blocks(num_new_blocks)

                # Apply updates only to the requests that required a new block
                relative_row_idx = torch.nonzero(needs_new_block).squeeze(1)
                row_idx = resume_start + relative_row_idx
                col_idx = self.request_kv_block_counts[row_idx]

                self.request_to_kv_block_ids[row_idx, col_idx] = block_ids
                self.request_kv_block_counts[row_idx] += 1
                self.request_last_kv_block_id[row_idx] = block_ids

        # Remove resumed requests from newly_paused_request_ids. We do this by
        # truncating the end of newly_paused_request_ids, which works because we
        # resume requests in LIFO order. If resume_request_count >
        # len(newly_paused_request_ids), this means that none of the paused
        # requests are newly paused during this update.
        if newly_paused_request_ids is not None and resume_request_count > 0:
            newly_paused_request_ids = newly_paused_request_ids[:-resume_request_count]

        return active_request_count, newly_paused_request_ids

    def evict_overflow_paused_requests(
        self,
        active_request_count: int,
        next_tokens: torch.Tensor,
        new_speculative_tokens: Optional[torch.Tensor] = None,
    ) -> Optional[tuple[torch.Tensor, torch.Tensor]]:
        """Evict requests that overflow the paused buffer.

        Args:
            active_request_count (int): Number of active requests.
            next_tokens (torch.Tensor): Sampled tokens.

        Returns:
            (torch.Tensor) Evicted request ids.
        """

        # Overflow paused block count.
        overflow_paused_block_count = (
            self.kv_block_allocator.get_paused_used() - self.kv_block_allocator.paused_count
        )

        # Nothing to evict?
        if overflow_paused_block_count <= 0:
            return None

        # Overflow paused block count.
        paused_block_counts = self.request_kv_block_counts[: self.paused_request_count]
        paused_block_counts_cumsum = paused_block_counts.cumsum(dim=0)
        valid_paused_request_count = torch.nonzero(
            paused_block_counts_cumsum <= self.kv_block_allocator.paused_count
        ).numel()
        overflow_paused_request_count = self.paused_request_count - valid_paused_request_count

        # Nothing to evict? (Similar to checking overflow_paused_block_count
        # above, but here we allow up to one paused request to overflow into the
        # active buffer.
        if overflow_paused_request_count == 0:
            return None

        # Evict request count. (Flip paused_block_counts because evictions are
        # counted from the right-most paused requests.
        paused_block_counts = paused_block_counts[-overflow_paused_request_count:].flip(dims=[0])
        paused_block_counts_cumsum = paused_block_counts.cumsum(dim=0)
        remaining_paused_request_counts = torch.arange(
            overflow_paused_request_count - 1,
            -1,
            -1,
            dtype=paused_block_counts_cumsum.dtype,
            device=torch.cuda.current_device(),
        )
        net_block_counts = paused_block_counts_cumsum - remaining_paused_request_counts
        evict_request_count = torch.nonzero(net_block_counts >= 0)[0].item() + 1

        # Eviction index range.
        evict_start_idx = self.paused_request_count - evict_request_count
        evict_end_idx = self.paused_request_count
        evict_request_idxs = torch.arange(
            evict_start_idx, evict_end_idx, device=torch.cuda.current_device()
        )
        # Clone needed: subsequent release_memory_blocks_from_request_indexes and
        # _swap_book_keeping_tensors calls mutate self.request_ids in place.
        evict_request_ids = self.request_ids[evict_start_idx:evict_end_idx].clone()

        # Release memory.
        self.release_memory_blocks_from_request_indexes(evict_request_idxs)

        # Move evicted requests to the right of active requests, while minimizing
        # movement.
        if evict_request_count < active_request_count:
            # Swap all evicted requests with right-most active requests.
            src_idxs = torch.arange(
                self.paused_request_count - evict_request_count,
                self.paused_request_count,
                device=torch.cuda.current_device(),
            )
            dst_idxs = torch.arange(
                self.total_request_count - evict_request_count,
                self.total_request_count,
                device=torch.cuda.current_device(),
            )
        else:
            # Swap all active requests with left-most evicted requests.
            src_idxs = torch.arange(
                self.paused_request_count - evict_request_count,
                self.paused_request_count - evict_request_count + active_request_count,
                device=torch.cuda.current_device(),
            )
            dst_idxs = torch.arange(
                self.paused_request_count,
                self.paused_request_count + active_request_count,
                device=torch.cuda.current_device(),
            )

        # Swap evicted and active requests.
        self._swap_book_keeping_tensors(
            src_idxs=src_idxs,
            dst_idxs=dst_idxs,
            next_tokens=next_tokens,
            new_speculative_tokens=new_speculative_tokens,
        )

        # Update tracking vars.
        self.paused_request_count -= evict_request_count
        self.total_request_count -= evict_request_count

        # Reset unused block ids.
        evict_slice = slice(
            self.total_request_count, self.total_request_count + evict_request_count
        )
        self.request_to_kv_block_ids[evict_slice] = -1
        if self.is_hybrid_model:
            self.mamba_metadata.request_to_mamba_state_idx[evict_slice] = -1

        return evict_request_ids

    def update_requests(
        self,
        active_requests_mask: Tensor,
        new_tokens: Tensor,
        new_speculative_tokens: Tensor = None,
    ) -> Tensor:
        """Update context state after calling engine.step().

        This method is responsible for:
        - Update prefill requests to decode requests.
        - Persist decode requests as decode requests.
        - Terminate requests by length or termination id.

        *Note*: All bookkeeping tensors (i.e., `self.request_*`) are laid out
        contiguously, with a conceptual division between paused requests on the
        'left' (or, lower indices) and active requests in the 'middle' (or, middle
        indices) and completed requests on the 'right' (or, higher indices). The integers
        `paused_request_count` and `total_request_count`  are used to track the boundaries
        between these request groups.
        - 0:paused_request_count -> paused requests
        - paused_request_count:total_request_count -> active requests
        - total_request_count:max_requests -> completed requests are moved here.
        The reason for maintaining contiguous tensors rather than multiple
        smaller (e.g., per-group or per-request) tensors is for both 1) speed
        (avoid unnecessary tensor allocations), and 2) compatibility with the
        Flash Attention kernels, which packed contiguous tensors.

        The following happens in this code :
        1. The active token mask tells us which requests are still active and which are completed
        2. If no paused requests are present and no active requests we release all memory and reset.
        3. Concatenate the paused tokens to the active tokens
        4. For the finished requests we release memory blocks and move them to the right
        5. We identify requests that require a new block and add them to the paused requests (i.e move them left)
        6. Resume paused requests & evict overflowing paused requests.
        7. We make changes to the request book keeping tesnsors and setup the tokens for next iteration
        8. We make relevant changes to the token bookkeeping tensors

        Args:
            active_requests_mask (Tensor): 1D Mask tensor marking active requests. (Active request length)
            new_tokens (Tensor): Newly sampled tokens, with one token per active request. (Active request length)
            new_speculative_tokens (Tensor): Newly sampled speculative tokens,
                with num_speculative tokens per active request.
                (num_speculative_tokens, active_request_length)

        Return:
            (Tensor) Newly paused request IDs.
        """
        # 1. The active token mask tells us which requests are still active and which are completed
        # active_request_count -> This corresponds to requests that have not reached EOD or max length
        # finished_request_count are requests that have reached the termination criterion

        self.num_prefill_requests = 0  # all turns to decode
        # All request that were in prefill become decode requests.
        # For the chunked prefill request we will overwrite this the next time add_request
        # is called on that request.
        self.request_in_prefill_status_tensor[self.request_in_prefill_status_tensor == 1] = 0

        if (
            chunked_prefill_request_idx := self.get_index_of_chunked_prefill_request(safe=True)
        ) != -1:
            # Chunked prefill request was active this step.
            # We must keep it active so that the next iteration will add a new chunk to it.
            active_requests_mask[-1] = 1

        active_request_count = (active_requests_mask == 1).sum().item()
        finished_request_count = (active_requests_mask == 0).sum().item()
        assert (
            active_request_count + finished_request_count + self.paused_request_count
            == self.total_request_count
        )

        # Reset attention state.
        self.reset_attention_state()

        # Update total_request_count.
        self.total_request_count = active_request_count + self.paused_request_count

        # 2. If no paused requests are present and no active requests we release memory and reset.
        # Note that this requires no pending chunked prefill request
        if (
            active_request_count + self.paused_request_count == 0
            and self.get_index_of_chunked_prefill_request(safe=False) == -1
        ):
            if finished_request_count > 0:
                finished_idxs = (
                    torch.nonzero(active_requests_mask == 0, as_tuple=True)[0]
                    + self.paused_request_count
                )
                self.release_memory_blocks_from_request_indexes(finished_idxs)

            # Reset request/token counts.
            self.request_to_kv_block_ids.fill_(-1)
            self.total_request_count = 0
            self.active_token_count = 0

            # Reset Mamba state.
            self.reset_mamba_state()
            return

        # 3. Concatenate the paused tokens to the active tokens if present.
        if self.paused_request_count != 0:
            assert self.paused_tokens is not None
            next_tokens = torch.cat((self.paused_tokens, new_tokens))
            if new_speculative_tokens is not None and self.paused_speculative_tokens is not None:
                new_speculative_tokens = torch.cat(
                    (self.paused_speculative_tokens, new_speculative_tokens), dim=1
                )
        else:
            next_tokens = new_tokens

        # 4. For the finished requests we release memory blocks and move them to the right:-
        #       a) Release all their memory
        #       b) Swap them to the right, so that we have this order [Paused, Active, Finished]
        if finished_request_count > 0:
            finished_idxs = (
                torch.nonzero(active_requests_mask == 0, as_tuple=True)[0]
                + self.paused_request_count
            )
            self.release_memory_blocks_from_request_indexes(finished_idxs)

            if active_request_count > 0:
                finished_idxs_on_left = (
                    torch.nonzero(active_requests_mask[:active_request_count] == 0, as_tuple=True)[
                        0
                    ]
                    + self.paused_request_count
                )
                active_idxs_on_right = (
                    torch.nonzero(active_requests_mask[active_request_count:], as_tuple=True)[0]
                    + active_request_count
                    + self.paused_request_count
                )

                self._move_book_keeping_tensors(
                    src_idxs=active_idxs_on_right,
                    dst_idxs=finished_idxs_on_left,
                    next_tokens=next_tokens,
                    new_speculative_tokens=new_speculative_tokens,
                )

                # Reset chunk ids for recently moved requests.
                self.request_to_kv_block_ids[active_idxs_on_right] = -1
                if self.is_hybrid_model:
                    self.mamba_metadata.request_to_mamba_state_idx[active_idxs_on_right] = -1

        # 5. We identify requests that require a new block and add them to the paused requests (i.e move them left) :-
        #       a) Put requests that have filled their current block and  require a new one in a pause state temporarily
        #       b) Move the paused requests to the left, and active requets to the right
        #       c) Update the paused request count and active_request_count appropriately
        newly_paused_request_ids = None
        if active_request_count > 0:
            num_tokens_in_last_block = self.request_last_kv_block_offset[
                self.paused_request_count : (active_request_count + self.paused_request_count)
            ]
            active_requests_requiring_new_block = (
                num_tokens_in_last_block >= self.block_size_tokens - 1 - self.num_speculative_tokens
            ).byte()

            # Find the id in request_ids that is the chunked_prefill_request_id. Only one request should be chunked.
            if (
                chunked_prefill_request_idx := self.get_index_of_chunked_prefill_request(safe=True)
            ) != -1:
                active_requests_requiring_new_block[
                    chunked_prefill_request_idx - self.paused_request_count
                ] = 0  # chunked prefill should not be paused
            else:
                max_allowed_active = min(
                    self.max_requests, self.max_tokens // (self.num_speculative_tokens + 1)
                )
                if active_request_count > max_allowed_active:
                    # Force-pause excess requests in a decode-only batch
                    active_requests_requiring_new_block[max_allowed_active:] = 1

            active_requests_requiring_new_block_count = (
                (active_requests_requiring_new_block == 1).sum().item()
            )

            if active_requests_requiring_new_block_count > 0:
                newly_paused_request_ids = self.request_ids[
                    torch.nonzero(active_requests_requiring_new_block) + self.paused_request_count
                ]

            # Swap unfinished active requests on the left side with paused requests on the right side
            # NOTE : We add paused request count because we concatenate
            # paused tokens to the left at the beginning of update requests
            if (
                active_requests_requiring_new_block_count > 0
                and active_requests_requiring_new_block_count != active_request_count
            ):
                active_request_ids_on_left = (
                    torch.nonzero(
                        active_requests_requiring_new_block[
                            :active_requests_requiring_new_block_count
                        ]
                        == 0,
                        as_tuple=True,
                    )[0]
                    + self.paused_request_count
                )
                paused_requests_idxs_on_right = (
                    torch.nonzero(
                        active_requests_requiring_new_block[
                            active_requests_requiring_new_block_count:
                        ],
                        as_tuple=True,
                    )[0]
                    + active_requests_requiring_new_block_count
                    + self.paused_request_count
                )
                dst_idxs = torch.cat((active_request_ids_on_left, paused_requests_idxs_on_right))
                src_idxs = torch.cat((paused_requests_idxs_on_right, active_request_ids_on_left))
                self._move_book_keeping_tensors(
                    src_idxs=src_idxs,
                    dst_idxs=dst_idxs,
                    next_tokens=next_tokens,
                    new_speculative_tokens=new_speculative_tokens,
                )

            self.paused_request_count += active_requests_requiring_new_block_count
            active_request_count -= active_requests_requiring_new_block_count

        # 6. Now that we have the requests in following order [Paused, Active, Finished]
        # We determine how many requests we can resume and resume them

        # For multi-token generation: store previous block IDs BEFORE resume allocates new blocks.
        # This allows us to know which block tokens should go to if they don't cross the boundary.
        # After resume_paused_requests, request_last_kv_block_id will be updated to the NEW block
        # for resumed requests, but we need the OLD block for tokens that don't cross.
        prev_last_block_ids = None
        if self.num_speculative_tokens > 0:
            # Clone needed: resume_paused_requests mutates request_last_kv_block_id
            # (assigns new block IDs), but we need the old values later to determine
            # which block tokens should go to when they don't cross a block boundary.
            prev_last_block_ids = self.request_last_kv_block_id.clone()

        # 6.a. First, resume temporarily paused requests.
        active_request_count, newly_paused_request_ids = self.resume_paused_requests(
            active_request_count, newly_paused_request_ids
        )

        # 6.b. Evict requests that overflow the paused buffer.
        evict_request_ids = self.evict_overflow_paused_requests(
            active_request_count, next_tokens, new_speculative_tokens
        )

        # 6.c. Resume any additional requests.
        active_request_count, newly_paused_request_ids = self.resume_paused_requests(
            active_request_count, newly_paused_request_ids
        )

        assert active_request_count > 0 or self.chunked_prefill_request_id != -1, (
            "active_request_count == %d with no hidden chunked prefill." % active_request_count
        )

        # 6.d. Swap the chunked prefill request to the end of the active requests
        # to obey the invariance.
        if (
            chunked_prefill_request_idx := self.get_index_of_chunked_prefill_request(safe=False)
        ) != -1:
            if chunked_prefill_request_idx < self.total_request_count:
                # Chunked prefill request was active this step.
                # Swap to the end of active, then hide it out of bounds.
                self._swap_book_keeping_tensors(
                    src_idxs=torch.tensor(
                        [chunked_prefill_request_idx], device=self.request_ids.device
                    ),
                    dst_idxs=torch.tensor(
                        [self.total_request_count - 1], device=self.request_ids.device
                    ),
                    next_tokens=next_tokens,
                    new_speculative_tokens=new_speculative_tokens,
                )

                # Explicitly decrement the active and total request counts here so that the chunked
                # prefill request metadata is not updated. This will all be restored when the next
                # chunk is added through add_request.
                active_request_count -= 1
                self.total_request_count -= 1
            else:
                # Chunked prefill request was inactive/hidden this step.
                # Pull it to the new boundary so it doesn't drift.
                if chunked_prefill_request_idx != self.total_request_count:
                    self._swap_book_keeping_tensors(
                        src_idxs=torch.tensor(
                            [chunked_prefill_request_idx], device=self.request_ids.device
                        ),
                        dst_idxs=torch.tensor(
                            [self.total_request_count], device=self.request_ids.device
                        ),
                        next_tokens=None,  # Do not swap next_tokens as these indices are out of bounds
                        new_speculative_tokens=None,
                    )

        # 7. We make changes to the request book keeping tesnsors and setup the tokens for next iteration
        assert self.total_request_count == active_request_count + self.paused_request_count

        if self.paused_request_count > 0:
            # Clone needed: next_tokens is a shared buffer that will be overwritten in
            # the next iteration; paused_tokens must persist independently.
            self.paused_tokens = next_tokens[: self.paused_request_count].clone()
            if new_speculative_tokens is not None:
                # Clone needed: same reason as paused_tokens above.
                self.paused_speculative_tokens = new_speculative_tokens[
                    :, : self.paused_request_count
                ].clone()

        # add_ and fill_ calls seems to work as intended with sliced indexing
        # (i.e. x[3:5].add(...) or x[3:5].fill_) but when another tensor is used
        # for indexing, it does not work as expected (i.e. x[y] if x and y are torch tensors)
        self.request_kv_length_offsets[self.paused_request_count : self.total_request_count].add_(
            self.request_query_lengths[self.paused_request_count : self.total_request_count]
        )

        num_generated_tokens = 1 + self.num_speculative_tokens
        self.request_query_lengths[self.paused_request_count : self.total_request_count].fill_(
            num_generated_tokens
        )

        # Clone needed: old_offsets is reused later to compute raw_positions
        # for block-boundary detection. The write-back on the next line overwrites the
        # underlying tensor, so without clone the boundary-crossing logic would see the
        # new offsets instead of the pre-update values.
        old_offsets = self.request_last_kv_block_offset[
            self.paused_request_count : self.total_request_count
        ].clone()

        self.request_last_kv_block_offset[self.paused_request_count : self.total_request_count] = (
            old_offsets + num_generated_tokens
        ) % self.block_size_tokens

        self.active_token_count = active_request_count * num_generated_tokens
        sampled_tokens = next_tokens[self.paused_request_count : self.total_request_count]

        if self.num_speculative_tokens > 0:
            # new_speculative_tokens has shape [num_spec_tokens, num_requests],
            # slice the request dimension (dim 1)
            sampled_speculative_tokens = new_speculative_tokens[
                :, self.paused_request_count : self.total_request_count
            ]
            # This will become [sampled, spec1, spec2, sampled, spec1, spec2 ...]
            # For every request we will have the sampled token followed by the
            # speculative tokens (i.e next indices)
            next_tokens = torch.vstack(
                [sampled_tokens.unsqueeze(0), sampled_speculative_tokens]
            ).T.reshape(-1)
        else:
            next_tokens = sampled_tokens

        self.token_to_input_ids[: self.active_token_count] = next_tokens

        # Req kv length offsets : [0, 5, 10 ... ]
        # For num spec tokens = 2 , this will become [0, 1, 2, 5, 6, 7 10, 11, 12 ...]
        self.token_to_pos_ids[: self.active_token_count] = self.request_kv_length_offsets[
            self.paused_request_count : self.total_request_count
        ].repeat_interleave(num_generated_tokens) + torch.arange(
            num_generated_tokens, device=torch.cuda.current_device()
        ).repeat(
            active_request_count
        )
        #
        # Token to request idx : [0, 0, 0, 1, 1, 1, 2, 2, 2 ...]
        self.token_to_request_idx[: self.active_token_count] = torch.arange(
            self.paused_request_count, self.total_request_count, device=torch.cuda.current_device()
        ).repeat_interleave(num_generated_tokens)

        self.token_to_position_in_request[: self.active_token_count] = self.token_to_pos_ids[
            : self.active_token_count
        ]

        self.token_to_local_position_within_kv_block[: self.active_token_count] = (
            self.token_to_pos_ids[: self.active_token_count] % self.block_size_tokens
        )

        current_block_ids = self.request_last_kv_block_id[
            self.paused_request_count : self.total_request_count
        ]

        # raw positions shape : [active_request_count, num_generated_tokens]
        # e.g block size 6, old_offsets = [1,5,2] , num_generated_tokens = 3
        # raw_positions = [[1, 2, 3], [5, 6, 7], [2, 3, 4]]
        # crosses_boundary = [[False, False, False], [False, True, True], [False, False, False]]
        raw_positions = (
            old_offsets[:, None]
            + 1  # Offset by 1 because old_offsets points to the LAST token
            + torch.arange(num_generated_tokens, device=torch.cuda.current_device())[None, :]
        )
        #
        # A token crosses to the next block if its raw_position >= block_size
        crosses_boundary = raw_positions >= self.block_size_tokens

        if not crosses_boundary.any() or self.num_speculative_tokens == 0:
            # Fast path: no tokens cross block boundary, all use current block
            self.token_to_block_idx[: self.active_token_count] = self.request_last_kv_block_id[
                self.paused_request_count : self.total_request_count
            ].repeat_interleave(num_generated_tokens)
        else:

            # Some tokens cross to the next block (this happens for resumed requests)
            #
            # When a request is paused and resumed:
            # 1. It was paused because remaining_space < num_tokens_per_step
            # 2. A NEW block is allocated in resume_paused_requests
            # 3. request_last_kv_block_id is updated to the NEW block
            # 4. The old offset is preserved (wasn't reset)
            #
            # So for resumed requests:
            # - Tokens before the boundary (raw_pos < block_size): go to PREVIOUS block
            # - Tokens at/after the boundary (raw_pos >= block_size): go to CURRENT (new) block
            #
            # For non-resumed requests (no boundary crossing): all go to current block
            #
            # We use prev_last_block_ids which was stored BEFORE resume_paused_requests
            # was called, so it contains the OLD block IDs before new blocks were allocated.

            # Get previous block IDs (stored before resume_paused_requests)
            prev_block_ids = prev_last_block_ids[
                self.paused_request_count : self.total_request_count
            ]  # [active_count]

            # For each request, check if ANY token crosses (i.e., request was resumed)
            request_has_crossing = crosses_boundary.any(dim=1)  # [active_count]

            # Build block_idx: [active_count, N]
            # Start with current (new) block for all
            # Lets say current block ids is [a1, a2 , a3] and num generated_tokens is 3
            # This will be [[a1, a1, a1], [a2, a2, a2], [a3, a3, a3]]
            # No clone needed: expand() returns a read-only view, and downstream
            # torch.where() and .flatten() both return new tensors without in-place mutation.
            block_idx = current_block_ids[:, None].expand(
                -1, num_generated_tokens
            )  # [active_count, N]

            # For requests that have crossing, tokens BEFORE boundary use prev block
            # crosses_boundary is False for tokens before boundary
            # So: where request_has_crossing AND NOT crosses_boundary, use prev_block
            use_prev_block = request_has_crossing[:, None] & ~crosses_boundary  # [active_count, N]

            # Apply previous block IDs where needed
            prev_block_ids_expanded = prev_block_ids[:, None].expand(-1, num_generated_tokens)
            block_idx = torch.where(use_prev_block, prev_block_ids_expanded, block_idx)

            # Convert back to 1d tensor
            self.token_to_block_idx[: self.active_token_count] = block_idx.flatten()

        return {
            "newly_paused_request_ids": newly_paused_request_ids,
            "evict_request_ids": evict_request_ids,
        }

    def calculate_log_probs(
        self, logits: Tensor, new_tokens: Tensor, only_last_token_logits: Optional[bool] = False
    ) -> Tuple[List[List[float]], Tensor]:
        """Calculate log probs for all active requests and return them.

        TODO: @wdykas support top-n log probs.

        Args:
            logits (Tensor): Raw model output logits with shape [1, sequence_length, vocab_size].
            new_tokens (Tensor): The newly sampled tokens.
            only_last_token_logits (bool): If set, the logits are from only the last token in each request

        Returns:
            List of lists where each inner list contains log probs for a request in the
            same order as the active requests (from paused_request_count to total_request_count).
            log_probs (Tensor): Used to compute top n logprobs later if required.
        """

        # Calculate log_probs (sequence_length x vocab_size)
        logits_squeezed = logits.squeeze(0).float()

        if only_last_token_logits or self.is_decode_only():
            seq_idx = torch.arange(len(new_tokens), dtype=torch.int32, device=logits.device)
            log_probs = F.log_softmax(logits_squeezed[seq_idx], dim=-1)
            selected_log_probs = log_probs[seq_idx, new_tokens]
            return [[lp] for lp in selected_log_probs.tolist()], log_probs

        log_probs = F.log_softmax(logits_squeezed, dim=-1)
        # Get the selected token ids for all tokens.
        # We shift the active token window left by one to remove the first prompt token for
        # prefill requests and then set the token ids explicitly for the newly generated tokens.
        # This is necessary because we calculate the log probs *before* updating the request metadata.
        #
        # Example (decode & prefill mix):
        #
        #   active_query_lengths: [ 1 | 1 | 2 | 5 ]
        #
        #   new_tokens          : [ 52 | 12 | 3 | 86 ]
        #
        #   seq_idx             : [ 0 | 1 | 2 3 | 4 5 6 7 8 ]
        #
        #   new_token_idx       : [ 0 | 1 | 3 | 8 ]
        #
        #   active_token_ids before left shift:
        #                       : [ 31 | 75 | 45 16 | 90 12 72 24 88 ]
        #
        #   active_token_ids after shift:
        #                       : [ XX | XX | 16 XX | 12 72 24 88 XX ]   (XX = undefined)
        #
        #   active_token_ids[new_token_idx] = new_tokens
        #                       : [ 52 | 12 | 16  3 | 12 72 24 88 86 ]
        active_token_ids = self.token_to_input_ids[: self.active_token_count].roll(-1, 0)
        active_query_lengths = self.request_query_lengths[
            self.paused_request_count : self.total_request_count
        ]

        new_token_idx = active_query_lengths.cumsum(0) - 1
        active_token_ids[new_token_idx] = new_tokens

        # Extract the log probs for only the selected tokens.
        # (sequence_length x vocab_size) -> (sequence_length)
        seq_idx = torch.arange(self.active_token_count, device=log_probs.device)
        selected_log_probs = log_probs[seq_idx, active_token_ids]

        # Split the log probs across request boundaries
        selected_log_probs_list = selected_log_probs.cpu().split(
            active_query_lengths.tolist(), dim=0
        )

        # Convert each log prob tensor into a list
        return [lp.tolist() for lp in selected_log_probs_list], log_probs

    def get_kvcache_utilization_stats(self) -> dict:
        """Compute KV cache buffer utilization stats for the current step.

        Returns a dictionary with counts and percentages for both allocated block
        usage (overall buffer occupancy) and active usage (blocks referenced by
        currently active requests this step).

        Return:
            {
            'total_blocks': int,
            'allocated_blocks': int,
            'active_unique_blocks': int,
            'allocated_utilization': float,
            'active_utilization': float,
            'active_request_count': int,
            'paused_request_count': int,
            'gtd_block_count': int,
            }
        """
        # Total usable blocks exclude the reserved dummy block.
        total_blocks = max(self.kv_block_allocator.total_count - 1, 1)
        block_count_avail = int(self.kv_block_allocator.total_avail)

        # Overall allocated blocks in the buffer right now.
        allocated_blocks = (self.kv_block_allocator.total_count - 1) - block_count_avail
        allocated_blocks = int(max(0, allocated_blocks))

        # Active unique blocks referenced by current active requests only.
        active_start = self.paused_request_count
        active_end = self.total_request_count
        if active_end > active_start:
            active_rows = self.request_to_kv_block_ids[active_start:active_end]
            # Filter valid block ids (>= 0) and count unique ids.
            valid_ids = active_rows[active_rows >= 0]
            if valid_ids.numel() > 0:
                unique_ids = torch.unique(valid_ids)
                active_unique_blocks = int(unique_ids.numel())
            else:
                active_unique_blocks = 0
        else:
            active_unique_blocks = 0

        allocated_utilization = float(allocated_blocks) / float(total_blocks)
        active_utilization = float(active_unique_blocks) / float(total_blocks)

        # Diagnostic helpers
        total_request_count = int(self.total_request_count)
        return {
            'total_blocks': int(total_blocks),
            'allocated_blocks': int(allocated_blocks),
            'active_unique_blocks': int(active_unique_blocks),
            'allocated_utilization': allocated_utilization,
            'active_utilization': active_utilization,
            'active_request_count': int(self.get_active_request_count()),
            'paused_request_count': int(self.paused_request_count),
            'block_count_avail': int(block_count_avail),
            'active_token_count': int(self.active_token_count),
            'total_request_count': int(total_request_count),
            'max_requests': int(self.max_requests),
        }


================================================
FILE: megatron/core/inference/contexts/fused_kv_append_kernel.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from torch import Tensor

try:
    import triton
    import triton.language as tl

    HAVE_TRITON = True
except ImportError:
    from unittest.mock import MagicMock

    from megatron.core.utils import null_decorator

    triton = MagicMock()
    triton.jit = null_decorator
    tl = MagicMock()
    HAVE_TRITON = False


@triton.jit
def _append_kv_cache_kernel(
    # --- Pointers to Tensors ---
    key_ptr,
    value_ptr,
    key_cache_ptr,
    value_cache_ptr,
    block_idx_ptr,
    local_kv_seq_idx_ptr,
    # --- Strides for Tensor Memory Layout ---
    stride_key_token,
    stride_key_head,
    stride_key_hdim,
    stride_value_token,
    stride_value_head,
    stride_value_hdim,
    stride_cache_block,
    stride_cache_pos,
    stride_cache_head,
    stride_cache_hdim,
    # --- Other Parameters ---
    n_tokens: tl.int32,
    num_heads: tl.int32,
    H_DIM: tl.int32,
    # --- Compile-Time Constants ---
    BLOCK_SIZE_H: tl.constexpr,
):
    """
    Triton kernel to append key and value vectors to pre-sliced paged KV cache tensors.

    Each program instance handles one head of one token. The grid is 2D: (n_tokens, num_heads).

    1. It identifies which token and head it is responsible for using `tl.program_id`.
    2. It loads the `block_idx` and `local_pos` for that token.
    3. It loads the `h_dim` vector for its assigned key/value head.
    4. It calculates the destination address in the 4D cache slices.
    5. It writes (scatters) the head vector to its destination in the cache.
    """

    token_idx = tl.program_id(0)
    head_idx = tl.program_id(1)

    if token_idx >= n_tokens or head_idx >= num_heads:
        return

    # --- Load destination indices for the current token ---
    block_idx = tl.load(block_idx_ptr + token_idx)
    local_pos = tl.load(local_kv_seq_idx_ptr + token_idx)

    # --- Load the key and value data for the current head of the current token ---
    offs_h = tl.arange(0, BLOCK_SIZE_H)
    mask_h = offs_h < H_DIM

    key_head_ptr = key_ptr + token_idx * stride_key_token + head_idx * stride_key_head
    value_head_ptr = value_ptr + token_idx * stride_value_token + head_idx * stride_value_head

    key_to_write = tl.load(key_head_ptr + offs_h * stride_key_hdim, mask=mask_h, other=0.0)
    value_to_write = tl.load(value_head_ptr + offs_h * stride_value_hdim, mask=mask_h, other=0.0)

    # --- Calculate destination pointers in the 4D KV cache slices ---
    dest_offset = (
        block_idx * stride_cache_block + local_pos * stride_cache_pos + head_idx * stride_cache_head
    )

    key_dest_ptr = key_cache_ptr + dest_offset
    value_dest_ptr = value_cache_ptr + dest_offset

    # --- Store the head data into the cache ---
    tl.store(key_dest_ptr + offs_h * stride_cache_hdim, key_to_write, mask=mask_h)
    tl.store(value_dest_ptr + offs_h * stride_cache_hdim, value_to_write, mask=mask_h)


def triton_append_key_value_cache(
    layer_number: int,
    key: Tensor,
    value: Tensor,
    memory_buffer: Tensor,
    padded_active_token_count: int,
    token_to_block_idx: Tensor,
    token_to_local_position_within_kv_block: Tensor,
) -> None:
    """
    Append to KV cache using a high-performance, standalone Triton kernel.

    Args:
        layer_number (int): Layer number (1-based).
        key (Tensor): Key tensor of shape (batch_size, 1, num_heads, h_dim).
        value (Tensor): Value tensor of shape (batch_size, 1, num_heads, h_dim).
        memory_buffer (Tensor): The 6D KV cache tensor to write to.
        padded_active_token_count (int): The number of active tokens to process.
        token_to_block_idx (Tensor): Tensor mapping token index to its block index in
        the cache.
        token_to_local_position_within_kv_block (Tensor): Tensor mapping token index
        to its position within a block.
    """
    # --- Input Validation and Preparation ---
    assert (
        key.device.type == 'cuda'
        and value.device.type == 'cuda'
        and memory_buffer.device.type == 'cuda'
    ), "All tensors must be on CUDA devices."

    assert (
        key.size(1) == 1 and value.size(1) == 1
    ), "Key and Value should have a sequence length of 1."
    key = key.squeeze(1)
    value = value.squeeze(1)

    n_tokens = padded_active_token_count
    if n_tokens == 0:
        return

    _, num_heads, h_dim = key.shape

    key_cache = memory_buffer[0, layer_number]
    value_cache = memory_buffer[1, layer_number]

    key_to_cache = key[:n_tokens]
    value_to_cache = value[:n_tokens]
    block_idx_active = token_to_block_idx[:n_tokens]
    local_kv_seq_idx_active = token_to_local_position_within_kv_block[:n_tokens]

    assert (
        key_cache.dim() == 4 and value_cache.dim() == 4
    ), f"Sliced key_cache and value_cache should be 4D"
    assert (
        num_heads == key_cache.shape[-2]
    ), f"Head count mismatch. Key/Value has {num_heads} but cache expects {key_cache.shape[-2]}."
    assert (
        h_dim == key_cache.shape[-1]
    ), f"Head dimension mismatch. Key/Value has {h_dim} but cache expects {key_cache.shape[-1]}."

    block_idx_active = block_idx_active.contiguous()
    local_kv_seq_idx_active = local_kv_seq_idx_active.contiguous()

    grid = (n_tokens, num_heads)
    BLOCK_SIZE_H = triton.next_power_of_2(h_dim)

    cache_strides = key_cache.stride()

    _append_kv_cache_kernel[grid](
        # Pointers
        key_to_cache,
        value_to_cache,
        key_cache,
        value_cache,
        block_idx_active,
        local_kv_seq_idx_active,
        # Strides for 3D key/value tensors
        key_to_cache.stride(0),
        key_to_cache.stride(1),
        key_to_cache.stride(2),
        value_to_cache.stride(0),
        value_to_cache.stride(1),
        value_to_cache.stride(2),
        # Strides for the 4D sliced cache
        cache_strides[0],
        cache_strides[1],
        cache_strides[2],
        cache_strides[3],
        # Other parameters
        n_tokens=n_tokens,
        num_heads=num_heads,
        H_DIM=h_dim,
        # Compile-time constant
        BLOCK_SIZE_H=BLOCK_SIZE_H,
    )


================================================
FILE: megatron/core/inference/contexts/kv_block_allocator.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from collections import deque
from typing import Callable, Dict, Optional

import torch
from torch import Tensor

from megatron.core.inference.config import PrefixCachingEvictionPolicy


class KVBlockAllocator:
    """Allocator that manages blocks of memory for the KV cache.

    This allocator is responsible for:
    - Initializing a pool of block IDs
    - Allocating blocks from the pool
    - Releasing blocks back to the pool

    Args:
        context (DynamicInferenceContext): Dynamic inference context.
        total_count (int): Total number of blocks in the buffer.
        paused_count (int): Number of paused blocks in the buffer. Must be less
            than `total_count`.
    """

    def __init__(
        self,
        context: "DynamicInferenceContext",
        total_count: int,
        paused_count: int,
        enable_prefix_caching: bool = False,
        prefix_caching_eviction_policy: PrefixCachingEvictionPolicy = (
            PrefixCachingEvictionPolicy.REF_ZERO
        ),
    ):

        self.context = context
        self.enable_prefix_caching = enable_prefix_caching
        self.prefix_caching_eviction_policy = prefix_caching_eviction_policy
        self.on_blocks_deregistered: Optional[Callable] = None

        self.total_count = total_count
        self.total_avail = total_count - 1  # -1 for dummy_block_idx (see below)
        self.paused_count = paused_count
        self.active_count = total_count - paused_count - 1  # -1 for dummy_block_idx
        assert self.active_count >= 1  # ensures paused_count < total_count - 1
        self.dummy_block_idx = self.total_count - 1

        # Initialize block pool as a "stack" data structure
        self.block_bag = torch.arange(
            self.total_count, dtype=torch.int32, device=torch.cuda.current_device()
        )

        if self.enable_prefix_caching:
            # Block hash tracking for prefix caching: -1 = uncomputed, positive = valid hash
            self.block_hashes = torch.full(
                (self.total_count,), -1, dtype=torch.int64, device=torch.cuda.current_device()
            )

            # Hash-to-block mapping for O(1) prefix lookup
            self.kv_hash_to_block_id: Dict[int, int] = {}

            # Reference count per block: 0 = cached (evictable), >0 = actively used
            self.block_ref_counts = torch.zeros(
                (self.total_count,), dtype=torch.int32, device=torch.cuda.current_device()
            )

            # LRU timestamps for eviction ordering (higher = more recently used)
            # Only needed in LRU mode; RZ mode evicts immediately on ref_count==0
            if self.prefix_caching_eviction_policy == PrefixCachingEvictionPolicy.LRU:
                self.block_timestamps = torch.zeros(
                    (self.total_count,), dtype=torch.int64, device=torch.cuda.current_device()
                )

    def __str__(self):
        return (
            f"using: total {self.get_total_used()}/{self.total_count - 1}"
            f"; active {self.get_active_used()}/{self.active_count}"
            f"; paused {self.get_paused_used()}/{self.paused_count}"
        )

    def get_total_used(self):
        """Compute number of total blocks used."""
        return self.total_count - self.total_avail - 1

    def get_active_used(self):
        """Compute number of active blocks used."""
        if not self.enable_prefix_caching:
            return (
                self.context.request_kv_block_counts[
                    self.context.paused_request_count : self.context.total_request_count
                ]
                .sum()
                .item()
            )

        active_start = self.context.paused_request_count
        active_end = self.context.total_request_count
        if active_end > active_start:
            active_rows = self.context.request_to_kv_block_ids[active_start:active_end]
            valid_ids = active_rows[active_rows >= 0]
            if valid_ids.numel() > 0:
                return int(torch.unique(valid_ids).numel())
        return 0

    def get_paused_used(self):
        """Compute number of paused blocks used."""
        if not self.enable_prefix_caching:
            return (
                self.context.request_kv_block_counts[: self.context.paused_request_count]
                .sum()
                .item()
            )

        if self.context.paused_request_count > 0:
            paused_rows = self.context.request_to_kv_block_ids[: self.context.paused_request_count]
            valid_ids = paused_rows[paused_rows >= 0]
            if valid_ids.numel() > 0:
                return int(torch.unique(valid_ids).numel())
        return 0

    def get_active_avail(self):
        """Compute number of active blocks available."""
        return self.active_count - self.get_active_used()

    def get_paused_avail(self):
        """Compute number of paused blocks available."""
        return self.paused_count - self.get_paused_used()

    def is_memory_available(self, num_blocks: int) -> bool:
        """Check if memory blocks are available.

        Includes both free pool blocks and evictable cached blocks (ref_count == 0).

        Args:
            num_blocks (int): Number of blocks to check.

        Return:
            (bool) Is memory available?
        """
        # Fast path: avoid expensive evictable count computation when free pool suffices
        if self.total_avail >= num_blocks:
            return True
        if not self.enable_prefix_caching:
            return False
        if self.prefix_caching_eviction_policy == PrefixCachingEvictionPolicy.REF_ZERO:
            return False  # RZ: no cached blocks to evict
        # Also count evictable cached blocks
        evictable_count = self.get_evictable_block_count()
        return (self.total_avail + evictable_count) >= num_blocks

    def allocate_memory_blocks(self, num_blocks: int) -> Optional[Tensor]:
        """Allocate memory blocks if available, else return None.

        Will attempt LRU eviction of cached blocks if the free pool is insufficient.

        Args:
            num_blocks (int): Number of blocks to allocate.

        Return:
            (Optional[Tensor]) Allocated block IDs.
        """
        # Try to evict cached blocks if free pool is insufficient
        if self.total_avail < num_blocks:
            if (
                not self.enable_prefix_caching
                or self.prefix_caching_eviction_policy == PrefixCachingEvictionPolicy.REF_ZERO
            ):
                return None  # RZ: no eviction path; disabled: no cached blocks
            blocks_needed_from_eviction = num_blocks - self.total_avail
            if not self.evict_lru_blocks(blocks_needed_from_eviction):
                return None  # Not enough blocks even after eviction

        # Now allocate from the free pool
        self.total_avail -= num_blocks
        block_ids = self.block_bag[self.total_avail : (self.total_avail + num_blocks)]
        assert num_blocks == block_ids.numel()

        if self.enable_prefix_caching:
            # Initialize ref counts for newly allocated blocks
            self.block_ref_counts[block_ids] = 1
            if self.prefix_caching_eviction_policy == PrefixCachingEvictionPolicy.LRU:
                self.update_timestamps(block_ids)

        return block_ids

    def release_memory_blocks(self, blocks: Tensor) -> None:
        """Release memory blocks by decrementing reference counts.

        Blocks with ref_count == 0 remain cached (in hash map) for potential reuse.
        They will be evicted via LRU when space is needed.

        Args:
            blocks (Tensor): Block IDs to release.

        Return:
            None
        """
        if blocks.numel() == 0:
            return

        if self.enable_prefix_caching:
            self.block_ref_counts[blocks] -= 1
            if self.prefix_caching_eviction_policy == PrefixCachingEvictionPolicy.REF_ZERO:
                zero_mask = self.block_ref_counts[blocks] == 0
                if zero_mask.any():
                    self._deregister_blocks(blocks[zero_mask])
            elif self.prefix_caching_eviction_policy == PrefixCachingEvictionPolicy.LRU:
                # Unregistered blocks (hash == -1, ref_count == 0) have no hash
                # entry to preserve for reuse (e.g., partial blocks at the end of
                # a request). Return them directly to the free pool so they are not
                # leaked.
                unreg_mask = (self.block_ref_counts[blocks] == 0) & (
                    self.block_hashes[blocks] == -1
                )
                if unreg_mask.any():
                    unreg_blocks = blocks[unreg_mask]
                    num_unreg = unreg_blocks.numel()
                    self.block_bag[self.total_avail : self.total_avail + num_unreg] = unreg_blocks
                    self.total_avail += num_unreg
        else:
            num_blocks = blocks.numel()
            self.block_bag[self.total_avail : self.total_avail + num_blocks] = blocks
            self.total_avail += num_blocks

    def reset(self) -> None:
        """Reset the allocator to initial state.

        This resets the available block count to the entire memory pool
        (except for the dummy block).
        """

        # Reset block bag to so we start consuming from the beginning of the pool
        # for UVM performance.
        # *Note*: Resetting the block bag is essential because if engine has been
        # suspended, then the block bag contains non-unique IDs since the
        # right-most IDs have been 'popped' off and are owned by the context.
        # Without resetting the block bag, context request memory will clash and
        # requests will point to each other's memory blocks, resulting in faulty
        # generations.
        self.block_bag = torch.arange(
            self.total_count, dtype=torch.int32, device=torch.cuda.current_device()
        )

        self.total_avail = self.total_count - 1

        if self.enable_prefix_caching:
            # Reset all block hashes
            self.block_hashes.fill_(-1)

            # Reset prefix caching state
            self.kv_hash_to_block_id.clear()
            self.block_ref_counts.fill_(0)
            if self.prefix_caching_eviction_policy == PrefixCachingEvictionPolicy.LRU:
                self.block_timestamps.fill_(0)

    # =========================================================================
    # Prefix caching methods
    # =========================================================================

    def register_kv_block_hashes(self, block_ids: list[int], block_hashes: list[int]) -> None:
        """Register blocks in the hash-to-block mapping for discovery (batch).

        Args:
            block_ids: List of block IDs.
            block_hashes: List of computed hash values (same length as block_ids).
        """
        if not block_ids:
            return
        id_tensor = torch.tensor(block_ids, dtype=torch.int64, device=self.block_hashes.device)
        hash_tensor = torch.tensor(block_hashes, dtype=torch.int64, device=self.block_hashes.device)
        self.block_hashes[id_tensor] = hash_tensor
        self.kv_hash_to_block_id.update(zip(block_hashes, block_ids))

    def _deregister_blocks(self, block_ids: Tensor) -> None:
        """Remove blocks from prefix caching state and return to free pool.

        Shared cleanup logic for both LRU eviction and RZ proactive eviction.

        Args:
            block_ids: Tensor of block IDs to deregister.
        """
        num_blocks = block_ids.numel()
        if num_blocks == 0:
            return

        # Gather hashes via batched tensor indexing
        block_ids_i64 = block_ids.to(torch.int64)
        hashes = self.block_hashes[block_ids_i64].tolist()

        # Remove from kv_hash_to_block_id dict (set ops + C-level map, no Python loop)
        keys_to_delete = set(hashes) - {-1}
        deque(
            map(self.kv_hash_to_block_id.pop, keys_to_delete & self.kv_hash_to_block_id.keys()),
            maxlen=0,
        )

        # Notify Mamba slot allocator (if wired) to clean up its state
        if self.on_blocks_deregistered is not None:
            self.on_blocks_deregistered(block_ids.tolist(), keys_to_delete)

        # Reset block state (batched tensor ops)
        self.block_hashes[block_ids] = -1
        self.block_ref_counts[block_ids] = 0
        if self.prefix_caching_eviction_policy == PrefixCachingEvictionPolicy.LRU:
            self.block_timestamps[block_ids] = 0

        # Return blocks to free pool
        self.block_bag[self.total_avail : self.total_avail + num_blocks] = block_ids
        self.total_avail += num_blocks

    def update_timestamps(self, block_ids: Tensor) -> None:
        """Update LRU timestamps for accessed blocks. No-op in RZ mode.

        Args:
            block_ids: Tensor of block IDs that were accessed.
        """
        if (
            self.prefix_caching_eviction_policy != PrefixCachingEvictionPolicy.LRU
            or block_ids.numel() == 0
        ):
            return
        self.block_timestamps[block_ids] = self.context.prefix_cache_lru_clock

    def get_evictable_block_count(self) -> Tensor:
        """Get count of cached blocks that can be evicted (ref_count == 0, hash set).

        Returns:
            Scalar tensor with the number of evictable cached blocks.
        """
        cached_mask = (self.block_ref_counts == 0) & (self.block_hashes != -1)
        return cached_mask.sum()

    def evict_lru_blocks(self, num_blocks_needed: int) -> bool:
        """Evict LRU cached blocks to free up space in the pool.

        Evicts blocks with ref_count == 0, starting with oldest timestamps.

        Args:
            num_blocks_needed: Number of blocks to evict.

        Returns:
            True if enough blocks were evicted, False otherwise.
        """
        # Find all cached blocks (ref_count == 0, hash != -1)
        cached_mask = (self.block_ref_counts == 0) & (self.block_hashes != -1)
        cached_block_ids = torch.nonzero(cached_mask, as_tuple=True)[0]

        if cached_block_ids.numel() < num_blocks_needed:
            return False  # Not enough cached blocks to evict

        # Sort by timestamp (ascending = oldest first)
        cached_timestamps = self.block_timestamps[cached_block_ids]
        sorted_indices = torch.argsort(cached_timestamps)
        blocks_to_evict = cached_block_ids[sorted_indices[:num_blocks_needed]]

        self._deregister_blocks(blocks_to_evict)

        return True


================================================
FILE: megatron/core/inference/contexts/mamba_slot_allocator.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from typing import TYPE_CHECKING, Dict, List, Optional

import torch
from torch import Tensor

from megatron.core.inference.config import PrefixCachingEvictionPolicy

if TYPE_CHECKING:
    from .dynamic_context import DynamicInferenceContext


class MambaSlotAllocator:
    """Manages Mamba state caching for prefix caching in hybrid models.

    Owns the Mamba cache slot pool, block-to-slot mappings, hash-to-block
    mapping, and intermediate state tracking. Accesses KV allocator state
    (ref counts, timestamps, block hashes) via the parent context.

    Args:
        context: The DynamicInferenceContext that owns this allocator.
        max_slots: Maximum number of cache slots.
        num_mamba_layers: Number of Mamba layers in the model.
        conv_states_shape: Shape of per-slot conv state (excluding layer/slot dims).
        ssm_states_shape: Shape of per-slot SSM state (excluding layer/slot dims).
        conv_states_dtype: Dtype for conv state tensors.
        ssm_states_dtype: Dtype for SSM state tensors.
    """

    def __init__(
        self,
        context: "DynamicInferenceContext",
        max_slots: int,
        num_mamba_layers: int,
        conv_states_shape: tuple,
        ssm_states_shape: tuple,
        conv_states_dtype: torch.dtype,
        ssm_states_dtype: torch.dtype,
    ):
        self.context = context
        self.max_slots = max_slots
        self.num_mamba_layers = num_mamba_layers

        device = torch.cuda.current_device()
        num_blocks = context.kv_block_allocator.total_count

        # Block <-> slot mappings
        self.block_to_slot = torch.full((num_blocks,), -1, dtype=torch.int32, device=device)
        self.slot_to_block = torch.full((max_slots,), -1, dtype=torch.int32, device=device)

        # Free slot pool (stack)
        self.free_slots = torch.arange(max_slots, dtype=torch.int32, device=device)
        self.free_count = max_slots

        # State tensors
        self.conv_states = torch.zeros(
            (num_mamba_layers, max_slots) + conv_states_shape,
            dtype=conv_states_dtype,
            device=device,
        )
        self.ssm_states = torch.zeros(
            (num_mamba_layers, max_slots) + ssm_states_shape, dtype=ssm_states_dtype, device=device
        )

        # Hash-to-block mapping: only blocks with cached Mamba state
        self.hash_to_block_id: Dict[int, int] = {}

        # Per-request intermediate state storage
        self._intermediate_offsets: list = [None] * context.max_requests
        self._intermediate_block_ids: list = [None] * context.max_requests
        self._eos_cache_block_id: list = [None] * context.max_requests
        self._intermediate_buffer: dict = {}

    # =========================================================================
    # Slot management
    # =========================================================================

    def allocate_slot(self, block_id: int) -> int:
        """Get a free Mamba cache slot for a block, evicting if necessary.

        Args:
            block_id: The KV block ID to associate with this slot.

        Returns:
            The allocated slot index.
        """
        # Check if block already has a slot
        existing = self.block_to_slot[block_id].item()
        if existing >= 0:
            return existing

        # Try free pool
        if self.free_count > 0:
            self.free_count -= 1
            slot = self.free_slots[self.free_count].item()
        else:
            slot = self._evict_lru_slot()

        self.block_to_slot[block_id] = slot
        self.slot_to_block[slot] = block_id
        return slot

    def _evict_lru_slot(self) -> int:
        """Evict the least recently used Mamba cache slot.

        Returns:
            The freed slot index.
        """
        kv_alloc = self.context.kv_block_allocator
        # Find blocks that have mamba slots and ref_count == 0
        has_slot_mask = self.block_to_slot[: kv_alloc.total_count] >= 0
        ref_zero_mask = kv_alloc.block_ref_counts[: kv_alloc.total_count] == 0
        candidates = has_slot_mask & ref_zero_mask
        candidate_ids = torch.nonzero(candidates, as_tuple=True)[0]

        if candidate_ids.numel() == 0:
            raise RuntimeError("No evictable Mamba cache slots available")

        # Pick block with oldest timestamp if LRU, otherwise just pick first
        if self.context.prefix_caching_eviction_policy == PrefixCachingEvictionPolicy.LRU:
            timestamps = kv_alloc.block_timestamps[candidate_ids]
            evict_idx = candidate_ids[torch.argmin(timestamps)].item()
        else:
            evict_idx = candidate_ids[0].item()

        slot = self.block_to_slot[evict_idx].item()
        block_hash = kv_alloc.block_hashes[evict_idx].item()

        # Clean up mappings
        self.block_to_slot[evict_idx] = -1
        self.slot_to_block[slot] = -1
        if block_hash > 0 and block_hash in self.hash_to_block_id:
            del self.hash_to_block_id[block_hash]

        return slot

    def get_slot(self, block_id: int) -> int:
        """Return the cache slot for a block, or -1 if none.

        Args:
            block_id: The KV block ID.

        Returns:
            Slot index or -1.
        """
        return self.block_to_slot[block_id].item()

    def has_state(self, block_id: int) -> bool:
        """Check if a block has cached Mamba state."""
        return self.block_to_slot[block_id].item() >= 0

    def invalidate_block(self, block_id: int) -> None:
        """Free cache slot and clear mappings for a block.

        Called when KV blocks are evicted/deregistered.

        Args:
            block_id: The KV block ID.
        """
        slot = self.block_to_slot[block_id].item()
        if slot < 0:
            return
        self.block_to_slot[block_id] = -1
        self.slot_to_block[slot] = -1
        # Return slot to free pool
        self.free_slots[self.free_count] = slot
        self.free_count += 1

    # =========================================================================
    # State store/restore
    # =========================================================================

    def store_from_tensors(
        self, block_id: int, layer_idx: int, ssm_state: Tensor, conv_state: Tensor
    ) -> None:
        """Write provided state tensors to a cache slot for a specific layer.

        Args:
            block_id: The KV block ID.
            layer_idx: The Mamba layer index.
            ssm_state: SSM state tensor to store.
            conv_state: Conv state tensor to store.
        """
        slot = self.block_to_slot[block_id].item()
        assert slot >= 0, f"Block {block_id} has no Mamba cache slot"
        self.ssm_states[layer_idx, slot].copy_(ssm_state)
        self.conv_states[layer_idx, slot].copy_(conv_state)

    def store_from_live(self, block_id: int, request_idx: int) -> None:
        """Copy all layers from live per-request buffer to cache slot.

        Used for block-aligned EOS case where the final kernel state
        is in the live buffer.

        Args:
            block_id: The KV block ID.
            request_idx: The context request index.
        """
        slot = self.block_to_slot[block_id].item()
        assert slot >= 0, f"Block {block_id} has no Mamba cache slot"
        mamba_idx = self.context.mamba_metadata.request_to_mamba_state_idx[request_idx].item()
        self.conv_states[:, slot].copy_(self.context.mamba_conv_states[:, mamba_idx])
        self.ssm_states[:, slot].copy_(self.context.mamba_ssm_states[:, mamba_idx])

    def restore_to_live(self, request_idx: int, block_id: int) -> bool:
        """Copy all layers from cache slot to live request state.

        Args:
            request_idx: The context request index.
            block_id: The KV block ID.

        Returns:
            True if state was restored, False if block has no cached state.
        """
        slot = self.block_to_slot[block_id].item()
        if slot < 0:
            return False
        mamba_idx = self.context.mamba_metadata.request_to_mamba_state_idx[request_idx].item()
        self.context.mamba_conv_states[:, mamba_idx].copy_(self.conv_states[:, slot])
        self.context.mamba_ssm_states[:, mamba_idx].copy_(self.ssm_states[:, slot])
        return True

    # =========================================================================
    # Hash registration
    # =========================================================================

    def register_block_hash(self, block_id: int, block_hash: int) -> None:
        """Register a block as having cached Mamba state.

        Args:
            block_id: The block ID.
            block_hash: The block's hash value.
        """
        self.hash_to_block_id[block_hash] = block_id

    # =========================================================================
    # Deregistration callback
    # =========================================================================

    def on_kv_blocks_deregistered(self, block_ids_list: list, hashes_to_delete: set) -> None:
        """Handle KV block deregistration by cleaning up Mamba state.

        Called by KVBlockAllocator._deregister_blocks via callback.

        Args:
            block_ids_list: List of deregistered block IDs.
            hashes_to_delete: Set of hashes being deregistered (excludes -1).
        """
        if self.hash_to_block_id:
            mamba_keys = hashes_to_delete & self.hash_to_block_id.keys()
            if mamba_keys:
                from collections import deque

                deque(map(self.hash_to_block_id.pop, mamba_keys), maxlen=0)
                for bid in block_ids_list:
                    self.invalidate_block(bid)

    # =========================================================================
    # Intermediate offset tracking
    # =========================================================================

    def compute_and_store_offsets(
        self,
        req,
        current_id: int,
        skip_tokens: int,
        prefill_chunk_length: int,
        num_matched_blocks: int,
        matched_block_ids: list,
        overall_required_blocks: int,
    ) -> None:
        """Compute intermediate state extraction offsets and store per-request.

        Args:
            req: The inference request.
            current_id: Context request index.
            skip_tokens: Number of tokens being skipped (mamba match).
            prefill_chunk_length: Total prefill chunk length before skipping.
            num_matched_blocks: Number of KV-matched blocks.
            matched_block_ids: List of matched KV block IDs.
            overall_required_blocks: Total blocks needed for this request.
        """
        ctx = self.context
        prompt_len = len(req.prompt_tokens)
        num_kv_matched = num_matched_blocks
        kv_div_abs = num_kv_matched * ctx.block_size_tokens
        last_aligned_abs = (prompt_len // ctx.block_size_tokens) * ctx.block_size_tokens
        seq_len = prefill_chunk_length - skip_tokens  # effective prefill length

        # Compute relative offsets (relative to prefill start after skip)
        kv_div_rel = kv_div_abs - skip_tokens
        last_aligned_rel = last_aligned_abs - skip_tokens
        penultimate_abs = (overall_required_blocks - 1) * ctx.block_size_tokens
        penultimate_rel = penultimate_abs - skip_tokens

        # Determine mamba_chunk_size from mamba config (128 is the standard SSM kernel chunk size)
        mamba_chunk_size = 128

        # Build offset list: include if > 0, < seq_len, and % mamba_chunk_size == 0
        offsets_set = set()
        for offset in [kv_div_rel, last_aligned_rel, penultimate_rel]:
            if offset > 0 and offset < seq_len and offset % mamba_chunk_size == 0:
                offsets_set.add(offset)

        offsets = sorted(offsets_set)

        # Map each offset back to block index and block ID
        block_ids_for_offsets = []
        for offset in offsets:
            abs_token = skip_tokens + offset
            block_idx = abs_token // ctx.block_size_tokens - 1
            bid = ctx.request_to_kv_block_ids[current_id][block_idx].item()
            block_ids_for_offsets.append(bid)

        self._intermediate_offsets[current_id] = offsets if offsets else None
        self._intermediate_block_ids[current_id] = (
            block_ids_for_offsets if block_ids_for_offsets else None
        )

        # Block-aligned EOS: prompt_len is exactly block-aligned
        if last_aligned_abs == prompt_len and prompt_len > 0:
            last_block_idx = prompt_len // ctx.block_size_tokens - 1
            if last_block_idx >= 0:
                eos_bid = ctx.request_to_kv_block_ids[current_id][last_block_idx].item()
                self._eos_cache_block_id[current_id] = eos_bid
            else:
                self._eos_cache_block_id[current_id] = None
        else:
            self._eos_cache_block_id[current_id] = None

    def get_intermediate_offsets(self) -> Optional[List[List[int]]]:
        """Get intermediate token offsets for all prefill requests in the current batch.

        Returns:
            List of offset lists (one per prefill request), or None if no
            request has intermediate offsets.
        """
        ctx = self.context
        prefill_count = ctx.batch_dimensions.prefill_req_count
        if prefill_count == 0:
            return None

        # Prefill requests are the last `prefill_count` active requests
        active_start = ctx.paused_request_count
        decode_count = ctx.batch_dimensions.decode_req_count
        prefill_start = active_start + decode_count

        result = []
        has_any = False
        for i in range(prefill_start, prefill_start + prefill_count):
            offsets = self._intermediate_offsets[i]
            if offsets is not None:
                has_any = True
                result.append(offsets)
            else:
                result.append([])

        return result if has_any else None

    def buffer_intermediate_states(
        self, mamba_layer_idx: int, intermediate_states_per_request: list
    ) -> None:
        """Buffer intermediate states from a single Mamba layer's forward pass.

        Args:
            mamba_layer_idx: The Mamba layer index.
            intermediate_states_per_request: Per-request list of
                (ssm_states, conv_states) tuples or None.
        """
        self._intermediate_buffer[mamba_layer_idx] = intermediate_states_per_request

    def commit_intermediate_states(self) -> None:
        """Commit buffered intermediate states to the Mamba cache.

        Called after the forward pass completes. For each prefill request:
        - Intermediate states at kv_divergence/last_aligned: allocate cache slot,
          write state, register hash in hash_to_block_id.
        - Block-aligned EOS: copy final state from live buffer to cache slot.
        """
        ctx = self.context
        prefill_count = ctx.batch_dimensions.prefill_req_count
        if prefill_count == 0:
            self._clear_intermediate_state()
            return

        active_start = ctx.paused_request_count
        decode_count = ctx.batch_dimensions.decode_req_count
        prefill_start = active_start + decode_count
        has_buffer = bool(self._intermediate_buffer)

        for req_batch_idx in range(prefill_count):
            ctx_idx = prefill_start + req_batch_idx
            offsets = self._intermediate_offsets[ctx_idx]
            block_ids = self._intermediate_block_ids[ctx_idx]

            # Commit intermediate states from forward pass
            if offsets is not None and block_ids is not None and has_buffer:
                for offset_idx in range(len(offsets)):
                    bid = block_ids[offset_idx]
                    slot = self.allocate_slot(bid)

                    # Write states from each mamba layer
                    for layer_idx, states_list in self._intermediate_buffer.items():
                        if states_list[req_batch_idx] is not None:
                            ssm_states, conv_states = states_list[req_batch_idx]
                            self.ssm_states[layer_idx, slot].copy_(ssm_states[offset_idx])
                            self.conv_states[layer_idx, slot].copy_(conv_states[offset_idx])

                    # Register in mamba hash map
                    block_hash = ctx.kv_block_allocator.block_hashes[bid].item()
                    if block_hash > 0:
                        self.register_block_hash(bid, block_hash)

            # Handle block-aligned EOS: copy final state from live buffer
            eos_bid = self._eos_cache_block_id[ctx_idx]
            if eos_bid is not None:
                slot = self.allocate_slot(eos_bid)
                self.store_from_live(eos_bid, ctx_idx)
                block_hash = ctx.kv_block_allocator.block_hashes[eos_bid].item()
                if block_hash > 0:
                    self.register_block_hash(eos_bid, block_hash)

        self._clear_intermediate_state()

    def _clear_intermediate_state(self) -> None:
        """Clear all per-request intermediate state tracking."""
        self._intermediate_buffer.clear()
        ctx = self.context
        prefill_count = ctx.batch_dimensions.prefill_req_count
        if prefill_count > 0:
            active_start = ctx.paused_request_count
            decode_count = ctx.batch_dimensions.decode_req_count
            prefill_start = active_start + decode_count
            for i in range(prefill_start, prefill_start + prefill_count):
                self._intermediate_offsets[i] = None
                self._intermediate_block_ids[i] = None
                self._eos_cache_block_id[i] = None

    # =========================================================================
    # Reset
    # =========================================================================

    def reset(self) -> None:
        """Reset all state (mappings, free pool, cache, intermediate tracking)."""
        self.block_to_slot.fill_(-1)
        self.slot_to_block.fill_(-1)
        self.free_slots = torch.arange(
            self.max_slots, dtype=torch.int32, device=torch.cuda.current_device()
        )
        self.free_count = self.max_slots
        self.hash_to_block_id.clear()
        self._intermediate_buffer.clear()
        for i in range(self.context.max_requests):
            self._intermediate_offsets[i] = None
            self._intermediate_block_ids[i] = None
            self._eos_cache_block_id[i] = None


================================================
FILE: megatron/core/inference/contexts/routing_metadata.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from typing import TYPE_CHECKING, Optional

import torch

if TYPE_CHECKING:
    from megatron.core.inference.contexts.dynamic_context import DynamicInferenceContext

from megatron.core.transformer.moe.router_replay import RouterReplay


class RoutingMetadata:
    """Manages routing indices metadata for MoE layers during inference.

    This class provides static buffers for CUDA graph compatibility when
    recording routing decisions. It holds a reference to the inference context
    to automatically determine whether to use static buffers based on CUDA graph state.

    Args:
        context (DynamicInferenceContext): The inference context.
        moe_router_topk (int): Number of experts selected per token.
    """

    def __init__(self, context: 'DynamicInferenceContext', moe_router_topk: int):
        self.context = context
        self.max_tokens = context.max_tokens
        self.moe_router_topk = moe_router_topk
        self.device = torch.cuda.current_device()

        # Static buffer allocated lazily in _ensure_buffer_allocated().
        # We defer allocation because RouterReplay instances don't exist yet at init time.
        self.routing_indices_buffer: Optional[torch.Tensor] = None
        self.num_moe_layers: Optional[int] = None

    def _ensure_buffer_allocated(self) -> None:
        """Allocate the static buffer if not already allocated.

        Gets the actual number of MoE layers from RouterReplay instances.
        """
        if self.routing_indices_buffer is not None:
            return

        self.num_moe_layers = len(RouterReplay.global_router_replay_instances)

        if self.num_moe_layers == 0:
            return

        # Static buffer for CUDA graph compatibility.
        # Shape: [max_tokens, num_moe_layers, moe_router_topk]
        self.routing_indices_buffer = torch.empty(
            (self.max_tokens, self.num_moe_layers, self.moe_router_topk),
            dtype=torch.int32,
            device=self.device,
        )

    def get_routing_indices(self) -> Optional[torch.Tensor]:
        """Get the recorded routing indices.

        Automatically uses the static buffer when CUDA graphs are active,
        otherwise retrieves from RouterReplay utility.

        Returns:
            Tensor of shape [num_tokens, num_moe_layers, topk] or None if not available.
        """
        if self.context.using_cuda_graph_this_step():
            # Return view of static buffer up to current token count.
            if self.routing_indices_buffer is None:
                return None
            # Only return up to active token count, to skip entries
            # for padding tokens.
            return self.routing_indices_buffer[: self.context.active_token_count]
        else:
            # Get from RouterReplay and stack into [num_tokens, num_layers, topk].
            recorded_data = RouterReplay.get_recorded_data()
            if recorded_data is None or len(recorded_data) == 0:
                return None
            if recorded_data[0] is None:
                return None
            # Stack: list of [num_tokens, topk] -> [num_tokens, num_layers, topk]
            return torch.stack(recorded_data, dim=1)

    def enable_static_buffer_recording(self) -> None:
        """Enable recording into the static buffer for CUDA graph compatibility.

        This sets up RouterReplay instances to copy routing indices into our
        pre-allocated static buffer instead of creating new tensors.
        Allocates the buffer lazily on first call.
        """
        self._ensure_buffer_allocated()
        if self.routing_indices_buffer is not None:
            RouterReplay.set_global_static_buffers(self.routing_indices_buffer)

    def disable_static_buffer_recording(self) -> None:
        """Disable static buffer recording, reverting to normal tensor assignment."""
        RouterReplay.clear_global_static_buffers()


================================================
FILE: megatron/core/inference/contexts/static_context.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from megatron.core.inference.config import InferenceConfig

from .base_context import BaseInferenceContext


class StaticInferenceContext(BaseInferenceContext):
    """Static inference context that is passed to the main model in order
    to efficiently manage the KV cache during inference.

    Args:
        max_batch_size (int): Max supported batch size.
        max_sequence_length (int): Max supported sequence length.
    """

    def __init__(
        self, max_batch_size: int, max_sequence_length: int, use_flashinfer_fused_rope: bool = None
    ):
        config = InferenceConfig(materialize_only_last_token_logits=True)
        super().__init__(inference_config=config)
        self.max_sequence_length = max_sequence_length
        self.max_batch_size = max_batch_size
        self.sequence_len_offset = 0
        self.batch_size_offset = 0
        self.key_value_memory_dict = {}
        self.decode_mode = False

    def swap_key_value_dict(self, batch_idx):
        "swap between batches"
        if len(self.key_value_memory_dict) == 0:
            raise ValueError("should not swap when dict in empty")

        for layer_number in self.key_value_memory_dict.keys():
            inference_key_memory, inference_value_memory = self.key_value_memory_dict[layer_number]
            assert (
                len(batch_idx) == inference_key_memory.shape[1]
            )  # make sure batch size is the same
            new_inference_key_memory = inference_key_memory[:, batch_idx]
            new_inference_value_memory = inference_value_memory[:, batch_idx]
            self.key_value_memory_dict[layer_number] = (
                new_inference_key_memory,
                new_inference_value_memory,
            )

    def enable_prefill_mode(self):
        """
        Indicates the generation loop is in the prefill phase (still processing
        input prompt tokens). This should be enabled if the generation loop is
        encoding prompt tokens for *any* request in a batch.
        """
        self.decode_mode = False

    def enable_decode_mode(self):
        """
        Indicates the generation loop is in the decode phase (generating new output
        tokens). This should only be enabled if the generation loop has fully encoded
        the prompts for *all* requests in a batch.
        """
        self.decode_mode = True

    def is_decode_only(self):
        """Functional access to `.decode_mode`, to match dynamic context."""
        return self.decode_mode

    def reset(self):
        """Resets the inference state for a new batch."""
        self.sequence_len_offset = 0
        self.batch_size_offset = 0
        self.enable_prefill_mode()

    def __str__(self):
        return (
            f"StaticInferenceContext(max_seq_len = {self.max_sequence_length}, "
            f"max_batch_size = {self.max_batch_size}, "
            f"sequence_len_offset = {self.sequence_len_offset}, "
            f"batch_size_offset = {self.batch_size_offset}, "
            f"key_value_memory_dict = {self.key_value_memory_dict.keys()})"
            f"decode_mode = {self.decode_mode}"
            f"materialize_only_last_token_logits = {self.materialize_only_last_token_logits}"
        )

    def __eq__(self, other):

        if id(self) == id(other):
            return True

        if not isinstance(other, StaticInferenceContext):
            return False

        # Check all attributes match
        basic_attrs = [
            'max_sequence_length',
            'max_batch_size',
            'sequence_len_offset',
            'batch_size_offset',
            'decode_mode',
            'materialize_only_last_token_logits',
        ]

        if not all(hasattr(other, attr) for attr in basic_attrs):
            return False

        # Check dictionary keys match; i.e. the same number of layers are cached
        if self.key_value_memory_dict.keys() != other.key_value_memory_dict.keys():
            return False

        # Check each tensor tuple in the dictionary
        for key in self.key_value_memory_dict:
            self_tensors = self.key_value_memory_dict[key]
            other_tensors = other.key_value_memory_dict[key]

            # Compare each key, value tensor in the tuple
            for self_tensor, other_tensor in zip(self_tensors, other_tensors):
                if (
                    self_tensor.data_ptr() != other_tensor.data_ptr()
                    or self_tensor.shape != other_tensor.shape
                ):
                    return False

    def is_static_batching(self):
        return True


================================================
FILE: megatron/core/inference/data_parallel_inference_coordinator.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import errno
import faulthandler
import json
import logging
import signal
import socket
from collections import deque
from enum import Enum, auto
from multiprocessing import Event
from multiprocessing.connection import Connection

import torch

from megatron.core.inference.config import PrefixCachingCoordinatorPolicy
from megatron.core.inference.headers import Headers, UnknownHeaderError
from megatron.core.inference.inference_request import compute_block_hashes_batched
from megatron.core.inference.text_generation_controllers.text_generation_controller import (
    TextGenerationController,
)

try:
    import zmq

    HAVE_ZMQ = True
except:
    HAVE_ZMQ = False

try:
    import msgpack

    HAVE_MSGPACK = True
except:
    HAVE_MSGPACK = False

# Register faulthandler to emit stack traces upon process kill.
faulthandler.enable()
faulthandler.register(signal.SIGTERM, all_threads=False, chain=True)
faulthandler.register(signal.SIGINT, all_threads=False, chain=True)


class DataParallelInferenceCoordinator:
    """
    Coordinates inference requests between clients and distributed model engines.

    This class acts as a central server. It uses a ZMQ ROUTER socket to manage
    communication flows between multiple clients and multiple data parallel ranks.

    The coordinator's main responsibilities are:
    1.  **Worker Registration**: It waits for a specified number of data parallel ranks
        (representing distributed model instances) to connect and register themselves.
    2.  **Client Connection**: It accepts connections from external clients, like
        `InferenceClient`, and performs a simple handshake.
    3.  **Request Forwarding**: It receives inference requests from clients, assigns a
        unique server-side request ID, tokenizes the prompt, and forwards the request
        to one of the available data parallel rank using a round-robin scheduling
        strategy.
    4.  **Response Routing**: It receives completed results from
        the data parallel ranks and routes them back to the original client that made the
        request.
    5.  **Control Signal Broadcasting**: It relays control signals (e.g., PAUSE, STOP)
        from a client to all connected data parallel ranks.

    Attributes:
        router_socket (zmq.Socket): The central ZMQ ROUTER socket for all communication.
        data_parallel_size (int): The number of data parallel workers to expect.
        identities_of_data_parallel_ranks (deque): A deque holding the ZMQ
            identities of connected TP-coordinators, used for round-robin scheduling.
        request_id_to_client_id (dict): Maps server-side request IDs to the ZMQ
            identity of the client that initiated the request.
        request_id_to_client_request_id (dict): Maps server-side request IDs to the
            original request ID provided by the client.
        next_request_id (int): A counter for generating unique server-side request IDs.
    """

    class CoordinatorState(Enum):
        """State machine for the coordinator."""

        RUNNING = auto()
        PAUSED = auto()
        SUSPENDED = auto()
        STOPPING = auto()

    def __init__(
        self,
        pipe_connection: Connection,
        data_parallel_size: int,
        tokenizer,
        inference_coordinator_port: int | None = None,
        deterministic_mode: bool = False,
        block_size_tokens: int | None = None,
        enable_prefix_caching: bool = False,
        prefix_caching_coordinator_policy: PrefixCachingCoordinatorPolicy = (
            PrefixCachingCoordinatorPolicy.FIRST_PREFIX_BLOCK
        ),
        schedule_output_path: str | None = None,
    ):
        """
        Initializes the inference coordinator.

        This sets up the ZMQ context and a ROUTER socket, binding it to the given
        port. It then enters a blocking loop to wait for all expected data parallel
        ranks to connect before proceeding.

        Args:
            pipe_connection (Connection): A connecting pipe to the parent process.
            data_parallel_size (int): The number of TP-coordinator workers that are
                expected to connect.
            tokenizer: The tokenizer to use for prompt tokenization and detokenization.
            inference_coordinator_port (Optional[int]): The TCP port number to bind the server to.
        """
        assert HAVE_ZMQ, (
            "please install the pyzmq library to use DataParallelInferenceCoordinator\n"
            "pip install pyzmq"
        )
        assert HAVE_MSGPACK, (
            "please install the messagepack library to use DataParallelInferenceCoordinator\n"
            "pip install msgpack"
        )
        self.pipe_connection = pipe_connection
        self.data_parallel_size = data_parallel_size
        self.context = zmq.Context()

        # This is the central router socket
        # 1. data parallel ranks connect to this socket to register themselves
        # 2. Users connect to this socket and submit their requests. We transmit them to
        #    data parallel ranks in a round robin fashion
        # 3. data parallel ranks return completed requests to this socket. We route them back to
        #    the user that had submitted the request originally.

        # Get local IP.
        local_ip = socket.gethostname()

        self.router_socket = self.context.socket(zmq.ROUTER)
        # Raise error if the other side of the connection has dropped.
        self.router_socket.setsockopt(zmq.ROUTER_MANDATORY, 1)
        is_bound = False
        if inference_coordinator_port is not None:
            try:
                self.router_socket.bind(f"tcp://{local_ip}:{inference_coordinator_port}")
                is_bound = True
            except zmq.error.ZMQError as e:
                if e.errno == errno.EADDRINUSE:
                    logging.warning(
                        f"Port {inference_coordinator_port} is already in use. "
                        "Binding to a random available port instead."
                    )
            except Exception:
                logging.warning(
                    f"Unknown error when binding to port {inference_coordinator_port}. "
                    "Attempting to bind to a random available port instead."
                )
        if not is_bound:
            self.router_socket.bind_to_random_port(f"tcp://{local_ip}")
        self.addr = self.router_socket.getsockopt_string(zmq.LAST_ENDPOINT)

        # Send the address to the parent process.
        self.pipe_connection.send(self.addr)
        self.pipe_connection.close()

        logging.info("Inference Coordinator: waiting for connections from data parallel ranks...")
        # First wait for all data parallel ranks to establish connections.
        self.identities_of_data_parallel_ranks = deque([])
        # time.sleep(5)  # Give data parallel ranks time to spawn and connect.
        for _ in range(data_parallel_size):
            identity, _ = self.router_socket.recv_multipart()
            assert identity not in self.identities_of_data_parallel_ranks
            self.identities_of_data_parallel_ranks.append(identity)
        logging.info("Inference Coordinator: Connected with data parallel ranks...")

        # In deterministic mode, sort identities for consistent scheduling order.
        if deterministic_mode:
            self.identities_of_data_parallel_ranks = deque(
                sorted(self.identities_of_data_parallel_ranks)
            )
        self._round_robin_idx = 0

        self.request_id_to_client_id = {}
        self.request_id_to_client_request_id = {}

        self.next_request_id = 0
        self.tokenizer = tokenizer
        self.state = self.CoordinatorState.RUNNING

        # Prefix caching state for routing.
        self.block_size_tokens = block_size_tokens
        self.enable_prefix_caching = enable_prefix_caching
        self.prefix_caching_coordinator_policy = prefix_caching_coordinator_policy
        self.hash_to_rank_info = {}  # Dict[int, Dict[bytes, int]]: hash → {rank → timestamp}
        self._assignment_counter = 0

        # Schedule recording.
        self.schedule_output_path = schedule_output_path
        self.schedule_records = [] if schedule_output_path else None

        # Deterministic rank index mapping (sorted identity -> 0-based index).
        sorted_identities = sorted(self.identities_of_data_parallel_ranks)
        self.identity_to_rank_index = {
            identity: idx for idx, identity in enumerate(sorted_identities)
        }

    def get_next_data_parallel_rank(self):
        """
        Selects the next data parallel rank using round-robin scheduling.

        Returns:
            bytes: The ZMQ identity of the next data parallel rank to receive a request.
        """
        identities = self.identities_of_data_parallel_ranks
        if not identities:
            raise RuntimeError("No engines connected")
        idx = self._round_robin_idx % len(identities)
        self._round_robin_idx = idx + 1
        return identities[idx]

    def _remove_engine(self, identity):
        """Remove a disconnected engine from the routing pool."""
        self.identities_of_data_parallel_ranks.remove(identity)
        logging.warning(
            "Coordinator: removed engine %s (now %d engines)",
            identity,
            len(self.identities_of_data_parallel_ranks),
        )

    def _send_to_engine(self, identity, payload):
        """Send payload to an engine, removing it from the pool if unreachable.

        Returns:
            True if the send succeeded, False if the engine was unreachable and removed.
        """
        try:
            self.router_socket.send_multipart([identity, payload])
            return True
        except zmq.error.ZMQError as e:
            if e.errno == zmq.EHOSTUNREACH:
                self._remove_engine(identity)
                return False
            raise

    def compute_request_hashes(self, prompt):
        """Compute block hashes for a prompt on CPU.

        Args:
            prompt: Either a string (to be tokenized) or a list of token IDs.

        Returns:
            List of integer block hashes, or empty list if prefix caching is disabled.
        """
        if not self.enable_prefix_caching or self.block_size_tokens is None:
            return []
        if isinstance(prompt, str):
            tokens = self.tokenizer.tokenize(prompt)
        else:
            tokens = list(prompt)
        token_tensor = torch.tensor(tokens, dtype=torch.int64)
        return compute_block_hashes_batched(token_tensor, self.block_size_tokens)

    def get_best_data_parallel_rank(self, request_hashes):
        """Select the best DP rank based on prefix cache affinity.

        Iterates request hashes in reverse order and picks the rank that cached
        the longest matching prefix (the furthest hash found). Since hashes are
        parent-chained, finding hash[i] in a rank guarantees hash[0..i-1] are
        also present. Among ranks that share the longest match, the most recently
        assigned rank (highest timestamp) is preferred. Falls back to round-robin
        when no rank matches.

        Args:
            request_hashes: List of block hashes for the request.

        Returns:
            bytes: The ZMQ identity of the selected data parallel rank.
        """
        if (
            not self.enable_prefix_caching
            or not request_hashes
            or self.prefix_caching_coordinator_policy == PrefixCachingCoordinatorPolicy.ROUND_ROBIN
        ):
            return self.get_next_data_parallel_rank()

        # Reverse scan: first match is the longest prefix (parent-chained hashes).
        for h in reversed(request_hashes):
            rank_info = self.hash_to_rank_info.get(h)
            if rank_info:
                # Pick the most recently assigned rank.
                best_rank = max(rank_info, key=rank_info.get)
                return best_rank

        return self.get_next_data_parallel_rank()

    def _update_rank_hashes(self, rank_identity, request_hashes):
        """Record that a rank owns the given hashes.

        Args:
            rank_identity: ZMQ identity of the target rank.
            request_hashes: List of block hashes assigned to this rank.
        """
        self._assignment_counter += 1
        ts = self._assignment_counter
        for h in request_hashes:
            self.hash_to_rank_info.setdefault(h, {})[rank_identity] = ts

    def start(self):
        """
        Starts the main event loop for the coordinator.

        This method runs an infinite loop, continuously listening for incoming
        messages on the ZMQ ROUTER socket. It parses the message header to
        determine the message type and takes appropriate action, such as
        handling new client connections, forwarding requests, broadcasting
        control signals, or processing replies from the engines.
        """
        # Todo [Siddharth]: Make this more robust to handle invalid messages.
        known_clients = set()
        while True:
            sender_identity, serialized_payload = self.router_socket.recv_multipart()

            # Allow for re-registration if connecting to a running coordinator.
            if serialized_payload == b"":
                if sender_identity not in self.identities_of_data_parallel_ranks:
                    self.identities_of_data_parallel_ranks.append(sender_identity)
                continue

            deserialized_payload = msgpack.unpackb(serialized_payload, raw=False)
            header = Headers(deserialized_payload[0])

            if header == Headers.CONNECT:
                if sender_identity in known_clients:
                    logging.info(
                        f"Client {sender_identity} sent a duplicate connect request. Ignoring .."
                    )
                    continue

                # print(f"New client connected: {sender_identity}")
                known_clients.add(sender_identity)
                self.router_socket.send_multipart(
                    [sender_identity, msgpack.packb([Headers.CONNECT_ACK.value], use_bin_type=True)]
                )

            elif header == Headers.SUBMIT_REQUEST:
                # ToDo [Siddharth]: We might want to tokenize the prompt on the
                # assigned data parallel rank for this process instead
                # of the coordinator.

                # Message from a known client
                if sender_identity not in known_clients:
                    logging.info(
                        f"Received message from unknown client {sender_identity}. Ignoring."
                    )
                    continue
                # this is a message from a client.
                # route it to a data parallel rank
                client_request_id, prompt, sampling_params = deserialized_payload[1:]
                # map client request_id to server request_id
                # necessary because multiple clients might have the same request_id.
                request_id = self.next_request_id
                self.next_request_id += 1
                self.request_id_to_client_id[request_id] = sender_identity
                self.request_id_to_client_request_id[request_id] = client_request_id

                # Serialize prompt.
                if isinstance(prompt, (str, list)):
                    pass
                elif isinstance(prompt, torch.Tensor):
                    prompt = prompt.tolist()
                else:
                    raise Exception("specialize for <%s> prompt." % type(prompt).__name__)

                payload = msgpack.packb(
                    [Headers.SUBMIT_REQUEST.value, request_id, prompt, sampling_params],
                    use_bin_type=True,
                )

                request_hashes = self.compute_request_hashes(prompt)
                if (
                    self.prefix_caching_coordinator_policy
                    == PrefixCachingCoordinatorPolicy.FIRST_PREFIX_BLOCK
                ):
                    request_hashes = request_hashes[:1]

                # Account for the fact that some engines may have died.
                for _ in range(len(self.identities_of_data_parallel_ranks)):
                    next_identity = self.get_best_data_parallel_rank(request_hashes)
                    if self._send_to_engine(next_identity, payload):
                        break
                else:
                    # If all engines have died, we are in an abnormal state, and must exit cleanly.
                    logging.error("Coordinator: no reachable engines for request %d", request_id)
                    del self.request_id_to_client_id[request_id]
                    del self.request_id_to_client_request_id[request_id]
                    return

                if request_hashes:
                    self._update_rank_hashes(next_identity, request_hashes)
                if self.schedule_records is not None:
                    self.schedule_records.append(
                        {
                            "request_id": request_id,
                            "rank_index": self.identity_to_rank_index[next_identity],
                            "num_hashes": len(request_hashes),
                        }
                    )

            elif header in (
                Headers.PAUSE,
                Headers.UNPAUSE,
                Headers.SUSPEND,
                Headers.RESUME,
                Headers.SET_GENERATION_EPOCH,
                Headers.STOP,
            ):
                # Start by checking the current state against the control signal.
                if sender_identity not in known_clients:
                    logging.warning("Coordinator: ignoring signal from unknown client.")
                    continue

                if header == Headers.PAUSE:
                    idem_states = (self.CoordinatorState.PAUSED, self.CoordinatorState.SUSPENDED)
                    if self.state == self.CoordinatorState.RUNNING:
                        self.state = self.CoordinatorState.PAUSED
                    elif self.state in idem_states:
                        # Already paused/suspended, ignore redundant PAUSE.
                        continue
                    else:
                        logging.warning("Coordinator: ignoring PAUSE in state %s", self.state)
                        continue
                elif header == Headers.UNPAUSE:
                    if self.state != self.CoordinatorState.PAUSED:
                        logging.warning("Coordinator: ignoring UNPAUSE in state %s", self.state)
                        continue
                    self.state = self.CoordinatorState.RUNNING
                elif header == Headers.SUSPEND:
                    if self.state != self.CoordinatorState.PAUSED:
                        logging.warning("Coordinator: ignoring SUSPEND in state %s", self.state)
                        continue
                    self.state = self.CoordinatorState.SUSPENDED
                elif header == Headers.RESUME:
                    if self.state != self.CoordinatorState.SUSPENDED:
                        logging.warning("Coordinator: ignoring RESUME in state %s", self.state)
                        continue
                    self.state = self.CoordinatorState.PAUSED
                elif header == Headers.STOP:
                    good_states = (self.CoordinatorState.PAUSED, self.CoordinatorState.SUSPENDED)
                    if self.state not in good_states:
                        logging.warning("Coordinator: ignoring STOP in state %s", self.state)
                        continue
                    self.state = self.CoordinatorState.STOPPING

                # Broadcast the control signal if we're in a good state.
                # Forward the full deserialized payload so that data-bearing
                # signals (e.g. SET_GENERATION_EPOCH) retain their arguments.
                broadcast_payload = msgpack.packb(deserialized_payload, use_bin_type=True)
                for data_parallel_rank_id in list(self.identities_of_data_parallel_ranks):
                    self._send_to_engine(data_parallel_rank_id, broadcast_payload)

                # STOP affects engines; reset coordinator to RUNNING to allow future engines.
                if header == Headers.STOP:
                    self.state = self.CoordinatorState.RUNNING

            elif header == Headers.ENGINE_REPLY:
                # This is the output of a single engine step on some data parallel rank.
                assert sender_identity in self.identities_of_data_parallel_ranks
                finished_requests = deserialized_payload[1]

                for finished_request in finished_requests:
                    self.detokenize(finished_request)
                    fid = finished_request["request_id"]
                    client_identity = self.request_id_to_client_id[fid]
                    client_request_identity = self.request_id_to_client_request_id[fid]
                    del self.request_id_to_client_id[fid]
                    del self.request_id_to_client_request_id[fid]

                    self.router_socket.send_multipart(
                        [
                            client_identity,
                            msgpack.packb(
                                [header.value, client_request_identity, finished_request],
                                use_bin_type=True,
                            ),
                        ]
                    )

            elif header == Headers.SHUTDOWN:
                if sender_identity not in known_clients:
                    logging.warning("Coordinator: ignoring signal from unknown client.")
                    continue
                break

            elif header == Headers.DISCONNECT:
                if sender_identity in self.identities_of_data_parallel_ranks:
                    self._remove_engine(sender_identity)

            else:
                raise UnknownHeaderError(header)

    def detokenize(self, finished_request):
        """
        Detokenizes the generated tokens in the finished request.

        This method uses the coordinator's tokenizer to convert the list of
        generated token IDs back into human-readable text.

        Args:
            finished_request (dict): The serialized merged request containing the
                generated tokens to be detokenized. It is modified in place.
        """
        if finished_request["prompt"] is None:
            finished_request["prompt"] = TextGenerationController.detokenize(
                self.tokenizer, finished_request["prompt_tokens"][1], remove_EOD=False
            )
        detokenize_stop_sequence = (finished_request.get("sampling_params", {}) or {}).get(
            "detokenize_stop_sequence", False
        )
        finished_request["generated_text"] = TextGenerationController.detokenize(
            self.tokenizer,
            finished_request["generated_tokens"],
            remove_EOD=not detokenize_stop_sequence,
        )

    @classmethod
    def entrypoint(
        cls,
        pipe_connection: Connection,
        ready_event: Event,
        data_parallel_size: int,
        tokenizer,
        inference_coordinator_port: int | None = None,
        deterministic_mode: bool = False,
        block_size_tokens: int | None = None,
        enable_prefix_caching: bool = False,
        prefix_caching_coordinator_policy: PrefixCachingCoordinatorPolicy = (
            PrefixCachingCoordinatorPolicy.FIRST_PREFIX_BLOCK
        ),
        schedule_output_path: str | None = None,
    ):
        """
        Class method to instantiate and run the coordinator, for use in a separate process.

        This method initializes the coordinator, signals a `ready_event` to indicate
        that it is fully initialized and listening, and then starts the main event loop.

        Args:
            pipe_connection (Connection): A connecting pipe to the parent process.
            ready_event (Event): A threading or multiprocessing event object that is set()
                once the coordinator is ready to accept connections.
            inference_coordinator_port (int): The port to bind to.
            data_parallel_size (int): The number of expected TP-coordinators.
            deterministic_mode (bool): Whether to enable deterministic scheduling.
            block_size_tokens (Optional[int]): Token block size for prefix caching hashing.
            enable_prefix_caching (bool): Whether prefix caching is enabled.
            prefix_caching_coordinator_policy (PrefixCachingCoordinatorPolicy): Routing policy.
            schedule_output_path (Optional[str]): Path to write scheduling decisions JSON.
        """
        coordinator = cls(
            pipe_connection,
            data_parallel_size,
            tokenizer,
            inference_coordinator_port,
            deterministic_mode=deterministic_mode,
            block_size_tokens=block_size_tokens,
            enable_prefix_caching=enable_prefix_caching,
            prefix_caching_coordinator_policy=prefix_caching_coordinator_policy,
            schedule_output_path=schedule_output_path,
        )
        ready_event.set()
        try:
            coordinator.start()
        except KeyboardInterrupt:
            logging.info("Coordinator process interrupted. Exiting...")
        coordinator.stop()
        logging.info("Inference Coordinator: shut down successfully.")

    def stop(self):
        """
        Stops the inference coordinator, performing any necessary cleanup operations.
        """
        if self.schedule_output_path and self.schedule_records:
            schedule_data = {
                "policy": self.prefix_caching_coordinator_policy.value,
                "data_parallel_size": self.data_parallel_size,
                "num_requests": len(self.schedule_records),
                "records": self.schedule_records,
            }
            with open(self.schedule_output_path, "w") as f:
                json.dump(schedule_data, f, indent=2)
        self.router_socket.close()
        self.context.term()


================================================
FILE: megatron/core/inference/engines/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from .abstract_engine import AbstractEngine
from .dynamic_engine import DynamicInferenceEngine, EngineSuspendedError
from .static_engine import StaticInferenceEngine


================================================
FILE: megatron/core/inference/engines/abstract_engine.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from abc import ABC, abstractmethod
from typing import List


class AbstractEngine(ABC):
    @staticmethod
    @abstractmethod
    def generate(self) -> dict:
        """The abstract backend's generate function.

        To define a new backend, implement this and return the outputs as a dictionary.

        Returns:
            dict: The output dictionary containing keys for `input_prompt`, `generated_text`, `generated_tokens`.
        """
        pass


================================================
FILE: megatron/core/inference/engines/async_zmq_communicator.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import asyncio
import socket
import struct

import torch.distributed as dist

try:
    import zmq

    HAVE_ZMQ = True
except ImportError:
    from unittest.mock import MagicMock

    zmq = MagicMock()
    HAVE_ZMQ = False


class AsyncZMQCommunicator:
    """
    An asyncio-friendly communicator abstraction using ZMQ.
    Can be used to implement collective operations like all-reduce,
    and bcast which are asyncio friendly on top of ZMQ sockets.
    Only to be used with small amounts of data (e.g., 1 integer)
    on the CPU.
    """

    def __init__(self, zmq_context: zmq.Context, process_group: dist.ProcessGroup):
        """
        Constructor for AsyncZMQCommunicator. Sets up ZMQ sockets
        for communication among ranks in the given process group.
        Args:
            zmq_context (zmq.Context): ZMQ context to create sockets.
            process_group (dist.ProcessGroup): Process group for communication.
        """
        self.rank = dist.get_rank(process_group)
        self.world_size = dist.get_world_size(process_group)
        self.is_leader = self.rank == 0
        # Get the global rank of the leader (first rank in the process group)
        src_rank = dist.get_process_group_ranks(process_group)[0]

        if self.is_leader:
            local_ip = socket.gethostname()
            self.gather_sock = zmq_context.socket(zmq.PULL)
            self.gather_sock.bind_to_random_port(f"tcp://{local_ip}")
            gather_socket_addr = self.gather_sock.getsockopt_string(zmq.LAST_ENDPOINT)

            self.bcast_sock = zmq_context.socket(zmq.PUB)
            self.bcast_sock.bind_to_random_port(f"tcp://{local_ip}")
            bcast_socket_addr = self.bcast_sock.getsockopt_string(zmq.LAST_ENDPOINT)

            # Share the socket addresses with all peers
            dist.broadcast_object_list(
                [gather_socket_addr, bcast_socket_addr], src=src_rank, group=process_group
            )

        else:
            bcast_output = [None, None]
            dist.broadcast_object_list(bcast_output, src=src_rank, group=process_group)
            gather_socket_addr, bcast_socket_addr = bcast_output
            self.gather_sock = zmq_context.socket(zmq.PUSH)
            self.gather_sock.connect(gather_socket_addr)
            self.bcast_sock = zmq_context.socket(zmq.SUB)
            self.bcast_sock.connect(bcast_socket_addr)
            self.bcast_sock.setsockopt_string(zmq.SUBSCRIBE, "")

    async def all_reduce_max(self, *local_vals: int, async_op=True) -> int | tuple[int, ...]:
        """Element-wise all-reduce max of one or more integers.

        Packs all values into a single message so the communication cost
        is independent of the number of values.

        Returns a single int when called with one argument, otherwise a tuple.
        """
        n = len(local_vals)
        if n == 0:
            raise ValueError("all_reduce_max requires at least one value")

        if self.world_size <= 1:
            return local_vals[0] if n == 1 else local_vals

        fmt = f'!{n}i'
        payload = struct.pack(fmt, *local_vals)

        if self.is_leader:
            rows = [local_vals]

            while len(rows) < self.world_size:
                try:
                    if async_op:
                        msg = self.gather_sock.recv(flags=zmq.NOBLOCK)
                    else:
                        msg = self.gather_sock.recv()
                    rows.append(struct.unpack(fmt, msg))
                except zmq.Again:
                    await asyncio.sleep(0.001)

            maxes = tuple(max(row[i] for row in rows) for i in range(n))
            self.bcast_sock.send(struct.pack(fmt, *maxes))
            if not async_op:
                await asyncio.sleep(
                    0
                )  # Yield control once to ensure that other coroutines can run.
                # This might be needed for colocated RL.
            return maxes[0] if n == 1 else maxes

        else:
            self.gather_sock.send(payload)

            while True:
                try:
                    if async_op:
                        msg = self.bcast_sock.recv(flags=zmq.NOBLOCK)
                    else:
                        msg = self.bcast_sock.recv()
                    result = struct.unpack(fmt, msg)
                    if not async_op:
                        await asyncio.sleep(
                            0
                        )  # Yield control once to ensure that other coroutines can run.
                        # This might be needed for colocated RL.
                    return result[0] if n == 1 else result
                except zmq.Again:
                    await asyncio.sleep(0.001)

    def close(self):
        """
        Close the ZMQ sockets.
        """
        # linger=0: discard unsent messages immediately on close rather than blocking until sent.
        # The ZMQ default is to not allow `close` until all messages have been successfully sent.
        self.gather_sock.close(linger=0)
        self.bcast_sock.close(linger=0)


================================================
FILE: megatron/core/inference/engines/dynamic_engine.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import asyncio
import concurrent.futures
import logging
import multiprocessing
import socket
import struct
import time
import warnings
from collections import deque
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime
from enum import Enum, auto
from itertools import repeat
from typing import Dict, List, Optional, Tuple, Union

import torch
from torch import Tensor
from torch.cuda.nvtx import range_pop, range_push

from megatron.core.inference.config import KVCacheManagementMode
from megatron.core.inference.contexts.dynamic_context import (
    DynamicInferenceContext,
    MaxSequenceLengthOverflowError,
    TokenOverflowError,
)
from megatron.core.inference.data_parallel_inference_coordinator import (
    DataParallelInferenceCoordinator,
)
from megatron.core.inference.engines.abstract_engine import AbstractEngine
from megatron.core.inference.headers import Headers, UnknownHeaderError
from megatron.core.inference.inference_request import (
    DynamicInferenceEvent,
    DynamicInferenceEventType,
    DynamicInferenceRequest,
    DynamicInferenceRequestRecord,
    Status,
)
from megatron.core.inference.sampling_params import SamplingParams
from megatron.core.inference.text_generation_controllers.text_generation_controller import (
    TextGenerationController,
)
from megatron.core.inference.utils import (
    Counter,
    await_process_call,
    set_inference_cuda_graphed_iteration_for_ep_inference,
    unset_inference_cuda_graphed_iteration_for_ep_inference,
)
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.transformer.cuda_graphs import delete_cuda_graphs
from megatron.core.transformer.enums import CudaGraphScope
from megatron.core.transformer.moe.router_replay import RouterReplay, RouterReplayAction
from megatron.core.utils import (
    deprecate_args,
    experimental_api,
    get_asyncio_loop,
    get_pg_rank,
    get_pg_size,
    get_pg_src_rank,
    internal_api,
    trace_async_exceptions,
)

from .async_zmq_communicator import AsyncZMQCommunicator

try:
    from tqdm import tqdm

    HAVE_TQDM = True
except:
    HAVE_TQDM = False

try:
    import zmq

    HAVE_ZMQ = True
except:
    HAVE_ZMQ = False

try:
    import msgpack

    HAVE_MSGPACK = True
except:
    HAVE_MSGPACK = False

try:
    import wandb

    HAVE_WANDB = True
except ImportError:
    HAVE_WANDB = False
    wandb = None

try:
    import psutil

    HAVE_PSUTIL = True
except ImportError:
    HAVE_PSUTIL = False

DEPRECATED_ARGS = [
    "enable_cuda_graph",
    "random_seed",
    "track_paused_request_events",
    "enable_chunked_prefill",
    "inference_logging_step_interval",
    "pg_collection",
]


class EngineState(Enum):
    """State machine for the inference engine."""

    RUNNING = auto()  # Processing requests
    PAUSING = auto()  # PAUSE received; waiting for EP consensus + world barrier
    PAUSED = auto()  # Globally confirmed idle
    UNPAUSING = auto()  # UNPAUSE received; waiting for world barrier
    SUSPENDING = auto()  # SUSPEND received; offloading GPU; waiting for world barrier
    SUSPENDED = auto()  # GPU offloaded, all ranks confirmed
    RESUMING = auto()  # RESUME received; onloading GPU; waiting for world barrier
    RESUMED = auto()  # GPU onloaded, all ranks confirmed; cleared on next SUSPEND
    STOPPING = auto()  # STOP received; futures cancelled; waiting for world barrier
    STOPPED = auto()  # All ranks confirmed; teardown complete


class EngineSuspendedError(Exception):
    """Engine is currently suspended and not performing steps."""

    pass


def format_mem_bytes(mem_bytes):
    """Convert a byte count to a human-readable string in tb, gb, mb, kb, or bytes."""
    for power, suffix in [(4, "tb"), (3, "gb"), (2, "mb"), (1, "kb"), (0, "bytes")]:
        suffix_bytes = 1024**power
        if mem_bytes >= suffix_bytes:
            return "%.1f %s" % (mem_bytes / suffix_bytes, suffix)
    return "%d bytes" % mem_bytes


@dataclass(kw_only=True)
class RequestEntry:
    """Entry in the engine's `self.requests` dict."""

    record: DynamicInferenceRequestRecord
    future: asyncio.Future


# pylint: disable=line-too-long
@experimental_api
class DynamicInferenceEngine(AbstractEngine):
    """The dynamic inference engine.

    This engine allows requests of varying length to be dynamically added and
    removed in each inference step. In contrast to the static engine that has a
    set batch size and sequence length during the forward pass, each request in
    the dynamic engine can have different *current* prompt and output length at
    any given step, and the processing is restricted only by a max number of total
    tokens across all requests.

    Args:
        text_generation_controller (TextGenerationController): A text generation
            controller that will be used to define how to preprocess prompts, generate
            outputs and detokenizer the output tokens.
        inference_context (DynamicInferenceContext): Context for managing in-flight
            batching and a dynamic block-level KV cache (similar to paged attention).
    """

    # Map stable states to their corresponding asyncio events.
    _STATE_EVENTS = (
        EngineState.RUNNING,
        EngineState.PAUSED,
        EngineState.SUSPENDED,
        EngineState.RESUMED,
        EngineState.STOPPED,
    )

    @deprecate_args(
        *DEPRECATED_ARGS,
        message="Argument `{name}` has been deprecated. Only pass `controller` and `context`",
    )
    def __init__(self, controller: TextGenerationController, context: DynamicInferenceContext):

        assert isinstance(
            controller, TextGenerationController
        ), f"controller must be a TextGenerationController, got {type(controller)}"
        assert isinstance(
            context, DynamicInferenceContext
        ), f"context must be a DynamicInferenceContext, got {type(context)}"

        model_config = controller.inference_wrapped_model.model.config
        inference_config = context.config

        if inference_config.pg_collection is not None:
            self.pg_collection = inference_config.pg_collection
        else:
            self.pg_collection = ProcessGroupCollection.use_mpu_process_groups()

        # Initialization options.
        self.controller = controller
        self.context = context

        self.num_speculative_tokens = inference_config.num_speculative_tokens
        self.materialize_only_last_token_logits = (
            inference_config.materialize_only_last_token_logits
        )

        assert self.num_speculative_tokens >= 0, "Number of speculative tokens must be non-negative"

        if self.num_speculative_tokens > 0:
            assert (
                self.num_speculative_tokens <= self.controller.num_mtp_heads
            ), f"Number of speculative tokens {self.num_speculative_tokens} must be less than or equal to number of MTP heads {self.controller.num_mtp_heads}"
            assert (
                not self.materialize_only_last_token_logits
            ), "materialize_only_last_token_logits must be False when num_speculative_tokens > 0"

        self.track_paused_request_events = inference_config.track_paused_request_events
        self.track_generated_token_events = inference_config.track_generated_token_events
        self.enable_chunked_prefill = inference_config.enable_chunked_prefill
        self.metrics_writer = inference_config.metrics_writer
        self.logging_step_interval = inference_config.logging_step_interval
        self.unified_memory_level = inference_config.unified_memory_level
        self.use_synchronous_zmq_collectives = inference_config.use_synchronous_zmq_collectives
        self.cuda_graph_impl = model_config.cuda_graph_impl
        self.cuda_graph_scope = model_config.cuda_graph_scope
        # Initialize engine.
        self.reset()

        # Set callback for getting stop word finished request IDs
        self.controller.set_stop_word_finished_ids_callback(
            self._get_and_clear_stop_word_finished_ids
        )

        # Configure wandb to use separate step counter for inference metrics (only once)
        if self.logging_step_interval > 0 and self.metrics_writer is not None:
            logging.info(
                f"\033[1;93m[INFERENCE]\033[0m "
                f"\033[1;95mLogging inference metrics to wandb (rank {self.rank})\033[0m"
            )
            if HAVE_WANDB and self.metrics_writer.__name__ == "wandb":
                # Make all inference/* metrics use inference_step as their x-axis
                # This allows inference and training to have independent step counters
                context.metrics_writer.define_metric(
                    "inference/*", step_metric="inference/inference_step"
                )
                # Initialize inference step offset by querying existing run history
                self.inference_step_offset = 0
                if wandb.run is not None:
                    api_run = wandb.Api().run(
                        f"{wandb.run.entity}/{wandb.run.project}/{wandb.run.id}"
                    )
                    max_step = 0
                    for row in api_run.scan_history(keys=["inference/inference_step"]):
                        val = row.get("inference/inference_step")
                        if isinstance(val, (int, float)) and int(val) > max_step:
                            max_step = int(val)
                    self.inference_step_offset = int(max_step)

        # Create cuda graphs.
        self.create_cuda_graphs()

    def reset(self) -> None:
        """Reset by removing all requests and reset all state."""

        self.context.reset()

        # Request state.
        self.request_counter = Counter()
        self.finished_request_count = 0
        self.evicted_request_count = 0

        self.requests: Dict[int, RequestEntry] = {}
        self.waiting_request_ids = deque()
        self.failed_request_ids = []
        self._generation_epoch: Optional[int] = None
        # Track requests that should stop due to stop words (detected in post_process_requests)
        self.stop_word_finished_request_ids: set[int] = set()
        # Track requests currently being finished due to stop words (to skip extra token)
        self.stop_word_being_finished_ids: set[int] = set()

        # Timing and logging variables.
        self.rank = torch.distributed.get_rank()
        self.step_start_event = torch.cuda.Event(enable_timing=True)
        self.step_end_event = torch.cuda.Event(enable_timing=True)
        self.capture_stats = None

        # Runtime state.
        self._loop = get_asyncio_loop(getattr(self, "_loop", None))
        self._cond = asyncio.Condition()
        self._state_events = {k: asyncio.Event() for k in self._STATE_EVENTS}
        self.state = EngineState.RUNNING
        self._state_events[EngineState.RUNNING].set()
        self._pending_signals = deque()

        self.resume_request_ids = None

        # Speculative decoding acceptance tracking.
        self._spec_tokens_proposed = 0
        self._spec_tokens_accepted = 0
        self._spec_steps = 0

        # Prefix caching tracking.
        self._prefix_cache_hits = 0
        self._prefix_cache_blocks_matched = 0
        self._prefix_coordination_waits = 0

        # Coordinator state.
        self.use_coordinator = False

    async def wait_until(self, state: EngineState):
        """Wait until the engine reaches the given state.

        Only stable states (RUNNING, PAUSED, SUSPENDED, RESUMED,
        STOPPED) are supported.  Transient states (PAUSING, SUSPENDING,
        RESUMING, STOPPING) are not directly waitable.
        """
        event = self._state_events.get(state)
        if event is None:
            raise ValueError(f"Cannot wait for transient state {state}")
        await event.wait()

    def create_cuda_graphs(self, reset_context: bool = True):
        """Create cuda graphs.

        This method iterates the dynamic context's `cuda_graph_request_counts`
        to record and capture cuda graphs.

        Args:
            reset_context (bool): Whether to reset the context after building cuda graphs.
        """

        if self.cuda_graph_impl != "local":
            return

        if (
            CudaGraphScope.full_iteration in self.cuda_graph_scope
            and CudaGraphScope.full_iteration_inference not in self.cuda_graph_scope
        ):
            warnings.warn(
                "\n\n*** WARNING: 'full_iteration' CUDA graph scope used during inference! "
                "This will not create inference CUDA graphs. Use '--cuda-graph-scope=full_iteration_inference' instead. ***\n"
            )

        context = self.context
        controller = self.controller

        time_start = time.time()
        mem_stats_start = torch.cuda.memory_stats()

        logging.info("> dynamic_engine.py: building cuda graphs for ")
        for graph in context.cuda_graph_batch_dimensions_list:
            logging.info(graph)

        # Enable inference dispatcher for EP during graph capture
        model_config = controller.inference_wrapped_model.model.config
        is_inference_optimized_ep = (
            model_config.transformer_impl == "inference_optimized"
            and model_config.expert_model_parallel_size > 1
        )
        if is_inference_optimized_ep:
            unwrapped_model = controller.inference_wrapped_model.model
            set_inference_cuda_graphed_iteration_for_ep_inference(unwrapped_model)

        tbar = enumerate(context.cuda_graph_batch_dimensions_list)
        if HAVE_TQDM:
            tbar = tqdm(tbar, total=len(context.cuda_graph_batch_dimensions_list))
        for tbar_idx, cuda_graph_batch_dimension in tbar:
            input_ids, position_ids = self.controller._dynamic_step_context_init(
                construct_graph_dimensions=cuda_graph_batch_dimension
            )
            # Progress.
            tbar_str = f"cuda graph warmup - {cuda_graph_batch_dimension}"
            if HAVE_TQDM:
                tbar.set_description(tbar_str)
            else:
                logging.info(
                    f"{tbar_idx}/{len(context.cuda_graph_batch_dimensions_list)}. {tbar_str}"
                )

            # Enable routing recording during warmup if routing replay is enabled.
            # This ensures the record_indices copy operation is captured in the CUDA graph.
            model_config = controller.inference_wrapped_model.model.config
            if model_config.moe_enable_routing_replay:
                RouterReplay.set_global_router_replay_action(RouterReplayAction.RECORD)

            # Forward pass -> logits.
            controller._dynamic_step_forward_logits(input_ids, position_ids)

            context.reset()

        # Disable inference dispatcher after graph capture
        if is_inference_optimized_ep:
            unset_inference_cuda_graphed_iteration_for_ep_inference(unwrapped_model)

        # Memory usage.
        time_end = time.time()
        mem_stats_end = torch.cuda.memory_stats()
        capture_stats = {
            "time": time_end - time_start,
            "allocated_bytes": (
                mem_stats_end["allocated_bytes.all.current"]
                - mem_stats_start["allocated_bytes.all.current"]
            ),
            "reserved_bytes": (
                mem_stats_end["reserved_bytes.all.current"]
                - mem_stats_start["reserved_bytes.all.current"]
            ),
        }
        logging.info(
            "> built cuda graph(s) in %.2f sec, with total memory usage: "
            "allocated %s, reserved %s.",
            capture_stats["time"],
            format_mem_bytes(capture_stats["allocated_bytes"]),
            format_mem_bytes(capture_stats["reserved_bytes"]),
        )

        self.capture_stats = capture_stats

    @internal_api
    async def start_listening_to_data_parallel_coordinator(
        self,
        inference_coordinator_port: int | None = None,
        launch_inference_coordinator: bool = True,
        *,
        coordinator_schedule_output_path: str | None = None,
        loop: Optional[asyncio.AbstractEventLoop] = None,
    ):
        """Initializes ZMQ communication to connect the engine with an inference coordinator.

        This asynchronous method sets up the distributed communication infrastructure
        that allows this inference engine to act as a worker under a central
        `InferenceCoordinator`. It configures different ZMQ socket patterns
        based on the rank's role within the distributed topology.

        Note that this method must be called on all ranks, as it uses blocking torch broadcasts.

        The setup involves two primary roles within each data-parallel group:
        1.  **MP Coordinator (TP_rank=0, PP_rank=0)**: This rank connects directly
            to the central coordinator via a ZMQ `DEALER` socket. It receives
            requests and uses a ZMQ `PUB` (publisher) socket to broadcast them
            to all other ranks within its model-parallel (MP) group.
        2.  **MP Workers (all other ranks)**: These ranks use ZMQ `SUB` (subscriber)
            sockets to listen for requests broadcast by their local MP Coordinator.

        This architecture uses TCP sockets for both inter-node and intra-node broadcasts
        within an MP group.

        Finally, after setting up the communication channels and ensuring all ranks
        are synchronized, this method starts the main engine processing loop
        (`self.run_engine`) as a background asyncio task.

        Args:
            inference_coordinator_port (int | None): The network port where the central
                `InferenceCoordinator` is or will be listening.
                If None, a random available port will be selected.
                If not None, the coordinator will attempt to bind to this port, but should it
                not succeed (e.g., if the port is already in use), it may bind to a different port.
                The actual port used is returned by this method.
            launch_inference_coordinator (bool, optional): If True, the global rank 0
                process will spawn and manage the `InferenceCoordinator`
                process. Defaults to True.

        Returns:
            inference_coordinator_addresss (str): The network address of the central
                `InferenceCoordinator`, which may not have the same port as what the user requested
                with `inference_coordinator_port`.
        """

        assert HAVE_ZMQ, (
            "please install the pyzmq library to use InferenceCoordinator\n" "pip install pyzmq"
        )
        assert HAVE_MSGPACK, (
            "please install the messagepack library to use InferenceCoordinator\n"
            "pip install msgpack"
        )

        self.zmq_context = zmq.Context.instance()
        self.zmq_sockets = []  # keep track of all sockets created by this engine

        # Get world info.
        dp_group = self.pg_collection.dp
        dp_src = get_pg_src_rank(dp_group)
        dp_size = get_pg_size(self.pg_collection.dp)
        dp_rank = get_pg_rank(self.pg_collection.dp)

        mp_group = self.pg_collection.mp
        mp_src = get_pg_src_rank(mp_group)
        tp_rank = get_pg_rank(self.pg_collection.tp)
        pp_rank = get_pg_rank(self.pg_collection.pp)

        self.is_mp_coordinator = tp_rank == 0 and pp_rank == 0
        self.is_dp_coordinator = (dp_rank == 0) and self.is_mp_coordinator

        local_ip = socket.gethostname()

        # Spawn a DP coordinator process and get the connection info.
        if launch_inference_coordinator and self.is_dp_coordinator:
            spawn_context = multiprocessing.get_context('spawn')
            deterministic_mode = torch.are_deterministic_algorithms_enabled()
            dp_pipe, dp_process_pipe = spawn_context.Pipe()
            coordinator_ready_event = spawn_context.Event()
            self.inference_coordinator_process = spawn_context.Process(
                target=DataParallelInferenceCoordinator.entrypoint,
                args=(
                    dp_process_pipe,
                    coordinator_ready_event,
                    get_pg_size(self.pg_collection.dp),
                    self.controller.tokenizer,
                    inference_coordinator_port,
                    deterministic_mode,
                    self.context.block_size_tokens,
                    self.context.enable_prefix_caching,
                    self.context.prefix_caching_coordinator_policy,
                    coordinator_schedule_output_path,
                ),
            )
            self.inference_coordinator_process.start()
            await await_process_call(dp_pipe.poll, self.inference_coordinator_process)
            dp_addr = dp_pipe.recv()
            dp_pipe.close()

            # Check if the port number is not inference_coordinator_port
            actual_port = int(dp_addr.rsplit(":", 1)[-1])
            if inference_coordinator_port != None and actual_port != inference_coordinator_port:
                logging.warning(
                    f"Requested InferenceCoordinator port {inference_coordinator_port} "
                    f"but got port {actual_port} instead. This happens if the request port "
                    f"is already in use."
                )
        elif not launch_inference_coordinator:
            dp_addr = f"tcp://{local_ip}:{inference_coordinator_port}"
        else:
            dp_addr = None

        # Find available ports for MP and bind to them.
        if self.is_mp_coordinator:
            mp_req_sock = self.zmq_context.socket(zmq.PUB)
            mp_req_sock.bind_to_random_port(f"tcp://{local_ip}")
            mp_req_addr = mp_req_sock.getsockopt_string(zmq.LAST_ENDPOINT)

            mp_len_sock = self.zmq_context.socket(zmq.PUB)
            mp_len_sock.bind_to_random_port(f"tcp://{local_ip}")
            mp_len_addr = mp_len_sock.getsockopt_string(zmq.LAST_ENDPOINT)
        else:
            mp_req_addr = None
            mp_len_addr = None

        # Broadcast addresses to respective ranks.
        bcast = [dp_addr]
        torch.distributed.broadcast_object_list(bcast, src=dp_src, group=dp_group)
        [dp_addr] = bcast
        bcast = [mp_req_addr, mp_len_addr]
        torch.distributed.broadcast_object_list(bcast, src=mp_src, group=mp_group)
        [mp_req_addr, mp_len_addr] = bcast

        identity = f'mp-coord-{dp_rank}'
        if self.is_mp_coordinator:
            # 1. Create dealer sockets where tp_rank = 0 and pp_rank = 0
            #    These will receive requests from an InferenceCoordinator.
            self.socket_for_receiving_requests = self.zmq_context.socket(zmq.DEALER)

            self.socket_for_receiving_requests.setsockopt(zmq.IDENTITY, identity.encode('utf-8'))
            self.socket_for_receiving_requests.connect(dp_addr)

            # send empty string. this is used to register with the coordinator.
            self.socket_for_receiving_requests.send(b"")

            # 2. Create a publisher socket. This is used to publish or broadcast
            #    requests within the model parallel group
            self.model_parallel_publisher_socket = mp_req_sock

            # 3. Create another publisher socket to broadcast the number of messages to receive.
            self.model_parallel_num_msgs_publisher_socket = mp_len_sock
            self.zmq_sockets += [
                self.socket_for_receiving_requests,
                self.model_parallel_num_msgs_publisher_socket,
                self.model_parallel_publisher_socket,
            ]
        # All MP ranks subscribe to the two publisher sockets
        self.model_parallel_subscriber_socket = self.zmq_context.socket(zmq.SUB)
        self.model_parallel_subscriber_socket.connect(mp_req_addr)
        self.model_parallel_subscriber_socket.setsockopt_string(zmq.SUBSCRIBE, "")

        self.model_parallel_num_msgs_subscriber_socket = self.zmq_context.socket(zmq.SUB)
        self.model_parallel_num_msgs_subscriber_socket.connect(mp_len_addr)
        self.model_parallel_num_msgs_subscriber_socket.setsockopt_string(zmq.SUBSCRIBE, "")

        self.zmq_sockets += [
            self.model_parallel_subscriber_socket,
            self.model_parallel_num_msgs_subscriber_socket,
        ]

        torch.distributed.barrier(mp_group)

        # initialize zmq-based EP communicator
        self.ep_rank = get_pg_rank(self.pg_collection.ep)
        self.ep_world_size = get_pg_size(self.pg_collection.ep)
        if self.ep_world_size > 1:
            self.expert_parallel_zmq_communicator = AsyncZMQCommunicator(
                self.zmq_context, process_group=self.pg_collection.ep
            )

        # initialize zmq-based world communicator for consensus barriers
        total_world_size = torch.distributed.get_world_size()
        if total_world_size > 1:
            self.world_zmq_communicator = AsyncZMQCommunicator(self.zmq_context, process_group=None)

        if launch_inference_coordinator and self.is_dp_coordinator:
            await await_process_call(
                coordinator_ready_event.wait, self.inference_coordinator_process
            )
            logging.info("Inference co-ordinator is ready to receive requests!")
            logging.info(f"Data parallel coordinator can be found at {dp_addr}")

        # Finally run the engine infinite loop.
        loop = get_asyncio_loop(loop)
        self.engine_loop_task = loop.create_task(self.run_engine_with_coordinator(loop=loop))

        return dp_addr

    @contextmanager
    @staticmethod
    def suspend_resume_ctx(key: str, *, unified_memory_level: int) -> None:
        """Context manager for of suspending and resuming the engine.

        This context manager records the time and memory usage when suspending
        and resuming the context. TODO(@lmcafee): add argument to optionally
        return nullcontext, to avoid overhead.

        Args:
            key (str): Key that identifies caller (e.g., 'suspend' or 'resume').

        Return:
            None.
        """

        try:

            start_mem = torch.cuda.memory_stats()
            start_time = time.time()
            range_push(f"{key}-inference-context")
            torch.cuda.synchronize()

            yield

        finally:

            range_pop()
            end_time = time.time()

            end_mem = torch.cuda.memory_stats()
            start_mem_alloc = start_mem["allocated_bytes.all.current"]
            end_mem_alloc = end_mem["allocated_bytes.all.current"]
            start_mem_res = start_mem["reserved_bytes.all.current"]
            end_mem_res = end_mem["reserved_bytes.all.current"]

            rank_str = torch.distributed.get_rank()
            dir_str = "deallocating" if end_mem_alloc <= start_mem_alloc else "allocating"
            relative_time_str = f"{end_time - start_time:.3f} sec"
            relative_mem_str = f"{abs(start_mem_alloc - end_mem_alloc) / 1024**3:.1f} gb"

            if HAVE_PSUTIL:
                process = psutil.Process()
                mem_info = process.memory_info()
                cpu_mem_str = f"{mem_info.rss / 1024**3:.1f} gb"
            else:
                cpu_mem_str = "--"

            total_mem_str = ", ".join(
                (
                    f"cpu: {cpu_mem_str}",
                    f"gpu: alloc {end_mem_alloc / 1024**3:.1f} gb",
                    f"res {end_mem_res / 1024**3:.1f} gb",
                )
            )
            logging.info(
                f"[rank {rank_str}] dynamic engine {key}, "
                f"unified {unified_memory_level}, "
                f"{dir_str} "
                f"{relative_mem_str} in {relative_time_str} ... "
                f"abs mem usage: {total_mem_str}"
            )

    def suspend(self):
        """Suspend engine by deallocating context's GPU state."""

        # Skip if already suspended or in the process of suspending.
        if self.state in (EngineState.SUSPENDED, EngineState.SUSPENDING):
            return

        # Deallocate context tensors.
        with self.__class__.suspend_resume_ctx(
            "suspended", unified_memory_level=self.unified_memory_level
        ):
            self.context.deallocate_inference_state_buffers()

        if (
            self.context.kv_cache_management_mode != KVCacheManagementMode.PERSIST
            and not self.context.static_kv_memory_pointers
        ):
            delete_cuda_graphs()

        # Build the list of requests to re-add on resume.
        # All waiting requests are always included; active requests are included
        # only if they are marked for recompute (their KV cache will be gone).
        waiting_request_ids = list(self.waiting_request_ids)
        active_request_ids = set(self.requests.keys()) - set(waiting_request_ids)
        if self.context.kv_cache_management_mode == KVCacheManagementMode.RECOMPUTE:
            recompute_active_ids = active_request_ids

            # Reset any partially prefilled requests so they recompute from the start
            for req_id in [*waiting_request_ids, *recompute_active_ids]:
                req = self.get_request(req_id)
                if req.finished_chunk_token_count > 0:
                    req.remaining_prompt_tokens = req.prompt_tokens
                    req.finished_chunk_token_count = 0

            # Reset the chunked prefill request id
            self.chunked_prefill_request_id = -1
        else:
            recompute_active_ids = set()
        self.resume_request_ids = [*recompute_active_ids, *waiting_request_ids]
        self.waiting_request_ids.clear()

        # Checkpoint active requests that are marked for recompute.
        for request_id in recompute_active_ids:
            self.requests[request_id].record.checkpoint()

        # If we are not using the inference coordinator, we need to manually handle state.
        if not self.use_coordinator:
            self.state = EngineState.SUSPENDED

    def resume(self):
        """Resume engine by reallocating context's GPU state."""

        # Skip if not suspended or in the process of suspending.
        if self.state not in (EngineState.SUSPENDED, EngineState.SUSPENDING):
            return

        # Resume.
        with self.__class__.suspend_resume_ctx(
            "resumed", unified_memory_level=self.unified_memory_level
        ):

            # Allocate context tensors.
            alloc_time = time.time()
            torch.cuda.synchronize()
            self.context.reinitialize_inference_state_buffers()
            torch.cuda.synchronize()
            alloc_time = time.time() - alloc_time

            capture_time = time.time()
            if (
                self.context.kv_cache_management_mode != KVCacheManagementMode.PERSIST
                and not self.context.static_kv_memory_pointers
            ):
                self.create_cuda_graphs()
            capture_time = time.time() - capture_time

            # Re-add requests saved during suspend.
            add_time = time.time()
            torch.cuda.synchronize()
            for request_id in self.resume_request_ids:
                self._add_request(self.get_request(request_id))

            # Ensure chunked prefill request remains at the head of the waiting queue
            if self.context.chunked_prefill_request_id != -1:
                if self.context.chunked_prefill_request_id in self.waiting_request_ids:
                    self.waiting_request_ids.remove(self.context.chunked_prefill_request_id)
                    self.waiting_request_ids.appendleft(self.context.chunked_prefill_request_id)

            torch.cuda.synchronize()
            add_time = time.time() - add_time

        # Print inner timing (must be outside context manager above for correct formatting).
        logging.info(
            "    > "
            + ", ".join(
                (
                    f"inner timing: alloc {alloc_time:.3f}",
                    f"add {add_time:.3f}",
                    f"capture {capture_time:.3f}.",
                )
            )
        )

        # If we are not using the inference coordinator, we need to manually handle state.
        if not self.use_coordinator:
            self.state = EngineState.RUNNING
            # Notify the condition variable that run_engine() waits on.
            self._loop.call_soon_threadsafe(
                asyncio.create_task, self._notify_cond_for_new_request()
            )

    @trace_async_exceptions
    async def _notify_cond_for_new_request(self):
        """Helper function to notify condition variable when a new request is added."""
        async with self._cond:
            self._cond.notify_all()

    def _handle_failed_request(self, request_id: int):
        """Handle a failed request by sending the reply immediately.

        The request is added to failed_request_ids so that the next bookkeeping pass can return it.
        """
        request_entry = self.requests[request_id]
        request = request_entry.record[-1]

        if self.rank == 0:
            warnings.warn(
                f"Request {request_id} failed to be added to the engine due to errors. "
                f"Prompt Tokens: {len(request.prompt_tokens)} "
                f"Tokens to generate: {request.sampling_params.num_tokens_to_generate} "
                f"Max sequence length: {self.context.max_sequence_length} "
                f"Chunked prefill enabled: {self.enable_chunked_prefill}"
            )

        request.status = Status.FAILED
        request.add_event_fail()
        self.failed_request_ids.append(request_id)

        # Send the reply immediately, because it may never get a chance to be sent again.
        if self.use_coordinator and self.is_mp_coordinator:
            payload = msgpack.packb(
                [Headers.ENGINE_REPLY.value, [request_entry.record.merge().serialize()]],
                use_bin_type=True,
            )
            self.socket_for_receiving_requests.send(payload)
        elif not self.use_coordinator:
            if request.prompt is None:
                request.prompt = self.controller.tokenizer.detokenize(
                    request.prompt_tokens.tolist()
                )
            if request.generated_tokens:
                request.generated_text = self.controller.tokenizer.detokenize(
                    request.generated_tokens
                )
            else:
                request.generated_text = ""
        request_entry.future.set_result(request_entry.record)

    def has_unfinished_requests(self) -> bool:
        """Test if context contains unfinished requests."""
        return self.context.has_unfinished_requests() or len(self.waiting_request_ids) > 0

    def get_request(self, request_id: int) -> DynamicInferenceRequest:
        """Get most recent request from a request record.

        Args:
            request_id (int): Request id.

        Returns:
            (DynamicInferenceRequest) The most recent request in the record.
        """
        return self.requests[request_id].record[-1]

    def _add_request(
        self, request: DynamicInferenceRequest
    ) -> asyncio.Future[DynamicInferenceRequest]:

        request_id = request.request_id

        # Add request to self.requests. If the engine has previously been
        # suspended, then the request may already exist.
        if request_id not in self.requests:
            self.requests[request_id] = RequestEntry(
                record=DynamicInferenceRequestRecord.from_request(request),
                future=self._loop.create_future(),
            )
            request.add_event_add_engine()  # Record when request enters engine

            # Stamp new request with the current generation epoch.
            if self._generation_epoch is not None:
                epoch = self._generation_epoch
                request.policy_epoch = [(0, epoch)]
                request.kv_cache_epoch = [(0, epoch)]

        if request.status is None:
            request.status = Status.ACTIVE_AND_GENERATING_TOKENS

        assert (
            request.sampling_params.num_tokens_to_generate is None
            or request.sampling_params.num_tokens_total is None
        )
        if request.sampling_params.top_n_logprobs > 0:
            assert (
                request.sampling_params.return_log_probs
            ), "top_n_logprobs requires sampling_params.return_log_probs to be True"
        if (
            request.sampling_params.return_log_probs
            and not request.sampling_params.skip_prompt_log_probs
        ):
            assert not self.materialize_only_last_token_logits, (
                "Prompt log probs cannot be calculated if only last token logits are materialized. "
                "Set materialize_only_last_token_logits to False in DynamicInferenceContext "
                "or skip_prompt_log_probs to True in SamplingParams."
            )

        if request.sampling_params.num_tokens_total is not None:
            request.sampling_params.num_tokens_to_generate = (
                request.sampling_params.num_tokens_total - len(request.prompt_tokens)
            )
            request.sampling_params.num_tokens_total = None
        if request.sampling_params.num_tokens_to_generate is None:
            request.sampling_params.num_tokens_to_generate = self.context.max_sequence_length - len(
                request.prompt_tokens
            )
        if request.sampling_params.termination_id is None:
            try:
                eod = self.controller.tokenizer.eod
            except AttributeError:
                if self.rank == 0:
                    warnings.warn(
                        "Termination ID not specified, and tokenizer does not define eod."
                        "Defaulting to not using termination id."
                    )
                eod = -1
            request.sampling_params.termination_id = eod

        if (
            len(request.prompt_tokens) + request.sampling_params.num_tokens_to_generate
            > self.context.max_sequence_length
        ) or (request.sampling_params.num_tokens_to_generate < 0):
            request.status = Status.FAILED
            request.add_event_error_nontransient(MaxSequenceLengthOverflowError(request_id))

        if len(request.prompt_tokens) > self.context.max_tokens and not self.enable_chunked_prefill:
            request.status = Status.FAILED
            request.add_event_error_nontransient(TokenOverflowError(request_id))

        # Tokenize stop words if provided
        if request.sampling_params.stop_words:
            stop_word_ids = [
                self.controller.tokenize_prompt(self.controller.tokenizer, stop_word, add_BOS=False)
                for stop_word in request.sampling_params.stop_words
            ]
            request.stop_word_ids = stop_word_ids

        if request.status != Status.FAILED:
            self.waiting_request_ids.append(request_id)
        else:
            self._handle_failed_request(request_id)

        return self.requests[request_id].future

    def add_request(
        self,
        request_id: int,
        prompt: Union[str, List[int], Tensor],
        sampling_params: Optional[SamplingParams] = None,
    ) -> asyncio.Future[DynamicInferenceRequest]:
        """Add request to inference context.

        Args:
            request_id (int): Unique ID of request.
            prompt (Union[str, Tensor]): Prompt as either a text string or token IDs.
            sampling_params (Optional[SamplingParams]): Sampling parameters for the request.

        Return:
            Returns an asyncio `Future[DynamicInferenceRequest]` for the user to wait on.
        """
        prompt_str = None
        # Tokenize prompt if text.
        if isinstance(prompt, str):
            # Tokenize prompt if text. Support legacy single-arg mocks.
            prompt_str = prompt
            try:
                prompt_token_ids = self.controller.tokenize_prompt(
                    self.controller.tokenizer, prompt, sampling_params.add_BOS
                )
            except TypeError:
                prompt_token_ids = self.controller.tokenize_prompt(
                    self.controller.tokenizer, prompt
                )
            tokens = torch.tensor(
                prompt_token_ids, dtype=torch.int64, device=torch.cuda.current_device()
            )
        elif isinstance(prompt, list):
            # Convert List[int] -> Tensor.
            tokens = torch.tensor(prompt, dtype=torch.int64, device=torch.cuda.current_device())
        elif isinstance(prompt, torch.Tensor):
            # Prompt already tokenized.
            assert prompt.dtype == torch.int64, prompt.dtype
            assert prompt.device == torch.device(
                f"cuda:{torch.cuda.current_device()}"
            ), prompt.device
            tokens = prompt

        else:
            raise Exception("specialize for <%s>." % type(prompt).__name__)

        # Initialize request.
        request = DynamicInferenceRequest(
            request_id=request_id,
            prompt=prompt_str,
            prompt_tokens=tokens,
            sampling_params=sampling_params,
            block_size_tokens=self.context.block_size_tokens,
            enable_prefix_caching=self.context.enable_prefix_caching,
        )

        # Add request.
        return self._add_request(request)

    def post_process_requests(
        self,
        request_ids: torch.Tensor,
        finished_request_ids: torch.Tensor,
        evict_request_ids: torch.Tensor,
        step_time: float,
        sample: torch.Tensor,
        accepted_tokens: torch.Tensor,
        log_probs: torch.Tensor,
        top_n_logprobs: Optional[Dict[int, List[Tuple[torch.Tensor, torch.Tensor]]]] = None,
        routing_indices_per_request: Optional[Dict[int, torch.Tensor]] = None,
        pre_fwd_active_token_count: Optional[int] = None,
        pre_fwd_step_count: Optional[int] = None,
    ) -> Tuple[List[DynamicInferenceRequest], List[DynamicInferenceRequest]]:
        """
        Handles post-processing for requests after a step.

        Args:
            request_ids (torch.Tensor): A list of request_ids
            finished_request_ids (torch.Tensor): A list of finished request ids
            evict_request_ids (torch.Tensor): A list of evicted request ids.
            step_time (float): The latency of the last step
            sample: Tensor: The newly generated token for each request
            accepted_tokens: Tensor: The additional accepted tokens for each request
            log_probs: (List): Log probs for each request
            top_n_logprobs: (Dict): Top-n log probs for each request. Maps request_idx to
                list of (top_n_logprobs, top_n_indices) tuples.
            routing_indices_per_request: (Dict[int, Tensor]): MoE routing indices
                pre-mapped by request_id. Each value is a tensor of shape
                [num_tokens_this_step, num_layers, topk].

        Returns:
            A list of active requests and completed requests as `DynamicInferenceRequest` objects
        """
        active_request_ids: list[int] = []
        finished_request_ids = set(finished_request_ids.tolist())
        finished_request_records: list[DynamicInferenceRequestRecord] = []
        self.finished_request_count += len(finished_request_ids)
        if evict_request_ids is not None:
            self.evicted_request_count += evict_request_ids.numel()

        log_probs_iter = log_probs if log_probs else repeat(None)
        block_allocator = self.context.kv_block_allocator

        # Pre-compute step-level block stats (before the per-request loop)
        if self.track_generated_token_events:
            blocks_allocated = block_allocator.total_count - block_allocator.total_avail
            if block_allocator.enable_prefix_caching:
                blocks_hashed_active = int((block_allocator.block_ref_counts > 0).sum().item())
                blocks_ref_count = block_allocator.block_ref_counts.sum().item()
            else:
                blocks_hashed_active = blocks_allocated
                blocks_ref_count = None

        # When accepted_tokens is None (no speculative decoding), use repeat([]) to provide
        # empty lists for each request, so the zip produces the correct number of iterations
        accepted_tokens_iter = repeat([]) if accepted_tokens is None else accepted_tokens.tolist()

        if self.num_speculative_tokens > 0 and accepted_tokens is not None:
            self._spec_steps += 1

        for req_idx, (request_id, tokens, accepted_tokens_list, request_log_probs) in enumerate(
            zip(request_ids.tolist(), sample.tolist(), accepted_tokens_iter, log_probs_iter)
        ):

            # Ensure tokens is always a list for consistent handling
            if not isinstance(tokens, list):
                tokens = [tokens]

            request: DynamicInferenceRequest = self.get_request(request_id)

            if self.num_speculative_tokens > 0:
                accepted_tokens = list(filter(lambda tok: tok != -1, accepted_tokens_list))

                # The order `accepted_tokens + tokens` is correct here.
                # `accepted_tokens` contains the sequence of
                # successfully verified draft tokens. `tokens` (from `sample`) is the
                # brand new token generated by the target model based on that accepted prefix.
                # Therefore, the newly sampled token must go at the end of the sequence.
                tokens = accepted_tokens + tokens

            num_stop_word_trim = 0
            if request_id != self.context.chunked_prefill_request_id:
                # Skip appending token for requests being finished due to stop words
                # (they already have their final token from the previous step)
                # If the request already has more tokens, then we only append as much as is necessary
                if (
                    len(request.generated_tokens) + len(tokens)
                    >= request.sampling_params.num_tokens_to_generate
                ):
                    tokens = tokens[
                        : request.sampling_params.num_tokens_to_generate
                        - len(request.generated_tokens)
                    ]
                if request_id not in self.stop_word_being_finished_ids:
                    is_first_token = len(request.generated_tokens) == 0
                    request.generated_tokens += tokens
                    first_token_event = None
                    if self.track_generated_token_events:
                        for token in tokens:
                            if block_allocator.enable_prefix_caching:
                                event = request.add_event_generated_token(
                                    token,
                                    blocks_total=block_allocator.total_count,
                                    blocks_hashed_total=blocks_allocated,
                                    blocks_hashed_active=blocks_hashed_active,
                                    blocks_ref_count=blocks_ref_count,
                                    pre_fwd_active_token_count=pre_fwd_active_token_count,
                                    pre_fwd_step_count=pre_fwd_step_count,
                                )
                            else:
                                event = request.add_event_generated_token(
                                    token,
                                    blocks_total=block_allocator.total_count,
                                    blocks_hashed_total=blocks_allocated,
                                    blocks_hashed_active=blocks_hashed_active,
                                    pre_fwd_active_token_count=pre_fwd_active_token_count,
                                    pre_fwd_step_count=pre_fwd_step_count,
                                )
                            if first_token_event is None:
                                first_token_event = event
                    if is_first_token:
                        if not self.track_generated_token_events:
                            first_token_event = DynamicInferenceEvent(
                                type=DynamicInferenceEventType.GENERATED_TOKEN,
                                payload={"token_id": tokens[0]},
                            )
                        request.ttft = (
                            first_token_event.timestamp - request.event_add_engine.timestamp
                        )
                    if request.tpot is None:
                        request.tpot = []
                    per_token_step_time = step_time / len(tokens)
                    request.tpot.extend([per_token_step_time] * len(tokens))

                # Check for stop words (after token is appended).
                # With speculative decoding, a stop word may end before the last
                # appended token. The check truncates generated_tokens in-place and
                # returns how many trailing tokens were removed so we can also trim
                # the corresponding log probs below.
                stop_word_hit, num_stop_word_trim = self._check_stop_words_for_request_post_append(
                    request
                )

                # Track acceptance statistics for logging.
                if len(request.generated_tokens) > 0 and self.num_speculative_tokens > 0:
                    actual_proposed = max(0, self.num_speculative_tokens - num_stop_word_trim)
                    actual_accepted = max(0, len(accepted_tokens) - num_stop_word_trim)

                    self._spec_tokens_proposed += actual_proposed
                    self._spec_tokens_accepted += actual_accepted

                if request_id in finished_request_ids:
                    # Request finished by normal means (termination_id, max_length, or stop word from previous step)
                    request.generated_length = len(request.generated_tokens)
                    request.status = Status.COMPLETED
                    request.add_event_finish()
                    finished_entry = self.requests.pop(request_id)
                    finished_request = finished_entry.record[-1]
                    finished_request.generated_length = len(finished_request.generated_tokens)
                    finished_request_records.append(finished_entry.record)
                    finished_entry.future.set_result(finished_entry.record)
                elif stop_word_hit:
                    # Stop word detected - mark for removal in next step's bookkeeping
                    # Don't pop yet; let the next step handle it properly via callback
                    self.stop_word_finished_request_ids.add(request_id)
                    active_request_ids.append(request_id)
                else:
                    active_request_ids.append(request_id)
            else:
                # The chunked prefill produces useless tokens
                # so we are not appending them to the generated tokens.
                # Additionally, chunked prefill request do not finish.
                active_request_ids.append(request_id)

            # When a stop word was found mid-speculative-batch, trim log probs
            # and top_n_logprobs to match the truncated generated_tokens.
            if num_stop_word_trim > 0:
                if request_log_probs is not None:
                    request_log_probs = request_log_probs[:-num_stop_word_trim]
                if top_n_logprobs is not None and req_idx in top_n_logprobs:
                    top_n_logprobs[req_idx] = top_n_logprobs[req_idx][:-num_stop_word_trim]

            # Process log_probs if available (unified for both regular and chunked prefill)
            if request_log_probs is not None:
                # Initialize lists if they don't exist
                if not request.prompt_log_probs:
                    request.prompt_log_probs = []
                if not request.generated_log_probs:
                    request.generated_log_probs = []

                is_chunked_prefill = request_id == self.context.chunked_prefill_request_id
                is_prefill = len(request.generated_log_probs) == 0

                if request.sampling_params.skip_prompt_log_probs:
                    # We only want decode log probs.
                    if is_chunked_prefill:
                        pass
                    elif is_prefill:
                        request.generated_log_probs.append(request_log_probs[-1])
                    else:
                        request.generated_log_probs.extend(request_log_probs)
                else:
                    # Split log probs between prompt and generated based on remaining prompt slots.
                    prompt_length = len(request.prompt_tokens)
                    total_accumulated = len(request.prompt_log_probs) + len(
                        request.generated_log_probs
                    )
                    remaining_prompt_slots = max(0, prompt_length - 1 - total_accumulated)
                    split_idx = min(remaining_prompt_slots, len(request_log_probs))

                    if split_idx > 0:
                        request.prompt_log_probs.extend(request_log_probs[:split_idx])
                    if split_idx < len(request_log_probs):
                        request.generated_log_probs.extend(request_log_probs[split_idx:])

            # Process top_n_logprobs if available (unified for both regular and chunked prefill)
            if top_n_logprobs is not None and req_idx in top_n_logprobs:
                # Initialize lists if they don't exist
                if request.prompt_top_n_logprobs is None:
                    request.prompt_top_n_logprobs = []
                if request.generated_top_n_logprobs is None:
                    request.generated_top_n_logprobs = []

                top_n_data_list = top_n_logprobs[req_idx]
                prompt_length = len(request.prompt_tokens)

                # Process each token's top-n logprobs
                for top_n_values, top_n_indices in top_n_data_list:
                    logit_dict = {}
                    for logprob, logprob_index in zip(
                        top_n_values.cpu().tolist(), top_n_indices.cpu().tolist()
                    ):
                        key = self.controller.tokenizer.detokenize([logprob_index])
                        logit_dict[key] = logprob

                    # Simple decision: check total count accumulated so far
                    total_accumulated = len(request.prompt_top_n_logprobs) + len(
                        request.generated_top_n_logprobs
                    )

                    # If skip_prompt_log_probs is False and we haven't reached prompt end,
                    # append to prompt_top_n_logprobs. Otherwise append to generated_top_n_logprobs.
                    if (
                        not request.sampling_params.skip_prompt_log_probs
                        and total_accumulated < prompt_length - 1
                    ):
                        request.prompt_top_n_logprobs.append(logit_dict)
                    else:
                        request.generated_top_n_logprobs.append(logit_dict)

            # Process routing indices if available (keyed by request_id)
            # Each step's routing is a tensor of shape [num_tokens_this_step, num_layers, topk]
            # We concatenate along dim=0 to accumulate: [total_tokens, num_layers, topk]
            if (
                routing_indices_per_request is not None
                and request_id in routing_indices_per_request
            ):
                step_routing = routing_indices_per_request[
                    request_id
                ]  # [num_tokens, num_layers, topk]
                if request.routing_indices is None:
                    request.routing_indices = step_routing.clone()
                else:
                    request.routing_indices = torch.cat(
                        [request.routing_indices, step_routing], dim=0
                    )

        # Handle evicted requests.
        if evict_request_ids is not None and evict_request_ids.numel() > 0:

            evict_request_ids = evict_request_ids.tolist()

            # Insert into waiting_request_ids after any chunk prefill request.
            self.waiting_request_ids.extendleft(evict_request_ids)
            if self.context.chunked_prefill_request_id != -1:
                chunked_prefill_id = self.waiting_request_ids[len(evict_request_ids)]
                del self.waiting_request_ids[len(evict_request_ids)]
                self.waiting_request_ids.appendleft(chunked_prefill_id)

            # Checkpoint requests (i.e., prompt += generations) + add eviction event.
            for request_id in evict_request_ids:
                self.requests[request_id].record.checkpoint()
                self.get_request(request_id).add_event_evict()

        # Clear the stop word being finished set after processing
        self.stop_word_being_finished_ids.clear()

        return active_request_ids, finished_request_records

    def _get_and_clear_stop_word_finished_ids(self, active_request_ids: list[int]) -> set[int]:
        """Get and clear the set of request IDs that should be finished due to stop words.

        This callback is called from the controller during bookkeeping to get request IDs
        that were detected as hitting stop words in the previous step's post_process_requests.

        Args:
            active_request_ids: List of currently active request IDs.

        Returns:
            Set of request IDs from active_request_ids that should be marked as finished.
        """
        if not self.stop_word_finished_request_ids:
            return set()

        # Find which stop word finished IDs are in the current active requests
        result = self.stop_word_finished_request_ids & set(active_request_ids)
        # Move to "being finished" set so post_process_requests can skip the extra token
        self.stop_word_being_finished_ids = result
        # Clear the IDs that we're returning (they'll be marked as finished)
        self.stop_word_finished_request_ids -= result
        return result

    def _check_stop_words_for_request_post_append(
        self, request: DynamicInferenceRequest
    ) -> Tuple[bool, int]:
        """Check if a request should stop due to stop words (after token is appended).

        This method is called from post_process_requests after the token has already
        been appended to request.generated_tokens. In the speculative decoding case,
        multiple tokens may have been appended at once. If a stop word is found in the
        middle of the speculative tokens, the trailing tokens after the stop word are
        truncated from generated_tokens.

        With speculative decoding, multiple tokens are appended at once. The stop word
        may end before the last appended token, leaving extra tokens that must be
        trimmed. When this happens, generated_tokens is truncated in-place and the
        number of trimmed tokens is returned so the caller can also trim log probs.

        Args:
            request: The request to check.

        Returns:
            Tuple of (stop_word_hit, num_tokens_trimmed):
                stop_word_hit: True if the generated sequence contains a stop word.
                num_tokens_trimmed: Number of tokens removed from the end of
                    generated_tokens (0 when the stop word is at the very end
                    or when no stop word was found).
        """
        if request.stop_word_ids is None or len(request.stop_word_ids) == 0:
            return False, 0

        generated_tokens = request.generated_tokens

        for stop_word_ids in request.stop_word_ids:
            stop_len = len(stop_word_ids)
            if len(generated_tokens) >= stop_len:
                # Check the last stop_len tokens shifting by 1 up to num_speculative_tokens.
                # Speculative decoding can append multiple tokens at once, so the stop
                # word might end at any position within the newly appended tokens.
                for i in range(self.num_speculative_tokens + 1):
                    end_idx = -i if i > 0 else None
                    if list(generated_tokens[-stop_len - i : end_idx]) == stop_word_ids:
                        trim = (
                            i if request.sampling_params.detokenize_stop_sequence else i + stop_len
                        )
                        if trim > 0:
                            request.generated_tokens = request.generated_tokens[:-trim]
                        return True, trim
        return False, 0

    def get_prefix_coordination_metrics(self) -> dict:
        """Return prefix caching coordination metrics.

        Returns:
            Dict with coordination stats including the number of scheduling waits.
        """
        return {"waits": self._prefix_coordination_waits}

    def _find_mamba_match_count(self, req: DynamicInferenceRequest) -> int:
        """Find farthest block with cached Mamba state by iterating from the end.

        Not all blocks have Mamba state cached in mamba_hash_to_block_id,
        only divergence and last-aligned blocks do. Iterating from the end
        finds the farthest block with cached state, which is the only one
        needed for restore since Mamba state is cumulative.
        """
        if not req.precomputed_block_hashes:
            return 0
        mamba_map = self.context.mamba_slot_allocator.hash_to_block_id
        for i in range(len(req.precomputed_block_hashes) - 1, -1, -1):
            if req.precomputed_block_hashes[i] in mamba_map:
                return i + 1
        return 0

    def schedule_waiting_requests(self):
        """Tries to schedule any requests in the waiting pool."""
        # Keep track of which requests get scheduled.
        waiting_before = set(self.waiting_request_ids)
        if self.enable_chunked_prefill:
            self.schedule_chunked_prefill()
        else:
            self.schedule_non_chunked_prefill()
        waiting_after = set(self.waiting_request_ids)

        # Re-stamp kv_cache_epoch on requests that were just scheduled.
        if self._generation_epoch is not None:
            for request_id in waiting_before - waiting_after:
                req = self.get_request(request_id)
                if req.kv_cache_epoch is None:
                    req.kv_cache_epoch = [(0, self._generation_epoch)]

    def schedule_non_chunked_prefill(self):
        """
        Perform the same original scheduling logic for non-chunked runs
        """
        prefix_caching_enabled = self.context.enable_prefix_caching
        mamba_caching_enabled = (
            prefix_caching_enabled
            and self.context.is_hybrid_model
            and self.context.mamba_slot_allocator is not None
        )
        if prefix_caching_enabled:
            pending_block_hashes = set()
            pending_request_ids = []
        while self.waiting_request_ids:
            req = self.get_request(self.waiting_request_ids[0])

            # Check for conflicting block hashes.
            if prefix_caching_enabled:
                has_pending_hash = False
                for block_hash in req.precomputed_block_hashes:
                    if block_hash in pending_block_hashes:
                        has_pending_hash = True
                        break
                if has_pending_hash:
                    self._prefix_coordination_waits += 1
                    pending_request_ids.append(self.waiting_request_ids.popleft())
                    continue

            # Find Mamba prefix match before check_availability (sets skip count)
            if mamba_caching_enabled:
                req._mamba_num_matched_blocks = self._find_mamba_match_count(req)

            request_can_be_added, request_tokens_can_be_added, kv_cache_available = (
                self.context.check_availability(req)
            )
            if request_can_be_added and request_tokens_can_be_added and kv_cache_available:
                # Add these hashes to pending.
                if prefix_caching_enabled:
                    for block_hash in req.precomputed_block_hashes:
                        if block_hash not in self.context.kv_block_allocator.kv_hash_to_block_id:
                            pending_block_hashes.add(block_hash)
                self.context.add_request(req)
                self._loop.call_soon_threadsafe(
                    self._loop.create_task, self._notify_cond_for_new_request()
                )
                req.remaining_prompt_tokens = req.remaining_prompt_tokens.new_empty(0)
                req.add_event_add_context()
                self.waiting_request_ids.popleft()
            else:
                break

        # Prepend pending request ids to waiting queue.
        if prefix_caching_enabled and pending_request_ids:
            self.waiting_request_ids.extendleft(reversed(pending_request_ids))

    def schedule_chunked_prefill(self):
        """
        This function schedules chunked prefill requests.
        Invariant:
            - There are at most one chunked prefill request in the waiting pool,
                which should be the head
            - There are at most one chunked prefill request in the context,
                which should be the last active request
            - context.chunked_prefill_request_id == -1 if no chunked prefill request is scheduled,
                otherwise it is the request id of the chunked prefill request
            - For each request, finished_chunk_token_count is the number of tokens
                that have been prefilled for this request, non-zero means
                it is during a chunked prefill
            - For each request, remaining_prompt_tokens holds the **unprefilled** prompt tokens
        """
        prefix_caching_enabled = self.context.enable_prefix_caching
        mamba_caching_enabled = (
            prefix_caching_enabled
            and self.context.is_hybrid_model
            and self.context.mamba_slot_allocator is not None
        )
        if prefix_caching_enabled:
            pending_block_hashes = set()
            pending_request_ids = []
        can_schedule = True
        while self.waiting_request_ids and can_schedule:
            can_schedule = False
            req = self.get_request(self.waiting_request_ids[0])

            # is_continuing_chunked_prefill is True if we are scheduling next
            # chunk of a existing chunked prefill request
            is_continuing_chunked_prefill = self.context.chunked_prefill_request_id >= 0

            # Check for conflicting block hashes.
            if prefix_caching_enabled and not is_continuing_chunked_prefill:
                has_pending_hash = False
                for block_hash in req.precomputed_block_hashes:
                    # pylint: disable-next=possibly-used-before-assignment
                    if block_hash in pending_block_hashes:
                        has_pending_hash = True
                        break
                if has_pending_hash:
                    self._prefix_coordination_waits += 1
                    pending_request_ids.append(  # pylint: disable=possibly-used-before-assignment
                        self.waiting_request_ids.popleft()
                    )
                    continue

            # Find Mamba prefix match for non-continuing requests
            if mamba_caching_enabled and not is_continuing_chunked_prefill:
                req._mamba_num_matched_blocks = self._find_mamba_match_count(req)

            # Use remaining prompt tokens for scheduling decisions
            remaining_len = len(req.remaining_prompt_tokens)
            token_fully_can_be_added = (
                self.context.active_token_count + remaining_len <= self.context.max_tokens
            )
            token_partially_can_be_added = self.context.active_token_count < self.context.max_tokens
            request_can_be_added, _, kv_cache_available = self.context.check_availability(req)
            request_can_be_added = is_continuing_chunked_prefill or request_can_be_added

            if request_can_be_added and kv_cache_available:
                if token_fully_can_be_added:
                    # Add these hashes to pending.
                    if prefix_caching_enabled:
                        for block_hash in req.precomputed_block_hashes:
                            if (
                                block_hash
                                not in self.context.kv_block_allocator.kv_hash_to_block_id
                            ):
                                pending_block_hashes.add(block_hash)
                    self.context.chunked_prefill_request_id = -1
                    self.context.add_request(req)
                    self._loop.call_soon_threadsafe(
                        self._loop.create_task, self._notify_cond_for_new_request()
                    )
                    req.remaining_prompt_tokens = req.remaining_prompt_tokens.new_empty(0)
                    req.add_event_add_context()
                    # Fully scheduled, so we remove from waiting pool
                    self.waiting_request_ids.popleft()
                    # Only this case we keep checking the rest of the waiting queue
                    can_schedule = True
                elif token_partially_can_be_added:
                    # Add these hashes to pending.
                    if prefix_caching_enabled:
                        for block_hash in req.precomputed_block_hashes:
                            if (
                                block_hash
                                not in self.context.kv_block_allocator.kv_hash_to_block_id
                            ):
                                pending_block_hashes.add(block_hash)
                    prefill_chunk_length = self.context.max_tokens - self.context.active_token_count

                    # If this chunk would leave exactly 1 token for the final chunk, reduce
                    # this chunk by 1 or skip scheduling so the final chunk has 2 tokens.
                    # This avoids the edge case where max_seqlen_q=1 which results in a bug
                    # with the Flash Attention kernel.
                    # See https://github.com/Dao-AILab/flash-attention/issues/1537
                    if remaining_len - prefill_chunk_length == 1:
                        if prefill_chunk_length > 1:
                            prefill_chunk_length -= 1
                        else:
                            # We only have space for 1 token, but remaining is 2.
                            # Delay scheduling to avoid leaving exactly 1 token for the final chunk.
                            can_schedule = False
                            break

                    self.context.add_request(req, prefill_chunk_length=prefill_chunk_length)
                    self._loop.call_soon_threadsafe(
                        self._loop.create_task, self._notify_cond_for_new_request()
                    )
                    self.context.chunked_prefill_request_id = req.request_id
                    req.remaining_prompt_tokens = req.remaining_prompt_tokens[prefill_chunk_length:]
                    req.finished_chunk_token_count += prefill_chunk_length
                    # Still have tokens to prefill, so we break and keep the
                    # chunked prefill request at the head of the waiting queue
                    # Note that we do not need to continue check the queue, as the tokens are full

        # Prepend pending request ids to waiting queue.
        if prefix_caching_enabled and pending_request_ids:
            is_continuing_chunked_prefill = self.context.chunked_prefill_request_id >= 0
            if is_continuing_chunked_prefill:
                chunked_request_id = self.waiting_request_ids.popleft()
                self.waiting_request_ids.extendleft(reversed(pending_request_ids))
                self.waiting_request_ids.appendleft(chunked_request_id)
            else:
                self.waiting_request_ids.extendleft(reversed(pending_request_ids))

    async def async_forward(self) -> Tuple[Dict, Dict, float]:
        """Uses `asyncio` for continuous generation.
        Sleeps when no requests are available, until new requests have been added.

        Returns:
            A tuple comprised of:
                step_result (Optional[Dict]): The result of the step.
                context_state (Dict): A tuple consisting of the state of the context.
                is_decode_only, total/paused request count, active token count.
                step_time (float): How long this step took.
        """

        # If suspended, no stepping.
        if self.state in (EngineState.SUSPENDED, EngineState.SUSPENDING):
            raise EngineSuspendedError(self.context.step_count)

        # schedule requests
        self.schedule_waiting_requests()

        # Saving pre-step state, for printing output below.
        is_decode_only = self.context.is_decode_only()
        pre_step_context_state = {
            "is_decode_only": is_decode_only,
            "max_requests": self.context.max_requests,
            "total_request_count": self.context.total_request_count,
            "paused_request_count": self.context.paused_request_count,
            "active_token_count": self.context.active_token_count,
            "step_count": self.context.step_count,
        }

        # Generate tokens.
        range_push("Prefill" if not is_decode_only else "Decode")
        # TODO @TDE: Account for this line when overlapping forward and bookkeep.
        self.is_decode_only = is_decode_only

        self.step_start_event.record()
        result = await self.controller.async_generate_output_tokens_dynamic_batch()
        self.step_end_event.record()
        self.step_end_event.synchronize()
        step_time = self.step_start_event.elapsed_time(self.step_end_event) / 1e3
        self.context.step_count += 1
        self.context.prefix_cache_lru_clock += 1

        range_pop()

        if (
            self.logging_step_interval > 0
            and self.context.step_count > 0
            and self.context.step_count % self.logging_step_interval == 0
            and self.metrics_writer is not None
        ):
            kvcache_util_stats = self.context.get_kvcache_utilization_stats()
        else:
            kvcache_util_stats = None

        post_step_context_state = {
            "waiting_request_count": len(self.waiting_request_ids),
            "finished_request_count": self.finished_request_count,
            "evicted_request_count": self.evicted_request_count,
            "kv_stats": kvcache_util_stats,
            "padded_active_token_count": self.context.padded_active_token_count,
            "using_cuda_graph_this_step": self.context.using_cuda_graph_this_step(),
            "total_active_block_count": self.context.kv_block_allocator.active_count,
            "total_paused_block_count": self.context.kv_block_allocator.paused_count,
            "total_active_used_blocks": self.context.kv_block_allocator.get_active_used(),
            "total_paused_used_blocks": self.context.kv_block_allocator.get_paused_used(),
        }

        context_state = {**pre_step_context_state, **post_step_context_state}

        return result, context_state, step_time

    async def async_bookkeep(
        self, step_result: Optional[Dict], context_state: Dict, step_time: float
    ):
        """Uses `asyncio` for continuous bookkeeping.

        Args:
            step_result (Optional[Dict]): The result of the step.
            context_state (Dict): is_decode_only, total/paused request count, active token count.
            step_time (float): How long this step took.

        Returns:
            A dictionary containing:
                active_requests (List): Requests that ran in the last step and are still active.
                finished_requests (List): Requests that ran in the last step and have now finished.
                step_time (float): The step time in seconds.
                cuda_graph_request_count (int): The CUDA graph batch size matching this step.
        """
        # Increment finished_request_count.
        range_push("bookkeeping")
        cuda_graph_request_count = None

        if step_result is not None:
            active_request_ids = step_result["active_request_ids"]
            finished_request_ids = step_result["finished_request_ids"]
            newly_paused_request_ids = step_result.get("newly_paused_request_ids")
            evict_request_ids = step_result.get("evict_request_ids")
            sample = step_result["sample"]
            accepted_tokens = step_result["accepted_tokens"]
            log_probs = step_result["log_probs"]
            top_n_logprobs = step_result.get("top_n_logprobs", None)
            routing_indices_per_request = step_result.get("routing_indices_per_request", None)
            cuda_graph_request_count = step_result["cuda_graph_request_count"]

            # Add paused events.
            if newly_paused_request_ids is not None and self.track_paused_request_events:
                newly_paused_request_ids = newly_paused_request_ids.tolist()
                [self.get_request(i).add_event_pause() for i in newly_paused_request_ids]

            # Process finished requests (adds FINISH events and returns records).
            (active_request_ids, finished_request_records) = self.post_process_requests(
                active_request_ids,
                finished_request_ids,
                evict_request_ids,
                step_time,
                sample,
                accepted_tokens,
                log_probs,
                top_n_logprobs,
                routing_indices_per_request,
                pre_fwd_active_token_count=context_state.get("active_token_count"),
                pre_fwd_step_count=context_state.get("step_count"),
            )

        else:
            active_request_ids: list[int] = []
            finished_request_records: list[DynamicInferenceRequestRecord] = []

        # Failed requests. Status and events were already set in _handle_failed_request;
        # here we just clean up the entry and include it in finished_request_records.
        for failed_request_id in self.failed_request_ids:
            failed_entry = self.requests.pop(failed_request_id)
            finished_request_records.append(failed_entry.record)
            assert (
                failed_entry.future.done()
            ), f"Failed request {failed_request_id} future has not been properly resolved."
        self.failed_request_ids.clear()

        range_pop()

        # Detokenize all finished requests if not using
        # the coordinator. Otherwise, the coordinator will
        # overlap detokenization with the engine.
        if not self.use_coordinator:
            range_push("detokenization")
            for record in finished_request_records:
                for request in record.requests:
                    if request.prompt is None:
                        request.prompt = self.controller.detokenize(
                            self.controller.tokenizer,
                            request.prompt_tokens.tolist(),
                            remove_EOD=False,
                        )
                    request.generated_text = self.controller.detokenize(
                        self.controller.tokenizer,
                        request.generated_tokens,
                        remove_EOD=not request.sampling_params.detokenize_stop_sequence,
                    )
            range_pop()

        # Handle necessary ZMQ DP coordinator communication.
        # Failed request replies were already sent in _handle_failed_request,
        # so only send completed records here.
        if self.use_coordinator and self.is_mp_coordinator:
            records_to_send = [
                r for r in finished_request_records if r.requests[-1].status != Status.FAILED
            ]
            if records_to_send:
                range_push("coordinator_communication")
                payload = msgpack.packb(
                    [Headers.ENGINE_REPLY.value, [r.merge().serialize() for r in records_to_send]],
                    use_bin_type=True,
                )
                self.socket_for_receiving_requests.send(payload)
                range_pop()

        # Drain prefix cache hit counters from context into engine accumulators.
        if self.context.enable_prefix_caching:
            self._prefix_cache_hits += self.context.prefix_cache_hits
            self._prefix_cache_blocks_matched += self.context.prefix_cache_blocks_matched
            self.context.prefix_cache_hits = 0
            self.context.prefix_cache_blocks_matched = 0

        # Log KV cache utilization stats to W&B
        if context_state["kv_stats"] is not None:
            # Prepare metrics dictionary with all stats
            # Use 'inference/' prefix for all metrics to separate from training metrics
            metrics = {
                'inference/inference_step': int(
                    self.inference_step_offset + int(self.context.step_count)
                ),
                'inference/step_time_s': float(step_time),
                'inference/waiting_queue_len': int(len(self.waiting_request_ids)),
                'inference/total_requests_dict_size': int(len(self.requests)),
            }
            # Add KV stats with inference/ prefix
            # Convert utilization metrics from 0-1 range to 0-100 percentage range for better visualization
            for key, value in context_state["kv_stats"].items():
                if 'utilization' in key:
                    # Convert to percentage (0-100) and group under kvcache_utilization
                    metrics[f'inference/{key}'] = float(value * 100.0)
                else:
                    metrics[f'inference/{key}'] = value

            # Add speculative decoding acceptance metrics.
            if self.num_speculative_tokens > 0 and self._spec_tokens_proposed > 0:
                acceptance_rate = self._spec_tokens_accepted / self._spec_tokens_proposed
                metrics['inference/spec_decode_acceptance_rate'] = float(acceptance_rate * 100.0)
                metrics['inference/spec_decode_tokens_proposed'] = int(self._spec_tokens_proposed)
                metrics['inference/spec_decode_tokens_accepted'] = int(self._spec_tokens_accepted)
                metrics['inference/spec_decode_num_steps'] = int(self._spec_steps)

            # Add prefix caching metrics.
            if self.context.enable_prefix_caching and self._prefix_cache_hits > 0:
                metrics['inference/prefix_cache_hits'] = int(self._prefix_cache_hits)
                metrics['inference/prefix_cache_blocks_matched'] = int(
                    self._prefix_cache_blocks_matched
                )

            if HAVE_WANDB and self.metrics_writer.__name__ == "wandb":
                self.metrics_writer.log(metrics, commit=True)
            else:
                raise ValueError(f"Unsupported metrics writer type: {type(self.metrics_writer)}")

        # Print context state.
        if (
            self.logging_step_interval > 0
            and self.context.step_count % self.logging_step_interval == 0
        ):
            mem = torch.cuda.memory_stats()
            step_type = "decode" if context_state["is_decode_only"] else "non-decode"
            output_str = (
                "* rank %d | step %d | %s ... time: %.3f ms%s ... "
                "reqs: a %d/%d, p %d, w %d, f %d, e %d ... "
                "blocks: a %d/%d, p %d/%d ... "
                "mem: tensors %d, alloc %.1f gb, res %.1f gb."
                % (
                    self.rank,
                    self.context.step_count,
                    datetime.now().strftime("%H:%M:%S"),
                    step_time * 1000,
                    (
                        " [%s + real config %s + cuda graph %s]"
                        % (
                            step_type,
                            self.context.batch_dimensions,
                            (
                                "OFF"
                                if not self.context.using_cuda_graph_this_step()
                                else self.context.padded_batch_dimensions
                            ),
                        )
                    ),
                    context_state["total_request_count"] - context_state["paused_request_count"],
                    context_state["max_requests"],
                    context_state["paused_request_count"],
                    context_state["waiting_request_count"],
                    context_state["finished_request_count"],
                    context_state["evicted_request_count"],
                    context_state["total_active_used_blocks"],
                    context_state["total_active_block_count"],
                    context_state["total_paused_used_blocks"],
                    context_state["total_paused_block_count"],
                    mem["allocation.all.current"],
                    mem["allocated_bytes.all.current"] / (1024**3),
                    mem["reserved_bytes.all.current"] / (1024**3),
                )
            )
            if self.num_speculative_tokens > 0 and self._spec_tokens_proposed > 0:
                spec_rate = self._spec_tokens_accepted / self._spec_tokens_proposed * 100.0
                output_str += " ... spec: accept %.1f%% (%d/%d in %d steps)" % (
                    spec_rate,
                    self._spec_tokens_accepted,
                    self._spec_tokens_proposed,
                    self._spec_steps,
                )
            if self.context.enable_prefix_caching and self._prefix_cache_hits > 0:
                output_str += " ... prefix cache: %d hits, %d blocks matched" % (
                    self._prefix_cache_hits,
                    self._prefix_cache_blocks_matched,
                )
            if context_state["is_decode_only"]:
                output_str = f"\033[94m{output_str}\033[0m"
            logging.info(output_str)

            # Reset speculative decoding accumulators after both wandb and console logging.
            if self.num_speculative_tokens > 0:
                self._spec_tokens_proposed = 0
                self._spec_tokens_accepted = 0
                self._spec_steps = 0

            # Reset prefix caching accumulators after both wandb and console logging.
            if self.context.enable_prefix_caching:
                self._prefix_cache_hits = 0
                self._prefix_cache_blocks_matched = 0

        return {
            "active_request_ids": active_request_ids,
            "finished_request_records": finished_request_records,
            "step_time": step_time,
            "cuda_graph_request_count": cuda_graph_request_count,
        }

    async def async_step(
        self,
    ) -> Tuple[List[DynamicInferenceRequest], List[DynamicInferenceRequest], float]:
        """
        Wrapper for controller.generate_output_tokens_dynamic_batch(), to
        match vLLM API. Uses `asyncio` for continuous generation which allows this
        method to sleep and wake up when new requests are available.

        Returns:
            A tuple comprised of:
                1. Requests that ran in the last step and are still active.
                2. Requests that ran in the last step and have now finished.
                3. The step time in seconds.
        """
        last_step_data = await self.async_forward()
        ret = await self.async_bookkeep(*last_step_data)
        # Keep for compatibility with current test suite.
        return ret

    def _run_coroutine_sync(self, coro):
        """Run a coroutine synchronously, handling the case when already in an event loop.

        This method safely runs an async coroutine from synchronous code, even when
        called from within an already running event loop (e.g., when used with async
        frameworks like pytriton).
        """
        try:
            # Check if there's already a running event loop
            asyncio.get_running_loop()
            # We're inside a running loop - run in a separate thread
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                future = executor.submit(asyncio.run, coro)
                return future.result()
        except RuntimeError:
            # No running loop - safe to use run_until_complete
            return self._loop.run_until_complete(coro)

    def step_modern(
        self,
    ) -> Tuple[List[DynamicInferenceRequest], List[DynamicInferenceRequest], float]:
        """Synchronous wrapper for `self.async_step`."""
        return self._run_coroutine_sync(self.async_step())

    def step_legacy(
        self, sampling_params: SamplingParams
    ) -> Tuple[List[DynamicInferenceRequest], List[DynamicInferenceRequest], float]:
        """Synchronous wrapper for `self.async_step`."""
        warnings.warn(
            "`step_legacy()` is deprecated and will be removed in `megatron-core` "
            "0.16. Please use `step_modern()` going forward, which will eventually "
            "be renamed to `step()`."
        )
        result = self._run_coroutine_sync(self.async_step())
        active_requests = [self.get_request(i) for i in result["active_request_ids"]]
        finished_requests = [r.merge() for r in result["finished_request_records"]]
        return active_requests, finished_requests, result["step_time"]

    # For backwards compatibility, point `step()` to `step_legacy()`. Starting in
    # `megatron-core` 0.16, `step_modern()` will be renamed to `step()`.
    step = step_legacy

    def generate(
        self, prompts: List[str], sampling_params: Optional[SamplingParams] = SamplingParams()
    ) -> List[DynamicInferenceRequest]:
        """Generates completions for a static list of prompts."""

        for prompt in prompts:
            request_id = int(next(self.request_counter))
            _ = self.add_request(request_id, prompt, sampling_params)

        finished_request_records_list = []
        while self.has_unfinished_requests():
            result = self.step_modern()
            finished_request_records_list.extend(result["finished_request_records"])

        # Ensure requests are returned in the same order they were passed in.
        finished_request_records_list.sort(key=lambda r: r.request_id)

        return finished_request_records_list

    def schedule_requests(self) -> int:
        """Drains the ZMQ socket for a batch of requests and adds them to the engine.

        This method is a collective and synchronous operation that must be called
        by all ranks in a Model Parallel (MP) group at the same time. It ensures
        that all ranks process the exact same batch of incoming requests and
        control signals.

        The synchronization works as follows:
        1.  The MP rank 0 drains all pending messages from its subscriber socket
            in a non-blocking manner.
        2.  MP rank 0 then broadcasts the number of messages it received to all other
            ranks in its MP group using a dedicated publisher socket.
        3.  The other MP ranks wait to receive this count, and then receive exactly
            that many messages from their subscriber sockets.

        Once all ranks have the same batch of messages, they are unpacked and
        processed. New requests are added to the engine's queue, and control
        signals (PAUSE, UNPAUSE, SUSPEND, RESUME, STOP) update the engine's
        internal state.

        Note:
            This function is synchronous and must be called collectively by all
            ranks in a MP group. It should not be launched in a separate coroutine
            to ensure all ranks execute it in lockstep before proceeding to the
            next engine step.

        Returns:
            int: The number of messages that were received and processed in this batch.
        """

        range_push("drain_zmq_socket")
        all_messages = []
        if self.is_mp_coordinator:
            while True:
                try:
                    # Receive messages in a non-blocking way.
                    all_messages.append(self.socket_for_receiving_requests.recv(flags=zmq.NOBLOCK))
                except zmq.Again:
                    # This exception is hit as soon as the socket is empty.
                    break
            messages_to_dequeue = len(all_messages)
            # First publish the number of messages to dequeue.
            # This is important because we want all tensor parallel ranks
            # to dequeue the same number of messages.
            self.model_parallel_num_msgs_publisher_socket.send(
                struct.pack('!i', messages_to_dequeue)
            )
            # Now publish the actual messages to all model parallel ranks
            if messages_to_dequeue > 0:
                self.model_parallel_publisher_socket.send_multipart(all_messages)
        else:
            # First, receive the number of messages to dequeue from mp-rank 0
            messages_to_dequeue = struct.unpack(
                '!i', self.model_parallel_num_msgs_subscriber_socket.recv()
            )[0]
            # Now, dequeue the same number of messages from the subscriber socket.
            # Note that these receives are blocking, because the messages
            # are guaranteed to be available after the tp-rank 0 has sent them.
            if messages_to_dequeue > 0:
                all_messages = self.model_parallel_subscriber_socket.recv_multipart()
            else:
                all_messages = []

        range_pop()

        # First pass: add requests.
        # Control signals are queued for the second pass.
        new_generation_epoch = None
        for message in all_messages:
            data = msgpack.unpackb(message, raw=False)
            header = Headers(data[0])
            if header == Headers.SUBMIT_REQUEST:
                request_id, prompt, sampling_params = data[1:]
                sampling_params = SamplingParams.deserialize(sampling_params)
                range_push("add_request")
                self.add_request(request_id, prompt, sampling_params)
                range_pop()
            elif header == Headers.SET_GENERATION_EPOCH:
                new_generation_epoch = data[1]
            else:
                # Control signal: queue for second pass.
                self._pending_signals.append(message)

        if new_generation_epoch is not None:
            self._generation_epoch = new_generation_epoch
            # Stamp all active requests with the new epoch.
            # Each field stores a sparse list of (start_token_index, epoch) boundaries.
            for entry in self.requests.values():
                request = entry.record[-1]
                total = len(request.prompt_tokens) + len(request.generated_tokens)
                if total > 0:
                    boundary = (total - 1, new_generation_epoch)
                    if request.policy_epoch is None:
                        request.policy_epoch = [(0, new_generation_epoch)]
                    else:
                        request.policy_epoch.append(boundary)
                    if request.kv_cache_epoch is None:
                        request.kv_cache_epoch = [(0, new_generation_epoch)]
                    else:
                        request.kv_cache_epoch.append(boundary)

        # Second pass: apply at most one control signal (the engine loop
        # processes one state transition per iteration).
        if self._pending_signals:
            message = self._pending_signals.popleft()
            data = msgpack.unpackb(message, raw=False)
            header = Headers(data[0])

            if header == Headers.PAUSE:
                if self.state == EngineState.RUNNING:
                    self.state = EngineState.PAUSING
                    self._state_events[EngineState.RUNNING].clear()
                # Any other state can safely ignore PAUSE.

            elif header == Headers.UNPAUSE:
                assert self.state == EngineState.PAUSED, f"Received UNPAUSE in state {self.state}"
                self.state = EngineState.UNPAUSING

            elif header == Headers.SUSPEND:
                assert self.state == EngineState.PAUSED, f"Received SUSPEND in state {self.state}"
                self._state_events[EngineState.RESUMED].clear()
                self.suspend()
                self.state = EngineState.SUSPENDING

            elif header == Headers.RESUME:
                assert self.state == EngineState.SUSPENDED, f"Received RESUME in state {self.state}"
                self._state_events[EngineState.SUSPENDED].clear()
                self.resume()
                self.state = EngineState.RESUMING

            elif header == Headers.STOP:
                assert self.state in (
                    EngineState.PAUSED,
                    EngineState.SUSPENDED,
                ), f"Received STOP in state {self.state}"
                if self.state == EngineState.SUSPENDED:
                    self._state_events[EngineState.SUSPENDED].clear()
                self.state = EngineState.STOPPING

            else:
                raise UnknownHeaderError(header)

        return len(all_messages)

    async def shutdown(self):
        """Shut down the engine and clean up ZMQ resources.

        Called from the engine loop's finally block after the loop exits.
        """
        self.state = EngineState.STOPPED

        # Cleanup the request futures.
        for entry in self.requests.values():
            if not entry.future.done():
                entry.future.cancel()

        # ZMQ cleanup; designed to be idempotent.
        sock = getattr(self, 'socket_for_receiving_requests', None)
        if sock is not None and not sock.closed:
            try:
                sock.send(msgpack.packb([Headers.DISCONNECT.value], use_bin_type=True))
            except Exception:
                pass
        for socket in getattr(self, 'zmq_sockets', []):
            socket.close(linger=0)
        if hasattr(self, 'zmq_sockets'):
            self.zmq_sockets.clear()
        if hasattr(self, "expert_parallel_zmq_communicator"):
            self.expert_parallel_zmq_communicator.close()
        if hasattr(self, "world_zmq_communicator"):
            self.world_zmq_communicator.close()
        if not self.zmq_context.closed:
            self.zmq_context.term()

        # Set the stopped state at the very end.
        self._state_events[EngineState.STOPPED].set()

    @trace_async_exceptions
    async def run_engine(self, *, loop: Optional[asyncio.AbstractEventLoop] = None):
        """Continually steps the engine asynchronously."""
        self._loop = get_asyncio_loop(loop)
        self.use_coordinator = False
        try:
            while True:
                # Wait until there are active requests before proceeding.
                async with self._cond:
                    await self._cond.wait_for(
                        lambda: (
                            self.state not in (EngineState.SUSPENDED, EngineState.SUSPENDING)
                            and (
                                self.context.get_active_request_count() > 0
                                or self.waiting_request_ids
                            )
                        )
                    )
                await self.async_step()
        except asyncio.CancelledError:
            pass

    async def _ep_establish_consensus(
        self, local_work: int, signal_consensus: bool
    ) -> tuple[int, bool]:
        """EP all-reduce to share work counts and pause consensus.

        All-reduces two integers at once:
        - local_work: actual pending request count (always >= 0).
        - consensus flag: -1 if this rank wants to pause, 0 otherwise.

        Using max for both:
        - max(work) > 0 means at least one EP peer has real work.
        - max(consensus) == -1 means ALL peers signaled -1 (all PAUSING).
          Any RUNNING peer contributes 0, pulling the max to 0.

        Args:
            local_work: Pending request count for this rank.
            signal_consensus: True if this rank is ready to pause.
        Returns:
            (global_work, all_pausing): max work across EP, and whether
            all peers signaled consensus.
        """
        range_push("_ep_establish_consensus")

        consensus_val = -1 if signal_consensus else 0

        # Signals can be received asynchronously on EP ranks.
        # We do not want a rank to pause prematurely if its peers have yet to receive the signal.
        # So this is an *attempt* to process the signal. This rank has received the signal
        # and passes -1 to the all-reduce. If any other rank in the EP group has not received
        # the signal yet, it will pass a zero value to the all-reduce, hence the global consensus
        # will be zero and we will defer processing the signal.
        # When all ranks receive the signal, global consensus will be -1 and we can process.

        if self.ep_world_size > 1:
            # Note that it is important to use a non-blocking asyncio-friendly all-reduce here.
            # The user may have other tasks running in the event loop that need to be serviced.
            # Do not using a torch.distributed blocking all-reduce here using nccl/gloo.
            # We have tried that and it blocks the event loop in megatron-rl.
            global_work, global_consensus = (
                await self.expert_parallel_zmq_communicator.all_reduce_max(
                    local_work, consensus_val, async_op=(not self.use_synchronous_zmq_collectives)
                )
            )
        else:
            global_work, global_consensus = local_work, consensus_val

        range_pop()
        return global_work, global_consensus == -1

    async def _world_barrier(self):
        """World-wide ZMQ all-reduce barrier for global rank consensus.

        Used for all state transitions that require global synchronization:
        PAUSING → PAUSED, UNPAUSING → RUNNING, SUSPENDING → SUSPENDED,
        RESUMING → PAUSED, and STOPPING → STOPPED.

        No-op when world_size == 1 (communicator is not created).
        """
        range_push("world_barrier")
        if hasattr(self, 'world_zmq_communicator'):
            await self.world_zmq_communicator.all_reduce_max(
                1, async_op=(not self.use_synchronous_zmq_collectives)
            )
        range_pop()

    @trace_async_exceptions
    async def run_engine_with_coordinator(
        self, *, loop: Optional[asyncio.AbstractEventLoop] = None
    ):
        """Continually steps the engine asynchronously.

        State-dependent behavior:
        - RUNNING: EP all-reduce to check for work, then step or idle.
        - PAUSING: EP all-reduce to reach consensus, then world barrier.
        - PAUSED / SUSPENDED: Idle-sleep, wait for signals via schedule_requests().
        - UNPAUSING / SUSPENDING / RESUMING / STOPPING: World barrier, then transition.
        - STOPPED: Teardown and exit.
        """
        self._loop = get_asyncio_loop(loop)
        self.use_coordinator = True

        try:
            while True:
                self.schedule_requests()

                if self.state in (EngineState.RUNNING, EngineState.PAUSING):
                    local_pending = self.context.get_active_request_count() + len(
                        self.waiting_request_ids
                    )
                    global_work, all_pausing = await self._ep_establish_consensus(
                        local_pending, signal_consensus=(self.state == EngineState.PAUSING)
                    )

                    if all_pausing:
                        # All EP peers are PAUSING: pause immediately.
                        await self._world_barrier()
                        self.state = EngineState.PAUSED
                        self._state_events[EngineState.PAUSED].set()
                    elif global_work > 0:
                        # At least one EP peer has work: all must participate.
                        if local_pending > 0:
                            await self.async_step()
                        else:
                            # Dummy forward to participate in the EP collective.
                            self.step_start_event.record()
                            self.controller.dummy_forward()
                            self.step_end_event.record()
                            self.step_end_event.synchronize()
                            self.context.step_count += 1
                            self.context.prefix_cache_lru_clock += 1
                    else:
                        # No work, but not all pausing: idle.
                        await asyncio.sleep(0.02)

                elif self.state == EngineState.PAUSED:
                    await asyncio.sleep(0.02)

                elif self.state == EngineState.UNPAUSING:
                    await self._world_barrier()
                    self.state = EngineState.RUNNING
                    self._state_events[EngineState.PAUSED].clear()
                    self._state_events[EngineState.RUNNING].set()

                elif self.state == EngineState.SUSPENDING:
                    await self._world_barrier()
                    self.state = EngineState.SUSPENDED
                    self._state_events[EngineState.SUSPENDED].set()

                elif self.state == EngineState.SUSPENDED:
                    await asyncio.sleep(0.02)

                elif self.state == EngineState.RESUMING:
                    await self._world_barrier()
                    self.state = EngineState.PAUSED
                    self._state_events[EngineState.RESUMED].set()

                elif self.state == EngineState.STOPPING:
                    await self._world_barrier()
                    if self.rank == 0:
                        logging.info("Stopping engine.")
                    break

        finally:
            await self.shutdown()


================================================
FILE: megatron/core/inference/engines/mcore_engine.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from .static_engine import (  # noqa: F401 # pylint: disable=unused-import
    StaticInferenceEngine as MCoreEngine,
)


================================================
FILE: megatron/core/inference/engines/static_engine.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import asyncio
import warnings
from collections import OrderedDict
from typing import AsyncGenerator, Dict, List, Optional, Union

import torch

from megatron.core.inference.async_stream import AsyncStream
from megatron.core.inference.config import InferenceConfig, MambaInferenceStateConfig
from megatron.core.inference.contexts import DynamicInferenceContext, StaticInferenceContext
from megatron.core.inference.engines.abstract_engine import AbstractEngine
from megatron.core.inference.engines.dynamic_engine import DynamicInferenceEngine
from megatron.core.inference.inference_request import InferenceRequest
from megatron.core.inference.sampling_params import SamplingParams
from megatron.core.inference.scheduler import Scheduler
from megatron.core.inference.text_generation_controllers.text_generation_controller import (
    TextGenerationController,
)
from megatron.core.utils import get_asyncio_loop

try:
    from tqdm import tqdm

    HAVE_TQDM = True
except ImportError:
    from unittest.mock import MagicMock

    tqdm = MagicMock()
    HAVE_TQDM = False


# pylint: disable=line-too-long
class StaticInferenceEngine(AbstractEngine):
    """The Megatron core backend constructor

    This is the backend that does a simple forward pass on the model.
    Supports any model that is callable (Accepts the inputs and outputs the tensor)

    Args:
        text_generation_controller (TextGenerationController): A text generation
            controller that will be used to define how to preprocess prompts, generate
            outputs and detokenizer the output tokens.
        max_batch_size (int, optional): The maximum number of requests to process at once.
        random_seed (int, optional): Use a random seed if you want deterministic
            results. Defaults to None.
    """

    def __init__(
        self,
        text_generation_controller: TextGenerationController,
        max_batch_size: Optional[int] = None,
        random_seed: Optional[int] = None,
        legacy=False,
        buffer_size_gb: Optional[float] = 40,
    ):
        self.legacy = legacy
        if legacy:
            warnings.warn(
                "The static engine will be deprecated and removed in the future version of megatron-core. Switch to DynamicInferenceEngine."
            )
        else:
            warnings.warn(
                "`StaticInferenceEngine` will be deprecated in a future version of Megatron-core. "
                "Please directly use `DynamicInferenceEngine` instead. "
                "`StaticInferenceEngine` currently uses `DynamicInferenceEngine` under the hood.",
                DeprecationWarning,
            )

        self.controller = text_generation_controller
        self.inference_wrapped_model = self.controller.inference_wrapped_model
        self.config = self.inference_wrapped_model.config
        self.random_seed = random_seed or 1234

        # Store original context in case we need to fall back to legacy static engine
        original_context = self.inference_wrapped_model.inference_context
        assert original_context is not None
        assert isinstance(original_context, StaticInferenceContext)

        if max_batch_size is None:
            max_batch_size = original_context.max_batch_size
        elif max_batch_size > original_context.max_batch_size:
            warnings.warn(
                f"Engine `max_batch_size` ({max_batch_size}) > "
                f"`context.max_batch_size` in `inference_wrapped_model.inference_context` "
                f"({original_context.max_batch_size}); setting `max_batch_size` to "
                f"{original_context.max_batch_size}",
                UserWarning,
            )
            max_batch_size = original_context.max_batch_size

        self.scheduler = Scheduler(max_batch_size=max_batch_size)

        mamba_inference_state_config = MambaInferenceStateConfig.from_model(
            self.inference_wrapped_model.model
        )

        try:
            if not legacy:
                dynamic_context = DynamicInferenceContext(
                    model_config=self.config,
                    inference_config=InferenceConfig(
                        max_sequence_length=original_context.max_sequence_length,
                        buffer_size_gb=buffer_size_gb,
                        mamba_inference_state_config=mamba_inference_state_config,
                        max_requests=max_batch_size,
                        num_cuda_graphs=1,
                        block_size_tokens=256,
                        unified_memory_level=0,
                    ),
                )

                self.controller.inference_wrapped_model.inference_context = dynamic_context
                self.controller.inference_wrapped_model.prep_model_for_inference()
                self.controller._init_dynamic_sampling_tensors()

                self.dynamic_engine = DynamicInferenceEngine(
                    controller=self.controller, context=dynamic_context
                )
        except Exception as e:
            # Get exception details for better debugging
            exception_msg = str(e) if str(e) else f"{type(e).__name__}: {repr(e)}"
            warnings.warn(
                f"Error initializing dynamic engine: {exception_msg} , using legacy static engine",
                UserWarning,
            )
            # Restore original context when falling back to legacy static engine
            self.controller.inference_wrapped_model.inference_context = original_context
            self.legacy = True

    def get_new_request_id(self) -> str:
        """Gets a new request id from the scheduler"""
        return self.scheduler.get_new_request_id()

    def add_request(
        self,
        prompt: Optional[str] = None,
        add_BOS: bool = False,
        encoder_prompt: Optional[str] = None,
        sampling_params: Optional[SamplingParams] = None,
        streaming: bool = False,
        inference_request: Optional[InferenceRequest] = None,
        *,
        inference_parameters: Optional[SamplingParams] = None,
    ) -> int:
        """
        Adds a request to the scheduler and returns the request ID.

        Args:
            prompt (str): A prompt string
            add_BOS (bool): Whether to add BOS token to beginning of the prompt
            encoder_prompt (str): The encoder prompt string
            sampling_params (SamplingParams): The inference parameters
            streaming (bool): Whether to stream incremental outputs for this request
            inference_request (InferenceRequest, optional): A fully constructed request.
                Defaults to None.
            inference_parameters (SamplingParams, optional): Deprecated and
                renamed to `SamplingParams`.

        Returns:
            The newly created request ID.
        """
        assert (
            prompt is not None or inference_request is not None
        ), f"At least one of `prompt` or `inference_request` must be specified"

        if sampling_params is None and inference_parameters is not None:
            warnings.warn(
                "`inference_parameters` has been renamed to `sampling_params`, "
                "and the previous name will be removed in Mcore v0.14."
            )
            sampling_params = inference_parameters

        if inference_request is None:
            # Support legacy single-arg tokenize_prompt mocks in tests.
            prompt_tokens = self.controller.tokenize_prompt(
                self.controller.tokenizer, prompt, add_BOS
            )
        else:
            prompt_tokens = inference_request.prompt_tokens

        return self.scheduler.add_request(
            prompt=prompt,
            prompt_tokens=prompt_tokens,
            encoder_prompt=encoder_prompt,
            sampling_params=sampling_params,
            streaming=streaming,
            inference_request=inference_request,
        )

    def get_stream_generator(
        self, request_id: int
    ) -> Union[AsyncGenerator[InferenceRequest, None], None]:
        """Returns the stream generator for the given request ID if it exists."""
        stream = self.scheduler.streams.get(request_id, None)
        if stream is not None:
            return stream.generator()
        return None

    @torch.inference_mode()
    def generate_using_dynamic_engine(
        self,
        prompts: Optional[List[str]] = None,
        add_BOS: bool = False,
        encoder_prompts: Optional[List[str]] = None,
        common_inference_params: Optional[SamplingParams] = None,
        sampling_params: Optional[SamplingParams] = None,
        inference_requests: Optional[List[InferenceRequest]] = None,
    ) -> List[InferenceRequest]:
        """Generate using dynamic engine

        Generate using dynamic engine.

        Args:
            prompts (List[str]): All the prompts as a list of strings
            add_BOS (bool): Whether to add BOS token to beginning of prompts
            encoder_prompts (List[dict]): All the encoder prompts as a list of strings
            common_inference_params: Deprecated. Only used for backward compatibility with
            MCore <= 0.9.0. Use `sampling_params` going forward.
            sampling_params (SamplingParams): The request-level sampling parameters
            inference_requests (List[InferenceRequest]): A pre-populated list of inference requests

        Returns:
            List[InferenceRequest]: The output is list of inference requests containing the
            generated tokens, texts and log probs if required
        """
        assert hasattr(self, 'dynamic_engine'), "Dynamic engine not initialized"

        if common_inference_params:
            sampling_params = common_inference_params
        if prompts:
            if add_BOS:
                sampling_params.add_BOS = True
            request_records = self.dynamic_engine.generate(
                prompts=prompts, sampling_params=sampling_params
            )
        elif inference_requests:
            prompts = [request.prompt for request in inference_requests]
            sampling_params = inference_requests[0].sampling_params
            if add_BOS:
                sampling_params.add_BOS = True
            request_records = self.dynamic_engine.generate(
                prompts=prompts, sampling_params=sampling_params
            )

        # Return the underlying `InferenceRequest` objects from the `DynamicInferenceRequestRecord`s.
        return [record.merge() for record in request_records]

    def generate_using_legacy_static_engine(
        self,
        prompts: Optional[List[str]] = None,
        add_BOS: bool = False,
        encoder_prompts: Optional[List[str]] = None,
        common_inference_params: Optional[SamplingParams] = None,
        sampling_params: Optional[SamplingParams] = None,
        inference_requests: Optional[List[InferenceRequest]] = None,
    ) -> List[InferenceRequest]:
        """The megatron core inference backend generate function

        This backend returns the output generations as a list.
        Args:
            prompts (List[str]): All the prompts as a list of strings
            add_BOS (bool): Whether to add BOS token to beginning of prompts
            encoder_prompts (List[dict]): All the encoder prompts as a list of strings
            common_inference_params: Deprecated. Only used for backward compatibility with
            MCore <= 0.9.0. Use `sampling_params` going forward.
            sampling_params (SamplingParams): The request-level sampling parameters
            inference_requests (List[InferenceRequest]): A pre-populated list of inference requests

        Returns:
            List[InferenceRequest]: The output is list of inference requests containing the
            generated tokens, texts and log probs if required
        """
        request_ids: List[str] = []

        if self.random_seed:
            torch.random.manual_seed(self.random_seed)

        if inference_requests is None:
            assert prompts is not None

            if common_inference_params:
                sampling_params = common_inference_params

            for i in range(len(prompts)):
                prompt = prompts[i]
                encoder_prompt = encoder_prompts[i] if encoder_prompts is not None else None
                request_id = self.add_request(
                    prompt=prompt, encoder_prompt=encoder_prompt, sampling_params=sampling_params
                )
                request_ids.append(request_id)
        else:
            for inference_request in inference_requests:
                request_ids.append(inference_request.request_id)
                self.scheduler.add_request(inference_request=inference_request)

        self.run_engine()

        result: List[InferenceRequest] = [
            self.scheduler.completed_request_pool[request_id] for request_id in request_ids
        ]
        return result

    def generate(
        self,
        prompts: Optional[List[str]] = None,
        add_BOS: bool = False,
        encoder_prompts: Optional[List[str]] = None,
        common_inference_params: Optional[SamplingParams] = None,
        sampling_params: Optional[SamplingParams] = None,
        inference_requests: Optional[List[InferenceRequest]] = None,
    ) -> List[InferenceRequest]:
        """The megatron core inference backend generate function

        This uses dynamic engine if available, otherwise uses legacy static engine.

        Args:
            prompts (List[str]): All the prompts as a list of strings
            add_BOS (bool): Whether to add BOS token to beginning of prompts
            encoder_prompts (List[dict]): All the encoder prompts as a list of strings
            common_inference_params: Deprecated. Only used for backward compatibility with
            MCore <= 0.9.0. Use `sampling_params` going forward.
            sampling_params (SamplingParams): The request-level sampling parameters
            inference_requests (List[InferenceRequest]): A pre-populated list of inference requests

        Returns:
            List[InferenceRequest]: The output is list of inference requests containing the
            generated tokens, texts and log probs if required
        """
        # TODO :M core- get rng state tracker
        if not self.legacy:
            return self.generate_using_dynamic_engine(
                prompts=prompts,
                add_BOS=add_BOS,
                encoder_prompts=encoder_prompts,
                common_inference_params=common_inference_params,
                sampling_params=sampling_params,
                inference_requests=inference_requests,
            )
        else:
            return self.generate_using_legacy_static_engine(
                prompts=prompts,
                add_BOS=add_BOS,
                encoder_prompts=encoder_prompts,
                common_inference_params=common_inference_params,
                sampling_params=sampling_params,
                inference_requests=inference_requests,
            )

    def run_engine(self):
        """Main functionality to run inference

        Runs the engine until there are no requests in the queue.

        Args:
            dynamic_generation (bool, optional): Set this to True, if you want
                to enable dynamic batching. Mainly used with an inference server.
                Defaults to False.
        """

        if not HAVE_TQDM:
            raise ImportError(
                "tqdm is required for StaticInferenceEngine, "
                "please install it with `pip install tqdm`"
            )

        prev_num_requests_pending = self.scheduler.num_requests_pending()
        tbar = tqdm(desc="static requests", total=prev_num_requests_pending)
        while self.scheduler.have_requests_pending():
            active_requests: Dict[str, InferenceRequest] = self.scheduler.active_request_pool.copy()
            active_streams: Dict[str, AsyncStream] = OrderedDict()
            for request_id in active_requests:
                if (stream := self.scheduler.streams.get(request_id, None)) is not None:
                    assert isinstance(stream, AsyncStream), stream
                    active_streams[request_id] = stream
            result_dict: Dict[str, InferenceRequest] = (
                self.controller.generate_all_output_tokens_static_batch(
                    active_requests, active_streams
                )
            )

            self.scheduler.update_requests_pools(result_dict=result_dict)

            crnt_num_requests_pending = self.scheduler.num_requests_pending()
            tbar.update(prev_num_requests_pending - crnt_num_requests_pending)
            prev_num_requests_pending = crnt_num_requests_pending

    def _wrapped_run_engine(self, cuda_device):
        """
        Explicitly sets the CUDA device before running the engine.

        This is to ensure that the CUDA device is correctly propagated when running
        in a new thread context.
        """
        torch.cuda.set_device(cuda_device)
        self.run_engine()

    async def run_engine_async(self, loop: Optional[asyncio.AbstractEventLoop] = None):
        """Runs the engine asynchronously using asyncio"""
        loop = get_asyncio_loop(loop)

        await loop.run_in_executor(None, self._wrapped_run_engine, torch.cuda.current_device())


================================================
FILE: megatron/core/inference/headers.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from enum import Enum, auto


class Headers(Enum):
    """
    Enum representing headers used for communication with the inference-coordinator.
    """

    CONNECT = auto()
    CONNECT_ACK = auto()
    SUBMIT_REQUEST = auto()
    ENGINE_REPLY = auto()
    PAUSE = auto()
    UNPAUSE = auto()
    SUSPEND = auto()
    RESUME = auto()
    SET_GENERATION_EPOCH = auto()
    STOP = auto()
    DISCONNECT = auto()
    SHUTDOWN = auto()


class UnknownHeaderError(Exception):
    """A signal with an unrecognized header was received by the coordinator."""

    def __init__(self, header):
        super().__init__(f"specialize for {header}.")


================================================
FILE: megatron/core/inference/inference_client.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import asyncio
import logging
import time
from typing import List, Optional, Union

from megatron.core.inference.inference_request import DynamicInferenceRequest
from megatron.core.inference.sampling_params import SamplingParams
from megatron.core.utils import get_asyncio_loop, trace_async_exceptions

from .headers import Headers

try:
    import zmq

    HAVE_ZMQ = True
except:
    HAVE_ZMQ = False

try:
    import msgpack

    HAVE_MSGPACK = True
except:
    HAVE_MSGPACK = False


class InferenceClient:
    """
    An asynchronous client for communicating with an inference coordinator service.

    This client uses ZeroMQ (ZMQ) for messaging and MessagePack (msgpack) for
    serialization. It is designed to work within an asyncio event loop. It can
    submit inference requests, listen for completed results, and send control
    signals (e.g., pause, stop) to the inference engines.

    The client operates by connecting a ZMQ DEALER socket to the inference
    coordinator's ROUTER socket. Requests are sent with a unique ID, and an
    `asyncio.Future` is created for each request. A background task listens for
    replies from the coordinator, and when a reply is received, it resolves the
    corresponding future with the result.

    Attributes:
        context (zmq.Context): The ZeroMQ context.
        socket (zmq.Socket): The ZMQ DEALER socket used for communication.
        completion_futures (dict[int, asyncio.Future]): A dictionary mapping
            request IDs to the asyncio Future objects that will hold the results.
        next_request_id (int): A counter for generating unique request IDs.
        listener_task (asyncio.Task): The background task that listens for
            completed requests.
    """

    def __init__(self, inference_coordinator_address: str, deserialize: bool = False):
        """
        Initializes the InferenceClient.

        Args:
            inference_coordinator_address (str): The address on which the
                inference coordinator is listening.
            deserialize (bool): If True, deserialize completed requests
                into DynamicInferenceRequest objects. If False (default), return
                the raw serialized dict for lower overhead.
        """
        assert (
            HAVE_ZMQ
        ), "please install the pyzmq library to use InferenceClient - pip install pyzmq"
        assert (
            HAVE_MSGPACK
        ), "please install the messagepack library to use InferenceClient - pip install msgpack"
        self.context = zmq.Context()
        socket = self.context.socket(zmq.DEALER)

        # Prevent socket.send() from thread-blocking at >1000 concurrent requests
        socket.setsockopt(zmq.SNDHWM, 0)
        socket.setsockopt(zmq.RCVHWM, 0)

        socket.connect(inference_coordinator_address)

        self._loop = None
        self.socket = socket
        self.deserialize = deserialize
        self.completion_futures = {}
        self.request_submission_times = {}
        self.next_request_id = 0

    def add_request(
        self, prompt: Union[str, List[int]], sampling_params: SamplingParams
    ) -> asyncio.Future:
        """
        Submits a new inference request to the coordinator.

        This method sends the prompt and sampling parameters to the inference
        coordinator. It immediately returns an asyncio.Future, which can be
        awaited to get the result of the inference request when it is complete.

        Args:
            prompt (str): The input prompt to send to the language model.
            sampling_params: An object containing the sampling parameters for
                text generation (e.g., temperature, top_p). It must have a
                `serialize()` method.

        Returns:
            asyncio.Future: A future that will be resolved with a
            `DynamicInferenceRequest` object (if deserialize=True) or a raw
            serialized dict (if deserialize=False) containing the completed result.
        """
        request_id = self.next_request_id
        self.next_request_id += 1
        payload = [Headers.SUBMIT_REQUEST.value, request_id, prompt, sampling_params.serialize()]
        payload_serialized = msgpack.packb(payload, use_bin_type=True)
        self.socket.send(payload_serialized)
        assert request_id not in self.completion_futures
        self.completion_futures[request_id] = asyncio.get_running_loop().create_future()
        self.request_submission_times[request_id] = time.perf_counter()
        return self.completion_futures[request_id]

    @trace_async_exceptions
    async def _recv_task(self):
        """
        Listens for completed inference requests from the coordinator.

        This coroutine runs in an infinite loop, continuously polling the socket
        for data.
        When a request reply is received, it unpacks the message, finds the
        corresponding Future using the request ID, and sets the result.
        Other control packets are handled appropriately.

        This method is started as a background task by the `start()` method.
        """
        while True:
            try:
                data = msgpack.unpackb(self.socket.recv(flags=zmq.NOBLOCK), raw=False)
                header = Headers(data[0])
                if header == Headers.ENGINE_REPLY:
                    request_id, reply = data[1:]
                    reply['latency'] = time.perf_counter() - self.request_submission_times.pop(
                        request_id
                    )
                    completion_future = self.completion_futures.pop(request_id)
                    if completion_future.done():
                        logging.warning(f"Client: The future for {request_id} has been cancelled!")
                        continue
                    completed_request = (
                        DynamicInferenceRequest.deserialize(reply) if self.deserialize else reply
                    )
                    completion_future.set_result(completed_request)
            except zmq.Again:
                await asyncio.sleep(0.005)
                continue
            except KeyboardInterrupt:
                break

    def _connect_with_inference_coordinator(self):
        """
        Performs the initial handshake with the inference coordinator.

        Sends a CONNECT signal and waits for a CONNECT_ACK reply to ensure the
        connection is established and acknowledged by the coordinator.
        """
        payload = [Headers.CONNECT.value]
        self.socket.send(msgpack.packb(payload, use_bin_type=True))
        reply = msgpack.unpackb(self.socket.recv(), raw=False)[0]
        assert Headers(reply) == Headers.CONNECT_ACK

    def start(self, loop: Optional[asyncio.AbstractEventLoop] = None):
        """
        Connects to the coordinator and starts the background listener task.

        This must be called before submitting any requests. It handles
        the initial handshake and spawns the `listen_for_completed_requests`
        coroutine.
        """
        logging.info("Client: Connecting to InferenceCoordinator...")
        self._loop = get_asyncio_loop(loop)
        self._connect_with_inference_coordinator()
        self.listener_task = self._loop.create_task(self._recv_task())

    def _send_signal_to_engines(self, signal, *args):
        """
        Sends a generic control signal to the inference coordinator.

        Args:
            signal: The signal to send, typically a value from the `Headers` enum.
            *args: Optional extra values to include in the payload.
        """
        payload = [signal.value, *args]
        payload_serialized = msgpack.packb(payload, use_bin_type=True)
        self.socket.send(payload_serialized)

    def pause_engines(self):
        """Sends PAUSE to all engines via coordinator.

        The coordinator broadcasts PAUSE. Each engine reaches EP consensus,
        then synchronizes via a world-wide barrier before transitioning to
        PAUSED. Callers should await engine.paused for confirmation.
        """
        self._send_signal_to_engines(Headers.PAUSE)

    def unpause_engines(self) -> None:
        """Sends UNPAUSE to all engines. No synchronization needed."""
        self._send_signal_to_engines(Headers.UNPAUSE)

    def set_generation_epoch(self, generation_epoch: int):
        """Sends a signal to stamp all in-flight requests with the given generation epoch.

        Args:
            generation_epoch: The current generation epoch number.
        """
        self._send_signal_to_engines(Headers.SET_GENERATION_EPOCH, generation_epoch)

    def suspend_engines(self):
        """Sends SUSPEND to all engines via coordinator. Requires PAUSED.

        Callers should await engine.suspended for confirmation.
        """
        self._send_signal_to_engines(Headers.SUSPEND)

    def resume_engines(self):
        """Sends RESUME to all engines via coordinator. Requires SUSPENDED.

        Callers should await engine.paused (or engine.running after UNPAUSE) for confirmation.
        """
        self._send_signal_to_engines(Headers.RESUME)

    def stop_engines(self):
        """Sends STOP to all engines via coordinator. Requires PAUSED or SUSPENDED.

        Callers should await engine.stopped for confirmation.
        Does not affect the coordinator.
        """
        self._send_signal_to_engines(Headers.STOP)

    def shutdown_coordinator(self):
        """Tells the coordinator process to exit its main loop.

        Does not affect the engines.
        """
        self._send_signal_to_engines(Headers.SHUTDOWN)

    def stop(self):
        """
        Stops the client and cleans up all resources.

        This method cancels the background listener task, closes the ZMQ socket,
        and terminates the ZMQ context. It should be called when the client is
        no longer needed to ensure a graceful shutdown.
        """
        if hasattr(self, 'listener_task') and not self.listener_task.done():
            self.listener_task.cancel()
        # Wake up any listeners.
        for future in self.completion_futures.values():
            if not future.done():
                future.cancel()
        self.completion_futures.clear()
        self.socket.close(linger=0)
        self.context.term()


================================================
FILE: megatron/core/inference/inference_request.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import copy
import time
import warnings
from dataclasses import asdict, dataclass, field
from enum import Enum, auto
from itertools import accumulate
from typing import Any, Dict, List, Optional, Tuple

import torch

from megatron.core.inference.sampling_params import SamplingParams
from megatron.core.tokenizers import MegatronTokenizer
from megatron.core.utils import experimental_api


def serialize_tensor(tensor: torch.Tensor) -> List:
    """Serialize tensor to bytes.

    Args:
        tensor (Tensor): Tensor.

    Returns:
        (List) Tensor as a list
    """
    torch.cuda.nvtx.range_push("serialize_tensor")

    # simply convert tensor into a list
    tensor = tensor.cpu().tolist()

    torch.cuda.nvtx.range_pop()
    return tensor


def deserialize_tensor(tensor_as_list: List) -> torch.Tensor:
    """Deserialize tensor from bytes.

    Args:
        tensor_as_list (List): List representation of tensor.

    Returns:
        (Tensor) Tensor.
    """
    tensor = torch.tensor(tensor_as_list)
    return tensor


def unwrap_serialized_tensors(serialized_request: dict) -> dict:
    """Unwrap ("tensor", [...]) tuples produced by serialize() into plain lists.

    Args:
        serialized_request (dict): A dict produced by `serialize()`.

    Returns:
        dict: A shallow copy with tensor wrapper tuples replaced by their inner lists.
    """
    return {
        k: v[1] if isinstance(v, (list, tuple)) and len(v) == 2 and v[0] == "tensor" else v
        for k, v in serialized_request.items()
    }


# class syntax
class Status(Enum):
    """Enum for status"""

    WAITING_IN_QUEUE = 1
    ACTIVE_AND_GENERATING_TOKENS = 2
    ACTIVE_BUT_NOT_GENERATING_TOKENS = 3
    COMPLETED = 4
    FAILED = 5


# =========================================================================
# Hash computation for prefix caching
# =========================================================================

# Constants for hash computation
# Using 2^61 - 1 (Mersenne prime) for ~10^18 hash space, reducing collision probability
# from ~10^-9 to ~10^-18 compared to the previous prime (1000000007).
HASH_PRIME = 2305843009213693951
HASH_BASE = 31

_hash_powers: Optional[torch.Tensor] = None


def compute_block_hashes_batched(prompt_tokens: torch.Tensor, block_size: int) -> List[int]:
    """Compute hashes for all complete blocks in a prompt in one batched operation.

    Reshapes prompt tokens into [num_blocks, block_size], computes all per-block
    token hashes via a single GPU matmul, transfers results with one .tolist() call,
    and chains parent hashes on CPU.

    Args:
        prompt_tokens: All prompt token IDs, shape [seq_len].
        block_size: Number of tokens per block.

    Returns:
        List of positive integer hash values (1 to HASH_PRIME), one per complete block.
    """
    num_complete_blocks = len(prompt_tokens) // block_size
    if num_complete_blocks == 0:
        return []

    global _hash_powers
    if _hash_powers is None or _hash_powers.shape[0] != block_size:
        positions = torch.arange(block_size, device=prompt_tokens.device, dtype=torch.int64)
        _hash_powers = torch.pow(HASH_BASE, positions).to(torch.int64) % HASH_PRIME

    # Reshape to [num_blocks, block_size] (zero-copy view) and compute all token hashes
    blocks = prompt_tokens[: num_complete_blocks * block_size].view(num_complete_blocks, block_size)
    token_hashes = (blocks.to(torch.int64) * _hash_powers).sum(dim=1) % HASH_PRIME

    # Single GPU→CPU transfer
    token_hashes_list = token_hashes.tolist()

    # Chain parent hashes on CPU (C-level accumulate, no Python loop)
    hashes = list(
        accumulate(
            token_hashes_list,
            lambda parent, th: (parent * HASH_BASE + th) % HASH_PRIME + 1,
            initial=0,
        )
    )[1:]

    return hashes


@dataclass(kw_only=True)
class InferenceRequest:
    """Class for one inference request

    Containing relevant data for an inference request

    """

    request_id: int
    prompt: str
    sampling_params: Optional[SamplingParams] = None
    inference_parameters: Optional[SamplingParams] = None
    prompt_tokens: Optional[List[int]] = None
    arrival_time: Optional[float] = None
    status: Optional[Status] = None
    encoder_prompt: Optional[str] = None
    generated_text: Optional[str] = None
    segments: Optional[List[str]] = None
    generated_segments: Optional[List[str]] = None
    generated_sequence_lengths: Optional[List[int]] = None
    generated_tokens: Optional[torch.Tensor] = None
    prompt_log_probs: Optional[torch.Tensor] = None
    generated_log_probs: Optional[torch.Tensor] = None
    prompt_top_n_logprobs: Optional[List[Dict[str, float]]] = None
    generated_top_n_logprobs: Optional[List[Dict[str, float]]] = None
    generated_length: Optional[int] = None
    tpot: Optional[List[int]] = None

    def __post_init__(self):
        if self.sampling_params is None and self.inference_parameters is not None:
            warnings.warn(
                "`inference_parameters` renamed to `sampling_params`, and the "
                "previous name will be removed in Mcore 0.14."
            )
            self.sampling_params = self.inference_parameters

    def serialize(self) -> dict:
        """Converts the instance into a serializable dictionary.

        Returns:
            (dict) A dictionary representation of the instance suitable for
                serialization.
        """
        # Dataclass to dict.
        # do not use asdict(self) - it has very high CPU overheads
        # and if there are tensors, it will try to deepcopy them
        obj = self.__dict__.copy()  # shallow dict copy
        obj["status"] = self.status.name if self.status else None
        obj["sampling_params"] = self.sampling_params.serialize() if self.sampling_params else None
        obj["inference_parameters"] = (
            self.inference_parameters.serialize() if self.inference_parameters else None
        )

        # Serialize tensors.
        obj = {
            k: (("tensor", serialize_tensor(v)) if isinstance(v, torch.Tensor) else v)
            for k, v in obj.items()
        }
        return obj

    @classmethod
    def deserialize(cls, obj: dict) -> "InferenceRequest":
        """Deserialize request.

        Args:
            obj (dict): Serialized request data.

        Returns:
            (InferenceRequest) Deserialized request.
        """

        # Initialize request.
        request = cls(**obj)
        request._post_deserialize(obj)
        return request

    def _post_deserialize(self, obj: dict):
        """
        This is called after the dataclass is initialized to handle any special
        deserialization logic.
        """
        # Deserialize status.
        self.status = None if obj["status"] is None else Status[obj["status"]]
        self.sampling_params = (
            None
            if obj["sampling_params"] is None
            else SamplingParams.deserialize(obj["sampling_params"])
        )
        self.inference_parameters = (
            None
            if obj["inference_parameters"] is None
            else SamplingParams.deserialize(obj["inference_parameters"])
        )

        # Deserialize tensors and sampling params.
        for k, v in obj.items():
            if isinstance(v, list) and len(v) == 2 and v[0] == "tensor":
                setattr(self, k, deserialize_tensor(v[1]))


class DynamicInferenceEventType(Enum):
    """Dynamic inference event type."""

    ADD_ENGINE = auto()  # When request is added to engine via _add_request()
    ADD_CONTEXT = auto()  # When request is added to context (scheduled for prefill)
    GENERATED_TOKEN = auto()  # When an output token is generated (payload = {"token_id": int})
    PAUSE = auto()
    EVICT = auto()
    FINISH = auto()
    FAIL = auto()
    ERROR_TRANSIENT = auto()
    ERROR_NONTRANSIENT = auto()


@dataclass(kw_only=True)
class DynamicInferenceEvent:
    """A lifecycle event for a dynamic inference requests.

    An event is currently one of the following:

    - request added
    - request paused
    - request evicted
    - request finished
    - request failed
    - request error (transient)
    - request error (non-transient, i.e. fatal)
    """

    timestamp: Optional[float] = None
    type: DynamicInferenceEventType
    payload: Optional[Any] = None

    def __post_init__(self):

        # Timestamp.
        if self.timestamp is None:
            self.timestamp = time.time()

        # Validate type.
        assert isinstance(self.type, DynamicInferenceEventType)

        # Validate payload.
        if self.type in (
            DynamicInferenceEventType.ERROR_TRANSIENT,
            DynamicInferenceEventType.ERROR_NONTRANSIENT,
        ):
            assert self.payload is not None
        elif self.type == DynamicInferenceEventType.GENERATED_TOKEN:
            assert (
                self.payload is not None
                and isinstance(self.payload, dict)
                and "token_id" in self.payload
            )
        else:
            assert self.payload is None

    def __str__(self):
        if self.type == DynamicInferenceEventType.GENERATED_TOKEN:
            payload_str = f", token={self.payload['token_id']}"
        elif self.payload is None:
            payload_str = ""
        else:
            payload_str = f", {type(self.payload).__name__}"
        return f"[{self.timestamp:.3f}] {self.type.name}{payload_str}"

    def serialize(self) -> dict:
        """Converts the instance into a serializable dictionary.

        Returns:
            dict: Full event dict.
        """
        torch.cuda.nvtx.range_push("DynamicInferenceEvent.serialize")
        # do not use asdict(self) - it has very high CPU overheads
        # and if there are tensors, it will try to deepcopy them
        obj = self.__dict__.copy()
        obj["type"] = self.type.name

        # Serialize payload.
        if self.payload is not None:
            if self.type in (
                DynamicInferenceEventType.ERROR_TRANSIENT,
                DynamicInferenceEventType.ERROR_NONTRANSIENT,
            ):
                from .contexts.dynamic_context import ContextErrorFactory  # avoid circular import.

                obj["payload"] = ContextErrorFactory.serialize(self.payload)

        torch.cuda.nvtx.range_pop()
        return obj

    @classmethod
    def deserialize(cls, obj: dict) -> "DynamicInferenceEvent":
        """Deserialize event.

        Args:
            obj: Serialized event data dict.

        Returns:
            (DynamicInferenceEvent) Deserialized event.
        """
        event_type = DynamicInferenceEventType[obj["type"]]

        # Pre-process payload before construction (since __post_init__ validates types).
        init_obj = {**obj, "type": event_type}
        if obj["payload"] is not None:
            if event_type in (
                DynamicInferenceEventType.ERROR_TRANSIENT,
                DynamicInferenceEventType.ERROR_NONTRANSIENT,
            ):
                from .contexts.dynamic_context import ContextErrorFactory  # avoid circular import.

                init_obj["payload"] = ContextErrorFactory.deserialize(obj["payload"])

        return cls(**init_obj)


@experimental_api
@dataclass(kw_only=True)
class DynamicInferenceRequest(InferenceRequest):
    """Class for one inference request

    Containing relevant data for an dynamic inference request

    """

    request_id: int
    prompt: Optional[str] = None
    prompt_tokens: Optional[torch.Tensor] = None
    # remaining prompt tokens are used for chunked prefill
    remaining_prompt_tokens: Optional[torch.Tensor] = None
    policy_epoch: Optional[list[tuple[int, int]]] = None
    kv_cache_epoch: Optional[list[tuple[int, int]]] = None
    latency: Optional[float] = None
    # routing_indices stores MoE routing decisions for all tokens generated so far.
    # Shape: [total_tokens, num_layers, topk] - accumulated across all generation steps
    routing_indices: Optional[torch.Tensor] = None
    finished_chunk_token_count: int = 0
    stop_word_ids: Optional[List[List[int]]] = None  # Tokenized stop words (populated internally)

    # Prefix caching fields
    block_size_tokens: Optional[int] = None  # Block size for hash computation
    enable_prefix_caching: bool = False  # Whether prefix caching is enabled

    # Computed field - not passed by caller
    precomputed_block_hashes: List[int] = field(default_factory=list)

    def __post_init__(self):
        self.sampling_params = copy.deepcopy(self.sampling_params)
        if self.prompt_tokens is not None:
            self.remaining_prompt_tokens = self.prompt_tokens

        # Compute block hashes for prefix matching (skip if already provided, e.g. from `merge`).
        if (
            self.enable_prefix_caching
            and self.block_size_tokens is not None
            and self.prompt_tokens is not None
            and not self.precomputed_block_hashes
        ):
            self._compute_block_hashes()

    def _compute_block_hashes(self) -> None:
        """Compute hashes for all complete blocks in the prompt.

        After this call:
        - precomputed_block_hashes is [] if prompt < block_size (no complete blocks)
        - precomputed_block_hashes is [hash1, ...] for N complete blocks
        """
        self.precomputed_block_hashes = compute_block_hashes_batched(
            self.prompt_tokens, self.block_size_tokens
        )

    @property
    def remaining_prompt_length(self):
        """
        Get the length of the remaining prompt tokens.
        """
        return len(self.remaining_prompt_tokens)

    ttft: Optional[float] = None
    events: List[DynamicInferenceEvent] = field(default_factory=list)
    event_add_engine: Optional[DynamicInferenceEvent] = field(default=None, repr=False)
    generated_tokens: List[int] = field(default_factory=list)

    def __str__(self):
        return ", ".join(
            (
                f"id {self.request_id}",
                f"{self.status.name}" if self.status is not None else "[NOT ADDED]",
                f"prompt len {len(self.prompt_tokens)}",
                f"gen len {len(self.generated_tokens)}",
                f"num events {len(self.events)}",
            )
        )

    def serialize(self):
        """Converts the instance into a serializable dictionary.

        Returns:
            (dict) A dictionary representation of the instance suitable for
                serialization.
        """
        torch.cuda.nvtx.range_push("DynamicInferenceRequest.serialize")
        obj = super().serialize()
        obj["events"] = [e.serialize() for e in self.events]
        obj.pop("event_add_engine", None)

        # Sanity check routing_indices: Tensor [total_tokens - 1, num_layers, topk]
        if self.routing_indices is not None:
            total_tokens = len(self.prompt_tokens) + len(self.generated_tokens)
            # the last generated token does not undergo a forward pass
            # hence we expect routing indices for total_tokens - 1
            assert self.routing_indices.shape[0] == total_tokens - 1, (
                f"routing_indices first dimension {self.routing_indices.shape[0]} does not match "
                f"total tokens {total_tokens-1}."
            )

        torch.cuda.nvtx.range_pop()
        return obj

    def _post_deserialize(self, obj):
        super()._post_deserialize(obj)
        self.events = [DynamicInferenceEvent.deserialize(e) for e in obj.get("events", [])]

    @property
    def tracked_metadata(self) -> List[Any]:
        """Obtain an ordered list of all request metadata to be tracked by the context.

        This consists of metadata that is used to inform text generation.
        The values of such fields are tensorized and kept aligned with the current active batch.

        Note that while the general request object is mutable, this metadata is
        inherently assumed to remain immutable once the request becomes active.
        """
        sp = self.sampling_params
        if sp.termination_id is None:
            if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0:
                warnings.warn(
                    f"DynamicInferenceRequest {self.request_id} has no termination_id set "
                    "in its sampling_params. Defaulting to -1."
                )
            sp.termination_id = -1
        return [getattr(sp, field) for field, _, _ in self.get_metadata_types()]

    @staticmethod
    def get_metadata_types() -> List[Tuple[str, torch.dtype, bool]]:
        """Keeps track of all request metadata names, dtypes, and target device.

        Returns:
            List[Tuple[str, torch.dtype, bool]]: Mapping from metadata name to:
                name (str) - The name of the metadata field.
                dtype (torch.dtype) - The datatype of the metadata.
                on_device (bool) - Whether the metadata lives on GPU (True) or CPU (False).
        """
        return [
            ("temperature", torch.float32, False),  # CPU for torch sampling
            ("top_k", torch.int32, False),  # CPU for torch sampling
            ("top_p", torch.float32, False),  # CPU for torch sampling
            ("termination_id", torch.int64, True),
            ("return_log_probs", torch.bool, False),  # CPU for non-selective logprobs
            ("skip_prompt_log_probs", torch.bool, False),  # CPU for non-selective logprobs
            ("top_n_logprobs", torch.int32, False),  # CPU for torch sampling
        ]

    def add_event(
        self, type: DynamicInferenceEventType, payload: Optional[Any] = None
    ) -> DynamicInferenceEvent:
        """Add event."""
        event = DynamicInferenceEvent(type=type, payload=payload)
        self.events.append(event)
        return event

    def add_event_add_engine(self):
        """Add 'add_engine' event - called when request enters the engine queue."""
        self.event_add_engine = self.add_event(DynamicInferenceEventType.ADD_ENGINE)
        return self.event_add_engine

    def add_event_add_context(self):
        """Add 'add_context' event - called when request is added to context for prefill."""
        return self.add_event(DynamicInferenceEventType.ADD_CONTEXT)

    def add_event_generated_token(
        self,
        token: int,
        blocks_total: Optional[int] = None,
        blocks_hashed_total: Optional[int] = None,
        blocks_hashed_active: Optional[int] = None,
        blocks_ref_count: Optional[int] = None,
        pre_fwd_active_token_count: Optional[int] = None,
        pre_fwd_step_count: Optional[int] = None,
    ):
        """Add 'generated_token' event - records each generated token.

        Args:
            token (int): The token ID that was generated.
            blocks_total (int): Total block capacity from allocator.
            blocks_hashed_total (int): All allocated (hashed) blocks.
            blocks_hashed_active (int): Blocks with ref_count > 0.
            blocks_ref_count (int): Sum of block ref counts from allocator.
            pre_fwd_active_token_count (int): Active token count before forward pass.
            pre_fwd_step_count (int): Step count before forward pass.
        """
        payload = {"token_id": token}
        if blocks_total is not None:
            payload["blocks_total"] = blocks_total
        if blocks_hashed_total is not None:
            payload["blocks_hashed_total"] = blocks_hashed_total
        if blocks_hashed_active is not None:
            payload["blocks_hashed_active"] = blocks_hashed_active
        if blocks_ref_count is not None:
            payload["blocks_ref_count"] = blocks_ref_count
        if pre_fwd_active_token_count is not None:
            payload["pre_fwd_active_token_count"] = pre_fwd_active_token_count
        if pre_fwd_step_count is not None:
            payload["pre_fwd_step_count"] = pre_fwd_step_count
        return self.add_event(DynamicInferenceEventType.GENERATED_TOKEN, payload)

    def add_event_pause(self):
        """Add 'pause' event."""
        return self.add_event(DynamicInferenceEventType.PAUSE)

    def add_event_evict(self):
        """Add 'evict' event."""
        return self.add_event(DynamicInferenceEventType.EVICT)

    def add_event_finish(self):
        """Add 'finish' event."""
        return self.add_event(DynamicInferenceEventType.FINISH)

    def add_event_fail(self):
        """Add 'fail' event."""
        return self.add_event(DynamicInferenceEventType.FAIL)

    def add_event_error_transient(self, error: Exception):
        """Add transient error event."""
        return self.add_event(DynamicInferenceEventType.ERROR_TRANSIENT, error)

    def add_event_error_nontransient(self, error: Exception):
        """Add non-transient error event."""
        return self.add_event(DynamicInferenceEventType.ERROR_NONTRANSIENT, error)

    def succeeded(self) -> bool:
        """Request experienced no non-transient errors."""
        return self.status == Status.COMPLETED

    def failed(self) -> bool:
        """Request experienced non-transient error."""
        return self.status == Status.FAILED


@dataclass(kw_only=True)
class DynamicInferenceRequestRecord:
    """History of DynamicInferenceRequest objects over multiple request
    checkpoints."""

    requests: list[DynamicInferenceRequest] = field(default_factory=list)
    latency: Optional[float] = None

    @classmethod
    def from_request(cls, request: DynamicInferenceRequest) -> "DynamicInferenceRequestRecord":
        """Initialize record from a single request.

        Args:
            request (DynamicInferenceRequest): Initial request.

        Returns:
            (DynamicInferenceRequestRecord) A record.
        """
        record = cls()
        record.requests.append(request)
        return record

    def __getitem__(self, idx: int) -> DynamicInferenceRequest:
        """Get request by index.

        Args:
            idx (int): Request index.

        Returns:
            (DynamicInferenceRequest) Request object.
        """
        return self.requests[idx]

    @property
    def request_id(self) -> int:
        """Get request id.

        Returns:
            (int) Request id.
        """
        return self.requests[0].request_id

    def checkpoint(self, tokenizer: MegatronTokenizer | None = None):
        """Maintain reference to previous request, and then append a new request
        that concatenates the previous prompt and generations.

        Args:
            tokenizer (MegatronTokenizer | None): (Deprecated) Tokenizer.
        """

        old_request = self[-1]

        # Carry forward policy_epoch as-is.
        policy_epoch = old_request.policy_epoch

        # Reset kv_cache_epoch to None: the KV cache is recomputed fresh after checkpoint;
        # the engine's stamping logic will initialize a new stamp record with the recompute epoch.
        kv_cache_epoch = None

        # New prompt (concatenate prompt + generated tokens).
        new_prompt_tokens = torch.cat(
            (
                old_request.prompt_tokens,
                torch.tensor(
                    old_request.generated_tokens,
                    dtype=old_request.prompt_tokens.dtype,
                    device=old_request.prompt_tokens.device,
                ),
            ),
            dim=0,
        )

        # New sampling params.
        new_sampling_params = SamplingParams(
            **{
                **asdict(old_request.sampling_params),
                "num_tokens_to_generate": (
                    old_request.sampling_params.num_tokens_to_generate
                    - len(old_request.generated_tokens)
                ),
            }
        )

        # New request.
        new_request = DynamicInferenceRequest(
            request_id=old_request.request_id,
            prompt_tokens=new_prompt_tokens,
            sampling_params=new_sampling_params,
            policy_epoch=policy_epoch,
            kv_cache_epoch=kv_cache_epoch,
        )
        # Preserve event_add_engine from old request if it exists, otherwise set it.
        # This ensures TTFT calculation works correctly for evicted/resumed requests.
        if old_request.event_add_engine is not None:
            new_request.event_add_engine = old_request.event_add_engine
        else:
            new_request.add_event_add_engine()
        self.requests.append(new_request)

    def merge(self, tokenizer: MegatronTokenizer | None = None) -> DynamicInferenceRequest:
        """Merge requests into a single checkpoint-agnostic request object.

        Args:
            tokenizer (MegatronTokenizer | None): (Deprecated) Tokenizer.

        Returns:
            (DynamicInferenceRequest) Merged request.
        """

        def merge_lists(key):
            if getattr(self.requests[0], key) is None:
                return None
            else:
                return [v for r in self.requests for v in getattr(r, key)]

        prompt_tokens = self.requests[0].prompt_tokens
        prompt_text = self.requests[0].prompt
        routing_indices = None
        if self.requests[0].routing_indices is not None:
            routing_indices = torch.cat([r.routing_indices for r in self.requests])
        generated_tokens = merge_lists("generated_tokens")
        try:
            generated_text = "".join(r.generated_text for r in self.requests)
        except TypeError as e:  # generally means r.generated_text is None
            generated_text = None

        policy_epoch = self.requests[-1].policy_epoch
        kv_cache_epoch = self.requests[-1].kv_cache_epoch

        # Merged request.
        request = DynamicInferenceRequest(
            request_id=self.requests[0].request_id,
            prompt=prompt_text,
            prompt_tokens=prompt_tokens,
            prompt_log_probs=self.requests[0].prompt_log_probs,
            prompt_top_n_logprobs=self.requests[0].prompt_top_n_logprobs,
            generated_text=generated_text,
            generated_tokens=generated_tokens,
            generated_length=len(generated_tokens),
            generated_log_probs=merge_lists("generated_log_probs"),
            generated_top_n_logprobs=merge_lists("generated_top_n_logprobs"),
            sampling_params=self.requests[0].sampling_params,
            policy_epoch=policy_epoch,
            kv_cache_epoch=kv_cache_epoch,
            ttft=self.requests[0].ttft,
            tpot=merge_lists("tpot"),
            status=self.requests[-1].status,
            latency=self.latency,
            events=merge_lists("events"),
            routing_indices=routing_indices,
            block_size_tokens=self.requests[0].block_size_tokens,
            enable_prefix_caching=self.requests[0].enable_prefix_caching,
            precomputed_block_hashes=self.requests[0].precomputed_block_hashes,
        )

        return request

    def serialize(self) -> dict:
        """Converts the instance into a serializable dictionary.

        Returns:
            (dict) A dictionary representation of the instance suitable for
                serialization.
        """
        torch.cuda.nvtx.range_push("DynamicInferenceRequestRecord.serialize")
        obj = self.__dict__.copy()  # shallow dict copy
        obj["requests"] = [r.serialize() for r in obj["requests"]]
        torch.cuda.nvtx.range_pop()
        return obj

    @classmethod
    def deserialize(cls, obj: dict) -> "DynamicInferenceRequestRecord":
        """Deserialize record.

        Args:
            obj (dict): Serialized record data.

        Returns:
            (DynamicInferenceRequestRecord) Deserialized record.
        """
        request = cls(**obj)
        request.requests = [DynamicInferenceRequest.deserialize(r) for r in obj["requests"]]
        return request


@dataclass(kw_only=True)
class VLMInferenceRequest(InferenceRequest):
    """Class for a VLM inference request"""

    num_img_embeddings_per_tile: int
    imgs: torch.Tensor
    num_tiles: torch.Tensor
    decoder_seq_length: int


================================================
FILE: megatron/core/inference/model_inference_wrappers/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import abc
from typing import Any, Dict, Iterable, Optional, Union

import torch

from megatron.core.fp8_utils import prepare_model_for_fp8_inference
from megatron.core.inference.communication_utils import (
    is_pipeline_first_stage,
    is_pipeline_last_stage,
    recv_from_prev_pipeline_rank_,
    send_to_next_pipeline_rank,
)
from megatron.core.inference.contexts import BaseInferenceContext
from megatron.core.models.gpt.gpt_model import GPTModel
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.utils import deprecate_args, get_attr_wrapped_model, get_model_config

DEPRECATED_ARGS = ["inference_wrapper_config", "pg_collection"]


class AbstractModelInferenceWrapper(abc.ABC):
    """Abstract inference wrapper

    Extend this to create a version for your model.

    The wrapper prepares the model for inference, provides the required input data and
    runs the forward pass.

    Args:
        model (Union[GPTModel, LegacyGPTModel]): The actual GPT model (MCore
            or MLM).
        inference_context (BaseInferenceContext): Context for managing KV
            cache and other inference params.
    """

    @deprecate_args(*DEPRECATED_ARGS)
    def __init__(
        self,
        model: Union['LegacyGPTModel', GPTModel],  # type: ignore[name-defined]
        inference_context: BaseInferenceContext,
    ):
        assert not isinstance(
            model, Iterable
        ), 'interleaving schedule is not supported for inference'
        self.model = model
        self.config = get_model_config(self.model)
        self.pipeline_communication_dtype = (
            torch.float if self.config.fp32_residual_connection else self.config.params_dtype
        )
        self.sequence_parallel = self.config.sequence_parallel

        self.inference_context = inference_context

        # Get the inference pg_collection from the config if it exists; otherwise the training
        # pg_collection might be used during RL
        if (pg_collection := self.inference_context.config.pg_collection) is None:
            pg_collection = ProcessGroupCollection.use_mpu_process_groups()

        self.tp_group = pg_collection.tp
        self.pp_group = pg_collection.pp
        self.tp_size = torch.distributed.get_world_size(self.tp_group)

        if self.config.fp8 is not None and self.config.transformer_impl != "inference_optimized":
            self.model = prepare_model_for_fp8_inference(self.model)

        # TODO(ksanthanam): Add support for fp4

    def prep_model_for_inference(self):
        """A utility function for preparing model for inference

        The function gets called once before the auto regressive inference loop.
        It puts the model in eval mode.

        """
        self.model.eval()

        # For TP only model both is_pp_first_stage and _is_pp_last_stage returns True
        self.model_is_pipeline_parallel = not (
            is_pipeline_first_stage(self.pp_group) and is_pipeline_last_stage(self.pp_group)
        )

        self.inference_context.reset()

    @abc.abstractmethod
    def prep_inference_input(self, prompt_tokens) -> Dict[str, Any]:
        """Prepares the inference input data.

        Args:
            prompts_tokens (torch.Tensor): A tensor of shape [batch_size, max_seq_len]

        Returns:
            A dict with all the inference input needed for the batch.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def get_batch_for_context_window(self, *args, **kwargs) -> Dict[str, Any]:
        """Returns the input data for inference

        This function gets called iteratively in the inference loop.
        It can be used to extract relevant input from the prompt tokens, attention mask etc.
        required for each step in inference.

        """
        raise NotImplementedError()

    def _forward(self, inference_input):
        """Runs a forward pass of the model.

        Args:
            inference_input(Dict[str, Any]): The input data.

        Returns:
            The model output logits.
        """
        tokens = inference_input["tokens"]
        position_ids = inference_input["position_ids"]
        attention_mask = inference_input["attention_mask"]
        return self.model(
            tokens,
            position_ids,
            attention_mask,
            inference_context=self.inference_context,
            runtime_gather_output=True,  # Inference should always gather the logits
        )

    @torch.inference_mode()
    def dummy_forward(self):
        """Run a dummy forward pass through the model, with a single token.
        Use-case: Used in EP on ranks which do not have any work, but are needed
        for the all-to-all communication.
        Runs under inference_mode so that transformer layers can distinguish this eager
        dummy_forward from training/validation passes and skip matching on CUDA graphs."""

        # we use num_dummy_tokens equal to tensor model parallel size
        # so that the dummy forward pass will work with sequence parallel
        num_dummy_tokens = self.tp_size
        tokens = torch.zeros(
            (1, num_dummy_tokens), dtype=torch.long, device=torch.cuda.current_device()
        )
        position_ids = torch.zeros(
            (1, num_dummy_tokens), dtype=torch.long, device=torch.cuda.current_device()
        )
        attention_mask = None
        # Always skip MTP during dummy forwards.  When num_speculative_tokens > 0
        # the serial MTP path handles MTP separately (with its own dummy forward).
        # When num_speculative_tokens == 0 MTP is not needed at all.  In both
        # cases, running MTP here would issue MoE all-to-all collectives that the
        # real EP ranks do not execute, causing a hang.
        is_spec_decode = (
            self.inference_context.is_dynamic_batching() and self.config.mtp_num_layers is not None
        )
        return self.model(tokens, position_ids, attention_mask, is_spec_decode=is_spec_decode)

    def _get_batch_size_and_seq_len(
        self, tokens: torch.Tensor, recv_buffer_seq_len: Optional[int] = None
    ):
        """
        Returns the batch size and sequence length based on the tokens tensor and
        recv_buffer_seq_len.

        Args:
            tokens (torch.Tensor): The input tensor of shape (batch_size, seq_len).
            recv_buffer_seq_len (int, optional): An optional recv buffer sequence length.

        Returns:
            tuple: A tuple (batch_size, seq_len), where batch_size is the first dimension of
                tokens and seq_len is either the second dimension or recv_buffer_seq_len.
        """
        batch_size = tokens.shape[0]
        seq_len = recv_buffer_seq_len if recv_buffer_seq_len is not None else tokens.shape[1]
        return batch_size, seq_len

    def _allocate_recv_buffer(self, batch_size, seq_len):
        """Receive happens between the layers with size [seq_len, batch_size, hidden_size]."""
        if self.sequence_parallel and self.inference_context.is_dynamic_batching():
            # For dynamic inference we need to explicitly adjust the recv buffer size here for
            # sequence parallelism. Static batching does not support sequence parallelism
            # except for the MoE layers which is handled separately.
            seq_len = seq_len // self.tp_size
        recv_size = (seq_len, batch_size, self.config.hidden_size)
        return torch.empty(
            recv_size, dtype=self.pipeline_communication_dtype, device=torch.cuda.current_device()
        )

    def forward_pass_without_pipeline_parallel(
        self, inference_input: Dict[str, Any]
    ) -> torch.Tensor:
        """Utility to carry out simple forward pass for TP or no model parallel models

        Runs a very simple forward pass for model. Used  in the case of models without any
        parallelism or only tensor parallelism.

        Args:
            inference_input (Dict[str, Any]): A dict containg the inputs for the gpt model
                [tokens, position ids, attention mask]

        Returns:
            torch.Tensor: The output logits of shape [batch_size, seq_len, padded_vocab_size]
        """
        tokens = inference_input["tokens"]
        logits = self._forward(inference_input)
        self.inference_context.increment_sequence_len_offset(tokens.size(1))

        return logits

    def forward_pass_with_pipeline_parallel(
        self, inference_input: Dict[str, Any], recv_buffer_seq_len: Optional[int] = None
    ) -> torch.Tensor:
        """Utility to carry out forward pass for PP models

        TODO: Add support for asynchronous microbatches

        Args:
            inference_input (Dict[str, Any]): A dict containing the inputs for the gpt model
                [tokens, position ids, attention mask]
            recv_buffer_seq_len (int): An optional sequence length for the pipeline parallel
                recv buffer.

        Returns:
            torch.Tensor: The output logits of shape [batch_size, seq_len, padded_vocab_size]
        """
        tokens = inference_input["tokens"]
        position_ids = inference_input["position_ids"]
        attention_mask = inference_input["attention_mask"]

        batch_size, seq_len = self._get_batch_size_and_seq_len(tokens, recv_buffer_seq_len)
        recv_buffer = None
        if not is_pipeline_first_stage(self.pp_group):
            recv_buffer = self._allocate_recv_buffer(batch_size, seq_len)
            recv_from_prev_pipeline_rank_(recv_buffer, self.pp_group)

        set_input_tensor = get_attr_wrapped_model(self.model, "set_input_tensor")
        set_input_tensor(recv_buffer)
        output_tensor = self._forward(inference_input)

        if not is_pipeline_last_stage(self.pp_group):
            send_to_next_pipeline_rank(
                output_tensor.type(dtype=self.pipeline_communication_dtype), self.pp_group
            )

        self.inference_context.increment_sequence_len_offset(seq_len)

        logits = None
        if is_pipeline_last_stage(self.pp_group):
            logits = output_tensor

            # Explicitly cast logits to expected dtype
            logits = logits.to(self.config.params_dtype)

        return logits

    @torch.inference_mode()
    def run_one_forward_step(
        self, inference_input: Dict[str, Any], recv_buffer_seq_len: Optional[int] = None
    ) -> torch.Tensor:
        """The forward pass of the model for inference

        Appropriate utility is called for the forward pass depending on the type of model
        parallelism used

        Args:
            inference_input (Dict[str, Any]): A dict containing the inputs for the gpt model
                [tokens, position ids, attention mask]
            recv_buffer_seq_len (int): An optional sequence length for the pipeline parallel
                recv buffer.

        Returns:
            torch.Tensor: The output logits of shape [batch_size, seq_len, padded_vocab_size].
            The logits are returned only in the last pipeline stage for PP models.
        """
        # Check if we are in a PP model
        if not (is_pipeline_first_stage(self.pp_group) and is_pipeline_last_stage(self.pp_group)):
            tokens = inference_input["tokens"]
            current_batch_size, seq_len = self._get_batch_size_and_seq_len(
                tokens, recv_buffer_seq_len
            )
            return self.forward_pass_with_pipeline_parallel(inference_input, recv_buffer_seq_len)
        else:
            return self.forward_pass_without_pipeline_parallel(inference_input)


================================================
FILE: megatron/core/inference/model_inference_wrappers/gpt/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from typing import Any, Dict, Optional, Tuple

import torch

from megatron.core.inference.contexts import BaseInferenceContext
from megatron.core.inference.model_inference_wrappers.abstract_model_inference_wrapper import (
    AbstractModelInferenceWrapper,
)
from megatron.core.inference.utils import get_attention_mask
from megatron.core.models.gpt import GPTModel
from megatron.core.transformer.enums import AttnBackend
from megatron.core.utils import deprecate_args, get_model_config

DEPRECATED_ARGS = ["inference_wrapper_config", "pg_collection"]


# pylint: disable=line-too-long
class GPTInferenceWrapper(AbstractModelInferenceWrapper):
    """Inference wrapper for GPT model.

    The wrapper prepares the model for inference, provides the required input data, and runs the forward pass

    Args:
        model (GPTModel): The GPT model (MCore or legacy)
        inference_context (BaseInferenceContext): Manages KV cache, and tracks
            sequence/token/batch offsets.
    """

    @deprecate_args(*DEPRECATED_ARGS)
    def __init__(self, model: GPTModel, inference_context: Optional[BaseInferenceContext] = None):
        super().__init__(model, inference_context)

    def prep_inference_input(self, prompts_tokens: torch.Tensor) -> Dict[str, Any]:
        """Prepares the inference input data.

        Args:
            prompts_tokens (torch.Tensor): A tensor of shape [batch_size, max_seq_len]

        Returns:
            A dict with all the inference input needed for the batch.
        """
        assert (
            not self.inference_context.is_decode_only()
        ), "`prep_inference_input` should only be called in prefill mode"

        attention_mask, position_ids = self._build_attention_mask_and_position_ids(prompts_tokens)
        return {
            "tokens": prompts_tokens,
            "attention_mask": attention_mask,
            "position_ids": position_ids,
        }

    def _build_attention_mask_and_position_ids(
        self, prompts_tokens: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Builds the full attention mask and position ids for the input tokens

        Args:
            prompts_tokens (torch.Tensor): A tensor of shape [batch_size, max_seq_len]

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: The attention mask of shape [1, 1, max_seq_len, max_seq_len] and position ids of shape [batch_size, max_seq_len]
        """
        seq_length = prompts_tokens.size(1)
        config = get_model_config(self.model)

        attention_backend = config.attention_backend

        if attention_backend == AttnBackend.local:
            attention_mask = get_attention_mask(seq_length)
        elif (
            attention_backend == AttnBackend.flash
            or attention_backend == AttnBackend.fused
            or attention_backend == AttnBackend.unfused
            or attention_backend == AttnBackend.auto
        ):
            # TE creates the attention mask internally
            attention_mask = None
        else:
            raise ValueError(f"Unknown attention backend {attention_backend}")

        position_ids = (
            torch.arange(seq_length, dtype=torch.long, device=prompts_tokens.device)
            .unsqueeze(0)
            .expand_as(prompts_tokens)
        )

        return attention_mask, position_ids

    def get_batch_for_context_window(
        self,
        inference_input: Dict[str, Any],
        context_start_position: int,
        context_end_position: int,
    ) -> Dict[str, Any]:
        """Returns the inference data given context window

        This function gets called iteratively in a loop . Given the start and end context positions , it extracts the appropriate data.

        Args:
            inference_input (Dict[str, Any]): The inference input for the batch.
            context_start_position (int): Start of the context window. During the first inference step it is mostly 0
            context_end_position (int): End of the context window. During the last inference step it will mostly be the max generated sequence length.

        Returns:
            Dict[str, Any]: A dict of inputs that will be used by your model in the forward step
        """
        tokens = inference_input["tokens"]
        position_ids = inference_input["position_ids"]
        attention_mask = inference_input["attention_mask"]
        tokens2use = tokens[:, context_start_position:context_end_position]
        positions2use = position_ids[:, context_start_position:context_end_position]
        if attention_mask is not None:
            attention_mask2use = attention_mask[
                ..., context_start_position:context_end_position, :context_end_position
            ]
        else:
            attention_mask2use = None
        return {
            "tokens": tokens2use,
            "position_ids": positions2use,
            "attention_mask": attention_mask2use,
        }


================================================
FILE: megatron/core/inference/model_inference_wrappers/multimodal/vlm_inference_wrapper.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import warnings
from typing import Any, Dict, Optional

import torch

from megatron.core.inference.communication_utils import (
    is_pipeline_first_stage,
    is_pipeline_last_stage,
)
from megatron.core.inference.contexts import StaticInferenceContext
from megatron.core.inference.model_inference_wrappers.gpt.gpt_inference_wrapper import (
    GPTInferenceWrapper,
)


# pylint: disable=line-too-long
class VLMInferenceWrapper(GPTInferenceWrapper):
    """Inference wrapper for VLMs"""

    def prep_model_for_inference(self, prompts_tokens: Optional[torch.Tensor] = None):
        """A utility function for preparing model for inference

        The function gets called once before the auto regressive inference loop.
        It puts the model in eval mode.

        Args:
            prompts_tokens (torch.Tensor): Deprecated, will be removed in `megatron-core` 0.13
        """
        if prompts_tokens is not None:
            warnings.warn(
                "Passing `prompts_tokens` is deprecated and this argument will be ignored."
                "This parameter will be removed in `megatron-core` 0.13."
            )

        super().prep_model_for_inference()

        # For TP only model both is_pp_first_stage and _is_pp_last_stage returns True
        # set ignore_virtual=True since vpp is not used in inference
        self.model_is_pipeline_parallel = not (
            is_pipeline_first_stage(self.pp_group) and is_pipeline_last_stage(self.pp_group)
        )

        self._recv_only_vision_embeds = False
        pp_rank = self.pp_group.rank()
        # Checks if the previous stage only has a vision encoder, and that the current stage
        # has part of the LM decoder. In this case, the current stage should only receive
        # vision embeddings.
        if pp_rank > 0:
            self._recv_only_vision_embeds = False  # TODO: Implement new logic for vision embeddings

        # Checks if the current stage only has a vision encoder
        self._encoder_only = False  # TODO: Implement new logic for encoder-only stages

    def prep_inference_input(
        self,
        prompts_tokens: torch.Tensor,
        num_img_embeddings_per_tile: int,
        images: torch.Tensor,
        num_tiles: torch.Tensor,
        decoder_seq_length: int,
    ):
        """Prepares the inference input data.

        Args:
            prompts_tokens (torch.Tensor): A tensor of shape [batch_size, max_seq_len]
            num_img_embeddings_per_tile (int): The number of image embeddings per tile
            images (torch.Tensor): The image embeddings
            num_tiles (torch.Tensor): The number of tiles for each input image
            decoder_seq_length (int): The decoder sequence length
        """
        inference_input = super().prep_inference_input(prompts_tokens)

        total_num_tiles = torch.sum(num_tiles).item()
        num_img_embeddings = num_img_embeddings_per_tile * total_num_tiles

        batch_size, max_sequence_length = prompts_tokens.shape
        self.inference_context = StaticInferenceContext(
            batch_size, max_sequence_length + num_img_embeddings
        )

        inference_input["images"] = images
        inference_input["num_tiles"] = num_tiles
        inference_input["num_img_embeddings"] = num_img_embeddings
        inference_input["decoder_seq_length"] = decoder_seq_length

        return inference_input

    def get_batch_for_context_window(
        self,
        inference_input: Dict[str, Any],
        context_start_position: int,
        context_end_position: int,
    ) -> Dict[str, Any]:
        """Returns the inference data given context window

        This function gets called iteratively in a loop . Given the start and end context positions , it extracts the appropriate data.

        Args:
            inference_input (Dict[str, Any]): The inference input for the batch.
            context_start_position (int): Start of the context window. During the first inference step it is mostly 0
            context_end_position (int): End of the context window. During the last inference step it will mostly be the max generated sequence length.

        Returns:
            Dict[str, Any]: A dict of inputs that will be used by your model in the forward step
        """
        tokens = inference_input["tokens"]
        position_ids = inference_input["position_ids"]
        images = inference_input["images"]
        num_tiles = inference_input["num_tiles"]
        num_img_embeddings = inference_input["num_img_embeddings"]
        decoder_seq_length = inference_input["decoder_seq_length"]

        tokens2use = tokens[:, context_start_position:context_end_position]
        positions2use = position_ids[:, context_start_position:context_end_position]

        return {
            "tokens": tokens2use,
            "position_ids": positions2use,
            "images": images,
            "num_tiles": num_tiles,
            "num_img_embeddings": num_img_embeddings,
            "decoder_seq_length": decoder_seq_length,
        }

    def _forward(self, inference_input: Dict[str, Any]):
        """Runs a forward pass of the model.

        Args:
            inference_input(Dict[str, Any]): The input data.

        Returns:
            The model output logits.
        """
        images = inference_input["images"]
        tokens = inference_input["tokens"]
        position_ids = inference_input["position_ids"]
        num_image_tiles = inference_input["num_tiles"]

        output = self.model(
            images,
            tokens,
            position_ids=position_ids,
            attention_mask=None,
            inference_context=self.inference_context,
            num_image_tiles=num_image_tiles,
            runtime_gather_output=True,
        )
        if isinstance(output, tuple):
            logits, _ = output
        else:
            logits = output
        return logits

    def run_one_forward_step(self, inference_input: Dict[str, Any]) -> torch.Tensor:
        """The forward pass of the model for inference

        Args:
            inference_input (Dict[str, Any]): A dict containing the inputs for the VLM model

        Returns:
            torch.Tensor: The output logits of shape [batch_size, seq_len, padded_vocab_size].
            The logits are returned only in the last pipeline stage for PP models.
        """
        tokens = inference_input["tokens"]
        num_image_tokens = (tokens == self.model.module.image_token_index).sum().item()
        num_img_embeddings = inference_input["num_img_embeddings"]
        decoder_seq_length = inference_input["decoder_seq_length"]
        num_tokens = tokens.size(1)
        recv_buffer_seq_len = None
        if num_image_tokens > 0:
            # When there are image tokens and this stage only receives vision embeddings,
            # adjust the recv buffer seq length to match the image embeddings sequence length.
            # If there are image tokens and this stage receives full embeddings, make sure we
            # compensate for expansion of image tokens.
            # Note that this will set a recv_buffer_seq_len for the encoder stage,
            # this length is irrelevant since that recv buffer is never allocated.
            if self._recv_only_vision_embeds:
                recv_buffer_seq_len = num_img_embeddings
            else:
                recv_buffer_seq_len = min(
                    num_img_embeddings + num_tokens - num_image_tokens, decoder_seq_length
                )
        elif self._recv_only_vision_embeds:
            # If this stage only receives vision embeddings and there are no image tokens
            # we won't run the encoder and therefore shouldn't try to recv.
            recv_buffer_seq_len = 0

        # If the pipeline stage only has a vision encoder, then it only needs to
        # run when there are image tokens
        if not (self._encoder_only and num_image_tokens == 0):
            output = super().run_one_forward_step(
                inference_input, recv_buffer_seq_len=recv_buffer_seq_len
            )
        else:
            output = None
        logits = output

        # On the first inference iteration, we compute image tokens.
        # On every PP stage(although inference params should only matter for decoder),
        # update the sequence length offset by the number of image tokens.
        if num_tokens > 1 and num_image_tokens > 0:
            if "image_tokens_count" not in self.inference_context.key_value_memory_dict:
                self.inference_context.key_value_memory_dict["image_tokens_count"] = (
                    num_img_embeddings
                )

            if num_img_embeddings + num_tokens - num_image_tokens > decoder_seq_length:
                self.inference_context.sequence_len_offset += decoder_seq_length - num_tokens
            else:
                self.inference_context.sequence_len_offset += (
                    self.inference_context.key_value_memory_dict["image_tokens_count"]
                    - num_image_tokens
                )

        return logits


================================================
FILE: megatron/core/inference/model_inference_wrappers/t5/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/core/inference/model_inference_wrappers/t5/t5_inference_wrapper.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from collections import deque
from typing import Any, Dict, List, Optional

import numpy
import torch

from megatron.core import tensor_parallel
from megatron.core.datasets.t5_dataset import T5MaskedWordPieceDataset
from megatron.core.inference.contexts import BaseInferenceContext
from megatron.core.inference.model_inference_wrappers.abstract_model_inference_wrapper import (
    AbstractModelInferenceWrapper,
)
from megatron.core.models.T5 import T5Model
from megatron.core.utils import get_attr_wrapped_model


# pylint: disable=line-too-long
class T5InferenceWrapper(AbstractModelInferenceWrapper):
    """Inference wrapper for T5 model.

    The wrapper prepares the model for inference, provides the required input
    data, and runs the forward pass

    Args:
        model (T5Model): The T5 model (MCore or legacy)
        inference_context (BaseInferenceContext): Manages KV cache, and tracks
            sequence/token/batch offsets.
        use_local (bool): Whether  the T5 model's transformer impl
            is local (vs transformer_engine)
    """

    def __init__(
        self,
        model: T5Model,
        inference_context: Optional[BaseInferenceContext] = None,
        use_local: bool = False,
    ):
        super().__init__(model, inference_context)
        self.use_local = use_local

    def prep_inference_input(
        self,
        prompts_tokens: torch.Tensor,
        encoder_prompts: Optional[List[str]] = None,
        tokenizer: Any = None,
    ) -> Dict[str, Any]:
        """Prepares the inference input data.

        Args:
            prompts_tokens (torch.Tensor): A tensor of shape [batch_size, max_seq_len]
            encoder_prompts (dict): List of string of encoder input prompts
            tokenizer (_type_): Tokenizer used for tokenizing and detokenizing text

        Returns:
            A dict with all the inference input needed for the batch.
        """

        # get max_sequence_length
        max_sequence_length = get_attr_wrapped_model(self.model, "max_sequence_length")

        encoder_prompts_tokens_list = [
            self.tokenize_encoder_prompt(encoder_prompt, tokenizer)
            for encoder_prompt in encoder_prompts
        ]
        batch_encoder_prompts_tokens = self.pad_encoder_prompts_tokens(
            encoder_prompts_tokens_list, max_sequence_length, tokenizer
        )

        # create batch mask for encoder_prompt (self.batch_input_tokens) and
        # decoder_input (prompts_tokens), similar to megatron/core/datasets/t5_dataset.py
        decoder_prompts_tokens = prompts_tokens
        encoder_prompts_tokens = batch_encoder_prompts_tokens
        decoder_prompts_tokens_numpy = decoder_prompts_tokens.cpu().numpy()
        encoder_prompts_tokens_numpy = encoder_prompts_tokens.cpu().numpy()
        batch_mask_encoder = []
        batch_mask_decoder = []
        for i in range(len(prompts_tokens)):
            mask_encoder = encoder_prompts_tokens_numpy[i] == tokenizer.pad
            mask_decoder = decoder_prompts_tokens_numpy[i] == tokenizer.pad
            batch_mask_encoder.append(mask_encoder)
            batch_mask_decoder.append(mask_decoder)
        batch_mask_encoder = torch.tensor(numpy.array(batch_mask_encoder)).cuda()
        batch_mask_decoder = torch.tensor(numpy.array(batch_mask_decoder)).cuda()

        return {
            "encoder_tokens": encoder_prompts_tokens,
            "decoder_tokens": decoder_prompts_tokens,
            "encoder_mask": batch_mask_encoder,
            "decoder_mask": batch_mask_decoder,
        }

    def tokenize_encoder_prompt(self, encoder_prompt: str, tokenizer) -> torch.Tensor:
        """Utility to tokenize the encoder_prompt

        Args:
            encoder_prompt (str): The encoder_prompt
            tokenizer (_type_): Tokenizer used for tokenizing and detokenizing string

        Returns:
            torch.Tensor: Returns the tokenized prompt
        """

        # if there is the word "<mask>" in prompt, replacing it with special_additional_token,
        # similar to processing step in megatron/core/datasets/t5_dataset.py
        divided_encoder_prompt_list = encoder_prompt.split("<mask>")
        masks_count = len(divided_encoder_prompt_list) - 1
        sentinels = deque(tokenizer.additional_special_tokens_ids)

        encoder_prompt_tokens = []
        for divided_encoder_prompt in divided_encoder_prompt_list:
            divided_encoder_prompt_tokens = tokenizer.tokenize(divided_encoder_prompt)
            encoder_prompt_tokens.extend(divided_encoder_prompt_tokens)
            if masks_count > 0:
                sentinel = sentinels.popleft()
                encoder_prompt_tokens.extend([sentinel])
                masks_count -= 1

        return encoder_prompt_tokens

    def pad_encoder_prompts_tokens(
        self, encoder_prompts_tokens_list: List[List[int]], max_sequence_length: int, tokenizer
    ) -> torch.Tensor:
        """Method to pad input prompts

        Given a list of prompts, pad them all to uniform length

        Args:
            encoder_prompts_tokens_list (List[List[int]]): A list containing the
                encoder_input_tokens
            max_sequence_length (int): Maximum of the length of the encoder inputs tokens
            tokenizer (_type_): Tokenizer used for tokenizing and detokenizing text

        Returns:
            torch.Tensor: A torch tensor of shape [bs, max_sequence_length]
        """

        for encoder_prompt_tokens in encoder_prompts_tokens_list:
            padding_size = max_sequence_length - len(encoder_prompt_tokens)
            encoder_prompt_tokens.extend([tokenizer.pad] * padding_size)

        return torch.tensor(encoder_prompts_tokens_list).cuda()

    def get_batch_for_context_window(
        self,
        inference_input: Dict[str, Any],
        context_start_position: int,
        context_end_position: int,
    ) -> Dict[str, Any]:
        """Returns the inference data given context window

        This function gets called iteratively in a loop . Given the start and end context
        positions , it extracts the appropriate data.

        Args:
            inference_input (Dict[str, Any]): The inference input for the batch.
            context_start_position (int): Start of the context window. During
                the first inference step it is mostly 0
            context_end_position (int): End of the context window. During the
                last inference step it will mostly be the max generated sequence length.

        Returns:
            Dict: A dict of inputs that will be used by your model in the forward step
        """

        # T5 inference not yet support kv_cache
        encoder_tokens2use = inference_input["encoder_tokens"]
        decoder_tokens2use = inference_input["decoder_tokens"][:, :context_end_position]
        encoder_mask2use = inference_input["encoder_mask"]
        decoder_mask2use = inference_input["decoder_mask"][:, :context_end_position]

        # Configure attention mask based on different conditions
        # (e.g., transformer-impl, TE versions, TE backends)
        [encoder_mask2use, decoder_mask2use, encoder_decoder_mask2use] = (
            T5MaskedWordPieceDataset.config_attention_mask(
                encoder_tokens2use,
                decoder_tokens2use,
                encoder_mask2use,
                decoder_mask2use,
                self.use_local,
            )
        )

        return {
            "encoder_tokens": encoder_tokens2use,
            "decoder_tokens": decoder_tokens2use,
            "encoder_mask": encoder_mask2use,
            "decoder_mask": decoder_mask2use,
            "encoder_decoder_mask": encoder_decoder_mask2use,
        }

    def forward_pass_without_pipeline_parallel(
        self, inference_input: Dict[str, Any]
    ) -> torch.Tensor:
        """Utility to carry out simple forward pass for TP or no model parallel models

        Runs a very simple forward pass for model. Used  in the case of models without
        any parallelism or only tensor parallelism.

        Args:
            inference_input (Dict[str, Any]): A dict containg the inputs for the gpt
                model [tokens, position ids, attention mask]

        Returns:
            torch.Tensor: The output logits of shape [batch_size, seq_len, padded_vocab_size]
        """
        encoder_tokens = inference_input["encoder_tokens"]
        decoder_tokens = inference_input["decoder_tokens"]
        encoder_mask = inference_input["encoder_mask"]
        decoder_mask = inference_input["decoder_mask"]
        encoder_decoder_mask = inference_input["encoder_decoder_mask"]
        tokens = decoder_tokens

        # T5 inference not yet support kv_cache
        logits = self.model(
            encoder_tokens,
            decoder_tokens,
            encoder_mask,
            decoder_mask,
            encoder_decoder_mask,
            inference_context=None,
        )
        logits = tensor_parallel.gather_from_tensor_model_parallel_region(logits, self.tp_group)

        return logits


================================================
FILE: megatron/core/inference/moe/__init__.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import enum

import torch

from .fused_moe import ActivationType, mcore_fused_moe


class InferenceGroupedGemmBackend(enum.Enum):
    """Resolved backend for grouped GEMM operations during inference."""

    FLASHINFER = "flashinfer"
    TORCH = "torch"
    TE = "te"


def resolve_inference_grouped_gemm_backend(
    backend: str, is_cuda_graphed: bool, is_mxfp8: bool = False
) -> InferenceGroupedGemmBackend:
    """Resolve the grouped GEMM backend to use for the current iteration.

    Prerequisites are validated at init time in MoELayer; this function
    simply maps (backend, is_cuda_graphed) to the concrete backend enum.

    Args:
        backend: One of 'auto', 'torch', 'te'.
        is_cuda_graphed: Whether this is a CUDA-graphed iteration.
        is_mxfp8: Whether the model is using MXFP8 quantization (affects auto backend choice).
    Returns:
        An InferenceGroupedGemmBackend enum value.
    """
    if backend == 'auto':
        if is_cuda_graphed:
            if is_mxfp8:
                assert hasattr(torch.nn.functional, 'scaled_grouped_mm'), (
                    "Auto backend selection for MXFP8 requires "
                    "torch.nn.functional.scaled_grouped_mm. "
                    "Please install PyTorch 2.10+."
                )
                return InferenceGroupedGemmBackend.TORCH
            else:
                return InferenceGroupedGemmBackend.FLASHINFER
        else:
            if hasattr(torch.nn.functional, 'grouped_mm'):
                return InferenceGroupedGemmBackend.TORCH
            else:
                return InferenceGroupedGemmBackend.TE
    elif backend == 'torch':
        return InferenceGroupedGemmBackend.TORCH
    elif backend == 'te':
        return InferenceGroupedGemmBackend.TE
    else:
        raise ValueError(
            f"Unknown inference_grouped_gemm_backend: '{backend}'. "
            "Must be 'auto', 'torch', or 'te'."
        )


================================================
FILE: megatron/core/inference/moe/activations.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
"""Padding-aware activation kernels for fused MoE.

These kernels skip padding rows (where permutation_map == -1) to avoid
wasted computation on aligned-but-empty expert slots.
"""

from unittest.mock import MagicMock

import torch

from megatron.core.utils import null_decorator

try:
    import triton
    import triton.language as tl

    HAVE_TRITON = True
except ImportError:
    HAVE_TRITON = False

if not HAVE_TRITON:
    triton = MagicMock()
    triton.jit = null_decorator
    tl = MagicMock()


def _ceil_div(a, b):
    return (a + b - 1) // b


@triton.jit
def _squared_relu_kernel(input_ptr, output_ptr, src_idx_ptr, M, N, BLOCK_N: tl.constexpr):
    """Squared ReLU that skips padding rows (permutation_map == -1)."""
    row = tl.program_id(0)
    if tl.load(src_idx_ptr + row) < 0:
        return
    for n in tl.range(0, N, BLOCK_N):
        o = n + tl.arange(0, BLOCK_N)
        m = o < N
        x = tl.load(input_ptr + row * N + o, mask=m).to(tl.float32)
        r = tl.maximum(x, 0.0)
        tl.store(output_ptr + row * N + o, (r * r).to(tl.bfloat16), mask=m)


def padded_squared_relu(x: torch.Tensor, permutation_map: torch.Tensor) -> torch.Tensor:
    """Squared ReLU activation that skips padding rows."""
    M, N = x.shape
    out = torch.zeros(M, N, dtype=x.dtype, device=x.device)
    BLOCK_N = min(triton.next_power_of_2(N), 1024)
    _squared_relu_kernel[(M,)](x, out, permutation_map, M, N, BLOCK_N=BLOCK_N)
    return out


@triton.jit
def _squared_relu_quantize_kernel(
    input_ptr,
    out_fp8_ptr,
    out_scale_ptr,
    src_idx_ptr,
    K,
    n_col_blocks,
    skip_padding: tl.constexpr,
    REAL_GROUPS: tl.constexpr,
    BLOCK_K: tl.constexpr,
    BLOCK_GROUPS: tl.constexpr,
):
    """Fused squared ReLU + MXFP8 quantize + swizzle in one kernel.

    Grid: (M,) — one program per row.
    Reads BF16 FC1 output, applies squared ReLU, quantizes to FP8,
    writes FP8 data + swizzled scales in place.
    """
    row = tl.program_id(0)
    if skip_padding:
        if tl.load(src_idx_ptr + row) < 0:
            return

    offs = tl.arange(0, BLOCK_K)
    mask = offs < K

    # Load and apply squared ReLU
    x = tl.load(input_ptr + row * K + offs, mask=mask, other=0.0).to(tl.float32)
    relu = tl.maximum(x, 0.0)
    activated = relu * relu

    # Per-group-of-32 quantization
    x_grouped = tl.reshape(activated, [BLOCK_GROUPS, 32])
    abs_grouped = tl.abs(x_grouped)
    max_vals = tl.max(abs_grouped, axis=1)

    dequant_scale = max_vals / 448.0
    dequant_exp = (dequant_scale.to(tl.uint32, bitcast=True) + 0x007FFFFF) & 0x7F800000
    dequant_rounded = dequant_exp.to(tl.float32, bitcast=True)
    quant_scale = tl.where(dequant_rounded == 0, 0.0, 1.0 / dequant_rounded)

    quantized = x_grouped * quant_scale[:, None]
    quantized_flat = tl.reshape(quantized, [BLOCK_K])
    out_fp8 = quantized_flat.to(tl.float8e4nv)

    # Store FP8 data
    tl.store(out_fp8_ptr + row * K + offs, out_fp8, mask=mask)

    # Store swizzled scales
    scale_exp = (dequant_exp >> 23).to(tl.uint8)
    col_offs = tl.arange(0, BLOCK_GROUPS)
    col_mask = col_offs < REAL_GROUPS

    macro_row_block = row // 128
    macro_col_block = col_offs // 4
    local_row = row % 128
    local_col = col_offs % 4
    group = local_row // 32
    sub_row = local_row % 32
    tile_idx = macro_row_block * n_col_blocks + macro_col_block
    swizzled_offs = tile_idx * 512 + sub_row * 16 + group * 4 + local_col

    tl.store(out_scale_ptr + swizzled_offs, scale_exp, mask=col_mask)


def squared_relu_and_quantize_mxfp8(
    x: torch.Tensor, permutation_map: torch.Tensor, skip_padding: bool = True
):
    """Fused squared ReLU + MXFP8 quantize + swizzle.

    Reads BF16 FC1 output, applies squared ReLU, quantizes to FP8 with
    swizzled scales. Single kernel replaces padded_squared_relu + mxfp8_quantize.

    Args:
        x: [M, K] BF16 FC1 output.
        permutation_map: [M] int32, original token index or -1 for padding.
        skip_padding: if True, skip rows where permutation_map == -1.

    Returns:
        MXFP8Tensor with .data [M, K] float8_e4m3fn and .scale (swizzled e8m0).
    """
    from megatron.core.inference.quantization.mxfp8_tensor import MXFP8Tensor

    M, K = x.shape
    assert K % 32 == 0

    scale_cols = K // 32
    n_row_blocks = _ceil_div(M, 128)
    n_col_blocks = _ceil_div(scale_cols, 4)
    total_scale_bytes = n_row_blocks * n_col_blocks * 512

    out_fp8 = torch.empty(M, K, dtype=torch.float8_e4m3fn, device=x.device)
    out_scale = torch.zeros(total_scale_bytes, dtype=torch.uint8, device=x.device)

    BLOCK_K = triton.next_power_of_2(K)
    BLOCK_GROUPS = BLOCK_K // 32

    _squared_relu_quantize_kernel[(M,)](
        x,
        out_fp8,
        out_scale,
        permutation_map,
        K,
        n_col_blocks,
        skip_padding,
        REAL_GROUPS=scale_cols,
        BLOCK_K=BLOCK_K,
        BLOCK_GROUPS=BLOCK_GROUPS,
    )

    return MXFP8Tensor(data=out_fp8, scale=out_scale.view(torch.float8_e8m0fnu), backend="triton")


================================================
FILE: megatron/core/inference/moe/fused_moe.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
"""Fused MoE: permute -> FC1 -> activation -> FC2 -> unpermute.

Supports BF16 weights with torch.nn.functional.grouped_mm.
All permutation logic is handled internally — callers invoke a single function.
"""

from enum import Enum
from typing import Callable, Optional

import torch

from megatron.core.inference.moe.activations import (
    padded_squared_relu,
    squared_relu_and_quantize_mxfp8,
)
from megatron.core.inference.moe.pad import pad_to_alignment, unpad_from_alignment
from megatron.core.inference.moe.permute import (
    permute_and_quantize_mxfp8,
    permute_tokens,
    unpermute_tokens,
)
from megatron.core.inference.quantization.mxfp8_tensor import MXFP8Tensor

try:
    from torch.nn.functional import grouped_mm

    HAVE_GROUPED_MM = True
except ImportError:
    HAVE_GROUPED_MM = False

try:
    from torch.nn.functional import ScalingType, SwizzleType, scaled_grouped_mm

    HAVE_SCALED_GMM = True
except ImportError:
    HAVE_SCALED_GMM = False


class ActivationType(Enum):
    """Activation functions supported by mcore_fused_moe."""

    SQUARED_RELU = "squared_relu"


def _bf16_grouped_mm(
    x_bf16: torch.Tensor, weight: torch.Tensor, offs: torch.Tensor
) -> torch.Tensor:
    """BF16 grouped GEMM using torch.nn.functional.grouped_mm."""
    assert x_bf16.dtype == torch.bfloat16, f"Expected bf16 input, got {x_bf16.dtype}"
    return grouped_mm(x_bf16, weight.transpose(1, 2), offs=offs)


def _mxfp8_grouped_mm(act: MXFP8Tensor, weight: MXFP8Tensor, offs: torch.Tensor) -> torch.Tensor:
    """MXFP8 scaled_grouped_mm with pre-quantized activations and weights."""
    return scaled_grouped_mm(
        act.data,
        weight.data.transpose(1, 2),
        act.scale_2d(),
        ScalingType.BlockWise1x32,
        weight.scale,
        ScalingType.BlockWise1x32,
        swizzle_a=SwizzleType.SWIZZLE_32_4_4,
        swizzle_b=SwizzleType.SWIZZLE_32_4_4,
        offs=offs,
        output_dtype=torch.bfloat16,
    )


def _get_activation_func(activation_type: ActivationType, fused_quant: bool = False) -> Callable:
    """Resolve ActivationType enum to a concrete kernel.

    If fused_quant=True, returns the fused activation + MXFP8 quantize kernel.
    """
    if activation_type == ActivationType.SQUARED_RELU:
        return squared_relu_and_quantize_mxfp8 if fused_quant else padded_squared_relu
    else:
        raise ValueError(f"Unsupported activation type: {activation_type}")


def mcore_fused_moe(
    hidden_states: torch.Tensor,
    probs: torch.Tensor,
    fc1_weight,
    fc2_weight,
    activation_type: ActivationType,
    num_local_experts: int,
    local_expert_start: int,
    routing_map: Optional[torch.Tensor] = None,
    tokens_per_expert: Optional[torch.Tensor] = None,
    skip_permute: bool = False,
    disable_fused_quant_kernels: bool = False,
) -> torch.Tensor:
    """Fused MoE: [permute ->] pad -> FC1 -> activation -> FC2 -> unpad [-> unpermute].

    Two modes:
    - skip_permute=False (default): tokens are unpermuted. Requires routing_map.
      Performs full permute -> compute -> unpermute.
    - skip_permute=True: tokens are already permuted by the dispatcher. Requires
      tokens_per_expert. Pads to alignment, computes, then unpads. Probs are
      applied during unpad.

    Unless disable_fused_quant_kernels=True, when weights are MXFP8, uses fused
    kernels that combine permute/activation with MXFP8 quantization into single
    kernel launches.

    Args:
        hidden_states: [num_tokens, hidden_size] BF16 input.
        probs: routing probabilities. Shape is [num_tokens, topk] when
            skip_permute=False, or [num_tokens] (already gathered) when
            skip_permute=True.
        fc1_weight: stacked weight for FC1 (torch.Tensor for BF16, MXFP8Tensor for MXFP8).
        fc2_weight: stacked weight for FC2 (same type as fc1_weight).
        activation_type: ActivationType enum (SQUARED_RELU).
        num_local_experts: number of experts on this rank.
        local_expert_start: first global expert index on this rank.
        routing_map: [num_tokens, topk] int expert assignments. Required when skip_permute=False.
        tokens_per_expert: [num_local_experts] int32 token counts. Required when skip_permute=True.
        skip_permute: if True, skip permute/unpermute (tokens already in expert order).
        disable_fused_quant_kernels: if True, disable fused permute+quantize and
            activation+quantize kernels for MXFP8, using separate launches instead.
            Useful for debugging. Ignored when weights are BF16.

    Returns:
        [num_tokens, hidden_size] BF16 output.
    """
    assert (
        hidden_states.dtype == torch.bfloat16
    ), f"mcore_fused_moe requires bf16 input, got {hidden_states.dtype}"

    num_tokens = hidden_states.shape[0]
    use_mxfp8 = isinstance(fc1_weight, MXFP8Tensor)
    # Fused quant kernels only apply to MXFP8 path
    use_fused_quant = use_mxfp8 and not disable_fused_quant_kernels

    if use_mxfp8:
        assert (
            HAVE_SCALED_GMM
        ), "torch.nn.functional.scaled_grouped_mm not available. Install PyTorch 2.10+."
        mm_fn = _mxfp8_grouped_mm
        # scaled_grouped_mm requires each expert's token count aligned to 32,
        # but swizzled MXFP8 scales require alignment to 128. Use 128 to
        # satisfy both constraints.
        expert_alignment = 128
    else:
        assert (
            HAVE_GROUPED_MM
        ), "torch.nn.functional.grouped_mm not available. Install PyTorch 2.10+."
        mm_fn = _bf16_grouped_mm
        expert_alignment = 16

    activation_func = _get_activation_func(activation_type, fused_quant=use_fused_quant)

    # --- Pre-processing: permute or pad ---
    if skip_permute:
        assert tokens_per_expert is not None, "tokens_per_expert is required when skip_permute=True"
        tokens_per_expert = tokens_per_expert.cuda().int()
        assert routing_map is None, "routing_map must be None when skip_permute=True"
        hidden_states, permutation_map, offs = pad_to_alignment(
            hidden_states, tokens_per_expert, expert_alignment
        )
        permuted_probs = None

    else:
        assert routing_map is not None, "routing_map is required when skip_permute=False"
        if use_fused_quant:
            # Fused permute + MXFP8 quantize: single kernel produces MXFP8Tensor
            hidden_states, permuted_probs, permutation_map, offs = permute_and_quantize_mxfp8(
                hidden_states,
                probs,
                routing_map,
                local_expert_start,
                num_local_experts,
                alignment=expert_alignment,
            )
        else:
            hidden_states, permuted_probs, permutation_map, offs = permute_tokens(
                hidden_states,
                probs,
                routing_map,
                local_expert_start,
                num_local_experts,
                alignment=expert_alignment,
            )

    # --- FC1 -> activation -> FC2 ---
    # Quantize if MXFP8 path and hidden_states not already quantized (fused permute+quant
    # produces MXFP8Tensor directly; skip_permute path always needs separate quant).
    needs_quant = use_mxfp8 and not isinstance(hidden_states, MXFP8Tensor)
    if needs_quant:
        hidden_states = MXFP8Tensor.from_bf16(hidden_states, backend="triton")
    fc1_output = mm_fn(hidden_states, fc1_weight, offs)

    activation_out = activation_func(fc1_output, permutation_map)
    # Fused activation+quant returns MXFP8Tensor; otherwise quantize separately.
    if use_mxfp8 and not isinstance(activation_out, MXFP8Tensor):
        activation_out = MXFP8Tensor.from_bf16(activation_out, backend="triton")
    fc2_output = mm_fn(activation_out, fc2_weight, offs)
    # --- Post-processing: unpermute or unpad ---
    if skip_permute:
        probs_1d = probs.squeeze(-1) if probs.dim() > 1 else probs
        return unpad_from_alignment(fc2_output, permutation_map, num_tokens, probs=probs_1d)
    else:
        return unpermute_tokens(fc2_output, permuted_probs, permutation_map, num_tokens)


================================================
FILE: megatron/core/inference/moe/pad.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
"""Pad / unpad utilities for already-permuted expert tokens.

When the token dispatcher has already permuted tokens into expert-grouped
order, these functions insert/remove alignment padding so that each expert's
token block satisfies the alignment requirements of grouped_mm /
scaled_grouped_mm.
"""

from unittest.mock import MagicMock

import torch
from packaging import version

from megatron.core.utils import null_decorator

try:
    import triton
    import triton.language as tl

    if version.parse(triton.__version__) < version.parse("3.4.0") and not torch.cuda.is_available():
        HAVE_TRITON = False
    else:
        HAVE_TRITON = tl.constexpr(version.parse(triton.__version__) >= version.parse("2.0.0"))
except ImportError:
    HAVE_TRITON = False

if not HAVE_TRITON:
    triton = MagicMock()
    triton.jit = null_decorator
    tl = MagicMock()

from megatron.core.inference.moe.permute import compute_expert_offsets


@triton.jit
def _pad_tokens_kernel(
    src_ptr,
    dst_ptr,
    perm_map_ptr,
    tpe_ptr,  # tokens_per_expert [num_experts]
    hidden_dim,
    num_experts: tl.constexpr,
    alignment: tl.constexpr,
    BLOCK_H: tl.constexpr,
):
    """Copy one input row into the padded output buffer.

    Computes unpadded and padded cumulative offsets inline from
    tokens_per_expert, avoiding a separate cumsum kernel launch.
    """
    row = tl.program_id(0)

    # Walk tokens_per_expert to find which expert this row belongs to
    # and compute both unpadded and padded start offsets on the fly.
    unpadded_start = tl.zeros([], dtype=tl.int32)
    padded_start = tl.zeros([], dtype=tl.int32)
    expert_id = -1
    for e in tl.static_range(0, num_experts):
        count = tl.load(tpe_ptr + e).to(tl.int32)
        if expert_id < 0 and row < unpadded_start + count:
            expert_id = e
        if expert_id < 0:
            unpadded_start += count
            aligned = tl.where(
                count > 0,
                ((count + alignment - 1) // alignment) * alignment,
                tl.zeros([], dtype=tl.int32),
            )
            padded_start += aligned

    if expert_id < 0:
        return

    local_idx = row - unpadded_start
    dst_row = padded_start + local_idx

    # Write permutation_map: padded row → original unpadded row
    tl.store(perm_map_ptr + dst_row, row)

    # Copy hidden state
    for h in tl.range(0, hidden_dim, BLOCK_H):
        o = h + tl.arange(0, BLOCK_H)
        m = o < hidden_dim
        tl.store(
            dst_ptr + dst_row * hidden_dim + o,
            tl.load(src_ptr + row * hidden_dim + o, mask=m),
            mask=m,
        )


def pad_to_alignment(
    hidden_states: torch.Tensor, tokens_per_expert: torch.Tensor, alignment: int
) -> tuple:
    """Pad already-permuted tokens so each expert's block is aligned.

    Args:
        hidden_states: [total_tokens, hidden_size] already permuted by dispatcher.
        tokens_per_expert: [num_local_experts] int32 token counts.
        alignment: per-expert alignment.

    Returns:
        (padded_hidden, permutation_map, inclusive_offsets)
        - padded_hidden: [padded_total, hidden_size]
        - permutation_map: [padded_total] int32, original row index or -1 for padding.
        - inclusive_offsets: [num_local_experts] int32 cumulative aligned offsets for grouped_mm.
    """
    num_experts = tokens_per_expert.shape[0]
    total_tokens = hidden_states.shape[0]
    hidden_dim = hidden_states.shape[1]

    # We still need padded_inc for the return value (used as offs by grouped_mm)
    _, padded_inc = compute_expert_offsets(tokens_per_expert, alignment=alignment)
    padded_total = int(padded_inc[-1].item())

    padded_hidden = torch.zeros(
        padded_total, hidden_dim, dtype=hidden_states.dtype, device=hidden_states.device
    )
    permutation_map = torch.full(
        (padded_total,), -1, dtype=torch.int32, device=hidden_states.device
    )

    if total_tokens > 0:
        BLOCK_H = min(triton.next_power_of_2(hidden_dim), 1024)
        _pad_tokens_kernel[(total_tokens,)](
            hidden_states,
            padded_hidden,
            permutation_map,
            tokens_per_expert,
            hidden_dim,
            num_experts,
            alignment,
            BLOCK_H=BLOCK_H,
        )

    return padded_hidden, permutation_map, padded_inc


@triton.jit
def _unpad_tokens_kernel(
    src_ptr,
    dst_ptr,
    perm_map_ptr,
    probs_ptr,
    hidden_dim,
    has_probs: tl.constexpr,
    BLOCK_H: tl.constexpr,
):
    """Copy one real (non-padding) row from padded to unpadded layout.

    Optionally multiplies each row by its routing probability.
    """
    row = tl.program_id(0)
    dst_row = tl.load(perm_map_ptr + row)
    if dst_row < 0:
        return
    if has_probs:
        prob = tl.load(probs_ptr + dst_row)
    for h in tl.range(0, hidden_dim, BLOCK_H):
        o = h + tl.arange(0, BLOCK_H)
        m = o < hidden_dim
        v = tl.load(src_ptr + row * hidden_dim + o, mask=m)
        if has_probs:
            v = v * prob
        tl.store(dst_ptr + dst_row * hidden_dim + o, v, mask=m)


def unpad_from_alignment(
    padded_output: torch.Tensor,
    permutation_map: torch.Tensor,
    original_size: int,
    probs: torch.Tensor = None,
) -> torch.Tensor:
    """Remove alignment padding, scattering results back to original positions.

    Args:
        padded_output: [padded_total, hidden_size] output from expert computation.
        permutation_map: [padded_total] int32, original row index or -1 for padding.
        original_size: number of rows in the unpadded output.
        probs: optional [original_size] routing probabilities to multiply during unpad.

    Returns:
        [original_size, hidden_size] unpadded output.
    """
    hidden_dim = padded_output.shape[1]
    output = torch.zeros(
        original_size, hidden_dim, dtype=padded_output.dtype, device=padded_output.device
    )
    has_probs = probs is not None
    if padded_output.shape[0] > 0:
        BLOCK_H = min(triton.next_power_of_2(hidden_dim), 1024)
        _unpad_tokens_kernel[(padded_output.shape[0],)](
            padded_output,
            output,
            permutation_map,
            probs if has_probs else padded_output,  # dummy pointer when no probs
            hidden_dim,
            has_probs,
            BLOCK_H=BLOCK_H,
        )
    return output


================================================
FILE: megatron/core/inference/moe/permute.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
"""Triton kernels for token permutation and unpermutation in fused MoE.

Includes:
- Token counting per expert
- Expert offset computation (aligned prefix sums)
- Permute tokens into expert-grouped order
- Unpermute expert outputs back to original token order
"""

from unittest.mock import MagicMock

import torch

from megatron.core.utils import null_decorator

try:
    import triton
    import triton.language as tl

    HAVE_TRITON = True
except ImportError:
    HAVE_TRITON = False

if not HAVE_TRITON:
    triton = MagicMock()
    triton.jit = null_decorator
    tl = MagicMock()


def _ceil_div(a, b):
    return (a + b - 1) // b


@triton.jit
def _count_local_tokens_kernel(
    routing_map_ptr,  # [num_tokens * topk] flattened expert assignments
    tokens_per_expert_ptr,  # [num_local_experts] output counters (zeroed by caller)
    total_pairs,  # num_tokens * topk — total (token, topk) pairs
    local_expert_start,  # first global expert index owned by this rank
    num_local_experts: tl.constexpr,  # number of experts on this rank
    BLOCK_SIZE: tl.constexpr,  # number of pairs processed per program
):
    """Count tokens routed to experts on this rank, ignoring tokens routed elsewhere.

    Each program processes BLOCK_SIZE (token, topk) pairs. Tokens assigned to
    experts outside [local_expert_start, local_expert_start + num_local_experts)
    are silently skipped.
    """
    pid = tl.program_id(0)
    offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    mask = offsets < total_pairs
    expert_ids = tl.load(routing_map_ptr + offsets, mask=mask, other=-1)
    # Map global expert IDs to local indices; non-local experts become negative
    local_ids = expert_ids - local_expert_start
    is_local = (local_ids >= 0) & (local_ids < num_local_experts) & mask
    tl.atomic_add(tokens_per_expert_ptr + local_ids, 1, mask=is_local)


def compute_local_tokens_per_expert(
    routing_map: torch.Tensor, local_expert_start: int, num_local_experts: int
) -> torch.Tensor:
    """Count tokens routed to each local expert."""
    total_pairs = routing_map.numel()
    tokens_per_expert = torch.zeros(num_local_experts, dtype=torch.int32, device=routing_map.device)
    BLOCK = 256
    _count_local_tokens_kernel[(_ceil_div(total_pairs, BLOCK),)](
        routing_map,
        tokens_per_expert,
        total_pairs,
        local_expert_start,
        num_local_experts,
        BLOCK_SIZE=BLOCK,
    )
    return tokens_per_expert


@triton.jit
def _prefix_sum_kernel(
    tokens_per_expert_ptr,  # [num_local_experts] raw token counts
    exclusive_offsets_ptr,  # [num_local_experts] output: exclusive prefix sum of aligned counts
    inclusive_offsets_ptr,  # [num_local_experts] output: inclusive prefix sum of aligned counts
    num_local_experts,  # number of experts on this rank
    alignment: tl.constexpr,  # per-expert alignment (counts rounded up to this multiple)
    BLOCK_SIZE: tl.constexpr,  # next_power_of_2(num_local_experts) for tl.cumsum
):
    """Exclusive and inclusive prefix sums of aligned token counts.

    Each expert's token count is rounded up to the nearest multiple of
    `alignment` (experts with 0 tokens stay at 0). The inclusive offsets
    are used as `offs` by grouped_mm / scaled_grouped_mm.
    """
    r = tl.arange(0, BLOCK_SIZE)
    mask = r < num_local_experts
    h = tl.load(tokens_per_expert_ptr + r, mask=mask, other=0)
    # Round up non-zero counts to alignment boundary
    if alignment > 1:
        h = tl.where(h > 0, ((h + alignment - 1) // alignment) * alignment, h)
    inc = tl.cumsum(h, axis=0)
    tl.store(exclusive_offsets_ptr + r, inc - h, mask=mask)
    tl.store(inclusive_offsets_ptr + r, inc, mask=mask)


def compute_expert_offsets(tokens_per_expert: torch.Tensor, alignment: int = 1) -> tuple:
    """Compute exclusive and inclusive prefix sums of aligned token counts."""
    n = tokens_per_expert.shape[0]
    exclusive_cumsum = torch.empty_like(tokens_per_expert)
    inclusive_cumsum = torch.empty_like(tokens_per_expert)
    _prefix_sum_kernel[(1,)](
        tokens_per_expert,
        exclusive_cumsum,
        inclusive_cumsum,
        n,
        alignment,
        BLOCK_SIZE=triton.next_power_of_2(n),
    )
    return exclusive_cumsum, inclusive_cumsum


@triton.jit
def _permute_tokens_kernel(
    hidden_ptr,  # [num_tokens, hidden_dim] input hidden states
    probs_ptr,  # [num_tokens, topk] routing probabilities
    routing_map_ptr,  # [num_tokens, topk] expert assignments (global IDs)
    out_hidden_ptr,  # [output_size, hidden_dim] output: permuted hidden states
    out_probs_ptr,  # [output_size] output: permuted probabilities
    out_src_idx_ptr,  # [output_size] output: permutation_map (original token index, -1 for padding)
    counters_ptr,  # [num_local_experts] exclusive offsets,
    # atomically incremented to assign positions
    num_tokens,  # number of input tokens
    hidden_dim,  # hidden dimension
    topk: tl.constexpr,  # number of expert choices per token
    local_expert_start,  # first global expert index on this rank
    num_local_experts: tl.constexpr,  # number of experts on this rank
    BLOCK_H: tl.constexpr,  # tile size for copying hidden_dim
):
    """Permute tokens into expert-grouped order.

    Grid: one program per (token, topk) pair. Each program looks up the assigned
    expert, skips non-local experts, then atomically claims a position within
    that expert's block and copies the hidden state + prob + source index.
    """
    # Each program handles one (token, topk) pair
    pair = tl.program_id(0)
    tok = pair // topk
    k = pair % topk
    if tok >= num_tokens:
        return
    eid = tl.load(routing_map_ptr + tok * topk + k)
    lid = eid - local_expert_start
    # Skip tokens routed to non-local experts
    if lid < 0 or lid >= num_local_experts:
        return
    # Atomically claim a position within this expert's aligned block
    pos = tl.atomic_add(counters_ptr + lid, 1)
    # Copy hidden state row
    for h in tl.range(0, hidden_dim, BLOCK_H):
        o = h + tl.arange(0, BLOCK_H)
        m = o < hidden_dim
        tl.store(
            out_hidden_ptr + pos * hidden_dim + o,
            tl.load(hidden_ptr + tok * hidden_dim + o, mask=m),
            mask=m,
        )
    tl.store(out_probs_ptr + pos, tl.load(probs_ptr + tok * topk + k))
    # Record source token index for unpermute
    tl.store(out_src_idx_ptr + pos, tok)


def permute_tokens(
    hidden_states: torch.Tensor,
    probs: torch.Tensor,
    routing_map: torch.Tensor,
    local_expert_start: int,
    num_local_experts: int,
    alignment: int = 1,
) -> tuple:
    """Permute tokens into expert-grouped order.

    Computes token counts, aligned expert offsets, output sizing, and
    permutation in a single call.

    Args:
        hidden_states: [num_tokens, hidden_size] input.
        probs: [num_tokens, topk] routing probabilities.
        routing_map: [num_tokens, topk] expert assignments.
        local_expert_start: first global expert index on this rank.
        num_local_experts: number of experts on this rank.
        alignment: per-expert token alignment (default 1).

    Returns:
        (permuted_hidden, permuted_probs, permutation_map, inclusive_offsets)
        - permuted_hidden: [output_size, hidden_size]
        - permuted_probs: [output_size]
        - permutation_map: [output_size] int32, maps each permuted row back to
          its original token index. Used by unpermute_tokens to scatter expert
          outputs back and by activation kernels to skip padding rows (-1).
        - inclusive_offsets: [num_local_experts] int32 cumulative offsets for grouped_mm
    """
    num_tokens, hidden_dim = hidden_states.shape
    topk = probs.shape[1]

    # Count how many (token, topk) pairs are routed to each local expert.
    # Non-local experts are ignored. Result is [num_local_experts] int32.
    tokens_per_expert = compute_local_tokens_per_expert(
        routing_map, local_expert_start, num_local_experts
    )

    # exclusive_expert_offsets[i] = start of expert i's block in the padded output.
    #   Used as the initial counter for atomic position assignment in the permute kernel.
    # inclusive_expert_offsets[i] = end of expert i's block (= start of expert i+1).
    #   Passed as `offs` to grouped_mm / scaled_grouped_mm to delimit expert boundaries.
    exclusive_expert_offsets, inclusive_expert_offsets = compute_expert_offsets(
        tokens_per_expert, alignment=alignment
    )
    output_size = num_tokens * min(topk, num_local_experts) + alignment * num_local_experts

    permuted_hidden = torch.empty(
        output_size, hidden_dim, dtype=hidden_states.dtype, device=hidden_states.device
    )
    permuted_probs = torch.empty(output_size, dtype=probs.dtype, device=probs.device)
    permutation_map = torch.full((output_size,), -1, dtype=torch.int32, device=probs.device)
    BLOCK_H = min(triton.next_power_of_2(hidden_dim), 1024)
    _permute_tokens_kernel[(num_tokens * topk,)](
        hidden_states,
        probs,
        routing_map,
        permuted_hidden,
        permuted_probs,
        permutation_map,
        exclusive_expert_offsets,
        num_tokens,
        hidden_dim,
        topk,
        local_expert_start,
        num_local_experts,
        BLOCK_H=BLOCK_H,
    )
    return permuted_hidden, permuted_probs, permutation_map, inclusive_expert_offsets


@triton.jit
def _unpermute_tokens_kernel(
    expert_out_ptr,  # [output_size, hidden_dim] expert outputs in permuted order
    probs_ptr,  # [output_size] fp32 routing probabilities (permuted)
    src_idx_ptr,  # [output_size] permutation_map: original token index, or -1 for padding
    output_ptr,  # [num_tokens, hidden_dim] fp32 output buffer (zeroed by caller)
    hidden_dim,  # hidden dimension
    BLOCK_H: tl.constexpr,  # tile size for processing hidden_dim
):
    """Scatter weighted expert outputs back to original token positions.

    Grid: one program per row of expert_out. Padding rows (src_idx == -1) are
    skipped. Multiple topk selections for the same token are accumulated via
    atomic adds. All arithmetic is in fp32 to avoid precision loss.
    """
    row = tl.program_id(0)
    source_idx = tl.load(src_idx_ptr + row)
    # Skip padding rows
    if source_idx < 0:
        return
    prob = tl.load(probs_ptr + row)  # fp32
    for h in tl.range(0, hidden_dim, BLOCK_H):
        offsets = h + tl.arange(0, BLOCK_H)
        m = offsets < hidden_dim
        # Upcast bf16 expert output to fp32 before multiply + accumulate
        v = tl.load(expert_out_ptr + row * hidden_dim + offsets, mask=m).to(tl.float32)
        tl.atomic_add(output_ptr + source_idx * hidden_dim + offsets, v * prob, mask=m)


def unpermute_tokens(
    expert_output: torch.Tensor,
    permuted_probs: torch.Tensor,
    permutation_map: torch.Tensor,
    num_tokens: int,
) -> torch.Tensor:
    """Unpermute expert outputs back to original token order.

    Accumulates in fp32 to avoid precision loss from multiple topk atomic adds.
    Returns fp32 output.
    """
    assert (
        permuted_probs.dtype == torch.float32
    ), f"permuted_probs must be fp32, got {permuted_probs.dtype}"
    output_size, hidden_dim = expert_output.shape
    output = torch.zeros(num_tokens, hidden_dim, dtype=torch.float32, device=expert_output.device)
    BLOCK_H = min(triton.next_power_of_2(hidden_dim), 1024)
    _unpermute_tokens_kernel[(output_size,)](
        expert_output, permuted_probs, permutation_map, output, hidden_dim, BLOCK_H=BLOCK_H
    )
    return output


@triton.jit
def _permute_quantize_mxfp8_kernel(
    hidden_ptr,
    probs_ptr,
    routing_map_ptr,
    out_fp8_ptr,
    out_scale_ptr,
    out_probs_ptr,
    out_src_idx_ptr,
    counters_ptr,
    num_tokens,
    K,
    n_col_blocks,
    topk: tl.constexpr,
    local_expert_start,
    num_local_experts: tl.constexpr,
    REAL_GROUPS: tl.constexpr,
    BLOCK_K: tl.constexpr,
    BLOCK_GROUPS: tl.constexpr,
):
    """Fused permute + MXFP8 quantize + swizzle in one kernel.

    Grid: (num_tokens * topk,) — one program per (token, k) pair.
    Reads BF16 from source token, quantizes to FP8 e4m3, writes FP8 data +
    swizzled e8m0 scales to the permuted write position.
    """
    pair = tl.program_id(0)
    tok = pair // topk
    k = pair % topk
    if tok >= num_tokens:
        return
    eid = tl.load(routing_map_ptr + tok * topk + k)
    lid = eid - local_expert_start
    if lid < 0 or lid >= num_local_experts:
        return

    pos = tl.atomic_add(counters_ptr + lid, 1)

    # Load full row from source token
    offs = tl.arange(0, BLOCK_K)
    mask = offs < K
    x = tl.load(hidden_ptr + tok * K + offs, mask=mask, other=0.0).to(tl.float32)

    # Per-group-of-32 quantization
    x_grouped = tl.reshape(x, [BLOCK_GROUPS, 32])
    abs_grouped = tl.abs(x_grouped)
    max_vals = tl.max(abs_grouped, axis=1)

    dequant_scale = max_vals / 448.0
    dequant_exp = (dequant_scale.to(tl.uint32, bitcast=True) + 0x007FFFFF) & 0x7F800000
    dequant_rounded = dequant_exp.to(tl.float32, bitcast=True)
    quant_scale = tl.where(dequant_rounded == 0, 0.0, 1.0 / dequant_rounded)

    quantized = x_grouped * quant_scale[:, None]
    quantized_flat = tl.reshape(quantized, [BLOCK_K])
    out_fp8 = quantized_flat.to(tl.float8e4nv)

    # Store FP8 data at permuted position
    tl.store(out_fp8_ptr + pos * K + offs, out_fp8, mask=mask)

    # Store swizzled scales at permuted position
    scale_exp = (dequant_exp >> 23).to(tl.uint8)
    col_offs = tl.arange(0, BLOCK_GROUPS)
    col_mask = col_offs < REAL_GROUPS

    macro_row_block = pos // 128
    macro_col_block = col_offs // 4
    local_row = pos % 128
    local_col = col_offs % 4
    group = local_row // 32
    sub_row = local_row % 32
    tile_idx = macro_row_block * n_col_blocks + macro_col_block
    swizzled_offs = tile_idx * 512 + sub_row * 16 + group * 4 + local_col

    tl.store(out_scale_ptr + swizzled_offs, scale_exp, mask=col_mask)

    # Store prob and source index
    tl.store(out_probs_ptr + pos, tl.load(probs_ptr + tok * topk + k))
    tl.store(out_src_idx_ptr + pos, tok)


def permute_and_quantize_mxfp8(
    hidden_states: torch.Tensor,
    probs: torch.Tensor,
    routing_map: torch.Tensor,
    local_expert_start: int,
    num_local_experts: int,
    alignment: int = 128,
) -> tuple:
    """Fused permute + MXFP8 quantize + swizzle.

    Self-contained API matching permute_tokens: computes token counts, aligned
    expert offsets, output sizing, permutation, and MXFP8 quantization in a
    single kernel launch.

    Args:
        hidden_states: [num_tokens, hidden_size] BF16 input.
        probs: [num_tokens, topk] routing probabilities.
        routing_map: [num_tokens, topk] expert assignments.
        local_expert_start: first global expert index on this rank.
        num_local_experts: number of experts on this rank.
        alignment: per-expert token alignment (default 128, required for MXFP8 swizzle).

    Returns:
        (permuted_mxfp8, permuted_probs, permutation_map, inclusive_offsets)
        - permuted_mxfp8: MXFP8Tensor with .data [output_size, K] and .scale (swizzled)
        - permuted_probs: [output_size] routing probs
        - permutation_map: [output_size] int32, original token index or -1 for padding
        - inclusive_offsets: [num_local_experts] int32 cumulative offsets for scaled_grouped_mm
    """
    from megatron.core.inference.quantization.mxfp8_tensor import MXFP8Tensor

    num_tokens, K = hidden_states.shape
    topk = probs.shape[1]
    assert K % 32 == 0

    # Count how many (token, topk) pairs are routed to each local expert.
    tokens_per_expert = compute_local_tokens_per_expert(
        routing_map, local_expert_start, num_local_experts
    )

    # exclusive_expert_offsets[i] = start of expert i's block in the padded output.
    # inclusive_expert_offsets[i] = end of expert i's block (= start of expert i+1).
    exclusive_expert_offsets, inclusive_expert_offsets = compute_expert_offsets(
        tokens_per_expert, alignment=alignment
    )
    output_size = num_tokens * min(topk, num_local_experts) + alignment * num_local_experts

    scale_cols = K // 32
    n_row_blocks = _ceil_div(output_size, 128)
    n_col_blocks = _ceil_div(scale_cols, 4)
    total_scale_bytes = n_row_blocks * n_col_blocks * 512

    out_fp8 = torch.empty(output_size, K, dtype=torch.float8_e4m3fn, device=hidden_states.device)
    out_scale = torch.zeros(total_scale_bytes, dtype=torch.uint8, device=hidden_states.device)
    permuted_probs = torch.empty(output_size, dtype=probs.dtype, device=probs.device)
    permutation_map = torch.full((output_size,), -1, dtype=torch.int32, device=probs.device)

    BLOCK_K = triton.next_power_of_2(K)
    BLOCK_GROUPS = BLOCK_K // 32

    _permute_quantize_mxfp8_kernel[(num_tokens * topk,)](
        hidden_states,
        probs,
        routing_map,
        out_fp8,
        out_scale,
        permuted_probs,
        permutation_map,
        exclusive_expert_offsets,
        num_tokens,
        K,
        n_col_blocks,
        topk,
        local_expert_start,
        num_local_experts,
        REAL_GROUPS=scale_cols,
        BLOCK_K=BLOCK_K,
        BLOCK_GROUPS=BLOCK_GROUPS,
    )

    permuted_mxfp8 = MXFP8Tensor(
        data=out_fp8, scale=out_scale.view(torch.float8_e8m0fnu), backend="triton"
    )
    return permuted_mxfp8, permuted_probs, permutation_map, inclusive_expert_offsets


================================================
FILE: megatron/core/inference/quantization/__init__.py
================================================
# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/core/inference/quantization/mxfp8_quantize.py
================================================
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

"""Standalone MXFP8 quantization kernel with fused scale swizzle.

One block per token. Quantizes BF16 → FP8 e4m3 and writes scales directly
in cuBLAS 2D blocked (swizzled) layout. No FP4, no triton_kernels dependency.

Usage:
    from megatron.core.inference.quantization.mxfp8_quantize import mxfp8_quantize
    data, swizzled_scales, total_scale_bytes = mxfp8_quantize(x_bf16)
    # data: [M, K] float8_e4m3fn
    # swizzled_scales: 1D uint8 in cuBLAS blocked layout
"""

import torch

try:
    import triton
    import triton.language as tl

    HAVE_TRITON = True
except ImportError:
    from unittest.mock import MagicMock

    from megatron.core.utils import null_decorator

    triton = MagicMock()
    triton.jit = null_decorator
    tl = MagicMock()
    HAVE_TRITON = False


def _ceil_div(a, b):
    return (a + b - 1) // b


@triton.jit
def _mxfp8_quant_swizzle_kernel(
    out_ptr,  # [M, K] output buffer for float8_e4m3fn quantized data
    scale_ptr,  # 1D output buffer for swizzled uint8 scales (e8m0 exponents)
    src_ptr,  # [M, K] input tensor in bf16/fp16/fp32
    K,  # number of columns in the input (must be divisible by 32)
    n_col_blocks,  # ceil(K/32 / 4) — number of macro-tile columns in the swizzle layout
    REAL_GROUPS: tl.constexpr,  # actual number of scale groups per row (K // 32)
    BLOCK_K: tl.constexpr,  # next_power_of_2(K) — padded column count for tl.reshape
    BLOCK_GROUPS: tl.constexpr,  # BLOCK_K // 32 — padded group count (must be power of 2)
):
    """Each triton block quantizes one row → FP8 e4m3, write scales directly in swizzled layout.

    We use round up in scale calculation. see: Mishra et al.,
    Recipes for Pre-training LLMs with MXFP8 (https://arxiv.org/pdf/2506.08027)

    The implementation borrows code from the triton upstream MXFP downcast kernel:
    https://github.com/triton-lang/triton/blob/main/python/triton_kernels/triton_kernels/numerics_details/mxfp_details/_downcast_to_mxfp.py

    Note on swizzled scale layout (torch.nn.functional.SwizzleType.SWIZZLE_32_4_4):

        Background: In MXFP8, every group of 32 elements shares one 1-byte scale
        (an e8m0 exponent). For an [M, K] matrix, this gives an [M, K//32] scale
        matrix. cuBLAS doesn't read these scales in simple row-major order — it
        expects a "swizzled" layout optimized for its internal access patterns.

        Step 1 — Divide into macro-tiles:
            The scale matrix is partitioned into 128-row x 4-col macro-tiles.
            Each tile is stored as a contiguous 512-byte (128 x 4) block.

        Step 2 — Interleave within each tile:
            Within a macro-tile, the 128 rows are NOT stored sequentially.
            Instead, they are split into 4 groups of 32 rows:
                group 0: rows   0- 31
                group 1: rows  32- 63
                group 2: rows  64- 95
                group 3: rows  96-127

            Rows with the same position within their group (same "sub_row")
            are placed next to each other. So the memory layout is:

            Concretely, for sub_row=0:
                byte 0:  row  0, col 0
                byte 1:  row  0, col 1
                byte 2:  row  0, col 2
                byte 3:  row  0, col 3
                byte 4:  row 32, col 0
                byte 5:  row 32, col 1
                byte 6:  row 32, col 2
                byte 7:  row 32, col 3
                byte 8:  row 64, col 0
                ...
                byte 15: row 96, col 3

        The formula to map logical (row, col) → byte offset:
            tile_idx = (row // 128) * n_col_blocks + (col // 4)
            sub_row  = row % 32
            group    = (row % 128) // 32
            local_col = col % 4
            offset   = tile_idx * 512 + sub_row * 16 + group * 4 + local_col

    """
    row = tl.program_id(0)
    src_row = src_ptr + row * K
    out_row = out_ptr + row * K

    offs = tl.arange(0, BLOCK_K)
    mask = offs < K

    # Load full row
    x = tl.load(src_row + offs, mask=mask, other=0.0).to(tl.float32)

    # Per-group-of-32 max
    x_grouped = tl.reshape(x, [BLOCK_GROUPS, 32])
    abs_grouped = tl.abs(x_grouped)
    max_vals = tl.max(abs_grouped, axis=1)

    # 448 is the max representable value in FP8 e4m3.
    # dequant_scale = min scale s.t. max_val / scale <= 448.
    dequant_scale = max_vals / 448.0
    # Round up to next power of 2 via integer bit manipulation:
    # Adding 0x007FFFFF (mantissa mask) before masking with 0x7F800000
    # (exponent-only mask) bumps the exponent if any mantissa bits are set.
    # Result: 2^ceil(log2(max/448)) as a uint32-encoded float.
    dequant_exp = (dequant_scale.to(tl.uint32, bitcast=True) + 0x007FFFFF) & 0x7F800000
    # Reinterpret uint32 back as float32 — now a power-of-2 dequantization scale.
    dequant_rounded = dequant_exp.to(tl.float32, bitcast=True)
    # Quantization scale is the reciprocal; guard against div-by-zero for all-zero groups.
    quant_scale = tl.where(dequant_rounded == 0, 0.0, 1.0 / dequant_rounded)

    # Quantize
    quantized = x_grouped * quant_scale[:, None]
    quantized_flat = tl.reshape(quantized, [BLOCK_K])
    out_fp8 = quantized_flat.to(tl.float8e4nv)

    # Store FP8 data
    tl.store(out_row + offs, out_fp8, mask=mask)

    # Store swizzled scales
    scale_exp = (dequant_exp >> 23).to(tl.uint8)
    col_offs = tl.arange(0, BLOCK_GROUPS)
    col_mask = col_offs < REAL_GROUPS

    # Compute swizzled offsets for each scale element.
    #
    # The scale matrix [M, K//32] is divided into 128×4 macro-tiles.
    # Within each tile, rows are split into 4 groups of 32 (group = local_row // 32).
    # Rather than flattening row-major, the layout interleaves groups so that
    # rows 32 apart are adjacent in memory:
    #
    #   offset = tile_idx * 512 + sub_row * 16 + group * 4 + local_col
    macro_row_block = row // 128
    macro_col_block = col_offs // 4
    local_row = row % 128
    local_col = col_offs % 4
    group = local_row // 32
    sub_row = local_row % 32
    tile_idx = macro_row_block * n_col_blocks + macro_col_block
    swizzled_offs = tile_idx * 512 + sub_row * 16 + group * 4 + local_col

    tl.store(scale_ptr + swizzled_offs, scale_exp, mask=col_mask)


def mxfp8_quantize(x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
    """Quantize a 2D tensor to MXFP8 with fused scale swizzle.

    Args:
        x: [M, K] tensor in bf16/fp16/fp32. K must be divisible by 32.

    Returns:
        (data, swizzled_scales):
            data: [M, K] float8_e4m3fn
            swizzled_scales: 1D tensor in cuBLAS blocked layout (uint8/e8m0)
    """
    assert x.is_cuda and x.dim() == 2
    assert x.dtype in (torch.bfloat16, torch.float16, torch.float32)
    M, K = x.shape
    assert K % 32 == 0, f"K ({K}) must be divisible by 32"

    scale_cols = K // 32
    n_row_blocks = _ceil_div(M, 128)
    n_col_blocks = _ceil_div(scale_cols, 4)
    total_scale_bytes = n_row_blocks * n_col_blocks * 512

    out_data = torch.empty(M, K, dtype=torch.float8_e4m3fn, device=x.device)
    out_scale = torch.zeros(total_scale_bytes, dtype=torch.uint8, device=x.device)

    BLOCK_K = triton.next_power_of_2(K)
    BLOCK_GROUPS = BLOCK_K // 32

    _mxfp8_quant_swizzle_kernel[(M,)](
        out_data,
        out_scale,
        x,
        K,
        n_col_blocks,
        REAL_GROUPS=scale_cols,
        BLOCK_K=BLOCK_K,
        BLOCK_GROUPS=BLOCK_GROUPS,
    )

    return out_data, out_scale.view(torch.float8_e8m0fnu)


================================================
FILE: megatron/core/inference/quantization/mxfp8_tensor.py
================================================
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from dataclasses import dataclass
from typing import Optional

import torch

try:
    from flashinfer import mxfp8_quantize as flashinfer_mxfp8_quantize

    HAVE_FLASHINFER = True
except ImportError:
    HAVE_FLASHINFER = False

from megatron.core.inference.quantization.mxfp8_quantize import (
    mxfp8_quantize as mcore_mxfp8_quantize,
)


def _ceil_div(a, b):
    return (a + b - 1) // b


@dataclass
class MXFP8Tensor:
    """MXFP8 tensor wrapper storing quantized fp8_e4m3 data and swizzled e8m0 scales."""

    data: torch.Tensor  # [M, K] fp8_e4m3fn
    scale: torch.Tensor  # 1D, swizzled cuBLAS blocked layout, e8m0
    backend: Optional[str] = None  # quantization backend: 'flashinfer' or 'triton'

    def size(self, idx: Optional[int] = None):
        """Wrapper for calling self.data.size()"""
        return self.data.size(idx)

    def scale_2d(self, K: Optional[int] = None) -> torch.Tensor:
        """Reshape 1D swizzled scale to 2D for scaled_grouped_mm / scaled_mm.

        Swizzle pads rows to multiples of 128 and cols to multiples of 4.
        Returns (padded_M, padded_cols) where padded_cols = ceil(K//32, 4) * 4.
        """
        if self.scale.dim() == 2:
            return self.scale
        if K is None:
            K = self.data.shape[-1]
        n_col_blocks = _ceil_div(K // 32, 4)
        padded_cols = n_col_blocks * 4
        return self.scale.reshape(-1, padded_cols)

    @classmethod
    def from_bf16(cls, x: torch.Tensor, group_size: int = 32, backend: str = "flashinfer"):
        """Quantize BF16 tensor to MXFP8.

        Args:
            x: [M, K] BF16 tensor on CUDA.
            group_size: MXFP8 group size (default 32).
            backend: 'triton' (fused quantize + swizzle Triton kernel) or
                     'flashinfer' (single fused FlashInfer CUDA kernel).
        """
        assert x.is_cuda and x.dim() == 2
        assert x.shape[-1] % group_size == 0
        if backend == "flashinfer":
            assert HAVE_FLASHINFER, "FlashInfer not available"
            return cls(*flashinfer_mxfp8_quantize(x), backend=backend)
        elif backend == "triton":
            xq, xs = mcore_mxfp8_quantize(x)
            return cls(data=xq, scale=xs, backend=backend)
        else:
            raise ValueError(
                f"Unknown MXFP8 quantization backend: '{backend}'. "
                "Must be 'triton' or 'flashinfer'."
            )


================================================
FILE: megatron/core/inference/quantization/utils.py
================================================
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from typing import Dict, Optional, Tuple

import torch

from megatron.core.inference.quantization.mxfp8_tensor import MXFP8Tensor

try:
    from transformer_engine.pytorch.tensor.mxfp8_tensor import MXFP8Tensor as TEMXFP8Tensor

    HAVE_TE = True
except ImportError:
    HAVE_TE = False

try:
    from flashinfer import mm_mxfp8 as flashinfer_mm_mxfp8

    HAVE_FLASHINFER = True
except ImportError:
    HAVE_FLASHINFER = False

try:
    from torch.nn.functional import ScalingType, SwizzleType
    from torch.nn.functional import scaled_mm as torch_scaled_mm

    HAVE_TORCH_SCALED_MM = True
except ImportError:
    HAVE_TORCH_SCALED_MM = False


def _verify_te_to_mcore_mxfp8_conversion(te_dequantized, fi_quantized: MXFP8Tensor) -> None:
    # Sanity check: compare the first logical block (32 values)
    # Slice logical dimensions first to naturally handle any data swizzling/strides
    te_block = te_dequantized[0, :32].float()

    # Safely extract bytes from the first logical block, then view as e4m3
    fi_data_bytes = fi_quantized.data[0, :32].contiguous().view(torch.uint8)
    fi_data_e4m3 = fi_data_bytes.view(torch.float8_e4m3fn).float()

    # Extract the scale. Logical block (0, 0) is always at physical index 0,
    # bypassing any scale swizzling layout complexity (like SWIZZLED_128x4)
    fi_scale_byte = fi_quantized.scale.contiguous().flatten()[0:1].view(torch.uint8).to(torch.int32)
    fi_scale_f32 = (fi_scale_byte << 23).view(torch.float32)

    fi_block = fi_data_e4m3 * fi_scale_f32

    if not torch.allclose(te_block, fi_block):
        diff_norm = torch.norm(te_block - fi_block)
        raise ValueError(f"MXFP8 sanity check failed. Diff norm: {diff_norm}")


def quantize_model_to_mxfp8(model: torch.nn.Module, backend: str = "flashinfer") -> None:
    """Convert TE MXFP8 weights to mcore MXFP8Tensor format.

    Recursively walks the model and replaces each TEMXFP8Tensor parameter
    with an MXFP8Tensor re-quantized via the specified backend.

    Args:
        model: The model whose TE MXFP8 parameters should be converted.
        backend: 'flashinfer' or 'triton' quantization backend.
    """
    assert HAVE_TE
    import logging

    rank = torch.distributed.get_rank()
    if backend == "flashinfer":
        assert HAVE_FLASHINFER, "FlashInfer not available for MXFP8 quantization"

    for child in model.children():
        quantize_model_to_mxfp8(child, backend=backend)

    def replace_in_dict(attr_dict):
        """Helper function to replace TE MXFP8 weights."""
        keys = list(attr_dict.keys())
        for key in keys:
            val = attr_dict[key]
            is_te_mxfp8 = isinstance(val, TEMXFP8Tensor) or (
                hasattr(val, 'data') and isinstance(val.data, TEMXFP8Tensor)
            )
            if is_te_mxfp8:
                # Undo the TE quantization and re-quantize
                # Note that this introduces a one-time overhead but avoids any
                # numerical differences between TE and mcore MXFP8 formats
                te_dequantized = val.dequantize()
                mcore_quantized = MXFP8Tensor.from_bf16(te_dequantized, backend=backend)
                _verify_te_to_mcore_mxfp8_conversion(te_dequantized, mcore_quantized)
                del model._parameters[key]
                setattr(model, key, mcore_quantized)

    if hasattr(model, '_parameters') and model._parameters:
        replace_in_dict(model._parameters)

    return model


def _should_quantize_param(val: torch.Tensor) -> bool:
    """Return True if a parameter should be quantized to FlashInfer MXFP8."""
    if not val.is_cuda:
        return False
    if HAVE_TE and isinstance(val, TEMXFP8Tensor):
        return True
    if HAVE_TE and hasattr(val, 'data') and isinstance(val.data, TEMXFP8Tensor):
        return True
    if (
        isinstance(val, torch.nn.Parameter)
        and val.dim() == 2
        and val.dtype in (torch.bfloat16, torch.float16)
    ):
        return True
    return False


def _to_bf16(val: torch.Tensor) -> torch.Tensor:
    """Convert a parameter value to BF16 for quantization."""
    if HAVE_TE and isinstance(val, TEMXFP8Tensor):
        return val.dequantize()
    if HAVE_TE and hasattr(val, 'data') and isinstance(val.data, TEMXFP8Tensor):
        return val.data.dequantize()
    return val.data.to(torch.bfloat16)


def collect_mxfp8_param_metadata(
    model: torch.nn.Module,
) -> Dict[str, Tuple[torch.Size, torch.dtype, torch.device]]:
    """Record shape/dtype/device for each parameter that will be quantized.

    Called once before the first quantization to record the original parameter
    metadata (shape, dtype, device) before any format conversion.
    """
    metadata: Dict[str, Tuple[torch.Size, torch.dtype, torch.device]] = {}
    for name, param in model.named_parameters():
        if _should_quantize_param(param):
            if HAVE_TE and isinstance(param, TEMXFP8Tensor):
                bf16 = param.dequantize()
                metadata[name] = (bf16.shape, bf16.dtype, bf16.device)
            else:
                metadata[name] = (param.shape, param.dtype, param.device)
    return metadata


def quantize_params_to_mxfp8(
    model: torch.nn.Module,
    persistent_buffers: Optional[Dict[str, MXFP8Tensor]] = None,
    _prefix: str = "",
    backend: str = "flashinfer",
) -> Dict[str, MXFP8Tensor]:
    """Quantize model parameters to MXFP8Tensor format.

    Handles both TEMXFP8Tensor (fp8_param=True) and BF16/FP16 nn.Parameter
    inputs.  When *persistent_buffers* is provided, new quantized values are
    ``copy_()``'d into the existing MXFP8Tensor objects so that CUDA-graph
    device-pointer captures remain valid.

    Args:
        model: The model whose parameters should be quantized.
        persistent_buffers: If not ``None``, a dict mapping fully-qualified
            parameter names to previously-created ``MXFP8Tensor`` objects.
            Updated in-place and returned.
        _prefix: Internal recursion prefix – callers should not set this.
        backend: 'flashinfer' or 'triton' quantization backend.

    Returns:
        The ``persistent_buffers`` dict (created on first call if ``None``).
    """
    if backend == "flashinfer":
        assert HAVE_FLASHINFER, "FlashInfer not available for MXFP8 quantization"

    if persistent_buffers is None:
        persistent_buffers = {}

    # Recurse through child modules
    for child_name, child_module in model.named_children():
        child_prefix = f"{_prefix}{child_name}." if _prefix else f"{child_name}."
        quantize_params_to_mxfp8(
            child_module, persistent_buffers, _prefix=child_prefix, backend=backend
        )

    # Process parameters owned directly by this module
    if hasattr(model, '_parameters') and model._parameters:
        keys = list(model._parameters.keys())
        for key in keys:
            val = model._parameters[key]
            if val is None:
                continue
            if not _should_quantize_param(val):
                continue

            fqn = f"{_prefix}{key}"
            bf16_data = _to_bf16(val)

            if fqn in persistent_buffers:
                # Subsequent call: copy into existing tensors to preserve addresses
                new_tensor = MXFP8Tensor.from_bf16(bf16_data, backend=backend)
                persistent_buffers[fqn].data.copy_(new_tensor.data)
                persistent_buffers[fqn].scale.copy_(new_tensor.scale)
                mcore_tensor = persistent_buffers[fqn]
            else:
                # First call: create new MXFP8Tensor
                mcore_tensor = MXFP8Tensor.from_bf16(bf16_data, backend=backend)

                # Verify correctness for TEMXFP8Tensor inputs
                if HAVE_TE and isinstance(val, TEMXFP8Tensor):
                    _verify_te_to_mcore_mxfp8_conversion(bf16_data, mcore_tensor)

                persistent_buffers[fqn] = mcore_tensor

            # Replace nn.Parameter with MXFP8Tensor attribute
            del model._parameters[key]
            setattr(model, key, mcore_tensor)

    return persistent_buffers


def _mm_mxfp8_flashinfer(x_mxfp8: MXFP8Tensor, weight: MXFP8Tensor, out=None):
    """MXFP8 matmul via FlashInfer."""
    return flashinfer_mm_mxfp8(
        x_mxfp8.data, weight.data.T, x_mxfp8.scale, weight.scale, out_dtype=torch.bfloat16, out=out
    )


def _mm_mxfp8_torch(x_mxfp8: MXFP8Tensor, weight: MXFP8Tensor, out=None):
    """MXFP8 matmul via torch.nn.functional.scaled_mm."""
    result = torch_scaled_mm(
        x_mxfp8.data,
        weight.data.t(),
        x_mxfp8.scale_2d(),
        ScalingType.BlockWise1x32,
        weight.scale,
        ScalingType.BlockWise1x32,
        swizzle_a=SwizzleType.SWIZZLE_32_4_4,
        swizzle_b=SwizzleType.SWIZZLE_32_4_4,
        output_dtype=torch.bfloat16,
    )
    if out is not None:
        out.copy_(result)
        return out
    return result


def mm_mxfp8(x: torch.Tensor, weight: MXFP8Tensor, out: torch.Tensor = None):
    """Compute a matmul in MXFP8.

    Quantizes the bf16 input activation tensor on the fly. Weight must be
    pre-quantized. Dispatches to FlashInfer or torch based on weight.backend.
    """
    backend = weight.backend
    assert (
        backend is not None
    ), "weight.backend is None — was the weight created via MXFP8Tensor.from_bf16?"

    x_squeezed = x.squeeze(1)
    x_mxfp8 = MXFP8Tensor.from_bf16(x_squeezed, backend=backend)

    if backend == "flashinfer":
        assert HAVE_FLASHINFER, "FlashInfer not available for MXFP8 matmul"
        result = _mm_mxfp8_flashinfer(x_mxfp8, weight, out=out)
    elif backend == "triton":
        assert (
            HAVE_TORCH_SCALED_MM
        ), "torch.nn.functional.scaled_mm with ScalingType/SwizzleType not available"
        result = _mm_mxfp8_torch(x_mxfp8, weight, out=out)
    else:
        raise ValueError(f"Unknown MXFP8 backend: '{backend}'")

    return result.unsqueeze(1)


================================================
FILE: megatron/core/inference/sampling_params.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import warnings
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class SamplingParams:
    """Inference parameters sent along with the prompts.
    This class contains request-level attributes that control the sampling techniques used when
    generating text. This is distinct from megatron.core.inference.contexts.BaseInferenceContext,
        which is sets model-level
    inference attributes such as the maximum sequence length, and contains the KV cache.

    For an explanation of these parameters refer to this blog
    https://ivibudh.medium.com/a-guide-to-controlling-llm-model-output-exploring-top-k-top-p-and-
    temperature-parameters-ed6a31313910
    """

    temperature: float = 1.0
    top_k: int = 0
    top_p: float = 0.0
    return_log_probs: bool = False
    skip_prompt_log_probs: bool = False
    return_segments: bool = False  # Whether to return individually detokenized tokens
    num_tokens_to_generate: int = None
    num_tokens_total: Optional[int] = None  # Cannot set both this and num_tokens_to_generate
    termination_id: Optional[int] = None
    top_n_logprobs: int = 0
    return_prompt_top_n_logprobs: bool = False  # Deprecated field for backwards compatibility
    add_BOS: bool = False
    stop_words: Optional[List[str]] = (
        None  # List of strings that will stop generation when produced
    )
    detokenize_stop_sequence: bool = False  # Keep stop words and EOD in generated text

    def __post_init__(self):
        """Ensure backward compatibility for return_prompt_top_n_logprobs.

        Sets return_prompt_top_n_logprobs based on skip_prompt_log_probs and top_n_logprobs:
        - return_prompt_top_n_logprobs = not skip_prompt_log_probs and top_n_logprobs > 0
        """
        self._sync_prompt_logprobs_fields()

    def _sync_prompt_logprobs_fields(self):
        """Synchronize return_prompt_top_n_logprobs with skip_prompt_log_probs."""

        if self.return_prompt_top_n_logprobs:
            warnings.warn(
                "return_prompt_top_n_logprobs is deprecated, use skip_prompt_log_probs instead",
                DeprecationWarning,
            )
            assert (
                not self.skip_prompt_log_probs
            ), "return_prompt_top_n_logprobs requires skip_prompt_log_probs to be False"
        if self.top_n_logprobs > 0:
            self.return_prompt_top_n_logprobs = not self.skip_prompt_log_probs
        else:
            self.return_prompt_top_n_logprobs = False

    def add_attributes(self, attribute_value_pair: dict):
        """Utility to add more attributes to sampling params

        Use this method to pass in a custom dictionary to add more sampling parameter attributes.
        c = SamplingParams
        c.add_attributes({'min_length':4, 'eod_id':153})

        Args:
            attribute_value_pair (dict): A dictionary containing attributes as the key names and
            their values as the values.
        """
        for key, value in attribute_value_pair.items():
            setattr(self, key, value)

        # Synchronize fields after setting attributes
        self._sync_prompt_logprobs_fields()

    def serialize(self) -> dict:
        """Return a dictionary that is msgpack-serializable."""
        return self.__dict__.copy()

    @classmethod
    def deserialize(cls, data: dict) -> "SamplingParams":
        """Construct SamplingParams from a msgpack-compatible dictionary."""
        obj = cls()
        obj.add_attributes(data)
        return obj


================================================
FILE: megatron/core/inference/scheduler.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import functools
import time
import typing
import warnings
from collections import OrderedDict
from typing import Dict, Optional, Type, Union

import torch

from megatron.core.inference.async_stream import AsyncStream
from megatron.core.inference.inference_request import InferenceRequest, Status
from megatron.core.inference.sampling_params import SamplingParams
from megatron.core.inference.utils import Counter


class Scheduler:
    """Scheduler for handling requests to inference engine

    This class is responsible for handing of all the incomign requests

    Args:
        max_batch_size (int): The max batch size that we can pass to the
            inference engine at a time.
        request_type (InferenceRequest): The class to use for instantiating new requests.
    """

    def __init__(self, max_batch_size):
        self.max_batch_size = max_batch_size
        self.requests: Dict[int, InferenceRequest] = OrderedDict()
        self.streams: Dict[int, AsyncStream] = OrderedDict()
        self.active_request_pool: Dict[int, InferenceRequest] = OrderedDict()
        self.waiting_request_pool: Dict[int, InferenceRequest] = OrderedDict()
        self.completed_request_pool: Dict[int, InferenceRequest] = OrderedDict()
        self.request_counter = Counter()

    def get_new_request_id(self) -> int:
        """Gets a new request id"""
        request_id = int(next(self.request_counter))
        return request_id

    def add_request(
        self,
        prompt: Optional[str] = None,
        prompt_tokens: Optional[torch.Tensor] = None,
        encoder_prompt: Optional[str] = None,
        sampling_params: Optional[SamplingParams] = None,
        arrival_time: Optional[float] = None,
        streaming: bool = False,
        inference_request: Optional[InferenceRequest] = None,
        *,
        inference_parameters: Optional[SamplingParams] = None,
    ) -> int:
        """Add an incoming request

        This method will add the request to either the active pool or the waiting pool
        depending on the batch size.

        Args:
            prompt (str): Input prompt string
            prompt_tokens (torch.Tensor): A torch tensor having the input prompts tokenized
            encoder_prompt (str): Encoder input string
            sampling_params (SamplingParams): The sampling parameters
            arrival_time (float, optional): The incoming request time. Defaults to None.
            streaming (bool, optional): Whether to asynchronously stream tokens for this request.
            inference_request (InferenceRequest, optional): A fully constructed request.
                Defaults to None.

        Returns:
            The request_id for the new request.
        """
        status = (
            Status.ACTIVE_BUT_NOT_GENERATING_TOKENS
            if len(self.active_request_pool) < self.max_batch_size
            else Status.WAITING_IN_QUEUE
        )

        # Deprecation warning for `inference_parameters`.
        if inference_parameters is not None:
            warnings.warn(
                "`inference_parameters` has been renamed to `sampling_params`, and the "
                "previous name will be removed in `megatron-core` 0.13."
            )
            if sampling_params is None:
                sampling_params = inference_parameters

        if inference_request is None:
            assert prompt is not None
            assert prompt_tokens is not None

            request_id = self.get_new_request_id()

            if arrival_time is None:
                arrival_time = time.time()

            inference_request = InferenceRequest(
                request_id=request_id,
                prompt=prompt,
                sampling_params=sampling_params,
                arrival_time=arrival_time,
                prompt_tokens=prompt_tokens,
                status=status,
                encoder_prompt=encoder_prompt,
            )
        else:
            request_id = inference_request.request_id
            inference_request.status = status
            if inference_request.arrival_time is None:
                inference_request.arrival_time = time.time()

        self.requests[request_id] = inference_request

        if streaming:
            abort_request = functools.partial(self.abort_request, request_id=request_id)
            self.streams[request_id] = AsyncStream(request_id, abort_request)

        if status == status.ACTIVE_BUT_NOT_GENERATING_TOKENS:
            self.active_request_pool[request_id] = inference_request
        else:
            self.waiting_request_pool[request_id] = inference_request

        return request_id

    def num_requests_pending(self) -> int:
        """Get the number of requests pending.

        This method returns the number of active + waiting requests.
        """
        return len(self.active_request_pool) + len(self.waiting_request_pool)

    def have_requests_pending(self) -> bool:
        """Method to check if there are requests pending.

        This method returns False only when there are no active requests or waiting requests.
        """
        return self.num_requests_pending() > 0

    def add_earliest_waiting_request_to_active_pool(self):
        """Utility to add the waiting request to active pool

        This method will add the earliest request (FIFO) that is in the waiting request
        pool to the active request pool.
        """
        assert (
            len(self.active_request_pool) < self.max_batch_size
        ), "Active request pool is already full. Cant add any more requests"
        if len(self.waiting_request_pool) > 0:
            (earliest_waiting_request_request_id, earliest_waiting_request) = (
                self.waiting_request_pool.popitem(last=False)
            )
            earliest_waiting_request.status = Status.ACTIVE_BUT_NOT_GENERATING_TOKENS
            self.active_request_pool[earliest_waiting_request_request_id] = earliest_waiting_request

    def update_requests_pools(
        self, result_dict: Optional[typing.OrderedDict[int, InferenceRequest]] = None
    ):
        """Update request pool status

        This method will full up the active request pool, if it has less than max batch size
        elements from the waiting request pool.
        If provided with a request dict, it will put the completed requests into the completed
        request pool and add waiting request into active pool.

        Args:
            result (typing.OrderedDict[int, InferenceRequest], optional): The result returned
                by the engine. A dictionary with keys as the request ids, and values as the
                requests. Defaults to None.
        """
        for result_request_id in list(result_dict.keys()):
            active_request = self.active_request_pool[result_request_id]

            # If a request has completed put it into the completed request pool.
            if active_request.status == Status.COMPLETED:
                completed_request = self.active_request_pool.pop(result_request_id)
                self.completed_request_pool[result_request_id] = completed_request

        # If the active request pool is not full, add waiting requests in FIFO order
        while (
            len(self.active_request_pool) < self.max_batch_size
            and len(self.waiting_request_pool) > 0
        ):
            self.add_earliest_waiting_request_to_active_pool()

    def abort_request(
        self,
        request_id: int,
        *,
        exception: Optional[Union[BaseException, Type[BaseException]]] = None,
    ):
        """Cancels the given request"""
        stream = self.streams.get(request_id, None)
        if stream is not None:
            stream.finish(exception=exception)


================================================
FILE: megatron/core/inference/symmetric_memory.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

"""Lazy-initialized symmetric memory manager for inference.

Provides a registry of SymmetricMemoryBuffer instances keyed by a
user-supplied identifier (e.g. "tp", "ep").  Buffers are created on first
access so that callers never need to worry about initialization ordering
relative to the inference context.
"""

from __future__ import annotations

import operator
from functools import reduce
from typing import Optional

import torch

try:
    import torch.distributed._symmetric_memory as symm_mem

    HAVE_TORCH_SYMM_MEM = True
except ImportError:
    HAVE_TORCH_SYMM_MEM = False

try:
    import triton  # pylint: disable=unused-import

    HAVE_TRITON = True
except ImportError:
    HAVE_TRITON = False


class SymmetricMemoryBuffer:
    """
     symmetric memory buffer used in inference.
    This buffer is used by mcore-inference's low-latency
    NVLS all-gather and reduce-scatter collectives.
    """

    def __init__(self, size_in_mb, process_group):
        if not HAVE_TORCH_SYMM_MEM or not HAVE_TRITON:
            # This should be hit if the user is running an older
            # version of torch, or if they do not have triton
            # installed.
            self.symm_buffer = None
            self.symm_mem_hdl = None
        else:
            numel = int(size_in_mb * 1024 * 1024)  # size in bytes
            try:
                symm_mem.enable_symm_mem_for_group(process_group.group_name)
                self.symm_buffer = symm_mem.empty(numel, dtype=torch.uint8, device='cuda')
                self.symm_mem_hdl = symm_mem.rendezvous(self.symm_buffer, process_group)
            except RuntimeError as e:
                # If symmetric memory initialization fails, set buffer and handle to None
                # This should happen if the process group is not contained within NVlink
                self.symm_buffer = None
                self.symm_mem_hdl = None

    def _can_allocate(self, numel, dtype) -> bool:
        """
        Returns whether enough symmetric memory is available
        for the given tensor shape and dtype.
        """
        if self.symm_mem_hdl is None:
            return False
        size_of_dtype = torch.tensor([], dtype=dtype).element_size()
        required_len = numel * size_of_dtype
        return required_len <= self.symm_buffer.numel()

    def _allocate(self, numel, dtype) -> torch.Tensor:
        """
        Allocates a sub-tensor from the self.symm_buffer for the given numel and dtype"""
        required_bytes = numel * torch.tensor([], dtype=dtype).element_size()
        return self.symm_buffer[0:required_bytes].view(dtype).view(numel)

    def maybe_get_tensors(self, tensor_specs, alignment=16):
        """
        Pack multiple tensors contiguously in the symmetric buffer with alignment.

        Each tensor's starting offset is aligned to `alignment` bytes (default 16
        for 128-bit multimem access).

        Args:
            tensor_specs: list of (numel, dtype) tuples.
            alignment: byte alignment for each tensor's start offset (default 16).

        Returns:
            {"handle": None, "tensors": None} if unavailable or insufficient space.
            {"handle": symm_mem_hdl, "tensors": [(raw_byte_view, byte_offset), ...]}
            on success, where raw_byte_view is a uint8 slice of the buffer.
        """
        _NONE_RESULT = {"handle": None, "tensors": None}
        if self.symm_mem_hdl is None:
            return _NONE_RESULT

        # Compute aligned byte sizes and running offsets
        slices = []
        current_offset = 0
        for numel, dtype in tensor_specs:
            nbytes = numel * torch.tensor([], dtype=dtype).element_size()
            aligned_nbytes = ((nbytes + alignment - 1) // alignment) * alignment
            slices.append((current_offset, nbytes))
            current_offset += aligned_nbytes

        if not self._can_allocate(current_offset, torch.uint8):
            return _NONE_RESULT

        tensors = []
        for offset, nbytes in slices:
            tensors.append((self.symm_buffer[offset : offset + nbytes], offset))

        return {"handle": self.symm_mem_hdl, "tensors": tensors}

    def maybe_get_tensor(self, tensor_shape, dtype):
        """
        Returns (potentially) a sub-tensor from the self.symm_buffer for the given shape.
        If enough symmetric memory is not available, returns None.
        """
        if self.symm_mem_hdl is None:
            return {"tensor": None, "handle": None}
        numel = reduce(operator.mul, tensor_shape, 1)
        if not self._can_allocate(numel, dtype):
            return {"tensor": None, "handle": None}
        return {
            "tensor": self._allocate(numel, dtype).view(*tensor_shape),
            "handle": self.symm_mem_hdl,
        }


class SymmetricMemoryManager:
    """Registry of lazily-initialized symmetric memory buffers.

    Usage::

        buf = SymmetricMemoryManager.get_buffer("tp", process_group=tp_group)
        result = buf.maybe_get_tensor(shape, dtype)
    """

    _buffers: dict[str, SymmetricMemoryBuffer] = {}
    _default_size_mb: int = 256

    @classmethod
    def get_buffer(
        cls,
        key: str,
        process_group: Optional[torch.distributed.ProcessGroup] = None,
        size_mb: Optional[int] = None,
    ) -> SymmetricMemoryBuffer:
        """Return the buffer for *key*, creating it on first call.

        Args:
            key: Unique identifier (e.g. "tp", "ep").
            process_group: Required on the first call for a given key.
                Subsequent calls may omit it.
            size_mb: Buffer size in MiB (default 256).
        """
        if key not in cls._buffers:
            assert (
                process_group is not None
            ), f"SymmetricMemoryManager: process_group is required on first access for key='{key}'"
            cls._buffers[key] = SymmetricMemoryBuffer(
                size_in_mb=size_mb or cls._default_size_mb, process_group=process_group
            )
        return cls._buffers[key]

    @classmethod
    def destroy(cls, key: Optional[str] = None) -> None:
        """Destroy one or all buffers.

        Args:
            key: If provided, destroy only that buffer. Otherwise destroy all.
        """
        if key is not None:
            cls._buffers.pop(key, None)
        else:
            cls._buffers.clear()

    @classmethod
    def is_initialized(cls, key: str) -> bool:
        """Check whether a buffer has been created for *key*."""
        return key in cls._buffers


================================================
FILE: megatron/core/inference/text_generation_controllers/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/core/inference/text_generation_controllers/encoder_decoder_text_generation_controller.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from typing import Any, Dict, OrderedDict

import torch

from megatron.core.inference.inference_request import InferenceRequest
from megatron.core.inference.text_generation_controllers.text_generation_controller import (
    TextGenerationController,
)
from megatron.core.inference.utils import get_attention_mask


class EncoderDecoderTextGenerationController(TextGenerationController):
    """The text generation controller for encoder-decoder architecture

    This class inherits from TextGenerationController, adding features
    relating to encoder input encoder_prompt

    """

    def prep_inference_input(
        self,
        prompts_tokens: torch.Tensor,
        active_requests: OrderedDict[str, InferenceRequest],
        use_attention_mask: bool = False,
    ) -> Dict[str, Any]:
        """Preparing input data for inference, using respective wrapper's prep_inference_input method # pylint: disable=line-too-long

        Args:
            prompts_tokens (torch.Tensor): A tensor of shape [batch_size, max_sequence_length]
            active_requests (OrderedDict[str, InferenceRequest]): The input active requests
            use_attention_mask (bool): Whether to use an attention mask. Should be set to True only
                when exclusively doing prefill (no decode) with variable prompt lengths.

        Returns:
            A dict of the inference input for the current batch.
        """
        encoder_prompts = list(
            map(lambda request: request.encoder_prompt, active_requests.values())
        )

        inference_input = self.inference_wrapped_model.prep_inference_input(
            prompts_tokens, encoder_prompts, tokenizer=self.tokenizer
        )

        if use_attention_mask and (
            attention_mask := inference_input.get("attention_mask", None) is None
        ):
            inference_input["attention_mask"] = get_attention_mask(prompts_tokens.size(1))

        return inference_input


================================================
FILE: megatron/core/inference/text_generation_controllers/text_generation_controller.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import asyncio
import concurrent
import copy
import functools
import inspect
from collections import defaultdict
from typing import Any, Dict, List, Optional, OrderedDict, Tuple, Union

import torch
import torch.nn.functional as F
from torch import Tensor

from megatron.core import parallel_state
from megatron.core.inference.async_stream import AsyncStream
from megatron.core.inference.communication_utils import (
    broadcast_from_last_pipeline_stage,
    is_pipeline_last_stage,
)
from megatron.core.inference.contexts.dynamic_context import MaxSequenceLengthOverflowError
from megatron.core.inference.contexts.static_context import StaticInferenceContext
from megatron.core.inference.inference_request import InferenceRequest, Status
from megatron.core.inference.model_inference_wrappers.abstract_model_inference_wrapper import (
    AbstractModelInferenceWrapper,
)
from megatron.core.inference.sampling_params import SamplingParams
from megatron.core.inference.utils import get_attention_mask, set_decode_expert_padding
from megatron.core.models.multimodal.llava_model import LLaVAModel
from megatron.core.tensor_parallel.mappings import gather_from_sequence_parallel_region
from megatron.core.transformer.enums import CudaGraphScope
from megatron.core.transformer.moe.moe_layer import BaseMoELayer
from megatron.core.transformer.moe.router_replay import RouterReplay, RouterReplayAction
from megatron.core.transformer.utils import set_model_to_sequence_parallel
from megatron.core.utils import get_asyncio_loop, get_model_config, get_pg_size, unwrap_model

try:
    import transformer_engine as te  # pylint: disable=unused-import

    HAVE_TE = True

except ImportError:
    HAVE_TE = False

from megatron.core.inference.batch_dimensions_utils import InferenceBatchDimensions


# pylint: disable=line-too-long
class TextGenerationController:
    """The text generation controller (the main sampling loop)

    This class tokenizes the input, runs inference, samples from logits, and detokenizes the output.

    Args:
        inference_wrapped_model (AbstractModelInferenceWrapper): A model that
            is wrapped using the specs given in the abstract_model_inference_wrapper.py
        tokenizer (_type_): Tokenizer used for tokenizing and detokenizing the prompts
    """

    def __init__(self, inference_wrapped_model: AbstractModelInferenceWrapper, tokenizer):
        self.inference_wrapped_model = inference_wrapped_model
        self.model_config = self.inference_wrapped_model.model.config
        inference_config = self.inference_wrapped_model.inference_context.config
        self.tokenizer = tokenizer
        self.num_speculative_tokens = inference_config.num_speculative_tokens

        pg_collection = inference_config.pg_collection
        if pg_collection is not None:
            self.pp_group = pg_collection.pp
        else:
            self.pp_group = parallel_state.get_pipeline_model_parallel_group()

        self.model_is_pipeline_parallel = self.model_config.pipeline_model_parallel_size > 1

        # Use padded vocab size because tokenizer vocab size might pad to nearest power of 2.
        # TODO(ksanthanam): Consider deprecating this check if LLaVAModel is no longer used
        unwrapped_model = unwrap_model(self.inference_wrapped_model.model)
        if isinstance(unwrapped_model, LLaVAModel):
            self.vocab_size = unwrapped_model.language_model.vocab_size
        else:
            self.vocab_size = unwrapped_model.vocab_size

        self.sampling_rng = torch.Generator(device=torch.cuda.current_device())
        self.num_mtp_heads = self._get_mtp_num_heads()
        self.sampling_rng.manual_seed(self.model_config.inference_sampling_seed)

        if self.inference_wrapped_model.inference_context.is_dynamic_batching():
            self._init_dynamic_sampling_tensors()

    def _get_mtp_num_heads(self) -> int:
        """Get the number of MTP layers from the model config."""
        model = self.inference_wrapped_model.model
        if hasattr(model, 'config') and hasattr(model.config, 'mtp_num_layers'):
            return model.config.mtp_num_layers or 0
        return 0

    def set_stop_word_finished_ids_callback(self, callback):
        """Set a callback to get request IDs that should be marked as finished due to stop words.

        The callback should have signature: callback(active_request_ids: List[int]) -> Set[int]
        Returns a set of request IDs from active_request_ids that should be marked as finished.

        Args:
            callback: Function that returns request IDs to mark as finished.
        """
        self._get_stop_word_finished_ids_callback = callback

    def _init_dynamic_sampling_tensors(self):
        """Initialize tensors needed for dynamic sampling."""
        context = self.inference_wrapped_model.inference_context
        max_requests = context.max_requests

        # Callback to get request IDs that should be marked as finished due to stop words
        self._get_stop_word_finished_ids_callback = None

        device = torch.cuda.current_device()
        logits_dtype = self.inference_wrapped_model.config.params_dtype

        self._sampling_backend = "torch"
        self._sampled_tokens_cuda = torch.empty(max_requests, dtype=torch.int64, device=device)
        # Speculative tokens tensor will be allocated later when num_speculative_tokens is set by the engine
        self._accepted_tokens_per_request = None
        # MTP tensor will be allocated later when num_speculative_tokens is set by the engine
        self._sampled_mtp_tokens_cuda = None
        # Last accepted sequence indices for serial MTP computation
        self._last_accepted_seq_indices = None

        # Keep track of request metadata.
        self._request_metadata: Dict[str, Tensor] = {}
        for label, dtype, on_gpu in context.request_metadata_types:
            tensor = context.request_metadata[label]
            if not on_gpu:
                # Create pinned tensors for request metadata that lives on CPU.
                # This is metadata which requires D2H copies, such as top_k for torch sampling.
                tensor = torch.empty_like(tensor, device="cpu", pin_memory=True)
            self._request_metadata[label] = tensor

        # Used for inefficient torch sampling.
        if self._sampling_backend == "torch":
            self._torch_sampling_buckets: List[Tuple] = []

        self._init_mtp_sampling_tensor()

    def _init_mtp_sampling_tensor(self):
        """Initialize the MTP sampling tensor after num_speculative_tokens is set."""
        if self.num_speculative_tokens is not None and self.num_speculative_tokens > 0:
            context = self.inference_wrapped_model.inference_context
            max_requests = context.max_requests
            device = torch.cuda.current_device()
            self._sampled_mtp_tokens_cuda = torch.empty(
                [self.num_speculative_tokens, max_requests], dtype=torch.int64, device=device
            )
            self._accepted_tokens_per_request = (
                torch.ones(
                    [max_requests, self.num_speculative_tokens], dtype=torch.int64, device=device
                )
                * -1
            )

    @staticmethod
    def tokenize_prompt(tokenizer, prompt: str, add_BOS: bool = False) -> List[int]:
        """Utility to tokenize the input prompts.

        Args:
            tokenizer: The tokenizer to use.
            prompt (str): The input prompt.
            add_BOS (bool): Whether to add a BOS token.

        Returns:
            List[int]: Returns the tokenized prompt.
        """

        prompt_tokens = tokenizer.tokenize(prompt)

        if add_BOS:
            assert tokenizer.bos is not None

        while prompt_tokens and prompt_tokens[0] == tokenizer.bos:
            prompt_tokens.pop(0)

        if add_BOS:
            prompt_tokens = [tokenizer.bos] + prompt_tokens

        return prompt_tokens

    @staticmethod
    def detokenize(
        tokenizer, tokens: List[int], remove_EOD: bool = True, skip_special_tokens: bool = True
    ) -> str:
        """
        Detokenize a sequence of token IDs, optionally removing trailing EOD
        tokens and handling skip_special_tokens for different tokenizer APIs.

        Args:
            tokenizer: The tokenizer to use for detokenization.
            tokens (List[int]): The token IDs to convert back to text.
            remove_EOD (bool): Whether to remove trailing EOD tokens before
                detokenization. Defaults to True.
            skip_special_tokens (bool): Whether to remove special tokens (e.g. BOS/EOS)
                during detokenization. Only passed through if the tokenizer supports it.

        Returns:
            str: The detokenized string.
        """
        if remove_EOD and getattr(tokenizer, "eod", None) is not None:
            while tokens and tokens[-1] == tokenizer.eod:
                tokens = tokens[:-1]

        sig_params = inspect.signature(tokenizer.detokenize).parameters.values()
        detok_accepts_skip = any(
            p.name == "skip_special_tokens" or p.kind == inspect.Parameter.VAR_KEYWORD
            for p in sig_params
        )
        if detok_accepts_skip:
            return tokenizer.detokenize(tokens, skip_special_tokens=skip_special_tokens)
        else:
            return tokenizer.detokenize(tokens)

    def detokenize_generations(
        self,
        tokens_gpu_tensor: torch.Tensor,
        lengths_gpu_tensor: torch.Tensor,
        detokenize_segments: bool,
        skip_special_tokens: bool = True,
    ) -> tuple[str, Optional[List[List[str]]]]:
        """Detokenize the generated tokens.

        Args:
            tokens_gpu_tensor (torch.Tensor): Tensor containing the tokens
            lengths_gpu_tensor (torch.Tensor): Tensor containing the lengths of each sequence
            detokenize_segments (bool): If True, returns individually detokenized tokens. If False,
            returns None as second element. Helpful for understanding per-token boundaries in
            generated text.
            skip_special_tokens (bool): If True removes special tokens like bos
            during detokenization.

        Returns:
            tuple[str, List[str] | None]: A tuple containing:
            - str: The complete detokenized text
            - List[str] | None: List of segmented tokens if detokenize_segments is True, else None
        """
        # TODO(helenn): Unify with `detokenize_generations` from legacy textgen path

        if not detokenize_segments:
            tokens = tokens_gpu_tensor.tolist()
            return (
                self.detokenize(self.tokenizer, tokens, skip_special_tokens=skip_special_tokens),
                None,
            )

        prompts_plus_generations: List[str] = []
        prompts_plus_generations_segments: List[List[str]] = []
        tokens_gpu_tensor = torch.unsqueeze(tokens_gpu_tensor, 0)
        tokens = tokens_gpu_tensor.tolist()
        lengths = lengths_gpu_tensor.tolist()

        for sequence_tokens, length in zip(tokens, lengths):
            sequence_tokens = sequence_tokens[:length]
            detok_str = self.detokenize(self.tokenizer, sequence_tokens)
            prompts_plus_generations.append(detok_str)
            offsets = self.tokenizer.offsets(sequence_tokens, detok_str)
            words = [
                detok_str[start:end] for start, end in zip(offsets, offsets[1:] + [len(detok_str)])
            ]

            prompts_plus_generations_segments.append(words)

        text = self.detokenize(self.tokenizer, tokens[0], skip_special_tokens=skip_special_tokens)

        return text, prompts_plus_generations_segments

    def _torch_sampling_func(
        self,
        last_token_logits: torch.Tensor,
        temperature: float,
        top_k: int,
        top_p: float,
        vocab_size: Optional[int] = None,
    ):
        """Samples the logits to generate outputs

        Given the logits of the last token, this function samples it
        according to the parameters defined in sampling_params
        and returns the samples. If sampling parameters top_n_logprobs > 0
        at each step it also updates the top_n_logprobs dict.

        Args:
            last_token_logits (torch.Tensor): The last token logits. A tensor of
                size [batch_size, vocab_size].
            temperature (float): The temperature to use for sampling.
            top_k (int): The top-k value to use for sampling.
            top_p (float): The top-p value to use for sampling.
            vocab_size (int): Obtained from the tokenizer. Defaults to None.

        Returns:
            sampled_logits (torch.Tensor): 1D tensor with [batch_size] elements
        """
        assert isinstance(top_p, float)
        assert isinstance(top_k, int)
        assert not (top_k > 0 and top_p > 0.0), "Cannot have top-p and top-k both greater than zero"
        assert top_p <= 1.0, "top-p should be in (0,1]"

        def modify_logits_for_top_k_filtering(logits, top_k):
            """Set the logits for none top-k values to -inf."""
            filter_ = logits < torch.topk(logits, top_k)[0][..., -1, None]
            logits.masked_fill_(filter_, float("-Inf"))

        def modify_logits_for_top_p_filtering(logits, top_p):
            """Set the logits for none top-p values to -inf."""
            # First sort and calculate cumulative sum of probabilities.
            sorted_logits, sorted_indices = torch.sort(logits, descending=True)
            cumulative_probs = sorted_logits.softmax(dim=-1).cumsum(dim=-1)

            # Filteration based on the cumulative sum.
            filter_ = cumulative_probs > top_p
            # This shift by 1 is weird and I cannot justify it. This existed
            # in the original implementation:
            #   https://github.com/ari-holtzman/degen/blob/master/gen.py
            # and I guess it is needed so keeping it for now.
            # Clone needed: filter_[:, 1:] and filter_[:, :-1] are overlapping views;
            # without clone, each write would corrupt the next read during the shift.
            filter_[:, 1:] = filter_[:, :-1].clone()
            # Make sure we at least have one token to select from.
            filter_[..., 0] = 0

            # Fill in the filtered part
            filter_ = filter_.scatter(1, sorted_indices, filter_)
            logits.masked_fill_(filter_, float("-Inf"))

        # Greedy sampling
        if top_k == 1:
            sampled_logits = torch.argmax(last_token_logits, dim=-1)
        else:
            # Clone needed: .div_() and masked_fill_() below modify in-place,
            # which would mutate the caller's tensor without this clone.
            last_token_logits = last_token_logits.clone()
            if temperature != 1.0:
                last_token_logits.div_(temperature)
            if top_k > 1:
                assert top_k <= last_token_logits.size(1), "top-k is larger than logit size."
                if vocab_size:
                    assert top_k < vocab_size, "top-k is larger than vocab size."
                modify_logits_for_top_k_filtering(last_token_logits, top_k)

            elif top_p > 0.0:
                modify_logits_for_top_p_filtering(last_token_logits, top_p)

            # After filtering, we need to recalculate the distribution.
            probabilities = last_token_logits.softmax(dim=-1)

            sampled_logits = torch.multinomial(
                probabilities, num_samples=1, generator=self.sampling_rng
            ).view(-1)

            # If vocab size is provided, make sure the samples are in in the range [0, vocab-size).
            if vocab_size:
                sampled_logits = torch.clamp(sampled_logits, min=0, max=(vocab_size - 1))

        return sampled_logits

    def sample_from_logits(
        self,
        last_token_logits: torch.Tensor,
        sampling_params: Optional[SamplingParams] = None,
        vocab_size: Optional[int] = None,
        generation_started: Optional[torch.Tensor] = None,
        top_n_logprobs_dict: Dict[int, List[Dict[str, float]]] = None,
        logits: Optional[torch.Tensor] = None,
        **kwargs,
    ) -> torch.Tensor:
        """Samples the logits to generate outputs

        Given the logits of the last token, this function samples it
        according to the parameters defined in sampling_params
        and returns the samples. If sampling parameters top_n_logprobs > 0
        at each step it also updates the top_n_logprobs dict.

        Args:
            last_token_logits (torch.Tensor): The last token logits. A tensor of
                size [batch_size, vocab_size]
            sampling_params (SamplingParams): The parameters to use for inference.
            vocab_size (int): Obtained from the tokenizer. Defaults to None
            generation_started (torch.Tensor): A boolean tensor of shape [batch_size]. True
                            indicates the prompt at that index has started generating tokens.
            top_n_logprobs_dict (top_n_logprobs_dict): The dict to be updated

        Returns:
            sampled_logits (torch.Tensor): 1D tensor with [batch_size] elements
            top_n_logprobs_this_step (torch.return_types.topk): a topk tensor with values as logits
                and indices as the top k elements. None if sampling params top_n_logprobs is 0.
        """

        if kwargs.get("common_inference_params"):
            sampling_params = kwargs["common_inference_params"]

        if sampling_params.top_n_logprobs > 0:
            # NOTE : This thing can also be clubbed with where we compute log probs
            # when --return-log-probs is enabled. This is just more efficient
            assert generation_started is not None
            if logits is None:
                batch_size = last_token_logits.shape[0]
                last_token_log_probs = F.log_softmax(last_token_logits, dim=1).to(torch.float32)
                top_n_logits_this_step = torch.topk(
                    last_token_log_probs, k=sampling_params.top_n_logprobs
                )
                top_n_logprobs_this_step = top_n_logits_this_step.values.cpu()
                top_n_logprobs_indices = top_n_logits_this_step.indices.cpu()

                # If we skip prompt log_probs then we only append for generated tokens.
                # Otherwise we always append to the logprobs dict.
                if sampling_params.skip_prompt_log_probs:
                    mask = generation_started.cpu()
                else:
                    mask = torch.ones(batch_size, dtype=torch.bool)

                self._update_top_n_logprobs_dict(
                    top_n_logprobs_this_step, top_n_logprobs_indices, mask, top_n_logprobs_dict
                )
            else:
                assert not sampling_params.skip_prompt_log_probs

                # Compute the prompt logprobs
                batch_size, seq_length, _ = logits.shape
                log_probs = F.log_softmax(logits, dim=2).to(torch.float32)
                top_n_logits_this_step = torch.topk(log_probs, k=sampling_params.top_n_logprobs)

                # Move the token dimension to the front and then add each token logprobs
                # individually for every request in the batch
                top_n_logprobs_this_step = top_n_logits_this_step.values.permute(1, 0, 2).cpu()
                top_n_logprobs_indices = top_n_logits_this_step.indices.permute(1, 0, 2).cpu()

                # We append to the logprobs dict for every prompt token
                mask = torch.ones(batch_size, dtype=torch.bool)

                for i in range(seq_length):
                    self._update_top_n_logprobs_dict(
                        top_n_logprobs_this_step[i],
                        top_n_logprobs_indices[i],
                        mask,
                        top_n_logprobs_dict,
                    )

        top_p = sampling_params.top_p
        top_k = sampling_params.top_k
        temperature = sampling_params.temperature

        return self._torch_sampling_func(last_token_logits, temperature, top_k, top_p, vocab_size)

    def update_generation_status(
        self,
        updated_prompts_tokens: torch.Tensor,
        generation_started: torch.Tensor,
        current_context_end_position: int,
        is_generation_done_tensor: torch.Tensor,
        generated_sequence_lengths: torch.Tensor,
        termination_id: Optional[int] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Checks which prompts have reached an end condition

        We check which prompts have reached an end condition and set the corresponding
        flags of the is_generation_done_tensor to True. The generated sequence lengths
        increase as we keep generating, until that prompts hits an end condition. The
        generation_started tensor determines which prompts have started generating.

        Args:
            updated_prompts_tokens (torch.Tensor): The prompts tokens updated with the latest
                generated tokens. A tensor of shape [batch_size, max_seq_len]
                (i.e max_seq_len = max_prompt_len + tokens_to_generate)
            generation_started (torch.Tensor): A boolean tensor of shape [batch_size]. True
                indicates the prompt at that index has started generating tokens.
            current_context_end_position (int): An integer indicating which position to
                extract from the prompts tokens to get the latest generated tokens.
            is_generation_done_tensor (torch.Tensor): A boolean tensor of shape [batch_size].
                True indicates the prompt at that index has reached end condition.
            generated_sequence_lengths (torch.Tensor): A int tensor of shape [batch_size].
                Each value represents the generated sequence lengths for that prompt.

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: Returns the boolean
                is_generation_done_tensor and the generated_sequence_lengths after updating it
        """
        if termination_id is None:
            termination_id = self.tokenizer.eod
        latest_samples = updated_prompts_tokens[:, current_context_end_position]
        # Make sure we are checking eod criterion only for prompts that have started generating
        # (i.e) We only look at the generated tokenns and not the input tokens.
        reached_eod = (latest_samples == termination_id) & generation_started
        is_generation_done_tensor = is_generation_done_tensor | reached_eod
        # We increment generated sequence lengths when that prompt has not hit the
        # EOD and generation has started
        generated_sequence_lengths += ~is_generation_done_tensor & generation_started

        return is_generation_done_tensor, generated_sequence_lengths.int()

    def pad_input_prompt_tokens(
        self,
        batch_prompt_tokens_list: List[List[int]],
        padded_batch_size: int,
        padded_sequence_length: int,
    ) -> torch.Tensor:
        """Method to pad input prompts

        Given a list of prompts, pad them all to uniform length

        Args:
            batch_prompt_tokens_list (List[List[int]]): A list containing the prompt tokens
            padded_batch_size (int): The maximum number of requests for this batch
            padded_sequence_length (int): The maximum number of input + output tokens for this batch

        Returns:
            torch.Tensor: A torch tensor of shape [padded_batch_size, padded_sequence_length]
        """
        batch_size = len(batch_prompt_tokens_list)

        # Pad existing tokens to maximum sequence length
        for prompt_tokens in batch_prompt_tokens_list:
            padding_size = padded_sequence_length - len(prompt_tokens)
            prompt_tokens.extend([self.tokenizer.eod] * padding_size)

        # Pad to maximum batch size
        padded_prompt_tokens_list = batch_prompt_tokens_list
        num_padded_requests = padded_batch_size - len(batch_prompt_tokens_list)
        padded_prompt_tokens_list += [
            [self.tokenizer.eod] * padded_sequence_length for _ in range(num_padded_requests)
        ]

        tokens = torch.tensor(padded_prompt_tokens_list, device=torch.cuda.current_device())

        return tokens

    def unpad_input_prompt_tokens(
        self, padded_batch_prompt_tokens: torch.Tensor, original_batch_size: int
    ):
        """Truncates the given input tensor back to the original prompt size before padding.

        Args:
            padded_batch_prompt_tokens (torch.Tensor): The padded tokens tensor
            original_batch_size (int): The original batch size before padding
        """
        return padded_batch_prompt_tokens[:original_batch_size]

    def _dynamic_step_context_init(
        self,
        construct_graph_dimensions: Optional[InferenceBatchDimensions] = None,
        is_dummy_forward: bool = False,
    ):
        """Initializes the inference context for dynamic batching.

        Args:
            construct_graph_dimensions (Optional[InferenceBatchDimensions]): The graph config to use
                for constructing the cuda graphs.
            is_dummy_forward (bool): Whether we are running an expert parallel dummy forward pass

        Return:
            input_ids (Tensor): The active input IDs.
            position_ids (Tensor): The active position IDs.
        """
        context = self.inference_wrapped_model.inference_context
        active_request_slice = slice(context.paused_request_count, context.total_request_count)

        # Remove Float16Module wrapper if it exists
        unwrapped_model = unwrap_model(self.inference_wrapped_model.model)
        model_config = get_model_config(unwrapped_model)

        # Initialize attention state.
        context.initialize_attention_state(
            construct_graph_dimensions=construct_graph_dimensions,
            is_expert_parallel_dummy_cuda_graph_step=is_dummy_forward,
        )

        # If using symmetric kernels and we are using using nccl
        # for prefill turn off symmetric kernels
        symmetric_ar_type = self.model_config.symmetric_ar_type
        nccl_all_reduce_for_prefill = self.model_config.nccl_all_reduce_for_prefill
        # Turning on/off MoE padding for cuda-graphs
        moe_pad_experts_for_cuda_graph_inference = (
            self.model_config.moe_pad_experts_for_cuda_graph_inference
        )
        is_inference_optimized = self.model_config.transformer_impl == "inference_optimized"
        if is_inference_optimized:
            assert not moe_pad_experts_for_cuda_graph_inference, (
                "moe_pad_experts_for_cuda_graph_inference cannot be True when "
                "transformer_impl is 'inference_optimized'"
            )
        if moe_pad_experts_for_cuda_graph_inference:
            if context.using_cuda_graph_this_step():
                capacity_factor = model_config.num_moe_experts / model_config.moe_router_topk
                set_decode_expert_padding(unwrapped_model, True, capacity_factor=capacity_factor)
            else:
                set_decode_expert_padding(unwrapped_model, False)

        if nccl_all_reduce_for_prefill and symmetric_ar_type is not None:
            if context.is_decode_only():
                # Turn on symmetric all reduce when in decode mode
                unwrapped_model.set_symmetric_ar(symmetric_ar_type)
            else:
                # Turn off symmetric all reduces for prefill
                unwrapped_model.set_symmetric_ar(None)

        # Get request metadata for this step.
        for label, dtype, on_gpu in context.request_metadata_types:
            if not on_gpu:
                # We need a D2H copy from the context to the pinned memory buffer.
                self._request_metadata[label].copy_(
                    context.request_metadata[label], non_blocking=True
                )

        # Get flat tokens, position ids.
        # If we are running a dummy forward step we want to use the token count agreed upon
        # by all EP ranks rather than the minimum number of tokens.
        if construct_graph_dimensions is not None and not is_dummy_forward:
            return context.current_input_and_position_ids(
                num_warmup_tokens=construct_graph_dimensions.token_count
            )
        else:
            return context.current_input_and_position_ids()

    def _dynamic_step_forward_logits(self, input_ids: Tensor, position_ids: Tensor) -> Tensor:
        """Forward step the model to get logits for dynamic batching.

        This also handles logits-broadcasting for pipeline parallelism.

        Args:
            input_ids (Tensor): The input token IDs.
            position_ids (Tensor): The position IDs.
        """
        context = self.inference_wrapped_model.inference_context
        active_request_count = context.total_request_count - context.paused_request_count

        with torch.inference_mode():
            logits = self.inference_wrapped_model.run_one_forward_step(
                {"tokens": input_ids, "position_ids": position_ids, "attention_mask": None}
            )
            # logits shape: [1, seq_len, vocab_size]

        # Note: When speculative decoding is active (num_speculative_tokens > 0),
        # the model skips MTP computation during the forward pass. MTP logits
        # will be computed serially after verification to ensure they are
        # conditioned on verified tokens only.

        if self.model_is_pipeline_parallel:
            if context.config.materialize_only_last_token_logits:
                logits_seq_len = active_request_count
            else:
                logits_seq_len = input_ids.shape[1]
            logits_shape = [1, logits_seq_len, self.vocab_size]

            if is_pipeline_last_stage(self.pp_group):
                assert logits is not None and torch.Size(logits_shape) == logits.shape

            logits = broadcast_from_last_pipeline_stage(
                logits_shape,
                dtype=self.model_config.params_dtype,
                tensor=logits,
                pp_group=self.pp_group,
            )

        return logits

    def _dynamic_step_sample_bookkeeping(self):
        """Perform bookkeeping necessary to sample logits for dynamic batching."""
        context = self.inference_wrapped_model.inference_context
        active_request_slice = slice(context.paused_request_count, context.total_request_count)

        if self._sampling_backend == "torch":
            # Bucketize the core sampling parameters.
            # Doing so via list comprehension is orders of magnitude faster than via torch.
            bucket_map = defaultdict(list)

            # Shorthands for the dictionary comprehension.
            temp = self._request_metadata["temperature"][active_request_slice].tolist()
            top_k = self._request_metadata["top_k"][active_request_slice].tolist()
            top_p = self._request_metadata["top_p"][active_request_slice].tolist()

            for request_index, (t, k, p) in enumerate(zip(temp, top_k, top_p)):
                sampling_params = (t, k, p)
                bucket_map[sampling_params].append(request_index)

            # Just unpack the key directly!
            self._torch_sampling_buckets = [
                (indices, *sampling_params) for sampling_params, indices in bucket_map.items()
            ]

    def _rewind_kv_cache(self):
        """Update the KV cache bookkeeping for speculative decoding.

        After forward pass with speculative tokens, some tokens may be rejected.
        This function "rewinds" the KV cache bookkeeping to reflect only the accepted tokens.

        When speculative tokens are rejected, we need to:
        1. Update request_kv_length_offsets (total sequence length)
        2. Update request_last_kv_block_offset (position within last block)
        3. If rewinding crosses a block boundary:
           - Reduce request_kv_block_counts
           - Update request_last_kv_block_id to point to the previous block
           - Clear the entry in request_to_kv_block_ids for the released block
           - Release the block back to the allocator
        """
        context = self.inference_wrapped_model.inference_context
        active_request_count = context.total_request_count - context.paused_request_count
        active_request_slice = slice(context.paused_request_count, context.total_request_count)

        # Get the accepted token counts for each request
        # Note: _accepted_token_counts is indexed from 0 to active_request_count-1
        accepted_tokens_per_request = self._accepted_token_counts_per_request[:active_request_count]

        # Number of tokens to rewind (rejected speculative tokens)
        num_tokens_to_rewind = self.num_speculative_tokens - accepted_tokens_per_request

        # For prefill requests, no speculative tokens were forwarded through the model,
        # so there is nothing to rewind.
        request_in_prefill_status = context.request_in_prefill_status_tensor[active_request_slice]
        num_tokens_to_rewind[request_in_prefill_status == 1] = 0

        # Save the original offset BEFORE modifying to correctly detect block boundary crossing
        original_offset = context.request_last_kv_block_offset[active_request_slice].clone()

        # Check which requests need to rewind to a previous block BEFORE modifying
        # A request crosses back to a previous block if: original_offset - num_tokens_to_rewind < 0
        remove_allocated_blocks_mask = (original_offset - num_tokens_to_rewind) < 0

        # Update the offsets
        context.request_last_kv_block_offset[active_request_slice] = (
            original_offset - num_tokens_to_rewind
        ) % context.block_size_tokens

        context.request_kv_length_offsets[active_request_slice] = (
            context.request_kv_length_offsets[active_request_slice] - num_tokens_to_rewind
        )

        # No need to update request_query_lengths (It will be set correctly in the next iteration)

        # For requests that crossed back to a previous block, we need to:
        # 1. Reduce the block count by 1
        # 2. Get the block ID to release (current request_last_kv_block_id)
        # 3. Update request_last_kv_block_id to point to the previous block
        # 4. Clear the entry in request_to_kv_block_ids for the released block
        # 5. Release the block back to the allocator
        if remove_allocated_blocks_mask.any():
            # Get indices of requests that need to release a block (relative to active requests)
            requests_needing_release = torch.nonzero(remove_allocated_blocks_mask, as_tuple=True)[0]
            # Convert to absolute indices in the context tensors
            absolute_indices = requests_needing_release + context.paused_request_count

            # No clone needed: advanced (fancy) indexing with a tensor already returns
            # a copy, not a view.
            blocks_to_release = context.request_last_kv_block_id[absolute_indices]

            # Reduce block counts for requests that crossed back
            context.request_kv_block_counts[absolute_indices] -= 1

            # Get the new block counts after decrement
            new_block_counts = context.request_kv_block_counts[absolute_indices]

            # Update request_last_kv_block_id to point to the previous block
            # and clear the released block entry in request_to_kv_block_ids
            # Vectorized implementation using advanced indexing:
            # Note: new_block_counts is guaranteed to be > 0 for all requests here, since
            # crossing back to a previous block implies the request had at least 2 blocks.

            # Update request_last_kv_block_id to point to the previous block (at index new_count - 1)
            context.request_last_kv_block_id[absolute_indices] = context.request_to_kv_block_ids[
                absolute_indices, new_block_counts - 1
            ]

            # Clear the released block entry (at index new_count, which was the old last block)
            context.request_to_kv_block_ids[absolute_indices, new_block_counts] = -1

            # Release the blocks back to the allocator
            context.kv_block_allocator.release_memory_blocks(blocks_to_release)

        # Mamba speculative rewind state update
        if context.is_hybrid_model:
            active_mamba_indices = context.mamba_metadata.request_to_mamba_state_idx[
                active_request_slice
            ]
            is_decode_mask = context.request_in_prefill_status_tensor[active_request_slice] == 0
            decode_mamba_indices = active_mamba_indices[is_decode_mask]
            accepted_tokens_per_decode_request = accepted_tokens_per_request[is_decode_mask]

            if decode_mamba_indices.numel() > 0:
                context.mamba_conv_states[:, decode_mamba_indices] = (
                    context.mamba_intermediate_conv_states[
                        :, decode_mamba_indices, accepted_tokens_per_decode_request
                    ]
                )
                context.mamba_ssm_states[:, decode_mamba_indices] = (
                    context.mamba_intermediate_ssm_states[
                        :, decode_mamba_indices, accepted_tokens_per_decode_request
                    ]
                )

    def _sample_from_logits_2d(self, logits_2d: Tensor) -> Tensor:
        """Sample tokens from 2D logits using existing sampling parameters.

        Args:
            logits_2d (Tensor): Logits of shape [num_requests, vocab_size].

        Returns:
            Tensor: Sampled tokens of shape [num_requests].
        """
        spec_token_list = []
        indices_list = []
        for request_indices, temp, top_k, top_p in self._torch_sampling_buckets:
            request_indices_tensor = torch.tensor(
                request_indices, device=logits_2d.device, dtype=torch.long
            )
            spec_token_list.append(
                self._torch_sampling_func(logits_2d[request_indices_tensor, :], temp, top_k, top_p)
            )
            indices_list.append(request_indices_tensor)

        spec_tokens = torch.empty(logits_2d.shape[0], device=logits_2d.device, dtype=torch.int64)
        for tokens, indices in zip(spec_token_list, indices_list):
            spec_tokens[indices] = tokens
        return spec_tokens

    def _compute_serial_mtp_and_sample(self):
        """Compute MTP logits serially after verification and sample speculative tokens.

        This ensures that MTP predictions are always conditioned on verified tokens.
        Each MTP depth receives the correctly sampled token from the previous depth
        (or the base token for depth 0) rather than stale speculative tokens from
        the previous step.
        """
        context = self.inference_wrapped_model.inference_context
        active_request_count = context.total_request_count - context.paused_request_count
        active_slice = slice(context.paused_request_count, context.total_request_count)

        unwrapped_model = unwrap_model(self.inference_wrapped_model.model)

        # On non-last pipeline stages, the model won't have decoder hidden states.
        has_mtp = is_pipeline_last_stage(self.pp_group) and hasattr(
            unwrapped_model, '_decoder_hidden_states_cache'
        )

        if has_mtp:
            # Get decoder hidden states at last accepted positions.
            hidden_states = unwrapped_model._decoder_hidden_states_cache
            last_accepted_hidden = hidden_states[self._last_accepted_seq_indices, :, :]
            # Shape: [active_request_count, 1, hidden_size]
        else:
            last_accepted_hidden = None

        # Compute position IDs for the next tokens.
        # After rewind, request_kv_length_offsets has been adjusted. The actual
        # KV cache length is: adjusted_offset + processed_tokens.
        # The next position to predict starts at that cache length.
        adjusted_offsets = context.request_kv_length_offsets[active_slice]
        processed_tokens = context.request_query_lengths[active_slice]
        base_position = adjusted_offsets + processed_tokens

        # Start with the freshly sampled base token.
        next_token_ids = self._sampled_tokens_cuda[:active_request_count].clone()
        current_hidden = last_accepted_hidden if has_mtp else None

        num_depths = min(self.num_speculative_tokens, self.num_mtp_heads)
        for depth in range(num_depths):
            position_ids = (base_position + depth).unsqueeze(0)  # [1, active_request_count]
            token_ids = next_token_ids.unsqueeze(0)  # [1, active_request_count]

            mtp_logits_2d = None
            if has_mtp:
                current_hidden, mtp_logits = unwrapped_model.compute_mtp_single_step(
                    hidden_states=current_hidden,
                    next_token_ids=token_ids,
                    position_ids=position_ids,
                    depth=depth,
                )
                # mtp_logits: [active_request_count, 1, vocab_size]
                mtp_logits_2d = mtp_logits.squeeze(1)  # [active_request_count, vocab_size]

            # Broadcast MTP logits across pipeline stages.
            if self.model_is_pipeline_parallel:
                mtp_logits_2d = broadcast_from_last_pipeline_stage(
                    [active_request_count, self.vocab_size],
                    dtype=self.model_config.params_dtype,
                    tensor=mtp_logits_2d,
                    pp_group=self.pp_group,
                )

            # Sample speculative token using the same sampling parameters.
            spec_tokens = self._sample_from_logits_2d(mtp_logits_2d)
            self._sampled_mtp_tokens_cuda[depth, :active_request_count] = spec_tokens

            # Use sampled token as input for the next depth.
            next_token_ids = spec_tokens

        # Clean up cached hidden states.
        if has_mtp:
            del unwrapped_model._decoder_hidden_states_cache

    def _get_required_logit_indices(
        self,
        request_in_prefill_status_tensor: Tensor,
        request_query_lengths: Tensor,
        num_decode_requests: int,
        num_prefill_requests: int,
        device: torch.device,
    ) -> Tensor:
        """Get indices into the logits tensor for tokens that need sampling.

        For decode requests, all tokens (base + speculative) are needed.
        For prefill requests, only the last token logits are needed.
        Decode requests will always be on the left, followed by prefill requests.

        Example with 5 requests (2 spec tokens):
            Assume input ids :                  [ a5  a6s  a7s |  b3    b4s  b5s   |  c6   c7s   c8s   |  d1    d2   | e1    e2    e3   e4]
            Request to prefill                  [    0         |         0         |          0        |    1        |         1          ]
            Request query lengths               [    3         |         3         |          3        |    2        |         4          ]
            OUTPUT : required_logit_indices     [ 0    1    2  |  3     4     5    |  6     7     8    |      10     |         14         ]

        Returns:
            Tensor: Indices into the sequence dimension of the logits tensor.
        """
        decode_request_indices = torch.arange(
            num_decode_requests * (self.num_speculative_tokens + 1), device=device
        )
        prefill_request_indices = (
            request_query_lengths.cumsum(dim=0)[request_in_prefill_status_tensor == 1] - 1
        )  # Last token indices for prefill requests
        required_logit_indices = torch.cat([decode_request_indices, prefill_request_indices])
        assert (
            len(required_logit_indices)
            == num_decode_requests * (self.num_speculative_tokens + 1) + num_prefill_requests
        ), (
            f"Expected length of required_logit_indices to be "
            f"num_decode_requests * (self.num_speculative_tokens + 1) + num_prefill_requests, "
            f"but got {len(required_logit_indices)} for num_decode_requests {num_decode_requests} "
            f"and num_prefill_requests {num_prefill_requests}"
        )
        return required_logit_indices

    def _sample_speculative_logits(
        self, required_logits: Tensor, request_in_prefill_status_tensor: Tensor
    ) -> tuple:
        """Sample tokens from logits using sampling buckets.

        For torch sampling buckets: [request_indices, temp, top_k, top_p]

        Example with 5 requests:
            token_to_request_idx :              [ 0    0     0  |  1     1     1     |  2     2     2     |   3    |   4  ]
            required_logits :                   [ a5l  a6l  a7l |  b3l    b4l  b5l   |  c6l   c7l   c8l   |  d2l   | e4l  ]  # Shape [11, vocab_size]

            Sampling buckets: [[[0,2], temp1, top_k1, top_p1], [[1], temp3, top_k3, top_p3], [[3, 4], temp2, top_k2, top_p2]]

            Final output tokens : [a5s  a6s  a7s  c6s  c7s  c8s  b3s  b4s  b5s  d2s  e4s]  # Shape [11]
            (Rearranged from sampling bucket order back to input order using token_order)

        Returns:
            tuple: (output_tokens, repeats) where output_tokens has shape [total_required_tokens]
        """
        repeats = torch.where(
            request_in_prefill_status_tensor == 0, 1 + self.num_speculative_tokens, 1
        )
        token_to_request_index = torch.repeat_interleave(
            torch.arange(
                len(request_in_prefill_status_tensor),
                device=request_in_prefill_status_tensor.device,
            ),
            repeats,
        )

        output_tokens_jumbled_list = []
        token_order_list = []

        for request_indices, temp, top_k, top_p in self._torch_sampling_buckets:
            request_indices_tensor = torch.tensor(
                request_indices, device=token_to_request_index.device
            )
            required_indices = torch.where(
                torch.isin(token_to_request_index, request_indices_tensor)
            )[0]
            output_tokens_jumbled_list.append(
                self._torch_sampling_func(required_logits[required_indices, :], temp, top_k, top_p)
            )
            token_order_list.append(required_indices)

        output_tokens_jumbled = torch.cat(output_tokens_jumbled_list, dim=0)
        output_tokens = torch.empty(
            len(output_tokens_jumbled),
            device=output_tokens_jumbled.device,
            dtype=output_tokens_jumbled.dtype,
        )
        token_order = torch.cat(token_order_list, dim=0)
        # Rearrange output tokens from sampling_bucket request order back to input ids order
        output_tokens[token_order] = output_tokens_jumbled

        return output_tokens, repeats

    def _verify_speculative_tokens(
        self,
        output_tokens: Tensor,
        input_tokens_required: Tensor,
        request_in_prefill_status_tensor: Tensor,
        repeats: Tensor,
        num_decode_requests: int,
        num_prefill_requests: int,
        active_request_count: int,
    ) -> tuple:
        """Verify speculative tokens against input tokens and compute acceptance.

        Creates an accepted tokens mask where:
        - For prefill requests, the token is always accepted.
        - For decode requests, the first token (base token) is always accepted, then we compare
          sampled tokens with input tokens and accept consecutive matches.
        Then finds the index of the last accepted token per request.

        Example (assume 1, 2, and 0 spec tokens are accepted in the first 3 decode requests):
            input_tokens_required:              [ a5  a6s  a7s |  b3    b4s  b5s   |  c6   c7s   c8s   |     d2      |         e4         ]  # Size 11
            Output tokens                       [ a6o a7o  a8o |  b40   b5o  b6o   |  c7o  c8o   c9o   |     d3o     |         e5o        ]
            Output tokens right shift           [ d3o a6o  a7o |  a8o   b40  b5o   |  b6o  c7o   c8o   |     c9o     |         d3o        ]
            Accepted tokens  mask               [  1   1    0  |  1      1    1    |   1    0     0    |      1      |         1          ]
            Last one indices                    [      1       |         5         |        6          |      9      |         10         ]

        Returns:
            tuple: (last_one_indices, accepted_tokens_mask, input_tokens_required) where
                last_one_indices contains the index of the last accepted token per request.
        """
        if input_tokens_required.ndim == 2:
            assert (
                input_tokens_required.shape[0] == 1
            ), f"Expected input_tokens_required to have 1 row, but got {input_tokens_required.shape}"
            input_tokens_required = input_tokens_required.squeeze(0)

        # Initialize mask with False to prevent boundary bleed
        accepted_tokens_mask = torch.zeros_like(input_tokens_required, dtype=torch.bool)

        # Make all prefill tokens accepted
        token_to_prefill_idx = torch.repeat_interleave(request_in_prefill_status_tensor, repeats)
        accepted_tokens_mask[token_to_prefill_idx == 1] = True

        # Safe decode token verification without cross-batch boundary contamination
        decode_mask_2d = None
        if num_decode_requests > 0:
            decode_len = num_decode_requests * (self.num_speculative_tokens + 1)

            decode_inputs = input_tokens_required[:decode_len].reshape(
                num_decode_requests, self.num_speculative_tokens + 1
            )
            decode_outputs = output_tokens[:decode_len].reshape(
                num_decode_requests, self.num_speculative_tokens + 1
            )

            # Shift outputs right by 1 *within* each request to align sampled tokens with input targets
            decode_outputs_shifted = decode_outputs.roll(1, dims=1)
            decode_mask_2d = decode_inputs == decode_outputs_shifted
            # The first token (base token) is always accepted
            decode_mask_2d[:, 0] = True
            # Enforce consecutive acceptance: cummin propagates False to the right
            decode_mask_2d = decode_mask_2d.cummin(dim=1).values
            accepted_tokens_mask[:decode_len] = decode_mask_2d.flatten()

        last_one_indices = torch.full(
            (active_request_count,), -1, device=input_tokens_required.device
        )

        if num_decode_requests > 0:
            # Summing the consecutive mask gives the count; subtract 1 for the local index
            local_last_indices = decode_mask_2d.sum(dim=1) - 1
            row_offsets = torch.arange(num_decode_requests, device=last_one_indices.device) * (
                self.num_speculative_tokens + 1
            )
            last_one_indices[:num_decode_requests] = row_offsets + local_last_indices

        if num_prefill_requests > 0:
            decode_len = num_decode_requests * (self.num_speculative_tokens + 1)
            prefill_valid = (
                torch.nonzero(accepted_tokens_mask[decode_len:]).squeeze(-1) + decode_len
            )
            last_one_indices[num_decode_requests:] = prefill_valid

        return last_one_indices, accepted_tokens_mask, input_tokens_required

    def _dynamic_step_sample_logits_and_verify_tokens(self, logits: Tensor, input_ids: Tensor):
        """
        Sample tokens from logits for dynamic batching with speculative tokens and verify the tokens.
        """
        context = self.inference_wrapped_model.inference_context
        active_request_count = context.total_request_count - context.paused_request_count

        request_in_prefill_status_tensor = context.request_in_prefill_status_tensor[
            context.paused_request_count : context.total_request_count
        ]
        request_query_lengths = context.request_query_lengths[
            context.paused_request_count : context.total_request_count
        ]

        num_prefill_requests = request_in_prefill_status_tensor.sum().item()
        num_decode_requests = active_request_count - num_prefill_requests

        # Get the logit indices for tokens that need sampling.
        required_logit_indices = self._get_required_logit_indices(
            request_in_prefill_status_tensor,
            request_query_lengths,
            num_decode_requests,
            num_prefill_requests,
            logits.device,
        )

        required_logits = logits.squeeze(0)[
            required_logit_indices, :
        ]  # Shape [num_required, vocab_size]

        # Sample tokens from logits
        output_tokens, repeats = self._sample_speculative_logits(
            required_logits, request_in_prefill_status_tensor
        )

        # Verify speculative tokens against input tokens.
        input_tokens_required = input_ids[0, required_logit_indices]
        last_one_indices, accepted_tokens_mask, input_tokens_required = (
            self._verify_speculative_tokens(
                output_tokens,
                input_tokens_required,
                request_in_prefill_status_tensor,
                repeats,
                num_decode_requests,
                num_prefill_requests,
                active_request_count,
            )
        )

        # Store the final sampled tokens for the next forward pass.
        final_sampled_tokens = output_tokens[last_one_indices]
        self._sampled_tokens_cuda[: len(final_sampled_tokens)] = final_sampled_tokens

        # Store the last accepted positions in the packed sequence for serial
        # MTP computation after verification.
        self._last_accepted_seq_indices = required_logit_indices[last_one_indices]

        # Extract accepted tokens and counts for decode requests.
        # For prefill it is always set to 1. For decode, the first token is always accepted,
        # then we compare with input tokens and accept the next tokens if its a match.
        #
        # Example (continuing from above):
        #   input_tokens_required:              [ a5  a6s  a7s |  b3    b4s  b5s   |  c6   c7s   c8s   |     d2      |         e4         ]
        #   Accepted tokens  mask               [  1   1    0  |  1      1    1    |   1    0     0    |      1      |         1          ]
        #   Accepted tokens                     [   [a6s  -1]  |     [b4s  b5s]    |     [-1  -1]      ]  # Only decode requests (prefill defaults to -1)
        #   Accepted token counts               [      1       |         2         |         0         ]  # Prefill defaults to 0
        input_tokens_required[accepted_tokens_mask == 0] = -1  # Mask out non-accepted tokens
        input_tokens_decode_mode = input_tokens_required[
            : num_decode_requests * (self.num_speculative_tokens + 1)
        ]
        input_tokens_reshaped = input_tokens_decode_mode.reshape(
            -1, self.num_speculative_tokens + 1
        )  # shape: [num_decode_requests, num_speculative_tokens + 1]

        # Skip the first token of every decode request (i.e a5, b3, c6)
        accepted_tokens = input_tokens_reshaped[:, 1:]
        self._accepted_tokens_per_request[: accepted_tokens.shape[0], :] = accepted_tokens
        self._accepted_token_counts_per_request = (self._accepted_tokens_per_request != -1).sum(
            dim=1
        )

    def _dynamic_step_sample_logits(self, logits: Tensor):
        """Sample tokens from logits for dynamic batching.

        Args:
            logits (Tensor): The logits from the forward pass.
        """
        # TODO(ksanthanam): Evaluate whether it makes more sense to sample on 1 rank
        # and then broadcast the sampled tokens rather than broadcasting the raw logits.

        # Last token logits.
        context = self.inference_wrapped_model.inference_context
        if context.config.materialize_only_last_token_logits:
            # When materialize_only_last_token_logits is true, last_token_logits is
            # already called in the forward pass of GPT.
            required_token_logits = logits.squeeze(0)
        else:
            # todo : Should do verification here and get approrpiate las token logits
            required_token_logits = context.last_token_logits(logits)

        if self._sampling_backend == "torch":
            # Concatenate the outputs once to prevent repeated small writes.
            token_list = []
            indices_list = []

            # e.g torch sample buckets will be
            # i.e (for all unique comibnation of t, topk, topk what are the associated
            # requests indices (based on the active slices)
            # [ [req at index 0, req at index 2], t1, topk1, topp1 ]]
            # [ [req at index 1, req at index 3, req at index 4] , t2, topk2, topp2]
            for indices, temp, top_k, top_p in self._torch_sampling_buckets:
                token_list.append(
                    self._torch_sampling_func(required_token_logits[indices, :], temp, top_k, top_p)
                )
                indices_list.append(torch.tensor(indices))

            # Single write to the output tensor.
            sampled_tokens = torch.cat(token_list, dim=0)
            sampled_indices = torch.cat(indices_list, dim=0)

            self._sampled_tokens_cuda[sampled_indices] = sampled_tokens

    def _dynamic_step_log_probs_bookkeeping(self) -> Tuple[bool, bool]:
        """Perform bookkeeping necessary to compute log probs for dynamic batching.

        Returns:
            return_log_probs (bool): Whether to return the sampled log_probs.
        """
        context = self.inference_wrapped_model.inference_context
        active_request_slice = slice(context.paused_request_count, context.total_request_count)

        return_log_probs = self._request_metadata["return_log_probs"][active_request_slice]
        top_n_log_probs = self._request_metadata["top_n_logprobs"][active_request_slice] > 0

        return return_log_probs.any(), top_n_log_probs.any()

    def _router_record_bookkeeping(self) -> Optional[Dict[int, Tensor]]:
        """Collect and map routing indices per request for MoE router recording.

        This method retrieves recorded routing decisions and maps them to individual
        requests using the context's request_ids and query_lengths. Uses the context's
        routing_metadata when available (which handles CUDA graph static buffers automatically).
        Must be called while context attributes are still valid (before request transitions).

        Returns:
            Optional[Dict[int, Tensor]]: A dictionary mapping request_id to a tensor of
                shape [num_tokens, num_layers, topk]. Returns None if routing replay is
                disabled or no routing data was recorded.
        """
        config = self.inference_wrapped_model.model.config
        if not config.moe_enable_routing_replay:
            return None

        # Get routing indices - use routing_metadata if available (handles CUDA graph static buffers)
        context = self.inference_wrapped_model.inference_context
        if context.moe_routing_metadata is None:
            return None

        stacked_routing = context.moe_routing_metadata.get_routing_indices()

        if stacked_routing is None:
            return None

        # Get active request info from context
        active_request_slice = slice(context.paused_request_count, context.total_request_count)
        active_request_ids = context.request_ids[active_request_slice].tolist()
        active_query_lengths = context.request_query_lengths[active_request_slice].tolist()
        active_token_count = context.active_token_count

        # Get TP group for all-gather if using sequence parallelism
        # With sequence parallelism, each TP rank only sees a portion of the tokens,
        # so we need to gather routing indices across all TP ranks.
        tp_group = self.inference_wrapped_model.tp_group
        tp_size = get_pg_size(tp_group)

        # All-gather across TP group if using sequence parallelism (tp_size > 1)
        if tp_size > 1 and get_model_config(self.inference_wrapped_model.model).sequence_parallel:
            # gather_from_sequence_parallel_region gathers along dim 0
            # [local_token_count, num_layers, topk] -> [global_token_count, num_layers, topk]
            stacked_routing = gather_from_sequence_parallel_region(stacked_routing, group=tp_group)

        # Slice to real tokens (remove CUDA padding)
        stacked_routing = stacked_routing[:active_token_count]

        # Split by request along token dimension
        # stacked_routing has shape [active_token_count, num_layers, topk]
        routing_splits = stacked_routing.split(active_query_lengths, dim=0)

        # Map to request IDs
        routing_indices_per_request = {}
        for req_id, routing_split in zip(active_request_ids, routing_splits):
            # routing_split has shape [num_tokens_for_request, num_layers, topk]
            routing_indices_per_request[req_id] = routing_split

        return routing_indices_per_request

    def _dynamic_step_calculate_log_probs(self, logits: Tensor) -> Optional[Tensor]:
        """Calculate log probs from logits."""
        context = self.inference_wrapped_model.inference_context
        active_request_count = context.total_request_count - context.paused_request_count

        return context.calculate_log_probs(
            logits,
            self._sampled_tokens_cuda[:active_request_count],
            only_last_token_logits=context.config.materialize_only_last_token_logits,
        )

    def _dynamic_step_calculate_log_probs_speculative(
        self, logits: Tensor
    ) -> Tuple[List[List[float]], Tensor]:
        """Calculate log probs from logits for speculative decoding.

        For decode requests, computes log probs for each accepted speculative token
        and the newly sampled token using the main model logits. For prefill requests,
        handles prompt log probs the same way as non-speculative decoding.

        The main model logits at position j predict the token at position j+1. So:
        - log_prob(accepted_token[j]) comes from logits at position j
        - log_prob(newly_sampled_token) comes from logits at position accepted_count

        Args:
            logits (Tensor): The main model logits [1, seq_len, vocab_size].

        Returns:
            Tuple of (log_probs_list, log_probs_tensor):
                log_probs_list: List of lists, one per active request, containing
                    log probs for the tokens emitted in this step.
                log_probs_tensor: Full log_softmax tensor for top-n computation.
        """
        context = self.inference_wrapped_model.inference_context
        active_request_count = context.total_request_count - context.paused_request_count

        request_in_prefill_status_tensor = context.request_in_prefill_status_tensor[
            context.paused_request_count : context.total_request_count
        ]
        request_query_lengths = context.request_query_lengths[
            context.paused_request_count : context.total_request_count
        ]

        num_prefill_requests = request_in_prefill_status_tensor.sum().item()
        num_decode_requests = active_request_count - num_prefill_requests

        logits_squeezed = logits.squeeze(0).float()
        log_probs_tensor = F.log_softmax(logits_squeezed[: context.active_token_count], dim=-1)

        log_probs_list_decode = []

        if num_decode_requests > 0:
            decode_len = num_decode_requests * (self.num_speculative_tokens + 1)
            decode_log_probs = log_probs_tensor[:decode_len].reshape(
                num_decode_requests, self.num_speculative_tokens + 1, -1
            )
            accepted_counts = self._accepted_token_counts_per_request[:num_decode_requests]

            # Build a [num_decode, num_spec+1] token ID matrix for gathering.
            # Columns 0..num_spec-1 hold accepted speculative tokens (clamped to 0
            # where rejected, since those positions will be masked out).
            # At column accepted_count[i], place the newly sampled token.
            gather_tokens = torch.zeros(
                num_decode_requests,
                self.num_speculative_tokens + 1,
                device=logits.device,
                dtype=torch.long,
            )
            gather_tokens[:, : self.num_speculative_tokens] = self._accepted_tokens_per_request[
                :num_decode_requests
            ].clamp(min=0)
            gather_tokens[
                torch.arange(num_decode_requests, device=logits.device), accepted_counts
            ] = self._sampled_tokens_cuda[:num_decode_requests]

            # Gather: [num_decode, num_spec+1]
            gathered_log_probs = decode_log_probs.gather(2, gather_tokens.unsqueeze(-1)).squeeze(-1)

            log_probs_list_decode = [
                gathered_log_probs[i, : accepted_counts[i].item() + 1].tolist()
                for i in range(num_decode_requests)
            ]

        log_probs_list_prefill = []
        if num_prefill_requests > 0:
            decode_len = num_decode_requests * (self.num_speculative_tokens + 1)
            prefill_log_probs = log_probs_tensor[decode_len:]

            prefill_token_ids = context.token_to_input_ids[
                decode_len : context.active_token_count
            ].roll(-1, 0)
            prefill_query_lengths = request_query_lengths[request_in_prefill_status_tensor == 1]
            new_token_idx = prefill_query_lengths.cumsum(0) - 1
            prefill_new_tokens = self._sampled_tokens_cuda[num_decode_requests:active_request_count]
            prefill_token_ids[new_token_idx] = prefill_new_tokens

            prefill_token_count = context.active_token_count - decode_len
            seq_idx = torch.arange(prefill_token_count, device=logits.device)
            selected_log_probs = prefill_log_probs[seq_idx, prefill_token_ids]

            prefill_log_probs_split = selected_log_probs.cpu().split(
                prefill_query_lengths.tolist(), dim=0
            )
            log_probs_list_prefill = [lp.tolist() for lp in prefill_log_probs_split]

        log_probs_list = log_probs_list_decode + log_probs_list_prefill

        return log_probs_list, log_probs_tensor

    def _dynamic_step_calculate_top_n_logprobs_speculative(
        self, log_probs_tensor: Tensor
    ) -> Optional[Dict[int, List[Tuple[Tensor, Tensor]]]]:
        """Calculate top-n log probs for speculative decoding.

        For decode requests, computes top-n at each position that produced an
        emitted token (accepted speculative positions + the newly sampled position).
        For prefill requests, behaves identically to the non-speculative path.

        Args:
            log_probs_tensor (Tensor): Pre-computed log_softmax tensor from
                _dynamic_step_calculate_log_probs_speculative.

        Returns:
            A dictionary mapping request_idx to list of (top_n_values, top_n_indices)
            tuples, one per emitted token position.
        """
        context = self.inference_wrapped_model.inference_context
        active_request_count = context.total_request_count - context.paused_request_count
        active_request_slice = slice(context.paused_request_count, context.total_request_count)

        request_in_prefill_status_tensor = context.request_in_prefill_status_tensor[
            context.paused_request_count : context.total_request_count
        ]
        request_query_lengths = context.request_query_lengths[
            context.paused_request_count : context.total_request_count
        ]

        num_prefill_requests = request_in_prefill_status_tensor.sum().item()
        num_decode_requests = active_request_count - num_prefill_requests

        top_n_results = {}

        if num_decode_requests > 0:
            decode_len = num_decode_requests * (self.num_speculative_tokens + 1)
            decode_log_probs = log_probs_tensor[:decode_len].reshape(
                num_decode_requests, self.num_speculative_tokens + 1, -1
            )
            accepted_counts = self._accepted_token_counts_per_request[:num_decode_requests]
            top_n_per_request = self._request_metadata["top_n_logprobs"][active_request_slice][
                :num_decode_requests
            ]
            max_top_n = int(top_n_per_request.max().item())

            if max_top_n > 0:

                # Single batched topk on GPU: [num_decode, num_spec+1, max_top_n]
                topk_results = torch.topk(decode_log_probs, k=max_top_n, dim=-1)

                # Single CPU transfer instead of O(num_decode * num_spec) transfers
                topk_values_cpu = topk_results.values.cpu()
                topk_indices_cpu = topk_results.indices.cpu()

                for i in range(num_decode_requests):
                    top_n = int(top_n_per_request[i].item())
                    if top_n > 0:
                        num_valid = accepted_counts[i].item() + 1
                        top_n_results[i] = [
                            (topk_values_cpu[i, j, :top_n], topk_indices_cpu[i, j, :top_n])
                            for j in range(num_valid)
                        ]

        if num_prefill_requests > 0:
            decode_len = num_decode_requests * (self.num_speculative_tokens + 1)
            prefill_log_probs = log_probs_tensor[decode_len:]
            prefill_query_lengths = request_query_lengths[request_in_prefill_status_tensor == 1]
            prefill_log_probs_per_request = prefill_log_probs.split(
                prefill_query_lengths.tolist(), dim=0
            )

            for i in range(num_prefill_requests):
                req_idx = num_decode_requests + i
                top_n = int(
                    self._request_metadata["top_n_logprobs"][active_request_slice][req_idx].item()
                )
                if top_n > 0:
                    request_lp = prefill_log_probs_per_request[i]
                    skip_prompt = bool(
                        self._request_metadata["skip_prompt_log_probs"][req_idx].item()
                    )

                    if skip_prompt and request_lp.size(0) > 1:
                        top_n_logits = torch.topk(request_lp[-1], k=top_n)
                        top_n_results[req_idx] = [
                            (top_n_logits.values.cpu(), top_n_logits.indices.cpu())
                        ]
                    else:
                        top_n_logits = torch.topk(request_lp, k=top_n, dim=-1)
                        top_n_values_cpu = top_n_logits.values.cpu()
                        top_n_indices_cpu = top_n_logits.indices.cpu()
                        top_n_results[req_idx] = [
                            (top_n_values_cpu[t], top_n_indices_cpu[t])
                            for t in range(request_lp.size(0))
                        ]

        return top_n_results if top_n_results else None

    def _dynamic_step_calculate_top_n_logprobs(
        self, logits: Tensor, log_probs_tensor: Optional[Tensor] = None
    ) -> Optional[Dict[int, List[Tuple[Tensor, Tensor]]]]:
        """Calculate top-n log probs from logits for dynamic batching.

        Args:
            logits (Tensor): The logits to compute top-n log probs from.
            log_probs_tensor (Optional[Tensor]): Pre-computed log probabilities tensor.
                If provided, avoids recomputing log_softmax. Should be the tensor
                returned by calculate_log_probs.

        Returns:
            A dictionary mapping request_idx to list of (top_n_logprobs, top_n_indices) tuples.
            Each tuple in the list represents one token position.
        """
        assert log_probs_tensor is not None, (
            "log_probs_tensor must be provided. This should be guaranteed by the calling code "
            "computing log_probs when return_top_n_logprobs is True."
        )

        context = self.inference_wrapped_model.inference_context
        active_request_count = context.total_request_count - context.paused_request_count
        active_request_slice = slice(context.paused_request_count, context.total_request_count)

        # Handle decode-only mode (only last token)
        if context.config.materialize_only_last_token_logits or context.is_decode_only():
            # In decode mode or when only last token logits are materialized,
            # logits already represent only the last tokens
            log_probs = log_probs_tensor[:active_request_count]

            top_n_results = {}
            for req_idx in range(active_request_count):
                top_n = int(
                    self._request_metadata["top_n_logprobs"][active_request_slice][req_idx].item()
                )
                if top_n > 0:
                    # Get top-n logprobs and indices for this request (single token)
                    top_n_logits = torch.topk(log_probs[req_idx], k=top_n)
                    top_n_results[req_idx] = [
                        (top_n_logits.values.cpu(), top_n_logits.indices.cpu())
                    ]
            return top_n_results if top_n_results else None

        # Handle prefill mode - need to extract top-n for tokens per request
        # This follows the same pattern as calculate_log_probs in dynamic_context.py
        # Note: logits may be padded, so we only take the first active_token_count tokens
        log_probs = log_probs_tensor[: context.active_token_count]

        active_query_lengths = context.request_query_lengths[active_request_slice]

        # Split log_probs across request boundaries
        # log_probs has shape [active_token_count, vocab_size]
        log_probs_per_request = log_probs.split(active_query_lengths.tolist(), dim=0)

        top_n_results = {}
        for req_idx in range(active_request_count):
            top_n = int(
                self._request_metadata["top_n_logprobs"][active_request_slice][req_idx].item()
            )
            if top_n > 0:
                request_log_probs = log_probs_per_request[
                    req_idx
                ]  # [num_tokens_for_request, vocab_size]
                skip_prompt = bool(self._request_metadata["skip_prompt_log_probs"][req_idx].item())

                # If skip_prompt_log_probs is True, only compute for last token
                if skip_prompt and request_log_probs.size(0) > 1:
                    # Only compute top-n for the last token (first generated token)
                    top_n_logits = torch.topk(request_log_probs[-1], k=top_n)
                    top_n_results[req_idx] = [
                        (top_n_logits.values.cpu(), top_n_logits.indices.cpu())
                    ]
                else:
                    # Compute top-n for all tokens in the request
                    top_n_per_token = []
                    for token_idx in range(request_log_probs.size(0)):
                        top_n_logits = torch.topk(request_log_probs[token_idx], k=top_n)
                        top_n_per_token.append(
                            (top_n_logits.values.cpu(), top_n_logits.indices.cpu())
                        )
                    top_n_results[req_idx] = top_n_per_token

        return top_n_results if top_n_results else None

    def dummy_forward(self):
        """Perform a dummy forward pass. This is used in expert model parallelism
        on ranks that do not have any real requests. It may run in eager mode."""

        context = self.inference_wrapped_model.inference_context
        # if no cuda graphs, directly use dummy forward
        if not context.cuda_graph_batch_dimensions_list:
            self.inference_wrapped_model.dummy_forward()

            # Disable MoE padding for MTP computation
            if self.model_config.moe_pad_experts_for_cuda_graph_inference:
                unwrapped_model = unwrap_model(self.inference_wrapped_model.model)
                set_decode_expert_padding(unwrapped_model, False)

            self._dummy_serial_mtp_forward()

            return

        # attempt to use cuda-graph if possible
        input_ids, position_ids = self._dynamic_step_context_init(is_dummy_forward=True)

        # _dynamic_step_context_init tries to find a cuda-graph that is compatible
        # with all EP ranks. It can also return no match, in which case
        # we run in eager mode.

        if context.using_cuda_graph_this_step():
            # we found a cuda-graph to run
            self._dynamic_step_forward_logits(input_ids, position_ids)
        else:
            # fallback to eager dummy forward
            self.inference_wrapped_model.dummy_forward()

        # Disable MoE padding for MTP computation
        if self.model_config.moe_pad_experts_for_cuda_graph_inference:
            unwrapped_model = unwrap_model(self.inference_wrapped_model.model)
            set_decode_expert_padding(unwrapped_model, False)

        # When speculative decoding is active, the real EP ranks perform serial
        # MTP forward passes after the main forward pass. MTP layers may contain
        # MoE sublayers (inherited from the decoder spec), which require EP
        # all-to-all collectives. The dummy rank must participate in these
        # collectives to avoid a hang.
        self._dummy_serial_mtp_forward()

        # clear the context of any temporary state from the dummy forward
        context.reset()

    @torch.inference_mode()
    def _dummy_serial_mtp_forward(self):
        """Run dummy MTP forward passes to participate in EP collectives.

        When speculative decoding is active and MTP layers contain MoE sublayers
        (inherited from the decoder layer spec), each serial MTP step triggers
        EP all-to-all collectives. The dummy EP rank must issue matching
        collective calls so the real ranks do not hang.

        This mirrors the structure of ``_compute_serial_mtp_and_sample``:
        - On the last PP stage (where MTP resides): run ``compute_mtp_single_step``
          with dummy tensors so the MoE all-to-all is executed.
        - When PP > 1: participate in the ``broadcast_from_last_pipeline_stage``
          that the real ranks also perform.
        """
        if self.num_speculative_tokens == 0 or self.num_mtp_heads == 0:
            return
        if self.model_config.expert_model_parallel_size <= 1:
            return

        unwrapped_model = unwrap_model(self.inference_wrapped_model.model)

        is_last_stage = is_pipeline_last_stage(self.pp_group)
        has_mtp = is_last_stage and hasattr(unwrapped_model, '_decoder_hidden_states_cache')
        if not has_mtp and not self.model_is_pipeline_parallel:
            # No MTP on this rank and no PP broadcast to participate in.
            return

        device = torch.cuda.current_device()
        dtype = self.model_config.params_dtype
        hidden_size = self.model_config.hidden_size
        num_depths = min(self.num_speculative_tokens, self.num_mtp_heads)

        dummy_hidden = None
        if has_mtp:
            # Minimal dummy tensors — just enough to drive the MTP layer forward
            # so that the MoE all-to-all collectives are issued.
            dummy_hidden = torch.zeros((1, 1, hidden_size), device=device, dtype=dtype)
            dummy_token_ids = torch.zeros((1, 1), device=device, dtype=torch.long)
            dummy_position_ids = torch.zeros((1, 1), device=device, dtype=torch.long)

        for depth in range(num_depths):
            mtp_logits_2d = None
            if has_mtp:
                dummy_hidden, mtp_logits = unwrapped_model.compute_mtp_single_step(
                    hidden_states=dummy_hidden,
                    next_token_ids=dummy_token_ids,
                    position_ids=dummy_position_ids,
                    depth=depth,
                )
                mtp_logits_2d = mtp_logits.squeeze(1)  # [1, vocab_size]

            # Match the PP broadcast that real ranks do in _compute_serial_mtp_and_sample.
            if self.model_is_pipeline_parallel:
                broadcast_from_last_pipeline_stage(
                    [1, self.vocab_size], dtype=dtype, tensor=mtp_logits_2d, pp_group=self.pp_group
                )

    def _dynamic_step_context_bookkeeping(self) -> Dict[str, Tensor]:
        """Update the dynamic inference context after sampling.

        Args:
            new_sample (Tensor): The newly sampled tokens.
            request_metadata (Optional[Dict[str, Tensor]]): An override for the tensors
                that manage request metadata, such as sampling parameters. By default, this
                metadata is retrieved from the context.

        Return:
            Dict [str, Tensor]: A dictionary containing:
                active_request_ids (Tensor): Current active request IDs.
                newly_paused_request_ids (Tensor): Newly paused request IDs.
                finished_request_ids (Tensor): Finished request IDs.
        """
        context = self.inference_wrapped_model.inference_context
        active_request_count = context.total_request_count - context.paused_request_count
        active_request_slice = slice(context.paused_request_count, context.total_request_count)

        # Active sequence lengths.
        active_request_ids = context.request_ids[active_request_slice].long()
        active_sequence_lengths = context.get_active_sequence_lengths()

        if self.num_speculative_tokens > 0:
            active_sequence_lengths += (
                self._accepted_token_counts_per_request[:active_request_count] + 1
            )
        else:
            active_sequence_lengths += 1
        max_sequence_lengths = context.get_max_sequence_lengths()

        # Request finished if termination_id or length >= max_sequence_length.
        # Note: termination_id tensor has per-request termination IDs from mixed sampling
        active_request_mask = (
            self._sampled_tokens_cuda[:active_request_count]
            != self._request_metadata["termination_id"][active_request_slice]
        ).byte() & torch.less(active_sequence_lengths, max_sequence_lengths).byte()

        # Mark requests as finished if they hit stop words (detected in previous step's post_process_requests)
        if self._get_stop_word_finished_ids_callback is not None:
            request_ids_list = active_request_ids.tolist()
            stop_word_finished_ids = self._get_stop_word_finished_ids_callback(request_ids_list)
            if stop_word_finished_ids:
                for idx, request_id in enumerate(request_ids_list):
                    if request_id in stop_word_finished_ids:
                        active_request_mask[idx] = 0

        finished_idxs = (
            torch.nonzero(active_request_mask == 0, as_tuple=True)[0] + context.paused_request_count
        )
        finished_request_ids = context.request_ids[finished_idxs]

        # Clone needed: update_requests mutates next_tokens in-place via tensor_swap,
        # which would corrupt the reused _sampled_tokens_cuda buffer.
        new_sample_copy = self._sampled_tokens_cuda[:active_request_count].clone()

        # Update requests.
        # _sampled_mtp_tokens_cuda has shape [num_speculative_tokens, max_requests]
        if self.num_speculative_tokens > 0:
            sampled_mtp_tokens_cuda = self._sampled_mtp_tokens_cuda[:, :active_request_count]
        else:
            sampled_mtp_tokens_cuda = None
        update_result = context.update_requests(
            active_request_mask, new_sample_copy, sampled_mtp_tokens_cuda
        )

        return {
            "active_request_ids": active_request_ids,
            "finished_request_ids": finished_request_ids,
            **(update_result or {}),
        }

    @torch.inference_mode()
    async def async_generate_output_tokens_dynamic_batch(
        self, skip_bookkeeping: Optional[bool] = False
    ) -> Optional[Dict]:
        """Forward step the model and update the inference context.

        Args:
            skip_bookkeeping (Optional[bool]): If true, skip the context bookkeeping step.

        Return:
            (Optional[Dict]): A dictionary containing:
                active_request_ids (Tensor): Current active request IDs.
                newly_paused_request_ids (Tensor): Newly paused request IDs.
                finished_request_ids (Tensor): Finished request IDs.
                sample (Tensor): New sample.
                log_probs (Optional[Tensor]): Log probabilities of the new sample, if requested.
                cuda_graph_request_count (Optional[int]): Size of cuda graph used for this step.
        """
        context = self.inference_wrapped_model.inference_context
        active_request_count = context.total_request_count - context.paused_request_count

        # No tokens and no active requests?
        if context.active_token_count == 0 and active_request_count == 0:
            return None

        input_ids, position_ids = self._dynamic_step_context_init()

        cuda_graph_request_count = (
            context.padded_active_request_count if context.is_decode_only() else None
        )

        # Enable routing recording before forward pass if routing replay is enabled
        config = self.inference_wrapped_model.model.config
        if config.moe_enable_routing_replay:
            RouterReplay.set_global_router_replay_action(RouterReplayAction.RECORD)

        # Forward pass produces only base logits. When speculative decoding is
        # active, MTP logits are computed serially after verification.
        logits = self._dynamic_step_forward_logits(input_ids, position_ids)

        # Commit Mamba intermediate states before update_requests, which
        # may swap request indices. The Python lists tracking EOS block IDs
        # and intermediate offsets are not swapped along with tensors, so
        # commit must run while indices are still valid.
        if context.is_hybrid_model and context.mamba_slot_allocator is not None:
            context.mamba_slot_allocator.commit_intermediate_states()

        # Collect routing indices per request (must be done before context transitions)
        routing_indices_per_request = self._router_record_bookkeeping()

        # This is the best place to yield control back to event loop.
        # At this point we have enqueued FW pass GPU kernels asynchronously.
        # While they are running, we can do other useful CPU work.
        # Note: This can be moved further ahead if sampling can be made
        # asynchronous.
        # Todo [Siddharth]: Can we condition the sleep on a cuda event?
        # NOTE [TDE]: This will be moved once CPU and GPU methods are separated.
        await asyncio.sleep(0)
        return_log_probs, return_top_n_logprobs = self._dynamic_step_log_probs_bookkeeping()

        self._dynamic_step_sample_bookkeeping()

        if self.num_speculative_tokens > 0:
            # Phase 1: Verify speculative tokens using base logits only.
            self._dynamic_step_sample_logits_and_verify_tokens(logits, input_ids)
            # Phase 2: Rewind KV cache for rejected tokens.
            self._rewind_kv_cache()

            # Disable MoE padding for MTP computation
            if self.model_config.moe_pad_experts_for_cuda_graph_inference:
                unwrapped_model = unwrap_model(self.inference_wrapped_model.model)
                set_decode_expert_padding(unwrapped_model, False)

            # Phase 3: Compute MTP serially with correct (verified) inputs.
            self._compute_serial_mtp_and_sample()
        else:
            self._dynamic_step_sample_logits(logits)

        log_probs = None
        top_n_logprobs = None
        if return_log_probs or return_top_n_logprobs:
            if self.num_speculative_tokens > 0:
                log_probs, log_probs_tensor = self._dynamic_step_calculate_log_probs_speculative(
                    logits
                )
                if return_top_n_logprobs:
                    top_n_logprobs = self._dynamic_step_calculate_top_n_logprobs_speculative(
                        log_probs_tensor
                    )
            else:
                log_probs, log_probs_tensor = self._dynamic_step_calculate_log_probs(logits)
                if return_top_n_logprobs:
                    top_n_logprobs = self._dynamic_step_calculate_top_n_logprobs(
                        logits, log_probs_tensor
                    )

        if skip_bookkeeping:
            request_bookkeeping = {}
        else:
            request_bookkeeping = self._dynamic_step_context_bookkeeping()

        ret = {
            # Clone needed: _sampled_tokens_cuda is a reused buffer overwritten each step.
            "sample": self._sampled_tokens_cuda[:active_request_count].clone(),
            "accepted_tokens": (
                # Clone needed: .fill_(-1) on line 1480 would corrupt the returned value.
                self._accepted_tokens_per_request.clone()
                if self.num_speculative_tokens > 0
                else None
            ),
            "log_probs": log_probs,
            "top_n_logprobs": top_n_logprobs,
            "routing_indices_per_request": routing_indices_per_request,
            "cuda_graph_request_count": cuda_graph_request_count,
        }
        if self.num_speculative_tokens > 0:
            self._accepted_tokens_per_request.fill_(-1)
            self._accepted_token_counts_per_request.fill_(0)
        ret.update(request_bookkeeping)
        return ret

    @torch.inference_mode()
    def generate_output_tokens_dynamic_batch(
        self, loop: Optional[asyncio.AbstractEventLoop] = None
    ) -> Optional[Dict]:
        """Synchronous wrapper for `self.async_generate_output_tokens_dynamic_batch."""
        loop = get_asyncio_loop(loop)
        return loop.run_until_complete(self.async_generate_output_tokens_dynamic_batch())

    def _update_top_n_logprobs_dict(
        self,
        top_n_logprobs_this_step: torch.Tensor,
        top_n_logprobs_indices: torch.Tensor,
        mask: torch.Tensor,
        top_n_logprobs_dict: Dict[int, List[Dict[str, float]]],
    ):
        """Function to update the top_n_logprobs at each step

        This function goes through the topn logprobs generated for each, and for whichever
        batch has started generating tokens, it updates the top_n_logprobs_dict with the
        decoded token (string) as the key and the logit as the value.
        top_n_logprobs_dict has as keys the batch idx, the values is a list, where each element
        represents a dictionary of decoded token as key and logit as value generated at each step

        Args:
            top_n_logprobs_this_step (torch.Tensor): The top n logprob values
            top_n_logprobs_indices (torch.Tensor): The indices corresponding to the top n logprobs
            mask (torch.Tensor): A mask to indicate which requests should append to the dict
            top_n_logprobs_dict (top_n_logprobs_dict): The dict to be updated
        """
        for batch_idx, (logprob_values, logprob_indices) in enumerate(
            zip(top_n_logprobs_this_step, top_n_logprobs_indices)
        ):
            if mask[batch_idx]:
                logit_dict = {}
                for logprob, logprob_index in zip(logprob_values, logprob_indices):
                    key = self.tokenizer.detokenize([logprob_index.item()])
                    logit_dict[key] = logprob.item()
                top_n_logprobs_dict[batch_idx].append(logit_dict)

    @torch.inference_mode()
    def generate_all_output_tokens_static_batch(
        self,
        active_requests: OrderedDict[int, InferenceRequest],
        active_streams: Optional[OrderedDict[str, AsyncStream]] = None,
    ) -> OrderedDict[int, InferenceRequest]:
        """Utility to generate all the output tokens and probabilities for the prompts.

        This utility generates the output tokens for a static batch. It runs the forward steps till
        all prompts complete generation, updates the status of these requests to completed, adds
        the generated result and returns these requests

        Args:
            active_requests (OrderedDict[int, InferenceRequest]): The input active requests.

        Returns:
            OrderedDict[int, InferenceRequest]: The result for each of the incoming requests
        """
        assert all(request.prompt_tokens is not None for request in active_requests.values())

        # Perform a deep copy so that the request prompt tokens do not get modified.
        batch_prompt_tokens_list: List[List[int]] = list(
            map(
                lambda request: copy.deepcopy(request.prompt_tokens),  # type: ignore[arg-type]
                active_requests.values(),
            )
        )
        prompt_lengths_in_batch = torch.tensor(
            [len(prompt_tokens) for prompt_tokens in batch_prompt_tokens_list],
            device=torch.cuda.current_device(),
        )
        max_prompt_length_in_batch = max(prompt_lengths_in_batch)
        min_prompt_length_in_batch = min(prompt_lengths_in_batch)

        # For batch inference the sampling params are the same for all request
        sampling_params: SamplingParams = list(active_requests.values())[0].sampling_params

        # Remove Float16Module wrapper if it exists
        unwrapped_model = unwrap_model(self.inference_wrapped_model.model)
        model_config = get_model_config(unwrapped_model)

        # We only need an attention mask if we are exclusively doing prefill over
        # prompts of variable length
        use_attention_mask = (
            sampling_params.num_tokens_to_generate == 0
            and min_prompt_length_in_batch != max_prompt_length_in_batch
        )

        # Check whether CUDA graphs are enabled
        enable_cuda_graph = (
            model_config.cuda_graph_impl == "local"
            and CudaGraphScope.full_iteration not in model_config.cuda_graph_scope
        )

        # Pad batch tokens if necessary
        batch_size = len(active_requests)
        max_sequence_length = max_prompt_length_in_batch + sampling_params.num_tokens_to_generate
        context = self.inference_wrapped_model.inference_context
        assert isinstance(context, StaticInferenceContext)
        inference_max_batch_size = context.max_batch_size
        inference_max_sequence_length = context.max_sequence_length
        padded_batch_size = inference_max_batch_size if enable_cuda_graph else batch_size
        if padded_batch_size > inference_max_batch_size:
            raise ValueError(
                f"Padded batch size {padded_batch_size} > max batch size {inference_max_batch_size}"
            )
        padded_batch_prompt_tokens = self.pad_input_prompt_tokens(
            batch_prompt_tokens_list,
            padded_batch_size=padded_batch_size,
            padded_sequence_length=max_sequence_length,
        )

        # Verify that output sequence length is within configured limit
        if max_sequence_length > inference_max_sequence_length:
            raise MaxSequenceLengthOverflowError(
                f"Maximum allowed sequence length was set to {inference_max_sequence_length} "
                f"tokens but requested generation of {max_sequence_length} tokens"
            )

        top_n_logprobs_dict = defaultdict(list)

        # Pre allocate log probs tensor
        output_log_probs = None
        if sampling_params.return_log_probs:
            output_log_probs = torch.empty(
                (batch_size, max_sequence_length - 1),
                dtype=torch.float32,
                device=torch.cuda.current_device(),
            )

        # An array to check which of the prompts have reached end of generation condition
        is_generation_done_tensor = torch.zeros(
            batch_size, dtype=torch.bool, device=torch.cuda.current_device()
        )

        # An array to act as a counter to keep track of generated sequence lengths
        generated_sequence_lengths = torch.zeros(
            batch_size, device=torch.cuda.current_device()
        ).cuda()

        # Check whether early termination is enabled
        no_early_termination = getattr(sampling_params, "no_early_termination", False)
        termination_id = -1 if no_early_termination else self.tokenizer.eod

        streaming_enabled = active_streams is not None and len(active_streams) > 0
        if streaming_enabled:
            # Start a separate thread for streaming tokens to avoid blocking the
            # main computation
            streaming_idx: List[int] = [
                i
                for (i, request_id) in enumerate(active_requests.keys())
                if request_id in active_streams
            ]
            streaming_request_ids: List[int] = list(active_streams.keys())
            streams: List[AsyncStream] = list(active_streams.values())
            streaming_requests: List[InferenceRequest] = [
                active_requests[request_id] for request_id in streaming_request_ids
            ]
            streaming_executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
            stream_tokens = functools.partial(self.stream_tokens, sampling_params)

        for request in active_requests.values():
            # Initialize to a list to store a latency measurement for each generated token.
            request.tpot = []
        timing_events = []

        with torch.inference_mode():
            self.inference_wrapped_model.prep_model_for_inference()

            inference_input: Dict[str, Any] = self.prep_inference_input(
                prompts_tokens=padded_batch_prompt_tokens,
                active_requests=active_requests,
                use_attention_mask=use_attention_mask,
            )

            assert (
                not self.inference_wrapped_model.inference_context.is_decode_only()
            ), f"Generation must start in prefill mode"

            # Sequence parallelism is required for MoE layers when using expert parallelism (EP)
            # becausethe expert routing mechanism relies on sequence parallelism's communication
            # infrastructure to distribute tokens across expert ranks. However, sequence parallelism
            # is not currently supported for non-MoE layers during inference, so we selectively
            # disable it for all other layer types. This is safe because MoE layers perform an
            # all-gather operation on sequences before passing data to subsequent layers, ensuring
            # that each rank has the complete sequence data needed for the next non-MoE layer.
            tp_size = model_config.tensor_model_parallel_size
            ep_size = model_config.expert_model_parallel_size
            model_is_tp_ep = tp_size > 1 and ep_size > 1
            if model_is_tp_ep:
                set_model_to_sequence_parallel(
                    unwrapped_model, False, exclude_modules=[BaseMoELayer]
                )
            elif model_config.sequence_parallel and (ep_size == 1 or tp_size == 1):
                raise NotImplementedError(
                    f"Sequence parallellism is only supported for static batching with MoE models"
                )

            # If using symmetric kernels and we are using using nccl
            # for prefill turn off symmetric kernels
            symmetric_ar_type = self.model_config.symmetric_ar_type
            nccl_all_reduce_for_prefill = self.model_config.nccl_all_reduce_for_prefill
            if symmetric_ar_type is not None and nccl_all_reduce_for_prefill:
                unwrapped_model.set_symmetric_ar(None)

            # Turning off MoE padding for prefill
            moe_pad_experts_for_cuda_graph_inference = (
                self.model_config.moe_pad_experts_for_cuda_graph_inference
            )
            if moe_pad_experts_for_cuda_graph_inference:
                set_decode_expert_padding(unwrapped_model, False)

            context_start_position = 0

            # If we are exclusively doing prefill then we can process all prompt tokens
            # together even if the prompt lengths are different
            if sampling_params.num_tokens_to_generate == 0:
                context_end_position = max_prompt_length_in_batch
            else:
                context_end_position = min_prompt_length_in_batch

            # The initial iteration of this loop runs the prefill phase up to the shortest
            # prompt length in the batch. Then every subsequent iterations runs a decode step.
            # At least one new token will be generated in each iteration. The generated token
            # will be ignored for requests which have prompt length > the current generated
            # sequence length. Similarly, the generated token is ignored for requests which
            # have maximum total sequence length < the current generated sequence length.
            while True:
                # Add a timing event at the start of each iteration. The token generation
                # time will be the elapsed time between consective timing events.
                timing_events.append(torch.cuda.Event(enable_timing=True))
                timing_events[-1].record()

                # Pick the context window that we need to pass through the network.
                inference_input_for_context_window: Dict[str, Any] = (
                    self.inference_wrapped_model.get_batch_for_context_window(
                        inference_input, context_start_position, context_end_position
                    )
                )

                # Disable attention mask when using CUDA graphs for decode
                if (
                    enable_cuda_graph
                    and self.inference_wrapped_model.inference_context.is_decode_only()
                    and "attention_mask" in inference_input_for_context_window
                ):
                    inference_input_for_context_window["attention_mask"] = None
                elif use_attention_mask:
                    assert (
                        attention_mask := inference_input_for_context_window.get(
                            "attention_mask", None
                        )
                        is not None
                    )

                # Only materialize prompt log probs if the user requests log probs
                materialize_only_last_token_logits = (
                    self.inference_wrapped_model.inference_context.is_decode_only()
                    or not (sampling_params.return_log_probs or sampling_params.top_n_logprobs > 0)
                )
                inference_context = self.inference_wrapped_model.inference_context
                inference_context.config.materialize_only_last_token_logits = (
                    materialize_only_last_token_logits
                )

                # Returns the final logits of shape [batch_size, context_length, vocab_size]
                # Note: This is returned in all TP ranks or last PP stage in PP models
                logits = self.inference_wrapped_model.run_one_forward_step(
                    inference_input_for_context_window
                )

                # Undo padding if necessary
                batch_prompt_tokens = self.unpad_input_prompt_tokens(
                    padded_batch_prompt_tokens, batch_size
                )
                assert batch_prompt_tokens.shape[0] == batch_size, batch_prompt_tokens.shape[0]
                if is_pipeline_last_stage(self.pp_group):
                    logits = logits[:batch_size]

                if self.model_is_pipeline_parallel:
                    context_length = context_end_position - context_start_position
                    logits_seq_len = 1 if materialize_only_last_token_logits else context_length
                    logits_shape = [batch_size, logits_seq_len, self.vocab_size]
                    if is_pipeline_last_stage(self.pp_group):
                        assert logits is not None and torch.Size(logits_shape) == logits.shape
                    # TODO(ksanthanam): Evaluate whether it makes more sense to sample on 1 rank
                    # and then broadcast the sampled tokens rather than broadcasting the raw logits.
                    logits = broadcast_from_last_pipeline_stage(
                        [batch_size, logits_seq_len, self.vocab_size],
                        dtype=self.model_config.params_dtype,
                        tensor=logits,
                        pp_group=self.pp_group,
                    )

                # Turn on symmetric all reduce kernels for decode stage
                # if we turned it off for prefill
                if (
                    context_end_position == min_prompt_length_in_batch
                    and symmetric_ar_type is not None
                    and nccl_all_reduce_for_prefill
                ):
                    if symmetric_ar_type is not None and nccl_all_reduce_for_prefill:
                        unwrapped_model.set_symmetric_ar(symmetric_ar_type)

                # Indicates which of the input prompts have started generating tokens.
                # A 1D boolean tensor with [batch_size] elements (i.e) The shortest
                # prompts will start generating first and so on
                generation_started = prompt_lengths_in_batch <= context_end_position
                last_token_logits = logits[:, -1, :]

                logits_for_top_n_prompt_logprobs = (
                    logits
                    if context_start_position == 0 and not sampling_params.skip_prompt_log_probs
                    else None
                )
                sampled_logits = self.sample_from_logits(
                    last_token_logits,
                    sampling_params,
                    self.vocab_size,
                    generation_started=generation_started,
                    top_n_logprobs_dict=top_n_logprobs_dict,
                    logits=logits_for_top_n_prompt_logprobs,
                )

                if sampling_params.num_tokens_to_generate > 0:
                    # Substitute the sampled logits only for the prompts that
                    # have started generating tokens
                    batch_prompt_tokens[generation_started, context_end_position] = sampled_logits[
                        generation_started
                    ]

                # Compute log probs
                if sampling_params.return_log_probs:
                    log_probs = F.log_softmax(logits, dim=2).to(torch.float32)

                    indices = torch.unsqueeze(
                        batch_prompt_tokens[
                            :, (context_start_position + 1) : (context_end_position + 1)
                        ],
                        2,
                    )
                    # Get the log probabilities for only the prompt tokens
                    assert output_log_probs is not None
                    output_log_probs[:, context_start_position:context_end_position] = torch.gather(
                        log_probs, 2, indices
                    ).squeeze(2)

                context_start_position = context_end_position

                if sampling_params.num_tokens_to_generate > 0:
                    # Check end of generation status for each tensor
                    # and update generated sequence lengths
                    (is_generation_done_tensor, generated_sequence_lengths) = (
                        self.update_generation_status(
                            updated_prompts_tokens=batch_prompt_tokens,
                            generation_started=generation_started,
                            current_context_end_position=context_end_position,
                            is_generation_done_tensor=is_generation_done_tensor,
                            generated_sequence_lengths=generated_sequence_lengths,
                            termination_id=termination_id,
                        )
                    )

                    # Stream intermediate outputs
                    if streaming_enabled:
                        streaming_executor.submit(
                            stream_tokens,
                            streaming_request_ids,
                            streaming_requests,
                            streams,
                            generation_started[streaming_idx].cpu(),
                            is_generation_done_tensor[streaming_idx].cpu(),
                            batch_prompt_tokens[streaming_idx].cpu(),
                            prompt_lengths_in_batch[streaming_idx].cpu(),
                            generated_sequence_lengths[streaming_idx].cpu(),
                            (
                                output_log_probs[streaming_idx].cpu()
                                if output_log_probs is not None
                                else [None] * len(streaming_idx)
                            ),
                        )

                # Boolean flag indicating if all prompts are finished
                all_prompts_done = torch.all(is_generation_done_tensor)
                if all_prompts_done:
                    break

                # Change to decode mode if all prefill is complete
                if torch.all(generation_started):
                    self.inference_wrapped_model.inference_context.enable_decode_mode()
                    # Turn on padding for decode if flag set
                    if moe_pad_experts_for_cuda_graph_inference:
                        capacity_factor = (
                            model_config.num_moe_experts / model_config.moe_router_topk
                        )
                        set_decode_expert_padding(
                            unwrapped_model, True, capacity_factor=capacity_factor
                        )

                context_end_position = context_start_position + 1
                if context_end_position >= max_sequence_length:
                    break

        # Add a final timing event to compute the latency of every loop iteration
        timing_events.append(torch.cuda.Event(enable_timing=True))
        timing_events[-1].record()

        # Close all streams
        if streaming_enabled:
            streaming_executor.shutdown()
            for stream in streams:
                stream.finish()

        # Include all the generated tokens
        batch_prompt_tokens_with_generations = padded_batch_prompt_tokens[
            :batch_size, : (context_end_position + 1)
        ]
        if sampling_params.return_log_probs:
            assert output_log_probs is not None
            output_log_probs = output_log_probs[:, :context_end_position]

        generated_sequence_lengths[
            generated_sequence_lengths > sampling_params.num_tokens_to_generate
        ] = sampling_params.num_tokens_to_generate

        timing_events[-1].synchronize()
        tpot = torch.tensor(
            [
                timing_events[i].elapsed_time(timing_events[i + 1]) / 1e3
                for i in range(len(timing_events) - 1)
            ],
            dtype=torch.float32,
        )

        for idx, request in enumerate(active_requests.values()):
            input_prompt_length = int(prompt_lengths_in_batch[idx])
            # Shorter prompts might have generated more than required tokens. So we trim them down
            required_sequence_length = int(
                min(generated_sequence_lengths[idx], sampling_params.num_tokens_to_generate)
            )
            # Extract only the generated tokens
            required_result_tokens = batch_prompt_tokens_with_generations[
                idx, input_prompt_length : (input_prompt_length + required_sequence_length)
            ]
            generated_sequence_lengths = generated_sequence_lengths.to(dtype=torch.int32)
            request.generated_sequence_lengths = generated_sequence_lengths.to(dtype=torch.int32)
            request.generated_length = required_sequence_length
            request.generated_tokens = required_result_tokens

            # Record the decode latencies for only the generated tokens
            request_tpot = tpot.clone()
            # Sum up the latencies of the first prompt tokens if the
            # request prompt length > minimum prompt length
            spill_length = input_prompt_length - min_prompt_length_in_batch
            if spill_length > 0:
                spill_latency = request_tpot[:spill_length].sum()
                request_tpot = torch.cat((spill_latency.unsqueeze(0), request_tpot[spill_length:]))

            # Remove the extraneous latencies if the
            # request sequence length < maximum sequence length
            request_tpot = request_tpot[:required_sequence_length]
            request.tpot = request_tpot.tolist()

            if output_log_probs is not None:
                request.prompt_log_probs = output_log_probs[idx, : input_prompt_length - 1].tolist()
                request.generated_log_probs = output_log_probs[
                    idx,
                    input_prompt_length - 1 : (input_prompt_length + required_sequence_length - 1),
                ].tolist()
            if sampling_params.top_n_logprobs > 0:
                if not sampling_params.skip_prompt_log_probs:
                    assert (
                        len(top_n_logprobs_dict[idx])
                        >= input_prompt_length + required_sequence_length - 1
                    ), (
                        "Did not collect required number of top-N logprobs: "
                        f"{len(top_n_logprobs_dict[idx])}"
                    )
                    request.prompt_top_n_logprobs = top_n_logprobs_dict[idx][
                        : input_prompt_length - 1
                    ]
                    request.generated_top_n_logprobs = top_n_logprobs_dict[idx][
                        input_prompt_length
                        - 1 : (input_prompt_length + required_sequence_length - 1)
                    ]
                else:
                    assert len(top_n_logprobs_dict[idx]) >= required_sequence_length, (
                        "Did not collect required number of top-N logprobs: "
                        f"{len(top_n_logprobs_dict[idx])}"
                    )
                    request.generated_top_n_logprobs = top_n_logprobs_dict[idx][
                        :required_sequence_length
                    ]

            request.status = Status.COMPLETED

            text, segments = self.detokenize_generations(
                batch_prompt_tokens_with_generations[
                    idx, : (input_prompt_length + required_sequence_length)
                ],
                input_prompt_length + generated_sequence_lengths,
                sampling_params.return_segments,
            )
            request.text = text  # Inference server returns prompts & generations together
            if sampling_params.return_segments:
                request.segments = segments[0]
            request.generated_text = text[len(request.prompt) :]
        return active_requests

    def prep_inference_input(
        self,
        prompts_tokens: torch.Tensor,
        active_requests: OrderedDict[int, InferenceRequest],
        use_attention_mask: bool = False,
    ) -> Dict[str, Any]:
        """Preparing input data for inference, using respective wrapper's prep_inference_input method # pylint: disable=line-too-long

        Args:
            prompts_tokens (torch.Tensor): A tensor of shape [batch_size, max_sequence_length]
            active_requests (OrderedDict[int, InferenceRequest]): The input active requests
            use_attention_mask (bool): Whether to use an attention mask. Should be set to True only
                when exclusively doing prefill (no decode) with variable prompt lengths.

        Returns:
            A dict of the inference input for the current batch.
        """
        inference_input = self.inference_wrapped_model.prep_inference_input(prompts_tokens)

        if use_attention_mask and (
            attention_mask := inference_input.get("attention_mask", None) is None
        ):
            inference_input["attention_mask"] = get_attention_mask(prompts_tokens.size(1))

        return inference_input

    def stream_tokens(
        self,
        sampling_params: SamplingParams,
        request_ids: List[int],
        requests: List[InferenceRequest],
        streams: List[AsyncStream],
        generation_started: List[bool],
        is_generation_done: List[bool],
        tokens: torch.Tensor,
        prompt_lengths: List[int],
        generated_lengths: List[int],
        output_log_probs: Union[torch.Tensor, None],
    ):
        """Asynchronously streams tokens for the given requests.

        Args:
            sampling_params (SamplingParams): The sampling parameters.
            request_ids (List[int]): The request IDs.
            request (List[InferenceRequest]): The requests.
            stream (List[AsyncStream]): The streams over which to send tokens.
            generation_started (List[bool]): Whether the decode step has started.
            is_generation_done (List[bool]): Whether generation has completed.
            tokens (torch.Tensor): The tokens for this request.
            prompt_lengths (List[int]): The number of prompt tokens for each request.
            generated_lengths (List[int]): The number of output tokens for each request.
            output_log_probs (torch.Tensor, optional): The log probs for each request.
        """

        def stream_token(
            request_id: int,
            request: InferenceRequest,
            stream: AsyncStream,
            generation_started: bool,
            is_generation_done: bool,
            tokens: torch.Tensor,
            prompt_length: int,
            generated_length: int,
            output_log_probs: Union[torch.Tensor, None],
        ):
            """Asynchronously streams a token for the given request."""

            if (
                not generation_started
                or stream.finished
                or sampling_params.num_tokens_to_generate == 0
            ):
                return

            return_segments = sampling_params.return_segments
            detokenize_streaming_text = not getattr(
                sampling_params, "no_detokenize_streaming_text", False
            )

            generated_tokens = tokens[prompt_length : prompt_length + generated_length]

            if detokenize_streaming_text:
                generated_text, generated_segments = self.detokenize_generations(
                    generated_tokens, prompt_length + generated_length, return_segments
                )
            else:
                generated_text = ""
                generated_segments = []

            if output_log_probs is not None:
                generated_log_probs = output_log_probs[
                    prompt_length - 1 : prompt_length + generated_length - 1
                ].tolist()
            else:
                generated_log_probs = None

            stream.put(
                InferenceRequest(
                    request_id=request_id,
                    prompt=request.prompt,
                    sampling_params=request.sampling_params,
                    prompt_tokens=request.prompt_tokens,
                    arrival_time=request.arrival_time,
                    status=request.status,
                    encoder_prompt=request.encoder_prompt,
                    generated_text=generated_text,
                    generated_segments=generated_segments,
                    generated_tokens=generated_tokens,
                    generated_log_probs=generated_log_probs,
                    generated_length=generated_length,
                )
            )

            if is_generation_done or generated_length == sampling_params.num_tokens_to_generate:
                stream.finish()

        ret = map(
            stream_token,
            request_ids,
            requests,
            streams,
            generation_started,
            is_generation_done,
            tokens,
            prompt_lengths,
            generated_lengths,
            output_log_probs,
        )
        list(ret)


================================================
FILE: megatron/core/inference/text_generation_controllers/vlm_text_generation_controller.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from typing import OrderedDict

import torch

from megatron.core.inference.inference_request import InferenceRequest, VLMInferenceRequest
from megatron.core.inference.text_generation_controllers.text_generation_controller import (
    TextGenerationController,
)
from megatron.core.inference.utils import get_attention_mask


class VLMTextGenerationController(TextGenerationController):
    """The text generation controller for VLMs"""

    def prep_inference_input(
        self,
        prompts_tokens: torch.Tensor,
        active_requests: OrderedDict[str, InferenceRequest],
        use_attention_mask: bool = False,
    ):
        """Preparing input data for inference, using respective wrapper's prep_inference_input method # pylint: disable=line-too-long

        Currently only supports batch size 1 inference.

        Args:
            prompts_tokens (torch.Tensor): A tensor of shape [batch_size, max_sequence_length]
            active_requests (OrderedDict[str, InferenceRequest]): The input active requests
            use_attention_mask (bool): Whether to use an attention mask. Should be set to True only
                when exclusively doing prefill (no decode) with variable prompt lengths.
        """
        assert len(active_requests) == 1, f"VLM inference currently only supports batch size 1"

        request = list(active_requests.values())[0]

        assert isinstance(
            request, VLMInferenceRequest
        ), f"Found inference request of type {type(request)}, expected VLMInferenceRequest"

        inference_input = self.inference_wrapped_model.prep_inference_input(
            prompts_tokens,
            request.num_img_embeddings_per_tile,
            request.imgs,
            request.num_tiles,
            request.decoder_seq_length,
        )

        if use_attention_mask and (
            attention_mask := inference_input.get("attention_mask", None) is None
        ):
            inference_input["attention_mask"] = get_attention_mask(prompts_tokens.size(1))

        return inference_input


================================================
FILE: megatron/core/inference/text_generation_server/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from .text_generation_server import MegatronServer


================================================
FILE: megatron/core/inference/text_generation_server/dynamic_text_gen_server/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from .text_generation_server import start_text_gen_server, stop_text_gen_server


================================================
FILE: megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.


try:
    from .chat_completions import bp as ChatCompletions
    from .completions import bp as Completions
    from .health import bp as Health

    __all__ = [Completions, ChatCompletions, Health]
except ImportError:
    __all__ = []


================================================
FILE: megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/chat_completions.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import asyncio
import json
import logging
import time
import traceback
import uuid
import warnings

from megatron.core.inference.inference_request import unwrap_serialized_tensors
from megatron.core.inference.sampling_params import SamplingParams
from megatron.core.tokenizers.text.parsers import PARSER_MAPPING

logger = logging.getLogger(__name__)

# pylint: disable=line-too-long


def _get_field(obj, key, default=None):
    """Read a field from dict-like or object-like values."""
    if isinstance(obj, dict):
        return obj.get(key, default)
    return getattr(obj, key, default)


def _normalize_tool_calls(tool_calls):
    """Normalize tool calls to OpenAI-compatible JSON primitives."""
    normalized = []
    for call in tool_calls or []:
        fn = _get_field(call, "function", {}) or {}
        fn_name = _get_field(fn, "name")
        fn_args = _get_field(fn, "arguments", "")
        if fn_name is None:
            continue
        if not isinstance(fn_args, str):
            try:
                fn_args = json.dumps(fn_args, ensure_ascii=False)
            except TypeError:
                fn_args = str(fn_args)
        normalized.append(
            {
                "id": str(_get_field(call, "id", f"call_{uuid.uuid4().hex[:24]}")),
                "type": "function",
                "function": {"name": str(fn_name), "arguments": fn_args},
            }
        )
    return normalized


def _coerce_arguments_mapping(arguments):
    """Coerce function.arguments to a mapping for HF/Jinja chat templates.

    Examples:
    - {"x": 1} -> {"x": 1}
    - '{"x": 1}' -> {"x": 1}
    - "[1, 2]" -> {}  # JSON parses, but not a mapping
    - "not-json" -> {}
    - None -> {}
    """
    if isinstance(arguments, dict):
        return arguments
    if isinstance(arguments, str):
        try:
            parsed = json.loads(arguments)
        except (TypeError, ValueError):
            return {}
        return parsed if isinstance(parsed, dict) else {}
    return {}


def _sanitize_messages_for_template(messages):
    """Prepare messages so tokenizer chat templates can safely consume them.

    This only normalizes tool-call argument payloads inside each message:
    - messages[*].tool_calls[*].function.arguments is coerced to a dict.

    Example transformation:
    Input:
      [{"role": "assistant", "tool_calls": [{"function": {"name": "f", "arguments": "{\"x\": 1}"}}]}]
    Output:
      [{"role": "assistant", "tool_calls": [{"function": {"name": "f", "arguments": {"x": 1}}}]}]

    Another example:
    - arguments: "[1,2,3]" -> arguments: {}
    """
    if not isinstance(messages, list):
        return messages
    sanitized = []
    for message in messages:
        if not isinstance(message, dict):
            sanitized.append(message)
            continue
        msg_copy = dict(message)
        tool_calls = msg_copy.get("tool_calls")
        if isinstance(tool_calls, list):
            sanitized_tool_calls = []
            for call in tool_calls:
                if not isinstance(call, dict):
                    sanitized_tool_calls.append(call)
                    continue
                call_copy = dict(call)
                function = call_copy.get("function")
                if isinstance(function, dict):
                    function_copy = dict(function)
                    function_copy["arguments"] = _coerce_arguments_mapping(
                        function_copy.get("arguments", {})
                    )
                    call_copy["function"] = function_copy
                sanitized_tool_calls.append(call_copy)
            msg_copy["tool_calls"] = sanitized_tool_calls
        sanitized.append(msg_copy)
    return sanitized


def _sanitize_tools_for_template(tools):
    """Ensure tools payload is template-safe and has mapping parameters.

    Example transformations:
    - {"function": {"name": "f", "parameters": "not-a-dict"}}
      -> {"function": {"name": "f", "parameters": {"type": "object", "properties": {}}}}
    - non-dict tool entries are dropped.
    - non-list input returns None.
    """
    if not isinstance(tools, list):
        return None

    sanitized = []
    for tool in tools:
        if not isinstance(tool, dict):
            continue
        tool_copy = dict(tool)
        function = tool_copy.get("function")
        if isinstance(function, dict):
            function_copy = dict(function)
            if not isinstance(function_copy.get("parameters"), dict):
                function_copy["parameters"] = {"type": "object", "properties": {}}
            tool_copy["function"] = function_copy
        sanitized.append(tool_copy)
    return sanitized


def _reconstruct_reasoning_content(messages: list[dict]) -> list[dict]:
    """Reconstruct <think> tags from reasoning_content fields on assistant messages.

    For parity with vLLM, assistant messages may carry reasoning in the reasoning_content field.
    Before applying the chat template, we must inline those tags back into content.
    """
    for message in messages:
        if message.get("role") != "assistant":
            continue
        reasoning_content = message.pop("reasoning_content", None)
        if reasoning_content is not None:
            content = message.get("content") or ""
            message["content"] = f"<think>{reasoning_content}</think>{content}"
    return messages


def _replace_prefix_tokens(
    eos_token_id,
    previous_turn_token_ids,
    retokeenized_previous_turn_token_ids,
    current_turn_token_ids,
):
    """Replace the token ids that are associated with the previous turn with the actual tokens
    from the previous generation (rather than the ones from the chat template application)."""

    # Strip the EOS from the previous turn token ids if it exists
    if previous_turn_token_ids[-1] == eos_token_id:
        previous_turn_token_ids = previous_turn_token_ids[:-1]

    # Find the last EOS token id in the previous turn token ids
    last_eos_token_id_index = len(retokeenized_previous_turn_token_ids) - 1
    for i in reversed(range(len(retokeenized_previous_turn_token_ids))):
        if current_turn_token_ids[i] == eos_token_id:
            last_eos_token_id_index = i
            break

    # Replace the current turn token ids with the tokens from the previous generation
    current_turn_additional_token_ids = current_turn_token_ids[last_eos_token_id_index:]

    # Return the previous turn token ids + the current turn token ids
    return previous_turn_token_ids + current_turn_additional_token_ids


try:
    import orjson

    HAVE_ORJSON = True
except ImportError:
    HAVE_ORJSON = False


try:
    from quart import Blueprint, Response, current_app, jsonify, request

    bp = Blueprint('chat_completions_api', __name__)

    def apply_parsers(message_text, tools, parsers_list, tools_requested):
        """Runs CPU-intensive text parsing."""
        meta = {}
        for parser in parsers_list:
            if parser not in PARSER_MAPPING:
                raise ValueError(f"Parser {parser} not found in PARSER_MAPPING")

            prev_text = message_text
            parsed_text, new_info = PARSER_MAPPING[parser].parse(message_text, tools=tools)
            if "tool_calls" in new_info:
                new_info["tool_calls"] = _normalize_tool_calls(new_info.get("tool_calls", []))
                if not tools_requested:
                    # Ignore incidental tool-call syntax in plain chat mode.
                    parsed_text = prev_text
                    new_info.pop("tool_calls", None)
            message_text = parsed_text

            assert not (
                meta.keys() & new_info.keys()
            ), "Multiple parsers found the same information."
            meta.update(new_info)

        return message_text, meta

    @bp.route('/chat/completions', methods=['POST'])
    @bp.route('/v1/chat/completions', methods=['POST'])
    async def chat_completions():
        """Handles async POST requests for chat completions."""
        client = current_app.config['client']
        tokenizer = current_app.config['tokenizer']
        parsers = current_app.config['parsers']

        req = await request.get_json()
        tools = req.get("tools", None)
        tools_requested = bool(tools)
        messages = req.get("messages")
        chat_template_kwargs = req.get("chat_template_kwargs", {})
        if not isinstance(chat_template_kwargs, dict):
            logger.warning(
                "Ignoring non-dict chat_template_kwargs: %s", type(chat_template_kwargs).__name__
            )
            chat_template_kwargs = {}
        # --- 1. Parse Messages ---
        if not messages:
            return Response("Missing 'messages' field", status=400)
        if not isinstance(messages, list):
            return Response("'messages' must be a list", status=400)
        template_messages = _sanitize_messages_for_template(messages)
        template_messages = _reconstruct_reasoning_content(template_messages)
        template_tools = _sanitize_tools_for_template(tools)

        try:
            if (
                hasattr(tokenizer, 'apply_chat_template')
                and getattr(tokenizer, "chat_template", None) is not None
            ):
                prompt_tokens = tokenizer.apply_chat_template(
                    template_messages,
                    tokenize=True,
                    add_generation_prompt=True,
                    tools=template_tools,
                    **chat_template_kwargs,
                )

                if req.get("prevent_retokenization", True):
                    # If we are avoiding retokenization, we need to replace some prompt tokens with the prompt/generation tokens from the previous generation
                    # This improves prefix cache hits and reduces logprob variation between training and inference.

                    # Find the last assistant message
                    last_assistant_message_idx = None
                    for i in reversed(range(len(template_messages))):
                        if template_messages[i]["role"] == "assistant":
                            last_assistant_message_idx = i
                            break

                    last_assistant_message = (
                        template_messages[last_assistant_message_idx]
                        if last_assistant_message_idx is not None
                        else None
                    )

                    # Only proceed if the last assistant message has the token IDs from a previous generation.
                    # Dataset-provided conversation history won't have these fields.
                    if (
                        last_assistant_message is not None
                        and "prompt_token_ids" in last_assistant_message
                        and "generation_token_ids" in last_assistant_message
                    ):
                        eos_token_id = tokenizer.eos_id
                        assert eos_token_id is not None, "Your tokenizer must have an EOS token ID!"

                        warnings.warn(
                            "Avoiding prefix retokenization."
                            " This is a patch that ensures subsequent generations are not retokenized differently than the previous generation."
                            " This may cause unexpected behavior if messages (including system messages) are altered between generations."
                        )

                        messages_to_last_assistant_message = template_messages[
                            : last_assistant_message_idx + 1
                        ]

                        # Get the templated tokenization of just the previous generation
                        retokenized_previous_turn_token_ids = tokenizer.apply_chat_template(
                            messages_to_last_assistant_message,
                            tokenize=True,
                            add_generation_prompt=False,
                            tools=template_tools,
                            **chat_template_kwargs,
                        )

                        # Replace the prefix tokens with the tokens from the previous generation
                        previous_turn_token_ids = (
                            last_assistant_message["prompt_token_ids"]
                            + last_assistant_message["generation_token_ids"]
                        )
                        prompt_tokens = _replace_prefix_tokens(
                            eos_token_id,
                            previous_turn_token_ids,
                            retokenized_previous_turn_token_ids,
                            prompt_tokens,
                        )

            else:
                warnings.warn(
                    "Tokenizer does not support 'apply_chat_template'. Using tokenize instead."
                )
                prompt_tokens = tokenizer.tokenize(
                    "\n".join([message["content"] for message in messages])
                )
        except Exception as e:
            logger.error(f"{traceback.format_exc()}")
            return Response(f"Error processing 'messages': {e}", status=500)

        # --- 2. Parse Sampling Params ---
        try:
            temperature = float(req.get("temperature", 1.0))
            top_p = float(req.get("top_p", 1.0))
            top_k = int(req.get("top_k", 0))
            n = int(req.get("n", 1))  # Number of choices to generate

            if temperature == 0.0:
                top_k = 1
                top_p = 0.0

            # Check for 'logprobs' (bool) and 'top_logprobs' (int)
            return_log_probs = bool(req.get("logprobs", False))
            top_n_logprobs = int(req.get("top_logprobs", 0)) if return_log_probs else 0
            skip_prompt_log_probs = bool(req.get("skip_prompt_log_probs", True))
            add_BOS = bool(req.get("add_BOS", False))

            # The engine only handles add_BOS for string prompts, not pre-tokenized
            # input. Since we pre-tokenize via apply_chat_template, we must handle
            # BOS ourselves, matching the logic in tokenize_prompt().
            if hasattr(tokenizer, 'bos') and tokenizer.bos is not None:
                start_idx = 0
                while start_idx < len(prompt_tokens) and prompt_tokens[start_idx] == tokenizer.bos:
                    start_idx += 1
                if start_idx > 0:
                    prompt_tokens = prompt_tokens[start_idx:]

                if add_BOS:
                    prompt_tokens = [tokenizer.bos] + prompt_tokens

            max_tokens = req.get("max_completion_tokens", None) or req.get("max_tokens", None)

            sampling_params = SamplingParams(
                temperature=temperature,
                top_k=top_k,
                top_p=top_p,
                return_log_probs=return_log_probs,
                top_n_logprobs=top_n_logprobs,
                num_tokens_to_generate=(int(max_tokens) if max_tokens is not None else None),
                skip_prompt_log_probs=skip_prompt_log_probs,
                add_BOS=add_BOS,
            )
        except ValueError as e:
            return Response(f"Invalid sampling parameter: {e}", status=400)

        # --- 3. Send Requests to Engine ---
        tasks = [client.add_request(prompt_tokens, sampling_params) for _ in range(n)]

        if current_app.config['verbose']:
            start_time = time.perf_counter()

        try:
            batch_results = await asyncio.gather(*tasks)
        except Exception as e:
            logger.error(f"Error during inference: {e}")
            return Response(f"Error during inference: {e}", status=500)

        if current_app.config['verbose']:
            logging.info(
                f"Batch of {len(tasks)} requests (n={n}) processed in "
                f"{time.perf_counter() - start_time:.2f}s"
            )

        # --- 4. Check for failed requests ---
        failed_errors = []
        has_nontransient_error = False
        for i, record in enumerate(batch_results):
            if record.get("status") == "FAILED":
                events = record.get("events", [])
                error_events = [
                    e for e in events if e.get("type") in ("ERROR_NONTRANSIENT", "ERROR_TRANSIENT")
                ]
                if any(e.get("type") == "ERROR_NONTRANSIENT" for e in error_events):
                    has_nontransient_error = True
                error_msg = (
                    str(error_events[-1].get("payload", "Unknown error"))
                    if error_events
                    else "Unknown error"
                )
                failed_errors.append(f"Request {i}: {error_msg}")

        if failed_errors:
            error_detail = "; ".join(failed_errors)
            status = 400 if has_nontransient_error else 500
            logger.error(f"Inference request(s) failed: {error_detail}")
            return Response(f"Inference request(s) failed: {error_detail}", status=status)

        # --- 5. Format OpenAI Response ---
        choices = []
        total_completion_tokens = 0
        prompt_tokens_counts = []

        request_idx = 0
        for result_item in batch_results:
            result = unwrap_serialized_tensors(result_item)

            prompt_tokens_out = result["prompt_tokens"]  # The engine can modify prompt_tokens.
            text_output = result["generated_text"]
            prompt_tokens_count = len(prompt_tokens_out) if prompt_tokens_out is not None else 0
            prompt_tokens_counts.append(prompt_tokens_count)

            logprobs_content = None
            if sampling_params.return_log_probs:
                token_logprobs = result.get('log_probs', [])

                tokens_to_decode = [[tok] for tok in result["generated_tokens"]]
                tokens = list(map(tokenizer.detokenize, tokens_to_decode))

                # Get top_n_logprobs if available
                generated_top_n_logprobs = result.get('generated_top_n_logprobs')

                logprobs_content = []
                for i, (tok, lp) in enumerate(zip(tokens, token_logprobs)):
                    # Build top_logprobs list for this token position
                    top_logprobs_list = []
                    if generated_top_n_logprobs and i < len(generated_top_n_logprobs):
                        top_n_dict = generated_top_n_logprobs[i]
                        for token_str, logprob in top_n_dict.items():
                            top_logprobs_list.append(
                                {
                                    "token": token_str,
                                    "logprob": logprob,
                                    "bytes": list(token_str.encode("utf-8")),
                                }
                            )

                    logprobs_content.append(
                        {
                            "token": tok,
                            "logprob": lp,
                            "bytes": list(tok.encode("utf-8")),
                            "top_logprobs": top_logprobs_list,
                        }
                    )

            metadata = {}
            message_text = text_output

            if parsers:
                message_text, metadata = apply_parsers(
                    message_text, req.get("tools", None), parsers, tools_requested
                )

            message = {"role": "assistant", "content": message_text}
            if metadata.get("tool_calls", []):
                message["tool_calls"] = metadata["tool_calls"]
            if "reasoning" in metadata:
                message["reasoning_content"] = metadata["reasoning"]

            # Replicate data in the message field for compatibility.
            message["prompt_token_ids"] = result["prompt_tokens"]
            message["generation_token_ids"] = result["generated_tokens"]
            message["generation_log_probs"] = result.get("generated_log_probs", [])
            return_log_probs = sampling_params.return_log_probs

            finish_reason = "tool_calls" if metadata.get("tool_calls", []) else "stop"
            if (
                len(result["generated_tokens"])
                >= result["sampling_params"]["num_tokens_to_generate"]
            ):
                finish_reason = "length"

            choice_data = {
                "index": request_idx,
                "message": message,
                "prompt_token_ids": result["prompt_tokens"],
                "generation_token_ids": result["generated_tokens"],
                "generation_log_probs": result.get("generated_log_probs", []),
                "raw_text": result["prompt"] + result["generated_text"],
                # 'logprobs' in chat API is an object containing 'content'
                # "logprobs": {"content": logprobs_content} if logprobs_content else None,
                "logprobs": {"content": logprobs_content} if return_log_probs else None,
                "finish_reason": finish_reason,
            }
            choice_data["policy_epoch"] = result["policy_epoch"]
            choice_data["kv_cache_epoch"] = result["kv_cache_epoch"]
            choice_data["num_evictions"] = sum(
                1 for e in result["events"] if e.get("type") == "EVICT"
            )
            if current_app.config['verbose']:
                logging.info(result)

            if result["routing_indices"] is not None:
                choice_data["moe_topk_indices"] = result["routing_indices"]
                if prompt_tokens_count:
                    choice_data["prompt_moe_topk_indices"] = result["routing_indices"][
                        :prompt_tokens_count
                    ]

            choices.append(choice_data)
            if choice_data["generation_log_probs"] is None:
                logger.warning(
                    "Generation log probs is None for request:\n%s", json.dumps(result, indent=4)
                )
            total_completion_tokens += len(result["generated_tokens"])
            request_idx += 1

        prompt_token_count = max(prompt_tokens_counts) if prompt_tokens_counts else 0
        response = {
            "id": str(uuid.uuid4()),
            "created": int(time.time()),
            "model": "EMPTY",
            "object": "chat.completion",
            "choices": choices,
            "usage": {
                "prompt_tokens": prompt_token_count,
                "completion_tokens": total_completion_tokens,
                "total_tokens": prompt_token_count + total_completion_tokens,
            },
        }

        if HAVE_ORJSON:
            # Use orjson for faster serialization
            return Response(orjson.dumps(response), mimetype="application/json")
        else:
            return jsonify(response)

except ImportError as e:
    logger.warning(f"Could not import quart: {e}")


================================================
FILE: megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/common.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import threading

import torch

GENERATE_NUM = 0
LOCK = threading.Lock()


def send_do_generate():
    """Broadcasts a message to perform a generation to all tensor parallel ranks."""
    choice = torch.tensor([GENERATE_NUM], dtype=torch.long, device=torch.cuda.current_device())
    torch.distributed.broadcast(choice, 0)


================================================
FILE: megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/completions.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import asyncio
import logging
import time

from megatron.core.inference.inference_request import unwrap_serialized_tensors
from megatron.core.inference.sampling_params import SamplingParams

logger = logging.getLogger(__name__)


try:
    from quart import Blueprint, current_app, jsonify, request

    bp = Blueprint('completions_api', __name__)

    @bp.route('/completions', methods=['POST'])
    @bp.route('/v1/completions', methods=['POST'])
    async def completions():
        """Handles async POST requests for completions."""
        client = current_app.config['client']
        tokenizer = current_app.config['tokenizer']

        req = await request.get_json(force=True)
        if req is None:
            return "Invalid or missing JSON body", 400

        # --- 1. Parse Prompt ---
        prompt_data = req.get("prompt")
        if not prompt_data:
            return "Missing 'prompt' field", 400

        try:
            if isinstance(prompt_data, str):
                prompts_as_tokens = [tokenizer.tokenize(prompt_data)]
                prompts_as_strings = [prompt_data]
            elif isinstance(prompt_data, list):
                if not prompt_data:
                    return "'prompt' list is empty", 400
                if all(isinstance(p, str) for p in prompt_data):
                    prompts_as_tokens = [tokenizer.tokenize(p) for p in prompt_data]
                    prompts_as_strings = prompt_data
                elif all(isinstance(p, int) for p in prompt_data):
                    prompts_as_tokens = [prompt_data]
                    prompts_as_strings = [tokenizer.detokenize(prompt_data)]
                elif all(
                    isinstance(p, list) and all(isinstance(t, int) for t in p) for p in prompt_data
                ):
                    prompts_as_tokens = prompt_data
                    prompts_as_strings = [tokenizer.detokenize(p) for p in prompt_data]
                else:
                    return (
                        (
                            "Invalid 'prompt' format. Must be str, list[str], "
                            "list[int], or list[list[int]]"
                        ),
                        400,
                    )
            else:
                return "Invalid 'prompt' type. Must be str or list", 400
        except Exception as e:
            return f"Error tokenizing prompt: {e}", 500

        # --- 2. Parse Sampling Params ---
        try:
            temperature = float(req.get("temperature", 1.0))
            top_p = float(req.get("top_p", 1.0))
            top_k = int(req.get("top_k", 0))
            echo = bool(req.get("echo", False))

            if temperature == 0.0:
                top_k = 1
                top_p = 0.0

            # Parse logprobs - can be an integer (number of top logprobs to return) or None
            logprobs_param = req.get("logprobs", None)

            if logprobs_param is not None:
                top_n_logprobs = int(logprobs_param)
                return_log_probs = True
            else:
                top_n_logprobs = 0
                return_log_probs = False

            # When echo=True and logprobs are requested, we need prompt logprobs
            # skip_prompt_log_probs=False ensures the engine computes logprobs for prompt tokens
            skip_prompt_log_probs = not (echo and return_log_probs)

            # Parse stop sequences
            stop = req.get("stop", None)
            if isinstance(stop, str):
                stop = [stop]

            sampling_params = SamplingParams(
                temperature=temperature,
                top_k=top_k,
                top_p=top_p,
                return_log_probs=return_log_probs,
                top_n_logprobs=top_n_logprobs,
                skip_prompt_log_probs=skip_prompt_log_probs,
                num_tokens_to_generate=int(req.get("max_tokens", 16)),
                stop_words=stop,
            )
        except ValueError as e:
            return f"Invalid sampling parameter: {e}", 400

        # --- 3. Send Requests to Engine ---
        tasks = []
        for prompt_tokens in prompts_as_tokens:
            per_req_params = SamplingParams(
                temperature=sampling_params.temperature,
                top_k=sampling_params.top_k,
                top_p=sampling_params.top_p,
                return_log_probs=sampling_params.return_log_probs,
                top_n_logprobs=sampling_params.top_n_logprobs,
                skip_prompt_log_probs=sampling_params.skip_prompt_log_probs,
                num_tokens_to_generate=sampling_params.num_tokens_to_generate,
                stop_words=sampling_params.stop_words,
            )
            tasks.append(client.add_request(prompt_tokens, per_req_params))

        if current_app.config['verbose']:
            start_time = time.perf_counter()

        try:
            batch_results = await asyncio.gather(*tasks)
        except Exception as e:
            return f"Error during inference: {e}", 500

        if current_app.config['verbose']:
            logging.info(
                f"Batch of {len(tasks)} requests processed in "
                f"{time.perf_counter() - start_time:.2f}s"
            )

        # --- 4. Check for failed requests ---
        failed_errors = []
        has_nontransient_error = False
        for i, record in enumerate(batch_results):
            if record.get("status") == "FAILED":
                events = record.get("events", [])
                error_events = [
                    e for e in events if e.get("type") in ("ERROR_NONTRANSIENT", "ERROR_TRANSIENT")
                ]
                if any(e.get("type") == "ERROR_NONTRANSIENT" for e in error_events):
                    has_nontransient_error = True
                error_msg = (
                    str(error_events[-1].get("payload", "Unknown error"))
                    if error_events
                    else "Unknown error"
                )
                failed_errors.append(f"Request {i}: {error_msg}")

        if failed_errors:
            error_detail = "; ".join(failed_errors)
            status = 400 if has_nontransient_error else 500
            logger.error(f"Inference request(s) failed: {error_detail}")
            return f"Inference request(s) failed: {error_detail}", status

        # --- 5. Format Response (matching old_completions.py) ---
        choices = []

        request_idx = 0
        for completed_request in batch_results:
            result = unwrap_serialized_tensors(completed_request)
            full_text = result["generated_text"] or ""
            text_output = (prompts_as_strings[request_idx] + full_text) if echo else full_text

            logprobs_data = None
            if sampling_params.return_log_probs:
                # Get prompt tokens and logprobs
                prompt_tokens_list = result["prompt_tokens"] or []

                prompt_log_probs = result.get('prompt_log_probs') or []
                prompt_top_n_logprobs = result.get('prompt_top_n_logprobs') or []

                # Get generated tokens and logprobs
                generated_tokens_list = result["generated_tokens"] or []
                generated_log_probs = result.get('generated_log_probs') or []
                generated_top_n_logprobs = result.get('generated_top_n_logprobs') or []

                if echo:
                    # When echo=True, include prompt tokens and their logprobs
                    # Prompt logprobs are for tokens [1:] (first token has no logprob)
                    all_token_ids = prompt_tokens_list + generated_tokens_list
                    tokens = [tokenizer.detokenize([tok]) for tok in all_token_ids]

                    # Build token_logprobs: [None] for first token, then prompt logprobs,
                    # then generated logprobs
                    token_logprobs = [None] + list(prompt_log_probs) + list(generated_log_probs)

                    # Build top_logprobs: [None] for first token, then prompt top_n,
                    # then generated top_n
                    top_logprobs = None
                    if prompt_top_n_logprobs or generated_top_n_logprobs:
                        top_logprobs = (
                            [None] + list(prompt_top_n_logprobs) + list(generated_top_n_logprobs)
                        )

                    # Calculate text_offset: cumulative character positions starting from 0
                    text_offset = []
                    current_offset = 0
                    for tok_str in tokens:
                        text_offset.append(current_offset)
                        current_offset += len(tok_str)
                else:
                    # When echo=False, only return generated tokens and their logprobs
                    tokens = [tokenizer.detokenize([tok]) for tok in generated_tokens_list]

                    # Prepend [None] to match OpenAI format
                    token_logprobs = [None] + list(generated_log_probs)

                    # Build top_logprobs
                    top_logprobs = None
                    if generated_top_n_logprobs:
                        top_logprobs = [None] + list(generated_top_n_logprobs)

                    # Calculate text_offset for generated tokens only
                    text_offset = []
                    current_offset = 0
                    for tok_str in tokens:
                        text_offset.append(current_offset)
                        current_offset += len(tok_str)

                logprobs_data = {
                    "token_logprobs": token_logprobs,
                    "tokens": tokens,
                    "text_offset": text_offset,
                    "top_logprobs": top_logprobs,
                }

            choices.append({"index": request_idx, "text": text_output, "logprobs": logprobs_data})
            if result["routing_indices"] is not None:
                choices[-1]["moe_topk_indices"] = result["routing_indices"]
                prompt_length = (
                    len(result["prompt_tokens"]) if result["prompt_tokens"] is not None else 0
                )
                if prompt_length:
                    choices[-1]["prompt_moe_topk_indices"] = result["routing_indices"][
                        :prompt_length
                    ]

            request_idx += 1

        return jsonify({"choices": choices})

except ImportError as e:
    logger.warning(f"Could not import quart: {e}")


================================================
FILE: megatron/core/inference/text_generation_server/dynamic_text_gen_server/endpoints/health.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import logging

logger = logging.getLogger(__name__)

try:
    from quart import Blueprint, current_app, jsonify

    bp = Blueprint('health_api', __name__)

    @bp.route('/health', methods=['GET'])
    @bp.route('/v1/health', methods=['GET'])
    async def health():
        """
        Handles GET requests for service health.
        Checks if the inference client is initialized and reachable.
        """
        status_response = {"status": "ok", "service": "Megatron Inference Server", "ready": False}

        try:
            client = current_app.config.get('client')

            if client is not None:
                status_response["ready"] = True
                return jsonify(status_response), 200
            else:
                logger.error("Health check failed: Client not found in app config.")
                status_response["status"] = "error"
                status_response["details"] = "Inference client not initialized"
                return jsonify(status_response), 503

        except Exception as e:
            logger.error(f"Health check failed with exception: {e}")
            return jsonify({"status": "error", "details": str(e)}), 500

except ImportError as e:
    logger.warning(f"Could not import quart: {e}")


================================================
FILE: megatron/core/inference/text_generation_server/dynamic_text_gen_server/text_generation_server.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import asyncio
import logging
import multiprocessing as mp
import socket
from contextlib import contextmanager
from typing import List, Optional

try:
    from hypercorn.asyncio import serve
    from hypercorn.config import Config
    from quart import Quart

    HAS_BACKEND = True
except ImportError as e:
    HAS_BACKEND = False

import megatron.core.inference.text_generation_server.dynamic_text_gen_server.endpoints as endpoints
from megatron.core.inference.inference_client import InferenceClient
from megatron.core.utils import trace_async_exceptions

logger = logging.getLogger(__name__)

# Global reference to manage the background server processes
_SERVER_PROCESSES: List[mp.Process] = []
_SHARED_SOCKET = None


@contextmanager
def temp_log_level(level, logger=None):
    """Enables temporarily overriding the logging level."""
    logger = logger or logging.getLogger()
    old_level = logger.level
    logger.setLevel(level)
    try:
        yield
    finally:
        logger.setLevel(old_level)


@trace_async_exceptions
async def _run_text_gen_server(
    coordinator_addr: str,
    tokenizer,
    rank: int,
    server_port: int,
    parsers: Optional[List[str]] = None,
    verbose: bool = False,
    fd: Optional[int] = None,
):
    """
    Initializes and runs the async web server. Automatically starts and
    manages its own InferenceClient connected to the provided coordinator address.
    """
    if not HAS_BACKEND:
        raise RuntimeError(f"Web backend framework (Quart) not available")

    # Create and start the client locally inside this process
    inference_client = InferenceClient(coordinator_addr, deserialize=False)
    inference_client.start()
    logger.info(f"Rank {rank}: InferenceClient connected.")

    try:
        try:
            hostname = socket.gethostname()
        except Exception as e:
            logger.warning(f"Could not get hostname: {e}")
            hostname = "0.0.0.0"

        app = Quart(__name__)

        # Quart native way to handle max body size (1 GB; needed for large prompts)
        app.config['MAX_CONTENT_LENGTH'] = 2**30

        # Store client and tokenizer in app config for Blueprints to use
        app.config['client'] = inference_client
        app.config['tokenizer'] = tokenizer
        app.config['parsers'] = parsers
        app.config['verbose'] = verbose

        # Register all blueprints from the 'endpoints' package
        for endpoint in endpoints.__all__:
            app.register_blueprint(endpoint)

        config = Config()
        config.keep_alive_timeout = 30.0  # Keep connection alive between long-running requests.
        config.backlog = 2**14  # Expect high load; ensure we do not drop connections.
        config.h2_max_concurrent_streams = (
            2**14
        )  # Allow many concurrent streams for HTTP/2 clients.

        if fd is not None:
            config.bind = [f"fd://{fd}"]
        else:
            config.bind = [f"0.0.0.0:{server_port}"]

        with temp_log_level(logging.INFO, logger):
            logger.info(f"Starting text generation server on http://{hostname}:{server_port}")
            logger.info(f"Using tokenizer: {type(tokenizer)}")
            logger.info(f"Using parsers: {parsers}")

        # Quart is natively ASGI, so we can serve the app directly
        await serve(app, config)

    finally:
        # Gracefully shut down the client when the server stops
        inference_client.stop()
        logger.info(f"Rank {rank}: Web server and client shut down.")


def _server_process_worker(
    coordinator_addr: str,
    tokenizer,
    rank: int,
    server_port: int,
    parsers: Optional[List[str]] = None,
    verbose: bool = False,
    fd: Optional[int] = None,
):
    """Synchronous worker function that sets up a new event loop for the separate process."""
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(
            _run_text_gen_server(
                coordinator_addr, tokenizer, rank, server_port, parsers, verbose, fd
            )
        )
    except KeyboardInterrupt:
        logger.info(f"Rank {rank}: text gen server process interrupted.")
    finally:
        pending = asyncio.all_tasks(loop)
        for task in pending:
            task.cancel()
        if pending:
            loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
        loop.close()


def start_text_gen_server(
    coordinator_addr: str,
    tokenizer,
    rank: int,
    server_port: int,
    parsers: Optional[List[str]] = None,
    verbose: bool = False,
    num_replicas: int = 4,
):
    """Start the text generation server."""
    global _SERVER_PROCESSES
    global _SHARED_SOCKET

    if _SERVER_PROCESSES:
        logger.warning("Text gen server processes are already running.")
        return

    _SHARED_SOCKET = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    _SHARED_SOCKET.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

    if hasattr(socket, 'SO_REUSEPORT'):
        try:
            _SHARED_SOCKET.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
        except OSError:
            pass

    _SHARED_SOCKET.bind(("0.0.0.0", server_port))
    _SHARED_SOCKET.setblocking(False)

    _SHARED_SOCKET.set_inheritable(True)
    fd = _SHARED_SOCKET.fileno()

    for i in range(num_replicas):
        p = mp.Process(
            target=_server_process_worker,
            args=(coordinator_addr, tokenizer, rank, server_port, parsers, verbose, fd),
            daemon=True,
        )
        p.start()
        _SERVER_PROCESSES.append(p)
        logger.info(f"Started text gen frontend replica {i+1}/{num_replicas} (PID: {p.pid})")


def stop_text_gen_server():
    """Stop the text generation server."""
    global _SERVER_PROCESSES
    global _SHARED_SOCKET

    if not _SERVER_PROCESSES:
        return

    logger.info(f"Terminating {len(_SERVER_PROCESSES)} Text Gen frontend processes...")

    for p in _SERVER_PROCESSES:
        if p.is_alive():
            p.terminate()

    for p in _SERVER_PROCESSES:
        p.join(timeout=3)
        if p.is_alive():
            p.kill()
            p.join()

    # Clean up the master socket
    if _SHARED_SOCKET is not None:
        _SHARED_SOCKET.close()
        _SHARED_SOCKET = None

    _SERVER_PROCESSES = []
    logger.info("All text gen frontend processes terminated.")


================================================
FILE: megatron/core/inference/text_generation_server/dynamic_text_gen_server/tokenization.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""Tokenization utilities."""


import torch

from megatron.core import parallel_state
from megatron.core.inference.communication_utils import broadcast_int_list, broadcast_tensor


def tokenize_prompts(
    tokenizer, prompts=None, tokens_to_generate=None, add_BOS=None, rank=0, data_parallel=False
):
    """Tokenize prompts and make them avaiable on all ranks.

    Args:
        data_parallel (bool): Broadcast tokens across a single data parallel model replica.
    """

    # On all ranks set to None so we can pass them to functions
    sizes_list = None
    prompts_tokens_cuda_long_tensor = None
    prompts_length_cuda_long_tensor = None

    # On the specified rank, build the above.
    src_rank = torch.distributed.get_rank()
    if data_parallel:
        src_rank = parallel_state.get_data_parallel_src_rank()

    if src_rank == rank:
        assert prompts is not None
        assert tokens_to_generate is not None
        # Tensor of tokens padded and their unpadded length.
        prompts_tokens_cuda_long_tensor, prompts_length_cuda_long_tensor = (
            _tokenize_prompts_and_batch(tokenizer, prompts, tokens_to_generate, add_BOS)
        )
        # We need the sizes of these tensors for the boradcast
        sizes_list = [
            prompts_tokens_cuda_long_tensor.size(0),  # Batch size
            prompts_tokens_cuda_long_tensor.size(1),
        ]  # Sequence lenght

    # First, broadcast the sizes.
    sizes_tensor = broadcast_int_list(
        2, int_list=sizes_list, rank=rank, data_parallel=data_parallel
    )

    # Now that we have the sizes, we can boradcast the tokens
    # and length tensors.
    sizes = sizes_tensor.tolist()
    prompts_tokens_cuda_long_tensor = broadcast_tensor(
        sizes,
        torch.int64,
        tensor=prompts_tokens_cuda_long_tensor,
        rank=rank,
        data_parallel=data_parallel,
    )
    prompts_length_cuda_long_tensor = broadcast_tensor(
        sizes[0],
        torch.int64,
        tensor=prompts_length_cuda_long_tensor,
        rank=rank,
        data_parallel=data_parallel,
    )

    return prompts_tokens_cuda_long_tensor, prompts_length_cuda_long_tensor


def _tokenize_prompts_and_batch(tokenizer, prompts, tokens_to_generate, add_BOS):
    """Given a set of prompts and number of tokens to generate:
    - tokenize prompts
    - set the sequence length to be the max of length of prompts
      plus the number of tokens we would like to generate
    - pad all the sequences to this length so we can convert them
      into a 2D tensor.
    """

    # Tokenize all the prompts.
    if hasattr(tokenizer, 'eod'):
        eod_token = tokenizer.eod
    elif hasattr(tokenizer, 'eos_id'):
        eod_token = tokenizer.eos_id
    else:
        raise AttributeError('No eod token found in Tokenizer')
    if add_BOS:
        prompts_tokens = [[eod_token] + tokenizer.tokenize(prompt) for prompt in prompts]
    else:
        prompts_tokens = [tokenizer.tokenize(prompt) for prompt in prompts]

    # Now we have a list of list of tokens which each list has a different
    # size. We want to extend this list to:
    #   - incorporate the tokens that need to be generated
    #   - make all the sequences equal length.
    # Get the prompts length.
    prompts_length = [len(prompt_tokens) for prompt_tokens in prompts_tokens]
    # Get the max prompts length.
    max_prompt_len = max(prompts_length)
    # Number of tokens in the each sample of the batch.
    samples_length = max_prompt_len + tokens_to_generate
    # Now update the list of list to be of the same size: samples_length.
    for prompt_tokens, prompt_length in zip(prompts_tokens, prompts_length):
        padding_size = samples_length - prompt_length
        prompt_tokens.extend([eod_token] * padding_size)

    # Now we are in a structured format, we can convert to tensors.
    prompts_tokens_tensor = torch.tensor(prompts_tokens, dtype=torch.long, device='cuda')
    prompts_length_tensor = torch.tensor(prompts_length, dtype=torch.long, device='cuda')

    return prompts_tokens_tensor, prompts_length_tensor


================================================
FILE: megatron/core/inference/text_generation_server/endpoints/common.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import threading

import torch

GENERATE_NUM = 0
LOCK = threading.Lock()


def send_do_generate():
    """Broadcasts a message to perform a generation to all tensor parallel ranks."""
    choice = torch.tensor([GENERATE_NUM], dtype=torch.long, device=torch.cuda.current_device())
    torch.distributed.broadcast(choice, 0)


================================================
FILE: megatron/core/inference/text_generation_server/endpoints/completions.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""This endpoint is for mimicking the OpenAI completions API.
See https://platform.openai.com/docs/api-reference/completions/create
"""

import numpy as np
import torch

try:
    from flask import jsonify, request
    from flask_restful import Resource

    HAVE_FLASK = True
except ImportError:
    Resource = object

    HAVE_FLASK = False

from megatron.core.inference.text_generation_server.endpoints.common import LOCK, send_do_generate
from megatron.core.inference.text_generation_server.run_mcore_engine import run_mcore_engine


def detokenize(prompt, tok) -> list[str]:
    """Detokenizes the given prompt."""
    if isinstance(prompt, str):
        return [prompt]
    elif isinstance(prompt, list):
        if not prompt:  # The list is empty, can't determine its intended type.
            raise ValueError(f"prompt contains no items: {prompt}")
        if all(isinstance(item, str) for item in prompt):
            return prompt
        elif all(isinstance(item, int) for item in prompt):
            return [tok.detokenize(prompt[0])]
        elif all(  # list[list[int]]
            isinstance(item, list) and all(isinstance(subitem, int) for subitem in item)
            for item in prompt
        ):
            return [tok.detokenize(item) for item in prompt]
        else:
            raise ValueError(f"Unknown prompt type: {type(prompt)}")
    else:
        raise ValueError(f"Unknown prompt type: {type(prompt)}")


class MegatronCompletions(Resource):
    """Completions endpoint."""

    def __init__(self, engine, args):
        self.engine = engine
        self.args = args

    def post(self):
        """Handles a POST request."""
        assert HAVE_FLASK

        req = request.get_json()
        tokenizer = self.engine.controller.tokenizer
        prompts = detokenize(req["prompt"], tokenizer)

        # convert the openai-local-completions api to the format
        # expected by the generate_and_post_process function
        local_kwargs = {
            "prompts": prompts,
            "tokens_to_generate": int(req["max_tokens"]),
            "temperature": float(req.get("temperature", 1.0)),
            "top_k_sampling": int(req.get("top_k", 0)),
            "top_p_sampling": float(req.get("top_p", 1.0)),
            "return_topk_logprobs": int(req.get("logprobs", 0)),
            "echo": bool(req.get("echo", False)),
            "random_seed": int(req.get("seed", -1)),
            "best_of": int(req.get("best_of", 1)),
            "num_completions": int(req.get("n", 1)),
            "stop": req.get("stop", [tokenizer.detokenize([tokenizer.eod])]),
            "return_output_log_probs": True,
        }

        if isinstance(local_kwargs["stop"], str):
            local_kwargs["stop"] = [local_kwargs["stop"]]

        if local_kwargs["temperature"] == 0:
            # temperature = 0 is openai api's way of specifying greedy
            # deterministic sampling but actually passing temperature=0
            # is undefined and leads to div by zero, so set top-k = 1
            local_kwargs["top_k_sampling"] = 1
            local_kwargs["top_p_sampling"] = 0

        echo = local_kwargs.pop("echo")
        if not echo and local_kwargs["tokens_to_generate"] == 0:
            return "echo=False not supported when tokens_to_generate == 0", 400

        if local_kwargs.pop("best_of") > 1:
            return "best_of > 1 not supported", 400

        if local_kwargs.pop("num_completions") > 1:
            return "num_completions > 1 not supported", 400

        if local_kwargs["tokens_to_generate"] > 0 and local_kwargs["return_topk_logprobs"] > 0:
            return "cannot return top-k unless tokens_to_generate=0 at this time", 400

        if local_kwargs["return_topk_logprobs"] > 10:
            return "return_topk_logprobs > 10 not supported", 400

        stop_until = local_kwargs.pop("stop")

        with LOCK:
            send_do_generate()

            temperature = local_kwargs["temperature"]
            top_k = local_kwargs["top_k_sampling"]
            top_p = local_kwargs["top_p_sampling"]
            tokens_to_generate = local_kwargs["tokens_to_generate"]
            logprobs = local_kwargs["return_output_log_probs"]
            top_n_logprobs = local_kwargs["return_topk_logprobs"]
            random_seed = local_kwargs["random_seed"]
            response_dict = run_mcore_engine(
                self.engine,
                prompts,
                temperature,
                top_k,
                top_p,
                logprobs,
                tokens_to_generate,
                top_n_logprobs=top_n_logprobs,
                random_seed=random_seed,
            )
            result = [
                response_dict["text"],
                response_dict["segments"],
                response_dict.get("logprobs", None),
                response_dict["tokens"],
            ]
            result.append(response_dict.get("top_n_logprobs", None))

        prompts_plus_generations, prompts_plus_generations_segments = result[:2]
        output_log_probs, tokens = result[2:4]
        logprobs_topk = result[4]

        if top_n_logprobs > 0:
            assert logprobs_topk is not None

        if "debug_fname" in req:
            torch.save(
                {
                    "args": local_kwargs,
                    "tokenizer": tokenizer,
                    "prompts_plus_generations": prompts_plus_generations,
                    "prompts_plus_generations_segments": prompts_plus_generations_segments,
                    "output_log_probs": output_log_probs,
                    "tokens": tokens,
                    "logprobs_topk": logprobs_topk,
                },
                f"completions_result_{req['debug_fname']}.pt",
            )

        batch_size = len(tokens)

        results = []
        for batch_idx, (prompt_plus_generation, prompt) in enumerate(
            zip(prompts_plus_generations, prompts)
        ):
            tok_offsets = tokenizer.offsets(tokens[batch_idx], prompt_plus_generation)
            if echo:
                str_trunc_start_idx, tok_idx_start = 0, 0
            else:
                str_trunc_start_idx = len(prompt)
                tok_idx_start = np.searchsorted(tok_offsets, len(prompt))

            # truncate the generation at the first stop token
            trunc_idxs = [
                prompt_plus_generation.find(suffix, str_trunc_start_idx)
                for suffix in stop_until
                if suffix and suffix in prompt_plus_generation
            ]
            str_trunc_end_idx = min(
                filter(lambda x: x != -1, trunc_idxs), default=len(prompt_plus_generation)
            )
            truncated_generation = prompt_plus_generation[str_trunc_start_idx:str_trunc_end_idx]

            # TODO(sasatheesh): handle cases where truncated_generation is not a full token
            tok_idx_end = np.searchsorted(tok_offsets, len(truncated_generation))

            truncated_generation_logprobs = output_log_probs[batch_idx][tok_idx_start:tok_idx_end]
            truncated_generation_tokens = tokens[batch_idx][tok_idx_start:tok_idx_end]
            truncated_generation_topk_logprobs = (
                logprobs_topk[batch_idx][tok_idx_start:tok_idx_end]
                if logprobs_topk is not None
                else None
            )

            truncated_generation_tok_offsets = tok_offsets[tok_idx_start:tok_idx_end]

            results.append(
                {
                    "index": batch_idx,
                    "text": truncated_generation,
                    "logprobs": {
                        "token_logprobs": [None] + truncated_generation_logprobs,
                        "tokens": [
                            tokenizer.detokenize([tk]) for tk in truncated_generation_tokens
                        ],
                        "text_offset": truncated_generation_tok_offsets,
                        "top_logprobs": (
                            [None] + truncated_generation_topk_logprobs
                            if truncated_generation_topk_logprobs is not None
                            else None
                        ),
                    },
                }
            )

        return jsonify({"choices": results})


================================================
FILE: megatron/core/inference/text_generation_server/run_mcore_engine.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import inspect

from megatron.core import mpu
from megatron.core.inference.communication_utils import broadcast_float_list
from megatron.core.inference.inference_request import InferenceRequest
from megatron.core.inference.sampling_params import SamplingParams
from megatron.core.inference.text_generation_server.tokenization import tokenize_prompts


def run_mcore_engine(
    engine,
    prompts=None,
    temperature=1.0,
    top_k=0,
    top_p=0.0,
    logprobs=True,
    tokens_to_generate=0,
    top_n_logprobs=0,
    random_seed=-1,
):
    """Server-compatible version of the MCore Engine, used in
    tools/run_text_generation_server.py."""

    values = [tokens_to_generate, logprobs, top_k, top_p, temperature, top_n_logprobs, random_seed]
    values_float_tensor = broadcast_float_list(len(values), float_list=values, data_parallel=False)
    tokens_to_generate = int(values_float_tensor[0].item())
    return_output_log_probs = bool(values_float_tensor[1].item())
    top_k = int(values_float_tensor[2].item())
    top_p = values_float_tensor[3].item()
    temperature = values_float_tensor[4].item()
    top_n_logprobs = int(values_float_tensor[5].item())
    random_seed = int(values_float_tensor[6].item())

    if random_seed > 0:
        engine.controller.sampling_rng.manual_seed(random_seed)

    sampling_params = SamplingParams(
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        return_segments=True,
        return_log_probs=return_output_log_probs,
        num_tokens_to_generate=tokens_to_generate,
        top_n_logprobs=top_n_logprobs,
        skip_prompt_log_probs=False,
    )

    tokenizer = engine.controller.tokenizer
    context_tokens_tensor, context_length_tensor = tokenize_prompts(
        tokenizer=tokenizer,
        prompts=prompts,
        tokens_to_generate=tokens_to_generate,
        add_BOS=False,
        data_parallel=False,
    )

    tokenized_prompts = []
    for p, l in zip(context_tokens_tensor, context_length_tensor):
        tokenized_prompts.append(p[:l].cpu().numpy().tolist())

    # detect if detokenize supports skip_special_tokens or **kwargs
    sig_params = inspect.signature(tokenizer.detokenize).parameters.values()
    accepts_skip = any(
        p.name == "skip_special_tokens" or p.kind == inspect.Parameter.VAR_KEYWORD
        for p in sig_params
    )

    # Detokenize prompts into strings to pass through the engine
    detokenized_prompts = [
        (
            tokenizer.detokenize(p, skip_special_tokens=True)
            if accepts_skip
            else tokenizer.detokenize(p)
        )
        for p in tokenized_prompts
    ]

    requests = []
    for i in range(len(tokenized_prompts)):
        req = InferenceRequest(
            prompt=detokenized_prompts[i],
            prompt_tokens=tokenized_prompts[i],
            sampling_params=sampling_params,
            request_id=engine.get_new_request_id(),
        )
        requests.append(req)

    result = engine.generate(inference_requests=requests)

    # Only post-process on first stage.
    if mpu.is_pipeline_first_stage():
        response_dict = {
            "text": [x.prompt + x.generated_text for x in result],
            "tokens": [x.prompt_tokens + x.generated_tokens.tolist() for x in result],
        }
        if sampling_params.return_log_probs:
            response_logprobs = [x.prompt_log_probs + x.generated_log_probs for x in result]
            response_dict["logprobs"] = response_logprobs
        if sampling_params.return_segments:
            response_dict["segments"] = [x.segments for x in result]
        if sampling_params.top_n_logprobs > 0:
            # TODO(ksanthanam): Support enabling `skip_prompt_log_probs`
            assert (
                sampling_params.return_prompt_top_n_logprobs
            ), "skip_prompt_log_probs must be False"
            response_dict["top_n_logprobs"] = [
                x.prompt_top_n_logprobs + x.generated_top_n_logprobs for x in result
            ]

        return response_dict
    return None


================================================
FILE: megatron/core/inference/text_generation_server/text_generation_server.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import datetime
import json
import logging
import os
import sys

try:
    from flask import Flask, jsonify, request
    from flask_restful import Api, Resource

    HAVE_FLASK = True
except ImportError as e:
    Resource = object

    HAVE_FLASK = False

from megatron.core.inference.text_generation_server.endpoints.common import LOCK, send_do_generate
from megatron.core.inference.text_generation_server.endpoints.completions import MegatronCompletions
from megatron.core.inference.text_generation_server.run_mcore_engine import run_mcore_engine

sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir))
)


class MegatronGenerate(Resource):
    """Text generation endpoint."""

    def __init__(self, engine, args):
        self.engine = engine
        self.args = args
        self.verbose = getattr(args, 'inference_flask_server_logging', False)

    def put(self):
        """Handle generation request."""
        if not "prompts" in request.get_json():
            return "prompts argument required", 400

        if "max_len" in request.get_json():
            return "max_len is no longer used.  Replace with tokens_to_generate", 400

        if "sentences" in request.get_json():
            return "sentences is no longer used.  Replace with prompts", 400

        if "beam_width" in request.get_json():
            return "Beam search is no longer supported.", 400

        prompts = request.get_json()["prompts"]
        if not isinstance(prompts, list):
            return "prompts is not a list of strings", 400

        if len(prompts) == 0:
            return "prompts is empty", 400

        if len(prompts) > 128:
            return "Maximum number of prompts is 128", 400

        tokens_to_generate = 64  # Choosing hopefully sane default.  Full sequence is slow
        if "tokens_to_generate" in request.get_json():
            tokens_to_generate = request.get_json()["tokens_to_generate"]
            if not isinstance(tokens_to_generate, int):
                return "tokens_to_generate must be an integer greater than 0"
            if tokens_to_generate < 0:
                return "tokens_to_generate must be an integer greater than or equal to 0"

        logprobs = False
        if "logprobs" in request.get_json():
            logprobs = request.get_json()["logprobs"]
            if not isinstance(logprobs, bool):
                return "logprobs must be a boolean value"

        if tokens_to_generate == 0 and not logprobs:
            return "tokens_to_generate=0 implies logprobs should be True"

        temperature = 1.0
        if "temperature" in request.get_json():
            temperature = request.get_json()["temperature"]
            if not (isinstance(temperature, (int, float))):
                return "temperature must be a positive number less than or equal to 100.0"
            if not (0.0 < temperature <= 100.0):
                return "temperature must be a positive number less than or equal to 100.0"

        top_k = 0
        if "top_k" in request.get_json():
            top_k = request.get_json()["top_k"]
            if not (isinstance(top_k, int)):
                return (
                    "top_k must be an integer equal to or greater than 0 "
                    "and less than or equal to 1000"
                )
            if not (0 <= top_k <= 1000):
                return "top_k must be equal to or greater than 0 and less than or equal to 1000"

        top_p = 0.0
        if "top_p" in request.get_json():
            top_p = request.get_json()["top_p"]
            if not (isinstance(top_p, float)):
                return "top_p must be a positive float less than or equal to 1.0"
            if top_p > 0.0 and top_k > 0.0:
                return "cannot set both top-k and top-p samplings."
            if not (0 <= top_p <= 1.0):
                return "top_p must be less than or equal to 1.0"

        top_p_decay = 0.0
        if "top_p_decay" in request.get_json():
            top_p_decay = request.get_json()["top_p_decay"]
            if not (isinstance(top_p_decay, float)):
                return "top_p_decay must be a positive float less than or equal to 1.0"
            if top_p == 0.0:
                return "top_p_decay cannot be set without top_p"
            if not (0 <= top_p_decay <= 1.0):
                return "top_p_decay must be less than or equal to 1.0"

        top_p_bound = 0.0
        if "top_p_bound" in request.get_json():
            top_p_bound = request.get_json()["top_p_bound"]
            if not (isinstance(top_p_bound, float)):
                return "top_p_bound must be a positive float less than or equal to top_p"
            if top_p == 0.0:
                return "top_p_bound cannot be set without top_p"
            if not (0.0 < top_p_bound <= top_p):
                return "top_p_bound must be greater than 0 and less than top_p"

        add_BOS = False
        if "add_BOS" in request.get_json():
            add_BOS = request.get_json()["add_BOS"]
            if not isinstance(add_BOS, bool):
                return "add_BOS must be a boolean value"

        if any([len(prompt) == 0 for prompt in prompts]) and not add_BOS:
            return "Empty prompts require add_BOS=true"

        stop_on_double_eol = False
        if "stop_on_double_eol" in request.get_json():
            stop_on_double_eol = request.get_json()["stop_on_double_eol"]
            if not isinstance(stop_on_double_eol, bool):
                return "stop_on_double_eol must be a boolean value"

        stop_on_eol = False
        if "stop_on_eol" in request.get_json():
            stop_on_eol = request.get_json()["stop_on_eol"]
            if not isinstance(stop_on_eol, bool):
                return "stop_on_eol must be a boolean value"

        prevent_newline_after_colon = False
        if "prevent_newline_after_colon" in request.get_json():
            prevent_newline_after_colon = request.get_json()["prevent_newline_after_colon"]
            if not isinstance(prevent_newline_after_colon, bool):
                return "prevent_newline_after_colon must be a boolean value"

        random_seed = -1
        if "random_seed" in request.get_json():
            random_seed = request.get_json()["random_seed"]
            if not isinstance(random_seed, int):
                return "random_seed must be integer"
            if random_seed < 0:
                return "random_seed must be a positive integer"

        stop_token = 50256
        if "stop_token" in request.get_json():
            stop_token = request.get_json()["stop_token"]
            if not isinstance(stop_token, int):
                return "stop_token must be an integer"

        length_penalty = 1
        if "length_penalty" in request.get_json():
            length_penalty = request.get_json()["length_penalty"]
            if not isinstance(length_penalty, float):
                return "length_penalty must be a float"

        with LOCK:  # Need to get lock to keep multiple threads from hitting code

            if self.verbose:
                logging.info(f"request IP: {str(request.remote_addr)}")
                logging.info(json.dumps(request.get_json()))
                logging.info(f"start time: {datetime.datetime.now()}")

            try:
                send_do_generate()  # Tell other ranks we're doing generate

                response_dict = run_mcore_engine(
                    self.engine, prompts, temperature, top_k, top_p, logprobs, tokens_to_generate
                )

                return jsonify(response_dict)

            except ValueError as ve:
                return ve.args[0]


class MegatronServer(object):
    """Megatron text generation server."""

    def __init__(self, model, args=None):
        if not HAVE_FLASK:
            raise RuntimeError(f"`flask` and/or `flask_restful` are not installed.")

        self.app = Flask(__name__, static_url_path='')
        api = Api(self.app)
        api.add_resource(MegatronGenerate, '/api', resource_class_args=[model, args])
        api.add_resource(MegatronCompletions, '/completions', resource_class_args=[model, args])

    def run(self, url, port):
        """Run the server."""
        self.app.run(url, threaded=True, debug=False, port=port)


================================================
FILE: megatron/core/inference/text_generation_server/tokenization.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""Tokenization utilities."""


import torch

from megatron.core import parallel_state
from megatron.core.inference.communication_utils import broadcast_int_list, broadcast_tensor


def tokenize_prompts(
    tokenizer, prompts=None, tokens_to_generate=None, add_BOS=None, rank=0, data_parallel=False
):
    """Tokenize prompts and make them avaiable on all ranks.

    Args:
        data_parallel (bool): Broadcast tokens across a single data parallel model replica.
    """

    # On all ranks set to None so we can pass them to functions
    sizes_list = None
    prompts_tokens_cuda_long_tensor = None
    prompts_length_cuda_long_tensor = None

    # On the specified rank, build the above.
    src_rank = torch.distributed.get_rank()
    if data_parallel:
        src_rank = parallel_state.get_data_parallel_src_rank()

    if src_rank == rank:
        assert prompts is not None
        assert tokens_to_generate is not None
        # Tensor of tokens padded and their unpadded length.
        prompts_tokens_cuda_long_tensor, prompts_length_cuda_long_tensor = (
            _tokenize_prompts_and_batch(tokenizer, prompts, tokens_to_generate, add_BOS)
        )
        # We need the sizes of these tensors for the boradcast
        sizes_list = [
            prompts_tokens_cuda_long_tensor.size(0),  # Batch size
            prompts_tokens_cuda_long_tensor.size(1),
        ]  # Sequence lenght

    # First, broadcast the sizes.
    sizes_tensor = broadcast_int_list(
        2, int_list=sizes_list, rank=rank, data_parallel=data_parallel
    )

    # Now that we have the sizes, we can boradcast the tokens
    # and length tensors.
    sizes = sizes_tensor.tolist()
    prompts_tokens_cuda_long_tensor = broadcast_tensor(
        sizes,
        torch.int64,
        tensor=prompts_tokens_cuda_long_tensor,
        rank=rank,
        data_parallel=data_parallel,
    )
    prompts_length_cuda_long_tensor = broadcast_tensor(
        sizes[0],
        torch.int64,
        tensor=prompts_length_cuda_long_tensor,
        rank=rank,
        data_parallel=data_parallel,
    )

    return prompts_tokens_cuda_long_tensor, prompts_length_cuda_long_tensor


def _tokenize_prompts_and_batch(tokenizer, prompts, tokens_to_generate, add_BOS):
    """Given a set of prompts and number of tokens to generate:
    - tokenize prompts
    - set the sequence length to be the max of length of prompts
      plus the number of tokens we would like to generate
    - pad all the sequences to this length so we can convert them
      into a 2D tensor.
    """

    # Tokenize all the prompts.
    if hasattr(tokenizer, 'eod'):
        eod_token = tokenizer.eod
    elif hasattr(tokenizer, 'eos_id'):
        eod_token = tokenizer.eos_id
    else:
        raise AttributeError('No eod token found in Tokenizer')
    if add_BOS:
        prompts_tokens = [[eod_token] + tokenizer.tokenize(prompt) for prompt in prompts]
    else:
        prompts_tokens = [tokenizer.tokenize(prompt) for prompt in prompts]

    # Now we have a list of list of tokens which each list has a different
    # size. We want to extend this list to:
    #   - incorporate the tokens that need to be generated
    #   - make all the sequences equal length.
    # Get the prompts length.
    prompts_length = [len(prompt_tokens) for prompt_tokens in prompts_tokens]
    # Get the max prompts length.
    max_prompt_len = max(prompts_length)
    # Number of tokens in the each sample of the batch.
    samples_length = max_prompt_len + tokens_to_generate
    # Now update the list of list to be of the same size: samples_length.
    for prompt_tokens, prompt_length in zip(prompts_tokens, prompts_length):
        padding_size = samples_length - prompt_length
        prompt_tokens.extend([eod_token] * padding_size)

    # Now we are in a structured format, we can convert to tensors.
    prompts_tokens_tensor = torch.tensor(prompts_tokens, dtype=torch.long, device='cuda')
    prompts_length_tensor = torch.tensor(prompts_length, dtype=torch.long, device='cuda')

    return prompts_tokens_tensor, prompts_length_tensor


================================================
FILE: megatron/core/inference/unified_memory.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import ctypes
import os
import signal
import threading
import warnings
from contextlib import contextmanager
from enum import Enum, auto
from pathlib import Path

import torch
from torch.cuda.memory import CUDAPluggableAllocator
from torch.utils.cpp_extension import CUDA_HOME, load_inline

from megatron.core.utils import is_torch_min_version

try:
    if is_torch_min_version("2.8.0"):
        from torch.cuda.memory import MemPool
    else:
        from torch.cuda import MemPool
    _has_mem_pool = True
except ImportError:
    _has_mem_pool = False


class CompilationState(Enum):
    """Enum to distinguish between unified memory (UVM) compilation states."""

    UNATTEMPTED = auto()  # Compilation has not been attempted.
    FAILURE = auto()  # Compilation attempted, but failed.
    SUCCESS = auto()  # Compilation attempted, and succeeded.


class UnifiedMemoryUnsupportedError(Exception):
    """Unified memory is not supported on this system."""


class UnifiedMemoryCompileTimeoutError(UnifiedMemoryUnsupportedError):
    """Unified memory compilation timed out."""


# Compilation vars.
_compilation_state = CompilationState.UNATTEMPTED
_alloc = None  # must remain global until process exit.
_mod = None  # must remain global until process exit.
_so_path = None  # path to compiled extension .so (must remain global until exit).
_ctypes_lib = None  # ctypes handle to compiled extension
_ctypes_lock = threading.Lock()
_compilation_error: str | None = None  # store last failure reason for better error messages


@contextmanager
def _compile_timeout(timeout_s: int):
    """Context manager to timeout compilation.

    Args:
        timeout_s (int): Timeout in seconds.
    """

    def _handler(signum, frame):
        raise UnifiedMemoryCompileTimeoutError(
            "Unified memory compilation has been forcefully timed out. "
            "This is almost certainly due to stale lock files associated with your Unix user. "
            "The official PyTorch advice is to resolve this issue with the following command:\n"
            "`rm -rf /tmp/torch_extensions/`\n"
            "Alternately, the TORCH_EXTENSIONS_DIR env var may be set to a different path. "
            "Please clean up your stale cache and try again."
        )

    # Signal-based timeout only works in the main thread.
    # In non-main threads (e.g., Ray actors), skip the timeout mechanism.
    if threading.current_thread() is not threading.main_thread():
        yield
        return

    curr_handler = signal.signal(signal.SIGALRM, _handler)
    try:
        signal.alarm(timeout_s)
        yield
    finally:
        signal.alarm(0)
        signal.signal(signal.SIGALRM, curr_handler)


def compile_allocator():
    """Attempt to compile UVM allocator."""

    global _compilation_state, _alloc, _mod, _so_path, _ctypes_lib, _compilation_error

    if _compilation_state != CompilationState.UNATTEMPTED:
        return

    if not _has_mem_pool:
        _compilation_state = CompilationState.FAILURE
        _compilation_error = (
            "PyTorch does not expose CUDA MemPool on this build/version. "
            "UVM mempool requires torch.cuda.MemPool or torch.cuda.memory.MemPool."
        )
        return

    _mempool_c_src = r"""
    #include <cuda_runtime_api.h>
    #include <cstddef>

    #define EXPORT extern "C"

    EXPORT void* managed_malloc(size_t size, int device, void* stream) {
      (void)stream;
      int prev_device = -1;
      cudaGetDevice(&prev_device);
      if (device != prev_device && device >= 0) cudaSetDevice(device);

      // cudaMallocManaged allows for more memory to be allocated than the device memory size.
      // The cudaMemAttachGlobal flag makes the memory accessible from both host and device.
      void* ptr = nullptr;
      cudaError_t err = cudaMallocManaged(&ptr, (size_t)size, cudaMemAttachGlobal);
      if (err != cudaSuccess) return nullptr;

      if (device >= 0) {
        // cudaMemAdviseSetPreferredLocation sets the preferred location for the memory.
        // This is a hint that tries to prevent data from being migrated away from the device.

        #if CUDART_VERSION >= 13000
          // For CUDA >= 13, the cudaMemAdvise device arg is type cudaMemLocation
          // instead of an int, so we setup the location and conditionally use it
          // in calls to cudaMemAdvise.
          cudaMemLocation location;
          location.type = cudaMemLocationTypeDevice;
          location.id = device;

          cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetPreferredLocation, location);

          // cudaMemAdviseSetAccessedBy ensures the memory always lives in the device's page table.
          // Even if the memory has to be migrated away from the device, it still does not page fault.
          // The CUDA docs claim that cudaMemAdviseSetPreferredLocation completely overrides this flag,
          // but there is no harm in adding this flag as well for future-proofing.
          cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetAccessedBy, location);
        #else
          cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetPreferredLocation, device);
          // cudaMemAdviseSetAccessedBy ensures the memory always lives in the device's page table.
          // Even if the memory has to be migrated away from the device, it still does not page fault.
          // The CUDA docs claim that cudaMemAdviseSetPreferredLocation completely overrides this flag,
          // but there is no harm in adding this flag as well for future-proofing.
          cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetAccessedBy, device);
        #endif
      }
      if (device != prev_device && prev_device >= 0) cudaSetDevice(prev_device);
      return ptr;
    }

    EXPORT void managed_free(void* ptr, size_t size, int device, void* stream) {
      // Memory allocated with cudaMallocManaged should be released with cudaFree.
      (void)size; (void)device; (void)stream;
      if (ptr) cudaFree(ptr);
    }

    // Prefetch managed memory to a device (or to CPU with cudaCpuDeviceId == -1).
    EXPORT int managed_prefetch(void* ptr, size_t size, int device, void* stream) {
      cudaStream_t s = (cudaStream_t)stream;
      cudaError_t err;
      #if CUDART_VERSION >= 13000
        cudaMemLocation location;
        if (device == (int)-1) {
          location.type = cudaMemLocationTypeHost;
          location.id = 0;
        } else {
          location.type = cudaMemLocationTypeDevice;
          location.id = device;
        }
        err = cudaMemPrefetchAsync(ptr, (size_t)size, location, 0, s);
      #else
        err = cudaMemPrefetchAsync(ptr, (size_t)size, device, s);
      #endif
      return (int)err;
    }

    // Update preferred location advice for managed memory (GPU device id, or CPU with cudaCpuDeviceId == -1).
    EXPORT int managed_advise_preferred_location(void* ptr, size_t size, int device) {
      cudaError_t err;
      #if CUDART_VERSION >= 13000
        cudaMemLocation location;
        if (device == (int)-1) {
          location.type = cudaMemLocationTypeHost;
          location.id = 0;
        } else {
          location.type = cudaMemLocationTypeDevice;
          location.id = device;
        }
        err = cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetPreferredLocation, location);
      #else
        err = cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetPreferredLocation, device);
      #endif
      return (int)err;
    }

    // Ensure a device is in the page table for this managed region.
    EXPORT int managed_advise_accessed_by(void* ptr, size_t size, int device) {
      cudaError_t err;
      #if CUDART_VERSION >= 13000
        cudaMemLocation location;
        location.type = cudaMemLocationTypeDevice;
        location.id = device;
        err = cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetAccessedBy, location);
      #else
        err = cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetAccessedBy, device);
      #endif
      return (int)err;
    }
    """

    # Define a timeout of 30s for how long the build is allowed to run.
    timeout_s = 30

    # Build the .so upon import; this avoids issues.
    if _has_mem_pool:
        _extra_ldflags = ["-lcudart"]
        if CUDA_HOME:
            _cuda_lib = os.path.join(CUDA_HOME, "lib64")
            if os.path.isdir(_cuda_lib):
                _extra_ldflags = [f"-L{_cuda_lib}", "-lcudart"]
        try:
            with _compile_timeout(timeout_s):
                _mod = load_inline(
                    name="managed_alloc_runtime",
                    cpp_sources=[_mempool_c_src],
                    functions=[],
                    with_cuda=True,
                    extra_ldflags=_extra_ldflags,
                    verbose=True,
                )
                _so_path = Path(_mod.__file__).as_posix()
                _cpa = CUDAPluggableAllocator(_so_path, "managed_malloc", "managed_free")
                _alloc = _cpa.allocator()
                _compilation_state = CompilationState.SUCCESS
                _compilation_error = None
        except (RuntimeError, ImportError, OSError, UnifiedMemoryCompileTimeoutError) as e:
            _compilation_error = str(e)
            warnings.warn(f"Failed to create unified memory mempool: '{e}'.")
            _compilation_state = CompilationState.FAILURE
            _so_path = None
            _ctypes_lib = None

        # Synchronize failure state across ranks. (For currently unknown reasons,
        # one rank can show as FAILURE while the remaining ranks show as SUCCESS.)
        local_state = torch.tensor(
            [_compilation_state.value], dtype=torch.uint8, device=torch.cuda.current_device()
        )
        world_states = [
            torch.empty(1, dtype=torch.uint8, device=torch.cuda.current_device())
            for _ in range(torch.distributed.get_world_size())
        ]
        torch.distributed.all_gather(world_states, local_state)
        world_states = set(s.item() for s in world_states)
        if CompilationState.FAILURE.value in world_states:
            _compilation_state = CompilationState.FAILURE


def create_unified_mempool() -> "MemPool":
    """Create a unified memory mempool using CUDA managed memory.

    Returns:
        (MemPool) Unified memory mempool.
    """

    # Attempt to compile allocator.
    compile_allocator()

    # Return mempool.
    if _compilation_state != CompilationState.SUCCESS:
        details = _compilation_error
        if details is None:
            details = "Unknown reason (allocator compilation did not succeed)."
        raise UnifiedMemoryUnsupportedError(
            "Unified virtual memory (UVM) mempool is unsupported or failed to initialize: "
            + details
        )
    else:
        return MemPool(allocator=_alloc)


def _get_ctypes_lib() -> "ctypes.CDLL":
    """Return a ctypes handle to the compiled UVM extension (.so)."""
    global _ctypes_lib
    compile_allocator()
    if _compilation_state != CompilationState.SUCCESS or _so_path is None:
        raise UnifiedMemoryUnsupportedError()
    if _ctypes_lib is not None:
        return _ctypes_lib
    with _ctypes_lock:
        if _ctypes_lib is None:
            _ctypes_lib = ctypes.CDLL(_so_path)
            # Configure argtypes/restype for exported helpers.
            _ctypes_lib.managed_prefetch.argtypes = [
                ctypes.c_void_p,
                ctypes.c_size_t,
                ctypes.c_int,
                ctypes.c_void_p,
            ]
            _ctypes_lib.managed_prefetch.restype = ctypes.c_int
            _ctypes_lib.managed_advise_preferred_location.argtypes = [
                ctypes.c_void_p,
                ctypes.c_size_t,
                ctypes.c_int,
            ]
            _ctypes_lib.managed_advise_preferred_location.restype = ctypes.c_int
            _ctypes_lib.managed_advise_accessed_by.argtypes = [
                ctypes.c_void_p,
                ctypes.c_size_t,
                ctypes.c_int,
            ]
            _ctypes_lib.managed_advise_accessed_by.restype = ctypes.c_int
    return _ctypes_lib


def prefetch_managed_tensor(tensor, *, device: int, stream=None) -> None:
    """Prefetch a CUDA tensor allocated from the UVM mempool to a specific device.

    This uses `cudaMemPrefetchAsync` to physically migrate the pages backing the tensor.
    The virtual address (pointer) remains unchanged, making this safe for use with
    recorded CUDA graphs.

    Args:
        tensor (torch.Tensor): CUDA tensor allocated from the UVM mempool.
        device (int): Target device ID. Use -1 (cudaCpuDeviceId) to prefetch to CPU.
        stream (torch.cuda.Stream, optional): Stream to use for the asynchronous prefetch.
            Defaults to the current stream.
    """
    if tensor is None:
        return
    if not isinstance(tensor, torch.Tensor):
        raise TypeError("prefetch_managed_tensor expects a torch.Tensor")
    if tensor.numel() == 0:
        return
    if not tensor.is_cuda:
        raise ValueError("prefetch_managed_tensor expects a CUDA tensor")

    lib = _get_ctypes_lib()
    nbytes = tensor.nbytes
    if stream is None:
        stream = torch.cuda.current_stream()
    # torch.cuda.Stream exposes a cuda_stream integer handle.
    stream_ptr = ctypes.c_void_p(int(stream.cuda_stream))
    err = lib.managed_prefetch(
        ctypes.c_void_p(int(tensor.data_ptr())), ctypes.c_size_t(nbytes), int(device), stream_ptr
    )
    if err != 0:
        raise RuntimeError(f"cudaMemPrefetchAsync failed with cudaError={err}")


def advise_managed_tensor_preferred_location(tensor, *, device: int) -> None:
    """Set the preferred physical location hint for a managed tensor.

    This uses `cudaMemAdviseSetPreferredLocation`. It tells the CUDA driver where the
    pages should ideally reside. Unlike prefetch, this is a hint and does not
    immediately trigger migration unless the driver decides it is necessary.

    Args:
        tensor (torch.Tensor): CUDA tensor allocated from the UVM mempool.
        device (int): Preferred device ID. Use -1 (cudaCpuDeviceId) for CPU.
    """
    if tensor is None:
        return
    if not isinstance(tensor, torch.Tensor):
        raise TypeError("advise_managed_tensor_preferred_location expects a torch.Tensor")
    if tensor.numel() == 0:
        return
    if not tensor.is_cuda:
        raise ValueError("advise_managed_tensor_preferred_location expects a CUDA tensor")

    lib = _get_ctypes_lib()
    nbytes = tensor.nbytes
    err = lib.managed_advise_preferred_location(
        ctypes.c_void_p(int(tensor.data_ptr())), ctypes.c_size_t(nbytes), int(device)
    )
    if err != 0:
        raise RuntimeError(f"cudaMemAdviseSetAccessedBy failed with cudaError={err}")


def advise_managed_tensor_accessed_by(tensor, *, device: int) -> None:
    """Hint that a specific device will access the managed tensor.

    This uses `cudaMemAdviseSetAccessedBy`. It ensures that the mapping for this
    memory region is established in the page tables of the specified device,
    reducing page fault latency when the device first touches the data.

    Args:
        tensor (torch.Tensor): CUDA tensor allocated from the UVM mempool.
        device (int): Device ID that will access the tensor. Must be a GPU ID.
    """
    if tensor is None:
        return
    if not isinstance(tensor, torch.Tensor):
        raise TypeError("advise_managed_tensor_accessed_by expects a torch.Tensor")
    if tensor.numel() == 0:
        return
    if not tensor.is_cuda:
        raise ValueError("advise_managed_tensor_accessed_by expects a CUDA tensor")

    lib = _get_ctypes_lib()
    nbytes = tensor.nbytes
    err = lib.managed_advise_accessed_by(
        ctypes.c_void_p(int(tensor.data_ptr())), ctypes.c_size_t(nbytes), int(device)
    )
    if err != 0:
        raise RuntimeError(f"cudaMemAdviseSetAccessedBy failed with cudaError={err}")


def prefetch_managed_module_parameters(
    module, *, device: int, include_buffers: bool = False
) -> int:
    """Prefetch all UVM-allocated parameters (and optionally buffers) of a module.

    Iterates through all parameters of the module and initiates an asynchronous
    migration to the target device. This is typically used to offload weights to
    CPU during training or prefetch them to GPU before inference.

    Args:
        module (torch.nn.Module): The module containing UVM parameters.
        device (int): Target device ID (-1 for CPU).
        include_buffers (bool, optional): Whether to also prefetch module buffers.
            Defaults to False.

    Returns:
        int: The total number of bytes for which prefetch was initiated.
    """
    if module is None:
        return 0

    # Avoid duplicate prefetch on shared tensors.
    seen_ptrs: set[int] = set()
    total_nbytes = 0
    stream = torch.cuda.current_stream()

    for name, p in module.named_parameters(recurse=True):
        if p is None:
            continue
        t = p.data
        if not isinstance(t, torch.Tensor) or not t.is_cuda or t.numel() == 0:
            continue
        ptr = int(t.data_ptr())
        if ptr in seen_ptrs:
            continue
        seen_ptrs.add(ptr)
        nbytes = t.nbytes
        err = prefetch_managed_tensor(t, device=device, stream=stream)
        if err:
            raise RuntimeError(
                f"cudaMemPrefetchAsync failed (cudaError={err}) for parameter '{name}': "
                f"shape={tuple(t.shape)}, dtype={t.dtype}, device={t.device}, "
                f"data_ptr=0x{t.data_ptr():x}, nbytes={nbytes}. "
                "This tensor is not UVM-allocated."
            )
        total_nbytes += nbytes

    if include_buffers:
        for name, b in module.named_buffers(recurse=True):
            if b is None:
                continue
            if not isinstance(b, torch.Tensor) or not b.is_cuda or b.numel() == 0:
                continue
            ptr = int(b.data_ptr())
            if ptr in seen_ptrs:
                continue
            seen_ptrs.add(ptr)
            nbytes = b.nbytes
            err = prefetch_managed_tensor(b, device=device, stream=stream)
            if err:
                raise RuntimeError(
                    f"cudaMemPrefetchAsync failed (cudaError={err}) for buffer '{name}': "
                    f"shape={tuple(b.shape)}, dtype={b.dtype}, device={b.device}, "
                    f"data_ptr=0x{b.data_ptr():x}, nbytes={nbytes}. "
                    "This tensor is not UVM-allocated."
                )
            total_nbytes += nbytes

    return total_nbytes


def advise_managed_module_parameters_preferred_location(
    module, *, device: int, include_buffers: bool = False
) -> None:
    """Set the preferred physical location hint for all UVM parameters in a module.

    Args:
        module (torch.nn.Module): The module containing UVM parameters.
        device (int): Preferred device ID (-1 for CPU).
        include_buffers (bool, optional): Whether to also advise on module buffers.
            Defaults to False.
    """
    if module is None:
        return

    seen_ptrs: set[int] = set()
    for name, p in module.named_parameters(recurse=True):
        if p is None:
            continue
        t = p.data
        if not isinstance(t, torch.Tensor) or not t.is_cuda or t.numel() == 0:
            continue
        ptr = int(t.data_ptr())
        if ptr in seen_ptrs:
            continue
        seen_ptrs.add(ptr)
        err = advise_managed_tensor_preferred_location(t, device=device)
        if err:
            raise RuntimeError(
                f"cudaMemAdviseSetPreferredLocation failed (cudaError={err}) for param '{name}': "
                f"shape={tuple(t.shape)}, dtype={t.dtype}, device={t.device}, "
                f"data_ptr=0x{t.data_ptr():x}, nbytes={t.nbytes}. "
                "This tensor is not UVM-allocated."
            )

    if include_buffers:
        for name, b in module.named_buffers(recurse=True):
            if b is None:
                continue
            if not isinstance(b, torch.Tensor) or not b.is_cuda or b.numel() == 0:
                continue
            ptr = int(b.data_ptr())
            if ptr in seen_ptrs:
                continue
            seen_ptrs.add(ptr)
            err = advise_managed_tensor_preferred_location(b, device=device)
            if err:
                raise RuntimeError(
                    f"cudaMemAdviseSetPreferredLocation failed (err={err}) for buf '{name}': "
                    f"shape={tuple(b.shape)}, dtype={b.dtype}, device={b.device}, "
                    f"data_ptr=0x{b.data_ptr():x}, nbytes={b.nbytes}. "
                    "This tensor is not UVM-allocated."
                )


================================================
FILE: megatron/core/inference/utils.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import asyncio
import logging
import multiprocessing
import sys
from importlib.metadata import PackageNotFoundError, version

import torch

from megatron.core.transformer.moe.moe_layer import MoELayer
from megatron.core.utils import get_model_config

try:
    FLASHINFER_JIT_CACHE_VERSION = version("flashinfer-jit-cache")
except PackageNotFoundError:
    FLASHINFER_JIT_CACHE_VERSION = None


def device_memory_summary() -> str:
    """One-line GPU memory summary for torch_memory_saver logging."""
    dev = torch.cuda.current_device()
    stats = torch.cuda.memory_stats(dev)
    try:
        segs = torch.cuda.memory_snapshot(include_traces=False)
    except TypeError:  # include_traces was added in PyTorch 2.11
        segs = torch.cuda.memory_snapshot()
    M = 1024**2
    private = sum(
        s.get("active_size", 0)
        for s in segs
        if s.get("device", dev) == dev and tuple(s.get("segment_pool_id", (0, 0))) != (0, 0)
    )
    alloc = stats.get("allocated_bytes.all.current", 0)
    resv = stats.get("reserved_bytes.all.current", 0)
    dev_mem = torch.cuda.device_memory_used()
    return (
        f"alloc={alloc/M:.0f}MiB private={private/M:.0f}MiB "
        f"resv-alloc={(resv-alloc)/M:.0f}MiB resv={resv/M:.0f}MiB dev_mem={dev_mem/M:.0f}MiB"
    )


class Counter:
    """A simple counter class

    This class is responsible for assigning request ids to incoming requests
    """

    def __init__(self, start: int = 0) -> None:
        self.counter = start

    def __next__(self) -> int:
        i = self.counter
        self.counter += 1
        return i

    def reset(self) -> None:
        """Reset counter"""
        self.counter = 0


def get_attention_mask(seq_length: int) -> torch.Tensor:
    """Constructs an attention mask given the input sequence length."""
    attention_mask = torch.tril(
        torch.ones((1, seq_length, seq_length), device=torch.cuda.current_device())
    ).view(1, 1, seq_length, seq_length)

    # Convert to boolean
    attention_mask = attention_mask < 0.5

    return attention_mask


# Initialize cache for sequence parallel modules
moe_layer_cache = None


def _init_moe_expert_cache(model):
    """
    Initialize the cache of MoE layers once
    """
    global moe_layer_cache
    if moe_layer_cache is not None:
        return  # already initialized

    # Cache for moe layers.
    moe_layer_cache = []
    seen_modules = set()

    def walk(module):
        # Collect from MoELayer fields
        if isinstance(module, MoELayer):
            oid = id(module)
            if oid not in seen_modules:
                moe_layer_cache.append(module)

        for child in module.children():
            walk(child)

    walk(model)


def set_decode_expert_padding(model, set_to: bool = False, capacity_factor: int = None):
    """
    Toggle MoE drop-and-pad for decode.

    Applies ``capacity_factor`` to the router and all token dispatchers so
    decode runs with fixed shapes (CUDA graph-safe). When enabling
    (``set_to=True``), clears variable-size dispatcher metadata from prefill.
    For no-drop decode, use ``capacity_factor = num_moe_experts / moe_router_topk``.

    Args:
    - model: Module containing MoE layers.
    - set_to: Enable (True) or disable (False) padding.
    - capacity_factor: Capacity scaling shared by router and dispatchers.
    """
    global moe_layer_cache
    if moe_layer_cache is None:
        _init_moe_expert_cache(model)

    cfg = get_model_config(model)

    # Flip global/config knobs read by the router
    cfg.moe_pad_expert_input_to_capacity = bool(set_to)
    cfg.moe_expert_capacity_factor = capacity_factor

    # Update all token dispatchers
    for moe_layer in moe_layer_cache:

        dispatcher = moe_layer.token_dispatcher
        # turn padding on/off
        dispatcher.drop_and_pad = bool(set_to)

        # make sure attribute exists even if class didn't define it
        setattr(dispatcher, "moe_expert_capacity_factor", capacity_factor)

        # Check fliping the modules config
        if hasattr(dispatcher, "config"):
            dispatcher.config.moe_pad_expert_input_to_capacity = bool(set_to)
            dispatcher.config.moe_expert_capacity_factor = capacity_factor

        if set_to:
            # clear any variable-size metadata from dropless prefill
            for attr in (
                "input_splits",
                "output_splits",
                "output_splits_tp",
                "tokens_per_expert",
                "num_global_tokens_per_local_expert",
                "reversed_local_input_permutation_mapping",
                "capacity",
            ):
                if hasattr(dispatcher, attr):
                    setattr(dispatcher, attr, None)
            if hasattr(dispatcher, "cuda_sync_point"):
                dispatcher.cuda_sync_point = "no_sync"

        router = moe_layer.router
        setattr(router, "moe_expert_capacity_factor", capacity_factor)
        if hasattr(router, "config"):
            router.config.moe_expert_capacity_factor = capacity_factor
            router.config.moe_pad_expert_input_to_capacity = bool(set_to)


def check_flashinfer_jit_cache_installed(log_version: bool = False):
    """Verify that the flashinfer-jit-cache package is installed.

    The flashinfer-jit-cache package provides pre-compiled CUTLASS fused MoE kernels
    so they don't need to be JIT-compiled at runtime. This avoids a multi-minute
    compilation step during CUDA graph warmup.

    Raises:
        RuntimeError: If flashinfer-jit-cache is not installed and CUDA version is 12 or 13.
    """
    if FLASHINFER_JIT_CACHE_VERSION is not None:
        if log_version:
            logging.info(
                f"Found flashinfer-jit-cache {FLASHINFER_JIT_CACHE_VERSION} with "
                "pre-compiled CUTLASS kernels."
            )
        return

    cuda_major = torch.version.cuda.split(".")[0] if torch.version.cuda else None

    if cuda_major == "12":
        install_cmd = (
            "Install it with:\n\npip install flashinfer-jit-cache "
            "--index-url https://flashinfer.ai/whl/cu129\n"
        )
    elif cuda_major == "13":
        install_cmd = (
            "Install it with:\n\npip install flashinfer-jit-cache "
            "--index-url https://flashinfer.ai/whl/cu130\n"
        )
    else:
        install_cmd = ""

    raise RuntimeError(
        "The 'flashinfer-jit-cache' package is required for expert parallel inference "
        f"but is not installed. {install_cmd}"
    )


def set_inference_cuda_graphed_iteration_for_ep_inference(model):
    """Enable CUDA graph compatibility for expert parallel inference.

    Sets a flag in all MoELayers indicating the current iteration is being
    captured/executed in a CUDA graph. This allows the dispatcher to adjust
    its behavior for CUDA graph compatibility.
    """
    global moe_layer_cache
    if moe_layer_cache is None:
        _init_moe_expert_cache(model)

    for moe_layer in moe_layer_cache:
        moe_layer.set_inference_cuda_graphed_iteration()


def unset_inference_cuda_graphed_iteration_for_ep_inference(model):
    """Disable CUDA graph compatibility for expert parallel inference.

    Clears the flag in all MoELayers, restoring standard dispatcher behavior.
    """
    global moe_layer_cache
    if moe_layer_cache is None:
        _init_moe_expert_cache(model)

    for moe_layer in moe_layer_cache:
        moe_layer.unset_inference_cuda_graphed_iteration()


def tensor_swap(x, src_idxs, dst_idxs):
    """
    Swap x[src_idxs] and x[dst_idxs]
    """
    x[dst_idxs], x[src_idxs] = x[src_idxs], x[dst_idxs]


async def await_process_call(call, process: multiprocessing.Process, timeout: float = 1.0):
    """Repeatedly wait for a multiprocessing callable to resolve, aborting upon process failure.

    Note that the timeout in this function is only for checking process liveness.
    Its value should be set to a relatively high number. The only problem a high timeout
    introduces is that an error is raised slighly later.
    The timeout does not have any effect on the event-waiting, only on process failure detection.

    Args:
        event: The multiprocessing event to wait on.
        process: The process to monitor for failure.
        timeout: The timeout for each wait iteration in seconds.
    """
    while True:
        if await asyncio.to_thread(call, timeout):
            return
        if not process.is_alive():
            raise RuntimeError(
                f"Process {process.name} (pid {process.pid}) has exited unexpectedly."
            )


# Compatibility for Python < 3.13 asyncio Queue functionality.
# This is necessary because asyncio Queues are broken in Python < 3.13.
if sys.version_info < (3, 13):

    _SHUTDOWN_SENTINEL = object()

    class asyncio_QueueShutDown(Exception):
        """Compatibility exception for Python < 3.13."""

        pass

    class asyncio_Queue(asyncio.Queue):
        """An asyncio.Queue with Python 3.13 compatibility features for Python < 3.13."""

        def __init__(self, maxsize: int = 0):
            super().__init__(maxsize)
            self._is_shutdown = False

        async def get(self):
            """Get an item from the queue with Python < 3.13 compatibility."""
            if self._is_shutdown and self.empty():
                raise asyncio_QueueShutDown
            ret = await super().get()
            if ret is _SHUTDOWN_SENTINEL:
                super().put_nowait(_SHUTDOWN_SENTINEL)
                super().task_done()
                raise asyncio_QueueShutDown
            return ret

        def put_nowait(self, item):
            """Put an item into the queue without blocking"""
            if self._is_shutdown:
                raise asyncio_QueueShutDown
            if item is _SHUTDOWN_SENTINEL:
                raise ValueError(f"{item} is reserved for shutdown purposes for Python < 3.13")
            super().put_nowait(item)

        def shutdown(self):
            """Shutdown the queue for Python < 3.13.

            Note that the listening side of the queue can continue to get old data
            off the queue even after it has already been shutdown. The listener only
            shutdowns when the queue is BOTH shutdown AND empty.
            """
            if not self._is_shutdown:
                super().put_nowait(_SHUTDOWN_SENTINEL)
                super().task_done()
                self._is_shutdown = True

else:
    asyncio_QueueShutDown = asyncio.QueueShutDown
    asyncio_Queue = asyncio.Queue


================================================
FILE: megatron/core/inference_params.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from .inference.contexts import (  # noqa: F401 # pylint: disable=unused-import
    StaticInferenceContext as InferenceParams,
)


================================================
FILE: megatron/core/jit.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import torch

from megatron.core.utils import is_torch_min_version

jit_fuser = torch.jit.script
# nvFuser is deprecated in PyTorch JIT starting from 2.2


def noop_decorator(func):
    '''No-op decorator'''
    return func


def enable_jit_fuser():
    '''Enable the JIT fuser'''
    global jit_fuser
    try:
        if is_torch_min_version("2.2.0a0"):
            jit_fuser = torch.compile
    except ImportError:

        jit_fuser = noop_decorator


def disable_jit_fuser():
    '''Disable the JIT fuser'''
    global jit_fuser
    jit_fuser = noop_decorator


enable_jit_fuser()


================================================
FILE: megatron/core/model_parallel_config.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import warnings
from dataclasses import dataclass, field
from typing import Callable, ContextManager, Literal, Optional

import torch


@dataclass
class ModelParallelConfig:
    """Base configuration for Megatron Core

    The initialization function has an argument for each parameter.
    """

    ###################
    # Model parallelism
    ###################
    tensor_model_parallel_size: int = 1
    """Intra-layer model parallelism. Splits tensors across GPU ranks."""

    pipeline_model_parallel_comm_backend: Optional[Literal["nccl", "ucc"]] = None
    """Configuring backend option of pipeline parallel communication (e.g., nccl, ucc)
       If None, the default backend will be used.
    """

    pipeline_model_parallel_size: int = 1
    """Inter-layer model parallelism. Splits transformer layers across GPU ranks."""

    virtual_pipeline_model_parallel_size: Optional[int] = None
    """Interleaved pipeline parallelism is used to improve performance by reducing the pipeline
       bubble.  Considers a transformer block as a list of smaller transformer (virtual) blocks.
       The number of virtual blocks per pipeline model parallel rank is the virtual model parallel
       size.  See Efficient Large-Scale Language Model Training on GPU Clusters Using Megatron-LM:
       arxiv.org/pdf/2104.04473.pdf for more details.
    """

    sequence_parallel: bool = False
    """Makes tensor parallelism more memory efficient for LLMs (20B+) by parallelizing layer norms
       and dropout sequentially.  See Reducing Activation Recomputation in Large Transformer Models
       (https://arxiv.org/abs/2205.05198) for more details.
    """

    context_parallel_size: int = 1
    """Splits network input along sequence dimension across GPU ranks."""

    hierarchical_context_parallel_sizes: Optional[list[int]] = None
    """Degrees of the hierarchical context parallelism. Users should provide a list to specify 
       the sizes for different levels. Taking the a2a+p2p cp comm type as example, it contains
       groups of two levels, so the first value of the list indicates the group size of the a2a
       communication type, and the second value indicates the group size of the p2p communication
       type.
    """

    max_seqlen_per_dp_cp_rank: Optional[int] = None
    """
    Maximum sequence length per DPxCP rank. This is the maximum sequence length each rank
    can handle without overflowing the memory. Typically, a good starting point is to set this
    to maximum sequence length / context parallel size.
    This is used to calculate the number and length of sub-samples assigned to 
    each rank when using hybrid_context_parallel.
    """

    hybrid_context_parallel: bool = False
    """
    If true, enables hybrid context parallel. This is used to balance the workload of 
    each CP rank when we use packed samples with variable sequence lengths.
    Please set max_seqlen_per_dp_cp_rank when using hybrid_context_parallel.
    """

    expert_model_parallel_size: int = 1
    """Distributes Moe Experts across sub data parallel dimension."""

    expert_tensor_parallel_size: Optional[int] = None
    """Intra-layer tensor model parallelism for expert layer. Splits tensors across GPU ranks.
       Default is None, which will be set to the value of tensor_model_parallel_size.
    """

    ###################
    # Initialization
    ###################
    perform_initialization: bool = field(
        default=True, metadata={"argparse_meta": {"arg_names": ["--no-initialization"]}}
    )
    """Controls weights initialization. This option can be useful when you know you are going to
       load values from a checkpoint.
    """

    use_cpu_initialization: bool = field(
        default=False, metadata={"argparse_meta": {"default": None}}
    )
    """When set to False, we initialize the weights directly on the GPU. CPU initialization is the
       same regardless of tensor model parallelism, but GPU initialization is not. Transferring
       weights from CPU to GPU can take a significant amount of time for large models.
    """

    ###################
    # Training
    ###################
    fp16: bool = False
    """If true, train with fp16 mixed precision training."""

    bf16: bool = False
    """If true, train with bf16 mixed precision training."""

    params_dtype: torch.dtype = torch.float32
    """dtype used when intializing the weights."""

    timers: Optional[Callable] = None
    """Timers object to call for various timing functions. See megatron.core.timers.Timers"""

    finalize_model_grads_func: Optional[Callable] = None
    """Function that finalizes gradients on all workers. Could include ensuring that grads are
       all-reduced across data parallelism, pipeline parallelism, and sequence parallelism
       dimensions.
    """

    grad_scale_func: Optional[Callable] = None
    """If using loss scaling, this function should take the loss and return the scaled loss. If
       None, no function is called on the loss.
    """

    no_sync_func: Optional[Callable] = None
    """Function that creates a context that suppresses asynchronous data-parallel communication. If
       the model is an instance of core.distributed.DistributedDataParallel, the default is to use
       core.distributed.DistributedDataParallel.no_sync.
    """

    grad_sync_func: Optional[Callable] = None
    """Function that launches asynchronous gradient reductions (e.g. distributed optimizer gradient
       reduce-scatters). The function should take one argument: an iterable of parameters whose
       gradients are to be synchronized.
    """

    param_sync_func: Optional[Callable] = None
    """Function that launches asynchronous parameter synchronizations (e.g. distributed optimizer
       parameter all-gathers). The function should take one argument: an iterable of parameters to
       be synchronized.
    """

    deterministic_mode: bool = False
    """If true, code that has deterministic execution will be chosen. This usually
       means slower execution, but is good for debugging and testing. Defaults to False."""

    enable_autocast: bool = False
    """If true runs the forward step function inside torch.autocast context."""

    autocast_dtype: Optional[torch.dtype] = None
    """dtype to pass to torch.amp.autocast when enabled. If None, is set to pipeline_dtype."""

    num_microbatches_with_partial_activation_checkpoints: Optional[int] = None
    """If int, set the number of microbatches where not all of the layers will be checkpointed and
       recomputed. The rest of the microbatches within the window of maximum outstanding
       microbatches will recompute all layers (either full recompute or selective recompute). If
       None, the checkpoint and recompute will be left up to the forward_step function.

    """

    ###################
    # Optimizations
    ###################
    gradient_accumulation_fusion: bool = False
    """If true, fuses weight gradient accumulation to GEMMs. Requires the custom CUDA extension
       fused_weight_gradient_mlp_cuda module. To use gradient_accumulation_fusion you must install
       APEX with --cpp_ext and --cuda_ext. For example: "pip install --global-option=\"--cpp_ext\"
       --global-option=\"--cuda_ext\" ". Note that the extension requires CUDA>=11. Otherwise, you
       must turn off gradient accumulation fusion.
    """

    use_te_rng_tracker: bool = field(
        default=False, metadata={"argparse_meta": {"arg_names": ["--te-rng-tracker"]}}
    )
    """If true, uses RNG state tracker in TransformerEngine if exists.
    Required for CUDA graphs support.
    """

    tp_comm_overlap: bool = False
    """If true, allows overlapping of Linear layer execution with tensor parallel communication
       collectives like AllGather/ReduceScatter. Overlapping is done for the linear layers wherever
       possible during the forward and the backward pass.
    """

    tp_comm_bulk_wgrad: bool = True
    """Controls All-Gather overlap with Bprop activation gradient GEMM. Don't care if
       tp_comm_overlap is False.
    """

    tp_comm_bulk_dgrad: bool = True
    """Controls Reduce-Scatter overlap with Bprop weight gradient GEMM. Don't care if
       tp_comm_overlap is False.
    """

    tp_comm_overlap_ag: bool = True
    """Controls All-Gather overlap with GEMM by pipelining the GEMM and All-Gather.
       Don't care if tp_comm_overlap is False.
    """

    tp_comm_overlap_rs: bool = True
    """Controls Reduce-Scatter overlap with GEMM by pipelining the GEMM and Reduce-Scatter.
       Don't care if tp_comm_overlap is False.
    """

    tp_comm_overlap_rs_dgrad: bool = False
    """If true, allows Reduce-Scatter overlap with DGRAD GEMM by pipelining the
       GEMM and Reduce-Scatter splits. Don't care if tp_comm_overlap is False.
    """

    tp_comm_split_ag: bool = True
    """Deprecated from TransformerEngine v1.6.0.
       Controls All-Gather overlap with Fprop GEMM by pipelining the GEMM and All-Gather
       splits. Don't care if tp_comm_overlap is False.
    """

    tp_comm_atomic_ag: bool = False
    """Deprecated from TransformerEngine v1.6.0.
       If true, allows All-Gather overlap with Fprop GEMM by pipelining the GEMM and All-Gather
       both done atomically. Don't care if tp_comm_overlap is False.
    """

    tp_comm_split_rs: bool = True
    """Deprecated from TransformerEngine v1.6.0.
       Controls Reduce-Scatter overlap with Fprop GEMM by pipelining the GEMM and
       Reduce-Scatter splits. Don't care if tp_comm_overlap is False.
    """

    tp_comm_atomic_rs: bool = False
    """Deprecated from TransformerEngine v1.6.0.
       If true, allows Reduce-Scatter overlap with Fprop GEMM by pipelining the GEMM and
       Reduce-Scatter both done atomically. Don't care if tp_comm_overlap is False.
    """

    cross_entropy_loss_fusion: bool = False
    """If this is enabled, the fused cross entropy implementation would be used.
       Defaults to False.
    """

    cross_entropy_fusion_impl: Literal['native', 'te'] = 'native'
    """If 'native', MCore based CE loss fusion is used, if 'te', Parallel CE loss
       from Transformer Engine library is used. Defaults to 'native'.
    """

    tp_comm_overlap_disable_qkv: bool = False
    """
       If true, the AllGather -> Gemm overlap for QKV gets disabled
    """

    tp_comm_overlap_disable_fc1: bool = False
    """
       If true, the AllGather -> Gemm overlap for FC1 layer of MLP gets disabled
    """

    tp_comm_bootstrap_backend: Literal['nccl', 'mpi', 'gloo'] = 'nccl'
    """Set the bootstrapping backend of Tensor parallel communications."""

    overlap_moe_expert_parallel_comm: bool = False
    """Overlap EP A2A communications with independent computations of different micro-batches
    in 1f1b phase of pipelining or non-pipelining schedule.
    """

    delay_wgrad_compute: bool = False
    """Delay the weight gradient computation to improve batch-level communication overlapping"""

    ep_overlap_early_attn_memory_release: bool = False
    """Enable early memory release of attention activations during EP overlap.
    EP overlap can increase peak memory usage when the overlapped forward module allocates 
    more memory than what is freed by the backward module. This flag addresses this by 
    reordering the attention backward pass to occur earlier in the schedule.
    Specifically:
    - Without this flag: attn_bwd executes after moe_combine_fwd
    - With this flag: attn_bwd executes before mlp_fwd
    The earlier execution releases attention activations sooner, reducing peak memory.
    Note: This may impact performance as moe_combine_fwd and moe_dispatch_bwd become 
    exposed (not overlapped with other computation).
    """

    ###################
    # Pipeline Parallel
    ###################
    pipeline_dtype: torch.dtype = None
    """dtype used in p2p communication, usually params_dtype"""

    variable_seq_lengths: bool = False
    """Support for variable sequence lengths across microbatches. Setting this communicates the size
        of tensors during pipeline parallelism communication, because of this extra overhead it
        should only be set if the sequence length varies by microbatch within a global batch.
    """

    overlap_p2p_comm: bool = False
    """When True some of the peer to peer communication for pipeline parallelism will overlap with
       computation. Must be False if batch_p2p_comm is true.
    """

    batch_p2p_comm: bool = True
    """Use batch_isend_irecv instead of individual isend/irecv calls. Must be False if
       overlap_p2p_comm is True.
    """

    batch_p2p_sync: bool = True
    """When using batch_isend_irecv, do a cuda.device.synchronize afterward to work around a bug in
       older version of PyTorch.
    """

    use_ring_exchange_p2p: bool = False
    """Use custom ring_exchange kernel instead of torch.distributed.batch_isend_irecv(). Requires
       custom built torch with torch.distributed.ring_exchange.
    """

    deallocate_pipeline_outputs: bool = False
    """If True, output data is deallocated after the tensor is sent to the next pipeline stage.
       Helps with saving memory, does nothing when pipeline parallel is not used.
    """

    defer_embedding_wgrad_compute: bool = False
    """If true, defers the embedding WGRAD GEMMs while pipeline flush is
       taking place enabling us to hide pipeline flush latency. Defaults to False.
    """

    wgrad_deferral_limit: int = 0
    """This value tunes the number of micro-batches for which the embedding weight gradient compute
       needs to be deferred to pipeline flush, this argument is invalid if
       `defer_embedding_wgrad_compute` is False.
       Defaults to 0, which means all micro-batches are deferred.
    """

    overlap_p2p_comm_warmup_flush: bool = field(
        default=False,
        metadata={"argparse_meta": {"arg_names": ["--overlap-p2p-communication-warmup-flush"]}},
    )
    """If true, overlap communication and computation in warm up and flush phase.
       Only valid when overlap_p2p_comm is True and batch_p2p_comm is False. 
       Defaults to False.
    """

    microbatch_group_size_per_vp_stage: Optional[int] = field(
        default=None,
        metadata={
            "argparse_meta": {"arg_names": ["--microbatch-group-size-per-virtual-pipeline-stage"]}
        },
    )
    """This value specifies the number of micro-batches that are executed 
       at a time for a given virtual stage (both forward and backward).
       Default (in __post_init__() method below) to pipeline_parallel_size 
       which specifies a depth-first schedule.
       Example: for PP=2 VP=2, when microbatch_group_size_per_vp_stage=2, 
       num_microbatches = 4, we have 
       rank 0 | 0 1 0 1 2 3 2 3
       rank 1 |   0 1 0 1 2 3 2 3
       When microbatch_group_size_per_vp_stage=3, num_microbatches = 5, 
       we have
       rank 0 | 0 1 2 0 1 2 3 4 3 4 
       rank 1 |   0 1 2 0 1 2 3 4 3 4
    """

    mtp_standalone: bool = False
    """This will be set automatically according to the pipeline layout, 
    and will be set to True if MTP is in a separate vpp stage."""

    ###################
    # CPU Offloading
    ###################
    cpu_offloading: bool = False
    """When set to True, all the activations are offloaded to the CPU asynchronously."""

    cpu_offloading_num_layers: int = 0
    """Tells the number of transformer layers for which activations has to be offloaded."""

    _cpu_offloading_context: Optional[ContextManager] = (
        None
        # Used for internal use only, not to be set by a user.
        # TODO: Need to move to the 'right' place when possible.
    )
    """For internal use only, do not set."""

    cpu_offloading_activations: bool = True
    """If True, offloads the activations to CPU."""

    cpu_offloading_weights: bool = False
    """If True, offloads the weights to CPU."""

    cpu_offloading_double_buffering: bool = False
    """If True, enables double buffering across layers while reloading activations from CPU."""

    cpu_offloading_retain_pinned_cpu_buffers: bool = False
    """If True, the pinned CPU buffers are retained after offloading and reused for the
       next iteration. It is useful for cuda graphs capture.
    """

    ###################
    # Timing
    ###################
    barrier_with_L1_time: bool = field(
        default=True,
        metadata={"argparse_meta": {"arg_names": ["--no-barrier-with-level-1-timing"]}},
    )
    """Controls barrier with level 1 time measurements. It is up to the user to make sure
       calling barrier with their timers will not result in hangs. This can happen if for example
       the user adds a level 1 timer that is not called by all ranks.
    """

    def __post_init__(self):
        """Python dataclass method that is used to modify attributes after initialization.
        See https://docs.python.org/3/library/dataclasses.html#post-init-processing for more
        details.
        """
        if self.sequence_parallel:
            if self.tensor_model_parallel_size <= 1:
                raise ValueError("Cannot use sequence parallelism without tensor parallelism")

        if self.expert_tensor_parallel_size is None:
            self.expert_tensor_parallel_size = self.tensor_model_parallel_size

        if self.pipeline_model_parallel_size > 1:
            if self.pipeline_dtype is None:
                raise ValueError(
                    "When using pipeline parallelism, pipeline_dtype must be specified"
                )

        if self.autocast_dtype is None:
            self.autocast_dtype = self.params_dtype

        if self.defer_embedding_wgrad_compute and self.pipeline_model_parallel_size == 1:
            raise ValueError(
                "Cannot defer embedding wgrad compute when pipeline model parallel is not used"
            )

        if self.defer_embedding_wgrad_compute and not self.gradient_accumulation_fusion:
            raise ValueError(
                "Cannot defer embedding wgrad compute when gradient accumulation fusion is not used"
            )

        if self.defer_embedding_wgrad_compute and self.wgrad_deferral_limit < 0:
            raise ValueError(
                "Wgrad deferral limit should be greater than or equal to 0 when it is enabled!"
            )

        if self.expert_model_parallel_size > 1 and self.tensor_model_parallel_size > 1:
            if self.sequence_parallel is False:
                warnings.warn(
                    "When using expert parallelism and tensor parallelism for training, "
                    "sequence parallelism must be used"
                )

        if self.microbatch_group_size_per_vp_stage is None:
            self.microbatch_group_size_per_vp_stage = self.pipeline_model_parallel_size

        if self.overlap_p2p_comm_warmup_flush:
            if not self.overlap_p2p_comm or self.batch_p2p_comm:
                raise ValueError(
                    "Pipeline parallel communication overlapping in warmup and flush is only "
                    "compatible with overlap_p2p_comm but not batch_p2p_comm."
                )


================================================
FILE: megatron/core/models/T5/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from .t5_model import T5Model


================================================
FILE: megatron/core/models/T5/t5_model.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

from typing import List, Literal, Optional, Tuple

import torch
from torch import Tensor

from megatron.core import tensor_parallel
from megatron.core.config_logger import has_config_logger_enabled, log_config_to_disk
from megatron.core.dist_checkpointing.mapping import ShardedStateDict
from megatron.core.enums import ModelType
from megatron.core.inference.contexts import BaseInferenceContext
from megatron.core.models.common.embeddings.language_model_embedding import LanguageModelEmbedding
from megatron.core.models.common.embeddings.relative_pos_embedding import RelativePositionEmbedding
from megatron.core.models.common.embeddings.rotary_pos_embedding import RotaryEmbedding
from megatron.core.models.common.language_module.language_module import LanguageModule
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.tensor_parallel.mappings import scatter_to_tensor_model_parallel_region
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_block import TransformerBlock
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.utils import deprecate_inference_params, get_tensor_model_parallel_group_if_none


class T5LMHead(MegatronModule):
    """Masked LM head for T5

    Args:
        config (TransformerConfig): transformer config
        parallel_output (bool): wether output logits being distributed or not.
        vocab_size (int): vocabulary size
        pre_process (bool): Include embedding layer
        share_embeddings_and_output_weights (bool): When True, input
            embeddings and output logit weights are shared.
    """

    def __init__(
        self,
        config: TransformerConfig,
        parallel_output: bool,
        vocab_size: int,
        pre_process: bool = True,
        share_embeddings_and_output_weights: bool = False,
        tp_group: Optional[torch.distributed.ProcessGroup] = None,
    ):
        super(T5LMHead, self).__init__(config=config)

        if has_config_logger_enabled(config):
            log_config_to_disk(config, locals(), prefix=type(self).__name__)

        self.parallel_output = parallel_output

        self.output_layer = tensor_parallel.ColumnParallelLinear(
            config.hidden_size,
            vocab_size,
            config=config,
            init_method=(
                config.embedding_init_method
                if config.use_mup and not share_embeddings_and_output_weights
                else config.init_method
            ),
            bias=share_embeddings_and_output_weights,
            skip_bias_add=not share_embeddings_and_output_weights,
            gather_output=not self.parallel_output,
            skip_weight_param_allocation=pre_process and share_embeddings_and_output_weights,
            tp_group=tp_group,
        )

    def forward(self, hidden_states: Tensor, word_embeddings_weight: Tensor) -> Tensor:
        """Forward pass.

        Args:
            hidden_states (Tensor): output hidden states from decoder
            word_embeddings_weight (Tensor): word embedding weight

        Returns:
            Tensor: logits tensor
        """

        logits, _ = self.output_layer(hidden_states, weight=word_embeddings_weight)
        return logits


class T5Model(LanguageModule):
    """T5 Language model.

    Args:
        config (TransformerConfig): transformer config

        encoder_config (TransformerConfig): encoder transformer config

        transformer_encoder_layer_spec (ModuleSpec): transformer layer
            customization specs for encoder

        transformer_decoder_layer_spec (ModuleSpec): transformer layer
            customization specs for decoder

        vocab_size (int): vocabulary size

        max_sequence_length (int): maximum size of sequence. This is used for positional embedding

        pre_process (bool): Include embedding layer (used with pipeline parallelism)

        post_process (bool): Include an output layer (used with pipeline parallelism)

        fp16_lm_cross_entropy (bool, optional): Defaults to False

        parallel_output (bool): Do not gather the outputs,
            keep them split across tensor parallel ranks

        share_embeddings_and_output_weights (bool): When True,
            input embeddings and output logit weights are shared. Defaults to False.

        position_embedding_type (string): Position embedding type.
            Options ['learned_absolute', 'rope'].
            Defaults is 'learned_absolute'.

        rotary_percent (float): Percent of rotary dimension to use for rotary position embeddings.
            Defaults to 1.0 (100%). Ignored unless position_embedding_type is 'rope'.

        seq_len_interpolation_factor (float): scale of linearly interpolating
            RoPE for longer sequences. The value must be a float larger than 1.0.
            Defaults to None.

        add_encoder (bool): Create the encoder (used with pipeline parallelism).
            When using pipelining, the encoder will only be created on a subset
            of the pipeline ranks.

        add_decoder (bool): Include an output layer (used with pipeline parallelism).
            As with `add_encoder`, when using this model and pipelining,
            the decoder will only be created on a subset of the pipeline ranks.
    """

    def __init__(
        self,
        config: TransformerConfig,
        encoder_config: TransformerConfig,
        transformer_encoder_layer_spec: ModuleSpec,
        transformer_decoder_layer_spec: ModuleSpec,
        vocab_size: int,
        max_sequence_length: int,
        pre_process: bool = True,
        post_process: bool = True,
        fp16_lm_cross_entropy: bool = False,
        parallel_output: bool = True,
        share_embeddings_and_output_weights: bool = False,
        position_embedding_type: Literal[
            'learned_absolute', 'rope', 'relative'
        ] = 'learned_absolute',
        rotary_percent: float = 1.0,
        seq_len_interpolation_factor: Optional[float] = None,
        relative_attention_num_buckets: int = 32,
        relative_attention_max_distance: int = 128,
        add_encoder: bool = True,
        add_decoder: bool = True,
        pg_collection: ProcessGroupCollection = None,
    ):

        super(T5Model, self).__init__(config=config)

        self.config: TransformerConfig = config
        self.encoder_config: TransformerConfig = encoder_config
        self.transformer_encoder_layer_spec: ModuleSpec = transformer_encoder_layer_spec
        self.transformer_decoder_layer_spec: ModuleSpec = transformer_decoder_layer_spec
        self.vocab_size = vocab_size
        self.max_sequence_length = max_sequence_length
        self.pre_process = pre_process
        self.post_process = post_process
        self.add_encoder = add_encoder
        self.add_decoder = add_decoder
        self.fp16_lm_cross_entropy = fp16_lm_cross_entropy
        self.parallel_output = parallel_output
        self.share_embeddings_and_output_weights = share_embeddings_and_output_weights
        self.position_embedding_type = position_embedding_type
        self.encoder_hidden_state = None
        if pg_collection is None:
            pg_collection = ProcessGroupCollection.use_mpu_process_groups(
                required_pgs=['tp', 'cp', 'pp']
            )
        self.tp_group = get_tensor_model_parallel_group_if_none(pg_collection.tp)

        self.model_type = ModelType.encoder_or_decoder

        # Tells schedules.py that this model has a skip connection
        # between the encoder's output and the decoder
        # (and hence both the encoder and decoder's tensors are required for correct backprop).
        self.xattn_needed = True

        # specify the position embeddings as a member
        # variable in the T5 class so that they are easy to
        # find for `finalize_model_grads._allreduce_position_embedding_grads`
        self.position_embeddings = None
        if self.pre_process:
            self.embedding = LanguageModelEmbedding(
                config=self.config,
                vocab_size=self.vocab_size,
                max_sequence_length=self.max_sequence_length,
                position_embedding_type=self.position_embedding_type,
                tp_group=self.tp_group,
            )
            if position_embedding_type == "learned_absolute":
                self.position_embeddings = self.embedding.position_embeddings
            else:
                self.position_embeddings = None

        # Rotary Position Embeddings
        if self.position_embedding_type == 'rope':
            self.rotary_pos_emb = RotaryEmbedding(
                kv_channels=self.config.kv_channels,
                rotary_percent=rotary_percent,
                rotary_interleaved=self.config.rotary_interleaved,
                seq_len_interpolation_factor=seq_len_interpolation_factor,
                use_cpu_initialization=self.config.use_cpu_initialization,
                cp_group=pg_collection.cp,
            )

        # Relative Position Embeddings
        if self.position_embedding_type == 'relative':
            self.encoder_relative_pos_emb = RelativePositionEmbedding(
                bidirectional=True,
                init_method=self.config.init_method,
                num_attention_heads=self.config.num_attention_heads,
                relative_attention_num_buckets=relative_attention_num_buckets,
                relative_attention_max_distance=relative_attention_max_distance,
            )
            self.decoder_relative_pos_emb = RelativePositionEmbedding(
                bidirectional=False,
                init_method=self.config.init_method,
                num_attention_heads=self.config.num_attention_heads,
                relative_attention_num_buckets=relative_attention_num_buckets,
                relative_attention_max_distance=relative_attention_max_distance,
            )

        # Transformer encoder
        encoder_spec, decoder_spec = (
            self.transformer_encoder_layer_spec,
            self.transformer_decoder_layer_spec,
        )
        if self.add_encoder:
            self.encoder = TransformerBlock(
                config=self.encoder_config,
                spec=encoder_spec,
                pre_process=self.pre_process,
                post_process=self.post_process,
                pg_collection=pg_collection,
            )
        else:
            self.encoder = None

        if self.add_decoder:
            # Transformer decoder
            self.decoder = TransformerBlock(
                config=self.config,
                spec=decoder_spec,
                pre_process=self.pre_process,
                post_process=self.post_process,
                pg_collection=pg_collection,
            )
        else:
            self.decoder = None

        # Output
        if post_process:
            self.lm_head = T5LMHead(
                config,
                parallel_output,
                self.vocab_size,
                self.pre_process,
                self.share_embeddings_and_output_weights,
                tp_group=self.tp_group,
            )
            self.output_layer = self.lm_head.output_layer

        if self.pre_process or self.post_process:
            self.setup_embeddings_and_output_layer()

    def forward(
        self,
        encoder_input_ids: Tensor,
        decoder_input_ids: Tensor,
        encoder_attn_mask: Tensor,
        decoder_attn_mask: Tensor,
        encoder_decoder_attn_mask: Tensor,
        lm_labels: Tensor = None,
        encoder_hidden_states: Tensor = None,
        output_encoder_hidden_only: bool = False,
        inference_context: BaseInferenceContext = None,
        packed_seq_params: PackedSeqParams = None,
        *,
        inference_params: Optional[BaseInferenceContext] = None,
    ) -> Tensor:
        """Forward pass.

        Args:
            encoder_input_ids (Tensor): input ids for encoder
            decoder_input_ids (Tensor): input ids for decoder
            encoder_attn_mask (Tensor): self-attention mask for encoder
            decoder_attn_mask (Tensor): self-attention mask for decoder
            encoder_decoder_attn_mask (Tensor): cross-attention mask between encoder and decoder
            lm_labels (Tensor): labels for decoder output
            inference_context (BaseInferenceContext): relevant arguments for inferencing

        Returns:
            Tensor: loss tensor
        """

        inference_context = deprecate_inference_params(inference_context, inference_params)

        ## Encoder forward
        if encoder_hidden_states is None:

            # Encoder position ids
            encoder_position_ids = t5_position_ids(encoder_input_ids)

            # Encoder embedding.
            if self.pre_process:
                encoder_input = self.embedding(
                    input_ids=encoder_input_ids, position_ids=encoder_position_ids
                )
            else:
                # intermediate stage of pipeline
                encoder_input = None

            # Rotary positional embeddings
            rotary_pos_emb = None
            if self.position_embedding_type == 'rope':
                rotary_seq_len = self.rotary_pos_emb.get_rotary_seq_len(
                    inference_context, self.encoder, encoder_input, self.config, packed_seq_params
                )
                rotary_pos_emb = self.rotary_pos_emb(rotary_seq_len)

            # Relative positional embeddings
            encoder_attention_bias_parallel = None
            if self.position_embedding_type == 'relative':
                query_seq_length = RelativePositionEmbedding.get_relative_seq_len(
                    inference_context, self.encoder, encoder_input, self.config
                )
                key_seq_length = query_seq_length
                attention_bias = self.encoder_relative_pos_emb(query_seq_length, key_seq_length)

                # Scatter attention_bias to TP ranks
                # First, reshape [1, num_head, seqlen_q, seqlen_kv] to
                # [1, seqlen_q, seqlen_kv, num_head] to be scatter along
                # the last (num_heads dimension)
                attention_bias = torch.permute(attention_bias, (0, 2, 3, 1))
                # Then, scatter to TP region
                attention_bias_parallel = scatter_to_tensor_model_parallel_region(
                    attention_bias, self.tp_group
                )
                # Lastly, revert the dimension back to [1, num_head, seqlen_q, seqlen_kv]
                encoder_attention_bias_parallel = torch.permute(
                    attention_bias_parallel, (0, 3, 1, 2)
                )

            # Run encoder.
            if self.add_encoder:
                encoder_hidden_states = self.encoder(
                    hidden_states=encoder_input,
                    attention_mask=encoder_attn_mask,
                    inference_context=inference_context,
                    rotary_pos_emb=rotary_pos_emb,
                    attention_bias=encoder_attention_bias_parallel,
                )
            else:
                encoder_hidden_states = self.encoder_hidden_state

        if not self.add_decoder or output_encoder_hidden_only:
            return encoder_hidden_states

        ## Decoder forward
        # Decoder position ids
        decoder_position_ids = t5_position_ids(decoder_input_ids)

        # Decoder embedding.
        if self.pre_process:
            decoder_input = self.embedding(
                input_ids=decoder_input_ids, position_ids=decoder_position_ids
            )
        else:
            # intermediate stage of pipeline
            decoder_input = None  ### should it take encoder_hidden_states

        # Rotary positional embeddings
        rotary_pos_emb = None
        if self.position_embedding_type == 'rope':
            rotary_seq_len = self.rotary_pos_emb.get_rotary_seq_len(
                inference_context, self.decoder, decoder_input, self.config, packed_seq_params
            )
            rotary_pos_emb = self.rotary_pos_emb(rotary_seq_len)

        # Relative positional embeddings
        decoder_attention_bias_parallel = None
        if self.position_embedding_type == 'relative':
            query_seq_length = RelativePositionEmbedding.get_relative_seq_len(
                inference_context, self.decoder, decoder_input, self.config
            )
            key_seq_length = query_seq_length
            attention_bias = self.decoder_relative_pos_emb(query_seq_length, key_seq_length)

            # Scatter attention_bias to TP ranks
            # First, reshape [1, num_head, seqlen_q, seqlen_kv] to
            # [1, seqlen_q, seqlen_kv, num_head] to be scatter along
            # the last (num_heads dimension)
            attention_bias = torch.permute(attention_bias, (0, 2, 3, 1))
            # Then, scatter to TP region
            attention_bias_parallel = scatter_to_tensor_model_parallel_region(
                attention_bias, self.tp_group
            )
            # Lastly, revert the dimension back to [1, num_head, seqlen_q, seqlen_kv]
            decoder_attention_bias_parallel = torch.permute(attention_bias_parallel, (0, 3, 1, 2))

        # Run decoder.
        decoder_hidden_states = self.decoder(
            hidden_states=decoder_input,
            attention_mask=decoder_attn_mask,
            context=encoder_hidden_states,
            context_mask=encoder_decoder_attn_mask,
            inference_context=inference_context,
            rotary_pos_emb=rotary_pos_emb,
            attention_bias=decoder_attention_bias_parallel,
        )

        if self.post_process:
            output_weight = None
            if self.share_embeddings_and_output_weights:
                output_weight = self.shared_embedding_or_output_weight()
            lm_logits = self.lm_head(decoder_hidden_states, word_embeddings_weight=output_weight)

            if lm_labels is None:
                # [s b h] => [b s h]
                return lm_logits.transpose(0, 1).contiguous()
            else:
                # [b s] => [s b]
                lm_loss = self.compute_language_model_loss(lm_labels, lm_logits)
                return lm_loss
        else:
            return decoder_hidden_states

    def set_input_tensor(self, input_tensor):
        """See megatron.model.transformer.set_input_tensor()"""

        # This is usually handled in schedules.py but some inference code still
        # gives us non-lists or None
        if not isinstance(input_tensor, list):
            input_tensor = [input_tensor]

        if self.add_encoder and self.add_decoder:
            assert (
                len(input_tensor) == 1
            ), 'input_tensor should only be length 1 for stage with both encoder and decoder'
            self.encoder.set_input_tensor(input_tensor[0])
        elif self.add_encoder:
            assert (
                len(input_tensor) == 1
            ), 'input_tensor should only be length 1 for stage with only encoder'
            self.encoder.set_input_tensor(input_tensor[0])
        elif self.add_decoder:
            if len(input_tensor) == 2:
                self.decoder.set_input_tensor(input_tensor[0])
                self.encoder_hidden_state = input_tensor[1]
            elif len(input_tensor) == 1:
                self.decoder.set_input_tensor(None)
                self.encoder_hidden_state = input_tensor[0]
            else:
                raise Exception('input_tensor must have either length 1 or 2')
        else:
            raise Exception('Stage must have at least either encoder or decoder')

    def shared_embedding_or_output_weight(self) -> Tensor:
        """Function to share the input embeddings and output logit weights."""

        if self.pre_process:
            return self.embedding.word_embeddings.weight
        elif self.post_process:
            return self.lm_head.output_layer.weight
        return None

    def sharded_state_dict(
        self,
        prefix: str = '',
        sharded_offsets: Tuple[Tuple[int, int, int]] = (),
        metadata: Optional[dict] = None,
    ) -> ShardedStateDict:
        """Sharded state dict implementation handling duplication of encoder and decoder layers.

        Some layers (output, embedding) are shared between the encoder and decoder.
        This method sets the replica_id for them to ensure there is only one
        layer instance with replica_id (0, 0, 0).

        Args:
            prefix (str): Module name prefix.
            sharded_offsets (tuple): PP related offsets, expected to be empty at this module level.
            metadata (Optional[Dict]): metadata controlling sharded state dict creation.

        Returns:
            ShardedStateDict: sharded state dict for the T5Model
        """
        sharded_sd = super().sharded_state_dict(prefix, sharded_offsets, metadata)
        return sharded_sd


def t5_extended_attention_mask(attention_mask_list: List[Tensor]) -> List[Tensor]:
    """Creates the extended attention mask

    Converts the attention mask of dimension [batch size, seq_len, seq_len]
    to [batch size, 1, seq_len, seq_len]

    Args:
        attention_mask (Tensor): The input attention mask

    Returns:
        Tensor: The extended binary attention mask
    """

    def attn_mask_postprocess(attn_mask):
        # [b, 1, s, s]
        extended_attention_mask = attn_mask.unsqueeze(1)
        return extended_attention_mask

    return [
        (attn_mask_postprocess(attn_mask) if attn_mask is not None else None)
        for attn_mask in attention_mask_list
    ]


def t5_position_ids(token_ids: Tensor) -> Tensor:
    """Calculate position ids from token ids
    Args:
        token_ids (Tensor): input tokens

    Returns:
        Tensor: position ids
    """
    seq_length = token_ids.size(1)
    position_ids = torch.arange(seq_length, dtype=torch.long, device=token_ids.device)
    position_ids = position_ids.unsqueeze(0).expand_as(token_ids)

    return position_ids


================================================
FILE: megatron/core/models/T5/t5_spec.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from megatron.core.extensions.transformer_engine import HAVE_TE
from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add
from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear
from megatron.core.transformer.attention import (
    CrossAttention,
    CrossAttentionSubmodules,
    SelfAttention,
    SelfAttentionSubmodules,
)
from megatron.core.transformer.dot_product_attention import DotProductAttention
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.identity_op import IdentityOp
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_block import TransformerBlockSubmodules
from megatron.core.transformer.transformer_layer import TransformerLayer, TransformerLayerSubmodules
from megatron.core.typed_torch import not_none

if HAVE_TE:
    from megatron.core.extensions.transformer_engine import (
        TEColumnParallelLinear,
        TEDotProductAttention,
        TELayerNormColumnParallelLinear,
        TENorm,
        TERowParallelLinear,
    )
else:
    (
        TEColumnParallelLinear,
        TEDotProductAttention,
        TELayerNormColumnParallelLinear,
        TENorm,
        TERowParallelLinear,
    ) = (None, None, None, None, None)

try:
    import apex  # pylint: disable=unused-import

    from megatron.core.fusions.fused_layer_norm import FusedLayerNorm

    HAVE_APEX = True
    LNImpl = FusedLayerNorm
except ImportError:
    import warnings

    from megatron.core.transformer.torch_norm import WrappedTorchNorm

    warnings.warn(f"Apex is not installed. Falling back to Torch Norm")
    LNImpl = WrappedTorchNorm
    HAVE_APEX = False


def encoder_model_with_transformer_engine_default_spec() -> ModuleSpec:
    """T5 encoder TE spec (uses Transformer Engine components)."""

    return ModuleSpec(
        module=TransformerLayer,
        submodules=TransformerLayerSubmodules(
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": AttnMaskType.padding},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=not_none(TELayerNormColumnParallelLinear),
                    core_attention=not_none(TEDotProductAttention),
                    linear_proj=TERowParallelLinear,
                    q_layernorm=IdentityOp,
                    k_layernorm=IdentityOp,
                ),
            ),
            self_attn_bda=get_bias_dropout_add,
            mlp=ModuleSpec(
                module=MLP,
                submodules=MLPSubmodules(
                    linear_fc1=not_none(TELayerNormColumnParallelLinear),
                    linear_fc2=not_none(TERowParallelLinear),
                ),
            ),
            mlp_bda=get_bias_dropout_add,
        ),
    )


def decoder_model_with_transformer_engine_default_spec() -> ModuleSpec:
    """T5 decoder TE spec (uses Transformer Engine components)."""

    return ModuleSpec(
        module=TransformerLayer,
        submodules=TransformerLayerSubmodules(
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": AttnMaskType.causal},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=not_none(TELayerNormColumnParallelLinear),
                    core_attention=not_none(TEDotProductAttention),
                    linear_proj=TERowParallelLinear,
                    q_layernorm=IdentityOp,
                    k_layernorm=IdentityOp,
                ),
            ),
            self_attn_bda=get_bias_dropout_add,
            pre_cross_attn_layernorm=not_none(TENorm),
            cross_attention=ModuleSpec(
                module=CrossAttention,
                params={"attn_mask_type": AttnMaskType.padding},
                submodules=CrossAttentionSubmodules(
                    linear_q=not_none(TEColumnParallelLinear),
                    linear_kv=not_none(TEColumnParallelLinear),
                    core_attention=not_none(TEDotProductAttention),
                    linear_proj=TERowParallelLinear,
                ),
            ),
            cross_attn_bda=get_bias_dropout_add,
            mlp=ModuleSpec(
                module=MLP,
                submodules=MLPSubmodules(
                    linear_fc1=not_none(TELayerNormColumnParallelLinear),
                    linear_fc2=not_none(TERowParallelLinear),
                ),
            ),
            mlp_bda=get_bias_dropout_add,
        ),
    )


def encoder_model_with_local_spec() -> ModuleSpec:
    """T5 encoder local spec (uses Megatron-Core components)."""

    return ModuleSpec(
        module=TransformerLayer,
        submodules=TransformerLayerSubmodules(
            input_layernorm=LNImpl,
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": AttnMaskType.arbitrary},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=ColumnParallelLinear,
                    core_attention=DotProductAttention,
                    linear_proj=RowParallelLinear,
                    q_layernorm=IdentityOp,
                    k_layernorm=IdentityOp,
                ),
            ),
            self_attn_bda=get_bias_dropout_add,
            pre_mlp_layernorm=LNImpl,
            mlp=ModuleSpec(
                module=MLP,
                submodules=MLPSubmodules(
                    linear_fc1=ColumnParallelLinear, linear_fc2=RowParallelLinear
                ),
            ),
            mlp_bda=get_bias_dropout_add,
            sharded_state_dict_keys_map={
                "input_layernorm.": "self_attention.linear_qkv.layer_norm_",
                "pre_mlp_layernorm.": "mlp.linear_fc1.layer_norm_",
            },
        ),
    )


def decoder_model_with_local_spec() -> ModuleSpec:
    """T5 decoder local spec (uses Megatron-Core components)."""

    return ModuleSpec(
        module=TransformerLayer,
        submodules=TransformerLayerSubmodules(
            input_layernorm=LNImpl,
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": AttnMaskType.causal},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=ColumnParallelLinear,
                    core_attention=DotProductAttention,
                    linear_proj=RowParallelLinear,
                    q_layernorm=IdentityOp,
                    k_layernorm=IdentityOp,
                ),
            ),
            self_attn_bda=get_bias_dropout_add,
            pre_cross_attn_layernorm=LNImpl,
            cross_attention=ModuleSpec(
                module=CrossAttention,
                params={"attn_mask_type": AttnMaskType.arbitrary},
                submodules=CrossAttentionSubmodules(
                    linear_q=ColumnParallelLinear,
                    linear_kv=ColumnParallelLinear,
                    core_attention=DotProductAttention,
                    linear_proj=RowParallelLinear,
                ),
            ),
            cross_attn_bda=get_bias_dropout_add,
            pre_mlp_layernorm=LNImpl,
            mlp=ModuleSpec(
                module=MLP,
                submodules=MLPSubmodules(
                    linear_fc1=ColumnParallelLinear, linear_fc2=RowParallelLinear
                ),
            ),
            mlp_bda=get_bias_dropout_add,
            sharded_state_dict_keys_map={
                "input_layernorm.": "self_attention.linear_qkv.layer_norm_",
                "pre_mlp_layernorm.": "mlp.linear_fc1.layer_norm_",
            },
        ),
    )


def get_t5_encoder_with_transformer_engine_block_spec(
    num_layers: int,
) -> TransformerBlockSubmodules:
    """T5 encoder block spec for Transformer Engine

    Args:
      config (TransformerConfig): config, containing number of layers for encoder
    """

    layer_spec = encoder_model_with_transformer_engine_default_spec()
    block_spec = TransformerBlockSubmodules([layer_spec] * num_layers, layer_norm=TENorm)
    return block_spec


def get_t5_decoder_with_transformer_engine_block_spec(
    num_layers: int,
) -> TransformerBlockSubmodules:
    """T5 decoder block spec for Transformer Engine

    Args:
      config (TransformerConfig): config, containing number of layers for decoder
    """

    layer_spec = decoder_model_with_transformer_engine_default_spec()
    block_spec = TransformerBlockSubmodules([layer_spec] * num_layers, layer_norm=TENorm)
    return block_spec


def get_t5_encoder_with_local_block_spec(num_layers: int) -> TransformerBlockSubmodules:
    """T5 encoder block spec for local (uses Megatron-Core components)

    Args:
      num_layers (int): number of encoder layers
    """

    layer_spec = encoder_model_with_local_spec()
    block_spec = TransformerBlockSubmodules([layer_spec] * num_layers, layer_norm=TENorm)
    return block_spec


def get_t5_decoder_with_local_block_spec(num_layers: int) -> TransformerBlockSubmodules:
    """T5 decoder block spec for local (uses Megatron-Core components)

    Args:
      num_layers (int): number of decoder layers
    """

    layer_spec = decoder_model_with_local_spec()
    block_spec = TransformerBlockSubmodules([layer_spec] * num_layers, layer_norm=TENorm)
    return block_spec


================================================
FILE: megatron/core/models/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/core/models/backends.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
from __future__ import annotations

import warnings
from abc import abstractmethod
from typing import Optional, Protocol, Tuple, cast

from megatron.core.extensions.transformer_engine import (
    TEColumnParallelGroupedLinear,
    TERowParallelGroupedLinear,
)
from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear
from megatron.core.transformer.dot_product_attention import DotProductAttention
from megatron.core.transformer.mlp import MLPSubmodules, TEActivationFunctionBuilder
from megatron.core.transformer.moe.experts import (
    InferenceGroupedMLP,
    SequentialMLP,
    TEGroupedMLPSubmodules,
)
from megatron.core.transformer.torch_norm import LayerNormBuilder, WrappedTorchNorm
from megatron.core.typed_torch import not_none
from megatron.core.utils import is_te_min_version

try:
    import apex  # pylint: disable=unused-import

    from megatron.core.fusions.fused_layer_norm import FusedLayerNorm

    HAVE_APEX = True
    LNImpl = FusedLayerNorm
except ImportError:
    warnings.warn("Apex is not installed. Falling back to Torch Norm")
    FusedLayerNorm = None
    HAVE_APEX = False
    LNImpl = WrappedTorchNorm

from megatron.core.extensions.transformer_engine import (
    TEActivationOp,
    TEDotProductAttention,
    TELinear,
    TENorm,
)
from megatron.core.tensor_parallel.inference_layers import (
    InferenceColumnParallelLinear,
    InferenceLayerNormColumnParallelLinear,
    InferenceRowParallelLinear,
)
from megatron.core.utils import is_te_min_version


class BackendSpecProvider(Protocol):
    """A protocol for providing the submodules used in Spec building."""

    @abstractmethod
    def column_parallel_linear(self) -> type:
        """Which column parallel linear module the backend uses"""
        ...

    @abstractmethod
    def row_parallel_linear(self) -> type:
        """Which row parallel linear module the backend uses"""
        ...

    @abstractmethod
    def fuse_layernorm_and_linear(self) -> bool:
        """Does the backend support a single module for layernorm and linear"""
        ...

    @abstractmethod
    def column_parallel_layer_norm_linear(self) -> Optional[type]:
        """Which module for sequential layernorm and linear"""
        ...

    @abstractmethod
    def layer_norm(
        self, rms_norm: bool = False, for_qk: bool = False, has_residual: bool = False
    ) -> LayerNormBuilder:
        """Which module for layernorm"""
        ...

    @abstractmethod
    def core_attention(self) -> type:
        """Which module to use for attention"""
        ...

    @abstractmethod
    def grouped_mlp_modules(
        self, moe_use_grouped_gemm: bool
    ) -> tuple[type, MLPSubmodules | TEGroupedMLPSubmodules | None]:
        """Which module and submodules to use for grouped mlp"""
        ...

    @abstractmethod
    def activation_func(self) -> TEActivationFunctionBuilder | None:
        """Which module to use for activation function"""
        ...


class LocalSpecProvider(BackendSpecProvider):
    """A protocol for providing Local submodules used in Spec building."""

    def column_parallel_linear(self) -> type:
        """Which column parallel linear module the backend uses"""
        return ColumnParallelLinear

    def row_parallel_linear(self) -> type:
        """Which row parallel linear module the backend uses"""
        return RowParallelLinear

    def fuse_layernorm_and_linear(self) -> bool:
        """Does the backend choose a single module for layernorm and linear"""
        return False

    def column_parallel_layer_norm_linear(self) -> Optional[type]:
        """Which module for sequential layernorm and linear"""
        return None

    def layer_norm(
        self, rms_norm: bool = False, for_qk: bool = False, has_residual: bool = False
    ) -> LayerNormBuilder:
        """Which module to use for layer norm"""
        if rms_norm:
            # Matching get_gpt_layer_local_spec.
            # Why does the global need to be updated?
            global LNImpl
            LNImpl = WrappedTorchNorm
        return LNImpl

    def core_attention(self) -> type:
        """Which module to use for attention"""
        return DotProductAttention

    def grouped_mlp_modules(
        self, moe_use_grouped_gemm: bool
    ) -> tuple[type[SequentialMLP], MLPSubmodules]:
        """Which module and submodules to use for grouped mlp"""
        return SequentialMLP, MLPSubmodules(
            linear_fc1=ColumnParallelLinear, linear_fc2=RowParallelLinear
        )

    def activation_func(self) -> TEActivationFunctionBuilder | None:
        """Which module to use for activation function"""
        return None


class InferenceSpecProvider(BackendSpecProvider):
    """A protocol for providing the submodules used in Spec building."""

    def linear(self) -> type:
        """Which linear module TE backend uses"""
        return TELinear

    def column_parallel_linear(self) -> type:
        """Which column parallel linear module TE backend uses"""
        return InferenceColumnParallelLinear

    def row_parallel_linear(self) -> type:
        """Which row parallel linear module TE backend uses"""
        return InferenceRowParallelLinear

    def fuse_layernorm_and_linear(self) -> bool:
        """TE backend chooses a single module for layernorm and linear"""
        return True

    def column_parallel_layer_norm_linear(self) -> type[InferenceLayerNormColumnParallelLinear]:
        """Which module for sequential layernorm and linear"""
        return InferenceLayerNormColumnParallelLinear

    def layer_norm(
        self, rms_norm: bool = False, for_qk: bool = False, has_residual: bool = False
    ) -> LayerNormBuilder:
        """Which module to use for layer norm"""
        if for_qk and not is_te_min_version("1.9.0"):
            # TENorm significantly harms convergence when used
            # for QKLayerNorm if TE Version < 1.9;
            # we instead use the Apex implementation.
            return not_none(FusedLayerNorm)
        return TENorm

    def core_attention(self) -> type[TEDotProductAttention]:
        """Which module to use for attention"""
        return TEDotProductAttention

    def activation_func(self) -> TEActivationFunctionBuilder | None:
        """Which module to use for activation function"""
        # transformer_engine.BasicOperation.forward has an overly permissive return type, but by
        # design these classes always meet the interface.
        return cast(TEActivationFunctionBuilder, TEActivationOp)

    def grouped_mlp_modules(
        self, moe_use_grouped_gemm: bool
    ) -> Tuple[type, Optional[MLPSubmodules]]:
        """Which module and submodules to use for grouped mlp"""
        return InferenceGroupedMLP, MLPSubmodules(
            linear_fc1=TEColumnParallelGroupedLinear, linear_fc2=TERowParallelGroupedLinear
        )


================================================
FILE: megatron/core/models/bert/__init__.py
================================================


================================================
FILE: megatron/core/models/bert/bert_layer_specs.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import warnings

from megatron.core.extensions.transformer_engine import HAVE_TE
from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add
from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear
from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules
from megatron.core.transformer.dot_product_attention import DotProductAttention
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.identity_op import IdentityOp
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_layer import TransformerLayer, TransformerLayerSubmodules
from megatron.core.typed_torch import not_none

if HAVE_TE:
    from megatron.core.extensions.transformer_engine import (
        TEDotProductAttention,
        TELayerNormColumnParallelLinear,
        TERowParallelLinear,
    )
else:
    (TEDotProductAttention, TELayerNormColumnParallelLinear, TERowParallelLinear) = (
        None,
        None,
        None,
    )

try:
    import apex  # pylint: disable=unused-import

    from megatron.core.fusions.fused_layer_norm import FusedLayerNorm

    HAVE_APEX = True
    LNImpl = FusedLayerNorm
except ImportError:
    from megatron.core.transformer.torch_norm import WrappedTorchNorm

    warnings.warn("Apex is not installed. Falling back to Torch Norm")
    LNImpl = WrappedTorchNorm
    HAVE_APEX = False


def get_bert_layer_with_transformer_engine_submodules() -> TransformerLayerSubmodules:
    """Use these submodules to use lower-level Transformer Engine modules (required for fp8
    training).

    Returns:
        TransformerLayerSubmodules: Submodules with TE modules.
    """
    if not HAVE_TE:
        raise ImportError(
            "Transformer Engine is not installed. Please use local Bert layer spec instead."
        )

    return TransformerLayerSubmodules(
        self_attention=ModuleSpec(
            module=SelfAttention,
            params={"attn_mask_type": AttnMaskType.padding},
            submodules=SelfAttentionSubmodules(
                linear_qkv=not_none(TELayerNormColumnParallelLinear),
                core_attention=not_none(TEDotProductAttention),
                linear_proj=not_none(TERowParallelLinear),
                q_layernorm=IdentityOp,
                k_layernorm=IdentityOp,
            ),
        ),
        self_attn_bda=get_bias_dropout_add,
        mlp=ModuleSpec(
            module=MLP,
            submodules=MLPSubmodules(
                linear_fc1=not_none(TELayerNormColumnParallelLinear),
                linear_fc2=not_none(TERowParallelLinear),
            ),
        ),
        mlp_bda=get_bias_dropout_add,
    )


def get_bert_layer_with_transformer_engine_spec():
    """Use this spec to use lower-level Transformer Engine modules (required for fp8 training).

    Returns:
        ModuleSpec: Module specification with TE modules
    """
    return ModuleSpec(
        module=TransformerLayer, submodules=get_bert_layer_with_transformer_engine_submodules()
    )


def __getattr__(name):
    if name == "bert_layer_with_transformer_engine_spec":
        warnings.warn(
            """Attribute bert_layer_specs.bert_layer_with_transformer_engine_spec is on a
            deprecation track and will be removed in future releases. Please migrate to
            bert_layer_specs.get_bert_layer_with_transformer_engine_spec()."""
        )

        return get_bert_layer_with_transformer_engine_spec()


# Use this spec for an implementation using only modules in megatron core
bert_layer_local_spec = ModuleSpec(
    module=TransformerLayer,
    submodules=TransformerLayerSubmodules(
        input_layernorm=LNImpl,
        self_attention=ModuleSpec(
            module=SelfAttention,
            params={"attn_mask_type": AttnMaskType.padding},
            submodules=SelfAttentionSubmodules(
                linear_qkv=ColumnParallelLinear,
                core_attention=DotProductAttention,
                linear_proj=RowParallelLinear,
                q_layernorm=IdentityOp,
                k_layernorm=IdentityOp,
            ),
        ),
        self_attn_bda=get_bias_dropout_add,
        pre_mlp_layernorm=LNImpl,
        mlp=ModuleSpec(
            module=MLP,
            submodules=MLPSubmodules(linear_fc1=ColumnParallelLinear, linear_fc2=RowParallelLinear),
        ),
        mlp_bda=get_bias_dropout_add,
        sharded_state_dict_keys_map={
            "input_layernorm.": "self_attention.linear_qkv.layer_norm_",
            "pre_mlp_layernorm.": "mlp.linear_fc1.layer_norm_",
        },
    ),
)


================================================
FILE: megatron/core/models/bert/bert_lm_head.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import torch
from torch import Tensor

from megatron.core.fusions.fused_layer_norm import HAVE_FUSED_LAYER_NORM, FusedLayerNorm
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.utils import get_linear_layer

if HAVE_FUSED_LAYER_NORM:
    LNImpl = FusedLayerNorm
else:
    import warnings

    warnings.warn(f'Apex is not installed. Falling back to Torch Norm')
    from megatron.core.transformer.torch_norm import WrappedTorchNorm as LNImpl


class BertLMHead(MegatronModule):
    """Masked LM head for Bert.

    Args:
        hidden_size: hidden size
        config (TransformerConfig): TransformerConfig object
    """

    def __init__(self, hidden_size: int, config: TransformerConfig):
        super().__init__(config=config)

        # TODO: Should switch this to TE ?
        self.dense = get_linear_layer(
            hidden_size, hidden_size, config.init_method, config.perform_initialization
        )

        setattr(self.dense.weight, 'sequence_parallel', config.sequence_parallel)
        setattr(self.dense.bias, 'sequence_parallel', config.sequence_parallel)

        self.layer_norm = LNImpl(
            config=config, hidden_size=hidden_size, eps=config.layernorm_epsilon
        )

        self.gelu = torch.nn.functional.gelu

    def forward(self, hidden_states: Tensor) -> Tensor:
        """forward pass"""

        hidden_states = self.dense(hidden_states)
        hidden_states = self.gelu(hidden_states)
        hidden_states = self.layer_norm(hidden_states)
        return hidden_states


================================================
FILE: megatron/core/models/bert/bert_model.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

import warnings
from typing import Literal, Optional

import torch
from torch import Tensor

from megatron.core import parallel_state, tensor_parallel
from megatron.core.config_logger import has_config_logger_enabled, log_config_to_disk
from megatron.core.inference.contexts import BaseInferenceContext
from megatron.core.models.bert.bert_lm_head import BertLMHead
from megatron.core.models.bert.pooler import Pooler
from megatron.core.models.common.embeddings.language_model_embedding import LanguageModelEmbedding
from megatron.core.models.common.embeddings.rotary_pos_embedding import RotaryEmbedding
from megatron.core.models.common.language_module.language_module import LanguageModule
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.transformer.attention import SelfAttentionSubmodules
from megatron.core.transformer.dot_product_attention import (
    DotProductAttention as MCoreDotProductAttention,
)
from megatron.core.transformer.enums import AttnBackend, AttnMaskType, ModelType
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_block import TransformerBlock
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.transformer_layer import TransformerLayerSubmodules
from megatron.core.transformer.utils import get_linear_layer
from megatron.core.utils import deprecate_inference_params, is_te_min_version


class BertModel(LanguageModule):
    """Transformer language model.

    Args:
        config (TransformerConfig): transformer config
        num_tokentypes (int) : Set to 2 when args.bert_binary_head is True, and 0 otherwise.
            Defaults to 0.
        transformer_layer_spec (ModuleSpec): Specifies module to use for transformer layers
        vocab_size (int): vocabulary size
        max_sequence_length (int): maximum size of sequence. This is used for positional embedding
        pre_process (bool): Include embedding layer (used with pipeline parallelism)
        post_process (bool): Include an output layer (used with pipeline parallelism)
        parallel_output (bool): Do not gather the outputs, keep them split across tensor parallel
            ranks
        share_embeddings_and_output_weights (bool): When True, input embeddings and output logit
            weights are shared. Defaults to False.
        position_embedding_type (string): Position embedding type.
            Options ['learned_absolute', 'rope']. Defaults is 'learned_absolute'.
        rotary_percent (float): Percent of rotary dimension to use for rotary position embeddings.
            Defaults to 1.0 (100%). Ignored unless position_embedding_type is 'rope'.
        vp_stage (int): Virtual pipeline stage.
    """

    def __init__(
        self,
        config: TransformerConfig,
        num_tokentypes: int,
        transformer_layer_spec: ModuleSpec,
        vocab_size: int,
        max_sequence_length: int,
        pre_process: bool = True,
        post_process: bool = True,
        fp16_lm_cross_entropy: bool = False,
        parallel_output: bool = True,
        share_embeddings_and_output_weights: bool = False,
        position_embedding_type: Literal['learned_absolute', 'rope'] = 'learned_absolute',
        rotary_percent: float = 1.0,
        seq_len_interpolation_factor: Optional[float] = None,
        add_binary_head=True,
        return_embeddings=False,
        vp_stage: Optional[int] = None,
        pg_collection: Optional[ProcessGroupCollection] = None,
    ):
        super(BertModel, self).__init__(config=config, pg_collection=pg_collection)

        if has_config_logger_enabled(config):
            log_config_to_disk(config, locals(), prefix=type(self).__name__)

        if return_embeddings:
            assert self.post_process and self.add_binary_head

        self.config: TransformerConfig = config
        self.transformer_layer_spec: ModuleSpec = transformer_layer_spec
        self.vocab_size = vocab_size
        self.max_sequence_length = max_sequence_length
        self.pre_process = pre_process
        self.post_process = post_process
        self.fp16_lm_cross_entropy = fp16_lm_cross_entropy
        self.parallel_output = parallel_output
        self.share_embeddings_and_output_weights = share_embeddings_and_output_weights
        self.position_embedding_type = position_embedding_type
        self.add_binary_head = add_binary_head
        self.return_embeddings = return_embeddings
        self.vp_stage = vp_stage

        # megatron core pipelining currently depends on model type
        self.model_type = ModelType.encoder_or_decoder

        self.attn_mask_dimensions = self._sanity_check_attention_and_get_attn_mask_dimension()

        # Embeddings.
        if self.pre_process:
            self.embedding = LanguageModelEmbedding(
                config=self.config,
                vocab_size=self.vocab_size,
                max_sequence_length=self.max_sequence_length,
                position_embedding_type=position_embedding_type,
                num_tokentypes=num_tokentypes,
            )

        if self.position_embedding_type == 'rope':
            self.rotary_pos_emb = RotaryEmbedding(
                kv_channels=self.config.kv_channels,
                rotary_percent=rotary_percent,
                rotary_interleaved=self.config.rotary_interleaved,
                seq_len_interpolation_factor=seq_len_interpolation_factor,
                use_cpu_initialization=self.config.use_cpu_initialization,
            )

        # Transformer.
        self.encoder = TransformerBlock(
            config=self.config,
            spec=self.transformer_layer_spec,
            pre_process=self.pre_process,
            post_process=self.post_process,
            vp_stage=vp_stage,
        )

        # Output
        if post_process:
            # TODO: Make sure you are passing in the mpu_vocab_size properly
            self.lm_head = BertLMHead(config.hidden_size, config)

            self.output_layer = tensor_parallel.ColumnParallelLinear(
                config.hidden_size,
                self.vocab_size,
                config=config,
                init_method=(
                    config.embedding_init_method
                    if config.use_mup and not self.share_embeddings_and_output_weights
                    else config.init_method
                ),
                bias=True,
                skip_bias_add=False,
                gather_output=not self.parallel_output,
                skip_weight_param_allocation=pre_process and share_embeddings_and_output_weights,
            )

            self.binary_head = None
            if self.add_binary_head:
                # TODO: Shoudl switch this to TE ?
                self.binary_head = get_linear_layer(
                    config.hidden_size, 2, config.init_method, config.perform_initialization
                )

                self.pooler = Pooler(
                    config.hidden_size, config.init_method, config, config.sequence_parallel
                )

        if self.pre_process or self.post_process:
            self.setup_embeddings_and_output_layer()

    # pylint: disable=line-too-long
    def _sanity_check_attention_and_get_attn_mask_dimension(self) -> str:
        """We do some checks and return attention mask dimensions for self attention

        Transformer engine library underwent a lot of change. So we need to change dimensions of
        the attention mask depending on the TE version. We also santiy check some arguments.

        1. If we use local version of attention dimension of the mask is [b,1,s,s]
        2. If we use transformer engine > 1.10 we support all 3 backends with padding mask and [b,1,s,s]
        3. If we use transformer engine >= 1.7 but less than 1.10
          a ) Flash and Fused attention uses padding mask with [b,1,1,s]
          b ) Unfused attention works with arbitrary mask with [b,1,s,s]
        4. If we use transformer engine < 1.7
          Flash and fused attention is not supported. Unfused attention will work with padding mask [b,1,s,s]

        Default if you dont set any NVTE_ATTN flag will it will just use the fused path for transformer engine version >= 1.7 and unfused path for other

        Args:
            transformer_layer_spec (ModuleSpec): The transformer layer spec

        Returns:
            str: A string showing the format of the attn mask dimensions
        """
        attention_backend = self.config.attention_backend
        attn_mask_dimensions = None
        assert isinstance(self.transformer_layer_spec.submodules, TransformerLayerSubmodules)
        assert isinstance(
            self.transformer_layer_spec.submodules.self_attention.submodules,
            SelfAttentionSubmodules,
        )
        # For local layer spec we just use b1ss
        if (
            self.transformer_layer_spec.submodules.self_attention.submodules.core_attention
            == MCoreDotProductAttention
        ):
            assert attention_backend in [
                AttnBackend.local,
                AttnBackend.auto,
            ], f'Expected AttnBackend to be local or auto while using mcore self attention, but found {attention_backend}. Set --attn-backend to local or dont use MCore SelfAttention submodule in layer specs'
            attn_mask_dimensions = "b1ss"
        else:
            attn_mask_type = self.transformer_layer_spec.submodules.self_attention.params[
                'attn_mask_type'
            ]
            # For TE >= 1.10 (We always use padding mask and use b11s)
            if is_te_min_version("1.10.0"):
                attn_mask_dimensions = "b11s"
                if attn_mask_type != AttnMaskType.padding:
                    warnings.warn(
                        f'For TE versions >= 1.10 , flash/fused/unfused support padding mask. Setting attention mask from {attn_mask_type} to padding'
                    )
                    self.transformer_layer_spec.submodules.self_attention.params[
                        'attn_mask_type'
                    ] = AttnMaskType.padding
            # For 1.7 >= TE < 1.10 flash and fused path use padding mask with b11s and unfused path uses arbitrary mask with b1ss
            elif is_te_min_version("1.7.0"):
                if attention_backend in [AttnBackend.flash, AttnBackend.fused, AttnBackend.auto]:
                    attn_mask_dimensions = "b11s"
                else:
                    if attn_mask_type != AttnMaskType.arbitrary:
                        warnings.warn(
                            f'For TE versions >= 1.7 but < 1.10 , unfused path supports only arbitrary mask. Setting attention mask from {attn_mask_type} to arbitray'
                        )
                        self.transformer_layer_spec.submodules.self_attention.params[
                            'attn_mask_type'
                        ] = AttnMaskType.arbitrary
                    attn_mask_dimensions = "b1ss"
            # For TE < 1.7 we only support unfused attention with b1ss and padding mask
            else:
                attn_mask_dimensions = "b1ss"
                assert not (attention_backend in [AttnBackend.flash, AttnBackend.fused]), (
                    "Flash and fused attention is not supported with transformer engine version "
                    "< 1.7. Set --attention-backend to unfused or leave it to be default (auto) or upgrade transformer engine >= 1.7"
                )

        return attn_mask_dimensions

    def bert_extended_attention_mask(self, attention_mask: Tensor) -> Tensor:
        """Creates the extended attention mask

        Converts the attention mask of dimension
        [batch size, 1, seq len] to [batch size, 1, seq len, seq len]
        or [batch size, 1, 1, seq_len] and makes it binary

        Args:
            attention_mask (Tensor): The input attention mask

        Returns:
            Tensor: The extended binary attention mask
        """
        # We create a 3D attention mask from a 2D tensor mask.
        if self.attn_mask_dimensions == "b1ss":
            # [b, 1, s]
            attention_mask_b1s = attention_mask.unsqueeze(1)
            # [b, s, 1]
            attention_mask_bs1 = attention_mask.unsqueeze(2)
            # [b, s, s]
            attention_mask_bss = attention_mask_b1s * attention_mask_bs1
            # [b, 1, s, s]
            extended_attention_mask = attention_mask_bss.unsqueeze(1)
        else:
            # [b, 1, 1, s]
            extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(1)

        # Convert attention mask to binary:
        extended_attention_mask = extended_attention_mask < 0.5

        return extended_attention_mask

    def bert_position_ids(self, token_ids):
        """Position ids for bert model"""
        # Create position ids
        seq_length = token_ids.size(1)
        position_ids = torch.arange(seq_length, dtype=torch.long, device=token_ids.device)
        position_ids = position_ids.unsqueeze(0).expand_as(token_ids)

        return position_ids

    def set_input_tensor(self, input_tensor: Tensor) -> None:
        """Sets input tensor to the model.

        See megatron.model.transformer.set_input_tensor()

        Args:
            input_tensor (Tensor): Sets the input tensor for the model.
        """
        # This is usually handled in schedules.py but some inference code still
        # gives us non-lists or None
        if not isinstance(input_tensor, list):
            input_tensor = [input_tensor]

        assert len(input_tensor) == 1, 'input_tensor should only be length 1 for gpt/bert'
        self.encoder.set_input_tensor(input_tensor[0])

    def forward(
        self,
        input_ids: Tensor,
        attention_mask: Tensor,
        tokentype_ids: Tensor = None,
        lm_labels: Tensor = None,
        inference_context=None,
        *,
        inference_params: Optional[BaseInferenceContext] = None,
    ):
        """Forward function of BERT model

        Forward function of the BERT Model This function passes the input tensors
        through the embedding layer, and then the encoder and finally into the post
        processing layer (optional).

        It either returns the Loss values if labels are given  or the final hidden units
        """

        inference_context = deprecate_inference_params(inference_context, inference_params)

        extended_attention_mask = self.bert_extended_attention_mask(attention_mask)

        if parallel_state.is_pipeline_first_stage():
            input_ids = input_ids
            position_ids = self.bert_position_ids(input_ids)
        else:
            position_ids = None
            input_ids = None

        # Encoder embedding.
        if self.pre_process:
            encoder_input = self.embedding(
                input_ids=input_ids, position_ids=position_ids, tokentype_ids=tokentype_ids
            )
        else:
            # intermediate stage of pipeline
            # encoder will get hidden_states from encoder.input_tensor
            encoder_input = None

        # Rotary positional embeddings (Why not move this into BERT/GPTEmberdding ?)
        rotary_pos_emb = None
        if self.position_embedding_type == 'rope':
            rotary_seq_len = self.rotary_pos_emb.get_rotary_seq_len(
                inference_context, self.encoder, encoder_input, self.config
            )
            rotary_pos_emb = self.rotary_pos_emb(rotary_seq_len)

        # Run encoder.
        hidden_states = self.encoder(
            hidden_states=encoder_input,
            attention_mask=extended_attention_mask,
            inference_context=inference_context,
            rotary_pos_emb=rotary_pos_emb,
        )
        if not self.post_process:
            return hidden_states

        if self.add_binary_head:
            pooled_output = self.pooler(hidden_states, 0)
        else:
            pooled_output = None  # for pylint.

        if self.return_embeddings:
            embeddings = torch.transpose(hidden_states, 0, 1)
            masks = torch.sum(attention_mask, dim=1)
            # Collect masked embeddings.
            output = torch.zeros(
                size=(embeddings.shape[0], embeddings.shape[2]),
                dtype=torch.float32,
                device=torch.cuda.current_device(),
            )
            for i, (embedding, mask) in enumerate(zip(embeddings, masks)):
                output[i, :] = torch.mean(embedding[1 : mask - 1], dim=0)
            return output

        # logits and loss
        output_weight = None
        if self.share_embeddings_and_output_weights:
            output_weight = self.shared_embedding_or_output_weight()

        hidden_states_after_lm_head = self.lm_head(hidden_states=hidden_states)
        logits, _ = self.output_layer(hidden_states_after_lm_head, weight=output_weight)

        binary_logits = None
        if self.binary_head is not None:
            binary_logits = self.binary_head(pooled_output)

        if lm_labels is None:
            # [s b h] => [b s h]
            return logits.transpose(0, 1).contiguous(), binary_logits

        loss = self.compute_language_model_loss(lm_labels, logits)

        return loss, binary_logits


================================================
FILE: megatron/core/models/bert/pooler.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import torch
from torch import Tensor

from megatron.core import tensor_parallel
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.utils import get_linear_layer


class Pooler(MegatronModule):
    """Pooler layer.

    Pool hidden states of a specific token (for example start of the
    sequence) and add a linear transformation followed by a tanh.

    Args:
        hidden_size (int): The hidden size_
        init_method (callable): weight initialization method for the linear layer. bias is set to zero.
        config (TransformerConfig): The transformer configuration
        sequence_parallel (bool): Using squence parallel ? Defaults to False
    """

    def __init__(
        self,
        hidden_size: int,
        init_method: callable,
        config: TransformerConfig,
        sequence_parallel: bool = False,
    ):
        super(Pooler, self).__init__(config)
        # TODO: Shoudl switch this to TE ?
        self.dense = get_linear_layer(
            hidden_size, hidden_size, init_method, config.perform_initialization
        )
        self.sequence_parallel = sequence_parallel

    def forward(self, hidden_states: Tensor, sequence_index=0):
        # hidden_states: [s, b, h]
        # sequence_index: index of the token to pool.

        # gather data along sequence dimensions
        # same pooler is run on all tensor parallel nodes
        if self.sequence_parallel:
            hidden_states = tensor_parallel.gather_from_sequence_parallel_region(
                hidden_states, tensor_parallel_output_grad=False
            )

        pooled = hidden_states[sequence_index, :, :]
        pooled = self.dense(pooled)
        pooled = torch.tanh(pooled)
        return pooled


================================================
FILE: megatron/core/models/common/__init__.py
================================================


================================================
FILE: megatron/core/models/common/embeddings/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from .rope_utils import apply_rotary_pos_emb
from .rotary_pos_embedding import MultimodalRotaryEmbedding, RotaryEmbedding
from .yarn_rotary_pos_embedding import YarnRotaryEmbedding, _yarn_get_mscale


================================================
FILE: megatron/core/models/common/embeddings/language_model_embedding.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

from typing import Literal, Optional

import torch
from torch import Tensor

from megatron.core import tensor_parallel
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.utils import get_tensor_model_parallel_group_if_none, nvtx_decorator


class LanguageModelEmbedding(MegatronModule):
    """Language model embeddings.

    Args:
        config (TransformerConfig): config object with all necessary configs for TransformerBlock
        vocab_size (int): vocabulary size
        max_sequence_length (int): maximum size of sequence. This
                             is used for positional embedding
        add_position_embedding (bool): Add a position embedding.
        embedding_dropout_prob (float): dropout probability for embeddings
        num_tokentypes (int): Set to 0 without binary head, and 2 with a binary head. Defaults to 0.
        scatter_to_sequence_parallel (bool): Set to False to disable scatter of embedding
            across sequence parallel region. Defaults to True.
    """

    def __init__(
        self,
        config: TransformerConfig,
        vocab_size: int,
        max_sequence_length: int,
        position_embedding_type: Literal['learned_absolute', 'rope', 'none'] = 'learned_absolute',
        num_tokentypes: int = 0,
        scatter_to_sequence_parallel: bool = True,
        tp_group: Optional[torch.distributed.ProcessGroup] = None,
    ):
        super().__init__(config=config)

        self.config: TransformerConfig = config
        self.vocab_size: int = vocab_size
        self.max_sequence_length: int = max_sequence_length
        self.add_position_embedding: bool = position_embedding_type == 'learned_absolute'
        self.num_tokentypes = num_tokentypes
        self.scatter_to_sequence_parallel = scatter_to_sequence_parallel
        self.tp_group = get_tensor_model_parallel_group_if_none(tp_group)
        self.reduce_scatter_embeddings = (
            (not self.add_position_embedding)
            and self.num_tokentypes <= 0
            and self.config.sequence_parallel
            and self.scatter_to_sequence_parallel
        )

        # Word embeddings (parallel).
        self.word_embeddings = tensor_parallel.VocabParallelEmbedding(
            num_embeddings=self.vocab_size,
            embedding_dim=self.config.hidden_size,
            init_method=self.config.embedding_init_method,
            reduce_scatter_embeddings=self.reduce_scatter_embeddings,
            config=self.config,
            tp_group=self.tp_group,
        )

        # Position embedding (serial).
        if self.add_position_embedding:
            self.position_embeddings = torch.nn.Embedding(
                self.max_sequence_length, self.config.hidden_size
            )

            # Initialize the position embeddings.
            if self.config.perform_initialization:
                self.config.embedding_init_method(self.position_embeddings.weight)

        if self.num_tokentypes > 0:
            self.tokentype_embeddings = torch.nn.Embedding(
                self.num_tokentypes, self.config.hidden_size
            )
            # Initialize the token-type embeddings.
            if self.config.perform_initialization:
                self.config.embedding_init_method(self.tokentype_embeddings.weight)
        else:
            self.tokentype_embeddings = None

        # Embeddings dropout
        self.embedding_dropout = torch.nn.Dropout(self.config.hidden_dropout)

    def zero_parameters(self):
        """Zero out all parameters in embedding."""
        self.word_embeddings.weight.data.fill_(0)
        self.word_embeddings.weight.shared = True
        self.position_embeddings.weight.data.fill_(0)
        self.position_embeddings.weight.shared = True
        if self.num_tokentypes > 0:
            self.tokentype_embeddings.weight.data.fill_(0)
            self.tokentype_embeddings.weight.shared = True

    @nvtx_decorator()
    def forward(self, input_ids: Tensor, position_ids: Tensor, tokentype_ids: int = None) -> Tensor:
        """Forward pass of the embedding module.

        Args:
            input_ids (Tensor): The input tokens
            position_ids (Tensor): The position id's used to calculate position embeddings
            tokentype_ids (int): The token type ids. Used when args.bert_binary_head is
                set to True. Defaults to None

        Returns:
            Tensor: The output embeddings
        """
        word_embeddings = self.word_embeddings(input_ids)
        if self.add_position_embedding:
            position_embeddings = self.position_embeddings(position_ids)
            embeddings = word_embeddings + position_embeddings
        else:
            embeddings = word_embeddings

        if not self.reduce_scatter_embeddings:
            # Data format change to avoid explicit tranposes : [b s h] --> [s b h].
            embeddings = embeddings.transpose(0, 1).contiguous()

        if tokentype_ids is not None:
            assert self.tokentype_embeddings is not None
            # [b s h] -> [s b h] (So that it can be added with embeddings)
            tokentype_embedding = self.tokentype_embeddings(tokentype_ids).permute(1, 0, 2)
            embeddings = embeddings + tokentype_embedding
        else:
            assert self.tokentype_embeddings is None

        # MuP: scale embeddings by alpha_input.
        if self.config.use_mup and self.config.mup_embedding_mult != 1.0:
            embeddings = embeddings * self.config.mup_embedding_mult

        # If the input flag for fp32 residual connection is set, convert for float.
        if self.config.fp32_residual_connection:
            embeddings = embeddings.float()

        # Dropout.
        if self.config.sequence_parallel:
            if not self.reduce_scatter_embeddings and self.scatter_to_sequence_parallel:
                embeddings = tensor_parallel.scatter_to_sequence_parallel_region(
                    embeddings, group=self.tp_group
                )
            # `scatter_to_sequence_parallel_region` returns a view, which prevents
            # the original tensor from being garbage collected. Clone to facilitate GC.
            # Has a small runtime cost (~0.5%).
            if self.config.clone_scatter_output_in_embedding and self.scatter_to_sequence_parallel:
                embeddings = embeddings.clone()
            with tensor_parallel.get_cuda_rng_tracker().fork():
                embeddings = self.embedding_dropout(embeddings)
        else:
            embeddings = self.embedding_dropout(embeddings)

        return embeddings


================================================
FILE: megatron/core/models/common/embeddings/relative_pos_embedding.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import logging
import math
from typing import Callable, Optional

import torch
from torch import Tensor, nn

from megatron.core.inference.contexts import BaseInferenceContext
from megatron.core.transformer.transformer_block import TransformerBlock
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.utils import deprecate_inference_params, nvtx_decorator

logger = logging.getLogger(__name__)


__all__ = ['RelativePositionEmbedding']


class RelativePositionEmbedding(nn.Module):
    """Relative Position Embedding for language model.

    Args:

    """

    def __init__(
        self,
        bidirectional: bool,
        init_method: Callable,
        num_attention_heads: int,
        relative_attention_num_buckets: int = 32,
        relative_attention_max_distance: int = 128,
    ) -> None:
        super().__init__()

        self.bidirectional = bidirectional
        self.relative_attention_num_buckets = relative_attention_num_buckets
        self.relative_attention_max_distance = relative_attention_max_distance
        self.relative_attention_bias = torch.nn.Embedding(
            self.relative_attention_num_buckets, num_attention_heads
        )
        init_method(self.relative_attention_bias.weight)

    def _relative_position_bucket(
        self, relative_position, bidirectional=True, num_buckets=32, max_distance=128
    ):
        """
        Adapted from HuggingFace T5 Model:
        https://github.com/huggingface/transformers/blob/329f5dbf97a5cb2473914c88c05aa3dcb242e19a/
        src/transformers/models/t5/modeling_t5.py#L397

        Translate relative position to a bucket number for relative attention.
        The relative position is defined as memory_position - query_position, i.e. the
        distance in tokens from the attending position to the attended-to position.
        If bidirectional=False, then positive relative positions are invalid. We use
        smaller buckets for small absolute relative_position and larger buckets for
        larger absolute relative_positions. All relative positions >=max_distance map
        to the same bucket. All relative positions <=-max_distance map to the same bucket.
        This should allow for more graceful generalization to longer sequences than the
        model has been trained on.

        Args:
            relative_position: an int32 Tensor
            bidirectional: a boolean - whether the attention is bidirectional
            num_buckets: an integer
            max_distance: an integer
        Returns:
            a Tensor with the same shape as relative_position,
            containing int32 values in the range [0, num_buckets)
        """
        relative_buckets = 0
        if bidirectional:
            num_buckets //= 2
            relative_buckets += (relative_position > 0).to(torch.long) * num_buckets
            relative_position = torch.abs(relative_position)
        else:
            relative_position = -torch.min(relative_position, torch.zeros_like(relative_position))
        # now relative_position is in the range [0, inf)

        # half of the buckets are for exact increments in positions
        max_exact = num_buckets // 2
        is_small = relative_position < max_exact

        # The other half of the buckets are for logarithmically bigger
        # bins in positions up to max_distance
        relative_position_if_large = max_exact + (
            torch.log(relative_position.float() / max_exact)
            / math.log(max_distance / max_exact)
            * (num_buckets - max_exact)
        ).to(torch.long)
        relative_position_if_large = torch.min(
            relative_position_if_large, torch.full_like(relative_position_if_large, num_buckets - 1)
        )

        relative_buckets += torch.where(is_small, relative_position, relative_position_if_large)
        return relative_buckets

    def _compute_bias(self, query_length, key_length):
        """
        Adapted from HuggingFace T5 Model
        https://github.com/huggingface/transformers/blob/329f5dbf97a5cb2473914c88c05aa3dcb242e19a/
        src/transformers/models/t5/modeling_t5.py#L444C9-L444C21

        Compute binned relative position bias

        Args:
            query_length (int): The length of the query sequence
            (e.g., the input sequence in attention).
            key_length (int): The length of the key sequence
            (e.g., the sequence to compare against in attention).

        Returns:
            torch.Tensor: A tensor representing the relative position bias, with shape
            (1, num_heads, query_length, key_length).
        """
        device = self.relative_attention_bias.weight.device
        context_position = torch.arange(query_length, dtype=torch.long, device=device)[:, None]
        memory_position = torch.arange(key_length, dtype=torch.long, device=device)[None, :]

        relative_position = memory_position - context_position  # shape(query_length,key_length)
        relative_position_bucket = self._relative_position_bucket(
            relative_position,  # shape (query_length, key_length)
            bidirectional=self.bidirectional,
            num_buckets=self.relative_attention_num_buckets,
            max_distance=self.relative_attention_max_distance,
        )
        values = self.relative_attention_bias(
            relative_position_bucket
        )  # shape(query_length,key_length,num_heads)
        values = values.permute([2, 0, 1]).unsqueeze(
            0
        )  # shape(1, num_heads,query_length,key_length)
        return values

    @staticmethod
    def get_relative_seq_len(
        inference_context: BaseInferenceContext,
        transformer: TransformerBlock,
        transformer_input: Tensor,
        transformer_config: TransformerConfig,
        *,
        inference_params: Optional[BaseInferenceContext] = None,
    ) -> float:
        """Function to get the rotary sequence length.

        Args:
            inference_context (BaseInferenceContext): Used during Inference time
            transformer (TransformerBlock): The transformer block (decoder/encoder) used
                by the model
            transformer_input (Tensor): Input tensor to the transformer
            transformer_config (TransformerConfig): Transformer config used by the model

        Returns:
            float: The rotary sequence length
        """

        inference_context = deprecate_inference_params(inference_context, inference_params)

        if inference_context is not None:
            relative_seq_len = inference_context.max_sequence_length
        else:
            if transformer.input_tensor is not None:
                relative_seq_len = transformer.input_tensor.size(0)
            else:
                relative_seq_len = transformer_input.size(0)

            if transformer_config.sequence_parallel:
                relative_seq_len *= transformer_config.tensor_model_parallel_size

        return relative_seq_len

    @nvtx_decorator()
    def forward(self, query_seq_length, key_seq_length):
        """
        Args:
        Returns:
        """
        return self._compute_bias(query_seq_length, key_seq_length)


================================================
FILE: megatron/core/models/common/embeddings/rope_utils.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from __future__ import annotations

import warnings
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    from megatron.core.transformer.transformer_config import TransformerConfig

import logging

import torch
from torch import Tensor

from megatron.core import parallel_state

logger = logging.getLogger(__name__)

try:
    from megatron.core.extensions.transformer_engine import fused_apply_rotary_pos_emb
except ImportError:
    fused_apply_rotary_pos_emb = None


try:
    from megatron.core.extensions.transformer_engine import fused_apply_rotary_pos_emb_thd
except ImportError:
    fused_apply_rotary_pos_emb_thd = None


try:
    from flash_attn.layers.rotary import apply_rotary_emb as apply_rotary_emb_flash
except ImportError:
    apply_rotary_emb_flash = None


__all__ = [
    'apply_rotary_pos_emb',
    'apply_rotary_emb_flash',
    'apply_rotary_pos_emb_with_cos_sin',
    'fused_apply_rotary_pos_emb',
    'fused_apply_rotary_pos_emb_thd',
    'get_pos_emb_on_this_cp_rank',
]


def get_pos_emb_on_this_cp_rank(
    pos_emb: Tensor, seq_dim: int, cp_group: torch.distributed.ProcessGroup
) -> Tensor:
    """Get the position embedding on the current context parallel rank.

    Args:
        pos_emb (Tensor): Positional embedding tensor
        seq_dim (int): Sequence dimension
        cp_group (torch.distributed.ProcessGroup): The context parallel group
    """
    if cp_group is None:
        raise ValueError("cp_group must be provided to get positional embedding per CP rank")
    cp_size = cp_group.size()
    cp_rank = cp_group.rank()
    cp_idx = torch.tensor(
        [cp_rank, (2 * cp_size - cp_rank - 1)], device="cpu", pin_memory=True
    ).cuda(non_blocking=True)
    pos_emb = pos_emb.view(
        *pos_emb.shape[:seq_dim], 2 * cp_size, -1, *pos_emb.shape[(seq_dim + 1) :]
    )
    pos_emb = pos_emb.index_select(seq_dim, cp_idx)
    pos_emb = pos_emb.view(*pos_emb.shape[:seq_dim], -1, *pos_emb.shape[(seq_dim + 2) :])
    return pos_emb


def _rotate_half(x: Tensor, rotary_interleaved: bool) -> Tensor:
    """Change sign so the last dimension becomes [-odd, +even]

    Args:
        x (Tensor): Input tensor

    Returns:
        Tensor: Tensor rotated half
    """
    if not rotary_interleaved:
        x1, x2 = torch.chunk(x, 2, dim=-1)
        return torch.cat((-x2, x1), dim=-1)
    else:
        x1 = x[:, :, :, ::2]
        x2 = x[:, :, :, 1::2]
        x_new = torch.stack((-x2, x1), dim=-1)
        return x_new.view(x_new.shape[0], x_new.shape[1], x_new.shape[2], -1)


def _apply_rotary_pos_emb_bshd(
    t: Tensor,
    freqs: Tensor,
    rotary_interleaved: bool = False,
    multi_latent_attention: bool = False,
    mscale: float = 1.0,
) -> Tensor:
    """Apply rotary positional embedding to input tensor T.

    check https://kexue.fm/archives/8265 for detailed formulas

    Args:
        t (Tensor): Input tensor T is of shape [seq_length, ... , dim]
        freqs (Tensor): Rotary Positional embedding tensor freq is of shape [seq_length, ..., dim]

    Returns:
        Tensor: The input tensor after applying RoPE
    """
    rot_dim = freqs.shape[-1]

    # ideally t_pass is empty so rotary pos embedding is applied to all tensor t
    t, t_pass = t[..., :rot_dim], t[..., rot_dim:]

    if multi_latent_attention:
        x1 = t[..., 0::2]
        x2 = t[..., 1::2]
        t = torch.cat((x1, x2), dim=-1)

    # first part is cosine component
    # second part is sine component, need to change signs with _rotate_half method
    cos_ = (torch.cos(freqs) * mscale).to(t.dtype)
    sin_ = (torch.sin(freqs) * mscale).to(t.dtype)

    t = (t * cos_) + (_rotate_half(t, rotary_interleaved) * sin_)
    return torch.cat((t, t_pass), dim=-1)


def _get_thd_freqs_on_this_cp_rank(
    cp_rank: int, cp_size: int, x: Tensor, freqs: Tensor, offset: int = 0
) -> Tensor:
    """Get the correct frequency slice for this context parallel rank with optional sequence offset.

    Args:
        cp_rank: Current context parallel rank
        cp_size: Total context parallel size
        x: Input tensor for current sequence
        freqs: Frequency tensor - either full batch positions or max sequence length
        offset: Starting position offset for this sequence in the original batch (default: 0)

    Returns:
        Tensor: Frequency slice corresponding to this CP rank's portion of the sequence

    Note:
        This function supports two modes based on the offset parameter:
        1. offset > 0: Exact mapping mode - freqs contains all positions across all sequences.
           The offset ensures each sequence gets frequencies from its actual position within
           the overall batch. Critical for non-1D RoPE in VLMs where spatial positions matter.
        2. offset = 0: Traditional mode - freqs contains only max sequence length positions.
           All sequences use frequencies starting from position 0, preserving backward
           compatibility.
    """
    if cp_size > 1:
        cp_seg = x.size(0) // 2
        full_seqlen = cp_size * x.size(0)
        # Apply offset to both forward and backward segments for context parallelism
        # offset=0: traditional behavior, freqs[0:cp_seg] and freqs[...]
        # offset>0: exact mapping, freqs[offset+0:offset+cp_seg] and freqs[offset+...]
        return torch.cat(
            [
                freqs[offset + cp_rank * cp_seg : offset + (cp_rank + 1) * cp_seg],
                freqs[
                    offset
                    + full_seqlen
                    - (cp_rank + 1) * cp_seg : offset
                    + full_seqlen
                    - cp_rank * cp_seg
                ],
            ]
        )
    else:
        # For single context parallel rank:
        # offset=0: use freqs[0:x.size(0)] (traditional)
        # offset>0: use freqs[offset:offset+x.size(0)] (exact mapping)
        return freqs[offset : offset + x.size(0)]


def _apply_rotary_pos_emb_thd(
    t: Tensor,
    cu_seqlens: Tensor,
    freqs: Tensor,
    rotary_interleaved: bool = False,
    multi_latent_attention: bool = False,
    mscale: float = 1.0,
    cp_group: torch.distributed.ProcessGroup = None,
) -> Tensor:
    """A baseline implementation of applying RoPE for `thd` format.

    Args:
        t (Tensor): Input tensor T is of shape [t, h, d]
        cu_seqlens(Tensor):  Cumulative sum of sequence lengths in a batch for `t`,
        with shape [b + 1] and dtype torch.int32.
        freqs (Tensor): Rotary Positional embedding tensor freq is of shape [max_s, 1, 1, d]
        cp_group (torch.distributed.ProcessGroup): The context parallel group

    Returns:
        Tensor: Shape [t, h, d]. The input tensor after applying RoPE.
    """

    if cp_group is None:
        raise ValueError("cp_group must be provided for THD format RoPE")
    cp_size = cp_group.size()
    cp_rank = cp_group.rank()
    seqlens = ((cu_seqlens[1:] - cu_seqlens[:-1]) // cp_size).tolist()

    # Handle two different frequency tensor formats:
    # 1. If freqs.size(0) == cu_seqlens[-1]: freqs contains all positions across all sequences
    #    -> Use offset-based mapping for exact positional correspondence
    # 2. Otherwise: freqs contains only max sequence length positions
    #    -> Use traditional mapping without offsets (map first :seqlen part)
    if freqs.dim() >= 1 and freqs.size(0) == cu_seqlens[-1]:
        # CASE 1: Exact mapping with offsets
        # Build packed freqs in one pass, then apply once to the whole packed tensor
        sequence_splits = torch.split(t, seqlens)
        freq_slices = []
        for i, x in enumerate(sequence_splits):
            # cu_seqlens[i] is the starting offset of this sequence in the original batch
            seq_start_offset = cu_seqlens[i].item()
            freq_slices.append(
                _get_thd_freqs_on_this_cp_rank(cp_rank, cp_size, x, freqs, seq_start_offset)
            )

        freqs_packed = torch.cat(freq_slices, dim=0)

        return _apply_rotary_pos_emb_bshd(
            t.unsqueeze(1),
            freqs_packed,
            rotary_interleaved=rotary_interleaved,
            multi_latent_attention=multi_latent_attention,
            mscale=mscale,
        ).squeeze(1)
    else:
        # CASE 2: Traditional mapping without offsets
        # Build packed freqs for all sequences using the standard mapping, then apply once
        sequence_splits = torch.split(t, seqlens)
        freqs_packed = torch.cat(
            [_get_thd_freqs_on_this_cp_rank(cp_rank, cp_size, x, freqs) for x in sequence_splits],
            dim=0,
        )

        return _apply_rotary_pos_emb_bshd(
            t.unsqueeze(1),
            freqs_packed,
            rotary_interleaved=rotary_interleaved,
            multi_latent_attention=multi_latent_attention,
            mscale=mscale,
        ).squeeze(1)


def apply_rotary_pos_emb(
    t: Tensor,
    freqs: Tensor,
    config: TransformerConfig,
    cu_seqlens: Optional[Tensor] = None,
    mscale: float = 1.0,
    cp_group: torch.distributed.ProcessGroup = None,
):
    """
    Reroute to the appropriate apply_rotary_pos_emb function depending on
    fused/unfused kernels, or bshd (conventional) / thd (packed seq) format
    """
    global fused_apply_rotary_pos_emb, fused_apply_rotary_pos_emb_thd

    # Keep for backward compatibility. Will deprecate in the future.
    if cp_group is None:
        cp_group = parallel_state.get_context_parallel_group()

    if config.apply_rope_fusion:
        if cu_seqlens is None:
            # NOTE: TE backends do not support mRoPE in bshd format when bs > 1.
            use_unfused = False
            if config.mrope_section is not None and freqs.shape[1] > 1:
                # TODO: Add a check in TransformerConfig and remove this unfused implementation.
                warnings.warn(
                    "apply_rope_fusion does not support mRoPE in bshd format when bs > 1. "
                    "Please set apply_rope_fusion to false. This will become an error in v0.16."
                )
                use_unfused = True
            if mscale != 1.0:
                warnings.warn(
                    f"mscale={mscale} is not supported by TE's fused RoPE. "
                    "Using unfused implementation."
                )
                use_unfused = True
            if not use_unfused:
                assert fused_apply_rotary_pos_emb is not None, "apply_rope_fusion is not available."
                return fused_apply_rotary_pos_emb(t, freqs, interleaved=config.rotary_interleaved)
        else:
            assert fused_apply_rotary_pos_emb_thd is not None, "apply_rope_fusion is not available."
            return fused_apply_rotary_pos_emb_thd(
                t,
                cu_seqlens,
                freqs,
                cp_size=cp_group.size(),
                cp_rank=cp_group.rank(),
                interleaved=config.rotary_interleaved,
            )
    # use unfused implementation
    if cu_seqlens is None:
        return _apply_rotary_pos_emb_bshd(
            t,
            freqs,
            rotary_interleaved=config.rotary_interleaved,
            multi_latent_attention=config.multi_latent_attention,
            mscale=mscale,
        )
    else:
        return _apply_rotary_pos_emb_thd(
            t,
            cu_seqlens,
            freqs,
            rotary_interleaved=config.rotary_interleaved,
            multi_latent_attention=config.multi_latent_attention,
            mscale=mscale,
            cp_group=cp_group,
        )


def apply_rotary_pos_emb_with_cos_sin(
    t: Tensor, cos: Tensor, sin: Tensor, rotary_interleaved: bool = False
) -> Tensor:
    """
    This function applies rotary positional embedding to the target tensor t
    using precomputed cos and sin of size (seq_len, d_rot / 2)
    """
    cos = cos.to(t.dtype)
    sin = sin.to(t.dtype)

    if apply_rotary_emb_flash is None:
        # Combine cos and sin into freqs
        freqs = torch.stack([cos, sin], dim=-1).flatten(start_dim=-2)

        # Expand freqs to match t's shape
        while freqs.dim() < t.dim():
            freqs = freqs.unsqueeze(1)
        freqs = freqs.expand(t.shape[:-1] + (-1,))

        y = _apply_rotary_pos_emb_bshd(
            t,
            freqs,
            rotary_interleaved=rotary_interleaved,
            multi_latent_attention=False,
            mscale=1.0,
        )
    else:
        # Use Flash Attention's optimized kernel for rotary embedding
        t = t.permute(1, 0, 2, 3)
        y = apply_rotary_emb_flash(t, cos, sin, rotary_interleaved)
        y = y.permute(1, 0, 2, 3)

    return y


================================================
FILE: megatron/core/models/common/embeddings/rotary_pos_embedding.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

from __future__ import annotations

from typing import TYPE_CHECKING, List, Optional

if TYPE_CHECKING:
    from megatron.core.transformer.transformer_config import TransformerConfig
    from megatron.core.transformer.transformer_block import TransformerBlock
    from megatron.core.inference.contexts import BaseInferenceContext
    from megatron.core.packed_seq_params import PackedSeqParams

import logging
import math
from functools import lru_cache

import torch
from torch import Tensor, nn

from megatron.core import parallel_state
from megatron.core.models.common.embeddings.rope_utils import (  # for backward compatibility; pylint: disable=unused-import
    _apply_rotary_pos_emb_bshd,
    _apply_rotary_pos_emb_thd,
    _rotate_half,
    apply_rotary_pos_emb,
    get_pos_emb_on_this_cp_rank,
)
from megatron.core.utils import deprecate_inference_params, internal_api

logger = logging.getLogger(__name__)


__all__ = ['RotaryEmbedding', 'MultimodalRotaryEmbedding']


class RotaryEmbedding(nn.Module):
    """Rotary Embedding for language model.

    Args:
        kv_channels (int): Projection weights dimension in multi-head attention. Obtained
            from transformer config
        rotary_percent (float): Percent of rotary dimension to use for rotary position
            embeddings.
        rotary_interleaved (bool, optional): If True, interleaved rotary position embeddings.
            Defaults to False.
        seq_len_interpolation_factor (float, optional): scale of linearly interpolating RoPE
            for longer sequences. The value must be a float larger than 1.0. Defaults to None
        rotary_base (int, optional): Base period for rotary position embeddings. Defaults to
            10000.
        rope_scaling (bool, optional): Apply rope scaling as used in llama 3.x.
        rope_scaling_factor (float, optional): rope scaling factor in llama 3.x. Defaults to 8.
        use_cpu_initialization (bool, optional): If False, initialize the inv_freq directly
            on the GPU. Defaults to False
        cp_group (torch.distributed.ProcessGroup, optional): Process group for context parallel.
            Defaults to None.
    """

    def __init__(
        self,
        kv_channels: int,
        rotary_percent: float,
        rotary_interleaved: bool = False,
        seq_len_interpolation_factor: float = None,
        rotary_base: int = 10000,
        rope_scaling: bool = False,
        rope_scaling_factor: float = 8.0,
        use_cpu_initialization: bool = False,
        cp_group: Optional[torch.distributed.ProcessGroup] = None,
    ) -> None:
        super().__init__()

        dim = kv_channels
        if rotary_percent < 1.0:
            dim = int(dim * rotary_percent)
        self.rotary_interleaved = rotary_interleaved

        self.seq_len_interpolation_factor = seq_len_interpolation_factor
        device = 'cpu' if use_cpu_initialization else torch.cuda.current_device()
        self.inv_freq = 1.0 / (
            rotary_base ** (torch.arange(0, dim, 2, dtype=torch.float32, device=device) / dim)
        )

        if rope_scaling:
            self.inv_freq = self._apply_scaling(self.inv_freq, factor=rope_scaling_factor)

        self.cp_group = (
            cp_group
            if cp_group is not None
            else parallel_state.get_context_parallel_group(check_initialized=False)
        )

    def _apply_scaling(
        self,
        freqs,
        factor=8,
        low_freq_factor=1,
        high_freq_factor=4,
        original_max_position_embeddings=8192,
    ):
        # This implementation is adapted from:
        # https://github.com/huggingface/transformers/blob/2a5a6ad18aa22e98429bb5ecb880660328030ea0/src/transformers/modeling_rope_utils.py#L303-L343

        factor = factor  # `8` in the original implementation
        low_freq_factor = low_freq_factor  # `1` in the original implementation
        high_freq_factor = high_freq_factor  # `4` in the original implementation
        old_context_len = original_max_position_embeddings  # `8192` in the original implementation

        low_freq_wavelen = old_context_len / low_freq_factor
        high_freq_wavelen = old_context_len / high_freq_factor

        wavelen = 2 * math.pi / freqs
        # wavelen < high_freq_wavelen: do nothing
        # wavelen > low_freq_wavelen: divide by factor
        inv_freq_llama = torch.where(wavelen > low_freq_wavelen, freqs / factor, freqs)
        # otherwise: interpolate between the two, using a smooth factor
        smooth_factor = (old_context_len / wavelen - low_freq_factor) / (
            high_freq_factor - low_freq_factor
        )
        smoothed_inv_freq = (
            1 - smooth_factor
        ) * inv_freq_llama / factor + smooth_factor * inv_freq_llama
        is_medium_freq = ~(wavelen < high_freq_wavelen) * ~(wavelen > low_freq_wavelen)
        inv_freq_llama = torch.where(is_medium_freq, smoothed_inv_freq, inv_freq_llama)

        return inv_freq_llama

    def get_freqs_non_repeated(self, max_seq_len: int, offset: int = 0) -> Tensor:
        """Generates matrix of frequencies based on positions in the sequence,
        used to create positional encodings"""
        seq = (
            torch.arange(max_seq_len, device=self.inv_freq.device, dtype=self.inv_freq.dtype)
            + offset
        )

        if self.seq_len_interpolation_factor is not None:
            seq *= 1 / self.seq_len_interpolation_factor

        freqs = torch.outer(seq, self.inv_freq)  # [seq len, dim]

        return freqs

    def get_cos_sin(self, max_seq_len: int, offset: int = 0) -> (Tensor, Tensor):
        """Cosine and sine values for RoPE are precomputed for all positions up to the maximum
        sequence length"""
        freqs = self.get_freqs_non_repeated(max_seq_len, offset)
        cos = torch.cos(freqs)
        sin = torch.sin(freqs)
        return cos, sin

    def get_emb(self, max_seq_len: int, offset: int = 0) -> Tensor:
        """Forward pass of RoPE embedding before CP sharding.

        Args:
            max_seq_len (int): Maximum size of sequence
            offset (int, optional): RoPE offset. Defaults to 0.

        Returns:
            Tensor: Embeddings after applying RoPE.
        """
        if self.inv_freq.device.type == 'cpu':
            # move `inv_freq` to GPU once at the first micro-batch forward pass
            self.inv_freq = self.inv_freq.to(device=torch.cuda.current_device())

        freqs = self.get_freqs_non_repeated(max_seq_len, offset)
        # first part even vector components, second part odd vector components,
        #  2 * dim in dimension size
        if not self.rotary_interleaved:
            emb = torch.cat((freqs, freqs), dim=-1)
        else:
            emb = torch.stack((freqs.view(-1, 1), freqs.view(-1, 1)), dim=-1).view(
                freqs.shape[0], -1
            )
        # emb [seq_length, .., dim]
        emb = emb[:, None, None, :]
        return emb

    @lru_cache(maxsize=32)
    @internal_api
    def forward(
        self,
        max_seq_len: int,
        offset: int = 0,
        packed_seq: bool = False,
        cp_group: Optional[torch.distributed.ProcessGroup] = None,
    ) -> Tensor:
        """Forward pass of RoPE embedding.

        Args:
            max_seq_len (int): Maximum size of sequence
            offset (int, optional): RoPE offset. Defaults to 0.
            packed_seq (bool, optional): Whether to use packed sequence. Defaults to False.
            cp_group (torch.distributed.ProcessGroup, optional): Context parallel group.
                Defaults to None.

        Returns:
            Tensor: Embeddings after applying RoPE.
        """
        emb = self.get_emb(max_seq_len, offset)
        if cp_group is None:
            cp_group = self.cp_group
        if cp_group is not None and cp_group.size() > 1 and not packed_seq:
            # slice rotary_pos_emb along sequence dimension
            # and select the parition of the current CP rank
            emb = get_pos_emb_on_this_cp_rank(emb, 0, cp_group)

        return emb

    def _load_from_state_dict(self, state_dict, prefix, *args, **kwargs):
        state_dict.pop(f'{prefix}inv_freq', None)
        return super()._load_from_state_dict(state_dict, prefix, *args, **kwargs)

    def get_rotary_seq_len(
        self,
        inference_context: BaseInferenceContext,
        transformer: TransformerBlock,
        transformer_input: Tensor,
        transformer_config: TransformerConfig,
        packed_seq_params: Optional[PackedSeqParams] = None,
        *,
        inference_params: Optional[BaseInferenceContext] = None,
    ) -> int:
        """Function to get the rotary sequence length.

        Args:
            inference_context : Used during Inference time
            transformer (TransformerBlock): The transformer block (decoder/encoder) used
                by the model
            transformer_input (Tensor): Input tensor to the transformer
            transformer_config (TransformerConfig): Transformer config used by the model
            packed_seq_params (PackedSeqParams): Packed sequence params

        Returns:
            int: The rotary sequence length
        """

        inference_context = deprecate_inference_params(inference_context, inference_params)

        if packed_seq_params is not None:
            # max_seqlen are the max sequence length in the packed sequence before being divived
            # by the tp and cp size.
            return max(packed_seq_params.max_seqlen_q, packed_seq_params.max_seqlen_kv)
        elif inference_context is not None:
            # For dynamic batching, use the max of context's max_sequence_length and the actual
            # input size to ensure rotary embeddings cover CUDA graph warmup token counts
            context_max_seq_len = inference_context.max_sequence_length
            input_seq_len = 0
            if transformer_input is not None:
                input_seq_len = transformer_input.size(0)
            elif transformer is not None and transformer.input_tensor is not None:
                input_seq_len = transformer.input_tensor.size(0)
            rotary_seq_len = max(context_max_seq_len, input_seq_len)
        else:
            if transformer is not None and transformer.input_tensor is not None:
                rotary_seq_len = transformer.input_tensor.size(0)
            else:
                rotary_seq_len = transformer_input.size(0)

            if transformer_config.sequence_parallel:
                rotary_seq_len *= transformer_config.tensor_model_parallel_size

        rotary_seq_len *= transformer_config.context_parallel_size

        return rotary_seq_len


class MultimodalRotaryEmbedding(nn.Module):
    """Multimodal Rotary Embedding for language model.
    Based on https://github.com/alibaba/Pai-Megatron-Patch/blob/
    efa5a752e845267936db9ae7df1b6aba92e9ff9a/megatron_patch/model/qwen2_vl/rotary_pos_embedding.py
    Copyright (c) 2025 alibaba/Pai-Megatron-Patch. Apache 2.0 license.

    Args:
        kv_channels (int): Projection weights dimension in multi-head attention. Obtained
            from transformer config
        rotary_percent (float): Percent of rotary dimension to use for rotary position
            embeddings.
        rotary_interleaved (bool, optional): If True, interleaved rotary position embeddings.
            Defaults to False.
        seq_len_interpolation_factor (float, optional): scale of linearly interpolating RoPE
            for longer sequences. The value must be a float larger than 1.0. Defaults to None
        rotary_base (int, optional): Base period for rotary position embeddings. Defaults to
            10000.
    """

    def __init__(
        self,
        kv_channels: int,
        rotary_percent: float,
        rotary_interleaved: bool = False,
        seq_len_interpolation_factor: Optional[float] = None,
        rotary_base: int = 10000,
        cp_group: Optional[torch.distributed.ProcessGroup] = None,
    ) -> None:
        super().__init__()

        dim = kv_channels
        if rotary_percent < 1.0:
            dim = int(dim * rotary_percent)
        self.rotary_interleaved = rotary_interleaved

        self.seq_len_interpolation_factor = seq_len_interpolation_factor
        self.inv_freq = 1.0 / (
            rotary_base
            ** (
                torch.arange(0, dim, 2, dtype=torch.float32, device=torch.cuda.current_device())
                / dim
            )
        )
        self.cp_group = (
            cp_group
            if cp_group is not None
            else parallel_state.get_context_parallel_group(check_initialized=False)
        )

    def forward(
        self,
        position_ids: torch.Tensor,
        mrope_section: List[int],
        cp_group: Optional[torch.distributed.ProcessGroup] = None,
    ) -> Tensor:
        """Forward pass of multimodal RoPE embedding.

        Args:
            position_ids (torch.Tensor): A postion_id tensor with shape [3, batchsize, seqlens]
            mrope_section (list[int]): Multimodal rope section is for channel dimension of temporal,
                height and width in rope calculation.
            cp_group (torch.distributed.ProcessGroup, optional): Context parallel group.
                Defaults to None.

        Returns:
            Tensor: Embeddings after applying RoPE.
        """
        seq = position_ids.to(device=self.inv_freq.device, dtype=self.inv_freq.dtype)

        if self.seq_len_interpolation_factor is not None:
            seq *= 1 / self.seq_len_interpolation_factor

        # shape (3, bs, dim, 1)
        inv_freq_expanded = self.inv_freq[None, None, :, None].expand(3, seq.shape[1], -1, 1)
        # shape (3, bs, 1, seq_length)
        seq_expanded = seq[:, :, None, :].float()
        # shape (3, bs, seq_length, dim)
        freqs = (inv_freq_expanded @ seq_expanded).transpose(2, 3)
        # first part even vector components, second part odd vector components,
        #  2 * dim in dimension size
        if not self.rotary_interleaved:
            emb = torch.cat((freqs, freqs), dim=-1)  # shape (3, bs, seq_length, 2 * dim)
        else:
            bs = freqs.shape[1]
            emb = torch.stack((freqs.view(3, bs, -1, 1), freqs.view(3, bs, -1, 1)), dim=-1).view(
                3, bs, freqs.shape[0], -1
            )

        # generate freqs with mrope_section
        # shape (bs, seq_length, 2 * dim)
        mrope_section = mrope_section * 2
        emb = torch.cat([m[i % 3] for i, m in enumerate(emb.split(mrope_section, dim=-1))], dim=-1)

        # shape (seq_length, bs, 1, 2 * dim)
        emb = emb[..., None, :].transpose(0, 1).contiguous()
        if cp_group is None:
            cp_group = self.cp_group
        if cp_group is not None and cp_group.size() > 1:
            # slice rotary_pos_emb along sequence dimension and select the parition of the current
            # CP rank
            emb = get_pos_emb_on_this_cp_rank(emb, 0, cp_group)
        return emb


================================================
FILE: megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from __future__ import annotations

import logging
import math
from functools import lru_cache
from typing import Optional

import torch
from torch import Tensor

from megatron.core.models.common.embeddings.rope_utils import get_pos_emb_on_this_cp_rank
from megatron.core.models.common.embeddings.rotary_pos_embedding import RotaryEmbedding
from megatron.core.transformer import TransformerConfig
from megatron.core.utils import internal_api

logger = logging.getLogger(__name__)


class YarnRotaryEmbedding(RotaryEmbedding):
    """Yarn Rotary Embedding for language model.

    Args:
        kv_channels (int): Projection weights dimension in multi-head attention. Obtained from
            transformer config.
        rotary_percent (float): Percent of rotary dimension to use for rotary position embeddings.
        rotary_interleaved (bool, optional): If True, interleaved rotary position embeddings.
            Defaults to False.
        seq_len_interpolation_factor (float, optional): scale of linearly interpolating RoPE for
            longer sequences. The value must be a float larger than 1.0. Defaults to None
        rotary_base (float, optional): Base period for rotary position embeddings. Defaults to
            10000.
        use_cpu_initialization (bool, optional): If False, initialize the inv_freq directly on
            the GPU. Defaults to False.
        scaling_factor (float, optional): Scaling factor for Yarn RoPE. Defaults to 1.0.
        original_max_position_embeddings (int, optional): Original maximum position embeddings
            length. Defaults to 4096.
        beta_fast (float, optional): Fast beta value for Yarn RoPE. Defaults to 32.
        beta_slow (float, optional): Slow beta value for Yarn RoPE. Defaults to 1.
        mscale (float, optional): Mscale value for Yarn RoPE. Defaults to 1.
        mscale_all_dim (float, optional): Mscale all dim value for Yarn RoPE. Defaults to 0.
        correction_range_round_to_int (bool): Whether to round dim range bounds to integer.
            Defaults to True
        cp_group (torch.distributed.ProcessGroup, optional): Process group for context parallel.
            Defaults to None.
    """

    def __init__(
        self,
        kv_channels: int,
        rotary_percent: float = 1.0,
        rotary_interleaved: bool = False,
        seq_len_interpolation_factor: Optional[float] = None,
        rotary_base: float = 10000.0,
        use_cpu_initialization: bool = False,
        scaling_factor: float = 1.0,
        original_max_position_embeddings: int = 4096,
        beta_fast: float = 32.0,
        beta_slow: float = 1.0,
        mscale: float = 1.0,
        mscale_all_dim: float = 0.0,
        correction_range_round_to_int: bool = True,
        cp_group: Optional[torch.distributed.ProcessGroup] = None,
    ):
        self.dim = kv_channels
        self.rotary_base = rotary_base
        self.scaling_factor = scaling_factor
        self.original_max_position_embeddings = original_max_position_embeddings
        self.beta_fast = beta_fast
        self.beta_slow = beta_slow
        self.mscale = mscale
        self.mscale_all_dim = mscale_all_dim
        self.correction_range_round_to_int = correction_range_round_to_int

        device = 'cpu' if use_cpu_initialization else torch.cuda.current_device()

        with torch.device(device):
            self.inv_freq_extra = 1.0 / (
                self.rotary_base
                ** (torch.arange(0, self.dim, 2, dtype=torch.float32, device=device) / self.dim)
            )
            self.inv_freq_inter = 1.0 / (
                self.scaling_factor
                * self.rotary_base
                ** (torch.arange(0, self.dim, 2, dtype=torch.float32, device=device) / self.dim)
            )
            super().__init__(
                kv_channels=kv_channels,
                rotary_percent=rotary_percent,
                rotary_interleaved=rotary_interleaved,
                seq_len_interpolation_factor=seq_len_interpolation_factor,
                rotary_base=rotary_base,
                use_cpu_initialization=use_cpu_initialization,
                cp_group=cp_group,
            )

            self._set_cos_sin_cache(
                self.original_max_position_embeddings, offset=0, dtype=torch.get_default_dtype()
            )

            # clear the lru_cache for the forward method. If not cleared, the cache of forward
            # method causes a memory leak in NeMo-RL.
            self.forward.cache_clear()

    def get_emb(self, max_seq_len: int, offset: int = 0) -> Tensor:
        """Forward pass of Yarn Rotary Embedding.

        Args:
            max_seq_len (int): Maximum size of sequence
            offset (int, optional): RoPE offset. Defaults to 0.

        Returns:
            Tensor: Embeddings after applying Yarn RoPE.
        """
        assert (
            not self.rotary_interleaved
        ), "Yarn RoPE does not support interleaved rotary embeddings"

        if self.inv_freq_extra.device.type == 'cpu':
            # move `inv_freq_extra` to GPU once at the first micro-batch forward pass
            self.inv_freq_extra = self.inv_freq_extra.to(device=torch.cuda.current_device())

        if self.inv_freq_inter.device.type == 'cpu':
            # move `inv_freq_inter` to GPU once at the first micro-batch forward pass
            self.inv_freq_inter = self.inv_freq_inter.to(device=torch.cuda.current_device())

        low, high = _yarn_find_correction_range(
            self.beta_fast,
            self.beta_slow,
            self.dim,
            self.rotary_base,
            self.original_max_position_embeddings,
            self.correction_range_round_to_int,
        )
        inv_freq_mask = 1.0 - _yarn_linear_ramp_mask(
            low, high, self.dim // 2, device=self.inv_freq_extra.device
        ).to(dtype=torch.float32)
        inv_freq = self.inv_freq_inter * (1 - inv_freq_mask) + self.inv_freq_extra * inv_freq_mask

        seq = (
            torch.arange(
                max_seq_len, device=self.inv_freq_extra.device, dtype=self.inv_freq_extra.dtype
            )
            + offset
        )

        freqs = torch.outer(seq, inv_freq)

        _mscale = _yarn_get_concentration_factor(
            self.scaling_factor, self.mscale, self.mscale_all_dim
        )

        emb = torch.cat((freqs, freqs), dim=-1)
        # emb [seq_length, .., dim]
        emb = emb[:, None, None, :]
        return emb, _mscale

    @lru_cache(maxsize=32)
    @internal_api
    def forward(
        self,
        max_seq_len: int,
        offset: int = 0,
        packed_seq: bool = False,
        cp_group: Optional[torch.distributed.ProcessGroup] = None,
    ) -> Tensor:
        """Forward pass of Yarn Rotary Embedding.

        Args:
            max_seq_len (int): Maximum size of sequence
            offset (int, optional): RoPE offset. Defaults to 0.
            packed_seq (bool, optional): Whether to use packed sequence. Defaults to False.
            cp_group (torch.distributed.ProcessGroup, optional): Context parallel group.
                Defaults to None.

        Returns:
            Tensor: Embeddings after applying Yarn RoPE.
        """
        emb, _mscale = self.get_emb(max_seq_len, offset)
        if cp_group is None:
            cp_group = self.cp_group
        if cp_group is not None and cp_group.size() > 1 and not packed_seq:
            # slice rotary_pos_emb along sequence dimension
            # and select the parition of the current CP rank
            emb = get_pos_emb_on_this_cp_rank(emb, 0, cp_group)
        return emb, _mscale

    def _set_cos_sin_cache(self, seq_len, offset, dtype, packed_seq=False):
        self.max_seq_len_cached = seq_len
        self.offset_cached = offset
        self.dtype_cached = dtype
        self.packed_seq_cached = packed_seq

        emb, _mscale = self.forward(seq_len, offset, packed_seq)
        self.register_buffer(
            "cos_cached", (emb.cos() * _mscale).to(dtype).contiguous(), persistent=False
        )
        self.register_buffer(
            "sin_cached", (emb.sin() * _mscale).to(dtype).contiguous(), persistent=False
        )

    def get_cached_cos_sin(
        self, seq_len, offset=0, dtype=torch.get_default_dtype(), packed_seq=False
    ):
        """Get cached cos and sin values."""
        if (
            seq_len > self.max_seq_len_cached
            or offset != self.offset_cached
            or dtype != self.dtype_cached
            or packed_seq != self.packed_seq_cached
        ):
            self._set_cos_sin_cache(seq_len, offset, dtype, packed_seq)
        return (self.cos_cached[:seq_len, ...], self.sin_cached[:seq_len, ...])


# Inverse dim formula to find dim based on number of rotations
def _yarn_find_correction_dim(
    num_rotations: float, dim: int, rotary_base: float = 10000, max_position_embeddings: int = 2048
) -> float:
    return (dim * math.log(max_position_embeddings / (num_rotations * 2 * math.pi))) / (
        2 * math.log(rotary_base)
    )


# Find dim range bounds based on rotations
def _yarn_find_correction_range(
    low_rot: float,
    high_rot: float,
    dim: int,
    rotary_base: float = 10000,
    max_position_embeddings: int = 2048,
    round_to_int: bool = True,
) -> tuple[int, int]:
    low = _yarn_find_correction_dim(low_rot, dim, rotary_base, max_position_embeddings)
    high = _yarn_find_correction_dim(high_rot, dim, rotary_base, max_position_embeddings)
    if round_to_int:
        low = math.floor(low)
        high = math.ceil(high)
    return max(low, 0), min(high, dim - 1)  # Clamp values just in case


def _yarn_linear_ramp_mask(min: float, max: float, dim: int, device: torch.device) -> Tensor:
    if min == max:
        max += 0.001  # Prevent singularity

    linear_func = (torch.arange(dim, dtype=torch.float32, device=device) - min) / (max - min)
    ramp_func = torch.clamp(linear_func, 0, 1)
    return ramp_func


def _yarn_get_mscale(scale: float = 1, mscale: float = 1) -> float:
    if scale <= 1:
        return 1.0
    return 0.1 * mscale * math.log(scale) + 1.0


@lru_cache(maxsize=8)
def _yarn_get_concentration_factor(
    scaling_factor: float, mscale: Optional[float], mscale_all_dim: Optional[float]
) -> float:
    """
    Get the concentration factor (factor multiplied to the sine and cosine components of the
    embedding). This factor is also known as attention factor, and sometimes homonymously known as
    "mscale"
    """
    if mscale is None or mscale_all_dim is None:
        return _yarn_get_mscale(scaling_factor)
    return float(
        _yarn_get_mscale(scaling_factor, mscale) / _yarn_get_mscale(scaling_factor, mscale_all_dim)
    )


def _yarn_get_concentration_factor_from_config(config: TransformerConfig) -> float:
    if hasattr(config, "yarn_rotary_scaling_factor"):
        return _yarn_get_concentration_factor(
            config.yarn_rotary_scaling_factor,
            getattr(config, "yarn_mscale", None),
            getattr(config, "yarn_mscale_all_dim", None),
        )
    return 1.0


================================================
FILE: megatron/core/models/common/language_module/__init__.py
================================================


================================================
FILE: megatron/core/models/common/language_module/language_module.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
import logging
import os
from typing import Optional, Tuple

import torch
from torch import Tensor

from megatron.core import parallel_state, tensor_parallel
from megatron.core.dist_checkpointing.mapping import ShardedStateDict

try:
    from megatron.core.extensions.transformer_engine import te_parallel_cross_entropy
except:
    te_parallel_cross_entropy = None
from megatron.core.fusions.fused_cross_entropy import fused_vocab_parallel_cross_entropy
from megatron.core.pipeline_parallel.utils import (
    is_pp_first_stage,
    is_pp_last_stage,
    is_vp_first_stage,
    is_vp_last_stage,
)
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.transformer.enums import AttnBackend, CudaGraphScope
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.multi_token_prediction import tie_word_embeddings_state_dict
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.utils import ensure_metadata_has_dp_cp_group
from megatron.core.utils import (
    get_tensor_model_parallel_group_if_none,
    is_te_min_version,
    make_tp_sharded_tensor_for_checkpoint,
)


class LanguageModule(MegatronModule):
    """Base language module that has common helper functions used across GPT, BERT etc.

    Args:
        config (TransformerConfig): Input transformer config for the model
        pg_collection (ProcessGroupCollection): Model communication process groups
    """

    def __init__(
        self, config: TransformerConfig, pg_collection: Optional[ProcessGroupCollection] = None
    ) -> None:
        super().__init__(config=config)
        self._set_attention_backend()
        if pg_collection is None:
            pg_collection = ProcessGroupCollection.use_mpu_process_groups()
        self.pg_collection = pg_collection
        self.cp_group = pg_collection.cp
        self.tp_group = get_tensor_model_parallel_group_if_none(pg_collection.tp)
        self.pp_group = pg_collection.pp
        assert hasattr(self.pg_collection, 'embd'), (
            "pg_collection must have a embd. In previous version, it used default "
            "`parallel_state.default_embedding_ranks` to create the process group."
            "If you are using the default process group, please use"
            "`parallel_state.get_embedding_group()` "
            "If you don't need embd_group, you need to explicitly set it to None."
        )
        self.embd_group = pg_collection.embd
        self.vp_stage = None
        self.vp_size = self.config.virtual_pipeline_model_parallel_size

    def _is_in_embd_group(self):
        if self.embd_group is None:
            return False
        if torch.distributed.get_rank() in torch.distributed.get_process_group_ranks(
            self.embd_group
        ):
            if getattr(self, 'mtp_process', False):
                return True
            if (
                torch.distributed.get_rank()
                == torch.distributed.get_process_group_ranks(self.embd_group)[0]
            ):
                return is_vp_first_stage(self.vp_stage, self.vp_size) and is_pp_first_stage(
                    self.pp_group
                )
            elif (
                torch.distributed.get_rank()
                == torch.distributed.get_process_group_ranks(self.embd_group)[-1]
            ):
                return is_vp_last_stage(self.vp_stage, self.vp_size) and is_pp_last_stage(
                    self.pp_group
                )
            else:
                return True
        return False

    # pylint: disable=line-too-long
    def _set_attention_backend(self):
        """Set attention backend

        Transformer engine works based on optout. By default all three attention backend flags are set to 1. So if the user choses a particular attention backend we set the other two to 0. If the user choses local, we set all 3 TE env variables to 0.
        """

        def check_and_set_env_variable(
            env_variable_name: str, expected_value: int, attn_type: AttnBackend
        ) -> None:
            current_value = os.getenv(env_variable_name)
            assert current_value is None or current_value == str(
                expected_value
            ), f'{env_variable_name} set to {current_value}, but expected {expected_value} for attention backend type {attn_type.name}. unset NVTE_FLASH_ATTN, NVTE_FUSED_ATTN and NVTE_UNFUSED_ATTN. Use the --attention-backend argument if you want to choose between (flash/fused/unfused/auto/local). Default is auto.'
            os.environ[env_variable_name] = str(expected_value)

        if self.config.attention_backend == AttnBackend.local:
            check_and_set_env_variable("NVTE_FLASH_ATTN", 0, AttnBackend.flash)
            check_and_set_env_variable("NVTE_FUSED_ATTN", 0, AttnBackend.flash)
            check_and_set_env_variable("NVTE_UNFUSED_ATTN", 0, AttnBackend.flash)
        elif self.config.attention_backend == AttnBackend.flash:
            check_and_set_env_variable("NVTE_FLASH_ATTN", 1, AttnBackend.flash)
            check_and_set_env_variable("NVTE_FUSED_ATTN", 0, AttnBackend.flash)
            check_and_set_env_variable("NVTE_UNFUSED_ATTN", 0, AttnBackend.flash)
        elif self.config.attention_backend == AttnBackend.fused:
            check_and_set_env_variable("NVTE_FLASH_ATTN", 0, AttnBackend.fused)
            check_and_set_env_variable("NVTE_FUSED_ATTN", 1, AttnBackend.fused)
            check_and_set_env_variable("NVTE_UNFUSED_ATTN", 0, AttnBackend.fused)
        elif self.config.attention_backend == AttnBackend.unfused:
            check_and_set_env_variable("NVTE_FLASH_ATTN", 0, AttnBackend.unfused)
            check_and_set_env_variable("NVTE_FUSED_ATTN", 0, AttnBackend.unfused)
            check_and_set_env_variable("NVTE_UNFUSED_ATTN", 1, AttnBackend.unfused)
        elif self.config.attention_backend == AttnBackend.auto:
            check_and_set_env_variable("NVTE_FLASH_ATTN", 1, AttnBackend.auto)
            check_and_set_env_variable("NVTE_FUSED_ATTN", 1, AttnBackend.auto)
            check_and_set_env_variable("NVTE_UNFUSED_ATTN", 1, AttnBackend.auto)

    def compute_language_model_loss(self, labels: Tensor, logits: Tensor) -> Tensor:
        """Computes the language model loss (Cross entropy across vocabulary)

        Args:
            labels (Tensor): The labels of dimension [batch size, seq length]
            logits (Tensor): The final logits returned by the output layer of the transformer model

        Returns:
            Tensor: Loss tensor of dimensions [batch size, sequence_length]
        """
        # [b s] => [s b]
        labels = labels.transpose(0, 1).contiguous()
        if self.config.cross_entropy_loss_fusion:
            if self.config.cross_entropy_fusion_impl == 'te':
                if te_parallel_cross_entropy is not None:
                    labels = torch.as_strided(labels, labels.size(), (labels.size()[1], 1))
                    # Use is_cg_capturable=True for full iteration CUDA graphs to avoid torch.equal checks
                    is_cg_capturable = (
                        hasattr(self.config, 'cuda_graph_scope')
                        and CudaGraphScope.full_iteration in self.config.cuda_graph_scope
                    )
                    if is_cg_capturable and not is_te_min_version("2.7.0"):
                        from megatron.core.utils import get_te_version

                        current_version = get_te_version()
                        raise AssertionError(
                            f"CUDA graph compatible cross entropy requires TransformerEngine >= 2.7.0, "
                            f"but found version {current_version}. Please upgrade TransformerEngine "
                            f"or set cuda_graph_scope to a value other than 'full_iteration'."
                        )

                    loss = te_parallel_cross_entropy(
                        logits, labels, self.pg_collection.tp, is_cg_capturable
                    )
                else:
                    raise RuntimeError("Trying to use a TE block when it's not present.")
            elif self.config.cross_entropy_fusion_impl == 'native':
                loss = fused_vocab_parallel_cross_entropy(logits, labels, self.pg_collection.tp)
        else:
            loss = tensor_parallel.vocab_parallel_cross_entropy(logits, labels)

        # [s b] => [b, s]
        loss = loss.transpose(0, 1).contiguous()
        return loss

    def setup_embeddings_and_output_layer(self) -> None:
        """Sets up embedding layer in first stage and output layer in last stage.

        This function initalizes word embeddings in the final stage when we are
        using pipeline parallelism and sharing word embeddings, and sets up param
        attributes on the embedding and output layers.

        Parameter attributes set:
        - `is_embedding_or_output_parameter`: True for embedding + output layer weights.
          Used by decoupled_lr, Muon optimizer, and other Megatron features.
        - `is_embedding_parameter`: True for MuP "embedding-class" parameters.
          Used by MuP for table-8 style optimizer grouping (base LR/eps for vector-like params).
        """

        # Mark embedding and output layer for decoupled_lr and other features.
        # This is the original Megatron attribute used by decoupled_lr, Muon, FSDP, etc.
        if self.pre_process and hasattr(self, 'embedding'):
            self.embedding.word_embeddings.weight.is_embedding_or_output_parameter = True
        if (
            self.post_process
            and hasattr(self, 'output_layer')
            and self.output_layer.weight is not None
        ):
            self.output_layer.weight.is_embedding_or_output_parameter = True

        # Mark embedding-class parameters for MuP optimizer grouping.
        # Under MuP table-8-style grouping, embeddings/output use base LR/eps while
        # hidden matrix-like params use width-scaled LR/eps.
        mtp_process = getattr(self, 'mtp_process', False)
        if self.config.use_mup and (self.pre_process or mtp_process) and hasattr(self, 'embedding'):
            for param in self.embedding.parameters():
                param.is_embedding_parameter = True
        if (
            self.config.use_mup
            and self.post_process
            and hasattr(self, 'output_layer')
            and self.output_layer.weight is not None
        ):
            self.output_layer.weight.is_embedding_parameter = True

        # If share_embeddings_and_output_weights is True, we need to maintain duplicated
        # embedding weights in post processing stage. If use Multi-Token Prediction (MTP),
        # we also need to maintain duplicated embedding weights in mtp process stage.
        # So we need to copy embedding weights from pre processing stage as initial parameters
        # in these cases.
        if not self.share_embeddings_and_output_weights and not getattr(
            self.config, 'mtp_num_layers', 0
        ):
            return

        if self.config.pipeline_model_parallel_size == 1:
            # Zero out wgrad if sharing embeddings between two layers on same
            # pipeline stage to make sure grad accumulation into main_grad is
            # correct and does not include garbage values (e.g., from torch.empty).
            self.shared_embedding_or_output_weight().zero_out_wgrad = True
            return

        if (
            is_vp_first_stage(self.vp_stage, self.vp_size)
            and is_pp_first_stage(self.pp_group)
            and self.pre_process
            and not self.post_process
        ):
            self.shared_embedding_or_output_weight().shared_embedding = True

        if (
            (self.post_process and self.share_embeddings_and_output_weights)
            or getattr(self, 'mtp_process', False)
        ) and not self.pre_process:
            assert not (
                is_vp_first_stage(self.vp_stage, self.vp_size) and is_pp_first_stage(self.pp_group)
            )
            # set weights of the duplicated embedding to 0 here,
            # then copy weights from pre processing stage using all_reduce below.
            weight = self.shared_embedding_or_output_weight()
            weight.data.fill_(0)
            weight.shared = True
            weight.shared_embedding = True
            # Keep optimizer grouping consistent for tied embedding/output copies.
            if self.config.use_mup:
                weight.is_embedding_parameter = True

        # Parameters are shared between the word embeddings layers, and the
        # heads at the end of the model. In a pipelined setup with more than
        # one stage, the initial embedding layer and the head are on different
        # workers, so we do the following:
        # 1. Create a second copy of word_embeddings on the last stage, with
        #    initial parameters of 0.0.
        # 2. Do an all-reduce between the first and last stage to ensure that
        #    the two copies of word_embeddings start off with the same
        #    parameter values.
        # 3. In the training loop, before an all-reduce between the grads of
        #    the two word_embeddings layers to ensure that every applied weight
        #    update is the same on both stages.

        # Ensure that first and last stages have the same initial parameter
        # values.
        if torch.distributed.is_initialized():
            if self._is_in_embd_group() and not self.config.init_model_with_meta_device:
                weight = self.shared_embedding_or_output_weight()
                weight.data = weight.data.cuda()
                torch.distributed.all_reduce(weight.data, group=self.embd_group)

        elif not getattr(LanguageModule, "embedding_warning_printed", False):
            logging.getLogger(__name__).warning(
                "Distributed processes aren't initialized, so the output layer "
                "is not initialized with weights from the word embeddings. "
                "If you are just manipulating a model this is fine, but "
                "this needs to be handled manually. If you are training "
                "something is definitely wrong."
            )
            LanguageModule.embedding_warning_printed = True

    def _scale_logits(self, logits: Tensor) -> Tensor:
        """Apply MuP output scaling to logits.

        When MuP is enabled, scales logits by mup_output_mult (auto-set to 1/width_mult
        if left at default) to keep output variance stable across widths.

        Args:
            logits (Tensor): Raw logits from the output layer.

        Returns:
            Tensor: Scaled logits if MuP is enabled and mup_output_mult != 1.0,
                    otherwise unchanged logits.
        """
        if not self.config.use_mup:
            return logits
        if self.config.mup_output_mult != 1.0:
            return logits * self.config.mup_output_mult
        return logits

    def shared_embedding_or_output_weight(self) -> Tensor:
        """Gets the embedding weight or output logit weights when share embedding and output weights set to True
          or when use Multi-Token Prediction (MTP).

        Returns:
            Tensor: During pre processing or MTP process it returns the input embeddings weight while during post processing it returns the final output layers weight
        """
        if self.pre_process or getattr(self, 'mtp_process', False):
            # Multi-Token Prediction (MTP) need both embedding layer and output layer.
            # So there will be both embedding layer and output layer in the mtp process stage.
            # When share_embeddings_and_output_weights is True, the embedding weight is the
            # canonical shared weight and is passed to the output layer during forward.
            assert hasattr(
                self, 'embedding'
            ), f"embedding is needed in this pipeline stage, but it is not initialized."
            return self.embedding.word_embeddings.weight
        elif self.post_process:
            return self.output_layer.weight
        return None

    def sharded_state_dict(
        self,
        prefix: str = '',
        sharded_offsets: Tuple[Tuple[int, int, int]] = (),
        metadata: Optional[dict] = None,
    ) -> ShardedStateDict:
        """Sharded state dict implementation that handles the output layer weights tying.

        Args:
            prefix (str): Module name prefix.
            sharded_offsets (tuple): PP related offsets, expected to be empty at this module level.
            metadata (Optional[Dict]): metadata controlling sharded state dict creation.

        Returns:
            ShardedStateDict: sharded state dict for the LanguageModel
        """
        assert not sharded_offsets, "Unexpected sharded offsets"

        # Guard for cases metadata is not provided
        metadata = ensure_metadata_has_dp_cp_group(metadata)

        sharded_state_dict = super().sharded_state_dict(prefix, sharded_offsets, metadata)

        first_stage_word_emb_key = f'{prefix}embedding.word_embeddings.weight'
        output_layer_weight_key = f'{prefix}output_layer.weight'
        output_layer_bias_key = f'{prefix}output_layer.bias'

        # Multi-Token Prediction (MTP) needs embedding layer in mtp process stage.
        # If MTP is not placed in the pre processing stage, we need to maintain a copy of
        # embedding layer in the mtp process stage and tie it to the embedding in the pre
        # processing stage.
        # Note: MTP loss is computed at post_process stage, so the output_layer on mtp_process
        # rank doesn't need special tying - it's not used for loss computation.
        if getattr(self, 'mtp_process', False) and not self.pre_process:
            emb_weight = self.embedding.word_embeddings.weight
            tie_word_embeddings_state_dict(
                sharded_state_dict,
                emb_weight,
                first_stage_word_emb_key,
                tp_group=self.tp_group,
                dp_cp_group=metadata['dp_cp_group'],
            )
        if self.share_embeddings_and_output_weights:
            self.tie_embeddings_and_output_weights_state_dict(
                sharded_state_dict, output_layer_weight_key, first_stage_word_emb_key, metadata
            )
        elif self.post_process:
            # Make sure the output layer follows the embeddings padding logic
            sharded_state_dict[output_layer_weight_key].allow_shape_mismatch = True

        # Regardless of sharing the output weights with embeddings, we must handle the bias padding
        if self.post_process and output_layer_bias_key in sharded_state_dict:
            sharded_state_dict[output_layer_bias_key].allow_shape_mismatch = True

        return sharded_state_dict

    def tie_embeddings_and_output_weights_state_dict(
        self,
        sharded_state_dict: ShardedStateDict,
        output_layer_weight_key: str,
        first_stage_word_emb_key: str,
        metadata: dict = {},
    ) -> None:
        """Ties the embedding and output weights in a given sharded state dict.

        Args:
            sharded_state_dict (ShardedStateDict): state dict with the weight to tie
            output_layer_weight_key (str): key of the output layer weight in the state dict.
                This entry will be replaced with a tied version
            first_stage_word_emb_key (str): this must be the same as the
                ShardedTensor.key of the first stage word embeddings.

        Returns: None, acts in-place
        """
        if not self.post_process:
            # No output layer
            assert output_layer_weight_key not in sharded_state_dict, sharded_state_dict.keys()
            return

        if self.pre_process:
            # Output layer is equivalent to the embedding already
            return

        # If use Multi-Token Prediction (MTP), we need maintain both embedding layer and output
        # layer in mtp process stage. In this case, if share_embeddings_and_output_weights is True,
        # the shared weights will be stored in embedding layer, and output layer will not have
        # any weight.
        if getattr(self, 'mtp_process', False):
            # No output layer
            assert output_layer_weight_key not in sharded_state_dict, sharded_state_dict.keys()
            return

        # Replace the default output layer with a one sharing the weights with the embedding
        del sharded_state_dict[output_layer_weight_key]
        tensor = self.shared_embedding_or_output_weight()
        last_stage_word_emb_replica_id = (
            1,  # copy of first stage embedding
            0,
            parallel_state.get_data_parallel_rank(with_context_parallel=True),
        )

        sharded_state_dict[output_layer_weight_key] = make_tp_sharded_tensor_for_checkpoint(
            tensor=tensor,
            key=first_stage_word_emb_key,
            replica_id=last_stage_word_emb_replica_id,
            allow_shape_mismatch=True,
            tp_group=self.tp_group,
            dp_cp_group=metadata['dp_cp_group'],
        )


================================================
FILE: megatron/core/models/common/model_chunk_schedule_plan.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from contextlib import nullcontext
from typing import Optional

import torch
from torch import Tensor

from megatron.core.enums import Fp8Recipe
from megatron.core.fp8_utils import get_fp8_context
from megatron.core.pipeline_parallel.utils import (
    AbstractSchedulePlan,
    NoopScheduleNode,
    get_comm_stream,
    get_comp_stream,
)


class ModelChunkState:
    """State shared across a model chunk.

    This class holds state that is shared between different components
    of a model chunk, such as input tensors, parameters, and configuration.
    """

    pass


class TransformerLayerSchedulePlan:
    """Schedule the executing plan of the nodes in a transformer/mtp layer.

    This class organizes the sub-modules of a transformer/mtp layer,
    including attention, post attention, MLP, dispatch, combine and
    mtp post process nodes.

    layer (TransformerLayerSchedulePlan)
    ├── attn (TransformerLayerNode): attention -> layernorm -> router -> dispatch preprocess
    ├── moe_dispatch (TransformerLayerNode): dispatch All2All
    ├── mlp (TransformerLayerNode): mlp module
    ├── moe_combine (TransformerLayerNode): combine All2All
    └── mtp_post_process (PostProcessNode): mtp post process

    Note that MTP layer has the same operation and execution order with TransformerLayer regarding
    moe_dispatch, mlp, moe_combine, but contains extra operations in attn and mtp_post_process:
    * mtp.attn wraps around transformer_layer.attn with extra norm, proj and embedding operations.
    * mtp.mtp_post_process contains output_layer, mtp loss operations, whereas
      transformer_layer.mtp_post_process is empty.
    """

    attn = None
    moe_dispatch = None
    mlp = None
    moe_combine = None
    mtp_post_process = None

    def __init__(self, layer, event, chunk_state, comp_stream, comm_stream, extra_args={}):
        """Initializes a transformer layer schedule plan.

        Args:
            layer (TransformerLayer):
                split a transformer layer into multiple nodes for fine-grained scheduling.
            event (torch.cuda.Event):
                record CUDA event across multiple nodes on different streams for synchronization.
            chunk_state (ModelChunkState): model state shared in the model chunk.
            comp_stream (torch.cuda.Stream): CUDA stream for computation.
            comm_stream (torch.cuda.Stream): CUDA stream for communication.
            extra_args (dict): extra arguments for the layer.

        The event and chunk_state are binded to the TransformerModelChunkSchedulePlan
        and shared across all layers in the model chunk.
        """
        from megatron.core.models.gpt.fine_grained_callables import TransformerLayerState

        self.config = layer.config
        self.layer_state = TransformerLayerState()
        self.chunk_state = chunk_state
        self.layer = layer
        self.event = event
        self.comp_stream = comp_stream
        self.comm_stream = comm_stream

        # get callable nodes for transformer/mtp layer
        self._build_callable_nodes(event, comp_stream, comm_stream, extra_args)

    def release_state(self):
        """Release reference, this helps avoid memory leak."""
        if hasattr(self, 'attn') and self.attn is not None:
            del self.attn
            self.attn = None
        if hasattr(self, 'moe_dispatch') and self.moe_dispatch is not None:
            del self.moe_dispatch
            self.moe_dispatch = None
        if hasattr(self, 'mlp') and self.mlp is not None:
            del self.mlp
            self.mlp = None
        if hasattr(self, 'moe_combine') and self.moe_combine is not None:
            del self.moe_combine
            self.moe_combine = None
        if hasattr(self, 'mtp_post_process') and self.mtp_post_process is not None:
            del self.mtp_post_process
            self.mtp_post_process = None
        if hasattr(self, 'layer_state') and self.layer_state is not None:
            del self.layer_state
            self.layer_state = None
        if hasattr(self, 'layer'):
            del self.layer

    def _build_callable_nodes(self, event, comp_stream, comm_stream, extra_args):
        """
        Builds the callable nodes for the transformer/mtp layer:
            attn, mlp, moe_dispatch and moe_combine, and mtp_post_process.
        """
        from megatron.core.models.gpt.fine_grained_callables import (
            TransformerLayerNode,
            build_layer_callables,
        )
        from megatron.core.transformer.moe.moe_layer import MoELayer
        from megatron.core.transformer.multi_token_prediction import MultiTokenPredictionLayer

        # build the forward and backward callables for the transformer/mtp layer
        fwd_callables, bwd_dw_callable_map = build_layer_callables(self.layer)

        # get flags for latter use
        is_mtp = isinstance(self.layer, MultiTokenPredictionLayer)
        transformer_layer = self.layer.mtp_model_layer if is_mtp else self.layer
        is_moe = isinstance(transformer_layer.mlp, MoELayer)
        num_local_experts = transformer_layer.mlp.num_local_experts if is_moe else None

        extra_args["config"] = self.layer.config
        extra_args["is_moe"] = is_moe
        extra_args["num_local_experts"] = num_local_experts
        extra_args["delay_wgrad_compute"] = self.layer.config.delay_wgrad_compute
        extra_args["is_mtp"] = is_mtp

        # wrapper to help create TransformerLayerNode
        def create_node(stream, module, name):
            bwd_dw_callables = bwd_dw_callable_map.get(name, None)
            return TransformerLayerNode(
                stream,
                event,
                self.layer_state,
                self.chunk_state,
                module,
                name=name,
                bwd_dw_callables=bwd_dw_callables,
                extra_args=extra_args,
            )

        (
            attn_module,
            moe_dispatch_module,
            mlp_module,
            moe_combine_module,
            mtp_post_process_module,
        ) = fwd_callables

        # Create nodes for different operations in the layer
        # Each node type has a predefined name that determines its memory strategy
        self.attn = create_node(comp_stream, attn_module, "attn")
        self.mlp = create_node(comp_stream, mlp_module, "mlp")
        if is_moe:
            self.moe_dispatch = create_node(comm_stream, moe_dispatch_module, "moe_dispatch")
            self.moe_combine = create_node(comm_stream, moe_combine_module, "moe_combine")
        else:
            self.moe_dispatch = NoopScheduleNode()
            self.moe_combine = NoopScheduleNode()

        if is_mtp:
            self.mtp_post_process = create_node(
                comp_stream, mtp_post_process_module, "mtp_post_process"
            )
        else:
            self.mtp_post_process = NoopScheduleNode()

    def get_fp8_context(self):
        """
        Get the fp8 context for the transformer layer.
        """
        use_inner_fp8_context = (
            self.layer.config.fp8 and self.layer.config.fp8_recipe != Fp8Recipe.delayed
        )
        return (
            get_fp8_context(self.layer.config, self.layer.layer_number - 1)
            if use_inner_fp8_context
            else nullcontext()
        )

    @staticmethod
    def run(f_layer, b_layer, f_input=None, b_grad=None, is_last_layer_in_bwd=False):
        """Schedule one-forward-one-backward operations for a single transformer layer.

        This function interleaves forward and backward operations, overlapping the communications
        (dispatch or combine) of one with the computations (att or mlp) of the other
        to maximize parallelism and efficiency.

        When f_layer and b_layer are not None, forward and backward pass are overlapped as follows:
        comm_stream: combine_bwd | dispatch_fwd->dispatch_bwd  | combine_fwd
        comp_stream: attn_fwd    | mlp_bwd->mlp_bwd_dw->mlp_fwd| attn_bwd
        For MTP, mtp_post_process_fwd is executed after the combine_fwd in the comp_stream,
        and mtp_post_process_bwd is executed before the combine_bwd in the comp_stream.

        Args:
            f_layer (TransformerLayerSchedulePlan): Forward layer (for current microbatch)
            b_layer (TransformerLayerSchedulePlan): Backward layer (for previous microbatch)
            f_input (Tensor): Input for forward computation
            b_grad (Tensor): Gradient for backward computation
            is_last_layer_in_bwd (bool):
                Whether the current layer is the last layer in the backward pass.

        Returns:
            Functions or values for next iteration's computation
        """

        if b_layer is not None:
            b_grad = b_layer.mtp_post_process.backward(b_grad)
            b_grad = b_layer.moe_combine.backward(b_grad)

        if f_layer is not None:
            with f_layer.get_fp8_context():
                f_input = f_layer.attn.forward(f_input)

        if b_layer is not None:
            b_grad = b_layer.mlp.backward(b_grad)

        if f_layer is not None:
            with f_layer.get_fp8_context():
                f_input = f_layer.moe_dispatch.forward(f_input)

        if b_layer is not None:
            b_layer.mlp.backward_dw()
            b_grad = b_layer.moe_dispatch.backward(b_grad)

        if b_layer is not None and b_layer.config.ep_overlap_early_attn_memory_release:
            b_grad = b_layer.attn.backward(b_grad)

        if f_layer is not None:
            with f_layer.get_fp8_context():
                f_input = f_layer.mlp.forward(f_input)

        if f_layer is not None:
            with f_layer.get_fp8_context():
                f_input = f_layer.moe_combine.forward(f_input)
                f_input = f_layer.mtp_post_process.forward(f_input)

        if b_layer is not None and not b_layer.config.ep_overlap_early_attn_memory_release:
            b_grad = b_layer.attn.backward(b_grad)

        # Delay the last attn_dw in backward pass (attn_dw of the first layer)
        # for overlapping with the p2p comm
        if b_layer is not None and not is_last_layer_in_bwd:
            b_layer.attn.backward_dw()

        return f_input, b_grad


class TransformerModelChunkSchedulePlan(AbstractSchedulePlan):
    """Schedule the executing plan of the sub-modules in a model chunk sub-modules.

    This class organizes the computation nodes for a model chunk,
    including preprocessing, transformer layers, and postprocessing.

    TransformerModelChunkSchedulePlan
    ├── pre_process: PreProcessNode
    ├── layers: List[TransformerLayerSchedulePlan]
    │   ├── layer[0]: TransformerLayerSchedulePlan
    │   ├── layer[1]: TransformerLayerSchedulePlan
    │   └── ...
    └── post_process: PostProcessNode
    """

    def __init__(
        self,
        model,
        input_ids: Tensor,
        position_ids: Tensor,
        attention_mask: Tensor,
        decoder_input: Tensor = None,
        labels: Tensor = None,
        packed_seq_params=None,
        extra_block_kwargs=None,
        runtime_gather_output: Optional[bool] = None,
        loss_mask: Optional[Tensor] = None,
        padding_mask=None,
    ):
        """Initialize the schedule plan of all Transformer layers' sub-modules.

        This function creates a schedule plan for a model chunk, including
        preprocessing, transformer layers, and postprocessing.

        Args:
            model: The model to build a schedule plan for.
            input_ids: Input token IDs.
            position_ids: Position IDs.
            attention_mask: Attention mask.
            decoder_input: Decoder input tensor.
            labels: Labels for loss computation.
            packed_seq_params: Parameters for packed sequences.
            extra_block_kwargs: Additional keyword arguments for blocks.
            runtime_gather_output: Whether to gather output at runtime.
            loss_mask (torch.Tensor): Used to mask out some portions of the loss

        Returns:
            The model chunk schedule plan.
        """
        from megatron.core.models.gpt.fine_grained_callables import PostProcessNode, PreProcessNode

        self._model_chunk_state = ModelChunkState()
        self._transformer_layers = []
        self._event = torch.cuda.Event()
        self.pre_process = None
        self.post_process = None
        self.vp_stage = model.vp_stage

        comp_stream = get_comp_stream()
        comm_stream = get_comm_stream()

        # save the inputs of model.forward() to ModelChunkState
        self._model_chunk_state.input_ids = input_ids
        self._model_chunk_state.position_ids = position_ids
        self._model_chunk_state.attention_mask = attention_mask
        self._model_chunk_state.decoder_input = decoder_input
        self._model_chunk_state.labels = labels
        self._model_chunk_state.mtp_hidden_states = None
        self._model_chunk_state.loss_mask = loss_mask
        self._model_chunk_state.packed_seq_params = packed_seq_params
        self._model_chunk_state.padding_mask = padding_mask
        self._model_chunk_state.extra_block_kwargs = extra_block_kwargs
        self._model_chunk_state.runtime_gather_output = runtime_gather_output
        self._model_chunk_state.model = model
        self._model_chunk_state.context = None
        self._model_chunk_state.context_mask = None
        self._model_chunk_state.attention_bias = None

        # build preprocess
        self.pre_process = PreProcessNode(model, self._model_chunk_state, self._event, comp_stream)

        # build layer schedule plan for each layer.
        # The methods to obtain layers are different for MTP so we need the other build plan for
        # MTP. Also, this can help annotate MTP layer so that it can know where MTP is.
        self._build_layer_schedule_plan(model.decoder, comp_stream, comm_stream)
        self._build_layer_schedule_plan(getattr(model, "mtp", None), comp_stream, comm_stream)

        # build post process
        if model.post_process:
            self.post_process = PostProcessNode(
                model, self._model_chunk_state, self._event, comp_stream
            )

    def _build_layer_schedule_plan(self, module, comp_stream, comm_stream):
        if module is None:
            return
        num_layers = len(module.layers)
        for layer_idx in range(num_layers):
            extra_args = {
                "is_first_layer": layer_idx == 0,
                "is_last_layer": layer_idx == num_layers - 1,
            }
            layer_plan = TransformerLayerSchedulePlan(
                module.layers[layer_idx],
                self.event,
                self.state,
                comp_stream,
                comm_stream,
                extra_args,
            )
            self._transformer_layers.append(layer_plan)

    @property
    def event(self):
        """Gets the CUDA event for synchronization."""
        return self._event

    def record_current_stream(self):
        """Records the current CUDA stream in the event."""
        stream = torch.cuda.current_stream()
        self.event.record(stream)

    def wait_current_stream(self):
        """Waits for the event to complete on the current CUDA stream."""
        stream = torch.cuda.current_stream()
        self.event.wait(stream)

    def get_layer(self, i):
        """Gets the transformer layer at the specified index."""
        assert i < self.num_layers()
        return self._transformer_layers[i]

    def pop_layer(self):
        """Pops the transformer layer in FILO order."""
        return self._transformer_layers.pop()

    def num_layers(self):
        """Gets the number of transformer layers."""
        return len(self._transformer_layers)

    @property
    def state(self):
        """Gets the model chunk state."""
        return self._model_chunk_state

    def release_state(self):
        """Release reference, this helps avoid memory leak."""
        self._model_chunk_state.model = None
        self.pre_process.model_chunk_state = None
        self.pre_process = None

        if self.post_process is not None:
            self.post_process.model_chunk_state = None
            self.post_process = None

    @staticmethod
    def run(
        f_schedule_plan,
        b_schedule_plan,
        b_grad=None,
        pre_forward=None,
        pre_backward=None,
        post_forward=None,
        post_backward=None,
    ):
        """Model Chunk level 1f1b fine-grained scheduler.

        This function schedules the forward and backward passes for a model chunk,
        which interleaves forward and backward function of multiple Transformer layers
        within a model chunk, and this is needed to overlap the submodules between the individual
        forward and backward functions.

        Assume there are 4 layers in the given model chunk:
        Phase 0: p2p_comm_sync -> forward_preprocess -> p2p_comm_sync -> backward_postprocess
        Phase 1: forward_layer[0] + backward_layer[3], overlapped execution by schedule_layer_1f1b
        Phase 2: forward_layer[1] + backward_layer[2], overlapped execution by schedule_layer_1f1b
        Phase 3: forward_layer[2] + backward_layer[1], overlapped execution by schedule_layer_1f1b
        Phase 4: forward_layer[3] + backward_layer[0], overlapped execution by schedule_layer_1f1b
        Phase 5: send_forward_recv_backward -> send_backward_recv_forward
        Phase 6: backward_dw of the first layer -> forward_postprocess -> backward_preprocess

        Args:
            f_schedule_plan (TransformerModelChunkSchedulePlan): The forward schedule plan
            b_schedule_plan (TransformerModelChunkSchedulePlan): The backward schedule plan
            b_grad (Tensor or None): The gradient of the loss function
            pre_forward (callable or None): The function to call before the forward pass
            pre_backward (callable or None): The function to call before the backward pass
            post_forward (callable or None): The function to call after the forward pass
            post_backward (callable or None): The function to call after the backward pass
        Returns:
            The output of the forward pass.
        """
        f_input = None
        if f_schedule_plan:
            # pp output send/receive sync
            if pre_forward is not None:
                pre_forward(f_schedule_plan.vp_stage)
            f_schedule_plan.record_current_stream()
            f_input = f_schedule_plan.pre_process.forward()

        if b_schedule_plan:
            b_schedule_plan.record_current_stream()
            assert b_grad is not None
            if pre_backward is not None:
                pre_backward(b_schedule_plan.vp_stage)
                b_schedule_plan.record_current_stream()

            if b_schedule_plan.post_process is not None:
                b_grad = b_schedule_plan.post_process.backward(b_grad)

        f_num_layers = f_schedule_plan.num_layers() if f_schedule_plan is not None else 0
        b_num_layers = b_schedule_plan.num_layers() if b_schedule_plan is not None else 0
        overlapped_layers = min(f_num_layers, b_num_layers)

        f_layer = b_layer = None
        # combined forward and backward pass for overlapped layers
        for i in range(overlapped_layers):
            f_layer = f_schedule_plan.get_layer(i)
            b_layer = b_schedule_plan.pop_layer()
            torch.cuda.nvtx.range_push(f"layer_{i}f-layer_{b_schedule_plan.num_layers()}b")
            f_input, b_grad = TransformerLayerSchedulePlan.run(
                f_layer,
                b_layer,
                f_input=f_input,
                b_grad=b_grad,
                is_last_layer_in_bwd=(i == b_num_layers - 1),
            )
            if i < b_num_layers - 1:
                b_layer.release_state()
            torch.cuda.nvtx.range_pop()

        # backward pass for the remaining layers
        for i in range(overlapped_layers, b_num_layers):
            b_layer = b_schedule_plan.pop_layer()
            torch.cuda.nvtx.range_push(f"layer_{b_schedule_plan.num_layers()}b")
            _, b_grad = TransformerLayerSchedulePlan.run(
                None, b_layer, b_grad=b_grad, is_last_layer_in_bwd=(i == b_num_layers - 1)
            )
            if i < b_num_layers - 1:
                b_layer.release_state()
            torch.cuda.nvtx.range_pop()

        # forward pass for the remaining layers
        for i in range(overlapped_layers, f_num_layers):
            f_layer = f_schedule_plan.get_layer(i)
            torch.cuda.nvtx.range_push(f"layer_{i}f")
            f_input, _ = TransformerLayerSchedulePlan.run(f_layer, None, f_input=f_input)
            torch.cuda.nvtx.range_pop()

        if f_schedule_plan is not None and post_forward is not None:
            # post_forward()/send_forward_recv_forward() is running in the communication stream,
            # so the p2p comm could be overlapped with the attn backward
            with torch.cuda.stream(get_comm_stream()):
                f_schedule_plan.wait_current_stream()
                post_forward(f_input, f_schedule_plan.vp_stage)

        # post_backward()/send_backward_recv_backward() is running in the computation stream,
        # so the p2p comm could be overlapped with the wgrad of attn backward
        if b_schedule_plan is not None and post_backward is not None:
            b_schedule_plan.wait_current_stream()
            post_backward(b_grad, b_schedule_plan.vp_stage)

        # Delay the last attn_dw in backward pass (attn_dw of the first layer)
        # for overlapping with the p2p comm
        if b_num_layers > 0:
            assert b_layer is not None
            b_layer.attn.backward_dw()
            b_layer.release_state()

        # post process forward
        if f_schedule_plan is not None and f_schedule_plan.post_process is not None:
            f_input = f_schedule_plan.post_process.forward(f_input)
        # pre process backward
        if b_schedule_plan is not None:
            b_schedule_plan.pre_process.backward(b_grad)

        if f_schedule_plan:
            f_schedule_plan.wait_current_stream()
        if b_schedule_plan:
            b_schedule_plan.wait_current_stream()
            # Release reference as early as possible, this helps avoid memory leak.
            b_schedule_plan.release_state()

        return f_input


================================================
FILE: megatron/core/models/common/vision_module/__init__.py
================================================


================================================
FILE: megatron/core/models/common/vision_module/vision_module.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
"""Megatron Vision Module."""

from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.transformer_config import TransformerConfig


# Note: This is only a stub at the moment. This will be expanded in follow-up changes.
class VisionModule(MegatronModule):
    """Base vision module that has common helper functions used across CLIP, ViT, etc.

    Args:
        config (TransformerConfig): Input transformer config for the model
    """

    def __init__(self, config: TransformerConfig) -> None:
        super().__init__(config=config)


================================================
FILE: megatron/core/models/gpt/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from .gpt_model import GPTModel


================================================
FILE: megatron/core/models/gpt/experimental_attention_variant_module_specs.py
================================================
# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.

from typing import List, Optional

from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add
from megatron.core.models.backends import BackendSpecProvider
from megatron.core.ssm.gated_delta_net import GatedDeltaNet, GatedDeltaNetSubmodules
from megatron.core.transformer.enums import AttnMaskType, LayerType
from megatron.core.transformer.experimental_attention_variant.dsa import (
    DSAIndexer,
    DSAIndexerSubmodules,
    DSAttention,
    DSAttentionSubmodules,
)
from megatron.core.transformer.identity_op import IdentityOp
from megatron.core.transformer.multi_latent_attention import (
    MLASelfAttention,
    MLASelfAttentionSubmodules,
)
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_block import (
    TransformerBlockSubmodules,
    get_num_layers_to_build,
)
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.transformer_layer import (
    TransformerLayer,
    TransformerLayerSubmodules,
    get_transformer_layer_offset,
)

try:
    import transformer_engine as te  # type: ignore[import-untyped]  # pylint: disable=unused-import

    from megatron.core.extensions.transformer_engine_spec_provider import TESpecProvider

    HAVE_TE = True
except ImportError:
    HAVE_TE = False

try:
    import nvidia_kitchen  # type: ignore[import-not-found]  # pylint: disable=unused-import

    from megatron.core.extensions.kitchen import KitchenSpecProvider

    HAVE_KITCHEN = True
except ImportError:
    HAVE_KITCHEN = False


##########
# Experimental Attention Variant Module Specs
##########


def get_gated_delta_net_module_spec(
    config: TransformerConfig, backend: BackendSpecProvider = None
) -> ModuleSpec:
    """Build module spec for GatedDeltaNet attention."""

    if backend is None:
        backend = _get_backend_spec_provider(config=config)

    rms_norm = config.normalization == "RMSNorm"
    attention = ModuleSpec(
        module=GatedDeltaNet,
        submodules=GatedDeltaNetSubmodules(
            in_proj=backend.column_parallel_layer_norm_linear(),
            out_norm=backend.layer_norm(rms_norm=rms_norm, for_qk=False),
            out_proj=backend.row_parallel_linear(),
        ),
        metainfo={"fuse_input_layernorm": True},
    )
    return attention


def get_dsa_module_spec_for_backend(
    config: TransformerConfig, backend: BackendSpecProvider = None
) -> ModuleSpec:
    """Helper function to get module spec for Sparse Attention."""
    assert config.multi_latent_attention, "Currently only MLA supports sparse attention."
    assert config.qk_l2_norm is False, "qk_l2_norm is not supported with MLA."

    linear_q_up_proj = (
        backend.column_parallel_layer_norm_linear()
        if config.qk_layernorm
        else backend.column_parallel_linear()
    )
    linear_kv_up_proj = (
        backend.column_parallel_layer_norm_linear()
        if config.qk_layernorm
        else backend.column_parallel_linear()
    )

    # Because TransformerEngine does not support sparse attention yet, we use local
    # implementation whether the backend is TransformerEngine or not.
    core_attention = ModuleSpec(
        module=DSAttention,
        submodules=DSAttentionSubmodules(
            indexer=ModuleSpec(
                module=DSAIndexer,
                submodules=DSAIndexerSubmodules(
                    linear_wq_b=backend.linear(),
                    linear_wk=backend.linear(),
                    k_norm=backend.layer_norm(rms_norm=False, for_qk=True),
                    linear_weights_proj=backend.linear(),
                ),
            )
        ),
    )

    attention = ModuleSpec(
        module=MLASelfAttention,
        params={"attn_mask_type": AttnMaskType.causal},
        submodules=MLASelfAttentionSubmodules(
            linear_q_proj=backend.column_parallel_linear(),
            linear_q_down_proj=backend.linear(),
            linear_q_up_proj=linear_q_up_proj,
            linear_kv_down_proj=backend.linear(),
            linear_kv_up_proj=linear_kv_up_proj,
            core_attention=core_attention,
            linear_proj=backend.row_parallel_linear(),
            q_layernorm=IdentityOp,
            kv_layernorm=IdentityOp,
        ),
    )

    return attention


def get_experimental_attention_variant_module_spec(
    config: TransformerConfig, backend: BackendSpecProvider = None
) -> ModuleSpec:
    """Helper function to get module spec for experimental attention variant"""

    if backend is None:
        backend = _get_backend_spec_provider(config=config)

    if config.experimental_attention_variant == "gated_delta_net":
        return get_gated_delta_net_module_spec(config=config, backend=backend)
    else:
        raise ValueError(
            f"Invalid experimental attention variant: {config.experimental_attention_variant}"
        )


##########
# Experimental GPT Decoder Block Spec
##########


def get_transformer_block_with_experimental_attention_variant_spec(
    config: TransformerConfig, vp_stage: Optional[int] = None, pp_rank: Optional[int] = None
) -> TransformerBlockSubmodules:
    """Build transformer block spec with experimental attention variants (e.g., linear attention).

    This function constructs a heterogeneous transformer block that supports mixing different
    attention mechanisms (experimental vs standard) and MLP types (MoE vs dense) across layers.
    **Note that, this API is a experimental API in the short term, and might be deprecated in the
    future. In the long run, we will move to a new design that better support hybrid models.**

    Key Design:
        1. Attention and MLP patterns: The attention pattern and MLP pattern are orthogonal
           and determined independently. This allows flexible combinations (e.g., linear attention
           with MoE, or standard attention with dense MLP).
           - Attention pattern: derived from `config.linear_attention_freq` or
             `config.experimental_attention_variant`.
           - MLP pattern: derived from `config.moe_layer_freq`.

        2. Per-Layer Spec Construction: Iterates through layers, constructing transformer
           layer specs based on attention and MLP patterns.

        3. Pipeline Slicing: Extracts layer specs for the current pipeline stage.

    Args:
        config: Transformer configuration containing model hyperparameters and feature flags.
        vp_stage: Virtual pipeline stage index for interleaved pipeline parallelism.
        pp_rank: Pipeline model parallel rank.

    Returns:
        TransformerBlockSubmodules containing per-layer specs and final layer norm.

    Note:
        Currently only supports transformer_engine backend. Kitchen backend can be used as a
        wrapper with TE fallback for unsupported operations.
    """

    backend = _get_backend_spec_provider(config=config)

    # Get attention patterns and specs
    experimental_attention_pattern = [0] * config.num_layers
    if is_linear_attention_variant(config.experimental_attention_variant):
        experimental_attention_pattern = get_linear_attention_pattern(config=config)
    elif config.experimental_attention_variant is not None:
        experimental_attention_pattern = [1] * config.num_layers

    if 1 in experimental_attention_pattern:
        experimental_attention_spec = get_experimental_attention_variant_module_spec(
            config=config, backend=backend
        )
    else:
        experimental_attention_spec = None

    if 0 in experimental_attention_pattern:
        standard_attention_spec = _get_self_attention_module_spec(config=config, backend=backend)
    else:
        standard_attention_spec = None

    # Get MLP patterns and specs
    if config.num_moe_experts is not None:
        moe_layer_pattern = get_moe_layer_pattern(config=config)
    else:
        moe_layer_pattern = [0] * config.num_layers

    if 1 in moe_layer_pattern:
        moe_layer_spec = _get_moe_module_spec(config=config, backend=backend)
    else:
        moe_layer_spec = None

    if 0 in moe_layer_pattern:
        dense_mlp_layer_spec = _get_dense_mlp_module_spec(config=config, backend=backend)
    else:
        dense_mlp_layer_spec = None

    # Get GPT decoder block layer specs
    rms_norm = config.normalization == "RMSNorm"
    layer_specs = []
    for layer_number in range(config.num_layers):
        attention = (
            experimental_attention_spec
            if experimental_attention_pattern[layer_number] == 1
            else standard_attention_spec
        )
        mlp = moe_layer_spec if moe_layer_pattern[layer_number] == 1 else dense_mlp_layer_spec
        input_layernorm = (
            IdentityOp
            if attention.metainfo["fuse_input_layernorm"]
            else backend.layer_norm(rms_norm=rms_norm, for_qk=False)
        )
        pre_mlp_layernorm = (
            IdentityOp
            if mlp.metainfo["fuse_pre_mlp_layernorm"]
            else backend.layer_norm(rms_norm=rms_norm, for_qk=False)
        )

        layer_specs.append(
            ModuleSpec(
                module=TransformerLayer,
                submodules=TransformerLayerSubmodules(
                    input_layernorm=input_layernorm,
                    self_attention=attention,
                    self_attn_bda=get_bias_dropout_add,
                    pre_mlp_layernorm=pre_mlp_layernorm,
                    mlp=mlp,
                    mlp_bda=get_bias_dropout_add,
                ),
            )
        )

    # Slice the layer specs to only include the layers that are built in this pipeline stage.
    if config.pipeline_model_parallel_layout is not None:
        local_layer_ids = config.pipeline_model_parallel_layout.get_layer_id_list(
            layer_type=LayerType.decoder, vp_stage=vp_stage, pp_rank=pp_rank
        )
    else:
        offset = get_transformer_layer_offset(config, vp_stage=vp_stage, pp_rank=pp_rank)
        num_layers_to_build = get_num_layers_to_build(config, vp_stage=vp_stage, pp_rank=pp_rank)
        local_layer_ids = range(offset, offset + num_layers_to_build)

    layer_specs = [layer_specs[layer_id] for layer_id in local_layer_ids]

    # Get GPT decoder block spec
    gpt_decoder_block_spec = TransformerBlockSubmodules(
        layer_specs=layer_specs, layer_norm=backend.layer_norm(rms_norm=rms_norm, for_qk=False)
    )

    return gpt_decoder_block_spec


##########
# Utilities
##########


def is_linear_attention_variant(experimental_attention_variant: Optional[str]) -> bool:
    """Check if the experimental attention variant is a linear attention variant."""
    linear_attention_variants = ["gated_delta_net"]
    return experimental_attention_variant in linear_attention_variants


def get_moe_layer_pattern(config: TransformerConfig) -> List[int]:
    """Parse config.moe_layer_freq to get per-layer MoE pattern (1=MoE, 0=dense).

    - int N: one MoE layer every N layers (e.g., N=2 -> [1,0,1,0,...])
    - list: use directly as the pattern."""

    if isinstance(config.moe_layer_freq, int):
        # [1,0,0,...,0,1,0,0,...,0,...]
        moe_layer_pattern = [
            1 if (i % config.moe_layer_freq == 0) else 0 for i in range(config.num_layers)
        ]
    elif isinstance(config.moe_layer_freq, list):
        moe_layer_pattern = config.moe_layer_freq
        assert len(moe_layer_pattern) == config.num_layers, (
            f"Invalid length of moe_layer_pattern: {len(moe_layer_pattern)}, "
            f"expected {config.num_layers}, "
            f"current moe layer pattern: {config.moe_layer_freq}"
        )
    else:
        raise ValueError(
            f"Invalid moe_layer_freq: {type(config.moe_layer_freq)}, {config.moe_layer_freq}"
        )
    return moe_layer_pattern


def get_linear_attention_pattern(config: TransformerConfig) -> List[int]:
    """Parse config.linear_attention_freq to get per-layer attention pattern (1=LA, 0=SDPA).

    - int N: one SDPA layer every N layers (e.g., N=4 -> [1,1,1,0,1,1,1,0,...])
    - list: use directly as the pattern."""

    if isinstance(config.linear_attention_freq, int):
        linear_attention_pattern = [
            # [1,1,...,1,0,1,1,...,1,0,...]
            0 if ((i + 1) % config.linear_attention_freq == 0) else 1
            for i in range(config.num_layers)
        ]
    elif isinstance(config.linear_attention_freq, list):
        linear_attention_pattern = config.linear_attention_freq
        assert len(linear_attention_pattern) == config.num_layers, (
            f"Invalid length of linear_attention_pattern: {len(linear_attention_pattern)}, "
            f"expected {config.num_layers}, "
            f"current linear attention pattern: {config.linear_attention_freq}"
        )
    elif config.linear_attention_freq is None:
        if not is_linear_attention_variant(config.experimental_attention_variant):
            linear_attention_pattern = [0] * config.num_layers
        else:
            # This should be caught by config validation, but raise here as a safety check
            raise ValueError(
                f"Linear attention type {config.experimental_attention_variant} is specified "
                "but linear_attention_freq is None. "
                "Please set linear_attention_freq to specify the LA/SDPA layer pattern."
            )
    else:
        raise ValueError(
            f"Invalid linear_attention_freq: {type(config.linear_attention_freq)},"
            f" {config.linear_attention_freq}"
        )
    return linear_attention_pattern


def _get_backend_spec_provider(config: TransformerConfig) -> BackendSpecProvider:
    """Get backend spec provider for experimental attention variant."""

    assert config.transformer_impl == "transformer_engine", (
        "Experimental GPT decoder block spec only supports "
        "transformer engine implementation for now."
    )
    backend: BackendSpecProvider = (
        KitchenSpecProvider(
            fallback=TESpecProvider(),
            use_kitchen_attention=config.use_kitchen_attention,
            kitchen_attention_backend=config.kitchen_attention_backend,
        )
        if config.use_kitchen
        else TESpecProvider()
    )
    return backend


##########
# Spec functions for non-experimental self attention and MLP layer.
##########


def _get_self_attention_module_spec(
    config: TransformerConfig, backend: BackendSpecProvider = None
) -> ModuleSpec:
    """Get non-experimental self-attention module spec.
    For hybrid models that mix experimental and non-experimental attention architectures.

    Warning: This function may be deprecated in the future."""

    if backend is None:
        backend = _get_backend_spec_provider(config=config)

    from megatron.core.models.gpt.gpt_layer_specs import get_gpt_layer_with_transformer_engine_spec

    layer_spec = get_gpt_layer_with_transformer_engine_spec(
        num_experts=config.num_moe_experts,
        moe_grouped_gemm=config.moe_grouped_gemm,
        qk_layernorm=config.qk_layernorm,
        multi_latent_attention=config.multi_latent_attention,
        qk_l2_norm=config.qk_l2_norm,
        use_kitchen=config.use_kitchen,
        use_te_activation_func=config.use_te_activation_func,
        use_kitchen_attention=config.use_kitchen_attention,
        kitchen_attention_backend=config.kitchen_attention_backend,
        mla_down_proj_fusion=getattr(config, "mla_down_proj_fusion", False),
    )
    attn_spec = layer_spec.submodules.self_attention
    if config.multi_latent_attention:
        attn_spec.metainfo["fuse_input_layernorm"] = False
    else:
        attn_spec.metainfo["fuse_input_layernorm"] = backend.fuse_layernorm_and_linear()

    return attn_spec


def _get_dense_mlp_module_spec(
    config: TransformerConfig, backend: BackendSpecProvider = None
) -> ModuleSpec:
    """Get dense MLP module spec.
    For hybrid models that mix dense MLP and experimental attention architectures.

    Warning: This function may be deprecated in the future."""

    if backend is None:
        backend = _get_backend_spec_provider(config=config)

    from megatron.core.models.gpt.gpt_layer_specs import get_mlp_module_spec_for_backend

    mlp_spec = get_mlp_module_spec_for_backend(backend=backend, num_experts=None)
    mlp_spec.metainfo["fuse_pre_mlp_layernorm"] = backend.fuse_layernorm_and_linear()

    return mlp_spec


def _get_moe_module_spec(
    config: TransformerConfig, backend: BackendSpecProvider = None
) -> ModuleSpec:
    """Get MoE module spec.
    For hybrid models that mix MoE and experimental attention architectures.

    Warning: This function may be deprecated in the future."""

    if backend is None:
        backend = _get_backend_spec_provider(config=config)

    from megatron.core.models.gpt.moe_module_specs import get_moe_module_spec_for_backend

    moe_spec = get_moe_module_spec_for_backend(
        backend=backend,
        num_experts=config.num_moe_experts,
        moe_grouped_gemm=config.moe_grouped_gemm,
        use_te_activation_func=config.use_te_activation_func,
    )
    moe_spec.metainfo["fuse_pre_mlp_layernorm"] = False
    return moe_spec


================================================
FILE: megatron/core/models/gpt/fine_grained_callables.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import weakref
from contextlib import nullcontext
from functools import partial
from typing import Optional

import torch
from torch import Tensor

from megatron.core import tensor_parallel
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
    FineGrainedActivationOffloadingInterface as off_interface,
)
from megatron.core.pipeline_parallel.utils import ScheduleNode, make_viewless
from megatron.core.transformer.enums import CudaGraphScope
from megatron.core.transformer.module import GraphableMegatronModule, float16_to_fp32
from megatron.core.transformer.moe.moe_layer import MoELayer
from megatron.core.transformer.multi_token_prediction import (
    MultiTokenPredictionLayer,
    get_mtp_layer_offset,
)
from megatron.core.transformer.transformer_layer import TransformerLayer, make_viewless_tensor
from megatron.core.typed_torch import apply_module, copy_signature
from megatron.core.utils import internal_api


def weak_method(method):
    """Creates a weak reference to a method to prevent circular references.

    This function creates a weak reference to a method and returns a wrapper function
    that calls the method when invoked. This helps prevent memory leaks from circular
    references.
    """
    method_ref = weakref.WeakMethod(method)
    del method

    def wrapped_func(*args, **kwarg):
        # nonlocal object_ref
        return method_ref()(*args, **kwarg)

    return wrapped_func


@internal_api
def should_free_input(name, is_moe, config, num_local_experts):
    """Determine if the node should free its input memory.

    Args:
        name: Node name
        is_moe: Whether it's a MoE model
        config: TransformerConfig object
        num_local_experts: Number of local experts in MoE module

    Returns:
        bool: Whether to free input memory
    """
    # For dense layers [attn, fake, mlp, fake], the input is needed during backward pass
    if not is_moe:
        return False
    enable_deepep = (
        config.moe_token_dispatcher_type == "flex"
        and config.moe_flex_dispatcher_backend == "deepep"
    )
    enable_hybridep = (
        config.moe_token_dispatcher_type == "flex"
        and config.moe_flex_dispatcher_backend == "hybridep"
    )
    # Define which nodes should free input memory
    # Since we split the computing graph into multiple nodes, we can manually control
    # when and how to free the input memory.
    # The input and output of A2A are not needed anymore after the forward pass,
    # so we can free the input memory after the forward pass.

    # When low precision fp8/4 is enabled, the casted tensors are saved and the
    # original bf16 tensors are safe to be freed.
    free_mlp = config.fp8 is not None or config.fp4 is not None
    if not free_mlp:
        # AlltoAll dispatcher with local_num_experts=1 and HybridEP both use identity
        # operation for `dispatch_postprocess`, hence the mlp inputs will be directly
        # passed to GroupedGemm and should be saved for backward pass.
        free_mlp = num_local_experts > 1 or config.moe_token_dispatcher_type != "alltoall"
        free_mlp = free_mlp and not enable_hybridep

    free_input_nodes = {
        "mlp": free_mlp,
        "moe_combine": True,
        # For non-DeepEP and non-HybridEP dispatcher mode, the input is the un-dispatched tokens
        # and probs before dispatch A2A and it's not needed anymore after the forward pass
        # For DeepEP and HybridEP dispatcher mode, they are both needed in backward pass
        # and cannot be freed.
        # If moe_preprocess is in cuda graph scope, tokens and probs are fixed size tensors,
        # so they cannot be freed.
        "moe_dispatch": not (enable_deepep or enable_hybridep)
        and (CudaGraphScope.moe_preprocess not in config.cuda_graph_scope),
    }

    return free_input_nodes.get(name, False)


class TransformerLayerState:
    """State shared within a transformer layer.

    This class holds state that is shared between different nodes
    within a transformer layer.
    """

    pass


class PreProcessNode(ScheduleNode):
    """Node responsible for preprocessing operations in the model.

    This node handles embedding and rotary positional embedding computations
    before the main transformer layers.
    """

    def __init__(self, gpt_model, chunk_state, event, stream):
        """Initializes a preprocessing node.

        Args:
            gpt_model: The GPT model instance.
            chunk_state (TransformerChunkState): State shared within a chunk
            event: CUDA event for synchronization.
            stream: CUDA stream for execution.
        """
        super().__init__(weak_method(self.forward_impl), stream, event, name="pre_process")
        self.gpt_model = gpt_model
        self.chunk_state = chunk_state

    def forward_impl(self):
        """forward pass for pre-processing.

        This method handles:
        1. Decoder embedding computation
        2. Rotary positional embedding computation
        3. Sequence length offset computation for flash decoding

        Returns:
            The processed decoder input tensor.
        """
        # Get decoder input
        if not self.gpt_model.pre_process:
            self.chunk_state.decoder_input = self.gpt_model.decoder.input_tensor
        # Run GPTModel._preprocess
        (
            decoder_input,
            rotary_pos_emb,
            rotary_pos_cos,
            rotary_pos_sin,
            sequence_len_offset,
            padding_mask,
        ) = self.gpt_model._preprocess(
            input_ids=self.chunk_state.input_ids,
            position_ids=self.chunk_state.position_ids,
            decoder_input=self.chunk_state.decoder_input,
            packed_seq_params=self.chunk_state.packed_seq_params,
            padding_mask=self.chunk_state.padding_mask,
        )

        # Saved for later use
        self.chunk_state.decoder_input = decoder_input
        self.chunk_state.rotary_pos_emb = rotary_pos_emb
        self.chunk_state.rotary_pos_cos = rotary_pos_cos
        self.chunk_state.rotary_pos_sin = rotary_pos_sin
        self.chunk_state.sequence_len_offset = sequence_len_offset
        self.chunk_state.padding_mask = padding_mask
        return decoder_input


class PostProcessNode(ScheduleNode):
    """Node responsible for postprocessing operations in the model.

    This node handles final layer normalization and output layer computation
    after the main transformer layers.
    """

    def __init__(self, gpt_model, chunk_state, event, stream):
        """Initializes a postprocessing node.

        Args:
            gpt_model: The GPT model instance.
            chunk_state (TransformerChunkState): State shared within a chunk
            event: CUDA event for synchronization.
            stream: CUDA stream for execution.
        """
        super().__init__(weak_method(self.forward_impl), stream, event, name="post_process")
        self.gpt_model = gpt_model
        self.chunk_state = chunk_state

    def forward_impl(self, hidden_states):
        """Implements the forward pass for postprocessing.

        This method handles:
        1. Output layer computation
        2. Loss computation if labels are provided

        Args:
            hidden_states: The hidden states from the transformer layers.

        Returns:
            The logits or loss depending on whether labels are provided.
        """

        empty_decoder = len(self.gpt_model.decoder.layers) == 0
        layer_norm = self.gpt_model.decoder.final_layernorm
        if not self.gpt_model.config.mtp_num_layers and empty_decoder and layer_norm:
            hidden_states = layer_norm(hidden_states)
            hidden_states = make_viewless_tensor(
                inp=hidden_states, requires_grad=True, keep_graph=True
            )

        # Run GPTModel._postprocess
        loss = self.gpt_model._postprocess(
            hidden_states=hidden_states,
            input_ids=self.chunk_state.input_ids,
            position_ids=self.chunk_state.position_ids,
            labels=self.chunk_state.labels,
            decoder_input=self.chunk_state.decoder_input,
            rotary_pos_emb=self.chunk_state.rotary_pos_emb,
            rotary_pos_cos=self.chunk_state.rotary_pos_cos,
            rotary_pos_sin=self.chunk_state.rotary_pos_sin,
            mtp_in_postprocess=False,
            loss_mask=self.chunk_state.loss_mask,
            attention_mask=self.chunk_state.attention_mask,
            packed_seq_params=self.chunk_state.packed_seq_params,
            sequence_len_offset=self.chunk_state.sequence_len_offset,
            runtime_gather_output=self.chunk_state.runtime_gather_output,
            extra_block_kwargs=self.chunk_state.extra_block_kwargs,
        )

        # For now, 1f1b only supports fp16 module
        return float16_to_fp32(loss)


class TransformerLayerNode(ScheduleNode):
    """Base class for transformer layer computation nodes.

    This class provides common functionality for different types of
    transformer layer nodes (attention, MLP, etc.)
    """

    def __init__(
        self,
        stream,
        event,
        layer_state,
        chunk_state,
        submodule,
        name="default",
        bwd_dw_callables=None,
        extra_args={},
    ):
        """Initialize a transformer layer node.

        Args:
            stream (torch.cuda.Stream): CUDA stream for execution
            event (torch.cuda.Event): Synchronization event
            layer_state (TransformerLayerState): State shared within a layer
            chunk_state (TransformerChunkState): State shared within a chunk
            submodule (function): The submodule contain forward and dw function
            it's the per_batch_state_context, o.w. nullcontext
            name (str): Node name, also used to determine memory strategy
            bwd_dw_callables (list): List of weight gradient functions for the layer.
            extra_args (dict): Extra arguments for the node: is_moe, config.
        """
        # determine whether to free input memory
        config = extra_args.get("config", None)
        assert config is not None, "model config must be passed to TransformerLayerNode."
        is_moe = extra_args.get("is_moe", False)
        num_local_experts = extra_args.get("num_local_experts", None)
        free_input = should_free_input(name, is_moe, config, num_local_experts)
        self.delay_wgrad_compute = extra_args.get("delay_wgrad_compute", False)

        super().__init__(
            weak_method(self.forward_impl),
            stream,
            event,
            weak_method(self.backward_impl),
            free_input=free_input,
            name=name,
        )
        self.layer_state = layer_state
        self.chunk_state = chunk_state
        self.submodule = submodule
        self.detached = tuple()
        self.before_detached = tuple()
        self.is_mtp = extra_args.get("is_mtp", False)

        # Create flags to indicate first and last layer
        self.is_first_layer = extra_args.get("is_first_layer", False)
        self.is_last_layer = extra_args.get("is_last_layer", False)

        # Initialize list to store registered dw callables
        self.bwd_dw_callables = []
        if bwd_dw_callables is not None:
            self.bwd_dw_callables = (
                bwd_dw_callables if isinstance(bwd_dw_callables, list) else [bwd_dw_callables]
            )

    def detach(self, t):
        """Detaches a tensor and stores it for backward computation."""
        detached = make_viewless(t).detach()
        detached.requires_grad = t.requires_grad
        self.before_detached = self.before_detached + (t,)
        self.detached = self.detached + (detached,)
        return detached

    def forward_impl(self, *args):
        """Calls the submodule as the forward pass."""
        return self.submodule(self, *args)

    def backward_impl(self, outputs, output_grad):
        """Implements the backward pass for the transformer layer node."""
        detached_grad = tuple([e.grad for e in self.detached])
        grads = output_grad + detached_grad
        self.default_backward_func(outputs + self.before_detached, grads)
        # release the output grad memory after backward finishes,
        # except when delay_wgrad_comptue is enabled, the grad should be
        # kept until all modules' backward_dw has been invoked.
        if self.delay_wgrad_compute:
            self.output_grads = grads
            self.delay_grads_release = len(self.bwd_dw_callables) > 0

        # return grads for record stream
        return grads

    def backward_dw(self):
        """Computes the weight gradients for the transformer layer node."""
        if not self.delay_wgrad_compute:
            return
        with torch.cuda.stream(self.stream):
            torch.cuda.nvtx.range_push(f"{self.name} wgrad")
            for module in self.bwd_dw_callables:
                module.backward_dw()
            torch.cuda.nvtx.range_pop()

        # the output grad memory is last used in wgrad compute, should be safe to release.
        assert self.delay_grads_release, "output grad memory should be valid before wgrad."
        if self.manual_release_grads:
            for tensor in self.output_grads:
                tensor.untyped_storage().resize_(0)
        self.output_grads = None

        self.bwd_dw_callables = None

    def __del__(self):
        # Release reference as early as possible, this helps avoid memory leak.
        self.before_detached = None
        self.detached = None
        self.layer_state = None
        self.chunk_state = None
        self.submodule = None


class _BackwardDWWrapper:
    """Wrapper for managing backward weight gradient computation of attn module.

    This class handles the execution of weight gradient computations for transformer layers,
    coordinating between CUDA graphed and non-graphed components. It is used when
    overlap_moe_expert_parallel_comm and delay_wgrad_compute are enabled to manage
    the delayed weight gradient computation in MoE models.

    The wrapper stores references to the attention and shared expert backward weight gradient
    callables, and determines which components should be executed based on whether CUDA graphs
    are being replayed and which scopes are covered by the graphs.
    """

    def __init__(self, layer):
        assert isinstance(
            layer, GraphableMegatronModule
        ), "cuda graphed ep overlap only supports GraphableMegatronModule."
        assert isinstance(
            layer, TransformerLayer
        ), "cuda graphed ep overlap only supports TransformerLayer for now."
        self.layer = layer
        self.graphed_backward_dw_callable = None
        self.attn_dw_callable = layer.self_attention.backward_dw
        if layer.is_moe_layer:
            self.shared_expert_dw_callable = partial(
                layer.mlp.backward_dw, routed_experts=False, shared_experts=True
            )
        else:
            self.shared_expert_dw_callable = None
        self.cuda_graph_scope = layer.config.cuda_graph_scope

    def backward_dw(self):
        """Execute weight gradients, skipping CUDA graphed components during replay."""
        is_replay = hasattr(self.layer, 'cuda_graphs') and self.layer.cuda_graphs
        if self.shared_expert_dw_callable is not None and (
            not is_replay or CudaGraphScope.moe_router not in self.cuda_graph_scope
        ):
            self.shared_expert_dw_callable()
        if not is_replay or CudaGraphScope.attn not in self.cuda_graph_scope:
            self.attn_dw_callable()
        if is_replay and self.graphed_backward_dw_callable is not None:
            self.graphed_backward_dw_callable()
        self.layer = None

    def set_graphed_backward_dw_callable(self, graphed_backward_dw_callable):
        """Store the CUDA graphed backward weight gradient callable."""
        self.graphed_backward_dw_callable = graphed_backward_dw_callable


def build_transformer_layer_callables(layer: TransformerLayer):
    """Create callables for transformer layer nodes.
    Divides the transformer layer's operations into a sequence of smaller, independent
    functions. This decomposition separates computation-heavy tasks (e.g., self-attention,
    MLP) from communication-heavy tasks (e.g., MoE's All-to-All).

    The five callables are:
    1. Attention (computation)
    2. Post-Attention (computation)
    3. MoE Dispatch (communication)
    4. MLP / MoE Experts (computation)
    5. MoE Combine (communication)

    By assigning these functions to different CUDA streams (e.g., a compute stream
    and a communication stream), the scheduler can overlap their execution, preventing
    tasks from competing for resources and hiding communication latency by running them
    in parallel with functions from other micro-batches.

    Args:
        layer: The transformer layer to build callables for.

    Returns:
        A tuple containing:
        - forward_funcs: List of callable functions for the layer
        - backward_dw: Dict of weight gradient functions for the layer
    """

    is_moe = isinstance(layer.mlp, MoELayer)
    enable_deepep = (
        layer.config.moe_token_dispatcher_type == "flex"
        and layer.config.moe_flex_dispatcher_backend == "deepep"
    )
    enable_hybridep = (
        layer.config.moe_token_dispatcher_type == "flex"
        and layer.config.moe_flex_dispatcher_backend == "hybridep"
    )

    def submodule_attn_forward(node: ScheduleNode, hidden_states: torch.Tensor):
        """
        Performs same attnention forward logic as GPT Model and forward pass for
        computations between attention and dispatch:
            pre mlp layernorm->router->dispatch preprocess
        """

        if (
            isinstance(layer, GraphableMegatronModule)
            and hasattr(layer, 'cuda_graphs')
            and layer.cuda_graphs
        ):
            layer.set_te_cuda_graph_backward_dw_wrapper()
            forward_func = layer._te_cuda_graph_replay
        else:
            # wrapper function that keeps consistent api with cuda graph replay
            def forward_func(
                hidden_states: Tensor,
                attention_mask: Optional[Tensor] = None,
                rotary_pos_emb: Optional[Tensor] = None,
                rotary_pos_cos: Optional[Tensor] = None,
                rotary_pos_sin: Optional[Tensor] = None,
                packed_seq_params: Optional[PackedSeqParams] = None,
                sequence_len_offset: Optional[Tensor] = None,
            ):
                hidden_states, _ = layer._forward_attention(
                    hidden_states=hidden_states,
                    attention_mask=attention_mask,
                    rotary_pos_emb=rotary_pos_emb,
                    rotary_pos_cos=rotary_pos_cos,
                    rotary_pos_sin=rotary_pos_sin,
                    packed_seq_params=packed_seq_params,
                    sequence_len_offset=sequence_len_offset,
                )
                if not isinstance(layer.mlp, MoELayer):
                    return hidden_states, None, None, None
                if layer.recompute_pre_mlp_layernorm:
                    layer.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput()
                    with off_interface(
                        layer.offload_mlp_norm, hidden_states, "mlp_norm"
                    ) as hidden_states:
                        pre_mlp_layernorm_output = layer.pre_mlp_norm_checkpoint.checkpoint(
                            apply_module(layer.pre_mlp_layernorm), hidden_states
                        )
                else:
                    with off_interface(
                        layer.offload_mlp_norm, hidden_states, "mlp_norm"
                    ) as hidden_states:
                        pre_mlp_layernorm_output = apply_module(layer.pre_mlp_layernorm)(
                            hidden_states
                        )

                shared_expert_output = layer.mlp.shared_experts_compute(pre_mlp_layernorm_output)
                probs, routing_map = layer.mlp.route(pre_mlp_layernorm_output)
                local_tokens, probs = layer.mlp.preprocess(
                    pre_mlp_layernorm_output, probs, routing_map
                )
                return hidden_states, local_tokens, probs, shared_expert_output

        hidden_states, local_tokens, probs, shared_expert_output = forward_func(
            hidden_states=hidden_states,
            attention_mask=node.chunk_state.attention_mask,
            rotary_pos_emb=node.chunk_state.rotary_pos_emb,
            rotary_pos_cos=node.chunk_state.rotary_pos_cos,
            rotary_pos_sin=node.chunk_state.rotary_pos_sin,
            packed_seq_params=node.chunk_state.packed_seq_params,
            sequence_len_offset=node.chunk_state.sequence_len_offset,
        )
        if not isinstance(layer.mlp, MoELayer):
            return hidden_states

        # Detach here for mlp_bda residual connection
        node.layer_state.residual = node.detach(hidden_states)
        if layer.mlp.use_shared_expert and not layer.mlp.shared_expert_overlap:
            # Detach here for shared expert connection in moe_combine
            node.layer_state.shared_expert_output = node.detach(shared_expert_output)

        return local_tokens, probs

    def submodule_dispatch_forward(
        node: ScheduleNode, local_tokens: torch.Tensor, probs: torch.Tensor
    ):
        """
        Dispatches tokens to the experts based on the router output.
        """
        token_dispatcher = layer.mlp.token_dispatcher
        if enable_deepep or enable_hybridep:
            # update token_probs to be the detached version, prevents
            # backward graph from connecting to attn submodule
            token_dispatcher._comm_manager.token_probs = probs

        dispatched_tokens, dispatched_probs = layer.mlp.dispatch(local_tokens, probs)

        # `dispatched_probs` is needed by backward pass of swiglu, therefore it's
        # passed to moe_forward within `layer_state` to avoid the free_input process
        # of the input tensors.
        node.layer_state.dispatched_probs = node.detach(dispatched_probs)
        return dispatched_tokens

    def submodule_moe_forward(node: ScheduleNode, dispatched_tokens: torch.Tensor):
        """
        Run forward pass for computations between dispatch and combine:
            post dispatch->experts->combine preprocess
        """
        dispatched_probs = node.layer_state.dispatched_probs
        token_dispatcher = layer.mlp.token_dispatcher
        if enable_deepep or enable_hybridep:
            # update dispatched_probs to be detached version, prevents
            # backward graph from connecting to dispatch submodule
            token_dispatcher._comm_manager.dispatched_probs = dispatched_probs

        expert_output, _ = layer.mlp.routed_experts_compute(dispatched_tokens, dispatched_probs)

        # For HybridEP, tokens_per_expert is generated on comm stream, as the input to
        # `routed_experts_compute`, a ref is needed to prevent it from being freed.
        if enable_hybridep:
            tokens_per_expert = token_dispatcher._comm_manager.get_number_of_tokens_per_expert()
            node.layer_state.tokens_per_expert = tokens_per_expert

        if layer.recompute_pre_mlp_layernorm:
            # discard the output of the pre-mlp layernorm and register the recompute
            # as a gradient hook of expert_output
            layer.pre_mlp_norm_checkpoint.discard_output_and_register_recompute(expert_output)

        return expert_output

    def submodule_combine_forward(node: ScheduleNode, output: torch.Tensor):
        """
        # Triggers token combine and the remaining computation in the transformer layer.
        # The `mlp_bda` computation is placed after `mlp.combine` due to data dependency.
        # This ordering is also critical for pipeline performance. Starting the `mlp.combine`
        # communication at first allows it to be overlapped with computation from another
        # microbatch. If `mlp_bda` were to run first, it would compete for SM resources
        # with another microbatch's computation and expose the communication.
        """
        residual = node.layer_state.residual
        shared_expert_output = getattr(node.layer_state, 'shared_expert_output', None)
        output = layer.mlp.combine(output)
        output = layer.mlp.postprocess(output, shared_expert_output)

        mlp_output_with_bias = (output, None)
        if hasattr(layer, 'cuda_graphs') and layer.cuda_graphs:
            layer.mlp.cudagraph_tensor_store.clear()
        with layer.bias_dropout_add_exec_handler():
            hidden_states = layer.mlp_bda(layer.training, layer.config.bias_dropout_fusion)(
                mlp_output_with_bias, residual, layer.hidden_dropout
            )
        # Delay the offload of the mlp norm until after the mlp_bda has been computed
        # because the residual is needed in the mlp_bda.
        if layer.offload_mlp_norm:
            hidden_states = off_interface.group_commit(
                hidden_states, name="mlp_norm", forced_released_tensors=[residual]
            )
        output = make_viewless_tensor(
            inp=hidden_states, requires_grad=hidden_states.requires_grad, keep_graph=True
        )

        # Need to record tensors created on comp stream to comm stream
        node.layer_state.residual.record_stream(torch.cuda.current_stream())
        if shared_expert_output is not None:
            shared_expert_output.record_stream(torch.cuda.current_stream())

        # release tensor reference after use
        node.layer_state.residual = None
        node.layer_state.shared_expert_output = None

        # final layer norm from decoder
        final_layernorm = node.chunk_state.model.decoder.final_layernorm
        if not node.is_mtp and final_layernorm and node.is_last_layer:
            output = final_layernorm(output)
            output = make_viewless_tensor(inp=output, requires_grad=True, keep_graph=True)
        return output

    @copy_signature(layer._forward_mlp, handle_first_dst_param='preserve')
    def mlp_wrapper(node: ScheduleNode, *args, **kwargs):
        """Wrapper for Dense forward."""
        return layer._forward_mlp(*args, **kwargs)

    def raise_not_implemented(*args):
        """Raise NotImplementedError for Dense layer."""
        raise NotImplementedError("This callable is not implemented for Dense layer.")

    # Build forward and backward callable functions
    attn_func = submodule_attn_forward
    dispatch_func = submodule_dispatch_forward if is_moe else raise_not_implemented
    mlp_func = submodule_moe_forward if is_moe else mlp_wrapper
    combine_func = submodule_combine_forward if is_moe else raise_not_implemented

    layer.init_backward_dw_wrapper()

    forward_funcs = [attn_func, dispatch_func, mlp_func, combine_func, None]
    backward_dw = {"attn": layer.backward_dw_wrapper, "mlp": layer.mlp}
    return forward_funcs, backward_dw


def build_mtp_layer_callables(layer):
    """Callables for multi-token prediction layer nodes.

    This class contains the callable functions for different types of
    multi-token prediction layer nodes (attention, MLP, etc.)
    """

    forward_funcs, backward_dw = build_transformer_layer_callables(layer.mtp_model_layer)
    attn_forward, dispatch_forward, mlp_forward, combine_forward, _ = forward_funcs
    is_moe = isinstance(layer.mtp_model_layer.mlp, MoELayer)
    assert is_moe, "MTP layer in a2a overlap only supports MoE layer for now."

    def submodule_mtp_attn_forward(node, hidden_states):
        # MTP Block Preprocess
        if node.is_first_layer:
            offset = get_mtp_layer_offset(layer.config, node.chunk_state.model.vp_stage)
            node.chunk_state.mtp_hidden_states = list(torch.chunk(hidden_states, 1 + offset, dim=0))
            hidden_states = node.chunk_state.mtp_hidden_states[offset]

        input_ids, position_ids, decoder_input, hidden_states = layer._get_embeddings(
            input_ids=node.chunk_state.input_ids,
            position_ids=node.chunk_state.position_ids,
            embedding=node.chunk_state.model.embedding,
            hidden_states=hidden_states,
        )
        node.chunk_state.input_ids = input_ids
        node.chunk_state.position_ids = position_ids

        # MTP Layer Preprocess
        # norm, linear projection and transformer
        assert (
            node.chunk_state.context is None
        ), f"multi token prediction + cross attention is not yet supported."
        assert (
            node.chunk_state.packed_seq_params is None
        ), f"multi token prediction + sequence packing is not yet supported."

        if layer.config.sequence_parallel:
            rng_context = tensor_parallel.get_cuda_rng_tracker().fork()
        else:
            rng_context = nullcontext()

        # fp8 context is added in 1f1b schedule, so we don't need to add it here
        with rng_context:
            hidden_states = layer._concat_embeddings(hidden_states, decoder_input)
            return attn_forward(node, hidden_states)

    def submodule_mtp_postprocess_forward(node, hidden_states):
        hidden_states = layer._postprocess(hidden_states)
        node.chunk_state.mtp_hidden_states.append(hidden_states)
        if node.is_last_layer:
            hidden_states = torch.cat(node.chunk_state.mtp_hidden_states, dim=0)
            node.chunk_state.mtp_hidden_states = None
        return hidden_states

    def rng_context_wrapper(func, *args, **kwargs):
        """
        Wrapper to add rng context to submodule callables
        """
        if layer.config.sequence_parallel:
            rng_context = tensor_parallel.get_cuda_rng_tracker().fork()
        else:
            rng_context = nullcontext()
        with rng_context:
            return func(*args, **kwargs)

    # Build forward and backward callable functions
    # attn_forward already has rng context, no need to wrap
    attn_func = submodule_mtp_attn_forward
    dispatch_func = partial(rng_context_wrapper, dispatch_forward)
    mlp_func = partial(rng_context_wrapper, mlp_forward)
    combine_func = partial(rng_context_wrapper, combine_forward)
    mtp_post_process_func = submodule_mtp_postprocess_forward

    forward_funcs = [attn_func, dispatch_func, mlp_func, combine_func, mtp_post_process_func]
    if isinstance(backward_dw["attn"], list):
        backward_dw["attn"].append(layer.eh_proj)
    else:
        backward_dw["attn"] = [backward_dw["attn"], layer.eh_proj]

    return forward_funcs, backward_dw


def build_layer_callables(layer):
    """
    Builds the callable functions(forward and dw) for the given layer.
    For now, 1f1b overlap only support TransformerLayer and MultiTokenPredictionLayer.

    Args:
        layer: The layer to build callables for.

    Returns:
        forward_funcs: list of callable functions for the layer.
        backward_dw: dict of weight gradient functions for the layer.
    """
    if isinstance(layer, TransformerLayer):
        return build_transformer_layer_callables(layer)
    elif isinstance(layer, MultiTokenPredictionLayer):
        return build_mtp_layer_callables(layer)

    raise ValueError(f"Unsupported layer type: {type(layer)}")


================================================
FILE: megatron/core/models/gpt/gpt_layer_specs.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
import warnings
from typing import Optional, Union

from megatron.core.extensions.transformer_engine import HAVE_TE
from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add
from megatron.core.models.backends import (
    BackendSpecProvider,
    InferenceSpecProvider,
    LocalSpecProvider,
)
from megatron.core.models.gpt.moe_module_specs import get_moe_module_spec_for_backend
from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules
from megatron.core.transformer.enums import AttnMaskType, LayerType
from megatron.core.transformer.identity_op import IdentityOp
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from megatron.core.transformer.multi_latent_attention import (
    FusedMLASelfAttention,
    MLASelfAttention,
    MLASelfAttentionSubmodules,
)
from megatron.core.transformer.multi_token_prediction import (
    MultiTokenPredictionBlockSubmodules,
    get_mtp_layer_offset,
    get_mtp_layer_spec_for_backend,
    get_mtp_num_layers_to_build,
)
from megatron.core.transformer.pipeline_parallel_layer_layout import PipelineParallelLayerLayout
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.torch_norm import L2Norm
from megatron.core.transformer.transformer_block import (
    TransformerBlockSubmodules,
    get_num_layers_to_build,
)
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.transformer_layer import (
    TransformerLayer,
    TransformerLayerSubmodules,
    get_transformer_layer_offset,
)
from megatron.core.typed_torch import copy_signature
from megatron.core.utils import is_te_min_version

if HAVE_TE:
    from megatron.core.extensions.transformer_engine import TEFusedMLP, TENorm
    from megatron.core.extensions.transformer_engine_spec_provider import TESpecProvider
else:
    TEFusedMLP, TENorm, TESpecProvider = None, None, None

try:
    from megatron.core.extensions.kitchen import HAVE_KITCHEN, KitchenSpecProvider

except ImportError:
    HAVE_KITCHEN = False

try:
    import apex  # type: ignore[import-untyped]  # pylint: disable=unused-import

    from megatron.core.fusions.fused_layer_norm import FusedLayerNorm

    HAVE_APEX = True
    LNImpl = FusedLayerNorm
except ImportError:
    import warnings

    from megatron.core.transformer.torch_norm import WrappedTorchNorm

    warnings.warn("Apex is not installed. Falling back to Torch Norm")
    LNImpl = WrappedTorchNorm
    HAVE_APEX = False


def get_gpt_layer_with_inference_submodules(
    qk_layernorm: Optional[bool] = False,
    multi_latent_attention: Optional[bool] = False,
    qk_l2_norm: Optional[bool] = False,
    num_experts: Optional[int] = None,
    moe_grouped_gemm: Optional[bool] = False,
    moe_use_legacy_grouped_gemm: Optional[bool] = False,
) -> TransformerLayerSubmodules:
    """Use these submodules for inference optimized linear layers.
    Args:
        qk_layernorm (bool, optional): To use layernorm for queries/keys. Defaults to False.
        multi_latent_attention (bool, optional): To use MLA. Defaults to False.
        qk_l2_norm (bool, optional): To use l2 norm for queries/keys. Defaults to False.
    """
    assert HAVE_TE, "--transformer-impl inference_optimized requires transformer engine"
    backend = InferenceSpecProvider()

    mlp = get_mlp_module_spec_for_backend(
        backend=backend,
        num_experts=num_experts,
        moe_grouped_gemm=moe_grouped_gemm,
        use_te_op_fuser=False,
        use_te_activation_func=False,
    )

    if multi_latent_attention:
        assert qk_l2_norm is False, "qk_l2_norm is not supported with MLA."
        linear_q_up_proj = (
            backend.column_parallel_layer_norm_linear()
            if qk_layernorm
            else backend.column_parallel_linear()
        )
        linear_kv_up_proj = (
            backend.column_parallel_layer_norm_linear()
            if qk_layernorm
            else backend.column_parallel_linear()
        )
        return TransformerLayerSubmodules(
            input_layernorm=backend.layer_norm(has_residual=True),
            self_attention=ModuleSpec(
                module=MLASelfAttention,
                params={"attn_mask_type": AttnMaskType.causal},
                submodules=MLASelfAttentionSubmodules(
                    linear_q_proj=backend.column_parallel_linear(),
                    linear_q_down_proj=backend.linear(),
                    linear_q_up_proj=linear_q_up_proj,
                    linear_kv_down_proj=backend.linear(),
                    linear_kv_up_proj=linear_kv_up_proj,
                    core_attention=backend.core_attention(),
                    linear_proj=backend.row_parallel_linear(),
                    q_layernorm=IdentityOp,
                    kv_layernorm=IdentityOp,
                ),
            ),
            self_attn_bda=get_bias_dropout_add,
            pre_mlp_layernorm=IdentityOp,
            mlp=mlp,
            mlp_bda=get_bias_dropout_add,
        )
    else:
        qk_norm = backend.layer_norm(for_qk=True)
        return TransformerLayerSubmodules(
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": AttnMaskType.causal},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=backend.column_parallel_layer_norm_linear(),
                    core_attention=backend.core_attention(),
                    linear_proj=backend.row_parallel_linear(),
                    q_layernorm=(
                        L2Norm if qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp)
                    ),
                    k_layernorm=(
                        L2Norm if qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp)
                    ),
                ),
            ),
            self_attn_bda=get_bias_dropout_add,
            pre_mlp_layernorm=backend.layer_norm() if num_experts else IdentityOp,
            mlp=mlp,
            mlp_bda=get_bias_dropout_add,
            sharded_state_dict_keys_map={
                "mlp.0.weight": "mlp.linear_fc1.layer_norm_weight",
                "mlp.0.bias": "mlp.linear_fc1.layer_norm_bias",
                "mlp.1.basic_ops.0.weight": "mlp.linear_fc1.weight",
                "mlp.1.basic_ops.1.bias": "mlp.linear_fc1.bias",
                "mlp.3.basic_ops.0.weight": "mlp.linear_fc2.weight",
                "mlp.3.basic_ops.1.bias": "mlp.linear_fc2.bias",
            },
        )


@copy_signature(get_gpt_layer_with_inference_submodules)
def get_gpt_layer_with_inference_spec(*args, **kwargs) -> ModuleSpec:
    """Use this spec to use inference optimized linear layers."""
    return ModuleSpec(
        module=TransformerLayer, submodules=get_gpt_layer_with_inference_submodules(*args, **kwargs)
    )


def get_gpt_layer_with_transformer_engine_submodules(
    num_experts: Optional[int] = None,
    moe_grouped_gemm: Optional[bool] = False,
    qk_layernorm: Optional[bool] = False,
    multi_latent_attention: Optional[bool] = False,
    fp8: Optional[str] = None,  # pylint: disable=unused-argument
    qk_l2_norm: Optional[bool] = False,
    use_te_op_fuser: Optional[bool] = False,
    use_kitchen: bool = False,
    use_te_activation_func: bool = False,
    use_kitchen_attention: bool = False,
    kitchen_attention_backend: str = "sdpa",
    mla_down_proj_fusion: bool = False,
) -> TransformerLayerSubmodules:
    """Use these submodules to use lower-level Transformer Engine modules (required for fp8
    training).


    Args:
        num_experts (int, optional): Number of experts. Defaults to None.
        moe_grouped_gemm (bool, optional): To use Grouped GEMM. Defaults to False.
        qk_layernorm (bool, optional): To use layernorm for queries/keys. Defaults to False.
        multi_latent_attention (bool, optional): To use MLA. Defaults to False.
        fp8 (str, optional): Deprecated. For temporary Nemo compatibility.
        qk_l2_norm (bool, optional): To use l2 norm for queries/keys. Defaults to False.
        use_te_op_fuser (bool, optional): Use Transformer Engine's operation-based API, which may
                                          enable certain operation fusions. Defaults to False.
        mla_down_proj_fusion (bool, optional): Enable fused q/kv down-projection and fused input
                                               layernorm when backend supports. Otherwise fall back
                                               to the unfused MLA.

    Returns:
        TransformerLayerSubmodules: TE modules to construct a TransformerLayer

    """
    if fp8 is not None:
        warnings.warn(
            'The fp8 argument in "get_gpt_layer_with_transformer_engine_spec" has been deprecated'
            " and will be removed soon. Please update your code accordingly."
        )

    if use_kitchen:
        assert HAVE_KITCHEN
        backend: BackendSpecProvider = KitchenSpecProvider(
            fallback=TESpecProvider(),
            use_kitchen_attention=use_kitchen_attention,
            kitchen_attention_backend=kitchen_attention_backend,
        )
        if use_te_op_fuser:
            raise AssertionError("use_te_op_fuser not compatible with using kitchen in mlp.")
        if use_te_activation_func:
            raise AssertionError("use_te_activation_func not compatible with using kitchen.")
    else:
        backend = TESpecProvider()

    mlp = get_mlp_module_spec_for_backend(
        backend=backend,
        num_experts=num_experts,
        moe_grouped_gemm=moe_grouped_gemm,
        use_te_op_fuser=use_te_op_fuser,
        use_te_activation_func=use_te_activation_func,
    )

    if multi_latent_attention:
        assert qk_l2_norm is False, "qk_l2_norm is not supported with MLA."
        linear_q_up_proj = (
            backend.column_parallel_layer_norm_linear()
            if qk_layernorm
            else backend.column_parallel_linear()
        )
        linear_kv_up_proj = (
            backend.column_parallel_layer_norm_linear()
            if qk_layernorm
            else backend.column_parallel_linear()
        )

        if mla_down_proj_fusion:
            fuse_input_layernorm = backend.column_parallel_layer_norm_linear() is not None
            input_layernorm = IdentityOp if fuse_input_layernorm else backend.layer_norm()
            down_proj_linear = (
                backend.column_parallel_layer_norm_linear()
                if fuse_input_layernorm
                else backend.linear()
            )
            return TransformerLayerSubmodules(
                input_layernorm=input_layernorm,
                self_attention=ModuleSpec(
                    module=FusedMLASelfAttention,
                    params={"attn_mask_type": AttnMaskType.causal},
                    submodules=MLASelfAttentionSubmodules(
                        linear_q_proj=backend.column_parallel_linear(),
                        linear_qkv_down_proj=down_proj_linear,
                        linear_q_up_proj=linear_q_up_proj,
                        linear_kv_up_proj=linear_kv_up_proj,
                        core_attention=backend.core_attention(),
                        linear_proj=backend.row_parallel_linear(),
                        q_layernorm=IdentityOp,
                        kv_layernorm=IdentityOp,
                    ),
                ),
                self_attn_bda=get_bias_dropout_add,
                pre_mlp_layernorm=backend.layer_norm() if num_experts else IdentityOp,
                mlp=mlp,
                mlp_bda=get_bias_dropout_add,
                sharded_state_dict_keys_map=(
                    {
                        "self_attention.linear_q_down_proj.layer_norm_": "input_layernorm.",
                        "self_attention.linear_kv_down_proj.layer_norm_": "input_layernorm.",
                        "self_attention.linear_qkv_down_proj.layer_norm_": "input_layernorm.",
                    }
                    if fuse_input_layernorm
                    else {}
                ),
            )
        return TransformerLayerSubmodules(
            input_layernorm=backend.layer_norm(has_residual=True),
            self_attention=ModuleSpec(
                module=MLASelfAttention,
                params={"attn_mask_type": AttnMaskType.causal},
                submodules=MLASelfAttentionSubmodules(
                    linear_q_proj=backend.column_parallel_linear(),
                    linear_q_down_proj=backend.linear(),
                    linear_q_up_proj=linear_q_up_proj,
                    linear_kv_down_proj=backend.linear(),
                    linear_kv_up_proj=linear_kv_up_proj,
                    core_attention=backend.core_attention(),
                    linear_proj=backend.row_parallel_linear(),
                    q_layernorm=IdentityOp,
                    kv_layernorm=IdentityOp,
                ),
            ),
            self_attn_bda=get_bias_dropout_add,
            pre_mlp_layernorm=backend.layer_norm(has_residual=True) if num_experts else IdentityOp,
            mlp=mlp,
            mlp_bda=get_bias_dropout_add,
        )
    else:
        qk_norm = backend.layer_norm(for_qk=True)
        return TransformerLayerSubmodules(
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": AttnMaskType.causal},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=backend.column_parallel_layer_norm_linear(),
                    core_attention=backend.core_attention(),
                    linear_proj=backend.row_parallel_linear(),
                    q_layernorm=(
                        L2Norm if qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp)
                    ),
                    k_layernorm=(
                        L2Norm if qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp)
                    ),
                ),
            ),
            self_attn_bda=get_bias_dropout_add,
            pre_mlp_layernorm=backend.layer_norm(has_residual=True) if num_experts else IdentityOp,
            mlp=mlp,
            mlp_bda=get_bias_dropout_add,
            sharded_state_dict_keys_map={
                "mlp.0.weight": "mlp.linear_fc1.layer_norm_weight",
                "mlp.0.bias": "mlp.linear_fc1.layer_norm_bias",
                "mlp.1.basic_ops.0.weight": "mlp.linear_fc1.weight",
                "mlp.1.basic_ops.1.bias": "mlp.linear_fc1.bias",
                "mlp.3.basic_ops.0.weight": "mlp.linear_fc2.weight",
                "mlp.3.basic_ops.1.bias": "mlp.linear_fc2.bias",
            },
        )


@copy_signature(get_gpt_layer_with_transformer_engine_submodules)
def get_gpt_layer_with_transformer_engine_spec(*args, **kwargs) -> ModuleSpec:
    """Use this spec to use lower-level Transformer Engine modules (required for fp8 training)."""
    return ModuleSpec(
        module=TransformerLayer,
        submodules=get_gpt_layer_with_transformer_engine_submodules(*args, **kwargs),
    )


def get_gpt_layer_local_submodules(
    num_experts: Optional[int] = None,
    moe_grouped_gemm: Optional[bool] = False,
    qk_layernorm: Optional[bool] = False,
    multi_latent_attention: Optional[bool] = False,
    fp8: Optional[str] = None,  # pylint: disable=unused-argument
    normalization: Optional[str] = None,
    qk_l2_norm: Optional[bool] = False,
    use_kitchen: bool = False,
    use_kitchen_attention: bool = False,
    kitchen_attention_backend: str = "sdpa",
) -> TransformerLayerSubmodules:
    """Use these submodules for an implementation using only modules in Megatron-Core.


    Args:
        num_experts (int, optional): Number of experts. Defaults to None.
        moe_grouped_gemm (bool, optional): To use Grouped GEMM. Defaults to False.
        qk_layernorm (bool, optional): To use layernorm for queries/keys. Defaults to False.
        multi_latent_attention (bool, optional): To use MLA. Defaults to False.
        fp8 (str, optional): Deprecated. For temporary Nemo compatibility.
        qk_l2_norm (bool, optional): To use l2 norm for queries/keys. Defaults to False.

    Returns:
        TransformerLayerSubmodules: Megatron-Core modules to construct a TransformerLayer
    """

    if use_kitchen:
        assert HAVE_KITCHEN
        backend = KitchenSpecProvider(
            fallback=LocalSpecProvider(),
            use_kitchen_attention=use_kitchen_attention,
            kitchen_attention_backend=kitchen_attention_backend,
        )
    else:
        backend = LocalSpecProvider()
    # Adjust for RMS norm.
    if normalization == "RMSNorm":
        layer_norm = backend.layer_norm(rms_norm=True, for_qk=False, has_residual=True)
        qk_norm = backend.layer_norm(rms_norm=True, for_qk=True)
    else:
        layer_norm = backend.layer_norm(rms_norm=False, for_qk=False, has_residual=True)
        qk_norm = backend.layer_norm(rms_norm=False, for_qk=True)

    if fp8 is not None:
        warnings.warn(
            'The fp8 argument in "get_gpt_layer_local_spec" has been deprecated'
            " and will be removed soon. Please update your code accordingly."
        )

    mlp = get_mlp_module_spec_for_backend(
        backend=backend, num_experts=num_experts, moe_grouped_gemm=moe_grouped_gemm
    )

    if multi_latent_attention:
        assert qk_l2_norm is False, "qk_l2_norm is not supported with MLA."
        return TransformerLayerSubmodules(
            input_layernorm=layer_norm,
            self_attention=ModuleSpec(
                module=MLASelfAttention,
                params={"attn_mask_type": AttnMaskType.causal},
                submodules=MLASelfAttentionSubmodules(
                    linear_q_proj=backend.column_parallel_linear(),
                    linear_q_down_proj=backend.column_parallel_linear(),
                    linear_q_up_proj=backend.column_parallel_linear(),
                    linear_kv_down_proj=backend.column_parallel_linear(),
                    linear_kv_up_proj=backend.column_parallel_linear(),
                    core_attention=backend.core_attention(),
                    linear_proj=backend.row_parallel_linear(),
                    q_layernorm=qk_norm if qk_layernorm else IdentityOp,
                    kv_layernorm=qk_norm if qk_layernorm else IdentityOp,
                ),
            ),
            self_attn_bda=get_bias_dropout_add,
            pre_mlp_layernorm=layer_norm,
            mlp=mlp,
            mlp_bda=get_bias_dropout_add,
        )
    else:
        return TransformerLayerSubmodules(
            input_layernorm=layer_norm,
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": AttnMaskType.causal},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=backend.column_parallel_linear(),
                    core_attention=backend.core_attention(),
                    linear_proj=backend.row_parallel_linear(),
                    q_layernorm=(
                        L2Norm if qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp)
                    ),
                    k_layernorm=(
                        L2Norm if qk_l2_norm else (qk_norm if qk_layernorm else IdentityOp)
                    ),
                ),
            ),
            self_attn_bda=get_bias_dropout_add,
            pre_mlp_layernorm=layer_norm,
            mlp=mlp,
            mlp_bda=get_bias_dropout_add,
            sharded_state_dict_keys_map={
                "input_layernorm.": "self_attention.linear_qkv.layer_norm_",
                "pre_mlp_layernorm.": "mlp.linear_fc1.layer_norm_",
            },
        )


@copy_signature(get_gpt_layer_local_submodules)
def get_gpt_layer_local_spec(*args, **kwargs) -> ModuleSpec:
    """Use this spec for an implementation using only modules in Megatron-Core."""
    return ModuleSpec(
        module=TransformerLayer, submodules=get_gpt_layer_local_submodules(*args, **kwargs)
    )


def _get_mlp_module_spec(
    use_te: Optional[bool] = True,
    num_experts: Optional[int] = None,
    moe_grouped_gemm: Optional[bool] = False,
    fp8: Optional[str] = None,  # pylint: disable=unused-argument
):
    warnings.warn(
        """This private function is on a deprecation track. Please switch to `get_mlp_module_spec`
        since it will be removed in a future release."""
    )

    return get_mlp_module_spec(
        use_te=use_te, num_experts=num_experts, moe_grouped_gemm=moe_grouped_gemm, fp8=fp8
    )


def get_mlp_module_spec(
    use_te: Optional[bool] = True,
    num_experts: Optional[int] = None,
    moe_grouped_gemm: Optional[bool] = False,
    fp8: Optional[str] = None,  # pylint: disable=unused-argument
    use_te_op_fuser: Optional[bool] = False,
) -> ModuleSpec:
    """Helper function to get module spec for MLP/MoE"""
    if fp8 is not None:
        warnings.warn(
            'The fp8 argument in "_get_mlp_module_spec" has been deprecated'
            " and will be removed soon. Please update your code accordingly."
        )
    if use_te_op_fuser:
        if not is_te_min_version("1.13.0"):
            raise ValueError(
                "Transformer Engine operation-based API requires Transformer Engine 1.13+"
            )
        if num_experts is not None:
            raise ValueError(
                "Transformer Engine operation-based API does not support mixture-of-experts"
            )

    return get_mlp_module_spec_for_backend(
        backend=TESpecProvider() if use_te else LocalSpecProvider(),
        num_experts=num_experts,
        moe_grouped_gemm=moe_grouped_gemm,
        use_te_op_fuser=use_te_op_fuser,
    )


def get_mlp_module_spec_for_backend(
    backend: BackendSpecProvider,
    num_experts: Optional[int] = None,
    moe_grouped_gemm: Optional[bool] = False,
    use_te_op_fuser: Optional[bool] = False,
    use_te_activation_func: bool = False,
) -> ModuleSpec:
    """Helper function to get module spec for MLP/MoE"""

    linear_fc2 = backend.row_parallel_linear()
    activation_func = backend.activation_func() if use_te_activation_func else None

    if num_experts is None:
        # Dense MLP w/ or w/o TE modules.
        module = TEFusedMLP if use_te_op_fuser else MLP
        if backend.fuse_layernorm_and_linear():
            linear_fc1 = backend.column_parallel_layer_norm_linear()
            assert linear_fc1 is not None
        else:
            linear_fc1 = backend.column_parallel_linear()
        return ModuleSpec(
            module=module,
            submodules=MLPSubmodules(
                linear_fc1=linear_fc1, linear_fc2=linear_fc2, activation_func=activation_func
            ),
        )
    else:
        # Mixture of experts with modules in megatron core.
        return get_moe_module_spec_for_backend(
            backend=backend,
            num_experts=num_experts,
            moe_grouped_gemm=moe_grouped_gemm,
            use_te_activation_func=use_te_activation_func,
        )


def get_gpt_decoder_layer_specs(
    config: TransformerConfig,
    use_transformer_engine: bool,
    normalization: Optional[str] = None,
    qk_l2_norm: Optional[bool] = False,
    vp_stage: Optional[int] = None,
    pp_rank: Optional[int] = None,
) -> TransformerBlockSubmodules:
    """GPT block spec."""
    if use_transformer_engine:
        layer_norm_impl = TENorm
        dense_layer_spec = get_gpt_layer_with_transformer_engine_spec(
            num_experts=None,
            moe_grouped_gemm=False,
            qk_layernorm=config.qk_layernorm,
            multi_latent_attention=config.multi_latent_attention,
            qk_l2_norm=qk_l2_norm,
            use_kitchen=config.use_kitchen,
            use_te_activation_func=config.use_te_activation_func,
            use_kitchen_attention=config.use_kitchen_attention,
            kitchen_attention_backend=config.kitchen_attention_backend,
            mla_down_proj_fusion=getattr(config, "mla_down_proj_fusion", False),
        )
        moe_layer_spec = get_gpt_layer_with_transformer_engine_spec(
            num_experts=config.num_moe_experts,
            moe_grouped_gemm=config.moe_grouped_gemm,
            qk_layernorm=config.qk_layernorm,
            multi_latent_attention=config.multi_latent_attention,
            qk_l2_norm=qk_l2_norm,
            use_kitchen=config.use_kitchen,
            use_te_activation_func=config.use_te_activation_func,
            use_kitchen_attention=config.use_kitchen_attention,
            kitchen_attention_backend=config.kitchen_attention_backend,
            mla_down_proj_fusion=getattr(config, "mla_down_proj_fusion", False),
        )
    elif config.transformer_impl == "inference_optimized":
        layer_norm_impl = TENorm
        dense_layer_spec = get_gpt_layer_with_inference_spec(
            qk_layernorm=config.qk_layernorm,
            multi_latent_attention=config.multi_latent_attention,
            qk_l2_norm=qk_l2_norm,
        )
        moe_layer_spec = get_gpt_layer_with_inference_spec(
            qk_layernorm=config.qk_layernorm,
            multi_latent_attention=config.multi_latent_attention,
            qk_l2_norm=qk_l2_norm,
            num_experts=config.num_moe_experts,
            moe_grouped_gemm=config.moe_grouped_gemm,
            moe_use_legacy_grouped_gemm=config.moe_use_legacy_grouped_gemm,
        )
    else:
        layer_norm_impl = LNImpl
        dense_layer_spec = get_gpt_layer_local_spec(
            num_experts=None,
            moe_grouped_gemm=False,
            qk_layernorm=config.qk_layernorm,
            multi_latent_attention=config.multi_latent_attention,
            normalization=normalization,
            qk_l2_norm=qk_l2_norm,
            use_kitchen=config.use_kitchen,
            use_kitchen_attention=config.use_kitchen_attention,
            kitchen_attention_backend=config.kitchen_attention_backend,
        )
        moe_layer_spec = get_gpt_layer_local_spec(
            num_experts=config.num_moe_experts,
            moe_grouped_gemm=config.moe_grouped_gemm,
            qk_layernorm=config.qk_layernorm,
            multi_latent_attention=config.multi_latent_attention,
            normalization=normalization,
            qk_l2_norm=qk_l2_norm,
            use_kitchen=config.use_kitchen,
            use_kitchen_attention=config.use_kitchen_attention,
            kitchen_attention_backend=config.kitchen_attention_backend,
        )

    # Parse config.moe_layer_freq to determine the pattern of expert/dense layers.
    # 0 stands for dense layers, 1 stands for expert layers.
    # For integer N: Creates a pattern with one expert layer every N layers.
    # For string pattern: Evaluates the str directly (e.g. "[1,0,1]" for alternating expert/dense).
    if isinstance(config.moe_layer_freq, int):
        moe_layer_pattern = [
            1 if (i % config.moe_layer_freq == 0) else 0 for i in range(config.num_layers)
        ]
    elif isinstance(config.moe_layer_freq, list):
        moe_layer_pattern = config.moe_layer_freq
        assert len(moe_layer_pattern) == config.num_layers, (
            f"Invalid length of moe_layer_pattern: {len(moe_layer_pattern)}, "
            f"expected {config.num_layers}, "
            f"current moe layer pattern: {config.moe_layer_freq}"
        )
    else:
        raise ValueError(
            f"Invalid moe_layer_freq: {type(config.moe_layer_freq)}, {config.moe_layer_freq}"
        )

    # Create the layer specs for the model.
    layer_specs = []
    for layer_number in range(config.num_layers):
        if moe_layer_pattern[layer_number] == 1:
            layer_specs.append(moe_layer_spec)
        elif moe_layer_pattern[layer_number] == 0:
            layer_specs.append(dense_layer_spec)
        else:
            raise ValueError(f"Invalid layer pattern: {moe_layer_pattern}")

    return layer_specs


def get_gpt_decoder_block_spec(
    config: TransformerConfig,
    use_transformer_engine: bool,
    normalization: Optional[str] = None,
    qk_l2_norm: Optional[bool] = False,
    vp_stage: Optional[int] = None,
    pp_rank: Optional[int] = None,
) -> TransformerBlockSubmodules:
    """GPT block spec."""
    layer_specs = get_gpt_decoder_layer_specs(
        config, use_transformer_engine, normalization, qk_l2_norm
    )
    # Slice the layer specs to only include the layers that are built in this pipeline stage.
    # Note: MCore layer_number starts at 1
    num_layers_to_build = get_num_layers_to_build(config, vp_stage=vp_stage, pp_rank=pp_rank)

    if config.pipeline_model_parallel_layout is not None:
        layout = config.pipeline_model_parallel_layout
        assert isinstance(layout, PipelineParallelLayerLayout)
        local_layer_specs = [
            layer_specs[layer_id]
            for layer_id in layout.get_layer_id_list(
                layer_type=LayerType.decoder, vp_stage=vp_stage, pp_rank=pp_rank
            )
        ]
    else:
        offset = get_transformer_layer_offset(config, vp_stage=vp_stage, pp_rank=pp_rank)
        local_layer_specs = layer_specs[offset : offset + num_layers_to_build]

    if use_transformer_engine:
        layer_norm_impl = TENorm
    elif config.transformer_impl == "inference_optimized":
        layer_norm_impl = TENorm
    else:
        layer_norm_impl = LNImpl
    # Block spec.
    block_spec = TransformerBlockSubmodules(
        layer_specs=local_layer_specs, layer_norm=layer_norm_impl
    )

    return block_spec


def get_gpt_mtp_block_spec(
    config: TransformerConfig,
    spec: Union[TransformerBlockSubmodules, ModuleSpec],
    use_transformer_engine: bool,
    vp_stage: Optional[int] = None,
    pp_rank: Optional[int] = None,
) -> MultiTokenPredictionBlockSubmodules:
    """GPT Multi-Token Prediction (MTP) block spec."""
    if use_transformer_engine:
        backend: BackendSpecProvider = (
            KitchenSpecProvider(
                fallback=TESpecProvider(),
                use_kitchen_attention=config.use_kitchen_attention,
                kitchen_attention_backend=config.kitchen_attention_backend,
            )
            if config.use_kitchen
            else TESpecProvider()
        )
    else:
        backend = (
            KitchenSpecProvider(
                fallback=LocalSpecProvider(),
                use_kitchen_attention=config.use_kitchen_attention,
                kitchen_attention_backend=config.kitchen_attention_backend,
            )
            if config.use_kitchen
            else LocalSpecProvider()
        )
    return get_gpt_mtp_block_spec_for_backend(
        config=config, spec=spec, backend=backend, vp_stage=vp_stage, pp_rank=pp_rank
    )


def get_gpt_mtp_block_spec_for_backend(
    config: TransformerConfig,
    spec: Union[TransformerBlockSubmodules, ModuleSpec],
    backend: BackendSpecProvider,
    vp_stage: Optional[int] = None,
    pp_rank: Optional[int] = None,
) -> MultiTokenPredictionBlockSubmodules:
    """GPT Multi-Token Prediction (MTP) block spec."""
    num_layers_to_build = get_mtp_num_layers_to_build(config, vp_stage=vp_stage, pp_rank=pp_rank)
    if num_layers_to_build == 0:
        return None

    if isinstance(spec, TransformerBlockSubmodules):
        # get the spec for the last layer of decoder block
        transformer_layer_spec = spec.layer_specs[-1]
    elif isinstance(spec, ModuleSpec) and spec.module == TransformerLayer:
        transformer_layer_spec = spec
    else:
        raise ValueError(f"Invalid spec: {spec}")

    mtp_layer_spec = get_mtp_layer_spec_for_backend(
        mtp_model_layer_spec=transformer_layer_spec, backend=backend
    )
    mtp_num_layers = config.mtp_num_layers if config.mtp_num_layers else 0
    mtp_layer_specs = [mtp_layer_spec] * mtp_num_layers

    offset = get_mtp_layer_offset(config, vp_stage=vp_stage)
    # split the mtp layer specs to only include the layers that are built in this pipeline stage.
    mtp_layer_specs = mtp_layer_specs[offset : offset + num_layers_to_build]
    if len(mtp_layer_specs) > 0:
        assert (
            len(mtp_layer_specs) == config.mtp_num_layers
        ), f"currently all of the mtp layers must stage in the same pipeline stage."
        mtp_block_spec = MultiTokenPredictionBlockSubmodules(layer_specs=mtp_layer_specs)
    else:
        mtp_block_spec = None

    return mtp_block_spec


================================================
FILE: megatron/core/models/gpt/gpt_model.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from collections import OrderedDict
from typing import Dict, Literal, Optional

import torch
from torch import Tensor

from megatron.core import tensor_parallel
from megatron.core.config_logger import has_config_logger_enabled, log_config_to_disk
from megatron.core.dist_checkpointing.mapping import ShardedStateDict
from megatron.core.inference.contexts import BaseInferenceContext
from megatron.core.models.common.embeddings import YarnRotaryEmbedding
from megatron.core.models.common.embeddings.language_model_embedding import LanguageModelEmbedding
from megatron.core.models.common.embeddings.rotary_pos_embedding import (
    MultimodalRotaryEmbedding,
    RotaryEmbedding,
)
from megatron.core.models.common.language_module.language_module import LanguageModule
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
    FineGrainedActivationOffloadingInterface as off_interface,
)
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.quantization.utils import get_quant_config_or_none
from megatron.core.tensor_parallel import gather_from_sequence_parallel_region
from megatron.core.transformer.enums import CudaGraphScope, ModelType
from megatron.core.transformer.multi_token_prediction import (
    MultiTokenPredictionBlock,
    mtp_on_this_rank,
    process_mtp_loss,
)
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_block import TransformerBlock
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.utils import (
    WrappedTensor,
    deprecate_inference_params,
    is_using_quantization_scales,
)


class GPTModel(LanguageModule):
    """GPT Transformer language model.

    Args:
        config (TransformerConfig):
            Transformer config
        transformer_layer_spec (ModuleSpec):
            Specifies module to use for transformer layers
        vocab_size (int):
            Vocabulary size
        max_sequence_length (int):
            maximum size of sequence. This is used for positional embedding
        pre_process (bool, optional):
            Include embedding layer (used with pipeline parallelism). Defaults to True.
        post_process (bool, optional):
            Include an output layer (used with pipeline parallelism). Defaults to True.
        fp16_lm_cross_entropy (bool, optional):
            Defaults to False.
        parallel_output (bool, optional):
            Do not gather the outputs, keep them split across tensor
            parallel ranks. Defaults to True.
        share_embeddings_and_output_weights (bool, optional):
            When True, input embeddings and output logit weights are shared. Defaults to False.
        position_embedding_type (Literal[learned_absolute,rope], optional):
            Position embedding type.. Defaults to 'learned_absolute'.
        rotary_percent (float, optional):
            Percent of rotary dimension to use for rotary position embeddings.
            Ignored unless position_embedding_type is 'rope'. Defaults to 1.0.
        rotary_base (int, optional):
            Base period for rotary position embeddings. Ignored unless
            position_embedding_type is 'rope'.
            Defaults to 10000.
        rope_scaling (bool, optional): Toggle RoPE scaling.
        rope_scaling_factor (float): RoPE scaling factor. Default 8.
        scatter_embedding_sequence_parallel (bool, optional):
            Whether embeddings should be scattered across sequence parallel
            region or not. Defaults to True.
        seq_len_interpolation_factor (Optional[float], optional):
            scale of linearly interpolating RoPE for longer sequences.
            The value must be a float larger than 1.0. Defaults to None.
        pg_collection (ProcessGroupCollection): Model communication process groups
    """

    def __init__(
        self,
        config: TransformerConfig,
        transformer_layer_spec: ModuleSpec,
        vocab_size: int,
        max_sequence_length: int,
        pre_process: bool = True,
        post_process: bool = True,
        fp16_lm_cross_entropy: bool = False,
        parallel_output: bool = True,
        share_embeddings_and_output_weights: bool = False,
        position_embedding_type: Literal[
            'learned_absolute', 'rope', 'mrope', 'yarn', 'none'
        ] = 'learned_absolute',
        rotary_percent: float = 1.0,
        rotary_base: int = 10000,
        rope_scaling: bool = False,
        rope_scaling_factor: float = 8.0,
        scatter_embedding_sequence_parallel: bool = True,
        seq_len_interpolation_factor: Optional[float] = None,
        mtp_block_spec: Optional[ModuleSpec] = None,
        pg_collection: Optional[ProcessGroupCollection] = None,
        vp_stage: Optional[int] = None,
    ) -> None:
        super().__init__(config=config, pg_collection=pg_collection)

        if has_config_logger_enabled(config):
            log_config_to_disk(config, locals(), prefix=type(self).__name__)

        self.transformer_layer_spec: ModuleSpec = transformer_layer_spec
        self.vocab_size = vocab_size
        self.max_sequence_length = max_sequence_length
        self.pre_process = pre_process
        self.post_process = post_process
        self.fp16_lm_cross_entropy = fp16_lm_cross_entropy
        self.parallel_output = parallel_output
        self.share_embeddings_and_output_weights = share_embeddings_and_output_weights
        self.vp_stage = vp_stage
        self.disable_param_offloading = True

        if hasattr(self.config, 'position_embedding_type'):
            self.position_embedding_type = self.config.position_embedding_type
        else:
            self.position_embedding_type = position_embedding_type

        # megatron core pipelining currently depends on model type
        # TODO: remove this dependency ?
        self.model_type = ModelType.encoder_or_decoder

        # These 4 attributes are needed for TensorRT-LLM export.
        self.max_position_embeddings = max_sequence_length
        self.rotary_percent = rotary_percent

        if hasattr(self.config, 'rotary_base'):
            self.rotary_base = self.config.rotary_base
        else:
            self.rotary_base = rotary_base
        self.rotary_scaling = rope_scaling
        self.mtp_block_spec = mtp_block_spec
        self.mtp_process = mtp_block_spec is not None and mtp_on_this_rank(
            self.config, ignore_virtual=False, vp_stage=vp_stage
        )

        if self.pre_process or self.mtp_process:
            self.embedding = LanguageModelEmbedding(
                config=self.config,
                vocab_size=self.vocab_size,
                max_sequence_length=self.max_sequence_length,
                position_embedding_type=position_embedding_type,
                scatter_to_sequence_parallel=scatter_embedding_sequence_parallel,
                tp_group=self.pg_collection.tp,
            )

        if self.position_embedding_type == 'rope' and not self.config.multi_latent_attention:
            self.rotary_pos_emb = RotaryEmbedding(
                kv_channels=self.config.kv_channels,
                rotary_percent=rotary_percent,
                rotary_interleaved=self.config.rotary_interleaved,
                seq_len_interpolation_factor=seq_len_interpolation_factor,
                rotary_base=rotary_base,
                rope_scaling=rope_scaling,
                rope_scaling_factor=rope_scaling_factor,
                use_cpu_initialization=self.config.use_cpu_initialization,
                cp_group=self.pg_collection.cp,
            )

        elif self.position_embedding_type == 'yarn':
            self.rotary_pos_emb = YarnRotaryEmbedding(
                kv_channels=self.config.kv_channels,
                rotary_percent=rotary_percent,
                rotary_interleaved=self.config.rotary_interleaved,
                seq_len_interpolation_factor=seq_len_interpolation_factor,
                rotary_base=rotary_base,
                scaling_factor=getattr(self.config, "yarn_rotary_scaling_factor"),
                original_max_position_embeddings=getattr(
                    self.config, "yarn_original_max_position_embeddings"
                ),
                beta_fast=getattr(self.config, "yarn_beta_fast"),
                beta_slow=getattr(self.config, "yarn_beta_slow"),
                mscale=getattr(self.config, "yarn_mscale"),
                mscale_all_dim=getattr(self.config, "yarn_mscale_all_dim"),
                correction_range_round_to_int=getattr(
                    self.config, "yarn_correction_range_round_to_int"
                ),
                use_cpu_initialization=self.config.use_cpu_initialization,
            )
        elif self.position_embedding_type == 'mrope' and not self.config.multi_latent_attention:
            self.rotary_pos_emb = MultimodalRotaryEmbedding(
                kv_channels=self.config.kv_channels,
                rotary_percent=rotary_percent,
                rotary_interleaved=self.config.rotary_interleaved,
                seq_len_interpolation_factor=seq_len_interpolation_factor,
                rotary_base=rotary_base,
            )
            self.mrope_section = self.config.mrope_section
            assert (
                self.mrope_section is not None
            ), "mrope require mrope_section setting, but we got None from TransformerConfig"

        # Cache for RoPE tensors which do not change between iterations.
        self.rotary_pos_emb_cache = {}

        # Transformer.
        self.decoder = TransformerBlock(
            config=self.config,
            spec=transformer_layer_spec,
            pre_process=self.pre_process,
            post_process=self.post_process,
            pg_collection=self.pg_collection,
            vp_stage=vp_stage,
        )

        if self.mtp_process:
            self.mtp = MultiTokenPredictionBlock(
                config=self.config,
                spec=self.mtp_block_spec,
                vp_stage=vp_stage,
                pg_collection=self.pg_collection,
            )

        # Output
        if self.post_process:

            if self.config.defer_embedding_wgrad_compute:
                # The embedding activation buffer preserves a reference to the input activations
                # of the final embedding projection layer GEMM. It will hold the activations for
                # all the micro-batches of a global batch for the last pipeline stage. Once we are
                # done with all the back props for all the microbatches for the last pipeline stage,
                # it will be in the pipeline flush stage. During this pipeline flush we use the
                # input activations stored in embedding activation buffer and gradient outputs
                # stored in gradient buffer to calculate the weight gradients for the embedding
                # final linear layer.
                self.embedding_activation_buffer = []
                self.grad_output_buffer = []
            else:
                self.embedding_activation_buffer = None
                self.grad_output_buffer = None

            self.output_layer = tensor_parallel.ColumnParallelLinear(
                config.hidden_size,
                self.vocab_size,
                config=config,
                init_method=(
                    config.embedding_init_method
                    if config.use_mup and not self.share_embeddings_and_output_weights
                    else config.init_method
                ),
                bias=False,
                skip_bias_add=False,
                gather_output=not self.parallel_output,
                skip_weight_param_allocation=self.pre_process
                and self.share_embeddings_and_output_weights,
                embedding_activation_buffer=self.embedding_activation_buffer,
                grad_output_buffer=self.grad_output_buffer,
                tp_group=self.pg_collection.tp,
            )

        if self.pre_process or self.post_process or self.mtp_process:
            self.setup_embeddings_and_output_layer()

        if has_config_logger_enabled(self.config):
            log_config_to_disk(
                self.config, self.state_dict(), prefix=f'{type(self).__name__}_init_ckpt'
            )
        for name, module in self.named_modules():
            if hasattr(module, 'finish_init'):
                quant_config = get_quant_config_or_none(name, self.config.quant_recipe)
                module.finish_init(quant_config)

    def set_input_tensor(self, input_tensor: Tensor) -> None:
        """Sets input tensor to the model.

        See megatron.model.transformer.set_input_tensor()

        Args:
            input_tensor (Tensor): Sets the input tensor for the model.
        """
        # This is usually handled in schedules.py but some inference code still
        # gives us non-lists or None
        if not isinstance(input_tensor, list):
            input_tensor = [input_tensor]

        assert len(input_tensor) == 1, 'input_tensor should only be length 1 for gpt/bert'
        self.decoder.set_input_tensor(input_tensor[0])

    def _preprocess(
        self,
        input_ids: Tensor,
        position_ids: Tensor,
        decoder_input: Tensor = None,
        inference_context: BaseInferenceContext = None,
        packed_seq_params: PackedSeqParams = None,
        padding_mask: Optional[Tensor] = None,
    ):
        """Preprocesses inputs for the transformer decoder.

        Applies embeddings to input tokens, or uses `decoder_input` from a previous
        pipeline stage. Also sets up rotary positional embeddings.
        """

        # If decoder_input is provided (not None), then input_ids and position_ids are ignored.
        # Otherwise, apply embedding layer on input_ids and position_ids to get decoder_input.

        in_inference_mode = inference_context is not None and not self.training

        # Decoder embedding.
        if decoder_input is not None:
            pass
        elif self.pre_process:
            if padding_mask is not None:
                assert padding_mask.shape == input_ids.shape, (
                    f"padding_mask shape {padding_mask.shape} does not match "
                    f"input_ids shape {input_ids.shape}"
                )
            decoder_input = self.embedding(input_ids=input_ids, position_ids=position_ids)
            if padding_mask is not None and self.config.sequence_parallel:
                padding_mask = (
                    tensor_parallel.scatter_to_sequence_parallel_region(
                        padding_mask.transpose(0, 1).contiguous()
                    )
                    .transpose(0, 1)
                    .contiguous()
                )
        else:
            # intermediate stage of pipeline
            # decoder will get hidden_states from encoder.input_tensor
            decoder_input = None

        # Rotary positional embeddings (embedding is None for PP intermediate devices)
        rotary_pos_emb = None
        rotary_pos_cos = None
        rotary_pos_sin = None
        # this is used to store combined cos/sin embeddings, exclusively for flash infer rope
        rotary_pos_cos_sin = None

        if self.position_embedding_type == 'rope' and not self.config.multi_latent_attention:
            use_flash_infer_fused_rope = (
                hasattr(inference_context, 'use_flashinfer_fused_rope')
                and inference_context.use_flashinfer_fused_rope
            )
            if in_inference_mode and (self.config.flash_decode or use_flash_infer_fused_rope):
                assert (
                    not self.config.flash_decode
                ) or inference_context.is_static_batching(), (
                    "Flash decode is only applicable to static batching."
                )
                # Flash decoding uses precomputed cos and sin for RoPE
                if self.config.flash_decode:
                    rotary_pos_cos, rotary_pos_sin = self.rotary_pos_emb_cache.setdefault(
                        inference_context.max_sequence_length,
                        self.rotary_pos_emb.get_cos_sin(inference_context.max_sequence_length),
                    )
                elif use_flash_infer_fused_rope:
                    assert not self.mtp_process, "MTP not tested with flashinfer_fused_rope"
                    rotary_pos_cos_sin = self.rotary_pos_emb_cache.setdefault(
                        inference_context.max_sequence_length,
                        torch.cat(
                            self.rotary_pos_emb.get_cos_sin(inference_context.max_sequence_length),
                            -1,
                        ),
                    )
            else:
                rotary_seq_len = self.rotary_pos_emb.get_rotary_seq_len(
                    inference_context, self.decoder, decoder_input, self.config, packed_seq_params
                )
                rotary_pos_emb = self.rotary_pos_emb(
                    rotary_seq_len,
                    packed_seq=packed_seq_params is not None
                    and packed_seq_params.qkv_format == 'thd',
                    cp_group=packed_seq_params.cp_group if packed_seq_params is not None else None,
                )
        elif self.position_embedding_type == 'yarn':
            if self.training or not self.config.flash_decode:
                rotary_seq_len = self.rotary_pos_emb.get_rotary_seq_len(
                    inference_context, self.decoder, decoder_input, self.config, packed_seq_params
                )
                rotary_pos_emb, _ = self.rotary_pos_emb(
                    rotary_seq_len,
                    packed_seq=packed_seq_params is not None
                    and packed_seq_params.qkv_format == 'thd',
                    cp_group=packed_seq_params.cp_group if packed_seq_params is not None else None,
                )
            else:
                raise NotImplementedError(
                    "Flash decoding uses precomputed cos and sin for RoPE, not implemented in "
                    "YarnRotaryEmbedding yet."
                )
        elif self.position_embedding_type == 'mrope' and not self.config.multi_latent_attention:
            if self.training or not self.config.flash_decode:
                rotary_pos_emb = self.rotary_pos_emb(
                    position_ids,
                    self.mrope_section,
                    cp_group=packed_seq_params.cp_group if packed_seq_params is not None else None,
                )
            else:
                # Flash decoding uses precomputed cos and sin for RoPE
                raise NotImplementedError(
                    "Flash decoding uses precomputed cos and sin for RoPE, not implemented in "
                    "MultimodalRotaryEmbedding yet."
                )

        if (
            in_inference_mode
            and (
                (
                    self.config.cuda_graph_impl == "local"
                    and CudaGraphScope.full_iteration not in self.config.cuda_graph_scope
                )
                or self.config.flash_decode
            )
            and inference_context.is_static_batching()
        ):
            current_batch_size = input_ids.shape[0]
            sequence_len_offset = torch.tensor(
                [inference_context.sequence_len_offset] * current_batch_size,
                dtype=torch.int32,
                device=torch.cuda.current_device(),
            )
        else:
            sequence_len_offset = None

        if in_inference_mode:
            # Clear the outputs for padding tokens when using dynamic batching with
            # quantization scales to avoid corrupting amax calculations
            if inference_context.is_dynamic_batching() and is_using_quantization_scales(
                self.config
            ):
                decoder_input[inference_context.padding_slice] = 0.0

            # Wrap decoder_input to allow the decoder (TransformerBlock) to delete the
            # reference held by this caller function, enabling early garbage collection for
            # inference. Skip wrapping if decoder_input is logged after decoder completion.
            if not has_config_logger_enabled(self.config):
                decoder_input = WrappedTensor(decoder_input)

        preproc_output = (
            decoder_input,
            rotary_pos_emb,
            rotary_pos_cos,
            rotary_pos_sin,
            sequence_len_offset,
            padding_mask,
        )
        if rotary_pos_cos_sin is not None:
            # only in the case of flashinfer fused rope will we
            # return this extra tensor
            # this is for backwards compatibility with
            # legacy unit tests, which break if you
            # return a 7 tuple instead of 6.
            preproc_output += (rotary_pos_cos_sin,)

        return preproc_output

    def preprocess_for_fine_grained_offloading(self):
        """Preprocess for fine-grained activation offloading."""
        off_interface.init_chunk_handler(
            vp_size=self.config.virtual_pipeline_model_parallel_size,
            vp_stage=self.vp_stage,
            min_offloaded_tensor_size=self.config.min_offloaded_tensor_size,
        )
        if self.disable_param_offloading:
            for param in self.decoder.parameters():
                off_interface.mark_not_offloadable(param)
            if self.mtp_process:
                for param in self.mtp.parameters():
                    off_interface.mark_not_offloadable(param)
            if self.post_process:
                for param in self.output_layer.parameters():
                    off_interface.mark_not_offloadable(param)
            self.disable_param_offloading = False

    def forward(
        self,
        input_ids: Tensor,
        position_ids: Tensor,
        attention_mask: Tensor,
        decoder_input: Tensor = None,
        labels: Tensor = None,
        inference_context: BaseInferenceContext = None,
        packed_seq_params: PackedSeqParams = None,
        extra_block_kwargs: dict = None,
        runtime_gather_output: Optional[bool] = None,
        *,
        inference_params: Optional[BaseInferenceContext] = None,
        loss_mask: Optional[Tensor] = None,
        padding_mask: Optional[Tensor] = None,
        is_spec_decode: Optional[bool] = None,
    ) -> Tensor:
        """Forward function of the GPT Model This function passes the input tensors
        through the embedding layer, and then the decoder and finally into the post
        processing layer (optional).

        It either returns the Loss values if labels are given  or the final hidden units

        Args:
            runtime_gather_output (bool): Gather output at runtime. Default None means
                `parallel_output` arg in the constructor will be used.
            padding_mask (Tensor, optional): Padding mask for MoE routing.
                Shape [bsz, seq_length]. True = padding (exclude), False = valid (include).
                Only used for MoE layers to exclude padding tokens from routing computations.
            is_spec_decode (bool, optional): Explicitly override whether speculative
                decoding is active.  When ``None`` (default) the flag is inferred from
                ``inference_context.num_speculative_tokens``.
        """
        if self.config.fine_grained_activation_offloading:
            self.preprocess_for_fine_grained_offloading()

        inference_context = deprecate_inference_params(inference_context, inference_params)

        preproc_output = self._preprocess(
            input_ids=input_ids,
            position_ids=position_ids,
            decoder_input=decoder_input,
            inference_context=inference_context,
            packed_seq_params=packed_seq_params,
            padding_mask=padding_mask,
        )

        (
            decoder_input,
            rotary_pos_emb,
            rotary_pos_cos,
            rotary_pos_sin,
            sequence_len_offset,
            padding_mask,
        ) = preproc_output[:6]

        rotary_pos_cos_sin = preproc_output[6] if len(preproc_output) == 7 else None

        # Run decoder.
        hidden_states = self.decoder(
            hidden_states=decoder_input,
            attention_mask=attention_mask,
            inference_context=inference_context,
            rotary_pos_emb=rotary_pos_emb,
            rotary_pos_cos=rotary_pos_cos,
            rotary_pos_sin=rotary_pos_sin,
            rotary_pos_cos_sin=rotary_pos_cos_sin,
            packed_seq_params=packed_seq_params,
            sequence_len_offset=sequence_len_offset,
            padding_mask=padding_mask,
            **(extra_block_kwargs or {}),
        )

        return self._postprocess(
            hidden_states=hidden_states,
            input_ids=input_ids,
            position_ids=position_ids,
            labels=labels,
            rotary_pos_emb=rotary_pos_emb,
            rotary_pos_cos=rotary_pos_cos,
            rotary_pos_sin=rotary_pos_sin,
            mtp_in_postprocess=self.mtp_process,
            loss_mask=loss_mask,
            decoder_input=decoder_input,
            attention_mask=attention_mask,
            inference_params=inference_params,
            packed_seq_params=packed_seq_params,
            sequence_len_offset=sequence_len_offset,
            runtime_gather_output=runtime_gather_output,
            extra_block_kwargs=extra_block_kwargs,
            inference_context=inference_context,
            is_spec_decode=is_spec_decode,
        )

    def _postprocess(
        self,
        hidden_states,
        input_ids,
        position_ids,
        labels,
        rotary_pos_emb,
        rotary_pos_cos,
        rotary_pos_sin,
        mtp_in_postprocess=None,
        loss_mask=None,
        decoder_input=None,
        attention_mask=None,
        inference_params=None,
        packed_seq_params=None,
        sequence_len_offset=None,
        runtime_gather_output=None,
        extra_block_kwargs=None,
        inference_context=None,
        is_spec_decode=None,
    ):
        """Postprocesses decoder hidden states to generate logits or compute loss.

        Applies Multi-Token Prediction if enabled, generates output logits through
        the output layer, and computes language model loss when labels are provided.
        """
        in_inference_mode = inference_context is not None and not self.training
        if in_inference_mode:
            assert runtime_gather_output, "Inference must always gather TP logits"

        # Check if speculative decoding is active. When it is, MTP must be
        # computed *after* verification so that it is conditioned on verified
        # tokens rather than stale speculative tokens from the previous step.
        if is_spec_decode is None:
            is_spec_decode = (
                in_inference_mode
                and inference_context.is_dynamic_batching()
                and inference_context.num_speculative_tokens > 0
            )

        # logits and loss
        output_weight = None
        if self.share_embeddings_and_output_weights:
            output_weight = self.shared_embedding_or_output_weight()
        if mtp_in_postprocess and not (in_inference_mode or is_spec_decode):
            hidden_states = self.mtp(
                input_ids=input_ids,
                position_ids=position_ids,
                hidden_states=hidden_states,
                attention_mask=attention_mask,
                inference_params=None,  # MTP layers don't use KV cache
                rotary_pos_emb=rotary_pos_emb,
                rotary_pos_cos=rotary_pos_cos,
                rotary_pos_sin=rotary_pos_sin,
                packed_seq_params=packed_seq_params,
                sequence_len_offset=sequence_len_offset,
                embedding=self.embedding,
                **(extra_block_kwargs or {}),
            )

        if not self.post_process:
            return hidden_states

        if self.config.mtp_num_layers:
            assert self.config.mtp_num_layers > 0
            if in_inference_mode or is_spec_decode:
                # Cache decoder hidden states for serial MTP computation
                # after speculative token verification.
                self._decoder_hidden_states_cache = hidden_states
            else:
                # In training/eval, use the utility function for processing MTP loss/scaling.
                hidden_states = process_mtp_loss(
                    hidden_states=hidden_states,
                    labels=labels,
                    loss_mask=loss_mask,
                    output_layer=self.output_layer,
                    output_weight=output_weight,
                    runtime_gather_output=runtime_gather_output,
                    is_training=self.training,
                    compute_language_model_loss=self.compute_language_model_loss,
                    config=self.config,
                    cp_group=self.pg_collection.cp,
                    packed_seq_params=packed_seq_params,
                    scale_logits_fn=self._scale_logits if self.config.use_mup else None,
                )
        sequence_parallel_override = False

        if in_inference_mode and inference_context.config.materialize_only_last_token_logits:
            if inference_context.is_static_batching():
                hidden_states = hidden_states[-1:, :, :]
            else:
                if self.output_layer.sequence_parallel:
                    # Perform the sequence parallel gather here instead of after the output layer
                    # because we need to slice the last token logits from the full view of the
                    # packed logits across all requests.
                    hidden_states = gather_from_sequence_parallel_region(
                        hidden_states, group=self.pg_collection.tp
                    )
                    self.output_layer.sequence_parallel = False
                    sequence_parallel_override = True

                # Reshape [S, B, H] (with B=1) to [1, S, H] for logit extraction,
                # then back to [S’, B, H] for the output layer.
                reshaped = hidden_states.squeeze(1).unsqueeze(0)
                hidden_states = inference_context.last_token_logits(reshaped).unsqueeze(1)

        logits, _ = self.output_layer(
            hidden_states, weight=output_weight, runtime_gather_output=runtime_gather_output
        )

        # Apply MuP output scaling to logits
        logits = self._scale_logits(logits)

        # Restore sequence parallel execution to the output layer if necessary.
        if sequence_parallel_override:
            assert (
                in_inference_mode
                and inference_context.is_dynamic_batching()
                and inference_context.config.materialize_only_last_token_logits
            )
            self.output_layer.sequence_parallel = True

        if has_config_logger_enabled(self.config):
            payload = OrderedDict(
                {
                    'input_ids': input_ids,
                    'position_ids': position_ids,
                    'attention_mask': attention_mask,
                    'decoder_input': decoder_input,
                    'logits': logits,
                }
            )
            log_config_to_disk(self.config, payload, prefix='input_and_logits')

        if labels is None:
            # [s b h] => [b s h]
            return logits.transpose(0, 1).contiguous()

        loss = self.compute_language_model_loss(labels, logits)

        return loss

    @torch.inference_mode()
    def compute_mtp_single_step(
        self,
        hidden_states: Tensor,
        next_token_ids: Tensor,
        position_ids: Tensor,
        depth: int,
        runtime_gather_output: bool = True,
    ) -> tuple:
        """Compute a single MTP depth for speculative decoding.

        This is called after speculative token verification to compute MTP
        predictions conditioned on verified tokens only.

        Args:
            hidden_states (Tensor): Hidden states at last accepted positions [N, 1, H].
            next_token_ids (Tensor): Correct next token IDs [1, N].
            position_ids (Tensor): Position IDs for the next tokens [1, N].
            depth (int): MTP depth index (0-indexed).
            runtime_gather_output (bool): Whether to gather output across TP.

        Returns:
            tuple: (new_hidden_states [N, 1, H], logits [N, 1, vocab_size]).
        """
        layer_idx = 0 if self.mtp.mtp_use_repeated_layer else depth
        mtp_hidden = self.mtp.layers[layer_idx].forward_single_position(
            hidden_states=hidden_states,
            next_token_ids=next_token_ids,
            position_ids=position_ids,
            embedding=self.embedding,
        )

        output_weight = None
        if self.share_embeddings_and_output_weights:
            output_weight = self.shared_embedding_or_output_weight()

        logits, _ = self.output_layer(
            mtp_hidden, weight=output_weight, runtime_gather_output=runtime_gather_output
        )
        logits = self._scale_logits(logits)

        return mtp_hidden, logits

    def build_schedule_plan(
        self,
        input_ids: Tensor,
        position_ids: Tensor,
        attention_mask: Tensor,
        decoder_input: Tensor = None,
        labels: Tensor = None,
        inference_context: BaseInferenceContext = None,
        packed_seq_params: PackedSeqParams = None,
        extra_block_kwargs: dict = None,
        runtime_gather_output: Optional[bool] = None,
        inference_params: Optional[BaseInferenceContext] = None,
        loss_mask: Optional[Tensor] = None,
        padding_mask: Optional[Tensor] = None,
    ):
        """Builds a computation schedule plan for the model.

        This function creates a schedule plan for a model chunk, including
        preprocessing, transformer layers, and postprocessing.
        The schedule plan is used to optimize computation and memory usage
        in distributed environments.

        Args:
            input_ids (Tensor): Input token IDs.
            position_ids (Tensor): Position IDs.
            attention_mask (Tensor): Attention mask.
            decoder_input (Tensor, optional): Decoder input tensor. Defaults to None.
            labels (Tensor, optional): Labels for loss computation. Defaults to None.
            inference_context (BaseInferenceContext, optional):
                Inference context. Defaults to None.
            packed_seq_params (PackedSeqParams, optional):
                Parameters for packed sequences. Defaults to None.
            extra_block_kwargs (dict, optional):
                Additional keyword arguments for blocks. Defaults to None.
            runtime_gather_output (Optional[bool], optional):
                Whether to gather output at runtime. Defaults to None.
            inference_params (InferenceParams, optional):
                Parameters for inference. Defaults to None.
            loss_mask (Optional[Tensor], optional): Loss mask. Defaults to None.
            padding_mask (Optional[Tensor], optional): Padding mask. Defaults to None.

        Returns:
            TransformerModelChunkSchedulePlan: The model chunk schedule plan.
        """

        if self.config.fine_grained_activation_offloading:
            self.preprocess_for_fine_grained_offloading()

        from ..common.model_chunk_schedule_plan import TransformerModelChunkSchedulePlan

        return TransformerModelChunkSchedulePlan(
            self,
            input_ids,
            position_ids,
            attention_mask,
            decoder_input,
            labels,
            packed_seq_params,
            extra_block_kwargs,
            runtime_gather_output,
            loss_mask,
            padding_mask,
        )

    def sharded_state_dict(
        self, prefix: str = '', sharded_offsets: tuple = (), metadata: Optional[Dict] = None
    ) -> ShardedStateDict:
        """Sharded state dict implementation for GPTModel backward-compatibility.

        Removing extra state.
        Tie word embeddings and output layer in mtp process stage.

        Args:
            prefix (str): Module name prefix.
            sharded_offsets (tuple): PP related offsets, expected to be empty at this module level.
            metadata (Optional[Dict]): metadata controlling sharded state dict creation.

        Returns:
            ShardedStateDict: sharded state dict for the GPTModel
        """
        sharded_state_dict = super().sharded_state_dict(prefix, sharded_offsets, metadata)
        output_layer_extra_state_key = f'{prefix}output_layer._extra_state'

        # Old GPT checkpoints only stored the output layer weight key. So we remove the
        # _extra_state key but check that it doesn't contain any data anyway
        output_extra_state = sharded_state_dict.pop(output_layer_extra_state_key, None)
        assert not (
            output_extra_state and output_extra_state.data
        ), f'Expected output layer extra state to be empty, got: {output_extra_state}'

        return sharded_state_dict


================================================
FILE: megatron/core/models/gpt/heterogeneous/heterogeneous_layer_specs.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import warnings
from typing import Optional

from megatron.core.extensions.transformer_engine import HAVE_TE
from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add
from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear
from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules
from megatron.core.transformer.dot_product_attention import DotProductAttention
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.heterogeneous.heterogeneous_config import (
    AttentionConfig,
    HeterogeneousTransformerConfig,
    MLPConfig,
    TransformerBlockConfig,
)
from megatron.core.transformer.heterogeneous.linear_replacements import ColumnParallelLinearGathered
from megatron.core.transformer.identity_op import IdentityFuncOp, IdentityOp
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_block import (
    TransformerBlockSubmodules,
    get_num_layers_to_build,
)
from megatron.core.transformer.transformer_layer import (
    TransformerLayer,
    TransformerLayerSubmodules,
    get_transformer_layer_offset,
)
from megatron.core.typed_torch import not_none
from megatron.core.utils import is_te_min_version

if HAVE_TE:
    from megatron.core.extensions.transformer_engine import (
        TEDotProductAttention,
        TELayerNormColumnParallelLinear,
        TENorm,
        TERowParallelLinear,
    )
    from megatron.core.transformer.heterogeneous.linear_replacements import (
        TELayerNormColumnParallelLinearGathered,
    )
else:
    (
        TEDotProductAttention,
        TELayerNormColumnParallelLinear,
        TENorm,
        TERowParallelLinear,
        TELayerNormColumnParallelLinearGathered,
    ) = (None, None, None, None, None)

from megatron.core.transformer.torch_norm import WrappedTorchNorm

try:
    import apex  # pylint: disable=unused-import

    from megatron.core.fusions.fused_layer_norm import FusedLayerNorm

    HAVE_APEX = True
    LNImpl = FusedLayerNorm
except ImportError:
    import warnings

    from megatron.core.transformer.torch_norm import WrappedTorchNorm

    warnings.warn("Apex is not installed. Falling back to Torch Norm")
    LNImpl = WrappedTorchNorm
    HAVE_APEX = False


def _get_layer_norm(config: AttentionConfig | MLPConfig, use_te: bool, normalization: str):
    # RMSNorm is not supported in FusedLayerNorm
    ln_impl = LNImpl if normalization == "LayerNorm" else WrappedTorchNorm

    # We don't use layernorm when the attention/mlp is no-op or
    # when we are using TE (the layernorm is fused with the first linear).
    return IdentityOp if use_te or config.no_op else ln_impl


def _get_qk_layernorm(use_te: bool, normalization: str):
    # RMSNorm is not supported in FusedLayerNorm
    ln_impl = LNImpl if normalization == "LayerNorm" else WrappedTorchNorm

    if use_te:
        if is_te_min_version("1.9.0"):
            # TENorm significantly harms convergence when used
            # for QKLayerNorm if TE Version < 1.9;
            # we instead use the Apex implementation.
            qk_norm = TENorm
        else:
            qk_norm = ln_impl
    else:
        qk_norm = ln_impl

    return qk_norm


def _get_heterogenous_attention_spec(
    attn_config: AttentionConfig, use_te: bool, qk_layernorm: bool, normalization: str
):
    if attn_config.no_op:
        self_attention = ModuleSpec(module=IdentityOp)
    elif attn_config.replace_with_linear:
        self_attention = ModuleSpec(
            module=(
                TELayerNormColumnParallelLinearGathered if use_te else ColumnParallelLinearGathered
            ),
            params={"tp_comm_buffer_name": "linear_attn"},
        )
    else:
        ln = _get_qk_layernorm(use_te, normalization) if qk_layernorm else IdentityOp
        self_attention = ModuleSpec(
            module=SelfAttention,
            params={"attn_mask_type": AttnMaskType.causal},
            submodules=SelfAttentionSubmodules(
                linear_qkv=(
                    not_none(TELayerNormColumnParallelLinear) if use_te else ColumnParallelLinear
                ),
                core_attention=not_none(TEDotProductAttention) if use_te else DotProductAttention,
                linear_proj=TERowParallelLinear if use_te else RowParallelLinear,
                q_layernorm=ln,
                k_layernorm=ln,
            ),
        )
    return self_attention


def _get_heterogenous_mlp_spec(mlp_config: MLPConfig, use_te: bool):
    if mlp_config.no_op:
        mlp = ModuleSpec(module=IdentityOp)
    elif mlp_config.replace_with_linear:
        mlp = ModuleSpec(
            module=(
                TELayerNormColumnParallelLinearGathered if use_te else ColumnParallelLinearGathered
            ),
            params={"tp_comm_buffer_name": "linear_mlp"},
        )
    else:
        mlp = ModuleSpec(
            module=MLP,
            submodules=MLPSubmodules(
                linear_fc1=(
                    not_none(TELayerNormColumnParallelLinear) if use_te else ColumnParallelLinear
                ),
                linear_fc2=not_none(TERowParallelLinear) if use_te else RowParallelLinear,
            ),
        )
    return mlp


def _get_sharded_state_dict_keys_map(block_config: TransformerBlockConfig, use_te: bool):
    """
    Generate a mapping of sharded state dictionary keys.
    Mapping in case of not using Transformer Engine with regular attention and mlp.
    Args:
        block_config (TransformerBlockConfig): The configuration of the transformer block.
        use_te (bool): Flag indicating whether to use Transformer Engine.

    Returns:
        dict: A dictionary mapping sharded state dictionary keys.
    """
    mapping = {}
    if not use_te:
        if block_config.attention.num_query_groups is not None:
            mapping.update({"input_layernorm.": "self_attention.linear_qkv.layer_norm_"})
        if block_config.attention.replace_with_linear:
            mapping.update({"input_layernorm.": "self_attention.layer_norm_"})
        if block_config.mlp.ffn_hidden_size is not None:
            mapping.update({"pre_mlp_layernorm.": "mlp.linear_fc1.layer_norm_"})
        if block_config.mlp.replace_with_linear:
            mapping.update({"pre_mlp_layernorm.": "mlp.layer_norm_"})
    return mapping


def get_gpt_heterogeneous_layer_spec(
    config: HeterogeneousTransformerConfig,
    use_te: bool = False,
    vp_stage: Optional[int] = None,
    pp_rank: Optional[int] = None,
):
    """
    Returns a list of ModuleSpec objects for the transformer layers in the heterogeneous model.

    Args:
        config (HeterogeneousTransformerConfig): Heterogeneous Transformer configuration.
        use_te (bool, optional): To use Transformer-Engine. Defaults to False.
        vp_stage (Optional[int]): Virtual pipeline stage number.
        pp_rank (Optional[int]): Pipeline parallel rank.

    Returns:
        ModuleSpec: Module specification for the transformer layers
    """
    qk_layernorm = config.qk_layernorm
    layer_specs = [
        ModuleSpec(
            module=TransformerLayer,
            submodules=TransformerLayerSubmodules(
                input_layernorm=_get_layer_norm(
                    block_params.attention, use_te, config.normalization
                ),
                self_attention=_get_heterogenous_attention_spec(
                    block_params.attention, use_te, qk_layernorm, config.normalization
                ),
                self_attn_bda=(
                    get_bias_dropout_add if not block_params.attention.no_op else IdentityFuncOp
                ),
                pre_mlp_layernorm=_get_layer_norm(block_params.mlp, use_te, config.normalization),
                mlp=_get_heterogenous_mlp_spec(block_params.mlp, use_te),
                mlp_bda=get_bias_dropout_add if not block_params.mlp.no_op else IdentityFuncOp,
                sharded_state_dict_keys_map=_get_sharded_state_dict_keys_map(block_params, use_te),
            ),
        )
        for block_params in config.per_block_parameters
    ]

    # Slice the layer specs to only include the layers that are built in this pipeline stage.
    # Note: MCore layer_number starts at 1
    offset = get_transformer_layer_offset(config, vp_stage=vp_stage, pp_rank=pp_rank)
    num_layers_to_build = get_num_layers_to_build(config, vp_stage=vp_stage, pp_rank=pp_rank)
    layer_specs = layer_specs[offset : offset + num_layers_to_build]

    # Submodules layer_norm determines the type of layernorm used in the last layernorm
    if use_te:
        layer_norm = TENorm
    else:
        layer_norm = LNImpl if config.normalization == "LayerNorm" else WrappedTorchNorm
    return TransformerBlockSubmodules(layer_specs, layer_norm=layer_norm)


================================================
FILE: megatron/core/models/gpt/moe_module_specs.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

from typing import Optional

from megatron.core.extensions.transformer_engine_spec_provider import TESpecProvider
from megatron.core.models.backends import (
    BackendSpecProvider,
    InferenceSpecProvider,
    LocalSpecProvider,
)
from megatron.core.transformer.mlp import MLPSubmodules
from megatron.core.transformer.moe.moe_layer import MoELayer, MoESubmodules
from megatron.core.transformer.moe.router import InferenceTopKRouter
from megatron.core.transformer.moe.shared_experts import SharedExpertMLP
from megatron.core.transformer.spec_utils import ModuleSpec


def get_moe_module_spec(
    use_te: Optional[bool] = True,
    num_experts: Optional[int] = None,
    moe_grouped_gemm: Optional[bool] = False,
) -> ModuleSpec:
    """Helper function to get module spec for MoE.

    Called by mamba_layer_specs.py for standard (non-inference) MoE specs.
    The GPT layer specs call get_moe_module_spec_for_backend directly.

    Args:
        use_te: Whether to use Transformer Engine.
        num_experts: Number of experts.
        moe_grouped_gemm: Whether to use grouped GEMM.
        moe_use_legacy_grouped_gemm: Whether to use legacy grouped GEMM.
    """
    if use_te is not None and use_te:
        backend: BackendSpecProvider = TESpecProvider()
    else:
        backend = LocalSpecProvider()
    return get_moe_module_spec_for_backend(
        backend=backend, num_experts=num_experts, moe_grouped_gemm=moe_grouped_gemm
    )


def get_moe_module_spec_for_backend(
    backend: BackendSpecProvider,
    num_experts: Optional[int] = None,
    moe_grouped_gemm: Optional[bool] = False,
    use_te_activation_func: bool = False,
) -> ModuleSpec:
    """Helper function to get module spec for MoE"""
    assert num_experts is not None

    linear_fc1 = backend.column_parallel_linear()
    linear_fc2 = backend.row_parallel_linear()
    activation_func = backend.activation_func()

    mlp = MLPSubmodules(
        linear_fc1=linear_fc1, linear_fc2=linear_fc2, activation_func=activation_func
    )

    expert_module, expert_submodule = backend.grouped_mlp_modules(
        moe_grouped_gemm is not None and moe_grouped_gemm
    )
    if expert_submodule is not None:
        expert_submodule.activation_func = activation_func

    experts = ModuleSpec(module=expert_module, submodules=expert_submodule)

    # shared experts spec
    shared_experts = ModuleSpec(module=SharedExpertMLP, submodules=mlp)

    # MoE module spec
    moe_module_spec = ModuleSpec(
        module=MoELayer,
        submodules=MoESubmodules(experts=experts, shared_experts=shared_experts),
        metainfo={"fuse_pre_mlp_layernorm": False},
    )
    return moe_module_spec


def get_inference_optimized_moe_spec() -> ModuleSpec:
    """MoE module spec for inference-optimized transformer impl.

    Uses InferenceSpecProvider to select inference-optimized modules:
    InferenceTopKRouter, InferenceGroupedMLP. MoELayer detects inference mode
    via config.transformer_impl and sets up the inference dispatcher internally.

    Called by mamba_layer_specs.py and gpt_layer_specs.py.
    """
    backend = InferenceSpecProvider()
    activation_func = backend.activation_func()

    expert_module, expert_submodule = backend.grouped_mlp_modules(True)
    if expert_submodule is not None:
        expert_submodule.activation_func = activation_func

    experts = ModuleSpec(module=expert_module, submodules=expert_submodule)
    shared_experts = ModuleSpec(
        module=SharedExpertMLP,
        submodules=MLPSubmodules(
            linear_fc1=backend.column_parallel_linear(),
            linear_fc2=backend.row_parallel_linear(),
            activation_func=activation_func,
        ),
    )

    return ModuleSpec(
        module=MoELayer,
        submodules=MoESubmodules(
            router=InferenceTopKRouter, experts=experts, shared_experts=shared_experts
        ),
        metainfo={"fuse_pre_mlp_layernorm": False},
    )


================================================
FILE: megatron/core/models/huggingface/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
from .module import HuggingFaceModule, build_hf_model


================================================
FILE: megatron/core/models/huggingface/clip_model.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.

from megatron.core.models.huggingface import HuggingFaceModule

try:
    from transformers import AutoModel
    from transformers.models.siglip.modeling_siglip import SiglipEncoderLayer

    HAVE_TRANSFORMERS = True
except ImportError:
    from unittest.mock import MagicMock

    AutoModel = MagicMock()
    SiglipEncoderLayer = MagicMock()

    HAVE_TRANSFORMERS = False


class SiglipHuggingFaceModel(HuggingFaceModule):
    """
    Wrapper for Siglip HuggingFace models.
    """

    # Currently applies to FSDP2 only, not the Megatron FSDP implementation.
    _fsdp_modules = [SiglipEncoderLayer]

    def __init__(self, config):
        if not HAVE_TRANSFORMERS:
            raise ImportError(
                "transformers is required for SiglipHuggingFaceModel, "
                "please install it with `pip install transformers`"
            )

        super().__init__(config)
        self.model = AutoModel.from_pretrained(config.vision_model_type.split("hf://")[1])

    def forward(self, *args, **kwargs):
        """Siglip forward."""
        x = self.model(*args, **kwargs)
        x = x["last_hidden_state"]

        return x


================================================
FILE: megatron/core/models/huggingface/module.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.

import torch

from megatron.core.transformer.module import MegatronModule

try:
    from transformers import AutoConfig, AutoModel

    HAVE_TRANSFORMERS = True
except ImportError:
    HAVE_TRANSFORMERS = False


class HuggingFaceModule(MegatronModule):
    """
    Basic module for huggingface.
    """

    def __init__(self, config):
        super().__init__(config=config)

    def set_input_tensor(self, input_tensor):
        """Dummy function for set_input_tensor"""
        self.input_tensor = input_tensor

    def __setattr__(self, name: str, value):
        """
        Set average_gradients_across_tp_domain attribute true on all params so that during
        finalize_model_grads an all-reduce is performed on this module’s gradients across
        tensor parallel ranks. This keeps replicated weights synchronized and prevents drift
        due to non determinism in HF models producing slightly different grads in replicated
        models on the same inputs.
        """
        super().__setattr__(name, value)

        if isinstance(value, torch.nn.Module):
            for param in value.parameters(recurse=True):
                setattr(param, "average_gradients_across_tp_domain", True)


class AutoHuggingFaceModel(HuggingFaceModule):
    """
    Wrapper for HuggingFace AutoModel
    """

    def __init__(self, config):
        if not HAVE_TRANSFORMERS:
            raise ImportError(
                "transformers is required for AutoHuggingFaceModel, "
                "please install it with `pip install transformers`"
            )

        super().__init__(config)
        self.model = AutoModel.from_pretrained(config.huggingface_model_name_or_path)

    def forward(self, *args, **kwargs):
        """Forward function"""
        return self.model(*args, **kwargs)


def get_hf_model_type(model_path):
    """Get the Huggingface model type."""

    if not HAVE_TRANSFORMERS:
        raise ImportError(
            "transformers is required for get_hf_model_type, "
            "please install it with `pip install transformers`"
        )

    hf_config = AutoConfig.from_pretrained(model_path.split("hf://")[1])
    model_type = hf_config.architectures[0].lower()

    if "qwen" in model_type:
        return "qwen"
    elif "siglip" in model_type:
        return "siglip"
    else:
        raise NotImplementedError(f"unsupported huggingface model {model_type}")


def build_hf_model(config, model_path):
    """Builds Huggingface wrapper model given config and model path."""
    model_type = get_hf_model_type(model_path)

    if "qwen" in model_type:
        from megatron.core.models.huggingface.qwen_model import QwenHuggingFaceModel

        model = QwenHuggingFaceModel(config)
    elif "siglip" in model_type:
        from megatron.core.models.huggingface.clip_model import SiglipHuggingFaceModel

        model = SiglipHuggingFaceModel(config)
    else:
        raise NotImplementedError(f"unsupported huggingface model {config.hf_config}")

    return model


================================================
FILE: megatron/core/models/huggingface/qwen_model.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.

import torch

from megatron.core.models.huggingface import HuggingFaceModule

try:
    from transformers.models.qwen2 import Qwen2ForCausalLM
    from transformers.models.qwen2.modeling_qwen2 import Qwen2DecoderLayer

    HAVE_TRANSFORMERS = True
except ImportError:
    from unittest.mock import MagicMock

    Qwen2ForCausalLM = MagicMock()
    Qwen2DecoderLayer = MagicMock()

    HAVE_TRANSFORMERS = False


class QwenHuggingFaceModel(HuggingFaceModule):
    """
    Wrapper for Qwen LM HuggingFace models.
    """

    # Currently applies to FSDP2 only, not the Megatron FSDP implementation.
    _fsdp_modules = [Qwen2DecoderLayer]

    def __init__(self, config):
        if not HAVE_TRANSFORMERS:
            raise ImportError(
                "transformers is required for QwenHuggingFaceModel, "
                "please install it with `pip install transformers`"
            )

        super().__init__(config)
        self.model = Qwen2ForCausalLM.from_pretrained(config.language_model_type.split("hf://")[1])

    def forward(self, *args, **kwargs):
        """Qwen forward."""
        labels = kwargs["labels"]
        combined_embeddings = kwargs["decoder_input"].permute(1, 0, 2)

        x = self.model(
            position_ids=None,  # uses arange
            attention_mask=kwargs["attention_mask"],  # Typically None -> causal.
            inputs_embeds=combined_embeddings,
        )
        logits = x["logits"]

        if labels is not None:
            loss_fn = torch.nn.CrossEntropyLoss(reduction="none")
            x = loss_fn(logits.permute(0, 2, 1), labels)

        return x

    def embedding(self, input_ids, position_ids=None):
        """Function to run process tokens with input embeddings"""
        return self.model.get_input_embeddings()(input_ids).transpose(1, 0).contiguous()


================================================
FILE: megatron/core/models/mamba/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from .mamba_model import MambaModel


================================================
FILE: megatron/core/models/mamba/mamba_layer_specs.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

from megatron.core.extensions.transformer_engine import (
    TEColumnParallelLinear,
    TEDotProductAttention,
    TELayerNormColumnParallelLinear,
    TENorm,
    TERowParallelLinear,
)
from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add
from megatron.core.models.gpt.moe_module_specs import (
    get_inference_optimized_moe_spec,
    get_moe_module_spec,
)
from megatron.core.ssm.mamba_block import MambaStack, MambaStackSubmodules
from megatron.core.ssm.mamba_layer import MambaLayer, MambaLayerSubmodules
from megatron.core.ssm.mamba_mixer import MambaMixer, MambaMixerSubmodules
from megatron.core.ssm.mlp_layer import MLPLayer
from megatron.core.tensor_parallel import (
    InferenceLayerNormColumnParallelLinear,
    InferenceRowParallelLinear,
)
from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from megatron.core.transformer.multi_token_prediction import (
    MultiTokenPredictionBlock,
    MultiTokenPredictionBlockSubmodules,
    MultiTokenPredictionLayer,
    MultiTokenPredictionLayerSubmodules,
)
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_layer import (
    MoETransformerLayer,
    TransformerLayer,
    TransformerLayerSubmodules,
)

# This should be private and should not be used outside of this file.
moe = get_moe_module_spec(
    use_te=True,
    num_experts=8,  # Can be any positive integer (must not be None).
    moe_grouped_gemm=True,
)

# Inference-optimized MoE spec
moe_inference = get_inference_optimized_moe_spec()


# MTP block spec for Mamba - provides norms and projection only.
# Inner layers are built by MultiTokenPredictionLayer using nested MambaStack
_mamba_mtp_block_spec = ModuleSpec(
    module=MultiTokenPredictionBlock,
    submodules=MultiTokenPredictionBlockSubmodules(
        layer_specs=[
            ModuleSpec(
                module=MultiTokenPredictionLayer,
                submodules=MultiTokenPredictionLayerSubmodules(
                    enorm=TENorm,
                    hnorm=TENorm,
                    eh_proj=TEColumnParallelLinear,
                    mtp_model_layer=None,  # Built via pattern + mamba_submodules
                    layer_norm=TENorm,
                ),
            )
        ]
    ),
)


mamba_stack_spec = ModuleSpec(
    module=MambaStack,
    submodules=MambaStackSubmodules(
        mamba_layer=ModuleSpec(
            module=MambaLayer,
            submodules=MambaLayerSubmodules(
                mixer=ModuleSpec(
                    module=MambaMixer,
                    submodules=MambaMixerSubmodules(
                        in_proj=TELayerNormColumnParallelLinear, out_proj=TERowParallelLinear
                    ),
                ),
                mamba_bda=get_bias_dropout_add,
            ),
        ),
        # Started with spec from gpt_layer_specs.py (with MLP removed)
        # Using the TE spec because we had problems getting the non-TE spec
        # working
        attention_layer=ModuleSpec(
            module=TransformerLayer,
            submodules=TransformerLayerSubmodules(
                self_attention=ModuleSpec(
                    module=SelfAttention,
                    params={"attn_mask_type": AttnMaskType.causal},
                    submodules=SelfAttentionSubmodules(
                        linear_qkv=TELayerNormColumnParallelLinear,
                        core_attention=TEDotProductAttention,
                        linear_proj=TERowParallelLinear,
                    ),
                ),
                self_attn_bda=get_bias_dropout_add,
            ),
        ),
        # Started with spec from gpt_layer_specs.py
        # Using the TE spec because we had problems getting the non-TE spec
        # working
        mlp_layer=ModuleSpec(
            module=MLPLayer,
            submodules=TransformerLayerSubmodules(
                mlp=ModuleSpec(
                    module=MLP,
                    submodules=MLPSubmodules(
                        linear_fc1=TELayerNormColumnParallelLinear, linear_fc2=TERowParallelLinear
                    ),
                ),
                mlp_bda=get_bias_dropout_add,
            ),
        ),
        moe_layer=ModuleSpec(
            module=MoETransformerLayer,
            submodules=TransformerLayerSubmodules(
                pre_mlp_layernorm=TENorm, mlp=moe, mlp_bda=get_bias_dropout_add
            ),
        ),
        mtp_block_spec=_mamba_mtp_block_spec,
    ),
)


mamba_inference_stack_spec = ModuleSpec(
    module=MambaStack,
    submodules=MambaStackSubmodules(
        mamba_layer=ModuleSpec(
            module=MambaLayer,
            submodules=MambaLayerSubmodules(
                mixer=ModuleSpec(
                    module=MambaMixer,
                    submodules=MambaMixerSubmodules(
                        in_proj=InferenceLayerNormColumnParallelLinear,
                        out_proj=InferenceRowParallelLinear,
                    ),
                ),
                mamba_bda=get_bias_dropout_add,
            ),
        ),
        # Started with spec from gpt_layer_specs.py (with MLP removed)
        # Using the TE spec because we had problems getting the non-TE spec
        # working
        attention_layer=ModuleSpec(
            module=TransformerLayer,
            submodules=TransformerLayerSubmodules(
                self_attention=ModuleSpec(
                    module=SelfAttention,
                    params={"attn_mask_type": AttnMaskType.causal},
                    submodules=SelfAttentionSubmodules(
                        linear_qkv=InferenceLayerNormColumnParallelLinear,
                        core_attention=TEDotProductAttention,
                        linear_proj=InferenceRowParallelLinear,
                    ),
                ),
                self_attn_bda=get_bias_dropout_add,
            ),
        ),
        # Started with spec from gpt_layer_specs.py
        # Using the TE spec because we had problems getting the non-TE spec
        # working
        mlp_layer=ModuleSpec(
            module=MLPLayer,
            submodules=TransformerLayerSubmodules(
                mlp=ModuleSpec(
                    module=MLP,
                    submodules=MLPSubmodules(
                        linear_fc1=InferenceLayerNormColumnParallelLinear,
                        linear_fc2=InferenceRowParallelLinear,
                    ),
                ),
                mlp_bda=get_bias_dropout_add,
            ),
        ),
        moe_layer=ModuleSpec(
            # Use inference-optimized MoE layer for end-to-end CUDA graph support
            module=TransformerLayer,
            submodules=TransformerLayerSubmodules(
                pre_mlp_layernorm=TENorm, mlp=moe_inference, mlp_bda=get_bias_dropout_add
            ),
        ),
        mtp_block_spec=_mamba_mtp_block_spec,
    ),
)


================================================
FILE: megatron/core/models/mamba/mamba_model.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

import logging
from typing import Literal, Optional

import torch
from torch import Tensor

from megatron.core import tensor_parallel
from megatron.core.config_logger import has_config_logger_enabled, log_config_to_disk
from megatron.core.inference.contexts import BaseInferenceContext
from megatron.core.models.common.embeddings.language_model_embedding import LanguageModelEmbedding
from megatron.core.models.common.embeddings.rotary_pos_embedding import RotaryEmbedding
from megatron.core.models.common.language_module.language_module import LanguageModule
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.quantization.utils import get_quant_config_or_none
from megatron.core.tensor_parallel import gather_from_sequence_parallel_region
from megatron.core.transformer import TransformerConfig
from megatron.core.transformer.enums import ModelType
from megatron.core.transformer.multi_token_prediction import (
    MultiTokenPredictionBlock,
    mtp_on_this_rank,
    process_mtp_loss,
)
from megatron.core.transformer.spec_utils import ModuleSpec, build_module
from megatron.core.utils import (
    WrappedTensor,
    deprecate_inference_params,
    is_using_quantization_scales,
    log_single_rank,
)

logger = logging.getLogger(__name__)


class MambaModel(LanguageModule):
    """Mamba language model.

    Args:
        config (TransformerConfig): Model config
        mamba_stack_spec (ModuleSpec): Specifies the modules to use for the various layer types
        vocab_size (int): Vocabulary size
        max_sequence_length (int): maximum size of sequence.
            This is used for positional embedding
        hybrid_layer_pattern (str): Unified hybrid layer pattern with optional MTP and
            pipeline stage boundaries.
            Format: "<main_pattern>/<mtp_pattern>/<mtp_pattern>/..."
            The main pattern may contain "|" to define pipeline stage boundaries.
            Examples:
                - "M*M*" -> main decoder only, no MTP
                - "M*M*/MM/MM" -> main="M*M*", mtp="MM", 2 depths
                - "M-M-|M-M*-|M-M-|M-M*-" -> 4 pipeline segments
        hybrid_attention_ratio (float, optional): Deprecated. Use hybrid_layer_pattern instead.
            If set to a value > 0.0 and hybrid_layer_pattern is None, a pattern will be
            generated from the ratio with a deprecation warning.
        hybrid_mlp_ratio (float, optional): Deprecated. Use hybrid_layer_pattern instead.
            If set to a value > 0.0 and hybrid_layer_pattern is None, a pattern will be
            generated from the ratio with a deprecation warning.
        hybrid_override_pattern (str, optional): Deprecated. Use hybrid_layer_pattern instead.
            If set and hybrid_layer_pattern is None, the value is copied to hybrid_layer_pattern
            with a deprecation warning.
        pre_process (bool, optional): Include embedding layer
            (used with pipeline parallelism). Defaults to True.
        post_process (bool, optional): Include an output layer (used with pipeline parallelism).
            Defaults to True.
        fp16_lm_cross_entropy (bool, optional): Defaults to False.
        parallel_output (bool, optional): Do not gather the outputs, keep them split across tensor
            parallel ranks. Defaults to True.
        share_embeddings_and_output_weights (bool, optional): When True, input embeddings and
            output logit weights are shared. Defaults to False.
        position_embedding_type (Literal[learned_absolute,rope,none], optional):  Position
            embedding type. Defaults to 'none'.
        rotary_percent (float, optional): Percent of rotary dimension to use for rotary position
            embeddings. Ignored unless position_embedding_type is 'rope'. Defaults to 1.0.
        rotary_base (int, optional): Base period for rotary position embeddings. Ignored unless
            position_embedding_type is 'rope'. Defaults to 10000.
        seq_len_interpolation_factor (Optional[float], optional): scale of linearly
            interpolating RoPE for longer sequences. The value must be a float larger than 1.0.
             Defaults to None.
        pg_collection (ProcessGroupCollection, optional): Model communication process groups.
        vp_stage (Optional[int], optional): Virtual pipeline stage index. Defaults to None.
    """

    def __init__(
        self,
        config: TransformerConfig,
        mamba_stack_spec: ModuleSpec,
        vocab_size: int,
        max_sequence_length: int,
        hybrid_layer_pattern: Optional[str] = None,
        hybrid_attention_ratio: Optional[float] = None,
        hybrid_mlp_ratio: Optional[float] = None,
        hybrid_override_pattern: Optional[str] = None,
        pre_process: bool = True,
        post_process: bool = True,
        fp16_lm_cross_entropy: bool = False,
        parallel_output: bool = True,
        share_embeddings_and_output_weights: bool = False,
        # Mamba with no attention has no need for position embeddings, so none is default
        position_embedding_type: Literal['learned_absolute', 'rope', 'none'] = 'none',
        rotary_percent: float = 1.0,
        rotary_base: int = 10000,
        scatter_embedding_sequence_parallel: bool = True,
        seq_len_interpolation_factor: Optional[float] = None,
        pg_collection: Optional[ProcessGroupCollection] = None,
        vp_stage: Optional[int] = None,
    ) -> None:
        super().__init__(config=config, pg_collection=pg_collection)

        if has_config_logger_enabled(config):
            log_config_to_disk(config, locals(), prefix=type(self).__name__)

        if self.config.use_mup and not getattr(MambaModel, "mup_warning_printed", False):
            log_single_rank(
                logger,
                logging.WARNING,
                "MuP for MambaModel is experimental and not fully validated yet.",
            )
            MambaModel.mup_warning_printed = True

        self.mamba_stack_spec: ModuleSpec = mamba_stack_spec
        self.vocab_size = vocab_size
        self.max_sequence_length = max_sequence_length
        self.hybrid_layer_pattern = hybrid_layer_pattern
        self.pre_process = pre_process
        self.post_process = post_process
        self.fp16_lm_cross_entropy = fp16_lm_cross_entropy
        self.parallel_output = parallel_output
        self.share_embeddings_and_output_weights = share_embeddings_and_output_weights
        self.position_embedding_type = position_embedding_type
        self.vp_stage = vp_stage

        # Backward compatibility for deprecated hybrid parameters
        if hybrid_override_pattern is not None:
            if self.hybrid_layer_pattern is None:
                log_single_rank(
                    logger,
                    logging.WARNING,
                    "hybrid_override_pattern has been deprecated. "
                    "Use hybrid_layer_pattern instead.",
                )
                self.hybrid_layer_pattern = hybrid_override_pattern
            else:
                raise ValueError(
                    "hybrid_override_pattern and hybrid_layer_pattern cannot both be set. "
                    "hybrid_override_pattern has been deprecated; use hybrid_layer_pattern instead."
                )
        if (hybrid_attention_ratio is not None and hybrid_attention_ratio > 0.0) or (
            hybrid_mlp_ratio is not None and hybrid_mlp_ratio > 0.0
        ):
            if hybrid_layer_pattern is not None:
                raise ValueError(
                    "hybrid_layer_pattern cannot be used together with "
                    "hybrid_attention_ratio or hybrid_mlp_ratio. "
                    "These ratios have been deprecated; use hybrid_layer_pattern alone."
                )
            log_single_rank(
                logger,
                logging.WARNING,
                "hybrid_attention_ratio and hybrid_mlp_ratio have been deprecated. "
                "Use hybrid_layer_pattern instead.",
            )
            if self.hybrid_layer_pattern is None:
                from megatron.core.ssm.mamba_hybrid_layer_allocation import pattern_from_ratios

                attn_ratio = hybrid_attention_ratio if hybrid_attention_ratio else 0.0
                mlp_ratio = hybrid_mlp_ratio if hybrid_mlp_ratio else 0.0
                self.hybrid_layer_pattern = pattern_from_ratios(
                    config.num_layers, attn_ratio, mlp_ratio
                )

        # Parse unified pattern to extract main and MTP components, and
        # determine the pipeline segment for this model instance.
        from megatron.core.ssm.mamba_hybrid_layer_allocation import (
            parse_hybrid_pattern,
            select_pipeline_segment,
        )

        parsed = parse_hybrid_pattern(self.hybrid_layer_pattern)
        self.mtp_pattern = parsed.mtp_pattern
        self.mtp_num_depths = parsed.mtp_num_depths

        layer_type_list, layer_offset = select_pipeline_segment(
            parsed.main_pattern or '',
            self.pg_collection.pp,
            vp_stage,
            first_stage_layers=self.config.num_layers_in_first_pipeline_stage,
            last_stage_layers=self.config.num_layers_in_last_pipeline_stage,
        )

        # Determine if MTP is needed (based on pattern parsing)
        self.mtp_process = (
            self.mtp_pattern is not None
            and self.mtp_num_depths > 0
            # The following forces MTP to be on the final pipeline stage. It might be more optimal
            # to split the hybrid layer pattern into pipeline stages before parsing the pattern for
            # the current pipeline stage. This could also enable MTP standalone (MTP in a pipeline
            # stage separate from loss) to be supported in the hybrid model.
            and mtp_on_this_rank(self.config, ignore_virtual=False, vp_stage=self.vp_stage)
        )

        # megatron core pipelining currently depends on model type
        # TODO: remove this dependency ?
        self.model_type = ModelType.encoder_or_decoder

        if self.pre_process or self.mtp_process:
            self.embedding = LanguageModelEmbedding(
                config=self.config,
                vocab_size=self.vocab_size,
                max_sequence_length=self.max_sequence_length,
                position_embedding_type=position_embedding_type,
                scatter_to_sequence_parallel=scatter_embedding_sequence_parallel,
                tp_group=self.pg_collection.tp,
            )

        if self.position_embedding_type == 'rope':
            self.rotary_pos_emb = RotaryEmbedding(
                kv_channels=self.config.kv_channels,
                rotary_percent=rotary_percent,
                seq_len_interpolation_factor=seq_len_interpolation_factor,
                rotary_base=rotary_base,
                use_cpu_initialization=self.config.use_cpu_initialization,
                cp_group=self.pg_collection.cp,
            )

        self.decoder = build_module(
            mamba_stack_spec,
            self.config,
            pre_process=self.pre_process,
            layer_type_list=layer_type_list,
            pp_layer_offset=layer_offset,
            post_process=self.post_process,
            dtype=config.params_dtype,
            pg_collection=self.pg_collection,
        )

        # MTP block - uses mtp_block_spec from mamba_stack_spec.submodules
        if self.mtp_process:
            mamba_submodules = mamba_stack_spec.submodules
            mtp_block_spec = mamba_submodules.mtp_block_spec
            assert mtp_block_spec is not None, (
                "MTP pattern specified but mtp_block_spec is None in mamba_stack_spec.submodules. "
                "Ensure mamba_stack_spec includes mtp_block_spec for MTP support."
            )

            self.mtp = MultiTokenPredictionBlock(
                config=self.config,
                spec=mtp_block_spec,
                pg_collection=self.pg_collection,
                vp_stage=self.vp_stage,
                mtp_layer_pattern=self.mtp_pattern,
                mtp_num_depths=self.mtp_num_depths,
                mamba_submodules=mamba_submodules,
            )

        # Output
        if post_process or self.mtp_process:
            self.output_layer = tensor_parallel.ColumnParallelLinear(
                config.hidden_size,
                self.vocab_size,
                config=config,
                init_method=(
                    config.embedding_init_method
                    if config.use_mup and not self.share_embeddings_and_output_weights
                    else config.init_method
                ),
                bias=False,
                skip_bias_add=False,
                gather_output=not self.parallel_output,
                skip_weight_param_allocation=self.pre_process
                and self.share_embeddings_and_output_weights,
                tp_group=self.pg_collection.tp,
            )

        if self.pre_process or self.post_process or self.mtp_process:
            self.setup_embeddings_and_output_layer()

        for name, module in self.named_modules():
            if hasattr(module, 'finish_init'):
                quant_config = get_quant_config_or_none(name, self.config.quant_recipe)
                module.finish_init(quant_config)

    def set_input_tensor(self, input_tensor: Tensor) -> None:
        """Sets input tensor to the model.

        See megatron.model.transformer.set_input_tensor()

        Args:
            input_tensor (Tensor): Sets the input tensor for the model.
        """
        # This is usually handled in schedules.py but some inference code still
        # gives us non-lists or None
        if not isinstance(input_tensor, list):
            input_tensor = [input_tensor]

        assert len(input_tensor) == 1, 'input_tensor should only be length 1 for gpt/bert'
        self.decoder.set_input_tensor(input_tensor[0])

    def forward(
        self,
        input_ids: Tensor,
        position_ids: Tensor,
        attention_mask: Tensor,
        decoder_input: Tensor = None,
        labels: Tensor = None,
        inference_context: BaseInferenceContext = None,
        runtime_gather_output: Optional[bool] = None,
        *,
        inference_params: Optional[BaseInferenceContext] = None,
        loss_mask: Optional[Tensor] = None,
        packed_seq_params: Optional[PackedSeqParams] = None,
        padding_mask: Optional[Tensor] = None,
        is_spec_decode: Optional[bool] = None,
    ) -> Tensor:
        """Forward function of the Mamba model. This function passes the input tensors
        through the embedding layer, and then the decoder and finally into the post
        processing layer (optional).

        It either returns the Loss values if labels are given or the final hidden units
        """
        # If decoder_input is provided (not None), then input_ids and position_ids are ignored.
        # Otherwise, apply embedding layer on input_ids and position_ids to get decoder_input.

        inference_context = deprecate_inference_params(inference_context, inference_params)

        in_inference_mode = inference_context is not None and not self.training

        if in_inference_mode:
            assert runtime_gather_output, "Inference must always gather TP logits"

        # Decoder embedding.
        if decoder_input is not None:
            pass
        elif self.pre_process:
            decoder_input = self.embedding(input_ids=input_ids, position_ids=position_ids)

            # Clear the outputs for padding tokens when using dynamic batching with
            # quantization scales to avoid corrupting amax calculations
            if (
                in_inference_mode
                and inference_context.is_dynamic_batching()
                and is_using_quantization_scales(self.config)
            ):
                decoder_input[inference_context.padding_slice] = 0.0
        else:
            # intermediate stage of pipeline
            # decoder will get hidden_states from encoder.input_tensor
            decoder_input = None

        rotary_pos_emb = None
        if self.position_embedding_type == 'rope':
            rotary_seq_len = self.rotary_pos_emb.get_rotary_seq_len(
                inference_context, self.decoder, decoder_input, self.config, packed_seq_params
            )
            rotary_pos_emb = self.rotary_pos_emb(
                rotary_seq_len,
                packed_seq=packed_seq_params is not None and packed_seq_params.qkv_format == 'thd',
            )

        # Wrap decoder_input to allow the decoder (MambaBlock) to delete the
        # reference held by this caller function, enabling early garbage collection
        # for inference.
        if in_inference_mode:
            decoder_input = WrappedTensor(decoder_input)

        # The following assert will currently fail when running inference.
        # Commented out for now.
        # TODO (duncan/rwaleffe): (1) confirm that the externally-generated
        #   attention mask is not needed and is ignored by the model in
        #   inference mode, (2) reduce the size of the externally-generated
        #   attention mask to prevent CPU OOM (as we did for training), (3)
        #   force the attention mask passed to the model in inference mode to
        #   be None, so this assert will succeed.
        # assert attention_mask is None, "The attention mask is ignored and should be set to None"

        # Run decoder.
        hidden_states = self.decoder(
            hidden_states=decoder_input,
            attention_mask=attention_mask,
            inference_context=inference_context,
            rotary_pos_emb=rotary_pos_emb,
            packed_seq_params=packed_seq_params,
            padding_mask=padding_mask,
        )

        output_weight = None
        if self.share_embeddings_and_output_weights:
            output_weight = self.shared_embedding_or_output_weight()

        # Check if speculative decoding is active. When it is, MTP must be
        # computed *after* verification so that it is conditioned on verified
        # tokens rather than stale speculative tokens from the previous step.
        if is_spec_decode is None:
            is_spec_decode = (
                in_inference_mode
                and inference_context.is_dynamic_batching()
                and inference_context.num_speculative_tokens > 0
            )

        mtp_forward_ran = self.mtp_process and not (in_inference_mode or is_spec_decode)
        if mtp_forward_ran:
            hidden_states = self.mtp(
                input_ids=input_ids,
                position_ids=position_ids,
                hidden_states=hidden_states,
                attention_mask=attention_mask,
                inference_params=inference_params,
                rotary_pos_emb=rotary_pos_emb,
                packed_seq_params=packed_seq_params,
                embedding=self.embedding,
            )

        if not self.post_process:
            return hidden_states

        if self.config.mtp_num_layers is not None and self.mtp_process:
            assert self.config.mtp_num_layers > 0
            if in_inference_mode or is_spec_decode:
                self._decoder_hidden_states_cache = hidden_states
            else:
                hidden_states = process_mtp_loss(
                    hidden_states=hidden_states,
                    labels=labels,
                    loss_mask=loss_mask,
                    output_layer=self.output_layer,
                    output_weight=output_weight,
                    runtime_gather_output=runtime_gather_output,
                    is_training=self.training,
                    compute_language_model_loss=self.compute_language_model_loss,
                    config=self.config,
                    cp_group=self.pg_collection.cp,
                    packed_seq_params=packed_seq_params,
                    scale_logits_fn=self._scale_logits if self.config.use_mup else None,
                )
        sequence_parallel_override = False
        if in_inference_mode and inference_context.config.materialize_only_last_token_logits:
            if inference_context.is_static_batching():
                hidden_states = hidden_states[-1:, :, :]
            else:
                if self.output_layer.sequence_parallel:
                    # Perform the sequence parallel gather here instead of after the output layer
                    # because we need to slice the last token logits from the full view of the
                    # packed logits across all requests.
                    hidden_states = gather_from_sequence_parallel_region(
                        hidden_states, group=self.pg_collection.tp
                    )
                    self.output_layer.sequence_parallel = False
                    sequence_parallel_override = True

                # Reshape [S, B, H] (with B=1) to [1, S, H] for logit extraction,
                # then back to [S', B, H] for the output layer.
                reshaped = hidden_states.squeeze(1).unsqueeze(0)
                hidden_states = inference_context.last_token_logits(reshaped).unsqueeze(1)

        logits, _ = self.output_layer(
            hidden_states, weight=output_weight, runtime_gather_output=runtime_gather_output
        )
        logits = self._scale_logits(logits)

        # Restore sequence parallel execution to the output layer if necessary.
        if sequence_parallel_override:
            assert (
                in_inference_mode
                and inference_context.is_dynamic_batching()
                and inference_context.config.materialize_only_last_token_logits
            )
            self.output_layer.sequence_parallel = True

        if labels is None:
            # [s b h] => [b s h]
            return logits.transpose(0, 1).contiguous()

        loss = self.compute_language_model_loss(labels, logits)

        return loss

    @torch.inference_mode()
    def compute_mtp_single_step(
        self,
        hidden_states: Tensor,
        next_token_ids: Tensor,
        position_ids: Tensor,
        depth: int,
        runtime_gather_output: bool = True,
    ) -> tuple:
        """Compute a single MTP depth for speculative decoding.

        This is called after speculative token verification to compute MTP
        predictions conditioned on verified tokens only.

        Args:
            hidden_states (Tensor): Hidden states at last accepted positions [N, 1, H].
            next_token_ids (Tensor): Correct next token IDs [1, N].
            position_ids (Tensor): Position IDs for the next tokens [1, N].
            depth (int): MTP depth index (0-indexed).
            runtime_gather_output (bool): Whether to gather output across TP.

        Returns:
            tuple: (new_hidden_states [N, 1, H], logits [N, 1, vocab_size]).
        """
        layer_idx = 0 if self.mtp.mtp_use_repeated_layer else depth
        mtp_hidden = self.mtp.layers[layer_idx].forward_single_position(
            hidden_states=hidden_states,
            next_token_ids=next_token_ids,
            position_ids=position_ids,
            embedding=self.embedding,
        )

        output_weight = None
        if self.share_embeddings_and_output_weights:
            output_weight = self.shared_embedding_or_output_weight()

        logits, _ = self.output_layer(
            mtp_hidden, weight=output_weight, runtime_gather_output=runtime_gather_output
        )
        logits = self._scale_logits(logits)

        return mtp_hidden, logits


================================================
FILE: megatron/core/models/mimo/README.md
================================================
# MIMO: Multimodal In/Out Model

## What is MIMO?

MIMO is a model architecture that enables language models to understand and generate multiple modalities (text, images, audio, etc.). It achieves this through:

- A core language model that processes unified embeddings
- Modality-specific submodules that:
  - Encode inputs into embeddings (e.g. image->embeddings)
  - Decode embeddings back to outputs (e.g. embeddings->image)
  - Project between modality and language model spaces
- The MimoModel handles:
  - Aligning modality embeddings at special token positions in the sequence
  - Processing the combined embeddings through the language model

MIMO provides a flexible and canonical architecture that can be configured into various multimodal models, for example

- Vision-Language Models (VLMs)
- Audio-Visual Language Models  
- Multimodal understanding and generation

## How It Works

The model architecture consists of 2 main components:

1) Language model
2) Modality submodules

The complete data flow:

```
Input → Encoder → Projection → Align input embeddings → Language Model → Hidden states for special generation tokens ->  Output Projection → Decoder → Output
```

1. **Encoding**:
   - Modality submodules convert inputs to embeddings (e.g., images → embeddings).
   - The MimoModel aligns all modality embeddings along with text embeddings by token positions.
   - The language model processes the unified embeddings.

2. **Decoding**:
   - We select hidden states that correspond to special modality generation tokens.
   - Modality submodules convert embeddings back to outputs (e.g., embeddings → images).

## Components in Detail

### Language Model

The language model is the core component that processes all modality information in a unified embedding space:

- Acts as the central processor for all modalities through a shared vocabulary
- Processes the combined sequence containing both text and modality tokens

### Modality Submodules

`ModalitySubmodules` connect raw modality data with the language model:

- Each submodule handles **encoding** (modality → embeddings) and **decoding** (embeddings → modality) 
- Manages the **projection** between modality space and language model dimensions

```python
# Base class constructor with named encoders and decoders
class ModalitySubmodules(ABC, nn.Module):
    def __init__(
        self,
        encoders: Optional[Dict[str, nn.Module]] = None,
        decoders: Optional[Dict[str, nn.Module]] = None,
        input_projections: Optional[List[nn.Module]] = None,
        output_projections: Optional[List[nn.Module]] = None,
    ):
```

MIMO provides default implementations (`VisionModalitySubmodules`, `AudioModalitySubmodules`), but you can create custom submodules for specialized processing:

```python
# Custom implementation
class CustomVisionSubmodules(ModalitySubmodules):
    def encode(self, inputs):
        # Specialized encoding logic
        return projected_embeddings

# Use custom submodules when creating the model
model = MimoModel(
    mimo_config,
    modality_submodules={"images": ModuleSpec(module=CustomVisionSubmodules, params={...})}
)
```

### Embedding Alignment

The `MimoModel` handles the integration of different modality embeddings through its `align_embeddings_by_token_positions` method:

- Places modality embeddings at their special token positions in the input sequence
- Handles dimension matching and position tracking for proper embedding placement

Example of what happens internally:
```python
# Inside MimoModel's forward method
aligned_embeddings = self.align_embeddings_by_token_positions(
    modality_embeddings={"text": text_emb, "images": image_emb},
    input_ids=tokens,
    special_token_ids={"images": 32000}
)
```

## Configuration and Usage

### MimoModel Parameters

```python
MimoModel(
    config: MimoModelConfig,    # Required: Configuration for the model
)
```

### Configuration Details

MIMO models are instantiated with a `MimoModelConfig`, which contains:
1. A specification for the language model
2. A dictionary mapping modality names to their submodule specifications

```python
MimoModelConfig(
    language_model: ModuleSpec,                         # Specification for the language model
    modality_submodules: Dict[str, ModuleSpec],         # Dictionary mapping modality names to their submodule specifications
    special_token_ids: Dict[str, int] = {}              # Dictionary mapping modality names to their special token IDs
)
```

### Example: Creating a Vision-Language Model (VLM)

```python
# Language model specification
lm_spec = ModuleSpec(
    module=GPTModel,
    params={
        "config": language_config,
        "transformer_layer_spec": get_mock_language_layer_spec(),
        "vocab_size": 50304,
    }
)

# Vision modality specification
vision_submodule_spec = ModuleSpec(
    module=VisionModalitySubmodules,
    params={
        # Any general parameters for the submodule can go here
    },
    submodules={
        "encoders": {
            "clip_encoder": ModuleSpec(
                module=CLIPViTModel,
                params={
                    "transformer_config": vision_config,
                    "transformer_layer_spec": get_mock_vision_layer_spec(),
                    "patch_dim": 16,
                    "img_h": 224,
                    "img_w": 224,
                }
            ),
        },
        "input_projections": [
            ModuleSpec(
                module=MultimodalProjector,
                params={
                    "config": get_mock_projection_config(),
                    "submodules": get_mock_projection_layer_spec().submodules,
                    "projector_type": "mlp",
                    "input_size": 128
                }
            ),
        ],
    }
)

# Instantiate the model
vlm = MimoModel(
    MimoModelConfig(
        language_model=lm_spec,
        modality_submodules={"images": vision_submodule_spec},
        special_token_ids={"images": 32000}
    )
)
```

### MIMO Forward Method Usage

```python
# Prepare inputs for multiple modalities and encoders
modality_inputs = {
    # modality names and encoder names should match the keys used in mimo config during initialization.
    "images": {
        "clip_encoder": {"pixel_values": images},  # Encoder-specific inputs
        "vit_encoder": {"images": vit_images}
    },
    "audio": {
        "whisper_encoder": {"input_features": audio_features}
    }
}

# Call forward method
outputs, _ = mimo_model(
    input_ids=input_ids,
    position_ids=position_ids,
    modality_inputs=modality_inputs,
)
```


================================================
FILE: megatron/core/models/mimo/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from megatron.core.models.mimo.config.base_configs import MimoModelConfig
from megatron.core.models.mimo.model import MimoModel
from megatron.core.models.mimo.submodules.audio import AudioModalitySubmodules
from megatron.core.models.mimo.submodules.base import ModalitySubmodules
from megatron.core.models.mimo.submodules.vision import VisionModalitySubmodules

__all__ = [
    'MimoModelConfig',
    'MimoModel',
    # Submodule classes
    'ModalitySubmodules',
    'VisionModalitySubmodules',
    'AudioModalitySubmodules',
]


================================================
FILE: megatron/core/models/mimo/config/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from megatron.core.models.mimo.config.base_configs import MimoModelConfig

__all__ = ['MimoModelConfig']


================================================
FILE: megatron/core/models/mimo/config/base_configs.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import warnings
from dataclasses import dataclass, field
from typing import Dict

from megatron.core.transformer.spec_utils import ModuleSpec


@dataclass
class MimoModelConfig:
    """Configuration for a multi-modal model.

    Args:
        language_model_spec (ModuleSpec):
            Specification for the language model
        modality_submodules_spec (Dict[str, ModuleSpec]):
            Dictionary mapping modality names to their submodule specifications
        special_token_ids (Dict[str, int]):
            Dictionary mapping modality names to their special token IDs.
            For example, {"vision": -200, "audio":32000}, these represent placeholders
            in the input_ids to insert the modality embeddings at the correct positions.
        kv_format (str):
            Key-value format for attention: "sbhd" (seq-batch-head-dim) or "thd" (total-head-dim).
            Default is "sbhd".
    """

    warnings.warn(
        "MimoModelConfig is experimental and still under active development. "
        "The API may change without notice in future releases.",
        category=UserWarning,
        stacklevel=2,
    )

    language_model_spec: ModuleSpec = field(default_factory=ModuleSpec)
    modality_submodules_spec: Dict[str, ModuleSpec] = field(default_factory=dict)
    special_token_ids: Dict[str, int] = field(default_factory=dict)
    kv_format: str = "sbhd"


================================================
FILE: megatron/core/models/mimo/model/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
from megatron.core.models.mimo.model.base import MimoModel

__all__ = ['MimoModel']


================================================
FILE: megatron/core/models/mimo/model/base.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import logging
import warnings
from typing import Any, Dict, Optional

import torch  # type: ignore[import-not-found]

from megatron.core.models.mimo.config import MimoModelConfig
from megatron.core.models.mimo.partition.utils import PartitionAdapter, PartitionConfig
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.transformer import MegatronModule
from megatron.core.transformer.spec_utils import build_module

logger = logging.getLogger(__name__)


class MimoModel(MegatronModule):
    """Multimodal In/Out Model supporting arbitrary combinations of modalities.

    .. warning::
        **EXPERIMENTAL**: This class is experimental, still under active development,
        and the API is subject to change without notice. Use at your own risk.

    .. note::
        This implementation is in development and may undergo API changes.


    This model processes multiple modalities (e.g., vision, audio) alongside text,
    combining their embeddings before passing them through a language model.

    Args:
        mimo_config (MimoModelConfig):
            Configuration for the model, including language model and modality submodules
    """

    def __init__(self, mimo_config: MimoModelConfig, cp_group=None, tp_group=None) -> None:
        """Initialize the multimodal model.

        Example:
            ```python
            # Create a model with default configuration
            model = MimoModel(mimo_config)
            ```
        """
        # Initialize with language model's transformer config for MegatronModule compatibility
        super().__init__(mimo_config.language_model_spec.params['config'])

        warnings.warn(
            "MimoModel is experimental and still under active development. "
            "The API may change without notice in future releases.",
            category=UserWarning,
            stacklevel=2,
        )

        self.mimo_config = mimo_config

        # Use special token IDs from the config
        self.special_token_ids = (
            mimo_config.special_token_ids.copy() if mimo_config.special_token_ids else {}
        )

        # Extract language model config for partition adapter
        language_config = mimo_config.language_model_spec.params['config']
        assert (
            language_config.pipeline_model_parallel_size == 1
        ), "Pipeline parallelism is not supported in MimoModel"
        max_seq_len = mimo_config.language_model_spec.params.get('max_sequence_length', 4096)

        self.partition_adapter: Optional[PartitionAdapter] = None
        # Create partition adapter only if parallelism is enabled
        if language_config.context_parallel_size > 1 or language_config.sequence_parallel:
            partition_config = PartitionConfig.from_mp_config(
                mp=language_config,
                max_seq_len=max_seq_len,
                kv_format=mimo_config.kv_format,
                cp_group=cp_group,
                tp_group=tp_group,
            )
            self.partition_adapter = PartitionAdapter(partition_config)

        # Initialize modality submodules from specifications
        self.modality_submodules = torch.nn.ModuleDict()
        self._initialize_submodules()
        self._initialize_language_model()

    def align_embeddings_by_token_positions(
        self,
        modality_embeddings: Dict[str, torch.Tensor],  # [num_embeddings, hidden_dim]
        input_ids: torch.Tensor,  # [bs, seq_len]
        special_token_ids: Dict[str, int],
    ) -> torch.Tensor:
        """Align embeddings from different modalities based on special token positions in input_ids.

        Args:
            modality_embeddings: Dictionary mapping modality names to their embeddings.
                For all modalities: tensor of shape (N, H).
                Shape: (num_tokens_for_modality, hidden_dim)
            input_ids: Input token IDs. Shape: (B, S) or (S,)
                Contains special tokens that mark where each modality's embeddings should go.
                The number of special tokens for each modality should exactly match the number
                of embeddings for that modality.
            special_token_ids: Dictionary mapping modality names to their special token IDs

        Returns:
            Combined embeddings tensor. Shape: (S, B, H)
        """
        # Ensure we have at least one modality
        if not modality_embeddings:
            raise ValueError("No modality embeddings provided. At least one modality is required.")

        logger.debug(f"Merging embeddings for modalities: {list(modality_embeddings.keys())}")

        # Use text embeddings if available, otherwise use any modality
        reference_embeddings = modality_embeddings.get(
            "text", next(iter(modality_embeddings.values()))
        )
        hidden_dim = reference_embeddings.size(-1)
        device = reference_embeddings.device
        dtype = reference_embeddings.dtype

        batch_size, seq_length = input_ids.size()  # input_ids is [B, S]
        logger.debug(
            f"Combined output tensor will have shape: [{seq_length}, {batch_size}, {hidden_dim}]"
        )

        combined_embeddings = torch.zeros(
            (batch_size, seq_length, hidden_dim), dtype=dtype, device=device
        )

        # Process each modality in modality_embeddings
        for modality_name, modality_emb in modality_embeddings.items():
            if modality_name == "text":
                mask = torch.ones_like(input_ids, dtype=torch.bool, device=input_ids.device)
                for token_id in special_token_ids.values():
                    mask &= input_ids != token_id
            elif modality_name in special_token_ids:
                token_id = special_token_ids[modality_name]
                mask = input_ids == token_id
            else:
                raise ValueError(f"No special token ID defined for modality {modality_name}")

            num_tokens = mask.sum().item()
            if num_tokens != modality_emb.size(0):
                raise ValueError(
                    f"Number of {modality_name} tokens ({num_tokens}) does not match "
                    f"number of {modality_name} embeddings ({modality_emb.size(0)})"
                )

            expanded_mask = mask.unsqueeze(-1).expand_as(combined_embeddings)
            combined_embeddings.masked_scatter_(expanded_mask, modality_emb.flatten())

        return combined_embeddings.transpose(0, 1).contiguous()  # [S, B, H]

    def _initialize_submodules(self) -> None:
        """Initialize modality submodules from the ModuleSpec configurations.

        Only modalities present in the config will be instantiated.
        For each modality in the config, builds the corresponding submodule using from_spec.
        """

        for modality_name, submodule_spec in self.mimo_config.modality_submodules_spec.items():
            # Get the submodule class
            submodule_class = submodule_spec.module
            logger.debug(f"Building {modality_name} submodule using {submodule_class.__name__}")

            # Use from_spec to instantiate the submodule
            submodule = submodule_class.from_spec(submodule_spec)
            self.modality_submodules[modality_name] = submodule

    def _initialize_language_model(self) -> None:
        """Initialize the language model."""
        logger.debug(
            f"Building language model using {self.mimo_config.language_model_spec.module.__name__}"
        )
        self.language_model = build_module(self.mimo_config.language_model_spec)

    def set_input_tensor(self, input_tensor):
        """Set input tensor for pipeline parallelism.

        This method is required by Megatron's pipeline parallel mechanism.
        It passes the output tensor from the previous stage as input to this stage.

        Args:
            input_tensor: Tensor or list of tensors passed between pipeline stages

        Returns:
            None
        """
        # Handle case where input_tensor might be a list or a single tensor
        if isinstance(input_tensor, list):
            # For simplicity, just use the first tensor
            input_tensor = input_tensor[0]

        # Pass the input tensor to the language model if it has a set_input_tensor method
        if hasattr(self.language_model, 'set_input_tensor'):
            self.language_model.set_input_tensor(input_tensor)

    def get_text_embeddings(
        self, input_ids: torch.Tensor, position_ids: torch.Tensor, special_token_ids: Dict[str, int]
    ) -> torch.Tensor:
        """Get embeddings for text tokens in the input.
        Args:
            input_ids: Input token IDs. Shape: (B, S)
                Contains text tokens and potentially special tokens for other modalities.
            position_ids: Position IDs corresponding to input tokens, used for positional encoding.
                Shape: (B, S)
            special_token_ids: Dictionary mapping modality names to their special token IDs.
                Used to identify non-text tokens in the input_ids.

        Returns:
            torch.Tensor: Embeddings for text tokens.
            Shape: (N, H), where N is the number of text tokens.
        """
        text_mask = torch.ones_like(input_ids, dtype=torch.bool)  # [b, s]
        for special_token_id in special_token_ids.values():
            text_mask &= input_ids != special_token_id

        batch_idx, seq_idx = text_mask.nonzero(as_tuple=True)
        input_ids_text = input_ids[batch_idx, seq_idx].unsqueeze(0)

        position_ids_text = (
            position_ids[batch_idx, seq_idx].unsqueeze(0) if position_ids is not None else None
        )

        text_embeddings = self.language_model.embedding(
            input_ids=input_ids_text, position_ids=position_ids_text
        ).squeeze(
            1
        )  # Shape: [num_text_tokens, hidden_dim]
        return text_embeddings

    def forward(
        self,
        input_ids: torch.Tensor,
        position_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        loss_mask: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        modality_inputs: Optional[Dict[str, Dict[str, Any]]] = None,
        packing_kwargs: Optional[dict] = None,
    ):
        """Forward pass through the multimodal model.

        Args:
            input_ids: Input token IDs. Shape: (B, S)
            position_ids: Position IDs. Shape: (B, S)
            attention_mask: Attention mask. Shape: (B, S)
            loss_mask: Loss mask. Shape: (B, S)
            labels: Labels for training. Shape: (B, S)
            modality_inputs: Dictionary mapping modality names to encoder inputs. For example:
                {
                    "images": {
                        "clip_encoder": {"pixel_values": clip_images},
                        "vit_encoder": {"images": vit_images}
                    },
                    "audio": {
                        "whisper_encoder": {"input_features": whisper_features}
                    }
                }
            packing_kwargs: Optional dictionary of kwargs to construct PackedSeqParams
                            if packed_seq_params is not provided. For example:
                                {
                                    "cu_seqlens_q": cu_seqlens,
                                    "cu_seqlens_kv": cu_seqlens,
                                    "cu_seqlens_q_padded": cu_seqlens_padded,
                                    "cu_seqlens_kv_padded": cu_seqlens_padded,
                                    "max_seqlen_q": torch.tensor(
                                        max(seqlens_padded), dtype=torch.int32
                                    ),
                                    "max_seqlen_kv": torch.tensor(
                                        max(seqlens_padded), dtype=torch.int32
                                    ),
                                }

        Returns:
            tuple: Tuple containing model outputs and loss mask
                - lm_output: Model output. Shape: (B, S, ...) or (B, S, V)
                - loss_mask: Loss mask. Shape: (B, S)
        """
        # If packing_kwargs is provided, construct PackedSeqParams
        packed_seq_params = None
        if packing_kwargs is not None:
            # Ensure correct dtype for seqlens tensors
            for key in packing_kwargs:
                if 'cu_seqlens' in key and packing_kwargs[key] is not None:
                    packing_kwargs[key] = packing_kwargs[key].to(dtype=torch.int32)
            packed_seq_params = PackedSeqParams(**packing_kwargs)
            packed_seq_params.qkv_format = 'thd'
            logger.debug(f"Packed sequence parameters: {packed_seq_params}")

        # 1. Process each modality to get embeddings
        modality_embeddings = {}

        for modality_name, submodule in self.modality_submodules.items():
            # Process the modality through its submodule
            if (
                modality_inputs
                and modality_name in modality_inputs
                and modality_inputs[modality_name] is not None
            ):
                logger.debug(f"Processing {modality_name} modality")
                # Get embeddings for this modality
                embeddings = submodule.forward(encoder_inputs=modality_inputs[modality_name])
                if embeddings is not None:
                    # All embeddings are now in the format [num_tokens, hidden_dim]
                    modality_embeddings[modality_name] = embeddings
                    logger.debug(
                        f"Generated embeddings for {modality_name} with shape {embeddings.shape}"
                    )

        # Get text embeddings
        text_embeddings = self.get_text_embeddings(input_ids, position_ids, self.special_token_ids)
        logger.debug(f"Generated text embeddings with shape {text_embeddings.shape}")

        modality_embeddings["text"] = text_embeddings

        # 2. Merge embeddings from different modalities
        logger.debug(f"Merging embeddings from {len(modality_embeddings)} modalities")
        combined_embeddings = self.align_embeddings_by_token_positions(
            modality_embeddings=modality_embeddings,
            input_ids=input_ids,
            special_token_ids=self.special_token_ids,
        )
        logger.debug(f"Combined embeddings shape: {combined_embeddings.shape}")

        # 3. If sharding is needed, apply PartitionAdapter.
        # combined_embeddings is [S, B, H]; transpose to [B, S, H] for shard() which expects
        # batch-first layout (required by get_batch_on_this_cp_rank). After CP sharding each
        # rank holds [B, S/cp, H]; transpose back to [S/cp, B, H] for the language model.
        if self.partition_adapter is not None:
            combined_embeddings = combined_embeddings.transpose(0, 1).contiguous()  # [B, S, H]
            combined_embeddings, labels, loss_mask, _, packed_seq_params = (
                self.partition_adapter.shard(
                    embeddings=combined_embeddings,
                    labels=labels,
                    loss_mask=loss_mask,
                    attention_mask=attention_mask,
                    packed_seq_params=packed_seq_params,
                )
            )
            # shard() returns embeddings in [B, S/cp, H]; transpose to [S/cp, B, H]
            # which is what the language model expects.
            if combined_embeddings is not None:
                combined_embeddings = combined_embeddings.transpose(0, 1).contiguous()

        # 5. Forward pass through language model
        lm_output = self.language_model(
            input_ids=None,
            position_ids=None,
            decoder_input=combined_embeddings,
            labels=labels,
            attention_mask=None,
            packed_seq_params=packed_seq_params,
        )

        logger.debug(f"Language model output shape: {lm_output.shape}")

        return lm_output, loss_mask


================================================
FILE: megatron/core/models/mimo/partition/utils.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
"""Token and weight partitioning helper (CP, TP, SP).

The adapter slices sequences across *context-parallel* ranks and can further
scatter them across *sequence-parallel* ranks when sequence-parallelism is
enabled.
"""
from __future__ import annotations

from dataclasses import dataclass
from typing import Optional, Tuple

import torch  # type: ignore[import-not-found]
from torch.distributed import ProcessGroup  # type: ignore[import-not-found]

from megatron.core import tensor_parallel
from megatron.core.model_parallel_config import ModelParallelConfig
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.parallel_state import get_context_parallel_group, get_tensor_model_parallel_group
from megatron.core.utils import (
    get_batch_on_this_cp_rank,
    get_pg_rank,
    get_pg_size,
    is_te_min_version,
)

try:
    import transformer_engine_torch as tex  # type: ignore

    _HAVE_TEX = True
except ModuleNotFoundError:  # pragma: no cover
    tex = None  # type: ignore
    _HAVE_TEX = False


@dataclass(frozen=True)
class PartitionConfig:
    """Minimal runtime information needed to shard inputs.

    NOTE: Always construct PartitionConfig using the provided classmethod
    (from_mp_config) to ensure all fields, including cp_group and tp_group,
    are set correctly.
    """

    seq_parallel: bool
    use_cp: bool
    tp_comm_overlap: bool
    max_seq_len: int
    kv_format: str = "sbhd"  # "sbhd" | "thd"
    cp_group: Optional[ProcessGroup] = None
    tp_group: Optional[ProcessGroup] = None

    @property
    def is_partitioning_enabled(self) -> bool:
        """Returns True if context parallelism or sequence parallelism is active."""
        return self.use_cp or self.seq_parallel

    @classmethod
    def from_mp_config(
        cls,
        mp: ModelParallelConfig,
        *,
        max_seq_len: int,
        kv_format: str = "sbhd",
        cp_group: Optional[ProcessGroup] = None,
        tp_group: Optional[ProcessGroup] = None,
    ) -> "PartitionConfig":
        """
        Creates a PartitionConfig from a ModelParallelConfig.
        """
        if not isinstance(mp, ModelParallelConfig):
            raise TypeError("mp must be a ModelParallelConfig instance")

        if mp.context_parallel_size > 1 and cp_group is None:
            cp_group = get_context_parallel_group()

        if mp.sequence_parallel and tp_group is None:
            tp_group = get_tensor_model_parallel_group()

        return cls(
            seq_parallel=mp.sequence_parallel,
            use_cp=get_pg_size(cp_group) > 1,
            tp_comm_overlap=mp.tp_comm_overlap,
            max_seq_len=max_seq_len,
            kv_format=kv_format,
            cp_group=cp_group,
            tp_group=tp_group,
        )


class PartitionAdapter:
    """Shard batch-first embeddings & label tensors for Context and Sequence Parallelism."""

    def __init__(self, cfg: PartitionConfig):
        """Initialize the partition adapter.
        Args:
            cfg: PartitionConfig, the configuration for the partition adapter.
        """
        self.cfg = cfg

    def shard(
        self,
        embeddings: torch.Tensor,
        labels: torch.Tensor,
        loss_mask: torch.Tensor,
        attention_mask: torch.Tensor,
        packed_seq_params: Optional[PackedSeqParams] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Optional[PackedSeqParams]]:
        """
        Apply context parallel (CP) and sequence parallel (SP) sharding to input tensors.

        All input tensors must be in batch-first layout:
            - embeddings: (B, S, H)
            - labels / loss_mask / attention_mask: (B, S)

        After this call embeddings are still in (B, S/cp, H) batch-first layout.
        The caller is responsible for transposing to (S/cp, B, H) if the language model
        requires sequence-first tensors.

        Args:
            embeddings (torch.Tensor):
                Input embeddings tensor. Shape: (B, S, H)
            labels (torch.Tensor):
                Labels tensor. Shape: (B, S)
            loss_mask (torch.Tensor):
                Loss mask tensor. Shape: (B, S)
            attention_mask (torch.Tensor):
                Attention mask tensor. Shape: (B, S)
            packed_seq_params (PackedSeqParams, optional):
                Packed sequence parameters. Defaults to None.

        Returns:
            Tuple containing:
                - embeddings (torch.Tensor): Sharded embeddings. Shape: (B, S/cp, H)
                - labels (torch.Tensor): Possibly sharded labels. Shape: (B, S/cp)
                - loss_mask (torch.Tensor): Possibly sharded loss mask. Shape: (B, S/cp)
                - attention_mask (torch.Tensor): Possibly sharded attention mask. Shape: (B, S/cp)
                - packed_seq_params (PackedSeqParams, optional): Updated packed sequence parameters.
        """
        if not (self.cfg.use_cp or self.cfg.seq_parallel):
            return embeddings, labels, loss_mask, attention_mask, packed_seq_params

        # Sanity-check the sequence length before any sharding happens.
        if embeddings is not None:
            shard_factor = None
            seq_dim = None  # which dimension holds the token sequence

            if self.cfg.use_cp and self.cfg.seq_parallel:
                shard_factor = get_pg_size(self.cfg.tp_group) * get_pg_size(self.cfg.cp_group) * 2
                seq_dim = 1  # embeddings shape: [B, S, H]
            elif self.cfg.use_cp:
                shard_factor = get_pg_size(self.cfg.cp_group) * 2
                seq_dim = 1
            elif self.cfg.seq_parallel:
                shard_factor = get_pg_size(self.cfg.tp_group)
                seq_dim = 0  # embeddings shape: [S, B, H]

            if shard_factor is not None and (
                packed_seq_params is None
                or getattr(packed_seq_params, 'qkv_format', 'sbhd') == 'sbhd'
            ):
                assert embeddings.shape[seq_dim] % shard_factor == 0, (
                    f"Sequence length should be divisible by {shard_factor} "
                    "for Sequence/Context parallelism"
                )

                if self.cfg.seq_parallel and self.cfg.tp_comm_overlap:
                    assert embeddings.shape[seq_dim] == self.cfg.max_seq_len, (
                        "TP Comm overlap requires Vision+Text token length "
                        "== language_max_sequence_length"
                    )

        if self.cfg.use_cp:
            embeddings, labels, loss_mask, attention_mask, packed_seq_params = (
                self._apply_context_parallel(
                    embeddings, labels, loss_mask, attention_mask, packed_seq_params
                )
            )

        if self.cfg.seq_parallel and embeddings is not None:
            embeddings = tensor_parallel.scatter_to_sequence_parallel_region(embeddings)

        return embeddings, labels, loss_mask, attention_mask, packed_seq_params

    def _apply_context_parallel(
        self,
        embeddings: Optional[torch.Tensor],
        labels: Optional[torch.Tensor],
        loss_mask: Optional[torch.Tensor],
        attention_mask: Optional[torch.Tensor],
        packed_seq_params: Optional[PackedSeqParams],
    ) -> Tuple[
        Optional[torch.Tensor],
        Optional[torch.Tensor],
        Optional[torch.Tensor],
        Optional[torch.Tensor],
        Optional[PackedSeqParams],
    ]:
        """
        Apply context parallel (CP) sharding to input tensors.

        Args:
            embeddings (Optional[torch.Tensor]):
                Input embeddings tensor. Shape: (B, S, H)
            labels (Optional[torch.Tensor]):
                Labels tensor. Shape: (B, S)
            loss_mask (Optional[torch.Tensor]):
                Loss mask tensor. Shape: (B, S)
            attention_mask (Optional[torch.Tensor]):
                Attention mask tensor. Shape: (B, S)
            packed_seq_params (PackedSeqParams, optional):
                Packed sequence parameters. Defaults to None.

        Returns:
            Tuple containing:
                - embeddings (Optional[torch.Tensor]): Sharded embeddings. Shape: (B, S/cp, H)
                - labels (Optional[torch.Tensor]): Possibly sharded labels. Shape: (B, S/cp)
                - loss_mask (Optional[torch.Tensor]): Possibly sharded loss mask. Shape: (B, S/cp)
                - attention_mask (Optional[torch.Tensor]): Possibly sharded attention mask.
                                                           Shape: (B, S/cp)
                - packed_seq_params (PackedSeqParams, optional): Updated packed sequence parameters.
        """
        if not self.cfg.use_cp:
            return embeddings, labels, loss_mask, attention_mask, packed_seq_params

        # Distribute sequence across CP ranks
        batch = dict()
        if embeddings is not None:
            batch["embeddings"] = embeddings
        if labels is not None:
            batch["labels"] = labels
        if loss_mask is not None:
            batch["loss_mask"] = loss_mask
        if attention_mask is not None:
            batch["attention_mask"] = attention_mask

        if packed_seq_params is None or getattr(packed_seq_params, 'qkv_format', 'sbhd') == 'sbhd':
            batch = get_batch_on_this_cp_rank(batch)
        else:
            assert _HAVE_TEX and is_te_min_version("1.10.0"), (
                "Please update Transformer Engine to >= 1.10 "
                "to use Context Parallel with THD format data"
            )
            assert self.cfg.cp_group is not None
            cp_size = get_pg_size(self.cfg.cp_group)
            cp_rank = get_pg_rank(self.cfg.cp_group)
            for key, data in batch.items():
                index = tex.thd_get_partitioned_indices(
                    packed_seq_params.cu_seqlens_q_padded, data.size(1), cp_size, cp_rank
                )
                batch[key] = data.index_select(1, index)

        # Extract sharded tensors; embeddings remain in [B, S/cp, H] — the caller
        # is responsible for transposing to [S/cp, B, H] for the language model.
        embeddings = batch.get("embeddings", None)
        labels = batch.get("labels", None)
        loss_mask = batch.get("loss_mask", None)
        attention_mask = batch.get("attention_mask", None)

        return embeddings, labels, loss_mask, attention_mask, packed_seq_params


================================================
FILE: megatron/core/models/mimo/submodules/audio.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import logging
from typing import Any, Dict, List, Optional

import torch
import torch.nn as nn

from megatron.core.models.mimo.submodules.base import ModalitySubmodules

# Initialize logger
logger = logging.getLogger(__name__)


class AudioModalitySubmodules(ModalitySubmodules):
    """Audio modality submodules for encoding, decoding, and projecting audio data."""

    def __init__(
        self,
        encoders: Optional[Dict[str, nn.Module]] = None,
        decoders: Optional[Dict[str, nn.Module]] = None,
        input_projections: Optional[List[nn.Module]] = None,
        output_projections: Optional[List[nn.Module]] = None,
        **kwargs,
    ):
        """Initialize audio modality submodules.

        Args:
            encoders: Dictionary of encoder modules
            decoders: Dictionary of decoder modules
            input_projections: List of input projection modules
            output_projections: List of output projection modules
            **kwargs: Additional keyword arguments
        """
        super().__init__(encoders, decoders, input_projections, output_projections, **kwargs)

        if self.input_projections:
            assert (
                len(self.input_projections) <= 1
            ), "AudioModalitySubmodules currently supports only one input projection"

        if self.output_projections:
            assert (
                len(self.output_projections) <= 1
            ), "AudioModalitySubmodules currently supports only one output projection"

    def encode(self, encoders_data_batch: Dict) -> List[torch.Tensor]:
        """Encode audio data into a sequence of embeddings.

        Args:
            encoders_data_batch: Dictionary containing encoder-specific inputs.
                Keys should match encoder names in self.encoders.
                Each encoder receives its own specific inputs.

        Returns:
            List of encoded audio embeddings, one from each encoder.
            Each embedding is a flattened tensor of shape [total_tokens, hidden_dim]

        Raises:
            ValueError: If no data is provided for any encoder or if there's a parameter mismatch.
        """
        if not encoders_data_batch:
            return []

        embeddings = []

        for name, encoder in self.encoders.items():
            if name not in encoders_data_batch:
                raise ValueError(f"No inputs found for encoder '{name}'")

            encoder_inputs = encoders_data_batch[name]

            # Process inputs through the encoder
            encoder_outputs = encoder(**encoder_inputs)
            logger.debug(f"Encoder '{name}' output shape: {encoder_outputs.shape}")
            if encoder_outputs.ndim == 3:
                # its b,s,h -> we need to flatten it to b*s,h
                encoder_outputs = encoder_outputs.reshape(-1, encoder_outputs.size(-1))
                embeddings.append(encoder_outputs)
            elif encoder_outputs.ndim == 2:
                # its b*s,h -> encoder already returned the flattened output
                embeddings.append(encoder_outputs)
            else:
                raise ValueError(
                    f"Encoder '{name}' output shape {encoder_outputs.shape} is not supported"
                    "Expected 3D (b,s,h) or 2D (b*s,h) tensor, got {encoder_outputs.ndim}D"
                )
        return embeddings

    def decode(self, embeddings: torch.Tensor, data_batch: Dict) -> torch.Tensor:
        """Decode embeddings into audio data."""
        raise NotImplementedError("Audio decoding not implemented yet")

    def combine_embeddings(self, embeddings: List[torch.Tensor]) -> torch.Tensor:
        """Combine embeddings from different encoders."""
        if not embeddings:
            raise ValueError("Cannot combine empty list of embeddings")

        if len(embeddings) == 1:
            return embeddings[0]

        # Concatenate along sequence dimension
        # each embedding is [total_tokens, hidden_dim]
        combined = torch.cat(embeddings, dim=0)
        logger.debug(f"Combined audio embeddings shape: {combined.shape}")
        return combined

    def project_embeddings(
        self, embeddings: List[torch.Tensor], is_input: bool = True
    ) -> torch.Tensor:
        """Project embeddings to the language model dimension space."""

        if is_input:
            embeddings = self.combine_embeddings(embeddings)

        # Get the appropriate projections
        projections = self.input_projections if is_input else self.output_projections

        # Apply projection if available
        if projections:
            # We've asserted in __init__ that there's only one projection
            projection = projections[0]
            projected = projection(embeddings)
            logger.debug(f"Post-projection audio embeddings shape: {projected.shape}")
            return projected

        return embeddings

    def forward(self, encoder_inputs: Dict[str, Any]) -> Optional[torch.Tensor]:
        """Forward pass for audio modality submodules.

        Args:
            encoder_inputs: Dictionary where keys match encoder names in self.encoders
                and values are dictionaries of encoder-specific parameters.
                Example: {
                    "whisper": {"input_features": features},
                    "wav2vec": {"input_values": waveform}
                }

        Returns:
            Flattened audio embeddings with shape [total_embeddings, hidden_dim],
            or None if no valid inputs were provided.
        """

        embeddings = self.encode(encoder_inputs)
        # embeddings is a list of tensors, each tensor is a flattened audio embedding

        # If no embeddings were produced, return None
        if not embeddings:
            return None

        # Project embeddings
        projected = self.project_embeddings(embeddings, is_input=True)
        logger.debug(f"Projected audio embeddings shape: {projected.shape}")
        return projected  # [total_embeddings, hidden_dim]


================================================
FILE: megatron/core/models/mimo/submodules/base.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import logging
import warnings
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional

import torch
import torch.nn as nn

from megatron.core.transformer.spec_utils import ModuleSpec, build_module

# Initialize logger
logger = logging.getLogger(__name__)


class ModalitySubmodules(ABC, nn.Module):
    """Base abstract class for modality-specific submodules.

    Manages encoders, decoders, and projection layers for a specific modality
    in a multi-modal model architecture. Subclasses must implement methods for
    encoding, decoding, combining embeddings, and projecting embeddings.

    .. warning::
        **EXPERIMENTAL**: This class is experimental, still under active development,
        and the API is subject to change without notice. Use at your own risk.

    Args:
        encoders (Dict[str, nn.Module]):
            Dictionary of encoder modules for processing modality inputs
        decoders (Dict[str, nn.Module]):
            Dictionary of decoder modules for generating modality outputs
        input_projections (List[nn.Module]):
            List of projection modules for transforming encoder outputs
        output_projections (List[nn.Module]):
            List of projection modules for transforming decoder inputs
    """

    def __init__(
        self,
        encoders: Optional[Dict[str, nn.Module]] = None,
        decoders: Optional[Dict[str, nn.Module]] = None,
        input_projections: Optional[List[nn.Module]] = None,
        output_projections: Optional[List[nn.Module]] = None,
        **kwargs,
    ) -> None:
        """Initialize the modality submodules."""
        super().__init__()
        self.encoders = nn.ModuleDict(encoders or {})
        self.decoders = nn.ModuleDict(decoders or {})
        self.input_projections = nn.ModuleList(input_projections or [])
        self.output_projections = nn.ModuleList(output_projections or [])

        warnings.warn(
            "ModalitySubmodules is experimental and still under active development. "
            "The API may change without notice in future releases.",
            category=UserWarning,
            stacklevel=2,
        )

    @classmethod
    def from_spec(cls, module_spec: ModuleSpec) -> 'ModalitySubmodules':
        """Create a modality submodule from ModuleSpec configuration.

        Args:
            module_spec (ModuleSpec): The module specification for this modality submodule

        Returns:
            ModalitySubmodules: An instance of the modality submodule
        """
        logger.debug(f"Creating {cls.__name__} from spec")
        params = module_spec.params or {}
        submodules = module_spec.submodules or {}

        # Build component lists from submodules dictionary
        encoders = {}
        if 'encoders' in submodules:
            for encoder_name, encoder_spec in submodules['encoders'].items():
                logger.debug(f"Building {cls.__name__} encoder: {encoder_spec.module.__name__}")
                encoder = build_module(encoder_spec)
                encoders[encoder_name] = encoder

        decoders = {}
        if 'decoders' in submodules:
            for decoder_name, decoder_spec in submodules['decoders'].items():
                logger.debug(f"Building {cls.__name__} decoder: {decoder_spec.module.__name__}")
                decoder = build_module(decoder_spec)
                decoders[decoder_name] = decoder

        input_projections = []
        if 'input_projections' in submodules:
            for proj_spec in submodules['input_projections']:
                logger.debug(
                    f"Building {cls.__name__} input projection: {proj_spec.module.__name__}"
                )
                projection = build_module(proj_spec)
                input_projections.append(projection)

        output_projections = []
        if 'output_projections' in submodules:
            for proj_spec in submodules['output_projections']:
                logger.debug(
                    f"Building {cls.__name__} output projection: {proj_spec.module.__name__}"
                )
                projection = build_module(proj_spec)
                output_projections.append(projection)

        # Pass any additional parameters from the params dictionary
        additional_params = params.copy()
        if additional_params:
            logger.debug(
                f"Using additional parameters for {cls.__name__}: {list(additional_params.keys())}"
            )

        return cls(
            encoders=encoders,
            decoders=decoders,
            input_projections=input_projections,
            output_projections=output_projections,
            **additional_params,
        )

    @abstractmethod
    def combine_embeddings(self, embeddings: List[torch.Tensor]) -> torch.Tensor:
        """Combine multiple embeddings from different encoders.

        Args:
            embeddings (List[torch.Tensor]):
                List of embeddings to combine

        Returns:
            torch.Tensor: Combined embedding tensor
        """
        pass

    @abstractmethod
    def encode(self, data_batch: Dict) -> List[torch.Tensor]:
        """Encode data batch into a list of tensors.

        Args:
            data_batch (Dict):
                Dictionary containing input data

        Returns:
            List[torch.Tensor]: List of encoded embeddings
        """
        pass

    @abstractmethod
    def decode(self, embeddings: torch.Tensor, data_batch: Dict) -> torch.Tensor:
        """Decode embeddings into a tensor.

        Args:
            embeddings (torch.Tensor):
                Embeddings to decode
            data_batch (Dict):
                Dictionary containing additional data for decoding

        Returns:
            torch.Tensor: Decoded output
        """
        pass

    @abstractmethod
    def project_embeddings(
        self, embeddings: List[torch.Tensor], is_input: bool = True
    ) -> Optional[torch.Tensor]:
        """Project embeddings into a tensor.

        Args:
            embeddings (List[torch.Tensor]):
                List of embeddings to project
            is_input (bool):
                If True, use input projections, otherwise use output projections

        Returns:
            Optional[torch.Tensor]: Projected embeddings or None
        """
        pass

    @abstractmethod
    def forward(self, encoder_inputs: Dict[str, Any]) -> Optional[torch.Tensor]:
        """Process data for this modality through encoding and projection.

        Args:
            encoder_inputs (Dict[str, Any]):
                Dictionary containing encoder-specific inputs. Keys should match encoder names.

        Returns:
            Optional[torch.Tensor]:
                Processed and projected embeddings tensor, or None if no embeddings were produced.
        """
        pass


================================================
FILE: megatron/core/models/mimo/submodules/vision.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import logging
from typing import Any, Dict, List, Optional

import torch
import torch.nn as nn

from megatron.core.models.mimo.submodules.base import ModalitySubmodules

# Initialize logger
logger = logging.getLogger(__name__)


class VisionModalitySubmodules(ModalitySubmodules):
    """Vision modality submodules for encoding, decoding, and projecting image data.

    Handles image processing through vision encoders and projections in a multi-modal model.
    """

    def __init__(
        self,
        encoders: Optional[Dict[str, nn.Module]] = None,
        decoders: Optional[Dict[str, nn.Module]] = None,
        input_projections: Optional[List[nn.Module]] = None,
        output_projections: Optional[List[nn.Module]] = None,
        **kwargs,
    ):
        """Initialize vision modality submodules.

        Args:
            encoders: Dictionary of encoder modules
            decoders: Dictionary of decoder modules
            input_projections: List of input projection modules
            output_projections: List of output projection modules
            **kwargs: Additional keyword arguments
        """
        super().__init__(
            encoders=encoders,
            decoders=decoders,
            input_projections=input_projections,
            output_projections=output_projections,
        )

        if self.input_projections:
            assert (
                len(self.input_projections) <= 1
            ), "VisionModalitySubmodules currently supports only one input projection"

        if self.output_projections:
            assert (
                len(self.output_projections) <= 1
            ), "VisionModalitySubmodules currently supports only one output projection"

    def encode(self, encoders_data_batch: Dict) -> List[torch.Tensor]:
        """Encode image data batch into a list of tensors.

        Args:
            encoders_data_batch: Dictionary containing encoder-specific inputs.
                Keys should match encoder names in self.encoders.
                Each encoder receives its own specific inputs.

        Returns:
            List of encoded image embeddings, one from each encoder.
            Each embedding is a flattened tensor of shape [total_tokens, hidden_dim]

        Raises:
            ValueError: If no data is provided for any encoder or if there's a parameter mismatch.
        """
        if not encoders_data_batch:
            return []

        embeddings = []

        for name, encoder in self.encoders.items():
            if name not in encoders_data_batch:
                raise ValueError(f"No inputs found for encoder '{name}'")

            encoder_inputs = encoders_data_batch[name]

            # Process inputs through the encoder
            encoder_outputs = encoder(**encoder_inputs)
            logger.debug(f"Encoder '{name}' output shape: {encoder_outputs.shape}")
            if encoder_outputs.ndim == 3:
                # its b,s,h -> we need to flatten it to b*s,h
                encoder_outputs = encoder_outputs.reshape(-1, encoder_outputs.size(-1))
                embeddings.append(encoder_outputs)
            elif encoder_outputs.ndim == 2:
                # its b*s,h -> encoder already returned the flattened output
                embeddings.append(encoder_outputs)
            else:
                raise ValueError(
                    f"Encoder '{name}' output shape {encoder_outputs.shape} is not supported"
                    "Expected 3D (b,s,h) or 2D (b*s,h) tensor, got {encoder_outputs.ndim}D"
                )

        return embeddings

    def decode(self, embeddings: torch.Tensor, data_batch: Dict) -> torch.Tensor:
        """Decode embeddings into image tensors.

        Args:
            embeddings: Tensor of embeddings to decode.
            data_batch: Dictionary containing additional data for decoding.

        Returns:
            Tensor containing generated images.
        """

        raise NotImplementedError("No decoders support yet")

    def combine_embeddings(self, embeddings: List[torch.Tensor]) -> torch.Tensor:
        """Combine multiple embeddings from different encoders by concatenation.

        This method is used for combining encoder outputs before input projection.

        Args:
            embeddings: List of embeddings to combine

        Returns:
            Combined embedding tensor
        """
        if not embeddings:
            raise ValueError("Cannot combine empty list of embeddings")

        if len(embeddings) == 1:
            return embeddings[0]

        # each embedding is [total_tokens, hidden_dim]
        #  Make this configurable in the future
        combined = torch.cat(embeddings, dim=0)
        logger.debug(f"Combined embeddings shape after concatenation: {combined.shape}")
        return combined

    def project_embeddings(
        self, embeddings: List[torch.Tensor], is_input: bool = True
    ) -> torch.Tensor:
        """Project image embeddings using input or output projections.

        Args:
            embeddings: List of image embeddings to project
            is_input: If True, use input projections, otherwise use output projections

        Returns:
            Projected image embeddings or None if no embeddings
        """
        if is_input:
            embeddings = self.combine_embeddings(embeddings)

        # Get the appropriate projection (input or output)
        projections = self.input_projections if is_input else self.output_projections

        # Apply projection if available
        if projections:
            # We've asserted in __init__ that there's only one projection
            projection = projections[0]
            projected = projection(embeddings)
            logger.debug(f"Post-projection embeddings shape: {projected.shape}")
            return projected

        return embeddings

    def forward(self, encoder_inputs: Dict[str, Any]) -> Optional[torch.Tensor]:
        """Process image data through encoding and projection.

        Args:
            encoder_inputs: Dictionary where keys match encoder names in self.encoders
                and values are dictionaries of encoder-specific parameters.
                Example: {"clip": {"pixel_values": images}, "vit": {"images": vit_images}}

        Returns:
            Flattened image embeddings with shape [total_embeddings, hidden_dim],
            or None if no valid inputs were provided.
        """
        # Encode the images
        embeddings = self.encode(encoder_inputs)

        # If no embeddings were produced, return None
        if not embeddings:
            return None

        projected = self.project_embeddings(embeddings, is_input=True)
        logging.debug(f"Projected audio embeddings shape: {projected.shape}")
        return projected  # [total_embeddings, hidden_dim]


================================================
FILE: megatron/core/models/multimodal/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/core/models/multimodal/context_parallel.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
"""Multimodal Sequence Parallel (SP) and Context Parallel (CP) functionality."""

import torch

from megatron.core.packed_seq_params import PackedSeqParams


def get_padding(
    seq_len,
    cp_size,
    tp_size,
    has_sp,
    decoder_tp_comm_overlap=False,
    decoder_seq_len=None,
    fp8_enabled=False,
    fp8_recipe=None,
):
    """Calculate padding needed for SP, CP, TP comm overlap, and FP8.

    Args:
        seq_len (int): Model sequence length.
        cp_size (int): Context parallel size.
        tp_size (int): Tensor parallel size.
        has_sp (bool): Model uses sequence parallelism.
        decoder_tp_comm_overlap (bool): Decoder (LLM) uses tensor parallel communication overlap.
        decoder_seq_len (int): Decoder (LLM) maximum sequence length.
        fp8_enabled (bool): FP8 is enabled.
        fp8_recipe (str): FP8 recipe. Affects required padding.

    Returns:
        padding (int): Padding needed given model configuration.
    """

    padding = 0
    # TP Comm overlap is performed with combined text+image embeddings.
    if has_sp and decoder_tp_comm_overlap:
        # If TP Comm Overlap is enabled for combined text+image embedding in LM backbone,
        # user needs to provide decoder_seq_len with any potential padding needed for SP+CP
        assert (
            decoder_seq_len is not None
        ), "Please provide decoder seq length when using TP comm overlap for LM backbone"
        padding = decoder_seq_len - seq_len
        return padding

    padding_factor = 1
    if has_sp and cp_size > 1:
        # Padding to multiple of tp_size * cp_size * 2 when using CP + SP.
        padding_factor = tp_size * cp_size * 2
    elif cp_size > 1:
        padding_factor = cp_size * 2
    elif has_sp:
        padding_factor = tp_size
    elif fp8_enabled:
        padding_factor = 32 if fp8_recipe == "mxfp8" else 16

    padding = int((seq_len + padding_factor - 1) // padding_factor * padding_factor) - seq_len

    return padding


def get_packed_seq_params(tokens, img_seq_len, padding_needed, cp_size, use_packed_sequence=False):
    """Get PackedSeqParams for CP.

    Args:
        tokens (torch.Tensor): [batch, seq_len] input tokens.
        img_seq_len (int): Image sequence length.
        padding_needed (int): Padding to add.
        cp_size (int): Context parallel size.
        use_packed_sequence (bool): Uses sequence packing.

    Returns:
        packed_seq_params (PackedSeqParams): Parameters to be sent to Transformer Engine.
    """
    batch_size = tokens.shape[0]
    # Calculate the valid token seq len that LM backbone should compute on
    combined_valid_seqlen = tokens.shape[1] + img_seq_len - padding_needed
    cu_seqlens = torch.arange(
        0,
        (batch_size + 1) * (combined_valid_seqlen),
        step=(combined_valid_seqlen),
        dtype=torch.int32,
        device=tokens.device,
    )
    # Calculate the total padded token seq len
    combined_padded_seqlen = tokens.shape[1] + img_seq_len
    cu_seqlens_padded = None
    qkv_format = 'sbhd'
    if cp_size > 1 and (padding_needed > 0 or use_packed_sequence):
        # Provide cu_seqlens_<q/kv>_padded for CP support
        cu_seqlens_padded = torch.arange(
            0,
            (batch_size + 1) * (combined_padded_seqlen),
            step=(combined_padded_seqlen),
            dtype=torch.int32,
            device=tokens.device,
        )
        # CP with padding mask type requires THD format
        qkv_format = 'thd'

    packed_seq_params = PackedSeqParams(
        cu_seqlens_q=cu_seqlens,
        cu_seqlens_kv=cu_seqlens,
        cu_seqlens_q_padded=cu_seqlens_padded,
        cu_seqlens_kv_padded=cu_seqlens_padded,
        max_seqlen_q=combined_padded_seqlen,
        max_seqlen_kv=combined_padded_seqlen,
        qkv_format=qkv_format,
    )

    return packed_seq_params


================================================
FILE: megatron/core/models/multimodal/llava_model.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import logging
from collections import namedtuple
from functools import partial
from typing import List, Optional

import torch

from megatron.core import tensor_parallel
from megatron.core.config_logger import has_config_logger_enabled, log_config_to_disk
from megatron.core.extensions.transformer_engine import HAVE_TE
from megatron.core.inference.contexts import BaseInferenceContext
from megatron.core.models.gpt import GPTModel
from megatron.core.models.mamba import MambaModel
from megatron.core.models.vision.clip_vit_model import CLIPViTModel, get_num_image_embeddings
from megatron.core.models.vision.multimodal_projector import MultimodalProjector
from megatron.core.models.vision.radio import RADIOViTModel
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.transformer import MegatronModule
from megatron.core.transformer.attention import SelfAttentionSubmodules
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.transformer_layer import TransformerLayerSubmodules
from megatron.core.utils import deprecate_inference_params, is_te_min_version, log_single_rank

if HAVE_TE:
    from megatron.core.extensions.transformer_engine import TEDotProductAttention

    try:
        import transformer_engine_torch as tex

        HAVE_TEX = True
    except ImportError:
        tex = None
        HAVE_TEX = False
else:
    TEDotProductAttention = None
    tex = None
    HAVE_TEX = False


IGNORE_INDEX = -100  # ID for labels that should be ignored.
# Image token index can be tokenizer dependent so the default value does not work in all cases.
DEFAULT_IMAGE_TOKEN_INDEX = -200
IMAGE_TOKEN = "<image>"
VIDEO_TOKEN = "<video>"


# Note: This is under development and may be missing features.
class LLaVAModel(MegatronModule):
    """LLaVA multi-modal model.

    Args:
        language_transformer_config (TransformerConfig): Transformer config for the language model.
        language_transformer_layer_spec (ModuleSpec): Language model spec.
        language_vocab_size (int): Language model vocabulary size.
        language_max_sequence_length (int): Language model maximum sequence length.
        vision_transformer_config (TransformerConfig): Transformer config for the vision model.
        vision_transformer_layer_spec (ModuleSpec): Vision model spec.
        drop_vision_class_token (bool): Drop vision class token(s) before the language model.
        vision_projection_config (TransformerConfig): Vision projection config.
        vision_projection_layer_spec (ModuleSpec): Vision projection spec.
        vision_projection_type (str): Type of the vision projection. Default: 2-layer MLP.
        allow_missing_vision_projection_checkpoint (bool): Allow vision projection weights to be
            missing when loading a checkpoint. Default False.
        parallel_output (bool): Keep outputs split across tensor parallel ranks.
            This is typically True for training and False for inference.
        share_embeddings_and_output_weights (bool): Input embedding and output layer share weights.
        language_position_embedding_type (str): Language model position embedding type.
        language_rotary_percent (float): RoPE percent. Defaults to 1.0.
        pre_process (bool): Include embedding layer in the decoder (used with pipeline parallel).
        post_process (bool): Include output layer in the decoder (used with pipeline parallel).
        add_encoder (bool): Construct the encoder (used with pipeline parallel).
            When we use pipelining, the encoder will live on only the first stage
        add_decoder (bool): Construct the decoder (used with pipeline parallel).
            When we use pipelining, the decoder will live on every stage after the first one.
        img_h (int): Input image height.
        img_w (int): Input image width.
        patch_dim (int): The size of each image patch side.
        language_rotary_base (int): RoPE base.
        language_rope_scaling (bool): Toggle RoPE scaling.
        language_rope_scaling_factor (float): RoPE scaling factor. Defaults to 8.
        image_token_index (int): Token ID for image token such as <image>.
        pixel_shuffle (bool): Enable pixel shuffle.
        tile_tags (list): Optional tile tags.
        pg_collection (ProcessGroupCollection): Model communication process groups.
        vp_stage (int): Virtual pipeline stage.
    """

    def __init__(
        self,
        language_transformer_config: TransformerConfig,
        language_transformer_layer_spec: ModuleSpec,
        language_vocab_size: int,
        language_max_sequence_length: int,
        vision_transformer_config: TransformerConfig,
        vision_transformer_layer_spec: ModuleSpec,
        drop_vision_class_token: bool,
        vision_projection_config: TransformerConfig,
        vision_projection_layer_spec: ModuleSpec,
        vision_projection_type: str = "mlp",
        allow_missing_vision_projection_checkpoint: bool = False,
        parallel_output: bool = True,
        share_embeddings_and_output_weights: bool = False,
        language_position_embedding_type: str = 'learned_absolute',
        language_rotary_percent: float = 1.0,
        pre_process: bool = True,
        post_process: bool = True,
        add_encoder: bool = True,
        add_decoder: bool = True,
        img_h: int = 336,
        img_w: int = 336,
        patch_dim: int = 14,
        language_rotary_base: int = 10000,
        language_rope_scaling: bool = False,
        language_rope_scaling_factor: float = 8.0,
        hybrid_layer_pattern: str = None,
        fp16_lm_cross_entropy: bool = False,
        image_token_index: int = DEFAULT_IMAGE_TOKEN_INDEX,
        pixel_shuffle: bool = False,
        tile_tags: Optional[list] = None,
        pg_collection: Optional[ProcessGroupCollection] = None,
        max_num_tiles: int = 0,
        tokenizer_type: str = "",
        vp_stage: Optional[int] = None,
        use_vision_backbone_fp8_arch: bool = False,
    ) -> None:
        super().__init__(config=language_transformer_config)

        if has_config_logger_enabled(language_transformer_config):
            log_config_to_disk(language_transformer_config, locals(), prefix=type(self).__name__)

        log_single_rank(
            logging.getLogger(__name__),
            logging.WARNING,
            "LLaVA is work in progress. Features are missing and methods can change.",
        )

        self.pre_process = pre_process
        self.post_process = post_process
        self.add_encoder = add_encoder
        self.add_decoder = add_decoder
        self.vp_stage = vp_stage

        self.encoder_hidden_state = None
        self.vision_model = None
        self.vision_projection = None
        self.language_model = None

        if pg_collection is None:
            pg_collection = ProcessGroupCollection.use_mpu_process_groups()
        self.pg_collection = pg_collection

        language_model_type = getattr(language_transformer_config, "language_model_type", "")
        self.sequence_parallel_lm = language_transformer_config.sequence_parallel
        self.tp_comm_overlap_lm = language_transformer_config.tp_comm_overlap
        self.context_parallel_lm = language_transformer_config.context_parallel_size
        if self.sequence_parallel_lm or self.context_parallel_lm > 1:
            if not language_model_type.startswith('nemotron5-hybrid'):
                assert isinstance(
                    language_transformer_layer_spec.submodules, TransformerLayerSubmodules
                )
                assert isinstance(
                    language_transformer_layer_spec.submodules.self_attention.submodules,
                    SelfAttentionSubmodules,
                )
                attn_submodules = (
                    language_transformer_layer_spec.submodules.self_attention.submodules
                )
                assert (
                    attn_submodules.core_attention == TEDotProductAttention and HAVE_TE
                ), "Sequence/Context Parallelism is supported only with TE DotProductAttention."
            if self.context_parallel_lm > 1:
                self.cp_group = self.pg_collection.cp
                assert (
                    self.cp_group.size() == self.context_parallel_lm
                ), "CP Group size should match the Language Model CP size"
                assert is_te_min_version(
                    "1.10.0"
                ), "Context Parallelism in LLaVA requires TE v1.10 or higher"
            else:
                self.cp_group = None
        self.tensor_model_parallel_size_lm = language_transformer_config.tensor_model_parallel_size

        # This attribute is needed to check if an all-reduce is required
        # on the word embeddings inside `finalize_model_grads._allreduce_word_embedding_grads`.
        self.share_embeddings_and_output_weights = share_embeddings_and_output_weights

        if self.add_decoder:
            if getattr(language_transformer_config, "language_model_type", "").startswith("hf://"):
                from megatron.core.models.huggingface.module import build_hf_model

                self.language_model = build_hf_model(
                    language_transformer_config, language_transformer_config.language_model_type
                )
                self.language_model = build_hf_model(language_transformer_config)
            elif language_model_type.startswith('nemotron5-hybrid'):
                self.language_model = MambaModel(
                    config=language_transformer_config,
                    mamba_stack_spec=language_transformer_layer_spec,
                    vocab_size=language_vocab_size,
                    max_sequence_length=language_max_sequence_length,
                    parallel_output=parallel_output,
                    position_embedding_type=language_position_embedding_type,
                    pre_process=self.pre_process,
                    hybrid_layer_pattern=hybrid_layer_pattern,
                    post_process=self.post_process,
                    rotary_percent=language_rotary_percent,
                    rotary_base=language_rotary_base,
                    fp16_lm_cross_entropy=fp16_lm_cross_entropy,
                    scatter_embedding_sequence_parallel=False,
                    pg_collection=self.pg_collection,
                )
            else:
                self.language_model = GPTModel(
                    config=language_transformer_config,
                    transformer_layer_spec=language_transformer_layer_spec,
                    vocab_size=language_vocab_size,
                    max_sequence_length=language_max_sequence_length,
                    parallel_output=parallel_output,
                    position_embedding_type=language_position_embedding_type,
                    rotary_percent=language_rotary_percent,
                    pre_process=self.pre_process,
                    post_process=self.post_process,
                    rotary_base=language_rotary_base,
                    rope_scaling=language_rope_scaling,
                    rope_scaling_factor=language_rope_scaling_factor,
                    scatter_embedding_sequence_parallel=False,
                    share_embeddings_and_output_weights=share_embeddings_and_output_weights,
                    pg_collection=self.pg_collection,
                    vp_stage=self.vp_stage,
                )

            self._language_max_sequence_length = language_max_sequence_length
            self._language_is_pipeline_parallel = (
                language_transformer_config.pipeline_model_parallel_size > 1
            )

            # Newer Transformer Engine versions add _extra_state keys in state_dict when using FP8.
            # Older models may not have _extra_state and can be ignored.
            self.language_model.register_load_state_dict_post_hook(
                _load_state_dict_hook_ignore_extra_state
            )

        class_token_len = 1
        if self.add_encoder:
            self._drop_vision_class_token = drop_vision_class_token
            add_class_token = True
            if vision_transformer_config.vision_model_type.startswith(
                ("clip", "siglip", "internvit")
            ):
                if vision_transformer_config.vision_model_type == "siglip":
                    class_token_len = 0
                    add_class_token = False
                    error_msg = (
                        "Siglip does not support vision class token, "
                        "set disable-vision-class-token to False."
                    )
                    assert not self._drop_vision_class_token, error_msg
                self.vision_model = CLIPViTModel(
                    vision_transformer_config,
                    vision_transformer_layer_spec,
                    img_h=img_h,
                    img_w=img_w,
                    class_token_len=class_token_len,
                    patch_dim=patch_dim,
                    model_subtype=vision_transformer_config.vision_model_type,
                    add_class_token=add_class_token,
                    pg_collection=self.pg_collection,
                    vp_stage=self.vp_stage,
                )
            elif vision_transformer_config.vision_model_type in ("radio", "radio-g", "cradio-g"):
                # TODO: should refactor into model code itself?
                class_token_len = 0
                max_img_h = 0
                max_img_w = 0
                embedder_bias = False
                ln_post_impl = None
                use_mask_token = False

                if vision_transformer_config.vision_model_type == "radio":
                    class_token_len = 8
                    max_img_h = 2048
                    max_img_w = 2048
                    embedder_bias = False
                    ln_post_impl = None
                    use_mask_token = False
                elif vision_transformer_config.vision_model_type == "radio-g":
                    class_token_len = 5
                    max_img_h = 1792
                    max_img_w = 1792
                    embedder_bias = True
                    from megatron.core.extensions.transformer_engine import TENorm

                    ln_post_impl = TENorm
                    use_mask_token = True
                elif vision_transformer_config.vision_model_type == "cradio-g":
                    class_token_len = 8
                    max_img_h = 2048
                    max_img_w = 2048
                    embedder_bias = False
                    ln_post_impl = None
                    use_mask_token = False

                if vision_transformer_config.fp8 or use_vision_backbone_fp8_arch:
                    # FP8 padding for final sequence length to be a multiple of 16 or 32.
                    class_token_len = 32 if vision_transformer_config.fp8_recipe == "mxfp8" else 16

                self.vision_model = RADIOViTModel(
                    vision_transformer_config,
                    vision_transformer_layer_spec,
                    ln_post_impl=ln_post_impl,
                    img_h=img_h,
                    img_w=img_w,
                    max_img_h=max_img_h,
                    max_img_w=max_img_w,
                    class_token_len=class_token_len,
                    patch_dim=patch_dim,
                    add_class_token=add_class_token,
                    embedder_bias=embedder_bias,
                    use_mask_token=use_mask_token,
                    pg_collection=self.pg_collection,
                    vp_stage=self.vp_stage,
                )
            elif vision_transformer_config.vision_model_type.startswith("hf://"):
                from megatron.core.models.huggingface.module import build_hf_model

                self.vision_model = build_hf_model(
                    vision_transformer_config, vision_transformer_config.vision_model_type
                )
            else:
                raise ValueError(
                    "Vision model "
                    f"{vision_transformer_config.vision_model_type} is not "
                    "supported."
                )

            self.vision_model.register_load_state_dict_post_hook(
                _load_state_dict_hook_ignore_extra_state
            )

            vision_projection_input_size = vision_transformer_config.hidden_size
            vision_projection_input_size *= 4 if pixel_shuffle else 1

            # Map (intermediate) vision model outputs to the language model input dimension.
            self.vision_projection = MultimodalProjector(
                vision_projection_config,
                vision_projection_layer_spec,
                vision_projection_type,
                vision_projection_input_size,
                tp_group=self.pg_collection.tp,
            )
            # Ignore missing weights for the vision projection during checkpoint loading.
            # This should be disabled by default but can be enabled if your checkpoint contains
            # pretrained vision and language models but not the projection from vision model
            # outputs to language model inputs.
            if allow_missing_vision_projection_checkpoint:
                vision_projection_param_names = [
                    f"vision_projection.{name}"
                    for name in self.vision_projection.state_dict().keys()
                ]
                self.vision_projection.register_load_state_dict_post_hook(
                    partial(_load_state_dict_hook_ignore_param_names, vision_projection_param_names)
                )

            self.vision_projection.register_load_state_dict_post_hook(
                _load_state_dict_hook_ignore_extra_state
            )

        self.img_seq_len = get_num_image_embeddings(
            img_h,
            img_w,
            patch_dim,
            vision_transformer_config.vision_model_type,
            drop_vision_class_token,
            class_token_len,
            pixel_shuffle,
            tile_tags is not None,  # Tile tags enabled/disabled.
            max_num_tiles,
            tokenizer_type,
        )

        self.image_token_index = image_token_index
        self._pixel_shuffle = pixel_shuffle
        self._tile_tags = tile_tags
        self._max_num_tiles = max_num_tiles

    def shared_embedding_or_output_weight(self):
        """This is a convenience method to surface the language model's word embeddings, which is
        necessary for `finalize_model_grads._allreduce_word_embedding_grads`."""
        if self.add_decoder:
            return self.language_model.shared_embedding_or_output_weight()
        return None

    def set_input_tensor(self, input_tensor) -> None:
        """Set model chunk input tensor."""
        # This is usually handled in schedules.py but some inference code still
        # gives us non-lists or None
        if not isinstance(input_tensor, list):
            input_tensor = [input_tensor]
        assert len(input_tensor) == 1, 'input_tensor should only be length 1 for llava'

        if self.add_encoder and self.add_decoder:
            self.vision_model.set_input_tensor(input_tensor[0])
        elif self.add_encoder:
            self.vision_model.set_input_tensor(input_tensor[0])
        elif self.pre_process:
            self.encoder_hidden_state = input_tensor[0]
        else:
            self.language_model.set_input_tensor(input_tensor[0])

    def freeze(
        self, freeze_language_model: bool, freeze_vision_model: bool, freeze_vision_projection: bool
    ):
        """Freeze model modules.

        Make specific modules non-trainable by setting requires_grad to False.

        Args:
            freeze_language_model (bool): Freeze the language model module.
            freeze_vision_model (bool): Freeze the vision model module.
            freeze_vision_projection (bool): Freeze the vision projection module.
        """
        modules = []
        if freeze_language_model and self.language_model is not None:
            modules.append(self.language_model)
        if freeze_vision_model and self.vision_model is not None:
            modules.append(self.vision_model)
        if freeze_vision_projection and self.vision_projection is not None:
            modules.append(self.vision_projection)

        for module in modules:
            for param in module.parameters():
                param.requires_grad = False

    def _preprocess_data(
        self,
        image_embeddings,
        language_embeddings,
        input_ids,
        loss_mask,
        labels,
        use_inference_kv_cache,
        inference_context,
        image_token_index,
        num_image_tiles,
        *,
        inference_params: Optional[BaseInferenceContext] = None,
    ):
        """Preprocess input data before input to language model.

        This function is adopted from
        https://github.com/huggingface/transformers/blob/85817d98fb60977c97e3014196a462b732d2ed1a/src/transformers/models/llava_next/modeling_llava_next.py#L409
        for our input data conventions.

        image_token_index = -200 indicates the image position in the input_ids = [0, 1, -200, 2, 3]
        and labels = [1, -200, 2, 3, 4], for example.
        We want to replace the image position (-200) with image_embeddings and return the following:
        - final_embeddings = [0, 1, image_embeddings, 2, 3],
        - final_labels = [1, -100, 2, 3, 4]
        - final_loss_mask = [1, 0, 0, 1, 1]

        This function handles samples without images (text-only sample). It also handles samples
        with images that are split into multiples tiles.

        If pipeline parallelism is not used, then self.pre_process and self.post_process
        are both True and we update both input embeddings, labels and loss masks (if available).

        If pipeline parallelism is used, then we do the following
        - the first language model chunk has self.pre_process = True and
          self.post_process = False. We update input embeddings.
        - the middle language model chunk(s) has self.pre_process = False and
          self.post_process = False. We don't need to update anything.
        - the last language model chunk has self.pre_process = False and
          self.post_process = True. We update labels and loss mask.

        TODO: This function should adjust the attention mask too.
        Currently, we assume the language model uses a causal mask.

        Returns:
            final_embedding (torch.Tensor): image and text embeddings [combined_seq_len, b, h].
            final_labels (torch.Tensor): labels for image and text positions [b, combined_seq_len].
            final_loss_mask (torch.Tensor): loss mask [b, combined_seq_len].
        """

        inference_context = deprecate_inference_params(inference_context, inference_params)

        assert self.add_decoder, "input text preprocessing is only needed for the language model"

        # No pre- or postprocessing needed.
        # With pipeline parallel > 2, this means a chunk in the middle of the model.
        if not self.pre_process and not self.post_process:
            return None, None, None

        # If using the inference KV cache, the image tokens are already computed.
        if use_inference_kv_cache:
            return language_embeddings, loss_mask, labels

        img_seq_len = self.img_seq_len
        batch_size, text_seq_len = input_ids.shape

        has_labels = labels is not None
        if has_labels:
            assert (
                labels.shape == loss_mask.shape
            ), f"mismatching labels shape {labels.shape} and loss mask shape {loss_mask.shape}"

        # Create indices for new text and label positions.
        with torch.no_grad():
            image_token_mask = input_ids == image_token_index
            num_images_per_sample = torch.sum(image_token_mask, dim=-1)

            # Number of tiles per sample.
            num_image_tiles_batch = num_image_tiles.split(num_images_per_sample.tolist(), dim=0)
            num_image_tiles_batch = torch.tensor(
                [x.sum() for x in num_image_tiles_batch], device=input_ids.device
            )

            # Sequence length for each sample is the image sequence length multiplied by
            # the number of tiles for that image, minus image token indices,
            # plus text sequence length.
            seq_lens = num_image_tiles_batch * img_seq_len - num_images_per_sample + text_seq_len
            max_seq_len = seq_lens.max()
            # Pipeline parallel expects fixed input size. Check if we need to pad.
            if (
                self._language_is_pipeline_parallel
                and max_seq_len < self._language_max_sequence_length
                and inference_context is None
            ):
                max_seq_len = self._language_max_sequence_length

            batch_indices, non_image_indices = torch.where(image_token_mask != True)

            # New position ids for the text tokens, shifted by the image sequence length.
            # E.g. for input_ids = [-200, 1, 2, 3] and img_seq_len = 576, we get
            # new_position_ids = [576, 577, 578, 579]. text_position_ids are then [577, 578, 579].
            image_token_mask_lens = image_token_mask.int().clone()
            # -1 is for the removed image token index.
            image_token_mask_lens[image_token_mask] = num_image_tiles * img_seq_len - 1
            # +1 is needed here for the cumulative sum. -1 is adjusting for zero-based indexing.
            new_position_ids = torch.cumsum((image_token_mask_lens + 1), dim=-1) - 1
            text_position_ids = new_position_ids[batch_indices, non_image_indices]

            label_batch_indices = None  # dummy value to pass formatting
            # Labels are shifted to left by one.
            # So, shift text position ids and non-image indices to left by one.
            label_batch_indices = None
            if has_labels:
                label_text_position_ids = text_position_ids - 1
                valid_label_text_position_ids = label_text_position_ids >= 0
                label_text_position_ids = label_text_position_ids[valid_label_text_position_ids]

                label_batch_indices = batch_indices[valid_label_text_position_ids]

                label_non_image_indices = non_image_indices - 1
                valid_label_non_image_indices = label_non_image_indices >= 0
                label_non_image_indices = label_non_image_indices[valid_label_non_image_indices]

            # Create a mask for the image embedding positions.
            images_mask = torch.full(
                (batch_size, max_seq_len), True, dtype=torch.bool, device=input_ids.device
            )
            # No images in the text positions.
            images_mask[batch_indices, text_position_ids] = False
            # Samples can have different amount of images tokens.
            # new_position_ids[:, -1] gives the last text position id for each sample.
            # Padding is needed when the number of image tokens differs.
            first_padding_idx = new_position_ids[:, -1] + 1
            images_mask[
                torch.arange(max_seq_len, device=first_padding_idx.device).repeat(batch_size, 1)
                >= first_padding_idx.unsqueeze(1)
            ] = False

        # Create the final input embedding (if this is the first language model stage).
        final_embedding = None
        if self.pre_process:
            embed_dim = language_embeddings.shape[-1]
            final_embedding = torch.zeros(
                batch_size,
                max_seq_len,
                embed_dim,
                dtype=language_embeddings.dtype,
                device=language_embeddings.device,
            )

            # Put text embeddings to the text positions in the result tensor.
            final_embedding[batch_indices, text_position_ids] = language_embeddings[
                batch_indices, non_image_indices
            ]

            # Put image embeddings to image positions.
            # NOTE: FSDP can hang with text-only samples so we use a workaround to run a dummy image
            # through the vision model and then zero-out the impact of the output here.
            if num_image_tiles.shape[0] == 0 and image_embeddings.shape[0] > 0:
                assert images_mask.sum() == 0 and getattr(
                    self.vision_model, "_is_fsdp_managed_module", False
                ), "expected FSDP and dummy image"
                final_embedding[:1, :1, :1] += 0 * image_embeddings[:1, :1, :1]
            else:
                final_embedding[images_mask] = (
                    image_embeddings.permute(1, 0, 2).reshape(-1, embed_dim).contiguous()
                )

        # Create the final labels and loss mask (if this is the last language model stage).
        final_labels, final_loss_mask = None, None
        if self.post_process and has_labels:
            final_labels = torch.full(
                (batch_size, max_seq_len), IGNORE_INDEX, dtype=labels.dtype, device=labels.device
            )
            final_loss_mask = torch.full(
                (batch_size, max_seq_len), 0, dtype=loss_mask.dtype, device=loss_mask.device
            )

            # Put text labels and loss mask to the text positions.
            final_labels[label_batch_indices, label_text_position_ids] = labels[
                label_batch_indices, label_non_image_indices
            ]

            final_loss_mask[batch_indices, text_position_ids] = loss_mask[
                batch_indices, non_image_indices
            ]

            # For labels, pick the last label index that got dropped by the shift to left.
            label_extra_text_position_ids = seq_lens - 1
            batch_range = torch.arange(len(label_extra_text_position_ids))
            final_labels[batch_range, label_extra_text_position_ids] = labels[batch_range, -1]

            # Loss mask the image positions.
            final_loss_mask[images_mask] = 0

            # Loss mask last text position just before an image
            # so that text token does not need to predict the first image token.
            batch_image_indices, image_indices = torch.where(image_token_mask)
            # Indices just before image tokens. If it's -1, skip it.
            before_image_indices = image_indices - 1
            valid = before_image_indices >= 0
            valid_batch_image_indices = batch_image_indices[valid]
            valid_before_image_indices = before_image_indices[valid]
            # Map those indices those position ids.
            valid_before_image_indices = new_position_ids[
                valid_batch_image_indices, valid_before_image_indices
            ]

            final_loss_mask[valid_batch_image_indices, valid_before_image_indices] = 0

        if final_embedding is not None and final_labels is not None:
            assert (
                final_embedding.shape[:2] == final_labels.shape == final_loss_mask.shape
            ), "unexpected shapes after data preprocessing"

        if final_embedding is not None:
            # Truncate if exceeding the language model's max sequence length.
            if final_embedding.shape[1] > self._language_max_sequence_length:
                final_embedding = final_embedding[:, : self._language_max_sequence_length]
            # Transpose to [s,b,h] only if not using CP because CP Sharding expects seq in dim=1
            if self.context_parallel_lm == 1:
                final_embedding = final_embedding.transpose(1, 0).contiguous()

        truncate_labels = (
            final_labels is not None and final_labels.shape[1] > self._language_max_sequence_length
        )
        if truncate_labels:
            final_labels = final_labels[:, : self._language_max_sequence_length]
            final_loss_mask = final_loss_mask[:, : self._language_max_sequence_length]

        return final_embedding, final_labels, final_loss_mask

    def _process_embedding_token_parallel(
        self, combined_embeddings, new_labels, new_loss_mask, packed_seq_params
    ):
        """Processes the input data for model parallelism support.

        When using sequence parallelism (SP) or context parallelism (CP), the sequence is sharded
        across different GPUs. This function performs the sharding and distributes the sequence
        across GPUs for SP and CP

        Context Parallelism is a feature that helps improve memory efficiency for
        long sequence training by distributing sequence across CP ranks.
        It requires token length to be divisible by (CP size *2) to ensure proper load balance.

        Sequence Parallelism is a feature that helps improve memory efficiency for
        long sequence training by distributing sequence across TP ranks.
        It requires token length to be divisible by TP size.

        Returns:
            combined_embeddings (torch.Tensor): image and text embeddings combined and distributed.
            new_labels (torch.Tensor): Distributed labels for image and text positions.
            new_loss_mask (torch.Tensor): Distributed loss mask.
            packed_seq_params (PackedSeqParams): Dict with padded token information.

        """

        # No pre or post processing needed with PP middle chunks.
        if not self.pre_process and not self.post_process:
            return combined_embeddings, new_labels, new_loss_mask, packed_seq_params

        shard_factor = seq_dim = None
        if self.pre_process:
            if self.context_parallel_lm > 1 and self.sequence_parallel_lm:
                shard_factor = self.tensor_model_parallel_size_lm * self.context_parallel_lm * 2
                seq_dim = 1
            elif self.context_parallel_lm > 1:
                shard_factor = self.context_parallel_lm * 2
                seq_dim = 1
            elif self.sequence_parallel_lm:
                shard_factor = self.tensor_model_parallel_size_lm
                seq_dim = 0

            assert (
                combined_embeddings.shape[seq_dim] % shard_factor == 0
            ), f"Sequence length should be divisible by {shard_factor} for \
                Sequence/Context parallelism"
            if self.sequence_parallel_lm and self.tp_comm_overlap_lm:
                assert (
                    combined_embeddings.shape[seq_dim] == self._language_max_sequence_length
                ), f"TP Comm overlap either requires Vision+Text token length \
                == language_max_sequence_length"

        if self.context_parallel_lm > 1:
            batch = dict()
            if self.pre_process:
                batch["combined_embeddings"] = combined_embeddings
            if self.post_process:
                batch["new_labels"] = new_labels
                batch["new_loss_mask"] = new_loss_mask
            # Distribute sequence across CP ranks
            if packed_seq_params is None or packed_seq_params.qkv_format == 'sbhd':
                from megatron.training.utils import get_batch_on_this_cp_rank

                batch = get_batch_on_this_cp_rank(batch)
            else:
                assert HAVE_TEX and is_te_min_version(
                    "1.10.0"
                ), "Please update Transformer Engine to >= 1.10 to use \
                    Context Parallel with THD format data"
                index = tex.thd_get_partitioned_indices(
                    packed_seq_params.cu_seqlens_q_padded,
                    batch[next(iter(batch))].size(1),
                    self.cp_group.size(),
                    self.cp_group.rank(),
                )
                for key, data in batch.items():
                    batch[key] = data.index_select(1, index)

            if self.pre_process:
                combined_embeddings = batch["combined_embeddings"]  # [B, S/CP, H]
                combined_embeddings = combined_embeddings.transpose(
                    1, 0
                ).contiguous()  # [B,S/CP,H] -> [S/CP,B,H]
            if self.post_process:
                new_labels = batch["new_labels"]
                new_loss_mask = batch["new_loss_mask"]

        if self.sequence_parallel_lm and self.pre_process:
            combined_embeddings = tensor_parallel.scatter_to_sequence_parallel_region(
                combined_embeddings
            )  # [S/(CP*TP),B,H]

        return combined_embeddings, new_labels, new_loss_mask, packed_seq_params

    def _apply_tile_tagging(self, image_embeddings, num_image_tiles):
        """Apply tile tagging.

        The image embeddings of multiple tiles are prepended with tile tags such as <tile_1>.
        This implements the method used in NVLM https://arxiv.org/pdf/2409.11402.

        Args:
            image_embeddings (torch.Tensor): [img_seq_len, num_tiles, h_language].
            num_image_tiles (torch.Tensor): Number of tiles for each input image [num_images].

        Returns:
            torch.Tensor: Tile tags prepended to image embeddings.
                [tile_seq_len (=5) + img_seq_len, num_tiles, h_language]
        """
        assert (
            num_image_tiles.shape[0] == 1 and len(num_image_tiles) == 1
        ), "multiple input images are not supported yet."

        num_tiles = num_image_tiles[0].item()
        tile_tags = self._tile_tags[: num_tiles - 1] + [self._tile_tags[-1]]

        # [num_tiles, tile_seq_len (=5)]
        tile_tag_input_ids = torch.tensor(
            tile_tags, dtype=torch.int64, device=num_image_tiles.device
        )

        # [tile_seq_len, num_tiles, h_language]
        tile_tag_embeds = self.language_model.embedding(tile_tag_input_ids, position_ids=None)

        # [num_tiles, dim] should be the same same
        assert tile_tag_embeds.shape[1:] == image_embeddings.shape[1:]

        image_embeddings = torch.cat([tile_tag_embeds, image_embeddings])

        return image_embeddings  # [tile_seq_len + img_seq_len, num_tiles, h_language]

    def forward(
        self,
        images: torch.Tensor,
        input_ids: torch.Tensor,
        position_ids: torch.Tensor,
        attention_mask: torch.Tensor,
        labels: Optional[torch.Tensor] = None,
        loss_mask: Optional[torch.Tensor] = None,
        inference_context: Optional[BaseInferenceContext] = None,
        num_image_tiles: Optional[List[int]] = None,
        image_token_index: Optional[int] = None,
        runtime_gather_output: Optional[bool] = None,
        packed_seq_params: Optional[PackedSeqParams] = None,
        *,
        inference_params: Optional[BaseInferenceContext] = None,
    ) -> torch.Tensor:
        """Forward function of the LLaVA model.

        Args:
            images (torch.Tensor): input images of shape [num_tiles, img_h, img_w].
                num_tiles means the number of image tiles in this batch.
                num_tiles = 0 if the batch doesn't contain images.
            input_ids (torch.Tensor): input text ids [batch, text_seq_len].
            position_ids (torch.Tensor): input text position ids [batch, text_seq_len].
            attention_mask (torch.Tensor): Language model attention mask
                [batch, 1, 1, combined_seq_len]. NOTE: attention_mask is typically None and
                attn_mask_type in layer specs determines the attention mask used.
            labels (torch.Tensor): Optional target text labels [batch, combined_seq_len].
            loss_mask (torch.Tensor): Text loss mask [batch, text_seq_len].
            inference_context (BaseInferenceContext): Inference-time parameters including KV cache.
            num_image_tiles (list of int): Number of tiles per image. Default 1 tile per image.
            image_token_index (int): ID for input images. Default None means `image_token_index`
                arg in the constructor will be used.
            runtime_gather_output (bool): Gather output at runtime. Default None means
                `parallel_output` arg in the constructor will be used.
            packed_seq_params (PackedSeqParams): 1) If using sequence packing, must contain
                subsample length information. 2) If using SP/CP with padding mask type,
                must contain padded token information.

        Returns:
            output (torch.Tensor): Loss of shape [b, s] if labels are provided,
                otherwise logits of shape [b, s, vocab_size].
            loss_mask (torch.Tensor): Loss mask expanded to combined sequence length. Shape [b, s].
        """

        inference_context = deprecate_inference_params(inference_context, inference_params)

        use_inference_kv_cache = (
            inference_context is not None
            and "image_tokens_count" in inference_context.key_value_memory_dict
        )
        has_images = images is not None and images.shape[0] > 0

        # If running inference, we can skip image token computation
        # if they were computed already earlier for this sample.
        if use_inference_kv_cache:
            image_embeddings = None
        elif self.add_encoder and not has_images:
            # If no images provided, use an empty image embeddings tensor.
            image_embeddings = torch.tensor([], dtype=images.dtype, device=images.device).reshape(
                0, 0, 0
            )
        elif self.add_encoder and has_images:
            image_embeddings = self.vision_model(images)  # [num_tiles, img_seq_len, h_vision]
            if self._drop_vision_class_token:
                image_embeddings = image_embeddings[:, self.vision_model.class_token_len :, :]

            if self._pixel_shuffle:
                image_embeddings = pixel_shuffle(
                    image_embeddings
                )  # [num_tiles, img_seq_len_shuffled, h_vision_shuffled]

            # contiguous() required as `permute` can sparsify the tensor and this breaks pipelining
            image_embeddings = image_embeddings.permute(
                1, 0, 2
            ).contiguous()  # [img_seq_len, num_tiles, h_vision]

            # map vision model output size to language model input size.
            image_embeddings = self.vision_projection(
                image_embeddings
            )  # [img_seq_len, num_tiles, h_language]

            # Apply tile tagging if enabled and an image token is present.
            if self._tile_tags is not None and torch.any(input_ids == self.image_token_index):
                image_embeddings = self._apply_tile_tagging(image_embeddings, num_image_tiles)

            # TODO: Support batched inference.
            # In inference, the language model KV cache will be updated for image token positions.
            # Store the image tokens sequence length to be used as an offset to the KV cache later.
            if inference_context is not None:
                inference_context.key_value_memory_dict["image_tokens_count"] = (
                    image_embeddings.shape[0] * image_embeddings.shape[1]
                )
        else:
            image_embeddings = self.encoder_hidden_state

        if not self.add_decoder:
            return image_embeddings, loss_mask

        language_embeddings = None
        if self.pre_process:
            input_ids_text = input_ids.clone()
            input_ids_text[input_ids_text == self.image_token_index] = 0
            # Note: This adds absolute position embedding but not RoPE.
            # Each image is counted as one position.
            # RoPE is added in language_model forward. Each image embedding is one position.
            language_embeddings = self.language_model.embedding(
                input_ids=input_ids_text, position_ids=position_ids
            )  # [text_seq_len, b, h_language]

            language_embeddings = language_embeddings.transpose(
                1, 0
            ).contiguous()  # [b, text_seq_len, h_language]

        # Assume 1 tile per image if the number of tiles is not provided.
        if num_image_tiles is None and images is not None:
            num_image_tiles = torch.ones(images.shape[0], dtype=torch.int, device=input_ids.device)

        combined_embeddings, new_labels, new_loss_mask = self._preprocess_data(
            image_embeddings,
            language_embeddings,
            input_ids,
            loss_mask,
            labels,
            use_inference_kv_cache,
            inference_context,
            image_token_index if image_token_index is not None else self.image_token_index,
            num_image_tiles,
        )  # [combined_seq_len, b, h_language], [b, combined_seq_len], [b, combined_seq_len]

        if self.context_parallel_lm > 1 or self.sequence_parallel_lm:
            combined_embeddings, new_labels, new_loss_mask, packed_seq_params = (
                self._process_embedding_token_parallel(
                    combined_embeddings, new_labels, new_loss_mask, packed_seq_params
                )
            )

        output = self.language_model(
            input_ids=None,
            position_ids=None,
            attention_mask=attention_mask,
            decoder_input=combined_embeddings,
            labels=new_labels,
            inference_context=inference_context,
            runtime_gather_output=runtime_gather_output,
            packed_seq_params=packed_seq_params,
        )

        return output, new_loss_mask


def _load_state_dict_hook_ignore_param_names(
    param_names: List[str], module: torch.nn.Module, incompatible_keys: namedtuple
):
    """Hook to ignore missing keys during checkpoint loading.

    By default, this should not be used to avoid accidentally missing weights in checkpoint loading.

    Example use case: Use this if you want to load a checkpoint that contains vision and language
    model weights but not the vision projection weights.

    Args:
        param_names (list str): Parameter names allowed to be missing when calling load_state_dict.
        module (torch.nn.Module): The torch module this hook applies to. Required by the torch API.
        incompatible_keys (namedtuple): Namedtuple with fields missing_keys and unexpected_keys,
            which collect the missing and unexpected keys, respectively.
    """
    for param_name in param_names:
        if param_name in incompatible_keys.missing_keys:
            logging.getLogger(__name__).warning(
                f"{param_name} being removed from incompatible_keys.missing_keys in LlavaModel"
            )
            incompatible_keys.missing_keys.remove(param_name)


def _load_state_dict_hook_ignore_extra_state(
    module: torch.nn.Module, incompatible_keys: namedtuple
):
    """Hook to ignore Transformer Engine _extra_state used for FP8.

    This is for backwards-compatibility. Newer TE versions add _extra_state keys to the state dict,
    while older models might not have those keys. Those keys can be ignored when not using FP8.

    Args:
        module (torch.nn.Module): The torch module this hook applies to. Required by the torch API.
        incompatible_keys (namedtuple): Namedtuple with fields missing_keys and unexpected_keys,
            which collect the missing and unexpected keys, respectively.
    """
    for name, keys in incompatible_keys._asdict().items():
        for key in keys[::-1]:
            if "extra_state" in key:
                logging.getLogger(__name__).warning(
                    f"_extra_state key {key} being removed from {name}"
                )
                keys.remove(key)


# pylint: disable-next=line-too-long
# Based on https://github.com/OpenGVLab/InternVL/blob/c7c5af1a8930b4862afe8ed14672307082ef61fa/internvl_chat/internvl/model/internvl_chat/modeling_internvl_chat.py#L218
# Copyright (c) 2023 OpenGVLab.
def pixel_shuffle(x, scale_factor=0.5, version=2):
    """Pixel shuffle based on InternVL but adapted for our use case.

    Args:
        x (torch.Tensor): Vision model outputs [num_tiles, img_seq_len, h_vision]
        version (int): Implementation version.

    Returns:
        Shuffled vision model outputs [num_tiles, (sq ** 2) * (scale ** 2), h_vision / (scale ** 2)]
    """
    h = w = int(x.shape[1] ** 0.5)  # sq
    x = x.reshape(x.shape[0], h, w, -1)  # [num_tiles, sq, sq, h_vision]

    n, w, h, c = x.size()
    # N, W, H, C --> N, W, H * scale, C // scale
    x = x.view(n, w, int(h * scale_factor), int(c / scale_factor))
    # N, W, H * scale, C // scale --> N, H * scale, W, C // scale
    x = x.permute(0, 2, 1, 3).contiguous()
    # N, H * scale, W, C // scale --> N, H * scale, W * scale, C // (scale ** 2)
    x = x.view(
        n, int(h * scale_factor), int(w * scale_factor), int(c / (scale_factor * scale_factor))
    )

    if version == 2:
        x = x.permute(0, 2, 1, 3).contiguous()

    x = x.reshape(x.shape[0], -1, x.shape[-1])

    return x


================================================
FILE: megatron/core/models/multimodal/llava_spec.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from typing import Optional

from megatron.core.extensions.transformer_engine import (
    TEDotProductAttention,
    TELayerNormColumnParallelLinear,
    TENorm,
    TERowParallelLinear,
)
from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add
from megatron.core.models.gpt.gpt_layer_specs import get_mlp_module_spec
from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear
from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules
from megatron.core.transformer.dot_product_attention import DotProductAttention
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.identity_op import IdentityOp
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_layer import TransformerLayer, TransformerLayerSubmodules

try:
    import apex  # pylint: disable=unused-import

    from megatron.core.fusions.fused_layer_norm import FusedLayerNorm

    HAVE_APEX = True
    LNImpl = FusedLayerNorm
except ImportError:
    import warnings

    from megatron.core.transformer.torch_norm import WrappedTorchNorm

    warnings.warn("Apex is not installed. Falling back to Torch Norm")
    LNImpl = WrappedTorchNorm
    HAVE_APEX = False


def decoder_model_with_transformer_engine_default_spec(
    num_experts: Optional[int] = None, moe_grouped_gemm: bool = False, qk_layernorm: bool = False
) -> ModuleSpec:
    """LLava decoder TE spec (uses Transformer Engine components)."""
    mlp = get_mlp_module_spec(
        use_te=True, num_experts=num_experts, moe_grouped_gemm=moe_grouped_gemm
    )
    return ModuleSpec(
        module=TransformerLayer,
        submodules=TransformerLayerSubmodules(
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": AttnMaskType.causal},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=TELayerNormColumnParallelLinear,
                    core_attention=TEDotProductAttention,
                    linear_proj=TERowParallelLinear,
                    q_layernorm=TENorm if qk_layernorm else IdentityOp,
                    k_layernorm=TENorm if qk_layernorm else IdentityOp,
                ),
            ),
            self_attn_bda=get_bias_dropout_add,
            mlp=mlp,
            mlp_bda=get_bias_dropout_add,
        ),
    )


def decoder_model_with_local_default_spec(
    num_experts: Optional[int] = None, moe_grouped_gemm: bool = False, qk_layernorm: bool = False
) -> ModuleSpec:
    """LLava decoder local spec."""
    mlp = get_mlp_module_spec(
        use_te=False, num_experts=num_experts, moe_grouped_gemm=moe_grouped_gemm
    )
    return ModuleSpec(
        module=TransformerLayer,
        submodules=TransformerLayerSubmodules(
            input_layernorm=LNImpl,
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": AttnMaskType.causal},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=ColumnParallelLinear,
                    core_attention=DotProductAttention,
                    linear_proj=RowParallelLinear,
                ),
            ),
            self_attn_bda=get_bias_dropout_add,
            pre_mlp_layernorm=LNImpl,
            mlp=mlp,
            mlp_bda=get_bias_dropout_add,
        ),
    )


================================================
FILE: megatron/core/models/vision/__init__.py
================================================


================================================
FILE: megatron/core/models/vision/clip_vit_model.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from typing import Optional, Union

import torch

from megatron.core.config_logger import has_config_logger_enabled, log_config_to_disk
from megatron.core.models.common.vision_module.vision_module import VisionModule
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.transformer.enums import ModelType
from megatron.core.transformer.spec_utils import ModuleSpec, build_module
from megatron.core.transformer.transformer_block import TransformerBlock
from megatron.core.transformer.transformer_config import TransformerConfig

try:
    import transformer_engine  # pylint: disable=unused-import

    from megatron.core.extensions.transformer_engine import TENorm

    NORM_IMPL = TENorm
except:
    NORM_IMPL = torch.nn.LayerNorm


# Note: This is under development and is missing features like position embedding interpolation.
class CLIPViTModel(VisionModule):
    """CLIP ViT vision model.

    Args:
        transformer_config (TransformerConfig): Transformer config.
        transformer_layer_spec (ModuleSpec): Specifies module to use for transformer layers.
        ln_pre_impl (ModuleSpec or type): Specifies the layer norm type to use for ln_pre.
        add_class_token (bool, optional): Include a class token. Defaults to True.
        class_token_len (int): Class token length. Defaults to 1 but 8 may be faster.
        patch_dim (int): Image patch size.
        img_h (int): Input image height.
        img_w (int): Input image width.
        pg_collection (ProcessGroupCollection): Model communication process groups
        vp_stage (int): Virtual pipeline stage
    """

    def __init__(
        self,
        transformer_config: TransformerConfig,
        transformer_layer_spec: ModuleSpec,
        ln_pre_impl: Union[ModuleSpec, type] = NORM_IMPL,
        ln_post_impl: Union[ModuleSpec, type] = NORM_IMPL,
        add_class_token: bool = True,
        class_token_len: int = 1,
        patch_dim: int = 14,
        img_h: int = 336,
        img_w: int = 336,
        model_subtype: str = "clip",
        pg_collection: Optional[ProcessGroupCollection] = None,
        vp_stage: Optional[int] = None,
    ) -> None:

        error_msg = f"CLIPViTModel model subtype {model_subtype} is not supported."
        assert model_subtype in ["clip", "siglip", "internvit", "internvit300M"], error_msg

        if model_subtype == "siglip":
            assert class_token_len == 0, "SigLIP does not support class tokens."
            assert not add_class_token, "SigLIP does not support class tokens."

        super().__init__(config=transformer_config)

        if has_config_logger_enabled(transformer_config):
            log_config_to_disk(transformer_config, locals(), prefix=type(self).__name__)

        self.class_token_len = class_token_len
        self.visual_hidden_size = transformer_config.hidden_size
        self.patch_dim = patch_dim
        self.img_h = img_h
        self.img_w = img_w

        assert self.img_h % self.patch_dim == 0
        assert self.img_w % self.patch_dim == 0
        self.num_patches_per_dim_h = self.img_h // self.patch_dim
        self.num_patches_per_dim_w = self.img_w // self.patch_dim
        self.num_patches = self.num_patches_per_dim_h * self.num_patches_per_dim_w

        self.add_class_token = add_class_token
        self.class_token_len = class_token_len

        self.seq_length = self.num_patches + (self.class_token_len if self.add_class_token else 0)

        self.ln_pre = None
        self.ln_post = None
        self.pg_collection = pg_collection
        self.vp_stage = vp_stage
        if model_subtype == "clip":
            self.ln_pre = build_module(
                ln_pre_impl,
                config=transformer_config,
                hidden_size=self.visual_hidden_size,
                eps=transformer_config.layernorm_epsilon,
            )
            conv_bias = False
            padding = 0
        elif model_subtype == "siglip":
            self.ln_post = build_module(
                ln_post_impl,
                config=transformer_config,
                hidden_size=self.visual_hidden_size,
                eps=transformer_config.layernorm_epsilon,
            )
            conv_bias = True
            padding = "valid"
        elif model_subtype.startswith("internvit"):
            conv_bias = True
            padding = 0
        else:
            raise ValueError(f"unsupported vision model type {model_subtype}")

        self.conv1 = torch.nn.Conv2d(
            in_channels=3,
            out_channels=self.visual_hidden_size,
            kernel_size=self.patch_dim,
            stride=self.patch_dim,
            bias=conv_bias,
            padding=padding,
        )

        self.position_ids = torch.arange(self.seq_length).expand(1, -1).cuda()

        self.position_embeddings = torch.nn.Embedding(
            self.seq_length, self.visual_hidden_size, dtype=transformer_config.params_dtype
        )

        self.add_class_token = add_class_token
        if self.add_class_token:
            self.class_token = torch.nn.Parameter(
                torch.randn(
                    1,
                    self.class_token_len,
                    self.visual_hidden_size,
                    dtype=transformer_config.params_dtype,
                )
            )

        self.model_type = ModelType.encoder_or_decoder

        # Transformer layers.
        # TODO: Make pre_process and post_process configurable.
        # NOTE: a final layer norm and/or linear layer in some implementations are omitted here.
        # They can be added separately where needed.
        self.decoder = TransformerBlock(
            config=transformer_config,
            spec=transformer_layer_spec,
            pre_process=True,
            post_process=False,
            pg_collection=self.pg_collection,
            vp_stage=self.vp_stage,
        )

    def set_input_tensor(self, input_tensor: torch.Tensor) -> None:
        """Sets input tensor to the model.

        Args:
            input_tensor (Tensor): Sets the input tensor for the model.
        """
        self.decoder.set_input_tensor(input_tensor)

    def forward(
        self, x: torch.Tensor, attention_mask: Optional[torch.Tensor] = None
    ) -> torch.Tensor:
        """Forward function of the CLIP ViT Model. This function passes the input tensors
        through the embedding layer and then the transformer.

        Args:
            x (torch.Tensor): input data of shape [batch, img_h, img_w]
            attention_mask (torch.Tensor with dtype=bool): Attention mask to use.

        Returns:
            x (torch.Tensor): output after final transformer block of shape [b, s, h].
        """
        x = self.conv1(x)  # shape = [batch, hidden_size, grid, grid]
        x = x.reshape(x.shape[0], x.shape[1], -1)  # [batch, hidden_size, grid ** 2]
        x = x.permute(0, 2, 1)  # [batch, grid ** 2, hidden_size]

        if self.add_class_token:
            class_token = self.class_token.expand(
                x.shape[0], -1, -1
            )  # [batch, class_token_len, hidden_size]
            x = torch.cat(
                [class_token, x], dim=1
            )  # [batch, grid ** 2 + class_token_len, hidden_size]

        assert x.shape[1] == self.seq_length, f"{x.shape[1]} != {self.seq_length}"
        x = x + self.position_embeddings(self.position_ids)
        if self.ln_pre:
            x = self.ln_pre(x)
        x = x.permute(1, 0, 2)  # [b, s, h] -> [s, b, h]
        # `permute` can make the tensor non-contiguous, breaking pipelining.
        x = x.contiguous()

        x = self.decoder(x, attention_mask)
        x = x.permute(1, 0, 2)  # [s, b, h] -> [b, s, h]
        x = x.contiguous()
        if self.ln_post:
            x = self.ln_post(x)
        return x


def get_num_image_embeddings(
    img_h,
    img_w,
    patch_dim,
    vision_model_type,
    disable_vision_class_token,
    class_token_len,
    pixel_shuffle,
    use_tile_tags=False,
    max_num_tiles=0,
    tokenizer_type=None,
):
    """Get the number of image embeddings per image tile."""
    if vision_model_type == "siglip":
        keep_class_token = False
    elif vision_model_type in ("clip", "internvit", "internvit300M"):
        keep_class_token = not disable_vision_class_token
    elif vision_model_type.startswith("radio"):
        keep_class_token = not disable_vision_class_token
    elif vision_model_type == "cradio-g":
        class_token_len = 8
        keep_class_token = not disable_vision_class_token
    elif vision_model_type.startswith("hf://"):
        from megatron.core.models.huggingface.module import get_hf_model_type

        model_type = get_hf_model_type(vision_model_type)

        if "siglip" in model_type:
            keep_class_token = False
        else:
            raise NotImplementedError(f"unsupported huggingface vision model: {vision_model_type}")
    else:
        raise NotImplementedError(f"unknown vision model type {vision_model_type}")

    num_patches_per_dim_h = img_h // patch_dim
    num_patches_per_dim_w = img_w // patch_dim
    num_patches = num_patches_per_dim_h * num_patches_per_dim_w
    num_image_embeddings_per_tile = num_patches + (class_token_len if keep_class_token else 0)

    if pixel_shuffle:
        num_image_embeddings_per_tile = int(num_image_embeddings_per_tile * (0.5**2))

    if use_tile_tags:
        if tokenizer_type in ("llama3p1", "chatml", "qwen2p0", "qwen2p5"):
            num_image_embeddings_per_tile += 5
        elif tokenizer_type.startswith("nemotron5"):
            num_image_embeddings_per_tile += 6
        else:
            raise ValueError("tokenizer type not defined")

        if 10 < max_num_tiles < 100:
            if tokenizer_type.startswith("qwen"):
                num_image_embeddings_per_tile += 1  # add padding 0
        elif max_num_tiles > 100:
            raise ValueError(f"max number of tiles {max_num_tiles} not supported")

    return num_image_embeddings_per_tile


================================================
FILE: megatron/core/models/vision/multimodal_projector.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from typing import Optional

import torch

from megatron.core.fp8_utils import get_fp8_context
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.typed_torch import apply_module, not_none
from megatron.core.utils import make_viewless_tensor


class MultimodalProjector(MegatronModule):
    """
    MultimodalProjector will take the encoded input with input_size hidden state and project
    it into the hidden size of the language model for multimodal training. When projector is
    type affine linear_fc1 from submodules is used.

    Args:
        transformer_config (TransformerConfig): Transformer config
        submodules (MLPSubmodules): Specifies MLP submodules for mlp type projector
        projector_type (str): Projector type
        input_size (int): Input size from feature encoder
        tp_group (torch.distributed.ProcessGroup): Tensor parallel group
    """

    def __init__(
        self,
        config: TransformerConfig,
        submodules: MLPSubmodules,
        projector_type: str,
        input_size: int,
        tp_group: Optional[torch.distributed.ProcessGroup] = None,
    ):
        super().__init__(config=config)
        self.projector_type = projector_type

        assert submodules is not None, "MLPSubmodules must be provided"

        fp8_init_context = get_fp8_context(config, 0, is_init=True)
        with fp8_init_context:
            if self.projector_type == "mlp":
                self.encoder = MLP(
                    config=config, submodules=submodules, input_size=input_size, tp_group=tp_group
                )
            elif self.projector_type == "affine":
                self.encoder = submodules.linear_fc1(
                    input_size,
                    config.hidden_size,
                    config=config,
                    init_method=not_none(config.init_method),
                    gather_output=True,
                    bias=config.add_bias_linear,
                    skip_bias_add=True,
                    is_expert=False,
                    tp_comm_buffer_name=None,
                    tp_group=tp_group,
                )
            else:
                raise Exception(f"Unsupported multimodal projection type {self.projector_type}")

    def forward(self, hidden_states):
        """Run multimodal projector.

        Args:
            hidden_states (torch.Tensor): Input.

        Returns:
            torch.Tensor: The projected output.
        """
        fp8_context = get_fp8_context(self.config)
        with fp8_context:
            # Run encoder.
            encoder_output, encoder_output_bias = apply_module(self.encoder)(hidden_states)

            if encoder_output_bias is not None:
                encoder_output = encoder_output + encoder_output_bias

            # the encoder produces "viewed" tensor. This will result in schedule.py's
            # deallocate_output_tensor() throwing an error, so a viewless tensor is
            # created to prevent this.
            encoder_output = make_viewless_tensor(
                inp=encoder_output, requires_grad=True, keep_graph=True
            )

        return encoder_output


================================================
FILE: megatron/core/models/vision/radio.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import math
from typing import Optional, Tuple, Union

import torch
import torch.nn.functional as F
from torch import nn

from megatron.core.config_logger import has_config_logger_enabled, log_config_to_disk
from megatron.core.models.common.vision_module.vision_module import VisionModule
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.tensor_parallel.layers import ColumnParallelLinear
from megatron.core.transformer.enums import ModelType
from megatron.core.transformer.spec_utils import ModuleSpec, build_module
from megatron.core.transformer.transformer_block import TransformerBlock
from megatron.core.transformer.transformer_config import TransformerConfig

# RADIO reference code: https://github.com/NVlabs/RADIO

try:
    from einops import rearrange

    HAVE_EINOPS = True
except ImportError:
    HAVE_EINOPS = False


class RADIOViTModel(VisionModule):
    """RADIO ViT vision model.

    Args:
        transformer_config (TransformerConfig): Transformer config.
        transformer_layer_spec (ModuleSpec): Specifies module to use for transformer layers.
        ln_pre_impl (ModuleSpec or type): Specifies the layer norm type to use for ln_pre.
        ln_post_impl (ModuleSpec or type): Specifies the layer norm type to use for ln_post.
        use_mask_token (bool, optional): Whether to use RADIO mask token. Default to False.
        add_class_token (bool, optional): Include a class token. Defaults to True.
        class_token_len (int): Class token length. Defaults to 1 but 8 may be faster.
        patch_dim (int): Image patch size.
        img_h (int): Input image height.
        img_w (int): Input image width.
        max_img_h (int): Max input image height.
        max_img_w (int): Max input image width.
        pos_dropout (int): Positional encoding dropout value. Defaults to 0.
        has_cpe: (bool): Whether to use conditional positional encoding. Defaults to True.
        embedder_bias: (bool): Bias in embedder linear. Defaults to False.
    """

    def __init__(
        self,
        transformer_config: TransformerConfig,
        transformer_layer_spec: ModuleSpec,
        ln_pre_impl: Union[ModuleSpec, type] = None,
        ln_post_impl: Union[ModuleSpec, type] = None,
        use_mask_token: bool = False,
        add_class_token: bool = True,
        class_token_len: int = 8,
        patch_dim: int = 16,
        img_h: int = 224,
        img_w: int = 224,
        max_img_h: int = 2048,
        max_img_w: int = 2048,
        pos_dropout: int = 0,
        has_cpe: bool = True,
        embedder_bias: bool = False,
        pg_collection: Optional[ProcessGroupCollection] = None,
        vp_stage: Optional[int] = None,
    ) -> None:
        super().__init__(config=transformer_config)

        if has_config_logger_enabled(transformer_config):
            log_config_to_disk(transformer_config, locals(), prefix=type(self).__name__)

        self.class_token_len = class_token_len
        self.visual_hidden_size = transformer_config.hidden_size
        self.patch_dim = patch_dim
        self.img_h = img_h
        self.img_w = img_w

        assert self.img_h % self.patch_dim == 0
        assert self.img_w % self.patch_dim == 0

        self.input_dims = (img_h // patch_dim, img_w // patch_dim)

        # used for positional embedding
        self.max_img_h = max_img_h
        self.max_img_w = max_img_w
        self.max_num_rows = max_img_h // patch_dim
        self.max_num_cols = max_img_w // patch_dim
        self.max_num_patches = self.max_num_rows * self.max_num_cols

        # TODO: are we actually going to use this anywhere?
        self.use_mask_token = use_mask_token
        if self.use_mask_token:
            self.mask_token = nn.Parameter(torch.zeros(1, self.visual_hidden_size))

        self.add_class_token = add_class_token
        self.class_token_len = class_token_len
        if self.add_class_token:
            self.class_token = nn.Parameter(
                torch.randn(
                    self.class_token_len,
                    self.visual_hidden_size,
                    dtype=transformer_config.params_dtype,
                )
            )
            if transformer_config.fp8:
                self.register_load_state_dict_pre_hook(fp8_pad_hook)

        self.seq_length = (img_h // self.patch_dim) * (img_w // self.patch_dim) + (
            self.class_token_len if self.add_class_token else 0
        )

        pos_scale = self.visual_hidden_size**-0.5
        self.position_embeddings = nn.Parameter(
            torch.randn(
                1,
                self.max_num_patches,
                self.visual_hidden_size,
                dtype=transformer_config.params_dtype,
            )
            * pos_scale
        )
        self.pos_dropout = pos_dropout
        self.has_cpe = has_cpe

        # Using non-TE version so we can force gather_output
        self.embedder = ColumnParallelLinear(
            input_size=3 * self.patch_dim * self.patch_dim,
            output_size=self.visual_hidden_size,
            bias=embedder_bias,
            config=transformer_config,
            gather_output=True,
            init_method=lambda tensor: torch.nn.init.normal_(tensor, mean=0.0, std=1.0),
        )

        self.model_type = ModelType.encoder_or_decoder

        self.ln_pre = None
        self.ln_post = None
        self.pg_collection = pg_collection
        self.vp_stage = vp_stage
        if ln_pre_impl is not None:
            self.ln_pre = build_module(
                ln_pre_impl,
                config=transformer_config,
                hidden_size=self.visual_hidden_size,
                eps=transformer_config.layernorm_epsilon,
            )
        if ln_post_impl is not None:
            self.ln_post = build_module(
                ln_post_impl,
                config=transformer_config,
                hidden_size=self.visual_hidden_size,
                eps=transformer_config.layernorm_epsilon,
            )

        self.decoder = TransformerBlock(
            config=transformer_config,
            spec=transformer_layer_spec,
            pre_process=True,
            post_process=False,
            pg_collection=self.pg_collection,
            vp_stage=self.vp_stage,
        )

    def set_input_tensor(self, input_tensor: torch.Tensor) -> None:
        """Sets input tensor to the model.

        Args:
            input_tensor (Tensor): Sets the input tensor for the model.
        """
        self.decoder.set_input_tensor(input_tensor)

    def forward(
        self, x: torch.Tensor, attention_mask: Optional[torch.Tensor] = None
    ) -> torch.Tensor:
        """Forward function of the RADIO ViT Model. This function passes the input tensors
        through the embedding layer and then the transformer.

        Args:
            x (torch.Tensor): input data of shape [batch, img_h, img_w]
            attention_mask (torch.Tensor with dtype=bool): Attention mask to use.

        Returns:
            x (torch.Tensor): output after final transformer block of shape [b, s, h].
        """

        if not HAVE_EINOPS:
            raise ImportError(
                "einops is required for RADIOViTModel, please install it with `pip install einops`"
            )

        input_size = x.shape[2:]
        py = x.shape[-2] // self.patch_dim
        px = x.shape[-1] // self.patch_dim
        x = rearrange(
            x,
            "b c (py yy) (px xx) -> b (py px) (c yy xx)",
            py=py,
            yy=self.patch_dim,
            px=px,
            xx=self.patch_dim,
        )
        x, _ = self.embedder(x)  # [batch, seq_length, hidden_size]

        x, _ = self.apply_pos_enc(x, input_size=input_size)

        if self.add_class_token:
            class_token = self.class_token.expand(
                x.shape[0], -1, -1
            )  # [batch, class_token_len, hidden_size]

            x = torch.cat(
                [class_token, x], dim=1
            )  # [batch, seq_length + class_token_len, hidden_size]

        assert x.shape[1] == self.seq_length, f"{x.shape[1]} != {self.seq_length}"

        if self.ln_pre:
            x = self.ln_pre(x)

        x = x.permute(1, 0, 2)  # [b, s, h] -> [s, b, h]
        x = x.contiguous()

        x = self.decoder(x, attention_mask=attention_mask)

        x = x.permute(1, 0, 2)  # [s, b, h] -> [b, s, h]
        x = x.contiguous()

        if self.ln_post:
            x = self.ln_post(x)

        return x

    def apply_pos_enc(
        self,
        patches: torch.Tensor,
        patch_idxs: Optional[torch.Tensor] = None,
        input_size: Optional[Tuple[int, int]] = None,
    ) -> torch.Tensor:
        """Apply positional encoding to patches"""
        pos_enc = self.get_pos_enc(patches.shape[0], patch_idxs, input_size)

        if self.training and self.pos_dropout > 0:
            keeps = (
                torch.rand(patches.shape[0], 1, 1, dtype=pos_enc.dtype, device=pos_enc.device)
                > self.pos_dropout
            )
            pos_enc_drop = torch.where(keeps, pos_enc, 0)
        else:
            pos_enc_drop = pos_enc

        return patches + pos_enc_drop, pos_enc

    def get_pos_enc(
        self,
        batch_size: int,
        patch_idxs: Optional[torch.Tensor] = None,
        input_size: Optional[Tuple[int, int]] = None,
    ) -> torch.Tensor:
        """Get positional encoding for certain input size"""
        if input_size is None:
            input_dims = self.input_dims
        else:
            input_dims = tuple(d // self.patch_dim for d in input_size)

        pos_embed = self._get_pos_embeddings(batch_size, input_dims)

        if patch_idxs is None:
            return pos_embed

        exp_patch_idxs = patch_idxs.unsqueeze(-1).expand(-1, -1, pos_embed.shape[-1])

        pos_embed = torch.gather(
            pos_embed.expand(patch_idxs.shape[0], -1, -1), dim=1, index=exp_patch_idxs
        )
        return pos_embed

    def _get_pos_embeddings(self, batch_size: int, input_dims: Tuple[int, int]):
        """Get RADIO absolute positional embeddings"""
        if (self.max_num_rows, self.max_num_cols) == input_dims:
            return self.position_embeddings

        pos_embed = self.position_embeddings.reshape(
            1, self.max_num_rows, self.max_num_cols, -1
        ).permute(0, 3, 1, 2)

        def window_select(pos_embed):
            if input_dims[0] < pos_embed.shape[-2]:
                pos_embed = pos_embed[..., : input_dims[0], :]
            if input_dims[1] < pos_embed.shape[-1]:
                pos_embed = pos_embed[..., :, : input_dims[1]]
            return pos_embed

        if self.has_cpe:
            if self.training:
                min_scale = math.sqrt(0.1)
                scale = (
                    torch.rand(batch_size, 1, 1, device=pos_embed.device) * (1 - min_scale)
                    + min_scale
                )
                aspect_min = math.log(3 / 4)
                aspect_max = -aspect_min
                aspect = torch.exp(
                    torch.rand(batch_size, 1, 1, device=pos_embed.device)
                    * (aspect_max - aspect_min)
                    + aspect_min
                )

                scale_x = scale * aspect
                scale_y = scale * (1 / aspect)
                scale_xy = torch.stack([scale_x, scale_y], dim=-1).clamp_(0, 1)

                pos_xy = torch.rand(batch_size, 1, 1, 2, device=pos_embed.device) * (1 - scale_xy)

                lin_x = torch.linspace(0, 1, steps=input_dims[1], device=pos_embed.device)[
                    None, None
                ].expand(batch_size, input_dims[0], -1)
                lin_y = torch.linspace(0, 1, steps=input_dims[0], device=pos_embed.device)[
                    None, :, None
                ].expand(batch_size, -1, input_dims[1])

                lin_xy = torch.stack([lin_x, lin_y], dim=-1)

                grid_xy = lin_xy * scale_xy + pos_xy

                # Convert to [-1, 1] range
                grid_xy.mul_(2).sub_(1)

                pos_embed = F.grid_sample(
                    pos_embed.float().expand(batch_size, -1, -1, -1),
                    grid=grid_xy,
                    mode="bilinear",
                    padding_mode="zeros",
                    align_corners=True,
                ).to(pos_embed.dtype)
            else:
                max_dim = max(input_dims)
                pos_embed = F.interpolate(
                    pos_embed.float(), size=(max_dim, max_dim), align_corners=True, mode="bilinear"
                ).to(pos_embed.dtype)

                pos_embed = window_select(pos_embed)
        else:
            pos_embed = window_select(pos_embed)

        if pos_embed.shape[-2:] != input_dims:
            pos_embed = F.interpolate(
                pos_embed.float(), size=input_dims, align_corners=True, mode="bilinear"
            ).to(pos_embed.dtype)

        pos_embed = pos_embed.flatten(2).permute(0, 2, 1)

        return pos_embed


def fp8_pad_hook(
    module, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
):
    """FP8 requires class token length to be a multiple of 16 (for this model).

    Original model checkpoint may not be padded for FP8 so pad it here.
    """
    if not "vision_model.class_token" in state_dict:
        return

    pad = 32 if module.config.fp8_recipe == "mxfp8" else 16

    class_token = state_dict["vision_model.class_token"]
    if class_token.shape[0] % pad != 0:
        pad_len = pad - (class_token.shape[0] % pad)
        pad_tensor = torch.randn(
            pad_len, class_token.shape[-1], dtype=class_token.dtype, device=class_token.device
        )
        class_token = torch.cat([pad_tensor, class_token], dim=0)
        state_dict["vision_model.class_token"] = class_token

    return


================================================
FILE: megatron/core/models/vision/vit_layer_specs.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from megatron.core.extensions.transformer_engine import (
    TEDotProductAttention,
    TELayerNormColumnParallelLinear,
    TERowParallelLinear,
)
from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add
from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear
from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules
from megatron.core.transformer.dot_product_attention import DotProductAttention
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.identity_op import IdentityOp
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_layer import TransformerLayer, TransformerLayerSubmodules

try:
    import apex  # pylint: disable=unused-import

    from megatron.core.fusions.fused_layer_norm import FusedLayerNorm

    HAVE_APEX = True
    LNImpl = FusedLayerNorm
except ImportError:
    import warnings

    from megatron.core.transformer.torch_norm import WrappedTorchNorm

    warnings.warn("Apex is not installed. Falling back to Torch Norm")
    LNImpl = WrappedTorchNorm
    HAVE_APEX = False


# Use this spec to use lower level Transformer Engine modules (required for fp8 training)
def get_vit_layer_with_transformer_engine_spec() -> ModuleSpec:
    """
    Returns ViT layer spec with Transformer Engine layers
    """
    mlp = _get_mlp_module_spec(use_te=True)
    return ModuleSpec(
        module=TransformerLayer,
        submodules=TransformerLayerSubmodules(
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": AttnMaskType.no_mask},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=TELayerNormColumnParallelLinear,
                    core_attention=TEDotProductAttention,
                    linear_proj=TERowParallelLinear,
                ),
            ),
            self_attn_bda=get_bias_dropout_add,
            pre_mlp_layernorm=IdentityOp,
            mlp=mlp,
            mlp_bda=get_bias_dropout_add,
        ),
    )


def get_vit_layer_with_local_spec() -> ModuleSpec:
    """
    Returns ViT layer spec with Mcore local layers
    """
    mlp = _get_mlp_module_spec(use_te=False)
    return ModuleSpec(
        module=TransformerLayer,
        submodules=TransformerLayerSubmodules(
            input_layernorm=LNImpl,
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": AttnMaskType.causal},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=ColumnParallelLinear,
                    core_attention=DotProductAttention,
                    linear_proj=RowParallelLinear,
                ),
            ),
            self_attn_bda=get_bias_dropout_add,
            pre_mlp_layernorm=LNImpl,
            mlp=mlp,
            mlp_bda=get_bias_dropout_add,
        ),
    )


# Helper function to get module spec for MLP/MoE
def _get_mlp_module_spec(use_te: bool = True) -> ModuleSpec:
    # Dense MLP w/ or w/o TE modules.
    return ModuleSpec(
        module=MLP,
        submodules=MLPSubmodules(
            linear_fc1=TELayerNormColumnParallelLinear if use_te else ColumnParallelLinear,
            linear_fc2=TERowParallelLinear if use_te else RowParallelLinear,
        ),
    )


================================================
FILE: megatron/core/msc_utils.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

import logging
from typing import Any

logger = logging.getLogger(__name__)

try:
    import multistorageclient as msc

    _msc_available = True
    logger.info('The multistorageclient package is available.')
except ModuleNotFoundError:
    msc = None
    _msc_available = False


class _FeatureFlag:

    def __init__(self, default: bool = False):
        self._enabled = default

    def enable(self) -> None:
        """Enable the feature flag."""
        self._enabled = True

    def disable(self) -> None:
        """Disable the feature flag."""
        self._enabled = False

    def is_enabled(self) -> bool:
        """Check if the feature flag is enabled."""
        return self._enabled and msc is not None

    def import_package(self) -> Any:
        """Import the package."""
        if msc is None:
            raise RuntimeError(
                "The multistorageclient package is not available. "
                "Please install it using `pip install multi-storage-client`."
            )
        if not self.is_enabled():
            raise RuntimeError(
                "The MSC feature is disabled. Please enable by removing the --disable-msc argument."
            )
        return msc

    def __getstate__(self):
        """Get the state for pickling."""
        return {'_enabled': self._enabled}

    def __setstate__(self, state):
        """Set the state during unpickling."""
        self._enabled = state['_enabled']


MultiStorageClientFeature = _FeatureFlag(_msc_available)


def open_file(*args, **kwargs):
    """Open a file with the appropriate method based on whether MSC is enabled."""
    if MultiStorageClientFeature.is_enabled():
        msc = MultiStorageClientFeature.import_package()
        return msc.open(*args, **kwargs)
    else:
        return open(*args, **kwargs)


__all__ = ['MultiStorageClientFeature', 'open_file']


================================================
FILE: megatron/core/nccl_allocator.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
import logging
import os
from contextlib import nullcontext
from functools import lru_cache

import torch

# This import is needed for the cpp extension to work.
# pylint: disable=unused-import
from torch.utils import cpp_extension

from megatron.core.utils import is_torch_min_version, log_single_rank

logger = logging.getLogger(__name__)

# MCORE NCCL Allocator copies and modifies the APEX NCCL allocator.
# The original APEX NCCL allocator is available at:
# https://github.com/NVIDIA/apex/blob/master/apex/contrib/nccl_allocator.py
# https://github.com/NVIDIA/apex/blob/master/apex/contrib/csrc/nccl_allocator/NCCLAllocator.cpp

_allocator = None


def _build_nccl_allocator():
    global _allocator
    # If the allocator is already built, return
    if _allocator is not None:
        return

    nccl_allocator_source = """
    #include <c10/cuda/CUDACachingAllocator.h>
    #include <c10/util/Exception.h>
    #include <torch/csrc/cuda/CUDAPluggableAllocator.h>
    #include <torch/extension.h>

    #include <nccl.h>
    #include <iostream>
    #include <cstdio>

    extern "C" {
        #define NCCL_CHECK(cmd) do { \
        ncclResult_t r = cmd; \
        if (r != ncclSuccess) { \
            printf("Failed, NCCL error %s:%d '%s':", \
                __FILE__,__LINE__,ncclGetErrorString(r)); \
            exit(EXIT_FAILURE); \
        } \
        } while(0)

        void* nccl_alloc_plug(size_t size, int device, void* stream) {
            void* ptr;
            NCCL_CHECK(ncclMemAlloc(&ptr, size));
            return ptr;
        }

        void nccl_free_plug(void* ptr, size_t size, int device, void* stream) {
            NCCL_CHECK(ncclMemFree(ptr));
        }

        std::shared_ptr<c10::cuda::CUDACachingAllocator::CUDAAllocator> nccl_allocator;

        void maybe_init() {
            if (!nccl_allocator) {
                nccl_allocator = std::make_shared<
                    torch::cuda::CUDAPluggableAllocator::CUDAPluggableAllocator>(
                    nccl_alloc_plug, nccl_free_plug);
            }
        }

        std::shared_ptr<c10::cuda::CUDACachingAllocator::CUDAAllocator>
        get_nccl_allocator() {
        maybe_init();
        return nccl_allocator;
        }

        PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
        m.def("get_nccl_allocator", []() { return get_nccl_allocator(); });
        };
    }
    """
    module_dir = os.path.dirname(__file__)
    source_dir = os.path.join(module_dir, "build")
    nccl_allocator_libname = "nccl_allocator"
    os.makedirs(source_dir, exist_ok=True)

    nccl_allocator = torch.utils.cpp_extension.load_inline(
        name=nccl_allocator_libname,
        cpp_sources=nccl_allocator_source,
        with_cuda=True,
        extra_ldflags=["-lnccl"],
        verbose=True,
        is_python_module=True,
        build_directory=source_dir,
    )

    _allocator = nccl_allocator.get_nccl_allocator()


@lru_cache(maxsize=None)
def get_func_args(func):
    """
    Get the argument names of a function.
    """
    import inspect

    sig = inspect.signature(func)
    return [arg.name for arg in sig.parameters.values()]


def create_nccl_mem_pool(symmetric=None):  # symmetric: bool | None = None -> torch.cuda.MemPool:
    """
    Create a memory pool using the NCCL allocator.
    """
    _build_nccl_allocator()
    if not is_torch_min_version("2.9.0a0") and symmetric is True:
        logging.info(
            f"Symmetric memory pool is not supported with torch version < 2.9.0a0"
            f"Current torch version: {torch.__version__}"
            "falling back to non-symmetric memory pool"
        )
        symmetric = False

    assert _allocator is not None, "NCCL allocator is not initialized"
    if not symmetric:
        _pool = torch.cuda.MemPool(_allocator)
    else:
        if 'symmetric' in get_func_args(torch.cuda.MemPool):
            # The PyTorch version >= 2.9.0a0 and before PyTorch PR #161238,
            # The symmetric knob should passed to the MemPool constructor.
            # Since PyTorch PR #161238 symmetric knob is now in registration function.
            _pool = torch.cuda.MemPool(_allocator, symmetric=symmetric)
        elif 'symm_mem' in get_func_args(torch.cuda.MemPool):
            # This path handles argument name divergence between
            # nvidia pytorch and the official pytorch.
            _pool = torch.cuda.MemPool(_allocator, symm_mem=symmetric)
        else:
            # This path handles the case where the symmetric knob is in the registration function.
            _pool = torch.cuda.MemPool(_allocator)
    return _pool


def init() -> None:
    """
    Initialize the NCCL allocator.

    PyTorch tracks memory registration at the pool level, not per allocation.
    If a pool already contains allocations from a previous context, attempting
    to register it again will re-register all existing allocations and may
    trigger NCCL errors. To avoid this, the pool is explicitly deregistered
    on entry and re-registered on exit for each context use.
    """
    # Enables NCCL NVLS algorithm
    os.environ["NCCL_NVLS_ENABLE"] = "1"
    # Disables the use of the tensor register allocator hook
    os.environ["TORCH_NCCL_USE_TENSOR_REGISTER_ALLOCATOR_HOOK"] = "0"
    _build_nccl_allocator()
    log_single_rank(logger, logging.INFO, "[MCORE][NCCL_ALLOCATOR] Initialized NCCL Allocator")


# register_mem_pool/deregister_mem_pool are used for manual (de)registration of the memory pool.
# They are used in the case of FSDP manual registration.
def register_mem_pool(pool, group, symmetric=True):
    """
    Register a memory pool to a group.
    symmetric: bool, this is for future use.
    """
    backend = group._get_backend(torch.device("cuda", torch.cuda.current_device()))
    if symmetric:
        try:
            backend.register_mem_pool(pool, symm=symmetric)
        except TypeError:
            # Older PyTorch/APIs without 'symm' keyword.
            log_single_rank(
                logger,
                logging.WARNING,
                "[MCORE][NCCL_ALLOCATOR] Failed in symmetric registration. "
                "Falling back to registration api without 'symm' keyword!!",
            )
            backend.register_mem_pool(pool)
    else:
        backend.register_mem_pool(pool)


def deregister_mem_pool(pool, group):
    """
    Deregister a memory pool from a group.
    """
    backend = group._get_backend(torch.device("cuda", torch.cuda.current_device()))
    if pool.snapshot():
        backend.deregister_mem_pool(pool)


# Preserve the original APEX NCCL allocator interface for backward compatibility
class nccl_mem:
    """
    An NCCL memory allocator, which inherits APEX nccl_allocator implementation.
    """

    def __init__(self, pool, enabled=True, device=None, group=None, symmetric=True):
        self.device = None
        self.group = None
        self.mem_context = None
        self.pool = pool
        self.symmetric = symmetric

        if enabled:
            if device is None:
                self.device = torch.device("cuda", torch.cuda.current_device())
            elif isinstance(device, int):
                self.device = torch.device("cuda", device)
            elif isinstance(device, str):
                assert "cuda" in device, "only cuda devices are supported"
                self.device = torch.device(device)

            if group is None:
                self.group = torch.distributed.distributed_c10d._get_default_group()
            else:
                self.group = group

            self.mem_context = torch.cuda.use_mem_pool(self.pool)
        else:
            self.mem_context = nullcontext()

    def __enter__(self):
        self.mem_context.__enter__()
        if self.group is not None:
            # If the pool is not empty, deregister the pool from the group.
            if self.pool.snapshot():
                backend = self.group._get_backend(self.device)
                try:
                    # Deregister first to avoid duplicate registration of previously
                    # registered memory.
                    backend.deregister_mem_pool(self.pool)
                except RuntimeError:
                    desc = getattr(self.group, "group_desc", None)
                    log_single_rank(
                        logger,
                        logging.WARNING,
                        f"[MCORE][NCCL_ALLOCATOR] Failed to deregister mem pool from"
                        f"{repr(self.group)}({desc}) group!!",
                    )

    def __exit__(self, *args):
        if self.group is not None:
            backend = self.group._get_backend(self.device)
            try:
                # Prefer attempting symmetric registration first; fall back if unsupported.
                if self.symmetric:
                    try:
                        # Since PyTorch PR #161238 symmetric knob is now in registration function.
                        backend.register_mem_pool(self.pool, symm=self.symmetric)
                    except TypeError:
                        # Older PyTorch/APIs without 'symm' keyword.
                        log_single_rank(
                            logger,
                            logging.WARNING,
                            "[MCORE][NCCL_ALLOCATOR] Failed in symmetric registration. "
                            "Falling back to non-symmetric registration!!",
                        )
                        backend.register_mem_pool(self.pool)
                else:
                    backend.register_mem_pool(self.pool)
            except RuntimeError:
                desc = getattr(self.group, "group_desc", None)
                log_single_rank(
                    logger,
                    logging.WARNING,
                    f"[MCORE][NCCL_ALLOCATOR] Failed to register mem pool to"
                    f"{repr(self.group)}({desc}) group!!",
                )

        self.mem_context.__exit__(*args)


class MultiGroupMemPoolAllocator:
    """
    A custom allocator class that registers a single memory pool with multiple communication groups.

    Use cases:
    - [FSDP+EP] In case of FSDP with EP, expert layer (expert-dp) and non-expert layer (dp) use
      different communicator groups. The same memory pool has to be registered to both the groups.
    - [Hybrid FSDP/DP] In case of Hybrid FSDP/DP, there are inter-dp group and intra-dp group.
      The same memory pool has to be registered to both the groups.
    - [Hybrid FSDP/DP + EP] In case of Hybrid FSDP/DP + EP, there are inter-dp, intra-dp, and
      expert-dp groups. The same memory pool has to be registered to all the groups.

    Example:
        ```
        import megatron.core.nccl_allocator as nccl_allocator
        nccl_allocator.init()
        pool = nccl_allocator.create_nccl_mem_pool()
        group_1 = torch.distributed.new_group(ranks=[0, 1, 2, 3, 4, 5, 6, 7], backend="nccl")
        group_2 = torch.distributed.new_group(ranks=[0, 2, 4, 6], backend="nccl")
        with MultiGroupMemPoolAllocator(pool, [group_1, group_2]):
            a = torch.zeros(1024, dtype=torch.float32, device="cuda")
            b = torch.zeros(1024, dtype=torch.float32, device="cuda")
        ```
    """

    def __init__(
        self, pool, groups, symmetric=True
    ):  # pool: torch.cuda.MemPool, groups: List[torch.distributed.ProcessGroup]
        self.pool = pool
        self.groups = groups
        self.mem_context = torch.cuda.use_mem_pool(self.pool)
        self.symmetric = symmetric

        assert isinstance(self.pool, torch.cuda.MemPool), "pool must be a torch.cuda.MemPool"
        assert isinstance(self.groups, list), "groups must be a list"
        assert all(
            isinstance(group, torch.distributed.ProcessGroup) for group in self.groups
        ), "groups must be a list of torch.distributed.ProcessGroup"

    def __enter__(self):
        self.mem_context.__enter__()
        # If the pool is not empty, deregister the pool from all the groups.
        if self.pool.snapshot():
            for group in self.groups:
                backend = group._get_backend(torch.device("cuda", torch.cuda.current_device()))
                try:
                    # Since the registration is done in mempool granularity, we need to deregister
                    # the tensors in the mempool and re-register the mempool including
                    # the newly created tensors after the context is exited.
                    backend.deregister_mem_pool(self.pool)
                except RuntimeError:
                    desc = getattr(group, "group_desc", None)
                    log_single_rank(
                        logger,
                        logging.WARNING,
                        f"[MCORE][MultiGroupMemPoolAllocator] Failed to deregister mem pool from"
                        f"{repr(group)}({desc}) group!!",
                    )

    def __exit__(self, *args):
        for group in self.groups:
            backend = group._get_backend(torch.device("cuda", torch.cuda.current_device()))
            try:
                # Prefer attempting symmetric registration first; fall back if unsupported.
                if self.symmetric:
                    try:
                        # Since PyTorch PR #161238 symmetric knob is now in registration function.
                        backend.register_mem_pool(self.pool, symm=self.symmetric)
                    except TypeError:
                        # Older PyTorch/APIs without 'symm' keyword.
                        log_single_rank(
                            logger,
                            logging.WARNING,
                            "[MCORE][MultiGroupMemPoolAllocator] "
                            "Failed in symmetric registration. "
                            "Falling back to non-symmetric registration!!",
                        )
                        backend.register_mem_pool(self.pool)
                else:
                    backend.register_mem_pool(self.pool)
            except RuntimeError:
                desc = getattr(group, "group_desc", None)
                log_single_rank(
                    logger,
                    logging.WARNING,
                    f"[MCORE][MultiGroupMemPoolAllocator] Failed to register mem pool to"
                    f"{repr(group)}({desc}) group!!",
                )
        self.mem_context.__exit__(*args)


class MemPoolAllocatorWithoutRegistration:
    """
    An allocator class that uses allocates memory without registering to any communication group.
    Users are expected to register the memory manually to the communication groups.
    """

    def __init__(self, pool):
        self.pool = pool
        self.mem_context = torch.cuda.use_mem_pool(self.pool)

    def __enter__(self):
        self.mem_context.__enter__()

    def __exit__(self, *args):
        self.mem_context.__exit__(*args)


================================================
FILE: megatron/core/num_microbatches_calculator.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""Megatron Core number of microbatches calculators."""

import logging
from abc import ABC, abstractmethod
from typing import List, Optional, Union

logger = logging.getLogger(__name__)

# TODO: global_var merge into mcore?
_GLOBAL_NUM_MICROBATCHES_CALCULATOR: Union[
    'ConstantNumMicroBatchesCalculator', 'RampupBatchsizeNumMicroBatchesCalculator'
] = None


def get_num_microbatches() -> int:
    """Get number of microbatches."""
    return _GLOBAL_NUM_MICROBATCHES_CALCULATOR.get()


def get_current_global_batch_size() -> int:
    """Get current global batch size."""
    return _GLOBAL_NUM_MICROBATCHES_CALCULATOR.get_current_global_batch_size()


def get_micro_batch_size() -> int:
    """Get micro batch size."""
    return _GLOBAL_NUM_MICROBATCHES_CALCULATOR.get_micro_batch_size()


def get_current_running_global_batch_size() -> int:
    """Get current running global batch size, taking into account number of DP replicas might be
    incompatible with true global batch size if `decrease_batch_size_if_needed` is True."""
    return _GLOBAL_NUM_MICROBATCHES_CALCULATOR.get_current_running_global_batch_size()


def update_num_microbatches(
    consumed_samples: int, consistency_check: bool = True, verbose: bool = False
) -> None:
    """Update number of microbatches.

    Args:
        consumed_samples (int):
            Number of samples consumed.
        consistency_check (bool, optional):
            Option to check current schedule's consistency. Defaults to True.
        verbose (bool, optional):
            Option to control logging. Defaults to False.
    """
    _GLOBAL_NUM_MICROBATCHES_CALCULATOR.update(consumed_samples, consistency_check, verbose)


def unset_num_microbatches_calculator():
    """Unset microbatches calculator.

    Useful for multiple runs. See `tests/unit_tests/ckpt_converter/test_ckpt_converter.py`
    for an example.
    """
    global _GLOBAL_NUM_MICROBATCHES_CALCULATOR
    _GLOBAL_NUM_MICROBATCHES_CALCULATOR = None


def init_num_microbatches_calculator(
    rank: int,
    rampup_batch_size: Optional[List[int]],
    global_batch_size: int,
    micro_batch_size: int,
    data_parallel_size: int,
    decrease_batch_size_if_needed: bool = False,
) -> None:
    """Initialize number of microbatches calculator. Supporting backward compatibility.

    Args:
        rank (int):
            Rank of the GPU, only rank 0 will log the information.
        rampup_batch_size (Optional[List[int]]):
            Rampup batch size, should be in format of [start_global_batch_size,
            batch_size_increment, ramup_samples].
        global_batch_size (int):
            Global batch size for the model.
        micro_batch_size (int):
            Micro batch size at initialization.
        data_parallel_size (int):
            Data parallel size.
        decrease_batch_size_if_needed (bool, optional):
            If true, scale down batch size to ensure divisibility by DP size * microbatch size.
            Defaults to False.
    """
    _configure_global_num_microbatches_calculator(
        rank,
        rampup_batch_size,
        global_batch_size,
        micro_batch_size,
        data_parallel_size,
        decrease_batch_size_if_needed,
        init=True,
    )


def destroy_num_microbatches_calculator():
    """Destroy number of microbatches calculator."""
    global _GLOBAL_NUM_MICROBATCHES_CALCULATOR
    _GLOBAL_NUM_MICROBATCHES_CALCULATOR = None


def reconfigure_num_microbatches_calculator(
    rank: int,
    rampup_batch_size: Optional[List[int]],
    global_batch_size: int,
    micro_batch_size: int,
    data_parallel_size: int,
    decrease_batch_size_if_needed: bool = False,
) -> None:
    """Reconfigure number of microbatches calculator. Supporting backward compatibility.

    Args:
        rank (int):
            Rank of the GPU, only rank 0 will log the information.
        rampup_batch_size (Optional[List[int]]):
            Rampup batch size, should be in format of
            [start_global_batch_size, batch_size_increment, ramup_samples].
        global_batch_size (int):
            Global batch size for the model.
        micro_batch_size (int):
            Micro batch size at initialization.
        data_parallel_size (int):
            Data parallel size.
        decrease_batch_size_if_needed (bool, optional):
            If true, scale down batch size to ensure divisibility by DP size * microbatch size.
            Defaults to False.
    """
    _configure_global_num_microbatches_calculator(
        rank,
        rampup_batch_size,
        global_batch_size,
        micro_batch_size,
        data_parallel_size,
        decrease_batch_size_if_needed,
        init=False,
    )


def _configure_global_num_microbatches_calculator(
    rank: int,
    rampup_batch_size: Optional[List[int]],
    global_batch_size: int,
    micro_batch_size: int,
    data_parallel_size: int,
    decrease_batch_size_if_needed: bool = False,
    init: bool = False,
) -> None:
    """Configure number of microbatches calculator. Can be used for initialization and
    reconfiguration.

    Args:
        rank (int):
            Rank of the GPU, only rank 0 will log the information.
        rampup_batch_size (Optional[List[int]]):
            Rampup batch size, should be in format of
            [start_global_batch_size, batch_size_increment, ramup_samples].
        global_batch_size (int):
            Global batch size for the model.
        micro_batch_size (int):
            Micro batch size at initialization.
        data_parallel_size (int):
            Data parallel size.
        decrease_batch_size_if_needed (bool, optional):
            If true, scale down batch size to ensure divisibility by DP size * microbatch size.
            Defaults to False.
        init (bool, optional):
            If true, initialize the calculator. Defaults to False.
    """
    global _GLOBAL_NUM_MICROBATCHES_CALCULATOR

    if init:
        assert (
            _GLOBAL_NUM_MICROBATCHES_CALCULATOR is None
        ), 'num microbatches calculator is already initialized.'

    _GLOBAL_NUM_MICROBATCHES_CALCULATOR = _build_num_microbatches_calculator(
        rank,
        rampup_batch_size,
        global_batch_size,
        micro_batch_size,
        data_parallel_size,
        decrease_batch_size_if_needed,
    )


def _build_num_microbatches_calculator(
    rank: int,
    rampup_batch_size: Optional[List[int]],
    global_batch_size: int,
    micro_batch_size: int,
    data_parallel_size: int,
    decrease_batch_size_if_needed: bool,
) -> Union['ConstantNumMicroBatchesCalculator', 'RampupBatchsizeNumMicroBatchesCalculator']:
    """Build number of microbatches calculator. Internal helper method.

    Args:
        rank (int):
            Rank of the GPU, only rank 0 will log the information.
        rampup_batch_size (Optional[List[int]]):
            Rampup batch size, should be in format of
            [start_global_batch_size, batch_size_increment, ramup_samples].
        global_batch_size (int):
            Global batch size for the model.
        micro_batch_size (int):
            Micro batch size at initialization.
        data_parallel_size (int):
            Data parallel size.
        decrease_batch_size_if_needed (bool):
            If true, scale down batch size to ensure divisibility by DP size * microbatch size.

    """

    # Constant batch size.
    if rampup_batch_size is None:
        num_microbatches_calculator = ConstantNumMicroBatchesCalculator(
            global_batch_size,
            micro_batch_size,
            data_parallel_size,
            decrease_batch_size_if_needed,
            rank,
        )
        if rank == 0:
            logger.info(
                f'setting number of microbatches to constant {num_microbatches_calculator.get()}'
            )
    # Batch size ramp up.
    else:
        assert len(rampup_batch_size) == 3, (
            'expected the following '
            'format: --rampup-batch-size <start batch size> '
            '<batch size incerement> <ramp-up samples>'
        )
        start_global_batch_size = int(rampup_batch_size[0])
        batch_size_increment = int(rampup_batch_size[1])
        ramup_samples = int(rampup_batch_size[2])
        if rank == 0:
            logger.info(
                f'will use batch size rampup starting from global batch size '
                f'{start_global_batch_size} to global batch size {global_batch_size} with batch'
                f'size increments {batch_size_increment} over {ramup_samples} samples.'
            )
        num_microbatches_calculator = RampupBatchsizeNumMicroBatchesCalculator(
            global_batch_size,
            micro_batch_size,
            data_parallel_size,
            decrease_batch_size_if_needed,
            rank,
            start_global_batch_size,
            batch_size_increment,
            ramup_samples,
        )

    return num_microbatches_calculator


def _round(batch_size: int, divisor: int) -> int:
    """Round `batch_size` down to nearest batch size divisible by `divisor`."""
    return (batch_size // divisor) * divisor


class NumMicroBatchesCalculator(ABC):
    """Base class for number of microbatches calculator."""

    def __init__(self) -> None:
        self.num_micro_batches = None
        self.current_global_batch_size = None
        self.micro_batch_size = None
        self.current_running_global_batch_size = None

    def get(self) -> int:
        """Get number of microbatches."""
        return self.num_micro_batches

    def get_current_global_batch_size(self) -> int:
        """Get current global batch size."""
        return self.current_global_batch_size

    def get_micro_batch_size(self) -> int:
        """Get current global batch size."""
        return self.micro_batch_size

    def get_current_running_global_batch_size(self) -> int:
        """Get current running global batch size. If decrease_batch_size_if_needed is False,
        this just equals global batch size."""
        return self.current_running_global_batch_size

    @abstractmethod
    def update(self, consumed_samples, consistency_check, verbose=False) -> None:
        """Update number of microbatches depending on batch size rampup."""
        pass


class ConstantNumMicroBatchesCalculator(NumMicroBatchesCalculator):
    """Calculator of number of microbatches with constant global batch size.

    Args:
        global_batch_size (int):
            Global batch size.
        micro_batch_size (int):
            Micro batch size.
        data_parallel_size (int):
            Data parallel size.
        decrease_batch_size_if_needed (bool):
            If true, decrease batch size to ensure divisibility by DP size * microbatch size
            (if needed).
        rank (int):
            Rank (to determine whether logging should be performed).
    """

    def __init__(
        self,
        global_batch_size: int,
        micro_batch_size: int,
        data_parallel_size: int,
        decrease_batch_size_if_needed: bool,
        rank: int,
    ) -> None:

        micro_batch_times_data_parallel_size = micro_batch_size * data_parallel_size
        if decrease_batch_size_if_needed:
            running_global_batch_size = _round(
                global_batch_size, micro_batch_times_data_parallel_size
            )
            assert running_global_batch_size % micro_batch_times_data_parallel_size == 0
            if rank == 0:
                logger.info(
                    f'decreasing batch size from {global_batch_size} to {running_global_batch_size}'
                    f'to keep divisiblity by micro_batch_size={micro_batch_size} * '
                    f'data_parallel_size={data_parallel_size}'
                )
            self.num_micro_batches = (
                running_global_batch_size // micro_batch_times_data_parallel_size
            )
        else:
            assert global_batch_size % micro_batch_times_data_parallel_size == 0, (
                'global batch size ({}) is not divisible by micro batch size ({})'
                ' times data parallel size ({})'.format(
                    global_batch_size, micro_batch_size, data_parallel_size
                )
            )
            running_global_batch_size = global_batch_size
            self.num_micro_batches = global_batch_size // micro_batch_times_data_parallel_size
        assert (
            self.num_micro_batches >= 1
        ), 'number of microbatches should be at least 1, got {}.'.format(self.num_micro_batches)

        self.current_global_batch_size = global_batch_size
        self.current_running_global_batch_size = running_global_batch_size
        self.micro_batch_size = micro_batch_size

    def update(self, consumed_samples, consistency_check, verbose=False) -> None:
        pass


class RampupBatchsizeNumMicroBatchesCalculator(NumMicroBatchesCalculator):
    """Calculator of number of microbatches with batch size rampup.
    Over `steps = (global-batch-size - start-batch-size) / batch_size_increment` increment batch
    size from start-batch-size to global-batch-size using rampup-samples / steps
    samples.

    Args:
        global_batch_size (int):
            Global batch size post rampup.
        micro_batch_size (int):
            Micro batch size.
        data_parallel_size (int):
            Data parallel size.
        decrease_batch_size_if_needed (bool):
            If true, decrease batch size to ensure divisibility by DP size * microbatch size
            (if needed).
        rank (int):
            Rank (to determine whether logging should be performed).
        start_global_batch_size (int):
            Global batch size to start with.
        batch_size_increment (int):
            Global batch size increments.
        ramup_samples (int):
            Number of samples to use ramp up global
            batch size from `start_global_batch_size` to `global_batch_size`.
    """

    def __init__(
        self,
        global_batch_size: int,
        micro_batch_size: int,
        data_parallel_size: int,
        decrease_batch_size_if_needed: bool,
        rank: int,
        start_global_batch_size: int,
        batch_size_increment: int,
        ramup_samples: int,
    ) -> None:
        assert global_batch_size > 0, 'global batch size should be positive, got {}.'.format(
            global_batch_size
        )
        assert start_global_batch_size > 0, 'start batch size should be positive, got {}.'.format(
            start_global_batch_size
        )
        assert batch_size_increment > 0, 'batch size increment should be positive, got {}.'.format(
            batch_size_increment
        )
        assert ramup_samples >= 0, 'ramp-up samples should be non-negative, got {}.'.format(
            ramup_samples
        )

        self.global_batch_size = global_batch_size
        self.micro_batch_size = micro_batch_size
        self.data_parallel_size = data_parallel_size
        self.decrease_batch_size_if_needed = decrease_batch_size_if_needed
        self.rank = rank
        self.start_global_batch_size = start_global_batch_size
        self.batch_size_increment = batch_size_increment
        self.ramup_samples = ramup_samples

        self.micro_batch_times_data_parallel_size = self.micro_batch_size * self.data_parallel_size
        assert self.micro_batch_times_data_parallel_size > 0
        self.current_global_batch_size = None

        diff_batch_size = self.global_batch_size - self.start_global_batch_size
        assert diff_batch_size >= 0, (
            'expected global batch size to be greater than or equal to start batch size, '
            f'got {self.global_batch_size} and {self.start_global_batch_size}'
        )
        assert diff_batch_size % batch_size_increment == 0, (
            'expected '
            f'global batch size interval ({diff_batch_size}) to be divisible by global batch '
            f'size increment ({batch_size_increment})'
        )

        num_increments = diff_batch_size // self.batch_size_increment
        self.rampup_samples_per_increment = self.ramup_samples / num_increments

        # Initialize number of microbatches.
        self.update(0, consistency_check=False, verbose=True)

    def update(self, consumed_samples: int, consistency_check: bool, verbose: bool = False) -> None:
        """Update number of microbatches.

        Args:
            consumed_samples (int): Number of samples consumed.
            consistency_check (bool): Option to check current schedule's consistency.
            verbose (bool, optional): Option to control logging. Defaults to False.
        """

        # Update current global batch size.
        global_batch_size_changed = False
        old_current_global_batch_size = self.current_global_batch_size
        if consumed_samples > self.ramup_samples:
            self.current_global_batch_size = self.global_batch_size
        else:
            steps = int(consumed_samples / self.rampup_samples_per_increment)
            self.current_global_batch_size = (
                self.start_global_batch_size + steps * self.batch_size_increment
            )
            assert self.current_global_batch_size <= self.global_batch_size

        if old_current_global_batch_size != self.current_global_batch_size:
            global_batch_size_changed = True
        if self.rank == 0 and global_batch_size_changed and verbose:
            if old_current_global_batch_size is None:
                logger.info(f'setting initial batch size to {self.current_global_batch_size}')
            else:
                logger.info(
                    f'ramping up batch size from {old_current_global_batch_size} to '
                    f'{self.current_global_batch_size}'
                )

        # Check consistency of the current global batch size.
        if consistency_check and not self.decrease_batch_size_if_needed:
            assert (
                self.current_global_batch_size % self.micro_batch_times_data_parallel_size == 0
            ), (
                'current global '
                'batch size ({}) is not divisible by micro-batch-size ({}) times'
                'data parallel size ({})'.format(
                    self.current_global_batch_size, self.micro_batch_size, self.data_parallel_size
                )
            )

        if (
            self.decrease_batch_size_if_needed
            and self.current_global_batch_size % self.micro_batch_times_data_parallel_size != 0
        ):
            self.current_running_global_batch_size = _round(
                self.current_global_batch_size, self.micro_batch_times_data_parallel_size
            )
            if self.rank == 0 and global_batch_size_changed and verbose:
                logger.info(
                    f'decreasing batch size from {self.current_global_batch_size} to '
                    f'{self.current_running_global_batch_size} to keep divisiblity by '
                    f'micro_batch_size={self.micro_batch_size} * '
                    f'data_parallel_size={self.data_parallel_size}'
                )
            assert (
                self.current_running_global_batch_size % self.micro_batch_times_data_parallel_size
                == 0
            )
        else:
            self.current_running_global_batch_size = self.current_global_batch_size

        self.num_micro_batches = (
            self.current_running_global_batch_size // self.micro_batch_times_data_parallel_size
        )


================================================
FILE: megatron/core/optimizer/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import copy
import logging
import warnings
from dataclasses import astuple
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import torch
from torch.optim import SGD as CPUSGD
from torch.optim import AdamW as CPUAdam

try:
    from transformer_engine.pytorch.optimizers import FusedAdam as Adam
    from transformer_engine.pytorch.optimizers import FusedSGD as SGD

    USING_PYTORCH_OPTIMIZER = False
except ImportError:
    try:
        from apex.optimizers import FusedAdam as Adam
        from apex.optimizers import FusedSGD as SGD

        USING_PYTORCH_OPTIMIZER = False
    except ImportError:
        warnings.warn(
            f'Transformer Engine and Apex are not installed. Falling back to Torch optimizers.'
        )

        # Apex's FusedAdam is a drop-in replacement for torch's AdamW.
        # pylint: disable-next=line-too-long.
        # See https://github.com/NVIDIA/apex/blob/7b73b12361068a10b0f44844534613f252a5ea75/apex/optimizers/fused_adam.py#L16.
        from torch.optim import SGD
        from torch.optim import AdamW as Adam

        USING_PYTORCH_OPTIMIZER = True

try:
    from emerging_optimizers.scalar_optimizers import Lion

    HAVE_LION = True
except ImportError:
    HAVE_LION = False

from megatron.core import parallel_state
from megatron.core.optimizer.cpu_offloading.hybrid_optimizer import HybridDeviceOptimizer
from megatron.core.optimizer_param_scheduler import (
    ParamGroupOverride,
    combine_param_group_overrides,
    param_group_override_to_tuple,
)
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.transformer.fsdp_dtensor_checkpoint import get_global_unique_param_name

from ..distributed.param_and_grad_buffer import _ParamAndGradBuffer
from ..transformer.module import MegatronModule
from ..utils import get_model_config, get_pg_rank, get_pg_size, is_te_min_version, log_single_rank
from .distrib_optimizer import DistributedOptimizer
from .grad_scaler import ConstantGradScaler, DynamicGradScaler
from .optimizer import (
    ChainedOptimizer,
    Float16OptimizerWithFloat16Params,
    FP32Optimizer,
    MegatronOptimizer,
    param_group_identifier_keys,
)
from .optimizer_config import (
    AdamOptimizerConfig,
    OptimizerConfig,
    ParamKey,
    ParamPredicate,
    ParamWithNamePredicate,
    SGDOptimizerConfig,
)

logger = logging.getLogger(__name__)


def get_standard_config_overrides(config: OptimizerConfig) -> Dict[ParamKey, ParamGroupOverride]:
    """Get standard config overrides for the optimizer, handling decoupled LR and common wd skips.

    Args:
        config (OptimizerConfig): optimizer configuration object.

    Returns:
        Dict[ParamKey, ParamGroupOverride]: standard config overrides.
    """
    config_overrides: Optional[Dict[ParamKey, ParamGroupOverride]] = {}
    # First, figure out how we are going to do wd skipping. The two main approaches are:
    #  1. The classic megatron approach of skipping all len 1 and bias parameters.
    #  2. The Qwen3-Next approach of doing 1, other than qk layernorm parameters.
    if config.apply_wd_to_qk_layernorm:
        shape_1_not_qkln_param = ParamWithNamePredicate(
            name="s1_not_qkln",
            fn=lambda param, name: (len(param.shape) == 1 or name.endswith(".bias"))
            and not ("q_layernorm." in name or "k_layernorm." in name),
        )
        param_wd_mult_key = ParamKey(with_name_predicate=shape_1_not_qkln_param)
    else:
        param_length_1_match = ParamPredicate(
            name="param_len_1", fn=lambda param: len(param.shape) == 1
        )
        param_wd_mult_key = ParamKey(name="*.bias", predicate=param_length_1_match)

    config_overrides[param_wd_mult_key] = ParamGroupOverride(wd_mult=0.0)

    if config.decoupled_lr is not None:
        decoupled_lr_config: ParamGroupOverride = {"max_lr": config.decoupled_lr}
        decoupled_param_key = ParamKey(attr="is_embedding_or_output_parameter")
        if config.decoupled_min_lr is not None:
            decoupled_lr_config["min_lr"] = config.decoupled_min_lr
        config_overrides[decoupled_param_key] = decoupled_lr_config

    return config_overrides


def get_mup_config_overrides(
    config: OptimizerConfig, mup_width_mult: float, optimizer_type: str = 'adam'
) -> Dict[ParamKey, ParamGroupOverride]:
    """Get MuP config overrides for per-layer LR and Adam epsilon scaling.

    In MuP, optimizer learning rates are adjusted by parameter class to ensure
    stable update scales across model widths and enable hyperparameter transfer.

    MuP optimizer scaling rules (as implemented here):
    - Adam/AdamW:
      - hidden (matrix-like) lr = base_lr / width_mult
      - hidden (matrix-like) eps = base_eps / width_mult
      - vector-like params keep base lr and eps
    - SGD:
      - vector-like lr = base_lr * width_mult
      - hidden (matrix-like) lr keeps base_lr in the current uniform-width setup
      - no eps override is applied
    - Non-Adam optimizers:
      - hidden (matrix-like) lr = base_lr / width_mult
      - no eps override is applied.

    With decoupled_lr enabled, embedding/output params continue using decoupled LR
    and MuP will not override those explicit decoupled values.

    Args:
        config (OptimizerConfig): optimizer configuration object.
        mup_width_mult (float): Width multiplier (hidden_size / base_hidden_size).
        optimizer_type (str): Optimizer type string from config.optimizer.

    Returns:
        Dict[ParamKey, ParamGroupOverride]: MuP optimizer overrides.
    """
    optimizer_type_lower = optimizer_type.lower()
    is_sgd_optimizer = optimizer_type_lower == 'sgd'
    is_adam_optimizer = 'adam' in optimizer_type_lower

    decoupled_lr_enabled = config.decoupled_lr is not None
    if decoupled_lr_enabled:
        message = (
            "Both decoupled_lr and MuP LR scaling are enabled. decoupled_lr sets an "
            "absolute LR for embedding+output params, and MuP LR scaling will not "
            "override those parameters."
        )
        if is_adam_optimizer:
            message += " MuP Adam epsilon scaling remains applied to hidden matrix-like parameters."
        log_single_rank(logger, logging.WARNING, message)

    if mup_width_mult == 1.0:
        # No scaling needed when width_mult is 1
        return {}

    hidden_lr_mult = 1.0 / mup_width_mult
    base_lr = config.lr
    base_min_lr = config.min_lr

    # Hidden matrix-like layers get scaled LR/eps; vector-like params keep base values.
    # Prefer the explicit parameter attribute set by LanguageModule. Fall back to
    # a conservative name check for older or non-language modules.
    def is_embedding_parameter(param: torch.nn.Parameter, param_name: str) -> bool:
        if getattr(param, 'shared_embedding', False):
            return True
        if hasattr(param, 'is_embedding_parameter'):
            return bool(param.is_embedding_parameter)
        return 'embedding' in param_name.lower()

    def is_vector_like_parameter(param: torch.nn.Parameter, param_name: str) -> bool:
        if is_embedding_parameter(param, param_name):
            return True
        if param.dim() <= 1:
            return True
        return False

    def should_scale_lr_with_mup(param: torch.nn.Parameter, param_name: str) -> bool:
        if decoupled_lr_enabled and getattr(param, 'is_embedding_or_output_parameter', False):
            return False
        return not is_vector_like_parameter(param, param_name)

    def should_scale_vector_like_lr_with_mup(param: torch.nn.Parameter, param_name: str) -> bool:
        if decoupled_lr_enabled and getattr(param, 'is_embedding_or_output_parameter', False):
            return False
        return is_vector_like_parameter(param, param_name)

    def should_scale_eps_with_mup(param: torch.nn.Parameter, param_name: str) -> bool:
        if is_vector_like_parameter(param, param_name):
            return False
        # MuP Appendix B.3: eps scales with fan_in when non-negligible.
        # This implementation follows the common denominator form: sqrt(v) + eps.
        return True

    mup_overrides: Dict[ParamKey, ParamGroupOverride] = {}

    if is_sgd_optimizer:
        vector_like_lr_mult = mup_width_mult
        vector_like_lr_override: ParamGroupOverride = {}
        if base_lr is not None:
            vector_like_lr_override["max_lr"] = base_lr * vector_like_lr_mult
        if base_min_lr is not None:
            vector_like_lr_override["min_lr"] = base_min_lr * vector_like_lr_mult

        if vector_like_lr_override:
            vector_like_predicate = ParamWithNamePredicate(
                name="mup_sgd_vector_like_excluding_embedding_output",
                fn=should_scale_vector_like_lr_with_mup,
            )
            mup_overrides[ParamKey(with_name_predicate=vector_like_predicate)] = (
                vector_like_lr_override
            )

        return mup_overrides

    lr_override: ParamGroupOverride = {}
    if base_lr is not None:
        lr_override["max_lr"] = base_lr * hidden_lr_mult
    if base_min_lr is not None:
        lr_override["min_lr"] = base_min_lr * hidden_lr_mult

    eps_override: ParamGroupOverride = {}
    if is_adam_optimizer and config.adam_eps is not None:
        eps_override["eps"] = config.adam_eps * hidden_lr_mult

    if decoupled_lr_enabled:
        if lr_override:
            hidden_predicate = ParamWithNamePredicate(
                name="mup_hidden_only_excluding_embedding_output", fn=should_scale_lr_with_mup
            )
            mup_overrides[ParamKey(with_name_predicate=hidden_predicate)] = lr_override

        if eps_override:
            hidden_output_predicate = ParamWithNamePredicate(
                name="mup_hidden_only_for_adam_eps", fn=should_scale_eps_with_mup
            )
            mup_overrides[ParamKey(with_name_predicate=hidden_output_predicate)] = eps_override
    else:
        combined_override: ParamGroupOverride = {}
        combined_override.update(lr_override)
        combined_override.update(eps_override)
        if combined_override:
            hidden_output_predicate = ParamWithNamePredicate(
                name="mup_hidden_and_output", fn=should_scale_eps_with_mup
            )
            mup_overrides[ParamKey(with_name_predicate=hidden_output_predicate)] = combined_override

    return mup_overrides


def _get_param_groups(
    model_chunks: List[MegatronModule],
    config: OptimizerConfig,
    config_overrides: Optional[Dict[ParamKey, ParamGroupOverride]],
) -> List[Dict]:
    """Create parameter groups for optimizer.

    Creates parameter groups from provided optimizer config object.

    NOTE There can be more than one match between a ParamKey and a parameter.
        What we do is merge all of the matching ParamKey overrides into a single ParamGroupOverride
        for that parameter and use that as the key for that parameter. Any parameters that get
        the same set of merged overrides will be mapped into the same parameter group.

    Args:
        model_chunks (List[MegatronModule]): model chunks to create parameter
            groups for.
        config (OptimizerConfig): optimizer configuration object.
        config_overrides (Optional[Dict[ParamKey, ParamGroupOverride]): optimizer overrides,
            specified on a per-layer basis. NOTE: if you want to skip applying weight decay on bias
            and length 1 parameters, and also do not want to do any other overrides, set this to an
            empty dictionary rather than the default value of None.
    Returns:
        List of parameter groups.
    """

    # Map (pg_overrides, is_expert_parallel) to params.
    params_map = {}

    if config_overrides is None:
        # TODO remove this default behavior eventually.
        #  This is only needed for backwards compatibility with the old config overrides API where
        #  the config_overrides argument by default lead to bias parameters and length 1 parameters.
        #  We assume that users of decoupled LR already provide config overrides so will adapt
        #  to the new API.
        config_overrides = get_standard_config_overrides(config=config)

    for model_chunk in model_chunks:
        for name, param in model_chunk.named_parameters():
            if not param.requires_grad:
                continue

            uses_default_config = False
            # Get optimizer config overrides for this parameter.
            param_overrides_list: list[ParamGroupOverride] = []
            if config_overrides is not None:
                for param_key, param_override in config_overrides.items():
                    if param_key.matches(param, name):
                        param_overrides_list.append(param_override)

            if param_overrides_list:
                param_override: ParamGroupOverride | None = combine_param_group_overrides(
                    param_overrides_list
                )
            else:
                param_override = None

            is_expert_parallel = not getattr(param, 'allreduce', True)

            # Create config_tuple that is hash-able, and has a consistent ordering of the keys.
            param_override_tuple: tuple[tuple[str, Any], ...] | None = (
                param_group_override_to_tuple(param_override)
            )
            key = (param_override_tuple, is_expert_parallel)
            if key not in params_map:
                params_map[key] = []
            params_map[key].append(param)

    # Distributed checkpoint requires all ranks to have the same param groups,
    # so we need to align the param groups across ranks, otherwise we may have
    # runtime error when loading the checkpoint or numerical error when resuming training.
    params_key = list(params_map.keys())
    gathered_params_key = [None for _ in range(torch.distributed.get_world_size())]
    torch.distributed.all_gather_object(gathered_params_key, params_key)
    for keys in gathered_params_key:
        for key in keys:
            if key not in params_key:
                params_key.append(key)
    # Need to pick one of the param_override_tuples to use for the param group.
    param_groups = []
    # Sort keys, None first.
    for key in sorted(params_key, key=lambda x: (x[0] is not None, x[0])):
        param_override_tuple, is_expert_parallel = key
        params = params_map[key] if key in params_map else []
        if param_override_tuple is None:
            param_override: ParamGroupOverride = {}
        else:
            param_override: ParamGroupOverride = {k: v for (k, v) in param_override_tuple}

        # False if param_group_override is None or empty tuple or if we do not modify the
        #  LR schedule.
        #  NOTE: "default_config" is used for logging the learning rate in training.py.
        #   so set to True if we do not modify the learning rate.
        #  if param_group['default_config']:
        #    learning_rate = param_group['lr']
        uses_default_lr_schedule: bool = (not bool(param_override_tuple)) or not any(
            ["lr" in k for k in param_override]
        )

        # TODO: Remove "backwards compatible" fields below eventually.
        default_config: ParamGroupOverride = {
            'wd_mult': 1.0,
            'lr_mult': 1.0,
            'is_decoupled_lr': False,
            # The following two fields may be important to keep even when we remove the
            #   above "backwards compatible" fields.
            "max_lr": config.lr,  # user may override this in param_override
            "min_lr": config.min_lr,  # user may override this in param_override
        }
        assert (
            "params" not in param_override
        ), "'params' should not be in param_override, this is a protected key"
        param_group = {
            'params': params,
            'is_expert_parallel': is_expert_parallel,
            'default_config': uses_default_lr_schedule,
            **default_config,
            **param_override,  # keep **param_override last so that users can override other fields.
        }
        param_groups.append(param_group)

    return param_groups


def _get_param_groups_and_buffers(
    model_chunks: List[MegatronModule],
    model_chunk_offset: int,
    config: OptimizerConfig,
    config_overrides: Optional[Dict[ParamKey, ParamGroupOverride]],
    filter_fn: Callable,
    buffer_name: str,
) -> Tuple[List[Dict], Dict[int, List[_ParamAndGradBuffer]]]:
    """Returns parameter groups and buffer for optimizer.

    Args:
        model_chunks (List[MegatronModule]): model chunks to create parameter
            groups for.
        model_chunk_offset (int): offset of model_chunks in global model_chunks list.
        config (OptimizerConfig): optimizer configuration object.
        config_overrides (Optional[Dict[ParamKey, ParamGroupOverride]): optimizer/scheduler
            overrides, specified on the basis of ParamKey matches with each parameter.
        lr (float): learning rate.
        min_lr (float): minimum learning rate.
        filter_fn (callable): filtering function for param_groups.
        buffer_name (str): name of buffer.

    Returns:
        List of parameter groups and dictionary of model chunk IDs to buffers.
    """
    param_groups = _get_param_groups(model_chunks, config, config_overrides)
    param_groups = list(filter(filter_fn, param_groups))
    buffers = {}
    for model_chunk_idx, model_chunk in enumerate(model_chunks):
        if hasattr(model_chunk, buffer_name):
            buffers[model_chunk_idx + model_chunk_offset] = getattr(model_chunk, buffer_name)

    return param_groups, buffers


def _get_megatron_optimizer_based_on_param_groups(
    config: OptimizerConfig,
    model_chunks: List[MegatronModule],
    param_groups: List,
    per_model_buffers: Optional[Dict[int, List[_ParamAndGradBuffer]]] = None,
    model_parallel_group: Optional[torch.distributed.ProcessGroup] = None,
    data_parallel_group: Optional[torch.distributed.ProcessGroup] = None,
    data_parallel_group_gloo: Optional[torch.distributed.ProcessGroup] = None,
    data_parallel_group_idx: Optional[int] = None,
    intra_dist_opt_group: Optional[torch.distributed.ProcessGroup] = None,
    distributed_optimizer_instance_id: Optional[int] = 0,
    pg_collection: Optional[ProcessGroupCollection] = None,
) -> MegatronOptimizer:
    """Get Megatron optimizer based on parameter groups.

    Args:
        config (OptimizerConfig): optimizer configuration object.
        model_chunks (list): list of model chunks.
        param_groups (list): list of parameter groups.
        per_model_buffers (dict, optional): buffers for distributed optimizer. Defaults to None.
        data_parallel_group (torch.distributed.ProcessGroup, optional): data-parallel group for
            distributed optimizer. Defaults to None.
        data_parallel_group_gloo (torch.distributed.ProcessGroup, optional): gloo data-parallel
            group for distributed optimizer. Defaults to None.
        data_parallel_group_idx (int, optional): data-parallel group index for distributed
            optimizer. Defaults to None.
        distributed_optimizer_instance_id (int, optional): Distributed optimizer instance. Defaults
            0.

    Returns:
        Instance of MegatronOptimizer.
    """
    # TODO: Logic needs to be updated to handle different optimizer types (i.e., param_groups
    # passed into this function need to correspond to the same optimizer).

    # When freezing sub-models we may have no trainable parameters on a rank and
    # hence an empty param_groups. However, we still need to create an optimizer
    # for the purposes of grad stats reductions.
    if param_groups:
        if config.optimizer_cpu_offload:
            if torch.__version__ < '2.3.0':
                warnings.warn(
                    "CPU offload is recommended for PyTorch >= 2.3.0, "
                    "untested versions below this may have convergence issues."
                )
            assert (
                config.decoupled_weight_decay
            ), "CPU offloading only supported with decoupled_weight_decay enabled (AdamW mode)."
            gpu_optimizer_cls = Adam if config.optimizer == 'adam' else SGD
            cpu_optimizer_cls = CPUAdam if config.optimizer == 'adam' else CPUSGD
            if config.use_torch_optimizer_for_cpu_offload:
                gpu_optimizer_cls = cpu_optimizer_cls
            if config.optimizer == 'adam':
                gpu_optimizer_cls = Adam
                cpu_optimizer_cls = CPUAdam
                optimizer_defaults = dict(
                    lr=config.lr,
                    weight_decay=config.weight_decay,
                    betas=(config.adam_beta1, config.adam_beta2),
                    eps=config.adam_eps,
                    bias_correction=True,
                    fused=True,  # this flag is used to improve the performance of the cpu optimizer
                )
            else:
                gpu_optimizer_cls = SGD
                cpu_optimizer_cls = CPUSGD
                optimizer_defaults = dict(
                    lr=config.lr, weight_decay=config.weight_decay, momentum=config.sgd_momentum
                )
            optimizer = HybridDeviceOptimizer(
                param_groups,
                offload_fraction=config.optimizer_offload_fraction,
                cpu_optimizer_cls=cpu_optimizer_cls,
                gpu_optimizer_cls=gpu_optimizer_cls,
                overlap_cpu_optimizer_d2h_h2d=config.overlap_cpu_optimizer_d2h_h2d,
                pin_cpu_grads=config.pin_cpu_grads,
                pin_cpu_params=config.pin_cpu_params,
                param_update_in_fp32=True,
                **optimizer_defaults,
            )
            init_state_fn = None
        elif config.optimizer == 'adam':
            kwargs = {
                "params": param_groups,
                "lr": config.lr,
                "weight_decay": config.weight_decay,
                "betas": (config.adam_beta1, config.adam_beta2),
                "eps": config.adam_eps,
            }

            # set Adam class and weight decay mode depending
            # on source of optimizer (Torch or TE/Apex)
            if USING_PYTORCH_OPTIMIZER:
                adam_cls = torch.optim.AdamW if config.decoupled_weight_decay else torch.optim.Adam
            else:
                kwargs["adam_w_mode"] = config.decoupled_weight_decay
                adam_cls = Adam

            if config.use_precision_aware_optimizer:
                kwargs.update(
                    {
                        "exp_avg_dtype": config.exp_avg_dtype,
                        "exp_avg_sq_dtype": config.exp_avg_sq_dtype,
                    }
                )
                # Master weight is managed by MCore when main_params_dtype is fp32. This is
                # because we want to use fp8 primary weight with precision aware optimizer.
                # Otherwise, master weight will be managed by TransformerEngine.
                # Delayed scaling is an exception because casting as well as the computation
                # of the scaling factor can be conducted in the adam kernel.
                if config.use_precision_aware_optimizer_no_fp8_or_ds_fp8:
                    kwargs.update(
                        {
                            "master_weights": True,
                            "use_decoupled_grad": True,
                            "master_weight_dtype": config.main_params_dtype,
                        }
                    )

                if is_te_min_version("2.1.0.dev0"):
                    kwargs.update({"store_param_remainders": config.store_param_remainders})

            optimizer = adam_cls(**kwargs)

            def init_state_fn(opt, config=None):
                for group in opt.param_groups:
                    for p in group['params']:
                        if len(opt.state[p]) == 0:
                            if config is None or not config.use_precision_aware_optimizer:
                                opt.state[p]['exp_avg'] = torch.zeros_like(p.data)
                                opt.state[p]['exp_avg_sq'] = torch.zeros_like(p.data)
                            else:
                                opt.initialize_state(p)

        elif config.optimizer == 'lion':
            if not HAVE_LION:
                raise ImportError(
                    "Lion optimizer requires the 'emerging_optimizers' package. "
                    "Please install it to use --optimizer lion."
                )
            optimizer = Lion(
                param_groups,
                lr=config.lr,
                betas=(config.lion_beta1, config.lion_beta2),
                weight_decay=config.weight_decay,
            )

            def init_state_fn(opt, config=None):
                for group in opt.param_groups:
                    for p in group['params']:
                        if len(opt.state[p]) == 0:
                            opt.state[p]['exp_avg'] = torch.zeros_like(p.data)

        elif config.optimizer == 'sgd':
            optimizer = SGD(
                param_groups,
                lr=config.lr,
                weight_decay=config.weight_decay,
                momentum=config.sgd_momentum,
            )
            init_state_fn = None
        else:
            raise Exception('{} optimizer is not supported.'.format(config.optimizer))
    else:
        optimizer = None
        init_state_fn = None

    # Mixed precision optimizer.
    # - Note: both the Float16Optimizer and the DistributedOptimizer inherit
    #   from the MixedPrecisionOptimizer, which manages any optimizer where
    #   the model params and main params are distinct.
    if config.fp16 or config.bf16 or config.use_distributed_optimizer:

        # Grad scaler:
        #    if loss-scale is provided, instantiate the constant scaler.
        #    if we are using fp16 and loss-scale is not present, use a
        #       dynamic scaler.
        #    otherwise we are running in bf16 with no loss-scale so
        #       leave it as None.
        grad_scaler = None

        # Constant loss scale.
        if config.loss_scale:
            grad_scaler = ConstantGradScaler(config.loss_scale)

        # Dynamic loss scale.
        else:
            if config.fp16:
                grad_scaler = DynamicGradScaler(
                    initial_scale=config.initial_loss_scale,
                    min_scale=config.min_loss_scale,
                    growth_factor=2.0,
                    backoff_factor=0.5,
                    growth_interval=config.loss_scale_window,
                    hysteresis=config.hysteresis,
                )

        optimizer_args = [optimizer, config, grad_scaler, init_state_fn]
        if config.use_distributed_optimizer:
            optimizer = DistributedOptimizer(
                *optimizer_args,
                model_chunks=model_chunks,
                per_model_buffers=per_model_buffers,
                data_parallel_group=data_parallel_group,
                data_parallel_group_gloo=data_parallel_group_gloo,
                data_parallel_group_idx=data_parallel_group_idx,
                distributed_optimizer_instance_id=distributed_optimizer_instance_id,
            )
            # This is needed for case where num_distributed_optimizer_instances > 1. In this case,
            # weight gradients are all-reduced across optimizer instances, so each instance has
            # the duplicated weight gradients, need to reduce gradient stats inside each instance.
            setattr(optimizer, 'grad_stats_parallel_group', intra_dist_opt_group)
        else:
            optimizer = Float16OptimizerWithFloat16Params(*optimizer_args)
            setattr(optimizer, 'grad_stats_parallel_group', model_parallel_group)
    else:
        # FP32 optimizer.
        optimizer = FP32Optimizer(optimizer, config, init_state_fn)
        setattr(optimizer, 'grad_stats_parallel_group', model_parallel_group)

    if pg_collection is None or not hasattr(pg_collection, 'tp'):
        tp_group = parallel_state.get_tensor_model_parallel_group()
    else:
        tp_group = pg_collection.tp
    # TODO(M4): plumb tp_group through optimizer constructors so this setattr disappears.
    setattr(optimizer, 'tp_group', tp_group)

    return optimizer


def check_config_overrides_consistency(
    config: OptimizerConfig, config_overrides: Optional[Dict[ParamKey, ParamGroupOverride]]
):
    """Check if the config overrides are consistent with the config."""

    # TODO: Remove `optimizer` from this eventually (e.g., if we use Muon for some layers and
    # Adam for other layers). This would need some more refactoring to work though (param_groups
    # filtered by optimizer passed into _get_megatron_optimizer_based_on_param_groups).
    if config_overrides is not None:
        fields_to_check_for_consistency = [
            'overlap_param_gather_with_optimizer_step',
            'optimizer',
            'optimizer_cpu_offload',
        ]
        for field_name in fields_to_check_for_consistency:
            base_field = getattr(config, field_name, None)
            all_config_overrides = list(config_overrides.values())
            for config_override in all_config_overrides:
                if field_name in config_override:
                    field = config_override[field_name]
                    if field != base_field:
                        raise ValueError(
                            f"Field {field_name} should not be overriden in a config override."
                        )
    return True


def get_megatron_optimizer(
    config: OptimizerConfig,
    model_chunks: List[MegatronModule],
    config_overrides: Optional[Dict[ParamKey, ParamGroupOverride]] = None,
    use_gloo_process_groups: bool = True,
    pg_collection: Optional[ProcessGroupCollection] = None,
    dump_param_to_param_group_map: Optional[str] = None,
) -> MegatronOptimizer:
    """Retrieve the Megatron optimizer for model chunks.

    We use separate optimizers for expert parameters and non-expert parameters.

    Args:
        config (OptimizerConfig): optimizer configuration object.
        model_chunks (List[MegatronModule]): model chunks to get optimizer for.
        config_overrides (Optional[Dict[ParamKey, OptimizerConfig]]): optional dictionary of
            optimizer configuration objects to override default optimizer behavior for different
            subsets of parameters (identified by ParamKey).
        use_gloo_process_groups (bool): if false, disable use of Gloo process groups
            in underlying Megatron optimizers.
        pg_collection: Optional unified process group for distributed training.
        dump_param_to_param_group_map (Optional[str]): path to dump parameter to param group map.

    Returns:
        Instance of MegatronOptimizer.
    """

    log_single_rank(logger, logging.INFO, f'Setting up optimizer with config {config}')

    check_config_overrides_consistency(config, config_overrides)

    # Separate out first model chunk if overlapping param AG with optimizer step.
    if config.overlap_param_gather_with_optimizer_step:
        all_dense_model_chunks = [[model_chunks[0]], model_chunks[1:]]
        overlap_param_gather_with_optimizer_step_flags = [True, False]
    else:
        all_dense_model_chunks = [model_chunks]
        overlap_param_gather_with_optimizer_step_flags = [False]

    # Setup process groups using helper method
    process_groups_dict = ProcessGroupCollection.setup_process_groups_for_optimizer(
        pg_collection, model_chunks, use_gloo_process_groups
    )

    dp_cp_group = process_groups_dict['dp_cp_group']
    intra_dp_cp_group = process_groups_dict['intra_dp_cp_group']
    intra_expt_dp_group = process_groups_dict['intra_expt_dp_group']
    mp_group = process_groups_dict['mp_group']
    expt_tp_pp_group = process_groups_dict['expt_tp_pp_group']
    intra_dp_cp_group_gloo = process_groups_dict['intra_dp_cp_group_gloo']
    intra_expt_dp_group_gloo = process_groups_dict['intra_expt_dp_group_gloo']
    intra_dist_opt_group = process_groups_dict['intra_dist_opt_group']

    model_parallel_rank = get_pg_rank(mp_group)

    if get_pg_size(dp_cp_group) > get_pg_size(intra_dp_cp_group):
        inter_dist_opt_group = process_groups_dict['inter_dist_opt_group']
        distributed_optimizer_instance_id = get_pg_rank(inter_dist_opt_group)
    else:
        distributed_optimizer_instance_id = 0

    optimizers = []
    model_chunk_offset = 0
    ddp_config = model_chunks[0].ddp_config  # Use the first model chunk's DDP config
    if ddp_config.use_megatron_fsdp:
        for model_chunk, overlap_param_gather_with_optimizer_step in zip(
            all_dense_model_chunks, overlap_param_gather_with_optimizer_step_flags
        ):
            param_groups, buffers = _get_param_groups_and_buffers(
                model_chunk,
                model_chunk_offset=model_chunk_offset,
                config=config,
                config_overrides=config_overrides,
                filter_fn=lambda g: True,
                buffer_name='buffers',
            )

            optimizers.append(
                _get_megatron_optimizer_based_on_param_groups(
                    config=config,
                    model_chunks=model_chunk,
                    param_groups=param_groups,
                    per_model_buffers=buffers,
                    model_parallel_group=mp_group,
                    data_parallel_group=dp_cp_group,
                    data_parallel_group_gloo=intra_dp_cp_group_gloo,
                    data_parallel_group_idx=model_parallel_rank,
                    intra_dist_opt_group=intra_dist_opt_group,
                    distributed_optimizer_instance_id=distributed_optimizer_instance_id,
                    pg_collection=pg_collection,
                )
            )
            model_chunk_offset += 1

        if len(optimizers) == 1:
            return optimizers[0]

        return ChainedOptimizer(optimizers)

    if dump_param_to_param_group_map is not None:
        param_to_param_group = {}
        param_group_id = 0
    for dense_model_chunks, overlap_param_gather_with_optimizer_step in zip(
        all_dense_model_chunks, overlap_param_gather_with_optimizer_step_flags
    ):
        param_groups, buffers = _get_param_groups_and_buffers(
            dense_model_chunks,
            model_chunk_offset=model_chunk_offset,
            config=config,
            config_overrides=config_overrides,
            filter_fn=lambda g: not g['is_expert_parallel'],
            buffer_name='buffers',
        )
        for model_chunk in dense_model_chunks:
            model_chunk.overlap_param_gather_with_optimizer_step = (
                overlap_param_gather_with_optimizer_step
            )
        if dump_param_to_param_group_map is not None:
            for param_group in param_groups:
                for param in param_group["params"]:
                    param_name = get_global_unique_param_name(model_chunks, param)
                    param_to_param_group[param_name] = param_group_id
                param_group_id += 1

        # Pass Gloo process groups into optimizer only if needed.
        optimizers.append(
            _get_megatron_optimizer_based_on_param_groups(
                config=config,
                model_chunks=dense_model_chunks,
                param_groups=param_groups,
                per_model_buffers=buffers,
                model_parallel_group=mp_group,
                data_parallel_group=intra_dp_cp_group,
                data_parallel_group_gloo=intra_dp_cp_group_gloo,
                data_parallel_group_idx=model_parallel_rank,
                intra_dist_opt_group=intra_dist_opt_group,
                distributed_optimizer_instance_id=distributed_optimizer_instance_id,
                pg_collection=pg_collection,
            )
        )
        model_chunk_offset += 1

    moe_param_groups, moe_buffers = _get_param_groups_and_buffers(
        model_chunks,
        model_chunk_offset=0,
        config=config,
        config_overrides=config_overrides,
        filter_fn=lambda g: g['is_expert_parallel'],
        buffer_name='expert_parallel_buffers',
    )
    if dump_param_to_param_group_map is not None:
        for param_group in moe_param_groups:
            for param in param_group["params"]:
                param_name = get_global_unique_param_name(model_chunks, param)
                param_to_param_group[param_name] = param_group_id
            param_group_id += 1
    if len(moe_param_groups) > 0:
        expt_model_parallel_rank = get_pg_rank(expt_tp_pp_group)
        # Pass Gloo process groups into optimizer only if needed.
        if use_gloo_process_groups:
            expt_data_parallel_group_gloo = intra_expt_dp_group_gloo
        else:
            expt_data_parallel_group_gloo = None
        optimizers.append(
            _get_megatron_optimizer_based_on_param_groups(
                config=config,
                model_chunks=model_chunks,
                param_groups=moe_param_groups,
                per_model_buffers=moe_buffers,
                model_parallel_group=expt_tp_pp_group,
                data_parallel_group=intra_expt_dp_group,
                data_parallel_group_gloo=expt_data_parallel_group_gloo,
                data_parallel_group_idx=expt_model_parallel_rank,
                intra_dist_opt_group=intra_dist_opt_group,
                distributed_optimizer_instance_id=distributed_optimizer_instance_id,
                pg_collection=pg_collection,
            )
        )

    if dump_param_to_param_group_map is not None:
        torch.distributed.checkpoint.save(
            state_dict=param_to_param_group, checkpoint_id=dump_param_to_param_group_map
        )

    return ChainedOptimizer(optimizers)


================================================
FILE: megatron/core/optimizer/clip_grads.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Gradient clipping."""

from typing import List, Optional, Union

import torch
from torch import inf

try:
    from transformer_engine.pytorch.optimizers import (
        multi_tensor_applier,
        multi_tensor_l2norm,
        multi_tensor_scale,
    )

    l2_norm_impl = multi_tensor_l2norm
    multi_tensor_scale_impl = multi_tensor_scale
except ImportError:
    try:
        import amp_C
        from apex.multi_tensor_apply import multi_tensor_applier

        l2_norm_impl = amp_C.multi_tensor_l2norm
        multi_tensor_scale_impl = amp_C.multi_tensor_scale
    except ImportError:
        import warnings

        warnings.warn(
            f'Transformer Engine and Apex are not installed. '
            'Falling back to local implementations of multi_tensor_applier, '
            'multi_tensor_l2norm, and multi_tensor_scale'
        )

        from megatron.core.utils import (
            local_multi_tensor_applier,
            local_multi_tensor_l2_norm,
            local_multi_tensor_scale,
        )

        multi_tensor_applier = local_multi_tensor_applier
        l2_norm_impl = local_multi_tensor_l2_norm
        multi_tensor_scale_impl = local_multi_tensor_scale


from ..tensor_parallel import param_is_not_tensor_parallel_duplicate
from ..transformer.module import param_is_not_shared
from ..utils import get_data_parallel_group_if_dtensor, to_local_if_dtensor


def get_grad_norm_fp32(
    grads_for_norm: Union[List[torch.Tensor], torch.Tensor],
    norm_type: Union[int, float] = 2,
    grad_stats_parallel_group: Optional[torch.distributed.ProcessGroup] = None,
) -> float:
    """Calculate the norm of gradients in fp32.

    This is adapted from torch.nn.utils.clip_grad.clip_grad_norm_ and
    added functionality to handle model parallel parameters.

    Arguments:
        grads_for_norm (Iterable[Tensor] or Tensor): an iterable of Tensors or a single
            Tensor that will be used for calculating the grad norm.
        norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for
            infinity norm.
        grad_stats_parallel_group (group): Process group for reducing the grad norms. This is
            generally the model-parallel group for non-distributed optimizers, and the entire
            world for the distributed optimizer.

    Returns:
        Total norm of the parameters (viewed as a single vector).
    """

    if isinstance(grads_for_norm, torch.Tensor):
        grads_for_norm = [grads_for_norm]

    data_parallel_group = None
    for grad in grads_for_norm:
        data_parallel_group = get_data_parallel_group_if_dtensor(grad, data_parallel_group)

    grads_for_norm = [to_local_if_dtensor(grad) for grad in grads_for_norm]

    # Norm parameters.
    norm_type = float(norm_type)
    total_norm = 0.0

    # Calculate norm.
    if norm_type == inf:
        total_norm = max(grad.abs().max() for grad in grads_for_norm)
        total_norm_cuda = torch.tensor([float(total_norm)], dtype=torch.float, device='cuda')
        # Take max across all data-parallel GPUs if using FSDP and then all model-parallel GPUs.
        if data_parallel_group:
            torch.distributed.all_reduce(
                total_norm_cuda, op=torch.distributed.ReduceOp.MAX, group=data_parallel_group
            )
        torch.distributed.all_reduce(
            total_norm_cuda, op=torch.distributed.ReduceOp.MAX, group=grad_stats_parallel_group
        )
        total_norm = total_norm_cuda[0].item()

    else:
        if norm_type == 2.0:
            dummy_overflow_buf = torch.zeros(1, dtype=torch.int, device='cuda')
            # Use apex's multi-tensor applier for efficiency reasons.
            # Multi-tensor applier takes a function and a list of list
            # and performs the operation on that list all in one kernel.
            if grads_for_norm:
                grad_norm, _ = multi_tensor_applier(
                    l2_norm_impl,
                    dummy_overflow_buf,
                    [grads_for_norm],
                    False,  # no per-parameter norm
                )
            else:
                grad_norm = torch.zeros(1, dtype=torch.float, device='cuda')
            # Since we will be summing across data parallel groups,
            # we need the pow(norm-type).
            total_norm = grad_norm**norm_type

        else:
            for grad in grads_for_norm:
                grad_norm = torch.norm(grad, norm_type)
                total_norm += grad_norm**norm_type

        # Sum across all data-parallel GPUs if using FSDP and then all model-parallel GPUs.
        if data_parallel_group:
            torch.distributed.all_reduce(
                total_norm, op=torch.distributed.ReduceOp.SUM, group=data_parallel_group
            )
        torch.distributed.all_reduce(
            total_norm, op=torch.distributed.ReduceOp.SUM, group=grad_stats_parallel_group
        )
        total_norm = total_norm.item() ** (1.0 / norm_type)

    return total_norm


def clip_grad_by_total_norm_fp32(
    parameters: Union[List[torch.Tensor], torch.Tensor],
    max_norm: Union[int, float],
    total_norm: float,
    use_decoupled_grad: bool = False,
):
    """Clips gradient of an iterable of parameters in fp32 by total norm.

    Note that the gradients are modified in place.

    Args:
        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor that will have gradients normalized.
        max_norm (float or int): max norm of the gradients.
        total_norm (float): total norm of the gradients.
        use_decoupled_grad (bool, optional): whether to read grad from ".grad" or ".decoupled_grad",
            default value is False.
    """
    # Grads.
    params = []
    grads = []
    for param in parameters:
        if use_decoupled_grad:
            if hasattr(param, "decoupled_grad") and param.decoupled_grad is not None:
                assert param.decoupled_grad.dtype in [torch.float32, torch.bfloat16]
                params.append(param)
                grads.append(to_local_if_dtensor(param.decoupled_grad).detach())
        else:
            if param.grad is not None:
                assert param.grad.type() == 'torch.cuda.FloatTensor'
                params.append(param)
                grads.append(to_local_if_dtensor(param.grad).detach())

    # Scale.
    clip_coeff = max_norm / (total_norm + 1.0e-6)
    if clip_coeff < 1.0:
        dummy_overflow_buf = torch.zeros(1, dtype=torch.int, device='cuda')
        multi_tensor_applier(
            multi_tensor_scale_impl, dummy_overflow_buf, [grads, grads], clip_coeff
        )


def count_zeros_fp32(
    parameters: Union[List[torch.Tensor], torch.Tensor],
    grad_stats_parallel_group: torch.distributed.ProcessGroup,
    use_decoupled_grad: bool = False,
    tp_group: Optional[torch.distributed.ProcessGroup] = None,
) -> float:
    """Counts the number of zeros in gradients associated with the passed-in list of
    parameters.

    Args:
        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor that will have the number of zeros in its corresponding
            gradient counted.
        grad_stats_parallel_group (group): Process group for reducing the num_zeros count. This is
            generally the model-parallel group for non-distributed optimizers, and the entire
            world for the distributed optimizer.
        use_decoupled_grad (bool, optional) whether to read grad from ".grad" or ".decoupled_grad",
            default value is False.
    """

    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]

    # Filter parameters based on:
    #   - grad should not be none
    #   - parameter should not be shared
    #   - should not be a replica due to tensor model parallelism
    total_num_zeros = torch.zeros(1, dtype=torch.float, device='cuda')
    data_parallel_group = None
    use_megatron_fsdp = False
    for param in parameters:
        if getattr(param, "__fsdp_param__", False) and param.grad is not None:
            # If the parameter is managed by Megatron FSDP, we need to handle it differently.
            use_megatron_fsdp = True
            grad = param.grad._local_tensor
            num_zeros = grad.numel() - torch.count_nonzero(grad)
            total_num_zeros += num_zeros
            continue

        grad_attr = "decoupled_grad" if use_decoupled_grad else "grad"
        grad_not_none = hasattr(param, grad_attr) and getattr(param, grad_attr) is not None
        is_not_shared = param_is_not_shared(param)
        is_not_tp_duplicate = param_is_not_tensor_parallel_duplicate(param, tp_group=tp_group)
        if grad_not_none and is_not_shared and is_not_tp_duplicate:
            grad_obj = getattr(param, grad_attr)
            data_parallel_group = get_data_parallel_group_if_dtensor(grad_obj, data_parallel_group)
            grad = to_local_if_dtensor(grad_obj).detach()
            num_zeros = grad.numel() - torch.count_nonzero(grad)
            total_num_zeros = num_zeros + total_num_zeros

    if use_megatron_fsdp and data_parallel_group is not None:
        raise ValueError(
            "Unexpected use of Megatron FSDP with data parallel group. "
            "Please ensure that the parameters are properly managed by Megatron FSDP."
        )

    # Sum across all data-parallel GPUs if using FSDP.
    if data_parallel_group:
        torch.distributed.all_reduce(
            total_num_zeros, op=torch.distributed.ReduceOp.SUM, group=data_parallel_group
        )
    # Sum across all model-parallel GPUs.
    torch.distributed.all_reduce(
        total_num_zeros, op=torch.distributed.ReduceOp.SUM, group=grad_stats_parallel_group
    )

    total_num_zeros = total_num_zeros.item()

    return total_num_zeros


================================================
FILE: megatron/core/optimizer/cpu_offloading/README.md
================================================
## How to use ?

Add these flags to enable optimizer cpu offload in MCore.

```bash
--optimizer-cpu-offload
--optimizer-offload-fraction 1.0
--use-precision-aware-optimizer
```

## Configuration Recommendations

Gradient copy from GPU to CPU, CPU optimizer step, and subsequent parameter copy from CPU to GPU can be time-consuming operations, and it is recommended to use the flag `--overlap-cpu-optimizer-d2h-h2d` to execute them concurrently.


================================================
FILE: megatron/core/optimizer/cpu_offloading/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
from .hybrid_optimizer import HybridDeviceOptimizer


================================================
FILE: megatron/core/optimizer/cpu_offloading/hybrid_optimizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION and Alibaba PAI. All rights reserved.
from collections import defaultdict
from typing import Dict

import torch


def _param_generator(cpu_optimizer):
    for group in cpu_optimizer.param_groups:
        for param in group["params"]:
            yield param


class HybridDeviceOptimizer(torch.optim.Optimizer):
    """
    HybridDeviceOptimizer is a custom optimizer designed to facilitate
    hybrid parameter updates across GPU and CPU. This optimizer allows
    users to adjust the fraction of parameters updated on the CPU and
    GPU through the `offload_fraction` parameter.

    It supports bf16 mixed-precision training. Additionally, the optimizer
    implements overlapping operations for improved performance, including
    gradient transfer from device to host (D2H) and parameter transfer
    from host to device (H2D).

    Example:
        from transformer_engine.pytorch.optimizers import FusedAdam as GPUAdam
        from torch.optim import AdamW as CPUAdam
        optimizer = HybridDeviceOptimizer(
            param_groups,
            cpu_optimizer_cls=CPUAdam,
            gpu_optimizer_cls=GPUAdam,
            offload_fraction=0.5,
            param_update_in_fp32=True,
            overlap_cpu_optimizer_d2h_h2d=True,
        )
        optimizer.step()

    Note:
        This optimizer is particularly useful in scenarios where memory
        constraints are present or when leveraging both CPU and GPU resources
        can lead to performance improvements.
    """

    def __init__(
        self,
        params,
        offload_fraction=0.5,
        cpu_optimizer_cls=None,
        gpu_optimizer_cls=None,
        param_update_in_fp32: bool = False,
        pin_cpu_grads: bool = True,
        pin_cpu_params: bool = True,
        overlap_cpu_optimizer_d2h_h2d: bool = True,
        **kwargs,
    ):
        super(HybridDeviceOptimizer, self).__init__(
            params,
            defaults={
                "offload_fraction": offload_fraction,
                "cpu_optimizer_cls": cpu_optimizer_cls,
                "gpu_optimizer_cls": gpu_optimizer_cls,
                "param_update_in_fp32": param_update_in_fp32,
                "pin_cpu_grads": pin_cpu_grads,
                "pin_cpu_params": pin_cpu_params,
                "overlap_cpu_optimizer_d2h_h2d": overlap_cpu_optimizer_d2h_h2d,
                **kwargs,
            },
        )

        self.offload_fraction = offload_fraction
        self.cpu_optimizer_cls = cpu_optimizer_cls
        self.gpu_optimizer_cls = gpu_optimizer_cls
        self.pin_cpu_grads = pin_cpu_grads
        self.pin_cpu_params = pin_cpu_params
        self.overlap_cpu_optimizer_d2h_h2d = overlap_cpu_optimizer_d2h_h2d
        self.param_update_in_fp32 = param_update_in_fp32
        self.sub_optimizer_kwargs = kwargs

        self._init_sub_optimizers()
        self._register_load_state_dict_hooks()

    def _set_sub_optimizer_grads(self):
        if self.param_update_in_fp32:
            for param in self.param_to_fp32_param:
                if param in self.gpu_params_map_cpu_copy:
                    # Skip if the param is offloaded to CPU, it should be handled
                    # in the following part.
                    continue
                fp32_param = self.param_to_fp32_param[param]
                grad = getattr(param, "decoupled_grad", param.grad)
                if grad is not None:
                    fp32_param.grad = grad.to(fp32_param.dtype)
                    fp32_param.requires_grad = True
                else:
                    fp32_param.requires_grad = False

        # Sync the grads from GPU to CPU.
        for optimizer in self.cpu_optimizers:
            for param in _param_generator(optimizer):
                gpu_param = self.cpu_copys_map_gpu_param[param]
                grad = getattr(gpu_param, "decoupled_grad", gpu_param.grad)
                if grad is None:
                    param.requires_grad = False
                    continue

                param.requires_grad = False
                if param not in self.cpu_copy_map_grad:
                    self.cpu_copy_map_grad[param] = torch.empty(
                        param.shape, dtype=param.dtype, pin_memory=self.pin_cpu_grads, device="cpu"
                    )
                    param.grad = self.cpu_copy_map_grad[param]

                self.cpu_copy_map_grad[param].data.copy_(grad, non_blocking=True)
            self._cpu_optimizer_map_data_event[optimizer] = self._d2h_stream.record_event()

    def _register_param_copy_back_gpu_hook(self):
        def param_copy_back_gpu_hook_closure():
            def param_copy_back_gpu_hook(optimizer, args, kwargs):
                self._h2d_stream.wait_stream(torch.cuda.current_stream())
                with torch.cuda.stream(self._h2d_stream):
                    for param in _param_generator(optimizer):
                        gpu_param = self.cpu_copys_map_gpu_param[param]
                        gpu_param.data.copy_(param.data, non_blocking=True)
                self._h2d_stream.record_event().wait(torch.cuda.current_stream())

            return param_copy_back_gpu_hook

        def fp32_param_copy_back_gpu_hook_closure():
            def fp32_param_copy_back_gpu_hook(optimizer, args, kwargs):
                for group in self.param_groups:
                    for param in group["params"]:
                        if param in self.gpu_params_map_cpu_copy:
                            # Skip if the param is offloaded to GPU, it has been
                            # copied back in the previous hook.
                            continue

                        if param in self.param_to_fp32_param:
                            fp32_param = self.param_to_fp32_param[param]
                            param.data.copy_(fp32_param.data)

            return fp32_param_copy_back_gpu_hook

        for optimizer in self.sub_optimizers:
            if optimizer is not self.gpu_optimizer:
                optimizer.register_step_post_hook(param_copy_back_gpu_hook_closure())
            elif self.param_update_in_fp32:
                optimizer.register_step_post_hook(fp32_param_copy_back_gpu_hook_closure())

    def step(self, closure=None):
        """
        Override the step method to perform the following operations:
            1. Sync the HDO param_groups to sub-optimizers.
            2. Sync the grads from GPU to CPU.
            3. Step the sub-optimizers.
            4. Sync the sub-optimizers state to HDO.
        """
        # Sync param_groups to sub-optimizers before each step to make sure
        # the lr, wd, etc. are up-to-date.
        self._sync_hdo_param_groups_to_sub_optimizers()

        self._d2h_stream.wait_stream(torch.cuda.current_stream())
        with torch.cuda.stream(self._d2h_stream):
            self._set_sub_optimizer_grads()

        # Step the sub-optimizers.
        if self.gpu_optimizer:
            self.gpu_optimizer.step(closure)

        for cpu_optimizer in self.cpu_optimizers:
            d2h_event = self._cpu_optimizer_map_data_event.pop(cpu_optimizer, None)
            if d2h_event is not None:
                d2h_event.synchronize()
            cpu_optimizer.step(closure)

        # Sync state and param_groups to HDO after each step.
        # NOTE: It is possible for the optimizer to change the properties
        #   in param_groups.
        self._sync_sub_optimizers_state_to_hdo()

    def _init_sub_optimizers(self):
        (
            self.cpu_param_groups,
            self.gpu_param_groups,
            self.gpu_params_map_cpu_copy,
            self.cpu_copys_map_gpu_param,
            self.param_to_fp32_param,
        ) = self._get_sub_optimizer_param_groups(self.offload_fraction)
        self.param_to_inner_param = {}
        self.inner_param_to_orig_param = {}
        for group in self.param_groups:
            for param in group["params"]:
                if param in self.param_to_fp32_param:
                    inner_param = self.param_to_fp32_param[param]
                elif param in self.gpu_params_map_cpu_copy:
                    inner_param = self.gpu_params_map_cpu_copy[param]
                else:
                    inner_param = param
                self.param_to_inner_param[param] = inner_param
                self.inner_param_to_orig_param[inner_param] = param
        self.fp32_param_to_orig_param = {v: k for k, v in self.param_to_fp32_param.items()}

        self.cpu_optimizers = []
        if self.overlap_cpu_optimizer_d2h_h2d:
            self.cpu_optimizers = self.build_cpu_optimizer_list(
                self.cpu_optimizer_cls, self.cpu_param_groups
            )
        elif len(self.cpu_param_groups) > 0:
            self.cpu_optimizers = [self.cpu_optimizer_cls(self.cpu_param_groups)]

        if len(self.gpu_param_groups) > 0:
            self.gpu_optimizer = self.gpu_optimizer_cls(self.gpu_param_groups)
        else:
            self.gpu_optimizer = None

        self.cpu_copy_map_grad: Dict[torch.Tensor, torch.Tensor] = defaultdict(torch.Tensor)
        self._d2h_stream = torch.cuda.current_stream()
        self._h2d_stream = torch.cuda.current_stream()
        if self.overlap_cpu_optimizer_d2h_h2d:
            self._d2h_stream = torch.cuda.Stream()
            self._h2d_stream = torch.cuda.Stream()
        self._cpu_optimizer_map_data_event = dict()

        self._register_param_copy_back_gpu_hook()

    @staticmethod
    def build_cpu_optimizer_list(cpu_optimizer_cls, cpu_param_groups):
        """Build several cpu optimizers to enable overlap. Currently we naively
        assign each parameter to an individual optimizer.

        Args:
            cpu_optimizer_cls (Type[torch.optim.Optimizer]): A torch optimizer class
            cpu_param_groups (List[Dict[str, Any]]): The CPU parameter groups
        """
        cpu_optimizers = []

        if len(cpu_param_groups) == 0:
            return cpu_optimizers

        for group in cpu_param_groups:
            group_defaults = group.copy()
            params = group_defaults.pop("params")
            if isinstance(params, torch.Tensor):
                params = [params]
            for param in params:
                _cpu_param_group = group_defaults.copy()
                _cpu_param_group["params"] = [param]
                cpu_optimizers.append(cpu_optimizer_cls([_cpu_param_group]))
        return cpu_optimizers

    def _get_sub_optimizer_param_groups(self, offload_fraction: float):
        params = []
        for group in self.param_groups:
            params.extend(group["params"])
        params_total_numel = sum([param.numel() for param in params])
        gpu_params_total_numel = sum([param.numel() for param in params if param.is_cuda])
        cpu_params_total_numel = params_total_numel - gpu_params_total_numel
        offload_threshold = gpu_params_total_numel * offload_fraction
        offload_params_numel = 0
        cpu_param_groups = []
        gpu_param_groups = []
        gpu_params_map_cpu_copy = {}
        cpu_copys_map_gpu_param = {}
        param_to_fp32_param = {}
        for group in self.param_groups:
            gpu_group = group.copy()
            cpu_group = group.copy()
            gpu_group["params"] = []
            cpu_group["params"] = []
            for param in group["params"]:
                orig_param = param
                cpu_copy = False
                if offload_params_numel < offload_threshold and param.is_cuda:
                    param = param.detach().clone().cpu().pin_memory()
                    offload_params_numel += param.numel()
                    cpu_copy = True
                if self.param_update_in_fp32 and param.dtype != torch.float32:
                    param = param.detach().clone().float()
                    param_to_fp32_param[orig_param] = param

                if cpu_copy:
                    gpu_params_map_cpu_copy[orig_param] = param
                    cpu_copys_map_gpu_param[param] = orig_param

                if param.is_cuda:
                    gpu_group["params"].append(param)
                else:
                    cpu_group["params"].append(param)
            if len(gpu_group["params"]) != 0:
                gpu_param_groups.append(gpu_group)
            if len(cpu_group["params"]) != 0:
                cpu_param_groups.append(cpu_group)

        return (
            cpu_param_groups,
            gpu_param_groups,
            gpu_params_map_cpu_copy,
            cpu_copys_map_gpu_param,
            param_to_fp32_param,
        )

    def _sync_sub_optimizers_state_to_hdo(self):
        """
        Update HDO state attribute to sub-optimizers.
        """

        # optimizer.state:
        # {
        #    torch.nn.Parameter: {
        #        str: Any,
        #    },
        #    ...
        # }
        new_state = defaultdict(dict)
        for optimizer in self.sub_optimizers:
            for param in optimizer.state:
                orig_param = self.inner_param_to_orig_param[param]
                new_state[orig_param] = optimizer.state[param]
                if self.param_update_in_fp32:
                    new_state[orig_param]["master_param"] = param
        self.state = new_state

    def _sync_hdo_state_to_sub_optimizers(self):
        for optimizer in self.sub_optimizers:
            new_state = defaultdict(dict)
            for group in optimizer.param_groups:
                for param in group["params"]:
                    orig_param = self.inner_param_to_orig_param[param]
                    new_state[param] = self.state[orig_param]
            optimizer.state = new_state
        self._update_fp32_params_by_new_state()
        self._move_new_state_to_right_device()

    def _sync_hdo_param_groups_to_sub_optimizers(self):
        """Sync HDO new param_groups attribute (e.g. lr, wd, etc.) to sub-optimizers."""
        param_in_param_group_index = {}
        for i, group in enumerate(self.param_groups):
            for p_id, param in enumerate(group["params"]):
                inner_param = self.param_to_inner_param[param]
                param_in_param_group_index[inner_param] = (i, p_id)

        for optimizer in self.sub_optimizers:
            new_param_groups = []
            for group in optimizer.param_groups:
                new_group = group.copy()
                # After sync-up the sub-optimizer last update, we need to sync-up the
                # HDO new param_groups attributes to the sub-optimizer.
                assert len(group["params"]) > 0, "param_groups should not be empty"
                group_id, _ = param_in_param_group_index[group["params"][0]]
                update_group_attrs = self.param_groups[group_id].copy()
                del update_group_attrs["params"]
                new_group.update(update_group_attrs)

                new_param_groups.append(new_group)
            optimizer.param_groups = new_param_groups

    def _move_new_state_to_right_device(self):
        for optimizer in self.sub_optimizers:
            for param, state in optimizer.state.items():
                for k, v in state.items():
                    if not isinstance(v, torch.Tensor):
                        continue
                    orig_param = self.inner_param_to_orig_param.get(param, param)
                    if isinstance(optimizer, self.defaults["cpu_optimizer_cls"]):
                        self.state[orig_param][k] = state[k] = v.to("cpu")
                    else:
                        self.state[orig_param][k] = state[k] = v.to("cuda")

    def _update_fp32_params_by_new_state(self):
        if not self.param_update_in_fp32:
            return
        for param, v in self.state.items():
            fp32_param = self.param_to_fp32_param[param]
            fp32_param.data.copy_(v["master_param"])

    def update_fp32_param_by_new_param(self):
        """
        Update the fp32 parameters by the new parameters.
        """
        for param, fp32_param in self.param_to_fp32_param.items():
            fp32_param.data.copy_(param)

    def _register_load_state_dict_hooks(self):
        def pre_load_state_dict_hook(self, state_dict):
            """
            Pre-load state dictionary hook to prevent loss of precision in
            mixed-precision training.

            When loading a state dictionary with `torch.load_state_dict`,
            optimizer states are reset and cast from `float32` to `bfloat16`/`float16`,
            potentially losing precision. This hook replaces parameters with
            their `float32` copies to mitigate this issue.

            Args:
                state_dict (dict): The state dictionary to be loaded.

            Returns:
                dict: The modified state dictionary with `float32` parameters.
            """
            if not self.param_update_in_fp32:
                return state_dict

            new_state = {}
            for param, v in self.state.items():
                param = self.param_to_fp32_param.get(param, param)
                new_state[param] = v
            self.state = new_state

            for group in self.param_groups:
                for i, param in enumerate(group["params"]):
                    group["params"][i] = self.param_to_fp32_param.get(param, param)

            return state_dict

        self.register_load_state_dict_pre_hook(pre_load_state_dict_hook)

        def post_load_state_dict_hook(self):
            # 1. Replace the temporarily replaced fp32 parameters back. Please
            # refer to the documentation in `pre_load_state_dict_hook`.
            if self.param_update_in_fp32:
                new_state = {}
                for param, v in self.state.items():
                    orig_param = self.fp32_param_to_orig_param.get(param, param)
                    new_state[orig_param] = v
                self.state = new_state

                for group in self.param_groups:
                    for i, param in enumerate(group["params"]):
                        group["params"][i] = self.fp32_param_to_orig_param.get(param, param)

            # 2. After loading state_dict, the parameters may change, and we need to
            # reinitialize the sub-optimizers to regenerate the new parameters and
            # cpu copy pairs.
            self._init_sub_optimizers()
            self._sync_hdo_param_groups_to_sub_optimizers()
            self._sync_hdo_state_to_sub_optimizers()

        self.register_load_state_dict_post_hook(post_load_state_dict_hook)

    def zero_grad(self, set_to_none: bool = True):
        """
        Zero or zero to none the gradients of all the parameters in the model.
        """
        super(HybridDeviceOptimizer, self).zero_grad(set_to_none)
        for group in self.param_groups:
            for param in group["params"]:
                if hasattr(param, "decoupled_grad"):
                    if set_to_none:
                        param.decoupled_grad = None
                    else:
                        param.decoupled_grad.zero_()

    def dummy_step(self):
        """
        The dummy step can be used to initialize the potential optimizer.state,
        which can solve the problem of checkpoint loading for an inplace operation
        such as loading a torch distributed checkpoint, for example.
        """
        for group in self.param_groups:
            for param in group["params"]:
                param.grad = torch.randn_like(param)
        self.step()
        self.zero_grad()

    @property
    def sub_optimizers(self):
        """
        Return the list of sub-optimizers.
        """
        if self.gpu_optimizer is not None:
            return self.cpu_optimizers + [self.gpu_optimizer]
        return self.cpu_optimizers


================================================
FILE: megatron/core/optimizer/distrib_optimizer.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Megatron distributed optimizer."""


import gc
import itertools
import logging
from collections import ChainMap
from dataclasses import replace
from logging import getLogger
from typing import Any, Callable, Dict, List, Optional, Tuple

import torch
import torch.nn.functional

from megatron.core.utils import log_single_rank

from ..dist_checkpointing.optimizer import KEEP_VARS_HINT

HAVE_APEX_OR_TE = True
USING_TE_OPTIMIZER = False
USING_APEX_OPTIMIZER = False
try:
    from transformer_engine.pytorch.optimizers import FusedAdam as Adam

    USING_TE_OPTIMIZER = True
except ImportError:
    try:
        from apex.optimizers import FusedAdam as Adam

        USING_APEX_OPTIMIZER = True
    except ImportError:
        from torch.optim import Adam as Adam

        HAVE_APEX_OR_TE = False

from megatron.core.optimizer.cpu_offloading import HybridDeviceOptimizer

from .. import tensor_parallel
from ..config_logger import has_config_logger_enabled, log_config_to_disk
from ..dist_checkpointing import ShardedTensor
from ..dist_checkpointing.dict_utils import nested_values
from ..dist_checkpointing.mapping import (
    LocalNonpersistentObject,
    ShardedObject,
    ShardedStateDict,
    ShardedTensorFactory,
)
from ..dist_checkpointing.utils import extract_sharded_tensors_and_factories
from ..distributed.param_and_grad_buffer import _ParamAndGradBuffer, partition_buckets
from ..fp8_utils import dequantize_fp8_tensor, is_float8tensor, quantize_param_shard
from ..transformer.fsdp_dtensor_checkpoint import handle_experts_in_state_dict
from ..transformer.module import MegatronModule
from .grad_scaler import MegatronGradScaler
from .optimizer import MixedPrecisionOptimizer, _zero_grad_group_helper, param_group_identifier_keys
from .optimizer_config import OptimizerConfig

logger = getLogger(__name__)


class Range:
    """
    A range represents a start and end points for indexing a shard
    from a full tensor.

    Args:
        start (int): Start index.
        end (int): End index.
    """

    def __init__(self, start: int, end: int):
        self.start = start
        self.end = end
        self.size = end - start

    def normalize(self, start: int = 0):
        """Shift start/end indexes to start at new start index.

        Both start and end indexes will be shifted by [new start] - [old start].

        Args:
            start (int): New start index.
        """
        return Range(start, start + self.size)

    def __str__(self):
        return "%d,%d [%d]" % (self.start, self.end, self.size)

    def __repr__(self):
        return str(self)

    def __len__(self):
        return self.end - self.start


class DistributedOptimizer(MixedPrecisionOptimizer):
    """Distributed optimizer, for all data types (fp16, bf16, and fp32).

    See __init__() below for argument details.
    """

    # enumerates fully reshardable optimizer formats (as opposed to formats
    # which depend on the internal optimizer buffers structure)
    checkpoint_fully_reshardable_formats: set[str] = {
        'fully_reshardable',
        'fully_sharded_model_space',
        'fsdp_dtensor',
    }

    @classmethod
    def _build_model_gbuf_param_range_map(
        cls,
        param_world_index_map: Dict[torch.nn.Parameter, Tuple],
        gbuf_world_range: Range,
        bucket_offset: int,
    ):
        """
        Build mapping from param reference to grad buffer shard ranges.

        This method builds a mapping from parameter references to grad
        buffer shard ranges, specific to each data-parallel (DP) rank's
        set of 'owned' parameters. Each grad buffer (padded to be an even
        multiple of DP-world-size) is conceptually divided into DP-world-size
        contiguous regions, where each DP rank 'owns' a contiguous region.
        Ownership in this sense means DP rank is responsible for reducing
        the relevant subset of grads, and updating the relevant subset of
        params.

        This conceptual partitioning of the grad buffer does NOT respect
        parameter boundaries, and as such it is assumed that each created
        range references a shard (or subset) of the full parameter. It is
        easiest to think of each DP rank as operating (i.e., reducing,
        gathering) purely on views into the grad buffer, for all model-to-
        main & main-to-model operations.

        This method creates four ranges:
        - The param's range within the entire grad buffer (i.e., world index).
        - The param's range within the relevant grad bucket's buffer.
        - The param's range within the DP rank's local view of the grad buffer.
        - The param's range within itself (i.e., its shard).
        """

        # Param range map.
        param_range_map = {}
        for param, param_world_indexes in param_world_index_map.items():

            # Param range.
            param_world_start, param_world_end, _ = param_world_indexes
            param_local_start = max(0, param_world_start - gbuf_world_range.start)
            param_local_end = min(gbuf_world_range.size, param_world_end - gbuf_world_range.start)

            # Add param, if within local gbuf range.
            if param_local_end > param_local_start:
                param_local_range = Range(param_local_start, param_local_end)
                param_world_range = param_local_range.normalize(
                    param_local_start + gbuf_world_range.start
                )
                param_world_range_in_bucket = Range(
                    param_world_range.start - bucket_offset, param_world_range.end - bucket_offset
                )
                sub_param_start = max(0, gbuf_world_range.start - param_world_start)
                sub_param_range = param_local_range.normalize(sub_param_start)
                param_range_map[param] = {
                    "gbuf_world": param_world_range,
                    "gbuf_world_in_bucket": param_world_range_in_bucket,
                    "gbuf_local": param_local_range,
                    "param": sub_param_range,
                }

        return param_range_map

    @classmethod
    def _build_model_gbuf_range(cls, param_and_grad_buffer: _ParamAndGradBuffer, bucket_index: int):
        """
        Build mapping between params and their grad buffers.

        This method does the initial setup for the method above. This setup
        includes determining the shard ranges into the param_and_grad_buffer
        for each data-parallel (DP) rank. Each DP rank keeps range info for
        all other DP ranks, for the purpose of creating args for
        reduce-scatter and all-gather.
        """

        data_parallel_rank = param_and_grad_buffer.data_parallel_group.rank()
        data_parallel_world_size = param_and_grad_buffer.data_parallel_group.size()

        bucket = param_and_grad_buffer.buckets[bucket_index]
        gbuf_size = bucket.grad_data.numel()
        assert (
            gbuf_size % data_parallel_world_size == 0
        ), f"Each bucket's buffer size should be divisible by {data_parallel_world_size}"
        max_gbuf_range_size = gbuf_size // data_parallel_world_size

        # All world ranges (i.e., across all data parallel ranks).
        gbuf_world_all_ranges = []
        for r in range(data_parallel_world_size):
            # Compute start of chunk in this bucket.
            gbuf_world_start = r * max_gbuf_range_size
            gbuf_world_end = min(gbuf_size, gbuf_world_start + max_gbuf_range_size)
            # Add bucket's offset in grad buffer.
            gbuf_world_range = Range(
                gbuf_world_start + bucket.offset, gbuf_world_end + bucket.offset
            )
            gbuf_world_all_ranges.append(gbuf_world_range)

        # Local DP's ranges.
        gbuf_world_range = gbuf_world_all_ranges[data_parallel_rank]

        # Get each param's ranges.
        param_range_map = cls._build_model_gbuf_param_range_map(
            param_and_grad_buffer.param_index_map, gbuf_world_range, bucket.offset
        )

        # Group into dict.
        data = {"param_map": param_range_map}

        return data

    @classmethod
    def _build_gbuf_range_map(cls, param_and_grad_buffer: _ParamAndGradBuffer):
        """
        Build mapping between params and their grad buffers. These mappings are
        partitioned according to data type.

        Iterate through all buckets of grad buffer to construct param ranges
        that this rank "owns" (the dp_rank'th shard of each bucket, where each
        shard is 1/dp_world_size of the bucket).

        Args:
            param_and_grad_buffer (_ParamAndGradBuffer): buffer to build mapping for.
        """
        return {
            (param_and_grad_buffer.param_dtype, param_and_grad_buffer.grad_dtype): [
                cls._build_model_gbuf_range(param_and_grad_buffer, bucket_index)
                for bucket_index in range(len(param_and_grad_buffer.buckets))
            ]
        }

    @classmethod
    def _build_model_param_gbuf_map(
        cls, gbuf_ranges: List[Dict]
    ) -> Dict[torch.nn.Parameter, Tuple]:
        """
        Create a reverse of the gbuf_ranges, for referencing in opposite direction.
        """
        param_gbuf_map = {}
        for gbuf_index, gbuf_range_map in enumerate(gbuf_ranges):
            for dtype, gbuf_range_map_for_all_buckets in gbuf_range_map.items():
                for bucket_index, gbuf_range_map in enumerate(gbuf_range_map_for_all_buckets):
                    for param, _ in gbuf_range_map["param_map"].items():
                        assert param not in param_gbuf_map, (
                            "Param should not be in param_gbuf_map; each param only belongs "
                            "to a single bucket."
                        )
                        param_gbuf_map[param] = (gbuf_index, dtype, bucket_index)
        return param_gbuf_map

    @classmethod
    def _build_optimizer_group_ranges(cls, param_groups: List[Dict], gbuf_ranges: List[Dict]):
        """
        Create optimizer groups.

        Given the set of parameter shard ranges that are owned by the current
        data-parallel (DP) rank, gather the set of parameters that will be
        used (in the method below) to create the current DP's optimizer
        groups.
        """

        # Param group map.
        # World param group map.
        # - Store a mapping of <model_parameter:group_index> for all parameters
        #   across all DP ranks. This is necessary because it is our first
        #   cross reference between the DDP mappings and the optimizer group
        #   parameters. This mapping only for use in the next step of building
        #   the local mapping over this DP rank's parameters.
        world_param_group_map = {}
        for group_index, group in enumerate(param_groups):
            for param in group["params"]:
                assert param.requires_grad
                world_param_group_map[param] = group_index

        # Optimizer group ranges & param-group mapping.
        # - Build a mapping from groups to their contained parameters, and also
        #   from parameters to their containing group index and order within
        #   the group. The group index and order are particularly important for
        #   saving and loading checkpoints.
        local_param_group_map = {}
        group_ranges = [{"params": []} for _ in param_groups]
        for gbuf_range_map in gbuf_ranges:
            for dtype, gbuf_range_map_for_all_buckets in gbuf_range_map.items():
                for gbuf_range_map in gbuf_range_map_for_all_buckets:
                    for param in gbuf_range_map["param_map"]:
                        group_index = world_param_group_map[param]
                        group_range = group_ranges[group_index]
                        group_range["params"].append(param)
                        local_param_group_map[param] = (group_index, len(group_range["params"]) - 1)

        # Squeeze zero-size group ranges.
        for group_index, group_range in enumerate(group_ranges):
            group_range["orig_group"] = param_groups[group_index]
            group_range["orig_group_idx"] = param_groups[group_index]

        return local_param_group_map, group_ranges

    @classmethod
    def _build_model_and_main_param_groups(
        cls,
        gbuf_ranges: List[Dict],
        param_gbuf_map: Dict[torch.nn.Parameter, Tuple],
        opt_group_ranges: List,
        config: OptimizerConfig,
    ):
        """
        Create main parameter groups needed for the optimizer step.

        These groups encompass both: 1) groups used by this class, for
        reducing/gather, and 2) groups used by the inner optimizer for the
        parameter update. Given that the conceptual grad buffer partitioning
        (created in earlier method) doesn't respect parameter boundaries,
        the optimizer operates on shards of the model parameters, rather than
        the full parameters.
        """

        # Parameter groups:
        #   model_float16_groups: original float16 parameters
        #   model_fp32_groups: original fp32 parameters
        #   shard_float16_groups: shards of original float16 parameters
        #   shard_fp32_groups: shards of original fp32 parameters
        #   shard_fp32_from_float16_groups: fp32 copy of float16 parameters
        model_float16_groups = []
        model_fp32_groups = []
        shard_float16_groups = []
        shard_fp32_groups = []
        shard_fp32_from_float16_groups = []

        # Allocate (or slice) each group's param shard.
        for group_range in opt_group_ranges:

            # Params of this group.
            model_float16_params_this_group = []
            model_fp32_params_this_group = []
            shard_float16_params_this_group = []
            shard_fp32_params_this_group = []
            shard_fp32_from_float16_params_this_group = []
            model_float16_groups.append(model_float16_params_this_group)
            model_fp32_groups.append(model_fp32_params_this_group)
            shard_float16_groups.append(shard_float16_params_this_group)
            shard_fp32_groups.append(shard_fp32_params_this_group)
            shard_fp32_from_float16_groups.append(shard_fp32_from_float16_params_this_group)

            for model_param in group_range["params"]:

                assert model_param.requires_grad

                gbuf_index, dtype, bucket_index = param_gbuf_map[model_param]
                gbuf_range = gbuf_ranges[gbuf_index][dtype][bucket_index]
                param_range = gbuf_range["param_map"][model_param]["param"]

                # fp16, bf16 params.
                if model_param.type() in ['torch.cuda.HalfTensor', 'torch.cuda.BFloat16Tensor']:

                    # Generate sharded model param.
                    if is_float8tensor(model_param) and config.fp8_recipe != "delayed":
                        # MXFP8Tensor and BlockwiseQTensor don't support view(-1)
                        shard_model_param = None
                    else:
                        shard_model_param = model_param.detach().view(-1)[
                            param_range.start : param_range.end
                        ]
                        tensor_parallel.copy_tensor_model_parallel_attributes(
                            shard_model_param, model_param
                        )
                        if hasattr(model_param, 'shared'):
                            shard_model_param.shared = model_param.shared

                    # Generate main param.
                    if not config.use_precision_aware_optimizer_no_fp8_or_ds_fp8:
                        # If we use FP8 params to initialize FP32 main params (compared to using the
                        # bf16/fp16 params to initialize the main params), there will be a loss of
                        # precision at the beginning of training (this problem will not occur if the
                        # training is long enough or if the main params are loaded from a
                        # checkpoint).
                        if is_float8tensor(model_param):
                            if hasattr(model_param, 'get_high_precision_init_val'):
                                shard_main_param = (
                                    model_param.get_high_precision_init_val()
                                    .view(-1)[param_range.start : param_range.end]
                                    .clone()
                                    .to(model_param.device)
                                    .float()
                                )
                                model_param.clear_high_precision_init_val()
                            else:
                                shard_main_param = model_param.float().view(-1)[
                                    param_range.start : param_range.end
                                ]
                        else:
                            shard_main_param = shard_model_param.clone().float()

                        tensor_parallel.copy_tensor_model_parallel_attributes(
                            shard_main_param, model_param
                        )
                        if hasattr(model_param, 'shared'):
                            shard_main_param.shared = model_param.shared
                    else:
                        # When using precision-aware optimizer, main params are held by FusedAdam.
                        shard_main_param = None

                    # Store handle to main_param.
                    model_param.main_param = shard_main_param
                    model_param.main_param_sharded = True

                    # Add to group.
                    model_float16_params_this_group.append(model_param)
                    shard_float16_params_this_group.append(shard_model_param)
                    shard_fp32_from_float16_params_this_group.append(shard_main_param)

                # fp32 params.
                elif model_param.type() == 'torch.cuda.FloatTensor':
                    shard_model_param = model_param.view(-1)[param_range.start : param_range.end]
                    model_fp32_params_this_group.append(model_param)
                    shard_fp32_params_this_group.append(shard_model_param)
                    tensor_parallel.copy_tensor_model_parallel_attributes(
                        shard_model_param, model_param
                    )
                    if hasattr(model_param, 'shared'):
                        shard_model_param.shared = model_param.shared

                else:
                    raise TypeError(
                        'Wrapped parameters must be one of '
                        'torch.cuda.FloatTensor,  '
                        'torch.cuda.HalfTensor, or '
                        'torch.cuda.BFloat16Tensor. '
                        'Received {}'.format(model_param.type())
                    )

            # Update optimizer's params.
            if not config.use_precision_aware_optimizer_no_fp8_or_ds_fp8:
                group_range["orig_group"]["params"] = [
                    *shard_fp32_params_this_group,
                    *shard_fp32_from_float16_params_this_group,
                ]
            else:
                group_range["orig_group"]["params"] = [
                    *shard_fp32_params_this_group,
                    *shard_float16_params_this_group,
                ]

        return (
            model_float16_groups,
            model_fp32_groups,
            shard_float16_groups,
            shard_fp32_groups,
            shard_fp32_from_float16_groups,
        )

    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        config: OptimizerConfig,
        grad_scaler: MegatronGradScaler,
        init_state_fn: Optional[Callable],
        model_chunks: List[MegatronModule],
        per_model_buffers: Dict[int, List[_ParamAndGradBuffer]],
        data_parallel_group: torch.distributed.ProcessGroup,
        data_parallel_group_gloo: Optional[torch.distributed.ProcessGroup],
        data_parallel_group_idx: int,
        distributed_optimizer_instance_id: int,
    ):
        """
        Distributed optimizer, for all data types (fp16, bf16, and fp32).

        The steps in this method create the core mapping between param and grad buffers,
        parameters, and parameter shard ranges, that is needed for converting between model
        param indexes and main parameter shard indexes. This method also updates the optimizer
        parameter groups with the newly created shards.

        Args:
            optimizer (torch.optim.Optimizer): base optimizer such as Adam or SGD.
            config (OptimizerConfig): configuration object for optimizer.
            grad_scaler (MegatronGradScaler): used for scaling gradients. Note that
                this can be None. This case happens when `bf16 = True` and we don't
                use any loss scale. Note that for `bf16 = True`, we can have
                a constant gradient scaler. Also for `bf16 = False`, we
                always require a grad scaler.
            init_state_fn (Callable, optional): function to initialize state in the optimizer.
            model_chunks (List[MegatronModule]): list of model chunks.
            per_model_buffers (Dict[int, List[_ParamAndGradBuffer]]): the implementation of the
                distributed optimizer is centered on using a contiguous buffer for
                communicating grads & params between the model state and the optimizer state.
                You can find a more detailed description in
                https://github.com/NVIDIA/Megatron-LM/blob/main/docs/source/distrib_optimizer.md.
            data_parallel_group (torch.distributed.ProcessGroup): data-parallel group to use to
                all-gather params after optimizer.step().
            data_parallel_group_gloo (torch.distributed.ProcessGroup): gloo data-parallel group
                (used in checkpoint loading and saving).
            data_parallel_group_idx (int): index in data-parallel group (used by
                distributed checkpointing logic).
            distributed_optimizer_instance_id (int): index of the Distributed Optimizer instance.
        """

        if has_config_logger_enabled(config):
            log_config_to_disk(config, locals(), prefix=type(self).__name__)

        super().__init__(optimizer, config, grad_scaler, init_state_fn)
        self.model_chunks = model_chunks
        self.ddp_config = self.model_chunks[0].ddp_config
        for model_chunk in self.model_chunks:
            assert self.ddp_config == model_chunk.ddp_config
        self.distributed_optimizer_instance_id = distributed_optimizer_instance_id

        assert (
            isinstance(optimizer, (Adam, torch.optim.AdamW, HybridDeviceOptimizer))
            or optimizer is None
        ), (
            "Only Adam and HybridDeviceOptimizer currently supported, "
            "due to checkpointing requirements."
        )

        # when freezing sub-models we have no real optimizer
        # but still need a stub DistributedOptimizer class
        if optimizer is None:
            self.is_stub_optimizer = True
            return

        self.is_stub_optimizer = False
        if self.ddp_config.use_megatron_fsdp:
            return

        # Model grad buffer ranges.
        assert per_model_buffers is not None, "per_model_buffers must be provided"
        self.buffers = list(itertools.chain(*per_model_buffers.values()))
        self.per_model_buffers = per_model_buffers
        self.data_parallel_group = data_parallel_group
        self.data_parallel_group_gloo = data_parallel_group_gloo
        self.data_parallel_group_idx = data_parallel_group_idx

        self.gbuf_idx_to_model_idx_map = {}
        gbuf_idx = 0
        for model_idx, buffers in self.per_model_buffers.items():
            for _ in buffers:
                self.gbuf_idx_to_model_idx_map[gbuf_idx] = model_idx
                gbuf_idx += 1

        self.per_model_bucket_groups = {}
        for model_idx, buffers in self.per_model_buffers.items():
            self.per_model_bucket_groups[model_idx] = partition_buckets(buffers)

        self.gbuf_ranges = []
        self.per_bucket_numel = []
        self.per_bucket_numel_unpadded = []
        for buffer in self.buffers:

            self.per_bucket_numel.append(
                {
                    (buffer.param_dtype, buffer.grad_dtype): [
                        bucket.grad_data.numel() for bucket in buffer.buckets
                    ]
                }
            )
            self.per_bucket_numel_unpadded.append(
                {
                    (buffer.param_dtype, buffer.grad_dtype): [
                        bucket.numel_unpadded for bucket in buffer.buckets
                    ]
                }
            )
            self.gbuf_ranges.append(self._build_gbuf_range_map(buffer))
        self.model_param_gbuf_map = self._build_model_param_gbuf_map(self.gbuf_ranges)

        # Add main_param field to each parameter. We will use this fp32 copy to compute
        # the param norm.
        # For parameters with optimizer state on this rank, None will be overwritten by
        # the corresponding sharded main_param tensor.
        for param_group in self.optimizer.param_groups:
            # For all the parameters in this group.
            for param in param_group['params']:
                if param.requires_grad:
                    # fp32 copy only needed for 16-bit parameters.
                    if param.type() in ['torch.cuda.HalfTensor', 'torch.cuda.BFloat16Tensor']:
                        param.main_param = None
                        param.main_param_sharded = True

        # Optimizer ranges.
        (self.model_param_group_index_map, self.opt_group_ranges) = (
            self._build_optimizer_group_ranges(self.optimizer.param_groups, self.gbuf_ranges)
        )

        # Allocate main param shards.
        (
            self.model_float16_groups,
            self.model_fp32_groups,
            self.shard_float16_groups,
            self.shard_fp32_groups,
            self.shard_fp32_from_float16_groups,
        ) = self._build_model_and_main_param_groups(
            self.gbuf_ranges, self.model_param_gbuf_map, self.opt_group_ranges, config
        )

        if isinstance(self.optimizer, HybridDeviceOptimizer):
            self.optimizer = HybridDeviceOptimizer(
                params=[g["orig_group"] for g in self.opt_group_ranges], **self.optimizer.defaults
            )
        else:
            self.optimizer.param_groups = [g["orig_group"] for g in self.opt_group_ranges]
            self.optimizer.load_state_dict(self.optimizer.state_dict())

    def _get_model_param_range_map(self, param: torch.nn.Parameter):
        """
        Given a model param, get the index sub-range of the param that this
        data-parallel rank owns.
        """
        gbuf_index, dtype, bucket_index = self.model_param_gbuf_map[param]
        gbuf_range_map = self.gbuf_ranges[gbuf_index][dtype][bucket_index]
        param_range_map = gbuf_range_map["param_map"][param]
        return param_range_map

    def get_grad_stats_parallel_group(self) -> torch.distributed.ProcessGroup:
        """
        With the distributed optimizer, gradient statistics (num_zeros & norm) are reduced over
        all ranks in the distributed optimizer instance (versus only the model-parallel ranks
        with the non-distributed optimizer).
        """
        return getattr(self, 'grad_stats_parallel_group', None)

    def state_dict(self):
        """
        The state dict contains all non-DP-rank-dependent (i.e., non-parameter-
        related) optimizer variables. The returned state dict can be stored in
        the standard model/RNG checkpoint file. The parameter and dependent
        optimizer state (e.g., exp_avg, exp_avg_sq) are stored in a separate
        checkpoint file by calling 'save_parameter_state()'.
        """
        inner_state_dict = self.optimizer.state_dict()
        state_dict = {}

        # Extract 'step', for non-Apex/TE support.
        if not HAVE_APEX_OR_TE:
            steps = list(set([s["step"].item() for s in inner_state_dict["state"].values()]))
            assert len(steps) == 1
            step = steps[0]
        elif isinstance(self.optimizer, HybridDeviceOptimizer):
            step = None
            for optimizer in self.optimizer.sub_optimizers:
                if isinstance(optimizer, (torch.optim.Adam, torch.optim.AdamW)):
                    if len(optimizer.state) == 0:
                        continue
                    steps = list(set([s["step"].item() for s in optimizer.state.values()]))
                    assert len(steps) == 1, f"steps: {optimizer.state}"
                    step = steps[0]
                    break
        elif USING_TE_OPTIMIZER or USING_APEX_OPTIMIZER:
            # Extract 'step', for TE FusedAdam support.
            steps = list(
                set(
                    [
                        g["step"]
                        for g in inner_state_dict["param_groups"]
                        if len(g["params"]) > 0 and "step" in g
                    ]
                )
            )
            assert len(steps) <= 1, f"steps: {steps}"
            step = steps[0] if len(steps) == 1 else None

        # Optimizer state (do not store parameter state here).
        state_dict['optimizer'] = {k: v for k, v in inner_state_dict.items() if k != "state"}
        for param_group in state_dict["optimizer"]["param_groups"]:
            del param_group["params"]
            if not HAVE_APEX_OR_TE:
                # Native PyTorch param group requires step (i.e., iteration).
                param_group["step"] = step
            elif (
                USING_TE_OPTIMIZER
                or USING_APEX_OPTIMIZER
                or isinstance(self.optimizer, HybridDeviceOptimizer)
            ) and step is not None:
                # TE FusedAdam will not accumulate step for empty param groups, so we need to
                # align the step across param groups.
                param_group["step"] = int(step)

        # Grad scaler state.
        if self.grad_scaler:
            state_dict['grad_scaler'] = self.grad_scaler.state_dict()

        return state_dict

    def load_state_dict(self, state_dict):
        """Load the state dict.

        As detailed in state_dict(), the state dict contains all non-
        parameter-related variables. This method is notably longer than
        state_dict(), because the Torch optimizers state has yet to be
        allocated at this point, and so we must do a cross referencing between
        the optimizers state (and the ordering it expects for parameter state)
        and this DP rank's shards. The optimizer at this point does not contain
        any tensor dimension information, so we must get these dimensions from
        the DP shards mapped during DistributedOptimizer.__init__().

        The tensor parameter state is loaded via load_parameter_state(), and
        so this method also must populate the loaded state dict with dummy
        tensor data (i.e., via torch.empty() below). This will be overwritten
        during load_parameter_state().

        ** Note: Torch optimizer's state structure. **
        The Torch optimizer stores its state in two levels. The top level is a
        list of groups, where each group contains a list of integer indexes
        (corresponding to parameters) that index into a master parameter list
        that is shared by all groups. As such, three values are necessary for
        maintaining this ordering:

        - group_index : The group to which a parameter belongs.
        - group_order : The index of a parameter within its group.
        - state_order : The index of a parameter within the shared parameter
            list.
        """
        if self.ddp_config.use_megatron_fsdp:
            if "param_to_group_meta" in state_dict:
                state_dict["param_groups"] = self._param2group_meta_to_param_groups(
                    state_dict["param_to_group_meta"], self.optimizer.param_groups
                )
                del state_dict["param_to_group_meta"]
            self.optimizer.load_state_dict(state_dict)
            return

        if len(self.optimizer.state) == 0:
            if isinstance(self.optimizer, HybridDeviceOptimizer):
                self.optimizer.dummy_step()

        # Get the Torch optimizer's state dict.
        # - This 'inner' optimizer at this point is unallocated, and only
        #   contains an integer ordering of parameters within each group, and
        #   the ordering of parameters within its flattened parameter state
        #   list.
        def make_needed_groups(param_group):
            needed_groups = []
            for key in param_group_identifier_keys:
                # NeMo changes these variable names from `lr_mult` and `wd_mult`
                # to `pre_lr_mult` and `pre_wd_mult`, so we need to check both.
                if key in param_group:
                    pass
                elif f"pre_{key}" in param_group:
                    key = f"pre_{key}"
                else:
                    raise ValueError(
                        f"Key {key} (or pre_{key}) not found in param_group {param_group}."
                    )
                needed_groups.append(param_group[key])
            needed_groups = tuple(needed_groups)
            return needed_groups

        param_groups_map = {}
        for param_group in state_dict["optimizer"]["param_groups"]:
            needed_groups = make_needed_groups(param_group)
            param_groups_map[needed_groups] = param_group
        inner_state_dict = self.optimizer.state_dict()
        state_dict_param_groups = []
        for inner_param_group in inner_state_dict["param_groups"]:
            needed_groups = make_needed_groups(inner_param_group)
            state_dict_param_groups.append(
                {**param_groups_map[needed_groups], "params": inner_param_group['params']}
            )

        # Allocate or retrieve optimizer state (i.e., tensors).
        if len(self.optimizer.state) == 0:
            # Allocate empty optimizer state if not previously initialized.
            # - If len(self.optimizer.state) == 0, this means that the optimizer
            #   state has not been previously initialized. Once it has been
            #   initialized, we skip this code block to avoid reallocating
            #   empty tensors (i.e., torch.empty), which in turn reduces memory
            #   fragmentation.
            # - Real data is overwritten during load_parameter_state().
            state_dict_state = []
            for gbuf_range_maps in self.gbuf_ranges:
                for gbuf_range_map_for_all_buckets in gbuf_range_maps.values():
                    for gbuf_range_map in gbuf_range_map_for_all_buckets:
                        for model_param, param_range_map in gbuf_range_map["param_map"].items():

                            # Get parameter ordering information (see method docstring
                            # for details).
                            group_index, group_order = self.model_param_group_index_map[model_param]
                            state_order = inner_state_dict["param_groups"][group_index]["params"][
                                group_order
                            ]

                            # Allocate dummy tensors.
                            numel = len(param_range_map["gbuf_world"])
                            init_shard = lambda dtype=torch.float32: torch.empty(
                                (numel,), dtype=dtype, device=torch.cuda.current_device()
                            )

                            # For precision_aware_optimizer, the empty tensors should also be
                            #  initialized with the correct dtype.
                            tensors = {
                                "exp_avg": init_shard(self.config.exp_avg_dtype),
                                "exp_avg_sq": init_shard(self.config.exp_avg_sq_dtype),
                            }
                            if self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8:
                                if self.config.store_param_remainders and self.config.bf16:
                                    tensors["master_param"] = init_shard(torch.int16)
                                else:
                                    tensors["master_param"] = init_shard(
                                        self.config.main_params_dtype
                                    )
                            state_dict_state.append((state_order, tensors))

            # Sort by state order (see method docstring for details).
            state_dict_state.sort(key=lambda s: s[0])
            state_dict_state = {s[0]: s[1] for s in state_dict_state}

        else:
            # Retrieve existing optimizer state.
            state_dict_state = inner_state_dict["state"]

        # Extract 'step', for non-Apex/TE support.
        if not HAVE_APEX_OR_TE:
            steps = list(set([g["step"] for g in state_dict["optimizer"]["param_groups"]]))
            assert len(steps) == 1
            step = torch.tensor(steps[0], dtype=torch.float)

            for s in state_dict_state.values():
                # Native PyTorch state dict requires step (i.e., iteration).
                s["step"] = step
        elif isinstance(self.optimizer, HybridDeviceOptimizer):
            # Handle Torch AdamW special case, which, unlike FusedAdam, Torch AdamW
            # has an extra optimizer state "step".
            steps = list(
                set([g["step"] for g in state_dict["optimizer"]["param_groups"] if "step" in g])
            )
            if len(steps) != 0:
                assert len(steps) == 1, f"steps: {steps}"
                step = torch.tensor(steps[0], dtype=torch.float32, device="cpu")
                for v in self.optimizer.state.values():
                    v["step"] = step.detach().clone()

        # Optimizer.
        self.optimizer.load_state_dict(
            {"state": state_dict_state, "param_groups": state_dict_param_groups}
        )

        # Grad scaler.
        if 'grad_scaler' not in state_dict:
            if self.config.fp16:
                log_single_rank(
                    logger,
                    logging.INFO,
                    '***WARNING*** found an old checkpoint, will not load grad scaler ...',
                )
        else:
            if self.grad_scaler:
                self.grad_scaler.load_state_dict(state_dict['grad_scaler'])
            else:
                log_single_rank(
                    logger,
                    logging.INFO,
                    '***WARNING*** fould the grad scaler in the '
                    'checkpoint but it is None in the class. '
                    'Skipping loading grad scaler ...',
                )

        if 'param_state' in state_dict:
            assert 'param_state_sharding_type' in state_dict, state_dict.keys()
            param_state = state_dict['param_state']
            sharding_type = state_dict['param_state_sharding_type']
            log_single_rank(
                logger,
                logging.INFO,
                f'Loading distributed optimizer sharded state of type {sharding_type}',
            )
            if sharding_type == 'dp_zero_gather_scatter':
                self.load_parameter_state_from_dp_zero(param_state)
            elif sharding_type == 'fully_reshardable':
                self.load_parameter_state_from_fully_reshardable(param_state)
            elif sharding_type == 'dp_reshardable':
                self.load_parameter_state_from_dp_reshardable(param_state)
            elif sharding_type == 'fully_sharded_model_space':
                self.load_parameter_state_from_fs_model_space(param_state)
            else:
                raise NotImplementedError(f'Unknown sharding_type: {sharding_type}')

    def _get_main_param_and_optimizer_states(self, model_param):
        """Return a dict containing the main param and optimizer states corresponding to the input
        model_param.

        The structure of the returned dict:
        tensors = {
            "param": torch.Tensor
            "exp_avg": torch.Tensor
            "exp_avg_sq": torch.Tensor
        }
        """
        group_index, group_order = self.model_param_group_index_map[model_param]
        if self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8:
            sharded_model_param = self.optimizer.param_groups[group_index]["params"][group_order]
            tensors = {}
            for k in self.optimizer.state[sharded_model_param]:
                if not isinstance(self.optimizer.state[sharded_model_param][k], torch.Tensor):
                    continue
                if isinstance(self.optimizer, HybridDeviceOptimizer):
                    tensors[k] = self.optimizer.state[sharded_model_param][k]
                    continue

                tensors[k] = self.optimizer.get_unscaled_state(sharded_model_param, k)
            tensors["param"] = tensors.pop("master_param")
        else:
            main_param = self.optimizer.param_groups[group_index]["params"][group_order]
            optim_state = self.optimizer.state[main_param]
            tensors = {"param": main_param}
            for k, v in optim_state.items():
                if isinstance(v, torch.Tensor):
                    tensors[k] = v
        return tensors

    def _set_main_param_and_optimizer_states(self, model_param, tensors):
        """Set the main param and optimizer states corresponding to the input model_param.

        The structure of the input `tensors`:
        tensors = {
            "param": torch.Tensor
            "exp_avg": torch.Tensor
            "exp_avg_sq": torch.Tensor
        }
        """
        group_index, group_order = self.model_param_group_index_map[model_param]
        if self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8:
            sharded_model_param = self.optimizer.param_groups[group_index]["params"][group_order]
            for k, v in tensors.items():
                if not isinstance(v, torch.Tensor):
                    continue
                if isinstance(self.optimizer, HybridDeviceOptimizer):
                    if k == "param":
                        k = "master_param"
                    self.optimizer.state[sharded_model_param][k] = v
                    continue

                if k == "param":
                    self.optimizer.set_scaled_state(sharded_model_param, "master_param", v)
                else:
                    self.optimizer.set_scaled_state(sharded_model_param, k, v)
        else:
            main_param = self.optimizer.param_groups[group_index]["params"][group_order]
            optim_state = self.optimizer.state[main_param]
            dst_tensors = {"param": main_param}
            for k, v in optim_state.items():
                if isinstance(v, torch.Tensor):
                    dst_tensors[k] = v
            for key in dst_tensors:
                if not isinstance(tensors[key], torch.Tensor):
                    continue
                dst_tensors[key].copy_(tensors[key])

    def get_parameter_state_dp_reshardable(self):
        """Get internal representation of parameter state without any copies and modifications.

        This is referred to as "fully sharded bucket space" because the optimizer state is
        fully sharded (e.g. no gather involved) and bucket-centric (the state
        follows the internal structure of the Distributed Optimizer buckets)
        as opposed to model-centric (typical structure of PyT optimizers)
        """
        state = {
            "per_bucket_numel": self.per_bucket_numel,
            "per_bucket_numel_unpadded": self.per_bucket_numel_unpadded,
        }
        for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges):

            # Iterate grad buffers (by data type).
            dtype_state = {}
            assert len(gbuf_range_maps) == 1, "single dtype supported, for now."
            for dtype, gbuf_range_map_for_all_buckets in gbuf_range_maps.items():
                buckets_state = []
                for bucket_idx, gbuf_range_map in enumerate(gbuf_range_map_for_all_buckets):
                    bucket_state = []
                    for model_param, param_range_map in gbuf_range_map["param_map"].items():
                        tensors = self._get_main_param_and_optimizer_states(model_param)
                        tensors.update(
                            {
                                "gbuf_local_start": param_range_map["gbuf_local"].start,
                                "gbuf_local_end": param_range_map["gbuf_local"].end,
                            }
                        )
                        bucket_state.append(tensors)
                    buckets_state.append(bucket_state)
                dtype_state[dtype] = buckets_state
            state[gbuf_idx] = dtype_state
        return state

    def get_parameter_state_dp_zero(
        self,
        use_gloo_comm: bool = True,
        empty_data: bool = False,
        return_on_all_ranks: bool = False,
    ):
        """Get parameter state (i.e., parameter & optimizer tensors).

        This method performs two steps:
        - For each DP rank, copy param & optimizer shards to contiguous CPU
          buffers (e.g., one buffer each for main_param, exp_avg, and
          exp_avg_sq).
        - Gather contiguous buffers on DP rank 0 and concatenate to world
          buffers.


        Args:
            use_gloo_comm (bool, optional): Whether to use Gloo communication for tensors
                gather. Defaults to True. Has effect only for non-FSDP case.
            empty_data (bool, optional): Whether to fill world tensors with actual data.
                Empty world tensors are used during checkpoint loading. Defaults to False.
                Has effect only for non-FSDP case.
            return_on_all_ranks (bool, optional): Whether to return the state dict on all ranks.
                If False, DP != 0 ranks will return None. Defaults to False.
                Has effect only for non-FSDP case. Returning the whole state dict on all ranks
                allows to utilize parallel saving and loading when used for sharded state dict
                creation.

        Returns:
            dict or None: optimizer state dict on DP rank 0, or all ranks if return_on_all_ranks.
                Returns None on non-zero DP ranks when return_on_all_ranks=False.
        """
        # Data parallelism variables.
        if use_gloo_comm:
            data_parallel_group = self.data_parallel_group_gloo
        else:
            data_parallel_group = self.data_parallel_group
        assert data_parallel_group is not None
        data_parallel_world_size = data_parallel_group.size()
        data_parallel_rank = data_parallel_group.rank()
        data_parallel_global_ranks = torch.distributed.get_process_group_ranks(data_parallel_group)

        # Collect param states.
        state = {"buckets_coalesced": True}
        for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges):

            # Iterate grad buffers (by data type).
            dtype_state = {}
            assert len(gbuf_range_maps) == 1, "single dtype supported, for now."
            for dtype, gbuf_range_map_for_all_buckets in gbuf_range_maps.items():
                buffer_numel_unpadded = self.buffers[gbuf_idx].numel_unpadded
                # Create coalesced tensors for all state related to parameters in this buffer.
                world_tensors = {}
                if data_parallel_rank == 0 or return_on_all_ranks:
                    world_tensors = {
                        key: torch.zeros(
                            (buffer_numel_unpadded,), dtype=torch.float32, device="cpu"
                        )
                        for key in ("param", "exp_avg", "exp_avg_sq")
                    }
                    world_tensors["numel_unpadded"] = buffer_numel_unpadded

                if not empty_data:
                    offset_in_world_tensors = 0
                    for bucket_idx, gbuf_range_map in enumerate(gbuf_range_map_for_all_buckets):

                        # Compute local DP contiguous shard's size.
                        gbuf_world_numel = (
                            self.buffers[gbuf_idx].buckets[bucket_idx].grad_data.numel()
                        )
                        assert gbuf_world_numel % data_parallel_world_size == 0
                        gbuf_local_numel = gbuf_world_numel // data_parallel_world_size

                        gbuf_world_numel_unpadded = (
                            self.buffers[gbuf_idx].buckets[bucket_idx].numel_unpadded
                        )
                        assert gbuf_world_numel_unpadded <= gbuf_world_numel

                        local_shards = {
                            key: torch.zeros((gbuf_local_numel,), dtype=torch.float32, device="cpu")
                            for key in ("param", "exp_avg", "exp_avg_sq")
                        }

                        # Build contiguous DP rank shards (for param + optim states).
                        for model_param, param_range_map in gbuf_range_map["param_map"].items():
                            tensors = self._get_main_param_and_optimizer_states(model_param)

                            # Copy states into contiguous shard.
                            gbuf_local_start = param_range_map["gbuf_local"].start
                            gbuf_local_end = param_range_map["gbuf_local"].end
                            for key in local_shards:
                                local_shards[key][gbuf_local_start:gbuf_local_end].data.copy_(
                                    tensors[key].detach().cpu()
                                )

                        # Gather contiguous shards on DP rank 0.
                        for key, send_tensor in local_shards.items():

                            # Gather tensor list.
                            if data_parallel_rank == 0 or return_on_all_ranks:
                                device = "cpu" if use_gloo_comm else torch.cuda.current_device()
                                recv_tensors = [
                                    torch.zeros(
                                        (gbuf_local_numel,), dtype=torch.float32, device=device
                                    )
                                    for _ in range(data_parallel_world_size)
                                ]
                            else:
                                recv_tensors = None

                            # Gather.
                            if not use_gloo_comm:
                                send_tensor = send_tensor.cuda()
                            if return_on_all_ranks:
                                torch.distributed.all_gather(
                                    recv_tensors, send_tensor, data_parallel_group
                                )
                            else:
                                torch.distributed.gather(
                                    send_tensor,
                                    recv_tensors,
                                    data_parallel_global_ranks[0],
                                    data_parallel_group,
                                )

                            send_tensor = None  # allow mem deallocation

                            # Concatenate.
                            if data_parallel_rank == 0 or return_on_all_ranks:
                                if not use_gloo_comm:
                                    recv_tensors = [t.cpu() for t in recv_tensors]
                                recv_tensors_concatenated = torch.cat(recv_tensors)
                                # Copy this bucket's collected all-gather tensors into the right
                                # place in the tensor for the buffer. The tensor for the buffer
                                # gets rid of the padding between buckets.
                                start = offset_in_world_tensors
                                end = offset_in_world_tensors + gbuf_world_numel_unpadded
                                world_tensors[key][start:end].copy_(
                                    recv_tensors_concatenated[:gbuf_world_numel_unpadded]
                                )

                        offset_in_world_tensors += gbuf_world_numel_unpadded

                # Collect world state.
                dtype_state[dtype] = world_tensors
            state[gbuf_idx] = dtype_state

        return state if data_parallel_rank == 0 or return_on_all_ranks else None

    def save_parameter_state(self, filename: str):
        """Save the distributed parameter state on DP rank 0.

        Args:
            filename (str): path to save parameter state to.
        """

        state_dict = self.get_parameter_state_dp_zero()
        if self.data_parallel_group.rank() == 0:
            torch.save(state_dict, filename)

    def _init_optimizer_states_with_dummy_values(self):
        # Initializes optimizer states with dummy values.

        # This is necessary to ensure that the optimizer's states are
        # initialized correctly. These dummy states will be replaced in-place
        # during the loading of distributed checkpoints.
        for group in self.optimizer.param_groups:
            for param in group["params"]:
                if param.numel() == 0 or (
                    hasattr(param, "_local_tensor") and param._local_tensor.numel() == 0
                ):
                    # Avoid FusedAdam errors on empty tensor input.
                    continue
                param.grad = torch.zeros_like(param)
        self.optimizer.step()
        self.optimizer.zero_grad()

    def _param_name(self, param: torch.nn.Parameter) -> str:
        """Get the name of the parameter."""
        if not hasattr(self, "param_to_name"):
            name_to_param = {}
            for model_chunk in self.model_chunks:
                _name_to_param = dict(model_chunk.named_parameters())
                common_keys = name_to_param.keys() & _name_to_param.keys()
                if common_keys:
                    raise ValueError(
                        f"Parameter names conflict between model chunks: {common_keys}. "
                        "Ensure that each model chunk has unique parameter names."
                    )
                name_to_param.update(_name_to_param)
            num_experts = self.model_chunks[0].config.num_moe_experts if self.model_chunks else None
            name_to_param = handle_experts_in_state_dict(name_to_param, num_experts)
            self.param_to_name = {param: name for name, param in name_to_param.items()}
        assert (
            param in self.param_to_name
        ), f"Parameter {param} not found in param_to_name mapping. "
        return self.param_to_name[param]

    def sharded_state_dict(
        self,
        model_sharded_state_dict: ShardedStateDict = {},
        is_loading: bool = False,
        sharding_type: Optional[str] = None,
        metadata: Optional[dict] = None,
    ):
        """
        Chooses between 3 param state sharding implementations as requested by
        `metadata['distrib_optim_sharding_type']`.

        Sharding type can be one of:
        - 'dp_reshardable': Sharded state dict where each noncontiguous buffer is a
            separate ShardedTensor. Results in fully parallel save and load without any
            inter-process communication or intermediate buffers/copies. Since the format relies
            on the internal DistributedOptimizer structure, it allows checkpoint resharding
            only in DP dimension.
        - 'fully_reshardable': During checkpoint save (`is_loading=False`) gathers all
            DistributedOptimizer buffers on DP rank 0 and transforms them into a canonical state
            representation similar to a regular optimizer where each model param corresponds to
            one or more optimizer state tensors of the same shape (possibly different precision).
            During checkpoint load each rank loads a superset of the required state and does
            rank specific flattening and slicing.
        - 'fsdp_dtensor': Sharded state dict where each parameter is a separate
            PyTorch DTensor. This is the default and recommended implementation for the distributed
            optimizer when using the megatron fsdp training.

        Deprecated sharding formats:
        - 'dp_zero_gather_scatter': Naive implementation which reuses gather/scatter from the
            legacy ckpt format. During saving, gathers the parameters state on DP rank 0 and saves
            a ShardedObject with fixed TPxPP structure. During loading, loads the saved data on DP
            rank 0 (None on other ranks). Relies on the parameters scatter done in load_state_dict.
        - 'fully_sharded_model_space': Sharded state dict where each parameter is a separate
            ShardedTensor, which is a flattened subset of the canonical state representation.
            Results in fully parallel save and load without any inter-process communication or
            intermediate buffers/copies.


        Regular state dict parameters are saved on DP rank 0 and loaded on all ranks.
        """
        if sharding_type is not None:
            log_single_rank(
                logger,
                logging.WARNING,
                'DistributedOptimizer.sharded_state_dict parameter `sharding_type`'
                ' is deprecated and will be removed.'
                ' Use `metadata["distrib_optim_sharding_type"] instead`.',
            )
        else:
            sharding_type = (metadata or {}).get(
                'distrib_optim_sharding_type', 'fully_sharded_model_space'
            )

        # Handle FSDP DistributedOptimizer States
        if self.ddp_config.use_megatron_fsdp and sharding_type != "fsdp_dtensor":
            raise NotImplementedError(
                f"sharding_type {sharding_type} is not supported with Megatron FSDP."
            )
        if sharding_type == "fsdp_dtensor":
            state_dict = self.sharded_param_state_fsdp_dtensor(is_loading)
            return state_dict

        if not is_loading and sharding_type == 'fully_sharded_bucket_space':
            log_single_rank(
                logger,
                logging.WARNING,
                '`fully_sharded_bucket_space` sharding for DistributedOptimizer'
                ' checkpoint is deprecated and will be removed in the future.'
                ' Please switch to `full_sharded_model_space`.',
            )

        state_dict = self.state_dict()
        if sharding_type not in self.checkpoint_fully_reshardable_formats:
            # State dict differs between different model parallel groups
            state_dict = {
                k: ShardedObject(
                    f'optimizer.distributed.dp_group_idx_{self.data_parallel_group_idx}.{k}',
                    v,
                    (1,),
                    (0,),
                    replica_id=(
                        self.distributed_optimizer_instance_id,
                        0,
                        self.data_parallel_group.rank(),
                    ),
                )
                for k, v in state_dict.items()
            }

        if is_loading:
            # Call the distributed optimizer's specialized load_state_dict(),
            # which conditionally skips re-allocating the optimizer's state if
            # already initialized, which in turn reduces memory fragmentation.
            self.load_state_dict(self.state_dict())
        if sharding_type == 'dp_reshardable':
            param_state = self.sharded_param_state_dp_reshardable(
                model_sharded_state_dict, is_loading, metadata
            )
        elif sharding_type == 'dp_zero_gather_scatter':
            # NOTE: this format will be deprecated
            param_state = self.sharded_param_state_dp_zero(
                model_sharded_state_dict, is_loading, metadata
            )
            gc.collect()  # Prevent memory leaks with GC disabled
        elif sharding_type == 'fully_reshardable':
            param_state = self.sharded_param_state_fully_reshardable(
                model_sharded_state_dict, is_loading, metadata
            )
            gc.collect()  # Prevent memory leaks with GC disabled
        elif sharding_type == 'fully_sharded_model_space':
            # NOTE: this format will be deprecated
            param_state = self.sharded_param_state_fs_model_space(
                model_sharded_state_dict, is_loading, metadata
            )
        else:
            raise NotImplementedError(f'Unknown sharding_type: {sharding_type}')

        state_dict['param_state'] = param_state
        state_dict['param_state_sharding_type'] = sharding_type
        return state_dict

    def _param_groups_to_param2group_meta(
        self, param_groups: list[dict[str, Any]]
    ) -> dict[str, Any]:
        """Convert a parameter group to a mapping of parameter names to group metadata."""
        param_to_group_meta = {}
        for group in param_groups:
            group_meta = group.copy()
            del group_meta["params"]
            for p in group["params"]:
                param_to_group_meta[self._param_name(p)] = group_meta
        return param_to_group_meta

    def _param2group_meta_to_param_groups(
        self,
        param_to_group_meta: dict[str, Any],
        param_groups: list[dict[str, Any]],
        strict: bool = True,
    ) -> list[dict[str, Any]]:
        """Convert a mapping of parameter names to group metadata to a list of parameter groups."""
        new_param_groups = []
        for group in param_groups:
            new_group = {"params": []}
            for param in group["params"]:
                param_name = self._param_name(param)
                if param_name not in param_to_group_meta:
                    if strict:
                        raise ValueError(
                            f"Parameter {param_name} not found in param_to_group_meta mapping."
                        )
                    continue
                group_meta = param_to_group_meta[param_name]
                new_group_wo_params = new_group.copy()
                del new_group_wo_params["params"]
                if new_group_wo_params and new_group_wo_params != group_meta:
                    error_info = (
                        f"Parameter {param_name} and the parameters in the same group "
                        f"{new_group['params']} have different metadata. Please check "
                        "that whether the checkpoint and current param_groups match. "
                        f"Parameter {param_name} has metadata {group_meta}, "
                        f"while others group metadata is {new_group}."
                    )
                    if strict:
                        raise ValueError(error_info)
                    else:
                        logger.warning(error_info)
                        continue
                new_group["params"].append(param_name)
                new_group.update(group_meta)
            new_param_groups.append(new_group)
        return new_param_groups

    def sharded_param_state_fsdp_dtensor(self, is_loading: bool = False):
        """
        Sharded state dict where each parameter is a separate PyTorch DTensor.
        """
        assert (
            self.ddp_config.use_megatron_fsdp
        ), "fsdp_dtensor sharding type is only supported with Megatron FSDP."

        # Initialize optimizer states with dummy values if loading.
        if is_loading:
            self._init_optimizer_states_with_dummy_values()

        # Get the optimizer's parameter groups in distributed key value format.
        param_to_group_meta = self._param_groups_to_param2group_meta(self.optimizer.param_groups)

        # Remap state to use order indices as keys
        packed_state = {
            (self._param_name(k) if isinstance(k, torch.Tensor) else k): v
            for k, v in self.state.items()
        }

        state_dict = {"state": packed_state, "param_to_group_meta": param_to_group_meta}
        return state_dict

    def sharded_param_state_dp_zero(
        self,
        model_sharded_state_dict: ShardedStateDict,
        is_loading: bool = False,
        metadata: Optional[dict] = None,
    ):
        """Naive implementation which reuses gather/scatter from the legacy ckpt format.

        During saving, gathers the parameters state on DP rank 0 and saves a ShardedObject
        with fixed TPxPP structure. During loading, loads the saved data on DP rank 0
        (None on other ranks). Relies on the parameters scatter done in load_state_dict.
        """
        if is_loading:
            param_state_data = None
        else:
            if self.distributed_optimizer_instance_id == 0:
                # Gather on rank 0
                param_state_data = self.get_parameter_state_dp_zero(use_gloo_comm=False)

        if self.data_parallel_group.rank() == 0 and self.distributed_optimizer_instance_id == 0:
            # Fixed TPxPP. Save on DP rank 0 only
            param_state = ShardedObject(
                f'optimizer.distributed.dp_group_idx_{self.data_parallel_group_idx}.param_state',
                param_state_data,  # pylint: disable=E0606
                (1,),
                (0,),
            )
        else:
            # DP ranks > 0 don't save. During loading, the param_state needs to be None.
            param_state = LocalNonpersistentObject(None)

        return param_state

    def sharded_param_state_fully_reshardable(
        self,
        model_sharded_state_dict: ShardedStateDict,
        is_loading: bool = False,
        metadata: Optional[dict] = None,
    ):
        """Exchange based format in model space representation.

        `fully_reshardable` format involves gathering the tensors on DP rank 0 during save.
        Flat DistOpt buffers are unflattened and reshaped into model param like sizes.
        This results in a state dict similar to a regular optimizer one, where each
        param of shape (X, Y, Z) has corresponding 'param', 'exp_avg' and 'exp_avg_sq'
        tensors of shape (X, Y, Z) in the optimizer state dict.

        During loading there is no data exchange - each rank requests to load the whole
        state dict (and flattens and trims the tensors afterwards). It is recommended
        to use fully parallel loading which will parallelize the load and avoid duplicated
        read from storage.

        Args:
            model_sharded_state_dict (ShardedStateDict): model sharded state dict
            is_loading (bool, optional): Whether the optimizer sharded state dict
                is used for loading or saving. Defaults to False.
            metadata (dict, optional): metadata passed to sharded_state_dict method.
                Allows some detailed control over the sharded state dict creation with
                `distrib_optim_fully_reshardable_mem_efficient` flag which enables memory
                efficient exchange. By default (False), data will be all_gathered with NCCL
                to all ranks which allows to further parallelize the save and load, but can
                use more memory. In memory efficient version (True) data is gather with Gloo
                and returned only on DP rank 0 (which prevent save/load parallelization along DP).
                The checkpoint storage structure can differ between those two flags, but from
                MCore perspective they are interchangeable.

        Returns:
            ShardedStateDict or None: optimizer sharded state dict if memory efficient mode is off
                (see flag `distrib_optim_fully_reshardable_mem_efficient` explanation above)
                or during checkpoint loading (`is_loading`). Otherwise, the sharded state dict
                is returned only on DP rank 0 (None on other ranks).
        """
        if metadata.get('distrib_optim_fully_reshardable_mem_efficient', False):
            use_gloo_comm = True
            return_on_all_ranks = False
        else:
            use_gloo_comm = False
            return_on_all_ranks = True
        dp_zero_state_dict = self.get_parameter_state_dp_zero(
            use_gloo_comm=use_gloo_comm,
            empty_data=is_loading,
            return_on_all_ranks=return_on_all_ranks or is_loading,
        )

        param_to_sharded_metadata = {}
        model_sharded_state_dict, _ = extract_sharded_tensors_and_factories(
            model_sharded_state_dict
        )
        for sh_base in nested_values(model_sharded_state_dict):
            param_to_sharded_metadata[sh_base.data] = sh_base

        prefix = 'optimizer.state'
        model_space_state = {}
        param_idx = 0

        if dp_zero_state_dict is None:
            return None

        for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges):
            buffer = self.buffers[gbuf_idx]
            assert len(gbuf_range_maps) == 1, "single dtype supported, for now."
            for dtype, gbuf_range_map_for_all_buckets in gbuf_range_maps.items():
                world_tensors = dp_zero_state_dict[gbuf_idx][dtype]
                world_tensor_keys = world_tensors.keys()
                for model_param, (
                    param_world_start,
                    param_world_end,
                    _,
                ) in buffer.param_index_map.items():
                    try:
                        sharded_metadata = param_to_sharded_metadata[model_param]
                    except KeyError as e:
                        raise ValueError(
                            f"Model param {model_param} not in model_sharded_state_dict."
                            f" Hint: {KEEP_VARS_HINT}"
                        ) from e
                    assert (
                        sharded_metadata.flattened_range is None
                    ), f"Flattened model tensor not supported ({sharded_metadata})"

                    # Note: replica_id is exactly the same as in the model param
                    replica_id = sharded_metadata.replica_id

                    tensors = {}
                    for state_key in world_tensor_keys:
                        if state_key == 'step' or state_key == 'numel_unpadded':
                            # The optimizer state of STEP is handled
                            # specifically and is read from param_groups.
                            # Numel unpadded is not needed.
                            continue
                        state_ten = world_tensors[state_key][param_world_start:param_world_end]
                        missing_elems_num = (param_world_end - param_world_start) - len(state_ten)

                        if missing_elems_num > 0:
                            # `state_ten` is shorter than the slice which means the world_tensor
                            # is shorter than `param_world_end` - this is a bug in the param ranges
                            # logic. Here we can only pad this with zeros as a workaround.
                            # TODO: this assert shouldn't hold and indicates a bug, see issue #504
                            assert param_world_end > buffer.numel_unpadded

                            logger.warning(
                                f"'{sharded_metadata.key}' param range exceeds"
                                f" unpadded buffer by {missing_elems_num} elements."
                                f" It will be padded with zeros which can lead to"
                                f" data corruption."
                            )
                            state_ten = torch.nn.functional.pad(state_ten, (0, missing_elems_num))

                        assert len(state_ten) == param_world_end - param_world_start, (
                            len(state_ten),
                            param_world_end - param_world_start,
                        )
                        state_ten = state_ten.reshape(sharded_metadata.data.shape)
                        replace_kwargs = dict(
                            key=f'{prefix}.{state_key}.{sharded_metadata.key}',
                            data=state_ten,
                            dtype=state_ten.dtype,
                            replica_id=replica_id,
                        )
                        if isinstance(sharded_metadata, ShardedTensorFactory):
                            replace_kwargs.pop('dtype')
                        tensors[state_key] = replace(sharded_metadata, **replace_kwargs)
                        tensors[state_key].validate_metadata_integrity()
                    model_space_state[param_idx] = tensors
                    param_idx += 1

        return model_space_state

    def sharded_param_state_dp_reshardable(
        self,
        model_sharded_state_dict: ShardedStateDict,
        is_loading: bool = False,
        metadata: Optional[dict] = None,
    ):
        """Sharded state dict where each noncontiguous buffer is a separate ShardedTensor.

        Results in fully parallel save and load without any inter-process
        communication or intermediate buffers/copies.

        Stores optimizer state in the format that corresponds to the internal Distributed
        Optimizer format, i.e. in buckets. Each buckets consists of state parameters and
        potentially some padding:
        - intra-param padding
        - param 1
        - intra-param padding
        - param 2
        - intra-param padding
        - param ...
        - intra-param padding
        - param N
        - intra-param padding
        - bucket padding to some DP multiple

        Different buckets are assigned a different ShardedTensor key. Within each bucket,
        each param and each padding above is represented with a different ShardedTensor object
        sharing the same key (so, corresponding to the same tensor in the checkpoint).

        For checkpointing, we include the intra-param padding for correctness
        but we must discard the last padding to DP multiple, because that might
        change during DP resharding - we want the checkpoint tensor to always have size
        `gbuf_world_numel_unpadded` which means everything except for the last padding above.
        """
        data_parallel_rank = self.data_parallel_group.rank()
        data_parallel_world_size = self.data_parallel_group.size()

        state = self.get_parameter_state_dp_reshardable()
        # per_bucket_numel metadata is saved separately for each TPxPP domain.
        for per_bucket_key in ('per_bucket_numel', 'per_bucket_numel_unpadded'):
            key = (
                f'optimizer.distributed.dp_group_idx_{self.data_parallel_group_idx}'
                f'.{per_bucket_key}'
            )
            state[per_bucket_key] = ShardedObject(
                key,
                state[per_bucket_key],
                (1,),
                (0,),
                replica_id=(self.distributed_optimizer_instance_id, 0, data_parallel_rank),
            )

        for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges):
            for dtype, gbuf_range_map_for_all_buckets in state[gbuf_idx].items():
                for bucket_idx, bucket_state in enumerate(gbuf_range_map_for_all_buckets):
                    # Compute local DP contiguous shard's size.
                    gbuf_world_numel_unpadded = (
                        self.buffers[gbuf_idx].buckets[bucket_idx].numel_unpadded
                    )
                    gbuf_world_numel = self.buffers[gbuf_idx].buckets[bucket_idx].grad_data.numel()
                    assert gbuf_world_numel_unpadded <= gbuf_world_numel
                    assert gbuf_world_numel % data_parallel_world_size == 0
                    gbuf_local_numel = gbuf_world_numel // data_parallel_world_size

                    sharded_bucket_key = (
                        f'optimizer.distributed.dp_group_idx_{self.data_parallel_group_idx}'
                        f'.gbuf_idx_{gbuf_idx}.dtype_{dtype}.bucket_idx_{bucket_idx}'
                    )

                    # The global ckpt tensors must be fully covered.
                    # We add extra empty padding if necessary
                    assert bucket_state, 'empty bucket encountered'

                    # Insert padding between parameter tensors to ensure full coverage as needed.
                    all_pad_tensors = {}
                    for i in range(-1, len(bucket_state)):
                        if i == len(bucket_state) - 1:
                            # Potential padding at the end
                            next_param_start = gbuf_local_numel
                        else:
                            next_param_start = bucket_state[i + 1]['gbuf_local_start']
                        if i == -1:
                            # Potential padding at the front
                            cur_param_end = 0
                        else:
                            cur_param_end = bucket_state[i]['gbuf_local_end']
                        world_param_end = data_parallel_rank * gbuf_local_numel + cur_param_end
                        # Insert padding if there is a gap between next param,
                        # but not exceeding unpadded gbuf size
                        if (
                            next_param_start != cur_param_end
                            and world_param_end < gbuf_world_numel_unpadded
                        ):
                            pad_tensors = {
                                k: torch.empty(
                                    next_param_start - cur_param_end, dtype=v.dtype, device=v.device
                                )
                                for k, v in bucket_state[i].items()
                                if isinstance(v, torch.Tensor)
                            }
                            all_pad_tensors[i + 1] = {
                                **pad_tensors,
                                'gbuf_local_start': cur_param_end,
                                'gbuf_local_end': next_param_start,
                                'padding': True,
                            }

                    # Insert from end so that insertion positions are still correct.
                    indices_to_insert = sorted(list(all_pad_tensors.keys()))
                    for index_to_insert in reversed(indices_to_insert):
                        bucket_state.insert(index_to_insert, all_pad_tensors[index_to_insert])

                    # Each tensor is mapped to a slice
                    # of a DP-local shard of size `gbuf_local_numel`.
                    for bucket_params_idx in range(len(bucket_state)):
                        tensors = bucket_state[bucket_params_idx]
                        gbuf_local_start = tensors.pop('gbuf_local_start')
                        gbuf_local_end = tensors.pop('gbuf_local_end')
                        if 'padding' not in tensors:
                            tensors['padding'] = False

                        for key in tensors:
                            if key == 'padding':
                                tensors[key] = LocalNonpersistentObject(tensors[key])
                                continue
                            if key == 'step':
                                # The optimizer state of STEP is a 0-dim tensor and is handled
                                # separately via param_groups, not as part of the gradient buffer.
                                tensors[key] = LocalNonpersistentObject(tensors[key])
                                continue
                            assert tensors[key].shape == (gbuf_local_end - gbuf_local_start,), (
                                tensors[key].shape,
                                gbuf_local_start,
                                gbuf_local_end,
                            )

                            tensors[key] = ShardedTensor(
                                f'{sharded_bucket_key}.{key}',
                                tensors[key],
                                tensors[key].dtype,
                                tensors[key].shape,
                                (gbuf_world_numel_unpadded,),
                                (data_parallel_rank * gbuf_local_numel + gbuf_local_start,),
                                axis_fragmentations=None,
                                flattened_range=None,
                                allow_shape_mismatch=False,
                                replica_id=(self.distributed_optimizer_instance_id, 0, 0),
                            )
        return state

    def sharded_param_state_fs_model_space(
        self,
        model_sharded_state_dict: ShardedStateDict,
        is_loading: bool = False,
        metadata: Optional[dict] = None,
    ):
        """Sharded state dict where each buffer is mapped to corresponding model param.

        In this approach the optimizer state tensors are directly related to model parameters
        by linking them with metadata from `model_sharded_state_dict`.
        This will allow changing TP and PP while using DistOpt (as with other optimizers).
        """

        param_to_sharded_metadata = {}
        model_sharded_state_dict, _ = extract_sharded_tensors_and_factories(
            model_sharded_state_dict
        )
        for sh_base in nested_values(model_sharded_state_dict):
            param_to_sharded_metadata[sh_base.data] = sh_base

        prefix = 'optimizer.state'
        state = {}

        # Not stored in the checkpoint, used only to identify params in
        # `sharded_param_state_fs_model_space`.
        def _get_param_state_sharded_tensors(model_param, item_slice):
            # Main param & optimizer states.
            tensors = self._get_main_param_and_optimizer_states(model_param)
            tensors["fp32_param"] = tensors.pop("param")

            # Match optimizer parameter with model ShardedTensor (or
            # ShardedTensorFactory).
            try:
                sharded_metadata = param_to_sharded_metadata[model_param]
            except KeyError as e:
                raise ValueError(
                    f"Model param {model_param} not in model_sharded_state_dict"
                    f" Hint: {KEEP_VARS_HINT}"
                ) from e

            # Set DP corresponding replica_id coordinate to 0.
            assert (
                len(sharded_metadata.replica_id) == 3
            ), f'Expected replica_id format (PP, TP, DP), got: {sharded_metadata}'
            replica_id = (*sharded_metadata.replica_id[:2], self.distributed_optimizer_instance_id)

            # Instantiate ShardedTensor (or ShardedTensorFactory) for optimizer
            # params.
            for state_key, state_ten in tensors.items():
                if state_key == 'step':
                    # Note that step is a 0-dim tensor, unlike other
                    # states have the same size as the parameter.
                    # The optimizer state of STEP is handled
                    # specifically and is read from param_groups.
                    continue
                replace_kwargs = dict(
                    key=f'{prefix}.{state_key}.{sharded_metadata.key}',
                    data=state_ten,
                    dtype=state_ten.dtype,
                    flattened_range=item_slice,
                    replica_id=replica_id,
                )
                if isinstance(sharded_metadata, ShardedTensorFactory):
                    replace_kwargs.pop('dtype')
                tensors[state_key] = replace(sharded_metadata, **replace_kwargs)
                tensors[state_key].validate_metadata_integrity()
            return tensors

        # Not stored in the checkpoint, used only to identify params in
        # `sharded_param_state_fs_model_space`.
        param_idx = 0
        for gbuf_range_maps in self.gbuf_ranges:
            for gbuf_range_map_for_all_buckets in gbuf_range_maps.values():
                for gbuf_range_map in gbuf_range_map_for_all_buckets:
                    for model_param, param_range_map in gbuf_range_map["param_map"].items():
                        param_range = param_range_map['param']
                        tensors = _get_param_state_sharded_tensors(
                            model_param, slice(param_range.start, param_range.end)
                        )
                        state[param_idx] = tensors
                        param_idx += 1
        return state

    def load_parameter_state_from_dp_reshardable(self, state_dict):
        """Loads the parameter state from an internal representation.

        Inverse of the `get_parameter_state_dp_reshardable` method.
        """
        if state_dict is not None and "per_bucket_numel_unpadded" in state_dict:
            per_bucket_numel_unpadded_in_checkpoint = state_dict["per_bucket_numel_unpadded"]
            assert self.per_bucket_numel_unpadded == per_bucket_numel_unpadded_in_checkpoint, (
                f"Number of unpadded elements in each bucket need to be the same in current run "
                f"({self.per_bucket_numel_unpadded}) and checkpoint "
                f"({per_bucket_numel_unpadded_in_checkpoint})"
            )

        for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges):
            assert len(gbuf_range_maps) == 1, "single dtype supported, for now."
            for dtype, gbuf_range_map_for_all_buckets in gbuf_range_maps.items():
                for bucket_idx, gbuf_range_map in enumerate(gbuf_range_map_for_all_buckets):
                    bucket_state = state_dict[gbuf_idx][dtype][bucket_idx]
                    bucket_state = [
                        bucket_state_elem
                        for bucket_state_elem in bucket_state
                        if not bucket_state_elem['padding']
                    ]

                    assert len(bucket_state) == len(gbuf_range_map["param_map"]), (
                        len(bucket_state),
                        len(gbuf_range_map["param_map"]),
                    )
                    for src_tensors, (model_param, param_range_map) in zip(
                        bucket_state, gbuf_range_map["param_map"].items()
                    ):
                        # Main param & optimizer states.
                        self._set_main_param_and_optimizer_states(model_param, src_tensors)

    @torch.no_grad()
    def load_parameter_state_from_fs_model_space(self, state_dict):
        """Loads the parameter state from a "model space" representation.

        Inverse of the `sharded_param_state_fs_model_space` method.
        """
        param_idx = 0  # matching order with `sharded_param_state_fs_model_space`
        for gbuf_range_maps in self.gbuf_ranges:
            for gbuf_range_map_for_all_buckets in gbuf_range_maps.values():
                for gbuf_range_map in gbuf_range_map_for_all_buckets:
                    for model_param, param_range_map in gbuf_range_map["param_map"].items():
                        src_tensors = {}
                        for k, v in state_dict[param_idx].items():
                            if k == "step":
                                # Handle torch Adam "step" state separately.
                                continue
                            if k == "fp32_param":
                                src_tensors["param"] = v
                            else:
                                src_tensors[k] = v
                        self._set_main_param_and_optimizer_states(model_param, src_tensors)
                        param_idx += 1
        if isinstance(self.optimizer, HybridDeviceOptimizer):
            self.optimizer._sync_hdo_state_to_sub_optimizers()

    @classmethod
    def _update_legacy_world_tensors(cls, old_tensors, new_numels):
        '''Reshard buckets (where each bucket is a tensor) to new target
        numels, where the total numel remains the same.'''

        old_total = sum([t.numel() for t in old_tensors])
        new_total = sum(new_numels)

        assert old_total == new_total

        unified_tensor = torch.cat(old_tensors, dim=0)

        new_tensors = []
        start_idx = 0
        for new_numel in new_numels:
            new_tensors.append(unified_tensor[start_idx : (start_idx + new_numel)])
            start_idx += new_numel

        return new_tensors

    def load_parameter_state_from_dp_zero_legacy(self, state_dict):
        """Load parameter state (i.e., parameter & optimizer tensors) from DP 0 rank,
        using the legacy checkpoint format as described below.

        The difference between this method and `load_parameter_state_from_dp_zero_modern()`
        is that this method is used for updating the format of checkpoints that
        were saved using code from before Feb 13, 2024. Starting on this date, a
        new format was used (i.e., different format for the parameter mapping and
        bucket sharding).

        Use arg `--ckpt-convert-update-legacy-dist-opt-format` to call this
        method, along with `--ckpt-convert-format` and `--ckpt-convert-save` to
        update a legacy-format checkpoint to the modern format.
        """

        # Data parallelism variables.
        assert self.data_parallel_group_gloo is not None
        data_parallel_world_size = self.data_parallel_group_gloo.size()
        data_parallel_rank = self.data_parallel_group_gloo.rank()
        data_parallel_group_gloo = self.data_parallel_group_gloo
        data_parallel_global_ranks = torch.distributed.get_process_group_ranks(
            self.data_parallel_group_gloo
        )

        # Scatter tensors to all DP ranks.
        for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges):
            for dtype, gbuf_range_map_for_all_buckets in gbuf_range_maps.items():
                if data_parallel_rank == 0:
                    buffer_numel_unpadded = self.buffers[gbuf_idx].numel_unpadded
                    model_numels = [b.numel_unpadded for b in self.buffers[gbuf_idx].buckets]
                    checkpoint_numels = [
                        t.numel() for t in state_dict[gbuf_idx][torch.float32]["param"]
                    ]
                    assert sum(model_numels) == sum(checkpoint_numels)
                for key in ("param", "exp_avg", "exp_avg_sq"):
                    legacy_world_tensors = self._update_legacy_world_tensors(
                        state_dict[gbuf_idx][torch.float32][key],
                        [
                            self.buffers[gbuf_idx].buckets[bi].numel_unpadded
                            for bi in range(len(gbuf_range_map_for_all_buckets))
                        ],
                    )
                    offset_in_world_tensors = 0
                    for bucket_idx, gbuf_range_map in enumerate(gbuf_range_map_for_all_buckets):
                        # Compute local DP contiguous shard's size.
                        gbuf_world_numel = (
                            self.buffers[gbuf_idx].buckets[bucket_idx].grad_data.numel()
                        )
                        assert gbuf_world_numel % data_parallel_world_size == 0
                        gbuf_local_numel = gbuf_world_numel // data_parallel_world_size
                        gbuf_world_numel_unpadded = (
                            self.buffers[gbuf_idx].buckets[bucket_idx].numel_unpadded
                        )
                        assert gbuf_world_numel_unpadded <= gbuf_world_numel

                        # Contiguous local shards (received from DP rank 0).
                        recv_tensor = torch.zeros(
                            (gbuf_local_numel,), dtype=torch.float32, device="cpu"
                        )

                        # Scatter tensor list.
                        if data_parallel_rank == 0:

                            start = offset_in_world_tensors
                            end = offset_in_world_tensors + gbuf_world_numel_unpadded

                            world_tensor = legacy_world_tensors[bucket_idx]
                            assert (
                                world_tensor.numel() == gbuf_world_numel_unpadded
                            ), "%d vs. %d." % (world_tensor.numel(), gbuf_world_numel_unpadded)
                            offset_in_world_tensors += gbuf_world_numel_unpadded

                            # Pad world_tensor to gbuf_world_numel. Don't pad at the front,
                            # pad at the back.
                            world_tensor = torch.nn.functional.pad(
                                world_tensor, (0, gbuf_world_numel - gbuf_world_numel_unpadded)
                            )
                            assert world_tensor.numel() == gbuf_world_numel
                            gbuf_start_idxs = list(range(0, gbuf_world_numel, gbuf_local_numel))
                            send_tensors = [
                                world_tensor[i : (i + gbuf_local_numel)] for i in gbuf_start_idxs
                            ]
                        else:
                            send_tensors = None

                        # Scatter.
                        torch.distributed.scatter(
                            recv_tensor,
                            send_tensors,
                            data_parallel_global_ranks[0],
                            data_parallel_group_gloo,
                        )

                        # Copy local contiguous shards to param/optim shards.
                        for model_param, param_range_map in gbuf_range_map["param_map"].items():

                            # Main param & optimizer states.
                            group_index, group_order = self.model_param_group_index_map[model_param]
                            main_param = self.optimizer.param_groups[group_index]["params"][
                                group_order
                            ]
                            if key == "param":
                                tensor_to_copy_into = main_param
                            else:
                                optim_state = self.optimizer.state[main_param]
                                tensor_to_copy_into = optim_state[key]

                            # Copy states into contiguous shard.
                            gbuf_local_start = param_range_map["gbuf_local"].start
                            gbuf_local_end = param_range_map["gbuf_local"].end
                            tensor_to_copy_into.data.copy_(
                                recv_tensor[gbuf_local_start:gbuf_local_end]
                            )

    def load_parameter_state_from_dp_zero(self, state_dict, *, update_legacy_format=False):
        """Load parameter state (i.e., parameter & optimizer tensors) from DP 0 rank,
        using the new checkpoint format with coalesced state across buckets.

        This method performs the reverse of get_parameter_state_dp_zero():
        - Scatter contiguous buffers from DP rank 0 to each DP rank (each DP
          rank receives its relevant subset of the world buffers).
        - For each DP rank, copy param & optimizer shards from contiguous CPU
          buffers. (e.g., one buffer each for main_param, exp_avg, and
          exp_avg_sq).
        """

        # Selectively load from a legacy checkpoint. The legacy format was used
        # prior to Feb 13, 2024.
        if update_legacy_format:
            return self.load_parameter_state_from_dp_zero_legacy(state_dict)

        # Data parallelism variables.
        assert self.data_parallel_group_gloo is not None
        data_parallel_world_size = self.data_parallel_group_gloo.size()
        data_parallel_rank = self.data_parallel_group_gloo.rank()
        data_parallel_group_gloo = self.data_parallel_group_gloo
        data_parallel_global_ranks = torch.distributed.get_process_group_ranks(
            self.data_parallel_group_gloo
        )

        if data_parallel_rank == 0:
            # Do nothing if "--fp8-param-gather" is not used.
            self.split_state_dict_if_needed(state_dict)

        # Scatter tensors to all DP ranks.
        for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges):
            for dtype, gbuf_range_map_for_all_buckets in gbuf_range_maps.items():
                if data_parallel_rank == 0:
                    buffer_numel_unpadded = self.buffers[gbuf_idx].numel_unpadded
                    checkpoint_numel_unpadded = state_dict[gbuf_idx][dtype]["numel_unpadded"]
                    assert buffer_numel_unpadded == checkpoint_numel_unpadded, (
                        f"Number of unpadded elements must be same in current run "
                        f"({buffer_numel_unpadded}) and checkpoint ({checkpoint_numel_unpadded})"
                    )
                recv_tensors = {}
                for key in ("param", "exp_avg", "exp_avg_sq"):
                    offset_in_world_tensors = 0
                    for bucket_idx, gbuf_range_map in enumerate(gbuf_range_map_for_all_buckets):
                        # Compute local DP contiguous shard's size.
                        gbuf_world_numel = (
                            self.buffers[gbuf_idx].buckets[bucket_idx].grad_data.numel()
                        )
                        assert gbuf_world_numel % data_parallel_world_size == 0
                        gbuf_local_numel = gbuf_world_numel // data_parallel_world_size
                        gbuf_world_numel_unpadded = (
                            self.buffers[gbuf_idx].buckets[bucket_idx].numel_unpadded
                        )
                        assert gbuf_world_numel_unpadded <= gbuf_world_numel

                        # Contiguous local shards (received from DP rank 0).
                        recv_tensor = torch.zeros(
                            (gbuf_local_numel,), dtype=torch.float32, device="cpu"
                        )

                        # Scatter tensor list.
                        if data_parallel_rank == 0:
                            world_tensors = state_dict[gbuf_idx][dtype][key]

                            start = offset_in_world_tensors
                            end = offset_in_world_tensors + gbuf_world_numel_unpadded
                            assert 0 <= start < end <= world_tensors.numel()
                            world_tensor = world_tensors[start:end]
                            offset_in_world_tensors += gbuf_world_numel_unpadded

                            # Pad world_tensor to gbuf_world_numel. Don't pad at the front,
                            # pad at the back.
                            world_tensor = torch.nn.functional.pad(
                                world_tensor, (0, gbuf_world_numel - gbuf_world_numel_unpadded)
                            )
                            assert world_tensor.numel() == gbuf_world_numel
                            gbuf_start_idxs = list(range(0, gbuf_world_numel, gbuf_local_numel))
                            send_tensors = [
                                world_tensor[i : (i + gbuf_local_numel)] for i in gbuf_start_idxs
                            ]
                        else:
                            send_tensors = None

                        # Scatter.
                        torch.distributed.scatter(
                            recv_tensor,
                            send_tensors,
                            data_parallel_global_ranks[0],
                            data_parallel_group_gloo,
                        )

                        for model_param, param_range_map in gbuf_range_map["param_map"].items():
                            # Copy states into contiguous shard.
                            gbuf_local_start = param_range_map["gbuf_local"].start
                            gbuf_local_end = param_range_map["gbuf_local"].end
                            if model_param not in recv_tensors:
                                recv_tensors[model_param] = {}
                            recv_tensors[model_param][key] = recv_tensor[
                                gbuf_local_start:gbuf_local_end
                            ]

                for model_param, tensors in recv_tensors.items():
                    self._set_main_param_and_optimizer_states(model_param, tensors)

    @torch.no_grad()
    def load_parameter_state_from_fully_reshardable(self, state_dict: dict):
        """Load counterpart of sharded_param_state_fully_reshardable.

        Iterates over the `state_dict` tensors (in the same order as
        `sharded_param_state_fully_reshardable` which determines the state dict tensors
        order), flattens and trims them according to local param ranges.

        Args:
            state_dict (dict): loaded optimizer state dict
        """
        param_idx = -1
        for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges):
            assert len(gbuf_range_maps) == 1, "single dtype supported, for now."
            for gbuf_range_map_for_all_buckets in gbuf_range_maps.values():
                all_buckets_param_range_map = ChainMap(
                    *(
                        gbuf_range_map["param_map"]
                        for gbuf_range_map in gbuf_range_map_for_all_buckets
                    )
                )
                # We only need locally tracked params, but we must match iteration order
                # (`param_idx`) with `sharded_param_state_fully_reshardable` order
                for model_param, (param_world_start, param_world_end, _) in self.buffers[
                    gbuf_idx
                ].param_index_map.items():
                    param_idx += 1  # increment even if skip param update
                    if model_param not in all_buckets_param_range_map:
                        continue
                    param_range_map = all_buckets_param_range_map[model_param]
                    src_tensors = {}
                    for k, v in state_dict[param_idx].items():
                        if k == "step":
                            # Handle torch Adam "step" state separately.
                            continue
                        v_flat = v.flatten()
                        v_flat = v_flat[
                            param_range_map["param"].start : param_range_map["param"].end
                        ]
                        assert (
                            len(v_flat)
                            == param_range_map["param"].end - param_range_map["param"].start
                        ), (len(v_flat), param_range_map["param"])
                        if k == "fp32_param":
                            src_tensors["param"] = v_flat
                        else:
                            src_tensors[k] = v_flat
                    self._set_main_param_and_optimizer_states(model_param, src_tensors)
        if isinstance(self.optimizer, HybridDeviceOptimizer):
            self.optimizer._sync_hdo_state_to_sub_optimizers()

    def split_state_dict_if_needed(self, state_dict):
        """
        When "--fp8-param-gather" is disabled, weights and biases are stored in the same
        `_ParamAndGradBuffer`. So, when saving a checkpoint, the optimizer's main parameters are
        saved in a single continuous tensor (this also applies to "exp_avg" and "exp_avg_sq").

        However, when "--fp8-param-gather" is enabled, weights(in fp8 dtype) and biases(in bf16/fp16
        dtype) are stored in separate `_ParamAndGradBuffer`. Therefore, when we enabled
        "--fp8-param-gather", and want to load a checkpoint saved without "--fp8-param-gather", we
        need to split the weights(fp8) and biases(bf16/fp16) in the static_dict into two separate
        tensors.
        """
        # Skip if there is no fp8 buffers.
        fp8_gbuf_indices = []
        for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges):
            for dtype, _ in gbuf_range_maps.items():
                if is_float8tensor(self.buffers[gbuf_idx].params[0]):
                    fp8_gbuf_indices.append(gbuf_idx)
        if len(fp8_gbuf_indices) == 0:
            return

        dtype_to_gbuf_idx = {}
        for key in state_dict.keys():
            if key != 'buckets_coalesced':
                for dtype in state_dict[key].keys():
                    assert dtype not in dtype_to_gbuf_idx
                    if dtype[0] == torch.uint8:
                        # If the `state_dict`` already contains a torch.uint8 buffer, we assumed
                        # that the fp8 weights and fp16/bf16 biases in the checkpoint are already
                        # separated. In this case, no action is required, so we can return directly.
                        return
                    dtype_to_gbuf_idx[dtype] = key

        # 1. Replace the gbuf_idx in the checkpoint with the new gbuf_idx.
        # 2. Copy the non-tensor data (i.e., the "buckets_coalesced") to `new_state_dict`.
        new_state_dict = {'buckets_coalesced': state_dict['buckets_coalesced']}
        for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges):
            for dtype, _ in gbuf_range_maps.items():
                if not is_float8tensor(self.buffers[gbuf_idx].params[0]):
                    new_state_dict[gbuf_idx] = state_dict[dtype_to_gbuf_idx[dtype]]

        for fp8_gbuf_idx in fp8_gbuf_indices:
            # Note that `self.buffers[fp8_gbuf_idx].params[0].dtype` is the dummy dtype of
            # `Float8Tensor`, not torch.uint8.
            non_fp8_param_and_grad_dtype = (
                self.buffers[fp8_gbuf_idx].params[0].dtype,
                self.buffers[fp8_gbuf_idx].grad_dtype,
            )

            # Iterate through all buffers to find the one that needs to be split.
            non_fp8_gbuf_idx = None
            for gbuf_idx, gbuf_range_maps in enumerate(self.gbuf_ranges):
                for dtype, _ in gbuf_range_maps.items():
                    if dtype == non_fp8_param_and_grad_dtype:
                        non_fp8_gbuf_idx = gbuf_idx
            assert non_fp8_gbuf_idx is not None

            # We need the fp8_flags to determine the order of weight (fp8) and bias (fp16/bf16) in
            # the buffer.
            index_to_fp8_map = {}
            for index in self.buffers[fp8_gbuf_idx].param_indices:
                assert index not in index_to_fp8_map
                index_to_fp8_map[index] = True
            for index in self.buffers[non_fp8_gbuf_idx].param_indices:
                assert index not in index_to_fp8_map
                index_to_fp8_map[index] = False
            param_indices = (
                self.buffers[fp8_gbuf_idx].param_indices
                + self.buffers[non_fp8_gbuf_idx].param_indices
            )
            assert min(param_indices) == 0
            assert max(param_indices) == len(param_indices) - 1
            fp8_flags = []
            for i in range(len(param_indices)):
                fp8_flags.append(index_to_fp8_map[i])

            fp8_buffer = self.buffers[fp8_gbuf_idx]
            non_fp8_buffer = self.buffers[non_fp8_gbuf_idx]

            fp8_idx = len(fp8_buffer.params) - 1
            non_fp8_idx = len(non_fp8_buffer.params) - 1
            offsets, fp8_offsets, non_fp8_offsets = [0], [0], [0]

            # Because the parameters in `_ParamAndGradBuffer` are traversed in reverse order, the
            # flag here also needs to be traversed in reverse order.
            for fp8_flag in fp8_flags[::-1]:
                if fp8_flag:
                    numel = fp8_buffer.params[fp8_idx].nelement()
                    fp8_idx -= 1
                    offsets.append(offsets[-1] + numel)
                    fp8_offsets.append(fp8_offsets[-1] + numel)
                else:
                    numel = non_fp8_buffer.params[non_fp8_idx].nelement()
                    non_fp8_idx -= 1
                    offsets.append(offsets[-1] + numel)
                    non_fp8_offsets.append(non_fp8_offsets[-1] + numel)

            # Split the target buffer into two separate buffers.
            fp8_state_dict, non_fp8_state_dict = {}, {}
            for key in ['param', 'exp_avg', 'exp_avg_sq']:
                tensor = state_dict[non_fp8_gbuf_idx][non_fp8_param_and_grad_dtype][key]
                fp8_tensor = torch.empty([fp8_offsets[-1]], dtype=tensor.dtype)
                non_fp8_tensor = torch.empty([non_fp8_offsets[-1]], dtype=tensor.dtype)

                fp8_idx, non_fp8_idx = 0, 0
                for i in range(len(offsets) - 1):
                    if fp8_flags[-(i + 1)]:
                        fp8_tensor[fp8_offsets[fp8_idx] : fp8_offsets[fp8_idx + 1]].copy_(
                            tensor[offsets[i] : offsets[i + 1]]
                        )
                        fp8_idx += 1
                    else:
                        non_fp8_tensor[
                            non_fp8_offsets[non_fp8_idx] : non_fp8_offsets[non_fp8_idx + 1]
                        ].copy_(tensor[offsets[i] : offsets[i + 1]])
                        non_fp8_idx += 1

                fp8_state_dict[key] = fp8_tensor
                non_fp8_state_dict[key] = non_fp8_tensor

            fp8_state_dict['numel_unpadded'] = fp8_offsets[-1]
            non_fp8_state_dict['numel_unpadded'] = non_fp8_offsets[-1]

            # Add the two separate buffers into `new_state_dict`.
            new_state_dict[fp8_gbuf_idx] = {}
            new_state_dict[fp8_gbuf_idx][(torch.uint8, fp8_buffer.grad_dtype)] = fp8_state_dict
            new_state_dict[non_fp8_gbuf_idx][non_fp8_param_and_grad_dtype] = non_fp8_state_dict

        # Inplace update state_dict
        state_dict.clear()
        for key, value in new_state_dict.items():
            state_dict[key] = value

    def load_parameter_state(self, filename: str, *, update_legacy_format=False):
        """Load the distributed parameter state from disk.

        Args:
            filename (str): path to load parameter state from.
        """
        if self.is_stub_optimizer:
            return
        state_dict = None
        if self.data_parallel_group.rank() == 0:
            state_dict = torch.load(filename)

        self.load_parameter_state_from_dp_zero(
            state_dict, update_legacy_format=update_legacy_format
        )

    def zero_grad(self, set_to_none: bool = True):
        """
        Zeroes grads for the model related parameters, i.e., model_float16_groups
        and model_fp32_groups. We additionally zero the remaining groups as a
        memory optimization to reduce fragmentation; in the case of
        set_to_none==True, the space used by this field can be safely deallocated.

        Args:
            set_to_none (bool): if true, set grads to None.
        """
        if self.ddp_config.use_megatron_fsdp:
            for model_chunk in self.model_chunks:
                model_chunk.zero_grad_buffer()
            return

        if self.is_stub_optimizer:
            return
        total_groups = [
            self.model_float16_groups,
            self.model_fp32_groups,
            self.shard_float16_groups,  # grad empty/unused here?
            self.shard_fp32_groups,  # throws grad-access warning
        ]
        if not self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8:
            total_groups.append(self.shard_fp32_from_float16_groups)
        for groups in total_groups:
            for group in groups:
                _zero_grad_group_helper(
                    group, set_to_none, self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8
                )

    def _collect_main_grad_data_for_unscaling(self):
        """
        Note: this should be equivalent to the float-16 optimizer's method,
        but written differently, so the two should be combined.
        """
        if self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8:
            return [
                param.decoupled_grad.data
                for group in self.optimizer.param_groups
                for param in group["params"]
            ]
        else:
            return [
                param.grad.data
                for group in self.optimizer.param_groups
                for param in group["params"]
            ]

    def _get_model_and_main_params_data_float16(self):
        """
        Get aligned list of model and main params.
        """
        model_data = []
        main_data = []
        for model_group, main_group in zip(
            self.shard_float16_groups, self.shard_fp32_from_float16_groups
        ):
            for model_param, main_param in zip(model_group, main_group):
                model_data.append(model_param.data)
                if self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8:
                    main_data.append(None)
                else:
                    main_data.append(main_param.data)
        return model_data, main_data

    def _get_fp8_params_and_shard_fp32_from_fp8(self):
        """
        Get lists of FP8 model params, corresponding shard main params, and the starting index of
        the shard main param in the FP8 param. Parameters in all three lists are in the same order.
        """
        fp8_params = []
        shard_fp32_from_fp8 = []
        shard_offsets_in_fp8 = []

        if self.ddp_config.use_megatron_fsdp:
            buffers = []
            for m in self.model_chunks:
                for group in m.param_and_grad_buffer.parameter_groups:
                    mbuf = group.model_weight_buffer
                    buffers.append(mbuf)
        else:
            buffers = self.buffers

        # Iterate over all parameters inside this optimizer to find FP8 parameters.
        fp8_param_to_idx_map = {}
        idx = 0
        for buffer in buffers:
            for param in buffer.params:
                if is_float8tensor(param):
                    fp8_params.append(param)
                    shard_fp32_from_fp8.append(None)
                    shard_offsets_in_fp8.append(None)
                    fp8_param_to_idx_map[param] = idx
                    idx += 1

        def get_shard_fp32_from_fp8(shard_main_groups, model_groups):
            """
            Traverse the param groups and collect the fp8 params, their corresponding main params
            and the starting offsets of the main params in the model params. Store them into three
            different lists.
            """
            for shard_main_group, model_group in zip(shard_main_groups, model_groups):
                for shard_main_param, model_param in zip(shard_main_group, model_group):
                    if is_float8tensor(model_param):
                        param_range_map = self._get_model_param_range_map(model_param)
                        param_range = param_range_map["param"]
                        assert param_range.size == shard_main_param.nelement()
                        idx = fp8_param_to_idx_map[model_param]
                        shard_fp32_from_fp8[idx] = shard_main_param
                        shard_offsets_in_fp8[idx] = param_range.start

        get_shard_fp32_from_fp8(self.shard_fp32_from_float16_groups, self.model_float16_groups)
        get_shard_fp32_from_fp8(self.shard_fp32_groups, self.model_fp32_groups)

        return fp8_params, shard_fp32_from_fp8, shard_offsets_in_fp8

    def _copy_model_grads_to_main_grads(self):
        """
        Copy model grads to main grads.

        Since this step follows a reduce-scatter through the DDP's grad
        buffer, this method is responsible for copying the updated grads
        from the grad buffer to the main shard's grad field.
        """
        if self.is_stub_optimizer:
            return

        if self.ddp_config.use_megatron_fsdp:
            return

        # Utility method for copying group grads.
        def copy_group_grads(model_groups, shard_main_groups):
            for model_group, shard_main_group in zip(model_groups, shard_main_groups):
                for model_param, shard_main_param in zip(model_group, shard_main_group):

                    param_range_map = self._get_model_param_range_map(model_param)
                    param_range = param_range_map["param"]
                    assert param_range.size == shard_main_param.nelement()

                    model_grad = model_param.main_grad
                    shard_model_grad = model_grad.view(-1)[param_range.start : param_range.end]
                    if self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8:
                        # Pytorch requires a param and its' grad to be the same dtype, but we want
                        # their types to be different in precision-aware optimizer. So we use
                        # ".decoupled_grad" to replace ".grad".
                        # Note that this requires corresponding modifications in the optimizer (Let
                        # the optimizer read gradients from ".decoupled_grad" instead of ".grad").
                        shard_main_param.decoupled_grad = shard_model_grad
                    else:
                        shard_main_param.grad = shard_model_grad.float()

        # Copy model groups to shard groups.
        if self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8:
            copy_group_grads(self.model_float16_groups, self.shard_float16_groups)
            copy_group_grads(self.model_fp32_groups, self.shard_fp32_groups)
        else:
            copy_group_grads(self.model_float16_groups, self.shard_fp32_from_float16_groups)
            copy_group_grads(self.model_fp32_groups, self.shard_fp32_groups)

    def _copy_main_params_to_model_params(self):
        """
        Copy main params to model params.

        Since this step is followed by an all-gather through the DDP's grad
        buffer, this method is responsible for copying the updated params
        from the main shards into the correct position in the grad buffer.
        """
        if self.is_stub_optimizer:
            return

        if self.ddp_config.use_megatron_fsdp:
            for model_chunk in self.model_chunks:
                model_chunk.param_and_grad_buffer.copy_main_weights_to_model_weights()
            return

        # When using precision-aware optimizer, main params are held by self.optimizer. It will also
        # do the work of copying data from main params to model params.
        if self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8:
            return

        quantize_param_shard(
            *self._get_fp8_params_and_shard_fp32_from_fp8(), self.data_parallel_group
        )

        # Utility method for copying group params.
        def copy_group_params(shard_main_groups, model_groups):
            for shard_main_group, model_group in zip(shard_main_groups, model_groups):
                for shard_main_param, model_param in zip(shard_main_group, model_group):

                    param_range_map = self._get_model_param_range_map(model_param)
                    world_range = param_range_map["gbuf_world_in_bucket"]

                    assert world_range.size == shard_main_param.nelement()

                    gbuf_index, _, bucket_id = self.model_param_gbuf_map[model_param]
                    model_param_buffer = self.buffers[gbuf_index].buckets[bucket_id].param_data

                    shard_model_param = model_param_buffer.view(-1)[
                        world_range.start : world_range.end
                    ]

                    if is_float8tensor(model_param):
                        # FP8 params are quantized in the above "quantize_param_shard" function.
                        continue
                    else:
                        shard_model_param.data.copy_(shard_main_param)

        # Copy shard groups to model groups.
        copy_group_params(self.shard_fp32_from_float16_groups, self.model_float16_groups)
        copy_group_params(self.shard_fp32_groups, self.model_fp32_groups)

    def _copy_main_params_to_param_buffer(self):
        """
        This function is only used for MXFP8 params.
        Copy FP32 main params directly to param buffer for param all-gather since
        param buffer is not mapped to model params for MXFP8 case.

        """
        for shard_main_group, model_group in zip(
            self.shard_fp32_from_float16_groups, self.model_float16_groups
        ):
            for shard_main_param, model_param in zip(shard_main_group, model_group):
                # Get position in param buffer
                param_range_map = self._get_model_param_range_map(model_param)
                world_range = param_range_map["gbuf_world_in_bucket"]

                # Get param buffer
                gbuf_index, _, bucket_id = self.model_param_gbuf_map[model_param]
                param_buffer = self.buffers[gbuf_index].buckets[bucket_id].param_data

                # Get the correct slice of param buffer
                shard_param_buffer = param_buffer.view(-1)[world_range.start : world_range.end]

                shard_param_buffer.copy_(shard_main_param)

    def _build_model_param_to_state_dict_param_map(self, state_dict):
        """Create a map from model params to tensors in state_dict based on their names."""
        state_dict_list = []
        if "model0" in state_dict or "model_0" in state_dict:
            # When there are multiple model chunks, the state_dict should have keys = "model0",
            # "model1", "model2", etc (For NeMo, it's "model_0", "model_1", "model_2", etc).
            prefix = "model" if "model0" in state_dict else "model_"
            for i in range(len(self.model_chunks)):
                k = f"{prefix}{i}"
                assert k in state_dict, f"Wrong state_dict format, cannot find '{k}'"
                state_dict_list.append(state_dict[k])
        elif "model" in state_dict:
            # When there is only one model chunk, the state_dict should have the key "model".
            assert len(self.model_chunks) == 1
            state_dict_list.append(state_dict["model"])
        else:
            assert len(self.model_chunks) == 1
            state_dict_list.append(state_dict)

        model_param_to_state_dict_param_map = {}
        for chunk_idx, model_chunk in enumerate(self.model_chunks):
            names_in_state_dict = set(state_dict_list[chunk_idx].keys())
            for name, model_param in model_chunk.named_parameters():
                while name.startswith("module."):
                    name = name[len("module.") :]
                matched_keys = [k for k in names_in_state_dict if k.endswith(name)]
                assert (
                    len(matched_keys) == 1
                ), f"Parameter {name} has {len(matched_keys)} matches in state dict"
                state_dict_param = state_dict_list[chunk_idx][matched_keys[0]]
                assert model_param.shape == state_dict_param.shape
                model_param_to_state_dict_param_map[model_param] = state_dict_param
                names_in_state_dict.remove(matched_keys[0])

        return model_param_to_state_dict_param_map

    def _copy_model_params_to_main_params(self, state_dict=None):
        """
        Copy model params to main params.

        During finetuning, this method is used to reload the main params from
        the model params. This copy does not make use of the grad buffer as
        an intermediary.
        """
        if isinstance(self.optimizer, HybridDeviceOptimizer):
            self.optimizer.update_fp32_param_by_new_param()
            return

        if self.ddp_config.use_megatron_fsdp:
            return

        # When using precision-aware optimizer, main params are held by self.optimizer. It will also
        # do the work of copying data from main params to model params.
        if self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8:
            return

        if state_dict is not None:
            # Build a mapping from the model params to the corresponding tensors in the state dict,
            # so that whenever the model params are used to initialize the main params, they can be
            # replaced by the corresponding tensors from the state dict to initialize the master
            # weights.
            model_param_to_state_dict_param_map = self._build_model_param_to_state_dict_param_map(
                state_dict
            )

        # Utility method for copying group params.
        def copy_group_params(model_groups, shard_main_groups):
            for model_group, shard_main_group in zip(model_groups, shard_main_groups):
                for model_param, shard_main_param in zip(model_group, shard_main_group):

                    param_range_map = self._get_model_param_range_map(model_param)
                    param_range = param_range_map["param"]
                    assert param_range.size == shard_main_param.nelement()

                    if state_dict is not None:
                        # Use param from state_dict to initialize main_param
                        model_param = model_param_to_state_dict_param_map[model_param]

                    if is_float8tensor(model_param):
                        shard_model_param = dequantize_fp8_tensor(model_param).view(-1)[
                            param_range.start : param_range.end
                        ]
                    else:
                        shard_model_param = model_param.view(-1)[
                            param_range.start : param_range.end
                        ]
                    shard_main_param.data.copy_(shard_model_param)

        # Copy model groups to shard groups.
        copy_group_params(self.model_float16_groups, self.shard_fp32_from_float16_groups)
        copy_group_params(self.model_fp32_groups, self.shard_fp32_groups)

    @torch.no_grad()
    def step_with_ready_grads(self) -> bool:
        """Step the optimizer with ready gradients, return successful.
        Under the hood, either launch synchronous param all-gathers or get ready to launch
        asynchorous all-gathers that get overlapped with the next forward pass.
        """
        update_successful = super().step_with_ready_grads()

        timers = self.config.timers
        if timers is not None:
            timers('params-all-gather', log_level=1).start(barrier=self.config.barrier_with_L1_time)

        if self.ddp_config.use_megatron_fsdp:
            for model_chunk in self.model_chunks:
                model_chunk.start_param_sync()
        else:
            # If not overlapping all-gather for parameters, launch synchronous all-gather
            # communication calls here. If overlapping all-gather for parameters, the following
            # the first all-gather is launched asynchronously in the next optimizer.zero_grad()
            # call and subsequent all-gathers are launched in the forward pre-hook.
            if not self.ddp_config.overlap_param_gather:
                for model_chunk in self.model_chunks:
                    model_chunk.start_param_sync()
        if timers is not None:
            timers('params-all-gather').stop()

        return update_successful


================================================
FILE: megatron/core/optimizer/grad_scaler.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Megatron grad scaler."""

from abc import ABC, abstractmethod
from typing import Dict

import torch


class MegatronGradScaler(ABC):
    def __init__(self, initial_scale: float):
        """Initialize scale value with the input initial scale."""
        assert initial_scale > 0.0
        self._scale = torch.tensor([initial_scale], dtype=torch.float, device='cuda')

    @property
    def scale(self):
        return self._scale

    @property
    def inv_scale(self):
        return self._scale.double().reciprocal().float()

    @abstractmethod
    def update(self, found_inf: bool):
        pass

    @abstractmethod
    def state_dict(self):
        pass

    @abstractmethod
    def load_state_dict(self, state_dict: Dict):
        pass


class ConstantGradScaler(MegatronGradScaler):
    """
    Constant grad scaler (loss scale is never adjusted regardless of NaNs seen in gradients).
    """

    def update(self, found_inf: bool):
        pass

    def state_dict(self):
        return dict()

    def load_state_dict(self, state_dict):
        pass


class DynamicGradScaler(MegatronGradScaler):
    """
    Grad scaler with dynamic scale that gets adjusted during training.

    Reduces loss scale by `backoff_factor` if `hysteresis` number of NaNs are seen in a row. Increases
    loss scale by `growth_factor` if NaNs are not seen for `growth_interval` iterations.
    """

    def __init__(
        self,
        initial_scale: float,
        min_scale: float,
        growth_factor: float,
        backoff_factor: float,
        growth_interval: int,
        hysteresis: int,
    ):
        """
        Grad scaler with dynamic scale that gets adjusted during training.

        Args:
            initial_scale (float): Initial loss scale value.
            min_scale (float): Minimum loss scale value.
            growth_factor (float): Factor to grow loss scale by if NaNs are not seen in `growth_interval`
                training iterations. Must be greater than 1.
            backoff_factor (float): Factor to decrease loss scale by if NaNs are seen in `hysteresis`
                consecutive training iterations. Must be between 0 and 1.
            growth_interval (int): Number of training iterations of no NaNs before loss scale is increased.
            hysteresis (int): Number of training iterations of consecutive NaNs before loss scale is decreased.
        """
        super(DynamicGradScaler, self).__init__(initial_scale)

        # Lower bound on the scale.
        assert min_scale > 0.0
        assert min_scale <= initial_scale
        self.min_scale = torch.tensor([min_scale], dtype=torch.float, device='cuda')
        # Growth and backoff factors for the scale.
        assert growth_factor > 1.0
        self.growth_factor = torch.tensor([growth_factor], dtype=torch.float, device='cuda')
        assert backoff_factor < 1.0
        assert backoff_factor > 0.0
        self.backoff_factor = torch.tensor([backoff_factor], dtype=torch.float, device='cuda')
        # Interval over which if we don't see any inf/nan,
        # we will scale the grad scale by the growth factor.
        assert growth_interval > 0
        self.growth_interval = growth_interval
        # Number of inf/nans we should see before scaling down
        # the grad scale by the backoff factor.
        assert hysteresis > 0
        self.hysteresis = hysteresis

        # Trackers.
        self._growth_tracker = 0
        self._hysteresis_tracker = self.hysteresis

    def update(self, found_inf: bool):
        """
        Updates internal state in grad scaler based on whether NaNs are seen in grads or not.
        """

        # If we have an inf/nan, growth tracker is set to 0
        # and hysterisis tracker is reduced by 1.
        if found_inf:
            self._growth_tracker = 0
            self._hysteresis_tracker -= 1
            # Now if we are out of hysteresis count, scale down the loss.
            if self._hysteresis_tracker <= 0:
                self._scale = torch.max(self._scale * self.backoff_factor, self.min_scale)
        else:
            # If there is no nan/inf, increment the growth tracker.
            self._growth_tracker += 1
            # If we have had enough consequitive intervals with no nan/inf:
            if self._growth_tracker == self.growth_interval:
                # Reset the tracker and hysteresis trackers,
                self._growth_tracker = 0
                self._hysteresis_tracker = self.hysteresis
                # and scale up the loss scale.
                self._scale = self._scale * self.growth_factor

    def state_dict(self):
        state_dict = {}
        state_dict['scale'] = self._scale
        state_dict['growth_tracker'] = self._growth_tracker
        state_dict['hysteresis_tracker'] = self._hysteresis_tracker
        return state_dict

    def load_state_dict(self, state_dict: Dict):
        self._scale = state_dict['scale'].cuda(torch.cuda.current_device())
        self._growth_tracker = state_dict['growth_tracker']
        self._hysteresis_tracker = state_dict['hysteresis_tracker']


================================================
FILE: megatron/core/optimizer/layer_wise_optimizer.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import logging
from typing import Callable, List, Optional

import torch
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors

from megatron.core.dist_checkpointing.dict_utils import nested_values
from megatron.core.dist_checkpointing.mapping import LocalNonpersistentObject, ShardedStateDict
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.utils import get_pg_rank, get_pg_size

from .clip_grads import count_zeros_fp32, get_grad_norm_fp32
from .optimizer import (
    ChainedOptimizer,
    Float16OptimizerWithFloat16Params,
    FP32Optimizer,
    MegatronOptimizer,
)
from .optimizer_config import OptimizerConfig

logger = logging.getLogger(__name__)


class LayerWiseDistributedOptimizer(ChainedOptimizer):
    """Layer-wise distributed optimizer for Megatron-core models.

    Experimental distributed optimizer wrapper that distributes weight to DP ranks by layer.
    Implemented as ChainedOptimizer to support multiple optimizers (e.g. muon + adamW)
    When using, keep all megatron distributed-optimizer related options OFF.

    How LayerWiseDistributedOptimizer work:
    1. weights are splited into lists and each rank only keep its shard in its optimizer
    2. Megatron DDP handle allreduce grad, note that each rank have full model and grad
    3. optimizer is already modified so only param belong to this DP rank is updated
    4. grad_norm and zero counting will reduce metrics globally in step function
    5. Do regular update with chained optimizers, modified optimizer only update shard
    6. allgather updated params to every rank
    """

    def __init__(
        self,
        optimizers: List[MegatronOptimizer],
        config: OptimizerConfig,
        pg_collection: Optional[ProcessGroupCollection] = None,
        init_state_fn_list: Optional[List[Callable]] = None,
        model_chunks: Optional[List] = None,
        async_allgather: bool = False,
    ) -> None:
        """
        Initialize LayerWiseDistributedOptimizer.

        Args:
            optimizers: List of MegatronOptimizers.
            config: OptimizerConfig.
            pg_collection: ProcessGroupCollection.
            init_state_fn_list: List of init state functions.
            model_chunks: DDP-wrapped model chunks (needed for async_allgather).
            async_allgather: If True, defer param all-gather to forward pre-hooks.
        """

        self.pg_collection = pg_collection
        self.shard_params(optimizers)

        # Set up async all-gather using DDP bucket infrastructure.
        self.async_allgather = async_allgather
        if self.async_allgather:
            assert (
                model_chunks is not None
            ), "model_chunks must be provided if async_allgather is True"
            self.set_bucket_layerwise_params_list(model_chunks)

        if init_state_fn_list:
            assert len(init_state_fn_list) == len(
                optimizers
            ), "init_state_fn_list must be the same length as optimizers if provided"

        # wrap optimizer after sharding to avoid unnecessary master weight creation
        # for higher precision, optimizers are wrapped with megatron already
        if config.bf16:
            # unwrap FP32 optimizer, possibly from reusing get_megatron_optimizer for adam
            for i in range(len(optimizers)):
                opt = optimizers[i]
                if isinstance(opt, Float16OptimizerWithFloat16Params):
                    raise TypeError(
                        'LayerWiseDistributedOptimizer received Float16 optimizer already.'
                    )
                # unwrap FP32 optimizer from reusing get_megatron_optimizer for adam
                if isinstance(opt, FP32Optimizer):
                    opt = opt.optimizer
                optimizers[i] = Float16OptimizerWithFloat16Params(
                    opt, config, None, init_state_fn_list[i] if init_state_fn_list else None
                )

        super().__init__(optimizers)

        # TODO(kunlun, deyuf): potential future perf optimization
        # since allreduce is unchanged and handled by megatron DDP, they're already in
        # contiguous gbuf. So instead of shard param by layer randomly, we can shard by
        # buf range but keep some "extras" to keep boundary weight not sharded.
        # This way each rank do some duplicated work but allgather_v is no longer needed
        # All current distopt optimization can also be potentially applied

    def shard_params(self, optimizers):
        """Shard all params into lists by rank."""
        # list of parameter are sorted by numel and assigned to ranks in ping-pong style
        # example of 4 ranks and 10 parameters p0-p9 after sorting, then dp_cp_params_list will be
        # [[p0, p7, p8], [p1, p6, p9], [p2, p5], [p3, p4]]

        # simplify when dp_cp group size is 1
        if get_pg_size(self.pg_collection.dp_cp) == 1:
            self.dp_cp_params_list = None
            self.expt_dp_params_list = None
            return

        dp_cp_idx, expt_dp_idx = 0, 0
        dp_cp_size = get_pg_size(self.pg_collection.dp_cp)
        expt_dp_size = get_pg_size(self.pg_collection.expt_dp)
        # create ping-pong style loop so memory is more balanced
        dp_cp_loop = list(range(dp_cp_size)) + list(range(dp_cp_size))[::-1]
        expt_dp_loop = list(range(expt_dp_size)) + list(range(expt_dp_size))[::-1]
        self.dp_cp_params_list = [[] for _ in range(dp_cp_size)]
        self.expt_dp_params_list = [[] for _ in range(expt_dp_size)]
        # get all param groups
        param_groups = []
        for optimizer in optimizers:
            param_groups += optimizer.param_groups

        # sort param in all groups by param numel and assign to each rank evenly
        param_list = []
        for group_index, group in enumerate(param_groups):
            for p in group["params"]:
                param_list.append((p, group_index))
        param_list.sort(key=lambda x: x[0].numel())
        param_groups_this_rank = [[] for g in param_groups]

        # assign params to rank in ping-pong style loop
        for p, group_index in param_list:
            if param_groups[group_index].get("is_expert_parallel", False):
                if expt_dp_loop[expt_dp_idx] == get_pg_rank(self.pg_collection.expt_dp):
                    param_groups_this_rank[group_index].append(p)
                self.expt_dp_params_list[expt_dp_loop[expt_dp_idx]].append(p)
                expt_dp_idx = (expt_dp_idx + 1) % len(expt_dp_loop)
            else:
                if dp_cp_loop[dp_cp_idx] == get_pg_rank(self.pg_collection.dp_cp):
                    param_groups_this_rank[group_index].append(p)
                self.dp_cp_params_list[dp_cp_loop[dp_cp_idx]].append(p)
                dp_cp_idx = (dp_cp_idx + 1) % len(dp_cp_loop)

        # now we modify the group to only handle local params
        for groups, params in zip(param_groups, param_groups_this_rank):
            groups["params"] = params

        # simplify when expt_dp group size is 1 or expert parallel is off
        if expt_dp_size == 1 or len(self.expt_dp_params_list[0]) == 0:
            self.expt_dp_params_list = None

    def set_bucket_layerwise_params_list(self, model_chunks):
        """Map sharded params to DDP buckets for async all-gather.

        For each bucket in each model chunk's bucket groups, build per-rank param lists
        by cross-referencing the layer-wise sharded param lists with the bucket's params.

        Args:
            model_chunks: DDP-wrapped model chunks with bucket_groups.
        """
        for model_chunk in model_chunks:
            for group in model_chunk.bucket_groups:
                for bucket in group.buckets:
                    bucket_params_list = [[] for _ in range(get_pg_size(self.pg_collection.dp_cp))]
                    for bucket_list, full_params_list in zip(
                        bucket_params_list, self.dp_cp_params_list
                    ):
                        for param in full_params_list:
                            if param in bucket.params:
                                bucket_list.append(param)
                    bucket.set_layerwise_params_list(bucket_params_list)
            # Do the same for expert parallel bucket groups.
            if self.expt_dp_params_list is not None:
                for group in model_chunk.expert_parallel_bucket_groups:
                    for bucket in group.buckets:
                        bucket_params_list = [
                            [] for _ in range(get_pg_size(self.pg_collection.expt_dp))
                        ]
                        for bucket_list, full_params_list in zip(
                            bucket_params_list, self.expt_dp_params_list
                        ):
                            for param in full_params_list:
                                if param in bucket.params:
                                    bucket_list.append(param)
                        bucket.set_layerwise_params_list(bucket_params_list)

    @torch.no_grad()
    def allgather_params(self) -> None:
        """All-gather updated params from all ranks."""

        # helper function to flatten local params, all-gather,
        # unflatten and copy to model params
        def _allgather_helper(params_list, group):
            device = params_list[0][0].device
            dtype = params_list[0][0].dtype
            rank = get_pg_rank(group)
            dp_size = get_pg_size(group)
            # Flatten this rank's params.
            src = (
                _flatten_dense_tensors(params_list[rank])
                if len(params_list[rank]) > 0
                else torch.empty(0, device=device, dtype=dtype)
            )
            flat_sizes = [sum(p.numel() for p in params) for params in params_list]
            if max(flat_sizes) == 0:
                return

            # Allocate per-rank receive buffers with actual sizes (no padding).
            # PyTorch's NCCL backend handles uneven sizes in all_gather via
            # grouped send/recv internally. Reuse src for local rank's slot.
            gather_list = []
            for i in range(dp_size):
                if i == rank:
                    gather_list.append(src)
                else:
                    gather_list.append(torch.empty(flat_sizes[i], device=device, dtype=dtype))

            torch.distributed.all_gather(gather_list, src, group=group)

            # Unflatten and copy gathered params for each rank.
            for idx, params in enumerate(params_list):
                if len(params) == 0 or idx == rank:
                    continue
                updated_params = _unflatten_dense_tensors(gather_list[idx], params)
                for updated_p, model_p in zip(updated_params, params):
                    model_p.data.copy_(updated_p)

        if self.pg_collection is None:
            return
        if self.dp_cp_params_list:
            _allgather_helper(self.dp_cp_params_list, self.pg_collection.dp_cp)
        if self.expt_dp_params_list:
            _allgather_helper(self.expt_dp_params_list, self.pg_collection.expt_dp)

    @torch.no_grad()
    def broadcast_params(self):
        """All rank broadcast updated local params."""
        # Broadcast linear layer weights to all other ranks. Kept as reference test.
        if self.dp_cp_params_list is None:
            return
        for i, params in enumerate(self.dp_cp_params_list):
            src_global_rank = torch.distributed.get_global_rank(self.pg_collection.dp_cp, i)
            for p in params:
                torch.distributed.broadcast(p, src_global_rank, self.pg_collection.dp_cp)
        if self.expt_dp_params_list is None:
            return
        for i, params in enumerate(self.expt_dp_params_list):
            src_global_rank = torch.distributed.get_global_rank(self.pg_collection.expt_dp, i)
            for p in params:
                torch.distributed.broadcast(p, src_global_rank, self.pg_collection.expt_dp)

    @torch.no_grad()
    def get_grad_norm(self):
        # similar to dist opt, always aggregate globally
        grads_for_norm = []
        for optimizer in self.chained_optimizers:
            grads_for_norm += optimizer.get_main_grads_for_grad_norm()
        grad_norm = get_grad_norm_fp32(grads_for_norm, grad_stats_parallel_group=None)
        return grad_norm

    @torch.no_grad()
    def count_zeros(self):
        params = []
        for optimizer in self.chained_optimizers:
            params += optimizer.get_parameters()
        return count_zeros_fp32(
            params,
            grad_stats_parallel_group=None,
            use_decoupled_grad=self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8,
        )

    @torch.no_grad()
    def step(self):  # type: ignore[no-untyped-def]
        """step function for layer-wise optimizer."""
        update_successful, grad_norm, num_zeros_in_grad = super().step()

        # All gather updated params. If async_allgather is True, the allgather
        # is deferred to the forward pre-hooks via DDP bucket infrastructure.
        if not self.async_allgather:
            self.allgather_params()

        return update_successful, grad_norm, num_zeros_in_grad

    # TODO(deyuf): need to improve dist checkpointing design to properly handle this
    # fp32_from_fp16_params is list, each sub list could be empty if group is empty
    # this breaks dist checkpointing assumption since extract_sharded_base drop list structure
    # for now, we convert it to dict with index as key and convert back in load_state_dict
    def load_state_dict(self, state_dict):
        if len(self.chained_optimizers) == 1:
            wrapped_state_dict = {1: state_dict}
        else:
            wrapped_state_dict = state_dict
        for sd in wrapped_state_dict.values():
            if 'fp32_from_fp16_params' in sd and isinstance(sd['fp32_from_fp16_params'], dict):
                logger.info('[layerwise] converting fp32_from_fp16_params from dict to list')
                sd['fp32_from_fp16_params'] = [
                    v for k, v in sorted(sd['fp32_from_fp16_params'].items())
                ]
        super().load_state_dict(state_dict)

    def sharded_state_dict(
        self, model_sharded_state_dict: ShardedStateDict, is_loading: bool = False, **kwargs
    ):
        """
        Sharded state dict for torch_dist format checkpointing.
        For fixed DP usage only, set replica_id to 0 for all ShardedTensor.
        """
        sharded_state_dict = super().sharded_state_dict(
            model_sharded_state_dict, is_loading, **kwargs
        )

        # for fixed DP usage only
        for sh_base in nested_values(sharded_state_dict):
            if hasattr(sh_base, 'replica_id'):
                assert (
                    isinstance(sh_base.replica_id, int) or len(sh_base.replica_id) == 3
                ), f'Expected replica_id as int or (PP, TP, DP), got: {sh_base}'
                sh_base.replica_id = (
                    0 if isinstance(sh_base.replica_id, int) else (*sh_base.replica_id[:2], 0)
                )

        # later code assume list but chained optimizer fallback to non-list if there's only one
        if len(self.chained_optimizers) == 1:
            wrapped_sharded_state_dict = {1: sharded_state_dict}
        else:
            wrapped_sharded_state_dict = sharded_state_dict

        # Adjust dict rank 0 output correct global metadata into common_dict
        for sd in wrapped_sharded_state_dict.values():
            # wrap empty containers into LocalNonpersistentObject so it won't be saved/loaded
            # params is already wrapped, we only need to handle fp32_from_fp16_params and state
            # more details in load_state_dict comment
            if 'fp32_from_fp16_params' in sd:
                sd['fp32_from_fp16_params'][:] = [
                    group if group else LocalNonpersistentObject(group)
                    for group in sd['fp32_from_fp16_params']
                ]
                sd['fp32_from_fp16_params'] = {
                    i: v for i, v in enumerate(sd['fp32_from_fp16_params'])
                }
            # state is a single dict and will be empty if optimizer is fully empty
            if not sd['optimizer']['state']:
                sd['optimizer']['state'] = LocalNonpersistentObject(sd['optimizer']['state'])
            # group keys(e.g. 'step') might be missing or not updated
            for i, group in enumerate(sd['optimizer']['param_groups']):
                # keep local param tensor so we only gather metadata
                local_params = group.pop('params')
                # save whether this group is empty, so we can use non-empty rank for metadata
                group['params'] = bool(local_params.unwrap())
                all_rank_groups = [None for _ in range(torch.distributed.get_world_size())]
                torch.distributed.all_gather_object(all_rank_groups, group)
                # find first non-empty group if it exists
                nonempty_rank_group = next((g for g in all_rank_groups if g['params']), group)
                nonempty_rank_group['params'] = local_params
                sd['optimizer']['param_groups'][i] = nonempty_rank_group
        return sharded_state_dict

    def save_state_dict_to_file(self, filename: str) -> None:
        """Save the parameter state of the optimizer. For torch format only.
        Args:
            filename: The filename to save the parameter state.
        """
        torch.save(super().state_dict(), filename)

    def load_state_dict_from_file(self, filename: str) -> None:
        """Load the parameter state of the optimizer. For torch format only."""
        super().load_state_dict(torch.load(filename))


================================================
FILE: megatron/core/optimizer/muon.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

"""Megatron muon optimizer wrapper to handle tensor-parallel."""

import logging
from typing import Any, Callable, Dict, List, Literal, Optional

import torch
from torch.optim.optimizer import ParamsT

from megatron.core.optimizer_param_scheduler import ParamGroupOverride
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.transformer.module import MegatronModule
from megatron.core.utils import get_pg_size, log_single_rank

from . import _get_param_groups, get_megatron_optimizer
from .layer_wise_optimizer import LayerWiseDistributedOptimizer
from .optimizer import (
    ChainedOptimizer,
    Float16OptimizerWithFloat16Params,
    FP32Optimizer,
    MegatronOptimizer,
)
from .optimizer_config import OptimizerConfig, ParamKey

try:
    from emerging_optimizers.orthogonalized_optimizers import (
        OrthogonalizedOptimizer,
        get_muon_scale_factor,
    )
    from emerging_optimizers.orthogonalized_optimizers.muon_utils import newton_schulz_tp

    HAVE_EMERGING_OPTIMIZERS = True
except ImportError:
    HAVE_EMERGING_OPTIMIZERS = False
    OrthogonalizedOptimizer = object

# TODO: Remove this separate try/except once the next version of emerging_optimizers
# (which includes Lion) is released. Then Lion can be imported in the block above.
try:
    from emerging_optimizers.scalar_optimizers import Lion  # pylint: disable=unused-import

    HAVE_LION = True
except ImportError:
    HAVE_LION = False


logger = logging.getLogger(__name__)


class TensorParallelMuon(OrthogonalizedOptimizer):
    """Tensor Parallel Muon optimizer."""

    def __init__(
        self,
        params: ParamsT,
        lr: float = 3e-4,
        momentum_beta: float = 0.95,
        use_nesterov: bool = True,
        weight_decay: float = 0.01,
        use_decoupled_weight_decay: bool = True,
        split_qkv: bool = False,
        is_qkv_fn: Callable[[torch.Tensor], bool] | None = None,
        qkv_split_shapes: tuple[int, int, int] | None = None,
        fp32_matmul_prec: str = "medium",
        coefficient_type: str = "quintic",
        num_ns_steps: int = 5,
        scale_mode: str = "spectral",
        extra_scale_factor: float = 1.0,
        pg_collection: Optional[ProcessGroupCollection] = None,
        mode: Literal["blockwise", "duplicated", "distributed"] = "duplicated",
    ) -> None:
        if num_ns_steps < 1:
            raise ValueError(f"num_ns_steps must be at least 1, got {num_ns_steps}")

        def scaled_orthogonalize_fn(
            grad: torch.Tensor,
            tp_group: torch.distributed.ProcessGroup,
            partition_dim: int | None = None,
        ) -> torch.Tensor:
            log_single_rank(
                logger,
                logging.DEBUG,
                f'Orthogonalizing grad with {num_ns_steps} steps, {coefficient_type} coefficient, '
                f'{scale_mode} scale mode, extra_scale_factor={extra_scale_factor}',
            )
            size = [grad.size(-2), grad.size(-1)]
            if partition_dim is not None:
                size[partition_dim] *= get_pg_size(tp_group)
            orth_grad = newton_schulz_tp(
                grad,
                steps=num_ns_steps,
                coefficient_type=coefficient_type,
                tp_group=tp_group,
                partition_dim=partition_dim,
                mode="duplicated" if mode == "blockwise" else mode,
            )
            scale_factor = get_muon_scale_factor(size[0], size[1], mode=scale_mode)
            return orth_grad * scale_factor * extra_scale_factor

        self.pg_collection = pg_collection
        self.mode = mode
        self.split_qkv = split_qkv
        self.is_qkv_fn = is_qkv_fn
        self.qkv_split_shapes = qkv_split_shapes

        weight_decay_method = "decoupled" if use_decoupled_weight_decay else "l2"
        super().__init__(
            params,
            lr,
            momentum_beta,
            use_nesterov=use_nesterov,
            weight_decay=weight_decay,
            weight_decay_method=weight_decay_method,
            fp32_matmul_prec=fp32_matmul_prec,
            scaled_orthogonalize_fn=scaled_orthogonalize_fn,
        )

    def orthogonalize(self, p: torch.Tensor, grad: torch.Tensor, **kwargs: Any) -> torch.Tensor:
        """Orthogonalize the momentum.

        Args:
            p: The parameter tensor. i is necessary to pass param tensor in addition to momentum
                because a lot of information is only available in the param tensor,
                attributes for example.
            grad: The momentum tensor.

        Returns:
            The orthogonalized gradient tensor.
        """
        # TODO(deyuf): switch to group
        if self.pg_collection:
            tp_group = (
                self.pg_collection.expt_tp
                if getattr(p, 'expert_tp', False)
                else self.pg_collection.tp
            )
        else:
            tp_group = None
        partition_dim = None if self.mode == "blockwise" else getattr(p, "partition_dim", None)
        if partition_dim == -1:
            # emerging-optimizers use None instead of -1 to indicate no tensor parallel
            partition_dim = None

        if self.split_qkv and self.is_qkv_fn(p):  # type: ignore[misc]
            # split grouped attention parameters (e.g., QKV, GQA, etc.)
            grad_shape = grad.shape
            log_single_rank(
                logger,
                logging.DEBUG,
                f'qkv split grad shape {grad_shape}, split shapes {self.qkv_split_shapes}',
            )
            num_query_groups = grad_shape[0] // sum(self.qkv_split_shapes)
            qkv_grads = torch.split(
                grad.view(num_query_groups, sum(self.qkv_split_shapes), -1),
                self.qkv_split_shapes,
                dim=1,
            )
            qkv_grads = [g.reshape(-1, grad_shape[-1]) for g in qkv_grads]

            # Apply Newton-Schulz and scales to each component, concat back
            qkv_grads = [
                self.scaled_orthogonalize_fn(g, tp_group, partition_dim).view(
                    num_query_groups, -1, grad_shape[-1]
                )
                for g in qkv_grads
            ]
            grad = torch.cat(qkv_grads, dim=1).view(grad_shape)
        else:
            grad = self.scaled_orthogonalize_fn(grad, tp_group, partition_dim)
        return grad


def get_megatron_muon_optimizer(
    config: OptimizerConfig,
    model_chunks: List[MegatronModule],
    config_overrides: Optional[Dict[ParamKey, ParamGroupOverride]] = None,
    use_gloo_process_groups: bool = True,
    layer_wise_distributed_optimizer: bool = False,
    pg_collection: Optional[ProcessGroupCollection] = None,
) -> MegatronOptimizer:
    """This function is used to get the muon optimizer for the model chunks.
    It is used to get the muon optimizer for the model chunks.

    Args:
        config (OptimizerConfig): optimizer configuration object.
        model_chunks (List[MegatronModule]): model chunks to get optimizer for.
        use_gloo_process_groups (bool): if false, disable use of Gloo process groups
            in underlying Megatron optimizers.
        layer_wise_distributed_optimizer (bool): if true, use layer-wise distributed optimizer.
            Defaults to False.
    """
    # TODO: Mutating config.optimizer is a side effect; clean up after
    # https://github.com/NVIDIA/Megatron-LM/pull/3638 lands.
    # Set the nonlinear optimizer for muon (used for embeddings, biases, norms).
    config.optimizer = config.muon_scalar_optimizer

    assert HAVE_EMERGING_OPTIMIZERS, "Emerging Optimizers is not installed."
    if config.muon_scalar_optimizer == 'lion':
        assert HAVE_LION, (
            "Lion optimizer requires a version of 'emerging_optimizers' that includes Lion. "
            "Please upgrade to use --muon-scalar-optimizer lion."
        )

    # Dist-opt is not supported due to strong coupling with how DDP init grad buffer
    # In theory we can change DDP to enable use muon and dist-opt-adam together
    if config.use_distributed_optimizer:
        raise Exception('muon with dist optimizer is not supported.')
    # only support bf16 w/o loss scale now
    if config.fp16:
        raise Exception('muon with fp16 is not supported.')

    # before this function receive properly created collection
    if pg_collection is None:
        pg_collection = ProcessGroupCollection.use_mpu_process_groups()

    log_single_rank(logger, logging.INFO, f'Setting up emerging optimizer with config {config}')

    # Needed for torch_dist ckpt_format, unlike torch ckpt_format
    # For other emerging optimizers, need to implement init_state_fn as well
    # TODO(boxiangw): Improve usability after optimizer refactor
    # TODO(boxiangw): support precision aware optimizer
    def muon_init_state_fn(opt, config=None):
        for group in opt.param_groups:
            for p in group['params']:
                if len(opt.state[p]) == 0:
                    opt.state[p]['momentum_buffer'] = torch.zeros_like(p.data)

    def adam_init_state_fn(opt, config=None):
        for group in opt.param_groups:
            for p in group['params']:
                if len(opt.state[p]) == 0:
                    if config is None or not config.use_precision_aware_optimizer:
                        opt.state[p]['exp_avg'] = torch.zeros_like(p.data)
                        opt.state[p]['exp_avg_sq'] = torch.zeros_like(p.data)
                    else:
                        opt.initialize_state(p)

    def lion_init_state_fn(opt, config=None):
        for group in opt.param_groups:
            for p in group['params']:
                if len(opt.state[p]) == 0:
                    opt.state[p]['exp_avg'] = torch.zeros_like(p.data)

    nonlinear_init_state_fn = (
        lion_init_state_fn if config.muon_scalar_optimizer == 'lion' else adam_init_state_fn
    )

    optimizers = []
    # record list of non/linear params
    linear_params = []
    nonlinear_params = []
    for model_chunk in model_chunks:
        # use config to determine qkv split shapes.
        # no need to check tp since tp splits by head and this is per head(group) dimension
        num_attention_heads = model_chunk.config.num_attention_heads
        num_query_groups = model_chunk.config.num_query_groups
        kv_channels = model_chunk.config.kv_channels
        qkv_split_shapes = [
            num_attention_heads // num_query_groups * kv_channels,
            kv_channels,
            kv_channels,
        ]
        for name, param in model_chunk.named_parameters():
            if not param.requires_grad:
                continue
            # add flag for expert weight so optimizer can figure which tp group it uses
            # alternatively, create new param group and save tp_group. this require more
            # change in optimizer
            if 'experts' in name and 'shared' not in name:
                param.expert_tp = True
            # add flag for qkv parameter
            # TODO(deyuf): support MLA
            if 'linear_qkv.weight' in name and len(param.shape) == 2:
                param.is_qkv = True
            # TODO(deyuf): currently only allow 2D non-embedding weight to avoid breaking
            if (
                not getattr(param, 'is_embedding_or_output_parameter', False)
                and len(param.shape) == 2
            ):
                linear_params.append(param)
            else:
                nonlinear_params.append(param)

    muon_kwargs = {
        "lr": config.lr,
        "momentum_beta": config.muon_momentum,
        "use_nesterov": config.muon_use_nesterov,
        "weight_decay": config.weight_decay,
        "fp32_matmul_prec": config.muon_fp32_matmul_prec,
        "num_ns_steps": config.muon_num_ns_steps,
        "scale_mode": config.muon_scale_mode,
        "split_qkv": config.muon_split_qkv,
        "is_qkv_fn": lambda p: getattr(p, "is_qkv", False),
        "qkv_split_shapes": qkv_split_shapes,
        "extra_scale_factor": config.muon_extra_scale_factor,
        "pg_collection": pg_collection,
        "mode": config.muon_tp_mode,
    }

    # freezing nonlinear params and get param groups for muon
    for param in nonlinear_params:
        param.requires_grad = False

    linear_param_groups = _get_param_groups(model_chunks, config, config_overrides)
    # if layerwise distributed optimizer is not used, need to handle ep params separately
    expert_param_groups = []
    if not layer_wise_distributed_optimizer:
        for group in linear_param_groups:
            if group['is_expert_parallel']:
                expert_param_groups.append(group)
                linear_param_groups.remove(group)

    optimizer = TensorParallelMuon(linear_param_groups, **muon_kwargs)

    reset_config_bf16 = False
    if config.bf16:
        if layer_wise_distributed_optimizer:
            # creating master weight before layerwise sharding will lead to unnecessary master
            # weight so here we delay master weight creation into layer_wise unset config.bf16
            # will also result in all optimizers below(adam) to also not be wrapped
            config.bf16 = False
            reset_config_bf16 = True
        else:
            # if not using layer_wise wrapper, just create master weight here is fine
            optimizer = Float16OptimizerWithFloat16Params(
                optimizer, config, None, muon_init_state_fn
            )
    else:
        optimizer = FP32Optimizer(optimizer, config, muon_init_state_fn)

    optimizers.append(optimizer)

    # expert optimizer exists meaning layerwise distributed optimizer is not used
    if len(expert_param_groups) > 0:
        expert_optimizer = TensorParallelMuon(expert_param_groups, **muon_kwargs)
        if config.bf16:
            expert_optimizer = Float16OptimizerWithFloat16Params(
                expert_optimizer, config, None, muon_init_state_fn
            )
        else:
            expert_optimizer = FP32Optimizer(expert_optimizer, config, muon_init_state_fn)
        setattr(expert_optimizer, 'grad_stats_parallel_group', pg_collection.tp_ep_pp)
        optimizers.append(expert_optimizer)

    # done with muon, unfreeze nonlinear and freeze linear
    for param in nonlinear_params:
        param.requires_grad = True
    for param in linear_params:
        param.requires_grad = False

    # call original get. linear params will be skipped since they're freezed
    chained_adam = get_megatron_optimizer(
        config,
        model_chunks,
        config_overrides=config_overrides,
        use_gloo_process_groups=use_gloo_process_groups,
    )

    # unfreeze everything
    for param in linear_params:
        param.requires_grad = True

    # chain everything together
    init_fns = [muon_init_state_fn] + len(chained_adam.chained_optimizers) * [
        nonlinear_init_state_fn
    ]
    optimizers += chained_adam.chained_optimizers

    if layer_wise_distributed_optimizer:
        log_single_rank(logger, logging.INFO, 'Using LayerWiseDistributedOptimizer for Muon')
        if reset_config_bf16:
            config.bf16 = True
        return LayerWiseDistributedOptimizer(
            optimizers,
            config,
            pg_collection,
            init_state_fn_list=init_fns,
            model_chunks=model_chunks,
            async_allgather=config.overlap_param_gather,
        )
    return ChainedOptimizer(optimizers)


================================================
FILE: megatron/core/optimizer/optimizer.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Megatron optimizer."""

import copy
import logging
import math
import warnings
from abc import ABC, abstractmethod
from itertools import chain
from logging import getLogger
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import torch
from typing_extensions import override

try:
    from transformer_engine.pytorch.optimizers import multi_tensor_applier, multi_tensor_scale

    multi_tensor_scale_impl = multi_tensor_scale
except ImportError:
    try:
        import amp_C
        from apex.multi_tensor_apply import multi_tensor_applier

        multi_tensor_scale_impl = amp_C.multi_tensor_scale
    except ImportError:

        warnings.warn(
            'Transformer Engine and Apex are not installed. '
            'Falling back to local implementations of '
            'multi_tensor_applier and multi_tensor_scale'
        )

        from megatron.core.utils import local_multi_tensor_applier, local_multi_tensor_scale

        multi_tensor_applier = local_multi_tensor_applier
        multi_tensor_scale_impl = local_multi_tensor_scale

from .. import parallel_state, tensor_parallel
from ..config_logger import has_config_logger_enabled, log_config_to_disk
from ..dist_checkpointing.mapping import ShardedStateDict
from ..dist_checkpointing.optimizer import (
    get_param_id_to_sharded_param_map,
    make_sharded_optimizer_tensor,
    optim_state_to_sharding_state,
)
from ..dist_checkpointing.utils import add_prefix_for_sharding
from ..transformer.module import param_is_not_shared
from ..utils import log_single_rank
from .clip_grads import clip_grad_by_total_norm_fp32, count_zeros_fp32, get_grad_norm_fp32
from .grad_scaler import MegatronGradScaler
from .optimizer_config import OptimizerConfig

logger = getLogger(__name__)


def _zero_grad_group_helper(
    group: List[torch.nn.Parameter], set_to_none: bool, use_decoupled_grad: bool = False
):
    """
    Zero out the gradient for a group of parameters.
    Note: copied from torch.optim.optimizer.
    """
    for param in group:
        grad_attr = "decoupled_grad" if use_decoupled_grad else "grad"
        if hasattr(param, grad_attr) and getattr(param, grad_attr) is not None:
            if set_to_none:
                setattr(param, grad_attr, None)
            else:
                grad_obj = getattr(param, grad_attr)
                if grad_obj.grad_fn is not None:
                    grad_obj.detach_()
                else:
                    grad_obj.requires_grad_(False)
                grad_obj.zero_()


def _multi_tensor_copy_this_to_that(
    this: List[torch.Tensor], that: List[torch.Tensor], overflow_buf: Optional[torch.Tensor] = None
):
    """
    Use multi-tensor-applier to copy values from one list to another.
    We don't have a bfloat16 implementation so for now if the overflow_buf
    is not provided, we default back to simple loop copy to be compatible
    with bfloat16.
    """
    if overflow_buf is not None:
        overflow_buf.fill_(0)
        # Scaling with factor `1.0` is equivalent to copy.
        multi_tensor_applier(multi_tensor_scale_impl, overflow_buf, [this, that], 1.0)
    else:
        for this_, that_ in zip(this, that):
            that_.copy_(this_)


param_group_identifier_keys = ('wd_mult', 'lr_mult', 'is_expert_parallel', 'is_decoupled_lr')


class MegatronOptimizer(ABC):
    """
    Base class for all Megatron optimizers.

    Args:
        optimizer (torch.optim.Optimizer): base optimizer such as Adam or SGD.
        config (OptimizerConfig): configuration object for optimizer.
        init_state_fn (Callable, optional): function to initialize state in the optimizer.
    """

    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        config: OptimizerConfig,
        init_state_fn: Callable = lambda x: None,
    ):
        """Input optimizer is the base optimizer (e.g., Adam)."""
        self.optimizer = optimizer
        if self.optimizer is None:
            warnings.warn(
                f"WARNING: there is no optimizer on RANK {torch.distributed.get_rank()}. "
                "This may be expected if you have frozen sub-models."
            )
        self.config = config
        self.init_state_fn = init_state_fn

    def get_parameters(self) -> List[torch.nn.Parameter]:
        """
        Get list of parameters wrapped in optimizer.
        """
        params = []
        if hasattr(self.optimizer, 'param_groups'):
            for param_group in self.optimizer.param_groups:
                for param in param_group['params']:
                    params.append(param)
        return params

    def get_main_grads_for_grad_norm(self) -> List[torch.Tensor]:
        """
        Get main_grads that should be taken into account to compute the grad norm.
        Filter parameters based on:
          - grad should not be None.
          - parameter should not be shared (i.e., grads shouldn't be double counted while
            computing norms).
          - should not be a replica due to tensor model parallelism.
        """
        params = self.get_parameters()
        grads_for_norm = []
        for param in params:
            if getattr(param, "__fsdp_param__", False):
                grad = param.grad._local_tensor if param.grad is not None else None
            elif self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8:
                grad = param.decoupled_grad if hasattr(param, "decoupled_grad") else None
            else:
                grad = param.grad
            grad_not_none = grad is not None
            is_not_shared = param_is_not_shared(param)
            is_not_tp_duplicate = tensor_parallel.param_is_not_tensor_parallel_duplicate(
                param, getattr(self, 'tp_group', None)
            )
            if grad_not_none and is_not_shared and is_not_tp_duplicate:
                grads_for_norm.append(grad)

        return grads_for_norm

    def get_grad_stats_parallel_group(self) -> torch.distributed.ProcessGroup:
        """Process group for reducing gradient statistics (num_zeros & norm).

        The two most common cases are:
        - Non-distributed optimizer (default): Return the model-parallel group.
        - Distributed optimizer (overridden in distrib_optimizer.py): Return the entire world.
        """
        if hasattr(self, 'model_parallel_group'):
            warnings.warn(
                "WARNING: `optimizer.model_parallel_group` deprecated and renamed to "
                "`optimizer.grad_stats_parallel_group`. The previous name will be "
                "removed in a future release."
            )
            self.grad_stats_parallel_group = self.model_parallel_group
            delattr(self, "model_parallel_group")
            return self.grad_stats_parallel_group
        if hasattr(self, 'grad_stats_parallel_group'):
            return self.grad_stats_parallel_group
        return parallel_state.get_model_parallel_group()

    @abstractmethod
    def prepare_grads(self) -> bool:
        """Pre-processing gradients before the optimizer step, returns whether inf/nan is found."""
        return False

    @abstractmethod
    def step_with_ready_grads(self) -> bool:
        """Step the optimizer with ready gradients, return successful."""
        return True

    @torch.no_grad()
    def get_grad_norm(self):
        """Compute and return grad norm."""
        grads_for_norm = self.get_main_grads_for_grad_norm()
        total_norm = get_grad_norm_fp32(
            grads_for_norm, grad_stats_parallel_group=self.get_grad_stats_parallel_group()
        )
        return total_norm

    def clip_grad_norm(self, clip_grad: float) -> float:
        """Compute and return grad norm, also clip grads."""
        params = self.get_parameters()
        if params:
            grads_for_norm = self.get_main_grads_for_grad_norm()
        else:
            grads_for_norm = []
        grad_norm = get_grad_norm_fp32(
            grads_for_norm, grad_stats_parallel_group=self.get_grad_stats_parallel_group()
        )

        if params:
            clip_grad_by_total_norm_fp32(
                params,
                clip_grad,
                grad_norm,
                self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8,
            )
        return grad_norm

    def count_zeros(self) -> float:
        """Count number of zeros in model's gradients."""
        params = self.get_parameters()
        return count_zeros_fp32(
            params,
            grad_stats_parallel_group=self.get_grad_stats_parallel_group(),
            use_decoupled_grad=self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8,
            tp_group=getattr(self, 'tp_group', None),
        )

    @abstractmethod
    def zero_grad(self, set_to_none: bool = True):
        """Zero gradients and prepare for next forward pass."""
        pass

    @abstractmethod
    def get_loss_scale(self) -> torch.Tensor:
        """
        Get current loss scale factor.
        NOTE: The output should be a CUDA tensor of size 1.
        """
        pass

    def scale_loss(self, loss: torch.Tensor) -> torch.Tensor:
        """Simple scaling."""
        return self.get_loss_scale() * loss

    @abstractmethod
    def reload_model_params(self, state_dict=None):
        """Refreshes any internal state from the current model parameters.
        Call whenever the parameters are changed outside of the optimizer.
        For example, when we load a model from a checkpoint  without loading
        the optimizer, the model parameters are updated but for fp16 optimizer
        with main parameters, the main parameters need to also be updated.

        Args:
            state_dict (dict, optional): When it is not None, we use the params
                from the input state_dict to initialize the main params, instead
                of using the model params for initialization. This is useful when
                the precision of the model params is lower than that of the params
                from the state dict, as it allows the main params to be more accurate.
        """
        pass

    @abstractmethod
    def state_dict(self):
        """Return state_dict."""
        pass

    @abstractmethod
    def load_state_dict(self, state_dict):
        """Load pass-in `state_dict`."""
        pass

    # Promote state so it can be retrieved or set via
    # "optimizer_instance.state"
    def _get_state(self):
        return self.optimizer.state

    def _set_state(self, value):
        self.optimizer.state = value

    state = property(_get_state, _set_state)

    # Promote param_groups so it can be retrieved or set via
    # "optimizer_instance.param_groups"
    # (for example, to adjust the learning rate)
    def _get_param_groups(self):
        if self.is_stub_optimizer:
            return []
        else:
            return self.optimizer.param_groups

    def _set_param_groups(self, value):
        self.optimizer.param_groups = value

    param_groups = property(_get_param_groups, _set_param_groups)

    @abstractmethod
    def step(self):
        """Step the optimizer."""
        pass

    @abstractmethod
    def sharded_state_dict(
        self,
        model_sharded_state_dict: ShardedStateDict,
        is_loading: bool = False,
        metadata: Optional[dict] = None,
    ) -> ShardedStateDict:
        """Builds sharded state dict for the optimizer, based on model's sharded state dict.

        Args:
            model_sharded_state_dict (ShardedStateDict): sharded state dict of the model
            is_loading (bool, optional): flag indicating whether the state dict will be
                used to save or load the optimizer state. Defaults to False.
            metadata (dict, optional): metadata controlling the sharded_state_dict logic.

        Returns: optimizer sharded state dict
        """

    @staticmethod
    def _extract_common_per_param_step(state_dict) -> Union[int, torch.Tensor, None]:
        common_step = None
        for param_idx, param_state in state_dict['state'].items():
            param_step = param_state.get('step', None)
            if param_step is not None:
                if common_step is None:
                    common_step = param_step
                elif common_step != param_step:
                    raise ValueError(
                        "The optimizer step differs per parameter. Mcore only supports "
                        "optimizers whose step is shared across all parameters."
                    )
        return common_step

    @staticmethod
    def _restore_common_per_param_step(state_dict: Dict, step: Union[int, torch.Tensor]):
        for param_idx, param_state in state_dict['state'].items():
            param_state['step'] = copy.deepcopy(step)

    def offload_to_cpu(self):
        """Function used for RL training.
        Move optimizer state tensors to CPU to free GPU memory during inference."""
        if getattr(self, 'optimizer', None) is not None and not getattr(
            self, 'is_stub_optimizer', False
        ):
            log_single_rank(logger, logging.INFO, '[OFFLOAD] moving optimizer state to CPU')
            # Move all optimizer tensors to CPU while keeping the optimizer instance
            for param_group in self.optimizer.param_groups:
                for p in param_group['params']:
                    if isinstance(p, torch.Tensor) and p.is_cuda:
                        p.data = p.data.cpu()

            for state_dict in self.optimizer.state.values():
                for k, v in state_dict.items():
                    if isinstance(v, torch.Tensor) and v.is_cuda:
                        state_dict[k] = v.cpu()

            torch.cuda.empty_cache()

    def restore_from_cpu(self):
        """Function used for RL training.
        Restore optimizer state tensors from CPU back to GPU for training."""
        if getattr(self, 'optimizer', None) is not None and not getattr(
            self, 'is_stub_optimizer', False
        ):
            log_single_rank(logger, logging.INFO, '[RESTORE] moving optimizer state back to GPU')
            # Move all optimizer tensors back to GPU
            for param_group in self.optimizer.param_groups:
                for p in param_group['params']:
                    if isinstance(p, torch.Tensor) and not p.is_cuda:
                        p.data = p.data.cuda()

            for state_dict in self.optimizer.state.values():
                for k, v in state_dict.items():
                    if isinstance(v, torch.Tensor) and not v.is_cuda:
                        state_dict[k] = v.cuda()

    @staticmethod
    def _filter_and_reorder_param_groups(
        current_groups: List[Dict], state_dict_groups: List[Dict]
    ) -> List[Dict]:
        """Filter and reorder state_dict parameter groups to match current optimizer groups.
        Keys used for matching align with those from _get_param_groups:
        (wd_mult, lr_mult, is_expert_parallel, is_decoupled_lr)

        Args:
            current_groups (List[Dict]): Parameter groups from the current optimizer instance.
            state_dict_groups (List[Dict]): Parameter groups loaded from a state dict.

        Returns:
            List[Dict]: Filtered and reordered parameter groups matching the current optimizer.

        Raises:
            ValueError: If parameter groups in state dict don't match current optimizer.
        """
        # Define groups order that is needed in the current optimizer (coming from runtime)
        needed_groups = [
            # NeMo may have different key for required fields, e.g., "wd_mult" to "pre_wd_mult"
            tuple(g[key] if key in g else g[f"pre_{key}"] for key in param_group_identifier_keys)
            for g in current_groups
        ]

        # Keep state_dict param group order since groups are LocalNonpersistentObject
        # and their order is determined at runtime, not from the checkpoint.
        params_in_state_dict_order = [g['params'] for g in state_dict_groups]
        loaded_groups_map = {
            tuple(
                # NeMo may have different key for required fields, e.g., "wd_mult" to "pre_wd_mult"
                group[key] if key in group else group[f"pre_{key}"]
                for key in param_group_identifier_keys
            ): group
            for group in state_dict_groups
        }

        final_groups = []
        for key, params in zip(needed_groups, params_in_state_dict_order):
            if key not in loaded_groups_map:
                available_keys = '\n'.join(str(k) for k in loaded_groups_map.keys())
                raise ValueError(
                    f"Could not find parameter group with key {key} in loaded checkpoint.\n"
                    f"Available keys:\n{available_keys}\n"
                    f"Parameter group key definition: {param_group_identifier_keys}"
                )

            # Update group's parameters to preserve state dict ordering
            group = loaded_groups_map[key]
            group['params'] = params
            final_groups.append(group)

        return final_groups


class MixedPrecisionOptimizer(MegatronOptimizer):
    """Base class for both the float-16 and the distributed optimizer.

    Args:
        optimizer (torch.optim.Optimizer): base optimizer such as Adam or SGD.
        config (OptimizerConfig): configuration object for optimizer.
        grad_scaler (MegatronGradScaler): used for scaling gradients. Note that
            this can be None. This case happens when `bf16 = True` and we don't
            use any loss scale. Note that for `bf16 = True`, we can have
            a constant gradient scaler. Also for `bf16 = False`, we
            always require a grad scaler.
        init_state_fn (Callable, optional): function to initialize state in the optimizer.
    """

    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        config: OptimizerConfig,
        grad_scaler: Optional[MegatronGradScaler],
        init_state_fn: Callable,
    ):
        if has_config_logger_enabled(config):
            log_config_to_disk(config, locals(), prefix=type(self).__name__)

        super().__init__(optimizer, config, init_state_fn)
        self.grad_scaler = grad_scaler

        # None grad scaler is only supported for bf16.
        if self.grad_scaler is None:
            assert not self.config.fp16, 'fp16 expects a grad scaler.'

        # Tensor used to determine if a nan/if has happend.
        # Any non-zero value indicates inf/nan.
        # Note that we keep this for the cases that grad scaler is none.
        # We still record nan/inf if we have a bfloat16 with a grad scaler.
        if self.grad_scaler:
            self.found_inf = torch.tensor([0.0], dtype=torch.float, device='cuda')

        # Dummy tensor needed for apex multi-apply tensor.
        # For bfloat, we don't have multi-tensor apply and for now
        # we set it to none so the multi-tensor apply gets ignored.
        if self.config.bf16:
            self._dummy_overflow_buf = None
        else:
            self._dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device='cuda')

        # In case grad scaler is not passed, define the unity scale.
        if self.grad_scaler is None:
            self._scale_one = torch.tensor([1.0], dtype=torch.float, device='cuda')

    def get_loss_scale(self):
        if self.grad_scaler is None:
            return self._scale_one
        return self.grad_scaler.scale

    def reload_model_params(self, state_dict=None):
        if self.param_groups:
            self._copy_model_params_to_main_params(state_dict=state_dict)

    def _unscale_main_grads_and_check_for_nan(self):

        # Collect main grads.
        if not self.is_stub_optimizer:
            main_grads = self._collect_main_grad_data_for_unscaling()

        # Reset found inf.
        self.found_inf.fill_(0.0)

        if not self.is_stub_optimizer:
            # Unscale and set found inf/nan
            torch._amp_foreach_non_finite_check_and_unscale_(
                main_grads, self.found_inf, self.grad_scaler.inv_scale
            )

        # Update across all model parallel instances.
        torch.distributed.all_reduce(
            self.found_inf,
            op=torch.distributed.ReduceOp.MAX,
            group=self.get_grad_stats_parallel_group(),
        )

        # Check for nan.
        found_inf_flag = self.found_inf.item() > 0

        return found_inf_flag

    @torch.no_grad()
    def prepare_grads(self) -> bool:
        """Pre-processing gradients before the optimizer step, returns whether inf/nan is found."""
        timers = self.config.timers

        # Copy gradients from model params to main params.
        if timers is not None:
            timers('optimizer-copy-to-main-grad', log_level=1).start(
                barrier=self.config.barrier_with_L1_time
            )
        if not self.is_stub_optimizer:
            self._copy_model_grads_to_main_grads()
        if timers is not None:
            timers('optimizer-copy-to-main-grad').stop()

        # Do unscale, check for inf, and update grad scaler only for
        # the case that grad scaler is provided.
        if self.grad_scaler:

            # Unscale and check for inf/nan.
            if timers is not None:
                timers('optimizer-unscale-and-check-inf', log_level=1).start(
                    barrier=self.config.barrier_with_L1_time
                )
            found_inf_flag = self._unscale_main_grads_and_check_for_nan()
            if timers is not None:
                timers('optimizer-unscale-and-check-inf').stop()

            # We are done with scaling gradients
            # so we can update the loss scale.
            self.grad_scaler.update(found_inf_flag)

            return found_inf_flag

        return False

    @torch.no_grad()
    def step_with_ready_grads(self) -> bool:
        """Step the optimizer with ready gradients, return successful."""
        timers = self.config.timers
        # Step the optimizer.
        if timers is not None:
            timers('optimizer-inner-step', log_level=1).start(
                barrier=self.config.barrier_with_L1_time
            )
        if not self.is_stub_optimizer:
            self.optimizer.step()
        if timers is not None:
            timers('optimizer-inner-step').stop()

        # Update params from main params.
        if timers is not None:
            timers('optimizer-copy-main-to-model-params', log_level=1).start(
                barrier=self.config.barrier_with_L1_time
            )
        if not self.is_stub_optimizer:
            if self.config.reuse_grad_buf_for_mxfp8_param_ag:
                # In the case of overlap_param_gather,
                # copy is manually called in the training loop
                if not self.config.overlap_param_gather:
                    self._copy_main_params_to_param_buffer()
            else:
                self._copy_main_params_to_model_params()

        if timers is not None:
            timers('optimizer-copy-main-to-model-params').stop()

        return True

    @torch.no_grad()
    def step(self):
        timers = self.config.timers

        found_inf_flag = self.prepare_grads()
        if found_inf_flag:
            return False, None, None

        # Clip the main gradients.
        if timers is not None:
            timers('optimizer-clip-main-grad', log_level=1).start(
                barrier=self.config.barrier_with_L1_time
            )
        grad_norm = 0.0
        if self.config.clip_grad > 0.0:
            grad_norm = self.clip_grad_norm(self.config.clip_grad)
        if timers is not None:
            timers('optimizer-clip-main-grad').stop()

        # Count the zeros in the grads.
        if timers is not None:
            timers('optimizer-count-zeros', log_level=1).start(
                barrier=self.config.barrier_with_L1_time
            )
        num_zeros_in_grad = self.count_zeros() if self.config.log_num_zeros_in_grad else 0
        if timers is not None:
            timers('optimizer-count-zeros').stop()

        success = self.step_with_ready_grads()

        # Successful update.
        return success, grad_norm, num_zeros_in_grad


class Float16OptimizerWithFloat16Params(MixedPrecisionOptimizer):
    """Float16 optimizer for fp16 and bf16 data types.

    Args:
        optimizer (torch.optim.Optimizer): base optimizer such as Adam or SGD.
        config (OptimizerConfig): configuration object for optimizer.
        grad_scaler (MegatronGradScaler): used for scaling gradients. Note that
            this can be None. This case happens when `bf16 = True` and we don't
            use any loss scale. Note that for `bf16 = True`, we can have
            a constant gradient scaler. Also for `bf16 = False`, we
            always require a grad scaler.
        init_state_fn (Callable, optional): function to initialize state in the optimizer.
    """

    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        config: OptimizerConfig,
        grad_scaler: MegatronGradScaler,
        init_state_fn: Callable,
    ):

        super().__init__(optimizer, config, grad_scaler, init_state_fn)

        # Handle main parameters.

        if optimizer:
            # Three groups of parameters:
            #   float16_groups: original float16 parameters
            #   fp32_from_float16_groups: fp32 copy of float16 parameters
            #   fp32_from_fp32_groups: original fp32 parameters
            self.float16_groups = []
            self.fp32_from_float16_groups = []
            self.fp32_from_fp32_groups = []

            # For all the groups in the original optimizer:
            for param_group in self.optimizer.param_groups:
                float16_params_this_group = []
                fp32_params_this_group = []
                fp32_from_float16_params_this_group = []
                # For all the parameters in this group:
                for i, param in enumerate(param_group['params']):
                    if param.requires_grad:

                        # float16 params:
                        if param.type() in ['torch.cuda.HalfTensor', 'torch.cuda.BFloat16Tensor']:
                            float16_params_this_group.append(param)
                            # Create a copy
                            main_param = param.detach().clone().float()
                            # Copy tensor model parallel attributes.
                            tensor_parallel.copy_tensor_model_parallel_attributes(main_param, param)
                            if hasattr(param, 'shared'):
                                main_param.shared = param.shared
                            # Replace the optimizer params with the new fp32 copy.
                            param_group['params'][i] = main_param

                            # Store handle to main_param.
                            param.main_param = main_param

                            fp32_from_float16_params_this_group.append(main_param)
                            # Reset existing state dict key to the new main param.
                            if param in self.optimizer.state:
                                self.optimizer.state[main_param] = self.optimizer.state.pop(param)
                        # fp32 params.
                        elif param.type() == 'torch.cuda.FloatTensor':
                            fp32_params_this_group.append(param)
                            param_group['params'][i] = param

                        else:
                            raise TypeError(
                                'Wrapped parameters must be one of '
                                'torch.cuda.FloatTensor,  '
                                'torch.cuda.HalfTensor, or '
                                'torch.cuda.BFloat16Tensor. '
                                'Received {}'.format(param.type())
                            )

                self.float16_groups.append(float16_params_this_group)
                self.fp32_from_float16_groups.append(fp32_from_float16_params_this_group)
                self.fp32_from_fp32_groups.append(fp32_params_this_group)
            self.is_stub_optimizer = False
        else:
            self.is_stub_optimizer = True

    def zero_grad(self, set_to_none=True):
        """We only need to zero the model related parameters, i.e.,
        float16_groups & fp32_from_fp32_groups. We additionally zero
        fp32_from_float16_groups as a memory optimization to reduce
        fragmentation; in the case of set_to_none==True, the space
        used by this field can be safely deallocated at this point."""
        if self.is_stub_optimizer:
            return
        for group in self.float16_groups:
            _zero_grad_group_helper(group, set_to_none)
        for group in self.fp32_from_float16_groups:
            _zero_grad_group_helper(group, set_to_none)
        for group in self.fp32_from_fp32_groups:
            _zero_grad_group_helper(group, set_to_none)

    def _collect_main_grad_data_for_unscaling(self):
        if self.is_stub_optimizer:
            return

        main_grads = []

        # fp32 params from float16 ones.
        for main_group in self.fp32_from_float16_groups:
            for main_param in main_group:
                if main_param.grad is not None:
                    main_grads.append(main_param.grad.data)

        # Append fp32 parameters.
        for main_group in self.fp32_from_fp32_groups:
            for main_param in main_group:
                if main_param.grad is not None:
                    main_grads.append(main_param.grad.data)

        return main_grads

    def _get_model_and_main_params_data_float16(self):
        model_data = []
        main_data = []
        for model_group, main_group in zip(self.float16_groups, self.fp32_from_float16_groups):
            for model_param, main_param in zip(model_group, main_group):
                model_data.append(model_param.data)
                main_data.append(main_param.data)
        return model_data, main_data

    def _copy_model_grads_to_main_grads(self):
        # This only needs to be done for the float16 group.
        for model_group, main_group in zip(self.float16_groups, self.fp32_from_float16_groups):
            for model_param, main_param in zip(model_group, main_group):
                if hasattr(model_param, 'main_grad'):
                    main_param.grad = model_param.main_grad.float()
                else:
                    if model_param.grad is not None:
                        main_param.grad = model_param.grad.float()

                # Safe to deallocate model's grad/main_grad after copying.
                # (If using contiguous buffers, main_grad's memory should
                # persist and therefore should not be deallocated.)
                model_param.grad = None

        # For fp32 grads, we need to reset the grads to main grad.
        for model_group in self.fp32_from_fp32_groups:
            for model_param in model_group:
                model_param.grad = model_param.main_grad

    def _copy_main_params_to_model_params(self):
        # Only needed for the float16 params.
        model_data, main_data = self._get_model_and_main_params_data_float16()
        _multi_tensor_copy_this_to_that(
            this=main_data, that=model_data, overflow_buf=self._dummy_overflow_buf
        )

    def _copy_model_params_to_main_params(self, state_dict=None):
        assert state_dict is None, "Initialize main params from state dict is not supported"
        # Only needed for the float16 params.
        model_data, main_data = self._get_model_and_main_params_data_float16()
        _multi_tensor_copy_this_to_that(
            this=model_data, that=main_data, overflow_buf=self._dummy_overflow_buf
        )

    def state_dict(self, is_loading: bool = False):
        if is_loading:
            self.init_state_fn(self.optimizer, self.config)

        state_dict = {}
        state_dict['optimizer'] = self.optimizer.state_dict()
        if self.grad_scaler:
            state_dict['grad_scaler'] = self.grad_scaler.state_dict()
        state_dict['fp32_from_fp16_params'] = self.fp32_from_float16_groups
        return state_dict

    def sharded_state_dict(
        self,
        model_sharded_state_dict: ShardedStateDict,
        is_loading: bool = False,
        metadata: Optional[dict] = None,
    ):

        if is_loading:
            self.init_state_fn(self.optimizer, self.config)

        state_dict = self.state_dict()

        id_to_sharded_param_map = get_param_id_to_sharded_param_map(
            model_sharded_state_dict, chain.from_iterable(g for g in self.float16_groups)
        )

        # Convert fp32_from_fp16_params
        assert len(state_dict['fp32_from_fp16_params']) == len(
            state_dict['optimizer']['param_groups']
        )
        state_dict['fp32_from_fp16_params'] = [
            [
                make_sharded_optimizer_tensor(
                    id_to_sharded_param_map[param_id],
                    fp32_param,
                    prefix=f'optimizer.state.fp32_param',
                )
                for param_id, fp32_param in zip(state_group['params'], fp32_group)
            ]
            for fp32_group, state_group in zip(
                state_dict['fp32_from_fp16_params'], state_dict['optimizer']['param_groups']
            )
        ]

        step = self._extract_common_per_param_step(state_dict['optimizer'])

        # Convert regular optimizer state
        # all optimizer parameters passed to optim_state_to_sharding_state are
        # expected to have the same shape as the model parameters,
        # so we save the step separately and ignore it here
        optim_state_to_sharding_state(
            state_dict['optimizer'], id_to_sharded_param_map, exclude_keys="step"
        )
        # save step as a shared step among all parameters. Separate per-parameter
        # steps are not supported
        if step:
            state_dict['optimizer']['state']['common_step'] = step
        return state_dict

    def load_state_dict(self, state_dict):
        # Optimizer.
        optimizer_key = 'optimizer'
        if optimizer_key not in state_dict:
            optimizer_key = 'optimizer_state_dict'
            logger.info('***WARNING*** loading optimizer from an old checkpoint ...')
        if 'common_step' in state_dict[optimizer_key]['state']:
            common_step = state_dict[optimizer_key]['state'].pop('common_step')
            self._restore_common_per_param_step(state_dict[optimizer_key], common_step)

        # Filter and reorder param groups to match current optimizer
        state_dict[optimizer_key]['param_groups'] = self._filter_and_reorder_param_groups(
            self.optimizer.param_groups, state_dict[optimizer_key]['param_groups']
        )
        self.optimizer.load_state_dict(state_dict[optimizer_key])

        # Grad scaler.
        if 'grad_scaler' not in state_dict:
            if self.config.fp16:
                logger.info('***WARNING*** found an old checkpoint, will not load grad scaler ...')
        else:
            if self.grad_scaler:
                self.grad_scaler.load_state_dict(state_dict['grad_scaler'])
            else:
                logger.info(
                    '***WARNING*** fould the grad scaler in the '
                    'checkpoint but it is None in the class. '
                    'Skipping loading grad scaler ...'
                )

        # Copy data for the main params.
        fp32_from_float16_params_key = 'fp32_from_fp16_params'
        if fp32_from_float16_params_key not in state_dict:
            fp32_from_float16_params_key = 'fp32_from_fp16'
        for current_group, saved_group in zip(
            self.fp32_from_float16_groups, state_dict[fp32_from_float16_params_key]
        ):
            for current_param, saved_param in zip(current_group, saved_group):
                current_param.data.copy_(saved_param.data)


class FP32Optimizer(MegatronOptimizer):
    """Float32 optimizer.

    Args:
        optimizer (torch.optim.Optimizer): base optimizer such as Adam or SGD.
        config (OptimizerConfig): configuration object for optimizer.
        init_state_fn (Callable, optional): function to initialize state in the optimizer.
    """

    def __init__(
        self, optimizer: torch.optim.Optimizer, config: OptimizerConfig, init_state_fn: Callable
    ):
        if has_config_logger_enabled(config):
            log_config_to_disk(config, locals(), prefix=type(self).__name__)

        super(FP32Optimizer, self).__init__(optimizer, config, init_state_fn)

        self._scale = torch.tensor([1.0], dtype=torch.float, device='cuda')
        self.is_stub_optimizer = True if optimizer is None else False

    def zero_grad(self, set_to_none=True):
        """Copied from torch.optim.optimizer"""
        if self.is_stub_optimizer:
            return
        for group in self.optimizer.param_groups:
            _zero_grad_group_helper(group['params'], set_to_none)

    def get_loss_scale(self):
        """FP32 optimizer does not do any scaling."""
        return self._scale

    @torch.no_grad()
    def prepare_grads(self) -> bool:
        """Pre-processing gradients before the optimizer step, returns whether inf/nan is found."""
        if self.is_stub_optimizer:
            return False
        timers = self.config.timers

        # Copy main_grads to grads.
        if timers is not None:
            timers('optimizer-copy-to-main-grad', log_level=1).start(
                barrier=self.config.barrier_with_L1_time
            )
        for param_group in self.optimizer.param_groups:
            for param in param_group['params']:
                if hasattr(param, 'main_grad'):
                    param.grad = param.main_grad
        if timers is not None:
            timers('optimizer-copy-to-main-grad').stop()

        return False

    @torch.no_grad()
    def step_with_ready_grads(self) -> bool:
        """Step the optimizer with ready gradients, return successful."""
        if self.is_stub_optimizer:
            return True
        timers = self.config.timers

        # Update parameters.
        if timers is not None:
            timers('optimizer-inner-step', log_level=1).start(
                barrier=self.config.barrier_with_L1_time
            )
        self.optimizer.step()
        if timers is not None:
            timers('optimizer-inner-step').stop()

        return True

    @torch.no_grad()
    def step(self):
        """Clip gradients (if needed) and step the base optimizer.
        Always return successful since there is no overflow."""
        timers = self.config.timers

        found_inf_flag = self.prepare_grads()
        if found_inf_flag:
            return False, None, None

        # Clip gradients.
        if timers is not None:
            timers('optimizer-clip-main-grad', log_level=1).start(
                barrier=self.config.barrier_with_L1_time
            )
        grad_norm = None
        if self.config.clip_grad > 0.0:
            grad_norm = self.clip_grad_norm(self.config.clip_grad)
        if timers is not None:
            timers('optimizer-clip-main-grad').stop()

        # Count the zeros in the grads.
        if timers is not None:
            timers('optimizer-count-zeros', log_level=1).start(
                barrier=self.config.barrier_with_L1_time
            )
        num_zeros_in_grad = self.count_zeros() if self.config.log_num_zeros_in_grad else None
        if timers is not None:
            timers('optimizer-count-zeros').stop()

        success = self.step_with_ready_grads()

        # No overflow for FP32 optimizer.
        return success, grad_norm, num_zeros_in_grad

    def reload_model_params(self, state_dict=None):
        pass

    def state_dict(self):
        return self.optimizer.state_dict()

    def load_state_dict(self, state_dict):
        if 'common_step' in state_dict['state']:
            common_step = state_dict['state'].pop('common_step')
            self._restore_common_per_param_step(state_dict, common_step)

        # Filter and reorder param groups to match current optimizer
        state_dict['param_groups'] = self._filter_and_reorder_param_groups(
            self.optimizer.param_groups, state_dict['param_groups']
        )
        self.optimizer.load_state_dict(state_dict)

    def sharded_state_dict(
        self,
        model_sharded_state_dict: ShardedStateDict,
        is_loading: bool = False,
        metadata: Optional[dict] = None,
    ):
        if is_loading:
            self.init_state_fn(self.optimizer, self.config)

        state_dict = self.state_dict()
        id_to_sharded_param_map = get_param_id_to_sharded_param_map(
            model_sharded_state_dict, self.get_parameters()
        )
        step = self._extract_common_per_param_step(state_dict)

        # all optimizer parameters passed to optim_state_to_sharding_state are
        # expected to have the same shape as the model parameters,
        # so we save the step separately and ignore it here
        optim_state_to_sharding_state(state_dict, id_to_sharded_param_map, exclude_keys="step")
        # save step as a shared step among all parameters. Separate per-parameter
        # steps are not supported
        if step:
            state_dict['state']['common_step'] = step
        return state_dict


class ProxyDict:
    """
    A dictionary-like object that proxies to a list of dictionaries.

    e.g., ProxyDict([{'a': 1}, {'b': 2}]) behaves like:
    {
        (0, 'a'): 1,
        (1, 'b'): 2,
    }
    We use tuples as keys to avoid ambiguity with the keys of the inner dicts.
    """

    def __init__(self, inner_dicts: List[dict]):
        self._inner_dicts = inner_dicts

    def __getitem__(self, key: Tuple[int, str]):
        idx, inner_key = key
        return self._inner_dicts[idx].get(inner_key)

    def __setitem__(self, key: Tuple[int, str], value: Any):
        idx, inner_key = key
        self._inner_dicts[idx][inner_key] = value

    def __len__(self) -> int:
        return sum([len(inner_dict) for inner_dict in self._inner_dicts])

    def __iter__(self):
        for idx, inner_dict in enumerate(self._inner_dicts):
            for inner_key in inner_dict:
                yield (idx, inner_key)

    def items(self):
        """Return generator over underlying items."""
        for idx, inner_dict in enumerate(self._inner_dicts):
            for inner_key, value in inner_dict.items():
                yield (idx, inner_key), value


class ChainedOptimizer(MegatronOptimizer):
    """ChainedOptimizer is designed for a collection of optimizers.

    These optimizers are responsible for different parts of multiple models for
    a training task and will be executed one-by-one when the model is updated.

    Args:
        chained_optimizers: a list of optimizers.
    """

    def __init__(self, chained_optimizers: List[MegatronOptimizer]):
        self.model_chunks = []
        # chained_optimizers would be empty in the case that a rank
        # has no trainable parameters
        if chained_optimizers:
            self.config = getattr(chained_optimizers[0], 'config', None)
            for optimizer in chained_optimizers:
                if hasattr(optimizer, 'model_chunks'):
                    for model_chunk in optimizer.model_chunks:
                        if model_chunk not in self.model_chunks:
                            self.model_chunks.append(model_chunk)
                assert self.config == getattr(optimizer, 'config', None)
            # If all optimizers are stub optimizers, the ChainedOptimizer is also a stub optimizer
            self.is_stub_optimizer = all(
                getattr(optimizer, 'is_stub_optimizer', False) for optimizer in chained_optimizers
            )

        else:
            self.is_stub_optimizer = True
        self.chained_optimizers = chained_optimizers

    @property
    def optimizer(self):
        """
        Access underlying optimizer when only one optimizer included for backward compatibility.
        """
        assert (
            len(self.chained_optimizers) == 1
        ), "ChainedOptimizer has more than one optimizer when accessing self.optimizer"
        return self.chained_optimizers[0].optimizer

    @property
    def param_groups(self) -> List[dict]:
        """Get param_groups aggregated over underlying optimizers."""
        param_groups = []
        for optimizer in self.chained_optimizers:
            param_groups += optimizer.param_groups
        return param_groups

    @override
    def get_parameters(self) -> List[torch.nn.Parameter]:
        """Get list of parameters wrapped in all chained optimizers."""
        params = []
        for optimizer in self.chained_optimizers:
            params.extend(optimizer.get_parameters())
        return params

    @property
    def state(self) -> ProxyDict:
        """
        Return optimizer state with tuple keys, where the first element is the
        index of the optimizer in the list of chained optimizers.
        """
        return ProxyDict([opt.state for opt in self.chained_optimizers])

    def zero_grad(self, set_to_none=True):
        for optimizer in self.chained_optimizers:
            optimizer.zero_grad(set_to_none)

    def get_loss_scale(self):
        if self.chained_optimizers:
            return self.chained_optimizers[0].get_loss_scale()
        else:
            return torch.tensor([1.0], dtype=torch.float32, device=torch.cuda.current_device())

    def _split_state_dict(self, state_dict):
        """Split the state dict into sub-state dicts according to the chunks of each sub-optimizer
        in this chained optimizer.

        For example, assume there are two sub-optimizers in total: the first has 1 model chunk, and
        the second has 7 model chunks. The state dict contains model0 ~ model7. This function splits
        the state dict into two sub-state dicts: the first contains model0, and the second contains
        model1 ~ model7 (but renamed as model0 ~ model6).
        """
        state_dicts = [None] * len(self.chained_optimizers)
        if state_dict is not None:
            if len(self.model_chunks) == 1:
                state_dicts[0] = state_dict
            else:
                # Split state_dict if needed
                prefix = "model" if "model0" in state_dict.keys() else "model_"
                offset = 0
                for optimizer_idx, optimizer in enumerate(self.chained_optimizers):
                    if hasattr(optimizer, "model_chunks"):
                        d = {}
                        for chunk_idx in range(len(optimizer.model_chunks)):
                            assert (
                                f"{prefix}{offset}" in state_dict
                            ), f"Wrong state_dict format, cannot find '{prefix}{offset}'"
                            d[f"{prefix}{chunk_idx}"] = state_dict[f"{prefix}{offset}"]
                            offset += 1
                        if len(d) > 0:
                            state_dicts[optimizer_idx] = d
        return state_dicts

    def reload_model_params(self, state_dict=None):
        state_dicts = self._split_state_dict(state_dict)
        for idx, optimizer in enumerate(self.chained_optimizers):
            optimizer.reload_model_params(state_dict=state_dicts[idx])

    def state_dict(self):
        if len(self.chained_optimizers) == 1:
            return self.chained_optimizers[0].state_dict()
        else:
            return [optimizer.state_dict() for optimizer in self.chained_optimizers]

    def sharded_state_dict(
        self, model_sharded_state_dict: ShardedStateDict, is_loading: bool = False, **kwargs
    ):
        metadata = kwargs.get('metadata') or {}
        # ChainedOptimizer should add its prefix to the tensor state keys only if
        # DistributedOptimizer is used (non-empty 'distrib_optim_sharding_type') and uses
        # a non fully-reshardable format. For backward compatibility we also add it
        # if `chained_optim_avoid_prefix` is False.
        from .distrib_optimizer import DistributedOptimizer

        should_add_prefix = (
            "distrib_optim_sharding_type" in metadata
            and metadata["distrib_optim_sharding_type"]
            not in DistributedOptimizer.checkpoint_fully_reshardable_formats
        ) or not metadata.get('chained_optim_avoid_prefix', False)

        if len(self.chained_optimizers) == 1:
            return self.chained_optimizers[0].sharded_state_dict(
                model_sharded_state_dict, is_loading, **kwargs
            )
        else:
            self._synchronize_steps()
            sharded_state_dict = {}
            for optimizer_idx, optimizer in enumerate(self.chained_optimizers):
                optim_state_dict = optimizer.sharded_state_dict(
                    model_sharded_state_dict, is_loading, **kwargs
                )
                if should_add_prefix:
                    add_prefix_for_sharding(optim_state_dict, f'chained_{optimizer_idx}.')
                sharded_state_dict[optimizer_idx] = optim_state_dict
            return sharded_state_dict

    def load_state_dict(self, state_dict):
        # If there is only one optimizer, we read the state dict as a single optimizer.
        if len(self.chained_optimizers) == 1:
            self.chained_optimizers[0].load_state_dict(state_dict)
            return
        if len(self.chained_optimizers) != len(state_dict):
            raise RuntimeError(
                f'Expected {len(self.chained_optimizers)} entries'
                f' in state dict, but got {len(state_dict)}.'
            )
        if isinstance(state_dict, dict):
            state_dict = (v for k, v in sorted(state_dict.items()))
        for optimizer, state in zip(self.chained_optimizers, state_dict):
            optimizer.load_state_dict(state)
        self._synchronize_steps()

    @torch.no_grad()
    def prepare_grads(self) -> bool:
        """Pre-processing gradients before the optimizer step, returns whether inf/nan is found."""
        found_inf_flag = False
        for optimizer in self.chained_optimizers:
            found_inf_flag |= optimizer.prepare_grads()

        return found_inf_flag

    @torch.no_grad()
    def step_with_ready_grads(self) -> bool:
        """Step the optimizer with ready gradients, return successful."""
        success = True
        for optimizer_idx, optimizer in enumerate(self.chained_optimizers):
            success &= optimizer.step_with_ready_grads()
            if self.config.overlap_param_gather_with_optimizer_step and optimizer_idx == 0:
                assert success
                assert len(optimizer.model_chunks) == 1
                optimizer.model_chunks[0].start_param_sync(force_dispatch=True)

        return success

    def grads_states_parallel_group_is_shared(self):
        """Check if all optimizers share the same gradient statistics parallel group."""
        reference_group = self.chained_optimizers[0].get_grad_stats_parallel_group()
        return all(
            optimizer.get_grad_stats_parallel_group() == reference_group
            for optimizer in self.chained_optimizers
        )

    def get_grad_stats_parallel_group(self) -> torch.distributed.ProcessGroup:
        assert self.grads_states_parallel_group_is_shared(), (
            "Can't use get_grad_stats_parallel_group() for ChainedOptimizer, "
            "since grads states parallel group are not shared across all optimizers"
        )
        return self.chained_optimizers[0].get_grad_stats_parallel_group()

    @torch.no_grad()
    def get_grad_norm(self):
        if len(self.chained_optimizers) == 1:
            return self.chained_optimizers[0].get_grad_norm()
        if self.grads_states_parallel_group_is_shared():
            grads_for_norm = []
            for optimizer in self.chained_optimizers:
                grads_for_norm += optimizer.get_main_grads_for_grad_norm()
            grad_norm = get_grad_norm_fp32(
                grads_for_norm, grad_stats_parallel_group=self.get_grad_stats_parallel_group()
            )
        else:
            grad_norms = []
            for optimizer in self.chained_optimizers:
                _grad_norm = optimizer.get_grad_norm()
                grad_norms += [_grad_norm if _grad_norm else 0.0]
            grad_norm = math.sqrt(sum([x**2 for x in grad_norms]))
        return grad_norm

    @torch.no_grad()
    def count_zeros(self):
        if self.grads_states_parallel_group_is_shared():
            params = []
            for optimizer in self.chained_optimizers:
                params += optimizer.get_parameters()
            return count_zeros_fp32(
                params,
                grad_stats_parallel_group=self.get_grad_stats_parallel_group(),
                use_decoupled_grad=self.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8,
            )
        else:
            num_zeros_in_grad = 0
            for optimizer in self.chained_optimizers:
                num_zeros_in_grad += (
                    optimizer.count_zeros() if optimizer.config.log_num_zeros_in_grad else 0
                )
            return num_zeros_in_grad

    @torch.no_grad()
    def step(self):
        """ChainedOptimizer will step all optimizers one by one."""
        found_inf_flag = self.prepare_grads()
        if found_inf_flag:
            return False, None, None

        grad_norm = self.get_grad_norm()

        # Clip gradients.
        for optimizer in self.chained_optimizers:
            if hasattr(optimizer, 'is_stub_optimizer') and optimizer.is_stub_optimizer:
                continue
            parameters = optimizer.get_parameters()
            if len(parameters) == 0:
                continue
            if optimizer.config.clip_grad > 0.0:
                clip_grad_by_total_norm_fp32(
                    parameters,
                    max_norm=optimizer.config.clip_grad,
                    total_norm=grad_norm,
                    use_decoupled_grad=(
                        optimizer.config.use_precision_aware_optimizer_no_fp8_or_ds_fp8
                    ),
                )

        # Count the zeros in the grads.
        num_zeros_in_grad = self.count_zeros() if self.config.log_num_zeros_in_grad else None

        update_successful = self.step_with_ready_grads()

        return update_successful, grad_norm, num_zeros_in_grad

    def save_parameter_state(self, filename: str):
        """Save the distributed parameter states of all optimizers to a file.

        Args:
            filename (str): path to save parameter state to.
        """
        if len(self.chained_optimizers) == 1:
            self.chained_optimizers[0].save_parameter_state(filename)
            return
        save_states = False
        states = []
        for optimizer in self.chained_optimizers:
            if hasattr(optimizer, 'get_parameter_state_dp_zero'):
                state_dict = optimizer.get_parameter_state_dp_zero()

                # Save checkpoint economically, only when DP rank = 0, state dict
                # needs to be saved.
                if optimizer.data_parallel_group.rank() == 0:
                    states.append(state_dict)
                    save_states = True
                else:
                    assert state_dict is None
                    states.append(None)

        if save_states:
            torch.save(states, filename)

    def load_parameter_state(self, filename: str, *, update_legacy_format: bool = False):
        """Load the distributed parameter states of all optimizers from a file.

        Args:
            filename (str): path to load parameter state from.
        """
        if len(self.chained_optimizers) == 1:
            self.chained_optimizers[0].load_parameter_state(
                filename, update_legacy_format=update_legacy_format
            )
            return
        states = None
        for idx, optimizer in enumerate(self.chained_optimizers):
            if not hasattr(optimizer, 'load_parameter_state_from_dp_zero'):
                continue

            # Lazy loading checkpoint, state dict is needed only when DP rank = 0.
            if optimizer.data_parallel_group.rank() == 0 and states is None:
                states = torch.load(filename)

            state_dict = states[idx] if states else None
            optimizer.load_parameter_state_from_dp_zero(
                state_dict, update_legacy_format=update_legacy_format
            )

    def _synchronize_steps(self):
        """
        Synchronize the step of all optimizers.
        TE FusedAdam will not accumulate "step" for empty param groups,
        so we need to align the step across param groups before saving and after loading.
        """

        steps = []
        for optimizer in self.chained_optimizers:
            for param_group in optimizer.optimizer.param_groups:
                if len(param_group['params']) > 0 and 'step' in param_group:
                    steps.append(param_group['step'])
        steps = list(set(steps))
        assert len(steps) <= 1, f"steps: {steps}"
        step = steps[0] if len(steps) == 1 else None
        for optimizer in self.chained_optimizers:
            for param_group in optimizer.optimizer.param_groups:
                if len(param_group['params']) > 0 and 'step' in param_group:
                    param_group['step'] = step

        return step

    def offload_to_cpu(self):
        """Move optimizer state to CPU to free GPU memory during inference."""
        for optimizer in self.chained_optimizers:
            optimizer.offload_to_cpu()

    def restore_from_cpu(self):
        """Restore optimizer state from CPU back to GPU for training."""
        for optimizer in self.chained_optimizers:
            optimizer.restore_from_cpu()


================================================
FILE: megatron/core/optimizer/optimizer_config.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import fnmatch
from dataclasses import dataclass, field
from typing import Callable, Optional, Tuple, Union

import torch

from ..utils import is_te_min_version


@dataclass(frozen=True)
class ParamPredicate:
    """Wraps a matching function to make it hashable for ParamKey.
    Example:
        >>> shape_1_param = ParamPredicate(name="s1", fn=lambda param: len(param.shape) == 1)
        >>> shape_1_param(torch.empty(10))
        True
        >>> shape_1_param_copy = ParamPredicate(name="s1", fn=lambda param: len(param.shape) == 1)
        >>> shape_1_param == shape_1_param_copy  # name is used to match
        True
        >>> {shape_1_param, shape_1_param_copy} == {shape_1_param}  # set hashing works properly

    NOTE:
        __hash__ and __eq__ are automatically generated by @dataclass(frozen=True)
        based solely on 'name' because we set compare=False/hash=False on 'fn'.
    """

    name: str
    fn: Callable[[torch.nn.Parameter], bool] = field(compare=False, hash=False)

    def __call__(self, param: torch.nn.Parameter) -> bool:
        return self.fn(param)


@dataclass(frozen=True)
class ParamWithNamePredicate:
    """Wraps a matching function to make it hashable for ParamKey.
    Example:
        >>> shape_1_not_qkln_param = ParamWithNamePredicate(
                name="s1_not_qkln",
                fn=lambda param, name: (
                    len(param.shape) == 1 or name.endswith(".bias")
                    and not ("q_layernorm." in name or "k_layernorm." in name)
                )
            )
        >>> shape_1_not_qkln_param(torch.empty(10), "interesting.bias")
        True
        >>> shape_1_not_qkln_param(torch.empty(10), "interesting.q_layernorm.bias")
        False

    NOTE:
        __hash__ and __eq__ are automatically generated by @dataclass(frozen=True)
        based solely on 'name' because we set compare=False/hash=False on 'fn'.
    """

    name: str
    fn: Callable[[torch.nn.Parameter, str], bool] = field(compare=False, hash=False)

    def __call__(self, param: torch.nn.Parameter, name: str) -> bool:
        return self.fn(param, name)


@dataclass(frozen=True, slots=True)
class ParamKey:
    """Key to group parameters by. All such grouped parameters can share an
    optimizer config specification."""

    # TODO: Can add layer_id here later.

    name: Union[str, Tuple[str]] = field(default_factory=tuple)
    """Parameter name(s), will use unix filesystem path syntax for matching."""

    attr: Union[str, Tuple[str]] = field(default_factory=tuple)
    """Parameter attribute(s)."""

    predicate: Union[ParamPredicate, Tuple[ParamPredicate]] = field(default_factory=tuple)
    """Predicate(s) to match parameters by. If multiple predicates are provided, any must match."""

    with_name_predicate: Union[ParamWithNamePredicate, Tuple[ParamWithNamePredicate]] = field(
        default_factory=tuple
    )
    """
    Predicate(s) to match parameters with their name. If multiple predicates are provided, 
      any must match. This is useful if you need to filter out some parameters from an otherwise 
      positive match by their name.
    """

    def matches(self, param: torch.nn.Parameter, param_name: str) -> bool:
        """Returns true if passed-in parameter (with name) matches `param_key`.

        Args:
            param (torch.nn.Parameter): Handle to parameter object.
            param_name (str): Name of parameter in underlying PyTorch module.

        Returns:
            bool: True if parameter matches passed-in param_key.
        """

        # Check if name matches.
        if isinstance(self.name, str):
            target_names = [self.name]
        else:
            target_names = list(self.name)
        for target_name in target_names:
            if fnmatch.fnmatch(param_name, target_name):
                return True

        # Check if attribute matches.
        if isinstance(self.attr, str):
            target_attrs = [self.attr]
        else:
            target_attrs = list(self.attr)
        for target_attr in target_attrs:
            if getattr(param, target_attr, False):
                return True

        # Check if predicate matches.
        if isinstance(self.predicate, ParamPredicate):
            if self.predicate(param):
                return True
        else:
            for predicate in self.predicate:
                if predicate(param):
                    return True

        # Check if with_name_predicate matches.
        if isinstance(self.with_name_predicate, ParamWithNamePredicate):
            if self.with_name_predicate(param, param_name):
                return True
        else:
            for predicate in self.with_name_predicate:
                if predicate(param, param_name):
                    return True
        return False


@dataclass
class OptimizerConfig:
    """Base optimizer configuration object."""

    ##############
    # General
    ##############

    lr: Optional[float] = None
    """Initial learning rate. Depending on decay style and initial warmup, the learning rate at each
       iteration would be different.
    """

    min_lr: Optional[float] = None
    """Minumum value for learning rate. The scheduler clip values below this threshold."""

    decoupled_lr: Optional[float] = None
    """Separate learning rate for the input and output layer."""

    decoupled_min_lr: Optional[float] = None
    """Minimum value for learning rate for the input and output layer. The scheduler clip values
       below this threshold.
    """

    weight_decay: float = 0.01
    """Weight decay coefficient for L2 regularization."""

    apply_wd_to_qk_layernorm: bool = False
    """If true, apply weight decay to qk layernorm as a special case."""

    ##############
    # Precision
    ##############
    fp8_recipe: Optional[str] = None
    """The type of fp8 recipe will affect the processing logic inside distributed optimizer."""

    fp16: bool = False
    """If true, train with fp16 mixed precision training. Defaults to False."""

    bf16: bool = False
    """If true, train with bf16 mixed precision training. Defaults to False."""

    reuse_grad_buf_for_mxfp8_param_ag: bool = False
    """If true, reuse the grad buffer for param AG when using mxfp8 recipe. Should be 
       set to True only when fp8_recipe is mxfp8 and fp8_param_gather is True."""

    params_dtype: torch.dtype = torch.float32
    """dtype used when intializing the weights. Defaults to torch.float32."""

    use_precision_aware_optimizer: bool = False
    """If true, allows optimizer-related tensors (master_param, gradients and optimizer states)
    to be set to lower precision. Defaults to False.
    """

    store_param_remainders: bool = True
    """If true, store the 16-bit FP32 parameter remainders in the optimizer state, excluding the
        16 bits shared with the BF16 parameters. This lowers GPU memory usage. Defaults to True.
    """

    main_grads_dtype: torch.dtype = torch.float32
    """dtype of main grads when enabling precision-aware-optimizer"""

    main_params_dtype: torch.dtype = torch.float32
    """dtype of main params when enabling precision-aware-optimizer"""

    exp_avg_dtype: torch.dtype = torch.float32
    """dtype of exp_avg when enabling precision-aware-optimizer"""

    exp_avg_sq_dtype: torch.dtype = torch.float32
    """dtype of exp_avg_sq when enabling precision-aware-optimizer"""

    optimizer: str = 'adam'
    """Optimizer name. NOTE: Deprecated, use individual optimizer classes instead."""

    ###############
    # Loss scaling
    ###############
    loss_scale: Optional[float] = None
    """Static loss scaling, positive power of 2 values can improve fp16 convergence. If None,
       dynamic loss scaling is used.
    """

    initial_loss_scale: float = 2**32
    """Initial loss-scale for dynamic loss scaling."""

    min_loss_scale: float = 1.0
    """Minimum loss scale for dynamic loss scaling."""

    loss_scale_window: float = 1000
    """Window over which to raise/lower dynamic scale."""

    hysteresis: int = 2
    """Hysteresis for dynamic loss scaling."""

    ###################################################################################
    # Optimizer (NOTE: Deprecated, use individual optimizer classes instead.).
    ###################################################################################
    # Adam.
    adam_beta1: float = 0.9
    """First coefficient for computing running averages of gradient and its square in Adam
    optimizer.
    """

    adam_beta2: float = 0.999
    """Second coefficient for computing running averages of gradient and its square in Adam
    optimizer.
    """

    adam_eps: float = 1e-08
    """Term added to the denominator to improve numerical stability in Adam optimizer."""

    decoupled_weight_decay: bool = True
    """If true, decouples weight decay from the gradient update, equivalent to AdamW. If false,
    original Adam update rule will be used. Defaults to True.
    """

    # SGD.
    sgd_momentum: float = 0.9
    """Momentum factor for SGD optimizer."""

    # Muon.
    # TODO: move muon configs to it's own `MuonConfig`.
    muon_momentum: float = 0.95
    """The momentum used by the internal SGD."""

    muon_split_qkv: bool = True
    """Whether to split QKV parameters for Muon optimizer."""

    muon_use_nesterov: bool = False
    """Whether to use Nesterov-style momentum in the internal SGD."""

    muon_scale_mode: str = "spectral"
    """The mode to use for the scale factor. Defaults to "spectral"."""

    muon_fp32_matmul_prec: str = "medium"
    """The precision to use for the fp32 matmul. Defaults to "medium"."""

    muon_num_ns_steps: int = 5
    """The number of iteration steps to use in the Newton-Schulz iteration."""

    muon_tp_mode: str = "blockwise"
    """How to perform NS calculation for tensor parallel weights. Defaults to "blockwise"."""

    muon_extra_scale_factor: float = 1.0
    """Additional scale factor for the muon update."""

    muon_scalar_optimizer: str = 'adam'
    """Optimizer for nonlinear parameters (embeddings, biases, norms) when using muon.
    One of 'adam' or 'lion'. Defaults to 'adam'."""

    # Lion.
    lion_beta1: float = 0.95
    """First beta coefficient for Lion optimizer (used in sign update). Defaults to 0.95."""

    lion_beta2: float = 0.98
    """Second beta coefficient for Lion optimizer (used in momentum EMA update).
    Defaults to 0.98."""

    #######################
    # Distributed optimizer
    #######################
    use_distributed_optimizer: bool = False
    """Distribute optimizer state over data-parallel replicas."""

    overlap_param_gather: bool = False
    """If true, overlap param all-gather with forward compute. 
        This argument is intended to have the same value as the "overlap_param_gather" argument 
        in the "distributed_data_parallel_config.py" file. In the optimizer, this argument is 
        only used when "reuse_grad_buf_for_mxfp8_param_ag=True & fp8_param_gather=True".
    """

    overlap_param_gather_with_optimizer_step: bool = False
    """If true, overlap param all-gather of first bucket with optimizer step."""

    #######################
    # Optimizer Offload
    #######################

    optimizer_cpu_offload: bool = False
    """If True, offload optimizer states tensor and compute to CPU."""

    optimizer_offload_fraction: float = 0.0
    """Specifies the fraction of optimizer states to offload from GPU memory to CPU."""

    use_torch_optimizer_for_cpu_offload: bool = False
    """If True, use torch.optim.Optimizer for CPU offload."""

    overlap_cpu_optimizer_d2h_h2d: bool = False
    """
    When set to `True`, this flag enables overlapping of the CPU optimizer
    update process with the data transfer operations. This can help improve
    overall training efficiency by reducing idle time during data movement,
    allowing the optimizer to perform updates while gradients and parameters
    are being transferred between devices.
    """

    pin_cpu_grads: bool = True
    """If True, pin the optimizer gradients to CPU memory."""

    pin_cpu_params: bool = True
    """If True, pin the optimizer parameters to CPU memory."""

    ################
    # Miscellaneous
    ################
    clip_grad: float = 1.0
    """Gradient clipping based on global L2 norm."""

    log_num_zeros_in_grad: bool = False
    """If true, calculate and log the number of zeros in gradient."""

    barrier_with_L1_time: bool = False
    """If true, use barrier with level 1 time measurements."""

    timers: Optional[Callable] = None
    """Function to get timers."""

    config_logger_dir: str = ""
    """When non-empty, dumps entry-point configs to config_logger_dir"""

    def __post_init__(self):
        """Check the validity of the config."""

        # The following condition is used to avoid repetition in distrib_optimizer.py.
        # This is because in distrib_optimizer.py, the process to handle parameters are
        # different for different training precision settings. FP8 cases require different
        # handling while FP8 delayed scaling is an exception because the Adam optimizer in
        # TransformerEngine supports it in the kernel computation.
        # This is also the flag to determine the usage of param.grad or param.decoupled_grad
        self.use_precision_aware_optimizer_no_fp8_or_ds_fp8 = (
            self.use_precision_aware_optimizer
            and (
                self.main_params_dtype != torch.float32
                or (self.fp8_recipe is None or self.fp8_recipe == "delayed")
                or self.optimizer_cpu_offload
            )
        )

        if self.fp8_recipe == "mxfp8":
            if not self.reuse_grad_buf_for_mxfp8_param_ag:
                import warnings

                warnings.warn(
                    "mxfp8 without using reuse_grad_buf_for_mxfp8_param_ag and fp8_param_gather"
                    "will use significant amount additional GPU memory."
                    "Setting --reuse-grad-buf-for-mxfp8-param-ag and --fp8-param-gather is "
                    "recommended for mxfp8 training."
                )

        if self.use_precision_aware_optimizer:
            assert (
                self.optimizer == 'adam'
            ), '--use-precision-aware-optimizer only supported with adam'
            assert (
                self.use_distributed_optimizer
            ), '--use-precision-aware-optimizer only supported with distributed optimizer'

            if not is_te_min_version("2.1.0"):
                self.store_param_remainders = False

            # Only the FusedAdam in TE and HybridDeviceOptimizer supports
            # --use-precision-aware-optimizer.
            # TODO: Remove this check when apex's FusedAdam is no longer used.
            if self.optimizer_cpu_offload:
                return
            try:
                import inspect

                # TODO: Move this below?
                from transformer_engine.pytorch.optimizers import FusedAdam as Adam

                adam_args = inspect.signature(Adam).parameters
                arg_names = [
                    'master_weight_dtype',
                    'exp_avg_dtype',
                    'exp_avg_sq_dtype',
                    'use_decoupled_grad',
                ]
                for name in arg_names:
                    assert name in adam_args, (
                        "Current FusedAdam of TE doesn't support --use-precision-aware-optimizer, "
                        "please update TE version."
                    )
            except ImportError:
                raise RuntimeError(
                    '--use-precision-aware-optimizer requires FusedAdam from TransformerEngine, '
                    'but not found.'
                )
        else:
            assert (
                self.main_grads_dtype == torch.float32
            ), "main_grads_dtype can only be fp32 when not using precision-aware optimizer"
            assert (
                self.main_params_dtype == torch.float32
            ), "main_params_dtype can only be fp32 when not using precision-aware optimizer"
            assert (
                self.exp_avg_dtype == torch.float32
            ), "exp_avg_dtype can only be fp32 when not using precision-aware optimizer"
            assert (
                self.exp_avg_sq_dtype == torch.float32
            ), "exp_avg_sq_dtype can only be fp32 when not using precision-aware optimizer"


@dataclass
class AdamOptimizerConfig(OptimizerConfig):
    """Adam optimizer configuration object."""

    optimizer: str = 'adam'
    """Optimizer name."""

    adam_beta1: float = 0.9
    """First coefficient for computing running averages of gradient and its square in Adam
    optimizer.
    """

    adam_beta2: float = 0.999
    """Second coefficient for computing running averages of gradient and its square in Adam
    optimizer.
    """

    adam_eps: float = 1e-08
    """Term added to the denominator to improve numerical stability in Adam optimizer."""


@dataclass
class SGDOptimizerConfig(OptimizerConfig):
    """SGD optimizer configuration object."""

    optimizer: str = 'sgd'
    """Optimizer name."""

    sgd_momentum: float = 0.9
    """Momentum factor for SGD optimizer."""


================================================
FILE: megatron/core/optimizer/qk_clip.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import torch

from megatron.core import parallel_state


def clip_qk(model, log_max_only=False) -> float:
    """
    Clip the QK attention logits to the threshold, recommended for Muon optimizer.

    Args:
        model: The model to clip the QK attention logits, a list of model chunks.
        log_only: Whether to only log the max attention logit, without updating the weights.

    Returns:
        The maximum attention logit, a float.
    """

    with torch.no_grad():
        log_max_attention_logit = 0
        for model_chunk in model:
            for transformer_layer in model_chunk.module.module.decoder.layers:
                if hasattr(transformer_layer.self_attention, 'clip_qk'):
                    if (
                        transformer_layer.self_attention.core_attention.current_max_attn_logits
                        is None
                    ):
                        continue
                    torch.distributed.all_reduce(
                        transformer_layer.self_attention.core_attention.current_max_attn_logits,
                        op=torch.distributed.ReduceOp.MAX,
                        group=parallel_state.get_data_parallel_group(with_context_parallel=True),
                    )
                    log_max_attention_logit = max(
                        log_max_attention_logit,
                        torch.max(
                            transformer_layer.self_attention.core_attention.current_max_attn_logits
                        ).item(),
                    )
                    if not log_max_only:
                        transformer_layer.self_attention.clip_qk()

    return log_max_attention_logit


================================================
FILE: megatron/core/optimizer_param_scheduler.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""Learning rate decay and weight decay incr functions."""
import logging
import math
from typing import TYPE_CHECKING, Any, Optional, TypedDict

from megatron.core.utils import log_single_rank

if TYPE_CHECKING:
    # Avoid circular import.
    from megatron.core.optimizer import MegatronOptimizer

logger = logging.getLogger(__name__)


class ParamGroupOverride(TypedDict):
    """Override values for a parameter group. These values may be optimizer-state/scheduler related.

    These are the values you see later in param_group.get(...) calls in the
        OptimizerParamScheduler.get_lr and get_wd methods. If you use a custom optimizer
        or scheduler, you could override those variables instead.

    Example:
        >>> param_group_override = ParamGroupOverride(min_lr=1e-4, wd_mult=0.1)
        >>> param_group_override == ParamGroupOverride(newvar=3) # this is ok too

    """

    max_lr: float
    min_lr: float
    start_wd: float
    end_wd: float
    wd_mult: float


def get_canonical_lr_for_logging(param_groups: list[dict]) -> float | None:
    """Return the lr of the first ``default_config=True`` param group.

    All ``default_config`` groups share the same LR schedule, so the first one
    is representative.  This includes empty rank-alignment stub groups, which
    the scheduler still writes a valid lr onto.

    Args:
        param_groups (list[dict]): parameter groups from the optimizer.

    Returns:
        float | None: the canonical learning rate, or None if no
            ``default_config=True`` group is found.
    """
    for param_group in param_groups:
        if param_group.get('default_config', False):
            return param_group.get('lr')
    return None


def param_group_override_to_tuple(
    param_group_override: ParamGroupOverride | None,
) -> tuple[tuple[str, Any], ...] | None:
    """Convert a param group override to a tuple for use as a key in a dictionary.

    The tuple is sorted by the keys of the param group override to handle different orderings of
     the keys in different override dictionaries which still mean the same thing.
    """
    if param_group_override is None:
        return None
    return tuple(sorted(param_group_override.items()))


def combine_param_group_overrides(
    param_group_overrides: list[ParamGroupOverride | None],
) -> ParamGroupOverride:
    """Combine a list of param group overrides into a single param group override.

    This function ensures that the overrides are not conflicting as well.

    Args:
        param_group_overrides (list[ParamGroupOverride]): list of param group overrides to combine

    Returns:
        ParamGroupOverride: combined param group override
    """
    combined_override = ParamGroupOverride()
    for override in param_group_overrides:
        if override is None:
            continue
        for key, value in override.items():
            if key in combined_override:
                if combined_override[key] != value:
                    raise ValueError(
                        f"Conflicting overrides for {key}: {combined_override[key]} and {value}"
                    )
            combined_override[key] = value
    return combined_override


class OptimizerParamScheduler:
    """Anneals learning rate and weight decay

    Args:
        optimizer (MegatronOptimizer): the optimizer to be used
        init_lr (float): initial learning rate
        max_lr (float): maximum learning rate
        min_lr (float): minimum learning rate
        lr_warmup_steps (int): number of warmup steps
        lr_decay_steps (int): number of decay steps
        lr_decay_style (str): decay style for learning rate
        start_wd (float): initial weight decay
        end_wd (float): final weight decay
        wd_incr_steps (int): number of weight decay increment steps
        wd_incr_style (str): weight decay increment style
        use_checkpoint_opt_param_scheduler (bool, optional): whether to use the checkpoint values
            for the optimizer param scheduler
        override_opt_param_scheduler (bool, optional): whether to override the optimizer param
            scheduler values with the class values
        wsd_decay_steps (int, optional): number of weight decay decay steps
        lr_wsd_decay_style (str, optional): decay style for learning rate during weight decay decay
            steps

    """

    def __init__(
        self,
        optimizer: "MegatronOptimizer",
        init_lr: float,
        max_lr: float,
        min_lr: float,
        lr_warmup_steps: int,
        lr_decay_steps: int,
        lr_decay_style: str,
        start_wd: float,
        end_wd: float,
        wd_incr_steps: int,
        wd_incr_style: str,
        use_checkpoint_opt_param_scheduler: Optional[bool] = True,
        override_opt_param_scheduler: Optional[bool] = False,
        wsd_decay_steps: Optional[int] = None,
        lr_wsd_decay_style: Optional[str] = None,
    ) -> None:

        # Class values.
        self.optimizer = optimizer

        self.init_lr = init_lr
        self.max_lr = float(max_lr)
        self.min_lr = min_lr
        assert self.min_lr >= 0.0
        assert self.max_lr >= self.min_lr
        assert self.init_lr <= self.max_lr

        self.lr_warmup_steps = lr_warmup_steps
        self.num_steps = 0
        self.lr_decay_steps = lr_decay_steps
        self.wsd_decay_steps = wsd_decay_steps
        self.lr_wsd_decay_style = lr_wsd_decay_style
        assert self.lr_decay_steps > 0
        assert self.lr_warmup_steps < self.lr_decay_steps

        self.lr_decay_style = lr_decay_style
        if self.lr_decay_style == "WSD":
            assert self.wsd_decay_steps is not None

        self.start_wd = start_wd
        self.end_wd = end_wd
        assert self.start_wd >= 0.0
        assert self.end_wd >= self.start_wd
        self.wd_incr_steps = wd_incr_steps
        self.wd_incr_style = wd_incr_style

        self.override_opt_param_scheduler = override_opt_param_scheduler
        self.use_checkpoint_opt_param_scheduler = use_checkpoint_opt_param_scheduler
        if self.override_opt_param_scheduler:
            assert not self.use_checkpoint_opt_param_scheduler, (
                'both override and ' 'use-checkpoint are set.'
            )

        # Set the learning rate
        self.step(0)
        log_single_rank(logger, logging.INFO, f"> learning rate decay style: {self.lr_decay_style}")

    def get_wd(self, param_group: Optional[dict] = None) -> float:
        """Weight decay incr functions

        Args:
            param_group (dict): parameter group from the optimizer."""

        if param_group is not None:
            start_wd = param_group.get('start_wd', self.start_wd)
            end_wd = param_group.get('end_wd', self.end_wd)
        else:
            start_wd = self.start_wd
            end_wd = self.end_wd

        if self.num_steps > self.wd_incr_steps:
            return end_wd

        if self.wd_incr_style == 'constant':
            assert start_wd == end_wd
            return end_wd

        incr_ratio = float(self.num_steps) / float(self.wd_incr_steps)
        assert incr_ratio >= 0.0
        assert incr_ratio <= 1.0
        delta_wd = end_wd - start_wd

        if self.wd_incr_style == 'linear':
            coeff = incr_ratio
        elif self.wd_incr_style == 'cosine':
            coeff = 0.5 * (math.cos(math.pi * (1 - incr_ratio)) + 1.0)
        else:
            raise Exception(f'{self.wd_incr_style} weight decay increment style is not supported.')

        return start_wd + coeff * delta_wd

    def get_lr(self, param_group: dict) -> float:
        """Learning rate decay functions from:
        https://openreview.net/pdf?id=BJYwwY9ll pg. 4

        Args:
            param_group (dict): parameter group from the optimizer.
        """

        max_lr = param_group.get('max_lr', self.max_lr)
        min_lr = param_group.get('min_lr', self.min_lr)

        # Use linear warmup for the initial part.
        if self.lr_warmup_steps > 0 and self.num_steps <= self.lr_warmup_steps:
            return self.init_lr + (
                (max_lr - self.init_lr) * float(self.num_steps) / float(self.lr_warmup_steps)
            )

        # If the learning rate is constant, just return the initial value.
        if self.lr_decay_style == 'constant':
            return max_lr

        # For any steps larger than `self.lr_decay_steps`, use `min_lr`.
        if self.num_steps > self.lr_decay_steps:
            return min_lr

        # If we are done with the warmup period, use the decay style.
        if self.lr_decay_style == 'inverse-square-root':
            warmup_steps = max(self.lr_warmup_steps, 1)
            num_steps = max(self.num_steps, 1)
            lr = max_lr * warmup_steps**0.5 / (num_steps**0.5)
            return max(min_lr, lr)

        num_steps_ = self.num_steps - self.lr_warmup_steps
        decay_steps_ = self.lr_decay_steps - self.lr_warmup_steps
        decay_ratio = float(num_steps_) / float(decay_steps_)
        assert decay_ratio >= 0.0
        assert decay_ratio <= 1.0
        delta_lr = max_lr - min_lr

        coeff = None
        if self.lr_decay_style == 'linear':
            coeff = 1.0 - decay_ratio
        elif self.lr_decay_style == 'cosine':
            coeff = 0.5 * (math.cos(math.pi * decay_ratio) + 1.0)
        elif self.lr_decay_style == 'WSD':
            wsd_anneal_start_ = self.lr_decay_steps - self.wsd_decay_steps
            if self.num_steps <= wsd_anneal_start_:
                coeff = 1.0
            else:
                wsd_steps = self.num_steps - wsd_anneal_start_
                wsd_decay_ratio = float(wsd_steps) / float(self.wsd_decay_steps)
                if self.lr_wsd_decay_style == "linear":
                    coeff = 1.0 - wsd_decay_ratio
                elif self.lr_wsd_decay_style == "cosine":
                    coeff = 0.5 * (math.cos(math.pi * wsd_decay_ratio) + 1.0)
                elif self.lr_wsd_decay_style == "exponential":
                    coeff = (2.0 * math.pow(0.5, wsd_decay_ratio)) - 1.0
                elif self.lr_wsd_decay_style == "minus_sqrt":
                    coeff = 1.0 - math.sqrt(wsd_decay_ratio)

        else:
            raise Exception(f'{self.lr_decay_style} decay style is not supported.')
        assert coeff is not None

        return min_lr + coeff * delta_lr

    def step(self, increment: int) -> None:
        """Set lr for all parameters groups.

        Args:
            increment (int): number of steps to increment
        """
        self.num_steps += increment
        # Do not skip empty param groups: get_canonical_lr_for_logging reads lr
        # from default_config groups regardless of whether they hold parameters.
        # This is important for logging under model parallelism that may leave
        # some ranks with empty default_config parameter groups.
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = self.get_lr(param_group)
            param_group['weight_decay'] = self.get_wd(param_group) * param_group.get('wd_mult', 1.0)

    def state_dict(self) -> dict:
        """Return the state dict."""
        state_dict = {
            'max_lr': self.max_lr,
            'lr_warmup_steps': self.lr_warmup_steps,
            'num_steps': self.num_steps,
            'lr_decay_style': self.lr_decay_style,
            'lr_decay_steps': self.lr_decay_steps,
            'min_lr': self.min_lr,
            'start_wd': self.start_wd,
            'end_wd': self.end_wd,
            'wd_incr_style': self.wd_incr_style,
            'wd_incr_steps': self.wd_incr_steps,
        }
        return state_dict

    def _check_and_set(self, cls_value: float, sd_value: float, name: str) -> float:
        """Auxiliary function for checking the values in the checkpoint and
        setting them.

        Args:
            cls_value (float): class value
            sd_value (float): checkpoint value
            name (str): name of the parameter
        """

        if self.override_opt_param_scheduler:
            log_single_rank(logger, logging.INFO, f" > overriding {name} value to {cls_value}")
            return cls_value

        if not self.use_checkpoint_opt_param_scheduler:
            assert cls_value == sd_value, (
                f'OptimizerParamScheduler: class input value {cls_value} and checkpoint'
                f'value {sd_value} for {name} do not match'
            )

        log_single_rank(logger, logging.INFO, f" > using checkpoint value {sd_value} for {name}")
        return sd_value

    def load_state_dict(self, state_dict: dict) -> None:
        """Load the state dict.

        Args:
            state_dict (dict): state dict to be load
        """

        if 'start_lr' in state_dict:
            max_lr_ = state_dict['start_lr']
        else:
            max_lr_ = state_dict['max_lr']
        self.max_lr = self._check_and_set(self.max_lr, max_lr_, 'learning rate')

        self.min_lr = self._check_and_set(
            self.min_lr, state_dict['min_lr'], 'minimum learning rate'
        )

        if 'warmup_iter' in state_dict:
            lr_warmup_steps_ = state_dict['warmup_iter']
        elif 'warmup_steps' in state_dict:
            lr_warmup_steps_ = state_dict['warmup_steps']
        else:
            lr_warmup_steps_ = state_dict['lr_warmup_steps']
        self.lr_warmup_steps = self._check_and_set(
            self.lr_warmup_steps, lr_warmup_steps_, 'warmup iterations'
        )

        if 'end_iter' in state_dict:
            lr_decay_steps_ = state_dict['end_iter']
        elif 'decay_steps' in state_dict:
            lr_decay_steps_ = state_dict['decay_steps']
        else:
            lr_decay_steps_ = state_dict['lr_decay_steps']
        self.lr_decay_steps = self._check_and_set(
            self.lr_decay_steps, lr_decay_steps_, 'total number of iterations'
        )

        if 'decay_style' in state_dict:
            lr_decay_style_ = state_dict['decay_style']
        else:
            lr_decay_style_ = state_dict['lr_decay_style']
        self.lr_decay_style = self._check_and_set(
            self.lr_decay_style, lr_decay_style_, 'learning rate decay style'
        )

        if 'num_iters' in state_dict:
            num_steps = state_dict['num_iters']
        else:
            num_steps = state_dict['num_steps']
        self.step(increment=num_steps)

        if 'start_wd' in state_dict:
            self.start_wd = self._check_and_set(
                self.start_wd, state_dict['start_wd'], "start weight decay"
            )
            self.end_wd = self._check_and_set(self.end_wd, state_dict['end_wd'], "end weight decay")
            self.wd_incr_steps = self._check_and_set(
                self.wd_incr_steps,
                state_dict['wd_incr_steps'],
                "total number of weight decay iterations",
            )
            self.wd_incr_style = self._check_and_set(
                self.wd_incr_style, state_dict['wd_incr_style'], "weight decay incr style"
            )


================================================
FILE: megatron/core/package_info.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION.  All rights reserved.


MAJOR = 0
MINOR = 16
PATCH = 0
PRE_RELEASE = 'rc0'

# Use the following formatting: (major, minor, patch, pre-release)
VERSION = (MAJOR, MINOR, PATCH, PRE_RELEASE)

__shortversion__ = '.'.join(map(str, VERSION[:3]))
__version__ = '.'.join(map(str, VERSION[:3])) + ''.join(VERSION[3:])

__package_name__ = 'megatron_core'
__contact_names__ = 'NVIDIA'
__contact_emails__ = 'nemo-toolkit@nvidia.com'  # use NeMo Email
__homepage__ = 'https://docs.nvidia.com/megatron-core/developer-guide/latest/user-guide/index.html'
__repository_url__ = 'https://github.com/NVIDIA/Megatron-LM/tree/main/megatron/core'
__download_url__ = 'https://github.com/NVIDIA/Megatron-LM/releases'
__description__ = (
    'Megatron Core - a library for efficient and scalable training of transformer based models'
)
__license__ = 'BSD-3'
__keywords__ = (
    'deep learning, machine learning, gpu, NLP, NLU, language, transformer, nvidia, pytorch, torch'
)


================================================
FILE: megatron/core/packed_seq_params.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from dataclasses import dataclass

import torch
import torch.distributed as dist
from torch import Tensor


@dataclass
class PackedSeqParams:
    '''
    parameters to TEDotProductAttention and fused rope kernels for the
    `thd` (packed) sequence format
    '''

    qkv_format: str = None
    cu_seqlens_q: Tensor = None
    cu_seqlens_kv: Tensor = None
    cu_seqlens_q_padded: Tensor = None
    cu_seqlens_kv_padded: Tensor = None
    max_seqlen_q: int = None
    max_seqlen_kv: int = None
    local_cp_size: int = None
    cp_group: dist.ProcessGroup = None
    total_tokens: int = None
    seq_idx: Tensor = None

    def __post_init__(self):
        """Pre-compute seq_idx for Mamba mixer CUDA graph compatibility.

        If total_tokens is 16 (for example), this method takes packed_seq_params.cu_seqlens_q_padded
        (or cu_seqlens_q) which is of the form [0, 5, 7, 11] and returns a tensor of the form
        [0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3],
        which is [0]*(5-0) + [1]*(7-5) + [2]*(11-7) + [3]*(16-11)
        In the above example, there are three sequences in the pack.
        In general, the output has an additional sequence index (e.g. 0, 1, 2, 3) so that any tokens
        beyond the last padded input sequence are accounted for as an extra sequence. However, If
        cu_seqlens_q_padded[-1] == max_seqlen then this additional sequence index will not be
        included.
        """
        cu_seqlens = (
            self.cu_seqlens_q_padded if self.cu_seqlens_q_padded is not None else self.cu_seqlens_q
        )
        if isinstance(cu_seqlens, Tensor) and self.total_tokens is not None:
            total_tokens_tensor = torch.tensor(
                [self.total_tokens], dtype=cu_seqlens.dtype, device=cu_seqlens.device
            )
            # Example: [0, 5, 7, 11] -> [0, 5, 7, 11, 16]
            cu_seqlens_with_max = torch.cat([cu_seqlens, total_tokens_tensor])
            # Example: [0, 5, 7, 11, 16] -> [5, 2, 4, 5]
            seq_lengths = cu_seqlens_with_max[1:] - cu_seqlens_with_max[:-1]
            # Clamp to non-negative: cu_seqlens_q_padded may not be strictly
            # monotonic when context parallelism slices sequences across ranks,
            # or when padded cumulative lengths exceed total_tokens (e.g. the
            # appended total_tokens sentinel is smaller than cu_seqlens[-1]
            # due to padding). In either case the diff can go negative, which
            # causes torch.repeat_interleave to fail.
            seq_lengths = seq_lengths.clamp(min=0)
            # Example: [5, 2, 4, 5] -> [0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3]
            self.seq_idx = (
                torch.repeat_interleave(
                    torch.arange(seq_lengths.numel(), device=cu_seqlens.device), seq_lengths
                )
                .to(torch.int32)
                .unsqueeze(0)  # Add a batch dimension
            )


================================================
FILE: megatron/core/parallel_state.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""Model and data parallel groups."""

import logging
import os
import warnings
from datetime import timedelta
from math import log2
from typing import Callable, List, Optional

import numpy as np
import torch

from megatron.core.inference.symmetric_memory import SymmetricMemoryManager

from .utils import GlobalMemoryBuffer, is_torch_min_version

logger = logging.getLogger(__name__)

try:
    import einops

    HAVE_EINOPS = True
except ImportError:
    HAVE_EINOPS = False

# Intra-layer model parallel group that the current rank belongs to.
_TENSOR_MODEL_PARALLEL_GROUP = None
# Inter-layer model parallel group that the current rank belongs to.
_PIPELINE_MODEL_PARALLEL_GROUP = None
# Model parallel group (both intra- and pipeline) that the current rank belongs to.
_MODEL_PARALLEL_GROUP = None
# Model parallel group (both intra-, pipeline, and expert) that the current rank belongs to.
# Embedding group.
_EMBEDDING_GROUP = None
# Position embedding group.
_POSITION_EMBEDDING_GROUP = None
# Data parallel group that the current rank belongs to.
_DATA_PARALLEL_GROUP = None
_DATA_PARALLEL_GROUP_GLOO = None
# tensor model parallel group and data parallel group combined
# used for fp8 and moe training
_TENSOR_AND_DATA_PARALLEL_GROUP = None

### Expert-related parallel states
# Naming convention:
# _EXPERT prefix in group name means it's used for expert layer in MoE models.
# _EXPERT_MODEL denotes expert parallelism which splits number of experts across the group.
# _EXPERT_TENSOR denotes tensor parallelism of expert which splits tensor across the group.
# _EXPERT_DATA denotes data parallelism of expert which replicates weight across the group.

# Expert model parallel group that current rank belongs to.
_EXPERT_MODEL_PARALLEL_GROUP = None
# Expert tensor parallel group that current rank belongs to.
_EXPERT_TENSOR_PARALLEL_GROUP = None
# Expert tensor and model combined parallel group
_EXPERT_TENSOR_AND_MODEL_PARALLEL_GROUP = None
# Expert tensor, model, pipeline combined parallel group
_EXPERT_TENSOR_MODEL_PIPELINE_PARALLEL_GROUP = None
# Expert data parallel group
_EXPERT_DATA_PARALLEL_GROUP = None
_EXPERT_DATA_PARALLEL_GROUP_GLOO = None
_INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP = None
_INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO = None
_INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP = None
# Parallel state values changed on the fly
_MPU_EXPERT_MODEL_PARALLEL_WORLD_SIZE = None
_MPU_EXPERT_MODEL_PARALLEL_RANK = None
_MPU_EXPERT_TENSOR_PARALLEL_WORLD_SIZE = None
_MPU_EXPERT_TENSOR_PARALLEL_RANK = None
### End of expert related parallel states

_VIRTUAL_PIPELINE_MODEL_PARALLEL_RANK = None
_VIRTUAL_PIPELINE_MODEL_PARALLEL_WORLD_SIZE = None

# These values enable us to change the mpu sizes on the fly.
_MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE = None
_MPU_PIPELINE_MODEL_PARALLEL_WORLD_SIZE = None
_MPU_DATA_PARALLEL_WORLD_SIZE = None
_MPU_DATA_PARALLEL_RANK = None
_MPU_TENSOR_MODEL_PARALLEL_RANK = None
_MPU_PIPELINE_MODEL_PARALLEL_RANK = None

# A list of ranks that have a copy of the embedding.
_EMBEDDING_GLOBAL_RANKS = None

# A list of ranks that have a copy of the position embedding.
_POSITION_EMBEDDING_GLOBAL_RANKS = None

# A list of global ranks for each pipeline group to ease calculation of the source
# rank when broadcasting from the first or last pipeline stage.
_PIPELINE_GLOBAL_RANKS = None

# A list of global ranks for each data parallel group to ease calculation of the source
# rank when broadcasting weights from src to all other data parallel ranks
_DATA_PARALLEL_GLOBAL_RANKS = None

# A list of global ranks for each tensor model parallel group to ease calculation of
# the first local rank in the tensor model parallel group
_TENSOR_MODEL_PARALLEL_GLOBAL_RANKS = None

# A list of global ranks for each expert model parallel group to ease calculation of
# the first local rank in the expert model parallel group
_EXPERT_MODEL_PARALLEL_RANKS = None

# A list of global ranks for each model parallel group to ease calculation of
# the first local rank in the model parallel group
_MODEL_PARALLEL_GLOBAL_RANKS = None

# Context parallel group that the current rank belongs to
_CONTEXT_PARALLEL_GROUP = None
# A list of global ranks for each context parallel group to ease calculation of the
# destination rank when exchanging KV/dKV between context parallel_ranks
_CONTEXT_PARALLEL_GLOBAL_RANKS = None
# Hierarchical context parallel groups
_HIERARCHICAL_CONTEXT_PARALLEL_GROUPS = None
# Hybrid context parallel groups
_HYBRID_DP_CP_GROUPS = {}

# Data parallel group information with context parallel combined.
_DATA_PARALLEL_GROUP_WITH_CP = None
_DATA_PARALLEL_GROUP_WITH_CP_AG = None
_DATA_PARALLEL_GROUP_WITH_CP_GLOO = None
_DATA_PARALLEL_GLOBAL_RANKS_WITH_CP = None

# Partial Data parallel group information with context parallel combined.
_INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP = None
_INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_GLOO = None

# combined parallel group of TP and CP
_TENSOR_AND_CONTEXT_PARALLEL_GROUP = None

# combined parallel group of TP, DP, and CP used for fp8
_TENSOR_AND_DATA_PARALLEL_GROUP_WITH_CP = None

# Paralel group of all GPUs in a distributed optimizer instance
_INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP = None

# Memory buffers to avoid dynamic memory allocation
_GLOBAL_MEMORY_BUFFER = None


# List of all process groups
# Used for updating the timeout for all process groups
# None represents the default process group
_global_process_group_list = None


def get_nccl_options(pg_name, nccl_comm_cfgs):
    """Set the NCCL process group options.

    Args:
        pg_name (str): process group name
        nccl_comm_cfgs (dict): nccl communicator configurations
    When an option (e.g., max_ctas) is not found in the config, use the NCCL default setting.
    """
    if pg_name in nccl_comm_cfgs:
        # When fields in nccl_options.config are not specified, NCCL applies default settings.
        # The default values for Hopper GPUs are as follows:
        # cga_cluster_size = 4, max_ctas = 32, min_ctas = 1
        # Default values may differ between GPU generations and NCCL versions.
        nccl_options = torch.distributed.ProcessGroupNCCL.Options(
            is_high_priority_stream=nccl_comm_cfgs[pg_name].get("is_high_priority_stream", False)
        )
        if "cga_cluster_size" in nccl_comm_cfgs[pg_name]:
            nccl_options.config.cga_cluster_size = nccl_comm_cfgs[pg_name]["cga_cluster_size"]
        if "max_ctas" in nccl_comm_cfgs[pg_name]:
            nccl_options.config.max_ctas = nccl_comm_cfgs[pg_name]["max_ctas"]
        if "min_ctas" in nccl_comm_cfgs[pg_name]:
            nccl_options.config.min_ctas = nccl_comm_cfgs[pg_name]["min_ctas"]
        if "net_name" in nccl_comm_cfgs[pg_name]:
            nccl_options.config.net_name = nccl_comm_cfgs[pg_name]["net_name"]
            # verify net_name value
            if nccl_options.config.net_name.lower() not in ["ib", "socket"]:
                raise RuntimeError(
                    f"net_name ({nccl_options.config.net_name}) is not supported."
                    f"Accepted values: 'IB' or 'socket'."
                )
        return nccl_options
    else:
        return None


def update_pg_timeout(
    timeout: timedelta, pg: Optional[torch._C._distributed_c10d.ProcessGroup] = None
):
    """Update the timeout for all process groups or a specific process group.
       Synchronize the process groups before updating the timeout.
    Args:
        timeout(datetime.timedelta): The timeout to set for the process group(s)
        pg(Optional[torch._C._distributed_c10d.ProcessGroup], default=None):
            The process group to update the timeout for.
            If None, all process groups are updated.
    """
    if hasattr(torch.distributed.distributed_c10d, "_set_pg_timeout"):
        torch.distributed.barrier(pg)
        torch.cuda.synchronize()
        try:
            if pg is None:
                global _global_process_group_list
                for group in _global_process_group_list:
                    torch.distributed.distributed_c10d._set_pg_timeout(timeout, group)
            else:
                torch.distributed.distributed_c10d._set_pg_timeout(timeout, pg)
        except Exception as e:
            logger.error(f"Error updating pg timeout: {e}")
            logger.error(f"Process group: {pg}")
            logger.error(f"Timeout: {timeout}")
            logger.error(f"Global process group list: {_global_process_group_list}")
            raise e


def create_group(
    ranks=None,
    timeout=None,
    backend=None,
    pg_options=None,
    use_local_synchronization=False,
    group_desc=None,
):
    """Creates a ProcessGroup."""
    kwargs = {
        "ranks": ranks,
        "timeout": timeout,
        "backend": backend,
        "pg_options": pg_options,
        "use_local_synchronization": use_local_synchronization,
        "group_desc": group_desc,
    }
    if not is_torch_min_version("2.4.0"):
        kwargs.pop("group_desc")
        if timeout is None:
            # Old version (e.g. v2.1.2) sets default_pg_timeout as default value to timeout
            # in function signature, then check tiemout value type.
            # New version sets None as default value to timeout in function signature. If value
            # is None, torch will give value according to the backend, then check type.
            # So need to unset timeout here if caller doesn't set value. Otherwise there is
            # type error.
            kwargs.pop("timeout")
    group = torch.distributed.new_group(**kwargs)
    global _global_process_group_list
    if _global_process_group_list is None:
        # None stands for the default process group
        _global_process_group_list = [None]
    if torch.distributed.get_rank() in ranks:
        _global_process_group_list.append(group)
    return group


def generate_masked_orthogonal_rank_groups(
    world_size: int, parallel_size: List[int], mask: List[bool]
) -> List[List[int]]:
    r"""Generate orthogonal parallel groups based on the parallel size and mask.

    Arguments:
        world_size (int): world size

        parallel_size (List[int]):
            The parallel size of each orthogonal parallel type. For example, if
            tensor_parallel_size = 2, pipeline_model_parallel_group = 3, data_parallel_size = 4,
            and the parallel mapping order is tp-pp-dp, then the parallel_size = [2, 3, 4].

        mask (List[bool]):
            The mask controls which parallel methods the generated groups represent. If mask[i] is
            True, it means the generated group contains the i-th parallelism method. For example,
            if parallel_size = [tp_size, pp_size, dp_size], and mask = [True, False , True], then
            the generated group is the `tp-dp` group, if the mask = [False, True, False], then the
            generated group is the `pp` group.

    Algorithm:
        For orthogonal parallelism, such as tp/dp/pp/cp, the global_rank and
        local_rank satisfy the following equation:
            global_rank = tp_rank + dp_rank * tp_size + pp_rank * tp_size * dp_size (1)
                tp_rank \in [0, tp_size)
                dp_rank \in [0, dp_size)
                pp_rank \in [0, pp_size)

        If we want to get the `dp_group` (tp_size * pp_size groups of dp_size ranks each.
        For example,  if the gpu size is 8 and order is 'tp-pp-dp', size is '2-2-2', and the
        dp_group here is [[0, 4], [1, 5], [2, 6], [3, 7]].)
        The tp_rank and pp_rank will be combined to form the `dp_group_index`.
            dp_group_index = tp_rank + pp_rank * tp_size (2)

        So, Given that tp_rank and pp_rank satisfy equation (2), and dp_rank in
        range(0, dp_size), the ranks in dp_group[dp_group_index] satisfies the
        equation (1).

        This function solve this math problem.

    For example, if the parallel_size = [tp_size, dp_size, pp_size] = [2, 3, 4],
    and the mask = [False, True, False]. Then,
        dp_group_index(0) = tp_rank(0) + pp_rank(0) * 2
        dp_group_index(1) = tp_rank(1) + pp_rank(0) * 2
        ...
        dp_group_index(7) = tp_rank(1) + pp_rank(3) * 2

        dp_group[0] = 0 + range(0, 3) * 2 + 0 = [0, 2, 4]
        dp_group[1] = 1 + range(0, 3) * 2 + 0 = [1, 3, 5]
        ...
        dp_group[7] = 1 + range(0, 3) * 2 + 3 * 2 * 3 = [19, 21, 23]
    """

    def prefix_product(a: List[int], init=1) -> List[int]:
        r = [init]
        for v in a:
            init = init * v
            r.append(init)
        return r

    def inner_product(a: List[int], b: List[int]) -> int:
        return sum([x * y for x, y in zip(a, b)])

    def decompose(index, shape, stride=None):
        """
        This function solve the math problem below:
            There is an equation:
                index = sum(idx[i] * stride[i])
            And given the value of index, stride.
            Return the idx.
        This function will be used to get the pp/dp/pp_rank
        from group_index and rank_in_group.
        """
        if stride is None:
            stride = prefix_product(shape)
        idx = [(index // d) % s for s, d in zip(shape, stride)]
        # stride is a prefix_product result. And the value of stride[-1]
        # is not used.
        assert (
            sum([x * y for x, y in zip(idx, stride[:-1])]) == index
        ), "idx {} with shape {} mismatch the return idx {}".format(index, shape, idx)
        return idx

    masked_shape = [s for s, m in zip(parallel_size, mask) if m]
    unmasked_shape = [s for s, m in zip(parallel_size, mask) if not m]

    global_stride = prefix_product(parallel_size)
    masked_stride = [d for d, m in zip(global_stride, mask) if m]
    unmasked_stride = [d for d, m in zip(global_stride, mask) if not m]

    group_size = prefix_product(masked_shape)[-1]
    num_of_group = world_size // group_size

    ranks = []
    for group_index in range(num_of_group):
        # get indices from unmaksed for group_index.
        decomposed_group_idx = decompose(group_index, unmasked_shape)
        rank = []
        for rank_in_group in range(group_size):
            # get indices from masked for rank_in_group.
            decomposed_rank_idx = decompose(rank_in_group, masked_shape)
            rank.append(
                inner_product(decomposed_rank_idx, masked_stride)
                + inner_product(decomposed_group_idx, unmasked_stride)
            )
        ranks.append(rank)
    return ranks


def create_hierarchical_groups(
    rank,
    ranks,
    hierarchical_group_sizes,
    create_gloo_process_groups=False,
    pg_options=None,
    timeout=None,
    group_desc=None,
):
    """Create hierarchical groups for a set of ranks.
    Taking a group size of 16 as example, so we have a total of 16 GPUs denoted by g0 ... g15.
    If the hierarchical group sizes are [2,2,4], we use 2 GPUs in the first and second level
    of sub-groups, and 4 GPUs in the last level of sub groups. The present function will
    create 8 level-1 sub-groups, 8 level-2 sub-groups and 4 level-3 sub-groups as:
        8 level-1 sub-groups:
            [g0, g1], [g2, g3], [g4, g5], [g6, g7], [g8, g9], [g10, g11], [g12, g13], [g14, g15]
        8 level-2 sub-groups:
            [g0, g2], [g1, g3], [g4, g6], [g5, g7], [g8, g10], [g9, g11], [g12, g14], [g13, g15]
        4 level-3 sub-groups:
            [g0, g4, g8, g12], [g1, g5, g9, g13], [g2, g6, g10, g14], [g3, g7, g11, g15]
    """

    if not HAVE_EINOPS:
        raise ImportError("einops is not installed. Please install it with `pip install einops`.")

    hierarchical_groups = []
    hierarchical_groups_gloo = []
    if not isinstance(pg_options, list):
        pg_options = [pg_options] * len(hierarchical_group_sizes)
    for level in range(len(hierarchical_group_sizes)):
        rearranged_ranks = einops.rearrange(
            np.array(ranks),
            "(l s u) -> (l u) s",
            u=int(np.prod(hierarchical_group_sizes[:level])),
            s=hierarchical_group_sizes[level],
            l=int(np.prod(hierarchical_group_sizes[level + 1 :])),
        ).tolist()
        for sub_ranks in rearranged_ranks:
            sub_group = create_group(
                sub_ranks,
                timeout=timeout,
                pg_options=pg_options[level],
                group_desc=f"HIERARCHICAL_{group_desc}_L{level}",
            )
            if create_gloo_process_groups:
                sub_group_gloo = create_group(
                    sub_ranks,
                    timeout=timeout,
                    backend="gloo",
                    pg_options=pg_options[level],
                    group_desc=f"HIERARCHICAL_{group_desc}_GLOO_L{level}",
                )
            else:
                sub_group_gloo = None
            if rank in sub_ranks:
                hierarchical_groups.append(sub_group)
                hierarchical_groups_gloo.append(sub_group_gloo)
    assert rank not in ranks or len(hierarchical_groups) == len(hierarchical_group_sizes)
    assert rank not in ranks or len(hierarchical_groups_gloo) == len(hierarchical_group_sizes)
    return hierarchical_groups, hierarchical_groups_gloo


def create_hybrid_dp_cp_groups(rank, ranks, pg_options):
    """
    Creates groups required for hybrid DPxCP.
    Creates a new group for every power of 2 up to the number of DPxCP ranks.
    Returns a dictionary indexed by group size.
    """
    hybrid_dp_cp_groups = {}
    # Generate group for every power of 2 up to the number of CP ranks
    # We limit the allowed group sizes in order to avoid excessive overhead.
    group_sizes = [2**i for i in range(int(log2(len(ranks))))][1:]
    for group_size in group_sizes:
        for i in range(0, len(ranks), group_size):
            group = create_group(
                ranks[i : i + group_size],
                pg_options=pg_options,
                group_desc=f"HYBRID_DP_CP_GROUP_{group_size}",
            )
            if rank in ranks[i : i + group_size]:
                assert (
                    group_size not in hybrid_dp_cp_groups
                ), f"Rank {rank} appears in multiple Hybrid DP CP groups of size {group_size}"
                hybrid_dp_cp_groups[group_size] = group
    return hybrid_dp_cp_groups


class RankGenerator(object):
    """A class for generating rank groups for different modes of parallelism."""

    def __init__(
        self, tp: int, ep: int, dp: int, pp: int, cp: int, order: str, rank_offset: int = 0
    ) -> None:
        assert (
            ep == 1 or cp == 1
        ), "Both EP and CP > 1 in not allow in one rank generator. \
            CP is only included in default RankGenerator, and EP only in expert RankGenerator."

        self.tp = tp
        self.ep = ep
        self.dp = dp
        self.pp = pp
        self.cp = cp
        self.rank_offset = rank_offset
        self.world_size = tp * dp * pp * cp * ep

        self.name_to_size = {
            "tp": self.tp,
            "pp": self.pp,
            "dp": self.dp,
            "ep": self.ep,
            "cp": self.cp,
        }
        self.order = order
        order = order.lower()

        for name in self.name_to_size.keys():
            if name not in order and self.name_to_size[name] != 1:
                raise RuntimeError(
                    f"The size of ({name}) is ({self.name_to_size[name]}), but you haven't"
                    f"specified the order ({self.order})."
                )
            elif name not in order:
                order = order + "-" + name

        self.order = order
        self.ordered_size = []

        for token in order.split("-"):
            self.ordered_size.append(self.name_to_size[token])

    def get_mask(self, order: str, token: str):
        """Create a mask for the specified tokens based on the given order.

        Args:
            order (str): The order of parallelism types (e.g., 'tp-dp-pp').
            token (str): The specific parallelism types to include in the mask,
                         separated by hyphens (e.g., 'tp-dp').
        """
        ordered_token = order.split("-")
        token_list = token.split("-")
        mask = [False] * len(ordered_token)
        for t in token_list:
            mask[ordered_token.index(t)] = True
        return mask

    def get_ranks(self, token):
        """Get rank group by input token.

        Args:
            token (str):
                Specify the ranks type that want to get. If we want
                to obtain multiple parallel types, we can use a hyphen
                '-' to separate them. For example, if we want to obtain
                the TP_DP group, the token should be 'tp-dp'.
        """
        mask = self.get_mask(self.order, token)
        ranks = generate_masked_orthogonal_rank_groups(self.world_size, self.ordered_size, mask)
        if self.rank_offset > 0:
            for rank_group in ranks:
                for i in range(len(rank_group)):
                    rank_group[i] += self.rank_offset
        return ranks


def default_embedding_ranks(pp_ranks):
    """Return the default ranks that constitute the stages on which the word embeddings live.
    For most models, these are the first and last pipeline stages."""
    if len(pp_ranks) == 1:
        return [pp_ranks[0]]
    else:
        return [pp_ranks[0], pp_ranks[-1]]


def default_position_embedding_ranks(pp_ranks):
    """Return the default ranks that constitute the stages on which the position embeddings live.
    For most models, this is only the first pipeline stage."""
    return [pp_ranks[0]]


def overwrite_nccl_comm_cfgs(nccl_comm_cfgs, pg_name, key_value_pair):
    """Overwrite the nccl_comm_cfgs for the given pg_name with the given key_value_pair."""
    if pg_name not in nccl_comm_cfgs:
        nccl_comm_cfgs[pg_name] = {}
    nccl_comm_cfgs[pg_name][key_value_pair[0]] = key_value_pair[1]


# pylint: disable=C0301
def initialize_model_parallel(
    tensor_model_parallel_size: int = 1,
    pipeline_model_parallel_size: int = 1,
    virtual_pipeline_model_parallel_size: Optional[int] = None,
    pipeline_model_parallel_comm_backend: Optional[str] = None,
    use_sharp: bool = False,
    context_parallel_size: int = 1,
    hierarchical_context_parallel_sizes: Optional[List[int]] = None,
    hybrid_context_parallel: bool = False,
    expert_model_parallel_size: int = 1,
    num_distributed_optimizer_instances: int = 1,
    expert_tensor_parallel_size: Optional[int] = None,
    nccl_communicator_config_path: Optional[str] = None,
    distributed_timeout_minutes: int = 30,
    order: str = "tp-cp-ep-dp-pp",
    get_embedding_ranks: Optional[Callable[[List[int], Optional[int]], List[int]]] = None,
    get_position_embedding_ranks: Optional[Callable[[List[int], Optional[int]], List[int]]] = None,
    create_gloo_process_groups: bool = True,
    high_priority_stream_groups: Optional[List[str]] = None,
    sharp_enabled_group: Optional[str] = None,
    create_all_gather_group: Optional[bool] = False,
    rank_offset: int = 0,
    local_world_size: Optional[int] = None,
) -> None:
    """Initialize model data parallel groups.

    Args:
        tensor_model_parallel_size (int, default = 1):
            The number of GPUs to split individual tensors across.

        pipeline_model_parallel_size (int, default = 1):
            The number of tensor parallel GPU groups to split the
            Transformer layers across. For example, if
            tensor_model_parallel_size is 4 and
            pipeline_model_parallel_size is 2, the model will be split
            into 2 groups of 4 GPUs.

        virtual_pipeline_model_parallel_size (int, optional):
            The number of stages that each pipeline group will have,
            interleaving as necessary. If None, no interleaving is
            performed. For example, if tensor_model_parallel_size is 1,
            pipeline_model_parallel_size is 4,
            virtual_pipeline_model_parallel_size is 2, and there are
            16 transformer layers in the model, the model will be
            split into 8 stages with two layers each and each GPU
            would get 2 stages as such (layer number starting with 1):

            GPU 0: [1, 2] [9, 10]
            GPU 1: [3, 4] [11, 12]
            GPU 2: [5, 6] [13, 14]
            GPU 3: [7, 8] [15, 16]

        pipeline_model_parallel_comm_backend (str, optional):
            The backend to use for pipeline parallel communication.
            If None, the default backend will be used.

        use_sharp (bool, default = False):
            Set the use of SHARP for the collective communications of
            data-parallel process groups. When `True`, run barrier
            within each data-parallel process group, which specifies
            the SHARP application target groups.

        context_parallel_size (int, default = 1):
            The number of tensor parallel GPU groups to split the
            network input sequence length across. Compute of attention
            module requires tokens of full sequence length, so GPUs
            in a context parallel group need to communicate with each
            other to exchange information of other sequence chunks.
            Each GPU and its counterparts in other tensor parallel
            groups compose a context parallel group.

            For example, assume we have 8 GPUs, if tensor model parallel
            size is 4 and context parallel size is 2, the network input
            will be split into two sequence chunks, which are processed
            by 2 different groups of 4 GPUs. One chunk is processed by
            GPU0-3, the other chunk is processed by GPU4-7. Four groups
            are build to do context parallel communications: [GPU0, GPU4],
            [GPU1, GPU5], [GPU2, GPU6], and [GPU3, GPU7].

            Context parallelism partitions sequence length, so it has no
            impact on weights, which means weights are duplicated among
            GPUs in a context parallel group. Hence, weight gradients
            all-reduce is required in backward. For simplicity, we piggyback
            GPUs of context parallelism on data parallel group for
            weight gradient all-reduce.

        expert_model_parallel_size (int, default = 1):
            The number of Mixture of Experts parallel GPUs in each expert
            parallel group.

        num_distributed_optimizer_instances (int, default = 1):
            The number of distributed optimizer replicas across the data-
            parallel domain.

        expert_tensor_parallel_size (int, default = tp_size):
            The number of GPUs to split individual tensors of expert.

        nccl_communicator_config_path (str, default = None):
            Path to the yaml file of NCCL communicator configurations.
            `min_ctas`, `max_ctas`, and `cga_cluster_size` can be set
            for each communicator.

        distributed_timeout_minutes (int, default = 30): Timeout, in
            minutes,for operations executed against distributed
            process groups. See PyTorch documentation at
            https://pytorch.org/docs/stable/distributed.html for
            caveats.

        order (str, default=tp-dp-pp):
            The rank initialization order of parallelism. Now we support
            tp-dp-pp and tp-pp-dp orders.

        get_embedding_ranks (Callable[[List[int], Optional[int]], List[int]], optional, default=None):
            A function that takes in a list of ranks for a pipeline group and returns
            those ranks that should have embeddings.

        get_position_embedding_ranks (Callable[[List[int], Optional[int]], List[int]], optional, default=None):
            A function that takes in a list of ranks for a pipeline group, and returns
            those ranks that should have position embeddings.

        create_gloo_process_groups (bool, default = True):
            Create Gloo process groups if set to True. If set to False, Gloo process groups are
            not created and calls to get Gloo process groups will result in assertion errors.

        high_priority_stream_groups (List[str], default = None):
            Specify which communicator groups should use high priority streams during creation.
            Assigning high priority to communication streams ensures that communication kernels
            are scheduled with higher priority, minimizing the exposed communication when it is
            overlapped with other computation kernels.
            Example: initialize_parallel_groups(..., high_priority_stream_groups=['dp_cp','ep_dp'])

        sharp_enabled_group (str, default = None):
            Specify which communicator group should use SHARP communication.
            This option is only valid when use_sharp is True.
            By default (None), it is enabled from dp group.
            Available options (choose one): [dp, dp_replica]

        create_all_gather_group (bool, default = False):
            Create a separate process group for all-gather operations to avoid
            head-of-line blocking with reduce-scatter operations. When enabled,
            creates an additional NCCL communicator with identical ranks as the
            dp-cp group but with independent progress engines for better communication
            overlap.

    Let's say we have a total of 16 GPUs denoted by g0 ... g15 and we
    use 2 GPUs to parallelize the model tensor, and 4 GPUs to parallelize
    the model pipeline. The present function will
    create 8 tensor model-parallel groups, 4 pipeline model-parallel groups
    and 8 data-parallel groups as:
        8 data_parallel groups:
            [g0, g2], [g1, g3], [g4, g6], [g5, g7], [g8, g10], [g9, g11], [g12, g14], [g13, g15]
        8 tensor model-parallel groups:
            [g0, g1], [g2, g3], [g4, g5], [g6, g7], [g8, g9], [g10, g11], [g12, g13], [g14, g15]
        4 pipeline model-parallel groups:
            [g0, g4, g8, g12], [g1, g5, g9, g13], [g2, g6, g10, g14], [g3, g7, g11, g15]
    Note that for efficiency, the caller should make sure adjacent ranks
    are on the same DGX box. For example if we are using 2 DGX-1 boxes
    with a total of 16 GPUs, rank 0 to 7 belong to the first box and
    ranks 8 to 15 belong to the second box.
    """
    # NCCL restricts IB SHARP usage to a single communicator group—the first one created
    # with NCCL_COLLNET_ENABLE=1. After this group is created, NCCL_COLLNET_ENABLE must be
    # set to 0 for subsequent groups.
    if "NCCL_COLLNET_ENABLE" in os.environ:
        del os.environ["NCCL_COLLNET_ENABLE"]

    if use_sharp:
        if sharp_enabled_group is None:
            # By default, SHARP is enabled from dp group.
            sharp_enabled_group = "dp"
        else:
            # Currently, only dp and dp_replica groups are supported for SHARP.
            assert sharp_enabled_group in ["dp", "dp_replica"], "Invalid sharp_enabled_group"
            if sharp_enabled_group == "dp_replica":
                assert (
                    num_distributed_optimizer_instances > 1
                ), "dp_replica group requires num_distributed_optimizer_instances > 1"
    else:
        assert (
            sharp_enabled_group is None
        ), "sharp_enabled_group is only valid when use_sharp is True"

    if get_embedding_ranks is None:
        get_embedding_ranks = default_embedding_ranks

    if get_position_embedding_ranks is None:
        get_position_embedding_ranks = default_position_embedding_ranks

    # Get world size and rank. Ensure some consistencies.
    assert torch.distributed.is_initialized()
    world_size: int = (
        local_world_size if local_world_size is not None else torch.distributed.get_world_size()
    )

    model_size = tensor_model_parallel_size * pipeline_model_parallel_size * context_parallel_size

    if world_size % model_size != 0:
        raise RuntimeError(f"world_size ({world_size}) is not divisible by {model_size}")

    data_parallel_size: int = world_size // model_size

    if virtual_pipeline_model_parallel_size is not None:
        if not pipeline_model_parallel_size > 1:
            raise RuntimeError(
                "pipeline-model-parallel size should be greater than 1 with interleaved schedule"
            )
        global _VIRTUAL_PIPELINE_MODEL_PARALLEL_RANK
        global _VIRTUAL_PIPELINE_MODEL_PARALLEL_WORLD_SIZE
        _VIRTUAL_PIPELINE_MODEL_PARALLEL_RANK = 0
        _VIRTUAL_PIPELINE_MODEL_PARALLEL_WORLD_SIZE = virtual_pipeline_model_parallel_size

    rank = torch.distributed.get_rank()

    nccl_comm_cfgs = {}
    if nccl_communicator_config_path is not None:
        try:
            import yaml
        except ImportError:
            raise RuntimeError(
                "Cannot import `yaml`. Setting custom nccl communicator configs "
                "requires the yaml package."
            )

        with open(nccl_communicator_config_path, "r") as stream:
            nccl_comm_cfgs = yaml.safe_load(stream)

    # Set is_high_priority_stream flag to the nccl_comm_cfgs if it is in high_priority_stream_groups
    high_priority_stream_groups = high_priority_stream_groups or []
    for pg_name in high_priority_stream_groups:
        overwrite_nccl_comm_cfgs(nccl_comm_cfgs, pg_name, ("is_high_priority_stream", True))

    decoder_rank_generator = RankGenerator(
        tp=tensor_model_parallel_size,
        ep=1,
        dp=data_parallel_size,
        pp=pipeline_model_parallel_size,
        cp=context_parallel_size,
        order=order,
        rank_offset=rank_offset,
    )

    # Build expert rank generator
    if expert_tensor_parallel_size is None:
        expert_tensor_parallel_size = tensor_model_parallel_size
    expert_tensor_model_pipeline_parallel_size = (
        expert_tensor_parallel_size * expert_model_parallel_size * pipeline_model_parallel_size
    )
    expert_data_parallel_size = world_size // expert_tensor_model_pipeline_parallel_size
    if world_size % expert_tensor_model_pipeline_parallel_size != 0:
        raise RuntimeError(
            f"world_size ({world_size}) is not divisible by expert_tensor_model_pipeline_parallel size ({expert_tensor_model_pipeline_parallel_size})"
        )

    # TODO: support expert specific ordering
    expert_decoder_rank_generator = RankGenerator(
        tp=expert_tensor_parallel_size,
        ep=expert_model_parallel_size,
        dp=expert_data_parallel_size,
        pp=pipeline_model_parallel_size,
        cp=1,
        order=order,
        rank_offset=rank_offset,
    )

    assert (
        order.endswith("pp")
        or pipeline_model_parallel_size == 1
        or expert_data_parallel_size == data_parallel_size
    ), "When not using pp-last rank ordering, the data parallel size of the attention and moe layers must be the same"

    assert decoder_rank_generator.get_ranks("pp") == expert_decoder_rank_generator.get_ranks(
        "pp"
    ), f"Pipeline parallel groups are expected to be the same for Non-Expert and Expert part, \
    but got {decoder_rank_generator.get_ranks('pp')} and {expert_decoder_rank_generator.get_ranks('pp')}"

    timeout = timedelta(minutes=distributed_timeout_minutes)

    # Build the data-parallel groups.
    global _DATA_PARALLEL_GROUP
    global _DATA_PARALLEL_GROUP_GLOO
    global _DATA_PARALLEL_GLOBAL_RANKS
    global _DATA_PARALLEL_GROUP_WITH_CP
    global _DATA_PARALLEL_GROUP_WITH_CP_AG
    global _DATA_PARALLEL_GROUP_WITH_CP_GLOO
    global _DATA_PARALLEL_GLOBAL_RANKS_WITH_CP
    global _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP
    global _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_GLOO
    assert _DATA_PARALLEL_GROUP is None, "data parallel group is already initialized"

    assert (
        data_parallel_size * context_parallel_size
    ) % num_distributed_optimizer_instances == 0, (
        "Data parallel size should be divisible by partial DistOpt shard factor"
    )
    intra_partial_data_parallel_size = (
        data_parallel_size * context_parallel_size
    ) // num_distributed_optimizer_instances

    # Set NCCL_COLLNET_ENABLE to 1 to enable SHARP for the dp group.
    if sharp_enabled_group == "dp":
        os.environ["NCCL_COLLNET_ENABLE"] = "1"

    # In case of using SHARP, the dp-cp group requires to use NCCL COLLNET feature.
    # Due to the hardware limitation, only the initially created communication group
    # is eligible for using the NCCL COLLNET feature.
    # Therefore, dp-cp group, which potentially requires SHARP-enablement,
    # need to be created before all the other groups
    for ranks_with_cp in decoder_rank_generator.get_ranks('dp-cp'):
        group_with_cp = create_group(
            ranks_with_cp,
            timeout=timeout,
            pg_options=get_nccl_options("dp_cp", nccl_comm_cfgs),
            group_desc="DATA_PARALLEL_GROUP_WITH_CP",
        )
        if create_all_gather_group:
            group_with_cp_ag = create_group(
                ranks_with_cp,
                timeout=timeout,
                pg_options=get_nccl_options("dp_cp", nccl_comm_cfgs),
                group_desc="DATA_PARALLEL_GROUP_WITH_CP_AG",
            )
        else:
            group_with_cp_ag = None
        if create_gloo_process_groups:
            group_with_cp_gloo = create_group(
                ranks_with_cp,
                timeout=timeout,
                backend="gloo",
                group_desc="DATA_PARALLEL_GROUP_WITH_CP_GLOO",
            )
        else:
            group_with_cp_gloo = None
        if rank in ranks_with_cp:
            _DATA_PARALLEL_GROUP_WITH_CP = group_with_cp
            _DATA_PARALLEL_GROUP_WITH_CP_AG = group_with_cp_ag
            _DATA_PARALLEL_GROUP_WITH_CP_GLOO = group_with_cp_gloo
            _DATA_PARALLEL_GLOBAL_RANKS_WITH_CP = ranks_with_cp

        if num_distributed_optimizer_instances > 1:
            # Create groups for intra-partial DP domain
            for i in range(num_distributed_optimizer_instances):
                intra_partial_dp_ranks_with_cp = ranks_with_cp[
                    (i * intra_partial_data_parallel_size) : (
                        (i + 1) * intra_partial_data_parallel_size
                    )
                ]
                intra_partial_dp_group_with_cp = create_group(
                    intra_partial_dp_ranks_with_cp,
                    timeout=timeout,
                    pg_options=get_nccl_options("intra_dp_cp", nccl_comm_cfgs),
                    group_desc="INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP",
                )
                if create_gloo_process_groups:
                    intra_partial_dp_group_with_cp_gloo = create_group(
                        intra_partial_dp_ranks_with_cp,
                        timeout=timeout,
                        backend="gloo",
                        group_desc="INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_GLOO",
                    )
                else:
                    intra_partial_dp_group_with_cp_gloo = None
                if rank in intra_partial_dp_ranks_with_cp:
                    _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP = intra_partial_dp_group_with_cp
                    _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_GLOO = (
                        intra_partial_dp_group_with_cp_gloo
                    )
        else:
            _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP = _DATA_PARALLEL_GROUP_WITH_CP
            _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_GLOO = _DATA_PARALLEL_GROUP_WITH_CP_GLOO

    # Apply SHARP to the dp group.
    if sharp_enabled_group == "dp":
        if rank == 0:
            logger.info(
                "The number of process groups to use SHARP with depends on the type "
                "of the network switch. Nvidia QM1 switch supports SAHRP up to 8 "
                "process groups and QM2 supports up to 256 process groups. We apply "
                "SHARP to the communications of the data-parallel domain. If the "
                "number of data-parallel process groups is larger than the max "
                "process groups that the network switch supports, the communication "
                "will fall back to non-SHARP operators. To enable SHARP, "
                "`#SBATCH_NETWORK=sharp` should be set in the sbatch script."
            )
        # PyTorch is performing lazy initialization of the communicator group.
        # Therefore, we need to perform a nccl call to ensure that the communicator group is created.
        torch.distributed.barrier(
            group=get_data_parallel_group(with_context_parallel=True),
            device_ids=[torch.cuda.current_device()],
        )
        torch.cuda.synchronize()
        # Set `NCCL_COLLNET_ENABLE=0` to restrict SHARP application to the dp group.
        if "NCCL_COLLNET_ENABLE" in os.environ:
            del os.environ["NCCL_COLLNET_ENABLE"]

    if hybrid_context_parallel:
        global _HYBRID_DP_CP_GROUPS
        for ranks_with_cp in decoder_rank_generator.get_ranks('dp-cp'):
            assert (
                len(ranks_with_cp) % 2 == 0
            ), "Hybrid context parallel requires an even number of ranks"
            _HYBRID_DP_CP_GROUPS.update(
                create_hybrid_dp_cp_groups(
                    rank, ranks_with_cp, get_nccl_options("dp_cp", nccl_comm_cfgs)
                )
            )
        # TODO: Are gloo groups needed for hybrid cp?

    for ranks in decoder_rank_generator.get_ranks('dp'):
        group = create_group(
            ranks,
            timeout=timeout,
            pg_options=get_nccl_options("dp", nccl_comm_cfgs),
            group_desc="DATA_PARALLEL_GROUP",
        )
        if create_gloo_process_groups:
            group_gloo = create_group(
                ranks, timeout=timeout, backend="gloo", group_desc="DATA_PARALLEL_GROUP_GLOO"
            )
        else:
            group_gloo = None
        if rank in ranks:
            _DATA_PARALLEL_GROUP = group
            _DATA_PARALLEL_GROUP_GLOO = group_gloo
            _DATA_PARALLEL_GLOBAL_RANKS = ranks

    # Build the context-parallel groups.
    global _CONTEXT_PARALLEL_GROUP
    global _CONTEXT_PARALLEL_GLOBAL_RANKS
    assert _CONTEXT_PARALLEL_GROUP is None, 'context parallel group is already initialized'
    for ranks in decoder_rank_generator.get_ranks('cp'):
        group = create_group(
            ranks,
            timeout=timeout,
            pg_options=get_nccl_options("cp", nccl_comm_cfgs),
            group_desc="CONTEXT_PARALLEL_GROUP",
        )
        if rank in ranks:
            _CONTEXT_PARALLEL_GROUP = group
            _CONTEXT_PARALLEL_GLOBAL_RANKS = ranks
        if hierarchical_context_parallel_sizes:
            assert np.prod(hierarchical_context_parallel_sizes) == context_parallel_size
            global _HIERARCHICAL_CONTEXT_PARALLEL_GROUPS
            hierarchical_groups, _ = create_hierarchical_groups(
                rank,
                ranks,
                hierarchical_context_parallel_sizes,
                create_gloo_process_groups=False,
                pg_options=get_nccl_options("hcp", nccl_comm_cfgs),
                timeout=timeout,
                group_desc="CONTEXT_PARALLEL_GROUP",
            )
            if rank in ranks:
                _HIERARCHICAL_CONTEXT_PARALLEL_GROUPS = hierarchical_groups

    # Build the model-parallel groups.
    global _MODEL_PARALLEL_GROUP
    global _MODEL_PARALLEL_GLOBAL_RANKS
    assert _MODEL_PARALLEL_GROUP is None, 'model parallel group is already initialized'
    for ranks in decoder_rank_generator.get_ranks('tp-pp'):
        group = create_group(
            ranks,
            timeout=timeout,
            pg_options=get_nccl_options("mp", nccl_comm_cfgs),
            group_desc="MODEL_PARALLEL_GROUP",
        )
        if rank in ranks:
            _MODEL_PARALLEL_GROUP = group
            _MODEL_PARALLEL_GLOBAL_RANKS = ranks

    # Build the tensor model-parallel groups.
    global _TENSOR_MODEL_PARALLEL_GROUP
    global _TENSOR_MODEL_PARALLEL_GLOBAL_RANKS
    assert (
        _TENSOR_MODEL_PARALLEL_GROUP is None
    ), 'tensor model parallel group is already initialized'
    for ranks in decoder_rank_generator.get_ranks('tp'):
        group = create_group(
            ranks,
            timeout=timeout,
            pg_options=get_nccl_options("tp", nccl_comm_cfgs),
            group_desc="TENSOR_MODEL_PARALLEL_GROUP",
        )
        if rank in ranks:
            _TENSOR_MODEL_PARALLEL_GROUP = group
            _TENSOR_MODEL_PARALLEL_GLOBAL_RANKS = ranks

    # Build the pipeline model-parallel groups and embedding groups
    # (first and last rank in each pipeline model-parallel group).
    global _PIPELINE_MODEL_PARALLEL_GROUP
    global _PIPELINE_GLOBAL_RANKS
    assert (
        _PIPELINE_MODEL_PARALLEL_GROUP is None
    ), "pipeline model parallel group is already initialized"
    global _EMBEDDING_GROUP
    global _EMBEDDING_GLOBAL_RANKS
    assert _EMBEDDING_GROUP is None, "embedding group is already initialized"
    global _POSITION_EMBEDDING_GROUP
    global _POSITION_EMBEDDING_GLOBAL_RANKS
    assert _POSITION_EMBEDDING_GROUP is None, "position embedding group is already initialized"
    if pipeline_model_parallel_comm_backend == "ucc":
        # The UCC backend provides two key benefits:
        # 1) Achieves better bandwidth utilization than NCCL when using InfiniBand links.
        # 2) Does not use GPU SM resources (Zero-SM), mitigating performance interference
        #    with overlapping compute kernels.

        # The UCC backend is recommended in the following cases:
        # 1) When the exposed pipeline-parallel (PP) communications are significant.
        #    - E.g., Pipeline parallelism with very less gradient accumulation steps.
        #    - It may provide better performance due to improved bandwidth utilization.
        # 2) When the critical-path pipeline stage has substantial PP-communication overlap.
        #    - E.g., Uneven pipeline parallelism.
        #    - It may provide better performance due to zero SM resource usage.
        if "CUDA_DEVICE_MAX_CONNECTIONS" in os.environ:
            # UCC backend requires CUDA_DEVICE_MAX_CONNECTIONS variable to be larger than 1,
            # to gurantee the overlapped UCC communications. If this environment variable is set to 1,
            # all the UCC communication will be serialized.
            assert (
                os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] != "1"
            ), "UCC-backend requires CUDA_DEVICE_MAX_CONNECTIONS > 1"

        # Setting up required environment variables for ucc backend
        #
        # "TORCH_UCC_BLOCKING_WAIT=none" allows non-blocking waits of the communiction handle
        # "UCC_EC_CUDA_STREAM_TASK_MODE" controls how CUDA execution engines (EC)
        # schedule tasks on CUDA streams.
        # "UCX_TLS" controls transport layer selection
        # "NSYS_UCP_COMM_PARAMS=1" enables capturing ucx tracing in nsys profiling
        # "UCX_RNDV_THRESH" controls threshold threshold for switching between
        # eager and rendezvous (RNDV) communication protocols.
        # "UCX_NET_DEVICES" select which network interfaces UCX should use.
        # "UCC_CL_BASIC_TLS" controls which Transport Layers are used by
        # the Basic Collective libraray

        os.environ["TORCH_UCC_BLOCKING_WAIT"] = (
            os.environ["TORCH_UCC_BLOCKING_WAIT"]
            if "TORCH_UCC_BLOCKING_WAIT" in os.environ
            else "none"
        )
        os.environ["UCC_EC_CUDA_STREAM_TASK_MODE"] = (
            os.environ["UCC_EC_CUDA_STREAM_TASK_MODE"]
            if "UCC_EC_CUDA_STREAM_TASK_MODE" in os.environ
            else "driver"
        )
        os.environ["UCX_TLS"] = (
            os.environ["UCX_TLS"] if "UCX_TLS" in os.environ else "ib,cuda_copy"
        )  # cuda_ipc (i.e., NVLink-enablement) will be later supported
        os.environ["NSYS_UCP_COMM_PARAMS"] = "1"
        os.environ["UCX_RNDV_THRESH"] = "0"
        os.environ["UCX_NET_DEVICES"] = "all"
        os.environ["UCC_CL_BASIC_TLS"] = "^sharp,nccl"

    for ranks in decoder_rank_generator.get_ranks('pp'):
        group = create_group(
            ranks,
            timeout=timeout,
            backend=pipeline_model_parallel_comm_backend,
            pg_options=(
                None
                if pipeline_model_parallel_comm_backend == "ucc"
                else get_nccl_options("pp", nccl_comm_cfgs)
            ),
            group_desc="PIPELINE_MODEL_PARALLEL_GROUP",
        )
        assert (
            pipeline_model_parallel_comm_backend == None
            or pipeline_model_parallel_comm_backend == "nccl"
            or pipeline_model_parallel_comm_backend == "ucc"
        ), f'"{pipeline_model_parallel_comm_backend}" backend for PP communication is currently not supported'

        if rank in ranks:
            if _PIPELINE_MODEL_PARALLEL_GROUP is None:
                _PIPELINE_MODEL_PARALLEL_GROUP = group
                _PIPELINE_GLOBAL_RANKS = ranks
            elif isinstance(_PIPELINE_GLOBAL_RANKS[0], list):
                _PIPELINE_MODEL_PARALLEL_GROUP.append(group)
                _PIPELINE_GLOBAL_RANKS.append(ranks)
            else:
                _PIPELINE_MODEL_PARALLEL_GROUP = [_PIPELINE_MODEL_PARALLEL_GROUP, group]
                _PIPELINE_GLOBAL_RANKS = [_PIPELINE_GLOBAL_RANKS, ranks]

        embedding_ranks = get_embedding_ranks(ranks)
        group = create_group(
            embedding_ranks,
            timeout=timeout,
            pg_options=get_nccl_options("embd", nccl_comm_cfgs),
            group_desc="EMBEDDING_GROUP",
        )
        if rank in embedding_ranks:
            _EMBEDDING_GROUP = group
            _EMBEDDING_GLOBAL_RANKS = embedding_ranks

        position_embedding_ranks = get_position_embedding_ranks(ranks)
        group = create_group(
            position_embedding_ranks,
            timeout=timeout,
            pg_options=get_nccl_options("pos_embd", nccl_comm_cfgs),
            group_desc="POSITION_EMBEDDING_GROUP",
        )
        if rank in position_embedding_ranks:
            _POSITION_EMBEDDING_GROUP = group
            _POSITION_EMBEDDING_GLOBAL_RANKS = position_embedding_ranks

    # Build the tensor + data parallel groups.
    global _TENSOR_AND_DATA_PARALLEL_GROUP
    global _TENSOR_AND_DATA_PARALLEL_GROUP_WITH_CP
    assert (
        _TENSOR_AND_DATA_PARALLEL_GROUP is None
    ), 'Tensor + data parallel group is already initialized'
    for ranks in decoder_rank_generator.get_ranks('tp-dp-cp'):
        group = create_group(
            ranks,
            timeout=timeout,
            pg_options=get_nccl_options("tp_dp_cp", nccl_comm_cfgs),
            group_desc="TENSOR_AND_DATA_PARALLEL_GROUP_WITH_CP",
        )
        if rank in ranks:
            _TENSOR_AND_DATA_PARALLEL_GROUP_WITH_CP = group
    for ranks in decoder_rank_generator.get_ranks('tp-dp'):
        group = create_group(
            ranks,
            timeout=timeout,
            pg_options=get_nccl_options("tp_dp", nccl_comm_cfgs),
            group_desc="TENSOR_AND_DATA_PARALLEL_GROUP",
        )
        if rank in ranks:
            _TENSOR_AND_DATA_PARALLEL_GROUP = group

    global _TENSOR_AND_CONTEXT_PARALLEL_GROUP
    assert (
        _TENSOR_AND_CONTEXT_PARALLEL_GROUP is None
    ), 'Tensor + context parallel group is already initialized'
    for ranks in decoder_rank_generator.get_ranks('tp-cp'):
        group = create_group(
            ranks,
            timeout=timeout,
            pg_options=get_nccl_options("tp_cp", nccl_comm_cfgs),
            group_desc="TENSOR_AND_CONTEXT_PARALLEL_GROUP",
        )
        if rank in ranks:
            _TENSOR_AND_CONTEXT_PARALLEL_GROUP = group

    ### Expert-related parallel groups initialization
    # Build the expert model parallel group
    global _EXPERT_MODEL_PARALLEL_GROUP, _EXPERT_MODEL_PARALLEL_RANKS
    assert _EXPERT_MODEL_PARALLEL_GROUP is None, 'Expert parallel group is already initialized'
    for ranks in expert_decoder_rank_generator.get_ranks('ep'):
        group = create_group(
            ranks,
            timeout=timeout,
            pg_options=get_nccl_options("ep", nccl_comm_cfgs),
            group_desc="EXPERT_MODEL_PARALLEL_GROUP",
        )
        if rank in ranks:
            _EXPERT_MODEL_PARALLEL_GROUP = group
            _EXPERT_MODEL_PARALLEL_RANKS = ranks

    # Build the expert tensor parallel group
    global _EXPERT_TENSOR_PARALLEL_GROUP
    assert (
        _EXPERT_TENSOR_PARALLEL_GROUP is None
    ), 'Expert tensor model parallel group is already initialized'
    for ranks in expert_decoder_rank_generator.get_ranks('tp'):
        group = create_group(
            ranks,
            timeout=timeout,
            pg_options=get_nccl_options("ep_tp", nccl_comm_cfgs),
            group_desc="EXPERT_TENSOR_PARALLEL_GROUP",
        )
        if rank in ranks:
            _EXPERT_TENSOR_PARALLEL_GROUP = group

    # Build the tensor + expert parallel groups
    global _EXPERT_TENSOR_AND_MODEL_PARALLEL_GROUP
    assert (
        _EXPERT_TENSOR_AND_MODEL_PARALLEL_GROUP is None
    ), 'Expert tensor + model parallel group is already initialized'
    for ranks in expert_decoder_rank_generator.get_ranks('tp-ep'):
        group = create_group(
            ranks,
            timeout=timeout,
            pg_options=get_nccl_options("tp_ep_mp", nccl_comm_cfgs),
            group_desc="EXPERT_TENSOR_AND_MODEL_PARALLEL_GROUP",
        )
        if rank in ranks:
            _EXPERT_TENSOR_AND_MODEL_PARALLEL_GROUP = group

    # Build the expert+tensor+pipeline parallel groups
    global _EXPERT_TENSOR_MODEL_PIPELINE_PARALLEL_GROUP
    assert (
        _EXPERT_TENSOR_MODEL_PIPELINE_PARALLEL_GROUP is None
    ), 'The expert_tensor_model_pipeline parallel group is already initialized'
    for ranks in expert_decoder_rank_generator.get_ranks('tp-ep-pp'):
        group = create_group(
            ranks,
            timeout=timeout,
            pg_options=get_nccl_options("tp_ep_pp", nccl_comm_cfgs),
            group_desc="EXPERT_TENSOR_MODEL_PIPELINE_PARALLEL_GROUP",
        )
        if rank in ranks:
            _EXPERT_TENSOR_MODEL_PIPELINE_PARALLEL_GROUP = group

    # Build the expert data parallel group
    global _EXPERT_DATA_PARALLEL_GROUP
    assert _EXPERT_DATA_PARALLEL_GROUP is None, "Expert data group is already initialized"
    global _EXPERT_DATA_PARALLEL_GROUP_GLOO
    assert _EXPERT_DATA_PARALLEL_GROUP_GLOO is None, "Expert data group-gloo is already initialized"
    global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP
    assert (
        _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP is None
    ), "Intra partial expert data group is already initialized"
    global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO
    assert (
        _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO is None
    ), "Intra partial expert data group-gloo is already initialized"
    global _INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP
    assert (
        _INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP is None
    ), "Inter partial expert data group is already initialized"

    assert (
        expert_data_parallel_size % num_distributed_optimizer_instances == 0
    ), "Expert data parallel size should be divisible by partial DistOpt shard factor"
    intra_partial_expert_data_parallel_size = (
        expert_data_parallel_size // num_distributed_optimizer_instances
    )

    for ranks in expert_decoder_rank_generator.get_ranks('dp'):
        group = create_group(
            ranks,
            timeout=timeout,
            pg_options=get_nccl_options("ep_dp", nccl_comm_cfgs),
            group_desc="EXPERT_DATA_PARALLEL_GROUP",
        )
        if create_gloo_process_groups:
            group_gloo = create_group(
                ranks, backend="gloo", group_desc="EXPERT_DATA_PARALLEL_GROUP_GLOO"
            )
        else:
            group_gloo = None
        if rank in ranks:
            _EXPERT_DATA_PARALLEL_GROUP = group
            _EXPERT_DATA_PARALLEL_GROUP_GLOO = group_gloo

        if num_distributed_optimizer_instances > 1:
            # Create groups for Partial DistOpt, one for intra-partial DP domain
            # Another for inter-partial DP domain

            # Set NCCL_COLLNET_ENABLE to 1 to enable SHARP for the dp_replica group.
            if sharp_enabled_group == "dp_replica":
                os.environ["NCCL_COLLNET_ENABLE"] = "1"
            hierarchical_groups, hierarchical_groups_gloo = create_hierarchical_groups(
                rank,
                ranks,
                [intra_partial_expert_data_parallel_size, num_distributed_optimizer_instances],
                create_gloo_process_groups=create_gloo_process_groups,
                pg_options=[
                    get_nccl_options("intra_ep_dp", nccl_comm_cfgs),
                    get_nccl_options("inter_ep_dp", nccl_comm_cfgs),
                ],
                timeout=timeout,
                group_desc="EXPERT_DATA_PARALLEL_GROUP",
            )
            if rank in ranks:
                _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP = hierarchical_groups[0]
                _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO = hierarchical_groups_gloo[0]
                _INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP = hierarchical_groups[1]

            if sharp_enabled_group == "dp_replica":
                # PyTorch is performing lazy initialization of the communicator group.
                # Therefore, we need to perform a nccl call to ensure that the communicator group is created.
                if _INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP is not None:
                    torch.distributed.barrier(
                        group=_INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP,
                        device_ids=[torch.cuda.current_device()],
                    )
                    torch.cuda.synchronize()
                # Set NCCL_COLLNET_ENABLE to 0 to restrict SHARP application to the dp_replica group.
                if "NCCL_COLLNET_ENABLE" in os.environ:
                    del os.environ["NCCL_COLLNET_ENABLE"]
        else:
            _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP = _EXPERT_DATA_PARALLEL_GROUP
            _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO = _EXPERT_DATA_PARALLEL_GROUP_GLOO
    ### End of expert related parallel groups initialization

    # build the intra distributed optimizer instance group
    global _INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP
    assert (
        _INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP is None
    ), "Intra distributed optimizer instance group is already initialized"

    model_parallel_group_id = 0
    intra_dist_opt_ranks = []
    for ranks in expert_decoder_rank_generator.get_ranks('tp-ep-pp'):
        model_parallel_group_id += 1
        intra_dist_opt_ranks.extend(ranks)
        if model_parallel_group_id % intra_partial_expert_data_parallel_size == 0:
            intra_dist_opt_instance_group = create_group(
                intra_dist_opt_ranks,
                timeout=timeout,
                pg_options=get_nccl_options("intra_dist_opt_instance", nccl_comm_cfgs),
                group_desc="INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP",
            )
            if rank in intra_dist_opt_ranks:
                _INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP = intra_dist_opt_instance_group
            intra_dist_opt_ranks = []

    # Initialize global memory buffer
    # This isn't really "parallel state" but there isn't another good place to
    # put this. If we end up with a more generic initialization of megatron-core
    # we could stick it there
    _set_global_memory_buffer()


def is_initialized():
    """Useful for code segments that may be accessed with or without mpu initialization"""
    return _DATA_PARALLEL_GROUP is not None


def model_parallel_is_initialized():
    """Check if model- and data-parallel groups are initialized."""
    if (
        _TENSOR_MODEL_PARALLEL_GROUP is None
        or _PIPELINE_MODEL_PARALLEL_GROUP is None
        or _DATA_PARALLEL_GROUP is None
    ):
        return False
    return True


def get_model_parallel_group(check_initialized=True):
    """Get the model-parallel group the caller rank belongs to."""
    if check_initialized:
        assert _MODEL_PARALLEL_GROUP is not None, "model parallel group is not initialized"
    return _MODEL_PARALLEL_GROUP


def get_tensor_model_parallel_group(check_initialized=True):
    """Get the tensor-model-parallel group the caller rank belongs to."""
    if check_initialized:
        assert (
            _TENSOR_MODEL_PARALLEL_GROUP is not None
        ), "tensor model parallel group is not initialized"
    return _TENSOR_MODEL_PARALLEL_GROUP


def get_pipeline_model_parallel_group(check_initialized=True):
    """Get the pipeline-model-parallel group the caller rank belongs to."""
    if check_initialized:
        assert (
            _PIPELINE_MODEL_PARALLEL_GROUP is not None
        ), "pipeline_model parallel group is not initialized"
    return _PIPELINE_MODEL_PARALLEL_GROUP


def get_data_parallel_group(
    with_context_parallel=False, partial_data_parallel=False, independent_all_gather=False
):
    """Get the data-parallel group the caller rank belongs to."""
    if with_context_parallel:
        if partial_data_parallel:
            assert (
                _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP is not None
            ), "Intra partial data parallel group is not initialized"
            return _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP
        if independent_all_gather:
            assert (
                _DATA_PARALLEL_GROUP_WITH_CP_AG is not None
            ), "data parallel group with context parallel AG is not initialized"
            return _DATA_PARALLEL_GROUP_WITH_CP_AG
        assert (
            _DATA_PARALLEL_GROUP_WITH_CP is not None
        ), "data parallel group with context parallel combined is not initialized"
        return _DATA_PARALLEL_GROUP_WITH_CP
    else:
        assert _DATA_PARALLEL_GROUP is not None, "data parallel group is not initialized"
        assert partial_data_parallel == False, "Partial DP for Optimizer needs to include CP"
        return _DATA_PARALLEL_GROUP


def has_separate_all_gather_group() -> bool:
    """Check if a separate all-gather process group has been created.

    Returns True if a dedicated all-gather process group exists for improved
    communication overlap, False otherwise.
    """
    return _DATA_PARALLEL_GROUP_WITH_CP_AG is not None


def get_data_parallel_group_gloo(with_context_parallel=False, partial_data_parallel=False):
    """Get the Gloo data-parallel group the caller rank belongs to."""
    if with_context_parallel:
        if partial_data_parallel:
            assert (
                _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_GLOO is not None
            ), "Intra partial data parallel group is not initialized"
            return _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP_GLOO
        assert (
            _DATA_PARALLEL_GROUP_WITH_CP_GLOO is not None
        ), "data parallel group-gloo with context parallel combined is not initialized"
        return _DATA_PARALLEL_GROUP_WITH_CP_GLOO
    else:
        assert _DATA_PARALLEL_GROUP_GLOO is not None, "data parallel group-gloo is not initialized"
        assert partial_data_parallel == False, "Partial DP for Optimizer needs to include CP"
        return _DATA_PARALLEL_GROUP_GLOO


def get_context_parallel_group(check_initialized=True):
    """Get the context-parallel group the caller rank belongs to."""
    if check_initialized:
        assert _CONTEXT_PARALLEL_GROUP is not None, "context parallel group is not initialized"
    return _CONTEXT_PARALLEL_GROUP


def get_context_parallel_global_ranks(check_initialized=True):
    """Get all global ranks of the context-parallel group that the caller rank belongs to."""
    if check_initialized:
        assert (
            _CONTEXT_PARALLEL_GLOBAL_RANKS is not None
        ), "context parallel group is not initialized"
    return _CONTEXT_PARALLEL_GLOBAL_RANKS


def get_hierarchical_context_parallel_groups(check_initialized=True):
    """Get the inner ring of context parallel group the caller rank belongs to."""
    if check_initialized:
        assert _HIERARCHICAL_CONTEXT_PARALLEL_GROUPS is not None
    return _HIERARCHICAL_CONTEXT_PARALLEL_GROUPS


def get_hybrid_data_context_parallel_groups(check_initialized=True, group_size=None):
    """Get the hybrid context parallel groups the caller rank belongs to."""
    # If the group size is the same as the entire DPxCP group, return the original group
    if get_data_parallel_world_size(with_context_parallel=True) == group_size:
        if check_initialized:
            assert _DATA_PARALLEL_GROUP_WITH_CP is not None
        return _DATA_PARALLEL_GROUP_WITH_CP
    if check_initialized:
        assert _HYBRID_DP_CP_GROUPS is not None
    return _HYBRID_DP_CP_GROUPS[group_size]


def get_embedding_group(check_initialized=True):
    """Get the embedding group the caller rank belongs to."""
    if check_initialized:
        assert _EMBEDDING_GROUP is not None, "embedding group is not initialized"
    return _EMBEDDING_GROUP


def get_position_embedding_group(check_initialized=True):
    """Get the position embedding group the caller rank belongs to."""
    if check_initialized:
        assert _POSITION_EMBEDDING_GROUP is not None, "position embedding group is not initialized"
    return _POSITION_EMBEDDING_GROUP


def get_amax_reduction_group(with_context_parallel=False, tp_only_amax_red=False):
    """Get the FP8 amax reduction group the caller rank belongs to."""
    if with_context_parallel:
        if not tp_only_amax_red:
            assert (
                _TENSOR_AND_DATA_PARALLEL_GROUP_WITH_CP is not None
            ), "FP8 amax reduction group is not initialized"
            return _TENSOR_AND_DATA_PARALLEL_GROUP_WITH_CP
        else:
            assert (
                _TENSOR_AND_CONTEXT_PARALLEL_GROUP is not None
            ), "FP8 amax reduction group is not initialized"
            return _TENSOR_AND_CONTEXT_PARALLEL_GROUP
    else:
        if not tp_only_amax_red:
            assert (
                _TENSOR_AND_DATA_PARALLEL_GROUP is not None
            ), "FP8 amax reduction group is not initialized"
            return _TENSOR_AND_DATA_PARALLEL_GROUP
        else:
            assert (
                _TENSOR_MODEL_PARALLEL_GROUP is not None
            ), "FP8 amax reduction group is not initialized"
            return _TENSOR_MODEL_PARALLEL_GROUP


def get_tensor_and_data_parallel_group(check_initialized=True, with_context_parallel=False):
    """Get the tensor- and data-parallel group the caller rank belongs to."""
    if with_context_parallel:
        if check_initialized:
            assert (
                _TENSOR_AND_DATA_PARALLEL_GROUP_WITH_CP is not None
            ), 'tensor and data parallel group is not initialized'
        return _TENSOR_AND_DATA_PARALLEL_GROUP_WITH_CP
    else:
        if check_initialized:
            assert (
                _TENSOR_AND_DATA_PARALLEL_GROUP is not None
            ), 'tensor and data parallel group is not initialized'
        return _TENSOR_AND_DATA_PARALLEL_GROUP


def get_tensor_and_context_parallel_group(check_initialized=True):
    """Get the tensor- and context-parallel group the caller rank belongs to."""
    if check_initialized:
        assert (
            _TENSOR_AND_CONTEXT_PARALLEL_GROUP is not None
        ), "tensor and context parallel group is not initialized"
    return _TENSOR_AND_CONTEXT_PARALLEL_GROUP


def set_tensor_model_parallel_world_size(world_size):
    """Set the tensor-model-parallel size"""
    global _MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE
    _MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE = world_size


def set_pipeline_model_parallel_world_size(world_size):
    """Set the pipeline-model-parallel size"""
    global _MPU_PIPELINE_MODEL_PARALLEL_WORLD_SIZE
    _MPU_PIPELINE_MODEL_PARALLEL_WORLD_SIZE = world_size


def set_virtual_pipeline_model_parallel_world_size(world_size):
    """Set the pipeline-model-parallel size"""
    global _VIRTUAL_PIPELINE_MODEL_PARALLEL_WORLD_SIZE
    _VIRTUAL_PIPELINE_MODEL_PARALLEL_WORLD_SIZE = world_size


def get_tensor_model_parallel_world_size():
    """Return world size for the tensor-model-parallel group."""
    global _MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE
    if _MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE is not None:
        return _MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE
    return get_tensor_model_parallel_group().size()


def get_pipeline_model_parallel_world_size():
    """Return world size for the pipeline-model-parallel group."""
    global _MPU_PIPELINE_MODEL_PARALLEL_WORLD_SIZE
    if _MPU_PIPELINE_MODEL_PARALLEL_WORLD_SIZE is not None:
        return _MPU_PIPELINE_MODEL_PARALLEL_WORLD_SIZE
    return get_pipeline_model_parallel_group().size()


def set_tensor_model_parallel_rank(rank):
    """Set tensor-model-parallel rank."""
    global _MPU_TENSOR_MODEL_PARALLEL_RANK
    _MPU_TENSOR_MODEL_PARALLEL_RANK = rank


def set_pipeline_model_parallel_rank(rank):
    """Set pipeline-model-parallel rank."""
    global _MPU_PIPELINE_MODEL_PARALLEL_RANK
    _MPU_PIPELINE_MODEL_PARALLEL_RANK = rank


def get_tensor_model_parallel_rank():
    """Return caller's rank for the tensor-model-parallel group."""
    global _MPU_TENSOR_MODEL_PARALLEL_RANK
    if _MPU_TENSOR_MODEL_PARALLEL_RANK is not None:
        return _MPU_TENSOR_MODEL_PARALLEL_RANK
    return get_tensor_model_parallel_group().rank()


def get_pipeline_model_parallel_rank():
    """Return caller's rank for the pipeline-model-parallel group."""
    global _MPU_PIPELINE_MODEL_PARALLEL_RANK
    if _MPU_PIPELINE_MODEL_PARALLEL_RANK is not None:
        return _MPU_PIPELINE_MODEL_PARALLEL_RANK
    return torch.distributed.get_rank(group=get_pipeline_model_parallel_group())


def is_pipeline_first_stage(ignore_virtual=True, vp_stage=None):
    """Return True if in the first pipeline model-parallel stage, False otherwise."""
    if not ignore_virtual and get_virtual_pipeline_model_parallel_world_size() is not None:
        assert vp_stage is not None, "vp_stage must be passed if virtual pipeline is enabled"

        if vp_stage != 0:
            return False
    return get_pipeline_model_parallel_rank() == 0


def is_pipeline_last_stage(ignore_virtual=True, vp_stage=None):
    """Return True if in the last pipeline-model-parallel stage, False otherwise."""
    if not ignore_virtual and get_virtual_pipeline_model_parallel_world_size() is not None:
        assert vp_stage is not None, "vp_stage must be passed if virtual pipeline is enabled"

        if vp_stage != (get_virtual_pipeline_model_parallel_world_size() - 1):
            return False
    return get_pipeline_model_parallel_rank() == (get_pipeline_model_parallel_world_size() - 1)


def is_rank_in_embedding_group(ignore_virtual=True, vp_stage=None):
    """Return true if current rank is in embedding group, False otherwise."""
    rank = torch.distributed.get_rank()
    global _EMBEDDING_GLOBAL_RANKS
    if _EMBEDDING_GLOBAL_RANKS is None:
        return False
    if ignore_virtual:
        return rank in _EMBEDDING_GLOBAL_RANKS
    if rank in _EMBEDDING_GLOBAL_RANKS:
        if rank == _EMBEDDING_GLOBAL_RANKS[0]:
            return is_pipeline_first_stage(ignore_virtual=False, vp_stage=vp_stage)
        elif rank == _EMBEDDING_GLOBAL_RANKS[-1]:
            return is_pipeline_last_stage(ignore_virtual=False, vp_stage=vp_stage)
        else:
            return True
    return False


def is_rank_in_position_embedding_group():
    """Return true if current rank is in position embedding group, False otherwise."""
    rank = torch.distributed.get_rank()
    global _POSITION_EMBEDDING_GLOBAL_RANKS
    return _POSITION_EMBEDDING_GLOBAL_RANKS is not None and rank in _POSITION_EMBEDDING_GLOBAL_RANKS


def get_virtual_pipeline_model_parallel_rank():
    """Return the virtual pipeline-parallel rank."""
    global _VIRTUAL_PIPELINE_MODEL_PARALLEL_RANK
    return _VIRTUAL_PIPELINE_MODEL_PARALLEL_RANK


def set_virtual_pipeline_model_parallel_rank(rank):
    """Set the virtual pipeline-parallel rank."""
    warnings.warn(
        "set_virtual_pipeline_model_parallel_rank in global scope is deprecated. "
        "Pass vp_stage explicitly to is_pipeline_first_stage, is_pipeline_last_stage, etc.",
        DeprecationWarning,
    )
    global _VIRTUAL_PIPELINE_MODEL_PARALLEL_RANK
    _VIRTUAL_PIPELINE_MODEL_PARALLEL_RANK = rank


def get_virtual_pipeline_model_parallel_world_size():
    """Return the virtual pipeline-parallel world size."""
    global _VIRTUAL_PIPELINE_MODEL_PARALLEL_WORLD_SIZE
    return _VIRTUAL_PIPELINE_MODEL_PARALLEL_WORLD_SIZE


def get_tensor_model_parallel_src_rank():
    """Calculate the global rank corresponding to the first local rank
    in the tensor model parallel group."""
    assert (
        _TENSOR_MODEL_PARALLEL_GLOBAL_RANKS is not None
    ), "Tensor model parallel group is not initialized"
    return _TENSOR_MODEL_PARALLEL_GLOBAL_RANKS[0]


def get_model_parallel_src_rank():
    """Calculate the global rank corresponding to the first local rank
    in the model parallel group."""
    assert _MODEL_PARALLEL_GLOBAL_RANKS is not None, "Model parallel group is not initialized"
    return _MODEL_PARALLEL_GLOBAL_RANKS[0]


def get_data_parallel_src_rank(with_context_parallel=False):
    """Calculate the global rank corresponding to the first local rank
    in the data parallel group."""
    if with_context_parallel:
        assert (
            _DATA_PARALLEL_GLOBAL_RANKS_WITH_CP is not None
        ), "Data parallel group with context parallel combined is not initialized"
        return _DATA_PARALLEL_GLOBAL_RANKS_WITH_CP[0]
    else:
        assert _DATA_PARALLEL_GLOBAL_RANKS is not None, "Data parallel group is not initialized"
        return _DATA_PARALLEL_GLOBAL_RANKS[0]


def get_pipeline_model_parallel_first_rank():
    """Return the global rank of the first stage in the current rank's pipeline."""
    assert _PIPELINE_GLOBAL_RANKS is not None, "Pipeline parallel group is not initialized"
    return _PIPELINE_GLOBAL_RANKS[0]


def get_pipeline_model_parallel_last_rank():
    """Return the global rank of the last stage in the current rank's pipeline."""
    assert _PIPELINE_GLOBAL_RANKS is not None, "Pipeline parallel group is not initialized"
    last_rank_local = get_pipeline_model_parallel_world_size() - 1
    return _PIPELINE_GLOBAL_RANKS[last_rank_local]


def get_pipeline_model_parallel_next_rank():
    """Return the global rank that follows the caller in the pipeline."""
    assert _PIPELINE_GLOBAL_RANKS is not None, "Pipeline parallel group is not initialized"
    rank_in_pipeline = get_pipeline_model_parallel_rank()
    world_size = get_pipeline_model_parallel_world_size()
    return _PIPELINE_GLOBAL_RANKS[(rank_in_pipeline + 1) % world_size]


def get_pipeline_model_parallel_prev_rank():
    """Return the global rank that precedes the caller in the pipeline."""
    assert _PIPELINE_GLOBAL_RANKS is not None, "Pipeline parallel group is not initialized"
    rank_in_pipeline = get_pipeline_model_parallel_rank()
    world_size = get_pipeline_model_parallel_world_size()
    return _PIPELINE_GLOBAL_RANKS[(rank_in_pipeline - 1) % world_size]


def get_data_parallel_world_size(with_context_parallel=False, partial_data_parallel=False):
    """Return world size for the data parallel group."""
    global _MPU_DATA_PARALLEL_WORLD_SIZE
    if _MPU_DATA_PARALLEL_WORLD_SIZE is not None:
        return _MPU_DATA_PARALLEL_WORLD_SIZE
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        return get_data_parallel_group(
            with_context_parallel=with_context_parallel, partial_data_parallel=partial_data_parallel
        ).size()
    else:
        return 0


def set_data_parallel_rank(rank):
    """Return world size for the data parallel group."""
    global _MPU_DATA_PARALLEL_RANK
    _MPU_DATA_PARALLEL_RANK = rank


def get_data_parallel_rank(with_context_parallel=False, partial_data_parallel=False):
    """Return caller's rank in the data-parallel group."""
    global _MPU_DATA_PARALLEL_RANK
    if _MPU_DATA_PARALLEL_RANK is not None:
        return _MPU_DATA_PARALLEL_RANK
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        return get_data_parallel_group(
            with_context_parallel=with_context_parallel, partial_data_parallel=partial_data_parallel
        ).rank()
    else:
        return 0


def get_context_parallel_world_size():
    """Return world size for the context parallel group."""
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        return get_context_parallel_group().size()
    else:
        return 0


def get_context_parallel_rank():
    """Return caller's rank in the context-parallel group."""
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        return get_context_parallel_group().rank()
    else:
        return 0


def get_tensor_and_context_parallel_world_size():
    """Return world size for the tensor and context-parallel group."""
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        return get_tensor_and_context_parallel_group().size()
    else:
        return 0


def get_tensor_and_context_parallel_rank():
    """Return caller's rank in the joint tensor-model-parallel and context-parallel group."""
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        return get_tensor_and_context_parallel_group().rank()
    else:
        return 0


### Expert-related parallel states functions
def get_expert_model_parallel_group(check_initialized=True):
    """Get the expert-model-parallel group the caller rank belongs to."""
    if check_initialized:
        assert (
            _EXPERT_MODEL_PARALLEL_GROUP is not None
        ), "expert model parallel group is not initialized"
    return _EXPERT_MODEL_PARALLEL_GROUP


def get_expert_model_parallel_src_rank():
    """Calculate the global rank corresponding to the first local rank
    in the expert model parallel group."""
    assert (
        _EXPERT_MODEL_PARALLEL_RANKS is not None
    ), "Expert model parallel group is not initialized"
    return _EXPERT_MODEL_PARALLEL_RANKS[0]


def get_expert_model_parallel_world_size():
    """Return world size for the expert-model-parallel group."""
    if _MPU_EXPERT_MODEL_PARALLEL_WORLD_SIZE is not None:
        return _MPU_EXPERT_MODEL_PARALLEL_WORLD_SIZE
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        return get_expert_model_parallel_group().size()
    else:
        return 0


def set_expert_model_parallel_world_size(world_size):
    """Sets the expert-model-parallel world size."""
    global _MPU_EXPERT_MODEL_PARALLEL_WORLD_SIZE
    _MPU_EXPERT_MODEL_PARALLEL_WORLD_SIZE = world_size


def get_expert_model_parallel_rank():
    """Return caller's rank in the expert-model-parallel group."""
    if _MPU_EXPERT_MODEL_PARALLEL_RANK is not None:
        return _MPU_EXPERT_MODEL_PARALLEL_RANK
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        return get_expert_model_parallel_group().rank()
    else:
        return 0


def set_expert_model_parallel_rank(rank):
    """Set expert-model-parallel rank."""
    global _MPU_EXPERT_MODEL_PARALLEL_RANK
    _MPU_EXPERT_MODEL_PARALLEL_RANK = rank


def get_expert_tensor_parallel_group(check_initialized=True):
    """Get the expert-tensor-parallel group the caller rank belongs to."""
    if check_initialized:
        assert (
            _EXPERT_TENSOR_PARALLEL_GROUP is not None
        ), "Expert tensor parallel group is not initialized"
    return _EXPERT_TENSOR_PARALLEL_GROUP


def get_expert_tensor_parallel_world_size():
    """Return world size for the expert tensor parallel group."""
    global _MPU_EXPERT_TENSOR_PARALLEL_WORLD_SIZE
    if _MPU_EXPERT_TENSOR_PARALLEL_WORLD_SIZE is not None:
        return _MPU_EXPERT_TENSOR_PARALLEL_WORLD_SIZE
    # Use tensor parallel group world size for backward compability otherwise
    if not _EXPERT_TENSOR_PARALLEL_GROUP:
        return _MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE
    else:
        return get_expert_tensor_parallel_group().size()


def set_expert_tensor_parallel_world_size(world_size):
    "Set expert tensor model parallel size"
    global _MPU_EXPERT_TENSOR_PARALLEL_WORLD_SIZE
    _MPU_EXPERT_TENSOR_PARALLEL_WORLD_SIZE = world_size


def get_expert_tensor_parallel_rank():
    """Return my rank for the expert tensor parallel group."""
    global _MPU_EXPERT_TENSOR_PARALLEL_RANK
    if _MPU_EXPERT_TENSOR_PARALLEL_RANK is not None:
        return _MPU_EXPERT_TENSOR_PARALLEL_RANK
    # Use tensor parallel group rank for backward compability otherwise
    if not _EXPERT_TENSOR_PARALLEL_GROUP:
        return _MPU_TENSOR_MODEL_PARALLEL_RANK
    else:
        return get_expert_tensor_parallel_group().rank()


def set_expert_tensor_parallel_rank(rank):
    "Set expert tensor model parallel rank"
    global _MPU_EXPERT_TENSOR_PARALLEL_RANK
    _MPU_EXPERT_TENSOR_PARALLEL_RANK = rank


def get_expert_tensor_and_model_parallel_group(check_initialized=True):
    """Get the expert-tensor and expert-model group the caller rank belongs to."""
    if check_initialized:
        assert (
            _EXPERT_TENSOR_AND_MODEL_PARALLEL_GROUP is not None
        ), "Expert tensor and model parallel group is not initialized"
    return _EXPERT_TENSOR_AND_MODEL_PARALLEL_GROUP


def get_expert_tensor_and_model_parallel_world_size():
    """Return world size for the expert model parallel group times expert tensor parallel group."""
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        world_size = get_expert_tensor_and_model_parallel_group().size()
        return world_size
    else:
        return 0


def get_expert_tensor_and_model_parallel_rank():
    """Return caller's rank in the joint tensor- and expert-model-parallel group."""
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        return get_expert_tensor_and_model_parallel_group().rank()
    else:
        return 0


def get_expert_tensor_model_pipeline_parallel_group(check_initialized=True):
    """Get expert tensor-model-pipeline parallel group."""
    if check_initialized:
        assert (
            _EXPERT_TENSOR_MODEL_PIPELINE_PARALLEL_GROUP is not None
        ), "Expert tensor-model-pipeline parallel group is not initialized"
    return _EXPERT_TENSOR_MODEL_PIPELINE_PARALLEL_GROUP


def get_expert_data_parallel_group(check_initialized=True, partial_expert_data_parallel=False):
    """Get expert data parallel group."""
    if partial_expert_data_parallel:
        if check_initialized:
            assert (
                _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP is not None
            ), "Intra partial expert data parallel group is not initialized"
        return _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP
    else:
        if check_initialized:
            assert (
                _EXPERT_DATA_PARALLEL_GROUP is not None
            ), "Expert data parallel group is not initialized"
        return _EXPERT_DATA_PARALLEL_GROUP


def get_expert_data_parallel_group_gloo(partial_expert_data_parallel=False):
    """Get expert data parallel group-gloo."""
    if partial_expert_data_parallel:
        assert (
            _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO is not None
        ), "Intra partial expert data parallel group-gloo is not initialized"
        return _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO
    else:
        assert (
            _EXPERT_DATA_PARALLEL_GROUP_GLOO is not None
        ), "Expert data parallel group-gloo is not initialized"
        return _EXPERT_DATA_PARALLEL_GROUP_GLOO


def get_expert_data_parallel_rank(partial_expert_data_parallel=False):
    """Return caller's rank in the expert data parallel group."""
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        return get_expert_data_parallel_group(
            partial_expert_data_parallel=partial_expert_data_parallel
        ).rank()
    else:
        return 0


def get_expert_data_parallel_world_size(partial_expert_data_parallel=False):
    """Return world size for the expert data parallel group."""
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        return get_expert_data_parallel_group(
            partial_expert_data_parallel=partial_expert_data_parallel
        ).size()
    else:
        return 0


def get_intra_distributed_optimizer_instance_group(check_initialized=True):
    """Get the group of all GPUs in a distributed optimizer instance."""
    if check_initialized:
        assert (
            _INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP is not None
        ), "Intra distributed optimizer instance group is not initialized"
    return _INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP


def get_inter_distributed_optimizer_instance_group(check_initialized=True):
    """Get the group spanning the different distributed optimizer instances.
    Attention and MLP/Expert share same inter-instance group, so only built
    inter_partial_expert_data_parallel_group, and return it at here.
    """
    if check_initialized:
        assert _INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP is not None, (
            "Attention and MLP/Expert share same inter distributed optimize instance group, "
            "which has not been initialized"
        )
    return _INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP


### End of expert-related functions region


def _set_global_memory_buffer():
    """Initialize global buffer."""
    global _GLOBAL_MEMORY_BUFFER
    assert _GLOBAL_MEMORY_BUFFER is None, "global memory buffer is already initialized"
    _GLOBAL_MEMORY_BUFFER = GlobalMemoryBuffer()


def get_global_memory_buffer():
    """Return the global GlobalMemoryBuffer object"""
    assert _GLOBAL_MEMORY_BUFFER is not None, "global memory buffer is not initialized"
    return _GLOBAL_MEMORY_BUFFER


def destroy_global_memory_buffer():
    """Sets the global memory buffer to None"""
    global _GLOBAL_MEMORY_BUFFER
    _GLOBAL_MEMORY_BUFFER = None


def get_all_ranks():
    """Get caller's rank in tensor-model-parallel, data-parallel, context-parallel,
    pipeline-model-parallel and expert-model-parallel groups."""
    ranks = [
        get_tensor_model_parallel_rank(),
        get_data_parallel_rank(),
        get_context_parallel_rank(),
        get_pipeline_model_parallel_rank(),
        get_expert_model_parallel_rank(),
    ]
    return "_".join(map(lambda x: str(x or 0), ranks))


def destroy_model_parallel():
    """Set the groups to none."""
    global _MODEL_PARALLEL_GROUP
    _MODEL_PARALLEL_GROUP = None

    global _TENSOR_MODEL_PARALLEL_GROUP
    _TENSOR_MODEL_PARALLEL_GROUP = None

    global _PIPELINE_MODEL_PARALLEL_GROUP
    _PIPELINE_MODEL_PARALLEL_GROUP = None

    global _DATA_PARALLEL_GROUP
    _DATA_PARALLEL_GROUP = None

    global _DATA_PARALLEL_GROUP_WITH_CP
    _DATA_PARALLEL_GROUP_WITH_CP = None

    global _DATA_PARALLEL_GROUP_WITH_CP_AG
    _DATA_PARALLEL_GROUP_WITH_CP_AG = None

    global _CONTEXT_PARALLEL_GROUP
    _CONTEXT_PARALLEL_GROUP = None

    global _CONTEXT_PARALLEL_GLOBAL_RANKS
    _CONTEXT_PARALLEL_GLOBAL_RANKS = None

    global _EMBEDDING_GROUP
    _EMBEDDING_GROUP = None

    global _POSITION_EMBEDDING_GROUP
    _POSITION_EMBEDDING_GROUP = None

    global _POSITION_EMBEDDING_GLOBAL_RANKS
    _POSITION_EMBEDDING_GLOBAL_RANKS = None

    global _TENSOR_AND_DATA_PARALLEL_GROUP
    _TENSOR_AND_DATA_PARALLEL_GROUP = None

    global _TENSOR_AND_DATA_PARALLEL_GROUP_WITH_CP
    _TENSOR_AND_DATA_PARALLEL_GROUP_WITH_CP = None

    global _TENSOR_AND_CONTEXT_PARALLEL_GROUP
    _TENSOR_AND_CONTEXT_PARALLEL_GROUP = None

    global _VIRTUAL_PIPELINE_MODEL_PARALLEL_RANK
    _VIRTUAL_PIPELINE_MODEL_PARALLEL_RANK = None

    global _VIRTUAL_PIPELINE_MODEL_PARALLEL_WORLD_SIZE
    _VIRTUAL_PIPELINE_MODEL_PARALLEL_WORLD_SIZE = None

    global _MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE
    _MPU_TENSOR_MODEL_PARALLEL_WORLD_SIZE = None

    global _MPU_PIPELINE_MODEL_PARALLEL_WORLD_SIZE
    _MPU_PIPELINE_MODEL_PARALLEL_WORLD_SIZE = None

    global _MPU_TENSOR_MODEL_PARALLEL_RANK
    _MPU_TENSOR_MODEL_PARALLEL_RANK = None

    global _MPU_PIPELINE_MODEL_PARALLEL_RANK
    _MPU_PIPELINE_MODEL_PARALLEL_RANK = None

    global _GLOBAL_MEMORY_BUFFER
    _GLOBAL_MEMORY_BUFFER = None

    global _DATA_PARALLEL_GROUP_GLOO
    if (
        _DATA_PARALLEL_GROUP_GLOO is not None
        and torch.distributed.distributed_c10d._world.pg_map.get(_DATA_PARALLEL_GROUP_GLOO, None)
        is not None
    ):
        torch.distributed.destroy_process_group(_DATA_PARALLEL_GROUP_GLOO)
    _DATA_PARALLEL_GROUP_GLOO = None

    global _DATA_PARALLEL_GROUP_WITH_CP_GLOO
    if (
        _DATA_PARALLEL_GROUP_WITH_CP_GLOO is not None
        and torch.distributed.distributed_c10d._world.pg_map.get(
            _DATA_PARALLEL_GROUP_WITH_CP_GLOO, None
        )
        is not None
    ):
        torch.distributed.destroy_process_group(_DATA_PARALLEL_GROUP_WITH_CP_GLOO)
    _DATA_PARALLEL_GROUP_WITH_CP_GLOO = None

    # Destroy parallel state related to expert parallelism.
    global _EXPERT_MODEL_PARALLEL_GROUP
    _EXPERT_MODEL_PARALLEL_GROUP = None

    global _MPU_EXPERT_MODEL_PARALLEL_WORLD_SIZE
    _MPU_EXPERT_MODEL_PARALLEL_WORLD_SIZE = None

    global _MPU_EXPERT_MODEL_PARALLEL_RANK
    _MPU_EXPERT_MODEL_PARALLEL_RANK = None

    global _EXPERT_TENSOR_PARALLEL_GROUP
    _EXPERT_TENSOR_PARALLEL_GROUP = None

    global _MPU_EXPERT_TENSOR_PARALLEL_WORLD_SIZE
    _MPU_EXPERT_TENSOR_PARALLEL_WORLD_SIZE = None

    global _MPU_EXPERT_TENSOR_PARALLEL_RANK
    _MPU_EXPERT_TENSOR_PARALLEL_RANK = None

    global _EXPERT_TENSOR_AND_MODEL_PARALLEL_GROUP
    _EXPERT_TENSOR_AND_MODEL_PARALLEL_GROUP = None

    global _EXPERT_TENSOR_MODEL_PIPELINE_PARALLEL_GROUP
    _EXPERT_TENSOR_MODEL_PIPELINE_PARALLEL_GROUP = None

    global _EXPERT_DATA_PARALLEL_GROUP
    _EXPERT_DATA_PARALLEL_GROUP = None

    global _EXPERT_DATA_PARALLEL_GROUP_GLOO
    if (
        _EXPERT_DATA_PARALLEL_GROUP_GLOO is not None
        and torch.distributed.distributed_c10d._world.pg_map.get(
            _EXPERT_DATA_PARALLEL_GROUP_GLOO, None
        )
        is not None
    ):
        torch.distributed.destroy_process_group(_EXPERT_DATA_PARALLEL_GROUP_GLOO)
    _EXPERT_DATA_PARALLEL_GROUP_GLOO = None

    global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP
    _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP = None

    global _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO
    if (
        _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO is not None
        and torch.distributed.distributed_c10d._world.pg_map.get(
            _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO, None
        )
        is not None
    ):
        torch.distributed.destroy_process_group(_INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO)
    _INTRA_PARTIAL_EXPERT_DATA_PARALLEL_GROUP_GLOO = None

    global _INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP
    _INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP = None
    # End of expert parallelism destroy.

    global _INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP
    _INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP = None

    global _global_process_group_list
    _global_process_group_list = None

    SymmetricMemoryManager.destroy()


================================================
FILE: megatron/core/pipeline_parallel/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from .schedules import get_forward_backward_func


================================================
FILE: megatron/core/pipeline_parallel/bridge_communicator.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import logging
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, Optional, Tuple

import torch
import torch.distributed as dist

from megatron.core.hyper_comm_grid import HyperCommGrid


class CommRole(Enum):
    """Communication role for ranks in bridge communication.

    SENDER: Leader tp-cp rank within each DP replica of source grid.
            Sends data to destination grid receivers.
    RECEIVER: Leader tp-cp rank within each DP replica of destination grid.
              Receives data from source grid senders.
    MEMBER: Non-leader ranks within DP replicas.
            Participate in broadcasts from their local leader.
    """

    SENDER = "SENDER"
    RECEIVER = "RECEIVER"
    MEMBER = "MEMBER"


@dataclass
class RankCommInfo:
    """Explicit communication plan for a single rank."""

    role: CommRole = CommRole.MEMBER
    send_to_ranks: List[int] = field(default_factory=list)
    recv_from_ranks: List[int] = field(default_factory=list)


class BridgeCommunicator:
    """Pipeline Communicator between two modules with different(TP/DP/PP/CP).

    BridgeCommunicator:
    - Initialize the communicator between a pair of source and destination grids
    - Build a communication schedule for each rank
    - Provide public methods: send_forward, recv_forward, send_forward_recv_backward,
      send_backward_recv_forward to be used by the pipeline schedule.
    """

    # Cache broadcast PGs to avoid creating duplicate NCCL communicators for identical rank sets.
    _broadcast_pg_cache: Dict[str, "torch.distributed.ProcessGroup"] = {}

    @classmethod
    def destroy_broadcast_pgs(cls):
        """Destroy all cached broadcast process groups."""
        for pg in cls._broadcast_pg_cache.values():
            if pg is not None:
                dist.destroy_process_group(pg)
        cls._broadcast_pg_cache.clear()

    def __init__(
        self,
        src_grid: HyperCommGrid,
        dest_grid: HyperCommGrid,
        dim_mapping: Optional[Dict[str, int]] = None,
        comm_dtype: Optional[torch.dtype] = None,
        src_module_name: Optional[str] = None,
        dest_module_name: Optional[str] = None,
    ):
        """Initialize the bridge communicator between source and destination grids.

        CP is not supported yet. Will be added in follow up PR.

        Args:
            src_grid: Source HyperCommGrid
            dest_grid: Destination HyperCommGrid
            dim_mapping: Dictionary mapping logical dimensions to tensor axes.
                        Expected keys: 's' (sequence), 'b' (batch), 'h' (hidden).
                        Defaults to {'s': 1, 'b': 0, 'h': 2} if None.
        """
        self.src_grid = src_grid
        self.dest_grid = dest_grid
        self.src_module_name = src_module_name
        self.dest_module_name = dest_module_name
        self.comm_dtype = comm_dtype

        # TODO (ykarnati, pthombre) - CP support will be added in follow up PR.
        if 'cp' in self.src_grid.dim_names:
            assert self.src_grid.shape[self.src_grid.dim_names.index('cp')] == 1, (
                f"Source grid CP size must be 1, got "
                f"{self.src_grid.shape[self.src_grid.dim_names.index('cp')]}"
            )

        if 'cp' in self.dest_grid.dim_names:
            assert self.dest_grid.shape[self.dest_grid.dim_names.index('cp')] == 1, (
                f"Destination grid CP size must be 1, got "
                f"{self.dest_grid.shape[self.dest_grid.dim_names.index('cp')]}"
            )

        self.current_rank = dist.get_rank()
        self.comm_map: Dict[int, RankCommInfo] = {}
        if dim_mapping is None:
            self.dim_mapping = {'s': 1, 'b': 0, 'h': 2}
        else:
            assert set(dim_mapping.keys()) == {
                's',
                'b',
                'h',
            }, f"dim_mapping must have keys 's', 'b', 'h', got {set(dim_mapping.keys())}"
            assert all(
                v in {0, 1, 2} for v in dim_mapping.values()
            ), f"dim_mapping values must be 0, 1, or 2, got {list(dim_mapping.values())}"
            self.dim_mapping = dim_mapping

        self.src_grid_broadcast_pg = None
        self.dest_grid_broadcast_pg = None

        src_grid_broadcast_ranks_list = self.get_boundary_pp_stage_ranks(self.src_grid, is_src=True)
        dest_grid_broadcast_ranks_list = self.get_boundary_pp_stage_ranks(
            self.dest_grid, is_src=False
        )

        self.src_grid_broadcast_ranks = []
        if src_grid_broadcast_ranks_list:
            self.src_grid_broadcast_pg = self._get_or_create_broadcast_pg(
                src_grid_broadcast_ranks_list
            )
            self.src_grid_broadcast_ranks = next(
                (ranks for ranks in src_grid_broadcast_ranks_list if self.current_rank in ranks), []
            )

        self.dest_grid_broadcast_ranks = []
        if dest_grid_broadcast_ranks_list:
            self.dest_grid_broadcast_pg = self._get_or_create_broadcast_pg(
                dest_grid_broadcast_ranks_list
            )
            self.dest_grid_broadcast_ranks = next(
                (ranks for ranks in dest_grid_broadcast_ranks_list if self.current_rank in ranks),
                [],
            )

        self.src_tp_leaders, self.src_local_leader_rank = self.get_leader_rank(
            self.src_grid, is_src=True
        )
        self.dest_tp_leaders, self.dest_local_leader_rank = self.get_leader_rank(
            self.dest_grid, is_src=False
        )

        log_msg = (
            f"[Rank {self.current_rank}] "
            f"srcLeader={self.src_local_leader_rank} "
            f"destLeader={self.dest_local_leader_rank} "
            f"srcBroadcastGrpRanks={self.src_grid_broadcast_ranks} "
            f"destBroadcastGrpRanks={self.dest_grid_broadcast_ranks}"
        )
        logging.info(log_msg)

        self.build_comm_map(self.src_tp_leaders, self.dest_tp_leaders)
        dist.barrier()

    @classmethod
    def _get_or_create_broadcast_pg(cls, ranks_list: List[List[int]]):
        """Get or create a broadcast PG, caching to avoid duplicate NCCL communicators."""
        cache_key = str(sorted([tuple(r) for r in ranks_list]))
        if cache_key not in cls._broadcast_pg_cache:
            pg, _ = dist.new_subgroups_by_enumeration(ranks_list, backend='nccl')
            cls._broadcast_pg_cache[cache_key] = pg
        return cls._broadcast_pg_cache[cache_key]

    def get_leader_rank(self, grid: HyperCommGrid, is_src: bool) -> List[int]:
        """Get the leader rank for a given grid and direction.

        We elect leader rank for each dp replica, the first tp-cp rank in the group
        in the last pp stage (for src grid) or first pp stage (for dest grid) is the leader.
        """
        leader_ranks = []
        local_leader_rank = None
        # grid.gen_rank_enum(["tp", "cp", "pp"]) # vary tp & cp, but same dp
        # returns a list of sublists, each sublist is a group of ranks
        # that have different tp & cp & pp, same dp
        per_dp_replica_ranks = grid._gen_rank_enum([x for x in grid.dim_names if x != "dp"])
        if is_src:
            # Add rank from last pp stage
            ranks = []
            for group in per_dp_replica_ranks:
                if self.current_rank in group:
                    assert (
                        local_leader_rank is None
                    ), "only one local leader rank is allowed per dp replica"
                    local_leader_rank = group[-1]
                ranks.append(group[-1])
            leader_ranks.extend(ranks)
        else:
            # Add rank from first pp stage
            ranks = []
            for group in per_dp_replica_ranks:
                if self.current_rank in group:
                    assert (
                        local_leader_rank is None
                    ), "only one local leader rank is allowed per dp replica"
                    local_leader_rank = group[0]
                ranks.append(group[0])
            leader_ranks.extend(ranks)
        return leader_ranks, local_leader_rank

    def get_boundary_pp_stage_ranks(self, grid: HyperCommGrid, is_src: bool):
        """Get TP-CP ranks at boundary PP stage for each DP replica.

        Returns ranks at the last PP stage (if src) or first PP stage (if dest)
        for each DP dimension, ordered by DP dimension.
        """

        # Get tp-cp rank enumeration (each list has same dp and pp, different tp and cp)
        tpcp_rank_lists = grid._gen_rank_enum(['tp', 'cp'])
        pp_size = grid.shape[grid.dim_names.index('pp')]

        # Determine boundary pp stage
        boundary_pp_stage = pp_size - 1 if is_src else 0

        boundary_pp_stage_ranks = []

        for rank_list in tpcp_rank_lists:
            # We can check any rank in the list since they all have the same pp coordinate
            if not rank_list:
                continue
            sample_rank = rank_list[0]
            # Calculate rank coordinates
            rank_coords = []
            temp_rank = sample_rank - grid.rank_offset

            # Extract coordinates in the original dimension order
            for dim_size in grid.shape:
                rank_coords.append(temp_rank % dim_size)
                temp_rank //= dim_size

            pp_coord = rank_coords[grid.dim_names.index('pp')]

            if pp_coord == boundary_pp_stage:
                # This rank list is at the boundary pp stage, add all ranks from this list
                boundary_pp_stage_ranks.append(rank_list)

        return boundary_pp_stage_ranks

    def is_current_rank_in_grid(self, grid: HyperCommGrid) -> bool:
        """Check if the current rank is in the grid."""
        return grid.rank_offset <= self.current_rank < (grid.rank_offset + grid.size)

    def build_comm_map(self, src_tp_leaders: List[int], dest_tp_leaders: List[int]):
        """Get src/dest tp leaders and populate comm_map for each rank.

        This method analyzes the source and destination grids to determine
        which ranks need to send/receive data and builds the communication
        schedule accordingly.
        """
        # Ensure that the number of leaders can be evenly divided
        src_count = len(src_tp_leaders)
        dest_count = len(dest_tp_leaders)

        if src_count % dest_count != 0 and dest_count % src_count != 0:
            raise ValueError(
                f"Source TP leaders count ({src_count}) and destination TP leaders count "
                f"({dest_count}) must be evenly divisible. One must be a multiple of the other."
            )
        # Get all ranks in source and destination grids
        src_all_ranks = list(
            range(self.src_grid.rank_offset, self.src_grid.rank_offset + self.src_grid.size)
        )
        dest_all_ranks = list(
            range(self.dest_grid.rank_offset, self.dest_grid.rank_offset + self.dest_grid.size)
        )

        all_ranks = src_all_ranks + dest_all_ranks

        # Initialize all ranks as MEMBER by default
        for rank in all_ranks:
            self.comm_map[rank] = RankCommInfo(role=CommRole.MEMBER)

        scale_factor = int(src_count / dest_count)
        if scale_factor > 1:
            # Fan-in: multiple source leaders send to fewer destination leaders
            for i, dest_rank in enumerate(dest_tp_leaders):
                # Each destination rank receives from scale_factor source ranks
                src_ranks = src_tp_leaders[i * scale_factor : (i + 1) * scale_factor]

                # Set up senders
                for src_rank in src_ranks:
                    self.comm_map[src_rank] = RankCommInfo(
                        role=CommRole.SENDER, send_to_ranks=[dest_rank]
                    )

                # Set up receiver
                self.comm_map[dest_rank] = RankCommInfo(
                    role=CommRole.RECEIVER, recv_from_ranks=src_ranks
                )
        else:
            # Fan-out: fewer source leaders send to more destination leaders
            scale_factor = int(dest_count / src_count)
            for i, src_rank in enumerate(src_tp_leaders):
                # Each source rank sends to scale_factor destination ranks
                dest_ranks = dest_tp_leaders[i * scale_factor : (i + 1) * scale_factor]

                # Set up sender
                self.comm_map[src_rank] = RankCommInfo(
                    role=CommRole.SENDER, send_to_ranks=dest_ranks
                )

                # Set up receivers
                for dest_rank in dest_ranks:
                    self.comm_map[dest_rank] = RankCommInfo(
                        role=CommRole.RECEIVER, recv_from_ranks=[src_rank]
                    )

    def send_forward(self, tensor_to_send: torch.Tensor):
        """Send forward activation tensor.

        Args:
            tensor_to_send: The tensor to send to the destination grid
        """
        if not self.is_current_rank_in_grid(self.src_grid):
            raise ValueError(
                f"[Bridge Communicator] [send_forward] Rank {self.current_rank} "
                "is not in the source grid."
            )

        rank_info = self.comm_map.get(self.current_rank)
        assert rank_info is not None, f"Rank {self.current_rank} is not in the comm map"

        if rank_info.role == CommRole.SENDER:
            # Send splits to destination ranks
            num_sends = len(rank_info.send_to_ranks)
            if num_sends > 0:
                tensor_splits = self._split_tensor_at_batch_dim(tensor_to_send, num_sends)
                self._communicate_shapes(tensor_to_send_next=tensor_splits[0])
                for dest_rank, tensor_split in zip(rank_info.send_to_ranks, tensor_splits):
                    logging.debug(
                        f"[Bridge Comunicator] [send_forward] Rank {self.current_rank} "
                        f"send to rank {dest_rank}"
                    )
                    dist.send(tensor_split, dst=dest_rank)

    def recv_forward(self) -> torch.Tensor:
        """Receive forward activation tensor.

        Args:
            tensor_shape: Expected tensor shape (None if using shape communication)

        Returns:
            torch.Tensor: The received activation tensor
        """
        # receive forward only gets called on the dest grid
        if not self.is_current_rank_in_grid(self.dest_grid):
            raise ValueError(
                f"[Bridge Communicator] [receive_forward] Rank {self.current_rank} "
                "is not in the destination grid."
            )

        rank_info = self.comm_map.get(self.current_rank)
        assert rank_info is not None, f"Rank {self.current_rank} is not in the comm map"
        logging.debug(
            f"[Bridge Communicator] [receive_forward] Rank {self.current_rank} "
            f"[src - {self.src_module_name}] [dest - {self.dest_module_name}] "
            f"rank_info: {rank_info}"
        )
        if rank_info.role == CommRole.RECEIVER:
            assert (
                self.current_rank == self.dest_local_leader_rank
            ), f"Rank {self.current_rank} is not the leader rank"
            # p2p call to receive the tensor
            recv_forward_shapes, recv_grad_shapes = self._communicate_shapes(recv_prev=True)
            logging.debug(
                f"[Bridge Communicator] [receive_forward] Rank {self.current_rank} "
                f"received forward shapes {recv_forward_shapes} and grad shapes {recv_grad_shapes}"
            )
            received_tensors_list = []
            for src_rank, shape in zip(rank_info.recv_from_ranks, recv_forward_shapes):
                tensor_to_recv = torch.empty(
                    shape,
                    device=torch.cuda.current_device(),
                    dtype=self.comm_dtype,
                    requires_grad=True,
                )
                dist.recv(tensor_to_recv, src=src_rank)
                logging.debug(
                    f"[Bridge Communicator] [receive_forward] Rank {self.current_rank} "
                    f"received tensor from src rank {src_rank} "
                    f"shape {tensor_to_recv.shape} sum {tensor_to_recv.sum()}"
                )
                received_tensors_list.append(tensor_to_recv)
            aggregated_tensor = torch.cat(received_tensors_list, dim=self.dim_mapping['b'])
            logging.debug(
                f"[Bridge Communicator] [receive_forward] Rank {self.current_rank} "
                f"broadcasting tensor {aggregated_tensor.shape} sum {aggregated_tensor.sum()}"
            )

            # Step 1: broadcast its shape so receivers can allocate
            shape_tensor = torch.tensor(
                aggregated_tensor.shape, device=aggregated_tensor.device, dtype=torch.int64
            )
            dist.broadcast(shape_tensor, src=self.current_rank, group=self.dest_grid_broadcast_pg)

            # Step 2: broadcast the actual tensor
            dist.broadcast(
                aggregated_tensor, src=self.current_rank, group=self.dest_grid_broadcast_pg
            )

            return aggregated_tensor

        elif (
            rank_info.role == CommRole.MEMBER
            and self.current_rank in self.dest_grid_broadcast_ranks
        ):
            # Non-leader rank - participate in broadcast
            shape_tensor = torch.empty((3), device=torch.cuda.current_device(), dtype=torch.int64)
            dist.broadcast(
                shape_tensor, src=self.dest_local_leader_rank, group=self.dest_grid_broadcast_pg
            )

            received_shape = tuple(shape_tensor.tolist())
            received_tensor = torch.empty(
                received_shape,
                device=torch.cuda.current_device(),
                dtype=self.comm_dtype,
                requires_grad=True,
            )

            # Receive the full tensor via broadcast
            dist.broadcast(
                received_tensor, src=self.dest_local_leader_rank, group=self.dest_grid_broadcast_pg
            )

            logging.debug(
                f"[Bridge Communicator] [receive_forward] Rank {self.current_rank} "
                f"received tensor via broadcast, shape {received_tensor.shape}"
            )
            return received_tensor

    def send_backward(self, grad_tensor: torch.Tensor):
        """Send backward gradient tensor.

        Note: Gradient senders are activation 'RECEIVERS'

        Args:
            grad_tensor: The gradient tensor to send back
        """
        if not self.is_current_rank_in_grid(self.dest_grid):
            raise ValueError(
                f"[Bridge Communicator] [send_backward] Rank {self.current_rank} "
                "is not in the destination grid."
            )

        rank_info = self.comm_map.get(self.current_rank)
        assert rank_info is not None, f"Rank {self.current_rank} is not in the comm map"

        if rank_info.role == CommRole.RECEIVER:
            assert (
                self.current_rank == self.dest_local_leader_rank
            ), f"Rank {self.current_rank} is not the leader rank"
            # Send gradients back to source ranks
            num_receives = len(rank_info.recv_from_ranks)
            tensor_splits = self._split_tensor_at_batch_dim(grad_tensor, num_receives)
            self._communicate_shapes(tensor_to_send_prev=tensor_splits[0])
            if num_receives > 0:
                for src_rank, tensor_split in zip(rank_info.recv_from_ranks, tensor_splits):
                    # Send the gradient split back to the source rank
                    logging.debug(
                        f"[Bridge Communicator] [send_backward] Rank {self.current_rank} "
                        f"sending gradient to src rank {src_rank} "
                        f"shape {tensor_split.shape} sum {tensor_split.sum()}"
                    )
                    dist.send(tensor_split, dst=src_rank)

    def recv_backward(self) -> torch.Tensor:
        """Receive backward gradient tensor.

        Note: Gradient receivers are activation 'SENDERS'

        Args:
            tensor_shape: Expected gradient tensor shape

        Returns:
            torch.Tensor: The received gradient tensor
        """
        # receive backward only gets called on the src grid
        if not self.is_current_rank_in_grid(self.src_grid):
            raise ValueError(
                f"[Bridge Communicator] [receive_backward] Rank {self.current_rank} "
                "is not in the source grid."
            )

        rank_info = self.comm_map.get(self.current_rank)
        assert rank_info is not None, f"Rank {self.current_rank} is not in the comm map"

        if rank_info.role == CommRole.SENDER:
            assert (
                self.current_rank == self.src_local_leader_rank
            ), f"Rank {self.current_rank} is not the leader rank"
            recv_forward_shapes, recv_grad_shapes = self._communicate_shapes(recv_next=True)
            logging.debug(
                f"[Bridge Communicator] [receive_backward] Rank {self.current_rank} "
                f"received forward shapes {recv_forward_shapes} and grad shapes {recv_grad_shapes}"
            )
            # Receive gradient tensors from destination ranks
            received_gradients_list = []
            for dest_rank, grad_shape in zip(rank_info.send_to_ranks, recv_grad_shapes):
                # The destination rank that we sent to will send us gradients back
                grad_tensor = torch.empty(
                    grad_shape, device=torch.cuda.current_device(), dtype=self.comm_dtype
                )
                dist.recv(grad_tensor, src=dest_rank)
                logging.debug(
                    f"[Bridge Communicator] [receive_backward] Rank {self.current_rank} "
                    f"received gradient from dest rank {dest_rank} "
                    f"shape {grad_tensor.shape} sum {grad_tensor.sum()}"
                )
                received_gradients_list.append(grad_tensor)

            # Concatenate received gradients
            aggregated_gradient = torch.cat(received_gradients_list, dim=self.dim_mapping['b'])
            logging.debug(
                f"[Bridge Communicator] [receive_backward] Rank {self.current_rank} "
                f"agg grad shape {aggregated_gradient.shape} sum {aggregated_gradient.sum()}"
            )

            shape_tensor = torch.tensor(
                aggregated_gradient.shape, device=torch.cuda.current_device(), dtype=torch.int64
            )
            dist.broadcast(shape_tensor, src=self.current_rank, group=self.src_grid_broadcast_pg)

            # Scatter the tensors to all ranks in the group
            dist.broadcast(
                aggregated_gradient, src=self.current_rank, group=self.src_grid_broadcast_pg
            )
            return aggregated_gradient

        elif (
            rank_info.role == CommRole.MEMBER and self.current_rank in self.src_grid_broadcast_ranks
        ):
            # Non-leader rank - participate in gather for gradients
            # Receive broadcasted tensor shape from leader rank
            shape_tensor = torch.empty((3), device=torch.cuda.current_device(), dtype=torch.int64)
            dist.broadcast(
                shape_tensor, src=self.src_local_leader_rank, group=self.src_grid_broadcast_pg
            )

            logging.debug(
                f"[Bridge Communicator] [receive_backward] Rank {self.current_rank} "
                f"received shape tensor {shape_tensor}"
            )
            received_shape = tuple(shape_tensor.tolist())
            received_gradient = torch.empty(
                received_shape, device=torch.cuda.current_device(), dtype=self.comm_dtype
            )

            dist.broadcast(
                received_gradient, src=self.src_local_leader_rank, group=self.src_grid_broadcast_pg
            )
            logging.debug(
                f"[Bridge Communicator] [receive_backward] Rank {self.current_rank} "
                f"received gradient from scatter operation, shape {received_gradient.shape}"
            )
            return received_gradient

    def send_forward_recv_backward(
        self, input_tensor: torch.Tensor, grad_shape: Optional[Tuple[int, ...]] = None
    ) -> torch.Tensor:
        """Combined operation: send forward activation and receive backward gradient.

        Args:
            input_tensor: The tensor to send forward
            grad_shape: Expected gradient tensor shape

        Returns:
            torch.Tensor: The received gradient tensor
        """
        if not self.is_current_rank_in_grid(self.src_grid):
            raise ValueError(
                f"Rank {self.current_rank} is not in the source grid. "
                "send_forward_recv_backward is only allowed on src grid"
            )

        rank_info = self.comm_map.get(self.current_rank)
        assert rank_info is not None, f"Rank {self.current_rank} is not in the comm map"
        logging.debug(
            f"[Bridge Communicator] [send_forward_recv_backward] Rank {self.current_rank} "
            f"[src - {self.src_module_name}] [dest - {self.dest_module_name}] "
            f"rank_info: {rank_info}"
        )
        if rank_info.role == CommRole.SENDER:
            assert (
                self.current_rank == self.src_local_leader_rank
            ), f"Rank {self.current_rank} is not the leader rank"

            num_sends = len(rank_info.send_to_ranks)
            activation_splits = self._split_tensor_at_batch_dim(input_tensor, num_sends)
            # Communicate shapes for both directions (send forward, receive backward)
            recv_forward_shapes, recv_grad_shapes = self._communicate_shapes(
                tensor_to_send_next=activation_splits[0], recv_next=True
            )
            logging.debug(
                f"[Bridge Communicator] [send_forward_recv_backward] Rank {self.current_rank} "
                f"received forward shapes {recv_forward_shapes} and grad shapes {recv_grad_shapes}"
            )

            # Prepare simultaneous send/receive operations
            if num_sends > 0:
                # Prepare gradient receive tensors
                received_gradients_list = []
                for i, recv_grad_shape in enumerate(recv_grad_shapes):
                    grad_tensor = torch.empty(
                        recv_grad_shape, device=torch.cuda.current_device(), dtype=self.comm_dtype
                    )
                    received_gradients_list.append(grad_tensor)

                # Create batch P2P operations for simultaneous send/receive
                ops = []
                for dest_rank, activation_split, grad_tensor in zip(
                    rank_info.send_to_ranks, activation_splits, received_gradients_list
                ):
                    # Send activation
                    ops.append(
                        torch.distributed.P2POp(
                            torch.distributed.isend, activation_split, dest_rank
                        )
                    )
                    # Receive gradient
                    ops.append(
                        torch.distributed.P2POp(torch.distributed.irecv, grad_tensor, dest_rank)
                    )

                logging.debug(
                    f"[Bridge Communicator] [send_forward_recv_backward] Rank {self.current_rank} "
                    f"executing {len(ops)} simultaneous P2P operations"
                )
                reqs = torch.distributed.batch_isend_irecv(ops)
                for req in reqs:
                    req.wait()

                # Concatenate received gradients
                aggregated_gradient = torch.cat(received_gradients_list, dim=self.dim_mapping['b'])
                logging.debug(
                    f"[Bridge Communicator] [send_forward_recv_backward] Rank {self.current_rank} "
                    f"agg grad shape {aggregated_gradient.shape} sum {aggregated_gradient.sum()}"
                )
                # Broadcast tensor shape to all ranks in scatter_pg
                tensor_shape_to_broadcast = aggregated_gradient.shape
                shape_tensor = torch.tensor(
                    tensor_shape_to_broadcast, device=torch.cuda.current_device(), dtype=torch.int64
                )
                dist.broadcast(
                    shape_tensor, src=self.current_rank, group=self.src_grid_broadcast_pg
                )

                # Broadcast the tensors to all ranks in the group
                dist.broadcast(
                    aggregated_gradient, src=self.current_rank, group=self.src_grid_broadcast_pg
                )

                return aggregated_gradient

        elif (
            rank_info.role == CommRole.MEMBER and self.current_rank in self.src_grid_broadcast_ranks
        ):
            # participate in both gather for gradients
            # Receive gradient from leader using broadcast
            shape_tensor = torch.empty((3), device=torch.cuda.current_device(), dtype=torch.int64)
            dist.broadcast(
                shape_tensor, src=self.src_local_leader_rank, group=self.src_grid_broadcast_pg
            )

            # Use the received shape to create tensor for broadcast
            received_shape = tuple(shape_tensor.tolist())
            received_gradient = torch.empty(
                received_shape, device=torch.cuda.current_device(), dtype=self.comm_dtype
            )
            dist.broadcast(
                received_gradient, src=self.src_local_leader_rank, group=self.src_grid_broadcast_pg
            )
            logging.debug(
                f"[Bridge Communicator] [send_forward_recv_backward] Rank {self.current_rank} "
                f"received gradient from broadcast, shape {received_gradient.shape}"
            )
            return received_gradient

    def send_backward_recv_forward(
        self, grad_tensor: torch.Tensor, forward_shape: Optional[Tuple[int, ...]] = None
    ) -> torch.Tensor:
        """Combined operation: send backward gradient and receive forward activation.

        Args:
            grad_tensor: The gradient tensor to send backward
            forward_shape: Expected forward tensor shape

        Returns:
            torch.Tensor: The received activation tensor
        """
        if not self.is_current_rank_in_grid(self.dest_grid):
            raise ValueError(
                f"Rank {self.current_rank} is not in the destination grid. "
                "send_backward_recv_forward is only allowed on dest grid"
            )

        rank_info = self.comm_map.get(self.current_rank)
        assert rank_info is not None, f"Rank {self.current_rank} is not in the comm map"

        if rank_info.role == CommRole.RECEIVER:
            assert (
                self.current_rank == self.dest_local_leader_rank
            ), f"Rank {self.current_rank} is not the leader rank"

            num_receives = len(rank_info.recv_from_ranks)
            gradient_splits = self._split_tensor_at_batch_dim(grad_tensor, num_receives)
            # Communicate shapes for both directions (send backward, receive forward)
            recv_forward_shapes, recv_grad_shapes = self._communicate_shapes(
                tensor_to_send_prev=gradient_splits[0], recv_prev=True
            )
            logging.debug(
                f"[Bridge Communicator] [send_backward_recv_backward] Rank {self.current_rank} "
                f"received forward shapes {recv_forward_shapes} and grad shapes {recv_grad_shapes}"
            )

            # Prepare simultaneous send/receive operations
            if num_receives > 0:
                # Prepare activation receive tensors
                received_activations_list = []
                for i, recv_forward_shape in enumerate(recv_forward_shapes):
                    activation_tensor = torch.empty(
                        recv_forward_shape,
                        device=torch.cuda.current_device(),
                        dtype=self.comm_dtype,
                        requires_grad=True,
                    )
                    received_activations_list.append(activation_tensor)

                # Create batch P2P operations for simultaneous send/receive
                ops = []
                for src_rank, gradient_split, activation_tensor in zip(
                    rank_info.recv_from_ranks, gradient_splits, received_activations_list
                ):
                    # Send gradient
                    ops.append(
                        torch.distributed.P2POp(torch.distributed.isend, gradient_split, src_rank)
                    )

                    # Receive activation
                    ops.append(
                        torch.distributed.P2POp(
                            torch.distributed.irecv, activation_tensor, src_rank
                        )
                    )

                # Execute all operations simultaneously
                logging.debug(
                    f"[Bridge Communicator] [send_backward_recv_backward] Rank {self.current_rank} "
                    f"executing {len(ops)} simultaneous P2P operations"
                )
                reqs = torch.distributed.batch_isend_irecv(ops)
                for req in reqs:
                    req.wait()

                # Concatenate received activations
                aggregated_activation = torch.cat(
                    received_activations_list, dim=self.dim_mapping['b']
                )
                logging.debug(
                    f"[Bridge Communicator] [send_backward_recv_forward] Rank {self.current_rank} "
                    f"agg act shape {aggregated_activation.shape} sum {aggregated_activation.sum()}"
                )

                # Broadcast tensor shape to all ranks in scatter_pg
                tensor_shape_to_scatter = aggregated_activation.shape
                shape_tensor = torch.tensor(
                    tensor_shape_to_scatter, device=torch.cuda.current_device(), dtype=torch.int64
                )
                dist.broadcast(
                    shape_tensor, src=self.current_rank, group=self.dest_grid_broadcast_pg
                )

                # Scatter the tensors to all ranks in the group
                dist.broadcast(
                    aggregated_activation, src=self.current_rank, group=self.dest_grid_broadcast_pg
                )
                return aggregated_activation

        elif (
            rank_info.role == CommRole.MEMBER
            and self.current_rank in self.dest_grid_broadcast_ranks
        ):
            shape_tensor = torch.empty((3), device=torch.cuda.current_device(), dtype=torch.int64)
            dist.broadcast(
                shape_tensor, src=self.dest_local_leader_rank, group=self.dest_grid_broadcast_pg
            )

            # Use the received shape to create tensor for scatter operation
            received_shape = tuple(shape_tensor.tolist())
            received_activation = torch.empty(
                received_shape,
                device=torch.cuda.current_device(),
                dtype=self.comm_dtype,
                requires_grad=True,
            )
            dist.broadcast(
                received_activation,
                src=self.dest_local_leader_rank,
                group=self.dest_grid_broadcast_pg,
            )
            logging.debug(
                f"[Bridge Communicator] [send_backward_recv_backward] Rank {self.current_rank}  "
                f"received activation from scatter operation, shape {received_activation.shape}"
            )
            return received_activation

    def _communicate_shapes(
        self,
        tensor_to_send_next: Optional[torch.Tensor] = None,
        recv_next: bool = False,
        recv_prev: bool = False,
        tensor_to_send_prev: Optional[torch.Tensor] = None,
    ) -> Tuple[List[Tuple[int, ...]], List[Tuple[int, ...]]]:
        """Communicate tensor shapes between sender and receiver ranks in the bridge.

        This is used to communicate tensor shapes before actual tensor communication
        when dealing with variable sequence lengths or dynamic shapes.

        Args:
            tensor_to_send_next: The tensor to send to the next rank (None if not sending)
            tensor_to_send_prev: The tensor to send to the previous rank (None if not sending)
            recv_next: Whether to receive from the next rank (None if not receiving)
            recv_prev: Whether to receive from the previous rank (None if not receiving)

        Returns:
            Tuple containing:
            - List of forward shapes that will be received (empty if not a receiver)
            - List of gradient shapes that will be received (empty if not expecting gradients)
        """
        rank_info = self.comm_map.get(self.current_rank)
        if not rank_info or rank_info.role == CommRole.MEMBER:
            return [], []

        recv_forward_shapes = []
        recv_grad_shapes = []
        logging.debug(
            f"[Bridge Communicator] [communicate_shapes] Rank {self.current_rank} "
            f"is a {rank_info.role} and is running the shape communication"
        )
        # Collect all P2P operations for batch execution
        ops = []
        recv_forward_shape_tensors = []
        recv_grad_shape_tensors = []

        if rank_info.role == CommRole.SENDER:
            # Prepare send operations for forward shapes
            if tensor_to_send_next is not None:
                send_shape = tensor_to_send_next.shape
                send_shape_tensor = torch.tensor(
                    send_shape, device=torch.cuda.current_device(), dtype=torch.int64
                )
                # Add send operations for each destination
                for dest_rank in rank_info.send_to_ranks:
                    ops.append(
                        torch.distributed.P2POp(
                            torch.distributed.isend, send_shape_tensor, dest_rank
                        )
                    )

            # If expecting gradients back, prepare receive operations
            if recv_next:
                for dest_rank in rank_info.send_to_ranks:
                    grad_shape_tensor = torch.empty(
                        (3), device=torch.cuda.current_device(), dtype=torch.int64
                    )
                    recv_grad_shape_tensors.append(grad_shape_tensor)
                    ops.append(
                        torch.distributed.P2POp(
                            torch.distributed.irecv, grad_shape_tensor, dest_rank
                        )
                    )

        elif rank_info.role == CommRole.RECEIVER:
            # Prepare receive operations for forward shapes
            if recv_prev:
                for src_rank in rank_info.recv_from_ranks:
                    forward_shape_tensor = torch.empty(
                        (3), device=torch.cuda.current_device(), dtype=torch.int64
                    )
                    recv_forward_shape_tensors.append(forward_shape_tensor)
                    ops.append(
                        torch.distributed.P2POp(
                            torch.distributed.irecv, forward_shape_tensor, src_rank
                        )
                    )

            # If we need to send gradient shapes back, prepare send operations
            if tensor_to_send_prev is not None:

                grad_shape = tensor_to_send_prev.shape
                grad_shape_tensor = torch.tensor(
                    grad_shape, device=torch.cuda.current_device(), dtype=torch.int64
                )

                for src_rank in rank_info.recv_from_ranks:
                    ops.append(
                        torch.distributed.P2POp(
                            torch.distributed.isend, grad_shape_tensor, src_rank
                        )
                    )

        # Execute all operations in a single batch
        if ops:
            reqs = torch.distributed.batch_isend_irecv(ops)
            for req in reqs:
                req.wait()

        # Extract shapes from received tensors
        for forward_shape_tensor in recv_forward_shape_tensors:
            shape = forward_shape_tensor.tolist()
            recv_forward_shapes.append(tuple(shape))

        for grad_shape_tensor in recv_grad_shape_tensors:
            shape = grad_shape_tensor.tolist()
            recv_grad_shapes.append(tuple(shape))

        return recv_forward_shapes, recv_grad_shapes

    def _split_tensor_at_batch_dim(
        self, aggregated_tensor: torch.Tensor, num_splits: int
    ) -> List[torch.Tensor]:
        """Split an aggregated tensor into multiple tensors at the batch dimension.

        Args:
            aggregated_tensor: The tensor to split
            num_splits: The number of splits to create

        Returns:
            List of tensors split at the batch dimension
        """
        if num_splits <= 0:
            raise ValueError(f"num_splits must be positive, got {num_splits}")

        batch_dim = self.dim_mapping['b']
        splits = torch.tensor_split(aggregated_tensor, num_splits, dim=batch_dim)
        # PyTorch p2p requires the tensors to be contiguous
        return [split.contiguous() for split in splits]


================================================
FILE: megatron/core/pipeline_parallel/combined_1f1b.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import contextlib
from contextlib import nullcontext
from typing import List, Union

import torch

from megatron.core.enums import Fp8Recipe
from megatron.core.fp8_utils import get_fp8_context
from megatron.core.pipeline_parallel.utils import AbstractSchedulePlan, ScheduleNode, set_streams
from megatron.core.utils import get_attr_wrapped_model

# Types
Shape = Union[List[int], torch.Size]


def combined_1f1b_schedule_for_no_pipelining(
    forward_step_func,
    data_iterator,
    model,
    num_microbatches,
    input_tensor,
    output_tensor_grad,
    forward_data_store,
    config,
    collect_non_loss_data,
    first_val_step,
    forward_only,
    no_sync_func,
    total_num_tokens,
    check_first_val_step,
):
    """Scheduler for 1f1b with no pipelining.

    This function schedules micro-batches in a way that the forward pass of Transformer layers
    for one micro-batch runs in parallel with the backward pass of another.
    Each layer's forward and backward operations are co-scheduled to maximize the overlap of
    their computations and communications.
    EP A2A in forward step is hidden by the attention/mlp computation in the backward step,
    and vice versa.
    Assuming we have 4 microbatches, the schedule is as follows:
    Phases 0: 1st microbatch forward
    Phases 1: 1st microbatch backward + 2nd microbatch forward
    Phases 2: 2nd microbatch backward + 3rd microbatch forward
    Phases 3: 3rd microbatch backward + 4th microbatch forward
    Phases 4: 4th microbatch backward
    """

    set_streams()
    # The forward step for the first microbatch is executed alone, no a2a overlapping
    output_tensor, num_tokens, _ = combined_forward_backward_step(
        forward_step_func,
        data_iterator,
        model,  # f_model
        num_microbatches,
        input_tensor,
        forward_data_store,
        None,  # b_model
        input_tensor,
        None,  # b_output_tensor
        None,  # b_output_tensor_grad
        config,
        collect_non_loss_data=collect_non_loss_data,
        checkpoint_activations_microbatch=None,
        is_first_microbatch=check_first_val_step(True),
        current_microbatch=0,
    )
    # The forward step is executed in parallel with the backward step of another microbatch
    # EP A2A in forward step is hidden by the attention/mlp computation in the backward step
    # Vice versa.
    with no_sync_func():
        for i in range(num_microbatches - 1):
            total_num_tokens += num_tokens
            output_tensor, num_tokens, _ = combined_forward_backward_step(
                forward_step_func,
                data_iterator,
                model,  # f_model
                num_microbatches,
                input_tensor,
                forward_data_store,
                model,  # b_model
                input_tensor,  # b_input_tensor
                output_tensor,  # b_output_tensor
                output_tensor_grad,  # b_output_tensor_grad
                config,
                collect_non_loss_data=collect_non_loss_data,
                checkpoint_activations_microbatch=None,
                is_first_microbatch=check_first_val_step((i + 1) == 0),
                current_microbatch=(i + 1),
            )
    total_num_tokens += num_tokens
    # The backward step for the last microbatch is executed alone, no a2a overlapping
    # Run computation for last microbatch out of context handler (want to synchronize gradients).
    output_tensor, num_tokens, _ = combined_forward_backward_step(
        forward_step_func,
        data_iterator,
        None,  # f_model
        num_microbatches,
        input_tensor,
        forward_data_store,
        model,  # b_model
        input_tensor,  # b_input_tensor
        output_tensor,  # b_output_tensor
        output_tensor_grad,  # b_output_tensor_grad
        config,
    )
    return forward_data_store, total_num_tokens


def combined_1f1b_schedule_for_interleaved_pipelining(
    config,
    forward_step_func,
    data_iterator,
    model,
    num_microbatches,
    forward_data_store,
    forward_step_helper_preprocess,
    forward_step_helper_postprocess,
    backward_step_helper_preprocess,
    backward_step_helper_postprocess,
    get_microbatch_id_in_model_chunk,
    get_model_chunk_id,
    check_first_val_step,
    is_first_microbatch_for_model_chunk,
    collect_non_loss_data,
    f_virtual_microbatch_id=None,
    b_virtual_microbatch_id=None,
    pre_forward=None,
    pre_backward=None,
    post_forward=None,
    post_backward=None,
):
    """Helper method to run combined forward and backward step for A2A communication hiding.
    This method merges the functionality of `forward_step_helper` and `backward_step_helper` and
    eventually calls `combined_forward_backward_step` method defined in `combined_1f1b.py`.
    This method is called only if `overlap_moe_expert_parallel_comm` is true.

    Args:
        The arguments could be categorized into 2 groups:
        - Common arguments
          - f_virtual_microbatch_id, b_virtual_microbatch_id,
        - Arguments for combined_forward_backward_step()
          - config, forward_step_func, data_iterator, model, num_microbatches, forward_data_store
          - check_first_val_step, is_first_microbatch_for_model_chunk, collect_non_loss_data
          - pre_forward, pre_backward, post_forward, post_backward
        - Callables for the forward_step_helper() and backward_step_helper()
          - forward_step_helper_preprocess, forward_step_helper_postprocess
          - backward_step_helper_preprocess, backward_step_helper_postprocess
          - get_microbatch_id_in_model_chunk, get_model_chunk_id

    Returns:
        output_tensor (Tensor or list[Tensor]): The output object(s) from the forward step.
        input_tensor_grad (Tensor): The grad of the input tensor.

    Descriptions:
        This method merges the forward_step_helper() and backward_step_helper() in schedules.py.
        Assuming that:
            def forward_step_helper():
                # forward_step_helper_preprocess()
                # forward_step()
                # forward_step_helper_postprocess()
            def backward_step_helper():
                # backward_step_helper_preprocess()
                # backward_step()
                # backward_step_helper_postprocess()
        Then the combined_1f1b_schedule_for_interleaved_pipelining() method will be:
            def combined_1f1b_schedule_for_interleaved_pipelining():
                # forward_step_helper_preprocess()
                # backward_step_helper_preprocess()
                # combined_forward_backward_step() // merged forward_step() and backward_step()
                # forward_step_helper_postprocess()
                # backward_step_helper_postprocess()
    """

    set_streams()
    # forward prepare
    f_model_chunk_id = None
    f_microbatch_id = None
    input_tensor = None
    if f_virtual_microbatch_id is not None:
        f_microbatch_id = get_microbatch_id_in_model_chunk(f_virtual_microbatch_id, forward=True)
    if f_virtual_microbatch_id is not None:
        f_model_chunk_id = get_model_chunk_id(f_virtual_microbatch_id, forward=True)
        input_tensor = forward_step_helper_preprocess(
            f_virtual_microbatch_id, f_model_chunk_id, f_microbatch_id
        )
    # backward prepare
    b_model_chunk_id = None
    b_input_tensor = None
    b_output_tensor = None
    b_output_tensor_grad = None
    if b_virtual_microbatch_id is not None:
        b_model_chunk_id = get_model_chunk_id(b_virtual_microbatch_id, forward=False)
        b_input_tensor, b_output_tensor, b_output_tensor_grad = backward_step_helper_preprocess(
            b_virtual_microbatch_id, b_model_chunk_id
        )
    # Call combined forward and backward step to overlap the communication and computation
    output_tensor, num_tokens, input_tensor_grad = combined_forward_backward_step(
        forward_step_func,
        data_iterator[f_model_chunk_id] if f_model_chunk_id is not None else None,
        model[f_model_chunk_id] if f_model_chunk_id is not None else None,
        num_microbatches,
        input_tensor,
        forward_data_store,
        model[b_model_chunk_id] if b_model_chunk_id is not None else None,
        b_input_tensor,
        b_output_tensor,
        b_output_tensor_grad,
        config,
        f_model_chunk_id=f_model_chunk_id,
        pre_forward=pre_forward,
        pre_backward=pre_backward,
        post_forward=post_forward,
        post_backward=post_backward,
        collect_non_loss_data=collect_non_loss_data,
        checkpoint_activations_microbatch=None,
        is_first_microbatch=check_first_val_step(
            is_first_microbatch_for_model_chunk(f_virtual_microbatch_id)
            if f_virtual_microbatch_id is not None
            else None
        ),
        current_microbatch=f_microbatch_id,
    )
    # forward post process
    if f_model_chunk_id is not None:
        forward_step_helper_postprocess(f_model_chunk_id, output_tensor, num_tokens)
    # backward post process
    if b_model_chunk_id:
        # The same as the backward_step_helper
        backward_step_helper_postprocess(b_virtual_microbatch_id)
        if input_tensor is not None:
            assert input_tensor_grad is not None
    return output_tensor, input_tensor_grad


def combined_forward_backward_step(
    forward_step_func,
    data_iterator,
    f_model,
    num_microbatches,
    input_tensor,
    forward_data_store,
    b_model,
    b_input_tensor,
    b_output_tensor,
    b_output_tensor_grad,
    config,
    f_model_chunk_id=None,
    pre_forward=None,
    pre_backward=None,
    post_forward=None,
    post_backward=None,
    collect_non_loss_data=False,
    checkpoint_activations_microbatch=None,
    is_first_microbatch=False,
    current_microbatch=None,
    encoder_decoder_xattn=False,
):
    """Merged forward and backward step for combined 1f1b scheduler.

    Args:
        Need to accept the argument of both forward_step() and backward_step().
        forward_step_func (callable): A function returning a forward schedule plan which is
            an input of schedule_chunk_1f1b function.

        Only exists in 1f1b steady state with p2p overlap.
            pre_forward (callable): The function to call before the forward_step.
            pre_backward (callable): The function to call before the backward_step.
            post_forward (callable): The function to call after the forward_step.
            post_backward (callable): The function to call after the backward_step.

    Returns:
        forward_output_tensor (Tensor or list[Tensor]): The output object(s) from the forward step.
        forward_num_tokens (Tensor): The number of tokens.
        backward_input_tensor_grad (Tensor): The grad of the input tensor.

    Descriptions:
        This method merges the forward_step() and backward_step() methods in the schedules.py file.
        Assuming that:
            def forward_step():
                # forward_preprocess()
                # forward_compute()
                # forward_postprocess()
            def backward_step():
                # backward_preprocess()
                # backward_compute()
                # backward_postprocess()
        Then the forward_backward_step() method will be:
            def forward_backward_step():
                # forward_preprocess() // the same as the forward_step()
                # GENERATE f_schedule_plan // schedule happens in schedule_chunk_1f1b()
                # backward_preprocess() // the same as the backward_step()
                # COMBINED_FORWARD_BACKWARD_COMPUTE() // by calling schedule_chunk_1f1b()
                # forward_postprocess() // the same as the forward_step()
                # backward_postprocess() // the same as the backward_step()
    """
    assert (
        checkpoint_activations_microbatch is None
    ), "checkpoint_activations_microbatch is not supported for overlap_moe_expert_parallel_comm"

    from .schedules import set_current_microbatch

    if f_model is not None and config.timers is not None:
        config.timers('forward-compute', log_level=2).start()

    if config.enable_autocast:
        context_manager = torch.autocast("cuda", dtype=config.autocast_dtype)
    else:
        context_manager = contextlib.nullcontext()

    # forward preprocess, the same as the forward_step()
    unwrap_output_tensor = False
    f_schedule_plan = None
    if f_model is not None:
        if is_first_microbatch and hasattr(f_model, 'set_is_first_microbatch'):
            f_model.set_is_first_microbatch()
        if current_microbatch is not None:
            set_current_microbatch(f_model, current_microbatch)
        if not isinstance(input_tensor, list):
            input_tensor = [input_tensor]
            unwrap_output_tensor = True

        set_input_tensor = get_attr_wrapped_model(f_model, "set_input_tensor")
        set_input_tensor(input_tensor)

    # build the schedule plan and get loss function for forward step
    if f_model is not None:
        # GPTModel.build_schedule_plan(model_forward_inputs) is called in the forward_step_func.
        # The return value becomes (forward_schedule_plan, loss_function),
        # which is used to be (forward_output_tensor, loss_function).
        with context_manager:  # autocast context
            unwrapped_model = get_attr_wrapped_model(
                f_model, "build_schedule_plan", return_model_obj=True
            )
            from megatron.core.models.gpt.gpt_model import GPTModel

            assert isinstance(unwrapped_model, GPTModel), (
                "The final unwrapped model must be a GPTModel instance "
                "since only GPTModel is supported for EP A2A overlapping."
            )
            f_schedule_plan, loss_func = forward_step_func(
                data_iterator, unwrapped_model, return_schedule_plan=True
            )
            assert isinstance(
                f_schedule_plan, AbstractSchedulePlan
            ), "first output of forward_step_func must be one instance of AbstractSchedulePlan"

    # backward preprocess, the same as the backward_step()
    unwrap_input_tensor_grad = False
    b_schedule_plan = None
    if b_model is not None:
        # Retain the grad on the input_tensor.
        if not isinstance(b_input_tensor, list):
            b_input_tensor = [b_input_tensor]
            unwrap_input_tensor_grad = True
        for x in b_input_tensor:
            if x is not None:
                x.retain_grad()

        if not isinstance(b_output_tensor, list):
            b_output_tensor = [b_output_tensor]
        if not isinstance(b_output_tensor_grad, list):
            b_output_tensor_grad = [b_output_tensor_grad]

        # Get the schedule plan from the output tensor
        b_schedule_plan = b_output_tensor[0].schedule_plan
        b_output_tensor[0].schedule_plan = None
        # Get the loss function from the output tensor
        loss_node = b_output_tensor[0].loss_func
        b_output_tensor[0].loss_func = None

        if b_output_tensor_grad[0] is None:
            if config.grad_scale_func is not None:
                b_output_tensor[0] = config.grad_scale_func(b_output_tensor[0])
            # Backward pass for loss function
            torch.autograd.backward(b_output_tensor[0], grad_tensors=b_output_tensor_grad[0])
            b_output_tensor_grad[0] = loss_node.get_grad()

    # If fp8_recipe is delayed, wrap the entire pass with get_fp8_context(),
    # otherwise do nothing extra at the outer level
    # if we are using other fp8 recipes, then the context manager enter&exit are free
    # we can wrap fp8_context within the for loop over layers, so that we can fine-grained
    # control which layer will be fp8 or bf16
    use_outer_fp8_context = config.fp8 and config.fp8_recipe == Fp8Recipe.delayed
    outer_fp8_context = get_fp8_context(config) if use_outer_fp8_context else nullcontext()

    b_grad = b_output_tensor_grad[0] if b_model else None
    # combined forward and backward model chunk execution of two micro-batches
    with context_manager and outer_fp8_context:  # autocast context and delayed fp8 context
        # For GPT models, it calls common::TransformerModelChunkSchedulePlan.run(),
        output_tensor = type(f_schedule_plan or b_schedule_plan).run(
            f_schedule_plan,
            b_schedule_plan,
            b_grad=b_grad,
            pre_forward=pre_forward,
            pre_backward=pre_backward,
            post_forward=post_forward,
            post_backward=post_backward,
        )

    # forward post process
    num_tokens = None
    if f_model is not None:
        from megatron.core.pipeline_parallel.schedules import forward_step_calc_loss

        loss_node = ScheduleNode(
            loss_func, torch.cuda.current_stream(), f_schedule_plan.event, name="loss_func"
        )
        loss_func = loss_node.forward
        output_tensor, num_tokens = forward_step_calc_loss(
            f_model,
            output_tensor,
            loss_func,
            config,
            f_model_chunk_id,
            collect_non_loss_data,
            num_microbatches,
            forward_data_store,
        )
        # Set the schedule plan and loss function to the output tensor
        # This is used to get the schedule plan and loss function in the backward pass
        output_tensor.schedule_plan = f_schedule_plan
        output_tensor.loss_func = loss_node

        if not unwrap_output_tensor:
            output_tensor, num_tokens = [output_tensor], num_tokens

    # backward post process, the same as the backward_step()
    input_tensor_grad = None
    if b_model is not None:
        input_tensor_grad = [None]
        if b_input_tensor is not None:
            input_tensor_grad = []
            for x in b_input_tensor:
                if x is None:
                    input_tensor_grad.append(None)
                else:
                    input_tensor_grad.append(x.grad)

        if unwrap_input_tensor_grad:
            input_tensor_grad = input_tensor_grad[0]

    return output_tensor, num_tokens, input_tensor_grad


================================================
FILE: megatron/core/pipeline_parallel/fine_grained_activation_offload.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from collections import deque
from contextlib import nullcontext
from typing import Any, Dict, Tuple

import torch

# CPU offload implementation for pipeline parallelism
DEBUG = False
DEBUG_RANK = 0

from megatron.core.transformer.cuda_graphs import is_graph_capturing


def debug_rank(message):
    """Print debug message for a specific rank when DEBUG is enabled."""
    # pylint: disable=bad-builtin
    if not DEBUG:
        return
    assert torch.distributed.is_initialized()
    if torch.distributed.get_rank() == DEBUG_RANK:
        print(message)


def print_offload_summary_table(total_offload_bytes: Dict[str, int]):
    """
    Print an ASCII table summarizing offload bytes across all ranks.

    Gathers offload data from all ranks and prints a formatted table on rank 0,
    with rows representing ranks and columns representing groups.

    Args:
        total_offload_bytes: Dict mapping group names to offload bytes for this rank.
    """
    # pylint: disable=bad-builtin
    assert torch.distributed.is_initialized()
    rank = torch.distributed.get_rank()
    world_size = torch.distributed.get_world_size()

    # Gather all group names across ranks
    local_names = list(total_offload_bytes.keys())
    all_names_list = [None] * world_size
    torch.distributed.all_gather_object(all_names_list, local_names)
    all_group_names = sorted(set(name for names in all_names_list for name in names))

    # Gather offload bytes from all ranks: each rank sends a list of bytes per group
    local_bytes = [total_offload_bytes.get(name, 0) for name in all_group_names]
    all_bytes_list = [None] * world_size
    torch.distributed.all_gather_object(all_bytes_list, local_bytes)

    # Print ASCII table on rank 0
    if rank == 0:
        # Calculate column widths
        col_width = max(12, max((len(name) for name in all_group_names), default=8) + 2)
        rank_col_width = max(6, len(f"Rank {world_size - 1}") + 2)

        # Build header
        header = "Rank".ljust(rank_col_width)
        header += "".join(name.rjust(col_width) for name in all_group_names)
        header += "Total".rjust(col_width)
        separator = "-" * len(header)

        print("\n" + "=" * len(header))
        print("Activation Offload Summary (MB)".center(len(header)))
        print("=" * len(header))
        print(header)
        print(separator)

        # Build rows for each rank
        grand_total = 0
        col_totals = [0] * len(all_group_names)
        for r in range(world_size):
            row_bytes = all_bytes_list[r]
            row_total = sum(row_bytes)
            grand_total += row_total
            for i, b in enumerate(row_bytes):
                col_totals[i] += b
            row_str = f"Rank {r}".ljust(rank_col_width)
            for b in row_bytes:
                row_str += f"{b / (1024 * 1024):.2f}".rjust(col_width)
            row_str += f"{row_total / (1024 * 1024):.2f}".rjust(col_width)
            print(row_str)

        # Print totals row
        print(separator)
        totals_row = "Total".ljust(rank_col_width)
        for ct in col_totals:
            totals_row += f"{ct / (1024 * 1024):.2f}".rjust(col_width)
        totals_row += f"{grand_total / (1024 * 1024):.2f}".rjust(col_width)
        print(totals_row)
        print("=" * len(header) + "\n")

    torch.distributed.barrier()


class GPUTensorPool:
    """
    GPU memory pool for efficient allocation and deallocation of tensors.

    Features:
    - Supports multiple tensor shapes and dtypes, each with its own pool
    - Dynamic allocation: tensors are created on-demand during allocation
    - Efficient reuse: freed tensors are returned to the pool for reuse
    - Uses queue-based management for O(1) allocation and deallocation

    Example:
        pool = GPUTensorPool(device='cuda:0')
        tensor = pool.allocate((128, 512), dtype=torch.float32)
        # ... use tensor ...
        pool.free(tensor, (128, 512), dtype=torch.float32)
    """

    def __init__(self, device: str = 'cuda', pin_memory: bool = False):
        """
        Initialize GPU tensor pool.

        Args:
            device: GPU device, default 'cuda'
            pin_memory: Whether to use pinned memory (mainly for CPU tensors)
        """
        self.device = torch.device(device)
        self.pin_memory = pin_memory

        # Maintain a separate pool for each (shape, dtype) combination
        # Structure: {(shape, dtype): {'free': deque, 'all': list, 'allocated_count': int}}
        self._pools: Dict[Tuple, Dict[str, Any]] = {}

        # Statistics
        self._stats = {
            'total_allocated': 0,  # Total number of tensors ever allocated
            'current_in_use': 0,  # Number of tensors currently in use
            'allocation_requests': 0,  # Number of allocation requests
            'free_requests': 0,  # Number of free requests
            'pool_hits': 0,  # Number of times a tensor was reused from pool
            'pool_misses': 0,  # Number of times a new tensor was created
        }

        debug_rank("GPUTensorPool: Initialized with dynamic allocation")

    def _get_pool_key(self, shape: Tuple, dtype: torch.dtype) -> Tuple:
        """Generate a unique key for the pool based on shape and dtype."""
        return (shape, dtype)

    @staticmethod
    def _calculate_memory_size(shape: Tuple, dtype: torch.dtype) -> int:
        """Calculate memory size in bytes."""
        element_size = torch.tensor([], dtype=dtype).element_size()
        numel = 1
        for dim in shape:
            numel *= dim
        return numel * element_size

    def allocate(self, shape: Tuple, dtype: torch.dtype = torch.float32) -> torch.Tensor:
        """
        Allocate a tensor with the specified shape and dtype.

        Args:
            shape: Shape of the tensor
            dtype: Data type of the tensor, default torch.float32

        Returns:
            Allocated tensor
        """
        self._stats['allocation_requests'] += 1

        pool_key = self._get_pool_key(shape, dtype)

        # Create pool for this (shape, dtype) if it doesn't exist
        if pool_key not in self._pools:
            self._pools[pool_key] = {
                'free': deque(),  # Queue of available tensors
                'all': [],  # List of all tensors (for tracking)
                'allocated_count': 0,  # Number of allocated tensors
            }

        pool = self._pools[pool_key]

        # Try to reuse a tensor from the pool
        if len(pool['free']) > 0:
            tensor = pool['free'].popleft()
            self._stats['pool_hits'] += 1
            debug_rank(
                f"GPUTensorPool.allocate: Reused tensor from pool, "
                f"shape={shape}, dtype={dtype}, "
                f"remaining in pool={len(pool['free'])}"
            )
        else:
            # Allocate a new tensor
            tensor = torch.empty(shape, dtype=dtype, device=self.device, pin_memory=self.pin_memory)
            pool['all'].append(tensor)
            self._stats['total_allocated'] += 1
            self._stats['pool_misses'] += 1

            memory_mb = self._calculate_memory_size(shape, dtype) / (1024**2)
            debug_rank(
                f"GPUTensorPool.allocate: Created new tensor, "
                f"shape={shape}, dtype={dtype}, "
                f"memory={memory_mb:.2f} MB, "
                f"total_created={len(pool['all'])}"
            )

        pool['allocated_count'] += 1
        self._stats['current_in_use'] += 1

        return tensor

    def free(self, tensor: torch.Tensor):
        """
        Return a tensor to the pool for reuse.

        Args:
            tensor: Tensor to free

        Raises:
            ValueError: If tensor doesn't belong to this pool
        """
        self._stats['free_requests'] += 1

        shape = tensor.shape
        dtype = tensor.dtype

        pool_key = self._get_pool_key(shape, dtype)

        if pool_key not in self._pools:
            raise ValueError(
                f"No pool exists for shape={shape}, dtype={dtype}. "
                f"Available pools: {list(self._pools.keys())}"
            )

        pool = self._pools[pool_key]

        # Verify tensor belongs to this pool (use identity check, not value comparison)
        tensor_found = any(tensor is t for t in pool['all'])
        if not tensor_found:
            raise ValueError(
                f"Attempting to free a tensor that doesn't belong to this pool "
                f"(shape={shape}, dtype={dtype})"
            )

        # Return tensor to the free queue
        pool['free'].append(tensor)
        pool['allocated_count'] -= 1
        self._stats['current_in_use'] -= 1

        debug_rank(
            f"GPUTensorPool.free: shape={shape}, dtype={dtype}, "
            f"available in pool={len(pool['free'])}"
        )

    def get_pool_status(self, shape: Tuple = None, dtype: torch.dtype = None) -> Dict[str, Any]:
        """
        Get the status of the memory pool.

        Args:
            shape: If specified along with dtype, return status for that specific pool
            dtype: Data type (required if shape is specified)

        Returns:
            Dictionary containing status information
        """
        if shape is not None:
            if dtype is None:
                raise ValueError("dtype must be specified when shape is provided")

            pool_key = self._get_pool_key(shape, dtype)

            if pool_key not in self._pools:
                raise ValueError(f"No pool exists for shape={shape}, dtype={dtype}")

            pool = self._pools[pool_key]
            total_count = len(pool['all'])

            return {
                'shape': shape,
                'dtype': dtype,
                'total_count': total_count,
                'allocated_count': pool['allocated_count'],
                'free_count': len(pool['free']),
                'utilization': (
                    pool['allocated_count'] / total_count * 100 if total_count > 0 else 0
                ),
            }
        else:
            # Return status for all pools
            status = {'global_stats': self._stats.copy(), 'pools': {}}

            for pool_key in self._pools:
                shape, dtype = pool_key
                status['pools'][pool_key] = self.get_pool_status(shape, dtype)

            return status

    def reset(self):
        """Reset the pool, marking all tensors as available."""
        debug_rank("GPUTensorPool: Resetting pool...")

        for pool_key, pool in self._pools.items():
            # Clear and refill the free queue
            pool['free'].clear()
            for tensor in pool['all']:
                pool['free'].append(tensor)
            pool['allocated_count'] = 0

        self._stats['current_in_use'] = 0
        debug_rank("GPUTensorPool: Reset complete")

    def clear(self):
        """Clear the pool and release all GPU memory."""
        debug_rank("GPUTensorPool: Clearing pool...")

        for pool_key, pool in self._pools.items():
            # Clear all references, allowing PyTorch GC to reclaim memory
            pool['free'].clear()
            pool['all'].clear()

        self._pools.clear()
        self._stats['current_in_use'] = 0

        # Trigger GPU cache cleanup
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        debug_rank("GPUTensorPool: Clear complete")

    def __del__(self):
        """Destructor to ensure resources are released."""
        self.clear()


class OffloadTensorGroup:
    """
    A group of tensors to be offloaded together.
    """

    def __init__(self, name):
        self._name = name
        self._tensors = {}
        self._offload_event = torch.cuda.Event()
        self._reload_event = torch.cuda.Event()
        self.offload = True
        self.total_offload_bytes = 0
        self.total_tensor_count = 0
        # Using memory pool is for the compatibility with cuda graph.
        # Shapes of tensors for expert_fc1 and moe_act are not known in advance,
        # so we do not use CPU pool for them.
        if name == "expert_fc1" or name == "moe_act":
            self.use_cpu_pool = False
        else:
            self.use_cpu_pool = True

    def push_tensor(self, tag, tensor):
        """Push a tensor to the group."""
        self._tensors[tag] = tensor

    def pop_tensor(self, tag):
        """Pop a tensor from the group."""
        return self._tensors.pop(tag)

    def record_offload_event(self, stream):
        """Record the offload event."""
        self._offload_event.record(stream)

    def wait_offload_event(self, stream):
        """Wait for the offload event."""
        stream.wait_event(self._offload_event)

    def record_reload_event(self, stream):
        """Record the reload event."""
        self._reload_event.record(stream)

    def wait_reload_event(self, stream):
        """Wait for the reload event."""
        stream.wait_event(self._reload_event)

    def update_offload_info(self, tensor):
        """Update the offload information."""
        self.total_offload_bytes += tensor.numel() * tensor.element_size()
        self.total_tensor_count += 1


class PipelineOffloadManager:
    """
    Singleton manager for coordinating activation offloading across pipeline stages.
    Manages chunk handlers, synchronizes GPU-CPU transfers,
    and handles virtual pipeline parallelism.
    """

    OFFLOAD_MGR = None

    @classmethod
    def get_instance(cls):
        """Get the singleton instance of PipelineOffloadManager."""
        if cls.OFFLOAD_MGR is None:
            cls.OFFLOAD_MGR = PipelineOffloadManager()
        return cls.OFFLOAD_MGR

    @classmethod
    def reset_instance(cls):
        """Reset the singleton instance of PipelineOffloadManager."""
        cls.OFFLOAD_MGR = None
        cls.OFFLOAD_MGR = PipelineOffloadManager()

    def __init__(self):
        """Initialize the manager with queues and dedicated CUDA streams."""
        # Queue to store chunk handlers for backward pass
        self._queue = deque()
        # Cache chunk handlers for each virtual pipeline stage
        self._stages = None
        # allocate streams and events for synchronization
        self._d2h_stream = torch.cuda.Stream()
        self._h2d_stream = torch.cuda.Stream()
        # Shared CPU tensor pool for all chunks to improve reuse efficiency
        self._cpu_tensor_pool = GPUTensorPool(device="cpu", pin_memory=True)

        # Whether the manager is in warmup phase.
        self._is_warmup = True
        # Cache OffloadChunkHandler objects for each virtual pipeline stage and each forward pass.
        self._cached_chunks_forward = []
        # Cache OffloadChunkHandler objects for each virtual pipeline stage and each backward pass.
        self._cached_chunks_backward = []
        # Index of the current backward chunk in the cached chunks backward.
        self._cached_chunks_index_backward = 0
        # Index of the current forward chunk in the cached chunks forward.
        self._cached_chunks_index_forward = 0

        self.do_offload = True

        # Do not offload the last X groups so that the reloading won't block the computing stream.
        self._offload_margin = 0
        # Sometimes we need to delay the offloading and launch it later.
        # The delayed offload groups are stored in a queue.
        self._delayed_offload_groups = []
        self.reset()

    @property
    def d2h_stream(self):
        """Get the device-to-host (GPU to CPU) transfer stream."""
        return self._d2h_stream

    @property
    def h2d_stream(self):
        """Get the host-to-device (CPU to GPU) transfer stream."""
        return self._h2d_stream

    @property
    def cpu_tensor_pool(self):
        """Get the shared CPU tensor pool."""
        return self._cpu_tensor_pool

    def push_offload_groups(self, group_hook, forced_released_tensors):
        """Push the offload groups to the delayed queue."""
        debug_rank(f"pushing offload groups to the delayed queue")
        self._delayed_offload_groups.append((group_hook, forced_released_tensors))

    def flush_delayed_groups(self):
        """Flush the delayed groups."""
        debug_rank("flushing delayed groups")
        # Flush the delayed groups in reverse order to maintain the order of the groups.
        for group_hook, forced_released_tensors in reversed(self._delayed_offload_groups):
            group_hook(forced_released_tensors)
        self._delayed_offload_groups = []

    def reset(self):
        """Reset manager state for a new training iteration."""
        self._inside_context = False
        self._cur_forward_chunk = None
        self._cur_backward_chunk = None
        # Reset CPU tensor pool to reuse all CPU tensors for next iteration
        if hasattr(self, '_cpu_tensor_pool'):
            self._cpu_tensor_pool.reset()

        # Call post_warmup_callback after warmup to collect the offload information.
        if self._is_warmup and len(self._cached_chunks_forward) > 0:
            self.post_warmup_callback()
        self._cached_chunks_index_backward = 0
        self._cached_chunks_index_forward = 0

        for chunk in self._cached_chunks_forward:
            chunk.reset()
        self._delayed_offload_groups = []

    @property
    def offload_summary_bytes(self) -> Dict[str, int]:
        """Offload summary bytes per group collected after warmup."""
        return self._offload_summary_bytes

    @property
    def offload_summary_total_bytes(self) -> int:
        """Total offloaded bytes collected after warmup."""
        return self._offload_summary_total_bytes

    def flush(self):
        """Flush all staged chunks to the backward queue in reverse order."""
        # Ensure all virtual pipeline stages have the same number of chunks
        if len(self._stages[0]) == len(self._stages[-1]):
            lens = [len(e) for e in self._stages]
            assert min(lens) == max(lens), "All stages must have same chunk count"
            # Clear the last stage and push all chunks in reverse order for backward
            self._stages[-1] = []
            for chunks in reversed(self._stages):
                for chunk in chunks:
                    self.push(chunk)
            # Clear all stages after flushing
            for i in range(self._vpp):
                self._stages[i] = []

    def disable_offload(self):
        """Disable the offload."""
        debug_rank("disable_offload")
        self.do_offload = False
        for chunk in self._cached_chunks_forward:
            chunk.do_offload = False

    def enable_offload(self):
        """Enable the offload."""
        debug_rank("enable_offload")
        self.do_offload = True
        for chunk in self._cached_chunks_forward:
            chunk.do_offload = True

    def post_warmup_callback(self):
        """Callback after warmup."""
        # pylint: disable=bad-builtin
        debug_rank("post_warmup_callback")
        self._is_warmup = False
        assert len(self._cached_chunks_forward) == len(
            self._cached_chunks_backward
        ), "Cached chunks forward and backward must have the same length"
        for chunk in self._cached_chunks_forward:
            chunk.is_warmup = False
            assert (
                chunk in self._cached_chunks_backward
            ), "Chunk not found in cached chunks backward"
            # Update the offload margin to the maximum number of deduplicated groups
            self._offload_margin = max(self._offload_margin, chunk.get_max_deduplicated_groups())
            debug_rank(f"offload margin {self._offload_margin}")
        # Find the last group with the same name in the cached chunks backward
        last_group_with_same_name = {}
        for chunk_idx, chunk in enumerate(reversed(self._cached_chunks_backward)):
            for group in chunk.offload_groups:
                last_group_with_same_name[group._name] = group
        # Mark the last group with the same name as not offloadable to make sure
        # the reloading won't block the main stream.
        for name, group in last_group_with_same_name.items():
            if self._offload_margin > 0:
                group.offload = False
                self._offload_margin -= 1
                debug_rank(f"setting offload to false for group {name} at chunk index {chunk_idx}")
            else:
                break
        debug_rank(f"offload margin {self._offload_margin}")
        assert self._offload_margin == 0, "Offload margin is not 0"
        # Dump the offload information
        total_tensor_count = {}
        total_offload_bytes = {}
        for chunk in self._cached_chunks_forward:
            for group in chunk.offload_groups:
                if group.offload:
                    if group._name not in total_tensor_count:
                        total_tensor_count[group._name] = 0
                    total_tensor_count[group._name] += group.total_tensor_count
                    if group._name not in total_offload_bytes:
                        total_offload_bytes[group._name] = 0
                    total_offload_bytes[group._name] += group.total_offload_bytes
            # Stop statistics at the first backward chunk after which 1F1B is running,
            # where the memory cost will not increase anymore.
            if chunk is self._cached_chunks_backward[0]:
                break
        # Cache summary for downstream consumers (e.g., unit tests).
        self._offload_summary_bytes = dict(total_offload_bytes)
        self._offload_summary_total_bytes = int(sum(total_offload_bytes.values()))
        print_offload_summary_table(total_offload_bytes)

    def push(self, handler):
        """Add a chunk handler to the backward queue."""
        debug_rank(f"pushing handler {handler}")
        self._queue.append(handler)
        if self._is_warmup:
            self._cached_chunks_backward.append(handler)

    def pop_backward_chunk(self, name=None):
        """Get the next non-empty backward chunk containing the group with the given name."""
        self._cur_backward_chunk = None
        debug_rank(f"popping backward chunk {self._cached_chunks_index_backward}")
        debug_rank(f"cached chunks backward {self._cached_chunks_backward}")
        for idx, handler in enumerate(
            self._cached_chunks_backward[self._cached_chunks_index_backward :]
        ):
            self._cached_chunks_index_backward += 1
            if not handler.is_empty_chunk(name):
                self._cur_backward_chunk = (
                    handler  # set the first non-empty chunk as the current backward chunk
                )
                debug_rank(f"handler {handler} at index {idx} is not empty")
                break
        assert self._cur_backward_chunk is not None, "No non-empty chunk found"

    def front_backward_chunk(self, name=None):
        """Get the first non-empty backward chunk containing the group with the given name."""
        for idx, handler in enumerate(
            self._cached_chunks_backward[self._cached_chunks_index_backward :]
        ):
            if not handler.is_empty_chunk(name):
                debug_rank(f"front handler {handler} at index {idx}")
                return handler
        return None

    def init_model_chunk_offload_handler(
        self, vp_size, vp_stage, min_offloaded_tensor_size=1024 * 1024
    ):
        """
        Initialize a chunk offload handler for a model chunk (microbatch).

        Args:
            vp_size: Virtual pipeline size
            vp_stage: Virtual pipeline stage index (None means stage 0)
            min_offloaded_tensor_size: Minimum tensor size (in elements) to offload
        """
        if not self._is_warmup:
            return

        vp_size = 1 if vp_size is None else vp_size
        if self._stages is None:
            self._vpp = vp_size
            self._stages = [[] for _ in range(vp_size)]

        if vp_stage is None:
            cur_vpp_rank = 0
        else:
            cur_vpp_rank = vp_stage

        # Flush staged chunks when reaching the last virtual pipeline stage
        if cur_vpp_rank == self._vpp - 1:
            self.flush()

        # Use shared CPU tensor pool for better reuse across chunks
        cur_chunk = ChunkOffloadHandler(min_offloaded_tensor_size, self._cpu_tensor_pool)
        debug_rank(f"init_model_chunk_offload_handler {cur_chunk}")
        self._stages[cur_vpp_rank].append(cur_chunk)
        # For the last stage, push immediately and flush
        if cur_vpp_rank == self._vpp - 1:
            self.push(cur_chunk)
            self.flush()
        self._cur_forward_chunk = cur_chunk
        cur_chunk.vpp_rank = cur_vpp_rank
        self._cached_chunks_forward.append(cur_chunk)

    def pop_forward_chunk(self, name=None):
        """Get the next forward pass chunk handler."""
        debug_rank(f"pop_forward_chunk {self._cur_forward_chunk}")
        if not self.do_offload:
            return self._cur_forward_chunk
        while not self._is_warmup and (
            self._cur_forward_chunk is None or self._cur_forward_chunk.finish_all_groups(name)
        ):
            if self._cached_chunks_index_forward >= len(self._cached_chunks_forward):
                self._cur_forward_chunk = None
                break
            self._cur_forward_chunk = self._cached_chunks_forward[self._cached_chunks_index_forward]
            self._cached_chunks_index_forward += 1
            debug_rank(f"new cur_forward_chunk {self._cur_forward_chunk}")
        return self._cur_forward_chunk

    def cur_forward_chunk(self):
        """Get the current forward pass chunk handler."""
        return self._cur_forward_chunk

    def cur_backward_chunk(self):
        """Get the current backward pass chunk handler."""
        return self._cur_backward_chunk

    def mark_not_offloadable(self, tensor: torch.Tensor):
        """Mark the current forward chunk as not offloadable."""
        if tensor is not None:
            tensor.offloading_activation = False

    def __enter__(self):
        """Enter context manager to enable activation offloading hooks."""
        debug_rank("----__enter__")
        if self._cur_forward_chunk is None or not self.cur_forward_chunk().do_offload:
            return
        from megatron.core.extensions.transformer_engine import cpu_offload

        if cpu_offload is not None:
            cpu_offload.CPUOffloadEnabled = True
        else:
            raise RuntimeError("TE CPU offload is not available")
        self.inside_context = True

        torch._C._autograd._push_saved_tensors_default_hooks(
            self.on_save_for_backward, self.on_get_saved_tensor
        )

    def __exit__(self, *args: Any):
        """Exit context manager and restore original tensor saving behavior."""
        debug_rank("----__exit__")
        if self._cur_forward_chunk is None or not self.cur_forward_chunk().do_offload:
            return
        from megatron.core.extensions.transformer_engine import cpu_offload

        if cpu_offload is not None:
            cpu_offload.CPUOffloadEnabled = False
        else:
            raise RuntimeError("TE CPU offload is not available")
        self.inside_context = False
        torch._C._autograd._pop_saved_tensors_default_hooks()

    def on_save_for_backward(self, tensor: torch.Tensor) -> Any:
        """
        Hook called when autograd saves a tensor for backward pass.
        Returns a tag to identify the tensor later.
        """
        debug_rank(f"------on_save_for_backward {tensor.shape}")
        assert self.inside_context, "Must be inside offload context"
        return self.cur_forward_chunk().tensor_push(tensor)

    def on_get_saved_tensor(self, saved_state: Any) -> torch.Tensor:
        """
        Hook called when autograd retrieves a saved tensor during backward pass.
        Returns the actual tensor (potentially reloading from CPU).
        """
        debug_rank(f"----on_get_saved_tensor {saved_state}")
        return self.cur_backward_chunk().tensor_pop(saved_state)


class ChunkOffloadHandler:
    """
    Handles activation offloading and reloading for a single pipeline chunk (microbatch).
    Manages tensor groups, coordinates asynchronous GPU-CPU transfers, and handles synchronization.
    """

    def offload(self, src_tensor, pin_memory=True, use_cpu_pool=True):
        """Offload."""
        debug_rank("--------offload")

        if not src_tensor.is_contiguous():
            src_tensor = src_tensor.contiguous()

        if use_cpu_pool:
            cpu_backup = self.cpu_tensor_pool.allocate(src_tensor.shape, dtype=src_tensor.dtype)
        else:
            cpu_backup = torch.empty(
                src_tensor.shape, dtype=src_tensor.dtype, device="cpu", pin_memory=pin_memory
            )

        cpu_backup.copy_(src_tensor, non_blocking=pin_memory)
        state = (src_tensor.device, cpu_backup, use_cpu_pool)
        return state

    def reload(self, state, non_blocking=None):
        """Reload."""
        debug_rank("------reload")
        dev, cpu_backup, use_cpu_pool = state
        if non_blocking is None:
            non_blocking = cpu_backup.is_pinned()
        gpu_tensor = torch.empty(
            cpu_backup.size(), dtype=cpu_backup.dtype, layout=cpu_backup.layout, device=dev
        )
        gpu_tensor.copy_(cpu_backup, non_blocking=non_blocking)
        if use_cpu_pool:
            self.cpu_tensor_pool.free(cpu_backup)
        return gpu_tensor

    def __init__(self, min_offloaded_tensor_size, cpu_tensor_pool):
        self.do_offload = True

        # Group management for batching offload/reload operations
        self.offload_groups = []
        self._offloaded_group_index = 0
        # Groups to be offloaded.
        self._groups_to_offload = []
        # Groups to be reloaded.
        self._groups_to_reload = []
        # Tensor count for the current group.
        self._tensor_count_current_group = 0
        # Maximum number of groups to offload or reload.
        self._max_group_size = 0
        # Groups being reloaded.
        self._reloading_group = []
        # Counter for special torch tensor types (FakeTensor, FunctionalTensor)
        self.torch_tensor_count = 0
        self.d2h_stream = PipelineOffloadManager.get_instance().d2h_stream
        self.h2d_stream = PipelineOffloadManager.get_instance().h2d_stream
        self.min_offloaded_tensor_size = min_offloaded_tensor_size
        self.cpu_tensor_pool = cpu_tensor_pool
        self.is_warmup = True

    def reset(self):
        """Reset the chunk offload handler."""
        self._offloaded_group_index = 0
        self._groups_to_offload = []
        self._groups_to_reload = []
        self._tensor_count_current_group = 0
        self._reloading_group = []

    def find_group_with_name(self, name: str, start_index: int = 0):
        """Find the group with the given name starting from the given index."""
        return next(
            (group for group in self.offload_groups[start_index:] if group._name == name), None
        )

    def is_empty_chunk(self, name=None):
        """Check if this chunk has no tensors to manage."""
        debug_rank(f"------is_empty_chunk {self._max_group_size}")
        if name is not None:
            return self.find_group_with_name(name) is None
        return self._max_group_size == 0

    def finish_all_groups(self, name=None) -> bool:
        """Finish all groups."""
        debug_rank(
            f"------finish_all_groups {self} {self._max_group_size} {self._offloaded_group_index}"
        )
        # TODO: check if this is correct
        # Mark it as finished when there are no groups to offload or reload
        if (
            len(self._groups_to_reload) == 0
            and len(self._groups_to_offload) == 0
            and self._offloaded_group_index > 0
        ):
            return True
        assert name is not None, "Name is required"
        return self.find_group_with_name(name, self._offloaded_group_index) is None

    def find_next_group(self, name=None):
        """Find the next group with the given name."""
        assert name is not None, "Name is required"
        return self.find_group_with_name(name, self._offloaded_group_index)

    def tensor_push(self, tensor):
        """Push tensor to the offload handler."""
        torch_stray_tensor = isinstance(
            tensor,
            (
                torch._subclasses.fake_tensor.FakeTensor,
                torch._subclasses.functional_tensor.FunctionalTensor,
            ),
        )
        assert not torch_stray_tensor, "Stray tensor should not be offloaded"

        # Assign unique tag based on group index and position within group
        tensor_tag = (self._offloaded_group_index, self._tensor_count_current_group)
        self._tensor_count_current_group += 1
        self.offload_groups[self._offloaded_group_index - 1].push_tensor(tensor_tag, tensor)
        debug_rank(f"--------tensor_push {tensor_tag}")
        return tensor_tag

    def tensor_pop(self, tensor_tag):
        """Pop tensor from the offload handler."""
        debug_rank(f"--------tensor_pop {tensor_tag}")
        group_id, idx = tensor_tag
        tensor = self.offload_groups[group_id - 1].pop_tensor(tensor_tag)
        # If tensor is offloaded (stored as tuple), reload it
        if isinstance(tensor, tuple):
            tensor = self.reload(tensor)
        debug_rank(f"--------tensor_pop {tensor.shape}")
        return tensor

    def tensor_need_offloading_checker(self, tensor):
        """Check if the tensor needs to be offloaded."""
        debug_rank(
            f"tensor_need_offloading_checker {getattr(tensor, 'offloading_activation', None)}"
        )
        if tensor.numel() < self.min_offloaded_tensor_size:
            return False
        # Respect tensor's offload preference if specified
        if hasattr(tensor, "offloading_activation") and not tensor.offloading_activation:
            return False
        return True

    def bulk_offload_group(self):
        """offload a group of tensors recorded in tensor_push()."""
        debug_rank("------bulk_offload_group")
        group_to_offload = self._groups_to_offload[-1]
        torch.cuda.nvtx.range_push("activation offloading " + group_to_offload._name)
        with torch.cuda.stream(self.d2h_stream):
            for tensor_tag, tensor_on_device in group_to_offload._tensors.items():
                if self.tensor_need_offloading_checker(tensor_on_device):
                    state = self.offload(
                        tensor_on_device, use_cpu_pool=group_to_offload.use_cpu_pool
                    )
                    if self.is_warmup:
                        group_to_offload.update_offload_info(tensor_on_device)
                    tensor_on_device.record_stream(self.d2h_stream)
                    group_to_offload.push_tensor(tensor_tag, state)
            group_to_offload.record_offload_event(self.d2h_stream)
        self._groups_to_offload.pop()
        torch.cuda.nvtx.range_pop()

    def get_max_deduplicated_groups(self):
        """Get the maximum number of deduplicated groups."""
        count_modules = []
        for group in self.offload_groups:
            if group._name not in count_modules:
                count_modules.append(group._name)
        return len(count_modules)

    def bulk_reload_group(self):
        """Bulk reload group."""
        debug_rank("----bulk_reload_group")
        group_to_reload = self._groups_to_reload[-1]
        torch.cuda.nvtx.range_push("activation reloading " + group_to_reload._name)
        with torch.cuda.stream(self.h2d_stream):
            # Wait for offload to complete before reloading
            if not is_graph_capturing():
                group_to_reload.wait_offload_event(self.h2d_stream)
            for tensor_tag, state in group_to_reload._tensors.items():
                # Only reload if tensor was offloaded (stored as tuple)
                if isinstance(state, tuple):
                    recovered_tensor = self.reload(state)
                    debug_rank(f"----recovered_tensor {recovered_tensor.shape}")
                    group_to_reload.push_tensor(tensor_tag, recovered_tensor)
            group_to_reload.record_reload_event(self.h2d_stream)
        self._groups_to_reload.pop()
        # Add the group to the reloading group to wait for the reload event.
        self._reloading_group.append(group_to_reload)
        torch.cuda.nvtx.range_pop()

    def pre_reload_last_layer(self):
        """Pre-reload the last layer of this chunk to hide reload latency."""
        debug_rank("pre_reload_last_layer")
        debug_rank(f"len(self._groups_to_reload) {len(self._groups_to_reload)}")
        if len(self._groups_to_reload) > 0:
            # Reload the last group (last layer) early
            self.bulk_reload_group()

    def should_bulk_offload(self):
        """Determine if the current group should be offloaded."""
        assert len(self._groups_to_offload) > 0, "No groups to offload"
        group = self._groups_to_offload[-1]
        debug_rank(f"should_bulk_offload {self.is_warmup} {group.offload}")
        # Don't offload if the chunk is not in warmup stage
        if self.is_warmup:
            return True
        # Don't offload if the group is marked as not offloadable
        if not group.offload:
            return False

        # Check if next backward chunk is this chunk (for last pipeline stage)
        next_backward_chunk = PipelineOffloadManager.get_instance().front_backward_chunk(
            group._name
        )
        if next_backward_chunk is not None and next_backward_chunk is self:
            # Don't offload the last group with the same name if it's about to be used immediately
            if self.find_next_group(group._name) is None:
                debug_rank(f"next group {group._name} is not found")
                return False

        return True

    def bulk_offload(self, forced_released_tensors):
        """Offload a group of tensors and optionally release their GPU memory."""
        debug_rank("----bulk_offload")
        if self.should_bulk_offload():
            self._groups_to_reload.append(self._groups_to_offload[-1])
            self.bulk_offload_group()
            # Manually release tensors not auto-freed by torch GC
            if len(forced_released_tensors) > 0:
                cur_stream = torch.cuda.current_stream()
                for release_tensor in forced_released_tensors:
                    if self.tensor_need_offloading_checker(release_tensor):
                        # Ensure tensor is not in use before freeing
                        release_tensor.record_stream(cur_stream)
                        release_tensor.untyped_storage().resize_(0)

    def on_group_commit_forward(self, forced_released_tensors):
        """Called at the end of a layer group's forward pass to trigger offloading."""
        if not self.do_offload:
            return
        debug_rank("--on_group_commit_forward")
        # Wait for compute to finish before starting offload
        self.d2h_stream.wait_stream(torch.cuda.current_stream())
        self.bulk_offload(forced_released_tensors)

    def bulk_reload(self):
        """Reload the next group of tensors from CPU to GPU."""
        debug_rank("--bulk_reload")
        if len(self._groups_to_reload) > 0:
            # Reload the next layer group
            self.bulk_reload_group()
        else:
            # Pre-load the last layer of the next backward chunk to hide latency
            next_backward_chunk = PipelineOffloadManager.get_instance().front_backward_chunk()
            # Don't pre-reload the last layer if the next backward chunk hasn't finished fprop yet.
            if (
                next_backward_chunk is not None
                and next_backward_chunk._offloaded_group_index
                == next_backward_chunk._max_group_size
            ):
                next_backward_chunk.pre_reload_last_layer()

    def on_group_commit_backward(self, name):
        """
        Called at the end of a layer group's backward pass.
        Ensures correct chunk is active and synchronizes reloads.
        """
        if not self.do_offload:
            return
        debug_rank("--on_group_commit_backward")
        cur_backward_chunk = PipelineOffloadManager.get_instance().cur_backward_chunk()
        # Switch to this chunk if it's not already current
        if cur_backward_chunk is not self:
            PipelineOffloadManager.get_instance().pop_backward_chunk(name)
        cur_backward_chunk = PipelineOffloadManager.get_instance().cur_backward_chunk()
        assert cur_backward_chunk is self, f"Chunk mismatch {cur_backward_chunk} {self}"
        # Wait for reload to complete before using tensors
        if not is_graph_capturing() and len(self._reloading_group) > 0:
            for reloading_group in self._reloading_group:
                if reloading_group._name == name:
                    reloading_group.wait_reload_event(torch.cuda.current_stream())
                    self._reloading_group.remove(reloading_group)
                    break

    def on_group_start_forward(self, name):
        """
        Called at the start of a layer group's forward pass.
        Increments group index and prepares for offloading.
        """
        if not self.do_offload:
            return
        debug_rank(f"--on_group_start_forward {name}")
        self._offloaded_group_index = self._offloaded_group_index + 1
        if self.is_warmup:
            self.offload_groups.append(OffloadTensorGroup(name))
            self._max_group_size = max(self._max_group_size, self._offloaded_group_index)
            debug_rank(f"max group size {self._max_group_size}")
        else:
            for group in self.offload_groups[self._offloaded_group_index - 1 :]:
                if group._name == name:
                    break
                self._offloaded_group_index = self._offloaded_group_index + 1
        self._tensor_count_current_group = 0
        self._groups_to_offload.append(self.offload_groups[self._offloaded_group_index - 1])
        debug_rank(f"groups to offload {self._groups_to_offload}")

    def on_group_start_backward(self):
        """
        Called at the start of a layer group's backward pass.
        Triggers reloading of tensors from CPU.
        """
        if not self.do_offload:
            return
        debug_rank(f"--on_group_start_backward {self}")
        # Wait for compute to finish before starting reload
        self.h2d_stream.wait_stream(torch.cuda.current_stream())
        self.bulk_reload()


def fine_grained_offloading_disable_offload():
    """Disable the offload."""
    debug_rank("fine_grained_offloading_disable_offload")
    PipelineOffloadManager.get_instance().disable_offload()


def fine_grained_offloading_enable_offload():
    """Enable the offload."""
    debug_rank("fine_grained_offloading_enable_offload")
    PipelineOffloadManager.get_instance().enable_offload()


class FineGrainedOffloadingGroupCommitFunction(torch.autograd.Function):
    """
    Identity operation that marks the end of a layer group for offload synchronization.
    Triggers offload during forward and synchronizes reload during backward.
    """

    @staticmethod
    def forward(ctx, tensor, cur_forward_chunk, name, forced_released_tensors, delay_offload):
        # pylint: disable=missing-function-docstring
        debug_rank("FineGrainedOffloadingGroupCommitFunction forward")

        if delay_offload:
            PipelineOffloadManager.get_instance().push_offload_groups(
                cur_forward_chunk.on_group_commit_forward, forced_released_tensors
            )
        else:
            cur_forward_chunk.on_group_commit_forward(forced_released_tensors)
        ctx.cpu_offload_handler = cur_forward_chunk
        ctx.name = name
        return tensor

    @staticmethod
    def backward(ctx, *grad_output):
        # pylint: disable=missing-function-docstring
        debug_rank("FineGrainedOffloadingGroupCommitFunction backward")

        cpu_offload_handler = ctx.cpu_offload_handler
        cpu_offload_handler.on_group_commit_backward(ctx.name)
        return grad_output + (None, None, None, None)


def fine_grained_offloading_group_commit(
    tensor, name, forced_released_tensors=None, delay_offload=False
):
    """
    Specify the tensors to be released after offloading.
    forced_released_tensors is a list of tensors to be released after offloading.
    The tensors will be untyped_storage().resize_(0) after offloading.
    Note: specify the tensors only when they are not automatically released by torch gc.
    """
    # Be permissive: callers may pass a tuple/list of outputs (e.g., (q, k, v)).
    # We only need to insert a single identity op into the autograd graph; applying
    # it to the first tensor output is sufficient and keeps callers' code minimal.
    if forced_released_tensors is None:
        forced_released_tensors = []
    if isinstance(tensor, tuple):
        if len(tensor) == 0:
            return tensor
        committed0 = fine_grained_offloading_group_commit(
            tensor[0],
            name=name,
            forced_released_tensors=forced_released_tensors,
            delay_offload=delay_offload,
        )
        return (committed0,) + tensor[1:]
    if isinstance(tensor, list):
        if len(tensor) == 0:
            return tensor
        committed0 = fine_grained_offloading_group_commit(
            tensor[0],
            name=name,
            forced_released_tensors=forced_released_tensors,
            delay_offload=delay_offload,
        )
        return [committed0] + tensor[1:]

    cur_forward_chunk = PipelineOffloadManager.get_instance().cur_forward_chunk()
    if cur_forward_chunk is None:
        return tensor
    return FineGrainedOffloadingGroupCommitFunction.apply(
        tensor, cur_forward_chunk, name, forced_released_tensors, delay_offload
    )


def fine_grained_offloading_group_flush_delayed_groups():
    """Flush the delayed groups."""
    debug_rank("fine_grained_offloading_group_flush_delayed_groups")
    PipelineOffloadManager.get_instance().flush_delayed_groups()


class FineGrainedOffloadingGroupStartFunction(torch.autograd.Function):
    """
    Identity operation that marks the start of a layer group for offload/reload.
    Prepares for offload during forward and triggers reload during backward.
    """

    @staticmethod
    def forward(ctx, tensor, cpu_offload_handler, name):
        # pylint: disable=missing-function-docstring
        ctx.cpu_offload_handler = cpu_offload_handler
        debug_rank("FineGrainedOffloadingGroupStartFunction forward")

        cpu_offload_handler.on_group_start_forward(name)
        # return the identical tensor
        return tensor

    @staticmethod
    def backward(ctx, grad_output):
        # pylint: disable=missing-function-docstring
        debug_rank("FineGrainedOffloadingGroupStartFunction backward")
        cpu_offload_handler = ctx.cpu_offload_handler
        cpu_offload_handler.on_group_start_backward()
        return grad_output, None, None, None


def fine_grained_offloading_group_start(tensor, name=None):
    """Mark the start of a layer group and prepare for offload/reload."""
    cur_forward_chunk = PipelineOffloadManager.get_instance().pop_forward_chunk(name=name)
    if cur_forward_chunk is None:
        return tensor
    return FineGrainedOffloadingGroupStartFunction.apply(tensor, cur_forward_chunk, name)


def fine_grained_offloading_forward_record(event: torch.cuda.Event) -> None:
    """Record the forward event for cuda graph capture."""
    d2h_stream = PipelineOffloadManager.get_instance().d2h_stream
    torch.cuda.current_stream().record_event(event)
    torch.cuda.current_stream().wait_stream(d2h_stream)


class FineGrainedOffloadingBackwardRecordFunction(torch.autograd.Function):
    """
    Identity operation that marks the end of a layer group for offload synchronization.
    Triggers offload during forward and synchronizes reload during backward.
    """

    @staticmethod
    def forward(ctx, tensor, event: torch.cuda.Event) -> torch.Tensor:
        """Forward pass for cuda graph capture."""
        ctx.event = event
        return tensor

    @staticmethod
    def backward(ctx, grad_output):
        """Record the backward event and wait for the h2d stream on cuda graph stream."""
        h2d_stream = PipelineOffloadManager.get_instance().h2d_stream
        torch.cuda.current_stream().record_event(ctx.event)
        torch.cuda.current_stream().wait_stream(h2d_stream)
        return grad_output, None


def fine_grained_offloading_backward_record(tensor, event: torch.cuda.Event) -> torch.Tensor:
    """Record the backward event for cuda graph capture."""
    return FineGrainedOffloadingBackwardRecordFunction.apply(tensor, event)


class FineGrainedActivationOffloadingInterface:
    """Interface for fine-grained activation offloading."""

    def __init__(self, offload: bool, tensor: torch.Tensor, name: str):
        self.offload = offload
        self.tensor = tensor
        self.name = name

    def __enter__(self):
        """Enter context manager to enable activation offloading hooks."""
        if self.offload:
            self.tensor = fine_grained_offloading_group_start(self.tensor, self.name)
            PipelineOffloadManager.get_instance().__enter__()
        return self.tensor

    def __exit__(self, *args: Any):
        """Exit context manager to disable activation offloading hooks."""
        if self.offload:
            PipelineOffloadManager.get_instance().__exit__()

    @staticmethod
    def init_chunk_handler(vp_size, vp_stage, min_offloaded_tensor_size):
        """Initialize the chunk handler, called at the start of a microbatch forward pass."""
        PipelineOffloadManager.get_instance().init_model_chunk_offload_handler(
            vp_size, vp_stage, min_offloaded_tensor_size
        )

    @staticmethod
    def get_context(flag):
        """Get the fine-grained offload context"""
        return PipelineOffloadManager.get_instance() if flag else nullcontext()

    @staticmethod
    def group_commit(tensor, name, forced_released_tensors=None, delay_offload=False):
        """Group commit the tensors."""
        return fine_grained_offloading_group_commit(
            tensor, name, forced_released_tensors, delay_offload
        )

    @staticmethod
    def mark_not_offloadable(tensor: torch.Tensor):
        """Mark the tensor as not offloadable."""
        PipelineOffloadManager.get_instance().mark_not_offloadable(tensor)

    @staticmethod
    def forward_record(event: torch.cuda.Event) -> None:
        """Record the forward event for cuda graph capture."""
        d2h_stream = PipelineOffloadManager.get_instance().d2h_stream
        torch.cuda.current_stream().record_event(event)
        torch.cuda.current_stream().wait_stream(d2h_stream)

    @staticmethod
    def reset():
        """Reset the chunk handler."""
        PipelineOffloadManager.get_instance().reset()

    @staticmethod
    def reset_instance():
        """Reset the singleton instance."""
        PipelineOffloadManager.reset_instance()


================================================
FILE: megatron/core/pipeline_parallel/hybrid_cp_schedule.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.

from collections import deque
from functools import lru_cache
from math import ceil, log2
from typing import Callable, List, Optional, Tuple

import torch

from megatron.core import parallel_state
from megatron.core.rerun_state_machine import RerunDataIterator


class BalancedCPScheduler:
    """
    This class provides the functionality to form groups of sub-samples
    such that all DPxCP ranks have a roughly balanced workload in the group.
    """

    def __init__(self, max_seq_len_per_rank: int, dp_cp_group: torch.distributed.ProcessGroup):
        self.max_seq_len_per_rank = max_seq_len_per_rank
        self.num_subsamples = 0
        self.num_subsamples_processed = 0
        self.free_resources = []
        self.total_hdp_gpus = dp_cp_group.size()

    @lru_cache(maxsize=128)
    def get_total_workload(self, seq_length: int, cp_size: Optional[int] = None):
        """
        seq_length: sequence length of a sub-sample
        cp_size: total number of CP ranks working on this sub-sample

        Note:
        This function is used to estimate the relative workload intensity
        of a sub-sample. This is not meant to be an accurate flops calculator.

        Returns: workload of a sub-sample
        """
        if cp_size is None:
            cp_size = self.gpus_needed(seq_length)
        return (seq_length * seq_length) / cp_size

    @lru_cache(maxsize=128)
    def gpus_needed(self, seq_len: int) -> int:
        """
        Calculates the number of GPUs needed for a given sequence length
        and max sequence length per CP rank.
        This is used to determine the CP size of a sub-sample.

        The number is rounded up to the next power of 2 to match the available
        hybrid context parallel process group sizes.
        """
        return max(1, 2 ** ceil(log2((seq_len / self.max_seq_len_per_rank))))

    def make_buckets_equal(
        self,
        sample_seqlens: List[Tuple[int, int]],  # List of (sample_id, sequence_length) tuples
        compute_estimator: Callable[[int], float],
    ) -> List[deque]:
        """
        Makes as many buckets as unique CP sizes needed.
        This keeps sample IDs tethered to their sequence lengths throughout the bucketing process.
        """
        # Extract just the sequence lengths for determining k
        seqlens = [seq_len for _, seq_len in sample_seqlens]

        # Determine k based on unique GPU categories needed
        k = len({self.gpus_needed(L) for L in seqlens})

        # Create a work target for each bucket
        # This is the total work divided by the number of buckets
        work = []
        for _, s in sample_seqlens:
            cp_size = self.gpus_needed(s)
            work.append(compute_estimator(s, cp_size))
        total_work = sum(work)
        target = total_work / k
        buckets, cur, cur_work = [], [], 0.0
        remaining_work = total_work
        remaining_k = k

        for i, (sample_id, seq_len) in enumerate(sample_seqlens):
            work = compute_estimator(seq_len)
            projected = cur_work + work

            # Check if we should close this bucket
            if cur and (
                projected > target * 1.1  # Too much work
                or len(sample_seqlens) - i <= remaining_k - len(buckets)
            ):  # Need to save sequences for remaining buckets
                buckets.append(deque(cur))
                cur, cur_work = [], 0.0
                remaining_work -= sum(compute_estimator(seq_len) for _, seq_len in cur)
                remaining_k -= 1

            cur.append((sample_id, seq_len))
            cur_work += work

        if cur:
            buckets.append(deque(cur))

        return buckets

    def next_hdp_group(
        self,
        sample_seqlens: List[Tuple[int, int]],  # List of (sample_id, sequence_length) tuples
        compute_estimator: Callable[[int], float],
        total_gpus: int,
        delta: float = 0.05,  # balance slack (e.g. 5 %)
        strategy: str = "dp",  # "dp" or "pp"
        eps_bucket: float = 0.10,  # ε target for bucket balance
    ) -> Tuple[List[List[int]], List[Tuple[int, int]], List[float], List[List[int]]]:
        """
        Given a list of (sample_id, sequence_length) tuples, this function aims to assign
        sequences in a group such that all GPUs in the DPxCP group have a roughly balanced
        workload. Once each group is roughly balanced, we exit and return the
        group and the leftover sequences.

        The function performs the following passes in order to form a balanced microbatch:
        1. We create buckets of sequences that are roughly balanced.
        We try to create as many buckets as possible CP sizes.
        2. Given a bucket has sequences available, we assign the sample
            a. To a new set of GPUs if there are enough free GPUs.
            b. To an existing set of GPUs with the lowest load.
        3. We check if the group is balanced whenever we need to move onto a new CP size
        in the same set of GPUs.
        4. We trim the group if removing the last added sequence helps improve balance.
        5. If we run out of sequences to assign and there are empty GPUs,
        we redistribute work to empty GPUs by recursively increasing the CP size of a
        sample until no empty GPUs are left.

        Returns (micro_batches, leftover_sample_seqlens, exec_times, sample_ids_per_gpu).
        """
        if not sample_seqlens:
            return (
                [[] for _ in range(total_gpus)],
                [],
                [0.0 for _ in range(total_gpus)],
                [[] for _ in range(total_gpus)],
            )

        # Get buckets of sequences with balanced work
        buckets = self.make_buckets_equal(sample_seqlens, compute_estimator)

        # Initialize tracking structures
        micro_batches = [[] for _ in range(total_gpus)]
        exec_times = [0.0 for _ in range(total_gpus)]
        sample_ids_per_gpu = [[] for _ in range(total_gpus)]

        gpu_group_id = [None] * total_gpus
        group_members = {}
        group_size = {}
        next_gid = 0

        pp_cursor = 0
        prev_needed = None
        check_balance = False

        while buckets:
            # ---- Step 1 – pick the next sequence we COULD place ------------------
            sample_seq_tuple = bucket_idx = None
            needed = None

            scan_order = (
                range(len(buckets))
                if strategy == "dp"
                else [(pp_cursor + i) % len(buckets) for i in range(len(buckets))]
            )

            for idx in scan_order:
                if not buckets[idx]:
                    continue
                cand_tuple = buckets[idx][0]  # This is now (sample_id, seq_len)
                cand_seq_len = cand_tuple[1]
                needed = self.gpus_needed(cand_seq_len)

                # (a) Do we have an *existing* group of size `needed`?
                candidate_gids = [gid for gid, sz in group_size.items() if sz == needed]

                # (b) Or enough completely free GPUs to start a new group?
                free_ranks = [r for r, gid in enumerate(gpu_group_id) if gid is None]
                if candidate_gids or len(free_ranks) >= needed:
                    sample_seq_tuple, bucket_idx = cand_tuple, idx
                    break

            # No place to put any remaining sequence – finish this micro‑batch
            if sample_seq_tuple is None:
                break

            # TODO[pmannan]: PP not yet supported. Add PP scheduling.
            if strategy == "pp":
                pp_cursor = (bucket_idx + 1) % len(buckets)

            sample_id, seq_len = sample_seq_tuple
            needed = self.gpus_needed(seq_len)
            if prev_needed is None:
                prev_needed = needed

            # (a)  Existing groups of exactly this size
            candidate_gids = [gid for gid, sz in group_size.items() if sz == needed]
            if candidate_gids:
                best_gid, best_load = min(
                    (
                        (gid, max(exec_times[r] for r in group_members[gid]))
                        for gid in candidate_gids
                    ),
                    key=lambda t: t[1],
                )
            else:
                best_gid, best_load = None, float("inf")

            # (b)  Hypothetical **new** group from completely free GPUs
            free_ranks = [r for r, gid in enumerate(gpu_group_id) if gid is None]
            if len(free_ranks) >= needed:
                free_sorted = sorted(free_ranks, key=lambda r: exec_times[r])
                new_members = free_sorted[:needed]
                new_load = exec_times[new_members[-1]]

                if new_load < best_load:
                    best_gid = None
                    chosen_members = new_members
                else:
                    chosen_members = group_members[best_gid]
            else:
                chosen_members = group_members[best_gid]

            # ---- Step 2 – if we decided to create a fresh group ----------------
            if best_gid is None:
                best_gid = next_gid
                next_gid += 1
                group_members[best_gid] = chosen_members
                group_size[best_gid] = needed
                for r in chosen_members:
                    gpu_group_id[r] = best_gid

            # ---- Step 3 – assign the sequence to every member of that group ------
            per_gpu_cost = compute_estimator(seq_len)

            for r in chosen_members:
                micro_batches[r].append(seq_len)
                exec_times[r] += per_gpu_cost
                sample_ids_per_gpu[r].append(sample_id)

            # Remove the sequence definitively from its bucket
            buckets[bucket_idx].popleft()

            # ---- Step 4 – tidy, balance‑check, maybe early‑exit ------------------
            while buckets and not buckets[0]:
                buckets.pop(0)
                pp_cursor %= max(1, len(buckets))

            # TODO: Removing this helps reduce the number of groups when we have
            # lots of samples with same CP size.
            # But because we don't exit as soon as we get balanced,
            # even if there is one group available that can take the next sample,
            # we will keep adding samples to the same group.
            # trim_overload() does not help because it only checks if removing the
            # last added sample helps.
            # We cannot check after adding every sample because there will always be imbalance
            # if we don't wait for future scheduling.

            # IMPORTANT: So we need a solution here
            if needed < prev_needed:
                # When we get into a lower CP size in the same group,
                # we can start checking for balance. There is still a gotcha here.
                # Let's say we have a group of 3 GPU 0-2, then we move onto group of 2.
                # We keep assigning group of 2 as we do in descending order but GPU 7/15
                # never sees a microbatch assigned to it
                # until we run out of samples with CP2.
                # This means we are never balanced as min(exec_times) will always be 0.
                # We need a smart way of identifying that we have run out of big samples
                # and if we are having to assign work to a GPU already working,
                # is it because there are empty GPUs?
                # Would assigning work to empty GPUs first by moving onto next CP bucket help?
                # But we need to remember to come back to this CP size bucket and then
                # check for balance. Maybe the scheduling algorithm should look at empty
                # GPUs and find work rather than going sequence by sequence.
                check_balance = True

            if (
                check_balance
                and buckets
                and max(exec_times) - min(exec_times) <= delta * max(exec_times)
            ):
                break

        # Gather leftovers (flatten remaining buckets, preserve order)
        leftovers = []
        for b in buckets:
            for sample_seq_tuple in b:
                leftovers.append(sample_seq_tuple)

        # ---------------------------------------------------------------------------
        def trim_overload():
            """
            Iteratively pop the most‑recent sequence from the *most‑loaded group*
            whenever doing so reduces the global slack.
            """
            while True:
                cur_max = max(exec_times)
                cur_min = min(exec_times)
                cur_slack = cur_max - cur_min
                if cur_slack <= delta * cur_max:
                    # Slack is already within limit.
                    break
                if cur_min == 0:
                    # There are empty GPUs that will be
                    # handled in the next step.
                    break

                max_r = exec_times.index(cur_max)
                gid = gpu_group_id[max_r]
                members = group_members[gid]

                if not micro_batches[max_r] or len(micro_batches[max_r]) <= 1:
                    break

                seq = micro_batches[max_r][-1]
                need = group_size[gid]
                per_gpu_cost = compute_estimator(seq)

                proj_times = exec_times[:]
                for r in members:
                    proj_times[r] -= per_gpu_cost

                proj_slack = max(proj_times) - min(proj_times)

                # Check if trimming the workload helps imbalance
                if proj_slack < cur_slack:
                    sample_id_to_remove = sample_ids_per_gpu[max_r][-1]
                    for r in members:
                        micro_batches[r].pop()
                        exec_times[r] -= per_gpu_cost
                        sample_ids_per_gpu[r].pop()
                    leftovers.append((sample_id_to_remove, seq))
                else:
                    break

        trim_overload()

        # Track samples in this group before redistribution to empty GPUs
        total_work_before = sum(len(mb) for mb in micro_batches)

        # Check for empty GPUs and redistribute work
        def fill_empty_gpus(
            micro_batches, exec_times, sample_ids_per_gpu, group_members, group_size
        ):
            """
            Recursively check for empty GPUs and redistribute work by increasing
            the number of GPUs sharing samples. This ensures all GPUs have work.
            GPUs must be allocated consecutively so we may need to push existing
            work to other ranks in order to expand samples.
            """
            # Find empty GPUs
            empty_gpus = [i for i in range(total_gpus) if not micro_batches[i]]
            if not empty_gpus:
                return (
                    micro_batches,
                    exec_times,
                    sample_ids_per_gpu,
                    group_members,
                    group_size,
                )  # No empty GPUs, we're done

            # Find the smallest group size that exists
            existing_group_sizes = set(group_size.values())
            assert (
                existing_group_sizes
            ), "There should be at least one group existing, cannot reditribute, "
            "try to increase 'max-seqlen-per-cp-rank'."

            min_group_size = min(existing_group_sizes)
            # We have Hybrid DPxCP groups for every power of 2 of GPUs or the entire DPxCP group.
            next_power = min(min_group_size * 2, total_gpus)

            # Find the first group of min_group_size that can be expanded
            expandable_gid = None
            expandable_members = None
            expandable_new_gpus = None

            for gid, size in group_size.items():
                if size == min_group_size:
                    members = group_members[gid]
                    needed_count = next_power - min_group_size
                    group_start_gpu = members[0]
                    group_end_gpu = members[-1]
                    empty_gpu = [idx for idx, work in enumerate(micro_batches) if not work][0]
                    assert not all(
                        work for work in micro_batches[empty_gpu : empty_gpu + needed_count]
                    ), f"Empty GPUs were detected but not enough to expand."
                    work_to_push = micro_batches[
                        group_end_gpu + 1 : empty_gpu
                    ]  # This is work of all other subsequent sub-samples
                    exec_times_to_push = exec_times[group_end_gpu + 1 : empty_gpu]
                    sample_ids_to_push = sample_ids_per_gpu[group_end_gpu + 1 : empty_gpu]

                    new_micro_batches = [[]] * len(micro_batches)
                    new_exec_times = [0.0] * len(exec_times)
                    new_sample_ids_per_gpu = [[]] * len(sample_ids_per_gpu)

                    # No change in work until the group selected for expansion
                    for i in range(group_start_gpu):
                        new_micro_batches[i] = micro_batches[i]
                        new_exec_times[i] = exec_times[i]
                        new_sample_ids_per_gpu[i] = sample_ids_per_gpu[i]

                    # The work is distributed across the expanded group
                    for i in range(group_start_gpu, group_end_gpu + needed_count + 1):
                        new_micro_batches[i] = micro_batches[group_end_gpu]
                        new_exec_times[i] = self.get_total_workload(
                            micro_batches[group_end_gpu][0], next_power
                        )
                        new_sample_ids_per_gpu[i] = sample_ids_per_gpu[group_end_gpu]

                    # Any assigned work on expanded GPUs is pushed
                    for i, work in enumerate(work_to_push):
                        new_micro_batches[group_end_gpu + needed_count + 1 + i] = work
                        new_exec_times[group_end_gpu + needed_count + 1 + i] = exec_times_to_push[i]
                        new_sample_ids_per_gpu[group_end_gpu + needed_count + 1 + i] = (
                            sample_ids_to_push[i]
                        )

                    group_size[gid] = next_power
                    group_members[gid] = list(range(members[0], members[-1] + needed_count + 1))
                    for pushed_gid in group_size.keys():
                        if pushed_gid > gid:
                            group_members[pushed_gid] = [
                                x + needed_count for x in group_members[pushed_gid]
                            ]

                    return (
                        new_micro_batches,
                        new_exec_times,
                        new_sample_ids_per_gpu,
                        group_members,
                        group_size,
                    )

        empty_gpus = any([not micro_batches[i] for i in range(total_gpus)])
        while empty_gpus:
            micro_batches, exec_times, sample_ids_per_gpu, group_members, group_size = (
                fill_empty_gpus(
                    micro_batches, exec_times, sample_ids_per_gpu, group_members, group_size
                )
            )
            empty_gpus = any([not micro_batches[i] for i in range(total_gpus)])

        # Assert that no sample has been completely removed
        total_work_after = sum(len(mb) for mb in micro_batches)
        assert (
            total_work_after >= total_work_before
        ), f"Samples were removed: {total_work_before} -> {total_work_after}"

        return micro_batches, leftovers, exec_times, sample_ids_per_gpu

    def get_groups_and_subsamples(self, sample_id_seqlens, config):
        """
        This function recursively forms groups of sub-samples such that all DPxCP ranks
        have a roughly balanced workload in the group.
        """
        groups = []
        sample_id_groups = []
        # We assign a sample_id to each sub-sample in order to track assignment to each GPU.
        sample_id_seqlens = sorted(sample_id_seqlens, key=lambda x: x[1], reverse=True)
        while sample_id_seqlens:
            mb, sample_id_seqlens, exec_times, sample_ids = self.next_hdp_group(
                sample_id_seqlens, self.get_total_workload, self.total_hdp_gpus
            )
            groups.append(mb)
            if len(sample_ids) < self.total_hdp_gpus:
                sample_ids.extend([] * (self.total_hdp_gpus - len(sample_ids)))
            sample_id_groups.append(sample_ids)

        return groups, sample_id_groups


def hybrid_context_parallel_forward_backward(
    forward_step_func,
    data_iterator,
    model,
    num_microbatches,
    input_tensor,
    output_tensor_grad,
    forward_data_store,
    config,
    collect_non_loss_data,
    first_val_step,
    forward_only,
    no_sync_func,
    total_num_tokens,
    check_first_val_step,
    model_type,
):
    """
    Scheduler for Hybrid Context Parallel.

    This function performs the packed sample scheduling and determines
    1. The number of microbatches to schedule for each CP rank
    2. The number of groups each CP rank should execute
    3. The number of sub-samples per group each CP rank should execute

    A group is defined by a set of samples that can run across the CP domain without any barrier.
    There are many reasons why we may not be able to run endless samples within a single group.
    For example, if we have 8 GPUs,
    if GPU 0-5 are assigned a long sample that requires CP6,
    GPU 6-7 are assigned a short sample that requires CP2,
    The next sample which requires CP4 can be assigned GPU 4-7.
    But GPU 6-7 will finish first and get deadlocked if GPU 4-5 are not participating in the group.
    """
    from .schedules import backward_step, forward_step

    def _broadcast(item):
        if item is not None:
            torch.distributed.broadcast(
                item,
                parallel_state.get_tensor_model_parallel_src_rank(),
                group=parallel_state.get_tensor_model_parallel_group(),
            )

    def _broadcast_num_samples_this_group(num_samples_this_group):
        dev = torch.cuda.current_device()
        torch.distributed.barrier()

        n = 0 if num_samples_this_group is None else int(num_samples_this_group.numel())
        n = torch.tensor([n], dtype=torch.int64, device=dev)

        _broadcast(n)
        n = int(n.item())

        assert n > 0, "there should be at least 1 sub samples in the group"
        num_samples_this_group_broadcast = (
            torch.empty(n, dtype=torch.int32, device=dev)
            if num_samples_this_group is None
            else num_samples_this_group
        )
        _broadcast(num_samples_this_group_broadcast)
        return num_samples_this_group_broadcast

    def _get_new_data_iterator(sample_id_in_group, group_id):
        if is_first_tp_rank:
            sub_sample_id = sample_ids_this_group[sample_id_in_group]
            sample = batch[sub_sample_id]
            partner_cp_size = len(
                [True for sample_ids in sample_id_groups[group_id] if sub_sample_id in sample_ids]
            )
            sample["local_cp_size"] = torch.tensor(partner_cp_size, dtype=torch.int32)
            new_data_iterator = RerunDataIterator(iter([sample]))
            return new_data_iterator
        else:
            return None

    # We get data once per global batch and schedule the sub-samples.
    # TODO(pmannan): Should we wrap the data_iterator here instead of the training.py file?
    hdp_rank = parallel_state.get_data_parallel_rank(with_context_parallel=True)
    is_first_tp_rank = parallel_state.get_tensor_model_parallel_rank() == 0

    if is_first_tp_rank:
        data = next(data_iterator)
        sample_id_groups = data[1]
        batch = data[0]
    else:
        data, sample_id_groups, batch = None, None, None

    num_samples_this_group = None
    if is_first_tp_rank:
        num_samples_this_group = torch.tensor(
            [len(group[hdp_rank]) for group in sample_id_groups], dtype=torch.int32, device='cuda'
        )

    num_samples_this_group = _broadcast_num_samples_this_group(num_samples_this_group)
    num_samples_this_group = num_samples_this_group.cpu().numpy()
    num_total_groups = num_samples_this_group.shape[0]

    current_microbatch = 0

    # Upto last group, we don't need any sync.
    with no_sync_func():
        for j in range(num_total_groups - 1):
            sample_ids_this_group = sample_id_groups[j][hdp_rank] if is_first_tp_rank else None
            for i in range(num_samples_this_group[j]):
                # Call forward step for each sub-sample
                new_data_iterator = _get_new_data_iterator(i, j)
                # TODO: Find the usage of current_microbatch and is_first_microbatch and
                # how that may affect my usage.
                output_tensor, num_tokens = forward_step(
                    forward_step_func,
                    new_data_iterator,
                    model,
                    num_microbatches,
                    input_tensor,
                    forward_data_store,
                    config,
                    collect_non_loss_data,
                    is_first_microbatch=check_first_val_step(
                        first_val_step, forward_only, current_microbatch == 0
                    ),
                    current_microbatch=current_microbatch,
                )
                current_microbatch += 1
                total_num_tokens += num_tokens.item()
                if not forward_only:
                    backward_step(
                        input_tensor, output_tensor, output_tensor_grad, model_type, config
                    )

            # Create a barrier at end of each group.
            # This barrier ensures that all ranks are prepared to change assigned CP group sizes and
            # no rank is starting a sub-sample ahead of it's partner ranks.
            torch.distributed.barrier(
                parallel_state.get_data_parallel_group(with_context_parallel=True)
            )

    # For the last group, we need to run the last sub-sample out of the context handler.
    with no_sync_func():
        sample_ids_this_group = sample_id_groups[-1][hdp_rank] if is_first_tp_rank else None
        for i in range(num_samples_this_group[-1] - 1):
            new_data_iterator = _get_new_data_iterator(i, -1)
            # Call forward step for each sub-sample
            output_tensor, num_tokens = forward_step(
                forward_step_func,
                new_data_iterator,
                model,
                num_microbatches,
                input_tensor,
                forward_data_store,
                config,
                collect_non_loss_data,
                is_first_microbatch=check_first_val_step(
                    first_val_step, forward_only, current_microbatch == 0
                ),
                current_microbatch=current_microbatch,
            )
            current_microbatch += 1
            total_num_tokens += num_tokens.item()
            if not forward_only:
                backward_step(input_tensor, output_tensor, output_tensor_grad, model_type, config)

    # The last sub-sample of the last group of the last microbatch is
    # run out of the context handler.
    new_data_iterator = _get_new_data_iterator(-1, -1)
    # Call forward step for each sub-sample
    output_tensor, num_tokens = forward_step(
        forward_step_func,
        new_data_iterator,
        model,
        num_microbatches,
        input_tensor,
        forward_data_store,
        config,
        collect_non_loss_data,
        is_first_microbatch=check_first_val_step(
            first_val_step, forward_only, current_microbatch == 0
        ),
        current_microbatch=current_microbatch,
    )
    total_num_tokens += num_tokens.item()
    if not forward_only:
        backward_step(input_tensor, output_tensor, output_tensor_grad, model_type, config)

    return forward_data_store, total_num_tokens


================================================
FILE: megatron/core/pipeline_parallel/multimodule_communicator.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import logging
from dataclasses import dataclass
from typing import Dict, List, Optional, Union

import torch
import torch.distributed as dist

from megatron.core.hyper_comm_grid import HyperCommGrid
from megatron.core.model_parallel_config import ModelParallelConfig
from megatron.core.pipeline_parallel.bridge_communicator import BridgeCommunicator
from megatron.core.pipeline_parallel.p2p_communication import P2PCommunicator

# Types
Shape = Union[List[int], torch.Size]


@dataclass
class RankModuleInfo:
    """Information about a rank in a module.

    Attributes:
        pp_rank: The stage index of the current rank within the module's pipeline.
        pp_size: The total number of pipeline stages (ranks) in the module.
        p2p_communicator: Intra-module point-to-point communicator.
        bridge_comms_as_src_module: Bridge communicators for outgoing connections
            from this module to downstream modules. One module may have multiple
            bridge communicators if it has multiple outgoing connections.
        bridge_comms_as_dest_module: Bridge communicators for incoming connections
            to this module from upstream modules. One module may have multiple
            bridge communicators if it has multiple incoming connections.
        is_source_stage: True if this rank is at the absolute first stage in the
            overall model (no incoming connections).
        is_terminal_stage: True if this rank is at the absolute last stage in the
            overall model (no outgoing connections).
    """

    pp_rank: int
    pp_size: int
    p2p_communicator: Optional[P2PCommunicator]
    bridge_comms_as_src_module: Optional[List[BridgeCommunicator]]
    bridge_comms_as_dest_module: Optional[List[BridgeCommunicator]]
    is_source_stage: Optional[bool] = True
    is_terminal_stage: Optional[bool] = True


def _prepare_tensor_for_comm(
    tensor: Union[torch.Tensor, List[torch.Tensor], None]
) -> Union[torch.Tensor, List[torch.Tensor], None]:
    """Prepare tensor for P2P/bridge communication by expanding to 3D if needed.

    P2P and bridge communicators expect 3D tensors. 2D tensors are unsqueezed by adding
    a singleton last dimension, and _restore_tensor_from_comm will squeeze it back. 3D
    tensors are passed through unchanged.

    Note: 3D tensors with a singleton last dimension (shape [a, b, 1]) are not supported
    because _restore_tensor_from_comm cannot distinguish them from unsqueezed 2D tensors.

    Args:
        tensor: Input tensor (2D or 3D), list of tensors, or None.

    Returns:
        3D tensor (with singleton last dim if input was 2D), list of 3D tensors, or None.
    """
    if tensor is None:
        return None
    if isinstance(tensor, list):
        return [_prepare_tensor_for_comm(t) for t in tensor]
    if isinstance(tensor, torch.Tensor):
        if tensor.ndim == 2:
            return tensor.unsqueeze(-1)
        assert tensor.ndim != 3 or tensor.shape[-1] != 1, (
            f"3D tensor with singleton last dim {tuple(tensor.shape)} is ambiguous for "
            "multimodule comm. Cannot distinguish from an unsqueezed 2D tensor on the "
            "receiving rank. Use a 2D tensor or a 3D tensor with last_dim > 1."
        )
    return tensor


def _restore_tensor_from_comm(
    tensor: Union[torch.Tensor, List[torch.Tensor], None]
) -> Union[torch.Tensor, List[torch.Tensor], None]:
    """Restore tensor shape after P2P/bridge communication by squeezing singleton dim.

    Removes the extra dimension added by _prepare_tensor_for_comm if it was singleton.
    Handles both single tensors and lists of tensors (for VPP).

    Args:
        tensor: Input tensor (3D with singleton last dim), list of tensors, or None.

    Returns:
        2D tensor (if last dim was singleton), list of tensors, or None.
    """
    if tensor is None:
        return None
    if isinstance(tensor, list):
        return [_restore_tensor_from_comm(t) for t in tensor]
    if isinstance(tensor, torch.Tensor) and tensor.ndim == 3 and tensor.shape[-1] == 1:
        return tensor.squeeze(-1)
    return tensor


class MultiModulePipelineCommunicator:
    """Communicator for a multi-module pipeline."""

    def __init__(
        self,
        module_to_grid_map: Dict[str, HyperCommGrid],
        topology: Dict[str, List[str]],
        config: ModelParallelConfig,
        dim_mapping: Dict[str, List[int]] = None,
    ):
        """
        Initialize the MultiModulePipelineCommunicator.

        Args:
            module_to_grid_map (dict): A dictionary mapping module names to HyperCommGrids.
                Example:
                    module_to_grid_map = {
                        'image_encoder': image_encoder_grid,
                        'audio_encoder': audio_encoder_grid,
                        'llm': llm_grid,
                        'generator': generator_grid
                    }
            topology (dict): A dictionary mapping module names to lists of outgoing modules.
                Example:
                    topology = {
                        'image_encoder': ['llm'],
                        'audio_encoder': ['llm'],
                        'llm': ['generator'],
                        'generator': []
                    }
            config (ModelParallelConfig): A ModelParallelConfig object.
            dim_mapping (Dict[str, List[int]]): Dimension mapping for sequence, batch, hidden.
                Example:
                    dim_mapping = {'s': 0, 'h': 2, 'b': 1}
                Default: None
        """
        self.module_to_grid_map = module_to_grid_map
        self.topology = topology
        self.config = config
        self.dim_mapping = dim_mapping
        self.current_rank = dist.get_rank()

        # Build bridge communicators for all modules
        self.bridge_comms = []
        self._build_bridge_comms()

        self.rank_module_map = {}
        self._build_rank_module_info_map()

    def _build_bridge_comms(self):
        """Construct and store BridgeCommunicator objects that describe the outgoing
        communication relationships for all of the modules.
        """
        for src_module_name, src_grid in self.module_to_grid_map.items():
            for dest_module_name in self.topology[src_module_name]:
                dest_grid = self.module_to_grid_map[dest_module_name]
                bridge_comm = BridgeCommunicator(
                    src_grid=src_grid,
                    dest_grid=dest_grid,
                    dim_mapping=self.dim_mapping,
                    comm_dtype=self.config.pipeline_dtype,
                    src_module_name=src_module_name,
                    dest_module_name=dest_module_name,
                )
                self.bridge_comms.append(bridge_comm)

    @property
    def is_pp_first_stage(self):
        """Return True if the current rank has the absolute first stage in the overall model.

        The absolute first stage is defined as:
        1. The current rank must be in the first PP stage (pp_rank == 0) of some module
        2. That module must be a source module (no incoming connections in topology)
        """
        for module_name, rank_module_info in self.rank_module_map.items():
            # Check if this rank is at the first PP stage of this module
            if rank_module_info.pp_rank == 0:
                # Check if this module is a source module (no incoming connections)
                if self._is_source_module(module_name):
                    return True
        return False

    @property
    def is_pp_last_stage(self):
        """Return True if the current rank has the absolute last stage in the overall model.

        The absolute last stage is defined as:
        1. The current rank must be in the last PP stage of some module
        2. That module must be a sink module (no outgoing connections in topology)
        """
        for module_name, rank_module_info in self.rank_module_map.items():
            # Check if this rank is at the last PP stage of this module
            if rank_module_info.pp_rank == rank_module_info.pp_size - 1:
                # Check if this module is a sink module (no outgoing connections)
                if self._is_sink_module(module_name):
                    return True
        return False

    def _is_source_module(self, module_name: str) -> bool:
        """Check if a module is a source module (has no incoming connections)."""
        # A module is a source if no other module lists it as a destination
        for src_module, dest_modules in self.topology.items():
            if module_name in dest_modules:
                return False
        return True

    def _is_sink_module(self, module_name: str) -> bool:
        """Check if a module is a sink module (has no outgoing connections)."""
        return len(self.topology.get(module_name, [])) == 0

    def is_current_rank_in_grid(self, grid: HyperCommGrid) -> bool:
        """Check if the current rank is in the grid."""
        return grid.rank_offset <= self.current_rank < grid.rank_offset + grid.size

    @property
    def total_stages(self) -> int:
        """Return total number of pipeline stages across all modules.

        Computes the longest path through the module DAG weighted by each
        module's pipeline-parallel size.

        Returns:
            int: Total pipeline stages.
        """
        return self.compute_total_pipeline_stages(self.topology, self.module_to_grid_map)

    @property
    def current_stage(self) -> int:
        """Return current pipeline stage index (0-indexed) within the multi-module pipeline.

        Returns:
            int: Current stage index.
        """
        total = self.total_stages

        if self.rank_module_map:
            # Take the first module this rank belongs to
            # TODO: ykarnati - improve this logic.
            module_name = next(iter(self.rank_module_map.keys()))
            stage = (
                self.compute_total_pipeline_stages(
                    self.topology,
                    self.module_to_grid_map,
                    rank=self.current_rank,
                    module_name=module_name,
                )
                - 1
            )  # Convert from 1-indexed to 0-indexed
        else:
            stage = 0

        assert stage < total, f"current_stage: {stage} must be less than total_stages: {total}"
        logging.debug(
            f"[Rank {dist.get_rank()} ][MultiModulePipelineCommunicator] "
            f"current_stage: {stage} total_stages: {total} "
            f"num_warmup_microbatches: {total - stage - 1}"
        )
        return stage

    def _build_rank_module_info_map(self):
        """For each module in the current rank, initialize the P2P communicator
        and build the bridge communicator info for the module.
        Each rank may hold multiple modules when colocated.
        """
        for module_name, module_grid in self.module_to_grid_map.items():
            if self.is_current_rank_in_grid(module_grid):
                # Initialize P2P communicator
                pp_group = module_grid.get_pg('pp')
                p2p_comm = P2PCommunicator(pp_group, self.config)
                pp_size = dist.get_world_size(pp_group)
                rank_in_pp_group = dist.get_group_rank(pp_group, self.current_rank)
                pp_rank = rank_in_pp_group % pp_size

                bridge_comms_as_dest_module = []
                bridge_comms_as_src_module = []
                # If first stage, check if the module has any incoming modules
                # If so, initialize bridge communicator
                if pp_rank == 0:
                    for bridge_comm in self.bridge_comms:
                        if (
                            bridge_comm.is_current_rank_in_grid(bridge_comm.dest_grid)
                            and bridge_comm.dest_module_name == module_name
                        ):
                            bridge_comms_as_dest_module.append(bridge_comm)
                # If last stage, check if the module has any outgoing modules
                # If so, initialize bridge communicator
                if pp_rank == pp_size - 1:
                    for bridge_comm in self.bridge_comms:
                        if (
                            bridge_comm.is_current_rank_in_grid(bridge_comm.src_grid)
                            and bridge_comm.src_module_name == module_name
                        ):
                            bridge_comms_as_src_module.append(bridge_comm)
                # Build RankModuleInfo for the module
                rank_module_info = RankModuleInfo(
                    pp_rank=pp_rank,
                    pp_size=pp_size,
                    p2p_communicator=p2p_comm,
                    bridge_comms_as_dest_module=bridge_comms_as_dest_module,
                    bridge_comms_as_src_module=bridge_comms_as_src_module,
                )
                self.rank_module_map[module_name] = rank_module_info

    def recv_forward(
        self, tensor_shape: Optional[Shape] = None, is_first_stage: bool = False
    ) -> Dict[str, torch.Tensor]:
        """Receive forward activation tensor.

        Args:
            tensor_shape: Expected activation tensor shape

        Returns:
            A dictionary mapping module names to tensors.
        """
        logging.debug(
            f"[Rank {dist.get_rank()} ][MultiModulePipelineCommunicator] "
            f"[receive_forward] tensors_shape: {tensor_shape}, is_first_stage: {is_first_stage}"
        )
        input_dict = {}
        for module_name, rank_module_info in self.rank_module_map.items():

            if rank_module_info.pp_rank == 0:
                # If first stage, and has incoming modules, receive forward activation
                # from incoming modules.
                for bridge_comm in rank_module_info.bridge_comms_as_dest_module:
                    received_tensor = bridge_comm.recv_forward()
                    input_dict[bridge_comm.src_module_name] = _restore_tensor_from_comm(
                        received_tensor
                    )
            else:
                # If not first stage, receive forward activation tensor from P2P communicator.
                received_tensor = rank_module_info.p2p_communicator.recv_forward(
                    tensor_shapes=tensor_shape, is_first_stage=False
                )
                input_dict[module_name] = _restore_tensor_from_comm(received_tensor)
        return input_dict

    def send_forward(self, output_dict: Dict[str, torch.Tensor], is_last_stage: bool = False):
        """Send forward activation tensor.

        Args:
            output_dict: A dictionary mapping module names to tensors.
        """
        for module_name, rank_module_info in self.rank_module_map.items():
            if rank_module_info.pp_rank == rank_module_info.pp_size - 1:
                # If last stage, and has outgoing modules, send forward activation
                # by using bridge communicator.
                for bridge_comm in rank_module_info.bridge_comms_as_src_module:
                    tensor_to_send = _prepare_tensor_for_comm(output_dict[module_name])
                    bridge_comm.send_forward(tensor_to_send)
            else:
                # If not last stage, send forward activation by using P2P communicator.
                tensor_to_send = _prepare_tensor_for_comm(output_dict[module_name])
                rank_module_info.p2p_communicator.send_forward(tensor_to_send, is_last_stage=False)

    def send_forward_recv_backward(
        self,
        output_dict: Dict[str, torch.Tensor],
        tensor_shape: Optional[Shape] = None,
        is_last_stage: bool = False,
    ) -> Dict[str, torch.Tensor]:
        """Send forward activation tensor and receive backward activation tensor.

        Args:
            output_dict: A dictionary mapping module names to tensors.
            tensor_shape: Expected gradient tensor shape

        Returns:
            A dictionary mapping module names to tensors.
        """
        grad_dict = {}
        for module_name, rank_module_info in self.rank_module_map.items():
            if rank_module_info.pp_rank == rank_module_info.pp_size - 1:
                # If last stage, and has outgoing modules, send forward activation and
                # receive backward gradient by using bridge communicator.
                for bridge_comm in rank_module_info.bridge_comms_as_src_module:
                    tensor_to_send = _prepare_tensor_for_comm(output_dict[module_name])
                    grad = bridge_comm.send_forward_recv_backward(tensor_to_send)
                    grad_dict[bridge_comm.src_module_name] = _restore_tensor_from_comm(grad)
            else:
                # If not last stage, send forward activation and receive backward gradient
                # by using P2P communicator.
                tensor_to_send = _prepare_tensor_for_comm(output_dict[module_name])
                grad = rank_module_info.p2p_communicator.send_forward_recv_backward(
                    tensor_to_send, tensor_shapes=tensor_shape, is_last_stage=False
                )
                grad_dict[module_name] = _restore_tensor_from_comm(grad)
        return grad_dict

    def send_backward_recv_forward(
        self,
        grad_dict: Dict[str, torch.Tensor],
        tensor_shape: Optional[Shape] = None,
        is_first_stage: bool = False,
    ) -> Dict[str, torch.Tensor]:
        """Send backward activation tensor and receive forward activation tensor.

        Args:
            grad_dict: A dictionary mapping module names to tensors.
            tensor_shape: Expected gradient tensor shape

        Returns:
            A dictionary mapping module names to tensors.
        """
        input_dict = {}
        for module_name, rank_module_info in self.rank_module_map.items():
            if rank_module_info.pp_rank == 0:
                for bridge_comm in rank_module_info.bridge_comms_as_dest_module:
                    # If first stage, and has incoming modules, send backward gradient and
                    # receive forward activation by using bridge communicator.
                    grad_to_send = _prepare_tensor_for_comm(grad_dict[bridge_comm.src_module_name])
                    received_tensor = bridge_comm.send_backward_recv_forward(grad_to_send)
                    input_dict[bridge_comm.src_module_name] = _restore_tensor_from_comm(
                        received_tensor
                    )
            else:
                # If not first stage, send backward gradient and receive forward activation
                # by using P2P communicator.
                grad_to_send = _prepare_tensor_for_comm(grad_dict[module_name])
                received_tensor = rank_module_info.p2p_communicator.send_backward_recv_forward(
                    grad_to_send, tensor_shapes=tensor_shape, is_first_stage=False
                )
                input_dict[module_name] = _restore_tensor_from_comm(received_tensor)
        return input_dict

    def recv_backward(
        self, tensor_shape: Optional[Shape] = None, is_last_stage: bool = False
    ) -> Dict[str, torch.Tensor]:
        """Receive backward activation tensor.

        Args:
            tensor_shape: Expected gradient tensor shape

        Returns:
            A dictionary mapping module names to tensors.
        """
        logging.debug(
            f"[Rank {dist.get_rank()} ][MultiModulePipelineCommunicator] "
            f"[recv_backward] tensor_shape: {tensor_shape}, is_last_stage: {is_last_stage}"
        )
        grad_dict = {}
        for module_name, rank_module_info in self.rank_module_map.items():
            if rank_module_info.pp_rank == rank_module_info.pp_size - 1:
                # If last stage, and has incoming modules, receive backward gradient
                # by using bridge communicator.
                for bridge_comm in rank_module_info.bridge_comms_as_src_module:
                    grad = bridge_comm.recv_backward()
                    grad_dict[bridge_comm.src_module_name] = _restore_tensor_from_comm(grad)
            else:
                # If not last stage, receive backward gradient by using P2P communicator.
                grad = rank_module_info.p2p_communicator.recv_backward(
                    tensor_shapes=tensor_shape, is_last_stage=False
                )
                grad_dict[module_name] = _restore_tensor_from_comm(grad)
        return grad_dict

    def send_backward(self, grad_dict: Dict[str, torch.Tensor], is_first_stage: bool = False):
        """Send backward activation tensor.

        Args:
            grad_dict: A dictionary mapping module names to tensors.
        """
        for module_name, rank_module_info in self.rank_module_map.items():
            if rank_module_info.pp_rank == 0:
                # If first stage, and has incoming modules, send backward activation
                # by using bridge communicator.
                for bridge_comm in rank_module_info.bridge_comms_as_dest_module:
                    grad_to_send = _prepare_tensor_for_comm(grad_dict[bridge_comm.src_module_name])
                    bridge_comm.send_backward(grad_to_send)
            else:
                # If not first stage, send backward activation by using P2P communicator.
                grad_to_send = _prepare_tensor_for_comm(grad_dict[module_name])
                rank_module_info.p2p_communicator.send_backward(grad_to_send, is_first_stage=False)

    @staticmethod
    def compute_total_pipeline_stages(
        topology: Dict[str, List[str]],
        module_to_grid_map: Dict[str, HyperCommGrid],
        rank: Optional[int] = None,
        module_name: Optional[str] = None,
    ) -> int:
        """Compute the total number of pipeline stages across a multi-module chain.

        Interprets ``topology`` as a directed acyclic graph (DAG) where nodes are modules
        and edges indicate forward data flow from source to destination modules. Each node
        is assigned a weight equal to its pipeline parallel size (number of PP stages).

        The total number of stages is defined as the length of the longest path in this DAG
        under node weights.

        If ``rank`` is None (default), returns the maximum over all terminal (sink) modules of
        the sum of PP sizes along a path ending at that terminal. For example, given:

            image_encoder ->\
                              -> llm -> generator
            audio_encoder  ->/

        the total is: max(pp(image_encoder), pp(audio_encoder)) + pp(llm) + pp(generator).

        If ``rank`` is provided, the result is the total number of pipeline stages up to (and
        including) the PP stage that ``rank`` occupies inside its module. In this case, the
        weight of the target module equals (pp_rank_index(rank) + 1) instead of the module's
        full PP size; other modules still contribute their full PP sizes. If the rank belongs to
        multiple modules (colocation), pass ``module_name`` to disambiguate; otherwise the
        maximum across all candidate modules containing the rank is returned.

        Args:
            topology: Mapping from a module to its list of outgoing modules.
            module_to_grid_map: Mapping from module name to its ``HyperCommGrid``.

        Returns:
            The total number of pipeline stages along the longest path given the constraints.

        Raises:
            ValueError: If the topology contains cycles; or has no terminal nodes when
                ``rank`` is None
        """
        nodes = set(module_to_grid_map.keys())
        # Build adjacency and reverse-adjacency (predecessors).
        adj: Dict[str, List[str]] = {node: list(topology.get(node, [])) for node in nodes}
        preds: Dict[str, List[str]] = {node: [] for node in nodes}
        for src, outs in adj.items():
            for dst in outs:
                preds[dst].append(src)

        # Identify terminal nodes (no outgoing edges) for the rank=None case.
        sinks = [node for node, outs in adj.items() if not outs]
        if rank is None and not sinks:
            raise ValueError(
                "Topology must be a DAG with at least one terminal (no outgoing) module."
            )

        def pp_size(name: str) -> int:
            grid = module_to_grid_map[name]
            pp_dim_index = grid.dim_names.index('pp')
            return grid.shape[pp_dim_index]

        def partial_weight_for_target(target: str) -> Optional[int]:
            if rank is None:
                return None
            grid = module_to_grid_map.get(target)
            rank_groups = grid._gen_rank_enum(['pp'])
            stage_index: Optional[int] = None
            for group in rank_groups:
                if rank in group:
                    stage_index = group.index(rank)
                    break
            return stage_index + 1

        def longest_path_to(target: str) -> int:
            visiting = set()
            partial = partial_weight_for_target(target)

            def weight(name: str) -> int:
                if partial is not None and name == target:
                    return partial
                return pp_size(name)

            def dfs(node: str) -> int:
                if node in visiting:
                    raise ValueError("Topology contains cycles; expected a DAG.")
                visiting.add(node)
                best = 0
                for p in preds.get(node, []):
                    val = dfs(p)
                    if val > best:
                        best = val
                visiting.remove(node)
                return weight(node) + best

            return dfs(target)

        if rank is None:
            return max(longest_path_to(sink) for sink in sinks)

        return longest_path_to(module_name)


================================================
FILE: megatron/core/pipeline_parallel/p2p_communication.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.


from typing import List, Optional, Tuple, Union

import torch
import torch.distributed as dist

from megatron.core.model_parallel_config import ModelParallelConfig
from megatron.core.pipeline_parallel.utils import is_pp_first_stage, is_pp_last_stage
from megatron.core.utils import nvtx_decorator

# Types
Shape = Union[List[int], torch.Size]


def _batched_p2p_ops(
    *,
    tensor_send_prev: Optional[torch.Tensor],
    tensor_recv_prev: Optional[torch.Tensor],
    tensor_send_next: Optional[torch.Tensor],
    tensor_recv_next: Optional[torch.Tensor],
    group: torch.distributed.ProcessGroup,
    prev_pipeline_rank: int,
    next_pipeline_rank: int,
):
    ops = []
    if tensor_send_prev is not None:
        send_prev_op = torch.distributed.P2POp(
            torch.distributed.isend, tensor_send_prev, prev_pipeline_rank, group
        )
        ops.append(send_prev_op)
    if tensor_recv_prev is not None:
        recv_prev_op = torch.distributed.P2POp(
            torch.distributed.irecv, tensor_recv_prev, prev_pipeline_rank, group
        )
        ops.append(recv_prev_op)
    if tensor_send_next is not None:
        send_next_op = torch.distributed.P2POp(
            torch.distributed.isend, tensor_send_next, next_pipeline_rank, group
        )
        ops.append(send_next_op)
    if tensor_recv_next is not None:
        recv_next_op = torch.distributed.P2POp(
            torch.distributed.irecv, tensor_recv_next, next_pipeline_rank, group
        )
        ops.append(recv_next_op)
    if len(ops) > 0:
        reqs = torch.distributed.batch_isend_irecv(ops)
    else:
        reqs = []
    return reqs


def _p2p_ops(
    *,
    tensor_send_prev: Optional[torch.Tensor],
    tensor_recv_prev: Optional[torch.Tensor],
    tensor_send_next: Optional[torch.Tensor],
    tensor_recv_next: Optional[torch.Tensor],
    group: torch.distributed.ProcessGroup,
    prev_pipeline_rank: int,
    next_pipeline_rank: int,
):
    reqs = {}
    even_send_odd_recv_group = group
    if group.size() == 2 and torch.distributed.get_backend(group) != 'ucc':
        # Use the global process group for one of the two p2p communications
        # to allow the overlap of the independent communications.
        # Using the global process group is compatible because the pipeline-parallel
        # communications set the source and destination by global rank.
        # The only exception occurs when using the ‘ucc’ backend.
        # Because the global communicator always uses the ‘nccl’ backend,
        # we must ensure the else path is followed for the ‘ucc’ backend.
        even_recv_odd_send_group = torch.distributed.group.WORLD
    else:
        even_recv_odd_send_group = group

    if group.rank() % 2 == 0:
        if tensor_send_next is not None:
            send_next_req = torch.distributed.isend(
                tensor=tensor_send_next, dst=next_pipeline_rank, group=even_send_odd_recv_group
            )
            reqs["send_next"] = send_next_req

        if tensor_recv_prev is not None:
            recv_prev_req = torch.distributed.irecv(
                tensor=tensor_recv_prev, src=prev_pipeline_rank, group=even_recv_odd_send_group
            )
            reqs["recv_prev"] = recv_prev_req

        if tensor_send_prev is not None:
            send_prev_req = torch.distributed.isend(
                tensor=tensor_send_prev, dst=prev_pipeline_rank, group=even_send_odd_recv_group
            )
            reqs["send_prev"] = send_prev_req

        if tensor_recv_next is not None:
            recv_next_req = torch.distributed.irecv(
                tensor=tensor_recv_next, src=next_pipeline_rank, group=even_recv_odd_send_group
            )
            reqs["recv_next"] = recv_next_req

    else:
        if tensor_recv_prev is not None:
            recv_prev_req = torch.distributed.irecv(
                tensor=tensor_recv_prev, src=prev_pipeline_rank, group=even_send_odd_recv_group
            )
            reqs["recv_prev"] = recv_prev_req

        if tensor_send_next is not None:
            send_next_req = torch.distributed.isend(
                tensor=tensor_send_next, dst=next_pipeline_rank, group=even_recv_odd_send_group
            )
            reqs["send_next"] = send_next_req

        if tensor_recv_next is not None:
            recv_next_req = torch.distributed.irecv(
                tensor=tensor_recv_next, src=next_pipeline_rank, group=even_send_odd_recv_group
            )
            reqs["recv_next"] = recv_next_req

        if tensor_send_prev is not None:
            send_prev_req = torch.distributed.isend(
                tensor=tensor_send_prev, dst=prev_pipeline_rank, group=even_recv_odd_send_group
            )
            reqs["send_prev"] = send_prev_req
    return reqs


def is_single_shape(x) -> bool:
    """Check if the input is a single shape."""
    if isinstance(x, torch.Size):
        return True
    if isinstance(x, (list, tuple)) and len(x) > 0 and all(isinstance(d, int) for d in x):
        return True
    return False


class P2PCommunicator:
    """P2P (Point-to-Point) Communicator for pipeline parallelism.

    This class handles communication between pipeline stages by managing
    tensor exchanges between consecutive stages in the pipeline.
    """

    def __init__(self, pp_group: dist.ProcessGroup, config: ModelParallelConfig):
        # Basic attrs
        self.pp_group = pp_group
        self.config = config

        world_size = self.pp_group.size()
        curr_rank_in_pg = self.pp_group.rank()

        next_rank_pg = (curr_rank_in_pg + 1) % world_size
        prev_rank_pg = (curr_rank_in_pg - 1) % world_size

        self.next_rank: int | None = dist.get_global_rank(self.pp_group, next_rank_pg)
        self.prev_rank: int | None = dist.get_global_rank(self.pp_group, prev_rank_pg)
        self.virtual_pipeline_model_parallel_size = (
            config.virtual_pipeline_model_parallel_size
            if config.virtual_pipeline_model_parallel_size is not None
            else None
        )

    @property
    def is_pp_first_stage(self) -> bool:
        """Return True if pp first stage."""
        return is_pp_first_stage(self.pp_group)

    @property
    def is_pp_last_stage(self) -> bool:
        """Return True if pp last stage."""
        return is_pp_last_stage(self.pp_group)

    @property
    def total_stages(self) -> int:
        """Return total number of pipeline stages."""
        return self.pp_group.size()

    @property
    def current_stage(self) -> int:
        """Return current pipeline stage index (0-indexed)."""
        return self.pp_group.rank()

    def _communicate_shapes(self, tensor_send_next, tensor_send_prev, recv_prev, recv_next):
        """Communicate tensor shapes between stages. Used to communicate
        tensor shapes before the actual tensor communication happens.
        This is required when the sequence lengths across micro batches
        are not uniform.

        Args:
            tensor_send_next: tensor to send to next rank (no tensor sent if
                            set to None).
            tensor_send_prev: tensor to send to prev rank (no tensor sent if
                            set to None).
            recv_prev: boolean for whether tensor should be received from
                    previous rank.
            recv_next: boolean for whether tensor should be received from
                    next rank.
        Returns:
            (recv_prev_shape, recv_next_shape)
        """
        config = self.config
        recv_prev_shape_tensor = None
        recv_next_shape_tensor = None
        send_prev_shape_tensor = None
        send_next_shape_tensor = None
        if recv_prev:
            recv_prev_shape_tensor = torch.empty(
                (3,), device=torch.cuda.current_device(), dtype=torch.int64
            )
        if recv_next:
            recv_next_shape_tensor = torch.empty(
                (3,), device=torch.cuda.current_device(), dtype=torch.int64
            )
        if tensor_send_prev is not None:
            send_prev_shape_tensor = torch.tensor(
                tensor_send_prev.size(), device=torch.cuda.current_device(), dtype=torch.int64
            )
        if tensor_send_next is not None:
            send_next_shape_tensor = torch.tensor(
                tensor_send_next.size(), device=torch.cuda.current_device(), dtype=torch.int64
            )

        if config.use_ring_exchange_p2p:
            torch.distributed.ring_exchange(
                tensor_send_prev=send_prev_shape_tensor,
                tensor_recv_prev=recv_prev_shape_tensor,
                tensor_send_next=send_next_shape_tensor,
                tensor_recv_next=recv_next_shape_tensor,
                group=self.pp_group,
            )
        else:
            ops = []
            if send_prev_shape_tensor is not None:
                send_prev_op = torch.distributed.P2POp(
                    torch.distributed.isend, send_prev_shape_tensor, self.prev_rank, self.pp_group
                )
                ops.append(send_prev_op)
            if recv_prev_shape_tensor is not None:
                recv_prev_op = torch.distributed.P2POp(
                    torch.distributed.irecv, recv_prev_shape_tensor, self.prev_rank, self.pp_group
                )
                ops.append(recv_prev_op)
            if send_next_shape_tensor is not None:
                send_next_op = torch.distributed.P2POp(
                    torch.distributed.isend, send_next_shape_tensor, self.next_rank, self.pp_group
                )
                ops.append(send_next_op)
            if recv_next_shape_tensor is not None:
                recv_next_op = torch.distributed.P2POp(
                    torch.distributed.irecv, recv_next_shape_tensor, self.next_rank, self.pp_group
                )
                ops.append(recv_next_op)
            if len(ops) > 0:
                reqs = torch.distributed.batch_isend_irecv(ops)
                for req in reqs:
                    req.wait()

            # To protect against race condition when using batch_isend_irecv().
            # should take this out once the bug with batch_isend_irecv is resolved.
            torch.cuda.synchronize()

        recv_prev_shape = [0, 0, 0]
        if recv_prev_shape_tensor is not None:
            recv_prev_shape = recv_prev_shape_tensor.tolist()

        recv_next_shape = [0, 0, 0]
        if recv_next_shape_tensor is not None:
            recv_next_shape = recv_next_shape_tensor.tolist()

        return recv_prev_shape, recv_next_shape

    def _communicate(
        self,
        *,
        tensor_send_next: Optional[torch.Tensor],
        tensor_send_prev: Optional[torch.Tensor],
        recv_prev: bool,
        recv_next: bool,
        tensor_shape: Shape,
        wait_on_reqs: bool = True,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Communicate tensors between stages. Used as helper method in other
        communication methods that are used in megatron/schedules.py.

        Args:
            tensor_send_next (torch.Tensor, optional):
                Tensor to send to next rank (no tensor sent if None)

            tensor_send_prev (torch.Tensor, optional):
                Tensor to send to prev rank (no tensor sent if None)

            recv_prev (boolean, required):
                whether tensor should be received from previous rank.

            recv_next (boolean, required):
                whether tensor should be received from next rank.

            tensor_shape (List[int] or torch.Size, required):
                shape of tensor to receive (this method assumes that all
                tensors sent and received in a single function call are
                the same shape).

            wait_on_reqs (boolean, optional, default=False):
                For non-batched p2p communication, wait on each request
                before returning.

        Returns:
            tuple containing

            - tensor_recv_prev: torch.Tensor if recv_prev is True, None otherwise.
            - tensor_recv_next: torch.Tensor if recv_next is True, None otherwise.

        """

        config = self.config
        tensor_recv_prev_func = None
        tensor_recv_next_func = None

        if config.variable_seq_lengths or config.mtp_standalone:
            recv_prev_shape, recv_next_shape = self._communicate_shapes(
                tensor_send_next, tensor_send_prev, recv_prev, recv_next
            )
        else:
            recv_prev_shape = tensor_shape
            recv_next_shape = tensor_shape

        def create_tensor_recv_prev():
            return torch.empty(
                recv_prev_shape,
                requires_grad=True,
                device=torch.cuda.current_device(),
                dtype=config.pipeline_dtype,
            )

        def create_tensor_recv_next():
            return torch.empty(
                recv_next_shape,
                requires_grad=True,
                device=torch.cuda.current_device(),
                dtype=config.pipeline_dtype,
            )

        if recv_prev:
            if config.pipeline_dtype is None:
                raise RuntimeError("pipeline_dtype must be provided if recv_prev is True")
            if tensor_shape is None:
                raise RuntimeError(
                    "tensor_shape must be specified if recv_prev is True. "
                    "Common tensor_shape is (seq_length, micro_batch_size, hidden_size)"
                )
            tensor_recv_prev_func = create_tensor_recv_prev

        if recv_next:
            if config.pipeline_dtype is None:
                raise RuntimeError("dtype must be provided if recv_next is True")
            if tensor_shape is None:
                raise RuntimeError(
                    "tensor_shape must be specified if recv_next is True. "
                    "Common tensor_shape is (seq_length, micro_batch_size, hidden_size)"
                )
            tensor_recv_next_func = create_tensor_recv_next

        # Send tensors in both the forward and backward directions as appropriate.
        if config.use_ring_exchange_p2p:

            def _ring_exchange_wrapper(**kwargs):
                torch.distributed.ring_exchange(**kwargs)
                return []

            p2p_func = _ring_exchange_wrapper
        elif config.batch_p2p_comm:
            assert wait_on_reqs
            p2p_func = _batched_p2p_ops
        else:
            p2p_func = _p2p_ops

        pp_group = self.pp_group
        next_rank = self.next_rank
        prev_rank = self.prev_rank

        if config.use_ring_exchange_p2p or config.batch_p2p_comm:
            reqs = []
        else:
            reqs = {}

        tensor_recv_prev = None
        tensor_recv_next = None
        if tensor_recv_prev_func is not None:
            tensor_recv_prev = tensor_recv_prev_func()

        if tensor_recv_next_func is not None:
            tensor_recv_next = tensor_recv_next_func()

        p2p_reqs = p2p_func(
            tensor_send_prev=tensor_send_prev,
            tensor_recv_prev=tensor_recv_prev,
            tensor_send_next=tensor_send_next,
            tensor_recv_next=tensor_recv_next,
            group=pp_group,
            prev_pipeline_rank=prev_rank,
            next_pipeline_rank=next_rank,
        )
        if isinstance(p2p_reqs, list):
            reqs.extend(p2p_reqs)
        else:
            reqs.update(p2p_reqs)

        if wait_on_reqs and len(reqs) > 0:
            for req in reqs if isinstance(reqs, list) else reqs.values():
                req.wait()
            reqs = None

        if config.batch_p2p_comm and config.batch_p2p_sync:
            # To protect against race condition when using batch_isend_irecv().
            # User should assert that we have a modern enough PyTorch to not need this
            torch.cuda.synchronize()

        return tensor_recv_prev, tensor_recv_next, reqs

    @nvtx_decorator()
    def recv_forward(
        self, tensor_shapes, is_first_stage: bool
    ) -> Union[torch.Tensor, list[torch.Tensor]]:
        """Receive tensor from previous rank in pipeline (forward receive)."""
        unwrap_tensor_shapes = False
        if is_single_shape(tensor_shapes):
            unwrap_tensor_shapes = True
            tensor_shapes = [tensor_shapes]
        input_tensors = []
        config = self.config
        for tensor_shape in tensor_shapes:
            if is_first_stage:
                input_tensor = None
            else:
                if config.timers is not None:
                    config.timers('forward-recv', log_level=2).start()
                input_tensor, _, _ = self._communicate(
                    tensor_send_next=None,
                    tensor_send_prev=None,
                    recv_prev=True,
                    recv_next=False,
                    tensor_shape=tensor_shape,
                )
                if config.timers is not None:
                    config.timers('forward-recv').stop()
            input_tensors.append(input_tensor)
        if unwrap_tensor_shapes:
            return input_tensors[0]
        return input_tensors

    @nvtx_decorator()
    def recv_backward(
        self, tensor_shapes, is_last_stage: bool
    ) -> Union[torch.Tensor, list[torch.Tensor]]:
        """Receive tensor from next rank in pipeline (backward receive)."""
        unwrap_tensor_shapes = False
        if is_single_shape(tensor_shapes):
            unwrap_tensor_shapes = True
            tensor_shapes = [tensor_shapes]
        config = self.config
        output_tensor_grads = []
        for tensor_shape in tensor_shapes:
            if is_last_stage:
                output_tensor_grad = None
            else:
                if config.timers is not None:
                    config.timers('backward-recv', log_level=2).start()
                _, output_tensor_grad, _ = self._communicate(
                    tensor_send_next=None,
                    tensor_send_prev=None,
                    recv_prev=False,
                    recv_next=True,
                    tensor_shape=tensor_shape,
                )
                if config.timers is not None:
                    config.timers('backward-recv').stop()
            output_tensor_grads.append(output_tensor_grad)
        if unwrap_tensor_shapes:
            return output_tensor_grads[0]
        return output_tensor_grads

    @nvtx_decorator()
    def send_forward(self, output_tensors, is_last_stage: bool) -> None:
        """Send tensor to next rank in pipeline (forward send)."""
        config = self.config
        if not isinstance(output_tensors, list):
            output_tensors = [output_tensors]

        for output_tensor in output_tensors:
            if not is_last_stage:
                if config.timers is not None:
                    config.timers('forward-send', log_level=2).start()
                self._communicate(
                    tensor_send_next=output_tensor,
                    tensor_send_prev=None,
                    recv_prev=False,
                    recv_next=False,
                    tensor_shape=None,
                )
                if config.timers is not None:
                    config.timers('forward-send').stop()

    @nvtx_decorator()
    def send_backward(self, input_tensor_grads, is_first_stage: bool) -> None:
        """Send tensor to previous rank in pipeline (backward send)."""
        if not isinstance(input_tensor_grads, list):
            input_tensor_grads = [input_tensor_grads]
        config = self.config
        for input_tensor_grad in input_tensor_grads:
            if not is_first_stage:
                if config.timers is not None:
                    config.timers('backward-send', log_level=2).start()
                self._communicate(
                    tensor_send_next=None,
                    tensor_send_prev=input_tensor_grad,
                    recv_prev=False,
                    recv_next=False,
                    tensor_shape=None,
                )
                if config.timers is not None:
                    config.timers('backward-send').stop()

    @nvtx_decorator()
    def send_forward_recv_backward(
        self, output_tensors, tensor_shapes, is_last_stage: bool
    ) -> Union[torch.Tensor, list[torch.Tensor]]:
        """Batched send and recv with next rank in pipeline."""
        config = self.config
        unwrap_output_tensors = False
        if not isinstance(output_tensors, list):
            unwrap_output_tensors = True
            output_tensors = [output_tensors]
        if not isinstance(tensor_shapes, list):
            tensor_shapes = [tensor_shapes]
        output_tensor_grads = []
        for output_tensor, tensor_shape in zip(output_tensors, tensor_shapes):
            if is_last_stage:
                output_tensor_grad = None
            else:
                if config.timers is not None:
                    config.timers('forward-send-backward-recv', log_level=2).start()
                _, output_tensor_grad, _ = self._communicate(
                    tensor_send_next=output_tensor,
                    tensor_send_prev=None,
                    recv_prev=False,
                    recv_next=True,
                    tensor_shape=tensor_shape,
                )
                if config.timers is not None:
                    config.timers('forward-send-backward-recv').stop()
            output_tensor_grads.append(output_tensor_grad)
        if unwrap_output_tensors:
            return output_tensor_grads[0]
        return output_tensor_grads

    @nvtx_decorator()
    def send_backward_recv_forward(
        self, input_tensor_grads, tensor_shapes, is_first_stage: bool
    ) -> Union[torch.Tensor, list[torch.Tensor]]:
        """Batched send and recv with previous rank in pipeline."""
        config = self.config
        unwrap_input_tensor_grads = False
        if not isinstance(input_tensor_grads, list):
            unwrap_input_tensor_grads = True
            input_tensor_grads = [input_tensor_grads]
        if not isinstance(tensor_shapes, list):
            tensor_shapes = [tensor_shapes]
        input_tensors = []
        for input_tensor_grad, tensor_shape in zip(input_tensor_grads, tensor_shapes):
            if is_first_stage:
                input_tensor = None
            else:
                if config.timers is not None:
                    config.timers('backward-send-forward-recv', log_level=2).start()
                input_tensor, _, _ = self._communicate(
                    tensor_send_next=None,
                    tensor_send_prev=input_tensor_grad,
                    recv_prev=True,
                    recv_next=False,
                    tensor_shape=tensor_shape,
                )
                if config.timers is not None:
                    config.timers('backward-send-forward-recv').stop()
            input_tensors.append(input_tensor)
        if unwrap_input_tensor_grads:
            return input_tensors[0]
        return input_tensors

    @nvtx_decorator()
    def send_forward_recv_forward(
        self,
        output_tensor: torch.Tensor,
        recv_prev: bool,
        tensor_shape: Shape,
        overlap_p2p_comm: bool = False,
    ) -> torch.Tensor:
        """Batched recv from previous rank and send to next rank in pipeline."""
        config = self.config
        if config.timers is not None:
            config.timers('forward-send-forward-recv', log_level=2).start()
        input_tensor, _, wait_handles = self._communicate(
            tensor_send_next=output_tensor,
            tensor_send_prev=None,
            recv_prev=recv_prev,
            recv_next=False,
            tensor_shape=tensor_shape,
            wait_on_reqs=(not overlap_p2p_comm),
        )
        if config.timers is not None:
            config.timers('forward-send-forward-recv').stop()
        if overlap_p2p_comm:
            return input_tensor, wait_handles
        return input_tensor

    @nvtx_decorator()
    def send_backward_recv_backward(
        self,
        input_tensor_grad: torch.Tensor,
        recv_next: bool,
        tensor_shape: Shape,
        overlap_p2p_comm: bool = False,
    ) -> torch.Tensor:
        """Batched recv from next rank and send to previous rank in pipeline."""
        config = self.config
        if config.timers is not None:
            config.timers('backward-send-backward-recv', log_level=2).start()
        _, output_tensor_grad, wait_handles = self._communicate(
            tensor_send_next=None,
            tensor_send_prev=input_tensor_grad,
            recv_prev=False,
            recv_next=recv_next,
            tensor_shape=tensor_shape,
            wait_on_reqs=(not overlap_p2p_comm),
        )
        if config.timers is not None:
            config.timers('backward-send-backward-recv').stop()
        if overlap_p2p_comm:
            return output_tensor_grad, wait_handles
        return output_tensor_grad

    @nvtx_decorator()
    def send_forward_backward_recv_forward_backward(
        self,
        output_tensor: torch.Tensor,
        input_tensor_grad: torch.Tensor,
        recv_prev: bool,
        recv_next: bool,
        tensor_shape: Shape,
    ) -> torch.Tensor:
        """Batched send and recv with previous and next ranks in pipeline."""
        config = self.config
        if config.timers is not None:
            config.timers('forward-backward-send-forward-backward-recv', log_level=2).start()
        input_tensor, output_tensor_grad, _ = self._communicate(
            tensor_send_next=output_tensor,
            tensor_send_prev=input_tensor_grad,
            recv_prev=recv_prev,
            recv_next=recv_next,
            tensor_shape=tensor_shape,
        )
        if config.timers is not None:
            config.timers('forward-backward-send-forward-backward-recv').stop()
        return input_tensor, output_tensor_grad


================================================
FILE: megatron/core/pipeline_parallel/schedules.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import contextlib
from functools import partial
from typing import Callable, Dict, Iterator, List, Optional, Union

import torch
from torch.autograd.variable import Variable

from megatron.core import parallel_state
from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
    FineGrainedActivationOffloadingInterface as off_interface,
)
from megatron.core.pipeline_parallel.multimodule_communicator import MultiModulePipelineCommunicator
from megatron.core.pipeline_parallel.p2p_communication import P2PCommunicator
from megatron.core.pipeline_parallel.utils import (
    is_pp_first_stage,
    is_pp_last_stage,
    is_vp_first_stage,
    is_vp_last_stage,
)
from megatron.core.process_groups_config import (
    MultiModuleProcessGroupCollection,
    ProcessGroupCollection,
)
from megatron.core.transformer.cuda_graphs import create_cudagraphs, set_current_microbatch
from megatron.core.transformer.enums import CudaGraphScope
from megatron.core.transformer.moe.router import MoEAuxLossAutoScaler
from megatron.core.utils import (
    drain_embedding_wgrad_compute,
    get_attr_wrapped_model,
    get_model_config,
    get_model_type,
    nvtx_range_pop,
    nvtx_range_push,
)

from .combined_1f1b import (
    combined_1f1b_schedule_for_interleaved_pipelining,
    combined_1f1b_schedule_for_no_pipelining,
)
from .hybrid_cp_schedule import hybrid_context_parallel_forward_backward

# Types
Shape = Union[List[int], torch.Size]


def get_forward_backward_func(pp_size: Optional[int] = None, vp_size: Optional[int] = None):
    """Retrieves the appropriate forward_backward function given the
    configuration of parallel_state.

    Returns a function that will perform all of the forward and
    backward passes of the model given the pipeline model parallel
    world size and virtual pipeline model parallel world size in the
    global parallel_state.

    Note that if using sequence parallelism, the sequence length component of
    the tensor shape is updated to original_sequence_length /
    tensor_model_parallel_world_size.

    The function returned takes the following arguments:

    forward_step_func (required): A function that takes a data
        iterator and a model as its arguments and return the model's
        forward output and the loss function. The loss function should
        take one torch.Tensor and return a torch.Tensor of loss and a
        dictionary of string -> torch.Tensor.

        A third argument, checkpoint_activations_microbatch, indicates
        that the activations for this microbatch should be
        checkpointed. A None value for this argument indicates that
        the default from the configuration should be used. This is
        used when the
        num_microbatches_with_partial_activation_checkpoints is used.

        For example:

        def loss_func(loss_mask, output_tensor):
            losses = output_tensor.float()
            loss_mask = loss_mask.view(-1).float()
            loss = torch.sum(losses.view(-1) * loss_mask) / loss_mask.sum()

            # Reduce loss for logging.
            averaged_loss = average_losses_across_data_parallel_group([loss])

            return loss, {'lm loss': averaged_loss[0]}

        def forward_step(data_iterator, model):
            data, loss_mask = next(data_iterator)
            output = model(data)
            return output, partial(loss_func, loss_mask)


        forward_backward_func(forward_step_func=forward_step, ...)


    data_iterator (required): an iterator over the data, will be
        passed as is to forward_step_func. Expected to be a list of
        iterators in the case of interleaved pipeline parallelism.

    model (required): the actual model. Expected to be a list of modules in the case of interleaved
        pipeline parallelism. Must be a (potentially wrapped) megatron.core.models.MegatronModule.

    num_microbatches (int, required):
        The number of microbatches to go through

    seq_length (int, required): Sequence length of the current global batch. If this is a dual-stack
        transformer, this is the encoder's sequence length. This is ignored if variable_seq_lengths
        in the config is True. Otherwise, each microbatch in the current global batch size must use
        this sequence length.

    micro_batch_size (int, required): The number of sequences in a microbatch.

    decoder_seq_length (int, optional): The sequence length for the decoder in a dual-stack
        transformer. This is ignored for a single-stack transformer.

    forward_only (optional, default = False): Perform only the forward step.

    collect_non_loss_data (optional, bool, default=False): TODO.

    first_val_step (bool, optional): Is the first step of the validation phase. Used by
        Transformer Engine modules to only update their fp8 weights only on the first validation
        step.

    adjust_tensor_shapes_fn (Callable, optional): A function that adjusts the receive and send
        tensor shapes. Only applicable in forward_backward_pipelining_without_interleaving for now.
        Takes in a list of receive shapes and a list of send shapes and returns the adjusted
        respective list of shapes. Thus it is not used in the other forward-backward functions
        which have different shape handling.

    force_all_reduce (bool, optional): If true, force use of all-reduce for gradient reduction
        instead of reduce-scatter (if using distributed optimizer) in this iteration to ensure all
        data-parallel ranks have fully reduced gradients. This is useful for easier wgrad saving
        (can just inspect DP replica 0 to get full set of wgrads for entire model).

    Args:
        pp_size (Optional[int]): Pipeline model parallel size to use.
        vp_size (Optional[int]): Virtual pipeline model parallel size to use.
            If both pp_size and vp_size are None, both values fall back to parallel_state.
            Otherwise, provided values are used as-is and None is treated as an explicit input.

    """
    if pp_size is None and vp_size is None:
        pp_size = parallel_state.get_pipeline_model_parallel_world_size()
        vp_size = parallel_state.get_virtual_pipeline_model_parallel_world_size()

    if pp_size > 1:
        if vp_size is not None:
            forward_backward_func = forward_backward_pipelining_with_interleaving
        else:
            forward_backward_func = forward_backward_pipelining_without_interleaving
    else:
        forward_backward_func = forward_backward_no_pipelining
    return forward_backward_func


def deallocate_output_tensor(out, deallocate_pipeline_outputs=False):
    '''Pseudo-deallocate (i.e., set to scalar) the output tensor's '.data' field.

    This method should be called right after the output tensor has been
    sent to the next pipeline stage. At this point, the output tensor is
    only useful for its '.grad_fn' field, and not its '.data'.

    Supports multiple formats:
    - torch.Tensor: Deallocates the tensor directly
    - List[Tensor]: Recursively deallocates each element
    - Dict[str, Tensor]: Recursively deallocates each value (for multi-module pipelines)
    '''
    if (out is None) or (not deallocate_pipeline_outputs):
        return

    # Handle dict format (multi-module pipelines)
    if isinstance(out, dict):
        for value in out.values():
            deallocate_output_tensor(value, deallocate_pipeline_outputs)
        return

    # Handle list format
    if isinstance(out, list):
        for item in out:
            deallocate_output_tensor(item, deallocate_pipeline_outputs)
        return

    # Base case: deallocate tensor
    assert isinstance(out, torch.Tensor), "expected Tensor, found %s." % type(out).__name__
    assert out._base is None, "counter-productive to free a view of another tensor."
    out.data = torch.empty((1,), device=out.device, dtype=out.dtype)


def custom_backward(output, grad_output):
    '''Directly call C++ autograd engine.

    To make the 'deallocate_output_tensor' (above) optimization work, the C++
    autograd engine must be called directly, bypassing Pytorch's
    torch.autograd.backward. Pytorch's 'backward' checks that the output and
    grad have the same shape, while C++'s 'backward' does not.
    '''

    assert output.numel() == 1, "output should be pseudo-'freed' in schedule, to optimize memory"
    assert isinstance(output, torch.Tensor), "output == '%s'." % type(output).__name__
    assert isinstance(grad_output, (torch.Tensor, type(None))), (
        "grad_output == '%s'." % type(grad_output).__name__
    )

    # Handle scalar output
    if grad_output is None:
        assert output.numel() == 1, "implicit grad requires scalar output."
        grad_output = torch.ones_like(output, memory_format=torch.preserve_format)

    # Call c++ engine [ see torch/csrc/autograd/python_engine.cpp ]
    Variable._execution_engine.run_backward(
        tensors=(output,),
        grad_tensors=(grad_output,),
        keep_graph=False,
        create_graph=False,
        inputs=tuple(),
        allow_unreachable=True,
        accumulate_grad=True,
    )


def forward_step_calc_loss(
    model,
    output_tensor,
    loss_func,
    config,
    vp_stage,
    collect_non_loss_data,
    num_microbatches,
    forward_data_store,
    cp_group_size=None,
    is_last_stage=None,
):
    """Calculate the loss and number of tokens for forward_step()"""

    from megatron.core.transformer.multi_token_prediction import MTPLossAutoScaler

    model_vp_stage = getattr(model, "vp_stage", None)
    if vp_stage is not None and model_vp_stage is not None:
        assert (
            vp_stage == model_vp_stage
        ), f"vp_stage ({vp_stage}) doesn't match model_vp_stage ({model_vp_stage})"

    if cp_group_size is None and is_last_stage is None:
        # fallback to parallel state
        cp_group_size = parallel_state.get_context_parallel_world_size()
        is_last_stage = parallel_state.is_pipeline_last_stage(
            ignore_virtual=False, vp_stage=vp_stage
        )
    else:
        assert is_last_stage is not None, "is_last_stage must be provided"
        if is_last_stage:
            assert cp_group_size is not None, "cp_group_size must be provided on last stage"

    num_tokens = torch.tensor(0, dtype=torch.int)
    if is_last_stage:
        if loss_func is None:
            forward_data_store.append(output_tensor)
        elif not collect_non_loss_data:
            outputs = loss_func(output_tensor)
            if len(outputs) == 3:
                output_tensor, num_tokens, loss_reduced = outputs
                if not config.calculate_per_token_loss:
                    # Protect against division by zero when all tokens are masked
                    #   in a microbatch.
                    output_tensor /= torch.clamp(num_tokens, min=1)
                    output_tensor /= num_microbatches
            else:
                # preserve legacy loss averaging behavior (ie, over the number of microbatches)
                assert len(outputs) == 2
                output_tensor, loss_reduced = outputs
                output_tensor *= cp_group_size
                output_tensor /= num_microbatches
            forward_data_store.append(loss_reduced)
        else:
            data = loss_func(output_tensor, non_loss_data=True)
            forward_data_store.append(data)

    if config.timers is not None:
        config.timers('forward-compute').stop()

    # Set the loss scale for the auxiliary loss of the MoE layer.
    # Since we use a trick to do backward on the auxiliary loss, we need to set the scale
    # explicitly.
    if hasattr(config, 'num_moe_experts') and config.num_moe_experts is not None:
        # Calculate the loss scale based on the grad_scale_func if available, else default to 1.
        loss_scale = (
            config.grad_scale_func(torch.ones(1, device=output_tensor.device))
            if config.grad_scale_func is not None
            else torch.ones(1, device=output_tensor.device)
        )
        # Set the loss scale
        if config.calculate_per_token_loss:
            MoEAuxLossAutoScaler.set_loss_scale(loss_scale)
        else:
            cp_size_for_scaling = cp_group_size if cp_group_size is not None else 1
            MoEAuxLossAutoScaler.set_loss_scale(loss_scale * cp_size_for_scaling / num_microbatches)

    # Set the loss scale for Multi-Token Prediction (MTP) loss.
    if hasattr(config, 'mtp_num_layers') and config.mtp_num_layers is not None:
        # Calculate the loss scale based on the grad_scale_func if available, else default to 1.
        loss_scale = (
            config.grad_scale_func(torch.ones(1, device=output_tensor.device))
            if config.grad_scale_func is not None
            else torch.ones(1, device=output_tensor.device)
        )
        # Set the loss scale
        if config.calculate_per_token_loss:
            MTPLossAutoScaler.set_loss_scale(loss_scale)
        else:
            MTPLossAutoScaler.set_loss_scale(loss_scale / num_microbatches)

    return output_tensor, num_tokens


def forward_step(
    forward_step_func,
    data_iterator,
    model,
    num_microbatches,
    input_tensor,
    forward_data_store,
    config,
    cp_group_size,
    collect_non_loss_data=False,
    checkpoint_activations_microbatch=None,
    is_first_microbatch=False,
    current_microbatch=None,
    vp_stage=None,
    is_last_stage=True,
):
    """Forward step for passed-in model.

    If it is the first stage, the input tensor is obtained from the data_iterator.
    Otherwise, the passed-in input_tensor is used.

    Args:
        forward_step_func (callable):
            The forward step function for the model that takes the
            data iterator as the first argument, and model as the second.
            This user's forward step is expected to output a tuple of two elements:

                1. The output object from the forward step. This output object needs to be a
                    tensor or some kind of collection of tensors. The only hard requirement
                    for this object is that it needs to be acceptible as input into the second
                    function.
                2. A function to reduce (optionally) the output from the forward step. This
                    could be a reduction over the loss from the model, it could be a function that
                    grabs the output from the model and reformats, it could be a function that just
                    passes through the model output. This function must have one of the following
                    patterns, and depending on the pattern different things happen internally:

                        a. A tuple of reduced loss and some other data. Note that in this case
                            the first argument is divided by the number of global microbatches,
                            assuming it is a loss, so that the loss is stable as a function of
                            the number of devices the step is split across.
                        b. A triple of reduced loss, number of tokens, and some other data. This
                            is similar to case (a), but the loss is further averaged across the
                            number of tokens in the batch. If the user is not already averaging
                            across the number of tokens, this pattern is useful to use.
                        c. Any arbitrary data the user wants (eg a dictionary of tensors, a list
                            of tensors, etc in the case of inference). To trigger case 3 you need
                            to specify `collect_non_loss_data=True` and you may also want to
                            specify `forward_only=True` in the call to the parent forward_backward
                            function.
        data_iterator (iterator):
            The data iterator.
        model (nn.Module):
            The model to perform the forward step on.
        num_microbatches (int):
            The number of microbatches.
        input_tensor (Tensor or list[Tensor]):
            The input tensor(s) for the forward step.
        forward_data_store (list):
            The list to store the forward data. If you go down path 2.a or
            2.b for the return of your forward reduction function then this will store only the
            final dimension of the output, for example the metadata output by the loss function.
            If you go down the path of 2.c then this will store the entire output of the forward
            reduction function applied to the model output.
        config (object):
            The configuration object.
        collect_non_loss_data (bool, optional):
            Whether to collect non-loss data. Defaults to False.
            This is the path to use if you want to collect arbitrary output from the model forward,
            such as with inference use cases. Defaults to False.
        checkpoint_activations_microbatch (int, optional):
            The microbatch to checkpoint activations.
            Defaults to None.
        is_first_microbatch (bool, optional):
            Whether it is the first microbatch. Defaults to False.
        current_microbatch (int, optional):
            The current microbatch. Defaults to None.
        vp_stage (int, optional):
            The virtual pipeline stage. Defaults to None.
        is_last_stage (bool, optional):
            Whether it is the last stage. Defaults to True.
            Also considering virtual stages.
            In case of PP/VPP, is_last_stage/is_vp_last_stage.

    Returns:
        Tensor or list[Tensor]: The output object(s) from the forward step.
        Tensor: The number of tokens.
    """
    from megatron.core.transformer.multi_token_prediction import MTPLossAutoScaler

    if config.timers is not None:
        config.timers('forward-compute', log_level=2).start()

    if is_first_microbatch and hasattr(model, 'set_is_first_microbatch'):
        model.set_is_first_microbatch()
    if current_microbatch is not None:
        set_current_microbatch(model, current_microbatch)

    unwrap_output_tensor = False
    if not isinstance(input_tensor, list):
        input_tensor = [input_tensor]
        unwrap_output_tensor = True

    set_input_tensor = get_attr_wrapped_model(model, "set_input_tensor")
    set_input_tensor(input_tensor)

    if config.enable_autocast:
        context_manager = torch.autocast("cuda", dtype=config.autocast_dtype)
    else:
        context_manager = contextlib.nullcontext()
    with context_manager:
        if checkpoint_activations_microbatch is None:
            output_tensor, loss_func = forward_step_func(data_iterator, model)
        else:
            output_tensor, loss_func = forward_step_func(
                data_iterator, model, checkpoint_activations_microbatch
            )
    output_tensor, num_tokens = forward_step_calc_loss(
        model,
        output_tensor,
        loss_func,
        config,
        vp_stage,
        collect_non_loss_data,
        num_microbatches,
        forward_data_store,
        cp_group_size,
        is_last_stage,
    )

    if unwrap_output_tensor:
        return output_tensor, num_tokens
    return [output_tensor], num_tokens


def backward_step(input_tensor, output_tensor, output_tensor_grad, config):
    """Backward step through passed-in output tensor.

    If last stage, output_tensor_grad is None, otherwise gradient of loss
    with respect to stage's output tensor.

    Returns gradient of loss with respect to input tensor (None if first stage)."""

    # NOTE: This code currently can handle at most one skip connection. It
    # needs to be modified slightly to support arbitrary numbers of skip
    # connections.

    if config.timers is not None:
        config.timers('backward-compute', log_level=2).start()

    # Retain the grad on the input_tensor.
    unwrap_input_tensor_grad = False
    if not isinstance(input_tensor, list):
        input_tensor = [input_tensor]
        unwrap_input_tensor_grad = True
    for x in input_tensor:
        if x is not None:
            x.retain_grad()

    if not isinstance(output_tensor, list):
        output_tensor = [output_tensor]
    if not isinstance(output_tensor_grad, list):
        output_tensor_grad = [output_tensor_grad]

    # Backward pass.
    if output_tensor_grad[0] is None and config.grad_scale_func is not None:
        output_tensor[0] = config.grad_scale_func(output_tensor[0])

    # In multi-modal models like VLM, some batches may not have images.
    # When no image is present, the vision encoder (as a separate pipeline stage)
    # will not participate in the computation.
    # This results in a tensor that does not require gradients.
    # In such cases, we intentionally skip the backward pass while preserving zero gradients.
    if output_tensor[0].requires_grad:
        if config.deallocate_pipeline_outputs:
            custom_backward(output_tensor[0], output_tensor_grad[0])
        else:
            torch.autograd.backward(output_tensor[0], grad_tensors=output_tensor_grad[0])

    # Collect the grad of the input_tensor.
    input_tensor_grad = [None]
    if input_tensor is not None:
        input_tensor_grad = []
        for x in input_tensor:
            if x is None:
                input_tensor_grad.append(None)
            else:
                input_tensor_grad.append(x.grad)

    if unwrap_input_tensor_grad:
        input_tensor_grad = input_tensor_grad[0]

    if config.timers is not None:
        config.timers('backward-compute').stop()

    return input_tensor_grad


def backward_step_multimodule(
    input_tensor: Dict[str, torch.Tensor],
    output_tensor: Union[torch.Tensor, Dict[str, torch.Tensor]],
    output_tensor_grad: Optional[Dict[str, torch.Tensor]],
    config,
    language_model_module_name: str,
) -> Dict[str, torch.Tensor]:
    """Backward step for multi-module pipelines.

    In multi-module pipelines, tensors are organized as dictionaries with
    module names as keys. Each module's backward pass is performed independently.
    """
    # Retain gradients on all input tensors.
    for module_name, tensor in input_tensor.items():
        if isinstance(tensor, list):
            tensor = tensor[0]
        if tensor is not None:
            tensor.retain_grad()

    # Last stage: output_tensor is a scalar loss from the language model.
    # Associate it with the language_model_module_name.
    if not isinstance(output_tensor, dict):
        output_tensor = {language_model_module_name: output_tensor}

    # Handle output_tensor_grad: None (last stage) or dict (intermediate stages).
    if not output_tensor_grad:
        output_tensor_grad = {key: None for key in output_tensor.keys()}

    # Apply grad scaling if needed (for last stage only).
    for module_name in output_tensor.keys():
        if output_tensor_grad[module_name] is None and config.grad_scale_func is not None:
            output_tensor[module_name] = config.grad_scale_func(output_tensor[module_name])

    # Perform backward pass for each module.
    for module_name in output_tensor.keys():
        output_tensor_module = output_tensor[module_name]
        output_tensor_grad_module = output_tensor_grad[module_name]

        # In multi-modal models like VLM, some batches may not have images.
        # In such cases, skip backward while preserving zero gradients.
        if output_tensor_module is not None and output_tensor_module.requires_grad:
            if config.deallocate_pipeline_outputs:
                custom_backward(output_tensor_module, output_tensor_grad_module)
            else:
                torch.autograd.backward(
                    output_tensor_module, grad_tensors=output_tensor_grad_module
                )

    # Collect gradients for input tensors.
    input_tensor_grad = {}
    for module_name, tensor in input_tensor.items():
        if isinstance(tensor, list):
            tensor = tensor[0]
        if tensor is None:
            input_tensor_grad[module_name] = None
        else:
            input_tensor_grad[module_name] = tensor.grad

    return input_tensor_grad


def check_first_val_step(first_val_step, forward_only, cond):
    """Check if it is the first validation step."""
    if (first_val_step is not None) and forward_only:
        return first_val_step and cond
    else:
        return cond


def forward_backward_no_pipelining(
    *,
    forward_step_func,
    data_iterator: Union[Iterator, List[Iterator]],
    model: Union[torch.nn.Module, List[torch.nn.Module]],
    num_microbatches: int,
    seq_length: int,  # unused
    micro_batch_size: int,  # unused
    decoder_seq_length: Optional[int] = None,  # unused
    forward_only: bool = False,
    collect_non_loss_data: bool = False,
    first_val_step: Optional[bool] = None,
    adjust_tensor_shapes_fn: Optional[Callable] = None,  # unused
    p2p_communicator: Optional[P2PCommunicator] = None,  # unused
    pg_collection: Optional[ProcessGroupCollection] = None,
    force_all_reduce: Optional[bool] = False,
):
    """Run forward and backward passes with no pipeline parallelism"""

    if pg_collection is None:
        tp_group = parallel_state.get_tensor_model_parallel_group()
        cp_group = parallel_state.get_context_parallel_group()
        embd_group = parallel_state.get_embedding_group(check_initialized=False)
        pp_group = parallel_state.get_pipeline_model_parallel_group()
        pos_emb_group = parallel_state.get_position_embedding_group(check_initialized=False)
        pg_collection = ProcessGroupCollection()
        pg_collection.tp = tp_group
        pg_collection.cp = cp_group
        pg_collection.embd = embd_group
        pg_collection.pos_embd = pos_emb_group
        pg_collection.pp = pp_group
        pg_collection.dp_cp = parallel_state.get_data_parallel_group(
            with_context_parallel=True, partial_data_parallel=False
        )

    elif pg_collection is not None:
        assert hasattr(pg_collection, 'tp'), "pg_collection must have tp"
        assert hasattr(pg_collection, 'cp'), "pg_collection must have cp"

    if isinstance(model, list):
        assert len(model) == 1, "non-pipeline-parallel schedule does not support model chunking"
        model = model[0]
    if isinstance(data_iterator, list):
        assert (
            len(data_iterator) == 1
        ), "non-pipeline-parallel schedule does not support model chunking"
        data_iterator = data_iterator[0]
    assert (
        adjust_tensor_shapes_fn is None
    ), "adjust_tensor_shapes_fn is not supported for non-pipeline-parallel schedule"

    config = get_model_config(model)
    if config.timers is not None:
        config.timers('forward-backward', log_level=1).start(barrier=config.barrier_with_L1_time)

    no_sync_func = config.no_sync_func
    if no_sync_func is None:
        no_sync_func = contextlib.nullcontext

    model_type = get_model_type(model)

    forward_data_store = []
    input_tensor, output_tensor_grad = None, None
    total_num_tokens = torch.zeros([], dtype=torch.int, device="cuda")

    if config.overlap_moe_expert_parallel_comm and not forward_only:
        forward_data_store, total_num_tokens = combined_1f1b_schedule_for_no_pipelining(
            forward_step_func,
            data_iterator,
            model,
            num_microbatches,
            input_tensor,
            output_tensor_grad,
            forward_data_store,
            config,
            collect_non_loss_data,
            first_val_step,
            forward_only,
            no_sync_func,
            total_num_tokens,
            partial(check_first_val_step, first_val_step, forward_only),
        )
    elif config.hybrid_context_parallel:
        forward_data_store, total_num_tokens = hybrid_context_parallel_forward_backward(
            forward_step_func,
            data_iterator,
            model,
            num_microbatches,
            input_tensor,
            output_tensor_grad,
            forward_data_store,
            config,
            collect_non_loss_data,
            first_val_step,
            forward_only,
            no_sync_func,
            total_num_tokens,
            check_first_val_step,
            model_type,
        )
    else:
        with no_sync_func():
            for i in range(num_microbatches - 1):
                output_tensor, num_tokens = forward_step(
                    forward_step_func,
                    data_iterator,
                    model,
                    num_microbatches,
                    input_tensor,
                    forward_data_store,
                    config,
                    pg_collection.cp.size(),
                    collect_non_loss_data,
                    is_first_microbatch=check_first_val_step(first_val_step, forward_only, i == 0),
                    current_microbatch=i,
                )
                total_num_tokens += num_tokens
                if not forward_only:
                    backward_step(input_tensor, output_tensor, output_tensor_grad, config)
        # Run computation for last microbatch out of context handler (want to
        # synchronize gradients).
        output_tensor, num_tokens = forward_step(
            forward_step_func,
            data_iterator,
            model,
            num_microbatches,
            input_tensor,
            forward_data_store,
            config,
            pg_collection.cp.size(),
            collect_non_loss_data,
            is_first_microbatch=check_first_val_step(
                first_val_step, forward_only, num_microbatches == 1
            ),
            current_microbatch=num_microbatches - 1,
        )

        total_num_tokens += num_tokens

        if not forward_only:
            backward_step(input_tensor, output_tensor, output_tensor_grad, config)

    if config.finalize_model_grads_func is not None and not forward_only:
        # Finalize model grads (perform full grad all-reduce / reduce-scatter for
        # data parallelism and layernorm all-reduce for sequence parallelism).
        config.finalize_model_grads_func(
            [model],
            total_num_tokens if config.calculate_per_token_loss else None,
            pg_collection=pg_collection,
            force_all_reduce=force_all_reduce,
        )

    if getattr(config, 'fine_grained_activation_offloading', False):
        off_interface.reset()

    if config.timers is not None:
        config.timers('forward-backward').stop()

    if (
        hasattr(config, 'cuda_graph_impl')
        and config.cuda_graph_impl == "local"
        and CudaGraphScope.full_iteration not in config.cuda_graph_scope
    ):
        create_cudagraphs()

    return forward_data_store


def clear_embedding_activation_buffer(config, model, is_last_stage):
    """Clear embedding activation buffer."""

    if is_last_stage and config.defer_embedding_wgrad_compute:
        if isinstance(model, list):
            embedding_module = get_attr_wrapped_model(
                model[-1], 'post_process', return_model_obj=True
            )
        else:
            embedding_module = get_attr_wrapped_model(model, 'post_process', return_model_obj=True)

        # Need to ensure no stray activations exists in this buffer
        embedding_module.embedding_activation_buffer.clear()

        return embedding_module
    else:
        return None


def finish_embedding_wgrad_compute(config, embedding_module, is_last_stage, tp_group):
    """Finish embedding wgrad compute."""
    if is_last_stage and config.defer_embedding_wgrad_compute:
        embedding_activation_buffer = embedding_module.embedding_activation_buffer
        grad_output_buffer = embedding_module.grad_output_buffer
        weight = (
            embedding_module.output_layer.weight
            if embedding_module.share_embeddings_and_output_weights
            else embedding_module.shared_embedding_or_output_weight()
        )

        drain_embedding_wgrad_compute(
            config, embedding_activation_buffer, grad_output_buffer, weight, tp_group
        )


def get_pp_rank_microbatches(
    num_microbatches,
    num_model_chunks,
    microbatch_group_size_per_vp_stage,
    forward_only=False,
    overlap_moe_expert_parallel_comm=False,
    p2p_communicator: Optional[P2PCommunicator] = None,
):
    """Get the number of total, warmup, and remaining microbatches in PP scheduling."""
    if p2p_communicator is not None:
        pipeline_parallel_size = p2p_communicator.pp_group.size()
        pipeline_parallel_rank = p2p_communicator.pp_group.rank()
        virtual_pipeline_parallel_size = p2p_communicator.virtual_pipeline_model_parallel_size
    else:
        pipeline_parallel_size = parallel_state.get_pipeline_model_parallel_world_size()
        pipeline_parallel_rank = parallel_state.get_pipeline_model_parallel_rank()
        virtual_pipeline_parallel_size = (
            parallel_state.get_virtual_pipeline_model_parallel_world_size()
        )

    total_num_microbatches = num_microbatches * num_model_chunks
    are_all_microbatches_in_warmup = False

    if forward_only:
        num_warmup_microbatches = total_num_microbatches
    elif pipeline_parallel_size > 1:
        if virtual_pipeline_parallel_size is None:
            # forward_backward_pipelining_without_interleaving
            num_warmup_microbatches = pipeline_parallel_size - pipeline_parallel_rank - 1
        else:
            # forward_backward_pipelining_with_interleaving
            # Run (num_model_chunks-1)*microbatch_group_size_per_vp_stage on
            # all workers, followed by more microbatches after depending on
            # stage ID (more forward passes for earlier stages, later stages can
            # immediately start with 1F1B).
            num_warmup_microbatches = (pipeline_parallel_size - pipeline_parallel_rank - 1) * 2
            num_warmup_microbatches += (num_model_chunks - 1) * microbatch_group_size_per_vp_stage
            # When enabling overlap_moe_expert_parallel_comm, we schedule one extra micro-batch
            # forward step before the 1f1b stages. This is needed to ensure the forward
            # and backward computations are independent in all 1f1b steps.
            if overlap_moe_expert_parallel_comm:
                num_warmup_microbatches = num_warmup_microbatches + 1
    else:
        # forward_backward_no_pipelining
        # This path is only used for cuda graph capturing compatibility for the PP=1 case.
        num_warmup_microbatches = 0

    if num_warmup_microbatches >= total_num_microbatches:
        num_warmup_microbatches = total_num_microbatches
        are_all_microbatches_in_warmup = True
    num_microbatches_remaining = total_num_microbatches - num_warmup_microbatches

    return (
        total_num_microbatches,
        are_all_microbatches_in_warmup,
        num_warmup_microbatches,
        num_microbatches_remaining,
    )


def get_schedule_table(num_microbatches, num_model_chunks, microbatch_group_size_per_vp_stage):
    """Get the schedule table for PP scheduling."""
    schedule_table = []
    for min_microbatch_id_in_group in range(
        0, num_microbatches, microbatch_group_size_per_vp_stage
    ):
        if min_microbatch_id_in_group + microbatch_group_size_per_vp_stage >= num_microbatches:
            # Construct schedule for the last microbatch group
            schedule_table.extend(
                [
                    (microbatch_id, model_chunk_id)
                    for model_chunk_id in range(num_model_chunks)
                    for microbatch_id in range(min_microbatch_id_in_group, num_microbatches)
                ]
            )
        else:
            # Construct schedule for other microbatch groups
            schedule_table.extend(
                [
                    (microbatch_id, model_chunk_id)
                    for model_chunk_id in range(num_model_chunks)
                    for microbatch_id in range(
                        min_microbatch_id_in_group,
                        min_microbatch_id_in_group + microbatch_group_size_per_vp_stage,
                    )
                ]
            )
    return schedule_table


def forward_backward_pipelining_with_interleaving(
    *,
    forward_step_func,
    data_iterator: Union[Iterator, List[Iterator]],
    model: Union[torch.nn.Module, List[torch.nn.Module]],
    num_microbatches: int,
    seq_length: int,
    micro_batch_size: int,
    decoder_seq_length: Optional[int] = None,
    forward_only: bool = False,
    collect_non_loss_data: bool = False,
    first_val_step: Optional[bool] = None,
    adjust_tensor_shapes_fn: Optional[Callable] = None,  # unused
    p2p_communicator: Optional[P2PCommunicator] = None,
    pg_collection: Optional[ProcessGroupCollection] = None,
    force_all_reduce: Optional[bool] = False,
):
    """Run interleaved 1F1B schedule (model split into model chunks), with
    communication between pipeline stages as needed.

    Returns dictionary with losses if the last stage, empty dict otherwise."""

    # Convention used in this function:
    # num_microbatches for number of microbatches per pipeline stage;
    # num_model_chunks for virtual pipeline size;
    # then total_num_microbatches = num_microbatches * num_model_chunks.
    # Their corresponding index variables are
    # microbatch_id in [0, num_microbatches)
    # model_chunk_id in [0, num_model_chunks)
    # virtual_microbatch_id in [0, total_num_microbatches)

    config = get_model_config(model[0])
    if p2p_communicator is None and pg_collection is None:
        p2p_communicator = P2PCommunicator(
            pp_group=parallel_state.get_pipeline_model_parallel_group(), config=config
        )
        tp_group = parallel_state.get_tensor_model_parallel_group()
        cp_group = parallel_state.get_context_parallel_group()
        cp_size = cp_group.size()
        embd_group = parallel_state.get_embedding_group(check_initialized=False)
        pp_group = parallel_state.get_pipeline_model_parallel_group()
        pos_emb_group = parallel_state.get_position_embedding_group(check_initialized=False)

        pg_collection = ProcessGroupCollection()
        pg_collection.tp = tp_group
        pg_collection.cp = cp_group
        pg_collection.embd = embd_group
        pg_collection.pos_embd = pos_emb_group
        pg_collection.pp = pp_group
        pg_collection.dp_cp = parallel_state.get_data_parallel_group(
            with_context_parallel=True, partial_data_parallel=False
        )

    elif p2p_communicator is not None and pg_collection is not None:
        model_type = get_model_type(model[0])
        assert hasattr(p2p_communicator, 'config'), "p2p_communicator must have a config"
        assert hasattr(pg_collection, 'tp'), "pg_collection must have tp"
        assert hasattr(pg_collection, 'cp'), "pg_collection must have cp"
        tp_group = pg_collection.tp
        cp_group = pg_collection.cp
        cp_size = cp_group.size()
    else:
        raise ValueError(
            "Invalid combination of p2p_communicator, pg_collection"
            " provide none or provide all the process groups"
        )

    assert isinstance(model, list), "interleaved pipeline parallelism expected model chunking"
    assert all(isinstance(chunk, torch.nn.Module) for chunk in model), "invalid model chunking"
    assert isinstance(
        data_iterator, list
    ), "interleaved pipeline parallelism expected each model chunk to have a data iterator"
    assert (
        adjust_tensor_shapes_fn is None
    ), "adjust_tensor_shapes_fn is not supported for interleaved pipeline parallelism"

    if config.overlap_p2p_comm and config.batch_p2p_comm:
        raise ValueError("Can not use both overlap_p2p_comm and batch_p2p_comm")

    # Needed only when gradients are finalized in M-Core
    if config.finalize_model_grads_func is not None and not forward_only:
        # vp is ignored for clear_embedding_activation_buffer
        embedding_module = clear_embedding_activation_buffer(
            config, model, is_pp_last_stage(p2p_communicator.pp_group)
        )

    if config.timers is not None:
        config.timers('forward-backward', log_level=1).start(barrier=config.barrier_with_L1_time)

    # Disable async grad reductions
    no_sync_func = config.no_sync_func
    if isinstance(no_sync_func, list):

        def multi_no_sync():
            stack = contextlib.ExitStack()
            for model_chunk_no_sync_func in config.no_sync_func:
                stack.enter_context(model_chunk_no_sync_func())
            return stack

        no_sync_func = multi_no_sync
    if no_sync_func is None:
        no_sync_func = contextlib.nullcontext
    no_sync_context = None

    if config.grad_sync_func is not None and not isinstance(config.grad_sync_func, list):
        config.grad_sync_func = [config.grad_sync_func for _ in model]

    if config.param_sync_func is not None and not isinstance(config.param_sync_func, list):
        config.param_sync_func = [config.param_sync_func for _ in model]

    # Disable config.grad_sync_func and config.param_sync_func if only running forward passes.
    # They will be re-enabled at the end of this function.
    grad_sync_func, param_sync_func = None, None
    if forward_only:
        grad_sync_func, param_sync_func = config.grad_sync_func, config.param_sync_func
        config.grad_sync_func, config.param_sync_func = None, None

    def disable_grad_sync():
        """Disable asynchronous grad reductions"""
        nonlocal no_sync_context
        if no_sync_context is None:
            no_sync_context = no_sync_func()
            no_sync_context.__enter__()

    def enable_grad_sync():
        """Enable asynchronous grad reductions"""
        nonlocal no_sync_context
        if no_sync_context is not None:
            no_sync_context.__exit__(None, None, None)
            no_sync_context = None

    disable_grad_sync()

    # Model chunk IDs with synchronized grads
    synchronized_model_chunks = set()

    input_tensors = [[] for _ in range(len(model))]
    output_tensors = [[] for _ in range(len(model))]
    total_num_tokens = torch.zeros([], dtype=torch.int, device="cuda")

    forward_data_store = []
    output_tensor_grads = None
    if not forward_only:
        output_tensor_grads = [[] for _ in range(len(model))]
    else:
        output_tensor_grads = None

    pipeline_parallel_size = p2p_communicator.pp_group.size()
    pipeline_parallel_rank = p2p_communicator.pp_group.rank()

    if (
        config.microbatch_group_size_per_vp_stage > num_microbatches
        or config.microbatch_group_size_per_vp_stage < pipeline_parallel_size
    ):
        msg = (
            'The number of contiguous micro-batches in a virtual pipeline stage'
            f'should range in [PP={pipeline_parallel_size} , M={num_microbatches}]'
        )
        raise ValueError(msg)

    # If the final micro-batch group has fewer micro-batches than pipeline-parallel size,
    # the pipeline will have dependency bubbles.
    final_microbatch_group_size = num_microbatches % config.microbatch_group_size_per_vp_stage
    if 0 < final_microbatch_group_size < pipeline_parallel_size:
        msg = 'The remainder of M (the total micro-batches) divided by N (number of '
        msg += 'contiguous micro-batches in a virtual pipeline stage) should be 0, '
        msg += 'or larger than or equal to the pipeline-parallel size, but it is '
        msg += f'{final_microbatch_group_size}. '
        msg += 'Otherwise, it introduces dependency bubbles in the pipeline '
        msg += 'and reduces throughput.'
        raise RuntimeError(msg)

    model_type = get_model_type(model[0])

    tensor_shape = [seq_length, micro_batch_size, config.hidden_size]
    tensor_shape[0] = tensor_shape[0] // cp_group.size()
    if config.sequence_parallel:
        tensor_shape[0] = tensor_shape[0] // tp_group.size()

    # Compute number of warmup and remaining microbatches.
    # seems only used for vpp
    num_model_chunks = len(model)
    (
        total_num_microbatches,
        are_all_microbatches_in_warmup,
        num_warmup_microbatches,
        num_microbatches_remaining,
    ) = get_pp_rank_microbatches(
        num_microbatches,
        num_model_chunks,
        config.microbatch_group_size_per_vp_stage,
        forward_only=forward_only,
        overlap_moe_expert_parallel_comm=config.overlap_moe_expert_parallel_comm,
        p2p_communicator=p2p_communicator,
    )

    # Checkpoint the activations of partial Transformer layers in a number of micro-batches
    # within the maximum outstanding micro-batch backpropagations.
    # Micro-batches with the ids less than 'num_microbatches_with_partial_activation_checkpoints'
    # checkpoint partial Transformer layers (or skip checkpointing) and
    # the rest of micro-batches within a window of micro-batches checkpoint
    # all Transformer layers. The window of micro-batches is set by the maximum
    # outstanding backpropagations and becomes smaller at later pipeline stages.
    # Please refer the appendix C in https://arxiv.org/pdf/2205.05198.pdf
    max_outstanding_backprops = None
    if config.num_microbatches_with_partial_activation_checkpoints is not None:
        max_outstanding_backprops = num_warmup_microbatches + 1

    # Synchronize params for first two model chunks
    if config.param_sync_func is not None:
        config.param_sync_func[0](model[0].parameters())
        config.param_sync_func[1](model[1].parameters())

    # Create a tunable schedule lookup table.
    # The schedule lookup table uses the virtual_microbatch_id to find the corresponding
    # microbatch_id and model_chunk_id. For example, the tunable schedule table for
    # PP2 N3M5 with VP2 is constructed as below:
    # virtual_microbatch_id | 0 1 2 3 4 5 6 7 8 9
    # microbatch_id         | 0 1 2 0 1 2 3 4 3 4
    # model_chunk_id        | 0 0 0 1 1 1 0 0 1 1
    schedule_table = get_schedule_table(
        num_microbatches, len(model), config.microbatch_group_size_per_vp_stage
    )

    # Decouple individual lookup table for microbatch_id and model_chunk_id.
    # For example, the micro-batch table for PP2 N3M5 with VP2 is
    # virtual_microbatch_id | 0 1 2 3 4 5 6 7 8 9
    # microbatch_id         | 0 1 2 0 1 2 3 4 3 4
    # Similarly, the model chunk table is
    # virtual_microbatch_id | 0 1 2 3 4 5 6 7 8 9
    # model_chunk_id        | 0 0 0 1 1 1 0 0 1 1
    # Both tables are indexed with virtual_microbatch_id.
    microbatch_id_table, model_chunk_id_table = zip(*schedule_table)

    def get_model_chunk_id(virtual_microbatch_id, forward):
        """Helper method to get the model chunk ID given the iteration number."""
        model_chunk_id = model_chunk_id_table[virtual_microbatch_id % total_num_microbatches]
        if not forward:
            model_chunk_id = num_model_chunks - model_chunk_id - 1
        return model_chunk_id

    def get_microbatch_id_in_model_chunk(iteration_id, forward):
        """Helper method to get the microbatch_id within model chunk given the iteration number."""
        assert forward
        microbatch_id_in_model_chunk = microbatch_id_table[iteration_id]
        return microbatch_id_in_model_chunk

    def num_released_microbatches(virtual_microbatch_id, model_chunk_id):
        """Helper method to count number of released (i.e. popped from input_tensors)
        microbatches for a model chunk."""
        if forward_only:  # Micro-batch is released after forward prop.
            return model_chunk_id_table[:virtual_microbatch_id].count(model_chunk_id)
        else:  # Micro-batch is released after backward prop.
            # Zero backward prop in warmup.
            if virtual_microbatch_id < num_warmup_microbatches:
                return 0
            else:
                backward_microbatch_id = virtual_microbatch_id - num_warmup_microbatches
                model_chunk_id = num_model_chunks - model_chunk_id - 1
                return model_chunk_id_table[:backward_microbatch_id].count(model_chunk_id)

    def is_first_microbatch_for_model_chunk(virtual_microbatch_id: int) -> bool:
        """Check if an iteration is the first for a model chunk."""
        if virtual_microbatch_id < total_num_microbatches:
            return microbatch_id_table[virtual_microbatch_id] == 0
        else:
            return False

    def is_last_microbatch_for_model_chunk(virtual_microbatch_id: int) -> bool:
        """Check if an iteration is the last for a model chunk."""
        if virtual_microbatch_id < total_num_microbatches:
            return microbatch_id_table[virtual_microbatch_id] == num_microbatches - 1
        else:
            return False

    def recv_tensor_from_previous_stage(virtual_microbatch_id, forward):
        """Determine if peers are sending, and where in data structure
        to put received tensors.
        Return a boolean if the pipeline stage expects to recv from peers, and the
        corresponding model_chunk_id for the received tensor.
        """
        recv = True
        # The leading pipeline stage is the first rank in fwd and the last rank in bwd.
        is_leading_pipeline_stage = (
            is_pp_first_stage(p2p_communicator.pp_group)
            if forward
            else is_pp_last_stage(p2p_communicator.pp_group)
        )

        last_model_chunk = (num_model_chunks - 1) if forward else 0

        if is_leading_pipeline_stage:
            # The leading pipeline stage is ahead of the ending pipeline stage
            # (i.e. last rank in fwd and first rank in bwd) by (pipeline_parallel_size - 1).
            # Let's consider bwd as an example with PP 4:
            #       0 1 2 3 ...
            #     0 1 2 3 ...
            #   0 1 2 3 ...
            # 0 1 2 3 ...
            if virtual_microbatch_id < (pipeline_parallel_size - 1):
                # The ending stage has not produced any tensors, so no recv will be initiated.
                recv = False
                next_model_chunk_id = get_model_chunk_id(virtual_microbatch_id + 1, forward)
            else:
                # Find the model chunk of the aligned microbatches in the ending stage.
                # For example, microbatch 0 in the ending stage is aligned with microbatch 3
                # in the leading stage.
                next_model_chunk_id = get_model_chunk_id(
                    virtual_microbatch_id - (pipeline_parallel_size - 1), forward
                )
            # Last model chunk in the final stage does not produce tensors.
            if next_model_chunk_id == last_model_chunk:
                recv = False
            if forward:
                # Model chunk id increases in forward.
                next_model_chunk_id += 1
            else:
                # Model chunk id decreases in backward.
                next_model_chunk_id -= 1
        else:
            next_model_chunk_id = get_model_chunk_id(virtual_microbatch_id + 1, forward)

        return recv, next_model_chunk_id

    def forward_step_helper_preprocess(virtual_microbatch_id, model_chunk_id, microbatch_id):
        """Preprocess for forward_step_helper"""
        # launch param synchronization for next model chunk
        # Note: Asynchronous communication tends to slow down compute.
        # To reduce idling from mismatched microbatch times, we launch
        # asynchronous communication at the same time across the
        # pipeline-parallel group.
        if config.param_sync_func is not None:
            param_sync_virtual_microbatch_id = virtual_microbatch_id + pipeline_parallel_rank
            if (
                param_sync_virtual_microbatch_id < total_num_microbatches
                and is_first_microbatch_for_model_chunk(param_sync_virtual_microbatch_id)
            ):
                param_sync_chunk_id = (
                    get_model_chunk_id(param_sync_virtual_microbatch_id, forward=True) + 1
                )
                if 1 < param_sync_chunk_id < num_model_chunks:
                    config.param_sync_func[param_sync_chunk_id](
                        model[param_sync_chunk_id].parameters()
                    )

        # forward step
        if _is_vp_first_stage(vp_stage=model_chunk_id) and is_pp_first_stage(pp_group):
            if len(input_tensors[model_chunk_id]) == len(output_tensors[model_chunk_id]):
                input_tensors[model_chunk_id].append(None)

        # For non-depth-first pipeline schedules, the first rank would buffer multiple received
        # activation tensors for a model chunk until accessed during warmup.
        # This input buffering is needed to overlap the computation with the receipt of
        # the next inputs. To index the proper buffered inputs for forword_step, we use
        # microbatch_id offset with number of released microbatches that have completed backprop.
        offset = num_released_microbatches(virtual_microbatch_id, model_chunk_id)
        input_tensor = input_tensors[model_chunk_id][microbatch_id - offset]

        return input_tensor

    def forward_step_helper_postprocess(model_chunk_id, output_tensor, num_tokens):
        """Postprocess for forward_step_helper"""
        output_tensors[model_chunk_id].append(output_tensor)

        nonlocal total_num_tokens
        total_num_tokens += num_tokens

        # If forward-only, no need to save tensors for a backward pass.
        if forward_only:
            # Release the tensor that have completed forward step.
            input_tensors[model_chunk_id].pop(0)
            output_tensors[model_chunk_id].pop()

        return

    def forward_step_helper(virtual_microbatch_id, checkpoint_activations_microbatch):
        """Helper method to run forward step with model split into chunks"""
        model_chunk_id = get_model_chunk_id(virtual_microbatch_id, forward=True)
        microbatch_id = get_microbatch_id_in_model_chunk(virtual_microbatch_id, forward=True)

        input_tensor = forward_step_helper_preprocess(
            virtual_microbatch_id, model_chunk_id, microbatch_id
        )

        output_tensor, num_tokens = forward_step(
            forward_step_func,
            data_iterator[model_chunk_id],
            model[model_chunk_id],
            num_microbatches,
            input_tensor,
            forward_data_store,
            config,
            cp_group_size=cp_size,
            collect_non_loss_data=collect_non_loss_data,
            checkpoint_activations_microbatch=checkpoint_activations_microbatch,
            is_first_microbatch=check_first_val_step(
                first_val_step,
                forward_only,
                is_first_microbatch_for_model_chunk(virtual_microbatch_id),
            ),
            current_microbatch=microbatch_id,
            vp_stage=model_chunk_id,
            is_last_stage=_is_vp_last_stage(vp_stage=model_chunk_id) and is_pp_last_stage(pp_group),
        )

        forward_step_helper_postprocess(model_chunk_id, output_tensor, num_tokens)

        return output_tensor

    def backward_step_helper_preprocess(virtual_microbatch_id, model_chunk_id):
        """Preprocess for backward_step_helper"""
        # launch grad synchronization (default)
        if config.grad_sync_func is None and is_last_microbatch_for_model_chunk(
            virtual_microbatch_id
        ):
            enable_grad_sync()
            synchronized_model_chunks.add(model_chunk_id)

        # pylint: disable=E0606
        if _is_vp_last_stage(vp_stage=model_chunk_id) and is_pp_last_stage(pp_group):
            if len(output_tensor_grads[model_chunk_id]) == 0:
                output_tensor_grads[model_chunk_id].append(None)
        input_tensor = input_tensors[model_chunk_id].pop(0)
        output_tensor = output_tensors[model_chunk_id].pop(0)
        output_tensor_grad = output_tensor_grads[model_chunk_id].pop(0)

        return input_tensor, output_tensor, output_tensor_grad

    def backward_step_helper_postprocess(virtual_microbatch_id):
        """Postprocess for backward_step_helper"""
        # launch grad synchronization (custom grad sync)
        # Note: Asynchronous communication tends to slow down compute.
        # To reduce idling from mismatched microbatch times, we launch
        # asynchronous communication at the same time across the
        # pipeline-parallel group.
        if config.grad_sync_func is not None:
            grad_sync_virtual_microbatch_id = virtual_microbatch_id - pipeline_parallel_rank
            if grad_sync_virtual_microbatch_id >= 0 and is_last_microbatch_for_model_chunk(
                grad_sync_virtual_microbatch_id
            ):
                grad_sync_chunk_id = get_model_chunk_id(
                    grad_sync_virtual_microbatch_id, forward=False
                )
                enable_grad_sync()
                config.grad_sync_func[grad_sync_chunk_id](model[grad_sync_chunk_id].parameters())
                synchronized_model_chunks.add(grad_sync_chunk_id)
        disable_grad_sync()

    def backward_step_helper(virtual_microbatch_id):
        """Helper method to run backward step with model split into chunks"""
        nonlocal output_tensor_grads
        model_chunk_id = get_model_chunk_id(virtual_microbatch_id, forward=False)

        input_tensor, output_tensor, output_tensor_grad = backward_step_helper_preprocess(
            virtual_microbatch_id, model_chunk_id
        )

        input_tensor_grad = backward_step(input_tensor, output_tensor, output_tensor_grad, config)

        backward_step_helper_postprocess(virtual_microbatch_id)

        return input_tensor_grad

    def forward_backward_helper_wrapper(
        f_virtual_microbatch_id=None,
        b_virtual_microbatch_id=None,
        pre_forward=None,
        pre_backward=None,
        post_forward=None,
        post_backward=None,
        checkpoint_activations_microbatch=None,
    ):
        """
        wrap forward_helper, backward_helper, and combined_forward_backward_helper in a unified way
        """
        if config.overlap_moe_expert_parallel_comm and not forward_only:  # Combined 1F1B path
            return combined_1f1b_schedule_for_interleaved_pipelining(
                config,
                forward_step_func,
                data_iterator,
                model,
                num_microbatches,
                forward_data_store,
                forward_step_helper_preprocess,
                forward_step_helper_postprocess,
                backward_step_helper_preprocess,
                backward_step_helper_postprocess,
                get_microbatch_id_in_model_chunk,
                get_model_chunk_id,
                partial(check_first_val_step, first_val_step, forward_only),
                is_first_microbatch_for_model_chunk,
                collect_non_loss_data,
                f_virtual_microbatch_id=f_virtual_microbatch_id,
                b_virtual_microbatch_id=b_virtual_microbatch_id,
                pre_forward=pre_forward,
                pre_backward=pre_backward,
                post_forward=post_forward,
                post_backward=post_backward,
            )
        else:  # Conventional interleaved 1F1B path
            forward_output_tensor = None
            backward_input_tensor_grad = None
            # forward pass
            if f_virtual_microbatch_id is not None:
                forward_model_chunk_id = get_model_chunk_id(f_virtual_microbatch_id, forward=True)
                if pre_forward is not None:
                    pre_forward()
                forward_output_tensor = forward_step_helper(
                    f_virtual_microbatch_id, checkpoint_activations_microbatch
                )
                if post_forward is not None:
                    forward_output_tensor = post_forward(forward_output_tensor)

            # Backward pass.
            if b_virtual_microbatch_id is not None:
                backward_model_chunk_id = get_model_chunk_id(b_virtual_microbatch_id, forward=False)
                if pre_backward is not None:
                    pre_backward()
                backward_input_tensor_grad = backward_step_helper(b_virtual_microbatch_id)
                if post_backward is not None:
                    backward_input_tensor_grad = post_backward(backward_input_tensor_grad)
            return forward_output_tensor, backward_input_tensor_grad

    # ==============================main logic=========================================
    _is_vp_first_stage = partial(
        is_vp_first_stage, vp_size=config.virtual_pipeline_model_parallel_size
    )
    _is_vp_last_stage = partial(
        is_vp_last_stage, vp_size=config.virtual_pipeline_model_parallel_size
    )
    pp_group = p2p_communicator.pp_group

    # Run warmup forward passes.
    nvtx_range_push(suffix="warmup")
    input_tensors[0].append(
        p2p_communicator.recv_forward(
            tensor_shape, _is_vp_first_stage(vp_stage=0) and is_pp_first_stage(pp_group)
        )
    )

    fwd_wait_handles = None
    fwd_wait_recv_handles = None
    bwd_wait_handles = None
    bwd_wait_recv_handles = None
    if is_pp_first_stage(p2p_communicator.pp_group):
        fwd_recv_buffer_size = (
            config.microbatch_group_size_per_vp_stage - pipeline_parallel_size + 1
        )
    else:
        fwd_recv_buffer_size = 1
    if is_pp_last_stage(p2p_communicator.pp_group):
        bwd_recv_buffer_size = (
            config.microbatch_group_size_per_vp_stage - pipeline_parallel_size + 1
        )
    else:
        bwd_recv_buffer_size = 1
    fwd_recv_buffer = [None] * fwd_recv_buffer_size
    bwd_recv_buffer = [None] * bwd_recv_buffer_size
    recv_prev_wait_handles = []
    send_next_wait_handle = None
    send_prev_wait_handle = None
    recv_next_wait_handles = []

    for k in range(num_warmup_microbatches):
        cur_model_chunk_id = get_model_chunk_id(k, forward=True)

        if config.overlap_p2p_comm_warmup_flush:
            if (
                not (
                    _is_vp_first_stage(vp_stage=cur_model_chunk_id) and is_pp_first_stage(pp_group)
                )
                and k != 0
            ):
                assert recv_prev_wait_handles, (
                    f'pp rank {pipeline_parallel_rank}, iteration {k},'
                    'should have registered recv handle'
                )
                recv_prev_wait_handle = recv_prev_wait_handles.pop(0)
                recv_prev_wait_handle.wait()

        # Determine if tensor should be received from previous stage.
        recv_prev, next_forward_model_chunk_id = recv_tensor_from_previous_stage(k, forward=True)

        # No receive in last iteration when recv iteration k+1.
        if k == (total_num_microbatches - 1):
            recv_prev = False

        # Prefetch recv for iteration k+1 for non-first ranks.
        if config.overlap_p2p_comm_warmup_flush and not is_pp_first_stage(
            p2p_communicator.pp_group
        ):
            fwd_recv_buffer[k % fwd_recv_buffer_size], fwd_wait_recv_handles = (
                p2p_communicator.send_forward_recv_forward(
                    output_tensor=None,  # No output_tensor to send.
                    recv_prev=recv_prev,
                    tensor_shape=tensor_shape,
                    overlap_p2p_comm=True,
                )
            )

            if fwd_wait_recv_handles:
                recv_prev_wait_handles.append(fwd_wait_recv_handles.pop("recv_prev"))

        # Decide to checkpoint all layers' activations of the current micro-batch.
        if max_outstanding_backprops is not None:
            checkpoint_activations_microbatch = (
                k % max_outstanding_backprops
                >= config.num_microbatches_with_partial_activation_checkpoints
            )
        else:
            checkpoint_activations_microbatch = None

        output_tensor, _ = forward_backward_helper_wrapper(
            f_virtual_microbatch_id=k,
            checkpoint_activations_microbatch=checkpoint_activations_microbatch,
        )

        # Don't send tensor downstream if on last stage.
        if _is_vp_last_stage(vp_stage=cur_model_chunk_id) and is_pp_last_stage(pp_group):
            output_tensor = None

        # Send and receive tensors as appropriate (send tensors computed
        # in this iteration; receive tensors for next iteration).
        if not config.overlap_p2p_comm_warmup_flush:
            if (
                k == (num_warmup_microbatches - 1)
                and not config.overlap_p2p_comm
                and not forward_only
                and not are_all_microbatches_in_warmup
            ):
                input_tensor_grad = None
                recv_next = True
                if is_pp_last_stage(p2p_communicator.pp_group):
                    recv_next = False
                (input_tensor, output_tensor_grad) = (
                    p2p_communicator.send_forward_backward_recv_forward_backward(
                        output_tensor,
                        input_tensor_grad,
                        recv_prev=recv_prev,
                        recv_next=recv_next,
                        tensor_shape=tensor_shape,
                    )
                )
                output_tensor_grads[num_model_chunks - 1].append(output_tensor_grad)
            else:
                input_tensor = p2p_communicator.send_forward_recv_forward(
                    output_tensor, recv_prev=recv_prev, tensor_shape=tensor_shape
                )
            if recv_prev:
                input_tensors[next_forward_model_chunk_id].append(input_tensor)
            deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs)
        else:
            if not is_pp_first_stage(p2p_communicator.pp_group):
                # Send only since recv prefetched.
                _, fwd_wait_handles = p2p_communicator.send_forward_recv_forward(
                    output_tensor, recv_prev=False, tensor_shape=tensor_shape, overlap_p2p_comm=True
                )
            else:  # No prefetch for first rank, so both send and recv initiated.
                fwd_recv_buffer[k % fwd_recv_buffer_size], fwd_wait_handles = (
                    p2p_communicator.send_forward_recv_forward(
                        output_tensor,
                        recv_prev=recv_prev,
                        tensor_shape=tensor_shape,
                        overlap_p2p_comm=True,
                    )
                )
            if send_next_wait_handle is not None:
                send_next_wait_handle.wait()
            if fwd_wait_handles is not None:
                send_next_wait_handle = (
                    fwd_wait_handles.pop("send_next") if "send_next" in fwd_wait_handles else None
                )
                if "recv_prev" in fwd_wait_handles:
                    recv_prev_wait_handles.append(fwd_wait_handles.pop("recv_prev"))

            deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs)
            if recv_prev:
                input_tensors[next_forward_model_chunk_id].append(
                    fwd_recv_buffer[k % fwd_recv_buffer_size]
                )
                fwd_recv_buffer[(k + 1) % fwd_recv_buffer_size] = None

        if config.overlap_p2p_comm:
            if (
                k == (num_warmup_microbatches - 1)
                and not forward_only
                and not are_all_microbatches_in_warmup
            ):
                input_tensor_grad = None
                recv_next = True
                if is_pp_last_stage(p2p_communicator.pp_group):
                    recv_next = False

                (bwd_recv_buffer[-1], bwd_wait_handles) = (
                    p2p_communicator.send_backward_recv_backward(
                        input_tensor_grad,
                        recv_next=recv_next,
                        tensor_shape=tensor_shape,
                        overlap_p2p_comm=True,
                    )
                )
                if send_prev_wait_handle is not None:
                    send_prev_wait_handle.wait()
                if bwd_wait_handles is not None:
                    send_prev_wait_handle = (
                        bwd_wait_handles.pop("send_prev")
                        if "send_prev" in bwd_wait_handles
                        else None
                    )
                    if "recv_next" in bwd_wait_handles:
                        recv_next_wait_handles.append(bwd_wait_handles.pop("recv_next"))

                if recv_next:
                    output_tensor_grads[num_model_chunks - 1].append(bwd_recv_buffer[-1])
    nvtx_range_pop(suffix="warmup")

    # Run 1F1B in steady state.
    nvtx_range_push(suffix="steady")
    for k in range(num_microbatches_remaining):
        # Forward pass.
        forward_k = k + num_warmup_microbatches

        # Decide to checkpoint all layers' activations of the current micro-batch.
        if max_outstanding_backprops is not None:
            checkpoint_activations_microbatch = (
                forward_k % max_outstanding_backprops
                >= config.num_microbatches_with_partial_activation_checkpoints
            )
        else:
            checkpoint_activations_microbatch = None

        cur_model_chunk_id = get_model_chunk_id(forward_k, forward=True)
        if config.overlap_p2p_comm:

            backward_k = k

            # Sync forward recv
            def pp_pre_forward(vp_stage=None):
                if vp_stage is None:
                    vp_stage = get_model_chunk_id(forward_k, forward=True)
                if not (_is_vp_first_stage(vp_stage=vp_stage) and is_pp_first_stage(pp_group)):
                    if config.overlap_p2p_comm_warmup_flush:
                        assert recv_prev_wait_handles, (
                            f'pp rank {pipeline_parallel_rank}, fwd iteration {forward_k}, '
                            'should have registered recv handle'
                        )
                        recv_prev_wait_handle = recv_prev_wait_handles.pop(0)
                        recv_prev_wait_handle.wait()
                    else:
                        if recv_prev_wait_handles is not None and recv_prev_wait_handles:
                            recv_prev_wait_handle = recv_prev_wait_handles.pop(0)
                            recv_prev_wait_handle.wait()

                deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs)

            # Async forward send / receive
            def pp_post_forward(output_tensor, vp_stage=None):
                nonlocal send_next_wait_handle
                nonlocal fwd_recv_buffer
                nonlocal fwd_wait_handles
                nonlocal recv_prev_wait_handles
                if vp_stage is None:
                    vp_stage = get_model_chunk_id(forward_k, forward=True)
                # Last virtual stage no activation tensor to send.
                if _is_vp_last_stage(vp_stage=vp_stage) and is_pp_last_stage(pp_group):
                    output_tensor = None

                recv_prev, next_forward_model_chunk_id = recv_tensor_from_previous_stage(
                    forward_k, forward=True
                )

                # If last iteration, don't receive; we already received one extra
                # before the start of the for loop.
                if k == (num_microbatches_remaining - 1):
                    recv_prev = False

                # Send activation tensor to the next stage and receive activation tensor from the
                # previous stage
                fwd_recv_buffer[forward_k % fwd_recv_buffer_size], fwd_wait_handles = (
                    p2p_communicator.send_forward_recv_forward(
                        output_tensor,
                        recv_prev=recv_prev,
                        tensor_shape=tensor_shape,
                        overlap_p2p_comm=True,
                    )
                )
                if send_next_wait_handle is not None:
                    send_next_wait_handle.wait()
                if fwd_wait_handles is not None:
                    send_next_wait_handle = (
                        fwd_wait_handles.pop("send_next")
                        if "send_next" in fwd_wait_handles
                        else None
                    )
                    if "recv_prev" in fwd_wait_handles:
                        recv_prev_wait_handles.append(fwd_wait_handles.pop("recv_prev"))
                # assert fwd_wait_handles is not None

                # Put input_tensor and output_tensor_grad in data structures in the
                # right location.
                if recv_prev:
                    input_tensors[next_forward_model_chunk_id].append(
                        fwd_recv_buffer[forward_k % fwd_recv_buffer_size]
                    )
                    fwd_recv_buffer[(forward_k + 1) % fwd_recv_buffer_size] = None

                return output_tensor

            # Sync backward recv
            def pp_pre_backward(vp_stage=None):
                nonlocal recv_next_wait_handles
                if vp_stage is None:
                    vp_stage = get_model_chunk_id(backward_k, forward=False)
                if not (_is_vp_last_stage(vp_stage=vp_stage) and is_pp_last_stage(pp_group)):
                    if config.overlap_p2p_comm_warmup_flush:
                        assert recv_next_wait_handles, (
                            f'pp rank {pipeline_parallel_rank}, bwd iteration {backward_k}, '
                            'should have registered recv next handle'
                        )
                        recv_next_wait_handle = recv_next_wait_handles.pop(0)
                        recv_next_wait_handle.wait()
                    else:
                        if recv_next_wait_handles is not None and recv_next_wait_handles:
                            recv_next_wait_handle = recv_next_wait_handles.pop(0)
                            recv_next_wait_handle.wait()

            # Async backward send / receive
            def pp_post_backward(input_tensor_grad, vp_stage=None):
                nonlocal send_prev_wait_handle
                nonlocal bwd_wait_handles
                nonlocal recv_next_wait_handles
                if vp_stage is None:
                    vp_stage = get_model_chunk_id(backward_k, forward=False)
                # First virtual stage no activation gradient tensor to send.
                if _is_vp_first_stage(vp_stage=vp_stage) and is_pp_first_stage(pp_group):
                    input_tensor_grad = None

                recv_next, next_backward_model_chunk_id = recv_tensor_from_previous_stage(
                    backward_k, forward=False
                )

                (bwd_recv_buffer[backward_k % bwd_recv_buffer_size], bwd_wait_handles) = (
                    p2p_communicator.send_backward_recv_backward(
                        input_tensor_grad,
                        recv_next=recv_next,
                        tensor_shape=tensor_shape,
                        overlap_p2p_comm=True,
                    )
                )
                if send_prev_wait_handle is not None:
                    send_prev_wait_handle.wait()
                if bwd_wait_handles is not None:
                    send_prev_wait_handle = (
                        bwd_wait_handles.pop("send_prev")
                        if "send_prev" in bwd_wait_handles
                        else None
                    )
                    if "recv_next" in bwd_wait_handles:
                        recv_next_wait_handles.append(bwd_wait_handles.pop("recv_next"))

                # Put input_tensor and output_tensor_grad in data structures in the
                # right location.

                if recv_next:
                    output_tensor_grads[next_backward_model_chunk_id].append(
                        bwd_recv_buffer[backward_k % bwd_recv_buffer_size]
                    )
                    bwd_recv_buffer[(backward_k + 1) % bwd_recv_buffer_size] = None
                return input_tensor_grad

            output_tensor, input_tensor_grad = forward_backward_helper_wrapper(
                f_virtual_microbatch_id=forward_k,
                b_virtual_microbatch_id=backward_k,
                pre_forward=pp_pre_forward,
                pre_backward=pp_pre_backward,
                post_forward=pp_post_forward,
                post_backward=pp_post_backward,
                checkpoint_activations_microbatch=checkpoint_activations_microbatch,
            )

        else:  # No p2p overlap.
            backward_k = k
            output_tensor, input_tensor_grad = forward_backward_helper_wrapper(
                f_virtual_microbatch_id=forward_k,
                b_virtual_microbatch_id=backward_k,
                checkpoint_activations_microbatch=checkpoint_activations_microbatch,
            )
            # Send output_tensor and input_tensor_grad, receive input_tensor
            # and output_tensor_grad.

            # Determine if current stage has anything to send in either direction,
            # otherwise set tensor to None.
            forward_model_chunk_id = get_model_chunk_id(forward_k, forward=True)
            if _is_vp_last_stage(vp_stage=forward_model_chunk_id) and is_pp_last_stage(pp_group):
                output_tensor = None

            backward_model_chunk_id = get_model_chunk_id(backward_k, forward=False)
            if _is_vp_first_stage(vp_stage=backward_model_chunk_id) and is_pp_first_stage(pp_group):
                input_tensor_grad = None

            recv_prev, next_forward_model_chunk_id = recv_tensor_from_previous_stage(
                forward_k, forward=True
            )

            recv_next, next_backward_model_chunk_id = recv_tensor_from_previous_stage(
                backward_k, forward=False
            )

            # If last iteration, don't receive; we already received one extra
            # before the start of the for loop.
            if k == (num_microbatches_remaining - 1):
                recv_prev = False

            # Communicate tensors.
            (input_tensor, output_tensor_grad) = (
                p2p_communicator.send_forward_backward_recv_forward_backward(
                    output_tensor,
                    input_tensor_grad,
                    recv_prev=recv_prev,
                    recv_next=recv_next,
                    tensor_shape=tensor_shape,
                )
            )
            deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs)
            # Put input_tensor and output_tensor_grad in data structures in the
            # right location.
            if recv_prev:
                input_tensors[next_forward_model_chunk_id].append(input_tensor)
            if recv_next:
                output_tensor_grads[next_backward_model_chunk_id].append(output_tensor_grad)

    deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs)
    nvtx_range_pop(suffix="steady")

    # Run cooldown backward passes (flush out pipeline) for the last model chunk.
    nvtx_range_push(suffix="cooldown")
    curr_vp_stage = config.virtual_pipeline_model_parallel_size - 1
    if not forward_only:
        if bwd_wait_handles is not None:
            for bwd_wait_handle in bwd_wait_handles.values():
                bwd_wait_handle.wait()

        if are_all_microbatches_in_warmup:
            output_tensor_grads[num_model_chunks - 1].append(
                p2p_communicator.recv_backward(
                    tensor_shape,
                    is_last_stage=(
                        _is_vp_last_stage(vp_stage=curr_vp_stage) and is_pp_last_stage(pp_group)
                    ),
                )
            )
        for k in range(num_microbatches_remaining, total_num_microbatches):
            cur_model_chunk_id = get_model_chunk_id(k, forward=False)
            if (
                not (_is_vp_last_stage(vp_stage=cur_model_chunk_id) and is_pp_last_stage(pp_group))
                and k != 0
            ):
                if config.overlap_p2p_comm_warmup_flush:
                    assert recv_next_wait_handles, (
                        f'pp rank {pipeline_parallel_rank}, backward iteration {k}, '
                        'should have registered recv next handle'
                    )
                    recv_next_wait_handle = recv_next_wait_handles.pop(0)
                    recv_next_wait_handle.wait()
                else:
                    if recv_next_wait_handles is not None and recv_next_wait_handles:
                        recv_next_wait_handle = recv_next_wait_handles.pop(0)
                        recv_next_wait_handle.wait()

            recv_next, next_backward_model_chunk_id = recv_tensor_from_previous_stage(
                k, forward=False
            )

            if k == (total_num_microbatches - 1):
                recv_next = False

            # Prefetch recv for backward iteration k+1 for non last ranks.
            if config.overlap_p2p_comm_warmup_flush and not is_pp_last_stage(
                p2p_communicator.pp_group
            ):
                bwd_recv_buffer[k % bwd_recv_buffer_size], bwd_wait_recv_handles = (
                    p2p_communicator.send_backward_recv_backward(
                        input_tensor_grad=None,  # No input_tensor_grad to send.
                        recv_next=recv_next,
                        tensor_shape=tensor_shape,
                        overlap_p2p_comm=True,
                    )
                )

                if bwd_wait_recv_handles:
                    recv_next_wait_handles.append(bwd_wait_recv_handles.pop("recv_next"))

            _, input_tensor_grad = forward_backward_helper_wrapper(b_virtual_microbatch_id=k)

            # First virtual stage no activation gradient tensor to send.
            if _is_vp_first_stage(vp_stage=cur_model_chunk_id) and is_pp_first_stage(pp_group):
                input_tensor_grad = None

            if config.overlap_p2p_comm_warmup_flush:
                if not is_pp_last_stage(p2p_communicator.pp_group):
                    _, bwd_wait_handles = p2p_communicator.send_backward_recv_backward(
                        input_tensor_grad,
                        recv_next=False,
                        tensor_shape=tensor_shape,
                        overlap_p2p_comm=True,
                    )
                else:
                    bwd_recv_buffer[k % bwd_recv_buffer_size], bwd_wait_handles = (
                        p2p_communicator.send_backward_recv_backward(
                            input_tensor_grad,
                            recv_next=recv_next,
                            tensor_shape=tensor_shape,
                            overlap_p2p_comm=True,
                        )
                    )

                if send_prev_wait_handle is not None:
                    send_prev_wait_handle.wait()
                if bwd_wait_handles is not None:
                    send_prev_wait_handle = (
                        bwd_wait_handles.pop("send_prev")
                        if "send_prev" in bwd_wait_handles
                        else None
                    )
                    if "recv_next" in bwd_wait_handles:
                        recv_next_wait_handles.append(bwd_wait_handles.pop("recv_next"))
                if recv_next:
                    output_tensor_grads[next_backward_model_chunk_id].append(
                        bwd_recv_buffer[k % bwd_recv_buffer_size]
                    )
                    bwd_recv_buffer[(k + 1) % bwd_recv_buffer_size] = None

            else:
                output_tensor_grad = p2p_communicator.send_backward_recv_backward(
                    input_tensor_grad, recv_next=recv_next, tensor_shape=tensor_shape
                )

                if recv_next:
                    output_tensor_grads[next_backward_model_chunk_id].append(output_tensor_grad)

        if send_prev_wait_handle is not None:
            send_prev_wait_handle.wait()

        # Launch any remaining grad reductions.
        enable_grad_sync()
        if config.grad_sync_func is not None:
            for model_chunk_id in range(num_model_chunks):
                if model_chunk_id not in synchronized_model_chunks:
                    config.grad_sync_func[model_chunk_id](model[model_chunk_id].parameters())
                    synchronized_model_chunks.add(model_chunk_id)
    nvtx_range_pop(suffix="cooldown")

    nvtx_range_push(suffix="misc")
    assert (
        not recv_prev_wait_handles
    ), 'recv_prev_wait_handles should be cleared at the end of a step'
    assert (
        not recv_next_wait_handles
    ), 'recv_next_wait_handles should be cleared at the end of a step'

    if config.finalize_model_grads_func is not None and not forward_only:

        # If defer_embedding_wgrad_compute is enabled we need to do the
        # weight gradient GEMM's here.
        finish_embedding_wgrad_compute(
            config, embedding_module, p2p_communicator.is_pp_last_stage, tp_group
        )

        # Finalize model grads (perform full grad all-reduce / reduce-scatter for
        # data parallelism, layernorm all-reduce for sequence parallelism, and
        # embedding all-reduce for pipeline parallelism).

        config.finalize_model_grads_func(
            model,
            total_num_tokens if config.calculate_per_token_loss else None,
            pg_collection=pg_collection,
            force_all_reduce=force_all_reduce,
        )

    if getattr(config, 'fine_grained_activation_offloading', False):
        off_interface.reset()
    # Restore config.grad_sync_func and config.param_sync_func.
    if forward_only:
        config.grad_sync_func, config.param_sync_func = grad_sync_func, param_sync_func

    if config.timers is not None:
        config.timers('forward-backward').stop()

    if (
        hasattr(config, 'cuda_graph_impl')
        and config.cuda_graph_impl == "local"
        and CudaGraphScope.full_iteration not in config.cuda_graph_scope
    ):
        create_cudagraphs()
    nvtx_range_pop(suffix="misc")

    return forward_data_store


def get_tensor_shapes(
    *,
    seq_length: int,
    micro_batch_size: int,
    decoder_seq_length: int,
    config,
    tp_group: Optional[torch.distributed.ProcessGroup] = None,
    cp_group: Optional[torch.distributed.ProcessGroup] = None,
):
    """Determine tensor shapes for pipeline communication.

    Returns [()] for variable_seq_lengths mode (shapes exchanged dynamically),
    or computed shapes for fixed sequence length mode.
    """
    tensor_shapes = []

    if config.variable_seq_lengths:
        # Shapes exchanged dynamically during P2P communication
        tensor_shapes.append(())
        return tensor_shapes

    # Fixed sequence lengths - compute shape
    effective_seq_length = decoder_seq_length if decoder_seq_length is not None else seq_length
    effective_seq_length = effective_seq_length // cp_group.size()

    if config.sequence_parallel:
        effective_seq_length = effective_seq_length // tp_group.size()

    tensor_shapes.append((effective_seq_length, micro_batch_size, config.hidden_size))
    return tensor_shapes


def forward_backward_pipelining_without_interleaving(
    *,
    forward_step_func,
    data_iterator: Union[Iterator, List[Iterator]],
    model: Union[torch.nn.Module, List[torch.nn.Module]],
    num_microbatches: int,
    seq_length: int,
    micro_batch_size: int,
    decoder_seq_length: Optional[int] = None,
    forward_only: bool = False,
    collect_non_loss_data: bool = False,
    first_val_step: Optional[bool] = None,
    adjust_tensor_shapes_fn: Optional[Callable] = None,
    p2p_communicator: Optional[P2PCommunicator] = None,
    pg_collection: Optional[
        Union[ProcessGroupCollection, MultiModuleProcessGroupCollection]
    ] = None,
    force_all_reduce: Optional[bool] = False,
):
    """Run non-interleaved 1F1B schedule, with communication between pipeline
    stages. Returns dictionary with losses if the last stage, empty dict otherwise."""

    if isinstance(model, list):
        assert (
            len(model) == 1
        ), "non-interleaved pipeline-parallel schedule does not support model chunking"
        model = model[0]
    if isinstance(data_iterator, list):
        assert (
            len(data_iterator) == 1
        ), "non-interleaved pipeline-parallel schedule does not support model chunking"
        data_iterator = data_iterator[0]

    config = get_model_config(model)
    if config.overlap_p2p_comm:
        raise ValueError(
            "Non-interleaved pipeline parallelism does not support overlapping p2p communication"
        )

    tp_group, cp_group, cp_size = None, None, None

    # Determine if this is a multi-module pipeline
    # (used for validation and backward function selection)
    is_multimodule = isinstance(pg_collection, MultiModuleProcessGroupCollection) or isinstance(
        p2p_communicator, MultiModulePipelineCommunicator
    )

    if p2p_communicator is None and pg_collection is None:
        # Default: single-module with parallel_state groups
        p2p_communicator = P2PCommunicator(
            pp_group=parallel_state.get_pipeline_model_parallel_group(), config=config
        )
        tp_group = parallel_state.get_tensor_model_parallel_group()
        cp_group = parallel_state.get_context_parallel_group()
        cp_size = cp_group.size()
        embd_group = parallel_state.get_embedding_group(check_initialized=False)
        pos_emb_group = parallel_state.get_position_embedding_group(check_initialized=False)
        pp_group = parallel_state.get_pipeline_model_parallel_group()

        pg_collection = ProcessGroupCollection()
        pg_collection.tp = tp_group
        pg_collection.pp = pp_group
        pg_collection.embd = embd_group
        pg_collection.pos_embd = pos_emb_group
        pg_collection.cp = cp_group
        pg_collection.dp_cp = parallel_state.get_data_parallel_group(
            with_context_parallel=True, partial_data_parallel=False
        )

    elif p2p_communicator is not None and pg_collection is not None:
        assert hasattr(p2p_communicator, 'config'), "p2p_communicator must have a config"

        if is_multimodule:
            # Multi-module: use language model's CP size for loss scaling
            if not config.variable_seq_lengths:
                raise ValueError(
                    "config.variable_seq_lengths=True required for multi-module pipelines"
                )
            if pg_collection.has_language_model():
                cp_size = pg_collection.get_language_model_cp_size()
            else:
                # Encoder-only ranks should not use CP loss scaling.
                cp_size = None

        elif isinstance(pg_collection, ProcessGroupCollection):
            # Single-module: extract tp/cp groups and cp_size
            assert hasattr(pg_collection, 'tp'), "pg_collection must have tp"
            assert hasattr(pg_collection, 'cp'), "pg_collection must have cp"
            tp_group = pg_collection.tp
            cp_group = pg_collection.cp
            cp_size = cp_group.size()

        else:
            raise TypeError(
                f"pg_collection must be ProcessGroupCollection or "
                f"MultiModuleProcessGroupCollection, got {type(pg_collection)}"
            )
    else:
        raise ValueError("Provide both p2p_communicator and pg_collection, or neither")

    # Needed only when gradients are finalized in M-Core
    if config.finalize_model_grads_func is not None and not forward_only:
        embedding_module = clear_embedding_activation_buffer(
            config, model, p2p_communicator.is_pp_last_stage
        )

    if config.timers is not None:
        config.timers('forward-backward', log_level=1).start(barrier=config.barrier_with_L1_time)

    # Disable async grad reductions
    no_sync_func = config.no_sync_func
    if no_sync_func is None:
        no_sync_func = contextlib.nullcontext
    no_sync_context = None

    def disable_grad_sync():
        """Disable asynchronous grad reductions"""
        nonlocal no_sync_context
        if no_sync_context is None:
            no_sync_context = no_sync_func()
            no_sync_context.__enter__()

    def enable_grad_sync():
        """Enable asynchronous grad reductions"""
        nonlocal no_sync_context
        if no_sync_context is not None:
            no_sync_context.__exit__(None, None, None)
            no_sync_context = None

    disable_grad_sync()

    # Compute number of warmup microbatches.
    num_warmup_microbatches = p2p_communicator.total_stages - p2p_communicator.current_stage - 1
    num_warmup_microbatches = min(num_warmup_microbatches, num_microbatches)
    num_microbatches_remaining = num_microbatches - num_warmup_microbatches

    # Checkpoint the activations of partial Transformer layers in a number of micro-batches
    # within the maximum outstanding micro-batch backpropagations.
    # Micro-batches with the ids less than 'num_microbatches_with_partial_activation_checkpoints'
    # checkpoint partial Transformer layers (or skip checkpointing) and
    # the rest of micro-batches within a window of micro-batches checkpoint
    # all Transformer layers. The window of micro-batches is set by the maximum
    # outstanding backpropagations and becomes smaller at later pipeline stages.
    # Please refer the appendix C in https://arxiv.org/pdf/2205.05198.pdf
    max_outstanding_backprops = None
    if config.num_microbatches_with_partial_activation_checkpoints is not None:
        max_outstanding_backprops = num_warmup_microbatches + 1

    # Select backward function based on whether multi-module or single-module
    if is_multimodule:
        backward_func = partial(
            backward_step_multimodule,
            language_model_module_name=pg_collection.language_model_module_name,
        )
    else:
        backward_func = backward_step

    recv_tensor_shapes = get_tensor_shapes(
        seq_length=seq_length,
        micro_batch_size=micro_batch_size,
        decoder_seq_length=decoder_seq_length,
        config=config,
        tp_group=tp_group,
        cp_group=cp_group,
    )
    send_tensor_shapes = get_tensor_shapes(
        seq_length=seq_length,
        micro_batch_size=micro_batch_size,
        decoder_seq_length=decoder_seq_length,
        config=config,
        tp_group=tp_group,
        cp_group=cp_group,
    )
    if adjust_tensor_shapes_fn is not None:
        recv_tensor_shapes, send_tensor_shapes = adjust_tensor_shapes_fn(
            recv_tensor_shapes, send_tensor_shapes
        )

    # Input, output tensors only need to be saved when doing backward passes
    input_tensors = None
    output_tensors = None
    total_num_tokens = torch.zeros([], dtype=torch.int, device="cuda")

    if not forward_only:
        input_tensors = []
        output_tensors = []
    forward_data_store = []

    # Run warmup forward passes.
    for i in range(num_warmup_microbatches):
        # Decide to checkpoint all layers' activations of the current micro-batch
        if max_outstanding_backprops is not None:
            checkpoint_activations_microbatch = (
                i % max_outstanding_backprops
                >= config.num_microbatches_with_partial_activation_checkpoints
            )
        else:
            checkpoint_activations_microbatch = None

        input_tensor = p2p_communicator.recv_forward(
            recv_tensor_shapes, p2p_communicator.is_pp_first_stage
        )
        output_tensor, num_tokens = forward_step(
            forward_step_func,
            data_iterator,
            model,
            num_microbatches,
            input_tensor,
            forward_data_store,
            config,
            cp_group_size=cp_size,
            collect_non_loss_data=collect_non_loss_data,
            checkpoint_activations_microbatch=checkpoint_activations_microbatch,
            is_first_microbatch=check_first_val_step(first_val_step, forward_only, i == 0),
            current_microbatch=i,
            is_last_stage=p2p_communicator.is_pp_last_stage,
        )
        p2p_communicator.send_forward(output_tensor, p2p_communicator.is_pp_last_stage)
        total_num_tokens += num_tokens

        if not forward_only:
            input_tensors.append(input_tensor)
            output_tensors.append(output_tensor)
            deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs)

    # Before running 1F1B, need to receive first forward tensor.
    # If all microbatches are run in warmup / cooldown phase, then no need to
    # receive this tensor here.
    if num_microbatches_remaining > 0:
        input_tensor = p2p_communicator.recv_forward(
            recv_tensor_shapes, p2p_communicator.is_pp_first_stage
        )

    # Run 1F1B in steady state.
    for i in range(num_microbatches_remaining):
        last_iteration = i == (num_microbatches_remaining - 1)

        # Decide to checkpoint all layers' activations of the current micro-batch
        if max_outstanding_backprops is not None:
            checkpoint_activations_microbatch = (
                (i + num_warmup_microbatches) % max_outstanding_backprops
            ) >= config.num_microbatches_with_partial_activation_checkpoints
        else:
            checkpoint_activations_microbatch = None

        output_tensor, num_tokens = forward_step(
            forward_step_func,
            data_iterator,
            model,
            num_microbatches,
            input_tensor,
            forward_data_store,
            config,
            cp_group_size=cp_size,
            collect_non_loss_data=collect_non_loss_data,
            checkpoint_activations_microbatch=checkpoint_activations_microbatch,
            is_first_microbatch=check_first_val_step(
                first_val_step, forward_only, (i == 0) and (num_warmup_microbatches == 0)
            ),
            current_microbatch=i + num_warmup_microbatches,
            is_last_stage=p2p_communicator.is_pp_last_stage,
        )
        total_num_tokens += num_tokens

        if forward_only:
            p2p_communicator.send_forward(output_tensor, p2p_communicator.is_pp_last_stage)
            if not last_iteration:
                input_tensor = p2p_communicator.recv_forward(
                    recv_tensor_shapes, p2p_communicator.is_pp_first_stage
                )
        else:
            output_tensor_grad = p2p_communicator.send_forward_recv_backward(
                output_tensor, send_tensor_shapes, p2p_communicator.is_pp_last_stage
            )

            # Add input_tensor and output_tensor to end of list.
            input_tensors.append(input_tensor)
            output_tensors.append(output_tensor)
            deallocate_output_tensor(output_tensor, config.deallocate_pipeline_outputs)

            # Pop input_tensor and output_tensor from the start of the list for
            # the backward pass.
            input_tensor = input_tensors.pop(0)
            output_tensor = output_tensors.pop(0)

            # Enable grad sync for the last microbatch in the batch if the full
            # backward pass completes in the 1F1B stage.
            if num_warmup_microbatches == 0 and last_iteration:
                if config.grad_sync_func is None or p2p_communicator.is_pp_first_stage:
                    enable_grad_sync()

            input_tensor_grad = backward_func(
                input_tensor, output_tensor, output_tensor_grad, config
            )

            if last_iteration:
                input_tensor = None
                p2p_communicator.send_backward(
                    input_tensor_grad, p2p_communicator.is_pp_first_stage
                )
            else:
                input_tensor = p2p_communicator.send_backward_recv_forward(
                    input_tensor_grad, recv_tensor_shapes, p2p_communicator.is_pp_first_stage
                )

    # Run cooldown backward passes.
    if not forward_only:
        for i in range(num_warmup_microbatches):

            # Enable async grad reduction in the last backward pass
            # Note: If grad sync function is provided, only enable
            # async grad reduction in first pipeline stage. Other
            # pipeline stages do grad reduction during pipeline
            # bubble.
            if i == num_warmup_microbatches - 1:
                if config.grad_sync_func is None or p2p_communicator.is_pp_first_stage:
                    enable_grad_sync()

            input_tensor = input_tensors.pop(0)
            output_tensor = output_tensors.pop(0)

            output_tensor_grad = p2p_communicator.recv_backward(
                send_tensor_shapes, p2p_communicator.is_pp_last_stage
            )

            input_tensor_grad = backward_func(
                input_tensor, output_tensor, output_tensor_grad, config
            )

            p2p_communicator.send_backward(input_tensor_grad, p2p_communicator.is_pp_first_stage)

        # Launch any remaining grad reductions.
        if no_sync_context is not None:
            enable_grad_sync()
            if config.grad_sync_func is not None:
                config.grad_sync_func(model.parameters())

    if config.finalize_model_grads_func is not None and not forward_only:

        # If defer_embedding_wgrad_compute is enabled we need to do the
        # weight gradient GEMM's here.
        finish_embedding_wgrad_compute(
            config, embedding_module, p2p_communicator.is_pp_last_stage, tp_group
        )

        # Finalize model grads (perform full grad all-reduce / reduce-scatter for
        # data parallelism, layernorm all-reduce for sequence parallelism, and
        # embedding all-reduce for pipeline parallelism).
        config.finalize_model_grads_func(
            [model],
            total_num_tokens if config.calculate_per_token_loss else None,
            pg_collection=pg_collection,
            force_all_reduce=force_all_reduce,
        )

    if getattr(config, 'fine_grained_activation_offloading', False):
        off_interface.reset()

    if config.timers is not None:
        config.timers('forward-backward').stop()

    if (
        hasattr(config, 'cuda_graph_impl')
        and config.cuda_graph_impl == "local"
        and CudaGraphScope.full_iteration not in config.cuda_graph_scope
    ):
        create_cudagraphs()

    return forward_data_store


================================================
FILE: megatron/core/pipeline_parallel/utils.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import logging
from abc import ABC, abstractmethod
from contextlib import contextmanager
from typing import Callable, Optional

import torch
from torch.autograd import Variable

from megatron.core.utils import get_pg_rank, get_pg_size, log_single_rank, make_viewless_tensor

logger = logging.getLogger(__name__)


def is_pp_first_stage(pp_group: torch.distributed.ProcessGroup):
    """Return True if in the first pipeline model-parallel stage, False otherwise."""
    return get_pg_rank(pp_group) == 0


def is_pp_last_stage(pp_group: torch.distributed.ProcessGroup):
    """Return True if in the last pipeline-model-parallel stage, False otherwise."""
    return get_pg_rank(pp_group) == (get_pg_size(pp_group) - 1)


def is_vp_first_stage(vp_stage: int, vp_size: int | None):
    """Return True if in the first virtual pipeline model-parallel stage, False otherwise."""
    if vp_size is None or vp_size <= 1:
        assert vp_stage is None or vp_stage == 0, (
            f"Expected vp_stage to be 0 or None when vp_size is <= 1 or None, "
            f"but got vp_stage={vp_stage} and vp_size={vp_size}"
        )
        return True
    return vp_stage == 0


def is_vp_last_stage(vp_stage: int, vp_size: int | None):
    """Return True if in the last virtual pipeline model-parallel stage, False otherwise."""
    if vp_size is None or vp_size <= 1:
        assert vp_stage is None or vp_stage == 0, (
            f"Expected vp_stage to be 0 or None when vp_size is <= 1 or None, "
            f"but got vp_stage={vp_stage} and vp_size={vp_size}"
        )
        return True
    return vp_stage == (vp_size - 1)


def get_pp_first_rank(pp_group: torch.distributed.ProcessGroup):
    """Return the global rank of the first rank in the pipeline parallel group."""
    pp_ranks = torch.distributed.get_process_group_ranks(pp_group)
    return pp_ranks[0]


def get_pp_last_rank(pp_group: torch.distributed.ProcessGroup):
    """Return the global rank of the last rank in the pipeline parallel group."""
    pp_ranks = torch.distributed.get_process_group_ranks(pp_group)
    return pp_ranks[-1]


def get_pp_next_rank(pp_group: torch.distributed.ProcessGroup):
    """Return the global rank of the next rank in the pipeline parallel group, or None if last
    stage."""
    if is_pp_last_stage(pp_group):
        return None
    current_rank_in_group = get_pg_rank(pp_group)
    pp_ranks = torch.distributed.get_process_group_ranks(pp_group)
    return pp_ranks[current_rank_in_group + 1]


def get_pp_prev_rank(pp_group: torch.distributed.ProcessGroup):
    """Return the global rank of the previous rank in the pipeline parallel group, or None if
    first stage."""
    if is_pp_first_stage(pp_group):
        return None
    current_rank_in_group = get_pg_rank(pp_group)
    pp_ranks = torch.distributed.get_process_group_ranks(pp_group)
    return pp_ranks[current_rank_in_group - 1]


def make_viewless(e):
    """Make_viewless util func"""
    e = make_viewless_tensor(inp=e, requires_grad=e.requires_grad, keep_graph=True)
    return e


def set_ideal_affinity_for_current_gpu():
    """Set CPU affinity for the current GPU to optimize host-device transfers."""
    import uuid

    try:
        import cuda.bindings.driver as cuda_driver
        import cuda.bindings.runtime as cuda_runtime
    except:
        try:
            import cuda.cuda as cuda_driver
            import cuda.cudart as cuda_runtime
        except:
            raise RuntimeError("Please install cuda-python to enable GPU affinity setting")
    import pynvml

    # Get current CUDA device ID
    err, device_id = cuda_runtime.cudaGetDevice()
    assert err == cuda_runtime.cudaError_t.cudaSuccess
    # Get device UUID
    err, device_uuid = cuda_driver.cuDeviceGetUuid(device_id)
    assert err == cuda_driver.CUresult.CUDA_SUCCESS
    # Set CPU affinity based on GPU's NUMA node
    pynvml.nvmlInit()
    handle = pynvml.nvmlDeviceGetHandleByUUID("GPU-" + str(uuid.UUID(bytes=device_uuid.bytes)))
    pynvml.nvmlDeviceSetCpuAffinity(handle)

    log_single_rank(
        logger,
        logging.WARNING,
        f"Set CPU affinity for all GPUs for optimal host-device transfer performance",
    )


class NoopScheduleNode:
    """A placeholder node in the computation graph that simply passes through inputs and outputs.

    This class is used as a no-op node in the scheduling system when a real computation node
    is not needed but the interface must be maintained (e.g., dense layer doesn't need
    moe_dispatch and moe_combine). It simply returns its inputs unchanged
    in both forward and backward passes.
    """

    def forward(self, inputs):
        """Passes through inputs unchanged in the forward pass."""
        return inputs

    def backward(self, outgrads):
        """Passes through gradients unchanged in the backward pass."""
        return outgrads


class ScheduleNode:
    """Base node for fine-grained scheduling.

    This class represents a computational node in the pipeline schedule.
    It handles the execution of forward and backward operations on a stream.
    """

    def __init__(
        self,
        forward_func: Callable,
        stream: torch.cuda.Stream,
        event: torch.cuda.Event,
        backward_func: Optional[Callable] = None,
        free_input: bool = False,
        name: str = "schedule_node",
    ):
        """Initialize a schedule node.

        Args:
            forward_func (callable): Function to execute during the forward pass.
            stream (torch.cuda.Stream): The CUDA stream for this node's computation.
                This can be either a 'compute' stream or a 'communicate' stream.
                - 'compute' stream: Used for computational nodes like attention and experts.
                - 'communicate' stream: Used for nodes that handle token communication,
                  such as token dispatch and combine operations in MoE layers.
            event (torch.cuda.Event): The CUDA event used for synchronization. Each
                microbatch within a model chunk shares the same event, which is used
                to manage dependencies between nodes operating on different streams.
            backward_func (callable, optional): Function for the backward pass.
            free_input (bool): Flag to indicate if the input should be freed after the
                forward pass.
            name (str): Name of the node for debugging purposes.
        """
        self.name = name
        self.forward_func = forward_func
        self.backward_func = backward_func if backward_func else self.default_backward_func
        self.stream = stream
        self.event = event
        self.free_input = free_input
        self.inputs = None
        self.outputs = None
        self.delay_grads_release = False
        self.manual_release_grads = False

    def default_backward_func(self, outputs, output_grad):
        """Default backward function"""
        Variable._execution_engine.run_backward(
            tensors=outputs,
            grad_tensors=output_grad,
            keep_graph=False,
            create_graph=False,
            inputs=tuple(),
            allow_unreachable=True,
            accumulate_grad=True,
        )
        return output_grad

    def forward(self, inputs=()):
        """Schedule node forward"""
        if not isinstance(inputs, tuple):
            inputs = (inputs,)
        return self._forward(*inputs)

    def _forward(self, *inputs):
        with self.stream_acquire_context(f"{self.name} forward"):
            self.inputs = [make_viewless(e).detach() if e is not None else None for e in inputs]
            for i, input in enumerate(self.inputs):
                if input is not None:
                    input.requires_grad = inputs[i].requires_grad

            data = tuple(self.inputs)
            data = self.forward_func(*data)

            if not isinstance(data, tuple):
                data = make_viewless(data)
            else:
                data = tuple([make_viewless(e) if isinstance(e, torch.Tensor) else e for e in data])

            self.output = data

        # Immediately frees input tensors after they are used for nodes
        # where inputs are no longer needed after computation.
        if self.free_input:
            for input in inputs:
                if input is not None:
                    input.record_stream(self.stream)
                    input.untyped_storage().resize_(0)

        return self.output

    def get_output(self):
        """Get the forward output"""
        return self.output

    def backward(self, output_grad):
        """Schedule node backward"""
        if not isinstance(output_grad, tuple):
            output_grad = (output_grad,)
        return self._backward(*output_grad)

    def _backward(self, *output_grad):
        with self.stream_acquire_context(f"{self.name} backward"):
            outputs = self.output
            if not isinstance(outputs, tuple):
                outputs = (outputs,)
            assert len(outputs) == len(output_grad), (
                f"{len(outputs)} of {type(outputs[0])} is not equal to "
                f"{len(output_grad)} of {type(output_grad[0])}"
            )
            output_grad = self.backward_func(outputs, output_grad)

        # output_grad maybe from another stream
        if output_grad:
            for g in output_grad:
                if g is not None:
                    g.record_stream(self.stream)
                    # Manually trigger the memory release of dgrad tensor
                    # to avoid delayed garbage collection. If
                    # delay_grads_release is True, dgrad is last used in
                    # wgrad compute and skip the release here.
                    if self.manual_release_grads and not self.delay_grads_release:
                        g.untyped_storage().resize_(0)

        grads = self.get_grad()
        self._release_state()

        return grads

    def get_grad(self):
        """Get the grad of inputs"""
        grad = tuple([e.grad if e is not None else None for e in self.inputs])
        # multiple in, multiple out
        if len(grad) == 1:
            grad = grad[0]
        return grad

    @contextmanager
    def stream_acquire_context(self, name=None):
        """Stream acquire context that handles event synchronization,
            NVTX profiling, and stream context.

        This context manager consolidates:
        1. Event wait/record for synchronization between streams
        2. NVTX range for profiling (if name is provided)
        3. torch.cuda.stream context for execution on the specified stream

        Args:
            name: Optional name for NVTX range profiling
        """
        self.event.wait(self.stream)
        if name:
            torch.cuda.nvtx.range_push(name)
        try:
            with torch.cuda.stream(self.stream):
                yield
        finally:
            if name:
                torch.cuda.nvtx.range_pop()
            self.event.record(self.stream)

    def _release_state(self):
        """Clear the state of the node"""
        self.inputs = None
        self.output = None
        del self.forward_func
        del self.backward_func


class AbstractSchedulePlan(ABC):
    """To use combined 1f1b, model must implement build_schedule_plan while take the same
    signature as model forward but return an instance of AbstractSchedulePlan"""

    @staticmethod
    @abstractmethod
    def run(
        f_schedule_plan,
        b_schedule_plan,
        grad=None,
        pre_forward=None,
        pre_backward=None,
        post_forward=None,
        post_backward=None,
    ):
        """run() is the protocol between our schedule logic and model, which is used to schedule
        the forward and backward schedule plans for the models.
        """
        ...


_COMP_STREAM = None
_COMM_STREAM = None


def set_streams(comp_stream=None, comm_stream=None):
    """Set the streams for communication and computation"""
    global _COMP_STREAM
    global _COMM_STREAM
    if _COMP_STREAM is not None and _COMM_STREAM is not None:
        return

    if comp_stream is None:
        comp_stream = torch.cuda.current_stream()
    if comm_stream is None:
        comm_stream = torch.cuda.Stream(device="cuda")

    assert _COMP_STREAM is None
    assert _COMM_STREAM is None
    _COMP_STREAM = comp_stream
    _COMM_STREAM = comm_stream


def get_comp_stream():
    """Get the stream for computation"""
    global _COMP_STREAM
    return _COMP_STREAM


def get_comm_stream():
    """Get the stream for communication"""
    global _COMM_STREAM
    return _COMM_STREAM


================================================
FILE: megatron/core/post_training/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/core/post_training/modelopt/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
"""Integrations with NVIDIA Model Optimizer (referred to as ModelOpt).

ModelOpt is a library comprising state-of-the-art model optimization techniques
including quantization and sparsity to compress model for efficient inference on
NVIDIA GPUs. ModelOpt is integrated with Megatron-core to provide a seamless
experience for users to optimize their Megatron-core models for inference.
More details on ModelOpt including installation and usage can be found at
https://github.com/NVIDIA/Model-Optimizer.
"""


================================================
FILE: megatron/core/post_training/modelopt/gpt/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/core/post_training/modelopt/gpt/model_specs.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from megatron.core.extensions.transformer_engine import TEDotProductAttention
from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add
from megatron.core.models.gpt.gpt_layer_specs import get_mlp_module_spec
from megatron.core.post_training.modelopt.layers import (
    BlockwiseFP8WeightTransformerLayer,
    FP8WeightTransformerLayer,
    Linear,
    Norm,
)
from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear
from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules
from megatron.core.transformer.dot_product_attention import DotProductAttention
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.identity_op import IdentityOp
from megatron.core.transformer.multi_latent_attention import (
    MLASelfAttention,
    MLASelfAttentionSubmodules,
)
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.torch_norm import L2Norm
from megatron.core.transformer.transformer_block import (
    TransformerBlockSubmodules,
    get_num_layers_to_build,
)
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.transformer_layer import (
    TransformerLayer,
    TransformerLayerSubmodules,
    get_transformer_layer_offset,
)


def get_gpt_modelopt_spec(
    config: TransformerConfig,
    local_core_attention: bool = False,
    remap_te_layernorm: bool = False,
    real_quant_cfg: str = "None",
    qk_l2_norm: bool = False,
    use_arbitrary_attention_mask: bool = False,
):
    """Mix the native spec with TENorm.

    This is essentially the native local spec except for the layernorm implementation
    is using TENorm from Transformer-Engine. The issue is that FusedLayerNorm from apex
    has stopped supporting RMSNorm needed by llama.

    Args:
        config: model's transformer config
        local_core_attention: whether to use local DotProductAttention or TEDotProductAttention
        remap_te_layernorm: whether to perform sharded state_dict prefix mapping on layernorm
        real_quant_cfg: Model Optimizer real quantization config
        qk_l2_norm: whether to use Llama4 L2 norm for Q and K
        use_arbitrary_attention_mask: whether to use arbitrary attention mask instead of causal
    """
    num_layers_to_build = get_num_layers_to_build(config)

    # Llama4 Scout-16E support for NeMo. NeMo's GPTConfig is using attribute .qk_l2_norm directly.
    qk_l2_norm = getattr(config, "qk_l2_norm", qk_l2_norm)
    if use_arbitrary_attention_mask:
        attn_mask_type = AttnMaskType.arbitrary
    else:
        attn_mask_type = AttnMaskType.causal

    moe_sharded_state_dict_keys_map = {}
    dense_sharded_state_dict_keys_map = {}
    if remap_te_layernorm:
        input_layernorm_map = {'input_layernorm.': 'self_attention.linear_qkv.layer_norm_'}
        mla_qk_layernorm_map = {
            "self_attention.q_layernorm.": 'self_attention.linear_q_up_proj.layer_norm_',
            "self_attention.kv_layernorm.": 'self_attention.linear_kv_up_proj.layer_norm_',
        }
        dense_sharded_state_dict_keys_map = {'pre_mlp_layernorm.': 'mlp.linear_fc1.layer_norm_'}
        if not config.multi_latent_attention:
            moe_sharded_state_dict_keys_map.update(input_layernorm_map)
            dense_sharded_state_dict_keys_map.update(input_layernorm_map)
        else:
            if config.qk_layernorm:
                moe_sharded_state_dict_keys_map.update(mla_qk_layernorm_map)
                dense_sharded_state_dict_keys_map.update(mla_qk_layernorm_map)

    if real_quant_cfg == "None":
        transformer_layer = TransformerLayer
    elif real_quant_cfg == "fp8_real_quant":
        transformer_layer = FP8WeightTransformerLayer
    elif real_quant_cfg == "fp8_blockwise_real_quant":
        transformer_layer = BlockwiseFP8WeightTransformerLayer
    else:
        raise ValueError("RealQuantTransformerLayer does not support {}".format(real_quant_cfg))

    core_attention = DotProductAttention if local_core_attention else TEDotProductAttention

    if config.multi_latent_attention:
        attn_module = MLASelfAttention
        attn_submodules = MLASelfAttentionSubmodules(
            linear_q_proj=ColumnParallelLinear,
            linear_q_down_proj=Linear,
            q_layernorm=Norm,
            linear_q_up_proj=ColumnParallelLinear,
            linear_kv_down_proj=Linear,
            kv_layernorm=Norm,
            linear_kv_up_proj=ColumnParallelLinear,
            core_attention=core_attention,
            linear_proj=RowParallelLinear,
        )
    else:
        norm = L2Norm if qk_l2_norm else Norm if config.qk_layernorm else IdentityOp
        attn_module = SelfAttention
        attn_submodules = SelfAttentionSubmodules(
            linear_qkv=ColumnParallelLinear,
            core_attention=core_attention,
            linear_proj=RowParallelLinear,
            q_layernorm=norm,
            k_layernorm=norm,
        )

    dense_mlp_spec = get_mlp_module_spec(use_te=False)

    dense_layer_spec = ModuleSpec(
        module=transformer_layer,
        submodules=TransformerLayerSubmodules(
            input_layernorm=Norm,
            self_attention=ModuleSpec(
                module=attn_module,
                params={"attn_mask_type": attn_mask_type},
                submodules=attn_submodules,
            ),
            self_attn_bda=get_bias_dropout_add,
            pre_mlp_layernorm=Norm,
            mlp=dense_mlp_spec,
            mlp_bda=get_bias_dropout_add,
            # Map TE-layernorm-fusion keys back
            sharded_state_dict_keys_map=dense_sharded_state_dict_keys_map,
        ),
    )

    if config.num_moe_experts is None:
        return TransformerBlockSubmodules(
            layer_specs=[dense_layer_spec] * num_layers_to_build, layer_norm=Norm
        )

    moe_mlp_spec = get_mlp_module_spec(
        use_te=False,
        num_experts=config.num_moe_experts,
        moe_grouped_gemm=False,
        # use_te=True, num_experts=config.num_moe_experts, moe_grouped_gemm=True,
    )

    moe_layer_spec = ModuleSpec(
        module=transformer_layer,
        submodules=TransformerLayerSubmodules(
            input_layernorm=Norm,
            self_attention=ModuleSpec(
                module=attn_module,
                params={"attn_mask_type": attn_mask_type},
                submodules=attn_submodules,
            ),
            self_attn_bda=get_bias_dropout_add,
            pre_mlp_layernorm=Norm,
            mlp=moe_mlp_spec,
            mlp_bda=get_bias_dropout_add,
            # Map TE-layernorm-fusion keys back
            sharded_state_dict_keys_map=moe_sharded_state_dict_keys_map,
        ),
    )

    # Parse config.moe_layer_freq to determine the pattern of expert/dense layers.
    # 0 stands for dense layers, 1 stands for expert layers.
    # For integer N: Creates a pattern with one expert layer every N layers.
    # For string pattern: Evaluates the str directly (e.g. "[1,0,1]" for alternating expert/dense).
    if isinstance(config.moe_layer_freq, int):
        moe_layer_pattern = [
            1 if (i % config.moe_layer_freq == 0) else 0 for i in range(config.num_layers)
        ]
    elif isinstance(config.moe_layer_freq, list):
        moe_layer_pattern = config.moe_layer_freq
        assert len(moe_layer_pattern) == config.num_layers, (
            f"Invalid length of moe_layer_pattern: {len(moe_layer_pattern)}, "
            f"expected {config.num_layers}, "
            f"current moe layer pattern: {config.moe_layer_freq}"
        )
    else:
        raise ValueError(
            f"Invalid moe_layer_freq: {type(config.moe_layer_freq)}, {config.moe_layer_freq}"
        )

    # Create the layer specs for the model.
    layer_specs = []
    for layer_number in range(config.num_layers):
        if moe_layer_pattern[layer_number] == 1:
            layer_specs.append(moe_layer_spec)
        elif moe_layer_pattern[layer_number] == 0:
            layer_specs.append(dense_layer_spec)
        else:
            raise ValueError(f"Invalid layer pattern: {moe_layer_pattern}")

    # Slice the layer specs to only include the layers that are built in this pipeline stage.
    # Note: MCore layer_number starts at 1
    offset = get_transformer_layer_offset(config)
    layer_specs = layer_specs[offset : offset + num_layers_to_build]

    # Block spec.
    block_spec = TransformerBlockSubmodules(layer_specs=layer_specs, layer_norm=Norm)

    return block_spec


================================================
FILE: megatron/core/post_training/modelopt/gpt/state_dict_hooks.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import logging
from logging import getLogger

from megatron.core.utils import log_single_rank

logger = getLogger(__name__)


def mcore_gpt_load_te_state_dict_pre_hook(
    state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
):
    """Register a pre-hook to fix the state_dict key difference of.

    This prehook is used when trying to load the megatron/core GPTModel that uses a
    fused Transformer-Engine ParallelLinear into the variant that uses native ParallelLinear
    and Transformer-Engine Norm (effectively to restore the fusion).
    Only this particular spec supports post-training quantization and TensorRT-LLM
    config export through `nvidia-modelopt` package.

    Args:
        state_dict: state dictionary
        prefix: module name prefix
        local_metadata: local metatdata
        strict: whether is in strict mode
        missing_keys: missing state dict keys
        unexpected_keys: unexpected state dict keys
        error_msgs: error messages
    """
    if "modelopt_state" in state_dict:
        state_dict.pop("modelopt_state")

    key_with_te_extra_state_to_pop = []

    for key in key_with_te_extra_state_to_pop:
        state_dict.pop(key)

    module_name_rewrite_list = [
        ("self_attention.linear_qkv.layer_norm_weight", "input_layernorm.weight"),
        ("self_attention.linear_qkv.layer_norm_bias", "input_layernorm.bias"),
        ("self_attention.linear_q_up_proj.layer_norm_weight", "self_attention.q_layernorm.weight"),
        ("self_attention.linear_q_up_proj.layer_norm_bias", "self_attention.q_layernorm.bias"),
        (
            "self_attention.linear_kv_up_proj.layer_norm_weight",
            "self_attention.kv_layernorm.weight",
        ),
        ("self_attention.linear_kv_up_proj.layer_norm_bias", "self_attention.kv_layernorm.bias"),
        ("mlp.linear_fc1.layer_norm_weight", "pre_mlp_layernorm.weight"),
        ("mlp.linear_fc1.layer_norm_bias", "pre_mlp_layernorm.bias"),
        ("mixer.in_proj.layer_norm_weight", "norm.weight"),
    ]

    key_rewrite_list = []

    for key, _ in state_dict.items():
        for old_name, new_name in module_name_rewrite_list:
            if old_name in key:
                key_rewrite_list += [(key, key.replace(old_name, new_name))]

    for old_key, new_key in key_rewrite_list:
        log_single_rank(logger, logging.INFO, "replace {} with {}".format(old_key, new_key))
        state_dict[new_key] = state_dict[old_key]
        state_dict.pop(old_key)


================================================
FILE: megatron/core/post_training/modelopt/layers.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
from __future__ import annotations

import logging
from typing import TYPE_CHECKING, Callable, List, Optional, cast

import torch

from megatron.core.extensions.transformer_engine import HAVE_TE
from megatron.core.model_parallel_config import ModelParallelConfig
from megatron.core.transformer.torch_norm import LayerNormInterface
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.transformer_layer import TransformerLayer
from megatron.core.transformer.utils import make_sharded_tensors_for_checkpoint
from megatron.core.typed_torch import copy_signature

logger = logging.getLogger(__name__)

if HAVE_TE or TYPE_CHECKING:
    import transformer_engine as te  # type: ignore[import]

    from megatron.core.extensions.transformer_engine import _get_extra_te_kwargs
else:
    te = None
    _get_extra_te_kwargs = None


FP8_PER_TENSOR_REAL_QUANT_CFG = {
    "quant_cfg": {
        "*weight_quantizer": {"num_bits": (4, 3), "axis": None},
        "*input_quantizer": {"enable": False},
        "*output_layer*": {"enable": False},
        "default": {"enable": False},
    },
    "algorithm": "max",
}

# FP8 2D blockwise real quantization config for deepseek models
FP8_2D_BLOCKWISE_REAL_QUANT_CFG = {
    "quant_cfg": {
        "*weight_quantizer": {"num_bits": (4, 3), "block_sizes": {-1: 128, -2: 128}},
        "*input_quantizer": {"enable": False},
        "*output_layer*": {"enable": False},
        "default": {"enable": False},
    },
    "algorithm": "max",
}


class Norm:
    """
    A conditional wrapper to initialize an instance of Transformer-Engine's
    `LayerNorm` or `RMSNorm` based on input. If there is an additional _extra_state,
    insert _state_dict_hook and _load_state_dict_pre_hook to handle the state_dict
    mismatch issue.
    """

    def __new__(
        cls, config: TransformerConfig, hidden_size: int, eps: float = 1e-5
    ) -> LayerNormInterface:
        if not HAVE_TE:
            raise ImportError(
                "Transformer-Engine is not installed, please install it with "
                "`pip install transformer-engine`"
            )

        if config.normalization == "LayerNorm":
            instance = te.pytorch.LayerNorm(
                hidden_size=hidden_size,
                eps=eps,
                sequence_parallel=config.sequence_parallel,
                zero_centered_gamma=config.layernorm_zero_centered_gamma,
                **_get_extra_te_kwargs(config),
            )
        elif config.normalization == "RMSNorm":
            assert hasattr(
                te.pytorch, "RMSNorm"
            ), "Transformer-Engine >= v0.11 required to use this feature"
            instance = te.pytorch.RMSNorm(
                hidden_size=hidden_size,
                eps=eps,
                sequence_parallel=config.sequence_parallel,
                zero_centered_gamma=config.layernorm_zero_centered_gamma,
                **_get_extra_te_kwargs(config),
            )
        else:
            raise Exception("Only LayerNorm and RMSNorm are curently supported")

        def _state_dict_hook(self, state_dict, prefix, local_metadata):
            if "_extra_state" in state_dict:
                state_dict.pop("_extra_state")

        def _load_state_dict_pre_hook(
            state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
        ):
            state_dict[prefix + "_extra_state"] = None

        if "_extra_state" in instance.state_dict():
            instance._register_state_dict_hook(_state_dict_hook)
            instance._register_load_state_dict_pre_hook(_load_state_dict_pre_hook)

        return cast(LayerNormInterface, instance)


class Linear(torch.nn.Linear):
    """Local Linear impl as a replacement of ParallelLinear."""

    def __init__(
        self,
        input_size: int,
        output_size: int,
        *,
        config: ModelParallelConfig,
        init_method: Callable,
        bias: bool = True,
        gather_output: bool = False,
        stride: int = 1,
        keep_master_weight_for_test: bool = False,
        skip_bias_add: bool = False,
        skip_weight_param_allocation: bool = False,
        embedding_activation_buffer: Optional[List[torch.Tensor]] = None,
        grad_output_buffer: Optional[List[torch.Tensor]] = None,
        is_expert: bool = False,
        tp_comm_buffer_name: str = None,  # Not used
        disable_grad_reduce: bool = False,
        tp_group: Optional[torch.distributed.ProcessGroup] = None,
    ):
        self.config = config
        self.tp_group = tp_group

        self._return_bias = skip_bias_add and bias

        if stride != 1:
            raise ValueError("torch.nn.Linear does not support stride != 1")

        if skip_weight_param_allocation:
            raise ValueError("torch.nn.Linear layers do not support skip_weight_param_allocation")

        if embedding_activation_buffer is not None:
            raise ValueError("torch.nn.Linear does not support embedding_activation_buffer != None")

        if grad_output_buffer is not None:
            raise ValueError("torch.nn.Linear does not support grad_output_buffer != None")

        super().__init__(
            in_features=input_size, out_features=output_size, bias=bias, dtype=config.params_dtype
        )

        for param in self.parameters():
            if is_expert:
                # Reduce the gradient on the expert_data_parallel group for expert linear layers
                setattr(param, "allreduce", self.config.expert_model_parallel_size == 1)
            else:
                # Reduce the gradient on DP group
                setattr(param, "allreduce", True)
                setattr(param, "sequence_parallel", self.config.sequence_parallel)

    def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None):
        """Sharding along axis 0, bias sharded"""
        state_dict = self.state_dict(prefix="", keep_vars=True)

        for k, v in state_dict.items():
            if "_amax" in k or "_scale" in k:
                if v.ndim == 0:
                    state_dict[k] = v.view(1)
        sharded_state_dict = make_sharded_tensors_for_checkpoint(
            state_dict,
            prefix,
            sharded_offsets=sharded_offsets,
            tp_group=self.tp_group,
            dp_cp_group=metadata['dp_cp_group'],
        )
        return sharded_state_dict

    def forward(self, x):
        """Forward."""
        out = super().forward(x)

        if self._return_bias:
            return out
        return out, None


class RealQuantTransformerLayer(TransformerLayer):
    """Real quantization transformer layer base class.

    This base class iniitialize the default TransformerLayer and immediately
    perform weight-only real quantization via Model Optimizer.
    All linear weights (Linear, ColumnParallelLinear, RowParallelLinear) picked
    up will be replaced with low-bit data type (default torch.uint8). If sub-byte
    real_quant_cfg is used, the weight shape will further be half.

    This module cannot be trained (all parameters frozen).
    """

    verbose: bool = False
    real_quant_cfg: str = "None"

    @copy_signature(TransformerLayer.__init__)
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        try:
            import modelopt.torch.quantization as mtq

            has_nvidia_modelopt = True
        except Exception:
            has_nvidia_modelopt = False

        if has_nvidia_modelopt and self.real_quant_cfg != "None":
            REAL_QUANT_CFG_CHOICES = {
                "fp8_real_quant": FP8_PER_TENSOR_REAL_QUANT_CFG,
                "fp8_blockwise_real_quant": FP8_2D_BLOCKWISE_REAL_QUANT_CFG,
            }
            mtq_cfg = REAL_QUANT_CFG_CHOICES.get(self.real_quant_cfg, None)
            if mtq_cfg is None:
                raise ValueError(
                    "RealQuantTransformerLayer does not support {}".format(self.real_quant_cfg)
                )

            self._collect_original_tensor_info()

            mtq.quantize(self, mtq_cfg)
            mtq.compress(self)

            delattr(self, "_modelopt_state")

            # Freeze all parameters since the real-quant linears cannot be trained.
            for param in self.parameters():
                param.requires_grad = False

            if self.verbose:
                self._report_quantize_tensor_info()

    def _collect_original_tensor_info(self):
        self._original_tensor_info = {}
        for k, v in self.state_dict().items():
            if isinstance(v, torch.Tensor):
                self._original_tensor_info[k] = (str(v.dtype), str(v.shape))

    def _report_quantize_tensor_info(self):
        torch.distributed.barrier()
        if torch.distributed.get_rank() == 0:
            for k, v in self.state_dict().items():
                if not isinstance(v, torch.Tensor):
                    continue
                original_dtype, original_shape = self._original_tensor_info.get(k, ("-", "-"))
                logger.info(
                    "{:<64} {:<16} {:<32} {:<16} {:<32}".format(
                        k, original_dtype, original_shape, str(v.dtype), str(v.shape)
                    )
                )
        torch.distributed.barrier()


class FP8WeightTransformerLayer(RealQuantTransformerLayer):
    """FP8 weight transformer layer."""

    real_quant_cfg: str = "fp8_real_quant"


class BlockwiseFP8WeightTransformerLayer(RealQuantTransformerLayer):
    """Blockwise FP8 weight transformer layer."""

    real_quant_cfg: str = "fp8_blockwise_real_quant"


================================================
FILE: megatron/core/post_training/modelopt/mamba/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/core/post_training/modelopt/mamba/model_specs.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from megatron.core.extensions.transformer_engine import TEDotProductAttention
from megatron.core.fusions.fused_bias_dropout import get_bias_dropout_add
from megatron.core.models.gpt.moe_module_specs import get_moe_module_spec
from megatron.core.post_training.modelopt.layers import Norm
from megatron.core.ssm.mamba_block import MambaStack, MambaStackSubmodules
from megatron.core.ssm.mamba_layer import MambaLayer, MambaLayerSubmodules
from megatron.core.ssm.mamba_mixer import MambaMixer, MambaMixerSubmodules
from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear
from megatron.core.transformer.attention import SelfAttention, SelfAttentionSubmodules
from megatron.core.transformer.dot_product_attention import DotProductAttention
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from megatron.core.transformer.spec_utils import ModuleSpec
from megatron.core.transformer.transformer_layer import TransformerLayer, TransformerLayerSubmodules


# Use this spec for ModelOpt PTQ and TensorRT-LLM export
def get_mamba_stack_modelopt_spec(
    local_core_attention: bool = False,
    remap_te_layernorm: bool = False,
    use_default_te_spec: bool = False,
) -> ModuleSpec:
    """Get the Mamba stack spec for ModelOpt PTQ and TensorRT-LLM export.

    When use_default_te_spec=False (default), this is the native local spec with TENorm
    from Transformer-Engine for the layernorm implementation (since FusedLayerNorm from
    apex has stopped supporting RMSNorm needed by llama). The remap_te_layernorm flag
    can be used to add sharded state_dict key remapping for TE-compatible checkpoint
    saving/loading.

    When use_default_te_spec=True, this returns the standard mamba_stack_spec from
    mamba_layer_specs.py which uses full TE modules (TELayerNormColumnParallelLinear,
    TERowParallelLinear, TEDotProductAttention, TENorm, moe_grouped_gemm=True).


    Args:
        local_core_attention: whether to use local DotProductAttention
            (only for use_default_te_spec=False)
        remap_te_layernorm: whether to perform sharded state_dict prefix mapping
            on layernorm (only for use_default_te_spec=False)
        use_default_te_spec: whether to use the default Transformer-Engine spec
    """
    if use_default_te_spec:
        from megatron.core.models.mamba.mamba_layer_specs import mamba_stack_spec

        return mamba_stack_spec

    return _get_mamba_stack_local_spec(
        local_core_attention=local_core_attention, remap_te_layernorm=remap_te_layernorm
    )


def _get_mamba_stack_local_spec(
    local_core_attention: bool = False, remap_te_layernorm: bool = False
) -> ModuleSpec:
    """Get the Mamba stack spec with local (non-TE) modules.

    This is essentially the native local spec except for the layernorm implementation
    is using TENorm from Transformer-Engine.
    """
    mamba_state_dict_keys_map = {}
    transformer_state_dict_keys_map = {}
    if remap_te_layernorm:
        mamba_state_dict_keys_map = {'norm.': 'mixer.in_proj.layer_norm_'}
        transformer_state_dict_keys_map = {
            'input_layernorm.': 'self_attention.linear_qkv.layer_norm_',
            'pre_mlp_layernorm.': 'mlp.linear_fc1.layer_norm_',
        }

    mamba_layer = ModuleSpec(
        module=MambaLayer,
        submodules=MambaLayerSubmodules(
            norm=Norm,
            mixer=ModuleSpec(
                module=MambaMixer,
                submodules=MambaMixerSubmodules(
                    in_proj=ColumnParallelLinear, out_proj=RowParallelLinear
                ),
            ),
            mamba_bda=get_bias_dropout_add,
            sharded_state_dict_keys_map=mamba_state_dict_keys_map,
        ),
    )

    attn_mask_type = AttnMaskType.causal
    core_attention = DotProductAttention if local_core_attention else TEDotProductAttention
    attention_layer = ModuleSpec(
        module=TransformerLayer,
        submodules=TransformerLayerSubmodules(
            input_layernorm=Norm,
            self_attention=ModuleSpec(
                module=SelfAttention,
                params={"attn_mask_type": attn_mask_type},
                submodules=SelfAttentionSubmodules(
                    linear_qkv=ColumnParallelLinear,
                    core_attention=core_attention,
                    linear_proj=RowParallelLinear,
                ),
            ),
            self_attn_bda=get_bias_dropout_add,
            sharded_state_dict_keys_map=transformer_state_dict_keys_map,
        ),
    )

    mlp_layer = ModuleSpec(
        module=TransformerLayer,
        submodules=TransformerLayerSubmodules(
            pre_mlp_layernorm=Norm,
            mlp=ModuleSpec(
                module=MLP,
                submodules=MLPSubmodules(
                    linear_fc1=ColumnParallelLinear, linear_fc2=RowParallelLinear
                ),
            ),
            mlp_bda=get_bias_dropout_add,
            sharded_state_dict_keys_map=transformer_state_dict_keys_map,
        ),
    )

    moe_layer = ModuleSpec(
        module=TransformerLayer,
        submodules=TransformerLayerSubmodules(
            pre_mlp_layernorm=Norm,
            mlp=get_moe_module_spec(
                use_te=False, num_experts=8, moe_grouped_gemm=False  # Can be anything non None
            ),
            mlp_bda=get_bias_dropout_add,
        ),
    )

    return ModuleSpec(
        module=MambaStack,
        submodules=MambaStackSubmodules(
            mamba_layer=mamba_layer,
            attention_layer=attention_layer,
            mlp_layer=mlp_layer,
            moe_layer=moe_layer,
        ),
    )


================================================
FILE: megatron/core/process_groups_config.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

"""Dataclasses for organizing model parallelism and gradient communication process groups."""

from dataclasses import dataclass, field, fields
from functools import partial
from typing import Dict, List, Optional

import torch

from megatron.core import parallel_state


class ProcessGroupHelperMeta(type):
    """Metaclass to protect virtual_pipeline_model_parallel_size from direct assignment."""

    def __setattr__(cls, name, value):
        if name == 'virtual_pipeline_model_parallel_size':
            raise AttributeError(
                f"Cannot set '{name}' directly. Use set_virtual_pipeline_model_parallel_size() "
                f"method instead."
            )
        super().__setattr__(name, value)


@dataclass
class ProcessGroupCollection:
    """Unified process group collection for transformer model parallelism, gradient communication,
     and finalization.

    Fields use init=False and must be set after instance creation.

    Args:
        # Model Parallelism Groups
        tp: Tensor parallel process group
        pp: Pipeline parallel process group
        mp: Model parallel group (tensor + pipeline)
        embd: Embedding process group
        pos_embd: Position embedding process group
        cp: Context parallel process group
        tp_cp: Tensor and context parallel group
        hcp: Hierarchical context parallel groups
        ep: Expert model parallel group
        expt_tp: Expert tensor parallel group
        tp_ep: Tensor and expert parallel group
        tp_ep_pp: Tensor, expert, and pipeline parallel group

        # Data Parallelism Groups
        dp: Data parallel process group
        dp_cp: Data and context parallel group
        expt_dp: Expert data parallel group
        intra_dp_cp: Intra partial data parallel group
        intra_expt_dp: Intra partial expert data parallel group
        inter_dist_opt: Inter distributed optimizer instance group

    Example:
        # Create instance and set needed process groups
        pgs = ProcessGroupCollection()
        pgs.tp = tp_group
        pgs.pp = pp_group
        pgs.dp = dp_group

        # Pass to model components
        model = TransformerModel(..., pg_collection=pgs)
        ddp_model = DistributedDataParallel(..., pg_collection=pgs)
        finalize_model_grads(..., pg_collection=pgs)
    """

    # Model Parallelism Process Groups
    # _TENSOR_MODEL_PARALLEL_GROUP
    tp: torch.distributed.ProcessGroup = field(init=False)

    # _PIPELINE_MODEL_PARALLEL_GROUP
    pp: torch.distributed.ProcessGroup = field(init=False)

    # _MODEL_PARALLEL_GROUP
    mp: torch.distributed.ProcessGroup = field(init=False)

    # _EMBEDDING_GROUP
    embd: torch.distributed.ProcessGroup = field(init=False)

    # _POSITION_EMBEDDING_GROUP
    pos_embd: torch.distributed.ProcessGroup = field(init=False)

    # _CONTEXT_PARALLEL_GROUP
    cp: torch.distributed.ProcessGroup = field(init=False)

    # _TENSOR_AND_CONTEXT_PARALLEL_GROUP
    tp_cp: torch.distributed.ProcessGroup = field(init=False)

    # _HIERARCHICAL_CONTEXT_PARALLEL_GROUPS
    hcp: List[torch.distributed.ProcessGroup] = field(init=False)

    # Expert Parallelism Process Groups
    # _EXPERT_MODEL_PARALLEL_GROUP
    ep: torch.distributed.ProcessGroup = field(init=False)

    # _EXPERT_TENSOR_PARALLEL_GROUP
    expt_tp: torch.distributed.ProcessGroup = field(init=False)

    # _EXPERT_TENSOR_AND_MODEL_PARALLEL_GROUP
    tp_ep: torch.distributed.ProcessGroup = field(init=False)

    # _EXPERT_TENSOR_MODEL_PIPELINE_PARALLEL_GROUP
    tp_ep_pp: torch.distributed.ProcessGroup = field(init=False)

    # _TENSOR_AND_DATA_PARALLEL_GROUP_WITH_CP
    tp_dp_cp: torch.distributed.ProcessGroup = field(init=False)

    # Data Parallelism Process Groups
    # _DATA_PARALLEL_GROUP
    dp: torch.distributed.ProcessGroup = field(init=False)

    # _DATA_PARALLEL_GROUP_WITH_CP
    dp_cp: torch.distributed.ProcessGroup = field(init=False)

    # MoE layers need expt_dp group for sharded state dict
    # we need this workaround until distributed checkpoint is refactored
    # to have sharded_state_dict can take the PG and pass it down
    # TODO (Hepteract): remove this once distributed checkpoint is refactored
    # _EXPERT_DATA_PARALLEL_GROUP
    expt_dp: torch.distributed.ProcessGroup = field(init=False)

    # _INTRA_PARTIAL_DATA_PARALLEL_GROUP_WITH_CP
    intra_dp_cp: torch.distributed.ProcessGroup = field(init=False)

    # _INTRA_EXPERT_DATA_PARALLEL_GROUP
    intra_expt_dp: torch.distributed.ProcessGroup = field(init=False)

    # _INTER_PARTIAL_EXPERT_DATA_PARALLEL_GROUP
    inter_dist_opt: torch.distributed.ProcessGroup = field(init=False)

    # _INTRA_DISTRIBUTED_OPTIMIZER_INSTANCE_GROUP
    intra_dist_opt: torch.distributed.ProcessGroup = field(init=False)

    def __init__(self, **kwargs):
        for key in kwargs:
            if key in [field.name for field in fields(self)]:
                setattr(self, key, kwargs[key])
            else:
                raise ValueError(f"Unknown attribute: {key}")

    def __repr__(self):
        """Return a concise representation showing which process groups exist and their sizes."""
        active_pgs = []
        for field_info in fields(self):
            if hasattr(self, field_info.name):
                pg = getattr(self, field_info.name)
                if pg is not None:
                    active_pgs.append(f"{field_info.name}({pg.size()})")
                else:
                    # Field exists but is None
                    active_pgs.append(f"{field_info.name}(None)")
        return (
            f"ProcessGroupCollection({', '.join(active_pgs)})"
            if active_pgs
            else "ProcessGroupCollection(empty)"
        )

    @classmethod
    def use_mpu_process_groups(cls, required_pgs: Optional[List[str]] = None):
        """
        Use the default process groups from parallel_state.

        Args:
            required_pgs (List[str], optional): List of process group names to initialize.
                If None, pull all default process groups. Each string should correspond to
                one of the dataclass process group attributes.
        """
        # Get all available process groups
        all_pgs = {field.name for field in fields(cls)}

        # If no specific process groups requested, use all
        if required_pgs is None:
            required_pgs = list(all_pgs)

        # Validate requested process groups
        invalid_pgs = [pg for pg in required_pgs if pg not in all_pgs]
        if invalid_pgs:
            raise ValueError(f"Invalid process groups requested: {invalid_pgs}")

        # Mapping of attribute names to their initialization functions
        pg_to_func = {
            'tp': partial(parallel_state.get_tensor_model_parallel_group, check_initialized=False),
            'pp': partial(
                parallel_state.get_pipeline_model_parallel_group, check_initialized=False
            ),
            'mp': partial(parallel_state.get_model_parallel_group, check_initialized=False),
            'cp': partial(parallel_state.get_context_parallel_group, check_initialized=False),
            'tp_cp': partial(
                parallel_state.get_tensor_and_context_parallel_group, check_initialized=False
            ),
            'hcp': partial(
                parallel_state.get_hierarchical_context_parallel_groups, check_initialized=False
            ),
            'ep': partial(parallel_state.get_expert_model_parallel_group, check_initialized=False),
            'expt_tp': partial(
                parallel_state.get_expert_tensor_parallel_group, check_initialized=False
            ),
            'tp_ep': partial(
                parallel_state.get_expert_tensor_and_model_parallel_group, check_initialized=False
            ),
            'tp_ep_pp': partial(
                parallel_state.get_expert_tensor_model_pipeline_parallel_group,
                check_initialized=False,
            ),
            'embd': partial(parallel_state.get_embedding_group, check_initialized=False),
            'pos_embd': partial(
                parallel_state.get_position_embedding_group, check_initialized=False
            ),
            'dp': parallel_state.get_data_parallel_group,
            'dp_cp': partial(parallel_state.get_data_parallel_group, with_context_parallel=True),
            'intra_dp_cp': partial(
                parallel_state.get_data_parallel_group,
                with_context_parallel=True,
                partial_data_parallel=True,
            ),
            'intra_expt_dp': partial(
                parallel_state.get_expert_data_parallel_group,
                check_initialized=False,
                partial_expert_data_parallel=True,
            ),
            'inter_dist_opt': partial(
                parallel_state.get_inter_distributed_optimizer_instance_group,
                check_initialized=False,
            ),
            'intra_dist_opt': partial(
                parallel_state.get_intra_distributed_optimizer_instance_group,
                check_initialized=False,
            ),
            # TODO (Hepteract): remove this once distributed checkpoint is refactored
            'expt_dp': partial(
                parallel_state.get_expert_data_parallel_group, check_initialized=False
            ),
            'tp_dp_cp': partial(
                parallel_state.get_tensor_and_data_parallel_group,
                check_initialized=False,
                with_context_parallel=True,
            ),
        }

        assert all(
            pg in pg_to_func for pg in required_pgs
        ), f"Initialization function for process group not defined for all \
        ProcessGroupCollection fields"

        # Build initialization dict by calling appropriate parallel_state get_foo_group
        init_dict = {pg: pg_to_func[pg]() for pg in required_pgs}

        return cls(**init_dict)

    @staticmethod
    def setup_process_groups_for_optimizer(
        pg_collection: Optional['ProcessGroupCollection'],
        model_chunks: List,
        use_gloo_process_groups: bool = True,
    ):
        """
        Helper method to set up process groups for optimizer and DDP with proper validation
        and fallbacks.

        Args:
            pg_collection: Optional process group collection. If None, uses parallel_state groups.
            model_chunks: List of model chunks to extract configuration from.
            use_gloo_process_groups: Whether to set up gloo process groups.

        Returns:
            Dictionary containing all required process groups:
                - dp_group: Data parallel group
                - dp_cp_group: Data parallel with context parallel group
                - intra_dp_cp_group: Intra data parallel with context parallel group
                - expt_dp_group: Expert data parallel group
                - intra_expt_dp_group: Intra expert data parallel group
                - mp_group: Model parallel group
                - expt_tp_pp_group: Expert tensor-model-pipeline parallel group
                - inter_dist_opt_group: Inter distributed optimizer group (may be None)
                - intra_dist_opt_group: Intra distributed optimizer group (may be None)
                - intra_dp_cp_group_gloo: Gloo version of intra_dp_cp_group (may be None)
                - intra_expt_dp_group_gloo: Gloo version of intra_expt_dp_group (may be None)
        """
        from megatron.core import parallel_state
        from megatron.core.utils import get_model_config

        if pg_collection is None:
            # Use parallel_state groups
            dp_group = parallel_state.get_data_parallel_group(
                with_context_parallel=False, partial_data_parallel=False
            )
            dp_cp_group = parallel_state.get_data_parallel_group(
                with_context_parallel=True, partial_data_parallel=False
            )
            intra_dp_cp_group = parallel_state.get_data_parallel_group(
                with_context_parallel=True, partial_data_parallel=True
            )
            expt_dp_group = parallel_state.get_expert_data_parallel_group()
            intra_expt_dp_group = parallel_state.get_expert_data_parallel_group(
                partial_expert_data_parallel=True
            )
            intra_dist_opt_group = parallel_state.get_intra_distributed_optimizer_instance_group()

            # Gloo groups
            if use_gloo_process_groups:
                intra_dp_cp_group_gloo = parallel_state.get_data_parallel_group_gloo(
                    with_context_parallel=True, partial_data_parallel=True
                )
                intra_expt_dp_group_gloo = parallel_state.get_expert_data_parallel_group_gloo(
                    partial_expert_data_parallel=True
                )
            else:
                intra_dp_cp_group_gloo = None
                intra_expt_dp_group_gloo = None

            # Model communication groups
            mp_group = parallel_state.get_model_parallel_group()
            expt_tp_pp_group = parallel_state.get_expert_tensor_model_pipeline_parallel_group()

            # Inter distributed optimizer group
            if hasattr(model_chunks[0], 'ddp_config'):
                ddp_config = model_chunks[0].ddp_config
                if ddp_config.num_distributed_optimizer_instances > 1:
                    inter_dist_opt_group = (
                        parallel_state.get_inter_distributed_optimizer_instance_group()
                    )
                else:
                    inter_dist_opt_group = None
            else:
                inter_dist_opt_group = None

        else:
            # Use provided process group collection with validation and fallbacks

            # 1. dp group - this is always required
            if not hasattr(pg_collection, 'dp'):
                raise ValueError("dp process group is required but not provided in pg_collection")
            dp_group = pg_collection.dp

            # 2. dp_cp group: fallback logic based on context_parallel_size
            if hasattr(pg_collection, 'dp_cp'):
                dp_cp_group = pg_collection.dp_cp
            else:
                model_config = get_model_config(model_chunks[0])
                cp_size = getattr(model_config, 'context_parallel_size', 1)
                if cp_size == 1:
                    # If no context parallelism, dp_cp is same as dp
                    dp_cp_group = dp_group
                else:
                    raise ValueError(
                        "dp_cp process group is required when context_parallel_size > 1 "
                        "but not provided in pg_collection"
                    )

            # 3. Handle expert data parallel group
            if not hasattr(pg_collection, 'expt_dp'):
                raise ValueError(
                    "expt_dp process group is required but not provided in pg_collection. "
                    "Please explicitly set it to None if you don't need it."
                )
            expt_dp_group = pg_collection.expt_dp

            # 4. Handle intra_dp_cp, intra_expt_dp, and inter_dist_opt based on optimizer instances
            if hasattr(model_chunks[0], 'ddp_config'):
                ddp_config = model_chunks[0].ddp_config
                if ddp_config.num_distributed_optimizer_instances == 1:
                    # With a single optimizer instance:
                    # - intra_dp_cp is same as dp_cp
                    # - intra_expt_dp is same as expt_dp
                    # - inter_dist_opt is not needed (set to None)
                    intra_dp_cp_group = dp_cp_group
                    intra_expt_dp_group = expt_dp_group
                    inter_dist_opt_group = None
                else:
                    # With multiple optimizer instances, both groups must be provided
                    if not (
                        hasattr(pg_collection, 'intra_dp_cp')
                        and hasattr(pg_collection, 'intra_expt_dp')
                        and hasattr(pg_collection, 'inter_dist_opt')
                        and hasattr(pg_collection, 'intra_dist_opt')
                    ):
                        raise ValueError(
                            "intra_dp_cp, intra_expt_dp, inter_dist_opt, and intra_dist_opt "
                            "process groups are required when using multiple optimizer "
                            "instances (>1) but not provided in pg_collection"
                        )
                    intra_dp_cp_group = pg_collection.intra_dp_cp
                    intra_expt_dp_group = pg_collection.intra_expt_dp
                    inter_dist_opt_group = pg_collection.inter_dist_opt

                if ddp_config.use_distributed_optimizer:
                    if not hasattr(pg_collection, 'intra_dist_opt'):
                        raise ValueError(
                            "intra_dist_opt process group is required but not provided in "
                            "pg_collection. Please explicitly set it to None if you don't need it."
                        )
                    intra_dist_opt_group = pg_collection.intra_dist_opt
                else:
                    intra_dist_opt_group = None
            else:
                # No ddp_config available - use simple fallback
                intra_dp_cp_group = dp_cp_group
                intra_expt_dp_group = expt_dp_group
                inter_dist_opt_group = None
                intra_dist_opt_group = None

            # 5. Model communication groups
            if not hasattr(pg_collection, 'mp'):
                raise ValueError(
                    "mp process group is required but not provided in pg_collection. "
                    "Please explicitly set it to None if you don't need it."
                )
            mp_group = pg_collection.mp

            # Expert tensor-model-pipeline group for MoE
            if not hasattr(pg_collection, 'tp_ep_pp'):
                raise ValueError(
                    "tp_ep_pp process group is required but not provided in pg_collection. "
                    "Please explicitly set it to None if you don't need it."
                )
            expt_tp_pp_group = pg_collection.tp_ep_pp

            # Gloo groups - not supported when pg_collection is provided
            if use_gloo_process_groups:
                raise ValueError(
                    "Gloo process groups are not supported when pg_collection is "
                    "provided. Please set use_gloo_process_groups to False."
                )
            intra_dp_cp_group_gloo = None
            intra_expt_dp_group_gloo = None

        return {
            'dp_group': dp_group,
            'dp_cp_group': dp_cp_group,
            'intra_dp_cp_group': intra_dp_cp_group,
            'expt_dp_group': expt_dp_group,
            'intra_expt_dp_group': intra_expt_dp_group,
            'mp_group': mp_group,
            'expt_tp_pp_group': expt_tp_pp_group,
            'inter_dist_opt_group': inter_dist_opt_group,
            'intra_dist_opt_group': intra_dist_opt_group,
            'intra_dp_cp_group_gloo': intra_dp_cp_group_gloo,
            'intra_expt_dp_group_gloo': intra_expt_dp_group_gloo,
        }

    @staticmethod
    def setup_process_groups_for_ddp(
        pg_collection: Optional['ProcessGroupCollection'], config, ddp_config
    ):
        """
        Helper method to set up process groups for DDP with proper validation and fallbacks.

        Args:
            pg_collection: Optional process group collection. If None, uses parallel_state groups.
            config: Model config to extract context_parallel_size from.
            ddp_config: DDP config to extract num_distributed_optimizer_instances from.

        Returns:
            Dictionary containing all required process groups for DDP.
        """
        import logging

        import torch

        from megatron.core import parallel_state
        from megatron.core.utils import log_single_rank

        logger = logging.getLogger(__name__)

        if pg_collection is None:
            # Use parallel_state groups
            return {
                'dp_group': parallel_state.get_data_parallel_group(
                    with_context_parallel=False, partial_data_parallel=False
                ),
                'dp_cp_group': parallel_state.get_data_parallel_group(
                    with_context_parallel=True, partial_data_parallel=False
                ),
                'intra_dp_cp_group': parallel_state.get_data_parallel_group(
                    with_context_parallel=True, partial_data_parallel=True
                ),
                'expt_dp_group': parallel_state.get_expert_data_parallel_group(),
                'intra_expt_dp_group': parallel_state.get_expert_data_parallel_group(
                    partial_expert_data_parallel=True
                ),
                'tp_group': parallel_state.get_tensor_model_parallel_group(),
                'pp_group': parallel_state.get_pipeline_model_parallel_group(),
                'ep_group': parallel_state.get_expert_model_parallel_group(),
                'inter_dist_opt_group': (
                    parallel_state.get_inter_distributed_optimizer_instance_group()
                    if ddp_config.num_distributed_optimizer_instances > 1
                    else None
                ),
                'intra_dist_opt_group': (
                    parallel_state.get_intra_distributed_optimizer_instance_group()
                    if ddp_config.use_distributed_optimizer
                    else None
                ),
            }
        else:
            # Use provided process group collection with validation and fallbacks
            result = {}

            # 1. dp group - this is always required
            if not hasattr(pg_collection, 'dp'):
                raise ValueError("dp process group is required but not provided in pg_collection")
            result['dp_group'] = pg_collection.dp

            # 2. dp_cp group: fallback logic based on context_parallel_size
            if hasattr(pg_collection, 'dp_cp'):
                result['dp_cp_group'] = pg_collection.dp_cp
            else:
                cp_size = getattr(config, 'context_parallel_size', 1)
                if cp_size == 1:
                    # If no context parallelism, dp_cp is same as dp
                    result['dp_cp_group'] = result['dp_group']
                else:
                    raise ValueError(
                        "dp_cp process group is required when context_parallel_size > 1 "
                        "but not provided in pg_collection"
                    )

            # 3. Handle expert data parallel group (DDP-specific: create if missing)
            if hasattr(pg_collection, 'expt_dp') and pg_collection.expt_dp is not None:
                result['expt_dp_group'] = pg_collection.expt_dp
            else:
                # Create a new group with just the current rank for DDP
                log_single_rank(
                    logger,
                    logging.WARNING,
                    "No expert data parallel group provided in pg_collection, "
                    "creating a new one with just the current rank",
                )
                result['expt_dp_group'] = torch.distributed.new_group(
                    ranks=[torch.distributed.get_rank()]
                )

            # 4. Handle intra groups based on optimizer instances
            if ddp_config.num_distributed_optimizer_instances == 1:
                result['intra_dp_cp_group'] = result['dp_cp_group']
                result['intra_expt_dp_group'] = result['expt_dp_group']
                result['inter_dist_opt_group'] = None
            else:
                # With multiple optimizer instances, groups must be provided
                if not (
                    hasattr(pg_collection, 'intra_dp_cp')
                    and hasattr(pg_collection, 'intra_expt_dp')
                    and hasattr(pg_collection, 'inter_dist_opt')
                ):
                    raise ValueError(
                        "intra_dp_cp, intra_expt_dp, and inter_dist_opt "
                        "process groups are required when using multiple optimizer "
                        "instances (>1) but not provided in pg_collection"
                    )
                result['intra_dp_cp_group'] = pg_collection.intra_dp_cp
                result['intra_expt_dp_group'] = pg_collection.intra_expt_dp
                result['inter_dist_opt_group'] = pg_collection.inter_dist_opt

            # 5. Model parallel groups (DDP-specific: tp, pp, ep instead of mp, expt_tp_pp)
            if not all(
                [
                    hasattr(pg_collection, 'tp'),
                    hasattr(pg_collection, 'pp'),
                    hasattr(pg_collection, 'ep'),
                ]
            ):
                raise ValueError(
                    "tp, pp and ep process groups are required but not provided in pg_collection"
                )
            result['tp_group'] = pg_collection.tp
            result['pp_group'] = pg_collection.pp
            result['ep_group'] = pg_collection.ep

            return result


@dataclass
class MultiModuleProcessGroupCollection:
    """Process group collection for multi-module pipelines.

    Used when a rank participates in multiple modules (e.g., colocated encoder + LLM).
    The language_model_module_name identifies which module is the language model (used for
    CP size extraction, loss computation, and other LLM-specific operations).

    Attributes:
        module_pgs: Dict mapping module names to ProcessGroupCollection objects
        language_model_module_name: Key identifying the language model module
            (None if no LLM on this rank)

    Example:
        # Colocated rank with encoder and LLM
        pg_collection = MultiModuleProcessGroupCollection(
            module_pgs={"encoder": encoder_pg, "llm": llm_pg},
            language_model_module_name="llm"
        )

        # Rank with dual encoders (no LLM)
        pg_collection = MultiModuleProcessGroupCollection(
            module_pgs={"encoder_1": encoder_1_pg, "encoder_2": encoder_2_pg},
            language_model_module_name=None
        )

        # Single module (can also use ProcessGroupCollection directly)
        pg_collection = MultiModuleProcessGroupCollection(
            module_pgs={"llm": llm_pg},
            language_model_module_name="llm"
        )

        # Usage
        cp_size = pg_collection.get_language_model_cp_size()
        encoder_pg = pg_collection["encoder_1"]  # Dict-like access
        has_llm = pg_collection.has_language_model()
    """

    module_pgs: Dict[str, ProcessGroupCollection]
    language_model_module_name: Optional[str] = None

    def __post_init__(self):
        if not self.module_pgs:
            raise ValueError("module_pgs dict cannot be empty")
        if self.language_model_module_name is not None:
            if self.language_model_module_name not in self.module_pgs:
                raise ValueError(
                    f"language_model_module_name '{self.language_model_module_name}' not found in "
                    f"module_pgs keys: {list(self.module_pgs.keys())}"
                )

    def get_language_model_collection(self) -> ProcessGroupCollection:
        """Get the language model's process group collection.

        Returns:
            ProcessGroupCollection for the language model.

        Raises:
            ValueError: If no language model is specified for this collection.
        """
        if self.language_model_module_name is None:
            raise ValueError("No language model specified for this collection")
        return self.module_pgs[self.language_model_module_name]

    def get_language_model_cp_size(self) -> int:
        """Get context parallel size for the language model.

        Returns:
            Context parallel size for the language model.

        Raises:
            ValueError: If no language model is specified for this collection.
        """
        return self.get_language_model_collection().cp.size()

    def has_language_model(self) -> bool:
        """Check if this rank has a language model.

        Returns:
            True if this rank has a language model, False otherwise.
        """
        return self.language_model_module_name is not None

    def get_module_collection(self, module_name: str) -> ProcessGroupCollection:
        """Get process group collection for a specific module.

        Args:
            module_name: Name of the module.

        Returns:
            ProcessGroupCollection for the specified module.

        Raises:
            ValueError: If module_name is not found in collections.
        """
        if module_name not in self.module_pgs:
            raise ValueError(
                f"Module '{module_name}' not found in collections. "
                f"Available: {list(self.module_pgs.keys())}"
            )
        return self.module_pgs[module_name]

    def __len__(self):
        """Return the number of modules in this wrapper."""
        return len(self.module_pgs)

    def __getitem__(self, module_name: str):
        """Get process group collection for a module using dict-like access."""
        return self.module_pgs[module_name]

    def __iter__(self):
        """Iterate over all process group collections."""
        return iter(self.module_pgs.values())

    def keys(self):
        """Return module names."""
        return self.module_pgs.keys()

    def values(self):
        """Return process group collections."""
        return self.module_pgs.values()

    def items(self):
        """Return (module_name, collection) pairs."""
        return self.module_pgs.items()

    def __repr__(self):
        """Return a concise representation showing modules and their language model status."""
        modules_str = ', '.join(self.module_pgs.keys())
        lm_str = (
            f", language_model_module_name='{self.language_model_module_name}'"
            if self.language_model_module_name
            else ""
        )
        return f"MultiModuleProcessGroupCollection(modules=[{modules_str}]{lm_str})"


================================================
FILE: megatron/core/quantization/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/core/quantization/quant_config.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

"""
Provide base functionality for quantization purposes.

Usage comes from a user-provide YAML file, for example:

configs:
  nvfp4:
    $payload1
  mxfp8:
    $payload2

matchers:
  fc1:
    config: "nvfp4"
    type: "glob"
    pattern: "*fc1*"
    enabled: True
  fc2:
    config: "nvfp4"
    type: "glob"
    pattern: "*fc2*"
    enabled: True
  default:
    config: "mxfp8"
    type: "glob"
    pattern: "*"
    enabled: True

The user-passed configuration is split into 2 distinct pieces:
 * A set of quantization configs, describing *how* a given operator will be quantized
   Note: This is consumed by the operator(s), and the particular operators being instantiated
     are responsible for parsing this configuration if they support configurable quantization.
 * An ordered collection of matchers that determine what quantization config (if any) is
   applied to a given operator. The first matcher in the collection that successfully matches
   the context determines the key from the configs dict. If a matcher doesn't match, the rest
   of the matchers in the list are tested against.
   Matchers define a type, or style of matching - "glob" is bash-style, but this
   can be extended by inheriting from the abstract Matcher class to define a new match type.

The idea here is to provide an ability to define arbitrarily-complicated recipes in as
friendly a way as possible.
"""

import fnmatch
import logging
from abc import ABC, abstractmethod
from copy import deepcopy
from dataclasses import dataclass
from typing import Dict, List, Optional

from megatron.core.utils import log_single_rank

logger = logging.getLogger(__name__)

try:
    import yaml

    HAVE_YAML = True
except ImportError:
    HAVE_YAML = False


@dataclass
class MatchContext:
    """Layer context that can be matched to a quantization config."""

    module_path: str
    layer_number: Optional[int]


class QuantizationConfig:
    """Wrapper around configuration dictionary for layer's numerics."""

    def __init__(self, config: dict, match_input: MatchContext, config_key: str):
        """
        Initialize the quantization config.

        The configuration dictionary is copied to defend against modules that
        mutate the configuration corrupting the configuration of other modules.
        """
        self.config = deepcopy(config)
        self.match_input = match_input
        self.config_key = config_key

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(config={self.config}, "
            f"match_input={self.match_input}, config_key={self.config_key})"
        )


class Matcher(ABC):
    """Matcher interface to select layers."""

    @abstractmethod
    def match(self, context: MatchContext) -> Optional[str]:
        """
        Match a layer based on its qualified name.

        If it does not match, return None. If it matches,
        return the configuration key to select for the layer.
        """
        return None


class GlobMatcher(Matcher):
    """Pattern based matcher using fnmatch to compare the module path against a pattern.
    fnmatch supplies glob-style matching similar to that used in bash, allowing for matches like:

    match_str="*fc2*" - match anything which includes "fc2" anywhere in the string.
    match_str="*fc2" - match anything which includes "fc2" at the end of the string.
    match_str="*layers.10*" - match anything with "layers.10" (layer #) in the string.
    """

    def __init__(self, pattern: str, config_key: str):
        self.pattern = pattern
        self.config_key = config_key

    def match(self, context: MatchContext) -> Optional[str]:
        """Pattern based match."""
        if fnmatch.fnmatch(context.module_path, self.pattern):
            return self.config_key
        return None

    def __repr__(self) -> str:
        return f"{type(self).__name__}(pattern={self.pattern}, config_key={self.config_key})"


class RecipeConfig:
    """Hold recipe information (matcher_fn) -> Configs)"""

    def __init__(self, matchers: List[Matcher], config_dict: Dict[str, Dict]):
        self.configs = config_dict
        self.matchers = matchers

    @staticmethod
    def _build_matchers(matchers_dict: Dict | None) -> List[Matcher]:
        # NOTE(slayton): We rely on order for matchers because it allows us to specify an
        # override ordering from the yaml structure. Process matchers in order of
        # definition, so we can have fallthrus.
        matchers: List[Matcher] = []
        if matchers_dict is None:
            return matchers

        for name, matcher in matchers_dict.items():
            enabled = matcher.get("enabled", False)

            if not enabled:
                continue

            match_type = matcher.get("type", None)
            assert match_type is not None, f'Matcher must specify a "type" field'

            if match_type == "glob":
                pattern = matcher.get("pattern", None)
                config = matcher.get("config", None)

                assert pattern is not None, f'GlobMatcher must specify "pattern" field'
                assert config is not None, f'GlobMatcher must specify "config" field'

                m = GlobMatcher(pattern, config)
            else:
                raise NotImplementedError(f"Match type '{match_type}' not implemented")

            matchers.append(m)

        return matchers

    @staticmethod
    def from_yaml_file(recipe_yaml_path: str) -> "RecipeConfig":
        """Loads recipe from yaml configuration."""

        if not HAVE_YAML:
            raise ImportError("yaml is not installed. Please install it with `pip install pyyaml`.")

        with open(recipe_yaml_path, "r") as f:
            config = yaml.load(f, Loader=yaml.SafeLoader)

        log_single_rank(
            logger,
            logging.INFO,
            f"Loaded quantization recipe from path '{recipe_yaml_path}'. " f"Contents: '{config}'",
        )

        return RecipeConfig.from_config_dict(config)

    @staticmethod
    def from_config_dict(config: Dict) -> "RecipeConfig":
        """Loads recipe from dict configuration."""

        matchers_config = config.get("matchers", None)
        matchers = RecipeConfig._build_matchers(matchers_config)
        config_dict = config.get("configs", {})

        return RecipeConfig(matchers, config_dict)

    def match_to_config_key(self, operator_context: MatchContext) -> str | None:
        """
        Gives an operator's context, return a configuration key if
        necessary, or sentinel (None) denoting no matchers matched.
        """
        for matcher in self.matchers:
            config_key = matcher.match(operator_context)
            if config_key is not None:
                log_single_rank(
                    logger,
                    logging.INFO,
                    f'Context ({operator_context}) matched to quant config "{config_key}"',
                )
                return config_key
        log_single_rank(
            logger, logging.INFO, f"No config key match found for Context ({operator_context})"
        )
        return None

    def match(self, operator_context: MatchContext) -> QuantizationConfig | None:
        """
        Gives an operator's context, return a QuantizationConfig if
        necessary, or sentinel (None) denoting no matchers matched.
        """
        config_key = self.match_to_config_key(operator_context)
        if config_key is not None:
            return QuantizationConfig(
                self.configs[config_key], match_input=operator_context, config_key=config_key
            )
        return None

    def __repr__(self) -> str:
        s = f"{type(self).__name__}(\n"
        for matcher in self.matchers:
            s += f"  matcher({repr(matcher)}\n"
        s += ")"
        return s


================================================
FILE: megatron/core/quantization/utils.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import re
from typing import Optional, Union

from .quant_config import GlobMatcher, MatchContext, QuantizationConfig, RecipeConfig


def get_quant_config_or_none(
    module_path: str, recipe: Optional[RecipeConfig] = None
) -> Union[QuantizationConfig, None]:
    """Resolve quantization config for a layer."""
    if recipe is None:
        return None
    re_match = re.search(r'layers\.(\d+)', module_path)
    if re_match:
        layer_number: Optional[int] = int(re_match.group(1))
    else:
        layer_number = None
    return recipe.match(MatchContext(module_path=module_path, layer_number=layer_number))


def load_quantization_recipe(recipe_path: str) -> RecipeConfig:
    """Loads a quantization recipe from a path."""
    recipe = RecipeConfig.from_yaml_file(recipe_path)
    return recipe


def kitchen_quantization_recipe_config(recipe_idx: int) -> RecipeConfig:
    """Loads a quantization recipe that uses a QAT_PARAMS recipe for all layers."""
    recipe = RecipeConfig(
        matchers=[GlobMatcher(pattern="*", config_key="default")],
        config_dict={"default": {"kitchen_config_type": "QLinearParams", "recipe_idx": recipe_idx}},
    )
    return recipe


================================================
FILE: megatron/core/requirements.txt
================================================
torch
packaging


================================================
FILE: megatron/core/rerun_state_machine.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import datetime
import inspect
import logging
import math
import os
import random
import re
from collections import defaultdict
from enum import Enum
from typing import Any, Callable, Iterable, List, NamedTuple, Optional, Set, Tuple, Union

import numpy as np
import torch

from megatron.core._rank_utils import log_single_rank, safe_get_rank
from megatron.core.dist_checkpointing.mapping import ShardedObject
from megatron.core.typed_torch import copy_signature

"""DISCLAIMER: THIS IS AN EXPERIMENTAL FEATURE.

The rerun state machine implementation in this file is alpha-level code to help
with attribution of unexpected results (e.g. NaN, spiky loss, etc.). This code
has not been tested at scale so should not be assumed to be accurate. Nodes
flagged by this code as potentially faulty should be subjected to standard
diagnostic test suites for a definitive diagnosis.

Also note that experimental features may break existing APIs.
"""

logger = logging.getLogger(__name__)

_GLOBAL_RERUN_STATE_MACHINE: Optional["RerunStateMachine"] = None

# Exit code returned when job needs to be restarted to disambiguate the results.
EXIT_CODE_RESUME_TO_DISAMBIGUATE: int = 16

# Exit code returned when job failed on result validation.
EXIT_CODE_FAILED_ON_RESULT_VALIDATION: int = 17

SerializableStateType = Union[list, dict]
DataIteratorArgType = Optional[Union["RerunDataIterator", list["RerunDataIterator"]]]


class Caller(NamedTuple):
    """Class capturing where validate_result() is called from."""

    message: str
    rank: int


class Call(NamedTuple):
    """Class capturing a function call."""

    caller: Caller
    sequence: int


class RerunDiagnostic(str, Enum):
    """Enum representing the different diagnostic attributions.

    CORRECT_RESULT: the result was the expected result given the input.
    TRANSIENT_ERROR: the result could not be reproduced on the same GPU.
    PERSISTENT_ERROR: the result could be reproduced on the same GPU, but
        not on a different GPU.
    """

    CORRECT_RESULT = "correct_result"
    TRANSIENT_ERROR = "transient_error"
    PERSISTENT_ERROR = "persistent_error"


class RerunMode(str, Enum):
    """Enum representing the different run mode for the rerun state machine."""

    DISABLED = "disabled"
    VALIDATE_RESULTS = "validate_results"
    REPORT_DETERMINISM_STATS = "report_determinism_stats"


class RerunState(Enum):
    """Enum representing the different states of the rerun state machine.

    Description of states (would benefit from a diagram):
    - NOT_RUNNING_YET
        State before the should_rerun_forward_and_backward while loop has been entered (and
        not restarting from a checkpoint for a 2nd re-run), and after it has been successfully
        completed (all validation succeeded).
    - INITIAL_RUN
        State during the initial run of the should_rerun_forward_and_backward while loop.
    - RERUNNING_IN_PLACE
        State during the second run of the should_rerun_forward_and_backward (1+ validation has
        failed).
    - WILL_RERUN_FROM_CHECKPOINT
        State after the should_rerun_forward_and_backward while loop has exited (on initial job run)
        and before the while loop has been entered (on the second job run restarted from the
        checkpoint) when the 1st re-run yielded the same result than on the initial run.
    - RERUNNING_FROM_CHECKPOINT
        State during first (and only) run of the should_rerun_forward_and_backward while loop when
        the job was restarted from a checkpoint.
    - RERUNNING_AGAIN_FROM_CHECKPOINT
        State when the re-run from checkpoint was rescheduled on the same potentially faulty GPU.
    """

    NOT_RUNNING_YET = 0
    INITIAL_RUN = 1
    RERUNNING_IN_PLACE = 2
    WILL_RERUN_FROM_CHECKPOINT = 3
    RERUNNING_FROM_CHECKPOINT = 4
    RERUNNING_AGAIN_FROM_CHECKPOINT = 5


class RerunValidationStatus(str, Enum):
    """Enum representing the status of a record in the tracker log file"""

    RERUN_DISABLED = "rerun_disabled"
    INITIAL_RUN = "initial_run"
    FIRST_RERUN_NOT_REPRODUCIBLE = "first_rerun_not_reproducible"
    FIRST_RERUN_REPRODUCIBLE = "first_rerun_reproducible"
    SECOND_RERUN_NOT_REPRODUCIBLE = "second_rerun_not_reproducible"
    SECOND_RERUN_REPRODUCIBLE = "second_rerun_reproducible"


COMPARISON_MATCH: float = 0.0
COMPARISON_MISMATCH: float = math.inf


class RerunStateMachine:
    """Class implementing the re-run state machine used to validate calculations.

    This class is a singleton and should not be instantiated directly. The instance
    should be initialized by calling the initialize_rerun_state_machine() helper function instead.

    Args:
        state_save_func: optional function to save any additional state that needs
                    to be restore to rerun the iteration.
        state_restore_func: optional function to restore the state saved by state_save_func.
        mode: operating mode for the rerun state machine, default is disabled.
        error_injector: optional result injection engine, default is no result injection.
        result_rejected_tracker_filename: optional name of file tracking `result rejected` events.

    Example usage:

        def state_save_func():
            # save any custom state that may change during the
            # forward-backward pass and that needs to be saved/restored
            # when re-running the iteration (Python/NumPy/Pytorch/CUDA
            # RNG states already taken care of)
            return {
                'mystate': get_state(...)
            }

        def state_restore_func(state_dict):
            restore_state(state_dict['mystate'])

        initialize_rerun_state_machine(
            state_save_func=state_save_func,
            state_restore_func=state_restore_func,
            error_injector=RerunErrorInjector(
                error_injection_rate=100000,
                error_injection_type=RerunDiagnostic.TRANSIENT_ERROR,
            ),
        )

    To use the rerun state machine, the training code needs to be modified as described in the
    documentation for each of the public methods.

    Caveats and assumptions:
    1) A core assumption of the rerun state machine is that execution (flow control) of the
    iteration is deterministic w.r.t. the state captured by the rerun state (_save_state() and
    _restore_state() methods below). More specifically, the requirement is that a re-run of the
    iteration yields the same calls to validate_results() as in the initial run.
    On the other hand, computations are NOT required to be deterministic, i.e. results may vary
    slightly across re-runs of the iteration.

    2) The re-run logic is currently only able to re-run the current step. It may be that an
    unexpected result (e.g. spiky loss) is the result of a calculation that happened at a previous
    iteration. The current implementation will not catch such issues. We're planning to add the
    capability to re-run multiple steps in a future implementation.
    """

    REPORTING_INTERVAL_ITERATIONS: int = 2

    def __init__(
        self,
        state_save_func: Optional[Callable[[], SerializableStateType]] = None,
        state_restore_func: Optional[Callable[[SerializableStateType], None]] = None,
        mode: RerunMode = RerunMode.DISABLED,
        error_injector: Optional["RerunErrorInjector"] = None,
        result_rejected_tracker_filename: Optional[str] = None,
    ) -> None:
        self.mode: RerunMode = mode
        self.state: RerunState = RerunState.NOT_RUNNING_YET
        self.current_iteration: int = -1
        self.first_iteration_complete = False
        # The flags below are per-rank flags that get all-reduced across all ranks
        # request to rerun iteration  because validation failed (1st re-run).
        self.rerun_requested: bool = False
        # Request to checkpoint to re-run iteration on different GPU (2nd re-run).
        self.checkpoint_requested: bool = False
        # Request to restart job again from checkpoint because got the same GPU (3rd+ re-run).
        self.restart_again_requested: bool = False
        # Request to resume normal execution when no HW fault was detected.
        self.continue_requested: bool = False
        self.logged_sdc_enabled: bool = False

        self.error_injector: RerunErrorInjector = error_injector or RerunErrorInjector()
        self.validation_counts: dict[Caller, int] = defaultdict(int)
        self.failed_validation_call: Optional[Call] = None
        self.initial_result: Any = None
        self.suspicious_node: str = None
        self.suspicious_device: int = None

        # Keep track of `result_rejected` events.
        # Make sure the file can be written to and abort if not.
        self.result_rejected_tracker_filename = result_rejected_tracker_filename
        if self.result_rejected_tracker_filename is not None:
            try:
                with open(self.result_rejected_tracker_filename, "a"):
                    pass
            except Exception as e:
                raise RuntimeError(
                    f"RerunStateMachine result validation log cannot be appended to! ({e})"
                )

        self.saved_state: Optional[SerializableStateType] = None
        self.state_save_func: Optional[Callable[[], SerializableStateType]] = state_save_func
        self.state_restore_func: Optional[Callable[[SerializableStateType], None]] = (
            state_restore_func
        )
        self.data_iterator_checkpoints: Optional[list[SerializableStateType]] = None

        self.large_value_counts: dict[str, int] = {}
        self.max_values: dict[str, float] = {}

        self.saved_results: dict[Call, Any] = {}
        self.stats: dict[Caller, QuickStats] = defaultdict(lambda: QuickStats())
        log_single_rank(logger, logging.WARNING, f"RerunStateMachine initialized in mode {mode}")

    def set_mode(self, mode: RerunMode) -> None:
        """Method to set the operating mode"""

        log_single_rank(logger, logging.WARNING, f"Setting RerunStateMachine mode {mode}")
        self.mode = mode

    def get_mode(self) -> RerunMode:
        """Method to get the operating mode"""

        return self.mode

    def _reduce_any(self, value: Union[bool, List[bool]]) -> Union[bool, Tuple[bool, ...]]:
        """
        All-reduce a boolean value (or multiple boolean values) across the world group.

        If any of the ranks have a True value, return True.
        If all the ranks have a False value, return False.

        For multiple inputs, returns a tuple.
        """
        if isinstance(value, list):
            val_tensor: torch.Tensor = torch.tensor(value, dtype=torch.int32, device='cuda')
            torch.distributed.all_reduce(val_tensor)
            return tuple([x > 0 for x in val_tensor.tolist()])
        else:
            val_tensor: torch.Tensor = torch.tensor([value], dtype=torch.int32, device='cuda')
            torch.distributed.all_reduce(val_tensor)
            return val_tensor.item() > 0

    def should_run_forward_backward(self, data_iterator: DataIteratorArgType) -> bool:
        """Method instructing whether to (re)run the forward-backward pass.

        Args:
            data_iterator: data iterator or list of data iterators used in this step,
                or None if no data iterator
        Returns:
            A boolean telling whether the forward-backward pass should be (re)run.

        Example usage:

            def train_step(data_iterator, ...):
                rerun_state_machine = get_rerun_state_machine()
                while rerun_state_machine.should_rerun_forward_and_backward(data_iterator):
                    optimizer.zero_grad()
                    data = next(data)
                    outputs = model(data)
                    loss = loss_fn(outputs)
                    loss.backward()
                ...
                optimizer.step()
        """

        self.validation_counts = defaultdict(int)

        data_iterators: list[RerunDataIterator] = self._sanitize_data_iterators(data_iterator)

        # Are we about to start the initial run?
        if self.state == RerunState.NOT_RUNNING_YET:
            if self.mode == RerunMode.DISABLED:
                self.state = RerunState.INITIAL_RUN
                self.current_iteration += 1  # Increment self.current_iteration for reporting.
                return True
            if self.data_iterator_checkpoints is not None:
                assert len(self.data_iterator_checkpoints) == len(
                    data_iterators
                ), "data iterator has different length than checkpointed data iterator"
                for i, d in enumerate(data_iterators):
                    d.load_state_dict(self.data_iterator_checkpoints[i])
                self.data_iterator_checkpoints = None
            self._save_state()
            if data_iterators:
                for d in data_iterators:
                    d.advance()
            self.rerun_requested = False
            self.checkpoint_requested = False
            self.restart_again_requested = False
            self.continue_requested = False
            self.injected_result = None
            self.current_iteration += 1
            self.state = RerunState.INITIAL_RUN
            return True
        # Are we done with the initial run?
        elif self.state == RerunState.INITIAL_RUN:
            if self.mode == RerunMode.DISABLED:
                self.state = RerunState.NOT_RUNNING_YET
                return False
            will_rerun = self._reduce_any(self.rerun_requested)
            if not will_rerun:
                self.state = RerunState.NOT_RUNNING_YET
                return False
            if self.mode == RerunMode.VALIDATE_RESULTS and safe_get_rank() == 0:
                logger.warning("Need to rerun step to check reproducibility of initial result")
            self.state = RerunState.RERUNNING_IN_PLACE
            self._restore_state()
            if data_iterators:
                for d in data_iterators:
                    d.rewind()
            return True
        # Are we done with the 1st re-run?
        elif self.state == RerunState.RERUNNING_IN_PLACE:
            # If we are reporting stats rather than validating results, we just continue with
            # normal execution after re-running the step once to compare results.
            if self.mode == RerunMode.REPORT_DETERMINISM_STATS:
                self.state = RerunState.NOT_RUNNING_YET
                self._maybe_report_stats()
                self.saved_results = defaultdict(list)
                return False
            # N.B. We may be able to rely on the behavior of the state machine
            # to produce an equivalent value of self.continue_requested across
            # ranks, since it depends on "fatal". That logic coupling seems
            # brittle though.
            will_continue, will_checkpoint = self._reduce_any(
                [self.continue_requested, self.checkpoint_requested]
            )
            if will_continue:
                log_single_rank(
                    logger,
                    logging.WARNING,
                    "Continuing normal execution because failed validation was not fatal",
                )
                self.state = RerunState.NOT_RUNNING_YET
                return False
            if will_checkpoint:
                self.state = RerunState.WILL_RERUN_FROM_CHECKPOINT
            self._restore_state()
            if data_iterators:
                for d in data_iterators:
                    d.rewind()
            return False
        # Are we about to re-run from a checkpoint?
        elif self.state == RerunState.WILL_RERUN_FROM_CHECKPOINT:
            self.state = RerunState.RERUNNING_FROM_CHECKPOINT
            return True
        # Are we done re-running from a checkpoint?
        elif self.state == RerunState.RERUNNING_FROM_CHECKPOINT:
            will_restart_again, will_continue = self._reduce_any(
                [self.restart_again_requested, self.continue_requested]
            )
            if will_restart_again:
                log_single_rank(
                    logger,
                    logging.WARNING,
                    "Need to restart job from the same checkpoint "
                    "because it was scheduled on the same node/GPU",
                )
                self.state = RerunState.RERUNNING_AGAIN_FROM_CHECKPOINT
            else:
                if will_continue:
                    log_single_rank(
                        logger,
                        logging.WARNING,
                        "Continuing normal execution because failed validation was not fatal",
                    )
                    self.state = RerunState.NOT_RUNNING_YET
            return False
        raise RuntimeError("Should not be here")

    def should_checkpoint_and_exit(self) -> Tuple[bool, bool, int]:
        """Method instructing whether to checkpoint and/or abort the job.

        Args:
            None
        Returns:
            A tuple formed of:
            - a boolean telling whether a checkpoint should be taken.
            - a boolean telling whether the job should be aborted.
            - an exit code (int) to return if aborting (0 if not aborting).

        Example usage:

            def train_step(data_iterator, ...):
                rerun_state_machine = get_rerun_state_machine()
                while rerun_state_machine.should_rerun_forward_and_backward(data_iterator):
                    ...
                should_checkpoint, should_exit, exit_code = (
                    rerun_state_machine.should_checkpoint_and_exit()
                )
                if should_checkpoint:
                    save_checkpoint()
                if should_exit:
                    sys.exit(exit_code)
                optimizer.step()
        """

        self.first_iteration_complete = True
        if self.mode in [RerunMode.DISABLED, RerunMode.REPORT_DETERMINISM_STATS]:
            return False, False, 0
        if self.state == RerunState.RERUNNING_IN_PLACE:
            log_single_rank(
                logger,
                logging.WARNING,
                "Exiting now. A checkpoint at the last iteration is being saved "
                "if further examination is needed",
            )
            return True, True, EXIT_CODE_FAILED_ON_RESULT_VALIDATION
        elif self.state == RerunState.WILL_RERUN_FROM_CHECKPOINT:
            log_single_rank(
                logger,
                logging.WARNING,
                "Saving a checkpoint and exiting now. Please resume the job "
                "from the checkpoint to rerun the last iteration "
                "and establish a diagnostic",
            )
            return True, True, EXIT_CODE_RESUME_TO_DISAMBIGUATE
        elif self.state == RerunState.RERUNNING_FROM_CHECKPOINT:
            log_single_rank(
                logger,
                logging.WARNING,
                "Exiting now. A checkpoint at the last iteration already exists "
                "if further examination is needed",
            )
            return False, True, EXIT_CODE_FAILED_ON_RESULT_VALIDATION
        elif self.state == RerunState.RERUNNING_AGAIN_FROM_CHECKPOINT:
            log_single_rank(
                logger,
                logging.WARNING,
                "Exiting now. Please resume the job from the same checkpoint "
                "to rerun the last iteration and establish a diagnostic",
            )
            return False, True, EXIT_CODE_RESUME_TO_DISAMBIGUATE
        return False, False, 0

    def validate_result(
        self,
        result: Any,
        rejection_func: Callable[[Any], bool],
        message: str,
        comparison_func: Optional[Callable[[Any, Any], float]] = None,
        tolerance: float = 0.0,
        fatal: bool = True,
    ) -> None:
        """This method verifies a result and possibly triggers a re-run.

        Args:
            result: result to verify.
            rejection_func: function taking a result as input and returning whether the result fails
                validation (e.g. torch.isnan, returns True if result is NaN).
            message: message describing the validation test (e.g. "spiky loss").
            comparison_func: optional function used to compare the results of the original run and
                of a rerun. It should return a float representing the relative difference between
                the 2. The default implementation is for 0-dim float tensors.
            tolerance: tolerance used in combination with comparison_func to determine
                reproducibility of results. Default is no tolerance (deterministic calculations).
            fatal: whether to abort the job when fault attribution is complete
                (transient/permanent/not HW)
        Returns:
            None

        Example usage:

            def train_step(data_iterator, ...):
                rerun_state_machine = get_rerun_state_machine()
                while rerun_state_machine.should_rerun_forward_and_backward(data_iterator):
                    optimizer.zero_grad()
                    data = next(data)
                    outputs = model(data)
                    loss = loss_fn(outputs)
                    rerun_state_machine.validate_result(
                        result=loss,
                        rejection_func=torch.is_nan,    # rejects result if NaN
                        message="loss is NaN",
                        tolerance=0.001,    # max 0.1% difference in results due to non-determinism
                        fatal=True,         # abort job if validation fails
                    )
                    loss.backward()

        We establish the diagnostic using this overall flow:
        - an irreproducible result is detected by rerunning the iteration locally (same GPU) and
          verifying the result is different.
        - a mismatching result is detected by rerunning the iteration on a different GPU by
          verifying the result is different.
        - an expected result is detected by rerunning the iteration on a different GPU and
          verifying the result is the same.
        """

        # If reruns are disabled, still validate the result and throw a RuntimeError if it is
        # rejected when fatal. This is a backward-compatible behavior for infs and NaNs.
        if self.mode == RerunMode.DISABLED:
            result_rejected: bool = rejection_func(result)
            if result_rejected:
                self._log_validation_error_to_file(
                    status=RerunValidationStatus.RERUN_DISABLED, result=result, message=message
                )
                rank: int = safe_get_rank()
                node: str = os.uname()[1]
                device: int = torch.cuda.current_device()
                full_message: str = (
                    f"Rank {rank}, node {node}, device {device}, "
                    f"iteration {self.current_iteration}: "
                    f"Unexpected result {result} (message='{message}')"
                )
                if fatal:
                    raise RuntimeError(full_message)
                else:
                    logger.warning(full_message)
            return

        if comparison_func is None:
            comparison_func = _compare_floats

        assert (
            self.state != RerunState.NOT_RUNNING_YET
        ), "validate_result should not be called outside of the forward-backward pass"

        validation_call: Call = self._get_validation_call_info(message)

        # Handle the stats reporting mode. In that mode, we rerun every iteration once to collect
        # stats about any non-determinism in the calculations (as a relative difference between the
        # calculations in the initial run and in the re-run). The only assumption here is that the
        # control flow is deterministic (so that the results corresponding to the nth invocation of
        # validate_result() can be compared).

        if self.mode == RerunMode.REPORT_DETERMINISM_STATS:
            if self.state == RerunState.INITIAL_RUN:
                self.rerun_requested = True
                self.saved_results[validation_call] = result
            elif self.state == RerunState.RERUNNING_IN_PLACE:
                initial_result = self.saved_results.get(validation_call)
                assert initial_result is not None, "Result from initial run missing"
                diff = comparison_func(initial_result, result)
                caller: Caller = Caller(message=message, rank=0)
                self.stats[caller].record(diff)
            return

        def log_failure(message: str, fatal: bool = True) -> None:
            rank: int = safe_get_rank()
            node: str = os.uname()[1]
            device: int = torch.cuda.current_device()
            if fatal:
                logger.error(
                    f"Rank {rank}, node {node}, device {device}, "
                    f"iteration #{self.current_iteration}: {message}!"
                )
            else:
                logger.warning(
                    f"Rank {rank}, node {node}, device {device}, "
                    f"iteration #{self.current_iteration}: {message}!"
                )

        # Emit message in log so that we can identify which jobs have this instrumentation
        # enabled. We do this from the validate_result() method because some jobs may run with
        # the check_for_nan_in_loss_and_grad option but never call validate_result.
        if not self.logged_sdc_enabled:
            self.logged_sdc_enabled = True
            log_single_rank(logger, logging.WARNING, "Result validation enabled")

        # If this the initial run of the iteration, and no unexpected result has already been
        # identified?
        if self.state == RerunState.INITIAL_RUN and not self.rerun_requested:

            # Do not validate results on the first iteration, as we cannot guarantee a checkpoint
            # can be taken before the optimizer has been stepped at least once.
            if not self.first_iteration_complete:
                return

            result_rejected = self.error_injector.maybe_inject() or rejection_func(result)
            if result_rejected:
                self.failed_validation_call = validation_call
                self.initial_result = result
                self.rerun_requested = True
                self._log_validation_error_to_file(
                    status=RerunValidationStatus.INITIAL_RUN, result=result, message=message
                )
                logger.error(
                    f"Unexpected result {result} "
                    f"on rank {safe_get_rank()} "
                    f"at iteration #{self.current_iteration} "
                    f"invocation #{validation_call.sequence} "
                    f"(message='{message}')"
                )
        # If this the first rerun (same GPU) or second 2nd rerun (different GPU), and have we
        # reached the validation call that failed during the initial run?
        elif (
            self.state in [RerunState.RERUNNING_IN_PLACE, RerunState.RERUNNING_FROM_CHECKPOINT]
            and validation_call == self.failed_validation_call
        ):
            comparison: float = self.error_injector.maybe_miscompare(
                comparison_func, self.initial_result, result, self.state
            )
            # This is the first re-run.
            if self.state == RerunState.RERUNNING_IN_PLACE:
                if comparison > tolerance:
                    if not fatal:
                        self.continue_requested = True
                    log_failure(
                        "First rerun: unexpected result is not reproducible within the tolerance "
                        f"({result} != {self.initial_result})",
                        fatal=fatal,
                    )
                    self._log_validation_error_to_file(
                        status=RerunValidationStatus.FIRST_RERUN_NOT_REPRODUCIBLE,
                        result=result,
                        message=message,
                    )
                    log_failure("Possible transient error!", fatal=fatal)

                else:
                    if fatal:
                        self.checkpoint_requested = True
                    else:
                        self.continue_requested = True
                    # Remember the node and device we're running on so that we can check we're not
                    # rerunning on the same GPU when we resume from the checkpoint.
                    self.suspicious_node = os.uname()[1]
                    self.suspicious_device = torch.cuda.current_device()
                    self._log_validation_error_to_file(
                        status=RerunValidationStatus.FIRST_RERUN_REPRODUCIBLE,
                        result=result,
                        message=message,
                    )
                    log_failure(
                        "First rerun: unexpected result is reproducible within the tolerance "
                        f"({result} = {self.initial_result}). "
                        "Need to rerun on a different GPU to verify correctness.",
                        fatal=fatal,
                    )
            # This is the second re-run.
            elif self.state == RerunState.RERUNNING_FROM_CHECKPOINT:
                # Ensure we're not on the same GPU as the first rerun.
                node = os.uname()[1]
                device = torch.cuda.current_device()
                if node == self.suspicious_node and device == self.suspicious_device:
                    logger.error(
                        f"Got rescheduled on the same GPU. Need to resume again from the same "
                        f"checkpoint (node: {self.suspicious_node}, gpu: {self.suspicious_device})"
                    )
                    self.restart_again_requested = True
                elif comparison > tolerance:
                    self._log_validation_error_to_file(
                        status=RerunValidationStatus.SECOND_RERUN_NOT_REPRODUCIBLE,
                        result=result,
                        message=message,
                    )
                    logger.warning(
                        "Second rerun: unexpected result is not reproducible on a different GPU, "
                        f"therefore was likely incorrect ({result} != {self.initial_result})"
                    )
                    log_failure("Possible persistent error!")
                    if not fatal:
                        self.continue_requested = True
                else:
                    self._log_validation_error_to_file(
                        status=RerunValidationStatus.SECOND_RERUN_REPRODUCIBLE,
                        result=result,
                        message=message,
                    )
                    logger.warning(
                        "Second rerun: unexpected result is reproducible on a different GPU, "
                        f"therefore it was likely correct ({result} = {self.initial_result})"
                    )
                    log_failure(f"Correct result (but possible Application error) ({message})")
                    if not fatal:
                        self.continue_requested = True
            else:
                raise RuntimeError("Should not be here")

    def is_unexpectedly_large(
        self,
        result: torch.Tensor,
        threshold: float,
        context: str,
        num_samples: int = 100,
        resample: bool = False,
    ) -> bool:
        """Helper method to estimate whether a result is unexpectedly large.

        Some calculation errors manifest themselves as results with unexpectedly large
        exponents, e.g. spiky loss or grads. This method keeps track of a value over time
        and flags it if it exceeds a certain threshold expressed as a multiple factor of
        the max value observed.

        Args:
            loss_tensor: a zero-dim tensor containing the current loss.
            threshold: a float representing the minimum trigger threshold
                e.g. 10 means > 10x max absolute value observed.
            context: a string identifying the value. This is used to differentiate
                between different invocations of validate_results targeting different
                values, e.g. loss and grads.
            num_samples: the sample size used to estimate the max value.
                Default is 100 value samples.
            reset: whether to resample the max value. Default is False.
        Returns:
            A boolean telling whether the current loss deviates from the previous
            loss by a factor greater than the threshold

        This method can be passed as a rejection function to the validate_result()
        method.

        Example usage:

            def train_step(data_iterator, ...):
                rerun_machine = get_rerun_machine()
                while rerun_machine.should_rerun_forward_and_backward(data_iterator):
                    optimizer.zero_grad()
                    data = next(data)
                    outputs = model(data)
                    loss = loss_fn(outputs)
                    rerun_machine.validate_result(
                        result=loss,
                        rejection_func=partial(
                            rerun_machine.is_unexpectedly_large,
                            threshold=10,
                            context="loss",
                        ),
                        message="Spiky loss",
                        tolerance=0.0,
                        fatal=False,
                    )
        """

        value: float = math.fabs(result.item())
        # Ignore NaNs and Infs. They should be checked separately.
        if math.isnan(value) or math.isinf(value):
            return False

        if resample or context not in self.large_value_counts:
            self.large_value_counts[context] = 0
        if self.large_value_counts[context] < num_samples:
            self.large_value_counts[context] += 1
            self.max_values[context] = max(self.max_values.get(context, 0.0), value)
            if self.large_value_counts[context] == num_samples:
                logger.warning(f"Max value for {context}: {self.max_values[context]}")
            return False

        return value >= self.max_values[context] * threshold

    def state_dict(
        self, data_iterator: DataIteratorArgType, ckpt_format: str, force: bool = False
    ) -> dict[str, Any]:
        """Method that returns a state dict to be checkpointed.

        Args:
            data_iterator: the data iterator that needs to be checkpointed (or None
                if this checkpoint is not requested by the rerun state machine).
            ckpt_format: the checkpoint format to use.
        Returns:
            A state dict representing the rerun state machine.

        Example usage:

            def save_my_model_checkpoint(data_iterator, ...):
                checkpoint = {}
                ...
                rerun_state_machine = get_rerun_state_machine()
                checkpoint['rerun_state_machine'] = (
                    rerun_state_machine.state_dict(data_iterator, "torch_dist")
                )
                ...
                return checkpoint
        """

        # Only save a checkpoint if a step needs to be rerun.
        if not force:
            if self.state == RerunState.NOT_RUNNING_YET:
                return None

            if ckpt_format != "torch_dist":
                log_single_rank(
                    logger,
                    logging.WARNING,
                    "RerunStateMachine checkpoints ONLY SUPPORTED "
                    "for checkpoint format torch_dist",
                )
                return None

        data_iterators: list[RerunDataIterator] = self._sanitize_data_iterators(data_iterator)

        # When saving a step to re-run, the RerunStateMachine state is different across all ranks.
        # We keep the common state in the non-sharded (common) checkpoint and move the rank-level
        # state to a sharded object.
        sharded_dict = {
            "rerun_requested": self.rerun_requested,
            "checkpoint_requested": self.checkpoint_requested,
            "restart_again_requested": self.restart_again_requested,
            "continue_requested": self.continue_requested,
            # logged_sdc_enabled should not be saved (set at the job startup time).
            "error_injector_checkpoint": self.error_injector.state_dict(),
            # validation_counts should not be saved (reset at start of training loop).
            "failed_validation_call": self.failed_validation_call,
            "initial_result": self.initial_result,
            "suspicious_node": self.suspicious_node,
            "suspicious_device": self.suspicious_device,
            # No need to save saved_state (RNG state  already captured in checkpoint).
            "data_iterator_checkpoints": (
                [d.state_dict() for d in data_iterators] if data_iterators else None
            ),
            "large_value_counts": self.large_value_counts,
            "max_values": self.max_values,
            # No need to save saved_results and stats (resets when job resumes).
        }
        sharded_dict = ShardedObject(
            "rerun_state_machine_state",
            sharded_dict,
            (torch.distributed.get_world_size(),),
            (torch.distributed.get_rank(),),
        )

        state_dict: dict[str, Any] = {
            "mode": self.mode,
            "state": self.state,
            "current_iteration": self.current_iteration,
            "sharded": sharded_dict,
        }
        return state_dict

    def validate_state_dict(self, state_dict: dict[str, Any]) -> bool:
        """Method that validate a checkpoint state dict before loading it.

        Args:
            state_dict: the state dict saved in the checkpoint and originally
                obtained from state_dict().
        Returns:
            bool
        """

        if state_dict is None:
            return False
        if 'state' not in state_dict or state_dict['state'] == RerunState.NOT_RUNNING_YET:
            return False
        return True

    def load_state_dict(self, state_dict: dict[str, Any]) -> None:
        """Method that restores the state from a checkpoint.

        Args:
            state_dict: the state dict saved in the checkpoint and originally
                obtained from state_dict().
        Returns:
            None

        Example usage:

            def load_checkpoint(checkpoint, ...)
                ...
                if 'rerun_state_machine' in checkpoint:
                    rerun_state_machine = get_rerun_state_machine()
                    rerun_state_machine.load_state_dict(checkpoint['rerun_state_machine'])
        """

        if self.mode == RerunMode.DISABLED:
            log_single_rank(
                logger,
                logging.WARNING,
                "RerunStateMachine disabled via CLI, ignoring machine state saved in checkpoint",
            )
            return
        log_single_rank(
            logger,
            logging.WARNING,
            "Getting RerunStateMachine state from checkpoint. Will rerun step.",
        )
        self.mode = state_dict["mode"]
        self.current_iteration = state_dict["current_iteration"]
        self.state = state_dict["state"]

        sharded_dict = state_dict["sharded"]
        self.rerun_requested = sharded_dict["rerun_requested"]
        self.checkpoint_requested = sharded_dict["checkpoint_requested"]
        self.restart_again_requested = sharded_dict["restart_again_requested"]
        self.continue_requested = sharded_dict["continue_requested"]
        self.error_injector.load_state_dict(sharded_dict["error_injector_checkpoint"])
        self.failed_validation_call = sharded_dict["failed_validation_call"]
        self.initial_result = sharded_dict["initial_result"]
        self.suspicious_node = sharded_dict["suspicious_node"]
        self.suspicious_device = sharded_dict["suspicious_device"]
        self.data_iterator_checkpoints = sharded_dict["data_iterator_checkpoints"]
        self.large_value_counts = sharded_dict["large_value_counts"]
        self.max_values = sharded_dict["max_values"]

    def _sanitize_data_iterators(
        self, data_iterator: DataIteratorArgType
    ) -> list["RerunDataIterator"]:
        data_iterators: list[RerunDataIterator]
        if self.mode == RerunMode.DISABLED:
            data_iterators = []
        elif not isinstance(data_iterator, list):
            data_iterators = [data_iterator]
        else:
            data_iterators = data_iterator
        data_iterators = [d for d in data_iterators if d is not None]
        for d in data_iterators:
            assert isinstance(
                d, RerunDataIterator
            ), "data iterator is not wrapped with RerunDataIterator"
        return data_iterators

    def _get_validation_call_info(self, message: str) -> Call:
        """Internal method to get the context about the caller to validate_result()."""

        frame = inspect.currentframe()
        assert frame is not None
        assert frame.f_back is not None
        frame = frame.f_back.f_back
        assert frame is not None
        filename: str = inspect.getframeinfo(frame).filename
        lineno: int = frame.f_lineno
        rank: int = safe_get_rank()
        caller = Caller(message=message, rank=rank)
        self.validation_counts[caller] += 1
        sequence: int = self.validation_counts[caller]
        return Call(caller=caller, sequence=sequence)

    def _save_state(self) -> None:
        """Internal method that saves the state that needs to be restored when rewound.

        Any state that may change during the execution of a step before the optimizer is updated,
        e.g. RNG state, should be saved here. The state of the data iterator is taken care
        separately by the RerunDataIterator class.

        At this point, this only consists in the RNG state.
        """

        self.saved_state = {
            "rng_state": {
                "random_rng_state": random.getstate(),
                "np_rng_state": np.random.get_state(),
                "torch_rng_state": torch.get_rng_state(),
                "cuda_rng_state": torch.cuda.get_rng_state(),
            },
            "other_state": self.state_save_func() if self.state_save_func else None,
            # any other state to save to guarantee deterministic execution?
        }

    def _restore_state(self) -> None:
        """Internal method that restores the state that was saved in _save_state()."""

        rng_state = self.saved_state["rng_state"]
        random.setstate(rng_state["random_rng_state"])
        np.random.set_state(rng_state["np_rng_state"])
        torch.set_rng_state(rng_state["torch_rng_state"])
        torch.cuda.set_rng_state(rng_state["cuda_rng_state"])
        if self.saved_state["other_state"] and self.state_restore_func:
            self.state_restore_func(self.saved_state["other_state"])

    def _maybe_report_stats(self) -> None:
        """Internal method that reports stats if needed."""

        if self.current_iteration % RerunStateMachine.REPORTING_INTERVAL_ITERATIONS == 0:
            if torch.distributed.is_initialized():
                world_size: int = torch.distributed.get_world_size()
                stats_list = [None for _ in range(world_size)]
                rank = torch.distributed.get_rank()
                torch.distributed.gather_object(dict(self.stats), stats_list if rank == 0 else None)
                if rank == 0:
                    callers: Set[Caller] = {c for s in stats_list for c in s.keys()}
                    logger.info("Stats on computation determinism in validation calls")
                    for caller in callers:
                        self.stats[caller].combine(
                            [s.get(caller) for s in stats_list[1:] if s.get(caller)]
                        )
                        logger.info(f"  From {caller.filename}, line {caller.lineno}:")
                        logger.info(f"    {self.stats[caller].print_stats()}")
                else:
                    for caller, stats in self.stats.items():
                        stats.reset()
            else:
                logger.info("Stats on computation determinism in validation calls")
                for caller, stats in self.stats.items():
                    logger.info(f"  From {caller.filename}, line {caller.lineno}:")
                    logger.info(f"    {stats.print_stats()}")

    def _log_validation_error_to_file(
        self, status: RerunValidationStatus, result: Any, message: str
    ) -> None:
        if self.result_rejected_tracker_filename is not None:
            # Append to log.
            try:
                rank: int = safe_get_rank()
                node: str = os.uname()[1]
                device: int = torch.cuda.current_device()
                with open(self.result_rejected_tracker_filename, "a") as f:
                    f.write(
                        f"ts={datetime.datetime.now()} node={node} device={device} "
                        f"jobID={os.getenv('SLURM_JOBID', 'N/A')} rank={rank} "
                        f"iteration={self.current_iteration} status={status} result={result} "
                        f"message='{message}'\n"
                    )
            except Exception as e:
                logger.error(f"Could not log validation error! ({e})")

    @classmethod
    def get_skipped_iterations_from_tracker_file(cls, tracker_file_name: str) -> list[int]:
        """Get list of iterations to skip from results recorded in tracker file. If an
        "abnormality" (e.g., NaN or infinity in gradient) is seen more than once on a
        given rank and iteration, the corresponding iteration is skipped.

        Args:
            tracker_file_name (str): Name of tracker file.

        Returns:
            list[int]: List of iterations to skip.
        """
        iterations_to_potentially_skip: set[int] = set()
        iterations_to_ignore: set[int] = set()
        iterations_seen: set[Tuple[int, int]] = set()
        iterations_seen_by_job: dict[str, dict[int, set]] = {}

        regex = (
            r"ts=.+ node=.+ device=.+ jobID=(.+) rank=(.+) iteration=(.+) status=(.+) "
            f"result=.+ message=.+"
        )
        try:
            with open(tracker_file_name, "r") as f:
                for line in f.readlines():
                    match = re.search(regex, line)
                    if match:
                        job = str(match[1])
                        rank = int(match[2])
                        iteration = int(match[3])
                        status = str(match[4]).split(".")[-1].upper()
                        # Skip an iteration if:
                        # - Reruns were disabled and it has failed on the same rank twice.
                        # or
                        # - Reruns were enabled and it was reproducible on the 2nd rerun
                        if status in [
                            "RERUN_DISABLED",
                            "FIRST_RERUN_REPRODUCIBLE",
                            "SECOND_RERUN_REPRODUCIBLE",
                        ]:
                            if (rank, iteration) in iterations_seen:
                                iterations_to_potentially_skip.add(iteration)
                            iterations_seen.add((rank, iteration))
                            if job not in iterations_seen_by_job:
                                iterations_seen_by_job[job] = {}
                            if iteration not in iterations_seen_by_job[job]:
                                iterations_seen_by_job[job][iteration] = set()
                            iterations_seen_by_job[job][iteration].add(rank)
                            if len(iterations_seen_by_job[job][iteration]) > 1:
                                iterations_to_ignore.add(iteration)
        except Exception as e:
            log_single_rank(
                logger, logging.ERROR, f"Could not parse iterations to skip in tracker file! ({e})"
            )
        iterations_to_skip = sorted(iterations_to_potentially_skip - iterations_to_ignore)
        log_single_rank(
            logger,
            logging.WARNING,
            f"Will skip these iterations from tracker file: {iterations_to_skip}",
        )
        if len(iterations_to_ignore) > 0:
            log_single_rank(
                logger,
                logging.WARNING,
                "Will not skip these iterations due to multiple rank errors: "
                f"{sorted(iterations_to_ignore)}",
            )
        return iterations_to_skip


class RerunDataIterator:
    """A wrapper class for data iterators that adds replay capability.

    Args:
        iterable: data iterator that needs the replay capability.
        make_iterable: if set, iterator is created by calling iter() on iterable.

    The RerunState class below uses the rewind capability to replay all the microbatches
    fetched during an iteration.

    Example usage:

        class MyDataIterator:
            ...

        data_iterator = MyDataIterator(...)
        replay_data_iterator = RerunDataIterator(data_iterator)
    """

    def __init__(self, iterable: Iterable[Any]) -> None:
        self.iterable: Iterable[Any] = iterable
        self.saved_microbatches: list[Any] = []
        self.replaying: bool = False
        self.replay_pos: int = 0

    def __next__(self) -> Any:
        """__next__ method override adding replay capability."""

        if self.replaying:
            # we should not read past the saved batches if execution is deterministic,
            # as the number of calls to get_batch() should remain the same across reruns
            assert len(self.saved_microbatches) > self.replay_pos, "No more batches to replay"
            n = self.saved_microbatches[self.replay_pos]
            self.replay_pos += 1
            return n
        n = next(self.iterable)
        if get_rerun_state_machine().get_mode() != RerunMode.DISABLED:
            self.saved_microbatches.append(n)
        return n

    def rewind(self) -> None:
        """Method to rewind the data iterator to the first microbatch of the iteration."""

        self.replaying = True
        self.replay_pos = 0

    def advance(self) -> None:
        """Method to drop all the buffered microbatches and jump to the next iteration."""

        self.replaying = False
        self.saved_microbatches = []

    def state_dict(self) -> SerializableStateType:
        """Method to capture the state of the iterator as a serializable dict."""

        return {
            "saved_microbatches": self.saved_microbatches,
            "replaying": self.replaying,
            "replay_pos": self.replay_pos,
        }

    def load_state_dict(self, state_dict: SerializableStateType) -> None:
        """Method to restore the state saved as a serializable dict."""

        self.saved_microbatches = state_dict["saved_microbatches"]
        self.replaying = state_dict["replaying"]
        self.replay_pos = state_dict["replay_pos"]


class QuickStats:
    """Simple class to keep track of distribution of a statistic.

    Args:
        max_size: maximum number of samples to keep.
    """

    def __init__(self, max_size: int = 100000) -> None:
        self.samples: list[float] = []
        self.pos: int = 0
        self.zero_cnt: int = 0
        self.max: float = 0.0
        self.max_size: int = max_size

    def record(self, data: float) -> None:
        """Record a new sample."""

        if data == 0.0:
            self.zero_cnt += 1
        else:
            if self.pos < self.max_size:
                self.samples.append(data)
            else:
                self.samples[self.pos % self.max_size] = data
            self.pos += 1
            if data > self.max:
                self.max = data

    def combine(self, others: list["QuickStats"]) -> None:
        """Append the samples from multiple instances into one object."""

        if len(others) == 0:
            return
        n = len(self.samples) + sum(len(o.samples) for o in others)
        if n <= self.max_size:
            for o in others:
                self.samples.extend(o.samples)
            self.pos = n
        self.zero_cnt += sum(o.zero_cnt for o in others)
        self.max = max(self.max, max(o.max for o in others))

    def reset(self) -> None:
        """Forget all data."""

        self.samples = []
        self.pos = 0
        self.zero_cnt = 0
        self.max = 0.0

    def print_stats(self) -> str:
        """Return a string describing the data distribution."""

        self.samples.sort()
        z = self.zero_cnt
        n = len(self.samples)
        if n > 0:
            t = z + n
            s = sum(self.samples)
            a = s / t
            ps = {}
            for p in [0.5, 0.9, 0.99, 0.999]:
                ps[p] = f"{self.samples[int(t * p) - z]:.3E}" if int(t * p) - z >= 0 else "0.0"
            mx = self.max
            return (
                f"{t:,}/{z:,} total/identical samples, rel. variability: avg= {a:.3E}, "
                f"p50= {ps[0.5]}, p90= {ps[0.9]}, p99= {ps[0.99]}, p99.9= {ps[0.999]}, "
                f"max: {mx:.3E}"
            )
        else:
            return f"{z:,} samples, all identical"

    def __getstate_(self) -> Any:
        """Pickle method, used by torch.distributed.gather_object."""

        return vars(self)

    def __setstate(self, state: Any) -> Any:
        """Unpickle method, used by torch.distributed.gather_object."""

        self.samples = state["samples"]
        self.pos = state["pos"]
        self.zero_cnt = state["zero_cnt"]
        self.max = state["max"]


class RerunErrorInjector:
    """A class to manage error injection into the rerun state machine."""

    _ERROR_NAMES: dict[RerunDiagnostic, str] = {
        RerunDiagnostic.CORRECT_RESULT: "Expected result",
        RerunDiagnostic.TRANSIENT_ERROR: "Transient error",
        RerunDiagnostic.PERSISTENT_ERROR: "Persistent error",
    }

    def __init__(
        self,
        error_injection_rate: int = 0,
        error_injection_type: RerunDiagnostic = RerunDiagnostic.TRANSIENT_ERROR,
    ) -> None:
        assert isinstance(
            error_injection_type, RerunDiagnostic
        ), "Injected result type must be a valid RerunDiagnostic"
        self.error_injection_rate: int = error_injection_rate
        self.error_injection_type: RerunDiagnostic = error_injection_type
        self.should_inject_errors: bool = error_injection_rate > 0
        self.injected_error_type: Optional[RerunDiagnostic] = (
            None  # set to a non-None value when a result is injected
        )

    def maybe_inject(self) -> bool:
        """Method that decides whether to inject an error."""

        # Do not inject an error if error injection is turned off or if an error was
        # already injected in this iteration.
        if not self.should_inject_errors or self.injected_error_type is not None:
            return False
        r: int = (
            random.randint(0, self.error_injection_rate - 1) + safe_get_rank()
        ) % self.error_injection_rate
        if r != 0:
            return False
        self.injected_error_type = self.error_injection_type
        logger.warning(
            f"Injecting error type {RerunErrorInjector._ERROR_NAMES[self.error_injection_type]}"
        )
        return True

    def maybe_miscompare(
        self,
        comparison_func: Callable[[Any, Any], float],
        initial_result: Any,
        result: Any,
        state: RerunState,
    ) -> float:
        """Method that introduces mismatching results during reruns when an error is injected.

        When no error is injected, this method defers to the user-provided comparison function.
        When an error is injected, it returns matching or mismatching results depending on the type
        of error being injected and on the re-run state."""

        if self.injected_error_type is None:
            return comparison_func(initial_result, result)
        # On the first re-run, return a different results and mark the injection processed when
        # injecting an irreproducible result.
        if state == RerunState.RERUNNING_IN_PLACE:
            if self.injected_error_type == RerunDiagnostic.TRANSIENT_ERROR:
                self.injected_error_type = None
                return COMPARISON_MISMATCH
            else:
                return COMPARISON_MATCH
        # On the second re-run, mark the injection processed and, when injecting a mismatching
        # result return a different result.
        elif state == RerunState.RERUNNING_FROM_CHECKPOINT:
            if self.injected_error_type == RerunDiagnostic.PERSISTENT_ERROR:
                self.injected_error_type = None
                return COMPARISON_MISMATCH
            elif self.injected_error_type == RerunDiagnostic.CORRECT_RESULT:
                self.injected_error_type = None
                return COMPARISON_MATCH
            else:
                raise RuntimeError("Should not be here")
        else:
            raise RuntimeError("Should not be here")

    def state_dict(self) -> SerializableStateType:
        """Method to capture the state of the error injector as a serializable dict."""

        return {
            "error_injection_rate": self.error_injection_rate,
            "error_injection_type": self.error_injection_type,
            # No need to checkpoint should_inject_errors (inferred from error_injection_rate).
            "injected_error_type": self.injected_error_type,
        }

    def load_state_dict(self, state_dict: SerializableStateType) -> None:
        """Method to restore the state saved as a serializable dict."""

        self.error_injection_rate = state_dict["error_injection_rate"]
        self.error_injection_type = state_dict["error_injection_type"]
        self.should_inject_errors = self.error_injection_rate > 0
        self.injected_error_type = state_dict["injected_error_type"]


@copy_signature(RerunStateMachine.__init__, handle_first_src_param='skip')
def initialize_rerun_state_machine(*args, **kwargs) -> None:
    """Helper function to initialize the rerun machine instance.

    Check the RerunStateMachine class for the details.
    """

    rerun_state_machine: RerunStateMachine = RerunStateMachine(*args, **kwargs)
    _set_rerun_state_machine(rerun_state_machine)


def destroy_rerun_state_machine() -> None:
    """Helper function to shut down the rerun machine instance."""

    global _GLOBAL_RERUN_STATE_MACHINE
    _GLOBAL_RERUN_STATE_MACHINE = None


def get_rerun_state_machine() -> RerunStateMachine:
    """Helper function to return the singleton instance of the rerun machine."""

    if _GLOBAL_RERUN_STATE_MACHINE is None:
        log_single_rank(logger, logging.WARNING, "Implicit initialization of Rerun State Machine!")
        initialize_rerun_state_machine()
        assert _GLOBAL_RERUN_STATE_MACHINE is not None
    return _GLOBAL_RERUN_STATE_MACHINE


def _set_rerun_state_machine(rerun_state_machine) -> None:
    """Internal function to set the singleton instance of the rerun machine."""

    global _GLOBAL_RERUN_STATE_MACHINE
    assert _GLOBAL_RERUN_STATE_MACHINE is None, "Rerun state machine is already initialized"
    _GLOBAL_RERUN_STATE_MACHINE = rerun_state_machine


def _compare_floats(a: torch.Tensor, b: torch.Tensor) -> float:
    """Internal function that implements the default compare_func.

    Check the validate_result() method of the RerunStateMachine class for details.
    """

    af: float = a.item()
    bf: float = b.item()
    if (af == bf) or (math.isnan(af) and math.isnan(bf)):
        return COMPARISON_MATCH
    if (
        (math.isnan(af) and not math.isnan(bf))
        or (not math.isnan(af) and math.isnan(bf))
        or (math.isinf(af) and not math.isinf(bf))
        or (not math.isinf(af) and math.isinf(bf))
        or (math.isnan(af) and math.isinf(bf))
        or (math.isinf(af) and math.isnan(bf))
    ):
        return COMPARISON_MISMATCH
    return math.fabs((af - bf) / (af + bf) * 2)


================================================
FILE: megatron/core/resharding/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from .execution import execute_reshard_plan
from .planner import build_centralized_reshard_plan
from .refit import (
    clear_service_cache,
    get_or_create_service,
    reshard_model_weights,
    swap_model_weights,
)
from .transforms import MXFP8ReshardTransform, ReshardTransform
from .utils import ParameterMetadata, ReshardPlan, ShardingDescriptor, TransferOp

__all__ = [
    "build_centralized_reshard_plan",
    "execute_reshard_plan",
    "MXFP8ReshardTransform",
    "ReshardTransform",
    "swap_model_weights",
    "reshard_model_weights",
    "get_or_create_service",
    "clear_service_cache",
    "ParameterMetadata",
    "ShardingDescriptor",
    "TransferOp",
    "ReshardPlan",
]


================================================
FILE: megatron/core/resharding/copy_services/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from __future__ import annotations

from .base import CopyService
from .nccl_copy_service import NCCLCopyService
from .nvshmem_copy_service import NVSHMEMCopyService

__all__ = ["CopyService", "NCCLCopyService", "NVSHMEMCopyService"]


================================================
FILE: megatron/core/resharding/copy_services/base.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from __future__ import annotations

from abc import ABC, abstractmethod

import torch


class CopyService(ABC):
    """Abstract interface for submitting and executing batched P2P copy operations."""

    @abstractmethod
    def submit_send(self, src_tensor: torch.Tensor, dest_rank: int):
        """Register a tensor send from the current rank to ``dest_rank``."""
        ...

    @abstractmethod
    def submit_recv(self, dest_tensor: torch.Tensor, src_rank: int):
        """Register a tensor receive into ``dest_tensor`` from ``src_rank``."""
        ...

    @abstractmethod
    def run(self):
        """Execute all previously submitted send/recv operations as a single batch."""
        ...


================================================
FILE: megatron/core/resharding/copy_services/gloo_copy_service.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from __future__ import annotations

import logging
from dataclasses import dataclass
from typing import List, Tuple

import torch
import torch.distributed as dist

from .base import CopyService

logger = logging.getLogger(__name__)


@dataclass
class SendOp:
    """Simple container describing a single send operation."""

    task_id: int | None
    tensor: torch.Tensor
    dest_rank: int


@dataclass
class RecvOp:
    """Simple container describing a single receive operation."""

    task_id: int | None
    tensor: torch.Tensor
    src_rank: int


class GlooCopyService(CopyService):
    """
    CopyService implementation that routes refit traffic over a CPU/Gloo
    process group instead of NCCL.
    """

    def __init__(self, group=None):
        if group is not None:
            self.gloo_pg = group
            self.rank = group.rank()
            self.world_size = group.size()
        else:
            self.rank = dist.get_rank()
            self.world_size = dist.get_world_size()
            self.gloo_pg = dist.new_group(backend="gloo")
        self.send_ops: List[SendOp] = []
        self.recv_ops: List[Tuple[RecvOp, torch.Tensor]] = []
        self._copy_stream = torch.cuda.Stream()
        if self.rank == 0:
            logger.info(
                f"GlooCopyService initialized on rank {self.rank} with {self.world_size} ranks"
            )

    def submit_send(self, src_tensor: torch.Tensor, dest_rank: int):
        self.send_ops.append(SendOp(task_id=None, tensor=src_tensor, dest_rank=dest_rank))

    def submit_send_with_id(self, task_id: int, src_tensor: torch.Tensor, dest_rank: int):
        """Submit a send operation with a unique task identifier."""
        self.send_ops.append(SendOp(task_id=task_id, tensor=src_tensor, dest_rank=dest_rank))

    def submit_recv(self, dest_tensor: torch.Tensor, src_rank: int):
        """Submit a receive operation."""
        # Allocate a CPU buffer that matches the destination view; we'll
        # copy into dest_tensor after the Gloo recv completes.
        cpu_buffer = torch.empty_like(dest_tensor, device="cpu").contiguous()
        self.recv_ops.append(
            (RecvOp(task_id=None, tensor=cpu_buffer, src_rank=src_rank), dest_tensor)
        )

    def submit_recv_with_id(self, task_id: int, dest_tensor: torch.Tensor, src_rank: int):
        """Submit a receive operation with a unique task identifier."""
        cpu_buffer = torch.empty_like(dest_tensor, device="cpu").contiguous()
        self.recv_ops.append(
            (RecvOp(task_id=task_id, tensor=cpu_buffer, src_rank=src_rank), dest_tensor)
        )

    def run(self):
        total_ops = len(self.send_ops) + len(self.recv_ops)
        if self.rank == 0:
            logger.info(
                f"GlooCopyService rank {self.rank}: executing batched communication: "
                f"{len(self.send_ops)} sends + {len(self.recv_ops)} recvs = {total_ops} ops"
            )

        p2p_ops: List[dist.P2POp] = []

        # Short-circuit self transfers into local device copies.
        local_sends = [op for op in self.send_ops if op.dest_rank == self.rank]
        remote_sends = [op for op in self.send_ops if op.dest_rank != self.rank]
        local_recvs = [(recv, dst) for (recv, dst) in self.recv_ops if recv.src_rank == self.rank]
        remote_recvs = [(recv, dst) for (recv, dst) in self.recv_ops if recv.src_rank != self.rank]

        if local_sends or local_recvs:
            local_sends_by_id = {op.task_id: op for op in local_sends}
            if None in local_sends_by_id:
                raise RuntimeError(
                    "GlooCopyService: local send missing task_id; "
                    "use submit_send_with_id/submit_recv_with_id for local copies"
                )
            local_recvs_by_id = {recv.task_id: (recv, dst) for (recv, dst) in local_recvs}
            if None in local_recvs_by_id:
                raise RuntimeError(
                    "GlooCopyService: local recv missing task_id; "
                    "use submit_send_with_id/submit_recv_with_id for local copies"
                )
            if len(local_sends_by_id) != len(local_sends) or len(local_recvs_by_id) != len(
                local_recvs
            ):
                raise RuntimeError(
                    f"GlooCopyService: unmatched local ops on rank {self.rank}: "
                    f"{len(local_sends)} local sends vs {len(local_recvs)} local recvs"
                )
            for task_id, (recv_op, dst_tensor) in local_recvs_by_id.items():
                send_op = local_sends_by_id.get(task_id)
                if send_op is None:
                    raise RuntimeError(
                        f"GlooCopyService: missing local send for task_id={task_id} "
                        f"on rank {self.rank}"
                    )
                with torch.no_grad():
                    src_tensor = send_op.tensor
                    if dst_tensor.device != src_tensor.device:
                        dst_tensor.copy_(src_tensor.to(dst_tensor.device))
                    else:
                        dst_tensor.copy_(src_tensor)

        # Build Gloo P2P ops over CPU tensors. For sends we clone to CPU;
        # for recvs we use the preallocated CPU buffers.
        # Use group_peer (not peer) to pass ranks directly in group space,
        # avoiding the global-to-group rank conversion in P2POp which doesn't
        # work for cross-world ProcessGroups.
        for op in remote_sends:
            cpu_tensor = op.tensor.detach().to("cpu").contiguous()
            p2p_ops.append(
                dist.P2POp(dist.isend, cpu_tensor, group=self.gloo_pg, group_peer=op.dest_rank)
            )
        for recv, _dst_tensor in remote_recvs:
            p2p_ops.append(
                dist.P2POp(dist.irecv, recv.tensor, group=self.gloo_pg, group_peer=recv.src_rank)
            )

        if p2p_ops:
            reqs = dist.batch_isend_irecv(p2p_ops)
            for req in reqs:
                req.wait()

        # Copy received CPU buffers back into the original destination tensors.
        for recv, dst_tensor in remote_recvs:
            if dst_tensor.is_cuda:
                dst_tensor.copy_(recv.tensor.to(dst_tensor.device))
            else:
                dst_tensor.copy_(recv.tensor)

        if self._copy_stream is not None:
            torch.cuda.current_stream().wait_stream(self._copy_stream)

        if self.rank == 0:
            logger.info("GlooCopyService: batched communication completed")
        self.send_ops.clear()
        self.recv_ops.clear()


================================================
FILE: megatron/core/resharding/copy_services/nccl_copy_service.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from __future__ import annotations

import logging
from dataclasses import dataclass
from typing import List

import torch
import torch.distributed as dist

from .base import CopyService

logger = logging.getLogger(__name__)


@dataclass
class SendOp:
    """Simple container describing a single NCCL send operation."""

    task_id: int | None
    tensor: torch.Tensor
    dest_rank: int


@dataclass
class RecvOp:
    """Simple container describing a single NCCL receive operation."""

    task_id: int | None
    tensor: torch.Tensor
    src_rank: int


class NCCLCopyService(CopyService):
    """
    Thin wrapper around torch.distributed batch_isend_irecv to submit and execute
    a batch of point-to-point sends and recvs.
    """

    def __init__(self, group=None):
        self.group = group
        # Use group.rank()/size() to support cross-cluster ProcessGroups
        self.rank = group.rank() if group is not None else dist.get_rank()
        self.world_size = group.size() if group is not None else dist.get_world_size()
        self.send_ops: List[SendOp] = []
        self.recv_ops: List[RecvOp] = []
        # Dedicated stream for local (same-rank) copies to avoid unnecessary
        # serialization with work on the default stream.
        self._copy_stream = torch.cuda.Stream()
        if self.rank == 0:
            logger.info(f"NCCLCopyService initialized with {self.world_size} ranks")

    def submit_send(self, src_tensor: torch.Tensor, dest_rank: int):
        self.send_ops.append(SendOp(task_id=None, tensor=src_tensor, dest_rank=dest_rank))

    def submit_send_with_id(self, task_id: int, src_tensor: torch.Tensor, dest_rank: int):
        """Submit a send operation with a unique task identifier."""
        self.send_ops.append(SendOp(task_id=task_id, tensor=src_tensor, dest_rank=dest_rank))

    def submit_recv(self, dest_tensor: torch.Tensor, src_rank: int):
        """Submit a receive operation."""
        self.recv_ops.append(RecvOp(task_id=None, tensor=dest_tensor, src_rank=src_rank))

    def submit_recv_with_id(self, task_id: int, dest_tensor: torch.Tensor, src_rank: int):
        """Submit a receive operation with a unique task identifier."""
        self.recv_ops.append(RecvOp(task_id=task_id, tensor=dest_tensor, src_rank=src_rank))

    def run(self):
        total_ops = len(self.send_ops) + len(self.recv_ops)
        if self.rank == 0:
            logger.info(
                "Executing batched communication: %d sends + %d recvs = %d ops",
                len(self.send_ops),
                len(self.recv_ops),
                total_ops,
            )

        local_sends = [op for op in self.send_ops if op.dest_rank == self.rank]
        remote_sends = [op for op in self.send_ops if op.dest_rank != self.rank]
        local_recvs = [op for op in self.recv_ops if op.src_rank == self.rank]
        remote_recvs = [op for op in self.recv_ops if op.src_rank != self.rank]

        if local_sends or local_recvs:
            local_sends_by_id = {op.task_id: op for op in local_sends}
            if None in local_sends_by_id:
                raise RuntimeError(
                    "NCCLCopyService: local send missing task_id; "
                    "use submit_send_with_id/submit_recv_with_id for local copies"
                )
            local_recvs_by_id = {op.task_id: op for op in local_recvs}
            if None in local_recvs_by_id:
                raise RuntimeError(
                    "NCCLCopyService: local recv missing task_id; "
                    "use submit_send_with_id/submit_recv_with_id for local copies"
                )
            if len(local_sends_by_id) != len(local_sends) or len(local_recvs_by_id) != len(
                local_recvs
            ):
                raise RuntimeError(
                    f"NCCLCopyService: unmatched local ops on rank {self.rank}: "
                    f"{len(local_sends)} local sends vs {len(local_recvs)} local recvs"
                )
            for task_id, recv_op in local_recvs_by_id.items():
                send_op = local_sends_by_id.get(task_id)
                if send_op is None:
                    raise RuntimeError(
                        f"NCCLCopyService: missing local send for task_id={task_id} "
                        f"on rank {self.rank}"
                    )
                with torch.no_grad():
                    with torch.cuda.stream(self._copy_stream):
                        recv_op.tensor.copy_(send_op.tensor)

        p2p_ops = []
        for op in remote_sends:
            p2p_ops.append(dist.P2POp(dist.isend, op.tensor, op.dest_rank, group=self.group))
        for op in remote_recvs:
            p2p_ops.append(dist.P2POp(dist.irecv, op.tensor, op.src_rank, group=self.group))

        if p2p_ops:
            reqs = dist.batch_isend_irecv(p2p_ops)
            for req in reqs:
                req.wait()

        # Make sure the copy stream is finished
        torch.cuda.current_stream().wait_stream(self._copy_stream)

        if self.rank == 0:
            logger.info("Batched communication completed")
        self.send_ops.clear()
        self.recv_ops.clear()


================================================
FILE: megatron/core/resharding/copy_services/nvshmem_copy_service.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from __future__ import annotations

import logging
from typing import Dict

import torch
import torch.distributed as dist

from ..nvshmem_copy_service import RemoteCopyService
from .base import CopyService

logger = logging.getLogger(__name__)


class NVSHMEMCopyService(CopyService):
    """CopyService implementation backed by NVSHMEM RemoteCopyService."""

    def __init__(self, group=None):
        if not dist.is_initialized():
            raise RuntimeError("torch.distributed must be initialized before NVSHMEMCopyService()")

        self._group = group
        self.rank = group.rank() if group is not None else dist.get_rank()
        self._remote = RemoteCopyService(group=group)
        # Lazily initialized on first use to avoid side effects at import time
        self._initialized = False

        # NOTE: keep the original typed tensors here (not uint8 views) so local copies
        # preserve shape/strides semantics and avoid byte-offset pitfalls.
        self._local_send_ops: Dict[int, torch.Tensor] = {}
        self._local_recv_ops: Dict[int, torch.Tensor] = {}
        self._local_copy_stream = torch.cuda.Stream()

        logger.info("NVSHMEMCopyService constructed")

    def _ensure_initialized(self):
        if not self._initialized:
            self._remote.init(log_level="INFO")
            self._initialized = True
            logger.info(
                "NVSHMEMCopyService initialized: PE %d / %d", self._remote.my_pe, self._remote.n_pes
            )

    def submit_send(self, src_tensor: torch.Tensor, dest_rank: int):
        """
        Basic CopyService API is not rich enough to drive the NVSHMEM planner
        (it lacks a globally shared task identifier), so this method is kept
        only for interface compatibility and should not be used directly.

        The resharding path calls into NVSHMEMCopyService via the
        submit_send_with_id/submit_recv_with_id helpers instead.
        """
        raise RuntimeError(
            "NVSHMEMCopyService.submit_send() is not supported; "
            "use submit_send_with_id(...) from execute_reshard_plan."
        )

    def submit_recv(self, dest_tensor: torch.Tensor, src_rank: int):
        raise RuntimeError(
            "NVSHMEMCopyService.submit_recv() is not supported; "
            "use submit_recv_with_id(...) from execute_reshard_plan."
        )

    #
    # New helper API used from execute_reshard_plan via monkey-patching:
    # we avoid changing the existing execute_reshard_plan signature by adding
    # a small adapter layer that batches up matched send/recv slices.
    #

    def submit_send_with_id(self, task_id: int, src_tensor: torch.Tensor, dest_rank: int):
        """Register a send with an explicit, globally shared task_id."""
        self._ensure_initialized()

        if not src_tensor.is_contiguous():
            src_tensor = src_tensor.contiguous()

        # Local transfers: keep them out of RemoteCopyService entirely.
        if dest_rank == self.rank:
            self._local_send_ops[task_id] = src_tensor
            return

        num_bytes = src_tensor.numel() * src_tensor.element_size()
        src_bytes = src_tensor.view(torch.uint8)

        logger.debug(
            "NVSHMEMCopyService: register_send task_id=%d, %d bytes (%d → %d)",
            task_id,
            num_bytes,
            self.rank,
            dest_rank,
        )

        # Use public API on RemoteCopyService
        self._remote.register_send(
            task_id=task_id, src_tensor=src_bytes, src_pos=0, size=num_bytes, dest_pe=dest_rank
        )

    def submit_recv_with_id(self, task_id: int, dest_tensor: torch.Tensor, src_rank: int):
        """Register a recv with an explicit, globally shared task_id."""
        self._ensure_initialized()

        if not dest_tensor.is_contiguous():
            dest_tensor = dest_tensor.contiguous()

        # Local transfers: keep them out of RemoteCopyService entirely.
        if src_rank == self.rank:
            self._local_recv_ops[task_id] = dest_tensor
            return

        num_bytes = dest_tensor.numel() * dest_tensor.element_size()
        dst_bytes = dest_tensor.view(torch.uint8)

        logger.debug(
            "NVSHMEMCopyService: register_recv task_id=%d, %d bytes (%d ← %d)",
            task_id,
            num_bytes,
            self.rank,
            src_rank,
        )

        self._remote.register_receive(
            task_id=task_id, dest_tensor=dst_bytes, dest_pos=0, size=num_bytes, src_pe=src_rank
        )

    def run(self):
        """
        Execute all registered transfer pairs via NVSHMEM.

        This converts the registered pairs into RemoteCopyService send/receive
        requests, builds a schedule, runs the pipelined NVSHMEM transfer, and
        then clears internal state.
        """
        self._ensure_initialized()

        # 1) Run same-rank copies (match by task_id), like NCCL backend.
        if self._local_send_ops or self._local_recv_ops:
            missing_sends = set(self._local_recv_ops.keys()) - set(self._local_send_ops.keys())
            missing_recvs = set(self._local_send_ops.keys()) - set(self._local_recv_ops.keys())
            if missing_sends or missing_recvs:
                raise RuntimeError(
                    "NVSHMEMCopyService: unmatched local ops on rank "
                    f"{self.rank}: missing_sends={sorted(list(missing_sends))[:10]} "
                    f"missing_recvs={sorted(list(missing_recvs))[:10]}"
                )

            with torch.no_grad():
                with torch.cuda.stream(self._local_copy_stream):
                    for task_id, dst in self._local_recv_ops.items():
                        src = self._local_send_ops[task_id]
                        if src.numel() != dst.numel() or src.element_size() != dst.element_size():
                            raise RuntimeError(
                                "NVSHMEMCopyService: local copy size mismatch on rank "
                                f"{self.rank} task_id={task_id}: "
                                f"src=({tuple(src.shape)}, {src.dtype}) "
                                f"dst=({tuple(dst.shape)}, {dst.dtype})"
                            )
                        dst.copy_(src, non_blocking=True)

            torch.cuda.current_stream().wait_stream(self._local_copy_stream)
            self._local_send_ops.clear()
            self._local_recv_ops.clear()

        # 2) Execute remote schedule (if any remote sends/recvs were registered).
        # NOTE: ALL ranks must call schedule() and run() because they contain collective
        # operations that require all ranks to participate:
        #  - schedule() has dist.all_gather_object() (torch distributed collective)
        #  - run() has nvshmem.core.barrier_all() (nvshmem collective)
        # This is critical for non-collocated refit where some ranks may have no work.
        logger.info("NVSHMEMCopyService: building NVSHMEM schedule and executing")
        self._remote.schedule()
        self._remote.run()
        self._remote.clear_requests()
        logger.info("NVSHMEMCopyService: NVSHMEM transfers complete")


================================================
FILE: megatron/core/resharding/execution.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from __future__ import annotations

import logging
from typing import Optional

import torch
import torch.distributed as dist

from .copy_services.base import CopyService
from .transforms import ReshardTransform, _ensure_sendable
from .utils import ReshardPlan

logger = logging.getLogger(__name__)


def _is_mxfp8_tensor(param):
    """Check if param is a TE MXFP8Tensor (fp8_param=true)."""
    return (
        hasattr(param, 'quantize_')
        and hasattr(param, 'dequantize')
        and hasattr(param, '_rowwise_data')
    )


def execute_reshard_plan(
    plan: ReshardPlan,
    src_module: torch.nn.Module,
    dst_module: torch.nn.Module,
    service: CopyService,
    group=None,
    transform: Optional[ReshardTransform] = None,
) -> None:
    """
    Execute a reshard plan (from centralized controller).
    A communication service must be provided to abstract transport.
    Expected service API: submit_send(tensor, dest_rank), submit_recv(tensor, src_rank), run().

    Supports None for src_module and/or dst_module to allow ranks in non-collocated mode:
    - src_module=None: Rank only receives data (destination-only)
    - dst_module=None: Rank only sends data (source-only)
    - Both provided: Rank participates in both send and recv (collocated mode)

    When *transform* is provided, parameters for which
    ``transform.should_transform(param_name)`` returns True use the
    transform's prepare_send / prepare_recv / finalize_recv methods instead
    of the default slice-and-copy logic.
    """

    # Extract parameters from models if present
    src_params = {}
    dst_params = {}
    if src_module is not None:
        src_params = {name: p for name, p in src_module.named_parameters(recurse=True)}
    if dst_module is not None:
        dst_params = {name: p for name, p in dst_module.named_parameters(recurse=True)}

    submit_send_with_id = getattr(service, "submit_send_with_id", None)
    submit_recv_with_id = getattr(service, "submit_recv_with_id", None)

    # Submit sends (only if we have source model)
    for op in plan.send_ops:
        if transform is not None and transform.should_transform(op.param_name):
            src_param = src_params.get(op.param_name)
            if src_param is not None:
                tensors = transform.prepare_send(op.param_name, op.my_slice, src_param)
                for t in tensors:
                    buf = t.contiguous()
                    if submit_send_with_id is not None and op.task_id is not None:
                        submit_send_with_id(op.task_id, buf, op.peer_rank)
                    else:
                        service.submit_send(buf, op.peer_rank)
        else:
            src_param = src_params.get(op.param_name)
            if src_param is not None:
                src_view = _ensure_sendable(src_param)[op.my_slice].contiguous()
                if submit_send_with_id is not None and op.task_id is not None:
                    submit_send_with_id(op.task_id, src_view, op.peer_rank)
                else:
                    service.submit_send(src_view, op.peer_rank)

    # Submit recvs (only if we have destination model)
    # Writebacks: each entry is either
    #   ('default', recv_buffer, dst_param, dst_slice)  or
    #   ('transform', param_name, dst_slice, [recv_buffers])
    recv_writebacks: list = []

    for op in plan.recv_ops:
        if transform is not None and transform.should_transform(op.param_name):
            recv_bufs = transform.prepare_recv(op.param_name, op.my_slice)
            for buf in recv_bufs:
                if submit_recv_with_id is not None and op.task_id is not None:
                    submit_recv_with_id(op.task_id, buf, op.peer_rank)
                else:
                    service.submit_recv(buf, op.peer_rank)
            recv_writebacks.append(('transform', op.param_name, op.my_slice, recv_bufs))
        else:
            dst_param = dst_params.get(op.param_name)
            if dst_param is not None:
                dst_slice_view = dst_param.data[op.my_slice]
                recv_buffer = torch.empty_like(dst_slice_view.contiguous())
                if submit_recv_with_id is not None and op.task_id is not None:
                    submit_recv_with_id(op.task_id, recv_buffer, op.peer_rank)
                else:
                    service.submit_recv(recv_buffer, op.peer_rank)
                recv_writebacks.append(('default', recv_buffer, dst_param, op.my_slice))

    # Execute
    logger.info(f"Executing {len(plan.send_ops)} sends + {len(plan.recv_ops)} recvs")
    service.run()
    torch.cuda.synchronize()
    dist.barrier(group=group)

    # Write back received buffers into their destination parameter slices.
    #
    # For quantized destination params (fp8_param=true on receiver),
    # accumulate ALL BF16 slices per-param before calling quantize_() once.
    # This avoids corrupting MXFP8 per-block scales from partial-slice updates.
    pending_quantized: dict[int, tuple[torch.nn.Parameter, torch.Tensor, list]] = {}

    for wb in recv_writebacks:
        with torch.no_grad():
            if wb[0] == 'transform':
                _, param_name, dst_slice, recv_bufs = wb
                transform.finalize_recv(param_name, dst_slice, recv_bufs)
            else:
                _, recv_buffer, dst_param, dst_slice = wb
                if _is_mxfp8_tensor(dst_param):
                    # Accumulate BF16 slices for deferred quantization
                    param_id = id(dst_param)
                    if param_id not in pending_quantized:
                        full_bf16 = dst_param.dequantize().clone()
                        pending_quantized[param_id] = (dst_param, full_bf16, [])
                    pending_quantized[param_id][2].append((dst_slice, recv_buffer))
                    pending_quantized[param_id][1][dst_slice].copy_(recv_buffer)
                else:
                    dst_param.data[dst_slice].copy_(recv_buffer)

    # Finalize deferred quantized param updates
    for param_id, (dst_param, full_bf16, slices) in pending_quantized.items():
        with torch.no_grad():
            dst_param.quantize_(full_bf16)

    # Ensure all writeback copies are visible to subsequent CUDA ops (e.g. CUDA
    # graph warmup).  The synchronize() above fires *before* the writeback loop,
    # so without this second sync the .copy_() kernels are still async when
    # execute_reshard_plan returns — creating a race with callers that immediately
    # inspect or capture (via CUDA graphs) the destination parameters.
    torch.cuda.synchronize()

    logger.info("Reshard complete")


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""
NVSHMEM-based remote copy service and supporting components.

This package is an in-tree integration of the standalone
`nvshmem_copy_service/python` implementation so that Megatron
can use it without relying on an external library.
"""

from .compat import ensure_nvshmem_compat

ensure_nvshmem_compat()

from . import nvshmem_types
from .core import GPUResourceManager, KernelLauncher, PipelineExecutor
from .memory import DoubleBufferManager, TensorPointerExtractor
from .planning import CommunicationScheduler, GPUExecutionPlanner, TaskSegmenter, WorkloadPacker
from .service import RemoteCopyService

__all__ = [
    "RemoteCopyService",
    "nvshmem_types",
    "GPUResourceManager",
    "KernelLauncher",
    "PipelineExecutor",
    "DoubleBufferManager",
    "TensorPointerExtractor",
    "CommunicationScheduler",
    "GPUExecutionPlanner",
    "TaskSegmenter",
    "WorkloadPacker",
]


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/compat.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""
Compatibility layer for cuda-core version differences.

cuda-core >=0.5 removed the ``cuda.core.experimental._memory`` and
``cuda.core.experimental._stream`` private submodules, but nvshmem4py
still imports from them.  We register ``sys.modules`` shims so those
imports resolve to the new ``cuda.core._memory`` / ``cuda.core._stream``
paths.

This module should be imported before any nvshmem.core usage.
"""

import importlib
import sys


def _patch_cuda_core_experimental():
    """Register cuda.core._memory / _stream as cuda.core.experimental._memory / _stream."""
    for submod in ("_memory", "_stream"):
        exp_key = f"cuda.core.experimental.{submod}"
        new_key = f"cuda.core.{submod}"
        if exp_key not in sys.modules:
            try:
                sys.modules[exp_key] = importlib.import_module(new_key)
            except ImportError:
                pass  # old cuda-core that still has experimental._memory


def get_cuda_core_device_class():
    """Return the ``Device`` class from whichever cuda-core location is available.

    cuda-core <0.5: ``cuda.core.experimental.Device``
    cuda-core >=0.5: ``cuda.core.Device``
    """
    try:
        from cuda.core import Device

        return Device
    except ImportError:
        from cuda.core.experimental import Device

        return Device


def ensure_nvshmem_compat():
    """Apply all compatibility patches.  Safe to call multiple times."""
    _patch_cuda_core_experimental()


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/core/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Core execution components for NVSHMEM operations."""

from .gpu_resource_manager import GPUResourceManager
from .kernel_launcher import KernelLauncher
from .pipeline_executor import PipelineExecutor

__all__ = ["GPUResourceManager", "KernelLauncher", "PipelineExecutor"]


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/core/gpu_resource_manager.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""
GPU resource management for NVSHMEM operations.

Handles NVSHMEM initialization, CUDA device setup, stream management,
and event lifecycle.
"""

import logging
from typing import Dict, Optional

from ..compat import ensure_nvshmem_compat, get_cuda_core_device_class

ensure_nvshmem_compat()

try:
    import nvshmem.core

    Device = get_cuda_core_device_class()

    HAVE_NVSHMEM = True
except ImportError:
    HAVE_NVSHMEM = False

import torch
import torch.distributed as dist

logger = logging.getLogger(__name__)


class GPUResourceManager:
    """Manages GPU resources including NVSHMEM, streams, and events."""

    def __init__(self):
        self.device = None
        self.my_pe: int = -1
        self.n_pes: int = -1
        self.initialized: bool = False

        # CUDA streams (cuda.core.experimental)
        self.pack_stream = None
        self.unpack_stream = None
        self.send_stream = None
        self.copy_stream = None

        # PyTorch stream wrappers
        self.torch_pack_stream_wrapper = None
        self.torch_unpack_stream_wrapper = None
        self.torch_send_stream_wrapper = None
        self.torch_copy_stream_wrapper = None

        # Stream name to PyTorch stream mapping
        self._torch_streams: Dict[str, torch.cuda.ExternalStream] = {}

    def init(self, group=None) -> None:
        """
        Initialize NVSHMEM, CUDA device, and streams.

        Expects torch.distributed to be already initialized.

        Args:
            group: Optional ProcessGroup for distributed operations.
                   If None, uses the default process group.
        """
        if self.initialized:
            return

        if not HAVE_NVSHMEM:
            raise RuntimeError(
                "nvshmem.core is not available. Please install nvshmem to use GPUResourceManager."
            )

        # torch.distributed must be initialized before calling this
        if not dist.is_initialized():
            raise RuntimeError(
                "torch.distributed must be initialized before " "GPUResourceManager.init()"
            )

        # Get current CUDA device (already set by caller based on LOCAL_RANK)
        local_rank = torch.cuda.current_device()

        # nvshmem4py requires a cuda.core Device at init time
        self.device = Device(local_rank)
        self.device.set_current()

        # Extract rank, nranks from the process group.
        # Use group.rank()/size() instead of dist.get_rank(group=) because
        # dist.get_rank(group=) maps via the default PG rank, which is wrong
        # for cross-cluster ProcessGroups where workers share default PG rank 0.
        num_ranks = group.size() if group is not None else dist.get_world_size()
        rank_id = group.rank() if group is not None else dist.get_rank()

        # Create/Broadcast UniqueID using broadcast_object_list
        uniqueid = nvshmem.core.get_unique_id(empty=True)
        if rank_id == 0:
            uniqueid = nvshmem.core.get_unique_id()
            broadcast_objects = [uniqueid]
        else:
            broadcast_objects = [None]

        # Broadcast ID to all ranks
        dist.broadcast_object_list(broadcast_objects, src=0, group=group)

        # Barrier to ensure everyone has the ID before NVSHMEM init
        dist.barrier(group=group)

        # Initialize NVSHMEM with the broadcasted UID
        nvshmem.core.init(
            device=self.device,
            uid=broadcast_objects[0],
            rank=rank_id,
            nranks=num_ranks,
            initializer_method="uid",
        )

        logger.info("NVSHMEM initialized")

        self.my_pe = nvshmem.core.my_pe()
        self.n_pes = nvshmem.core.n_pes()

        # Create CUDA streams
        self.pack_stream = self.device.create_stream()
        self.unpack_stream = self.device.create_stream()
        self.send_stream = self.device.create_stream()
        self.copy_stream = self.device.create_stream()

        # Get stream pointers and create PyTorch wrappers
        _, pack_stream_ptr = self.pack_stream.__cuda_stream__()
        _, unpack_stream_ptr = self.unpack_stream.__cuda_stream__()
        _, send_stream_ptr = self.send_stream.__cuda_stream__()
        _, copy_stream_ptr = self.copy_stream.__cuda_stream__()

        self.torch_pack_stream_wrapper = torch.cuda.ExternalStream(pack_stream_ptr)
        self.torch_unpack_stream_wrapper = torch.cuda.ExternalStream(unpack_stream_ptr)
        self.torch_send_stream_wrapper = torch.cuda.ExternalStream(send_stream_ptr)
        self.torch_copy_stream_wrapper = torch.cuda.ExternalStream(copy_stream_ptr)

        # Build stream mapping
        self._torch_streams = {
            "pack": self.torch_pack_stream_wrapper,
            "unpack": self.torch_unpack_stream_wrapper,
            "send": self.torch_send_stream_wrapper,
            "copy": self.torch_copy_stream_wrapper,
        }

        logger.info("Stream mapping built")

        self.initialized = True

        # Initial barrier to ensure all PEs are ready
        nvshmem.core.barrier_all(stream=self.send_stream)

    def get_stream(self, name: str):
        """
        Get CUDA stream by name.

        Args:
            name: Stream name ('pack', 'unpack', 'send', 'copy')

        Returns:
            CUDA stream object
        """
        streams = {
            "pack": self.pack_stream,
            "unpack": self.unpack_stream,
            "send": self.send_stream,
            "copy": self.copy_stream,
        }
        return streams.get(name)

    def get_torch_stream(self, name: str) -> Optional[torch.cuda.ExternalStream]:
        """
        Get PyTorch ExternalStream by name.

        Args:
            name: Stream name ('pack', 'unpack', 'send', 'copy')

        Returns:
            PyTorch ExternalStream
        """
        return self._torch_streams.get(name)

    def create_events(self, num_events: int = 2):
        """
        Create double-buffered CUDA events for pack, unpack, and barrier operations.

        Args:
            num_events: Number of events to create for each type
                (default: 2 for double buffering)

        Returns:
            tuple: (pack_events, unpack_events, barrier_events) lists of torch.cuda.Event
        """
        pack_events = [torch.cuda.Event(enable_timing=False) for _ in range(num_events)]
        unpack_events = [torch.cuda.Event(enable_timing=False) for _ in range(num_events)]
        barrier_events = [torch.cuda.Event(enable_timing=False) for _ in range(num_events)]
        return pack_events, unpack_events, barrier_events

    def finalize(self) -> None:
        """Cleanup resources (streams are automatically managed by CUDA)."""
        self.initialized = False
        self.my_pe = -1
        self.n_pes = -1
        # Streams are automatically cleaned up when objects are deleted


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/core/kernel_launcher.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""
CUDA kernel management and launching for pack/unpack operations.

Handles kernel compilation, launching, and stream coordination.
"""

import os
from typing import Any, Tuple

try:
    import cupy as cp

    HAVE_CUPY = True
except ImportError:
    HAVE_CUPY = False

import torch
import torch.cuda.nvtx as nvtx


class KernelLauncher:
    """Manages CUDA kernel loading and launching for data pack/unpack operations."""

    def __init__(self):
        self.chunked_copy_kernel = None
        # Cached CuPy stream wrappers for efficient kernel launching
        self.cp_pack_stream = None
        self.cp_unpack_stream = None

    def load_kernels(self) -> None:
        """Load and compile CUDA kernels from source."""
        if not HAVE_CUPY:
            raise RuntimeError("cupy is not available. Please install cupy to use KernelLauncher.")

        current_dir = os.path.dirname(os.path.abspath(__file__))
        kernel_path = os.path.join(current_dir, "..", "kernels", "chunked_kernel.cu")

        with open(kernel_path, "r") as f:
            kernel_source = f.read()

        self.chunked_copy_kernel = cp.RawKernel(
            kernel_source, "chunked_batched_copy_kernel", options=("-std=c++11",)
        )

    def set_streams(self, pack_stream, unpack_stream) -> None:
        """
        Cache CuPy stream wrappers for kernel launching.

        This eliminates per-launch overhead of stream pointer extraction
        and CuPy ExternalStream creation.

        Args:
            pack_stream: CUDA stream for pack operations
            unpack_stream: CUDA stream for unpack operations
        """
        _, pack_stream_ptr = pack_stream.__cuda_stream__()
        _, unpack_stream_ptr = unpack_stream.__cuda_stream__()
        self.cp_pack_stream = cp.cuda.ExternalStream(pack_stream_ptr)
        self.cp_unpack_stream = cp.cuda.ExternalStream(unpack_stream_ptr)

    def launch_pack(
        self,
        gpu_plan: Tuple[Any, Any, Any, int],
        pack_stream,
        torch_pack_stream_wrapper: torch.cuda.ExternalStream,
        pack_event: torch.cuda.Event,
    ) -> None:
        """
        Launch pack kernel to copy data from user tensors to send buffer.

        Args:
            gpu_plan: Tuple of (cp_src_addrs, cp_dst_addrs, cp_sizes, num_chunks)
                as CuPy arrays
            pack_stream: CUDA stream (cuda.core.experimental.Stream) - unused,
                kept for compatibility
            torch_pack_stream_wrapper: PyTorch external stream wrapper
            pack_event: CUDA event to record after kernel launch
        """
        nvtx.range_push("Launch Pack Kernel")
        if not gpu_plan:
            nvtx.range_pop()
            return

        # Unpack cached CuPy arrays from gpu_plan
        cp_src, cp_dst, cp_sizes, num_chunks = gpu_plan

        # Grid/Block configuration
        THREADS_PER_BLOCK = 1024
        NUM_BLOCKS = 75

        # Launch kernel using cached CuPy stream
        assert self.chunked_copy_kernel is not None
        assert self.cp_pack_stream is not None
        self.chunked_copy_kernel(
            (NUM_BLOCKS,),
            (THREADS_PER_BLOCK,),
            (cp_src, cp_dst, cp_sizes, num_chunks),
            stream=self.cp_pack_stream,
        )
        nvtx.range_pop()
        # Record event on PyTorch stream
        pack_event.record(stream=torch_pack_stream_wrapper)

    def launch_unpack(
        self,
        gpu_plan: Tuple[Any, Any, Any, int],
        unpack_stream,
        torch_unpack_stream_wrapper: torch.cuda.ExternalStream,
        unpack_event: torch.cuda.Event,
    ) -> None:
        """
        Launch unpack kernel to copy data from receive buffer to user tensors.

        Args:
            gpu_plan: Tuple of (cp_src_addrs, cp_dst_addrs, cp_sizes, num_chunks)
                as CuPy arrays
            unpack_stream: CUDA stream (cuda.core.experimental.Stream) - unused,
            kept for compatibility
            torch_unpack_stream_wrapper: PyTorch external stream wrapper
            unpack_event: CUDA event to record after kernel launch
        """
        nvtx.range_push("Launch Unpack Kernel")
        if not gpu_plan:
            nvtx.range_pop()
            return

        # Unpack cached CuPy arrays from gpu_plan
        cp_src, cp_dst, cp_sizes, num_chunks = gpu_plan

        # Grid/Block configuration
        THREADS_PER_BLOCK = 1024
        NUM_BLOCKS = 75

        # Launch kernel using cached CuPy stream
        assert self.chunked_copy_kernel is not None
        assert self.cp_unpack_stream is not None
        self.chunked_copy_kernel(
            (NUM_BLOCKS,),
            (THREADS_PER_BLOCK,),
            (cp_src, cp_dst, cp_sizes, num_chunks),
            stream=self.cp_unpack_stream,
        )
        nvtx.range_pop()
        # Record event on PyTorch stream
        unpack_event.record(stream=torch_unpack_stream_wrapper)


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/core/pipeline_executor.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""
Pipelined communication execution engine.

Orchestrates the pack/send/unpack pipeline with double-buffering
and proper stream synchronization.
"""

from typing import Dict, List, Optional

from ..compat import ensure_nvshmem_compat

ensure_nvshmem_compat()

try:
    import nvshmem.core

    HAVE_NVSHMEM = True
except ImportError:
    HAVE_NVSHMEM = False

import torch

from ..logger import PELogger
from ..memory.double_buffer_manager import DoubleBufferManager
from ..nvshmem_types import ReceiveRequest, ScheduledBatch, SendRequest
from .kernel_launcher import KernelLauncher


class PipelineExecutor:
    """Executes pipelined NVSHMEM communication with pack/send/unpack overlap."""

    def __init__(
        self, kernel_launcher: KernelLauncher, buffer_manager: DoubleBufferManager, my_pe: int
    ):
        """
        Initialize pipeline executor.

        Args:
            kernel_launcher: KernelLauncher instance for pack/unpack kernels
            buffer_manager: DoubleBufferManager for send/recv buffers
            my_pe: This PE's rank
        """
        self.kernel_launcher = kernel_launcher
        self.buffer_manager = buffer_manager
        self.my_pe = my_pe

        # Streams (will be set by service)
        self.pack_stream = None
        self.unpack_stream = None
        self.send_stream = None
        self.copy_stream = None

        self.torch_pack_stream_wrapper = None
        self.torch_unpack_stream_wrapper = None
        self.torch_send_stream_wrapper = None
        self.torch_copy_stream_wrapper = None

        # Events for double-buffered synchronization
        self.pack_events = []
        self.unpack_events = []
        self.barrier_events = []

    def set_streams(
        self,
        pack_stream,
        unpack_stream,
        send_stream,
        copy_stream,
        torch_pack_stream_wrapper,
        torch_unpack_stream_wrapper,
        torch_send_stream_wrapper,
        torch_copy_stream_wrapper,
    ):
        """Set CUDA streams for execution."""
        self.pack_stream = pack_stream
        self.unpack_stream = unpack_stream
        self.send_stream = send_stream
        self.copy_stream = copy_stream

        self.torch_pack_stream_wrapper = torch_pack_stream_wrapper
        self.torch_unpack_stream_wrapper = torch_unpack_stream_wrapper
        self.torch_send_stream_wrapper = torch_send_stream_wrapper
        self.torch_copy_stream_wrapper = torch_copy_stream_wrapper

    def set_events(self, pack_events: List, unpack_events: List, barrier_events: List):
        """Set double-buffered CUDA events."""
        self.pack_events = pack_events
        self.unpack_events = unpack_events
        self.barrier_events = barrier_events

    def execute_pipeline(
        self, iter_schedules: List[Dict[str, Optional[ScheduledBatch]]], num_iterations: int
    ) -> None:
        """
        Execute pipelined communication.

        Pipeline stages:
        1. Pack NEXT iteration (async)
        2. Unpack PRIOR iteration (async)
        3. Send CURRENT iteration
        4. Barrier + record barrier event
        5. Wait for async pack/unpack to complete

        Cross-stream synchronization uses lightweight CUDA events instead of
        cudaDeviceSynchronize (torch.cuda.synchronize). The pack kernel includes
        __threadfence_system() to ensure writes are visible to the NIC's DMA
        engine, and barrier_events propagate NVSHMEM RDMA completion from
        send_stream to unpack_stream.

        Args:
            iter_schedules: List of iteration schedules
            num_iterations: Total number of iterations
        """
        PELogger.info(f"Executing pipeline: {num_iterations} iterations")

        # Priming: Pack iteration 0 (async, no CPU sync needed —
        # step 3 uses GPU-level event wait for pack→put ordering)
        if num_iterations > 0 and iter_schedules[0]["send"]:
            torch.cuda.nvtx.range_push("Priming")
            PELogger.debug("Priming: Packing iteration 0")
            self._launch_pack(0, iter_schedules[0]["send"])
            torch.cuda.nvtx.range_pop()

        for i in range(num_iterations):
            torch.cuda.nvtx.range_push(f"Iteration {i}")
            has_send = iter_schedules[i]["send"] is not None
            has_recv = iter_schedules[i]["recv"] is not None
            has_next_send = i + 1 < num_iterations and iter_schedules[i + 1]["send"] is not None
            has_prior_recv = i > 0 and iter_schedules[i - 1]["recv"] is not None

            slot = i % 2

            # Log iteration start
            send_info = (
                f" → PE {iter_schedules[i]['send'].dest_pe} "
                f"({iter_schedules[i]['send'].total_size} bytes)"
                if has_send
                else ""
            )
            recv_info = (
                f" ← PE {iter_schedules[i]['recv'].src_pe} "
                f"({iter_schedules[i]['recv'].total_size} bytes)"
                if has_recv
                else ""
            )
            PELogger.debug(f"Iteration {i}/{num_iterations}: slot={slot}{send_info}{recv_info}")

            # Step 1: Pack NEXT iteration (async)
            if has_next_send:
                torch.cuda.nvtx.range_push("Step 1: Pack Next")
                next_batch = iter_schedules[i + 1]["send"]
                assert next_batch is not None
                PELogger.debug(
                    f"  Pack next (iter {i+1}): {len(next_batch.tasks)} tasks "
                    f"→ PE {next_batch.dest_pe}"
                )
                self._launch_pack(i + 1, next_batch)
                torch.cuda.nvtx.range_pop()

            # Step 2: Unpack PRIOR iteration (async)
            if has_prior_recv:
                torch.cuda.nvtx.range_push("Step 2: Unpack Prior")
                prior_batch = iter_schedules[i - 1]["recv"]
                assert prior_batch is not None
                PELogger.debug(
                    f"  Unpack prior (iter {i-1}): {prior_batch.total_size} bytes "
                    f"← PE {prior_batch.src_pe}"
                )
                # GPU-level event wait: ensures send_stream's barrier_all from
                # the prior iteration has completed before unpack_stream proceeds.
                self.torch_unpack_stream_wrapper.wait_event(self.barrier_events[(i - 1) % 2])
                self._launch_unpack(i - 1, prior_batch)
                torch.cuda.nvtx.range_pop()

            # Step 3: Send CURRENT iteration
            if has_send:
                torch.cuda.nvtx.range_push("Step 3: Send Current")
                batch = iter_schedules[i]["send"]
                assert batch is not None
                transfer_size = batch.total_size
                PELogger.debug(f"  Send current: {transfer_size} bytes → PE {batch.dest_pe}")

                # GPU-level event wait: ensures pack data in send_slot is visible
                # to send_stream before NVSHMEM put reads it. The pack kernel's
                # __threadfence_system() guarantees the writes are also visible to
                # the NIC's DMA engine.
                self.torch_send_stream_wrapper.wait_event(self.pack_events[slot])

                nvshmem.core.put(
                    self.buffer_manager.recv_slots[slot][0:transfer_size],
                    self.buffer_manager.send_slots[slot][0:transfer_size],
                    batch.dest_pe,
                    stream=self.send_stream,
                )
                torch.cuda.nvtx.range_pop()

            # Step 4a: Wait for prior unpack to complete BEFORE the barrier.
            torch.cuda.nvtx.range_push("Step 4a: Wait Unpack")
            if has_prior_recv:
                self.unpack_events[(i - 1) % 2].synchronize()
            torch.cuda.nvtx.range_pop()

            # Ensure all NVSHMEM operations on send_stream complete (stream-ordered)
            nvshmem.core.quiet(stream=self.send_stream)

            # Step 4b: Global barrier + CPU sync + record event
            torch.cuda.nvtx.range_push("Step 4b: Barrier")
            nvshmem.core.barrier_all(stream=self.send_stream)
            # CPU-sync the send_stream to ensure barrier_all has actually
            # completed (not just submitted). Without this, the barrier_event
            # can fire before RDMA data from the remote PE is visible, because
            # stream-ordered operations are only guaranteed to be submitted,
            # not completed, when the event is recorded.
            self.torch_send_stream_wrapper.synchronize()
            self.barrier_events[slot].record(stream=self.torch_send_stream_wrapper)
            torch.cuda.nvtx.range_pop()

            # Step 5: Wait for async pack to complete (double-buffer safety)
            torch.cuda.nvtx.range_push("Step 5: Wait Pack")
            if has_next_send:
                self.pack_events[(i + 1) % 2].synchronize()
            torch.cuda.nvtx.range_pop()

            torch.cuda.nvtx.range_pop()

        # Final unpack for last iteration
        if num_iterations > 0 and iter_schedules[num_iterations - 1]["recv"]:
            torch.cuda.nvtx.range_push("Final Unpack")
            PELogger.debug(f"Final unpack: iteration {num_iterations-1}")
            last_recv = iter_schedules[num_iterations - 1]["recv"]
            assert last_recv is not None
            # GPU-level event wait for NVSHMEM RDMA data visibility
            self.torch_unpack_stream_wrapper.wait_event(
                self.barrier_events[(num_iterations - 1) % 2]
            )
            self._launch_unpack(num_iterations - 1, last_recv)
            self.unpack_events[(num_iterations - 1) % 2].synchronize()
            torch.cuda.nvtx.range_pop()

        PELogger.info(f"Pipeline complete: {num_iterations} iterations")

    def _launch_pack(self, iteration: int, batch: ScheduledBatch) -> None:
        """Launch pack kernel for given iteration."""
        if not batch.gpu_plan:
            return

        self.kernel_launcher.launch_pack(
            batch.gpu_plan,
            self.pack_stream,
            self.torch_pack_stream_wrapper,
            self.pack_events[iteration % 2],
        )

    def _launch_unpack(self, iteration: int, batch: ScheduledBatch) -> None:
        """Launch unpack kernel for given iteration."""
        if not batch.gpu_plan:
            return

        self.kernel_launcher.launch_unpack(
            batch.gpu_plan,
            self.unpack_stream,
            self.torch_unpack_stream_wrapper,
            self.unpack_events[iteration % 2],
        )

    def process_self_moves(
        self, send_requests: List[SendRequest], receive_requests: List[ReceiveRequest]
    ) -> None:
        """
        Handle same-PE transfers (where src_pe == dest_pe == my_pe).

        Uses PyTorch copy on the copy stream for efficiency.

        Args:
            send_requests: List of send requests
            receive_requests: List of receive requests
        """
        # Match send/recv requests where src_pe == dest_pe == my_pe
        local_sends = {r.task_id: r for r in send_requests if r.dest_pe == self.my_pe}
        local_recvs = [r for r in receive_requests if r.src_pe == self.my_pe]

        if local_recvs:
            PELogger.debug(f"Processing {len(local_recvs)} self-moves")

        num_processed = 0
        with torch.cuda.stream(self.torch_copy_stream_wrapper):
            for recv_req in local_recvs:
                if recv_req.task_id in local_sends:
                    send_req = local_sends[recv_req.task_id]
                    PELogger.debug(
                        "  Self-move: task_id=%d, size=%d bytes", recv_req.task_id, send_req.size
                    )

                    # Create views of the tensors with offsets
                    src_view = send_req.src_tensor[
                        send_req.src_pos : send_req.src_pos + send_req.size
                    ]
                    dest_view = recv_req.dest_tensor[
                        recv_req.dest_pos : recv_req.dest_pos + send_req.size
                    ]

                    # Async copy on the copy stream
                    dest_view.copy_(src_view, non_blocking=True)
                    num_processed += 1

        # Synchronize the PyTorch stream
        self.torch_copy_stream_wrapper.synchronize()

        if num_processed > 0:
            PELogger.info("Self-moves complete: %d transfers", num_processed)


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/kernels/chunked_kernel.cu
================================================

#include <cuda_runtime.h>

// CUDA-compatible types (no C++ standard library headers for NVRTC)
typedef unsigned char uint8_t;
typedef unsigned long long uint64_t;
typedef uint64_t uintptr_t;

// ============================================================================
// Kernel Configuration Constants (from ChunkedKernel.h)
// ============================================================================

constexpr int CHUNK_SIZE = 128 * 1024;       // 128KB per chunk
constexpr int NUM_BLOCKS = 75;               // Fixed grid size
constexpr int THREADS_PER_BLOCK = 1024;      // Fixed block size
constexpr int FLOAT4_SIZE = 16;              // 16 bytes per float4
constexpr int MAX_CHUNKS_PER_BLOCK = 512;    // Max chunks per block for shared memory

extern "C" {

/**
 * Chunked batched copy kernel implementation
 *
 * This kernel performs efficient batched memory copies using:
 * 1. Contiguous block assignment for better load balancing
 * 2. Shared memory prefetching of chunk metadata
 * 3. Vectorized float4 (16-byte) copies for aligned data
 * 4. Byte-by-byte fallback for unaligned or small data
 */
__global__ void chunked_batched_copy_kernel(
    uint8_t** src_addrs,
    uint8_t** dst_addrs,
    size_t* sizes,
    int total_chunks
) {
    // Shared memory for metadata prefetching
    __shared__ uint8_t* s_src_addrs[MAX_CHUNKS_PER_BLOCK];
    __shared__ uint8_t* s_dst_addrs[MAX_CHUNKS_PER_BLOCK];
    __shared__ size_t s_sizes[MAX_CHUNKS_PER_BLOCK];

    // Contiguous block assignment: block i processes chunks [start_chunk, end_chunk)
    int chunks_per_block = (total_chunks + gridDim.x - 1) / gridDim.x;  // Ceiling division
    int start_chunk = blockIdx.x * chunks_per_block;
    int end_chunk = start_chunk + chunks_per_block;
    if (end_chunk > total_chunks) {
        end_chunk = total_chunks;
    }
    int num_chunks_this_block = end_chunk - start_chunk;

    // Phase 1: Cooperative loading of metadata to shared memory
    // All 1024 threads cooperate to load metadata from global memory
    for (int i = threadIdx.x; i < num_chunks_this_block; i += blockDim.x) {
        int global_chunk_id = start_chunk + i;
        s_src_addrs[i] = src_addrs[global_chunk_id];
        s_dst_addrs[i] = dst_addrs[global_chunk_id];
        s_sizes[i] = sizes[global_chunk_id];
    }
    __syncthreads();

    // Phase 2: Process each chunk using metadata from shared memory
    for (int chunk_id = 0; chunk_id < num_chunks_this_block; chunk_id++) {
        uint8_t* src = s_src_addrs[chunk_id];
        uint8_t* dst = s_dst_addrs[chunk_id];
        size_t size = s_sizes[chunk_id];

        // Check if both src and dst are aligned to 16 bytes for float4 access
        uintptr_t src_addr = (uintptr_t)src;
        uintptr_t dst_addr = (uintptr_t)dst;
        bool is_aligned = ((src_addr % FLOAT4_SIZE) == 0) && ((dst_addr % FLOAT4_SIZE) == 0);

        if (is_aligned && size >= FLOAT4_SIZE) {
            // Fast path: vectorized float4 copies
            size_t aligned_size = (size / FLOAT4_SIZE) * FLOAT4_SIZE;

            // All 1024 threads cooperate on float4 copies
            #pragma unroll 4
            for (size_t offset = threadIdx.x * FLOAT4_SIZE;
                 offset < aligned_size;
                 offset += blockDim.x * FLOAT4_SIZE) {
                // Vectorized 16-byte load and store
                float4 data = *((float4*)(src + offset));
                *((float4*)(dst + offset)) = data;
            }

            // Handle remaining bytes (< 16 bytes) with byte-by-byte copy
            for (size_t offset = aligned_size + threadIdx.x;
                 offset < size;
                 offset += blockDim.x) {
                dst[offset] = src[offset];
            }
        } else {
            // Fallback path: byte-by-byte copy for unaligned addresses
            // Still use all threads for parallelism
            for (size_t offset = threadIdx.x; offset < size; offset += blockDim.x) {
                dst[offset] = src[offset];
            }
        }
    }

    // System-wide memory fence: ensures all writes from this thread are visible
    // to system agents (NIC DMA engines, other GPUs via PCIe/NVLink).
    // Required so that NVSHMEM put() on another CUDA stream can read correct
    // pack data from the send buffer. Without this, writes may remain in L2
    // cache, invisible to the NIC's DMA engine.
    __threadfence_system();
}

}


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/logger.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""

Per-PE Logger with colored console and file output.


Similar to the C++ Logger implementation, provides:

- Per-PE colored console output

- Per-PE file logging

- Support for TRACE, DEBUG, INFO, SUMMARY, WARN, ERROR levels

"""

import logging
import os
from datetime import datetime
from typing import Optional


class ColoredFormatter(logging.Formatter):
    """Custom formatter that adds color codes for console output."""

    def __init__(self, fmt: str, pe_id: int, use_color: bool = True):
        super().__init__(fmt)
        self.pe_id = pe_id
        self.use_color = use_color

        # ANSI color codes matching C++ implementation
        self.colors = {
            0: "\033[31m",  # Red
            1: "\033[32m",  # Green
            2: "\033[33m",  # Yellow
            3: "\033[34m",  # Blue
            4: "\033[35m",  # Magenta
            5: "\033[36m",  # Cyan
            6: "\033[91m",  # Bright Red
            7: "\033[92m",  # Bright Green
        }
        self.reset = "\033[0m"

    def formatTime(self, record, datefmt=None):
        ct = self.converter(record.created)
        if datefmt:
            s = datetime.fromtimestamp(record.created).strftime(datefmt)
            # For file logs, replace %f with milliseconds
            if "%f" in datefmt:
                s = s.replace("%f", f"{int(record.msecs):03d}")
        else:
            s = datetime.fromtimestamp(record.created).strftime("%H:%M:%S")
            s = f"{s}.{int(record.msecs):03d}"
        return s

    def format(self, record):
        # Save original message
        original_msg = record.msg

        if self.use_color and self.pe_id >= 0:
            color = self.colors.get(self.pe_id, "\033[37m")  # White for others
            record.msg = f"{color}{record.msg}{self.reset}"

        result = super().format(record)

        # Restore original message for other handlers
        record.msg = original_msg

        return result


class PELogger:
    """Per-PE logger with colored console and file output."""

    _logger: Optional[logging.Logger] = None
    _pe_id: int = -1
    _level: int = logging.INFO

    @classmethod
    def init(cls, pe_id: int, level: str = "INFO", logs_dir: str = "logs"):
        """
        Initialize logger for this PE.

        Args:
            pe_id: Process element ID
            level: Log level (TRACE, DEBUG, INFO, WARN, ERROR)
            logs_dir: Directory for log files
        """
        cls._pe_id = pe_id

        # Convert level string to logging level
        level_map = {
            "TRACE": logging.DEBUG - 5,  # Custom level below DEBUG
            "DEBUG": logging.DEBUG,
            "INFO": logging.INFO,
            "SUMMARY": logging.INFO,
            "WARN": logging.WARNING,
            "WARNING": logging.WARNING,
            "ERROR": logging.ERROR,
            "CRITICAL": logging.CRITICAL,
        }
        cls._level = level_map.get(level.upper(), logging.INFO)

        # Create logs directory if it doesn't exist
        os.makedirs(logs_dir, exist_ok=True)

        # Create logger
        logger_name = f"PE_{pe_id}"
        cls._logger = logging.getLogger(logger_name)
        cls._logger.setLevel(cls._level)
        cls._logger.propagate = False

        # Remove existing handlers to avoid duplicates
        cls._logger.handlers.clear()

        # 1. Console handler with color
        console_handler = logging.StreamHandler()
        console_handler.setLevel(cls._level)
        console_format = "[PE %d] [%%(asctime)s] [%%(levelname)s] %%(message)s" % pe_id
        console_formatter = ColoredFormatter(console_format, pe_id, use_color=True)
        console_handler.setFormatter(console_formatter)
        cls._logger.addHandler(console_handler)

        # 2. File handler without color
        log_filename = os.path.join(logs_dir, f"pe_{pe_id}.log")
        file_handler = logging.FileHandler(log_filename, mode="w")
        file_handler.setLevel(cls._level)
        file_format = "[PE %d] [%%(asctime)s] [%%(levelname)s] %%(message)s" % pe_id
        file_formatter = ColoredFormatter(file_format, pe_id, use_color=False)
        file_handler.setFormatter(file_formatter)
        cls._logger.addHandler(file_handler)

    @classmethod
    def set_level(cls, level: str):
        """Set the logging level."""
        level_map = {
            "TRACE": logging.DEBUG - 5,
            "DEBUG": logging.DEBUG,
            "INFO": logging.INFO,
            "SUMMARY": logging.INFO,
            "WARN": logging.WARNING,
            "WARNING": logging.WARNING,
            "ERROR": logging.ERROR,
            "CRITICAL": logging.CRITICAL,
        }
        cls._level = level_map.get(level.upper(), logging.INFO)
        if cls._logger:
            cls._logger.setLevel(cls._level)
            for handler in cls._logger.handlers:
                handler.setLevel(cls._level)

    @classmethod
    def trace(cls, msg: str):
        """Log at TRACE level (most detailed)."""
        if cls._logger:
            cls._logger.log(logging.DEBUG - 5, msg)

    @classmethod
    def debug(cls, msg: str):
        """Log at DEBUG level."""
        if cls._logger:
            cls._logger.debug(msg)

    @classmethod
    def info(cls, msg: str):
        """Log at INFO level."""
        if cls._logger:
            cls._logger.info(msg)

    @classmethod
    def summary(cls, msg: str):
        """Log summary information (INFO level with [SUMMARY] prefix)."""
        if cls._logger:
            cls._logger.info(f"[SUMMARY] {msg}")

    @classmethod
    def warn(cls, msg: str):
        """Log at WARNING level."""
        if cls._logger:
            cls._logger.warning(msg)

    @classmethod
    def warning(cls, msg: str):
        """Log at WARNING level (alias for warn)."""
        cls.warn(msg)

    @classmethod
    def error(cls, msg: str):
        """Log at ERROR level."""
        if cls._logger:
            cls._logger.error(msg)

    @classmethod
    def critical(cls, msg: str):
        """Log at CRITICAL level."""
        if cls._logger:
            cls._logger.critical(msg)

    @classmethod
    def shutdown(cls):
        """Shutdown the logger and flush all handlers."""
        if cls._logger:
            for handler in cls._logger.handlers:
                handler.flush()
                handler.close()
            cls._logger.handlers.clear()
            cls._logger = None


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/memory/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Memory management utilities for NVSHMEM operations."""

from .double_buffer_manager import DoubleBufferManager
from .tensor_pointer_utils import TensorPointerExtractor

__all__ = ["DoubleBufferManager", "TensorPointerExtractor"]


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/memory/double_buffer_manager.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""
Double buffer management for NVSHMEM symmetric memory.

Manages send and receive buffers with double-buffering for pipelined communication.
"""

from ..compat import ensure_nvshmem_compat

ensure_nvshmem_compat()

try:
    import nvshmem.core.interop.torch

    HAVE_NVSHMEM = True
except ImportError:
    HAVE_NVSHMEM = False

import torch

from ..nvshmem_types import MAX_SEGMENT_SIZE


class DoubleBufferManager:
    """Manages double-buffered NVSHMEM symmetric buffers for send/receive operations."""

    def __init__(self, slot_size: int = MAX_SEGMENT_SIZE):
        """
        Initialize buffer manager.

        Args:
            slot_size: Size of each buffer slot in bytes (default: 256MB)
        """
        self.slot_size = slot_size
        self.send_slots = [None, None]
        self.recv_slots = [None, None]

    def allocate(self) -> None:
        """Allocate NVSHMEM symmetric buffers for double-buffering."""
        if not HAVE_NVSHMEM:
            raise RuntimeError(
                "nvshmem.core.interop.torch is not available. "
                "Please install nvshmem to use DoubleBufferManager."
            )

        for i in range(2):
            self.send_slots[i] = nvshmem.core.interop.torch.bytetensor(
                (self.slot_size,), dtype=torch.uint8
            )
            self.recv_slots[i] = nvshmem.core.interop.torch.bytetensor(
                (self.slot_size,), dtype=torch.uint8
            )
            # Zero out buffers
            self.send_slots[i].zero_()
            self.recv_slots[i].zero_()

    def get_send_slot(self, iteration: int):
        """
        Get send buffer for given iteration.

        Args:
            iteration: Iteration number

        Returns:
            NVSHMEM tensor for sending
        """
        return self.send_slots[iteration % 2]

    def get_recv_slot(self, iteration: int):
        """
        Get receive buffer for given iteration.

        Args:
            iteration: Iteration number

        Returns:
            NVSHMEM tensor for receiving
        """
        return self.recv_slots[iteration % 2]

    def free(self) -> None:
        """Free NVSHMEM symmetric buffers."""
        for i in range(2):
            if self.send_slots[i] is not None:
                nvshmem.core.interop.torch.free_tensor(self.send_slots[i])
                self.send_slots[i] = None
            if self.recv_slots[i] is not None:
                nvshmem.core.interop.torch.free_tensor(self.recv_slots[i])
                self.recv_slots[i] = None


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/memory/tensor_pointer_utils.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""
Utilities for extracting data pointers from different tensor types.

Supports PyTorch tensors, CuPy arrays, and raw integer pointers.
"""

from typing import Any

import torch


class TensorPointerExtractor:
    """Extract memory pointers from various tensor types."""

    @staticmethod
    def get_pointer(tensor: Any) -> int:
        """
        Extract the data pointer from a tensor.

        Args:
            tensor: Can be torch.Tensor, CuPy array, or raw int pointer

        Returns:
            int: Memory address of the tensor data

        Examples:

            >>> import torch

            >>> t = torch.zeros(100, device='cuda')

            >>> ptr = TensorPointerExtractor.get_pointer(t)

            >>> isinstance(ptr, int)

            True
        """
        if isinstance(tensor, torch.Tensor):
            return tensor.data_ptr()
        elif hasattr(tensor, "data"):  # CuPy array
            return tensor.data.ptr
        else:  # Assume raw integer pointer
            return tensor


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/nvshmem_types.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from dataclasses import dataclass, field
from typing import Any, List

# Constants
MAX_SEGMENT_SIZE = 256 * 1024 * 1024  # 256MB
MAX_TASKS_PER_BATCH = 10000


@dataclass
class SendRequest:
    """Container for a send operation request."""

    task_id: int
    src_tensor: Any  # cupy.ndarray or pointer
    src_pos: int
    size: int
    dest_pe: int


@dataclass
class ReceiveRequest:
    """Container for a receive operation request."""

    task_id: int
    dest_tensor: Any  # cupy.ndarray or pointer
    dest_pos: int
    size: int
    src_pe: int


@dataclass
class WorkloadGroup:
    """Container for a group of send requests to a specific destination PE."""

    dest_pe: int
    tasks: List[SendRequest] = field(default_factory=list)
    total_size: int = 0


@dataclass
class ScheduledBatch:
    """Metadata for a scheduled communication batch."""

    src_pe: int
    dest_pe: int
    batch_index: int
    iteration: int
    # Metadata for GPU execution
    gpu_plan: Any = None  # Placeholder for GPU-resident plan
    tasks: List[SendRequest] = field(default_factory=list)
    total_size: int = 0
    tasks_summary: Any = None  # WorkloadSummary


@dataclass
class WorkloadSummary:
    """Summary of a workload group for communication with other PEs."""

    total_size: int
    task_ids: List[int]
    task_sizes: List[int]


@dataclass
class TransferMetadata:
    """GPU-resident metadata for communication tasks."""

    ptrs: Any  # cupy array of uint64 (pointers)
    sizes: Any  # cupy array of uint64 (sizes)
    num_tasks: int
    total_size: int


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/planning/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Planning components for task segmentation, workload packing, and scheduling."""

from .communication_scheduler import CommunicationScheduler
from .gpu_execution_planner import GPUExecutionPlanner
from .task_segmenter import TaskSegmenter
from .workload_packer import WorkloadPacker

__all__ = ["CommunicationScheduler", "GPUExecutionPlanner", "TaskSegmenter", "WorkloadPacker"]


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/planning/communication_scheduler.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from typing import Dict, List, Tuple

from ..logger import PELogger
from ..nvshmem_types import ScheduledBatch, WorkloadGroup, WorkloadSummary


class CommunicationScheduler:
    """
    Builds a conflict-free, iteration-based schedule for communication.
    Ensures that in any given iteration, a PE is not overloaded.
    Uses greedy first-fit scheduling algorithm.
    """

    def __init__(self):
        self.num_iterations = 0

    def build_schedule(
        self, workloads: Dict[int, List[WorkloadGroup]], my_pe: int, n_pes: int, group=None
    ) -> Tuple[Dict[int, List[ScheduledBatch]], Dict[Tuple[int, int, int], WorkloadSummary]]:
        """
        Main scheduling method.
        1. Exchanges workload info with other PEs.
        2. Assigns batches to iterations.
        3. Returns:
           - local schedule (iteration -> list of batches)
           - global workload summaries (key: (src, dest, batch_idx) -> summary)

        Args:
            workloads: Dict mapping destination PE to list of workload groups.
            my_pe: This PE's rank.
            n_pes: Total number of PEs.
            group: Optional ProcessGroup for distributed operations.
        """
        total_local_batches = sum(len(groups) for groups in workloads.values())
        PELogger.info(f"Building schedule: {total_local_batches} local batches, {n_pes} PEs")

        # Step 1: Collect all batches across all PE pairs
        PELogger.debug("Collecting batches from all PEs...")
        all_batches = self._collect_all_batches(workloads, my_pe, n_pes, group=group)
        PELogger.debug(f"Collected {len(all_batches)} total batches globally")

        # Step 2: Assign batches to iterations using greedy conflict-free algorithm
        PELogger.debug("Assigning batches to iterations using greedy conflict-free algorithm...")
        self._assign_iterations(all_batches)
        PELogger.info(f"Schedule built: {self.num_iterations} iterations")

        # Step 3: Exchange detailed workload summaries (Task IDs/Sizes)
        # This is needed for receivers to know what tasks are in each batch
        PELogger.debug("Exchanging workload summaries...")
        global_summaries = self._exchange_workload_summaries(workloads, my_pe, n_pes, group=group)
        PELogger.debug(f"Exchanged {len(global_summaries)} workload summaries")

        # Step 4: Build schedule map for this PE
        my_batches = [b for b in all_batches if b.src_pe == my_pe or b.dest_pe == my_pe]
        my_batches.sort(key=lambda x: x.iteration)

        final_schedule: Dict[int, List[ScheduledBatch]] = {}
        for b in my_batches:
            final_schedule.setdefault(b.iteration, []).append(b)

        return final_schedule, global_summaries

    def _collect_all_batches(
        self, workloads: Dict[int, List[WorkloadGroup]], my_pe: int, n_pes: int, group=None
    ) -> List[ScheduledBatch]:
        """
        Exchanges batch counts and details with all PEs to build a global view.
        Uses torch.distributed for reliable communication.
        """
        import torch.distributed as dist

        # Build local batch list
        local_batches: List[Tuple[int, int, int]] = []
        for dest_pe, groups in workloads.items():
            if dest_pe == my_pe:
                continue
            for i, _ in enumerate(groups):
                local_batches.append((my_pe, dest_pe, i))  # (src, dest, batch_idx)

        PELogger.debug(f"  Local batch count: {len(local_batches)}")
        PELogger.debug(f"  Local batches: {local_batches}")

        # Gather all batches from all PEs using torch.distributed
        all_batches_list: List[List[Tuple[int, int, int]] | None] = [None] * n_pes
        dist.all_gather_object(all_batches_list, local_batches, group=group)

        # Flatten into global batch list
        global_batches: List[ScheduledBatch] = []
        for pe_batches in all_batches_list:
            if pe_batches is None:
                continue
            for src, dest, idx in pe_batches:
                global_batches.append(
                    ScheduledBatch(src_pe=src, dest_pe=dest, batch_index=idx, iteration=-1)
                )

        PELogger.debug(f"  Global batches collected: {len(global_batches)} total")

        # Group by source for readability
        batches_by_src: Dict[int, List[Tuple[int, int]]] = {}
        for b in global_batches:
            batches_by_src.setdefault(b.src_pe, []).append((b.dest_pe, b.batch_index))
        for src_pe in sorted(batches_by_src.keys()):
            PELogger.debug(f"    PE {src_pe} sends to: {batches_by_src[src_pe]}")

        return global_batches

    def _assign_iterations(self, batches: List[ScheduledBatch]):
        """
        Greedy first-fit scheduling algorithm.

        Assigns batches to iterations using simple greedy first-fit.
        Processes batches in sorted order and assigns each to the first
        available iteration with no conflicts.
        """
        self.num_iterations = 0

        # Calculate degree (conflict count) for each batch
        def calc_degree(batch: ScheduledBatch, all_batches: List[ScheduledBatch]) -> int:
            """Count how many other batches conflict with this batch."""
            conflicts = 0
            batch_pes = {batch.src_pe, batch.dest_pe}
            for other in all_batches:
                if other is batch:
                    continue
                other_pes = {other.src_pe, other.dest_pe}
                # Conflict if they share any PE
                if batch_pes & other_pes:
                    conflicts += 1
            return conflicts

        def has_conflict(batch: ScheduledBatch, iteration_state: Dict) -> bool:
            """
            Check if a batch conflicts with an iteration's current PE usage.

            A batch conflicts if either its source or destination PE is already
            being used (as sender or receiver) in the iteration.

            Args:
                batch: The batch to check
                iteration_state: Dict with 'src_pes' and 'dst_pes' sets

            Returns:
                True if there's a conflict, False if the batch can be scheduled
            """
            return (
                batch.src_pe in iteration_state['src_pes']
                or batch.src_pe in iteration_state['dst_pes']
                or batch.dest_pe in iteration_state['src_pes']
                or batch.dest_pe in iteration_state['dst_pes']
            )

        # Sort batches: process batches with more potential conflicts first
        # This heuristic (largest-degree-first) often produces better colorings
        # Sort by degree (descending), then total_size (descending) for tie-breaking
        batches.sort(key=lambda b: (-calc_degree(b, batches), -b.total_size))

        # Track which PEs are busy (sending or receiving) in each iteration
        # iteration -> {src_pes: set, dst_pes: set}
        iteration_usage = []

        for batch in batches:
            # Find first iteration where this batch fits (no conflicts)
            assigned = False
            for iter_idx in range(len(iteration_usage)):
                if not has_conflict(batch, iteration_usage[iter_idx]):
                    # No conflict - assign to this iteration
                    batch.iteration = iter_idx
                    iteration_usage[iter_idx]['src_pes'].add(batch.src_pe)
                    iteration_usage[iter_idx]['dst_pes'].add(batch.dest_pe)
                    assigned = True
                    PELogger.debug(
                        f"  Assigned batch ({batch.src_pe} → {batch.dest_pe}, "
                        f"idx={batch.batch_index}) to iteration {iter_idx}"
                    )
                    break

            if not assigned:
                # Need a new iteration
                new_iter = len(iteration_usage)
                batch.iteration = new_iter
                iteration_usage.append({'src_pes': {batch.src_pe}, 'dst_pes': {batch.dest_pe}})
                PELogger.debug(
                    f"  Assigned batch ({batch.src_pe} → {batch.dest_pe}, "
                    f"idx={batch.batch_index}) to NEW iteration {new_iter}"
                )

        self.num_iterations = len(iteration_usage)
        PELogger.info(
            f"Greedy scheduling: {len(batches)} batches → {self.num_iterations} iterations"
        )

    def _exchange_workload_summaries(
        self, workloads: Dict[int, List[WorkloadGroup]], my_pe: int, n_pes: int, group=None
    ) -> Dict[Tuple[int, int, int], WorkloadSummary]:
        """
        Exchange detailed workload content using torch.distributed.
        Simple and reliable - no NVSHMEM symmetric memory issues.
        """
        import torch.distributed as dist

        # Build local summaries as a simple dict:
        # (src, dest, batch_idx) -> {total_size, task_ids, task_sizes}
        local_summaries: Dict[Tuple[int, int, int], Dict[str, object]] = {}
        batch_count = 0
        total_tasks = 0

        for dest_pe, groups in workloads.items():
            if dest_pe == my_pe:
                continue
            for batch_idx, wl_group in enumerate(groups):
                key = (my_pe, dest_pe, batch_idx)
                local_summaries[key] = {
                    "total_size": wl_group.total_size,
                    "task_ids": [t.task_id for t in wl_group.tasks],
                    "task_sizes": [t.size for t in wl_group.tasks],
                }
                batch_count += 1
                total_tasks += len(wl_group.tasks)

        PELogger.debug(f"  Local summaries: {batch_count} batches, {total_tasks} tasks")

        # Gather all summaries from all PEs using torch.distributed
        all_summaries_list: List[Dict[Tuple[int, int, int], Dict[str, object]] | None] = [
            None
        ] * n_pes
        dist.all_gather_object(all_summaries_list, local_summaries, group=group)

        # Merge into global map
        global_map: Dict[Tuple[int, int, int], WorkloadSummary] = {}
        for pe_summaries in all_summaries_list:
            if pe_summaries is None:
                continue
            for key, data in pe_summaries.items():
                summary = WorkloadSummary(
                    total_size=int(data["total_size"]),
                    task_ids=list(data["task_ids"]),
                    task_sizes=list(data["task_sizes"]),
                )
                global_map[key] = summary

        PELogger.debug(f"  Exchanged {len(global_map)} workload summaries")
        return global_map


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/planning/gpu_execution_planner.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""
GPU execution planning for pack/unpack operations.

Converts high-level task descriptions into GPU-ready metadata
(pointer arrays, sizes, chunking) for kernel execution.
"""

from typing import Dict, List, Optional, Tuple

try:
    import cupy as cp

    HAVE_CUPY = True
except ImportError:
    HAVE_CUPY = False

import torch

from ..logger import PELogger
from ..memory.tensor_pointer_utils import TensorPointerExtractor
from ..nvshmem_types import ReceiveRequest, ScheduledBatch


class GPUExecutionPlanner:
    """Plans GPU kernel execution by building pointer arrays and metadata."""

    def __init__(self):
        self.tensor_utils = TensorPointerExtractor()
        self.CHUNK_SIZE = 128 * 1024  # 128KB chunks

    def create_gpu_plans(
        self,
        iter_schedules: List[Dict[str, Optional[ScheduledBatch]]],
        send_slots: List,
        recv_slots: List,
        receive_requests: List[ReceiveRequest],
    ) -> None:
        """
        Build GPU execution plans for all iterations.

        Modifies iter_schedules in-place by adding gpu_plan to each batch.

        Args:
            iter_schedules: List of iteration schedules (dicts with 'send' and 'recv')
            send_slots: List of send buffer slots
            recv_slots: List of receive buffer slots
            receive_requests: List of all receive requests for matching
        """
        if not HAVE_CUPY:
            raise RuntimeError(
                "cupy is not available. Please install cupy to use GPUExecutionPlanner."
            )

        PELogger.debug(f"Creating GPU plans for {len(iter_schedules)} iterations")
        for i, sched in enumerate(iter_schedules):
            send_batch = sched["send"]
            if send_batch:
                # Build Pack Metadata
                ptrs: List[int] = []
                positions: List[int] = []
                sizes: List[int] = []

                for t in send_batch.tasks:
                    # Extract pointer from tensor
                    ptr = self.tensor_utils.get_pointer(t.src_tensor)
                    ptrs.append(ptr)
                    positions.append(t.src_pos)
                    sizes.append(t.size)

                # Plan kernel args for packing
                send_batch.gpu_plan = self._plan_kernel_args(
                    ptrs, positions, sizes, is_pack=True, buffer_base=send_slots[i % 2].data_ptr()
                )
                task_ids = [t.task_id for t in send_batch.tasks]
                PELogger.debug(
                    f"  Iter {i} send plan: {len(send_batch.tasks)} tasks → "
                    f"PE {send_batch.dest_pe}, {send_batch.total_size} bytes"
                )
                displayed_ids = task_ids[:10] if len(task_ids) <= 10 else task_ids[:10] + ["..."]
                PELogger.debug(f"    Send task IDs: {displayed_ids}")

            recv_batch = sched["recv"]
            if recv_batch:
                # Build Unpack Metadata
                summary = recv_batch.tasks_summary

                # Skip if no summary available (shouldn't happen in normal operation)
                if summary is None:
                    PELogger.error(
                        f"Iter {i}: recv batch from PE {recv_batch.src_pe} has no "
                        "tasks_summary - UNPACK WILL BE SKIPPED!"
                    )
                    recv_batch.gpu_plan = None
                    continue

                PELogger.debug(
                    f"  Iter {i} recv from PE {recv_batch.src_pe}: "
                    f"{len(summary.task_ids)} tasks in summary"
                )

                ptrs = []
                positions = []
                sizes = []

                # Create fast lookup map for receive requests
                relevant_reqs: Dict[int, ReceiveRequest] = {
                    r.task_id: r for r in receive_requests if r.src_pe == recv_batch.src_pe
                }

                # Match summary tasks with receive requests
                matched_task_ids: List[int] = []
                unmatched_task_ids: List[int] = []
                for t_id, t_size in zip(summary.task_ids, summary.task_sizes):
                    if t_id in relevant_reqs:
                        req = relevant_reqs[t_id]
                        ptr = self.tensor_utils.get_pointer(req.dest_tensor)
                        ptrs.append(ptr)
                        positions.append(req.dest_pos)
                        sizes.append(t_size)  # Use sender's size
                        matched_task_ids.append(t_id)
                    else:
                        unmatched_task_ids.append(t_id)
                        PELogger.error(
                            f"Iter {i}: Unexpected task {t_id} from PE "
                            f"{recv_batch.src_pe} - no matching recv request!"
                        )

                if unmatched_task_ids:
                    PELogger.error(
                        f"  Iter {i}: {len(unmatched_task_ids)} unmatched tasks "
                        f"from PE {recv_batch.src_pe}: {unmatched_task_ids[:10]}"
                    )

                # Plan kernel args for unpacking
                recv_batch.gpu_plan = self._plan_kernel_args(
                    ptrs, positions, sizes, is_pack=False, buffer_base=recv_slots[i % 2].data_ptr()
                )

                if recv_batch.gpu_plan is None:
                    PELogger.error(
                        f"  Iter {i} recv plan: FAILED - no gpu_plan created for "
                        f"{len(sizes)} tasks from PE {recv_batch.src_pe}"
                    )
                else:
                    PELogger.debug(
                        f"  Iter {i} recv plan: {len(sizes)} tasks ← "
                        f"PE {recv_batch.src_pe}, {recv_batch.total_size} bytes"
                    )
                    displayed_recv_ids = (
                        matched_task_ids[:10]
                        if len(matched_task_ids) <= 10
                        else matched_task_ids[:10] + ["..."]
                    )
                    PELogger.debug(f"    Recv task IDs: {displayed_recv_ids}")

    def _plan_kernel_args(
        self,
        ptrs: List[int],
        positions: List[int],
        sizes: List[int],
        is_pack: bool,
        buffer_base: int,
    ) -> Optional[Tuple[object, object, object, int]]:
        """
        Generate GPU-ready pointer arrays for kernel execution.

        Applies 128KB chunking to break large transfers into smaller pieces.

        Args:
            ptrs: List of tensor data pointers
            positions: List of positions within tensors
            sizes: List of transfer sizes
            is_pack: True for pack (user->buffer), False for unpack (buffer->user)
            buffer_base: Base pointer of the buffer

        Returns:
            Tuple of (cp_src_addrs, cp_dst_addrs, cp_sizes, num_chunks) as
            CuPy arrays, or None if no work.
        """
        h_src_addrs: List[int] = []
        h_dst_addrs: List[int] = []
        h_sizes: List[int] = []

        packed_offset = 0

        for ptr, pos, size in zip(ptrs, positions, sizes):
            num_chunks = (size + self.CHUNK_SIZE - 1) // self.CHUNK_SIZE

            for c in range(num_chunks):
                chunk_offset = c * self.CHUNK_SIZE
                chunk_size = min(self.CHUNK_SIZE, size - chunk_offset)

                if is_pack:
                    # Pack: user tensor -> buffer
                    h_src_addrs.append(ptr + pos + chunk_offset)
                    h_dst_addrs.append(buffer_base + packed_offset + chunk_offset)
                else:
                    # Unpack: buffer -> user tensor
                    h_src_addrs.append(buffer_base + packed_offset + chunk_offset)
                    h_dst_addrs.append(ptr + pos + chunk_offset)

                h_sizes.append(chunk_size)

            packed_offset += size

        total_chunks = len(h_sizes)
        if total_chunks == 0:
            return None

        # Move to GPU using PyTorch, then convert to CuPy for kernel launching
        d_src_addrs = torch.tensor(h_src_addrs, dtype=torch.int64, device="cuda")
        d_dst_addrs = torch.tensor(h_dst_addrs, dtype=torch.int64, device="cuda")
        d_sizes = torch.tensor(h_sizes, dtype=torch.int64, device="cuda")

        # Convert to CuPy arrays (zero-copy) for kernel launching
        cp_src_addrs = cp.asarray(d_src_addrs)
        cp_dst_addrs = cp.asarray(d_dst_addrs)
        cp_sizes = cp.asarray(d_sizes)

        return (cp_src_addrs, cp_dst_addrs, cp_sizes, total_chunks)


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/planning/task_segmenter.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import logging
from typing import List

from ..nvshmem_types import MAX_SEGMENT_SIZE, ReceiveRequest, SendRequest

logger = logging.getLogger(__name__)

# Constants for ID encoding (from C++ implementation)
REQUEST_ID_BASE = 1000000000
SEGMENT_ID_MULTIPLIER = 1000
MAX_REQUESTS = 1000000
MAX_SEGMENTS_PER_REQUEST = 1000


class TaskSegmenter:
    """
    Splits large tasks (>256MB) into smaller segments to fit
    into the fixed-size communication slots.
    """

    def _encode_segment_id(self, task_id: int, segment_index: int) -> int:
        return REQUEST_ID_BASE + (task_id * SEGMENT_ID_MULTIPLIER) + segment_index

    def _calculate_num_segments(self, size: int) -> int:
        return (size + MAX_SEGMENT_SIZE - 1) // MAX_SEGMENT_SIZE

    def _validate_segmentation(self, task_id: int, size: int) -> bool:
        num_segments = self._calculate_num_segments(size)
        if num_segments > MAX_SEGMENTS_PER_REQUEST:
            logger.error(
                f"Error: Task {task_id} requires {num_segments} segments, "
                f"exceeds max {MAX_SEGMENTS_PER_REQUEST}"
            )
            return False
        if task_id >= MAX_REQUESTS:
            logger.error(f"Error: Task ID {task_id} exceeds max {MAX_REQUESTS}")
            return False
        return True

    def segment_send_request(self, req: SendRequest) -> List[SendRequest]:
        """
        Splits a single send request into multiple segments
        if larger than MAX_SEGMENT_SIZE.
        """
        if req.size <= MAX_SEGMENT_SIZE:
            return [req]

        if not self._validate_segmentation(req.task_id, req.size):
            raise ValueError(f"Task {req.task_id} validation failed")

        num_segments = self._calculate_num_segments(req.size)
        output_requests: List[SendRequest] = []

        for i in range(num_segments):
            segment_offset = i * MAX_SEGMENT_SIZE
            segment_size = min(MAX_SEGMENT_SIZE, req.size - segment_offset)
            segment_task_id = self._encode_segment_id(req.task_id, i)

            new_req = SendRequest(
                task_id=segment_task_id,
                src_tensor=req.src_tensor,
                src_pos=req.src_pos + segment_offset,
                size=segment_size,
                dest_pe=req.dest_pe,
            )
            output_requests.append(new_req)

        return output_requests

    def segment_receive_request(self, req: ReceiveRequest) -> List[ReceiveRequest]:
        """
        Splits a single receive request into multiple segments
        if larger than MAX_SEGMENT_SIZE.
        """
        if req.size <= MAX_SEGMENT_SIZE:
            return [req]

        if not self._validate_segmentation(req.task_id, req.size):
            raise ValueError(f"Task {req.task_id} validation failed")

        num_segments = self._calculate_num_segments(req.size)
        output_requests: List[ReceiveRequest] = []

        for i in range(num_segments):
            segment_offset = i * MAX_SEGMENT_SIZE
            segment_size = min(MAX_SEGMENT_SIZE, req.size - segment_offset)
            segment_task_id = self._encode_segment_id(req.task_id, i)

            new_req = ReceiveRequest(
                task_id=segment_task_id,
                dest_tensor=req.dest_tensor,
                dest_pos=req.dest_pos + segment_offset,
                size=segment_size,
                src_pe=req.src_pe,
            )
            output_requests.append(new_req)

        return output_requests


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/planning/workload_packer.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from typing import Dict, List

from ..logger import PELogger
from ..nvshmem_types import MAX_SEGMENT_SIZE, MAX_TASKS_PER_BATCH, SendRequest, WorkloadGroup


class WorkloadPacker:
    """
    Packs individual SendRequests into WorkloadGroups (batches)
    destined for the same PE, respecting size limits.
    """

    def pack_workloads(
        self, send_requests: List[SendRequest], n_pes: int
    ) -> Dict[int, List[WorkloadGroup]]:
        """
        Groups requests by destination PE and packs them into batches.
        Returns a map: dest_pe -> list of batches
        """
        PELogger.debug(f"Packing {len(send_requests)} send requests for {n_pes} PEs")
        workloads: Dict[int, List[WorkloadGroup]] = {}

        # Group requests by destination PE
        tasks_by_dest: Dict[int, List[SendRequest]] = {}
        for req in send_requests:
            tasks_by_dest.setdefault(req.dest_pe, []).append(req)

        # Pack tasks for each destination
        for dest_pe in range(n_pes):
            if dest_pe not in tasks_by_dest:
                workloads[dest_pe] = []
                PELogger.debug(f"  Dest PE {dest_pe}: 0 tasks → 0 batches")
                continue

            tasks = tasks_by_dest[dest_pe]
            workloads[dest_pe] = self._pack_single_destination(tasks, dest_pe)

            if workloads[dest_pe]:
                total_size = sum(b.total_size for b in workloads[dest_pe])
                PELogger.debug(
                    f"  Dest PE {dest_pe}: {len(tasks)} tasks → "
                    f"{len(workloads[dest_pe])} batches, {total_size} bytes total"
                )
            else:
                PELogger.debug(
                    f"  Dest PE {dest_pe}: {len(tasks)} tasks → 0 batches (empty after packing)"
                )

        return workloads

    def _pack_single_destination(
        self, tasks: List[SendRequest], dest_pe: int
    ) -> List[WorkloadGroup]:
        if not tasks:
            return []

        # Sort tasks by size (descending) for better bin packing efficiency
        tasks.sort(key=lambda x: x.size, reverse=True)

        batches: List[WorkloadGroup] = []
        current_batch = WorkloadGroup(dest_pe=dest_pe, tasks=[], total_size=0)

        for task in tasks:
            # Check if adding this task would exceed batch constraints
            would_exceed_size = current_batch.total_size + task.size > MAX_SEGMENT_SIZE
            would_exceed_task_cap = len(current_batch.tasks) >= MAX_TASKS_PER_BATCH

            if (would_exceed_size or would_exceed_task_cap) and current_batch.tasks:
                # Finalize current batch
                batches.append(current_batch)
                task_first_10_string = ", ".join([str(t.task_id) for t in current_batch.tasks[:10]])
                PELogger.debug(
                    f"  Packed batch to PE {dest_pe} idx {len(batches) - 1}: "
                    f"{task_first_10_string}... (total {len(current_batch.tasks)} tasks)"
                )
                # Start new batch
                current_batch = WorkloadGroup(dest_pe=dest_pe, tasks=[], total_size=0)

            # Add task to current batch
            current_batch.tasks.append(task)
            current_batch.total_size += task.size

        # Add final batch if not empty
        if current_batch.tasks:
            batches.append(current_batch)

        return batches


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/service.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""
Remote Copy Service - Main orchestrator for NVSHMEM-based GPU-to-GPU transfers.

This service coordinates task segmentation, workload packing, scheduling,

GPU resource management, and pipelined execution.
"""

from typing import Dict, List, Optional, Tuple

from .compat import ensure_nvshmem_compat

ensure_nvshmem_compat()

try:
    import nvshmem.core

    HAVE_NVSHMEM = True
except ImportError:
    HAVE_NVSHMEM = False

import torch
import torch.cuda.nvtx as nvtx

from .core import GPUResourceManager, KernelLauncher, PipelineExecutor
from .logger import PELogger
from .memory import DoubleBufferManager
from .nvshmem_types import ReceiveRequest, ScheduledBatch, SendRequest, WorkloadSummary
from .planning import CommunicationScheduler, GPUExecutionPlanner, TaskSegmenter, WorkloadPacker


class RemoteCopyService:
    """
    Main service for managing remote GPU-to-GPU data transfers.

    Provides high-level API for registering transfers, scheduling,
    and executing pipelined communication with NVSHMEM.
    """

    def __init__(self, group=None):
        # Core components
        self.gpu_resources = GPUResourceManager()
        self.buffer_manager = DoubleBufferManager()
        self.kernel_launcher = KernelLauncher()
        self.pipeline_executor = None  # Created after init

        # Planning components
        self.task_segmenter = TaskSegmenter()
        self.workload_packer = WorkloadPacker()
        self.comm_scheduler = CommunicationScheduler()
        self.gpu_planner = GPUExecutionPlanner()

        # Optional process group for distributed operations
        self._group = group

        # State
        self.send_requests: List[SendRequest] = []
        self.receive_requests: List[ReceiveRequest] = []
        self.iter_schedules: Optional[List[Dict]] = None
        self.num_iterations: int = 0

        # Events for double-buffering
        self.pack_events = []
        self.unpack_events = []
        self.barrier_events = []

    @property
    def my_pe(self) -> int:
        """Get this PE's rank."""
        return self.gpu_resources.my_pe

    @property
    def n_pes(self) -> int:
        """Get total number of PEs."""
        return self.gpu_resources.n_pes

    @property
    def device(self):
        """Get CUDA device."""
        return self.gpu_resources.device

    @property
    def initialized(self) -> bool:
        """Check if service is initialized."""
        return self.gpu_resources.initialized

    def init(self, log_level: str = "INFO") -> None:
        """
        Initialize the service.

        Sets up NVSHMEM, CUDA device, streams, buffers, and kernels.
        Expects to be launched with torchrun.

        Args:
            log_level: Logging level (TRACE, DEBUG, INFO, WARN, ERROR)
        """
        if not HAVE_NVSHMEM:
            raise RuntimeError(
                "nvshmem.core is not available. Please install nvshmem to use NVSHMEMCopyService."
            )

        # Initialize GPU resources (NVSHMEM, device, streams)
        self.gpu_resources.init(group=self._group)

        # Initialize logger after PE ID is known
        PELogger.init(self.my_pe, level=log_level)
        PELogger.info(f"Initializing RemoteCopyService on PE {self.my_pe}/{self.n_pes}")

        # Barrier to ensure ALL PEs finish NVSHMEM init before ANY PE starts buffer allocation
        # buffer_manager.allocate() calls bytetensor() which is a collective operation
        # Without this barrier, early PEs call bytetensor() while late PEs
        # are still in init() -> deadlock
        nvshmem.core.barrier_all(stream=self.gpu_resources.send_stream)
        self.gpu_resources.send_stream.sync()  # Ensure barrier completes on CPU

        # Allocate double-buffered send/recv slots
        self.buffer_manager.allocate()
        # The .zero_() calls inside allocate() go to the default CUDA stream.
        # Sync it now so the zeros are fully committed before any NVShmem
        # operations (which bypass CUDA streams via RDMA) touch the buffers.
        # Without this, a still-running zero() can race with the first
        # nvshmem.core.put() and overwrite received data.
        torch.cuda.synchronize()

        # Barrier to ensure all PEs complete buffer allocation before proceeding
        nvshmem.core.barrier_all(stream=self.gpu_resources.send_stream)

        PELogger.debug("Allocated double-buffered send/recv slots")

        # Load CUDA kernels
        self.kernel_launcher.load_kernels()
        PELogger.debug("Loaded CUDA kernels")

        # Cache CuPy stream wrappers for efficient kernel launching
        self.kernel_launcher.set_streams(
            self.gpu_resources.pack_stream, self.gpu_resources.unpack_stream
        )
        PELogger.debug("Cached CuPy stream wrappers")

        # Create pipeline executor with dependencies
        self.pipeline_executor = PipelineExecutor(
            self.kernel_launcher, self.buffer_manager, self.my_pe
        )

        # Set streams on pipeline executor
        self.pipeline_executor.set_streams(
            self.gpu_resources.pack_stream,
            self.gpu_resources.unpack_stream,
            self.gpu_resources.send_stream,
            self.gpu_resources.copy_stream,
            self.gpu_resources.torch_pack_stream_wrapper,
            self.gpu_resources.torch_unpack_stream_wrapper,
            self.gpu_resources.torch_send_stream_wrapper,
            self.gpu_resources.torch_copy_stream_wrapper,
        )

        # Synchronize all NVSHMEM streams before returning
        # This ensures all barrier operations complete and streams are idle
        # Without this, subsequent torch.cuda.synchronize() may hang waiting for pending work
        self.gpu_resources.send_stream.sync()
        self.gpu_resources.pack_stream.sync()
        self.gpu_resources.unpack_stream.sync()
        self.gpu_resources.copy_stream.sync()

        PELogger.info("Initialization complete")

    def register_send(
        self, task_id: int, src_tensor, src_pos: int, size: int, dest_pe: int
    ) -> None:
        """
        Register a send operation.

        Args:
            task_id: Unique task identifier
            src_tensor: Source tensor (PyTorch/CuPy tensor or pointer)
            src_pos: Starting position in source tensor
            size: Number of bytes to send
            dest_pe: Destination PE rank
        """
        if dest_pe >= self.n_pes or dest_pe < 0:
            PELogger.error(f"Error: Invalid destination PE {dest_pe}")
            return

        req = SendRequest(task_id, src_tensor, src_pos, size, dest_pe)
        self.send_requests.append(req)

    def register_receive(
        self, task_id: int, dest_tensor, dest_pos: int, size: int, src_pe: int
    ) -> None:
        """
        Register a receive operation.

        Args:
            task_id: Unique task identifier
            dest_tensor: Destination tensor (PyTorch/CuPy tensor or pointer)
            dest_pos: Starting position in destination tensor
            size: Number of bytes to receive
            src_pe: Source PE rank
        """
        if src_pe >= self.n_pes or src_pe < 0:
            PELogger.error(f"Error: Invalid source PE {src_pe}")
            return

        req = ReceiveRequest(task_id, dest_tensor, dest_pos, size, src_pe)
        self.receive_requests.append(req)

    def schedule(self) -> None:
        """
        Build execution schedule.

        Can be called once and followed by multiple run() calls for
        repeated execution with the same communication pattern.

        Steps:
        1. Segment large tasks into manageable chunks
        2. Pack tasks into batches
        3. Schedule batches to iterations (conflict-free)
        4. Build GPU execution plans (pointer arrays, chunking)
        5. Create synchronization events
        """
        if not self.initialized:
            raise RuntimeError("RemoteCopyService not initialized")

        PELogger.info(
            f"Starting schedule: {len(self.send_requests)} send requests, "
            f"{len(self.receive_requests)} receive requests"
        )

        # Step 1: Segment tasks (break large tasks into chunks)
        PELogger.debug("Step 1: Segmenting tasks...")
        orig_send_count = len(self.send_requests)
        orig_recv_count = len(self.receive_requests)
        self._segment_tasks()
        PELogger.info(
            f"Segmented: {orig_send_count} sends → {len(self.send_requests)} segments, "
            f"{orig_recv_count} recvs → {len(self.receive_requests)} segments"
        )

        # Step 2: Pack tasks into workload groups
        PELogger.debug("Step 2: Packing workloads...")
        workloads = self.workload_packer.pack_workloads(self.send_requests, self.n_pes)
        total_batches = sum(len(batches) for batches in workloads.values())
        active_pes = sum(1 for batches in workloads.values() if batches)
        PELogger.info(f"Packed: {total_batches} batches across {active_pes} destination PEs")

        # Step 3: Schedule workloads to iterations
        PELogger.debug("Step 3: Building communication schedule...")
        schedule, global_summaries = self.comm_scheduler.build_schedule(
            workloads, self.my_pe, self.n_pes, group=self._group
        )

        self.num_iterations = self.comm_scheduler.num_iterations
        PELogger.info(f"Scheduled: {total_batches} batches → {self.num_iterations} iterations")

        # Step 4: Prepare iteration schedules
        PELogger.debug("Step 4: Preparing iteration schedules...")
        self.iter_schedules = self._prepare_iter_schedules(
            schedule, workloads, global_summaries, self.num_iterations
        )

        # Step 5: Build GPU execution plans
        PELogger.debug("Step 5: Building GPU execution plans...")
        self.gpu_planner.create_gpu_plans(
            self.iter_schedules,
            self.buffer_manager.send_slots,
            self.buffer_manager.recv_slots,
            self.receive_requests,
        )

        # Step 6: Create double-buffered events
        PELogger.debug("Step 6: Creating synchronization events...")
        self.pack_events, self.unpack_events, self.barrier_events = (
            self.gpu_resources.create_events(num_events=2)
        )
        self.pipeline_executor.set_events(self.pack_events, self.unpack_events, self.barrier_events)

        PELogger.info(f"Schedule complete: {self.num_iterations} iterations ready")

    def run(self) -> None:
        """
        Execute the scheduled communication.

        Can be called multiple times after a single schedule() call
        to repeat the same communication pattern.
        """
        # import torch
        # torch.save(self.send_requests, f"send_requests_{torch.distributed.get_rank()}.pt")
        # torch.save(self.receive_requests, f"receive_requests_{torch.distributed.get_rank()}.pt")

        if not self.initialized:
            raise RuntimeError("RemoteCopyService not initialized")
        if self.iter_schedules is None:
            raise RuntimeError("Must call schedule() before run()")

        PELogger.info(f"Starting execution: {self.num_iterations} iterations")

        # Start timing
        nvtx.range_push("RemoteCopyService.run_total")

        # Global barrier before execution
        PELogger.debug("Barrier: Synchronizing all PEs before execution")
        nvshmem.core.barrier_all(stream=self.gpu_resources.send_stream)
        self.gpu_resources.send_stream.sync()

        # Execute pipelined communication
        nvtx.range_push("execute_pipeline")
        self.pipeline_executor.execute_pipeline(self.iter_schedules, self.num_iterations)
        nvtx.range_pop()  # execute_pipeline

        # Global barrier after execution
        PELogger.debug("Barrier: Synchronizing all PEs after pipeline")
        nvshmem.core.barrier_all(stream=self.gpu_resources.send_stream)

        # Process same-PE transfers
        self.pipeline_executor.process_self_moves(self.send_requests, self.receive_requests)

        # End timing range
        nvtx.range_pop()  # RemoteCopyService.run_total

    def clear_requests(self) -> None:
        """
        Clear registered requests and schedule.

        Call this before registering a new set of transfers.
        """
        self.send_requests = []
        self.receive_requests = []
        self.iter_schedules = None
        self.num_iterations = 0
        self.pack_events = []
        self.unpack_events = []
        self.barrier_events = []

    def finalize(self) -> None:
        """Cleanup resources."""
        PELogger.info("Finalizing RemoteCopyService")

        # Barrier to ensure all PEs are ready to finalize
        try:
            PELogger.debug("Barrier: Synchronizing all PEs before finalize")
            nvshmem.core.barrier_all(stream=self.gpu_resources.send_stream)
            self.gpu_resources.send_stream.sync()
        except Exception as e:
            PELogger.error(f"Error in final barrier: {e}")

        # Free buffers
        self.buffer_manager.free()

        # Finalize GPU resources (this will call nvshmem.core.finalize internally)
        self.gpu_resources.finalize()

        PELogger.info("RemoteCopyService finalized")
        PELogger.shutdown()

    def _segment_tasks(self) -> None:
        """Segment tasks into manageable chunks."""
        new_sends: List[SendRequest] = []
        for req in self.send_requests:
            segments = self.task_segmenter.segment_send_request(req)
            new_sends.extend(segments)
            if len(segments) > 1:
                PELogger.debug(
                    f"  Segmented send task {req.task_id}: "
                    f"{req.size} bytes → {len(segments)} segments"
                )
        self.send_requests = new_sends

        new_recvs: List[ReceiveRequest] = []
        for req in self.receive_requests:
            segments = self.task_segmenter.segment_receive_request(req)
            new_recvs.extend(segments)
            if len(segments) > 1:
                PELogger.debug(
                    f"  Segmented recv task {req.task_id}: "
                    f"{req.size} bytes → {len(segments)} segments"
                )
        self.receive_requests = new_recvs

    def _prepare_iter_schedules(
        self,
        schedule_batches: Dict[int, List[ScheduledBatch]],
        workloads: Dict[int, List],
        global_summaries: Dict[Tuple[int, int, int], WorkloadSummary],
        num_iterations: int,
    ) -> List[Dict]:
        """
        Organize schedule into iteration-based structure.

        Returns:
            List of dicts with 'send' and 'recv' keys for each iteration
        """
        iter_schedules: List[Dict[str, Optional[ScheduledBatch]]] = []

        for i in range(num_iterations):
            sched: Dict[str, Optional[ScheduledBatch]] = {"send": None, "recv": None}

            if i in schedule_batches:
                batches = schedule_batches[i]

                for b in batches:
                    # Skip same-PE transfers (handled separately by process_self_moves)
                    if b.src_pe == b.dest_pe:
                        PELogger.debug(
                            f"  Iter {i}: Skipping same-PE batch " f"({b.src_pe} → {b.dest_pe})"
                        )
                        continue

                    if b.src_pe == self.my_pe:
                        # This PE sends in this iteration
                        b.tasks = workloads[b.dest_pe][b.batch_index].tasks
                        b.total_size = workloads[b.dest_pe][b.batch_index].total_size
                        sched["send"] = b
                        PELogger.debug(
                            f"  Iter {i}: Send to PE {b.dest_pe}, batch "
                            f"{b.batch_index}, {len(b.tasks)} tasks, "
                            f"{b.total_size} bytes"
                        )

                    elif b.dest_pe == self.my_pe:
                        # This PE receives in this iteration
                        key = (b.src_pe, b.dest_pe, b.batch_index)
                        if key in global_summaries:
                            summary = global_summaries[key]
                            b.tasks_summary = summary
                            b.total_size = summary.total_size
                        else:
                            PELogger.error(
                                f"  Iter {i}: Missing workload summary for "
                                f"recv from PE {b.src_pe}, batch {b.batch_index}"
                            )
                            PELogger.error(
                                "  Available keys in global_summaries: "
                                f"{list(global_summaries.keys())}"
                            )
                            b.tasks_summary = None
                            b.total_size = 0
                        sched["recv"] = b
                        PELogger.debug(
                            f"  Iter {i}: Recv from PE {b.src_pe}, batch "
                            f"{b.batch_index}, {b.total_size} bytes"
                        )

            iter_schedules.append(sched)

        return iter_schedules


================================================
FILE: megatron/core/resharding/nvshmem_copy_service/validation.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""
Validation utilities for GPU-to-GPU communication.

Provides deterministic data generation and validation for verifying

correctness of communication operations."""

from dataclasses import dataclass
from typing import List

import torch

from .logger import PELogger


@dataclass
class ValidationResult:
    """Result of validating a single task."""

    task_id: int
    size: int
    passed: bool
    src_pe: int = -1
    mismatches: int = 0
    first_mismatch_idx: int = -1
    first_mismatch_expected: int = 0
    first_mismatch_actual: int = 0
    # Scheduling info - which batch/iteration this task was supposed to be handled
    batch_index: int = -1
    iteration: int = -1


@dataclass
class ValidationSummary:
    """Summary of validation across all tasks."""

    total_tasks: int
    passed_tasks: int
    failed_tasks: int
    total_bytes: int
    results: List[ValidationResult]

    @property
    def all_passed(self) -> bool:
        """Check if all validated tasks passed."""
        return self.failed_tasks == 0


def generate_deterministic_data(task_id: int, size: int, device: str = "cuda") -> torch.Tensor:
    """
    Generate deterministic data pattern for a task.

    Pattern: Each byte = (task_id * 31 + position) % 256
    This creates a unique pattern per task that varies along the data.

    Args:
        task_id: Unique task identifier
        size: Number of bytes to generate
        device: Device to create tensor on ('cuda' or 'cpu')

    Returns:
        torch.Tensor of uint8 with deterministic pattern
    """
    positions = torch.arange(size, dtype=torch.int64, device=device)
    pattern = ((task_id * 31 + positions) % 256).to(torch.uint8)
    return pattern


def validate_received_data(
    task_id: int, tensor: torch.Tensor, size: int, src_pe: int = -1
) -> ValidationResult:
    """
    Validate received data against expected deterministic pattern.

    Args:
        task_id: Task identifier to regenerate expected data
        tensor: Received tensor to validate
        size: Number of bytes to validate

    Returns:
        ValidationResult with pass/fail status and details
    """
    # Get the data slice to validate
    recv_data = tensor[:size]

    # Generate expected pattern on same device
    expected = generate_deterministic_data(task_id, size, device=recv_data.device.type)

    # Compare
    mismatches_mask = recv_data != expected
    num_mismatches = mismatches_mask.sum().item()

    result = ValidationResult(
        task_id=task_id,
        size=size,
        passed=(num_mismatches == 0),
        src_pe=src_pe,
        mismatches=num_mismatches,
    )

    if num_mismatches > 0:
        # Find first mismatch for debugging
        first_idx = mismatches_mask.nonzero(as_tuple=True)[0][0].item()
        result.first_mismatch_idx = first_idx
        result.first_mismatch_expected = expected[first_idx].item()
        result.first_mismatch_actual = recv_data[first_idx].item()

    return result


def log_validation_summary(summary: ValidationSummary) -> None:
    """Log validation summary."""
    if summary.all_passed:
        PELogger.info(
            "Validation PASSED: %d/%d tasks, %d bytes validated",
            summary.passed_tasks,
            summary.total_tasks,
            summary.total_bytes,
        )
    else:
        PELogger.error(
            "Validation FAILED: %d/%d tasks passed, %d failed",
            summary.passed_tasks,
            summary.total_tasks,
            summary.failed_tasks,
        )

        # Group failures by source PE
        failures_by_src = {}
        for r in summary.results:
            if not r.passed:
                failures_by_src.setdefault(r.src_pe, []).append(r)

        PELogger.error("  Failures by source PE:")
        for src_pe in sorted(failures_by_src.keys()):
            failed_tasks = failures_by_src[src_pe]
            task_ids = [r.task_id for r in failed_tasks]
            PELogger.error(
                "    PE %d: %d failed tasks: %s",
                src_pe,
                len(failed_tasks),
                task_ids[:15] if len(task_ids) <= 15 else task_ids[:15] + ["..."],
            )


================================================
FILE: megatron/core/resharding/planner.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from __future__ import annotations

import logging
import math

import torch
import torch.distributed as dist

from .utils import (
    ParameterMetadata,
    ReshardPlan,
    ShardingDescriptor,
    TransferOp,
    _build_layer_module_prefix_map,
    _get_rank_in_group,
    extract_param_metadata,
    select_src_metadata_balanced,
)

logger = logging.getLogger(__name__)


def _build_descriptors_for_param(
    src_metadata: ParameterMetadata, dst_metadata: ParameterMetadata
) -> list[ShardingDescriptor]:
    """Construct sharding descriptors (currently TP) for this parameter based on actual layout.
    Guard TP descriptor with size conservation so we don't mis-classify replicated tensors.
    """
    descriptors: list[ShardingDescriptor] = []

    # TP descriptor: allow when either side participates in TP
    if src_metadata.is_tp or dst_metadata.is_tp:
        # Prefer destination partition_dim, else source
        tp_dim = dst_metadata.partition_dim if dst_metadata.is_tp else src_metadata.partition_dim
        src_tp_ranks = src_metadata.tensor_parallel_group_ranks
        dst_tp_ranks = dst_metadata.tensor_parallel_group_ranks
        if src_tp_ranks is None or dst_tp_ranks is None:
            # Not enough context to build TP descriptor
            return descriptors
        src_stride = src_metadata.partition_stride if src_metadata.is_tp else 1
        dst_stride = dst_metadata.partition_stride if dst_metadata.is_tp else 1

        # Size conservation check on partition dim
        src_world = len(src_tp_ranks)
        dst_world = len(dst_tp_ranks)
        src_local = src_metadata.shape[tp_dim]
        dst_local = dst_metadata.shape[tp_dim]
        if src_world * src_local != dst_world * dst_local:
            raise RuntimeError(
                f"Cannot build TP descriptor for {dst_metadata.name} dim{tp_dim}: "
                f"src_world*src_local={src_world}*{src_local} != {dst_world}*{dst_local}. "
                "This usually means the param is marked TP but is effectively replicated on that "
                "dim or partition_dim/metadata is inconsistent between source and destination."
            )

        descriptors.append(
            ShardingDescriptor(
                name="tp",
                dim=tp_dim,
                src_stride=src_stride,
                dst_stride=dst_stride,
                src_dim_ranks=src_tp_ranks,
                dst_dim_ranks=dst_tp_ranks,
            )
        )
    return descriptors


def _plan_multi_dim_lcm(
    param_name: str,
    src_metadata: ParameterMetadata,
    dst_metadata: ParameterMetadata,
    descriptors: list[ShardingDescriptor],
    my_global_rank: int,
) -> list[tuple[int, tuple[slice, ...], tuple[slice, ...]]]:
    """
    TP-only planner using LCM tiling to support strides on source/destination.
    - Requires exactly one TP descriptor
    - Supports arbitrary integer strides (contiguous micro-tiles)
    """
    if not descriptors:
        return []
    if len(descriptors) != 1:
        raise NotImplementedError(
            f"{param_name}: _plan_multi_dim_lcm supports TP-only (one descriptor)"
        )
    if descriptors[0].name != "tp":
        raise NotImplementedError(f"{param_name}: _plan_multi_dim_lcm expects TP descriptor")
    d = descriptors[0]
    if my_global_rank not in d.dst_dim_ranks:
        return []

    src_shape = tuple(src_metadata.shape)
    dst_shape = tuple(dst_metadata.shape)
    dim = d.dim
    src_world = len(d.src_dim_ranks)
    dst_world = len(d.dst_dim_ranks)
    src_local = src_shape[dim]
    dst_local = dst_shape[dim]
    if src_world * src_local != dst_world * dst_local:
        raise RuntimeError(
            f"{param_name}: size mismatch on TP dim{dim} "
            f"(src_world={src_world}, src_local={src_local}, "
            f"dst_world={dst_world}, dst_local={dst_local})"
        )
    # LCM tiling with strides
    Ns = src_world * max(1, d.src_stride)
    Nd = dst_world * max(1, d.dst_stride)
    full_len = dst_local * dst_world
    g = math.gcd(Ns, Nd)
    L = (Ns // g) * Nd
    if full_len % L != 0:
        raise RuntimeError(
            f"{param_name}: TP dim{dim} full_len {full_len} not divisible by LCM {L} "
            f"(Ns={Ns}, Nd={Nd})"
        )
    unit = full_len // L  # micro-tile length
    cps = L // Ns  # micro-tiles per source segment
    cpd = L // Nd  # micro-tiles per destination segment
    seg_src = cps * unit  # contiguous length per source segment
    seg_dst = cpd * unit  # contiguous length per destination segment
    dst_local_rank = _get_rank_in_group(my_global_rank, d.dst_dim_ranks)
    ops: list[tuple[int, tuple[slice, ...], tuple[slice, ...]]] = []
    # Sweep destination segments owned by this rank (handle destination stride)
    for k in range(max(1, d.dst_stride)):
        g_dst_seg = dst_local_rank + k * dst_world
        # Within this segment, enumerate the cpd micro-tiles
        for off in range(cpd):
            g_micro = g_dst_seg * cpd + off
            s_idx = g_micro // cps
            in_seg = g_micro % cps
            src_owner_in_dim = s_idx % src_world
            src_global_rank = d.src_dim_ranks[src_owner_in_dim]
            src_local_seg_idx = s_idx // src_world
            src_start = src_local_seg_idx * seg_src + in_seg * unit
            dst_start = k * seg_dst + off * unit
            # Build full N-D slices
            src_slice = [slice(None)] * len(src_shape)
            dst_slice = [slice(None)] * len(dst_shape)
            src_slice[dim] = slice(src_start, src_start + unit)
            dst_slice[dim] = slice(dst_start, dst_start + unit)
            ops.append((src_global_rank, tuple(src_slice), tuple(dst_slice)))

    # Stable order by destination offset
    def dst_key(op):
        _, _, dsl = op
        s = dsl[dim]
        return s.start if isinstance(s, slice) else 0

    ops.sort(key=dst_key)
    return ops


def _plan_block_interleaved(
    param_name: str,
    src_metadata: ParameterMetadata,
    dst_metadata: ParameterMetadata,
    descriptors: list[ShardingDescriptor],
    my_global_rank: int,
) -> list[tuple[int, tuple[slice, ...], tuple[slice, ...]]]:
    """
    Block-interleaved TP planner for parameters with ``partition_sizes``.

    When a parameter packs multiple independently-sharded components of
    *different* sizes (e.g. Mamba in_proj packs z, x, B, C, dt), a simple
    contiguous concat produces the wrong layout.  This function treats each
    block independently: it gathers (or scatters) each block across TP ranks
    before moving to the next block.

    ``partition_sizes`` lists the per-TP-rank block sizes along the partition
    dim.  Block *i* occupies ``[sum(sizes[:i]), sum(sizes[:i+1]))`` in the
    local tensor on every TP rank.  In the *full* (TP-gathered) tensor, block
    *i* occupies ``[sum(full_sizes[:i]), sum(full_sizes[:i+1]))`` where
    ``full_sizes[i] = sizes[i] * src_tp_world``.
    """
    if not descriptors or descriptors[0].name != "tp":
        return []
    d = descriptors[0]
    if my_global_rank not in d.dst_dim_ranks:
        return []

    dim = d.dim
    src_shape = tuple(src_metadata.shape)
    dst_shape = tuple(dst_metadata.shape)
    src_world = len(d.src_dim_ranks)
    dst_world = len(d.dst_dim_ranks)
    dst_local_rank = _get_rank_in_group(my_global_rank, d.dst_dim_ranks)

    # Use partition_sizes from whichever side has it (prefer src)
    src_sizes = src_metadata.partition_sizes
    dst_sizes = dst_metadata.partition_sizes

    if src_sizes is None and dst_sizes is None:
        raise RuntimeError(f"{param_name}: _plan_block_interleaved called without partition_sizes")

    # Derive the full (un-sharded) block sizes
    if src_sizes is not None:
        num_blocks = len(src_sizes)
        full_sizes = [s * src_world for s in src_sizes]
    else:
        num_blocks = len(dst_sizes)
        full_sizes = [s * dst_world for s in dst_sizes]

    # Compute per-rank block sizes for both sides
    if src_sizes is None:
        src_sizes = [f // src_world for f in full_sizes]
    if dst_sizes is None:
        dst_sizes = [f // dst_world for f in full_sizes]

    # Validate conservation
    for i in range(num_blocks):
        if src_sizes[i] * src_world != dst_sizes[i] * dst_world:
            raise RuntimeError(
                f"{param_name}: block {i} size mismatch: "
                f"src_sizes[{i}]={src_sizes[i]}*{src_world} != "
                f"dst_sizes[{i}]={dst_sizes[i]}*{dst_world}"
            )

    ops: list[tuple[int, tuple[slice, ...], tuple[slice, ...]]] = []

    # For each block, compute the transfer ops independently
    src_block_offset = 0  # cumulative offset in source local tensor
    dst_block_offset = 0  # cumulative offset in destination local tensor

    for blk in range(num_blocks):
        src_blk_sz = src_sizes[blk]  # per-src-rank size of this block
        dst_blk_sz = dst_sizes[blk]  # per-dst-rank size of this block
        full_blk_sz = full_sizes[blk]

        # Within this block, use simple LCM tiling (stride=1)
        Ns = src_world
        Nd = dst_world
        g = math.gcd(Ns, Nd)
        L = (Ns // g) * Nd
        if full_blk_sz % L != 0:
            raise RuntimeError(
                f"{param_name}: block {blk} full_size {full_blk_sz} not divisible by LCM {L}"
            )
        unit = full_blk_sz // L
        cps = L // Ns
        cpd = L // Nd

        # This dst rank's segment within the block
        g_dst_seg = dst_local_rank
        for off in range(cpd):
            g_micro = g_dst_seg * cpd + off
            s_idx = g_micro // cps
            in_seg = g_micro % cps
            src_owner_in_dim = s_idx % src_world
            src_global_rank = d.src_dim_ranks[src_owner_in_dim]
            src_local_seg_idx = s_idx // src_world
            src_start = src_block_offset + src_local_seg_idx * (cps * unit) + in_seg * unit
            dst_start = dst_block_offset + off * unit

            src_slice = [slice(None)] * len(src_shape)
            dst_slice = [slice(None)] * len(dst_shape)
            src_slice[dim] = slice(src_start, src_start + unit)
            dst_slice[dim] = slice(dst_start, dst_start + unit)
            ops.append((src_global_rank, tuple(src_slice), tuple(dst_slice)))

        src_block_offset += src_blk_sz
        dst_block_offset += dst_blk_sz

    # Stable sort by destination offset
    def dst_key(op):
        _, _, dsl = op
        s = dsl[dim]
        return s.start if isinstance(s, slice) else 0

    ops.sort(key=dst_key)
    return ops


def _finalize_dp_transfers(
    param_name: str,
    src_metadata: ParameterMetadata,
    dst_metadata: ParameterMetadata,
    my_global_rank: int,
) -> list[tuple[int, tuple[slice, ...], tuple[slice, ...]]]:
    """Return receiver-side transfer for a parameter that is not TP-sharded.

    This is reached when we cannot build a TP sharding descriptor for the parameter
    (i.e., it is effectively replicated with respect to sharding).  We use this when the
    destination and source mode have no TP or the parameter is replicted on all ranks
    such as layernorm. If the source and destination DP groups match, we return a local
    full-tensor copy; otherwise we pick a source rank from the source DP group in a
    deterministic round-robin manner based on the receiver's global rank for better load
    distribution.
    """
    dst_dp_ranks = dst_metadata.data_parallel_group_ranks
    src_dp_ranks = src_metadata.data_parallel_group_ranks
    if my_global_rank not in dst_dp_ranks:
        return []

    dst_shape = dst_metadata.shape

    # Same DP layout - local copy (only if this rank has the source parameter)
    if src_dp_ranks == dst_dp_ranks and my_global_rank in src_dp_ranks:
        full_slice = tuple(slice(None) for _ in range(len(dst_shape)))
        return [(my_global_rank, full_slice, full_slice)]

    # Different DP groups - use round-robin based on destination global rank for
    # better load balancing across source ranks. This ensures that destination
    # ranks are distributed across source ranks even when they have the same
    # position within their respective DP groups.
    #
    # In non-collocated mode, src_dp_ranks might include ranks that don't
    # have the source model (e.g., idle ranks or destination ranks). Filter to only
    # include the rank that provided this metadata (src_metadata.owner_rank).
    # src_metadata was selected by select_src_metadata_balanced, so owner_rank is the
    # actual source rank for this parameter.
    actual_src_rank = src_metadata.owner_rank
    src_global_rank = src_dp_ranks[my_global_rank % len(src_dp_ranks)]
    # Override with the actual source rank if the selected rank doesn't have the parameter
    if src_global_rank != actual_src_rank:
        src_global_rank = actual_src_rank
    full_slice = tuple(slice(None) for _ in range(len(dst_shape)))
    return [(src_global_rank, full_slice, full_slice)]


def _determine_source_ranks_for_dst_param(
    param_name: str,
    src_metadata: ParameterMetadata,
    dst_metadata: ParameterMetadata,
    my_global_rank: int,
) -> list[tuple[int, tuple[slice, ...], tuple[slice, ...]]]:
    """Route to dimension-specific planner based on parameter sharding type."""

    # Regular TP/DP planning with EP-resolved metadata
    descriptors = _build_descriptors_for_param(src_metadata=src_metadata, dst_metadata=dst_metadata)
    if descriptors:
        # Use block-interleaved planner when partition_sizes is present
        # (e.g. Mamba in_proj packs components of different sizes)
        if src_metadata.partition_sizes is not None or dst_metadata.partition_sizes is not None:
            return _plan_block_interleaved(
                param_name=param_name,
                src_metadata=src_metadata,
                dst_metadata=dst_metadata,
                descriptors=descriptors,
                my_global_rank=my_global_rank,
            )
        return _plan_multi_dim_lcm(
            param_name=param_name,
            src_metadata=src_metadata,
            dst_metadata=dst_metadata,
            descriptors=descriptors,
            my_global_rank=my_global_rank,
        )
    # DP / replicated fallback
    return _finalize_dp_transfers(param_name, src_metadata, dst_metadata, my_global_rank)


def build_centralized_reshard_plan(
    src_module: torch.nn.Module,
    dst_module: torch.nn.Module,
    num_experts: int = None,
    group=None,
    src_rank_offset: int = 0,
    dst_rank_offset: int = 0,
) -> ReshardPlan:
    """
    Centralized planning: Rank 0 builds complete plan for all ranks, then scatters.

    Supports None for src_module and/or dst_module to enable non-collocated mode:
    - src_module=None: Rank doesn't have source model (destination-only)
    - dst_module=None: Rank doesn't have destination model (source-only)
    - Both provided: Rank has both models (collocated mode)

    Each rank provides metadata only for the models it owns, including parallel group
    membership (tensor_parallel_group_ranks, expert_parallel_group_ranks, etc.).
    This metadata is sufficient for rank 0 to build correct transfer plans without
    requiring dummy models.
    """
    # Use group.rank() instead of dist.get_rank(group) to support cross-cluster
    # ProcessGroups where members have independent default PGs (same default rank).
    my_global_rank = group.rank() if group is not None else dist.get_rank()
    world_size = group.size() if group is not None else dist.get_world_size()

    # Extract metadata from source model if present
    if src_module is not None:
        src_pg = getattr(src_module, "pg_collection", None)
        if src_pg is None:
            raise ValueError("Source module must have pg_collection")
        my_src_params = {name: p for name, p in src_module.named_parameters(recurse=True)}
        src_layer_prefix_map = _build_layer_module_prefix_map(src_module)
        my_src_metadata = [
            extract_param_metadata(
                p,
                name,
                my_global_rank,
                src_pg,
                num_experts=num_experts,
                layer_module_prefix_map=src_layer_prefix_map,
                rank_offset=src_rank_offset,
            )
            for name, p in my_src_params.items()
        ]
    else:
        # No source model on this rank - provide empty metadata
        my_src_metadata = []

    # Extract metadata from destination model if present
    if dst_module is not None:
        dst_pg = getattr(dst_module, "pg_collection", None)
        if dst_pg is None:
            raise ValueError("Destination module must have pg_collection")
        my_dst_params = {name: p for name, p in dst_module.named_parameters(recurse=True)}
        dst_layer_prefix_map = _build_layer_module_prefix_map(dst_module)
        my_dst_metadata = [
            extract_param_metadata(
                p,
                name,
                my_global_rank,
                dst_pg,
                num_experts=num_experts,
                layer_module_prefix_map=dst_layer_prefix_map,
                rank_offset=dst_rank_offset,
            )
            for name, p in my_dst_params.items()
        ]
    else:
        # No destination model on this rank - provide empty metadata
        my_dst_metadata = []

    all_src_metadata_by_rank = [None] * world_size
    all_dst_metadata_by_rank = [None] * world_size
    dist.all_gather_object(all_src_metadata_by_rank, my_src_metadata, group=group)
    dist.all_gather_object(all_dst_metadata_by_rank, my_dst_metadata, group=group)

    # Parameter to metadata maps keyed by resolved_name
    src_param_metadata_by_rank = {}
    dst_param_metadata_by_rank = {}
    src_param_metadata: dict[str, list[ParameterMetadata]] = {}

    for rank_id, rank_metadata_list in enumerate(all_src_metadata_by_rank):
        src_param_metadata_by_rank[rank_id] = {m.resolved_name: m for m in rank_metadata_list}
    for rank_id, rank_metadata_list in enumerate(all_dst_metadata_by_rank):
        dst_param_metadata_by_rank[rank_id] = {m.resolved_name: m for m in rank_metadata_list}
    for rank_metadata_list in all_src_metadata_by_rank:
        for metadata in rank_metadata_list:
            key = metadata.resolved_name
            if key not in src_param_metadata:
                src_param_metadata[key] = []
            src_param_metadata[key].append(metadata)

    # Build the plan on global rank 0 and broadcast to all ranks
    if my_global_rank == 0:
        plans_for_all_ranks = {r: ReshardPlan([], []) for r in range(world_size)}
        # Global monotonically increasing ID for non-local transfers.
        # This is shared between the corresponding send/recv ops so that
        # NVSHMEM can build schedule.
        next_task_id = 0

        # Pipeline-parallel (PP) "mapping" is handled implicitly.
        # Each rank contributes metadata only for the parameters it actually owns
        # (i.e., the module partitioning for its PP stage). When PP sizes differ
        # between source and destination, we don't compute an explicit stage-to-stage
        # mapping here; instead, we iterate destination ranks and plan copies for the
        # parameters present on those ranks. Any source rank that has the same logical
        # parameter (matched by resolved_name) can serve as a sender (with DP balancing),
        # and TP slicing is applied when applicable.
        for dst_rank in range(world_size):
            dst_rank_params = dst_param_metadata_by_rank.get(dst_rank, {})
            for resolved_name, dst_metadata in dst_rank_params.items():
                src_meta_list = src_param_metadata.get(resolved_name)
                if not src_meta_list:
                    raise RuntimeError(
                        f"Destination parameter '{resolved_name}' on rank {dst_rank} "
                        "not found in source model."
                    )
                # Choose a representative source metadata with DP round-robin balancing
                src_metadata = select_src_metadata_balanced(src_meta_list, dst_metadata, dst_rank)
                sources = _determine_source_ranks_for_dst_param(
                    resolved_name, src_metadata, dst_metadata, dst_rank
                )
                for src_rank, src_slice, dst_slice in sources:
                    task_id = next_task_id
                    next_task_id += 1

                    plans_for_all_ranks[dst_rank].recv_ops.append(
                        TransferOp(
                            param_name=dst_metadata.name,
                            peer_rank=src_rank,
                            is_send=False,
                            my_slice=dst_slice,
                            peer_slice=src_slice,
                            task_id=task_id,
                        )
                    )
                    plans_for_all_ranks[src_rank].send_ops.append(
                        TransferOp(
                            param_name=src_metadata.name,
                            peer_rank=dst_rank,
                            is_send=True,
                            my_slice=src_slice,
                            peer_slice=dst_slice,
                            task_id=task_id,
                        )
                    )
        plans_list = [plans_for_all_ranks[r] for r in range(world_size)]
    else:
        plans_list = [None] * world_size
    # Use group_src= (group rank) instead of src= (default PG global rank) to support
    # cross-cluster ProcessGroups where members share the same default PG rank.
    torch.distributed.broadcast_object_list(plans_list, group_src=0, group=group)
    my_plan = plans_list[my_global_rank]

    logger.info(
        f"Rank {my_global_rank}: Received plan - {len(my_plan.recv_ops)} recvs, "
        f"{len(my_plan.send_ops)} sends"
    )

    return my_plan


================================================
FILE: megatron/core/resharding/refit.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from __future__ import annotations

"""
High-level refit/reshard orchestration:
- prepare_swap_model_weights: build and cache the reshard plan without any transfer.
- swap_model_weights: public API; accepts a backend name or CopyService and delegates.
- reshard_model_weights: transport-agnostic core; builds/caches plan and executes.
"""

from dataclasses import dataclass
from typing import Any, Literal, Optional, Tuple, Union

import torch

from megatron.core import parallel_state
from megatron.core.inference.quantization.utils import (
    _should_quantize_param,
    quantize_params_to_mxfp8,
)
from megatron.core.models.common.language_module.language_module import LanguageModule
from megatron.core.utils import unwrap_model

from . import build_centralized_reshard_plan, execute_reshard_plan
from .copy_services.base import CopyService
from .copy_services.gloo_copy_service import GlooCopyService
from .copy_services.nccl_copy_service import NCCLCopyService
from .copy_services.nvshmem_copy_service import NVSHMEMCopyService
from .transforms import MXFP8ReshardTransform, ReshardTransform

# Supported refit backend names
RefitBackendName = Literal["nccl", "gloo", "nvshmem"]


@dataclass(frozen=True)
class _PlanCacheKey:
    """
    Cache key for reshard plans.
    """

    rank: int
    # Parallelism configuration: (TP, PP, EP, DP, expt_tp) or None for non-collocated ranks
    src_config: Optional[Tuple[int, int, int, int, int]]
    dst_config: Optional[Tuple[int, int, int, int, int]]
    num_experts: Optional[int]


def _get_config_tuple(core) -> Optional[Tuple[int, int, int, int, int]]:
    """Extract (TP, PP, EP, DP, expt_tp) sizes from a model core.

    Returns:
        Tuple of (TP, PP, EP, DP, expt_tp) sizes, or None if core is None.
        - TP: Tensor parallelism
        - PP: Pipeline parallelism
        - EP: Expert parallelism
        - DP: Data parallelism
        - expt_tp: Expert tensor parallelism
    """
    if core is None:
        return None
    pg = core.pg_collection
    return (
        len(torch.distributed.get_process_group_ranks(pg.tp)) if pg.tp else 1,
        len(torch.distributed.get_process_group_ranks(pg.pp)) if pg.pp else 1,
        len(torch.distributed.get_process_group_ranks(pg.ep)) if pg.ep else 1,
        len(torch.distributed.get_process_group_ranks(pg.dp)) if pg.dp else 1,
        (
            len(torch.distributed.get_process_group_ranks(pg.expt_tp))
            if hasattr(pg, 'expt_tp') and pg.expt_tp
            else 1
        ),
    )


def _build_plan_cache_key(
    src_core, tgt_core, num_experts: Optional[int], group=None
) -> _PlanCacheKey:
    """Build cache key for reshard plan.

    Args:
        src_core: Source model core (or None for non-collocated destination/idle ranks)
        tgt_core: Target model core (or None for non-collocated source/idle ranks)
        num_experts: Number of MoE experts (or None for non-MoE models)
        group: Optional process group for rank query

    Returns:
        Cache key that uniquely identifies this reshard configuration for this rank
    """
    # Use group.rank() to support cross-cluster ProcessGroups
    rank = group.rank() if group is not None else torch.distributed.get_rank()
    src_config = _get_config_tuple(src_core)
    dst_config = _get_config_tuple(tgt_core)
    return _PlanCacheKey(
        rank=rank, src_config=src_config, dst_config=dst_config, num_experts=num_experts
    )


# Module-level cache for refit services to avoid repeated allocations
_service_cache: dict[str, CopyService] = {}
_plan_cache: dict[_PlanCacheKey, Any] = {}


def get_or_create_service(backend: RefitBackendName, group=None) -> CopyService:
    """Get or create a cached CopyService instance for the given backend.

    This avoids expensive repeated allocations (especially for NVSHMEM buffers)
    when swap_model_weights is called multiple times with the same backend.

    Args:
        backend: Backend name ("nccl", "gloo", or "nvshmem").
        group: Optional process group for NCCL backend.
    """
    if backend in _service_cache:
        return _service_cache[backend]

    if backend == "nccl":
        service = NCCLCopyService(group=group)
    elif backend == "gloo":
        service = GlooCopyService(group=group)
    elif backend == "nvshmem":
        service = NVSHMEMCopyService(group=group)
    else:
        raise ValueError(f"Unknown backend '{backend}'")

    _service_cache[backend] = service
    return service


def clear_service_cache():
    """Clear the cached refit services.

    Call this if you need to invalidate the cache, for example when
    reinitializing distributed state.

    This properly finalizes services to free GPU buffers
    before clearing the cache.
    """
    global _service_cache

    # Finalize services to free resources for NVSHMEM backend
    # NCCL/Gloo services have no cleanup needed
    for backend_name, service in _service_cache.items():
        if hasattr(service, '_remote') and hasattr(service._remote, 'finalize'):
            service._remote.finalize()

    _service_cache.clear()


def clear_plan_cache():
    """
    Clear the cached refit plans.
    """
    global _plan_cache
    _plan_cache.clear()


def clear_all_caches():
    """
    Clear both service and plan caches.
    """
    clear_service_cache()
    clear_plan_cache()


def _unwrap_model_cores(src_model, target_model):
    """Extract (src_core, tgt_core, num_experts) from model arguments.

    Handles list-wrapped modules and None (non-collocated) models.
    Fills in missing DP groups from Megatron's parallel state on the source.

    Returns:
        (src_core, tgt_core, num_experts)
    """
    src_core = None
    tgt_core = None
    num_experts = None

    if src_model is not None:
        src_lm = src_model[0] if isinstance(src_model, (list, tuple)) else src_model
        num_experts = src_lm.config.num_moe_experts
        src_core = unwrap_model(src_lm)
        if not hasattr(src_core, "pg_collection") or src_core.pg_collection is None:
            raise RuntimeError("Source model missing pg_collection required for reshard")
        # Fill missing DP group on the source using Megatron's parallel state if not provided
        if getattr(src_core.pg_collection, "dp", None) is None:
            src_core.pg_collection.dp = parallel_state.get_data_parallel_group()

    if target_model is not None:
        tgt_lm = target_model[0] if isinstance(target_model, (list, tuple)) else target_model
        if num_experts is None:
            num_experts = tgt_lm.config.num_moe_experts
        tgt_core = unwrap_model(tgt_lm)
        if not hasattr(tgt_core, "pg_collection") or tgt_core.pg_collection is None:
            raise RuntimeError("Target model missing pg_collection required for reshard")

    return src_core, tgt_core, num_experts


def _build_or_get_plan(src_core, tgt_core, num_experts, group, src_rank_offset, dst_rank_offset):
    """Return the cached reshard plan, building it (collectively) if not yet cached.

    All participating ranks must call this simultaneously when the plan is not
    yet cached, because build_centralized_reshard_plan uses collective communication.
    """
    global _plan_cache
    cache_key = _build_plan_cache_key(src_core, tgt_core, num_experts, group=group)
    if cache_key not in _plan_cache:
        _plan_cache[cache_key] = build_centralized_reshard_plan(
            src_core,
            tgt_core,
            num_experts=num_experts,
            group=group,
            src_rank_offset=src_rank_offset,
            dst_rank_offset=dst_rank_offset,
        )
    return _plan_cache[cache_key]


def _needs_mxfp8_conversion(model) -> bool:
    """Check if a model uses FlashInfer MXFP8 inference and needs weight conversion."""
    if model is None:
        return False
    lm = model[0] if isinstance(model, (list, tuple)) else model
    config = lm.config
    return (
        getattr(config, 'transformer_impl', None) == 'inference_optimized'
        and getattr(config, 'fp8_recipe', None) == 'mxfp8'
    )


def _setup_mxfp8_transform_on_plan(plan, target_model) -> None:
    """Detect MXFP8 needs and attach a transform to the plan if required.

    If the *target_model* uses an inference-optimized layer spec with MXFP8,
    this function:
      1. Computes which params are eligible for MXFP8 conversion.
      2. Quantizes the target model's decoder weights to FlashInfer MXFP8Tensor
         (creating persistent buffers whose addresses are later captured by
         CUDA graphs).
      3. Builds an ``MXFP8ReshardTransform`` and attaches it to the plan as
         ``plan.transform``.

    If the model doesn't need MXFP8, ``plan.transform`` is set to None.
    Subsequent calls are no-ops if the plan already has a transform attribute.
    """
    if hasattr(plan, 'transform'):
        return  # Already set up

    if not _needs_mxfp8_conversion(target_model):
        plan.transform = None
        return

    lm = target_model[0] if isinstance(target_model, (list, tuple)) else target_model
    core = unwrap_model(lm)
    decoder = core.decoder if hasattr(core, 'decoder') else core

    # 1. Compute which parameters are eligible for MXFP8 conversion.
    #    Must be done while params are still visible as nn.Parameter (BF16).
    convertible: set[str] = set()
    for name, param in decoder.named_parameters():
        if _should_quantize_param(param):
            convertible.add(f"decoder.{name}")

    # 2. Quantize decoder weights → persistent MXFP8Tensor buffers.
    persistent_buffers = quantize_params_to_mxfp8(decoder)

    # 3. Build the transform and attach it to the plan.
    plan.transform = MXFP8ReshardTransform(
        convertible_params=convertible,
        persistent_buffers=persistent_buffers,
        buffer_key_prefix="decoder.",
    )


def prepare_swap_model_weights(
    src_model: LanguageModule,
    target_model: LanguageModule,
    group=None,
    src_rank_offset: int = 0,
    dst_rank_offset: int = 0,
):
    """Pre-build and cache the reshard plan and any format-conversion transforms.

    Call this during initialization while models are in their native (BF16) format,
    before any weight format conversion (e.g., MXFP8).  The plan is stored in the
    same module-level cache as swap_model_weights, so subsequent calls reuse it
    without needing to inspect named_parameters() again.

    If the *target_model* uses an inference-optimized layer spec with MXFP8
    (``config.transformer_impl == 'inference_optimized'`` and
    ``config.fp8_recipe == 'mxfp8'``), this function also:
      - computes which parameters are eligible for MXFP8 conversion,
      - quantizes the target decoder weights to persistent FlashInfer
        MXFP8Tensor buffers (whose addresses are later baked into CUDA graphs),
      - creates an ``MXFP8ReshardTransform`` that subsequent
        ``swap_model_weights`` calls use automatically.

    Callers do **not** need to know about MXFP8 — the transform is created and
    cached transparently.

    All participating ranks must call this simultaneously — the plan builder uses
    collective communication internally.

    Args:
        src_model: Source model, or None if this rank only receives weights.
        target_model: Target model, or None if this rank only sends weights.
        group: Optional process group for collective communication.
        src_rank_offset: Rank offset for source (training) workers.
        dst_rank_offset: Rank offset for destination (inference) workers.
    """
    src_core, tgt_core, num_experts = _unwrap_model_cores(src_model, target_model)
    plan = _build_or_get_plan(
        src_core, tgt_core, num_experts, group, src_rank_offset, dst_rank_offset
    )

    # Auto-detect and set up MXFP8 transform on the plan for the target model.
    # This must happen after the plan is built (while BF16 params are still visible)
    # and before any swap_model_weights call.
    _setup_mxfp8_transform_on_plan(plan, target_model)


def swap_model_weights(
    src_model: LanguageModule,
    target_model: LanguageModule,
    refit_method: Union[RefitBackendName, CopyService],
    group=None,
    src_rank_offset: int = 0,
    dst_rank_offset: int = 0,
    transform: Optional[ReshardTransform] = None,
):
    """
    Orchestrate weight swap/refit.

    If *transform* is not explicitly provided, the function automatically uses
    any ``MXFP8ReshardTransform`` that was created and cached by a prior
    ``prepare_swap_model_weights`` call for the same model pair.  This makes
    MXFP8 handling transparent to callers.

    Args:
        refit_method: a string backend name (one of the supported refit
            backends) or a CopyService instance.
        group: Optional process group for communication.
        src_rank_offset / dst_rank_offset: Offsets applied to local process
            group ranks so that metadata contains globally unique rank IDs
            across independent torch.distributed worlds.
        transform: Optional ReshardTransform for custom format conversion.
            If None, the cached transform (from prepare_swap_model_weights)
            is used automatically when the receiver needs MXFP8 conversion.
    """
    if isinstance(refit_method, str):
        service = get_or_create_service(refit_method, group=group)
    elif hasattr(refit_method, 'submit_send') and hasattr(refit_method, 'run'):
        service = refit_method
    else:
        raise TypeError(
            "refit_method must be a str backend name or a CopyService-compatible instance"
        )

    # Auto-resolve MXFP8 transform from the cached plan when no
    # explicit transform was provided.
    if transform is None:
        src_core, tgt_core, num_experts = _unwrap_model_cores(src_model, target_model)
        plan = _build_or_get_plan(
            src_core, tgt_core, num_experts, group, src_rank_offset, dst_rank_offset
        )
        transform = getattr(plan, 'transform', None)

    reshard_model_weights(
        src_model,
        target_model,
        service=service,
        group=group,
        src_rank_offset=src_rank_offset,
        dst_rank_offset=dst_rank_offset,
        transform=transform,
    )


def reshard_model_weights(
    src_model: LanguageModule,
    target_model: LanguageModule,
    service: CopyService,
    group=None,
    src_rank_offset: int = 0,
    dst_rank_offset: int = 0,
    transform: Optional[ReshardTransform] = None,
):
    """Reshard and copy model weights from ``src_model`` to ``target_model`` using ``service``.

    Supports None for src_model and/or target_model to enable non-collocated mode:
    - (src_model, target_model): Both models present (collocated mode)
    - (src_model, None): Source rank - only sends data (non-collocated)
    - (None, target_model): Destination rank - only receives data (non-collocated)
    - (None, None): Idle rank - participates in collectives but has no transfers (non-collocated)

    Args:
        group: Optional process group for collective communication.
        src_rank_offset / dst_rank_offset: Offsets for mapping local ranks to global ranks
            in independent torch.distributed worlds.
        transform: Optional ReshardTransform for custom format conversion.
    """
    global _plan_cache

    # Handle idle ranks (both models None) - they participate in collectives but have no work
    if src_model is None and target_model is None:
        cache_key = _build_plan_cache_key(
            src_core=None, tgt_core=None, num_experts=None, group=group
        )

        # Use cached plan if available, otherwise build (with collective participation)
        if cache_key not in _plan_cache:
            plan = build_centralized_reshard_plan(
                None,
                None,
                num_experts=None,
                group=group,
                src_rank_offset=src_rank_offset,
                dst_rank_offset=dst_rank_offset,
            )
            _plan_cache[cache_key] = plan
        else:
            plan = _plan_cache[cache_key]
        execute_reshard_plan(plan, None, None, service=service, group=group, transform=transform)
        return

    # Handle None models - extract core modules only from non-None models
    src_core = None
    tgt_core = None
    num_experts = None

    if src_model is not None:
        # Handle list-wrapped modules
        src_lm = src_model[0] if isinstance(src_model, (list, tuple)) else src_model
        num_experts = src_lm.config.num_moe_experts
        # Unwrap to get owning modules (with parameters and pg_collection)
        src_core = unwrap_model(src_lm)
        # Ensure pg_collection exists
        if not hasattr(src_core, "pg_collection") or src_core.pg_collection is None:
            raise RuntimeError("Source model missing pg_collection required for reshard")
        # Fill missing DP group on the source using Megatron's parallel state if not provided
        if getattr(src_core.pg_collection, "dp", None) is None:
            src_core.pg_collection.dp = parallel_state.get_data_parallel_group()

    if target_model is not None:
        # Handle list-wrapped modules
        tgt_lm = target_model[0] if isinstance(target_model, (list, tuple)) else target_model
        if num_experts is None:
            num_experts = tgt_lm.config.num_moe_experts
        # Unwrap to get owning modules (with parameters and pg_collection)
        tgt_core = unwrap_model(tgt_lm)
        # Ensure pg_collection exists
        if not hasattr(tgt_core, "pg_collection") or tgt_core.pg_collection is None:
            raise RuntimeError("Target model missing pg_collection required for reshard")

    # Build or retrieve cached plan
    cache_key = _build_plan_cache_key(src_core, tgt_core, num_experts, group=group)

    if cache_key not in _plan_cache:
        # All ranks must participate in planning (collective operations)
        plan = build_centralized_reshard_plan(
            src_core,
            tgt_core,
            num_experts=num_experts,
            group=group,
            src_rank_offset=src_rank_offset,
            dst_rank_offset=dst_rank_offset,
        )
        _plan_cache[cache_key] = plan
    else:
        plan = _plan_cache[cache_key]

    execute_reshard_plan(
        plan, src_core, tgt_core, service=service, group=group, transform=transform
    )


================================================
FILE: megatron/core/resharding/transforms.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from __future__ import annotations

"""
Reshard transforms for custom send/recv/writeback during weight transfer.

- ReshardTransform: base class for pluggable format conversion hooks.
- MXFP8ReshardTransform: writes received BF16 data into persistent FlashInfer
  MXFP8Tensor buffers so CUDA-graph device-pointer captures remain valid.
"""

import torch

from megatron.core.inference.quantization.mxfp8_tensor import MXFP8Tensor


class ReshardTransform:
    """Hook for custom send/recv/writeback during reshard execution.

    Implementations override the four methods below.  When an instance is
    passed to ``execute_reshard_plan``, each ``TransferOp`` is checked via
    ``should_transform(param_name)``; if True the transform methods are used
    instead of the default send/recv/writeback logic.

    The transform may change the wire format (e.g. send MXFP8 data+scale
    instead of BF16) **or** keep the same wire format and only post-process
    on the receive side (e.g. receive BF16, convert to MXFP8 in
    ``finalize_recv``).  The only constraint is that ``prepare_send`` and
    ``prepare_recv`` must return the same number of tensors for a given
    parameter so that send/recv pairs match.
    """

    def should_transform(self, param_name: str) -> bool:
        """Return True if *param_name* should use the transform path."""
        return False

    def prepare_send(
        self, param_name: str, src_slice: tuple[slice, ...], src_param: torch.nn.Parameter
    ) -> list[torch.Tensor]:
        """Produce tensor(s) to send for *param_name*.

        May return multiple tensors (e.g. data + scale when converting to
        MXFP8 on the sender side).  The default implementation sends the
        BF16 slice unchanged (single tensor).
        """
        raise NotImplementedError

    def prepare_recv(self, param_name: str, dst_slice: tuple[slice, ...]) -> list[torch.Tensor]:
        """Allocate receive buffer(s).  Count must match ``prepare_send`` output."""
        raise NotImplementedError

    def finalize_recv(
        self, param_name: str, dst_slice: tuple[slice, ...], recv_buffers: list[torch.Tensor]
    ) -> None:
        """Write received data into final destination (e.g. persistent buffers).

        This is where receiver-side format conversion can happen (e.g.
        converting a BF16 recv buffer to MXFP8 before writing into
        persistent storage).
        """
        raise NotImplementedError


# ---------------------------------------------------------------------------
# MXFP8 transform helpers
# ---------------------------------------------------------------------------


def _scale_slice_from_data_slice(
    data_slice: tuple[slice, ...], block_size: int = 32
) -> tuple[slice, ...]:
    """Convert an MXFP8 data slice to the corresponding scale slice.

    In MXFP8, each group of ``block_size`` elements along the last (K)
    dimension shares a single scale value.  All dimensions except the last
    are passed through unchanged; the last ``slice`` has its start/stop
    divided by ``block_size``.  Integer index on the last dim is converted
    to scale index as idx // block_size.
    """
    adjusted = list(data_slice)
    last = adjusted[-1]
    if isinstance(last, slice):
        if last.start is not None and last.start % block_size != 0:
            raise AssertionError(
                f"MXFP8 data slice last dim ({last}) must be aligned to block_size={block_size}"
            )
        if last.stop is not None and last.stop % block_size != 0:
            raise AssertionError(
                f"MXFP8 data slice last dim ({last}) must be aligned to block_size={block_size}"
            )
        scale_start = (last.start // block_size) if last.start is not None else None
        scale_stop = (last.stop // block_size) if last.stop is not None else None
        # Scale has one value per block; do not use last.step (would index scale wrong).
        adjusted[-1] = slice(scale_start, scale_stop)
    elif isinstance(last, int):
        adjusted[-1] = last // block_size
    return tuple(adjusted)


def _ensure_sendable(param: torch.Tensor) -> torch.Tensor:
    """Return a standard-dtype tensor suitable for wire transmission.

    Quantized parameter types (e.g., Transformer Engine MXFP8Tensor) are
    dequantized to their original precision (usually BF16).  Standard
    parameters are returned via ``.data`` (unwrapped from autograd).
    """
    try:
        from transformer_engine.pytorch.tensor.mxfp8_tensor import MXFP8Tensor as _TEMXFP8

        if isinstance(param, _TEMXFP8):
            return param.dequantize()
    except ImportError:
        pass
    return param.data


class MXFP8ReshardTransform(ReshardTransform):
    """MXFP8 format-conversion transform for reshard.

    Writes received weight data directly into persistent ``MXFP8Tensor``
    buffers so that CUDA-graph device-pointer captures remain valid across
    refits.

    Two modes are supported, controlled by *convert_on_send*:

    ``convert_on_send=False`` (default — **receiver-side conversion**):
        The sender transmits plain BF16 (one tensor per op, identical to the
        default reshard path).  The receiver allocates a BF16 receive buffer,
        then ``finalize_recv`` converts BF16 → MXFP8 and writes into the
        persistent buffers.  Because the wire format is unchanged the sender
        does **not** need a transform — only the receiver creates one.  This
        is the simplest mode and avoids any sender/receiver coordination.

    ``convert_on_send=True`` (**sender-side conversion**):
        The sender converts each BF16 slice to MXFP8 and sends **two**
        tensors (data + scale) per op.  The receiver allocates matching
        MXFP8 buffers and ``finalize_recv`` copies them directly.  Both
        sender and receiver must use the transform so that tensor counts
        match.  This mode halves wire bandwidth (~1 byte/elem vs 2).

        **Caveat**: CopyService backends that match local (same-rank)
        transfers by ``task_id`` (Gloo, NVSHMEM) will break if multiple
        tensors share the same ``task_id``.  This mode is therefore only
        safe for non-colocated setups where sender and receiver are on
        different ranks.  A future fix could generate unique sub-IDs.

    Args:
        convertible_params: set of fully-qualified parameter names that
            should use this transform.
        persistent_buffers: dict mapping parameter names (without
            *buffer_key_prefix*) to ``MXFP8Tensor`` objects that hold the
            receiver's persistent data/scale storage.  Empty on the sender
            when using ``convert_on_send=True``.
        buffer_key_prefix: prefix to strip from ``param_name`` when looking
            up entries in *persistent_buffers* (e.g. ``"decoder."``).
        convert_on_send: if True, convert BF16 → MXFP8 on the sender and
            transmit two tensors (data + scale).  If False (default),
            transmit BF16 and convert on the receiver in ``finalize_recv``.
    """

    def __init__(
        self,
        convertible_params: set[str],
        persistent_buffers: dict,
        buffer_key_prefix: str = "",
        convert_on_send: bool = False,
    ):
        self.convertible_params = convertible_params
        self.persistent_buffers = persistent_buffers
        self.buffer_key_prefix = buffer_key_prefix
        self.convert_on_send = convert_on_send
        # Accumulation buffers for 1D-scale params that arrive in partial slices.
        # The 1D swizzled FlashInfer scale can't be updated partially; we collect
        # all BF16 slices here and quantize the full weight once it's assembled.
        # Maps buf_key -> (full-size BF16 accumulation tensor, elements written so far).
        self._pending_1d: dict = {}

    def should_transform(self, param_name: str) -> bool:
        return param_name in self.convertible_params

    # -- send ----------------------------------------------------------------

    def prepare_send(self, param_name, src_slice, src_param):
        src_data = _ensure_sendable(src_param)
        if self.convert_on_send:

            bf16_data = src_data[src_slice].contiguous().to(torch.bfloat16)
            mxfp8 = MXFP8Tensor.from_bf16(bf16_data)
            return [mxfp8.data.contiguous(), mxfp8.scale.contiguous()]
        else:
            # BF16 on the wire — same as the default reshard path.
            return [src_data[src_slice].contiguous()]

    # -- recv ----------------------------------------------------------------

    def prepare_recv(self, param_name, dst_slice):
        buf_key = param_name.removeprefix(self.buffer_key_prefix)
        buf = self.persistent_buffers[buf_key]

        if self.convert_on_send:
            # Receive MXFP8 data + scale (2 buffers).
            if buf.scale.ndim == 1:
                # 1D swizzled scale can't be partially reconstructed from sender-quantized
                # slices.  Use convert_on_send=False for models with 1D-scale params.
                raise NotImplementedError(
                    f"convert_on_send=True is not supported for parameters with 1D swizzled "
                    f"scale (param={param_name!r}).  Use convert_on_send=False instead, which "
                    f"receives BF16 and quantizes the full weight on the receiver."
                )
            scale_slice = _scale_slice_from_data_slice(dst_slice)
            return [
                torch.empty_like(buf.data[dst_slice].contiguous()),
                torch.empty_like(buf.scale[scale_slice].contiguous()),
            ]
        else:
            # Receive BF16 (1 buffer, same shape as the MXFP8 data slice).
            shape = buf.data[dst_slice].shape
            return [torch.empty(shape, dtype=torch.bfloat16, device=buf.data.device)]

    def finalize_recv(self, param_name, dst_slice, recv_buffers):
        buf_key = param_name.removeprefix(self.buffer_key_prefix)
        buf = self.persistent_buffers[buf_key]

        if self.convert_on_send:
            # Already MXFP8 on the wire — copy data and 2D scale slices directly.
            # (1D scale is rejected at prepare_recv time, so only 2D reaches here.)
            buf.data[dst_slice].copy_(recv_buffers[0])
            scale_slice = _scale_slice_from_data_slice(dst_slice)
            buf.scale[scale_slice].copy_(recv_buffers[1])
        elif buf.scale.ndim == 1:
            # 1D swizzled scale (FlashInfer format) encodes scale values across the
            # full weight tensor; partial updates would corrupt the swizzle layout.
            # Accumulate BF16 slices and quantize once all slices are assembled.
            if buf_key not in self._pending_1d:
                # Use zeros so that any un-filled slice produces zeros rather than garbage.
                self._pending_1d[buf_key] = [
                    torch.zeros_like(buf.data, dtype=torch.bfloat16),
                    0,  # elements written so far
                ]
            accum, written = self._pending_1d[buf_key]
            accum[dst_slice].copy_(recv_buffers[0])
            written += recv_buffers[0].numel()
            if written >= buf.data.numel():
                if written != buf.data.numel():
                    raise AssertionError(
                        f"1D-scale param {param_name!r}: received {written} elements, "
                        f"expected {buf.data.numel()} (duplicate or missing slices?)"
                    )
                mxfp8 = MXFP8Tensor.from_bf16(accum)
                buf.data.copy_(mxfp8.data)
                buf.scale.copy_(mxfp8.scale)
                del self._pending_1d[buf_key]
            else:
                self._pending_1d[buf_key][1] = written
        else:
            # 2D scale: each scale row covers exactly one data row, so partial
            # row-wise updates are independent and can be applied immediately.
            mxfp8 = MXFP8Tensor.from_bf16(recv_buffers[0])
            buf.data[dst_slice].copy_(mxfp8.data)
            scale_slice = _scale_slice_from_data_slice(dst_slice)
            buf.scale[scale_slice].copy_(mxfp8.scale)


================================================
FILE: megatron/core/resharding/utils.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from __future__ import annotations

import logging
from dataclasses import dataclass
from typing import Mapping, Optional

import torch
import torch.distributed as dist

# -----------------------------------------------------------------------------
# Dataclasses used by the planner
# -----------------------------------------------------------------------------


@dataclass
class TransferOp:
    """Single logical send/recv operation used in a reshard plan."""

    param_name: str
    peer_rank: int  # Who to send to / receive from
    is_send: bool  # True=send, False=recv

    # Slice information (for when we execute the plan)
    my_slice: tuple[slice, ...]  # My tensor slice
    peer_slice: tuple[slice, ...]  # Peer's tensor slice (for reference)

    # Optional global task identifier for advanced backends (e.g., NVSHMEM)
    # When present, this ID is shared between the matching send/recv ops
    # across ranks and can be used to build richer communication schedules.
    task_id: int | None = None


@dataclass
class ParameterMetadata:
    """Metadata for a parameter (used when param is on different rank)."""

    name: str
    shape: tuple[int, ...]
    dtype: torch.dtype
    element_size: int

    # TP sharding info
    is_tp: bool = False
    partition_dim: int = 0
    partition_stride: int = 1
    # For parameters that pack multiple independently-sharded components of
    # different sizes (e.g. Mamba in_proj packs z, x, B, C, dt).  When present,
    # lists the per-TP-rank block sizes along partition_dim.  The refit planner
    # interleaves these blocks rather than doing a simple contiguous concat.
    partition_sizes: list[int] | None = None

    # EP sharding info (fused/grouped MoE)
    is_ep: bool = False
    num_experts: Optional[int] = None

    # Which rank owns this param
    owner_rank: int = -1

    tensor_parallel_group_ranks: list[int] | None = None
    expert_parallel_group_ranks: list[int] | None = None
    data_parallel_group_ranks: list[int] | None = None
    pipeline_parallel_group_ranks: list[int] | None = None

    # Canonical name for matching parameters across models with different EP/PP configurations.
    #
    # - EP (expert parallel): each rank owns a subset of experts with local indices
    #   (e.g., rank 1 has "weight0" locally, but it's actually global expert 4). The raw param
    #   name can't be used to match across source/destination because the same local name refers
    #   to different global experts on different ranks. `resolved_name` remaps local expert indices
    #   to global indices (e.g., "layer.experts.weight0" on rank 1 → "layer.experts.weight4").
    #
    # - PP (pipeline parallel): transformer blocks are often named with rank-local indices
    #   (e.g., PP stage 1 may have "decoder.layers.0" even though that corresponds to global
    #   layer 16). For reshard/refit across different PP partitionings (e.g., PP2 ↔ PP1),
    #   `resolved_name` may be further canonicalized to global layer indices.
    #
    # For non-EP and non-PP cases, resolved_name == name.
    resolved_name: Optional[str] = None
    # The global expert index this parameter belongs to (e.g., 4 for global expert 4).
    # Computed alongside resolved_name; None for non-EP or fused expert tensors.
    global_expert_index: Optional[int] = None


@dataclass
class ShardingDescriptor:
    """Descriptor for a sharded dimension for a parameter."""

    name: str  # "tp" | "ep" | custom label
    dim: int
    src_stride: int
    dst_stride: int
    src_dim_ranks: list[int]
    dst_dim_ranks: list[int]


@dataclass
class ReshardPlan:
    """Reshard plan - operations for this rank."""

    send_ops: list[TransferOp]
    recv_ops: list[TransferOp]

    def __str__(self):
        return f"ReshardPlan(sends={len(self.send_ops)}, recvs={len(self.recv_ops)})"


# -----------------------------------------------------------------------------
# EP + Metadata helpers
# -----------------------------------------------------------------------------


def _get_rank_in_group(global_rank: int, group_ranks: list[int]) -> int:
    try:
        return group_ranks.index(global_rank)
    except ValueError:
        raise ValueError(
            f"Rank {global_rank} not found in process group {group_ranks}. "
            f"This likely indicates a configuration mismatch."
        )


def _detect_expert_index_from_param_name(param_name: str) -> Optional[int]:
    """Extract expert index from parameter name for TEGroupedMLP per-expert tensors."""
    for part in param_name.split('.'):
        if (
            part.startswith('weight')
            and len(part) > len('weight')
            and part[len('weight') :].isdigit()
        ):
            return int(part[len('weight') :])
        if part.startswith('bias') and len(part) > len('bias') and part[len('bias') :].isdigit():
            return int(part[len('bias') :])
    return None


def assign_ep_resolved_name_inplace(
    meta: ParameterMetadata, *, base_name: str | None = None
) -> None:
    """
    EP-only canonicalization for per-expert parameters.

    Under Expert Parallelism (EP), each rank owns a subset of experts with local indices
    (e.g., rank 1 has "weight0" locally, but it's actually global expert 4). The raw param
    name can't be used to match across source/destination because the same local name refers
    to different global experts on different ranks. This function remaps local expert indices
    to global indices in `resolved_name` and sets `global_expert_index`.

    Effects:
    - Sets meta.resolved_name (defaults to base_name/meta.name for non-EP).
    - Sets meta.global_expert_index for per-expert parameters; otherwise leaves it as None.
    """
    base = meta.name if base_name is None else base_name
    meta.resolved_name = base
    meta.global_expert_index = None
    if not meta.is_ep:
        return

    local_idx = _detect_expert_index_from_param_name(base)
    if local_idx is None:
        # Fused experts tensor: leave name as-is; TP planner will handle slicing
        return
    ep_group = meta.expert_parallel_group_ranks
    ep_size = len(ep_group)
    ep_local_rank = ep_group.index(meta.owner_rank)
    experts_per_rank = meta.num_experts // ep_size
    global_idx = ep_local_rank * experts_per_rank + local_idx
    meta.global_expert_index = global_idx

    # Replace trailing integer in "weightK"/"biasK" with global_idx
    parts = base.split('.')
    new_parts = []
    for p in parts:
        if p.startswith('weight') and len(p) > len('weight') and p[len('weight') :].isdigit():
            new_parts.append('weight' + str(global_idx))
        elif p.startswith('bias') and len(p) > len('bias') and p[len('bias') :].isdigit():
            new_parts.append('bias' + str(global_idx))
        else:
            new_parts.append(p)
    meta.resolved_name = '.'.join(new_parts)


def assign_resolved_name_inplace(
    meta: ParameterMetadata,
    *,
    layer_module_prefix_map: Mapping[str, str] | None = None,
    base_name: str | None = None,
) -> None:
    """Set meta.resolved_name so the planner can match the same weights across models.

    It rewrites PP layer indices to global layer indices (when layer_module_prefix_map is
    provided) and
    rewrites EP per-expert indices (weightK/biasK) to global expert indices.
    """
    name = meta.name if base_name is None else base_name
    if layer_module_prefix_map:
        name = _resolve_global_layer_number_in_name(name, layer_module_prefix_map)
    assign_ep_resolved_name_inplace(meta, base_name=name)


def _build_layer_module_prefix_map(module: torch.nn.Module) -> dict[str, str]:
    """Build a mapping local_module_prefix -> global_module_prefix for PP layer modules.

    Megatron assigns a global, 1-indexed layer_number to each transformer layer module at
    construction time (including PP/VPP/layout offsets). We convert that to the 0-indexed naming
    convention used in parameter names and build a map such as:

    - "decoder.layers.0" → "decoder.layers.16"  (if layer_number == 17)
    """
    prefix_map: dict[str, str] = {}
    for module_name, submodule in module.named_modules():
        if not module_name:
            continue
        layer_number = getattr(submodule, 'layer_number', None)
        if not isinstance(layer_number, int):
            continue
        parts = module_name.split('.')
        if not parts[-1].isdigit():
            continue
        parts[-1] = str(layer_number - 1)  # convert 1-indexed to 0-indexed
        prefix_map[module_name] = '.'.join(parts)
    return prefix_map


def _resolve_global_layer_number_in_name(
    name: str, layer_module_prefix_map: Mapping[str, str]
) -> str:
    """Rewrite a parameter name to use global layer indices (PP-aware).

    Given a parameter name like decoder.layers.0.self_attention..., this function rewrites
    the decoder.layers.0 prefix to the corresponding global layer index using the owning
    layer module's layer_number.

    Implementation:
    - Build a {local_prefix -> global_prefix} map once (outside the per-parameter loop).
    - Perform a longest-prefix match replacement so we only rewrite the module path portion.
    """
    if not layer_module_prefix_map:
        return name

    parts = name.split('.')
    for i in range(len(parts), 0, -1):
        prefix = '.'.join(parts[:i])
        mapped = layer_module_prefix_map.get(prefix)
        if mapped is None:
            continue
        rest = '.'.join(parts[i:])
        return mapped if not rest else mapped + '.' + rest
    return name


def extract_param_metadata(
    param: torch.nn.Parameter,
    param_name: str,
    owner_rank: int,
    pg_collection,
    num_experts: Optional[int] = None,
    layer_module_prefix_map: Mapping[str, str] | None = None,
    rank_offset: int = 0,
) -> ParameterMetadata:
    """Extract metadata from a parameter for cross-rank communication."""
    # TP flags from attributes (set by Megatron linear layers)
    is_tp = bool(getattr(param, 'tensor_model_parallel', False))
    partition_dim = int(getattr(param, 'partition_dim', 0))
    partition_stride = int(getattr(param, 'partition_stride', 1))
    partition_sizes = getattr(param, 'partition_sizes', None)
    if partition_sizes is not None:
        partition_sizes = list(partition_sizes)

    # SwiGLU/GLU compatibility: For gated linear units, fc1 stores interleaved [gate, up] portions
    # and requires partition_stride=2 for correct resharding. New models set this at construction
    # time (MLP sets partition_stride=2 on weight when gated_linear_unit=True). For legacy models
    # where stride=1 was left as default, we apply stride=2 as a fallback for fc1 parameters.
    # This is safe because: (1) gated models need it, and (2) non-gated models have smaller fc1
    # and stride doesn't affect single-block transfers.
    # if 'mlp.linear_fc1' in param_name and is_tp and partition_stride == 1:
    #     partition_stride = 2

    # EP detection: Megatron convention - expert params are not allreduced
    is_ep = not bool(getattr(param, 'allreduce', True))

    # Expert-param detection for TP inference.  When explicit_expert_comm is
    # active (is_expert and (tp_size>1 or ep)), TE clears parallel_mode so
    # tensor_model_parallel is never stamped — yet the weight IS TP-sharded
    # when tp_size > 1.  We detect expert params via num_experts + the
    # per-expert naming convention (weightK / biasK in TEGroupedLinear).
    is_expert_param = (
        num_experts is not None and _detect_expert_index_from_param_name(param_name) is not None
    )

    tensor_parallel_group_ranks: list[int] | None = None
    expert_parallel_group_ranks: list[int] | None = None
    data_parallel_group_ranks: list[int] | None = None
    pipeline_parallel_group_ranks: list[int] | None = None

    def _offset_ranks(ranks: list[int]) -> list[int]:
        return [r + rank_offset for r in ranks] if rank_offset else ranks

    if is_ep or is_expert_param:
        if is_ep:
            expert_parallel_group_ranks = _offset_ranks(
                dist.get_process_group_ranks(pg_collection.ep)
            )
        # For expert params, always provide TP group ranks so the planner can
        # handle TP size transitions (e.g., TP2→TP1).  When explicit_expert_comm
        # clears TE's parallel_mode, tensor_model_parallel may not be set even
        # though the weight IS TP-sharded.  Detect TP via group size instead.
        expt_tp = getattr(pg_collection, 'expt_tp', None)
        tp_grp = expt_tp if expt_tp is not None else getattr(pg_collection, 'tp', None)
        if tp_grp is not None:
            tp_ranks = _offset_ranks(dist.get_process_group_ranks(tp_grp))
            tensor_parallel_group_ranks = tp_ranks
            if not is_tp and len(tp_ranks) > 1:
                is_tp = True
        data_parallel_group_ranks = _offset_ranks(dist.get_process_group_ranks(pg_collection.dp))
    elif is_tp:
        # Non-EP: use regular TP group
        if hasattr(pg_collection, 'tp') and pg_collection.tp is not None:
            tensor_parallel_group_ranks = _offset_ranks(
                dist.get_process_group_ranks(pg_collection.tp)
            )
        data_parallel_group_ranks = _offset_ranks(dist.get_process_group_ranks(pg_collection.dp))
    else:
        data_parallel_group_ranks = _offset_ranks(dist.get_process_group_ranks(pg_collection.dp))

    # Always provide TP group ranks so the planner can handle TP size transitions
    # (e.g., TP2→TP1).  When is_tp=False the param is replicated across the TP group,
    # but the planner still needs to know the TP topology to plan gather/scatter ops
    # when the *other* side of the reshard IS TP-sharded.
    if (
        tensor_parallel_group_ranks is None
        and hasattr(pg_collection, 'tp')
        and pg_collection.tp is not None
    ):
        tensor_parallel_group_ranks = _offset_ranks(dist.get_process_group_ranks(pg_collection.tp))

    if hasattr(pg_collection, 'pp') and pg_collection.pp is not None:
        pipeline_parallel_group_ranks = _offset_ranks(
            dist.get_process_group_ranks(pg_collection.pp)
        )
    else:
        pipeline_parallel_group_ranks = list(
            range(rank_offset, rank_offset + dist.get_world_size())
        )

    meta = ParameterMetadata(
        name=param_name,
        shape=tuple(param.shape),
        dtype=param.dtype,
        element_size=param.element_size(),
        is_tp=is_tp,
        partition_dim=partition_dim,
        partition_stride=partition_stride,
        partition_sizes=partition_sizes,
        is_ep=is_ep,
        num_experts=num_experts,
        owner_rank=owner_rank,
        tensor_parallel_group_ranks=tensor_parallel_group_ranks,
        expert_parallel_group_ranks=expert_parallel_group_ranks,
        data_parallel_group_ranks=data_parallel_group_ranks,
        pipeline_parallel_group_ranks=pipeline_parallel_group_ranks,
    )
    assign_resolved_name_inplace(
        meta, layer_module_prefix_map=layer_module_prefix_map, base_name=param_name
    )

    return meta


def select_src_metadata_balanced(
    src_meta_list: list[ParameterMetadata], dst_metadata: ParameterMetadata, dst_rank: int
) -> ParameterMetadata:
    """Choose a representative source `ParameterMetadata` for a destination rank.

    The selected metadata provides topology information (TP/EP/DP group ranks) that the
    LCM transfer planner uses to compute actual source ranks and slices. This function
    doesn't perform transfers itself - it just picks which source configuration to use
    as reference for planning.

    Two scenarios for EP-sharded parameters:
    1. Non-collocated mode (same EP size, different rank numbering):
       - Filter by matching EP local rank to pair ranks with same expert position
       - Example: src ranks [0-63] and dst ranks [64-127] both with EP=8
       - Dst EP local 0 should use src EP local 0 as reference (same experts)

    2. Resharding mode (different EP sizes):
       - Skip EP local rank filtering (sizes don't correspond)
       - Example: EP=8→EP=16 means dst EP local 8 has no matching src EP local
       - Expert matching handled by resolved_name; LCM handles TP dimension changes

    Finally, balances across data-parallel (DP) groups to distribute load:
      - Groups src_meta_list by DP group
      - Selects source DP group via round-robin: dst_rank % num_src_dp_groups
      - Ensures even distribution of transfer load across source DP replicas
    """
    if not src_meta_list:
        raise ValueError("src_meta_list must be non-empty")

    # ============================================================================
    # EXPERT PARALLELISM (EP) LOCAL RANK FILTERING
    # ============================================================================
    # Purpose: In non-collocated mode with same EP size, ensure destination ranks
    # use source metadata from ranks with the same EP local position (same experts).
    #
    # Why size check matters:
    #   - Same size (EP=8→EP=8): Local ranks 0-7 exist in both src and dst
    #     → Filter ensures dst EP local 0 uses src EP local 0 (same global experts)
    #   - Different size (EP=8→EP=16): Local ranks 0-15 in dst, only 0-7 in src
    #     → Dst EP local 8 has no corresponding src EP local rank
    #     → Skip filter; expert reassignment handled by resolved_name matching
    #
    # Expert routing: When EP size changes, each expert parameter is matched via
    # resolved_name (which includes global expert index). The LCM/TP planner
    # handles any TP dimension changes, and DP round-robin distributes load.
    # ============================================================================
    dst_ep_group = dst_metadata.expert_parallel_group_ranks
    if dst_ep_group is not None:
        dst_ep_local = dst_ep_group.index(dst_rank)
        # Check if EP sizes match between source and destination
        src_ep_size = (
            len(src_meta_list[0].expert_parallel_group_ranks)
            if src_meta_list[0].expert_parallel_group_ranks
            else None
        )
        dst_ep_size = len(dst_ep_group)

        # Only filter by EP local rank when sizes match (non-collocated, not resharding)
        if src_ep_size == dst_ep_size:
            matching_ep = [
                m
                for m in src_meta_list
                if m.expert_parallel_group_ranks
                and m.expert_parallel_group_ranks.index(m.owner_rank) == dst_ep_local
            ]
            if not matching_ep:
                # This indicates a configuration bug: sizes match but no local rank match
                def _ep_local(m):
                    return (
                        m.expert_parallel_group_ranks.index(m.owner_rank)
                        if m.expert_parallel_group_ranks
                        else None
                    )

                available = [(m.owner_rank, _ep_local(m)) for m in src_meta_list]
                raise ValueError(
                    f"No source metadata with EP local rank {dst_ep_local}"
                    f" found for dst rank {dst_rank}. Available: {available}"
                )
            src_meta_list = matching_ep
        # else: EP resharding mode (sizes differ) - skip filter, keep all source candidates

    # ============================================================================
    # LOCAL COPY OPTIMIZATION (COLLOCATED MODE)
    # ============================================================================
    # In collocated mode, prefer local copies when available. If dst_rank appears
    # in the source metadata list (after TP/EP filtering), use it directly to
    # avoid unnecessary data transfers.
    #
    # A local copy is essentially free
    # (tensor.copy_() on same GPU), while any remote transfer incurs significant
    # overhead even within the same node.
    # ============================================================================
    local_meta = [m for m in src_meta_list if m.owner_rank == dst_rank]
    if local_meta:
        # Found local metadata - use it for a free local copy
        return local_meta[0]

    # ============================================================================
    # DATA PARALLELISM (DP) LOAD BALANCING
    # ============================================================================
    # After TP/EP filtering (if applicable), balance transfer load across source
    # data-parallel replicas. Each DP group holds a complete copy of the model,
    # so we can read from any DP group - choosing via round-robin spreads load.
    #
    # Load distribution: dst_rank % num_src_dp_groups ensures even distribution
    # even when destination has different DP configuration than source.
    # ============================================================================
    grouped_by_dp: dict[tuple[int, ...], list[ParameterMetadata]] = {}
    for meta in src_meta_list:
        dp_group = tuple(meta.data_parallel_group_ranks or [])
        grouped_by_dp.setdefault(dp_group, []).append(meta)

    # Fast path: only one DP group present; no balancing necessary
    if len(grouped_by_dp) == 1:
        return src_meta_list[0]

    # Round-robin selection across source DP groups based on destination global rank
    # This ensures even distribution: if we have 4 src DP groups and 128 dst ranks,
    # each src DP group will be selected by 32 dst ranks (128 / 4 = 32)
    sorted_dp_groups = sorted(grouped_by_dp.keys())
    chosen_group = sorted_dp_groups[dst_rank % len(sorted_dp_groups)]

    # Within the chosen DP group, distribute across available metadata entries
    # to balance load across all TP groups in the DP replica.
    # Example: With 4 TP groups in a DP group, dst_ranks will cycle through all 4
    # instead of always using the first one, better distributing transfer load.
    group_metadata = grouped_by_dp[chosen_group]
    within_group_idx = (dst_rank // len(sorted_dp_groups)) % len(group_metadata)
    selected = group_metadata[within_group_idx]
    return selected


logger = logging.getLogger(__name__)


================================================
FILE: megatron/core/safe_globals.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.

from argparse import Namespace
from io import BytesIO
from pathlib import PosixPath
from signal import Signals
from types import SimpleNamespace

import torch
from numpy import dtype, ndarray
from numpy.core.multiarray import _reconstruct
from numpy.dtypes import UInt32DType

from megatron.core.enums import ModelType
from megatron.core.optimizer import OptimizerConfig
from megatron.core.rerun_state_machine import RerunDiagnostic, RerunMode, RerunState
from megatron.core.transformer.enums import AttnBackend, CudaGraphScope

SAFE_GLOBALS = [
    SimpleNamespace,
    PosixPath,
    _reconstruct,
    ndarray,
    dtype,
    UInt32DType,
    Namespace,
    AttnBackend,
    CudaGraphScope,
    ModelType,
    OptimizerConfig,
    RerunDiagnostic,
    RerunMode,
    RerunState,
    BytesIO,
    Signals,
]


def register_safe_globals():
    """Register megatron-core safe classes with torch serialization."""
    for cls in SAFE_GLOBALS:
        torch.serialization.add_safe_globals([cls])


================================================
FILE: megatron/core/ssm/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.


================================================
FILE: megatron/core/ssm/gated_delta_net.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2025, Songlin Yang, Jan Kautz, Ali Hatamizadeh.

# Some of this code was adopted from https://github.com/huggingface/transformers
# This source code is licensed under the Apache license found in the
# LICENSE file in the root directory of this source tree.

import logging
from dataclasses import dataclass, replace
from typing import List, Optional, Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor

from megatron.core.dist_checkpointing import ShardedTensor
from megatron.core.dist_checkpointing.mapping import ReplicaId, ShardedTensorFactory
from megatron.core.fp8_utils import get_fp8_align_size
from megatron.core.inference.contexts import BaseInferenceContext
from megatron.core.jit import jit_fuser
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.tensor_parallel import get_cuda_rng_tracker
from megatron.core.transformer import TransformerConfig
from megatron.core.transformer.identity_op import IdentityOp
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.spec_utils import ModuleSpec, build_module
from megatron.core.transformer.utils import (
    ensure_metadata_has_dp_cp_group,
    make_sharded_tensors_for_checkpoint,
    sharded_state_dict_default,
)
from megatron.core.utils import deprecate_inference_params, nvtx_range_pop, nvtx_range_push

# TODO: Implement GatedDeltaNetContextParallel
# from .gated_delta_net_context_parallel import GatedDeltaNetContextParallel

try:
    from fla.modules.l2norm import l2norm
    from fla.ops.gated_delta_rule import chunk_gated_delta_rule

    HAVE_FLA = True
except ImportError:
    chunk_gated_delta_rule = None

    HAVE_FLA = False

try:
    from causal_conv1d import causal_conv1d_fn
except ImportError:
    causal_conv1d_fn = None
    causal_conv1d_update = None


logger = logging.getLogger(__name__)


@dataclass
class GatedDeltaNetSubmodules:
    """
    Contains the module specs for the input linear, output norm, and output linear layers.
    """

    in_proj: Union[ModuleSpec, type] = IdentityOp
    out_norm: Union[ModuleSpec, type] = IdentityOp
    out_proj: Union[ModuleSpec, type] = IdentityOp


class GatedDeltaNet(MegatronModule):
    """Gated Delta Net (GDN) layer class

    GDN layer takes input with size [s, b, h]
    and returns output of the same size.
    """

    def __init__(
        self,
        config: TransformerConfig,
        submodules: GatedDeltaNetSubmodules,
        layer_number: int = None,
        bias: bool = False,
        conv_bias: bool = False,
        conv_init: Optional[float] = None,
        use_qk_l2norm: bool = True,
        A_init_range: Tuple[float, float] = (1, 16),
        pg_collection: ProcessGroupCollection = None,
    ):
        """
        Args:
            config: The config of the model.
            submodules: Contains the module specs for the input and output linear layers.
            layer_number: The layer number of this GDN layer.
            bias: Whether to use bias in the linear layers.
            conv_bias: Whether to use bias in the causal convolution.
            conv_init: The initialization range for the causal convolution weights.
            use_qk_l2norm: Whether to use L2 normalization in the kernel of the gated delta rule.
            A_init_range: The initialization range for the attention weights.
            pg_collection: The required process groups to use for tensor model parallel and context
                parallel.
        """

        if not HAVE_FLA:
            raise ImportError(
                "FLA is not installed. Please install it with `pip install flash-linear-attention`."
            )

        super().__init__(config)

        # Attributes from arguments
        self.layer_number = layer_number
        self.bias = bias
        self.conv_bias = conv_bias
        self.conv_init = conv_init
        assert A_init_range[0] >= 0 and A_init_range[1] >= A_init_range[0]
        self.A_init_range = A_init_range
        self.use_qk_l2norm = use_qk_l2norm
        assert pg_collection is not None, "pg_collection must be provided for GatedDeltaNet"
        self.pg_collection = pg_collection
        self.tp_size = self.pg_collection.tp.size()
        self.sp_size = self.tp_size if config.sequence_parallel else 1

        # Attributes from config
        self.config = config
        self.hidden_size = config.hidden_size
        self.act_fn = config.activation_func
        self.activation = self.act_fn.__name__
        self.conv_kernel_dim = config.linear_conv_kernel_dim
        self.key_head_dim = config.linear_key_head_dim
        self.value_head_dim = config.linear_value_head_dim
        self.num_key_heads = config.linear_num_key_heads
        self.num_value_heads = config.linear_num_value_heads
        self.qk_dim = self.key_head_dim * self.num_key_heads
        self.v_dim = self.value_head_dim * self.num_value_heads

        # Input projection (hidden_states -> q, k, v, gate, beta, alpha)
        # TODO: for now, output gate is forced for GDN.
        # We may remove this restriction in the future.
        self.in_proj_dim = self.qk_dim * 2 + self.v_dim * 2 + self.num_value_heads * 2
        if self.config.fp8:
            fp8_align_size = get_fp8_align_size(self.config.fp8_recipe)
            assert self.in_proj_dim % fp8_align_size == 0, (
                "For FP8, the innermost dimension of the GDN layer "
                "input projection output tensor must be a multiple of 16."
            )
        self.in_proj = build_module(
            submodules.in_proj,
            self.hidden_size,
            self.in_proj_dim,
            config=self.config,
            init_method=self.config.init_method,
            gather_output=False,
            bias=bias,
            skip_bias_add=False,
            is_expert=False,
            tp_comm_buffer_name="fc1",
            tp_group=self.pg_collection.tp,
        )

        # Conv1d for QKV
        self.conv_dim = self.qk_dim * 2 + self.v_dim
        self.conv_dim_local_tp = self.conv_dim // self.tp_size

        # weight shape: [conv_dim, 1, d_conv]
        # bias shape: [conv_dim]
        self.conv1d = nn.Conv1d(
            in_channels=self.conv_dim_local_tp,
            out_channels=self.conv_dim_local_tp,
            bias=conv_bias,
            kernel_size=self.conv_kernel_dim,
            groups=self.conv_dim_local_tp,
            padding=self.conv_kernel_dim - 1,
            device=torch.cuda.current_device(),
            dtype=config.params_dtype,
        )
        setattr(self.conv1d.weight, "tensor_model_parallel", True)
        setattr(self.conv1d.weight, "partition_dim", 0)
        if conv_bias:
            setattr(self.conv1d.bias, "tensor_model_parallel", True)
            setattr(self.conv1d.bias, "partition_dim", 0)

        # Time step projection (discretization)
        self.num_v_heads_local_tp = self.num_value_heads // self.tp_size
        # dt_bias parameter
        self.dt_bias = nn.Parameter(
            torch.empty(
                self.num_v_heads_local_tp,
                dtype=config.params_dtype,
                device=torch.cuda.current_device(),
            )
        )
        setattr(self.dt_bias, "tensor_model_parallel", True)
        setattr(self.dt_bias, "partition_dim", 0)
        # A_log parameter
        self.A_log = nn.Parameter(
            torch.empty(
                self.num_v_heads_local_tp,
                dtype=config.params_dtype,
                device=torch.cuda.current_device(),
            )
        )
        setattr(self.A_log, "tensor_model_parallel", True)
        setattr(self.A_log, "partition_dim", 0)

        # Output layernorm before projection
        self.out_norm = build_module(
            submodules.out_norm,
            config=self.config,
            hidden_size=self.value_head_dim,
            eps=self.config.layernorm_epsilon,
        )

        self.out_proj = build_module(
            submodules.out_proj,
            self.v_dim,
            self.hidden_size,
            config=self.config,
            init_method=self.config.output_layer_init_method,
            bias=bias,
            input_is_parallel=True,
            skip_bias_add=True,
            is_expert=False,
            tp_comm_buffer_name="fc2",
            tp_group=self.pg_collection.tp,
        )

        # TODO: support CP

        self.reset_parameters()

    def reset_parameters(self):
        """Reset the parameters."""
        if self.config.perform_initialization:
            with get_cuda_rng_tracker().fork():
                # conv1d.weight
                if self.conv_init is not None:
                    nn.init.uniform_(self.conv1d.weight, -self.conv_init, self.conv_init)
                # dt_bias
                torch.ones(
                    self.num_v_heads_local_tp,
                    out=self.dt_bias.data,
                    dtype=self.config.params_dtype,
                    device=torch.cuda.current_device(),
                )
                # A_log
                A = torch.empty(
                    self.num_v_heads_local_tp,
                    dtype=self.config.params_dtype,
                    device=torch.cuda.current_device(),
                ).uniform_(*self.A_init_range)
                self.A_log.data.copy_(torch.log(A))

    def forward(
        self,
        hidden_states: Tensor,
        attention_mask: Tensor,
        key_value_states: Optional[Tensor] = None,
        inference_context: Optional[BaseInferenceContext] = None,
        attention_bias: Optional[Tensor] = None,
        packed_seq_params: Optional[PackedSeqParams] = None,
        sequence_len_offset: Optional[int] = None,
        *,
        inference_params: Optional[BaseInferenceContext] = None,
        **kwargs,
    ):
        """
        Perform a forward pass through the GDN module.

        Args:
            hidden_states (Tensor): Hidden states.
            attention_mask (Tensor): Attention mask.
            key_value_states (Optional[Tensor]): Key/value states (for cross attention).
            inference_context (Optional[BaseInferenceContext]): Inference context that manages
                KV cache.
            attention_bias (Optional[Tensor]): Attention bias.
            packed_seq_params (Optional[PackedSeqparams]): Parameters used for THD format.
            sequence_len_offset (Optional[int]): Sequence length offset used for
                inference CUDA graphs.

        Return:
            (Tuple[Tensor, Tensor]) GDN output and bias.

        """
        # TODO: Deal with attention_mask

        inference_context = deprecate_inference_params(inference_context, inference_params)

        seq_len, batch, _ = hidden_states.shape
        seq_len = seq_len * self.sp_size

        if inference_context is not None:
            assert (
                inference_context.is_static_batching()
            ), "GDN does not currently support dynamic inference batching."
            assert not self.config.sequence_parallel
            # TODO: support inference
            raise NotImplementedError("GDN does not support inference for now.")

        if packed_seq_params is not None:
            # TODO: support packed sequence
            raise NotImplementedError("GDN does not support packed sequence for now.")

        # Input projection
        nvtx_range_push(suffix="in_proj")
        qkvzba, _ = self.in_proj(hidden_states)
        nvtx_range_pop(suffix="in_proj")

        # Transpose: s b x --> b s x
        # From sbhd to bshd format
        qkvzba = qkvzba.transpose(0, 1)

        # Split, reorder, and reshape the tensor into q, k, v, gate, beta, alpha
        qkv, gate, beta, alpha = torch.split(
            qkvzba,
            [
                (self.qk_dim * 2 + self.v_dim) // self.tp_size,
                self.v_dim // self.tp_size,
                self.num_value_heads // self.tp_size,
                self.num_value_heads // self.tp_size,
            ],
            dim=-1,
        )
        gate = gate.reshape(batch, seq_len, -1, self.value_head_dim)
        beta = beta.reshape(batch, seq_len, -1)
        alpha = alpha.reshape(batch, seq_len, -1)

        # Convolution on qkv
        qkv = qkv.transpose(1, 2).contiguous()  # b, s, d -> b, d, s
        nvtx_range_push(suffix="conv1d")
        if (causal_conv1d_fn is None) or self.config.deterministic_mode:
            qkv = self.act_fn(self.conv1d(qkv)[..., :seq_len])
        else:
            assert self.activation in ["silu", "swish"]
            qkv = causal_conv1d_fn(
                x=qkv,
                weight=self.conv1d.weight.squeeze(1),  # d, 1, w -> d, w
                bias=self.conv1d.bias,
                activation=self.activation,
            )
        nvtx_range_pop(suffix="conv1d")
        # Split qkv into query, key, and value
        qkv = qkv.transpose(1, 2)  # b, d, s -> b, s, d
        query, key, value = torch.split(
            qkv,
            [self.qk_dim // self.tp_size, self.qk_dim // self.tp_size, self.v_dim // self.tp_size],
            dim=-1,
        )
        query = query.reshape(batch, seq_len, -1, self.key_head_dim)
        key = key.reshape(batch, seq_len, -1, self.key_head_dim)
        value = value.reshape(batch, seq_len, -1, self.value_head_dim)
        # Apply L2 norm to query and key
        if self.use_qk_l2norm:
            query = l2norm(query.contiguous())
            key = l2norm(key.contiguous())
        if self.num_value_heads // self.num_key_heads > 1:
            query = query.repeat_interleave(self.num_value_heads // self.num_key_heads, dim=2)
            key = key.repeat_interleave(self.num_value_heads // self.num_key_heads, dim=2)

        # Make contiguous
        query = query.contiguous()
        key = key.contiguous()
        value = value.contiguous()
        gate = gate.contiguous()
        beta = beta.contiguous()
        alpha = alpha.contiguous()

        # Calculate g and beta
        nvtx_range_push(suffix="g_and_beta")
        g = -self.A_log.exp() * F.softplus(alpha.float() + self.dt_bias)  # In fp32
        beta = beta.sigmoid()
        nvtx_range_pop(suffix="g_and_beta")

        nvtx_range_push(suffix="gated_delta_rule")
        if self.config.deterministic_mode:
            core_attn_out, last_recurrent_state = torch_chunk_gated_delta_rule(
                query,
                key,
                value,
                g=g,
                beta=beta,
                initial_state=None,
                output_final_state=False,
                use_qk_l2norm_in_kernel=False,
            )
        else:
            core_attn_out, last_recurrent_state = chunk_gated_delta_rule(
                query,
                key,
                value,
                g=g,
                beta=beta,
                initial_state=None,
                output_final_state=False,
                use_qk_l2norm_in_kernel=False,
            )
        nvtx_range_pop(suffix="gated_delta_rule")

        # RMSNorm
        nvtx_range_push(suffix="gated_norm")
        norm_out = self._apply_gated_norm(core_attn_out, gate)
        nvtx_range_pop(suffix="gated_norm")

        # Transpose: b s x --> s b x
        # From bshd back to sbhd format
        norm_out = norm_out.reshape(batch, seq_len, -1)
        norm_out = norm_out.transpose(0, 1).contiguous()

        # Output projection
        nvtx_range_push(suffix="out_proj")
        out, out_bias = self.out_proj(norm_out)
        nvtx_range_pop(suffix="out_proj")

        return out, out_bias

    @jit_fuser
    def _apply_gated_norm(self, x, gate):
        # Output Norm
        x_dtype = x.dtype
        x = x.reshape(-1, x.shape[-1])
        y = self.out_norm(x)
        # Output gate
        gate = gate.reshape(-1, gate.shape[-1])
        y = y * self.act_fn(gate.float())
        y = y.to(x_dtype)
        return y

    def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None, tp_group=None):
        """Provide a sharded state dictionary for distributed checkpointing."""
        # Guard for cases metadata is not provided
        metadata = ensure_metadata_has_dp_cp_group(metadata)

        sharded_state_dict = {}
        # Parameters
        self._save_to_state_dict(sharded_state_dict, "", keep_vars=True)
        sharded_state_dict = make_sharded_tensors_for_checkpoint(
            sharded_state_dict,
            prefix,
            tensor_parallel_layers_axis_map={
                "A_log": 0,
                "dt_bias": 0,
            },  # parameters sharded across TP
            sharded_offsets=sharded_offsets,
            tp_group=(tp_group if tp_group is not None else self.pg_collection.tp),
            dp_cp_group=metadata['dp_cp_group'],
        )
        # Submodules
        tp_group = tp_group if tp_group is not None else self.pg_collection.tp
        for name, module in self.named_children():
            if name == "conv1d":
                # Add TP sharding for Conv1d
                module_sd = module.state_dict(prefix="", keep_vars=True)
                tp_sharding_map = {f"weight": 0}
                if self.conv_bias:
                    tp_sharding_map[f"bias"] = 0
                module_sharded_sd = make_sharded_tensors_for_checkpoint(
                    module_sd,
                    f"{prefix}{name}.",
                    tp_sharding_map,
                    sharded_offsets,
                    tp_group=tp_group,
                    dp_cp_group=metadata['dp_cp_group'],
                )
            else:
                module_sharded_sd = sharded_state_dict_default(
                    module, f"{prefix}{name}.", sharded_offsets, metadata, tp_group=tp_group
                )

            sharded_state_dict.update(module_sharded_sd)

        # At this point the TP sharding is correctly defined for each tensor, but some of the
        # tensors must be additionally split into separate parts
        in_proj_dim_local_tp = self.in_proj_dim // self.tp_size
        assert sharded_state_dict[f"{prefix}in_proj.weight"].data.size(0) == in_proj_dim_local_tp, (
            in_proj_dim_local_tp,
            sharded_state_dict[f"{prefix}in_proj.weight"],
        )

        sharded_state_dict[f"{prefix}in_proj.weight"] = _split_tensor_factory(
            sharded_state_dict[f"{prefix}in_proj.weight"],
            [
                self.qk_dim // self.tp_size,
                self.qk_dim // self.tp_size,
                self.v_dim // self.tp_size,
                self.v_dim // self.tp_size,
                self.num_value_heads // self.tp_size,
                self.num_value_heads // self.tp_size,
            ],
            ["query", "key", "value", "z", "beta", "alpha"],
            0,
        )

        conv_layer_name_list = ["conv1d.weight"]
        assert (
            sharded_state_dict[f"{prefix}conv1d.weight"].data.size(0) == self.conv_dim_local_tp
        ), (self.conv_dim_local_tp, sharded_state_dict[f"{prefix}conv1d.weight"])
        if self.conv_bias:
            conv_layer_name_list.append("conv1d.bias")
            assert (
                sharded_state_dict[f"{prefix}conv1d.bias"].data.size(0) == self.conv_dim_local_tp
            ), (self.conv_dim_local_tp, sharded_state_dict[f"{prefix}conv1d.bias"])
        for conv_layer_name in conv_layer_name_list:
            sharded_state_dict[f"{prefix}{conv_layer_name}"] = _split_tensor_factory(
                sharded_state_dict[f"{prefix}{conv_layer_name}"],
                [
                    self.qk_dim // self.tp_size,
                    self.qk_dim // self.tp_size,
                    self.v_dim // self.tp_size,
                ],
                ["query", "key", "value"],
                0,
            )

        return sharded_state_dict

    def backward_dw(self):
        """Execute weight gradient computation for all linear layers."""
        self._backward_in_proj()
        self._backward_out_proj()

    def _backward_in_proj(self):
        """Computes weight gradients of input projection layer."""
        self.in_proj.backward_dw()

    def _backward_out_proj(self):
        """Computes weight gradients of output projection layer."""
        self.out_proj.backward_dw()


def _split_tensor_factory(
    orig_sh_ten: ShardedTensor, split_sections: List[int], split_names: List[str], split_dim: int
) -> ShardedTensorFactory:
    """Builds a factory that splits a given ShardedTensor into several independent chunks."""
    assert isinstance(orig_sh_ten, ShardedTensor), type(orig_sh_ten)
    orig_sh_ten_no_data = orig_sh_ten.without_data()  # remove `data` reference

    if sum(split_sections) != orig_sh_ten_no_data.local_shape[split_dim]:
        raise ValueError(
            f"Split sections must cover the whole dimension size, "
            f"got {split_sections=} vs dimensions size "
            f"{orig_sh_ten_no_data.local_shape[split_dim]}"
        )

    assert not isinstance(
        split_sections, int
    ), "Splitting into predefined section sizes is supported (`split_sections` must be a list)"
    assert len(split_sections) == len(split_names), (len(split_sections), len(split_names))

    @torch.no_grad()
    def sh_ten_build_fn(
        key: str, t: torch.Tensor, replica_id: ReplicaId, flattened_range: Optional[slice]
    ):
        factory_sh_ten = replace(
            orig_sh_ten_no_data,
            key=key,
            data=t,
            dtype=t.dtype,
            replica_id=replica_id,
            flattened_range=flattened_range,
        )

        chunk_sh_tens = []
        split_start = 0
        for split_size, split_name in zip(split_sections, split_names):
            split_chunks = factory_sh_ten.narrow(split_dim, split_start, split_size)
            for sh_ten in split_chunks:
                sh_ten.key = f"{sh_ten.key}.{split_name}"
            chunk_sh_tens.extend(split_chunks)
            split_start += split_size

        assert split_start == orig_sh_ten_no_data.local_shape[split_dim], (
            split_start,
            orig_sh_ten_no_data.local_shape[split_dim],
        )
        assert sum(sh_ten.data.numel() for sh_ten in chunk_sh_tens) == t.numel(), (
            chunk_sh_tens,
            t.shape,
        )
        return chunk_sh_tens

    @torch.no_grad()
    def sh_ten_merge_fn(sub_state_dict):
        return torch.cat(sub_state_dict)

    return ShardedTensorFactory(
        orig_sh_ten.key, orig_sh_ten.data, sh_ten_build_fn, sh_ten_merge_fn, orig_sh_ten.replica_id
    )


def torch_chunk_gated_delta_rule(
    query,
    key,
    value,
    g,
    beta,
    chunk_size=64,
    initial_state=None,
    output_final_state=False,
    use_qk_l2norm_in_kernel=False,
):
    # pylint: disable=line-too-long
    '''
    Torch-native implementation of chunked gated delta rule for deterministic mode.
    Need this because FLA is not deterministic.

    Reference: https://github.com/huggingface/transformers/blob/144c8ce2809a2e21914017652700e1ecb450501e/src/transformers/models/qwen3_next/modeling_qwen3_next.py#L470-L547
    '''

    initial_dtype = query.dtype
    if use_qk_l2norm_in_kernel:
        query = l2norm(query, dim=-1, eps=1e-6)
        key = l2norm(key, dim=-1, eps=1e-6)
    query, key, value, beta, g = [
        x.transpose(1, 2).contiguous().to(torch.float32) for x in (query, key, value, beta, g)
    ]

    batch_size, num_heads, sequence_length, k_head_dim = key.shape
    v_head_dim = value.shape[-1]
    pad_size = (chunk_size - sequence_length % chunk_size) % chunk_size
    query = F.pad(query, (0, 0, 0, pad_size))
    key = F.pad(key, (0, 0, 0, pad_size))
    value = F.pad(value, (0, 0, 0, pad_size))
    beta = F.pad(beta, (0, pad_size))
    g = F.pad(g, (0, pad_size))
    total_sequence_length = sequence_length + pad_size
    scale = 1 / (query.shape[-1] ** 0.5)
    query = query * scale

    v_beta = value * beta.unsqueeze(-1)
    k_beta = key * beta.unsqueeze(-1)
    # reshape to chunks
    query, key, value, k_beta, v_beta = [
        x.reshape(x.shape[0], x.shape[1], -1, chunk_size, x.shape[-1])
        for x in (query, key, value, k_beta, v_beta)
    ]
    g = g.reshape(g.shape[0], g.shape[1], -1, chunk_size)
    mask = torch.triu(
        torch.ones(chunk_size, chunk_size, dtype=torch.bool, device=query.device), diagonal=0
    )

    # chunk decay
    g = g.cumsum(dim=-1)
    decay_mask = ((g.unsqueeze(-1) - g.unsqueeze(-2)).tril().exp().float()).tril()
    attn = -((k_beta @ key.transpose(-1, -2)) * decay_mask).masked_fill(mask, 0)
    for i in range(1, chunk_size):
        row = attn[..., i, :i].clone()
        sub = attn[..., :i, :i].clone()
        attn[..., i, :i] = row + (row.unsqueeze(-1) * sub).sum(-2)
    attn = attn + torch.eye(chunk_size, dtype=attn.dtype, device=attn.device)
    value = attn @ v_beta
    k_cumdecay = attn @ (k_beta * g.exp().unsqueeze(-1))
    last_recurrent_state = (
        torch.zeros(batch_size, num_heads, k_head_dim, v_head_dim).to(value)
        if initial_state is None
        else initial_state.to(value)
    )
    core_attn_out = torch.zeros_like(value)
    mask = torch.triu(
        torch.ones(chunk_size, chunk_size, dtype=torch.bool, device=query.device), diagonal=1
    )

    # for each chunk
    for i in range(0, total_sequence_length // chunk_size):
        q_i, k_i, v_i = query[:, :, i], key[:, :, i], value[:, :, i]
        attn = (q_i @ k_i.transpose(-1, -2) * decay_mask[:, :, i]).masked_fill_(mask, 0)
        v_prime = (k_cumdecay[:, :, i]) @ last_recurrent_state
        v_new = v_i - v_prime
        attn_inter = (q_i * g[:, :, i, :, None].exp()) @ last_recurrent_state
        core_attn_out[:, :, i] = attn_inter + attn @ v_new
        last_recurrent_state = (
            last_recurrent_state * g[:, :, i, -1, None, None].exp()
            + (k_i * (g[:, :, i, -1, None] - g[:, :, i]).exp()[..., None]).transpose(-1, -2) @ v_new
        )

    if not output_final_state:
        last_recurrent_state = None
    core_attn_out = core_attn_out.reshape(
        core_attn_out.shape[0], core_attn_out.shape[1], -1, core_attn_out.shape[-1]
    )
    core_attn_out = core_attn_out[:, :, :sequence_length]
    core_attn_out = core_attn_out.transpose(1, 2).contiguous().to(initial_dtype)
    return core_attn_out, last_recurrent_state


================================================
FILE: megatron/core/ssm/mamba_block.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2024, Tri Dao, Albert Gu.

# Some of this code was adopted from https://github.com/state-spaces/mamba/
# This source code is licensed under the Apache license found in the
# LICENSE file in the root directory of this source tree.

from contextlib import nullcontext
from dataclasses import dataclass
from typing import Optional, Tuple, Union

import torch
from torch import Tensor, nn

from megatron.core.dist_checkpointing.mapping import ShardedStateDict
from megatron.core.dist_checkpointing.utils import replace_prefix_for_sharding
from megatron.core.enums import Fp8Recipe
from megatron.core.extensions.transformer_engine import TENorm
from megatron.core.fp4_utils import get_fp4_context
from megatron.core.fp8_utils import get_fp8_context
from megatron.core.inference.contexts import BaseInferenceContext
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.ssm.mamba_hybrid_layer_allocation import Symbols as LayerSymbols
from megatron.core.transformer import TransformerConfig
from megatron.core.transformer.enums import CudaGraphScope
from megatron.core.transformer.identity_op import IdentityOp
from megatron.core.transformer.module import GraphableMegatronModule, MegatronModule
from megatron.core.transformer.spec_utils import ModuleSpec, build_module
from megatron.core.transformer.transformer_layer import TransformerLayer
from megatron.core.transformer.utils import sharded_state_dict_default
from megatron.core.utils import WrappedTensor, deprecate_inference_params, make_viewless_tensor


@dataclass
class MambaStackSubmodules:
    """
    A class for the module specs for the MambaStack.
    """

    mamba_layer: Union[ModuleSpec, type] = IdentityOp
    attention_layer: Union[ModuleSpec, type] = IdentityOp
    mlp_layer: Union[ModuleSpec, type] = IdentityOp
    moe_layer: Union[ModuleSpec, type] = IdentityOp
    mtp_block_spec: Optional[ModuleSpec] = None


class MambaStack(GraphableMegatronModule, MegatronModule):
    """
    Constructor for the MambaStack class.

    Args:
        config (TransformerConfig): the model configuration
        submodules (MambaStackSubmodules): the submodules for the stack
        pre_process (bool, optional): whether to include an embedding layer.
            Defaults to True.
        layer_type_list (list, optional): pre-computed list of layer type symbols for
            this pipeline segment. When provided (by MambaModel), pipeline stage
            selection has already been done via '|' separators in the pattern.
        pp_layer_offset (int, optional): the global layer offset for this pipeline
            segment. Defaults to 0.
        post_layer_norm (bool, optional): whether to include a final layer norm.
            Defaults to True.
        post_process (bool, optional): whether to include an output layer.
            Defaults to True.
        device (optional): the device to use. Defaults to None.
        dtype (optional): the data type to use. Defaults to None.
        pg_collection (ProcessGroupCollection): the required model communication
            process groups to use.
        is_mtp_layer (bool, optional): whether this is an MTP layer. Defaults to False.
    """

    def __init__(
        self,
        config: TransformerConfig,
        submodules: MambaStackSubmodules,
        pre_process: bool = True,
        layer_type_list: Optional[list[str]] = None,
        pp_layer_offset: int = 0,
        post_layer_norm: bool = True,
        post_process: bool = True,
        device=None,
        dtype=None,
        pg_collection: ProcessGroupCollection = None,
        is_mtp_layer: bool = False,
    ) -> None:
        super().__init__(config=config)
        self.pre_process = pre_process
        self.post_layer_norm = post_layer_norm
        self.post_process = post_process
        self.is_mtp_layer = is_mtp_layer

        assert pg_collection is not None, "pg_collection must be provided for MambaStack"

        self.pp_group = pg_collection.pp
        self.tp_group = pg_collection.tp

        # Required for pipeline parallel schedules
        self.input_tensor = None
        self.pg_collection = pg_collection

        assert layer_type_list is not None, (
            "layer_type_list must be provided. It should be pre-computed from "
            "--hybrid-layer-pattern by MambaModel."
        )
        self.layer_type_list = layer_type_list

        # Build layers from the pre-selected segment
        self.layers = nn.ModuleList()
        for i, layer_type in enumerate(self.layer_type_list):
            layer_number = i + 1 + pp_layer_offset
            if self.config.fp8:
                quant_init_context = get_fp8_context(self.config, i + pp_layer_offset, is_init=True)
            elif self.config.fp4:
                quant_init_context = get_fp4_context(self.config, i + pp_layer_offset, is_init=True)
            else:
                quant_init_context = nullcontext()
            with quant_init_context:
                if layer_type == LayerSymbols.MAMBA:
                    layer = build_module(
                        submodules.mamba_layer,
                        config=self.config,
                        layer_number=layer_number,
                        pp_layer_offset=pp_layer_offset,
                        pg_collection=pg_collection,
                    )
                elif layer_type == LayerSymbols.ATTENTION:
                    layer = build_module(
                        submodules.attention_layer,
                        config=self.config,
                        layer_number=layer_number,
                        pg_collection=pg_collection,
                        is_mtp_layer=is_mtp_layer,
                        add_layer_offset=False,
                        pp_layer_offset=pp_layer_offset,
                    )
                elif layer_type == LayerSymbols.MLP:
                    layer = build_module(
                        submodules.mlp_layer,
                        config=self.config,
                        layer_number=layer_number,
                        pg_collection=pg_collection,
                        add_layer_offset=False,
                    )
                elif layer_type == LayerSymbols.MOE:
                    layer = build_module(
                        submodules.moe_layer,
                        config=self.config,
                        layer_number=layer_number,
                        pg_collection=pg_collection,
                        add_layer_offset=False,
                    )
                else:
                    assert False, "unexpected layer_type"
            self.layers.append(layer)

        # Required for activation recomputation
        self.num_layers_per_pipeline_rank = len(self.layers)

        if self.post_process and self.post_layer_norm:
            # Final layer norm before output.
            self.final_norm = TENorm(
                config=self.config,
                hidden_size=self.config.hidden_size,
                eps=self.config.layernorm_epsilon,
            )

    def set_input_tensor(self, input_tensor: Tensor):
        """Set input tensor to be used instead of forward()'s input.

        When doing pipeline parallelism the input from the previous
        stage comes from communication, not from the input, so the
        model's forward_step_func won't have it. This function is thus
        used by internal code to bypass the input provided by the
        forward_step_func"""
        self.input_tensor = input_tensor

    def mamba_state_shapes_per_request(self) -> Optional[Tuple[Tuple[int], Tuple[int]]]:
        """
        Returns the Mamba conv and ssm states shapes per input sequence
        if this block contains Mamba layers (this may not be the case with PP > 1).
        """
        for layer_type, layer in zip(self.layer_type_list, self.layers):
            if layer_type == LayerSymbols.MAMBA:
                return layer.mamba_state_shapes_per_request()
        return None

    def _should_call_local_cudagraph(self, *args, **kwargs):
        """
        Check if we should call the local cudagraph path.
        """
        if (
            not self.training
            and hasattr(self, 'cudagraph_manager')
            and kwargs['attention_mask'] is None
            and (
                kwargs.get('inference_context') is not None
                or kwargs.get('inference_params') is not None
            )
            and CudaGraphScope.full_iteration_inference in self.config.cuda_graph_scope
        ):
            if kwargs['inference_context'].is_static_batching():
                using_cuda_graph = kwargs['inference_context'].is_decode_only()
            else:
                using_cuda_graph = kwargs['inference_context'].using_cuda_graph_this_step()

            if using_cuda_graph:
                return True
        return False

    def __call__(self, *args, **kwargs):
        if self._should_call_local_cudagraph(*args, **kwargs):
            kwargs['hidden_states'] = (
                kwargs['hidden_states'].unwrap()
                if isinstance(kwargs['hidden_states'], WrappedTensor)
                else kwargs['hidden_states']
            )
            return super().__call__(*args, **kwargs)[0]
        return super().__call__(*args, **kwargs)

    def forward(
        self,
        hidden_states: Union[Tensor, WrappedTensor],
        attention_mask: Tensor,
        inference_context: Optional[BaseInferenceContext] = None,
        rotary_pos_emb: Optional[Tensor] = None,
        *,
        inference_params: Optional[BaseInferenceContext] = None,
        packed_seq_params: Optional[PackedSeqParams] = None,
        padding_mask=None,
    ):
        """
        Forward function of the MambaStack class.

        It either returns the Loss values if labels are given or the
            final hidden units

        Args:
            hidden_states (Union[Tensor, WrappedTensor]): the input tensor.
                Can be passed as a WrappedTensor during inference to avoid an obsolete
                reference in the calling function.
            attention_mask (Tensor): the attention mask.
            inference_context (BaseInferenceContext): the inference parameters.
            rotary_pos_emb (Tensor, optional): the rotary positional embeddings.
                Defaults to None.
        Returns:
            Tensor: the output tensor.
        """

        inference_context = deprecate_inference_params(inference_context, inference_params)

        if not self.pre_process:
            # See set_input_tensor()
            hidden_states = self.input_tensor

        # Delete the obsolete reference to the initial input tensor if necessary
        if isinstance(hidden_states, WrappedTensor):
            hidden_states = hidden_states.unwrap()

        if inference_context and inference_context.is_static_batching():
            # NOTE(bnorick): match BaseInferenceContext attributes for
            # mamba_ssm.utils.generation.BaseInferenceContext,
            # this hack supports eval
            inference_context.max_seqlen = inference_context.max_sequence_length
            inference_context.seqlen_offset = inference_context.sequence_len_offset

        if (
            (
                (
                    self.config.cuda_graph_impl == "local"
                    and CudaGraphScope.full_iteration not in self.config.cuda_graph_scope
                )
                or self.config.flash_decode
            )
            and inference_context
            and inference_context.is_static_batching()
            and not self.training
        ):
            current_batch_size = hidden_states.shape[1]
            sequence_len_offset = torch.tensor(
                [inference_context.sequence_len_offset] * current_batch_size,
                dtype=torch.int32,
                device='cuda',
            )
        else:
            sequence_len_offset = None

        # If fp8_recipe is delayed, wrap the entire pass with get_fp8_context(),
        # otherwise do nothing extra at the outer level
        # if we are using other fp8 recipes, then the context manager enter&exit are free
        # we can wrap fp8_context within the for loop over layers, so that we can fine-grained
        # control which layer will be fp8 or bf16
        use_outer_fp8_context = self.config.fp8 and self.config.fp8_recipe == Fp8Recipe.delayed
        use_inner_fp8_context = self.config.fp8 and self.config.fp8_recipe != Fp8Recipe.delayed
        use_fp4_context = self.config.fp4 is not None
        outer_fp8_context = get_fp8_context(self.config) if use_outer_fp8_context else nullcontext()

        if use_inner_fp8_context:

            def get_inner_quant_context(config, layer_number):
                return get_fp8_context(config, layer_number)

        elif use_fp4_context:

            def get_inner_quant_context(config, layer_number):
                return get_fp4_context(config, layer_number)

        else:

            def get_inner_quant_context(config, layer_number):
                return nullcontext()

        with outer_fp8_context:
            for layer in self.layers:
                # Layers have 1-indexed layer numbers attribute.
                inner_quant_context = get_inner_quant_context(self.config, layer.layer_number - 1)
                with inner_quant_context:
                    if isinstance(layer, TransformerLayer):
                        hidden_states, _ = layer(
                            hidden_states=hidden_states,
                            attention_mask=attention_mask,
                            inference_context=inference_context,
                            rotary_pos_emb=rotary_pos_emb,
                            sequence_len_offset=sequence_len_offset,
                            packed_seq_params=packed_seq_params,
                            padding_mask=padding_mask,
                        )
                    else:  # MambaLayer, Expert, or MLP
                        hidden_states = layer(
                            hidden_states=hidden_states,
                            attention_mask=attention_mask,
                            inference_context=inference_context,
                            packed_seq_params=packed_seq_params,
                        )

                # The attention layer (currently a simplified transformer layer)
                # outputs a tuple of (hidden_states, context). Context is intended
                # for cross-attention, and is not needed in our model.
                if isinstance(hidden_states, tuple):
                    hidden_states = hidden_states[0]

        # Final layer norm.
        if self.post_process and self.post_layer_norm:
            hidden_states = self.final_norm(hidden_states)

        # Ensure that the tensor passed between pipeline parallel stages is
        # viewless. See related notes in TransformerBlock and TransformerLayer
        hidden_states = make_viewless_tensor(
            inp=hidden_states, requires_grad=hidden_states.requires_grad, keep_graph=True
        )

        return hidden_states

    def sharded_state_dict(
        self,
        prefix: str = '',
        sharded_offsets: Optional[tuple] = None,
        metadata: Optional[dict] = None,
    ) -> ShardedStateDict:
        """
        Returns a sharded state dictionary for the current object.

        This function constructs a sharded state dictionary by iterating over the layers
        in the current object, computing the sharded state dictionary for each layer,
        and combining the results into a single dictionary.

        Parameters:
            prefix (str): The prefix to use for the state dictionary keys.
            sharded_offsets (tuple): The sharded offsets to use for the state dictionary.
            metadata (dict): Additional metadata to use when computing the sharded state dictionary.

        Returns:
            dict: The sharded state dictionary for the current object.
        """

        sharded_state_dict = {}
        layer_prefix = f'{prefix}layers.'

        for local_layer_idx, layer in enumerate(self.layers):

            global_layer_offset = layer.layer_number - 1  # self.layer_number starts at 1
            state_dict_prefix = (
                f'{layer_prefix}{local_layer_idx}.'  # module list index in MambaBlock
            )

            sharded_prefix = f'{layer_prefix}{global_layer_offset}.'
            sharded_pp_offset = []

            layer_sharded_state_dict = layer.sharded_state_dict(
                state_dict_prefix, sharded_pp_offset, metadata
            )

            replace_prefix_for_sharding(layer_sharded_state_dict, state_dict_prefix, sharded_prefix)

            sharded_state_dict.update(layer_sharded_state_dict)

        # Add modules other than self.layers
        for name, module in self.named_children():
            if not module is self.layers:
                sharded_state_dict.update(
                    sharded_state_dict_default(
                        module,
                        f'{prefix}{name}.',
                        sharded_offsets,
                        metadata,
                        tp_group=self.tp_group,
                    )
                )

        return sharded_state_dict


================================================
FILE: megatron/core/ssm/mamba_context_parallel.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from typing import Optional

import torch
import torch.nn as nn
import torch.nn.functional as F

from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.tensor_parallel import all_to_all
from megatron.core.utils import is_te_min_version

try:
    from einops import repeat

    HAVE_EINOPS = True
except ImportError:
    HAVE_EINOPS = False

try:
    # Register the TE CUDA kernels
    import transformer_engine  # pylint: disable=unused-import

    # Alias the PyTorch wrapper so we can call tex.* APIs
    import transformer_engine_torch as tex
except ImportError:
    # TE isn’t installed or the torch wrapper is missing
    tex = None


class MambaContextParallel:
    """
    This class provides the following functionality related to Mamba "all-to-all" context parallel:
    1. Error checking, and creation of, relevant parameters (e.g. nheads_local_tpcp)
    2. Collective operations on activations, on each context parallel rank, before and after the
       convolution and SSM
    3. A convolution operator that uses the correct slices of trainable variables on the current
       context parallel rank
    4. Sliced views of relevant trainable variables for the current context parallel rank

    This class is intentionally not a sub-class of MegatronModule. This class does not contain any
    trainable variables of its own and should not be involved in any checkpoint loading or saving.

    Args:
        cp_group (torch.distributed.ProcessGroup):
            The process group to use for context parallel.
        d_inner_local_tp (int): d_inner on the current tp rank
        nheads_local_tp (int): nheads on the current tp rank
        ngroups_local_tp (int): ngroups on the current tp rank
        d_state (int): Mamba d_state
        conv1d_cp1 (nn.Conv1d):
            The conv1d op which would be applied on this tp rank if cp_size was 1
        dt_bias_cp1 (torch.Tensor):
            The dt_bias parameter which would be used on this tp rank if cp_size was 1
        A_log_cp1 (torch.Tensor):
            The A_log parameter which would be used on this tp rank if cp_size was 1
        D_cp1 (torch.Tensor): The D parameter which would be used on this tp rank if cp_size was 1
        D_has_hdim (bool): D parameter is sized to hidden dimension, rather than being per-head
    """

    def __init__(
        self,
        cp_group: torch.distributed.ProcessGroup,
        d_inner_local_tp: int,
        nheads_local_tp: int,
        ngroups_local_tp: int,
        d_state: int,
        conv1d_cp1: nn.Conv1d,
        dt_bias_cp1: torch.Tensor,
        A_log_cp1: torch.Tensor,
        D_cp1: torch.Tensor,
        D_has_hdim: bool,
    ) -> None:
        if not HAVE_EINOPS:
            raise ImportError("einops is required by the Mamba model but cannot be imported")

        self.cp_group = cp_group
        self.d_inner_local_tp = d_inner_local_tp
        self.nheads_local_tp = nheads_local_tp
        self.ngroups_local_tp = ngroups_local_tp
        self.d_state = d_state
        self.conv1d_cp1 = conv1d_cp1
        self.dt_bias_cp1 = dt_bias_cp1
        self.A_log_cp1 = A_log_cp1
        self.D_cp1 = D_cp1
        self.D_has_hdim = D_has_hdim

        self.cp_size = self.cp_group.size()

        if self.cp_size == 1:
            self.d_inner_local_tpcp = self.d_inner_local_tp
            self.nheads_local_tpcp = self.nheads_local_tp
            self.ngroups_local_tpcp = self.ngroups_local_tp
            return

        self.cp_rank = self.cp_group.rank()

        # Ensure that each CP rank gets at least one head:
        assert (
            self.nheads_local_tp % self.cp_size == 0
        ), "nheads must be evenly divisible by tp_size * cp_size"
        # Note that an upper-bound on cp_size is nheads // tp_size
        self.nheads_local_tpcp = self.nheads_local_tp // self.cp_size

        # Note that we do not need to confirm that `d_inner_local_tp % cp_size == 0` because
        # `d_inner % headdim == 0`, `nheads = self.d_inner // headdim`,
        # `nheads % tp_size == 0`, `nheads_local_tp = nheads // tp_size`, and
        # `nheads_local_tp % cp_size == 0`
        self.d_inner_local_tpcp = self.d_inner_local_tp // self.cp_size

        # Ensure that each CP rank gets a positive integer number of groups:
        if self.ngroups_local_tp < self.cp_size:
            assert (
                self.cp_size % self.ngroups_local_tp == 0
            ), "cp_size must be evenly divisible by ngroups/tp_size"
            # Need to replicate the group state (shard the heads of each group) across CP ranks:
            self.group_repeat_count = self.cp_size // self.ngroups_local_tp
            self.ngroups_local_tpcp = 1
        else:
            assert (
                self.ngroups_local_tp % self.cp_size == 0
            ), "ngroups must be evenly divisible by tp_size * cp_size"
            # Group state is not replicted across CP ranks. All heads for any group are present on
            # one CP rank
            self.group_repeat_count = 1
            self.ngroups_local_tpcp = self.ngroups_local_tp // self.cp_size

        # Note that we do not need to confirm that `nheads_local_tpcp % ngroups_local_tpcp == 0`
        # because `nheads % ngroups == 0`, and therefore `nheads_local_tp % ngroups_local_tp == 0`,
        # and also `nheads_local_tpcp = nheads_local_tp // cp_size` whilst ngroups_local_tpcp is
        # either 1 or `ngroups_local_tp // cp_size`

    def pre_conv_ssm(
        self, input_: torch.Tensor, packed_seq_params: Optional[PackedSeqParams] = None
    ) -> torch.Tensor:
        """Method to be applied before the convolution and SSM"""
        if self.cp_size == 1:
            return input_

        z, x, B, C, dt = torch.split(
            input_,
            [
                self.d_inner_local_tp,
                self.d_inner_local_tp,  # z, x: [l_global//cp, b, d_inner]
                self.ngroups_local_tp * self.d_state,  # B: [l_global//cp, b, ngroups * d_state]
                self.ngroups_local_tp * self.d_state,  # C: [l_global//cp, b, ngroups * d_state]
                self.nheads_local_tp,  # dt : [l_global//cp, b, nheads]
            ],
            dim=-1,
        )

        # TODO (duncan): Can the some or all of the all_to_alls be combined?

        # [l_global//cp, b, d_inner] -> [l_global, b, d_inner//cp]
        z = _all_to_all_cp2hp(z, self.cp_group)

        # [l_global//cp, b, d_inner] -> [l_global, b, d_inner//cp]
        x = _all_to_all_cp2hp(x, self.cp_group)

        # Below, each group state will be repeated before moving to the next group state. This
        # causes replicas of the same group state to land on consecutive context parallel ranks,
        # along with their associated heads. This is consistent with consecutive group states being
        # associated with consecutive groups of heads.
        B = repeat(
            B,
            "l b (g n) -> l b (g r n)",
            g=self.ngroups_local_tp,
            n=self.d_state,
            r=self.group_repeat_count,
        )
        C = repeat(
            C,
            "l b (g n) -> l b (g r n)",
            g=self.ngroups_local_tp,
            n=self.d_state,
            r=self.group_repeat_count,
        )

        # [l_global//cp, b, g*r*n] -> [l_global, b, g*r*n//cp]
        B = _all_to_all_cp2hp(B, self.cp_group)

        # [l_global//cp, b, g*r*n] -> [l_global, b, g*r*n//cp]
        C = _all_to_all_cp2hp(C, self.cp_group)

        # [l_global//cp, b, nheads] -> [l_global, b, nheads//cp]
        dt = _all_to_all_cp2hp(dt, self.cp_group)

        output = torch.cat([z, x, B, C, dt], dim=-1)
        # TODO(duncan): for hybrid models, consider isolating load-balancing to attention layers
        output = _undo_attention_load_balancing(output, self.cp_size, packed_seq_params)

        return output

    def post_conv_ssm(
        self, input_: torch.Tensor, packed_seq_params: Optional[PackedSeqParams] = None
    ) -> torch.Tensor:
        """Method to be applied after the convolution and SSM"""
        if self.cp_size == 1:
            return input_
        else:
            return _all_to_all_hp2cp(
                _redo_attention_load_balancing(input_, self.cp_size, packed_seq_params),
                self.cp_group,
            )

    def conv1d(self, input_: torch.Tensor) -> torch.Tensor:
        """
        Performs a conv1d on one context parallel rank, using slices of the weight and bias from
        the convolution that would be run when cp_size=1
        """
        if self.cp_size == 1:
            return self.conv1d_cp1(input_)
        else:
            return F.conv1d(
                input=input_,
                weight=self.get_conv1d_weight(),
                bias=self.get_conv1d_bias(),
                stride=self.conv1d_cp1.stride,
                padding=self.conv1d_cp1.padding,
                dilation=self.conv1d_cp1.dilation,
                groups=self.conv1d_channels(),  # in_channels == out_channels == groups
            )

    # TODO(duncan): Make this a class instance variable?
    def conv1d_channels(self):
        """Returns the number of convolution channels on the current context parallel rank"""
        # The number of convolution input (or output) channels, per context parallel rank, is the
        # sum of the hidden (or feature) dimensions of x, B, and C, per context parallel rank
        return self.d_inner_local_tpcp + 2 * self.ngroups_local_tpcp * self.d_state

    def get_conv1d_weight(self) -> torch.Tensor:
        """Returns a slice of the conv1d weight relevant to the current context parallel rank"""
        # weight shape: [conv_dim, 1, d_conv]
        return self._slice_conv_param(self.conv1d_cp1.weight)

    def get_conv1d_bias(self) -> torch.Tensor:
        """Returns a slice of the conv1d bias relevant to the current context parallel rank"""
        # bias shape: [conv_dim]
        return self._slice_conv_param(self.conv1d_cp1.bias)

    def get_dt_bias(self) -> torch.Tensor:
        """Returns a slice of dt_bias relevant to the current context parallel rank"""
        return self._slice_vector_param(self.dt_bias_cp1)

    def get_A_log(self) -> torch.Tensor:
        """Returns a slice of A_log relevant to the current context parallel rank"""
        return self._slice_vector_param(self.A_log_cp1)

    def get_D(self) -> torch.Tensor:
        """Returns a slice of D relevant to the current context parallel rank"""
        return self._slice_vector_param(self.D_cp1, has_hdim=self.D_has_hdim)

    def _slice_conv_param(self, param: torch.Tensor) -> torch.Tensor:
        """
        Slices a cp_size=1 conv1d parameter (either weight or bias) along the first dimension,
        returning the parts of the parameter needed for convolution on the current context parallel
        rank. Parameter slicing is done in the forward path so that gradients will backpropagate to
        the cp_size=1 parameters.
        """
        if self.cp_size == 1:
            return param

        x, B, C = torch.split(
            param,
            [
                self.d_inner_local_tp,
                self.ngroups_local_tp * self.d_state,
                self.ngroups_local_tp * self.d_state,
            ],
            dim=0,
        )

        # Slicing section of parameter associated with x:
        size = self.d_inner_local_tpcp
        start = self.cp_rank * size
        end = start + size
        x_sliced = x[start:end, ...]

        # Slicing section of parameter associated with B and C:
        size = self.ngroups_local_tpcp * self.d_state
        start = (self.cp_rank // self.group_repeat_count) * size
        end = start + size
        B_sliced = B[start:end, ...]
        C_sliced = C[start:end, ...]

        return torch.cat([x_sliced, B_sliced, C_sliced], dim=0).contiguous()

    def _slice_vector_param(self, param: torch.Tensor, has_hdim: bool = False) -> torch.Tensor:
        """
        Slices a cp_size=1 vector parameter along the first dimension, returning the part of the
        parameter needed on the current context parallel rank. Parameter slicing is done in the
        forward path so that gradients will backpropagate to the cp_size=1 parameters.
        """
        if self.cp_size == 1:
            return param

        size = self.d_inner_local_tpcp if has_hdim else self.nheads_local_tpcp
        start = self.cp_rank * size
        end = start + size
        return param[start:end]


# TODO(duncan): Consider combining with all_to_all_sp2hp in mappings.py and using einops.rearrange
def _all_to_all_cp2hp(
    input_: torch.Tensor, cp_group: torch.distributed.ProcessGroup
) -> torch.Tensor:
    """
    Perform AlltoAll communication on a context parallel group, transform the
    input tensor from shape
    [global-sequence/context-parallel-size, batch, local-hidden] to
    [global-sequence, batch, local-hidden/context-parallel-size].

    Args:
        input_ (torch.Tensor):
            The input tensor, which is partitioned along the sequence dimension
        cp_group (torch.distributed.ProcessGroup):
            Process group to use for context parallel

    Returns:
        torch.Tensor: The output tensor with shape
            [global-sequence, batch, local-hidden/context-parallel-size].
    """
    assert input_.dim() == 3, "all_to_all_cp2hp assumes 3-d input shape."
    s_in, b_in, h_in = input_.shape
    # Squash the first two dimensions -> [s*b, h]
    input_ = input_.reshape(-1, h_in)
    # Split into world_size chunks along the h dimension
    world_size = cp_group.size()
    h_out = h_in // world_size
    split_tensors = torch.split(input_, split_size_or_sections=h_out, dim=1)
    # Concat the chunks along the s*b dimension
    concat_tensor = torch.cat(split_tensors, dim=0)
    # TODO(duncan): Can the following be optimized by using the non-single (tensor list) version of
    # all-to-all?
    # Swap chunks of dim0 across the cp ranks
    output = all_to_all(cp_group, concat_tensor)
    # Recover the s and b dimensions
    output = output.reshape(s_in * world_size, b_in, h_out)
    return output


# TODO(duncan): Consider combining with all_to_all_hp2sp in mappings.py and using einops.rearrange
def _all_to_all_hp2cp(
    input_: torch.Tensor, cp_group: torch.distributed.ProcessGroup
) -> torch.Tensor:
    """
    Perform AlltoAll communication on a context parallel group, transform the
    input tensor from shape
    [global-sequence, batch, local-hidden/context-parallel-size] to
    [global-sequence/context-parallel-size, batch, local-hidden].

    Args:
        input_ (torch.Tensor):
            The input tensor, which is partitioned along the hidden dimension
        cp_group (torch.distributed.ProcessGroup):
            Process group to use for context parallel

    Returns:
        torch.Tensor: The output tensor with shape
            [global-sequence/context-parallel-size, batch, local-hidden].
    """
    assert input_.dim() == 3, "all_to_all_hp2cp assumes 3-d input shape."
    s_in, b_in, h_in = input_.shape
    # Squash the first two dimensions -> [s*b, h]
    input_ = input_.reshape(-1, h_in)
    # Swap chunks of dim0 across the cp ranks
    input_exchanged = all_to_all(cp_group, input_)
    # Split into world_size chunks along the s*b dimension
    world_size = cp_group.size()
    s_out = s_in // world_size
    split_tensors = torch.split(input_exchanged, split_size_or_sections=s_out * b_in, dim=0)
    # Concat the chunks along the h dimension
    output = torch.cat(split_tensors, dim=-1)
    # Recover the s and b dimensions
    output = output.reshape(s_out, b_in, h_in * world_size)
    return output


def _undo_attention_load_balancing(
    input_: torch.Tensor, cp_size: int, packed_seq_params: Optional[PackedSeqParams] = None
) -> torch.Tensor:
    """
    Undoes the context parallel attention load balancing.
    For example (non-packed), for cp_size=3, converts 162534 to 123456 for
    sequential processing by the convolution and SSM.
    """
    if packed_seq_params is None:
        num_chunks_div_2 = cp_size
        num_chunks = num_chunks_div_2 * 2
        chunks = torch.chunk(input_, chunks=num_chunks, dim=0)
        order = [2 * i for i in range(num_chunks_div_2)] + [
            num_chunks - 2 * i - 1 for i in range(num_chunks_div_2)
        ]
        reordered_chunks = [chunks[i] for i in order]
        return torch.cat(reordered_chunks, dim=0)
    else:
        assert tex is not None and is_te_min_version("1.10.0"), (
            "Please update Transformer Engine to >= 1.10 to use "
            "Context Parallel with THD format data"
        )
        if packed_seq_params.cu_seqlens_q_padded is not None:
            cu_seqlens = packed_seq_params.cu_seqlens_q_padded
        else:
            cu_seqlens = packed_seq_params.cu_seqlens_q
        total_tokens = input_.size(0)
        assert total_tokens % cp_size == 0
        seqlen_per_rank = total_tokens // cp_size
        output = torch.empty_like(input_)
        for cp_rank in range(cp_size):
            start = cp_rank * seqlen_per_rank
            end = start + seqlen_per_rank
            index = tex.thd_get_partitioned_indices(cu_seqlens, total_tokens, cp_size, cp_rank)
            output[index] = input_[start:end]
        return output


def _redo_attention_load_balancing(
    input_: torch.Tensor, cp_size: int, packed_seq_params: Optional[PackedSeqParams] = None
) -> torch.Tensor:
    """
    Redo the context parallel attention load balancing.
    For example (non-packed), for cp_size=3, converts 123456 to 162534 for
    efficient processing by attention.
    """
    if packed_seq_params is None:
        num_chunks_div_2 = cp_size
        num_chunks = num_chunks_div_2 * 2
        chunks = torch.chunk(input_, chunks=num_chunks, dim=0)
        order = [None] * num_chunks
        order[::2] = range(num_chunks_div_2)  # order[even]
        order[1::2] = reversed(range(num_chunks_div_2, num_chunks))  # order[odd]
        reordered_chunks = [chunks[i] for i in order]
        return torch.cat(reordered_chunks, dim=0)
    else:
        assert tex is not None and is_te_min_version("1.10.0"), (
            "Please update Transformer Engine to >= 1.10 to use "
            "Context Parallel with THD format data"
        )
        if packed_seq_params.cu_seqlens_q_padded is not None:
            cu_seqlens = packed_seq_params.cu_seqlens_q_padded
        else:
            cu_seqlens = packed_seq_params.cu_seqlens_q
        total_tokens = input_.size(0)
        assert total_tokens % cp_size == 0
        seqlen_per_rank = total_tokens // cp_size
        index = torch.empty(total_tokens, device=input_.device, dtype=torch.int32)
        for cp_rank in range(cp_size):
            start = cp_rank * seqlen_per_rank
            end = start + seqlen_per_rank
            index[start:end] = tex.thd_get_partitioned_indices(
                cu_seqlens, total_tokens, cp_size, cp_rank
            )
        return input_.index_select(0, index)


================================================
FILE: megatron/core/ssm/mamba_hybrid_layer_allocation.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import logging
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple

import torch

from megatron.core.utils import log_on_each_pipeline_stage, log_single_rank

logger = logging.getLogger(__name__)


class Symbols:
    """Symbols for different layer types and pattern separators."""

    MAMBA = "M"
    ATTENTION = "*"
    MLP = "-"
    MOE = 'E'
    PIPE = '|'
    MTP_SEPARATOR = "/"
    VALID_LAYERS = {MAMBA, ATTENTION, MLP, MOE}


@dataclass
class ParsedHybridPattern:
    """Result of parsing a unified hybrid pattern string.

    A unified pattern encodes both the main decoder pattern and the MTP pattern
    in a single string using "/" as a separator. The main pattern may also
    contain "|" pipe symbols to define pipeline stage boundaries for flexible
    virtual pipeline parallelism (fVPP).

    Format: "<main_pattern>/<mtp_pattern>/<mtp_pattern>/..."

    Examples:
        - "M*M*" -> main="M*M*", mtp=None, depths=0 (no MTP)
        - "M*M*/MM/MM" -> main="M*M*", mtp="MM", depths=2
        - "MMMM/*M/*M/*M" -> main="MMMM", mtp="*M", depths=3
        - "M-M-|M-M*-/MM/MM" -> main="M-M-|M-M*-" (2 PP stages), mtp="MM", depths=2

    The "/" symbol introduces MTP patterns. Each repeated pattern after the main
    decoder represents one MTP prediction depth.

    The "|" symbol in the main pattern defines pipeline stage boundaries.

    Attributes:
        main_pattern: The main decoder layer pattern (e.g., "M*M*" or "M-M-|M-M*-")
        mtp_pattern: The MTP layer pattern per depth (e.g., "MM"), or None if no MTP
        mtp_num_depths: Number of MTP prediction depths (0 if no MTP)
    """

    main_pattern: Optional[str]
    mtp_pattern: Optional[str]
    mtp_num_depths: int


def pattern_from_ratios(
    num_layers: int, attention_ratio: float = 0.0, mlp_ratio: float = 0.0
) -> str:
    """Convert deprecated ratio arguments to a layer pattern string.

    Generates an evenly-spaced hybrid layer pattern from target attention and MLP
    ratios. This exists for backward compatibility with code that uses the deprecated
    hybrid_attention_ratio and hybrid_mlp_ratio parameters.

    Args:
        num_layers: Total number of layers.
        attention_ratio: Target ratio of attention layers to total layers.
        mlp_ratio: Target ratio of MLP layers to total layers.

    Returns:
        A layer pattern string (e.g., "MMM*MMM*MM").
    """
    assert num_layers > 0
    assert 0.0 <= attention_ratio <= 1.0
    assert 0.0 <= mlp_ratio <= 1.0
    assert attention_ratio + mlp_ratio <= 1.0

    # Allocate attention layers (evenly spaced, starting and ending with mamba)
    attention_count = round(num_layers * attention_ratio)
    mamba_count = num_layers - attention_count
    sections = attention_count + 1
    section_len = mamba_count / sections

    layer_types = [Symbols.MAMBA] * num_layers
    x = section_len
    for i in range(num_layers):
        if x < 0.5:
            layer_types[i] = Symbols.ATTENTION
            x += section_len
        else:
            x -= 1

    # Allocate MLP layers (evenly distributed, not replacing attention)
    mlp_count = round(num_layers * mlp_ratio)
    if mlp_count > 0:
        mamba_count -= mlp_count
        ratio = mamba_count / mlp_count
        x = ratio
        for i in range(num_layers):
            if layer_types[i] == Symbols.MAMBA:
                if x < 0.5:
                    layer_types[i] = Symbols.MLP
                    x += ratio
                else:
                    x -= 1

    return ''.join(layer_types)


def get_hybrid_total_layer_count(pattern: str) -> int:
    """Returns the total number of main decoder layers in a hybrid layer pattern.

    Extracts the main pattern (before the first MTP separator '/'), strips
    pipeline stage separators '|', and returns the character count.

    Args:
        pattern: Full hybrid layer pattern, possibly including MTP and pipe separators.

    Returns:
        Total number of layers in the main decoder pattern.
    """
    main_pattern = pattern.split(Symbols.MTP_SEPARATOR)[0]
    _validate_pattern(main_pattern, "main", allow_pipe=True)
    return len(main_pattern.replace(Symbols.PIPE, ''))


def get_hybrid_total_pipeline_segment_count(pattern: str) -> int:
    """Returns the number of pipeline segments in a hybrid layer pattern.

    Extracts the main pattern (before the first MTP separator '/') and counts
    the number of segments delimited by '|'.

    Args:
        pattern: Full hybrid layer pattern, possibly including MTP and pipe separators.

    Returns:
        Number of pipeline segments (pipe count + 1).
    """
    main_pattern = pattern.split(Symbols.MTP_SEPARATOR)[0]
    return main_pattern.count(Symbols.PIPE) + 1


def get_hybrid_layer_counts(pattern: str) -> Dict[str, int]:
    """Count layers by type across the full hybrid pattern (main + MTP).

    Parses the pattern to extract main and MTP components, then counts
    each layer type. Main pattern '|' separators are skipped. MTP layers
    are counted once per MTP depth.

    Args:
        pattern: Full hybrid layer pattern string.

    Returns:
        Dictionary mapping layer symbol to count. Keys are Symbols.ATTENTION,
        Symbols.MAMBA, Symbols.MLP, and Symbols.MOE.

    Examples:
        >>> get_hybrid_layer_counts("M*M*")
        {'*': 2, 'M': 2, '-': 0, 'E': 0}

        >>> get_hybrid_layer_counts("M-M-|M-M*-/MM/MM")
        {'*': 1, 'M': 8, '-': 4, 'E': 0}
    """
    parsed = parse_hybrid_pattern(pattern)
    counts = {Symbols.ATTENTION: 0, Symbols.MAMBA: 0, Symbols.MLP: 0, Symbols.MOE: 0}

    # Count main decoder layers (skip '|' pipe separators)
    if parsed.main_pattern:
        for char in parsed.main_pattern:
            if char in counts:
                counts[char] += 1

    # Count MTP layers (pattern repeated mtp_num_depths times)
    if parsed.mtp_pattern and parsed.mtp_num_depths > 0:
        for char in parsed.mtp_pattern:
            if char in counts:
                counts[char] += parsed.mtp_num_depths

    return counts


def parse_hybrid_pattern(pattern: Optional[str]) -> ParsedHybridPattern:
    """Parse a unified hybrid pattern string into main and MTP components.

    The pattern uses "/" as a separator between the main decoder pattern and
    MTP patterns. Each MTP pattern after the separator represents one prediction
    depth. The main pattern may contain "|" pipe symbols for pipeline stage
    boundaries.

    Format: "<main_pattern>/<mtp_pattern>/<mtp_pattern>/..."

    Args:
        pattern: Unified pattern string, e.g., "M*M*/MM/MM" or just "M*M*"

    Returns:
        ParsedHybridPattern with main_pattern, mtp_pattern, and mtp_num_depths

    Raises:
        ValueError: If MTP patterns are inconsistent (all must be identical)
        ValueError: If pattern contains invalid layer symbols

    Examples:
        >>> parse_hybrid_pattern("M*M*")
        ParsedHybridPattern(main_pattern="M*M*", mtp_pattern=None, mtp_num_depths=0)

        >>> parse_hybrid_pattern("M*M*/MM/MM")
        ParsedHybridPattern(main_pattern="M*M*", mtp_pattern="MM", mtp_num_depths=2)

        >>> parse_hybrid_pattern("MMMM/*M/*M/*M")
        ParsedHybridPattern(main_pattern="MMMM", mtp_pattern="*M", mtp_num_depths=3)

        >>> parse_hybrid_pattern("M-M-|M-M*-/MM/MM")
        ParsedHybridPattern(main_pattern="M-M-|M-M*-", mtp_pattern="MM", mtp_num_depths=2)
    """
    if pattern is None:
        return ParsedHybridPattern(main_pattern=None, mtp_pattern=None, mtp_num_depths=0)

    parts = pattern.split(Symbols.MTP_SEPARATOR)

    if len(parts) == 1:
        # No MTP separator found - pattern is main decoder only
        main_pattern = parts[0]
        _validate_pattern(main_pattern, "main", allow_pipe=True)
        return ParsedHybridPattern(main_pattern=main_pattern, mtp_pattern=None, mtp_num_depths=0)

    # First part is main decoder pattern
    main_pattern = parts[0]
    if main_pattern:
        _validate_pattern(main_pattern, "main", allow_pipe=True)

    # Remaining parts are MTP patterns (one per depth)
    mtp_parts = parts[1:]

    if not mtp_parts or all(p == "" for p in mtp_parts):
        # No MTP patterns after separator
        return ParsedHybridPattern(
            main_pattern=main_pattern if main_pattern else None, mtp_pattern=None, mtp_num_depths=0
        )

    # Validate all MTP patterns are identical
    mtp_pattern = mtp_parts[0]
    for i, part in enumerate(mtp_parts[1:], start=2):
        if part != mtp_pattern:
            raise ValueError(
                f"All MTP patterns must be identical. "
                f"Pattern 1 is '{mtp_pattern}', but pattern {i} is '{part}'. "
                f"Full pattern: '{pattern}'"
            )

    _validate_pattern(mtp_pattern, "MTP", allow_pipe=False)

    return ParsedHybridPattern(
        main_pattern=main_pattern if main_pattern else None,
        mtp_pattern=mtp_pattern,
        mtp_num_depths=len(mtp_parts),
    )


def _validate_pattern(pattern: str, pattern_name: str, allow_pipe: bool = False) -> None:
    """Validate that a pattern contains only valid layer symbols.

    Args:
        pattern: Layer pattern string to validate
        pattern_name: Name of pattern for error messages (e.g., "main" or "MTP")
        allow_pipe: Whether to allow the pipe '|' separator (for main patterns)

    Raises:
        ValueError: If pattern contains invalid symbols
    """
    valid_chars = Symbols.VALID_LAYERS | {Symbols.PIPE} if allow_pipe else Symbols.VALID_LAYERS
    for char in pattern:
        if char not in valid_chars:
            raise ValueError(
                f"In {pattern_name} pattern, '{char}' is not a valid layer symbol. "
                f"Valid symbols are: {valid_chars}"
            )


def validate_segment_layers(segment: str) -> List[str]:
    """Validate and convert a single pipeline segment pattern to a layer type list.

    This is used after the main pattern has been split by '|' into segments.
    Each segment should contain only valid layer symbols (no '|').

    Args:
        segment: A single pipeline segment pattern string (e.g., "M-M*-")

    Returns:
        List of layer type characters.

    Raises:
        ValueError: If segment contains invalid layer symbols.
    """
    layer_type_list = list(segment)
    for layer_char in layer_type_list:
        if layer_char not in Symbols.VALID_LAYERS:
            raise ValueError(
                f"In hybrid layer pattern segment, '{layer_char}' is not "
                f"one of {Symbols.VALID_LAYERS}"
            )
    return layer_type_list


def select_pipeline_segment(
    main_pattern: str,
    pp_group: Optional[torch.distributed.ProcessGroup],
    vp_stage: Optional[int],
    first_stage_layers: Optional[int] = None,
    last_stage_layers: Optional[int] = None,
) -> Tuple[List[str], int]:
    """Select and validate the pipeline segment for the given PP rank and VP stage.

    When the main pattern contains '|' pipe separators, splits by '|' into
    pipeline segments and selects the segment for the current PP rank / VP stage.

    When the pattern has no pipes but pp_size > 1, falls back to runtime layer
    slicing (for backwards compatibility), supporting both even and uneven PP splits
    via first_stage_layers / last_stage_layers.

    Args:
        main_pattern: Main decoder pattern (may contain '|' separators).
            Empty string is allowed (produces one empty segment).
        pp_group: Pipeline parallel process group, or None if not using PP.
        vp_stage: Virtual pipeline stage, or None if not using VPP.
        first_stage_layers: Number of layers on the first pipeline stage for
            uneven PP. Only valid when the pattern has no pipe separators.
        last_stage_layers: Number of layers on the last pipeline stage for
            uneven PP. Only valid when the pattern has no pipe separators.

    Returns:
        Tuple of (layer_type_list, layer_offset) where layer_type_list is
        the list of layer type characters for this segment, and layer_offset
        is the sum of layer counts from all preceding segments.

    Raises:
        ValueError: If the segment contains invalid layer symbols, if
            first/last_stage_layers are used with pipe separators, if VPP is
            requested without pipe separators, or if layer counts are not
            evenly divisible across pipeline stages.
    """
    segments = main_pattern.split(Symbols.PIPE) if main_pattern else ['']

    pp_rank = torch.distributed.get_rank(pp_group) if pp_group is not None else 0
    pp_size = torch.distributed.get_world_size(pp_group) if pp_group is not None else 1

    if len(segments) > 1 and (first_stage_layers is not None or last_stage_layers is not None):
        raise ValueError(
            "Cannot specify num_layers_in_first_pipeline_stage or "
            "num_layers_in_last_pipeline_stage when hybrid_layer_pattern "
            "contains pipe ('|') separators. The pipeline layout is already "
            "explicitly defined by the pipe separators."
        )

    if len(segments) == 1 and pp_size > 1:
        if vp_stage is not None:
            raise ValueError(
                "Virtual pipeline parallelism (vp_stage != None) is not supported "
                "when hybrid_layer_pattern has no pipe ('|') separators. "
                "Add '|' separators to define explicit pipeline/virtual-pipeline "
                "stage boundaries."
            )
        log_single_rank(
            logger,
            logging.WARNING,
            "DEPRECATION: Using hybrid_layer_pattern without pipe ('|') separators "
            "with pipeline_model_parallel_size > 1 is deprecated. Please add '|' "
            "separators to explicitly define pipeline stage boundaries. "
            "Example: 'M*M*M*M*' with pp_size=2 should become 'M*M*|M*M*'.",
        )
        full_pattern = segments[0]
        layer_type_list = validate_segment_layers(full_pattern)
        num_layers = len(layer_type_list)

        if first_stage_layers is not None or last_stage_layers is not None:
            first = first_stage_layers or 0
            last = last_stage_layers or 0
            middle_num_layers = num_layers - first - last
            middle_stages = pp_size - sum(
                1 for x in (first_stage_layers, last_stage_layers) if x is not None
            )
            if middle_stages > 0:
                if middle_num_layers % middle_stages != 0:
                    raise ValueError(
                        f"Middle layers ({middle_num_layers}) must be evenly divisible "
                        f"by middle pipeline stages ({middle_stages})."
                    )
                layers_per_middle = middle_num_layers // middle_stages
            else:
                layers_per_middle = 0

            is_first = first_stage_layers is not None and pp_rank == 0
            is_last = last_stage_layers is not None and pp_rank == pp_size - 1

            if is_first:
                offset = 0
                count = first
            elif is_last:
                offset = num_layers - last
                count = last
            else:
                middle_rank = pp_rank if first_stage_layers is None else pp_rank - 1
                offset = middle_rank * layers_per_middle + first
                count = layers_per_middle
        else:
            if num_layers % pp_size != 0:
                raise ValueError(
                    f"Number of layers ({num_layers}) must be evenly divisible "
                    f"by pipeline-model-parallel-size ({pp_size}) when no pipe "
                    f"separators are specified in the pattern."
                )
            layers_per_rank = num_layers // pp_size
            offset = pp_rank * layers_per_rank
            count = layers_per_rank

        selected = layer_type_list[offset : offset + count]
        log_on_each_pipeline_stage(
            logger,
            logging.INFO,
            f"MambaModel: pp_rank={pp_rank}/{pp_size}, vp_stage={vp_stage}, "
            f"layers='{''.join(selected)}' ({len(selected)} layers), "
            f"layer_offset={offset} (auto-split)",
        )
        return selected, offset

    # Pipe-based segment selection
    if len(segments) > 1 and len(segments) % pp_size != 0:
        raise ValueError(
            f"The number of pipe-delimited segments ({len(segments)}) in "
            f"hybrid_layer_pattern must be evenly divisible by "
            f"pipeline_model_parallel_size ({pp_size})."
        )

    vp_rel = vp_stage if vp_stage is not None else 0
    segment_index = vp_rel * pp_size + pp_rank

    if segment_index >= len(segments):
        raise ValueError(
            f"Pipeline segment index {segment_index} (pp_rank={pp_rank}, "
            f"vp_stage={vp_rel}) is out of range for {len(segments)} segments. "
            f"The pattern does not define enough pipe-delimited segments for "
            f"the current PP/VPP configuration."
        )

    layer_offset = sum(len(segments[i]) for i in range(segment_index))
    my_segment = segments[segment_index]

    layer_type_list = validate_segment_layers(my_segment)

    log_on_each_pipeline_stage(
        logger,
        logging.INFO,
        f"MambaModel: pp_rank={pp_rank}/{pp_size}, vp_stage={vp_rel}, "
        f"segment_index={segment_index}/{len(segments)}, "
        f"layers='{my_segment}' ({len(layer_type_list)} layers), "
        f"layer_offset={layer_offset}",
    )

    return layer_type_list, layer_offset


def get_layer_maps_from_layer_type_list(
    layer_type_list: List[str],
) -> Tuple[Dict[int, int], Dict[int, int], Dict[int, int]]:
    """
    Returns maps from global layer index to the corresponding layer index
    for each layer type in [Attention, Mamba, MLP, MoE] given a layer type list.
    """
    layer_types = [Symbols.ATTENTION, Symbols.MAMBA, Symbols.MLP, Symbols.MOE]
    layer_maps = {layer_type: {} for layer_type in layer_types}
    for global_layer_idx, layer_type in enumerate(layer_type_list):
        layer_map = layer_maps[layer_type]
        local_layer_idx = len(layer_map)
        layer_map[global_layer_idx] = local_layer_idx
    return [layer_maps[layer_type] for layer_type in layer_types]


================================================
FILE: megatron/core/ssm/mamba_layer.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2024, Tri Dao, Albert Gu.

# Some of this code was adopted from https://github.com/state-spaces/mamba/
# This source code is licensed under the Apache license found in the
# LICENSE file in the root directory of this source tree.

from dataclasses import dataclass, field
from typing import Dict, Optional, Protocol, Tuple, Union

import torch
from torch import Tensor

from megatron.core.dist_checkpointing.mapping import ShardedStateDict
from megatron.core.dist_checkpointing.utils import apply_prefix_mapping
from megatron.core.inference.contexts import BaseInferenceContext
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.transformer.enums import CudaGraphScope
from megatron.core.transformer.identity_op import IdentityOp
from megatron.core.transformer.module import GraphableMegatronModule
from megatron.core.transformer.spec_utils import ModuleSpec, build_module
from megatron.core.transformer.torch_norm import LayerNormInterface
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.typed_torch import apply_module
from megatron.core.utils import deprecate_inference_params


class LayerNormBuilder(Protocol):
    """A protocol showing how MambaLayer expects to construct its LayerNorm."""

    def __call__(self, config: TransformerConfig, hidden_size: int, /) -> LayerNormInterface: ...


@dataclass
class MambaLayerSubmodules:
    """
    Configuration class for specifying the submodules of a Mamba layer.

    This class defines the structure and default implementations for various
    components of a Mamba layer, allowing for flexible customization of the
    layer's architecture.

    Args:
        norm (Union[ModuleSpec, type]): Specification for the input layer normalization.
        mixer (Union[ModuleSpec, type]): Specification for the along-sequence mixing mechanism.
        mamba_bda (Union[ModuleSpec, type]): Specification for the bias-dropout-add operation
            after the mixer.
    """

    norm: LayerNormBuilder = IdentityOp
    mixer: Union[ModuleSpec, type] = IdentityOp
    mamba_bda: Union[ModuleSpec, type] = IdentityOp

    # Mapping for sharded tensor keys to be applied in `sharded_state_dict` method
    sharded_state_dict_keys_map: Dict[str, str] = field(default_factory=dict)


class MambaLayer(GraphableMegatronModule):
    """
    A single Mamba layer.

    Mamba layer takes input with size [s, b, h] and returns an
    output of the same size.
    """

    def __init__(
        self,
        config: TransformerConfig,
        submodules: MambaLayerSubmodules,
        layer_number: int = 1,
        pg_collection: ProcessGroupCollection = None,
        pp_layer_offset: int = 0,
    ):
        """Initialize Mamba Layer."""
        super().__init__(config)
        assert pg_collection is not None, "pg_collection must be provided for MambaLayer"

        self.config = config
        self.submodules_config = submodules
        self.layer_number = layer_number
        self.hidden_dropout = config.hidden_dropout
        self.mixer = build_module(
            submodules.mixer,
            self.config,
            d_model=self.config.hidden_size,
            layer_number=layer_number,
            pg_collection=pg_collection,
            pp_layer_offset=pp_layer_offset,
        )
        self.norm = submodules.norm(self.config, self.config.hidden_size)
        self.mamba_bda = build_module(submodules.mamba_bda)
        self.bias_dropout_add_exec_handler = torch.enable_grad

    def create_mcore_cudagraph_manager(self, config):
        """Register the mamba layer for cudagraphs."""
        from megatron.core.transformer.cuda_graphs import CudaGraphManager

        if not self.config.cuda_graph_scope or CudaGraphScope.mamba in self.config.cuda_graph_scope:
            self.cudagraph_manager = CudaGraphManager(config)

    def mamba_state_shapes_per_request(self) -> Tuple[Tuple[int], Tuple[int]]:
        """Returns the Mamba conv and ssm states shapes per request."""
        return self.mixer.mamba_state_shapes_per_request()

    def forward(
        self,
        hidden_states: Tensor,
        attention_mask: Optional[Tensor] = None,  # Not used in MambaLayer
        inference_context: Optional[BaseInferenceContext] = None,
        rotary_pos_emb: Optional[Tensor] = None,  # Not used in MambaLayer
        *,
        inference_params: Optional[BaseInferenceContext] = None,
        packed_seq_params: Optional[PackedSeqParams] = None,
    ):
        """
        Perform a forward pass through the Mamba layer.

        This method implements the core computation of a Mamba layer, including
        the convolution and the selective SSM/SSD.

        Args:
            hidden_states (Tensor): Input tensor of shape [s, b, h] where s is sequence length,
                b is batch size, and h is hidden size.
            attention_mask (Tensor): Mask tensor for self-attention. Not used by this layer.
            inference_context (BaseInferenceContext, optional): Parameters for inference-time
                optimizations.
            rotary_pos_emb (Tensor, optional): Rotary positional embeddings.

        Returns:
            output (Tensor): Transformed hidden states of shape [s, b, h].
        """

        inference_context = deprecate_inference_params(inference_context, inference_params)

        residual = hidden_states
        if self.config.fp32_residual_connection:
            residual = residual.float()

        hidden_states = hidden_states.to(dtype=self.config.params_dtype)
        hidden_states = apply_module(self.norm)(hidden_states)

        mixer_out_with_bias = self.mixer(
            hidden_states, inference_context=inference_context, packed_seq_params=packed_seq_params
        )

        with self.bias_dropout_add_exec_handler():
            hidden_states = self.mamba_bda(
                training=self.training, fused=self.config.bias_dropout_fusion
            )(mixer_out_with_bias, residual, self.hidden_dropout)

        return hidden_states

    def sharded_state_dict(
        self, prefix: str = '', sharded_offsets: tuple = (), metadata: Optional[dict] = None
    ) -> ShardedStateDict:
        """
        Generate a sharded state dictionary for the mamba layer.

        Args:
            prefix (str, optional): Prefix to be added to all keys in the state dict.
            sharded_offsets (tuple, optional): Tuple of sharding offsets.
            metadata (Optional[dict], optional): Additional metadata for sharding.

        Returns:
            ShardedStateDict: A dictionary containing the sharded state of the mamba layer.
        """
        sharded_state_dict = super().sharded_state_dict(prefix, sharded_offsets, metadata)
        prefixed_map = {
            f'{prefix}{k}': f'{prefix}{v}'
            for k, v in self.submodules_config.sharded_state_dict_keys_map.items()
        }
        if prefixed_map:
            apply_prefix_mapping(sharded_state_dict, prefixed_map)
        return sharded_state_dict

    def _te_cuda_graph_replay(self, *args, **kwargs):
        """
        CUDA graph replay for this layer and microbatch `self.current_microbatch` using TE
        interface. TransformerEngine versions>=1.10 allow keyword arguments with CUDA graph.
        However, CUDA graph accepts only Tensor inputs.
        Hence, `inference_context` is excluded from input list.
        """
        assert kwargs.get('inference_context') is None, (
            "CUDA graph accepts only Tensor inputs. inference_context is excluded from input list. "
            "For inference cuda graph, please use cuda_graph_impl=local instead."
        )
        return super()._te_cuda_graph_replay(*args, **kwargs)

    def _should_call_local_cudagraph(self, *args, **kwargs):
        """
        Check if we should call the local cudagraph path.
        """
        # Training and validation mode CUDA graphs.
        if (
            hasattr(self, 'cudagraph_manager')
            and kwargs.get('inference_context') is None
            and not torch.is_inference_mode_enabled()  # for inference eager dummy_forward
        ):
            return True
        elif not self.training and (
            hasattr(self, 'cudagraph_manager')
            and kwargs.get('attention_mask') is None
            and kwargs.get('inference_context') is not None
            and not self.config.cuda_graph_scope  # empty-list = per-layer CUDA graphs
        ):
            context = kwargs['inference_context']
            using_cuda_graph = (context.is_static_batching() and context.is_decode_only()) or (
                not context.is_static_batching() and context.using_cuda_graph_this_step()
            )
            return using_cuda_graph
        return False


================================================
FILE: megatron/core/ssm/mamba_mixer.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2024, Tri Dao, Albert Gu.

# Some of this code was adopted from https://github.com/state-spaces/mamba/
# This source code is licensed under the Apache license found in the
# LICENSE file in the root directory of this source tree.

import logging
import math
from dataclasses import dataclass, replace
from typing import List, Optional, Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F

from megatron.core import parallel_state
from megatron.core.dist_checkpointing import ShardedTensor
from megatron.core.dist_checkpointing.mapping import ReplicaId, ShardedTensorFactory
from megatron.core.inference.contexts import BaseInferenceContext, DynamicInferenceContext
from megatron.core.inference.contexts.attention_context.triton.tensor_ops import (
    tensor_get_slice_after,
    tensor_masked_update,
    tensor_merge,
)
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.ssm.ops.causal_conv1d_triton import causal_conv1d_update
from megatron.core.ssm.ops.mamba_ssm import selective_state_update
from megatron.core.tensor_parallel import get_cuda_rng_tracker
from megatron.core.transformer import TransformerConfig
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.spec_utils import ModuleSpec, build_module
from megatron.core.transformer.utils import (
    ensure_metadata_has_dp_cp_group,
    make_sharded_tensors_for_checkpoint,
    sharded_state_dict_default,
)
from megatron.core.utils import (
    deprecate_inference_params,
    is_causal_conv1d_min_version,
    is_mamba_min_version,
    is_using_quantization_scales,
    log_single_rank,
)

from .mamba_context_parallel import MambaContextParallel

try:
    from causal_conv1d import causal_conv1d_fn
    from causal_conv1d.causal_conv1d_varlen import causal_conv1d_varlen_states

except ImportError:
    causal_conv1d_fn = None

try:
    from mamba_ssm.ops.triton.layernorm_gated import RMSNorm as RMSNormGated
    from mamba_ssm.ops.triton.ssd_combined import (
        mamba_chunk_scan_combined,
        mamba_split_conv1d_scan_combined,
    )

    HAVE_MAMBA_SSM = True
except ImportError:
    mamba_chunk_scan_combined = None
    mamba_split_conv1d_scan_combined = None
    HAVE_MAMBA_SSM = False

try:
    from megatron.core.ssm.ops.ssd_combined import mamba_chunk_scan_combined_varlen

    HAVE_SSM_OPS_VARLEN = True
except ImportError:
    mamba_chunk_scan_combined_varlen = None
    HAVE_SSM_OPS_VARLEN = False

if not HAVE_MAMBA_SSM:
    from unittest.mock import MagicMock

    RMSNormGated = MagicMock()
    HAVE_MAMBA_SSM = False

try:
    from einops import rearrange, repeat

    HAVE_EINOPS = True
except ImportError:
    HAVE_EINOPS = False

logger = logging.getLogger(__name__)


class ExtendedRMSNorm(RMSNormGated):
    """
    RMSNormGated with sharded state dict.
    """

    def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None):
        """Sharding along axis 0, bias not sharded"""
        if not hasattr(self, 'tp_group'):
            self.tp_group = parallel_state.get_tensor_model_parallel_group()
        state_dict = self.state_dict(prefix="", keep_vars=True)
        return make_sharded_tensors_for_checkpoint(
            state_dict,
            prefix,
            {"weight": 0},
            sharded_offsets,
            tp_group=self.tp_group,
            dp_cp_group=metadata["dp_cp_group"],
        )


@dataclass
class MambaMixerSubmodules:
    """
    Contains the module specs for the input and output linear layers.
    """

    in_proj: Union[ModuleSpec, type] = None
    out_proj: Union[ModuleSpec, type] = None


class MambaMixer(MegatronModule):
    """
    Args:
        config: The config of the model.
        submodules: Contains the module specs for the input and output linear layers.
        d_model: The hidden size of the model.
        d_state: The state size of the SSM.
        d_conv: The number of channels in the causal convolution.
        conv_init: The initialization range for the causal convolution weights.
        expand: The expansion factor for the SSM.
        headdim: The hidden size of each attention head.
        ngroups: The number of attention heads.
        A_init_range: The initialization range for the attention weights.
        D_has_hdim: Whether the D parameter has the same number of dimensions as the hidden
            state.
        rmsnorm: Whether to use root mean square normalization.
        norm_before_gate: Whether to apply normalization before the gating mechanism.
        dt_min: The minimum value of the dt parameter.
        dt_max: The maximum value of the dt parameter.
        dt_init: The initialization value of the dt parameter.
        dt_scale: The scaling factor for the dt parameter.
        dt_init_floor: The minimum value of the dt parameter after initialization.
        bias: Whether to use bias in the linear layers.
        conv_bias: Whether to use bias in the causal convolution.
        chunk_size: The chunk size for the Mamba SSM fused kernel.
        use_mem_eff_path: Whether to use the memory-efficient path for the Mamba model.
        layer_number: The layer number of this Mamba layer.
        pg_collection: The required process groups to use for tensor model parallel and context
            parallel.
    """

    def __init__(
        self,
        config: TransformerConfig,
        submodules: MambaMixerSubmodules,
        d_model,
        d_conv=4,
        conv_init=None,
        expand=2,
        A_init_range=(1, 16),
        D_has_hdim=False,
        rmsnorm=True,
        norm_before_gate=False,
        dt_min=0.001,
        dt_max=0.1,
        dt_init="random",
        dt_scale=1.0,
        dt_init_floor=1e-4,
        bias=False,
        conv_bias=True,
        # Fused kernel and sharding options
        chunk_size=128,
        layer_number=None,
        pg_collection: ProcessGroupCollection = None,
        pp_layer_offset: int = 0,
    ):
        if not HAVE_MAMBA_SSM:
            raise ImportError(
                "MambaSSM is not installed. Please install it with `pip install mamba-ssm`."
            )

        if not HAVE_EINOPS:
            raise ImportError("einops is required by the Mamba model but cannot be imported")

        super().__init__(config)
        self.config = config
        self.d_model = d_model
        self.d_conv = d_conv
        self.conv_init = conv_init
        self.expand = expand
        self.d_inner = int(self.expand * self.d_model)
        self.D_has_hdim = D_has_hdim
        self.rmsnorm = rmsnorm
        self.norm_before_gate = norm_before_gate
        self.chunk_size = chunk_size
        self.layer_number = layer_number
        self.pp_layer_offset = pp_layer_offset
        self.cached_batch_size = None
        assert pg_collection is not None, "pg_collection must be provided for MambaMixer"
        self.pg_collection = pg_collection
        self.use_mem_eff_path = self.config.use_mamba_mem_eff_path
        self.d_state = self.config.mamba_state_dim
        self.headdim = self.config.mamba_head_dim
        self.ngroups = self.config.mamba_num_groups

        assert self.d_state is not None and self.d_state > 0
        assert self.headdim is not None and self.headdim > 0
        assert self.ngroups is not None and self.ngroups > 0

        if self.config.mamba_num_heads is not None:
            self.nheads = self.config.mamba_num_heads
            assert self.nheads > 0
            self.d_inner = self.nheads * self.headdim
        else:
            assert self.d_inner % self.headdim == 0, "d_inner must be evenly divisible by headdim"
            self.nheads = self.d_inner // self.headdim

        if self.config.fp8:
            assert (2 * self.d_inner + 2 * self.ngroups * self.d_state + self.nheads) % 16 == 0, (
                "For FP8, the innermost dimension of the Mamba layer "
                "input projection output tensor must be a multiple of 16."
            )

        tp_size = self.pg_collection.tp.size()

        # Ensure that each TP rank gets at least one head:
        assert self.nheads % tp_size == 0, "nheads must be evenly divisble by tp_size"
        self.nheads_local_tp = self.nheads // tp_size

        # Note that we do not need to confirm that `d_inner % tp_size == 0` because
        # `d_inner % headdim == 0`, `nheads = d_inner // headdim`, and `nheads % tp_size == 0`
        self.d_inner_local_tp = self.d_inner // tp_size

        # Ensure that each TP rank gets at least one group:
        assert self.ngroups % tp_size == 0, "ngroups must be evenly divisible by tp_size"
        self.ngroups_local_tp = self.ngroups // tp_size

        # Ensure that each group has a positive integer number of heads:
        assert self.nheads % self.ngroups == 0, "nheads must be evenly divisible by ngroups"

        assert not bias
        assert not self.norm_before_gate

        # Assume sequence parallelism: input is already partitioned along the sequence dimension
        self.in_proj = build_module(
            submodules.in_proj,
            self.d_model,
            self.d_inner * 2 + 2 * self.ngroups * self.d_state + self.nheads,  # z x B C dt
            config=self.config,
            init_method=self.config.init_method,
            gather_output=False,
            bias=bias,
            skip_bias_add=False,
            is_expert=False,
            tp_comm_buffer_name="fc1",
            tp_group=self.pg_collection.tp,
        )
        # in_proj packs [z, x, B, C, dt] into one ColumnParallelLinear.  Each
        # component is independently TP-sharded but with different sizes.  When
        # resharding across different TP sizes the planner must interleave
        # per-component blocks rather than doing a contiguous concat.
        # partition_sizes lists the per-TP-rank block sizes along partition_dim.
        in_proj_partition_sizes = [
            self.d_inner_local_tp,  # z
            self.d_inner_local_tp,  # x
            self.ngroups_local_tp * self.d_state,  # B
            self.ngroups_local_tp * self.d_state,  # C
            self.nheads_local_tp,  # dt
        ]
        setattr(self.in_proj.weight, "partition_sizes", in_proj_partition_sizes)

        if not self.use_mem_eff_path:
            log_single_rank(
                logger,
                logging.WARNING,
                (
                    "We are not currently using or functionally testing use_mem_eff_path==False "
                    "for training. It may not work as expected."
                ),
            )

        conv_dim = self.d_inner_local_tp + 2 * self.ngroups_local_tp * self.d_state  # x B C
        with get_cuda_rng_tracker().fork():
            # weight shape: [conv_dim, 1, d_conv]
            # bias shape: [conv_dim]
            self.conv1d = nn.Conv1d(
                in_channels=conv_dim,
                out_channels=conv_dim,
                bias=conv_bias,
                kernel_size=d_conv,
                groups=conv_dim,
                padding=d_conv - 1,
                device=torch.cuda.current_device(),
                dtype=config.params_dtype,
            )
            setattr(self.conv1d.weight, "tensor_model_parallel", True)
            setattr(self.conv1d.weight, "partition_dim", 0)
            setattr(self.conv1d.bias, "tensor_model_parallel", True)
            setattr(self.conv1d.bias, "partition_dim", 0)
            # partition_sizes describes the per-TP-rank block sizes along the
            # partition dim.  conv1d packs [x, B, C] whose local sizes differ,
            # so a plain contiguous concat would produce the wrong layout when
            # resharding across different TP sizes.
            conv_partition_sizes = [
                self.d_inner_local_tp,
                self.ngroups_local_tp * self.d_state,
                self.ngroups_local_tp * self.d_state,
            ]
            setattr(self.conv1d.weight, "partition_sizes", conv_partition_sizes)
            setattr(self.conv1d.bias, "partition_sizes", conv_partition_sizes)
            if self.config.perform_initialization:
                if self.conv_init is not None:
                    nn.init.uniform_(self.conv1d.weight, -self.conv_init, self.conv_init)
                else:
                    nn.init.kaiming_uniform_(self.conv1d.weight, a=math.sqrt(5))

        self.activation = "silu"
        self.act = nn.SiLU()

        with get_cuda_rng_tracker().fork():
            # Initialize dt bias so that F.softplus(dt_bias) is between dt_min and dt_max
            dt = torch.exp(
                torch.rand(
                    self.nheads_local_tp,
                    device=torch.cuda.current_device(),
                    dtype=config.params_dtype,
                )
                * (math.log(dt_max) - math.log(dt_min))
                + math.log(dt_min)
            ).clamp(min=dt_init_floor)
            # Inverse of softplus: https://github.com/pytorch/pytorch/issues/72759
            inv_dt = dt + torch.log(-torch.expm1(-dt))
            self.dt_bias = nn.Parameter(inv_dt)
            setattr(self.dt_bias, "tensor_model_parallel", True)
            setattr(self.dt_bias, "partition_dim", 0)

            # A parameter
            assert A_init_range[0] > 0 and A_init_range[1] >= A_init_range[0]
            A = torch.empty(
                self.nheads_local_tp, dtype=torch.float32, device=torch.cuda.current_device()
            )
            if self.config.perform_initialization:
                A = A.uniform_(*A_init_range)
            A_log = torch.log(A)  # Keep A_log in fp32
            self.A_log = nn.Parameter(A_log)
            setattr(self.A_log, "tensor_model_parallel", True)
            setattr(self.A_log, "partition_dim", 0)
        # D "skip" parameter
        self.D = nn.Parameter(
            torch.ones(
                self.d_inner_local_tp if self.D_has_hdim else self.nheads_local_tp,
                device=torch.cuda.current_device(),
            )
        )  # Keep in fp32
        setattr(self.D, "tensor_model_parallel", True)
        setattr(self.D, "partition_dim", 0)
        if self.rmsnorm:
            assert RMSNormGated is not None
            self.norm = ExtendedRMSNorm(
                self.d_inner_local_tp,
                eps=1e-5,
                group_size=self.d_inner_local_tp // self.ngroups_local_tp,
                norm_before_gate=self.norm_before_gate,
                device=torch.cuda.current_device(),
                dtype=config.params_dtype,
            )
            setattr(self.norm.weight, "tensor_model_parallel", True)
            setattr(self.norm.weight, "partition_dim", 0)
        # Assume sequence parallelism: input is partitioned along d_inner and
        # output is partitioned along the sequence dimension
        self.out_proj = build_module(
            submodules.out_proj,
            self.d_inner,
            self.d_model,
            config=self.config,
            init_method=self.config.output_layer_init_method,
            bias=bias,
            input_is_parallel=True,
            skip_bias_add=True,
            is_expert=False,
            tp_comm_buffer_name="fc2",
            tp_group=self.pg_collection.tp,
        )

        # Regarding `conv1d`.{`weight`, `bias`}, `dt_bias`, `A_log`, and `D`: these are the
        # trainable variables for the current tensor parallel rank, with each tensor parallel rank
        # having indepdendent trainable variables. All context parallel ranks in a tensor parallel
        # rank store the same trainable variables, but only use and update their unique/independent
        # slice of them.
        self.cp = MambaContextParallel(
            cp_group=self.pg_collection.cp,
            d_inner_local_tp=self.d_inner_local_tp,
            nheads_local_tp=self.nheads_local_tp,
            ngroups_local_tp=self.ngroups_local_tp,
            d_state=self.d_state,
            conv1d_cp1=self.conv1d,
            dt_bias_cp1=self.dt_bias,
            A_log_cp1=self.A_log,
            D_cp1=self.D,
            D_has_hdim=self.D_has_hdim,
        )
        self.tp_group = pg_collection.tp

    def forward(
        self,
        hidden_states,
        inference_context=None,
        *,
        inference_params: Optional[BaseInferenceContext] = None,
        packed_seq_params: Optional[PackedSeqParams] = None,
    ):
        """
        hidden_states: (nL, B, D) / (L B D)
        Returns: same shape as hidden_states
        """

        inference_context = deprecate_inference_params(inference_context, inference_params)

        in_inference_mode = inference_context is not None and not self.training

        _, batch, dim = hidden_states.shape
        conv_state, ssm_state = None, None

        if in_inference_mode:
            if inference_context.is_dynamic_batching():
                return self._dynamic_inference(hidden_states, inference_context)
            else:
                assert inference_context.is_static_batching()
                assert not self.config.sequence_parallel
                conv_state, ssm_state = self._get_states_from_cache(inference_context, batch)
                if inference_context.seqlen_offset > 0:
                    # The states are updated inplace
                    out, out_bias = self._decode(hidden_states, conv_state, ssm_state)
                    return out, out_bias

        zxBCdt, _ = self.in_proj(hidden_states)

        zxBCdt = self.cp.pre_conv_ssm(zxBCdt, packed_seq_params)

        if in_inference_mode or not self.use_mem_eff_path:
            # TODO(ksanthanam): Consider deprecating this path for training
            assert packed_seq_params is None, (
                "Training with packed sequences is not supported "
                "in the non-memory-efficient code path."
            )
            y = self._ssm_prefill(zxBCdt, conv_state=conv_state, ssm_state=ssm_state)
        else:
            assert ssm_state is None
            y = self._ssm_training(zxBCdt, packed_seq_params)

        out, out_bias = self.out_proj(y)

        return out, out_bias

    def _dynamic_inference(self, hidden_states: torch.Tensor, context: DynamicInferenceContext):
        """
        Executes dynamic inference by separating decode and prefill requests and
        running them independently.
        """
        sequence_packing_available, reason_for_no_sequence_packing = (
            _check_mamba_sequence_packing_support(for_inference_not_training=True)
        )
        assert sequence_packing_available, reason_for_no_sequence_packing

        # Grab standard states
        conv_state, ssm_state = context.mamba_states_cache(self.layer_number - self.pp_layer_offset)

        # Fetch intermediate states for speculative decoding
        # (just buffers, existing data is overwritten)
        int_conv_state = None
        int_ssm_state = None
        if context.num_speculative_tokens > 0:
            int_conv_state, int_ssm_state = context.mamba_states_cache(
                self.layer_number - self.pp_layer_offset, intermediate=True
            )

        padded_dims = context.padded_batch_dimensions
        token_count = padded_dims.token_count
        decode_req_count = padded_dims.decode_req_count
        prefill_req_count = padded_dims.prefill_req_count

        # Input projection
        zxBCdt, _ = self.in_proj(hidden_states)

        y_decode = None
        y_prefill = None

        # Decode
        if decode_req_count > 0:
            # For mixed batch, the decode tokens are at the start of zxBCdt
            seq_len = 1 + context.num_speculative_tokens
            decode_token_count = decode_req_count * seq_len

            zxBCdt_decode = zxBCdt[:decode_token_count] if prefill_req_count > 0 else zxBCdt

            # Reshape from [N*S, 1, d] to [N, S, d] for the 3D Triton kernels
            zxBCdt_decode = zxBCdt_decode.squeeze(1).view(decode_req_count, seq_len, -1)

            y_decode = self._ssm_decode(
                zxBCdt_decode,
                conv_state,
                ssm_state,
                batch_indices=context.mamba_metadata.batch_indices_decode,
                intermediate_conv_state=int_conv_state,
                intermediate_ssm_state=int_ssm_state,
            )

            # Flatten back to [N*S, 1, d] to match merge logic
            y_decode = y_decode.view(decode_token_count, 1, -1)

        # Prefill
        if prefill_req_count > 0:
            if decode_req_count > 0:
                # If mixed, slice the prefill portion out of zxBCdt
                zxBCdt_prefill = torch.empty_like(zxBCdt)
                tensor_get_slice_after(
                    zxBCdt,
                    zxBCdt_prefill,
                    context.mamba_metadata.device_decode_prefill,
                    check_bounds=False,
                )
            else:
                zxBCdt_prefill = zxBCdt

            intermediate_offsets = (
                context.mamba_slot_allocator.get_intermediate_offsets()
                if context.mamba_slot_allocator is not None
                else None
            )
            result = self._dynamic_inference_prefill(
                zxBCdt_prefill,
                context,
                conv_state,
                ssm_state,
                intermediate_token_offsets=intermediate_offsets,
            )
            if isinstance(result, tuple):
                y_prefill, intermediate_states = result
                mamba_layer_idx = context.layer_map[self.layer_number - self.pp_layer_offset - 1]
                if context.mamba_slot_allocator is not None:
                    context.mamba_slot_allocator.buffer_intermediate_states(
                        mamba_layer_idx, intermediate_states
                    )
            else:
                y_prefill = result

        # Merge decode and prefill results if necessary
        if y_decode is not None and y_prefill is not None:
            y = torch.empty(
                [token_count, 1, y_prefill.shape[-1]],
                dtype=y_prefill.dtype,
                device=y_prefill.device,
            )
            tensor_merge(
                y_decode, y_prefill, context.mamba_metadata.device_decode_prefill, output_tensor=y
            )
        elif y_decode is not None:
            y = y_decode
        elif y_prefill is not None:
            y = y_prefill
        else:
            raise RuntimeError("Dynamic inference called with 0 decode and 0 prefill requests")

        # Clear the outputs for padding tokens when using quantization scales
        # to avoid corrupting amax calculations
        if is_using_quantization_scales(self.config):
            y[context.padding_slice] = 0.0

        # Output projection
        out, out_bias = self.out_proj(y)

        return out, out_bias

    def _dynamic_inference_prefill(
        self,
        zxBCdt: torch.Tensor,
        context: DynamicInferenceContext,
        conv_state: torch.Tensor,
        ssm_state: torch.Tensor,
        intermediate_token_offsets: Optional[List[List[int]]] = None,
    ) -> Union[torch.Tensor, Tuple[torch.Tensor, List]]:
        """Helper to run dynamic inference prefill.

        All prefill requests (including chunked prefill) are processed together
        through the unified varlen path. Uses precomputed metadata from
        MambaMetadata.update() to avoid .item() calls and data-dependent
        control flow, enabling CUDA graph compatibility.
        """
        metadata = context.mamba_metadata
        real_prefill_count = context.batch_dimensions.prefill_req_count
        if real_prefill_count <= 0:
            return None

        # Use precomputed metadata (no .item() calls, no stripping).
        cu_seqlens = metadata.cu_seqlens
        batch_indices = metadata.batch_indices_prefill
        real_token_count = metadata.real_prefill_token_count
        seq_idx = metadata.seq_idx

        # Pass full padded tensor — SSM kernel uses cu_chunk_seqlens for
        # boundaries and never accesses tokens beyond the last boundary.
        # Output y is initialized to zeros in _ssm_prefill so padding
        # positions remain zero (safe for RMSNorm and downstream ops).

        use_triton_conv1d = context._use_triton_conv1d_this_step

        result = self._ssm_prefill(
            zxBCdt,
            conv_state=conv_state,
            ssm_state=ssm_state,
            seq_idx=seq_idx,
            cu_seqlens=cu_seqlens,
            batch_indices=batch_indices,
            intermediate_token_offsets=intermediate_token_offsets,
            use_triton_conv1d=use_triton_conv1d,
            cu_chunk_seqlens=metadata.cu_chunk_seqlens,
            last_chunk_indices=metadata.last_chunk_indices,
            seq_idx_for_varlen=metadata.seq_idx_for_varlen,
            cu_seqlens_list=metadata.cu_seqlens_list,
            real_token_count=real_token_count,
            conv_seq_idx=metadata.conv_seq_idx,
            conv_seq_start=metadata.conv_seq_start,
        )

        if intermediate_token_offsets is not None:
            y_prefill, intermediate_states = result
        else:
            y_prefill = result
            intermediate_states = None

        if intermediate_states is not None:
            return y_prefill, intermediate_states
        return y_prefill

    def _decode(
        self, hidden_states, conv_state, ssm_state, batch_indices: Optional[torch.Tensor] = None
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Performs inference step for decoding."""
        # assert self.ngroups_local_tp == 1, "Only support ngroups=1 for inference for now"
        is_dynamic_batching = batch_indices is not None

        if not is_dynamic_batching:
            assert (
                hidden_states.shape[0] == 1
            ), "Only support decoding with 1 token at a time for now"

        # (1, b, d_model) -> (1, b, proj_dim)
        zxBCdt, _ = self.in_proj(hidden_states)

        # Make batch size leading dimension since that is 1
        if is_dynamic_batching:
            zxBCdt = zxBCdt.transpose(0, 1)

        assert self.cp.cp_size == 1, "Context parallel not supported for Mamba inferenece decode"

        y = self._ssm_decode(
            zxBCdt, conv_state=conv_state, ssm_state=ssm_state, batch_indices=batch_indices
        )

        # Restore sequence length as first dimension
        if is_dynamic_batching:
            y = y.transpose(0, 1)

        # y has shape (1, b, d_inner), which is what out_proj expects
        out, out_bias = self.out_proj(y)

        return out, out_bias

    def _ssm_training(
        self, zxBCdt: torch.Tensor, packed_seq_params: Optional[PackedSeqParams] = None
    ) -> torch.Tensor:
        """
        Performs SSM computation for training step.

        Uses the memory-efficient kernel `mamba_split_conv1d_scan_combined` which reduces the size
        of forward activations stored for backprop and therefore reduces memory pressure during
        training.
        """

        # transpose: l b pd --> b l pd
        zxBCdt = rearrange(zxBCdt, "l b d -> b l d").contiguous()

        # (nheads_local_tpcp)
        A = -torch.exp(self.cp.get_A_log().float())

        # TODO(duncan): Can this code be removed?
        if self.conv1d.bias is not None:
            self.conv1d.bias.data_ptr()

        seq_idx = None
        if packed_seq_params is not None:
            sequence_packing_available, reason_for_no_sequence_packing = (
                _check_mamba_sequence_packing_support(for_inference_not_training=False)
            )
            assert sequence_packing_available, reason_for_no_sequence_packing
            seq_idx = packed_seq_params.seq_idx

        y = mamba_split_conv1d_scan_combined(
            zxBCdt,
            rearrange(self.cp.get_conv1d_weight(), "d 1 w -> d w"),
            self.cp.get_conv1d_bias(),
            self.cp.get_dt_bias().float(),
            A,
            D=(
                rearrange(self.cp.get_D().float(), "(h p) -> h p", p=self.headdim)
                if self.D_has_hdim
                else self.cp.get_D()
            ),
            chunk_size=self.chunk_size,
            activation=self.activation,
            headdim=None if self.D_has_hdim else self.headdim,
            ngroups=self.cp.ngroups_local_tpcp,
            norm_before_gate=self.norm_before_gate,
            seq_idx=seq_idx,
        )

        y = rearrange(y, "b l d -> l b d").contiguous()
        y = self.cp.post_conv_ssm(y, packed_seq_params)

        if self.rmsnorm:
            y = self.norm(y)

        return y

    def _ssm_prefill(
        self,
        zxBCdt: torch.Tensor,
        conv_state: Optional[torch.Tensor],
        ssm_state: Optional[torch.Tensor],
        seq_idx: Optional[torch.Tensor] = None,
        cu_seqlens: Optional[torch.Tensor] = None,
        batch_indices: Optional[torch.Tensor] = None,
        intermediate_token_offsets: Optional[List[List[int]]] = None,
        use_triton_conv1d: bool = False,
        cu_chunk_seqlens: Optional[torch.Tensor] = None,
        last_chunk_indices: Optional[torch.Tensor] = None,
        seq_idx_for_varlen: Optional[torch.Tensor] = None,
        cu_seqlens_list: Optional[List[int]] = None,
        real_token_count: Optional[int] = None,
        conv_seq_idx: Optional[torch.Tensor] = None,
        conv_seq_start: Optional[torch.Tensor] = None,
    ) -> Union[torch.Tensor, Tuple[torch.Tensor, List]]:
        """
        Performs SSM computation for inference prefill step.

        Args:
            zxBCdt: The input tensor of shape (l, b, d), which is a concatenation of
                z, x, B, C, and dt projections.
            conv_state: The convolution state tensor for inference.
            ssm_state: The selective scan state tensor for inference.
            seq_idx: A map from token index to request index for variable-length sequences.
            cu_seqlens: Cumulative sequence lengths for variable-length sequences.
            batch_indices: A map from batch id to position in the Mamba state tensors for
                dynamic inference.
            intermediate_token_offsets: Per-request list of token offsets (relative to
                sequence start) at which to extract intermediate SSM and conv states.
                Offsets must be multiples of chunk_size.
            cu_chunk_seqlens: Precomputed chunk boundaries from MambaMetadata.
            last_chunk_indices: Precomputed last chunk index per sequence.
            seq_idx_for_varlen: Precomputed request ID per chunk.
            cu_seqlens_list: Python list of cumulative sequence lengths (avoids .item()).
            real_token_count: Number of real (non-padding) tokens.
            conv_seq_idx: Precomputed per-token request ID for Triton conv1d.
            conv_seq_start: Precomputed per-token request start for Triton conv1d.

        Returns:
            If intermediate_token_offsets is None: output tensor of shape (l, b, d).
            If provided: (output, intermediate_states_per_request) where each entry is
            (ssm_states, conv_states) or None.
        """
        is_dynamic_batching = seq_idx is not None

        # transpose: l b pd --> b l pd
        zxBCdt = rearrange(zxBCdt, "l b d -> b l d").contiguous()

        # (nheads_local_tpcp)
        A = -torch.exp(self.cp.get_A_log().float())

        z, xBC, dt = torch.split(
            zxBCdt,
            [
                self.cp.d_inner_local_tpcp,
                self.cp.d_inner_local_tpcp + 2 * self.cp.ngroups_local_tpcp * self.d_state,
                self.cp.nheads_local_tpcp,
            ],
            dim=-1,
        )

        # Compute short convolution
        xBC_pre_conv = None
        if conv_state is not None and is_dynamic_batching:
            assert batch_indices is not None

            # Extract initial conv states BEFORE saving new ones.
            # causal_conv1d_varlen_states computes the final conv state from the
            # input sequence and tensor_masked_update writes it into the conv_state
            # buffer. If we read initial_conv_states after this write, restored
            # requests see their own newly-computed states instead of the cached
            # initial states from a previous request, corrupting the conv output.
            initial_conv_states = conv_state[batch_indices, :, 1:]

            # Save final conv states from the input sequence
            conv_varlen_states = causal_conv1d_varlen_states(
                xBC.squeeze(0), cu_seqlens, state_len=conv_state.shape[-1]
            )
            tensor_masked_update(conv_state, batch_indices, conv_varlen_states)

            # Conv state dtype might differ from params dtype, so cast xBC and weight / bias
            # tensors to the conv state dtype for causal_conv1d_varlen_fn and then cast xBC
            # back to the original dtype
            xBC_dtype = xBC.dtype
            conv_state_dtype = conv_state.dtype

            xBC = xBC.to(conv_state_dtype)
            conv_weight = rearrange(self.cp.get_conv1d_weight(), "d 1 w -> d w").to(
                conv_state_dtype
            )
            conv_bias = self.cp.get_conv1d_bias().to(conv_state_dtype)

            xBC_pre_conv = xBC if intermediate_token_offsets is not None else None
            if use_triton_conv1d:
                from megatron.core.ssm.ops.causal_conv1d_varlen import causal_conv1d_varlen_fn

                xBC_out = causal_conv1d_varlen_fn(
                    x=xBC.squeeze(0).contiguous(),
                    weight=conv_weight,
                    bias=conv_bias,
                    cu_seqlens=cu_seqlens,
                    initial_states=initial_conv_states,
                    activation=self.activation,
                    precomputed_seq_idx=conv_seq_idx,
                    precomputed_seq_start=conv_seq_start,
                )
                xBC = xBC_out.to(xBC_dtype).unsqueeze(0)
            else:
                # causal_conv1d_fn cannot accept both seq_idx and
                # initial_states simultaneously. Using seq_idx with packed sequences
                # zeroes the conv state at sequence boundaries instead of using the
                # cached initial states. We must loop over requests individually,
                # passing initial_states per-request with channels-last layout.
                # This loop launches a variable number of kernels and uses .item(),
                # so it is incompatible with CUDA graph capture/replay.
                assert not torch.cuda.is_current_stream_capturing(), (
                    "Per-request conv1d loop is not CUDA-graph compatible. "
                    "Enable use_triton_conv1d or set num_cuda_graphs=None."
                )
                num_requests = cu_seqlens.shape[0] - 1
                xBC_out = xBC.clone()
                for r in range(num_requests):
                    start = cu_seqlens[r].item()
                    end = cu_seqlens[r + 1].item()
                    if start == end:
                        continue
                    # xBC is (1, total_tokens, conv_dim); slice gives channels-last via transpose
                    xBC_r = xBC[:, start:end, :].transpose(1, 2)  # channels-last (1, C, L)
                    init_r = initial_conv_states[r : r + 1]  # (1, conv_dim, d_conv-1)
                    init_r = init_r.permute(0, 2, 1).contiguous().transpose(1, 2)  # channels-last
                    xBC_r = causal_conv1d_fn(
                        x=xBC_r,
                        weight=conv_weight,
                        bias=conv_bias,
                        activation=self.activation,
                        initial_states=init_r,
                    )
                    xBC_out[:, start:end, :] = xBC_r.transpose(1, 2).contiguous()
                xBC = xBC_out.to(xBC_dtype)
        else:
            # Non-dynamic-batching path (static batching / training fallback)
            xBC = rearrange(xBC, "b l d -> b d l").contiguous()
            if conv_state is not None:
                # If we just take x[:, :, -self.d_conv :], it will error if seqlen < self.d_conv
                # Instead F.pad will pad with zeros if seqlen < self.d_conv, and truncate otherwise.
                conv_state.copy_(
                    F.pad(xBC, (self.d_conv - xBC.shape[-1], 0))
                )  # Update state (B D W)

            seqlen = xBC.size(2)
            if causal_conv1d_fn is None:
                xBC = self.act(self.cp.conv1d(xBC)[..., :seqlen])
            else:
                assert self.activation in ["silu", "swish"]
                xBC = causal_conv1d_fn(
                    x=xBC,
                    weight=rearrange(self.cp.get_conv1d_weight(), "d 1 w -> d w"),
                    bias=self.cp.get_conv1d_bias(),
                    activation=self.activation,
                    seq_idx=seq_idx,
                )
            xBC = rearrange(xBC, "b d l -> b l d").contiguous()

        x, B, C = torch.split(
            xBC,
            [
                self.cp.d_inner_local_tpcp,
                self.cp.ngroups_local_tpcp * self.d_state,
                self.cp.ngroups_local_tpcp * self.d_state,
            ],
            dim=-1,
        )

        # TODO Vijay: fuse most of the transposes with the GEMMS
        x = rearrange(x, "b l (h p) -> b l h p", p=self.headdim).contiguous()
        dt = dt.contiguous()
        B = rearrange(B, "b l (g n) -> b l g n", n=self.d_state).contiguous()
        C = rearrange(C, "b l (g n) -> b l g n", n=self.d_state).contiguous()
        z = rearrange(z, "b l (h p) -> b l h p", p=self.headdim).contiguous()

        # If `rmsnorm == False`, then the norm inside `mamba_chunk_scan_combined` will be used.
        # In this case, if `cp_size > 1` then that norm could be performed on less heads than if
        # `cp_size == 1` (groups of heads can be sharded across CP ranks), which would be
        # mathematically incorrect, and potentially arithmetically unstable.
        assert (
            self.cp.cp_size == 1 or self.rmsnorm
        ), "Context parallel not supported for use_mem_eff_path==False and rmsnorm==False"

        if is_dynamic_batching:
            # Unified varlen SSM path: all prefill requests through single kernel call
            initial_ssm_state = ssm_state[batch_indices]

            x = x.squeeze(0)
            dt = dt.squeeze(0)
            A = A.squeeze(0)
            B = B.squeeze(0)
            C = C.squeeze(0)
            z = z.squeeze(0)
            # Initialize with zeros so padding positions (beyond cu_chunk_seqlens
            # boundaries) remain zero, which is safe for RMSNorm and downstream ops.
            y = torch.zeros_like(x)

            intermediate_chunk_indices = None
            per_request_intermediate_counts = []

            if cu_chunk_seqlens is not None:
                # Use precomputed chunk metadata (CUDA graph compatible, no .item())
                # Build intermediate chunk indices if needed (eager mode only,
                # since Step 6 forces eager when intermediate states are required)
                if intermediate_token_offsets is not None:
                    seqlens = cu_seqlens_list
                    num_real_seqs = len(seqlens) - 1
                    intermediate_chunk_indices_list = []
                    cumulative_chunks = 0
                    for i in range(num_real_seqs):
                        seq_len = seqlens[i + 1] - seqlens[i]
                        num_chunks = max(1, (seq_len + self.chunk_size - 1) // self.chunk_size)
                        first_chunk_idx = cumulative_chunks
                        offsets = intermediate_token_offsets[i]
                        count = 0
                        for offset in offsets:
                            assert offset > 0 and offset <= seq_len, (
                                f"intermediate offset {offset} out of range for "
                                f"sequence {i} with length {seq_len}"
                            )
                            assert offset % self.chunk_size == 0, (
                                f"intermediate offset {offset} is not a multiple "
                                f"of chunk_size {self.chunk_size}"
                            )
                            chunk_idx = first_chunk_idx + (offset // self.chunk_size) - 1
                            intermediate_chunk_indices_list.append(chunk_idx)
                            count += 1
                        per_request_intermediate_counts.append(count)
                        cumulative_chunks += num_chunks
                    if intermediate_chunk_indices_list:
                        intermediate_chunk_indices = cu_seqlens.new_tensor(
                            intermediate_chunk_indices_list, dtype=torch.int64
                        )
            else:
                # Fallback: build chunk metadata from cu_seqlens (non-precomputed)
                chunk_boundaries = [0]
                last_chunk_indices_list = []
                intermediate_chunk_indices_list = []
                num_seqs = cu_seqlens.numel() - 1
                for i in range(num_seqs):
                    start = cu_seqlens[i].item()
                    end = cu_seqlens[i + 1].item()
                    first_chunk_idx = len(chunk_boundaries) - 1
                    pos = start + self.chunk_size
                    while pos < end:
                        chunk_boundaries.append(pos)
                        pos += self.chunk_size
                    chunk_boundaries.append(end)
                    last_chunk_indices_list.append(len(chunk_boundaries) - 2)

                    if intermediate_token_offsets is not None:
                        seq_len = end - start
                        offsets = intermediate_token_offsets[i]
                        count = 0
                        for offset in offsets:
                            assert offset > 0 and offset <= seq_len, (
                                f"intermediate offset {offset} out of range for "
                                f"sequence {i} with length {seq_len}"
                            )
                            assert offset % self.chunk_size == 0, (
                                f"intermediate offset {offset} is not a multiple "
                                f"of chunk_size {self.chunk_size}"
                            )
                            chunk_idx = first_chunk_idx + (offset // self.chunk_size) - 1
                            intermediate_chunk_indices_list.append(chunk_idx)
                            count += 1
                        per_request_intermediate_counts.append(count)

                cu_chunk_seqlens = cu_seqlens.new_tensor(chunk_boundaries)
                last_chunk_indices = cu_seqlens.new_tensor(last_chunk_indices_list)

                if intermediate_token_offsets is not None and intermediate_chunk_indices_list:
                    intermediate_chunk_indices = cu_seqlens.new_tensor(
                        intermediate_chunk_indices_list, dtype=torch.int64
                    )

                seq_idx_for_varlen = None
                if seq_idx is not None:
                    chunk_starts = cu_chunk_seqlens[:-1]
                    seq_idx_for_varlen = seq_idx[0, chunk_starts].contiguous()

            ssm_varlen_result = mamba_chunk_scan_combined_varlen(
                x=x,
                dt=dt,
                A=A,
                B=B,
                C=C,
                chunk_size=self.chunk_size,
                cu_chunk_seqlens=cu_chunk_seqlens,
                last_chunk_indices=last_chunk_indices,
                seq_idx=seq_idx_for_varlen,
                out=y,
                D=(
                    rearrange(self.cp.get_D().float(), "(h p) -> h p", p=self.headdim)
                    if self.D_has_hdim
                    else self.cp.get_D()
                ),
                z=z if not self.rmsnorm else None,
                dt_bias=self.cp.get_dt_bias().float(),
                initial_states=initial_ssm_state,
                return_intermediate_states=False,
                intermediate_chunk_indices=intermediate_chunk_indices,
                dt_softplus=True,
                dt_limit=(0.0, float("inf")),
                state_dtype=ssm_state.dtype,
            )

            if intermediate_chunk_indices is not None:
                ssm_varlen_states, intermediate_ssm_states = ssm_varlen_result
            else:
                ssm_varlen_states = ssm_varlen_result

            y = y.unsqueeze(0)
            z = z.unsqueeze(0)

            tensor_masked_update(ssm_state, batch_indices, ssm_varlen_states)

            # Assemble per-request intermediate states (SSM + conv)
            if intermediate_chunk_indices is not None:
                conv_dim = xBC_pre_conv.shape[-1]
                intermediate_states_per_request = []
                ssm_offset = 0
                num_real_seqs = (
                    len(cu_seqlens_list) - 1
                    if cu_seqlens_list is not None
                    else cu_seqlens.numel() - 1
                )
                for i in range(num_real_seqs):
                    count = per_request_intermediate_counts[i]
                    if count == 0:
                        intermediate_states_per_request.append(None)
                    else:
                        req_ssm = intermediate_ssm_states[ssm_offset : ssm_offset + count]
                        # Extract conv states: last d_conv tokens of pre-conv xBC at each offset
                        req_conv_list = []
                        seq_start = (
                            cu_seqlens_list[i]
                            if cu_seqlens_list is not None
                            else cu_seqlens[i].item()
                        )
                        for offset in intermediate_token_offsets[i]:
                            abs_pos = seq_start + offset
                            conv_state_at_offset = xBC_pre_conv[
                                0, abs_pos - self.d_conv : abs_pos, :
                            ].t()
                            req_conv_list.append(conv_state_at_offset)
                        req_conv = torch.stack(req_conv_list)
                        intermediate_states_per_request.append((req_ssm, req_conv))
                        ssm_offset += count
        else:
            # Non-dynamic-batching path (static batching)
            initial_ssm_state = None
            y = mamba_chunk_scan_combined(
                x,
                dt,
                A,
                B,
                C,
                self.chunk_size,
                D=(
                    rearrange(self.cp.get_D().float(), "(h p) -> h p", p=self.headdim)
                    if self.D_has_hdim
                    else self.cp.get_D()
                ),
                z=z if not self.rmsnorm else None,
                dt_bias=self.cp.get_dt_bias().float(),
                dt_softplus=True,
                return_final_states=ssm_state is not None,
                initial_states=initial_ssm_state,
            )

            if ssm_state is not None:
                y, last_state = y
                ssm_state.copy_(last_state)

        y = rearrange(y, "b l h p -> l b (h p)").contiguous()
        y = self.cp.post_conv_ssm(y)

        if self.rmsnorm:
            z = rearrange(z, "b l h p -> l b (h p)").contiguous()
            z = self.cp.post_conv_ssm(z)
            y = self.norm(y, z)

        if intermediate_token_offsets is not None and is_dynamic_batching:
            return y, intermediate_states_per_request
        return y

    def _ssm_decode(
        self,
        zxBCdt: torch.Tensor,
        conv_state: torch.Tensor,
        ssm_state: torch.Tensor,
        batch_indices: Optional[torch.Tensor] = None,
        intermediate_conv_state: Optional[torch.Tensor] = None,
        intermediate_ssm_state: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """
        Performs SSM computation for inference decode step.

        Args:
            zxBCdt: The input tensor of shape (b, s, d), which is a concatenation of
                z, x, B, C, and dt projections.
                s is the sequence length (1 + num_speculative_tokens).
            conv_state: The convolution state tensor for inference.
            ssm_state: The selective scan state tensor for inference.
            batch_indices: A map from batch id to position in the Mamba state tensors.
            intermediate_conv_state: Optional buffer for storing conv state at each
                sequence step (for speculative decoding rollback).
            intermediate_ssm_state: Optional buffer for storing SSM state at each
                sequence step (for speculative decoding rollback).

        Returns:
            The output tensor of shape (b, s, d).
        """
        batch_size, seq_len, _ = zxBCdt.shape
        dtype = zxBCdt.dtype

        z, xBC, dt = torch.split(
            zxBCdt,
            [
                self.d_inner_local_tp,
                self.d_inner_local_tp + 2 * self.ngroups_local_tp * self.d_state,
                self.nheads_local_tp,
            ],
            dim=-1,
        )

        # Conv step
        if causal_conv1d_update is None:
            # TODO(ksanthanam): Consider deprecating this path
            assert seq_len == 1, "Native PyTorch fallback only supports 1 token at a time"
            xBC_squeeze = xBC.squeeze(1)
            conv_state.copy_(torch.roll(conv_state, shifts=-1, dims=-1))  # Update state (B D W)
            conv_state[:, :, -1] = xBC_squeeze
            xBC_squeeze = torch.sum(
                conv_state * rearrange(self.conv1d.weight, "d 1 w -> d w"), dim=-1
            )  # (B D)
            if self.conv1d.bias is not None:
                xBC_squeeze = xBC_squeeze + self.conv1d.bias
            xBC = self.act(xBC_squeeze).to(dtype=xBC.dtype).unsqueeze(1)
        else:
            # Conv state dtype might differ from params dtype, so cast xBC and weight / bias
            # tensors to the conv state dtype for causal_conv1d_update and then cast xBC
            # back to the original dtype
            xBC_dtype = xBC.dtype
            weight = rearrange(self.conv1d.weight, "d 1 w -> d w")
            xBC = causal_conv1d_update(
                xBC.to(conv_state.dtype),
                conv_state,
                weight.to(conv_state.dtype),
                self.conv1d.bias.to(conv_state.dtype),
                self.activation,
                conv_state_indices=batch_indices,
                intermediate_conv_states=intermediate_conv_state,
            ).to(xBC_dtype)

        x, B, C = torch.split(
            xBC,
            [
                self.d_inner_local_tp,
                self.ngroups_local_tp * self.d_state,
                self.ngroups_local_tp * self.d_state,
            ],
            dim=-1,
        )
        A = -torch.exp(self.A_log.float())

        # SSM step
        if selective_state_update is None:
            # TODO(ksanthanam): Consider deprecating this path
            assert seq_len == 1, "Native PyTorch fallback only supports 1 token at a time"

            x = x.squeeze(1)
            B = B.squeeze(1)
            C = C.squeeze(1)
            dt = dt.squeeze(1)
            if z is not None:
                z = z.squeeze(1)

            if self.ngroups_local_tp > 1:
                B = rearrange(B, "b (g n) -> b g n", n=self.d_state)
                C = rearrange(C, "b (g n) -> b g n", n=self.d_state)
                B = repeat(
                    B, "b g n -> b (g h) n", h=self.d_inner_local_tp // self.ngroups_local_tp
                )
                C = repeat(
                    C, "b g n -> b (g h) n", h=self.d_inner_local_tp // self.ngroups_local_tp
                )

                dt = repeat(dt, "b h -> b (h p)", p=self.headdim)
                dt_bias = repeat(self.dt_bias, "h -> (h p)", p=self.headdim)
                A = repeat(A, "h -> (h p) n", p=self.headdim, n=self.d_state)
                D = repeat(self.D, "h -> (h p)", p=self.headdim)

                dt = F.softplus(dt + dt_bias.to(dtype=dt.dtype))
                dA = torch.exp(torch.einsum("bd,dn->bdn", dt, A))

                dB_x = torch.einsum("bd,bdn,bd->bdn", dt, B, x)
                ssm_state.copy_(
                    ssm_state * rearrange(dA, "b (h p) n -> b h p n", p=self.headdim)
                    + rearrange(dB_x, "b (h p) n -> b h p n", p=self.headdim)
                )

                y = torch.einsum(
                    "bdn,bdn->bd",
                    rearrange(ssm_state.to(dtype), "b h p n -> b (h p) n", p=self.headdim),
                    C,
                )
                y = y + D.to(dtype) * x
                if not self.rmsnorm:
                    y = y * self.act(z)  # (B D)
            else:
                # Discretize A and B (b (g n))
                dt = F.softplus(dt + self.dt_bias.to(dtype=dt.dtype))  # (batch, nheads)
                dA = torch.exp(dt * A)
                x = rearrange(x, "b (h p) -> b h p", p=self.headdim)
                dBx = torch.einsum("bh,bn,bhp->bhpn", dt, B, x)
                ssm_state.copy_(ssm_state * rearrange(dA, "b h -> b h 1 1") + dBx)
                y = torch.einsum("bhpn,bn->bhp", ssm_state.to(dtype), C)
                y = y + rearrange(self.D.to(dtype), "h -> h 1") * x
                y = rearrange(y, "b h p -> b (h p)")
                if not self.rmsnorm:
                    y = y * self.act(z)  # (B D)

            y = y.unsqueeze(1)  # Restore seq dimension
        else:
            A = repeat(A, "h -> h p n", p=self.headdim, n=self.d_state).to(dtype=torch.float32)

            # Incorporate sequence dimension in einops rearrengements
            dt = repeat(dt, "b s h -> b s h p", p=self.headdim)
            dt_bias = repeat(self.dt_bias, "h -> h p", p=self.headdim)
            D = repeat(self.D, "h -> h p", p=self.headdim)
            B = rearrange(B, "b s (g n) -> b s g n", g=self.ngroups_local_tp)
            C = rearrange(C, "b s (g n) -> b s g n", g=self.ngroups_local_tp)
            x_reshaped = rearrange(x, "b s (h p) -> b s h p", p=self.headdim)
            if not self.rmsnorm:
                z = rearrange(z, "b s (h p) -> b s h p", p=self.headdim)

            # Upcast the batch_indices to prevent integer overflow errors in the case of
            # large max request counts.
            if batch_indices is not None:
                batch_indices = batch_indices.to(torch.int64)

            y = selective_state_update(
                ssm_state,
                x_reshaped,
                dt,
                A,
                B,
                C,
                D,
                z=z if not self.rmsnorm else None,
                dt_bias=dt_bias,
                dt_softplus=True,
                state_batch_indices=batch_indices,
                intermediate_ssm_states=intermediate_ssm_state,  # SSM only
            )
            y = rearrange(y, "b s h p -> b s (h p)")

        if self.rmsnorm:
            y = self.norm(y, z)

        return y

    def mamba_state_shapes_per_request(self) -> Tuple[Tuple[int], Tuple[int]]:
        """Returns the Mamba conv and ssm states shapes per request."""
        conv_states_shape = (self.conv1d.weight.shape[0], self.d_conv)
        ssm_states_shape = (self.nheads_local_tp, self.headdim, self.d_state)
        return (conv_states_shape, ssm_states_shape)

    def _get_states_from_cache(self, inference_context, batch_size, *, inference_params=None):
        """Initializes or retrieves the SSM state tensors from the cache.

        At the start of any inference (at the prefill step), if there is no cache or if the
        cached batch size has changed, then new tensors are initialized and stored in the cache.
        Otherwise the existing tensors are retrieved from the cache and zeroed out.
        """

        inference_context = deprecate_inference_params(inference_context, inference_params)

        assert inference_context is not None
        assert inference_context.is_static_batching()
        assert self.layer_number is not None

        if (
            self.layer_number not in inference_context.key_value_memory_dict
            or batch_size != self.cached_batch_size
        ):
            conv_state_shape, ssm_state_shape = self.mamba_state_shapes_per_request()
            conv_state = torch.zeros(
                batch_size,
                *conv_state_shape,
                device=self.conv1d.weight.device,
                dtype=self.conv1d.weight.dtype,
            )
            ssm_state = torch.zeros(
                batch_size,
                *ssm_state_shape,
                device=self.in_proj.weight.device,
                dtype=self.in_proj.weight.dtype,
            )
            inference_context.key_value_memory_dict[self.layer_number] = (conv_state, ssm_state)
            self.cached_batch_size = batch_size
        else:
            conv_state, ssm_state = inference_context.key_value_memory_dict[self.layer_number]
            if inference_context.sequence_len_offset == 0:
                conv_state.zero_()
                ssm_state.zero_()
        return conv_state, ssm_state

    def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None):
        """Provide a sharded state dictionary for distributed checkpointing."""
        # Guard for cases metadata is not provided
        metadata = ensure_metadata_has_dp_cp_group(metadata)

        sharded_state_dict = {}
        # Parameters
        self._save_to_state_dict(sharded_state_dict, "", keep_vars=True)
        sharded_state_dict = make_sharded_tensors_for_checkpoint(
            sharded_state_dict,
            prefix,
            tensor_parallel_layers_axis_map={
                "A_log": 0,
                "dt_bias": 0,
                "D": 0,
            },  # parameters sharded across TP
            sharded_offsets=sharded_offsets,
        )
        # Submodules
        for name, module in self.named_children():
            if name == "conv1d":
                # Add TP sharding for Conv1d
                module_sd = module.state_dict(prefix="", keep_vars=True)
                module_sharded_sd = make_sharded_tensors_for_checkpoint(
                    module_sd,
                    f"{prefix}{name}.",
                    {"weight": 0, "bias": 0},
                    sharded_offsets,
                    tp_group=self.tp_group,
                    dp_cp_group=metadata['dp_cp_group'],
                )

            else:
                module_sharded_sd = sharded_state_dict_default(
                    module, f"{prefix}{name}.", sharded_offsets, metadata, tp_group=self.tp_group
                )

            sharded_state_dict.update(module_sharded_sd)

        # At this point the TP sharding is correctly defined for each tensor, but some of the
        # tensors must be additionally split into separate parts
        in_proj_dim = (
            self.d_inner_local_tp * 2
            + 2 * self.ngroups_local_tp * self.d_state
            + self.nheads_local_tp
        )
        assert sharded_state_dict[f"{prefix}in_proj.weight"].data.size(0) == in_proj_dim, (
            in_proj_dim,
            sharded_state_dict[f"{prefix}in_proj.weight"],
        )

        sharded_state_dict[f"{prefix}in_proj.weight"] = _split_tensor_factory(
            sharded_state_dict[f"{prefix}in_proj.weight"],
            [
                self.d_inner_local_tp,
                self.d_inner_local_tp,
                self.ngroups_local_tp * self.d_state,
                self.ngroups_local_tp * self.d_state,
                self.nheads_local_tp,
            ],
            ["z", "x", "B", "C", "dt"],
            0,
        )

        conv_dim = self.d_inner_local_tp + 2 * self.ngroups_local_tp * self.d_state
        assert sharded_state_dict[f"{prefix}conv1d.weight"].data.size(0) == conv_dim, (
            conv_dim,
            sharded_state_dict[f"{prefix}conv1d.weight"],
        )
        assert sharded_state_dict[f"{prefix}conv1d.bias"].data.size(0) == conv_dim, (
            conv_dim,
            sharded_state_dict[f"{prefix}conv1d.bias"],
        )

        for conv_layer_name in ["conv1d.weight", "conv1d.bias"]:
            sharded_state_dict[f"{prefix}{conv_layer_name}"] = _split_tensor_factory(
                sharded_state_dict[f"{prefix}{conv_layer_name}"],
                [
                    self.d_inner_local_tp,
                    self.ngroups_local_tp * self.d_state,
                    self.ngroups_local_tp * self.d_state,
                ],
                ["x", "B", "C"],
                0,
            )

        return sharded_state_dict


def _split_tensor_factory(
    orig_sh_ten: ShardedTensor, split_sections: List[int], split_names: List[str], split_dim: int
) -> ShardedTensorFactory:
    """Builds a factory that splits a given ShardedTensor into several independent chunks."""
    assert isinstance(orig_sh_ten, ShardedTensor), type(orig_sh_ten)
    orig_sh_ten_no_data = orig_sh_ten.without_data()  # remove `data` reference

    if sum(split_sections) != orig_sh_ten_no_data.local_shape[split_dim]:
        raise ValueError(
            f"Split sections must cover the whole dimension size, "
            f"got {split_sections=} vs dimensions size "
            f"{orig_sh_ten_no_data.local_shape[split_dim]}"
        )

    assert not isinstance(
        split_sections, int
    ), "Splitting into predefined section sizes is supported (`split_sections` must be a list)"
    assert len(split_sections) == len(split_names), (len(split_sections), len(split_names))

    @torch.no_grad()
    def sh_ten_build_fn(
        key: str, t: torch.Tensor, replica_id: ReplicaId, flattened_range: Optional[slice]
    ):
        factory_sh_ten = replace(
            orig_sh_ten_no_data,
            key=key,
            data=t,
            dtype=t.dtype,
            replica_id=replica_id,
            flattened_range=flattened_range,
        )

        chunk_sh_tens = []
        split_start = 0
        for split_size, split_name in zip(split_sections, split_names):
            split_chunks = factory_sh_ten.narrow(split_dim, split_start, split_size)
            for sh_ten in split_chunks:
                sh_ten.key = f"{sh_ten.key}.{split_name}"
            chunk_sh_tens.extend(split_chunks)
            split_start += split_size

        assert split_start == orig_sh_ten_no_data.local_shape[split_dim], (
            split_start,
            orig_sh_ten_no_data.local_shape[split_dim],
        )
        assert sum(sh_ten.data.numel() for sh_ten in chunk_sh_tens) == t.numel(), (
            chunk_sh_tens,
            t.shape,
        )
        return chunk_sh_tens

    @torch.no_grad()
    def sh_ten_merge_fn(sub_state_dict):
        return torch.cat(sub_state_dict)

    return ShardedTensorFactory(
        orig_sh_ten.key, orig_sh_ten.data, sh_ten_build_fn, sh_ten_merge_fn, orig_sh_ten.replica_id
    )


def _check_mamba_sequence_packing_support(
    for_inference_not_training: bool = True,
) -> Tuple[bool, Optional[str]]:
    """Checks whether `causal_conv1d` and `mamba_ssm` support sequence packing."""
    if for_inference_not_training:
        # https://github.com/Dao-AILab/causal-conv1d/commit/d87608f78f87d1288a7821d9e6ff4b10a8d5bf07
        conv1d_min = "1.5.3.post1"
        # https://github.com/state-spaces/mamba/commit/4f77d5306e19f5c7ae37665a44c3e61e24cafcb5
        mamba_min = "2.2.6.post3"
    else:
        conv1d_min = "1.4.0"
        mamba_min = "2.0.0"
    if not is_causal_conv1d_min_version(conv1d_min):
        return False, f"causal_conv1d >= {conv1d_min} is required"
    elif not is_mamba_min_version(mamba_min):
        return False, f"mamba_ssm >= {mamba_min} is required"
    return True, None


================================================
FILE: megatron/core/ssm/mlp_layer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from typing import Optional

from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.transformer import (
    TransformerConfig,
    TransformerLayer,
    TransformerLayerSubmodules,
)


class MLPLayer(TransformerLayer):
    """Drop-in replacement for TransformerLayer but initializes only an MLP via the spec."""

    def __init__(
        self,
        config: TransformerConfig,
        submodules: TransformerLayerSubmodules,
        layer_number: int = 1,
        hidden_dropout: float = None,
        pg_collection: Optional[ProcessGroupCollection] = None,
        add_layer_offset: bool = True,
    ):
        super().__init__(
            config=config,
            submodules=submodules,
            layer_number=layer_number,
            hidden_dropout=hidden_dropout,
            pg_collection=pg_collection,
            add_layer_offset=add_layer_offset,
        )


================================================
FILE: megatron/core/ssm/ops/__init__.py
================================================
# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
# Triton kernels for Mamba SSM (adapted from vLLM / state-spaces/mamba).

try:
    from .ssd_combined import mamba_chunk_scan_combined_varlen
except ImportError:
    mamba_chunk_scan_combined_varlen = None

try:
    from .causal_conv1d_varlen import causal_conv1d_varlen_fn
except ImportError:
    causal_conv1d_varlen_fn = None

__all__ = ["mamba_chunk_scan_combined_varlen", "causal_conv1d_varlen_fn"]


================================================
FILE: megatron/core/ssm/ops/causal_conv1d_triton.py
================================================
# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.

# Some of this code was adopted from https://github.com/Dao-AILab/causal-conv1d/
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.

import torch

try:
    import triton
    import triton.language as tl

    HAVE_TRITON = True
except ImportError:
    from unittest.mock import MagicMock

    from megatron.core.utils import null_decorator

    triton = MagicMock()
    triton.jit = null_decorator
    tl = MagicMock()
    HAVE_TRITON = False


@triton.jit
def causal_conv1d_update_kernel(
    x_ptr,
    x_b_stride,
    x_s_stride,
    x_c_stride,
    conv_state_ptr,
    conv_state_b_stride,
    conv_state_c_stride,
    conv_state_l_stride,
    int_state_ptr,
    int_state_b_stride,
    int_state_s_stride,
    int_state_c_stride,
    int_state_l_stride,
    weight_ptr,
    weight_c_stride,
    weight_width_stride,
    bias_ptr,
    bias_stride,
    out_ptr,
    out_b_stride,
    out_s_stride,
    out_c_stride,
    conv_state_indices_ptr,
    batch,
    seq_len,
    dim,
    state_len,
    WIDTH: tl.constexpr,
    BLOCK_DIM: tl.constexpr,
    HAS_BIAS: tl.constexpr,
    HAS_STATE_INDICES: tl.constexpr,
    HAS_INT_STATE: tl.constexpr,
    SILU_ACTIVATION: tl.constexpr,
):
    """Triton implementation of causal_conv1d_update (kernel)."""
    batch_id = tl.program_id(0)
    channel_block_id = tl.program_id(1)

    channel_offsets = channel_block_id * BLOCK_DIM + tl.arange(0, BLOCK_DIM)
    mask = channel_offsets < dim

    # State batch coordinate mapping
    if HAS_STATE_INDICES:
        state_batch_coord = tl.load(conv_state_indices_ptr + batch_id)
    else:
        state_batch_coord = batch_id

    # Base Pointers
    conv_state_ptrs = (
        conv_state_ptr
        + state_batch_coord * conv_state_b_stride
        + channel_offsets * conv_state_c_stride
    )
    weight_ptrs = weight_ptr + channel_offsets * weight_c_stride

    # Skip padding tokens (block-level uniform condition)
    if state_batch_coord < 0:
        for s in range(seq_len):
            out_ptrs = (
                out_ptr
                + batch_id * out_b_stride
                + s * out_s_stride
                + channel_offsets * out_c_stride
            )
            tl.store(out_ptrs, 0.0, mask=mask)
        return

    # Load Bias
    if HAS_BIAS:
        bias_val = tl.load(bias_ptr + channel_offsets * bias_stride, mask=mask).to(tl.float32)
    else:
        bias_val = tl.zeros([BLOCK_DIM], dtype=tl.float32)

    # Load Weights
    if WIDTH == 2:
        w0 = tl.load(weight_ptrs + 0 * weight_width_stride, mask=mask).to(tl.float32)
        w1 = tl.load(weight_ptrs + 1 * weight_width_stride, mask=mask).to(tl.float32)
    elif WIDTH == 3:
        w0 = tl.load(weight_ptrs + 0 * weight_width_stride, mask=mask).to(tl.float32)
        w1 = tl.load(weight_ptrs + 1 * weight_width_stride, mask=mask).to(tl.float32)
        w2 = tl.load(weight_ptrs + 2 * weight_width_stride, mask=mask).to(tl.float32)
    elif WIDTH == 4:
        w0 = tl.load(weight_ptrs + 0 * weight_width_stride, mask=mask).to(tl.float32)
        w1 = tl.load(weight_ptrs + 1 * weight_width_stride, mask=mask).to(tl.float32)
        w2 = tl.load(weight_ptrs + 2 * weight_width_stride, mask=mask).to(tl.float32)
        w3 = tl.load(weight_ptrs + 3 * weight_width_stride, mask=mask).to(tl.float32)

    # Initialize independent x_vals to match unrolled float array
    x_val_0 = tl.zeros([BLOCK_DIM], dtype=tl.float32)
    x_val_1 = tl.zeros([BLOCK_DIM], dtype=tl.float32)
    x_val_2 = tl.zeros([BLOCK_DIM], dtype=tl.float32)
    x_val_3 = tl.zeros([BLOCK_DIM], dtype=tl.float32)

    # Loop over the sequence dimension (e.g., speculative tokens)
    for s in range(seq_len):
        x_ptrs = x_ptr + batch_id * x_b_stride + s * x_s_stride + channel_offsets * x_c_stride
        out_ptrs = (
            out_ptr + batch_id * out_b_stride + s * out_s_stride + channel_offsets * out_c_stride
        )

        # Load the last (WIDTH - 1) elements to use them BEFORE they are overwritten
        # by the shift
        if WIDTH >= 2:
            x_val_0 = tl.load(
                conv_state_ptrs + (state_len - WIDTH + 1) * conv_state_l_stride, mask=mask
            ).to(tl.float32)
        if WIDTH >= 3:
            x_val_1 = tl.load(
                conv_state_ptrs + (state_len - WIDTH + 2) * conv_state_l_stride, mask=mask
            ).to(tl.float32)
        if WIDTH >= 4:
            x_val_2 = tl.load(
                conv_state_ptrs + (state_len - WIDTH + 3) * conv_state_l_stride, mask=mask
            ).to(tl.float32)

        # Shift the linear state buffer left by 1
        i = 0
        while i < state_len - 1:
            val = tl.load(conv_state_ptrs + (i + 1) * conv_state_l_stride, mask=mask)
            tl.store(conv_state_ptrs + i * conv_state_l_stride, val, mask=mask)
            i += 1

        # Process the single token for the current sequence step
        x_val = tl.load(x_ptrs, mask=mask)

        # Store the new token at the end of the linear state buffer
        tl.store(conv_state_ptrs + (state_len - 1) * conv_state_l_stride, x_val, mask=mask)

        # Write out to the intermediate state buffer if requested
        if HAS_INT_STATE:
            i = 0
            while i < state_len:
                val = tl.load(conv_state_ptrs + i * conv_state_l_stride, mask=mask)
                int_ptr = (
                    int_state_ptr
                    + state_batch_coord * int_state_b_stride
                    + s * int_state_s_stride
                    + channel_offsets * int_state_c_stride
                    + i * int_state_l_stride
                )
                tl.store(int_ptr, val, mask=mask)
                i += 1

        # Advance registers for calculation
        x_val_f32 = x_val.to(tl.float32)
        if WIDTH == 2:
            x_val_1 = x_val_f32
        elif WIDTH == 3:
            x_val_2 = x_val_f32
        elif WIDTH == 4:
            x_val_3 = x_val_f32

        # Compute output
        out_val = bias_val
        if WIDTH == 2:
            out_val += w0 * x_val_0 + w1 * x_val_1
        elif WIDTH == 3:
            out_val += w0 * x_val_0 + w1 * x_val_1 + w2 * x_val_2
        elif WIDTH == 4:
            out_val += w0 * x_val_0 + w1 * x_val_1 + w2 * x_val_2 + w3 * x_val_3

        if SILU_ACTIVATION:
            out_val = out_val * tl.sigmoid(out_val)

        tl.store(out_ptrs, out_val.to(out_ptrs.dtype.element_ty), mask=mask)


def causal_conv1d_update(
    x: torch.Tensor,
    conv_state: torch.Tensor,
    weight: torch.Tensor,
    bias: torch.Tensor | None,
    silu_activation: bool,
    conv_state_indices: torch.Tensor | None,
    intermediate_conv_states: torch.Tensor | None = None,
) -> torch.Tensor:
    """Triton implementation of causal_conv1d_update (entrypoint)."""

    # Check if input is 2D, temporarily treat as 3D for uniform processing
    is_2d = x.dim() == 2
    if is_2d:
        x = x.unsqueeze(1)

    batch, seq_len, dim = x.shape
    out = torch.empty_like(x)
    state_len = conv_state.shape[-1]
    width = weight.shape[-1]

    if bias is not None:
        bias_stride = bias.stride(0)
        has_bias = True
    else:
        bias = x  # Dummy pointer
        bias_stride = 0
        has_bias = False

    if conv_state_indices is not None:
        has_state_indices = True
    else:
        conv_state_indices = x  # Dummy pointer
        has_state_indices = False

    # Extract intermediate state strides if provided
    if intermediate_conv_states is not None:
        has_int_state = True
        int_state_ptr = intermediate_conv_states
        int_state_b_stride = intermediate_conv_states.stride(0)
        int_state_s_stride = intermediate_conv_states.stride(1)
        int_state_c_stride = intermediate_conv_states.stride(2)
        int_state_l_stride = intermediate_conv_states.stride(3)
    else:
        has_int_state = False
        int_state_ptr = x  # Dummy pointer
        int_state_b_stride = 0
        int_state_s_stride = 0
        int_state_c_stride = 0
        int_state_l_stride = 0

    BLOCK_DIM = 64
    grid = (batch, triton.cdiv(dim, BLOCK_DIM))

    causal_conv1d_update_kernel[grid](
        x,
        x.stride(0),
        x.stride(1),
        x.stride(2),
        conv_state,
        conv_state.stride(0),
        conv_state.stride(1),
        conv_state.stride(2),
        int_state_ptr,
        int_state_b_stride,
        int_state_s_stride,
        int_state_c_stride,
        int_state_l_stride,
        weight,
        weight.stride(0),
        weight.stride(1),
        bias,
        bias_stride,
        out,
        out.stride(0),
        out.stride(1),
        out.stride(2),
        conv_state_indices,
        batch,
        seq_len,
        dim,
        state_len,
        WIDTH=width,
        BLOCK_DIM=BLOCK_DIM,
        HAS_BIAS=has_bias,
        HAS_STATE_INDICES=has_state_indices,
        HAS_INT_STATE=has_int_state,
        SILU_ACTIVATION=silu_activation == "silu",
    )

    if is_2d:
        out = out.squeeze(1)

    return out


================================================
FILE: megatron/core/ssm/ops/causal_conv1d_varlen.py
================================================
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

"""Triton varlen depthwise causal 1D convolution with per-sequence initial states and fused SiLU.

Supports packed variable-length sequences where `causal_conv1d_fn` cannot accept
both `seq_idx` and `initial_states` simultaneously.
"""

import torch
import triton
import triton.language as tl

from megatron.core.ssm.ops.determinism import autotune_configs


@triton.autotune(
    configs=autotune_configs(
        [
            triton.Config({"BLOCK_T": 128, "BLOCK_C": 64}, num_warps=4),
            triton.Config({"BLOCK_T": 128, "BLOCK_C": 128}, num_warps=4),
            triton.Config({"BLOCK_T": 256, "BLOCK_C": 64}, num_warps=4),
            triton.Config({"BLOCK_T": 256, "BLOCK_C": 128}, num_warps=8),
        ]
    ),
    key=["conv_dim"],
)
@triton.jit
def _causal_conv1d_varlen_kernel(
    x_ptr,
    weight_ptr,
    bias_ptr,
    seq_idx_ptr,
    seq_start_ptr,
    initial_states_ptr,
    out_ptr,
    total_tokens,
    conv_dim: tl.constexpr,
    initial_states_stride_req,
    initial_states_stride_dim,
    WIDTH: tl.constexpr,
    BLOCK_T: tl.constexpr,
    BLOCK_C: tl.constexpr,
    HAS_INITIAL_STATES: tl.constexpr,
):
    """Depthwise causal conv1d over packed varlen sequences with initial states and SiLU.

    Fully vectorized over BLOCK_T tokens x BLOCK_C channels per thread block.
    """
    pid_c = tl.program_id(0)
    pid_t = tl.program_id(1)

    c_off = pid_c * BLOCK_C + tl.arange(0, BLOCK_C)  # (BLOCK_C,)
    c_mask = c_off < conv_dim
    t_off = pid_t * BLOCK_T + tl.arange(0, BLOCK_T)  # (BLOCK_T,)
    t_mask = t_off < total_tokens

    # Load bias: (BLOCK_C,) broadcast to (BLOCK_T, BLOCK_C)
    bias = tl.load(bias_ptr + c_off, mask=c_mask, other=0.0).to(tl.float32)
    acc = tl.zeros((BLOCK_T, BLOCK_C), dtype=tl.float32) + bias[None, :]

    # Load per-token request ID and request start position
    req_id = tl.load(seq_idx_ptr + t_off, mask=t_mask, other=0)  # (BLOCK_T,)
    req_start = tl.load(seq_start_ptr + t_off, mask=t_mask, other=0)  # (BLOCK_T,)

    # Unrolled convolution over WIDTH taps (typically 4)
    for j in tl.static_range(WIDTH):
        # Load weight column j: (BLOCK_C,)
        w_j = tl.load(weight_ptr + c_off * WIDTH + j, mask=c_mask, other=0.0).to(tl.float32)

        # Source position for this tap
        src = t_off - (WIDTH - 1) + j  # (BLOCK_T,)
        in_seq = src >= req_start  # (BLOCK_T,) — True if source is within the sequence

        # Load from x for in-sequence positions (mask out out-of-bounds)
        src_safe = tl.maximum(src, 0)
        x_val = tl.load(
            x_ptr + src_safe[:, None] * conv_dim + c_off[None, :],
            mask=t_mask[:, None] & c_mask[None, :] & in_seq[:, None],
            other=0.0,
        ).to(
            tl.float32
        )  # (BLOCK_T, BLOCK_C)

        if HAS_INITIAL_STATES:
            # For tokens where src < req_start, load from initial_states
            state_col = (WIDTH - 1) - (req_start - src)  # (BLOCK_T,)
            valid_state = (~in_seq) & (state_col >= 0)  # (BLOCK_T,)
            state_col_safe = tl.maximum(state_col, 0)

            state_val = tl.load(
                initial_states_ptr
                + req_id[:, None] * initial_states_stride_req
                + c_off[None, :] * initial_states_stride_dim
                + state_col_safe[:, None],
                mask=t_mask[:, None] & c_mask[None, :] & valid_state[:, None],
                other=0.0,
            ).to(
                tl.float32
            )  # (BLOCK_T, BLOCK_C)

            tap = tl.where(in_seq[:, None], x_val, state_val)
        else:
            tap = x_val

        acc += tap * w_j[None, :]

    # SiLU activation: x * sigmoid(x)
    sigmoid_acc = 1.0 / (1.0 + tl.exp(-acc))
    result = acc * sigmoid_acc

    # Store output (cast back to input dtype)
    tl.store(
        out_ptr + t_off[:, None] * conv_dim + c_off[None, :],
        result,
        mask=t_mask[:, None] & c_mask[None, :],
    )


def causal_conv1d_varlen_fn(
    x: torch.Tensor,
    weight: torch.Tensor,
    bias: torch.Tensor,
    cu_seqlens: torch.Tensor,
    initial_states: torch.Tensor = None,
    activation: str = "silu",
    precomputed_seq_idx: torch.Tensor = None,
    precomputed_seq_start: torch.Tensor = None,
) -> torch.Tensor:
    """Depthwise causal 1D convolution over packed variable-length sequences.

    Supports both `cu_seqlens` (sequence boundaries) and `initial_states`
    simultaneously, unlike `causal_conv1d_fn` which requires mutual exclusivity
    between `seq_idx` and `initial_states`.

    Args:
        x: Input tensor of shape (total_tokens, conv_dim), channels-last packed.
        weight: Convolution weights of shape (conv_dim, d_conv).
        bias: Bias of shape (conv_dim,).
        cu_seqlens: Cumulative sequence lengths of shape (num_requests + 1,), int32.
        initial_states: Per-request initial conv states of shape
            (num_requests, conv_dim, d_conv - 1). If None, uses zeros.
        activation: Activation function, must be "silu".
        precomputed_seq_idx: Precomputed per-token request ID of shape
            (total_tokens,). If provided, skips repeat_interleave (CUDA
            graph compatible). Padding tokens should use 0 as sentinel.
        precomputed_seq_start: Precomputed per-token request start position
            of shape (total_tokens,). Must be provided together with
            precomputed_seq_idx.

    Returns:
        Output tensor of shape (total_tokens, conv_dim).
    """
    assert activation == "silu", f"Only silu activation is supported, got {activation}"
    assert x.is_contiguous(), "x must be contiguous"
    assert weight.is_contiguous(), "weight must be contiguous"

    total_tokens, conv_dim = x.shape
    d_conv = weight.shape[1]
    num_requests = cu_seqlens.shape[0] - 1

    out = torch.empty_like(x)

    # Use precomputed per-token metadata if provided (CUDA graph compatible),
    # otherwise compute from cu_seqlens via repeat_interleave.
    if precomputed_seq_idx is not None:
        assert precomputed_seq_start is not None
        seq_idx = precomputed_seq_idx
        seq_start = precomputed_seq_start
    else:
        seq_lengths = cu_seqlens[1:] - cu_seqlens[:-1]
        seq_idx = torch.repeat_interleave(
            torch.arange(num_requests, device=x.device, dtype=torch.int32), seq_lengths
        )
        seq_start = torch.repeat_interleave(cu_seqlens[:-1], seq_lengths).to(torch.int32)

    has_initial_states = initial_states is not None
    if not has_initial_states:
        initial_states = torch.empty((1, 1, 1), dtype=x.dtype, device=x.device)
        is_stride_req = 1
        is_stride_dim = 1
    else:
        if precomputed_seq_idx is None:
            assert initial_states.shape == (num_requests, conv_dim, d_conv - 1)
        is_stride_req = initial_states.stride(0)
        is_stride_dim = initial_states.stride(1)

    grid = lambda meta: (
        triton.cdiv(conv_dim, meta["BLOCK_C"]),
        triton.cdiv(total_tokens, meta["BLOCK_T"]),
    )

    _causal_conv1d_varlen_kernel[grid](
        x,
        weight,
        bias,
        seq_idx,
        seq_start,
        initial_states,
        out,
        total_tokens,
        conv_dim,
        is_stride_req,
        is_stride_dim,
        WIDTH=d_conv,
        HAS_INITIAL_STATES=has_initial_states,
    )

    return out


def _causal_conv1d_varlen_simple(
    x: torch.Tensor,
    weight: torch.Tensor,
    bias: torch.Tensor,
    cu_seqlens: torch.Tensor,
    initial_states: torch.Tensor,
    out: torch.Tensor,
) -> None:
    """Simple PyTorch implementation of varlen causal conv1d with initial states and SiLU.

    This is a reference implementation for testing. Processes each request and token
    sequentially.
    """
    total_tokens, conv_dim = x.shape
    d_conv = weight.shape[1]
    num_requests = cu_seqlens.shape[0] - 1

    for r in range(num_requests):
        start = cu_seqlens[r].item()
        end = cu_seqlens[r + 1].item()
        seq_len = end - start

        if seq_len == 0:
            continue

        if initial_states is not None:
            init_state = initial_states[r]  # (conv_dim, d_conv - 1)
        else:
            init_state = torch.zeros((conv_dim, d_conv - 1), dtype=x.dtype, device=x.device)

        x_seq = x[start:end]  # (seq_len, conv_dim)

        for t in range(seq_len):
            acc = bias.float()  # (conv_dim,)
            for j in range(d_conv):
                src_pos = t - (d_conv - 1) + j
                if src_pos < 0:
                    state_col = (d_conv - 1) + src_pos
                    if state_col >= 0 and state_col < d_conv - 1:
                        tap = init_state[:, state_col].float()
                    else:
                        tap = torch.zeros(conv_dim, dtype=torch.float32, device=x.device)
                else:
                    tap = x_seq[src_pos].float()

                acc = acc + tap * weight[:, j].float()

            result = acc * torch.sigmoid(acc)
            out[start + t] = result.to(out.dtype)


================================================
FILE: megatron/core/ssm/ops/determinism.py
================================================
# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2024, Tri Dao, Albert Gu.

# Some of this code was adopted from https://github.com/state-spaces/mamba/
# This source code is licensed under the Apache license found in the
# LICENSE file in the root directory of this source tree.

import os
import warnings

import torch
from packaging import version

try:
    import triton

    TRITON_VERSION = version.parse(triton.__version__)
except ImportError:
    TRITON_VERSION = version.parse("0.0.0")

TRITON_HAS_CACHE_RESULTS = TRITON_VERSION >= version.parse("3.4.0")
_autotune_warning_issued = False

_deterministic_override = None


def use_deterministic_mode():
    """Use torch deterministic mode."""
    if _deterministic_override is not None:
        return _deterministic_override
    env = os.environ.get('MAMBA_DETERMINISTIC')
    if env:
        return env[0] == '1'
    return torch.are_deterministic_algorithms_enabled()


def set_deterministic_mode(value):
    """Set torch deterministic mode."""
    global _deterministic_override
    _deterministic_override = value


def _estimate_config_cost(cfg):
    """Estimate shared memory cost of a config. Lower is cheaper.

    Returns a tuple (block_cost, num_warps) so that ties in block cost
    are broken deterministically by warp count (fewer warps = cheaper).
    """
    block_product = 1
    for key, val in cfg.kwargs.items():
        if key.startswith('BLOCK') and isinstance(val, int):
            block_product *= val
    stages = getattr(cfg, 'num_stages', 1) or 1
    warps = getattr(cfg, 'num_warps', 1) or 1
    return (block_product * stages, warps)


def _filter_configs_by_block_sizes(configs):
    """Filter configs by TRITON_AUTOTUNE_BLOCK_* env vars.

    Scans environment for any variable matching TRITON_AUTOTUNE_BLOCK_*
    (e.g. TRITON_AUTOTUNE_BLOCK_SIZE_M, TRITON_AUTOTUNE_BLOCK_SIZE_H,
    TRITON_AUTOTUNE_BLOCK_T, TRITON_AUTOTUNE_BLOCK_C, TRITON_AUTOTUNE_BLOCK_SIZE)
    and maps them to the corresponding kernel kwarg (BLOCK_SIZE_M, BLOCK_SIZE_H,
    BLOCK_T, BLOCK_C, BLOCK_SIZE).
    """
    prefix = "TRITON_AUTOTUNE_"
    env_filters = {}
    for env_key, env_val in os.environ.items():
        if env_key.startswith(prefix + "BLOCK") and env_val:
            kwarg_name = env_key[len(prefix) :]
            env_filters[kwarg_name] = int(env_val)
    if not env_filters:
        return None
    matching = configs
    for key, target in sorted(env_filters.items()):
        matching = [c for c in matching if c.kwargs.get(key) == target]
    return matching[:1] if matching else None


def autotune_configs(configs):
    """Select autotune configs for deterministic mode.

    Uses cached autotuning (TRITON_CACHE_AUTOTUNING=1) if Triton >= 3.4.0,
    otherwise auto-selects the cheapest config by block size * stages.
    """
    if not configs or not use_deterministic_mode():
        return configs
    if TRITON_HAS_CACHE_RESULTS and os.environ.get("TRITON_CACHE_AUTOTUNING") == "1":
        return configs
    global _autotune_warning_issued
    if not _autotune_warning_issued:
        _autotune_warning_issued = True
        msg = (
            "Deterministic mode: set TRITON_CACHE_AUTOTUNING=1 for cached autotuning."
            if TRITON_HAS_CACHE_RESULTS
            else "Deterministic mode: upgrade to Triton >= 3.4.0 for cached autotuning."
        )
        warnings.warn(msg)
    filtered = _filter_configs_by_block_sizes(configs)
    if filtered:
        return filtered
    return [min(configs, key=_estimate_config_cost)]


def alloc_tile_workspace(base_shape, tile_dim, dtype, device, deterministic, *, zero_init=True):
    """Allocate buffer for deterministic per-program reductions."""
    if base_shape is None:
        return None, 0
    if deterministic:
        factory = torch.zeros if zero_init else torch.empty
        tensor = factory(*base_shape, tile_dim, device=device, dtype=dtype)
        return tensor, tensor.stride(-1)
    return torch.empty(*base_shape, device=device, dtype=dtype), 0


def finalize_tile_workspace(tensor, deterministic):
    """Finalize tile workspace."""
    if tensor is None:
        return None
    if deterministic:
        tensor = tensor.sum(dim=-1)
    return tensor


================================================
FILE: megatron/core/ssm/ops/mamba_ssm.py
================================================
# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2024, Tri Dao, Albert Gu.

# Some of this code was adopted from https://github.com/state-spaces/mamba/
# This source code is licensed under the Apache license found in the
# LICENSE file in the root directory of this source tree.

import torch
from packaging import version

try:
    import triton
    import triton.language as tl

    TRITON3 = version.parse(triton.__version__) >= version.parse("3.0.0")

    HAVE_TRITON = True
except ImportError:
    from unittest.mock import MagicMock

    from megatron.core.utils import null_decorator

    triton = MagicMock()
    triton.jit = null_decorator
    tl = MagicMock()
    HAVE_TRITON = False
    TRITON3 = False

if TRITON3:

    @triton.jit
    def softplus(dt):
        """Optimized softplus."""
        return tl.math.log(tl.math.exp(dt) + 1)

elif HAVE_TRITON:

    @triton.jit
    def softplus(dt):
        """Optimized softplus."""
        return tl.math.log1p(tl.exp(dt))


@triton.heuristics({"HAS_DT_BIAS": lambda args: args["dt_bias_ptr"] is not None})
@triton.heuristics({"HAS_D": lambda args: args["D_ptr"] is not None})
@triton.heuristics({"HAS_Z": lambda args: args["z_ptr"] is not None})
@triton.heuristics(
    {"HAS_STATE_BATCH_INDICES": lambda args: args["state_batch_indices_ptr"] is not None}
)
@triton.heuristics({"HAS_INT_STATE": lambda args: args["int_state_ptr"] is not None})
@triton.heuristics({"BLOCK_SIZE_DSTATE": lambda args: triton.next_power_of_2(args["dstate"])})
@triton.jit
def _selective_scan_update_kernel(
    # Pointers to matrices
    state_ptr,
    x_ptr,
    dt_ptr,
    dt_bias_ptr,
    A_ptr,
    B_ptr,
    C_ptr,
    D_ptr,
    z_ptr,
    out_ptr,
    state_batch_indices_ptr,
    int_state_ptr,
    # Matrix dimensions
    batch,
    seq_len,
    nheads,
    dim,
    dstate,
    nheads_ngroups_ratio,
    # Strides
    stride_state_batch,
    stride_state_head,
    stride_state_dim,
    stride_state_dstate,
    stride_x_batch,
    stride_x_seq,
    stride_x_head,
    stride_x_dim,
    stride_dt_batch,
    stride_dt_seq,
    stride_dt_head,
    stride_dt_dim,
    stride_dt_bias_head,
    stride_dt_bias_dim,
    stride_A_head,
    stride_A_dim,
    stride_A_dstate,
    stride_B_batch,
    stride_B_seq,
    stride_B_group,
    stride_B_dstate,
    stride_C_batch,
    stride_C_seq,
    stride_C_group,
    stride_C_dstate,
    stride_D_head,
    stride_D_dim,
    stride_z_batch,
    stride_z_seq,
    stride_z_head,
    stride_z_dim,
    stride_out_batch,
    stride_out_seq,
    stride_out_head,
    stride_out_dim,
    stride_int_batch,
    stride_int_seq,
    stride_int_head,
    stride_int_dim,
    stride_int_dstate,
    # Meta-parameters
    DT_SOFTPLUS: tl.constexpr,
    TIE_HDIM: tl.constexpr,
    BLOCK_SIZE_M: tl.constexpr,
    HAS_DT_BIAS: tl.constexpr,
    HAS_D: tl.constexpr,
    HAS_Z: tl.constexpr,
    HAS_STATE_BATCH_INDICES: tl.constexpr,
    HAS_INT_STATE: tl.constexpr,
    BLOCK_SIZE_DSTATE: tl.constexpr,
):
    pid_m = tl.program_id(axis=0)
    pid_b = tl.program_id(axis=1)
    pid_h = tl.program_id(axis=2)

    offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)
    offs_n = tl.arange(0, BLOCK_SIZE_DSTATE)

    out_ptr += pid_b * stride_out_batch + pid_h * stride_out_head
    out_ptrs = out_ptr + offs_m * stride_out_dim

    # 1. State Mapping (handles dynamic batching slot allocation)
    if HAS_STATE_BATCH_INDICES:
        state_batch_indices_ptr += pid_b
        state_batch_idx = tl.load(state_batch_indices_ptr)
        # Skip padding tokens (e.g. from graph capture or inactive slots)
        if state_batch_idx < 0:
            for s in range(seq_len):
                out_s_ptrs = out_ptrs + s * stride_out_seq
                tl.store(out_s_ptrs, 0.0, mask=offs_m < dim)
            return
        state_ptr += state_batch_idx * stride_state_batch + pid_h * stride_state_head
        if HAS_INT_STATE:
            int_state_ptr += state_batch_idx * stride_int_batch + pid_h * stride_int_head
    else:
        state_ptr += pid_b * stride_state_batch + pid_h * stride_state_head
        if HAS_INT_STATE:
            int_state_ptr += pid_b * stride_int_batch + pid_h * stride_int_head

    # Base Pointers for Sequence iteration
    x_ptr += pid_b * stride_x_batch + pid_h * stride_x_head
    dt_ptr += pid_b * stride_dt_batch + pid_h * stride_dt_head
    if HAS_DT_BIAS:
        dt_bias_ptr += pid_h * stride_dt_bias_head

    A_ptr += pid_h * stride_A_head
    B_ptr += pid_b * stride_B_batch + (pid_h // nheads_ngroups_ratio) * stride_B_group
    C_ptr += pid_b * stride_C_batch + (pid_h // nheads_ngroups_ratio) * stride_C_group
    if HAS_Z:
        z_ptr += pid_b * stride_z_batch + pid_h * stride_z_head

    # Constant offsets (A, D, and bias do not have a sequence dimension)
    state_ptrs = state_ptr + (
        offs_m[:, None] * stride_state_dim + offs_n[None, :] * stride_state_dstate
    )
    if HAS_INT_STATE:
        int_state_ptrs = int_state_ptr + (
            offs_m[:, None] * stride_int_dim + offs_n[None, :] * stride_int_dstate
        )

    x_ptrs = x_ptr + offs_m * stride_x_dim
    dt_ptrs = dt_ptr + offs_m * stride_dt_dim

    if HAS_DT_BIAS:
        dt_bias_ptrs = dt_bias_ptr + offs_m * stride_dt_bias_dim
    if HAS_D:
        D_ptr += pid_h * stride_D_head
        D_ptrs = D_ptr + offs_m * stride_D_dim

    A_ptrs = A_ptr + (offs_m[:, None] * stride_A_dim + offs_n[None, :] * stride_A_dstate)
    B_ptrs = B_ptr + offs_n * stride_B_dstate
    C_ptrs = C_ptr + offs_n * stride_C_dstate

    if HAS_Z:
        z_ptrs = z_ptr + offs_m * stride_z_dim

    # Load initial historical state and constant parameters
    state = tl.load(
        state_ptrs, mask=(offs_m[:, None] < dim) & (offs_n[None, :] < dstate), other=0.0
    ).to(tl.float32)

    if not TIE_HDIM:
        A = tl.load(
            A_ptrs, mask=(offs_m[:, None] < dim) & (offs_n[None, :] < dstate), other=0.0
        ).to(tl.float32)
    else:
        A = tl.load(A_ptr).to(tl.float32)

    if HAS_D:
        D = tl.load(D_ptrs, mask=offs_m < dim, other=0.0).to(tl.float32)

    # ----------------------------------------------------
    # Sequence Loop (Processes Main Token + Speculative Drafts)
    # ----------------------------------------------------
    for s in range(seq_len):
        x_s_ptrs = x_ptrs + s * stride_x_seq
        dt_s_ptrs = dt_ptrs + s * stride_dt_seq
        B_s_ptrs = B_ptrs + s * stride_B_seq
        C_s_ptrs = C_ptrs + s * stride_C_seq
        if HAS_Z:
            z_s_ptrs = z_ptrs + s * stride_z_seq

        x = tl.load(x_s_ptrs, mask=offs_m < dim, other=0.0).to(tl.float32)

        # Calculate dt and dA
        if not TIE_HDIM:
            dt = tl.load(dt_s_ptrs, mask=offs_m < dim, other=0.0).to(tl.float32)
            if HAS_DT_BIAS:
                dt += tl.load(dt_bias_ptrs, mask=offs_m < dim, other=0.0).to(tl.float32)
            if DT_SOFTPLUS:
                dt = tl.where(dt <= 20.0, softplus(dt), dt)
            dA = tl.exp(A * dt[:, None])
        else:
            dt = tl.load(dt_ptr + s * stride_dt_seq).to(tl.float32)
            if HAS_DT_BIAS:
                dt += tl.load(dt_bias_ptr).to(tl.float32)
            if DT_SOFTPLUS:
                dt = tl.where(dt <= 20.0, softplus(dt), dt)
            dA = tl.exp(A * dt)

        # Load B and C
        B = tl.load(B_s_ptrs, mask=offs_n < dstate, other=0.0).to(tl.float32)
        C = tl.load(C_s_ptrs, mask=offs_n < dstate, other=0.0).to(tl.float32)
        if HAS_Z:
            z = tl.load(z_s_ptrs, mask=offs_m < dim, other=0.0).to(tl.float32)

        if not TIE_HDIM:
            dB = B[None, :] * dt[:, None]
        else:
            dB = B * dt

        # ----------------------------------------------------
        # The Core State Recurrence (h_t = dA * h_{t-1} + dB * x_t)
        # ----------------------------------------------------
        state = state * dA + dB * x[:, None]

        # ----------------------------------------------------
        # Dump Intermediate Speculative State Snapshot
        # ----------------------------------------------------
        if HAS_INT_STATE:
            int_state_s_ptrs = int_state_ptrs + s * stride_int_seq
            tl.store(
                int_state_s_ptrs, state, mask=(offs_m[:, None] < dim) & (offs_n[None, :] < dstate)
            )

        # Calculate Output
        out = tl.sum(state * C[None, :], axis=1)
        if HAS_D:
            out += x * D
        if HAS_Z:
            out *= z * tl.sigmoid(z)

        out_s_ptrs = out_ptrs + s * stride_out_seq
        tl.store(out_s_ptrs, out, mask=offs_m < dim)

    # After processing all sequence steps, persist the final state back to HBM
    tl.store(state_ptrs, state, mask=(offs_m[:, None] < dim) & (offs_n[None, :] < dstate))


def selective_state_update(
    state,
    x,
    dt,
    A,
    B,
    C,
    D=None,
    z=None,
    dt_bias=None,
    dt_softplus=False,
    state_batch_indices=None,
    intermediate_ssm_states=None,
):
    """
    Argument:
        state: (batch, dim, dstate) or (batch, nheads, dim, dstate)
        x: (batch, dim), (batch, seqlen, dim), (batch, nheads, dim) or (batch, seqlen, nheads, dim)
        dt: Matches x
        A: (dim, dstate) or (nheads, dim, dstate)
        B: (batch, dstate), (batch, seqlen, dstate), (batch, ngroups, dstate) or
            (batch, seqlen, ngroups, dstate)
        C: Matches B
        D: (dim,) or (nheads, dim)
        z: Matches x
        dt_bias: (dim,) or (nheads, dim)
        intermediate_ssm_states: Optional buffer of shape (batch, seqlen, nheads, dim, dstate)
                                 or (batch, seqlen, dim, dstate)
    Return:
        out: shape matches x
    """
    has_heads = state.dim() > 3
    if not has_heads:
        state = state.unsqueeze(1)

    # Standardize inputs to explicit sequence and head dimensions: (batch, seq_len, nheads, dim)
    is_seq_unsq = False
    if has_heads:
        if x.dim() == 3:  # (batch, nheads, dim) -> (batch, 1, nheads, dim)
            x = x.unsqueeze(1)
            dt = dt.unsqueeze(1)
            B = B.unsqueeze(1)
            C = C.unsqueeze(1)
            if z is not None:
                z = z.unsqueeze(1)
            is_seq_unsq = True
    else:
        if x.dim() == 2:  # (batch, dim) -> (batch, 1, 1, dim)
            x = x.unsqueeze(1).unsqueeze(2)
            dt = dt.unsqueeze(1).unsqueeze(2)
            B = B.unsqueeze(1).unsqueeze(2)
            C = C.unsqueeze(1).unsqueeze(2)
            if z is not None:
                z = z.unsqueeze(1).unsqueeze(2)
            is_seq_unsq = True
        elif x.dim() == 3:  # (batch, seqlen, dim) -> (batch, seqlen, 1, dim)
            x = x.unsqueeze(2)
            dt = dt.unsqueeze(2)
            B = B.unsqueeze(2)
            C = C.unsqueeze(2)
            if z is not None:
                z = z.unsqueeze(2)

    if A.dim() == 2:
        A = A.unsqueeze(0)
    if D is not None and D.dim() == 1:
        D = D.unsqueeze(0)
    if dt_bias is not None and dt_bias.dim() == 1:
        dt_bias = dt_bias.unsqueeze(0)

    # Set up Intermediate State standardization
    if intermediate_ssm_states is not None:
        if not has_heads and intermediate_ssm_states.dim() == 4:
            intermediate_ssm_states = intermediate_ssm_states.unsqueeze(
                2
            )  # (batch, seqlen, 1, dim, dstate)
        int_state_strides = (
            intermediate_ssm_states.stride(0),
            intermediate_ssm_states.stride(1),
            intermediate_ssm_states.stride(2),
            intermediate_ssm_states.stride(3),
            intermediate_ssm_states.stride(4),
        )
    else:
        intermediate_ssm_states = x  # Dummy pointer
        int_state_strides = (0, 0, 0, 0, 0)

    batch, seq_len, nheads, dim = x.shape
    dstate = state.shape[-1]
    ngroups = B.shape[-2]

    out = torch.empty_like(x)
    grid = lambda META: (triton.cdiv(dim, META['BLOCK_SIZE_M']), batch, nheads)
    z_strides = (
        (z.stride(0), z.stride(1), z.stride(2), z.stride(3)) if z is not None else (0, 0, 0, 0)
    )

    BLOCK_SIZE_M, num_warps = (
        (32, 4)
        if dstate <= 16
        else (
            (16, 4)
            if dstate <= 32
            else ((8, 4) if dstate <= 64 else ((4, 4) if dstate <= 128 else ((4, 8))))
        )
    )

    tie_hdim = (
        A.stride(-1) == 0
        and A.stride(-2) == 0
        and dt.stride(-1) == 0
        and (dt_bias is None or dt_bias.stride(-1) == 0)
    )

    with torch.cuda.device(x.device.index):
        _selective_scan_update_kernel[grid](
            state,
            x,
            dt,
            dt_bias,
            A,
            B,
            C,
            D,
            z,
            out,
            state_batch_indices,
            intermediate_ssm_states,
            batch,
            seq_len,
            nheads,
            dim,
            dstate,
            nheads // ngroups,
            state.stride(0),
            state.stride(1),
            state.stride(2),
            state.stride(3),
            x.stride(0),
            x.stride(1),
            x.stride(2),
            x.stride(3),
            dt.stride(0),
            dt.stride(1),
            dt.stride(2),
            dt.stride(3),
            *(dt_bias.stride(0), dt_bias.stride(1)) if dt_bias is not None else (0, 0),
            A.stride(0),
            A.stride(1),
            A.stride(2),
            B.stride(0),
            B.stride(1),
            B.stride(2),
            B.stride(3),
            C.stride(0),
            C.stride(1),
            C.stride(2),
            C.stride(3),
            *(D.stride(0), D.stride(1)) if D is not None else (0, 0),
            z_strides[0],
            z_strides[1],
            z_strides[2],
            z_strides[3],
            out.stride(0),
            out.stride(1),
            out.stride(2),
            out.stride(3),
            *int_state_strides,
            dt_softplus,
            tie_hdim,
            BLOCK_SIZE_M,
            num_warps=num_warps,
        )

    # Revert dimensions back to match original x format
    if not has_heads:
        out = out.squeeze(2)
    if is_seq_unsq:
        out = out.squeeze(1)

    return out


================================================
FILE: megatron/core/ssm/ops/ssd_bmm.py
================================================
# Copyright (c) 2024, Tri Dao, Albert Gu.
# Adapted from https://github.com/state-spaces/mamba/blob/v2.2.4/mamba_ssm/ops/triton/ssd_bmm.py
# Adapted from vLLM project (Apache-2.0).

import torch
import triton
import triton.language as tl

from megatron.core.ssm.ops.determinism import autotune_configs


@triton.autotune(
    configs=autotune_configs(
        [
            triton.Config(
                {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64},
                num_stages=3,
                num_warps=8,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32},
                num_stages=5,
                num_warps=2,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32},
                num_stages=5,
                num_warps=2,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=2,
            ),
        ]
    ),
    key=["chunk_size", "K", "IS_CAUSAL"],
)
@triton.jit
def _bmm_chunk_fwd_kernel(
    # Pointers to matrices
    a_ptr,
    b_ptr,
    out_ptr,
    cu_chunk_seqlens_ptr,
    # Matrix dimensions
    seqlen,
    chunk_size: tl.constexpr,
    K: tl.constexpr,
    ngroups: tl.constexpr,
    stride_a_seqlen: tl.int64,
    stride_a_head: tl.int64,
    stride_ak: tl.constexpr,
    stride_b_seqlen: tl.int64,
    stride_b_head: tl.int64,
    stride_bk: tl.constexpr,
    stride_out_chunk: tl.int64,
    stride_out_head: tl.int64,
    stride_outm: tl.int64,
    stride_outn: tl.constexpr,
    # Meta-parameters
    IS_CAUSAL: tl.constexpr,
    dot_dtype: tl.constexpr,
    BLOCK_SIZE_M: tl.constexpr,
    BLOCK_SIZE_N: tl.constexpr,
    BLOCK_SIZE_K: tl.constexpr,
):
    pid_ch = tl.program_id(axis=1).to(tl.int64)
    pid_c = pid_ch // ngroups
    pid_h = pid_ch - pid_c * ngroups
    num_pid_n = tl.cdiv(chunk_size, BLOCK_SIZE_N)
    pid_m = tl.program_id(axis=0) // num_pid_n
    pid_n = tl.program_id(axis=0) % num_pid_n
    if IS_CAUSAL:
        if pid_n * BLOCK_SIZE_N >= (pid_m + 1) * BLOCK_SIZE_M:
            return

    chunk_seqlen_start = tl.load(cu_chunk_seqlens_ptr + pid_c)
    chunk_seqlen_end = tl.load(cu_chunk_seqlens_ptr + pid_c + 1)

    a_ptr += chunk_seqlen_start * stride_a_seqlen + pid_h * stride_a_head
    b_ptr += chunk_seqlen_start * stride_b_seqlen + pid_h * stride_b_head

    offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)
    offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
    offs_k = tl.arange(0, BLOCK_SIZE_K)
    a_ptrs = a_ptr + (offs_m[:, None] * stride_a_seqlen + offs_k[None, :] * stride_ak)
    b_ptrs = b_ptr + (offs_k[:, None] * stride_bk + offs_n[None, :] * stride_b_seqlen)
    chunk_size_limit = chunk_seqlen_end - chunk_seqlen_start

    acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32)

    # compute a * b.T
    for k in range(0, tl.cdiv(K, BLOCK_SIZE_K)):
        a = tl.load(
            a_ptrs,
            mask=(offs_m[:, None] < chunk_size_limit) & (offs_k[None, :] < K - k * BLOCK_SIZE_K),
            other=0.0,
        ).to(dot_dtype)
        b = tl.load(
            b_ptrs,
            mask=(offs_k[:, None] < K - k * BLOCK_SIZE_K) & (offs_n[None, :] < chunk_size_limit),
            other=0.0,
        ).to(dot_dtype)
        acc += tl.dot(a, b)
        a_ptrs += BLOCK_SIZE_K * stride_ak
        b_ptrs += BLOCK_SIZE_K * stride_bk

    offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)
    offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)

    out = acc.to(out_ptr.dtype.element_ty)
    out_ptr += pid_c * stride_out_chunk + pid_h * stride_out_head
    out_ptrs = out_ptr + (stride_outm * offs_m[:, None] + offs_n[None, :] * stride_outn)
    tl.store(out_ptrs, out, mask=(offs_m[:, None] < chunk_size) & (offs_n[None, :] < chunk_size))


def _bmm_chunk_fwd(a, b, chunk_size, cu_chunk_seqlens, causal=False, output_dtype=None):
    """
    Argument:
        a: (seqlen, ngroups, k)
        b: (seqlen, ngroups, k)
        chunk_size: int
        cu_chunk_seq_lens: (nchunks+1,)
        causal: if True, then out[i, j] for i > j will be arbitrary, only out[i, j] for i <= j are
            guaranteed to be correct.
    Return:
        out: (nchunks, ngroups, chunk_size, chunk_size)
    """
    seqlen, ngroups, k = a.shape
    assert b.shape == a.shape
    if a.stride(-1) != 1 and a.stride(0) != 1:
        a = a.contiguous()
    if b.stride(-1) != 1 and b.stride(0) != 1:
        b = b.contiguous()

    nchunks = len(cu_chunk_seqlens) - 1
    # Allocates output.
    out_dtype = a.dtype if output_dtype is None else output_dtype
    out = torch.empty((nchunks, ngroups, chunk_size, chunk_size), device=a.device, dtype=out_dtype)
    dot_dtype = (
        tl.bfloat16
        if a.dtype == torch.bfloat16 or b.dtype == torch.bfloat16
        else (tl.float16 if a.dtype == torch.float16 or b.dtype == torch.float16 else tl.float32)
    )
    grid = lambda META: (
        triton.cdiv(chunk_size, META["BLOCK_SIZE_M"])
        * triton.cdiv(chunk_size, META["BLOCK_SIZE_N"]),
        nchunks * ngroups,
    )
    with torch.cuda.device(a.device.index):
        _bmm_chunk_fwd_kernel[grid](
            a_ptr=a,
            b_ptr=b,
            out_ptr=out,
            cu_chunk_seqlens_ptr=cu_chunk_seqlens,
            seqlen=seqlen,
            chunk_size=chunk_size,
            K=k,
            ngroups=ngroups,
            stride_a_seqlen=a.stride(0),
            stride_a_head=a.stride(1),
            stride_ak=a.stride(2),
            stride_b_seqlen=b.stride(0),
            stride_b_head=b.stride(1),
            stride_bk=b.stride(2),
            stride_out_chunk=out.stride(0),
            stride_out_head=out.stride(1),
            stride_outm=out.stride(-2),
            stride_outn=out.stride(-1),
            IS_CAUSAL=causal,
            dot_dtype=dot_dtype,
        )
    return out


================================================
FILE: megatron/core/ssm/ops/ssd_chunk_scan.py
================================================
# Copyright (c) 2024, Tri Dao, Albert Gu.
# Adapted from:
#   https://github.com/state-spaces/mamba/blob/v2.2.4/mamba_ssm/ops/triton/ssd_chunk_scan.py
# Adapted from vLLM project (Apache-2.0).

import triton
import triton.language as tl
from packaging import version

from megatron.core.ssm.ops.determinism import autotune_configs

TRITON_22 = version.parse(triton.__version__) >= version.parse("2.2.0")


@triton.autotune(
    configs=autotune_configs(
        [
            triton.Config(
                {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64},
                num_stages=3,
                num_warps=8,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32},
                num_stages=5,
                num_warps=2,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32},
                num_stages=5,
                num_warps=2,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=2,
            ),
        ]
    ),
    key=["chunk_size", "hdim", "dstate", "IS_CAUSAL"],
)
@triton.jit
def _chunk_scan_fwd_kernel(
    # Pointers to matrices
    cb_ptr,
    x_ptr,
    z_ptr,
    out_ptr,
    dt_ptr,
    dA_cumsum_ptr,
    seq_idx_ptr,
    C_ptr,
    states_ptr,
    D_ptr,
    initstates_ptr,
    cu_chunk_seqlens_ptr,
    # Matrix dimensions
    chunk_size: tl.constexpr,
    hdim: tl.constexpr,
    dstate: tl.constexpr,
    seqlen,
    nheads_ngroups_ratio: tl.constexpr,
    # Strides
    stride_cb_chunk: tl.int64,
    stride_cb_head: tl.int64,
    stride_cb_csize_m: tl.int64,
    stride_cb_csize_k: tl.constexpr,
    stride_x_seqlen: tl.int64,
    stride_x_head: tl.int64,
    stride_x_hdim: tl.constexpr,
    stride_z_seqlen: tl.int64,
    stride_z_head: tl.int64,
    stride_z_hdim: tl.constexpr,
    stride_out_seqlen: tl.int64,
    stride_out_head: tl.int64,
    stride_out_hdim: tl.constexpr,
    stride_dt_chunk: tl.int64,
    stride_dt_head: tl.int64,
    stride_dt_csize: tl.constexpr,
    stride_dA_cs_chunk: tl.int64,
    stride_dA_cs_head: tl.int64,
    stride_dA_cs_csize: tl.constexpr,
    stride_seq_idx_chunk: tl.constexpr,
    stride_C_seqlen: tl.int64,
    stride_C_head: tl.int64,
    stride_C_dstate: tl.constexpr,
    stride_states_chunk: tl.int64,
    stride_states_head: tl.int64,
    stride_states_hdim: tl.int64,
    stride_states_dstate: tl.constexpr,
    stride_init_states_batch: tl.int64,
    stride_init_states_head: tl.int64,
    stride_init_states_hdim: tl.int64,
    stride_init_states_dstate: tl.constexpr,
    stride_D_head: tl.constexpr,
    # Meta-parameters
    IS_CAUSAL: tl.constexpr,
    HAS_D: tl.constexpr,
    D_HAS_HDIM: tl.constexpr,
    HAS_Z: tl.constexpr,
    BLOCK_SIZE_M: tl.constexpr,
    BLOCK_SIZE_N: tl.constexpr,
    BLOCK_SIZE_K: tl.constexpr,
    BLOCK_SIZE_DSTATE: tl.constexpr,
    IS_TRITON_22: tl.constexpr,
    HAS_INITSTATES: tl.constexpr,
):
    pid_c = tl.program_id(axis=1).to(tl.int64)
    pid_h = tl.program_id(axis=2)
    num_pid_n = tl.cdiv(hdim, BLOCK_SIZE_N)
    pid_m = tl.program_id(axis=0) // num_pid_n
    pid_n = tl.program_id(axis=0) % num_pid_n
    cb_ptr += pid_c * stride_cb_chunk + (pid_h // nheads_ngroups_ratio) * stride_cb_head
    chunk_seqlen_start = tl.load(cu_chunk_seqlens_ptr + pid_c)
    chunk_seqlen_end = tl.load(cu_chunk_seqlens_ptr + pid_c + 1)
    x_ptr += chunk_seqlen_start * stride_x_seqlen + pid_h * stride_x_head
    dt_ptr += pid_c * stride_dt_chunk + pid_h * stride_dt_head
    dA_cumsum_ptr += pid_c * stride_dA_cs_chunk + pid_h * stride_dA_cs_head
    C_ptr += chunk_seqlen_start * stride_C_seqlen + (pid_h // nheads_ngroups_ratio) * stride_C_head

    # M-block offsets and prev states
    #  - logic in next block may override these if there is an active offset
    offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)

    seq_idx_ptr += pid_c * stride_seq_idx_chunk
    seq_idx = tl.load(seq_idx_ptr)
    seq_idx_prev = tl.load(seq_idx_ptr - stride_seq_idx_chunk, mask=pid_c >= 1, other=-1)

    if HAS_INITSTATES and (seq_idx != seq_idx_prev):
        prev_states_ptr = (
            initstates_ptr + seq_idx * stride_init_states_batch + pid_h * stride_init_states_head
        )
        prev_states_hdim = stride_init_states_hdim
        prev_states_dstate = stride_init_states_dstate
    else:
        prev_states_ptr = (
            states_ptr + (pid_c - 1) * stride_states_chunk + pid_h * stride_states_head
        )
        prev_states_hdim = stride_states_hdim
        prev_states_dstate = stride_states_dstate

    chunk_size_limit = chunk_seqlen_end - chunk_seqlen_start

    offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
    dA_cs_m = tl.load(
        dA_cumsum_ptr + offs_m * stride_dA_cs_csize, mask=offs_m < chunk_size, other=0.0
    ).to(tl.float32)

    acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32)

    offs_out_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)
    offs_out_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)

    # Faster to just do 1 iteration with larger BLOCK_SIZE_K, up to block size 128
    offs_k_dstate = tl.arange(0, BLOCK_SIZE_DSTATE if BLOCK_SIZE_DSTATE <= 128 else BLOCK_SIZE_K)
    C_ptrs = C_ptr + (offs_m[:, None] * stride_C_seqlen + offs_k_dstate[None, :] * stride_C_dstate)

    scale_m = tl.exp(dA_cs_m)
    if BLOCK_SIZE_DSTATE <= 128:
        C = tl.load(
            C_ptrs,
            mask=(offs_m[:, None] < chunk_size_limit) & (offs_k_dstate[None, :] < dstate),
            other=0.0,
        )

        if not HAS_INITSTATES and (seq_idx != seq_idx_prev):
            # if no init states AND starting a new sequence, we need zeros
            prev_states = tl.zeros((BLOCK_SIZE_DSTATE, BLOCK_SIZE_N), dtype=C_ptr.dtype.element_ty)
        else:
            # otherwise read the previous state
            prev_states_ptrs = (
                prev_states_ptr
                + offs_n[None, :] * prev_states_hdim
                + offs_k_dstate[:, None] * prev_states_dstate
            )
            prev_states = tl.load(
                prev_states_ptrs,
                mask=(offs_k_dstate[:, None] < dstate) & (offs_n[None, :] < hdim),
                other=0.0,
            )
            prev_states = prev_states.to(C_ptr.dtype.element_ty)

        acc = tl.dot(C, prev_states) * scale_m[:, None]

    else:
        prev_states_ptrs = (
            prev_states_ptr
            + offs_n[None, :] * prev_states_hdim
            + offs_k_dstate[:, None] * prev_states_dstate
        )
        for k in range(0, dstate, BLOCK_SIZE_K):
            C = tl.load(
                C_ptrs,
                mask=(offs_m[:, None] < chunk_size_limit) & (offs_k_dstate[None, :] < dstate - k),
                other=0.0,
            )
            if not HAS_INITSTATES and (seq_idx != seq_idx_prev):
                prev_states = tl.zeros((BLOCK_SIZE_K, BLOCK_SIZE_N), dtype=C_ptr.dtype.element_ty)
            else:
                prev_states = tl.load(
                    prev_states_ptrs,
                    mask=(offs_k_dstate[:, None] < dstate - k) & (offs_n[None, :] < hdim),
                    other=0.0,
                )
                prev_states = prev_states.to(C_ptr.dtype.element_ty)
            acc += tl.dot(C, prev_states)
            C_ptrs += BLOCK_SIZE_K
            prev_states_ptrs += BLOCK_SIZE_K
        acc *= scale_m[:, None]

    offs_k = tl.arange(0, BLOCK_SIZE_K)
    cb_ptrs = cb_ptr + (offs_m[:, None] * stride_cb_csize_m + offs_k[None, :] * stride_cb_csize_k)
    x_ptrs = x_ptr + (offs_k[:, None] * stride_x_seqlen + offs_n[None, :] * stride_x_hdim)
    dt_ptrs = dt_ptr + offs_k * stride_dt_csize
    dA_cumsum_ptrs = dA_cumsum_ptr + offs_k * stride_dA_cs_csize
    K_MAX = chunk_size_limit if not IS_CAUSAL else min((pid_m + 1) * BLOCK_SIZE_M, chunk_size_limit)
    for k in range(0, K_MAX, BLOCK_SIZE_K):
        cb = tl.load(
            cb_ptrs,
            mask=(offs_m[:, None] < chunk_size) & (offs_k[None, :] < chunk_size - k),
            other=0.0,
        ).to(tl.float32)
        dA_cs_k = tl.load(dA_cumsum_ptrs, mask=offs_k < chunk_size - k, other=0.0).to(tl.float32)
        # If there's seq_idx, we already set cb[i, j] = 0 for seq_idx[i] != seq_idx[j].
        # So we don't need masking wrt seq_idx here.
        cb *= tl.exp(tl.minimum(dA_cs_m[:, None] - dA_cs_k[None, :], 0.0))
        dt_k = tl.load(dt_ptrs, mask=offs_k < chunk_size - k, other=0.0).to(tl.float32)
        cb *= dt_k
        if IS_CAUSAL:
            mask = offs_m[:, None] >= k + offs_k[None, :]
            cb = tl.where(mask, cb, 0.0)
        cb = cb.to(x_ptr.dtype.element_ty)
        x = tl.load(
            x_ptrs,
            mask=(offs_k[:, None] < chunk_size_limit - k) & (offs_n[None, :] < hdim),
            other=0.0,
        )
        acc += tl.dot(cb, x)
        cb_ptrs += BLOCK_SIZE_K * stride_cb_csize_k
        x_ptrs += BLOCK_SIZE_K * stride_x_seqlen
        dt_ptrs += BLOCK_SIZE_K * stride_dt_csize
        dA_cumsum_ptrs += BLOCK_SIZE_K * stride_dA_cs_csize

    offs_out_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)
    offs_out_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)

    if HAS_D:
        if D_HAS_HDIM:
            D = tl.load(D_ptr + pid_h * stride_D_head + offs_n, mask=offs_n < hdim, other=0.0).to(
                tl.float32
            )
        else:
            D = tl.load(D_ptr + pid_h * stride_D_head).to(tl.float32)
        x_residual = tl.load(
            x_ptr + (offs_m[:, None] * stride_x_seqlen + offs_n[None, :] * stride_x_hdim),
            mask=(offs_m[:, None] < chunk_size_limit) & (offs_n[None, :] < hdim),
            other=0.0,
        ).to(tl.float32)
        acc += x_residual * D

    if HAS_Z:
        z_ptr += chunk_seqlen_start * stride_z_seqlen + pid_h * stride_z_head
        z_ptrs = z_ptr + (
            stride_z_seqlen * offs_out_m[:, None] + stride_z_hdim * offs_out_n[None, :]
        )
        z = tl.load(
            z_ptrs,
            mask=(offs_out_m[:, None] < chunk_size_limit) & (offs_out_n[None, :] < hdim),
            other=0.0,
        ).to(tl.float32)
        acc *= z * tl.sigmoid(z)

    out_ptr += chunk_seqlen_start * stride_out_seqlen + pid_h * stride_out_head
    out_ptrs = out_ptr + (
        stride_out_seqlen * offs_out_m[:, None] + offs_out_n[None, :] * stride_out_hdim
    )
    tl.store(
        out_ptrs, acc, mask=(offs_out_m[:, None] < chunk_size_limit) & (offs_out_n[None, :] < hdim)
    )


def _chunk_scan_fwd(
    cb,
    x,
    dt,
    dA_cumsum,
    C,
    states,
    cu_chunk_seqlens,
    out,
    seq_idx,
    D=None,
    z=None,
    initial_states=None,
):
    assert seq_idx is not None, "this implementation requires seq_idx"

    seqlen, nheads, headdim = x.shape
    _, nchunks, chunk_size = dt.shape
    _, ngroups, dstate = C.shape
    assert nheads % ngroups == 0
    assert C.shape == (seqlen, ngroups, dstate)
    assert cb.shape == (nchunks, ngroups, chunk_size, chunk_size)
    if D is not None:
        assert D.shape == (nheads, headdim) or D.shape == (nheads,)
    if z is not None:
        assert z.shape == x.shape
    assert dt.shape == (nheads, nchunks, chunk_size)
    assert dA_cumsum.shape == (nheads, nchunks, chunk_size)
    assert states.shape == (nchunks, nheads, headdim, dstate)
    assert seq_idx.shape == (nchunks,)

    grid = lambda META: (
        triton.cdiv(chunk_size, META["BLOCK_SIZE_M"]) * triton.cdiv(headdim, META["BLOCK_SIZE_N"]),
        nchunks,
        nheads,
    )

    z_strides = (z.stride(0), z.stride(1), z.stride(2)) if z is not None else (0, 0, 0)
    initial_states_strides = (
        (
            initial_states.stride(0),
            initial_states.stride(1),
            initial_states.stride(2),
            initial_states.stride(3),
        )
        if initial_states is not None
        else (0, 0, 0, 0)
    )

    _chunk_scan_fwd_kernel[grid](
        cb_ptr=cb,
        x_ptr=x,
        z_ptr=z,
        out_ptr=out,
        dt_ptr=dt,
        dA_cumsum_ptr=dA_cumsum,
        seq_idx_ptr=seq_idx,
        C_ptr=C,
        states_ptr=states,
        D_ptr=D,
        initstates_ptr=initial_states,
        cu_chunk_seqlens_ptr=cu_chunk_seqlens,
        chunk_size=chunk_size,
        hdim=headdim,
        dstate=dstate,
        seqlen=seqlen,
        nheads_ngroups_ratio=nheads // ngroups,
        stride_cb_chunk=cb.stride(0),
        stride_cb_head=cb.stride(1),
        stride_cb_csize_m=cb.stride(2),
        stride_cb_csize_k=cb.stride(3),
        stride_x_seqlen=x.stride(0),
        stride_x_head=x.stride(1),
        stride_x_hdim=x.stride(2),
        stride_z_seqlen=z_strides[0],
        stride_z_head=z_strides[1],
        stride_z_hdim=z_strides[2],
        stride_out_seqlen=out.stride(0),
        stride_out_head=out.stride(1),
        stride_out_hdim=out.stride(2),
        stride_dt_chunk=dt.stride(1),
        stride_dt_head=dt.stride(0),
        stride_dt_csize=dt.stride(2),
        stride_dA_cs_chunk=dA_cumsum.stride(1),
        stride_dA_cs_head=dA_cumsum.stride(0),
        stride_dA_cs_csize=dA_cumsum.stride(2),
        stride_seq_idx_chunk=seq_idx.stride(0),
        stride_C_seqlen=C.stride(0),
        stride_C_head=C.stride(1),
        stride_C_dstate=C.stride(2),
        stride_states_chunk=states.stride(0),
        stride_states_head=states.stride(1),
        stride_states_hdim=states.stride(2),
        stride_states_dstate=states.stride(3),
        stride_init_states_batch=initial_states_strides[0],
        stride_init_states_head=initial_states_strides[1],
        stride_init_states_hdim=initial_states_strides[2],
        stride_init_states_dstate=initial_states_strides[3],
        stride_D_head=D.stride(0) if D is not None else 0,
        IS_CAUSAL=True,
        HAS_D=D is not None,
        D_HAS_HDIM=D.dim() == 2 if D is not None else True,
        HAS_Z=z is not None,
        BLOCK_SIZE_DSTATE=max(triton.next_power_of_2(dstate), 16),
        IS_TRITON_22=TRITON_22,
        HAS_INITSTATES=initial_states is not None,
    )
    return


================================================
FILE: megatron/core/ssm/ops/ssd_chunk_state.py
================================================
# Copyright (c) 2024, Tri Dao, Albert Gu.
# Adapted from:
#   https://github.com/state-spaces/mamba/blob/v2.2.4/mamba_ssm/ops/triton/ssd_chunk_state.py
# Adapted from vLLM project (Apache-2.0).

import torch
import triton
import triton.language as tl
from packaging import version

from megatron.core.ssm.ops.determinism import autotune_configs

try:
    TRITON3 = version.parse(triton.__version__) >= version.parse("3.0.0")
except:
    raise ImportError("Triton version 3.0.0 or higher is required")

if TRITON3:

    @triton.jit
    def softplus(dt):  # pylint: disable=C0116
        dt = tl.where(dt <= 20.0, tl.math.log(tl.math.exp(dt) + 1), dt)
        return dt

else:

    @triton.jit
    def softplus(dt):  # pylint: disable=C0116
        dt = tl.where(dt <= 20.0, tl.math.log1p(tl.exp(dt)), dt)
        return dt


@triton.autotune(
    configs=autotune_configs(
        [
            triton.Config({"BLOCK_SIZE_H": 2}),
            triton.Config({"BLOCK_SIZE_H": 4}),
            triton.Config({"BLOCK_SIZE_H": 8}),
            triton.Config({"BLOCK_SIZE_H": 16}),
            triton.Config({"BLOCK_SIZE_H": 32}),
            triton.Config({"BLOCK_SIZE_H": 64}),
        ]
    ),
    key=["chunk_size", "nheads"],
)
@triton.jit
def _chunk_cumsum_fwd_kernel(
    # Pointers to matrices
    dt_ptr,
    A_ptr,
    dt_bias_ptr,
    dt_out_ptr,
    dA_cumsum_ptr,
    cu_chunk_seqlens_ptr,
    # Matrix dimension
    seqlen,
    nheads: tl.constexpr,
    chunk_size: tl.constexpr,
    dt_min: tl.constexpr,
    dt_max: tl.constexpr,
    # Strides
    stride_dt_seqlen: tl.int64,
    stride_dt_head: tl.constexpr,
    stride_A_head: tl.constexpr,
    stride_dt_bias_head: tl.constexpr,
    stride_dt_out_head: tl.int64,
    stride_dt_out_chunk: tl.int64,
    stride_dt_out_csize: tl.constexpr,
    stride_dA_cs_head: tl.int64,
    stride_dA_cs_chunk: tl.int64,
    stride_dA_cs_csize: tl.constexpr,
    # Meta-parameters
    DT_SOFTPLUS: tl.constexpr,
    HAS_DT_BIAS: tl.constexpr,
    BLOCK_SIZE_H: tl.constexpr,
    BLOCK_SIZE_CHUNK: tl.constexpr,
):
    # if dt is long, may cause problems, so use 64 bit
    # https://github.com/triton-lang/triton/issues/1058
    pid_c = tl.program_id(axis=0).to(tl.int64)
    pid_h = tl.program_id(axis=1)

    chunk_seqlen_start = tl.load(cu_chunk_seqlens_ptr + pid_c)
    chunk_seqlen_end = tl.load(cu_chunk_seqlens_ptr + pid_c + 1)

    dt_ptr += chunk_seqlen_start * stride_dt_seqlen
    dt_out_ptr += pid_c * stride_dt_out_chunk
    dA_cumsum_ptr += pid_c * stride_dA_cs_chunk

    offs_h = pid_h * BLOCK_SIZE_H + tl.arange(0, BLOCK_SIZE_H)
    offs_c = tl.arange(0, BLOCK_SIZE_CHUNK)
    dt_ptrs = dt_ptr + (offs_h[:, None] * stride_dt_head + offs_c[None, :] * stride_dt_seqlen)
    A_ptrs = A_ptr + offs_h * stride_A_head
    dt_out_ptrs = dt_out_ptr + (
        offs_h[:, None] * stride_dt_out_head + offs_c[None, :] * stride_dt_out_csize
    )
    dA_cs_ptrs = dA_cumsum_ptr + (
        offs_h[:, None] * stride_dA_cs_head + offs_c[None, :] * stride_dA_cs_csize
    )
    chunk_size_limit = chunk_seqlen_end - chunk_seqlen_start

    dt = tl.load(
        dt_ptrs, mask=(offs_h[:, None] < nheads) & (offs_c[None, :] < chunk_size_limit), other=0.0
    ).to(tl.float32)
    if HAS_DT_BIAS:
        dt_bias = tl.load(
            dt_bias_ptr + offs_h * stride_dt_bias_head, mask=offs_h < nheads, other=0.0
        ).to(tl.float32)
        dt += dt_bias[:, None]
    if DT_SOFTPLUS:
        dt = tl.where(dt <= 20.0, softplus(dt), dt)

    dt = tl.clamp(dt, dt_min, dt_max)
    dt = tl.where((offs_h[:, None] < nheads) & (offs_c[None, :] < chunk_size_limit), dt, 0.0)
    tl.store(dt_out_ptrs, dt, mask=(offs_h[:, None] < nheads) & (offs_c[None, :] < chunk_size))
    A = tl.load(A_ptrs, mask=offs_h < nheads, other=0.0).to(tl.float32)
    dA = dt * A[:, None]
    dA_cs = tl.cumsum(dA, axis=1)
    tl.store(dA_cs_ptrs, dA_cs, mask=(offs_h[:, None] < nheads) & (offs_c[None, :] < chunk_size))


@triton.autotune(
    configs=autotune_configs(
        [
            triton.Config(
                {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64},
                num_stages=3,
                num_warps=8,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32},
                num_stages=5,
                num_warps=2,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32},
                num_stages=5,
                num_warps=2,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=2,
            ),
        ]
    ),
    key=["hdim", "dstate", "chunk_size"],
)
@triton.jit
def _chunk_state_fwd_kernel(
    # Pointers to matrices
    x_ptr,
    b_ptr,
    states_ptr,
    dt_ptr,
    dA_cumsum_ptr,
    cu_chunk_seqlens_ptr,
    # Matrix dimensions
    hdim: tl.constexpr,
    dstate: tl.constexpr,
    chunk_size: tl.constexpr,
    seqlen,
    nheads_ngroups_ratio: tl.constexpr,
    # Strides
    stride_x_seqlen: tl.int64,
    stride_x_head: tl.int64,
    stride_x_hdim: tl.constexpr,
    stride_b_seqlen: tl.int64,
    stride_b_head: tl.int64,
    stride_b_dstate: tl.constexpr,
    stride_states_chunk: tl.int64,
    stride_states_head: tl.int64,
    stride_states_hdim: tl.int64,
    stride_states_dstate: tl.constexpr,
    stride_dt_head: tl.int64,
    stride_dt_chunk: tl.int64,
    stride_dt_csize: tl.constexpr,
    stride_dA_cs_head: tl.int64,
    stride_dA_cs_chunk: tl.int64,
    stride_dA_cs_csize: tl.constexpr,
    # Meta-parameters
    BLOCK_SIZE_M: tl.constexpr,
    BLOCK_SIZE_N: tl.constexpr,
    BLOCK_SIZE_K: tl.constexpr,
):
    pid_c = tl.program_id(axis=1).to(tl.int64)
    pid_h = tl.program_id(axis=2)
    num_pid_n = tl.cdiv(dstate, BLOCK_SIZE_N)
    pid_m = tl.program_id(axis=0) // num_pid_n
    pid_n = tl.program_id(axis=0) % num_pid_n
    chunk_seqlen_start = tl.load(cu_chunk_seqlens_ptr + pid_c)
    chunk_seqlen_end = tl.load(cu_chunk_seqlens_ptr + pid_c + 1)
    b_ptr += chunk_seqlen_start * stride_b_seqlen + (pid_h // nheads_ngroups_ratio) * stride_b_head
    x_ptr += chunk_seqlen_start * stride_x_seqlen + pid_h * stride_x_head
    dt_ptr += pid_c * stride_dt_chunk + pid_h * stride_dt_head
    dA_cumsum_ptr += pid_c * stride_dA_cs_chunk + pid_h * stride_dA_cs_head

    offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)
    offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
    offs_k = tl.arange(0, BLOCK_SIZE_K)
    x_ptrs = x_ptr + (offs_m[:, None] * stride_x_hdim + offs_k[None, :] * stride_x_seqlen)
    b_ptrs = b_ptr + (offs_n[None, :] * stride_b_dstate + offs_k[:, None] * stride_b_seqlen)
    dt_ptrs = dt_ptr + offs_k * stride_dt_csize
    dA_cs_last = tl.load(dA_cumsum_ptr + (chunk_size - 1) * stride_dA_cs_csize).to(tl.float32)
    dA_cumsum_ptrs = dA_cumsum_ptr + offs_k * stride_dA_cs_csize

    chunk_size_limit = chunk_seqlen_end - chunk_seqlen_start

    acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32)
    for k in range(0, chunk_size_limit, BLOCK_SIZE_K):
        x = tl.load(
            x_ptrs,
            mask=(offs_m[:, None] < hdim) & (offs_k[None, :] < chunk_size_limit - k),
            other=0.0,
        )
        b = tl.load(
            b_ptrs,
            mask=(offs_k[:, None] < chunk_size_limit - k) & (offs_n[None, :] < dstate),
            other=0.0,
        ).to(tl.float32)
        dA_cs_k = tl.load(dA_cumsum_ptrs, mask=offs_k < chunk_size_limit - k, other=0.0).to(
            tl.float32
        )
        dt_k = tl.load(dt_ptrs, mask=offs_k < chunk_size_limit - k, other=0.0).to(tl.float32)
        scale = tl.exp(tl.minimum(dA_cs_last - dA_cs_k, 0.0)) * dt_k
        b *= scale[:, None]
        b = b.to(x_ptr.dtype.element_ty)
        acc += tl.dot(x, b)

        x_ptrs += BLOCK_SIZE_K * stride_x_seqlen
        b_ptrs += BLOCK_SIZE_K * stride_b_seqlen
        dt_ptrs += BLOCK_SIZE_K * stride_dt_csize
        dA_cumsum_ptrs += BLOCK_SIZE_K * stride_dA_cs_csize

    states = acc.to(states_ptr.dtype.element_ty)

    states_ptr += pid_c * stride_states_chunk + pid_h * stride_states_head
    offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)
    offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
    states_ptrs = states_ptr + (
        offs_m[:, None] * stride_states_hdim + offs_n[None, :] * stride_states_dstate
    )
    c_mask = (offs_m[:, None] < hdim) & (offs_n[None, :] < dstate)
    tl.store(states_ptrs, states, mask=c_mask)


def _chunk_cumsum_fwd(
    dt,
    A,
    chunk_size,
    cu_chunk_seqlens,
    dt_bias=None,
    dt_softplus=False,
    dt_limit=(0.0, float("inf")),
):
    seqlen, nheads = dt.shape
    assert A.shape == (nheads,)
    if dt_bias is not None:
        assert dt_bias.shape == (nheads,)
    nchunks = cu_chunk_seqlens.shape[0] - 1
    dt_out = torch.empty(nheads, nchunks, chunk_size, device=dt.device, dtype=torch.float32)
    dA_cumsum = torch.empty(nheads, nchunks, chunk_size, device=dt.device, dtype=torch.float32)
    grid_chunk_cs = lambda META: (nchunks, triton.cdiv(nheads, META["BLOCK_SIZE_H"]))
    with torch.cuda.device(dt.device.index):
        _chunk_cumsum_fwd_kernel[grid_chunk_cs](
            dt_ptr=dt,
            A_ptr=A,
            dt_bias_ptr=dt_bias,
            dt_out_ptr=dt_out,
            dA_cumsum_ptr=dA_cumsum,
            cu_chunk_seqlens_ptr=cu_chunk_seqlens,
            seqlen=seqlen,
            nheads=nheads,
            chunk_size=chunk_size,
            dt_min=dt_limit[0],
            dt_max=dt_limit[1],
            stride_dt_seqlen=dt.stride(0),
            stride_dt_head=dt.stride(1),
            stride_A_head=A.stride(0),
            stride_dt_bias_head=dt_bias.stride(0) if dt_bias is not None else 0,
            stride_dt_out_head=dt_out.stride(0),
            stride_dt_out_chunk=dt_out.stride(1),
            stride_dt_out_csize=dt_out.stride(2),
            stride_dA_cs_head=dA_cumsum.stride(0),
            stride_dA_cs_chunk=dA_cumsum.stride(1),
            stride_dA_cs_csize=dA_cumsum.stride(2),
            DT_SOFTPLUS=dt_softplus,
            HAS_DT_BIAS=dt_bias is not None,
            BLOCK_SIZE_CHUNK=triton.next_power_of_2(chunk_size),
        )
    return dA_cumsum, dt_out


def _chunk_state_fwd(B, x, dt, dA_cumsum, cu_chunk_seqlens, states=None, states_in_fp32=True):
    seqlen, nheads, headdim = x.shape
    _, nchunks, chunk_size = dt.shape
    _, ngroups, dstate = B.shape
    assert nheads % ngroups == 0
    assert B.shape == (seqlen, ngroups, dstate)
    assert dt.shape == (nheads, nchunks, chunk_size)
    assert dA_cumsum.shape == dt.shape

    if states is not None:
        assert states.shape == (nchunks, nheads, headdim, dstate)
    else:
        states_dtype = torch.float32 if states_in_fp32 else B.dtype
        states = torch.empty(
            (nchunks, nheads, headdim, dstate), device=x.device, dtype=states_dtype
        )

    grid = lambda META: (
        triton.cdiv(headdim, META["BLOCK_SIZE_M"]) * triton.cdiv(dstate, META["BLOCK_SIZE_N"]),
        nchunks,
        nheads,
    )
    with torch.cuda.device(x.device.index):
        _chunk_state_fwd_kernel[grid](
            x_ptr=x,
            b_ptr=B,
            states_ptr=states,
            dt_ptr=dt,
            dA_cumsum_ptr=dA_cumsum,
            cu_chunk_seqlens_ptr=cu_chunk_seqlens,
            hdim=headdim,
            dstate=dstate,
            chunk_size=chunk_size,
            seqlen=seqlen,
            nheads_ngroups_ratio=nheads // ngroups,
            stride_x_seqlen=x.stride(0),
            stride_x_head=x.stride(1),
            stride_x_hdim=x.stride(2),
            stride_b_seqlen=B.stride(0),
            stride_b_head=B.stride(1),
            stride_b_dstate=B.stride(2),
            stride_states_chunk=states.stride(0),
            stride_states_head=states.stride(1),
            stride_states_hdim=states.stride(2),
            stride_states_dstate=states.stride(3),
            stride_dt_head=dt.stride(0),
            stride_dt_chunk=dt.stride(1),
            stride_dt_csize=dt.stride(2),
            stride_dA_cs_head=dA_cumsum.stride(0),
            stride_dA_cs_chunk=dA_cumsum.stride(1),
            stride_dA_cs_csize=dA_cumsum.stride(2),
        )
    return states


@triton.autotune(
    configs=autotune_configs(
        [
            triton.Config(
                {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64},
                num_stages=3,
                num_warps=8,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=4,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 32},
                num_stages=5,
                num_warps=2,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32},
                num_stages=5,
                num_warps=2,
            ),
            triton.Config(
                {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32},
                num_stages=4,
                num_warps=2,
            ),
        ]
    ),
    key=["hdim", "dstate", "chunk_size"],
)
@triton.jit
def _chunk_state_varlen_kernel(
    x_ptr,
    b_ptr,
    dt_ptr,
    dA_cumsum_ptr,
    chunk_states_ptr,
    cu_seqlens_ptr,
    last_chunk_indices_ptr,
    cu_chunk_seqlens_ptr,
    states_ptr,
    initstates_ptr,
    hdim: tl.constexpr,
    dstate: tl.constexpr,
    chunk_size: tl.constexpr,
    nheads_ngroups_ratio: tl.constexpr,
    stride_x_seqlen: tl.int64,
    stride_x_head: tl.int64,
    stride_x_hdim: tl.constexpr,
    stride_b_seqlen: tl.int64,
    stride_b_head: tl.int64,
    stride_b_dstate: tl.constexpr,
    stride_dt_head: tl.int64,
    stride_dt_chunk: tl.int64,
    stride_dt_csize: tl.constexpr,
    stride_dA_cs_head: tl.int64,
    stride_dA_cs_chunk: tl.int64,
    stride_dA_cs_csize: tl.constexpr,
    stride_chunk_states_chunk: tl.int64,
    stride_chunk_states_head: tl.int64,
    stride_chunk_states_hdim: tl.int64,
    stride_chunk_states_dstate: tl.constexpr,
    stride_states_batch: tl.int64,
    stride_states_head: tl.int64,
    stride_states_hdim: tl.int64,
    stride_states_dstate: tl.constexpr,
    stride_init_states_batch: tl.int64,
    stride_init_states_head: tl.int64,
    stride_init_states_hdim: tl.int64,
    stride_init_states_dstate: tl.constexpr,
    BLOCK_SIZE_M: tl.constexpr,
    BLOCK_SIZE_N: tl.constexpr,
    BLOCK_SIZE_K: tl.constexpr,
    HAS_INITSTATES: tl.constexpr,
    USE_LAST_CHUNK_INDICES: tl.constexpr,
):
    pid_b = tl.program_id(axis=1)
    pid_h = tl.program_id(axis=2)
    num_pid_n = tl.cdiv(dstate, BLOCK_SIZE_N)
    pid_m = tl.program_id(axis=0) // num_pid_n
    pid_n = tl.program_id(axis=0) % num_pid_n
    end_idx = tl.load(cu_seqlens_ptr + pid_b + 1)
    start_idx = tl.load(cu_seqlens_ptr + pid_b)
    if USE_LAST_CHUNK_INDICES:
        pid_c = tl.load(last_chunk_indices_ptr + pid_b).to(tl.int64)
        chunk_start = tl.load(cu_chunk_seqlens_ptr + pid_c)
        chunk_size_limit = tl.load(cu_chunk_seqlens_ptr + pid_c + 1) - chunk_start
    else:
        pid_c = (end_idx - 1) // chunk_size
        chunk_start = pid_c * chunk_size
        chunk_size_limit = end_idx - chunk_start
    b_ptr += chunk_start * stride_b_seqlen + (pid_h // nheads_ngroups_ratio) * stride_b_head
    x_ptr += chunk_start * stride_x_seqlen + pid_h * stride_x_head
    dt_ptr += pid_c * stride_dt_chunk + pid_h * stride_dt_head
    dA_cumsum_ptr += pid_c * stride_dA_cs_chunk + pid_h * stride_dA_cs_head
    chunk_states_ptr += pid_c * stride_chunk_states_chunk + pid_h * stride_chunk_states_head

    if HAS_INITSTATES:
        initstates_ptr += pid_h * stride_init_states_head

    offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)
    offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
    offs_k = tl.arange(0, BLOCK_SIZE_K)
    x_ptrs = x_ptr + (offs_m[:, None] * stride_x_hdim + offs_k[None, :] * stride_x_seqlen)
    b_ptrs = b_ptr + (offs_n[None, :] * stride_b_dstate + offs_k[:, None] * stride_b_seqlen)
    dt_ptrs = dt_ptr + offs_k * stride_dt_csize
    dA_cs_last = tl.load(dA_cumsum_ptr + (end_idx - 1 - chunk_start) * stride_dA_cs_csize).to(
        tl.float32
    )
    dA_cumsum_ptrs = dA_cumsum_ptr + offs_k * stride_dA_cs_csize

    start_idx_cur = tl.maximum(start_idx - chunk_start, 0)

    acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32)
    for k in range(0, chunk_size_limit, BLOCK_SIZE_K):
        x = tl.load(
            x_ptrs,
            mask=(offs_m[:, None] < hdim)
            & (offs_k[None, :] < chunk_size_limit - k)
            & (offs_k[None, :] >= start_idx_cur - k),
            other=0.0,
        )
        b = tl.load(
            b_ptrs,
            mask=(offs_k[:, None] < chunk_size_limit - k)
            & (offs_n[None, :] < dstate)
            & (offs_k[:, None] >= start_idx_cur - k),
            other=0.0,
        ).to(tl.float32)
        dA_cs_k = tl.load(dA_cumsum_ptrs, mask=offs_k < chunk_size_limit - k, other=0.0).to(
            tl.float32
        )
        dt_k = tl.load(dt_ptrs, mask=offs_k < chunk_size_limit - k, other=0.0).to(tl.float32)
        scale = tl.where(
            (offs_k >= start_idx_cur - k) & (offs_k < chunk_size_limit - k),
            tl.exp(tl.minimum(dA_cs_last - dA_cs_k, 0.0)) * dt_k,
            0.0,
        )
        b *= scale[:, None]
        b = b.to(x_ptr.dtype.element_ty)
        acc += tl.dot(x, b)
        x_ptrs += BLOCK_SIZE_K * stride_x_seqlen
        b_ptrs += BLOCK_SIZE_K * stride_b_seqlen
        dt_ptrs += BLOCK_SIZE_K * stride_dt_csize
        dA_cumsum_ptrs += BLOCK_SIZE_K * stride_dA_cs_csize

    if (start_idx < chunk_start) or (HAS_INITSTATES):
        dA_cs_boundary = 0.0
        if not HAS_INITSTATES:
            past_states_ptrs = chunk_states_ptr + (
                offs_m[:, None] * stride_chunk_states_hdim
                + offs_n[None, :] * stride_chunk_states_dstate
            )
        else:
            if start_idx < chunk_start:
                past_states_ptrs = chunk_states_ptr + (
                    offs_m[:, None] * stride_chunk_states_hdim
                    + offs_n[None, :] * stride_chunk_states_dstate
                )
            else:
                past_states_ptrs = initstates_ptr + (
                    pid_b * stride_init_states_batch
                    + offs_m[:, None] * stride_init_states_hdim
                    + offs_n[None, :] * stride_init_states_dstate
                )
                if start_idx > chunk_start:
                    dA_cs_boundary = tl.load(
                        dA_cumsum_ptr + (start_idx - chunk_start - 1) * stride_dA_cs_csize
                    ).to(tl.float32)

        past_states = tl.load(
            past_states_ptrs, mask=(offs_m[:, None] < hdim) & (offs_n[None, :] < dstate), other=0.0
        ).to(tl.float32)
        scale = tl.exp(tl.minimum(dA_cs_last - dA_cs_boundary, 0.0))
        acc += past_states * scale

    states = acc.to(states_ptr.dtype.element_ty)
    states_ptr += pid_b * stride_states_batch + pid_h * stride_states_head
    offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)
    offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
    states_ptrs = states_ptr + (
        offs_m[:, None] * stride_states_hdim + offs_n[None, :] * stride_states_dstate
    )
    c_mask = (offs_m[:, None] < hdim) & (offs_n[None, :] < dstate)
    tl.store(states_ptrs, states, mask=c_mask)


def chunk_state_varlen(
    B,
    x,
    dt,
    dA_cumsum,
    cu_seqlens,
    chunk_states,
    initial_states=None,
    last_chunk_indices=None,
    cu_chunk_seqlens=None,
):
    """Compute per-sequence final SSM state from chunk states.

    Correct when sequences share chunks.
    """
    total_seqlen, nheads, headdim = x.shape
    _, nchunks, chunk_size = dt.shape
    _, ngroups, dstate = B.shape
    batch = cu_seqlens.shape[0] - 1
    cu_seqlens = cu_seqlens.contiguous()
    assert nheads % ngroups == 0
    assert B.shape == (total_seqlen, ngroups, dstate)
    assert dt.shape == (nheads, nchunks, chunk_size)
    assert dA_cumsum.shape == dt.shape
    assert chunk_states.shape == (nchunks, nheads, headdim, dstate)
    if initial_states is not None:
        assert initial_states.shape == (batch, nheads, headdim, dstate)
    use_last_chunk = last_chunk_indices is not None and cu_chunk_seqlens is not None
    if use_last_chunk:
        last_chunk_indices = last_chunk_indices.contiguous().to(x.device)
        cu_chunk_seqlens = cu_chunk_seqlens.contiguous().to(x.device)
    else:
        last_chunk_indices = torch.zeros(1, dtype=torch.int64, device=x.device)
        cu_chunk_seqlens = cu_seqlens

    states = torch.empty(
        batch, nheads, headdim, dstate, dtype=chunk_states.dtype, device=chunk_states.device
    )
    initial_states_strides = (
        (
            initial_states.stride(0),
            initial_states.stride(1),
            initial_states.stride(2),
            initial_states.stride(3),
        )
        if initial_states is not None
        else (0, 0, 0, 0)
    )
    grid = lambda META: (
        triton.cdiv(headdim, META["BLOCK_SIZE_M"]) * triton.cdiv(dstate, META["BLOCK_SIZE_N"]),
        batch,
        nheads,
    )
    with torch.cuda.device(x.device.index):
        _chunk_state_varlen_kernel[grid](
            x_ptr=x,
            b_ptr=B,
            dt_ptr=dt,
            dA_cumsum_ptr=dA_cumsum,
            chunk_states_ptr=chunk_states,
            cu_seqlens_ptr=cu_seqlens,
            last_chunk_indices_ptr=last_chunk_indices,
            cu_chunk_seqlens_ptr=cu_chunk_seqlens,
            states_ptr=states,
            initstates_ptr=initial_states,
            hdim=headdim,
            dstate=dstate,
            chunk_size=chunk_size,
            nheads_ngroups_ratio=nheads // ngroups,
            stride_x_seqlen=x.stride(0),
            stride_x_head=x.stride(1),
            stride_x_hdim=x.stride(2),
            stride_b_seqlen=B.stride(0),
            stride_b_head=B.stride(1),
            stride_b_dstate=B.stride(2),
            stride_dt_head=dt.stride(0),
            stride_dt_chunk=dt.stride(1),
            stride_dt_csize=dt.stride(2),
            stride_dA_cs_head=dA_cumsum.stride(0),
            stride_dA_cs_chunk=dA_cumsum.stride(1),
            stride_dA_cs_csize=dA_cumsum.stride(2),
            stride_chunk_states_chunk=chunk_states.stride(0),
            stride_chunk_states_head=chunk_states.stride(1),
            stride_chunk_states_hdim=chunk_states.stride(2),
            stride_chunk_states_dstate=chunk_states.stride(3),
            stride_states_batch=states.stride(0),
            stride_states_head=states.stride(1),
            stride_states_hdim=states.stride(2),
            stride_states_dstate=states.stride(3),
            stride_init_states_batch=initial_states_strides[0],
            stride_init_states_head=initial_states_strides[1],
            stride_init_states_hdim=initial_states_strides[2],
            stride_init_states_dstate=initial_states_strides[3],
            HAS_INITSTATES=initial_states is not None,
            USE_LAST_CHUNK_INDICES=use_last_chunk,
        )
    return states


================================================
FILE: megatron/core/ssm/ops/ssd_combined.py
================================================
# Copyright (c) 2024, Tri Dao, Albert Gu.
# Adapted from:
#   https://github.com/state-spaces/mamba/blob/v2.2.4/mamba_ssm/ops/triton/ssd_combined.py
# Adapted from vLLM project (Apache-2.0).

import torch
import triton
from packaging import version

from .ssd_bmm import _bmm_chunk_fwd
from .ssd_chunk_scan import _chunk_scan_fwd
from .ssd_chunk_state import _chunk_cumsum_fwd, _chunk_state_fwd
from .ssd_state_passing import _state_passing_fwd

TRITON_22 = version.parse(triton.__version__) >= version.parse("2.2.0")


def is_int_pow_2(n):
    """Return True if n is a positive integer power of 2."""
    return isinstance(n, int) and n > 0 and (n & (n - 1)) == 0


def _mamba_chunk_scan_combined_fwd(
    x,
    dt,
    A,
    B,
    C,
    chunk_size,
    out,
    D=None,
    z=None,
    dt_bias=None,
    initial_states=None,
    return_intermediate_states=False,
    seq_idx=None,
    cu_chunk_seqlens=None,
    last_chunk_indices=None,
    intermediate_chunk_indices=None,
    dt_softplus=False,
    dt_limit=(0.0, float("inf")),
    state_dtype=None,
):
    assert is_int_pow_2(chunk_size), "chunk_size must be integer power of 2"
    seqlen, nheads, headdim = x.shape
    _, ngroups, dstate = B.shape
    assert nheads % ngroups == 0
    assert B.shape == (
        seqlen,
        ngroups,
        dstate,
    ), f"B.shape={B.shape} != ({seqlen}, {ngroups}, {dstate})"
    assert dt.shape == (seqlen, nheads)
    assert A.shape == (nheads,)
    assert C.shape == B.shape
    if z is not None:
        assert z.shape == x.shape
    if D is not None:
        assert D.shape == (nheads, headdim) or D.shape == (nheads,)
    if seq_idx is not None:
        assert seq_idx.shape == (cu_chunk_seqlens.shape[0] - 1,)
    if B.stride(-1) != 1:
        B = B.contiguous()
    if C.stride(-1) != 1:
        C = C.contiguous()
    if x.stride(-1) != 1 and x.stride(0) != 1:  # Either M or K dimension should be contiguous
        x = x.contiguous()
    if (
        z is not None and z.stride(-1) != 1 and z.stride(0) != 1
    ):  # Either M or K dimension should be contiguous
        z = z.contiguous()
    if D is not None and D.stride(-1) != 1:
        D = D.contiguous()
    assert cu_chunk_seqlens is not None, "Assuming varlen input - must supply cu_chunk_seqlens"
    assert last_chunk_indices is not None, "last_chunk_indices must be provided"

    if initial_states is not None:
        num_seqs = last_chunk_indices.shape[0]
        assert initial_states.shape == (num_seqs, nheads, headdim, dstate)

    # This function executes 5 sub-functions for computing mamba
    # - a good resource is the blog https://goombalab.github.io/blog/2024/mamba2-part3-algorithm/
    #   which has a minimal implementation to understand the below operations
    # - as explained by the blog, mamba is a special case of causal attention
    # - the idea is to chunk the attention matrix and compute each
    #   submatrix separately using different optimizations.
    # - see the blog and paper for a visualization of the submatrices
    #   which we refer to in the comments below

    # 1. Compute chunked cumsum of A * dt
    # - here dt may go through a softplus activation
    dA_cumsum, dt = _chunk_cumsum_fwd(
        dt,
        A,
        chunk_size,
        cu_chunk_seqlens,
        dt_bias=dt_bias,
        dt_softplus=dt_softplus,
        dt_limit=dt_limit,
    )

    # 2. Compute the state for each intra-chunk
    # (right term of low-rank factorization of off-diagonal blocks; B terms)
    states = _chunk_state_fwd(B, x, dt, dA_cumsum, cu_chunk_seqlens, states_in_fp32=True)

    # 3. Compute the inter-chunk SSM recurrence; produces correct SSM states at chunk boundaries
    # (middle term of factorization of off-diag blocks; A terms)
    # - for handling chunked prefill, this requires i) initial_states and
    #   ii) seq_idx to be all specified.
    # - When a new seq_idx is detected, we will stop passing the prev_state
    #   and switch accordingly to the init_state corresponding to the new seq_idx.
    states = _state_passing_fwd(
        states.flatten(-2),  # ... p n -> ... (p n)
        dA_cumsum,  # (nheads, nchunks, chunk_size)
        cu_chunk_seqlens,
        initial_states=(
            initial_states.flatten(-2) if initial_states is not None else None
        ),  # (batch, nheads, headdim*dstate)
        seq_idx=seq_idx,
        out_dtype=state_dtype if state_dtype is not None else C.dtype,
    )
    states = states.unflatten(-1, (-1, dstate))

    # 4. Compute batched matrix multiply for C_j^T B_i terms
    CB = _bmm_chunk_fwd(C, B, chunk_size, cu_chunk_seqlens, output_dtype=torch.float32)

    # 5. Scan and compute the diagonal blocks, taking into
    #    account past causal states.
    # - if initial states are provided, then states information will be
    #   augmented with initial_states.
    # - to do this properly, we need to account for example changes in
    #   the continuous batch, therefore we introduce pseudo chunks, which is
    #   a chunk that is split up each time an example changes.
    # - in each (pseudo) chunk, we detect if the previous (pseudo) chunk had
    #   a seq_idx change, in which case we take states information from
    #   init_states.
    _chunk_scan_fwd(
        CB,
        x,
        dt,
        dA_cumsum,
        C,
        states,
        cu_chunk_seqlens,
        out,  # in-place update
        seq_idx,
        D=D,
        z=z,
        initial_states=initial_states,
    )

    if return_intermediate_states:
        return states

    final_states = states[last_chunk_indices]
    if intermediate_chunk_indices is not None:
        intermediate_states = states[intermediate_chunk_indices]
        return final_states, intermediate_states
    else:
        return final_states


def mamba_chunk_scan_combined_varlen(
    x,
    dt,
    A,
    B,
    C,
    chunk_size,
    cu_chunk_seqlens,
    last_chunk_indices,
    seq_idx,
    out,
    D=None,
    z=None,
    dt_bias=None,
    initial_states=None,
    dt_softplus=False,
    dt_limit=(0.0, float("inf")),
    return_intermediate_states=False,
    intermediate_chunk_indices=None,
    state_dtype=None,
):
    """
    Argument:
        x: (seqlen, nheads, headdim)
        dt: (seqlen, nheads)
        A: (nheads)
        B: (seqlen, ngroups, dstate)
        C: (seqlen, ngroups, dstate)
        chunk_size: int
        cu_chunk_seqlens: (nchunks + 1,)
        last_chunk_indices: (batch,)
        seq_idx: (nchunks,)
        out: (seqlen, nheads, headdim) preallocated output tensor
        D: (nheads, headdim) or (nheads,)
        z: (seqlen, nheads, headdim)
        dt_bias: (nheads,)
        initial_states: (batch, nheads, headdim, dstate)
        dt_softplus: Whether to apply softplus to dt
        intermediate_chunk_indices: (N,) optional int64 tensor of chunk indices at which to
            extract intermediate SSM states. When provided, returns (final_states,
            intermediate_states) instead of just final_states.
        state_dtype: The data type of the ssm state
    Return:
        varlen_states: (batch, nheads, headdim, dstate), or
        (varlen_states, intermediate_states) if intermediate_chunk_indices is provided
    """

    assert seq_idx is not None

    varlen_states = _mamba_chunk_scan_combined_fwd(
        x,
        dt,
        A,
        B,
        C,
        chunk_size,
        out,
        D=D,
        z=z,
        dt_bias=dt_bias,
        initial_states=initial_states,
        return_intermediate_states=return_intermediate_states,
        seq_idx=seq_idx,
        cu_chunk_seqlens=cu_chunk_seqlens,
        last_chunk_indices=last_chunk_indices,
        intermediate_chunk_indices=intermediate_chunk_indices,
        dt_softplus=dt_softplus,
        dt_limit=dt_limit,
        state_dtype=state_dtype,
    )

    return varlen_states


================================================
FILE: megatron/core/ssm/ops/ssd_state_passing.py
================================================
# Copyright (c) 2024, Tri Dao, Albert Gu.
# Adapted from:
#   https://github.com/state-spaces/mamba/blob/v2.2.4/mamba_ssm/ops/triton/ssd_state_passing.py
# Adapted from vLLM project (Apache-2.0).

import torch
import triton
import triton.language as tl

from megatron.core.ssm.ops.determinism import autotune_configs


@triton.autotune(
    configs=autotune_configs(
        [
            triton.Config({"BLOCK_SIZE": 64}),
            triton.Config({"BLOCK_SIZE": 128}),
            triton.Config({"BLOCK_SIZE": 256}),
            triton.Config({"BLOCK_SIZE": 512}),
            triton.Config({"BLOCK_SIZE": 1024}),
            triton.Config({"BLOCK_SIZE": 2048}),
        ]
    ),
    key=["dim"],
)
@triton.jit
def _state_passing_fwd_kernel(
    # Pointers to matrices
    states_ptr,
    out_ptr,
    dA_cs_ptr,
    initstates_ptr,
    seq_idx_ptr,
    cu_chunk_seqlens_ptr,
    # Matrix dimensions
    dim: tl.constexpr,
    nchunks,
    seqlen,
    chunk_size: tl.constexpr,
    # Strides
    stride_states_chunk: tl.int64,
    stride_states_head: tl.int64,
    stride_states_dim: tl.constexpr,
    stride_out_chunk: tl.int64,
    stride_out_head: tl.int64,
    stride_out_dim: tl.constexpr,
    stride_dA_cs_head: tl.int64,
    stride_dA_cs_chunk: tl.int64,
    stride_dA_cs_csize: tl.constexpr,
    stride_initstates_batch: tl.int64,
    stride_initstates_head: tl.int64,
    stride_initstates_dim: tl.constexpr,
    stride_seq_idx_chunk: tl.constexpr,
    # Meta-parameters
    HAS_INITSTATES: tl.constexpr,
    BLOCK_SIZE: tl.constexpr,
):
    pid_h = tl.program_id(axis=1)
    pid_m = tl.program_id(axis=0)

    states_ptr += pid_h * stride_states_head
    dA_cs_ptr += pid_h * stride_dA_cs_head + (chunk_size - 1) * stride_dA_cs_csize
    out_ptr += pid_h * stride_out_head

    offs_m = pid_m * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    states_ptrs = states_ptr + offs_m * stride_states_dim
    out_ptrs = out_ptr + offs_m * stride_out_dim

    if HAS_INITSTATES:
        initstates_ptrs = (
            initstates_ptr + pid_h * stride_initstates_head + offs_m * stride_initstates_dim
        )

        states = tl.load(initstates_ptrs, mask=offs_m < dim, other=0.0).to(tl.float32)
    else:
        states = tl.zeros((BLOCK_SIZE,), dtype=tl.float32)

    prev_seq_idx = 0
    for c in range(nchunks):
        new_states = tl.load(states_ptrs, mask=offs_m < dim, other=0.0).to(tl.float32)
        dA_cs = tl.load(dA_cs_ptr).to(tl.float32)
        seq_idx = tl.load(seq_idx_ptr + c * stride_seq_idx_chunk)
        # we have started a new sequence
        if prev_seq_idx != seq_idx:
            if HAS_INITSTATES:
                initstates_ptrs = (
                    initstates_ptr
                    + seq_idx * stride_initstates_batch
                    + pid_h * stride_initstates_head
                    + offs_m * stride_initstates_dim
                )
                states = tl.load(initstates_ptrs, mask=offs_m < dim, other=0.0).to(tl.float32)
            else:
                states = tl.zeros((BLOCK_SIZE,), dtype=tl.float32)

        prev_seq_idx = seq_idx
        states = tl.exp(dA_cs) * states + new_states
        tl.store(out_ptrs, states, mask=offs_m < dim)

        states_ptrs += stride_states_chunk
        dA_cs_ptr += stride_dA_cs_chunk
        out_ptrs += stride_out_chunk


def _state_passing_fwd(
    states, dA_cumsum, cu_chunk_seqlens, seq_idx, initial_states=None, out_dtype=None
):
    nchunks, nheads, dim = states.shape
    chunk_size = dA_cumsum.shape[-1]
    assert dA_cumsum.shape == (nheads, nchunks, chunk_size)
    seqlen = seq_idx.shape[-1]
    out_dtype = states.dtype if out_dtype is None else out_dtype
    out = torch.empty((nchunks, nheads, dim), device=states.device, dtype=out_dtype)

    initial_states_strides = (
        (initial_states.stride(0), initial_states.stride(1), initial_states.stride(2))
        if initial_states is not None
        else (0, 0, 0)
    )

    grid = lambda META: (triton.cdiv(dim, META["BLOCK_SIZE"]), nheads)
    with torch.cuda.device(states.device.index):
        _state_passing_fwd_kernel[grid](
            states_ptr=states,
            out_ptr=out,
            dA_cs_ptr=dA_cumsum,
            initstates_ptr=initial_states,
            seq_idx_ptr=seq_idx,
            cu_chunk_seqlens_ptr=cu_chunk_seqlens,
            dim=dim,
            nchunks=nchunks,
            seqlen=seqlen if seq_idx is not None else 0,
            chunk_size=chunk_size if seq_idx is not None else 0,
            stride_states_chunk=states.stride(0),
            stride_states_head=states.stride(1),
            stride_states_dim=states.stride(2),
            stride_out_chunk=out.stride(0),
            stride_out_head=out.stride(1),
            stride_out_dim=out.stride(2),
            stride_dA_cs_head=dA_cumsum.stride(0),
            stride_dA_cs_chunk=dA_cumsum.stride(1),
            stride_dA_cs_csize=dA_cumsum.stride(2),
            stride_initstates_batch=initial_states_strides[0],
            stride_initstates_head=initial_states_strides[1],
            stride_initstates_dim=initial_states_strides[2],
            stride_seq_idx_chunk=seq_idx.stride(0),
            HAS_INITSTATES=initial_states is not None,
        )
    return out


================================================
FILE: megatron/core/ssm/triton_cache_manager.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
# Copyright 2018-2020 Philippe Tillet
# Copyright 2020-2022 OpenAI

# Some of this code was adopted from https://github.com/triton-lang/triton
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import os
import uuid
from pathlib import Path

try:
    from triton import __version__ as triton_version
    from triton.runtime.cache import FileCacheManager
except ImportError:
    raise ImportError("triton is required by the Mamba model but cannot be imported")


def _version_no_greater_than(version, version_limit):
    major, minor, _ = map(int, version.split('.'))
    limit_major, limit_minor = map(int, version_limit.split('.'))
    return major < limit_major or (major == limit_major and minor <= limit_minor)


def default_cache_dir():
    """Provides a default path for the Triton cache directory."""
    return os.path.join(Path.home(), ".triton", "cache")


class ParallelFileCacheManager(FileCacheManager):
    """
    This patched version of ParallelFileCacheManager prevents errors related
    to the builing of the Triton compiler cache when the number of model
    parallel ranks is greater than one, including when certain types of file
    system are used (such as Lustre).

    Usage:
    export TRITON_CACHE_DIR=<chosen-cache-location>
    export TRITON_CACHE_MANAGER=megatron.core.ssm.triton_cache_manager:ParallelFileCacheManager

    This patch implements the changes in the following two Triton project pull
    requests:
    1. https://github.com/triton-lang/triton/pull/3544
    2. https://github.com/triton-lang/triton/pull/4295

    The above changes will probably be included in Triton release version 3.2,
    making this patch no longer necessary.
    """

    def put(self, data, filename, binary=True) -> str:
        """A patched version of put, implementing PR 3544 and PR 4295."""
        patch_limit = '3.1'
        assert _version_no_greater_than(triton_version, patch_limit), (
            "Assertion failed: ParallelFileCacheManager patch should not be "
            f"used beyond Triton version {patch_limit}."
        )
        if not self.cache_dir:
            raise RuntimeError("Could not create or locate cache dir")
        binary = isinstance(data, bytes)
        if not binary:
            data = str(data)
        assert self.lock_path is not None
        filepath = self._make_path(filename)
        # Random ID to avoid any collisions
        rnd_id = str(uuid.uuid4())
        # we use the PID in case a bunch of these around so we can see what PID made it
        pid = os.getpid()
        # use temp dir to be robust against program interruptions
        temp_dir = os.path.join(self.cache_dir, f"tmp.pid_{pid}_{rnd_id}")
        os.makedirs(temp_dir, exist_ok=True)
        temp_path = os.path.join(temp_dir, filename)

        mode = "wb" if binary else "w"
        with open(temp_path, mode) as f:
            f.write(data)
        # Replace is guaranteed to be atomic on POSIX systems if it succeeds
        # so filepath cannot see a partial write
        os.replace(temp_path, filepath)
        os.removedirs(temp_dir)
        return filepath


================================================
FILE: megatron/core/tensor_parallel/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from .cross_entropy import vocab_parallel_cross_entropy
from .data import broadcast_data
from .inference_layers import (
    InferenceColumnParallelLinear,
    InferenceLayerNormColumnParallelLinear,
    InferenceRowParallelLinear,
)
from .layers import (
    ColumnParallelLinear,
    RowParallelLinear,
    VocabParallelEmbedding,
    copy_tensor_model_parallel_attributes,
    linear_with_grad_accumulation_and_async_allreduce,
    param_is_not_tensor_parallel_duplicate,
    set_defaults_if_not_set_tensor_model_parallel_attributes,
    set_tensor_model_parallel_attributes,
)
from .mappings import (
    all_gather_last_dim_from_tensor_parallel_region,
    all_to_all,
    all_to_all_hp2sp,
    all_to_all_sp2hp,
    copy_to_tensor_model_parallel_region,
    gather_from_sequence_parallel_region,
    gather_from_tensor_model_parallel_region,
    reduce_from_tensor_model_parallel_region,
    reduce_scatter_last_dim_to_tensor_parallel_region,
    reduce_scatter_to_sequence_parallel_region,
    scatter_to_sequence_parallel_region,
    scatter_to_tensor_model_parallel_region,
)
from .random import (
    CheckpointWithoutOutput,
    checkpoint,
    convert_cuda_rng_state,
    get_cuda_rng_tracker,
    get_data_parallel_rng_tracker_name,
    get_expert_parallel_rng_tracker_name,
    is_graph_safe_cuda_rng_tracker,
    model_parallel_cuda_manual_seed,
)
from .utils import (
    gather_split_1d_tensor,
    split_tensor_along_last_dim,
    split_tensor_into_1d_equal_chunks,
)

__all__ = [
    # cross_entropy.py
    "vocab_parallel_cross_entropy",
    # data.py
    "broadcast_data",
    # layers.py
    "ColumnParallelLinear",
    "RowParallelLinear",
    "VocabParallelEmbedding",
    "set_tensor_model_parallel_attributes",
    "set_defaults_if_not_set_tensor_model_parallel_attributes",
    "copy_tensor_model_parallel_attributes",
    "param_is_not_tensor_parallel_duplicate",
    "linear_with_grad_accumulation_and_async_allreduce",
    # mappings.py
    "copy_to_tensor_model_parallel_region",
    "gather_from_tensor_model_parallel_region",
    "gather_from_sequence_parallel_region",
    "reduce_from_tensor_model_parallel_region",
    "reduce_scatter_to_sequence_parallel_region",
    "scatter_to_tensor_model_parallel_region",
    "scatter_to_sequence_parallel_region",
    # random.py
    "checkpoint",
    "convert_cuda_rng_state",
    "get_cuda_rng_tracker",
    "model_parallel_cuda_manual_seed",
    "get_expert_parallel_rng_tracker_name",
    "is_graph_safe_cuda_rng_tracker",
    "CheckpointWithoutOutput",
    # utils.py
    "split_tensor_along_last_dim",
    "split_tensor_into_1d_equal_chunks",
    "gather_split_1d_tensor",
]


================================================
FILE: megatron/core/tensor_parallel/cross_entropy.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

from typing import Tuple

import torch

from megatron.core.parallel_state import (
    get_tensor_model_parallel_group,
    get_tensor_model_parallel_rank,
    get_tensor_model_parallel_world_size,
)

from .utils import VocabUtility


class VocabParallelCrossEntropy:
    """
    Computes the Cross Entropy Loss splitting the Vocab size across tensor parallel
    ranks. This implementation is used in both fused and unfused cross entropy implementations
    """

    @staticmethod
    def calculate_logits_max(
        vocab_parallel_logits: torch.Tensor,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Calculates logits_max."""

        vocab_parallel_logits = vocab_parallel_logits.float()
        # Maximum value along vocab dimension across all GPUs.
        logits_max = torch.max(vocab_parallel_logits, dim=-1)[0]

        return vocab_parallel_logits, logits_max

    @staticmethod
    def calculate_predicted_logits(
        vocab_parallel_logits: torch.Tensor,
        target: torch.Tensor,
        logits_max: torch.Tensor,
        vocab_start_index: int,
        vocab_end_index: int,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
        """Calculates predicted logits."""

        # In-place subtraction reduces memory pressure.
        vocab_parallel_logits -= logits_max.unsqueeze(dim=-1)

        # Create a mask of valid vocab ids (1 means it needs to be masked).
        target_mask = (target < vocab_start_index) | (target >= vocab_end_index)
        masked_target = target.clone() - vocab_start_index
        masked_target[target_mask] = 0

        # Get predicted-logits = logits[target].
        # For Simplicity, we convert logits to a 2-D tensor with size
        # [*, partition-vocab-size] and target to a 1-D tensor of size [*].
        partition_vocab_size = vocab_parallel_logits.size()[-1]
        logits_2d = vocab_parallel_logits.view(-1, partition_vocab_size)
        masked_target_1d = masked_target.view(-1)
        arange_1d = torch.arange(start=0, end=logits_2d.size()[0], device=logits_2d.device)
        predicted_logits_1d = logits_2d[arange_1d, masked_target_1d]
        predicted_logits_1d = predicted_logits_1d.clone().contiguous()
        predicted_logits = predicted_logits_1d.view_as(target)
        predicted_logits[target_mask] = 0.0

        exp_logits = vocab_parallel_logits
        torch.exp(vocab_parallel_logits, out=exp_logits)
        sum_exp_logits = exp_logits.sum(dim=-1)

        return target_mask, masked_target_1d, predicted_logits, sum_exp_logits, exp_logits

    @staticmethod
    def calculate_cross_entropy_loss(
        exp_logits: torch.Tensor, predicted_logits: torch.Tensor, sum_exp_logits: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Calculates cross entropy loss."""

        # Loss = log(sum(exp(logits))) - predicted-logit.
        loss = torch.log(sum_exp_logits) - predicted_logits

        # Normalize and optionally smooth logits
        exp_logits.div_(sum_exp_logits.unsqueeze(dim=-1))

        return exp_logits, loss

    @staticmethod
    def prepare_gradient_calculation_operands(
        softmax: torch.Tensor, target_mask: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
        """Prepare gradient calculation operands."""

        # All the inputs have softmax as thier gradient.
        grad_input = softmax
        # For simplicity, work with the 2D gradient.
        partition_vocab_size = softmax.size()[-1]
        grad_2d = grad_input.view(-1, partition_vocab_size)

        # Add the gradient from matching classes.
        arange_1d = torch.arange(start=0, end=grad_2d.size()[0], device=grad_2d.device)

        softmax_update = 1.0 - target_mask.view(-1).float()

        return grad_2d, arange_1d, softmax_update, grad_input

    @staticmethod
    def calculate_gradients(
        grad_2d: torch.Tensor,
        arange_1d: torch.Tensor,
        masked_target_1d: torch.Tensor,
        softmax_update: torch.Tensor,
        grad_input: torch.Tensor,
        grad_output: torch.Tensor,
    ) -> torch.Tensor:
        """Calculates gradients."""

        grad_2d[arange_1d, masked_target_1d] -= softmax_update

        # Finally elementwise multiplication with the output gradients.
        grad_input.mul_(grad_output.unsqueeze(dim=-1))

        return grad_input


class _VocabParallelCrossEntropy(torch.autograd.Function):
    @staticmethod
    def forward(ctx, vocab_parallel_logits, target, label_smoothing=0.0):
        """Vocab parallel cross entropy forward function."""

        vocab_parallel_logits, logits_max = VocabParallelCrossEntropy.calculate_logits_max(
            vocab_parallel_logits
        )
        torch.distributed.all_reduce(
            logits_max, op=torch.distributed.ReduceOp.MAX, group=get_tensor_model_parallel_group()
        )

        # Get the partition's vocab indices
        get_vocab_range = VocabUtility.vocab_range_from_per_partition_vocab_size
        partition_vocab_size = vocab_parallel_logits.size()[-1]
        rank = get_tensor_model_parallel_rank()
        world_size = get_tensor_model_parallel_world_size()
        vocab_start_index, vocab_end_index = get_vocab_range(partition_vocab_size, rank, world_size)

        (target_mask, masked_target_1d, predicted_logits, sum_exp_logits, exp_logits) = (
            VocabParallelCrossEntropy.calculate_predicted_logits(
                vocab_parallel_logits, target, logits_max, vocab_start_index, vocab_end_index
            )
        )

        # All reduce is needed to get the chunks from other GPUs.
        torch.distributed.all_reduce(
            predicted_logits,
            op=torch.distributed.ReduceOp.SUM,
            group=get_tensor_model_parallel_group(),
        )

        torch.distributed.all_reduce(
            sum_exp_logits,
            op=torch.distributed.ReduceOp.SUM,
            group=get_tensor_model_parallel_group(),
        )

        exp_logits, loss = VocabParallelCrossEntropy.calculate_cross_entropy_loss(
            exp_logits, predicted_logits, sum_exp_logits
        )

        vocab_size = exp_logits.size(-1)
        if label_smoothing > 0:
            r"""
            We'd like to assign 1 / (K - 1) probability mass to every index that is not the ground truth.
            = (1 - alpha) * y_gt + alpha * mean(y_{i for i != gt})
            = (1 - alpha) * y_gt + (alpha / (K - 1)) * \sum_{i != gt} y_i
            = ((K - 1) * (1 - alpha) / (K - 1)) * y_gt + (alpha / (K - 1)) * \sum_{i != gt} y_i
            = (K * (1 - alpha) - 1) / (K - 1)) * y_gt  + (alpha / (K - 1)) * \sum_{i} y_i
            = (1 - (alpha * K) / (K - 1)) * y_gt + ( (alpha * K) / (K - 1) ) * \sum_{i} y_i / K
            From: https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/common/losses/smoothed_cross_entropy.py
            """  # pylint: disable=line-too-long
            assert 1.0 > label_smoothing > 0.0
            smoothing = label_smoothing * vocab_size / (vocab_size - 1)

            # Exp logits at this point are normalized probabilities.
            # So we can just take the log to get log-probs.
            log_probs = torch.log(exp_logits)
            mean_log_probs = log_probs.mean(dim=-1)
            loss = (1.0 - smoothing) * loss - smoothing * mean_log_probs

        ctx.label_smoothing, ctx.vocab_size = label_smoothing, vocab_size

        # Store softmax, target-mask and masked-target for backward pass.
        ctx.save_for_backward(exp_logits, target_mask, masked_target_1d)

        return loss

    @staticmethod
    def backward(ctx, grad_output):
        """Vocab parallel cross entropy backward function."""

        # Retreive tensors from the forward path.
        softmax, target_mask, masked_target_1d = ctx.saved_tensors
        label_smoothing, vocab_size = ctx.label_smoothing, ctx.vocab_size

        (grad_2d, arange_1d, softmax_update, grad_input) = (
            VocabParallelCrossEntropy.prepare_gradient_calculation_operands(softmax, target_mask)
        )

        if label_smoothing > 0:
            smoothing = label_smoothing * vocab_size / (vocab_size - 1)
            grad_2d[arange_1d, masked_target_1d] -= (1.0 - smoothing) * softmax_update
            average_grad = 1 / vocab_size
            grad_2d[arange_1d, :] -= smoothing * average_grad

            # Finally elementwise multiplication with the output gradients.
            grad_input.mul_(grad_output.unsqueeze(dim=-1))
        else:
            grad_input = VocabParallelCrossEntropy.calculate_gradients(
                grad_2d, arange_1d, masked_target_1d, softmax_update, grad_input, grad_output
            )

        return grad_input, None, None


def vocab_parallel_cross_entropy(vocab_parallel_logits, target, label_smoothing=0.0):
    """
    Performs cross entropy loss when logits are split across tensor parallel ranks

    Args:
        vocab_parallel_logits: logits split across tensor parallel ranks
            dimension is [sequence_length, batch_size, vocab_size/num_parallel_ranks]

        target: correct vocab ids of dimseion [sequence_length, micro_batch_size]

        label_smoothing: smoothing factor, must be in range [0.0, 1.0)
                         default is no smoothing (=0.0)
    """
    return _VocabParallelCrossEntropy.apply(vocab_parallel_logits, target, label_smoothing)


================================================
FILE: megatron/core/tensor_parallel/data.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

import torch

from megatron.core.utils import get_tensor_model_parallel_group_if_none

_MAX_DATA_DIM = 5


def _check_data_types(keys, data, target_dtype):
    """Check that all the keys have the same target data type."""
    for key in keys:
        assert (
            data[key].dtype == target_dtype
        ), '{} has data type {} which ' 'is different than {}'.format(
            key, data[key].dtype, target_dtype
        )


def _build_key_size_numel_dictionaries(keys, data, tp_group=None):
    """Build the size on rank 0 and broadcast."""
    tp_group = get_tensor_model_parallel_group_if_none(tp_group)
    max_dim = _MAX_DATA_DIM
    sizes = [0 for _ in range(max_dim) for _ in keys]

    # Pack the sizes on rank zero.
    if tp_group.rank() == 0:
        offset = 0
        for key in keys:
            assert data[key].dim() < max_dim, 'you should increase MAX_DATA_DIM'
            size = data[key].size()
            for i, s in enumerate(size):
                sizes[i + offset] = s
            offset += max_dim

    # Move to GPU and broadcast.
    sizes_cuda = torch.tensor(sizes, dtype=torch.long, device='cuda')
    group_ranks = torch.distributed.get_process_group_ranks(group=tp_group)
    torch.distributed.broadcast(sizes_cuda, group_ranks[0], group=tp_group)

    # Move back to cpu and unpack.
    sizes_cpu = sizes_cuda.cpu()
    key_size = {}
    key_numel = {}
    total_numel = 0
    offset = 0
    for key in keys:
        i = 0
        size = []
        numel = 1
        while sizes_cpu[offset + i] > 0:
            this_size = sizes_cpu[offset + i]
            size.append(this_size)
            numel *= this_size
            i += 1
        key_size[key] = size
        key_numel[key] = numel
        total_numel += numel
        offset += max_dim

    return key_size, key_numel, total_numel


def broadcast_data(keys, data, datatype, tp_group=None):
    """Broadcast data from rank zero of each model parallel group to the
    members of the same model parallel group.

    Args:
        keys: list of keys in the data disctionary to be broadcasted
        data: data dictionary of string keys and cpu tensor values.
        datatype: torch data type of all tensors in data associated
                  with keys.
        tp_group: the tensor model parallel group to broadcast to.
    """
    # Build (key, size) and (key, number of elements) dictionaries along
    # with the total number of elements on all ranks.
    key_size, key_numel, total_numel = _build_key_size_numel_dictionaries(keys, data)
    tp_group = get_tensor_model_parallel_group_if_none(tp_group)
    # Pack on rank zero.
    if tp_group.rank() == 0:
        # Check that all keys have the same data type.
        _check_data_types(keys, data, datatype)
        # Flatten the data associated with the keys
        flatten_data = torch.cat([data[key].cuda().contiguous().view(-1) for key in keys], dim=0)
    else:
        flatten_data = torch.empty(total_numel, device=torch.cuda.current_device(), dtype=datatype)

    # Broadcast
    group_ranks = torch.distributed.get_process_group_ranks(group=tp_group)
    torch.distributed.broadcast(flatten_data, group_ranks[0], group=tp_group)

    # Unpack
    output = {}
    offset = 0
    for key in keys:
        size = key_size[key]
        numel = key_numel[key]
        output[key] = flatten_data.narrow(0, offset, numel).view(size)
        offset += numel

    return output


================================================
FILE: megatron/core/tensor_parallel/inference_layers.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
from typing import Callable, Optional, Tuple, Union

import torch
import torch.distributed as dist

from megatron.core.extensions.transformer_engine import (
    TEColumnParallelLinear,
    TELayerNormColumnParallelLinear,
    TERowParallelLinear,
)
from megatron.core.inference.communication.torch_symm_triton import (
    are_tensors_nvls_eligible,
    fused_multimem_rs_add_norm_ag,
    multimem_all_gather,
    multimem_reduce_scatter,
)
from megatron.core.inference.quantization.mxfp8_tensor import MXFP8Tensor
from megatron.core.inference.quantization.utils import mm_mxfp8
from megatron.core.inference.symmetric_memory import SymmetricMemoryManager
from megatron.core.model_parallel_config import ModelParallelConfig
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.utils import get_tensor_model_parallel_group_if_none

try:
    import transformer_engine.pytorch.cpp_extensions as tex
    from transformer_engine.pytorch.constants import TE_DType
    from transformer_engine.pytorch.distributed import (
        gather_along_first_dim,
        reduce_scatter_along_first_dim,
    )

    HAVE_TE = True
except ImportError:
    HAVE_TE = False


def _te_rms_norm_kernel(x: torch.Tensor, weight: torch.Tensor, eps: float):
    x_shape = x.shape
    x = x.view(-1, x.size(-1))
    out, _, _ = tex.rmsnorm_fwd(
        x, weight, eps, None, None, TE_DType[x.dtype], 16, False  # sm-margin  # zero centered gamma
    )
    out = out.view(*x_shape[:-1], -1)
    return out.to(x.dtype)


def _apply_linear(
    x: torch.Tensor,
    weight: Union[torch.Tensor, MXFP8Tensor],
    config: TransformerConfig,
    out: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """
    Helper to apply either MXFP8 or standard GEMM based on the configuration.
    """
    kwargs = {"out": out} if out is not None else {}
    if config.fp8_recipe == "mxfp8":
        return mm_mxfp8(x, weight, **kwargs)
    return torch.matmul(x, weight.t(), **kwargs)


class InferenceLayerNormColumnParallelLinear(TELayerNormColumnParallelLinear):
    """
    Inference optimized version of TELayerNormColumnParallelLinear.
    """

    def __init__(
        self,
        input_size: int,
        output_size: int,
        *,
        config: TransformerConfig,
        init_method: Callable,
        gather_output: bool,
        bias: bool,
        skip_bias_add: bool,
        is_expert: bool,
        stride: int = 1,
        skip_weight_param_allocation: bool = False,
        tp_comm_buffer_name: Optional[str] = None,
        tp_group: Optional[torch.distributed.ProcessGroup] = None,
    ):
        assert HAVE_TE, "--transformer-impl=inference_optimized requires transformer engine"
        super().__init__(
            input_size,
            output_size,
            config=config,
            init_method=init_method,
            gather_output=gather_output,
            bias=bias,
            skip_bias_add=skip_bias_add,
            is_expert=is_expert,
            stride=stride,
            skip_weight_param_allocation=skip_weight_param_allocation,
            tp_comm_buffer_name=tp_comm_buffer_name,
            tp_group=tp_group,
        )
        self.tp_group = get_tensor_model_parallel_group_if_none(tp_group, is_expert=is_expert)
        self.tp_size = dist.get_world_size(self.tp_group)

        assert (
            output_size % self.tp_size == 0
        ), f"output_size ({output_size}) must be divisible by tp_size ({self.tp_size})"

        self.eps = config.layernorm_epsilon

        if self.tp_size > 1:
            assert (
                config.sequence_parallel
            ), "--transformer-impl=inference_optimized requires --sequence-parallel"

        self.triton_nvls_kernels_allowed = not config.inference_disable_triton_nvls_kernels

        # Boolean to be toggled externally for skipping norm and all-gather.
        # This is used when enabling fused reduce-scatter + add + rms-norm + all-gather
        # in tensor parallelism. In this case, the preceeding RowParallelLinear layer
        # has already applied the rms-norm and all-gather.
        self.skip_norm_and_all_gather = False

    def _maybe_allocate_symmetric_buffer(self, x: torch.Tensor):
        """
        Attempt to allocate symmetric memory buffer for all-gather.
        """
        symm_mem_buffer_dims = list(x.size())
        symm_mem_buffer_dims[0] *= self.tp_size
        buf = SymmetricMemoryManager.get_buffer("tp", process_group=self.tp_group)
        symm_mem_buffer = buf.maybe_get_tensor(symm_mem_buffer_dims, dtype=x.dtype)
        return symm_mem_buffer

    def _all_gather(self, x: torch.Tensor, symm_mem_buffer: dict) -> None:
        """
        Attempt an NVLS all-gather into symmetric memory. If not possible,
        revert to torch dist (NCCL) all-gather.
        """
        if self.tp_size == 1:
            return x

        # Check input only: if input is 16-byte divisible, the output
        # (world_size * input) is too.
        can_use_nvls = (
            self.triton_nvls_kernels_allowed
            and are_tensors_nvls_eligible(x)
            and symm_mem_buffer["handle"] is not None
        )
        if can_use_nvls:
            # do multimem all gather
            multimem_all_gather(symm_mem_buffer["tensor"], x, symm_mem_buffer["handle"])
            return symm_mem_buffer["tensor"]
        else:
            # revert to torch dist (NCCL) all gather
            x, _ = gather_along_first_dim(x, process_group=self.tp_group)
            return x

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, None]:
        """
        Forward pass.
        """
        # Necessary conditions to ensure we are executing the fused rs-add-rmsnorm-ag
        # in the preceeding RowParallelLinear layer.
        # 1. skip_norm_and_all_gather is True
        # 2. tp_size > 1
        # 3. enough symmetric memory is available - if available it already has the output

        if self.training:
            return super().forward(x)

        if self.tp_size == 1:
            x = _te_rms_norm_kernel(x=x, weight=self.layer_norm_weight, eps=self.eps)
            x = _apply_linear(x, self.weight, self.config)
            return x, None

        symm_mem_buffer = self._maybe_allocate_symmetric_buffer(x)
        is_in_fused_mode = (
            self.skip_norm_and_all_gather
            and self.tp_size > 1
            and symm_mem_buffer["handle"] is not None
        )
        if is_in_fused_mode:
            x = symm_mem_buffer["tensor"]
        else:
            x = _te_rms_norm_kernel(x=x, weight=self.layer_norm_weight, eps=self.eps)
            x = self._all_gather(x, symm_mem_buffer)

        x = _apply_linear(x, self.weight, self.config)

        return x, None


class InferenceColumnParallelLinear(TEColumnParallelLinear):
    """
    Inference optimized version of TEColumnParallelLinear.
    """

    def __init__(
        self,
        input_size: int,
        output_size: int,
        *,
        config: TransformerConfig,
        init_method: Callable,
        gather_output: bool,
        bias: bool,
        skip_bias_add: bool,
        is_expert: bool,
        stride: int = 1,
        skip_weight_param_allocation: bool = False,
        tp_comm_buffer_name: Optional[str] = None,
        tp_group: Optional[torch.distributed.ProcessGroup] = None,
    ):
        assert HAVE_TE, "--transformer-impl=inference_optimized requires transformer engine"
        super().__init__(
            input_size,
            output_size,
            config=config,
            init_method=init_method,
            gather_output=gather_output,
            bias=bias,
            skip_bias_add=skip_bias_add,
            is_expert=is_expert,
            stride=stride,
            skip_weight_param_allocation=skip_weight_param_allocation,
            tp_comm_buffer_name=tp_comm_buffer_name,
            tp_group=tp_group,
        )
        self.tp_group = get_tensor_model_parallel_group_if_none(tp_group, is_expert=is_expert)
        self.tp_size = dist.get_world_size(self.tp_group)

        assert (
            output_size % self.tp_size == 0
        ), f"output_size ({output_size}) must be divisible by tp_size ({self.tp_size})"

        if self.tp_size > 1:
            assert (
                config.sequence_parallel
            ), "--transformer-impl=inference_optimized requires --sequence-parallel"

        self.triton_nvls_kernels_allowed = not config.inference_disable_triton_nvls_kernels

    def _maybe_allocate_symmetric_buffer(self, x: torch.Tensor):
        """
        Attempt to allocate symmetric memory buffer for all-gather.
        """
        symm_mem_buffer_dims = list(x.size())
        symm_mem_buffer_dims[0] *= self.tp_size
        buf = SymmetricMemoryManager.get_buffer("tp", process_group=self.tp_group)
        symm_mem_buffer = buf.maybe_get_tensor(symm_mem_buffer_dims, dtype=x.dtype)
        return symm_mem_buffer

    def _all_gather(self, x: torch.Tensor, symm_mem_buffer: dict) -> None:
        """
        Attempt an NVLS all-gather into symmetric memory. If not possible,
        revert to torch dist (NCCL) all-gather.
        """
        if self.tp_size == 1:
            return x

        can_use_nvls = (
            self.triton_nvls_kernels_allowed
            and are_tensors_nvls_eligible(x)
            and symm_mem_buffer["handle"] is not None
        )
        if can_use_nvls:
            multimem_all_gather(symm_mem_buffer["tensor"], x, symm_mem_buffer["handle"])
            return symm_mem_buffer["tensor"]
        else:
            x, _ = gather_along_first_dim(x, process_group=self.tp_group)
            return x

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, None]:
        """
        Forward pass.
        """
        if self.training:
            return super().forward(x)

        if self.tp_size == 1:
            x = _apply_linear(x, self.weight, self.config)
            return x, None

        symm_mem_buffer = self._maybe_allocate_symmetric_buffer(x)
        x = self._all_gather(x, symm_mem_buffer)
        x = _apply_linear(x, self.weight, self.config)

        return x, None


class InferenceRowParallelLinear(TERowParallelLinear):
    """
    Inference optimized version of TERowParallelLinear.
    """

    def __init__(
        self,
        input_size: int,
        output_size: int,
        *,
        config: ModelParallelConfig,
        init_method: Callable,
        bias: bool,
        input_is_parallel: bool,
        skip_bias_add: bool,
        is_expert: bool,
        tp_comm_buffer_name: Optional[str] = None,
        tp_group: Optional[torch.distributed.ProcessGroup] = None,
    ):
        assert HAVE_TE, "--transformer-impl=inference_optimized requires transformer engine"
        super().__init__(
            input_size,
            output_size,
            config=config,
            init_method=init_method,
            bias=bias,
            input_is_parallel=input_is_parallel,
            skip_bias_add=skip_bias_add,
            is_expert=is_expert,
            tp_comm_buffer_name=tp_comm_buffer_name,
            tp_group=tp_group,
        )
        self.tp_group = get_tensor_model_parallel_group_if_none(tp_group, is_expert=is_expert)
        self.tp_size = dist.get_world_size(self.tp_group)
        assert (
            input_size % self.tp_size == 0
        ), f"input_size ({input_size}) must be divisible by tp_size ({self.tp_size})"

        if self.tp_size > 1:
            assert (
                config.sequence_parallel
            ), "--transformer-impl=inference_optimized requires --sequence-parallel"

        self.triton_nvls_kernels_allowed = not getattr(
            config, 'inference_disable_triton_nvls_kernels', False
        )

        # Placeholder for next layer norm weights for fused
        # reduce-scatter + add + rms-norm + all-gather
        self.next_layer_norm_weights = None
        self.config = config

    def _matmul_reduce_scatter(self, x, residual=None):
        """
        Multiplies x by the weight matrix and performs a reduce-scatter.
        It will first try to write the matmul output to symmetric memory
        and perform an NVLS multicast reduce-scatter. If that is not possible,
        it will revert to torch.dist (NCCL) reduce-scatter.
        """
        use_mxfp8 = self.config.fp8_recipe == "mxfp8"
        symm_mem_buffer_dims = list(x.size())
        if use_mxfp8:
            # Remove batch dimension for FlashInfer mxfp8
            del symm_mem_buffer_dims[1]
        symm_mem_buffer_dims[-1] = self.weight.size(0)
        buf = SymmetricMemoryManager.get_buffer("tp", process_group=self.tp_group)
        symm_mem_buffer = buf.maybe_get_tensor(symm_mem_buffer_dims, dtype=x.dtype)

        # RS requires bf16 (hardware multimem reduce is bf16-only).
        # Check the matmul output shape: if it is NVLS-eligible, the RS output
        # (world_size times smaller on dim 0) is too.
        can_use_nvls = (
            self.triton_nvls_kernels_allowed
            and x.dtype == torch.bfloat16
            and are_tensors_nvls_eligible(x)
            and symm_mem_buffer["handle"] is not None
        )

        if can_use_nvls:
            # Write output of matmul directly onto the symmetric memory buffer

            x = _apply_linear(x, self.weight, self.config, out=symm_mem_buffer["tensor"])

            # perform nvls reduce-scatter
            if self.next_layer_norm_weights is None:
                output_dims = list(x.size())
                output_dims[0] = x.size(0) // self.tp_size
                output = torch.empty(output_dims, dtype=x.dtype, device=x.device)
                multimem_reduce_scatter(output, x, symm_mem_buffer["handle"])
                return output
            else:
                assert hasattr(self, "residual"), (
                    "For fused reduce-scatter + add + rms-norm + all-gather, "
                    "residual must be set via _set_residual()"
                )
                residual = self.residual
                fused_multimem_rs_add_norm_ag(
                    residual,
                    symm_mem_buffer["tensor"],
                    symm_mem_buffer["handle"],
                    residual,
                    self.next_layer_norm_weights,
                    self.config.layernorm_epsilon,
                )
                # 1. Residual has the output of the reduce-scatter + residual add
                #    Care must be taken in the model definition, so as to not apply the
                #    residual again.
                # 2. The output of the full reduce-scatter + add + rms-norm + all-gather is
                #    written into symm_mem_buffer["tensor"] and will be accessible there.
                return residual
        else:
            # revert to torch dist (NCCL) reduce-scatter
            x = _apply_linear(x, self.weight, self.config)
            x, _ = reduce_scatter_along_first_dim(x, tp_group=self.tp_group)
        return x

    def _set_next_layer_norm_weights(self, weights: torch.Tensor):
        """
        Set next layer norm weights for fused reduce-scatter + add + rms-norm + all-gather.
        """
        self.next_layer_norm_weights = weights

    def _set_residual(self, residual: torch.Tensor):
        """
        Set residual for fused reduce-scatter + add + rms-norm + all-gather.
        """
        self.residual = residual

    def forward(
        self, x: torch.Tensor, residual: Optional[torch.Tensor] = None
    ) -> tuple[torch.Tensor, None]:
        """
        Forward pass.
        """
        if self.training:
            return super().forward(x)

        if self.tp_size == 1:
            x = _apply_linear(x, self.weight, self.config)
            return x, None
        else:
            x = self._matmul_reduce_scatter(x)
            return x, None


================================================
FILE: megatron/core/tensor_parallel/layers.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

# Parts of the code here are adapted from PyTorch
# repo: https://github.com/pytorch/pytorch
from __future__ import annotations

import os
import warnings
from functools import partial
from typing import Any, Callable, List, Optional, Tuple

import torch
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from typing_extensions import override

from megatron.core.model_parallel_config import ModelParallelConfig
from megatron.core.parallel_state import (
    get_global_memory_buffer,
    get_tensor_model_parallel_rank,
    get_tensor_model_parallel_world_size,
)
from megatron.core.utils import (
    divide,
    get_pg_rank,
    get_pg_size,
    get_tensor_model_parallel_group_if_none,
    is_torch_min_version,
    make_tp_sharded_tensor_for_checkpoint,
    prepare_input_tensors_for_wgrad_compute,
)

from ..dist_checkpointing.mapping import ShardedStateDict
from ..transformer.utils import make_sharded_tensors_for_checkpoint
from .mappings import (
    copy_to_tensor_model_parallel_region,
    gather_from_sequence_parallel_region,
    gather_from_tensor_model_parallel_region,
    reduce_from_tensor_model_parallel_region,
    reduce_scatter_to_sequence_parallel_region,
    scatter_to_tensor_model_parallel_region,
)
from .random import get_cuda_rng_tracker, get_expert_parallel_rng_tracker_name
from .utils import VocabUtility

_grad_accum_fusion_available = True
try:
    import fused_weight_gradient_mlp_cuda
except ImportError:
    _grad_accum_fusion_available = False

try:
    import transformer_engine  # pylint: disable=unused-import
    from transformer_engine.pytorch.module.base import get_dummy_wgrad

    HAVE_TE = True
except ImportError:
    HAVE_TE = False

_MODEL_PARALLEL_ATTRIBUTE_DEFAULTS = {
    "expert_tp": False,
    "is_qkv": False,
    "tensor_model_parallel": False,
    "partition_dim": -1,
    "partition_stride": 1,
}

try:
    if is_torch_min_version("2.4.0a0"):
        custom_fwd = partial(torch.amp.custom_fwd, device_type="cuda")
        custom_bwd = partial(torch.amp.custom_bwd, device_type="cuda")
    else:
        custom_fwd = torch.cuda.amp.custom_fwd
        custom_bwd = torch.cuda.amp.custom_bwd
except:
    custom_fwd = torch.cuda.amp.custom_fwd
    custom_bwd = torch.cuda.amp.custom_bwd

try:
    if is_torch_min_version("1.13.0"):
        dist_all_gather_func = torch.distributed.all_gather_into_tensor
        dist_reduce_scatter_func = torch.distributed.reduce_scatter_tensor
    else:
        dist_all_gather_func = torch.distributed._all_gather_base
        dist_reduce_scatter_func = torch.distributed._reduce_scatter_base
except:
    dist_all_gather_func = torch.distributed._all_gather_base
    dist_reduce_scatter_func = torch.distributed._reduce_scatter_base


def param_is_not_tensor_parallel_duplicate(param, tp_group=None):
    """Returns true if the passed-in parameter is not a duplicate parameter
    on another TP rank."""
    if hasattr(param, "tensor_model_parallel") and param.tensor_model_parallel:
        return True
    # Prefer provided tp_group when available (new explicit path).
    if tp_group is not None:
        return tp_group.rank() == 0
    # Fallback to legacy global state (back-compat).
    return get_tensor_model_parallel_rank() == 0


def set_tensor_model_parallel_attributes(tensor, is_parallel, dim, stride):
    """Sets tp attributes to tensor"""
    # Make sure the attributes are not set.
    for attribute in _MODEL_PARALLEL_ATTRIBUTE_DEFAULTS:
        assert not hasattr(tensor, attribute)
    # Set the attributes.
    setattr(tensor, "tensor_model_parallel", is_parallel)
    setattr(tensor, "partition_dim", dim)
    setattr(tensor, "partition_stride", stride)


def set_defaults_if_not_set_tensor_model_parallel_attributes(tensor):
    """Set default model parallel attributes if not set explicitly already."""

    def maybe_set(attribute, value):
        if not hasattr(tensor, attribute):
            setattr(tensor, attribute, value)

    for attribute in _MODEL_PARALLEL_ATTRIBUTE_DEFAULTS:
        maybe_set(attribute, _MODEL_PARALLEL_ATTRIBUTE_DEFAULTS[attribute])


def copy_tensor_model_parallel_attributes(destination_tensor, source_tensor):
    """Copy model parallel attributes from one tensor to another."""

    def maybe_copy(attribute):
        if hasattr(source_tensor, attribute):
            setattr(destination_tensor, attribute, getattr(source_tensor, attribute))

    for attribute in _MODEL_PARALLEL_ATTRIBUTE_DEFAULTS:
        maybe_copy(attribute)


def _initialize_affine_weight_gpu(weight, init_method, partition_dim, stride=1, is_expert=False):
    """Initialize affine weight for model parallel on GPU."""

    set_tensor_model_parallel_attributes(
        tensor=weight, is_parallel=True, dim=partition_dim, stride=stride
    )

    if not is_expert:
        with get_cuda_rng_tracker().fork():
            init_method(weight)
    else:
        with get_cuda_rng_tracker().fork(get_expert_parallel_rng_tracker_name()):
            init_method(weight)


def _initialize_affine_weight_cpu(
    weight,
    output_size,
    input_size,
    per_partition_size,
    partition_dim,
    init_method,
    stride=1,
    return_master_weight=False,
    *,
    params_dtype=torch.float32,
    rank=None,
    world_size=None,
    skip_set_tensor_parallel_attributes=False,
):
    """Initialize affine weight for model parallel.

    Build the master weight on all processes and scatter
    the relevant chunk."""

    if not skip_set_tensor_parallel_attributes:
        set_tensor_model_parallel_attributes(
            tensor=weight, is_parallel=True, dim=partition_dim, stride=stride
        )

    # Initialize master weight
    master_weight = torch.empty(output_size, input_size, dtype=torch.float, requires_grad=False)
    init_method(master_weight)
    master_weight = master_weight.to(dtype=params_dtype)
    # Split and copy
    per_partition_per_stride_size = divide(per_partition_size, stride)
    weight_list = torch.split(master_weight, per_partition_per_stride_size, dim=partition_dim)
    if rank is None:
        rank = get_tensor_model_parallel_rank()
        world_size = get_tensor_model_parallel_world_size()
    my_weight_list = weight_list[rank::world_size]

    with torch.no_grad():
        # all tensors must live on the same device
        cpu_weight = torch.cat(my_weight_list, dim=partition_dim).to_dense()
        weight.data.copy_(cpu_weight)
    if return_master_weight:
        return master_weight
    return None


class VocabParallelEmbedding(torch.nn.Module):
    """Embedding parallelized in the vocabulary dimension.

    This is mainly adapted from torch.nn.Embedding and all the default
    values are kept.

    Args:
        num_embeddings: vocabulary size.
        embedding_dim: size of hidden state.
        reduce_scatter_embeddings: Decides whether to perform ReduceScatter after embedding lookup

    Keyword Args:
        config: A megatron.core.ModelParallelConfig object
    """

    def __init__(
        self,
        num_embeddings: int,
        embedding_dim: int,
        *,
        init_method: Callable,
        reduce_scatter_embeddings: bool = False,
        config: ModelParallelConfig,
        tp_group: Optional[torch.distributed.ProcessGroup] = None,
    ):
        super(VocabParallelEmbedding, self).__init__()
        # Keep the input dimensions.
        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim
        self.reduce_scatter_embeddings = reduce_scatter_embeddings
        self.tp_group = tp_group

        self.tp_group = get_tensor_model_parallel_group_if_none(self.tp_group)

        (self.vocab_start_index, self.vocab_end_index) = (
            VocabUtility.vocab_range_from_global_vocab_size(
                self.num_embeddings, get_pg_rank(self.tp_group), get_pg_size(self.tp_group)
            )
        )
        self.num_embeddings_per_partition = self.vocab_end_index - self.vocab_start_index
        self.deterministic_mode = config.deterministic_mode

        # Allocate weights and initialize.
        if config.use_cpu_initialization:
            self.weight = Parameter(
                torch.empty(
                    self.num_embeddings_per_partition, self.embedding_dim, dtype=config.params_dtype
                )
            )
            if config.perform_initialization:
                _initialize_affine_weight_cpu(
                    self.weight,
                    self.num_embeddings,
                    self.embedding_dim,
                    self.num_embeddings_per_partition,
                    0,
                    init_method,
                    params_dtype=config.params_dtype,
                    rank=get_pg_rank(self.tp_group),
                    world_size=get_pg_size(self.tp_group),
                )
        else:
            self.weight = Parameter(
                torch.empty(
                    self.num_embeddings_per_partition,
                    self.embedding_dim,
                    device=torch.cuda.current_device(),
                    dtype=config.params_dtype,
                )
            )
            if config.perform_initialization:
                _initialize_affine_weight_gpu(self.weight, init_method, partition_dim=0, stride=1)

    def forward(self, input_):
        """Forward.

        Args:
            input_ (torch.Tensor): Input tensor.
        """
        if self.tp_group.size() > 1:
            # Build the mask.
            input_mask = (input_ < self.vocab_start_index) | (input_ >= self.vocab_end_index)
            # Mask the input.
            masked_input = input_.clone() - self.vocab_start_index
            masked_input[input_mask] = 0
        else:
            masked_input = input_
        # Get the embeddings.
        if self.deterministic_mode:
            output_parallel = self.weight[masked_input]
        else:
            # F.embedding currently has a non-deterministic backward function
            output_parallel = F.embedding(masked_input, self.weight)
        # Mask the output embedding.
        if self.tp_group.size() > 1:
            output_parallel[input_mask, :] = 0.0

        if self.reduce_scatter_embeddings:
            # Data format change to avoid explicit tranposes : [b s h] --> [s b h].
            output_parallel = output_parallel.transpose(0, 1).contiguous()
            output = reduce_scatter_to_sequence_parallel_region(
                output_parallel, group=self.tp_group
            )
        else:
            # Reduce across all the model parallel GPUs.
            output = reduce_from_tensor_model_parallel_region(output_parallel, group=self.tp_group)
        return output

    def sharded_state_dict(
        self,
        prefix: str = "",
        sharded_offsets: Tuple[Tuple[int, int, int]] = (),
        metadata: Optional[dict] = None,
    ) -> ShardedStateDict:
        """Non-default implementation for embeddings due to `allow_shape_mismatch` param"""
        state_dict = self.state_dict(prefix="", keep_vars=True)

        weight_prefix = f"{prefix}weight"
        return {
            weight_prefix: make_tp_sharded_tensor_for_checkpoint(
                tensor=state_dict["weight"],
                key=weight_prefix,
                allow_shape_mismatch=True,
                prepend_offsets=sharded_offsets,
                tp_group=self.tp_group,
                dp_cp_group=metadata["dp_cp_group"],
            )
        }


class LinearWithFrozenWeight(torch.autograd.Function):
    """Linear operator that does not calculate gradient for weight.
    This op and LinearWithGradAccumulationAndAsyncCommunication performs
    mathematically-identical forward and DGRAD.

    Conceptually this op is the same as torch.nn.functional.linear with
    weight.requires_grad==False, but in experiments they are not identical
    mathematically."""

    @staticmethod
    @custom_fwd
    def forward(ctx, input, weight, bias, allreduce_dgrad, tp_group):
        """Forward with frozen weight."""
        ctx.save_for_backward(weight)
        ctx.allreduce_dgrad = allreduce_dgrad
        ctx.tp_group = tp_group
        output = torch.matmul(input, weight.t())
        if bias is not None:
            output = output + bias
        return output

    @staticmethod
    @custom_bwd
    def backward(ctx, grad_output):
        """Backward with frozen weight."""
        (weight,) = ctx.saved_tensors
        grad_input = grad_output.matmul(weight)

        if ctx.allreduce_dgrad:
            # All-reduce. Note: here async and sync are effectively the same.
            torch.distributed.all_reduce(grad_input, group=ctx.tp_group)

        return grad_input, None, None, None, None


def linear_with_frozen_weight(
    input: torch.Tensor,
    weight: torch.Tensor,
    bias: Optional[torch.Tensor],
    gradient_accumulation_fusion: bool,
    allreduce_dgrad: bool,
    sequence_parallel: bool,
    tp_group: Optional[torch.distributed.ProcessGroup],
    grad_output_buffer: Optional[List[torch.Tensor]] = None,
    wgrad_deferral_limit: None = None,
) -> torch.Tensor:
    """Linear layer execution with weight.requires_grad == False.

    This function handles linear layers with weight frozen (untrainable).
    In the forward, it only saves weight and does not save input activations.
    In the backward, it does not perform weight gradient calculation, or
    weight gradient allreduce.

    Args:

    input (torch.Tensor required): input like torch.nn.functional.linear

    weight (torch.Tensor required): weight like torch.nn.functional.linear

    bias (torch.Tensor optional): bias like torch.nn.functional.linear

    gradient_accumulation_fusion (bool required): dummy argument, used to
    keep the API unified between all forward implementation functions.

    allreduce_dgrad (bool, required): Do the allreduce of input gradients.
        Here, async and sync allreduce are the same. If sequence_parallel is
        True, this must be False, as no all reduce is performed.

    sequence_parallel (bool required): Indicates that sequence
        parallelism is used and thus in the forward pass the input is
        all gathered, and the backward pass the input gradients are
        reduce scattered.

    tp_group (torch.distributed.ProcessGroup): The process group to use for tensor
                                                       parallel operations.

    grad_output_buffer (List[torch.Tensor] optional): dummy argument, used to
    keep the API unified between all forward implementation functions.

    wgrad_deferral_limit (int optional): dummy argument, used to
    keep the API unified between all forward implementation functions.
    """

    assert grad_output_buffer is None, (
        "grad_output_buffer kwarg is only supported with "
        "linear_with_grad_accumulation_and_async_allreduce"
    )

    assert wgrad_deferral_limit is None, (
        "This arg is only supported with " "linear_with_grad_accumulation_and_async_allreduce"
    )

    tp_group = get_tensor_model_parallel_group_if_none(tp_group)

    if sequence_parallel:
        input = gather_from_sequence_parallel_region(
            input, tensor_parallel_output_grad=True, group=tp_group
        )
    else:
        input = input

    args = [input, weight, bias, allreduce_dgrad, tp_group]

    return LinearWithFrozenWeight.apply(*args)


class LinearWithGradAccumulationAndAsyncCommunication(torch.autograd.Function):
    """See linear_with_grad_accumulation_and_async_allreduce"""

    @staticmethod
    @custom_fwd
    def forward(
        ctx,
        input,
        weight,
        bias,
        gradient_accumulation_fusion,
        allreduce_dgrad,
        sequence_parallel,
        grad_output_buffer,
        wgrad_deferral_limit,
        tp_group,
    ):
        """Forward."""
        if gradient_accumulation_fusion and hasattr(weight, "main_grad"):
            main_grad = weight.main_grad
        else:
            main_grad = None
        ctx.save_for_backward(input, weight)
        # We can't save main_grad in save_for_backward as this module would be
        # reused across layers like MTP logits. So, to prevent in-place modification
        # checks we save the tensor in ctx.
        ctx.main_grad = main_grad
        ctx.use_bias = bias is not None
        ctx.gradient_accumulation_fusion = gradient_accumulation_fusion
        ctx.allreduce_dgrad = allreduce_dgrad
        ctx.sequence_parallel = sequence_parallel
        ctx.wgrad_deferral_limit = wgrad_deferral_limit
        ctx.grad_output_buffer = grad_output_buffer
        ctx.tp_group = tp_group

        if sequence_parallel:
            dim_size = list(input.size())
            dim_size[0] = dim_size[0] * tp_group.size()

            all_gather_buffer = get_global_memory_buffer().get_tensor(dim_size, input.dtype, "mpu")
            dist_all_gather_func(all_gather_buffer, input, group=tp_group)
            total_input = all_gather_buffer
        else:
            total_input = input

        output = torch.matmul(total_input, weight.t())
        if bias is not None:
            output = output + bias
        return output

    @staticmethod
    @custom_bwd
    def backward(ctx, grad_output):
        """Backward."""
        input, weight = ctx.saved_tensors
        main_grad = ctx.main_grad
        use_bias = ctx.use_bias
        grad_output_buffer = ctx.grad_output_buffer
        wgrad_deferral_limit = ctx.wgrad_deferral_limit
        handle = None
        tp_group = ctx.tp_group

        if ctx.gradient_accumulation_fusion:
            weight.main_grad = main_grad

        wgrad_compute = True
        if grad_output_buffer is not None:
            if wgrad_deferral_limit == 0 or len(grad_output_buffer) < wgrad_deferral_limit:
                grad_output_buffer.append(grad_output)
                wgrad_compute = False

        if wgrad_compute:
            if ctx.sequence_parallel:
                dim_size = list(input.size())
                dim_size[0] = dim_size[0] * tp_group.size()

                all_gather_buffer = get_global_memory_buffer().get_tensor(
                    dim_size, input.dtype, "mpu"
                )
                handle = dist_all_gather_func(
                    all_gather_buffer, input, group=tp_group, async_op=True
                )

                # Here we rely on CUDA_DEVICE_MAX_CONNECTIONS=1 to ensure that the
                # gather is scheduled before the input gradient computation
                total_input = all_gather_buffer
            else:
                total_input = input
        grad_input = grad_output.matmul(weight)

        if ctx.sequence_parallel and wgrad_compute:
            # pylint: disable=possibly-used-before-assignment
            handle.wait()

        if wgrad_compute:
            grad_output, total_input = prepare_input_tensors_for_wgrad_compute(
                grad_output, total_input
            )

        if ctx.allreduce_dgrad:
            # Asynchronous all-reduce
            handle = torch.distributed.all_reduce(grad_input, group=tp_group, async_op=True)
            # Here we rely on CUDA_DEVICE_MAX_CONNECTIONS=1 to ensure that the
            # all-reduce is scheduled before the weight gradient computation

        if ctx.sequence_parallel:
            assert not ctx.allreduce_dgrad
            dim_size = list(input.size())
            sub_grad_input = torch.empty(
                dim_size, dtype=input.dtype, device=torch.cuda.current_device(), requires_grad=False
            )
            # reduce_scatter
            handle = dist_reduce_scatter_func(
                sub_grad_input, grad_input, group=tp_group, async_op=True
            )
            # Here we rely on CUDA_DEVICE_MAX_CONNECTIONS=1 to ensure that the
            # reduce scatter is scheduled before the weight gradient computation

        if ctx.gradient_accumulation_fusion:
            if wgrad_compute:
                # In case of Megatron-FSDP, need to create main grad buffers in-place
                if hasattr(weight, "__fsdp_param__"):
                    weight.main_grad = weight.get_main_grad()
                    torch.matmul(grad_output.t(), total_input, out=weight.main_grad)
                else:
                    if weight.main_grad.dtype == torch.float32:
                        fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp32(
                            total_input, grad_output, weight.main_grad
                        )
                    elif weight.main_grad.dtype in (torch.float16, torch.bfloat16):
                        fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp16(
                            total_input, grad_output, weight.main_grad
                        )
                    else:
                        raise RuntimeError(
                            "Unsupported gradient type for gradient accumulation fusion"
                        )

            if hasattr(weight, "grad_added_to_main_grad"):
                # When overlap_grad_reduce is True, need to ensure that backward hooks
                # are all run on the main backprop thread to prevent deadlocks. Setup
                # dummy grad_weight tensor to prevent backward hooks from being run
                # in a background thread.
                if getattr(weight, "zero_out_wgrad", False):
                    if HAVE_TE:
                        # get_dummy_wgrad function in TE enables reuse of single dummy wgrad buffer
                        # across different layers/microbatches. The function accepts shape as list.
                        grad_weight = get_dummy_wgrad(
                            list(weight.main_grad.shape), input.dtype, zero=True
                        )
                    else:
                        grad_weight = torch.zeros(
                            weight.main_grad.shape,
                            dtype=input.dtype,
                            device=torch.cuda.current_device(),
                            requires_grad=False,
                        )
                else:
                    if HAVE_TE:
                        grad_weight = get_dummy_wgrad(list(weight.main_grad.shape), input.dtype)
                    else:
                        grad_weight = torch.empty(
                            weight.main_grad.shape,
                            dtype=input.dtype,
                            device=torch.cuda.current_device(),
                            requires_grad=False,
                        )
                weight.grad_added_to_main_grad = True
            else:
                grad_weight = None
        else:
            grad_weight = grad_output.t().matmul(total_input)
        grad_bias = grad_output.sum(dim=0) if use_bias else None

        if ctx.sequence_parallel:
            handle.wait()
            # Need to return None's as gradient has to flow for all the input arguments
            # provided during forward
            return (sub_grad_input, grad_weight, grad_bias, None, None, None, None, None, None)

        if ctx.allreduce_dgrad:
            handle.wait()

        return grad_input, grad_weight, grad_bias, None, None, None, None, None, None


def linear_with_grad_accumulation_and_async_allreduce(
    input: torch.Tensor,
    weight: torch.Tensor,
    bias: Optional[torch.Tensor],
    gradient_accumulation_fusion: bool,
    allreduce_dgrad: bool,
    sequence_parallel: bool,
    grad_output_buffer: Optional[List[torch.Tensor]] = None,
    wgrad_deferral_limit: Optional[int] = 0,
    tp_group: Optional[torch.distributed.ProcessGroup] = None,
) -> torch.Tensor:
    """Linear layer execution with asynchronous communication and
    gradient accumulation fusion in backprop.

    This has the option to accumulate the result of backprop
    calculation into an existing gradient buffer, preventing the need
    to do an additional addition kernel after the gradient
    calculation.

    Additionally, the tensor parallel all reduce of the input
    gradients can be done asynchronously with the calculation of
    the weight gradients.

    In the case of sequence parallelism, the reduce scatter of the
    input gradients is done asynchronously with the calculation of the
    weight gradients.

    Use of this module requires that the environment variable
    CUDA_DEVICE_MAX_CONNECTIONS=1. There are a few collective
    operations, noted in the code, that should be scheduled before
    compute kernels to overlap the communication with the computation,
    which is necessary for a speedup but not for correctness so that
    ordering isn't imposed by the scheduler. Setting
    CUDA_DEVICE_MAX_CONNECTIONS=1 forces the kernels to be scheduled
    in the order they are called.

    Args:
        input (torch.Tensor required): input like torch.nn.functional.linear

        weight (torch.Tensor required): weight like torch.nn.functional.linear

        bias (torch.Tensor optional): bias like torch.nn.functional.linear

        gradient_accumulation_fusion (bool required): Perform the gradient
            accumulation fusion, requires the custom CUDA extension
            fused_weight_gradient_mlp_cuda module. To use
            gradient_accumulation_fusion you must install APEX with
            --cpp_ext and --cuda_ext. For example: "pip install
            --global-option=\"--cpp_ext\" --global-option=\"--cuda_ext .\"
            " Note that the extension requires CUDA>=11. Otherwise, you
            must turn off gradient accumulation fusion."

        allreduce_dgrad (bool required): Do the allreduce of input gradients.
            The allreduce is done asynchronously with the computation of weight
            gradients. If sequence_parallel is True, this must be
            False, as no all reduce is performed.

        sequence_parallel (bool required): Indicates that sequence
            parallelism is used and thus in the forward pass the input is
            all gathered, and the backward pass the input gradients are
            reduce scattered.

        tp_group (torch.distributed.ProcessGroup required): The process group to use for tensor
                                                   parallel operations.

        grad_output_buffer (List[torch.Tensor] optional): Buffer used to save
            output gradients when embedding table wgrad compute is deferred.
            Defaults to None.

        wgrad_deferral_limit (int optional): Limit on the number of
            micro-batches for which embedding weight gradient GEMM should be
            deferred. Disable by setting this to 0. Defaults to 0.
    """

    tp_group = get_tensor_model_parallel_group_if_none(tp_group)

    args = [
        input,
        weight,
        bias,
        gradient_accumulation_fusion,
        allreduce_dgrad,
        sequence_parallel,
        grad_output_buffer,
        wgrad_deferral_limit,
        tp_group,
    ]

    if not linear_with_grad_accumulation_and_async_allreduce.warned:
        if os.environ.get("CUDA_DEVICE_MAX_CONNECTIONS") != "1":
            if sequence_parallel:
                warnings.warn(
                    "When using sequence parallelism it is recommended to set the "
                    "environment variable CUDA_DEVICE_MAX_CONNECTIONS to 1 for "
                    "maximum speedup"
                )
                linear_with_grad_accumulation_and_async_allreduce.warned = True

            if allreduce_dgrad:
                warnings.warn(
                    "When using async grad allreduce it is recommended to set the "
                    "environment variable CUDA_DEVICE_MAX_CONNECTIONS to 1 for "
                    "maximum speedup"
                )
                linear_with_grad_accumulation_and_async_allreduce.warned = True

    return LinearWithGradAccumulationAndAsyncCommunication.apply(*args)


linear_with_grad_accumulation_and_async_allreduce.warned = False


class ColumnParallelLinear(torch.nn.Module):
    """Linear layer with column parallelism.

    The linear layer is defined as Y = XA + b. A is parallelized along
    its second dimension as A = [A_1, ..., A_p].

    Args:
        input_size:
            first dimension of matrix A.
        output_size:
            second dimension of matrix A.
        bias:
            If true, add bias
        gather_output:
            If true, call all-gather on output and make Y available to all GPUs,
            otherwise, every GPU will have its output which is Y_i = XA_i
        init_method:
            method to initialize weights. Note that bias is always set to zero.
        stride:
            For the strided linear layers.
        keep_master_weight_for_test:
            This was added for testing and should be set to False. It
            returns the master weights used for initialization.
        skip_bias_add:
            If True, do not add the bias term, instead return it to be added by the
            caller. This enables performance optimizations where bias can be fused with other
            elementwise operations.
        skip_weight_param_allocation:
            If True, weight parameter is not allocated and must be passed
            as a keyword argument `weight` during the forward pass. Note that this does not
            affect bias, which will be allocated if bias is True. Defaults to False.
        embedding_activation_buffer:
            This buffer holds the input activations of the final embedding
            linear layer on the last pipeline stage when defer_embedding_wgrad_compute is enabled.
        grad_output_buffer:
            This buffer holds the gradient outputs of the final embedding linear
            layer on the last pipeline stage when defer_embedding_wgrad_compute is enabled.
        is_expert:
            If True, the layer is treated as an MoE expert layer.
        config:
            ModelParallelConfig object
        tp_comm_buffer_name:
            Communication buffer name is not used in non-Transformer-Engine modules.
        disable_grad_reduce:
            If True, reduction of output gradients across tensor-parallel ranks
            will be disabled. Defaults to False. This feature is used by Lora Adapter in Nemo to
            delay and fuse reduction along with other gradients for performance optimization.
    """

    def __init__(
        self,
        input_size,
        output_size,
        *,
        config: ModelParallelConfig,
        init_method: Callable,
        bias=True,
        gather_output=False,
        stride=1,
        keep_master_weight_for_test=False,
        skip_bias_add=False,
        skip_weight_param_allocation: bool = False,
        embedding_activation_buffer: Optional[List[torch.Tensor]] = None,
        grad_output_buffer: Optional[List[torch.Tensor]] = None,
        is_expert: bool = False,
        tp_comm_buffer_name: Optional[str] = None,  # Not used
        disable_grad_reduce: bool = False,
        tp_group: Optional[torch.distributed.ProcessGroup] = None,
    ):
        super(ColumnParallelLinear, self).__init__()

        # Keep input parameters
        self.input_size = input_size
        self.output_size = output_size
        self.gather_output = gather_output
        # Divide the weight matrix along the last dimension.
        self.skip_bias_add = skip_bias_add
        self.is_expert = is_expert
        self.expert_parallel = config.expert_model_parallel_size > 1
        self.embedding_activation_buffer = embedding_activation_buffer
        self.grad_output_buffer = grad_output_buffer
        self.config = config
        self.disable_grad_reduce = disable_grad_reduce
        self.tp_group = tp_group

        self.tp_group = get_tensor_model_parallel_group_if_none(
            self.tp_group, is_expert=self.is_expert
        )
        world_size = get_pg_size(self.tp_group)
        rank = get_pg_rank(self.tp_group)
        self.explicit_expert_comm = self.is_expert and (world_size > 1 or self.expert_parallel)
        self.output_size_per_partition = divide(output_size, world_size)

        # Parameters.
        # Note: torch.nn.functional.linear performs XA^T + b and as a result
        # we allocate the transpose.
        # Initialize weight.
        if not skip_weight_param_allocation:
            if config.use_cpu_initialization:
                self.weight = Parameter(
                    torch.empty(
                        self.output_size_per_partition, self.input_size, dtype=config.params_dtype
                    )
                )
                if config.perform_initialization:
                    self.master_weight = _initialize_affine_weight_cpu(
                        self.weight,
                        self.output_size,
                        self.input_size,
                        self.output_size_per_partition,
                        0,
                        init_method,
                        stride=stride,
                        return_master_weight=keep_master_weight_for_test,
                        rank=rank,
                        world_size=world_size,
                    )
            else:
                self.weight = Parameter(
                    torch.empty(
                        self.output_size_per_partition,
                        self.input_size,
                        device=torch.cuda.current_device(),
                        dtype=config.params_dtype,
                    )
                )
                if config.perform_initialization:
                    _initialize_affine_weight_gpu(
                        self.weight,
                        init_method,
                        partition_dim=0,
                        stride=stride,
                        is_expert=self.is_expert,
                    )

            setattr(self.weight, "allreduce", not (self.is_expert and self.expert_parallel))
        else:
            self.weight = None

        if bias:
            if config.use_cpu_initialization:
                self.bias = Parameter(
                    torch.empty(self.output_size_per_partition, dtype=config.params_dtype)
                )
            else:
                self.bias = Parameter(
                    torch.empty(
                        self.output_size_per_partition,
                        device=torch.cuda.current_device(),
                        dtype=config.params_dtype,
                    )
                )
            set_tensor_model_parallel_attributes(self.bias, True, 0, stride)
            if config.perform_initialization:
                # Always initialize bias to zero.
                with torch.no_grad():
                    self.bias.zero_()
            setattr(self.bias, "allreduce", not (self.is_expert and self.expert_parallel))
        else:
            self.register_parameter("bias", None)

        self.sequence_parallel = config.sequence_parallel
        if self.sequence_parallel and world_size <= 1:
            warnings.warn(
                "`sequence_parallel` is set to `True`, but tensor model parallel size "
                f"is {world_size}. Disabling sequence parallel."
            )
            self.sequence_parallel = False

        self.allreduce_dgrad = (
            world_size > 1 and not self.sequence_parallel and not self.disable_grad_reduce
        )

        if config.gradient_accumulation_fusion and not _grad_accum_fusion_available:
            raise RuntimeError(
                "ColumnParallelLinear was called with gradient_accumulation_fusion set "
                "to True but the custom CUDA extension fused_weight_gradient_mlp_cuda "
                "module is not found. To use gradient_accumulation_fusion you must "
                "install APEX with --cpp_ext and --cuda_ext. For example: "
                'pip install --global-option="--cpp_ext" --global-option="--cuda_ext ." '
                "Note that the extension requires CUDA>=11. Otherwise, you must turn off "
                "gradient accumulation fusion."
            )
        self.gradient_accumulation_fusion = config.gradient_accumulation_fusion

        if self.allreduce_dgrad and self.sequence_parallel:
            raise RuntimeError(
                "`allreduce_dgrad` and `sequence_parallel` cannot be enabled at the same time."
            )

        # Hook adding a default empty _extra_state for state dict
        self._register_load_state_dict_pre_hook(
            lambda state_dict, prefix, *args, **kwargs: state_dict.setdefault(
                f"{prefix}_extra_state"
            )
        )

    def _forward_impl(self, input, weight, *args, **kwargs):
        if not weight.requires_grad:
            return linear_with_frozen_weight(input, weight, *args, **kwargs)
        else:
            return linear_with_grad_accumulation_and_async_allreduce(input, weight, *args, **kwargs)

    def forward(
        self,
        input_: torch.Tensor,
        weight: Optional[torch.Tensor] = None,
        runtime_gather_output: Optional[bool] = None,
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        """Forward of ColumnParallelLinear

        Args:
            input_:
                3D tensor whose order of dimension is [sequence, batch, hidden]
            weight (optional):
                weight tensor to use, compulsory when skip_weight_param_allocation is True.
            runtime_gather_output (bool): Gather output at runtime. Default None means
                `gather_output` arg in the constructor will be used.

        Returns:
            - output
            - bias

        """
        if weight is None:
            if self.weight is None:
                raise RuntimeError(
                    "weight was not supplied to ColumnParallelLinear forward pass "
                    "and skip_weight_param_allocation is True."
                )
            weight = self.weight
        else:
            # Check the weight passed in is the correct shape
            expected_shape = (self.output_size_per_partition, self.input_size)
            if weight.shape != expected_shape:
                raise RuntimeError(
                    f"supplied weight's shape is {tuple(weight.shape)}, "
                    f"not {expected_shape} as expected"
                )

        bias = self.bias if not self.skip_bias_add else None

        if (
            self.allreduce_dgrad
            or self.sequence_parallel
            or self.explicit_expert_comm
            or self.disable_grad_reduce
        ):
            input_parallel = input_
        else:
            input_parallel = copy_to_tensor_model_parallel_region(input_, group=self.tp_group)

        if self.config.defer_embedding_wgrad_compute:
            if (
                self.config.wgrad_deferral_limit == 0
                or len(self.embedding_activation_buffer) < self.config.wgrad_deferral_limit
            ):
                self.embedding_activation_buffer.append(input_parallel)

        # Matrix multiply.
        allreduce_dgrad = False if self.explicit_expert_comm else self.allreduce_dgrad

        if self.config._cpu_offloading_context is not None:
            if self.config._cpu_offloading_context.inside_context is True:
                if not HAVE_TE:
                    assert (
                        self.config.cpu_offloading is False
                    ), "CPU Offloading cannot be enabled while TE is not present"
                else:
                    input_parallel.activation_offloading = self.config.cpu_offloading_activations

        output_parallel = self._forward_impl(
            input=input_parallel,
            weight=weight,
            bias=bias,
            gradient_accumulation_fusion=self.gradient_accumulation_fusion,
            allreduce_dgrad=allreduce_dgrad,
            sequence_parallel=False if self.explicit_expert_comm else self.sequence_parallel,
            grad_output_buffer=(
                self.grad_output_buffer if self.config.defer_embedding_wgrad_compute else None
            ),
            wgrad_deferral_limit=(
                self.config.wgrad_deferral_limit
                if self.config.defer_embedding_wgrad_compute
                else None
            ),
            tp_group=self.tp_group,
        )

        gather_output = self.gather_output
        # Use the runtime gather output if it's set explicitly.
        if runtime_gather_output is not None:
            gather_output = runtime_gather_output

        if gather_output:
            # All-gather across the partitions.
            output = gather_from_tensor_model_parallel_region(output_parallel, group=self.tp_group)
        else:
            output = output_parallel
        output_bias = self.bias if self.skip_bias_add else None
        return output, output_bias

    def backward_dw(self) -> None:
        """Compute weight gradients during the backward pass if delay_wgrad_compute is enabled.

        Not supported - does nothing.
        """
        pass

    def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None):
        """Sharding along axis 0, bias sharded"""
        state_dict = self.state_dict(prefix="", keep_vars=True)
        return make_sharded_tensors_for_checkpoint(
            state_dict,
            prefix,
            {"weight": 0, "bias": 0},
            sharded_offsets,
            tp_group=self.tp_group,
            dp_cp_group=metadata['dp_cp_group'],
        )

    def set_extra_state(self, state: Any):
        """Extra state is ignored"""

    def get_extra_state(self) -> None:
        """Keep compatibility with TE state dict."""
        return None

    @override
    def extra_repr(self) -> str:
        """Extra context to add to the module's string representation."""
        tp = self.output_size // self.output_size_per_partition
        use_bias = self.bias is not None and self.bias is True
        return (
            f"in_features={self.input_size}, "
            f"out_features={self.output_size}, "
            f"bias={use_bias}, "
            f"TP={tp}"
        )


class RowParallelLinear(torch.nn.Module):
    """Linear layer with row parallelism.

    The linear layer is defined as Y = XA + b. A is parallelized along its first dimension and X
    along its second dimension. A = transpose([A_1 .. A_p]) X = [X_1, ..., X_p]

    Args:
        input_size:
            first dimension of matrix A.
        output_size:
            second dimension of matrix A.
        bias:
            If true, add bias. Note that bias is not parallelized.
        input_is_parallel:
            If true, we assume that the input is already split across the GPUs
            and we do not split again.
        init_method:
            method to initialize weights. Note that bias is always set to zero.
        stride:
            For the strided linear layers.
        keep_master_weight_for_test:
            This was added for testing and should be set to False. It returns the master weights
            used for initialization.
        skip_bias_add:
            If True, do not add the bias term, instead return it to be added by the
            caller. This enables performance optimizations where bias can be fused with other
            elementwise operations.
        is_expert:
            If True, the layer is treated as an MoE expert layer
        tp_comm_buffer_name:
            Communication buffer name. Not used in non-Transformer-Engine modules.
        config:
            ModelParallelConfig object

    """

    def __init__(
        self,
        input_size: int,
        output_size: int,
        *,
        config: ModelParallelConfig,
        init_method: Callable,
        bias: bool,
        input_is_parallel: bool,
        skip_bias_add: bool,
        stride: int = 1,
        keep_master_weight_for_test: bool = False,
        is_expert: bool = False,
        tp_comm_buffer_name: str | None = None,  # Not used
        tp_group: Optional[torch.distributed.ProcessGroup] = None,
    ):
        super(RowParallelLinear, self).__init__()

        # Keep input parameters
        self.input_size = input_size
        self.output_size = output_size
        self.input_is_parallel = input_is_parallel
        self.skip_bias_add = skip_bias_add
        self.config = config
        self.is_expert = is_expert
        self.expert_parallel = config.expert_model_parallel_size > 1
        self.gradient_accumulation_fusion = config.gradient_accumulation_fusion
        self.sequence_parallel = config.sequence_parallel
        self.tp_group = tp_group

        if self.sequence_parallel and not self.input_is_parallel:
            raise RuntimeError("To enable `sequence_parallel`, `input_is_parallel` must be `True`")

        # Divide the weight matrix along the last dimension.
        self.tp_group = get_tensor_model_parallel_group_if_none(
            self.tp_group, is_expert=self.is_expert
        )

        world_size = get_pg_size(self.tp_group)
        rank = get_pg_rank(self.tp_group)
        self.explicit_expert_comm = self.is_expert and (world_size > 1 or self.expert_parallel)

        self.input_size_per_partition = divide(input_size, world_size)

        # Parameters.
        # Note: torch.nn.functional.linear performs XA^T + b and as a result
        # we allocate the transpose.
        # Initialize weight.
        if config.use_cpu_initialization:
            self.weight = Parameter(
                torch.empty(
                    self.output_size, self.input_size_per_partition, dtype=config.params_dtype
                )
            )
            if config.perform_initialization:
                self.master_weight = _initialize_affine_weight_cpu(
                    self.weight,
                    self.output_size,
                    self.input_size,
                    self.input_size_per_partition,
                    1,
                    init_method,
                    stride=stride,
                    return_master_weight=keep_master_weight_for_test,
                    params_dtype=config.params_dtype,
                    rank=rank,
                    world_size=world_size,
                )
        else:
            self.weight = Parameter(
                torch.empty(
                    self.output_size,
                    self.input_size_per_partition,
                    device=torch.cuda.current_device(),
                    dtype=config.params_dtype,
                )
            )
            if config.perform_initialization:
                _initialize_affine_weight_gpu(
                    self.weight,
                    init_method,
                    partition_dim=1,
                    stride=stride,
                    is_expert=self.is_expert,
                )
        setattr(self.weight, "allreduce", not (self.is_expert and self.expert_parallel))

        if bias:
            if config.use_cpu_initialization:
                self.bias = Parameter(torch.empty(self.output_size, dtype=config.params_dtype))
            else:
                self.bias = Parameter(
                    torch.empty(
                        self.output_size,
                        device=torch.cuda.current_device(),
                        dtype=config.params_dtype,
                    )
                )

            if config.perform_initialization:
                # Always initialize bias to zero.
                with torch.no_grad():
                    self.bias.zero_()
            setattr(self.bias, "allreduce", not (self.is_expert and self.expert_parallel))
            setattr(self.bias, "sequence_parallel", self.sequence_parallel)
        else:
            self.register_parameter("bias", None)

        # Hook adding a default empty _extra_state for state dict
        self._register_load_state_dict_pre_hook(
            lambda state_dict, prefix, *args, **kwargs: state_dict.setdefault(
                f"{prefix}_extra_state"
            )
        )

    def _forward_impl(self, input, weight, *args, **kwargs):
        if not weight.requires_grad:
            return linear_with_frozen_weight(input, weight, *args, **kwargs)
        else:
            return linear_with_grad_accumulation_and_async_allreduce(input, weight, *args, **kwargs)

    def forward(self, input_: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        """Forward of RowParallelLinear

        Args:
            input_: 3D tensor whose order of dimension is [sequence, batch, hidden]

        Returns:
            - output
            - bias
        """

        # Set up backprop all-reduce.
        if self.input_is_parallel:
            input_parallel = input_
        else:
            assert not self.sequence_parallel
            input_parallel = scatter_to_tensor_model_parallel_region(input_, group=self.tp_group)
        # Matrix multiply.
        allreduce_dgrad = False

        if self.config._cpu_offloading_context is not None:
            if self.config._cpu_offloading_context.inside_context is True:
                if not HAVE_TE:
                    assert (
                        self.config.cpu_offloading is False
                    ), "CPU Offloading cannot be enabled while TE is not present"
                else:
                    input_parallel.activation_offloading = self.config.cpu_offloading_activations

        output_parallel = self._forward_impl(
            input=input_parallel,
            weight=self.weight,
            bias=None,
            gradient_accumulation_fusion=self.gradient_accumulation_fusion,
            allreduce_dgrad=allreduce_dgrad,
            sequence_parallel=False,
            tp_group=None,
            grad_output_buffer=None,
        )

        # All-reduce across all the partitions.
        if self.explicit_expert_comm:
            assert self.skip_bias_add
            output_ = output_parallel
        elif self.sequence_parallel:
            output_ = reduce_scatter_to_sequence_parallel_region(
                output_parallel, group=self.tp_group
            )
        else:
            output_ = reduce_from_tensor_model_parallel_region(output_parallel, group=self.tp_group)
        if not self.skip_bias_add:
            output = (output_ + self.bias) if self.bias is not None else output_
            output_bias = None
        else:
            output = output_
            output_bias = self.bias
        return output, output_bias

    def backward_dw(self) -> None:
        """Compute weight gradients during the backward pass if delay_wgrad_compute is enabled.

        Not supported - does nothing.
        """
        pass

    def sharded_state_dict(self, prefix="", sharded_offsets=(), metadata=None):
        """Sharding along axis 1, bias not sharded"""
        state_dict = self.state_dict(prefix="", keep_vars=True)
        return make_sharded_tensors_for_checkpoint(
            state_dict,
            prefix,
            {"weight": 1},
            sharded_offsets,
            tp_group=self.tp_group,
            dp_cp_group=metadata['dp_cp_group'],
        )

    def set_extra_state(self, state: Any):
        """Extra state is ignored"""

    def get_extra_state(self) -> None:
        """Keep compatibility with TE state dict."""
        return None

    @override
    def extra_repr(self) -> str:
        """Extra context to add to the module's string representation."""
        tp = self.input_size // self.input_size_per_partition
        use_bias = self.bias is not None and self.bias is True
        return (
            f"in_features={self.input_size}, "
            f"out_features={self.output_size}, "
            f"bias={use_bias}, "
            f"TP={tp}"
        )


================================================
FILE: megatron/core/tensor_parallel/mappings.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

import torch

from megatron.core.parallel_state import get_global_memory_buffer
from megatron.core.utils import get_tensor_model_parallel_group_if_none, is_torch_min_version

from .utils import split_tensor_along_last_dim

try:
    if is_torch_min_version("1.13.0"):
        dist_all_gather_func = torch.distributed.all_gather_into_tensor
        dist_reduce_scatter_func = torch.distributed.reduce_scatter_tensor
    else:
        dist_all_gather_func = torch.distributed._all_gather_base
        dist_reduce_scatter_func = torch.distributed._reduce_scatter_base
except:
    dist_all_gather_func = torch.distributed._all_gather_base
    dist_reduce_scatter_func = torch.distributed._reduce_scatter_base


def _reduce(input_, group):
    """All-reduce the input tensor across model parallel group."""
    assert group is not None, "group should not be None"

    # Bypass the function if we are using only 1 GPU.
    if group.size() == 1:
        return input_

    # All-reduce.
    torch.distributed.all_reduce(input_.contiguous(), group=group)

    return input_


def _split_along_last_dim(input_, group):
    """Split the tensor along its last dimension and keep the
    corresponding slice."""
    assert group is not None, "group should not be None"

    world_size = group.size()
    # Bypass the function if we are using only 1 GPU.
    if world_size == 1:
        return input_

    # Split along last dimension.
    input_list = split_tensor_along_last_dim(input_, world_size)

    # Note: torch.split does not create contiguous tensors by default.
    rank = group.rank()
    output = input_list[rank].contiguous()

    return output


def _split_along_first_dim(input_, group):
    """Split the tensor along its first dimension and keep the
    corresponding slice."""
    assert group is not None, "group should not be None"

    world_size = group.size()
    # Bypass the function if we are using only 1 GPU.
    if world_size == 1:
        return input_

    # Split along first dimension.
    dim_size = input_.size()[0]
    assert (
        dim_size % world_size == 0
    ), "First dimension of the tensor should be divisible by tensor parallel size"
    local_dim_size = dim_size // world_size
    rank = group.rank()
    dim_offset = rank * local_dim_size

    output = input_[dim_offset : dim_offset + local_dim_size].contiguous()

    return output


def _gather_along_last_dim(input_, group):
    """Gather tensors and concatinate along the last dimension."""

    world_size = group.size()
    # Bypass the function if we are using only 1 GPU.
    if world_size == 1:
        return input_

    dim_size = list(input_.size())
    dim_size[0] = dim_size[0] * world_size

    output = torch.empty(dim_size, dtype=input_.dtype, device=torch.cuda.current_device())
    dist_all_gather_func(output, input_.contiguous(), group=group)
    tensor_list = output.chunk(world_size, dim=0)
    output = torch.cat(tensor_list, dim=-1).contiguous()

    return output


def _reduce_scatter_along_last_dim(input_, group):
    """Reduce-scatter tensors on the last dimension."""

    world_size = group.size()
    target_shape = list(input_.size())
    target_shape[-1] = target_shape[-1] // world_size
    input_ = input_.reshape(-1, input_.shape[-1])
    split_tensors = torch.split(
        input_, split_size_or_sections=input_.shape[-1] // world_size, dim=1
    )
    concat_tensor = torch.cat(split_tensors, dim=0)
    output = _reduce_scatter_along_first_dim(concat_tensor, group=group).reshape(target_shape)
    return output


def _gather_along_first_dim(input_, group, output_split_sizes=None, use_global_buffer=False):
    """Gather tensors and concatenate along the first dimension.

    Args:
        input_tensor (torch.Tensor):
            A tensor to be gathered.
        output_split_sizes (List[int], optional):
            A list specifying the sizes of the output splits along the first dimension.
            If None, equal splitting is assumed. Default: None.

    Returns:
        torch.Tensor: Gathered tensor.
    """

    assert group is not None, "group should not be None"
    world_size = group.size()
    # Bypass the function if we are using only 1 GPU.
    if world_size == 1:
        return input_

    dim_size = list(input_.size())
    if output_split_sizes is None:
        dim_size[0] = dim_size[0] * world_size

        if use_global_buffer:
            output = get_global_memory_buffer().get_tensor(dim_size, input_.dtype, "mpu")
        else:
            output = torch.empty(dim_size, dtype=input_.dtype, device=torch.cuda.current_device())
        dist_all_gather_func(output, input_.contiguous(), group=group)
    else:
        dim_size[0] = sum(output_split_sizes)
        if use_global_buffer:
            output = get_global_memory_buffer().get_tensor(dim_size, input_.dtype, "mpu")
        else:
            output = torch.empty(dim_size, dtype=input_.dtype, device=torch.cuda.current_device())
        output_tensor_list = list(torch.split(output, output_split_sizes, dim=0))
        torch.distributed.all_gather(output_tensor_list, input_, group=group)

    return output


def _reduce_scatter_along_first_dim(input_, group, input_split_sizes=None, use_global_buffer=False):
    """Reduce-scatter the input tensor across model parallel group.

    Args:
        input_ (torch.Tensor): The input tensor to be reduce-scattered.
        input_split_sizes (List[int], optional): A list specifying the sizes of
            the input splits along the first dimension for each rank. If None,
            equal splitting is assumed. Default: None.
    """
    assert group is not None, "group should not be None"
    world_size = group.size()
    # Bypass the function if we are using only 1 GPU.
    if world_size == 1:
        return input_

    if input_split_sizes is None:
        dim_size = list(input_.size())
        assert (
            dim_size[0] % world_size == 0
        ), "First dimension of the tensor should be divisible by tensor parallel size"

        dim_size[0] = dim_size[0] // world_size

        if use_global_buffer:
            output = get_global_memory_buffer().get_tensor(dim_size, input_.dtype, "mpu")
        else:
            output = torch.empty(dim_size, dtype=input_.dtype, device=torch.cuda.current_device())
        dist_reduce_scatter_func(output, input_.contiguous(), group=group)
    else:
        rank = group.rank()
        input_tensor_list = list(torch.split(input_, input_split_sizes, dim=0))

        if use_global_buffer:
            output = get_global_memory_buffer().get_tensor(
                input_tensor_list[rank].shape, input_.dtype, "mpu"
            )
        else:
            output = torch.empty_like(input_tensor_list[rank])
        torch.distributed.reduce_scatter(output, input_tensor_list, group=group)
    return output


class _CopyToModelParallelRegion(torch.autograd.Function):
    """Pass the input to the model parallel region."""

    @staticmethod
    def symbolic(graph, input_, group):
        """Symbolic function for tracing."""
        return input_

    @staticmethod
    def forward(ctx, input_, group):
        """Forward function."""
        ctx.group = group
        return input_

    @staticmethod
    def backward(ctx, grad_output):
        """Backward function."""
        return _reduce(grad_output, ctx.group), None


class _ReduceFromModelParallelRegion(torch.autograd.Function):
    """All-reduce the input from the model parallel region."""

    @staticmethod
    def symbolic(graph, input_, group):
        """Symbolic function for tracing."""
        return _reduce(input_, group)

    @staticmethod
    def forward(ctx, input_, group):
        """Forward function."""
        return _reduce(input_, group)

    @staticmethod
    def backward(ctx, grad_output):
        """Backward function."""
        return grad_output, None


class _ScatterToModelParallelRegion(torch.autograd.Function):
    """Split the input and keep only the corresponding chuck to the rank."""

    @staticmethod
    def symbolic(graph, input_, group):
        """Symbolic function for tracing."""
        return _split_along_last_dim(input_, group)

    @staticmethod
    def forward(ctx, input_, group):
        """Forward function."""
        ctx.group = group
        return _split_along_last_dim(input_, group)

    @staticmethod
    def backward(ctx, grad_output):
        """Backward function."""
        return _gather_along_last_dim(grad_output, ctx.group), None


class _GatherFromModelParallelRegion(torch.autograd.Function):
    """Gather the input from model parallel region and concatinate."""

    @staticmethod
    def symbolic(graph, input_, group):
        """Symbolic function for tracing."""
        return _gather_along_last_dim(input_, group)

    @staticmethod
    def forward(ctx, input_, group):
        """Forward function."""
        ctx.group = group
        return _gather_along_last_dim(input_, group)

    @staticmethod
    def backward(ctx, grad_output):
        """Backward function."""
        return _split_along_last_dim(grad_output, ctx.group), None


class _ScatterToSequenceParallelRegion(torch.autograd.Function):
    """Split the input and keep only the corresponding chuck to the rank."""

    @staticmethod
    def symbolic(graph, input_, group):
        """Symbolic function for tracing."""
        return _split_along_first_dim(input_, group)

    @staticmethod
    def forward(ctx, input_, group):
        """Forward function."""
        ctx.group = group
        return _split_along_first_dim(input_, group)

    @staticmethod
    def backward(ctx, grad_output):
        """Backward function."""
        return _gather_along_first_dim(grad_output, ctx.group), None


class _GatherFromSequenceParallelRegion(torch.autograd.Function):
    """Gather the input from sequence parallel region and concatinate."""

    @staticmethod
    def symbolic(
        graph,
        input_,
        group,
        tensor_parallel_output_grad=True,
        output_split_sizes=None,
        use_global_buffer=False,
    ):
        """Symbolic function for tracing."""
        return _gather_along_first_dim(input_, group, output_split_sizes, use_global_buffer)

    @staticmethod
    def forward(
        ctx,
        input_,
        group,
        tensor_parallel_output_grad=True,
        output_split_sizes=None,
        use_global_buffer=False,
    ):
        """Forward function."""
        ctx.tensor_parallel_output_grad = tensor_parallel_output_grad
        ctx.group = group
        ctx.output_split_sizes = output_split_sizes
        ctx.use_global_buffer = use_global_buffer
        return _gather_along_first_dim(input_, group, output_split_sizes, use_global_buffer)

    @staticmethod
    def backward(ctx, grad_output):
        """Backward function."""
        tensor_parallel_output_grad = ctx.tensor_parallel_output_grad

        # If the computation graph after the gather operation is
        # in the tensor parallel mode, output gradients need to reduce
        # scattered and whereas if the computation is duplicated,
        # output gradients need to be scattered.
        if tensor_parallel_output_grad:
            return (
                _reduce_scatter_along_first_dim(
                    grad_output, ctx.group, ctx.output_split_sizes, ctx.use_global_buffer
                ),
                None,
                None,
                None,
                None,
            )
        else:
            assert ctx.output_split_sizes is None
            return (_split_along_first_dim(grad_output, ctx.group), None, None, None, None)


class _ReduceScatterToSequenceParallelRegion(torch.autograd.Function):
    """Reduce scatter the input from the model parallel region."""

    @staticmethod
    def symbolic(graph, input_, group, input_split_sizes=None, use_global_buffer=False):
        """Symbolic function for tracing."""
        return _reduce_scatter_along_first_dim(input_, group, input_split_sizes, use_global_buffer)

    @staticmethod
    def forward(ctx, input_, group, input_split_sizes=None, use_global_buffer=False):
        """Forward function."""
        ctx.group = group
        ctx.input_split_sizes = input_split_sizes
        ctx.use_global_buffer = use_global_buffer
        return _reduce_scatter_along_first_dim(input_, group, input_split_sizes, use_global_buffer)

    @staticmethod
    def backward(ctx, grad_output):
        """Backward function."""
        input_split_sizes = ctx.input_split_sizes
        use_global_buffer = ctx.use_global_buffer
        return (
            _gather_along_first_dim(grad_output, ctx.group, input_split_sizes, use_global_buffer),
            None,
            None,
            None,
        )


class _AllGatherFromTensorParallelRegion(torch.autograd.Function):
    """Gather the input from model parallel region and concatenate."""

    @staticmethod
    def symbolic(graph, input_, group):
        """Symbolic function for tracing."""
        return _gather_along_last_dim(input_, group)

    @staticmethod
    def forward(ctx, input_, group):
        """Forward function."""
        ctx.group = group
        return _gather_along_last_dim(input_, group)

    @staticmethod
    def backward(ctx, grad_output):
        """Backward function."""
        return _reduce_scatter_along_last_dim(grad_output, ctx.group), None


class _ReduceScatterToTensorParallelRegion(torch.autograd.Function):
    """Reduce scatter the input from the model parallel region."""

    @staticmethod
    def symbolic(graph, input_, group):
        """Symbolic function for tracing."""
        return _reduce_scatter_along_last_dim(input_, group)

    @staticmethod
    def forward(ctx, input_, group):
        """Forward function."""
        ctx.group = group
        return _reduce_scatter_along_last_dim(input_, group)

    @staticmethod
    def backward(ctx, grad_output):
        """Backward function."""
        return _gather_along_last_dim(grad_output, ctx.group), None


class _AllToAll(torch.autograd.Function):
    @staticmethod
    def forward(ctx, group, input, output_split_sizes, input_split_sizes):
        """Forward function."""
        ctx.group = group
        ctx.output_split_sizes = output_split_sizes
        ctx.input_split_sizes = input_split_sizes

        world_size = group.size()
        # Bypass the function if we are using only 1 GPU.
        if world_size == 1:
            return input

        input = input.contiguous()
        if output_split_sizes is None:
            # Equal split (all2all)
            output = torch.empty_like(input)
        else:
            # Unequal split (all2all-v)
            output = input.new_empty(
                size=[sum(output_split_sizes)] + list(input.size()[1:]),
                dtype=input.dtype,
                device=torch.cuda.current_device(),
            )
        torch.distributed.all_to_all_single(
            output,
            input,
            output_split_sizes=output_split_sizes,
            input_split_sizes=input_split_sizes,
            group=group,
        )
        return output

    @staticmethod
    def backward(ctx, *grad_output):
        """Backward function."""
        return (
            None,
            _AllToAll.apply(ctx.group, *grad_output, ctx.input_split_sizes, ctx.output_split_sizes),
            None,
            None,
        )


# -----------------
# Helper functions.
# -----------------


def copy_to_tensor_model_parallel_region(input_, group=None):
    """Wrapper for autograd function: forward: copy, backward allreduce"""
    group = get_tensor_model_parallel_group_if_none(group)
    return _CopyToModelParallelRegion.apply(input_, group)


def reduce_from_tensor_model_parallel_region(input_, group=None):
    """Wrapper for autograd function: forward: all reduce, backward copy"""
    group = get_tensor_model_parallel_group_if_none(group)
    return _ReduceFromModelParallelRegion.apply(input_, group)


def scatter_to_tensor_model_parallel_region(input_, group=None):
    """Wrapper for autograd function: forward: RS, backward: AG <last dim>"""
    group = get_tensor_model_parallel_group_if_none(group)
    return _ScatterToModelParallelRegion.apply(input_, group)


def gather_from_tensor_model_parallel_region(input_, group=None):
    """Wrapper for autograd function: forward: AG, backward: split <last dim>"""
    group = get_tensor_model_parallel_group_if_none(group)
    return _GatherFromModelParallelRegion.apply(input_, group)


def scatter_to_sequence_parallel_region(input_, group=None):
    """Wrapper for autograd function: forward: split, backward: AG <last dim>"""
    group = get_tensor_model_parallel_group_if_none(group)
    return _ScatterToSequenceParallelRegion.apply(input_, group)


def gather_from_sequence_parallel_region(
    input_,
    tensor_parallel_output_grad=True,
    group=None,
    output_split_sizes=None,
    use_global_buffer=False,
):
    """Wrapper for autograd function: forward: AG, backward: RS <first dim>"""
    group = get_tensor_model_parallel_group_if_none(group)
    return _GatherFromSequenceParallelRegion.apply(
        input_, group, tensor_parallel_output_grad, output_split_sizes, use_global_buffer
    )


def reduce_scatter_to_sequence_parallel_region(
    input_, group=None, input_split_sizes=None, use_global_buffer=False
):
    """Wrapper for autograd function: forward: RS, backward AG <fisrt dim>"""
    group = get_tensor_model_parallel_group_if_none(group)
    return _ReduceScatterToSequenceParallelRegion.apply(
        input_, group, input_split_sizes, use_global_buffer
    )


def all_gather_last_dim_from_tensor_parallel_region(input_, group=None):
    """Wrapper for autograd function: forward: AG, backward RS <last dim>"""
    group = get_tensor_model_parallel_group_if_none(group)
    return _AllGatherFromTensorParallelRegion.apply(input_, group)


def reduce_scatter_last_dim_to_tensor_parallel_region(input_, group=None):
    """Wrapper for autograd function: forward: RS, backward AG: AG <last dim>"""
    group = get_tensor_model_parallel_group_if_none(group)
    return _ReduceScatterToTensorParallelRegion.apply(input_, group)


def all_to_all(group, input_, output_split_sizes_=None, input_split_sizes=None):
    """Wrapper for autograd function"""
    assert group is not None, "group should not be None"
    return _AllToAll.apply(group, input_, output_split_sizes_, input_split_sizes)


def all_to_all_sp2hp(input_, group=None):
    """
    Perform AlltoAll communication on tensor parallel group, transform the input tensor from shape
    [num_tokens/TP, H] to [num_tokens, H/TP].

    Args:
        input_ (torch.Tensor):
            The input tensor which has been distributed along the sequence
            dimension.
        group (torch.distributed.ProcessGroup, optional):
            The process group to work on. If None, the tensor model parallel group
            will be used.

    Returns:
        torch.Tensor: The output tensor with shape [num_tokens, H/TP].

    """
    group = get_tensor_model_parallel_group_if_none(group)

    world_size = group.size()
    input_ = input_.reshape(-1, input_.shape[-1])
    split_tensors = torch.split(
        input_, split_size_or_sections=input_.shape[-1] // world_size, dim=1
    )
    concat_tensor = torch.cat(split_tensors, dim=0)
    output = all_to_all(group, concat_tensor)
    return output


def all_to_all_hp2sp(input_, group=None):
    """
    Perform AlltoAll communication on tensor parallel group, transform the input tensor from shape
    [num_tokens, H/TP] to [num_tokens/TP, H].

    Args:
        input_ (torch.Tensor):
            The input tensor which has been distributed along the hidden
            dimension.
        group (torch.distributed.ProcessGroup, optional):
            The process group to work on. If None, the tensor model parallel group
            will be used.

    Returns:
        torch.Tensor: The output tensor with shape [num_tokens/TP, H].
    """
    group = get_tensor_model_parallel_group_if_none(group)

    world_size = group.size()
    input_ = input_.reshape(-1, input_.shape[-1])
    input_exchanged = all_to_all(group, input_)
    input_reshaped = input_exchanged.reshape(-1, input_exchanged.shape[-1])
    split_tensors = torch.split(
        input_reshaped, split_size_or_sections=input_reshaped.shape[0] // world_size, dim=0
    )
    output = torch.cat(split_tensors, dim=-1)
    return output


================================================
FILE: megatron/core/tensor_parallel/random.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

# Parts of the code here are adapted from PyTorch
# repo: https://github.com/pytorch/pytorch
from __future__ import annotations

import contextlib
import logging
from collections.abc import Callable
from typing import Any, Optional, TypeVar, Union

import torch
from torch import _C
from torch.cuda import _lazy_call, _lazy_init
from torch.cuda import device as device_ctx_manager
from torch.utils.checkpoint import detach_variable
from torch.utils.cpp_extension import load_inline
from typing_extensions import TypeVarTuple, Unpack

from megatron.core.parallel_state import (
    get_expert_model_parallel_rank,
    get_expert_tensor_parallel_rank,
    get_tensor_model_parallel_rank,
)
from megatron.core.utils import is_te_min_version, safely_set_viewless_tensor_data

# ---------------------------------------------------------------------------
# C++ extension: zero-copy storage sharing for CheckpointWithoutOutput
# ---------------------------------------------------------------------------
# Makes dst's UntypedStorage point to src's data WITHOUT copying bytes.
# Holds a refcounted reference to src's StorageImpl so the memory stays alive.
# Operates below the Tensor / autograd layer → no version-counter bump,
# and ALL TensorImpls that reference dst's StorageImpl (including views
# created by reshape / split / etc. inside TE GroupedLinear) see the data.
# ---------------------------------------------------------------------------

_SHARE_STORAGE_SRC = r"""
#include <torch/extension.h>

void share_storage(at::Tensor dst, at::Tensor src) {
    auto* dst_impl = dst.storage().unsafeGetStorageImpl();

    // Copy src's c10::Storage (increments StorageImpl refcount).
    auto* src_storage_ref = new c10::Storage(src.storage());

    void*       data   = src_storage_ref->data_ptr().get();
    size_t      nbytes = src_storage_ref->nbytes();
    c10::Device device = src_storage_ref->device();

    // Build a DataPtr whose deleter releases our StorageImpl reference.
    c10::DataPtr shared(
        data,
        static_cast<void*>(src_storage_ref),
        [](void* ctx) { delete static_cast<c10::Storage*>(ctx); },
        device);

    dst_impl->set_data_ptr(std::move(shared));
    dst_impl->set_nbytes(nbytes);
}
"""

_share_storage_ext = None


def _get_share_storage():
    """Lazily compile & cache the share_storage extension."""
    global _share_storage_ext
    if _share_storage_ext is None:
        _share_storage_ext = load_inline(
            name="share_storage_ext",
            cpp_sources=_SHARE_STORAGE_SRC,
            functions=["share_storage"],
            verbose=False,
        )
    return _share_storage_ext.share_storage


from .utils import gather_split_1d_tensor, split_tensor_into_1d_equal_chunks

try:
    import transformer_engine  # pylint: disable=unused-import
    from transformer_engine.pytorch.distributed import activation_recompute_forward
    from transformer_engine.pytorch.fp8 import FP8GlobalStateManager, fp8_autocast

    HAVE_TE = True
except ModuleNotFoundError:
    HAVE_TE = False


# Default name for the model parallel rng tracker.
_MODEL_PARALLEL_RNG_TRACKER_NAME = 'model-parallel-rng'
_EXPERT_PARALLEL_RNG_TRACKER_NAME = 'expert-parallel-rng'
_DATA_PARALLEL_RNG_TRACKER_NAME = 'data-parallel-rng'


def _get_cuda_rng_state(
    device: Union[int, str, torch.device] = "cuda", clone: bool = False, graph_safe: bool = False
) -> torch.Tensor:
    """Return the random number generator state of the specified GPU.

    Arguments:
        device (int): The gpu to retrieve the rng state
        clone (bool): Whether to also clone the retrieved RNG state
        graph_safe (bool): Get the rng state in a graph safe manner.

    This function is adapted from torch.cuda.random.get_rng_state()"""

    # if not using cuda graphs, just use the builtin pytorch function
    if not graph_safe:
        return torch.cuda.random.get_rng_state(device=device)

    _lazy_init()
    if isinstance(device, str):
        device = torch.device(device)
    elif isinstance(device, int):
        device = torch.device("cuda", device)
    idx = device.index
    if idx is None:
        idx = torch.cuda.current_device()

    default_generator = torch.cuda.default_generators[idx]
    if clone:
        return default_generator.clone_state()
    return default_generator.graphsafe_get_state()


def _set_cuda_rng_state(new_state: torch.Tensor, device: int = -1, graph_safe: bool = False):
    """Sets the random number generator state of the current GPU.

    Arguments:
        new_state (torch.ByteTensor): The desired state
        device (int): The gpu to retrieve the rng state
        graph_safe (bool): Set the rng state in a graph safe manner.

    This function is adapted from PyTorch repo (torch.cuda.set_rng_state)
    with a single change: the input state is not cloned. Cloning caused
    major performance issues for +4 GPU cases.
    """
    if hasattr(_C, '_cuda_setRNGState') and callable(_C._cuda_setRNGState):
        # older PyTorch
        def cb():
            with device_ctx_manager(device):
                _C._cuda_setRNGState(new_state)

    else:
        # newer PyTorch
        if device == -1:
            device = torch.device('cuda')
        elif isinstance(device, str):
            device = torch.device(device)
        elif isinstance(device, int):
            device = torch.device('cuda', device)

        def cb():
            idx = device.index
            if idx is None:
                idx = torch.cuda.current_device()
            default_generator = torch.cuda.default_generators[idx]

            # if graph capturing, set the rng state in a cudagraphable way
            if graph_safe:
                default_generator.graphsafe_set_state(new_state)
            else:
                default_generator.set_state(new_state)

    _lazy_call(cb)


def convert_cuda_rng_state(
    state: Union[torch.Tensor, torch.Generator], to_graphable: bool = False
) -> Union[torch.Tensor, torch.Generator]:
    """
    Convert the cuda rng state tensor to the graphable version,
    or from the graphable version to the non-graphable tensor version.
    """
    if to_graphable:
        if isinstance(state, torch.Tensor):
            # Convert to the graphable version.
            # Store current rng state.
            orig_cuda_rng_state = _get_cuda_rng_state(graph_safe=False)
            # Set rng state to the desired one
            _set_cuda_rng_state(state, graph_safe=False)
            # Get the graphable state
            graphable_state = _get_cuda_rng_state(clone=True, graph_safe=True)
            # And set the state to the original state we started with.
            _set_cuda_rng_state(orig_cuda_rng_state, graph_safe=False)
            return graphable_state
        elif isinstance(state, torch.Generator):
            # already graphable, just return it.
            return state
        else:
            raise ValueError(f"Invalid state type: {type(state)}")
    else:
        if isinstance(state, torch.Tensor):
            # already non-graphable, just return it.
            return state
        elif isinstance(state, torch.Generator):
            # Convert to the non-graphable tensor version.
            return state.get_state()
        else:
            raise ValueError(f"Invalid state type: {type(state)}")


def get_expert_parallel_rng_tracker_name():
    """Get the expert parallel rng tracker name"""
    global _EXPERT_PARALLEL_RNG_TRACKER_NAME
    return _EXPERT_PARALLEL_RNG_TRACKER_NAME


def get_data_parallel_rng_tracker_name():
    """Get the data parallel rng tracker name"""
    global _DATA_PARALLEL_RNG_TRACKER_NAME
    return _DATA_PARALLEL_RNG_TRACKER_NAME


class CudaRNGStatesTracker:
    """Tracker for the cuda RNG states.

    Using the `add` method, a cuda rng state is initialized based on
    the input `seed` and is assigned to `name`. Later, by forking the
    rng state, we can perform operations and return to our starting
    cuda state.
    """

    def __init__(self, use_cudagraphable_rng=False, is_inference_rng_tracker=False):
        self.reset()
        self.use_cudagraphable_rng = use_cudagraphable_rng
        self.is_inference_rng_tracker = is_inference_rng_tracker

        if self.use_cudagraphable_rng:
            assert (
                hasattr(torch.cuda.CUDAGraph, "register_generator_state")
                and hasattr(torch.Generator, "graphsafe_set_state")
                and hasattr(torch.Generator, "graphsafe_get_state")
                and hasattr(torch.Generator, "clone_state")
            ), "Tried using cudagraphs with RNG, however not detected in pytorch!"

    def is_initialized(self):
        """Checks if the internal RNG state has been set wirth set_states()."""
        return self._is_initialized

    def reset(self):
        """Set to the initial state (no tracker)."""

        # Track if initialized.
        self._is_initialized = False

        # Map from a string name to the cuda rng state.
        self.states_ = {}

        # Seeds are just for book keeping and ensure no seed is set twice.
        self.seeds_ = set()

        # Name of the rng state currently being used in the generator.
        # The default one is "default-rng" and won't be pushed to the self.states_ dictionary.
        self._current_state_name = "default-rng"

    def get_states(self):
        """Get rng states. Copy the dictionary so we have direct
        pointers to the states, not just a pointer to the dictionary."""
        states = {}
        for name in self.states_:
            states[name] = self.states_[name]
        return states

    def set_states(self, states):
        """Set the rng states. For efficiency purposes, we do not check
        the size of seed for compatibility."""
        self._is_initialized = True
        self.states_ = states

    def add(self, name, seed):
        """Track the rng state."""
        self._is_initialized = True
        # Check seed is not already used.
        if seed in self.seeds_:
            raise Exception('seed {} already exists'.format(seed))
        self.seeds_.add(seed)
        # Check that state is not already defined.
        if name in self.states_:
            raise Exception('cuda rng state {} already exists'.format(name))

        # If available, create the state in a graph safe manner
        if self.use_cudagraphable_rng:
            new_state = _get_cuda_rng_state(clone=True, graph_safe=True)
            new_state.manual_seed(seed)
            self.states_[name] = new_state
        else:
            # Get the current rng state.
            orig_rng_state = torch.cuda.get_rng_state()
            # Set the new state and store it.
            torch.cuda.manual_seed(seed)
            self.states_[name] = torch.cuda.get_rng_state()
            # Reset rng state to what it was.
            _set_cuda_rng_state(orig_rng_state)

    @contextlib.contextmanager
    def fork(self, name=_MODEL_PARALLEL_RNG_TRACKER_NAME):
        """Fork the cuda rng state, perform operations, and exit with
        the original state."""
        # Check if we have added the state
        if name not in self.states_:
            raise Exception('cuda rng state {} is not added'.format(name))
        # Store current rng state and name. Store in self.states_ if it's not the default state.
        orig_cuda_rng_state = _get_cuda_rng_state(graph_safe=self.use_cudagraphable_rng)
        orig_state_name = self._current_state_name
        if orig_state_name != "default-rng":
            self.states_[orig_state_name] = orig_cuda_rng_state
        # Set rng state and name to the desired one.
        _set_cuda_rng_state(self.states_[name], graph_safe=self.use_cudagraphable_rng)
        self._current_state_name = name
        # Record cpu RNG state
        cpu_rng_state = torch.get_rng_state()
        # Do the stuff we wanted to do.
        try:
            yield
        finally:
            # Throw a warning if cpu RNG state changed
            if not torch.all(cpu_rng_state == torch.get_rng_state()).item():
                logging.getLogger(__name__).warning('CPU RNG state changed within GPU RNG context')
            # Check if the current state name is the same as the desired state name.
            if self._current_state_name != name:
                raise Exception(
                    f'current state name {self._current_state_name} is not the same as the desired '
                    f'state name {name}.'
                )
            # Update the current rng state for later use.
            self.states_[name] = _get_cuda_rng_state(graph_safe=self.use_cudagraphable_rng)
            # And set the state and name to the original state we started with.
            if orig_state_name != "default-rng":
                orig_cuda_rng_state = self.states_[orig_state_name]
            _set_cuda_rng_state(orig_cuda_rng_state, graph_safe=self.use_cudagraphable_rng)
            self._current_state_name = orig_state_name


# RNG tracker object.
_CUDA_RNG_STATE_TRACKER = None
_CUDA_RNG_STATE_TRACKER_INITIALIZED = False


def initialize_rng_tracker(
    use_te_rng_tracker: bool = False,
    inference_rng_tracker: bool = False,
    use_cudagraphable_rng: bool = False,
    force_reset: bool = False,
):
    """Create the RNG tracker. 'use_te_rng_tracker' determines whether to use
    Megatron or TransformerEngine's implementation.
    In particular, TransformerEngine's implementation is cudagraphable and supports FP8.
    """
    global _CUDA_RNG_STATE_TRACKER
    global _CUDA_RNG_STATE_TRACKER_INITIALIZED
    if force_reset:
        _CUDA_RNG_STATE_TRACKER = None
        _CUDA_RNG_STATE_TRACKER_INITIALIZED = False

    if _CUDA_RNG_STATE_TRACKER_INITIALIZED:
        return

    # Get the base tracker class
    base_tracker = None
    if HAVE_TE and use_te_rng_tracker:
        if not is_te_min_version("1.5.0"):
            raise RuntimeError("use_te_rng_tracker requires TransformerEngine version >= 1.5")
        from megatron.core.extensions.transformer_engine import TECudaRNGStatesTracker

        base_tracker = TECudaRNGStatesTracker
        tracker_kwargs = {"is_inference_rng_tracker": inference_rng_tracker}
    else:
        base_tracker = CudaRNGStatesTracker
        tracker_kwargs = {
            "use_cudagraphable_rng": use_cudagraphable_rng,
            "is_inference_rng_tracker": inference_rng_tracker,
        }

    if inference_rng_tracker:

        class InferenceCudaRNGStatesTracker(base_tracker):  # type: ignore[valid-type, misc]
            """RNG tracker for inference."""

            def add(self, name, seed):
                """Mirrors the interface from the training RNG tracker."""
                pass

            def set_states(self, states):
                """Mirrors the interface from the training RNG tracker."""
                pass

            def fork(self, name=_MODEL_PARALLEL_RNG_TRACKER_NAME):
                """Mirrors the interface from the training RNG tracker."""
                return contextlib.nullcontext()

        tracker_class = InferenceCudaRNGStatesTracker
    else:
        tracker_class = base_tracker

    _CUDA_RNG_STATE_TRACKER = tracker_class(**tracker_kwargs)
    _CUDA_RNG_STATE_TRACKER_INITIALIZED = True


def get_cuda_rng_tracker(
    use_te_rng_tracker: bool = False,
    inference_rng_tracker: bool = False,
    use_cudagraphable_rng: bool = False,
):
    """Get cuda rng tracker."""
    initialize_rng_tracker(use_te_rng_tracker, inference_rng_tracker, use_cudagraphable_rng)
    return _CUDA_RNG_STATE_TRACKER


def get_all_rng_states():
    """Returns all generator states used by the current `CudaRNGStatesTracker`."""

    assert (
        _CUDA_RNG_STATE_TRACKER_INITIALIZED
    ), "Tried getting all rng states but RNG Tracker has not been initalized!"

    if isinstance(_CUDA_RNG_STATE_TRACKER, CudaRNGStatesTracker):
        return _CUDA_RNG_STATE_TRACKER.states_
    # If TE is installed, check if we are using TE's RNG tracker
    elif HAVE_TE and is_te_min_version("1.5.0"):
        from megatron.core.extensions.transformer_engine import TECudaRNGStatesTracker

        if isinstance(_CUDA_RNG_STATE_TRACKER, TECudaRNGStatesTracker):
            from transformer_engine.pytorch.distributed import get_all_rng_states

            return get_all_rng_states()
    # no valid tracker, return an empty dict
    else:
        return {}


def model_parallel_cuda_manual_seed(
    seed: int,
    te_rng_tracker: bool = False,
    inference_rng_tracker: bool = False,
    use_cudagraphable_rng: bool = False,
    tp_rank: Optional[int] = None,
    ep_rank: Optional[int] = None,
    etp_rank: Optional[int] = None,
    force_reset_rng: bool = False,
):
    """Initialize model parallel cuda seed.

    This function should be called after the model parallel is
    initialized. Also, no torch.cuda.manual_seed should be called
    after this function. Basically, this is replacement for that
    function.
    Three set of RNG states are tracked:
    default state: This is for data parallelism and is the same among a set of model parallel GPUs
    but different across different model parallel groups. This is used for example for dropout
    in the non-tensor-model-parallel regions.
    tensor-model-parallel state: This state is different among a set of model parallel GPUs,
    but the same across data parallel groups. This is used for example for dropout
    in model parallel regions.
    expert-parallel-seed: This state is only used for the expert layer of MoE models.
    It is different among expert-tensor and expert-model parallel GPUs, and the same
    across expert-data parallel groups.
    """
    if tp_rank is None:
        tp_rank = get_tensor_model_parallel_rank()
    if ep_rank is None:
        ep_rank = get_expert_model_parallel_rank()
    if etp_rank is None:
        etp_rank = get_expert_tensor_parallel_rank()
    # 2718 is just for fun and any POSITIVE value will work.
    offset = seed + 2718
    tensor_model_parallel_seed = offset + tp_rank
    # Data parallel gets the original seed.
    data_parallel_seed = seed

    initialize_rng_tracker(
        te_rng_tracker, inference_rng_tracker, use_cudagraphable_rng, force_reset=force_reset_rng
    )
    _CUDA_RNG_STATE_TRACKER.reset()
    # Set the default state.
    torch.cuda.manual_seed(data_parallel_seed)
    _CUDA_RNG_STATE_TRACKER.add(_DATA_PARALLEL_RNG_TRACKER_NAME, data_parallel_seed)

    # and model parallel state.
    _CUDA_RNG_STATE_TRACKER.add(_MODEL_PARALLEL_RNG_TRACKER_NAME, tensor_model_parallel_seed)

    expert_parallel_seed = seed + 1024 + 100 * ep_rank + etp_rank
    _CUDA_RNG_STATE_TRACKER.add(_EXPERT_PARALLEL_RNG_TRACKER_NAME, expert_parallel_seed)


def is_graph_safe_cuda_rng_tracker(cuda_rng_tracker):
    """Check if the cuda rng tracker is graph safe version."""
    if HAVE_TE and is_te_min_version("1.5.0"):
        from megatron.core.extensions.transformer_engine import TECudaRNGStatesTracker

        if isinstance(cuda_rng_tracker, TECudaRNGStatesTracker):
            return True
    if getattr(cuda_rng_tracker, "use_cudagraphable_rng", False):
        return True
    return False


def _get_all_rng_states():
    """Get all the rng states."""
    cpu_rng_state = torch.get_rng_state()
    cuda_rng_state = _get_cuda_rng_state(
        graph_safe=is_graph_safe_cuda_rng_tracker(get_cuda_rng_tracker())
    )
    cuda_rng_state_tracker = get_cuda_rng_tracker().get_states()
    return cpu_rng_state, cuda_rng_state, cuda_rng_state_tracker


def _set_all_rng_states(cpu_rng_state, cuda_rng_state, cuda_rng_state_tracker):
    """Set all the rng states."""
    torch.set_rng_state(cpu_rng_state)
    _set_cuda_rng_state(
        cuda_rng_state, graph_safe=is_graph_safe_cuda_rng_tracker(get_cuda_rng_tracker())
    )
    get_cuda_rng_tracker().set_states(cuda_rng_state_tracker)


@contextlib.contextmanager
def _fork_rng():
    """Fork the rng state."""
    # Store the current states.
    current_states = _get_all_rng_states()
    try:
        yield
    finally:
        # Set the states back to what it was at the start of this function.
        _set_all_rng_states(*current_states)


# Global flag that's toggled whenever inside a checkpointing context
IS_CHECKPOINTING = False


def _set_checkpointing():
    """Set state to checkpointing enabled."""
    global IS_CHECKPOINTING
    IS_CHECKPOINTING = True


def _unset_checkpointing():
    """Unset state to checkpointing enabled."""
    global IS_CHECKPOINTING
    IS_CHECKPOINTING = False


def is_checkpointing():
    """Check if currently in a checkpoint context."""
    return IS_CHECKPOINTING


_R = TypeVar('_R')
_Ts = TypeVarTuple('_Ts')


class CheckpointFunction(torch.autograd.Function):
    """Checkpoint Function

    This function is adapted from torch.utils.checkpoint with two main changes:
    1) torch.cuda.set_rng_state is replaced with `_set_cuda_rng_state`
    2) the states in the model parallel tracker are also properly tracked/set/reset.
    """

    # pylint: disable=missing-function-docstring
    @staticmethod
    def forward(
        ctx: Any,
        run_function: Callable[[Unpack[_Ts]], _R],
        distribute_saved_activations: bool,
        *args: Unpack[_Ts],
    ) -> _R:
        """Forward pass."""
        _set_checkpointing()

        ctx.run_function = run_function
        ctx.distribute_saved_activations = distribute_saved_activations

        # Copy the rng states.
        ctx.rng_states = _get_all_rng_states()

        with torch.no_grad():
            outputs = run_function(*args)

        # Divide hidden states across model parallel group and only keep
        # the chunk corresponding to the current rank.
        if distribute_saved_activations:
            ctx.input_0_shape = args[0].data.shape
            safely_set_viewless_tensor_data(
                args[0], split_tensor_into_1d_equal_chunks(args[0].data, new_buffer=True)
            )

        # Store everything.
        ctx.save_for_backward(*args)

        _unset_checkpointing()
        return outputs

    # pylint: disable=missing-function-docstring
    @staticmethod
    def backward(ctx, *args):
        """Backward pass."""
        if not torch.autograd._is_checkpoint_valid():
            raise RuntimeError(
                "Checkpointing is not compatible with .grad(), "
                "please use .backward() if possible"
            )
        _set_checkpointing()

        inputs = ctx.saved_tensors
        if ctx.distribute_saved_activations:
            safely_set_viewless_tensor_data(
                inputs[0], gather_split_1d_tensor(inputs[0].data).view(ctx.input_0_shape)
            )

        with _fork_rng():
            # Set the states to what it used to be before the forward pass.
            _set_all_rng_states(*ctx.rng_states)

            # Compute the forward pass.
            detached_inputs = detach_variable(inputs)
            with torch.enable_grad():
                outputs = ctx.run_function(*detached_inputs)

        if isinstance(outputs, torch.Tensor):
            outputs = (outputs,)

        # filter out non tensor outputs for backward pass
        outputs, args = zip(
            *filter(lambda x: torch.is_tensor(x[0]) and x[0].requires_grad, zip(outputs, args))
        )
        torch.autograd.backward(outputs, args)
        grads = tuple(inp.grad if isinstance(inp, torch.Tensor) else inp for inp in detached_inputs)

        _unset_checkpointing()
        return (None, None) + grads


def checkpoint(
    function: Callable[[Unpack[_Ts]], _R], distribute_saved_activations: bool, *args: Unpack[_Ts]
) -> _R:
    """Checkpoint a model or part of the model.
    This has been directly copied from torch.utils.checkpoint."""
    return CheckpointFunction.apply(function, distribute_saved_activations, *args)


class CheckpointWithoutOutputFunction(torch.autograd.Function):
    """
    Checkpoint Function Helper for CheckpointWithoutOutput.
    Save context for recompute.
    """

    @staticmethod
    def forward(
        ctx: Any,
        run_function: Callable[[Unpack[_Ts]], _R],
        checkpoint_without_output_obj: CheckpointWithoutOutput,
        *args: Unpack[_Ts],
    ) -> _R:
        """Forward pass."""
        if checkpoint_without_output_obj.fp8:
            fp8 = FP8GlobalStateManager.is_fp8_enabled()
            ctx.fp8 = fp8
            ctx.fp8_recipe = FP8GlobalStateManager.get_fp8_recipe() if fp8 else None
            fwd_ctx = activation_recompute_forward(activation_recompute=True, recompute_phase=False)
        else:
            ctx.fp8 = False
            ctx.fp8_recipe = None
            fwd_ctx = contextlib.nullcontext()

        with torch.no_grad(), fwd_ctx:
            outputs = run_function(*args)
        ctx.save_for_backward(*detach_variable(args))
        # the CheckpointWithoutOutput object is passed in, then it can access the saved input
        # tensors later for recomputation
        checkpoint_without_output_obj.ctx = ctx
        return outputs

    @staticmethod
    def backward(ctx, *args):
        """Backward pass."""
        # Get the inputs from the context instead of the saved tensors
        # because the saved tensors are already cached by the recomputation.
        # This is to avoid double-reloading the inputs in CPU offloading scenario.
        inputs = ctx.inputs
        outputs = ctx.outputs
        torch.autograd.backward(outputs, args)
        ctx.outputs = None
        ctx.inputs = None
        grads = tuple(inp.grad if isinstance(inp, torch.Tensor) else inp for inp in inputs)
        return (None, None) + grads


class CheckpointWithoutOutput(object):
    """
    Checkpoint a model or part of the model and release the output.

    For the normal 'checkpoint` function, the outputs of it may be saved by the following
    modules for their backward computation. However, the output of the checkpointed function is
    re-generated at recomputation, so the output store is not technically needed. This method can
    manually discard the output in the forward pass and restore it by recomputation in the
    backward pass to reduce the memory usage.

    Due to the reason above, to save memory with this method, the caller should make sure that the
    discarded output tensors are directly saved in the following modules for backward computation.
    """

    def __init__(self, fp8=False):
        self.fp8 = fp8 is not None
        self.run_function = None
        self.fwd_cpu_rng_state = None
        self.fwd_cuda_rng_state = None
        self.fwd_cuda_rng_state_tracker = None
        self.ctx = None
        self.outputs = None

    def checkpoint(self, run_function: Callable[[Unpack[_Ts]], _R], *args: Unpack[_Ts]) -> _R:
        """Checkpoint function."""

        # If in cuda graph warmup, disable checkpointing, as 'discard_output_and_register_recompute'
        # may be called in a separate graph warmup.
        from megatron.core.transformer.cuda_graphs import is_graph_warmup

        if is_graph_warmup():
            return run_function(*args)

        self.run_function = run_function

        self.rng_states = _get_all_rng_states()

        outputs = CheckpointWithoutOutputFunction.apply(run_function, self, *args)
        self.outputs = outputs
        if isinstance(self.outputs, torch.Tensor):
            self.outputs = (self.outputs,)
        return outputs

    def _recompute(self, _):
        """Used as a hook to recompute the output."""

        from megatron.core.transformer.cuda_graphs import is_graph_capturing, is_graph_warmup

        # The recomputation has been triggered already. Just return.
        # Handle cudagraphs, do nothing if currently in graph warmup
        if self.ctx is None or is_graph_warmup():
            return

        if not torch.autograd._is_checkpoint_valid() and not is_graph_capturing():
            raise RuntimeError(
                "Checkpointing is not compatible with .grad(), "
                "please use .backward() if possible"
            )

        with _fork_rng():
            _set_all_rng_states(*self.rng_states)

            if self.fp8:
                recompute_ctx = activation_recompute_forward(
                    activation_recompute=True, recompute_phase=True
                )
                fp8_ctx = fp8_autocast(enabled=self.ctx.fp8, fp8_recipe=self.ctx.fp8_recipe)
            else:
                recompute_ctx = contextlib.nullcontext()
                fp8_ctx = contextlib.nullcontext()

            # Store the inputs for backward pass
            inputs = self.ctx.saved_tensors

            def detach(t):
                if isinstance(t, torch.Tensor):
                    requires_grad = t.requires_grad
                    t = t.detach()
                    t.requires_grad_(requires_grad)
                return t

            inputs = tuple(detach(t) for t in inputs)
            with torch.enable_grad(), fp8_ctx, recompute_ctx:
                outputs = self.run_function(*inputs)

        self.run_function = None
        self.rng_states = None

        if isinstance(outputs, torch.Tensor):
            outputs = (outputs,)

        # Zero-copy: make output's StorageImpl point to recomputation_output's data.
        # This operates at the UntypedStorage level (below TensorImpl), so:
        #   - ALL views / reshapes that reference output's StorageImpl see the data
        #     (e.g. TE GroupedLinear's inp.reshape() + torch.split() saved for backward)
        #   - No tensor version-counter bump (no autograd complaint)
        share_storage = _get_share_storage()
        for output, recomputation_output in zip(self.outputs, outputs):
            share_storage(output, recomputation_output)

        self.ctx.outputs = outputs
        self.ctx.inputs = inputs
        self.outputs = None
        self.ctx = None

    def discard_output_and_register_recompute(self, hook_tensor):
        """
        Release the output tensor storages and register the recompute function as a grad hook of
        the hook_tensor.

        Note: the caller should make sure that the output tensors are no longer used
        in the forward pass and the gradient of the hook_tensor is computed before the recomputed
        tensors are used.
        """

        from megatron.core.transformer.cuda_graphs import is_graph_warmup

        if is_graph_warmup():
            return

        # use resize to release the output tensor memory and still keep the metadata in the tensors.
        # the metadata is still needed for backward
        for output in self.outputs:
            output.untyped_storage().resize_(0)

        # register the recomputation as a backward hook, when the the gradient of the hook_tensor
        # is computed, the recomputation will be triggered. The hook_tensor should be selected
        # carefully to ensure that the tensors are recomputed before it is used by other backward
        # computations.
        if hook_tensor.requires_grad:
            hook_tensor.register_hook(self._recompute)


================================================
FILE: megatron/core/tensor_parallel/utils.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

from typing import List, Sequence

import torch

from megatron.core.utils import (
    divide,
    get_tensor_model_parallel_group_if_none,
    is_torch_min_version,
)

try:
    if is_torch_min_version("1.13.0"):
        dist_all_gather_func = torch.distributed.all_gather_into_tensor
    else:
        dist_all_gather_func = torch.distributed._all_gather_base
except Exception:
    dist_all_gather_func = torch.distributed._all_gather_base


def split_tensor_along_last_dim(
    tensor: torch.Tensor, num_partitions: int, contiguous_split_chunks: bool = False
) -> List[torch.Tensor]:
    """Split a tensor along its last dimension.

    Args:
        tensor: input tensor.
        num_partitions: number of partitions to split the tensor
        contiguous_split_chunks: If True, make each chunk contiguous
                                 in memory.

    Returns:
        A list of Tensors
    """
    # Get the size and dimension.
    last_dim = tensor.dim() - 1
    last_dim_size = divide(tensor.size()[last_dim], num_partitions)
    # Split.
    tensor_list = torch.split(tensor, last_dim_size, dim=last_dim)
    # Note: torch.split does not create contiguous tensors by default.
    if contiguous_split_chunks:
        return tuple(chunk.contiguous() for chunk in tensor_list)

    return tensor_list


def split_tensor_into_1d_equal_chunks(tensor, new_buffer=False, tp_group=None):
    """Break a tensor into equal 1D chunks across tensor parallel ranks.

    Returns a Tensor or View with this rank's portion of the data.

    Args:
        tensor: The tensor to split

    Keyword Args:
        new_buffer (bool): If True, returns a new Tensor.
                           If False, returns a view into the existing Tensor.
                           Default is False

    """
    tp_group = get_tensor_model_parallel_group_if_none(tp_group)
    partition_size = torch.numel(tensor) // tp_group.size()
    start_index = partition_size * tp_group.rank()
    end_index = start_index + partition_size
    if new_buffer:
        data = torch.empty(
            partition_size,
            dtype=tensor.dtype,
            device=torch.cuda.current_device(),
            requires_grad=False,
        )
        data.copy_(tensor.view(-1)[start_index:end_index])
    else:
        data = tensor.view(-1)[start_index:end_index]
    return data


def gather_split_1d_tensor(tensor, tp_group=None):
    """Opposite of split_tensor_into_1d_equal_chunks. Gather values from tensor
    model parallel ranks.

    Returns a new Tensor with the gathered data.

    Args:
        tensor: A Tensor or view of this rank's portion of the data.
    """
    tp_group = get_tensor_model_parallel_group_if_none(tp_group)
    numel_gathered = torch.numel(tensor) * tp_group.size()
    gathered = torch.empty(
        numel_gathered, dtype=tensor.dtype, device=torch.cuda.current_device(), requires_grad=False
    )
    dist_all_gather_func(gathered, tensor, group=tp_group)
    return gathered


class VocabUtility:
    """Split the vocabulary into `world_size` chunks and return the first
    and last index of the vocabulary belonging to the `rank`
    partition: Note that indices in [fist, last)

    """

    @staticmethod
    def vocab_range_from_per_partition_vocab_size(
        per_partition_vocab_size: int, rank, world_size: int
    ) -> Sequence[int]:
        """Vocab range from per partition vocab size."""
        index_f = rank * per_partition_vocab_size
        index_l = index_f + per_partition_vocab_size
        return index_f, index_l

    @staticmethod
    def vocab_range_from_global_vocab_size(
        global_vocab_size: int, rank: int, world_size: int
    ) -> Sequence[int]:
        """Vocab range from global vocab size."""
        per_partition_vocab_size = divide(global_vocab_size, world_size)
        return VocabUtility.vocab_range_from_per_partition_vocab_size(
            per_partition_vocab_size, rank, world_size
        )


================================================
FILE: megatron/core/timers.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.

"""Megatron timers."""

import logging
import time
from abc import ABC, abstractmethod
from typing import List

import torch

try:
    import wandb
except ImportError:
    wandb = None

try:
    from torch.utils.tensorboard import SummaryWriter
except ImportError:
    SummaryWriter = None

from megatron.core.utils import is_torch_min_version

try:
    if is_torch_min_version("1.13.0"):
        dist_all_gather_func = torch.distributed.all_gather_into_tensor
    else:
        dist_all_gather_func = torch.distributed._all_gather_base
except:
    dist_all_gather_func = torch.distributed._all_gather_base

logger = logging.getLogger(__name__)


class TimerBase(ABC):
    """Timer base class."""

    def __init__(self, name):
        self.name = name

    @abstractmethod
    def start(self, barrier=False):
        """Start the timer.

        Args:
            barrier (bool, optional): Synchronizes ranks before starting. Defaults to False.
        """
        pass

    @abstractmethod
    def stop(self, barrier=False):
        """Stop the timer.

        Args:
            barrier (bool, optional): Synchronizes ranks before stopping. Defaults to False.
        """
        pass

    @abstractmethod
    def reset(self):
        """Reset timer."""
        pass

    @abstractmethod
    def elapsed(self, reset=True, barrier=False):
        """Calculates the elapsed time and restarts timer.

        Args:
            reset (bool, optional): Resets timer before restarting. Defaults to True.
            barrier (bool, optional): Synchronizes ranks before stopping. Defaults to False.

        Returns:
            float: Elapsed time.
        """
        pass


class DummyTimer(TimerBase):
    """Dummy Timer."""

    def __init__(self):
        super().__init__('dummy timer')

    def start(self, barrier=False):
        return

    def stop(self, barrier=False):
        return

    def reset(self):
        return

    def elapsed(self, reset=True, barrier=False):
        raise Exception(
            'dummy timer should not be used to calculate elapsed time, '
            'check if timer\'s log_level <= self._log_level.'
        )

    def active_time(self):
        """Returns the cumulative duration the timer has been active.
        Note: Not supported for DummyTimer.
        """
        raise Exception(
            'active timer should not be used to calculate elapsed time, '
            'check if timer\'s log_level <= self._log_level.'
        )


class Timer(TimerBase):
    """
    Timer class with ability to start/stop.

    Comment on using `barrier`: If this flag is passed, then all
    the caller processes will wait till all reach the timing routine.
    It is up to the user to make sure all the ranks in `barrier_group`
    call it otherwise, it will result in a hang.
    Comment on `barrier_group`: By default it is set to None which
    in torch distributed land, it will result in the global communicator.
    """

    def __init__(self, name):
        """Initialize Timer.

        Args:
            name (str): Name of the timer.
        """
        super().__init__(name)
        self._elapsed = 0.0
        self._active_time = 0.0
        self._started = False
        # Note that None will default to the global process group
        self._barrier_group = None
        self._start_time = time.time()

    def set_barrier_group(self, barrier_group):
        """Sets barrier group.

        Args:
            barrier_group (ProcessGroup): Torch ProcessGroup for barrier.
        """
        self._barrier_group = barrier_group

    def start(self, barrier=False):
        """Start the timer.

        Args:
            barrier (bool, optional): Synchronizes ranks before starting. Defaults to False.
        """
        assert not self._started, 'timer has already been started'
        if barrier:
            torch.distributed.barrier(group=self._barrier_group)
        torch.cuda.synchronize()
        self._start_time = time.time()
        self._started = True

    def stop(self, barrier=False):
        """Stop the timer.

        Args:
            barrier (bool, optional): Synchronizes ranks before stopping. Defaults to False.
        """
        assert self._started, 'timer is not started'
        if barrier:
            torch.distributed.barrier(group=self._barrier_group)
        torch.cuda.synchronize()
        elapsed = time.time() - self._start_time
        self._elapsed += elapsed
        self._active_time += elapsed
        self._started = False

    def reset(self):
        """Reset timer."""
        # Don't reset _active_time
        self._elapsed = 0.0
        self._started = False

    def set_elapsed(self, value):
        """Directly set the elapsed time.

        This is useful for injecting pre-computed timing values (e.g., startup
        timestamps) into the timer so they can be reported via timers.log().

        Args:
            value (float): The elapsed time value in seconds.
        """
        self._elapsed = value

    def elapsed(self, reset=True, barrier=False):
        """Calculates the elapsed time and restarts timer.

        Args:
            reset (bool, optional): Resets timer before restarting. Defaults to True.
            barrier (bool, optional): Synchronizes ranks before stopping. Defaults to False.

        Returns:
            float: Elapsed time.
        """
        _started = self._started
        # If the timing in progress, end it first.
        if self._started:
            self.stop(barrier=barrier)
        # Get the elapsed time.
        _elapsed = self._elapsed
        # Reset the elapsed time
        if reset:
            self.reset()
        # If timing was in progress, set it back.
        if _started:
            self.start(barrier=barrier)
        return _elapsed

    def active_time(self):
        """Calculates the cumulative duration for which the timer has been active"""
        return self._active_time


class Timers:
    """Class for a group of Timers."""

    def __init__(self, log_level, log_option):
        """Initialize group of timers.

        Args:
            log_level (int): Log level to control what timers are enabled.
            log_option (str): Setting for logging statistics over ranks for all the timers.
                              Allowed: ['max', 'minmax', 'all'].
        """
        self._log_level = log_level
        allowed_log_options = set(['max', 'minmax', 'all'])
        assert (
            log_option in allowed_log_options
        ), 'input log option {} is invalid. It must be one of {}'.format(
            log_option, allowed_log_options
        )
        self._log_option = log_option
        self._timers = {}
        self._log_levels = {}
        self._dummy_timer = DummyTimer()
        self._max_log_level = 2

    def __call__(self, name, log_level=None):
        """Call timer with name and log level."""
        # If the timer has already been set, then check if the log-level
        # is provided, it matches the one that the timer was created with.
        if name in self._timers:
            if log_level is not None:
                assert log_level == self._log_levels[name], (
                    'input log level {} does not match already existing '
                    'log level {} for {} timer'.format(log_level, self._log_levels[name], name)
                )
            return self._timers[name]
        # If timer does not exist and no log level is provided,
        # set it to the max log level which is 2.
        if log_level is None:
            log_level = self._max_log_level
        assert (
            log_level <= self._max_log_level
        ), 'log level {} is larger than max supported log level {}'.format(
            log_level, self._max_log_level
        )
        # Now if the input log level is larger than the one set for
        # the timers class, just ignore it and return a dummy timer.
        if log_level > self._log_level:
            return self._dummy_timer
        # Otherwise, initalize the timer and set the level.
        self._timers[name] = Timer(name)
        self._log_levels[name] = log_level
        return self._timers[name]

    def _get_elapsed_time_all_ranks(self, names, reset, barrier):
        """Returns elapsed times of timers in names.
        Assumptions:
            - All the ranks call this function.
            - `names` are identical on all ranks.
        If the above assumptions are not met, calling this function will
        result in hang.

        Args:
            names (List[str]): list of timer names
            reset (bool): reset the timer after recording the elapsed time
            barrier (bool): if set, do a global barrier before time measurments

        Returns:
            torch.tensor: Tensor of size [world_size, len(names)] with times in float.
        """

        if len(names) == 0:
            return None

        # First make sure all the callers are in sync.
        if barrier:
            torch.distributed.barrier()

        world_size = torch.distributed.get_world_size()
        rank = torch.distributed.get_rank()

        # Here we can use gather on the rank we want to print the
        # timing, however, there is no gather_base support in
        # pytorch yet. It is simpler to deal with a single tensor
        # and since we are only gathering a small amount of data,
        # it should be ok to use all-gather instead of gather.
        rank_name_to_time = torch.zeros(
            (world_size, len(names)), dtype=torch.float, device=torch.cuda.current_device()
        )
        for i, name in enumerate(names):
            if name in self._timers:
                # Here we don't need to pass the barrier flag as all
                # the processes are already in sync. This avoids the
                # issue of different timers having different barrier
                # groups inside their class.
                rank_name_to_time[rank, i] = self._timers[name].elapsed(reset=reset)

        # See the note above for why we are not using gather.
        dist_all_gather_func(rank_name_to_time.view(-1), rank_name_to_time[rank, :].view(-1))

        return rank_name_to_time

    def _get_global_min_max_time(self, names, reset, barrier, normalizer):
        """Report only min and max times across all ranks."""

        rank_name_to_time = self._get_elapsed_time_all_ranks(names, reset, barrier)
        # Using Python built-in methods to avoid the overhead of PyTorch operations.
        rank_name_to_time = (
            rank_name_to_time.permute(1, 0).tolist() if rank_name_to_time is not None else None
        )
        name_to_min_max_time = {}
        for i, name in enumerate(names):
            # filter out the ones we did not have any timings for
            rank_to_time = list(filter(lambda x: x > 0.0, rank_name_to_time[i]))
            # If the timer exists:
            if len(rank_to_time) > 0:
                name_to_min_max_time[name] = (
                    min(rank_to_time) / normalizer,
                    max(rank_to_time) / normalizer,
                )
        return name_to_min_max_time

    def _get_global_min_max_time_string(self, names, reset, barrier, normalizer, max_only):
        """Report strings for max/minmax times across all ranks."""
        name_to_min_max_time = self._get_global_min_max_time(names, reset, barrier, normalizer)
        if not name_to_min_max_time:
            return None
        if max_only:
            output_string = 'max time across ranks (ms):'
        else:
            output_string = '(min, max) time across ranks (ms):'
        for name in name_to_min_max_time:
            min_time, max_time = name_to_min_max_time[name]
            if max_only:
                output_string += '\n    {}: {:.2f}'.format((name + ' ').ljust(48, '.'), max_time)
            else:
                output_string += '\n    {}: ({:.2f}, {:.2f})'.format(
                    (name + ' ').ljust(48, '.'), min_time, max_time
                )
        return output_string

    def _get_all_ranks_time_string(self, names, reset, barrier, normalizer):
        """Report times across all ranks."""
        rank_name_to_time = self._get_elapsed_time_all_ranks(names, reset, barrier)

        output_string = 'times across ranks (ms):'
        no_reported_timing = True
        for i, name in enumerate(names):
            not_yet_found = True
            for rank in range(torch.distributed.get_world_size()):
                if rank_name_to_time[rank, i] > 0:
                    no_reported_timing = False
                    if not_yet_found:
                        not_yet_found = False
                        output_string += '\n  {}:'.format(name)
                    output_string += '\n     rank {:2d}: {:.2f}'.format(
                        rank, rank_name_to_time[rank, i] / normalizer
                    )
        if no_reported_timing:
            return None
        return output_string

    def get_all_timers_string(
        self,
        names: List[str] = None,
        normalizer: float = 1.0,
        reset: bool = True,
        barrier: bool = False,
    ):
        """Returns the output string with logged timer values according to configured options.

        Args:
            names (List[str]): Names of the timers to log. If None, all registered timers are
                               fetched. Defaults to None.
            normalizer (float, optional): Normalizes the timer values by the factor.
                                          Defaults to 1.0.
            reset (bool, optional): Whether to reset timer values after logging. Defaults to True.
            barrier (bool, optional): Whether to do a global barrier before time measurments.
                                      Defaults to False.

        Raises:
            Exception: Raises if log option is invalid.

        Returns:
            str: Formatted string with the timer values.
        """

        if names == None:  # get all registered timers
            names = self._timers.keys()

        assert normalizer > 0.0
        if self._log_option in ['max', 'minmax']:
            max_only = False
            if self._log_option == 'max':
                max_only = True
            output_string = self._get_global_min_max_time_string(
                names, reset, barrier, normalizer / 1000.0, max_only
            )
        elif self._log_option == 'all':
            output_string = self._get_all_ranks_time_string(
                names, reset, barrier, normalizer / 1000.0
            )
        else:
            raise Exception('unknown timing log option {}'.format(self._log_option))
        return output_string

    def log(
        self,
        names: List[str],
        rank: int = None,
        normalizer: float = 1.0,
        reset: bool = True,
        barrier: bool = False,
    ):
        """logs the timers passed in names to stdout. Example usage is to log average per step
           value for timer 'foo', this function can be called with normalizer factor set to logging
           interval.

        Args:
            names (List[str]): Names of the timers to log.
            rank (int, optional): logs the timers to a specific rank. If set to None, logs to the
                                  last rank. Defaults to None.
            normalizer (float, optional): Normalizes the timer values by the factor.
                                          Defaults to 1.0.
            reset (bool, optional): Whether to reset timer values after logging. Defaults to True.
            barrier (bool, optional): Whether to do a global barrier before time measurments.
                                      Defaults to False.
        """

        output_string = self.get_all_timers_string(names, normalizer, reset, barrier)
        # If no input rank is provided, log on last rank.
        if rank is None:
            rank = torch.distributed.get_world_size() - 1
        if rank == torch.distributed.get_rank() and output_string is not None:
            logger.info(output_string)

    def write(
        self,
        names: List[str],
        writer,
        iteration: int,
        normalizer: float = 1.0,
        reset: bool = True,
        barrier: bool = False,
    ):
        """Write timers to a tensorboard writer.
        Note that we only report maximum time across ranks to tensorboard.

        Args:
            names (List[str]): Names of the timers to log.
            writer (SummaryWriter): Tensorboard SummaryWriter object
            iteration (int): Current iteration.
            normalizer (float, optional): Normalizes the timer values by the factor.
                                          Defaults to 1.0.
            reset (bool, optional): Whether to reset timer values after logging. Defaults to True.
            barrier (bool, optional): Whether to do a global barrier before time measurments.
                                      Defaults to False.
        """
        # currently when using add_scalars,
        # torch.utils.add_scalars makes each timer its own run, which
        # polutes the runs list, so we just add each as a scalar
        assert normalizer > 0.0
        name_to_min_max_time = self._get_global_min_max_time(names, reset, barrier, normalizer)
        if writer is not None:
            for name in name_to_min_max_time:
                _, max_time = name_to_min_max_time[name]
                if isinstance(writer, SummaryWriter) and SummaryWriter is not None:
                    writer.add_scalar(name + '-time', max_time, iteration)
                elif writer == wandb and wandb is not None:
                    writer.log({name + '-time': max_time}, iteration)


================================================
FILE: megatron/core/tokenizers/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from megatron.core.tokenizers.base_tokenizer import MegatronTokenizerBase
from megatron.core.tokenizers.megatron_tokenizer import MegatronTokenizer


================================================
FILE: megatron/core/tokenizers/base_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from abc import ABC, abstractmethod


class MegatronTokenizerBase(ABC):
    """Abstract class for Megatron tokenizers."""

    def __init__(self, path: str, config: dict, **kwargs) -> None:
        """
        Args:
            path (str): path to the tokenizer model.
            config (dict): tokenizer parameters.
                library (str): tokenizer library.
                class_name (str): name of tokenizer class.
                class_path (str): path to tokenizer class.
                model_type (str): type of the model to be used with tokenizer.
                chat_template (str): tokenizer chat template.
        """

        self.path = path
        for key, value in config.items():
            setattr(self, key, value)

    @abstractmethod
    def tokenize(self):
        """Encoding function."""
        pass

    @abstractmethod
    def detokenize(self):
        """Decoding function."""
        pass

    @abstractmethod
    def vocab(self):
        """Returns tokenizer vocab."""
        pass

    @abstractmethod
    def vocab_size(self):
        """Returns tokenizer vocab size."""
        pass

    @abstractmethod
    def apply_chat_template(self):
        """Applies tokenizer's chat template."""
        pass


================================================
FILE: megatron/core/tokenizers/megatron_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import importlib
import json
import logging
import os
from collections import OrderedDict
from typing import Optional, Union

from megatron.core.tokenizers.base_tokenizer import MegatronTokenizerBase

TOKENIZER_MAPPING_NAMES = OrderedDict(
    [
        ("default-text", "DefaultTokenizerText"),
        ("gpt", "GPTTokenizer"),
        ("mamba", "MambaTokenizer"),
        ("bert", "BertTokenizer"),
        ("t5", "T5Tokenizer"),
        ("default-vision", "DefaultTokenizerVision"),
    ]
)

TEXT_LIBRARIES = [
    "sentencepiece",
    "huggingface",
    "megatron",
    "tiktoken",
    "byte-level",
    "null-text",
    "sft",
]
VISION_LIBRARIES = ["multimodal", "null-multimodal"]

logger = logging.getLogger(__name__)


class MegatronTokenizer:
    """Restores model tokenizer."""

    def __init__(self) -> None:
        raise EnvironmentError(
            "MegatronTokenizer is designed to be instantiated using the "
            "`MegatronTokenizer.from_pretrained()` method."
        )

    def from_pretrained(
        tokenizer_path: str = None, metadata_path: Optional[Union[str, dict]] = None, **kwargs
    ) -> MegatronTokenizerBase:
        """
        Args:
            path (str): path to tokenizer file with metadata.json in folder.
            metadata_path (Optional[str]): path to the tokenizer metadata.
                Must be specified when loading the tokenizer from HF.

        Returns:
            MegatronTokenizerBase: tokenizer object.

        Usage:
            MegatronTokenizer.from_pretrained(tokenizer_path='/path/to/tokenzier')
        """

        # Get metadata path
        if not metadata_path:
            metadata_path = _get_metadata_path(tokenizer_path)

        if isinstance(metadata_path, str):
            # Check if metadata file exists
            assert os.path.exists(metadata_path), (
                "Tokenizer metadata file doesn't exist. Please, use "
                "MegatronTokenizer.write_metadata() method to generate metadata file."
            )
            # Load tokenizer metadata
            with open(metadata_path, "r") as f:
                metadata = json.load(f)
        elif isinstance(metadata_path, dict):
            metadata = metadata_path
            metadata_path = None
        else:
            raise ValueError(
                f"Expected metadata_path to be str or dict, but got {type(metadata_path)}."
            )

        tokenizer_library = metadata.get('library', None)
        if tokenizer_library not in ['byte-level', 'null-text', 'null-multimodal']:
            assert tokenizer_path, "Tokenizer path must be specified."

        if tokenizer_library in ['multimodal']:
            assert 'prompt_format' in kwargs, "Prompt format (`prompt_format`) must be specified."
            assert (
                'special_tokens' in kwargs
            ), "Special tokens (`special_tokens`) must be specified."
            assert (
                'image_tag_type' in kwargs
            ), "Image tag type (`image_tag_type`) must be specified."

        # Initialize tokenizer object
        tokenizer_cls = _get_tokenizer_model_class(tokenizer_library, metadata)

        metadata['metadata_path'] = metadata_path
        tokenizer = tokenizer_cls(path=tokenizer_path, config=metadata, **kwargs)

        return tokenizer

    def write_metadata(
        tokenizer_path: str,
        tokenizer_library: str,
        model_type: Optional[str] = None,
        tokenizer_class: Optional[MegatronTokenizerBase] = None,
        chat_template: Optional[str] = None,
        overwrite: Optional[bool] = False,
        metadata_path: Optional[str] = None,
    ) -> None:
        """
        Creates metadata file for tokenizer.

        Args:
            tokenizer_path (str): path to tokenizer model.
            tokenizer_library (str): tokenizer model library.
            model_type (str): type of the model to be used with tokenizer.
                list of available model types: [gpt, bert, t5, mamba, default].
                `DefaultTokenizerText` will be used if model_type is not specified.
            tokenizer_class (MegatronTokenizerBase): pre-defined tokenizer class.
            chat_template (str): tokenizer chat template in jinja format.
            overwrite (bool): overwrites existing metadata file if set to True.
            metadata_path (Optional[str]): path where metadata file will be saved. If not specified,
                the metadata file will be stored in the same directory as the tokenizer.

        Usage:
            MegatronTokenizer.write_metadata(
                tokenizer_path='/path/to/tokenzier/model',
                tokenizer_library='sentencepiece',
                model_type='llama',
            )
        """

        assert os.path.exists(
            tokenizer_path
        ), "Tokenizer path doesn't exist. Please, provide the correct path to the tokenizer."
        assert tokenizer_library in TEXT_LIBRARIES or tokenizer_library in VISION_LIBRARIES, (
            "Tokenizer library is not supported. Please, see the list of available "
            f"tokenizer libraries: text: {TEXT_LIBRARIES}, vision: {VISION_LIBRARIES}."
        )
        tokenizer_type = 'text' if tokenizer_library in TEXT_LIBRARIES else 'vision'
        if model_type is None and tokenizer_class is None:
            model_type = f"default-{tokenizer_type}"

        # Write metadata
        if not metadata_path:
            metadata_path = _get_metadata_path(tokenizer_path)
        if os.path.exists(metadata_path) and not overwrite:
            raise ValueError(
                "Metadata file already exists. If you want to overwrite it, "
                "please set overwrite param to True."
            )
        else:
            metadata = {
                'library': tokenizer_library,
                'class_name': tokenizer_class.__name__ if tokenizer_class else None,
                'class_path': tokenizer_class.__module__ if tokenizer_class else None,
                'model_type': model_type,
                'chat_template': chat_template,
            }

            with open(metadata_path, "w") as f:
                json.dump(metadata, f)

            logger.info(f"Metadata file was sucessfully saved: {metadata_path}.")


def _get_metadata_path(tokenizer_path: str) -> str:
    """
    Returns metadata file path.

    Args:
        tokenizer_path (str): path to the tokenizer model.

    Returns:
        str: path to the metadata file.
    """

    # Get metadata file path
    dir_path = os.path.dirname(tokenizer_path) if os.path.isfile(tokenizer_path) else tokenizer_path
    metadata_path = f'{dir_path}/tokenizer_metadata.json'

    return metadata_path


def _get_tokenizer_model_class(library: str, metadata: dict) -> MegatronTokenizerBase:
    """
    Returns a class which corresponds to choosen tokenizer model type.

    Args:
        library (str): tokenizer library.
        metadata (dict): tokenizer metadata.

    Returns:
        MegatronTokenizerBase: class for choosen tokenizer model type.
    """
    # Return tokenizer class if it was specified in metadata.
    if metadata.get('tokenizer_class', None):
        return getattr(metadata['tokenizer_class_path'], metadata['tokenizer_class_name'])

    # Define tokenizer type
    tokenizer_type = 'text' if library in TEXT_LIBRARIES else 'vision'

    module_name = f"megatron.core.tokenizers.{tokenizer_type}.models"
    models = importlib.import_module(module_name)

    model_type = metadata.get("model_type", None)
    if model_type is None:
        model_type = f"default-{tokenizer_type}"

    tokenizer_cls = getattr(models, TOKENIZER_MAPPING_NAMES[model_type])

    return tokenizer_cls


================================================
FILE: megatron/core/tokenizers/text/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from megatron.core.tokenizers.text.text_tokenizer import MegatronTokenizerText


================================================
FILE: megatron/core/tokenizers/text/libraries/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from megatron.core.tokenizers.text.libraries.bytelevel_tokenizer import ByteLevelTokenizer
from megatron.core.tokenizers.text.libraries.huggingface_tokenizer import HuggingFaceTokenizer
from megatron.core.tokenizers.text.libraries.megatron_hf_tokenizer import MegatronHFTokenizer
from megatron.core.tokenizers.text.libraries.null_tokenizer import NullTokenizer
from megatron.core.tokenizers.text.libraries.sentencepiece_tokenizer import SentencePieceTokenizer
from megatron.core.tokenizers.text.libraries.sft_tokenizer import SFTTokenizer
from megatron.core.tokenizers.text.libraries.tiktoken_tokenizer import TikTokenTokenizer


================================================
FILE: megatron/core/tokenizers/text/libraries/abstract_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from abc import ABC, abstractmethod
from typing import List


class MegatronTokenizerTextAbstract(ABC):
    """
    Abstract class for Megatron text tokenizers.
    """

    @abstractmethod
    def text_to_tokens(self, text: str) -> List[str]:
        """
        Converts text to tokens.

        Args:
            text (str): text to be tokenized.

        Returns:
            List[str]: list of tokens.
        """
        pass

    @abstractmethod
    def tokens_to_text(self, tokens: List[str]) -> str:
        """
        Converts tokens to text.

        Args:
            tokens (List[str]): tokens to be detokenized.

        Returns:
            str: detokenized text.
        """
        pass

    @abstractmethod
    def tokens_to_ids(self, tokens: List[str]) -> List[int]:
        """
        Converts tokens to ids.

        Args:
            tokens (List[str]): tokens to be converted.

        Returns:
            List[int]: ids of tokens.
        """
        pass

    @abstractmethod
    def ids_to_tokens(self, ids: List[int]) -> List[str]:
        """
        Converts ids to tokens.

        Args:
            ids (List[int]): ids to be converted.

        Returns:
            List[str]: list of tokens.
        """
        pass

    @abstractmethod
    def text_to_ids(self, text: str) -> List[int]:
        """
        Converts text to ids.

        Args:
            text (str): text to be tokenized.

        Returns:
            List[int]: list of ids.
        """
        pass

    @abstractmethod
    def ids_to_text(self, ids: List[int]) -> str:
        """
        Converts ids to text.

        Args:
            ids (List[int]): ids to be detokenized.

        Returns:
            str: detokenized text.
        """
        pass

    @abstractmethod
    def add_special_tokens(self):
        """Adds special tokens to the tokenizer."""
        pass

    @property
    def cls_id(self) -> int:
        """Property alias to match MegatronTokenizer; returns cls_id if available."""
        if hasattr(self, 'cls_id'):
            return self.cls_id
        raise AttributeError(f"{type(self).__name__} has no attribute 'cls' or 'cls_id'")

    @property
    def sep_id(self) -> int:
        """Property alias to match MegatronTokenizer; returns sep_id if available."""
        if hasattr(self, 'sep_id'):
            return self.sep_id
        raise AttributeError(f"{type(self).__name__} has no attribute 'sep' or 'sep_id'")

    @property
    def pad_id(self) -> int:
        """Property alias to match MegatronTokenizer; returns pad_id if available."""
        if hasattr(self, 'pad_id'):
            return self.pad_id
        raise AttributeError(f"{type(self).__name__} has no attribute 'pad' or 'pad_id'")

    @property
    def eod(self) -> int:
        """Property alias to match MegatronTokenizer; returns eod_id if available."""
        if hasattr(self, 'eod_id'):
            return self.eod_id
        if hasattr(self, 'eos_id'):
            # Default to end-of-sentence id if end-of-document is not defined.
            return self.eos_id
        raise AttributeError(
            f"{type(self).__name__} has no attribute 'eod', 'eod_id', 'eos', or 'eos_id'"
        )

    @property
    def bos_id(self) -> int:
        """Property alias to match MegatronTokenizer; returns bos_id if available."""
        if hasattr(self, 'bos_id'):
            return self.bos_id
        raise AttributeError(f"{type(self).__name__} has no attribute 'bos' or 'bos_id'")

    @property
    def eos_id(self) -> int:
        """Property alias to match MegatronTokenizer; returns eos_id if available."""
        if hasattr(self, 'eos_id'):
            return self.eos_id
        raise AttributeError(f"{type(self).__name__} has no attribute 'eos' or 'eos_id'")

    @property
    def mask_id(self) -> int:
        """Property alias to match MegatronTokenizer; returns mask_id if available."""
        if hasattr(self, 'mask_id'):
            return self.mask_id
        raise AttributeError(f"{type(self).__name__} has no attribute 'mask' or 'mask_id'")


================================================
FILE: megatron/core/tokenizers/text/libraries/bytelevel_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from typing import Dict, List, Optional, Union

from .abstract_tokenizer import MegatronTokenizerTextAbstract


class ByteLevelTokenizer(MegatronTokenizerTextAbstract):
    """
    A byte-level tokenizer that encodes text as UTF-8 bytes with user control over the EOS, BOS,
        and PAD tokens as well as the vocabulary size and a mapping of other special tokens
        to their IDs.
    """

    def __init__(
        self,
        special_tokens: Optional[Union[Dict[str, str], List[str]]] = None,
        vocab_size: int = 512,
        _eos_id: int = 0,
        _pad_id: int = 1,
        _bos_id: int = None,
    ):
        """A byte-level tokenizer that encodes text as UTF-8 bytes.

        This tokenizer treats each byte as a token, with a default vocabulary size of 512
        to accommodate UTF-8 byte values (0-255) plus special tokens. It can handle
        arbitrary text input by encoding it into bytes.

        Args:
            special_tokens: Dictionary or list of special tokens to add to the vocabulary.
                These tokens will be assigned IDs at the end of the vocabulary.
                Defaults to None.
            vocab_size: Size of the vocabulary, should be at least 256 to handle all byte values.
                Special tokens will be added after this size.
                Defaults to 512.
            _eos_id: ID to use for the end-of-sequence token.
                Defaults to 0.
            _pad_id: ID to use for the padding token.
                Defaults to 1.
            _bos_id: ID to use for the beginning-of-sequence token.
                Defaults to None.
        """
        self._eos_id = _eos_id
        self._pad_id = _pad_id
        self._bos_id = _bos_id
        self.special_token_to_id = {
            self.pad_id: self.pad_id,
            self.bos_id: self.bos_id,
            self.eos_id: self.eos_id,
        }
        # Track special byte-tokens at end of vocabulary.
        self.vocab_size = vocab_size if special_tokens is None else vocab_size + len(special_tokens)
        self.special_start = self.vocab_size
        special_tokens = {} if special_tokens is None else special_tokens
        for tok in special_tokens:
            self.special_start -= 1
            self.special_token_to_id[tok] = self.special_start
        self.id_to_special_token = {v: k for k, v in self.special_token_to_id.items()}

    # no distinction between tokens and ids.
    def text_to_tokens(self, text):
        """
        Convert a text to a list of tokens.
        """
        return self.text_to_ids(text)

    def tokens_to_text(self, tokens):
        """
        Convert a list of tokens to a text.
        """
        return self.ids_to_text(tokens)

    def text_to_ids(self, text):
        """
        Convert a text to a list of IDs.
        """
        return list(text.encode('utf-8'))

    def ids_to_text(self, ids):
        """
        Convert a list of IDs to a text.
        """
        # remove special tokens.
        ids = [x for x in ids if x < self.special_start]
        return bytes(ids).decode('utf-8', errors='ignore').rstrip()

    def tokens_to_ids(self, tokens):
        """
        Convert a list of tokens to a list of IDs.
        """
        if isinstance(tokens, str):
            tokens = [tokens]
        ids = []
        for token in tokens:
            ids.append(self.token_to_id(token))
        return ids

    def ids_to_tokens(self, ids):
        """
        Convert a list of IDs to a list of tokens.
        """
        if isinstance(ids, int):
            ids = [ids]
        tokens = []
        for id in ids:
            tokens.append(self.id_to_token(id))
        return tokens

    def token_to_id(self, token):
        """
        Convert a token to its corresponding ID.
        """
        if token in self.special_token_to_id:
            return self.special_token_to_id[token]
        else:
            return token

    def id_to_token(self, id):
        """
        Convert an ID to its corresponding token.
        """
        if id < self.special_start:
            return id
        else:
            return self.id_to_special_token[id]

    def add_special_tokens(self, special_tokens: Union[list, dict]) -> None:
        """Adds special tokens to the tokenizer."""
        raise NotImplementedError("This method is not supported for byte-level tokenizers.")

    @property
    def pad_id(self):
        """
        Get the padding ID.
        """
        return self._pad_id

    @property
    def bos_id(self):
        """
        Get the beginning-of-sequence ID.
        """
        return self._bos_id

    @property
    def eos_id(self):
        """
        Get the end-of-sequence ID.
        """
        return self._eos_id

    @property
    def eod(self):
        """
        Get the end-of-document ID.
        """
        return self._eos_id

    @property
    def unk_id(self):
        """
        Get the unknown ID.
        """
        return 259


================================================
FILE: megatron/core/tokenizers/text/libraries/chat_template.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from typing import Dict, List, Optional, Union

try:
    from transformers.utils.chat_template_utils import _compile_jinja_template

    HAVE_TRANSFORMERS = True
except ImportError:
    HAVE_TRANSFORMERS = False


class MegatronTokenizerChatTemplate:
    """Chat template class for Megatron text tokenizers."""

    def apply_chat_template(
        self,
        conversation: List[Dict[str, str]],
        chat_template: str,
        tokenize: Optional[bool] = True,
        truncation: Optional[bool] = False,
        max_length: Optional[int] = None,
        add_generation_prompt: Optional[bool] = False,
    ) -> Union[str, List[int]]:
        """
        Applies tokenizer's chat template to the conversation.

        Args:
            conversation (List[Dict[str, str]]): a list of dicts with "role" and "content" keys,
                representing the chat history so far. Conversation example:
                [
                    {"role": "system", "content": "You are a witty and helpful assistant."},
                    {"role": "user", "content": "Hey, what's a fun fact about octopuses?"},
                    {"role": "assistant", "content": "Octopuses blood is blue!"},
                    {"role": "user", "content": "Whoa, why is their blood blue?"},
                ]
            tokenize (bool): whether to tokenize the output. If `False`,
                the output will be a string.
            truncation (bool): whether to truncate sequences at the maximum length.
                Has no effect if tokenize is `False`.
            max_length (int): maximum length to use fro truncation.
                Has no effect if tokenize is `False`.
            add_generation_prompt (bool): If this is set, a prompt with the token(s) that indicate
                the start of an assistant message will be appended to the formatted output.
                This is useful when you want to generate a response from the model.
                Note that this argument will be passed to the chat template,
                and so it must be supported in the template for this argument to have any effect.
        """

        assert chat_template, (
            "Chat template is not defined. "
            "Please, specify tokenizer chat template in the metadata file."
        )
        if truncation:
            assert max_length, "max_length must be specified if truncation is used."

        if HAVE_TRANSFORMERS:
            compiled_template = _compile_jinja_template(chat_template)
            chat_text = compiled_template.render(
                messages=conversation, add_generation_prompt=add_generation_prompt
            )

            if tokenize:
                chat_ids = self.text_to_ids(chat_text)
                if truncation:
                    chat_ids = chat_ids[:max_length]
                return chat_ids

            return chat_text
        else:
            raise ModuleNotFoundError("Please, install transformers library.")


================================================
FILE: megatron/core/tokenizers/text/libraries/huggingface_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import logging
from typing import List, Optional

try:
    from transformers import AutoTokenizer

    HAVE_TRANSFORMERS = True
except ModuleNotFoundError:
    HAVE_TRANSFORMERS = False

from megatron.core.utils import log_single_rank

from .abstract_tokenizer import MegatronTokenizerTextAbstract

logger = logging.getLogger(__name__)


class HuggingFaceTokenizer(MegatronTokenizerTextAbstract):
    """
    Wrapper of HuggingFace AutoTokenizer
        https://huggingface.co/transformers/model_doc/auto.html#autotokenizer.
    """

    def __init__(
        self,
        tokenizer_path: str,
        vocab_file: Optional[str] = None,
        merges_file: Optional[str] = None,
        mask_token: Optional[str] = None,
        bos_token: Optional[str] = None,
        eos_token: Optional[str] = None,
        pad_token: Optional[str] = None,
        sep_token: Optional[str] = None,
        cls_token: Optional[str] = None,
        unk_token: Optional[str] = None,
        additional_special_tokens: Optional[List] = [],
        use_fast: Optional[bool] = True,
        trust_remote_code: Optional[bool] = False,
        include_special_tokens: bool = True,
        chat_template: str = None,
    ):
        """
        Args:
            tokenizer_path: corresponds to HuggingFace-AutoTokenizer's
                'pretrained_model_name_or_path' input argument.
                For more details please refer to
                https://huggingface.co/transformers/_modules/transformers/tokenization_auto.html#AutoTokenizer.from_pretrained.
            vocab_file: path to file with vocabulary which consists
                of characters separated by newlines.
            mask_token: mask token
            bos_token: the beginning of sequence token
            eos_token: the end of sequence token. Usually equal to sep_token
            pad_token: token to use for padding
            sep_token: token used for separating sequences
            cls_token: class token. Usually equal to bos_token
            unk_token: token to use for unknown tokens
            additional_special_tokens: list of other tokens beside standard special tokens
                (bos, eos, pad, etc.).
                For example, sentinel tokens for T5 (<extra_id_0>, <extra_id_1>, etc.)
            use_fast: whether to use fast HuggingFace tokenizer
            include_special_tokens: when True, converting text to ids will include special
                tokens / prompt tokens (if any), yielding self.tokenizer(text).input_ids
        """

        try:
            # this logic deals with different huggingface tokenizers having different args
            if vocab_file is None:
                self.tokenizer = AutoTokenizer.from_pretrained(
                    pretrained_model_name_or_path=tokenizer_path,
                    use_fast=use_fast,
                    trust_remote_code=trust_remote_code,
                )
            elif merges_file is None:
                self.tokenizer = AutoTokenizer.from_pretrained(
                    pretrained_model_name_or_path=tokenizer_path,
                    vocab_file=vocab_file,
                    use_fast=use_fast,
                    trust_remote_code=trust_remote_code,
                )
            else:
                self.tokenizer = AutoTokenizer.from_pretrained(
                    pretrained_model_name_or_path=tokenizer_path,
                    vocab_file=vocab_file,
                    merge_files=merges_file,
                    use_fast=use_fast,
                    trust_remote_code=trust_remote_code,
                )
        except Exception as e:
            raise ValueError(
                'Unable to instantiate HuggingFace AutoTokenizer '
                f'for {tokenizer_path}. Exception: {e}'
            )

        # Store the tokenizer's existing chat template if the user does not provide
        # a custom chat template. Otherwise, override the default chat template with
        # the user-provided template.
        if chat_template is None:
            chat_template = self.tokenizer.chat_template
        else:
            self.tokenizer.chat_template = chat_template

        self.include_special_tokens = include_special_tokens
        self.original_vocab_size = len(self.tokenizer)
        self.chat_template = chat_template
        self.eos_token = eos_token
        special_tokens_dict = {}

        # # setting special tokens, by default the default model's special tokens will be preserved
        # # unless passes new values to the special tokens
        if unk_token is not None:
            special_tokens_dict["unk_token"] = unk_token
        if mask_token is not None:
            special_tokens_dict["mask_token"] = mask_token
        if pad_token is not None:
            special_tokens_dict["pad_token"] = pad_token

        # if the model does not have eos_token but has sep_token,
        if sep_token is not None:
            special_tokens_dict["sep_token"] = sep_token
        if eos_token is not None:
            special_tokens_dict["eos_token"] = eos_token
        elif self.tokenizer.sep_token is None and self.tokenizer.eos_token:
            special_tokens_dict["sep_token"] = self.tokenizer.eos_token
        elif self.tokenizer.eos_token is None and self.tokenizer.sep_token:
            special_tokens_dict["eos_token"] = self.tokenizer.sep_token

        # if the model does not have bos_token but has cls_token,
        # set bos_token = cls_token, and vice versa
        if bos_token is not None:
            special_tokens_dict["bos_token"] = bos_token
        elif self.tokenizer.bos_token is None and self.tokenizer.cls_token:
            special_tokens_dict["bos_token"] = self.tokenizer.cls_token
        if cls_token is not None:
            special_tokens_dict["cls_token"] = cls_token
        elif self.tokenizer.cls_token is None and self.tokenizer.bos_token:
            special_tokens_dict["cls_token"] = self.tokenizer.bos_token

        # add additional special tokens (not standard special tokens such as bos, eod, sep)
        if additional_special_tokens is not None:
            special_tokens_dict["additional_special_tokens"] = additional_special_tokens

        new_tokens_in_vocab = []
        for token in [mask_token, bos_token, eos_token, pad_token, sep_token, cls_token, unk_token]:
            if token is not None and token not in self.tokenizer.get_vocab():
                new_tokens_in_vocab.append(token)
        for token in additional_special_tokens:
            if token is not None and token not in self.tokenizer.get_vocab():
                new_tokens_in_vocab.append(token)

        if len(new_tokens_in_vocab) > 0:
            """
            Special tokens that were not previously included in the tokenizer's vocabulary file
            will be added to the vocabulary and, as a result, the model should be resized,
            for example:

            # define your model
            tokenizer_path = 'roberta-base'
            tokenizer = MegatronTokenizer.from_pretrained(tokenizer_path=tokenizer_path)

            special_tokens = {'bos_token': '<BOS>',
                              'cls_token': '<CSL>',
                              'additional_special_tokens': ['<MY_NER_TOKEN>', '<ANOTHER_TOKEN>']}
            tokenizer.add_special_tokens(special_tokens_dict=special_tokens)

            # resize your model so that the embeddings for newly added tokens
            tokenizer.resize_token_embeddings(tokenizer_default.vocab_size)
            """

            log_single_rank(
                logger,
                logging.WARNING,
                f'{new_tokens_in_vocab} \n will be added to the vocabulary.\n'
                f'Please resize your model accordingly.',
            )
        self.add_special_tokens(special_tokens_dict)
        self.space_sensitive = self.text_to_tokens('x y') != self.text_to_tokens(
            'x'
        ) + self.text_to_tokens('y')
        self._inv_vocab_dict = {}

    def add_special_tokens(self, special_tokens_dict: dict) -> int:
        """
        Adds a dictionary of special tokens (eos, pad, cls...).
        If special tokens are NOT in the vocabulary, they are added
        to it (indexed starting from the last index of the current vocabulary).

        Args:
            special_tokens_dict: dict of string.
                Keys should be in the list of predefined special attributes:
                [``bos_token``, ``eos_token``, ``unk_token``, ``sep_token``, ``pad_token``,
                ``cls_token``, ``mask_token``, ``additional_special_tokens``].
                Tokens are only added if they are not already in the vocabulary.

        Returns:
            Number of tokens added to the vocabulary.
        """

        num_tokens_added = self.tokenizer.add_special_tokens(special_tokens_dict)

        if num_tokens_added > 0:
            log_single_rank(
                logger,
                logging.INFO,
                f'{num_tokens_added} special tokens added, resize your model accordingly.',
            )
        for k in self.tokenizer.SPECIAL_TOKENS_ATTRIBUTES:
            setattr(self, k, getattr(self.tokenizer, k, None))
        return num_tokens_added

    @property
    def additional_special_tokens_ids(self):
        """
        Returns a list of the additional special tokens (excluding bos, eos, pad, unk).
        Used to return sentinel tokens for e.g. T5.
        """
        return [self.token_to_id(token) for token in self.additional_special_tokens]

    def text_to_tokens(self, text: str) -> List[str]:
        """Converts text to tokens."""
        tokens = self.tokenizer.tokenize(text)
        return tokens

    def tokens_to_text(self, tokens: List[str]) -> str:
        """Converts list of tokens text."""
        text = self.tokenizer.convert_tokens_to_string(tokens)
        return text

    def token_to_id(self, token: str) -> int:
        """Converts a single token to it's id."""
        return self.tokens_to_ids([token])[0]

    def tokens_to_ids(self, tokens: List[str]) -> List[int]:
        """Converts list of tokens to it's ids."""
        ids = self.tokenizer.convert_tokens_to_ids(tokens)
        return ids

    def ids_to_tokens(self, ids: List[int]) -> List[str]:
        """Converts list of tokens ids to it's token values."""
        tokens = self.tokenizer.convert_ids_to_tokens(ids)
        return tokens

    def text_to_ids(self, text: str) -> List[int]:
        """Converts text to tokens ids."""
        if self.include_special_tokens:
            return self.tokenizer(text).input_ids
        tokens = self.text_to_tokens(text)
        ids = self.tokens_to_ids(tokens)
        return ids

    def ids_to_text(self, ids: List[int], remove_special_tokens: Optional[bool] = None) -> str:
        """Converts list of ids to text.

        When remove_special_tokens is None, uses not self.include_special_tokens so that
        --tokenizer-hf-include-special-tokens keeps EOS (and other special tokens) in
        detokenized output (e.g. for RL trajectory consistency).
        """
        if remove_special_tokens is None:
            remove_special_tokens = not self.include_special_tokens
        tokens = self.ids_to_tokens(ids)
        if remove_special_tokens:
            tokens_clean = [
                t for t in tokens if t is not None and t not in self.tokenizer.all_special_tokens
            ]
        else:
            tokens_clean = [t for t in tokens if t is not None]
        text = self.tokens_to_text(tokens_clean)
        return text

    def apply_chat_template(self, conversation, chat_template, **kwargs):
        """Applies chat template and tokenizes results"""
        return self.tokenizer.apply_chat_template(
            conversation=conversation, chat_template=chat_template, **kwargs
        )

    @property
    def vocab(self) -> list:
        """Returns tokenizer vocab values."""
        id2vocab = {v: k for k, v in self.tokenizer.vocab.items()}
        return [id2vocab[i] for i in range(len(id2vocab))]

    @property
    def inv_vocab(self) -> dict:
        """Returns tokenizer vocab with reversed keys and values."""
        if self._inv_vocab_dict == {}:
            self._inv_vocab_dict = {v: k for k, v in self.tokenizer.vocab.items()}
        return self._inv_vocab_dict

    @property
    def vocab_size(self) -> int:
        """Returns size of tokenizer vocabulary."""
        return len(self.tokenizer)

    @property
    def pad_id(self) -> int:
        """Returns id of padding token."""
        if getattr(self, 'pad_token') is None:
            return None
        return self.tokens_to_ids([getattr(self, 'pad_token')])[0]

    @property
    def bos_id(self) -> int:
        """Returns id of beggining of sentence token."""
        if getattr(self, 'bos_token') is None:
            return None
        return self.tokens_to_ids([getattr(self, 'bos_token')])[0]

    @property
    def eos_id(self) -> int:
        """Returns id of end of sentence token."""
        return self.tokens_to_ids([getattr(self, 'eos_token')])[0]

    @property
    def eod(self) -> int:
        """Returns EOD token id."""
        if getattr(self, 'eos_token') is None:
            return None
        return self.tokens_to_ids([getattr(self, 'eos_token')])[0]

    @property
    def sep_id(self) -> int:
        """Returns id of SEP token."""
        if getattr(self, 'sep_token') is None:
            return None
        return self.tokens_to_ids([getattr(self, 'sep_token')])[0]

    @property
    def cls_id(self) -> int:
        """Returns id of classification token."""
        if getattr(self, 'cls_token') is None:
            return None
        return self.tokens_to_ids([getattr(self, 'cls_token')])[0]

    @property
    def unk_id(self) -> int:
        """Returns id of unknown tokens."""
        if getattr(self, 'unk_token') is None:
            return None
        return self.tokens_to_ids([getattr(self, 'unk_token')])[0]

    @property
    def mask_id(self) -> int:
        """Returns id of mask token."""
        if getattr(self, 'mask_token') is None:
            return None
        return self.tokens_to_ids([getattr(self, 'mask_token')])[0]

    def save_vocabulary(self, save_directory: str, filename_prefix: str = None):
        """Saves tokenizer's vocabulary and other artifacts to the specified directory"""
        return self.tokenizer.save_vocabulary(
            save_directory=save_directory, filename_prefix=filename_prefix
        )

    def save_pretrained(self, save_directory: str):
        """Saves tokenizer's vocabulary and other artifacts to the specified directory"""
        return self.tokenizer.save_pretrained(save_directory)


================================================
FILE: megatron/core/tokenizers/text/libraries/megatron_hf_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import logging
import os
import shutil
from typing import Optional

import torch
from torch.hub import _get_torch_home

try:
    import wget

    HAVE_WGET = True
except ModuleNotFoundError:
    HAVE_WGET = False

from .huggingface_tokenizer import HuggingFaceTokenizer

logger = logging.getLogger(__name__)
torch_home = _get_torch_home()

if not isinstance(torch_home, str):
    logger.info("Torch home not found, caching megatron in cwd")
    torch_home = os.getcwd()

MEGATRON_CACHE = os.path.join(torch_home, "megatron")

MEGATRON_CONFIG_MAP = {
    "BertWordPieceLowerCase": {
        "checkpoint": "https://api.ngc.nvidia.com/v2/models/nvidia/megatron_bert_345m/versions/v0.0/files/release/mp_rank_00/model_optim_rng.pt",  # pylint: disable=line-too-long
        "vocab": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt",  # pylint: disable=line-too-long
        "tokenizer_name": "bert-large-uncased",
    },
    "BertWordPieceCase": {
        "checkpoint": "https://api.ngc.nvidia.com/v2/models/nvidia/megatron_bert_345m/versions/v0.1_cased/files/release/mp_rank_00/model_optim_rng.pt",  # pylint: disable=line-too-long
        "vocab": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt",
        "tokenizer_name": "bert-large-cased",
    },
    "GPT2BPETokenizer": {
        "vocab": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json",
        "merges_file": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt",
        "tokenizer_name": "gpt2",
    },
    "megatron-gpt-345m": {
        "vocab": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json",
        "merges_file": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt",
        "tokenizer_name": "gpt2",
    },
    "megatron-bert-345m-uncased": {
        "checkpoint": "https://api.ngc.nvidia.com/v2/models/nvidia/megatron_bert_345m/versions/v0.0/files/release/mp_rank_00/model_optim_rng.pt",  # pylint: disable=line-too-long
        "vocab": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt",
        "tokenizer_name": "bert-large-uncased",
    },
    "megatron-bert-345m-cased": {
        "checkpoint": "https://api.ngc.nvidia.com/v2/models/nvidia/megatron_bert_345m/versions/v0.1_cased/files/release/mp_rank_00/model_optim_rng.pt",  # pylint: disable=line-too-long
        "vocab": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt",
        "tokenizer_name": "bert-large-cased",
    },
    "megatron-bert-uncased": {
        "vocab": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt",  # pylint: disable=line-too-long
        "tokenizer_name": "bert-large-uncased",
    },
    "megatron-bert-cased": {
        "vocab": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt",
        "tokenizer_name": "bert-large-cased",
    },
    "biomegatron-bert-345m-uncased": {
        "vocab": "https://api.ngc.nvidia.com/v2/models/nvidia/biomegatron345muncased/versions/0/files/vocab.txt",  # pylint: disable=line-too-long
        "tokenizer_name": "bert-large-uncased",
    },
    "biomegatron-bert-345m-cased": {
        "vocab": "https://api.ngc.nvidia.com/v2/models/nvidia/biomegatron345mcased/versions/0/files/vocab.txt",  # pylint: disable=line-too-long
        "tokenizer_name": "bert-large-cased",
    },
}


class MegatronHFTokenizer(HuggingFaceTokenizer):
    """ """

    def __init__(
        self,
        tokenizer_path: str,
        vocab_file: Optional[str] = None,
        merges_file: Optional[str] = None,
        **kwargs,
    ) -> None:
        if tokenizer_path in MEGATRON_CONFIG_MAP.keys():
            tokenizer_name = tokenizer_path
        else:
            raise ValueError(
                f"The name of the tokenizer is incorrect. \
            Please see the list of available models: {self._get_available_models_list()}."
            )

        vocab_file = self._get_vocab_file(tokenizer_name, vocab_file)
        merges_file = self._get_merges_file(tokenizer_name, vocab_file)
        tokenizer_path = MEGATRON_CONFIG_MAP[tokenizer_name]["tokenizer_name"]
        super().__init__(tokenizer_path, vocab_file, merges_file, **kwargs)

    def _get_vocab_file(self, tokenizer_name: str, vocab_file: str = None) -> str:
        """
        Gets vocabulary file from cache or downloads it.

        Args:
            tokenizer_name (str): pretrained model name.
            vocab_file (str): path to the vocab file.

        Returns:
            path: path to the vocab file
        """

        if not vocab_file:
            url = MEGATRON_CONFIG_MAP[tokenizer_name]["vocab"]

            path = os.path.join(MEGATRON_CACHE, tokenizer_name + "_vocab")
            vocab_file = self._download(path, url)

        return vocab_file

    def _get_merges_file(self, tokenizer_name: str, merges_file: str = None) -> str:
        """
        Gets merge file from cache or downloads it.

        Args:
            tokenizer_name (str): pretrained model name.
            merges_file (str): path to the merges file.

        Returns:
            path: path to the vocab file.
        """

        if not merges_file:
            if 'gpt' not in tokenizer_name.lower():
                return None
            url = MEGATRON_CONFIG_MAP[tokenizer_name]["merges_file"]

            path = os.path.join(MEGATRON_CACHE, tokenizer_name + "_merges")
            merges_file = self._download(path, url)

        return merges_file

    def _get_available_models_list(self) -> list:
        """Returns a list of available megatron tokenizers."""

        return list(MEGATRON_CONFIG_MAP.keys())

    def _download(self, path: str, url: str):
        """
        Gets a file from cache or downloads it

        Args:
            path: path to the file in cache
            url: url to the file
        Returns:
            path: path to the file in cache
        """

        if url is None:
            return None

        if (
            not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0
        ) and not os.path.exists(path):
            os.makedirs(MEGATRON_CACHE, exist_ok=True)
            logging.info(f"Downloading from {url} to {path}")
            if HAVE_WGET:
                downloaded_path = wget.download(url)
            else:
                raise ModuleNotFoundError("wget library should be isntalled.")
            if not os.path.exists(downloaded_path):
                raise FileNotFoundError(f"Downloaded file not found: {downloaded_path}")
            shutil.move(downloaded_path, path)
        # wait until the master process downloads the file and writes it to the cache dir
        if torch.distributed.is_initialized():
            torch.distributed.barrier()

        return path


================================================
FILE: megatron/core/tokenizers/text/libraries/null_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from collections import OrderedDict


class NullTokenizer:
    """
    Synthetic tokenizer for performance benchmarking and debugging

    Args:
        vocab_size: vocabulary size for embedding
    """

    def __init__(self, vocab_size):
        """ """
        self._vocab_size_without_eod = int(vocab_size)
        self._eod_id = self._vocab_size_without_eod

    def text_to_ids(self, text):
        """Converts text to ids."""
        return [int(x) for x in text.split(' ')]

    def ids_to_text(self, ids):
        """Converts ids to text."""
        text = [str(x) for x in ids]
        return ' '.join(text)

    def tokens_to_ids(self, tokens):
        """Converts tokens to ids."""
        return [int(x) for x in tokens]

    def ids_to_tokens(self, ids):
        """Converts ids to tokens."""
        return [str(x) for x in ids]

    def offsets(self, ids: list[int], text: str) -> list[int]:
        """Returns offsets."""
        offsets, start_idx = [], 0
        for id_ in ids:
            offsets.append(start_idx)
            start_idx += 1 + len(str(id_))
        return offsets

    @property
    def unique_identifiers(self) -> OrderedDict:
        """Property required for use with megatron-core datasets."""
        return OrderedDict({"class": f"{type(self).__module__}.{type(self).__qualname__}"})

    @property
    def vocab_size(self):
        """Returns vocab size."""
        return self._vocab_size_without_eod + 1

    @property
    def vocab(self):
        """ """
        raise NotImplementedError

    @property
    def inv_vocab(self):
        """ """
        raise NotImplementedError

    @property
    def cls(self):
        """Returns cls token."""
        return -1

    @property
    def sep(self):
        """Returns sep token."""
        return -1

    @property
    def mask(self):
        """Returns mask token."""
        return -1

    @property
    def eod(self):
        """Returns eod token."""
        return self._eod_id

    @property
    def additional_special_tokens_ids(self):
        """ """
        return None


================================================
FILE: megatron/core/tokenizers/text/libraries/sentencepiece_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import os
import re
from typing import Dict, List, Optional, Union

import numpy as np

try:
    import sentencepiece

    HAVE_SP = True
except ModuleNotFoundError:
    HAVE_SP = False

import torch

from .abstract_tokenizer import MegatronTokenizerTextAbstract
from .chat_template import MegatronTokenizerChatTemplate


class SentencePieceTokenizer(MegatronTokenizerTextAbstract, MegatronTokenizerChatTemplate):
    """Sentencepiecetokenizer https://github.com/google/sentencepiece."""

    def __init__(
        self,
        tokenizer_path: str,
        special_tokens: Optional[Union[Dict[str, str], List[str]]] = None,
        legacy: bool = False,
        ignore_extra_whitespaces: bool = True,
        chat_template: Optional[str] = None,
        trim_spm_separator_after_special_token=True,
        spm_separator='▁',
    ) -> None:
        """
        Args:
            tokenizer_path (str): path to sentence piece tokenizer model.
            special_tokens (Optional[Union[Dict[str, str], List[str]]]):
                either list of special tokens or dictionary of token name to token value
            legacy (bool): when set to True, the previous behavior of the SentecePiece wrapper
                will be restored, including the possibility to add special tokens inside wrapper.
            ignore_extra_whitespaces (bool): whether to ignore extra whitespaces in the
                input text while encoding.
                Note:
                    This is done for the current models tokenizers that don't handle extra
                    whitespaces as by default tokenizer learned to ignore it.
                    To check if the tokenizer by default ignores extra whitespaces refer to
                    `self.removed_extra_spaces` attribute of the tokenizer.
                    We added a parameter to process_asr_tokenizer.py for upcoming models to
                    handle it inbuilt.
            chat_template (Optional[str]): tokenizer chat template in jinja format.
        """

        self.chat_template = chat_template
        if not tokenizer_path or not os.path.exists(tokenizer_path):
            raise ValueError(f"tokenizer_path: {tokenizer_path} is invalid")

        if HAVE_SP:
            self.tokenizer = sentencepiece.SentencePieceProcessor()
        else:
            raise ModuleNotFoundError("sentencepiece library should be installed.")

        self.tokenizer.Load(tokenizer_path)

        self.original_vocab_size = self.tokenizer.get_piece_size()
        self.vocab_size = self.tokenizer.get_piece_size()
        self.legacy = legacy
        self.ignore_extra_whitespaces = ignore_extra_whitespaces
        # using special symbol for extra_space token, so it is not likely to be in the vocabulary
        self.extra_space_token = '☯'
        self.special_token_to_id = {}
        self.id_to_special_token = {}
        self.trim_spm_separator_after_special_token = trim_spm_separator_after_special_token
        self.spm_separator_id = self.tokenizer.piece_to_id(spm_separator)
        self.spm_separator = spm_separator

        if special_tokens:
            if not self.legacy:
                raise ValueError(
                    "Special tokens must be None when legacy is set to False. "
                    "Provide special tokens at train time."
                )
            self.add_special_tokens(special_tokens)

        self.removed_extra_spaces = self.tokenizer.encode_as_pieces(
            'x  y'
        ) == self.tokenizer.encode_as_pieces('x y')
        self.space_sensitive = self.text_to_tokens('x y') != self.text_to_tokens(
            'x'
        ) + self.text_to_tokens('y')

    def text_to_tokens(self, text: str) -> List[str]:
        """Converts text to tokens."""
        if self.removed_extra_spaces and not self.ignore_extra_whitespaces:
            text = re.sub(r'(?<= )(?= )|^ | $', f' {self.extra_space_token} ', text)
        if self.legacy:
            tokens = []
            idx = 0
            last_idx = 0

            while 1:
                indices = {}

                for token in self.special_token_to_id:
                    try:
                        indices[token] = text[idx:].index(token)
                    except ValueError:
                        continue

                if len(indices) == 0:
                    break

                next_token = min(indices, key=indices.get)
                next_idx = idx + indices[next_token]

                tok = self.tokenizer.encode_as_pieces(text[idx:next_idx])
                # Chat-templates insert a space between a special token and first word (e.g.
                # "[INST] who") which is tokenized as <inst-id> <space-id> <who-id> instead of
                # <inst-id> <who-id>.
                if (
                    self.trim_spm_separator_after_special_token
                    and len(tokens) > 0
                    and tokens[-1] in self.special_token_to_id
                    and len(tok) > 0
                    and tok[0] == self.spm_separator
                ):
                    tok.pop(0)
                tokens.extend(tok)
                tokens.append(next_token)
                idx = next_idx + len(next_token)

            tokens.extend(self.tokenizer.encode_as_pieces(text[idx:]))

        else:
            tokens = self.tokenizer.encode_as_pieces(text)

        if self.removed_extra_spaces and not self.ignore_extra_whitespaces:
            tokens = list(filter(lambda x: x != self.extra_space_token, tokens))
        return tokens

    def text_to_ids(self, text, sample_alpha=None) -> List[int]:
        """Converts text to tokens ids."""
        if isinstance(text, str):
            return self._text_to_ids(text, sample_alpha)
        else:
            raise ValueError(f"Expected str input, but got {type(text)}")

    def _text_to_ids(self, text, sample_alpha=None) -> List[int]:
        """Converts text to tokens ids."""
        if self.removed_extra_spaces and not self.ignore_extra_whitespaces:
            text = re.sub(r'(?<= )(?= )|^ | $', f' {self.extra_space_token} ', text).rstrip()
        if self.legacy:
            ids = []
            idx = 0
            last_idx = 0

            while 1:
                indices = {}

                for token in self.special_token_to_id:
                    try:
                        indices[token] = text[idx:].index(token)
                    except ValueError:
                        continue

                if len(indices) == 0:
                    break

                next_token = min(indices, key=indices.get)
                next_idx = idx + indices[next_token]

                text_tokens = self.tokenizer.encode(text[idx:next_idx])
                # Chat-templates insert a space between a special token and first word (e.g.
                # "[INST] who") which is tokenized as <inst-id> <space-id> <who-id> instead of
                # <inst-id> <who-id>.
                if (
                    self.trim_spm_separator_after_special_token
                    and len(ids) > 0
                    and ids[-1] in self.id_to_special_token
                    and len(text_tokens) > 0
                    and text_tokens[0] == self.spm_separator_id
                ):
                    text_tokens.pop(0)
                ids.extend(text_tokens)
                ids.append(self.special_token_to_id[next_token])
                idx = next_idx + len(next_token)

            if self.removed_extra_spaces and not self.ignore_extra_whitespaces:
                ids.extend(self._text_to_ids_extra_space(text[idx:]))
            else:
                ids.extend(self.tokenizer.encode_as_ids(text[idx:]))
            return ids

        if self.removed_extra_spaces and not self.ignore_extra_whitespaces:
            return self._text_to_ids_extra_space(text, sample_alpha)

        if sample_alpha is not None:
            return self.tokenizer.encode_as_ids(
                text, enable_sampling=True, alpha=sample_alpha, nbest_size=-1
            )
        else:
            return self.tokenizer.encode_as_ids(text)

    def _text_to_ids_extra_space(self, text, sample_alpha=None) -> List[int]:
        """Converts text to tokens ids."""
        ids = []
        encoding_kwargs = {}
        if sample_alpha is not None:
            encoding_kwargs = {'enable_sampling': True, 'alpha': sample_alpha, 'nbest_size': -1}
        for part in text.split(self.extra_space_token):
            if not part:
                continue
            part += self.extra_space_token
            part_ids = self.tokenizer.encode_as_ids(part, **encoding_kwargs)
            ids.extend(part_ids[:-1])

        return ids

    def tokens_to_text(self, tokens: List[str]) -> str:
        """Converts list of tokens text."""
        if isinstance(tokens, np.ndarray):
            tokens = tokens.tolist()

        return self.tokenizer.decode_pieces(tokens)

    def ids_to_text(self, ids: List[int]) -> str:
        """Converts list of ids to text."""
        if isinstance(ids, (np.ndarray, torch.Tensor)):
            ids = ids.tolist()

        if self.legacy:
            text = ""
            last_i = 0

            for i, id in enumerate(ids):
                if id in self.id_to_special_token:
                    text += self.tokenizer.decode_ids(ids[last_i:i]) + " "
                    text += self.id_to_special_token[id] + " "
                    last_i = i + 1

            text += self.tokenizer.decode_ids(ids[last_i:])
            return text.strip()

        return self.tokenizer.decode_ids(ids)

    def token_to_id(self, token: str) -> int:
        """Converts a single token to it's id."""
        if self.legacy and token in self.special_token_to_id:
            return self.special_token_to_id[token]

        return self.tokenizer.piece_to_id(token)

    def ids_to_tokens(self, ids: List[int]) -> List[str]:
        """Converts list of tokens ids to it's token values."""
        tokens = []
        for id in ids:
            if id >= self.original_vocab_size:
                tokens.append(self.id_to_special_token[id])
            else:
                tokens.append(self.tokenizer.id_to_piece(id))
        return tokens

    def tokens_to_ids(
        self, tokens: Union[str, List[str]], tokens_to_skip: List[str] = []
    ) -> List[int]:
        """Converts list of tokens to it's ids."""
        if isinstance(tokens, str):
            tokens = [tokens]
        ids = []
        for token in tokens:
            if token not in tokens_to_skip:
                ids.append(self.token_to_id(token))
        return ids

    def add_special_tokens(self, special_tokens: Union[list, dict]) -> None:
        """Adds special tokens to the tokenizer."""
        if not self.legacy:
            raise AttributeError(
                "Special Token addition does not work when legacy is set to False."
            )

        if isinstance(special_tokens, list):
            for token in special_tokens:
                if (
                    self.tokenizer.piece_to_id(token) == self.tokenizer.unk_id()
                    and token not in self.special_token_to_id
                ):
                    self.special_token_to_id[token] = self.vocab_size
                    self.id_to_special_token[self.vocab_size] = token
                    self.vocab_size += 1
                elif self.tokenizer.piece_to_id(token) != self.tokenizer.unk_id():
                    self.special_token_to_id[token] = self.tokenizer.piece_to_id(token)
                    self.id_to_special_token[self.special_token_to_id[token]] = token

        elif isinstance(special_tokens, dict):
            for token_name, token in special_tokens.items():
                setattr(self, token_name, token)
                if (
                    self.tokenizer.piece_to_id(token) == self.tokenizer.unk_id()
                    and token not in self.special_token_to_id
                ):
                    self.special_token_to_id[token] = self.vocab_size
                    self.id_to_special_token[self.vocab_size] = token
                    self.vocab_size += 1
                elif self.tokenizer.piece_to_id(token) != self.tokenizer.unk_id():
                    self.special_token_to_id[token] = self.tokenizer.piece_to_id(token)
                    self.id_to_special_token[self.special_token_to_id[token]] = token
        else:
            raise ValueError(
                "Expected special_tokens to be a list or a dict " + str(type(special_tokens))
            )

    def offsets(self, ids: list[int], text: str) -> list[int]:
        """Calculate offsets."""
        return [p.begin for p in self.tokenizer.decode_ids_as_immutable_proto(ids).pieces]

    @property
    def pad_id(self) -> int:
        """Returns id of padding token."""
        if self.legacy:
            pad_id = self.tokens_to_ids([self.pad_token])[0]
        else:
            pad_id = self.tokenizer.pad_id()
        return pad_id

    @property
    def bos_id(self) -> int:
        """Returns id of begginning of sentence token."""
        if self.legacy:
            bos_id = self.tokens_to_ids([self.bos_token])[0]
        else:
            bos_id = self.tokenizer.bos_id()
        return bos_id

    @property
    def eos_id(self) -> int:
        """Returns id of end of sentence token."""
        if self.legacy:
            eos_id = self.tokens_to_ids([self.eos_token])[0]
        else:
            eos_id = self.tokenizer.eos_id()
        return eos_id

    @property
    def sep_id(self) -> int:
        """Returns id of end of SEP token."""
        if self.legacy:
            return self.tokens_to_ids([self.sep_token])[0]
        else:
            raise NameError(
                "Use function token_to_id to retrieve special tokens other than "
                "unk, pad, bos, and eos."
            )

    @property
    def cls_id(self) -> int:
        """Returns id of classification token."""
        if self.legacy:
            return self.tokens_to_ids([self.cls_token])[0]
        else:
            raise NameError(
                "Use function token_to_id to retrieve special tokens other than "
                "unk, pad, bos, and eos."
            )

    @property
    def mask_id(self) -> int:
        """Returns id of mask token."""
        if self.legacy:
            return self.tokens_to_ids([self.mask_token])[0]
        else:
            raise NameError(
                "Use function token_to_id to retrieve special tokens other than "
                "unk, pad, bos, and eos."
            )

    @property
    def unk_id(self) -> int:
        """Returns id of unknown tokens."""
        return self.tokenizer.unk_id()

    @property
    def additional_special_tokens_ids(self) -> list:
        """
        Returns a list of the additional special tokens (excluding bos, eos, pad, unk).
        Used to return sentinel tokens for e.g. T5.
        """
        special_tokens = set(
            [
                self.bos_token,
                self.eos_token,
                self.pad_token,
                self.mask_token,
                self.cls_token,
                self.sep_token,
            ]
        )
        return [v for k, v in self.special_token_to_id.items() if k not in special_tokens]

    @property
    def vocab(self) -> list:
        """Returns tokenizer's vocabulary."""
        main_vocab = [
            self.tokenizer.id_to_piece(id) for id in range(self.tokenizer.get_piece_size())
        ]
        special_tokens = [
            self.id_to_special_token[self.original_vocab_size + i]
            for i in range(self.vocab_size - self.original_vocab_size)
        ]
        return main_vocab + special_tokens

    @property
    def inv_vocab(self) -> dict:
        """Returns tokenizer vocab with reversed keys and values."""
        return {id: token for id, token in enumerate(self.vocab)}


================================================
FILE: megatron/core/tokenizers/text/libraries/sft_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from dataclasses import dataclass
from typing import Dict, List, Union

import numpy as np

try:
    import transformers

    HAVE_TRANSFORMERS = True
except ModuleNotFoundError:
    HAVE_TRANSFORMERS = False


# fmt: off
nemotron_h_aligned_custom_template = """{% for message in messages %}{% if message['role'] == 'system' %}{{ '<SPECIAL_10>System\n' + message['content'].strip() + '\n' }}{% elif message['role'] == 'user' %}{{ '<SPECIAL_11>User\n' + message['content'].strip() + '\n' + '<SPECIAL_11>Assistant\n' }}{% elif message['role'] == 'assistant' %}{{ message['content'].strip() + '\n' }}{% endif %}{% endfor %}""" # pylint: disable=line-too-long
nemotron_nano_v2_custom_template = """{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'system' %}{{ '<SPECIAL_10>System\n' + content.replace('/think', '').replace('/no_think', '').strip() + '\n' }}{% elif message['role'] == 'user' %}{{ '<SPECIAL_11>User\n' + content.replace('/think', '').replace('/no_think', '').strip() + '\n' }}{% elif message['role'] == 'assistant' %}{{ '<SPECIAL_11>Assistant\n' + content.strip() + '\n<SPECIAL_12>\n' }}{% endif %}{% endfor %}""" # pylint: disable=line-too-long
identity_template = """{% for message in messages %}{{ message['content'] }}{% endfor %}"""
# fmt: on


IGNORE_INDEX = -100


@dataclass
class PromptConfig:
    """Config options for different prompt formats."""

    # How many tokens are used for the assistant prefix, e.g. "<|im_start|>assistant\n".
    # Used for masking the assistant prefix.
    assistant_prefix_len: int
    # Padding token ID.
    pad_token_id: int
    # For overriding the default chat format template.
    custom_chat_template: str
    # If the tokenizer inserts BOS token by default.
    has_bos: bool
    # If the tokenizer supports a separate role for system messages.
    has_system_role: bool
    # Wether to force a specific system message.
    force_system_message: bool = False
    system_default: dict = None


class SFTTokenizer:
    """SFT Tokenizer."""

    def __init__(self, tokenizer_path: str, prompt_format: str):
        """
        Note: Currently, only HuggingFaceTokenizer is supported as the underlying text tokenizer.

        Args:
            tokenizer_path (str): Underlying tokenizer path.
            prompt_format (str): Prompt format for the tokenizer.
        """
        if HAVE_TRANSFORMERS:
            # Currently, only HuggingFace tokenizers are supported.
            tokenizer = transformers.AutoTokenizer.from_pretrained(
                pretrained_model_name_or_path=tokenizer_path
            )
        else:
            raise ImportError(
                "SFTTokenizer currently requires transformers library to be installed"
            )

        self._vocab_size = len(tokenizer)
        self._tokenizer = tokenizer

        if prompt_format == "nemotron-nano-v2":
            self._prompt_config = PromptConfig(
                assistant_prefix_len=3,
                pad_token_id=tokenizer.convert_tokens_to_ids("<unk>"),
                custom_chat_template=nemotron_nano_v2_custom_template,
                has_bos=False,
                has_system_role=True,
            )
        elif prompt_format == "nemotron-h-aligned":
            self._prompt_config = PromptConfig(
                assistant_prefix_len=0,
                pad_token_id=tokenizer.convert_tokens_to_ids("<SPECIAL_233>"),
                custom_chat_template=nemotron_h_aligned_custom_template,
                has_bos=False,
                has_system_role=True,
            )
        elif prompt_format == "identity":
            self._prompt_config = PromptConfig(
                assistant_prefix_len=0,
                pad_token_id=tokenizer.convert_tokens_to_ids("<unk>"),
                custom_chat_template=identity_template,
                has_bos=False,
                has_system_role=True,
            )
        elif prompt_format == "default":
            self._prompt_config = PromptConfig(
                assistant_prefix_len=0,
                pad_token_id=(
                    tokenizer.pad_token_id
                    if tokenizer.pad_token_id is not None
                    else tokenizer.eos_token_id
                ),
                custom_chat_template=tokenizer.chat_template,
                has_bos=tokenizer.bos_token_id is not None,
                has_system_role=True,
            )
        else:
            raise NotImplementedError("unknown SFT prompt format", prompt_format)

        self._prompt_format = prompt_format

    def tokenize_conversation(
        self, conversation: List[Dict], return_target: bool, add_generation_prompt: bool
    ):
        """Convert a conversation to tokens.

        Args:
            conversation (List[Dict]): Sequence of system/user/assistant messages.
                Must be in the following format:
                [
                    {"role": "system", "content": "something"},
                    {"role": "user", "content": "something1"},
                    {"role": "assistant", "content": "something2"},
                ]
            return_target (bool): Return target tokens with system and assistant masked.
            add_generation_prompt (bool): Add assistant prefix to the end.
        """
        # Skip system message if the tokenizer doesn't have a system role.
        if not self._prompt_config.has_system_role and conversation[0]["role"] == "system":
            conversation = conversation[1:]

        tokens = self._tokenizer.apply_chat_template(
            conversation,
            tokenize=True,
            add_generation_prompt=add_generation_prompt,
            return_assistant_token_mask=False,
            return_tensors="np",
            chat_template=self._prompt_config.custom_chat_template,
        )[0]

        if not return_target:
            return tokens

        target = tokens.copy()

        # When using the default prompt format, we do not replace any tokens with IGNORE_INDEX.
        # Instead, all token losses will be used for simplicity.
        if self._prompt_format == "default":
            return tokens, target

        # Mask system and user tokens in the target.
        idx = 0
        for turn_idx, turn in enumerate(conversation):

            if turn["role"].lower() == "assistant" and len(turn["content"]) == 0:
                raise ValueError(f"empty assistant turn in conversation: {conversation}.")
            if turn["role"].lower() == "assistant":
                assert conversation[turn_idx - 1]["role"].lower() in ("user", "tool")

            turn_tokens = self._tokenizer.apply_chat_template(
                [turn], tokenize=True, chat_template=self._prompt_config.custom_chat_template
            )

            # There should be only one BOS at the very beginning.
            # After the first turn, skip BOS token.
            if self._prompt_config.has_bos and turn_idx > 0:
                turn_tokens = turn_tokens[1:]
            turn_len = len(turn_tokens)

            role = turn["role"].lower()
            if role in ("system", "user", "tool"):
                target[idx : idx + turn_len] = IGNORE_INDEX
            elif role == "assistant":
                if self._prompt_config.assistant_prefix_len > 0:
                    target[idx : idx + self._prompt_config.assistant_prefix_len] = IGNORE_INDEX
            else:
                raise ValueError("Wrong role value.")

            assert np.allclose(
                tokens[idx : idx + turn_len], turn_tokens
            ), f"expected turn tokens to match tokens in conversation {conversation}"

            idx += turn_len

        assert idx == len(tokens), f"mismatch in target masking the conversation {conversation}"

        return tokens, target

    def text_to_ids(self, text: Union[str, List[Dict]]):
        """Tokenize conversation or string input."""
        if isinstance(text, list):
            # This code path is used by the inference code currently.
            return self.tokenize_conversation(
                text, return_target=False, add_generation_prompt=True
            ).tolist()

        return self._tokenizer.encode(text)

    def tokens_to_ids(self, tokens: List[str]):
        """Convert tokens to IDs."""
        return self._tokenizer.convert_tokens_to_ids(tokens)

    def ids_to_text(self, tokens: List[int]):
        """Detokenize tokens."""
        return self._tokenizer.decode(tokens)

    def ids_to_tokens(self):
        """Converts ids to tokens."""
        raise NotImplementedError("This method is not supported for SFTTokenizer.")

    def text_to_tokens(self):
        """Converts text to tokens."""
        raise NotImplementedError("This method is not supported for SFTTokenizer.")

    def tokens_to_text(self):
        """Converts tokens to text."""
        raise NotImplementedError("This method is not supported for SFTTokenizer.")

    def get_special_tokens(self):
        """Get special tokens."""
        return self._tokenizer.get_added_vocab()

    def add_special_tokens(self):
        """Add special tokens."""
        raise NotImplementedError("This method is not supported for SFTTokenizer.")

    @property
    def pad_id(self):
        """Pad token ID."""
        return self._prompt_config.pad_token_id

    @property
    def bos_id(self):
        """Beginning of sequence token ID."""
        return self._tokenizer.bos_token_id

    @property
    def eod(self):
        """End of sentence token ID."""
        return self._tokenizer.eos_token_id

    @property
    def vocab(self):
        """Vocab."""
        return NotImplementedError("not used")

    @property
    def inv_vocab(self):
        """Inverse vocab."""
        return NotImplementedError("not used")

    @property
    def vocab_size(self):
        """Vocabulary size."""
        return self._vocab_size


================================================
FILE: megatron/core/tokenizers/text/libraries/tiktoken_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import base64
import json
import logging
import os
from pathlib import Path
from typing import Dict, List, Optional

try:
    import tiktoken
except ImportError:
    pass

from .abstract_tokenizer import MegatronTokenizerTextAbstract
from .chat_template import MegatronTokenizerChatTemplate

logger = logging.getLogger(__name__)

_PATTERN_TIKTOKEN_V1 = (
    r"[^\r\n\p{L}\p{N}]?+\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]++[\r\n]*|\s*[\r\n]|\s+(?!\S)|\s+"
)
_PATTERN_TIKTOKEN_V2 = "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"  # pylint: disable=line-too-long
DEFAULT_TIKTOKEN_MAX_VOCAB = 2**17  # 131072
SPECIAL_TOKENS = ["<unk>", "<s>", "</s>", "<mask>", "<pad>", "<cls>", "<sep>"]
SPECIAL_TOKEN_TEMPLATE = "<SPECIAL_{id}>"

__all__ = ["TikTokenTokenizer", "reload_mergeable_ranks"]


def reload_mergeable_ranks(
    path: str, max_vocab: Optional[int] = None, num_special_tokens: Optional[int] = None
) -> Dict[bytes, int]:
    """
    Reload the tokenizer JSON file and convert it to Tiktoken format.

    Args:
        path (str): path to the tokenizer.
        max_vocab (Optional[int]): maximum size of vocabulary.
        num_special_tokens (Optional[int]): number of added special tokens.

    Returns:
        Dict[bytes, int]: reloaded tokenizer vocab.
    """

    assert path.endswith(".json")
    from megatron.core.utils import log_single_rank

    # reload vocab
    with open(path, "r") as f:
        vocab = json.load(f)
    assert isinstance(vocab, list)
    log_single_rank(logger, logging.INFO, f"Vocab size: {len(vocab)}")
    if max_vocab is not None:
        vocab = vocab[:max_vocab]
        from megatron.core.utils import log_single_rank

        log_single_rank(logger, logging.INFO, f"Cutting vocab to first {len(vocab)} tokens")

    # build ranks
    ranks: Dict[bytes, int] = {}
    for i, x in enumerate(vocab):
        assert x.keys() == {"rank", "token_bytes", "token_str"}
        assert x["rank"] == i
        merge = base64.b64decode(x["token_bytes"])
        assert i >= 256 or merge == bytes([i])
        ranks[merge] = x["rank"] + num_special_tokens

    # sanity check
    assert len(ranks) == len(vocab)
    assert set(ranks.values()) == set(range(num_special_tokens, len(ranks) + num_special_tokens))

    return ranks


class TikTokenTokenizer(MegatronTokenizerTextAbstract, MegatronTokenizerChatTemplate):
    """TikTokenTokenizer https://github.com/openai/tiktoken."""

    def __init__(
        self,
        tokenizer_path: str,
        special_tokens: Optional[List[str]] = None,
        num_special_tokens: Optional[int] = 1000,
        chat_template: Optional[str] = None,
        pattern: Optional[str] = "v2",
        vocab_size: Optional[int] = DEFAULT_TIKTOKEN_MAX_VOCAB,
    ):
        """
        Args:
            tokenizer_path (str): path to tokenizer vocabulary.
            special_tokens (Optional[List[str]]): template for user-defined special tokens.
            num_special_tokens (int): number of special tokens to generate.
            chat_template (Optional[str]): tokenizer chat template in jinja format.
            pattern (Optional[str]): regex pattern to split the text.
            vocab_size (Optional[int]): size of vocabulary.
        """

        if not tokenizer_path or not os.path.exists(tokenizer_path):
            raise ValueError(f"tokenizer_path: {tokenizer_path} is invalid")

        if special_tokens is None:
            special_tokens = SPECIAL_TOKENS.copy()

        if pattern == "v1":
            pattern = _PATTERN_TIKTOKEN_V1
        elif pattern == "v2":
            pattern = _PATTERN_TIKTOKEN_V2
        else:
            raise ValueError(f"Expected tiktoken pattern to be `v1` or `v2`, but got {pattern}.")

        assert len(special_tokens) == len(
            set(special_tokens)
        ), f"Special tokens should be unique: {special_tokens}"
        assert len(special_tokens) <= num_special_tokens < vocab_size
        assert set(SPECIAL_TOKENS) <= set(
            special_tokens
        ), f"Custom special tokens should include {SPECIAL_TOKENS}"

        self._unk_id = special_tokens.index("<unk>")
        self._bos_id = special_tokens.index("<s>")
        self._eos_id = special_tokens.index("</s>")
        self._mask_id = special_tokens.index("<mask>")
        self._pad_id = special_tokens.index("<pad>")
        self._cls_id = special_tokens.index("<cls>")
        self._sep_id = special_tokens.index("<sep>")

        self._vocab_size = vocab_size
        self.chat_template = chat_template
        self.num_special_tokens = num_special_tokens
        special_filler = [
            SPECIAL_TOKEN_TEMPLATE.format(id=i)
            for i in range(len(special_tokens), num_special_tokens)
        ]
        self.special_filler = special_filler
        from megatron.core.utils import log_single_rank

        if special_filler:
            log_single_rank(
                logger,
                logging.INFO,
                "Adding special tokens: "
                f"{', '.join(special_tokens)}, {special_filler[0]}, ..., {special_filler[-1]}",
            )
        self.special_tokens = special_tokens + special_filler
        assert (
            len(set(self.special_tokens)) == len(self.special_tokens) == num_special_tokens
        ), self.special_tokens
        self.inner_vocab_size = vocab_size - num_special_tokens

        # reload vocab
        self.token2id = reload_mergeable_ranks(
            tokenizer_path, max_vocab=self.inner_vocab_size, num_special_tokens=num_special_tokens
        )

        self.id2token = {v: k for k, v in self.token2id.items()}
        assert set(range(num_special_tokens, vocab_size)) == set(self.id2token.keys())

        self.shifted_id2token = {i: tok for i, tok in enumerate(self.special_tokens)}
        for key, value in self.id2token.items():
            self.shifted_id2token[key + self.num_special_tokens] = value

        special_tokens_dict = {t: i for i, t in enumerate(self.special_tokens)}
        self.tokenizer = tiktoken.Encoding(
            name=Path(tokenizer_path).parent.name,
            pat_str=pattern,
            mergeable_ranks=self.token2id,
            special_tokens=special_tokens_dict,  # special tokens are handled manually
        )

        self._vocab = special_tokens_dict | self.token2id

    def text_to_tokens(self, text: str) -> List[str]:
        """Converts text to tokens."""
        token_ids = self.tokenizer.encode(text)
        return [self.tokenizer.decode_single_token_bytes(token) for token in token_ids]

    def tokens_to_text(self, tokens: List[int]) -> str:
        """Converts list of tokens to text."""
        token_ids = [self.tokenizer.encode_single_token(tokens) for tokens in tokens]
        return self.tokenizer.decode(token_ids)

    def token_to_id(self, token: str) -> int:
        """Converts a single token to it's id."""
        if token in self.special_tokens:
            return self.special_tokens.index(token)
        else:
            return self.tokenizer.encode_single_token(token) + self.num_special_tokens

    def tokens_to_ids(self, tokens: List[str]) -> List[int]:
        """Converts list of tokens to list of it's ids."""
        return [self.token_to_id(token) for token in tokens]

    def id_to_token(self, token_id: int) -> str:
        """Converts token id to token."""
        if token_id < self.num_special_tokens:
            return self.special_tokens[token_id]
        else:
            token_bytes = self.tokenizer.decode_single_token_bytes(token_id)
            return token_bytes.decode('utf-8', errors='replace')

    def ids_to_tokens(self, token_ids: List[int]) -> List[str]:
        """Converts list of tokens ids to list of tokens."""
        tokens = []
        for token_id in token_ids:
            tokens.append(self.id_to_token(token_id))

        return tokens

    def text_to_ids(self, text: str) -> List[int]:
        """Converts text to list of ids."""
        tokens = self.tokenizer.encode(text, allowed_special="all")
        return tokens

    def ids_to_text(self, tokens: List[int], remove_special_tokens: bool = False) -> str:
        """Converts list of ids to text."""
        # Filter out special tokens and adjust the remaining tokens
        if remove_special_tokens:
            adjusted_tokens = [
                t
                for t in tokens
                if t not in {self.bos_id, self.eos_id} and t >= self.num_special_tokens
            ]
        else:
            adjusted_tokens = tokens

        # Decode only if there are tokens left after filtering
        if adjusted_tokens:
            return "".join(self.ids_to_tokens(adjusted_tokens))
        else:
            return ""  # Return an empty string if all tokens were filtered out

    def add_special_tokens(self, special_tokens_dict: dict):
        """Adds special tokens to the tokenizer."""
        raise NotImplementedError("This method is not supported for TikToken tokenizers.")

    def offsets(self, ids: list[int], text: str) -> list[int]:
        """Calculate offsets."""
        try:
            return self.tokenizer.decode_with_offsets(ids)[1]
        except UnicodeDecodeError:
            # Tiktoken has an unnecessary check that raises UnicodeDecodeError
            # from `text = b"".join(token_bytes).decode("utf-8", errors="strict")`
            # which is not needed for our use case. So we re-implement it, without
            # the check.

            token_bytes = self.tokenizer.decode_tokens_bytes(ids)
            text_len = 0
            offsets = []
            for token in token_bytes:
                offsets.append(max(0, text_len - (0x80 <= token[0] < 0xC0)))
                text_len += sum(1 for c in token if not 0x80 <= c < 0xC0)
            return offsets

    @property
    def additional_special_tokens_ids(self) -> list:
        """
        Returns a list of the additional special tokens, excluding [bos, eos, pad, unk]
        and special_filler. Used to return sentinel tokens for e.g. T5.
        """
        excluding_tokens = (
            self.ids_to_tokens([self._unk_id, self._bos_id, self._eos_id]) + self.special_filler
        )
        result = [
            self.token_to_id(token)
            for token in self.special_tokens
            if token not in excluding_tokens
        ]
        return result

    @property
    def bos_id(self) -> int:
        """Returns id of beginning of sentence token."""
        return self._bos_id

    @property
    def eos_id(self) -> int:
        """Returns id of end of sentence token."""
        return self._eos_id

    @property
    def eod(self) -> int:
        """Returns id of end of document token."""
        return self._eos_id

    @property
    def unk_id(self) -> int:
        """Returns id of unknown tokens."""
        return self._unk_id

    @property
    def mask_id(self) -> int:
        """Returns id of mask token."""
        return self._mask_id

    @property
    def pad_id(self) -> int:
        """Returns id of padding token."""
        return self._pad_id

    @property
    def cls_id(self) -> int:
        """Returns id of classification token."""
        return self._cls_id

    @property
    def sep_id(self) -> int:
        """Returns id of SEP token."""
        return self._sep_id

    @property
    def vocab(self):
        """Returns tokenizer vocab."""
        return self._vocab

    @property
    def decoder(self):
        """ """
        return self.shifted_id2token

    @property
    def encoder(self):
        """ """
        return self._vocab

    @property
    def vocab_size(self) -> int:
        """Returns tokenizer vocab size."""
        return self._vocab_size

    @property
    def inv_vocab(self) -> dict:
        """Returns tokenizer vocab with reversed keys and values."""
        return self.shifted_id2token


================================================
FILE: megatron/core/tokenizers/text/models/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from megatron.core.tokenizers.text.models.bert_tokenizer import BertTokenizer
from megatron.core.tokenizers.text.models.default_tokenizer import DefaultTokenizerText
from megatron.core.tokenizers.text.models.gpt_tokenizer import GPTTokenizer
from megatron.core.tokenizers.text.models.mamba_tokenizer import MambaTokenizer
from megatron.core.tokenizers.text.models.t5_tokenizer import T5Tokenizer


================================================
FILE: megatron/core/tokenizers/text/models/bert_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from megatron.core.tokenizers.text.text_tokenizer import MegatronTokenizerText


class BertTokenizer(MegatronTokenizerText):
    """Base class for Megatron Bert tokenizer."""

    def __init__(self, path: str = None, config: dict = None, **kwargs) -> None:
        config['class_name'] = self.__class__.__name__
        config['class_path'] = self.__class__.__module__
        super().__init__(path, config, **kwargs)


================================================
FILE: megatron/core/tokenizers/text/models/default_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from megatron.core.tokenizers.text.text_tokenizer import MegatronTokenizerText


class DefaultTokenizerText(MegatronTokenizerText):
    """Base class for Megatron default tokenizer."""

    def __init__(self, path: str = None, config: dict = None, **kwargs) -> None:
        config['class_name'] = self.__class__.__name__
        config['class_path'] = self.__class__.__module__
        super().__init__(path, config, **kwargs)


================================================
FILE: megatron/core/tokenizers/text/models/gpt_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from megatron.core.tokenizers.text.text_tokenizer import MegatronTokenizerText


class GPTTokenizer(MegatronTokenizerText):
    """Base class for Megatron GPT tokenizer."""

    def __init__(self, path: str = None, config: dict = None, **kwargs) -> None:
        config['class_name'] = self.__class__.__name__
        config['class_path'] = self.__class__.__module__
        super().__init__(path, config, **kwargs)


================================================
FILE: megatron/core/tokenizers/text/models/mamba_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from megatron.core.tokenizers.text.text_tokenizer import MegatronTokenizerText


class MambaTokenizer(MegatronTokenizerText):
    """Base class for Megatron Mamba tokenizer."""

    def __init__(self, path: str = None, config: dict = None, **kwargs) -> None:
        config['class_name'] = self.__class__.__name__
        config['class_path'] = self.__class__.__module__
        super().__init__(path, config, **kwargs)


================================================
FILE: megatron/core/tokenizers/text/models/t5_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from megatron.core.tokenizers.text.text_tokenizer import MegatronTokenizerText


class T5Tokenizer(MegatronTokenizerText):
    """Base class for Megatron T5 tokenizer."""

    def __init__(self, path: str = None, config: dict = None, **kwargs) -> None:
        config['class_name'] = self.__class__.__name__
        config['class_path'] = self.__class__.__module__
        super().__init__(path, config, **kwargs)


================================================
FILE: megatron/core/tokenizers/text/parsers/__init__.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
from megatron.core.tokenizers.text.parsers.deepseek_r1_reasoning_parser import (
    DeepSeekR1ReasoningParser,
)
from megatron.core.tokenizers.text.parsers.qwen3_coder_tool_parser import Qwen3CoderToolParser

PARSER_MAPPING = {
    "deepseek-r1-reasoning": DeepSeekR1ReasoningParser,
    "qwen3-coder-tool": Qwen3CoderToolParser,
}

__all__ = ["PARSER_MAPPING"]


================================================
FILE: megatron/core/tokenizers/text/parsers/base_parser.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
from typing import Any


class BaseParser:
    """Base class for text parsers."""

    @staticmethod
    def parse(text: str, **kwargs) -> tuple[str, dict[str, Any]]:
        """
        Parses the text into a tuple containing extracted content
        and a dictionary of additional information.

        Args:
            text (str): The text to parse.

        Returns:
            tuple[str, dict[str, Any]]: A tuple containing the unprocessed text
            and a dictionary with the extracted information.
        """
        return text, {}


================================================
FILE: megatron/core/tokenizers/text/parsers/deepseek_r1_reasoning_parser.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
from megatron.core.tokenizers.text.parsers.base_parser import BaseParser


class DeepSeekR1ReasoningParser(BaseParser):
    """Parser for DeepSeek R1 style reasoning output."""

    @staticmethod
    def parse(text: str, **kwargs) -> tuple[str, dict[str, str]]:
        """
        Extracts the reasoning content from the text using <think>...</think> tags.
        Only extracts the first set of think tags.
        If an initial <think> tag is not present but a </think> tag is,
        it will infer a <think> tag at the beginning of the text.

        Args:
            text (str): The text to parse.

        Returns:
            tuple[str, dict[str, str]]: A tuple containing the unprocessed text
            and a dictionary with the extracted reasoning content.
        """

        if "</think>" in text:
            if "<think>" in text:
                # Strip the <think> prefix (it might not be present if it was part of the prompt)
                pre_text, text = text.split("<think>", maxsplit=1)
            else:
                pre_text = ""
            reasoning_content, remaining_text = text.split("</think>", maxsplit=1)
            return pre_text + remaining_text, {'reasoning': reasoning_content}
        else:
            return text, {}


================================================
FILE: megatron/core/tokenizers/text/parsers/qwen3_coder_tool_parser.py
================================================
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import ast
import json
import logging
import re
import uuid
from types import SimpleNamespace
from typing import Any

from megatron.core.tokenizers.text.parsers.base_parser import BaseParser

logger = logging.getLogger(__name__)

# These map to vLLM types but we just use dictionaries for now
ToolCall = dict[str, Any]
FunctionCall = dict[str, Any]
ChatCompletionToolsParam = dict[str, Any]
ChatCompletionRequest = dict[str, Any]
ExtractedToolCallInformation = dict


class _Qwen3CoderToolParser:

    # Sentinel tokens for streaming mode
    tool_call_start_token: str = "<tool_call>"
    tool_call_end_token: str = "</tool_call>"
    tool_call_prefix: str = "<function="

    # Regex patterns
    tool_call_complete_regex = re.compile(r"<tool_call>(.*?)</tool_call>", re.DOTALL)
    tool_call_regex = re.compile(r"<tool_call>(.*?)</tool_call>|<tool_call>(.*?)$", re.DOTALL)
    tool_call_function_regex = re.compile(r"<function=(.*?)</function>|<function=(.*)$", re.DOTALL)
    tool_call_parameter_regex = re.compile(
        r"<parameter=(.*?)(?:</parameter>|(?=<parameter=)|(?=</function>)|$)", re.DOTALL
    )

    def _generate_tool_call_id(self) -> str:
        """Generate a unique tool call ID."""
        return f"call_{uuid.uuid4().hex[:24]}"

    def _get_arguments_config(
        self, func_name: str, tools: list[ChatCompletionToolsParam] | None
    ) -> dict:
        """Extract argument configuration for a function."""
        if tools is None:
            return {}
        for config in tools:
            config = SimpleNamespace(**config)  # Convert to SimpleNamespace for ease of access
            if not hasattr(config, "type") or not (
                hasattr(config, "function") and hasattr(config.function, "name")
            ):
                continue
            if config.type == "function" and config.function.name == func_name:
                if not hasattr(config.function, "parameters"):
                    return {}
                params = config.function.parameters
                if isinstance(params, dict) and "properties" in params:
                    return params["properties"]
                elif isinstance(params, dict):
                    return params
                else:
                    return {}
        logger.debug("Tool '%s' is not defined in the tools list.", func_name)
        return {}

    def _convert_param_value(
        self, param_value: str, param_name: str, param_config: dict, func_name: str
    ) -> Any:
        """Convert parameter value based on its type in the schema."""
        # Handle null value for any type
        if param_value.lower() == "null":
            return None

        if param_name not in param_config:
            if param_config != {}:
                logger.debug(
                    "Parsed parameter '%s' is not defined in the tool "
                    "parameters for tool '%s', directly returning the "
                    "string value.",
                    param_name,
                    func_name,
                )
            return param_value

        if isinstance(param_config[param_name], dict) and "type" in param_config[param_name]:
            param_type = str(param_config[param_name]["type"]).strip().lower()
        else:
            param_type = "string"
        if param_type in ["string", "str", "text", "varchar", "char", "enum"]:
            return param_value
        elif (
            param_type.startswith("int")
            or param_type.startswith("uint")
            or param_type.startswith("long")
            or param_type.startswith("short")
            or param_type.startswith("unsigned")
        ):
            try:
                return int(param_value)
            except (ValueError, TypeError):
                logger.debug(
                    "Parsed value '%s' of parameter '%s' is not an "
                    "integer in tool '%s', degenerating to string.",
                    param_value,
                    param_name,
                    func_name,
                )
                return param_value
        elif param_type.startswith("num") or param_type.startswith("float"):
            try:
                float_param_value = float(param_value)
                return (
                    float_param_value
                    if float_param_value - int(float_param_value) != 0
                    else int(float_param_value)
                )
            except (ValueError, TypeError):
                logger.debug(
                    "Parsed value '%s' of parameter '%s' is not a float "
                    "in tool '%s', degenerating to string.",
                    param_value,
                    param_name,
                    func_name,
                )
                return param_value
        elif param_type in ["boolean", "bool", "binary"]:
            param_value = param_value.lower()
            if param_value not in ["true", "false"]:
                logger.debug(
                    "Parsed value '%s' of parameter '%s' is not a boolean "
                    "(`true` or `false`) in tool '%s', degenerating to "
                    "false.",
                    param_value,
                    param_name,
                    func_name,
                )
            return param_value == "true"
        else:
            if (
                param_type in ["object", "array", "arr"]
                or param_type.startswith("dict")
                or param_type.startswith("list")
            ):
                try:
                    param_value = json.loads(param_value)
                    return param_value
                except (json.JSONDecodeError, TypeError, ValueError):
                    logger.debug(
                        "Parsed value '%s' of parameter '%s' cannot be "
                        "parsed with json.loads in tool '%s', will try "
                        "other methods to parse it.",
                        param_value,
                        param_name,
                        func_name,
                    )
            try:
                param_value = ast.literal_eval(param_value)  # safer
            except (ValueError, SyntaxError, TypeError):
                logger.debug(
                    "Parsed value '%s' of parameter '%s' cannot be "
                    "converted via Python `ast.literal_eval()` in tool "
                    "'%s', degenerating to string.",
                    param_value,
                    param_name,
                    func_name,
                )
            return param_value

    def _parse_xml_function_call(
        self, function_call_str: str, tools: list[ChatCompletionToolsParam] | None
    ) -> ToolCall | None:
        # Extract function name
        end_index = function_call_str.index(">")
        function_name = function_call_str[:end_index]
        param_config = self._get_arguments_config(function_name, tools)
        parameters = function_call_str[end_index + 1 :]
        param_dict = {}
        for match_text in self.tool_call_parameter_regex.findall(parameters):
            idx = match_text.index(">")
            param_name = match_text[:idx]
            param_value = str(match_text[idx + 1 :])
            # Remove prefix and trailing \n
            if param_value.startswith("\n"):
                param_value = param_value[1:]
            if param_value.endswith("\n"):
                param_value = param_value[:-1]

            param_dict[param_name] = self._convert_param_value(
                param_value, param_name, param_config, function_name
            )
        return ToolCall(
            type="function",
            id=self._generate_tool_call_id(),
            function=FunctionCall(
                name=function_name, arguments=json.dumps(param_dict, ensure_ascii=False)
            ),
        )

    def _get_function_calls(self, model_output: str) -> list[str]:
        # Find all tool calls
        matched_ranges = self.tool_call_regex.findall(model_output)
        raw_tool_calls = [match[0] if match[0] else match[1] for match in matched_ranges]

        # Back-off strategy if no tool_call tags found
        if len(raw_tool_calls) == 0:
            raw_tool_calls = [model_output]

        raw_function_calls = []
        for tool_call in raw_tool_calls:
            raw_function_calls.extend(self.tool_call_function_regex.findall(tool_call))

        function_calls = [match[0] if match[0] else match[1] for match in raw_function_calls]
        return function_calls

    def extract_tool_calls(
        self, model_output: str, tools: list[ChatCompletionToolsParam] | None
    ) -> ExtractedToolCallInformation:
        """Extracts the tool calls from the text using <tool_call>...</tool_call> tags."""
        # Quick check to avoid unnecessary processing
        if self.tool_call_prefix not in model_output:
            return ExtractedToolCallInformation(
                tools_called=False, tool_calls=[], content=model_output
            )

        try:
            function_calls = self._get_function_calls(model_output)
            if len(function_calls) == 0:
                return ExtractedToolCallInformation(
                    tools_called=False, tool_calls=[], content=model_output
                )

            tool_calls = [
                self._parse_xml_function_call(function_call_str, tools)
                for function_call_str in function_calls
            ]

            # Extract content before tool calls
            content_index = model_output.find(self.tool_call_start_token)
            idx = model_output.find(self.tool_call_prefix)
            content_index = content_index if content_index >= 0 else idx
            content = model_output[:content_index]  # .rstrip()

            return ExtractedToolCallInformation(
                tools_called=(len(tool_calls) > 0),
                tool_calls=tool_calls,
                content=content if content else None,
            )

        except Exception:
            logger.exception("Error in extracting tool call from response.")
            return ExtractedToolCallInformation(
                tools_called=False, tool_calls=[], content=model_output
            )


class Qwen3CoderToolParser(BaseParser):
    """Parser for Qwen3 Coder style tool calls."""

    @staticmethod
    def parse(text: str, **kwargs) -> tuple[str, dict[str, list[dict]]]:
        """
        Extracts the tool calls from the text using <tool_call>...</tool_call> tags.
        Uses the _Qwen3CoderToolParser class (copied from vLLM) to extract the tool calls.

        Args:
            text (str): The text to parse.

        Returns:
            tuple[str, dict[str, str]]: A tuple containing the unprocessed text
            and a dictionary with the extracted tool calls.
        """

        information = _Qwen3CoderToolParser().extract_tool_calls(
            text, tools=kwargs.get("tools", [])
        )
        if information.get("tools_called", False):
            return information.get("content", ""), {"tool_calls": information.get("tool_calls", [])}
        else:
            return text, {}


================================================
FILE: megatron/core/tokenizers/text/text_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from collections import OrderedDict
from typing import Dict, List, Optional, Union

from megatron.core.tokenizers.base_tokenizer import MegatronTokenizerBase
from megatron.core.tokenizers.text.libraries.abstract_tokenizer import MegatronTokenizerTextAbstract

TOKENIZER_MAPPING_LIBRARIES = OrderedDict(
    [
        ("sentencepiece", "SentencePieceTokenizer"),
        ("huggingface", "HuggingFaceTokenizer"),
        ("megatron", "MegatronHFTokenizer"),
        ("tiktoken", "TikTokenTokenizer"),
        ("byte-level", "ByteLevelTokenizer"),
        ("null-text", "NullTokenizer"),
        ("sft", "SFTTokenizer"),
    ]
)


class MegatronTokenizerText(MegatronTokenizerBase):
    """Base class for Megatron text tokenizers."""

    def __init__(self, path: str, config: dict, **kwargs) -> None:
        """
        Args:
            path (str): path to the tokenizer model.
            config (dict): tokenizer parameters.
                library (str): tokenizer library.
                class_name (str): name of tokenizer class.
                class_path (str): path to tokenizer class.
                model_type (str): type of the model to be used with tokenizer.
                chat_template (str): tokenizer chat template.
        """

        super().__init__(path, config, **kwargs)
        self._tokenizer = self._restore_model(**kwargs)
        self.additional_args = kwargs
        self.path = path

        config_template = config.get("chat_template", None)
        tokenizer_template = getattr(self._tokenizer, "chat_template", None)
        kwargs_template = kwargs.get("chat_template", None)

        if config_template is not None:
            self.chat_template = config_template
        elif tokenizer_template is not None:
            self.chat_template = tokenizer_template
        else:
            self.chat_template = kwargs_template

    def _restore_model(self, **kwargs) -> MegatronTokenizerTextAbstract:
        """Returns tokenizer library object."""

        import megatron.core.tokenizers.text.libraries as tokenizers

        library_class = getattr(tokenizers, TOKENIZER_MAPPING_LIBRARIES[self.library])

        if self.library in ['byte-level', 'null-text']:
            return library_class(**kwargs)
        else:
            return library_class(self.path, **kwargs)

    def tokenize(self, text: str) -> List[int]:
        """
        Text tokenization.

        Args:
            text (str): text to be tokenized.

        Returns:
            list: list of ids.
        """

        return self._tokenizer.text_to_ids(text)

    def detokenize(self, ids: List[int]) -> str:
        """
        Text detokenization.

        Args:
            ids (list): text to be tokenized.

        Returns:
            text: detokenized text.
        """

        return self._tokenizer.ids_to_text(ids)

    def apply_chat_template(
        self, conversation: List[Dict[str, str]], chat_template: Optional[str] = None, **kwargs
    ) -> Union[str, list]:
        """
        Applies chat template to the conversation.

        Args:
            conversation (list):
            chat_template (Optional[str]): chat template to be use. If not specified,
                tokenizer's chat template will be used.

        Returns:
            Union[str, list]: a chat with applied chat template or a list of token ids.
        """

        # Use tokenizer's chat template if chat template wasn't specified.
        if not chat_template:
            assert self.chat_template is not None, "`chat_template` was not specified."

            chat_template = self.chat_template

        return self._tokenizer.apply_chat_template(
            conversation=conversation, chat_template=chat_template, **kwargs
        )

    def tokenize_conversation(
        self, conversation: List[Dict], return_target: bool, add_generation_prompt: bool
    ):
        """Convert a conversation to tokens. Needed for SFTTokenizer.

        Args:
            conversation (List[Dict]): Sequence of system/user/assistant messages.
                Must be in the following format:
                [
                    {"role": "system", "content": "something"},
                    {"role": "user", "content": "something1"},
                    {"role": "assistant", "content": "something2"},
                ]
            return_target (bool): Return target tokens with system and assistant masked.
            add_generation_prompt (bool): Add assistant prefix to the end.
        """

        if self.library == 'sft':
            return self._tokenizer.tokenize_conversation(
                conversation=conversation,
                return_target=return_target,
                add_generation_prompt=add_generation_prompt,
            )
        else:
            raise NotImplementedError("This method is supported only for SFTTokenizer.")

    def save_pretrained(self, path: str) -> None:
        """
        Saves HF tokenizer files.

        Args:
            path (str): path where to save tokenizer files.
        """

        if self.library in ['huggingface', 'megatron']:
            self._tokenizer.save_pretrained(path)
        else:
            raise ValueError(
                f"save_pretrained method is not supported with {self.library} library."
            )

    def add_special_tokens(self, special_tokens: Union[list, dict]) -> None:
        """
        Adds a dictionary of special tokens (eos, pad, cls...).
            Tokens are only added if they are not already in the vocabulary.
            Indexed starting from the last index of the current vocabulary.

        Args:
            special_tokens_dict: dict of string. Keys should be in the list of predefined
                special attributes: [``bos_token``, ``eos_token``, ``unk_token``, ``sep_token``,
                ``pad_token``, ``cls_token``, ``mask_token``, ``additional_special_tokens``].
        """

        self._tokenizer.add_special_tokens(special_tokens)

    def offsets(self, ids: list[int], text: str) -> list[int]:
        """Calculate offsets."""
        return self._tokenizer.offsets(ids=ids, text=text)

    @property
    def space_sensitive(self):
        """Check if tokenizer is space sensetive."""
        if self.library in ['sentencepiece', 'huggingface']:
            return self._tokenizer.space_sensitive
        else:
            raise NotImplementedError(
                f"This method is not supported for {self.library} tokenizers."
            )

    @property
    def additional_special_tokens_ids(self) -> list:
        """Returns a list of the additional special tokens."""
        return self._tokenizer.additional_special_tokens_ids

    @property
    def vocab_size(self) -> int:
        """Returns vocabulary size."""
        return self._tokenizer.vocab_size

    @property
    def vocab(self):
        """Returns tokenizer vocabulary."""
        return self._tokenizer.vocab

    @property
    def unique_identifiers(self) -> OrderedDict:
        """Returns a dictionary of unique identifiers."""
        unique_identifiers = OrderedDict()
        unique_identifiers["class"] = f"{type(self).__module__}.{type(self).__qualname__}"
        unique_identifiers["tokenizer_path"] = self.path
        for arg in self.additional_args:
            unique_identifiers[arg] = str(self.additional_args[arg])

        return unique_identifiers

    @property
    def pad(self) -> int:
        """Returns id of padding token."""
        return self._tokenizer.pad_id

    @property
    def pad_id(self) -> int:
        """Returns id of padding token. Need for NeMo."""
        return self._tokenizer.pad_id

    @property
    def eod(self) -> int:
        """Returns id of end of document token."""
        return self._tokenizer.eod

    @property
    def bos(self) -> int:
        """Returns id of beginning of sentence token."""
        return self._tokenizer.bos_id

    @property
    def bos_id(self) -> int:
        """Returns id of beginning of sentence token. Need for NeMo."""
        return self._tokenizer.bos_id

    @property
    def eos_id(self) -> int:
        """Returns id of end of sentence token."""
        return self._tokenizer.eos_id

    @property
    def eos(self) -> int:
        """Returns id of end of sentence token. Need for legacy."""
        return self._tokenizer.eos_id

    @property
    def unk(self) -> int:
        """Returns id of of unknown token."""
        return self._tokenizer.unk_id

    @property
    def unk_id(self) -> int:
        """Returns id of of unknown token. Need for NeMo."""
        return self._tokenizer.unk_id

    @property
    def mask(self) -> int:
        """Returns id of of mask token."""
        return self._tokenizer.mask_id

    @property
    def mask_id(self) -> int:
        """Returns id of of mask token. Need for NeMo."""
        return self._tokenizer.mask_id

    @property
    def cls(self) -> int:
        """Returns id of classification token."""
        return self._tokenizer.cls_id

    @property
    def cls_id(self) -> int:
        """Returns id of classification token. Need for NeMo."""
        return self._tokenizer.cls_id

    @property
    def sep(self) -> int:
        """Returns id of SEP token."""
        return self._tokenizer.sep_id

    @property
    def sep_id(self) -> int:
        """Returns id of SEP token. Need for NeMo."""
        return self._tokenizer.sep_id

    @property
    def vocab_file(self) -> str:
        """Returns vocabulary file path if specified."""
        return self.additional_args.get('vocab_file', None)

    @property
    def merges_file(self) -> str:
        """Returns merges file path if specified."""
        return self.additional_args.get('merges_file', None)

    @property
    def inv_vocab(self) -> dict:
        """Returns tokenizer vocab with reversed keys and values."""
        return self._tokenizer.inv_vocab


================================================
FILE: megatron/core/tokenizers/utils/build_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import logging
import math

from megatron.core.tokenizers import MegatronTokenizer

MEGATRON_TOKENIZERS = ['BertWordPieceLowerCase', 'BertWordPieceCase', 'GPT2BPETokenizer']

SP_TOKENIZERS = ['SentencePieceTokenizer', 'GPTSentencePieceTokenizer', 'Llama2Tokenizer']

logger = logging.getLogger(__name__)


def build_tokenizer(args, **kwargs):
    """Initialize tokenizer."""
    kwargs = {}
    tokenizer_library = None
    tokenizer_path = None
    if args.tokenizer_type in MEGATRON_TOKENIZERS:
        tokenizer_library = 'megatron'
        tokenizer_path = args.tokenizer_type
        kwargs['additional_special_tokens'] = (
            args.tokenizer_special_tokens if args.tokenizer_special_tokens else []
        )
        if tokenizer_path == 'BertWordPieceCase':
            special_tokens = {}
            special_tokens['additional_special_tokens'] = [f'<extra_id_{i}>' for i in range(100)]
            kwargs = special_tokens
        kwargs['vocab_file'] = args.vocab_file
        kwargs['merges_file'] = args.merge_file
        kwargs['use_fast'] = not args.tokenizer_hf_no_use_fast
        kwargs['trust_remote_code'] = args.trust_remote_code
        kwargs['include_special_tokens'] = not args.tokenizer_hf_no_include_special_tokens
    elif args.tokenizer_type in SP_TOKENIZERS:
        tokenizer_library = 'sentencepiece'
        tokenizer_path = args.tokenizer_model
        kwargs['legacy'] = args.tokenizer_sentencepiece_legacy
        kwargs['special_tokens'] = args.tokenizer_special_tokens
    elif args.tokenizer_type == 'TikTokenizer':
        tokenizer_library = 'tiktoken'
        tokenizer_path = args.tokenizer_model
        if args.tiktoken_pattern:
            kwargs['pattern'] = args.tiktoken_pattern
        if args.vocab_size:
            kwargs['vocab_size'] = args.vocab_size
        kwargs['num_special_tokens'] = args.tiktoken_num_special_tokens
        kwargs['special_tokens'] = args.tokenizer_special_tokens
    elif args.tokenizer_type == 'HuggingFaceTokenizer':
        tokenizer_library = 'huggingface'
        tokenizer_path = args.tokenizer_model
        kwargs['vocab_file'] = args.vocab_file
        kwargs['merges_file'] = args.merge_file
        kwargs['additional_special_tokens'] = (
            args.tokenizer_special_tokens if args.tokenizer_special_tokens else []
        )
        kwargs['use_fast'] = not args.tokenizer_hf_no_use_fast
        kwargs['trust_remote_code'] = args.trust_remote_code
        kwargs['include_special_tokens'] = not args.tokenizer_hf_no_include_special_tokens
    elif args.tokenizer_type == 'MultimodalTokenizer':
        tokenizer_library = 'multimodal'
        kwargs['prompt_format'] = args.tokenizer_prompt_format
        kwargs['special_tokens'] = args.special_tokens
        kwargs['image_tag_type'] = args.image_tag_type
        kwargs['force_system_message'] = args.force_system_message
    elif args.tokenizer_type == 'SFTTokenizer':
        tokenizer_library = 'sft'
        tokenizer_path = args.tokenizer_model
        kwargs['prompt_format'] = args.sft_tokenizer_prompt_format
    elif args.tokenizer_type in ['NullTokenizer', 'NullMultimodalTokenizer']:
        tokenizer_library = (
            'null-text' if args.tokenizer_type == 'NullTokenizer' else 'null-multimodal'
        )
        metadata = {'library': tokenizer_library}
        if args.vocab_size:
            kwargs['vocab_size'] = args.vocab_size
        tokenizer = MegatronTokenizer.from_pretrained(metadata_path=metadata, **kwargs)

        # Add vocab size (if not already set from a checkpoint).
        _set_padded_vocab_size(args, tokenizer)

        return tokenizer

    if args.tokenizer_metadata:
        metadata = args.tokenizer_metadata
    else:
        metadata = {'library': tokenizer_library}
    tokenizer = MegatronTokenizer.from_pretrained(
        tokenizer_path=tokenizer_path, metadata_path=metadata, **kwargs
    )

    # Add vocab size (if not already set from a checkpoint).
    _set_padded_vocab_size(args, tokenizer)

    return tokenizer


def vocab_size_with_padding(orig_vocab_size, args, logging_enabled=True):
    """Pad vocab size so it is divisible by model parallel size and
    still having GPU friendly size."""

    after = orig_vocab_size
    multiple = args.make_vocab_size_divisible_by * args.tensor_model_parallel_size
    after = int(math.ceil(after / multiple) * multiple)
    if args.rank == 0 and logging_enabled:
        logger.info(
            f' > padded vocab (size: {orig_vocab_size}) with '
            f'{after - orig_vocab_size} dummy tokens '
            f'(new size: {after})'
        )
    return after


def _set_padded_vocab_size(args, tokenizer):
    """Sets padded vocab size if None."""
    if getattr(args, "padded_vocab_size", None) is None:
        args.padded_vocab_size = vocab_size_with_padding(tokenizer.vocab_size, args)


================================================
FILE: megatron/core/tokenizers/vision/__init__.py
================================================
# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.

from megatron.core.tokenizers.vision.vision_tokenizer import MegatronTokenizerVision


================================================
FILE: megatron/core/tokenizers/vision/libraries/__init__.py
================================================
# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.

from megatron.core.tokenizers.vision.libraries.multimodal_tokenizer import (
    MegatronMultimodalTokenizer,
)
from megatron.core.tokenizers.vision.libraries.null_multimodal_tokenizer import (
    MegatronNullMultimodalTokenizer,
)


================================================
FILE: megatron/core/tokenizers/vision/libraries/multimodal_tokenizer.py
================================================
# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.

from typing import Dict, List, Union

import numpy as np

from megatron.core.models.multimodal.llava_model import IGNORE_INDEX, IMAGE_TOKEN
from megatron.core.tokenizers.text.libraries.sft_tokenizer import PromptConfig

try:
    import transformers

    HAVE_TRANSFORMERS = True
except (ImportError, ModuleNotFoundError):
    HAVE_TRANSFORMERS = False


IMAGE_TAGS = {
    "nvlm": ("<Image>", "</Image>"),
    "internvl": ("<img>", "</img>"),
    "": None,  # Image tag not used.
}


# The default mistral template raises exceptions so we use a custom one.
mistral_custom_template = """
{{- bos_token }}
{%- for message in messages %}
    {%- if message['role'] == 'user' %}
        {{- '[INST] ' + message['content'] + '[/INST]' }}
    {%- elif message['role'] == 'assistant' %}
        {{- ' ' + message['content'] + eos_token}}
    {%- endif %}
{%- endfor %}
{% if add_generation_prompt %}{{ ' ' }}{% endif %}
"""


nvlm_yi_34b_template = "{{- bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"  # pylint: disable=line-too-long


qwen2p0_custom_template = "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"  # pylint: disable=line-too-long


# Note: this is the same template as
# https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct/blob/main/tokenizer_config.json#L2053
# but we removed the forced system message.
llama3p1_chat_template = """{{- bos_token }}\n{%- if custom_tools is defined %}\n    {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n    {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n    {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content']|trim %}\n    {%- set messages = messages[1:] %}\n{%- else %}\n    {%- set system_message = none %}\n{%- endif %}\n\n{%- if system_message is not none %}{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n    {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n    {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}{%- if tools is not none and not tools_in_user_message %}\n    {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{%-endif %}{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n    {#- Extract the first user message so we can plug it in here #}\n    {%- if messages | length != 0 %}\n        {%- set first_user_message = messages[0]['content']|trim %}\n        {%- set messages = messages[1:] %}\n    {%- else %}\n        {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n    {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n    {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n    {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n    {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n    {%- elif 'tool_calls' in message %}\n        {%- if not message.tool_calls|length == 1 %}\n            {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n        {%- endif %}\n        {%- set tool_call = message.tool_calls[0].function %}\n        {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n            {%- for arg_name, arg_val in tool_call.arguments | items %}\n                {{- arg_name + '=\"' + arg_val + '\"' }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- endif %}\n                {%- endfor %}\n            {{- \")\" }}\n        {%- else  %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n            {{- '\"parameters\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- \"}\" }}\n        {%- endif %}\n        {%- if builtin_tools is defined %}\n            {#- This means we're in ipython mode #}\n            {{- \"<|eom_id|>\" }}\n        {%- else %}\n            {{- \"<|eot_id|>\" }}\n        {%- endif %}\n    {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n        {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n        {%- if message.content is mapping or message.content is iterable %}\n            {{- message.content | tojson }}\n        {%- else %}\n            {{- message.content }}\n        {%- endif %}\n        {{- \"<|eot_id|>\" }}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n"""  # pylint: disable=line-too-long

nemotron_custom_template = "{{- bos_token }}{% for message in messages %}{{'<SPECIAL_14>' + message['role'] + '\n' + message['content'] + '<SPECIAL_15>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<SPECIAL_14>assistant\n' }}{% endif %}"  # pylint: disable=line-too-long

nemotron_aligned_custom_template = "{{- bos_token}}{% for message in messages %}{{message['role'] + '\n' + message['content'] + '\n' + '[PREFIX]'}}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant\n' }}{% endif %}"  # pylint: disable=line-too-long


class MegatronMultimodalTokenizer:
    """Multimodal Tokenizer."""

    def __init__(
        self,
        path: str,
        prompt_format: str,
        special_tokens: List[str],
        image_tag_type: str,
        force_system_message: bool = False,
        **kwargs,
    ):
        """Tokenizer with a support for non-text inputs.

        Note: Currently, only HuggingFaceTokenizer is supported as the underlying text tokenizer.

        Args:
            path (str): Path to the underlying tokenizer.
            prompt_format (str): Prompt format for the tokenizer.
            special_tokens (List[str]): Non-text tokens.
            image_tag_type (str): Image tag to apply, if any. For example <img><image></img>.
        """
        if not HAVE_TRANSFORMERS:
            raise ImportError(
                "MegatronMultimodalTokenizer currently requires "
                "transformers library to be installed."
            )
        if prompt_format == "nvlm-yi-34b":
            kwargs.update({"from_slow": True, "legacy": False, "add_bos_token": True})
        tokenizer = transformers.AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path=path, **kwargs
        )

        self._vocab_size = len(tokenizer)

        num_added_tokens = tokenizer.add_tokens(special_tokens, special_tokens=True)
        assert num_added_tokens == len(
            special_tokens
        ), f"failed to add {len(special_tokens)} special tokens; only added {num_added_tokens}"

        self.tokenizer = tokenizer

        if prompt_format == "mistral":
            # Mistral format doesn't have prefix for the assistant message.
            self._prompt_config = PromptConfig(
                assistant_prefix_len=0,
                pad_token_id=tokenizer.unk_token_id,
                custom_chat_template=mistral_custom_template,
                has_bos=True,
                has_system_role=False,
            )
        elif prompt_format == "llama3":
            # "<|start_header_id|>assistant<|end_header|>\n\n" is the prefix for assistant messages.
            self._prompt_config = PromptConfig(
                assistant_prefix_len=4,
                pad_token_id=tokenizer.convert_tokens_to_ids("<|end_of_text|>"),
                custom_chat_template=None,
                has_bos=True,
                has_system_role=True,
            )
        elif prompt_format in ("llama3p1", "llama3p2"):
            # "<|start_header_id|>assistant<|end_header|>\n\n" is the prefix for assistant messages.
            # That occupies 4 tokens and can be masked in the target.
            self._prompt_config = PromptConfig(
                assistant_prefix_len=4,
                pad_token_id=tokenizer.convert_tokens_to_ids("<|finetune_right_pad_id|>"),
                custom_chat_template=llama3p1_chat_template,
                has_bos=True,
                has_system_role=True,
            )
        elif prompt_format == "nvlm-yi-34b":
            self._prompt_config = PromptConfig(
                assistant_prefix_len=4,
                pad_token_id=tokenizer.pad_token_id,
                custom_chat_template=nvlm_yi_34b_template,
                has_bos=True,
                has_system_role=True,
            )
        elif prompt_format == "chatml":
            # "<|im_start|>assistant\n" is the prefix for assistant messages
            self._prompt_config = PromptConfig(
                assistant_prefix_len=3,
                pad_token_id=tokenizer.pad_token_id,
                custom_chat_template=None,
                has_bos=False,
                has_system_role=True,
            )
        elif prompt_format == "nemotron5":
            # "<|im_start|>assistant\n" is the prefix.
            self._prompt_config = PromptConfig(
                assistant_prefix_len=3,
                pad_token_id=tokenizer.convert_tokens_to_ids("<SPECIAL_233>"),
                custom_chat_template=nemotron_custom_template,
                has_bos=True,
                has_system_role=True,
            )
        elif prompt_format == "nemotron5-aligned":
            # "Assistant\n" is the prefix.
            self._prompt_config = PromptConfig(
                assistant_prefix_len=2,
                pad_token_id=tokenizer.convert_tokens_to_ids("<SPECIAL_233>"),
                custom_chat_template=nemotron_aligned_custom_template,
                has_bos=True,
                has_system_role=True,
            )
        elif prompt_format in ("qwen2p0", "qwen2p5"):
            # "<|im_start|>assistant\n" is the prefix for assistant messages
            self._prompt_config = PromptConfig(
                assistant_prefix_len=3,
                pad_token_id=tokenizer.pad_token_id,
                custom_chat_template=qwen2p0_custom_template,
                has_bos=False,
                has_system_role=True,
                force_system_message=force_system_message,
                system_default={
                    "role": "system",
                    "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",  # pylint: disable=line-too-long
                },
            )
        elif prompt_format == "llama3p1":
            # "<|start_header_id|>assistant<|end_header|>\n\n" is the prefix for assistant messages.
            # That occupies 4 tokens and can be masked in the target.
            self._prompt_config = PromptConfig(
                assistant_prefix_len=4,
                pad_token_id=tokenizer.convert_tokens_to_ids("<|finetune_right_pad_id|>"),
                custom_chat_template=llama3p1_chat_template,
                has_bos=True,
                has_system_role=True,
            )
        else:
            raise NotImplementedError("unknown multimodal tokenizer type", prompt_format)

        self._prompt_format = prompt_format
        self._image_tag = IMAGE_TAGS[image_tag_type]

    def _apply_image_tag(self, text: Union[str, List[Dict]]):
        """Surround <image> with image tags such as <img> and </img>."""
        if self._image_tag is None:
            return text

        replacement = f"{self._image_tag[0]}{IMAGE_TOKEN}{self._image_tag[1]}"

        if isinstance(text, list):
            for turn in text:
                turn["content"] = turn["content"].replace(IMAGE_TOKEN, replacement)
        else:
            text = text.replace(IMAGE_TOKEN, replacement)

        return text

    def tokenize(self, text: Union[str, List[Dict]]):
        """Tokenize conversation or string input."""
        if isinstance(text, list):
            # This code path is used by the inference code currently.
            return self.tokenize_conversation(text, False, True).tolist()

        return self._encode(text)

    def _encode(self, text: str):
        """Tokenize text input."""
        text = self._apply_image_tag(text)
        return self.tokenizer.encode(text)

    def tokenize_conversation(
        self, conversation: List[Dict], return_target: bool, add_generation_prompt: bool
    ):
        """Convert a conversation to tokens.

        Args:
            conversation (List[Dict]): Sequence of system/user/assistant messages.
                Must be in the following format:
                [
                    {"role": "user", "content": "something"},
                    {"role": "assistant", "content": "something2"},
                ]
            return_target (bool): Return target tokens with system and assistant masked.
            add_generation_prompt (bool): Add assistant prefix to the end.
        """
        # Skip system message if the tokenizer doesn't have a system role.
        if not self._prompt_config.has_system_role and conversation[0]["role"] == "system":
            conversation = conversation[1:]

        if self._prompt_config.force_system_message:
            assert (
                self._prompt_config.system_default is not None
            ), "Trying to force system message with empty system default"
            if conversation[0]["role"] == "system":
                conversation[0] = self._prompt_config.system_default
            else:
                conversation = [self._prompt_config.system_default] + conversation

        if self._prompt_format == "nemotron5-aligned":
            for turn in conversation:
                tmp = turn['role']
                turn['role'] = tmp[:1].upper() + tmp[1:]

        # Apply possible image tag.
        conversation = self._apply_image_tag(conversation)

        tokens = self.tokenizer.apply_chat_template(
            conversation,
            tokenize=True,
            add_generation_prompt=add_generation_prompt,
            return_assistant_token_mask=False,
            return_tensors="np",
            chat_template=self._prompt_config.custom_chat_template,
        )[0]

        if not return_target:
            return tokens

        target = tokens.copy()

        # Mask system and user tokens in the target.
        idx = 0
        for turn_idx, turn in enumerate(conversation):
            if len(turn["content"]) == 0:
                raise ValueError(f"empty turn in conversation: {conversation}. Skipping.")

            turn_tokens = self.tokenizer.apply_chat_template(
                [turn], tokenize=True, chat_template=self._prompt_config.custom_chat_template
            )

            # There should be only one BOS at the very beginning.
            # After the first turn, skip BOS token.
            if self._prompt_config.has_bos and turn_idx > 0:
                turn_tokens = turn_tokens[1:]

            turn_len = len(turn_tokens)

            role = turn["role"].lower()
            if role in ("system", "user"):
                target[idx : idx + turn_len] = IGNORE_INDEX
            elif role == "assistant":
                if IMAGE_TOKEN in turn["content"]:
                    raise RuntimeError(f"{IMAGE_TOKEN} not allowed in assistant content!")

                if self._prompt_config.assistant_prefix_len > 0:
                    target[idx : idx + self._prompt_config.assistant_prefix_len] = IGNORE_INDEX

            assert np.allclose(
                tokens[idx : idx + turn_len], turn_tokens
            ), f"expected turn tokens to match tokens in conversation {conversation}"

            idx += turn_len

        assert idx == len(tokens), f"mismatch in target masking the conversation {conversation}"

        return tokens, target

    def convert_tokens_to_ids(self, tokens: List[str]):
        """Convert tokens to IDs."""
        return self.tokenizer.convert_tokens_to_ids(tokens)

    def detokenize(self, tokens: List[int]):
        """Detokenize tokens."""
        return self.tokenizer.decode(tokens)

    def add_special_tokens(self, special_tokens: List[str]):
        """Add special tokens."""
        self.tokenizer.add_tokens(special_tokens, special_tokens=True)

    def get_special_tokens(self):
        """Get special tokens."""
        return self.tokenizer.get_added_vocab()

    @property
    def pad(self):
        """Pad token ID."""
        return self._prompt_config.pad_token_id

    @property
    def eod(self):
        """End of sentence token ID."""
        return self.tokenizer.eos_token_id

    @property
    def vocab_size(self):
        """Vocabulary size."""
        return self._vocab_size

    @property
    def vocab(self):
        """Tokenizer vocab."""
        return self.tokenizer.get_vocab()


================================================
FILE: megatron/core/tokenizers/vision/libraries/null_multimodal_tokenizer.py
================================================
# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.


class MegatronNullMultimodalTokenizer:
    """Megatron Null Multimodal Tokenizer"""

    def __init__(self, vocab_size, image_token=None, image_token_id=None):
        """ """
        self._vocab_size_without_eod = int(vocab_size)
        self._eod_id = self._vocab_size_without_eod

        from megatron.core.models.multimodal.llava_model import (
            DEFAULT_IMAGE_TOKEN_INDEX,
            IMAGE_TOKEN,
        )

        self._image_token = image_token if image_token is not None else IMAGE_TOKEN
        self._image_token_id = (
            image_token_id if image_token_id is not None else DEFAULT_IMAGE_TOKEN_INDEX
        )

    def tokenize(self, text):
        """
        Text tokenization.

        Args:
            text (str | list): text to be tokenized.

        Returns:
            list: list of ids.
        """
        return [int(x) for x in text.split(' ')]

    def detokenize(self, ids):
        """
        Text detokenization.

        Args:
            ids (list): text to be tokenized.

        Returns:
            text: detokenized text.
        """
        text = [str(x) for x in ids]
        return ' '.join(text)

    def offsets(self, ids: list[int], text: str) -> list[int]:
        """Offsets calculation."""
        offsets, start_idx = [], 0
        for id_ in ids:
            offsets.append(start_idx)
            start_idx += 1 + len(str(id_))
        return offsets

    def convert_tokens_to_ids(self, tokens):
        """Convert tokens to IDs."""
        ids = [
            (int(t) if t != self._image_token else self._image_token_id) for t in tokens.split('  ')
        ]
        return ids if len(ids) > 1 else ids[0]

    @property
    def vocab_size(self):
        """Vocab size."""
        return self._vocab_size_without_eod + 1

    @property
    def cls(self):
        """CLS token id."""
        return -1

    @property
    def sep(self):
        """SEP token id."""
        return -1

    @property
    def mask(self):
        """MASK token id."""
        return -1

    @property
    def eod(self):
        """EOD token id."""
        return self._eod_id

    @property
    def additional_special_tokens_ids(self):
        """Returns IDs of additional special tokens."""
        return None


================================================
FILE: megatron/core/tokenizers/vision/models/__init__.py
================================================
# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.

from megatron.core.tokenizers.vision.models.default_tokenizer import DefaultTokenizerVision


================================================
FILE: megatron/core/tokenizers/vision/models/default_tokenizer.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from megatron.core.tokenizers.vision.vision_tokenizer import MegatronTokenizerVision


class DefaultTokenizerVision(MegatronTokenizerVision):
    """Base class for Megatron default vision tokenizer."""

    def __init__(self, path: str = None, config: dict = None, **kwargs) -> None:
        config['class_name'] = self.__class__.__name__
        config['class_path'] = self.__class__.__module__
        super().__init__(path, config, **kwargs)


================================================
FILE: megatron/core/tokenizers/vision/vision_tokenizer.py
================================================
# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.

from collections import OrderedDict
from typing import Dict, List, Union

from megatron.core.tokenizers.base_tokenizer import MegatronTokenizerBase

TOKENIZER_MAPPING_LIBRARIES = OrderedDict(
    [
        ("multimodal", "MegatronMultimodalTokenizer"),
        ("null-multimodal", "MegatronNullMultimodalTokenizer"),
    ]
)


class MegatronTokenizerVision(MegatronTokenizerBase):
    """Base class for Megatron vision tokenizers."""

    def __init__(self, path: str, config: dict, **kwargs) -> None:
        """
        Args:
            path (str): path to the tokenizer model.
            config (dict): tokenizer parameters.
                library (str): tokenizer library.
                class_name (str): name of tokenizer class.
                class_path (str): path to tokenizer class.
                model_type (str): type of the model to be used with tokenizer.
        """

        super().__init__(path, config, **kwargs)
        self._tokenizer = self._restore_model(**kwargs)
        self.path = path

    def _restore_model(self, **kwargs):
        """Returns tokenizer library object."""

        import megatron.core.tokenizers.vision.libraries as tokenizers

        library_class = getattr(tokenizers, TOKENIZER_MAPPING_LIBRARIES[self.library])

        if self.library in ['null-multimodal']:
            return library_class(**kwargs)
        else:
            return library_class(self.path, **kwargs)

    def tokenize(self, text: Union[str, List[Dict]]) -> List[int]:
        """
        Text tokenization.

        Args:
            text (str | list): text to be tokenized.

        Returns:
            list: list of ids.
        """

        return self._tokenizer.tokenize(text)

    def detokenize(self, ids: List[int]) -> str:
        """
        Text detokenization.

        Args:
            ids (list): text to be tokenized.

        Returns:
            text: detokenized text.
        """

        return self._tokenizer.detokenize(ids)

    def tokenize_conversation(
        self, conversation: List[Dict], return_target: bool, add_generation_prompt: bool
    ):
        """Convert a conversation to tokens.

        Args:
            conversation (List[Dict]): Sequence of system/user/assistant messages.
                Must be in the following format:
                [
                    {"role": "user", "content": "something"},
                    {"role": "assistant", "content": "something2"},
                ]
            return_target (bool): Return target tokens with system and assistant masked.
            add_generation_prompt (bool): Add assistant prefix to the end.
        """

        return self._tokenizer.tokenize_conversation(
            conversation=conversation,
            return_target=return_target,
            add_generation_prompt=add_generation_prompt,
        )

    def add_special_tokens(self, special_tokens: Union[list, dict]) -> None:
        """
        Adds a dictionary of special tokens (eos, pad, cls...).
            Tokens are only added if they are not already in the vocabulary.
            Indexed starting from the last index of the current vocabulary.

        Args:
            special_tokens_dict: dict of string. Keys should be in the list of predefined
                special attributes: [``bos_token``, ``eos_token``, ``unk_token``, ``sep_token``,
                ``pad_token``, ``cls_token``, ``mask_token``, ``additional_special_tokens``].
        """

        self._tokenizer.add_special_tokens(special_tokens)

    def convert_tokens_to_ids(self, tokens: List[str]):
        """Convert tokens to IDs."""
        return self._tokenizer.convert_tokens_to_ids(tokens)

    def apply_chat_template(self):
        """Applies tokenizer's chat template."""
        raise NotImplementedError("This method is not supported for vision tokenizers.")

    def get_special_tokens(self) -> list:
        """Returns a list of the additional special tokens."""
        return self._tokenizer.get_special_tokens()

    def offsets(self, ids: list[int], text: str) -> list[int]:
        """Calculate offsets."""
        return self._tokenizer.offsets(ids=ids, text=text)

    @property
    def vocab(self):
        """Tokenizer vocab."""
        return self._tokenizer.vocab

    @property
    def vocab_size(self) -> int:
        """Returns vocabulary size."""
        return self._tokenizer.vocab_size

    @property
    def pad(self):
        """Pad token ID."""
        return self._tokenizer.pad

    @property
    def eod(self):
        """End of sentence token ID."""
        return self._tokenizer.eod


================================================
FILE: megatron/core/transformer/__init__.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

from .module import MegatronModule
from .spec_utils import ModuleSpec, build_module
from .transformer_config import MLATransformerConfig, TransformerConfig
from .transformer_layer import TransformerLayer, TransformerLayerSubmodules


================================================
FILE: megatron/core/transformer/attention.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
from __future__ import annotations

import copy
import inspect
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Callable, Optional, Protocol, Tuple, Union

import torch
from torch import Tensor

from megatron.core import tensor_parallel
from megatron.core.extensions.transformer_engine import HAVE_TE
from megatron.core.inference.contexts import BaseInferenceContext
from megatron.core.jit import jit_fuser
from megatron.core.models.common.embeddings.rope_utils import (
    apply_rotary_pos_emb,
    apply_rotary_pos_emb_with_cos_sin,
)
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.parallel_state import (
    get_data_parallel_group,
    get_data_parallel_rank,
    get_data_parallel_world_size,
    get_tensor_model_parallel_group,
    get_tensor_model_parallel_rank,
    get_tensor_model_parallel_world_size,
)
from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
    FineGrainedActivationOffloadingInterface as off_interface,
)
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.tensor_parallel.mappings import all_gather_last_dim_from_tensor_parallel_region
from megatron.core.transformer.identity_op import IdentityOp
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.spec_utils import ModuleSpec, build_module
from megatron.core.transformer.torch_norm import LayerNormBuilder
from megatron.core.typed_torch import apply_module, not_none
from megatron.core.utils import (
    deprecate_inference_params,
    divide,
    get_pg_rank,
    get_pg_size,
    is_fa_min_version,
    is_te_min_version,
    is_using_quantization_scales,
    nvtx_range_pop,
    nvtx_range_push,
)

from ..models.common.embeddings.yarn_rotary_pos_embedding import (
    _yarn_get_concentration_factor_from_config,
)
from .enums import AttnMaskType, CudaGraphScope
from .transformer_config import TransformerConfig

try:
    from einops import rearrange
except ImportError:
    rearrange = None

try:
    from flash_attn_3.flash_attn_interface import _flash_attn_forward
    from flash_attn_3.flash_attn_interface import (
        flash_attn_with_kvcache as flash_attn3_with_kvcache,
    )

    HAVE_FA3 = True
except ImportError as e:
    HAVE_FA3 = False

if not HAVE_FA3:
    try:
        from flashattn_hopper.flash_attn_interface import _flash_attn_forward
        from flashattn_hopper.flash_attn_interface import (
            flash_attn_with_kvcache as flash_attn3_with_kvcache,
        )

        HAVE_FA3 = True
    except ImportError as e:
        pass

try:
    from flash_mla import flash_mla_with_kvcache, get_mla_metadata

    HAVE_FMLA = True
except ImportError:
    flash_mla_with_kvcache = None
    get_mla_metadata = None
    HAVE_FMLA = False

from megatron.core.transformer.transformer_config import MLATransformerConfig

try:
    from flash_attn import flash_attn_varlen_func, flash_attn_with_kvcache
except:
    flash_attn_varlen_func = None
    flash_attn_with_kvcache = None

if HAVE_TE:
    from megatron.core.extensions.transformer_engine import (
        SplitAlongDim,
        TELinear,
        set_save_original_input,
    )
else:
    SplitAlongDim, TELinear, set_save_original_input = None, None, None

try:
    from transformer_engine.pytorch.attention.rope import apply_fused_qkv_rotary_pos_emb

    HAVE_FUSED_QKV_ROPE = True
except ImportError:
    HAVE_FUSED_QKV_ROPE = False


class LinearQkv(Protocol):
    """Protocol for linear_qkv modules."""

    def forward(self, input: Tensor, /) -> tuple[Tensor, object]:
        """Applies linear_qkv."""
        ...

    def backward_dw(self) -> None:
        """Backward pass for the linear_qkv module."""
        ...


class LinearQkvBuilder(Protocol):
    """Protocol for building linear_qkv layers."""

    def __call__(
        self,
        input_size: int,
        output_size: int,
        /,
        *,
        config: TransformerConfig,
        init_method: Callable[[torch.Tensor], None],
        gather_output: bool,
        bias: bool,
        skip_bias_add: bool,
        is_expert: bool,
        tp_comm_buffer_name: str,
        tp_group: torch.distributed.ProcessGroup | None = None,
    ) -> LinearQkv: ...


class LinearLayer(Protocol):
    """Protocol for linear_q and linear_kv modules."""

    def forward(self, input: Tensor, /) -> Tuple[Tensor, object]:
        """Applies linear_q/linear_kv."""
        ...


class LinearLayerBuilder(Protocol):
    """Protocol for building linear_q and linear_kv layers."""

    def __call__(
        self,
        input_size: int,
        output_size: int,
        /,
        *,
        config: TransformerConfig,
        init_method: Callable[[torch.Tensor], None],
        gather_output: bool,
        bias: bool,
        skip_bias_add: bool,
        is_expert: bool,
    ) -> LinearLayer: ...


class CoreAttention(Protocol):
    """Protocol for core_attention modules."""

    def forward(
        self,
        query: Tensor,
        key: Tensor,
        value: Tensor,
        attention_mask: Optional[Tensor],
        /,
        *,
        attn_mask_type: AttnMaskType,
        attention_bias: Optional[Tensor],
        packed_seq_params: Optional[PackedSeqParams],
    ) -> Tensor:
        """Applies dot product attention."""
        ...


class CoreAttentionBuilder(Protocol):
    """Protocol for building core_attention layers."""

    def __call__(
        self,
        *,
        config: TransformerConfig,
        layer_number: int,
        attn_mask_type: AttnMaskType,
        attention_type: str,
        cp_comm_type: Optional[str],
        softmax_scale: Optional[float],
        pg_collection: Optional[ProcessGroupCollection],
    ) -> CoreAttention: ...


@dataclass
class SelfAttentionSubmodules:
    """
    Configuration class for specifying the submodules of a self-attention.
    """

    linear_qkv: LinearQkvBuilder
    core_attention: CoreAttentionBuilder
    linear_proj: Union[ModuleSpec, type] = None
    q_layernorm: LayerNormBuilder | None = None
    k_layernorm: LayerNormBuilder | None = None


@dataclass
class CrossAttentionSubmodules:
    """
    Configuration class for specifying the submodules of a cross-attention.
    """

    linear_q: LinearLayerBuilder
    linear_kv: LinearLayerBuilder
    core_attention: CoreAttentionBuilder
    linear_proj: Union[ModuleSpec, type] = None


class Attention(MegatronModule, ABC):
    """Attention layer abstract class.

    This layer only contains common modules required for the "self attn" and
    "cross attn" specializations.
    """

    def __init__(
        self,
        config: TransformerConfig,
        submodules: Union[SelfAttentionSubmodules, CrossAttentionSubmodules],
        layer_number: int,
        attn_mask_type: AttnMaskType,
        attention_type: str,
        cp_comm_type: str | None = None,
        pg_collection: ProcessGroupCollection | None = None,
        pp_layer_offset: Optional[int] = None,
    ):
        super().__init__(config=config)

        self.config = config
        self.layer_number = layer_number
        self._pp_layer_offset = pp_layer_offset

        self.attn_mask_type = attn_mask_type
        self.attention_type = attention_type
        self.batch_invariant_mode = config.batch_invariant_mode

        assert self.config.kv_channels is not None
        assert self.config.num_query_groups is not None

        # For normal attention without groups, num_query_groups == num_attention_heads,
        # so these two will be the same
        self.query_projection_size = self.config.kv_channels * self.config.num_attention_heads
        self.kv_projection_size = self.config.kv_channels * self.config.num_query_groups

        if pg_collection is None:
            pg_collection = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp', 'cp'])
        else:
            assert hasattr(
                pg_collection, 'tp'
            ), "Attention pg_collection must have tp process group"
            assert hasattr(
                pg_collection, 'cp'
            ), "Attention pg_collection must have cp process group"
        self.pg_collection = pg_collection
        self.tp_group = pg_collection.tp

        # Per attention head and per partition values
        world_size = get_pg_size(self.pg_collection.tp)
        self.hidden_size_per_attention_head = divide(
            self.query_projection_size, self.config.num_attention_heads
        )
        if self.config.num_query_groups < world_size:
            # When num_kv_heads < tp_size, each TP rank (post AG) initially produces
            # activations for 1 kv_head and (num_q_heads / num_kv_heads) q_heads.
            # We then pull out the appropriate (num_q_heads / tp_size) q_heads.
            self.num_query_groups_per_partition = 1
            self.num_attention_heads_per_partition = divide(
                self.config.num_attention_heads, self.config.num_query_groups
            )
        else:
            # When num_kv_heads >= tp_size, each TP rank produces activations for
            # (num_kv_heads / tp_size) kv_heads and (num_q_heads / tp_size) q_heads.
            self.num_query_groups_per_partition = divide(self.config.num_query_groups, world_size)
            self.num_attention_heads_per_partition = divide(
                self.config.num_attention_heads, world_size
            )
        self.world_size = world_size

        # To support both CUDA Graphs and key value with different hidden size
        self.key_hidden_size = self.hidden_size_per_attention_head
        self.val_hidden_size = self.hidden_size_per_attention_head

        if self.config.num_query_groups < world_size:
            # TE throws an assertion error if num_kv_heads / num_query_groups
            # is not divisible by TP size.
            # TODO(rwaleffe/dnarayanan): Clean this up eventually.
            tmp_config = copy.deepcopy(self.config)
            tmp_config.num_query_groups = world_size
        else:
            tmp_config = self.config
        self.core_attention = submodules.core_attention(
            config=tmp_config,
            layer_number=self.layer_number,
            attn_mask_type=self.attn_mask_type,
            attention_type=self.attention_type,
            cp_comm_type=cp_comm_type,
            softmax_scale=self.config.softmax_scale,
            pg_collection=self.pg_collection,
        )

        self.checkpoint_core_attention = (
            self.config.recompute_granularity == 'selective'
            and "core_attn" in self.config.recompute_modules
        )

        self.offload_qkv_linear = (
            self.config.fine_grained_activation_offloading
            and "qkv_linear" in self.config.offload_modules
        )

        self.offload_core_attention = (
            self.config.fine_grained_activation_offloading
            and "core_attn" in self.config.offload_modules
        )

        self.offload_attn_proj = (
            self.config.fine_grained_activation_offloading
            and "attn_proj" in self.config.offload_modules
        )

        # Output.
        self.linear_proj = build_module(
            submodules.linear_proj,
            self.query_projection_size,
            self.config.hidden_size,
            config=self.config,
            init_method=self.config.output_layer_init_method,
            bias=self.config.add_bias_linear,
            input_is_parallel=True,
            skip_bias_add=True,
            is_expert=False,
            tp_comm_buffer_name='proj',
            tp_group=self.pg_collection.tp,
        )

        if (
            HAVE_TE
            and isinstance(self.linear_proj, TELinear)
            and (
                (
                    self.config.fp8
                    and self.config.fp8_recipe != 'delayed'
                    and is_te_min_version("2.6.0dev0")
                )
                or (self.config.fp4 and is_te_min_version("2.7.0.dev0"))
            )
        ):
            # For fp8/fp4 training, the output of the fused core_attn is saved by itself, and
            # linear_proj also saves the quantized tensor of this output. Here we set the
            # linear_proj to save the original input tensors to avoid the extra memory usage of
            # the quantized tensor.
            set_save_original_input(self.linear_proj)

    def _checkpointed_attention_forward(
        self,
        query,
        key,
        value,
        attention_mask,
        rotary_pos_emb=None,
        attn_mask_type=None,
        attention_bias=None,
        packed_seq_params=None,
    ):
        """Forward method with selective activation checkpointing."""

        def custom_forward(*inputs):
            query = inputs[0]
            key = inputs[1]
            value = inputs[2]
            attention_mask = inputs[3]
            attn_mask_type = inputs[5]
            attn_mask_type = AttnMaskType(attn_mask_type.item())
            output_ = apply_module(self.core_attention)(
                query,
                key,
                value,
                attention_mask,
                attn_mask_type=attn_mask_type,
                attention_bias=attention_bias,
                packed_seq_params=packed_seq_params,
            )
            return output_

        if attn_mask_type is None:
            attn_mask_type = self.attn_mask_type
        attn_mask_type = torch.tensor([attn_mask_type.value], dtype=torch.int)
        hidden_states = tensor_parallel.checkpoint(
            custom_forward, False, query, key, value, attention_mask, rotary_pos_emb, attn_mask_type
        )

        return hidden_states

    def _allocate_memory(self, inference_max_sequence_length, batch_size, dim, dtype):
        """Allocate memory to store kv cache during inference."""

        return torch.empty(
            inference_max_sequence_length,
            batch_size,
            self.num_query_groups_per_partition,
            dim,
            dtype=dtype,
            device=torch.cuda.current_device(),
        )

    def _get_pp_layer_offset_for_inference(self):
        """Return the pipeline parallel layer offset for inference.

        When pp_layer_offset was explicitly provided (e.g. by MambaBlock for
        hybrid models using --hybrid-layer-pattern with fVPP), use that value
        directly.  Otherwise fall back to the standard computation which assumes
        uniform layer distribution across pipeline stages.
        """
        if self._pp_layer_offset is not None:
            return self._pp_layer_offset

        assert (
            self.config.virtual_pipeline_model_parallel_size is None
        ), "Virtual pipeline parallelism is not supported for inference"

        # Import here to avoid circular imports
        from megatron.core.transformer.transformer_layer import get_transformer_layer_offset

        return get_transformer_layer_offset(
            self.config, vp_stage=None, pp_rank=get_pg_rank(self.pg_collection.pp)
        )

    def _adjust_key_value_for_inference(
        self,
        inference_context: BaseInferenceContext,
        query: Tensor,
        key: Tensor,
        value: Tensor,
        rotary_pos_emb: Tensor,
        rotary_pos_cos: Optional[Tensor] = None,
        rotary_pos_sin: Optional[Tensor] = None,
        rotary_pos_cos_sin: Optional[Tensor] = None,
        sequence_len_offset: Optional[int] = None,
        *,
        inference_params: Optional[BaseInferenceContext] = None,
    ) -> tuple[Tensor, Tensor, Tensor, Tensor, AttnMaskType, Tensor]:
        """
        Saves the generated key and value tensors to the end of the buffers in inference_context.
        Returns the full size keys and values from the provided inference_context, as well as
        adjusted rotary_pos_emb.

        Args:
            query (Tensor): Query tensor.
            key (Tensor): Key tensor.
            value (Tensor): Value tensor.
            rotary_pos_emb (Optional[Union[Tensor, Tuple[Tensor, Tensor]]]): Rotary
                embedding tensor(s).
            rotary_pos_cos (Optional[Tensor]): Rotary embedding cosine.
            rotary_pos_sin (Optional[Tensor]): Rotary embedding sine.
            rotary_pos_cos_sin (Optional[Tensor]): Combined rotary embedding cosine and sine.
            Currently used exclusively for inference with dynamic batching and flashinfer RoPE.
            sequence_len_offset (Optional[int]): Sequence length offset used for
                inference CUDA graphs.

        Return:
            Tuple of: query, key, value, rotary_pos_emb, attn_mask_type, block_table.
        """

        inference_context = deprecate_inference_params(inference_context, inference_params)

        attn_mask_type = self.attn_mask_type
        if inference_context is None:
            return query, key, value, rotary_pos_emb, attn_mask_type, None

        # =================================================
        # Pre-allocate memory for key-values for inference.
        # =================================================
        if inference_context.is_static_batching():
            if self.layer_number not in inference_context.key_value_memory_dict:
                inf_max_seq_length = inference_context.max_sequence_length
                inf_max_batch_size = inference_context.max_batch_size
                inference_key_memory = self._allocate_memory(
                    inf_max_seq_length, inf_max_batch_size, self.key_hidden_size, key.dtype
                )
                inference_value_memory = self._allocate_memory(
                    inf_max_seq_length, inf_max_batch_size, self.val_hidden_size, value.dtype
                )
                inference_context.key_value_memory_dict[self.layer_number] = (
                    inference_key_memory,
                    inference_value_memory,
                )
            else:
                # Get the pre-allocated buffers for this layer
                inference_key_memory, inference_value_memory = (
                    inference_context.key_value_memory_dict[self.layer_number]
                )

        if (
            not inference_context.is_static_batching() or inference_context.sequence_len_offset > 0
        ) and (not self.training or not is_te_min_version("2.2.0")):
            # This should mean that we are past the prompt forward_step
            # and so we need to turn off masking
            # Note: in ModelOpt, we may use inference_context for speculative decoding
            # in training. In that case, we do not want to turn off masking as we need
            # customized attention mask for speculative decoding.

            attn_mask_type = AttnMaskType.no_mask

        if inference_context.is_static_batching():
            batch_start = inference_context.batch_size_offset
            batch_end = batch_start + key.size(1)
            assert batch_end <= inference_key_memory.size(1)
            sequence_start = inference_context.sequence_len_offset
            sequence_end = sequence_start + key.size(0)
            assert sequence_end <= inference_key_memory.size(0), (
                "Current sequence length is longer than expected maximum sequence length! "
                "Increase inference_max_seq_length."
            )

        if self.config.flash_decode:
            rotary_pos_cos_q = None
            rotary_pos_sin_q = None
            rotary_pos_cos_k = None
            rotary_pos_sin_k = None

            assert inference_context.is_static_batching()
            if (
                inference_context.sequence_len_offset > 0 and rotary_pos_cos is not None
            ):  # Decode phase, not prefill
                rotary_pos_cos_q = rotary_pos_cos[sequence_end - 1 : sequence_end]
                rotary_pos_sin_q = rotary_pos_sin[sequence_end - 1 : sequence_end]
                rotary_pos_cos_k = rotary_pos_cos[sequence_end - 1 : sequence_end]
                rotary_pos_sin_k = rotary_pos_sin[sequence_end - 1 : sequence_end]
            elif rotary_pos_cos is not None:  # Prefill
                rotary_pos_cos_q = rotary_pos_cos[:sequence_end]
                rotary_pos_sin_q = rotary_pos_sin[:sequence_end]
                rotary_pos_cos_k = rotary_pos_cos[:sequence_end]
                rotary_pos_sin_k = rotary_pos_sin[:sequence_end]

            # Flash Decoding assumes that the keys stored in the KV Cache already have RoPE applied.
            # Apply RoPE before we store the keys to make it compatible with flash decoding kernel
            if rotary_pos_sin_q is not None and rotary_pos_sin_k is not None:
                key = apply_rotary_pos_emb_with_cos_sin(
                    key,
                    rotary_pos_cos_k,
                    rotary_pos_sin_k,
                    rotary_interleaved=self.config.rotary_interleaved,
                )
                query = apply_rotary_pos_emb_with_cos_sin(
                    query,
                    rotary_pos_cos_q,
                    rotary_pos_sin_q,
                    rotary_interleaved=self.config.rotary_interleaved,
                )
        else:
            rotary_pos_cos_q = None
            rotary_pos_sin_q = None

        # Adjust rotary embeddings.
        if rotary_pos_emb is not None:
            q_pos_emb, k_pos_emb = rotary_pos_emb
            if inference_context.is_static_batching():
                q_pos_emb = q_pos_emb[sequence_start:sequence_end, :, :, :]
                k_pos_emb = k_pos_emb[:sequence_end, :, :, :]
            else:
                pass
            rotary_pos_emb = (q_pos_emb, k_pos_emb)

        block_table = None
        if inference_context.is_static_batching():
            # Copy key and values.
            inference_key_memory[sequence_start:sequence_end, batch_start:batch_end, ...] = key
            inference_value_memory[sequence_start:sequence_end, batch_start:batch_end, ...] = value
            key = inference_key_memory[:sequence_end, batch_start:batch_end, ...]
            value = inference_value_memory[:sequence_end, batch_start:batch_end, ...]
        else:
            pp_layer_offset = self._get_pp_layer_offset_for_inference()

            # Apply rotary embeddings before appending KV cache.
            if inference_context.use_flashinfer_fused_rope and (rotary_pos_cos_sin is not None):
                query, key = inference_context.apply_fused_qk_rotary_emb(
                    query, key, rotary_pos_cos_sin, self.config
                )
            elif rotary_pos_emb is not None:
                q_pos_emb, k_pos_emb = rotary_pos_emb
                key = inference_context.apply_rotary_emb_key(
                    key, k_pos_emb, self.config, self.pg_collection.cp
                )

                rotary_pos_emb = (q_pos_emb, None)  # key rotary emb has been applied

            # Append key/value data tensors to cache.
            inference_context.append_key_value_cache(
                self.layer_number - pp_layer_offset, key, value
            )

            _, max_seqlen_q = inference_context.cu_query_lengths()
            if getattr(self.config, "cache_mla_latents", None) and max_seqlen_q > 1:
                # Doing unabsorbed MLA Attention with cached mla latents (prefill/mixed mode)
                kv_cache, _, block_table = inference_context.key_value_cache(
                    self.layer_number - pp_layer_offset
                )
                # Uncompress the KV cache for prefill/mixed mode
                key, value = self.uncompress_kv_from_cache(kv_cache)
            else:
                # Read key/value *pointer* tensors from cache.
                key, value, block_table = inference_context.key_value_cache(
                    self.layer_number - pp_layer_offset
                )
        return query, key, value, rotary_pos_emb, attn_mask_type, block_table

    @abstractmethod
    def get_query_key_value_tensors(
        self,
        hidden_states: Tensor,
        key_value_states: Tensor | None,
        output_gate: bool = False,
        split_qkv: bool = True,
    ) -> (
        tuple[Tensor, Tensor, Tensor, Tensor]
        | tuple[Tensor, Tensor, Tensor]
        | tuple[Tensor, list[int]]
    ):
        """
        This method needs to be implemented based on whether the derived class
        is "self-attn" or "cross-attn".
        """

    def flash_decode(
        self,
        sequence_len_offset: Tensor,
        query_layer: Tensor,
        key_layer: Tensor,
        value_layer: Tensor,
        inference_key_memory: Tensor,
        inference_value_memory: Tensor,
        rotary_cos: Tensor,
        rotary_sin: Tensor,
        rotary_interleaved: bool = False,
    ) -> tuple[Tensor, Tensor]:
        """
        The flash decoding kernel will do the following in a single execution:
        1. Compute RoPE embedding with precomputed cos & sin tensors
        2. Update the KV Cache
        3. Performs the flash attention operation
        """
        assert flash_attn_with_kvcache is not None, (
            "Flash Decoding requires the flash_attn_with_kvcache kernel, "
            "available in the flash-attn package."
        )
        q = query_layer.permute(1, 0, 2, 3)
        k = key_layer.permute(1, 0, 2, 3)
        v = value_layer.permute(1, 0, 2, 3)
        k_cache = inference_key_memory.permute(1, 0, 2, 3)
        v_cache = inference_value_memory.permute(1, 0, 2, 3)

        if rotary_cos is not None:
            rotary_cos = rotary_cos.to(query_layer.dtype)
        if rotary_sin is not None:
            rotary_sin = rotary_sin.to(query_layer.dtype)

        out = flash_attn_with_kvcache(
            q=q,
            k_cache=k_cache,
            v_cache=v_cache,
            k=k,
            v=v,
            rotary_cos=rotary_cos,
            rotary_sin=rotary_sin,
            cache_seqlens=sequence_len_offset,
            rotary_interleaved=rotary_interleaved,
        )
        return out

    def _flash_attention_3_forward_wrapper(
        self,
        q: Tensor,
        k: Tensor,
        v: Tensor,
        max_seqlen_q,
        max_seqlen_k,
        cu_seqlens_q,
        seqlens_k,
        block_table,
        softmax_scale,
    ):
        """
        Wrapper for calling the FA3 _flash_attn_forward function.
        Handles argument conversion for different versions of the _flash_attn_forward API.
        """
        candidate_kwargs = {
            "q": q,
            "k": k,
            "v": v,
            "k_new": None,
            "v_new": None,
            "qv": None,
            "out": None,
            "out_": None,
            "cu_seqlens_q": cu_seqlens_q,
            "cu_seqlens_k": None,
            "cu_seqlens_k_new": None,
            "seqused_q": None,
            "seqused_k": seqlens_k,
            "max_seqlen_q": max_seqlen_q,
            "max_seqlen_k": max_seqlen_k,
            "page_table": block_table,
            "kv_batch_idx": None,
            "leftpad_k": None,
            "rotary_cos": None,
            "rotary_sin": None,
            "seqlens_rotary": None,
            "q_descale": None,
            "k_descale": None,
            "v_descale": None,
            "softmax_scale": softmax_scale,
            "causal": True,
            "attention_chunk": 0,
            "softcap": 0.0,
            "window_size": (-1, -1),
            "window_size_left": -1,
            "window_size_right": -1,
            "rotary_interleaved": True,
            "scheduler_metadata": None,
            "num_splits": 0 if not self.batch_invariant_mode else 1,
            "pack_gqa": None,
            "sm_margin": 0,
        }

        # Parse the expect argument names from the function signature
        if inspect.isfunction(_flash_attn_forward):
            sig = inspect.signature(_flash_attn_forward)
        else:
            assert isinstance(_flash_attn_forward, torch._library.custom_ops.CustomOpDef)
            sig = inspect.signature(_flash_attn_forward._init_fn)
        valid_kwargs = set(sig.parameters.keys())
        final_kwargs = {k: candidate_kwargs[k] for k in valid_kwargs if k in candidate_kwargs}

        output_total, *unused = _flash_attn_forward(**final_kwargs)

        return output_total

    def flash_decode_and_prefill(
        self,
        q: Tensor,
        k: Tensor,
        v: Tensor,
        max_seqlen_q,
        max_seqlen_k,
        cu_seqlens_q,
        cu_seqlens_k,
        seqlens_k,
        block_table,
        is_decode_only,
    ) -> Tensor:
        """Flash attention kernel for mixed decode and prefill samples.

        Args:
            q (Tensor): Query tensor.
            k (Tensor): Key tensor.
            v (Tensor): Value tensor.
            max_seqlen_q (int): Query total sequence length.
            max_seqlen_k (int): Key total sequence length.
            cu_seqlens_q (Tensor): Cumulative query sequence lengths.
            cu_seqlens_k (Tensor): Cumulative key sequence lengths.
            seqlens_k (Tensor): key sequence lengths.
            block_table (Tensor): KV cache block ids for all samples.
            is_decode_only (bool): True if batch is decode only.
        Return:
            (Tensor) Attention output.
        """

        assert not self.training
        assert block_table is not None

        # Flash attn kernel.
        if max_seqlen_q > 1:
            q = q.squeeze(1)
            if getattr(self, "softmax_scale", None) is not None:
                softmax_scale = self.softmax_scale
            else:
                softmax_scale = q.shape[-1] ** -0.5
            if HAVE_FA3:
                # TODO(ksanthanam): Replace with call to flash_attn_varlen_func once
                # it accepts block_table
                output_total = self._flash_attention_3_forward_wrapper(
                    q,
                    k,
                    v,
                    max_seqlen_q,
                    max_seqlen_k,
                    cu_seqlens_q,
                    seqlens_k,
                    block_table,
                    softmax_scale,
                )
            else:
                assert (
                    self.batch_invariant_mode is False
                ), "Batch invariant mode is not supported for flash attention 2"
                output_total = flash_attn_varlen_func(
                    q,
                    k,
                    v,
                    cu_seqlens_q,
                    cu_seqlens_k,
                    max_seqlen_q,
                    max_seqlen_k,
                    softmax_scale=softmax_scale,
                    causal=True,
                    block_table=block_table,
                )
            output_total = output_total.unsqueeze(1)
        else:  # decode only
            # If using MLA we use the FlashMLA kernel
            if isinstance(self.config, MLATransformerConfig):
                softmax_scale = self.softmax_scale

                num_heads_k = 1  # Only a single head for MLA Flash
                seq_len_q = 1  # Sequence length is 1 for decode
                num_heads_q = self.num_attention_heads_per_partition
                num_heads_per_head_k = seq_len_q * num_heads_q // num_heads_k

                cache_seqlens = seqlens_k
                tile_scheduler_metadata, num_splits = get_mla_metadata(
                    cache_seqlens,  # cumulative key-lengths
                    num_heads_per_head_k,  # decode-only lengths
                    num_heads_k,  # per-head dim of V
                )
                head_dim_v = self.config.kv_lora_rank
                kv_cache = k.unsqueeze(-2)
                output_total, softmax_lse = flash_mla_with_kvcache(
                    q,
                    kv_cache,
                    block_table,
                    cache_seqlens,
                    head_dim_v,
                    tile_scheduler_metadata,
                    num_splits,
                    softmax_scale=softmax_scale,
                    causal=True,
                )
            else:
                flash_attn_args = {
                    "q": q,
                    "k_cache": k,
                    "v_cache": v,
                    "cache_seqlens": seqlens_k,
                    "causal": True,
                    "page_table" if HAVE_FA3 else "block_table": block_table,
                    "num_splits": 0 if not self.batch_invariant_mode else 1,
                }
                if HAVE_FA3:
                    output_total = flash_attn3_with_kvcache(**flash_attn_args)
                else:
                    assert (
                        not self.batch_invariant_mode
                    ), "Batch invariant mode is not supported for flash attention 2"
                    output_total = flash_attn_with_kvcache(**flash_attn_args)
        return output_total

    def forward(
        self,
        hidden_states: Tensor,
        attention_mask: Tensor,
        key_value_states: Optional[Tensor] = None,
        inference_context: Optional[BaseInferenceContext] = None,
        rotary_pos_emb: Optional[Union[Tensor, Tuple[Tensor, Tensor]]] = None,
        rotary_pos_cos: Optional[Tensor] = None,
        rotary_pos_sin: Optional[Tensor] = None,
        rotary_pos_cos_sin: Optional[Tensor] = None,
        attention_bias: Optional[Tensor] = None,
        packed_seq_params: Optional[PackedSeqParams] = None,
        sequence_len_offset: Optional[int] = None,
        *,
        inference_params: Optional[BaseInferenceContext] = None,
    ) -> tuple[Tensor, Tensor]:
        """
        Perform a forward pass through the attention module.

        Args:
            hidden_states (Tensor): Hidden states.
            attention_mask (Tensor): Attention mask.
            key_value_states (Optional[Tensor]): Key/value states (for cross attention).
            inference_context (Optional[BaseInferenceContext]): Inference context that manages
                KV cache.
            rotary_pos_emb (Optional[Union[Tensor, Tuple[Tensor, Tensor]]]): Rotary
                embedding tensor(s).
            rotary_pos_cos (Optional[Tensor]): Rotary embedding cosine.
            rotary_pos_sin (Optional[Tensor]): Rotary embedding sine.
            rotary_pos_cos_sin (Optional[Tensor]): Combined rotary embedding cosine and sine.
            Currently used exclusively for inference with dynamic batching and flashinfer RoPE.
            attention_bias (Optional[Tensor]): Attention bias.
            packed_seq_params (Optional[PackedSeqparams]): Parameters used for THD format.
            sequence_len_offset (Optional[int]): Sequence length offset used for
                inference CUDA graphs.

        Return:
            (Tuple[Tensor, Tensor]) Attention output and bias.

        """
        # Check if we need to skip RoPE
        # no_rope is 0-indexed array and self.layer_number is 1-indexed
        no_rope = (
            self.config.no_rope_freq[self.layer_number - 1] if self.config.no_rope_freq else False
        )
        if no_rope:
            rotary_pos_emb = None

        inference_context = deprecate_inference_params(inference_context, inference_params)

        if inference_context and inference_context.is_dynamic_batching():
            assert HAVE_FA3 or is_fa_min_version(
                "2.7.3"
            ), "flash attn verion v2.7.3 and above is required for dynamic batching."

        # hidden_states: [sq, b, h]
        is_inference_mode = inference_context is not None and not self.training
        # is_using_flash_decode - True is we are using the static inference engine with flash decode
        is_using_flash_decode = is_inference_mode and self.config.flash_decode
        # is_using_flashinfer_rope - True if we are using the dynamic inference engine
        # with flashinfer fused rope
        is_using_flashinfer_rope = is_inference_mode and (
            not inference_context.is_static_batching()
            and inference_context.use_flashinfer_fused_rope
        )
        if is_using_flash_decode or is_using_flashinfer_rope:
            # flash decode and flash-infer fused rope use rotary_pos_cos and rotary_pos_sin
            rotary_pos_emb = None
        else:
            assert rotary_pos_cos is None and rotary_pos_sin is None

        # For self attention we just duplicate the rotary_pos_emb if it isn't already
        if rotary_pos_emb is not None and not isinstance(rotary_pos_emb, tuple):
            rotary_pos_emb = (rotary_pos_emb,) * 2

        # =====================
        # Query, Key, and Value
        # =====================
        # Get the query, key and value tensors based on the type of attention -
        # self or cross attn.
        nvtx_range_push(suffix="qkv")
        split_qkv = (self.attention_type == "cross") or not all(
            [
                not self.config.test_mode,
                self.config.fused_single_qkv_rope,
                inference_context is None,
                packed_seq_params is None,
                (
                    rotary_pos_emb is not None
                    and rotary_pos_emb[0] is not None
                    and rotary_pos_emb[1] is not None
                ),
                not self.config.flash_decode,
                HAVE_FUSED_QKV_ROPE,
                self.q_layernorm is None or isinstance(self.q_layernorm, IdentityOp),
                self.k_layernorm is None or isinstance(self.k_layernorm, IdentityOp),
            ]
        )
        # Check if fused_single_qkv_rope is requested but either unavailable or not
        # supported for the current use case.
        if self.attention_type != "cross":
            assert not (
                self.config.fused_single_qkv_rope and split_qkv
            ), "fused_single_qkv_rope requested but not available/supported for the config."

        with off_interface(self.offload_qkv_linear, hidden_states, "qkv_linear") as hidden_states:
            qkv_output = self.get_query_key_value_tensors(
                hidden_states,
                key_value_states,
                split_qkv=split_qkv,
                output_gate=self.config.attention_output_gate,
            )
        if self.offload_qkv_linear:
            # `qkv_output` may be a tuple; commit supports tuple/list and will keep structure.
            qkv_output = off_interface.group_commit(
                qkv_output, name="qkv_linear", forced_released_tensors=[]
            )
        attn_mask_type = self.attn_mask_type
        block_table = None
        gate = None
        if split_qkv:
            if self.config.attention_output_gate:
                query, key, value, gate = qkv_output
            else:
                query, key, value = qkv_output
            mixed_qkv = qkv_split_arg_list = None
        else:
            assert (
                not self.config.attention_output_gate
            ), "attention_output_gate is not supported for unsplit mixed_qkv tensor."
            mixed_qkv, qkv_split_arg_list = qkv_output
        nvtx_range_pop(suffix="qkv")

        # ===================================================
        # Adjust key, value, and rotary_pos_emb for inference
        # ===================================================

        in_decode_mode = (
            inference_context is not None
            and inference_context.is_decode_only()
            and not self.training
        )

        # This branch only runs in the decode phase of flash decoding and returns after the linear
        # projection. This conditional is not used in the prefill phase or non-flash-decoding cases.
        nvtx_range_push(suffix="adjust_key_value")
        if in_decode_mode and self.config.flash_decode:
            assert self.layer_number in inference_context.key_value_memory_dict
            assert inference_context.sequence_len_offset is not None
            inference_key_memory, inference_value_memory = inference_context.key_value_memory_dict[
                self.layer_number
            ]
            output = self.flash_decode(
                sequence_len_offset=sequence_len_offset,
                query_layer=query,
                key_layer=key,
                value_layer=value,
                inference_key_memory=inference_key_memory,
                inference_value_memory=inference_value_memory,
                rotary_cos=rotary_pos_cos,
                rotary_sin=rotary_pos_sin,
                rotary_interleaved=self.config.rotary_interleaved,
            )
            out = output.transpose(0, 1).contiguous()
            context_layer = out.view(out.size(0), out.size(1), -1)
            output, bias = self.linear_proj(context_layer)
            return output, bias

        if (
            in_decode_mode
            and self.config.cuda_graph_impl == "local"
            and CudaGraphScope.full_iteration not in self.config.cuda_graph_scope
            and inference_context.is_static_batching()
        ):
            raise ValueError(f"CUDA graphs must use flash decode with static batching!")

        if split_qkv:
            query, key, value, rotary_pos_emb, attn_mask_type, block_table = (
                self._adjust_key_value_for_inference(
                    inference_context,
                    query,
                    key,
                    value,
                    rotary_pos_emb,
                    rotary_pos_cos,
                    rotary_pos_sin,
                    rotary_pos_cos_sin,
                    sequence_len_offset,
                )
            )

        if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd':
            query = query.squeeze(1)
            key = key.squeeze(1)
            value = value.squeeze(1)
        nvtx_range_pop(suffix="adjust_key_value")

        # ================================================
        # relative positional embedding (rotary embedding)
        # ================================================
        nvtx_range_push(suffix="rotary_pos_emb")
        if rotary_pos_emb is not None and (
            not self.config.flash_decode or inference_context is None
        ):
            q_pos_emb, k_pos_emb = rotary_pos_emb

            if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd':
                if packed_seq_params.cu_seqlens_q_padded is not None:
                    cu_seqlens_q = packed_seq_params.cu_seqlens_q_padded
                else:
                    cu_seqlens_q = packed_seq_params.cu_seqlens_q
                if packed_seq_params.cu_seqlens_kv_padded is not None:
                    cu_seqlens_kv = packed_seq_params.cu_seqlens_kv_padded
                else:
                    cu_seqlens_kv = packed_seq_params.cu_seqlens_kv
            else:
                cu_seqlens_q = cu_seqlens_kv = None

            if split_qkv:
                if q_pos_emb is not None:
                    # TODO VIJAY: simplify
                    if inference_context is None or inference_context.is_static_batching():
                        query = apply_rotary_pos_emb(
                            query,
                            q_pos_emb,
                            config=self.config,
                            cu_seqlens=cu_seqlens_q,
                            mscale=_yarn_get_concentration_factor_from_config(self.config),
                            cp_group=self.pg_collection.cp,
                        )
                    else:
                        query = inference_context.apply_rotary_emb_query(
                            query, q_pos_emb, self.config, cu_seqlens_q, self.pg_collection.cp
                        )
                if k_pos_emb is not None:
                    key = apply_rotary_pos_emb(
                        key,
                        k_pos_emb,
                        config=self.config,
                        cu_seqlens=cu_seqlens_kv,
                        mscale=_yarn_get_concentration_factor_from_config(self.config),
                        cp_group=self.pg_collection.cp,
                    )
            else:
                query, key, value = apply_fused_qkv_rotary_pos_emb(
                    mixed_qkv, q_pos_emb, k_pos_emb, qkv_split_arg_list
                )

            # TODO, can apply positional embedding to value_layer so it has
            # absolute positional embedding.
            # otherwise, only relative positional embedding takes effect
            # value_layer = apply_rotary_pos_emb(value_layer, k_pos_emb)
        nvtx_range_pop(suffix="rotary_pos_emb")

        # ==================================
        # core attention computation
        # ==================================

        nvtx_range_push(suffix="core_attention")
        if self.checkpoint_core_attention and self.training:
            core_attn_out = self._checkpointed_attention_forward(
                query,
                key,
                value,
                attention_mask,
                attn_mask_type=attn_mask_type,
                attention_bias=attention_bias,
                packed_seq_params=packed_seq_params,
            )
        else:
            if inference_context is None or inference_context.is_static_batching():
                # Static batching attention kernel.
                with off_interface(
                    self.offload_core_attention and self.training, query, "core_attn"
                ) as query:
                    core_attn_out = apply_module(self.core_attention)(
                        query,
                        key,
                        value,
                        attention_mask,
                        attn_mask_type=attn_mask_type,
                        attention_bias=attention_bias,
                        packed_seq_params=packed_seq_params,
                    )

            else:
                # Dynamic batching attention kernel.
                q, k, v = (query, key, value)
                cu_query_lengths, max_seqlen_q = inference_context.cu_query_lengths()
                cu_kv_lengths, kv_lengths, max_seqlen_k = inference_context.cu_kv_lengths()

                core_attn_out = self.flash_decode_and_prefill(
                    q,
                    k,
                    v,
                    max_seqlen_q,
                    max_seqlen_k,
                    cu_query_lengths,
                    cu_kv_lengths,
                    kv_lengths,
                    block_table,
                    inference_context.is_decode_only(),
                )
                core_attn_out = rearrange(core_attn_out, 's b h d -> s b (h d)')

                # Clear the outputs for padding tokens when using quantization scales
                # to avoid corrupting amax calculations
                if is_using_quantization_scales(self.config):
                    core_attn_out[inference_context.padding_slice] = 0.0

            if self.offload_core_attention and self.training:
                core_attn_out = off_interface.group_commit(
                    core_attn_out, name="core_attn", forced_released_tensors=[query, key, value]
                )
        if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd':
            # reshape to same output shape as unpacked case
            # (t, np, hn) -> (t, b=1, h=np*hn)
            # t is the pack size = sum (sq_i)
            # note that batch is a dummy dimension in the packed case
            core_attn_out = core_attn_out.reshape(core_attn_out.size(0), 1, -1)
        nvtx_range_pop(suffix="core_attention")

        # Output gate
        if gate is not None:
            nvtx_range_push(suffix="output_gate")
            core_attn_out = self._apply_output_gate(core_attn_out, gate)
            nvtx_range_pop(suffix="output_gate")

        # =================
        # Output. [sq, b, h]
        # =================
        nvtx_range_push(suffix="linear_proj")
        with off_interface(self.offload_attn_proj, core_attn_out, "attn_proj") as core_attn_out:
            output, bias = self.linear_proj(core_attn_out)
        if self.offload_attn_proj:
            output = off_interface.group_commit(
                output, name="attn_proj", forced_released_tensors=[core_attn_out]
            )
        nvtx_range_pop(suffix="linear_proj")

        return output, bias

    @jit_fuser
    def _apply_output_gate(self, x, gate):
        x_dtype = x.dtype
        gate = gate.contiguous()
        gate = gate.view(*x.shape)
        x = x * torch.sigmoid(gate.float())
        x = x.to(x_dtype)
        return x

    def set_for_recompute_input_layernorm(self):
        """Set the attention layer for recompute input_layernorm. Only needed for fp8."""
        raise NotImplementedError("set_for_recompute_input_layernorm is not implemented.")

    def clip_qk(self):
        """
        QK Clipping is a technique to clip the query and key attention logits to prevent the
        attention logits from exploding.
        """
        raise NotImplementedError("clip_qk is not implemented.")


class SelfAttention(Attention):
    """Self-attention layer class

    Self-attention layer takes input with size [s, b, h]
    and returns output of the same size.
    """

    def __init__(
        self,
        config: TransformerConfig,
        submodules: SelfAttentionSubmodules,
        layer_number: int,
        attn_mask_type: AttnMaskType = AttnMaskType.padding,
        cp_comm_type: str | None = None,
        pg_collection: ProcessGroupCollection | None = None,
        pp_layer_offset: Optional[int] = None,
    ):
        super().__init__(
            config=config,
            submodules=submodules,
            layer_number=layer_number,
            attn_mask_type=attn_mask_type,
            attention_type="self",
            cp_comm_type=cp_comm_type,
            pg_collection=pg_collection,
            pp_layer_offset=pp_layer_offset,
        )

        self.linear_qkv_out_dim = self.query_projection_size + 2 * self.kv_projection_size
        if self.config.attention_output_gate:
            self.linear_qkv_out_dim += self.config.kv_channels * self.config.num_attention_heads
        self.linear_qkv = submodules.linear_qkv(
            self.config.hidden_size,
            self.linear_qkv_out_dim,
            config=self.config,
            init_method=not_none(self.config.init_method),
            gather_output=False,
            bias=self.config.add_bias_linear or self.config.add_qkv_bias,
            skip_bias_add=False,
            is_expert=False,
            tp_comm_buffer_name='qkv',
            tp_group=self.pg_collection.tp,
        )

        if submodules.q_layernorm is not None:
            self.q_layernorm = submodules.q_layernorm(
                hidden_size=self.hidden_size_per_attention_head,
                config=self.config,
                eps=self.config.layernorm_epsilon,
            )
        else:
            self.q_layernorm = None

        if submodules.k_layernorm is not None:
            self.k_layernorm = submodules.k_layernorm(
                hidden_size=self.hidden_size_per_attention_head,
                config=self.config,
                eps=self.config.layernorm_epsilon,
            )
        else:
            self.k_layernorm = None

    def run_realtime_tests(self):
        """Performs a consistency check.

        This function makes sure that tensors across devices are the same during an experiment.
        This is often not guaranteed to be so because of silent hardware failures (eg, memory
        corruption loading a checkpoint, network traffic corruption encountered during
        data transmission).

        (TODO) In the future, more tensors should be checked across the training run and
        checked every X iterations. This is left for future work. Equality of tensors is probably
        not required; transmitting hashes is sufficient."""

        if not self.config.qk_layernorm:
            return

        # check that all tensor parallel and data parallel ranks have the same
        # Q & K layernorm parameters.
        rank = get_data_parallel_rank()
        inputs = torch.stack(
            [
                self.q_layernorm.weight.data,
                self.q_layernorm.bias.data,
                self.k_layernorm.weight.data,
                self.k_layernorm.bias.data,
            ]
        )
        dp_list = [torch.empty_like(inputs) for _ in range(get_data_parallel_world_size())]
        dp_list[rank] = inputs
        torch.distributed.all_gather(dp_list, inputs, group=get_data_parallel_group())

        def _compare(srcs, tgts, names, parallelism):
            assert len(srcs) == len(tgts) == len(names)
            for src, tgt, name in zip(srcs, tgts, names):
                assert torch.all(src == tgt), (
                    f"Discrepancy between {name} in {parallelism} ranks {i} and {rank}. "
                    f"Diff: {torch.norm(src - tgt)}"
                )

        for i, dp in enumerate(dp_list):
            q_w, q_b, k_w, k_b = torch.unbind(dp)
            _compare(
                [q_w, q_b, k_w, k_b],
                [
                    self.q_layernorm.weight.data,
                    self.q_layernorm.bias.data,
                    self.k_layernorm.weight.data,
                    self.k_layernorm.bias.data,
                ],
                ["q_w", "q_b", "k_w", "k_b"],
                "DP",
            )

        rank = get_tensor_model_parallel_rank()
        tp_list = [torch.empty_like(inputs) for _ in range(get_tensor_model_parallel_world_size())]
        tp_list[rank] = inputs
        torch.distributed.all_gather(tp_list, inputs, group=get_tensor_model_parallel_group())

        for i, tp in enumerate(tp_list):
            q_w, q_b, k_w, k_b = torch.unbind(tp)
            _compare(
                [q_w, q_b, k_w, k_b],
                [
                    self.q_layernorm.weight.data,
                    self.q_layernorm.bias.data,
                    self.k_layernorm.weight.data,
                    self.k_layernorm.bias.data,
                ],
                ["q_w", "q_b", "k_w", "k_b"],
                "TP",
            )

    def get_query_key_value_tensors(
        self,
        hidden_states: Tensor,
        key_value_states: Tensor | None = None,
        output_gate: bool = False,
        split_qkv: bool = True,
    ) -> (
        tuple[Tensor, Tensor, Tensor, Tensor]
        | tuple[Tensor, Tensor, Tensor]
        | tuple[Tensor, list[int]]
    ):
        """
        Derives `query`, `key` and `value` tensors from `hidden_states`.
        If `output_gate` is True, then also derives `gate` tensor.
        If `split_qkv=False`, then the unsplit mixed_qkv tensor is returned.
        """
        # If no output gate: Attention heads [sq, b, h] --> [sq, b, ng * (np/ng + 2) * hn)]
        # If have output gate: Attention heads [sq, b, h] --> [sq, b, ng * (2 * np/ng + 2) * hn)]
        mixed_qkv, _ = apply_module(self.linear_qkv)(hidden_states)
        num_query_heads_per_group = (
            self.num_attention_heads_per_partition // self.num_query_groups_per_partition
        )
        num_qkv_heads_per_group = num_query_heads_per_group + 2
        if output_gate:
            num_qkv_heads_per_group += num_query_heads_per_group

        assert self.config.num_query_groups is not None
        if self.config.num_query_groups < self.world_size:
            # Note that weights are interleaved in the following manner:
            # q1 q2 k1 v1 | q3 q4 k2 v2 | q5 q6 k3 v3 | ...
            # When tp_size > num_kv_heads, we split "q1 q2 k1 v1" over multiple
            # ranks, so a rank does not have a clean partitioning of just the q_heads
            # it needs. Instead, we perform the following steps:
            # 1. Assemble the full "q1 q2 k1 v1 | q3 q4 k2 v2 | q5 q6 k3 v3 | ..."
            #    through an AG.
            # 2. Pull out the right slice (e.g., "q1 q2 k1 v1" or "q3 q4 k2 v2").
            # 3. Split q_heads (e.g., q1, q2), k_heads (e.g., k1), v_heads (e.g., v1).
            # 4. Further index into query to get only the q_heads that this rank is
            #    responsible for (e.g., q1).
            # The block of code below performs steps 1 and 2.
            mixed_qkv = all_gather_last_dim_from_tensor_parallel_region(
                mixed_qkv, group=self.pg_collection.tp
            )
            idx = get_pg_rank(self.pg_collection.tp) // (
                self.world_size // self.config.num_query_groups
            )
            size = mixed_qkv.size()[-1] // self.config.num_query_groups
            mixed_qkv = mixed_qkv[:, :, idx * size : (idx + 1) * size]

        # If no output gate: [sq, b, hp] --> [sq, b, ng, (np/ng + 2) * hn]
        # If have output gate: [sq, b, hp] --> [sq, b, ng, (2 * np/ng + 2) * hn]
        new_tensor_shape = mixed_qkv.size()[:-1] + (
            self.num_query_groups_per_partition,
            num_qkv_heads_per_group * self.hidden_size_per_attention_head,
        )
        mixed_qkv = mixed_qkv.view(*new_tensor_shape)

        # Split the tensor into query, gate, key, and value.
        if output_gate:
            if not split_qkv:
                raise ValueError("split_qkv not supported for gated attention yet.")
            # If have output gate: [sq, b, ng, (2 * np/ng + 2) * hn]
            # --> [sq, b, ng, np/ng * hn], [sq, b, ng, np/ng * hn],
            # [sq, b, ng, hn], [sq, b, ng, hn]
            split_arg_list = [
                num_query_heads_per_group * self.hidden_size_per_attention_head,
                num_query_heads_per_group * self.hidden_size_per_attention_head,
                self.hidden_size_per_attention_head,
                self.hidden_size_per_attention_head,
            ]

            if SplitAlongDim is not None:
                (query, gate, key, value) = SplitAlongDim(mixed_qkv, 3, split_arg_list)
            else:
                (query, gate, key, value) = torch.split(mixed_qkv, split_arg_list, dim=3)
        else:
            # If no output gate: [sq, b, ng, (np/ng + 2) * hn]
            # --> [sq, b, ng, np/ng * hn], None, [sq, b, ng, hn], [sq, b, ng, hn]
            split_arg_list = [
                num_query_heads_per_group * self.hidden_size_per_attention_head,
                self.hidden_size_per_attention_head,
                self.hidden_size_per_attention_head,
            ]

            # Return unsplit mixed_qkv and split_arg_list
            if not split_qkv:
                return mixed_qkv, split_arg_list

            if SplitAlongDim is not None:
                (query, key, value) = SplitAlongDim(mixed_qkv, 3, split_arg_list)
            else:
                (query, key, value) = torch.split(mixed_qkv, split_arg_list, dim=3)

        # Query [sq, b, ng, np/ng * hn] -> [sq, b, np, hn]
        query = query.reshape(query.size(0), query.size(1), -1, self.hidden_size_per_attention_head)

        if self.config.num_query_groups < self.world_size:
            # query above corresponds to (num_q_heads / num_kv_heads) q_heads.
            # Index appropriately into query to get (num_q_heads / tp_size) q_heads.
            # This is step 4 in the list of steps above.
            idx = get_pg_rank(self.pg_collection.tp) % (
                self.world_size // self.config.num_query_groups
            )
            size = self.num_attention_heads_per_partition // (
                self.world_size // self.config.num_query_groups
            )
            query = query[:, :, idx * size : (idx + 1) * size, :]

        if self.q_layernorm is not None:
            query = apply_module(self.q_layernorm)(query)

        if self.k_layernorm is not None:
            key = apply_module(self.k_layernorm)(key)

        if self.config.test_mode:
            self.run_realtime_tests()

        if output_gate:
            # Gate [sq, b, ng, np/ng * hn] -> [sq, b, np, hn]
            gate = gate.reshape(*gate.shape[:2], -1, self.hidden_size_per_attention_head)
            if self.config.num_query_groups < self.world_size:
                idx = get_tensor_model_parallel_rank() % (
                    self.world_size // self.config.num_query_groups
                )
                size = self.num_attention_heads_per_partition // (
                    self.world_size // self.config.num_query_groups
                )
                gate = gate[:, :, idx * size : (idx + 1) * size, :]
            return query, key, value, gate

        return query, key, value

    def backward_dw(self) -> None:
        """Execute weight update operations"""
        self._backward_qkv_proj()
        self._backward_output_proj()

    def _backward_qkv_proj(self):
        """Update weights for QKV projection layer"""
        self.linear_qkv.backward_dw()

    def _backward_output_proj(self):
        """Update weights for output projection layer"""
        self.linear_proj.backward_dw()

    def set_for_recompute_input_layernorm(self):
        """Set the attention layer for recompute input_layernorm. Only needed for fp8/fp4."""
        from megatron.core.extensions.transformer_engine import set_save_original_input

        set_save_original_input(self.linear_qkv)

    def clip_qk(self):
        """
        QK Clipping is a technique to clip the query and key attention logits to prevent the
        attention logits from exploding. This function is experimental on GQA.
        """
        if not self.config.qk_clip:
            raise ValueError("qk_clip option needs to be enabled")

        if self.core_attention.current_max_attn_logits is None:
            raise ValueError("current_max_attn_logits is None")

        assert self.core_attention.current_max_attn_logits.shape == (
            self.num_attention_heads_per_partition,
        ), f"current_max_attn_logits shape is not ({self.num_attention_heads_per_partition}, ) \
                    but {self.core_attention.current_max_attn_logits.shape}"

        grouped_max_attn_logits = torch.max(
            self.core_attention.current_max_attn_logits.view(
                self.num_query_groups_per_partition, -1
            ),
            dim=1,
        ).values

        # only update the weight if any head has
        # current_max_attn_logits > qk_clip_threshold
        if torch.any(grouped_max_attn_logits > self.config.qk_clip_threshold):
            # Use num_query_groups_per_partition for tensor parallel scenarios

            # qk_clip_balancing_eta (g, 1, 1)
            assert grouped_max_attn_logits.shape == (
                self.num_query_groups_per_partition,
            ), f"current_max_attn_logits shape is not ({self.num_query_groups_per_partition},) \
                but {grouped_max_attn_logits.shape}"
            self.qk_clip_balancing_eta = torch.clamp(
                self.config.qk_clip_threshold / grouped_max_attn_logits, max=1.0
            ).view(self.num_query_groups_per_partition, 1, 1)
            assert torch.all(self.qk_clip_balancing_eta <= 1.0)

            # Handle different weight access patterns (main_param vs direct access)
            if hasattr(self.linear_qkv.weight, 'main_param'):
                self.linear_qkv.weight.main_param.data.copy_(
                    self._clip_linear_qkv(self.linear_qkv.weight.main_param.data)
                )

            self.linear_qkv.weight.data.copy_(self._clip_linear_qkv(self.linear_qkv.weight.data))

        # reset current_max_attn_logits
        self.core_attention.current_max_attn_logits = None

    def _clip_linear_qkv(self, weight):
        """Apply qkclip to linear_qkv layer"""
        # Reshape to (g, query_projection_size + 2 * kv_projection_size, -1)
        weight_reshaped = weight.view(
            self.num_query_groups_per_partition,
            (self.query_projection_size + 2 * self.kv_projection_size)
            // self.num_query_groups_per_partition,
            -1,
        )

        # Split into query_projection_size and 2 * kv_projection_size parts:
        # (n, a, -1) and (n, b, -1)
        weight_q = weight_reshaped[
            :, : self.query_projection_size // self.num_query_groups_per_partition, :
        ]
        weight_k = weight_reshaped[
            :,
            self.query_projection_size
            // self.num_query_groups_per_partition : (
                self.query_projection_size + self.kv_projection_size
            )
            // self.num_query_groups_per_partition,
            :,
        ]
        weight_v = weight_reshaped[
            :,
            (self.query_projection_size + self.kv_projection_size)
            // self.num_query_groups_per_partition :,
            :,
        ]

        # extend the qk_clip_balancing_eta to the same shape as weight_q and weight_k
        self.qk_clip_balancing_eta_extended = self.qk_clip_balancing_eta.repeat(
            1, weight_q.size(1), 1
        )

        # Clipping
        weight_q.mul_(torch.pow(self.qk_clip_balancing_eta_extended, self.config.qk_clip_alpha))
        weight_k.mul_(torch.pow(self.qk_clip_balancing_eta, 1 - self.config.qk_clip_alpha))

        # Concatenate back and reshape to original shape
        weight_updated = torch.cat([weight_q, weight_k, weight_v], dim=1)
        weight_updated = weight_updated.view(
            self.query_projection_size + 2 * self.kv_projection_size, -1
        )

        return weight_updated


class CrossAttention(Attention):
    """Cross-attention layer class

    Cross-attention layer takes input with size [s, b, h] and context with size
    [s, b, h] and returns output of the same size.
    """

    def __init__(
        self,
        config: TransformerConfig,
        submodules: CrossAttentionSubmodules,
        layer_number: int,
        attn_mask_type: AttnMaskType = AttnMaskType.padding,
        cp_comm_type: str | None = None,
        pg_collection: ProcessGroupCollection | None = None,
    ):
        super().__init__(
            config=config,
            submodules=submodules,
            layer_number=layer_number,
            attn_mask_type=attn_mask_type,
            attention_type="cross",
            cp_comm_type=cp_comm_type,
            pg_collection=pg_collection,
        )

        if self.config.num_query_groups != self.config.num_attention_heads:
            raise ValueError("Group query attention is not currently supported in cross attention.")
        assert self.query_projection_size == self.kv_projection_size

        self.linear_q = submodules.linear_q(
            self.config.hidden_size,
            self.query_projection_size,
            config=self.config,
            init_method=not_none(self.config.init_method),
            gather_output=False,
            bias=self.config.add_bias_linear,
            skip_bias_add=False,
            is_expert=False,
        )

        self.linear_kv = submodules.linear_kv(
            self.config.hidden_size,
            2 * self.kv_projection_size,
            config=self.config,
            init_method=not_none(self.config.init_method),
            gather_output=False,
            bias=self.config.add_bias_linear,
            skip_bias_add=False,
            is_expert=False,
        )

    def get_query_key_value_tensors(
        self,
        hidden_states: Tensor,
        key_value_states: Optional[Tensor],
        output_gate: bool = False,
        split_qkv: bool = True,
    ) -> Tuple[Tensor, Tensor, Tensor]:
        """
        Derives `query` tensor from `hidden_states`, and `key`/`value` tensors
        from `key_value_states`.
        """
        assert split_qkv, "split_qkv must be True for CrossAttention"
        assert not output_gate, "Output gate is not supported in cross attention for now."

        assert key_value_states is not None, "key_value_states cannot be None for CrossAttention"
        # Attention heads [sk, b, h] --> [sk, b, (np * 2 * hn)]
        mixed_kv, _ = apply_module(self.linear_kv)(key_value_states)

        # [sk, b, (np * 2 * hn)] --> [sk, b, np, 2 * hn]
        new_tensor_shape = mixed_kv.size()[:-1] + (
            self.num_attention_heads_per_partition,
            2 * self.hidden_size_per_attention_head,
        )
        mixed_kv = mixed_kv.view(*new_tensor_shape)

        # [sk, b, np, 2 * hn] --> 2 [sk, b, np, hn]
        (key, value) = tensor_parallel.split_tensor_along_last_dim(mixed_kv, 2)

        # Attention head [sq, b, h] --> [sq, b, hp]
        query, _ = apply_module(self.linear_q)(hidden_states)

        # [sq, b, hp] --> [sq, b, np, hn]
        new_tensor_shape = query.size()[:-1] + (
            self.num_attention_heads_per_partition,
            self.hidden_size_per_attention_head,
        )
        query = query.view(*new_tensor_shape)

        return query, key, value


================================================
FILE: megatron/core/transformer/cuda_graphs.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import dataclasses
import gc
import inspect
import logging
import math
import os
import time
from collections import defaultdict
from contextlib import nullcontext
from copy import deepcopy
from dataclasses import dataclass, is_dataclass
from enum import Enum
from functools import partial
from itertools import chain, zip_longest
from math import ceil
from typing import Any, Dict, List

import torch
from torch.utils._pytree import tree_map as tree_map_pyt

from megatron.core import parallel_state
from megatron.core.num_microbatches_calculator import get_num_microbatches
from megatron.core.tensor_parallel.random import (
    CudaRNGStatesTracker,
    get_all_rng_states,
    get_cuda_rng_tracker,
    is_checkpointing,
)
from megatron.core.transformer.enums import CudaGraphScope
from megatron.core.transformer.module import GraphableMegatronModule, MegatronModule
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.utils import (
    get_attr_wrapped_model,
    get_torch_version,
    is_te_min_version,
    log_on_each_pipeline_stage,
    log_single_rank,
)

try:
    import transformer_engine as te  # pylint: disable=unused-import
    from transformer_engine.pytorch.distributed import is_fp8_activation_recompute_enabled
    from transformer_engine.pytorch.fp8 import FP8GlobalStateManager
    from transformer_engine.pytorch.graph import (
        make_graphed_callables,
        restore_fp8_tensors,
        save_fp8_tensors,
    )
    from transformer_engine.pytorch.graph import set_capture_end as te_set_capture_end
    from transformer_engine.pytorch.graph import set_capture_start as te_set_capture_start
    from transformer_engine.pytorch.module.base import TransformerEngineBaseModule
    from transformer_engine.pytorch.utils import make_weak_ref

    HAVE_TE_GRAPHS = True
except:
    HAVE_TE_GRAPHS = False

try:
    from tqdm import tqdm

    HAVE_TQDM = True
except:
    HAVE_TQDM = False

_IS_GRAPH_CAPTURING = False
_IS_GRAPH_WARMUP = False
logger = logging.getLogger(__name__)

# Freeze GC during capture.
# TODO (@lmcafee): remove all freeze-GC code once most users are on PyTorch 2.9+.
FREEZE_GC = os.getenv("CUDA_GRAPH_CAPTURE_FREEZE_GC") != "0"
try:
    from packaging.version import Version as PkgVersion

    FREEZE_GC_MAX_TORCH_VERSION = PkgVersion("2.9.0a0")
    if get_torch_version() >= FREEZE_GC_MAX_TORCH_VERSION:
        FREEZE_GC = False
except ImportError:
    pass


def is_graph_capturing():
    """Query if currently capturing."""
    return _IS_GRAPH_CAPTURING


def _set_capture_start():
    """Set graph capture has started."""
    global _IS_GRAPH_CAPTURING
    _IS_GRAPH_CAPTURING = True


def _set_capture_end():
    """Set graph capture has ended."""
    global _IS_GRAPH_CAPTURING
    _IS_GRAPH_CAPTURING = False


def is_graph_warmup():
    """Query if currently warming up for graph capture."""
    return _IS_GRAPH_WARMUP


def _set_warmup_start():
    """Set graph warmup has started."""
    global _IS_GRAPH_WARMUP
    _IS_GRAPH_WARMUP = True


def _set_warmup_end():
    """Set graph warmup has ended."""
    global _IS_GRAPH_WARMUP
    _IS_GRAPH_WARMUP = False


@dataclass
class CudagraphBufferMetadata:
    """
    Metadata saved to tensors during cudagraph capture. This data will be used to determine
    during graph captue when a cudagraph can reuse a buffer or directly write its output into
    a subsequent's graph's input.
    """

    is_cudagraph_input: bool = False
    is_cudagraph_output: bool = False
    input_use_count: int = 0
    cudagraph_reuse_ref_count: int = 0
    capture_reuse_count: int = 0
    fwd_cudagraph_buffer: torch.Tensor = None
    bwd_cudagraph_buffer: torch.Tensor = None


class ArgMetadata:
    """Arg meta."""

    def __init__(self, arg):
        self.type = type(arg)
        if isinstance(arg, torch.Tensor):
            self.shape = arg.shape
            self.dtype = arg.dtype
            self.device = arg.device
            self.value = arg.data_ptr()
            self.requires_grad = arg.requires_grad
            if hasattr(arg, "cg_buffer_metadata"):
                # Its important this is a reference copy
                self.cg_buffer_metadata = arg.cg_buffer_metadata
        else:
            self.value = arg

    def zeros_like(self):
        """Reconstruct a tensor with the properties as the meta arg."""

        assert self.type == torch.Tensor
        return torch.zeros(
            *self.shape, dtype=self.dtype, device=self.device, requires_grad=self.requires_grad
        )


class TensorReusePool:
    """
    A pool-like list of tensors that can be reused as input and output buffers during graph capture.
    Also maintains strong references to all tensors created by this pool, so that they will never be
    freed by the memory allocator.
    """

    """Record strong references to buffers created by the pool so they cannot be deallocated between
    graph captures."""
    tensor_strong_refs: list = []

    """Record the data_ptrs of buffers created by the pool to check when a tensor came was 
    allocated from this pool. """
    tensor_strong_refs_dataptrs: set = set()

    """Buffers that have been returned to the pool and are available for reuse. """
    pool: list[torch.Tensor] = []

    def insert(self, tensor: torch.Tensor):
        """Return a tensor to the pool reuse."""
        assert self.owns(tensor)
        self.pool.append(tensor)

    def owns(self, tensor: torch.Tensor):
        """Check if a tensor was created from this pool."""
        return tensor.data_ptr() in self.tensor_strong_refs_dataptrs

    def get(self, meta: ArgMetadata):
        """Try to get a buffer from the pool. If a matching tensor is already in the pool, its
        assumed to be available and returned. Otherwise, allocate a new buffer."""

        assert isinstance(meta, ArgMetadata)
        # Find first matching buffer in pool
        for i, buf in enumerate(self.pool):
            if buf.shape == meta.shape and buf.dtype == meta.dtype and buf.device == meta.device:
                return self.pool.pop(i)

        out = meta.zeros_like()
        self.tensor_strong_refs.append(out)
        self.tensor_strong_refs_dataptrs.add(out.data_ptr())
        return out


def tree_map(func, tree):
    """
    Wrapper around pytorch's tree_map, but also recurses into dataclasses.
    """

    def wrapper(arg):
        # If it's a dataclass, map over its fields
        if is_dataclass(arg) and not isinstance(arg, type):
            changes = {
                f.name: tree_map_pyt(func, getattr(arg, f.name)) for f in dataclasses.fields(arg)
            }
            return dataclasses.replace(arg, **changes)

        # Otherwise, apply the user function
        return func(arg)

    return tree_map_pyt(wrapper, tree)


def _check_supported_type(meta):
    """Check if arg meta is a supported type for cudagraph input/outputs."""

    assert isinstance(meta, ArgMetadata)

    # Import inference contexts here to guard against circular import.
    from megatron.core.inference.contexts.dynamic_context import DynamicInferenceContext
    from megatron.core.inference.contexts.static_context import StaticInferenceContext

    _SUPPORTED_TYPES = {
        torch.Tensor,
        type(None),
        bool,
        int,
        str,
        float,
        dataclass,
        StaticInferenceContext,
        DynamicInferenceContext,
        ArgMetadata,
    }
    assert meta.type in _SUPPORTED_TYPES or is_dataclass(
        meta.value
    ), f"Cudagraphs received an arg of type {meta.type} which is not supported."


def _determine_if_first_last_layer_of_this_vp_chunk(base_module):
    """Determine if the given module is the first/last layer of the PP+VPP chunk it belongs to.
    Returns a tuple of two booleans indicating if the module is the first/last layer of the chunk.
    """

    # import modules here to avoid a circular import
    from megatron.core.transformer.transformer_block import get_num_layers_to_build
    from megatron.core.transformer.transformer_layer import get_transformer_layer_offset

    if not hasattr(base_module, "layer_number"):
        return True, True

    # find all first/last layers of this PP stage
    first_layer_numbers = []
    last_layer_numbers = []
    vp_size = base_module.config.virtual_pipeline_model_parallel_size or 1
    for i in range(vp_size):
        # layer numbers are 1-indexed
        layer_offset = get_transformer_layer_offset(base_module.config, vp_stage=i)
        num_layers_to_build = get_num_layers_to_build(base_module.config, vp_stage=i)
        if num_layers_to_build > 0:
            first_layer_numbers.append(layer_offset + 1)
            last_layer_numbers.append(layer_offset + num_layers_to_build)
    return (
        base_module.layer_number in first_layer_numbers,
        base_module.layer_number in last_layer_numbers,
    )


def _clone_nested_tensors(value: Any) -> Any:
    """Recursively clone tensors inside nested containers."""
    if torch.is_tensor(value):
        return value.clone()
    if isinstance(value, (tuple, list)):
        return type(value)(_clone_nested_tensors(v) for v in value)
    if isinstance(value, dict):
        return {k: _clone_nested_tensors(v) for k, v in value.items()}
    if isinstance(value, set):
        raise TypeError(
            "Sets of tensors are unsupported in cudagraph helpers; use list/tuple instead"
        )
    return value


def _ensure_generator_state_is_cudagraph_safe(gen: torch.Generator) -> torch.Generator:
    """Make generator state safe for CUDA graph capture/replay.

    Generator state tensors can become inference tensors if created under `torch.inference_mode()`.
    CUDA graph capture may later attempt in-place updates on that state; this fails for inference
    tensors. Fix the generator *in-place* (preserving identity) by cloning its state outside
    inference mode and setting it back.
    """
    with torch.inference_mode(mode=False):
        if hasattr(gen, "graphsafe_get_state"):
            state = gen.graphsafe_get_state()
        else:
            state = gen.get_state()

        cloned_state = _clone_nested_tensors(state)
        if hasattr(gen, "graphsafe_set_state"):
            gen.graphsafe_set_state(cloned_state)
        else:
            gen.set_state(cloned_state)

    return gen


fwd_buffer_reuse_ref_count = 0
bwd_buffer_reuse_ref_count = 0


class _CudagraphGlobalRecord:
    """A global datastructure that records of the ordering of all _CudaGraphRunner's
    first fwd or bwd passes. 'create_cudagraphs' will use this to create
    cudagraphs in execution order, which is required for cudagraphs sharing a mempool."""

    """A global flag that if true, all cudagraph runners
    fwd and bwd passes will be performed using their cudagraphed versions."""
    cudagraph_created = False

    """A record of fwd and bwd graph creation, populated with 'record_fwd_graph' and
    'record_bwd_graph."""
    cudagraph_record: list[tuple] = []
    cudagraph_inference_record: list[tuple] = []

    """A pool-like data structure to reuse input and output buffers across cudagraph."""
    tensor_reuse_pool = TensorReusePool()

    @classmethod
    def record_fwd_graph(cls, runner, args, kwargs, out):
        """Record a fwd graph to 'cudagraph_record"""
        cls.cudagraph_record.append((runner, "fwd", args, kwargs, out))

    @classmethod
    def record_bwd_graph(cls, runner):
        """Record a bwd graph to 'cudagraph_record"""
        cls.cudagraph_record.append((runner, "bwd"))

    @classmethod
    def create_cudagraphs(cls):
        """Iterate through 'cudagraph_record' creating graphs in the order in which
        they were recorded."""
        # Cudagraphs have already been created, check that no cudagraphed modules ran in eager mode
        if cls.cudagraph_created:
            assert len(cls.cudagraph_record) == 0, (
                "One or more _CudaGraphRunners requested to create a graph after cudagraphs",
                "were already created!",
            )
            return

        # No cudagraphs have been created or recorded, so do nothing
        if len(cls.cudagraph_record) == 0:
            return

        # Otherwise, create all the recorded cudagraphs.
        has_te_modules = False
        if HAVE_TE_GRAPHS:
            for g in cls.cudagraph_record:
                base_module = g[0].base_module
                has_te_modules = has_te_modules or any(
                    [isinstance(m, TransformerEngineBaseModule) for m in base_module.modules()]
                )

        progress_bar = enumerate(cls.cudagraph_record)
        time_start = time.time()
        mem_stats_start = torch.cuda.memory_stats()

        if torch.distributed.get_rank() == 0:
            if HAVE_TQDM:
                progress_bar = tqdm(
                    progress_bar, "create cuda graphs", total=len(cls.cudagraph_record)
                )

            logger.info(f"Creating {len(cls.cudagraph_record)} CUDA graphs")
            if not HAVE_TE_GRAPHS:
                logger.warning(
                    "Transformer Engine was not detected while capturing training cudagraphs."
                    "As a result cudagraph memory overhead may significantly increase as "
                    "Transformer Engine's weak reference feature is used on cudagraph input and "
                    "output buffers. This allows the memory of input and output buffers to be "
                    " reclaimed across graphs while remaining valid buffers for when the graph "
                    "is replayed. For more information see: "
                    "https://github.com/NVIDIA/TransformerEngine/blob/v2.10/transformer_engine/pytorch/utils.py#L759"  # pylint: disable=line-too-long
                )

        gc.collect()
        torch.cuda.empty_cache()

        _set_capture_start()
        if has_te_modules:
            te_set_capture_start()

        global bwd_buffer_reuse_ref_count, fwd_buffer_reuse_ref_count

        def format_mem_bytes(mem_bytes):
            for power, suffix in [(4, "tb"), (3, "gb"), (2, "mb"), (1, "kb"), (0, "bytes")]:
                suffix_bytes = 1024**power
                if mem_bytes >= suffix_bytes:
                    return "%.1f %s" % (mem_bytes / suffix_bytes, suffix)
            return "%d bytes" % mem_bytes

        for g_idx, g in progress_bar:
            if torch.distributed.get_rank() == 0:
                mem_stats = torch.cuda.memory_stats()
                progress_str = "create cuda graphs | mem: alloc %s, res %s" % (
                    format_mem_bytes(mem_stats["allocated_bytes.all.current"]),
                    format_mem_bytes(mem_stats["reserved_bytes.all.current"]),
                )
                if HAVE_TQDM:
                    progress_bar.set_description(progress_str)
                elif g_idx % 100 == 0 or g_idx == len(cls.cudagraph_record) - 1:
                    logger.info(f"{g_idx}/{len(cls.cudagraph_record)}. {progress_str}")

            runner, graph_type = g[0:2]
            if graph_type == 'fwd':
                args, kwargs, out = g[2:]
                runner.create_fwd_graph(args, kwargs, out, clone_inputs=True)
            else:
                assert fwd_buffer_reuse_ref_count == 0
                runner.create_bwd_graph()

        # Memory usage.
        time_end = time.time()
        mem_stats_end = torch.cuda.memory_stats()
        capture_stats = {
            "time": time_end - time_start,
            "allocated_bytes": (
                mem_stats_end["allocated_bytes.all.current"]
                - mem_stats_start["allocated_bytes.all.current"]
            ),
            "reserved_bytes": (
                mem_stats_end["reserved_bytes.all.current"]
                - mem_stats_start["reserved_bytes.all.current"]
            ),
        }

        log_single_rank(
            logger,
            logging.INFO,
            "> built %d cuda graph(s) in %.2f sec, with total memory usage: "
            "allocated %s, reserved %s."
            % (
                len(cls.cudagraph_record),
                capture_stats["time"],
                format_mem_bytes(capture_stats["allocated_bytes"]),
                format_mem_bytes(capture_stats["reserved_bytes"]),
            ),
        )

        # Mark cuda graphs as created.
        for g in cls.cudagraph_record:
            runner = g[0]
            runner.cudagraph_created = True

        # Reset global record.
        cls.cudagraph_created = True
        cls.cudagraph_record = []

        # Finished capturing.
        _set_capture_end()
        if has_te_modules:
            te_set_capture_end()

        torch.cuda.set_stream(torch.cuda.default_stream())

        # Return capture time and memory usage.
        return capture_stats


def create_cudagraphs():
    """Should be called at the end of each schedule function,
    (e.g. forward_backward_pipelining_with_interleaving) in
    `megatron.core.pipeline_parallel.schedules.py`. During the first step, _CudaGraphRunners
    populate _CudagraphGlobalRecord with the global order in which cudagraphs should be created.
    At the end for the first step, this function calls each runner's `create_fwd_graph` and
    `create_bwd_graph` in the order recorded in _CudagraphGlobalRecord, which allows cudagraphs
    to be created in execution order, which allows multiple cudagraphs to share a single
    memory pool, minimizing cudagraph memory usage."""

    return _CudagraphGlobalRecord.create_cudagraphs()


def delete_cuda_graphs():
    """Delete all CUDA graphs."""

    # Reset runners.
    for record in [
        *_CudagraphGlobalRecord.cudagraph_record,
        *_CudagraphGlobalRecord.cudagraph_inference_record,
    ]:
        runner = record[0]
        assert isinstance(runner, _CudaGraphRunner)

        runner.cudagraph_created = False
        runner.fwd_graph_recorded = False
        runner.bwd_graph_recorded = False
        runner.fwd_graph = None
        runner.bwd_graph = None
        runner.mempool = None

    # Reset global tracking state
    _CudagraphGlobalRecord.cudagraph_created = False
    _CudagraphGlobalRecord.cudagraph_record = []
    _CudagraphGlobalRecord.cudagraph_inference_record = []

    # TODO: Optional?: Force garbage collection to clean up memory
    gc.collect()
    torch.cuda.empty_cache()

    CudaGraphManager.global_mempool = None


class _GraphStatus(Enum):
    """An Enum to track if a cudagraph is ready to perform a forward or backward pass."""

    FWD_READY = 0  # Set immediately after a bwd pass
    BWD_READY = 1  # Set immediately after a fwd pass


class _CudagraphRecordNode(torch.autograd.Function):
    """Inserts a noop node into the autograd graph, used to record when a bwd graph needs
    to be created."""

    @staticmethod
    def forward(ctx, runner, inputs):
        """Forward pass, does nothing but registers an autograd node."""

        assert (
            runner.status == _GraphStatus.FWD_READY
        ), "Tried calling the fwd cudagraph when the bwd cudagraph was expected to be called next!"

        ctx.runner = runner
        return inputs

    @staticmethod
    def backward(ctx, grads):
        """If this is the first bwd pass of this runner, record that a
        bwd graph needs to be created."""

        runner = ctx.runner
        assert (
            runner.status == _GraphStatus.BWD_READY
        ), "Tried calling the bwd cudagraph when the fwd cudagraph was expected to be called next!"
        runner.status = _GraphStatus.FWD_READY
        if not runner.bwd_graph_recorded:
            _CudagraphGlobalRecord.record_bwd_graph(runner)
            runner.bwd_graph_recorded = True

        return None, grads


class _CudagraphReplayNode(torch.autograd.Function):
    """Replays the runner's cudagraphs with autograd. Handles copying data into/out of the
    cudagraph io and fp8/fp4 if used."""

    @staticmethod
    def forward(ctx, runner, is_first_microbatch, *inputs):
        """Replay the forward graph of the passed runner."""

        assert (
            runner.fwd_graph is not None
        ), "Tried replaying fwd cudagraph before calling 'create_fwd_cudagraph!"
        assert (
            runner.status == _GraphStatus.FWD_READY
        ), "Tried calling the fwd cudagraph when the bwd cudagraph was expected to be called next!"
        assert len(inputs) == len(
            runner.fwd_graph_input_surface
        ), "Fwd cudagraph received a different number of tensors than what it was graphed with!"

        # Copy new data into fwd graph input buffer
        need_copy_inputs = []
        for user_input, cudagraph_input in zip(inputs, runner.fwd_graph_input_surface):
            if (
                hasattr(cudagraph_input, "can_skip_replay_copy")
                and cudagraph_input.can_skip_replay_copy
            ):
                need_copy_inputs.append(user_input)
                assert user_input.data_ptr() == cudagraph_input.data_ptr()
            else:
                if user_input.data_ptr() != cudagraph_input.data_ptr():
                    cudagraph_input.copy_(user_input)

        ctx.runner = runner
        ctx.save_for_backward(*need_copy_inputs)

        if runner.fp8_enabled or runner.fp4_enabled:
            if isinstance(FP8GlobalStateManager.get_fp8_recipe(), te.common.recipe.DelayedScaling):
                for m in runner.base_module.modules():
                    if isinstance(m, TransformerEngineBaseModule):
                        m.fp8_meta["fp8_group"] = FP8GlobalStateManager.get_fp8_group()
                        m.fp8_meta["recipe"] = FP8GlobalStateManager.get_fp8_recipe()

                        if is_te_min_version("1.13.0"):
                            FP8GlobalStateManager.add_fp8_tensors_to_global_buffer(m.fp8_meta)
                        else:
                            FP8GlobalStateManager.add_fp8_tensors_to_global_buffer(
                                m.fp8_meta, fp8_weights=m._get_fp8_params()
                            )

            # Note that FP8GlobalStateManager.is_first_fp8_module() is inacccurate as each
            # layer may be in its own fp8 context, when the fp8 recipe != delayed_scaling
            if runner.is_first_layer and (runner.fp8_param_cache_updated != is_first_microbatch):
                FP8GlobalStateManager.set_skip_fp8_weight_update_tensor(not is_first_microbatch)
                runner.fp8_param_cache_updated = is_first_microbatch

        runner.fwd_graph.replay()
        return runner.fwd_graph_output_surface

    @staticmethod
    def backward(ctx, *grads):
        """Replay the backward graph of the passed runner."""

        runner = ctx.runner
        assert (
            runner.bwd_graph is not None
        ), "Tried replaying bwd cudagraph before calling 'create_bwd_cudagraph'!"
        assert (
            runner.status == _GraphStatus.BWD_READY
        ), "Tried calling the bwd cudagraph when the fwd cudagraph was expected to be called next!"
        assert len(grads) == len(
            runner.static_grad_outputs
        ), "Bwd cudagraph received a different number of tensors than what it was graphed with!"

        need_copy_inputs = list(ctx.saved_tensors)
        for cudagraph_input in runner.fwd_graph_input_surface:
            if (
                hasattr(cudagraph_input, "can_skip_replay_copy")
                and cudagraph_input.can_skip_replay_copy
            ):
                cudagraph_input.copy_(need_copy_inputs.pop(0))

        # Copy new data into bwd graph input buffer
        for user_output_grad, cudagraph_output_grad in zip(grads, runner.static_grad_outputs):
            if cudagraph_output_grad is None:
                continue
            if user_output_grad.data_ptr() != cudagraph_output_grad.data_ptr():
                cudagraph_output_grad.copy_(user_output_grad)

        runner.bwd_graph.replay()
        runner.status = _GraphStatus.FWD_READY

        # Update FP8 scale factors if needed
        if runner.fp8_enabled and isinstance(
            FP8GlobalStateManager.get_fp8_recipe(), te.common.recipe.DelayedScaling
        ):
            FP8GlobalStateManager.reduce_and_update_fp8_tensors(forward=False)

        # If using gradient_accumulation_fusion, whenever `main_grad` is calculated
        # the `grad_added_to_main_grad` attribute is expected to set. However when using
        # cudagraphs this doesn't occur so we emulate this behavior here.
        for param, grad_added in runner.groundtruth_grad_added_to_main_grad.items():
            param.grad_added_to_main_grad = grad_added

        # Replaying the next bwd graph destroys the data held in static_grad_inputs, so clone
        # wgrads as autograd may launch the next graph before wgrads are accumulated
        dgrads = runner.static_grad_inputs[: runner.num_dgrads]
        wgrads = (g.clone() for g in runner.static_grad_inputs[runner.num_dgrads :])

        return None, None, *dgrads, *wgrads


class _CudaGraphRunner(torch.nn.Module):
    """Represents the execution of a cudagraphed module for a single microbatch.
    If there are multiple outstanding microbatches per module, such as for pipeline parallelism,
    CudaGraphManager automatically creates multiple _CudaGraphRunners per module."""

    def __init__(
        self,
        base_module: MegatronModule,
        mempool: int,
        fwd_graph_input_args: List[Any],
        fwd_graph_input_kwargs: Dict[str, Any],
        func,
        need_backward,
    ):
        """Creates a _CudaGraphRunner, which holds a single pair of fwd and bwd cudagraphs, which
        are not created until this runner records its graph creation into
        '_CudagraphGlobalRecord', and 'create_cudagraphs()' is called."""

        super().__init__()

        self.base_module = base_module
        self.mempool = mempool

        self.fwd_graph_input_arg_metas = [ArgMetadata(a) for a in fwd_graph_input_args]
        self.fwd_graph_input_kwarg_metas = {
            k: ArgMetadata(a) for k, a in fwd_graph_input_kwargs.items()
        }

        self.fwd_graph = None
        self.bwd_graph = None

        self.fwd_graph_recorded = False
        self.bwd_graph_recorded = False
        self.cudagraph_created = False
        self.status = _GraphStatus.FWD_READY

        self.fuse_wgrad_accumulation = False
        self.backward_retain_grad = False
        self.fp8_enabled = False
        self.fp4_enabled = False
        self.deallocate_pipeline_outputs = False

        self.grad_enabled = need_backward and torch.is_grad_enabled()
        self.func = super(MegatronModule, self.base_module).__call__ if func is None else func
        self.is_first_layer, self.is_last_layer = _determine_if_first_last_layer_of_this_vp_chunk(
            base_module
        )

        # We use this attribute to record the value of 'is_first_microbatch' each fwd cudagraph
        # replay so that way we only update the value of this flag in FP8GlobalStateManager when
        # it changes which incurs an HtoD sync
        if self.is_first_layer:
            self.fp8_param_cache_updated = None

        if hasattr(self.base_module, "config") and isinstance(
            self.base_module.config, TransformerConfig
        ):
            self.fuse_wgrad_accumulation = self.base_module.config.gradient_accumulation_fusion
            self.backward_retain_grad = self.base_module.config.cuda_graph_retain_backward_graph
            self.deallocate_pipeline_outputs = self.base_module.config.deallocate_pipeline_outputs
            self.num_warmup_steps = self.base_module.config.cuda_graph_warmup_steps
            self.fp8_enabled = self.base_module.config.fp8 is not None
            self.fp4_enabled = self.base_module.config.fp4 is not None
            self.fp8_runtime_enabled = None
            self.fp4_runtime_enabled = None

            if self.fp8_enabled:
                self.fp8_recipe = FP8GlobalStateManager.get_fp8_recipe()
                FP8GlobalStateManager.set_skip_fp8_weight_update_tensor(False)

            if self.fp4_enabled:
                from megatron.core.fp4_utils import get_fp4_recipe  # to avoid circular import

                self.fp4_recipe = get_fp4_recipe(self.base_module.config)
                FP8GlobalStateManager.set_skip_fp8_weight_update_tensor(False)

    def __str__(self):
        return "%s; hid %s" % (
            self.base_module.__class__.__name__,
            tuple(self.fwd_graph_input_kwarg_metas["hidden_states"].shape),
        )

    def get_quantization_context(self):
        """Return appropriate quantization context (FP8 or FP4) in cudagraph mode."""
        if self.fp8_runtime_enabled:
            from megatron.core.fp8_utils import get_fp8_context  # to avoid circular import

            return get_fp8_context(self.base_module.config, self.base_module.layer_number - 1)
        elif self.fp4_runtime_enabled:
            from megatron.core.fp4_utils import get_fp4_context  # to avoid circular import

            return get_fp4_context(self.base_module.config, self.base_module.layer_number - 1)
        else:
            return nullcontext()

    def get_connected_params(self, outputs):
        """Iterate through the autograd graph of 'outputs' and returns all parameters connected.
        In theory this should return all parameters that return a nonzero wgrad when computing
        the backward pass of 'outputs'."""
        # Flatten outputs and start traversal from roots that require gradients
        args = (outputs,) if torch.is_tensor(outputs) else outputs
        stack = [
            t.grad_fn
            for t in self.get_tensors(args, check_types=False)
            if t.requires_grad and t.grad_fn
        ]
        visited, p_ids = set(), set()

        while stack:
            if (fn := stack.pop()) not in visited:
                visited.add(fn)
                # AccumulateGrad nodes (leafs) hold the 'variable' (Parameter) they accumulate into
                if hasattr(fn, 'variable'):
                    p_ids.add(id(fn.variable))
                stack.extend(f for f, _ in fn.next_functions if f)

        # Return module params that were found in the graph, preserving original order
        return tuple(p for p in self.base_module.parameters() if id(p) in p_ids)

    def create_fwd_graph(self, args, kwargs, outputs=None, clone_inputs=True):
        """Create a fwd cudagraph for this runner. Should be called inside
        'create_cudagraphs()'."""

        global fwd_buffer_reuse_ref_count

        self.args = args
        self.kwargs = kwargs
        self.outputs = outputs

        # save grads and other variables that may be affected by graph warmup
        if self.training and torch.is_grad_enabled():
            grad_backup = []
            for param in self.base_module.parameters():
                grad_backup.append(param.main_grad.clone() if hasattr(param, "main_grad") else None)

            saved_fp8_tensors = None
            if self.fp8_enabled:
                if is_te_min_version("1.13.0"):
                    saved_fp8_tensors = save_fp8_tensors([self.base_module], self.fp8_recipe)
                else:
                    saved_fp8_tensors = save_fp8_tensors(
                        [self.base_module], self.fp8_recipe.amax_history_len
                    )
            elif self.fp4_enabled:
                if is_te_min_version("2.7.0.dev0"):
                    saved_fp8_tensors = save_fp8_tensors([self.base_module], self.fp4_recipe)
                else:
                    raise ValueError("FP4 requires TE >= 2.7.0.dev0 for NVFP4BlockScaling support.")

        # cache the moe aux loss if needed, which is accumulated inside the forward pass
        from megatron.core.transformer.transformer_layer import MoETransformerLayer

        is_moe = isinstance(self.base_module, MoETransformerLayer)
        if is_moe:
            from megatron.core.transformer.moe.moe_utils import get_moe_layer_wise_logging_tracker

            tracker = get_moe_layer_wise_logging_tracker()
            cached_aux_losses = {}
            for name in tracker:
                if "values" in tracker[name]:
                    cached_aux_losses[name] = torch.clone(tracker[name]["values"])

        self.fwd_graph = torch.cuda.CUDAGraph()

        # For cases with multiple active RNG states, e.g. TP.
        rng_states = get_all_rng_states()
        with torch.inference_mode(mode=False):
            for gen in rng_states.values():
                self.fwd_graph.register_generator_state(
                    _ensure_generator_state_is_cudagraph_safe(gen)
                )

        def _resolve_input_buffer(ten):
            if not isinstance(ten, ArgMetadata):
                return ten

            # the input tensor is resued from another cudagraph's input or output
            if (
                hasattr(ten, "cg_buffer_metadata")
                and ten.cg_buffer_metadata.fwd_cudagraph_buffer is not None
            ):
                global fwd_buffer_reuse_ref_count
                buf = ten.cg_buffer_metadata.fwd_cudagraph_buffer

                assert (
                    ten.cg_buffer_metadata.is_cudagraph_input
                    and buf.cg_buffer_metadata.capture_reuse_count > 0
                )

                if (
                    ten.cg_buffer_metadata.input_use_count > 1
                    and ten.cg_buffer_metadata.input_use_count
                    == buf.cg_buffer_metadata.capture_reuse_count
                ):
                    can_skip_replay_copy = False
                else:
                    can_skip_replay_copy = True

                buf.cg_buffer_metadata.capture_reuse_count -= 1
                if buf.cg_buffer_metadata.capture_reuse_count == 0:
                    ten.cg_buffer_metadata.fwd_cudagraph_buffer = None
                    fwd_buffer_reuse_ref_count -= 1
            else:
                # need to provide a fresh buffer from the reuse pool
                buf = _CudagraphGlobalRecord.tensor_reuse_pool.get(ten)
                can_skip_replay_copy = False

            buf = buf.detach().requires_grad_(ten.requires_grad)
            buf.can_skip_replay_copy = can_skip_replay_copy
            return buf

        if clone_inputs:
            # if a buffer is used for multiple inputs, create it now
            for ten in self.get_tensors(args, kwargs):
                if (
                    hasattr(ten, 'cg_buffer_metadata')
                    and ten.cg_buffer_metadata.input_use_count > 1
                    and ten.cg_buffer_metadata.fwd_cudagraph_buffer is None
                ):
                    buf = _CudagraphGlobalRecord.tensor_reuse_pool.get(ten)
                    buf.cg_buffer_metadata = deepcopy(ten.cg_buffer_metadata)
                    buf.cg_buffer_metadata.capture_reuse_count = (
                        ten.cg_buffer_metadata.input_use_count
                    )
                    ten.cg_buffer_metadata.fwd_cudagraph_buffer = buf
                    fwd_buffer_reuse_ref_count += 1

            self.fwd_graph_input_args = tree_map(_resolve_input_buffer, args)
            self.fwd_graph_input_kwargs = tree_map(_resolve_input_buffer, kwargs)
        else:
            self.fwd_graph_input_args, self.fwd_graph_input_kwargs = args, kwargs

        self.fwd_graph_input_surface = self.get_tensors(
            self.fwd_graph_input_args, self.fwd_graph_input_kwargs
        )

        ctx = torch.no_grad() if not self.grad_enabled else nullcontext()
        with ctx:
            # warmup again as case graph capture mode may execute a different codepath
            _set_warmup_start()
            for _ in range(self.num_warmup_steps):
                with self.get_quantization_context():

                    def clone_ten(ten):
                        if not torch.is_tensor(ten):
                            return ten
                        return torch.zeros_like(ten).requires_grad_(ten.requires_grad)

                    warmup_args = tree_map(clone_ten, self.fwd_graph_input_args)
                    warmup_kwargs = tree_map(clone_ten, self.fwd_graph_input_kwargs)
                    warmup_outputs = self.func(*warmup_args, **warmup_kwargs)

                if self.grad_enabled:
                    warmup_outputs = self.get_tensors(warmup_outputs)
                    warmup_outputs = tuple(o for o in warmup_outputs if o.requires_grad)
                    input_tensors = self.get_tensors(warmup_args, warmup_kwargs)
                    torch.autograd.grad(
                        outputs=warmup_outputs,
                        inputs=tuple(i for i in input_tensors if i.requires_grad),
                        grad_outputs=tuple(torch.zeros_like(o) for o in warmup_outputs),
                        only_inputs=True,
                        allow_unused=True,
                    )

            _set_warmup_end()

            with self.get_quantization_context():
                torch.cuda.synchronize()
                # Register default CUDA generators ourselves (fixed in-place to have normal tensors)
                # before capture begins, to avoid inference-tensor state issues during capture.
                with torch.inference_mode(mode=False):
                    for device_idx in range(torch.cuda.device_count()):
                        default_gen = torch.cuda.default_generators[device_idx]
                        self.fwd_graph.register_generator_state(
                            _ensure_generator_state_is_cudagraph_safe(default_gen)
                        )

                # Freeze GC, to speed up capture time ~15-20x.
                if FREEZE_GC:
                    gc.freeze()

                with torch.cuda.graph(
                    self.fwd_graph, pool=self.mempool, capture_error_mode="thread_local"
                ):
                    fwd_graph_outputs = self.func(
                        *self.fwd_graph_input_args, **self.fwd_graph_input_kwargs
                    )

                # Unfreeze GC.
                if FREEZE_GC:
                    gc.unfreeze()

                    # gc.collect() drops references to unreachable tensors created during capture,
                    # returning their storage to the allocator to avoid a slowdown during replay.
                    # However, it forces expensive global garbage collection, so must be done
                    # only on the last layer per-device to avoid slowing down graph creation.
                    if self.is_last_layer:
                        gc.collect()

        # save cudagraph output buffer
        self.fwd_graph_outputs = fwd_graph_outputs
        self.fwd_graph_output_surface = self.get_tensors(fwd_graph_outputs)

        for fwd_graph_out, o in zip(
            self.fwd_graph_output_surface, self.get_arg_metas(self.outputs)
        ):
            assert hasattr(o, "cg_buffer_metadata") and o.cg_buffer_metadata.is_cudagraph_output

            if (
                o.cg_buffer_metadata.is_cudagraph_input
                and o.cg_buffer_metadata.fwd_cudagraph_buffer is None
            ):
                fwd_graph_out.cg_buffer_metadata = deepcopy(o.cg_buffer_metadata)
                fwd_graph_out.cg_buffer_metadata.capture_reuse_count = (
                    o.cg_buffer_metadata.cudagraph_reuse_ref_count
                )
                o.cg_buffer_metadata.fwd_cudagraph_buffer = fwd_graph_out
                fwd_buffer_reuse_ref_count += 1

        # if an input buffer requires a copy, and does not have metadata attached to it at this
        # point, it will not be reused after this forward pass, so return it to the pool
        for buf in self.fwd_graph_input_surface:
            if (
                hasattr(buf, "can_skip_replay_copy")
                and not buf.can_skip_replay_copy
                and not hasattr(buf, "cg_buffer_metadata")
            ):
                assert _CudagraphGlobalRecord.tensor_reuse_pool.owns(buf)
                _CudagraphGlobalRecord.tensor_reuse_pool.insert(buf)

        if self.training and torch.is_grad_enabled():
            assert (
                len(self.fwd_graph_output_surface) > 0
            ), """Tried graphing a module that returned no tensors in training mode, 
                however the graphed module must output at least one tensor, 
                so that a corresponding backward node may be registered in the autograd graph."""

            self.params_to_backprop = self.get_connected_params(fwd_graph_outputs)
            self.num_wgrads = len(self.params_to_backprop)
            self.num_dgrads = len(self.fwd_graph_input_surface)
            self.fwd_graph_input_surface = self.fwd_graph_input_surface + self.params_to_backprop

            if self.fp8_enabled:
                restore_fp8_tensors([self.base_module], saved_fp8_tensors)
            # restore cached grads
            for main_grad_copy, param in zip(grad_backup, self.base_module.parameters()):
                if main_grad_copy is not None:
                    param.main_grad.copy_(main_grad_copy)

        if is_moe:
            for name in tracker:
                tracker[name]["values"].copy_(cached_aux_losses[name])

    def create_bwd_graph(self):
        """Create a bwd cudagraph for this runner. Should be called inside
        'create_cudagraphs()'."""

        # unlike 'fwd_buffer_reuse_ref_count', 'bwd_buffer_reuse_ref_count' may not decrement
        # to 0 when activation checkpointing is used. See [interaction with recompute].
        global bwd_buffer_reuse_ref_count

        assert self.grad_enabled
        self.bwd_graph = torch.cuda.CUDAGraph()

        # For cases with multiple active RNG states, e.g. TP.
        for _, state in get_all_rng_states().items():
            self.bwd_graph.register_generator_state(state)

        self.static_grad_outputs = []
        for o in self.get_arg_metas(self.outputs):
            out_grad = None
            if o.requires_grad:
                # TODO: (jiemingz) [interaction with recompute]
                # for activation recompute, the fwd pass is rerun in the backward pass and
                # the metadata we attach in record_graph_capture is lost. As a result the next
                # cudagraph expects the buffer to be provided 'fwd_cudagraph_buffer' but is missing.
                # So, we cannot always assume this metadata exists. Consequently, there are extra
                # copies between the outputs of the fwd-bwd pass and the bwd pass.
                if (
                    o.cg_buffer_metadata.is_cudagraph_input
                    and o.cg_buffer_metadata.bwd_cudagraph_buffer is not None
                ):
                    o.cg_buffer_metadata.bwd_cudagraph_buffer.shape == o.shape

                    out_grad = o.cg_buffer_metadata.bwd_cudagraph_buffer
                    o.cg_buffer_metadata.bwd_cudagraph_buffer = None
                    out_grad.cg_buffer_metadata.capture_reuse_count -= 1
                    bwd_buffer_reuse_ref_count -= 1
                else:
                    out_grad = _CudagraphGlobalRecord.tensor_reuse_pool.get(o)
                out_grad.requires_grad = True
            self.static_grad_outputs.append(out_grad)

        # Freeze GC, to speed up capture time ~15-20x.
        if FREEZE_GC:
            gc.freeze()

        with torch.cuda.graph(self.bwd_graph, pool=self.mempool):
            grad_inputs = torch.autograd.grad(
                outputs=tuple(o for o in self.fwd_graph_output_surface if o.requires_grad),
                inputs=tuple(i for i in self.fwd_graph_input_surface if i.requires_grad),
                grad_outputs=tuple(o for o in self.static_grad_outputs if o is not None),
                retain_graph=self.backward_retain_grad,
                only_inputs=True,
                allow_unused=True,
            )

        # Unfreeze GC.
        if FREEZE_GC:
            gc.unfreeze()

        # Constructs a tuple suitable for returning from Graphed.backward:
        # Pads out the actually-needed grads with Nones in gradient slots for inputs
        # that don't require grad
        grad_inputs = list(grad_inputs)
        self.static_grad_inputs = []
        for input_tensor in self.get_arg_metas(self.args, self.kwargs):
            if input_tensor.requires_grad:
                input_grad = grad_inputs.pop(0)
                input_grad.cg_buffer_metadata = deepcopy(input_tensor.cg_buffer_metadata)
                if input_tensor.cg_buffer_metadata.is_cudagraph_output:
                    if input_tensor.cg_buffer_metadata.bwd_cudagraph_buffer is None:
                        input_tensor.cg_buffer_metadata.bwd_cudagraph_buffer = input_grad
                        input_grad.cg_buffer_metadata.capture_reuse_count += 1
                        bwd_buffer_reuse_ref_count += 1
                self.static_grad_inputs.append(input_grad)
            else:
                self.static_grad_inputs.append(None)

        # at this point static_grad_inputs hold the input dgrads, add the wgrads next
        assert self.num_wgrads == len(grad_inputs)
        self.static_grad_inputs.extend(grad_inputs)
        self.static_grad_inputs = tuple(self.static_grad_inputs)
        self.static_grad_outputs = tuple(self.static_grad_outputs)

        self.groundtruth_grad_added_to_main_grad = {}
        if self.fuse_wgrad_accumulation:
            for param in self.params_to_backprop:
                if hasattr(param, "grad_added_to_main_grad"):
                    self.groundtruth_grad_added_to_main_grad[param] = param.grad_added_to_main_grad

        # After backward pass grad_output buffers are no longer used and returned to the pool
        for ten in self.static_grad_outputs:
            if torch.is_tensor(ten):
                # Check that the tensor is not in use. This scenario may occur when a cudagraph
                # passes its input directly as an output, and places this output as the
                # input of a subsequent cudgraph, leading to a grad output buffer to be still in use
                # even after the backward pass.
                reuse_count = (
                    ten.cg_buffer_metadata.capture_reuse_count
                    if hasattr(ten, "cg_buffer_metadata")
                    else 0
                )

                if _CudagraphGlobalRecord.tensor_reuse_pool.owns(ten) and reuse_count == 0:
                    _CudagraphGlobalRecord.tensor_reuse_pool.insert(ten)

        # now weakref everything
        if HAVE_TE_GRAPHS:

            def replace_with_weak_ref(arg):
                if not torch.is_tensor(arg):
                    return arg

                try:
                    ref = make_weak_ref(arg)
                except RuntimeError:
                    # Fallback to keeping a strong reference. There is a known bug where some
                    # dtypes (e.g. torch.float64) are not mapped to a representation in
                    # transformer_engine/pytorch/utils.py.
                    if torch.distributed.get_rank() == 0:
                        logger.warning(
                            f"Could not create weak ref for tensor with dtype {arg.dtype}; "
                            f"keeping strong ref with a potential memory overhead."
                        )
                    return arg
                ref.requires_grad = arg.requires_grad
                if hasattr(arg, "can_skip_replay_copy"):
                    ref.can_skip_replay_copy = arg.can_skip_replay_copy
                return ref

            self.fwd_graph_input_surface = tree_map(
                replace_with_weak_ref, self.fwd_graph_input_surface
            )
            self.fwd_graph_input_args = tree_map(replace_with_weak_ref, self.fwd_graph_input_args)
            self.fwd_graph_input_kwargs = tree_map(
                replace_with_weak_ref, self.fwd_graph_input_kwargs
            )
            self.fwd_graph_output_surface = tree_map(
                replace_with_weak_ref, self.fwd_graph_output_surface
            )
            # It is safe to weakref static_grad_inputs as any inuse input grads have a strong ref
            # stored in 'bwd_cudagraph_buffer'
            self.static_grad_inputs = tree_map(replace_with_weak_ref, self.static_grad_inputs)
            self.static_grad_outputs = tree_map(replace_with_weak_ref, self.static_grad_outputs)

        delattr(self, "args")
        delattr(self, "kwargs")
        delattr(self, "outputs")

    def apply_cudagraph_record_metadata(self, args, kwargs, outputs):
        """Attaches graph capture metadata to all passed in tensors."""

        for t in self.get_tensors(args, kwargs):
            if not hasattr(t, "cg_buffer_metadata"):
                t.cg_buffer_metadata = CudagraphBufferMetadata()

            t.cg_buffer_metadata.is_cudagraph_input = True
            t.cg_buffer_metadata.input_use_count += 1

            if t.cg_buffer_metadata.is_cudagraph_output:
                t.cg_buffer_metadata.cudagraph_reuse_ref_count += 1

        # mark all outputs, so that the fwd graph we may reuse cudagraph output buffers as inputs
        for o in self.get_tensors(outputs):
            o.cg_buffer_metadata = CudagraphBufferMetadata()
            o.cg_buffer_metadata.is_cudagraph_output = True

    def record_graph_capture(self, args, kwargs):
        """Records the data needed to create this runner's forward cudagraph.
        The first pass records a graph and appends the runner to _CudagraphGlobalRecord.
        The actual cudagraph will be created when 'create_cudagraphs()` is called. Subsequent
        passes should replay the graph."""

        # Run the forward pass as normal in eager mode.
        out = self.func(*args, **kwargs)

        if type(out) != tuple:
            out = (out,)

        # Register a noop autograd node that toggles `self.graph_status` in the bwd pass, which
        # tracks when the runner completes its bwd pass.
        # If it's the first bwd encountered by this runner, record it to _CudagraphGlobalRecord
        # We record the noop autograd node to the first output tensor. This is sufficient for
        # TransformerLayer and MambaLayer as their output is just the hidden_states.
        out = tuple(
            [
                _CudagraphRecordNode.apply(self, o) if torch.is_tensor(o) and i == 0 else o
                for i, o in enumerate(out)
            ]
        )

        if not self.fwd_graph_recorded:
            logger.debug(f"Recording forward graph creation...")

            self.apply_cudagraph_record_metadata(args, kwargs, out)

            def _replace_with_meta(arg):
                return ArgMetadata(arg) if torch.is_tensor(arg) else arg

            m_args = tree_map(_replace_with_meta, args)
            m_kwargs = tree_map(_replace_with_meta, kwargs)
            m_out = tree_map(_replace_with_meta, out)
            _CudagraphGlobalRecord.record_fwd_graph(self, m_args, m_kwargs, m_out)

            if HAVE_TE_GRAPHS:
                if FP8GlobalStateManager.is_fp8_enabled():
                    # check if the low precision recipe is either fp4 or fp8
                    if is_te_min_version("2.7.0.dev0"):
                        from transformer_engine.common.recipe import NVFP4BlockScaling

                        recipe = FP8GlobalStateManager.get_fp8_recipe()
                        if isinstance(recipe, NVFP4BlockScaling):
                            self.fp4_runtime_enabled = True
                        else:
                            self.fp8_runtime_enabled = True
                    else:
                        self.fp8_runtime_enabled = True

            self.fwd_graph_recorded = True

        if len(out) == 1:
            return out[0]
        return tuple(out)

    def replay_graph_capture(self, is_first_microbatch, args, kwargs):
        """Replay the fwd cuda graph with autograd."""

        # Arguments passed to a cudagraph for replay must match the args in the captured graph.
        #  Tensor arguments need to have the same shape, dtype, and device location.
        #  All other arguments must have the exact same memory addresses for graph safety.
        mismatch_errors = self.get_mismatch_errors(args, kwargs)
        if mismatch_errors:
            error_msg = "CUDA graph argument mismatch:\n" + "\n".join(mismatch_errors)
            raise AssertionError(error_msg)

        inp_tensors = self.get_tensors(args, kwargs, check_types=False)
        if self.grad_enabled:
            func_args = inp_tensors + self.params_to_backprop
        else:
            func_args = inp_tensors

        out = _CudagraphReplayNode.apply(self, is_first_microbatch, *func_args)

        out_iter = iter(self.to_list(out))
        fwd_outputs = self.to_list(self.fwd_graph_outputs)
        return tuple(next(out_iter) if torch.is_tensor(o) else o for o in fwd_outputs)

    def get_mismatch_errors(self, args, kwargs):
        """Return list of detailed errors for mismatched cudagraph args."""
        errors = []

        def add_error(msg):
            errors.append(f"  - {msg}")

        def check(val, ref, context):

            assert isinstance(val, ArgMetadata)
            assert isinstance(ref, ArgMetadata)

            _check_supported_type(val)
            _check_supported_type(ref)

            if val.type != ref.type and not (is_dataclass(val.value) and is_dataclass(ref.value)):
                add_error(
                    f"Type mismatch at {context}: Received {val.type} but expected {ref.type}"
                )
                return False

            if ref.type == torch.Tensor or issubclass(ref.type, torch.Tensor):
                mismatches = []
                if val.shape != ref.shape:
                    mismatches.append(f"Received shape {val.shape} but expected {ref.shape}")
                if val.dtype != ref.dtype:
                    mismatches.append(f"Received dtype {val.dtype} but expected {ref.dtype}")
                if val.device != ref.device:
                    mismatches.append(f"Received device {val.device} but expected {ref.device}")
                if mismatches:
                    add_error(f"Tensor mismatch at {context}: {', '.join(mismatches)}")

            elif is_dataclass(ref.value):
                for field in dataclasses.fields(ref.value):
                    check(
                        ArgMetadata(getattr(val.value, field.name)),
                        ArgMetadata(getattr(ref.value, field.name)),
                        f"{context}.{field.name}",
                    )
            elif val.value != ref.value:
                add_error(f"Value mismatch at {context}: {val.value} vs {ref.value}")

        # Check positional arguments
        if len(args) != len(self.fwd_graph_input_arg_metas):
            add_error(
                f"Argument count mismatch: {len(args)} vs {len(self.fwd_graph_input_arg_metas)}"
            )
        else:
            for i, (arg, graph_arg_meta) in enumerate(zip(args, self.fwd_graph_input_arg_metas)):
                check(ArgMetadata(arg), graph_arg_meta, f"args[{i}]")

        # Check keyword arguments
        kwargs_keys = set(kwargs.keys())
        graph_keys = set(self.fwd_graph_input_kwarg_metas.keys())

        if missing_keys := graph_keys - kwargs_keys:
            add_error(f"Missing kwargs: {missing_keys}")
        if extra_keys := kwargs_keys - graph_keys:
            add_error(f"Unexpected kwargs: {extra_keys}")

        for k in kwargs_keys & graph_keys:
            check(ArgMetadata(kwargs[k]), self.fwd_graph_input_kwarg_metas[k], f"kwargs['{k}']")

        return errors

    def get_arg_metas(self, args, kwargs=None):
        """Replaces all passed in tensors with 'ArgMetadata' and returns them as a list."""
        arg_metas = []

        def collect(item):
            if isinstance(item, ArgMetadata):
                arg_metas.append(item)
            return item  # tree_map expects a return value to rebuild the tree

        tree_map(collect, args)
        if kwargs is not None:
            tree_map(collect, kwargs)

        return arg_metas

    def get_tensors(self, args, kwargs=None, check_types=True):
        """
        Filter and flatten all tensors from args and kwargs using list comprehensions
        and itertools.chain for faster flattening.
        """

        def extract_tensors(arg):
            if check_types:
                _check_supported_type(ArgMetadata(arg))
            if torch.is_tensor(arg):
                return [arg]

            if is_dataclass(arg):
                return [
                    attr
                    for field in dataclasses.fields(arg)
                    if torch.is_tensor(attr := getattr(arg, field.name))
                ]

            return []

        if torch.is_tensor(args):
            return (args,)

        args_tens = [tensor for arg in args for tensor in extract_tensors(arg)] if args else []
        kwargs_tens = (
            [tensor for val in kwargs.values() for tensor in extract_tensors(val)] if kwargs else []
        )

        return tuple(chain(args_tens, kwargs_tens))

    def to_list(self, x):
        """Helper function to wrap an input into a list"""
        return [x] if torch.is_tensor(x) else list(x)


class CudaGraphManager(torch.nn.Module):
    """Creates and runs cudagraphs for a megatron module"""

    """A global mempool for when 'cuda_graph_use_single_mempool' is used."""
    global_mempool = None

    def __init__(
        self, config: TransformerConfig, base_module=None, function_name=None, need_backward=True
    ):
        super().__init__()
        """Creates a CudaGraphManager to manage CUDA graphs for a Megatron module.

        Args:
            config: TransformerConfig object containing CUDA graph settings for memory
                pooling, graph retention, gradient accumulation, FP8/FP4, and warmup steps.
        """
        rng_tracker = get_cuda_rng_tracker()
        self.need_backward = need_backward

        if function_name is not None:
            func = getattr(base_module, function_name)

            def wrapped_func(*args, **kwargs):
                out = self(base_module, args, kwargs)
                return out

            setattr(base_module, function_name, wrapped_func)
        else:
            func = None
        self.func = func

        # need to delay the import here to avoid a circular import
        global HAVE_TE_GRAPHS
        try:
            from megatron.core.extensions.transformer_engine import TECudaRNGStatesTracker
        except ImportError:
            TECudaRNGStatesTracker = None

        assert (
            rng_tracker.is_inference_rng_tracker
            or (HAVE_TE_GRAPHS and isinstance(rng_tracker, TECudaRNGStatesTracker))
            or (isinstance(rng_tracker, CudaRNGStatesTracker) and rng_tracker.use_cudagraphable_rng)
        ), "RNG tracker does not support cudagraphs!"

        assert config.cuda_graph_impl == "local", "Option cuda_graph_impl=local not enabled."
        if torch.cuda.get_device_capability()[0] < 10:
            assert (
                "expandable_segments:True" not in os.getenv("PYTORCH_CUDA_ALLOC_CONF", "")
                or os.getenv("NCCL_GRAPH_REGISTER", "") == "0"
            ), (
                "Setting NCCL_GRAPH_REGISTER=0 to avoid illegal memory access when using "
                "CUDA Graph with PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True."
            )

        self.cudagraph_runners: list[_CudaGraphRunner] = []
        self.inference_cudagraphs_lookup_table: dict = defaultdict(lambda: None)
        self.is_first_microbatch = False

        # Without pipeline parallelism, microbatches execute one at a time.
        # Therefore modules will always execute in the same order, so cudagraphs
        # can both be reused and share a single mempool.
        self.reuse_cudagraphs = parallel_state.get_pipeline_model_parallel_world_size() == 1
        if CudaGraphManager.global_mempool is None:
            CudaGraphManager.global_mempool = torch.cuda.graph_pool_handle()
            # Cudagraph stream capture requires no operations on the default stream prior to the
            # capture, so change to a side stream.
            torch.cuda.set_stream(torch.cuda.Stream())

    def call_ddp_preforward_hook(self, module):
        """Call any DDP pre-forward hooks which are used to launch async data parallel
        param gather. Any other pre-forward hooks are not allowed."""

        from megatron.core.distributed import distributed_data_parallel

        if module._forward_pre_hooks:
            for _, hook in module._forward_pre_hooks.items():
                assert (
                    inspect.getmodule(hook) == distributed_data_parallel
                ), "Tried to cudagraph a module with user registered pre-forward hooks, \
                which is not allowed."
                # Only hooks from Mcore DDP, which take no args, should be called at this point.
                hook(module)

    def get_cudagraph_runner(self, megatron_module, args, kwargs, reuse_cudagraphs):
        '''Returns a valid cudagraph runner for the current forward call.
        The cudagraph corresponding to this call is the first element of 'self.cudagraph_runners'.
        We iterate through the list by 1 for each call, and the number of calls is equal to the
        length of 'self.cudagraph_runners'.
        Otherwise, we assign a mempool per microbatch, which allows cudagraphs to be reused
        over different microbatches by tracking their respective fwd and bwd passes.'''
        if reuse_cudagraphs:
            is_inference_mode = 'inference_context' in kwargs.keys() and kwargs['inference_context']
            if is_inference_mode:
                is_static_batching = kwargs['inference_context'].is_static_batching()
                if is_static_batching:
                    batch_size = kwargs['hidden_states'].shape[0]
                    is_decode_only = kwargs["inference_context"].is_decode_only()
                    runner = self.inference_cudagraphs_lookup_table[(batch_size, is_decode_only)]
                else:
                    padded_batch_dimensions = kwargs['inference_context'].padded_batch_dimensions
                    runner = self.inference_cudagraphs_lookup_table[padded_batch_dimensions]
            else:
                # Todo: For training, we could also cache runners based on input shape.
                # If autograd is currently disabled, it doesnt matter if a runner was created
                # with or without autograd, so just get the first fwd ready runner.
                require_grad = self.need_backward and torch.is_grad_enabled()

                def is_valid(r):
                    return (
                        r.status == _GraphStatus.FWD_READY
                        and not r.get_mismatch_errors(args, kwargs)
                        and (not require_grad or r.grad_enabled)
                    )

                # We must choose the first available runner, as the order of
                # self.cudagraph_runners corresponds to the capture order.
                runner = next((r for r in self.cudagraph_runners if is_valid(r)), None)

            if runner is None:
                if _CudagraphGlobalRecord.cudagraph_created:
                    assert False, (
                        f"`cudagraph_created` is set to True but no matching cudagraph "
                        f"runners were found. This module has {len(self.cudagraph_runners)} "
                        f"existing runners. Use `get_mismatch_errors` to debug mismatches."
                    )
                else:
                    runner = _CudaGraphRunner(
                        megatron_module,
                        CudaGraphManager.global_mempool,
                        args,
                        kwargs,
                        self.func,
                        self.need_backward,
                    )
                    self.cudagraph_runners.append(runner)
                    if is_inference_mode:
                        # Cache the newly created runner in the inference lookup table.
                        if is_static_batching:
                            self.inference_cudagraphs_lookup_table[(batch_size, is_decode_only)] = (
                                runner
                            )
                        else:
                            self.inference_cudagraphs_lookup_table[padded_batch_dimensions] = runner
        else:
            # Create cudagraphs for every microbatch
            if _CudagraphGlobalRecord.cudagraph_created:
                runner = self.cudagraph_runners[0]
                assert runner.status == _GraphStatus.FWD_READY
                self.cudagraph_runners = self.cudagraph_runners[1:] + self.cudagraph_runners[:1]
            else:
                runner = _CudaGraphRunner(
                    megatron_module,
                    CudaGraphManager.global_mempool,
                    args,
                    kwargs,
                    self.func,
                    self.need_backward,
                )
                self.cudagraph_runners.append(runner)

        return runner

    def __call__(self, megatron_module, args, kwargs):
        """Calls the forward pass of the cudagraphed module.

        Args:
            megatron_module (torch.nn.module): The megatron module to be graphed and run

            args (tuple):  The positional args to be passed to the module.

            kwargs (dict):  The keyword args to be passed to the module.
        """
        is_inference_mode = 'inference_context' in kwargs.keys() and kwargs['inference_context']
        is_in_checkpoint_fwd = is_checkpointing()
        if HAVE_TE_GRAPHS:
            is_in_checkpoint_fwd = is_in_checkpoint_fwd or is_fp8_activation_recompute_enabled()

        if _CudagraphGlobalRecord.cudagraph_created:
            if self.training and torch.is_grad_enabled():
                # Trigger Mcore DDP pre-forward hooks
                self.call_ddp_preforward_hook(megatron_module)
                for module in megatron_module.modules():
                    self.call_ddp_preforward_hook(module)

            runner = self.get_cudagraph_runner(megatron_module, args, kwargs, self.reuse_cudagraphs)
            out = runner.replay_graph_capture(self.is_first_microbatch, args, kwargs)
        else:
            if is_inference_mode:
                # Inference generation mode creates graphs immediately
                runner = self.get_cudagraph_runner(megatron_module, args, kwargs, True)

                if not runner.fwd_graph_recorded:
                    # Reuse graph input-output buffers for inference
                    local_args, local_kwargs = args, kwargs
                    if not runner.is_first_layer:
                        # Find previous layer's runner in the global record
                        try:
                            previous_runner = next(
                                r
                                for r in _CudagraphGlobalRecord.cudagraph_inference_record
                                if (
                                    r[0].base_module.layer_number
                                    == runner.base_module.layer_number - 1
                                    and r[0].fwd_graph is not None
                                    and ArgMetadata(r[3]['hidden_states'])
                                    == ArgMetadata(kwargs['hidden_states'])
                                )
                            )
                            # Replace the hidden states from previous layer's output buffer
                            local_kwargs = dict(kwargs)
                            local_kwargs['hidden_states'] = previous_runner[0].fwd_graph_outputs[0]
                        except StopIteration:
                            # No match found for previous layer, continue with no buffer reuse
                            pass

                    runner.create_fwd_graph(
                        local_args, local_kwargs, outputs=None, clone_inputs=runner.is_first_layer
                    )
                    runner.fwd_graph_recorded = True
                    runner.cudagraph_created = True
                    runner = runner.eval()

                    # Record this to the global execution record
                    _CudagraphGlobalRecord.cudagraph_inference_record.append(
                        (runner, "fwd", args, kwargs)
                    )

                # Now replay the graph
                out = runner.replay_graph_capture(self.is_first_microbatch, args, kwargs)
            elif self.training or is_in_checkpoint_fwd:
                runner = self.get_cudagraph_runner(
                    megatron_module, args, kwargs, self.reuse_cudagraphs
                )
                # check if a layer is frozen during training.
                if not torch.is_grad_enabled():
                    # If the layer is frozen, we need to set the runner to eval mode.
                    runner.eval()
                out = runner.record_graph_capture(args, kwargs)
            else:
                # No cudagraphs were found in training mode with grad disabled, so fallback to
                # eager since autograd is needed to correctly trace the backward graph.
                if self.func is not None:
                    return self.func(*args, **kwargs)
                else:
                    return super(MegatronModule, megatron_module).__call__(*args, **kwargs)

        self.is_first_microbatch = False
        # If forward only, next replay should be a forward pass as well
        if is_inference_mode or not torch.is_grad_enabled():
            runner.status = _GraphStatus.FWD_READY
        else:
            runner.status = _GraphStatus.BWD_READY

        return out


# The following functions are for capturing CUDA Graphs using TE make_graphed_callables().
def _layer_is_graphable(layer, config):
    """
    Check if a layer is graphable.
    """

    # Only GraphableMegatronModule can be graphed.
    if not isinstance(layer, GraphableMegatronModule):
        return False

    # If cuda_graph_scope is not set, every layer is graphed.
    if not config.cuda_graph_scope:
        return True

    # import modules here to avoid a circular import
    from megatron.core.ssm.mamba_layer import MambaLayer
    from megatron.core.transformer.identity_op import IdentityOp
    from megatron.core.transformer.mlp import MLP
    from megatron.core.transformer.moe.moe_layer import MoELayer
    from megatron.core.transformer.transformer_layer import TransformerLayer

    if isinstance(layer, MambaLayer) and CudaGraphScope.mamba in config.cuda_graph_scope:
        # mamba layer.
        return True
    if isinstance(layer, TransformerLayer):
        if CudaGraphScope.attn in config.cuda_graph_scope and not (
            isinstance(layer.self_attention, IdentityOp)
            and isinstance(layer.cross_attention, IdentityOp)
        ):
            # attn layer.
            return True
        if (
            CudaGraphScope.moe in config.cuda_graph_scope
            or CudaGraphScope.moe_router in config.cuda_graph_scope
            or CudaGraphScope.moe_preprocess in config.cuda_graph_scope
        ) and isinstance(layer.mlp, MoELayer):
            # moe layer.
            return True
        if CudaGraphScope.mlp in config.cuda_graph_scope and isinstance(layer.mlp, MLP):
            # mlp layer.
            return True
    return False


class TECudaGraphHelper:
    """
    Helper class to capture CUDA Graphs using TE make_graphed_callables().
    It is used in the beginning of the training loop to capture per-layer CUDA Graphs.
    `self.create_cudagraphs()` should be called to capture the CUDA Graphs and
    `self.cuda_graph_set_manual_hooks()` should be called to set manual pre-forward hooks for the
    parameters that are covered by cudagraphs.
    """

    def __init__(self, model, config, seq_length, micro_batch_size, optimizers=[]):
        assert HAVE_TE_GRAPHS, "CUDA Graphs are not supported without TE."
        assert (
            config.cuda_graph_impl == "transformer_engine"
        ), "Option cuda_graph_impl=transformer_engine not enabled."
        assert (
            "expandable_segments:True" not in os.getenv("PYTORCH_CUDA_ALLOC_CONF", "")
            or os.getenv("NCCL_GRAPH_REGISTER", "") == "0"
        ), (
            "Setting NCCL_GRAPH_REGISTER=0 to avoid illegal memory access when using "
            "CUDA Graph with PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True."
        )
        assert CudaGraphScope.full_iteration not in config.cuda_graph_scope, (
            "full_iteration cuda graph is not supported for cuda_graph_impl=transformer_engine. "
            "Please use cuda_graph_impl=local instead."
        )

        self.model = model
        self.config = config
        self.seq_length = seq_length
        self.micro_batch_size = micro_batch_size
        self.optimizers = optimizers
        self.num_model_chunks = len(model)

        # Number of microbatches to capture. The value will be set in _get_cuda_graph_input_data().
        self.num_microbatches = None

        self._discover_layers()

        # Flags to track CUDA Graph state:
        # - _capture_finished: Whether create_cudagraphs() has been called (used by training loop)
        # - _graphs_created: Whether any graphs were actually created (may be False if no
        #   layers found)
        self._capture_finished = False
        self._graphs_created = False

    def _discover_layers(self):
        """Discover captureable layers from the model and populate internal data structures."""
        self.chunks_with_decoder = []
        self.num_layers_per_chunk = []
        self.callables_per_chunk = []
        self.callables_per_chunk_is_mtp = []
        self.flattened_callables = []
        self.flattened_callables_is_mtp = []
        for chunk_number, model_chunk in enumerate(self.model):
            try:
                chunk_with_decoder = get_attr_wrapped_model(
                    model_chunk, 'decoder', allow_none=False, return_model_obj=True
                )
            except RuntimeError:
                num_graphable_layers = 0
                log_on_each_pipeline_stage(
                    logger=logger,
                    tp_group=None,
                    dp_cp_group=None,
                    level=logging.DEBUG,
                    msg=f'Rank {torch.distributed.get_rank()}: '
                    f'No valid layer in model chunk {chunk_number}.',
                )
            else:
                num_decoder_layers = len(chunk_with_decoder.decoder.layers)
                if hasattr(chunk_with_decoder, 'mtp'):
                    num_mtp_layers = len(chunk_with_decoder.mtp.layers)
                else:
                    num_mtp_layers = 0
                num_graphable_layers = 0
                callables, callables_is_mtp = [], []
                for layer_number in range(num_decoder_layers):
                    layer = chunk_with_decoder.decoder.layers[layer_number]
                    if _layer_is_graphable(layer, self.config):
                        num_graphable_layers += 1
                        callables.append(layer)
                        callables_is_mtp.append(False)
                for layer_number in range(num_mtp_layers):
                    layer = chunk_with_decoder.mtp.layers[layer_number].mtp_model_layer
                    if _layer_is_graphable(layer, self.config):
                        num_graphable_layers += 1
                        callables.append(layer)
                        callables_is_mtp.append(True)
                log_on_each_pipeline_stage(
                    logger=logger,
                    tp_group=None,
                    dp_cp_group=None,
                    level=logging.DEBUG,
                    msg=f'Rank {torch.distributed.get_rank()}: '
                    f'{num_decoder_layers} decoder layers and {num_mtp_layers} MTP layers in '
                    f'model chunk {chunk_number}. {num_graphable_layers} graphable layers.',
                )
            finally:
                if num_graphable_layers > 0:
                    self.chunks_with_decoder.append(chunk_with_decoder)
                    self.num_layers_per_chunk.append(num_graphable_layers)
                    self.callables_per_chunk.append(callables)
                    self.callables_per_chunk_is_mtp.append(callables_is_mtp)
                    self.flattened_callables.extend(callables)
                    self.flattened_callables_is_mtp.extend(callables_is_mtp)
                else:
                    self.chunks_with_decoder.append(None)
                    self.num_layers_per_chunk.append(0)
                    self.callables_per_chunk.append([])
                    self.callables_per_chunk_is_mtp.append([])

        log_on_each_pipeline_stage(
            logger=logger,
            tp_group=None,
            dp_cp_group=None,
            level=logging.INFO,
            msg=f'Rank {torch.distributed.get_rank()}: '
            f'{len(self.flattened_callables)} graphable layers.',
        )

    def capture_finished(self):
        """
        Returns whether create_cudagraphs() has been called.

        This is used by the training loop to determine if the capture process has run.
        Returns True after create_cudagraphs() completes, regardless of whether any
        graphs were actually created.
        """
        return self._capture_finished

    def graphs_created(self):
        """
        Returns whether any CUDA Graphs were actually created.

        This returns False if create_cudagraphs() was called but no graphable layers
        were found, and True if at least one graph was successfully created.
        """
        return self._graphs_created

    def _get_sample_arguments(self, order, chunk_id_list=None):
        """
        Generate sample arguments and keyword arguments for CUDA Graph capturing with
        memory-optimized buffer reuse.

        This method creates static input tensors for each (layer, microbatch) pair needed
        by TE's make_graphed_callables(). It optimizes memory usage by reusing input buffers
        across non-overlapping forward passes based on the pipeline parallel schedule.
        This optimization is essential for reducing peak memory during CUDA Graph capturing with
        many microbatches, as it allows buffers to be reused instead of allocating new ones for
        later microbatches.

        Memory Optimization Strategy:
            The 1F1B (one-forward-one-backward) interleaved schedule in pipeline parallelism
            means that once a microbatch's backward pass completes, its input buffers are no
            longer needed. This method tracks buffer lifecycle and reuses "consumed" buffers
            (those whose backward has completed) for new forward passes with matching tensor
            signatures (shape, dtype, layout).

            Example schedule: [1, 1, 1, 2, 2, 2, -2, 1, -2, 1, -2, 2, -1, 2, -1, -1, -2, -2, -1, -1]
            - Positive values indicate forward passes (chunk_id = value)
            - Negative values indicate backward passes (chunk_id = -value)
            - When processing -2 (backward of chunk 2), its buffers become available for reuse
            - The next forward with matching signature can reuse those buffers

        Args:
            order (List[int]): The forward/backward execution order from
                convert_schedule_table_to_order(). Positive integers represent forward passes
                (1-indexed chunk ID), negative integers represent backward passes.
            chunk_id_list (List[Tuple[int, int]]): The list of chunk IDs and layer IDs in the
                order. This is useful only when overlap_moe_expert_parallel_comm is enabled,
                the order maps each layers' idx to their original chunk id.

        Returns:
            Tuple[List[Tuple], List[Dict]]: A tuple containing:
                - sample_args: List of positional argument tuples for each (layer, microbatch).
                    Length = num_layers * num_microbatches. Elements with the same tensor
                    signature may share references to reduce memory allocation.
                - sample_kwargs: List of keyword argument dicts for each (layer, microbatch).
                    Length = num_layers * num_microbatches. Elements with the same tensor
                    signature may share references to reduce memory allocation.

        Data Structures:
            - fwd_sample_queues: Dict[chunk_id, List[Tuple[sample_keys, fwd_idx]]]
                Queue of forward samples per chunk awaiting their backward pass.
            - consumed_sample_queue: Dict[sample_keys, List[fwd_idx]]
                Pool of buffer indices whose backward is complete, keyed by tensor signature.
            - sample_keys: Tuple of (shape, dtype, layout) for args + (key, shape, dtype, layout)
                for kwargs, used to match compatible buffers for reuse.
        """
        assert self.num_model_chunks == max(
            order
        ), "num_model_chunks must match the max chunk id in order."
        if chunk_id_list is None:
            # check only if 1f1b overlap is disabled.
            assert (
                self.num_microbatches == len(order) // self.num_model_chunks // 2
            ), "num_microbatches must match the number of microbatches in order."

        # Generate sample arguments and keyword arguments for capturing.
        sample_args = [None] * (len(self.flattened_callables) * self.num_microbatches)
        sample_kwargs = [None] * (len(self.flattened_callables) * self.num_microbatches)

        rotary_pos_emb_cache = {}

        def _get_layer_static_inputs(layer, chunk_of_the_layer):
            """
            Get the static inputs for a layer.
            """
            assert layer in chunk_of_the_layer.decoder.layers or any(
                layer is mtp_layer.mtp_model_layer for mtp_layer in chunk_of_the_layer.mtp.layers
            ), "Layer is not in the chunk"

            def get_rotary_pos_emb(transformer_module, transformer_input):
                if (
                    transformer_module.position_embedding_type == 'rope'
                    and not self.config.multi_latent_attention
                ):
                    rotary_seq_len = transformer_module.rotary_pos_emb.get_rotary_seq_len(
                        None, transformer_module.decoder, transformer_input, self.config, None
                    )
                    if rotary_seq_len not in rotary_pos_emb_cache:
                        rotary_pos_emb_cache[rotary_seq_len] = transformer_module.rotary_pos_emb(
                            rotary_seq_len
                        )
                    return rotary_pos_emb_cache[rotary_seq_len]
                else:
                    return None

            static_inputs = layer.get_layer_static_inputs(self.seq_length, self.micro_batch_size)

            from megatron.core.transformer.identity_op import IdentityOp
            from megatron.core.transformer.transformer_layer import TransformerLayer

            contains_self_attn = (
                isinstance(layer, TransformerLayer)
                and not isinstance(layer.self_attention, IdentityOp)
                and (
                    not self.config.cuda_graph_scope
                    or CudaGraphScope.attn in self.config.cuda_graph_scope
                )
            )

            _sample_kwargs = {}
            if is_te_min_version("1.10.0"):
                # te.make_graphed_callables() accepts keyword arguments since 1.10.0.
                hidden_states = static_inputs.pop("hidden_states")
                _sample_args = (hidden_states,)
                if contains_self_attn:
                    rotary_pos_emb = get_rotary_pos_emb(chunk_of_the_layer, hidden_states)
                    if rotary_pos_emb is not None:
                        static_inputs["rotary_pos_emb"] = rotary_pos_emb
                _sample_kwargs = static_inputs
            elif contains_self_attn:
                _sample_args = (
                    static_inputs.pop("hidden_states"),
                    static_inputs.pop("attention_mask"),
                )
            else:
                _sample_args = (static_inputs.pop("hidden_states"),)
            return _sample_args, _sample_kwargs

        # Calculate the starting index of each chunk in callables for future use.
        prefix_num_layers = [0]
        for model_chunk_idx in range(self.num_model_chunks):
            num_layers = self.num_layers_per_chunk[model_chunk_idx]
            prefix_num_layers.append(prefix_num_layers[-1] + num_layers)

        # Reorganize args and kwargs for input tensor reuse.
        # fwd_sample_queues is keyed by model chunk index. The value is a queue of tuples.
        # Each tuple contains the sample key signature and its fwd_idx. When we finish a backward
        # chunk, we pop the corresponding fwd_idx and push to the consumed_sample_queue.
        # consumed_sample_queue is keyed by the sample key signature. The value is a queue of the
        # fwd_idx whose backward has been called so that we can reuse the same static buffers.
        # In this way, we can reuse the same static input buffers for the non-overlapping samples
        # with the same input signature.
        fwd_sample_queues = {}
        consumed_sample_queue = {}
        layer_sample_keys_cache = {}
        fwd_idx = [0] * self.num_model_chunks
        for idx, chunk_id in enumerate(order):
            model_chunk_idx = abs(ceil(chunk_id)) - 1

            if chunk_id > 0:
                if model_chunk_idx not in fwd_sample_queues:
                    fwd_sample_queues[model_chunk_idx] = []

                sample_start_idx = (prefix_num_layers[model_chunk_idx] * self.num_microbatches) + (
                    fwd_idx[model_chunk_idx] * self.num_layers_per_chunk[model_chunk_idx]
                )
                if chunk_id_list:
                    model_chunk_idx = chunk_id_list[idx][0]
                    callables_curr_chunk = [
                        self.callables_per_chunk[model_chunk_idx][chunk_id_list[idx][1]]
                    ]
                else:
                    callables_curr_chunk = self.callables_per_chunk[model_chunk_idx]
                for layer_idx, layer in enumerate(callables_curr_chunk):
                    per_callable_fwd_idx = sample_start_idx + layer_idx

                    # Get sample_args and sample_kwargs for index per_callable_fwd_idx.
                    assert (
                        sample_args[per_callable_fwd_idx] is None
                        and sample_kwargs[per_callable_fwd_idx] is None
                    ), (
                        f"sample_args and sample_kwargs must be None before assigning static data, "
                        f"but got sample_args[{per_callable_fwd_idx}] = "
                        f"{sample_args[per_callable_fwd_idx]} and "
                        f"sample_kwargs[{per_callable_fwd_idx}] = "
                        f"{sample_kwargs[per_callable_fwd_idx]}."
                    )
                    if id(layer) not in layer_sample_keys_cache:
                        # Have not generated the static inputs for this layer yet. So we don't
                        # know the input signature of this layer. Generate the static inputs, and
                        # cache the signature.
                        sample_args[per_callable_fwd_idx], sample_kwargs[per_callable_fwd_idx] = (
                            _get_layer_static_inputs(
                                layer, self.chunks_with_decoder[model_chunk_idx]
                            )
                        )
                        sample_args_keys = tuple(
                            (t.shape, t.dtype, t.layout) for t in sample_args[per_callable_fwd_idx]
                        )
                        sample_kwargs_keys = tuple(
                            (k, v.shape, v.dtype, v.layout)
                            for k, v in sorted(sample_kwargs[per_callable_fwd_idx].items())
                        )
                        sample_keys = sample_args_keys + sample_kwargs_keys
                        layer_sample_keys_cache[id(layer)] = sample_keys
                    else:
                        # Get signature from cache. This signature will be used to see if we can
                        # reuse the static inputs of a previous forward pass for this forward pass.
                        # If not, we still need to generate the new static inputs.
                        sample_keys = layer_sample_keys_cache[id(layer)]
                    model_chunk_idx = abs(chunk_id) - 1
                    fwd_sample_queues[model_chunk_idx].append((sample_keys, per_callable_fwd_idx))
                    if consumed_sample_queue.get(sample_keys, []):
                        # We can reuse the static inputs of a previous forward pass for this
                        # forward pass, because they are of the same input signature and the
                        # backward pass of the previous forward pass has completed.
                        reuse_fwd_idx = consumed_sample_queue[sample_keys].pop(0)
                        assert (
                            sample_args[reuse_fwd_idx] is not None
                            and sample_kwargs[reuse_fwd_idx] is not None
                        ), (
                            f"sample_args and sample_kwargs must not be None when reusing, but got "
                            f"sample_args[{reuse_fwd_idx}] = {sample_args[reuse_fwd_idx]} and "
                            f"sample_kwargs[{reuse_fwd_idx}] = {sample_kwargs[reuse_fwd_idx]}.",
                        )
                        sample_args[per_callable_fwd_idx] = sample_args[reuse_fwd_idx]
                        sample_kwargs[per_callable_fwd_idx] = sample_kwargs[reuse_fwd_idx]

                    if sample_args[per_callable_fwd_idx] is None:
                        # Unfortunately, no previous static inputs are available for reuse,
                        # sample_args is still None. Last attempt: generate the new static inputs
                        # for this forward pass.
                        if chunk_id_list:
                            model_chunk_idx = chunk_id_list[idx][0]
                        sample_args[per_callable_fwd_idx], sample_kwargs[per_callable_fwd_idx] = (
                            _get_layer_static_inputs(
                                layer, self.chunks_with_decoder[model_chunk_idx]
                            )
                        )
                        model_chunk_idx = abs(chunk_id) - 1
                fwd_idx[model_chunk_idx] += 1
            elif ceil(chunk_id) == chunk_id:
                num_consumed_samples = min(
                    len(fwd_sample_queues[model_chunk_idx]),
                    self.num_layers_per_chunk[model_chunk_idx],
                )
                for sample_keys, per_callable_fwd_idx in fwd_sample_queues[model_chunk_idx][
                    :num_consumed_samples
                ]:
                    if sample_keys not in consumed_sample_queue:
                        consumed_sample_queue[sample_keys] = []
                    consumed_sample_queue[sample_keys].append(per_callable_fwd_idx)
                fwd_sample_queues[model_chunk_idx] = fwd_sample_queues[model_chunk_idx][
                    num_consumed_samples:
                ]
            else:
                # skip register static inputs for wgrad backward graphs
                continue

        return sample_args, sample_kwargs

    def _get_cuda_graph_input_data(self):
        """
        Create the CUDA Graph capturing input data.
        The data is organized per-chunk per-microbatch per-layer.
        """

        # Get the PP and VPP scheduling order.
        from megatron.core.pipeline_parallel.schedules import (
            get_pp_rank_microbatches,
            get_schedule_table,
        )

        # If PP is not enabled, we only need to capture one microbatch.
        if (
            parallel_state.get_pipeline_model_parallel_world_size() == 1
            and not self.config.overlap_moe_expert_parallel_comm
        ):
            assert (
                self.num_model_chunks == 1
            ), "If PP is not enabled, there should be only one model chunk."
            self.num_microbatches = 1
        else:
            self.num_microbatches = get_num_microbatches()

        _, _, num_warmup_microbatches, _ = get_pp_rank_microbatches(
            self.num_microbatches,
            self.num_model_chunks,
            self.config.microbatch_group_size_per_vp_stage,
            False,
        )
        schedule_table = get_schedule_table(
            self.num_microbatches,
            self.num_model_chunks,
            self.config.microbatch_group_size_per_vp_stage,
        )
        order = convert_schedule_table_to_order(
            num_warmup_microbatches, self.num_model_chunks, schedule_table
        )
        log_on_each_pipeline_stage(
            logger=logger,
            tp_group=None,
            dp_cp_group=None,
            level=logging.DEBUG,
            msg=f'Rank {torch.distributed.get_rank()}: ORDER {order}',
        )
        chunk_id_list = None
        if self.config.overlap_moe_expert_parallel_comm:
            wgrad_in_graph_scope = CudaGraphScope.attn in self.config.cuda_graph_scope or (
                CudaGraphScope.moe_router in self.config.cuda_graph_scope
                and self.config.moe_shared_expert_intermediate_size is not None
                and not self.config.moe_shared_expert_overlap
            )
            capture_wgrad_graph = self.config.delay_wgrad_compute and wgrad_in_graph_scope
            order, chunk_id_list = get_overlap_moe_expert_parallel_comm_order(
                order, self.num_layers_per_chunk, capture_wgrad_graph
            )
            self.num_layers_per_chunk = [1] * sum(self.num_layers_per_chunk)
            self.num_model_chunks = max(order)
            _order_without_wgrad = []
            for c_id in order:
                if ceil(c_id) != c_id:
                    continue
                _order_without_wgrad.append(c_id)
            self.num_microbatches = len(_order_without_wgrad) // self.num_model_chunks // 2
            log_on_each_pipeline_stage(
                logger=logger,
                tp_group=None,
                dp_cp_group=None,
                level=logging.DEBUG,
                msg=f'Rank {torch.distributed.get_rank()}: '
                f'ORDER after overlap_moe_expert_parallel_comm {order}',
            )

        # Generate sample arguments and keyword arguments for capturing.
        sample_args, sample_kwargs = self._get_sample_arguments(order, chunk_id_list)

        def get_make_graphed_callables_kwargs():
            kwargs = {
                'allow_unused_input': True,
                '_order': order,
                'retain_graph_in_backward': self.config.cuda_graph_retain_backward_graph,
            }

            # Calculate the number of warmup iterations per layer per microbatch inside TE
            # make_graphed_callables(). There are two rules:
            # 1. There should be at least 1 warmup iteration per layer per microbatch inside TE
            # make_graphed_callables().
            # 2. There should be at least 10 warmup iterations per layer, counting the MCore warmup
            # steps before going into this capture routine.
            kwargs['num_warmup_iters'] = max(
                1,
                math.ceil(
                    (10 - self.config.cuda_graph_warmup_steps * get_num_microbatches())
                    / self.num_microbatches
                ),
            )

            if is_te_min_version("2.6.0"):
                # Starting from TE 2.6.0, make_graphed_callables() accepts different number
                # of layers per chunk.
                kwargs['_num_layers_per_chunk'] = self.num_layers_per_chunk
            if is_te_min_version("2.7.0"):
                # Starting from TE 2.7.0, make_graphed_callables() optimizes the graph memory usage
                # by reusing input/output data buffers between graphs.
                kwargs['_reuse_graph_input_output_buffers'] = True

            if sample_kwargs:
                kwargs['sample_kwargs'] = sample_kwargs

            from megatron.core.fp4_utils import get_fp4_recipe
            from megatron.core.fp8_utils import get_fp8_recipe

            if self.config.fp8 or self.config.fp4:
                # FP4 and FP8 are mutually exclusive, so use fp8_* kwargs for FP4 too
                # since TE currently uses fp8_autocast for both FP8 and FP4 quantization

                def _get_fp8_enabled():
                    if is_te_min_version("2.8.0"):
                        from megatron.core.fp8_utils import is_first_last_bf16_layer

                        fp8_enabled = []
                        for callable, is_mtp in zip(
                            self.flattened_callables, self.flattened_callables_is_mtp
                        ):
                            fp8_enabled.append(
                                not is_first_last_bf16_layer(
                                    self.config, callable.layer_number - 1 if not is_mtp else -1
                                )
                            )
                        return tuple(fp8_enabled)
                    else:
                        return True

                kwargs['fp8_enabled'] = _get_fp8_enabled()
                kwargs['fp8_recipe'] = (
                    get_fp8_recipe(self.config) if self.config.fp8 else get_fp4_recipe(self.config)
                )
                kwargs['fp8_weight_caching'] = True
                if is_te_min_version("1.14.0") and parallel_state.model_parallel_is_initialized():
                    kwargs['fp8_group'] = parallel_state.get_amax_reduction_group(
                        with_context_parallel=True, tp_only_amax_red=self.config.tp_only_amax_red
                    )
            else:
                kwargs['fp8_enabled'] = False
            return kwargs

        kwargs = get_make_graphed_callables_kwargs()
        return sample_args, kwargs

    def _start_capturing(self):
        """
        Start capturing CUDA Graphs.
        """
        assert not self._capture_finished, "CUDA Graph capture has already been finished."

        torch.cuda.synchronize()
        gc.collect()
        torch.cuda.empty_cache()
        if FREEZE_GC:
            gc.freeze()

        _set_capture_start()
        log_single_rank(logger, logging.INFO, f'Start CUDA Graphs capture...')
        return time.time()

    def _reset_after_capture(self):
        """
        Reset the model and optimizer state after capturing CUDA Graphs.
        """
        from megatron.core.distributed.finalize_model_grads import reset_model_temporary_tensors
        from megatron.core.transformer.moe.moe_utils import clear_aux_losses_tracker

        for model_chunk in self.model:
            model_chunk.zero_grad_buffer()
        for optimizer in self.optimizers:
            optimizer.zero_grad()
        clear_aux_losses_tracker()
        reset_model_temporary_tensors(self.config, self.model)

    def _finish_capturing(self, start_time):
        """
        Finish capturing CUDA Graphs and clean up the related state.
        """
        log_single_rank(
            logger,
            logging.INFO,
            f'Time spent in CUDA Graphs capture on rank {torch.distributed.get_rank()}: '
            f'{time.time() - start_time}s',
        )
        _set_capture_end()

        torch.cuda.synchronize()
        self._reset_after_capture()
        if FREEZE_GC:
            gc.unfreeze()
        gc.collect()
        torch.cuda.empty_cache()

        self._capture_finished = True

    def create_cudagraphs(self):
        """
        Capture CUDA Graphs per TransformerLayer per microbatch.
        """
        start_time = self._start_capturing()

        if not self.flattened_callables:
            # Check if there are any graphable layers. If not, log a warning and skip capture,
            # but still call _finish_capturing to ensure all ranks complete the capture phase.
            logger.warning(
                'TECudaGraphHelper: No graphable layers found. Skipping CUDA graph capture.'
            )
        else:
            # Prepare CUDA Graph capturing input data and call `make_graphed_callables`.
            sample_args, kwargs = self._get_cuda_graph_input_data()
            if self.config.sequence_parallel:
                rng_context = get_cuda_rng_tracker().fork()
            else:
                rng_context = nullcontext()
            with rng_context:
                graphs = make_graphed_callables(
                    tuple(self.flattened_callables), sample_args, **kwargs
                )

            # Push the captured graphs to the corresponding TransformerBlock.
            num_layers_accumulated = 0
            for layers in self.callables_per_chunk:
                for layer_number, layer in enumerate(layers):
                    layer.cuda_graphs = []
                    for batch_number in range(self.num_microbatches):
                        if self.config.overlap_moe_expert_parallel_comm:
                            graph_idx = (
                                num_layers_accumulated + layer_number
                            ) * self.num_microbatches + batch_number
                        else:
                            graph_idx = (
                                num_layers_accumulated * self.num_microbatches
                                + batch_number * len(layers)
                                + layer_number
                            )
                        layer.cuda_graphs.append(graphs[graph_idx])
                num_layers_accumulated += len(layers)

            self._graphs_created = True

        self._finish_capturing(start_time)

    def cuda_graph_set_manual_hooks(self):
        """
        Set CUDA Graph manual hooks for the modules that contain direct parameters and
        are covered by cudagraphs.
        """
        for chunk_number, layers in enumerate(self.callables_per_chunk):
            model_chunk = self.model[chunk_number]
            for layer in layers:
                layer.setup_manual_hooks(model_chunk._make_forward_pre_hook)

    def delete_cuda_graphs(self):
        """
        Delete all CUDA graphs.
        """
        assert self._graphs_created, "No CUDA Graphs were created to delete."

        graph_resettable = is_te_min_version("2.10.0")
        graphs_reset, graphs_not_reset = 0, 0
        for layers in self.callables_per_chunk:
            for layer in layers:
                for graph in layer.cuda_graphs:
                    if graph_resettable:
                        graph.reset()
                        graphs_reset += 1
                    else:
                        graphs_not_reset += 1
                layer.cuda_graphs = []
                layer.cuda_graph_manual_hooks = []

        log_on_each_pipeline_stage(
            logger=logger,
            tp_group=None,
            dp_cp_group=None,
            level=logging.INFO,
            msg=f'Rank {torch.distributed.get_rank()}: '
            f'{graphs_reset} graphs deleted with explicit reset, '
            f'{graphs_not_reset} graphs deleted without explicit reset.',
        )
        self._graphs_created = False


def convert_schedule_table_to_order(num_warmup_microbatches, num_model_chunks, schedule_table):
    """Convert a tunable schedule lookup table to the te.make_graphed_callables() accepted
    order format. For example, the tunable schedule table for PP2 N3M5 with VP2 is as below:
    virtual_microbatch_id | 0 1 2 3 4 5 6 7 8 9
    microbatch_id         | 0 1 2 0 1 2 3 4 3 4
    model_chunk_id        | 0 0 0 1 1 1 0 0 1 1

    Then the forward backward separated order is:
    forward               | 1 1 1 2 2 2 1 1 2 2
    backward              | -2 -2 -2 -1 -1 -1 -2 -2 -1 -1

    If num_warmup_microbatches is 5, the output order is:
    1 1 1 2 2 2 -2 1 -2 1 -2 2 -1 2 -1 -1 -2 -2 -1 -1
    """
    _, model_chunk_id_table = zip(*schedule_table)
    forward_order = [chunk_id + 1 for chunk_id in model_chunk_id_table]
    backward_order = [chunk_id - num_model_chunks for chunk_id in model_chunk_id_table]
    order = forward_order[:num_warmup_microbatches]
    for i in range(num_warmup_microbatches, len(forward_order)):
        order.append(forward_order[i])
        order.append(backward_order[i - num_warmup_microbatches])
    if num_warmup_microbatches > 0:
        order.extend(backward_order[-num_warmup_microbatches:])
    return order


def get_overlap_moe_expert_parallel_comm_order(order, num_layers_per_chunk, capture_wgrad_graph):
    """
    This functions gets the order for overlap_moe_expert_parallel_comm schedule for the original
    chunk-wise order list. Each chunk is transformered to chunks with only 1 layer so that
    layers between 2 chunks can now overlap with each other while following the graph order.
    If capture_wgrad_graph is True, the wgrad backward graph is also added to the order by
    decreasing the layer id by 0.5.

    Args:
        order (List[int]): The original chunk-wise order list. Positive values represent forward
            passes for chunks, negative values represent backward passes. The absolute value
            indicates the chunk ID (1-indexed).
        num_layers_per_chunk (List[int]): Number of graphable layers in each chunk. The length
            of this list equals the number of chunks.
        capture_wgrad_graph (bool): If True, weight gradient computation graphs are added to the
            order by appending entries with layer_id - 0.5.

    Returns:
        Tuple[List[float], List[Optional[List[int]]]]: A tuple containing:
            - new_order: The layer-wise order list where each chunk is expanded to individual
              layers. Positive values are forward passes, negative values are backward passes.
              Values with .5 suffix indicate weight gradient computations.
            - chunk_id_list: A list parallel to new_order. For forward passes, contains
              [chunk_id, layer_index_within_chunk]. For backward passes, contains None.

    Example:
        original_order: [1, 2, -2, 1, -1, -1]
        num_layers_per_chunk: [1, 2]
        capture_wgrad_graph=True:
            new_order: [1, 2, 3, 1, -3, -3.5, -2, -2.5, -1, -1.5, -1, -1.5]
            chunk_id_list: [[0, 0], [1, 0], [1, 1], [0, 0], None,
                            None, None, None, None, None, None, None]
        capture_wgrad_graph=False:
            new_order: [1, 2, 3, 1, -3, -2, -1, -1]
            chunk_id_list: [[0, 0], [1, 0], [1, 1], [0, 0], None, None, None, None]
    """

    def _add_order(new_order, chunk_id_list, c_id, layer_id, is_wgrad=False, index=None):
        if is_wgrad:
            new_order.append(layer_id - 0.5)
        else:
            new_order.append(layer_id)
        if c_id > 0:
            chunk_id_list.append([abs(c_id) - 1, index])
        else:
            chunk_id_list.append(None)

    new_order = []
    chunk_id_list = []
    add_order = partial(_add_order, new_order, chunk_id_list)
    first_backward_idx, last_forward_idx = None, None
    for idx, c_id in enumerate(order):
        if first_backward_idx is None and c_id < 0:
            first_backward_idx = idx
        if c_id > 0:
            last_forward_idx = idx

    def get_layer_range(c_id):
        num_layers = num_layers_per_chunk[abs(c_id) - 1]
        num_layers_previous_chunks = sum(num_layers_per_chunk[: abs(c_id) - 1])
        if c_id > 0:
            return list(
                range(num_layers_previous_chunks + 1, num_layers_previous_chunks + num_layers + 1)
            )
        return list(range(-num_layers_previous_chunks - num_layers, -num_layers_previous_chunks))

    # warmup stage
    for c_id in order[:first_backward_idx]:
        layer_range = get_layer_range(c_id)
        new_order += layer_range
        chunk_id_list.extend([abs(c_id) - 1, i] for i in range(len(layer_range)))

    # 1f1b overlap stage
    if first_backward_idx < last_forward_idx:
        for c_id_b, c_id_f in zip(
            order[first_backward_idx : last_forward_idx + 1 : 2],
            order[first_backward_idx + 1 : last_forward_idx + 1 : 2],
        ):
            layer_range_f = get_layer_range(c_id_f)
            layer_range_b = get_layer_range(c_id_b)
            index = 0
            for l_b, l_f in zip_longest(layer_range_b, layer_range_f, fillvalue=0):
                # always forward graph before backward graph
                if l_f != 0:
                    add_order(c_id_f, l_f, index=index)
                if l_b != 0:
                    add_order(c_id_b, l_b)
                    if capture_wgrad_graph and index < len(layer_range_b) - 1:
                        add_order(c_id_b, l_b, is_wgrad=True)
                index += 1
            # last wgrad backward
            if capture_wgrad_graph and layer_range_b:
                add_order(c_id_b, layer_range_b[-1], is_wgrad=True)

    # cool down stage, backward graphs only
    for c_id in order[last_forward_idx + 1 :]:
        for l_b in get_layer_range(c_id):
            add_order(c_id, l_b)
            if capture_wgrad_graph:
                add_order(c_id, l_b, is_wgrad=True)

    return new_order, chunk_id_list


# ---------------------------------------------------------------------------
# set_current_microbatch: sets per-layer microbatch index for TE graph replay
# ---------------------------------------------------------------------------


def set_current_microbatch(model, microbatch_id):
    """Set the current microbatch on all layers that use TE CUDA graph replay.

    current_microbatch is read by _te_cuda_graph_replay to select the
    correct graph index.  This helper is called from the pipeline-parallel
    schedule before each forward step.
    """
    decoder_exists = True
    model_with_decoder = None
    try:
        model_with_decoder = get_attr_wrapped_model(
            model, "decoder", allow_none=False, return_model_obj=True
        )
    except RuntimeError:
        decoder_exists = False
    if decoder_exists and model_with_decoder is not None:
        for layer in model_with_decoder.decoder.layers:
            layer.current_microbatch = microbatch_id
        if hasattr(model_with_decoder, 'mtp'):
            for layer in model_with_decoder.mtp.layers:
                assert hasattr(
                    layer, 'mtp_model_layer'
                ), f"MTP layer {layer} must have 'mtp_model_layer' attribute"
                layer.mtp_model_layer.current_microbatch = microbatch_id

    # Also set current_microbatch on vision encoder layers so that
    # _te_cuda_graph_replay selects the correct graph index. Without this,
    # vision layers always use graph 0 (since current_microbatch defaults to 0),
    # causing all microbatch forwards to overwrite the same static buffers.
    # When backward runs for earlier microbatches, the buffers contain stale
    # data from later forwards, producing NaN gradients.
    try:
        model_with_vision = get_attr_wrapped_model(
            model, "vision_model", allow_none=True, return_model_obj=True
        )
    except RuntimeError:
        model_with_vision = None
    if model_with_vision is not None and hasattr(model_with_vision, 'vision_model'):
        vision_model = model_with_vision.vision_model
        if hasattr(vision_model, 'decoder') and hasattr(vision_model.decoder, 'layers'):
            for layer in vision_model.decoder.layers:
                layer.current_microbatch = microbatch_id


# ---------------------------------------------------------------------------
# Vision encoder CUDA graph helpers
# ---------------------------------------------------------------------------


def _wrap_graph_for_vision(graph_fn):
    """Wrap a graphed callable to filter out None outputs.

    During make_graphed_callables warmup, vision encoder layers go through their
    normal forward() path which returns (output, context=None). _te_cuda_graph_replay
    asserts len(output) == 1 but gets 2 elements. This wrapper filters out None
    values so replay sees (output,) instead of (output, None).
    """

    def wrapped(*args, **kwargs):
        result = graph_fn(*args, **kwargs)
        if isinstance(result, tuple):
            filtered = tuple(r for r in result if r is not None)
            return filtered if filtered else result
        return result

    for attr in ('backward_dw', 'reset'):
        if hasattr(graph_fn, attr):
            setattr(wrapped, attr, getattr(graph_fn, attr))
    return wrapped


def get_vision_cuda_graph_seq_length(vision_config, default_seq_length: int = 4096) -> int:
    """Calculate the sequence length for vision encoder CUDA graphs.

    For vision encoders, the sequence length depends on:
    - max_vision_cuda_graph_seq_length: explicit maximum (if set)
    - num_position_embeddings: maximum number of patches
    - spatial_merge_size: pooling factor that reduces sequence length

    Args:
        vision_config: The TransformerConfig for vision encoder
        default_seq_length: Default sequence length if cannot be calculated

    Returns:
        The sequence length to use for CUDA graph capture
    """
    if (
        hasattr(vision_config, 'max_vision_cuda_graph_seq_length')
        and vision_config.max_vision_cuda_graph_seq_length
    ):
        return vision_config.max_vision_cuda_graph_seq_length

    if hasattr(vision_config, 'num_position_embeddings'):
        seq_length = vision_config.num_position_embeddings
        if hasattr(vision_config, 'spatial_merge_size'):
            merge_factor = vision_config.spatial_merge_size**2
            seq_length = seq_length // merge_factor
        return seq_length

    return default_seq_length


class VisionTECudaGraphHelper(TECudaGraphHelper):
    """Helper to capture CUDA Graphs for vision encoder layers using TE.

    Inherits from TECudaGraphHelper and overrides only the
    vision-specific behaviour:

    * Layer discovery finds vision_model.decoder.layers instead of the
      language decoder layers.
    * num_model_chunks is always 1 (vision has no virtual pipeline stages).
    * Batch dimension is always 1 (images are concatenated along the sequence
      dimension).
    * Sample argument generation uses a simple loop (no rotary embeddings or
      buffer-reuse optimization).
    * _finish_capturing wraps captured graphs to filter None values that arise
      from vision encoder layers returning (output, None), and skips cleanup
      that is handled by the LM decoder helper.

    Note:
        With pipeline parallelism > 1, only the first pipeline stage typically
        has vision layers. Ranks without vision layers can safely skip calling
        create_cudagraphs() or will gracefully return with no graphs created.

    Args:
        model: The full model (list of model chunks) containing vision_model.
        vision_config: TransformerConfig for the vision encoder.
        vision_seq_length: Sequence length for vision (max vision tokens).
        micro_batch_size: Micro-batch size (unused for sample-arg generation
            since the vision encoder always uses batch-dim = 1).
        num_microbatches: Number of microbatches per step.
    """

    def __init__(
        self,
        model,
        vision_config,
        vision_seq_length: int,
        micro_batch_size: int,
        num_microbatches: int = 1,
    ):
        super().__init__(model, vision_config, vision_seq_length, micro_batch_size)
        # Vision encoder concatenates all images along the sequence dimension
        # with a fixed batch dimension of 1, regardless of the training MBS.
        self.micro_batch_size = 1
        self.num_model_chunks = 1
        self.num_microbatches = num_microbatches

    def _discover_layers(self):
        """Discover captureable layers from the vision encoder."""
        self.vision_model = None
        vision_layers = []

        for model_chunk in self.model:
            try:
                unwrapped = get_attr_wrapped_model(
                    model_chunk, 'vision_model', allow_none=True, return_model_obj=True
                )
                if unwrapped is not None and hasattr(unwrapped, 'vision_model'):
                    self.vision_model = unwrapped.vision_model
                    break
            except (RuntimeError, AttributeError):
                continue

        if self.vision_model is not None:
            if hasattr(self.vision_model, 'decoder') and hasattr(
                self.vision_model.decoder, 'layers'
            ):
                for layer in self.vision_model.decoder.layers:
                    if _layer_is_graphable(layer, self.config):
                        vision_layers.append(layer)

        if vision_layers:
            self.chunks_with_decoder = [self.vision_model]
            self.num_layers_per_chunk = [len(vision_layers)]
            self.callables_per_chunk = [vision_layers]
            self.callables_per_chunk_is_mtp = [[False] * len(vision_layers)]
            self.flattened_callables = list(vision_layers)
            self.flattened_callables_is_mtp = [False] * len(vision_layers)
        else:
            if self.vision_model is None:
                logger.warning(
                    'VisionTECudaGraphHelper: No vision_model found in model. '
                    'CUDA graphs will not be captured for vision encoder.'
                )
            self.chunks_with_decoder = [None]
            self.num_layers_per_chunk = [0]
            self.callables_per_chunk = [[]]
            self.callables_per_chunk_is_mtp = [[]]
            self.flattened_callables = []
            self.flattened_callables_is_mtp = []

        # backward-compat aliases used by callers / tests
        self.callables = vision_layers
        self.num_layers = len(vision_layers)

        if vision_layers:
            logger.info(
                f'VisionTECudaGraphHelper: Found {self.num_layers} graphable vision encoder '
                f'layers. seq_length={self.seq_length} (all images concatenated, batch_dim=1)'
            )

    def _reset_after_capture(self):
        """
        No-op: vision encoder layers do not require any reset:
        - model_chunk.zero_grad_buffer() / optimizer.zero_grad() (handled
          by the LM decoder helper's _finish_capturing which runs on all ranks).
        - clear_aux_losses_tracker / reset_model_temporary_tensors
          (LM-specific cleanup already handled by the LM helper).
        """

    def _finish_capturing(self, start_time):
        """
        Before calling super()._finish_capturing, wrap the captured graphs with
        _wrap_graph_for_vision to filter None from (output, None) tuples so that
        _te_cuda_graph_replay's len == 1 assertion passes.
        """
        # Wrap the captured graphs before finishing
        for layer in self.flattened_callables:
            if hasattr(layer, 'cuda_graphs'):
                layer.cuda_graphs = [_wrap_graph_for_vision(g) for g in layer.cuda_graphs]

        super()._finish_capturing(start_time)

    def _get_sample_arguments(self, order, chunk_id_list=None):
        """Generate sample arguments for vision encoder CUDA Graph capturing.

        Vision uses a simple per-layer-per-microbatch loop with batch_dim=1
        and no rotary embeddings (unlike the parent's buffer-reuse
        optimization). The order and chunk_id_list arguments are
        unused because vision has num_model_chunks=1 and does not need
        the pipeline-schedule-aware buffer lifecycle tracking.

        Returns:
            Tuple of (sample_args, sample_kwargs) lists for each
            (layer, microbatch) pair.
        """
        if not self.flattened_callables:
            return [], []

        sample_args = []
        sample_kwargs_list = []
        hidden_size = self.config.hidden_size

        for _microbatch_idx in range(self.num_microbatches):
            for layer in self.flattened_callables:
                hidden_states = torch.zeros(
                    self.seq_length,
                    1,
                    hidden_size,
                    dtype=torch.bfloat16,
                    device='cuda',
                    requires_grad=True,
                )

                if hasattr(layer, 'get_layer_static_inputs'):
                    static_inputs = layer.get_layer_static_inputs(self.seq_length, 1)
                    hidden_states = static_inputs.pop('hidden_states', hidden_states)
                    sample_args.append((hidden_states,))
                    sample_kwargs_list.append(static_inputs)
                else:
                    sample_args.append((hidden_states,))
                    sample_kwargs_list.append({})

        return sample_args, sample_kwargs_list

    def cuda_graph_set_manual_hooks(self):
        """No-op: vision encoder layers do not use DDP parameter-gather hooks.

        The parent derives hooks from model_chunk._make_forward_pre_hook which
        requires overlap_param_gather=True.  Vision encoder parameters are not
        distributed with the same overlap strategy, so we skip hook setup.
        """


================================================
FILE: megatron/core/transformer/custom_layers/__init__.py
================================================


================================================
FILE: megatron/core/transformer/custom_layers/batch_invariant_kernels.py
================================================
# Copyright 2025 Thinking Machines Lab
# The following code has been adapted
# from the following repo: https://github.com/thinking-machines-lab/batch_invariant_ops


import contextlib
import importlib
import importlib.util
import logging
from collections import namedtuple
from collections.abc import Callable
from typing import Any, Dict, List, Optional

import torch

try:
    import triton
    import triton.language as tl

    HAVE_TRITON = True
except ImportError:
    from unittest.mock import MagicMock

    from megatron.core.utils import null_decorator

    triton = MagicMock()
    triton.jit = null_decorator
    tl = MagicMock()
    HAVE_TRITON = False

__all__ = [
    "set_batch_invariant_mode",
    "is_batch_invariant_mode_enabled",
    "disable_batch_invariant_mode",
    "enable_batch_invariant_mode",
]


_LOGGER = logging.getLogger(__name__)


def _matmul_launch_metadata(
    grid: Callable[..., Any], kernel: Any, args: Dict[str, Any]
) -> Dict[str, Any]:
    """Build launch metadata for Triton matmul kernels used in BIK matmul."""
    ret = {}
    m, n, k = args["M"], args["N"], args["K"]
    ret["name"] = f"{kernel.name} [M={m}, N={n}, K={k}]"
    if "tiles_per_update" in args:
        ret["name"] = (
            f"{kernel.name} [M={m}, N={n}, K={k}, tiles_per_update={args['tiles_per_update']:02}]"
        )
    if "c_ptr" in args:
        bytes_per_elem = args["c_ptr"].element_size()
    else:
        bytes_per_elem = 1 if args["FP8_OUTPUT"] else 2
    ret[f"flops{bytes_per_elem * 8}"] = 2.0 * m * n * k
    ret["bytes"] = bytes_per_elem * (m * k + n * k + m * n)
    return ret


@triton.jit
def _compute_pid(tile_id, num_pid_in_group, num_pid_m, GROUP_SIZE_M, NUM_SMS):
    group_id = tile_id // num_pid_in_group
    first_pid_m = group_id * GROUP_SIZE_M
    group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M)
    pid_m = first_pid_m + (tile_id % group_size_m)
    pid_n = (tile_id % num_pid_in_group) // group_size_m
    return pid_m, pid_n


@triton.jit(launch_metadata=_matmul_launch_metadata)
def matmul_kernel_persistent(
    a_ptr,
    b_ptr,
    c_ptr,  #
    bias_ptr,
    M,
    N,
    K,  #
    stride_am,
    stride_ak,
    stride_bk,
    stride_bn,
    stride_cm,
    stride_cn,
    BLOCK_SIZE_M: tl.constexpr,  #
    BLOCK_SIZE_N: tl.constexpr,  #
    BLOCK_SIZE_K: tl.constexpr,  #
    GROUP_SIZE_M: tl.constexpr,  #
    NUM_SMS: tl.constexpr,  #
    A_LARGE: tl.constexpr,
    B_LARGE: tl.constexpr,
    C_LARGE: tl.constexpr,
    HAS_BIAS: tl.constexpr,
):
    """Persistent matmul Triton kernel backing `matmul_persistent`."""
    start_pid = tl.program_id(axis=0)
    num_pid_m = tl.cdiv(M, BLOCK_SIZE_M)
    num_pid_n = tl.cdiv(N, BLOCK_SIZE_N)
    k_tiles = tl.cdiv(K, BLOCK_SIZE_K)
    num_tiles = num_pid_m * num_pid_n

    tile_id_c = start_pid - NUM_SMS

    offs_k_for_mask = tl.arange(0, BLOCK_SIZE_K)
    num_pid_in_group = GROUP_SIZE_M * num_pid_n

    for tile_id in tl.range(start_pid, num_tiles, NUM_SMS, flatten=True):
        pid_m, pid_n = _compute_pid(tile_id, num_pid_in_group, num_pid_m, GROUP_SIZE_M, NUM_SMS)
        start_m = pid_m * BLOCK_SIZE_M
        start_n = pid_n * BLOCK_SIZE_N
        offs_am = start_m + tl.arange(0, BLOCK_SIZE_M)
        offs_bn = start_n + tl.arange(0, BLOCK_SIZE_N)
        if A_LARGE:
            offs_am = offs_am.to(tl.int64)
        if B_LARGE:
            offs_bn = offs_bn.to(tl.int64)
        offs_am = tl.where(offs_am < M, offs_am, 0)
        offs_bn = tl.where(offs_bn < N, offs_bn, 0)
        offs_am = tl.max_contiguous(tl.multiple_of(offs_am, BLOCK_SIZE_M), BLOCK_SIZE_M)
        offs_bn = tl.max_contiguous(tl.multiple_of(offs_bn, BLOCK_SIZE_N), BLOCK_SIZE_N)

        accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32)
        for ki in range(k_tiles):
            if A_LARGE or B_LARGE:
                offs_k = ki * BLOCK_SIZE_K + tl.arange(0, BLOCK_SIZE_K).to(tl.int64)
            else:
                offs_k = ki * BLOCK_SIZE_K + tl.arange(0, BLOCK_SIZE_K)
            a_ptrs = a_ptr + (offs_am[:, None] * stride_am + offs_k[None, :] * stride_ak)
            b_ptrs = b_ptr + (offs_k[:, None] * stride_bk + offs_bn[None, :] * stride_bn)

            a = tl.load(a_ptrs, mask=offs_k_for_mask[None, :] < K - ki * BLOCK_SIZE_K, other=0.0)
            b = tl.load(b_ptrs, mask=offs_k_for_mask[:, None] < K - ki * BLOCK_SIZE_K, other=0.0)
            accumulator = tl.dot(a, b, accumulator)

        tile_id_c += NUM_SMS
        pid_m, pid_n = _compute_pid(tile_id_c, num_pid_in_group, num_pid_m, GROUP_SIZE_M, NUM_SMS)
        offs_cm = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)
        offs_cn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
        if C_LARGE:
            offs_cm = offs_cm.to(tl.int64)
            offs_cn = offs_cn.to(tl.int64)
        c_ptrs = c_ptr + stride_cm * offs_cm[:, None] + stride_cn * offs_cn[None, :]
        c_mask = (offs_cm[:, None] < M) & (offs_cn[None, :] < N)
        if HAS_BIAS:
            bias_ptrs = bias_ptr + offs_cn
            bias = tl.load(bias_ptrs, mask=offs_cn < N, other=0.0).to(tl.float32)
            accumulator += bias
        c = accumulator.to(c_ptr.dtype.element_ty)
        tl.store(c_ptrs, c, mask=c_mask)


def get_compute_units():
    """
    Returns the number of streaming multiprocessors (SMs) or equivalent compute units
    for the available accelerator. Assigns the value to NUM_SMS.
    """
    NUM_SMS = None
    device_type = getattr(torch.accelerator.current_accelerator(), "type", "cpu")

    # Use match/case for device-specific logic (Python 3.10+)
    match device_type:
        case "cuda":
            device_properties = torch.cuda.get_device_properties(0)
            NUM_SMS = device_properties.multi_processor_count
        case "xpu":
            device_properties = torch.xpu.get_device_properties(0)
            NUM_SMS = device_properties.max_compute_units
        case _:
            _LOGGER.warning("No CUDA or XPU device available. Using CPU.")
            # For CPU, you might want to use the number of CPU cores
            NUM_SMS = torch.get_num_threads()

    return NUM_SMS


def matmul_persistent(a: torch.Tensor, b: torch.Tensor, bias: torch.Tensor | None = None):
    """Persistent matmul kernel used by batch-invariant GEMM."""
    # Check constraints.
    assert a.shape[1] == b.shape[0], "Incompatible dimensions"
    assert a.dtype == b.dtype, "Incompatible dtypes"
    assert (
        bias is None or bias.dim() == 1
    ), "Currently assuming bias is 1D, let Horace know if you run into this"

    NUM_SMS = get_compute_units()
    M, K = a.shape
    K, N = b.shape
    dtype = a.dtype
    # Allocates output.
    c = torch.empty((M, N), device=a.device, dtype=dtype)

    # 1D launch kernel where each block gets its own program.
    def grid(META):
        blocks_m = triton.cdiv(M, META["BLOCK_SIZE_M"])
        blocks_n = triton.cdiv(N, META["BLOCK_SIZE_N"])
        return (min(NUM_SMS, blocks_m * blocks_n),)

    configs = {
        torch.bfloat16: {
            "BLOCK_SIZE_M": 128,
            "BLOCK_SIZE_N": 128,
            "BLOCK_SIZE_K": 64,
            "GROUP_SIZE_M": 8,
            "num_stages": 3,
            "num_warps": 8,
        },
        torch.float16: {
            "BLOCK_SIZE_M": 128,
            "BLOCK_SIZE_N": 256,
            "BLOCK_SIZE_K": 64,
            "GROUP_SIZE_M": 8,
            "num_stages": 3,
            "num_warps": 8,
        },
        torch.float32: {
            "BLOCK_SIZE_M": 128,
            "BLOCK_SIZE_N": 128,
            "BLOCK_SIZE_K": 32,
            "GROUP_SIZE_M": 8,
            "num_stages": 3,
            "num_warps": 8,
        },
    }
    matmul_kernel_persistent[grid](
        a,
        b,
        c,  #
        bias,
        M,
        N,
        K,  #
        a.stride(0),
        a.stride(1),  #
        b.stride(0),
        b.stride(1),  #
        c.stride(0),
        c.stride(1),  #
        NUM_SMS=NUM_SMS,  #
        A_LARGE=a.numel() > 2**31,
        B_LARGE=b.numel() > 2**31,
        C_LARGE=c.numel() > 2**31,
        HAS_BIAS=bias is not None,
        **configs[dtype],
    )
    return c


@triton.jit
def _log_softmax_kernel(
    input_ptr, output_ptr, input_row_stride, output_row_stride, n_cols, BLOCK_SIZE: tl.constexpr
):
    """
    Compute log_softmax along the last dimension of a 2D tensor.
    Each block handles one row of the input tensor.
    """
    # Get the row index for this block
    row_idx = tl.program_id(0).to(tl.int64)

    # Compute base pointers for input and output rows
    row_start_ptr = input_ptr + row_idx * input_row_stride
    output_row_start_ptr = output_ptr + row_idx * output_row_stride

    # Step 1: Find maximum value in the row for numerical stability
    max_val = -float("inf")
    for col_offset in range(0, n_cols, BLOCK_SIZE):
        col_idx = col_offset + tl.arange(0, BLOCK_SIZE)
        mask = col_idx < n_cols

        # Load values
        vals = tl.load(row_start_ptr + col_idx, mask=mask, other=-float("inf"))

        # Update maximum
        max_val = tl.max(tl.maximum(vals, max_val))

    # Step 2: Compute sum of exp(x - max_val)
    sum_exp = 0.0
    for col_offset in range(0, n_cols, BLOCK_SIZE):
        col_idx = col_offset + tl.arange(0, BLOCK_SIZE)
        mask = col_idx < n_cols

        # Load values
        vals = tl.load(row_start_ptr + col_idx, mask=mask, other=0.0)

        # Compute exp(x - max_val) and accumulate
        exp_vals = tl.exp(vals - max_val)
        sum_exp += tl.sum(tl.where(mask, exp_vals, 0.0))

    # Compute log(sum_exp)
    log_sum_exp = tl.log(sum_exp)

    # Step 3: Compute final log_softmax values: x - max_val - log_sum_exp
    for col_offset in range(0, n_cols, BLOCK_SIZE):
        col_idx = col_offset + tl.arange(0, BLOCK_SIZE)
        mask = col_idx < n_cols

        # Load values
        vals = tl.load(row_start_ptr + col_idx, mask=mask)

        # Compute log_softmax
        output = vals - max_val - log_sum_exp

        # Store results
        tl.store(output_row_start_ptr + col_idx, output, mask=mask)


def log_softmax(input: torch.Tensor, dim: int = -1) -> torch.Tensor:
    """
    Compute log_softmax using Triton kernel.

    Args:
        input: Input tensor
        dim: Dimension along which to compute log_softmax (only -1 or last dim supported)
    >> Stashed changes
    Returns:
        Tensor with log_softmax applied along the specified dimension
    """
    if dim != -1 and dim != input.ndim - 1:
        raise ValueError("This implementation only supports log_softmax along the last dimension")
    # Flatten all dimensions except the last one
    original_shape = input.shape
    input_2d = input.reshape(-1, input.shape[-1])
    input_2d = input_2d.contiguous()

    n_rows, n_cols = input_2d.shape

    # Allocate output tensor
    output = torch.empty_like(input_2d)

    # Choose block size based on the number of columns
    BLOCK_SIZE = 1024

    # Launch kernel with one block per row
    grid = (n_rows,)
    _log_softmax_kernel[grid](
        input_2d, output, input_2d.stride(0), output.stride(0), n_cols, BLOCK_SIZE=BLOCK_SIZE
    )
    # Reshape output back to original shape
    return output.reshape(original_shape)


@triton.jit
def mean_kernel(
    input_ptr,
    output_ptr,
    input_stride0,
    input_stride1,
    input_stride2,
    output_stride0,
    output_stride1,
    M,  # size before reduction dim
    N,  # size of reduction dim
    K,  # size after reduction dim
    BLOCK_SIZE: tl.constexpr,
):
    """
    Kernel for computing mean along a single dimension.
    Input is viewed as (M, N, K) where N is the dimension being reduced.
    """
    # Program ID gives us which output element we're computing
    pid = tl.program_id(0)

    # Compute output indices
    m_idx = pid // K
    k_idx = pid % K

    # Bounds check
    if m_idx >= M or k_idx >= K:
        return

    # Accumulate sum across reduction dimension
    acc = 0.0
    for n_start in range(0, N, BLOCK_SIZE):
        n_offsets = n_start + tl.arange(0, BLOCK_SIZE)
        mask = n_offsets < N

        # Calculate input indices
        input_idx = m_idx * input_stride0 + n_offsets * input_stride1 + k_idx * input_stride2

        # Load and accumulate
        vals = tl.load(input_ptr + input_idx, mask=mask, other=0.0)
        acc += tl.sum(vals)

    # Compute mean and store
    mean_val = acc / N
    output_idx = m_idx * output_stride0 + k_idx * output_stride1
    tl.store(output_ptr + output_idx, mean_val)


def mean_dim(
    input: torch.Tensor, dim: int, keepdim: bool = False, dtype: torch.dtype | None = None
) -> torch.Tensor:
    """
    Triton implementation of torch.mean with single dimension reduction.

    Args:
        input: Input tensor
        dim: Single dimension along which to compute mean
        keepdim: Whether to keep the reduced dimension
        dtype: Output dtype. If None, uses input dtype (or float32 for integer inputs)

    Returns:
        Tensor with mean values along specified dimension
    """
    # Validate inputs
    assert input.is_cuda, "Input must be a CUDA tensor"
    assert (
        -input.ndim <= dim < input.ndim
    ), f"Invalid dimension {dim} for tensor with {input.ndim} dimensions"

    # Handle negative dim
    if dim < 0:
        dim = dim + input.ndim

    # Handle dtype
    if dtype is None:
        if input.dtype in [torch.int8, torch.int16, torch.int32, torch.int64]:
            dtype = torch.float32
        else:
            dtype = input.dtype

    # Convert input to appropriate dtype if needed
    if input.dtype != dtype:
        input = input.to(dtype)

    # Get input shape and strides
    shape = list(input.shape)

    # Calculate dimensions for kernel
    M = 1
    for i in range(dim):
        M *= shape[i]

    N = shape[dim]

    K = 1
    for i in range(dim + 1, len(shape)):
        K *= shape[i]

    # Reshape input to 3D view (M, N, K)
    input_3d = input.reshape(M, N, K)

    # Create output shape
    if keepdim:
        output_shape = shape.copy()
        output_shape[dim] = 1
    else:
        output_shape = shape[:dim] + shape[dim + 1 :]

    # Create output tensor
    output = torch.empty(output_shape, dtype=dtype, device=input.device)

    # Reshape output for kernel
    if keepdim:
        output_2d = output.reshape(M, 1, K).squeeze(1)
    else:
        output_2d = output.reshape(M, K)

    # Launch kernel
    grid = (M * K,)
    BLOCK_SIZE = 1024

    mean_kernel[grid](
        input_3d,
        output_2d,
        input_3d.stride(0),
        input_3d.stride(1),
        input_3d.stride(2),
        output_2d.stride(0),
        output_2d.stride(1) if output_2d.ndim > 1 else 0,
        M,
        N,
        K,
        BLOCK_SIZE,
    )

    return output


def mm_batch_invariant(a, b):
    """Batch-invariant replacement for `aten::mm` using a persistent matmul kernel."""
    return matmul_persistent(a, b)


def addmm_batch_invariant(bias, a, b):
    """Batch-invariant replacement for `aten::addmm` using a persistent matmul kernel."""
    return matmul_persistent(a, b, bias=bias)


def _log_softmax_batch_invariant(input, dim, _half_to_float):
    assert not _half_to_float, "not implemented"
    return log_softmax(input, dim=dim)


def mean_batch_invariant(input, dim, keepdim=False, dtype: torch.dtype | None = None):
    """Batch-invariant replacement for `aten::mean.dim` over one or more dimensions."""
    assert dtype is None or dtype == torch.float32, f"unsupported dtype: {dtype}"
    if len(dim) == 1:
        return mean_dim(input, dim[0], keepdim=keepdim)
    else:
        assert input.dtype in {
            torch.float16,
            torch.bfloat16,
            torch.float32,
        }, "only float types supported for now"
        n_elems = 1
        for d in dim:
            n_elems *= input.shape[d]
        return torch.sum(input, dim=dim, keepdim=keepdim, dtype=torch.float32) / n_elems


AttentionBlockSize = namedtuple("AttentionBlockSize", ["block_m", "block_n"])


def get_batch_invariant_attention_block_size() -> AttentionBlockSize:
    """Return the (block_m, block_n) tiling used for batch-invariant attention."""
    return AttentionBlockSize(block_m=16, block_n=16)


_batch_invariant_MODE = False
_batch_invariant_LIB = None
_TE_GENERAL_GEMM_ORIG = None
_TE_RMSNORM_ORIG_FWD = None
_MEG_TE_GENERAL_GEMM_ORIG = None
_TE_RMSNORM_FUNC_ORIGS: Dict[str, Any] = {}
_TE_GEMM_FUNC_ORIGS: Dict[str, Any] = {}


def _import_module_if_available(name: str):
    spec = importlib.util.find_spec(name)
    if spec is None:
        return None
    return importlib.import_module(name)


def _te_patch_for_batch_invariant():
    """Patch Transformer Engine modules to use batch-invariant GEMM and RMSNorm.

    This monkey-patches TE's GEMM and RMSNorm entry points to dispatch to the
    batch-invariant implementations when batch-invariant mode is enabled.
    Safe no-op if TE is unavailable.
    """
    global _TE_GENERAL_GEMM_ORIG, _TE_RMSNORM_ORIG_FWD, _MEG_TE_GENERAL_GEMM_ORIG
    import transformer_engine.pytorch as te
    import transformer_engine.pytorch.cpp_extensions as te_cpp

    # Patch general_gemm once
    if _TE_GENERAL_GEMM_ORIG is None and hasattr(te_cpp, "general_gemm"):
        _TE_GENERAL_GEMM_ORIG = te_cpp.general_gemm
        te_cpp.general_gemm = _te_general_gemm_patched

    # Also patch the symbol imported inside TE's module.linear
    # (from ..cpp_extensions import general_gemm)
    import transformer_engine.pytorch.module.linear as te_linear_mod

    if hasattr(te_linear_mod, "general_gemm"):
        if "module.linear.general_gemm" not in _TE_GEMM_FUNC_ORIGS:
            _TE_GEMM_FUNC_ORIGS["module.linear.general_gemm"] = te_linear_mod.general_gemm
            te_linear_mod.general_gemm = _te_general_gemm_patched

    # Also patch the symbol imported inside TE's module.layernorm_linear
    import transformer_engine.pytorch.module.layernorm_linear as te_layernorm_linear_mod

    if hasattr(te_layernorm_linear_mod, "general_gemm"):
        if "module.layernorm_linear.general_gemm" not in _TE_GEMM_FUNC_ORIGS:
            _TE_GEMM_FUNC_ORIGS["module.layernorm_linear.general_gemm"] = (
                te_layernorm_linear_mod.general_gemm
            )
            te_layernorm_linear_mod.general_gemm = _te_general_gemm_patched

    # Also patch the symbol imported into Megatron's TE wrapper module
    import megatron.core.extensions.transformer_engine as meg_te

    if _MEG_TE_GENERAL_GEMM_ORIG is None and hasattr(meg_te, "general_gemm"):
        _MEG_TE_GENERAL_GEMM_ORIG = meg_te.general_gemm
        meg_te.general_gemm = _te_general_gemm_patched

    # Patch RMSNorm.forward once (class may be on te or te.pytorch)
    rms_cls = getattr(te, "RMSNorm", None)
    if rms_cls is None:
        rms_cls = getattr(te, "pytorch", None)
        rms_cls = getattr(rms_cls, "RMSNorm", None)
    if rms_cls is not None and _TE_RMSNORM_ORIG_FWD is None and hasattr(rms_cls, "forward"):
        _TE_RMSNORM_ORIG_FWD = rms_cls.forward
        rms_cls.forward = _te_rmsnorm_forward_patched

    # Patch TE module-level RMSNorm functions used by fused LayerNormLinear
    import transformer_engine.pytorch.module.layernorm as te_layernorm_mod

    def _make_rmsnorm_patched(orig_func):
        # Module-level helpers (e.g. transformer_engine.pytorch.module.layernorm.rmsnorm)
        # do not go through the RMSNorm class, so we also wrap those functions here.
        def _patched(*args, **kwargs):
            # If batch-invariant mode is off, use original
            if not is_batch_invariant_mode_enabled():
                return orig_func(*args, **kwargs)

            # Extract x, weight, eps from args/kwargs per TE signatures
            x = args[0] if len(args) > 0 else kwargs.get("x")
            weight = args[1] if len(args) > 1 else kwargs.get("weight")
            eps = (args[2] if len(args) > 2 else None) if "eps" not in kwargs else kwargs.get("eps")
            if eps is None:
                eps = 1e-5
            if x is None or weight is None:
                return orig_func(*args, **kwargs)

            y = rmsnorm_batch_invariant(x, weight, float(eps))
            # Match TE behavior: cast output to parameter dtype
            return y.to(weight.dtype)

        return _patched

    for name in ("rmsnorm", "rmsnorm_forward", "rmsnorm_fwd"):
        if hasattr(te_layernorm_mod, name) and name not in _TE_RMSNORM_FUNC_ORIGS:
            orig = getattr(te_layernorm_mod, name)
            _TE_RMSNORM_FUNC_ORIGS[name] = orig
            setattr(te_layernorm_mod, name, _make_rmsnorm_patched(orig))


def _te_unpatch_for_batch_invariant():
    """Restore original Transformer Engine functions if they were patched."""
    global _TE_GENERAL_GEMM_ORIG, _TE_RMSNORM_ORIG_FWD, _MEG_TE_GENERAL_GEMM_ORIG
    te_cpp = _import_module_if_available("transformer_engine.pytorch.cpp_extensions")
    te = _import_module_if_available("transformer_engine.pytorch")
    if te_cpp is None or te is None:
        _TE_GENERAL_GEMM_ORIG = None
        _TE_RMSNORM_ORIG_FWD = None
        _MEG_TE_GENERAL_GEMM_ORIG = None
        return

    if _TE_GENERAL_GEMM_ORIG is not None and hasattr(te_cpp, "general_gemm"):
        te_cpp.general_gemm = _TE_GENERAL_GEMM_ORIG
        _TE_GENERAL_GEMM_ORIG = None

    rms_cls = getattr(te, "RMSNorm", None)
    if rms_cls is None:
        rms_cls = getattr(te, "pytorch", None)
        rms_cls = getattr(rms_cls, "RMSNorm", None)
    if rms_cls is not None and _TE_RMSNORM_ORIG_FWD is not None:
        rms_cls.forward = _TE_RMSNORM_ORIG_FWD
        _TE_RMSNORM_ORIG_FWD = None

    meg_te = _import_module_if_available("megatron.core.extensions.transformer_engine")
    if (
        meg_te is not None
        and _MEG_TE_GENERAL_GEMM_ORIG is not None
        and hasattr(meg_te, "general_gemm")
    ):
        meg_te.general_gemm = _MEG_TE_GENERAL_GEMM_ORIG
        _MEG_TE_GENERAL_GEMM_ORIG = None
    elif meg_te is None:
        _MEG_TE_GENERAL_GEMM_ORIG = None

    # Restore TE module-level RMSNorm functions
    te_layernorm_mod = _import_module_if_available("transformer_engine.pytorch.module.layernorm")
    if te_layernorm_mod is not None:
        for name, orig in list(_TE_RMSNORM_FUNC_ORIGS.items()):
            if hasattr(te_layernorm_mod, name):
                setattr(te_layernorm_mod, name, orig)
            _TE_RMSNORM_FUNC_ORIGS.pop(name, None)
    else:
        _TE_RMSNORM_FUNC_ORIGS.clear()

    # Restore TE module.linear imported symbol for general_gemm if patched
    te_linear_mod = _import_module_if_available("transformer_engine.pytorch.module.linear")
    key = "module.linear.general_gemm"
    if (
        te_linear_mod is not None
        and key in _TE_GEMM_FUNC_ORIGS
        and hasattr(te_linear_mod, "general_gemm")
    ):
        te_linear_mod.general_gemm = _TE_GEMM_FUNC_ORIGS[key]
        _TE_GEMM_FUNC_ORIGS.pop(key, None)
    else:
        _TE_GEMM_FUNC_ORIGS.pop(key, None)

    # Restore TE module.layernorm_linear imported symbol for general_gemm if patched
    te_layernorm_linear_mod = _import_module_if_available(
        "transformer_engine.pytorch.module.layernorm_linear"
    )
    key = "module.layernorm_linear.general_gemm"
    if (
        te_layernorm_linear_mod is not None
        and key in _TE_GEMM_FUNC_ORIGS
        and hasattr(te_layernorm_linear_mod, "general_gemm")
    ):
        te_layernorm_linear_mod.general_gemm = _TE_GEMM_FUNC_ORIGS[key]
        _TE_GEMM_FUNC_ORIGS.pop(key, None)
    else:
        _TE_GEMM_FUNC_ORIGS.pop(key, None)


def _extract_te_gemm_args(args: tuple, kwargs: Dict[str, Any]):
    """Utility to parse TE general_gemm flexible signature.

    Returns (A, B, out_dtype, layout, out, bias, grad).
    """
    A = args[0] if len(args) > 0 else kwargs.get("A")
    B = args[1] if len(args) > 1 else kwargs.get("B")
    out_dtype = kwargs.get("out_dtype")
    layout = kwargs.get("layout", "TN")
    out = kwargs.get("out")
    bias = kwargs.get("bias")
    grad = kwargs.get("grad", False)
    return A, B, out_dtype, layout, out, bias, grad


def _is_supported_dtype_for_bik(t: torch.dtype) -> bool:
    return t in {torch.float16, torch.bfloat16, torch.float32}


class BatchInvariantTEGemmFn(torch.autograd.Function):
    """Autograd function implementing batch-invariant TE GEMM."""

    @staticmethod
    def forward(
        ctx,
        A: torch.Tensor,
        B: torch.Tensor,
        bias: Optional[torch.Tensor],
        out_dtype: Optional[torch.dtype],
        layout: str,
    ):
        """Forward pass computing batch-invariant TE GEMM.

        Respects TE's flexible `layout` semantics, flattens leading dimensions of
        the input as needed, applies optional bias, and casts to `out_dtype`.
        """
        assert isinstance(layout, str) and len(layout) == 2, f"Unsupported layout: {layout}"
        transa = layout[0].upper() == "T"
        transb = layout[1].upper() == "T"

        opA = A.transpose(0, 1).contiguous() if transa else A.contiguous()  # [K, O] or [I, O]
        opB = B.transpose(0, 1).contiguous() if transb else B.contiguous()  # [..., K]

        # Flatten opA to 2D if needed (weight tensors should be 2D, but validate)
        if opA.dim() > 2:
            opA = opA.reshape(-1, opA.shape[-1])
        elif opA.dim() < 2:
            raise ValueError(f"opA has insufficient dimensions: {opA.shape}")
        assert opA.dim() == 2, f"opA must be 2D for matmul_persistent, got shape {opA.shape}"

        # Flatten all leading dims of opB except the last feature dim to match TE behavior
        if opB.dim() >= 2:
            leading_shape = opB.shape[:-1]
            K = opB.shape[-1]
            opB_2d = opB.reshape(-1, K)
        else:
            leading_shape = ()
            opB_2d = opB

        # Perform GEMM: (N_total, K) @ (K, O) -> (N_total, O)
        base_2d = matmul_persistent(opB_2d, opA, bias=None)

        # Reshape back to original leading dims with output features at the end
        out = base_2d.reshape(*leading_shape, base_2d.shape[-1])

        # Add bias after reshaping to match output structure
        if bias is not None:
            out = out + bias

        if out_dtype is not None:
            out = out.to(out_dtype)

        # Save for backward
        ctx.transa = transa
        ctx.transb = transb
        ctx.leading_shape = leading_shape
        ctx.bias_present = bias is not None
        ctx.save_for_backward(A, B)
        return out

    @staticmethod
    def backward(ctx, grad_output: torch.Tensor):
        """Backward pass for batch-invariant TE GEMM.

        Computes gradients w.r.t. A, B, and optional bias while mirroring the
        reshaping/layout logic used in the forward pass.
        """
        A, B = ctx.saved_tensors
        transa = ctx.transa
        transb = ctx.transb
        leading_shape = ctx.leading_shape

        # Reconstruct opA/opB for gradients
        opA = A.transpose(0, 1).contiguous() if transa else A  # [K, O]
        opB = B.transpose(0, 1).contiguous() if transb else B  # [..., K]

        # Flatten grad_output to 2D to mirror forward flatten
        if grad_output.dim() >= 2 and isinstance(leading_shape, tuple) and len(leading_shape) > 0:
            N_total = 1
            for s in leading_shape:
                N_total *= s
            grad_out_2d = grad_output.reshape(N_total, grad_output.shape[-1])
        else:
            grad_out_2d = grad_output

        # Y = B_flat @ A -> dB_flat = dY @ A^T ; dA = B_flat^T @ dY
        d_opB_2d = grad_out_2d.matmul(opA.transpose(0, 1).contiguous())
        d_opA = opB.reshape(-1, opB.shape[-1]).transpose(0, 1).contiguous().matmul(grad_out_2d)

        # Reshape d_opB back to original opB shape
        d_opB = (
            d_opB_2d.reshape(*leading_shape, d_opB_2d.shape[-1])
            if grad_output.dim() >= 2
            else d_opB_2d
        )

        # Map back to dA, dB based on trans flags
        if transa:
            dA = d_opA.transpose(0, 1).contiguous()
        else:
            dA = d_opA

        if transb:
            dB = d_opB.transpose(0, 1).contiguous()
        else:
            dB = d_opB

        # Bias grad along last dimension of Y, if bias was added in forward
        if ctx.bias_present:
            dbias = grad_output.reshape(-1, grad_output.shape[-1]).sum(dim=0)
        else:
            dbias = None

        return dA, dB, dbias, None, None


def _te_general_gemm_patched(*args, **kwargs) -> List[torch.Tensor]:
    """
    Batch-invariant replacement for TE general_gemm.
    Returns a list of tensors to match TE's API: (gemm_out, bias_grad, gelu_input, extra_output)
    """
    global _TE_GENERAL_GEMM_ORIG
    # If original not captured, do nothing
    if _TE_GENERAL_GEMM_ORIG is None:
        raise RuntimeError("TE general_gemm original not captured; patching order issue")

    A, B, out_dtype, layout, out, bias, grad = _extract_te_gemm_args(args, kwargs)
    extra_output = kwargs.get("extra_output", None)
    ub = kwargs.get("ub", None)
    ub_type = kwargs.get("ub_type", None)
    bulk_overlap = kwargs.get("bulk_overlap", False)

    # Guardrails: validate inputs
    if A is None or B is None:
        raise ValueError("Batch-invariant GEMM requires A and B tensors.")
    if (not A.is_cuda) or (not B.is_cuda):
        raise RuntimeError("Batch-invariant GEMM requires CUDA tensors.")
    if not _is_supported_dtype_for_bik(A.dtype) or not _is_supported_dtype_for_bik(B.dtype):
        raise RuntimeError(f"Unsupported dtype for batch-invariant GEMM: {A.dtype}, {B.dtype}")

    # Disallow GEMM-comm overlap in batch-invariant mode
    if extra_output is not None or ub is not None or ub_type is not None or bulk_overlap:
        raise RuntimeError(
            "Batch-invariant GEMM does not support Userbuffers/overlap "
            "(extra_output/ub/ub_type/bulk_overlap)."
        )

    # Compute via autograd-aware function matching TE's layout semantics
    result = BatchInvariantTEGemmFn.apply(A, B, bias if not grad else None, out_dtype, layout)

    bias_grad = None
    if grad and bias is not None:
        # Flatten B to 2D and sum over batch/sequence dimension (first dim)
        B_flat = B.reshape(-1, B.shape[-1]) if B.dim() > 2 else B
        bias_grad = B_flat.sum(dim=0)  # Sum over batch/sequence, keeping output dim

    if out is not None:
        out.copy_(result)
        # TE expects (gemm_out, bias_grad, gelu_input, extra_output)
        return (out, bias_grad, None, extra_output)
    return (result, bias_grad, None, extra_output)


class BatchInvariantRMSNormFn(torch.autograd.Function):
    """Autograd function implementing batch-invariant RMSNorm."""

    @staticmethod
    def forward(ctx, x: torch.Tensor, weight: torch.Tensor, eps: float, zero_centered_gamma: bool):
        """Forward pass for batch-invariant RMSNorm.

        Normalizes `x` using an RMSNorm-style statistic computed via `mean_dim`,
        applies affine `weight`, and stores intermediate rsigma for backward.
        """
        if not x.is_cuda:
            raise RuntimeError("Batch-invariant RMSNorm requires CUDA tensors.")
        if not _is_supported_dtype_for_bik(x.dtype):
            raise RuntimeError(f"Unsupported dtype for batch-invariant RMSNorm: {x.dtype}")
        weight_eff = weight + 1.0 if zero_centered_gamma else weight

        # We do everything in rmsnorm_batch_invariant manually here so that we can
        # save rsigma in full precision for backward to match the TE behavior.
        x_dtype = x.dtype
        x_fp32 = x.float()
        w_fp32 = weight.to(device=x.device, dtype=torch.float32)
        ms = mean_dim(x_fp32 * x_fp32, dim=-1, keepdim=True)
        rsigma = torch.rsqrt(ms + eps)
        out_fp32 = (x_fp32 * rsigma) * w_fp32
        out = out_fp32.to(x_dtype)

        # Save for backward
        ctx.eps = eps
        ctx.zero_centered_gamma = zero_centered_gamma
        ctx.rsigma = rsigma

        ctx.save_for_backward(x, weight, rsigma)
        return out

    @staticmethod
    def backward(ctx, grad_output: torch.Tensor):
        """Backward pass for batch-invariant RMSNorm.

        Computes gradients w.r.t. input and weight while matching TE's fp32
        accumulation and reduction behavior for numerical stability.
        """
        x, weight, rsigma = ctx.saved_tensors
        w_eff = (weight + 1.0) if ctx.zero_centered_gamma else weight

        go_fp32 = grad_output.float()
        x_fp32 = x.float()
        w_fp32 = w_eff.to(device=x.device, dtype=torch.float32)
        r = rsigma
        r3 = r * r * r
        D = x.shape[-1]

        red_dims = tuple(range(0, go_fp32.ndim - 1))
        g_w = (go_fp32 * x_fp32 * r).sum(dim=red_dims).to(weight.dtype)

        s = (go_fp32 * x_fp32 * w_fp32).sum(dim=-1, keepdim=True)
        dx = go_fp32 * (w_fp32 * r) - (w_fp32 * r3) * (s * x_fp32) / D
        dx = dx.to(x.dtype)

        return dx, g_w, None, None


def rmsnorm_batch_invariant(x: torch.Tensor, weight: torch.Tensor, eps: float) -> torch.Tensor:
    """Batch-invariant RMSNorm wrapper that delegates to autograd-aware implementation.

    This provides a simple functional interface while using the optimized BatchInvariantRMSNormFn
    which has better numerics (fp32 precision in forward/backward).
    """
    # Delegate to the autograd function with zero_centered_gamma=False (standard RMSNorm)
    return BatchInvariantRMSNormFn.apply(x, weight, eps, False)


def _te_rmsnorm_forward_patched(self, x: torch.Tensor) -> torch.Tensor:
    """Patched TE RMSNorm.forward that routes to batch-invariant
    implementation with autograd support.
    """
    weight = getattr(self, "weight", None)
    if weight is None:
        raise RuntimeError("Batch-invariant RMSNorm requires affine weight.")
    eps = getattr(self, "eps", 1e-5)
    zero_centered_gamma = getattr(self, "zero_centered_gamma", False)
    return BatchInvariantRMSNormFn.apply(x, weight, eps, zero_centered_gamma)


def is_batch_invariant_mode_enabled():
    """Return True if global batch-invariant mode is currently enabled."""
    return _batch_invariant_MODE


def enable_batch_invariant_mode():
    """Enable global batch-invariant mode and patch Aten/TE kernels."""
    global _batch_invariant_MODE, _batch_invariant_LIB
    if _batch_invariant_MODE:
        return
    dispatch_key = getattr(torch.accelerator.current_accelerator(), "type", "cpu").upper()
    _batch_invariant_MODE = True
    _batch_invariant_LIB = torch.library.Library("aten", "IMPL")
    _batch_invariant_LIB.impl("aten::mm", mm_batch_invariant, dispatch_key)
    _batch_invariant_LIB.impl("aten::addmm", addmm_batch_invariant, dispatch_key)
    _batch_invariant_LIB.impl("aten::_log_softmax", _log_softmax_batch_invariant, dispatch_key)
    _batch_invariant_LIB.impl("aten::mean.dim", mean_batch_invariant, dispatch_key)
    # Also patch Transformer Engine kernels when available
    _te_patch_for_batch_invariant()


def disable_batch_invariant_mode():
    """Disable global batch-invariant mode and restore original kernels."""
    global _batch_invariant_MODE, _batch_invariant_LIB
    if _batch_invariant_LIB is not None:
        _batch_invariant_LIB._destroy()
    _batch_invariant_MODE = False
    _batch_invariant_LIB = None
    # Restore Transformer Engine kernels if previously patched
    _te_unpatch_for_batch_invariant()


@contextlib.contextmanager
def set_batch_invariant_mode(enabled: bool = True):
    """Context manager to toggle global batch-invariant mode.

    When `enabled` is True, batch-invariant kernels are enabled for the duration of
    the context; when False, they are disabled for the duration. This implementation
    is re-entrant and correctly restores the previous state even under nesting.
    """
    global _batch_invariant_MODE, _batch_invariant_LIB
    # Save the previous on/off state so we can correctly restore it, even under
    # nested usage or when toggling from True->False inside an outer True scope.
    prev_enabled = _batch_invariant_MODE

    # Apply the requested state only if it differs from the current one.
    if enabled and not prev_enabled:
        enable_batch_invariant_mode()
    elif not enabled and prev_enabled:
        disable_batch_invariant_mode()

    try:
        yield
    finally:
        # Restore the previous state. If we turned BIK on at entry, turn it off here.
        # If we turned it off at entry (inside an outer True scope), turn it back on.
        if enabled and not prev_enabled:
            disable_batch_invariant_mode()
        elif not enabled and prev_enabled:
            enable_batch_invariant_mode()


================================================
FILE: megatron/core/transformer/dot_product_attention.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.


import math
from typing import Optional, Tuple

import torch
from torch import Tensor

from megatron.core import parallel_state, tensor_parallel
from megatron.core.dist_checkpointing.mapping import ShardedStateDict
from megatron.core.fusions.fused_softmax import FusedScaleMaskSoftmax
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.utils import (
    attention_mask_func,
    is_layer_window_attention,
    make_sharded_tensors_for_checkpoint,
)
from megatron.core.utils import divide


class DotProductAttention(MegatronModule):
    """
    Region where selective activation recomputation is applied.
    This region is memory intensive but less compute intensive which
    makes activation checkpointing more efficient for LLMs (20B+).
    See Reducing Activation Recomputation in Large Transformer Models:
    https://arxiv.org/abs/2205.05198 for more details.

    We use the following notation:
     h: hidden size
     n: number of attention heads
     p: number of tensor model parallel partitions
     b: batch size
     s: sequence length
    """

    def __init__(
        self,
        config: TransformerConfig,
        layer_number: int,
        attn_mask_type: AttnMaskType,
        attention_type: str,
        attention_dropout: Optional[float] = None,
        softmax_scale: Optional[float] = None,
        cp_comm_type: Optional[str] = None,
        pg_collection: Optional[ProcessGroupCollection] = None,
    ):
        super().__init__(config=config)

        self.config: TransformerConfig = config

        assert (
            self.config.context_parallel_size == 1
        ), "Context parallelism is only supported by TEDotProductAttention!"

        self.layer_number = max(1, layer_number)
        self.attn_mask_type = attn_mask_type
        self.attention_type = attention_type  # unused for now

        projection_size = self.config.kv_channels * self.config.num_attention_heads

        # Per attention head and per partition values.
        if pg_collection is None:
            pg_collection = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp'])
        else:
            assert hasattr(
                pg_collection, 'tp'
            ), "DotProductAttention pg_collection must have tp process group"
        self.pg_collection = pg_collection
        self.tp_group = self.pg_collection.tp

        world_size = pg_collection.tp.size()
        self.hidden_size_per_partition = divide(projection_size, world_size)
        self.hidden_size_per_attention_head = divide(projection_size, config.num_attention_heads)
        self.num_attention_heads_per_partition = divide(self.config.num_attention_heads, world_size)
        self.num_query_groups_per_partition = divide(self.config.num_query_groups, world_size)

        coeff = None
        if softmax_scale is None:
            self.softmax_scale = 1.0 / math.sqrt(self.hidden_size_per_attention_head)
        else:
            self.softmax_scale = softmax_scale

        if self.config.apply_query_key_layer_scaling:
            coeff = self.layer_number
            self.softmax_scale /= coeff

        if is_layer_window_attention(
            self.config.window_size, self.config.window_attn_skip_freq, layer_number
        ):
            window_size = self.config.window_size
        else:
            window_size = None

        self.scale_mask_softmax = FusedScaleMaskSoftmax(
            input_in_fp16=self.config.fp16,
            input_in_bf16=self.config.bf16,
            attn_mask_type=self.attn_mask_type,
            scaled_masked_softmax_fusion=self.config.masked_softmax_fusion,
            mask_func=attention_mask_func,
            softmax_in_fp32=self.config.attention_softmax_in_fp32,
            scale=coeff,
            window_size=window_size,
        )

        # Dropout. Note that for a single iteration, this layer will generate
        # different outputs on different number of parallel partitions but
        # on average it should not be partition dependent.
        self.attention_dropout = torch.nn.Dropout(
            self.config.attention_dropout if attention_dropout is None else attention_dropout
        )

        if self.config.softmax_type == "vanilla":
            self.softmax_offset = None
        elif self.config.softmax_type == "off-by-one":
            self.softmax_offset = torch.zeros(
                self.num_attention_heads_per_partition,
                device=torch.cuda.current_device(),
                dtype=self.config.params_dtype,
            )
        elif self.config.softmax_type == "learnable":
            self.register_parameter(
                "softmax_offset",
                torch.nn.Parameter(
                    torch.empty(
                        self.num_attention_heads_per_partition,
                        device=torch.cuda.current_device(),
                        dtype=self.config.params_dtype,
                    )
                ),
            )
            if config.perform_initialization:
                self.softmax_offset = config.init_method(self.softmax_offset)
        else:
            raise ValueError("Softmax type not supported")

    def forward(
        self,
        query: Tensor,
        key: Tensor,
        value: Tensor,
        attention_mask: Optional[Tensor],
        attn_mask_type: Optional[AttnMaskType] = None,
        attention_bias: Optional[Tensor] = None,
        packed_seq_params: Optional[PackedSeqParams] = None,
    ):
        """Forward."""
        assert packed_seq_params is None, (
            "Packed sequence is not supported by DotProductAttention."
            "Please use TEDotProductAttention instead."
        )
        assert attention_bias is None, "Attention bias is not supported for DotProductAttention."

        # ===================================
        # Raw attention scores. [b, n/p, s, s]
        # ===================================

        # expand the key and value [sk, b, ng, hn] -> [sk, b, np, hn]
        # This is a noop for normal attention where ng == np. When using group query attention this
        # creates a view that has the keys and values virtually repeated along their dimension to
        # match the number of queries.

        # attn_mask_type is not used.
        if self.num_attention_heads_per_partition // self.num_query_groups_per_partition > 1:
            key = key.repeat_interleave(
                self.num_attention_heads_per_partition // self.num_query_groups_per_partition, dim=2
            )
            value = value.repeat_interleave(
                self.num_attention_heads_per_partition // self.num_query_groups_per_partition, dim=2
            )

        # [b, np, sq, sk]
        output_size = (query.size(1), query.size(2), query.size(0), key.size(0))

        # [sq, b, np, hn] -> [sq, b * np, hn]
        # This will be a simple view when doing normal attention, but in group query attention
        # the key and value tensors are repeated to match the queries so you can't use
        # simple strides to extract the queries.
        query = query.reshape(output_size[2], output_size[0] * output_size[1], -1)
        # [sk, b, np, hn] -> [sk, b * np, hn]
        key = key.view(output_size[3], output_size[0] * output_size[1], -1)

        # preallocting input tensor: [b * np, sq, sk]
        matmul_input_buffer = parallel_state.get_global_memory_buffer().get_tensor(
            (output_size[0] * output_size[1], output_size[2], output_size[3]), query.dtype, "mpu"
        )

        # Raw attention scores. [b * np, sq, sk]
        matmul_result = torch.baddbmm(
            matmul_input_buffer,
            query.transpose(0, 1),  # [b * np, sq, hn]
            key.transpose(0, 1).transpose(1, 2),  # [b * np, hn, sk]
            beta=0.0,
            alpha=self.softmax_scale,
        )

        # change view to [b, np, sq, sk]
        attention_scores = matmul_result.view(*output_size)

        # ===========================
        # Attention probs and dropout
        # ===========================

        # attention scores and attention mask [b, np, sq, sk]
        attention_probs: Tensor = self.scale_mask_softmax(
            attention_scores, attention_mask, self.softmax_offset
        )
        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.

        if not self.config.sequence_parallel:
            with tensor_parallel.get_cuda_rng_tracker().fork():
                attention_probs = self.attention_dropout(attention_probs)
        else:
            attention_probs = self.attention_dropout(attention_probs)

        # =========================
        # Context layer. [sq, b, hp]
        # =========================

        # value -> context layer.
        # [sk, b, np, hn] --> [b, np, sq, hn]

        # context layer shape: [b, np, sq, hn]
        output_size = (value.size(1), value.size(2), query.size(0), value.size(3))

        # change view [sk, b * np, hn]
        value = value.view(value.size(0), output_size[0] * output_size[1], -1)

        # change view [b * np, sq, sk]
        attention_probs = attention_probs.view(output_size[0] * output_size[1], output_size[2], -1)

        # matmul: [b * np, sq, hn]
        context = torch.bmm(attention_probs, value.transpose(0, 1))

        # change view [b, np, sq, hn]
        context = context.view(*output_size)

        # [b, np, sq, hn] --> [sq, b, np, hn]
        context = context.permute(2, 0, 1, 3).contiguous()

        # [sq, b, np, hn] --> [sq, b, hp]
        new_context_shape = context.size()[:-2] + (self.hidden_size_per_partition,)
        context = context.view(*new_context_shape)

        return context

    def sharded_state_dict(
        self,
        prefix: str = '',
        sharded_offsets: Tuple[Tuple[int, int, int], ...] = (),
        metadata: Optional[dict] = None,
    ) -> ShardedStateDict:
        """Sharded state dict for the learnable softmax offset parameter"""
        if self.config.softmax_type == "learnable":
            state_dict = self.state_dict(prefix="", keep_vars=True)
        else:
            state_dict = {}
        return make_sharded_tensors_for_checkpoint(
            state_dict,
            prefix,
            {'softmax_offset': 0},
            sharded_offsets,
            tp_group=self.tp_group,
            dp_cp_group=metadata['dp_cp_group'],
        )


================================================
FILE: megatron/core/transformer/enums.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

import enum


# can we get rid of this?
# it's being used in pipeline schedules
class ModelType(enum.Enum):
    """Model Type

    encoder_or_decoder for bert, gpt etc
    """

    encoder_or_decoder = 1


class LayerType(enum.Enum):
    """Layer type
    embedding: embedding layer
    loss: loss layer
    encoder: encoder layer, not implemented yet, expect to be used in MLLM models
    decoder: decoder layer
    mtp: multi-token prediction layer, not implemented yet
    """

    embedding = 1
    loss = 2
    encoder = 3
    decoder = 4
    mtp = 5


class AttnType(enum.Enum):
    """Attention type"""

    self_attn = 1
    cross_attn = 2


class AttnMaskType(enum.Enum):
    """Attention Mask Type"""

    padding = 1
    causal = 2
    no_mask = 3  # only used for TE
    padding_causal = 4  # only used for thd attention
    arbitrary = 5
    causal_bottom_right = 6  # only used for TE


class AttnBackend(enum.Enum):
    """Attention Backend"""

    flash = 1
    fused = 2
    unfused = 3
    local = 4
    auto = 5


class CudaGraphScope(enum.Enum):
    """Cuda Graph Scope - defines which parts of the model to capture."""

    full_iteration = 1  # Captures the entire training iteration
    attn = 2  # Captures attention layers
    mlp = 3  # Captures MLP layers (dense layers only)
    moe = 4  # Captures MoE layers (drop-and-pad MoE layers only)
    moe_router = 5  # Captures MoE router part
    moe_preprocess = 6  # Captures MoE preprocessing part (requires moe_router)
    mamba = 7  # Captures Mamba layers
    full_iteration_inference = 8  # Captures the entire inference iteration


================================================
FILE: megatron/core/transformer/experimental_attention_variant/absorbed_mla.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

"""
Absorbed Multi-Latent Attention implementation.

This module implements MLA with matrix absorption:
- Absorbs K's up projection into Q: Q' = Q @ K_up_proj^T
- Applies V's up projection after core attention
- Core attention operates in MQA form with KV being single-head.

The absorption is mathematically equivalent to standard MLA but enables MQA-style attention which
can be more efficient for certain attention variants.
"""

import math
from dataclasses import dataclass
from typing import NoReturn, Optional, Union

import torch

from megatron.core.extensions.transformer_engine import HAVE_TE
from megatron.core.models.common.embeddings import (
    RotaryEmbedding,
    YarnRotaryEmbedding,
    _yarn_get_mscale,
    apply_rotary_pos_emb,
)
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.tensor_parallel.layers import ColumnParallelLinear
from megatron.core.tensor_parallel.mappings import (
    gather_from_sequence_parallel_region,
    gather_from_tensor_model_parallel_region,
    scatter_to_sequence_parallel_region,
)
from megatron.core.transformer.attention import Attention
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.spec_utils import ModuleSpec, build_module
from megatron.core.transformer.transformer_config import MLATransformerConfig
from megatron.core.utils import deprecate_inference_params, get_pg_size

try:
    from megatron.core.fusions.fused_mla_yarn_rope_apply import (
        fused_apply_mla_rope_for_kv,
        fused_apply_mla_rope_for_q,
    )
except ImportError:
    fused_apply_mla_rope_for_kv = None
    fused_apply_mla_rope_for_q = None

if HAVE_TE:
    from megatron.core.extensions.transformer_engine import (
        TEColumnParallelLinear,
        TELinear,
        set_save_original_input,
    )
    from megatron.core.post_training.modelopt.layers import Linear
else:
    TEColumnParallelLinear, TELinear, Linear, set_save_original_input = None, None, None, None


@dataclass
class AbsorbedMLASelfAttentionSubmodules:
    """
    Configuration class for specifying the submodules of absorbed multi-latent self-attention.
    """

    linear_q_proj: Union[ModuleSpec, type] = None
    linear_q_down_proj: Union[ModuleSpec, type] = None
    linear_q_up_proj: Union[ModuleSpec, type] = None
    linear_kv_down_proj: Union[ModuleSpec, type] = None
    linear_k_up_proj: Union[ModuleSpec, type] = None
    linear_v_up_proj: Union[ModuleSpec, type] = None
    core_attention: Union[ModuleSpec, type] = None
    linear_proj: Union[ModuleSpec, type] = None
    q_layernorm: Union[ModuleSpec, type] = None
    kv_layernorm: Union[ModuleSpec, type] = None


class AbsorbedMLASelfAttention(Attention):
    """Multi-latent self-attention layer with matrix absorption.

    This layer takes input with shape [s, b, h] and returns output of the same shape.

    Compared to standard MLA, this class implements matrix absorption:
      - K's up projection is applied to the query before core attention, not to the compressed KV.
      - V's up projection is applied to the output of core attention, not to the compressed KV.
      - Core attention operates in MQA form with KV being single-head.

    The absorption is mathematically equivalent to standard MLA but enables MQA-style attention
    computation which can be more efficient for certain attention variants.
    """

    def __init__(
        self,
        config: MLATransformerConfig,
        submodules: AbsorbedMLASelfAttentionSubmodules,
        layer_number: int,
        attn_mask_type=AttnMaskType.padding,
        cp_comm_type: Optional[str] = None,
        pg_collection: ProcessGroupCollection = None,
    ):
        if pg_collection is None:
            pg_collection = ProcessGroupCollection.use_mpu_process_groups()

        super().__init__(
            config=config,
            submodules=submodules,
            layer_number=layer_number,
            attn_mask_type=attn_mask_type,
            attention_type="self",
            pg_collection=pg_collection,
        )

        assert not config.add_bias_linear, "add_bias_linear is not supported for AbsorbedMLA"

        self.query_projection_size = self.config.v_head_dim * self.config.num_attention_heads
        self.q_head_dim = self.config.qk_head_dim + self.config.qk_pos_emb_head_dim

        # Inference is currently not supported.
        self.key_hidden_size = None
        self.val_hidden_size = None

        self.recompute_up_proj = (
            self.config.recompute_granularity == 'selective'
            and "mla_up_proj" in self.config.recompute_modules
        )
        self.qkv_up_checkpoint = None

        mscale = _yarn_get_mscale(self.config.rotary_scaling_factor, self.config.mscale_all_dim)
        self.softmax_scale = mscale * mscale / math.sqrt(self.q_head_dim)
        self.cache_mla_latents = self.config.cache_mla_latents
        assert not self.cache_mla_latents, "cache_mla_latents is not supported for AbsorbedMLA"

        if self.config.rope_type == "rope":
            self.rotary_pos_emb = RotaryEmbedding(
                self.config.qk_pos_emb_head_dim,
                rotary_percent=self.config.rotary_percent,
                rotary_base=self.config.rotary_base,
                cp_group=self.pg_collection.cp,
            )
        elif self.config.rope_type == "yarn":
            self.rotary_pos_emb = YarnRotaryEmbedding(
                self.config.qk_pos_emb_head_dim,
                rotary_base=self.config.rotary_base,
                scaling_factor=self.config.rotary_scaling_factor,
                original_max_position_embeddings=self.config.original_max_position_embeddings,
                beta_fast=self.config.beta_fast,
                beta_slow=self.config.beta_slow,
                mscale=self.config.mscale,
                mscale_all_dim=self.config.mscale_all_dim,
                cp_group=self.pg_collection.cp,
            )
        else:
            raise ValueError(
                f"Unsupported RoPE type: {self.config.rope_type}, supported types are "
                "'rope' and 'yarn'"
            )

        self.core_attention = build_module(
            submodules.core_attention,
            config=self.config,
            layer_number=self.layer_number,
            attn_mask_type=self.attn_mask_type,
            attention_type="self",
            softmax_scale=self.softmax_scale,
            k_channels=self.config.kv_lora_rank + self.config.qk_pos_emb_head_dim,
            v_channels=self.config.kv_lora_rank,
            cp_comm_type=cp_comm_type,
            pg_collection=self.pg_collection,
        )

        # Output.
        self.linear_proj = build_module(
            submodules.linear_proj,
            self.query_projection_size,
            self.config.hidden_size,
            config=self.config,
            init_method=self.config.output_layer_init_method,
            bias=self.config.add_bias_linear,
            input_is_parallel=True,
            skip_bias_add=True,
            is_expert=False,
            tp_comm_buffer_name='proj',
            tp_group=self.pg_collection.tp,
        )

        if (
            HAVE_TE
            and isinstance(self.linear_proj, TELinear)
            and (
                (
                    self.config.fp8
                    and self.config.fp8_recipe != 'delayed'
                    and is_te_min_version("2.6.0dev0")
                )
                or (self.config.fp4 and is_te_min_version("2.7.0.dev0"))
            )
        ):
            # For fp8/fp4 training, the output of the fused core_attn is saved by itself, and
            # linear_proj also saves the quantized tensor of this output. Here we set the
            # linear_proj to save the original input tensors to avoid the extra memory usage of
            # the quantized tensor.
            set_save_original_input(self.linear_proj)

        if self.config.q_lora_rank is None:
            # Not projecting query
            self.linear_q_proj = build_module(
                submodules.linear_q_proj,
                self.config.hidden_size,
                self.config.num_attention_heads * self.q_head_dim,
                config=self.config,
                init_method=self.config.init_method,
                gather_output=False,
                bias=False,
                skip_bias_add=False,
                is_expert=False,
                tp_comm_buffer_name='q_proj',
            )
        else:
            q_down_proj_kwargs = {}
            if submodules.linear_q_down_proj in [TELinear]:
                q_down_proj_kwargs['parallel_mode'] = 'duplicated'
            elif submodules.linear_q_down_proj in [
                Linear,
                TEColumnParallelLinear,
                ColumnParallelLinear,
            ]:
                q_down_proj_kwargs['gather_output'] = False
            else:
                raise ValueError(f"Unsupported linear_q_down_proj: {submodules.linear_q_down_proj}")

            self.linear_q_down_proj = build_module(
                submodules.linear_q_down_proj,
                self.config.hidden_size,
                self.config.q_lora_rank,
                config=self.config,
                init_method=self.config.init_method,
                bias=False,
                skip_bias_add=False,
                is_expert=False,
                tp_comm_buffer_name='q_down_proj',
                skip_weight_param_allocation=False,
                tp_group=(
                    pg_collection.tp
                    if q_down_proj_kwargs.get('parallel_mode') != 'duplicated'
                    else None
                ),
                **q_down_proj_kwargs,
            )

            self.linear_q_up_proj = build_module(
                submodules.linear_q_up_proj,
                self.config.q_lora_rank,
                self.config.num_attention_heads * self.q_head_dim,
                config=self.config,
                init_method=self.config.init_method,
                gather_output=False,
                bias=False,
                skip_bias_add=False,
                is_expert=False,
                tp_comm_buffer_name='q_up_proj',
                tp_group=pg_collection.tp,
            )

        kv_down_proj_kwargs = {}
        if submodules.linear_kv_down_proj in [TELinear]:
            kv_down_proj_kwargs['parallel_mode'] = 'duplicated'
        elif submodules.linear_kv_down_proj in [
            Linear,
            TEColumnParallelLinear,
            ColumnParallelLinear,
        ]:
            kv_down_proj_kwargs['gather_output'] = False
        else:
            raise ValueError(f"Unsupported linear_kv_down_proj: {submodules.linear_kv_down_proj}")

        self.linear_kv_down_proj = build_module(
            submodules.linear_kv_down_proj,
            self.config.hidden_size,
            self.config.kv_lora_rank + self.config.qk_pos_emb_head_dim,
            config=self.config,
            init_method=self.config.init_method,
            bias=False,
            skip_bias_add=False,
            is_expert=False,
            tp_comm_buffer_name='kv_down_proj',
            skip_weight_param_allocation=False,
            tp_group=(
                pg_collection.tp
                if kv_down_proj_kwargs.get('parallel_mode') != 'duplicated'
                else None
            ),
            **kv_down_proj_kwargs,
        )

        # Build separate K and V up projections
        self.linear_k_up_proj = build_module(
            submodules.linear_k_up_proj,
            self.config.kv_lora_rank,
            self.config.num_attention_heads * self.config.qk_head_dim,
            config=self.config,
            init_method=self.config.init_method,
            gather_output=False,
            bias=False,
            skip_bias_add=False,
            is_expert=False,
            tp_comm_buffer_name='k_up_proj',
            tp_group=pg_collection.tp,
        )
        self.linear_v_up_proj = build_module(
            submodules.linear_v_up_proj,
            self.config.kv_lora_rank,
            self.config.num_attention_heads * self.config.v_head_dim,
            config=self.config,
            init_method=self.config.init_method,
            gather_output=False,
            bias=False,
            skip_bias_add=False,
            is_expert=False,
            tp_comm_buffer_name='v_up_proj',
            tp_group=pg_collection.tp,
        )

        if self.config.q_lora_rank is not None:
            self.q_layernorm = build_module(
                submodules.q_layernorm,
                hidden_size=self.config.q_lora_rank,
                config=self.config,
                eps=self.config.layernorm_epsilon,
            )

        self.kv_layernorm = build_module(
            submodules.kv_layernorm,
            hidden_size=self.config.kv_lora_rank,
            config=self.config,
            eps=self.config.layernorm_epsilon,
        )

    def get_query_key_value_tensors(
        self,
        hidden_states,
        key_value_states=None,
        packed_seq_params=None,
        inference_context=None,
        *,
        inference_params=None,
    ):
        """
        Derives absorbed q, compressed q, and compressed kv tensors from `hidden_states`.
        """
        # s = sequence length, b = batch size, h = hidden size
        assert (
            hidden_states.ndim == 3
        ), f"hidden_states should be 3D, [s, b, h], got {hidden_states.ndim}D"
        if packed_seq_params is not None:
            assert (
                packed_seq_params.local_cp_size is None
            ), "dynamic context parallel is not supported with MLA yet and is planned for future. \
            Please disable dynamic context parallel."

        inference_context = deprecate_inference_params(inference_context, inference_params)

        # =========================================
        # Prepare RoPE and seqlen related params
        # =========================================
        rotary_seq_len = self.rotary_pos_emb.get_rotary_seq_len(
            inference_context, None, hidden_states, self.config, packed_seq_params
        )

        mscale = 1.0
        rotary_pos_cos = None
        rotary_pos_sin = None
        packed_seq = packed_seq_params is not None and packed_seq_params.qkv_format == 'thd'
        if self.config.rope_type == "rope":
            rotary_pos_emb = self.rotary_pos_emb(rotary_seq_len, packed_seq=packed_seq)
        else:
            if self.config.apply_rope_fusion:
                rotary_pos_cos, rotary_pos_sin = self.rotary_pos_emb.get_cached_cos_sin(
                    rotary_seq_len, dtype=hidden_states.dtype, packed_seq=packed_seq
                )
                rotary_pos_emb = None
                assert inference_context is None, "Inference with MLA RoPE fusion is not supported"
                assert (
                    fused_apply_mla_rope_for_q is not None
                    and fused_apply_mla_rope_for_kv is not None
                ), "Fused MLA RoPE apply is not imported successfully"
            else:
                rotary_pos_emb, mscale = self.rotary_pos_emb(rotary_seq_len, packed_seq=packed_seq)

        if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd':
            if packed_seq_params.cu_seqlens_q_padded is not None:
                cu_seqlens_q = packed_seq_params.cu_seqlens_q_padded
            else:
                cu_seqlens_q = packed_seq_params.cu_seqlens_q
            if packed_seq_params.cu_seqlens_kv_padded is not None:
                cu_seqlens_kv = packed_seq_params.cu_seqlens_kv_padded
            else:
                cu_seqlens_kv = packed_seq_params.cu_seqlens_kv
        else:
            cu_seqlens_q = cu_seqlens_kv = None

        # =========================================
        # Q down projection
        # =========================================
        if self.config.q_lora_rank is not None:
            # if linear_q_down_proj is ColumnParallelLinear:
            #     q_compressed: [s, b, q_lora_rank / TP]
            # elif linear_q_down_proj is Linear:
            #     q_compressed: [s / TP, b, q_lora_rank]
            q_compressed, _ = self.linear_q_down_proj(hidden_states)

            # When output is sharded (ColumnParallelLinear), two things are needed to be
            # identical to a normal Linear.
            #   1. Manually gather output to restore output dim q_lora_rank;
            #   2. Scatter sequence back to s / TP if sequence-parallel since it was
            #      gathered by ColumnParallelLinear.
            if q_compressed.size(-1) != self.config.q_lora_rank:
                q_compressed = gather_from_tensor_model_parallel_region(q_compressed)
                if self.config.sequence_parallel:
                    q_compressed = scatter_to_sequence_parallel_region(q_compressed)
        else:
            q_compressed = hidden_states

        # =========================================
        # KV down projection
        # =========================================
        # if linear_kv_down_proj is ColumnParallelLinear:
        #     kv_combined: [s, b, (kv_lora_rank + qk_pos_emb_head_dim) / TP]
        # elif linear_kv_down_proj is Linear:
        #     kv_combined: [s / TP, b, (kv_lora_rank + qk_pos_emb_head_dim)]
        kv_combined, _ = self.linear_kv_down_proj(hidden_states)
        if kv_combined.size(-1) != self.config.kv_lora_rank + self.config.qk_pos_emb_head_dim:
            # kv_combined: [s, b, (kv_lora_rank + qk_pos_emb_head_dim)]
            kv_combined = gather_from_tensor_model_parallel_region(kv_combined)
            # kv_compressed:[s, b, kv_lora_rank], k_pos_emb: [s, b, qk_pos_emb_head_dim]
            kv_compressed, k_pos_emb = torch.split(
                kv_combined, [self.config.kv_lora_rank, self.config.qk_pos_emb_head_dim], dim=-1
            )
            if self.config.sequence_parallel:
                # kv_compressed:[s / TP, b, kv_lora_rank]
                kv_compressed = scatter_to_sequence_parallel_region(kv_compressed)
        else:
            # kv_compressed:[s / TP, b, kv_lora_rank], k_pos_emb: [s / TP, b, qk_pos_emb_head_dim]
            kv_compressed, k_pos_emb = torch.split(
                kv_combined, [self.config.kv_lora_rank, self.config.qk_pos_emb_head_dim], dim=-1
            )
            if get_pg_size(self.tp_group) > 1 and self.config.sequence_parallel:
                # k_pos_emb: [s, b, qk_pos_emb_head_dim]
                k_pos_emb = gather_from_sequence_parallel_region(k_pos_emb, group=self.tp_group)

        if packed_seq_params is not None:
            assert q_compressed.ndim == 3 and q_compressed.size(1) == 1
            assert kv_compressed.ndim == 3 and kv_compressed.size(1) == 1
            assert k_pos_emb.ndim == 3 and k_pos_emb.size(1) == 1
            # If sequence packing, TE expect [t, h, d] shaped qkv input.
            # In Megatron-Core, the qkv shape is [t, 1, h, d].
            # So we need to reshape qkv from [t, 1, h, d] to [t, h, d].
            q_compressed = q_compressed.squeeze(1)
            kv_compressed = kv_compressed.squeeze(1)
            k_pos_emb = k_pos_emb.squeeze(1)

        # =========================================
        # Apply norm
        # =========================================
        if self.config.q_lora_rank is not None:
            # q_compressed: [num_tokens, q_lora_rank]
            q_compressed = self.q_layernorm(q_compressed)

        kv_compressed = self.kv_layernorm(kv_compressed)
        # Because we won't apply V up projection to the compressed KV, so we need to gather it
        # manually.
        if get_pg_size(self.tp_group) > 1 and self.config.sequence_parallel:
            kv_compressed = gather_from_sequence_parallel_region(kv_compressed, group=self.tp_group)

        # =========================================
        # QKV up projection and RoPE apply
        # =========================================

        def qkv_up_proj_and_rope_apply(q_compressed, kv_compressed, k_pos_emb, rotary_pos_emb):
            """
            Apply the up projection and RoPE to the query and key.
            When sequence packing enabled, the input tensors adopt a packed shape of [t, ...];
            otherwise, they maintain the unpacked shape [s, b, ...]. In subsequent code comments,
            we uniformly use [num_tokens, ...] to denote [s, b, ...] or [t, ...] for two cases.
            """
            if self.config.q_lora_rank is not None:
                # q_compressed: [num_tokens, q_lora_rank]
                # q: [num_tokens, n * (qk_head_dim + qk_pos_emb_head_dim)]
                q, _ = self.linear_q_up_proj(q_compressed)
            else:
                # q_compressed: [num_tokens, hidden_size]
                # q: [num_tokens, n * (qk_head_dim + qk_pos_emb_head_dim)]
                q, _ = self.linear_q_proj(q_compressed)

            # q: [num_tokens, n, q_head_dim]
            q = q.view(*q.size()[:-1], self.num_attention_heads_per_partition, self.q_head_dim)

            # [num_tokens, kv_lora_rank] -> [num_tokens, 1, kv_lora_rank]
            kv_compressed = torch.unsqueeze(kv_compressed, -2)
            # [num_tokens, qk_pos_emb_head_dim] -> [num_tokens, 1, qk_pos_emb_head_dim]
            k_pos_emb = torch.unsqueeze(k_pos_emb, -2)

            # Prepare k_up_weight for absorption
            # k_up_weight: linear_k_up_proj.weight viewed as [n, qk_head_dim, kv_lora_rank]
            assert self.linear_k_up_proj.weight.size(0) == (
                self.num_attention_heads_per_partition * self.config.qk_head_dim
            )
            assert self.linear_k_up_proj.weight.size(1) == self.config.kv_lora_rank
            k_up_weight = self.linear_k_up_proj.weight.view(
                self.num_attention_heads_per_partition,
                self.config.qk_head_dim,
                self.config.kv_lora_rank,
            )

            if self.config.apply_rope_fusion:
                # q_no_pe: [num_tokens, n, qk_head_dim]
                # q_pos_emb: [num_tokens, n, qk_pos_emb_head_dim]
                q_no_pe, q_pos_emb = torch.split(
                    q, [self.config.qk_head_dim, self.config.qk_pos_emb_head_dim], dim=-1
                )

                # Absorb k_up_weight into q_no_pe
                # q_absorbed: [num_tokens, n, kv_lora_rank]
                q_absorbed = torch.einsum("...nd,ndk->...nk", q_no_pe, k_up_weight)
                q_absorbed = q_absorbed.contiguous()
                assert q_absorbed.ndim == q.ndim
                assert q_absorbed.shape[:-1] == q.shape[:-1]
                assert q_absorbed.size(-1) == self.config.kv_lora_rank

                # q_absorbed: [num_tokens, n, (kv_lora_rank + qk_pos_emb_head_dim)]
                q_absorbed = torch.cat([q_absorbed, q_pos_emb], dim=-1)
                # kv_compressed: [num_tokens, 1, (kv_lora_rank + qk_pos_emb_head_dim)]
                kv_compressed = torch.cat([kv_compressed, k_pos_emb], dim=-1)

                cp_rank = self.pg_collection.cp.rank()
                cp_size = self.pg_collection.cp.size()
                q_absorbed = fused_apply_mla_rope_for_q(
                    q_absorbed,
                    rotary_pos_cos,
                    rotary_pos_sin,
                    self.config.kv_lora_rank,
                    self.config.qk_pos_emb_head_dim,
                    cu_seqlens_q,
                    cp_rank,
                    cp_size,
                )
                kv_compressed = fused_apply_mla_rope_for_q(
                    kv_compressed,
                    rotary_pos_cos,
                    rotary_pos_sin,
                    self.config.kv_lora_rank,
                    self.config.qk_pos_emb_head_dim,
                    cu_seqlens_kv,
                    cp_rank,
                    cp_size,
                )
            else:
                q_len = q.size()[0]
                if inference_context is not None:
                    # add offset to the sequence start for inference
                    sequence_start = inference_context.sequence_len_offset
                    sequence_end = sequence_start + q_len
                    rotary_pos_emb = rotary_pos_emb[sequence_start:sequence_end]
                elif packed_seq_params is None or self.config.context_parallel_size == 1:
                    # Shorten rotary_pos_emb to the sequence length when inference_params
                    # is not provided. This makes sure we can run forward directly with
                    # any sequence length. During training, the sequence length is always
                    # the full rotary_pos_emb length, except for sequence packing + CP.
                    # When sequence packing and context parallel are both enabled, the
                    # position embedding will not split rotary_pos_emb, so it may exceed
                    # the sequence length on this CP rank, but we need the full rotary_pos_emb
                    # to cover the full sequence, so we do not shorten it here.
                    rotary_pos_emb = rotary_pos_emb[0:q_len]

                # q_no_pe: [num_tokens, n, qk_head_dim]
                # q_pos_emb: [num_tokens, n, qk_pos_emb_head_dim]
                q_no_pe, q_pos_emb = torch.split(
                    q, [self.config.qk_head_dim, self.config.qk_pos_emb_head_dim], dim=-1
                )

                # Absorb k_up_weight into q_no_pe
                # q_absorbed: [num_tokens, n, kv_lora_rank]
                q_absorbed = torch.einsum("...nd,ndk->...nk", q_no_pe, k_up_weight)
                q_absorbed = q_absorbed.contiguous()
                assert q_absorbed.ndim == q.ndim
                assert q_absorbed.shape[:-1] == q.shape[:-1]
                assert q_absorbed.size(-1) == self.config.kv_lora_rank

                # Apply RoPE to q_pos_emb: [num_tokens, n, qk_pos_emb_head_dim]
                q_pos_emb = apply_rotary_pos_emb(
                    q_pos_emb,
                    rotary_pos_emb,
                    config=self.config,
                    cu_seqlens=cu_seqlens_q,
                    mscale=mscale,
                    cp_group=self.pg_collection.cp,
                )
                # k_pos_emb:[num_tokens, 1, qk_pos_emb_head_dim]
                k_pos_emb = apply_rotary_pos_emb(
                    k_pos_emb,
                    rotary_pos_emb,
                    config=self.config,
                    cu_seqlens=cu_seqlens_kv,
                    mscale=mscale,
                    cp_group=self.pg_collection.cp,
                )

                # query: [num_tokens, n, (kv_lora_rank + qk_pos_emb_head_dim)]
                q_absorbed = torch.cat([q_absorbed, q_pos_emb], dim=-1)
                # key: [num_tokens, 1, (kv_lora_rank + qk_pos_emb_head_dim)]
                kv_compressed = torch.cat([kv_compressed, k_pos_emb], dim=-1)

            assert q_absorbed.is_contiguous()
            assert kv_compressed.is_contiguous()

            return q_absorbed, kv_compressed

        if self.recompute_up_proj:
            quantization = self.config.fp8 or self.config.fp4
            assert not quantization, "FP8/FP4 is not supported for AbsorbedMLA"
            self.qkv_up_checkpoint = tensor_parallel.CheckpointWithoutOutput(fp8=quantization)
            q_absorbed, kv_compressed = self.qkv_up_checkpoint.checkpoint(
                qkv_up_proj_and_rope_apply, q_compressed, kv_compressed, k_pos_emb, rotary_pos_emb
            )
        else:
            assert not self.cache_mla_latents, "cache_mla_latents is not supported for AbsorbedMLA"
            q_absorbed, kv_compressed = qkv_up_proj_and_rope_apply(
                q_compressed, kv_compressed, k_pos_emb, rotary_pos_emb
            )

        return q_absorbed, kv_compressed, q_compressed

    def _checkpointed_attention_forward(
        self,
        q_absorbed,
        k_compressed,
        v_compressed,
        hidden_states,
        q_compressed,
        attention_mask,
        rotary_pos_emb=None,
        attn_mask_type=None,
        attention_bias=None,
        packed_seq_params=None,
    ):
        """Forward method with selective activation checkpointing."""

        def custom_forward(*inputs):
            q_absorbed = inputs[0]
            k_compressed = inputs[1]
            v_compressed = inputs[2]
            hidden_states = inputs[3]
            q_compressed = inputs[4]
            attention_mask = inputs[5]
            attn_mask_type = inputs[7]
            attention_bias = inputs[8]
            packed_seq_params = inputs[9]
            attn_mask_type = AttnMaskType(attn_mask_type.item())
            output_ = self.core_attention(
                q_absorbed,
                k_compressed,
                v_compressed,
                hidden_states,
                q_compressed,
                attention_mask,
                attn_mask_type=attn_mask_type,
                attention_bias=attention_bias,
                packed_seq_params=packed_seq_params,
            )
            return output_

        if attn_mask_type is None:
            attn_mask_type = self.attn_mask_type
        attn_mask_type = torch.tensor([attn_mask_type.value], dtype=torch.int)
        hidden_states = tensor_parallel.checkpoint(
            custom_forward,
            False,
            q_absorbed,
            k_compressed,
            v_compressed,
            hidden_states,
            q_compressed,
            attention_mask,
            rotary_pos_emb,
            attn_mask_type,
            attention_bias,
            packed_seq_params,
        )

        return hidden_states

    def forward(
        self,
        hidden_states,
        attention_mask,
        key_value_states=None,
        inference_context=None,
        rotary_pos_emb=None,
        rotary_pos_cos=None,
        rotary_pos_sin=None,
        rotary_pos_cos_sin=None,
        attention_bias=None,
        packed_seq_params=None,
        sequence_len_offset=None,
        *,
        inference_params=None,
    ):
        """Forward pass for multi-latent attention with matrix absorption"""
        assert rotary_pos_emb is None, "Rotary position embeddings should not be passed into MLA."
        assert attention_bias is None, "Attention bias should not be passed into MLA."
        assert (
            rotary_pos_cos is None and rotary_pos_sin is None
        ), "MLA does not support Flash Decoding"
        assert not rotary_pos_cos_sin, "Flash-infer rope has not been tested with MLA."
        assert not (
            self.training and self.cache_mla_latents
        ), "cache_mla_latents conflicts with training."
        assert (
            inference_context is None and inference_params is None
        ), "Inference is not supported for AbsorbedMLA"

        # =====================
        # Query, Key, and Value
        # =====================
        q_absorbed, kv_compressed, q_compressed = self.get_query_key_value_tensors(
            hidden_states, key_value_states, packed_seq_params, inference_context=inference_context
        )

        assert q_absorbed.is_contiguous()
        assert q_compressed.is_contiguous()
        assert kv_compressed.is_contiguous()

        # ==================================
        # Core attention computation
        # ==================================
        if self.checkpoint_core_attention and self.training:
            core_attn_out = self._checkpointed_attention_forward(
                q_absorbed,
                kv_compressed,
                None,
                hidden_states,
                q_compressed,
                attention_mask,
                packed_seq_params=packed_seq_params,
            )
        else:
            core_attn_out = self.core_attention(
                q_absorbed,
                kv_compressed,
                None,
                hidden_states,
                q_compressed,
                attention_mask,
                packed_seq_params=packed_seq_params,
                attn_mask_type=self.attn_mask_type,
            )

        # ==================================
        # Apply V up projection
        # ==================================
        assert self.linear_v_up_proj.weight.size(0) == (
            self.num_attention_heads_per_partition * self.config.v_head_dim
        )
        assert self.linear_v_up_proj.weight.size(1) == self.config.kv_lora_rank
        v_up_weight = self.linear_v_up_proj.weight.view(
            self.num_attention_heads_per_partition, self.config.v_head_dim, self.config.kv_lora_rank
        )
        core_attn_out = core_attn_out.view(
            *core_attn_out.shape[:-1],
            self.num_attention_heads_per_partition,
            self.config.kv_lora_rank,
        )
        core_attn_out = torch.einsum("...nc,ndc->...nd", core_attn_out, v_up_weight)
        core_attn_out = core_attn_out.contiguous()
        core_attn_out = core_attn_out.view(*core_attn_out.shape[:-2], -1)

        if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd':
            core_attn_out = core_attn_out.unsqueeze(1)

        assert core_attn_out.ndim == hidden_states.ndim
        assert core_attn_out.shape[0] == (
            hidden_states.shape[0] * self.config.tensor_model_parallel_size
        ), (
            f"{core_attn_out.shape[0]} != "
            f"{hidden_states.shape[0]} * "
            f"{self.config.tensor_model_parallel_size}"
        )
        assert core_attn_out.shape[1:-1] == hidden_states.shape[1:-1]
        assert core_attn_out.size(-1) == (
            self.config.v_head_dim * self.num_attention_heads_per_partition
        )

        if self.recompute_up_proj:
            assert self.qkv_up_checkpoint is not None
            self.qkv_up_checkpoint.discard_output_and_register_recompute(core_attn_out)
            self.qkv_up_checkpoint = None

        # =================
        # Output. [sq, b, h]
        # =================
        output, bias = self.linear_proj(core_attn_out)

        return output, bias

    def backward_dw(self) -> NoReturn:
        """Execute weight gradient computation."""
        self._backward_kv_proj()
        self._backward_q_proj()
        self._backward_output_proj()

    def _backward_kv_proj(self):
        """Computes weight gradients of KV projection layers."""
        self.linear_k_up_proj.backward_dw()
        self.linear_v_up_proj.backward_dw()
        self.linear_kv_down_proj.backward_dw()

    def _backward_q_proj(self):
        """Computes weight gradients of Q projection layers."""
        if self.config.q_lora_rank is None:
            self.linear_q_proj.backward_dw()
        else:
            self.linear_q_down_proj.backward_dw()
            self.linear_q_up_proj.backward_dw()

    def _backward_output_proj(self):
        """Computes weight gradients of output projection layer."""
        self.linear_proj.backward_dw()

    def set_for_recompute_input_layernorm(self):
        """Set the attention layer for recompute input_layernorm. Only needed for fp8/fp4."""
        from megatron.core.extensions.transformer_engine import set_save_original_input

        if self.config.q_lora_rank is not None:
            set_save_original_input(self.linear_q_down_proj)
        set_save_original_input(self.linear_kv_down_proj)

    def clip_qk(self):
        """
        QK Clipping is a technique to clip the query and key attention logits to prevent the
        attention logits from exploding. Per MuonClip usage, we update the weight by calling this
        function after Muon optimizer step.
        """
        raise NotImplementedError("clip_qk is not implemented for AbsorbedMLA")

    def _combine_kv_weights(self, k_weight, v_weight):
        """Combine separate K and V weights into MLA's interleaved format.

        MLA's linear_kv_up_proj weight layout (per head interleaved):
            [head0_K, head0_V, head1_K, head1_V, ...]

        AbsorbedMLA's separate weights layout:
            K: [head0_K, head1_K, ...]
            V: [head0_V, head1_V, ...]

        This method interleaves K and V per head to match MLA's format.

        Args:
            k_weight: [num_heads_per_partition * qk_head_dim, kv_lora_rank]
            v_weight: [num_heads_per_partition * v_head_dim, kv_lora_rank]

        Returns:
            combined: [num_heads_per_partition * (qk_head_dim + v_head_dim), kv_lora_rank]
        """
        n = self.num_attention_heads_per_partition
        qk_dim = self.config.qk_head_dim
        v_dim = self.config.v_head_dim
        lora_rank = self.config.kv_lora_rank

        # Reshape to per-head format
        k_per_head = k_weight.view(n, qk_dim, lora_rank)
        v_per_head = v_weight.view(n, v_dim, lora_rank)

        # Concatenate K and V for each head along dim=1
        # Result: [n, qk_dim + v_dim, lora_rank]
        combined_per_head = torch.cat([k_per_head, v_per_head], dim=1)

        # Reshape back to linear weight format
        combined_weight = combined_per_head.view(n * (qk_dim + v_dim), lora_rank)

        return combined_weight

    def _split_kv_weights(self, combined_weight):
        """Split MLA's interleaved KV weight into separate K and V weights.

        MLA's linear_kv_up_proj weight layout (per head interleaved):
            [head0_K, head0_V, head1_K, head1_V, ...]

        This method extracts K and V into separate tensors:
            K: [head0_K, head1_K, ...]
            V: [head0_V, head1_V, ...]

        Args:
            combined_weight: [num_heads_per_partition * (qk_head_dim + v_head_dim), kv_lora_rank]

        Returns:
            k_weight: [num_heads_per_partition * qk_head_dim, kv_lora_rank]
            v_weight: [num_heads_per_partition * v_head_dim, kv_lora_rank]
        """
        n = self.num_attention_heads_per_partition
        qk_dim = self.config.qk_head_dim
        v_dim = self.config.v_head_dim
        lora_rank = self.config.kv_lora_rank

        # Reshape to per-head format
        combined_per_head = combined_weight.view(n, qk_dim + v_dim, lora_rank)

        # Split K and V for each head (slicing creates non-contiguous views)
        k_per_head = combined_per_head[:, :qk_dim, :]  # [n, qk_dim, lora_rank]
        v_per_head = combined_per_head[:, qk_dim:, :]  # [n, v_dim, lora_rank]

        # Make contiguous and reshape back to linear weight format
        k_weight = k_per_head.contiguous().view(n * qk_dim, lora_rank)
        v_weight = v_per_head.contiguous().view(n * v_dim, lora_rank)

        return k_weight, v_weight

    def _load_from_state_dict(self, state_dict, prefix, *args, **kwargs):
        """Handle loading from checkpoints with combined KV up projection weights.

        This method splits the combined 'linear_kv_up_proj.weight' (which has per-head
        interleaved K and V) into separate 'linear_k_up_proj.weight' and 'linear_v_up_proj.weight'.
        """
        combined_key = f'{prefix}linear_kv_up_proj.weight'
        k_up_key = f'{prefix}linear_k_up_proj.weight'
        v_up_key = f'{prefix}linear_v_up_proj.weight'

        # Split combined KV weights into separate K and V
        if combined_key in state_dict:
            combined_weight = state_dict[combined_key]

            # Split with proper per-head de-interleaving
            k_weight, v_weight = self._split_kv_weights(combined_weight)

            state_dict[k_up_key] = k_weight
            state_dict[v_up_key] = v_weight

            del state_dict[combined_key]

        combined_extra_state_key = f'{prefix}linear_kv_up_proj._extra_state'
        k_up_extra_state_key = f'{prefix}linear_k_up_proj._extra_state'
        v_up_extra_state_key = f'{prefix}linear_v_up_proj._extra_state'

        if combined_extra_state_key in state_dict:
            combined_extra_state = state_dict[combined_extra_state_key]

            assert isinstance(combined_extra_state, torch.Tensor)
            # Now we can only handle the case where the extra state is empty.
            assert combined_extra_state.numel() == 0

            state_dict[k_up_extra_state_key] = combined_extra_state.clone()
            state_dict[v_up_extra_state_key] = combined_extra_state.clone()

            del state_dict[combined_extra_state_key]

        super()._load_from_state_dict(state_dict, prefix, *args, **kwargs)


================================================
FILE: megatron/core/transformer/experimental_attention_variant/dsa.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import copy
import math
from dataclasses import dataclass
from typing import Optional, Tuple, Union

import torch

from megatron.core import parallel_state
from megatron.core.models.common.embeddings import (
    RotaryEmbedding,
    YarnRotaryEmbedding,
    apply_rotary_pos_emb,
)
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.tensor_parallel.mappings import gather_from_sequence_parallel_region
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.spec_utils import ModuleSpec, build_module
from megatron.core.transformer.transformer_config import TransformerConfig

try:
    from fast_hadamard_transform import hadamard_transform
except ImportError:
    hadamard_transform = None


def rotate_activation(x: torch.Tensor) -> torch.Tensor:
    """Apply Hadamard rotation activation.
    Reference:
        https://github.com/deepseek-ai/DeepSeek-V3.2-Exp/blob/main/inference/model.py#L424-L428

    Args:
        x: Input tensor (must be bfloat16).

    Returns:
        Rotated tensor.
    """
    assert (
        x.dtype == torch.bfloat16
    ), f"rotate_activation only support bf16 input, but got {x.dtype}"
    assert hadamard_transform is not None, "fast_hadamard_transform is not installed."
    hidden_size = x.size(-1)
    return hadamard_transform(x, scale=hidden_size**-0.5)


class DSAIndexerLossLoggingHelper:
    """Helper class for logging sparse attention indexer losses."""

    tracker = {}

    @staticmethod
    def save_loss_to_tracker(
        loss: torch.Tensor,
        layer_number: int,
        num_layers: int,
        reduce_group: torch.distributed.ProcessGroup = None,
        avg_group: torch.distributed.ProcessGroup = None,
    ):
        """Save the indexer loss for logging.

        Args:
            loss: The loss tensor.
            layer_number: Layer index of the loss, 1-indexed.
            num_layers: The number of total layers.
            reduce_group: The group for reducing the loss.
            avg_group: The group for averaging the loss.
        """
        # Skip indexer loss logging if layer_number is None.
        if layer_number is None:
            return

        tracker = DSAIndexerLossLoggingHelper.tracker
        if "values" not in tracker:
            tracker["values"] = torch.zeros(num_layers, device=torch.cuda.current_device())
        tracker["values"][layer_number - 1] += loss.detach()
        tracker["reduce_group"] = reduce_group
        tracker["avg_group"] = avg_group

    @staticmethod
    def clean_loss_in_tracker():
        """Clear the indexer losses."""
        tracker = DSAIndexerLossLoggingHelper.tracker
        if "values" in tracker:
            tracker["values"].zero_()
        tracker["reduce_group"] = None
        tracker["avg_group"] = None

    @staticmethod
    def reduce_loss_in_tracker():
        """Collect and reduce the indexer losses across ranks."""
        tracker = DSAIndexerLossLoggingHelper.tracker
        if "values" not in tracker:
            return
        values = tracker["values"]

        torch.distributed.all_reduce(
            values, group=parallel_state.get_pipeline_model_parallel_group()
        )
        # Reduce indexer losses across ranks.
        if tracker.get('reduce_group') is not None:
            torch.distributed.all_reduce(values, group=tracker.get('reduce_group'))
        if tracker.get('avg_group') is not None:
            torch.distributed.all_reduce(
                values, group=tracker['avg_group'], op=torch.distributed.ReduceOp.AVG
            )
        torch.distributed.all_reduce(
            values,
            group=parallel_state.get_data_parallel_group(with_context_parallel=False),
            op=torch.distributed.ReduceOp.AVG,
        )

    @staticmethod
    def track_indexer_metrics(
        loss_scale: float,
        iteration: int,
        writer,
        wandb_writer=None,
        total_loss_dict=None,
        per_layer_logging: bool = False,
    ):
        """Track the sparse attention indexer metrics for logging.

        Args:
            loss_scale: Scale factor for the loss.
            iteration: Current training iteration.
            writer: TensorBoard writer.
            wandb_writer: Weights & Biases writer.
            total_loss_dict: Dictionary to accumulate total losses.
            per_layer_logging: Whether to log per-layer losses.
        """
        DSAIndexerLossLoggingHelper.reduce_loss_in_tracker()
        tracker = DSAIndexerLossLoggingHelper.tracker
        if "values" not in tracker:
            return

        indexer_loss_values = tracker["values"] * loss_scale
        num_layers = indexer_loss_values.shape[0]

        # Average across all layers (assuming all layers have sparse attention)
        avg_indexer_loss = indexer_loss_values.sum() / num_layers

        # Log average loss
        if total_loss_dict is not None:
            if "indexer loss" in total_loss_dict:
                total_loss_dict["indexer loss"] += avg_indexer_loss
            else:
                total_loss_dict["indexer loss"] = avg_indexer_loss

        if writer is not None:
            writer.add_scalar("indexer loss", avg_indexer_loss, iteration)

        if wandb_writer is not None:
            wandb_writer.log({"indexer loss": avg_indexer_loss}, iteration)

        DSAIndexerLossLoggingHelper.clean_loss_in_tracker()


def compute_dsa_indexer_loss(
    index_scores: torch.Tensor,
    topk_indices: torch.Tensor,
    query: torch.Tensor,
    key: torch.Tensor,
    softmax_scale: float,
    loss_coeff: float,
    sparse_loss: bool,
    pg_collection: ProcessGroupCollection,
) -> torch.Tensor:
    """
    Compute KL divergence loss between index_scores and true attention_scores.

    This loss trains the indexer to predict which tokens are important by matching the distribution
    of true attention scores.

    Reference: Section 2.1 of
        https://github.com/deepseek-ai/DeepSeek-V3.2-Exp/blob/main/DeepSeek_V3_2.pdf

    Args:
        index_scores: Scores predicted by indexer [batch, seqlen_q, seqlen_k].
        topk_indices: Top-k indices [batch, seqlen_q, index_topk].
        query: Query tensor [seqlen_q, batch, heads, dim].
        key: Key tensor [seqlen_k, batch, heads, dim].
        softmax_scale: Scale coefficient after q @ k^T.
        loss_coeff: Coefficient for the indexer KL divergence loss.
        sparse_loss: bool, whether to use sparse indexer loss. If True, only the topk
            indices will be used to compute the loss.
        pg_collection: Process group collection, must have TP process group.

    Returns:
        index_loss: KL divergence loss (scalar).
    """
    sq, b, np, hn = query.size()
    sk = key.size(0)

    # [sq, b, np, hn] -> [b, np, sq, hn] -> [b * np, sq, hn]
    query = query.permute(1, 2, 0, 3).reshape(b * np, sq, hn)
    # [sk, b, np, hn] -> [b, np, hn, sk] -> [b * np, hn, sk]
    key = key.permute(1, 2, 3, 0).reshape(b * np, hn, sk)
    # Compute attention scores [b * np, sq, sk]
    attention_scores = torch.bmm(query.float(), key.float()) * softmax_scale
    # Reshape to [b, np, sq, sk]
    attention_scores = attention_scores.reshape(b, np, sq, sk)

    # causal_mask [sq, sk]
    causal_mask = torch.triu(
        torch.full((sq, sk), float('-inf'), dtype=torch.float32, device=attention_scores.device),
        diagonal=1,
    )
    # index_mask [b, sq, sk]
    index_mask = torch.full(
        (b, sq, sk), float("-inf"), dtype=torch.float32, device=causal_mask.device
    ).scatter_(-1, topk_indices, 0)

    # [b, np, sq, skv] + [1, 1, sq, skv] -> [b, np, sq, skv]
    attention_scores += causal_mask.view(1, 1, sq, sk)
    if sparse_loss:
        # [b, np, sq, sk] + [b, 1, sq, sk] -> [b, np, sq, sk]
        attention_scores += index_mask.view(b, 1, sq, sk)
        # [b, sq, sk] + [b, sq, sk] -> [b, sq, sk]
        index_scores += index_mask

    # [b, np, sq, sk] -> [b, np, sq, sk]
    attention_scores = torch.nn.functional.softmax(attention_scores, dim=-1, dtype=torch.float32)
    # [b, sq, sk] -> [b, sq, sk]
    index_scores = torch.nn.functional.softmax(index_scores, dim=-1, dtype=torch.float32)

    # Sum attention scores across heads.
    # [batch, heads, seqlen_q, seqlen_k] -> [batch, seqlen_q, seqlen_k]
    attention_scores = attention_scores.sum(dim=1)
    if pg_collection.tp.size() > 1:
        # attention scores are scattered to TP ranks in head dimension.
        torch.distributed.all_reduce(attention_scores.contiguous(), group=pg_collection.tp)
    # L1 normalize target on the last dimension. Doesn't use abs() because attention_scores are
    # obtained from softmax so they are already non-negative.
    attention_scores = attention_scores / attention_scores.sum(dim=-1, keepdim=True)

    # Compute KL divergence: KL(target || index) = target(x) * log(target(x) / index(x))
    # kl_per_element [b, sq, sk]
    kl_per_element = attention_scores * (
        torch.log(attention_scores + 1e-10) - torch.log(index_scores + 1e-10)
    )

    # [b, sq, sk] -> [b, sq] -> [1]
    # Each token has same weight in the loss.
    kl_div = kl_per_element.sum(dim=-1).mean()

    # Scale by coefficient.
    indexer_loss = kl_div * loss_coeff

    return indexer_loss


def _compute_index_scores(q: torch.Tensor, weights: torch.Tensor, k: torch.Tensor) -> torch.Tensor:
    """
    Perform index score using BF16 precision.

    Reference:
        https://github.com/deepseek-ai/DeepSeek-V3.2-Exp/blob/main/inference/kernel.py#L254-L274
    This is a BF16 implementation of the `fp8_index` logic:
        1. Compute attention scores: q @ k^T;
        2. Apply ReLU activation;
        3. Weight by attention weights;
        4. Sum across attention heads.

    Args:
        q: BF16 [seqlen_q, batch, index_n_heads, index_head_dim], the query tensor.
        weights: BF16 [seqlen_q, batch, index_n_heads], the attention weights.
        k: BF16 [seqlen_k, batch, index_head_dim], the key tensor.

    Returns:
        index_scores: FP32 [batch, seqlen_q, seqlen_k], the index scores.
    """
    # Compute attention scores: q @ k^T
    # [seqlen_q, batch, index_n_heads, index_head_dim] @ [seqlen_k, batch, index_head_dim]^T
    #   -> [seqlen_q, batch, index_n_heads, seqlen_k]
    index_scores = torch.einsum('sbhd,tbd->sbht', q.float(), k.float())

    # Apply ReLU activation.
    index_scores = torch.relu(index_scores)

    # Weight each head by attention weights.
    # [seqlen_q, batch, index_n_heads, seqlen_k] * [seqlen_q, batch, index_n_heads, 1]
    #   -> [seqlen_q, batch, index_n_heads, seqlen_k]
    index_scores = index_scores * weights.unsqueeze(-1)

    # Sum across attention heads.
    # [seqlen_q, batch, index_n_heads, seqlen_k] -> [seqlen_q, batch, seqlen_k]
    index_scores = index_scores.sum(dim=2)

    # Transpose to [batch, seqlen_q, seqlen_k].
    index_scores = index_scores.transpose(0, 1)

    return index_scores


def fused_qk_topk_naive(
    q: torch.Tensor,
    k: torch.Tensor,
    weights: torch.Tensor,
    index_topk: int,
    mask: Optional[torch.Tensor] = None,
):
    """Naive implementation of QK Topk."""
    seqlen = q.size(0)
    # =========================================
    # Compute index scores
    # =========================================
    # [batch, seqlen, seqlen]
    index_scores = _compute_index_scores(q, weights, k)
    if mask is not None:
        assert mask.dtype == index_scores.dtype, "Mask dtype must match index scores dtype"
        index_scores = index_scores + mask

    # =========================================
    # Select top-k indices
    # =========================================
    topk_k = min(index_topk, seqlen)
    # [batch, seqlen, index_topk]
    topk_indices = index_scores.topk(topk_k, dim=-1)[1]

    return index_scores, topk_indices


def fwd_fused_indexer_loss_naive(
    q, weights, k, query, key, topk, softmax_scale, loss_coeff, mask, sparse_loss, pg_collection
):
    """Naive implementation of forward pass for indexer loss."""
    index_scores, topk_indices = fused_qk_topk_naive(q, k, weights, topk, mask)

    indexer_loss = compute_dsa_indexer_loss(
        index_scores,
        topk_indices,
        query,
        key,
        softmax_scale,
        loss_coeff,
        sparse_loss,
        pg_collection,
    )

    return topk_indices, indexer_loss


def bwd_fused_indexer_loss_naive(
    q,
    weights,
    k,
    query,
    key,
    topk_indices,
    softmax_scale,
    loss_coeff,
    sparse_loss,
    grad_loss,
    pg_collection,
):
    """Naive implementation of backward pass for indexer loss."""
    index_scores = _compute_index_scores(q, weights, k)  # [B, Sq, Sk]

    sq, b, np, hn = query.size()
    sk = key.size(0)

    # [sq, b, np, hn] -> [b, np, sq, hn] -> [b * np, sq, hn]
    query_reshaped = query.permute(1, 2, 0, 3).reshape(b * np, sq, hn)
    # [sk, b, np, hn] -> [b, np, hn, sk] -> [b * np, hn, sk]
    key_reshaped = key.permute(1, 2, 3, 0).reshape(b * np, hn, sk)
    # Compute attention scores [b * np, sq, sk]
    attention_scores = torch.bmm(query_reshaped.float(), key_reshaped.float()) * softmax_scale
    # Free reshaped tensors - no longer needed after bmm
    del query_reshaped, key_reshaped

    # Reshape to [b, np, sq, sk]
    attention_scores = attention_scores.reshape(b, np, sq, sk)

    # causal_mask [sq, sk]
    causal_mask = torch.triu(
        torch.full((sq, sk), float('-inf'), dtype=torch.float32, device=attention_scores.device),
        diagonal=1,
    )
    # index_mask [b, sq, sk]
    index_mask = torch.full(
        (b, sq, sk), float("-inf"), dtype=torch.float32, device=causal_mask.device
    ).scatter_(-1, topk_indices, 0)

    # Apply causal mask to both attention and index scores
    # [b, np, sq, skv] + [1, 1, sq, skv] -> [b, np, sq, skv]
    attention_scores = attention_scores + causal_mask.view(1, 1, sq, sk)
    # [b, sq, sk] + [1, sq, sk] -> [b, sq, sk]
    index_scores = index_scores + causal_mask.unsqueeze(0)
    # Free causal_mask - no longer needed
    del causal_mask

    if sparse_loss:
        # [b, np, sq, sk] + [b, 1, sq, sk] -> [b, np, sq, sk]
        attention_scores = attention_scores + index_mask.view(b, 1, sq, sk)
        # [b, sq, sk] + [b, sq, sk] -> [b, sq, sk]
        index_scores = index_scores + index_mask

    # Compute softmax for both
    attention_scores_softmax = torch.nn.functional.softmax(
        attention_scores, dim=-1, dtype=torch.float32
    )
    # Free attention_scores immediately
    del attention_scores

    index_scores_softmax = torch.nn.functional.softmax(index_scores, dim=-1, dtype=torch.float32)
    # Free index_scores - no longer needed after softmax
    del index_scores

    # Sum attention scores across heads: [b, np, sq, sk] -> [b, sq, sk]
    attention_scores_sum = attention_scores_softmax.sum(dim=1)
    # Free attention_scores_softmax
    del attention_scores_softmax

    if pg_collection.tp.size() > 1:
        # attention scores are scattered to TP ranks in head dimension.
        torch.distributed.all_reduce(attention_scores_sum.contiguous(), group=pg_collection.tp)

    # L1 normalize
    attention_scores_normalized = attention_scores_sum / attention_scores_sum.sum(
        dim=-1, keepdim=True
    )
    # Free attention_scores_sum - no longer needed after normalization
    del attention_scores_sum

    # Backward through loss = kl_div * loss_coeff
    # where kl_div = kl_per_element.sum(dim=-1).mean()
    grad_kl_div = grad_loss * loss_coeff  # scalar

    # Backward through mean: distribute gradient equally
    grad_kl_per_row = grad_kl_div / (b * sq)  # scalar value for each row

    # Backward through sum(dim=-1): broadcast back to [b, sq, sk]
    # Each element in a row contributes to the sum, so gradient is same for all
    grad_kl_per_element = grad_kl_per_row.view(1, 1, 1).expand(b, sq, sk)

    # Backward through kl_per_element = target * (log(target) - log(index))
    # ∂kl/∂index_softmax = -target / index_softmax
    grad_index_scores_softmax = (
        -attention_scores_normalized / (index_scores_softmax + 1e-10) * grad_kl_per_element
    )
    # Free attention_scores_normalized - no longer needed
    del attention_scores_normalized

    # Backward through softmax: ∂L/∂x = softmax * (∂L/∂softmax - sum(∂L/∂softmax * softmax))
    sum_grad = (grad_index_scores_softmax * index_scores_softmax).sum(dim=-1, keepdim=True)
    grad_index_scores_logits = index_scores_softmax * (grad_index_scores_softmax - sum_grad)
    # Free intermediate tensors
    del index_scores_softmax, grad_index_scores_softmax, sum_grad

    # Zero out gradients for masked positions
    # Create a mask for valid (non-masked) positions
    # Causal mask: position (i, j) is valid if j <= i
    causal_valid_mask = torch.tril(
        torch.ones((sq, sk), device=q.device, dtype=torch.bool)
    )  # [sq, sk]
    if sparse_loss:
        # Also apply index mask - only topk positions are valid
        index_valid_mask = index_mask == 0  # [b, sq, sk]
        del index_mask  # Free index_mask immediately after use
        valid_mask = causal_valid_mask.unsqueeze(0) & index_valid_mask  # [b, sq, sk]
        del index_valid_mask
    else:
        del index_mask  # Free index_mask even if not used for sparse_loss
        valid_mask = causal_valid_mask.unsqueeze(0).expand(b, sq, sk)  # [b, sq, sk]
    del causal_valid_mask

    grad_index_scores_logits = grad_index_scores_logits * valid_mask.float()
    del valid_mask

    # Transpose from [b, sq, sk] to [sq, b, sk]
    grad_index_scores = grad_index_scores_logits.transpose(0, 1)  # [sq, b, sk]
    del grad_index_scores_logits

    # Backward through sum over heads: expand gradient
    grad_weighted_scores = grad_index_scores.unsqueeze(2)  # [sq, b, 1, sk]
    del grad_index_scores

    # Compute forward values needed for backward
    scores = torch.einsum('sbhd,tbd->sbht', q.float(), k.float())  # [sq, b, h, sk]
    # Compute relu_mask before relu (saves memory vs keeping both scores and relu output)
    relu_mask = scores > 0
    scores_after_relu = torch.relu(scores)
    del scores

    # Backward through multiplication by weights: index_scores_per_head * weights
    # ∂L/∂weights = grad * relu_scores (sum over sk)
    grad_weights = (grad_weighted_scores * scores_after_relu).sum(dim=-1)  # [sq, b, h]

    # ∂L/∂relu_scores = grad * weights
    grad_scores_after_relu = grad_weighted_scores * weights.unsqueeze(-1)  # [sq, b, h, sk]
    del grad_weighted_scores, scores_after_relu

    # Backward through ReLU
    grad_scores = grad_scores_after_relu * relu_mask.float()  # [sq, b, h, sk]
    del grad_scores_after_relu, relu_mask

    # Backward through einsum 'sbhd,tbd->sbht'
    # ∂L/∂q = einsum('sbht,tbd->sbhd', grad_scores, k)
    grad_q = torch.einsum('sbht,tbd->sbhd', grad_scores, k.float())  # [sq, b, h, d]
    # ∂L/∂k = einsum('sbht,sbhd->tbd', grad_scores, q)
    grad_k = torch.einsum('sbht,sbhd->tbd', grad_scores, q.float())  # [sk, b, d]
    del grad_scores

    return grad_q.to(q.dtype), grad_weights.to(weights.dtype), grad_k.to(k.dtype)


class FusedDSAIndexerLoss(torch.autograd.Function):
    """Fused implementation of DSA Indexer Loss."""

    @staticmethod
    def forward(
        ctx,
        q,
        weights,
        k,
        query,
        key,
        softmax_scale,
        topk,
        loss_coeff,
        mask,
        sparse_loss,
        pg_collection,
    ):
        """
        Fused forward: index_scores never materialized in full.
        """
        topk_indices, loss = fwd_fused_indexer_loss_naive(
            q,
            weights,
            k,
            query,
            key,
            topk,
            softmax_scale,
            loss_coeff,
            mask,
            sparse_loss,
            pg_collection,
        )

        # Save for backward (recomputation strategy)
        ctx.save_for_backward(q, weights, k, query, key, topk_indices)
        ctx.softmax_scale = softmax_scale
        ctx.loss_coeff = loss_coeff
        ctx.sparse_loss = sparse_loss
        ctx.pg_collection = pg_collection

        return topk_indices, loss

    @staticmethod
    def backward(ctx, grad_topk_indices, grad_loss):
        """
        Backward: Recompute what we need.
        """
        q, weights, k, query, key, topk_indices = ctx.saved_tensors

        grad_q, grad_weights, grad_k = bwd_fused_indexer_loss_naive(
            q,
            weights,
            k,
            query,
            key,
            topk_indices,
            ctx.softmax_scale,
            ctx.loss_coeff,
            ctx.sparse_loss,
            grad_loss,
            ctx.pg_collection,
        )

        # query and key are detached in forward, so return None for their gradients
        return grad_q, grad_weights, grad_k, None, None, None, None, None, None, None, None


class DSAIndexerLossAutoScaler(torch.autograd.Function):
    """An AutoScaler that triggers the backward pass and scales the grad for indexer loss.

    This custom autograd function attaches a KL divergence loss to the activation
    to train the indexer to predict attention scores without affecting the forward pass.
    """

    main_loss_backward_scale: torch.Tensor = None

    @staticmethod
    def forward(ctx, output: torch.Tensor, indexer_loss: torch.Tensor):
        """Preserve the indexer_loss by storing it in the context to avoid garbage collection.

        Args:
            output: The output tensor (activation).
            indexer_loss: The indexer KL divergence loss tensor.

        Returns:
            torch.Tensor: The output tensor unchanged.
        """
        ctx.save_for_backward(indexer_loss)
        return output

    @staticmethod
    def backward(ctx, grad_output: torch.Tensor):
        """Compute and scale the gradient for indexer loss.

        Args:
            grad_output: The gradient of the output.

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: The gradient of the output, scaled indexer loss
                gradient.
        """
        (indexer_loss,) = ctx.saved_tensors
        if DSAIndexerLossAutoScaler.main_loss_backward_scale is None:
            DSAIndexerLossAutoScaler.main_loss_backward_scale = torch.tensor(
                1.0, device=indexer_loss.device
            )
        indexer_loss_backward_scale = DSAIndexerLossAutoScaler.main_loss_backward_scale
        scaled_indexer_loss_grad = torch.ones_like(indexer_loss) * indexer_loss_backward_scale
        return grad_output, scaled_indexer_loss_grad

    @staticmethod
    def set_loss_scale(scale: torch.Tensor):
        """Set the scale of the indexer loss.

        Args:
            scale: The scale value to set.
        """
        if DSAIndexerLossAutoScaler.main_loss_backward_scale is None:
            DSAIndexerLossAutoScaler.main_loss_backward_scale = scale
        else:
            DSAIndexerLossAutoScaler.main_loss_backward_scale.copy_(scale)


@dataclass
class DSAIndexerSubmodules:
    """
    Configuration class for specifying the submodules of an DSA Indexer.

    Args:
        linear_wq_b: Linear projection for query bottleneck expansion.
        linear_wk: Linear projection for key.
        k_norm: Layer normalization for key.
        linear_weights_proj: Linear projection for attention weights.
    """

    linear_wq_b: Union[ModuleSpec, type] = None
    linear_wk: Union[ModuleSpec, type] = None
    k_norm: Union[ModuleSpec, type] = None
    linear_weights_proj: Union[ModuleSpec, type] = None


@dataclass
class DSAttentionSubmodules:
    """
    Configuration class for specifying the submodules of DSAttention.

    Args:
        indexer: DSA Indexer module for computing sparse attention indices.
    """

    indexer: Union[ModuleSpec, type] = None


class DSAIndexer(MegatronModule):
    """
    DSA Lightning Indexer for DeepSeek Sparse Attention.

    Computes index scores to identify the top-k most relevant key-value pairs for each query in
    sparse attention.

    Reference:
        https://github.com/deepseek-ai/DeepSeek-V3.2-Exp/blob/main/inference/model.py#L431-L480
    """

    def __init__(
        self,
        config: TransformerConfig,
        submodules: DSAIndexerSubmodules,
        pg_collection: Optional[ProcessGroupCollection] = None,
    ) -> None:
        """Initialize the indexer.

        Args:
            config (TransformerConfig): The configuration for the transformer model.
            submodules (DSAIndexerSubmodules): Indexer submodules specification.
            pg_collection (ProcessGroupCollection, optional): Process groups for the indexer.
        """
        super().__init__(config=config)
        self.hidden_size = self.config.hidden_size
        self.qk_pos_emb_head_dim = self.config.qk_pos_emb_head_dim
        self.q_lora_rank = (
            self.config.q_lora_rank
            if self.config.q_lora_rank is not None
            else self.config.hidden_size
        )

        self.index_n_heads = self.config.dsa_indexer_n_heads
        self.index_head_dim = self.config.dsa_indexer_head_dim
        self.index_topk = self.config.dsa_indexer_topk

        self.softmax_scale: float = self.index_head_dim**-0.5

        if pg_collection is None:
            pg_collection = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['tp', 'cp'])
        self.pg_collection = pg_collection

        # Initialize Position Embedding.
        if self.config.rope_type == 'rope':
            self.rotary_pos_emb = RotaryEmbedding(
                self.qk_pos_emb_head_dim,
                rotary_percent=self.config.rotary_percent,
                rotary_base=self.config.rotary_base,
                cp_group=self.pg_collection.cp,
            )
        elif self.config.rope_type == 'yarn':
            self.rotary_pos_emb = YarnRotaryEmbedding(
                self.qk_pos_emb_head_dim,
                rotary_base=self.config.rotary_base,
                scaling_factor=self.config.rotary_scaling_factor,
                original_max_position_embeddings=self.config.original_max_position_embeddings,
                beta_fast=self.config.beta_fast,
                beta_slow=self.config.beta_slow,
                mscale=self.config.mscale,
                mscale_all_dim=self.config.mscale_all_dim,
                cp_group=self.pg_collection.cp,
            )
        else:
            raise ValueError(
                f'Unsupported RoPE type: {self.config.rope_type}, supported types are "rope" and '
                f'"yarn"'
            )

        self.linear_wq_b = build_module(
            submodules.linear_wq_b,
            self.q_lora_rank,
            self.index_n_heads * self.index_head_dim,
            config=self.config,
            init_method=self.config.init_method,
            bias=False,
            skip_bias_add=False,
            skip_weight_param_allocation=False,
            parallel_mode="duplicated",
        )

        self.linear_wk = build_module(
            submodules.linear_wk,
            self.hidden_size,
            self.index_head_dim,
            config=self.config,
            init_method=self.config.init_method,
            bias=False,
            skip_bias_add=False,
            skip_weight_param_allocation=False,
            parallel_mode="duplicated",
        )

        k_norm_config = copy.copy(self.config)
        k_norm_config.normalization = "LayerNorm"
        self.k_norm = build_module(
            submodules.k_norm,
            config=k_norm_config,
            hidden_size=self.index_head_dim,
            eps=self.config.layernorm_epsilon,
        )

        self.linear_weights_proj = build_module(
            submodules.linear_weights_proj,
            self.hidden_size,
            self.index_n_heads,
            config=self.config,
            init_method=self.config.init_method,
            bias=False,
            skip_bias_add=False,
            skip_weight_param_allocation=False,
            parallel_mode="duplicated",
        )

    def _apply_rope(self, x: torch.Tensor, rotary_pos_emb: torch.Tensor, mscale: float):
        """Apply RoPE to the input tensor."""
        # x_nope [seqlen, batch, *, index_head_dim - qk_pos_emb_head_dim]
        # x_pe   [seqlen, batch, *, qk_pos_emb_head_dim]
        x_nope, x_pe = torch.split(
            x, [self.index_head_dim - self.qk_pos_emb_head_dim, self.qk_pos_emb_head_dim], dim=-1
        )
        x_pe = apply_rotary_pos_emb(
            x_pe,
            rotary_pos_emb,
            config=self.config,
            cu_seqlens=None,
            mscale=mscale,
            cp_group=self.pg_collection.cp,
        )
        # [seqlen, batch, *, index_head_dim]
        x = torch.cat([x_nope, x_pe], dim=-1)
        return x

    def forward_before_topk(
        self, x: torch.Tensor, qr: torch.Tensor, packed_seq_params: Optional[PackedSeqParams] = None
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """All computations before topk."""
        # =========================================
        # Prepare RoPE params
        # =========================================
        rotary_seq_len = self.rotary_pos_emb.get_rotary_seq_len(
            None, None, x, self.config, packed_seq_params
        )
        if self.config.rope_type == "rope":
            rotary_pos_emb = self.rotary_pos_emb(rotary_seq_len, packed_seq=False)
            mscale = 1.0
        else:
            rotary_pos_emb, mscale = self.rotary_pos_emb(rotary_seq_len, packed_seq=False)

        # =========================================
        # Gather inputs if sp is enabled
        # =========================================
        if self.config.sequence_parallel and self.pg_collection.tp.size() > 1:
            x = gather_from_sequence_parallel_region(x, group=self.pg_collection.tp)
            qr = gather_from_sequence_parallel_region(qr, group=self.pg_collection.tp)

        # =========================================
        # Get sequence length and batch size
        # =========================================
        seqlen, bsz, _ = x.size()

        # =========================================
        # q linear and apply rope to q
        # =========================================
        # [seqlen, batch, q_lora_rank] -> [seqlen, batch, index_n_heads * index_head_dim]
        q, _ = self.linear_wq_b(qr)
        # [seqlen, batch, index_n_heads * index_head_dim]
        #   -> [seqlen, batch, index_n_heads, index_head_dim]
        q = q.reshape(seqlen, bsz, self.index_n_heads, self.index_head_dim)
        q = self._apply_rope(q, rotary_pos_emb, mscale)

        # =========================================
        # k linear and apply rope to k
        # =========================================
        # [seqlen, batch, hidden_size] -> [seqlen, batch, index_head_dim]
        k, _ = self.linear_wk(x)
        k = self.k_norm(k)
        # [seqlen, batch, index_head_dim] -> [seqlen, batch, 1, index_head_dim]
        k = k.reshape(seqlen, bsz, 1, self.index_head_dim)
        k = self._apply_rope(k, rotary_pos_emb, mscale)
        # [seqlen, batch, 1, index_head_dim] -> [seqlen, batch, index_head_dim]
        k = k.reshape(seqlen, bsz, self.index_head_dim)

        # =========================================
        # Rotate activation
        # =========================================
        q = rotate_activation(q)
        k = rotate_activation(k)

        # =========================================
        # Prepare weights for index scores
        # =========================================
        # [seqlen, batch, hidden_size] -> [seqlen, batch, index_n_heads]
        weights, _ = self.linear_weights_proj(x)
        weights = weights * (self.index_n_heads**-0.5) * self.softmax_scale

        return q, k, weights

    def forward_with_scores(
        self,
        x: torch.Tensor,
        qr: torch.Tensor,
        mask: Optional[torch.Tensor] = None,
        packed_seq_params: Optional[PackedSeqParams] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Forward pass for DSA Indexer that returns both index scores and top-k indices.

        This is used when KL loss is enabled to compare indexer scores with true attention scores.

        Args:
            x: hidden states [seqlen, batch, hidden_size].
            qr: Low-rank query tensor [seqlen, batch, q_lora_rank].
            mask: Attention mask [batch, seqlen, seqlen].
            packed_seq_params: Packed sequence parameters for variable length sequences.

        Returns:
            index_scores: Index scores [batch, seqlen, seqlen].
            topk_indices: Top-k indices [batch, seqlen, index_topk].
        """
        assert packed_seq_params is None, "Packed sequence is not supported for DSAttention"

        # [seqlen, batch, index_n_heads * index_head_dim]
        # [seqlen, batch, index_head_dim]
        # [seqlen, batch, index_n_heads]
        q, k, weights = self.forward_before_topk(x, qr, packed_seq_params)

        # [batch, seqlen, seqlen], [batch, seqlen, index_topk]
        index_scores, topk_indices = fused_qk_topk_naive(q, k, weights, self.index_topk, mask)

        return index_scores, topk_indices

    def forward(
        self,
        x: torch.Tensor,
        qr: torch.Tensor,
        mask: Optional[torch.Tensor] = None,
        packed_seq_params: Optional[PackedSeqParams] = None,
    ):
        """
        Forward pass for DSA Indexer.

        Args:
            x: hidden states [seqlen, batch, hidden_size].
            qr: Low-rank query tensor [seqlen, batch, q_lora_rank].
            mask: Attention mask [batch, seqlen, seqlen].
            packed_seq_params: Packed sequence parameters for variable length sequences.

        Returns:
            topk_indices: Top-k indices for sparse attention [batch, seqlen, index_topk].
        """
        _, topk_indices = self.forward_with_scores(x, qr, mask, packed_seq_params)
        return topk_indices


def unfused_dsa_fn(query, key, value, topk_indices, softmax_scale):
    """
    Unfused sparse attention implementation.
    """
    sq, b, np, hn = query.size()
    skv = key.size(0)
    hnv = value.size(3)

    # ===================================
    # Raw attention scores [b, np, sq, skv]
    # ===================================
    # [sq, b, np, hn] -> [b, np, sq, hn] -> [b * np, sq, hn]
    query = query.permute(1, 2, 0, 3).reshape(b * np, sq, hn)
    # [skv, b, np, hn] -> [b, np, hn, skv] -> [b * np, hn, skv]
    key = key.permute(1, 2, 3, 0).reshape(b * np, hn, skv)
    # Compute attention scores [b * np, sq, skv]
    attention_scores = torch.bmm(query.float(), key.float()) * softmax_scale
    # Reshape to [b, np, sq, skv]
    attention_scores = attention_scores.reshape(b, np, sq, skv)

    # ===================================
    # Apply sparse mask from indexer
    # ===================================
    # index_mask [b, sq, skv]
    index_mask = torch.full((b, sq, skv), float("-inf"), device=attention_scores.device)
    index_mask.scatter_(-1, topk_indices, 0)
    # causal_mask [sq, skv]
    causal_mask = torch.triu(
        torch.full((sq, skv), float('-inf'), dtype=torch.float32, device=index_mask.device),
        diagonal=1,
    )
    # [b, sq, skv] + [1, sq, skv] -> [b, sq, skv]
    index_mask += causal_mask.view(1, sq, skv)
    # [b, np, sq, skv] + [b, 1, sq, skv] -> [b, np, sq, skv]
    attention_scores += index_mask.unsqueeze(1)
    attention_scores = torch.nn.functional.softmax(attention_scores, dim=-1, dtype=torch.float32)

    # ===================================
    # Output
    # ===================================
    # [skv, b, np, hnv] -> [b, np, skv, hnv] -> [b * np, skv, hnv]
    value = value.permute(1, 2, 0, 3).reshape(b * np, skv, hnv)
    # Reshape attention_scores: [b, np, sq, skv] -> [b * np, sq, skv]
    attention_scores = attention_scores.reshape(b * np, sq, skv)
    # Compute output: [b * np, sq, hnv]
    output = torch.bmm(attention_scores.to(value.dtype), value)
    # Reshape output: [b * np, sq, hnv] -> [b, np, sq, hnv] -> [sq, b, np, hnv]
    output = output.reshape(b, np, sq, hnv).permute(2, 0, 1, 3).contiguous()
    # Flatten: [sq, b, np, hnv] -> [sq, b, np * hnv]
    output = output.reshape(sq, b, np * hnv)
    return output


class DSAttention(MegatronModule):
    """
    This module implements sparse attention mechanism using an DSA Indexer to compute top-k
    attention indices for reducing computational complexity.

    Reference:
        https://github.com/deepseek-ai/DeepSeek-V3.2-Exp/blob/main/inference/model.py#L491-L597
    """

    def __init__(
        self,
        config: TransformerConfig,
        submodules: DSAttentionSubmodules,
        layer_number: int,
        attn_mask_type: AttnMaskType,
        attention_type: str,
        attention_dropout: Optional[float] = None,
        softmax_scale: Optional[float] = None,
        k_channels: Optional[int] = None,
        v_channels: Optional[int] = None,
        cp_comm_type: str = "p2p",
        pg_collection: ProcessGroupCollection = None,
    ):
        super().__init__(config=config)

        self.layer_number = layer_number

        self.indexer = build_module(
            submodules.indexer, config=self.config, pg_collection=pg_collection
        )

        if softmax_scale is None:
            softmax_scale = 1.0 / math.sqrt(
                k_channels if k_channels is not None else config.kv_channels
            )
        self.softmax_scale = softmax_scale

    def forward(
        self,
        query: torch.Tensor,
        key: torch.Tensor,
        value: torch.Tensor,
        attention_mask: torch.Tensor,
        x: torch.Tensor,
        qr: torch.Tensor,
        attn_mask_type: AttnMaskType = None,
        attention_bias: torch.Tensor = None,
        packed_seq_params: PackedSeqParams = None,
    ):
        """
        Forward pass for Sparse Attention.

        Args:
            query: Query tensor [sq, b, np, hn].
            key: Key tensor [skv, b, np, hn].
            value: Value tensor [skv, b, np, hnv].
            x: Original hidden states [sq, b, hidden_size].
            qr: Low-rank query representation [sq, b, q_lora_rank].
            attention_mask: Attention mask tensor [b, 1, sq, sk].
            attn_mask_type: Type of attention mask.
            attention_bias: Optional attention bias.
            packed_seq_params: Packed sequence parameters.

        Returns:
            output: Output tensor [sq, b, hidden_size]
        """
        sq, b, np, hn = query.size()
        skv = key.size(0)
        hnv = value.size(3)

        # Detach x and qr to prevent gradients of indexer from flowing back to the main model.
        x = x.detach()
        qr = qr.detach()

        # Get a FP32 mask with -inf for masked positions.
        if attn_mask_type is not None:
            assert attn_mask_type == AttnMaskType.causal, 'Only causal mask is supported for now'
            # Generate upper triangular mask with -inf above diagonal, 0 elsewhere
            # torch.triu with diagonal=1 creates upper triangular matrix (excluding main diagonal)
            # float_mask [sq, skv]
            float_mask = torch.triu(
                torch.full((sq, skv), float('-inf'), dtype=torch.float32, device=x.device),
                diagonal=1,
            )
        else:
            assert attention_mask.shape == (b, 1, sq, skv), 'attention_mask shape mismatch'
            # [b, 1, sq, skv] -> [b, sq, skv]
            mask = attention_mask.squeeze()
            # float_mask [b, sq, skv]
            float_mask = torch.zeros_like(mask, dtype=torch.float32).masked_fill(
                mask, float('-inf')
            )

        if self.training and torch.is_grad_enabled():
            # ===================================
            # Prepare inputs for indexer loss
            # ===================================
            q, k, weights = self.indexer.forward_before_topk(x, qr, packed_seq_params)
            indexer_loss_coeff = getattr(self.config, 'dsa_indexer_loss_coeff', 0.0)

            # ===================================
            # Attach indexer topk and loss
            # ===================================
            # Compute KL divergence loss between indexer scores and true attention scores
            topk_indices, indexer_loss = FusedDSAIndexerLoss.apply(
                q,
                weights,
                k,
                query.detach(),
                key.detach(),
                self.softmax_scale,
                self.indexer.index_topk,
                indexer_loss_coeff,
                float_mask,
                getattr(self.config, "dsa_indexer_use_sparse_loss", False),
                self.indexer.pg_collection,
            )
            # Save indexer loss for logging
            if indexer_loss_coeff > 0:
                DSAIndexerLossLoggingHelper.save_loss_to_tracker(
                    loss=indexer_loss,
                    layer_number=self.layer_number,
                    num_layers=self.config.num_layers,
                )

            # ===================================
            # Run sparse attention kernel
            # ===================================
            output = unfused_dsa_fn(query, key, value, topk_indices, self.softmax_scale)

            # Attach loss to output
            output = DSAIndexerLossAutoScaler.apply(output, indexer_loss)

        else:
            # ===================================
            # Get index scores and top-k indices
            # ===================================
            _, topk_indices = self.indexer.forward_with_scores(
                x, qr, mask=float_mask, packed_seq_params=packed_seq_params
            )

            # ===================================
            # Run sparse attention kernel
            # ===================================
            output = unfused_dsa_fn(query, key, value, topk_indices, self.softmax_scale)

        return output


================================================
FILE: megatron/core/transformer/fsdp_dtensor_checkpoint.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import re

import torch
import torch.distributed as dist
from torch.distributed.checkpoint import default_planner

logger = logging.getLogger(__name__)

try:
    from torch.distributed import DeviceMesh
    from torch.distributed._tensor import DTensor
    from torch.distributed.checkpoint.metadata import TensorStorageMetadata
    from torch.distributed.tensor.placement_types import Replicate, Shard

    from megatron.core.distributed.fsdp.src.megatron_fsdp.param_and_grad_buffer import (
        make_fsdp_dtensor,
    )
    from megatron.core.distributed.fsdp.src.megatron_fsdp.uneven_dtensor import (
        gather_uneven_dtensor_to_full_tensor,
    )
    from megatron.core.distributed.fsdp.src.megatron_fsdp.utils import (
        get_mcore_tensor_parallel_partition_dim,
        is_mcore_tensor_model_parallel,
    )

    HAVE_MEGATRON_FSDP = True
except ImportError:
    HAVE_MEGATRON_FSDP = False

from megatron.core import parallel_state
from megatron.core.tensor_parallel.layers import copy_tensor_model_parallel_attributes
from megatron.core.transformer.transformer_layer import TransformerLayer
from megatron.core.utils import get_attr_wrapped_model


def get_ep_layer_offset(num_experts: int | None = None) -> int:
    """
    Get the expert layer offset for the current model.

    Args:
        num_experts: Total number of experts in the model. If None, returns 0.

    Returns:
        The expert layer offset for the current EP rank.
    """
    ep_size = parallel_state.get_expert_model_parallel_world_size()
    ep_rank = parallel_state.get_expert_model_parallel_rank()
    num_local_experts = num_experts // ep_size if num_experts else 0
    local_expert_offset = ep_rank * num_local_experts

    return local_expert_offset


def get_expert_index_from_key(key):
    """Extract expert index from various expert key formats.

    Supported formats:
    - GroupedMLP: 'mlp.experts.linear_fc1.weight0', 'mlp.experts.linear_fc2.weight0'
    - SequentialMLP: 'mlp.experts.local_experts.0.linear_fc1.weight',
        'mlp.experts.local_experts.0.linear_fc2.weight'

    Returns:
        int: Expert index if found, None otherwise.
    """
    # GroupedMLP: index is at the end after 'weight'
    if 'mlp.experts.linear_fc1.weight' in key or 'mlp.experts.linear_fc2.weight' in key:
        m = re.search(r'^.*\.mlp\.experts\.linear_fc\d\.weight(\d+)', key)
        assert m, f"Failed to parse expert index from key: {key}"
        return int(m.group(1))
    # SequentialMLP: index is between 'local_experts.' and next '.'
    elif 'mlp.experts.local_experts' in key:
        m = re.search(r'^.*\.mlp\.experts\.local_experts\.(\d+)', key)
        assert m, f"Failed to parse expert index from key: {key}"
        return int(m.group(1))
    return None


def handle_experts_in_state_dict(state_dict, num_experts: int | None = None):
    """
    Rewrite expert keys in state dict.

    Args:
        state_dict: The state dictionary to process.
        num_experts: Total number of experts in the model. If None, no expert processing occurs.

    Returns:
        The processed state dictionary with rewritten expert keys.
    """
    local_expert_start = get_ep_layer_offset(num_experts)
    local_expert_end = num_experts if num_experts else 0

    def should_keep_expert_key(expert_index):
        """Determine if this rank should keep this expert key based on expert index"""
        if expert_index is None:
            # If we can't determine expert index, keep the key (non-expert weights)
            return True

        # Check if this expert belongs to this rank
        return local_expert_start <= expert_index < local_expert_end

    def replace_expert_index_in_key(key, expert_index, state_dict):
        """Replace expert index in key with new index corresponding to the current rank"""
        new_expert_index = expert_index + local_expert_start
        # GroupedMLP: 'mlp.experts.linear_fc1.weight0', 'mlp.experts.linear_fc2.weight0'
        if 'mlp.experts.linear_fc1.weight' in key or 'mlp.experts.linear_fc2.weight' in key:
            # Handle SwiGLU weight{idx}_w and weight{idx}_v format
            if key.endswith('_w') or key.endswith('_v'):
                suffix = key[-2:]  # '_w' or '_v'
                new_key = key.replace(
                    f'weight{expert_index}{suffix}', f'weight{new_expert_index}{suffix}'
                )
            # Handle regular weight{idx} format
            else:
                new_key = key.replace(f'weight{expert_index}', f'weight{new_expert_index}')
        # SequentialMLP: index is between 'local_experts.' and next '.'
        elif 'mlp.experts.local_experts' in key:
            new_key = key.replace(
                f'local_experts.{expert_index}.', f'local_experts.{new_expert_index}.'
            )
        else:
            raise ValueError(f"Unexpected expert key format: {key}")

        state_dict[new_key] = state_dict[key]
        del state_dict[key]

    # Process model state dict
    state_dict = state_dict.copy()
    for key in list(state_dict.keys()):
        expert_index = get_expert_index_from_key(key)
        if not should_keep_expert_key(expert_index):
            replace_expert_index_in_key(key, expert_index, state_dict)

    return state_dict


def expert_param_local_key(key: str, num_experts: int | None = None) -> str:
    """Get the module parameter corresponding to the key.

    Args:
        key: The parameter key to process.
        num_experts: Total number of experts in the model. If None, no expert processing occurs.

    Returns:
        The local parameter key with adjusted expert indices.
    """
    local_expert_offset = get_ep_layer_offset(num_experts)
    expert_index = get_expert_index_from_key(key)
    if expert_index is not None:
        new_expert_index = expert_index - local_expert_offset
        # GroupedMLP: 'mlp.experts.linear_fc1.weight0', 'mlp.experts.linear_fc2.weight0'
        if 'mlp.experts.linear_fc1.weight' in key or 'mlp.experts.linear_fc2.weight' in key:
            new_key = key.replace(f'weight{expert_index}', f'weight{new_expert_index}')
        # SequentialMLP: index is between 'local_experts.' and next '.'
        elif 'mlp.experts.local_experts' in key:
            new_key = key.replace(
                f'local_experts.{expert_index}.', f'local_experts.{new_expert_index}.'
            )
        else:
            raise ValueError(f"Unexpected expert key format: {key}")
        key = new_key

    return key


def handle_swiglu_in_state_dict(model, model_state_dict, optimizer_state_dict):
    """
    Handle SWiGLU in model and optimizer state dicts.
    """
    assert HAVE_MEGATRON_FSDP, "This function requires Megatron-FSDP to be installed."

    # Extract num_experts from model config for expert parameter processing
    model_config = get_attr_wrapped_model(model, "config", allow_none=True)
    num_experts = (
        getattr(model_config, 'num_moe_experts', None) if model_config is not None else None
    )

    def intersection(s1, s2):
        # Only works for step=1
        start = max(s1.start, s2.start)
        stop = min(s1.stop, s2.stop)
        if start >= stop:
            return slice(0, 0)  # Empty slice if no intersection
        return slice(start, stop)

    def offset_slice(s, offset):
        return slice(s.start + offset, s.stop + offset)

    def is_swiglu_key(key):
        """
        Check if this key should be handled as SwiGLU linear_fc1 weight or bias.
        """
        # Non-expert MLP: 'mlp.linear_fc1.weight', 'mlp.linear_fc1.bias'
        # GroupedMLP: 'mlp.experts.linear_fc1.weight0', 'mlp.experts.linear_fc1.bias0'
        # SequentialMLP: 'mlp.experts.local_experts.0.linear_fc1.weight',
        #   'mlp.experts.local_experts.0.linear_fc1.bias'
        return any(
            re.search(pat, key)
            for pat in [
                r"(.*)\.mlp\.linear_fc1\.weight$",
                r"(.*)\.mlp\.linear_fc1\.bias$",
                r"(.*)\.mlp\.experts\.linear_fc1\.weight(\d+)$",
                r"(.*)\.mlp\.experts\.linear_fc1\.bias(\d+)$",
                r"(.*)\.mlp\.experts\.local_experts\.(\d+)\.linear_fc1\.weight$",
                r"(.*)\.mlp\.experts\.local_experts\.(\d+)\.linear_fc1\.bias$",
                r"(.*)\.mlp\.shared_experts\.linear_fc1\.weight$",
                r"(.*)\.mlp\.shared_experts\.linear_fc1\.bias$",
            ]
        )

    def split_swiglu_linear_fc1(data, dist_param, swiglu_shard_axis, is_expert_param):
        """
        Split the SWiGLU linear_fc1 parameter into two parts: weight_w and weight_v.
        """
        assert data.shape[swiglu_shard_axis] % 2 == 0, (
            f"SWiGLU weights must have an even size along the shard axis {swiglu_shard_axis}, "
            f"got {data.shape[swiglu_shard_axis]}"
        )

        fsdp_slice = dist_param.megatron_fsdp_slice
        megatron_fsdp_dist_index = dist_param.megatron_fsdp_dist_index

        tp_mesh = megatron_fsdp_dist_index.get_submesh(
            [megatron_fsdp_dist_index.tp_dim], is_expert_parallel=is_expert_param
        )
        data_size = data.numel() // tp_mesh.mesh.numel()
        w_slice = slice(0, data_size // 2)
        v_slice = slice(data_size // 2, data_size)

        view_shape = list(data.shape)
        view_shape[swiglu_shard_axis] = -1
        local_tensor = data.to_local()
        weight_w = local_tensor.view(-1)[
            offset_slice(intersection(fsdp_slice, w_slice), -fsdp_slice.start)
        ]
        weight_v = local_tensor.view(-1)[
            offset_slice(intersection(fsdp_slice, v_slice), -fsdp_slice.start)
        ]
        weight_w = weight_w.reshape(view_shape)
        weight_v = weight_v.reshape(view_shape)

        # Fake parameters w and v are used to provide the correct parameter
        # shape and Tensor-Parallelism information.
        per_tp_rank_shape = list(data.shape)
        if is_mcore_tensor_model_parallel(dist_param):
            tp_dim = get_mcore_tensor_parallel_partition_dim(dist_param)
            assert tp_dim is not None, "Tensor model parallel dimension not found"
            per_tp_rank_shape[tp_dim] //= tp_mesh.mesh.numel()
        linear_fc1_meta = torch.empty(*per_tp_rank_shape, device="meta")
        w_meta, v_meta = torch.chunk(linear_fc1_meta, 2, dim=swiglu_shard_axis)
        copy_tensor_model_parallel_attributes(w_meta, dist_param)
        copy_tensor_model_parallel_attributes(v_meta, dist_param)

        weight_w = make_fsdp_dtensor(
            weight_w.data,
            w_meta,
            dist_index=megatron_fsdp_dist_index,
            is_expert_param=is_expert_param,
            run_check=True,
            update_uneven_dtensor_chunk_meta=True,
        )
        weight_v = make_fsdp_dtensor(
            weight_v.data,
            v_meta,
            dist_index=megatron_fsdp_dist_index,
            is_expert_param=is_expert_param,
            run_check=True,
            update_uneven_dtensor_chunk_meta=True,
        )
        return weight_w, weight_v

    model_state_dict = model_state_dict.copy()
    for key in list(model_state_dict.keys()):
        if is_swiglu_key(key):
            dist_param = model.get_parameter(f"module.{key}")
            weight_w, weight_v = split_swiglu_linear_fc1(
                model_state_dict[key],
                dist_param,
                swiglu_shard_axis=0,
                is_expert_param='mlp.experts' in key,
            )

            # Update the model state dict with the new keys
            model_state_dict[f"{key}_w"] = weight_w
            model_state_dict[f"{key}_v"] = weight_v
            del model_state_dict[key]

    if optimizer_state_dict is not None:
        optimizer_state_dict = optimizer_state_dict.copy()
        if len(optimizer_state_dict["state"]) != 0:
            opt_state_dict = optimizer_state_dict["state"]
            new_opt_state_dict = {}
            for key in list(opt_state_dict.keys()):
                # Only process SWIGLU keys
                if not is_swiglu_key(key):
                    new_opt_state_dict[key] = opt_state_dict[key]
                    continue
                new_opt_state_dict[f"{key}_w"] = opt_state_dict[key].copy()
                new_opt_state_dict[f"{key}_v"] = opt_state_dict[key].copy()
                for subkey in ["exp_avg", "exp_avg_sq"]:
                    dist_param = model.get_parameter(
                        expert_param_local_key(key[len("module.") :], num_experts)
                    )
                    weight_w, weight_v = split_swiglu_linear_fc1(
                        opt_state_dict[key][subkey],
                        dist_param,
                        swiglu_shard_axis=0,
                        is_expert_param="mlp.experts" in key,
                    )
                    # Update the optimizer state dict with the new keys
                    new_opt_state_dict[f"{key}_w"][subkey] = weight_w
                    new_opt_state_dict[f"{key}_v"][subkey] = weight_v
            optimizer_state_dict["state"] = new_opt_state_dict

    return model_state_dict, optimizer_state_dict


def handle_fp8_extra_state_case(model_state_dict):
    """
    Handle the case where FP8 extra state is present in the model state dict.
    """
    assert HAVE_MEGATRON_FSDP, "This function requires Megatron-FSDP to be installed."

    for key in list(model_state_dict.keys()):
        if key.endswith('._extra_state'):
            del model_state_dict[key]


def flatten_state_dict(obj, parent_key="", sep="."):
    """
    Recursively flattens a nested state dict into a single-level dict with keys
    """
    items = {}
    if isinstance(obj, dict):
        for k, v in obj.items():
            new_key = f"{parent_key}{sep}{k}" if parent_key else str(k)
            items.update(flatten_state_dict(v, new_key, sep=sep))
    elif isinstance(obj, list):
        for i, v in enumerate(obj):
            new_key = f"{parent_key}{sep}{i}" if parent_key else str(i)
            items.update(flatten_state_dict(v, new_key, sep=sep))
    else:
        items[parent_key] = obj
    return items


def print_diff_in_state_dicts(state_dict_metadata, load_state_dict, limit=100):
    """
    Print the differences between two state dicts: metadata state dict and load state dict.
    This function compares the keys and shapes of the tensors in both dicts.
    """
    state_dict_metadata = flatten_state_dict(state_dict_metadata)
    load_state_dict = flatten_state_dict(load_state_dict)
    meta_keys = set(state_dict_metadata.keys())
    load_keys = set(load_state_dict.keys())

    only_in_meta = list(meta_keys - load_keys)
    only_in_load = list(load_keys - meta_keys)
    in_both = list(meta_keys & load_keys)

    logger.info(f"Keys only in checkpoint metadata_state_dict(first {limit}):")
    for k in sorted(only_in_meta[:limit]):
        logger.info(f"  {k}")

    logger.info(f"\nKeys only in load_state_dict(first {limit}):")
    for k in sorted(only_in_load[:limit]):
        logger.info(f"  {k}")

    logger.info(f"\nKeys in both but with different shapes(first {limit}):")
    for k in sorted(in_both[:limit]):
        v_meta = state_dict_metadata[k]
        v_load = load_state_dict[k]
        # If tensors, compare shape; else, compare type/values
        meta_shape = v_meta.size if hasattr(v_meta, "size") else type(v_meta)
        load_shape = v_load.shape if hasattr(v_load, "shape") else type(v_load)
        if meta_shape != load_shape:
            logger.info(f"  {k}: meta shape={meta_shape}, load shape={load_shape}")


def validate_loaded_state_dict(state_dict, checkpoint_path):
    """
    Validate the loaded state dict against the expected structure and types.
    """
    assert HAVE_MEGATRON_FSDP, "This function requires Megatron-FSDP to be installed."

    # Initialize reader
    reader = torch.distributed.checkpoint.FileSystemReader(checkpoint_path)
    metadata = reader.read_metadata()
    flat_state_dict = flatten_state_dict(state_dict)

    for key, value in flat_state_dict.items():
        tensor_metadata = metadata.state_dict_metadata[key]

        if not isinstance(tensor_metadata, TensorStorageMetadata):
            continue
        if not isinstance(value, DTensor):
            load_item_dict = {key: torch.empty_like(value)}
        else:
            load_item_dict = {
                key: torch.distributed.tensor.empty(
                    tensor_metadata.size,
                    dtype=tensor_metadata.properties.dtype,
                    device_mesh=DeviceMesh.from_group(
                        group=dist.group.WORLD,
                        device_type="cuda",
                        mesh=torch.arange(dist.get_world_size()),
                        mesh_dim_names=("world",),
                    ),
                    placements=[Shard(0)],
                )
            }
        torch.distributed.checkpoint.load(
            load_item_dict, storage_reader=reader, planner=default_planner.DefaultLoadPlanner()
        )
        if isinstance(value, DTensor):
            full_value = gather_uneven_dtensor_to_full_tensor(value)
            loaded_tensor = load_item_dict[key].redistribute(
                placements=[Replicate()] * len(value.placements)
            )
            assert torch.allclose(
                loaded_tensor._local_tensor, full_value._local_tensor, atol=1e-8, rtol=1e-5
            ), f"key: {key}; {loaded_tensor} {full_value}"
        else:
            assert torch.allclose(
                value, load_item_dict[key]
            ), f"key: {key}; {value} {load_item_dict[key]}"


def get_global_unique_param_name(model_chunks, param):
    """
    Get the global unique parameter name for a given model and parameter.

    Args:
        model_chunks: List of model chunks to search for the parameter.
        param: The parameter to find the name for.

    Returns:
        The global unique parameter name.
    """
    param_name = None
    for model in model_chunks:
        for name, p in model.named_parameters():
            if p is param:
                param_name = name
                break
    if param_name is None:
        raise ValueError("Parameter not found in model chunks")

    # Get PP unique parameter name
    if re.search(r"layers\.(\d+)", param_name) and "mtp" not in param_name:
        tf_layer_number = -1
        for module in model.modules():
            if not isinstance(module, TransformerLayer):
                continue
            for p in module.parameters():
                if p is param:
                    tf_layer_number = module.layer_number
                    break
        if tf_layer_number != -1:
            param_name = re.sub(r"layers\.(\d+)", f"layers.{tf_layer_number - 1}", param_name)

    # Get EP unique parameter name
    num_experts = model_chunks[0].config.num_moe_experts if model_chunks else None
    param_name = next(iter(handle_experts_in_state_dict({param_name: None}, num_experts).keys()))

    return param_name


================================================
FILE: megatron/core/transformer/heterogeneous/heterogeneous_config.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import json
from dataclasses import asdict, dataclass, field, fields
from pathlib import Path

from megatron.core.transformer import TransformerConfig


@dataclass
class AttentionConfig:
    """Configuration parameters for the self-attention part of a single transformer
    block in a heterogeneous transformer."""

    no_op: bool = False
    """Whether this is a no-op operation."""

    replace_with_linear: bool = False
    """Whether to replace the self-attention mechanism with a single linear layer."""

    num_query_groups: int | None = None
    """Number of query groups for grouped query attention."""

    @classmethod
    def build_config_from_dict(
        cls, block_config_dict: dict, num_attention_heads: int
    ) -> 'AttentionConfig':
        """
        Builds an AttentionConfig object from a dictionary and the number of attention heads.

        Args:
            block_config_dict (dict): The dictionary containing the configuration for the attention.
            num_attention_heads (int): The number of attention heads.

        Returns:
            AttentionConfig: The AttentionConfig object.
        """
        attention_config_dict = block_config_dict["attention"]
        if "num_query_groups" not in attention_config_dict:
            # compatibility with HF config of nvidia/Llama-3_1-Nemotron-51B-Instruct
            n_heads_in_group = attention_config_dict.pop("n_heads_in_group")
            if n_heads_in_group is not None:
                if num_attention_heads % n_heads_in_group != 0:
                    raise ValueError(
                        f"num_attention_heads ({num_attention_heads}) must be a multiple of "
                        f"n_heads_in_group ({n_heads_in_group})."
                    )
                num_query_groups = num_attention_heads // n_heads_in_group
            else:
                num_query_groups = None
            attention_config_dict["num_query_groups"] = num_query_groups

        # keep only fields from cls
        field_names = {f.name for f in fields(cls)}
        attn_config_dict = {k: v for k, v in attention_config_dict.items() if k in field_names}
        return cls(**attn_config_dict)


@dataclass
class MLPConfig:
    """Configuration parameters for the MLP part of a single transformer
    block in a heterogeneous transformer."""

    no_op: bool = False
    """Whether this is a no-op operation."""

    replace_with_linear: bool = False
    """Whether to replace the MLP with a single linear layer."""

    ffn_hidden_size: float | None = None
    """MLP intermediate size"""

    @classmethod
    def build_config_from_dict(cls, block_config_dict: dict, hidden_size: int) -> 'MLPConfig':
        """
        Builds an MLPConfig object from a dictionary and a hidden size.

        Args:
            block_config_dict (dict): The dictionary containing the configuration for the MLP.
            hidden_size (int): The hidden size of the MLP.

        Returns:
            MLPConfig: The constructed MLPConfig object.
        """
        mlp_config_dict = block_config_dict.get("ffn") or block_config_dict.get("mlp")
        if "ffn_hidden_size" not in mlp_config_dict:
            # compatibility with HF config of nvidia/Llama-3_1-Nemotron-51B-Instruct
            ffn_mult = mlp_config_dict.pop("ffn_mult")
            if ffn_mult is not None:
                ffn_hidden_size = cls.ffn_mult_to_intermediate_size(ffn_mult, hidden_size)
            else:
                ffn_hidden_size = None
            mlp_config_dict["ffn_hidden_size"] = ffn_hidden_size

        # keep only fields from cls
        field_names = {f.name for f in fields(cls)}
        mlp_config_dict = {k: v for k, v in mlp_config_dict.items() if k in field_names}
        return cls(**mlp_config_dict)

    @staticmethod
    def ffn_mult_to_intermediate_size(ffn_mult: float, hidden_size: int) -> int:
        """
        Calculates the intermediate size of the MLP based on the given
        `ffn_mult` and `hidden_size`.

        Args:
            ffn_mult (float): The multiplier for the feed-forward network.
            hidden_size (int): The size of the hidden layer.

        Returns:
            int: The calculated intermediate size.
        """
        intermediate_size = int(2 * ffn_mult * hidden_size / 3)
        return MLPConfig.find_multiple(intermediate_size, 256)

    @staticmethod
    def find_multiple(n: int, k: int) -> int:
        """
        Calculates the smallest multiple of `k` greater than or equal to `n`.

        Args:
            n (int): The number to find the multiple of.
            k (int): The number to find the multiple of.

        Returns:
            int: The smallest multiple of `k` greater than or equal to `n`.
        """
        if n % k == 0:
            return n
        return n + k - (n % k)


@dataclass
class TransformerBlockConfig:
    """Configuration parameters for a single transformer block in a heterogeneous transformer."""

    attention: AttentionConfig
    """Configuration parameters for the self-attention part of the transformer block in a 
    heterogeneous transformer."""

    mlp: MLPConfig
    """Configuration parameters for the mlp part of the transformer block in a 
    heterogeneous transformer."""


@dataclass
class HeterogeneousTransformerConfig(TransformerConfig):
    """Configuration object for megatron-core heterogeneous transformers.

    Heterogeneous models refer to transformer architectures where individual layers can differ
    in configuration. Specifically:
        - Attention or MLP layers can be replaced with either a linear layer or a no-op
        - MLP intermediate dimensions can vary between layers
    We use the format of the HuggingFace config files in llama nemotron models to define
    the architecture.
    For example,
    https://huggingface.co/nvidia/Llama-3_3-Nemotron-Super-49B-v1/resolve/main/config.json

    Most notably, the "heterogeneous_layers_config_path" maps to a json file containing a
    "block_configs" key, which is a list of attention and mlp configurations for each layer.
    For example, the "block_config" for a 2 layer model is:
    "block_configs": [
        {
            "attention": {
                "n_heads_in_group": 8,
                "no_op": false,
                "replace_with_linear": false,
            },
            "ffn": {
                "ffn_mult": 2.625,
                "no_op": false,
                "replace_with_linear": false,
            }
        },
        {
            "attention": {
                "n_heads_in_group": null,
                "no_op": true,
                "replace_with_linear": false,
            },
            "ffn": {
                "ffn_mult": 2.625,
                "no_op": false,
                "replace_with_linear": false,
            }
        }
    ]
    """

    heterogeneous_layers_config_path: str = ""
    """Path to the json file containing the heterogeneous block specs."""

    heterogeneous_layers_config_encoded_json: str = ""
    """The contents of the json file containing the heterogeneous block specs. It will be read from 
    heterogeneous_layers_config_path at first, then saved forever inside the model checkpoint."""

    per_block_parameters: list[TransformerBlockConfig] = field(init=False)
    """Configuration parameters for each of the transformer blocks in a 
    heterogeneous transformer."""

    def __post_init__(self):
        super().__post_init__()

        self.heterogeneous_block_specs = True

        if self.heterogeneous_layers_config_encoded_json in ("", None):
            self.heterogeneous_layers_config_encoded_json = Path(
                self.heterogeneous_layers_config_path
            ).read_text()

        hf_config_dict = json.loads(self.heterogeneous_layers_config_encoded_json)
        assert "block_configs" in hf_config_dict
        block_list = hf_config_dict["block_configs"]

        block_configs = [
            TransformerBlockConfig(
                attention=AttentionConfig.build_config_from_dict(
                    block_config_dict=block, num_attention_heads=self.num_attention_heads
                ),
                mlp=MLPConfig.build_config_from_dict(
                    block_config_dict=block, hidden_size=self.hidden_size
                ),
            )
            for block in block_list
        ]

        self.per_block_parameters = block_configs

    def get_config_for_layer(self, layer_number: int) -> TransformerConfig:
        """
        Get the config for the given layer number.
        Based on the layer number, the corresponding block config is returned,
        overriding the main config's value.
        """
        layer_idx = layer_number - 1  # layer number starts from 1
        if layer_idx < 0 or layer_idx >= len(self.per_block_parameters):
            raise ValueError(
                f"Invalid layer number: {layer_number}. Should be in "
                f"range [1, {len(self.per_block_parameters)}]."
            )
        block_config = self.per_block_parameters[layer_idx]

        keys_to_update = {}

        # attention config updates
        if block_config.attention.num_query_groups is not None:
            assert (
                not block_config.attention.replace_with_linear and not block_config.attention.no_op
            )
            keys_to_update['num_query_groups'] = block_config.attention.num_query_groups

        # mlp config updates
        if block_config.mlp.ffn_hidden_size is not None:
            assert not block_config.mlp.replace_with_linear and not block_config.mlp.no_op
            keys_to_update['ffn_hidden_size'] = block_config.mlp.ffn_hidden_size

        transformer_config_dict = asdict(self)

        # remove keys that are not in TransformerConfig
        transformer_config_field_names = {f.name for f in fields(TransformerConfig)}
        transformer_config_dict = {
            k: v for k, v in transformer_config_dict.items() if k in transformer_config_field_names
        }

        transformer_config_dict.update(keys_to_update)

        return TransformerConfig(**transformer_config_dict)


================================================
FILE: megatron/core/transformer/heterogeneous/linear_replacements.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import torch.nn.functional as F
from torch import Tensor

from megatron.core.extensions.transformer_engine import HAVE_TE
from megatron.core.parallel_state import (
    get_tensor_model_parallel_rank,
    get_tensor_model_parallel_world_size,
)
from megatron.core.tensor_parallel.layers import ColumnParallelLinear
from megatron.core.tensor_parallel.mappings import (
    gather_from_tensor_model_parallel_region,
    reduce_scatter_to_sequence_parallel_region,
)
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.utils import divide

if HAVE_TE:
    from megatron.core.extensions.transformer_engine import TELayerNormColumnParallelLinear
else:
    TELayerNormColumnParallelLinear = None


def _gather_from_tensor_parallel_region(x: Tensor, config: TransformerConfig) -> Tensor:
    if get_tensor_model_parallel_world_size() > 1:
        if config.sequence_parallel:
            # pad hidden dimension (last dimension) with zeros such that the valid data is placed in
            # indices [tp_rank * hidden/tp_size, (tp_rank+1) * hidden/tp_size),
            # and zeros fill the other parts.
            output_size = config.hidden_size
            output_size_per_partition = divide(output_size, get_tensor_model_parallel_world_size())

            pad_before = get_tensor_model_parallel_rank() * output_size_per_partition
            pad_after = output_size - pad_before - output_size_per_partition

            pad_shape = [0] * (x.ndim - 1) * 2 + [pad_before, pad_after]
            x = F.pad(x, pad_shape, "constant", 0)

            x = reduce_scatter_to_sequence_parallel_region(x)
        else:
            x = gather_from_tensor_model_parallel_region(x)

    return x


if HAVE_TE:

    class TELayerNormColumnParallelLinearGathered(TELayerNormColumnParallelLinear):
        """
        A linear replacement for TE Attention/MLP blocks.
        Supports gathering TP outputs when sequence parallel is enabled.
        """

        def __init__(self, config: TransformerConfig, tp_comm_buffer_name: str, *args, **kwargs):
            super().__init__(
                input_size=config.hidden_size,
                output_size=config.hidden_size,
                config=config,
                init_method=config.init_method,
                gather_output=False,
                bias=config.add_bias_linear,
                skip_bias_add=False,
                is_expert=False,
                tp_comm_buffer_name=tp_comm_buffer_name,
            )

        def forward(self, x, **kwargs):
            """Forward of TELayerNormColumnParallelLinearGathered"""
            out, bias = super().forward(x)
            assert bias is None, "bias should be None since we set skip_bias_add=False"

            out = _gather_from_tensor_parallel_region(out, self.config)

            return out, bias


class ColumnParallelLinearGathered(ColumnParallelLinear):
    """
    A linear replacement for local implementation of Attention/MLP blocks.
    Supports gathering TP outputs when sequence parallel is enabled.
    """

    def __init__(self, config: TransformerConfig, *args, **kwargs):
        super().__init__(
            input_size=config.hidden_size,
            output_size=config.hidden_size,
            config=config,
            init_method=config.init_method,
            gather_output=False,
            bias=config.add_bias_linear,
            skip_bias_add=False,
            is_expert=False,
        )

    def forward(
        self,
        input_: Tensor,
        weight: Tensor | None = None,
        runtime_gather_output: bool | None = None,
        **kwargs,
    ):
        """Forward of ColumnParallelLinearGathered"""
        out, bias = super().forward(input_, weight, runtime_gather_output)
        assert bias is None, "bias should be None since we set skip_bias_add=False"

        if runtime_gather_output or self.gather_output:
            raise ValueError("gathering TP outputs is not supported for linear replacement")

        out = _gather_from_tensor_parallel_region(out, self.config)

        return out, bias


================================================
FILE: megatron/core/transformer/identity_op.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
from typing import TypeVar

import torch

T = TypeVar('T')


class IdentityOp(torch.nn.Module):
    """
    This is a placeholder for IdentityOp(x) -> x
    """

    def __init__(self, *args: object, **kwargs: object):
        super().__init__()

    def forward(self, x: T, *args: object, **kwargs: object) -> T:
        """Forward pass.

        Returns x unchanged.
        """
        return x


class IdentityFuncOp(IdentityOp):
    """
    This is a placeholder for IdentityFuncOp(...)(x) -> IdentityOp(x) -> x.
    Such a func is handy for ops like `bias_dropout_fusion` which themselves
    return a function at runtime based on passed arguments
    """

    def __init__(self, *args: object, **kwargs: object):
        super().__init__()

    def forward(self, *args: object, **kwargs: object):
        """Forward pass.

        Returns a function which returns its first argument unchanged, and discards all others.
        """
        return super().forward


================================================
FILE: megatron/core/transformer/mlp.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from __future__ import annotations

import gc
import logging
import warnings
from collections.abc import Callable
from dataclasses import dataclass
from typing import Optional, Protocol, cast

import numpy as np
import torch
import torch.nn.functional as F

from megatron.core.dist_checkpointing import ShardedTensor
from megatron.core.dist_checkpointing.mapping import (
    ReplicaId,
    ShardedStateDict,
    ShardedTensorFactory,
)
from megatron.core.fusions.fused_bias_geglu import (
    bias_geglu_impl,
    quick_gelu,
    weighted_bias_quick_geglu_impl,
)
from megatron.core.fusions.fused_bias_gelu import bias_gelu_impl
from megatron.core.fusions.fused_bias_swiglu import bias_swiglu_impl, weighted_bias_swiglu_impl
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.typed_torch import apply_module, not_none
from megatron.core.utils import (
    get_tensor_model_parallel_group_if_none,
    nvtx_range_pop,
    nvtx_range_push,
)

try:
    import transformer_engine  # pylint: disable=unused-import

    HAVE_TE = True
except ImportError:
    HAVE_TE = False


logger = logging.getLogger(__name__)


class LinearFc1Interface(Protocol):
    """Interface for linear_fc1 module in MLP."""

    def forward(self, hidden_states: torch.Tensor, /) -> tuple[torch.Tensor, torch.Tensor | None]:
        """Forward method for linear_fc1 module."""
        ...

    def backward_dw(self) -> None:
        """Backward method for linear_fc1 module."""
        ...


class LinearFc1Builder(Protocol):
    """Protocol describing how to build a linear_fc1 layer in MLP."""

    def __call__(
        self,
        input_size: int,
        output_size: int,
        /,
        *,
        config: TransformerConfig,
        init_method: Callable[[torch.Tensor], None],
        gather_output: bool,
        bias: bool,
        skip_bias_add: bool,
        is_expert: bool,
        tp_comm_buffer_name: str | None,
        tp_group: torch.distributed.ProcessGroup | None,
        stride: int = 1,
    ) -> LinearFc1Interface:
        """Builds a linear_fc1 layer for MLP."""
        ...


class TEActivationFunctionInterface(Protocol):
    """Interface for activation_function module in MLP."""

    def forward(self, input_: torch.Tensor, /) -> torch.Tensor:
        """Forward method for activation_function module."""
        ...


class TEActivationFunctionBuilder(Protocol):
    """Protocol for activation_function module in MLP."""

    def __call__(self, *, config: TransformerConfig) -> TEActivationFunctionInterface:
        """Builds an activation function module for MLP."""
        ...


class LinearFc2Interface(Protocol):
    """Interface for linear_fc2 module in MLP."""

    def forward(self, hidden_states: torch.Tensor, /) -> tuple[torch.Tensor, torch.Tensor | None]:
        """Forward method for linear_fc2 module."""
        ...

    def backward_dw(self) -> None:
        """Backward method for linear_fc2 module."""
        ...


class LinearFc2Builder(Protocol):
    """Protocol describing how to build a linear_fc2 layer in MLP."""

    def __call__(
        self,
        input_size: int,
        output_size: int,
        /,
        *,
        config: TransformerConfig,
        init_method: Callable[[torch.Tensor], None],
        bias: bool,
        input_is_parallel: bool,
        skip_bias_add: bool,
        is_expert: bool,
        tp_comm_buffer_name: str | None,
        tp_group: torch.distributed.ProcessGroup | None,
    ) -> LinearFc2Interface:
        """Builds a linear_fc2 layer for MLP."""
        ...


@dataclass
class MLPSubmodules:
    """
    The dataclass for ModuleSpecs of MLP submodules
    including  linear fc1, activation function, linear fc2.
    """

    linear_fc1: LinearFc1Builder

    linear_fc2: LinearFc2Builder

    activation_func: TEActivationFunctionBuilder | None = None
    """
    Builder for an activation function module; only used if config.use_te_activation_func is True.
    """


class MLP(MegatronModule):
    """
    MLP will take the input with h hidden state, project it to 4*h
    hidden dimension, perform nonlinear transformation, and project the
    state back into h hidden dimension.


    Returns an output and a bias to be added to the output.
    If config.add_bias_linear is False, the bias returned is None.

    We use the following notation:
     h: hidden size
     p: number of tensor model parallel partitions
     b: batch size
     s: sequence length
    """

    def __init__(
        self,
        config: TransformerConfig,
        submodules: MLPSubmodules,
        is_expert: bool = False,
        input_size: Optional[int] = None,
        ffn_hidden_size: Optional[int] = None,
        tp_group: Optional[torch.distributed.ProcessGroup] = None,
    ):
        super().__init__(config=config)

        self.config: TransformerConfig = config

        self.input_size = input_size if input_size != None else self.config.hidden_size

        self.tp_group = get_tensor_model_parallel_group_if_none(tp_group, is_expert=is_expert)
        if ffn_hidden_size is None:
            if is_expert:
                raise ValueError("MoE MLP requires `ffn_hidden_size`, but it was not provided.")
            warnings.warn(
                "MLP requires ffn_hidden_size, but it was not provided. Using \
                    config.ffn_hidden_size by default.",
                DeprecationWarning,
                stacklevel=2,
            )
            ffn_hidden_size = not_none(self.config.ffn_hidden_size)

        # If this is a gated linear unit we double the output width
        # see https://arxiv.org/pdf/2002.05202.pdf
        # For GLU/SwiGLU, use stride=2 because each TP rank stores interleaved [gate, up] portions.
        # This is critical for correct weight resharding across different TP sizes.
        if self.config.gated_linear_unit:
            ffn_hidden_size *= 2
            fc1_stride = 2
            if self.config.use_kitchen:
                # Kitchen Linear doesn't support stride != 1.
                # Weight resharding across TP sizes will have aforementioned problems.
                fc1_stride = 1
        else:
            fc1_stride = 1

        # Use moe_latent_size only for routed experts. 'is_expert' is false for
        # shared_experts.
        use_latent_size = (self.config.moe_latent_size is not None) and is_expert

        self.linear_fc1 = submodules.linear_fc1(
            self.input_size if not use_latent_size else not_none(self.config.moe_latent_size),
            ffn_hidden_size,
            config=self.config,
            init_method=not_none(self.config.init_method),
            gather_output=False,
            bias=self.config.add_bias_linear,
            skip_bias_add=True,
            is_expert=is_expert,
            tp_comm_buffer_name="fc1",
            tp_group=tp_group,
            stride=fc1_stride,
        )

        if self.config.use_te_activation_func and not (submodules.activation_func is None):
            self.activation_func = apply_module(submodules.activation_func(config=self.config))
        else:
            self.activation_func = self.config.activation_func

        self.linear_fc2 = submodules.linear_fc2(
            not_none(self.config.ffn_hidden_size),
            not_none(
                self.config.hidden_size if not use_latent_size else self.config.moe_latent_size
            ),
            config=self.config,
            init_method=not_none(self.config.output_layer_init_method),
            bias=self.config.add_bias_linear,
            input_is_parallel=True,
            skip_bias_add=True,
            is_expert=is_expert,
            tp_comm_buffer_name="fc2",
            tp_group=tp_group,
        )

    def forward(
        self, hidden_states: torch.Tensor, per_token_scale: torch.Tensor | None = None, **kwargs
    ):
        """Perform the forward pass through the MLP block."""
        # [s, b, 4 * h/p]
        nvtx_range_push(suffix="linear_fc1")
        intermediate_parallel, bias_parallel = apply_module(self.linear_fc1)(hidden_states)
        nvtx_range_pop(suffix="linear_fc1")

        nvtx_range_push(suffix="activation")
        if self.config.use_te_activation_func:
            if bias_parallel is not None:
                intermediate_parallel = intermediate_parallel + bias_parallel
            intermediate_parallel = self.activation_func(intermediate_parallel)
            if per_token_scale is not None:
                original_dtype = intermediate_parallel.dtype
                intermediate_parallel = intermediate_parallel * per_token_scale.unsqueeze(-1)
                intermediate_parallel = intermediate_parallel.to(original_dtype)
        elif self.config.bias_activation_fusion:
            if per_token_scale is not None:
                if self.activation_func == F.silu and self.config.gated_linear_unit:
                    # dtype is handled inside the fused kernel
                    intermediate_parallel = weighted_bias_swiglu_impl(
                        intermediate_parallel,
                        bias_parallel,
                        per_token_scale.unsqueeze(-1),
                        self.config.activation_func_fp8_input_store,
                    )
                elif self.activation_func == quick_gelu and self.config.gated_linear_unit:
                    intermediate_parallel = weighted_bias_quick_geglu_impl(
                        intermediate_parallel,
                        bias_parallel,
                        per_token_scale.unsqueeze(-1),
                        self.config.activation_func_fp8_input_store,
                        self.config.glu_linear_offset,
                        self.config.activation_func_clamp_value,
                    )
                else:
                    raise ValueError(
                        "Only support fusion of swiglu and quick_gelu with per_token_scale in MLP."
                    )
            else:
                if self.activation_func == F.gelu:
                    if self.config.gated_linear_unit:
                        intermediate_parallel = bias_geglu_impl(
                            intermediate_parallel, bias_parallel
                        )
                    else:
                        assert self.config.add_bias_linear is True
                        intermediate_parallel = bias_gelu_impl(intermediate_parallel, bias_parallel)
                elif self.activation_func == F.silu and self.config.gated_linear_unit:
                    intermediate_parallel = bias_swiglu_impl(
                        intermediate_parallel,
                        bias_parallel,
                        self.config.activation_func_fp8_input_store,
                        self.config.cpu_offloading
                        and self.config.cpu_offloading_activations
                        and HAVE_TE,
                    )
                else:
                    raise ValueError("Only support fusion of gelu and swiglu")
        else:
            if bias_parallel is not None:
                intermediate_parallel = intermediate_parallel + bias_parallel
            if self.config.gated_linear_unit:

                def glu(x):
                    x_glu, x_linear = torch.chunk(x, 2, dim=-1)
                    if (val := self.config.activation_func_clamp_value) is not None:
                        x_glu = x_glu.clamp(min=None, max=val)
                        x_linear = x_linear.clamp(min=-val, max=val)
                    return self.config.activation_func(x_glu) * (
                        x_linear + self.config.glu_linear_offset
                    )

                intermediate_parallel = glu(intermediate_parallel)
            else:
                intermediate_parallel = self.activation_func(intermediate_parallel)

            if per_token_scale is not None:
                original_dtype = intermediate_parallel.dtype
                intermediate_parallel = intermediate_parallel * per_token_scale.unsqueeze(-1)
                intermediate_parallel = intermediate_parallel.to(original_dtype)
        nvtx_range_pop(suffix="activation")

        # [s, b, h]
        nvtx_range_push(suffix="linear_fc2")

        output, output_bias = apply_module(self.linear_fc2)(
            cast(torch.Tensor, intermediate_parallel)
        )
        nvtx_range_pop(suffix="linear_fc2")

        if per_token_scale is not None and output_bias is not None:
            # if this MLP is an expert, and bias is required, we add the bias to output directly
            # without doing bda later.
            output += output_bias.unsqueeze(0) * per_token_scale.unsqueeze(-1)
            output_bias = None

        return output, output_bias

    # pylint: disable=missing-function-docstring
    def sharded_state_dict(
        self, prefix: str = "", sharded_offsets: tuple = (), metadata: Optional[dict] = None
    ) -> ShardedStateDict:
        """Return the sharded state dictionary of the module."""
        sharded_state_dict = {}
        singleton_local_shards = (metadata or {}).get('singleton_local_shards', False)
        for name, module in self._modules.items():
            sub_sd = module.sharded_state_dict(f"{prefix}{name}.", sharded_offsets, metadata)
            if self.config.gated_linear_unit and name == "linear_fc1":
                for k, v in sub_sd.items():
                    if k in (f"{prefix}{name}.weight", f"{prefix}{name}.bias"):
                        sub_sd[k] = apply_swiglu_sharded_factory(
                            v, sharded_offsets, singleton_local_shards
                        )
            sharded_state_dict.update(sub_sd)
        return sharded_state_dict

    def backward_dw(self):
        self.linear_fc2.backward_dw()
        self.linear_fc1.backward_dw()


# pylint: disable=missing-function-docstring
def apply_swiglu_sharded_factory(
    original_sh_ten, sharded_offsets, singleton_local_shards: bool = False
):
    # We must split the tensor into 2 parts, each sharded separately.
    # This requires a ShardedTensorFactory which `chunk`s during saving
    # and `cat`s during loading

    swiglu_shard_axis = 0
    prepend_axis_num = len(sharded_offsets)
    original_shape = original_sh_ten.local_shape
    original_numel = int(np.prod(original_shape))
    local_axis_size = original_shape[swiglu_shard_axis]
    assert (
        original_sh_ten.global_offset[swiglu_shard_axis + prepend_axis_num] % local_axis_size == 0
    )
    rank_offset = (
        original_sh_ten.global_offset[swiglu_shard_axis + prepend_axis_num] // local_axis_size
    )
    axis_frag = original_sh_ten.axis_fragmentations[swiglu_shard_axis + prepend_axis_num]

    @torch.no_grad()
    def sh_ten_build_fn(
        key: str, t: torch.Tensor, replica_id: ReplicaId, flattened_range: Optional[slice]
    ):
        if singleton_local_shards:
            offset_w = (swiglu_shard_axis + prepend_axis_num, rank_offset, axis_frag)
            offset_v = (swiglu_shard_axis + prepend_axis_num, rank_offset, axis_frag)
            w_key = f'{key}_w'
            v_key = f'{key}_v'
        else:
            offset_w = (swiglu_shard_axis + prepend_axis_num, rank_offset, axis_frag * 2)
            offset_v = (
                swiglu_shard_axis + prepend_axis_num,
                rank_offset + axis_frag,
                axis_frag * 2,
            )
            w_key = key
            v_key = key

        tensor_w, tensor_v = torch.chunk(t, 2, dim=swiglu_shard_axis)
        return [
            ShardedTensor.from_rank_offsets(
                w_key,
                tensor_w,
                *sharded_offsets,
                offset_w,
                replica_id=replica_id,
                prepend_axis_num=prepend_axis_num,
            ),
            ShardedTensor.from_rank_offsets(
                v_key,
                tensor_v,
                *sharded_offsets,
                offset_v,
                replica_id=replica_id,
                prepend_axis_num=prepend_axis_num,
            ),
        ]

    def sh_ten_merge_fn(sub_state_dict):
        with torch.no_grad():
            try:
                return torch.cat(sub_state_dict)
            except (RuntimeError, torch.cuda.OutOfMemoryError) as e:
                logger.warning(
                    f"CUDA OutOfMemoryError encountered during tensors merging."
                    f" Switching to CPU merge. (Error: {e})"
                )
                merged_sub_state_dict = torch.cat([t.cpu() for t in sub_state_dict])
                gc.collect()
                torch.cuda.empty_cache()
                return merged_sub_state_dict

    return ShardedTensorFactory(
        original_sh_ten.key,
        original_sh_ten.data,
        sh_ten_build_fn,
        sh_ten_merge_fn,
        original_sh_ten.replica_id,
        flattened_range=original_sh_ten.flattened_range,
    )


================================================
FILE: megatron/core/transformer/module.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Megatron Module."""
from functools import partial
from typing import Optional, Tuple

import torch
from torch.autograd import Variable
from torch.nn.parameter import Parameter

from megatron.core import parallel_state
from megatron.core.dist_checkpointing.mapping import ShardedStateDict
from megatron.core.transformer.enums import CudaGraphScope
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.utils import (
    ensure_metadata_has_dp_cp_group,
    make_sharded_tensors_for_checkpoint,
    sharded_state_dict_default,
)

_FLOAT_TYPES = (torch.FloatTensor, torch.cuda.FloatTensor)
_HALF_TYPES = (torch.HalfTensor, torch.cuda.HalfTensor)
_BF16_TYPES = (torch.BFloat16Tensor, torch.cuda.BFloat16Tensor)


def param_is_not_shared(param):  # pylint: disable=missing-function-docstring
    return not hasattr(param, 'shared') or not param.shared


class MegatronModule(torch.nn.Module):
    """Base Megatron module inhertied by all Models.

    Megatron specific extensions of torch Module with support
    for pipelining

    Args:
        config (TransformerConfig): Transformer config
    """

    # def __init__(self, config: TransformerConfig, share_word_embeddings=True):
    def __init__(self, config: TransformerConfig):
        super().__init__()
        self.config = config

    def state_dict_for_save_checkpoint(self, prefix: str = '', keep_vars: bool = False):
        """Override state dict for saving checkpoints Use this function to override the
        state dict for saving checkpoints.

        Args:
            prefix (str, optional): _description_. Defaults to ''.
            keep_vars (bool, optional): _description_. Defaults to False.

        Returns:
            _type_: _description_
        """

        return self.state_dict(prefix=prefix, keep_vars=keep_vars)

    def sharded_state_dict(
        self,
        prefix: str = '',
        sharded_offsets: Tuple[Tuple[int, int, int], ...] = (),
        metadata: Optional[dict] = None,
    ) -> ShardedStateDict:
        """Default implementation for sharded state dict for distributed checkpointing.

        General definition of sharded_state_dict simply calls `sharded_state_dict_default`
        (which call sharded_state_dict method if possible or a default implementation otherwise)
        recursively on all submodules.

        Args:
            prefix (str): prefix for the state dict keys
            sharded_offsets (Tuple[Tuple[int, int, int]], optional): sharding already
                applied (e.g. PP related) by sup-modules. Passed along to ShardedTensor
            metadata (dict, optional): metadata passed recursively to sharded_state_dict methods

        Returns:
            dict: dictionary of state dict keys mapped to ShardedTensors
        """
        sharded_state_dict = {}
        # Save parameters
        self._save_to_state_dict(sharded_state_dict, '', keep_vars=True)
        if not hasattr(self, 'tp_group'):
            # some model interface hasn't updated for m4, fallback needed
            tp_group = parallel_state.get_tensor_model_parallel_group()
        else:
            tp_group = self.tp_group
        # Guard for cases metadata is not provided
        metadata = ensure_metadata_has_dp_cp_group(metadata)
        sharded_state_dict = make_sharded_tensors_for_checkpoint(
            sharded_state_dict,
            prefix,
            sharded_offsets=sharded_offsets,
            tp_group=tp_group,
            dp_cp_group=metadata['dp_cp_group'],
        )
        # Recurse into submodules
        for name, module in self.named_children():
            sharded_state_dict.update(
                sharded_state_dict_default(
                    module, f'{prefix}{name}.', sharded_offsets, metadata, tp_group=tp_group
                )
            )
        return sharded_state_dict

    def set_is_first_microbatch(self):
        """Sets the is_first_microbatch flag if it exists and config.fp8==True.
        When this flag is set, TE modules will update their fp8 parameter cache.
        If kitchen is being used, kitchen controls quantization level.
        """
        if (
            self.config.fp8 is not None
            or self.config.fp4 is not None
            or getattr(self.config, 'use_kitchen', False)
        ):
            if not hasattr(self, "modules_with_is_first_microbatch"):
                self.modules_with_is_first_microbatch = []
                for m in self.modules():
                    if hasattr(m, "is_first_microbatch"):
                        self.modules_with_is_first_microbatch.append(m)
            for m in self.modules_with_is_first_microbatch:
                m.is_first_microbatch = True

    def set_symmetric_ar(self, set_to: Optional[str] = None) -> None:
        """
        Set symmetric all-reduce functionality across all eligible modules.

        This method traverses the model's module hierarchy to find all modules
        with the 'symmetric_ar_type' attribute, caches them, and then sets their
        '_symmetric_ar_cache' attribute to the specified value to enable or
        disable symmetric all-reduce operations.

        Args:
            set_to (Any, optional): Value to set for the 'symmetric_ar_type' to.
            Allowed choices ['two_shot', "one_shot", "multimem_all_reduce", None]
        """
        assert set_to in ['two_shot', "one_shot", "multimem_all_reduce", None]

        # Recursive function to find all modules with our target attributes
        def create_ar_cache(module):
            # Check if this module has any of our target attributes
            if hasattr(module, "symmetric_ar_type"):
                self._symmetric_ar_cache.append(module)

            # Check all children modules recursively
            for child in module._modules.values():
                if child is not None:
                    create_ar_cache(child)

        if not hasattr(self, "_symmetric_ar_cache"):
            self._symmetric_ar_cache = []
            create_ar_cache(self)

        for module in self._symmetric_ar_cache:
            module._symmetric_ar_cache = set_to


class GraphableMegatronModule(MegatronModule):
    """Megatron module that can be used to capture and replay CUDA graphs.
    Now only TransformerLayer and MambaLayer are graphable.

    Args:
        config (TransformerConfig): Transformer config
    """

    def __init__(self, config: TransformerConfig, vp_stage: Optional[int] = None):
        super().__init__(config)

        assert isinstance(config, TransformerConfig), "config must be a TransformerConfig"

        # Enable cuda graphs.
        if (
            config.cuda_graph_impl == "local"
            and CudaGraphScope.full_iteration not in config.cuda_graph_scope
        ):
            if hasattr(self, "create_mcore_cudagraph_manager"):
                self.create_mcore_cudagraph_manager(config)
            else:
                from megatron.core.transformer.cuda_graphs import CudaGraphManager

                self.cudagraph_manager = CudaGraphManager(config)
        elif config.cuda_graph_impl == "transformer_engine":
            # List to store CUDA graphs. A list of `N` CUDA graphs for this layer where N is
            # the number of microbatches. Multiple CUDA graphs per layer is required to support
            # pipelining which requires running FWD graph of multiple microbatches before BWD
            # graph. To enable CUDA graph, this list should be populated in the model training
            # script with the graphs returned by make_graphed_callables API before the first
            # training step.
            self.cuda_graphs = []
            # List to store forward pre-hooks. Forward pre-hooks are not captured into CUDA
            # graphs. Those hooks and args are collected in this list and should be manually
            # triggered before CUDA Graph running. This is required to ensure the correct param
            # all-gather overlap with forward compute.
            self.cuda_graph_manual_hooks = []
            # _CudaGraphBackwardDWWrapper object used to manage the wgrad backward computation.
            # The `backward_dw` func api is the same as `TransformerLayerNode.backward_dw` and
            # calls wgrad computation in attention module (contains attn and shared expert)
            # according to CUDA graph scope.
            self.cuda_graph_backward_dw_wrapper = None

    def init_backward_dw_wrapper(self):
        """Initialize the backward_dw_wrapper."""
        from megatron.core.models.gpt.fine_grained_callables import _BackwardDWWrapper

        config = getattr(self, 'config', None)
        assert config is not None, (
            "TransformerLayer must be initialized before calling " "`init_backward_dw_wrapper`."
        )
        self.backward_dw_wrapper = _BackwardDWWrapper(self)

    def set_te_cuda_graph_backward_dw_wrapper(self):
        """Replace the backward_dw callable with dw cuda graph."""
        assert (
            self.backward_dw_wrapper is not None
        ), "`backward_dw_wrapper` must be set when cuda graphs are enabled for ep overlap."
        self.backward_dw_wrapper.set_graphed_backward_dw_callable(
            partial(self._te_cuda_graph_backward_dw_graph, self.current_microbatch)
        )

    def _te_cuda_graph_backward_dw_graph(self, microbatch_idx):
        """
        CUDA Graph backward weight gradient computation for current layer.
        """
        cg_index = microbatch_idx % len(self.cuda_graphs)
        if not hasattr(self.cuda_graphs[cg_index], 'backward_dw'):
            return
        self.cuda_graphs[cg_index].backward_dw()

    def get_layer_static_inputs(self, seq_length, micro_batch_size):
        """
        Get the static inputs for the layer.
        We assume that the module has one hidden_states input, whose shape is inferred
        from the seq_length, micro_batch_size, and parallel config.
        Override this method if the module has other inputs.

        Returns:
            Dict[str, torch.Tensor]: A dictionary containing the static inputs for the layer.
        """
        # Calculate data shape related values.
        context_parallel_size = self.config.context_parallel_size
        slen_per_cp = seq_length // context_parallel_size
        sequence_parallel = self.config.sequence_parallel
        tensor_model_parallel_size = self.config.tensor_model_parallel_size
        slen_per_cptp = (
            slen_per_cp // tensor_model_parallel_size if sequence_parallel else slen_per_cp
        )

        static_inputs = {}
        static_inputs["hidden_states"] = torch.ones(
            (slen_per_cptp, micro_batch_size, self.config.hidden_size),
            dtype=torch.bfloat16,
            requires_grad=True,
            device=torch.cuda.current_device(),
        )
        return static_inputs

    def setup_manual_hooks(self, make_hook_func):
        """
        Set CUDA Graph manual hooks for the submodules that contain direct parameters and are
        covered by cudagraphs.
        """
        self.cuda_graph_manual_hooks = []

        # Select the modules who contain direct parameters and are covered by cudagraphs.
        # Add these modules to the `cuda_graph_manual_hooks` because their hooks will not
        # be automatically triggered when they go through the CUDA Graph path.
        param_modules = {}
        for submodule in self._get_submodules_under_cudagraphs():
            for module in submodule.modules():
                if next(module.parameters(recurse=False), None) is not None:
                    # Module contains direct parameters.
                    param_modules[id(module)] = module
        for module in param_modules.values():
            self.cuda_graph_manual_hooks.append((make_hook_func(), (module,)))

    def _get_submodules_under_cudagraphs(self):
        """
        Get the submodules that are covered by cudagraphs. Return a list that only contains the
        module itself if the whole layer is covered by cudagraphs.
        """
        return [self]

    def _te_cuda_graph_capture(self, *args, **kwargs):
        """
        CUDA Graph capture for this layer using TE interface.
        Normally it's just a forward pass if we're capturing the entire layer.
        """
        return self.forward(*args, **kwargs)

    def _te_cuda_graph_replay(self, *args, **kwargs):
        """
        CUDA graph replay for this layer and microbatch `self.current_microbatch` using TE
        interface. TransformerEngine versions>=1.10 allow keyword arguments with CUDA graph.
        However, CUDA graph accepts only Tensor inputs.
        Hence, check if the arguments are all tensors.
        """
        for arg in args:
            assert isinstance(arg, torch.Tensor), "CUDA graph accepts only Tensor inputs."
        for _, v in kwargs.items():
            assert v is None or isinstance(
                v, torch.Tensor
            ), "CUDA graph accepts only Tensor inputs."

        cg_index = getattr(self, 'current_microbatch', 0) % len(self.cuda_graphs)
        cudagraph_args, cudagraph_kwargs = self._get_te_cuda_graph_replay_args(*args, **kwargs)

        for hook, hook_args in self.cuda_graph_manual_hooks:
            hook(*hook_args)
        return self.cuda_graphs[cg_index](*cudagraph_args, **cudagraph_kwargs)

    def _get_te_cuda_graph_replay_args(self, *args, **kwargs):
        """Helper function to get tensor arguments for TE CUDA graph."""
        if len(args) == 0:
            assert 'hidden_states' in kwargs, "hidden_states is required."
            hidden_states = kwargs.pop('hidden_states')
            cudagraph_args = (hidden_states,)
        else:
            assert (
                'hidden_states' not in kwargs
            ), "hidden_states should only be passed as either a positional or keyword argument."
            cudagraph_args = tuple(args)

        cudagraph_kwargs = kwargs.copy()
        cudagraph_kwargs['is_first_microbatch'] = getattr(self, 'current_microbatch', 0) == 0
        return cudagraph_args, cudagraph_kwargs

    def _should_call_local_cudagraph(self, *args, **kwargs):
        """
        Check if we should call the local cudagraph path.
        """
        return hasattr(self, 'cudagraph_manager')

    def _should_call_te_cudagraph(self, *args, **kwargs):
        """
        Check if we should call the TE cudagraph path.
        """
        from megatron.core.transformer.cuda_graphs import is_graph_capturing

        return (
            self.config.cuda_graph_impl == "transformer_engine"
            and self.training
            and (is_graph_capturing() or self.cuda_graphs)
        )

    def __call__(self, *args, **kwargs):
        if self._should_call_local_cudagraph(*args, **kwargs):
            return self.cudagraph_manager(self, args, kwargs)
        elif self._should_call_te_cudagraph(*args, **kwargs):
            if not self.cuda_graphs:
                # Do CUDA Graphs capture.
                cuda_graph_func = self._te_cuda_graph_capture
            else:
                # Do CUDA Graphs replay.
                cuda_graph_func = self._te_cuda_graph_replay
            return cuda_graph_func(*args, **kwargs)
        return super().__call__(*args, **kwargs)


def conversion_helper(val, conversion):
    """Recursively applies a conversion function to values in nested data structures.

    Args:
        val: A single value or a nested structure (tuple/list) of values to convert
        conversion (callable): A function that performs the desired conversion on a single value

    Returns:
        The converted value, maintaining the same nested structure as the input.
        If input is a single value, returns the converted value.
        If input is a tuple/list, returns a tuple/list with all elements converted.
    """
    if not isinstance(val, (tuple, list)):
        return conversion(val)
    rtn = [conversion_helper(v, conversion) for v in val]
    if isinstance(val, tuple):
        rtn = tuple(rtn)
    return rtn


def fp32_to_float16(val, float16_convertor):
    """Converts floating-point values from fp32 to fp16.

    Args:
        val: The value to convert. Can be a single number, a tuple, or a list.
        float16_convertor: A function that converts a single fp32 value to fp16
    """

    def half_conversion(val):
        val_typecheck = val
        if isinstance(val_typecheck, (Parameter, Variable)):
            val_typecheck = val.data
        if isinstance(val_typecheck, _FLOAT_TYPES):
            val = float16_convertor(val)
        return val

    return conversion_helper(val, half_conversion)


def float16_to_fp32(val):
    """Converts floating-point values from fp16 to fp32.

    Args:
        val: The value to convert. Can be a single number, a tuple, or a list.
    """

    def float_conversion(val):
        val_typecheck = val
        if isinstance(val_typecheck, (Parameter, Variable)):
            val_typecheck = val.data
        if isinstance(val_typecheck, (_BF16_TYPES, _HALF_TYPES)):
            val = val.float()
        return val

    return conversion_helper(val, float_conversion)


class Float16Module(MegatronModule):
    """Float 16 Module.

    Attributes:
        config (TransformerConfig): Transformer config
        fp16 (bool) : Specifies if the model runs in fp16 mode
        bf16 (bool) : Specifies if the model runs in bf16 mode

    Args:
        config (TransformerConfig): The transformer config used to initalize the model
    """

    def __init__(self, config: TransformerConfig, module: torch.nn.Module):
        super(Float16Module, self).__init__(config)
        self.config = config
        self.fp16 = config.fp16
        self.bf16 = config.bf16
        self.vp_size = config.virtual_pipeline_model_parallel_size
        self.vp_stage = getattr(module, 'vp_stage', None)
        self.pg_collection = getattr(module, 'pg_collection', None)

        if self.fp16:
            self.add_module('module', module.half())

            def float16_convertor(val):
                return val.half()

        elif self.bf16:
            self.add_module('module', module.bfloat16())

            def float16_convertor(val):
                return val.bfloat16()

        else:
            raise Exception('Either config.fp16 or config.bf16 should be True.')

        self.float16_convertor = float16_convertor

    def set_input_tensor(self, input_tensor):  # pylint: disable=missing-function-docstring
        return self.module.set_input_tensor(input_tensor)

    def forward(self, *inputs, fp32_output=True, **kwargs):
        """
        Execute the wrapped module in model precision and optionally upcast outputs to fp32.

        On the first pipeline stage, positional/keyword tensor inputs are converted to the
        module precision (fp16 or bf16) before invoking the wrapped module. The wrapped module
        is called with the provided inputs and keyword arguments. On the last pipeline stage
        only, outputs are upcast to fp32 if ``fp32_output`` is True; otherwise, outputs are
        returned in the model precision (fp16/bf16).

        Args:
            *inputs: Positional inputs forwarded to the wrapped module (converted to fp16/bf16 on
                the pipeline first stage).
            fp32_output (bool, keyword-only): If True (default), upcast outputs to fp32 on the
                pipeline last stage. Has no effect on non-last stages. Set to False to keep outputs
                in model precision when downstream consumers expect half precision or to avoid
                extra casts.
            **kwargs: Keyword arguments forwarded to the wrapped module.

        Returns:
            The wrapped module's outputs, potentially upcast to fp32 depending on pipeline stage
            and ``fp32_output``.
        """
        from megatron.core.pipeline_parallel.utils import (
            is_pp_first_stage,
            is_pp_last_stage,
            is_vp_first_stage,
            is_vp_last_stage,
        )

        if self.pg_collection is None:
            pp_group = parallel_state.get_pipeline_model_parallel_group()
        else:
            pp_group = self.pg_collection.pp
        if is_vp_first_stage(self.vp_stage, self.vp_size) and is_pp_first_stage(pp_group):
            inputs = fp32_to_float16(inputs, self.float16_convertor)
        outputs = self.module(*inputs, **kwargs)
        if (
            is_vp_last_stage(self.vp_stage, self.vp_size)
            and is_pp_last_stage(pp_group)
            and fp32_output is True
        ):
            outputs = float16_to_fp32(outputs)
        return outputs

    def state_dict(
        self, destination=None, prefix='', keep_vars=False
    ):  # pylint: disable=missing-function-docstring
        return self.module.state_dict(destination=destination, prefix=prefix, keep_vars=keep_vars)

    def state_dict_for_save_checkpoint(self, prefix='', keep_vars=False):
        """Retrieve state_dict from the module being wrapped."""
        return self.module.state_dict_for_save_checkpoint(prefix=prefix, keep_vars=keep_vars)

    def sharded_state_dict(self, prefix='', *args, **kwargs):
        """Retrieve sharded_state_dict from the module being wrapped."""
        return self.module.sharded_state_dict(prefix, *args, **kwargs)

    def load_state_dict(
        self, state_dict, strict=True
    ):  # pylint: disable=missing-function-docstring
        self.module.load_state_dict(state_dict, strict=strict)


================================================
FILE: megatron/core/transformer/moe/README.md
================================================
# Megatron Core MoE

Megatron Core MoE is a production-ready framework for training large-scale Mixture-of-Experts models, providing the foundational architecture, performance optimizations, and best practices that guide MoE framework development across the industry.

## What's New
For latest features and architectures, please refer to the [MCore dev roadmap](https://github.com/NVIDIA/Megatron-LM/issues/1729).

### 🔥 [MCore dev] (2026/01)
- 🚀 Pipeline-aware fine-grained activation offloading
- 🚀 Qwen3-Next model support
- 🚀 DeepSeek-V3.2 model support
- 🚀 Muon and Layer-wise distributed optimizer
- 🚀 CUDA Graph support with fine-grained scopes

### 🔥 [MCore v0.15] (2025/11)
- 🚀 Add HybridEP backend to Flex Dispatcher(GB200, B200, H100 supported)
- 🚀 Support FSDP with EP for MoE models

### 🔥 [MCore v0.14] (2025/09)
- 🚀 Batch-level overlapping to hide EP-A2A communication (--overlap-moe-expert-parallel-comm --delay-wgrad-compute)
- 🚀 FP8 support for Fine-grained Recomputations
- Router fusion kernels for MoE models (--moe-router-fusion)
- Context Parallelism (CP) support for MTP and MLA

### 🔥 [MCore v0.13] (2025/07)
- Support bf16 dtype for optimizer states to use precision-aware optimizer in TransformerEngine (--use-precision-aware-optimizer)
- Flexible Asymmetric Virtual Pipeline Parallelism with Custom Pipeline Layout (--pipeline-model-parallel-layout)
- Add Hybrid Shard Data-Parallel support for MoE models (--num-distributed-optimizer-instances)
- Fine-grained recomputation to reduce activation memory. (--recompute-modules with --recompute-granularity selective)
- Memory efficient token permutation by moving the probs multiplication from unpermutation to activation function of GroupedMLP.

### 🔥 [MCore v0.12] (2025/05)
- Support DeepSeek's DeepEP for efficient token dispatching (--moe-token-dispatcher-type flex --moe-enable-deepep)
- Support Multi-Token Prediction (MTP) (--mtp-num-layers 1)
- CUDA Graph support for dropless MoE models with attention only capture (--te-rng-track --external-cuda-graph --cuda-graph-scope attn)

## Overview of MCore MoE Supported Features and Architectures

### Model Support
- ✅ **DeepSeek**
  - ✅ DeepSeek-V2
  - ✅ DeepSeek-V3, including MTP
- ✅ **Qwen**
  - ✅ Qwen2-57B-A14B
  - ✅ Qwen3-30B-A3B
  - ✅ Qwen3-235B-A22B
- ✅ **Mixtral**
  - ✅ Mixtral-8x7B
  - ✅ Mixtral-8x22B

### Core MoE Functionality
- ✅ Token dropless MoE (dMoE) - Advanced routing without token dropping
- ✅ Top-K Router with flexible K selection
- ✅ Load balancing losses for expert utilization optimization

### Advanced Parallelism
- ✅ Expert Parallel (EP) with 3D parallelism integration
- ✅ Full parallelism combo: EP + DP + TP + PP + SP support
- ✅ Context Parallel (CP) for long sequence MoE training
- ✅ Parallel Folding Heterogeneous Parallelism Mappings for Efficient Large-Scale MoE Model Training
- ✅ Distributed Optimizer for MoE (ZeRO-1 equivalent)

### Performance Optimizations
- ✅ Memory Efficient token permutation
- ✅ Fine-grained Recomputations (mla, moe, mlp, moe_act, norm)
- ✅ MLA TP Support for Mixture of Linear Attention
- ✅ GroupedGEMM and GA Fusion
- ✅ DP/PP/TP Communication Overlapping
- ✅ Overlapped Shared Expert execution
- ✅ Router Fusion optimizations
- ✅ Token (un)permutation Fusion kernels
- ✅ cuDNN fused Attention integration

### Hardware & Precision Support
- ✅ DeepEP support for H100 and B200
- ✅ GroupedGEMM including FP8/MXFP8 support
- ✅ FP8 weights with BF16 optimizer states
- ✅ FP8 training full support

### Developer Experience
- ✅ MoE Model Zoo with pre-training best practices
- ✅ Distributed Checkpointing for MoE models
- ✅ Upcycling Support for model scaling
- ✅ MCore2HF Converter for ecosystem compatibility
- ✅ Layer-wise logging for detailed monitoring
- ✅ Runtime Upcycling capabilities

## Quick Start Guide

### Basic MoE Training in Megatron-LM

To train a top-2 MoE model with 8 experts and auxiliary loss, add the following arguments to your megatron training script:

```bash
## Set MoE Hidden site
--num-experts 8
--moe-shared-expert-intermediate-size: 2048
## Set router config
--moe-router-load-balancing-type aux_loss
--moe-router-topk 2
--moe-aux-loss-coeff 1e-2
## Set token dispatcher
--moe-token-dispatcher-type alltoall
```

Detailed documentation for each feature is available in the [Feature Documentation](#feature-documentation) section.

### Use the pre-defined config to train the popular MoE models
We have provided some pre-defined config to train the popular MoE models in the [Megatron-MoE-Model-Zoo](https://github.com/yanring/Megatron-MoE-ModelZoo/tree/main) repository. You can use them as a reference to configure your training script. Currently we have added the config for Mixtral 8x7B, Mixtral 8x22B, DeepSeek-V3, Qwen3-30B-A3B, Qwen3-235B-A22B.

### General Performance Tips
#### Training arguments
The following flags are general performance flags that can help to achieve higher performance on almost all workloads. Check if you have enabled all of them in your training script.

```bash
## Enable DeepEP token dispatcher
--moe-token-dispatcher-type flex
--moe-flex-dispatcher-backend deepep
## Enable GroupedGEMM
--moe-grouped-gemm
## Enable fusion kernels
--moe-router-fusion
--moe-permute-fusion
--cross-entropy-loss-fusion
--cross-entropy-fusion-impl te

## Communication optimization
--use-distributed-optimizer
--overlap-param-gather
--overlap-grad-reduce
--tp-comm-overlap

## Enable manual gc to prevent python jitter
--manual-gc: true
--manual-gc-interval: 10
```
#### Environment variables

Below are some environment variables that can be useful.
```bash
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True # Enable expandable segments to prevent memory fragmentation
export NCCL_NVLS_ENABLE=0 # Disable NVLS to prevent memory overhead
```
#### Dependencies
- Use the latest version of [TransformerEngine](https://github.com/NVIDIA/TransformerEngine).
- Use the latest [NGC PyTorch Docker Image](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch)

## Best Practices to achieve high performance on MoE training

Distributed training involves complex trade-offs between **communication**, **memory**, and **computation**, making it challenging to find an optimal parallelism configuration. This section provides a systematic workflow to help you identify the best parallel mapping for your model and hardware.

### Step 1: Find the feasible parallel mapping under the memory capacity of the GPU
To find the best parallel mapping, we need to first know the feasible parallel mapping for the model under the memory capacity of the GPU.
The consumption of memory consists of three parts:
- Activation memory
- Weight and gradient memory
- Optimizer states memory
Different parallel strategies will shard these tensor memory in different ways.

| Parallel Strategy | Peak Activation Memory          | Weight Memory  | Optimizer states                  | Communication (Per-Layer) |
|:-----------------:|:-------------------------------:|:--------------:|:---------------------------------:|:-------------------------:|
| TP                | 1/N (with SP on)                | 1/N            | 1/N                               |        High               |
| EP                | ~1 (varies with EP balancing)   | 1/N in MoELayer| 1/N                               |       Medium              |
| PP                | 1 (>1 with virtual pipeline)    | 1/N            | 1/N                               |       Medium              |
| CP                | 1/N                             | 1              | 1/N (with distributed optimizer)  |       Medium              |
| DP                | 1                               | 1              | 1/N (with distributed optimizer)  |        Low                |

We provide the argument of `--fake-init-process-group` to emulate distributed training on one GPU. This is useful to find the feasible parallel mapping under the memory capacity of the GPU. See https://github.com/NVIDIA/Megatron-LM/pull/2254 for detailed usage.

### Step 2: Select Optimal Parallelism Strategy

The optimal parallelism configuration varies based on **model architecture**, **sequence length**, and **hardware platform**. Below are general guidelines to help you achieve high throughput.

#### Guideline 1: Minimize Model Parallelism, Maximize Data Parallelism

| Aspect | Recommendation |
|--------|----------------|
| **Goal** | Keep TP/EP/PP as small as possible while avoiding OOM |
| **Why** | Model parallelism introduces communication overhead that hurts performance |
| **How** | Use distributed optimizer (`--use-distributed-optimizer`) to shard optimizer states across DP ranks, freeing memory for larger DP size |

#### Guideline 2: Keep EP and TP Communication Within NVLink Domain

| Aspect | Recommendation |
|--------|----------------|
| **Goal** | Ensure EP×TP fits within a single node (typically 8 GPUs) |
| **Why** | EP and TP are communication-intensive; NVLink provides much higher bandwidth than cross-node interconnects |
| **Scaling** | When scaling beyond one node, prefer PP over expanding TP/EP across nodes |

**Note:**
For very large MoE models like DeepSeek-V3, the EP communication may exceed the NVLink bandwidth. In this case, consider using 1F1B A2A Overlap to overlap the EP communication.

#### Guideline 3: Use Pipeline Parallelism (PP) for Multi-Node Scaling

| Aspect | Recommendation |
|--------|----------------|
| **Goal** | Use PP to distribute layers across nodes while keeping EP×TP within NVLink |
| **VPP** | Enable Virtual Pipeline Parallelism to reduce pipeline bubbles when `PP ≥ 2` |
| **Config** | Set `--num-layers-per-virtual-pipeline-stage` to control VPP size |

**VPP Size Tuning:**
- Valid values: all divisors of `num_layers / PP_size`
- Example: `num_layers=24, PP=4` → valid VPP sizes: `{1, 2, 3, 6}`
- Trade-off: Larger VPP = fewer bubbles but more P2P communications
- Recommendation: A middle value often gives the best balance

#### Guideline 4: Prefer EP over TP for Expert Layers

| EP Advantages | Details |
|---------------|---------|
| **Better GEMM efficiency** | Larger local matrix sizes improve GPU utilization |
| **Lower communication** | EP has less communication overhead than TP for MoE layers |
| **Simpler computation graph** | Easier to overlap communication with computation |
| **Token permutation** | When `EP = num_experts`, local token permutation is eliminated |

**Example:** For Mixtral 8x7B, `EP8×TP1` outperforms `EP4×TP2`.

#### Guideline 5: Enable Context Parallelism (CP) for Long Sequences

| Aspect | Recommendation |
|--------|----------------|
| **When to use** | Sequence length ≥ 8K tokens |
| **Key factor** | CP efficiency depends on overlapping communication with computation |
| **Config** | Set `--context-parallel-size` to partition sequences across GPUs |

### Step 3: Enable Performance Features Based on Profiling Bottlenecks

After establishing a working parallel configuration, profile your training to identify bottlenecks and apply targeted optimizations.

#### Memory Bottleneck

**Symptom**: Forced to use full recomputation or excessively large parallelism degrees to avoid OOM.

**Solutions**:
| Optimization | Overhead | Config | Reference |
|--------------|----------|--------|---------|
| Selective Recomputation | Low | `--recompute-granularity selective --recompute-modules ...` | [Fine-grained Recomputation](#fine-grained-recomputation) |
| Activation Offloading | Medium | `--fine-grained-activation-offloading --offload-modules ...` | [Fine-grained Activation Offloading](#fine-grained-activation-offloading) |
| Optimizer Offloading | Medium | `--optimizer-cpu-offload` | --- |

#### Communication Bottleneck

**Symptom**: Profiling shows significant time spent in collective operations.

**Solutions**: Identify which communication is the bottleneck and enable corresponding overlap:
| Communication Type | Overlap Config |
|--------------------|----------------|
| DP gradient reduce | `--overlap-grad-reduce` |
| DP param gather    | `--overlap-param-gather` |
| TP communication   | `--tp-comm-overlap` |
| EP All-to-All      | `--overlap-moe-expert-parallel-comm --delay-wgrad-compute` |
| PP send/recv       | Enable VPP with `--num-layers-per-virtual-pipeline-stage` |

#### CPU Overhead Bottleneck

**Symptom**: Nsight Systems timeline shows gaps between GPU kernels where CPU cannot launch kernels fast enough.

**Solutions**:
| Optimization | Config |
|--------------|--------|
| Disable Python GC | `--manual-gc --manual-gc-interval 100` |
| Enable CUDA Graphs | `--cuda-graph-impl transformer_engine --cuda-graph-scope attn moe_router moe_preprocess` |
| Reduce kernel launches | Decrease TP size or increase micro-batch size |

#### Computation Bottleneck

**Symptom**: GPU utilization is low despite no communication or CPU bottlenecks.

**Solutions**:
| Optimization | Config |
|--------------|--------|
| Enable kernel fusions | `--moe-router-fusion --moe-grouped-gemm --moe-permute-fusion` |
| Use FP8 precision | `--fp8-format e4m3 --fp8-recipe blockwise` |


## Feature Documentation

### Router and Load Balancing

Routers determine which expert(s) handle each token. A lightweight MLP scores every token and applies `softmax` or `sigmoid` to compute routing probabilities. The router then selects the top-K experts for each token.

> **Note**: The router logits is better to remain in **FP32** or **FP64** rather than BF16 by --moe-router-dtype fp32. At high expert counts, FP32 precision yields better accuracy because output hidden states of experts are multiplied by router scores and accumulated to get the final output.

#### Router Types

| Router Types | Description | Config |
|-------------|-------------|----------|
| **Top-K Router** | Standard routing with configurable K, uses softmax for probability computation | --moe-router-topk 8 |
| **Group Top-K Router** | Selects top-K expert groups, then routes experts in selected groups | --moe-router-num-groups 8 --moe-router-group-topk 4 |
| **Router score function** | Score function to calculate the probs from output logits of router | --moe-router-score-function softmax/sigmoid |

#### Load Balancing Strategies

| Strategy | Description | Config |
|----------|-------------|--------|
| **aux_loss** | Auxiliary loss for balancing expert usage on a micro-batch | `--moe-router-load-balancing-type aux_loss` |
| **seq_aux_loss** | Sequence-level auxiliary loss for balancing expert usage on each sequence| `--moe-router-load-balancing-type seq_aux_loss` |
| **global_aux_loss** | Global auxiliary loss for balancing expert usage on a global batch across all ranks | `--moe-router-load-balancing-type global_aux_loss` |
| **sinkhorn** | Optimal transport formulation for balancing expert usage | `--moe-router-load-balancing-type sinkhorn` |
| **aux loss free** | Dynamic bias-based load balancing strategy without auxiliary loss | `--moe-router-enable-expert-bias --moe-router-bias-update-rate 1e-3`|
| **none** | No load balancing | `--moe-router-load-balancing-type none` |

### Token Dispatching

After routing, tokens are **dispatched** to the GPU hosting the assigned expert. After expert computation, tokens are sent back and **combined** to restore the original sequence.

| Dispatcher | Description | Best For | Config |
|------------|-------------|----------|--------|
| **alltoall** | NCCL-based All-to-All communication for token exchange | Standard EP > 1 setups | `--moe-token-dispatcher-type alltoall` |
| **FlexDispatcher with [DeepEP](https://github.com/deepseek-ai/DeepEP) backend** | Removes redundant tokens during cross-node communication, fuses intra/inter-node communication into single kernel | Cross-node EP, fine-grained MoE (DeepSeek-V3) | `--moe-token-dispatcher-type flex --moe-flex-dispatcher-backend deepep` |
| **FlexDispatcher with [HybridEP](https://github.com/deepseek-ai/DeepEP/tree/hybrid-ep) backend** | NVIDIA's optimized dispatcher using TMA and IBGDA, fewer SMs, native MNNVL support | GB200 NVL72, Multi-Node NVLink | `--moe-token-dispatcher-type flex --moe-flex-dispatcher-backend hybridep` |
| **allgather** | Gathers all tokens to each GPU, no inter-GPU token movement | TP-only setups, small EP, large Top-K | `--moe-token-dispatcher-type allgather` |

### Upcycling
Use `--moe-use-upcycling` to enable upcycling, which loads the dense model from the `--load` directory, converts it to an MoE model at runtime, and starts training. The converted model is saved to the `--save` path before training begins. Upcycling is built on distributed checkpointing, supporting parallel modes different from existing dense checkpoints, such as arbitrary expert parallelism during upcycling.

In addition to the default upcycling strategy, we also support granular upcycling strategy which is a more state-of-the-art upcycling strategy from [our recent research work](https://arxiv.org/abs/2410.07524). For the default upcycling strategy, we duplicate the existing MLP to multiple experts, with each expert starting from a copy of the MLP. For the granular upcycling strategy, we use `--moe-upcycling-granularity` to specify how many times smaller is the expert hidden size compared with the original dense FFN hidden size. For using granular upcycling strategy, please set `--moe-upcycling-granularity` as a positive integer. If this param is set to 1, it means using the default upcycling strategy.

Note: The MoE model structure is defined through script arguments. All MoE-related arguments (such as `--num-experts`) can be customized; however, other model structure arguments must be consistent with those of the dense model. For granular upcycling strategy, the moe's FFN hidden size should be set as dense FFN hidden size divided by `--moe-upcycling-granularity`.

## Training Optimizations
MoE training faces three fundamental performance bottlenecks: **Memory Wall**, **Communication Wall**, and **Compute Efficiency Wall**. The following optimizations address each of these challenges.

### MoE Parallel Folding
**The Problem with Traditional Approaches:**
- Prior MoE frameworks constrain **EP ≤ DP** (Expert Parallelism must be a sub-group of Data Parallelism), which severely limits scalability.
- Applying the same TP/CP to both attention and MoE is suboptimal:
  - High TP benefits attention but hurts MoE (small per-expert dims make TP overhead prohibitive)
  - High CP benefits long-context attention but is unnecessary for MoE (tokens processed independently)

**MoE Parallel Folding** is Megatron Core's solution that **decouples attention and MoE parallelism**:

| Parallelism Group | Attention Layers | MoE Layers |
|-------------------|------------------|------------|
| **Dimensions** | TP × CP × DP × PP | ETP × EP × EDP × PP |

#### Key Benefits

1. **Breaks the EP ≤ DP Constraint**
   - Traditional: TP=4, CP=2, DP=8, PP=4 → max EP=8
   - With Folding: Same attention config, but MoE uses ETP=1, EP=64, EDP=1 → 8× more expert parallelism

2. **Reduces Minimum GPU Requirements**
   - Traditional CP=8, EP=8 requires at least 64 GPUs
   - With Folding: CP and EP are folded together, only 8 GPUs needed

3. **Enables Independent Optimization**
   - Use high TP for attention (memory efficiency)
   - Use ETP=1 for MoE (better GEMM efficiency, less communication)

4. **Keeps High-Bandwidth Communication in NVLink Domain**
   - Both CP and EP communication can remain within NVLink domain

> **Reference**: [MoE Parallel Folding: Heterogeneous Parallelism Mappings for Efficient Large-Scale MoE Model Training](https://arxiv.org/abs/2504.14960)

### Memory Optimization

Memory optimization is critical for large-scale MoE training, as MoE models maintain all expert parameters even though only a subset is activated per token.

| Optimization | Description | Config |
|--------------|-------------|--------|
| **Fine-grained Recomputation** | Selectively recomputes specific modules (e.g., `mla_up_proj`, `layernorm`, `moe_act`) instead of full layers | `--recompute-granularity selective --recompute-modules mla_up_proj layernorm moe_act` |
| **Fine-grained Activation Offloading** | Offloads activations to CPU memory, overlapping D2H/H2D transfers with computation | See `docs/source/api-guide/fine_grained_activation_offloading.md` |
| **Precision-aware Optimizer** | Stores optimizer states (exp_avg, exp_avg_sq) in BF16 instead of FP32, reducing optimizer memory by 50% | `--use-precision-aware-optimizer --exp-avg-dtype bf16 --exp-avg-sq-dtype bf16` |
| **Optimizer Offloading** | Offloads optimizer states to CPU memory. | `--optimizer-cpu-offload` |

#### Fine-grained Recomputation
A new output-discarding checkpointing method is also supported. This method discards the output memory of certain submodules during the forward pass and recomputes them during the backward pass, which can save memory compared to standard checkpointing. This can be enabled for specific submodules using the `--recompute-granularity selective --recompute-modules [submodule1, submodule2, ...]` argument. The supported submodules are:

* `moe_act`: Recompute the GroupedMLP activation function.
* `layernorm`: Recompute the input_layernorm and pre_mlp_layernorm (when they are not `IdentityOp`).
* `mla_up_proj`: Recompute the MLA up projection and RoPE applying parts.
* `core_attn`: Recompute the core attention submodule (uses standard checkpointing rather than output-discarding).
* `mlp`: Recompute the dense MLP submodule (uses standard checkpointing rather than output-discarding) which is useful for hybrid-models like DeepSeek-V3.
* `moe`: Recompute the MoE layer submodule (uses standard checkpointing rather than output-discarding).

#### Fine-grained Activation Offloading

Unlike recomputation (which trades compute for memory), offloading trades **GPU-CPU bandwidth for memory**: activations are transferred to CPU during forward pass and retrieved during backward pass. The key is hiding transfer latency behind computation using asynchronous D2H/H2D transfers.

**Key Features:**
- **Module-level granularity**: Target specific modules rather than entire layers
- **Computation-offloading overlap**: Asynchronous transfers via independent CUDA streams
- **Compatible with PP/VPP**: Works with pipeline parallelism and fine-grained recomputation

**Usage**
```bash
--fine-grained-activation-offloading
--offload-modules expert_fc1 moe_act # Choices: attn_norm, core_attn, attn_proj, mlp_norm, expert_fc1, moe_act
```

For more details, see `docs/source/api-guide/fine_grained_activation_offloading.md`

### Communication Optimization

Distributed training introduces communication overhead from various parallelism strategies. Megatron Core supports overlapping communication with computation to hide latency and improve throughput.

#### Data Parallel (DP) Communication Overlap

With distributed optimizer, DP introduces **reduce-scatter** (gradients) and **all-gather** (parameters) communications, chunked by Transformer layer granularity.

| Optimization | Description | Config |
|--------------|-------------|--------|
| **Gradient Reduce Overlap** | Overlaps gradient reduce-scatter with backward computation | `--overlap-grad-reduce` |
| **Param Gather Overlap** | Overlaps parameter all-gather with forward computation | `--overlap-param-gather` |
| **BF16 Gradient Reduce** | Reduces gradients in BF16 instead of FP32 for better performance | `--grad-reduce-in-fp32 false` (via mixed precision config) |
| **FP8 Param Gather** | Conducts parameter all-gather in FP8, reducing overhead by 50% | `--fp8-param-gather` |

#### Tensor Parallel (TP) Communication Overlap

TP with sequence parallelism introduces activation all-gather and reduce-scatter operations. Communications are overlapped in **bulk** (no dependency) or **pipelined** (with dependency) fashion.

| Optimization | Description | Config |
|--------------|-------------|--------|
| **TP Comm Overlap** | Enables bulk and pipelined TP communication overlap | `--tp-comm-overlap` |

> **Requirements**: `tensor_model_parallel_size >= 2` and `--sequence-parallel`

#### Pipeline Parallel (PP) Communication Overlap

PP introduces P2P activation sends/receives between pipeline stages. Overlap is automatic in the 1F1B pipelining phase when VPP is enabled.

| Optimization | Description | Config |
|--------------|-------------|--------|
| **P2P Comm Overlap** | Overlaps PP P2P communications with non-dependent computations | `--overlap-p2p-comm` (auto-enabled with VPP) |
| **VPP for Better Overlap** | Increases overlap opportunities by reducing layers per virtual stage | `--num-layers-per-virtual-pipeline-stage` |

#### Expert Parallel (EP) Communication Overlap

EP All-to-All can consume 30-40% of training time without optimization. These features hide or reduce EP communication overhead.

| Optimization | Description | Config |
|--------------|-------------|--------|
| **EP A2A Overlap** | Overlaps All-to-All with computation by merging FWD-BWD passes of adjacent microbatches | `--overlap-moe-expert-parallel-comm --delay-wgrad-compute` |
| **Shared Expert Overlap** | Runs shared expert computation concurrently with EP token transfer | `--moe-shared-expert-overlap` |

> **Requirements for EP A2A Overlap**: `expert_model_parallel_size > 1`, CUDA_DEVICE_MAX_CONNECTIONS > 1.

### Compute Optimization

Fine-grained MoE produces many small operations that can underutilize GPU resources. These optimizations reduce kernel launch overhead and improve GPU utilization.

| Optimization | Description | Config |
|--------------|-------------|--------|
| **Grouped GEMM** | Batches multiple expert GEMM operations into a single kernel call, improving GPU utilization | `--moe-grouped-gemm` |
| **Router Fusion** | Fuses router projection, top-k selection, softmax, and auxiliary loss into fewer kernels | `--moe-router-fusion` |
| **Permute Fusion** | Fuses token permutation/unpermutation operations into optimized single kernels | `--moe-permute-fusion` |
| **FP8 Training** | Uses FP8 Tensor Core operations for faster GEMMs on Hopper/Blackwell GPUs | `--fp8 --fp8-recipe blockwise` |


### FP8 Training

FP8 training provides benefits across all three performance walls:

| Wall | FP8 Benefit | Impact |
|------|-------------|--------|
| **Memory** | 50% activation reduction | Stores linear layer inputs in FP8 instead of BF16 |
| **Memory** | Eliminate BF16 weight copies | Native FP8 casts directly from FP32 to FP8 |
| **Communication** | 50% EP dispatch volume | Dispatches tokens in FP8 instead of BF16 |
| **Communication** | 50% parameter all-gather | With FP8 primary weights (except MXFP8) |
| **Compute** | Faster Tensor Core GEMMs | FP8 ops on Hopper/Blackwell are faster than BF16 |

#### FP8 Recipes

| Recipe | Scaling Granularity | Format | Platform | Use Case |
|--------|---------------------|--------|----------|----------|
| **Per-tensor** | Whole tensor | E4M3/E5M2 hybrid | Hopper, Blackwell | Conservative, initial experimentation |
| **Blockwise** | 1×128 (activations), 128×128 (weights) | E4M3 | Hopper | **Production-proven** (DeepSeek-V3, Minimax-M2) |
| **MXFP8** | 1×32 | E4M3 + E8M0 scaling | Blackwell | Native hardware support on GB200 |

> **Recommendation**: Use **blockwise FP8** on Hopper for production training. It has been validated at scale on DeepSeek-V3 class models.

#### MoE-Specific FP8 Optimizations

| Optimization | Description | Config |
|--------------|-------------|--------|
| **Routing Map Padding** | Pads routing map (not tokens) to align M dimension to 16/32, avoiding per-tensor padding overhead | `--moe-router-padding-for-fp8` |
| **FP8 Primary Weights** | Casts FP32 master weights directly to FP8, eliminating BF16 intermediate copy | `--fp8-param-gather` (Need additional `--reuse-grad-buf-for-mxfp8-param-ag` for MXFP8) |


#### Example Configuration

```bash
# Blockwise FP8 on Hopper (recommended for production)
--fp8-format e4m3
--fp8-recipe blockwise
--fp8-param-gather
--moe-router-padding-for-fp8

# MXFP8 on Blackwell
--fp8-format e4m3
--fp8-recipe mxfp8
--moe-router-padding-for-fp8
--fp8-param-gather 
--reuse-grad-buf-for-mxfp8-param-ag
```

> **Note**: For blockwise and MXFP8 recipes with current scaling, training loss curves show negligible difference compared to BF16 baselines.


### CUDA Graph
CUDA Graph functionality can be enabled through the `--cuda-graph-impl` option. There are two implementations:

1. `--cuda-graph-impl=local`: Captures cuda graphs using the MCore-internal cuda graph manager.
2. `--cuda-graph-impl=transformer_engine`: Captures cuda graphs using the TE `make_graphed_callables()` interface.

To use `--cuda-graph-impl=transformer_engine`, the user should call related methods `TECudaGraphHelper.create_cudagraphs()` and `TECudaGraphHelper.cuda_graph_set_manual_hooks()` in the training script. Please refer to the usage in `megatron/training/training.py`.

For MoE models, certain configurations may prevent CUDA Graph capture of MoE layers. Specifically, when `--moe-expert-capacity-factor` and `--moe-pad-expert-input-to-capacity` are not set, the resulting dynamic shapes make MoE layers uncapturable. In such cases, you can still leverage CUDA Graphs for the attention layers (operations in `TransformerLayer._forward_attention()`) by setting `--cuda-graph-scope=attn`, while leaving the MoE layers (operations in `TransformerLayer._forward_mlp()`) unmodified. See the argument description for more usage of `--cuda-graph-scope`.

## MoE Arguments Reference
### Core Arguments
| Argument | Description | Default |
|----------|-------------|---------|
| --num-experts | Number of Experts in MoE | None |
| --expert-model-parallel-size | Degree of expert model parallelism | 1 |
| --moe-ffn-hidden-size | MoE FFN hidden size | FFN hidden size of the dense model |
| --expert-tensor-parallel-size | Expert layer tensor parallelism | Same as TP(Recommeded to set to 1 for fine-grained MoE models) |
| --moe-layer-freq | MoE layer frequency pattern | 1 |

### Router Arguments
| Argument | Description | Default |
|----------|-------------|---------|
| --moe-router-load-balancing-type | Load balancing: aux_loss, sinkhorn, seq_aux_loss, none | aux_loss |
| --moe-router-topk | Number of experts per token | 2 |
| --moe-router-score-function | Score function: softmax, sigmoid | softmax |
| --moe-router-pre-softmax | Softmax before top-k | False |
| --moe-router-num-groups | Groups for group-limited routing | None |
| --moe-router-group-topk | Selected groups in group-limited routing | None |
| --moe-router-enable-expert-bias | Dynamic per-expert bias | False |
| --moe-router-bias-update-rate | Bias update rate | 1e-3 |
| --moe-router-fusion | Enable router fusion | False |
| --moe-router-dtype | Router precision: fp32, fp64 | None |
| --moe-router-padding-for-fp8 | Pad for FP8 alignment | False |

### Loss and Regularization
| Argument | Description | Default |
|----------|-------------|---------|
| --moe-aux-loss-coeff | Auxiliary loss coefficient | 0.0 |
| --moe-z-loss-coeff | Z-loss coefficient | None |
| --moe-input-jitter-eps | Input jitter epsilon | None |

### Token Dispatching
| Argument | Description | Default |
|----------|-------------|---------|
| --moe-token-dispatcher-type | Dispatcher: allgather, alltoall, flex | allgather |
| --moe-enable-deepep | Enable DeepEP (with flex) | False |
| --moe-expert-capacity-factor | Capacity factor | None |
| --moe-pad-expert-input-to-capacity | Pad to capacity | False |
| --moe-token-drop-policy | Drop policy: probs, position | probs |
| --moe-permute-fusion | Fuse permutation ops | False |

### Performance Optimization
| Argument | Description | Default |
|----------|-------------|---------|
| --moe-grouped-gemm | Use GroupedGEMM | False |
| --overlap-moe-expert-parallel-comm | Batch-level EP overlap | False |
| --delay-wgrad-compute | Split dgrad/wgrad compute | False |
| --moe-shared-expert-intermediate-size | Shared expert FFN size | None |
| --moe-shared-expert-overlap | Overlap shared expert | False |

### Memory and Checkpointing
| Argument | Description | Default |
|----------|-------------|---------|
| --moe-layer-recompute | Recompute MoE layer | False |
| --moe-use-upcycling | Enable upcycling | False |
| --moe-upcycling-granularity | Upcycling granularity | 1 |

### Miscellaneous
| Argument | Description | Default |
|----------|-------------|---------|
| --moe-per-layer-logging | Per-layer logging | False |
| --moe-router-force-load-balancing | Force load balancing (experimental) | False |

## Examples
```bash
#!/bin/bash

# Runs Mixtral 8x7B model on 32 H100/A100 GPUs

export CUDA_DEVICE_MAX_CONNECTIONS=1

GPUS_PER_NODE=8
MASTER_ADDR=${MASTER_ADDR:-"localhost"}
MASTER_PORT=${MASTER_PORT:-"6000"}
NNODES=${NNODES:-"4"}
NODE_RANK=${RANK:-"0"}
WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES))

CHECKPOINT_PATH=$1
TOKENIZER_MODEL=$2
DATA_PATH=$3

DISTRIBUTED_ARGS=(
    --nproc_per_node $GPUS_PER_NODE
    --nnodes $NNODES
    --node_rank $NODE_RANK
    --master_addr $MASTER_ADDR
    --master_port $MASTER_PORT
)

MODEL_ARGS=(
    --disable-bias-linear
    --seq-length 4096
    --max-position-embeddings 32768
    --num-layers 32
    --hidden-size 4096
    --ffn-hidden-size 14336
    --num-attention-heads 32
    --init-method-std 0.01
    --attention-dropout 0.0
    --hidden-dropout 0.0
    --normalization RMSNorm
    --position-embedding-type rope
    --swiglu
    --untie-embeddings-and-output-weights
    --group-query-attention
    --num-query-groups 8
    --no-masked-softmax-fusion
    --no-position-embedding
)

MOE_ARGS=(
    --num-experts 8
    --expert-model-parallel-size 8
    --moe-router-load-balancing-type aux_loss
    --moe-router-topk 2
    --moe-aux-loss-coeff 1e-2
    --moe-grouped-gemm
    --moe-permute-fusion
    --moe-token-dispatcher-type alltoall
)

DATA_ARGS=(
    --tokenizer-type Llama2Tokenizer
    --tokenizer-model ${TOKENIZER_MODEL}
    --data-path $DATA_PATH
    --split 99990,8,2
)

TRAINING_ARGS=(
    --micro-batch-size 1
    --global-batch-size 128
    --lr 1e-4
    --train-iters 500000
    --lr-decay-iters 320000
    --lr-decay-style cosine
    --min-lr 1.0e-5
    --weight-decay 0.1
    --lr-warmup-iters 500
    --clip-grad 1.0
    --bf16
    --overlap-grad-reduce
    --overlap-param-gather
)

MODEL_PARALLEL_ARGS=(
    --tensor-model-parallel-size 1
    --pipeline-model-parallel-size 4
    --num-layers-per-virtual-pipeline-stage 8
    --sequence-parallel
    --use-distributed-optimizer
)

LOGGING_ARGS=(
    --log-interval 1
    --save-interval 10000
    --eval-interval 1000
    --eval-iters 10
    --save $CHECKPOINT_PATH
    --load $CHECKPOINT_PATH
    --tensorboard-dir "${CHECKPOINT_PATH}/tensorboard"
    --ckpt-format torch_dist
    --auto-detect-ckpt-format
)

torchrun ${DISTRIBUTED_ARGS[@]} pretrain_gpt.py \
    ${MODEL_ARGS[@]} \
    ${MOE_ARGS[@]} \
    ${DATA_ARGS[@]} \
    ${TRAINING_ARGS[@]} \
    ${MODEL_PARALLEL_ARGS[@]} \
    ${LOGGING_ARGS[@]}
```

</details>

## Contributing

We welcome contributions! Please see [CONTRIBUTING.md](https://github.com/NVIDIA/Megatron-LM/blob/main/CONTRIBUTING.md) for guidelines.

## Support

- GitHub Issues: [Report bugs or request features](https://github.com/NVIDIA/Megatron-LM/issues)
- Documentation: [Full documentation](https://docs.nvidia.com/megatron-core/developer-guide/latest/index.html)


## Citation

If you use Megatron-Core MoE in your research, please cite:

```bibtex

@article{megatron-lm,
  title={Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism},
  author={Shoeybi, Mohammad and Patwary, Mostofa and Puri, Raul and LeGresley, Patrick and Casper, Jared and Catanzaro, Bryan},
  journal={arXiv preprint arXiv:1909.08053},
  year={2019}
}

@article{moe-parallel-folding,
    title={MoE Parallel Folding: Heterogeneous Parallelism Mappings for Efficient Large-Scale MoE Model Training with Megatron Core}, 
    author={Liu, Dennis and Yan, Zijie and Yao, Xin and Liu, Tong and Korthikanti, Vijay and Wu, Evan and Fan, Shiqing and Deng, Gao and Bai, Hongxiao and Chang, Jianbin and Aithal, Ashwath and Andersch, Michael and Shoeybi, Mohammad and Yao, Jiajie and Zhou, Chandler and Wu, David and Li, Xipeng and Yang, June},
    year={2025},
    journal={arXiv preprint arXiv:2504.14960},
}
```


================================================
FILE: megatron/core/transformer/moe/__init__.py
================================================


================================================
FILE: megatron/core/transformer/moe/experts.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
from __future__ import annotations

import logging
from collections.abc import Callable
from copy import deepcopy
from dataclasses import dataclass
from math import ceil
from typing import Optional, Protocol, Tuple

import torch
import torch.nn.functional as F

from megatron.core import tensor_parallel
from megatron.core.activations import squared_relu
from megatron.core.dist_checkpointing.mapping import ShardedStateDict
from megatron.core.dist_checkpointing.utils import replace_prefix_for_sharding
from megatron.core.extensions.transformer_engine import HAVE_TE
from megatron.core.fusions.fused_bias_geglu import quick_gelu, weighted_bias_quick_geglu_impl
from megatron.core.fusions.fused_bias_swiglu import weighted_bias_swiglu_impl
from megatron.core.fusions.fused_weighted_squared_relu import weighted_squared_relu_impl
from megatron.core.inference.quantization.mxfp8_tensor import MXFP8Tensor
from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
    FineGrainedActivationOffloadingInterface as off_interface,
)
from megatron.core.transformer.mlp import (
    MLP,
    MLPSubmodules,
    TEActivationFunctionBuilder,
    apply_swiglu_sharded_factory,
)
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.moe.moe_utils import (
    ProcessGroupCollection,
    get_align_size_for_quantization,
)
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.utils import (
    ensure_metadata_has_dp_cp_group,
    sharded_state_dict_default,
)
from megatron.core.typed_torch import apply_module, not_none

if HAVE_TE:
    from megatron.core.extensions.transformer_engine import Fp8Padding, Fp8Unpadding
else:
    Fp8Padding, Fp8Unpadding = None, None

try:
    import flashinfer.fused_moe as fused_moe
    from flashinfer.fused_moe.core import ActivationType

    HAVE_FLASHINFER = True
except ImportError:
    HAVE_FLASHINFER = False

from megatron.core.inference.moe import ActivationType as McoreActivationType
from megatron.core.inference.moe import (
    InferenceGroupedGemmBackend,
    mcore_fused_moe,
    resolve_inference_grouped_gemm_backend,
)

logger = logging.getLogger(__name__)


class GroupedLinearFc1Interface(Protocol):
    """Interface for linear_fc1 module in TEGroupedMLP."""

    def forward(
        self, permuted_local_hidden_states: torch.Tensor, tokens_per_expert: list[int], /
    ) -> tuple[torch.Tensor, torch.Tensor | None]:
        """Forward method for linear_fc1 module."""
        ...

    def backward_dw(self) -> None:
        """Backward method for linear_fc1 module."""
        ...


class GroupedLinearFc1Builder(Protocol):
    """Protocol describing how to build a linear_fc1 layer in TEGroupedMLP."""

    def __call__(
        self,
        num_local_experts: int,
        input_size: int,
        output_size: int,
        /,
        *,
        config: TransformerConfig,
        init_method: Callable[[torch.Tensor], None],
        bias: bool,
        skip_bias_add: bool,
        is_expert: bool,
        tp_comm_buffer_name: str | None,
        pg_collection: ProcessGroupCollection | None,
    ) -> GroupedLinearFc1Interface:
        """Builds a linear_fc1 layer for TEGroupedMLP."""
        ...


class GroupedLinearFc2Interface(Protocol):
    """Protocol for linear_fc2 module in TEGroupedMLP."""

    def forward(
        self, intermediate_parallel: torch.Tensor, tokens_per_expert: list[int], /
    ) -> tuple[torch.Tensor, torch.Tensor | None]:
        """Forward method for linear_fc2 module."""
        ...

    def backward_dw(self) -> None:
        """Backward method for linear_fc2 module."""
        ...


class GroupedLinearFc2Builder(Protocol):
    """Protocol describing how to build a linear_fc2 layer in TEGroupedMLP."""

    def __call__(
        self,
        num_local_experts: int,
        input_size: int,
        output_size: int,
        /,
        *,
        config: TransformerConfig,
        init_method: Callable[[torch.Tensor], None],
        bias: bool,
        skip_bias_add: bool,
        is_expert: bool,
        tp_comm_buffer_name: str | None,
        pg_collection: ProcessGroupCollection | None,
    ) -> GroupedLinearFc2Interface:
        """Builds a linear_fc2 layer for TEGroupedMLP."""
        ...


@dataclass
class TEGroupedMLPSubmodules:
    """
    The dataclass for ModuleSpecs of TEGroupedMLP submodules
    including  linear fc1, activation function, linear fc2.
    """

    linear_fc1: GroupedLinearFc1Builder

    linear_fc2: GroupedLinearFc2Builder

    activation_func: TEActivationFunctionBuilder | None = None
    """
    Builder for an activation function module; only used if config.use_te_activation_func is True.
    """


class TEGroupedMLP(MegatronModule):
    """An efficient implementation of the Experts layer using TE's GroupedLinear.

    Executes multiple experts in parallel to maximize computational efficiency.
    """

    # TODO(M4): breaking api, switched from pass in tp_group to pass in pg_collection.
    def __init__(
        self,
        num_local_experts: int,
        config: TransformerConfig,
        submodules: TEGroupedMLPSubmodules,
        pg_collection: Optional[ProcessGroupCollection] = None,
    ):
        super().__init__(config=config)
        self.num_local_experts = num_local_experts
        self.input_size = self.config.hidden_size
        assert not (
            self.config.add_bias_linear and config.bias_dropout_fusion
        ), "bias_dropout_fusion is not supported in TEGroupedMLP when add_bias_linear=True"

        self.ep_group = pg_collection.ep
        self.tp_group = pg_collection.expt_tp

        # Double the output width with gated linear unit, see https://arxiv.org/pdf/2002.05202.pdf
        ffn_hidden_size = not_none(self.config.moe_ffn_hidden_size)
        if self.config.gated_linear_unit:
            ffn_hidden_size *= 2

        self.linear_fc1 = submodules.linear_fc1(
            self.num_local_experts,
            self.input_size if self.config.moe_latent_size is None else self.config.moe_latent_size,
            ffn_hidden_size,
            config=self.config,
            init_method=not_none(self.config.init_method),
            bias=self.config.add_bias_linear,
            skip_bias_add=False,
            is_expert=True,
            tp_comm_buffer_name='fc1',
            pg_collection=pg_collection,
        )

        if self.config.use_te_activation_func and not (submodules.activation_func is None):
            self.activation_func = apply_module(submodules.activation_func(config=self.config))
        else:
            self.activation_func = self.config.activation_func

        self.linear_fc2 = submodules.linear_fc2(
            self.num_local_experts,
            not_none(self.config.moe_ffn_hidden_size),
            (
                self.config.hidden_size
                if self.config.moe_latent_size is None
                else self.config.moe_latent_size
            ),
            config=self.config,
            init_method=not_none(self.config.output_layer_init_method),
            bias=self.config.add_bias_linear,
            skip_bias_add=True,
            is_expert=True,
            tp_comm_buffer_name='fc2',
            pg_collection=pg_collection,
        )

        self.offload_expert_fc1 = (
            self.config.fine_grained_activation_offloading
            and "expert_fc1" in self.config.offload_modules
        )

        self.offload_moe_act = (
            self.config.fine_grained_activation_offloading
            and "moe_act" in self.config.offload_modules
        )

        self.activation_recompute = (
            self.config.recompute_granularity == 'selective'
            and "moe_act" in self.config.recompute_modules
        )
        if self.activation_recompute and (self.config.fp8 or self.config.fp4):
            from megatron.core.extensions.transformer_engine import set_save_original_input

            set_save_original_input(self.linear_fc2)

        # This is to avoid the CPU overhead of multiple d2h copies
        if self.offload_expert_fc1:
            from megatron.core.extensions.transformer_engine import set_save_original_input

            set_save_original_input(self.linear_fc1)

        if self.config.fp8 or self.config.fp4:
            assert HAVE_TE, "FP8 and FP4 requires TE."
            self.quantization_padding = Fp8Padding(self.num_local_experts)
            self.quantization_unpadding = Fp8Unpadding(self.num_local_experts)

    @staticmethod
    def _apply_bias(intermediate_parallel, bias_parallel, tokens_per_expert, permuted_probs):
        if bias_parallel is None:
            return intermediate_parallel
        shape = intermediate_parallel.shape
        return (
            torch.cat(
                [
                    t + b * p
                    for t, b, p in zip(
                        torch.split(intermediate_parallel.view(-1, shape[-1]), tokens_per_expert),
                        bias_parallel,
                        torch.split(permuted_probs, tokens_per_expert),
                    )
                ]
            )
            .view(shape)
            .to(intermediate_parallel.dtype)
        )

    def bias_act_func(self, intermediate_parallel, bias_parallel, permuted_probs):
        """
        Applies bias and activation function to the output of linear_fc1.
        """
        if self.config.use_te_activation_func:
            if bias_parallel is not None:
                intermediate_parallel = intermediate_parallel + bias_parallel
            intermediate_parallel = self.activation_func(intermediate_parallel)
            if permuted_probs is not None:
                original_dtype = intermediate_parallel.dtype
                intermediate_parallel = intermediate_parallel * permuted_probs
                intermediate_parallel = intermediate_parallel.to(original_dtype)
        elif self.config.bias_activation_fusion:
            if self.activation_func == F.silu and self.config.gated_linear_unit:
                # dtype is handled inside the fused kernel
                intermediate_parallel = weighted_bias_swiglu_impl(
                    intermediate_parallel,
                    bias_parallel,
                    permuted_probs,
                    self.config.activation_func_fp8_input_store,
                )
            elif self.activation_func == quick_gelu and self.config.gated_linear_unit:
                intermediate_parallel = weighted_bias_quick_geglu_impl(
                    intermediate_parallel,
                    bias_parallel,
                    permuted_probs,
                    self.config.activation_func_fp8_input_store,
                    self.config.glu_linear_offset,
                    self.config.activation_func_clamp_value,
                )
            else:
                raise ValueError("Only support fusion of swiglu and quick_gelu in TEGroupedMLP.")
        elif self.activation_func == squared_relu and self.config.use_fused_weighted_squared_relu:
            assert bias_parallel is None, "Bias is not supported with fused weighted squared relu."
            intermediate_parallel = weighted_squared_relu_impl(
                intermediate_parallel, permuted_probs
            )
        else:
            if self.config.gated_linear_unit:

                def glu(x):
                    x_glu, x_linear = torch.chunk(x, 2, dim=-1)
                    if (val := self.config.activation_func_clamp_value) is not None:
                        x_glu = x_glu.clamp(min=None, max=val)
                        x_linear = x_linear.clamp(min=-val, max=val)
                    return self.config.activation_func(x_glu) * (
                        x_linear + self.config.glu_linear_offset
                    )

                intermediate_parallel = glu(intermediate_parallel)
            else:
                intermediate_parallel = self.activation_func(intermediate_parallel)
            original_dtype = intermediate_parallel.dtype
            intermediate_parallel = intermediate_parallel * permuted_probs
            intermediate_parallel = intermediate_parallel.to(original_dtype)
        return intermediate_parallel

    def forward(
        self,
        permuted_local_hidden_states: torch.Tensor,
        tokens_per_expert: torch.Tensor,
        permuted_probs: torch.Tensor,
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        """Forward of TEGroupedMLP

        Args:
            permuted_local_hidden_states (torch.Tensor): The permuted input hidden states of the
            local experts.
            tokens_per_expert (torch.Tensor): The number of tokens per expert.
            permuted_probs (torch.Tensor): The permuted probs of each token produced by the router.

        Return:
            output (torch.Tensor): The output of the local experts.
        """
        tokens_per_expert: list[int] = tokens_per_expert.tolist()
        if self.config.fp8 or self.config.fp4:
            actual_tokens_per_expert = tokens_per_expert
            permuted_local_hidden_states, tokens_per_expert = self.quantization_padding(
                permuted_local_hidden_states, tokens_per_expert
            )
            permuted_probs, _ = self.quantization_padding(
                permuted_probs.unsqueeze(-1), actual_tokens_per_expert
            )
        else:
            permuted_probs = permuted_probs.unsqueeze(-1)

        if self.config.moe_apply_probs_on_input:
            assert (
                self.config.moe_router_topk == 1
            ), "`moe_apply_probs_on_input` only works with `moe_router_topk`=1."
            original_dtype = permuted_local_hidden_states.dtype
            permuted_local_hidden_states = permuted_probs * permuted_local_hidden_states
            permuted_local_hidden_states = permuted_local_hidden_states.to(original_dtype)
            # Probs already applied, so reset to 1.
            permuted_probs = torch.ones_like(permuted_probs)

        with off_interface(
            self.offload_expert_fc1, permuted_local_hidden_states, "expert_fc1"
        ) as permuted_local_hidden_states:
            fc1_output, bias_parallel = apply_module(self.linear_fc1)(
                permuted_local_hidden_states, tokens_per_expert
            )
        if self.offload_expert_fc1:
            fc1_output = off_interface.group_commit(
                fc1_output,
                name="expert_fc1",
                forced_released_tensors=[permuted_local_hidden_states],
            )

        if self.activation_recompute:
            self.activation_checkpoint = tensor_parallel.CheckpointWithoutOutput()
            with off_interface(self.offload_moe_act, fc1_output, "moe_act") as fc1_output:
                bias_act_output = self.activation_checkpoint.checkpoint(
                    self.bias_act_func, fc1_output, bias_parallel, permuted_probs
                )
        else:
            with off_interface(self.offload_moe_act, fc1_output, "moe_act") as fc1_output:
                bias_act_output = self.bias_act_func(fc1_output, bias_parallel, permuted_probs)
        output, output_bias = apply_module(self.linear_fc2)(bias_act_output, tokens_per_expert)
        if self.activation_recompute:
            self.activation_checkpoint.discard_output_and_register_recompute(output)

        # Delay the offload of the moe act until after the linear_fc2 has been computed
        # to make sure the fc1_output is reloaded to GPU before recomputing moe_act.
        if self.offload_moe_act:
            output = off_interface.group_commit(
                output, name="moe_act", forced_released_tensors=[fc1_output]
            )
        output = self._apply_bias(output, output_bias, tokens_per_expert, permuted_probs)

        # upad and concat the output
        if self.config.fp8 or self.config.fp4:
            output = self.quantization_unpadding(output, actual_tokens_per_expert)

        output_bias = None

        return output, output_bias

    def sharded_state_dict(
        self, prefix: str = '', sharded_offsets: tuple = (), metadata: Optional[dict] = None
    ) -> ShardedStateDict:
        """
        Maps local expert to global experts.
        The sharded state dict is interchangable with SequentialMLP's.
        """
        # Guard for cases metadata is not provided
        metadata = ensure_metadata_has_dp_cp_group(metadata)
        singleton_local_shards = (metadata or {}).get('singleton_local_shards', False)
        sharded_state_dict = {}
        for name, module in self._modules.items():
            sub_sd = sharded_state_dict_default(
                module, f'{name}.', sharded_offsets, metadata, tp_group=self.tp_group
            )
            if name == 'linear_fc1' and self.config.gated_linear_unit:
                num_global_experts = self.ep_group.size() * self.num_local_experts
                local_expert_indices_offset = self.ep_group.rank() * self.num_local_experts
                ep_axis = len(sharded_offsets)
                for i in range(self.num_local_experts):
                    if singleton_local_shards:
                        new_sharded_offsets = sharded_offsets
                    else:
                        new_sharded_offsets = (
                            *sharded_offsets,
                            (ep_axis, local_expert_indices_offset + i, num_global_experts),
                        )
                    for k in (f'{name}.weight{i}', f'{name}.bias{i}'):
                        if k in sub_sd:
                            sub_sd[k] = apply_swiglu_sharded_factory(
                                sub_sd[k], new_sharded_offsets, singleton_local_shards
                            )
            if singleton_local_shards:
                replace_prefix_for_sharding(sub_sd, '', f'{prefix}experts.')
            else:
                # Add prefix here to match sequential's keys
                replace_prefix_for_sharding(sub_sd, f'{name}.', f'{prefix}experts.{name}.')
            sharded_state_dict.update({f"{prefix}{k}": v for k, v in sub_sd.items()})
        return sharded_state_dict

    def backward_dw(self):
        """Performs backward pass for weight gradients in TEGroupedMLP.

        This method executes the backward pass for weight gradients by calling
        backward_dw() on the linear layers in reverse order (fc2 followed by fc1).
        If an error occurs during execution, it is caught and re-raised with a
        descriptive message.
        """
        self.linear_fc2.backward_dw()
        self.linear_fc1.backward_dw()


class InferenceGroupedMLP(TEGroupedMLP):
    """Inference-optimized GroupedMLP with GPU-resident offsets.

    Inherits from TEGroupedMLP to reuse weight initialization and checkpoint compatibility.
    Supports three forward paths:
    - Training: delegates to parent TEGroupedMLP
    - Inference + CUDA graphed: FlashInfer cutlass_fused_moe (fused permute + GEMM)
    - Inference + eager: torch.nn.functional.grouped_mm with GPU-resident cumsum offsets
    """

    def __init__(
        self,
        num_local_experts: int,
        config: TransformerConfig,
        submodules: MLPSubmodules,
        pg_collection: Optional[ProcessGroupCollection] = None,
    ):
        # Initialize parent TEGroupedMLP (creates linear_fc1, linear_fc2)
        super().__init__(
            num_local_experts=num_local_experts,
            config=config,
            submodules=submodules,
            pg_collection=pg_collection,
        )

        # Concatenated weights are built lazily on first forward to ensure
        # checkpoint loading has already populated the per-expert parameters.
        self._concatenated_weights_built = False

        self.is_inference_cuda_graphed_iteration = False

        if HAVE_FLASHINFER:
            self._flashinfer_activation_type = self._resolve_flashinfer_activation_type()

        self._mcore_activation_type = self._resolve_mcore_activation_type()
        self.inference_grouped_gemm_backend = config.inference_grouped_gemm_backend

    def _resolve_flashinfer_activation_type(self):
        """Map megatron activation config to FlashInfer ActivationType."""
        assert (
            HAVE_FLASHINFER
        ), "flashinfer-python is required to resolve FlashInfer activation type."
        func = self.config.activation_func
        if func == F.silu:
            return ActivationType.Silu
        elif func == F.gelu:
            return ActivationType.Gelu
        elif func == F.relu:
            return ActivationType.Relu
        elif func == squared_relu:
            return ActivationType.Relu2
        raise ValueError(f"No FlashInfer ActivationType mapping for activation_func={func}")

    def _resolve_mcore_activation_type(self):
        """Map megatron activation config to mcore_fused_moe ActivationType."""
        func = self.config.activation_func
        if func == squared_relu:
            return McoreActivationType.SQUARED_RELU
        raise ValueError(f"No mcore_fused_moe ActivationType mapping for activation_func={func}")

    def set_inference_cuda_graphed_iteration(self):
        """Enable CUDA-graphed iteration mode."""
        self.is_inference_cuda_graphed_iteration = True

    def unset_inference_cuda_graphed_iteration(self):
        """Disable CUDA-graphed iteration mode."""
        self.is_inference_cuda_graphed_iteration = False

    def _build_concatenated_mxfp8_weights(self):
        """Build stacked MXFP8 weight tensors from per-expert MXFP8Tensor attributes.

        After quantize_model_to_mxfp8, each per-expert weight (weight0, weight1, ...)
        has been replaced with an MXFP8Tensor. This method stacks their data and
        scales into _fc1_weight / _fc2_weight for scaled_grouped_mm.

        Note: this creates a contiguous copy since per-expert MXFP8Tensor attributes
        are not contiguous across experts. This is a one-time cost at first forward.

        Unlike _build_concatenated_weights, this does not create nn.Parameter views
        back into the buffer — MXFP8 weights are not nn.Parameters (they are plain
        MXFP8Tensor attributes set by quantize_model_to_mxfp8). This path is only
        intended for non-colocated inference.
        """

        for linear_name, buf_name in [('linear_fc1', '_fc1_weight'), ('linear_fc2', '_fc2_weight')]:
            linear = getattr(self, linear_name)
            q_list, s_list = [], []
            for i in range(self.num_local_experts):
                w = getattr(linear, f'weight{i}')
                if isinstance(w, MXFP8Tensor):
                    mxfp8 = w
                elif hasattr(w, 'data') and isinstance(w.data, MXFP8Tensor):
                    mxfp8 = w.data
                else:
                    raise RuntimeError(
                        f"Expected MXFP8Tensor for {linear_name}.weight{i}, "
                        f"got {type(w).__name__}. Was quantize_model_to_mxfp8 called?"
                    )
                q_list.append(mxfp8.data)
                s_list.append(mxfp8.scale)

            setattr(
                self,
                buf_name,
                MXFP8Tensor(
                    data=torch.stack(q_list, dim=0).contiguous(),
                    scale=torch.stack(s_list, dim=0).contiguous(),
                ),
            )

    @torch.inference_mode(False)  # needed for non-colocated inference.
    def _build_concatenated_weights(self):
        """Create big contiguous weight tensors that share storage with TE's per-expert parameters.

        Creates _fc1_weight and _fc2_weight as contiguous tensors of shape
        [num_experts, out_features, in_features]. Instead of replacing TE's parameters
        (which breaks TE's internal bookkeeping), we redirect each parameter's .data
        to be a view into the contiguous buffer. The nn.Parameter objects themselves
        remain untouched in TE's module, preserving FP8 scaling state, etc.

        This allows:
        - TE's forward to work correctly (same Parameter objects, same internal state)
        - Training updates to flow through (param.data is a view into the big tensor)
        - torch.nn.functional.grouped_mm / FlashInfer to use the big tensor directly
        """
        # Get device/dtype from existing TE weights
        device = self.linear_fc1.weight0.device
        dtype = self.linear_fc1.weight0.dtype

        fc1_shape = self.linear_fc1.weight0.shape  # [out_features, in_features]
        fc2_shape = self.linear_fc2.weight0.shape

        # Create big contiguous tensors
        _fc1_weight = torch.empty(self.num_local_experts, *fc1_shape, device=device, dtype=dtype)
        _fc2_weight = torch.empty(self.num_local_experts, *fc2_shape, device=device, dtype=dtype)

        # Copy existing TE weights into big tensors, then point param.data to the views
        for i in range(self.num_local_experts):
            fc1_param = getattr(self.linear_fc1, f'weight{i}')
            fc2_param = getattr(self.linear_fc2, f'weight{i}')

            # Copy initialized data into contiguous buffer
            _fc1_weight[i].copy_(fc1_param.data)
            _fc2_weight[i].copy_(fc2_param.data)

            # Redirect param.data to view into contiguous buffer.
            # The nn.Parameter object stays the same — TE's internal state is preserved.
            fc1_param.data = _fc1_weight[i]
            fc2_param.data = _fc2_weight[i]

        # Register big tensors as non-persistent buffers (for .to() device movement, not saved)
        self.register_buffer('_fc1_weight', _fc1_weight, persistent=False)
        self.register_buffer('_fc2_weight', _fc2_weight, persistent=False)

    def _flashinfer_forward(self, hidden_states, routing_map, probs):
        """FlashInfer fused MoE kernel for CUDA-graphed inference iterations."""
        assert HAVE_FLASHINFER, "flashinfer-python is required for FlashInfer forward path."
        assert probs.dtype == torch.float32, "FlashInfer forward path requires fp32 probabilities."
        output = fused_moe.cutlass_fused_moe(
            hidden_states,
            routing_map.int(),
            probs,
            self._fc1_weight,
            self._fc2_weight,
            hidden_states.dtype,
            quant_scales=None,
            activation_type=self._flashinfer_activation_type,
            ep_size=self.ep_group.size(),
            ep_rank=self.ep_group.rank(),
        )[0]
        return output, None

    def _mcore_fused_moe_forward(
        self, hidden_states, probs, routing_map=None, tokens_per_expert=None, skip_permute=False
    ):
        """Torch grouped_mm fused MoE forward via mcore_fused_moe."""
        local_expert_start = self.ep_group.rank() * self.num_local_experts
        output = mcore_fused_moe(
            hidden_states,
            probs,
            self._fc1_weight,
            self._fc2_weight,
            activation_type=self._mcore_activation_type,
            num_local_experts=self.num_local_experts,
            local_expert_start=local_expert_start,
            routing_map=routing_map,
            tokens_per_expert=tokens_per_expert,
            skip_permute=skip_permute,
            disable_fused_quant_kernels=self.config.inference_moe_disable_fused_quant_kernels,
        )
        return output, None

    def forward(
        self,
        permuted_local_hidden_states: torch.Tensor,
        tokens_per_expert: Optional[torch.Tensor],
        permuted_probs: torch.Tensor,
        routing_map: Optional[torch.Tensor] = None,
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        """Forward pass with three modes:

        - Training: delegates to parent TEGroupedMLP.
        - Inference + CUDA graphed: FlashInfer cutlass_fused_moe. tokens_per_expert
          is not used in this path; the FlashInfer kernel operates directly on
          routing_map.
        - Inference + eager: torch.nn.functional.grouped_mm with GPU-resident cumsum offsets.

        Args:
            permuted_local_hidden_states: [num_tokens, hidden_size] input hidden states.
            tokens_per_expert: [num_experts] number of tokens routed to each expert.
                None when using the CUDA-graphed FlashInfer path.
            permuted_probs: [num_tokens, topk] routing probabilities.
            routing_map: [num_tokens, topk] token-to-expert assignment indices.
                Required for the FlashInfer CUDA-graphed path, None otherwise.
        """

        if self.training:
            assert (
                not self.config.fp8_recipe == "mxfp8"
            ), "MXFP8 inference optimized is not compatible with training / colocated RL."
            return super().forward(permuted_local_hidden_states, tokens_per_expert, permuted_probs)

        # Lazily build concatenated weights on first forward (after checkpoint load)
        if not self._concatenated_weights_built:
            if self.config.fp8_recipe == "mxfp8":
                self._build_concatenated_mxfp8_weights()
            else:
                self._build_concatenated_weights()
            self._concatenated_weights_built = True

        resolved_backend = resolve_inference_grouped_gemm_backend(
            self.inference_grouped_gemm_backend,
            self.is_inference_cuda_graphed_iteration,
            is_mxfp8=self.config.fp8_recipe == "mxfp8",
        )

        if resolved_backend == InferenceGroupedGemmBackend.FLASHINFER:
            assert routing_map is not None, "routing_map is required for FlashInfer forward pass."
            assert (
                self.is_inference_cuda_graphed_iteration
            ), "FlashInfer forward path is only used in CUDA-graphed inference iterations."
            return self._flashinfer_forward(
                permuted_local_hidden_states, routing_map, permuted_probs
            )
        elif resolved_backend == InferenceGroupedGemmBackend.TORCH:
            return self._mcore_fused_moe_forward(
                permuted_local_hidden_states,
                permuted_probs,
                routing_map=routing_map,
                tokens_per_expert=tokens_per_expert,
                skip_permute=(not self.is_inference_cuda_graphed_iteration),
            )
        elif resolved_backend == InferenceGroupedGemmBackend.TE:
            return super().forward(permuted_local_hidden_states, tokens_per_expert, permuted_probs)


class SequentialMLP(MegatronModule):
    """An implementation of the Experts layer using a sequence of MLP layers.

    This class executes each expert sequentially.
    """

    # TODO(M4): breaking api, switched from pass in tp_group to pass in pg_collection.
    def __init__(
        self,
        num_local_experts,
        config: TransformerConfig,
        submodules: MLPSubmodules,
        pg_collection: Optional[ProcessGroupCollection] = None,
    ):

        if config.moe_ffn_hidden_size == config.ffn_hidden_size:
            super().__init__(config=config)
        else:
            # Local SequentialMLP can still be used here by overriding the ffn_hidden_size
            # with a deepcopied config.
            sequential_mlp_config = deepcopy(config)
            sequential_mlp_config.ffn_hidden_size = config.moe_ffn_hidden_size
            super().__init__(config=sequential_mlp_config)

        self.num_local_experts = num_local_experts
        self.local_experts = torch.nn.ModuleList()
        self.ep_group = pg_collection.ep
        self.tp_group = pg_collection.expt_tp
        # use pg_collection.expt_dp_group as data parallel group in this module.
        # TODO (Hepteract): expt_dp wont be needed here once distributed checkpoint is refactored
        self.dp_group = pg_collection.expt_dp

        for _ in range(self.num_local_experts):
            expert = MLP(
                self.config,
                submodules,
                ffn_hidden_size=self.config.moe_ffn_hidden_size,
                is_expert=True,
                tp_group=pg_collection.expt_tp,
            )
            self.local_experts.append(expert)

    def _pad_tensor_for_quantization(self, hidden, probs):
        """Padding tensor shape to multiples of 16/32."""
        actual_num_tokens = hidden.shape[0]
        divisor = get_align_size_for_quantization(self.config)
        padded_num_tokens = ceil(actual_num_tokens / divisor) * divisor - actual_num_tokens
        if padded_num_tokens > 0:
            pad_tensor = torch.zeros(
                padded_num_tokens, hidden.shape[1], dtype=hidden.dtype, device=hidden.device
            )
            hidden = torch.cat((hidden, pad_tensor), dim=0)
            pad_probs = torch.zeros(padded_num_tokens, dtype=probs.dtype, device=probs.device)
            probs = torch.cat((probs, pad_probs), dim=0)
        return hidden, probs

    def forward(
        self,
        permuted_local_hidden_states: torch.Tensor,
        tokens_per_expert: torch.Tensor,
        permuted_probs: torch.Tensor,
    ):
        """Forward step of the SequentialMLP."""

        if self.config.moe_apply_probs_on_input:
            assert (
                self.config.moe_router_topk == 1
            ), "`moe_apply_probs_on_input` only works with `moe_router_topk`=1."
            original_dtype = permuted_local_hidden_states.dtype
            permuted_local_hidden_states = (
                permuted_probs.unsqueeze(-1) * permuted_local_hidden_states
            )
            permuted_local_hidden_states = permuted_local_hidden_states.to(original_dtype)
            # Probs already applied, so reset to 1.
            permuted_probs = torch.ones_like(permuted_probs)

        if self.num_local_experts == 1:
            if self.config.fp8 or self.config.fp4:
                hidden, probs = self._pad_tensor_for_quantization(
                    permuted_local_hidden_states, permuted_probs
                )
                output, output_bias = self.local_experts[0](hidden, probs)
                output = output[: permuted_local_hidden_states.shape[0]]
            else:
                output, output_bias = self.local_experts[0](
                    permuted_local_hidden_states, permuted_probs
                )

            return output, output_bias
        else:
            tokens_per_expert = tokens_per_expert.tolist()
            tokens_list = torch.split(permuted_local_hidden_states, tokens_per_expert)
            probs_list = torch.split(permuted_probs, tokens_per_expert)

            output_local_list = []

            for expert, tokens, probs in zip(self.local_experts, tokens_list, probs_list):
                if self.config.fp8 or self.config.fp4:
                    hidden, probs = self._pad_tensor_for_quantization(tokens, probs)
                    output, output_bias = expert(hidden, probs)
                    output = output[: tokens.shape[0]]
                else:
                    output, output_bias = expert(tokens, probs)
                output_local_list.append(output)

            output_local = torch.cat(output_local_list, dim=0)
            output_bias_local = None
            # Note: if bias is enabled on experts, it is already added to the output at this point
            return output_local, output_bias_local

    def backward_dw(self):
        """Backward pass for weight gradients in SequentialMLP."""
        for expert in self.local_experts:
            expert.backward_dw()

    def sharded_state_dict(self, prefix='', sharded_offsets=(), metadata=None):
        """Maps local expert to global experts."""
        # Guard for cases metadata is not provided
        metadata = ensure_metadata_has_dp_cp_group(metadata)

        sharded_state_dict = {}
        num_global_experts = self.ep_group.size() * self.num_local_experts
        local_expert_indices_offset = self.ep_group.rank() * self.num_local_experts

        singleton_local_shards = (metadata or {}).get('singleton_local_shards', False)

        for expert_local_idx, expert in enumerate(self.local_experts):
            expert_global_idx = local_expert_indices_offset + expert_local_idx
            expert_state_dict_prefix = f'{prefix}local_experts.{expert_local_idx}.'
            if singleton_local_shards:
                expert_sharded_prefix = f'{prefix}experts.{expert_global_idx}.'
                expert_sharded_offsets = sharded_offsets
            else:
                expert_sharded_prefix = f'{prefix}experts.'
                expert_sharded_offsets = (
                    *sharded_offsets,
                    (len(sharded_offsets), expert_global_idx, num_global_experts),
                )

            expert_state_dict = expert.sharded_state_dict(
                expert_state_dict_prefix, expert_sharded_offsets, metadata
            )
            # Remove expert layers indexing from sharded keys
            replace_prefix_for_sharding(
                expert_state_dict, expert_state_dict_prefix, expert_sharded_prefix
            )
            # Adjust replica ids - replication along DP modulo EP
            for k, sh_ten in expert_state_dict.items():
                replica_id = sh_ten.replica_id
                assert (
                    len(replica_id) == 3
                ), f'Expected replica_id for {k} to be in (PP, TP, DP) format, got: {replica_id}'

                sh_ten.replica_id = (*replica_id[:2], self.dp_group.rank())

            sharded_state_dict.update(expert_state_dict)
        return sharded_state_dict


================================================
FILE: megatron/core/transformer/moe/fused_a2a.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
# Portions of this code are from DeepSeek DeepEP project
# Copyright (c) 2025 DeepSeek
# Licensed under the MIT License - https://github.com/deepseek-ai/DeepEP/blob/main/LICENSE

from megatron.core.utils import internal_api

try:
    from deep_ep import Buffer
    from deep_ep.utils import EventHandle, EventOverlap

    HAVE_DEEP_EP = True
except ImportError:
    HAVE_DEEP_EP = False

import torch

_buffer = None


def get_hidden_bytes(x: torch.Tensor) -> int:
    """Calculate the number of hidden bytes for a tensor.

    Args:
        x (torch.Tensor): Input tensor

    Returns:
        int: Number of hidden bytes
    """
    return x.size(1) * max(x.element_size(), 2)


def get_buffer(group: torch.distributed.ProcessGroup, hidden_bytes: int):
    """Get or create a buffer for all-to-all communication.

    Args:
        group (torch.distributed.ProcessGroup): Process group for communication
        hidden_bytes (int): Number of hidden bytes needed

    Returns:
        Buffer: Communication buffer
    """
    global _buffer
    num_nvl_bytes, num_rdma_bytes = 0, 0
    for config in (
        Buffer.get_dispatch_config(group.size()),
        Buffer.get_combine_config(group.size()),
    ):
        # Split long line for PEP8 compliance
        num_nvl_bytes = max(
            config.get_nvl_buffer_size_hint(hidden_bytes, group.size()), num_nvl_bytes
        )
        num_rdma_bytes = max(
            config.get_rdma_buffer_size_hint(hidden_bytes, group.size()), num_rdma_bytes
        )

    # Allocate buffer if not existed or not enough buffer
    # NOTES: the adaptive routing configuration of the network **must be off**
    if (
        _buffer is None
        or _buffer.group != group
        or _buffer.num_nvl_bytes < num_nvl_bytes
        or _buffer.num_rdma_bytes < num_rdma_bytes
    ):
        _buffer = Buffer(group, num_nvl_bytes, num_rdma_bytes)
    return _buffer


class FusedDispatch(torch.autograd.Function):
    """Fused dispatch operation for MoE routing combining computation and communication."""

    @staticmethod
    def forward(
        ctx,
        x,
        token_indices,
        token_probs,
        num_experts,
        group,
        async_finish=False,
        allocate_on_comm_stream=False,
    ):
        """Forward pass of fused dispatch."""
        previous_event = None
        if async_finish:
            previous_event = EventOverlap(EventHandle())
        # Calculate layout before actual dispatch
        buffer = get_buffer(group, get_hidden_bytes(x))
        (
            num_tokens_per_rank,
            num_tokens_per_rdma_rank,
            num_tokens_per_expert,
            is_token_in_rank,
            event,
        ) = buffer.get_dispatch_layout(
            token_indices,
            num_experts,
            previous_event=previous_event,
            async_finish=async_finish,
            allocate_on_comm_stream=allocate_on_comm_stream,
        )

        # Do MoE dispatch
        # NOTES: the CPU will wait for GPU's signal to arrive,
        # so this is not compatible with CUDA graph
        (
            recv_x,
            recv_token_indices,
            recv_token_probs,
            num_recv_tokens_per_expert_list,
            handle,
            after_event_overlap,
        ) = buffer.dispatch(
            x,
            topk_idx=token_indices,
            topk_weights=token_probs,  # DeepEP only supports float32 probs
            num_tokens_per_rank=num_tokens_per_rank,
            num_tokens_per_rdma_rank=num_tokens_per_rdma_rank,
            is_token_in_rank=is_token_in_rank,
            num_tokens_per_expert=num_tokens_per_expert,
            previous_event=event,  # wait in deepep::intra/inter_dispatch
            async_finish=async_finish,
            allocate_on_comm_stream=allocate_on_comm_stream,
        )

        # Make sure current stream is synchronized
        if async_finish:
            after_event_overlap.current_stream_wait()

        # Save for backward
        ctx.group = group
        ctx.handle = handle
        ctx.async_finish = async_finish
        ctx.allocate_on_comm_stream = allocate_on_comm_stream
        tokens_per_expert = torch.tensor(num_recv_tokens_per_expert_list)

        return (recv_x, recv_token_indices, recv_token_probs, tokens_per_expert, handle)

    @staticmethod
    def backward(
        ctx, grad_output, grad_token_indices, grad_token_probs, grad_tokens_per_expert, grad_handle
    ):
        """Backward pass of fused dispatch."""
        buffer = get_buffer(ctx.group, get_hidden_bytes(grad_output))
        handle = ctx.handle
        previous_event = None
        if ctx.async_finish:
            previous_event = EventOverlap(EventHandle())
        grad_x, grad_token_probs, after_event = buffer.combine(
            grad_output.contiguous(),
            handle,
            topk_weights=grad_token_probs.float(),
            previous_event=previous_event,
            async_finish=ctx.async_finish,
            allocate_on_comm_stream=ctx.allocate_on_comm_stream,
        )
        # Make sure current stream is synchronized
        if ctx.async_finish:
            after_event.current_stream_wait()
        return grad_x, None, grad_token_probs, None, None, None, None


class FusedCombine(torch.autograd.Function):
    """Fused combine operation for MoE output combining computation and communication."""

    @staticmethod
    def forward(ctx, x, group, handle, async_finish=False, allocate_on_comm_stream=False):
        """Forward pass of fused combine."""
        previous_event = None
        if async_finish:
            previous_event = EventOverlap(EventHandle())
        buffer = get_buffer(group, get_hidden_bytes(x))
        combined_x, _, after_event = buffer.combine(
            x,
            handle=handle,
            async_finish=async_finish,
            previous_event=previous_event,
            allocate_on_comm_stream=allocate_on_comm_stream,
        )
        # Make sure current stream is synchronized
        if async_finish:
            after_event.current_stream_wait()

        ctx.handle = handle
        ctx.group = group
        ctx.async_finish = async_finish
        ctx.allocate_on_comm_stream = allocate_on_comm_stream
        return combined_x, None

    @staticmethod
    def backward(ctx, grad_output, previous_event=None):
        """Backward pass of fused combine."""
        previous_event = None
        if ctx.async_finish:
            previous_event = EventOverlap(EventHandle())
        buffer = get_buffer(ctx.group, get_hidden_bytes(grad_output))
        grad_x, _, _, _, _, after_event = buffer.dispatch(
            grad_output.contiguous(),
            handle=ctx.handle,
            previous_event=previous_event,
            async_finish=ctx.async_finish,
            allocate_on_comm_stream=ctx.allocate_on_comm_stream,
        )
        # Make sure current stream is synchronized
        if ctx.async_finish:
            after_event.current_stream_wait()
        return grad_x, None, None, None, None


if HAVE_DEEP_EP:

    def fused_dispatch(
        x,
        token_indices,
        token_probs,
        num_experts,
        group,
        async_finish=False,
        allocate_on_comm_stream=False,
    ):
        """Perform fused dispatch operation if deep_ep is available.

        Args:
            x: Input tensor [num_tokens, hidden_size]
            token_indices: Token routing indices [num_tokens, topk]
            token_probs: Token routing probabilities [num_tokens, topk]
            num_experts: Number of experts
            group: Process group
            previous_event: Previous CUDA event

        Returns:
            Result of FusedDispatch
        """
        return FusedDispatch.apply(
            x.contiguous(),
            token_indices,
            token_probs,
            num_experts,
            group,
            async_finish,
            allocate_on_comm_stream,
        )

    def fused_combine(x, group, handle, async_finish=False, allocate_on_comm_stream=False):
        """Perform fused combine operation if deep_ep is available.

        Args:
            x: Input tensor
            group: Process group
            handle: Communication handle
            previous_event: Previous CUDA event

        Returns:
            Result of FusedCombine
        """
        return FusedCombine.apply(x, group, handle, async_finish, allocate_on_comm_stream)

    def set_deepep_num_sms(num_sms):
        """Sets the number of SMs to use for DeepEP"""
        Buffer.set_num_sms(num_sms)

else:
    fused_dispatch = None
    fused_combine = None
    set_deepep_num_sms = None


try:
    from deep_ep import HybridEPBuffer

    HAVE_HYBRIDEP = True
except ImportError:
    HAVE_HYBRIDEP = False

_hybrid_ep_buffer = None


def init_hybrid_ep_buffer(
    group: torch.distributed.ProcessGroup,
    hidden_dim: int,
    seq_len: int,
    num_local_experts: int,
    num_sms_dispatch_api: int,
    num_sms_combine_api: int,
    fp8_dispatch: bool,
) -> None:
    '''
    Initialize the HybridEP buffer, including buffer allocation and metadata
    initialization.

    If a runtime dispatch/combine requires a larger buffer than the one
    initialized, the buffer will be reallocated at runtime,
    incuring extra run-time overhead.

    Args:
        group (torch.distributed.ProcessGroup):
            Process group for HybridEP all-to-all communication.
        hidden_dim (int):
            Hidden dimension of the input tensor.
        seq_len (int):
            Maximum sequence length of the input tensor.
        num_local_experts (int):
            Number of local experts.
        num_sms_dispatch_api (int):
            Number of SMs used by the dispatch API.
        num_sms_combine_api (int):
            Number of SMs used by the combine API.
        fp8_dispatch (bool):
            Whether to use FP8 communication during the dispatch phase.
    '''
    assert not fp8_dispatch, "HybridEP dispatcher does not support fp8 dispatch now"
    global _hybrid_ep_buffer
    _hybrid_ep_buffer = HybridEPBuffer(
        group=group,
        hidden_dim=hidden_dim,
        max_num_of_tokens_per_rank=seq_len,
        num_local_experts=num_local_experts,
        use_fp8=fp8_dispatch,
        num_sms_dispatch_api=num_sms_dispatch_api,
        num_sms_combine_api=num_sms_combine_api,
    )


def reset_hybrid_ep_buffer():
    '''
    Reset the HybridEP buffer
    '''
    global _hybrid_ep_buffer
    _hybrid_ep_buffer = None


class HybridEPDispatch(torch.autograd.Function):
    '''
    Fused dispatch operation for permute + dispatch a2a + permute using the HybridEP backend
    '''

    @staticmethod
    def forward(
        ctx,
        x,
        routing_map,
        probs,
        group,
        num_local_experts,
        num_sms_dispatch_api=24,
        num_sms_combine_api=24,
        num_permuted_tokens=None,
        pad_multiple=None,
    ):
        '''
        Forward pass of fused dispatch of the HybridEP backend
        '''
        if _hybrid_ep_buffer is None:
            seq_len, hidden_dim = x.shape[-2:]
            fp8_dispatch = False  # Currently, we do not support fp8 dispatch
            init_hybrid_ep_buffer(
                group,
                hidden_dim,
                seq_len,
                num_local_experts,
                num_sms_dispatch_api,
                num_sms_combine_api,
                fp8_dispatch,
            )
        # If we provide the num_permuted_tokens, we do not need to use sync to
        # wait for the data in pinned memory ready
        non_blocking = num_permuted_tokens is not None
        # Process the dispatch
        (
            dispatched_hidden,
            dispatched_probs,
            dispatched_scaling_factor,
            tokens_per_expert,
            handle,
        ) = _hybrid_ep_buffer.dispatch_with_permute(
            hidden=x,
            routing_map=routing_map,
            probs=probs,
            scaling_factor=None,
            num_of_experts_per_rank=num_local_experts,
            pad_multiple=pad_multiple,
            num_permuted_tokens=num_permuted_tokens,
            non_blocking=non_blocking,
        )

        ctx.handle = handle
        ctx.pad_multiple = pad_multiple
        return (
            dispatched_hidden,
            dispatched_probs,
            dispatched_scaling_factor,
            tokens_per_expert,
            handle,
        )

    @staticmethod
    def backward(ctx, grad_x, grad_probs, grad_scaling_factor, grad_tokens_per_expert, grad_handle):
        '''
        Backward pass of fused dispatch of the HybridEP backend
        '''
        handle = ctx.handle
        combined_hidden, combined_probs = _hybrid_ep_buffer.combine_with_unpermute(
            hidden=grad_x, probs=grad_probs, handle=handle, pad_multiple=ctx.pad_multiple
        )
        return combined_hidden, None, combined_probs, None, None, None, None, None, None, None


@internal_api
class HybridEPCombine(torch.autograd.Function):
    '''
    Fused combine operation for permute + combine a2a + permute using the HybridEP backend
    '''

    @staticmethod
    def forward(ctx, x, handle, num_permuted_tokens=None, pad_multiple=None):
        '''
        Forward pass of fused combine of the HybridEP backend
        '''
        combined_hidden, _ = _hybrid_ep_buffer.combine_with_unpermute(
            hidden=x, handle=handle, pad_multiple=pad_multiple
        )
        ctx.handle = handle
        ctx.pad_multiple = pad_multiple
        ctx.num_permuted_tokens = num_permuted_tokens
        return combined_hidden

    @staticmethod
    def backward(ctx, grad_x):
        '''
        Backward pass of fused combine of the HybridEP backend
        '''
        handle = ctx.handle
        dispatched_hidden, _, _, _, _ = _hybrid_ep_buffer.dispatch_with_permute(
            hidden=grad_x,
            scaling_factor=None,
            handle=handle,
            pad_multiple=ctx.pad_multiple,
            num_permuted_tokens=ctx.num_permuted_tokens,
        )
        return dispatched_hidden, None, None, None, None


if HAVE_HYBRIDEP:

    @internal_api
    def hybrid_ep_dispatch(
        x,
        routing_map,
        probs,
        group,
        num_local_experts,
        num_sms_dispatch_api=24,
        num_sms_combine_api=24,
        num_permuted_tokens=None,
        pad_multiple=None,
    ):
        '''
        Perform fused dispatch for "permute + dispatch a2a + permute" using the
        HybridEP backend.

        Args:
            x (torch.Tensor):
                Input hidden states to dispatch.
            routing_map (torch.Tensor):
                Map indicating which expert each token is routed to.
            probs (torch.Tensor):
                Routing probabilities for each token-expert pair.
            group (torch.distributed.ProcessGroup):
                Process group used for communication.
            num_local_experts (int):
                Number of local experts.
            num_sms_dispatch_api (int):
                Number of SMs used by the dispatch API.
            num_sms_combine_api (int):
                Number of SMs used by the combine API.
            num_permuted_tokens (int):
                Number of tokens after permute. HybridEP uses this to allocate buffers.
                If not provided, HybridEP obtains the size from a GPU tensor,
                which causes a D2H synchronization.
            pad_multiple (int):
                Alignment multiple required for FP8 GEMM. If not provided, no padding
                is performed.
        '''
        return HybridEPDispatch.apply(
            x,
            routing_map,
            probs,
            group,
            num_local_experts,
            num_sms_dispatch_api,
            num_sms_combine_api,
            num_permuted_tokens,
            pad_multiple,
        )

    @internal_api
    def hybrid_ep_combine(x, handle, num_permuted_tokens, pad_multiple):
        '''
        Perform fused combine operation for unpermute + combine a2a + unpermute
        using the HybridEP backend

        args:
            x (torch.Tensor):
                Input hidden states to combine
            handle (EventHandle):
                Communication handle from dispatch operation
            num_permuted_tokens (int): The number of tokens before unpermute. HybridEP uses this
                to allocate buffers. If not provided, HybridEP obtains the size from a GPU tensor,
                which causes a D2H synchronization.
            pad_multiple (int):
                The alignment multiple required for FP8 GEMM. If not provided, no padding
                is performed.
        '''
        return HybridEPCombine.apply(x, handle, num_permuted_tokens, pad_multiple)

else:
    hybrid_ep_dispatch = None
    hybrid_ep_combine = None


================================================
FILE: megatron/core/transformer/moe/moe_layer.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Optional, Protocol, Union

import torch

from megatron.core import parallel_state, tensor_parallel, utils
from megatron.core.extensions.transformer_engine import HAVE_TE
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.moe.moe_utils import (
    MoECudaGraphPartialCaptureSignal,
    MoECudaGraphTensorStore,
    get_default_pg_collection,
    maybe_skip_or_early_return_by_cudagraph,
)
from megatron.core.transformer.moe.router import TopKRouter
from megatron.core.transformer.moe.token_dispatcher import (
    MoEAllGatherTokenDispatcher,
    MoEAlltoAllTokenDispatcher,
    MoEFlexTokenDispatcher,
    MoETokenDispatcher,
)
from megatron.core.transformer.moe.token_dispatcher_inference import (
    InferenceCUDAGraphTokenDispatcher,
)
from megatron.core.transformer.spec_utils import ModuleSpec, build_module
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.typed_torch import apply_module
from megatron.core.utils import internal_api

try:
    import flashinfer  # pylint: disable=unused-import

    HAVE_FLASHINFER = True
except ImportError:
    HAVE_FLASHINFER = False

if HAVE_FLASHINFER:
    try:
        import flashinfer_cubin  # pylint: disable=unused-import
        import flashinfer_jit_cache  # pylint: disable=unused-import

        HAVE_FLASHINFER_CUBIN_AND_JIT_CACHE = True
    except ImportError:
        HAVE_FLASHINFER_CUBIN_AND_JIT_CACHE = False

if HAVE_TE:
    from megatron.core.extensions.transformer_engine import TELinear, te_checkpoint
else:
    TELinear, te_checkpoint = None, None


class RouterInterface(Protocol):
    """Interface for the router used in an MoELayer."""

    def forward(self, input: torch.Tensor, /) -> tuple[torch.Tensor, torch.Tensor]:
        """Forward pass of the router.

        Returns:
            A tuple of (probabilities, routing_map).
        """
        ...

    def set_layer_number(self, layer_number: int) -> None:
        """Set the layer number for the router.

        Called from transformer_layer during initialization.
        """
        ...


class RouterBuilder(Protocol):
    """Protocol for building a Router."""

    def __call__(
        self, /, *, config: TransformerConfig, pg_collection: ProcessGroupCollection | None
    ) -> RouterInterface: ...


@dataclass
class MoESubmodules:
    """MoE Layer Submodule spec"""

    experts: Union[ModuleSpec, type] = None
    shared_experts: Union[ModuleSpec, type] = None
    router: RouterBuilder = TopKRouter


class BaseMoELayer(MegatronModule, ABC):
    """Base class for a mixture of experts layer.

    Args:
        config (TransformerConfig): Configuration object for the transformer model.
    """

    def __init__(
        self,
        config: TransformerConfig,
        layer_number: Optional[int] = None,
        pg_collection: Optional[ProcessGroupCollection] = None,
        is_mtp_layer: bool = False,
    ):
        super(BaseMoELayer, self).__init__(config)
        self.config = config
        self.layer_number = layer_number
        self.is_mtp_layer = is_mtp_layer
        self.ep_group = pg_collection.ep
        # use pg_collection.expt_tp_group as tensor parallel group in this module.
        self.attn_tp_group = pg_collection.tp
        ep_size = utils.get_pg_size(self.ep_group)
        ep_rank = utils.get_pg_rank(self.ep_group)
        assert ep_size > 0, "Expected non-negative expert parallel size"

        assert self.config.num_moe_experts % ep_size == 0
        self.num_local_experts = self.config.num_moe_experts // ep_size
        local_expert_indices_offset = ep_rank * self.num_local_experts

        self.use_shared_expert = self.config.moe_shared_expert_intermediate_size is not None
        self.shared_expert_overlap = self.config.moe_shared_expert_overlap

        self.local_expert_indices = [
            local_expert_indices_offset + i for i in range(self.num_local_experts)
        ]
        assert all(map(lambda x: x < self.config.num_moe_experts, self.local_expert_indices))
        self.router: RouterInterface = None
        self.experts = None
        self.shared_experts = None
        self.token_dispatcher: Optional[MoETokenDispatcher] = None
        self.layer_number = layer_number

    @abstractmethod
    def forward(self, hidden_states):
        """Forward method for the MoE layer."""
        pass

    def set_layer_number(self, layer_number: int):
        """Set the layer number for the MoE layer."""
        self.layer_number = layer_number
        self.router.set_layer_number(layer_number)


class MoELayer(BaseMoELayer):
    """Mixture of Experts layer.

    This layer implements a Mixture of Experts model, where each token is routed to a
    subset of experts. This implementation supports different token dispatching
    strategies such as All-to-All and All-Gather.
    """

    def __init__(
        self,
        config: TransformerConfig,
        submodules: Optional[MoESubmodules] = None,
        layer_number: Optional[int] = None,
        pg_collection: Optional[ProcessGroupCollection] = None,
        is_mtp_layer: bool = False,
    ):
        self.submodules = submodules
        # TODO(Hepteract): delete the usage of the global parallel_state.
        # Initialize process groups with the global parallel_state.
        if pg_collection is None:
            pg_collection = get_default_pg_collection()
        super(MoELayer, self).__init__(
            config=config,
            layer_number=layer_number,
            pg_collection=pg_collection,
            is_mtp_layer=is_mtp_layer,
        )
        # If using mcore cudagraphs, recompute is handled by transformer_layer.MoETransformerLayer
        self.moe_layer_recompute = (
            config.recompute_granularity == 'selective'
            and "moe" in config.recompute_modules
            and config.cuda_graph_impl != 'local'
        )
        self.shared_experts_recompute = (
            config.recompute_granularity == 'selective'
            and "shared_experts" in config.recompute_modules
        )

        self.tp_group = pg_collection.tp

        # Initialize router.
        self.router = submodules.router(
            config=self.config, pg_collection=pg_collection, is_mtp_layer=is_mtp_layer
        )
        self.tp_group = pg_collection.tp

        # Initialize latent projections.
        if self.config.moe_latent_size:
            assert HAVE_TE, "TransformerEngine is required for MoE latent projections."
            self.fc1_latent_proj = TELinear(
                self.config.hidden_size,
                self.config.moe_latent_size,
                parallel_mode="duplicated",
                config=self.config,
                init_method=self.config.init_method,
                bias=self.config.add_bias_linear,
                skip_bias_add=False,
                skip_weight_param_allocation=False,
                is_expert=False,
            )
            self.fc2_latent_proj = TELinear(
                self.config.moe_latent_size,
                self.config.hidden_size,
                parallel_mode="duplicated",
                config=self.config,
                init_method=self.config.output_layer_init_method,
                bias=self.config.add_bias_linear,
                skip_bias_add=False,
                skip_weight_param_allocation=False,
                is_expert=False,
            )

        # Initialize token dispatcher
        if config.moe_token_dispatcher_type == "allgather":
            self.token_dispatcher = MoEAllGatherTokenDispatcher(
                self.num_local_experts,
                self.local_expert_indices,
                config=self.config,
                pg_collection=pg_collection,
            )
        elif config.moe_token_dispatcher_type == "alltoall":
            self.token_dispatcher = MoEAlltoAllTokenDispatcher(
                self.num_local_experts,
                self.local_expert_indices,
                config=self.config,
                pg_collection=pg_collection,
            )
        elif config.moe_token_dispatcher_type == "flex":
            self.token_dispatcher = MoEFlexTokenDispatcher(
                self.num_local_experts,
                self.local_expert_indices,
                config=self.config,
                pg_collection=pg_collection,
            )
        else:
            raise ValueError(
                f"Unsupported token dispatcher type: {config.moe_token_dispatcher_type}"
            )

        # Initialize experts
        self.experts = build_module(
            self.submodules.experts,
            self.num_local_experts,
            self.config,
            pg_collection=pg_collection,
        )

        # Initialize shared experts
        if self.use_shared_expert:
            self.shared_experts = build_module(
                self.submodules.shared_experts,
                config=self.config,
                pg_collection=pg_collection,
                gate=self.config.moe_shared_expert_gate,
            )
            if self.shared_expert_overlap:
                self.token_dispatcher.set_shared_experts(self.shared_experts)

        # Inference-optimized mode setup
        if config.transformer_impl == "inference_optimized":
            if config.inference_grouped_gemm_backend == 'auto':
                assert HAVE_FLASHINFER, (
                    "inference_grouped_gemm_backend='auto'"
                    "requires flashinfer-python. "
                    "Install flashinfer-python or set "
                    "inference_grouped_gemm_backend to 'torch' or 'te'."
                )

                # Verify that pre-compiled FlashInfer CUTLASS kernels are available
                # when using the FlashInfer backend. The flashinfer-jit-cache package
                # must be installed ahead of time to avoid a multi-minute JIT
                # compilation step at runtime.
                from megatron.core.inference.utils import check_flashinfer_jit_cache_installed

                check_flashinfer_jit_cache_installed()
            elif config.inference_grouped_gemm_backend == 'torch':
                assert hasattr(torch.nn.functional, 'grouped_mm'), (
                    "inference_grouped_gemm_backend='torch' requires "
                    "torch.nn.functional.grouped_mm (available since PyTorch 2.10)."
                )
            self._setup_inference_mode(pg_collection)

        # Cudagraph tensor store for resuming the forward pass from the end of the cudagraph.
        self.cudagraph_tensor_store = MoECudaGraphTensorStore()
        self.fwd_execution_map = ["route", "expert_compute", "postprocess"]

    def _setup_inference_mode(self, pg_collection):
        """Set up inference-optimized token dispatcher and state.

        Called from __init__ when config.transformer_impl == "inference_optimized".
        Creates an InferenceCUDAGraphTokenDispatcher alongside the standard dispatcher,
        which is swapped in during CUDA-graphed forward passes.
        """

        assert self.config.moe_token_dispatcher_type == "alltoall", (
            f"Inference-optimized MoE requires 'alltoall' dispatcher, "
            f"got '{self.config.moe_token_dispatcher_type}'"
        )
        self.is_inference_cuda_graphed_iteration = False
        self._inference_token_dispatcher = InferenceCUDAGraphTokenDispatcher(
            self.num_local_experts,
            self.local_expert_indices,
            config=self.config,
            pg_collection=pg_collection,
        )

    def set_inference_cuda_graphed_iteration(self):
        """Enable CUDA-graphed iteration mode on this layer, its router, and its experts.

        Swaps in the inference-optimized token dispatcher and disables
        shared expert overlap.
        """
        self.is_inference_cuda_graphed_iteration = True
        if hasattr(self.router, "set_inference_cuda_graphed_iteration"):
            self.router.set_inference_cuda_graphed_iteration()
        if hasattr(self.experts, "set_inference_cuda_graphed_iteration"):
            self.experts.set_inference_cuda_graphed_iteration()

        if self._inference_token_dispatcher is not None:
            self._saved_token_dispatcher = self.token_dispatcher
            self.token_dispatcher = self._inference_token_dispatcher
            self._saved_shared_expert_overlap = self.shared_expert_overlap
            self.shared_expert_overlap = False

    def unset_inference_cuda_graphed_iteration(self):
        """Disable CUDA-graphed iteration mode on this layer, its router, and its experts.

        Restores the standard token dispatcher and shared expert overlap setting.
        """
        self.is_inference_cuda_graphed_iteration = False
        if hasattr(self.router, "unset_inference_cuda_graphed_iteration"):
            self.router.unset_inference_cuda_graphed_iteration()
        if hasattr(self.experts, "unset_inference_cuda_graphed_iteration"):
            self.experts.unset_inference_cuda_graphed_iteration()

        if hasattr(self, "_saved_token_dispatcher"):
            self.token_dispatcher = self._saved_token_dispatcher
            self.shared_expert_overlap = self._saved_shared_expert_overlap

    @maybe_skip_or_early_return_by_cudagraph("route")
    def route(self, hidden_states: torch.Tensor, padding_mask: Optional[torch.Tensor] = None):
        """Compute token routing for preprocessing.

        This method uses the router to determine which experts to send each token to,
        producing routing probabilities and a mapping.
        """
        probs, routing_map = apply_module(self.router)(hidden_states, padding_mask)
        return probs, routing_map

    @maybe_skip_or_early_return_by_cudagraph("preprocess")
    def preprocess(
        self, hidden_states: torch.Tensor, probs: torch.Tensor, routing_map: torch.Tensor
    ):
        """Preprocess token routing for dispatch.

        This method preprocesses the hidden states and routing probabilities for the token
        dispatcher.
        """
        # Project the hidden_states from hidden dimension down to latent dimenion.
        if self.config.moe_latent_size:
            assert (
                not self.shared_expert_overlap
            ), "Shared expert overlap not supported when MoE latent projections are used."
            hidden_states, _ = self.fc1_latent_proj(hidden_states)
        hidden_states, probs = self.token_dispatcher.dispatch_preprocess(
            hidden_states, routing_map, probs
        )
        return hidden_states, probs

    def dispatch(self, hidden_states: torch.Tensor, probs: torch.Tensor):
        """Dispatches tokens to assigned expert ranks via communication.

        This method performs the actual communication (e.g., All-to-All) to distribute
        tokens and their associated probabilities to the devices hosting their assigned
        experts.
        """
        return self.token_dispatcher.token_dispatch(hidden_states, probs)

    @maybe_skip_or_early_return_by_cudagraph("shared_experts_compute")
    def shared_experts_compute(self, hidden_states: torch.Tensor):
        """Computes the output of the shared experts.

        If a shared expert is configured and not overlapped with communication,
        it is computed here.
        """
        shared_expert_output = None
        if self.use_shared_expert and not self.shared_expert_overlap:
            # Compute the shared expert separately when not overlapped with communication.
            if self.shared_experts_recompute:
                if self.config.fp8 or self.config.fp4:
                    shared_expert_output = te_checkpoint(
                        self.shared_experts,
                        False,
                        tensor_parallel.random.get_cuda_rng_tracker,
                        parallel_state.get_tensor_model_parallel_group(),
                        hidden_states,
                    )
                else:
                    shared_expert_output = tensor_parallel.checkpoint(
                        self.shared_experts, False, hidden_states
                    )
            else:
                shared_expert_output = self.shared_experts(hidden_states)

        return shared_expert_output

    @internal_api
    def routed_experts_compute(self, hidden_states: torch.Tensor, probs: torch.Tensor):
        """Computes the output of the routed experts on the dispatched tokens.

        This method first post-processes the dispatched input to get permuted tokens
        for each expert. It then passes the tokens through the local experts.
        The output from the experts is preprocessed for the combine step.
        """
        dispatched_input, tokens_per_expert, permuted_probs = (
            self.token_dispatcher.dispatch_postprocess(hidden_states, probs)
        )
        if (
            hasattr(self, "_inference_token_dispatcher")
            and self.is_inference_cuda_graphed_iteration
        ):
            routing_map = self.token_dispatcher.routing_map
            expert_output, mlp_bias = self.experts(
                dispatched_input, tokens_per_expert, permuted_probs, routing_map=routing_map
            )
        else:
            expert_output, mlp_bias = self.experts(
                dispatched_input, tokens_per_expert, permuted_probs
            )
        assert mlp_bias is None, f"mlp_bias is not supported for {type(self.token_dispatcher)}"
        output = self.token_dispatcher.combine_preprocess(expert_output)

        return output, mlp_bias

    def combine(self, output: torch.Tensor):
        """Combines expert outputs via communication and adds shared expert output.

        This method uses the token dispatcher to combine the outputs from different
        experts (e.g., via an All-to-All communication).
        """
        output = self.token_dispatcher.token_combine(output)
        return output

    def postprocess(self, output: torch.Tensor, shared_expert_output: Optional[torch.Tensor]):
        """Project the output back from latent dimension to hidden dimension after combine
        in latent dimension if needed. Combine expert output with shared_experts if needed."""

        output = self.token_dispatcher.combine_postprocess(output)
        if self.config.moe_latent_size:
            output, _ = self.fc2_latent_proj(output)

        if shared_expert_output is not None:
            output = output + shared_expert_output
        return output

    def router_and_preprocess(self, hidden_states: torch.Tensor):
        """This method is a combined method of route and preprocess. Deprecated."""

        probs, routing_map = self.route(hidden_states)
        hidden_states, probs, residual = self.preprocess(hidden_states, probs, routing_map)
        return hidden_states, probs, residual

    def forward(
        self,
        hidden_states: torch.Tensor,
        intermediate_tensors=None,
        padding_mask: Optional[torch.Tensor] = None,
    ):
        """Forward pass for the MoE layer.

        The forward pass comprises four main steps:
        1. Routing & Preprocessing: Route tokens to the assigned experts and prepare for dispatch.
        2. Dispatch: Tokens are sent to the expert devices using communication collectives.
        3. Expert Computation: Experts process the dispatched tokens.
        4. Combine: The outputs from the experts are combined and returned.

        Args:
            hidden_states (torch.Tensor): The input tensor shape [seq_length, bsz, hidden_size].
            padding_mask (torch.Tensor, optional): Boolean mask indicating non-padding tokens.
                                                   Shape [seq_length, bsz]. True for valid tokens,
                                                   False for padding tokens. Defaults to None.
        Returns:
            A tuple containing the output tensor and the MLP bias, if any.
        """
        if self.training and self.attn_tp_group.size() > 1 and not self.config.sequence_parallel:
            raise ValueError(
                "During training, performance may degrade if MoE and tensor parallelism"
                "are enabled without also enabling sequence parallelism."
            )
        # Transpose from [bsz, seq_length] to [seq_length, bsz] to align with hidden_states
        if padding_mask is not None:
            padding_mask = padding_mask.transpose(0, 1).bool()

        # MoE forward: route -> dispatch -> compute -> combine
        def custom_forward(hidden_states, intermediate_tensors=None, padding_mask=None):
            try:
                if "route" in self.fwd_execution_map:
                    shared_expert_output = self.shared_experts_compute(hidden_states)
                    probs, routing_map = self.route(hidden_states, padding_mask)
                    hidden_states, probs = self.preprocess(hidden_states, probs, routing_map)

                    if intermediate_tensors is not None:
                        return hidden_states, probs, shared_expert_output

            except MoECudaGraphPartialCaptureSignal as e:
                # This signal is raised from the maybe_skip_or_early_return_by_cudagraph decorator.
                # It means we should early-return from the MoE layer forward pass.
                # This happens when we are partially capturing the CUDA graph of the MoE layer,
                # like cuda_graph_scope=["moe_router", "moe_preprocess"].
                # We need to return the intermediate tensors as CUDA graph outputs.
                return e.get_early_return_outputs(hidden_states, shared_expert_output)

            if "expert_compute" in self.fwd_execution_map:
                if intermediate_tensors is not None:
                    hidden_states, probs = intermediate_tensors

                dispatched_input, probs = self.dispatch(hidden_states, probs)
                output, mlp_bias = self.routed_experts_compute(dispatched_input, probs)
                assert (
                    mlp_bias is None
                ), f"mlp_bias is not supported for {type(self.token_dispatcher)}"
                output = self.combine(output)

                if intermediate_tensors is not None:
                    return output, mlp_bias

            if "postprocess" in self.fwd_execution_map:
                if intermediate_tensors is not None:
                    output, shared_expert_output = intermediate_tensors

                output = self.postprocess(output, shared_expert_output)

                if intermediate_tensors is not None:
                    return output

            return output, mlp_bias

        if self.moe_layer_recompute and self.training:
            if self.config.fp8 or self.config.fp4:
                outputs = te_checkpoint(
                    custom_forward,
                    False,
                    tensor_parallel.random.get_cuda_rng_tracker,
                    parallel_state.get_tensor_model_parallel_group(),
                    hidden_states,
                    intermediate_tensors,
                    padding_mask,
                )
            else:
                outputs = tensor_parallel.checkpoint(
                    custom_forward, False, hidden_states, intermediate_tensors, padding_mask
                )
        else:
            outputs = custom_forward(hidden_states, intermediate_tensors, padding_mask)

        return outputs

    def backward_dw(self, routed_experts: bool = True, shared_experts: bool = False):
        """Compute weight gradients for experts and shared experts."""
        # TODO(Wohox): replace the "routed_experts" and "shared_experts" arguments with better
        # naming to better explain that they are actually from different fine-grained callables,
        # or use scanning to decide which backward_dw should be called.
        if routed_experts:
            self.experts.backward_dw()
            if self.config.moe_latent_size:
                # TODO(Wohox): fc2_latent_proj forward and backward are executed in comm stream,
                # so we execute its backward_dw in the comm stream too. But this may harm the
                # EP overlap performance. Better to check if there is a better way to handle this.
                from megatron.core.pipeline_parallel.utils import get_comm_stream

                comm_stream = get_comm_stream()
                with torch.cuda.stream(comm_stream):
                    self.fc2_latent_proj.backward_dw()
        if shared_experts:
            if self.use_shared_expert and not self.shared_expert_overlap:
                self.shared_experts.backward_dw()
            if self.config.moe_latent_size:
                self.fc1_latent_proj.backward_dw()

    def set_for_recompute_pre_mlp_layernorm(self):
        """Set the MoE layer for recompute pre_mlp_layernorm. Only needed for fp8/fp4."""
        # If shared_experts_recompute is used, nothing needs to be done because the checkpoint
        # function will save the original input tensors.
        if self.shared_experts is not None and not self.shared_experts_recompute:
            from megatron.core.extensions.transformer_engine import set_save_original_input

            set_save_original_input(self.shared_experts.linear_fc1)


================================================
FILE: megatron/core/transformer/moe/moe_utils.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import functools
import math
from dataclasses import dataclass
from typing import List, Optional, Tuple, Union

import torch

from megatron.core import parallel_state
from megatron.core.extensions.transformer_engine import HAVE_TE
from megatron.core.fp4_utils import get_fp4_align_size
from megatron.core.fp8_utils import get_fp8_align_size
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.tensor_parallel import get_cuda_rng_tracker, get_expert_parallel_rng_tracker_name
from megatron.core.tensor_parallel.mappings import reduce_from_tensor_model_parallel_region
from megatron.core.transformer.cuda_graphs import is_graph_capturing
from megatron.core.transformer.enums import CudaGraphScope
from megatron.core.transformer.moe.router_replay import RouterReplay
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.utils import internal_api, is_te_min_version

if HAVE_TE:
    from megatron.core.extensions.transformer_engine import (
        fused_compute_score_for_moe_aux_loss,
        fused_moe_aux_loss,
        fused_permute,
        fused_permute_and_pad_with_probs,
        fused_permute_with_probs,
        fused_sort_chunks_by_index,
        fused_sort_chunks_by_index_with_probs,
        fused_topk_with_score_function,
        fused_unpermute,
        te_general_gemm,
    )
else:
    (
        fused_compute_score_for_moe_aux_loss,
        fused_moe_aux_loss,
        fused_permute,
        fused_permute_and_pad_with_probs,
        fused_permute_with_probs,
        fused_sort_chunks_by_index,
        fused_sort_chunks_by_index_with_probs,
        fused_topk_with_score_function,
        fused_unpermute,
        te_general_gemm,
    ) = (None, None, None, None, None, None, None, None, None, None)


# MOE logging
_MOE_LAYER_WISE_LOGGING_TRACKER: dict = {}


def switch_load_balancing_loss_func(
    probs: torch.Tensor,
    tokens_per_expert: torch.Tensor,
    total_num_tokens: int,
    topk: int,
    num_experts: int,
    moe_aux_loss_coeff: float,
    fused: bool = False,
    padding_mask: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """Calculate the auxiliary loss for load balancing.
    Refer to the Switch Transformer (https://arxiv.org/abs/2101.03961)
    and Global Load Balancing Loss(https://arxiv.org/abs/2501.11873) for details.

    Detailed explanation of the auxiliary loss:

    The formula for the auxiliary loss is:
        loss = E * Σ_{i=1}^{E} (f_i * P_i)
    where:
        f_i = 1 / (T * topk) * Σ_{x∈B} routing_map(x, i)
             (fraction of tokens dispatched to expert i)
        P_i = 1 / T * Σ_{x∈B} probs(x, i)
             (averaged router probability allocated for expert i)
        E is the number of experts
        T is the total number of tokens in the batch B

    For distributed training with sequence or context parallelism, each rank can
    process a subset of the batch.
        loss = E * Σ_{i=1}^{E} (f_i * Σ_{j=1}^{N} P_ij)
             = E * Σ_{i=1}^{E} Σ_{j=1}^{N} (f_i * P_ij)
             = Σ_{j=1}^{N} E * (Σ_{i=1}^{E} f_i * P_ij)

    where:
        f_i = 1 / (T * topk) * Σ_{x∈B} routing_map(x, i)
             (fraction of tokens dispatched to expert i in the global batch)
        P_ij = 1 / T * Σ_{x∈B_j} probs(x, i)
              (averaged router probability allocated for expert i in local batch of the j-th rank)
        N is the number of ranks
        B_j is the batch of tokens in the j-th rank
        T is the total number of tokens in the global batch B

    Note:
    To calculate the auxiliary loss at different levels (micro-batch or global batch):
    - probs: Should always be from the local batch being processed
    - tokens_per_expert: Should represent token counts at the desired level
      (either micro-batch or global batch)
    - total_num_tokens: Should match the total token count at the same level as tokens_per_expert

    Args:
        probs (torch.Tensor): Softmax probabilities output by the router for each token.
                              Shape in [num_tokens, num_experts].
        tokens_per_expert (torch.Tensor): Number of tokens assigned to each expert in the batch.
                                          Shape in [num_experts]
        total_num_tokens (int): Total number of tokens in the batch.
        topk (int): The number of experts selected for each token.
        num_experts (int): The number of experts.
        moe_aux_loss_coeff (float): The coefficient for the auxiliary loss.
        fused (bool): Whether to use the fused version of the auxiliary loss.
        padding_mask (torch.Tensor, optional): Boolean mask indicating non-padding tokens.
                                               Shape in [num_tokens]. True for valid tokens,
                                               False for padding tokens. Defaults to None.

    Returns:
        torch.Tensor: The auxiliary loss for load balancing.
    """
    # Apply padding mask to probs if provided
    if padding_mask is not None:
        # padding_mask: [num_tokens], probs: [num_tokens, num_experts]
        mask_expanded = padding_mask.unsqueeze(-1)
        probs = probs * mask_expanded

    if fused:
        if not HAVE_TE or fused_moe_aux_loss is None:
            raise ValueError("fused_moe_aux_loss is not available. Please install TE >= 2.7.0.")
        return fused_moe_aux_loss(
            probs=probs,
            tokens_per_expert=tokens_per_expert,
            total_num_tokens=total_num_tokens,
            topk=topk,
            num_experts=num_experts,
            coeff=moe_aux_loss_coeff,
        )

    aggregated_probs_per_expert = probs.sum(dim=0)
    aux_loss = torch.sum(aggregated_probs_per_expert * tokens_per_expert) * (
        num_experts * moe_aux_loss_coeff / (topk * total_num_tokens * total_num_tokens)
    )
    return aux_loss


def z_loss_func(
    logits: torch.Tensor, z_loss_coeff: float, padding_mask: Optional[torch.Tensor] = None
) -> torch.Tensor:
    """Encourages the router's logits to remain small to enhance stability.
    Please refer to the ST-MoE paper (https://arxiv.org/pdf/2202.08906.pdf) for details.

    Args:
        logits (torch.Tensor): The logits of the router.
        z_loss_coeff (float): The coefficient for the z-loss.
        padding_mask (torch.Tensor, optional): Boolean mask indicating padding positions.
                                               Shape [num_tokens]. True = padding (exclude),
                                               False = valid (include). Defaults to None.

    Returns:
        torch.Tensor: The logits after applying the z-loss.
    """
    logsum = torch.logsumexp(logits, dim=-1)
    z_loss_values = torch.square(logsum)

    if padding_mask is not None:
        # Invert padding_mask: True (padding) -> 0, False (valid) -> 1
        valid_mask = ~padding_mask
        # Only compute z_loss for valid (non-padding) tokens
        z_loss_values = z_loss_values * valid_mask
        # Compute mean over valid tokens only
        num_valid_tokens = valid_mask.sum()
        z_loss = z_loss_values.sum() / torch.clamp(num_valid_tokens, min=1.0) * z_loss_coeff
    else:
        z_loss = torch.mean(z_loss_values) * z_loss_coeff
    return z_loss


def sinkhorn(cost: torch.Tensor, tol: float = 0.0001) -> torch.Tensor:
    """Sinkhorn based MoE routing function.

    Args:
        cost (torch.Tensor): The cost tensor.
        tol (float): The tolerance for the Sinkhorn algorithm.

    Returns:
        torch.Tensor: The routing probabilities.
    """
    cost = torch.exp(cost)
    d0 = torch.ones(cost.size(0), device=cost.device, dtype=cost.dtype)
    d1 = torch.ones(cost.size(1), device=cost.device, dtype=cost.dtype)

    eps = 0.00000001
    error = 1e9
    d1_old = d1
    while error > tol:
        d0 = (1 / d0.size(0)) * 1 / (torch.sum(d1 * cost, 1) + eps)
        d1 = (1 / d1.size(0)) * 1 / (torch.sum(d0.unsqueeze(1) * cost, 0) + eps)
        error = torch.mean(torch.abs(d1_old - d1))
        d1_old = d1
    return d1 * cost * d0.unsqueeze(1)


def get_capacity(
    num_tokens: int, num_experts: int, capacity_factor: float, min_capacity: Optional[int] = None
) -> int:
    """
    Calculate the capacity of each expert.

    Args:
        num_tokens (int): num of the input tokens.
        num_experts (int): num of the experts.
        capacity_factor (float): Capacity factor.
        min_capacity (int, optional): Minimum capacity. Defaults to None.

    Returns:
        int: Capacity of each expert.
    """
    capacity = math.ceil((num_tokens / num_experts) * capacity_factor)
    if min_capacity is not None and capacity < min_capacity:
        capacity = min_capacity
    return capacity


def get_tokens_per_expert_and_token_count(
    routing_map: torch.Tensor,
    reduce_group: torch.distributed.ProcessGroup,
    topk: int = None,
    with_padding_mask: bool = False,
) -> torch.Tensor:
    """
    Compute global_tokens_per_expert, local_num_tokens and total_num_tokens with padding mask.
    """
    local_tokens_per_expert = routing_map.sum(dim=0)
    global_tokens_per_expert = reduce_from_tensor_model_parallel_region(
        local_tokens_per_expert, reduce_group
    )
    if with_padding_mask:
        local_num_tokens = local_tokens_per_expert.sum() / topk
        total_num_tokens = global_tokens_per_expert.sum() / topk
    else:
        local_num_tokens = routing_map.shape[0]
        total_num_tokens = local_num_tokens * reduce_group.size()
    return global_tokens_per_expert, local_num_tokens, total_num_tokens


class MoEAuxLossAutoScaler(torch.autograd.Function):
    """An AutoScaler that triggers the backward pass and scales the grad for auxiliary loss."""

    main_loss_backward_scale: Optional[torch.Tensor] = None

    @staticmethod
    def forward(ctx, output: torch.Tensor, aux_loss: torch.Tensor) -> torch.Tensor:
        """Preserve the aux_loss by storing it in the context to avoid garbage collection.

        Args:
            output (torch.Tensor): The output tensor.
            aux_loss (torch.Tensor): The auxiliary loss tensor.

        Returns:
            torch.Tensor: The output tensor.
        """
        ctx.save_for_backward(aux_loss)
        return output

    @staticmethod
    def backward(ctx, grad_output: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Compute and scale the gradient for auxiliary loss..

        Args:
            grad_output (torch.Tensor): The gradient of the output.

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: The gradient of the output, scaled auxiliary loss
                                               gradient.
        """
        (aux_loss,) = ctx.saved_tensors
        if MoEAuxLossAutoScaler.main_loss_backward_scale is None:
            MoEAuxLossAutoScaler.main_loss_backward_scale = torch.tensor(
                1.0, device=aux_loss.device
            )
        aux_loss_backward_scale = MoEAuxLossAutoScaler.main_loss_backward_scale
        scaled_aux_loss_grad = torch.ones_like(aux_loss) * aux_loss_backward_scale
        return grad_output, scaled_aux_loss_grad

    @staticmethod
    def set_loss_scale(scale: torch.Tensor) -> None:
        """set the scale of the aux loss.

        Args:
            scale (torch.Tensor): The scale value to set. Please ensure that the scale passed in
                                  matches the scale of the main_loss.
        """
        if MoEAuxLossAutoScaler.main_loss_backward_scale is None:
            MoEAuxLossAutoScaler.main_loss_backward_scale = scale
        else:
            MoEAuxLossAutoScaler.main_loss_backward_scale.copy_(scale)


def permute(
    tokens: torch.Tensor,
    routing_map: torch.Tensor,
    probs: Optional[torch.Tensor] = None,
    num_out_tokens: Optional[int] = None,
    fused: bool = False,
    drop_and_pad: bool = False,
    tokens_per_expert: Optional[torch.Tensor] = None,
    align_size: int = -1,
) -> Tuple[
    torch.Tensor,
    Optional[torch.Tensor],
    torch.Tensor,
    Optional[torch.Tensor],
    Optional[torch.Tensor],
]:
    """Permute the tokens and probs based on the mask.
    Tokens with the same designated expert will be grouped together.
    The shape of mask is [tokens, num_experts], it indicates which experts were selected
    by each token.

    When drop_and_pad=True, in routing_map, the number of non-zeros in each column equals to
    expert capacity. This function exploits this feature to use ops that support cuda graph.

    If the fused permute and pad kernel is available, it will pad the tokens to the align_size
    and return the padded permuted tokens, pad_offsets and padded tokens per expert.

    Args:
        tokens (torch.Tensor): The input token tensor, [num_tokens, hidden].
        routing_map (torch.Tensor): The sparse token to expert mapping, [num_tokens, num_experts].
        probs (torch.Tensor, optional): The probs tensor, [num_tokens, num_experts].
        num_out_tokens (int, optional): The number of output tokens. If None, it's set to
                                        the number of input tokens.
        fused (bool, optional): Whether use the fused permute function.
        drop_and_pad (bool, optional): Whether or not the token dispatcher uses token-drop
                                       and pads the number of tokens to the expert capacity.
                                       If set to true, routing_map has a fixed number of non-zeros
                                       in each column.
        tokens_per_expert (torch.Tensor, optional): Tensor of shape `[num_experts]` containing
                                                    actual token counts per expert.
        align_size (int, optional): The alignment size for the input tensor for fp8 or fp4.

    Returns:
        Tuple[
            torch.Tensor,
            Optional[torch.Tensor],
            torch.Tensor,
            Optional[torch.Tensor],
            Optional[torch.Tensor],
        ]:
            The permuted tokens, (optional) permuted probs, sorted indices,
            (optional) pad_offsets, (optional) padded_tokens_per_expert.
    """
    if fused and probs is None:
        if not HAVE_TE or fused_permute is None:
            raise ValueError("fused_permute is not available. Please install TE >= 2.1.0.")
        permuted_input, sorted_indices = fused_permute(
            tokens, routing_map, num_out_tokens=num_out_tokens
        )
        return permuted_input, None, sorted_indices, None, tokens_per_expert

    if fused and probs is not None:
        if not HAVE_TE or (
            fused_permute_and_pad_with_probs is None and fused_permute_with_probs is None
        ):
            raise ValueError(
                "Transformer Engine (TE) fused kernel is not available. "
                "fused_permute_with_probs typically requires TE >= 2.1.0, and "
                "fused_permute_and_pad_with_probs` typically requires TE >= 2.12.0. "
            )
        if fused_permute_and_pad_with_probs is not None and tokens_per_expert is not None:
            return fused_permute_and_pad_with_probs(
                tokens, probs, routing_map, tokens_per_expert, align_size
            )
        else:
            output, permuted_probs, row_id_map = fused_permute_with_probs(
                tokens, probs, routing_map, num_out_tokens=num_out_tokens
            )
            return output, permuted_probs, row_id_map, None, tokens_per_expert

    num_tokens, hidden = tokens.shape
    num_experts = routing_map.shape[1]
    permuted_probs = None
    if drop_and_pad and not (num_out_tokens is None):
        capacity = num_out_tokens // num_experts
        assert not routing_map.requires_grad
        # mask [num_tokens, num_experts] -> [num_experts, num_tokens]
        routing_map = routing_map.to(dtype=torch.int8).T.contiguous()
        # use argsort to put indices of all non-zeros in the beginning of list
        # and keep the first `capacity` number of indices
        sorted_indices = routing_map.argsort(dim=-1, descending=True, stable=True)[
            :, :capacity
        ].contiguous()
        # flatten from [num_experts, capacity] to 1D
        sorted_indices = sorted_indices.view(-1)

        if probs is not None:
            # [num_tokens, num_experts] -> num_experts * num_tokens
            probs_T_1D = probs.T.contiguous().view(-1)
            # get 1D indices of the probs selected by routing_map
            indices_dim0 = torch.arange(num_experts, device=routing_map.device).unsqueeze(-1)
            indices_dim1 = sorted_indices.view(num_experts, capacity)
            indices_1D = (indices_dim0 * num_tokens + indices_dim1).view(-1)
            # get probs from indices
            permuted_probs = probs_T_1D.index_select(0, indices_1D)
    else:
        assert (
            num_out_tokens is not None
        ), "num_out_tokens is required for the argsort-based permute"

        # mask [num_tokens, num_experts] -> [num_experts, num_tokens]
        routing_map = routing_map.bool().T.contiguous()

        # Use argsort to get indices of non-zero entries in row-major order.
        # This is equivalent to masked_select but produces fixed-shape output,
        # making it compatible with CUDA graph capture.
        flat_sorted = routing_map.reshape(-1).argsort(descending=True, stable=True)
        flat_sorted = flat_sorted[:num_out_tokens]
        sorted_indices = flat_sorted % num_tokens

        if probs is not None:
            permuted_probs = probs.T.contiguous().reshape(-1)[flat_sorted]

    # use the mapping to permute the tokens
    permuted_input = tokens.index_select(0, sorted_indices)

    return permuted_input, permuted_probs, sorted_indices, None, tokens_per_expert


def unpermute(
    permuted_tokens: torch.Tensor,
    sorted_indices: torch.Tensor,
    restore_shape: torch.Size,
    probs: Optional[torch.Tensor] = None,
    routing_map: Optional[torch.Tensor] = None,
    fused: bool = False,
    drop_and_pad: bool = False,
    pad_offsets: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """
    Restore the original order of tokens after permutation. If probs are provided, it
    will also apply them to the tokens before restoring the order.

    When drop_and_pad=True, the tensors will have the following properties:
      - In routing_map, the number of non-zeros in each column equals to expert capacity
      - The size of sorted_indices equals to num_experts * capacity, each split of `capacity`
        contains the indices of tokens routed to an expert.
    This function exploits these features to use ops that support cuda graph.

    Args:
        permuted_tokens (torch.Tensor): The permuted token tensor.
        sorted_indices (torch.Tensor): The indices used to sort the tokens.
        restore_shape (torch.Size): The shape of the unpermuted tensor.
        probs (torch.Tensor, optional): The unpermuted probs tensor,
        routing_map (torch.Tensor, optional): Token to expert mapping, shape
            [num_tokens, num_experts].
        fused (bool, optional): Whether use the fused unpermute function.
        drop_and_pad (bool, optional): Whether or not the token dispatcher uses token-drop
                                       and pads the number of tokens to the expert capacity.
        pad_offsets (torch.Tensor, optional):
            Tensor of per-expert cumulative padding offsets used to remove padding added
            during permutation. This is the fourth output of `moe_permute_and_pad_with_probs`
            and is required when unpermuting padded outputs. Defaults to None.

    Returns:
        torch.Tensor: The tokens restored to their original order.
    """
    if fused:
        if not HAVE_TE or fused_unpermute is None:
            raise ValueError("fused_unpermute is not available. Please install TE >= 2.1.0.")
        extra_kwargs = {}
        if is_te_min_version("2.12.0"):
            extra_kwargs["pad_offsets"] = pad_offsets
        return fused_unpermute(
            permuted_tokens,
            sorted_indices,
            merging_probs=probs,
            restore_shape=restore_shape,
            **extra_kwargs,
        )

    _, hidden = restore_shape
    input_dtype = permuted_tokens.dtype

    if probs is not None:
        assert routing_map is not None, "Mask must be provided to permute the probs."
        if drop_and_pad:
            num_experts = routing_map.size(1)
            num_permuted_tokens = sorted_indices.size(0)
            capacity = num_permuted_tokens // num_experts
            num_unpermuted_tokens = probs.size(0)

            # [num_unpermuted_tokens, num_experts] -> num_experts * num_unpermuted_tokens
            probs_T_1D = probs.T.contiguous().view(-1)

            # get 1D indices of the probs selected by routing_map
            indices_dim0 = torch.arange(num_experts, device=routing_map.device).unsqueeze(-1)
            indices_dim1 = sorted_indices.view(num_experts, capacity)
            indices_1D = (indices_dim0 * num_unpermuted_tokens + indices_dim1).view(-1)

            # get probs from indices
            permuted_probs = probs_T_1D.index_select(0, indices_1D)
        else:
            permuted_probs = probs.T.contiguous().masked_select(routing_map.T.contiguous())
        # Here may promote permuted_tokens to higher precision (fp32/fp64) if probs is in
        # higher precision due to moe_router_dtype being enabled. This can lead to
        # additional GPU memory usage. Use --moe-permute-fusion flag to avoid this extra memory
        # allocation.
        permuted_tokens = permuted_tokens * permuted_probs.unsqueeze(-1)

    # Create an output tensor filled with zeros
    output_tokens = torch.zeros(
        restore_shape, dtype=permuted_tokens.dtype, device=permuted_tokens.device
    )
    if torch.are_deterministic_algorithms_enabled():
        # Use index_add which is deterministic when deterministic algorithms are enabled
        # and is CUDA graph compatible
        output_tokens = torch.zeros(
            restore_shape, dtype=permuted_tokens.dtype, device=permuted_tokens.device
        )
        # index_add is deterministic when torch.use_deterministic_algorithms(True) is set
        # and is CUDA graph compatible unlike scatter_add
        output_tokens.index_add_(0, sorted_indices, permuted_tokens)
    else:
        # Scatter add the permuted_input back to the original positions
        output_tokens.scatter_add_(
            0, sorted_indices.unsqueeze(1).expand(-1, hidden), permuted_tokens
        )
    return output_tokens.to(dtype=input_dtype)


def sort_chunks_by_idxs(
    input: torch.Tensor,
    split_sizes: torch.Tensor,
    sorted_idxs: torch.Tensor,
    probs: Optional[torch.Tensor] = None,
    fused: bool = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
    """Split and sort the input tensor based on the split_sizes and sorted indices.

    Args:
        input (torch.Tensor): The input tensor.
        split_sizes (torch.Tensor): The split sizes.
        sorted_idxs (torch.Tensor): The sorted indices.
        probs (torch.Tensor, optional): The probs tensor. Defaults to None.
        fused (bool, optional): Whether to use the fused version of the sort_chunks_by_idxs
                                function. Defaults to False.

    Returns:
        Tuple[torch.Tensor, Optional[torch.Tensor]]: The sorted output tensor and permuted probs.
    """
    if fused and probs is None:
        if not HAVE_TE or fused_sort_chunks_by_index is None:
            raise ValueError(
                "fused_sort_chunks_by_index is not available. Please install TE >= 2.1.0."
            )
        return fused_sort_chunks_by_index(input, split_sizes, sorted_idxs), None

    if fused and probs is not None:
        if not HAVE_TE or fused_sort_chunks_by_index_with_probs is None:
            raise ValueError(
                "fused_sort_chunks_by_index_with_probs is not available. "
                "Please install TE >= 2.1.0."
            )
        return fused_sort_chunks_by_index_with_probs(input, probs, split_sizes, sorted_idxs)

    input = torch.split(input, split_sizes.tolist(), dim=0)
    output = torch.cat([input[i] for i in sorted_idxs.tolist()], dim=0)
    if probs is not None:
        probs = torch.split(probs, split_sizes.tolist(), dim=0)
        permuted_probs = torch.cat([probs[i] for i in sorted_idxs.tolist()], dim=0)
    else:
        permuted_probs = None
    return output, permuted_probs


def group_limited_topk(
    scores: torch.Tensor,
    topk: int,
    num_tokens: int,
    num_experts: int,
    num_groups: int,
    group_topk: int,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Perform top-k routing on a subset of expert groups.

    When using group-limited routing:
    1. Experts are divided into 'moe_router_num_groups' equal-sized groups
    2. For each token, 'moe_router_group_topk' groups are selected based on routing scores
       (specifically, the sum of top-2 expert scores within each group)
    3. From these selected groups, 'moe_router_topk' individual experts are chosen

    Two common use cases:
    - Device-limited routing: Set 'moe_router_num_groups' equal to expert parallel size (EP)
      to limit each token to experts on a subset of devices
      (See DeepSeek-V2: https://arxiv.org/pdf/2405.04434)

    - Node-limited routing: Set 'moe_router_num_groups' equal to number of nodes in EP group
      to limit each token to experts on a subset of nodes
      (See DeepSeek-V3: https://arxiv.org/pdf/2412.19437)

    Args:
        scores (torch.Tensor): Softmax scores generated by the router.
        topk (int): The number of experts to select for each token.
        num_tokens (int): The number of tokens.
        num_experts (int): The number of experts.
        num_groups (int): Number of groups for routed experts.
        group_topk (int): Number of groups selected for each token.

    Returns:
        Tuple[torch.Tensor, torch.Tensor]: Probs and indices tensor.
    """
    # Organize the experts into groups
    # Select groups based on sum of top-(topk/group_topk) routing scores within each group
    group_scores = (
        scores.view(num_tokens, num_groups, -1).topk(topk // group_topk, dim=-1)[0].sum(dim=-1)
    )
    group_idx = torch.topk(group_scores, k=group_topk, dim=-1, sorted=False)[1]
    group_mask = torch.zeros_like(group_scores)
    group_mask.scatter_(1, group_idx, 1)

    # Mask the experts based on selection groups
    score_mask = (
        group_mask.unsqueeze(-1)
        .expand(num_tokens, num_groups, num_experts // num_groups)
        .reshape(num_tokens, -1)
    )

    masked_scores = scores.masked_fill(~score_mask.bool(), float('-inf'))
    probs, top_indices = torch.topk(masked_scores, k=topk, dim=-1)

    return probs, top_indices


def pad_routing_map(routing_map: torch.Tensor, pad_multiple: int) -> torch.Tensor:
    """Pad the routing map to ensure each expert has a multiple of pad_multiple tokens.

    This function ensures that each expert has a number of tokens that is a multiple of
    pad_multiple by converting some 0s to 1s in the routing map. The padding is done by
    selecting the first N zero elements in each row, where N is the number needed to reach
    the next multiple of pad_multiple.

    Args:
        routing_map (torch.Tensor): A boolean or integer tensor of shape [num_tokens,
            num_experts] indicating which tokens are routed to which experts.
        pad_multiple (int): The multiple to pad each expert's token count to.

    Returns:
        torch.Tensor: The padded routing map of shape [num_tokens, num_experts].
    """
    # Transpose to [num_experts, num_tokens] for easier row-wise operations
    routing_map = routing_map.transpose(0, 1)  # [num_experts, num_tokens]

    # Calculate how many tokens need to be padded for each expert
    num_ones = routing_map.sum(dim=1)
    num_to_pad = (-num_ones) % pad_multiple

    # Find the positions of zeros in each row and their ranks
    is_zero = routing_map == 0
    zero_ranks = torch.cumsum(is_zero.int(), dim=1)

    # Create mask for elements that need to be padded (converted from 0 to 1)
    mask = zero_ranks <= num_to_pad.unsqueeze(1)
    routing_map[mask] = 1

    routing_map = routing_map.transpose(0, 1)
    return routing_map


def topk_routing_with_score_function(
    logits: torch.Tensor,
    topk: int,
    use_pre_softmax: bool = False,
    num_groups: Optional[int] = None,
    group_topk: Optional[int] = None,
    scaling_factor: Optional[float] = None,
    score_function: str = "softmax",
    expert_bias: Optional[torch.Tensor] = None,
    fused: bool = False,
    router_replay: Optional['RouterReplay'] = None,
    dense_output: bool = False,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Compute the routing probabilities and map for top-k selection with score function.

    Args:
        logits (torch.Tensor): Logits tensor.
        topk (int): The number of experts to select for each token.
        use_pre_softmax (bool, optional): Whether to apply softmax or sigmoid before top-k
                                          selection. Defaults to False.
        num_groups (int, optional): Number of groups for routed experts. Defaults to None.
        group_topk (int, optional): Number of selected groups for each token. Defaults to None.
        scaling_factor (float, optional): Scaling factor of routing score in top-k selection.
                                         Defaults to None.
        score_function (str, optional): The score function to use. Can be either "softmax" or
                                        "sigmoid". Defaults to "softmax".
        expert_bias (torch.Tensor, optional): The bias added to logits for expert routing.
                                              Defaults to None.
        fused (bool, optional): Whether to use the fused version. Defaults to False.
        router_replay (Optional['RouterReplay']): For debugging and development, allows for
                                             deterministic routing by replaying a previously
                                             recorded routing sequence.

                                              Defaults to None.
        dense_output (bool, optional): If True, return dense tensors [num_tokens, topk] instead of
                                       sparse tensors [num_tokens, num_experts]. Defaults to False.

    Returns:
        Tuple[torch.Tensor, torch.Tensor]:
            When dense_output=False (default):
                - routing_probs (torch.Tensor): Shape [num_tokens, num_experts]. Sparse tensor
                  containing the normalized routing probability for each token-expert pair. Non-zero
                  entries correspond to the top-k selected experts per token.
                - routing_map (torch.Tensor): Shape [num_tokens, num_experts]. Boolean mask where
                  True indicates the token is routed to that expert (i.e. the expert was in the
                  token's top-k selection).
            When dense_output=True:
                - probs (torch.Tensor): Shape [num_tokens, topk]. The normalized routing
                  probabilities for each token's top-k selected experts.
                - top_indices (torch.Tensor): Shape [num_tokens, topk]. The expert indices
                  selected for each token.
    """
    assert logits.dim() == 2, f"Expected 2D logits [num_tokens, num_experts], got {logits.dim()}."
    num_tokens, num_experts = logits.shape
    if fused:
        if not HAVE_TE or fused_topk_with_score_function is None:
            raise ValueError(
                "fused_topk_with_score_function is not available. Please install TE >= 2.6.0."
            )
        return fused_topk_with_score_function(
            logits=logits,
            topk=topk,
            use_pre_softmax=use_pre_softmax,
            num_groups=num_groups,
            group_topk=group_topk,
            scaling_factor=scaling_factor,
            score_function=score_function,
            expert_bias=expert_bias,
        )

    def _compute_topk(
        scores: torch.Tensor,
        topk: int,
        num_groups: Optional[int] = None,
        group_topk: Optional[int] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Compute the top-k indices for the given scores.

        Args:
            scores (torch.Tensor): The scores tensor.
            topk (int): The number of top-k indices to compute.
            num_groups (int, optional): The number of groups to compute the top-k indices for.
                                        Defaults to None.
            group_topk (int, optional): The number of top-k indices to compute for each group.
                                        Defaults to None.

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: The top-k indices and the top-k scores.
        """
        if group_topk:
            return group_limited_topk(
                scores=scores,
                topk=topk,
                num_tokens=num_tokens,
                num_experts=num_experts,
                num_groups=num_groups,
                group_topk=group_topk,
            )
        else:
            return torch.topk(scores, k=topk, dim=1)

    def compute_topk(scores, topk, num_groups=None, group_topk=None):
        # Default behavior if no replay is active

        if router_replay is None:
            return _compute_topk(scores, topk, num_groups=num_groups, group_topk=group_topk)
        else:
            return router_replay.get_replay_topk(
                scores, topk, num_groups, group_topk, _compute_topk
            )

    if score_function == "softmax":
        if use_pre_softmax:
            scores = torch.softmax(logits, dim=-1, dtype=torch.float32).type_as(logits)
            probs, top_indices = compute_topk(scores, topk, num_groups, group_topk)
        else:
            scores, top_indices = compute_topk(logits, topk, num_groups, group_topk)
            probs = torch.softmax(scores, dim=-1, dtype=torch.float32).type_as(logits)
    elif score_function == "sigmoid":
        scores = torch.sigmoid(logits.float()).type_as(logits)
        if expert_bias is not None:
            scores_for_routing = scores + expert_bias
            _, top_indices = compute_topk(scores_for_routing, topk, num_groups, group_topk)
            scores = torch.gather(scores, dim=1, index=top_indices).type_as(logits)
        else:
            scores, top_indices = compute_topk(scores, topk, num_groups, group_topk)
        probs = scores / (scores.sum(dim=-1, keepdim=True) + 1e-20) if topk > 1 else scores
    else:
        raise ValueError(f"Invalid score_function: {score_function}")

    if scaling_factor:
        probs = probs * scaling_factor

    if dense_output:
        return probs, top_indices

    if torch.are_deterministic_algorithms_enabled():
        # build [num_tokens, num_experts] from [num_tokens, topk]
        routing_probs = torch.zeros_like(logits)
        rows = torch.arange(num_tokens, device=logits.device).unsqueeze(1)
        routing_probs.index_put_((rows, top_indices), probs, accumulate=False)

        routing_map = torch.zeros_like(logits, dtype=logits.dtype)
        routing_map.index_put_(
            (rows, top_indices), torch.ones_like(probs, dtype=routing_map.dtype), accumulate=False
        )
        routing_map = routing_map.bool()
    else:
        # TODO Try using element-wise operations instead of scatter?
        routing_probs = torch.zeros_like(logits).scatter(1, top_indices, probs)
        routing_map = torch.zeros_like(logits).int().scatter(1, top_indices, 1).bool()

    return routing_probs, routing_map


def compute_routing_scores_for_aux_loss(
    logits: torch.Tensor,
    topk: int,
    score_function: str,
    fused: bool = False,
    padding_mask: Optional[torch.Tensor] = None,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Compute routing scores based on the score function.

    Args:
        logits (torch.Tensor): The logits tensor after gating, shape: [num_tokens, num_experts].
        topk (int): The number of top-k indices to compute.
        score_function (str): The score function to use. Can be either "softmax" or "sigmoid".
        fused (bool, optional): Whether to use the fused version. Defaults to False.
        padding_mask (torch.Tensor, optional): Boolean mask indicating non-padding tokens.
                                               Shape in [num_tokens]. True for valid tokens,
                                               False for padding tokens. Defaults to None.

    Returns:
        Tuple[torch.Tensor, torch.Tensor]: The routing map and the normalized routing scores.
    """
    if fused:
        if not HAVE_TE or fused_compute_score_for_moe_aux_loss is None:
            raise ValueError(
                "fused_compute_score_for_moe_aux_loss is not available. Please install TE >= 2.6.0."
            )
        routing_map, scores = fused_compute_score_for_moe_aux_loss(
            logits=logits, topk=topk, score_function=score_function
        )
    else:
        if score_function == "softmax":
            scores = torch.softmax(logits, dim=-1, dtype=torch.float32)
        elif score_function == "sigmoid":
            # Cast logits to float32 before sigmoid for stability
            scores = torch.sigmoid(logits.to(torch.float32))
            scores = scores / (scores.sum(dim=-1, keepdim=True) + 1e-20)
        else:
            raise ValueError(f"Invalid score_function: {score_function}")

        _, top_indices = torch.topk(scores, k=topk, dim=1)
        routing_map = torch.zeros_like(logits).int().scatter(1, top_indices, 1).bool()

    # Apply padding mask to scores if provided
    if padding_mask is not None:
        # Invert padding_mask and make True indicates valid tokens
        valid_mask = (~padding_mask).unsqueeze(-1)
        routing_map = routing_map * valid_mask
        scores = scores * valid_mask
    return routing_map, scores


def apply_router_token_dropping(
    routing_probs: torch.Tensor,
    routing_map: torch.Tensor,
    router_topk: int,
    capacity_factor: float,
    drop_policy: str = "probs",
    pad_to_capacity: bool = False,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Apply token dropping to top-k expert selection.

    This function enforces expert capacity limits by dropping tokens that exceed
    the capacity and optionally padding to capacity.

    Args:
        routing_probs (torch.Tensor): Tensor of shape [num_tokens, num_experts]
            containing the routing probabilities for selected experts.
        routing_map (torch.Tensor): Boolean tensor of shape [num_tokens, num_experts]
            indicating which experts were selected for each token.
        router_topk (int): Number of experts selected per token.
        capacity_factor (float): The capacity factor of each expert.
        drop_policy (str, optional): Policy to drop tokens - "probs" or "position".
                                     Defaults to "probs".
        pad_to_capacity (bool, optional): Whether to pad to capacity. Defaults to False.

    Returns:
        Tuple[torch.Tensor, torch.Tensor]:
            - final_probs: Routing probabilities after applying capacity constraints
            - final_map: Boolean mask after applying capacity constraints
    """
    assert routing_probs.ndim == 2 and routing_map.ndim == 2
    num_tokens, num_experts = routing_probs.shape
    # Calculate expert capacity
    expert_capacity = get_capacity(
        num_tokens=num_tokens * router_topk,
        num_experts=num_experts,
        capacity_factor=capacity_factor,
    )

    # Create capacity mask based on drop policy
    if expert_capacity > num_tokens:
        # No need to drop tokens if capacity exceeds the number of tokens
        capacity_mask = torch.ones_like(routing_probs).bool()
    else:
        if drop_policy == "probs":
            _, capacity_indices = torch.topk(routing_probs, k=expert_capacity, dim=0, sorted=False)
            capacity_mask = torch.zeros_like(routing_probs).scatter(0, capacity_indices, 1).bool()
        elif drop_policy == "position":
            _, capacity_indices = torch.topk(
                routing_map.int(), k=expert_capacity, dim=0, sorted=False
            )
            capacity_mask = torch.zeros_like(routing_probs).scatter(0, capacity_indices, 1).bool()
        else:
            raise ValueError(f"Invalid drop_policy: {drop_policy}")

    # Apply capacity constraints
    if pad_to_capacity:
        final_map = capacity_mask
        final_probs = routing_probs * final_map
    else:
        # Get exceed mask and maskout exceeded probs and indices
        final_map = torch.logical_and(routing_map, capacity_mask)
        final_probs = routing_probs * final_map

    return final_probs, final_map


def save_to_aux_losses_tracker(
    name: str,
    loss: torch.Tensor,
    layer_number: int,
    num_layers: int,
    reduce_group: Optional[torch.distributed.ProcessGroup] = None,
    avg_group: Optional[torch.distributed.ProcessGroup] = None,
    reduce_group_has_dp: bool = False,
) -> None:
    """Save the auxiliary loss for logging.
    Args:
        name (str): The name of the loss.
        loss (torch.Tensor): The loss tensor.
        layer_number (int): Layer index of the loss.
        num_layers (int): The number of total layers.
        reduce_group (torch.distributed.ProcessGroup, optional): The group for reducing the loss.
                                                                 Defaults to None.
        avg_group (torch.distributed.ProcessGroup, optional): The group for averaging the loss.
                                                              Defaults to None.
        reduce_group_has_dp (bool, optional): Whether the reduce group has data parallel ranks.
            Set this to True if the reduce group has data parallel ranks. This flag is used to
            ensure the correct reduction in aux loss tracking. Defaults to False.
    """
    # Skip aux loss logging if layer_number is None.
    if layer_number is None:
        return

    tracker = get_moe_layer_wise_logging_tracker()
    if name not in tracker:
        tracker[name] = {}
        tracker[name]["values"] = torch.zeros(num_layers, device=loss.device)
    tracker[name]["values"][layer_number - 1] += loss.detach()  # Aggregate the loss for the layer.
    tracker[name]["reduce_group"] = reduce_group
    tracker[name]["avg_group"] = avg_group
    tracker[name]["reduce_group_has_dp"] = reduce_group_has_dp


def clear_aux_losses_tracker() -> None:
    """Clear the auxiliary losses."""
    tracker = get_moe_layer_wise_logging_tracker()
    for name in tracker:
        tracker[name]["values"].zero_()


def reduce_aux_losses_tracker_across_ranks(
    track_names: Optional[List[str]] = None, pg_collection: Optional[ProcessGroupCollection] = None
) -> None:
    """Collect and reduce the auxiliary losses across ranks.

    Args:
        track_names (Optional[List[str]], optional):
            The names of the losses to track. Defaults to None.
        pg_collection (Optional[ProcessGroupCollection], optional):
            The process group collection. Defaults to None.
    """
    tracker = get_moe_layer_wise_logging_tracker()
    if track_names is None:
        track_names = tracker.keys()

    if pg_collection is None:
        # Use parallel_state groups
        pp_group = parallel_state.get_pipeline_model_parallel_group()
        dp_group = parallel_state.get_data_parallel_group(
            with_context_parallel=False, partial_data_parallel=False
        )
    else:
        pp_group = pg_collection.pp
        dp_group = pg_collection.dp

    for name in track_names:
        values = tracker[name]["values"]
        # TODO(Hepteract): delete the usage of the global parallel_state.
        # Collect aux losses across PP.
        torch.distributed.all_reduce(values, group=pp_group)
        # Reduce aux losses across ranks.
        if tracker[name].get('reduce_group') is not None:
            torch.distributed.all_reduce(values, group=tracker[name].get('reduce_group'))
            # Need to conduct reduction across data parallel ranks. When the reduce_group
            # does not have 'dp' attribute, do it manually.
            if not tracker[name].get('reduce_group_has_dp', False):
                torch.distributed.all_reduce(
                    values, group=dp_group, op=torch.distributed.ReduceOp.AVG
                )
        if tracker[name].get('avg_group') is not None:
            torch.distributed.all_reduce(
                values, group=tracker[name]['avg_group'], op=torch.distributed.ReduceOp.AVG
            )


def track_moe_metrics(
    loss_scale: float,
    iteration: int,
    writer: Optional["SummaryWriter"] = None,
    wandb_writer: Optional["wandb.Run"] = None,
    total_loss_dict: Optional[dict[str, torch.Tensor]] = None,
    per_layer_logging: bool = False,
    force_initialize: bool = False,
    track_names: Optional[List[str]] = None,
    num_layers: Optional[int] = None,
    moe_layer_freq: Optional[Union[int, List[int]]] = None,
    mtp_num_layers: Optional[int] = None,
    pg_collection: Optional[ProcessGroupCollection] = None,
) -> None:
    """Track the MoE metrics for logging.

    Args:
        loss_scale (float): The loss scale.
        iteration (int): The iteration.
        writer (SummaryWriter, optional): The tensorboard writer. Defaults to None.
        wandb_writer (wandb.Run, optional): The wandb writer. Defaults to None.
        total_loss_dict (dict[str, torch.Tensor], optional): The total loss dictionary.
                                                             Defaults to None.
        per_layer_logging (bool, optional): Whether to log per layer. Defaults to False.
        force_initialize (bool, optional): Whether to force initialize the tracker.
                                           Defaults to False.
        track_names (List[str], optional): The names of the losses to track. Defaults to None.
        num_layers (int, optional): The number of layers. Defaults to None.
        moe_layer_freq (Union[int, List[int]], optional): The frequency of the MoE layers.
                                                          Defaults to None.
        mtp_num_layers (int, optional): The number of layers in the model parallel group.
                                        Defaults to None.
        pg_collection (ProcessGroupCollection, optional): The process group collection.
                                                          Defaults to None.
    """
    # Aux loss logging
    tracker = get_moe_layer_wise_logging_tracker()
    # Initialize the tracker if force_initialize is True.
    # The values tensor size must match what the router creates in save_to_aux_losses_tracker,
    # which uses (num_layers + mtp_num_layers). This is important for PP ranks that have no
    # MoE layers (so the tracker is empty and force_initialize creates the entry); their tensor
    # size must match ranks that do have MoE layers, otherwise all_reduce across PP will hang.
    tracker_num_layers = num_layers
    if mtp_num_layers is not None:
        tracker_num_layers += mtp_num_layers
    if force_initialize:
        if track_names is not None:
            for key in track_names:
                if key not in tracker:
                    tracker[key] = {}
                    tracker[key]["values"] = torch.zeros(tracker_num_layers, device="cuda")
                    tracker[key]["reduce_group"] = None
                    tracker[key]["avg_group"] = None
                    tracker[key]["reduce_group_has_dp"] = False
    reduce_aux_losses_tracker_across_ranks(track_names, pg_collection=pg_collection)

    # Get number of MoE layers
    if moe_layer_freq is None:
        num_moe_layers = num_layers
    elif isinstance(moe_layer_freq, int):
        assert isinstance(num_layers, int)
        moe_layer_pattern = [1 if (i % moe_layer_freq == 0) else 0 for i in range(num_layers)]
        num_moe_layers = sum(moe_layer_pattern)
    elif isinstance(moe_layer_freq, list):
        num_moe_layers = sum(moe_layer_freq)
    else:
        raise ValueError(f"Invalid moe_layer_freq: {moe_layer_freq}")

    if mtp_num_layers is not None:
        num_moe_layers += mtp_num_layers

    aux_losses = {k: v['values'].float() * loss_scale for k, v in tracker.items()}
    for name, loss_list in aux_losses.items():
        if total_loss_dict is not None:
            if name not in total_loss_dict:
                total_loss_dict[name] = loss_list.sum() / num_moe_layers
            else:
                total_loss_dict[name] += loss_list.sum() / num_moe_layers
        if writer is not None:
            # currently when using add_scalars,
            # torch.utils.add_scalars makes each timer its own run, which
            # polutes the runs list, so we just add each as a scalar
            writer.add_scalar(name, loss_list.sum() / num_moe_layers, iteration)
            if per_layer_logging:
                for i, loss in enumerate(loss_list.tolist()):
                    writer.add_scalar(f"moe/{name}_layer_{i}", loss, iteration)

            # W&B logging lacks support for logging multiple scalars simultaneously.
            # As a workaround, we log each scalar individually first, then we can create
            # a custom panel to manually group them to a single plot.
            if wandb_writer:
                wandb_writer.log({f"{name}": loss_list.sum() / num_moe_layers}, iteration)
                if per_layer_logging:
                    wandb_writer.log(
                        {
                            f"moe/{name}_layer_{i}": loss
                            for i, loss in enumerate(loss_list.tolist())
                        },
                        iteration,
                    )

    clear_aux_losses_tracker()


def get_updated_expert_bias(
    tokens_per_expert: torch.Tensor, expert_bias: torch.Tensor, expert_bias_update_rate: float
) -> torch.Tensor:
    """Update expert bias for biased expert routing. See https://arxiv.org/abs/2408.15664v1#

    Args:
        tokens_per_expert (torch.Tensor): The number of tokens assigned to each expert.
        expert_bias (torch.Tensor): The bias for each expert.
        expert_bias_udpate_rate (float): The update rate for the expert bias.

    Returns:
        torch.Tensor: The updated expert bias.
    """
    with torch.no_grad():
        # All Reduce Across TPxCPxDP group
        torch.distributed.all_reduce(
            tokens_per_expert,
            # TODO(Hepteract): delete the usage of the global parallel_state.
            group=parallel_state.get_tensor_and_data_parallel_group(with_context_parallel=True),
        )
        average_tokens = tokens_per_expert.sum(dim=-1, keepdim=True) / tokens_per_expert.shape[-1]
        offset = average_tokens - tokens_per_expert
        updated_expert_bias = expert_bias + torch.sign(offset) * expert_bias_update_rate
        return updated_expert_bias


def maybe_move_tensor_to_cpu(
    tensor: torch.Tensor, as_numpy: bool = False, record_stream: bool = False
) -> torch.Tensor:
    """Move a tensor to CPU if it is on GPU.
    Args:
        tensor (torch.Tensor): The tensor to move to CPU.
        as_numpy (bool, optional): Whether to convert the tensor to a numpy array.
                                   Defaults to False.
        record_stream (bool, optional): Whether to record the stream of the tensor, to prevent
                                        memory leak when the DtoH data transfer is on a side
                                        stream. Defaults to False.

    Returns:
        torch.Tensor: The tensor moved to CPU.
    """
    if torch.is_tensor(tensor) and tensor.is_cuda:
        cpu_tensor = tensor.to(torch.device("cpu"), non_blocking=True)
        if as_numpy:
            cpu_tensor = cpu_tensor.numpy()
        if record_stream:
            tensor.record_stream(torch.cuda.current_stream())
        tensor = cpu_tensor
    return tensor


def get_moe_layer_wise_logging_tracker() -> dict:
    """Return the moe layer wise tracker."""
    global _MOE_LAYER_WISE_LOGGING_TRACKER
    return _MOE_LAYER_WISE_LOGGING_TRACKER


@internal_api
class RandomSTE(torch.autograd.Function):
    """
    Straight-Through Estimator(STE) function that returns random values
    with different seed for each rank.

    This is used to generate random logits of router for load-balanced benchmark.
    """

    @staticmethod
    def forward(ctx, logits: torch.Tensor) -> torch.Tensor:
        """
        Forward pass returns random logits with rank-specific seed.

        Args:
            logits (torch.Tensor): The logits.

        Returns:
            torch.Tensor: The random logits.
        """
        with get_cuda_rng_tracker().fork(get_expert_parallel_rng_tracker_name()):
            random_logits = logits.clone().normal_()
        return random_logits

    @staticmethod
    def backward(ctx, grad_output: torch.Tensor) -> torch.Tensor:
        """
        Backward pass propagates the gradient for logits.

        Args:
            grad_output (torch.Tensor): The gradient output.

        Returns:
            torch.Tensor: The gradient input.
        """
        return grad_output


def apply_random_logits(logits: torch.Tensor) -> torch.Tensor:
    """
    Apply the RandomSTE function to the logits.

    Args:
        logits (torch.Tensor): The logits.

    Returns:
        torch.Tensor: The random logits.
    """
    return RandomSTE.apply(logits)


class RouterGatingLinearFunction(torch.autograd.Function):
    """
    Autograd function for router gating linear.
    """

    @staticmethod
    def forward(
        ctx,
        inp: torch.Tensor,
        weight: torch.Tensor,
        bias: Optional[torch.Tensor],
        router_dtype: torch.dtype,
    ) -> torch.Tensor:
        """
        Forward pass of the RouterGatingLinearFunction function.

        Args:
            inp (torch.Tensor): The input tensor.
            weight (torch.Tensor): The weight tensor.
            bias (torch.Tensor): The bias tensor. Could be None.
            router_dtype (torch.dtype): The router dtype.

        Returns:
            torch.Tensor: The output tensor.
        """
        ctx.save_for_backward(inp, weight, bias)
        ctx.router_dtype = router_dtype
        ctx.input_dtype = inp.dtype
        ctx.weight_dtype = weight.dtype
        inp_shape = inp.shape
        inp = inp.view(-1, inp_shape[-1])

        if te_general_gemm is not None and router_dtype != torch.float64:
            output = te_general_gemm(weight, inp, router_dtype, layout="TN", bias=bias)
            output = output[0]
        elif bias is None:
            output = torch.mm(inp.to(router_dtype), weight.to(router_dtype).t())
        else:
            output = torch.addmm(
                bias.to(router_dtype), inp.to(router_dtype), weight.to(router_dtype).t()
            )

        output = output.view(*inp_shape[:-1], -1)
        return output

    @staticmethod
    def backward(
        ctx, grad_output: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor], None]:
        """
        Backward pass of the RouterGatingLinearFunction function.

        Args:
            grad_output (torch.Tensor): The gradient output.

        Returns:
            Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor], None]:
                The gradient input, gradient weight, gradient bias, and None.
        """
        inp, weight, bias = ctx.saved_tensors
        inp_shape = inp.shape
        grad_shape = grad_output.shape
        inp = inp.view(-1, inp_shape[-1])
        grad_output = grad_output.view(-1, grad_shape[-1])

        if te_general_gemm is not None and ctx.router_dtype != torch.float64:
            grad_input = te_general_gemm(
                weight.to(ctx.router_dtype), grad_output, ctx.router_dtype, layout="NN", grad=True
            )
            grad_weight = te_general_gemm(
                inp.to(ctx.router_dtype), grad_output, ctx.router_dtype, layout="NT", grad=True
            )
            grad_input = grad_input[0].to(ctx.input_dtype)
            grad_weight = grad_weight[0].to(ctx.weight_dtype)
        else:
            grad_input = torch.mm(grad_output, weight.to(ctx.router_dtype)).to(ctx.input_dtype)
            grad_weight = torch.mm(grad_output.t(), inp.to(ctx.router_dtype)).to(ctx.weight_dtype)

        grad_bias = grad_output.sum(dim=0).to(ctx.weight_dtype) if bias is not None else None
        grad_input = grad_input.view(*inp_shape)
        return grad_input, grad_weight, grad_bias, None


def router_gating_linear(
    inp: torch.Tensor, weight: torch.Tensor, bias: Optional[torch.Tensor], router_dtype: torch.dtype
) -> torch.Tensor:
    """
    Customized linear layer for router gating.
    This linear layer accepts bfloat16 input and weight, and can return output with router_dtype.
    It can reduce the memory usage by avoiding saving the intermediate high precision tensors.

    Args:
        inp (torch.Tensor): The input tensor.
        weight (torch.Tensor): The weight tensor.
        bias (torch.Tensor): The bias tensor. Could be None.
        router_dtype (torch.dtype): The router dtype.

    Returns:
        torch.Tensor: The output tensor.
    """
    return RouterGatingLinearFunction.apply(inp, weight, bias, router_dtype)


def get_align_size_for_quantization(config: TransformerConfig) -> int:
    """Get the alignment size for quantization.

    Args:
        config (TransformerConfig): The configuration.

    Returns:
        int: The alignment size for quantization.
    """
    if config.fp8:
        return get_fp8_align_size(config.fp8_recipe)
    elif config.fp4:
        return get_fp4_align_size(config.fp4_recipe)
    return 16


# TODO(Hepteract): delete the usage of the global parallel_state.
# Initialize process groups with the global parallel_state.
def get_default_pg_collection() -> ProcessGroupCollection:
    """Get the default process groups for MoE.

    Returns:
        ProcessGroupCollection: The default process groups for MoE.
    """
    pg_collection = ProcessGroupCollection()
    pg_collection.ep = parallel_state.get_expert_model_parallel_group()
    pg_collection.tp = parallel_state.get_tensor_model_parallel_group()
    pg_collection.cp = parallel_state.get_context_parallel_group()
    pg_collection.expt_tp = parallel_state.get_expert_tensor_parallel_group()
    pg_collection.expt_dp = parallel_state.get_expert_data_parallel_group()
    pg_collection.tp_ep = parallel_state.get_expert_tensor_and_model_parallel_group()
    pg_collection.tp_cp = parallel_state.get_tensor_and_context_parallel_group()
    pg_collection.tp_dp_cp = parallel_state.get_tensor_and_data_parallel_group(
        with_context_parallel=True
    )
    return pg_collection


class MoECudaGraphPartialCaptureSignal(Exception):
    """
    Used to early-return from a MoE layer forward pass in CUDA graph capture.
    This signal is raised when we are partially capturing the CUDA graph of the MoE layer,
    and the related intermediate tensors are recorded in self.kwargs.
    Call self.get_early_return_outputs() to collect the CUDA graph outputs.
    """

    def __init__(self, moe_layer, return_step: str, **kwargs):
        self.moe_layer = moe_layer
        self.return_step = return_step
        self.kwargs = kwargs

    def get_early_return_outputs(
        self, hidden_states: torch.Tensor, shared_expert_output: torch.Tensor
    ) -> List[torch.Tensor]:
        """
        Get the CUDA graph early return outputs for the MoE layer, including the intermediate
        tensors and the intermediate attributes of the token dispatcher.

        The returned output tensors are in the order of:
        - routed experts path outputs
          - hidden states, probs, and routing map for capturing router
          - hidden states and probs for capturing router and preprocess
        - intermediate attributes of the token dispatcher (if capturing the preprocess step)
        - shared expert path output (if exists)
        """
        if self.return_step == "route":
            # Capturing the router step returns three intermediate tensors:
            # hidden states, routing probabilities, and routing map.
            outputs = [hidden_states, self.kwargs['probs'], self.kwargs['routing_map']]
        elif self.return_step == "preprocess":
            # Capturing the preprocess step returns two intermediate tensors:
            # hidden states and routing probabilities.
            # It also returns the intermediate attributes of the token dispatcher, recorded in
            # "token_dispatcher.cudagraph_attrs".
            outputs = [self.kwargs['hidden_states'], self.kwargs['probs']]
            valid_cudagraph_attrs = []
            for attr_name in self.moe_layer.token_dispatcher.cudagraph_attrs:
                hier_attr_name = attr_name.split('.')
                attr = self.moe_layer.token_dispatcher
                for name in hier_attr_name:
                    attr = getattr(attr, name, None)
                    if attr is None:
                        break
                if isinstance(attr, torch.Tensor):
                    outputs.append(attr)
                    valid_cudagraph_attrs.append(attr_name)
            if self.moe_layer.token_dispatcher.valid_cudagraph_attrs is None:
                self.moe_layer.token_dispatcher.valid_cudagraph_attrs = valid_cudagraph_attrs
            else:
                assert (
                    self.moe_layer.token_dispatcher.valid_cudagraph_attrs == valid_cudagraph_attrs
                ), (
                    "valid_cudagraph_attrs mismatch: "
                    f"{self.moe_layer.token_dispatcher.valid_cudagraph_attrs} != "
                    f"{valid_cudagraph_attrs}"
                )
        # Also return the shared expert output, if it is not None.
        if shared_expert_output is not None:
            outputs.append(shared_expert_output)
        return outputs


@internal_api
@dataclass
class MoECudaGraphTensorStore:
    """Storage for tensors used in CUDA graph replay for MoE layers.

    This dataclass stores intermediate tensors computed during CUDA graph replay
    that need to be resumed from the end of the CUDA graph scope to skip redundant computations.

    Attributes:
        hidden_states (Optional[torch.Tensor]): The hidden states output from the CUDA graph replay.
        probs (Optional[torch.Tensor]): The routing probabilities for each token-expert pair.
        routing_map (Optional[torch.Tensor]): The sparse mapping indicating which experts
            were selected for each token. Used to skip the normal router step.
        shared_expert_output (Optional[torch.Tensor]): The output from shared experts
            computation. Used to skip the normal shared expert computation step.
    """

    hidden_states: Optional[torch.Tensor] = None
    probs: Optional[torch.Tensor] = None
    routing_map: Optional[torch.Tensor] = None
    shared_expert_output: Optional[torch.Tensor] = None

    def is_empty(self) -> bool:
        """Check if the store has any non-None tensors.

        Returns:
            bool: True if all fields are None, False otherwise.
        """
        return all(
            getattr(self, field_name) is None
            for field_name in ['hidden_states', 'probs', 'routing_map', 'shared_expert_output']
        )

    def set(self, **kwargs):
        """Set the tensors in the store from keyword arguments."""
        for field_name, value in kwargs.items():
            assert field_name in [
                'hidden_states',
                'probs',
                'routing_map',
                'shared_expert_output',
            ], f"Invalid field name: {field_name}"
            if value is not None:
                assert isinstance(
                    value, torch.Tensor
                ), f"Value must be a torch.Tensor, got {type(value)} for field {field_name}"
                setattr(self, field_name, value)

    def clear(self):
        """Reset all stored tensors to None."""
        for field_name in ['hidden_states', 'probs', 'routing_map', 'shared_expert_output']:
            setattr(self, field_name, None)


def maybe_skip_or_early_return_by_cudagraph(step_condition):
    """
    Decorator to skip certain codepaths in the MoE layer forward pass in CUDA graph replay,
    or early return from the MoE layer forward pass in CUDA graph capture.

    Args:
        step_condition: The step condition to check. Can be "shared_experts_compute", "route",
        or "preprocess". If "shared_experts_compute", the shared experts computation will be
        skipped in replay if it is in the CUDA graph scope. If "route" or "preprocess", the
        router or preprocess will be skipped in replay if it is in the CUDA graph scope, or
        early return from the MoE layer forward pass if it is in CUDA graph capturing mode.

    Returns:
        A decorator function that wraps the MoE layer forward pass.
    """

    def maybe_raise_signal(moe_layer, **kwargs):
        """
        Check if the MoE layer should early return for CUDA graph capture.
        If so, raise a MoECudaGraphPartialCaptureSignal.
        """
        if (
            moe_layer.config.cuda_graph_impl == "transformer_engine"
            and moe_layer.training
            and is_graph_capturing()
        ):
            if (
                step_condition == "route"
                and CudaGraphScope.moe_router in moe_layer.config.cuda_graph_scope
                and CudaGraphScope.moe_preprocess not in moe_layer.config.cuda_graph_scope
            ):
                raise MoECudaGraphPartialCaptureSignal(moe_layer, "route", **kwargs)
            elif (
                step_condition == "preprocess"
                and CudaGraphScope.moe_preprocess in moe_layer.config.cuda_graph_scope
            ):
                raise MoECudaGraphPartialCaptureSignal(moe_layer, "preprocess", **kwargs)

    def decorator(func):

        @functools.wraps(func)
        def wrapped_func(moe_layer, *args, **kwargs):
            """
            Check if we should skip executing the original function based on the current
            step condition and the tensor store status. If the tensor can be found in the store,
            it indicates that it is already computed by the CUDA graph replay, so we can skip it.
            Otherwise, we execute the original function and check if we should raise a signal to
            early return in CUDA graph capture.
            """

            if moe_layer.config.cuda_graph_impl != "transformer_engine":
                return func(moe_layer, *args, **kwargs)

            # The non-cudagraph codepath just calls the original function.
            if not is_graph_capturing() and moe_layer.cudagraph_tensor_store.is_empty():
                return func(moe_layer, *args, **kwargs)

            assert (
                not is_graph_capturing() or moe_layer.cudagraph_tensor_store.is_empty()
            ), "cudagraph_tensor_store cannot be used when it is capturing cuda graph."
            if step_condition == "shared_experts_compute":
                if moe_layer.cudagraph_tensor_store.shared_expert_output is None:
                    # Don't skip the shared expert computation.
                    shared_expert_output = func(moe_layer, *args, **kwargs)
                else:
                    # Skip the shared expert computation and get value from store.
                    shared_expert_output = moe_layer.cudagraph_tensor_store.shared_expert_output
                return shared_expert_output
            elif step_condition == "route":
                if moe_layer.cudagraph_tensor_store.probs is None:
                    # Don't skip the router.
                    assert (
                        moe_layer.cudagraph_tensor_store.routing_map is None
                    ), "routing_map must be None if probs is None"
                    probs, routing_map = func(moe_layer, *args, **kwargs)

                    # Maybe early return after the router.
                    maybe_raise_signal(moe_layer, probs=probs, routing_map=routing_map)
                else:
                    # Skip the router and get value from store.
                    probs, routing_map = (
                        moe_layer.cudagraph_tensor_store.probs,
                        moe_layer.cudagraph_tensor_store.routing_map,
                    )
                return probs, routing_map
            elif step_condition == "preprocess":
                if (
                    moe_layer.cudagraph_tensor_store.is_empty()
                    or moe_layer.cudagraph_tensor_store.routing_map is not None
                ):
                    # Don't skip the preprocess.
                    hidden_states, probs = func(moe_layer, *args, **kwargs)

                    # Maybe early return after the preprocess.
                    maybe_raise_signal(moe_layer, hidden_states=hidden_states, probs=probs)
                else:
                    # Skip the preprocess and get value from store.
                    assert (
                        moe_layer.cudagraph_tensor_store.hidden_states is not None
                        and moe_layer.cudagraph_tensor_store.probs is not None
                    ), "hidden_states and probs must be given in moe_preprocess cudagraph replay"
                    hidden_states, probs = (
                        moe_layer.cudagraph_tensor_store.hidden_states,
                        moe_layer.cudagraph_tensor_store.probs,
                    )
                return hidden_states, probs

        return wrapped_func

    return decorator


================================================
FILE: megatron/core/transformer/moe/router.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from abc import ABC, abstractmethod
from typing import Optional, Union

import torch

from megatron.core.jit import jit_fuser
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.moe.moe_utils import (
    MoEAuxLossAutoScaler,
    ProcessGroupCollection,
    apply_random_logits,
    apply_router_token_dropping,
    compute_routing_scores_for_aux_loss,
    get_tokens_per_expert_and_token_count,
    router_gating_linear,
    save_to_aux_losses_tracker,
    sinkhorn,
    switch_load_balancing_loss_func,
    topk_routing_with_score_function,
    z_loss_func,
)
from megatron.core.transformer.moe.router_replay import RouterReplay
from megatron.core.transformer.transformer_config import TransformerConfig


class Router(ABC, MegatronModule):
    """Base Router class"""

    def __init__(
        self,
        config: TransformerConfig,
        pg_collection: Optional[ProcessGroupCollection] = None,
        is_mtp_layer: bool = False,
    ) -> None:
        """
        Initialize the Router module.

        Args:
            config (TransformerConfig): Configuration object for the Transformer model.
            pg_collection (ProcessGroupCollection, optional): Process groups for MoE operations.
            is_mtp_layer (bool): Flag indicating if this router is part of an MTP layer.
        """
        super().__init__(config)
        self.config = config
        self.num_experts = self.config.num_moe_experts
        self.moe_aux_loss_func = None
        self.layer_number = None
        self.is_mtp_layer = is_mtp_layer
        self.tp_group = pg_collection.tp
        self.cp_group = pg_collection.cp
        self.tp_cp_group = pg_collection.tp_cp
        self.tp_dp_cp_group = pg_collection.tp_dp_cp

        # Initialize the gate weights.
        # TODO: Add support for GPU initialization, which requires updating the golden values.
        self.weight = torch.nn.Parameter(
            torch.empty((self.config.num_moe_experts, self.config.hidden_size), dtype=torch.float32)
        )
        if self.config.add_bias_linear:
            self.bias = torch.nn.Parameter(
                torch.empty((self.config.num_moe_experts), dtype=torch.float32)
            )
        else:
            self.bias = None
        # If calculate per token loss, we need to scale up moe aux loss by the number of tokens.
        # So we need to know if the model is configured to calculate per token loss.
        self.calculate_per_token_loss = self.config.calculate_per_token_loss
        self.reset_parameters()

    def reset_parameters(self):
        """Reset the router parameters."""
        if self.config.perform_initialization:
            self.config.init_method(self.weight)
            if self.bias is not None:
                self.config.init_method(self.bias)
        self.weight.data = self.weight.data.to(dtype=self.config.params_dtype)
        setattr(self.weight, 'sequence_parallel', self.config.sequence_parallel)
        if self.bias is not None:
            self.bias.data = self.bias.data.to(dtype=self.config.params_dtype)
            setattr(self.bias, 'sequence_parallel', self.config.sequence_parallel)

    def gating(self, input: torch.Tensor):
        """Forward pass of the router gate.

        Args:
            input (torch.Tensor): Input tensor.

        Returns:
            torch.Tensor: Logits tensor.
        """
        if self.weight.device.type == 'cpu':
            # move weights to GPU
            self.weight.data = self.weight.data.to(device=torch.cuda.current_device())
        if self.bias is not None and self.bias.device.type == 'cpu':
            self.bias.data = self.bias.data.to(device=torch.cuda.current_device())

        # Convert to specified datatype for routing computation if enabled
        router_dtype = input.dtype
        if self.config.moe_router_dtype == 'fp32':
            router_dtype = torch.float32
        elif self.config.moe_router_dtype == 'fp64':
            router_dtype = torch.float64
        logits = router_gating_linear(input, self.weight, self.bias, router_dtype)
        return logits

    @abstractmethod
    def routing(self, logits: torch.Tensor):
        """Routing function.

        Args:
            logits (torch.Tensor): Logits tensor.

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: A tuple containing token assignment
            probabilities and mapping.
        """
        raise NotImplementedError("Routing function not implemented.")

    @abstractmethod
    def forward(self, input: torch.Tensor):
        """
        Forward pass of the router.

        Args:
            input (torch.Tensor): Input tensor.
        """
        raise NotImplementedError("Forward function not implemented.")

    def set_layer_number(self, layer_number: int):
        """Set the layer number for the router."""
        self.layer_number = layer_number


class TopKRouter(Router):
    """Route each token to the top-k experts.

    The workflow of TopKRouter is as follows:
    (1) Calculate the logits by the router gating network.
    (2) Calculate the routing probabilities and map for top-k selection with score function.
    (3) [Optional] Apply token dropping to top-k expert selection.
    (4) [Optional] Apply the auxiliary load balancing loss for the given scores and routing map.

    Naming convention:
        logits: The output logits by the router gating network.
        scores: The scores after score function used to select the experts and calculate aux loss.
        probs: The topk weights used to combined the experts' outputs.
        routing_map: The masked routing map between tokens and experts.
    """

    def __init__(
        self,
        config: TransformerConfig,
        pg_collection: Optional[ProcessGroupCollection] = None,
        is_mtp_layer: bool = False,
    ) -> None:
        """Initialize the zero token dropping router.

        Args:
            config (TransformerConfig): The configuration for the transformer model.
            pg_collection (ProcessGroupCollection, optional): Process groups for MoE operations.
            is_mtp_layer (bool): Flag indicating if this router is part of an MTP layer.
        """
        super().__init__(config=config, pg_collection=pg_collection, is_mtp_layer=is_mtp_layer)
        self.topk = self.config.moe_router_topk
        self.routing_type = self.config.moe_router_load_balancing_type
        self.score_function = self.config.moe_router_score_function
        self.input_jitter = None

        self.enable_expert_bias = self.config.moe_router_enable_expert_bias
        if self.enable_expert_bias:
            self.register_buffer(
                'local_tokens_per_expert',
                torch.zeros(
                    self.config.num_moe_experts,
                    dtype=torch.float32,
                    device=torch.cuda.current_device(),
                ),
                persistent=False,
            )
            self.register_buffer(
                'expert_bias',
                torch.zeros(
                    self.config.num_moe_experts,
                    dtype=torch.float32,
                    device=torch.cuda.current_device(),
                ),
            )
        else:
            self.local_tokens_per_expert = None
            self.expert_bias = None

        # Initialize global tokens per expert for global aux loss
        if self.get_aux_loss_coeff("global_aux_loss") > 0:
            self.register_buffer(
                'global_tokens_per_expert',
                torch.zeros(
                    self.config.num_moe_experts,
                    dtype=torch.float32,
                    device=torch.cuda.current_device(),
                ),
                persistent=False,
            )
            self.register_buffer(
                'ga_steps',
                torch.tensor(0, dtype=torch.float32, device=torch.cuda.current_device()),
                persistent=False,
            )
        else:
            self.global_tokens_per_expert = None
            self.ga_steps = None

        self.router_replay = None
        if self.config.moe_enable_routing_replay:
            self.router_replay = RouterReplay()

    def _maintain_float32_expert_bias(self):
        """
        Maintain the expert bias in float32.

        When using bf16/fp16, the expert bias gets converted to lower precision in Float16Module.
        We keep it in float32 to avoid routing errors when updating the expert_bias.
        """
        if hasattr(self, 'expert_bias') and self.expert_bias is not None:
            if self.expert_bias.dtype != torch.float32:
                self.expert_bias.data = self.expert_bias.data.to(torch.float32)

    def sinkhorn_load_balancing(self, logits: torch.Tensor):
        """Apply sinkhorn routing to the logits tensor.

        Args:
            logits (torch.Tensor): The logits tensor.

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: A tuple containing token assignment
            probabilities and mask.
        """

        def _sinkhorn_activation(logits):
            if self.topk == 1:
                logits = torch.sigmoid(logits)
            else:  # k > 1
                logits = torch.softmax(logits, dim=-1, dtype=torch.float32).type_as(logits)
            return logits

        assert self.config.moe_aux_loss_coeff == 0, "Sinkhorn routing does not support aux loss."
        if self.training:
            with torch.no_grad():
                norm_logits = sinkhorn(
                    logits.to(dtype=torch.float32)
                )  # explicit fp32 conversion for stability
                _, indices = torch.topk(norm_logits, k=self.topk, dim=1)
            logits = _sinkhorn_activation(logits)
        else:
            logits = _sinkhorn_activation(logits)
            _, indices = torch.topk(logits, k=self.topk, dim=1)
        map = torch.zeros_like(logits).int().scatter(1, indices, 1).bool()
        scores = logits * map
        return scores, map

    def get_aux_loss_coeff(self, aux_loss_type: str) -> float:
        """Return the aux loss coeff for the given auxiliary loss type.
        If the auxiliary loss type is not found, return 0.0.
        """
        if isinstance(self.routing_type, str):
            if self.routing_type == aux_loss_type:
                return self.config.moe_aux_loss_coeff
        if isinstance(self.routing_type, list):
            try:
                idx = self.routing_type.index(aux_loss_type)
                return self.config.moe_aux_loss_coeff[idx]
            except ValueError:
                return 0.0
        return 0.0

    def is_aux_loss_enabled(self) -> bool:
        """Check if the auxiliary loss is enabled."""
        for aux_loss_type in ["aux_loss", "seq_aux_loss", "global_aux_loss"]:
            if self.get_aux_loss_coeff(aux_loss_type) > 0:
                return True
        return False

    def _apply_aux_loss(
        self,
        probs: torch.Tensor,
        scores_for_aux_loss: torch.Tensor,
        routing_map: torch.Tensor,
        with_padding_mask: bool = False,
    ):
        """Apply the auxiliary loss for the given scores and routing map."""
        aux_loss_coeff = self.get_aux_loss_coeff("aux_loss")
        if aux_loss_coeff == 0:
            return probs

        global_tokens_per_expert, local_num_tokens, total_num_tokens = (
            get_tokens_per_expert_and_token_count(
                routing_map=routing_map,
                reduce_group=self.tp_cp_group,
                topk=self.topk,
                with_padding_mask=with_padding_mask,
            )
        )

        aux_loss = switch_load_balancing_loss_func(
            probs=scores_for_aux_loss,
            tokens_per_expert=global_tokens_per_expert,
            total_num_tokens=total_num_tokens,
            topk=self.topk,
            num_experts=self.config.num_moe_experts,
            moe_aux_loss_coeff=aux_loss_coeff,
            fused=self.config.moe_router_fusion,
        )
        probs = self.attach_and_log_load_balancing_loss(
            probs,
            aux_loss_coeff,
            aux_loss,
            "load_balancing_loss",
            self.tp_cp_group,
            valid_token_count=local_num_tokens,
        )
        return probs

    def _apply_seq_aux_loss(
        self,
        probs: torch.Tensor,
        scores_for_aux_loss: torch.Tensor,
        routing_map: torch.Tensor,
        seq_length: int,
        bsz: int,
        with_padding_mask: bool = False,
    ):
        """Apply the sequence-level auxiliary loss for the given scores and routing map.

        To calculate the sequence-level aux loss, we reshape the batch_size dimension to
        experts dimension. The resulted loss by switch_load_balancing_loss_func is equal
        to the sum of aux loss for each sequence in the batch. And then we divide the aux
        loss by the batch size to get averaged aux loss.
        """
        seq_aux_loss_coeff = self.get_aux_loss_coeff("seq_aux_loss")
        if seq_aux_loss_coeff == 0:
            return probs

        scores_for_aux_loss = scores_for_aux_loss.reshape(seq_length, -1)
        routing_map = routing_map.reshape(seq_length, -1)

        global_tokens_per_expert, local_num_tokens, total_num_tokens = (
            get_tokens_per_expert_and_token_count(
                routing_map=routing_map,
                reduce_group=self.tp_cp_group,
                with_padding_mask=with_padding_mask,
                topk=self.topk * bsz,
            )
        )

        aux_loss = (
            switch_load_balancing_loss_func(
                probs=scores_for_aux_loss,
                tokens_per_expert=global_tokens_per_expert,
                total_num_tokens=total_num_tokens,
                topk=self.topk,
                num_experts=self.config.num_moe_experts,
                moe_aux_loss_coeff=seq_aux_loss_coeff,
                fused=self.config.moe_router_fusion,
            )
            / bsz
        )

        probs = self.attach_and_log_load_balancing_loss(
            probs,
            seq_aux_loss_coeff,
            aux_loss,
            "seq_load_balancing_loss",
            self.tp_cp_group,
            valid_token_count=local_num_tokens,
        )
        return probs

    def _apply_global_aux_loss(
        self,
        probs: torch.Tensor,
        scores_for_aux_loss: torch.Tensor,
        routing_map: torch.Tensor,
        with_padding_mask: bool = False,
    ):
        """Apply the global auxiliary loss for the given scores and routing map."""
        global_aux_loss_coeff = self.get_aux_loss_coeff("global_aux_loss")
        if global_aux_loss_coeff == 0:
            return probs

        # Use unified function to compute tokens_per_expert and num_tokens
        global_tokens_per_expert, local_num_tokens, total_num_tokens = (
            get_tokens_per_expert_and_token_count(
                routing_map=routing_map,
                reduce_group=self.tp_dp_cp_group,
                with_padding_mask=with_padding_mask,
                topk=self.topk,
            )
        )

        self.global_tokens_per_expert += global_tokens_per_expert
        self.ga_steps += 1
        averated_tokens_per_expert = self.global_tokens_per_expert / self.ga_steps

        global_aux_loss = switch_load_balancing_loss_func(
            probs=scores_for_aux_loss,
            tokens_per_expert=averated_tokens_per_expert,
            total_num_tokens=total_num_tokens,
            topk=self.topk,
            num_experts=self.config.num_moe_experts,
            moe_aux_loss_coeff=global_aux_loss_coeff,
            fused=self.config.moe_router_fusion,
        )
        probs = self.attach_and_log_load_balancing_loss(
            probs,
            global_aux_loss_coeff,
            global_aux_loss,
            "global_load_balancing_loss",
            self.tp_dp_cp_group,
            reduce_group_has_dp=True,
            valid_token_count=local_num_tokens,
        )
        return probs

    def attach_and_log_load_balancing_loss(
        self,
        activation: torch.Tensor,
        aux_loss_coeff: float,
        aux_loss: torch.Tensor,
        aux_loss_name: str,
        reduce_group: torch.distributed.ProcessGroup,
        reduce_group_has_dp: bool = False,
        valid_token_count: Optional[Union[int, torch.Tensor]] = None,
    ):
        """Attach aux loss function to activation and add to logging.

        Args:
            activation (torch.Tensor): Activation tensor to attach the aux loss to.
            aux_loss_coeff (float): Coefficient for the aux loss.
            aux_loss (torch.Tensor): Computed aux loss.
            aux_loss_name (str): Name of the aux loss for logging.
            reduce_group (torch.distributed.ProcessGroup): Process group for reduction.
            reduce_group_has_dp (bool): Whether the reduce group has data parallel ranks.
                Set this to True if the reduce group has data parallel ranks. This flag is used to
                ensure the correct reduction in aux loss tracking.
            valid_token_count (int or torch.Tensor, optional): Number of valid tokens excluding
                padding tokens. Can be a Python int or a torch.Tensor (typically 0-d tensor).
                If None, uses activation.shape[0]. Defaults to None.
        """
        # When using repeated MTP layers, the loss is counted "mtp_num_layers" times.
        # To avoid accumulating the load balancing loss multiple times, we scale it by
        # 1/mtp_num_layers so the total loss is correct.
        if (
            self.is_mtp_layer
            and self.config.mtp_use_repeated_layer
            and self.config.mtp_num_layers is not None
        ):
            aux_loss = aux_loss / self.config.mtp_num_layers

        # TODO (zijiey): fix the per_layer_logging for MTP, currently it will incorrectly
        # add the aux loss logging value to other layer's since it is difficult to get the
        # correct layer_number for MTP. It does not affect the correctness of the calculation
        # results and the reduced load_balancing_loss logging value.
        num_layers = self.config.num_layers
        if self.config.mtp_num_layers is not None:
            num_layers += self.config.mtp_num_layers

        if self.is_mtp_layer:
            layer_number = self.layer_number + self.config.num_layers
        else:
            layer_number = self.layer_number

        save_to_aux_losses_tracker(
            aux_loss_name,
            aux_loss / aux_loss_coeff,
            layer_number,
            num_layers,
            reduce_group=reduce_group,
            reduce_group_has_dp=reduce_group_has_dp,
        )
        if self.calculate_per_token_loss:
            # Scale the aux_loss by the number of tokens.
            # The expected final scaling for aux_loss gradients is 1/(num_micro_batches * dp_size).
            # After commit 02648000, Megatron started using the number of total tokens to scale
            # gradients under the argument of calculate_per_token_loss,
            # which scales both the main_loss gradient and aux_loss gradient by
            # 1/(num_local_tokens * dp_size * num_micro_batches) in finalize_model_grads function.
            # To correct this scaling, we need to scale the aux_loss by num_local_tokens here.
            # Use valid_token_count (excluding padding) if provided, otherwise use total tokens.
            num_tokens = valid_token_count if valid_token_count is not None else activation.shape[0]
            activation = MoEAuxLossAutoScaler.apply(activation, aux_loss * num_tokens)
        else:
            activation = MoEAuxLossAutoScaler.apply(activation, aux_loss)
        return activation

    def apply_z_loss(self, logits, padding_mask: Optional[torch.Tensor] = None):
        """Encourages the router's logits to remain small to enhance stability.
        Please refer to the ST-MoE paper (https://arxiv.org/pdf/2202.08906.pdf) for details.

        Args:
            logits (torch.Tensor): The logits of the router.
            padding_mask (torch.Tensor, optional): Boolean mask indicating non-padding tokens.
                                                   Shape in [num_tokens]. True for valid tokens,
                                                   False for padding tokens. Defaults to None.

        Returns:
            torch.Tensor: The logits after applying the z-loss.
        """
        if self.config.moe_z_loss_coeff is not None and self.training and torch.is_grad_enabled():
            # Skip Z loss calculations when using torch.no_grad() or checkpointing.
            moe_z_loss_coeff = self.config.moe_z_loss_coeff / self.tp_cp_group.size()
            z_loss = z_loss_func(logits, moe_z_loss_coeff, padding_mask=padding_mask)
            if self.calculate_per_token_loss:
                # The expected final scaling for z_loss gradients is
                # 1/(num_micro_batches * dp_size).
                # After commit 02648000, Megatron started using the number of total tokens
                # to scale gradients under the argument of calculate_per_token_loss,
                # which scales both the main_loss gradient and z_loss gradient by
                # 1/(num_local_tokens * dp_size * num_micro_batches) in finalize_model_grads().
                # To correct this scaling, we need to scale the z_loss by num_local_tokens here.
                # Count valid tokens: sum of inverted mask (False -> True = valid)
                num_tokens = (~padding_mask).sum() if padding_mask is not None else logits.shape[0]
                logits = MoEAuxLossAutoScaler.apply(logits, z_loss * num_tokens)
            else:
                logits = MoEAuxLossAutoScaler.apply(logits, z_loss)

            # When using repeated MTP layers, the same MTP layer is called mtp_num_layers times.
            # To avoid accumulating the z_loss multiple times, we scale it by 1/mtp_num_layers
            # so the total loss is correct.
            if (
                self.is_mtp_layer
                and self.config.mtp_use_repeated_layer
                and self.config.mtp_num_layers is not None
            ):
                z_loss = z_loss / self.config.mtp_num_layers

            num_layers = self.config.num_layers
            if self.config.mtp_num_layers is not None:
                num_layers += self.config.mtp_num_layers

            if self.is_mtp_layer:
                layer_number = self.layer_number + self.config.num_layers
            else:
                layer_number = self.layer_number

            save_to_aux_losses_tracker(
                "z_loss", z_loss / moe_z_loss_coeff, layer_number, num_layers
            )
        return logits

    def apply_input_jitter(self, input: torch.Tensor):
        """Add noise to the input tensor.
        Refer to https://arxiv.org/abs/2101.03961.

        Args:
            input (Tensor): Input tensor.

        Returns:
            Tensor: Jittered input.
        """
        if self.config.moe_input_jitter_eps is not None:
            eps = self.config.moe_input_jitter_eps
            if self.input_jitter is None:
                self.input_jitter = torch.distributions.uniform.Uniform(
                    torch.tensor(1.0 - eps, dtype=input.dtype, device=input.device),
                    torch.tensor(1.0 + eps, dtype=input.dtype, device=input.device),
                ).rsample
            return input * self.input_jitter(input.shape)
        else:
            return input

    @jit_fuser
    def _apply_expert_bias(
        self, routing_map: torch.Tensor, padding_mask: Optional[torch.Tensor] = None
    ):
        """
        Update expert bias and tokens_per_expert
        Prevent extra local tokens accumulation on evaluation or activation recomputation
        """
        if self.enable_expert_bias and torch.is_grad_enabled():
            with torch.no_grad():
                if padding_mask is not None:
                    routing_map = routing_map & (~padding_mask)
                self.local_tokens_per_expert += routing_map.sum(dim=0)

    def routing(self, logits: torch.Tensor, padding_mask: Optional[torch.Tensor] = None):
        """Top-k routing function

        Args:
            logits (torch.Tensor): Logits tensor after gating.
            padding_mask (torch.Tensor, optional): Boolean mask indicating non-padding tokens.
                                                   Shape [seq_length, bsz]. True for valid tokens,
                                                   False for padding tokens. Defaults to None.

        Returns:
            probs (torch.Tensor): The probabilities of token to experts assignment.
            routing_map (torch.Tensor): The mapping of token to experts assignment,
                with shape [num_tokens, num_experts].
        """
        seq_length, bsz = logits.shape[:2]
        logits = logits.view(-1, self.config.num_moe_experts)

        # Flatten padding_mask to [num_tokens] if provided
        if padding_mask is not None:
            padding_mask = padding_mask.reshape(-1)

        # Apply Z-Loss
        logits = self.apply_z_loss(logits, padding_mask=padding_mask)

        # Calculate probs and routing_map for token dispatching
        if self.routing_type == "sinkhorn":
            probs, routing_map = self.sinkhorn_load_balancing(logits)
        else:
            probs, routing_map = topk_routing_with_score_function(
                logits,
                self.topk,
                use_pre_softmax=self.config.moe_router_pre_softmax,
                num_groups=self.config.moe_router_num_groups,
                group_topk=self.config.moe_router_group_topk,
                scaling_factor=self.config.moe_router_topk_scaling_factor,
                score_function=self.score_function,
                expert_bias=self.expert_bias,
                fused=self.config.moe_router_fusion,
                router_replay=self.router_replay,
            )

        # Apply token dropping to probs and routing_map.
        if self.config.moe_expert_capacity_factor is not None:
            probs, routing_map = apply_router_token_dropping(
                probs,
                routing_map,
                router_topk=self.topk,
                capacity_factor=self.config.moe_expert_capacity_factor,
                drop_policy=self.config.moe_token_drop_policy,
                pad_to_capacity=self.config.moe_pad_expert_input_to_capacity,
            )

        # Apply each aux loss type and attach aux loss autograd function to probs
        if self.training and torch.is_grad_enabled() and self.is_aux_loss_enabled():
            # Calculate scores and routing_map for aux loss
            routing_map_for_aux_loss, scores_for_aux_loss = compute_routing_scores_for_aux_loss(
                logits,
                self.topk,
                self.score_function,
                fused=self.config.moe_router_fusion,
                padding_mask=padding_mask,
            )
            probs = self._apply_aux_loss(
                probs,
                scores_for_aux_loss,
                routing_map_for_aux_loss,
                with_padding_mask=padding_mask is not None,
            )
            probs = self._apply_seq_aux_loss(
                probs,
                scores_for_aux_loss,
                routing_map_for_aux_loss,
                seq_length,
                bsz,
                with_padding_mask=padding_mask is not None,
            )
            probs = self._apply_global_aux_loss(
                probs,
                scores_for_aux_loss,
                routing_map_for_aux_loss,
                with_padding_mask=padding_mask is not None,
            )

        # Optionally apply expert bias
        self._apply_expert_bias(routing_map, padding_mask=padding_mask)

        return probs, routing_map

    def reset_global_aux_loss_tracker(self):
        """Reset the global aux loss tracker."""
        if self.global_tokens_per_expert is not None:
            self.global_tokens_per_expert.zero_()
            self.ga_steps.zero_()

    def forward(self, input: torch.Tensor, padding_mask: Optional[torch.Tensor] = None):
        """
        Forward pass of the router.

        Args:
            input (torch.Tensor): Input tensor.
            padding_mask (torch.Tensor, optional): Boolean mask indicating non-padding tokens.
                                                   Shape [seq_length, bsz]. True for valid tokens,
                                                   False for padding tokens. Defaults to None.
        """
        self._maintain_float32_expert_bias()

        # Apply input jitter
        input = self.apply_input_jitter(input)
        logits = self.gating(input)

        if self.config.moe_router_force_load_balancing:
            # Apply force load balancing with random logits for benchmark
            logits = apply_random_logits(logits)

        probs, routing_map = self.routing(logits, padding_mask=padding_mask)

        return probs, routing_map

    def _load_from_state_dict(self, *args, **kwargs):
        """Load the state dict of the router."""
        self._maintain_float32_expert_bias()  # switch to float32 before loading
        return super()._load_from_state_dict(*args, **kwargs)

    def _save_to_state_dict(self, *args, **kwargs):
        """Save the state dict of the router."""
        self._maintain_float32_expert_bias()  # switch to float32 before saving
        return super()._save_to_state_dict(*args, **kwargs)


class InferenceTopKRouter(TopKRouter):
    """Inference-only top-k router that strips out training-specific overhead.

    A stripped-down version of TopKRouter that skips z-loss, auxiliary load
    balancing losses, token dropping, and expert bias updates. The _forward()
    method is @torch.compile()'d and returns dense [num_tokens, topk] tensors
    instead of sparse [num_tokens, num_experts] for compatibility with FlashInfer.

    Falls back to the parent TopKRouter.forward() for training or
    non-CUDA-graphed inference iterations.
    """

    def __init__(
        self,
        config: TransformerConfig,
        pg_collection: Optional[ProcessGroupCollection] = None,
        is_mtp_layer: bool = False,
    ) -> None:
        """Initialize the specialized inference top-k router.

        Args:
            config (TransformerConfig): The configuration for the transformer model.
            pg_collection (ProcessGroupCollection, optional): Process groups for MoE operations.
        """
        # Enforce constraints before calling super().__init__
        assert config.moe_router_num_groups is None, (
            f"InferenceTopKRouter requires moe_router_num_groups=None, "
            f"got {config.moe_router_num_groups}"
        )
        assert config.moe_router_score_function in ["sigmoid", "softmax"], (
            f"InferenceTopKRouter requires moe_router_score_function in "
            f"['sigmoid', 'softmax'], got '{config.moe_router_score_function}'"
        )

        super().__init__(config=config, pg_collection=pg_collection)

        self.is_inference_cuda_graphed_iteration = False

    def set_inference_cuda_graphed_iteration(self):
        """Enable CUDA graph-compatible operations for the router."""
        self.is_inference_cuda_graphed_iteration = True

    def unset_inference_cuda_graphed_iteration(self):
        """Disable CUDA graph-compatible operations for the router."""
        self.is_inference_cuda_graphed_iteration = False

    @staticmethod
    @torch.compile
    def _compiled_topk_routing(
        logits,
        topk,
        use_pre_softmax,
        num_groups,
        group_topk,
        scaling_factor,
        score_function,
        expert_bias,
        fused,
        router_replay,
        dense_output,
    ):
        return topk_routing_with_score_function(
            logits,
            topk,
            use_pre_softmax=use_pre_softmax,
            num_groups=num_groups,
            group_topk=group_topk,
            scaling_factor=scaling_factor,
            score_function=score_function,
            expert_bias=expert_bias,
            fused=fused,
            router_replay=router_replay,
            dense_output=dense_output,
        )

    def _forward(self, input: torch.Tensor, padding_mask: Optional[torch.Tensor] = None):
        logits = self.gating(input).squeeze(1)  # [num_tokens, num_experts]

        probs, top_indices = self._compiled_topk_routing(
            logits,
            self.topk,
            use_pre_softmax=self.config.moe_router_pre_softmax,
            num_groups=self.config.moe_router_num_groups,
            group_topk=self.config.moe_router_group_topk,
            scaling_factor=self.config.moe_router_topk_scaling_factor,
            score_function=self.score_function,
            expert_bias=self.expert_bias,
            fused=self.config.moe_router_fusion,
            router_replay=self.router_replay,
            dense_output=True,
        )
        return probs.squeeze(1), top_indices.squeeze(1)

    def forward(self, input: torch.Tensor, padding_mask: Optional[torch.Tensor] = None):
        """Simplified forward pass for inference - returns dense tensors only.

        Args:
            input (torch.Tensor): Input tensor of shape [seq_length, bsz, hidden_size].
            padding_mask (torch.Tensor, optional): Not used in inference.

        Returns:
            Tuple[torch.Tensor, torch.Tensor]:
                - probs: Normalized routing probabilities [num_tokens, topk]
                - top_indices: Selected expert indices [num_tokens, topk]
        """

        if self.training or not self.is_inference_cuda_graphed_iteration:
            return super().forward(input, padding_mask)

        return self._forward(input, padding_mask)


================================================
FILE: megatron/core/transformer/moe/router_replay.py
================================================
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
from enum import Enum
from typing import Callable, List, Optional, Tuple

import torch


class RouterReplayAction(Enum):
    """
    A Enum to define the actions for router replay.
    """

    RECORD = "record"  # Record the topk indices for replay
    REPLAY_FORWARD = "replay_forward"  # Replay the recorded topk indices for forward pass
    REPLAY_BACKWARD = "replay_backward"  # Replay topk indices for re-compute during backward pass


class RouterReplay:
    """
    A class to manage the recording and replaying of MoE routing decisions.
    It holds all router instances and provides static methods to globally
    control recording and replaying.
    """

    # Static variable to hold all router instances, one per MoE layer.
    global_router_replay_instances: List['RouterReplay'] = []

    @staticmethod
    def set_replay_data(all_layers_topk_indices: List[torch.Tensor]):
        """
        Distributes the topk indices for all layers to their respective RouterReplay instances.
        :param all_layers_topk_indices: A list of tensors, where each tensor contains the
                                        topk indices for a specific layer. The order
                                        must match the instantiation order of the routers.
        """
        if len(all_layers_topk_indices) != len(RouterReplay.global_router_replay_instances):
            raise ValueError(
                f"The number of replay tensors ({len(all_layers_topk_indices)}) "
                f"does not match instances ({len(RouterReplay.global_router_replay_instances)})."
            )
        for i, router_instance in enumerate(RouterReplay.global_router_replay_instances):
            router_instance.set_target_indices(all_layers_topk_indices[i])

    @staticmethod
    def get_recorded_data() -> List[torch.Tensor]:
        """
        Collects the recorded topk indices from all RouterReplay instances.
        :return: A list of tensors, each containing the recorded topk indices for a layer.
        """
        return [
            router.get_recorded_indices() for router in RouterReplay.global_router_replay_instances
        ]

    @staticmethod
    def clear_global_indices():
        """Clears the recorded and target topk indices in all instances."""
        for router in RouterReplay.global_router_replay_instances:
            router.clear_indices()

    @staticmethod
    def set_global_router_replay_action(router_replay_action: RouterReplayAction):
        """Sets the router replay action for all router instances."""
        for router in RouterReplay.global_router_replay_instances:
            router.set_router_replay_action(router_replay_action)

    @staticmethod
    def clear_global_router_replay_action():
        """Clears the router replay action for all router instances."""
        for router in RouterReplay.global_router_replay_instances:
            router.clear_router_replay_action()

    @staticmethod
    def clear_global_router_replay_instances():
        """Clear the global list of router replay instances to prevent memory leaks."""
        RouterReplay.global_router_replay_instances.clear()

    @staticmethod
    def set_global_static_buffers(static_buffer: torch.Tensor):
        """Sets static buffers for all router instances from a combined buffer.

        Args:
            static_buffer: Tensor of shape [max_tokens, num_layers, topk].
                          Each layer's RouterReplay gets a slice [:, layer_idx, :].
        """
        num_layers = len(RouterReplay.global_router_replay_instances)
        assert static_buffer.shape[1] == num_layers, (
            f"Buffer has {static_buffer.shape[1]} layers but there are "
            f"{num_layers} RouterReplay instances."
        )
        for layer_idx, router_instance in enumerate(RouterReplay.global_router_replay_instances):
            # Each layer gets a view of shape [max_tokens, topk]
            router_instance.set_static_buffer(static_buffer[:, layer_idx, :])

    @staticmethod
    def clear_global_static_buffers():
        """Clears static buffers from all router instances."""
        for router in RouterReplay.global_router_replay_instances:
            router.clear_static_buffer()

    def __init__(self):
        """Initializes a RouterReplay instance for a specific layer."""
        self.target_topk_idx: Optional[torch.Tensor] = None  # Target topk indices for replay
        self.recorded_topk_idx: Optional[torch.Tensor] = None  # Recorded topk indices for replay
        self.router_replay_action: Optional[RouterReplayAction] = (
            None  # Router replay action for this layer
        )
        self.replay_backward_list: List[torch.Tensor] = (
            []
        )  # List of tensors for backward pass replay
        self.static_buffer: Optional[torch.Tensor] = None  # Static buffer for CUDA graph
        RouterReplay.global_router_replay_instances.append(self)

    def set_target_indices(self, topk_indices: torch.Tensor):
        """Sets the target topk indices for replay."""
        self.target_topk_idx = topk_indices
        self.replay_backward_list.append(topk_indices)

    def get_recorded_indices(self) -> Optional[torch.Tensor]:
        """Returns the recorded topk indices."""
        return self.recorded_topk_idx

    def clear_indices(self):
        """Clears the recorded and target topk indices."""
        self.recorded_topk_idx = None
        self.target_topk_idx = None
        self.replay_backward_list = []

    def set_router_replay_action(self, router_replay_action: RouterReplayAction):
        """Sets the router replay action for this layer."""
        self.router_replay_action = router_replay_action

    def clear_router_replay_action(self):
        """Clears the router replay action for this layer."""
        self.router_replay_action = None

    def get_replay_topk(
        self,
        scores: torch.Tensor,
        topk: int,
        num_groups: Optional[int] = None,
        group_topk: Optional[int] = None,
        default_compute_topk: Callable[
            [torch.Tensor, int, Optional[int], Optional[int]], Tuple[torch.Tensor, torch.Tensor]
        ] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        A wrapper for top-k computation that handles different replay actions.

        Args:
            scores (torch.Tensor): The scores to compute top-k on.
            topk (int): The number of top elements to select.
            num_groups (Optional[int]): Number of expert groups for group-limited routing.
            group_topk (Optional[int]): Number of groups to select for each token.
            default_compute_topk (Callable): The default top-k computation function, which
                                             should return a tuple of (values, indices).

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: A tuple containing the top-k values and indices.
        """
        if self.router_replay_action == RouterReplayAction.RECORD:
            probs, top_indices = default_compute_topk(
                scores, topk, num_groups=num_groups, group_topk=group_topk
            )
            self.record_indices(top_indices)
            return probs, top_indices
        elif self.router_replay_action == RouterReplayAction.REPLAY_FORWARD:
            top_indices = self.target_topk_idx
            # Ensure indices are on the correct device
            top_indices = top_indices.to(scores.device)
            # Gather the scores for the replayed indices to get the probabilities
            probs = scores.gather(1, top_indices)
            return probs, top_indices
        elif self.router_replay_action == RouterReplayAction.REPLAY_BACKWARD:
            top_indices = self.replay_backward_list.pop(0)
            # Ensure indices are on the correct device
            top_indices = top_indices.to(scores.device)
            # Gather the scores for the replayed indices to get the probabilities
            probs = scores.gather(1, top_indices)
            return probs, top_indices
        else:
            return default_compute_topk(scores, topk, num_groups, group_topk)

    def set_static_buffer(self, buffer: torch.Tensor):
        """Sets a static buffer for CUDA graph compatible recording.

        Args:
            buffer: Tensor of shape [max_tokens, topk] to copy routing indices into.
        """
        self.static_buffer = buffer

    def clear_static_buffer(self):
        """Clears the static buffer."""
        self.static_buffer = None

    def record_indices(self, topk_indices: torch.Tensor):
        """Records the topk indices.

        If a static buffer is set (for CUDA graph compatibility), copies into it.
        Otherwise, just stores the tensor reference.
        """
        if self.static_buffer is not None:
            # Copy into static buffer for CUDA graph compatibility.
            num_tokens = topk_indices.shape[0]
            self.static_buffer[:num_tokens].copy_(topk_indices)
            self.recorded_topk_idx = self.static_buffer[:num_tokens]
        else:
            self.recorded_topk_idx = topk_indices


================================================
FILE: megatron/core/transformer/moe/shared_experts.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import warnings
from copy import copy
from typing import Optional

import torch
import torch.nn.functional as F

from megatron.core.dist_checkpointing.mapping import ShardedStateDict
from megatron.core.extensions.transformer_engine import HAVE_TE
from megatron.core.fusions.fused_bias_geglu import bias_geglu_impl
from megatron.core.fusions.fused_bias_gelu import bias_gelu_impl
from megatron.core.fusions.fused_bias_swiglu import bias_swiglu_impl
from megatron.core.tensor_parallel.mappings import (
    copy_to_tensor_model_parallel_region,
    gather_from_sequence_parallel_region,
    reduce_from_tensor_model_parallel_region,
    reduce_scatter_to_sequence_parallel_region,
)
from megatron.core.transformer.mlp import MLP, MLPSubmodules
from megatron.core.transformer.moe.moe_utils import ProcessGroupCollection
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.typed_torch import apply_module
from megatron.core.utils import (
    is_te_min_version,
    is_torch_min_version,
    make_sharded_tensor_for_checkpoint,
)

if HAVE_TE:
    from megatron.core.extensions.transformer_engine import TELinear, set_save_original_input
else:
    TELinear, set_save_original_input = None, None


class SharedExpertMLP(MLP):
    """
    MLP layer for Shared Experts.
    """

    # This stream is used when '--moe-shared-expert-overlap' is set.
    # The shared experts are scheduled into this stream to be overlapped with the dispatcher.
    stream = None

    def __init__(
        self,
        config: TransformerConfig,
        submodules: MLPSubmodules,
        gate: bool,
        pg_collection: Optional[ProcessGroupCollection] = None,
    ):
        config = copy(config)
        assert config.add_bias_linear == False, "bias is not supported in the shared experts, "
        "please set '--disable-bias-linear' instead."

        config.ffn_hidden_size = config.moe_shared_expert_intermediate_size
        # TODO(Hepteract): pass pg_collection to MLP after refactoring MLP
        super().__init__(config=config, submodules=submodules, tp_group=pg_collection.tp)

        self.use_shared_expert_gate = gate
        if self.use_shared_expert_gate:
            # TODO: Add support for GPU initialization, which requires updating the golden values.
            self.gate_weight = torch.nn.Parameter(torch.empty((1, self.config.hidden_size)))
            if config.perform_initialization:
                config.init_method(self.gate_weight)
            self.gate_weight.data = self.gate_weight.data.to(dtype=config.params_dtype)
            setattr(self.gate_weight, 'sequence_parallel', self.config.sequence_parallel)
        else:
            self.gate_weight = None

        if (
            self.config.fp8
            and self.config.fp8_recipe != 'delayed'
            and is_te_min_version("2.6.0dev0")
        ) or (self.config.fp4 and is_te_min_version("2.7.0.dev0")):
            # For fp8/fp4 training, the output of pre_mlp_layernorm is saved by router, and
            # the shared expert linear_fc1 also saves the quantized tensor of this output.
            # Here we set the linear_fc1 to save the original input tensors to avoid the extra
            # memory usage of the quantized tensor.
            shared_experts_recompute = (
                config.recompute_granularity == 'selective'
                and "shared_experts" in config.recompute_modules
            )
            if not shared_experts_recompute and HAVE_TE and isinstance(self.linear_fc1, TELinear):
                set_save_original_input(self.linear_fc1)

        if self.config.moe_shared_expert_overlap:
            # disable TP related AG/RS communications in the linear module
            for linear in [self.linear_fc1, self.linear_fc2]:
                if hasattr(linear, 'parallel_mode'):
                    # TELinear
                    linear.parallel_mode = None
                    linear.ub_overlap_rs_fprop = False
                    linear.ub_overlap_ag_dgrad = False
                    linear.ub_overlap_ag_fprop = False
                    linear.ub_overlap_rs_dgrad = False
                else:
                    # MCore legacy Linear
                    linear.explicit_expert_comm = True

            # The overlapped version is splitted into some separated functions and is put inside
            # the token dispatcher. These functions should be called in this order and no one can
            # be skipped:
            #     pre_forward_comm(input)
            #     linear_fc1_forward_and_act()
            #     linear_fc2_forward()
            #     post_forward_comm()
            #     output = get_output()
            #
            # We use cached intermediate results to avoid messy arg passing in the dispatcher.
            self.cached_fc1_input = None
            self.cached_fc2_input = None
            self.cached_fc2_output = None
            self.cached_output = None
            self.gate_score = None

            if self.stream is None:
                self.stream = torch.cuda.Stream()

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Forward function"""
        output, _ = super().forward(hidden_states)
        if self.use_shared_expert_gate:
            logits = torch.nn.functional.linear(hidden_states, self.gate_weight)
            gate_score = torch.nn.functional.sigmoid(logits)
            output = output * gate_score
        return output

    def sharded_state_dict(
        self, prefix: str = '', sharded_offsets: tuple = (), metadata: Optional[dict] = None
    ) -> ShardedStateDict:
        """Gets sharded state dict."""
        sharded_state_dict = super().sharded_state_dict(prefix, sharded_offsets, metadata)
        if self.use_shared_expert_gate:
            name = 'gate_weight'
            state_dict = self.state_dict(prefix='', keep_vars=True)
            sub_sd = {
                f'{prefix}{name}': make_sharded_tensor_for_checkpoint(
                    state_dict[name],
                    f'{prefix}{name}',
                    prepend_offsets=sharded_offsets,
                    tp_group=self.tp_group,
                    dp_cp_group=metadata['dp_cp_group'],
                )
            }
            sharded_state_dict.update(sub_sd)
        return sharded_state_dict

    def pre_forward_comm(self, input):
        """
        All Gather for SP before forward.
        This function is used to overlap shared experts with the dispatcher.
        It is only useful when --moe-shared-expert-overlap is set and may be changed.
        """
        assert self.config.moe_shared_expert_overlap
        assert self.cached_output is None
        self.stream.wait_stream(torch.cuda.current_stream())
        with torch.cuda.stream(self.stream):
            if self.use_shared_expert_gate:
                logits = torch.nn.functional.linear(input, self.gate_weight)
                self.gate_score = torch.nn.functional.sigmoid(logits)
            if self.config.sequence_parallel:
                self.cached_fc1_input = gather_from_sequence_parallel_region(
                    input, tensor_parallel_output_grad=True
                )
            else:
                self.cached_fc1_input = copy_to_tensor_model_parallel_region(input)
            set_tensor_grad_fn_sequence_sr(self.cached_fc1_input, torch.iinfo(torch.int).max)

    def linear_fc1_forward_and_act(self, overlapped_comm_output=None):
        """
        Do Linear FC1 and activation function forward.
        This function is used to overlap shared experts with the dispatcher.
        It is only useful when --moe-shared-expert-overlap is set and may be changed.
        """
        assert self.config.moe_shared_expert_overlap
        assert self.cached_fc1_input is not None
        if overlapped_comm_output is not None:
            set_tensor_grad_fn_sequence_sr(overlapped_comm_output, torch.iinfo(torch.int).max)
        with torch.cuda.stream(self.stream):
            # [s, b, 4 * h/p]
            intermediate_parallel, bias_parallel = apply_module(self.linear_fc1)(
                self.cached_fc1_input
            )
            self.cached_fc1_input = None

            if self.config.use_te_activation_func:
                if bias_parallel is not None:
                    intermediate_parallel = intermediate_parallel + bias_parallel
                intermediate_parallel = self.activation_func(intermediate_parallel)
            elif self.config.bias_activation_fusion:
                if self.activation_func == F.gelu:
                    if self.config.gated_linear_unit:
                        intermediate_parallel = bias_geglu_impl(
                            intermediate_parallel, bias_parallel
                        )
                    else:
                        assert self.config.add_bias_linear is True
                        intermediate_parallel = bias_gelu_impl(intermediate_parallel, bias_parallel)
                elif self.activation_func == F.silu and self.config.gated_linear_unit:
                    intermediate_parallel = bias_swiglu_impl(
                        intermediate_parallel,
                        bias_parallel,
                        self.config.activation_func_fp8_input_store,
                    )
                else:
                    raise ValueError("Only support fusion of gelu and swiglu")
            else:
                if bias_parallel is not None:
                    intermediate_parallel = intermediate_parallel + bias_parallel
                if self.config.gated_linear_unit:

                    def glu(x):
                        x = torch.chunk(x, 2, dim=-1)
                        return self.config.activation_func(x[0]) * x[1]

                    intermediate_parallel = glu(intermediate_parallel)
                else:
                    intermediate_parallel = self.activation_func(intermediate_parallel)

            self.cached_fc2_input = intermediate_parallel

    def linear_fc2_forward(self, overlapped_comm_output=None):
        """
        Do Linear FC2 forward.
        This function is used to overlap shared experts with the dispatcher.
        It is only useful when --moe-shared-expert-overlap is set and may be changed.
        """
        assert self.config.moe_shared_expert_overlap
        assert self.cached_fc2_input is not None
        if overlapped_comm_output is not None:
            set_tensor_grad_fn_sequence_sr(overlapped_comm_output, torch.iinfo(torch.int).max)
        with torch.cuda.stream(self.stream):
            # [s, b, h]
            self.cached_fc2_output, _ = apply_module(self.linear_fc2)(self.cached_fc2_input)
            self.cached_fc2_input = None

    def post_forward_comm(self):
        """
        Reduce scatter for SP after forward.
        This function is used to overlap shared experts with the dispatcher.
        It is only useful when --moe-shared-expert-overlap is set and may be changed.
        """
        assert self.config.moe_shared_expert_overlap
        assert self.cached_fc2_output is not None
        with torch.cuda.stream(self.stream):
            if self.config.sequence_parallel:
                self.cached_output = reduce_scatter_to_sequence_parallel_region(
                    self.cached_fc2_output
                )
            else:
                self.cached_output = reduce_from_tensor_model_parallel_region(
                    self.cached_fc2_output
                )
            self.cached_fc2_output = None
            set_tensor_grad_fn_sequence_sr(self.cached_output, torch.iinfo(torch.int).max)

    def get_output(self):
        """
        Gets the module forward output.
        This function is used to overlap shared experts with the dispatcher.
        It is only useful when --moe-shared-expert-overlap is set and may be changed.
        """
        assert self.config.moe_shared_expert_overlap
        assert self.cached_output is not None
        with torch.cuda.stream(self.stream):
            if self.use_shared_expert_gate:
                assert self.gate_score is not None
                output = self.cached_output * self.gate_score
                self.gate_score = None
            else:
                output = self.cached_output
            self.cached_output = None
        torch.cuda.current_stream().wait_stream(self.stream)
        return output


def set_tensor_grad_fn_sequence_sr(tensor, value):
    """
    Set sequence_sr for the grad_fn of a tensor to control the backward order.
    For older PyTorch version, do nothing (backward order is not changed).
    The bigger the value is, the earlier the grad_fn is scheduled.
    """
    if is_torch_min_version("2.2.0"):
        if tensor is not None and tensor.grad_fn is not None:
            tensor.grad_fn._set_sequence_nr(value)
    else:
        warnings.warn(
            "WARNING : PyTorch is too old to set sequence_sr and the performance may not "
            "be optimal. Please use PyTorch >= 2.2.0 for better performance."
        )


================================================
FILE: megatron/core/transformer/moe/token_dispatcher.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import logging
from abc import ABC, abstractmethod
from typing import List, Optional, Tuple

import torch

from megatron.core import utils
from megatron.core.config import is_experimental_enabled
from megatron.core.fusions.fused_indices_converter import fused_indices_to_multihot
from megatron.core.fusions.fused_pad_routing_map import fused_pad_routing_map
from megatron.core.jit import jit_fuser
from megatron.core.tensor_parallel import (
    all_to_all,
    gather_from_sequence_parallel_region,
    reduce_scatter_to_sequence_parallel_region,
)
from megatron.core.transformer.enums import CudaGraphScope
from megatron.core.transformer.moe.fused_a2a import (
    fused_combine,
    fused_dispatch,
    hybrid_ep_combine,
    hybrid_ep_dispatch,
    set_deepep_num_sms,
)
from megatron.core.transformer.moe.moe_utils import (
    ProcessGroupCollection,
    get_align_size_for_quantization,
    get_capacity,
    maybe_move_tensor_to_cpu,
    pad_routing_map,
    permute,
    sort_chunks_by_idxs,
    unpermute,
)
from megatron.core.transformer.moe.shared_experts import SharedExpertMLP
from megatron.core.transformer.transformer_config import TransformerConfig

""" We use the following notation throughout this file:
     H: hidden size
     B: micro batch size
     S: sequence length
     TP: tensor model parallel size
     EP: expert model parallel size
     num_local_tokens: S/TP*B
     num_global_tokens: num_local_tokens*TP*EP
"""

logger = logging.getLogger(__name__)


class MoETokenDispatcher:
    """
    MoE Token Dispatcher
    """

    def __init__(
        self, config: TransformerConfig, pg_collection: Optional[ProcessGroupCollection] = None
    ) -> None:
        """
        Initialize the MoE Token Dispatcher.

        Args:
            config (TransformerConfig): Configuration for the MoE layer.
            pg_collection (ProcessGroupCollection, optional): Process groups for MoE operations.
        """
        self.config = config
        self.shared_experts: Optional[SharedExpertMLP] = None

        self.ep_group = pg_collection.ep
        # use pg_collection.expt_tp_group as tensor parallel group in this module.
        self.tp_group = pg_collection.expt_tp
        self.tp_ep_group = pg_collection.tp_ep

        self.tp_size = utils.get_pg_size(self.tp_group)
        self.tp_rank = utils.get_pg_rank(self.tp_group)
        self.ep_size = utils.get_pg_size(self.ep_group)

        # Attributes that need to be captured in cudagraph. These attributes are returned
        # as cudagraph outputs when the cuda_graph_scope contains moe_preprocess.
        self.cudagraph_attrs = []
        self.valid_cudagraph_attrs = None

    @abstractmethod
    def dispatch_preprocess(
        self, tokens: torch.Tensor, routing_map: torch.Tensor, probs: torch.Tensor
    ):
        """Prepares tokens for dispatch without inter-device communication.

        This method should handle all local computations like tensor rearrangement and
        metadata extraction before the main communication step.

        Note:
            Try to avoid any communication here to enable optimal computation-communication
            overlapping when enabling communication overlap, since communications in the
            same stream runs sequentially and may get exposed.

        Args:
            tokens (torch.Tensor): Input tokens.
            routing_map (torch.Tensor): Token to expert mapping tensor.
            probs (torch.Tensor): The routing probability tensor, [num_tokens, num_experts].

        Returns:
            A tuple of preprocessed tokens and probabilities.
        """
        raise NotImplementedError("dispatch_preprocess function not implemented.")

    @abstractmethod
    def token_dispatch(self, hidden_states: torch.Tensor, probs: torch.Tensor):
        """Dispatches tokens to expert devices using communication.

        This method performs the main communication (e.g., All-to-All) to send
        tokens to the devices where their assigned experts reside.

        Args:
            hidden_states (torch.Tensor): Preprocessed hidden states to be dispatched.
            probs (torch.Tensor): Preprocessed probabilities for each token-expert pair.

        Returns:
            A tuple of dispatched tokens and probabilities.
        """
        raise NotImplementedError("token_dispatch function not implemented.")

    @abstractmethod
    def dispatch_postprocess(self, hidden_states: torch.Tensor, probs: torch.Tensor):
        """Performs local processing after token dispatch communication.

        This method handles post-communication tasks like token reordering and
        preparing metadata for the expert forward pass.

        Note:
            Try to avoid any communication here to enable optimal computation-communication
            overlapping when enabling communication overlap, since communications in the
            same stream runs sequentially and may get exposed.

        Args:
            hidden_states (torch.Tensor): Dispatched hidden states.
            probs (torch.Tensor): Dispatched probabilities.

        Returns:
            A tuple containing the permuted tokens for experts, the number of
            tokens per expert, and the permuted probabilities.
        """
        raise NotImplementedError("dispatch_postprocess function not implemented.")

    @abstractmethod
    def combine_preprocess(self, hidden_states):
        """Prepares expert outputs for the combine step.

        This method performs local computations on expert outputs before the
        communication step for combining them.

        Note:
            Try to avoid any communication here to enable optimal computation-communication
            overlapping when enabling communication overlap, since communications in the
            same stream runs sequentially and may get exposed.

        Args:
            hidden_states (torch.Tensor): The output tensor from the experts.

        Returns:
            The preprocessed expert output.
        """
        raise NotImplementedError("combine_preprocess function not implemented.")

    @abstractmethod
    def token_combine(self, hidden_states):
        """Combines expert outputs across devices using communication.

        This method aggregates expert outputs from different devices via
        communication (e.g., All-to-All or Reduce-Scatter).

        Args:
            hidden_states (torch.Tensor): Preprocessed output from experts.

        Returns:
            The combined expert outputs.
        """
        raise NotImplementedError("token_combine function not implemented.")

    @abstractmethod
    def combine_postprocess(self, hidden_states):
        """Performs local processing after token combine.

        This method handles post-communication tasks like unpermuting and
        reshaping to restore the original tensor structure.

        Note:
            Try to avoid any communication here to enable optimal computation-communication
            overlapping when enabling communication overlap, since communications in the
            same stream runs sequentially and may get exposed.

        Args:
            hidden_states (torch.Tensor): Combined hidden states from token combination

        Returns:
            The final output tensor.
        """
        raise NotImplementedError("combine_postprocess function not implemented.")

    def set_shared_experts(self, shared_experts):
        """Set shared expert to the dispatcher."""
        assert self.config.moe_shared_expert_overlap
        self.shared_experts = shared_experts


class MoEAllGatherTokenDispatcher(MoETokenDispatcher):
    """
    AllGather Based Token dispatcher.
    Note that this allgather spans the communication domain of TP*EP:
    """

    def __init__(
        self,
        num_local_experts: int,
        local_expert_indices: List[int],
        config: TransformerConfig,
        pg_collection: Optional[ProcessGroupCollection] = None,
    ) -> None:
        """Initialize the AllGather based token dispatcher.

        Args:
            num_local_experts (int): Number of local experts.
            local_expert_indices (List[int]): Indices of local experts.
            config (TransformerConfig): Configuration for the MoE layer.
            pg_collection (ProcessGroupCollection, optional): Process groups for MoE operations.
        """
        super().__init__(config=config, pg_collection=pg_collection)
        self.num_local_experts = num_local_experts
        assert self.num_local_experts > 0, "Expected at least one expert"
        self.local_expert_indices = local_expert_indices
        assert len(self.local_expert_indices) > 0, "Expected at least one local expert index"
        self.router_topk = config.moe_router_topk
        self.add_bias = config.add_bias_linear

        # self.global_local_map: 2D tensor. A mask of mapping between global and local tokens where
        # each element is True if it's between the local_expert_indices. Only useful when cross
        # device token permutation is enabled and **AllGahter** is performed.
        self.global_local_map = None

        # Attributes that need to be captured in cudagraph. These attributes are returned
        # as cudagraph outputs when the cuda_graph_scope contains moe_preprocess.
        self.cudagraph_attrs = ['routing_map']

    def dispatch_preprocess(
        self, hidden_states: torch.Tensor, routing_map: torch.Tensor, probs: torch.Tensor
    ):
        """Reshapes hidden states and caches the routing map."""
        self.hidden_shape = hidden_states.shape
        # [S/TP, B, H] -> [S*B/TP, H]
        hidden_states = hidden_states.view(-1, self.hidden_shape[-1])
        self.routing_map = routing_map
        return hidden_states, probs

    def token_dispatch(self, hidden_states, probs):
        """Gathers tokens from all TP*EP ranks using AllGather."""

        # Permute the tokens across the expert parallel devices.
        if self.tp_size > 1 or self.ep_size > 1:
            ## local_indices calculation
            with torch.no_grad():
                # [num_local_tokens, num_experts] -> [num_global_tokens, num_experts], where:
                #     num_local_tokens=(S/TP)*B, num_global_tokens=S*B*EP
                self.routing_map = gather_from_sequence_parallel_region(
                    self.routing_map, group=self.tp_ep_group
                )

            ## local_probs calculation
            # max_prob: [S/TP*B, num_experts] -> global_probs: [S*B*EP, num_experts]
            probs = gather_from_sequence_parallel_region(probs, group=self.tp_ep_group)
            # Note that this allgather spans the communication domain of TP*EP.
            #  [(S/TP)*B, H] -> [((S/TP)*B)*(TP*EP), H] = [S*B*EP, H]
            hidden_states = gather_from_sequence_parallel_region(
                hidden_states, group=self.tp_ep_group, use_global_buffer=True
            )

        return hidden_states, probs

    def dispatch_postprocess(self, hidden_states, probs):
        """After gathering in token_dispatch, this method identifies tokens for local experts and
        permutes them for expert processing.
        """
        self.hidden_shape_before_permute = hidden_states.shape

        # The routing map and probs that for local experts.
        self.local_map = self.routing_map[
            :, self.local_expert_indices[0] : self.local_expert_indices[-1] + 1
        ].contiguous()
        # probs of global token assignment to local experts.
        self.local_probs = probs[
            :, self.local_expert_indices[0] : self.local_expert_indices[-1] + 1
        ].contiguous()

        tokens_per_expert = self.local_map.sum(dim=0).long().cpu()

        permuted_local_hidden_states, _, self.reversed_local_input_permutation_mapping, _, _ = (
            permute(
                hidden_states,
                self.local_map,
                num_out_tokens=tokens_per_expert.sum().item(),
                fused=self.config.moe_permute_fusion,
            )
        )

        self.local_probs = self.local_probs.T.contiguous().masked_select(
            self.local_map.T.contiguous()
        )
        self.routing_map = None
        return permuted_local_hidden_states, tokens_per_expert, self.local_probs

    def combine_preprocess(self, hidden_states):
        """
        Reverses token permutation to restore original ordering before reduction operations.

        This method unpermutes the expert outputs using the cached permutation mapping
        from the dispatch phase. The unpermutation operation restores tokens to their
        original sequence positions, preparing them for the subsequent reduction scatter
        operation that will aggregate results across ranks.
        """
        unpermuted_local_hidden = unpermute(
            hidden_states,
            self.reversed_local_input_permutation_mapping,
            restore_shape=self.hidden_shape_before_permute,
            routing_map=self.local_map,
            fused=self.config.moe_permute_fusion,
        )
        return unpermuted_local_hidden

    def token_combine(self, hidden_states):
        """Combines expert outputs using Reduce-Scatter.

        This method performs the ReduceScatter communication operation to collect expert
        outputs from their processing ranks and redistribute tokens back to the ranks that
        originally held them. This completes the expert processing
        communication pattern and prepares tokens for final unpermutation.
        """
        # Unpermute the tokens across ranks.
        if self.tp_size > 1 or self.ep_size > 1:
            hidden_states = reduce_scatter_to_sequence_parallel_region(
                hidden_states.to(self.local_probs.dtype), group=self.tp_ep_group
            ).to(hidden_states.dtype)
        return hidden_states

    def combine_postprocess(self, hidden_states):
        """Restores the original tensor shape."""
        return hidden_states.view(self.hidden_shape)


class MoEAlltoAllTokenDispatcher(MoETokenDispatcher):
    """
    AlltoAll-based token dispatcher.

    The workflow of AlltoAll token dispatcher is as follows:
    (1) preprocess: calculate necessary metadata for communication and permute
    (2) dispatch process: permute tokens
    (3) token dispatch: A2A(EP)
    (4) dispatch postprocess: AG(TP)->sort_chunk(if num_local_experts>1)
    (5) combine preprocess: sort_chunk(if num_local_experts>1)->RS(TP)
    (6) token combine: A2A(EP)
    (7) combine postprocess: unpermute tokens
    """

    # DtoH copies are performed on this stream for overlapping with the main stream.
    cuda_dtoh_stream = None

    def __init__(
        self,
        num_local_experts: int,
        local_expert_indices: List[int],
        config: TransformerConfig,
        pg_collection: Optional[ProcessGroupCollection] = None,
    ) -> None:
        """
        Initialize the AlltoAll token dispatcher.

        Args:
            num_local_experts (int): Number of local experts on the current device.
            local_expert_indices (List[int]): Indices of local experts on the current device.
            config (TransformerConfig): Configuration for the transformer model.
            pg_collection (ProcessGroupCollection, optional): Process groups for MoE operations.
        """
        super().__init__(config=config, pg_collection=pg_collection)
        self.num_local_experts = num_local_experts
        assert config.num_moe_experts is not None
        self.num_experts = config.num_moe_experts
        assert self.num_local_experts > 0, "Expected at least one expert"
        self.local_expert_indices = local_expert_indices
        assert (
            len(self.local_expert_indices) == self.num_local_experts
        ), "Invalid local expert indices"
        for i in range(len(self.local_expert_indices) - 1):
            assert (
                self.local_expert_indices[i] == self.local_expert_indices[i + 1] - 1
            ), "local_expert_indices must be continuous"

        # [ep_size]. Represents the number of tokens sent by the current rank to other
        # EP ranks.
        self.input_splits = None
        # [ep_size]. Represents the number of tokens received by the current rank from
        # other EP ranks.
        self.output_splits = None
        # [tp_size]. Represents the number of tokens received by the current rank from
        # other TP ranks.
        self.output_splits_tp = None
        self.permute_idx_device = torch.device("cuda") if self.config.moe_permute_fusion else "cpu"
        input_chunk_idxs = torch.arange(
            self.num_experts * self.tp_size, device=self.permute_idx_device
        )
        # [num_local_experts, tp_size * ep_size]. Sort the input chunks by local experts.
        self.sort_input_by_local_experts = input_chunk_idxs.reshape(
            -1, self.num_local_experts
        ).T.ravel()
        # [tp_size * ep_size, num_local_experts]. Restore the output chunks by local experts.
        self.restore_output_by_local_experts = input_chunk_idxs.reshape(
            self.num_local_experts, -1
        ).T.ravel()

        # Token drop and padding.
        # Drop and pad the input to capacity.
        self.drop_and_pad = self.config.moe_pad_expert_input_to_capacity
        if self.drop_and_pad:
            assert self.config.moe_expert_capacity_factor is not None
            self.moe_expert_capacity_factor = self.config.moe_expert_capacity_factor
        self.capacity = None

        # A cuda stream synchronization is needed in during token permutation in some cases,
        # because there are several non-blocking DtoH data transfers called at
        # `self.cuda_dtoh_point`. The synchronization happens at `self.cuda_sync_point`, which is
        # decided based on the MoE and parallel settings. Valid points are "before_permutation_1",
        # "before_ep_alltoall", "before_permutation_2", "before_finish", and "no_sync".
        self.cuda_sync_point = "no_sync"
        self.cuda_sync_point_priority = {
            "before_permutation_1": 0,
            "before_ep_alltoall": 1,
            "before_permutation_2": 2,
            "before_finish": 3,
            "no_sync": 4,
        }
        self.cuda_dtoh_point = "before_permutation_1"
        if config.cuda_graph_impl != "none" and (
            CudaGraphScope.moe_preprocess in config.cuda_graph_scope
            or not self.config.cuda_graph_scope
        ):
            self.cuda_dtoh_point = "before_ep_alltoall"
        if MoEAlltoAllTokenDispatcher.cuda_dtoh_stream is None:
            MoEAlltoAllTokenDispatcher.cuda_dtoh_stream = torch.cuda.Stream()

        # Attributes that need to be captured in cudagraph. These attributes are returned
        # as cudagraph outputs when the cuda_graph_scope contains moe_preprocess.
        self.cudagraph_attrs = [
            'tokens_per_expert',
            'input_splits',
            'output_splits',
            'output_splits_tp',
            'num_out_tokens',
            'num_global_tokens_per_local_expert',
            'reversed_local_input_permutation_mapping',
            'routing_map',
        ]

        self.shared_experts = None

    def set_shared_experts(self, shared_experts):
        """Set shared expert to the dispatcher."""
        super().set_shared_experts(shared_experts)
        if shared_experts.use_shared_expert_gate:
            self.cudagraph_attrs.append('shared_experts.gate_score')
        self.cudagraph_attrs.append('shared_experts.cached_fc1_input')

    def preprocess(self, routing_map: torch.Tensor) -> torch.Tensor:
        """
        Preprocesses the token routing map for All-to-All communication and token permutation.

        This method computes the number of tokens assigned to each expert based on the routing_map.
        It also initializes necessary data structures for All-to-All communication, such as input
        and output splits, and the mapping between global tokens and local experts. This method
        should not call any DtoH data copying due to performance consideration. The necessary DtoH
        copies are made on the `self.cuda_dtoh_stream` at `self.cuda_dtoh_point`.

        Args:
            routing_map (torch.Tensor): The mapping of tokens to experts.

        Returns:
            A tensor with the number of tokens for each local expert.
        """
        if self.drop_and_pad:
            # Drop and pad the input to capacity.
            num_tokens = routing_map.size(0) * self.config.moe_router_topk
            self.capacity = get_capacity(
                num_tokens=num_tokens,
                num_experts=self.num_experts,
                capacity_factor=self.moe_expert_capacity_factor,
            )
            self.num_out_tokens = self.capacity * self.num_experts
            # [num_local_experts], number of tokens processed by each expert.
            num_tokens_per_local_expert = torch.full(
                (self.num_local_experts,),
                self.capacity * self.tp_size * self.ep_size,
                dtype=torch.long,
            )
            # [tp_size * ep_size, num_local_experts]. Represents the number of tokens sent
            # to each local expert by all ranks.
            self.num_global_tokens_per_local_expert = torch.full(
                (self.num_experts * self.tp_size,),
                self.capacity,
                dtype=torch.long,
                device=self.permute_idx_device,
            )
            return num_tokens_per_local_expert

        # [num_experts], number of tokens assigned to each expert from the current rank's input.
        num_local_tokens_per_expert = routing_map.sum(dim=0).long()

        if (
            self.config.moe_expert_capacity_factor is not None
            or self.config.moe_router_padding_for_quantization
        ):
            # When using token dropping or router padding, output size is dynamic.
            # Need to sync output size GPU->CPU before allocating output buffer
            self.num_out_tokens = num_local_tokens_per_expert.sum()
            self._maybe_update_cuda_sync_point("before_permutation_1")
        else:
            # For dropless training, output size is static (num_tokens * topk)
            # No explicit sync needed
            self.num_out_tokens = routing_map.size(0) * self.config.moe_router_topk
        if self.ep_size > 1 or self.tp_size > 1:
            # ===================================================
            # Calculate input_splits, output_splits for alltoall/allgather in variable size.
            # ===================================================
            # [ep_size]. Represents the number of tokens sent by the current rank to other
            # EP ranks.
            self.input_splits = num_local_tokens_per_expert.reshape(
                self.ep_size, self.num_local_experts
            ).sum(axis=1)
            # Gather the global distribution of tokens across ranks.
            # num_global_tokens_per_expert represents the number of tokens sent to each
            # expert by all ranks.
            # [tp_size, ep_size, num_experts]
            num_global_tokens_per_expert = (
                gather_from_sequence_parallel_region(
                    num_local_tokens_per_expert, group=self.tp_ep_group
                )
                .reshape(self.ep_size, self.tp_size, self.num_experts)
                .transpose(0, 1)
            )
            # [tp_size, ep_size, num_experts] -> [tp_size, ep_size, num_local_experts]
            num_global_tokens_per_local_expert = num_global_tokens_per_expert[
                :, :, self.local_expert_indices[0] : self.local_expert_indices[-1] + 1
            ].contiguous()
            # [tp_size, ep_size, num_local_experts] -> [tp_size, ep_size]
            num_global_tokens_per_rank = num_global_tokens_per_local_expert.sum(axis=2)
            # [tp_size, ep_size] -> [ep_size]
            # self.output_splits represents the number of tokens received by the current rank
            # from other EP rank.
            self.output_splits = num_global_tokens_per_rank[self.tp_rank]
            # [tp_size, ep_size] -> [tp_size]
            # self.output_splits_tp represents the number of tokens received by the current
            # rank from other TP rank.
            self.output_splits_tp = num_global_tokens_per_rank.sum(axis=1)
            # [tp_size, ep_size, num_local_experts] -> [num_local_experts]
            num_tokens_per_local_expert = num_global_tokens_per_local_expert.sum(dim=(0, 1))

            # A synchronization is needed before expert parallel AlltoAll communication
            # to get the `input_splits` and `output_splits` CPU values.
            self._maybe_update_cuda_sync_point("before_ep_alltoall")
        else:
            num_global_tokens_per_local_expert = num_local_tokens_per_expert.reshape(
                self.num_experts
            )
            num_tokens_per_local_expert = num_local_tokens_per_expert

            # A synchronization is needed before the returns
            # to get the `num_tokens_per_local_expert` CPU value.
            self._maybe_update_cuda_sync_point("before_finish")

        if self.num_local_experts > 1:
            # [tp_size * ep_size, num_local_experts]. Represents the number of tokens sent
            # to each local expert by all ranks.
            self.num_global_tokens_per_local_expert = num_global_tokens_per_local_expert.view(
                -1, self.num_local_experts
            )
            if not self.config.moe_permute_fusion:
                # A synchronization is needed before permutation 2
                # to get the `num_global_tokens_per_local_expert` CPU value.
                self._maybe_update_cuda_sync_point("before_permutation_2")

        assert (
            self.cuda_sync_point_priority[self.cuda_dtoh_point]
            <= self.cuda_sync_point_priority[self.cuda_sync_point]
        ), "cuda_sync_point must be after cuda_dtoh_point."
        return num_tokens_per_local_expert

    def dispatch_preprocess(
        self, hidden_states: torch.Tensor, routing_map: torch.Tensor, probs: torch.Tensor
    ):
        """Prepares hidden states and probabilities for dispatch.

        This method reshapes the hidden states, computes communication metadata,
        and permutes the tokens and probabilities before the All-to-All communication.

        Args:
            hidden_states (torch.Tensor): Input token embeddings.
            routing_map (torch.Tensor): The mapping of tokens to experts.
            probs (torch.Tensor): Routing probabilities.

        Returns:
            A tuple of permuted hidden states and probabilities.
        """
        # Preprocess: Get the metadata for communication, permutation and computation operations.
        self.hidden_shape = hidden_states.shape
        self.probs = probs
        self.routing_map = routing_map
        assert probs.dim() == 2, "Expected 2D tensor for probs"
        assert routing_map.dim() == 2, "Expected 2D tensor for token2expert mask"
        assert routing_map.dtype == torch.bool, "Expected bool tensor for mask"
        hidden_states = hidden_states.view(-1, self.hidden_shape[-1])

        if self.config.moe_router_padding_for_quantization:
            pad_multiple = get_align_size_for_quantization(self.config)
            if is_experimental_enabled() and self.config.moe_permute_fusion:
                self.routing_map = fused_pad_routing_map(self.routing_map, pad_multiple)
            else:
                self.routing_map = pad_routing_map(self.routing_map, pad_multiple)
        self.tokens_per_expert = self.preprocess(self.routing_map)

        if self.shared_experts is not None:
            self.shared_experts.pre_forward_comm(hidden_states.view(self.hidden_shape))

        # Permutation 1: input to AlltoAll input
        self.tokens_per_expert = self._maybe_dtoh_and_synchronize(
            "before_permutation_1", self.tokens_per_expert
        )
        self.hidden_shape_before_permute = hidden_states.shape
        (
            permutated_local_input_tokens,
            permuted_probs,
            self.reversed_local_input_permutation_mapping,
            _,
            _,
        ) = permute(
            hidden_states,
            self.routing_map,
            probs=probs,
            num_out_tokens=self.num_out_tokens,
            fused=self.config.moe_permute_fusion,
            drop_and_pad=self.drop_and_pad,
        )
        return permutated_local_input_tokens, permuted_probs

    def token_dispatch(self, permutated_local_input_tokens, permuted_probs):
        """
        Perform all-to-all communication for dispatching tokens.

        This method performs the all-to-all communication step to dispatch tokens across
        expert parallel ranks. It synchronizes metadata at the appropriate point before
        performing the communication.

        Args:
            permutated_local_input_tokens (torch.Tensor): Pre-permuted input tokens.
            permuted_probs (torch.Tensor): Pre-permuted probabilities.

        Returns:
            A tuple of tokens and probabilities after All-to-All.
        """

        # Perform expert parallel AlltoAll communication
        self.tokens_per_expert = self._maybe_dtoh_and_synchronize(
            "before_ep_alltoall", self.tokens_per_expert
        )
        global_input_tokens = all_to_all(
            self.ep_group, permutated_local_input_tokens, self.output_splits, self.input_splits
        )
        global_probs = all_to_all(
            self.ep_group, permuted_probs, self.output_splits, self.input_splits
        )

        return global_input_tokens, global_probs

    def dispatch_postprocess(self, global_input_tokens, global_probs):
        """Post-processes tokens after All-to-All communication.

        This involves an All-Gather in the tensor parallel dimension and sorting
        tokens by expert if there are multiple local experts.

        Args:
            global_input_tokens (torch.Tensor): Tokens after All-to-All.
            global_probs (torch.Tensor): Probabilities after All-to-All.

        Returns:
            A tuple of processed tokens, token counts per expert, and processed probabilities.
        """
        if self.shared_experts is not None:
            self.shared_experts.linear_fc1_forward_and_act(global_input_tokens)

        if self.tp_size > 1:
            if self.output_splits_tp is None:
                output_split_sizes = None
            else:
                output_split_sizes = self.output_splits_tp.tolist()
            global_input_tokens = gather_from_sequence_parallel_region(
                global_input_tokens, group=self.tp_group, output_split_sizes=output_split_sizes
            )
            global_probs = gather_from_sequence_parallel_region(
                global_probs, group=self.tp_group, output_split_sizes=output_split_sizes
            )

        # Permutation 2: Sort tokens by local expert.
        self.tokens_per_expert = self._maybe_dtoh_and_synchronize(
            "before_permutation_2", self.tokens_per_expert
        )
        if self.num_local_experts > 1:
            if self.drop_and_pad:
                global_input_tokens = (
                    global_input_tokens.view(
                        self.tp_size * self.ep_size,
                        self.num_local_experts,
                        self.capacity,
                        *global_input_tokens.size()[1:],
                    )
                    .transpose(0, 1)
                    .contiguous()
                    .flatten(start_dim=0, end_dim=2)
                )
                global_probs = (
                    global_probs.view(
                        self.tp_size * self.ep_size,
                        self.num_local_experts,
                        self.capacity,
                        *global_probs.size()[1:],
                    )
                    .transpose(0, 1)
                    .contiguous()
                    .flatten(start_dim=0, end_dim=2)
                )
            else:
                global_input_tokens, global_probs = sort_chunks_by_idxs(
                    global_input_tokens,
                    self.num_global_tokens_per_local_expert.ravel(),
                    self.sort_input_by_local_experts,
                    probs=global_probs,
                    fused=self.config.moe_permute_fusion,
                )

        tokens_per_expert = self._maybe_dtoh_and_synchronize(
            "before_finish", self.tokens_per_expert
        )
        self.tokens_per_expert = None
        return global_input_tokens, tokens_per_expert, global_probs

    def combine_preprocess(self, hidden_states):
        """Prepares hidden states for token combination after expert computations.

        This may involve un-sorting tokens and a Reduce-Scatter in the tensor
        parallel dimension.
        """
        # Unpermutation 2: Unsort tokens by local expert.
        if self.num_local_experts > 1:
            if self.drop_and_pad:
                hidden_states = (
                    hidden_states.view(
                        self.num_local_experts,
                        self.tp_size * self.ep_size,
                        self.capacity,
                        *hidden_states.size()[1:],
                    )
                    .transpose(0, 1)
                    .contiguous()
                    .flatten(start_dim=0, end_dim=2)
                )
            else:
                hidden_states, _ = sort_chunks_by_idxs(
                    hidden_states,
                    self.num_global_tokens_per_local_expert.T.ravel(),
                    self.restore_output_by_local_experts,
                    fused=self.config.moe_permute_fusion,
                )

        if self.tp_size > 1:
            if self.output_splits_tp is None:
                input_split_sizes = None
            else:
                input_split_sizes = self.output_splits_tp.tolist()
            hidden_states = reduce_scatter_to_sequence_parallel_region(
                hidden_states.to(self.probs.dtype),
                group=self.tp_group,
                input_split_sizes=input_split_sizes,
            ).to(hidden_states.dtype)

        return hidden_states

    def token_combine(
        self,
        hidden_states: torch.Tensor,
        async_finish: bool = True,
        allocate_on_comm_stream: bool = True,
    ):
        """Executes fused un-permutation and communication using DeepEP kernels.

        This method performs the inverse AlltoAll communication operation to collect expert
        outputs from their processing ranks and redistribute them back to the ranks that
        originally held the corresponding tokens. This completes the expert processing
        communication pattern and prepares tokens for final unpermutation.

        Args:
            hidden_states (torch.Tensor): Expert outputs ready for combination
            async_finish (bool): Whether to use asynchronous communication completion
            allocate_on_comm_stream (bool): Whether to allocate buffers on communication stream

        Returns:
            Tokens after the All-to-All communication for combining.
        """
        # Perform expert parallel AlltoAll communication
        # hidden_states: [SEQL, H] -> [SEQL, H/TP]
        permutated_local_input_tokens = all_to_all(
            self.ep_group, hidden_states, self.input_splits, self.output_splits
        )
        return permutated_local_input_tokens

    def combine_postprocess(self, permutated_local_input_tokens):
        """Finalizes token reconstruction with un-permutation and reshaping.

        This method un-permutes the tokens back to their original order,
        reshapes the tensor to its original shape, and adds the shared
        expert output if enabled.

        Args:
            permutated_local_input_tokens (torch.Tensor): Permuted hidden states from token combine.

        Returns:
            The final MoE layer output reshaped to its original dimensions.
        """
        if self.shared_experts is not None:
            self.shared_experts.linear_fc2_forward(permutated_local_input_tokens)
            self.shared_experts.post_forward_comm()

        # Unpermutation 1: AlltoAll output to output
        output = unpermute(
            permutated_local_input_tokens,
            self.reversed_local_input_permutation_mapping,
            restore_shape=self.hidden_shape_before_permute,
            routing_map=self.routing_map,
            fused=self.config.moe_permute_fusion,
            drop_and_pad=self.drop_and_pad,
        )

        # Reshape the output tensor
        output = output.view(self.hidden_shape)

        # Add shared experts output
        if self.shared_experts is not None:
            shared_expert_output = self.shared_experts.get_output()
            output += shared_expert_output
        return output

    def _maybe_update_cuda_sync_point(self, point: str):
        """
        Update the CUDA sync point if the priority of the new point is higher than the current
        sync point, which means the new point is reached earlier than the current sync point.
        """
        if (
            self.cuda_sync_point_priority[point]
            < self.cuda_sync_point_priority[self.cuda_sync_point]
        ):
            self.cuda_sync_point = point

    def _maybe_dtoh_and_synchronize(
        self, point: str, tokens_per_expert: Optional[torch.Tensor] = None
    ) -> torch.Tensor:
        """
        Move all possible GPU tensors to CPU and make a synchronization at the expected point.
        """
        if not self.drop_and_pad:
            if point == self.cuda_dtoh_point:
                # Move all possible GPU tensors to CPU at self.cuda_dtoh_point.
                on_side_stream = torch.cuda.current_stream() != self.cuda_dtoh_stream
                if on_side_stream:
                    self.cuda_dtoh_stream.wait_stream(torch.cuda.current_stream())
                with torch.cuda.stream(self.cuda_dtoh_stream):
                    # TODO: use MemcpyBatchAsync instead.
                    tokens_per_expert = maybe_move_tensor_to_cpu(
                        tokens_per_expert, record_stream=on_side_stream
                    )
                    self.input_splits = maybe_move_tensor_to_cpu(
                        self.input_splits, as_numpy=True, record_stream=on_side_stream
                    )
                    self.output_splits = maybe_move_tensor_to_cpu(
                        self.output_splits, as_numpy=True, record_stream=on_side_stream
                    )
                    self.output_splits_tp = maybe_move_tensor_to_cpu(
                        self.output_splits_tp, as_numpy=True, record_stream=on_side_stream
                    )
                    self.num_out_tokens = maybe_move_tensor_to_cpu(
                        self.num_out_tokens, record_stream=on_side_stream
                    )
                    if self.num_local_experts > 1 and not self.config.moe_permute_fusion:
                        self.num_global_tokens_per_local_expert = maybe_move_tensor_to_cpu(
                            self.num_global_tokens_per_local_expert, record_stream=on_side_stream
                        )
                self.d2h_event = self.cuda_dtoh_stream.record_event()

            if point == self.cuda_sync_point:
                # Synchronize with the DtoH stream at self.cuda_sync_point.
                self.d2h_event.synchronize()

        return tokens_per_expert


class _DispatchManager(ABC):
    """
    A manager class to handle dispatch and combine processes for MoE models.

    DispatcherManager handles token dispatching according to the routing_map of format
    [num_local_tokens, world_size, num_instances]. The routing_map is a 3D tensor where each
    element indicates whether a token should be sent to a specific rank.

    num_instances is the maximum number of tokens instances dispatched into a target rank, it
    can be the number of local experts, or the size of sub_group.
    """

    @abstractmethod
    def setup_metadata(self, routing_map: torch.Tensor, probs: torch.Tensor):
        """Set up metadata of routing_map and probs."""
        pass

    @abstractmethod
    def dispatch(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Dispatch the hidden_states according to the routing_map."""
        pass

    @abstractmethod
    def combine(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Combine the hidden_states after expert processing."""
        pass

    @abstractmethod
    def get_permuted_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Get the permuted hidden states by instances."""
        pass

    @abstractmethod
    def get_restored_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Get the restored hidden states by instances."""
        pass


class _HybridEPManager(_DispatchManager):
    """
    A manager class to handle fused all-to-all communication processes for MoE models using
    HybridEP backend. See https://github.com/deepseek-ai/DeepEP/tree/hybrid-ep for more details.

    The workflow of the HybridEP dispatcher is:
    (1) setup_metadata(): Process routing map and probabilities to prepare dispatch metadata
    (2) dispatch():
        - Permute tokens for communication, perform all-to-all communication,
        and permute tokens for experts in single step
    (3) combine():
        - Unpermute tokens for communication, perform all-to-all communication,
        and unpermute tokens for attention in single step
    """

    def __init__(
        self,
        group: torch.distributed.ProcessGroup,
        num_local_experts: int,
        num_experts: int,
        config: TransformerConfig,
    ):
        """
        Initialize the HybridEP dispatcher.

        Args:
            group (torch.distributed.ProcessGroup): The process group to use for communication.
                This should be the ETPxEP group.
            num_local_experts (int): The number of local experts.
            num_experts (int): The total number of experts in the group.
            config (TransformerConfig): The configuration for the transformer model.
        """
        self.group = group
        self.num_local_experts = num_local_experts
        self.num_experts = num_experts
        self.config = config
        self.permute_fusion = config.moe_permute_fusion
        self.capacity_factor = config.moe_expert_capacity_factor
        # Drop and pad the input to capacity.
        self.drop_and_pad = self.config.moe_pad_expert_input_to_capacity
        if self.drop_and_pad:
            assert self.capacity_factor is not None
        self.capacity = None
        # Actually the the up-bound for the number of tokens
        # after permute op, None means no up-bound, will cause a CPU sync
        self.num_permuted_tokens = None

        # Metadata
        self.token_probs: Optional[torch.Tensor] = None
        # Handle used for combine operation
        self.handle = None
        # Used for padding the output for each expert
        self.pad_multiple = None

        if hybrid_ep_dispatch is None:
            raise ImportError(
                "HybridEP is not installed. Please install HybridEP package from "
                "https://github.com/deepseek-ai/DeepEP/tree/hybrid-ep."
            )

    def setup_metadata(self, routing_map: torch.Tensor, probs: torch.Tensor):
        num_tokens = routing_map.shape[0]
        self.routing_map = routing_map.reshape(num_tokens, self.num_experts)
        self.token_probs = probs.reshape(num_tokens, self.num_experts)
        # Compute the capacity for each expert at the drop_and_pad mode
        if self.drop_and_pad:
            num_out_tokens = num_tokens * self.config.moe_router_topk
            # Drop and pad the input to capacity.
            self.capacity = get_capacity(
                num_tokens=num_out_tokens,
                num_experts=self.num_experts,
                capacity_factor=self.capacity_factor,
            )
            # In drop_and_pad mode, the number of tokens after the permute op
            # can be computed on the CPU
            self.num_permuted_tokens = self.capacity * self.group.size() * self.num_local_experts
            self.tokens_per_expert = torch.full(
                (self.num_local_experts,), self.capacity * self.group.size(), dtype=torch.long
            )

    def dispatch(
        self,
        hidden_states: torch.Tensor,
        async_finish: bool = True,
        allocate_on_comm_stream: bool = True,
    ) -> torch.Tensor:
        # HybridEP only supports float32 probs
        if self.token_probs.dtype != torch.float32:
            if self.token_probs.dtype in [torch.bfloat16, torch.float16]:
                logger.warning(
                    "HybridEP only supports float32 probs, please set --moe-router-dtype=fp32"
                )
            self.token_probs = self.token_probs.float()  # downcast or upcast
        if self.config.fp8 or self.config.fp4:
            self.pad_multiple = get_align_size_for_quantization(self.config)
        dispatched_hidden, self.dispatched_probs, _, tokens_per_expert, self.handle = (
            hybrid_ep_dispatch(
                x=hidden_states,
                routing_map=self.routing_map,
                probs=self.token_probs,
                group=self.group,
                num_local_experts=self.num_local_experts,
                num_sms_dispatch_api=self.config.moe_hybridep_num_sms,
                num_sms_combine_api=self.config.moe_hybridep_num_sms,
                num_permuted_tokens=self.num_permuted_tokens,
                pad_multiple=self.pad_multiple,
            )
        )

        if not self.drop_and_pad:
            self.tokens_per_expert = tokens_per_expert
            # self.num_permuted_tokens is necessary to allocate the output tensor for permute
            self.num_permuted_tokens = self.tokens_per_expert.sum()

        return dispatched_hidden

    def combine(
        self,
        hidden_states: torch.Tensor,
        async_finish: bool = True,
        allocate_on_comm_stream: bool = True,
    ) -> torch.Tensor:
        hidden_states = hybrid_ep_combine(
            x=hidden_states,
            handle=self.handle,
            num_permuted_tokens=self.num_permuted_tokens,
            pad_multiple=self.pad_multiple,
        )
        # Release the used handle/num_permuted_tokens which could change in each iteration.
        # For drop_and_pad mode, we don't need to reset the num_permuted_tokens and
        # num_dispatched_tokens, because their values never change.
        self.handle = None
        if not self.drop_and_pad:
            self.num_permuted_tokens = None
        return hidden_states

    def get_permuted_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return hidden_states, self.dispatched_probs

    def get_restored_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return hidden_states

    def get_number_of_tokens_per_expert(self) -> torch.Tensor:
        '''
        Get the number of tokens per expert.
        '''
        return self.tokens_per_expert


class _DeepepManager(_DispatchManager):
    """
    A manager class to handle fused all-to-all communication processes for MoE models using
    DeepEP backend. See https://github.com/deepseek-ai/deepep for more details.

    The workflow of the DeepEP dispatcher is:
    (1) setup_metadata(): Process routing map and probabilities to prepare dispatch metadata
    (2) dispatch():
        - Use fused kernel to permute tokens and perform all-to-all communication in single step
    (3) get_permuted_hidden_states_by_instances():
        - Convert routing map and probabilities to multihot format
        - Permute tokens using fused kernel
    (4) get_restored_hidden_states_by_instances():
        - Reverse permutation using fused kernel
    (5) combine():
        - Reverse process using fused kernel to unpermute and perform all-to-all in single step

    This implementation uses fused communication kernels (fused_dispatch/fused_combine) that
    combine permutation and communication operations for improved efficiency compared to
    separate permute+alltoall steps.
    """

    def __init__(
        self,
        group: torch.distributed.ProcessGroup,
        num_local_experts: int,
        router_topk: int,
        num_experts: int,
        config: TransformerConfig,
    ):
        """
        Initialize the DeepEP dispatcher.

        Args:
            group (torch.distributed.ProcessGroup): The process group to use for communication.
                This should be the ETPxEP group.
            num_local_experts (int): The number of local experts.
            router_topk (int): The number of experts for each token to select.
            num_experts (int): The total number of experts in the group.
            config (TransformerConfig): The configuration for the transformer model.
        """
        self.group = group
        self.num_local_experts = num_local_experts
        self.config = config

        self.router_topk = router_topk
        self.num_experts = num_experts
        self.router_dtype = config.moe_router_dtype
        self.capacity_factor = config.moe_expert_capacity_factor
        self.permute_fusion = config.moe_permute_fusion

        # Metadata
        self.token_indices: Optional[torch.Tensor] = None
        self.token_probs: Optional[torch.Tensor] = None
        # Handle used for combine operation
        self.handle = None

        if fused_dispatch is None:
            raise ImportError(
                "DeepEP is not installed. Please install DeepEP package from "
                "https://github.com/deepseek-ai/deepep."
            )
        set_deepep_num_sms(config.moe_deepep_num_sms)

    def setup_metadata(self, routing_map: torch.Tensor, probs: torch.Tensor):
        num_tokens = routing_map.shape[0]

        routing_map = routing_map.reshape(num_tokens, self.num_experts)
        probs = probs.reshape(num_tokens, self.num_experts)
        # Convert the format of routing map from multihot to indices.
        self.token_probs, self.token_indices = torch.topk(probs, self.router_topk, dim=-1)
        # Mask the indices of dropped tokens with -1
        if self.capacity_factor is not None:
            mask = self.token_probs == 0
            self.token_indices = self.token_indices.masked_fill(mask, -1)

    def dispatch(
        self,
        hidden_states: torch.Tensor,
        async_finish: bool = False,
        allocate_on_comm_stream: bool = False,
    ) -> torch.Tensor:
        # DeepEP only supports float32 probs
        if self.token_probs.dtype != torch.float32:
            if self.token_probs.dtype in [torch.bfloat16, torch.float16]:
                logger.warning(
                    "DeepEP only supports float32 probs, please set --moe-router-dtype=fp32"
                )
            self.token_probs = self.token_probs.float()  # downcast or upcast
        hidden_states, dispatched_indices, dispatched_probs, num_tokens_per_expert, handle = (
            fused_dispatch(
                hidden_states,
                self.token_indices,
                self.token_probs,
                self.num_experts,
                self.group,
                async_finish=async_finish,
                allocate_on_comm_stream=allocate_on_comm_stream,
            )
        )
        self.handle = handle
        self.tokens_per_expert = num_tokens_per_expert
        self.dispatched_indices = dispatched_indices
        self.dispatched_probs = dispatched_probs

        return hidden_states

    def _indices_to_multihot(self, indices, probs):
        """
        Converts a tensor of indices to a multihot vector.

        Args:
            indices (torch.Tensor): [num_tokens, topk] token indices, where -1 means masked out.
            probs (torch.Tensor): [num_tokens, topk] token probabilities.

        Returns:
            A tuple of (routing_map, probs), where routing_map is the multihot vector
            and probs is the multihot probabilities.
        """
        batch_size = indices.shape[0]
        multihot_routing_map = torch.zeros(
            (batch_size, self.num_local_experts), dtype=torch.long, device=indices.device
        )

        multihot_probs = torch.zeros(
            (batch_size, self.num_local_experts), dtype=torch.float, device=indices.device
        )

        mask = indices != -1
        valid_indices = indices[mask]
        row_indices = torch.arange(batch_size, device=indices.device).repeat_interleave(
            mask.sum(dim=1)
        )
        multihot_routing_map[row_indices, valid_indices] = 1
        multihot_probs[row_indices, valid_indices] = probs[mask]
        return multihot_routing_map.bool(), multihot_probs

    def get_number_of_tokens_per_expert(self) -> torch.Tensor:
        """
        Get the number of tokens per expert.
        """
        return self.tokens_per_expert

    def combine(
        self,
        hidden_states: torch.Tensor,
        async_finish: bool = False,
        allocate_on_comm_stream: bool = False,
    ) -> torch.Tensor:
        hidden_states, _ = fused_combine(
            hidden_states,
            self.group,
            self.handle,
            async_finish=async_finish,
            allocate_on_comm_stream=allocate_on_comm_stream,
        )
        # Release the handle after combine operation
        self.handle = None
        return hidden_states

    def _pad_routing_map(
        self, routing_map: torch.Tensor, tokens_per_expert: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Pad the routing map to the nearest multiple of the pad_multiple.
        """
        pad_multiple = get_align_size_for_quantization(self.config)

        num_input_tokens = routing_map.shape[0]
        target_tokens_per_expert = (
            torch.ceil(tokens_per_expert / pad_multiple) * pad_multiple
        ).long()

        # Check if there are enough tokens to pad
        enough_tokens_to_pad = torch.all(target_tokens_per_expert <= num_input_tokens)
        if not enough_tokens_to_pad:
            logger.warning(
                "Not enough tokens to pad. The total number of tokens received in this rank "
                "is smaller than the target number of tokens for each expert. "
                "Falling back to explicit padding within GroupedMLP"
            )
        else:
            if is_experimental_enabled() and self.permute_fusion:
                from megatron.core.fusions.fused_pad_routing_map import fused_pad_routing_map

                routing_map = fused_pad_routing_map(routing_map, pad_multiple)
            else:
                routing_map = pad_routing_map(routing_map, pad_multiple)
            tokens_per_expert = target_tokens_per_expert
        return routing_map, tokens_per_expert

    def get_permuted_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> torch.Tensor:
        if is_experimental_enabled() and self.permute_fusion:
            self.dispatched_routing_map, self.dispatched_probs = fused_indices_to_multihot(
                self.dispatched_indices, self.dispatched_probs, self.num_local_experts
            )
        else:
            self.dispatched_routing_map, self.dispatched_probs = self._indices_to_multihot(
                self.dispatched_indices, self.dispatched_probs
            )
        if self.config.moe_router_padding_for_quantization:
            self.dispatched_routing_map, self.tokens_per_expert = self._pad_routing_map(
                self.dispatched_routing_map, self.tokens_per_expert
            )

        self.hidden_shape_before_permute = hidden_states.shape
        assert self.dispatched_probs.dtype == torch.float32, "DeepEP only supports float32 probs"
        (
            hidden_states,
            permuted_probs,
            self.reversed_mapping_for_combine,
            self.pad_offsets,
            self.tokens_per_expert,
        ) = permute(
            hidden_states,
            self.dispatched_routing_map,
            probs=self.dispatched_probs,
            num_out_tokens=self.tokens_per_expert.sum().item(),
            fused=self.permute_fusion,
            tokens_per_expert=self.tokens_per_expert,
            align_size=get_align_size_for_quantization(self.config),
        )
        if self.router_dtype == "fp64":
            permuted_probs = permuted_probs.to(torch.float64)
        return hidden_states, permuted_probs

    def get_restored_hidden_states_by_experts(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = unpermute(
            hidden_states,
            self.reversed_mapping_for_combine,
            restore_shape=self.hidden_shape_before_permute,
            routing_map=self.dispatched_routing_map,
            fused=self.permute_fusion,
            pad_offsets=self.pad_offsets,
        )
        return hidden_states


class MoEFlexTokenDispatcher(MoETokenDispatcher):
    """A flexible token dispatcher that abstracts the underlying tensor and expert
    parallelism. It uses a single communication group over all TP and EP ranks,
    making the dispatch logic independent of the specific parallelism strategy.
    """

    def __init__(
        self,
        num_local_experts: int,
        local_expert_indices: List[int],
        config: TransformerConfig,
        pg_collection: Optional[ProcessGroupCollection] = None,
    ):
        """
        Initialize the Flex token dispatcher.

        Args:
            num_local_experts (int): Number of local experts on the current device.
            local_expert_indices (List[int]): Indices of local experts on the current device.
            config (TransformerConfig): Configuration for the transformer model.
            pg_collection (ProcessGroupCollection, optional): Process groups for MoE operations.
        """
        super().__init__(config=config, pg_collection=pg_collection)

        self.num_local_experts = num_local_experts
        self.local_expert_indices = local_expert_indices
        assert self.tp_size * self.ep_size > 1, "Flex token dispatcher requires TPxEP > 1"
        if self.config.moe_flex_dispatcher_backend == "deepep":
            self._comm_manager = _DeepepManager(
                group=self.tp_ep_group,
                num_local_experts=self.num_local_experts,
                router_topk=self.tp_size * self.config.moe_router_topk,
                num_experts=self.tp_size * self.config.num_moe_experts,
                config=self.config,
            )
            self.cudagraph_attrs = ['_comm_manager.token_probs', '_comm_manager.token_indices']
        elif self.config.moe_flex_dispatcher_backend == "hybridep":
            self._comm_manager = _HybridEPManager(
                group=self.tp_ep_group,
                num_local_experts=self.num_local_experts,
                num_experts=self.tp_size * self.config.num_moe_experts,
                config=self.config,
            )
            self.cudagraph_attrs = ['_comm_manager.token_probs', '_comm_manager.routing_map']
        else:
            raise ValueError(
                f"Invalid backend: {self.config.moe_flex_dispatcher_backend}"
                "Please set --moe-flex-dispatcher-backend=deepep or "
                "--moe-flex-dispatcher-backend=hybridep"
            )

    def set_shared_experts(self, shared_experts):
        raise NotImplementedError(
            "Shared expert overlap is not supported in Flex Token Dispatcher."
        )

    def _initialize_metadata(self, routing_map: torch.Tensor, probs: torch.Tensor) -> torch.Tensor:
        """
        Initialize the routing map and probs to a unified format covering the TPxEP group.
        This design decouples the communication group from underlying model parallelism groups,
        such that the communication strategy of tokens can be agnostic of TP size and EP size.

        This function expands the routing_map from shape [num_local_tokens, num_experts] to
        [num_local_tokens, world_size, num_local_experts]. Each element in the routing_map
        indicates whether a token should be sent to a specific rank. Specifically, the
        routing_map is replicated across TP group since each TP ranks in a TP group should
        receive the same tokens.
        """
        num_local_tokens = routing_map.shape[0]
        world_size = self.tp_size * self.ep_size
        # Organize routing map and probs to [num_local_tokens, world_size, num_local_experts]
        routing_map = (
            routing_map.reshape(num_local_tokens, self.ep_size, 1, self.num_local_experts)
            .expand(-1, -1, self.tp_size, -1)
            .reshape(num_local_tokens, world_size, self.num_local_experts)
        ).contiguous()
        probs = (
            probs.reshape(num_local_tokens, self.ep_size, 1, self.num_local_experts)
            .expand(-1, -1, self.tp_size, -1)
            .reshape(num_local_tokens, world_size, self.num_local_experts)
        ).contiguous()
        return routing_map, probs

    @jit_fuser
    def dispatch_preprocess(
        self, hidden_states: torch.Tensor, routing_map: torch.Tensor, probs: torch.Tensor
    ):
        """Initializes routing metadata and prepares tensors for fused dispatch.

        This method reshapes input tensors and processes routing information into a
        unified format, where the routing map is expanded to cover the TPxEP communication domain,
        enabling the token dispatch logic to be agnostic to parallelism strategies.

        Args:
            hidden_states (torch.Tensor): Input hidden states to be processed
            routing_map (torch.Tensor): Map indicating which expert each token should be routed to
            probs (torch.Tensor): Routing probabilities for each token-expert pair

        Returns:
            A tuple of reshaped hidden states and token probabilities.
        """
        self.hidden_shape = hidden_states.shape
        hidden_states = hidden_states.view(-1, self.hidden_shape[-1])

        # Initialize metadata
        routing_map, probs = self._initialize_metadata(routing_map, probs)

        self._comm_manager.setup_metadata(routing_map, probs)
        return hidden_states, self._comm_manager.token_probs

    def token_dispatch(
        self,
        hidden_states: torch.Tensor,
        probs: Optional[torch.Tensor] = None,
        async_finish: bool = True,
        allocate_on_comm_stream: bool = True,
    ):
        """
        Execute fused permutation and AlltoAll communication.

        This method currently leverages DeepEP's fused dispatch kernel, which combines token
        permutation and AlltoAll communication into a single optimized operation.
        The fused approach reduces memory bandwidth requirements and enables better
        overlap between computation and communication operations.

        Args:
            hidden_states (torch.Tensor): Preprocessed hidden states to be dispatched
            probs (torch.Tensor): Routing probabilities (unused in current implementation)
            async_finish (bool): Whether to use asynchronous communication completion
            allocate_on_comm_stream (bool): Whether to allocate buffers on communication stream

        Returns:
            A tuple of dispatched tokens and probabilities.
        """
        return (
            self._comm_manager.dispatch(hidden_states, async_finish, allocate_on_comm_stream),
            self._comm_manager.dispatched_probs,
        )

    def dispatch_postprocess(self, hidden_states: torch.Tensor, probs: torch.Tensor):
        """Converts dispatched tokens to a per-expert format for expert processing.

        This method transforms the output of the fused dispatch into the tensor
        organization required for the expert computation.

        Args:
            hidden_states (torch.Tensor): Hidden states after fused dispatch
            probs (torch.Tensor): Routing probabilities after fused dispatch

        Returns:
            A tuple of permuted tokens, token counts per expert, and permuted probabilities.
        """
        global_input_tokens, permuted_probs = (
            self._comm_manager.get_permuted_hidden_states_by_experts(hidden_states)
        )
        tokens_per_expert = self._comm_manager.get_number_of_tokens_per_expert()
        return global_input_tokens, tokens_per_expert, permuted_probs

    def combine_preprocess(self, hidden_states: torch.Tensor):
        """Pre-processes hidden states before combining them after expert processing.

        This method restores the hidden states to their original ordering before expert processing
        by using the communication manager's restoration function.
        """
        hidden_states = self._comm_manager.get_restored_hidden_states_by_experts(hidden_states)
        return hidden_states

    def token_combine(
        self,
        hidden_states: torch.Tensor,
        async_finish: bool = True,
        allocate_on_comm_stream: bool = True,
    ):
        """Executes fused un-permutation and communication using DeepEP kernels.

        This is the inverse of the `token_dispatch` operation.

        Args:
            hidden_states (torch.Tensor): Expert outputs ready for combination
            async_finish (bool): Whether to use asynchronous communication completion
            allocate_on_comm_stream (bool): Whether to allocate buffers on communication stream

        Returns:
            Combined tokens after fused un-permutation and communication.
        """
        return self._comm_manager.combine(hidden_states, async_finish, allocate_on_comm_stream)

    def combine_postprocess(self, hidden_states: torch.Tensor):
        """
        Restores the original tensor shape and finalizes the MoE layer output.

        This method performs the final step of the MoE token processing pipeline
        by reshaping the combined tokens back to their original input dimensions.

        Args:
            hidden_states (torch.Tensor): Combined tokens.

        Returns:
            The final MoE layer output reshaped to its original dimensions.
        """
        return hidden_states.view(self.hidden_shape)


================================================
FILE: megatron/core/transformer/moe/token_dispatcher_inference.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

"""
CUDA-graph-compatible token dispatcher for inference.

This dispatcher is only used during CUDA-graphed inference iterations. It replaces
AlltoAll with AllGather/ReduceScatter for token exchange, keeping all metadata
GPU-resident to avoid host synchronizations that would break CUDA graph capture.

Supports latency-optimized NVLS collectives (multimem all-gather/reduce-scatter)
on Hopper+ GPUs with BF16, with automatic fallback to NCCL.
"""

from typing import List, Optional

import torch

from megatron.core.inference.communication.torch_symm_triton import (
    are_tensors_nvls_eligible,
    multimem_all_gather_fused,
    multimem_reduce_scatter,
)
from megatron.core.inference.symmetric_memory import SymmetricMemoryManager
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.tensor_parallel import (
    gather_from_sequence_parallel_region,
    reduce_scatter_to_sequence_parallel_region,
)
from megatron.core.transformer.moe.token_dispatcher import MoEAllGatherTokenDispatcher
from megatron.core.transformer.transformer_config import TransformerConfig


class InferenceCUDAGraphTokenDispatcher(MoEAllGatherTokenDispatcher):
    """
    CUDA-graph-compatible AllGather token dispatcher for inference.

    Only used during CUDA-graphed inference iterations. Swapped in by
    MoELayer.set_inference_cuda_graphed_iteration() before graph capture
    and swapped out by MoELayer.unset_inference_cuda_graphed_iteration() after.

    Key features:
    - AllGather/ReduceScatter instead of AlltoAll for CUDA graph compatibility
    - GPU-resident metadata (no host synchronization)
    - NVLS collectives on Hopper+ with automatic NCCL fallback
    """

    def __init__(
        self,
        num_local_experts: int,
        local_expert_indices: List[int],
        config: TransformerConfig,
        pg_collection: Optional[ProcessGroupCollection] = None,
    ) -> None:
        """
        Initialize the InferenceCUDAGraphTokenDispatcher.

        Args:
            num_local_experts: Number of experts on this rank.
            local_expert_indices: Global indices of experts on this rank.
            config: Transformer configuration.
            pg_collection: Process group collection for distributed ops.
        """
        super().__init__(
            num_local_experts=num_local_experts,
            local_expert_indices=local_expert_indices,
            config=config,
            pg_collection=pg_collection,
        )
        self.topk = config.moe_router_topk

        self.triton_nvls_kernels_allowed = not self.config.inference_disable_triton_nvls_kernels

    def _maybe_allocate_ag_buffers(
        self, routing_map: torch.Tensor, probs: torch.Tensor, hidden_states: torch.Tensor
    ) -> dict:
        """Allocate a single symmetric memory output buffer for fused all-gather.

        Creates one contiguous symmetric memory buffer sized for the gathered
        (global) routing_map, probs, and hidden_states, then returns sliced views
        into it. This allows a single fused NVLS all-gather kernel to write all
        three outputs in one launch.

        Args:
            routing_map (torch.Tensor): Local routing map, shape [local_tokens, topk].
                Boolean or integer tensor mapping each token to its selected experts.
            probs (torch.Tensor): Local routing probabilities, shape [local_tokens, topk].
                Normalized weights for each token's selected experts.
            hidden_states (torch.Tensor): Local hidden states, shape [local_tokens, hidden_dim].

        Returns:
            dict: A dictionary with the following keys:
                - "handle": Symmetric memory handle for NVLS ops, or None if
                  symmetric memory is unavailable.
                - "routing_map": Raw byte view for the gathered routing map output.
                - "routing_map_offset": Byte offset of routing_map within the buffer.
                - "probs": Raw byte view for the gathered probs output.
                - "probs_offset": Byte offset of probs within the buffer.
                - "hidden_states": Raw byte view for the gathered hidden states output.
                - "hidden_states_offset": Byte offset of hidden_states within the buffer.
                When allocation fails, all tensor views are None and offsets are 0.
        """
        _NONE = {
            "handle": None,
            "routing_map": None,
            "routing_map_offset": 0,
            "probs": None,
            "probs_offset": 0,
            "hidden_states": None,
            "hidden_states_offset": 0,
        }

        local_tokens = probs.size(0)
        global_tokens = local_tokens * self.ep_size
        topk = probs.size(-1)
        hidden_dim = hidden_states.size(-1)

        result = SymmetricMemoryManager.get_buffer(
            "ep", process_group=self.ep_group
        ).maybe_get_tensors(
            [
                (global_tokens * topk, routing_map.dtype),
                (global_tokens * topk, probs.dtype),
                (global_tokens * hidden_dim, hidden_states.dtype),
            ]
        )

        if result["handle"] is None:
            return _NONE

        (rm_buf, rm_off), (p_buf, p_off), (hs_buf, hs_off) = result["tensors"]
        return {
            "handle": result["handle"],
            "routing_map": rm_buf,
            "routing_map_offset": rm_off,
            "probs": p_buf,
            "probs_offset": p_off,
            "hidden_states": hs_buf,
            "hidden_states_offset": hs_off,
        }

    def _maybe_allocate_rs_buffer(self, x: torch.Tensor) -> dict:
        """Allocate a symmetric memory buffer for reduce-scatter input.

        The buffer has the same shape and dtype as x so that x can be copied
        into it before the NVLS reduce-scatter kernel.

        Args:
            x (torch.Tensor): The global hidden states to be reduce-scattered,
                shape [global_tokens, hidden_dim].

        Returns:
            dict: A dictionary with keys "handle" (symmetric memory handle, or
                None if unavailable) and "tensor" (the allocated buffer, or None).
        """
        symm_mem_buffer = SymmetricMemoryManager.get_buffer(
            "ep", process_group=self.ep_group
        ).maybe_get_tensor(list(x.size()), dtype=x.dtype)
        return symm_mem_buffer

    def token_dispatch(self, hidden_states, probs):
        """Gathers tokens from all EP ranks using AllGather.

        Performs all-gather on routing_map (stored in self.routing_map), probs,
        and hidden_states so that every rank holds the full global view.
        Uses latency-optimized fused NVLS multimem_all_gather on Hopper+ GPUs
        with BF16 when symmetric memory is available. Falls back to NCCL otherwise.

        Args:
            hidden_states (torch.Tensor): Local hidden states,
                shape [local_tokens, hidden_dim].
            probs (torch.Tensor): Local routing probabilities,
                shape [local_tokens, topk]. Normalized weights for each token's
                selected experts.

        Returns:
            tuple: (hidden_states, probs) gathered across all EP ranks.
                - hidden_states (torch.Tensor): Shape [global_tokens, hidden_dim].
                - probs (torch.Tensor): Shape [global_tokens, topk].
                Also updates self.routing_map in-place to the gathered
                shape [global_tokens, topk].
        """
        if self.ep_size == 1:
            return hidden_states, probs

        # 1. Check inputs only: if inputs are 16-byte divisible,
        #  outputs (world_size * input) are too.
        nvls_eligible = self.triton_nvls_kernels_allowed and are_tensors_nvls_eligible(
            hidden_states, probs, self.routing_map
        )
        ag_buffers = None

        if nvls_eligible:
            # 2. Now attempt to allocate symmetric memory buffers for
            # all-gather outputs. If allocation fails, fallback to NCCL.
            ag_buffers = self._maybe_allocate_ag_buffers(self.routing_map, probs, hidden_states)

        # 3. Can use NVLS if eligible and buffers allocated successfully (handle is not None)
        can_use_nvls = nvls_eligible and ag_buffers["handle"] is not None

        if can_use_nvls:
            # Capture shapes for reshaping after all-gather
            # Output shape: [local_tokens * ep_size, dim]
            local_tokens = probs.size(0)
            global_tokens = local_tokens * self.ep_size
            topk = probs.size(1)
            hidden_dim = hidden_states.size(1)
            routing_map_dtype = self.routing_map.dtype
            probs_dtype = probs.dtype
            hidden_dtype = hidden_states.dtype

            # Fused NVLS all-gather: single kernel launch + single barrier for all 3 tensors
            multimem_all_gather_fused(
                ag_buffers["routing_map"].view(
                    torch.bfloat16
                ),  # .view does not change the underlying data
                self.routing_map.view(torch.bfloat16),
                ag_buffers["routing_map_offset"],
                ag_buffers["probs"].view(torch.bfloat16),
                probs.view(torch.bfloat16),
                ag_buffers["probs_offset"],
                ag_buffers["hidden_states"].view(torch.bfloat16),
                hidden_states.view(torch.bfloat16),
                ag_buffers["hidden_states_offset"],
                ag_buffers["handle"],
            )
            self.routing_map = (
                ag_buffers["routing_map"].view(routing_map_dtype).view(global_tokens, topk)
            )
            probs = ag_buffers["probs"].view(probs_dtype).view(global_tokens, topk)
            hidden_states = (
                ag_buffers["hidden_states"].view(hidden_dtype).view(global_tokens, hidden_dim)
            )
        else:
            # Fallback to NCCL for all tensors
            with torch.no_grad():
                self.routing_map = gather_from_sequence_parallel_region(
                    self.routing_map, group=self.tp_ep_group
                )
            probs = gather_from_sequence_parallel_region(probs, group=self.tp_ep_group)
            hidden_states = gather_from_sequence_parallel_region(
                hidden_states, group=self.tp_ep_group
            )

        return hidden_states, probs

    def dispatch_postprocess(self, hidden_states, probs):
        """Pass-through: returns inputs directly without permutation.

        Unlike the training dispatcher, this does not permute tokens or compute
        tokens_per_expert. The downstream InferenceGroupedMLP (FlashInfer /
        CUTLASS fused MoE kernel) operates directly on the routing map stored
        in self.routing_map.

        Args:
            hidden_states (torch.Tensor): Gathered hidden states,
                shape [global_tokens, hidden_dim].
            probs (torch.Tensor): Gathered routing probabilities,
                shape [global_tokens, topk].

        Returns:
            tuple: (hidden_states, tokens_per_expert, probs) where
                tokens_per_expert is always None.
        """
        return hidden_states, None, probs

    def combine_preprocess(self, expert_output):
        """Pass-through: InferenceGroupedMLP already produces unpermuted output.

        No unpermutation is needed because dispatch_postprocess did not permute
        the tokens in the first place.

        Args:
            expert_output (torch.Tensor): Output from InferenceGroupedMLP,
                shape [global_tokens, hidden_dim].

        Returns:
            torch.Tensor: The input tensor unchanged.
        """
        return expert_output

    def token_combine(self, hidden_states):
        """Combines expert outputs across EP ranks using Reduce-Scatter.

        Reduces the global expert output (summing contributions from each rank)
        and scatters the result so each rank receives its local token slice.
        Uses latency-optimized NVLS multimem_reduce_scatter on Hopper+ GPUs
        with BF16 when symmetric memory is available. Falls back to NCCL otherwise.

        Args:
            hidden_states (torch.Tensor): Combined expert output after routing
                weights have been applied, shape [global_tokens, hidden_dim].

        Returns:
            torch.Tensor: Local slice of the reduced output,
                shape [local_tokens, hidden_dim] where
                local_tokens = global_tokens // ep_size.
        """
        if self.ep_size == 1:
            return hidden_states

        # Compute output shape first — check NVLS eligibility on the output,
        # since if the smaller output is 16-byte divisible, the input is too.
        output_shape = list(hidden_states.size())
        output_shape[0] = hidden_states.size(0) // self.ep_size
        output = torch.empty(output_shape, dtype=hidden_states.dtype, device=hidden_states.device)

        # Check output only: if output is 16-byte divisible, input (world_size * output) is too.
        nvls_eligible = (
            self.triton_nvls_kernels_allowed
            and output.dtype in (torch.bfloat16, torch.float32)
            and are_tensors_nvls_eligible(output)
        )
        rs_buffer = None

        if nvls_eligible:
            rs_buffer = self._maybe_allocate_rs_buffer(hidden_states)

        can_use_nvls = nvls_eligible and rs_buffer["handle"] is not None

        if can_use_nvls:
            # Copy input to symmetric memory for reduce-scatter
            rs_buffer["tensor"].copy_(hidden_states)

            # Use latency-optimized NVLS reduce-scatter
            multimem_reduce_scatter(output, rs_buffer["tensor"], rs_buffer["handle"])
            return output.to(torch.bfloat16)
        else:
            # Fallback to NCCL
            hidden_states = reduce_scatter_to_sequence_parallel_region(
                hidden_states, group=self.tp_ep_group
            )
            return hidden_states.to(torch.bfloat16)


================================================
FILE: megatron/core/transformer/moe/upcycling_utils.py
================================================
# Copyright (c) 2022-2024, NVIDIA CORPORATION.  All rights reserved.
""" Helpers for converting a dense model to a MoE model in runtime """
import copy
from enum import Enum

import torch

from megatron.core.transformer.moe.experts import SequentialMLP, TEGroupedMLP
from megatron.core.transformer.moe.moe_layer import BaseMoELayer

ExpertsType = Enum('ExpertsType', ('SequentialMLP', 'TEGroupedMLP'))
ActivationFuncName = Enum('ActivationFuncName', ('gelu', 'silu', 'squared_relu'))


def _get_keys_endswith(model, suffix):
    """
    Retrieve keys from the model that end with a specified suffix.
    """
    return [k for k in model if k.endswith(suffix)]


def _find_submodule(model, submodule_name):
    """
    Find sub-module in model
    """
    for name, submodule in model.named_modules():
        if name.endswith("." + submodule_name) or name == submodule_name:
            return submodule
    return None


def _get_config(moe_model, dense_model):
    """
    Get various params from dense state dict and moe model.
    """
    # Find mlp sub-module in moe model and get relatived args
    mlp = _find_submodule(moe_model, "mlp")
    assert mlp is not None, f'can not find mlp layer in moe model: {moe_model}'
    assert isinstance(mlp, BaseMoELayer), (
        f'The mlp layer {type(mlp)} is not supported by upcycling.'
        f"Please use mlp layer inherited from {type(BaseMoELayer)}"
    )
    num_local_experts = mlp.num_local_experts
    num_experts = mlp.config.num_moe_experts
    gated_linear_unit = mlp.config.gated_linear_unit
    moe_router_topk = mlp.config.moe_router_topk
    moe_router_pre_softmax = mlp.config.moe_router_pre_softmax
    moe_ffn_hidden_size = mlp.config.moe_ffn_hidden_size
    func_name = mlp.config.activation_func.__name__

    if func_name == "gelu":
        activation_func_name = ActivationFuncName.gelu
    elif func_name == "silu":
        activation_func_name = ActivationFuncName.silu
    elif func_name == "squared_relu":
        activation_func_name = ActivationFuncName.squared_relu
    else:
        raise ValueError(
            f"The activation func is not supported by upcycling."
            + f"Valid options are: {list(ActivationFuncName.__members__.keys())}"
            + f"But got {func_name} "
        )
    ep_rank = mlp.ep_group.rank()

    # Find experts sub-module in moe model and get relatived args
    experts = _find_submodule(mlp, "experts")
    assert (
        experts is not None
    ), f'The model is not supported by upcycling. Can not find experts in {mlp}'
    if isinstance(experts, SequentialMLP):
        experts_type = ExpertsType.SequentialMLP
    elif isinstance(experts, TEGroupedMLP):
        experts_type = ExpertsType.TEGroupedMLP
    else:
        raise TypeError(
            f"The experts type {type(experts)} is not supported by upcycling."
            f" Valid options are: {list(ExpertsType.__members__.keys())}"
        )

    # Find mlp sub-module in dense model and get relatived args
    dense_mlp = _find_submodule(dense_model, "mlp")
    assert dense_mlp is not None, f'can not find mlp layer in moe model: {moe_model}'
    dense_ffn_hidden_size = dense_mlp.config.ffn_hidden_size

    # calc granularity and expansion_rate
    assert (
        dense_ffn_hidden_size % moe_ffn_hidden_size == 0
    ), "The ffn hidden size of dense model must be divisible by ffn hidden size of moe model."
    granularity = dense_ffn_hidden_size // moe_ffn_hidden_size
    assert (
        num_experts % granularity == 0
    ), "The number of experts must be divisible by granularity for upcycling"
    expansion_rate = num_experts // granularity

    return (
        num_local_experts,
        moe_router_topk,
        granularity,
        expansion_rate,
        experts_type,
        gated_linear_unit,
        activation_func_name,
        moe_router_pre_softmax,
        ep_rank,
    )


def _convert_to_moe_state_dict(moe_model, dense_model):
    """
    Convert a dense model's state_dict to a MoE model's state_dict.

    This function takes the state dictionary of a dense model and modifies it to fit the
    structure required by a Mixture of Experts model. It handles the necessary
    transformations for weights and biases specific to the MoE architecture.

    Args:
        state_dict (dict): The dense model's state_dict.
        moe_model (nn.Module): The MoE model instance from which to get the submodule
                               and state_dict, must be a model without FP16 and/or
                               DDP wrapper.

    Returns:
        dict: The converted MoE model state_dict, ready for use in the MoE architecture.
    """
    (
        num_local_experts,
        moe_router_topk,
        granularity,
        expansion_rate,
        experts_type,
        gated_linear_unit,
        activation_func_name,
        moe_router_pre_softmax,
        ep_rank,
    ) = _get_config(moe_model, dense_model)

    def _process_router_param(value):
        value = value.data.data.clone()
        value = torch.tensor_split(value, granularity, dim=0)[0]
        value = [t.repeat(granularity, 1) for t in value]
        value = torch.cat(value, dim=0)
        return value

    def _get_moe_activation_scale():
        """
        Calc moe activation scale factor relative to dense activation.
        For more detail please refer to https://arxiv.org/abs/2410.07524.
        """
        if moe_router_pre_softmax:
            moe_activation_scale = (expansion_rate * granularity * granularity) / moe_router_topk
        else:
            moe_activation_scale = granularity
        return moe_activation_scale

    def _get_weight_scale():
        moe_activation_scale = _get_moe_activation_scale()
        if gated_linear_unit == True:
            scale = moe_activation_scale ** (1 / 3)
        elif activation_func_name == ActivationFuncName.squared_relu:
            scale = moe_activation_scale ** (1 / 3)
        else:
            scale = moe_activation_scale ** (1 / 2)
        return scale

    def _process_fc1_weight_param(param):
        param = param.clone()
        scale = _get_weight_scale()
        param = param * scale
        if activation_func_name == ActivationFuncName.silu and gated_linear_unit == True:
            param_1, param_2 = torch.chunk(param, 2, dim=0)
            params_1 = torch.tensor_split(param_1, granularity, dim=0)
            params_2 = torch.tensor_split(param_2, granularity, dim=0)
            params = [torch.cat([params_1[i], params_2[i]], dim=0) for i in range(granularity)]
        else:
            params = torch.tensor_split(param, granularity, dim=0)
        params = params * expansion_rate
        return params

    def _process_fc1_bias_param(param):
        # need to add test case, and re-implement this func according the test result
        params = _process_fc1_weight_param(param)
        params = [tensor.squeeze(0) for tensor in params]
        return params

    def _process_fc2_weight_param(param):
        param = param.clone()
        scale = _get_weight_scale()
        param = param * scale
        params = torch.tensor_split(param, granularity, dim=1)
        params = params * expansion_rate
        return params

    def _process_fc2_bias_param(param):
        param = param.clone()
        params = param.repeat(granularity * expansion_rate, 1)
        return params

    # Step 1. Copy values from dense state dict to moe state dict as init.
    dense_state_dict = copy.deepcopy(dense_model.state_dict())
    moe_state_dict = copy.deepcopy(moe_model.state_dict())
    for key in dense_state_dict.keys() & moe_state_dict.keys():
        moe_state_dict[key] = dense_state_dict[key]

    # Step 2. Convert key for layer norm layer
    def _convert_key_value(
        dist_dict=moe_state_dict,
        src_dict=dense_state_dict,
        key_replace_old=None,
        key_replace_new=None,
        value_process_func=lambda x: x,
    ):
        """
        Get value from src_dict according to the key, and copy value to dist_dict with new key.
        The new key is generated by formatting old key with defined pattern.
        """
        keys = _get_keys_endswith(src_dict, key_replace_old)
        for key in keys:
            value = src_dict[key]
            new_value = value_process_func(value)
            new_key = key.replace(key_replace_old, key_replace_new)
            dist_dict[new_key] = new_value.clone() if hasattr(new_value, 'clone') else new_value
        return

    _convert_key_value(
        key_replace_old='mlp.linear_fc1.layer_norm_weight',
        key_replace_new='pre_mlp_layernorm.weight',
    )
    _convert_key_value(
        key_replace_old='mlp.linear_fc1.layer_norm_bias', key_replace_new='pre_mlp_layernorm.bias'
    )

    # Step 3. Convert key and value for router layer
    _convert_key_value(
        src_dict=moe_state_dict,
        key_replace_old='mlp.router.weight',
        key_replace_new='mlp.router.weight',
        value_process_func=_process_router_param,
    )

    # Step 4. Expand linear layer
    def _expand_key_value(
        dist_dict=moe_state_dict,
        src_dict=dense_state_dict,
        key_replace_old=None,
        key_replace_new=None,
        value_process_func=lambda x: x,
        num_local_experts=num_local_experts,
    ):
        """
        Get value from src_dict according to the key,
        Copy and expand value to dist_dict with new key.
        The new key is generated by formatting old key with defined pattern.
        """
        keys = _get_keys_endswith(src_dict, key_replace_old)
        for key in keys:
            param = src_dict[key]
            params = value_process_func(param)
            for idx in range(num_local_experts):
                new_key = key.replace(key_replace_old, key_replace_new).format(idx)
                dist_dict[new_key] = params[ep_rank * num_local_experts + idx]
        return

    if experts_type == ExpertsType.SequentialMLP:
        _expand_key_value(
            key_replace_old='mlp.linear_fc1.weight',
            key_replace_new='mlp.experts.local_experts.{}.linear_fc1.weight',
            value_process_func=_process_fc1_weight_param,
        )
        _expand_key_value(
            key_replace_old='mlp.linear_fc1.bias',
            key_replace_new='mlp.experts.local_experts.{}.linear_fc1.bias',
            value_process_func=_process_fc1_bias_param,
        )
        _expand_key_value(
            key_replace_old='mlp.linear_fc2.weight',
            key_replace_new='mlp.experts.local_experts.{}.linear_fc2.weight',
            value_process_func=_process_fc2_weight_param,
        )
        _expand_key_value(
            key_replace_old='mlp.linear_fc2.bias',
            key_replace_new='mlp.experts.local_experts.{}.linear_fc2.bias',
            value_process_func=_process_fc2_bias_param,
        )
    elif experts_type == ExpertsType.TEGroupedMLP:
        _expand_key_value(
            key_replace_old='mlp.linear_fc1.weight',
            key_replace_new='mlp.experts.linear_fc1.weight{}',
            value_process_func=_process_fc1_weight_param,
        )
        _expand_key_value(
            key_replace_old='mlp.linear_fc2.weight',
            key_replace_new='mlp.experts.linear_fc2.weight{}',
            value_process_func=_process_fc2_weight_param,
        )
    else:
        raise ValueError(f"unknown moe weight format {experts_type}")

    return moe_state_dict


def upcycle_state_dict(moe_model, dense_model):
    """
    Convert a dense model's state_dict to a MoE model's state_dict.

    This function facilitates the conversion of the state_dict from a dense model to
    a MoE model, ensuring that the parameters are correctly mapped for each model.

    Args:
        moe_model (nn.Module): The MoE model, must be a model without FP16 and/or DDP wrapper.
        dense_model (nn.Module): The dense model instance.

    Returns:
        dict: A dictionary containing the converted state_dict for the MoE model.
    """

    state_dict = {}
    if len(moe_model) == 1:
        assert len(dense_model) == 1
        state_dict['model'] = _convert_to_moe_state_dict(moe_model[0], dense_model[0])
    else:
        assert len(moe_model) == len(dense_model)
        for i in range(len(moe_model)):
            state_dict['model%d' % i] = _convert_to_moe_state_dict(
                dense_model[i].state_dict(), moe_model[i]
            )
    return state_dict


def load_and_upcycle_model(
    load_dense_ckpt_func, moe_model, dense_model, strict=True, load_args=(), load_kwargs={}
):
    """
    Load a dense model checkpoint and convert it to a MoE model.

    This function loads a checkpoint for a dense model and converts it to the MoE model format,
    allowing for the integration of the dense model's parameters into the MoE architecture.
    For more detail please refer to https://arxiv.org/abs/2410.07524.

    Args:
        load_dense_ckpt_func (callable): The function to load the dense model checkpoint.
        moe_model (nn.Module): The MoE model instance.
        dense_model (nn.Module): The dense model instance.
        strict (bool): Whether to strictly load the state dictionary (default is True).
        load_args (tuple): Positional arguments to pass to the loading function.
        load_kwargs (dict): Keyword arguments to pass to the loading function.
    """

    iteration, num_floating_point_operations_so_far = load_dense_ckpt_func(
        *load_args, **load_kwargs
    )
    state_dict = upcycle_state_dict(moe_model, dense_model)

    if len(moe_model) == 1:
        moe_model[0].load_state_dict(state_dict['model'], strict=strict)
    else:
        for i in range(len(moe_model)):
            moe_model[i].load_state_dict(state_dict['model%d' % i], strict=strict)

    return iteration, num_floating_point_operations_so_far


================================================
FILE: megatron/core/transformer/multi_latent_attention.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.


import math
from dataclasses import dataclass
from typing import NoReturn, Optional, Union

import torch

try:
    from einops import rearrange

    HAVE_EINOPS = True
except ImportError:
    HAVE_EINOPS = False


from megatron.core import tensor_parallel
from megatron.core.dist_checkpointing.mapping import ShardedObject
from megatron.core.extensions.transformer_engine import HAVE_TE
from megatron.core.models.common.embeddings import (
    RotaryEmbedding,
    YarnRotaryEmbedding,
    _yarn_get_mscale,
    apply_rotary_pos_emb,
)
from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
    FineGrainedActivationOffloadingInterface as off_interface,
)
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.tensor_parallel.layers import ColumnParallelLinear
from megatron.core.tensor_parallel.mappings import (
    gather_from_sequence_parallel_region,
    gather_from_tensor_model_parallel_region,
    scatter_to_sequence_parallel_region,
)
from megatron.core.transformer.attention import Attention
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.spec_utils import ModuleSpec, build_module
from megatron.core.transformer.torch_norm import LayerNormBuilder
from megatron.core.transformer.transformer_config import MLATransformerConfig
from megatron.core.typed_torch import apply_module
from megatron.core.utils import (
    deprecate_inference_params,
    get_pg_size,
    is_te_min_version,
    make_tp_sharded_tensor_for_checkpoint,
)

try:
    from megatron.core.fusions.fused_mla_yarn_rope_apply import (
        fused_apply_mla_rope_for_kv,
        fused_apply_mla_rope_for_q,
    )
except:
    fused_apply_mla_rope_for_kv = None
    fused_apply_mla_rope_for_q = None


if HAVE_TE:
    from megatron.core.extensions.transformer_engine import (
        TEColumnParallelLinear,
        TELayerNormColumnParallelLinear,
        TELinear,
        set_save_original_input,
        split_te_layernorm_column_parallel_linear,
    )
    from megatron.core.post_training.modelopt.layers import Linear
else:
    (
        TEColumnParallelLinear,
        TELayerNormColumnParallelLinear,
        TELinear,
        Linear,
        set_save_original_input,
        split_te_layernorm_column_parallel_linear,
    ) = (None, None, None, None, None, None)


@dataclass
class MLASelfAttentionSubmodules:
    """Submodules for the MLA self-attention layer."""

    # TODO(nschank): Move layernorms back to the bottom once all other layers have defaults removed.
    q_layernorm: LayerNormBuilder
    kv_layernorm: LayerNormBuilder

    linear_q_proj: Union[ModuleSpec, type] = None
    linear_q_down_proj: Union[ModuleSpec, type] = None
    linear_q_up_proj: Union[ModuleSpec, type] = None
    linear_kv_down_proj: Union[ModuleSpec, type] = None
    linear_kv_up_proj: Union[ModuleSpec, type] = None
    linear_qkv_down_proj: Union[ModuleSpec, type] = None
    core_attention: Union[ModuleSpec, type] = None
    linear_proj: Union[ModuleSpec, type] = None


class MultiLatentAttention(Attention):
    """Multi-Latent Attention layer abstract class.

    This layer only contains common modules required for the "self attn" and
    "cross attn" specializations.
    """

    def __init__(
        self,
        config: MLATransformerConfig,
        submodules: MLASelfAttentionSubmodules,
        layer_number: int,
        attn_mask_type: AttnMaskType,
        attention_type: str,
        cp_comm_type: Optional[str] = None,
        pg_collection: Optional[ProcessGroupCollection] = None,
    ) -> None:

        super().__init__(
            config=config,
            submodules=submodules,
            layer_number=layer_number,
            attention_type=attention_type,
            attn_mask_type=attn_mask_type,
            pg_collection=pg_collection,
        )
        self.config: MLATransformerConfig

        self.query_projection_size = self.config.v_head_dim * self.config.num_attention_heads

        self.q_head_dim = self.config.qk_head_dim + self.config.qk_pos_emb_head_dim

        # Overwrite the base class kv shape to support MLA inference
        self.key_hidden_size = self.q_head_dim
        self.val_hidden_size = self.config.v_head_dim

        self.recompute_up_proj = (
            self.config.recompute_granularity == 'selective'
            and "mla_up_proj" in self.config.recompute_modules
        )
        self.qkv_up_checkpoint = None

        mscale = _yarn_get_mscale(self.config.rotary_scaling_factor, self.config.mscale_all_dim)
        self.softmax_scale = mscale * mscale / math.sqrt(self.q_head_dim)
        self.cache_mla_latents = self.config.cache_mla_latents

        if self.config.rope_type == "rope":
            self.rotary_pos_emb = RotaryEmbedding(
                self.config.qk_pos_emb_head_dim,
                rotary_percent=self.config.rotary_percent,
                rotary_base=self.config.rotary_base,
                cp_group=self.pg_collection.cp,
            )
        elif self.config.rope_type == "yarn":

            self.rotary_pos_emb = YarnRotaryEmbedding(
                self.config.qk_pos_emb_head_dim,
                rotary_base=self.config.rotary_base,
                scaling_factor=self.config.rotary_scaling_factor,
                original_max_position_embeddings=self.config.original_max_position_embeddings,
                beta_fast=self.config.beta_fast,
                beta_slow=self.config.beta_slow,
                mscale=self.config.mscale,
                mscale_all_dim=self.config.mscale_all_dim,
                cp_group=self.pg_collection.cp,
            )
        else:
            raise ValueError(
                f"Unsupported RoPE type: {self.config.rope_type}, supported types are "
                "'rope' and 'yarn'"
            )

        self.core_attention = build_module(
            submodules.core_attention,
            config=self.config,
            layer_number=self.layer_number,
            attn_mask_type=self.attn_mask_type,
            attention_type=self.attention_type,
            softmax_scale=self.softmax_scale,
            k_channels=self.q_head_dim,
            v_channels=self.config.v_head_dim,
            cp_comm_type=cp_comm_type,
            pg_collection=self.pg_collection,
        )

        # Output.
        self.linear_proj = build_module(
            submodules.linear_proj,
            self.query_projection_size,
            self.config.hidden_size,
            config=self.config,
            init_method=self.config.output_layer_init_method,
            bias=self.config.add_bias_linear,
            input_is_parallel=True,
            skip_bias_add=True,
            is_expert=False,
            tp_comm_buffer_name='proj',
            tp_group=self.pg_collection.tp,
        )

        if (
            HAVE_TE
            and isinstance(self.linear_proj, TELinear)
            and (
                (
                    self.config.fp8
                    and self.config.fp8_recipe != 'delayed'
                    and is_te_min_version("2.6.0dev0")
                )
                or (self.config.fp4 and is_te_min_version("2.7.0.dev0"))
            )
        ):
            # For fp8/fp4 training, the output of the fused core_attn is saved by itself, and
            # linear_proj also saves the quantized tensor of this output. Here we set the
            # linear_proj to save the original input tensors to avoid the extra memory usage of
            # the quantized tensor.
            set_save_original_input(self.linear_proj)

    def forward(
        self,
        hidden_states,
        attention_mask,
        key_value_states=None,
        inference_context=None,
        rotary_pos_emb=None,
        rotary_pos_cos=None,
        rotary_pos_sin=None,
        rotary_pos_cos_sin=None,
        attention_bias=None,
        packed_seq_params=None,
        position_ids=None,
        sequence_len_offset=None,
        *,
        inference_params=None,
    ):
        """Forward pass for multi-latent attention"""
        assert rotary_pos_emb is None, "Rotary position embeddings should not be passed into MLA."
        assert attention_bias is None, "Attention bias should not be passed into MLA."
        assert (
            rotary_pos_cos is None and rotary_pos_sin is None
        ), "MLA does not support Flash Decoding"
        assert not rotary_pos_cos_sin, "Flash-infer rope has not been tested with MLA."
        assert not (
            self.training and self.cache_mla_latents
        ), "cache_mla_latents conflicts with training."

        # hidden_states: [sq, b, h]

        inference_context = deprecate_inference_params(inference_context, inference_params)
        if inference_context and not inference_context.is_static_batching():
            assert (
                self.config.cache_mla_latents
            ), "currently to use dynamic backend for MLA cache mla latents must be true"

        if self.config.cache_mla_latents:
            self.prepare_for_absorption()

        # =====================
        # Query, Key, and Value
        # =====================
        # Get the query, key and value tensors based on the type of attention -
        # self or cross attn.
        # query: [96, 1, 16, 128], key:[96, 1, 16, 128], value:[96, 1, 16, 128]
        with off_interface(self.offload_qkv_linear, hidden_states, "qkv_linear") as hidden_states:
            query, key, value, q_compressed, kv_compressed = self.get_query_key_value_tensors(
                hidden_states,
                key_value_states,
                position_ids,
                packed_seq_params,
                inference_context=inference_context,
            )
        if self.offload_qkv_linear:
            query = off_interface.group_commit(
                query, name="qkv_linear", forced_released_tensors=[hidden_states]
            )

        # ===================================================
        # Adjust key, value for inference
        # ===================================================
        # rotary_pos_emb = None
        query, key, value, _, attn_mask_type, block_table = self._adjust_key_value_for_inference(
            inference_context, query, key, value, rotary_pos_emb=None
        )

        # TODO: Currently, TE can only accept contiguous tensors for MLA
        query = query.contiguous()
        key = key.contiguous()

        # Value is none during decode for absorption
        if value is not None:
            value = value.contiguous()

        # ==================================
        # core attention computation
        # ==================================
        # Need corresponding TE change
        if self.checkpoint_core_attention and self.training:
            core_attn_out = self._checkpointed_attention_forward(
                query, key, value, attention_mask, packed_seq_params=packed_seq_params
            )
        else:
            if inference_context is None or inference_context.is_static_batching():
                extra_kwargs = {}
                if self.config.experimental_attention_variant == "dsa":
                    # For dsa we need to pass in the original hidden states and the compressed
                    # query representation.
                    extra_kwargs["x"] = hidden_states
                    extra_kwargs["qr"] = q_compressed
                with off_interface(
                    self.offload_core_attention and self.training, query, "core_attn"
                ) as query:
                    core_attn_out = self.core_attention(
                        query,
                        key,
                        value,
                        attention_mask,
                        packed_seq_params=packed_seq_params,
                        attn_mask_type=attn_mask_type,
                        **extra_kwargs,
                    )
            elif self.cache_mla_latents:
                # Dynamic batching attention kernel.
                q, k, v = (query, key, value)
                cu_query_lengths, max_seqlen_q = inference_context.cu_query_lengths()
                cu_kv_lengths, kv_lengths, max_seqlen_k = inference_context.cu_kv_lengths()

                core_attn_out = self.flash_decode_and_prefill(
                    q,
                    k,
                    v,
                    max_seqlen_q,
                    max_seqlen_k,
                    cu_query_lengths,
                    cu_kv_lengths,
                    kv_lengths,
                    block_table,
                )
                # Only rearrange if not in absorption mode (Flash MLA handles format correctly)
                if not inference_context.is_decode_only():
                    core_attn_out = rearrange(core_attn_out, 's b h d -> s b (h d)')
            if self.offload_core_attention and self.training:
                core_attn_out = off_interface.group_commit(
                    core_attn_out, name="core_attn", forced_released_tensors=[query, key, value]
                )

        # We are doing absorption with cache mla latents and decode mode.
        if self.cache_mla_latents and inference_context.is_decode_only():
            # core_attn_out = self.self.up_v_layer(core_attn_out)
            core_attn_out = torch.einsum("sbhc,hdc->sbhd", core_attn_out, self.up_v_weight)
            core_attn_out = core_attn_out.contiguous()

            # Flatten back: [seq, batch, num_heads * v_head_dim]
            core_attn_out = core_attn_out.view(core_attn_out.size(0), core_attn_out.size(1), -1)

        if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd':
            # reshape to same output shape as unpacked case
            # (t, np, hn) -> (t, b=1, h=np*hn)
            # t is the pack size = sum (sq_i)
            # note that batch is a dummy dimension in the packed case
            core_attn_out = core_attn_out.reshape(core_attn_out.size(0), 1, -1)

        if self.recompute_up_proj:
            assert self.qkv_up_checkpoint is not None
            self.qkv_up_checkpoint.discard_output_and_register_recompute(core_attn_out)
            self.qkv_up_checkpoint = None

        # =================
        # Output. [sq, b, h]
        # =================
        with off_interface(self.offload_attn_proj, core_attn_out, "attn_proj") as core_attn_out:
            output, bias = self.linear_proj(core_attn_out)
        if self.offload_attn_proj:
            output = off_interface.group_commit(
                output, name="attn_proj", forced_released_tensors=[core_attn_out]
            )

        return output, bias


class MLASelfAttention(MultiLatentAttention):
    """MLA Self-attention layer class

    Self-attention layer takes input with size [s, b, h]
    and returns output of the same size.
    """

    def __init__(
        self,
        config: MLATransformerConfig,
        submodules: MLASelfAttentionSubmodules,
        layer_number: int,
        attn_mask_type=AttnMaskType.padding,
        cp_comm_type: Optional[str] = None,
        pg_collection: Optional[ProcessGroupCollection] = None,
    ):
        if pg_collection is None:
            pg_collection = ProcessGroupCollection.use_mpu_process_groups()

        super().__init__(
            config=config,
            submodules=submodules,
            layer_number=layer_number,
            attn_mask_type=attn_mask_type,
            attention_type="self",
            cp_comm_type=cp_comm_type,
            pg_collection=pg_collection,
        )

        if self.config.q_lora_rank is None:
            # Not projecting query
            self.linear_q_proj = build_module(
                submodules.linear_q_proj,
                self.config.hidden_size,
                self.config.num_attention_heads * self.q_head_dim,
                config=self.config,
                init_method=self.config.init_method,
                gather_output=False,
                bias=False,
                skip_bias_add=False,
                is_expert=False,
                tp_comm_buffer_name='q_proj',
            )

        else:
            q_down_proj_kwargs = {}
            if submodules.linear_q_down_proj in [TELinear]:
                q_down_proj_kwargs['parallel_mode'] = 'duplicated'
            elif submodules.linear_q_down_proj in [
                Linear,
                TEColumnParallelLinear,
                ColumnParallelLinear,
            ]:
                q_down_proj_kwargs['gather_output'] = False
            else:
                raise ValueError(f"Unsupported linear_q_down_proj: {submodules.linear_q_down_proj}")

            self.linear_q_down_proj = build_module(
                submodules.linear_q_down_proj,
                self.config.hidden_size,
                self.config.q_lora_rank,
                config=self.config,
                init_method=self.config.init_method,
                bias=False,
                skip_bias_add=False,
                is_expert=False,
                tp_comm_buffer_name='q_down_proj',
                skip_weight_param_allocation=False,
                tp_group=(
                    pg_collection.tp
                    if q_down_proj_kwargs.get('parallel_mode') != 'duplicated'
                    else None
                ),
                **q_down_proj_kwargs,
            )

            self.linear_q_up_proj = build_module(
                submodules.linear_q_up_proj,
                self.config.q_lora_rank,
                self.config.num_attention_heads * self.q_head_dim,
                config=self.config,
                init_method=self.config.init_method,
                gather_output=False,
                bias=False,
                skip_bias_add=False,
                is_expert=False,
                tp_comm_buffer_name='q_up_proj',
                tp_group=pg_collection.tp,
            )

        kv_down_proj_kwargs = {}
        if submodules.linear_kv_down_proj in [TELinear]:
            kv_down_proj_kwargs['parallel_mode'] = 'duplicated'
        elif submodules.linear_kv_down_proj in [
            Linear,
            TEColumnParallelLinear,
            ColumnParallelLinear,
        ]:
            kv_down_proj_kwargs['gather_output'] = False
        else:
            raise ValueError(f"Unsupported linear_kv_down_proj: {submodules.linear_kv_down_proj}")

        self.linear_kv_down_proj = build_module(
            submodules.linear_kv_down_proj,
            self.config.hidden_size,
            self.config.kv_lora_rank + self.config.qk_pos_emb_head_dim,
            config=self.config,
            init_method=self.config.init_method,
            bias=False,
            skip_bias_add=False,
            is_expert=False,
            tp_comm_buffer_name='kv_down_proj',
            skip_weight_param_allocation=False,
            tp_group=(
                pg_collection.tp
                if kv_down_proj_kwargs.get('parallel_mode') != 'duplicated'
                else None
            ),
            **kv_down_proj_kwargs,
        )

        self.linear_kv_up_proj = build_module(
            submodules.linear_kv_up_proj,
            self.config.kv_lora_rank,
            self.config.num_attention_heads * (self.config.qk_head_dim + self.config.v_head_dim),
            config=self.config,
            init_method=self.config.init_method,
            gather_output=False,
            bias=False,
            skip_bias_add=False,
            is_expert=False,
            tp_comm_buffer_name='kv_up_proj',
            tp_group=pg_collection.tp,
        )

        if self.config.q_lora_rank is not None:
            self.q_layernorm = submodules.q_layernorm(
                hidden_size=self.config.q_lora_rank,
                config=self.config,
                eps=self.config.layernorm_epsilon,
            )

        self.kv_layernorm = submodules.kv_layernorm(
            hidden_size=self.config.kv_lora_rank,
            config=self.config,
            eps=self.config.layernorm_epsilon,
        )

    def _qkv_down_projection(self, hidden_states):
        """Unfused q/kv down projection path."""
        if self.config.q_lora_rank is not None:
            # if linear_q_down_proj is ColumnParallelLinear:
            #     q_compressed: [s, b, q_lora_rank / TP]
            # elif linear_q_down_proj is Linear:
            #     q_compressed: [s / TP, b, q_lora_rank]
            q_compressed, _ = self.linear_q_down_proj(hidden_states)

            # When output is sharded (ColumnParallelLinear), two things are needed to be
            # identical to a normal Linear.
            #   1. Manually gather output to restore output dim q_lora_rank;
            #   2. Scatter sequence back to s / TP if sequence-parallel since it was
            #      gathered by ColumnParallelLinear.
            if q_compressed.size(-1) != self.config.q_lora_rank:
                q_compressed = gather_from_tensor_model_parallel_region(q_compressed)
                if self.config.sequence_parallel:
                    q_compressed = scatter_to_sequence_parallel_region(q_compressed)
        else:
            q_compressed = hidden_states

        # if linear_kv_down_proj is ColumnParallelLinear:
        #     kv_combined: [s, b, (kv_lora_rank + qk_pos_emb_head_dim) / TP]
        # elif linear_kv_down_proj is Linear:
        #     kv_combined: [s / TP, b, (kv_lora_rank + qk_pos_emb_head_dim)]
        kv_combined, _ = self.linear_kv_down_proj(hidden_states)
        return q_compressed, kv_combined

    def get_query_key_value_tensors(
        self,
        hidden_states,
        key_value_states=None,
        position_ids=None,
        packed_seq_params=None,
        inference_context=None,
        *,
        inference_params=None,
    ):
        """
        Derives `query`, `key` and `value` tensors from `hidden_states`.
        """
        # s = sequence length, b = batch size, h = hidden size, n = num attention heads
        # Attention heads [s, b, n*h]
        assert (
            hidden_states.ndim == 3
        ), f"hidden_states should be 3D, [s, b, n*h], got {hidden_states.ndim}D"
        if packed_seq_params is not None:
            assert (
                packed_seq_params.local_cp_size is None
            ), "hybrid_context_parallel is not supported with MLA yet and is planned for future. \
            Please disable hybrid_context_parallel."

        inference_context = deprecate_inference_params(inference_context, inference_params)

        # =========================================
        # Prepare RoPE and seqlen related params
        # =========================================
        rotary_seq_len = self.rotary_pos_emb.get_rotary_seq_len(
            inference_context, None, hidden_states, self.config, packed_seq_params
        )

        # rotary_pos_emb:[s, b, 1, 64]
        mscale = 1.0
        rotary_pos_cos = None
        rotary_pos_sin = None
        packed_seq = packed_seq_params is not None and packed_seq_params.qkv_format == 'thd'
        if self.config.rope_type == "rope":
            rotary_pos_emb = self.rotary_pos_emb(rotary_seq_len, packed_seq=packed_seq)
        else:
            if self.config.apply_rope_fusion:
                rotary_pos_cos, rotary_pos_sin = self.rotary_pos_emb.get_cached_cos_sin(
                    rotary_seq_len, dtype=hidden_states.dtype, packed_seq=packed_seq
                )
                rotary_pos_emb = None
                assert inference_context is None, "Inference with MLA RoPE fusion is not supported"
                assert (
                    fused_apply_mla_rope_for_q is not None
                    and fused_apply_mla_rope_for_kv is not None
                ), "Fused MLA RoPE apply is not imported successfully"
            else:
                rotary_pos_emb, mscale = self.rotary_pos_emb(rotary_seq_len, packed_seq=packed_seq)

        if packed_seq_params is not None and packed_seq_params.qkv_format == 'thd':
            if packed_seq_params.cu_seqlens_q_padded is not None:
                cu_seqlens_q = packed_seq_params.cu_seqlens_q_padded
            else:
                cu_seqlens_q = packed_seq_params.cu_seqlens_q
            if packed_seq_params.cu_seqlens_kv_padded is not None:
                cu_seqlens_kv = packed_seq_params.cu_seqlens_kv_padded
            else:
                cu_seqlens_kv = packed_seq_params.cu_seqlens_kv
        else:
            cu_seqlens_q = cu_seqlens_kv = None

        # =========================================
        # QKV down projection and layernorm
        # =========================================
        q_compressed, kv_combined = self._qkv_down_projection(hidden_states)
        if kv_combined.size(-1) != self.config.kv_lora_rank + self.config.qk_pos_emb_head_dim:
            # kv_combined: [s, b, (kv_lora_rank + qk_pos_emb_head_dim)]
            kv_combined = gather_from_tensor_model_parallel_region(kv_combined)
            # kv_compressed:[s, b, kv_lora_rank], k_pos_emb: [s, b, qk_pos_emb_head_dim]
            kv_compressed, k_pos_emb = torch.split(
                kv_combined, [self.config.kv_lora_rank, self.config.qk_pos_emb_head_dim], dim=-1
            )
            if self.config.sequence_parallel:
                # kv_compressed:[s / TP, b, kv_lora_rank]
                kv_compressed = scatter_to_sequence_parallel_region(kv_compressed)
        else:
            # kv_compressed:[s / TP, b, kv_lora_rank], k_pos_emb: [s / TP, b, qk_pos_emb_head_dim]
            kv_compressed, k_pos_emb = torch.split(
                kv_combined, [self.config.kv_lora_rank, self.config.qk_pos_emb_head_dim], dim=-1
            )
            if get_pg_size(self.tp_group) > 1 and self.config.sequence_parallel:
                # k_pos_emb: [s, b, qk_pos_emb_head_dim]
                k_pos_emb = gather_from_sequence_parallel_region(k_pos_emb, group=self.tp_group)

        if packed_seq_params is not None:
            # If sequence packing, TE expect [t, h, d] shaped qkv input.
            # In Megatron-Core, the qkv shape is [t, 1, h, d].
            # So we need to reshape qkv from [t, 1, h, d] to [t, h, d].
            q_compressed = q_compressed.squeeze(1)
            kv_compressed = kv_compressed.squeeze(1)
            k_pos_emb = k_pos_emb.squeeze(1)

        # =========================================
        # Apply norm
        # =========================================

        if self.config.q_lora_rank is not None:
            # q_compressed: [num_tokens, q_lora_rank]
            q_compressed = apply_module(self.q_layernorm)(q_compressed)

        kv_compressed = apply_module(self.kv_layernorm)(kv_compressed)

        # =========================================
        # QKV up projection and RoPE apply
        # =========================================

        def qkv_up_proj_and_rope_apply_for_cached_latent_kv(
            q_compressed, kv_compressed, k_pos_emb, rotary_pos_emb
        ):
            if self.config.q_lora_rank is not None:
                # q_compressed: [num_tokens, q_lora_rank]
                # q: [num_tokens, n * (qk_head_dim + qk_pos_emb_head_dim)]
                q, _ = self.linear_q_up_proj(q_compressed)
            else:
                # q_compressed: [num_tokens, hidden_size]
                # q: [num_tokens, n * (qk_head_dim + qk_pos_emb_head_dim)]
                q, _ = self.linear_q_proj(q_compressed)

            # q: [num_tokens, n, q_head_dim]
            q = q.view(*q.size()[:-1], self.num_attention_heads_per_partition, self.q_head_dim)

            # [num_tokens, qk_pos_emb_head_dim] -> [num_tokens, 1, qk_pos_emb_head_dim]
            k_pos_emb = torch.unsqueeze(k_pos_emb, -2)

            q_no_pe, q_pos_emb = torch.split(
                q, [self.config.qk_head_dim, self.config.qk_pos_emb_head_dim], dim=-1
            )

            # Dynamic batching: use inference context methods
            q_pos_emb = inference_context.apply_rotary_emb_query(
                q_pos_emb,
                rotary_pos_emb,
                config=self.config,
                cu_seqlens_q=cu_seqlens_q,
                cp_group=self.pg_collection.cp,
                mscale=mscale,
            )
            # k_pos_emb:[num_tokens, 1, qk_pos_emb_head_dim]
            k_pos_emb = inference_context.apply_rotary_emb_key(
                k_pos_emb,
                rotary_pos_emb,
                config=self.config,
                cp_group=self.pg_collection.cp,
                mscale=mscale,
            )

            # Create KV cache entry. It will the be the key vector in cache mla latents path
            k_pos_emb_squeezed = k_pos_emb.squeeze(1)
            kv_cached = torch.cat([kv_compressed, k_pos_emb_squeezed], dim=-1)

            # Flag for whether to use absorption. We only use absorption
            # when caching the latents and in decode-only mode
            use_absorption = (
                self.config.cache_mla_latents
                and inference_context
                and inference_context.is_decode_only()
            )
            # Compute query components. Multiply by up k if absorbing
            q_content = (
                torch.einsum("sbhd,hdk->sbhk", q_no_pe, self.up_k_weight)
                if use_absorption
                else q_no_pe
            )
            # Query: content + original positional (latent_dim + pos_dim)
            query = torch.cat([q_content, q_pos_emb], dim=-1)

            key = kv_cached
            value = None

            query = query.contiguous()
            key = key.contiguous()

            return query, key, value

        def qkv_up_proj_and_rope_apply(q_compressed, kv_compressed, k_pos_emb, rotary_pos_emb):
            """
            Apply the up projection and RoPE to the query and key.
            When sequence packing enabled, the input tensors adopt a packed shape of [t, ...];
            otherwise, they maintain the unpacked shape [s, b, ...]. In subsequent code comments,
            we uniformly use [num_tokens, ...] to denote [s, b, ...] or [t, ...] for two cases.
            """
            if self.config.q_lora_rank is not None:
                # q_compressed: [num_tokens, q_lora_rank]
                # q: [num_tokens, n * (qk_head_dim + qk_pos_emb_head_dim)]
                q, _ = self.linear_q_up_proj(q_compressed)
            else:
                # q_compressed: [num_tokens, hidden_size]
                # q: [num_tokens, n * (qk_head_dim + qk_pos_emb_head_dim)]
                q, _ = self.linear_q_proj(q_compressed)

            # q: [num_tokens, n, q_head_dim]
            q = q.view(*q.size()[:-1], self.num_attention_heads_per_partition, self.q_head_dim)

            # kv: [num_tokens, n * (qk_head_dim + v_head_dim)]
            kv, _ = self.linear_kv_up_proj(kv_compressed)

            # kv: [num_tokens, n, (qk_head_dim + v_head_dim)]
            kv = kv.view(
                *kv.size()[:-1],
                self.num_attention_heads_per_partition,
                self.config.qk_head_dim + self.config.v_head_dim,
            )

            # [num_tokens, qk_pos_emb_head_dim] -> [num_tokens, 1, qk_pos_emb_head_dim]
            k_pos_emb = torch.unsqueeze(k_pos_emb, -2)

            # todo add assert about fusions and caching
            if self.config.apply_rope_fusion:
                cp_rank = self.pg_collection.cp.rank()
                cp_size = self.pg_collection.cp.size()
                query = fused_apply_mla_rope_for_q(
                    q,
                    rotary_pos_cos,
                    rotary_pos_sin,
                    self.config.qk_head_dim,
                    self.config.qk_pos_emb_head_dim,
                    cu_seqlens_q,
                    cp_rank,
                    cp_size,
                )
                key, value = fused_apply_mla_rope_for_kv(
                    kv,
                    k_pos_emb,
                    rotary_pos_cos,
                    rotary_pos_sin,
                    self.config.qk_pos_emb_head_dim,
                    self.config.qk_head_dim,
                    self.config.v_head_dim,
                    cu_seqlens_kv,
                    cp_rank,
                    cp_size,
                )
            else:
                q_len = q.size()[0]
                if inference_context is not None:
                    # add offset to the sequence start for inference
                    sequence_start = inference_context.sequence_len_offset
                    sequence_end = sequence_start + q_len
                    rotary_pos_emb = rotary_pos_emb[sequence_start:sequence_end]
                elif packed_seq_params is None or self.config.context_parallel_size == 1:
                    # Shorten rotary_pos_emb to the sequence length when inference_params
                    # is not provided. This makes sure we can run forward directly with
                    # any sequence length. During training, the sequence length is always
                    # the full rotary_pos_emb length, except for sequence packing + CP.
                    # When sequence packing and context parallel are both enabled, the
                    # position embedding will not split rotary_pos_emb, so it may exceed
                    # the sequence length on this CP rank, but we need the full rotary_pos_emb
                    # to cover the full sequence, so we do not shorten it here.
                    rotary_pos_emb = rotary_pos_emb[0:q_len]

                # q_no_pe: [num_tokens, n, qk_head_dim]
                # q_pos_emb: [num_tokens, n, qk_pos_emb_head_dim]
                q_no_pe, q_pos_emb = torch.split(
                    q, [self.config.qk_head_dim, self.config.qk_pos_emb_head_dim], dim=-1
                )

                # k_no_pe: [num_tokens, n, qk_head_dim]
                # value: [num_tokens, n, v_head_dim]
                k_no_pe, value = torch.split(
                    kv, [self.config.qk_head_dim, self.config.v_head_dim], dim=-1
                )

                # q_pos_emb: [num_tokens, n, qk_pos_emb_head_dim]
                q_pos_emb = apply_rotary_pos_emb(
                    q_pos_emb,
                    rotary_pos_emb,
                    config=self.config,
                    cu_seqlens=cu_seqlens_q,
                    mscale=mscale,
                    cp_group=self.pg_collection.cp,
                )
                # k_pos_emb:[num_tokens, 1, qk_pos_emb_head_dim]
                k_pos_emb = apply_rotary_pos_emb(
                    k_pos_emb,
                    rotary_pos_emb,
                    config=self.config,
                    cu_seqlens=cu_seqlens_kv,
                    mscale=mscale,
                    cp_group=self.pg_collection.cp,
                )

                # query: [num_tokens, n, (qk_head_dim + v_head_dim)]
                query = torch.cat([q_no_pe, q_pos_emb], dim=-1)

                # key: [num_tokens, n, (qk_head_dim + v_head_dim)]
                if k_pos_emb.ndim == 4:
                    k_pos_emb = k_pos_emb.expand(-1, -1, self.num_attention_heads_per_partition, -1)
                else:
                    assert k_pos_emb.ndim == 3
                    k_pos_emb = k_pos_emb.expand(-1, self.num_attention_heads_per_partition, -1)
                key = torch.cat([k_no_pe, k_pos_emb], dim=-1)

            query = query.contiguous()
            key = key.contiguous()
            value = value.contiguous()

            return query, key, value

        if self.recompute_up_proj:
            quantization = self.config.fp8 or self.config.fp4
            self.qkv_up_checkpoint = tensor_parallel.CheckpointWithoutOutput(fp8=quantization)
            query, key, value = self.qkv_up_checkpoint.checkpoint(
                qkv_up_proj_and_rope_apply, q_compressed, kv_compressed, k_pos_emb, rotary_pos_emb
            )
        else:
            if self.cache_mla_latents:
                assert (
                    inference_context and not inference_context.is_static_batching()
                ), "Caching MLA latents only works with dynamic backend inference"
                query, key, value = qkv_up_proj_and_rope_apply_for_cached_latent_kv(
                    q_compressed, kv_compressed, k_pos_emb, rotary_pos_emb
                )
            else:
                query, key, value = qkv_up_proj_and_rope_apply(
                    q_compressed, kv_compressed, k_pos_emb, rotary_pos_emb
                )

        return query, key, value, q_compressed, kv_compressed

    def uncompress_kv_from_cache(self, kv_cached):
        """
        Take a compressed kv and uncompress them
        """
        kv_compressed, k_pos_emb = torch.split(
            kv_cached, [self.config.kv_lora_rank, self.config.qk_pos_emb_head_dim], dim=-1
        )

        # Seperated out the norm and linear
        kv, _ = self.linear_kv_up_proj_linear(kv_compressed)

        kv = kv.view(
            *kv.size()[:-1],
            self.num_attention_heads_per_partition,
            self.config.qk_head_dim + self.config.v_head_dim,
        )

        k_no_pe, value = torch.split(kv, [self.config.qk_head_dim, self.config.v_head_dim], dim=-1)

        # Add head dimension
        k_pos_emb = k_pos_emb.unsqueeze(-2)
        k_pos_emb = k_pos_emb.expand(-1, -1, self.num_attention_heads_per_partition, -1)

        key = torch.cat([k_no_pe, k_pos_emb], dim=-1)
        return key, value

    def prepare_for_absorption(self):
        """Prepare the model for absorption optimization in MLA (Multi-Latent Attention).

        This method sets up the necessary components for the absorption technique, which
        optimizes memory during inference by caching compressed KV latents instead
        of full KV states. The absorption technique allows efficient decode-only operations
        by pre-computing certain matrix multiplications.

        Note (Peter): Right now we are not doing true absorption. We will add this support
        at a later time.

        The method performs the following operations:
        1. Splits the fused layernorm + linear layer (linear_kv_up_proj) into separate
        components.
        2. Extracts and stores the up-projection weights for K and V separately, which
        are used during the absorption process
        3. Replaces the identity kv_layernorm with the actual layernorm from the split
        4. Stores the linear component separately for uncompressing KV cache during
        prefill/mixed stages

        This is a one-time setup that should only be called once at initialization when
        cache_mla_latents is enabled.
        """
        # We should only have to call to set once at start
        if not hasattr(self, "up_k_weight"):
            with torch.no_grad():
                linear_kv_up_proj_norm, linear_kv_up_proj_linear = (
                    split_te_layernorm_column_parallel_linear(
                        self.linear_kv_up_proj, self.config, None, self.linear_kv_up_proj.tp_group
                    )
                )

                # Note: When caching latents we overide the kv_layernorm
                # which was an identity before because in the is path
                # we unfused the linear_kv_up_proj
                self.kv_layernorm = linear_kv_up_proj_norm

                # This is used in absorption when we are
                # uncompressing the KV cache in prefill/mixed stages
                self.linear_kv_up_proj_linear = linear_kv_up_proj_linear

                kv_up_weight = (
                    self.linear_kv_up_proj.weight
                )  # [num_heads * (qk_head_dim + v_head_dim), kv_lora_rank]
                kv_up_weight = kv_up_weight.view(
                    self.num_attention_heads_per_partition,
                    self.config.qk_head_dim + self.config.v_head_dim,
                    self.config.kv_lora_rank,
                )
                # Split into K and V up-projection weights. These are used for absorption
                self.up_k_weight = kv_up_weight[
                    :, : self.config.qk_head_dim, :
                ]  # [num_heads, qk_head_dim, kv_lora_rank]
                self.up_v_weight = kv_up_weight[
                    :, self.config.qk_head_dim :, :
                ]  # [num_heads, v_head_dim, kv_lora_rank]

                # We delete the original linear_kv_up_proj as we do not
                # need it for the absorbed path.
                del self.linear_kv_up_proj

    def backward_dw(self) -> NoReturn:
        """Execute weight gradient computation"""
        self._backward_kv_proj()
        self._backward_q_proj()
        self._backward_output_proj()

    def _backward_kv_proj(self):
        """Computes weight gradients of KV projection layers"""
        self.linear_kv_up_proj.backward_dw()
        self.linear_kv_down_proj.backward_dw()

    def _backward_q_proj(self):
        """Computes weight gradients of Q projection layers"""
        if self.config.q_lora_rank is None:
            self.linear_q_proj.backward_dw()
        else:
            self.linear_q_down_proj.backward_dw()
            self.linear_q_up_proj.backward_dw()

    def _backward_output_proj(self):
        """Computes weight gradients of output projection layer"""
        self.linear_proj.backward_dw()

    def set_for_recompute_input_layernorm(self):
        """Set the attention layer for recompute input_layernorm. Only needed for fp8/fp4."""
        if self.config.q_lora_rank is not None:
            set_save_original_input(self.linear_q_down_proj)
        set_save_original_input(self.linear_kv_down_proj)

    def clip_qk(self):
        """
        QK Clipping is a technique to clip the query and key attention logits to prevent the
        attention logits from exploding. Per MuonClip usage, we update the weight by calling this
        function after Muon optimizer step.
        """

        if not self.config.qk_clip:
            raise ValueError("qk_clip option needs to be enabled")

        if self.core_attention.current_max_attn_logits is None:
            raise ValueError("current_max_attn_logits is None")

        # Check if we're in absorption mode
        if self.cache_mla_latents and not hasattr(self, 'linear_kv_up_proj'):
            raise ValueError(
                "qk_clip is not supported when cache_mla_latents is enabled and absorption is "
                "active. The linear_kv_up_proj layer has been deleted during absorption "
                "preparation."
            )

        assert self.core_attention.current_max_attn_logits.shape == (
            self.num_attention_heads_per_partition,
        ), f"current_max_attn_logits shape is not ({self.num_attention_heads_per_partition}, ) \
                    but {self.core_attention.current_max_attn_logits.shape}"

        # only update the weight if any head has
        # current_max_attn_logits > qk_clip_threshold
        if torch.any(self.core_attention.current_max_attn_logits > self.config.qk_clip_threshold):
            # Use num_attention_heads_per_partition for tensor parallel scenarios

            # qk_clip_balancing_eta (n, 1, 1)
            assert self.core_attention.current_max_attn_logits.shape == (
                self.num_attention_heads_per_partition,
            ), f"current_max_attn_logits shape is not ({self.num_attention_heads_per_partition},) \
                but {self.core_attention.current_max_attn_logits.shape}"
            self.qk_clip_balancing_eta = torch.clamp(
                self.config.qk_clip_threshold / self.core_attention.current_max_attn_logits, max=1.0
            ).view(self.num_attention_heads_per_partition, 1, 1)
            assert torch.all(self.qk_clip_balancing_eta <= 1.0)

            # Update q side weight, keep qk_pos_emb_head_dim side weight unchanged
            if self.config.q_lora_rank is None:
                q_proj_weight = self.linear_q_proj.weight
            else:
                q_proj_weight = self.linear_q_up_proj.weight

            # Handle different weight access patterns (main_param vs direct access)
            if hasattr(q_proj_weight, 'main_param'):
                q_proj_weight.main_param.data.copy_(
                    self._clip_q_proj_weight(q_proj_weight.main_param.data)
                )
            q_proj_weight.data.copy_(self._clip_q_proj_weight(q_proj_weight.data))

            # Update k side weight, keep v side weight unchanged
            kv_proj_weight = self.linear_kv_up_proj.weight

            # Handle different weight access patterns
            if hasattr(kv_proj_weight, 'main_param'):
                kv_proj_weight.main_param.data.copy_(
                    self._clip_kv_proj_weight(kv_proj_weight.main_param.data)
                )
            kv_proj_weight.data.copy_(self._clip_kv_proj_weight(kv_proj_weight.data))

        # reset current_max_attn_logits
        self.core_attention.current_max_attn_logits = None

    def _clip_q_proj_weight(self, weight):
        """Clip q_proj_weight"""
        # Reshape to (n, a + b, -1)
        weight_reshaped = weight.view(
            self.num_attention_heads_per_partition,
            self.config.qk_head_dim + self.config.qk_pos_emb_head_dim,
            -1,
        )

        # Split into qk_head_dim and qk_pos_emb_head_dim parts: (n, a, -1) and (n, b, -1)
        weight_q_nope = weight_reshaped[:, : self.config.qk_head_dim, :]
        weight_q_pe = weight_reshaped[:, self.config.qk_head_dim :, :]

        # Clipping
        weight_q_nope.mul_(torch.pow(self.qk_clip_balancing_eta, self.config.qk_clip_alpha))
        weight_q_pe.mul_(self.qk_clip_balancing_eta)

        # Concatenate back and reshape to original shape
        weight_q_updated = torch.cat([weight_q_nope, weight_q_pe], dim=1)
        weight_q_updated = weight_q_updated.view(
            self.num_attention_heads_per_partition
            * (self.config.qk_head_dim + self.config.qk_pos_emb_head_dim),
            -1,
        )

        return weight_q_updated

    def _clip_kv_proj_weight(self, weight):
        """Clip kv_proj_weight"""
        # shape: (n, qk_head_dim + v_head_dim, kv_lora_rank)
        weight_reshaped = weight.view(
            self.num_attention_heads_per_partition,
            self.config.qk_head_dim + self.config.v_head_dim,
            -1,
        )

        # Split into qk_head_dim and v_head_dim parts: (n, a, -1) and (n, b, -1)
        weight_k = weight_reshaped[:, : self.config.qk_head_dim, :]
        weight_v = weight_reshaped[:, self.config.qk_head_dim :, :]

        # Clipping
        weight_k.mul_(torch.pow(self.qk_clip_balancing_eta, 1 - self.config.qk_clip_alpha))

        # Concatenate back and reshape to original shape
        weight_kv_updated = torch.cat([weight_k, weight_v], dim=1)
        weight_kv_updated = weight_kv_updated.view(
            self.num_attention_heads_per_partition
            * (self.config.qk_head_dim + self.config.v_head_dim),
            -1,
        )

        return weight_kv_updated


class FusedMLASelfAttention(MLASelfAttention):
    """MLA self-attention with fused q/kv down projection."""

    def __init__(
        self,
        config: MLATransformerConfig,
        submodules: MLASelfAttentionSubmodules,
        layer_number: int,
        attn_mask_type=AttnMaskType.padding,
        cp_comm_type: Optional[str] = None,
        pg_collection: Optional[ProcessGroupCollection] = None,
    ):
        if pg_collection is None:
            pg_collection = ProcessGroupCollection.use_mpu_process_groups()

        MultiLatentAttention.__init__(
            self,
            config=config,
            submodules=submodules,
            layer_number=layer_number,
            attn_mask_type=attn_mask_type,
            attention_type="self",
            cp_comm_type=cp_comm_type,
            pg_collection=pg_collection,
        )

        assert self.config.q_lora_rank is not None, (
            "FusedMLASelfAttention requires q_lora_rank to be set; "
            "fallback to MLASelfAttention for q_lora_rank=None."
        )

        qkv_down_proj_kwargs = {}
        if submodules.linear_qkv_down_proj in [TELinear]:
            qkv_down_proj_kwargs['parallel_mode'] = 'duplicated'
        elif submodules.linear_qkv_down_proj in [
            Linear,
            TEColumnParallelLinear,
            ColumnParallelLinear,
            TELayerNormColumnParallelLinear,
        ]:
            qkv_down_proj_kwargs['gather_output'] = False
        else:
            raise ValueError(f"Unsupported linear_qkv_down_proj: {submodules.linear_qkv_down_proj}")

        self.linear_qkv_down_proj = build_module(
            submodules.linear_qkv_down_proj,
            self.config.hidden_size,
            self.config.q_lora_rank + self.config.kv_lora_rank + self.config.qk_pos_emb_head_dim,
            config=self.config,
            init_method=self.config.init_method,
            bias=False,
            skip_bias_add=False,
            is_expert=False,
            tp_comm_buffer_name='qkv_down_proj',
            skip_weight_param_allocation=False,
            tp_group=(
                pg_collection.tp
                if qkv_down_proj_kwargs.get('parallel_mode') != 'duplicated'
                else None
            ),
            **qkv_down_proj_kwargs,
        )

        self.linear_q_up_proj = build_module(
            submodules.linear_q_up_proj,
            self.config.q_lora_rank,
            self.config.num_attention_heads * self.q_head_dim,
            config=self.config,
            init_method=self.config.init_method,
            gather_output=False,
            bias=False,
            skip_bias_add=False,
            is_expert=False,
            tp_comm_buffer_name='q_up_proj',
            tp_group=pg_collection.tp,
        )

        self.linear_kv_up_proj = build_module(
            submodules.linear_kv_up_proj,
            self.config.kv_lora_rank,
            self.config.num_attention_heads * (self.config.qk_head_dim + self.config.v_head_dim),
            config=self.config,
            init_method=self.config.init_method,
            gather_output=False,
            bias=False,
            skip_bias_add=False,
            is_expert=False,
            tp_comm_buffer_name='kv_up_proj',
            tp_group=pg_collection.tp,
        )

        self.q_layernorm = submodules.q_layernorm(
            hidden_size=self.config.q_lora_rank,
            config=self.config,
            eps=self.config.layernorm_epsilon,
        )
        self.kv_layernorm = submodules.kv_layernorm(
            hidden_size=self.config.kv_lora_rank,
            config=self.config,
            eps=self.config.layernorm_epsilon,
        )

    def _qkv_down_projection(self, hidden_states):
        """Fused q/kv down projection path."""
        qkv, _ = self.linear_qkv_down_proj(hidden_states)
        q_compressed, kv_combined = torch.split(
            qkv,
            [self.config.q_lora_rank, self.config.kv_lora_rank + self.config.qk_pos_emb_head_dim],
            dim=-1,
        )
        return q_compressed, kv_combined

    def sharded_state_dict(self, prefix: str = "", sharded_offsets: tuple = (), metadata=None):
        """Return a sharded state dict compatible with pre-fusion checkpoints."""
        sharded_state_dict = super().sharded_state_dict(prefix, sharded_offsets, metadata)

        def _clone_sharded_object_with_key(obj: ShardedObject, new_key: str) -> ShardedObject:
            return ShardedObject(
                key=new_key,
                data=obj.data,
                global_shape=obj.global_shape,
                global_offset=obj.global_offset,
                replica_id=obj.replica_id,
            )

        fused_prefix = f"{prefix}linear_qkv_down_proj."

        fused_extra_keys = [
            k
            for k in sharded_state_dict.keys()
            if k.startswith(fused_prefix) and "_extra_state" in k
        ]
        for fused_extra_key in fused_extra_keys:
            suffix = fused_extra_key[len(fused_prefix) :]
            q_extra_key = f"{prefix}linear_q_down_proj.{suffix}"
            kv_extra_key = f"{prefix}linear_kv_down_proj.{suffix}"
            fused_obj = sharded_state_dict.get(fused_extra_key)
            if isinstance(fused_obj, ShardedObject):
                sharded_state_dict[q_extra_key] = _clone_sharded_object_with_key(
                    fused_obj, q_extra_key
                )
                sharded_state_dict[kv_extra_key] = _clone_sharded_object_with_key(
                    fused_obj, kv_extra_key
                )
            elif fused_obj is not None:
                sharded_state_dict[q_extra_key] = fused_obj
                sharded_state_dict[kv_extra_key] = fused_obj

        for key in list(sharded_state_dict.keys()):
            if key.startswith(fused_prefix):
                del sharded_state_dict[key]

        fused_weight = self.linear_qkv_down_proj.weight
        total_out = (
            self.config.q_lora_rank + self.config.kv_lora_rank + self.config.qk_pos_emb_head_dim
        )
        tp_size = get_pg_size(self.tp_group)

        if fused_weight.size(0) == total_out:
            q_split = self.config.q_lora_rank
            kv_split = self.config.kv_lora_rank + self.config.qk_pos_emb_head_dim
        else:
            assert (
                self.config.q_lora_rank % tp_size == 0
            ), "q_lora_rank must be divisible by tensor-parallel size"
            assert (
                self.config.kv_lora_rank + self.config.qk_pos_emb_head_dim
            ) % tp_size == 0, (
                "kv_lora_rank + qk_pos_emb_head_dim must be divisible by tensor-parallel size"
            )
            q_split = self.config.q_lora_rank // tp_size
            kv_split = (self.config.kv_lora_rank + self.config.qk_pos_emb_head_dim) // tp_size

        if q_split + kv_split != fused_weight.size(0):
            raise ValueError(
                "Unexpected fused qkv-down weight shape: "
                f"got {tuple(fused_weight.size())}, expected dim0 {q_split + kv_split}"
            )

        q_weight, kv_weight = torch.split(fused_weight, [q_split, kv_split], dim=0)

        q_key = f"{prefix}linear_q_down_proj.weight"
        kv_key = f"{prefix}linear_kv_down_proj.weight"

        sharded_state_dict[q_key] = make_tp_sharded_tensor_for_checkpoint(
            tensor=q_weight, key=q_key, tp_axis=0, prepend_offsets=sharded_offsets
        )
        sharded_state_dict[kv_key] = make_tp_sharded_tensor_for_checkpoint(
            tensor=kv_weight, key=kv_key, tp_axis=0, prepend_offsets=sharded_offsets
        )

        return sharded_state_dict

    def _load_from_state_dict(self, state_dict, prefix, *args, **kwargs):
        """Load state dict with automatic unfused->fused conversion."""
        q_key = f"{prefix}linear_q_down_proj.weight"
        kv_key = f"{prefix}linear_kv_down_proj.weight"
        fused_key = f"{prefix}linear_qkv_down_proj.weight"

        def _as_tensor(x):
            return x.data if hasattr(x, 'data') else x

        if fused_key not in state_dict and q_key in state_dict and kv_key in state_dict:
            q_weight = _as_tensor(state_dict[q_key])
            kv_weight = _as_tensor(state_dict[kv_key])
            state_dict[fused_key] = torch.cat([q_weight, kv_weight], dim=0)
            del state_dict[q_key]
            del state_dict[kv_key]
            state_dict.pop(f"{prefix}linear_q_down_proj.bias", None)
            state_dict.pop(f"{prefix}linear_kv_down_proj.bias", None)

        return super()._load_from_state_dict(state_dict, prefix, *args, **kwargs)


================================================
FILE: megatron/core/transformer/multi_token_prediction.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
from __future__ import annotations

import warnings
from contextlib import nullcontext
from dataclasses import dataclass
from typing import TYPE_CHECKING, Callable, List, Optional, Union

import torch
from torch import Tensor

from megatron.core import InferenceParams, parallel_state, tensor_parallel
from megatron.core.dist_checkpointing.mapping import ShardedStateDict
from megatron.core.dist_checkpointing.utils import apply_prefix_mapping, replace_prefix_for_sharding
from megatron.core.extensions.transformer_engine import HAVE_TE
from megatron.core.fp8_utils import get_fp8_context
from megatron.core.models.backends import BackendSpecProvider, LocalSpecProvider
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.pipeline_parallel.utils import is_vp_last_stage
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.tensor_parallel import (
    gather_from_tensor_model_parallel_region,
    scatter_to_sequence_parallel_region,
)
from megatron.core.transformer.enums import AttnMaskType, LayerType
from megatron.core.transformer.module import MegatronModule
from megatron.core.transformer.spec_utils import ModuleSpec, build_module
from megatron.core.transformer.torch_norm import LayerNormBuilder
from megatron.core.transformer.transformer_block import TransformerBlockSubmodules
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.typed_torch import apply_module
from megatron.core.utils import (
    get_pg_rank,
    is_torch_min_version,
    make_tp_sharded_tensor_for_checkpoint,
    make_viewless_tensor,
)

if TYPE_CHECKING:
    from megatron.core.ssm.mamba_block import MambaStackSubmodules

if is_torch_min_version("1.13.0"):
    dist_all_gather_func = torch.distributed.all_gather_into_tensor
else:
    dist_all_gather_func = torch.distributed._all_gather_base

SUPPORTED_ATTN_MASK = [
    AttnMaskType.padding,
    AttnMaskType.causal,
    AttnMaskType.no_mask,
    AttnMaskType.padding_causal,
]

if HAVE_TE:
    from megatron.core.extensions.transformer_engine_spec_provider import TESpecProvider
else:
    TESpecProvider = None


def tie_word_embeddings_state_dict(
    sharded_state_dict: ShardedStateDict,
    word_emb_weight: Tensor,
    word_emb_weight_key: str,
    tp_group: torch.distributed.ProcessGroup,
    dp_cp_group: torch.distributed.ProcessGroup,
) -> None:
    """tie the embedding of the mtp processing stage in a given sharded state dict.

    Args:
        sharded_state_dict (ShardedStateDict): state dict with the weight to tie.
        word_emb_weight (Tensor): weight of the word embedding.
        word_emb_weight_key (str): key of the word embedding in the sharded state dict.
        tp_group (torch.distributed.ProcessGroup): The tensor parallel group
        dp_cp_group (torch.distributed.ProcessGroup): The dp-cp comm group

    Returns: None, acts in-place
    """
    mtp_word_emb_replica_id = (
        1,  # copy of embedding in pre processing stage
        0,
        get_pg_rank(dp_cp_group),
    )
    assert word_emb_weight_key in sharded_state_dict
    del sharded_state_dict[word_emb_weight_key]
    sharded_state_dict[word_emb_weight_key] = make_tp_sharded_tensor_for_checkpoint(
        tensor=word_emb_weight,
        key=word_emb_weight_key,
        replica_id=mtp_word_emb_replica_id,
        allow_shape_mismatch=True,
        tp_group=tp_group,
        dp_cp_group=dp_cp_group,
    )


def tie_output_layer_state_dict(
    sharded_state_dict: ShardedStateDict,
    output_layer_weight: Tensor,
    output_layer_weight_key: str,
    tp_group: torch.distributed.ProcessGroup,
    dp_cp_group: torch.distributed.ProcessGroup,
) -> None:
    """tie the output layer of the mtp processing stage in a given sharded state dict.

    Args:
        sharded_state_dict (ShardedStateDict): state dict with the weight to tie.
        output_layer_weight (Tensor): weight of the output layer.
        output_layer_weight_key (str): key of the output layer in the sharded state dict.
        tp_group (torch.distributed.ProcessGroup): The tensor parallel group
        dp_cp_group (torch.distributed.ProcessGroup): The dp-cp comm group

    Returns: None, acts in-place
    """
    mtp_output_layer_replica_id = (
        1,  # copy of output layer in post processing stage
        0,
        get_pg_rank(dp_cp_group),
    )
    assert output_layer_weight_key in sharded_state_dict
    del sharded_state_dict[output_layer_weight_key]
    sharded_state_dict[output_layer_weight_key] = make_tp_sharded_tensor_for_checkpoint(
        tensor=output_layer_weight,
        key=output_layer_weight_key,
        replica_id=mtp_output_layer_replica_id,
        allow_shape_mismatch=True,
        tp_group=tp_group,
        dp_cp_group=dp_cp_group,
    )


def roll_tensor(tensor, shifts=-1, dims=-1, cp_group=None, packed_seq_params=None):
    """Roll the tensor input along the sequence dimension with Context Parallelism (CP) support.

    This function extends the original roll_tensor to support Context Parallelism, which allows
    MTP to work with CP > 1. When CP is enabled, the sequence dimension is split across CP ranks,
    and tensor rolling requires communication between adjacent CP ranks to properly handle the
    boundary conditions.

    For CP=1 (default behavior): Uses standard torch.roll with zero padding
    For CP>1: Splits tensor into chunks, performs rolling within each chunk, then exchanges
    boundary elements between adjacent CP ranks to maintain sequence continuity.

    For packed sequences: Respects sequence boundaries when rolling to avoid mixing tokens
    from different sequences.

    Args:
        tensor (Tensor): The input tensor to roll.
        shifts (int): The shift of the tensor (typically -1 for MTP).
        dims (int): The dimension to roll (typically -1 for sequence dimension).
        cp_group (ProcessGroup): The context parallelism process group. If None or size=1,
                               falls back to standard rolling behavior.
        packed_seq_params (PackedSeqParams): Parameters for packed sequence processing.
                                            If provided, respects sequence boundaries.
    Returns:
        tuple: (rolled_tensor, sum_of_rolled_tensor)
    """
    # Handle packed sequences cases
    if packed_seq_params is not None:
        return _roll_tensor_packed_seq(tensor, shifts, dims, packed_seq_params, cp_group)

    # Standard rolling behavior when CP is not enabled (cp_group is None or size=1)
    if cp_group is None or cp_group.size() == 1:
        rolled_tensor = torch.roll(tensor, shifts=shifts, dims=dims)
        rolled_tensor.select(dims, shifts).fill_(0)
        return rolled_tensor, rolled_tensor.sum()

    # CP-enabled rolling: Split tensor into chunks and handle boundary communication
    # This matches the batch splitting logic in get_batch_on_this_cp_rank() function
    tensor_list = tensor.chunk(2, dim=dims)
    rolled_tensor_list = []
    for i in range(len(tensor_list)):
        rolled_tensor_list.append(torch.roll(tensor_list[i], shifts=shifts, dims=dims))

    # Prepare tensors for communication between CP ranks
    # Each CP rank needs to send boundary elements to adjacent ranks
    tensor_send_list = []
    tensor_recv_list = []
    for i in range(len(rolled_tensor_list)):
        tensor_send_list.append(rolled_tensor_list[i].select(dims, shifts).contiguous())
        empty_tensor = torch.empty(
            tensor_send_list[i].shape,
            dtype=tensor_send_list[i].dtype,
            device=torch.cuda.current_device(),
        )
        tensor_recv_list.append(empty_tensor)

    # Get the global rank of next and prev process in the cp group
    global_ranks = torch.distributed.get_process_group_ranks(group=cp_group)
    local_rank = torch.distributed.get_rank(group=cp_group)
    next_rank = global_ranks[(local_rank + 1) % len(global_ranks)]
    prev_rank = global_ranks[(local_rank - 1) % len(global_ranks)]

    # Start send and recv ops
    ops = []
    if local_rank != 0:
        req_send_first_part = torch.distributed.isend(tensor=tensor_send_list[0], dst=prev_rank)
        ops.append(req_send_first_part)
        req_recv_second_part = torch.distributed.irecv(tensor=tensor_recv_list[1], src=prev_rank)
        ops.append(req_recv_second_part)
    else:
        # Inserted elements are set to be 0.0.
        tensor_recv_list[1] = 0
    if local_rank != len(global_ranks) - 1:
        req_recv_first_part = torch.distributed.irecv(tensor=tensor_recv_list[0], src=next_rank)
        ops.append(req_recv_first_part)
        req_send_second_part = torch.distributed.isend(tensor=tensor_send_list[1], dst=next_rank)
        ops.append(req_send_second_part)
    else:
        # For the last CP rank, the removed elements of second part go into the first part
        tensor_recv_list[0] = tensor_send_list[1]

    # Wait for all communication operations to complete
    for op in ops:
        op.wait()

    # Splicing: Replace boundary elements with received elements from adjacent ranks
    # This ensures proper sequence continuity across CP boundaries
    index = [slice(None)] * rolled_tensor_list[0].dim()
    index[dims] = shifts
    for i in range(len(rolled_tensor_list)):
        rolled_tensor_list[i][tuple(index)] = tensor_recv_list[i]

    # Concatenate the processed chunks back into a single tensor
    rolled_tensor = torch.cat(rolled_tensor_list, dim=dims)

    return rolled_tensor, rolled_tensor.sum()


def _roll_tensor_packed_seq(tensor, shifts, dims, packed_seq_params, cp_group=None):
    """Roll tensor with packed sequence support.
    This function handles rolling for packed sequences by respecting sequence boundaries
    """

    # Notice: This is a naive implementation to test the correctness,
    # a better solution will only sync the boundary tokens once.
    assert (
        dims == -1 or dims == tensor.dim() - 1
    ), "Packed sequence roll only supports the last dimension."
    assert shifts == -1, "Packed sequence roll only supports a single-token left shift."
    cu_seqlens = packed_seq_params.cu_seqlens_q
    assert cu_seqlens is not None, "Packed sequence parameters must provide cu_seqlens_q."

    rolled_tensor = tensor.clone()

    cp_size = cp_group.size() if cp_group is not None else 1
    if cp_size == 1:
        # CP disabled: roll each packed sequence independently within its boundaries
        for i in range(len(cu_seqlens) - 1):
            start_idx = cu_seqlens[i]
            end_idx = cu_seqlens[i + 1]
            seq_slice = tensor[..., start_idx:end_idx]
            rolled_seq = torch.roll(seq_slice, shifts=shifts, dims=dims)
            # Zero out the last position(s) that would cross sequence boundaries
            rolled_seq[..., shifts:] = 0
            rolled_tensor[..., start_idx:end_idx] = rolled_seq
        return rolled_tensor, rolled_tensor.sum()

    # CP enabled: each rank owns two chunks per sequence (front and mirrored tail).
    local_rank = torch.distributed.get_rank(group=cp_group)
    global_ranks = torch.distributed.get_process_group_ranks(group=cp_group)
    next_rank = global_ranks[(local_rank + 1) % cp_size]
    prev_rank = global_ranks[(local_rank - 1) % cp_size]

    # Iterate over each sequence individually
    for i in range(len(cu_seqlens) - 1):
        start_idx = cu_seqlens[i]
        end_idx = cu_seqlens[i + 1]

        # the idx has been multiplied by cp_size, need to divide it by cp_size to get the local idx
        local_start_idx = start_idx // cp_size
        local_end_idx = end_idx // cp_size

        # Skip empty sequences - this can happen when a sequence is very short and
        # after dividing by cp_size, the local slice has zero length
        local_seq_len = local_end_idx - local_start_idx
        if local_seq_len == 0:
            continue

        tensor_slice = rolled_tensor[..., local_start_idx:local_end_idx].clone()

        # The following code is very similar as the code in roll_tensor function
        local_chunks = tensor_slice.chunk(2, dim=dims)
        rolled_chunks = [torch.roll(chunk, shifts=shifts, dims=dims) for chunk in local_chunks]

        tensor_send_list = []
        tensor_recv_list = []
        for chunk in rolled_chunks:
            # Skip empty chunks that can occur when the sequence slice is very small
            if chunk.size(dims) == 0:
                tensor_send_list.append(
                    torch.empty(chunk.shape[:-1], dtype=chunk.dtype, device=chunk.device)
                )
                tensor_recv_list.append(
                    torch.empty(chunk.shape[:-1], dtype=chunk.dtype, device=chunk.device)
                )
                continue
            boundary = chunk.select(dims, shifts).contiguous().clone()
            tensor_send_list.append(boundary)
            tensor_recv_list.append(torch.empty_like(boundary))

        ops = []
        if local_rank != 0:
            ops.append(torch.distributed.isend(tensor=tensor_send_list[0], dst=prev_rank))
            ops.append(torch.distributed.irecv(tensor=tensor_recv_list[1], src=prev_rank))
        else:
            tensor_recv_list[1].zero_()

        if local_rank != cp_size - 1:
            ops.append(torch.distributed.irecv(tensor=tensor_recv_list[0], src=next_rank))
            ops.append(torch.distributed.isend(tensor=tensor_send_list[1], dst=next_rank))
        else:
            tensor_recv_list[0].copy_(tensor_send_list[1])

        for op in ops:
            op.wait()

        index = [slice(None)] * rolled_chunks[0].dim()
        index[dims] = shifts
        for chunk, recv in zip(rolled_chunks, tensor_recv_list):
            # Skip empty chunks
            if chunk.size(dims) == 0:
                continue
            chunk[tuple(index)] = recv

        seq_result = torch.cat(rolled_chunks, dim=dims)

        # update the rolled tensor
        rolled_tensor[..., local_start_idx:local_end_idx] = seq_result

    return rolled_tensor, rolled_tensor.sum()


class MTPLossLoggingHelper:
    """Helper class for logging MTP losses."""

    tracker = {}

    @staticmethod
    def save_loss_to_tracker(
        loss: torch.Tensor,
        layer_number: int,
        num_layers: int,
        reduce_group: Optional[torch.distributed.ProcessGroup] = None,
        avg_group: Optional[torch.distributed.ProcessGroup] = None,
    ):
        """Save the mtp loss for logging.
        Args:
            loss (torch.Tensor): The loss tensor.
            layer_number (int): Layer index of the loss.
            num_layers (int): The number of total layers.
            reduce_group (torch.distributed.ProcessGroup): The group for reducing the loss.
            mean_group (torch.distributed.ProcessGroup): The group for averaging the loss.
        """
        # Skip mtp loss logging if layer_number is None.
        if layer_number is None:
            return

        tracker = MTPLossLoggingHelper.tracker
        if "values" not in tracker:
            tracker["values"] = torch.zeros(num_layers, device=torch.cuda.current_device())
        tracker["values"][layer_number] += loss.detach()
        tracker["reduce_group"] = reduce_group
        tracker["avg_group"] = avg_group

    def clean_loss_in_tracker():
        """Clear the mtp losses."""
        tracker = MTPLossLoggingHelper.tracker
        tracker["values"].zero_()
        tracker["reduce_group"] = None
        tracker["avg_group"] = None

    def reduce_loss_in_tracker():
        """Collect and reduce the mtp losses across ranks."""
        tracker = MTPLossLoggingHelper.tracker
        if "values" not in tracker:
            return
        values = tracker["values"]
        # Reduce mtp losses across ranks.
        if tracker.get('reduce_group') is not None:
            torch.distributed.all_reduce(values, group=tracker.get('reduce_group'))
        if tracker.get('avg_group') is not None:
            torch.distributed.all_reduce(
                values, group=tracker['avg_group'], op=torch.distributed.ReduceOp.AVG
            )

    def track_mtp_metrics(loss_scale, iteration, writer, wandb_writer=None, total_loss_dict=None):
        """Track the Multi-Token Prediction (MTP) metrics for logging."""
        MTPLossLoggingHelper.reduce_loss_in_tracker()
        tracker = MTPLossLoggingHelper.tracker
        if "values" not in tracker:
            return
        mtp_losses = tracker["values"] * loss_scale
        mtp_num_layers = mtp_losses.shape[0]
        for i in range(mtp_num_layers):
            name = f"mtp_{i + 1} loss"
            loss = mtp_losses[i]
            if total_loss_dict is not None:
                if name in total_loss_dict:
                    total_loss_dict[name] += loss
                else:
                    total_loss_dict[name] = loss
            if writer is not None:
                writer.add_scalar(name, loss, iteration)
            if wandb_writer is not None:
                wandb_writer.log({f"{name}": loss}, iteration)

        MTPLossLoggingHelper.clean_loss_in_tracker()


@dataclass
class MultiTokenPredictionLayerSubmodules:
    """
    Dataclass for specifying the submodules of a MultiTokenPrediction module.

    Args:
        hnorm: Specification or instance of the hidden states normalization to be applied.
        enorm: Specification or instance of the embedding normalization to be applied.
        eh_proj (Union[ModuleSpec, type]): Specification or instance of the
            linear projection to be applied.
        mtp_model_layer (Union[ModuleSpec, type]): Specification
            or instance of the transformer or mamba block to be applied.
    """

    enorm: LayerNormBuilder
    hnorm: LayerNormBuilder
    # TODO(nschank): Move this back below transformer_layer once eh_proj and transformer_layer have
    # their defaults removed.
    layer_norm: LayerNormBuilder

    eh_proj: Union[ModuleSpec, type] = None
    mtp_model_layer: Union[ModuleSpec, type] = None


def get_mtp_layer_spec(
    mtp_model_layer_spec: ModuleSpec, use_transformer_engine: bool
) -> ModuleSpec:
    """Get the MTP layer spec.

    Returns:
        ModuleSpec: Module specification with TE modules
    """
    return get_mtp_layer_spec_for_backend(
        mtp_model_layer_spec,
        backend=TESpecProvider() if use_transformer_engine else LocalSpecProvider(),
    )


def get_mtp_layer_spec_for_backend(
    mtp_model_layer_spec: ModuleSpec, backend: BackendSpecProvider
) -> ModuleSpec:
    """Get the MTP layer spec.

    Returns:
        ModuleSpec: Module specification with modules from the backend.
    """
    column_parallel_linear_impl: type = backend.column_parallel_linear()
    layer_norm_impl = backend.layer_norm()
    mtp_layer_spec = ModuleSpec(
        module=MultiTokenPredictionLayer,
        submodules=MultiTokenPredictionLayerSubmodules(
            enorm=layer_norm_impl,
            hnorm=layer_norm_impl,
            eh_proj=column_parallel_linear_impl,
            mtp_model_layer=mtp_model_layer_spec,
            layer_norm=layer_norm_impl,
        ),
    )
    return mtp_layer_spec


def mtp_on_this_rank(
    config: TransformerConfig, ignore_virtual: Optional[bool] = True, vp_stage: Optional[int] = None
) -> bool:
    """
    Check if there is MTP on the current rank.

    Behavior:
        - If a custom pipeline model parallel layout is provided in the config:
            - If virtual pipeline parallelism is enabled (and `ignore_virtual` is False), checks
              whether any MTP layers are present on this (pp_rank, vp_stage) pair.
            - Otherwise, checks all virtual pipeline ranks of the current pipeline rank. Returns
              True if any virtual sub-rank includes at least one MTP layer.
        - If no custom layout is provided, assumes all MTP layers (if any) are placed on the last
          pipeline stage. The function returns True only on the last pipeline stage.
    """
    mtp_on_this_rank = False
    pp_rank = parallel_state.get_pipeline_model_parallel_rank()
    if config.pipeline_model_parallel_layout is not None:
        # with custom PP layout, we support put MTP layers on any pipeline stage
        layout = config.pipeline_model_parallel_layout.layout
        if (
            not ignore_virtual
            and parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None
        ):
            assert vp_stage is not None, "vp_stage must be passed if virtual pipeline is enabled"
            num_layers_to_build = layout[pp_rank][vp_stage].count(LayerType.mtp)
            mtp_on_this_rank = num_layers_to_build > 0
        else:
            for vpp_rank in range(len(layout[pp_rank])):
                num_layers_to_build = layout[pp_rank][vpp_rank].count(LayerType.mtp)
                if num_layers_to_build > 0:
                    mtp_on_this_rank = True
                    break
    else:
        # without custom PP layout, we only support put all of MTP layers on the last pipeline stage
        if config.mtp_num_layers is not None:
            mtp_on_this_rank = parallel_state.is_pipeline_last_stage(
                ignore_virtual=ignore_virtual, vp_stage=vp_stage
            )
        else:
            mtp_on_this_rank = False
    return mtp_on_this_rank


def get_mtp_ranks(pp_ranks: List[int], config: TransformerConfig) -> List[int]:
    """Get the ranks of the MTP layers."""
    mtp_ranks = set()
    if config.mtp_num_layers is None:
        return []
    if config.pipeline_model_parallel_layout is None:
        return [pp_ranks[-1]]
    layout = config.pipeline_model_parallel_layout.layout
    for pp_rank in range(len(layout)):
        for vpp_rank in range(len(layout[pp_rank])):
            num_layers_to_build = layout[pp_rank][vpp_rank].count(LayerType.mtp)
            if num_layers_to_build:
                mtp_ranks.add(pp_ranks[pp_rank])
    return list(mtp_ranks)


def get_mtp_layer_offset(config: TransformerConfig, vp_stage: Optional[int] = None) -> int:
    """Get the offset of the MTP layer."""
    if config.pipeline_model_parallel_size > 1:
        if config.pipeline_model_parallel_layout:
            offset = config.pipeline_model_parallel_layout.get_layer_offset(
                layer_type=LayerType.mtp, vp_stage=vp_stage
            )
        else:
            offset = 0
    else:
        offset = 0
    return offset


def get_mtp_num_layers_to_build(
    config: TransformerConfig, vp_stage: Optional[int] = None, pp_rank: Optional[int] = None
) -> int:
    """Get the number of MTP layers to build."""
    if config.pipeline_model_parallel_layout is not None:
        # If we have a custom PP layout, get the number of mtp layers in the layout array.
        num_layers_to_build = config.pipeline_model_parallel_layout.get_num_layers_to_build(
            layer_type=LayerType.mtp, vp_stage=vp_stage
        )
        assert num_layers_to_build == config.mtp_num_layers or num_layers_to_build == 0, (
            f"Currently, we only support put all of MTP layers on the last pipeline stage, "
            f"so the number of MTP layers to build ({num_layers_to_build}) must match "
            f"mtp_num_layers ({config.mtp_num_layers}) or be 0."
        )
    else:
        if parallel_state.is_pipeline_last_stage(ignore_virtual=False, vp_stage=vp_stage):
            num_layers_to_build = config.mtp_num_layers if config.mtp_num_layers else 0
        else:
            num_layers_to_build = 0
    return num_layers_to_build


class MTPLossAutoScaler(torch.autograd.Function):
    """An AutoScaler that triggers the backward pass and scales the grad for mtp loss."""

    main_loss_backward_scale: torch.Tensor = torch.tensor(1.0)

    @staticmethod
    def forward(ctx, output: torch.Tensor, mtp_loss: torch.Tensor):
        """Preserve the mtp by storing it in the context to avoid garbage collection.

        Args:
            output (torch.Tensor): The output tensor.
            mtp_loss (torch.Tensor): The mtp loss tensor.

        Returns:
            torch.Tensor: The output tensor.
        """
        ctx.save_for_backward(mtp_loss)
        return output

    @staticmethod
    def backward(ctx, grad_output: torch.Tensor):
        """Compute and scale the gradient for mtp loss..

        Args:
            grad_output (torch.Tensor): The gradient of the output.

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: The gradient of the output, scaled mtp loss
                                               gradient.
        """
        (mtp_loss,) = ctx.saved_tensors
        mtp_loss_backward_scale = MTPLossAutoScaler.main_loss_backward_scale
        scaled_mtp_loss_grad = torch.ones_like(mtp_loss) * mtp_loss_backward_scale
        return grad_output, scaled_mtp_loss_grad

    @staticmethod
    def set_loss_scale(scale: torch.Tensor):
        """set the scale of the mtp loss.

        Args:
            scale (torch.Tensor): The scale value to set. Please ensure that the scale passed in
                                  matches the scale of the main_loss.
        """
        MTPLossAutoScaler.main_loss_backward_scale = scale


def process_mtp_loss(
    hidden_states: Tensor,
    labels: Tensor,
    loss_mask: Optional[Tensor],
    output_layer: Callable,
    output_weight: Optional[Tensor],
    runtime_gather_output: Optional[bool],
    is_training: bool,
    compute_language_model_loss: Callable,
    config: TransformerConfig,
    cp_group: Optional[torch.distributed.ProcessGroup] = None,
    packed_seq_params: Optional[PackedSeqParams] = None,
    scale_logits_fn: Optional[Callable[[Tensor], Tensor]] = None,
) -> Tensor:
    """Process Multi-Token Prediction (MTP) loss computation.

    This is a standalone function that handles MTP loss computation. It's used on the
    post_process rank to split concatenated hidden states and compute MTP losses.

    Args:
        hidden_states (Tensor): Hidden states tensor (concatenated with MTP outputs).
        labels (Tensor): Ground truth labels.
        loss_mask (Optional[Tensor]): Mask for loss computation. If None, uses all ones.
        output_layer (Callable): Output layer method to compute logits.
        output_weight (Optional[Tensor]): Optional output weight for shared embeddings.
        runtime_gather_output (Optional[bool]): Whether to gather output at runtime.
        is_training (bool): Whether the model is in training mode.
        compute_language_model_loss (Callable): Method to compute language model loss.
        config (TransformerConfig): Model configuration containing mtp_num_layers etc.
        cp_group (Optional[ProcessGroup]): Context parallelism process group.
        packed_seq_params (Optional[PackedSeqParams]): Packed sequence parameters.
        scale_logits_fn (Optional[Callable[[Tensor], Tensor]]): Optional function to
            scale logits before loss computation (e.g., MuP output scaling).

    Returns:
        Tensor: Updated hidden states after MTP loss processing (first chunk only).
    """
    hidden_states_list = torch.chunk(hidden_states, 1 + config.mtp_num_layers, dim=0)
    hidden_states = hidden_states_list[0]

    if labels is None:
        return hidden_states

    mtp_labels = labels.clone()
    if loss_mask is None:
        loss_mask = torch.ones_like(mtp_labels)

    # Store the original number of tokens before rolling for proper normalization
    # when calculate_per_token_loss is enabled. This ensures MTP gradients are
    # correctly scaled relative to the main loss gradients in finalize_model_grads.
    original_num_tokens = loss_mask.sum()

    for mtp_layer_number in range(config.mtp_num_layers):
        mtp_logits, _ = output_layer(
            hidden_states_list[mtp_layer_number + 1],
            weight=output_weight,
            runtime_gather_output=runtime_gather_output,
        )
        if scale_logits_fn is not None:
            mtp_logits = scale_logits_fn(mtp_logits)
        mtp_labels, _ = roll_tensor(
            mtp_labels, shifts=-1, dims=-1, cp_group=cp_group, packed_seq_params=packed_seq_params
        )
        loss_mask, num_tokens = roll_tensor(
            loss_mask, shifts=-1, dims=-1, cp_group=cp_group, packed_seq_params=packed_seq_params
        )
        mtp_loss = compute_language_model_loss(mtp_labels, mtp_logits)
        mtp_loss = loss_mask * mtp_loss
        if is_training:
            mtp_loss_for_log = (
                torch.sum(mtp_loss) / num_tokens if num_tokens > 0 else mtp_loss.new_tensor(0.0)
            )
            MTPLossLoggingHelper.save_loss_to_tracker(
                mtp_loss_for_log,
                mtp_layer_number,
                config.mtp_num_layers,
                avg_group=parallel_state.get_data_parallel_group(with_context_parallel=True),
            )
        mtp_loss_scale = config.mtp_loss_scaling_factor / config.mtp_num_layers
        if config.calculate_per_token_loss:
            # When calculate_per_token_loss is enabled, finalize_model_grads will
            # divide all gradients by total_num_tokens (from main loss).
            # However, MTP has fewer valid tokens due to rolling. To ensure correct
            # per-token gradient weighting, we normalize by the rolled token count
            # and re-scale by the original token count.
            # Avoid division by zero
            num_tokens_safe = torch.clamp(num_tokens, min=1)
            mtp_loss_normalized = (
                mtp_loss_scale * mtp_loss * (original_num_tokens / num_tokens_safe)
            )
            hidden_states = MTPLossAutoScaler.apply(hidden_states, mtp_loss_normalized)
        else:
            safe_num_tokens = num_tokens.clamp(min=1)
            hidden_states = MTPLossAutoScaler.apply(
                hidden_states, mtp_loss_scale * mtp_loss / safe_num_tokens
            )

    return hidden_states


class MultiTokenPredictionLayer(MegatronModule):
    """The implementation for Multi-Token Prediction (MTP) which extends
    the prediction scope to multiple future tokens at each position.

    This MTP implementation sequentially predict additional tokens and keep the complete
    causal chain at each prediction depth, by using D sequential modules to predict
    D additional tokens.

    The k-th MTP module consists of a shared embedding layer, a projection matrix,
    a Transformer block, and a shared output head.

    For the i-th input token at the (k - 1)-th prediction depth, we first combine
    the representation of the i-th token and the embedding of the (i + K)-th token with
    the linear projection. The combined serves as the input of the Transformer block at
    the k-th depth to produce the output representation.

    For more information, refer to DeepSeek-V3 Technical Report
    https://arxiv.org/pdf/2412.19437.pdf
    """

    def __init__(
        self,
        config: TransformerConfig,
        submodules: MultiTokenPredictionLayerSubmodules,
        layer_number: int = 1,
        vp_stage: Optional[int] = None,
        pg_collection: Optional[ProcessGroupCollection] = None,
        # For Mamba path - pattern and submodules to build inner layers directly
        mtp_layer_pattern: Optional[str] = None,
        mamba_submodules: Optional[MambaStackSubmodules] = None,
    ):
        super().__init__(config=config)
        self.sequence_parallel = config.sequence_parallel
        self.submodules = submodules
        self.layer_number = layer_number + get_mtp_layer_offset(self.config, vp_stage)
        self.vp_stage = vp_stage
        self.cp_group = pg_collection.cp
        self.tp_group = pg_collection.tp if pg_collection is not None else None
        self.mtp_layer_pattern = mtp_layer_pattern

        # Validate attention mask type if using transformer-based inner layers
        if self.submodules.mtp_model_layer is not None and hasattr(
            self.submodules.mtp_model_layer, 'submodules'
        ):
            from megatron.core.ssm.mamba_block import MambaStackSubmodules
            from megatron.core.transformer.transformer_layer import TransformerLayerSubmodules

            layer_submodules = None
            if isinstance(self.submodules.mtp_model_layer.submodules, MambaStackSubmodules):
                attention_layer_spec = self.submodules.mtp_model_layer.submodules.attention_layer
                if hasattr(attention_layer_spec, 'submodules'):
                    assert isinstance(attention_layer_spec.submodules, TransformerLayerSubmodules)
                    layer_submodules = attention_layer_spec.submodules
            elif isinstance(self.submodules.mtp_model_layer.submodules, TransformerLayerSubmodules):
                layer_submodules = self.submodules.mtp_model_layer.submodules
            else:
                raise ValueError(
                    "Unsupported mtp_model_layer submodules type for attention mask validation."
                )
            if layer_submodules:
                self_attention_spec = layer_submodules.self_attention
                attn_mask_type = self_attention_spec.params.get('attn_mask_type', '')
                assert attn_mask_type in SUPPORTED_ATTN_MASK, (
                    f"Multi-Token Prediction (MTP) is not yet supported with "
                    f"{attn_mask_type} attention mask type. "
                    f"The supported attention mask types are {SUPPORTED_ATTN_MASK}."
                )

        self.enorm = self.submodules.enorm(
            config=self.config,
            hidden_size=self.config.hidden_size,
            eps=self.config.layernorm_epsilon,
        )

        self.hnorm = self.submodules.hnorm(
            config=self.config,
            hidden_size=self.config.hidden_size,
            eps=self.config.layernorm_epsilon,
        )

        # For the linear projection at the (k - 1)-th MTP layer, the input is the concatenation
        # of the i-th token's hidden states and the (i + K)-th token's decoder input,
        # so the input's shape is [s, b, 2*h].
        # The output will be send to the following transformer layer,
        # so the output's shape should be [s, b, h].
        self.eh_proj = build_module(
            self.submodules.eh_proj,
            self.config.hidden_size * 2,
            self.config.hidden_size,
            config=self.config,
            init_method=self.config.init_method,
            gather_output=False,
            bias=False,
            skip_bias_add=False,
            is_expert=False,
            tp_comm_buffer_name="mtp_eh_proj",
            tp_group=pg_collection.tp if pg_collection is not None else None,
        )

        # Build inner layers: two possible paths
        # 1. Mamba path: use MambaStack for hybrid pattern support
        # 2. GPT path: single TransformerLayer
        if mtp_layer_pattern is not None and mamba_submodules is not None:
            from megatron.core.ssm.mamba_block import MambaStack
            from megatron.core.ssm.mamba_hybrid_layer_allocation import validate_segment_layers

            self.mtp_model_layer = MambaStack(
                config=self.config,
                submodules=mamba_submodules,
                layer_type_list=validate_segment_layers(mtp_layer_pattern),
                pp_layer_offset=0,
                pre_process=True,  # Always receives input from eh_proj
                post_layer_norm=False,  # MTP has its own final_layernorm
                post_process=True,  # MTP layer is self-contained
                pg_collection=pg_collection,
                is_mtp_layer=True,
            )
        elif self.config.mtp_num_layers is not None:
            # GPT path: Uses the transformer block spec for MTP layer
            # MTP inner layers use their own layer numbering (self.layer_number = 1, 2, etc.)
            # rather than continuing from decoder layer numbers. This is consistent with the
            # Mamba path and ensures proper aux loss tracking in router.py.
            self.mtp_model_layer = build_module(
                self.submodules.mtp_model_layer,
                config=self.config,
                vp_stage=self.vp_stage,
                layer_number=self.layer_number,
                is_mtp_layer=True,
                pg_collection=pg_collection,
            )

        self.final_layernorm = self.submodules.layer_norm(
            config=self.config,
            hidden_size=self.config.hidden_size,
            eps=self.config.layernorm_epsilon,
        )
        self.offload_context = nullcontext()

    def _get_embeddings(
        self,
        input_ids: torch.Tensor,
        position_ids: torch.Tensor,
        embedding: Callable,
        hidden_states: torch.Tensor,
        packed_seq_params: Optional[PackedSeqParams] = None,
    ):
        """
        Preprocesses input data for the Multi-Token Prediction (MTP) layers.

        This function computes the decoder input and sends updated input_ids and position_ids to
        the next layer.

        Args:
            input_ids (torch.Tensor): The input token IDs.
            position_ids (torch.Tensor): The position IDs corresponding to the input tokens.
            embedding (Callable): The embedding module
                from gpt model to compute the decoder input.
            hidden_states (torch.Tensor): hidden states tensor of shape [s, b, h] where s is the
                sequence length, b is the batch size, and h is the hidden size.
            packed_seq_params (PackedSeqParams): Parameters for packed sequence processing.
        """
        # Calc logits for the current Multi-Token Prediction (MTP) layers.
        input_ids, _ = roll_tensor(
            input_ids,
            shifts=-1,
            dims=-1,
            cp_group=self.cp_group,
            packed_seq_params=packed_seq_params,
        )
        position_ids, _ = roll_tensor(
            position_ids,
            shifts=-1,
            dims=-1,
            cp_group=self.cp_group,
            packed_seq_params=packed_seq_params,
        )
        # embedding
        decoder_input = embedding(input_ids=input_ids, position_ids=position_ids)

        hidden_states = make_viewless_tensor(inp=hidden_states, requires_grad=True, keep_graph=True)

        return input_ids, position_ids, decoder_input, hidden_states

    def _concat_embeddings(self, hidden_states: torch.Tensor, decoder_input: torch.Tensor):
        """
        Concatenate the tokens before sending to transformer layer.
        """
        decoder_input = apply_module(self.enorm)(decoder_input)
        decoder_input = make_viewless_tensor(inp=decoder_input, requires_grad=True, keep_graph=True)
        hidden_states = apply_module(self.hnorm)(hidden_states)
        hidden_states = make_viewless_tensor(inp=hidden_states, requires_grad=True, keep_graph=True)
        # At the (k - 1)-th MTP module, concatenates the i-th token's hidden_states
        # and the (i + K)-th token's embedding, and combine them with linear projection.
        hidden_states = torch.cat((decoder_input, hidden_states), -1)
        hidden_states, _ = self.eh_proj(hidden_states)
        # For tensor parallel we need to gather the tensor across the model-parallel
        # ranks after the linear projection. This used to call
        # `all_gather_last_dim_from_tensor_parallel_region`, but that utility reduces
        # the gradient in backward pass and was therefore incorrect in this context.
        # It has been replaced with the correct `gather_from_tensor_model_parallel_region`.
        hidden_states = gather_from_tensor_model_parallel_region(hidden_states, group=self.tp_group)
        # For sequence parallel, scatter after linear_fc and before transformer layer.
        if self.sequence_parallel:
            hidden_states = scatter_to_sequence_parallel_region(hidden_states, group=self.tp_group)
        return hidden_states

    def _proj_and_transformer_layer(
        self,
        hidden_states: Tensor,
        decoder_input: Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        context: Optional[torch.Tensor] = None,
        context_mask: Optional[torch.Tensor] = None,
        rotary_pos_emb: Optional[torch.Tensor] = None,
        rotary_pos_cos: Optional[torch.Tensor] = None,
        rotary_pos_sin: Optional[torch.Tensor] = None,
        attention_bias: Optional[torch.Tensor] = None,
        inference_params: Optional[InferenceParams] = None,
        packed_seq_params: Optional[PackedSeqParams] = None,
        sequence_len_offset: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """
        Concatenates embeddings with hidden states and then applies transformer layer forward.
        """
        if self.config.sequence_parallel:
            rng_context = tensor_parallel.get_cuda_rng_tracker().fork()
        else:
            rng_context = nullcontext()

        # Unlike transformer_block.py which needs to support mixed-precision in
        # different layers,currently MTP only use global fp8 context.
        if self.config.fp8:
            fp8_context = get_fp8_context(self.config)
            transformer_layer_fp8_context = get_fp8_context(self.config)
        else:
            fp8_context = nullcontext()
            transformer_layer_fp8_context = nullcontext()

        # TODO: currently ignoring FP4 in MTP layers because we need more numerical validation
        with rng_context:
            with fp8_context:
                hidden_states = self._concat_embeddings(hidden_states, decoder_input)

            # Use a separate fp8 context for the transformer layer. This is to ensure that when the
            # transformer layer is cudagraphed, the FP8GlobalStateManager.is_first_fp8_module() is
            # True so that the fp8 weight caching can be triggered correctly.
            with transformer_layer_fp8_context:
                if self.mtp_layer_pattern is not None:
                    hidden_states = self.mtp_model_layer(
                        hidden_states=hidden_states,
                        attention_mask=attention_mask,
                        rotary_pos_emb=rotary_pos_emb,
                        inference_context=inference_params,
                        packed_seq_params=packed_seq_params,
                    )
                else:
                    # GPT path: single TransformerLayer
                    hidden_states, _ = self.mtp_model_layer(
                        hidden_states=hidden_states,
                        attention_mask=attention_mask,
                        context=context,
                        context_mask=context_mask,
                        rotary_pos_emb=rotary_pos_emb,
                        rotary_pos_cos=rotary_pos_cos,
                        rotary_pos_sin=rotary_pos_sin,
                        attention_bias=attention_bias,
                        inference_params=inference_params,
                        packed_seq_params=packed_seq_params,
                        sequence_len_offset=sequence_len_offset,
                    )

        hidden_states = self._postprocess(hidden_states)

        return hidden_states

    def _postprocess(self, hidden_states: torch.Tensor):
        """
        Postprocesses the output of the transformer layers.
        """

        # Layer norm before shared head layer.
        hidden_states = apply_module(self.final_layernorm)(hidden_states)
        # TENorm produces a "viewed" tensor. This will result in schedule.py's
        # deallocate_output_tensor() throwing an error, so a viewless tensor is
        # created to prevent this.
        hidden_states = make_viewless_tensor(inp=hidden_states, requires_grad=True, keep_graph=True)

        return hidden_states

    def forward_single_position(
        self,
        hidden_states: Tensor,
        next_token_ids: Tensor,
        position_ids: Tensor,
        embedding: Callable,
        attention_mask: Optional[Tensor] = None,
        rotary_pos_emb: Optional[Tensor] = None,
        rotary_pos_cos: Optional[Tensor] = None,
        rotary_pos_sin: Optional[Tensor] = None,
        inference_params=None,
        packed_seq_params: Optional[PackedSeqParams] = None,
        sequence_len_offset: Optional[Tensor] = None,
    ) -> Tensor:
        """Forward for single positions without roll_tensor (speculative decoding).

        Unlike the regular forward which rolls input_ids to get the next token's
        embedding, this method directly takes the correct next_token_ids. This is
        used in speculative decoding where the correct next token is known after
        verification.

        Args:
            hidden_states (Tensor): Hidden states at positions of interest [N, B, H].
            next_token_ids (Tensor): The correct next token IDs [B, N].
            position_ids (Tensor): Position IDs for the next tokens [B, N].
            embedding (Callable): The embedding module.

        Returns:
            Tensor: MTP hidden states [N, B, H].
        """
        decoder_input = embedding(input_ids=next_token_ids, position_ids=position_ids)
        hidden_states = make_viewless_tensor(
            inp=hidden_states, requires_grad=False, keep_graph=False
        )
        hidden_states = self._proj_and_transformer_layer(
            hidden_states=hidden_states,
            decoder_input=decoder_input,
            attention_mask=attention_mask,
            rotary_pos_emb=rotary_pos_emb,
            rotary_pos_cos=rotary_pos_cos,
            rotary_pos_sin=rotary_pos_sin,
            inference_params=inference_params,
            packed_seq_params=packed_seq_params,
            sequence_len_offset=sequence_len_offset,
        )
        return hidden_states

    def _checkpointed_forward(self, forward_func, *args, **kwargs):
        def checkpoint_handler():
            """Determines whether to use the `te_checkpoint` or `tensor_parallel.checkpoint`"""
            if self.config.fp8:
                from megatron.core.extensions.transformer_engine import te_checkpoint

                return te_checkpoint(
                    forward_func,
                    self.config.distribute_saved_activations,
                    tensor_parallel.random.get_cuda_rng_tracker,
                    parallel_state.get_tensor_model_parallel_group(),
                    *args,
                    **kwargs,
                )
            else:
                return tensor_parallel.checkpoint(
                    forward_func, self.config.distribute_saved_activations, *args, *kwargs.values()
                )

        if self.config.recompute_method == 'uniform':
            # Uniformly divide the total number of Transformer layers and checkpoint
            # the input activation of each divided chunk.
            # A method to further reduce memory usage reducing checkpoints.
            assert (
                self.config.recompute_num_layers == 1
            ), "recompute_num_layers must be 1 for MTP recompute"
            outputs = checkpoint_handler()
        elif self.config.recompute_method == 'block':
            # TODO: implement block-based recompute for MTP
            warnings.warn(
                "recompute_method == 'block' is not supported for MTP yet." " Skipping recompute."
            )
            outputs = forward_func(*args, **kwargs)
        else:
            raise ValueError("Invalid activation recompute method.")

        return outputs

    def forward(
        self,
        input_ids: Tensor,
        position_ids: Tensor,
        hidden_states: Tensor,
        attention_mask: Tensor,
        context: Optional[Tensor] = None,
        context_mask: Optional[Tensor] = None,
        rotary_pos_emb: Optional[Tensor] = None,
        rotary_pos_cos: Optional[Tensor] = None,
        rotary_pos_sin: Optional[Tensor] = None,
        attention_bias: Optional[Tensor] = None,
        inference_params: Optional[InferenceParams] = None,
        packed_seq_params: Optional[PackedSeqParams] = None,
        sequence_len_offset: Optional[Tensor] = None,
        embedding=None,
    ):
        """
        Execute the forward pass through the Multi-Token Prediction (MTP) layer.

        Args:
            input_ids (Tensor): Input token IDs .
            position_ids (Tensor): Positional IDs of the input tokens.
            hidden_states (Tensor): Hidden states tensor of shape [s, b, h] where s is the
                sequence length, b is the batch size, and h is the hidden size.
            attention_mask (Tensor): Boolean tensor of shape [1, 1, s, s] for masking
                self-attention.
            context (Tensor, optional): Context tensor for cross-attention, if applicable.
            context_mask (Tensor, optional): Mask for cross-attention context, if applicable.
            rotary_pos_emb (Tensor, optional): Rotary positional embeddings.
            rotary_pos_cos (Tensor, optional): Cosine component of rotary positional embeddings.
            rotary_pos_sin (Tensor, optional): Sine component of rotary positional embeddings.
            sequence_len_offset (Tensor, optional): Offset for sequence length, if applicable.
            embedding (Callable): The embedding module from gpt model to compute the decoder input.

        Returns:
            Union[Tensor, Tuple[Tensor, Tensor]]: The output hidden states tensor of shape
            [s, b, h], and optionally the updated context tensor if cross-attention is used.
        """
        assert context is None, "multi token prediction + cross attention is not yet supported."
        input_ids, position_ids, decoder_input, hidden_states = self._get_embeddings(
            input_ids=input_ids,
            position_ids=position_ids,
            embedding=embedding,
            hidden_states=hidden_states,
            packed_seq_params=packed_seq_params,
        )

        if self.config.recompute_granularity == 'full' and self.training:
            hidden_states = self._checkpointed_forward(
                self._proj_and_transformer_layer,
                hidden_states=hidden_states,
                decoder_input=decoder_input,
                attention_mask=attention_mask,
                context=context,
                context_mask=context_mask,
                rotary_pos_emb=rotary_pos_emb,
                rotary_pos_cos=rotary_pos_cos,
                rotary_pos_sin=rotary_pos_sin,
                attention_bias=attention_bias,
                inference_params=inference_params,
                packed_seq_params=packed_seq_params,
                sequence_len_offset=sequence_len_offset,
            )
        else:
            hidden_states = self._proj_and_transformer_layer(
                hidden_states=hidden_states,
                decoder_input=decoder_input,
                attention_mask=attention_mask,
                context=context,
                context_mask=context_mask,
                rotary_pos_emb=rotary_pos_emb,
                rotary_pos_cos=rotary_pos_cos,
                rotary_pos_sin=rotary_pos_sin,
                attention_bias=attention_bias,
                inference_params=inference_params,
                packed_seq_params=packed_seq_params,
                sequence_len_offset=sequence_len_offset,
            )

        return hidden_states, input_ids, position_ids

    def sharded_state_dict(
        self, prefix: str = '', sharded_offsets: tuple = (), metadata: Optional[dict] = None
    ) -> ShardedStateDict:
        """
        Generate a sharded state dictionary for the multi token prediction layer.

        Args:
            prefix (str, optional): Prefix to be added to all keys in the state dict.
            sharded_offsets (tuple, optional): Tuple of sharding offsets.
            metadata (Optional[dict], optional): Additional metadata for sharding.

        Returns:
            ShardedStateDict: A dictionary containing the sharded state of the multi
            token prediction layer.
        """
        sharded_state_dict = super().sharded_state_dict(prefix, sharded_offsets, metadata)

        # Backward compatibility: GPT MTP checkpoints were saved with the submodule
        # named 'transformer_layer'. Remap checkpoint keys so old checkpoints load
        # correctly. Mamba MTP models keep 'mtp_model_layer' as their native format
        # since no older checkpoints exist for them.
        if self.mtp_layer_pattern is None:
            apply_prefix_mapping(
                sharded_state_dict, {f'{prefix}mtp_model_layer.': f'{prefix}transformer_layer.'}
            )

        return sharded_state_dict


@dataclass
class MultiTokenPredictionBlockSubmodules:
    """
    Dataclass for specifying the submodules of a multi token prediction block.

    This class defines the structure for configuring the layers, allowing for
    flexible and customizable architecture designs.

    Args:
        layer_specs (List[ModuleSpec], optional): A list of module specifications for
            the layers within the multi token prediction block. Each specification typically
            defines a complete multi token prediction layer (e.g., shared embedding,
            projection matrix, transformer block, shared output head).
    """

    layer_specs: Optional[List[ModuleSpec]] = None


def _get_mtp_block_submodules(
    config: TransformerConfig, spec: Union[MultiTokenPredictionBlockSubmodules, ModuleSpec]
) -> MultiTokenPredictionBlockSubmodules:
    """
    Retrieve or construct MultiTokenPredictionBlockSubmodules based on the provided specification.

    Args:
        config (TransformerConfig): Configuration object for the transformer model.
        spec (Union[MultiTokenPredictionBlockSubmodules, ModuleSpec]): Specification for the
            multi token prediction block submodules.
            Can be either a MultiTokenPredictionBlockSubmodules instance or a ModuleSpec.

    Returns:
        MultiTokenPredictionBlockSubmodules: The submodules for the multi token prediction block.
    """

    # Transformer block submodules.
    if isinstance(spec, MultiTokenPredictionBlockSubmodules):
        return spec
    elif isinstance(spec, ModuleSpec):
        if issubclass(spec.module, MultiTokenPredictionBlock):
            return spec.submodules
        else:
            raise Exception(f"specialize for {spec.module.__name__}.")
    else:
        raise Exception(f"specialize for {type(spec).__name__}.")


class MultiTokenPredictionBlock(MegatronModule):
    """The implementation for Multi-Token Prediction (MTP) which extends
    the prediction scope to multiple future tokens at each position.

    This MTP implementation sequentially predict additional tokens and keep the complete
    causal chain at each prediction depth, by using D sequential modules to predict
    D additional tokens.

    The k-th MTP module consists of a shared embedding layer, a projection matrix,
    a Transformer block, and a shared output head.

    For the i-th input token at the (k - 1)-th prediction depth, we first combine
    the representation of the i-th token and the embedding of the (i + K)-th token with
    the linear projection. The combined serves as the input of the Transformer block at
    the k-th depth to produce the output representation.

    When `mtp_use_repeated_layer=True` in config, instead of creating N separate MTP layers,
    only 1 layer is created and applied mtp_num_layers times.

    For more information, please refer to DeepSeek-V3 Technical Report
    https://arxiv.org/pdf/2412.19437.pdf
    """

    def __init__(
        self,
        config: TransformerConfig,
        spec: Union[TransformerBlockSubmodules, ModuleSpec],
        vp_stage: Optional[int] = None,
        pg_collection: Optional[ProcessGroupCollection] = None,
        # New: For Mamba path with unified pattern syntax
        mtp_layer_pattern: Optional[str] = None,
        mtp_num_depths: int = 0,
        mamba_submodules: Optional["MambaStackSubmodules"] = None,
    ):
        super().__init__(config=config)
        self.submodules = _get_mtp_block_submodules(config, spec)
        self.mtp_loss_scaling_factor = config.mtp_loss_scaling_factor
        self.vp_stage = vp_stage
        self.mtp_layer_pattern = mtp_layer_pattern
        self.mtp_num_depths = mtp_num_depths
        self.mamba_submodules = mamba_submodules
        self.mtp_use_repeated_layer = self.config.mtp_use_repeated_layer

        vp_size = config.virtual_pipeline_model_parallel_size
        assert is_vp_last_stage(vp_stage=vp_stage, vp_size=vp_size), (
            f"MTP layers must be placed on the last virtual pipeline stage. "
            f"Got vp_stage={vp_stage} with vp_size={vp_size}. "
            f"Placing MTP layers on different VPP stages is not currently supported."
        )

        # Initialize Context Parallelism (CP) support for MTP
        # This enables MTP to work with CP > 1 by providing the CP process group
        # to the roll_tensor function for proper boundary communication
        if pg_collection is None:
            # Use default MPU process groups if not provided
            pg_collection = ProcessGroupCollection.use_mpu_process_groups(required_pgs=['cp', 'tp'])
        else:
            # Ensure the provided process groups include CP
            assert hasattr(
                pg_collection, 'cp'
            ), "MultiTokenPredictionBlock pg_collection must have cp process group"

        self._build_layers(pg_collection)
        assert len(self.layers) > 0, "MultiTokenPredictionBlock must have at least one layer."
        self.cp_group = pg_collection.cp

    def _build_layers(self, pg_collection):
        # Determine number of depths to build
        if self.mtp_num_depths > 0:
            num_depths = self.mtp_num_depths
        else:
            num_depths = self.config.mtp_num_layers or len(self.submodules.layer_specs)

        def build_layer_legacy(layer_spec, layer_number):
            """Build layer using legacy spec-based approach."""
            fp8_init_context = get_fp8_context(self.config, is_init=True)
            with fp8_init_context:
                module = build_module(
                    layer_spec,
                    config=self.config,
                    layer_number=layer_number,
                    vp_stage=self.vp_stage,
                    pg_collection=pg_collection,
                    mtp_layer_pattern=self.mtp_layer_pattern,
                )
            return module

        def build_layer_with_pattern(layer_spec, layer_number, mtp_layer_pattern, mamba_submodules):
            """Build layer using pattern-based approach (new Mamba path)."""
            fp8_init_context = get_fp8_context(self.config, is_init=True)
            with fp8_init_context:
                module = build_module(
                    layer_spec,
                    config=self.config,
                    layer_number=layer_number,
                    vp_stage=self.vp_stage,
                    pg_collection=pg_collection,
                    mtp_layer_pattern=mtp_layer_pattern,
                    mamba_submodules=mamba_submodules,
                )
            return module

        # New Mamba path: use mtp_layer_pattern and mamba_submodules
        if self.mtp_layer_pattern is not None and self.mamba_submodules is not None:
            if self.mtp_use_repeated_layer:
                # Shared/repeated layer: build one layer, use it for all depths
                layer_spec = self.submodules.layer_specs[0]
                shared_layer = build_layer_with_pattern(
                    layer_spec,
                    layer_number=1,
                    mtp_layer_pattern=self.mtp_layer_pattern,
                    mamba_submodules=self.mamba_submodules,
                )
                self.layers = torch.nn.ModuleList([shared_layer])
            else:
                # Non-shared: each depth gets its own layers
                self.layers = torch.nn.ModuleList(
                    [
                        build_layer_with_pattern(
                            self.submodules.layer_specs[
                                min(i, len(self.submodules.layer_specs) - 1)
                            ],
                            layer_number=i + 1,
                            mtp_layer_pattern=self.mtp_layer_pattern,
                            mamba_submodules=self.mamba_submodules,
                        )
                        for i in range(num_depths)
                    ]
                )
        elif self.mtp_use_repeated_layer:
            # Legacy repeated layer mode
            if len(self.submodules.layer_specs) != 1:
                warnings.warn(
                    "Repeated MTP mode expects exactly 1 layer spec, got "
                    f"{len(self.submodules.layer_specs)} instead. "
                    f"The first layer will be applied {self.config.mtp_num_layers} times."
                )
            self.layers = torch.nn.ModuleList(
                [build_layer_legacy(self.submodules.layer_specs[0], layer_number=1)]
            )
        else:
            # Legacy mode: build from layer_specs
            self.layers = torch.nn.ModuleList(
                [
                    build_layer_legacy(layer_spec, i + 1)
                    for i, layer_spec in enumerate(self.submodules.layer_specs)
                ]
            )

    def forward(
        self,
        input_ids: Tensor,
        position_ids: Tensor,
        hidden_states: Tensor,
        attention_mask: Tensor,
        context: Optional[Tensor] = None,
        context_mask: Optional[Tensor] = None,
        rotary_pos_emb: Optional[Tensor] = None,
        rotary_pos_cos: Optional[Tensor] = None,
        rotary_pos_sin: Optional[Tensor] = None,
        attention_bias: Optional[Tensor] = None,
        inference_params: Optional[InferenceParams] = None,
        packed_seq_params: Optional[PackedSeqParams] = None,
        sequence_len_offset: Optional[Tensor] = None,
        extra_block_kwargs: Optional[dict] = None,
        embedding=None,
    ) -> Tensor:
        """
        Perform the forward pass through all of the MTP modules.

        Args:
            hidden_states (Tensor): Hidden states for input token with the shape [s, b, h]
                where s is the sequence length, b is the batch size, and h is the hidden size.
            attention_mask (Tensor): Boolean tensor of shape [1, 1, s, s] for masking
                self-attention.

        Returns:
            (Tensor): The mtp loss tensor of shape [b, s].
        """
        # get hidden states from previous mtp stages
        offset = get_mtp_layer_offset(self.config, self.vp_stage)
        hidden_states_list = list(torch.chunk(hidden_states, 1 + offset, dim=0))
        hidden_states = hidden_states_list[offset]
        for iteration in range(self.config.mtp_num_layers):
            layer_idx = 0 if self.mtp_use_repeated_layer else iteration
            (hidden_states, input_ids, position_ids) = self.layers[layer_idx](
                input_ids=input_ids,
                position_ids=position_ids,
                hidden_states=hidden_states,
                attention_mask=attention_mask,
                inference_params=inference_params,
                rotary_pos_emb=rotary_pos_emb,
                rotary_pos_cos=rotary_pos_cos,
                rotary_pos_sin=rotary_pos_sin,
                packed_seq_params=packed_seq_params,
                sequence_len_offset=sequence_len_offset,
                embedding=embedding,
                **(extra_block_kwargs or {}),
            )

            # append the output hidden states of the current mtp layer
            # to the hidden_states_list
            hidden_states_list.append(hidden_states)

        # concat the hidden states of all mtp layers
        hidden_states = torch.cat(hidden_states_list, dim=0)
        return hidden_states

    def sharded_state_dict(
        self, prefix: str = '', sharded_offsets: tuple = (), metadata: Optional[dict] = None
    ) -> ShardedStateDict:
        """
        Generate a sharded state dictionary for the multi token prediction module.

        Args:
            prefix (str, optional): Prefix to be added to all keys in the state dict.
            sharded_offsets (tuple, optional): Tuple of sharding offsets.
            metadata (Optional[dict], optional): Additional metadata for sharding.

        Returns:
            ShardedStateDict: A dictionary containing the sharded state of the multi
            token prediction module.
        """
        sharded_state_dict = {}
        layer_prefix = f'{prefix}layers.'
        for layer in self.layers:
            offset = get_mtp_layer_offset(self.config, self.vp_stage)
            sharded_prefix = f'{layer_prefix}{layer.layer_number - 1}.'

            state_dict_prefix = f'{layer_prefix}{layer.layer_number - 1 - offset}.'
            sharded_pp_offset = []
            layer_sharded_state_dict = layer.sharded_state_dict(
                state_dict_prefix, sharded_pp_offset, metadata
            )
            replace_prefix_for_sharding(layer_sharded_state_dict, state_dict_prefix, sharded_prefix)
            sharded_state_dict.update(layer_sharded_state_dict)
        return sharded_state_dict


================================================
FILE: megatron/core/transformer/pipeline_parallel_layer_layout.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import copy
import logging
import re
from functools import lru_cache
from typing import Optional

from megatron.core import parallel_state
from megatron.core.transformer.enums import LayerType

logger = logging.getLogger(__name__)


class PipelineParallelLayerLayout:
    """Configuration of custom pipeline parallel layer partitioning."""

    def __repr__(self) -> str:
        if isinstance(self.input_data, str):
            return self.input_data
        else:
            return str(self.input_data)

    def __init__(self, layout: str | list, pipeline_model_parallel_size: int):
        """Initialize PipelineParallelLayerLayout from a list or a str.
        Format validation will be done here.
        """

        self.input_data = layout
        if isinstance(layout, str):
            layout = PipelineParallelLayerLayout.parse_str_to_list(layout)
        else:
            layout = copy.deepcopy(layout)
        assert all(isinstance(row, list) for row in layout), (
            f"pipeline_model_parallel_layout must be a list of lists, but got"
            f" {[type(row) for row in layout]=}"
        )

        # Check PP size and get VPP size
        assert len(layout) % pipeline_model_parallel_size == 0, (
            f"pipeline_model_parallel_layout must be divisible"
            f" by pipeline_model_parallel_size ({len(layout)=},"
            f" {pipeline_model_parallel_size=})"
        )
        virtual_pipeline_model_parallel_size = len(layout) // pipeline_model_parallel_size

        # Convert 1D layout to 2D layout
        layout = [
            [
                layout[vpp_rank * pipeline_model_parallel_size + pp_rank]
                for vpp_rank in range(virtual_pipeline_model_parallel_size)
            ]
            for pp_rank in range(pipeline_model_parallel_size)
        ]

        # Convert all strings in pipeline_model_parallel_layout to LayerType
        for pp_rank in range(pipeline_model_parallel_size):
            for vpp_rank in range(virtual_pipeline_model_parallel_size):
                transferred_layout = []
                for layer_type in layout[pp_rank][vpp_rank]:
                    assert isinstance(layer_type, LayerType) or isinstance(layer_type, str), (
                        f"elements in pipeline_model_parallel_layout must be LayerType or str,"
                        f" but got {type(layer_type)}."
                    )
                    if isinstance(layer_type, str):
                        layer_type = layer_type.strip().lower()
                        assert (
                            layer_type in LayerType.__members__
                        ), f"{layer_type} is not a valid LayerType"
                        layer_type = LayerType[layer_type]
                    transferred_layout.append(layer_type)
                layout[pp_rank][vpp_rank] = transferred_layout

        # Flatten the pipeline layout in layer id order.
        flatten_layout = []
        for vpp_rank in range(virtual_pipeline_model_parallel_size):
            for row in layout:
                flatten_layout.extend(row[vpp_rank])

        self.pipeline_model_parallel_size = pipeline_model_parallel_size
        self.virtual_pipeline_model_parallel_size = virtual_pipeline_model_parallel_size
        self.layout = layout
        self.flatten_layout = flatten_layout

    def validate_layer_layout(self, num_layers: int, mtp_num_layers: int):
        """Check whether the layout is valid."""

        # Check whether the input layer id is valid
        assert all(
            isinstance(x, LayerType) for x in self.flatten_layout
        ), "All layers must be a valid LayerType."

        # Embedding layer and loss layer must be specified
        assert (
            self.flatten_layout[0] == LayerType.embedding
        ), f"The first layer must be embedding, but got {self.flatten_layout[0]}"
        assert (
            self.flatten_layout[-1] == LayerType.loss
        ), f"The last layer must be loss, but got {self.flatten_layout[-1]}"

        # Layer number verification
        assert (
            self.flatten_layout.count(LayerType.embedding) == 1
        ), "Embedding must be specified exactly once"
        assert self.flatten_layout.count(LayerType.loss) == 1, "Loss must be specified exactly once"
        assert self.flatten_layout.count(LayerType.decoder) == num_layers, (
            f"Number of decoder layers {self.flatten_layout.count(LayerType.decoder)}"
            f"must match num_layers {num_layers}"
        )
        # MTP layer verification
        assert self.flatten_layout.count(LayerType.mtp) == mtp_num_layers or (
            mtp_num_layers is None and self.flatten_layout.count(LayerType.mtp) == 0
        ), "Number of mtp layers in layout must match mtp_num_layers"
        for i in range(len(self.flatten_layout)):
            if self.flatten_layout[i] == LayerType.mtp:
                assert (
                    self.flatten_layout[i:].count(LayerType.decoder) == 0
                ), "decoder layers must be placed before MTP layers"
                break
        for pp_rank in range(self.pipeline_model_parallel_size):
            for vpp_rank in range(self.virtual_pipeline_model_parallel_size - 1):
                assert (
                    LayerType.mtp not in self.layout[pp_rank][vpp_rank]
                ), f"Currently we restrict that the MTP should be always in the last "
                f"virtual pipeline stage of that rank. But got {self.layout[pp_rank][vpp_rank]}"
        for pp_rank in range(self.pipeline_model_parallel_size):
            if LayerType.mtp in self.layout[pp_rank][-1]:
                assert (
                    self.layout[pp_rank][-1].count(LayerType.mtp) == mtp_num_layers
                ), "All of the MTP layers must be in the same one virtual pipeline stage"
        for vpp_rank in range(self.virtual_pipeline_model_parallel_size - 1):
            assert LayerType.mtp not in self.layout[0][vpp_rank], (
                f"Currently we restrict that the MTP should not be in the first pp rank."
                f"But got {self.layout[0]} for the first pp rank."
            )
        ## Detect MTP standalone usage.
        mtp_standalone = False
        for pp_rank in range(self.pipeline_model_parallel_size):
            if (
                LayerType.mtp in self.layout[pp_rank][-1]
                and pp_rank != self.pipeline_model_parallel_size - 1
            ):
                mtp_standalone = True
                break

        # TODO: remove them in the future once they are supported
        if self.flatten_layout.count(LayerType.encoder) > 0:
            raise NotImplementedError("Encoder layer is not supported for flexible pipeline layout")

        return mtp_standalone

    def get_num_layers_to_build(
        self,
        layer_type: LayerType = LayerType.decoder,
        vp_stage: Optional[int] = None,
        pp_rank: Optional[int] = None,
    ):
        """Get the number of layers to build in the pipeline stage"""
        if pp_rank is None:
            pp_rank = parallel_state.get_pipeline_model_parallel_rank()
        if parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None:
            assert vp_stage is not None, "vp_stage must be passed if virtual pipeline is enabled"
        else:
            vp_stage = 0

        # Count layer numbers in this stage.
        num_layers_to_build = self.layout[pp_rank][vp_stage].count(layer_type)
        return num_layers_to_build

    def get_layer_offset(
        self,
        layer_type: LayerType = LayerType.decoder,
        vp_stage: Optional[int] = None,
        pp_rank: Optional[int] = None,
    ):
        """Get the layer offset in the pipeline stage"""
        if pp_rank is None:
            pp_rank = parallel_state.get_pipeline_model_parallel_rank()
        if parallel_state.get_virtual_pipeline_model_parallel_world_size() is not None:
            assert vp_stage is not None, "vp_stage must be passed if virtual pipeline is enabled"
        else:
            vp_stage = 0

        # Calculate the offset by summing up the number of
        # layers in all the previous pipeline stages.
        offset = 0
        for _vpp_rank in range(vp_stage + 1):
            for _pp_rank in range(
                self.pipeline_model_parallel_size if _vpp_rank < vp_stage else pp_rank
            ):
                offset += self.layout[_pp_rank][_vpp_rank].count(layer_type)
        return offset

    def get_layer_id_list(
        self,
        layer_type: LayerType = LayerType.decoder,
        vp_stage: Optional[int] = None,
        pp_rank: Optional[int] = None,
    ):
        """Get the list of layer_id for each layer in the pipeline stage."""
        offset = self.get_layer_offset(layer_type=layer_type, vp_stage=vp_stage, pp_rank=pp_rank)
        num_layers_to_build = self.get_num_layers_to_build(
            layer_type=layer_type, vp_stage=vp_stage, pp_rank=pp_rank
        )
        return list(range(offset, offset + num_layers_to_build))

    def pretty_repr(self):
        """Pretty representation of the custom layout, showing the layers held by each stage.
        Example:
                            VPP rank 0                 VPP rank 1
        PP rank 0           embedding,decoder*2        decoder*2
        PP rank 1-13        decoder*2                  decoder*2
        PP rank 14          decoder*2                  mtp
        PP rank 15          decoder*2                  loss
        """

        matrix = []
        if self.virtual_pipeline_model_parallel_size > 1:
            header = [""] + [
                f"VPP rank {vpp_rank}"
                for vpp_rank in range(self.virtual_pipeline_model_parallel_size)
            ]
            matrix.append(header)

        prev_row_repr, prev_row_start_pp_rank = None, None
        for pp_rank in range(self.pipeline_model_parallel_size + 1):
            row_repr = []
            if pp_rank < self.pipeline_model_parallel_size:
                for vpp_rank in range(self.virtual_pipeline_model_parallel_size):
                    stage = self.layout[pp_rank][vpp_rank]
                    stage_repr = []
                    prev_layer, prev_layer_cnt = None, 0
                    for layer_type in stage + [None]:
                        if layer_type == prev_layer:
                            prev_layer_cnt += 1
                        else:
                            if prev_layer_cnt > 1:
                                stage_repr.append(f"{prev_layer.name}*{prev_layer_cnt}")
                            elif prev_layer_cnt == 1:
                                stage_repr.append(f"{prev_layer.name}")
                            prev_layer, prev_layer_cnt = layer_type, 1
                    if len(stage_repr) == 0:
                        stage_repr.append(f"(empty stage)")
                    row_repr.append(",".join(stage_repr))

            if row_repr != prev_row_repr:
                if prev_row_start_pp_rank == pp_rank - 1:
                    matrix.append([f"PP rank {pp_rank - 1}"] + prev_row_repr)
                elif prev_row_repr is not None:
                    matrix.append(
                        [f"PP rank {prev_row_start_pp_rank}-{pp_rank - 1}"] + prev_row_repr
                    )
                prev_row_repr, prev_row_start_pp_rank = row_repr, pp_rank

        # Indent the matrix to make it more readable
        lens = [max(map(len, col)) for col in zip(*matrix)]
        indents = 8 if self.virtual_pipeline_model_parallel_size <= 4 else 4
        fmt = (" " * indents).join('{{:{}}}'.format(x) for x in lens)
        return "\n".join([fmt.format(*row) for row in matrix])

    @staticmethod
    @lru_cache()
    def from_str(layout, pipeline_model_parallel_size):
        """Parse the pipeline model parallel layout from a string."""
        parsed_layout = PipelineParallelLayerLayout(layout, pipeline_model_parallel_size)
        # Pretty print the layout distribution.
        from megatron.core.utils import log_single_rank

        log_single_rank(
            logger,
            logging.INFO,
            f"Parse pipeline model parallel layout {layout} to:\n" + parsed_layout.pretty_repr(),
        )
        return parsed_layout

    @staticmethod
    def get_num_stages_from_str(layout: str):
        """Get the number of PP * VPP stages from a layout string."""
        layout_list = PipelineParallelLayerLayout.parse_str_to_list(layout)
        return len(layout_list)

    @staticmethod
    def parse_str_to_list(layout_str: str):
        """Parse a layout string to a list of lists.
        Example: "Ettt|(tt|)*29,m|L" will be parsed to
        [["E","t","t","t"]]+[["t","t"]]*29+[["m"],["L"]]"""

        layout_str = layout_str.replace(",", "")  # remove purely cosmetic commas

        # unroll multiplications in the expression
        patterns = [
            # unroll expression in parentheses ()*n. Examples:
            # xy(ab|cd|ef)*2,pq -> xyab|cd|efab|cd|efpq
            # (ab)*3 -> ababab
            # ab,(cd|)*2 -> abcd|cd|
            # (|ab)*2,cd -> |ab|abcd
            r'\(([^)]+)\)\*(\d+)',
            r'(.)\*(\d+)',  # unroll x*n to n xs
        ]
        for pattern in patterns:
            layout_str = re.sub(pattern, lambda x: x.group(1) * int(x.group(2)), layout_str)

        char2layer_type = {
            "E": LayerType.embedding,
            "L": LayerType.loss,
            "t": LayerType.decoder,  # t denotes "transformer"
            "m": LayerType.mtp,
        }

        # parse the layout string
        layout_list = []
        for stage in layout_str.split('|'):
            layout_list.append([])
            for layer_char in stage:
                assert layer_char in char2layer_type, (
                    f"Invalid layer character: {layer_char} ({stage=}, {layout_str=}),"
                    f" known layer characters: {list(char2layer_type.keys())}"
                )

                layout_list[-1].append(char2layer_type[layer_char])
        return layout_list


================================================
FILE: megatron/core/transformer/spec_utils.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

import logging
import types
from dataclasses import dataclass, field
from typing import Any, Tuple, Union

logger = logging.getLogger(__name__)


@dataclass
class ModuleSpec:
    """This is a Module Specification dataclass.

    Specification defines the location of the module (to import dynamically)
    or the imported module itself. It also defines the params that need to be
    passed to initialize the module.

    Args:
        module (Union[Tuple, type]): A tuple describing the location of the
            module class e.g. `(module.location, ModuleClass)` or the imported
            module class itself e.g. `ModuleClass` (which is already imported
            using `from module.location import ModuleClass`).
        params (dict): A dictionary of params that need to be passed while init.

    """

    module: Union[Tuple, type]
    params: dict = field(default_factory=lambda: {})
    submodules: object = None
    metainfo: dict = field(default_factory=lambda: {})

    def __call__(self, *args: Any, **kwargs: Any) -> Any:
        """Builds an instance of the module from the spec.

        Args:
            *args: Positional arguments to be passed to the module init.
            **kwargs: Keyword arguments to be passed to the module init.
        """
        return build_module(self, *args, **kwargs)


def import_module(module_path: Tuple[str]):
    """Import a named object from a module in the context of this function.

    TODO: make this importer module more robust, at least make sure there
    are no side effects of using this as is
    """
    base_path, name = module_path
    try:
        module = __import__(base_path, globals(), locals(), [name])
    except ImportError as e:
        logger.error(f"couldn't import module due to {e}")
        return None
    return vars(module)[name]


# pylint: disable=missing-function-docstring
def get_module(spec_or_module: Union[ModuleSpec, type], **additional_kwargs):
    """Returns or imports the provided module."""
    # If a module clas is already provided return it as is
    if isinstance(spec_or_module, (type, types.FunctionType)):
        return spec_or_module

    # If the module is provided instead of module path, then return it as is
    if isinstance(spec_or_module.module, (type, types.FunctionType)):
        return spec_or_module.module

    # Otherwise, return the dynamically imported module from the module path
    return import_module(spec_or_module.module)


def build_module(spec_or_module: Union[ModuleSpec, type], *args, **kwargs):
    """Builds an instance of the module from the spec.

    Args:
        spec_or_module: The module spec or module class to build.
        *args: Positional arguments to be passed to the module init.
        **kwargs: Keyword arguments to be passed to the module init.
    """
    # If the passed `spec_or_module` is
    # a `Function`, then return it as it is
    # NOTE: to support an already initialized module add the following condition
    # `or isinstance(spec_or_module, torch.nn.Module)` to the following if check
    if isinstance(spec_or_module, types.FunctionType):
        return spec_or_module

    # If the passed `spec_or_module` is actually a spec (instance of
    # `ModuleSpec`) and it specifies a `Function` using its `module`
    # field, return the `Function` as it is
    if isinstance(spec_or_module, ModuleSpec) and isinstance(
        spec_or_module.module, types.FunctionType
    ):
        return spec_or_module.module

    # Check if a module class is provided as a spec or if the module path
    # itself is a class
    if isinstance(spec_or_module, type):
        module = spec_or_module
    elif hasattr(spec_or_module, "module") and isinstance(spec_or_module.module, type):
        module = spec_or_module.module
    else:
        # Otherwise, dynamically import the module from the module path
        module = import_module(spec_or_module.module)

    # If the imported module is actually a `Function` return it as it is
    if isinstance(module, types.FunctionType):
        return module

    # Finally return the initialized module with params from the spec as well
    # as those passed as **kwargs from the code

    # Add the `submodules` argument to the module init call if it exists in the
    # spec.
    if hasattr(spec_or_module, "submodules") and spec_or_module.submodules is not None:
        kwargs["submodules"] = spec_or_module.submodules

    try:
        return module(
            *args, **spec_or_module.params if hasattr(spec_or_module, "params") else {}, **kwargs
        )
    except Exception as e:
        # improve the error message since we hide the module name in the line above
        import sys

        raise type(e)(f"{str(e)} when instantiating {module.__name__}").with_traceback(
            sys.exc_info()[2]
        )


================================================
FILE: megatron/core/transformer/torch_layer_norm.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from megatron.core.transformer.torch_norm import WrappedTorchNorm

WrappedTorchLayerNorm = WrappedTorchNorm


================================================
FILE: megatron/core/transformer/torch_norm.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from typing import Protocol

import torch

from megatron.core.jit import jit_fuser
from megatron.core.transformer import TransformerConfig
from megatron.core.utils import is_torch_min_version


class LayerNormInterface(Protocol):
    """Interface that all LayerNorm implementations should follow."""

    def forward(self, x: torch.Tensor, /) -> torch.Tensor:
        """Forward method for a LayerNorm implementation."""
        ...


class LayerNormBuilder(Protocol):
    """A protocol showing how Modules are expected to construct LayerNorms."""

    def __call__(
        self, *, config: TransformerConfig, hidden_size: int, eps: float
    ) -> LayerNormInterface: ...


class WrappedTorchNorm:
    """
    A conditional wrapper to initialize an instance of PyTorch's
    `LayerNorm` or `RMSNorm` based on input
    """

    def __new__(
        cls,
        config: TransformerConfig,
        hidden_size: int,
        eps: float = 1e-5,
        # TODO: unused arguments.
        # See https://gitlab-master.nvidia.com/ADLR/megatron-lm/-/issues/223
        persist_layer_norm: bool = False,
        zero_centered_gamma: bool = False,
        normalization: str = "LayerNorm",
    ) -> LayerNormInterface:
        assert (
            not config.layernorm_zero_centered_gamma
        ), f"zero_centered_gamma not supported by torch LayerNorm"

        assert not config.persist_layer_norm, f"persist_layer_norm not supported by torch LayerNorm"

        assert not config.sequence_parallel, f"sequence parallel not supported by torch LayerNorm"

        assert (
            not config.memory_efficient_layer_norm
        ), f"memory_efficient_layer_norm not supported by torch LayerNorm"

        if config.normalization == "LayerNorm":
            norm_cls = torch.nn.LayerNorm
        elif config.normalization == "RMSNorm":
            assert is_torch_min_version(
                "2.4.0a0"
            ), 'Torch RMSNorm requires PyTorch version >= 2.4.0'

            norm_cls = torch.nn.RMSNorm
        elif config.normalization == "L2Norm":
            norm_cls = torch.nn.L2Norm
        else:
            raise Exception("Only LayerNorm, RMSNorm and L2Norm are currently supported")

        return norm_cls(normalized_shape=hidden_size, eps=eps)


class L2Norm(torch.nn.Module, LayerNormInterface):
    """
    Applies L2 normalization to the input tensor along the last dimension.

    This module normalizes the input tensor such that the mean of the squared values
    along the last dimension is 1 (within a small epsilon for numerical stability).

    Args:
        hidden_size (int): Expected input shape for normalization (not used internally).
        eps (float, optional): A small value added to the denominator for numerical stability.
            Default: 1e-6.
    """

    def __init__(self, hidden_size: int, eps: float = 1e-6, **kwargs):
        super().__init__()
        self.hidden_size = hidden_size
        self.eps = eps

    @jit_fuser
    def _norm(self, x: torch.Tensor) -> torch.Tensor:
        """
        Performs the actual L2 normalization.

        Args:
            x (torch.Tensor): The input tensor to normalize.

        Returns:
            torch.Tensor: The L2-normalized tensor.
        """
        x_float = x.float()
        return (x_float * torch.rsqrt(x_float.pow(2).mean(-1, keepdim=True) + self.eps)).type_as(x)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass of the L2Norm module.

        Args:
            x (torch.Tensor): Input tensor.

        Returns:
            torch.Tensor: L2-normalized tensor with the same dtype as input.
        """
        return self._norm(x)


================================================
FILE: megatron/core/transformer/transformer_block.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
import logging
from contextlib import nullcontext
from dataclasses import dataclass
from typing import List, Optional, Set, Union, cast

import torch
from torch import Tensor

from megatron.core import parallel_state, tensor_parallel
from megatron.core.dist_checkpointing.mapping import ShardedStateDict
from megatron.core.dist_checkpointing.utils import replace_prefix_for_sharding
from megatron.core.enums import Fp8Recipe
from megatron.core.extensions.transformer_engine import HAVE_TE
from megatron.core.fp4_utils import get_fp4_context
from megatron.core.fp8_utils import get_fp8_context
from megatron.core.fusions.fused_layer_norm import FusedLayerNorm
from megatron.core.inference.contexts import BaseInferenceContext
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.pipeline_parallel.utils import is_vp_first_stage, is_vp_last_stage
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.transformer.enums import CudaGraphScope, LayerType
from megatron.core.transformer.module import GraphableMegatronModule, MegatronModule
from megatron.core.transformer.spec_utils import ModuleSpec, build_module
from megatron.core.transformer.torch_norm import LayerNormBuilder
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.transformer.transformer_layer import (
    BaseTransformerLayer,
    get_transformer_layer_offset,
)
from megatron.core.transformer.utils import sharded_state_dict_default
from megatron.core.typed_torch import apply_module, not_none
from megatron.core.utils import (
    WrappedTensor,
    deprecate_inference_params,
    get_pg_rank,
    make_viewless_tensor,
)

try:
    import apex  # pylint: disable=unused-import

    HAVE_APEX = True
except ImportError:
    HAVE_APEX = False

get_cpu_offload_context = None
te_checkpoint = None

if HAVE_TE:
    from megatron.core.extensions.transformer_engine import (
        TENorm,
        get_cpu_offload_context,
        te_checkpoint,
    )

    LayerNormImpl = TENorm

elif HAVE_APEX:
    LayerNormImpl = FusedLayerNorm

else:
    from megatron.core.transformer.torch_norm import WrappedTorchNorm

    LayerNormImpl = WrappedTorchNorm


logger = logging.getLogger(__name__)


def get_num_layers_to_build(
    config: TransformerConfig, vp_stage: Optional[int] = None, pp_rank: Optional[int] = None
) -> int:
    """
    Determine the number of transformer layers to build for the current pipeline stage.
    Args:
        config (TransformerConfig): Configuration object containing transformer model parameters.
        vp_stage (Optional[int]): Virtual pipeline stage number.
        pp_rank (Optional[int]): Pipeline parallel rank.

    Returns:
        int: The number of layers to be built for the current pipeline stage.
    """
    # If we have a custom PP layout, straightforwardly
    # return the number of decoders in the layout array.
    if config.pipeline_model_parallel_layout is not None:
        return config.pipeline_model_parallel_layout.get_num_layers_to_build(
            layer_type=LayerType.decoder, vp_stage=vp_stage
        )

    # Fallback for legacy tests.
    if pp_rank is None:
        pp_rank = parallel_state.get_pipeline_model_parallel_rank()

    is_first_pp_stage = pp_rank == 0
    is_last_pp_stage = pp_rank == config.pipeline_model_parallel_size - 1

    if (
        config.num_layers_in_first_pipeline_stage is not None
        or config.num_layers_in_last_pipeline_stage is not None
    ):

        assert not (
            config.account_for_embedding_in_pipeline_split
            or config.account_for_loss_in_pipeline_split
        ), " \
        Does not support standalone embedding stage and standalone loss stage with uneven pp"
        # Number of layers to distribute over rest of pipeline stages
        layers_to_distribute = config.num_layers
        # Number of pipeline stages left for distributing transformer layers
        pipeline_stages_left = config.pipeline_model_parallel_size

        # If the uneven first (last) pipeline stage is enabled, remove the specified number
        # of layers to calculate the number of layers on each middle pipeline stage.
        if config.num_layers_in_first_pipeline_stage is not None:
            layers_to_distribute -= config.num_layers_in_first_pipeline_stage
            pipeline_stages_left -= 1

        if config.num_layers_in_last_pipeline_stage is not None:
            layers_to_distribute -= config.num_layers_in_last_pipeline_stage
            pipeline_stages_left -= 1

        # If pp_size <= 2, we do not have any intermediate pipeline stages, and we do not
        # need to check if the left over layers are divisible by the left over stages.
        if pipeline_stages_left > 0:
            assert (
                layers_to_distribute % pipeline_stages_left == 0
            ), "With uneven pipelineing the left over layers must be divisible by left over stages"
            num_layers_per_pipeline_rank = layers_to_distribute // pipeline_stages_left
        else:
            num_layers_per_pipeline_rank = 0

        # If the uneven first (last) pipeline stage is enabled, return the specified number
        # of layers for all virtual pipeline parallel stages within the first (last) pipeline
        # parallel stage.

        if is_first_pp_stage and config.num_layers_in_first_pipeline_stage is not None:
            num_layers_per_pipeline_rank = config.num_layers_in_first_pipeline_stage

        if is_last_pp_stage and config.num_layers_in_last_pipeline_stage is not None:
            num_layers_per_pipeline_rank = config.num_layers_in_last_pipeline_stage
    else:
        # Include the embedding layer and loss layer into pipeline parallelism partition
        num_layers = config.num_layers
        if config.account_for_embedding_in_pipeline_split:
            num_layers += 1

        if config.account_for_loss_in_pipeline_split:
            num_layers += 1

        assert (
            num_layers % config.pipeline_model_parallel_size == 0
        ), f"{num_layers=} should be divisible by {config.pipeline_model_parallel_size=}"
        num_layers_per_pipeline_rank = num_layers // config.pipeline_model_parallel_size

    vp_size = config.virtual_pipeline_model_parallel_size
    if vp_size is not None and config.pipeline_model_parallel_size > 1:
        # Interleaved pipeline parallelism:
        # Number of layers in each model chunk is the number of layers in the stage,
        # divided by the number of model chunks in a stage.
        # With 8 layers, 2 stages, and 4 model chunks, we want an assignment of
        # layers to stages like (each list is a model chunk):
        # Stage 0: [0]  [2]  [4]  [6]
        # Stage 1: [1]  [3]  [5]  [7]
        # With 8 layers, 2 stages, and 2 virtual stages, we want an assignment of
        # layers to stages like (each list is a model chunk):
        # Stage 0: [0, 1]  [4, 5]
        # Stage 1: [2, 3]  [6, 7]

        assert (
            num_layers_per_pipeline_rank % vp_size == 0
        ), f"num_layers_per_pipeline_rank {num_layers_per_pipeline_rank} \
            should be divisible by vp_size {vp_size}"
        num_layers_per_virtual_stage = num_layers_per_pipeline_rank // vp_size

        num_layers_to_build = num_layers_per_virtual_stage

    else:
        # Non-interleaved pipeline parallelism:
        # Each stage gets a contiguous set of layers.
        num_layers_to_build = num_layers_per_pipeline_rank

    # The embedding (or loss) layer cannot function as a standalone transformer layer
    # Reduce the number of layers to construct by 1 on the first (or last) stage if the
    # embedding (or loss) layer is included in the pipeline parallelism partition and placement.
    if config.account_for_embedding_in_pipeline_split:
        if is_vp_first_stage(vp_stage, vp_size) and is_first_pp_stage:
            num_layers_to_build -= 1
            assert (
                num_layers_to_build >= 0
            ), f"Not enough layers in the first virtual pipeline stage"

    if config.account_for_loss_in_pipeline_split:
        if is_vp_last_stage(vp_stage, vp_size) and is_last_pp_stage:
            num_layers_to_build -= 1
            assert num_layers_to_build >= 0, f"Not enough layers in the last virtual pipeline stage"

    return num_layers_to_build


@dataclass
class TransformerBlockSubmodules:
    """
    Dataclass for specifying the submodules of a transformer block.

    This class defines the structure for configuring the layers and normalization
    within a transformer block, allowing for flexible and customizable architecture designs.

    Args:
        layer_specs (List[ModuleSpec], optional): A list of module specifications for
            the layers within the transformer block. Each specification typically
            defines a complete transformer layer (e.g., self-attention, feed-forward network).
        layer_norm (Optional[Union[ModuleSpec, torch.nn.Module]], optional): Specification
            or instance of the layer normalization to be applied.
    """

    layer_specs: Optional[List[ModuleSpec]] = None
    layer_norm: LayerNormBuilder | None = None


def _get_block_submodules(
    config: TransformerConfig,
    spec: Union[TransformerBlockSubmodules, ModuleSpec],
    vp_stage: Optional[int] = None,
    pp_rank: Optional[int] = None,
) -> TransformerBlockSubmodules:
    """
    Retrieve or construct TransformerBlockSubmodules based on the provided specification.

    Args:
        config (TransformerConfig): Configuration object for the transformer model.
        spec (Union[TransformerBlockSubmodules, ModuleSpec]): Specification for the
            transformer block submodules. Can be either a TransformerBlockSubmodules
            instance or a ModuleSpec.
        vp_stage (Optional[int]): Virtual pipeline stage number.

    Returns:
        TransformerBlockSubmodules: The submodules for the transformer block.
    """

    # Transformer block submodules.
    if isinstance(spec, TransformerBlockSubmodules):
        return spec

    # ModuleSpec here is generally assumed to be for a transformer layer that
    # is implemented in `transformer_layer.py` or if it subclasses
    # `BaseTransformerLayer` from the `transformer_layer.py` file.
    elif isinstance(spec, ModuleSpec):
        if issubclass(spec.module, TransformerBlock):
            return spec.submodules
        elif issubclass(spec.module, BaseTransformerLayer):
            num_layers = get_num_layers_to_build(config, vp_stage, pp_rank)
            return TransformerBlockSubmodules(
                layer_specs=[spec] * num_layers, layer_norm=LayerNormImpl
            )
        else:
            raise Exception(f"specialize for {spec.module.__name__}.")
    else:
        raise Exception(f"specialize for {type(spec).__name__}.")


class TransformerBlock(GraphableMegatronModule, MegatronModule):
    """Transformer class."""

    def __init__(
        self,
        config: TransformerConfig,
        spec: Union[TransformerBlockSubmodules, ModuleSpec],
        post_layer_norm: bool = True,
        pre_process: bool = True,
        post_process: bool = True,
        pg_collection: Optional[ProcessGroupCollection] = None,
        vp_stage: Optional[int] = None,
    ):
        super().__init__(config=config)

        if pg_collection is None:
            pg_collection = ProcessGroupCollection.use_mpu_process_groups()
        self.pg_collection = pg_collection
        self.tp_group = pg_collection.tp

        pp_group = self.pg_collection.pp if hasattr(self.pg_collection, 'pp') else None
        pp_rank = get_pg_rank(pp_group)

        self.submodules = _get_block_submodules(config, spec, vp_stage, pp_rank)
        self.post_layer_norm = post_layer_norm
        self.pre_process = pre_process
        self.post_process = post_process
        self.vp_stage = vp_stage

        # required for pipeline parallel schedules
        self.input_tensor = None

        self.checkpoint_core_attention = (
            self.config.recompute_granularity == 'selective'
            and "core_attn" in self.config.recompute_modules
        )

        if get_cpu_offload_context is not None:
            (self.offload_context, self.group_prefetch_offload_commit_async) = (
                get_cpu_offload_context(
                    self.config.cpu_offloading,
                    self.config.cpu_offloading_num_layers,
                    self.config.num_layers,
                    self.config.cpu_offloading_activations,
                    self.config.cpu_offloading_weights,
                    self.config.cpu_offloading_double_buffering,
                    self.config.cpu_offloading_retain_pinned_cpu_buffers,
                )
            )
            self.config._cpu_offloading_context = (
                self.offload_context if self.config.cpu_offloading else None
            )
        else:
            assert (
                self.config.cpu_offloading is False
            ), "CPU Offloading is enabled when TE is not present"

            self.offload_context, self.group_prefetch_offload_commit_async = nullcontext(), None
            self.config._cpu_offloading_context = None

        self._build_layers()
        self.num_layers_per_pipeline_rank = len(self.layers)

    def _build_layers(self):
        # Transformer layers.
        # @jcasper can we improve how we deal with layer_number?
        # currently it's only used in CoreAttention?
        # if self.apply_query_key_layer_scaling:
        #     coeff = self.layer_number
        #     self.norm_factor *= coeff
        def build_layer(layer_spec, layer_number):
            global_layer_number = layer_number + get_transformer_layer_offset(
                self.config, self.vp_stage, get_pg_rank(self.pg_collection.pp)
            )  # 1-based index
            if self.config.heterogeneous_block_specs:
                layer_config = self.config.get_config_for_layer(global_layer_number)
            else:
                layer_config = self.config

            # Get appropriate quantization context (FP8 and FP4 are mutually exclusive)
            if layer_config.fp8:
                quantization_context = get_fp8_context(
                    layer_config, global_layer_number - 1, is_init=True
                )
            elif layer_config.fp4:
                quantization_context = get_fp4_context(
                    layer_config, global_layer_number - 1, is_init=True
                )
            else:
                quantization_context = nullcontext()

            with quantization_context:
                module = build_module(
                    layer_spec,
                    config=layer_config,
                    layer_number=layer_number,
                    pg_collection=self.pg_collection,
                    vp_stage=self.vp_stage,
                )
            return module

        # offset is implicit in TransformerLayer
        self.layers = torch.nn.ModuleList(
            [
                build_layer(layer_spec, i + 1)
                for i, layer_spec in enumerate(self.submodules.layer_specs)
            ]
        )

        # @TODO: add back account_for_embedding_in_pipeline_split (see issue #293)
        # In pipeline parallelism, we want to add this LN only to the last stage of the pipeline
        # self.post_process and self.post_layer_norm guide this behavior
        if self.has_final_layernorm_in_this_stage():
            self.final_layernorm = not_none(self.submodules.layer_norm)(
                config=self.config,
                hidden_size=self.config.hidden_size,
                eps=self.config.layernorm_epsilon,
            )
        else:
            self.final_layernorm = None  # Either this or nn.Identity

        if self.config.inference_fuse_tp_communication:
            self._setup_fused_tp_communication()

    def has_final_layernorm_in_this_stage(self):
        """
        Check if this vpp stage contains the final layernorm.

        Note:
            Final layernorm now has been moved from the post-process stage to the last decoder
            layer by using this function.
            There will be a small numeric difference because of grad norm reduction when final
            layernorm is placed in different pipeline stages in deterministic mode. It can still
            be bitwise aligned by disabling grad norm clipping.
        """
        if self.config.mtp_num_layers is None:
            # for model without MTPLayer, the final layernorm is set in the stage which does
            # post_process
            return self.submodules.layer_norm and self.post_process and self.post_layer_norm
        else:
            # for model with MTPLayer, the final layernorm is set in the stage which has the
            # last layer of the decoder
            has_final_layernorm_in_this_stage = False
            for layer in self.layers:
                if layer.layer_number == self.config.num_layers:
                    has_final_layernorm_in_this_stage = True
                    break
            return (
                self.submodules.layer_norm
                and has_final_layernorm_in_this_stage
                and self.post_layer_norm
            )

    def _setup_fused_tp_communication(self):
        """Setup fused TP communication for all layers.
        We have a fused reduce-scatter + add + layer-norm + all-gather operation.
        We call this kernel from within row parallel linear layers.
        But layer-norm needs the layer norm weights from the
        successive column parallel linear layer.
        This function is used to pass those weights to the respective layers.
        """

        for i in range(len(self.layers)):
            current_layer = self.layers[i]

            # Get next layer's QKV norm weights (None for last layer)
            if i < len(self.layers) - 1:
                next_qkv_norm_weights = self.layers[i + 1].get_qkv_layer_norm_weights()
            else:
                next_qkv_norm_weights = None

            # Configure all fused TP communication settings in one call
            current_layer.configure_fused_tp_inference(
                skip_qkv_norm_and_all_gather=(i > 0),
                fc2_next_layer_norm_weights=next_qkv_norm_weights,
            )

    def _get_layer(self, layer_number: int):
        return self.layers[layer_number]

    def _checkpointed_forward(
        self,
        hidden_states: Tensor,
        attention_mask: Tensor,
        context: Tensor,
        context_mask: Tensor,
        rotary_pos_emb: Tensor,
        attention_bias: Tensor,
        packed_seq_params: PackedSeqParams,
        use_inner_quantization_context: bool,
        padding_mask: Optional[Tensor] = None,
        extract_layer_indices: Optional[Set[int]] = None,
        layer_offset: int = 0,
    ):
        """Forward method with activation checkpointing.

        Args:
            extract_layer_indices (Set[int], optional): Global layer
                indices (across all pipeline stages) from which to
                extract features.
            layer_offset (int): The global layer offset for the current
                pipeline stage. Used to convert local layer indices to
                global indices when checking extract_layer_indices.

        Returns:
            If extract_layer_indices is empty: hidden_states tensor
            If extract_layer_indices is non-empty: (hidden_states, intermediate_hidden_states) tuple
        """
        if extract_layer_indices is None:
            extract_layer_indices = set()
        intermediate_hidden_states: List[Tensor] = []

        def custom(start: int, end: int):
            def custom_forward(
                hidden_states,
                attention_mask,
                context,
                context_mask,
                rotary_pos_emb,
                padding_mask=None,
            ):
                for index in range(start, end):
                    layer = self._get_layer(index)

                    # Get appropriate inner quantization context
                    if use_inner_quantization_context:
                        if self.config.fp8:
                            inner_quantization_context = get_fp8_context(
                                self.config, layer.layer_number - 1
                            )
                        # TODO: check if fp4 is supported in this case
                        elif self.config.fp4:
                            inner_quantization_context = get_fp4_context(
                                self.config, layer.layer_number - 1
                            )
                        else:
                            inner_quantization_context = nullcontext()
                    else:
                        inner_quantization_context = nullcontext()

                    with inner_quantization_context:
                        hidden_states, context = layer(
                            hidden_states=hidden_states,
                            attention_mask=attention_mask,
                            context=context,
                            context_mask=context_mask,
                            rotary_pos_emb=rotary_pos_emb,
                            attention_bias=attention_bias,
                            inference_context=None,
                            packed_seq_params=packed_seq_params,
                            padding_mask=padding_mask,
                        )
                return hidden_states, context

            return custom_forward

        def checkpoint_handler(forward_func):
            """Determines whether to use the `te_checkpoint` or `tensor_parallel.checkpoint`"""
            # TODO: check if fp4 is supported in this case
            if self.config.fp8 or self.config.fp4:
                return te_checkpoint(
                    forward_func,
                    self.config.distribute_saved_activations,
                    tensor_parallel.random.get_cuda_rng_tracker,
                    self.pg_collection.tp,
                    hidden_states,
                    attention_mask,
                    context,
                    context_mask,
                    rotary_pos_emb,
                    padding_mask,
                )
            else:
                return tensor_parallel.checkpoint(
                    forward_func,
                    self.config.distribute_saved_activations,
                    hidden_states,
                    attention_mask,
                    context,
                    context_mask,
                    rotary_pos_emb,
                    padding_mask,
                )

        if self.config.recompute_method == 'uniform':
            # Uniformly divide the total number of Transformer layers and checkpoint
            # the input activation of each divided chunk.
            # A method to further reduce memory usage reducing checkpoints.
            layer_idx = 0
            while layer_idx < self.num_layers_per_pipeline_rank:
                hidden_states, context = checkpoint_handler(
                    custom(layer_idx, layer_idx + self.config.recompute_num_layers)
                )

                # Feature extraction for uniform recompute: collect at end of each chunk
                # Note: Only the last layer of each chunk can have features collected
                chunk_end = min(
                    layer_idx + self.config.recompute_num_layers, self.num_layers_per_pipeline_rank
                )
                for idx in range(layer_idx, chunk_end):
                    if (idx + layer_offset) in extract_layer_indices:
                        # For uniform recompute, we can only get features at chunk boundaries
                        # Limitation: for fine-grained extraction, use 'block'
                        if idx == chunk_end - 1:
                            intermediate_hidden_states.append(hidden_states)

                layer_idx += self.config.recompute_num_layers

        elif self.config.recompute_method == 'block':
            # Checkpoint the input activation of only a set number of individual
            # Transformer layers and skip the rest.
            # A method fully use the device memory removing redundant re-computation.
            recompute_skip_num_layers = 0
            for layer_idx in range(self.num_layers_per_pipeline_rank):
                # Skip recomputation when input grad computation is not needed.
                # Need to have at least one input tensor with gradient computation
                # for re-enterant autograd engine.
                # TODO: check if fp4 is supported in this case
                if (self.config.fp8 or self.config.fp4) and not hidden_states.requires_grad:
                    recompute_skip_num_layers += 1
                if (
                    layer_idx >= recompute_skip_num_layers
                    and layer_idx < self.config.recompute_num_layers + recompute_skip_num_layers
                ):
                    hidden_states, context = checkpoint_handler(custom(layer_idx, layer_idx + 1))
                else:
                    hidden_states, context = custom(layer_idx, layer_idx + 1)(
                        hidden_states, attention_mask, context, context_mask, rotary_pos_emb
                    )

                # Feature extraction: collect hidden states at specified global layer indices
                if (layer_idx + layer_offset) in extract_layer_indices:
                    intermediate_hidden_states.append(hidden_states)
        else:
            raise ValueError("Invalid activation recompute method.")

        # Return intermediate hidden states if feature extraction was requested
        if len(extract_layer_indices) > 0:
            return hidden_states, intermediate_hidden_states

        return hidden_states

    def set_input_tensor(self, input_tensor: Tensor):
        """Set input tensor to be used instead of forward()'s input.

        When doing pipeline parallelism the input from the previous
        stage comes from communication, not from the input, so the
        model's forward_step_func won't have it. This function is thus
        used by internal code to bypass the input provided by the
        forward_step_func"""
        self.input_tensor = input_tensor

    def _should_call_local_cudagraph(self, *args, **kwargs):
        """
        Check if we should call the local cudagraph path.
        """
        if (
            not self.training
            and hasattr(self, 'cudagraph_manager')
            and kwargs['attention_mask'] is None
            and (
                kwargs.get('inference_context') is not None
                or kwargs.get('inference_params') is not None
            )
            and CudaGraphScope.full_iteration_inference in self.config.cuda_graph_scope
        ):
            if kwargs['inference_context'].is_static_batching():
                using_cuda_graph = kwargs['inference_context'].is_decode_only()
            else:
                using_cuda_graph = kwargs['inference_context'].using_cuda_graph_this_step()

            if using_cuda_graph:
                return True
        return False

    def __call__(self, *args, **kwargs):
        if self._should_call_local_cudagraph(*args, **kwargs):
            kwargs['hidden_states'] = (
                kwargs['hidden_states'].unwrap()
                if isinstance(kwargs['hidden_states'], WrappedTensor)
                else kwargs['hidden_states']
            )
            return super().__call__(*args, **kwargs)[0]
        return super().__call__(*args, **kwargs)

    def forward(
        self,
        hidden_states: Union[Tensor, WrappedTensor],
        attention_mask: Optional[Tensor],
        context: Optional[Tensor] = None,
        context_mask: Optional[Tensor] = None,
        rotary_pos_emb: Optional[Tensor] = None,
        rotary_pos_cos: Optional[Tensor] = None,
        rotary_pos_sin: Optional[Tensor] = None,
        rotary_pos_cos_sin: Optional[Tensor] = None,
        attention_bias: Optional[Tensor] = None,
        inference_context: Optional[BaseInferenceContext] = None,
        packed_seq_params: Optional[PackedSeqParams] = None,
        sequence_len_offset: Optional[Tensor] = None,
        padding_mask: Optional[Tensor] = None,
        extract_layer_indices: Optional[Set[int]] = None,
        *,
        inference_params: Optional[BaseInferenceContext] = None,
        dynamic_inference_decode_only: Optional[bool] = None,
    ):
        """
        Perform the forward pass through the transformer block.

        This method handles the core computation of the transformer, including
        self-attention, optional cross-attention, and feed-forward operations.

        Args:
            hidden_states (Union[Tensor, WrappedTensor]): Input tensor of shape [s, b, h]
                where s is the sequence length, b is the batch size, and h is the hidden size.
                Can be passed as a WrappedTensor during inference to avoid an obsolete
                reference in the calling function.
            attention_mask (Tensor): Boolean tensor of shape [1, 1, s, s] for masking
                self-attention.
            context (Tensor, optional): Context tensor for cross-attention.
            context_mask (Tensor, optional): Mask for cross-attention context
            rotary_pos_emb (Tensor, optional): Rotary positional embeddings.
            rotary_pos_cos (Optional[Tensor]): Rotary embedding cosine.
            rotary_pos_sin (Optional[Tensor]): Rotary embedding sine.
            rotary_pos_cos_sin (Optional[Tensor]): Combined rotary embedding cosine and sine.
            Currently used exclusively for inference with dynamic batching and flashinfer RoPE.
            attention_bias (Tensor): Bias tensor for Q * K.T of shape in shape broadcastable
                to [b, num_head, sq, skv], e.g. [1, 1, sq, skv].
                Used as an alternative to apply attention mask for TE cuDNN attention.
            inference_context (BaseInferenceContext, optional): Parameters for inference-time
                optimizations.
            packed_seq_params (PackedSeqParams, optional): Parameters for packed sequence
                processing.
            extract_layer_indices (Set[int], optional): A set of global
                layer indices (0-based across all pipeline stages) from
                which to extract intermediate hidden states. If
                non-empty, the forward pass will collect hidden_states
                after each specified layer.
            dynamic_inference_decode_only: Optional[bool]: If true, indicates that the current
                inference context is for decode-only. This args is only used to uniquely
                identify decode and non-decode cuda graph runners in the cuda graph manager.

        Returns:
            Union[Tensor, Tuple[Tensor, List[Tensor]]]:
                - If extract_layer_indices is None or empty: Returns the output hidden states tensor
                  of shape [s, b, h].
                - If extract_layer_indices is non-empty: Returns a tuple
                  of (hidden_states, intermediate_hidden_states) where
                  intermediate_hidden_states is a list of tensors
                  corresponding to hidden states after each layer in
                  extract_layer_indices.
        """

        inference_context = deprecate_inference_params(inference_context, inference_params)
        # Remove 'dynamic_inference_decode_only' from kwargs if present
        # this is only used to uniquely identify decode and non-decode cuda graph
        # runners in the cuda graph manager

        # Initialize feature collection (consistent with FastGen's Wan implementation)
        if extract_layer_indices is None:
            extract_layer_indices = set()
        intermediate_hidden_states: List[Tensor] = []

        # Calculate the global layer offset for this pipeline stage
        # This is needed to convert local layer indices to global indices for feature extraction
        pp_group = self.pg_collection.pp if hasattr(self.pg_collection, 'pp') else None
        layer_offset = get_transformer_layer_offset(
            self.config, self.vp_stage, get_pg_rank(pp_group)
        )

        # Delete the obsolete reference to the initial input tensor if necessary
        if isinstance(hidden_states, WrappedTensor):
            hidden_states = hidden_states.unwrap()

        if not self.pre_process:
            # See set_input_tensor()
            hidden_states = self.input_tensor

        # Viewless tensor.
        # - We only need to create a viewless tensor in the case of micro batch
        #   size (mbs) == 1, since in this case, 'hidden_states.transpose()'
        #   above creates a view tensor, and '.contiguous()' is a pass-through.
        #   For mbs >= 2, '.contiguous()' creates a new tensor, eliminating
        #   the need to make it viewless.
        #
        #   However, we don't explicitly check mbs == 1 here because
        #   make_viewless_tensor() has negligible overhead when its input
        #   is already viewless.
        #
        # - For the 'else' case above, calling make_viewless_tensor() here is
        #   likely redundant, since p2p_communication.py (likely originator)
        #   already creates viewless tensors. That said, make_viewless_tensor()
        #   is called here to be future-proof and corner-case-proof.
        hidden_states = make_viewless_tensor(inp=hidden_states, requires_grad=True, keep_graph=True)

        if self.config.sequence_parallel:
            rng_context = tensor_parallel.get_cuda_rng_tracker().fork()
        else:
            rng_context = nullcontext()

        # If fp8_recipe is delayed, wrap the entire pass with get_fp8_context(),
        # otherwise do nothing extra at the outer level
        # if we are using other fp8 recipes, then the context manager enter&exit are free
        # we can wrap fp8_context within the for loop over layers, so that we can fine-grained
        # control which layer will be fp8 or bf16
        # For FP4: NVFP4BlockScaling doesn't have delayed scaling, always uses inner context
        if self.config.fp8:
            use_outer_quantization_context = self.config.fp8_recipe == Fp8Recipe.delayed
            use_inner_quantization_context = self.config.fp8_recipe != Fp8Recipe.delayed
            outer_quantization_context = (
                get_fp8_context(self.config) if use_outer_quantization_context else nullcontext()
            )
        elif self.config.fp4:
            use_outer_quantization_context = False
            use_inner_quantization_context = True
            outer_quantization_context = nullcontext()
        else:
            # No quantization
            use_outer_quantization_context = False
            use_inner_quantization_context = False
            outer_quantization_context = nullcontext()

        with rng_context, outer_quantization_context:
            # Forward pass.
            if self.config.recompute_granularity == 'full' and self.training:
                checkpointed_result = self._checkpointed_forward(
                    hidden_states=hidden_states,
                    attention_mask=attention_mask,
                    context=context,
                    context_mask=context_mask,
                    rotary_pos_emb=rotary_pos_emb,
                    attention_bias=attention_bias,
                    packed_seq_params=packed_seq_params,
                    use_inner_quantization_context=use_inner_quantization_context,
                    padding_mask=padding_mask,
                    extract_layer_indices=extract_layer_indices,
                    layer_offset=layer_offset,
                )
                # Handle return value from _checkpointed_forward
                if len(extract_layer_indices) > 0:
                    # (hidden_states, intermediate_hidden_states) tuple
                    hidden_states, intermediate_hidden_states = checkpointed_result
                else:
                    # No intermediate_hidden_states requested: just hidden_states
                    hidden_states = checkpointed_result
            else:
                for l_no, layer in enumerate(self.layers):
                    # Get appropriate inner quantization context
                    if use_inner_quantization_context:
                        if self.config.fp8:
                            inner_quantization_context = get_fp8_context(
                                self.config, layer.layer_number - 1
                            )
                        elif self.config.fp4:
                            inner_quantization_context = get_fp4_context(
                                self.config, layer.layer_number - 1
                            )
                        else:
                            inner_quantization_context = nullcontext()
                    else:
                        inner_quantization_context = nullcontext()

                    with self.offload_context, inner_quantization_context:
                        hidden_states, context = layer(
                            hidden_states=hidden_states,
                            attention_mask=attention_mask,
                            context=context,
                            context_mask=context_mask,
                            rotary_pos_emb=rotary_pos_emb,
                            rotary_pos_cos=rotary_pos_cos,
                            rotary_pos_sin=rotary_pos_sin,
                            rotary_pos_cos_sin=rotary_pos_cos_sin,
                            attention_bias=attention_bias,
                            inference_context=inference_context,
                            packed_seq_params=packed_seq_params,
                            sequence_len_offset=sequence_len_offset,
                            padding_mask=padding_mask,
                        )

                    if (
                        torch.is_grad_enabled()
                        and self.config.cpu_offloading
                        and self.group_prefetch_offload_commit_async is not None
                    ):
                        hidden_states = self.group_prefetch_offload_commit_async(hidden_states)

                    # Extract intermediate embeddings using global layer index
                    if (l_no + layer_offset) in extract_layer_indices:
                        intermediate_hidden_states.append(hidden_states)

        # Final layer norm.
        if self.final_layernorm is not None:
            hidden_states = apply_module(self.final_layernorm)(cast(Tensor, hidden_states))
            # TENorm produces a "viewed" tensor. This will result in schedule.py's
            # deallocate_output_tensor() throwing an error, so a viewless tensor is
            # created to prevent this.
            hidden_states = make_viewless_tensor(
                inp=hidden_states, requires_grad=True, keep_graph=True
            )

        # If this TransformerBlock is empty, input and output hidden states will be the same node
        # on the computational graph and will lead to unexpected errors in pipeline schedules.
        if not self.pre_process and len(self.layers) == 0 and not self.final_layernorm:
            hidden_states = hidden_states.clone()

        if len(extract_layer_indices) > 0:
            return hidden_states, intermediate_hidden_states

        return hidden_states

    def sharded_state_dict(
        self, prefix: str = '', sharded_offsets: tuple = (), metadata: dict = None
    ) -> ShardedStateDict:
        """
        Generate a sharded state dictionary for the transformer block.

        Args:
            prefix (str, optional): Prefix to be added to all keys in the state dict.
                Defaults to an empty string.
            sharded_offsets (tuple, optional): Tuple of sharding offsets.
            metadata (dict, optional): Additional metadata for sharding.
                Can specify if layers are non-homogeneous. Defaults to None.

        Returns:
            ShardedStateDict: A dictionary containing the sharded state of the model.
        """
        assert not sharded_offsets, "Unexpected sharded offsets"
        # TODO: remove multiple non_homogeneous_layers=True assignments
        #  once non_homogeneous_layers=False support is dropped
        non_homogeneous_layers = metadata is not None and metadata.get(
            'non_homogeneous_layers', False
        )
        if self.config.hetereogenous_dist_checkpoint:
            non_homogeneous_layers = True

        if isinstance(self.config.moe_layer_freq, int):
            if self.config.moe_layer_freq > 1:
                non_homogeneous_layers = True
        elif isinstance(self.config.moe_layer_freq, list):
            non_homogeneous_layers = True

        if isinstance(self.config.linear_attention_freq, int):
            if self.config.linear_attention_freq > 1:
                non_homogeneous_layers = True
        elif isinstance(self.config.linear_attention_freq, list):
            non_homogeneous_layers = True

        if self.config.heterogeneous_block_specs:
            non_homogeneous_layers = True

        singleton_local_shards = (metadata or {}).get('singleton_local_shards', False)
        if singleton_local_shards:
            if metadata is not None and metadata.get('non_homogeneous_layers') is False:
                # non_homogeneous_layers=False was set explicitly - emit an override warning
                logger.warning(
                    'non_homogeneous_layers=False is deprecated.'
                    ' Setting non_homogeneous_layers=True.'
                )
            non_homogeneous_layers = True

        sharded_state_dict = {}

        layer_prefix = f'{prefix}layers.'
        num_layers = self.config.num_layers
        for layer in self.layers:
            offset = get_transformer_layer_offset(
                self.config, self.vp_stage, get_pg_rank(self.pg_collection.pp)
            )

            global_layer_offset = layer.layer_number - 1  # self.layer_number starts at 1
            state_dict_prefix = f'{layer_prefix}{global_layer_offset - offset}.'  # module list index in TransformerBlock # pylint: disable=line-too-long
            if non_homogeneous_layers:
                sharded_prefix = f'{layer_prefix}{global_layer_offset}.'
                sharded_pp_offset = []
            else:
                sharded_prefix = layer_prefix
                sharded_pp_offset = [
                    (0, global_layer_offset, num_layers)
                ]  # PP sharding offset for ShardedTensors
            layer_sharded_state_dict = layer.sharded_state_dict(
                state_dict_prefix, sharded_pp_offset, metadata
            )
            replace_prefix_for_sharding(layer_sharded_state_dict, state_dict_prefix, sharded_prefix)

            sharded_state_dict.update(layer_sharded_state_dict)

        # Add modules other than self.layers
        for name, module in self.named_children():
            if not module is self.layers:
                sharded_state_dict.update(
                    sharded_state_dict_default(
                        module,
                        f'{prefix}{name}.',
                        sharded_offsets,
                        metadata,
                        tp_group=self.tp_group,
                    )
                )

        return sharded_state_dict


================================================
FILE: megatron/core/transformer/transformer_config.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import logging
import math
import warnings
from dataclasses import dataclass, field
from typing import Callable, List, Literal, Optional, Tuple, Union

import torch
import torch.nn.functional as F

from megatron.core.enums import Fp4Recipe, Fp8Recipe
from megatron.core.quantization.quant_config import RecipeConfig
from megatron.core.transformer.enums import AttnBackend, CudaGraphScope
from megatron.core.transformer.pipeline_parallel_layer_layout import PipelineParallelLayerLayout

from .._rank_utils import log_single_rank
from ..fusions.fused_bias_geglu import quick_gelu
from ..model_parallel_config import ModelParallelConfig
from ..utils import (
    get_te_version,
    init_method_normal,
    is_te_min_version,
    is_torch_min_version,
    mup_scaled_init_method_normal,
    scaled_init_method_normal,
)

logger = logging.getLogger(__name__)

try:
    from packaging.version import Version as PkgVersion

    HAVE_PACKAGING = True
except ImportError:
    HAVE_PACKAGING = False


@dataclass
class TransformerConfig(ModelParallelConfig):
    """Configuration object for megatron-core transformers.

    The initialization function has an argument for each parameter,
    including those in ModelParallelConfig.
    """

    ####################
    # model architecture
    ####################

    num_layers: int = field(default=0, metadata={"argparse_meta": {"default": None}})
    """Number of transformer layers in a transformer block."""

    mtp_num_layers: Optional[int] = None
    """Number of Multi-Token Prediction (MTP) Layers.
    MTP extends the prediction scope to multiple future tokens at each position.
    This MTP implementation sequentially predict additional tokens
    by using D sequential modules to predict D additional tokens.
    """

    mtp_loss_scaling_factor: Optional[float] = 0.1
    """Weighting factor of Multi-Token Prediction (MTP) loss.
    We compute the average of the MTP losses across all depths, 
    and multiply it the scaling factor to obtain the overall MTP loss, 
    which serves as an additional training objective.
    """

    mtp_use_repeated_layer: bool = False
    """Use a single MTP layer repeatedly instead of multiple separate layers."""

    mtp_hybrid_override_pattern: Optional[str] = None
    """DEPRECATED: Use unified hybrid_layer_pattern instead.
    Legacy argument for loading old checkpoints.
    Force a specific hybrid layer pattern for MTP layers.
    """

    num_layers_in_first_pipeline_stage: Optional[int] = None
    """Number of transformer layers on first pipeline stage.
    None implies equal layer division across PP ranks."""

    num_layers_in_last_pipeline_stage: Optional[int] = None
    """Number of transformer layers on last pipeline stage.
    None implies equal layer division across PP ranks."""

    pipeline_model_parallel_layout: Optional[Union[str, list, PipelineParallelLayerLayout]] = None
    """Custom definition of the pipeline parallel partitioning.
    Support type:
    - str: e.g., 'Et*3|(tt|)*29,m|L'. Stages are split by '|', replicated stages or layers
    can be described with multiplication. Commas can be used cosmetically.
    - list: e.g., [['embedding', 'decoder'], ['decoder', 'decoder', 'decoder', 'loss']].
    - PipelineParallelLayerLayout: a PipelineParallelLayerLayout object.
    If given either a string or a list, it will be transferred into a PipelineParallelLayerLayout
    in post init. Let i = a * pp_size + b, then layout[i] gives a list of the layers 
    in the a-th vpp stage and the b-th pp stage, i.e., vpp(0)pp(0), vpp(0)pp(1), ..., 
    vpp(i)pp(j), vpp(i)pp(j+1), ..., vpp(-1)pp(-2), vpp(-1)pp(-1).
    In the inner lists of layers, 'embedding' or 'E' denotes the embedding layer, 'loss' or 'L'
    denotes the loss function, and 'decoder' or 't' denotes the transformer decoder layer.
    Examples:
        [['embedding', 'decoder'], ['decoder', 'decoder', 'decoder', 'loss']]:
        pp = 2, vpp = None
        pp rank 0 holds: embedding, decoder
        pp rank 1 holds: decoder*3, loss
        'E|(tt|)*2,(t|)*4,mL':
        pp = 2, vpp = 4
        vpp rank 0 pp rank 0 holds: embedding
        vpp rank 0 pp rank 1~2 holds: decoder*2
        vpp rank 0 pp rank 3 holds: decoder
        vpp rank 1 pp rank 0~2 holds: decoder
        vpp rank 1 pp rank 3 holds: mtp, loss"""

    account_for_embedding_in_pipeline_split: bool = False
    """If set, the embedding layer will be treated as a standard transformer
    layer in the context of partition and placement for pipeline parallelism."""

    account_for_loss_in_pipeline_split: bool = False
    """If set, the loss layer will be treated as a standard transformer
    layer in the context of partition and placement for pipeline parallelism."""

    hidden_size: int = field(default=0, metadata={"argparse_meta": {"default": None}})
    """Transformer hidden size."""

    num_attention_heads: int = field(default=0, metadata={"argparse_meta": {"default": None}})
    """Number of transformer attention heads."""

    attention_backend: AttnBackend = AttnBackend.auto
    """Attention backend to run. By default we let transformer engine
    decide the best backend to run (except in the case of local).
    If attention backend is local we use the local pytorch implementation in mcore.
    Users can specify exact backend by changing this config. """

    softmax_scale: Optional[float] = None
    """Softmax scale for attention scaling."""

    softmax_type: Literal['vanilla', 'off-by-one', 'learnable'] = 'vanilla'
    """Applies modified softmax from https://www.evanmiller.org/attention-is-off-by-one.html. 
       Supports both TE FusedAttention and local unfused attention. Supports both a fixed offset and 
       and learnable offset."""

    num_query_groups: Optional[int] = field(
        default=None, metadata={"argparse_meta": {"default": 1}}
    )
    """Number of query groups for group query attention. If None, normal attention is used."""

    ffn_hidden_size: Optional[int] = None
    """Transformer Feed-Forward Network hidden size. This is set to 4*hidden_size
    if not provided."""

    kv_channels: Optional[int] = None
    """Projection weights dimension in multi-head attention. This is set to hidden_size //
    num_attention_heads if not provided."""

    hidden_dropout: float = 0.1
    """Dropout probability for transformer hidden state."""

    attention_dropout: float = 0.1
    """Post attention dropout probability."""

    fp32_residual_connection: bool = False
    """If true, move residual connections to fp32."""

    # @jcasper should we keep this option?
    apply_residual_connection_post_layernorm: bool = False
    """If True, uses the original BERT residule connection ordering."""

    layernorm_epsilon: float = field(
        default=1e-5, metadata={"argparse_meta": {"arg_names": ["--norm-epsilon"]}}
    )
    """Epsilon value for any LayerNorm/RMSNorm operations."""

    layernorm_zero_centered_gamma: bool = field(
        default=False, metadata={"argparse_meta": {"arg_names": ["--apply-layernorm-1p"]}}
    )
    """If set to True, the LayerNorm is adjusted to center the gamma values around 0. This improves
    numerical stability."""

    add_bias_linear: bool = field(
        default=True, metadata={"argparse_meta": {"arg_names": ["--disable-bias-linear"]}}
    )
    """Include/exclude a bias term in all linear layers (QKV projections, after core attention,
    and two in MLP layer)."""

    add_qkv_bias: bool = False
    """Add a bias term only for QKV projections."""

    gated_linear_unit: bool = False
    """Use a gated linear unit for the first linear layer in the MLP."""

    activation_func: Callable[[torch.Tensor], torch.Tensor] = F.gelu
    """Activation function to use for the non-linearity in the MLP."""

    activation_func_fp8_input_store: bool = False
    """Store the input of MLP activation function in FP8 for backprop to save memory.
    The stored input is casted back to the original precision before backprop compuatation."""

    glu_linear_offset: float = 0.0
    """Offset term in the GLU activation function: activation_func(x[0]) * (x[1] + offset). Only 
    used when gated_linear_unit is True"""

    activation_func_clamp_value: Optional[float] = None
    """Clamp the output of the linear_fc1 in the activation function. Only used when activation_func
    is quick_gelu."""

    num_moe_experts: Optional[int] = None
    """Number of experts to use for MoE layer. When set, it replaces MLP with MoE layer. Set to None
    for no MoE."""

    rotary_interleaved: bool = False
    """True is rotate pairs of even and odd dimensions (RoFormer style), False is rotate pairs of
    first half and second half (LLaMa style). Default to False."""

    window_size: Optional[Tuple[int, int]] = None
    """If not None, then will use sliding window attention. The size of the window is specified by
    the numbers inside the tuple; -1 is special value meaning "infinite window size"."""

    window_attn_skip_freq: Optional[Union[int, List[int]]] = None
    """Frequency of full attention layers among sliding window attention layers. Accepts either:
    - An integer N: Represents a (N-1):1 ratio, one full attention layer after (N-1) SWA layers.
    - A list that defines a custom pattern, e.g.: [1,1,1,1,0,0,0,0], where 1 represents SWA. """

    normalization: Literal['LayerNorm', 'RMSNorm'] = "LayerNorm"
    """Which norm to use for normalization layers, valid options are `LayerNorm` and `RMSNorm`."""

    qk_layernorm: bool = False
    """Whether to apply `normalization` type of normalization to the query and key embeddings."""

    qk_l2_norm: bool = False
    """Whether to apply llama 4-style qk L2 norm."""

    qk_clip: bool = False
    """Whether to clip the query and key weights. Needed for Muon MLA Model training."""

    qk_clip_alpha: float = 0.5
    """The balancing alpha for qk-clip. Q = Q * (eta ** alpha)"""

    qk_clip_threshold: float = 100
    """The balancing threshold for qk-clip. eta = min(threshold / max_attention_logits, 1.0)"""

    log_max_attention_logit: bool = False
    """Whether to log the max attention logit across whole model. Decoupled from qk_clip,
    defualts to False. Setting qk_clip will automatically log the max logit"""

    attention_output_gate: bool = False
    """Whether to apply output gate to the attention layers."""

    test_mode: bool = False
    """Whether to run real-time tests."""

    calculate_per_token_loss: bool = False
    """Whether cross entropy loss is calculated over the actual number of non-padded tokens in the
    global batch, versus the default behavior of assuming all tokens are non-padded."""

    multi_latent_attention: bool = False
    """Whether to use multi-latent attention."""

    no_rope_freq: Optional[Union[int, List[int]]] = None
    """Controls which layers perform Rotary Position Embedding (RoPE). Accepts either:
    An integer N: Creates a pattern where RoPE is skipped every N-1 layers. For example,
    no_rope=4 means RoPE is applied for 3 layers, then skipped for 1 layer, repeating this pattern.
    A list of integers: Defines a custom pattern where 1 means skip RoPE and 0 means apply RoPE.
    For example, [0,1,1,0] means: apply RoPE, skip RoPE, skip RoPE, apply RoPE."""

    ####################
    # attention variant
    ####################
    experimental_attention_variant: Optional[Literal['gated_delta_net', 'dsa']] = None
    """Type of attention variant to use. Currently support gated_delta_net and dsa."""

    ####################
    # DSA
    ####################
    dsa_indexer_n_heads: Optional[int] = None
    """Number of DSA indexer heads."""

    dsa_indexer_head_dim: Optional[int] = None
    """Dimension per DSA indexer head."""

    dsa_indexer_topk: Optional[int] = None
    """Number of top-k tokens to select in DSA indexer."""

    dsa_indexer_loss_coeff: Optional[float] = None
    """Coefficient for the DSA indexer KL divergence loss. Set to 0 to disable indexer loss."""

    dsa_indexer_use_sparse_loss: bool = False
    """Whether to use sparse DSA indexer loss. If True, the indexer loss will be computed using the
    top-k indices."""

    ####################
    # linear attention
    ####################
    linear_attention_freq: Optional[Union[int, List[int]]] = None
    """Frequency between LA (linear attention) layers 
    and SDPA (scaled dot-product attention) layers.
    Accepts either:
    - An integer N: Represents a (N-1):N ratio, meaning (N-1) LA layers for every 1 SDPA layer
    - A list that defines a custom pattern, e.g.: [1,1,1,0,1,1,1,0,1,1,1,0]"""

    linear_conv_kernel_dim: Optional[int] = 4
    """Conv kernel dimension for the gated delta net."""

    linear_key_head_dim: Optional[int] = 128
    """Query and key head dimension for the gated delta net."""

    linear_value_head_dim: Optional[int] = 128
    """Value and gate head dimension for the gated delta net."""

    linear_num_key_heads: Optional[int] = 16
    """Number of query and key heads for the gated delta net."""

    linear_num_value_heads: Optional[int] = 32
    """Number of value and gate heads for the gated delta net."""

    ####################
    # initialization
    ####################
    init_method: Optional[Callable] = None
    """Method to initialize weights. Note that bias is always set to zero. Should be a function that
    takes a single Tensor and initializes it. If None, will be set to
    megatron.core.utils.init_method_normal(init_method_std) which is torch nn init normal with
    mean=0.0 and std=init_method_std."""

    output_layer_init_method: Optional[Callable] = None
    """Method to initialize weights of the output layer of both attention and MLP blocks. If None,
    will be set to megatron.core.utils.scaled_init_method_normal(init_method_std) which is torch nn
    init normal with mean=0.0 and std=init_method_std / math.sqrt(2.0 * num_layers).
    Note: this does not control vocab readout/unembedding initialization."""

    init_method_std: float = 0.02
    """Standard deviation of the zero mean normal for the default initialization method, not used if
    init_method and output_layer_init_method are provided."""

    embedding_init_method: Optional[Callable] = None
    """
    Method to initialize weights of the embedding layer. If None, will be set as described 
    in init_method above.
    """

    embedding_init_method_std: Optional[float] = None
    """
    Standard deviation of the zero mean normal for the default initialization method for the 
    embedding layer. If None, will be set to init_method_std. Setting this to a value around
    1.0 may avoid loss spikes in training. Setting this to any value will also skip applying
    weight decay on embedding weights to avoid shrinkage towards zero.
    See https://arxiv.org/abs/2312.16903 for more details.
    """

    init_model_with_meta_device: bool = False
    """
    If True, initializes the model with the meta device. This is helpful for
    training of very large models. This feature is only works when megatron fsdp is turned on.
    """

    ####################
    # MuP (Maximal Update Parameterization)
    ####################
    use_mup: bool = False
    """
    Enable Maximal Update Parameterization (MuP) for hyperparameter transfer across
    model widths. When enabled, learning rates and initialization are scaled according
    to the width multiplier to ensure consistent training dynamics.
    """

    mup_width_mult: float = 1.0
    """
    Width multiplier for MuP scaling, computed as hidden_size / mup_base_hidden_size.
    This value is automatically computed in __post_init__ when use_mup is enabled.
    """

    mup_base_hidden_size: Optional[int] = None
    """
    Base hidden size for MuP width scaling. This is the reference width from which
    scaling factors are computed. Defaults to hidden_size if not specified (base model
    case where width_mult=1.0). Set this to your base/proxy model's hidden size when
    scaling up.
    """

    mup_embedding_mult: float = 1.0
    """
    Multiplier for embedding layer output. Applied after the embedding lookup.
    Default: 1.0 (no scaling).
    """

    mup_output_mult: float = 1.0
    """
    Multiplier for output logits before softmax. When MuP is enabled and this is left
    at 1.0, it is auto-set to 1/mup_width_mult to keep output variance stable across
    widths. Override to customize output scaling.
    Default: 1.0.
    """

    mup_base_head_dim: Optional[float] = None
    """
    Base head dimension for MuP attention scaling. When set,
    softmax_scale = sqrt(mup_base_head_dim) / (kv_channels ** mup_attn_scale_power).
    Set to base model's d_head (e.g., 64) to match standard 1/sqrt(d_head) scaling
    at the base width, ensuring non-MuP compatibility for that specific value.
    """

    mup_attn_scale_power: float = 1.0
    """
    Power for attention scaling: softmax_scale = 1 / (kv_channels ** mup_attn_scale_power).
    0.5 = standard attention (1/sqrt(d_head)), 1.0 = MuP attention (1/d_head).
    Default: 1.0 (MuP scaling when use_mup is True). Set to 0.5 for standard scaling.
    """

    ####################
    # mixed-precision
    ####################
    apply_query_key_layer_scaling: bool = False
    """If true, scale Q * K^T by 1 / layer-number. This improve numeric stability when training with
    fp16. Also sets `attention_softmax_in_fp32` to True."""

    attention_softmax_in_fp32: bool = True
    """If True, run attention masking and softmax in fp32. This should be True if
    apply_query_key_layer_scaling is True."""

    disable_bf16_reduced_precision_matmul: bool = False
    """If True, sets torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction=False to
    prevent matmul from using reduced precision accumulation when using BF16."""

    ####################
    # fusion
    ####################
    bias_activation_fusion: bool = False
    """If True, fuses bias addition and the activation function when possible."""

    masked_softmax_fusion: bool = False
    """If True, uses softmax fusion."""

    persist_layer_norm: bool = False
    """If True, uses the persistent fused layer norm kernel. This kernel only supports a fixed set
    of hidden sizes."""

    memory_efficient_layer_norm: bool = False
    """If True, and using local layers (not from TransformerEngine), tells Apex to use the memory
    efficient fused LayerNorm kernel. Ignored if not using LayerNorm."""

    bias_dropout_fusion: bool = False  # TODO: this should be bias_dropout_add_fusion?
    """If True, uses bias dropout fusion."""

    apply_rope_fusion: bool = False
    """If True, use fused RoPE kernel."""

    use_fused_weighted_squared_relu: bool = False
    """If True, uses fused weighted squared relu kernel when using MoE."""

    fused_single_qkv_rope: bool = False
    """If set, avoid splitting QKV before ROPE forward and avoid concatenating ROPE dgrads."""

    fused_residual_rmsnorm: bool = False
    """If True, fuses residual connection and RMSNorm backward pass when TE is used."""

    ####################
    # activation recomputation
    ####################
    recompute_granularity: Optional[Literal['full', 'selective']] = None
    """Determines which type of activation recompute to use.  Megatron-core supports 'selective'
    activation checkpointing where the submodules set in --recompute-modules is checkpointed.
    The default is "core_attn" which is the memory intensive part of attention.
    These memory intensive activations are also less compute intensive which makes activation
    checkpointing more efficient for LLMs (20B+).  See Reducing Activation Recomputation in Large
    Transformer Models (https://arxiv.org/abs/2205.05198) for more details.  'full' will checkpoint
    the entire transformer layer.  If None, no recompute is performed and all activations are saved.
    If set, must be 'selective' or 'full'. 'selective' always uses all layers.
    """

    recompute_method: Optional[Literal['uniform', 'block']] = None
    """Determines which transformer layers will be recomputed. uniform will uniformly divide the
    total number of transformer layers in a transformer block and recompute the input activation of
    each divided chunk at the specified granularity.  block will recompute the input activations for
    only a set number of transformer layers per pipeline stage.  The rest of the layers in the
    pipeline stage will not have any activations recomputed.  If None, and recompute is enabled, all
    layers will do recomputation. If set, must be 'uniform' or 'block'."""

    recompute_num_layers: Optional[int] = None
    """When recompute_method is uniform, recompute_num_layers is the number of transformer layers in
    each uniformly divided recompute unit.  When recompute_method is block, recompute_num_layers is
    the number of transformer layers to recompute within each pipeline stage.  Must be None for
    'selective' activation checkpointing."""

    distribute_saved_activations: Optional[bool] = False
    """If True, distribute recomputed activations across the model parallel group."""

    recompute_modules: Optional[List[str]] = None
    """The submodules to recompute.
    choices: "core_attn", "moe_act", "layernorm", "mla_up_proj", "mlp", "moe", "shared_experts".
    default: ["core_attn"].
    "core_attn": recompute the core attention part of the transformer layer.
    "moe_act": recompute the MoE MLP activation function.
    "layernorm": recompute the input_layernorm and pre_mlp_layernorm.
    "mla_up_proj": recompute the MLA up projection and RoPE applying parts.
    "mlp": recompute the dense MLP submodule.
    "moe": recompute the MoE layer.
    "shared_experts": recompute the shared experts in the MoE layer.
    "moe_act", "layernorm", and "mla_up_proj" use output-discarding checkpointing,
    "core_attn", "mlp", "moe", and "shared_experts" use normal checkpointing.
    """

    ####################
    # fp8 related
    ####################
    fp8: Optional[Literal['e4m3', 'hybrid']] = field(
        default=None, metadata={"argparse_meta": {"arg_names": ["--fp8-format"]}}
    )
    """If set, enables the use of FP8 precision through Transformer Engine. There are 2 predefined
    choices (1) 'e4m3' uniformly uses e4m3 for all FP8 tensors, (2) 'hybrid' uses e4m3 for all FP8
    activation and weight tensors and e5m2 for all FP8 output activation gradient tensors."""

    fp8_recipe: Optional[Literal['tensorwise', 'delayed', 'mxfp8', 'blockwise', 'custom']] = (
        "delayed"
    )
    """If set, enables the use of FP8 precision through Transformer Engine. There are 5 predefined
    choices (1) 'tensorwise' uses per tensor current scaling recipe, (2) 'delayed'
    uses delayed scaling recipe, 3) 'mxfp8' for Blackwell architecture only,
    4) 'blockwise' for blockwise scaling recipe, 5) 'custom' for custom quantization recipe."""

    fp8_param: bool = False
    """If set, keep the parameters in fp8 precision to save memory. This option must be used
    together with fp8 mode (i.e., TransformerConfig.fp8 is not None). Note that not all parameters
    will be converted to fp8; for example, biases will remain unchanged. The parameters affected are
    primarily the weights of GEMMs. The specific parameters that will be converted to fp8 are
    determined by TE."""

    fp8_quantizer_factory: Optional[str] = None
    """Python import path to a callable quantizer factory, e.g., package.module.quantizer_factory.
    Required when fp8_recipe is custom."""

    fp8_margin: int = 0
    """Margin for the scaling factor computation."""

    fp8_interval: int = 1
    """DEPRECATED from TransformerEngine v1.8.0. This flag is ignored.
    Controls how often the scaling factor is recomputed.
    """

    fp8_amax_history_len: int = 1
    """The length of the amax history window used for scaling factor computation."""

    fp8_amax_compute_algo: Literal['most_recent', 'max'] = "most_recent"
    """Algorithm used for choosing the `amax` value for the scaling factor computation. There are 2
    predefined choices: `max` chooses the largest `amax` in the history window, while `most_recent`
    always chooses the most recently seen value.

    """

    fp8_wgrad: bool = True
    """When set to False, override FP8 config options and do the wgrad computation
    in higher precision."""

    fp8_dot_product_attention: bool = False
    """When set to True, use the FP8 implementation of Dot Product Attention."""

    fp8_multi_head_attention: bool = False
    """When set to True, use the FP8 implementation of Multi Head Attention."""

    tp_only_amax_red: bool = False
    """When set to True, reduce the FP8 AMAX only in the TP or TP-CP domain"""

    first_last_layers_bf16: bool = False
    """If True, retains first and last N TransformerBlocks in BF16 as opposed to FP8."""

    num_layers_at_start_in_bf16: int = 1
    """Number of layers at the start of the model to keep in BF16 precision when
    first_last_layers_bf16 is True."""

    num_layers_at_end_in_bf16: int = 1
    """Number of layers at the end of the model to keep in BF16 precision when
    first_last_layers_bf16 is True."""

    use_kitchen: bool = False
    """Use the kitchen extension for transformer quantization."""

    use_kitchen_attention: bool = False
    """Use the kitchen extension for attention (instead of TE's attention)."""

    kitchen_attention_backend: Literal["sdpa", "fa"] = "sdpa"
    """Which kitchen attention backend to use when use_kitchen_attention=True.
    "sdpa" for KitchenDotProductAttention, "fa" for KitchenFlashAttention."""

    ####################
    # fp4 related
    ####################
    fp4: Optional[Literal['e2m1']] = field(
        default=None, metadata={"argparse_meta": {"arg_names": ["--fp4-format"]}}
    )
    """If set, enables the use of FP4 precision through Transformer Engine. Currently only 
    supports 'nvfp4' which uses NVFP4BlockScaling recipe (requires TE >= 2.7.0.dev0)."""

    fp4_recipe: Optional[Literal['nvfp4', 'custom']] = "nvfp4"
    """If set, enables the use of FP4 precision through Transformer Engine. Currently only
    'nvfp4' is supported which uses NVFP4BlockScaling recipe for Blackwell+ architecture."""

    fp4_param: bool = field(
        default=False, metadata={"argparse_meta": {"arg_names": ["--fp4-param-gather"]}}
    )
    """If set, keep the parameters in fp4 precision to save memory. This option must be used
    together with fp4 mode (i.e., TransformerConfig.fp4 is not None). Note that not all parameters
    will be converted to fp4; for example, biases will remain unchanged."""

    fp4_quantizer_factory: Optional[str] = None
    """Python import path to a callable quantizer factory, e.g., package.module.quantizer_factory.
    Required when fp4_recipe is custom."""

    ####################
    # MoE related
    ####################
    moe_shared_expert_intermediate_size: Optional[int] = None
    """Shared expert total ffn hidden size.
    It should be equal to 'num_shared_experts * ffn_size_of_each_shared_expert' if
    there are multiple shared experts.
    None means no shared expert.
    By default, the shared experts execute before the router. However, when
    moe_shared_expert_overlap or overlap_moe_expert_parallel_comm is set,
    the shared experts execute after the router, before the routed experts.
    This makes the gradients from the router and the shared experts added in
    different orders to the hidden_states, causing minor numerical differences
    in the hidden_states gradient."""

    moe_shared_expert_gate: bool = False
    """Enable gate for shared expert. Only effective when 
    moe-shared-expert-intermediate-size is set."""

    moe_shared_expert_overlap: bool = False
    """Enable overlapping between shared expert computations and dispatcher communications.
    Without this, the shared experts execute before the router. 
    Only effective when moe-shared-expert-intermediate-size is set.
    """

    moe_layer_freq: Union[int, List[int]] = 1
    """Frequency between MoE layers and Dense layers. Accepts either:
    - An integer N: Represents a 1:N ratio, meaning one expert layer for every N-1 dense layers.
    - A list that defines a custom pattern, e.g.: [1,1,1,0,1,1,1,0,1,1,1,0]"""

    moe_ffn_hidden_size: Optional[int] = None
    """MoE Feed-Forward Network hidden size. If not specified, defaults to the ffn_hidden_size."""

    moe_router_load_balancing_type: Union[str, List[str]] = "aux_loss"
    """The load balancing strategy for the router.
    Options:
    - "aux_loss": Load balancing loss used in GShard and SwitchTransformer, calculated at
    micro-batch level.
    - "seq_aux_loss": Load balancing loss used in DeepSeekV2 and DeepSeekV3, computes loss
    for each individual sample.
    - "global_aux_loss": Load balancing loss calculated at global batch level.
    - "sinkhorn": Balancing algorithm used in S-BASE.
    - "none": No load balancing.
    A list of strings can be provided to combine multiple aux-loss load balancing types.
    The default is "aux_loss".
    """

    moe_router_topk: int = 2
    """Number of experts to route to for each token."""

    moe_enable_routing_replay: bool = False
    """If True, enable the routing replay feature for MoE layers."""

    moe_router_topk_limited_devices: Optional[int] = None
    """Number of EP ranks to consider for each token in group-limited routing,
    DEPRECATED and replaced by moe_router_num_groups and moe_router_group_topk.
    """

    moe_router_padding_for_quantization: Optional[bool] = False
    """Whether to pad the routing_map to make sure the number of tokens each expert receives
    is a multiple of 16/32 for quantized precision (e.g., FP8, FP4). This can remove the explicit
    padding in the GroupedMLP layer."""

    moe_router_padding_for_fp8: Optional[bool] = False
    """[Compatibility alias for moe_router_padding_for_quantization]
    Enabling this will also enable moe_router_padding_for_quantization."""

    moe_router_num_groups: Optional[int] = None
    """Number of groups to divide experts into for group-limited routing.
    When using group-limited routing:
    1. Experts are divided into 'moe_router_num_groups' equal-sized groups
    2. For each token, 'moe_router_group_topk' groups are selected based on sum of
    top-('moe_router_topk'/'moe_router_group_topk') routing scores within each group
    3. From these selected groups, 'moe_router_topk' individual experts are chosen
    Two common use cases:
    - Device-limited routing: Set 'moe_router_num_groups' equal to expert parallel size (EP)
    to limit each token to experts on a subset of devices
    (See DeepSeek-V2: https://arxiv.org/pdf/2405.04434)
    - Node-limited routing: Set 'moe_router_num_groups' equal to number of nodes in EP group
    to limit each token to experts on a subset of nodes
    (See DeepSeek-V3: https://arxiv.org/pdf/2412.19437)
    """

    moe_router_group_topk: Optional[int] = None
    """Number of selected groups for group-limited routing."""

    moe_router_pre_softmax: bool = False
    """Enable pre-softmax(pre-sigmoid) routing for MoE, which means softmax is before the
    top-k selection.
    By default, softmax is done after top-k."""

    moe_router_topk_scaling_factor: Optional[float] = None
    """Scaling factor for routing score in top-k selection, only works when moe_router_pre_softmax
    enabled. Defaults to None, which means no scaling."""

    moe_router_score_function: Literal['softmax', 'sigmoid'] = "softmax"
    """Score function for MoE routing. Can be "softmax" or "sigmoid"."""

    moe_router_dtype: Optional[Literal['fp32', 'fp64']] = None
    """Data type for routing and expert output weighted averaging. Using fp32 or fp64 can
    improve stability especially when the number of experts is large (e.g. finegrained-moe).
    None means no changes for dtype."""

    moe_router_enable_expert_bias: bool = False
    """TopK routing with dynamic per-expert bias in the aux-loss-free load balancing strategy.
    The routing decision is based on the sum of the routing scores and the expert bias.
    See https://arxiv.org/abs/2408.15664 for details."""

    moe_router_bias_update_rate: float = 1e-3
    """The expert bias is updated based on the number of assigned tokens to each expert
    in a global batch, where the bias is increased for the experts with less assigned tokens
    and decreased for the experts with more assigned tokens.
    The default value 1e-3 is same as that used in DeepSeekV3."""

    moe_router_force_load_balancing: bool = False
    """[Experimental] Force load balancing with random logits for MoE router, supports naive topk 
    and group-limited topk. This is an experimental feature and only for benchmark."""

    moe_grouped_gemm: bool = False
    """When there are multiple experts per rank, compress multiple local (potentially small) gemms
    in a single kernel launch to improve the utilization and performance by leveraging the Grouped
    GEMM feature introduced since CUTLASS 2.8 (https://github.com/fanshiqing/grouped_gemm).
    """

    moe_aux_loss_coeff: Union[float, List[float]] = 0.0
    """Scaling coefficient for the aux loss. A starting value of 1e-2 is recommended.
    If a list of load balancing types is provided for `moe_router_load_balancing_type`,
    a corresponding list of coefficients should be provided here."""

    moe_z_loss_coeff: Optional[float] = None  # 1e-3 would be a good start value for z-loss
    """Scaling coefficient for the z-loss. A starting value of 1e-3 is recommended."""

    moe_input_jitter_eps: Optional[float] = None
    """Add noise to the input tensor by applying jitter with a specified epsilon value."""

    moe_token_dropping: bool = False
    """This feature involves selectively dropping and padding tokens for each expert to achieve a
    specified capacity, similar to GShard, Switch-Transformer, and DeepSpeed-MoE. Note that this is
    currently unsupported so should remain False."""

    moe_token_dispatcher_type: Literal['allgather', 'alltoall', 'flex'] = "allgather"
    """The type of token dispatcher to use. The default is 'allgather'.
    Options are 'allgather','alltoall' and 'flex'."""

    moe_enable_deepep: bool = False
    """[Experimental] Enable DeepEP for efficient token dispatching and combine in MoE models."""

    moe_flex_dispatcher_backend: Literal['deepep', 'hybridep'] = "deepep"
    """[Experimental] The backend to use for flex token dispatcher. The default is "deepep".
    Options are "deepep" and "hybridep". Currently only "hybridep" backend supports 
    the MNNVL case."""

    moe_per_layer_logging: bool = False
    """Enable per-layer logging for MoE, currently supports auxiliary loss and z loss."""

    moe_expert_capacity_factor: Optional[float] = None
    """moe_expert_capacity_factor (float): The capacity factor for each expert, None means no token
    will be dropped. The default is None."""

    moe_pad_expert_input_to_capacity: bool = False
    """moe_pad_expert_input_to_capacity (bool): If True, pads the input for each expert to match
    the expert capacity length, effective only after the moe_expert_capacity_factor is set. The
    default setting is False."""

    moe_pad_experts_for_cuda_graph_inference: bool = False
    """moe_pad_experts_for_cuda_graph_inference (bool): If True, the router will switch to dropping
    and padding during decode time which does not have a D2H sync. The capacity factor is set to the
    max that an expert could see during inference so no tokens are actually dropped. The default
    setting is False."""

    moe_token_drop_policy: Literal['probs', 'position'] = "probs"
    """The policy to drop tokens. Can be either "probs" or "position". If "probs", the tokens with
    the lowest probabilities will be dropped. If "position", tokens at the end of each batch will
    be dropped.
    """

    moe_layer_recompute: bool = False
    """Memory optimization: checkpointing moe_layer to save actiavtion memory."""

    moe_permute_fusion: bool = False
    """Fuse token rearrangement ops during token dispatching."""

    moe_router_fusion: bool = False
    """Enable fusion for MoE TopK routing and aux-loss computation. This is only
    supported in TransformerEngine 2.7.0 and above.
    """

    moe_apply_probs_on_input: bool = False
    """Apply probs on input of experts instead of applying after activation and glu."""

    moe_latent_size: Optional[int] = None
    """Latent projection dimension for MoE. If None, MoE latent projections are not used."""

    moe_deepep_num_sms: int = 20
    """Number of SMs to use for DeepEP."""

    moe_hybridep_num_sms: int = 16
    """Number of SMs to use for HybridEP. In pure NVL scenarios,
    16 SMs can generally achieve good bandwidth."""

    ##################
    # Context Parallel
    ##################
    cp_comm_type: Optional[Union[str, List[str]]] = None
    """Inter-gpu communication type for context parallelism.
    str: all layers share same communication type.
    List[str]: each layer has its separate communication type.
    cp_comm_type of each layer can be "p2p" or "all_gather" or "a2a" or "a2a+p2p".
    "p2p": Exchange KV chunks with P2P communications in ring topology. P2P is async and can be
    overlapped with attention compute.
    "all_gather": All-gather to get full sequence of KV before attention. The all-gather is not
    async, and cannot be overlapped.
    "a2a": Like DeepSpeed Ulysses, scatter attention heads across the CP group, and gather to get
    full sequence of QKV.
    "a2a+p2p": A hierarchical implementation of context parallelism to attention.
    It uses A2A communications in low-level CP groups (e.g., via NVLink),
    and P2P communications in high-level CP groups (e.g., via IBLink).
    """

    ##################
    # Cuda Graphs
    ##################
    enable_cuda_graph: bool = False
    """DEPRECATED and replaced by cuda_graph_impl.
    When set to true, either partial CUDA graph (1/many CUDA graph per layer) or full iteration
    CUDA graph (1 CUDA graph for whole iteration excluding optimizer) is enabled. --cuda-graph-scope
    determines the scope of graph capture."""

    cuda_graph_use_single_mempool: bool = False
    """[For `local` implementation only] When set to true, cudagraphs will be captured inside a
    single mempool, in which all cudagraphs may only be used once per step. If false, cudagraphs may
    be reused across microbatches. Enabling may reduce cudagraph memory overheads due to memory
    fragmentation, however may greatly increase the number of cudagraphs created when the number of
    microbatches is high."""

    cuda_graph_retain_backward_graph: bool = False
    """When set to true, cudagraph backward passes will be graph captured with 'retain_grad=True'
    This may enable cudagraphs for certain modules that are not completely cudagraph safe. For
    more details, see: https://pytorch.org/docs/stable/generated/torch.Tensor.backward.html."""

    cuda_graph_warmup_steps: int = 3
    """Number of warmup steps for CUDA graphs"""

    external_cuda_graph: bool = False
    """DEPRECATED and replaced by cuda_graph_impl.
    When set to true, TransformerLayer layers are swapped with user provided CUDA graphs."""

    cuda_graph_impl: Literal['none', 'local', 'transformer_engine'] = "none"
    """Determines the CUDA graph capture implementation.
    "none": no CUDA graph.
    "local": capture the CUDA graph using MCore local implementation. Either partial CUDA graph
    (1/many CUDA graph per layer) or full iteration CUDA graph (1 CUDA graph for whole iteration
    excluding optimizer) is enabled.
    "transformer_engine": capture the CUDA graph using TE make_graphed_callables()."""

    cuda_graph_scope: Union[str, CudaGraphScope, List[str], List[CudaGraphScope]] = "full"
    """Determines the CUDA graphs capturing scope.
    When cuda_graph_impl is set to "transformer_engine", valid values are "attn", "mlp", "moe",
    "moe_router", "moe_preprocess", "mamba". "full" or an empty list means the full layer. "full"
    is actually deprecated, but for backward compatibility, we still use "full" as the default
    value. It will be transformed to an empty list in __post_init__.
    When cuda_graph_impl is set to "local", "full_iteration" can be specified as cuda_graph_scope
    to enable whole iteration CUDA graph. All other values enable layerwise CUDA graph."""

    ####################
    # miscellaneous
    ####################
    clone_scatter_output_in_embedding: bool = True
    """When set to True, clone the output of scatter_to_sequence_parallel_region in embedding layer
    to facilitate garbage collection of input."""

    disable_parameter_transpose_cache: bool = False
    """When set to true, the parameter transposes are not cached for subsequent iterations."""

    config_logger_dir: str = ""
    """When non-empty, dumps entry-point configs to config_logger_dir"""

    flash_decode: bool = False
    """ Use the optimized flash decoding kernel during inference. """

    batch_invariant_mode: bool = False
    """If true, uses batch-invariant kernels that provide deterministic forward execution regardless
       of batch size. This ensures bitwise identical results when the same inputs are processed
       in different batch configurations. This will significantly affect speed of 
       training and inference as the kernels are not full optimized.
       Defaults to False."""

    use_te_activation_func: bool = False
    """Whether to use ffn activation functions implemented by TransformerEngine"""

    use_te_rng_tracker: bool = False
    """ Whether to use the TE or MCore version of the RNG tracker. """

    inference_rng_tracker: bool = False
    """ Whether we should instantiate a separate RNG tracker for inference. """

    inference_sampling_seed: int = 42
    """ Random seed to use for sampling during inference. """

    symmetric_ar_type: Optional[Literal['two_shot', "one_shot", "multimem_all_reduce"]] = None
    """What type of symmetric all reduce to use. The default is None
    which is no use of symmetric memory.
    """

    nccl_all_reduce_for_prefill: bool = False
    """If True, use NCCL all-reduce kernels when symmetric all-reduce is enabled."""

    use_inference_optimized_layers: bool = False
    """If True, use inference optimized transformer layers during inference."""

    inference_fuse_tp_communication: bool = False
    """ If true, uses a fused reduce-scatter-residual-norm-allgather kernel during inference. """

    inference_disable_triton_nvls_kernels: bool = False
    """ If true, disables the use of Triton NVLS kernels during inference. """

    inference_grouped_gemm_backend: Literal['auto', 'torch', 'te'] = "auto"
    """Specifies the backend to use for grouped GEMM operations during inference.
    Options:
    - 'auto': Uses FlashInfer for CUDA-graphed iterations (requires flashinfer-python),
      and torch.nn.functional.grouped_mm for non-CUDA-graphed iterations (falls back to TE
      if unavailable). Note: the heuristic for choosing backends in 'auto' mode may change
      in future releases.
    - 'torch': Uses torch.nn.functional.grouped_mm. For CUDA-graphed iterations, uses
      mcore_fused_moe (permute/unpermute + grouped_mm with Triton kernels).
    - 'te': Uses TE GroupedGEMM only. Not supported with CUDA graphs.
    """

    inference_moe_disable_fused_quant_kernels: bool = False
    """When False (default), use fused kernels that combine permute/activation with
    MXFP8 quantization + swizzle into a single kernel launch. Only applies when
    fp8_recipe='mxfp8'. Set to True to disable fusion and use separate kernel
    launches (useful for debugging)."""

    mrope_section: Optional[List[int]] = None
    """ Multimodal rope section is for channel dimension of temporal, height and width
    in rope calculation. """

    is_hybrid_model: bool = False
    """ Indicates whether this is a hybrid model. """

    mamba_state_dim: int = 128
    """The dimensionality of the state representation in Mamba layers."""

    mamba_head_dim: int = 64
    """The dimensionality of the heads in the Mamba layers."""

    mamba_num_groups: int = 8
    """The number of groups used in Mamba layers."""

    mamba_num_heads: Optional[int] = None
    """The number of heads used in Mamba layers.
    If None, the number of heads will be hidden_size * expand // mamba_head_dim."""

    use_mamba_mem_eff_path: bool = field(
        default=True, metadata={"argparse_meta": {"arg_names": ["--disable-mamba-mem-eff-path"]}}
    )
    """Controls usage of the memory efficient path for Mamba layers."""

    mlp_chunks_for_prefill: int = 1
    """The number of chunks along the sequence dimension to use for MLP computation
    during prefill."""

    heterogeneous_block_specs: bool = False
    """Whether to use heterogeneous block specs (nemotron-nas architecture)."""

    hetereogenous_dist_checkpoint: bool = False
    """Whether to use heterogenous layers in distributed checkpoint."""

    ####################
    # Quantization
    ####################
    quant_recipe: Optional[RecipeConfig] = None
    """Configuration of any per-module quantization settings to be applied to the model"""

    transformer_impl: Literal['local', 'transformer_engine', 'inference_optimized'] = (
        "transformer_engine"
    )
    """Transformer implementation to use.
    Options are 'transformer_engine' for Transformer Engine and 'local' for MCore."""

    #####################################
    # Fine-grained Activation Offloading
    #####################################
    fine_grained_activation_offloading: bool = False
    """If True, offload the input of the specified modules to the CPU.
    Fine-grained activation offloading is a module-level offloading method
    instead of a layer-level offloading method like cpu_offloading."""

    offload_modules: Optional[list[str]] = field(default_factory=list)
    """The submodules to offload its input.
    choices: "attn_norm", "qkv_linear", "core_attn", "attn_proj",
             "mlp_norm", "expert_fc1", "moe_act".
    "attn_norm": offload the input of the normalization in the attention part.
    "qkv_linear": offload the input of the qkv linear part.
    "core_attn": offload the input of the core attention part.
    "attn_proj": offload the input of the attn linear projection part.
    "mlp_norm": offload the input of the normalization in the mlp part.
    "expert_fc1": offload the input of the expert fc1 part.
    "moe_act": offload the input of the moe act part.
    """
    min_offloaded_tensor_size: int = 1024 * 1024
    """The minimum size of the tensor to be offloaded."""

    def __post_init__(self):
        """Python dataclass method that is used to modify attributes after initialization.
        See https://docs.python.org/3/library/dataclasses.html#post-init-processing for more
        details.
        """
        super().__post_init__()

        # When fp32 residual connections are enabled, pipeline parallel communication must
        # use fp32 to match the dtype of the residual stream between pipeline stages.
        if self.fp32_residual_connection and self.pipeline_dtype is not None:
            if self.pipeline_dtype != torch.float:
                log_single_rank(
                    logger,
                    logging.WARNING,
                    f"fp32_residual_connection is enabled, overriding pipeline_dtype "
                    f"from {self.pipeline_dtype} to torch.float to match the "
                    f"residual stream dtype between pipeline stages.",
                )
            self.pipeline_dtype = torch.float

        if self.fp16 and self.bf16:
            raise ValueError(
                f"Only one of self.fp16: {self.fp16} and self.bf16 {self.bf16} should be True."
            )

        # Apply BF16 matmul precision setting if needed
        if self.bf16 and self.disable_bf16_reduced_precision_matmul:
            torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False

        if self.num_attention_heads % self.tensor_model_parallel_size != 0:
            raise ValueError(
                f"num_attention_heads ({self.num_attention_heads}) must be a multiple of "
                f"tensor_model_parallel_size ({self.tensor_model_parallel_size})."
            )

        if self.ffn_hidden_size is None:
            self.ffn_hidden_size = 4 * self.hidden_size

        if self.kv_channels is None:
            self.kv_channels = self.hidden_size // self.num_attention_heads

        if self.num_query_groups is None:
            self.num_query_groups = self.num_attention_heads

        if (
            self.num_query_groups % self.tensor_model_parallel_size != 0
            and self.tensor_model_parallel_size % self.num_query_groups != 0
        ):
            raise ValueError(
                f"num_query_groups ({self.num_query_groups}) must be a multiple or divisor of "
                f"tensor_model_parallel_size ({self.tensor_model_parallel_size})."
            )

        if self.experimental_attention_variant == "gated_delta_net":
            assert (
                self.linear_attention_freq is not None
            ), f"linear_attention_freq must be set for linear gated_delta_net."

            # Check required parameters
            assert (
                self.linear_conv_kernel_dim is not None
            ), "linear_conv_kernel_dim must be set for gated delta net."
            assert (
                self.linear_key_head_dim is not None
            ), "linear_key_head_dim must be set for gated delta net."
            assert (
                self.linear_value_head_dim is not None
            ), "linear_value_head_dim must be set for gated delta net."
            assert (
                self.linear_num_key_heads is not None
            ), "linear_num_key_heads must be set for gated delta net."
            assert (
                self.linear_num_value_heads is not None
            ), "linear_num_value_heads must be set for gated delta net."
            assert self.linear_num_value_heads % self.linear_num_key_heads == 0, (
                f"linear_num_value_heads ({self.linear_num_value_heads}) must be a multiple of "
                f"linear_num_key_heads ({self.linear_num_key_heads})."
            )

            # Check tensor parallelism compatibility
            assert (
                self.linear_num_key_heads % self.tensor_model_parallel_size == 0
            ), "linear_num_key_heads must be a multiple of tensor_model_parallel_size."
            assert (
                self.linear_num_value_heads % self.tensor_model_parallel_size == 0
            ), "linear_num_value_heads must be a multiple of tensor_model_parallel_size."

            # Do not support yet, but coming soon.
            assert self.context_parallel_size == 1, (
                f"Gated delta net does not support context parallel for now,"
                f" but got {self.context_parallel_size=}."
            )

        if self.fp8:
            # cannot support first last layer bf16 with delayed scaling
            if self.first_last_layers_bf16 and self.fp8_recipe == Fp8Recipe.delayed:
                raise ValueError("Delayed scaling does not support first / last layer in BF16.")

            # max bf16 layers per pipeline stage
            max_bf16_layers_per_pipeline_stage = (
                self.num_layers // self.pipeline_model_parallel_size
            )

            # check start/end bf16 layer counts are valid
            if self.first_last_layers_bf16:
                if (
                    self.num_layers_at_start_in_bf16 < 0
                    or self.num_layers_at_start_in_bf16 > max_bf16_layers_per_pipeline_stage
                ):
                    raise ValueError(
                        f"num_layers_at_start_in_bf16 ({self.num_layers_at_start_in_bf16}) must be "
                        f"between 0 and number of layers per pipeline stage "
                        f"({max_bf16_layers_per_pipeline_stage})."
                    )
                if (
                    self.num_layers_at_end_in_bf16 < 0
                    or self.num_layers_at_end_in_bf16 > max_bf16_layers_per_pipeline_stage
                ):
                    raise ValueError(
                        f"num_layers_at_end_in_bf16 ({self.num_layers_at_end_in_bf16}) must be "
                        f"between 0 and number of layers per pipeline stage "
                        f"({max_bf16_layers_per_pipeline_stage})."
                    )

            if self.fp8_recipe == Fp8Recipe.custom:
                if not self.fp8_quantizer_factory:
                    raise ValueError(
                        "fp8_quantizer_factory must be provided when fp8_recipe is 'custom'. "
                        "Specify a Python import path (e.g., package.module.quantizer_factory) "
                        "via --fp8-quantizer-factory."
                    )

        if self.fp8_param and not self.fp8:
            raise ValueError("fp8_param must be used together with fp8 mode.")

        # FP4 validation
        if self.fp4_param and not self.fp4:
            raise ValueError("fp4_param must be used together with fp4 mode.")

        if self.fp4 and self.fp8:
            raise ValueError("fp4 and fp8 cannot be used simultaneously. Please choose one.")

        if self.fp4 and self.fp4_recipe == Fp4Recipe.custom:
            if not self.fp4_quantizer_factory:
                raise ValueError(
                    "fp4_quantizer_factory must be provided when fp4_recipe is 'custom'. "
                    "Specify a Python import path (e.g., package.module.quantizer_factory) "
                    "via --fp4-quantizer-factory."
                )

        if self.apply_query_key_layer_scaling:
            self.attention_softmax_in_fp32 = True

        if self.expert_model_parallel_size > 1 and self.num_moe_experts is None:
            raise ValueError("num_moe_experts must be non None to use expert-parallel.")

        if self.transformer_impl == "inference_optimized" and self.num_moe_experts is not None:
            if self.expert_tensor_parallel_size > 1:
                raise ValueError(
                    "Inference-optimized MoE layers does not support expert tensor parallelism."
                )
            if self.moe_expert_capacity_factor is not None:
                raise ValueError("Inference-optimized MoE layers only support dropless MoE ")
            if self.moe_router_padding_for_quantization:
                raise ValueError(
                    "Inference-optimized MoE layers do not support padded "
                    "routing map for quantization."
                )
            if self.moe_router_dtype != "fp32":
                raise ValueError(
                    "--transformer-impl='inference_optimized' requires --moe-router-dtype=fp32 "
                    "to avoid costly dtype conversions during decode."
                )

            if self.gated_linear_unit and self.cuda_graph_impl == "local":
                raise ValueError(
                    "--transformer-impl='inference_optimized' does not yet support CUDA graphs "
                    "with gated linear units (SwiGLU/GeGLU) due to differences in weight "
                    "layouts between the FlashInfer kernel and mcore. Either disable CUDA "
                    "graphs (--cuda-graph-impl=none) or use a non-gated activation "
                    "(e.g. squared_relu)."
                )

            assert self.inference_grouped_gemm_backend in ('auto', 'torch', 'te'), (
                f"inference_grouped_gemm_backend must be 'auto', 'torch', or 'te', "
                f"got '{self.inference_grouped_gemm_backend}'"
            )

            if self.cuda_graph_impl == "local":
                if self.inference_grouped_gemm_backend == "te":
                    raise ValueError(
                        "TE GroupedGEMM is not supported with CUDA graphs. Please set "
                        "inference_grouped_gemm_backend to 'auto' or 'torch', or disable "
                        "CUDA graphs (--cuda-graph-impl=none)."
                    )

        if self.num_moe_experts is not None and self.num_moe_experts <= 0:
            raise ValueError("num_moe_experts must be non-negative.")

        if self.num_moe_experts is not None and self.moe_ffn_hidden_size is None:
            self.moe_ffn_hidden_size = self.ffn_hidden_size
            warnings.warn("moe_ffn_hidden_size is not set, using ffn_hidden_size instead.")

        if self.num_moe_experts is None:
            assert (
                self.moe_ffn_hidden_size is None
            ), "moe_ffn_hidden_size must be None when num_experts is not set."

        if self.moe_enable_deepep:
            if self.moe_token_dispatcher_type != "flex":
                raise ValueError("DeepEP backend is only supported with flex token dispatcher.")
            if self.moe_flex_dispatcher_backend == "hybridep":
                raise ValueError("Only one backend is supported for flex token dispatcher.")
            self.moe_flex_dispatcher_backend = "deepep"
            warnings.warn(
                "moe_enable_deepep is deprecated."
                "Please use --moe-flex-dispatcher-backend=deepep instead."
            )

        if self.moe_token_dispatcher_type == "flex":
            if self.moe_pad_expert_input_to_capacity and (
                self.moe_enable_deepep or self.moe_flex_dispatcher_backend == "deepep"
            ):
                raise ValueError(
                    "Flex token dispatcher with deepep backend does not support "
                    "moe_pad_expert_input_to_capacity"
                )

        if self.moe_shared_expert_intermediate_size is not None:
            if self.moe_shared_expert_intermediate_size <= 0:
                raise ValueError(
                    f"moe_shared_expert_intermediate_size must be "
                    f"num_shared_experts * ffn_size_of_each_shared_expert, "
                    f"but got {self.moe_shared_expert_intermediate_size}"
                )
            if self.moe_shared_expert_overlap and self.moe_token_dispatcher_type not in [
                "alltoall"
            ]:
                raise ValueError(
                    f"moe_shared_expert_overlap only works with alltoall token dispatcher."
                )

        if isinstance(self.moe_router_load_balancing_type, list):
            assert isinstance(self.moe_aux_loss_coeff, list) and len(
                self.moe_aux_loss_coeff
            ) == len(self.moe_router_load_balancing_type), (
                "moe_aux_loss_coeff must be a list of the same length as "
                "moe_router_load_balancing_type"
            )

        if self.moe_expert_capacity_factor is not None:
            if self.moe_expert_capacity_factor < 0:
                self.moe_expert_capacity_factor = None
            if isinstance(self.moe_router_load_balancing_type, list):
                for load_balancing_type in self.moe_router_load_balancing_type:
                    if load_balancing_type not in [
                        "aux_loss",
                        "seq_aux_loss",
                        "global_aux_loss",
                        "none",
                    ]:
                        raise ValueError(
                            "moe_expert_capacity_factor only works with aux_loss, "
                            "seq_aux_loss, global_aux_loss or none load balancing"
                        )
            elif self.moe_router_load_balancing_type not in [
                "aux_loss",
                "seq_aux_loss",
                "global_aux_loss",
                "none",
            ]:
                raise ValueError(
                    "moe_expert_capacity_factor only works with aux_loss, "
                    "seq_aux_loss, global_aux_loss or none load balancing"
                )

        if self.moe_pad_expert_input_to_capacity:
            if self.moe_expert_capacity_factor is None:
                raise ValueError(
                    "moe_expert_capacity_factor must be set to use moe_pad_expert_input_to_capacity"
                )

        if self.cpu_offloading and (
            self.cpu_offloading_num_layers < 0 or self.cpu_offloading_num_layers >= self.num_layers
        ):
            raise ValueError(
                f"CPU offloading can be done only for layers less than {self.num_layers}"
            )

        if self.cpu_offloading and self.pipeline_model_parallel_size > 1:
            raise ValueError(
                "Currently there is no support for Pipeline parallelism with CPU offloading"
            )

        if self.cpu_offloading and self.recompute_granularity is not None:
            raise ValueError(
                "CPU offloading does not work when activation recomputation is enabled"
            )

        if self.recompute_granularity is not None:
            if self.recompute_granularity not in ["full", "selective"]:
                raise ValueError(
                    f'When using recompute_granuarlity: {self.recompute_granularity} must be "full"'
                    'or "selective".'
                )

            if self.recompute_method is not None:
                if self.recompute_method not in ["block", "uniform"]:
                    raise ValueError(
                        f'recompute_method: {self.recompute_method} must be "block" or "uniform".'
                    )
            elif self.recompute_granularity != "selective":
                raise ValueError(
                    f"Using recompute_granularity: {self.recompute_granularity} so "
                    'recompute_method must be "block" or "uniform"'
                )

            if self.recompute_granularity != "selective" and self.recompute_num_layers is None:
                raise ValueError(
                    f"When using recompute_granularity: {self.recompute_granularity} "
                    "recompute_num_layers must be between "
                    "1 and num_layers_per_pipeline_rank: "
                    f"{self.num_layers // self.pipeline_model_parallel_size}"
                )
            elif (
                self.recompute_granularity == "selective" and self.recompute_num_layers is not None
            ):
                raise ValueError(
                    f"When using recompute_granularity: {self.recompute_granularity} "
                    "recompute_num_layers must be None."
                )

            if self.distribute_saved_activations and self.sequence_parallel:
                raise ValueError(
                    f"distribute_saved_activations: {self.distribute_saved_activations} must be "
                    f"false when sequence parallel is enabled: {self.sequence_parallel}"
                )

        if self.recompute_modules is None:
            self.recompute_modules = ["core_attn"]

        if self.recompute_granularity == "selective":
            if len(self.recompute_modules) > 0:
                allowed_modules = {
                    "core_attn",
                    "moe_act",
                    "layernorm",
                    "mla_up_proj",
                    "mlp",
                    "moe",
                    "shared_experts",
                }
                invalid_modules = set(self.recompute_modules) - allowed_modules
                assert not invalid_modules, (
                    f"Invalid choices for recompute_modules: {invalid_modules}. "
                    f"Allowed modules are: {allowed_modules}"
                )

            if "moe_act" in self.recompute_modules and not self.moe_grouped_gemm:
                raise ValueError(
                    "moe_act in recompute_modules is only supported with moe_grouped_gemm."
                )

            if "mla_up_proj" in self.recompute_modules and not self.multi_latent_attention:
                raise ValueError(
                    "mla_up_proj in recompute_modules is only supported with "
                    "multi_latent_attention."
                )

            if "core_attn" in self.recompute_modules:
                warnings.warn(
                    "If you are using transformer_engine as the transformer implementation, "
                    "the core_attn is from transformer_engine and may be the fused version. "
                    "For fused attention, you have no need to set 'core_attn' to recompute. "
                    "Please check that the core_attn recompute is really needed."
                )

            if "shared_experts" in self.recompute_modules:
                if (
                    self.moe_shared_expert_intermediate_size is not None
                    and self.moe_shared_expert_overlap
                ):
                    raise ValueError(
                        "shared_experts recompute cannot work with --moe-shared-expert-overlap."
                    )

            if self.fp8:
                if "moe_act" in self.recompute_modules or "layernorm" in self.recompute_modules:
                    if self.fp8_recipe == 'delayed':
                        raise ValueError(
                            "Delayed scaling does not support moe_act and layernorm recompute "
                            "for fp8."
                        )
                    if not is_te_min_version("2.6.0dev0"):
                        raise ValueError(
                            "moe_act and layernorm recompute for fp8 needs "
                            "transformer-engine>=2.6.0dev0, "
                            f"but your version is {get_te_version()}."
                        )

        if self.moe_layer_recompute:
            warnings.warn(
                "--moe-layer-recompute is deprecated. "
                "Use --recompute-granularity selective --recompute-modules moe_layer instead."
            )
            if self.recompute_granularity == "full":
                raise ValueError(
                    "Do not set --moe-layer-recompute with full recompute granularity. "
                )
            self.recompute_granularity = "selective"
            if "moe" not in self.recompute_modules:
                self.recompute_modules.append("moe")

        if self.fine_grained_activation_offloading:
            assert (
                not self.cpu_offloading
            ), "fine_grained_activation_offloading cannot be enabled with cpu_offloading."
            assert self.offload_modules is not None and len(self.offload_modules) > 0
            allowed_modules = {
                "core_attn",
                "attn_proj",
                "expert_fc1",
                "moe_act",
                "attn_norm",
                "mlp_norm",
                "qkv_linear",
            }
            invalid_modules = set(self.offload_modules) - allowed_modules
            assert not invalid_modules, (
                f'Invalid choices for offload_modules: {invalid_modules}. '
                f'Allowed modules are: {allowed_modules}'
            )
            if "attn_proj" in self.offload_modules and "core_attn" not in self.offload_modules:
                raise ValueError(
                    "attn_proj cannot be set to offload_modules alone without core_attn "
                    "because the input of attn_proj is the output of core_attn, "
                    "which is needed in core_attn.backward()."
                )

        if (
            self.num_layers_in_first_pipeline_stage is not None
            or self.num_layers_in_last_pipeline_stage is not None
        ) and (
            self.account_for_embedding_in_pipeline_split or self.account_for_loss_in_pipeline_split
        ):
            raise ValueError(
                "num_layers_in_first_pipeline_stage and num_layers_in_last_pipeline_stage cannot be"
                "set at the same time with account_for_embedding_in_pipeline_split"
                "and account_for_loss_in_pipeline_split"
            )

        # PP layout
        if self.pipeline_model_parallel_layout is not None:
            # If pipeline layout is set, we will check the conflicts
            # with other pipeline layout arguments.
            any_conflict = (
                self.num_layers_in_first_pipeline_stage is not None
                or self.num_layers_in_last_pipeline_stage is not None
                or self.account_for_embedding_in_pipeline_split
                or self.account_for_loss_in_pipeline_split
            )
            if any_conflict:
                raise ValueError(
                    "pipeline_model_parallel_layout cannot be set"
                    " with other pipeline layout arguments."
                    f" {self.num_layers_in_first_pipeline_stage=},"
                    f" {self.num_layers_in_last_pipeline_stage=},"
                    f" {self.account_for_embedding_in_pipeline_split=},"
                    f" {self.account_for_loss_in_pipeline_split=}."
                )

            # Transfer pipeline_model_parallel_layout from str or list to
            # PipelineParallelLayerLayout
            if isinstance(self.pipeline_model_parallel_layout, str):
                self.pipeline_model_parallel_layout = PipelineParallelLayerLayout.from_str(
                    layout=self.pipeline_model_parallel_layout,
                    pipeline_model_parallel_size=self.pipeline_model_parallel_size,
                )
            elif isinstance(self.pipeline_model_parallel_layout, list):
                # Since list is not hashable, the initialization will not be cached.
                self.pipeline_model_parallel_layout = PipelineParallelLayerLayout(
                    layout=self.pipeline_model_parallel_layout,
                    pipeline_model_parallel_size=self.pipeline_model_parallel_size,
                )

            # Check whether the input VPP size conflicts with the PP layout
            detected_vpp_size = (
                self.pipeline_model_parallel_layout.virtual_pipeline_model_parallel_size
            )
            if self.virtual_pipeline_model_parallel_size is not None:
                assert self.virtual_pipeline_model_parallel_size == detected_vpp_size, (
                    f"virtual_pipeline_model_parallel_size conflicts with"
                    f" pipeline_model_parallel_layout,"
                    f" ({self.virtual_pipeline_model_parallel_size=}, "
                    f" {detected_vpp_size=})"
                )
            elif detected_vpp_size > 1:
                self.virtual_pipeline_model_parallel_size = detected_vpp_size

            # Check whether the layout is valid.
            self.mtp_standalone = self.pipeline_model_parallel_layout.validate_layer_layout(
                num_layers=self.num_layers, mtp_num_layers=self.mtp_num_layers
            )

        # Uneven PP
        elif (
            self.num_layers_in_first_pipeline_stage is not None
            or self.num_layers_in_last_pipeline_stage is not None
        ):
            pipeline_parallel_size = self.pipeline_model_parallel_size
            num_layers = self.num_layers

            if self.num_layers_in_first_pipeline_stage is not None:
                if self.num_layers_in_first_pipeline_stage <= 0:
                    raise ValueError("num_layers_in_first_pipeline_stage must be larger than 0")

                if self.virtual_pipeline_model_parallel_size is not None:
                    if (
                        self.num_layers_in_first_pipeline_stage
                        % self.virtual_pipeline_model_parallel_size
                        != 0
                    ):
                        raise ValueError(
                            f"number of layers at first stage: "
                            f"{self.num_layers_in_first_pipeline_stage}"
                            f"must be divisible by virtual pipeline"
                            f"parallel degree {self.virtual_pipeline_model_parallel_size}"
                        )
                num_layers -= self.num_layers_in_first_pipeline_stage
                pipeline_parallel_size -= 1

            if self.num_layers_in_last_pipeline_stage is not None:
                if self.num_layers_in_last_pipeline_stage <= 0:
                    raise ValueError("num_layers_in_last_pipeline_stage must be larger than 0")

                if self.virtual_pipeline_model_parallel_size is not None:
                    if (
                        self.num_layers_in_last_pipeline_stage
                        % self.virtual_pipeline_model_parallel_size
                        != 0
                    ):
                        raise ValueError(
                            f"number of layers at last stage: "
                            f"{self.num_layers_in_last_pipeline_stage}"
                            f"must be divisible by virtual pipeline"
                            f"parallel degree {self.virtual_pipeline_model_parallel_size}"
                        )
                num_layers -= self.num_layers_in_last_pipeline_stage
                pipeline_parallel_size -= 1

            # Ensure you either have middle pp stages and layers or none of them.
            if bool(num_layers) != bool(pipeline_parallel_size):
                raise ValueError(
                    f"Mismatch: {num_layers} middle layers remaining but {pipeline_parallel_size} "
                    f"middle PP stages available."
                )

            # Here pipeline_parallel_size is the number of middle PP stages. If there are middle
            # PP stages, check number of layers at middle stage is divisible by middle PP size.
            if pipeline_parallel_size and not num_layers % pipeline_parallel_size == 0:
                raise ValueError(
                    f"number of layers at middle stage: {num_layers} must be divisible by"
                    f"the middle pipeline model parallel size {pipeline_parallel_size}"
                )

            # If there are middle PP stages, check number of layers
            # on each middle PP rank is divisible by VPP size.
            if pipeline_parallel_size and self.virtual_pipeline_model_parallel_size is not None:
                num_layers_per_middle_pipeline_rank = num_layers // pipeline_parallel_size
                if (
                    not num_layers_per_middle_pipeline_rank
                    % self.virtual_pipeline_model_parallel_size
                    == 0
                ):
                    raise ValueError(
                        f"number of layers on each middle pipeline rank:"
                        f"{num_layers_per_middle_pipeline_rank} must be divisible by virtual"
                        f"pipeline parallel degree {self.virtual_pipeline_model_parallel_size}"
                    )

        elif (
            self.account_for_embedding_in_pipeline_split or self.account_for_loss_in_pipeline_split
        ):
            if self.virtual_pipeline_model_parallel_size is None:
                num_layers = self.num_layers

                if self.account_for_embedding_in_pipeline_split:
                    num_layers += 1

                if self.account_for_loss_in_pipeline_split:
                    num_layers += 1

                if not num_layers % self.pipeline_model_parallel_size == 0:
                    raise ValueError(
                        f"number of middle layers: {num_layers} must be divisible by "
                        f"middle pipeline_model_parallel_size {self.pipeline_model_parallel_size}"
                    )
            else:
                num_layers = self.num_layers
                if self.account_for_embedding_in_pipeline_split:
                    num_layers += 1

                if self.account_for_loss_in_pipeline_split:
                    num_layers += 1

                if not num_layers % self.pipeline_model_parallel_size == 0:
                    raise ValueError(
                        f"num_layers: {num_layers} after enable"
                        f"account_for_embedding_in_pipeline_split or "
                        f"account_for_loss_in_pipeline_split must be divisible"
                        f"by pipeline_model_parallel_size "
                        f"{self.pipeline_model_parallel_size}"
                    )

                num_layers_per_pipeline_rank = num_layers // self.pipeline_model_parallel_size
                if (
                    not num_layers_per_pipeline_rank % self.virtual_pipeline_model_parallel_size
                    == 0
                ):
                    raise ValueError(
                        f"number of layers on each pipeline rank: {num_layers_per_pipeline_rank}"
                        f"(after enable account_for_embedding_in_pipeline_split or "
                        f"account_for_loss_in_pipeline_split) must be divisible by"
                        f"virtual_pipeline_model_parallel_size"
                        f"{self.virtual_pipeline_model_parallel_size}"
                    )

        if self.apply_query_key_layer_scaling:
            self.attention_softmax_in_fp32 = True

        if self.bias_activation_fusion:
            if self.activation_func not in [F.gelu, F.silu, quick_gelu]:
                raise ValueError(
                    "When bias_activation_fusion is True, activation function should be either "
                    "gelu, swiglu, or quick_geglu"
                )
            if (
                self.activation_func == F.gelu
                and not self.gated_linear_unit
                and not self.add_bias_linear
            ):
                raise ValueError(
                    "When bias_activation_fusion is True, gated_linear_unit is False "
                    "and activation function is gelu, add_bias_linear must also be True."
                )
            if self.activation_func == quick_gelu and not self.gated_linear_unit:
                raise ValueError(
                    "When bias_activation_fusion is True and activation function is quick_gelu, "
                    "gated_linear_unit must be True."
                )
            if self.glu_linear_offset != 0.0 and self.activation_func != quick_gelu:
                raise ValueError(
                    "When bias_activation_fusion is True and glu_linear_offset is non-zero, "
                    "activation function must be quick_gelu."
                )

            if self.use_te_activation_func:
                raise ValueError(
                    "bias_activation_fusion and use_te_activation_func cannot be both true. "
                    "If you use bias in MLP FC1, we recommend setting bias_activation_fusion "
                    "to True and use_te_activation_func to False."
                )

        if self.fused_residual_rmsnorm:
            if self.normalization != "RMSNorm":
                raise ValueError(
                    "fused_residual_rmsnorm is only supported when normalization is RMSNorm."
                )

        if self.use_te_activation_func:
            if self.activation_func not in (F.gelu, F.silu, F.relu):
                raise ValueError(
                    "TransformerEngine only support gelu, geglu, silu, swiglu, relu, reglu. "
                    "If you don't want to use TransformerEngine activation function, set "
                    "use_te_activation_func to False"
                )

        if self.activation_func_fp8_input_store:
            if self.activation_func != F.silu or not self.gated_linear_unit:
                raise ValueError("Storing activation input in FP8 is supported only for SwiGLU.")

        if self.apply_rope_fusion:
            if self.multi_latent_attention:
                warnings.warn(
                    "apply_rope_fusion for multi-latent attention only supports training. "
                    "It is experimental and may change in future versions."
                )
            else:
                if self.rotary_interleaved:
                    if not is_te_min_version("2.3.0"):
                        raise ValueError(
                            "rotary_interleaved does not work with apply_rope_fusion for "
                            "TE < 2.3.0. Please install TE >= 2.3.0"
                        )

                from megatron.core.models.common.embeddings.rope_utils import (
                    fused_apply_rotary_pos_emb,
                    fused_apply_rotary_pos_emb_thd,
                )

                if fused_apply_rotary_pos_emb is None and fused_apply_rotary_pos_emb_thd is None:
                    raise ValueError(
                        "apply_rope_fusion is not available. Please install TE >= 1.4."
                    )

        if self.fused_single_qkv_rope:
            if self.attention_output_gate:
                raise ValueError("fused_single_qkv_rope does not support gated attention for now.")

        if self.multi_latent_attention and self.rotary_interleaved:
            raise ValueError("rotary_interleaved does not work with multi_latent_attention.")

        # MuP (Maximal Update Parameterization) configuration
        if self.use_mup:
            # Default base_hidden_size to hidden_size (base model case, width_mult=1.0)
            if self.mup_base_hidden_size is None:
                self.mup_base_hidden_size = self.hidden_size
            assert self.mup_base_hidden_size > 0, "--mup-base-hidden-size must be positive."
            # Compute width multiplier
            self.mup_width_mult = self.hidden_size / self.mup_base_hidden_size

            # MuP attention scaling: 1/d_head instead of 1/sqrt(d_head).
            if self.softmax_scale is None:
                base_head_scale = (
                    1.0 if self.mup_base_head_dim is None else self.mup_base_head_dim**0.5
                )
                self.softmax_scale = base_head_scale / (self.kv_channels**self.mup_attn_scale_power)

            # MuP output scaling: scale logits by 1/width_mult to keep outputs O(1).
            # Only auto-set if user hasn't explicitly configured it.
            if self.mup_output_mult == 1.0 and self.mup_width_mult != 1.0:
                self.mup_output_mult = 1.0 / self.mup_width_mult

            overridden_init_methods = []
            if self.init_method is not None:
                overridden_init_methods.append("init_method")
            if self.output_layer_init_method is not None:
                overridden_init_methods.append("output_layer_init_method")
            if overridden_init_methods:
                overridden_init_methods_text = " and ".join(overridden_init_methods)
                verb = "is" if len(overridden_init_methods) == 1 else "are"
                warnings.warn(
                    "use_mup is enabled, but custom "
                    + overridden_init_methods_text
                    + f" {verb} set. This may break MuP initialization assumptions.",
                    UserWarning,
                )

        # Set the embedding init method.
        # NOTE: This block must run AFTER the MuP block above but BEFORE the init_method
        # block below. When MuP is enabled and init_method is None (the common case),
        # embedding_init_method is set here using the unscaled init_method_std, while
        # init_method (set below) gets MuP width-scaling. This ordering ensures embeddings
        # use the base (unscaled) initialization as required by MuP.
        if self.embedding_init_method_std is None:
            # By default, use the same init std as you use for every other non-output layer.
            self.embedding_init_method_std = self.init_method_std

        if self.embedding_init_method is None:
            if self.init_method is None or (self.embedding_init_method_std != self.init_method_std):
                # In this case, we set both the init method and the embedding init method to
                #  whatever std value requested (or defaulted) for the embedding_init_layer
                self.embedding_init_method = init_method_normal(self.embedding_init_method_std)
            else:
                # Replicate the current behavior where if you are not changing the std of the
                #  embedding init differently and the init method is set, we fallback to the
                #  init method for this layer. Since we are here after an OR we know that
                #  init_method is not None
                self.embedding_init_method = self.init_method

        if self.init_method is None:
            if self.use_mup:
                # MuP: scale std by 1/sqrt(width_mult).
                self.init_method = init_method_normal(
                    self.init_method_std / math.sqrt(self.mup_width_mult)
                )
            else:
                self.init_method = init_method_normal(self.init_method_std)

        if self.output_layer_init_method is None:
            if self.use_mup:
                # MuP: depth and width scaling for output layers.
                self.output_layer_init_method = mup_scaled_init_method_normal(
                    self.init_method_std,
                    self.num_layers,
                    self.mup_width_mult,
                    multiplier=2.0 if not self.is_hybrid_model else 1.0,
                )
            else:
                self.output_layer_init_method = scaled_init_method_normal(
                    self.init_method_std,
                    self.num_layers,
                    multiplier=2.0 if not self.is_hybrid_model else 1.0,
                )

        if self.num_moe_experts is not None and self.add_bias_linear:
            assert (
                self.expert_tensor_parallel_size == 1
            ), "Bias in Moe is only supported when ETP==1"

        if self.moe_router_enable_expert_bias and self.moe_router_score_function != "sigmoid":
            raise ValueError(
                "Expert bias for aux-loss-free routing only supports sigmoid score function."
                "Please set --moe-router-score-function sigmoid for sigmoid score function."
            )

        if self.num_moe_experts and self.fp8:
            # TE version below 1.7.0 will raise Error when handle zeros tokens for expert
            if not is_te_min_version("1.7.0.dev0"):
                raise ValueError(
                    "Only transformer-engine>=1.7.0 supports MoE FP8 training, "
                    f"but your version is {get_te_version()}."
                )

            if self.moe_grouped_gemm and not is_te_min_version("1.11.0"):
                raise ValueError(
                    "Only transformer-engine>=1.11.0 supports FP8 grouped gemm, "
                    f"but your version is {get_te_version()}."
                )

        if self.moe_router_padding_for_fp8:
            # enable moe_router_padding_for_quantization
            warnings.warn(
                "--moe-router-padding-for-fp8 is going to be deprecated. "
                "Use --moe-router-padding-for-quantization instead."
            )
            self.moe_router_padding_for_quantization = True

        if self.moe_router_padding_for_quantization:
            if self.fp8 is None and self.fp4 is None:
                raise ValueError(
                    "fp8/fp4 must be specified when moe_router_padding_for_quantization is True."
                )

            if self.moe_token_dispatcher_type in ["allgather", "alltoall_seq"]:
                raise ValueError(
                    "allgather and alltoall_seq dispatcher does not support "
                    "moe_router_padding_for_quantization."
                )

        if (
            self.moe_router_topk == 1
            and self.moe_router_score_function == "softmax"
            and not self.moe_router_pre_softmax
            and self.moe_router_load_balancing_type != "sinkhorn"
        ):
            # Requires applying softmax before selecting the top-k when k is 1,
            # since softmax on a [num_tokens, 1] would yield a zero gradient.
            raise ValueError("Please use --moe-router-pre-softmax when topk is 1.")

        if self.moe_router_group_topk:
            if self.moe_router_topk_limited_devices:
                raise ValueError(
                    "moe_router_topk_limited_devices is deprecated and replaced by "
                    "moe_router_group_topk and moe_router_num_groups."
                )
            if not self.moe_router_num_groups:
                raise ValueError(
                    "When using group limited routing, moe_router_num_groups must be specified."
                )
            else:
                assert self.num_moe_experts % self.moe_router_num_groups == 0, (
                    f"num_moe_experts ({self.num_moe_experts}) should be divisible by "
                    f"moe_router_num_groups ({self.moe_router_num_groups})."
                )
                assert self.moe_router_group_topk <= self.moe_router_num_groups, (
                    f"moe_router_group_topk ({self.moe_router_group_topk}) should be smaller than "
                    f"moe_router_num_groups ({self.moe_router_num_groups})."
                )
        elif self.moe_router_topk_limited_devices:
            warnings.warn(
                "moe_router_topk_limited_devices is deprecated. Use moe_router_group_topk and "
                "moe_router_num_groups instead."
            )
            self.moe_router_group_topk = self.moe_router_topk_limited_devices
            self.moe_router_num_groups = self.expert_model_parallel_size

        if self.enable_cuda_graph or self.external_cuda_graph:
            assert (
                self.cuda_graph_impl == "none"
            ), "Do not use enable_cuda_graph or external_cuda_graph with cuda_graph_impl."
            assert (
                not self.enable_cuda_graph or not self.external_cuda_graph
            ), "enable_cuda_graph and external_cuda_graph cannot be enabled at the same time."

            if self.enable_cuda_graph:
                warnings.warn('enable_cuda_graph is deprecated, use cuda_graph_impl=local instead.')
                self.cuda_graph_impl = "local"
            if self.external_cuda_graph:
                warnings.warn(
                    'external_cuda_graph is deprecated, '
                    'use cuda_graph_impl=transformer_engine instead.'
                )
                self.cuda_graph_impl = "transformer_engine"

        if self.cuda_graph_scope is None:
            self.cuda_graph_scope = []
        elif not isinstance(self.cuda_graph_scope, list):
            if isinstance(self.cuda_graph_scope, CudaGraphScope):
                self.cuda_graph_scope = [self.cuda_graph_scope]
            else:
                assert isinstance(self.cuda_graph_scope, str), (
                    "cuda_graph_scope must be a string that can be converted to a list of "
                    f"CudaGraphScope, got {self.cuda_graph_scope}."
                )
                self.cuda_graph_scope = self.cuda_graph_scope.split(',')
        if all(isinstance(scope, str) for scope in self.cuda_graph_scope):
            # Backward compatibility for "full" scope. Now we use an empty list instead.
            if "full" in self.cuda_graph_scope:
                assert self.cuda_graph_scope == [
                    "full"
                ], "full scope cannot be used with other scopes."
                warnings.warn(
                    "full scope is deprecated. "
                    "Use empty cuda_graph_scope to capture the whole layer."
                )
                self.cuda_graph_scope = []
            else:
                self.cuda_graph_scope = [CudaGraphScope[scope] for scope in self.cuda_graph_scope]
        assert all(
            isinstance(scope, CudaGraphScope) for scope in self.cuda_graph_scope
        ), f"cuda_graph_scope must be a list of CudaGraphScope, got {self.cuda_graph_scope}."

        if self.cuda_graph_impl != "none":
            assert self.cuda_graph_impl in [
                "transformer_engine",
                "local",
            ], f"Invalid cuda graph implementation: {self.cuda_graph_impl}"

            if self.cpu_offloading:
                raise ValueError("CUDA graphs not supported with CPU offloading.")

            if self.cuda_graph_impl == "local":
                # local impl doesn't currently distinguish between moe_preproocess or moe_router
                # so just set both if either is specified.
                if (
                    CudaGraphScope.moe_router in self.cuda_graph_scope
                    or CudaGraphScope.moe_preprocess in self.cuda_graph_scope
                ):
                    if CudaGraphScope.moe_router not in self.cuda_graph_scope:
                        self.cuda_graph_scope.append(CudaGraphScope.moe_router)
                    if CudaGraphScope.moe_preprocess not in self.cuda_graph_scope:
                        self.cuda_graph_scope.append(CudaGraphScope.moe_preprocess)

            # Check cuda graph scopes
            if self.cuda_graph_impl == "transformer_engine":
                assert CudaGraphScope.full_iteration not in self.cuda_graph_scope, (
                    "To use full iteration cuda graph, please use "
                    "cuda_graph_impl=local instead of cuda_graph_impl=transformer_engine."
                )
            assert (
                CudaGraphScope.moe not in self.cuda_graph_scope
                or CudaGraphScope.moe_router not in self.cuda_graph_scope
            ), 'cuda_graph_scope must not contain both moe and moe_router.'
            if CudaGraphScope.moe_preprocess in self.cuda_graph_scope:
                assert (
                    CudaGraphScope.moe_router in self.cuda_graph_scope
                ), 'moe_preprocess cuda graph is only supported with moe_router cuda graph.'
            if self.num_moe_experts is None or self.num_moe_experts <= 1:
                assert (
                    CudaGraphScope.moe not in self.cuda_graph_scope
                    and CudaGraphScope.moe_router not in self.cuda_graph_scope
                ), 'moe cuda graph is only supported for MoE.'
            else:
                if self.moe_layer_freq == 1 or (
                    isinstance(self.moe_layer_freq, list) and 0 not in self.moe_layer_freq
                ):
                    assert CudaGraphScope.mlp not in self.cuda_graph_scope, (
                        'mlp cuda graph is only supported for dense layers, '
                        'but not found in the model.'
                    )
                if (
                    self.moe_expert_capacity_factor is None
                    or not self.moe_pad_expert_input_to_capacity
                ):
                    assert (
                        CudaGraphScope.moe not in self.cuda_graph_scope
                    ), 'moe cuda graph is only supported with drop-padding MoE.'
                    if self.moe_token_dispatcher_type == 'alltoall' and (
                        self.moe_expert_capacity_factor is not None
                        or self.moe_router_padding_for_fp8
                    ):
                        assert CudaGraphScope.moe_preprocess not in self.cuda_graph_scope, (
                            'moe_preprocess cuda graph is not supported when there are '
                            'DtoH copies and synchronizations in the preprocess step.'
                        )

            if self.recompute_granularity:
                if self.recompute_granularity != "selective":
                    assert self.cuda_graph_scope == [
                        CudaGraphScope.full_iteration
                    ], "full recompute is only supported with full iteration CUDA graph."
                else:
                    # The recompute module should be inside or outside of the graph scope.
                    # Recompute module coverring graph scope is not allowed.
                    if (
                        self.cuda_graph_impl == "transformer_engine"
                        and "moe" in self.recompute_modules
                    ):
                        assert (
                            CudaGraphScope.moe_router not in self.cuda_graph_scope
                        ), "moe recompute is not supported with moe_router CUDA graph with: "
                        "--cuda-graph-impl transformer_engine."

                    # Graphed recompute module doesn't accept random number.
                    if (
                        not self.cuda_graph_scope
                        or CudaGraphScope.full_iteration in self.cuda_graph_scope
                    ):
                        full_cudagraph = True
                    else:
                        full_cudagraph = False
                    if self.attention_dropout != 0.0:
                        assert (
                            not full_cudagraph and CudaGraphScope.attn not in self.cuda_graph_scope
                        ) or "core_attn" not in self.recompute_modules, (
                            "attention dropout is not supported with graphed attention "
                            "recomputation."
                        )
                    if self.hidden_dropout != 0.0:
                        assert (
                            (not full_cudagraph and CudaGraphScope.mlp not in self.cuda_graph_scope)
                            or "mlp" not in self.recompute_modules
                        ) and (
                            (not full_cudagraph and CudaGraphScope.moe not in self.cuda_graph_scope)
                            or "moe" not in self.recompute_modules
                        ), "hidden dropout is not supported with graphed MLP/MoE recomputation."
                    if self.moe_input_jitter_eps is not None:
                        assert (
                            not full_cudagraph and CudaGraphScope.moe not in self.cuda_graph_scope
                        ) or "moe" not in self.recompute_modules, (
                            "moe_input_jitter_eps is not supported with graphed moe recomputation."
                        )

        if self.moe_token_dispatcher_type in ["allgather"]:
            if self.variable_seq_lengths is True:
                raise ValueError(
                    f"Token dispatcher type: {self.moe_token_dispatcher_type} does not support "
                    f"variable sequence length, please use alltoall dispatcher instead."
                )

        if self.moe_permute_fusion:
            from megatron.core.transformer.moe.moe_utils import (
                fused_permute,
                fused_permute_with_probs,
                fused_sort_chunks_by_index,
                fused_sort_chunks_by_index_with_probs,
                fused_unpermute,
            )

            if (
                fused_permute is None
                or fused_permute_with_probs is None
                or fused_sort_chunks_by_index is None
                or fused_sort_chunks_by_index_with_probs is None
                or fused_unpermute is None
            ):
                raise ValueError("fused permutation is not available. Please install TE >= 2.1.0.")

        if self.overlap_moe_expert_parallel_comm:
            # TODO: remove this after we fix the hang issue with torch version < 2.6.0
            assert is_torch_min_version(
                "2.6.0"
            ), "A2A Overlap encounters hang issue with torch version < 2.6.0"
            if self.pipeline_model_parallel_size > 1:
                assert self.virtual_pipeline_model_parallel_size is not None, (
                    "If enabling EP A2A overlap, virtual_pipeline_model_parallel_size "
                    "must be specified when pipeline_model_parallel_size > 1"
                )
            # Expert model parallelism requirements
            assert (
                self.expert_model_parallel_size > 1
            ), 'overlap_moe_expert_parallel_comm is only supported with expert model parallelism'
            assert self.moe_token_dispatcher_type in [
                'alltoall',
                'flex',
            ], 'overlap_moe_expert_parallel_comm is supported with alltoall/flex token dispatcher'

            assert (
                self.recompute_granularity != 'full'
            ), 'disable full recomputation when enabling overlap_moe_expert_parallel_comm'
            assert (
                self.recompute_method is None
            ), 'disable recomputation method when enabling overlap_moe_expert_parallel_comm'
            assert (
                self.recompute_num_layers is None
            ), 'recompute_num_layers must be None when enabling overlap_moe_expert_parallel_comm'
            assert (
                "moe" not in self.recompute_modules
            ), 'disable moe in recompute_modules when enabling overlap_moe_expert_parallel_comm'

            # Check if bf16 or fp16 is used
            assert (
                self.bf16 or self.fp16
            ), 'overlap_moe_expert_parallel_comm is only supported with bf16 or fp16 model'

            assert (
                not self.moe_shared_expert_overlap
            ), 'disable moe_shared_expert_overlap when enabling overlap_moe_expert_parallel_comm'
            assert (
                self.mtp_num_layers is None or self.mtp_num_layers == 1
            ), 'MTP layernum only supports 1 when enabling overlap_moe_expert_parallel_comm.'
            if self.mtp_num_layers == 1:
                assert self.pipeline_model_parallel_size > 1, (
                    'Pipeline model parallel size must be larger than 1 '
                    'when enabling overlap_moe_expert_parallel_comm with MTP layer.'
                )

            if self.cuda_graph_impl != "none":
                assert (
                    self.cuda_graph_impl == "transformer_engine"
                    and CudaGraphScope.moe not in self.cuda_graph_scope
                    and CudaGraphScope.mlp not in self.cuda_graph_scope
                ), (
                    'CUDA graph scope on moe and mlp is not '
                    'supported with overlap_moe_expert_parallel_comm'
                )

        # Check delay_wgrad_compute compatibility
        if self.delay_wgrad_compute:
            assert (
                self.overlap_moe_expert_parallel_comm
            ), 'overlap_moe_expert_parallel_comm must be enabled when enabling delay_wgrad_compute'
            if self.cuda_graph_impl == "transformer_engine":
                assert is_te_min_version("2.10.0"), (
                    'TE version >= 2.10.0 is required for delay_wgrad_compute with '
                    'partial cuda graph'
                )

        if self.ep_overlap_early_attn_memory_release:
            assert self.overlap_moe_expert_parallel_comm, (
                'overlap_moe_expert_parallel_comm must be enabled when enabling '
                'ep_overlap_early_attn_memory_release'
            )

        if self.context_parallel_size > 1 and self.cp_comm_type is not None:
            if isinstance(self.cp_comm_type, list):
                assert len(self.cp_comm_type) == self.num_layers, (
                    f"Length of cp_comm_type ({len(self.cp_comm_type)}) should equal to "
                    f"the total number of transformer layers ({self.num_layers})!"
                )
            else:
                assert isinstance(
                    self.cp_comm_type, str
                ), "Unsupported communication type for context parallelism!"

        assert (
            self.pipeline_model_parallel_size > 0
        ), f"Pipeline model parallel size must be larger than 0 \
            when enable --standalone-embedding-stage and --standalone-loss-stage"

        if (
            self.num_moe_experts is not None
            and self.num_moe_experts >= 32
            and not self.moe_router_dtype
        ):
            warnings.warn(
                "Using a large number of experts (e.g. >=32) without fp32 routing. "
                "Consider enabling moe_router_dtype for better numerical stability."
            )
        if self.symmetric_ar_type is not None:
            if not HAVE_PACKAGING:
                raise ImportError(
                    "packaging is not installed. Please install it with `pip install packaging`."
                )
            assert is_torch_min_version("2.7.0a0"), "Must have at least torch version 2.7 or higher"
            assert is_te_min_version("2.3.0") or get_te_version() == PkgVersion(
                "2.3.0.dev0+39c0e70"
            ), "Must have at least TE version 2.3 or higher to use symmetric memory all reduce"

        if self.no_rope_freq:
            assert not self.flash_decode, "flash_decode cannot be used with no_rope."
            if isinstance(self.no_rope_freq, int):
                assert self.num_layers % self.no_rope_freq == 0, (
                    f"no_rope_freq={self.no_rope_freq} should be "
                    f"divisible by num_layers={self.num_layers}."
                )
                # Convert integer pattern to list pattern
                # e.g. no_rope=4 with num_layers=8 becomes [0,0,0,1,0,0,0,1]
                pattern = [0] * (self.no_rope_freq - 1) + [1]
                self.no_rope_freq = pattern * (self.num_layers // self.no_rope_freq)
            else:
                assert len(self.no_rope_freq) == self.num_layers, (
                    f"Length of no_rope list ({len(self.no_rope_freq)}) must match "
                    f"the number of layers ({self.num_layers})"
                )

        if self.transformer_impl == "inference_optimized":
            assert self.normalization == "RMSNorm"
            assert not self.layernorm_zero_centered_gamma
            assert not self.add_bias_linear
            assert not self.add_qkv_bias
            assert not self.use_kitchen

        if self.experimental_attention_variant == "dsa":
            assert (
                self.context_parallel_size == 1
            ), "Currently context parallelism is not supported by DSAttention!"
            assert not self.apply_rope_fusion, "RoPE fusion is not supported for DSAttention"

        if self.inference_fuse_tp_communication:
            assert self.transformer_impl == "inference_optimized", (
                "inference_fuse_tp_communication is only supported "
                "for inference_optimized transformer implementation."
            )
            assert (
                self.num_moe_experts is None
            ), "--inference-fuse-tp-communication is not supported for MoE models."

        if self.inference_disable_triton_nvls_kernels:
            assert self.transformer_impl == "inference_optimized", (
                "inference_disable_triton_nvls_kernels is only supported "
                "for inference_optimized transformer implementation."
            )

        if self.batch_invariant_mode:
            assert (
                self.attention_backend == AttnBackend.flash
            ), "Batch invariant mode only supports FlashAttention"


@dataclass
class MLATransformerConfig(TransformerConfig):
    """Configuration object for megatron-core Multi-Latent Attention (MLA) transformers.

    The initialization function has an argument for each parameter, including those in
    ModelParallelConfig. Included YaRN RoPE parameters that is fused in MLA.
    """

    multi_latent_attention: bool = True
    """Whether to use Multi-Latent Attention."""

    q_lora_rank: int = 512
    """Rank of Query tensor's low rank representation."""

    kv_lora_rank: int = 512
    """Rank of Key and Value tensors' low rank representation."""

    qk_head_dim: int = 128
    """Dimension of the head in the QK projection. q_head_dim = qk_head_dim + qk_pos_emb_head_dim"""

    qk_pos_emb_head_dim: int = 64
    """Dimension of the position embedding in the QK projection."""

    v_head_dim: int = 128
    """Dimension of the head in the V projection."""

    normalization: str = "RMSNorm"
    """Default normalization layer for MLA models is RMSNorm."""

    rope_type: str = "yarn"
    """Type of RoPE to use. Default to yarn, options are rope and yarn."""

    rotary_base: float = 10000
    """Rotary base for the rotary embeddings, used by rope and yarn."""

    rotary_percent: float = 1.0
    """Rotary percent for the rotary embeddings, used by rope."""

    rotary_scaling_factor: float = 40
    """Rotary scaling factor for the rotary embeddings, used by yarn."""

    original_max_position_embeddings: int = 4096
    """Original maximum position embeddings for the original model, used by yarn."""

    beta_fast: float = 32
    """Beta fast for YaRN RoPE, used by yarn."""

    beta_slow: float = 1
    """Beta slow for YaRN RoPE, used by yarn."""

    mscale: float = 1.0
    """Mscale for YaRN RoPE in Multi-Latent Attention, used by yarn."""

    mscale_all_dim: float = 0.0
    """Mscale all dimensions for YaRN RoPE in Multi-Latent Attention, used by yarn."""

    cache_mla_latents: bool = False
    """Cache the low dimensional tensors for MLA rather than full KV cache.
       This is only for the dynamic inference backend and requires that 
       Flash MLA is installed."""

    mla_down_proj_fusion: bool = False
    """Enable fused q/kv down-projection and fused input layernorm when backend supports.
       Otherwise fall back to the unfused MLA.
    """

    def __post_init__(self):
        super().__post_init__()
        if self.multi_latent_attention and self.apply_rope_fusion and self.rope_type != "yarn":
            raise ValueError("apply_rope_fusion for MLA only works with YARN RoPE.")

        if self.attention_output_gate:
            raise NotImplementedError("Output gate is not supported for MLA yet.")

        if self.cache_mla_latents:
            assert (
                self.apply_rope_fusion is False
            ), "Rope Fusion is not compatible with caching latents"


================================================
FILE: megatron/core/transformer/transformer_layer.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
from __future__ import annotations

import functools
import logging
import warnings
from abc import ABC
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Dict, Optional, Union

import torch
import torch.distributed
from torch import Tensor

from megatron.core import parallel_state, tensor_parallel
from megatron.core.dist_checkpointing.mapping import ShardedStateDict
from megatron.core.dist_checkpointing.utils import apply_prefix_mapping
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.transformer.cuda_graphs import is_graph_capturing
from megatron.core.transformer.enums import CudaGraphScope, LayerType
from megatron.core.transformer.identity_op import IdentityFuncOp, IdentityOp
from megatron.core.transformer.mlp import MLP
from megatron.core.transformer.module import GraphableMegatronModule
from megatron.core.transformer.spec_utils import ModuleSpec, build_module
from megatron.core.transformer.torch_norm import LayerNormBuilder
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.typed_torch import apply_module, copy_signature
from megatron.core.utils import (
    deprecate_inference_params,
    get_pg_rank,
    is_te_min_version,
    log_single_rank,
    make_viewless_tensor,
    nvtx_range_pop,
    nvtx_range_push,
)

if TYPE_CHECKING:
    from megatron.core.inference.contexts import BaseInferenceContext

logger = logging.getLogger(__name__)


def get_transformer_layer_offset(
    config: TransformerConfig, vp_stage: Optional[int] = None, pp_rank: Optional[int] = None
):
    """Get the index offset of current pipeline stage, given the level of pipelining."""
    if pp_rank is None:
        pp_rank = parallel_state.get_pipeline_model_parallel_rank()

    is_first_pp_stage = pp_rank == 0

    if config.pipeline_model_parallel_size > 1:

        if config.pipeline_model_parallel_layout:
            offset = config.pipeline_model_parallel_layout.get_layer_offset(
                layer_type=LayerType.decoder, vp_stage=vp_stage
            )
        elif (
            config.num_layers_in_first_pipeline_stage is not None
            or config.num_layers_in_last_pipeline_stage is not None
        ):
            # Calculate number of pipeline stages to distribute the remaining Transformer
            # layers after deducting the Transformer layers in the first or the last stages
            middle_pipeline_stages = config.pipeline_model_parallel_size
            middle_pipeline_stages -= sum(
                [
                    1 if x is not None else 0
                    for x in (
                        config.num_layers_in_first_pipeline_stage,
                        config.num_layers_in_last_pipeline_stage,
                    )
                ]
            )

            # Calculate layers to distribute in each pipeline stage. If the
            # num_layers_in_first_pipeline_stage and num_layers_in_last_pipeline_stage
            # are not set, we will not enable uneven pipeline. All layers will be treated
            # as middle layers.
            num_layers_in_first_pipeline_stage = (
                0
                if config.num_layers_in_first_pipeline_stage is None
                else config.num_layers_in_first_pipeline_stage
            )
            num_layers_in_last_pipeline_stage = (
                0
                if config.num_layers_in_last_pipeline_stage is None
                else config.num_layers_in_last_pipeline_stage
            )

            middle_num_layers = (
                config.num_layers
                - num_layers_in_first_pipeline_stage
                - num_layers_in_last_pipeline_stage
            )

            middle_pipeline_rank = (
                pp_rank if config.num_layers_in_first_pipeline_stage is None else pp_rank - 1
            )

            if (vp_size := config.virtual_pipeline_model_parallel_size) is not None:
                assert (
                    vp_stage is not None
                ), "vp_stage must be provided if virtual pipeline model parallel size is set"

                # Calculate number of layers in each virtual model chunk
                # If the num_layers_in_first_pipeline_stage and
                # num_layers_in_last_pipeline_stage are not set, all pipeline stages
                # will be treated as middle pipeline stages in the calculation
                num_layers_per_virtual_model_chunk_in_first_pipeline_stage = (
                    0
                    if config.num_layers_in_first_pipeline_stage is None
                    else config.num_layers_in_first_pipeline_stage // vp_size
                )

                num_layers_per_virtual_model_chunk_in_last_pipeline_stage = (
                    0
                    if config.num_layers_in_last_pipeline_stage is None
                    else config.num_layers_in_last_pipeline_stage // vp_size
                )

                num_layers_per_virtual_model_chunk_in_middle_pipeline_stage = (
                    middle_num_layers // vp_size
                )

                # First stage + middle stage + last stage
                total_virtual_chunks = (
                    num_layers_per_virtual_model_chunk_in_first_pipeline_stage
                    + num_layers_per_virtual_model_chunk_in_middle_pipeline_stage
                    + num_layers_per_virtual_model_chunk_in_last_pipeline_stage
                )

                # Calculate the layer offset with interleaved uneven pipeline parallelism
                if pp_rank == 0:
                    offset = vp_stage * total_virtual_chunks
                else:
                    offset = (
                        vp_stage * total_virtual_chunks
                        + num_layers_per_virtual_model_chunk_in_first_pipeline_stage
                        + middle_pipeline_rank
                        * (
                            num_layers_per_virtual_model_chunk_in_middle_pipeline_stage
                            // middle_pipeline_stages
                        )
                    )
            else:
                if middle_pipeline_stages > 0:
                    num_layers_per_pipeline_rank = middle_num_layers // middle_pipeline_stages
                else:
                    num_layers_per_pipeline_rank = 0

                if pp_rank == 0:
                    offset = 0
                else:
                    offset = (
                        middle_pipeline_rank * num_layers_per_pipeline_rank
                    ) + num_layers_in_first_pipeline_stage
        else:
            num_layers = config.num_layers

            # Increase the number of layers by one if we include the embedding (loss)
            # layer into pipeline parallelism partition and placement
            if config.account_for_embedding_in_pipeline_split:
                num_layers += 1

            if config.account_for_loss_in_pipeline_split:
                num_layers += 1

            num_layers_per_pipeline_rank = num_layers // config.pipeline_model_parallel_size

            # import here to avoid circular import
            from megatron.core.pipeline_parallel.utils import is_vp_first_stage

            if (vp_size := config.virtual_pipeline_model_parallel_size) is not None:
                assert (
                    vp_stage is not None
                ), "vp_stage must be provided if virtual pipeline model parallel size is set"

                num_layers_per_virtual_rank = num_layers_per_pipeline_rank // vp_size
                total_virtual_chunks = num_layers // vp_size
                offset = vp_stage * total_virtual_chunks + (pp_rank * num_layers_per_virtual_rank)

                # Reduce the offset of embedding layer from the total layer number
                if config.account_for_embedding_in_pipeline_split and not (
                    is_vp_first_stage(vp_stage, vp_size) and is_first_pp_stage
                ):
                    offset -= 1
            else:
                offset = pp_rank * num_layers_per_pipeline_rank

                # Reduce the offset of embedding layer from the total layer number
                if config.account_for_embedding_in_pipeline_split and not (
                    is_vp_first_stage(vp_stage, vp_size) and is_first_pp_stage
                ):
                    offset -= 1
    else:
        offset = 0
    return offset


@dataclass
class TransformerLayerSubmodules:
    """
    Configuration class for specifying the submodules of a transformer layer.

    This class defines the structure and default implementations for various
    components of a transformer layer, allowing for flexible customization
    of the layer's architecture.

    Args:
        input_layernorm: Specification for the input layer normalization.
        self_attention (Union[ModuleSpec, type]): Specification for the self-attention mechanism.
        self_attn_bda (Union[ModuleSpec, type]): Specification for the bias-dropout-add operation
            after self-attention.
        pre_cross_attn_layernorm: Specification for the layer
            normalization before cross-attention.
        cross_attention (Union[ModuleSpec, type]): Specification for the cross-attention mechanism.
        cross_attn_bda (Union[ModuleSpec, type]): Specification for the bias-dropout-add operation
            after cross-attention.
        pre_mlp_layernorm: Specification for the layer normalization
            before the MLP.
        mlp (Union[ModuleSpec, type]): Specification for the MLP in Dense layer.
        mlp_bda (Union[ModuleSpec, type]): Specification for the bias-dropout-add operation
            after the MLP.
        sharded_state_dict_keys_map (Dict[str, str]): Mapping for sharded tensor keys to be applied
            in the `sharded_state_dict` method.
    """

    input_layernorm: LayerNormBuilder = IdentityOp
    self_attention: Union[ModuleSpec, type] = IdentityOp
    self_attn_bda: Union[ModuleSpec, type] = IdentityFuncOp

    pre_cross_attn_layernorm: LayerNormBuilder = IdentityOp
    cross_attention: Union[ModuleSpec, type] = IdentityOp
    cross_attn_bda: Union[ModuleSpec, type] = IdentityFuncOp

    pre_mlp_layernorm: LayerNormBuilder = IdentityOp
    mlp: Union[ModuleSpec, type] = IdentityOp
    mlp_bda: Union[ModuleSpec, type] = IdentityFuncOp

    # Mapping for sharded tensor keys to be applied in `sharded_state_dict` method
    sharded_state_dict_keys_map: Dict[str, str] = field(default_factory=dict)


class BaseTransformerLayer(ABC):
    """A common parent class for `TransformerLayer` like implementations.

    A dummy class that is subclassed by similar `TransformerLayer`s e.g. the
    `TransformerLayer` in this file and possibly other `TransformerLayer`
    implementations that aim to use `TransformerBlock` as the base module.
    The main purpose is to check if any layer (or module) provided in the spec
    is a subclass of this class to allow fanning-out of that spec for all the
    layers in the `TransformerBlock`. See `_get_block_submodules` method
    implementation in `transformer_block.py` file for more details.
    """

    def __init__(self):
        pass


class TransformerLayer(GraphableMegatronModule, BaseTransformerLayer):
    """A single transformer layer.

    Transformer layer takes input with size [s, b, h] and returns an
    output of the same size.
    """

    def __init__(
        self,
        config: TransformerConfig,
        submodules: TransformerLayerSubmodules,
        layer_number: int = 1,
        hidden_dropout: Optional[float] = None,
        pg_collection: Optional[ProcessGroupCollection] = None,
        vp_stage: Optional[int] = None,
        is_mtp_layer: bool = False,
        add_layer_offset: bool = True,
        pp_layer_offset: Optional[int] = None,
    ):
        self.submodules_config = submodules
        super().__init__(config=config, vp_stage=vp_stage)

        if pg_collection is None:
            pg_collection = ProcessGroupCollection.use_mpu_process_groups()
        self.pg_collection = pg_collection
        self.tp_group = pg_collection.tp

        # MTP inner layers use their own layer numbering (starting from 1 within each MTP depth),
        # so they should NOT add the decoder layer offset. The router.py handles MTP layer
        # numbering separately by adding config.num_layers to distinguish MTP layers from decoder
        # layers in the aux loss tracker.
        #
        # When add_layer_offset is False, the caller has already included the correct offset
        # in layer_number (e.g. when using --hybrid-layer-pattern with fVPP).
        if is_mtp_layer or not add_layer_offset:
            self.layer_number = layer_number
        else:
            self.layer_number = layer_number + get_transformer_layer_offset(
                self.config, vp_stage, get_pg_rank(pg_collection.pp)
            )
        self.hidden_dropout = config.hidden_dropout if hidden_dropout is None else hidden_dropout
        self.is_mtp_layer = is_mtp_layer

        # [Module 1: Input Layernorm] Optional Layernorm on the input data
        # TODO: add pytorch only layernorm
        self.input_layernorm = submodules.input_layernorm(
            config=self.config,
            hidden_size=self.config.hidden_size,
            eps=self.config.layernorm_epsilon,
        )

        attention_optional_kwargs = {}
        if config.context_parallel_size > 1 and config.cp_comm_type is not None:
            if isinstance(config.cp_comm_type, list):
                # layer_number is 1-indexed, so we need to subtract 1 to get the correct index
                attention_optional_kwargs["cp_comm_type"] = config.cp_comm_type[
                    self.layer_number - 1
                ]
            else:
                attention_optional_kwargs["cp_comm_type"] = config.cp_comm_type

        attention_optional_kwargs["pg_collection"] = pg_collection
        if pp_layer_offset is not None:
            attention_optional_kwargs["pp_layer_offset"] = pp_layer_offset

        # [Module 2: SelfAttention]
        self.self_attention = build_module(
            submodules.self_attention,
            config=self.config,
            layer_number=self.layer_number,
            **attention_optional_kwargs,
        )

        # [Module 3: BiasDropoutFusion]
        self.self_attn_bda = build_module(submodules.self_attn_bda)

        # [Module 4: Post SelfAttention] Optional Layernorm after self-attn
        self.pre_cross_attn_layernorm = submodules.pre_cross_attn_layernorm(
            config=self.config,
            hidden_size=self.config.hidden_size,
            eps=self.config.layernorm_epsilon,
        )

        # [Module 5: CrossAttention]
        self.cross_attention = build_module(
            submodules.cross_attention,
            config=self.config,
            layer_number=self.layer_number,
            **attention_optional_kwargs,
        )

        # [Module 6: BiasDropoutFusion]
        self.cross_attn_bda = build_module(submodules.cross_attn_bda, config=self.config)

        # [Module 7: Pre MLP] Optional Layernorm before MLP
        self.pre_mlp_layernorm = submodules.pre_mlp_layernorm(
            config=self.config,
            hidden_size=self.config.hidden_size,
            eps=self.config.layernorm_epsilon,
        )
        # [Module 8: MLP block]
        additional_mlp_kwargs = {}
        # import here to avoid circular import
        from megatron.core.extensions.transformer_engine import TEFusedMLP
        from megatron.core.transformer.moe.experts import SequentialMLP, TEGroupedMLP
        from megatron.core.transformer.moe.moe_layer import MoELayer

        # MLP expects tp_group but MoELayer expects pg_collection to be passed in.
        # We can change MLP to accept pg_collection but it makes the logic implicit
        # The conditional below is to make the logic explicit
        # if submodules.mlp is not a ModuleSpec,we dont have to handle passing additional kwargs
        if isinstance(submodules.mlp, ModuleSpec):
            if submodules.mlp.module in (MoELayer, TEGroupedMLP, SequentialMLP):
                additional_mlp_kwargs["pg_collection"] = pg_collection
                # Pass is_mtp_layer flag to MoELayer to distinguish MTP MoE layers.
                if submodules.mlp.module == MoELayer:
                    additional_mlp_kwargs["is_mtp_layer"] = self.is_mtp_layer
            elif submodules.mlp.module == MLP:
                assert hasattr(
                    pg_collection, 'tp'
                ), 'TP process group is required for MLP in TransformerLayer'
                additional_mlp_kwargs["tp_group"] = pg_collection.tp
            elif TEFusedMLP is not None and submodules.mlp.module == TEFusedMLP:
                assert hasattr(
                    pg_collection, 'tp'
                ), 'TP process group is required for TEFusedMLP in TransformerLayer'
                additional_mlp_kwargs["tp_group"] = pg_collection.tp
            else:
                log_single_rank(
                    logger,
                    logging.WARNING,
                    f"Unknown MLP type: {type(submodules.mlp)}. Using default kwargs.",
                )
        self.mlp = build_module(submodules.mlp, config=self.config, **additional_mlp_kwargs)
        if hasattr(self.mlp, 'set_layer_number'):
            self.mlp.set_layer_number(self.layer_number)

        # [Module 9: BiasDropoutFusion]
        self.mlp_bda = build_module(submodules.mlp_bda)

        self.is_moe_layer = isinstance(self.mlp, MoELayer)

        self.recompute_input_layernorm = False
        self.recompute_pre_mlp_layernorm = False
        self.recompute_mlp = False
        if self.config.recompute_granularity == 'selective':
            assert self.config.recompute_modules is not None
            if "layernorm" in self.config.recompute_modules:
                if not isinstance(self.input_layernorm, IdentityOp):
                    self.recompute_input_layernorm = True
                    if self.config.fp8 or self.config.fp4:
                        self.self_attention.set_for_recompute_input_layernorm()

                def can_recompute_pre_mlp_layernorm_for_cudagraph():
                    if (
                        not self.is_moe_layer
                        or CudaGraphScope.moe_router not in self.config.cuda_graph_scope
                        or self.config.cuda_graph_impl == "local"
                    ):
                        # Not a MoE layer, or not capturing the router part.
                        return True
                    if (
                        self.config.moe_shared_expert_intermediate_size is not None
                        and self.config.moe_shared_expert_overlap
                    ):
                        # If shared expert overlap is used, we cannot make the pre-mlp layernorm
                        # recomputation, because the shared expert takes the layernorm output as
                        # input, and it is outside of the CUDA graph scope.
                        log_single_rank(
                            logger,
                            logging.WARNING,
                            "pre_mlp_layernorm recompute is not supported with moe router "
                            "cudagraph + shared expert overlap. Disabling pre_mlp_layernorm "
                            "recompute.",
                        )
                        return False
                    if CudaGraphScope.moe_preprocess in self.config.cuda_graph_scope and (
                        self.config.moe_token_dispatcher_type == "alltoall"
                        or self.config.moe_latent_size
                    ):
                        # Only when capturing the preprocess part and using alltoall token
                        # dispatcher or latent MoE can we make the pre-mlp layernorm recomputation.
                        # Because in other cases the layernorm output returns directly as one of the
                        # outputs of the cudagraph, which will be allocated a static buffer, thus
                        # not able to be released.
                        return True
                    log_single_rank(
                        logger,
                        logging.WARNING,
                        "pre_mlp_layernorm recompute is only supported with moe router + "
                        "preprocess cudagraph will alltoall token dispatcher or latent MoE. "
                        "Disabling pre_mlp_layernorm recompute.",
                    )
                    return False

                if (
                    not isinstance(self.pre_mlp_layernorm, IdentityOp)
                    and can_recompute_pre_mlp_layernorm_for_cudagraph()
                ):
                    self.recompute_pre_mlp_layernorm = True
                    if self.config.fp8 or self.config.fp4:
                        if isinstance(self.mlp, MoELayer):
                            self.mlp.set_for_recompute_pre_mlp_layernorm()
                        else:
                            from megatron.core.extensions.transformer_engine import (
                                set_save_original_input,
                            )

                            set_save_original_input(self.mlp.linear_fc1)
            if "mlp" in self.config.recompute_modules:
                if not self.is_moe_layer:
                    self.recompute_mlp = True
        self.offload_attn_norm = (
            self.config.fine_grained_activation_offloading
            and "attn_norm" in self.config.offload_modules
            and not isinstance(self.input_layernorm, IdentityOp)
        )
        self.offload_mlp_norm = (
            self.config.fine_grained_activation_offloading
            and "mlp_norm" in self.config.offload_modules
            and not isinstance(self.pre_mlp_layernorm, IdentityOp)
        )

        # @jcasper how should we handle nvfuser?
        # Set bias+dropout+add fusion grad_enable execution handler.
        # TORCH_MAJOR = int(torch.__version__.split('.')[0])
        # TORCH_MINOR = int(torch.__version__.split('.')[1])
        # use_nvfuser = TORCH_MAJOR > 1 or (TORCH_MAJOR == 1 and TORCH_MINOR >= 10)
        # self.bias_dropout_add_exec_handler = nullcontext if use_nvfuser else torch.enable_grad
        self.bias_dropout_add_exec_handler = torch.enable_grad

    def create_mcore_cudagraph_manager(self, config):
        """Register the transformer layer for cudagraphs."""

        from megatron.core.transformer.cuda_graphs import CudaGraphManager

        # If full scope, just cudagraph the entire layer
        if not self.config.cuda_graph_scope:
            self.cudagraph_manager = CudaGraphManager(config)
        elif (
            CudaGraphScope.attn in self.config.cuda_graph_scope
            and self.submodules_config.self_attention != IdentityOp
        ):
            self.cudagraph_manager = CudaGraphManager(config)
        elif (
            CudaGraphScope.mlp in self.config.cuda_graph_scope
            and self.submodules_config.mlp != IdentityOp
        ):
            # Cudagraphing MoE layers are supposed handled by MoeTransforerLayer
            assert not self.is_moe_layer
            self.cudagraph_manager = CudaGraphManager(config)

    @staticmethod
    def _get_layer_offset(config: TransformerConfig):
        """
        Get the layer offset for the current pipeline stage.

        Deprecated: please use `get_transformer_layer_offset` instead.
        """

        warnings.warn(
            "TransformerLayer._get_layer_offset is deprecated."
            "Please use get_transformer_layer_offset instead."
        )
        return get_transformer_layer_offset(config)

    def _forward_attention(
        self,
        hidden_states: Tensor,
        attention_mask: Optional[Tensor] = None,
        context: Optional[Tensor] = None,
        context_mask: Optional[Tensor] = None,
        rotary_pos_emb: Optional[Tensor] = None,
        rotary_pos_cos: Optional[Tensor] = None,
        rotary_pos_sin: Optional[Tensor] = None,
        rotary_pos_cos_sin: Optional[Tensor] = None,
        attention_bias: Optional[Tensor] = None,
        inference_context: Optional[BaseInferenceContext] = None,
        packed_seq_params: Optional[PackedSeqParams] = None,
        sequence_len_offset: Optional[Tensor] = None,
        padding_mask: Optional[Tensor] = None,
        *,
        inference_params: Optional[Any] = None,
    ):
        """
        Perform a forward pass through the attention layer and the layernorms before and after
        the attention operations.

        Args:
            hidden_states (Tensor): Input tensor of shape [s, b, h] where s is sequence length,
                b is batch size, and h is hidden size.
            attention_mask (Tensor): Mask tensor for self-attention.
            context (Tensor, optional): Context tensor for cross-attention.
            context_mask (Tensor, optional): Mask tensor for cross-attention.
            rotary_pos_emb (Tensor, optional): Rotary positional embeddings.
            rotary_pos_cos (Optional[Tensor]): Rotary embedding cosine.
            rotary_pos_sin (Optional[Tensor]): Rotary embedding sine.
            rotary_pos_cos_sin (Optional[Tensor]): Combined rotary embedding cosine and sine.
            Currently used exclusively for inference with dynamic batching and flashinfer RoPE.
            attention_bias (Tensor, optional): Bias tensor for Q * K.T.
            inference_context (object, optional): Parameters for inference-time optimizations.
            packed_seq_params (object, optional): Parameters for packed sequence processing.
            sequence_len_offset (Tensor, optional): Offset along sequence dimension
                during inference.

        Returns:
            Tuple[Tensor, Tensor]: A tuple containing:
                hidden_states (Tensor): Transformed hidden states before the MLP layernorm.
                context (Tensor): Updated context tensor if cross-attention is used,
                otherwise None.
        """
        from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
            FineGrainedActivationOffloadingInterface as off_interface,
        )

        inference_context = deprecate_inference_params(inference_context, inference_params)

        # Optional Input Layer norm
        if self.recompute_input_layernorm:
            self.input_layernorm_checkpoint = tensor_parallel.CheckpointWithoutOutput()
            with off_interface(self.offload_attn_norm, hidden_states, "attn_norm") as hidden_states:
                input_layernorm_output = self.input_layernorm_checkpoint.checkpoint(
                    apply_module(self.input_layernorm), hidden_states
                )
        else:
            with off_interface(self.offload_attn_norm, hidden_states, "attn_norm") as hidden_states:
                input_layernorm_output = apply_module(self.input_layernorm)(hidden_states)

        if isinstance(input_layernorm_output, tuple):
            if len(input_layernorm_output) != 2:
                raise ValueError(
                    f"When the output of input_layernorm is a tuple, it is "
                    f"expected to have 2 elements (output, residual), but "
                    f"got {len(input_layernorm_output)}"
                )
            input_layernorm_output, residual = input_layernorm_output
        else:
            residual = hidden_states

        if self.config.fp32_residual_connection:
            residual = residual.float()

        using_fused_tp_inference_kernel = (not self.training) and (
            self.config.inference_fuse_tp_communication
        )

        if using_fused_tp_inference_kernel:
            # Set the residual for fused reduce-scatter + add + layer-norm + all-gather
            # operation in attention's out_proj (linear_proj)
            self._set_proj_residual(residual)

        # Self attention.
        nvtx_range_push(suffix="self_attention")
        attention_output_with_bias = self.self_attention(
            input_layernorm_output,
            attention_mask=attention_mask,
            inference_context=inference_context,
            rotary_pos_emb=rotary_pos_emb,
            rotary_pos_cos=rotary_pos_cos,
            rotary_pos_sin=rotary_pos_sin,
            rotary_pos_cos_sin=rotary_pos_cos_sin,
            attention_bias=attention_bias,
            packed_seq_params=packed_seq_params,
            sequence_len_offset=sequence_len_offset,
        )
        nvtx_range_pop(suffix="self_attention")

        if self.recompute_input_layernorm:
            # discard the output of the input layernorm and register the recompute
            # as a gradient hook of attention_output_with_bias[0]
            self.input_layernorm_checkpoint.discard_output_and_register_recompute(
                attention_output_with_bias[0]
            )

        # TODO: could we move `bias_dropout_add_exec_handler` itself
        # inside the module provided in the `bias_dropout_add_spec` module?
        nvtx_range_push(suffix="self_attn_bda")
        if using_fused_tp_inference_kernel:
            # In inference optimized transformer layer, there is no bias and dropout
            # The remaining residual add is already handled inside the
            # self attention module.
            hidden_states = attention_output_with_bias[0]
        else:
            with self.bias_dropout_add_exec_handler():
                hidden_states = self.self_attn_bda(self.training, self.config.bias_dropout_fusion)(
                    attention_output_with_bias, residual, self.hidden_dropout
                )
        nvtx_range_pop(suffix="self_attn_bda")

        # Delay the offload of the attention norm until after the self_attn_bda has been computed
        # because the residual is needed in the self_attn_bda.
        if self.offload_attn_norm:
            hidden_states = off_interface.group_commit(
                hidden_states, name="attn_norm", forced_released_tensors=[residual]
            )

        # Optional Layer norm after self-attention
        pre_cross_attn_layernorm_output = apply_module(self.pre_cross_attn_layernorm)(hidden_states)

        if isinstance(pre_cross_attn_layernorm_output, tuple):
            if len(pre_cross_attn_layernorm_output) != 2:
                raise ValueError(
                    f"When the output of pre_cross_attn_layernorm_output "
                    f"is a tuple, it is expected to have 2 elements "
                    f"(output, residual), but "
                    f"got {len(pre_cross_attn_layernorm_output)}"
                )
            pre_cross_attn_layernorm_output, residual = pre_cross_attn_layernorm_output
        else:
            residual = hidden_states

        if self.config.fp32_residual_connection:
            residual = residual.float()
        # Cross attention.
        attention_output_with_bias = self.cross_attention(
            pre_cross_attn_layernorm_output,
            attention_mask=context_mask,
            key_value_states=context,
            inference_context=inference_context,
        )

        if isinstance(attention_output_with_bias, dict) and "context" in attention_output_with_bias:
            context = attention_output_with_bias["context"]

        # TODO: could we move `bias_dropout_add_exec_handler` itself
        # inside the module provided in the `bias_dropout_add_spec` module?
        with self.bias_dropout_add_exec_handler():
            hidden_states = self.cross_attn_bda(self.training, self.config.bias_dropout_fusion)(
                attention_output_with_bias, residual, self.hidden_dropout
            )

        return hidden_states, context

    @copy_signature(_forward_attention)
    def forward(self, *args, **kwargs):
        """
        Perform a forward pass through the transformer layer.

        This method calls the core computation of a transformer layer, including
        self-attention, cross-attention (if applicable), and feed-forward operations.
        """
        hidden_states, context = self._forward_attention(*args, **kwargs)
        output = self._forward_mlp(
            hidden_states,
            kwargs.get("inference_context", None),
            padding_mask=kwargs.get("padding_mask", None),
        )
        return output, context

    def _forward_pre_mlp_layernorm(self, hidden_states: Tensor):
        from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
            FineGrainedActivationOffloadingInterface as off_interface,
        )

        if self.recompute_pre_mlp_layernorm:
            self.pre_mlp_norm_checkpoint = tensor_parallel.CheckpointWithoutOutput()
            with off_interface(self.offload_mlp_norm, hidden_states, "mlp_norm") as hidden_states:
                pre_mlp_layernorm_output = self.pre_mlp_norm_checkpoint.checkpoint(
                    apply_module(self.pre_mlp_layernorm), hidden_states
                )
        else:
            with off_interface(self.offload_mlp_norm, hidden_states, "mlp_norm") as hidden_states:
                pre_mlp_layernorm_output = apply_module(self.pre_mlp_layernorm)(hidden_states)

        return pre_mlp_layernorm_output

    def _forward_mlp(
        self,
        hidden_states: Tensor,
        inference_context: BaseInferenceContext | None = None,
        padding_mask: Tensor | None = None,
    ) -> Tensor | list[Tensor | None]:
        """
        Perform a forward pass through the feed-forward layer.

        Args:
            hidden_states (Tensor): Transformed hidden states before the MLP layernorm.
                Shape [seq_length, batch_size, hidden_size].
            inference_context: Inference context for optimizations.
            padding_mask (Tensor, optional): Padding mask for MoE routing.
                Shape [bsz, seq_length]. True = padding (exclude), False = valid (include).
                Only used for MoE layers to exclude padding tokens from aux loss computations.
                The MoELayer will internally transform this to [seq_length, bsz] format.
        Returns:
            output (Tensor): Transformed hidden states of shape [s, b, h].
        """

        # Optional Layer norm post the cross-attention.
        pre_mlp_layernorm_output = self._forward_pre_mlp_layernorm(hidden_states)

        if isinstance(pre_mlp_layernorm_output, tuple):
            if len(pre_mlp_layernorm_output) != 2:
                raise ValueError(
                    f"When the output of pre_mlp_layernorm is a tuple, it is "
                    f"expected to have 2 elements (output, residual), but "
                    f"got {len(pre_mlp_layernorm_output)}"
                )
            pre_mlp_layernorm_output, residual = pre_mlp_layernorm_output
        else:
            # Residual connection.
            residual = hidden_states

        if self.config.fp32_residual_connection:
            residual = residual.float()

        nvtx_range_push(suffix="mlp")
        # Potentially chunk the MLP computation during prefill to minimize the peak activation size
        should_chunk_mlp_for_prefill = (
            self.config.mlp_chunks_for_prefill > 1
            and inference_context is not None
            and not inference_context.is_decode_only()
            and not isinstance(self.mlp, IdentityOp)
            and not self.config.transformer_impl == "inference_optimized"
        )

        using_fused_tp_inference_kernel = (not self.training) and (
            self.config.inference_fuse_tp_communication
        )

        if self.recompute_mlp:
            if self.config.fp8 or self.config.fp4:
                # import here to avoid circular import
                from megatron.core.extensions.transformer_engine import te_checkpoint

                mlp_output_with_bias = te_checkpoint(
                    self.mlp,
                    False,
                    tensor_parallel.random.get_cuda_rng_tracker,
                    self.pg_collection.tp,
                    pre_mlp_layernorm_output,
                    padding_mask=padding_mask,
                )
            else:
                mlp_output_with_bias = tensor_parallel.checkpoint(
                    functools.partial(self.mlp, padding_mask=padding_mask),
                    False,
                    pre_mlp_layernorm_output,
                )
        elif should_chunk_mlp_for_prefill:
            # Chunk input along sequence dimension
            num_chunks = min(self.config.mlp_chunks_for_prefill, pre_mlp_layernorm_output.shape[0])
            chunks = pre_mlp_layernorm_output.chunk(num_chunks, dim=0)

            # Compute outputs for each chunk
            outputs = [self.mlp(chunk) for chunk in chunks]

            # Aggregate chunk outputs
            mlp_output = torch.cat([out for out, _ in outputs], dim=0)
            bias_chunks = [bias for _, bias in outputs if bias is not None]
            bias_output = torch.stack(bias_chunks, dim=0).sum(dim=0) if bias_chunks else None
            mlp_output_with_bias = (mlp_output, bias_output)
        else:
            if using_fused_tp_inference_kernel:
                # Set the residual for fused reduce-scatter + add + layer-norm + all-gather
                # operation in MLP's fc2.
                self._set_fc2_residual(residual)
            mlp_output_with_bias = self.mlp(pre_mlp_layernorm_output, padding_mask=padding_mask)

        nvtx_range_pop(suffix="mlp")

        if (
            self.is_moe_layer
            and self.config.cuda_graph_impl == "transformer_engine"
            and self.training
            and is_graph_capturing()
            and CudaGraphScope.moe_router in self.config.cuda_graph_scope
        ):
            if self.recompute_pre_mlp_layernorm:
                # Register the recompute hooks to all the cudagraph output tensors, because some
                # tensors are in parallel execution paths and they all need pre_mlp_layernorm to be
                # recomputed in backward pass. For example, the router path and the shared expert
                # path. So only register in one path is risky.
                for tensor in mlp_output_with_bias:
                    self.pre_mlp_norm_checkpoint.discard_output_and_register_recompute(tensor)
            return list(mlp_output_with_bias) + [residual]
        else:
            return self._forward_post_mlp(mlp_output_with_bias, residual)

    def _forward_post_mlp(
        self, mlp_output_with_bias: tuple[Tensor, Tensor | None], residual: Tensor
    ) -> Tensor:
        """
        Perform operations after the MLP computation.

        Args:
            mlp_output_with_bias (Tensor): Output tensor of the MLP layer with bias.
            residual (Tensor): Residual tensor.

        Returns:
            output (Tensor): Transformed hidden states of shape [s, b, h].
        """
        from megatron.core.pipeline_parallel.fine_grained_activation_offload import (
            FineGrainedActivationOffloadingInterface as off_interface,
        )

        using_fused_tp_inference_kernel = (not self.training) and (
            self.config.inference_fuse_tp_communication
        )

        if self.recompute_pre_mlp_layernorm:
            # discard the output of the pre-mlp layernorm and register the recompute
            # as a gradient hook of mlp_output_with_bias[0]
            self.pre_mlp_norm_checkpoint.discard_output_and_register_recompute(
                mlp_output_with_bias[0]
            )

        # TODO: could we move `bias_dropout_add_exec_handler` itself
        # inside the module provided in the `bias_dropout_add_spec` module?
        nvtx_range_push(suffix="mlp_bda")
        if using_fused_tp_inference_kernel:
            # In inference optimized transformer layer, there is no bias and dropout
            # The remaining residual add is already handled inside the
            # MLP module.
            hidden_states = mlp_output_with_bias[0]
        else:
            with self.bias_dropout_add_exec_handler():
                hidden_states = self.mlp_bda(self.training, self.config.bias_dropout_fusion)(
                    mlp_output_with_bias, residual, self.hidden_dropout
                )
        nvtx_range_pop(suffix="mlp_bda")
        # Delay the offload of the mlp norm until after the mlp_bda has been computed
        # because the residual is needed in the mlp_bda.
        if self.offload_mlp_norm:
            hidden_states = off_interface.group_commit(
                hidden_states, name="mlp_norm", forced_released_tensors=[residual]
            )

        # Jit compiled function creates 'view' tensor. This tensor
        # potentially gets saved in the MPU checkpoint function context,
        # which rejects view tensors. While making a viewless tensor here
        # won't result in memory savings (like the data loader, or
        # p2p_communication), it serves to document the origin of this
        # 'view' tensor.
        output = make_viewless_tensor(
            inp=hidden_states, requires_grad=hidden_states.requires_grad, keep_graph=True
        )

        return output

    def sharded_state_dict(
        self, prefix: str = '', sharded_offsets: tuple = (), metadata: Optional[dict] = None
    ) -> ShardedStateDict:
        """
        Generate a sharded state dictionary for the transformer layer.

        Args:
            prefix (str, optional): Prefix to be added to all keys in the state dict.
            sharded_offsets (tuple, optional): Tuple of sharding offsets.
            metadata (Optional[dict], optional): Additional metadata for sharding.

        Returns:
            ShardedStateDict: A dictionary containing the sharded state of the transformer layer.
        """
        sharded_state_dict = super().sharded_state_dict(prefix, sharded_offsets, metadata)
        prefixed_map = {
            f'{prefix}{k}': f'{prefix}{v}'
            for k, v in self.submodules_config.sharded_state_dict_keys_map.items()
        }
        if prefixed_map:
            apply_prefix_mapping(sharded_state_dict, prefixed_map)
        return sharded_state_dict

    def configure_fused_tp_inference(
        self,
        skip_qkv_norm_and_all_gather: bool = False,
        fc2_next_layer_norm_weights: Optional[Tensor] = None,
    ):
        """
        Configure settings for fused TP communication in inference mode.

        Args:
            skip_qkv_norm (bool): Whether to skip norm and all-gather for linear_qkv.
            fc2_next_layer_norm_weights (Optional[Tensor]): Next layer's QKV norm weights
                for current layer's MLP FC2.
        """
        self.self_attention.linear_qkv.skip_norm_and_all_gather = skip_qkv_norm_and_all_gather

        # Use current layer's own MLP FC1 norm weights for attention's/mixer's out_proj
        mlp_fc1_weights = self.get_mlp_layer_norm_weights()
        self._set_proj_next_layer_norm_weights(mlp_fc1_weights)

        self.mlp.linear_fc1.skip_norm_and_all_gather = True
        # Use next layer's attention norm weights for current layer's MLP FC2
        self._set_fc2_next_layer_norm_weights(fc2_next_layer_norm_weights)

    def _set_proj_next_layer_norm_weights(self, weights: Tensor):
        """Set next layer norm weights for attention/mixer's linear_proj."""
        self.self_attention.linear_proj._set_next_layer_norm_weights(weights)

    def _set_fc2_next_layer_norm_weights(self, weights: Optional[Tensor]):
        """Set next layer norm weights for MLP FC2."""
        if weights is None:
            # Create dummy tensor for last layer (same shape as fc1 norm weights)
            weights = torch.empty_like(self.get_mlp_layer_norm_weights())
        self.mlp.linear_fc2._set_next_layer_norm_weights(weights)

    def _set_proj_residual(self, residual: Tensor):
        """Set residual for attention's/mixer's out_proj (linear_proj)."""
        self.self_attention.linear_proj._set_residual(residual)

    def _set_fc2_residual(self, residual: Tensor):
        """Set residual for MLP FC2."""
        self.mlp.linear_fc2._set_residual(residual)

    def get_mlp_layer_norm_weights(self) -> Tensor:
        """
        Get the MLP FC1 layer norm weights.

        Returns:
            Tensor: The layer norm weight data.
        """
        return self.mlp.linear_fc1.layer_norm_weight.data

    def get_qkv_layer_norm_weights(self) -> Tensor:
        """
        Get the QKV layer norm weights.

        Returns:
            Tensor: The layer norm weight data.
        """
        return self.self_attention.linear_qkv.layer_norm_weight.data

    def get_layer_static_inputs(self, seq_length, micro_batch_size):
        """
        Get the static inputs for the transformer layer. Besides the hidden_states that is
        generated in GraphableMegatronModule, we also add the attention_mask.

        Returns:
            Dict[str, torch.Tensor]: A dictionary containing the static inputs for the layer.
        """
        static_inputs = super().get_layer_static_inputs(seq_length, micro_batch_size)

        if not isinstance(self.self_attention, IdentityOp) and (
            not self.config.cuda_graph_scope or CudaGraphScope.attn in self.config.cuda_graph_scope
        ):
            slen_per_cp = seq_length // self.config.context_parallel_size
            static_inputs["attention_mask"] = (
                ~(torch.tril(torch.ones((slen_per_cp, seq_length))).bool())
                .to(torch.cuda.current_device())
                .reshape(1, 1, slen_per_cp, seq_length)
                .tile(micro_batch_size, 1, 1, 1)
            )
        return static_inputs

    def _get_submodules_under_cudagraphs(self):
        """
        Get the submodules that are covered by cudagraphs.
        """
        if not self.config.cuda_graph_scope:
            return super()._get_submodules_under_cudagraphs()

        submodules = []
        if CudaGraphScope.attn in self.config.cuda_graph_scope:
            submodules += [
                self.input_layernorm,
                self.self_attention,
                self.pre_cross_attn_layernorm,
                self.cross_attention,
            ]
        if (not self.is_moe_layer and CudaGraphScope.mlp in self.config.cuda_graph_scope) or (
            self.is_moe_layer and CudaGraphScope.moe in self.config.cuda_graph_scope
        ):
            submodules += [self.pre_mlp_layernorm, self.mlp]
        elif self.is_moe_layer and CudaGraphScope.moe_router in self.config.cuda_graph_scope:
            submodules += [self.pre_mlp_layernorm, self.mlp.router]
            if (
                self.config.moe_shared_expert_intermediate_size is not None
                and not self.config.moe_shared_expert_overlap
            ):
                submodules += [self.mlp.shared_experts]
        return submodules

    def _te_cuda_graph_capture(self, *args, **kwargs):
        """
        CUDA Graph capture for this layer using TE interface.
        There are some differences from the normal pass:
        1. In some conditions CUDA graph cannot cover the entire layer. The `cuda_graph_scope`
           attribute can be set to control the scope of the CUDA graph.
        2. If context is None, it cannot be returned as output.
        """
        context = None
        if not self.config.cuda_graph_scope or CudaGraphScope.attn in self.config.cuda_graph_scope:
            hidden_states, context = self._forward_attention(*args, **kwargs)
        else:
            if len(args) > 0:
                hidden_states = args[0]
            else:
                hidden_states = kwargs.pop("hidden_states")

        if (
            not self.config.cuda_graph_scope
            or (not self.is_moe_layer and CudaGraphScope.mlp in self.config.cuda_graph_scope)
            or (
                self.is_moe_layer
                and (
                    CudaGraphScope.moe in self.config.cuda_graph_scope
                    or CudaGraphScope.moe_router in self.config.cuda_graph_scope
                )
            )
        ):
            hidden_states = self._forward_mlp(hidden_states)
        if not isinstance(hidden_states, list) and not isinstance(hidden_states, tuple):
            cuda_graph_outputs = [hidden_states]
        else:
            cuda_graph_outputs = list(hidden_states)
        if context is not None:
            cuda_graph_outputs.append(context)
        return tuple(cuda_graph_outputs)

    def _te_cuda_graph_replay(self, *args, **kwargs):
        """
        CUDA graph replay for this layer and microbatch `self.current_microbatch` using TE
        interface. TransformerEngine versions>=1.10 allow keyword arguments with CUDA graph.
        However, CUDA graph accepts only Tensor inputs.
        Hence, `inference_context` and `packed_seq_params` are excluded from input list.
        """
        context = None
        if self.config.cuda_graph_scope and CudaGraphScope.attn not in self.config.cuda_graph_scope:
            hidden_states, context = self._forward_attention(*args, **kwargs)
            args = (hidden_states,)
            kwargs = {}

        assert (kwargs.get('inference_context') is None) and (
            kwargs.get('packed_seq_params') is None
        ), (
            "CUDA graph accepts only Tensor inputs. "
            "inference_context and packed_seq_params are excluded from input list. "
            "For inference cuda graph, please use cuda_graph_impl=local instead."
        )

        cuda_graph_output = list(super()._te_cuda_graph_replay(*args, **kwargs))

        if kwargs.get('context') is not None:
            context = cuda_graph_output.pop()

        if (
            not self.config.cuda_graph_scope
            or (not self.is_moe_layer and CudaGraphScope.mlp in self.config.cuda_graph_scope)
            or (self.is_moe_layer and CudaGraphScope.moe in self.config.cuda_graph_scope)
        ):
            # CUDA Graph captures the whole MLP/MoE part. CUDA Graph output is the layer output.
            assert len(cuda_graph_output) == 1, "CUDA Graph output should be the layer output."
            output = cuda_graph_output.pop()
            assert (
                not self.config.overlap_moe_expert_parallel_comm
            ), "EP overlap must be \
                disabled when CUDA graph captures the whole MLP/MoE part."
        elif self.is_moe_layer and CudaGraphScope.moe_router in self.config.cuda_graph_scope:
            # CUDA Graph partially captures the MoE.
            # The rest of the layer should go to the normal pass.
            shared_expert_output, routing_map = None, None
            # residual is the last element in the CUDA graph output.
            residual = cuda_graph_output.pop()
            if (
                self.config.moe_shared_expert_intermediate_size is not None
                and not self.config.moe_shared_expert_overlap
            ):
                # The shared expert output is the last second element in the CUDA graph output.
                shared_expert_output = cuda_graph_output.pop()

            if CudaGraphScope.moe_preprocess in self.config.cuda_graph_scope:
                # CUDA graph output is [hidden_states, probs] + attributes outputs.
                (hidden_states, probs), attr_outputs = cuda_graph_output[:2], cuda_graph_output[2:]
                valid_cudagraph_attrs = self.mlp.token_dispatcher.valid_cudagraph_attrs
                assert len(attr_outputs) == len(
                    valid_cudagraph_attrs
                ), f"attr_outputs: {len(attr_outputs)} != {len(valid_cudagraph_attrs)}"
                for i, attr_name in enumerate(valid_cudagraph_attrs):
                    hier_attr_name = attr_name.split('.')
                    attr = self.mlp.token_dispatcher
                    for name in hier_attr_name[:-1]:
                        attr = getattr(attr, name)
                    setattr(attr, hier_attr_name[-1], attr_outputs[i])
            else:
                # CUDA graph output is [hidden_states, probs, routing_map].
                assert len(cuda_graph_output) == 3, (
                    "CUDA graph output should be [hidden_states, probs, routing_map], "
                    f"but got {len(cuda_graph_output)} elements"
                )
                hidden_states, probs, routing_map = cuda_graph_output

            # Resume the MoELayer forward pass from the end of the CUDA graph scope.
            # The MoE layer will skip redundant computations when we pass in the calculated values
            # through the keyword arguments. See MoELayer.forward docstring for more details.
            nvtx_range_push(suffix="mlp")
            self.mlp.cudagraph_tensor_store.set(
                hidden_states=hidden_states,
                probs=probs,
                routing_map=routing_map,
                shared_expert_output=shared_expert_output,
            )
            # If EP overlap is enabled, remaining of mlp will be called as fine_grained_callables
            # and should be skipped here.
            if self.config.overlap_moe_expert_parallel_comm:
                probs, routing_map = self.mlp.route(hidden_states)
                hidden_states, probs = self.mlp.preprocess(hidden_states, probs, routing_map)
                nvtx_range_pop(suffix="mlp")
                return residual, hidden_states, probs, shared_expert_output
            mlp_output_with_bias = self.mlp(hidden_states)
            self.mlp.cudagraph_tensor_store.clear()
            nvtx_range_pop(suffix="mlp")

            # If we early returned, layernorm recompute hooks were attached to the output buffer
            # of the cudagraph, so disable the recompute hooks inside _forward_post_mlp
            recompute_pre_mlp_layernorm = self.recompute_pre_mlp_layernorm
            self.recompute_pre_mlp_layernorm = False
            output = self._forward_post_mlp(mlp_output_with_bias, residual)
            self.recompute_pre_mlp_layernorm = recompute_pre_mlp_layernorm
        else:
            # If EP overlap is enabled, needs to return same outputs as submodule.attn
            if self.config.overlap_moe_expert_parallel_comm:
                assert len(cuda_graph_output) == 1, "CUDA Graph output should be the layer output."
                residual = cuda_graph_output.pop()
                if not self.is_moe_layer:
                    return residual, None, None, None
                hidden_states = apply_module(self.pre_mlp_layernorm)(residual)
                if isinstance(hidden_states, tuple):
                    if len(hidden_states) != 2:
                        raise ValueError(
                            f"When the output of pre_mlp_layernorm is a tuple, it is "
                            f"expected to have 2 elements (output, residual), but "
                            f"got {len(hidden_states)}"
                        )
                    hidden_states, residual = hidden_states

                shared_expert_output = self.mlp.shared_experts_compute(hidden_states)
                probs, routing_map = self.mlp.route(hidden_states)
                hidden_states, probs = self.mlp.preprocess(hidden_states, probs, routing_map)
                return residual, hidden_states, probs, shared_expert_output

            # CUDA Graph does not capture the MLP/MoE part at all.
            output = self._forward_mlp(*cuda_graph_output)
        return output, context

    def _get_te_cuda_graph_replay_args(self, *args, **kwargs):
        """Helper function to get tensor arguments for TE CUDA graph."""
        cudagraph_args, cudagraph_kwargs = super()._get_te_cuda_graph_replay_args(*args, **kwargs)

        assert (
            len(cudagraph_args) == 1
        ), "Exactly one positional argument `hidden_states` is expected."
        hidden_states = cudagraph_args[0]

        try:
            import transformer_engine.pytorch as te  # pylint: disable=unused-import

            def get_zero_attention_mask(slen_per_tpcp, micro_batch_size):
                sequence_parallel = self.config.sequence_parallel
                tensor_model_parallel_size = self.config.tensor_model_parallel_size
                slen_per_cp = (
                    slen_per_tpcp * tensor_model_parallel_size
                    if sequence_parallel
                    else slen_per_tpcp
                )
                slen = slen_per_cp * self.config.context_parallel_size
                return torch.zeros(
                    (micro_batch_size, 1, slen_per_cp, slen),
                    dtype=torch.bool,
                    device=torch.cuda.current_device(),
                )

            if not is_te_min_version("1.10.0"):
                # TE version < 1.10.0 does not support keyword arguments with CUDA graph.
                for k, v in cudagraph_kwargs.items():
                    if k == "attention_mask":
                        if v is not None:
                            cudagraph_args.append(v)
                            cudagraph_kwargs[k] = None
                        else:
                            cudagraph_args.append(
                                get_zero_attention_mask(
                                    hidden_states.size(0), hidden_states.size(1)
                                )
                            )
                    elif k != 'is_first_microbatch':
                        assert v is None, "Keyword Arguments not supported with CUDA graph."
            elif (
                'attention_mask' in cudagraph_kwargs and cudagraph_kwargs['attention_mask'] is None
            ):
                # The attention_mask can be None when there is no padding to the input sequence.
                # However, an attention_mask Tensor must be passed into cudagraph for replay, so
                # we create an equivalent zero Tensor as the attention_mask.
                cudagraph_kwargs["attention_mask"] = get_zero_attention_mask(
                    hidden_states.size(0), hidden_states.size(1)
                )
        except ImportError:
            raise RuntimeError("CUDAGraph requires TransformerEngine, but not installed")
        return tuple(cudagraph_args), cudagraph_kwargs

    def _should_call_local_cudagraph(self, *args, **kwargs):
        """
        Check if we should call the local cudagraph path.
        """
        # Training and validation mode CUDA graphs.
        if (
            hasattr(self, 'cudagraph_manager')
            and kwargs.get('inference_context') is None
            and not torch.is_inference_mode_enabled()  # for inference eager dummy_forward
        ):
            return True
        # Inference mode. CUDA graphs are used in the decode phase only, when attn mask is None
        elif not self.training and (
            hasattr(self, 'cudagraph_manager')
            and kwargs['attention_mask'] is None
            and (
                (kwargs.get('inference_context') is not None)
                or (kwargs.get('inference_params') is not None)
            )
            and not self.config.cuda_graph_scope  # empty-list = per-layer CUDA graphs
        ):
            if kwargs['inference_context'].is_static_batching():
                using_cuda_graph = kwargs['inference_context'].is_decode_only()
            else:
                # it can happen that non-decode steps have a token count greater than the max
                # supported cuda graph token count. In that case this flag will be set to
                # False by initialize_attention, and we should not use cuda graphs.
                using_cuda_graph = kwargs['inference_context'].using_cuda_graph_this_step()
            if using_cuda_graph:
                return True
        return False

    def get_layer_norm_weights(self):
        """
        Get the weights of all layernorms (attention and MLP) in the transformer layer.
        Returns:
            List[Tensor]: A list of layernorm weight tensors.
        """
        return


class MoETransformerLayer(TransformerLayer):
    """
    A Transformer layer specialized for Mixture-of-Experts (MoE) architectures.

    Implements specific functionality to support CUDA graph capture for MoE layers.
    Due to the dynamic nature of MoE, capturing the entire layer in a single CUDA graph
    can be challenging. This class supports "partial" CUDA graphs by decomposing the
    MLP forward pass into router, expert-compute, and post-process stages.
    """

    def __init__(self, *args, **kwargs):
        self.is_moe_layer = True
        self.use_partial_cudagraphs = False
        self.moe_layer_recompute = False
        self.token_dispatcher_attrs = {}

        super().__init__(*args, **kwargs)

    def _should_call_local_cudagraph(self, *args, **kwargs):
        """
        Controls whether the full-layer cudagraph_manager captures the entire forward call
        as a single graph. Returns False to skip full-layer capture and route through _forward_mlp.

        MoE layers have two cudagraph modes:
        - Full-layer (use_partial_cudagraphs=False): the full-layer cudagraph_manager captures
          the forward pass as one graph. This is used during inference.
        - Partial (use_partial_cudagraphs=True): the full-layer manager is bypassed (returns
          False), and _forward_mlp routes through cudagraph_manager_router and
          cudagraph_manager_postprocess, which are monkey-patched onto _forward_mlp_router
          and _forward_mlp_postprocess by CudaGraphManager.__init__. The expert dispatch
          in between runs eagerly. This is used during training.
        """
        if self.use_partial_cudagraphs:
            return False
        if self.config.cuda_graph_impl != "local":
            return False
        return super()._should_call_local_cudagraph(*args, **kwargs)

    def transition_cudagraph_scope(self, mode):
        """Transition between full-layer and partial CUDA graph capture.

        Args:
            mode: 'full' for inference (full-layer capture) or 'partial' for training
            (router + postprocess captured, expert dispatch runs eagerly).
        """
        from megatron.core.transformer.cuda_graphs import CudaGraphManager

        if mode == 'partial':
            self.use_partial_cudagraphs = True
            self.moe_layer_recompute = (
                self.config.recompute_granularity == 'selective'
                and "moe" in self.config.recompute_modules
                and self.config.cuda_graph_impl == "local"
            )
            if not hasattr(self, '_router_dtoh_event'):
                self._router_dtoh_event = torch.cuda.Event()
            if not hasattr(self, 'cudagraph_manager_router'):
                self.cudagraph_manager_router = CudaGraphManager(
                    self.config, self, function_name="_forward_mlp_router"
                )
            if not hasattr(self, 'cudagraph_manager_postprocess'):
                self.cudagraph_manager_postprocess = CudaGraphManager(
                    self.config, self, function_name="_forward_mlp_postprocess"
                )
        elif mode == 'full':
            self.use_partial_cudagraphs = False
            self.mlp.fwd_execution_map = ["route", "expert_compute", "postprocess"]
            assert hasattr(self, 'cudagraph_manager'), (
                "MoETransformerLayer missing full cudagraph_manager; "
                "expected it to be created at __init__ with scope = [] "
            )
        else:
            raise ValueError(f"Unknown MoE cudagraph mode: {mode}, expected 'full' or 'partial'")

    def create_mcore_cudagraph_manager(self, config):
        """
        Initializes the CUDA graph manager(s) for the MoE layer.

        Unlike the standard layer which typically uses a single manager, this method
        can configure multiple graph managers if partial CUDA graphs are enabled via
        `cuda_graph_scope`. This allows capturing the static parts of the MoE pass
        while leaving the expert computation to execute eagerly.
        """

        from megatron.core.transformer.cuda_graphs import CudaGraphManager

        if not self.config.cuda_graph_scope or CudaGraphScope.moe in self.config.cuda_graph_scope:
            self.cudagraph_manager = CudaGraphManager(config)
        elif (
            CudaGraphScope.moe_router in self.config.cuda_graph_scope
            or CudaGraphScope.moe_preprocess in self.config.cuda_graph_scope
        ):
            self.transition_cudagraph_scope('partial')

    def _forward_mlp_router(self, hidden_states, padding_mask=None):
        """
        Executes the router phase of the MoE block.

        This includes the pre-MLP layernorm and the routing logic.
        This method is isolated so it can be captured by `cudagraph_manager_router`.
        """

        self.mlp.fwd_execution_map = "route"
        pre_mlp_layernorm_output = self._forward_pre_mlp_layernorm(hidden_states)
        if isinstance(pre_mlp_layernorm_output, tuple):
            if len(pre_mlp_layernorm_output) != 2:
                raise ValueError(
                    f"When the output of pre_mlp_layernorm is a tuple, it is "
                    f"expected to have 2 elements (output, residual), but "
                    f"got {len(pre_mlp_layernorm_output)}"
                )
            pre_mlp_layernorm_output, residual = pre_mlp_layernorm_output
        else:
            residual = hidden_states

        if self.config.fp32_residual_connection:
            residual = residual.float()

        router_outputs = self.mlp(
            pre_mlp_layernorm_output, intermediate_tensors=(), padding_mask=padding_mask
        )

        for attr_name in self.mlp.token_dispatcher.cudagraph_attrs:
            hier_attr_name = attr_name.split('.')
            attr = self.mlp.token_dispatcher
            for name in hier_attr_name:
                attr = getattr(attr, name)
            if torch.is_tensor(attr):
                if attr_name in self.token_dispatcher_attrs:
                    self.token_dispatcher_attrs[attr_name].copy_(attr)
                else:
                    self.token_dispatcher_attrs[attr_name] = attr.detach()

        return residual, *router_outputs

    def _forward_mlp_expert_compute(self, hidden_states, probs):
        """
        Executes the actual computation of the experts.

        This phase takes the routing information and inputs, dispatches them to the
        appropriate experts, and computes the results. In partial graph modes, this
        step runs eagerly between the router and postprocess graph replays.
        """

        for attr_name, attr in self.token_dispatcher_attrs.items():
            hier_attr_name = attr_name.split('.')
            obj = self.mlp.token_dispatcher
            for name in hier_attr_name[:-1]:
                obj = getattr(obj, name)
            setattr(obj, hier_attr_name[-1], attr)

        self.mlp.fwd_execution_map = "expert_compute"
        return self.mlp(None, intermediate_tensors=(hidden_states, probs))

    def _forward_mlp_postprocess(self, residual, output, shared_expert_output, mlp_bias):
        """
        Executes the post-processing phase of the MoE block.

        Handles combining the expert outputs, applying biases, re-registering
        activation recomputation hooks if necessary, and performing the final
        Bias-Dropout-Add. This method is isolated so it can be captured by cudagraphs.

        """

        # Restore token dispatcher attributes. During graph warmup, the router capture leaves these
        # attrs pointing into cudagraph pool memory; restoring them here ensures the postprocess
        # graph captures with valid pointers.
        for name, attr in self.token_dispatcher_attrs.items():
            setattr(self.mlp.token_dispatcher, name, attr)

        self.mlp.fwd_execution_map = "postprocess"
        output = self.mlp(None, intermediate_tensors=(output, shared_expert_output))
        return self._forward_post_mlp((output, mlp_bias), residual)

    def _forward_mlp(self, hidden_states, inference_context=None, padding_mask=None):
        """
        Orchestrates the MLP forward pass, handling partial CUDA graph execution logic.

        If `use_partial_cudagraphs` is True, this method stitches together the
        router, expert_compute, and postprocess calls.
        """

        if inference_context is not None:
            assert not self.use_partial_cudagraphs, (
                "Partial cudagraphs for MoEs were detected during inference!"
                "Please do not use --cuda-graph-scope moe_router moe_preprocess "
                "alongside inference."
            )

        def _forward_mlp_partial_cudagraphs(
            hidden_states, inference_context=None, padding_mask=None
        ):
            residual, hidden_states, probs, shared_expert_output = self._forward_mlp_router(
                hidden_states, padding_mask=padding_mask
            )

            # After the router graph replays, the captured .copy_() operations that update
            # self.token_dispatcher_attrs via `_maybe_dtoh_and_synchronize` are queued on the
            # current stream but may not have completed. Record an event after the router
            # graph and wait on it, so we block only until the router's D2H copies complete.
            self._router_dtoh_event.record()
            self._router_dtoh_event.synchronize()
            for name, attr in self.token_dispatcher_attrs.items():
                setattr(self.mlp.token_dispatcher, name, attr)

            expert_output, mlp_bias = self._forward_mlp_expert_compute(hidden_states, probs)
            return self._forward_mlp_postprocess(
                residual, expert_output, shared_expert_output, mlp_bias
            )

        if self.use_partial_cudagraphs:
            if self.moe_layer_recompute:
                if self.config.fp8 or self.config.fp4:
                    from megatron.core.extensions.transformer_engine import te_checkpoint

                    return te_checkpoint(
                        _forward_mlp_partial_cudagraphs,
                        False,
                        tensor_parallel.random.get_cuda_rng_tracker,
                        parallel_state.get_tensor_model_parallel_group(),
                        hidden_states,
                        padding_mask=padding_mask,
                    )
                else:
                    return tensor_parallel.checkpoint(
                        functools.partial(
                            _forward_mlp_partial_cudagraphs, padding_mask=padding_mask
                        ),
                        False,
                        hidden_states,
                    )
            else:
                return _forward_mlp_partial_cudagraphs(hidden_states, padding_mask=padding_mask)
        else:
            return super()._forward_mlp(hidden_states, padding_mask=padding_mask)


================================================
FILE: megatron/core/transformer/utils.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Utilities for transformer layers."""
from operator import itemgetter
from typing import TYPE_CHECKING, Any, Dict, Iterable, Optional, Tuple, Union

import torch

from megatron.core import parallel_state
from megatron.core.dist_checkpointing.mapping import ShardedObject, ShardedStateDict, StateDict
from megatron.core.jit import jit_fuser
from megatron.core.utils import (
    get_pg_rank,
    get_tensor_model_parallel_group_if_none,
    make_sharded_tensor_for_checkpoint,
    make_tp_sharded_tensor_for_checkpoint,
)

if TYPE_CHECKING:
    from megatron.core.transformer import TransformerConfig


def get_linear_layer(rows, columns, init_method, perform_initialization=True):
    """Simple linear layer with weight initialization."""
    layer = torch.nn.Linear(rows, columns)
    if perform_initialization:  # Take from modelparallel config
        init_method(layer.weight)
    with torch.no_grad():
        layer.bias.zero_()
    return layer


def get_default_causal_mask(sq: int) -> torch.Tensor:
    """Return the causal upper triangular mask for softmax input."""
    return torch.triu(torch.ones(sq, sq, device="cuda"), diagonal=1).bool()


def get_sliding_window_causal_mask(sq, skv, window_size):
    """Create the equivalent attention mask for SWA in [sq, skv] shape"""
    m = torch.ones(sq, skv, dtype=torch.bool, device="cuda")
    mu = torch.triu(m, diagonal=skv - sq - window_size[0])
    ml = torch.tril(mu, diagonal=skv - sq + window_size[1])
    ml = ~ml

    return ml


# pylint: disable=missing-function-docstring
def attention_mask_func(attention_scores, attention_mask):
    attention_scores.masked_fill_(attention_mask, -10000.0)
    return attention_scores


@jit_fuser
def gelu_impl(x):
    """OpenAI's gelu implementation."""
    return 0.5 * x * (1.0 + torch.tanh(0.7978845608028654 * x * (1.0 + 0.044715 * x * x)))


# pylint: disable=missing-function-docstring
def openai_gelu(x):
    return gelu_impl(x)


# This is actually Python equivalent of torch.nn.functional.gelu(), also with
# type hints for ONNX exporter
# pylint: disable=missing-function-docstring
@jit_fuser
def erf_gelu(x):
    return (
        x * 0.5 * (torch.erf(x / 1.41421).to(dtype=x.dtype) + torch.ones_like(x).to(dtype=x.dtype))
    )


def make_sharded_tensors_for_checkpoint(
    state_dict: StateDict,
    prefix: str,
    tensor_parallel_layers_axis_map: Optional[Dict[str, int]] = None,
    sharded_offsets: Iterable[Tuple[int, int, int]] = (),
    extra_state_suffix: str = '_extra_state',
    tp_group: Optional[torch.distributed.ProcessGroup] = None,
    dp_cp_group: Optional[torch.distributed.ProcessGroup] = None,
):
    """Wraps tensors from transformer layers with ShardedTensor or ShardedObject.

    For a given `state_dict`, wraps:
    - all _extra_states with ShardedObject
    - all tensors specified in tensor_parallel_layers_axis_map with TP and DP sharded ShardedTensor
    - other values with DP sharded ShardedTensor

    Args:
        state_dict (StateDict): state_dict to convert
        prefix (str): prefix appended to keys in final state dict
        tensor_parallel_layers_axis_map (Dict[str, int], optional): dict mapping layer
            names to the axis for TP sharding
        sharded_offsets (Iterable[Tuple[int, int, int]], optional): sharding already
            applied (e.g. PP related), passed along to ShardedTensor
        extra_state_suffix (str, default = '_extra_state'): layers with this
            suffix will be wrapped with ShardedObject instead of ShardedTensor.
        tp_group (Optional[torch.distributed.ProcessGroup], optional): tensor parallel group.
            If None, defaults to parallel_state.get_tensor_model_parallel_group()
        dp_cp_group (Optional[torch.distributed.ProcessGroup], optional): data parallel group
            with context parallel. If None, defaults to
            parallel_state.get_data_parallel_group(with_context_parallel=True)

    """

    if tensor_parallel_layers_axis_map is None:
        tensor_parallel_layers_axis_map = {}

    if tp_group is None and dp_cp_group is None:
        tp_group = get_tensor_model_parallel_group_if_none(tp_group)
        dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=True)

    sharded_state_dict = {}
    for layer_name in state_dict.keys():
        tensor = state_dict[layer_name]
        layer_key = f'{prefix}{layer_name}'

        if layer_name.endswith(extra_state_suffix):
            # Compute replica_id when groups are provided
            replica_id = (0, get_pg_rank(tp_group), get_pg_rank(dp_cp_group))

            sharded_state_dict[layer_key] = make_sharded_object_for_checkpoint(
                tensor, layer_key, sharded_offsets, replica_id=replica_id
            )

        elif layer_name in tensor_parallel_layers_axis_map:
            tp_axis = tensor_parallel_layers_axis_map[layer_name]
            sharded_state_dict[layer_key] = make_tp_sharded_tensor_for_checkpoint(
                tensor,
                layer_key,
                tp_axis,
                prepend_offsets=sharded_offsets,
                tp_group=tp_group,
                dp_cp_group=dp_cp_group,
            )

        else:
            sharded_state_dict[layer_key] = make_sharded_tensor_for_checkpoint(
                tensor,
                layer_key,
                prepend_offsets=sharded_offsets,
                tp_group=tp_group,
                dp_cp_group=dp_cp_group,
            )

    return sharded_state_dict


def make_sharded_object_for_checkpoint(
    obj: Any,
    key: str,
    sharded_offsets: Iterable[Tuple[int, int, int]] = (),
    replica_id: Union[None, int, Tuple[int, ...]] = None,
    **kwargs,
):
    """Helper for instantiating a non-sharded ShardedObject (replicated across TP and DP group).

    Args:
        obj (object): any object to be sharded
        key (str): unique identifier of the object
        sharded_offsets (Iterable[Tuple[int, int, int]]): offsets normally
            prepended to ShardedTensors, will be used as global offsets for
            ShardedObject
        replica_id (Union[None, int, Tuple[int, ...]]): replica id
    """
    if replica_id is None:
        replica_id = (
            0,
            parallel_state.get_tensor_model_parallel_rank(),
            parallel_state.get_data_parallel_rank(with_context_parallel=True),
        )

    return ShardedObject(key, obj, *_get_extra_state_offsets(sharded_offsets), replica_id, **kwargs)


def _get_extra_state_offsets(
    sharded_offsets: Iterable[Tuple[int, int, int]]
) -> Tuple[Tuple[int, ...], Tuple[int, ...]]:
    """Turns ShardedTensor offsets into offsets suitable for ShardedObject."""
    if sharded_offsets:
        sharded_offsets = sorted(sharded_offsets, key=itemgetter(0))  # sort by axis
        axis, extra_state_offset, extra_state_shape = zip(*sharded_offsets)
        assert list(axis) == list(
            range(len(axis))
        ), f'Expected contiguous axis for offsets: {sharded_offsets}'
    else:
        extra_state_shape = (1,)
        extra_state_offset = (0,)
    return extra_state_shape, extra_state_offset


def ensure_metadata_has_dp_cp_group(metadata: Optional[dict]) -> dict:
    """Ensure `metadata` is a dict containing `dp_cp_group` entry.

    If `metadata` is None, a new dict is returned with `dp_cp_group` set.
    If `metadata` is a dict and missing `dp_cp_group`, it is updated in-place.
    Otherwise, asserts that `dp_cp_group` exists.
    """
    if metadata is None:
        return {'dp_cp_group': parallel_state.get_data_parallel_group(with_context_parallel=True)}
    assert isinstance(metadata, dict), "metadata must be a dict with dp_cp_group as key"
    if 'dp_cp_group' not in metadata:
        metadata['dp_cp_group'] = parallel_state.get_data_parallel_group(with_context_parallel=True)
    return metadata


def sharded_state_dict_default(
    module: torch.nn.Module,
    prefix: str = '',
    sharded_offsets: Tuple[Tuple[int, int, int]] = (),
    metadata: Optional[dict] = None,
    tp_group: Optional[torch.distributed.ProcessGroup] = None,
) -> ShardedStateDict:
    """Provides implementation for sharded_state_dict method for non-MegatronModules.

    Tries to call `module.sharded_state_dict` when possible,
    otherwise uses regular state dict and assumes tensors are replicated across TP and DP.

    `keep_vars=True` is passed to module.state_dict so that optimizer states
    can be sharded later on.

    Args:
        module (torch.nn.Module): module which sharded state dict we want to obtain
        prefix (str): prefix for the state dict keys
        sharded_offsets (Tuple[Tuple[int, int, int]], optional): sharding already
            applied (e.g. PP related) by sup-modules. Passed along to ShardedTensor
        metadata (dict, optional): metadata passed to module sharded_state_dict method
        tp_group (Optional[torch.distributed.ProcessGroup], optional): tensor parallel group.
            If None, defaults to parallel_state.get_tensor_model_parallel_group()

    Returns:
        dict: dictionary of state dict keys mapped to ShardedTensors
    """

    # Guard for cases metadata is not provided
    metadata = ensure_metadata_has_dp_cp_group(metadata)

    if hasattr(module, 'sharded_state_dict'):
        module_sharded_sd = module.sharded_state_dict(
            prefix=prefix, sharded_offsets=sharded_offsets, metadata=metadata
        )
    else:
        module_sd = module.state_dict(prefix='', keep_vars=True)
        module_sharded_sd = make_sharded_tensors_for_checkpoint(
            module_sd,
            prefix,
            {},
            sharded_offsets,
            tp_group=tp_group,
            dp_cp_group=metadata['dp_cp_group'],
        )
    return module_sharded_sd


# Initialize cache for sequence parallel modules
_sequence_parallel_attr_cache = None


def _init_sequence_parallel_cache(model, exclude_modules):
    """
    Initialize the cache of modules with sequence parallel attributes.
    Only needs to be called once, subsequent calls have no effect.

    Args:
        model: model to change sequence parallelism attributes
        exclude_modules: Modules to exclude from changing sequence parallelism
    """
    global _sequence_parallel_attr_cache
    model_id = id(model)
    if _sequence_parallel_attr_cache is not None and model_id in _sequence_parallel_attr_cache:
        return  # Cache already initialized

    # Attributes for sequence parallel
    sequence_parallel_attrs = [
        "sequence_parallel",
        "scatter_to_sequence_parallel",
        "reduce_scatter_embeddings",
    ]

    if model.position_embedding_type == "learned_absolute":
        sequence_parallel_attrs.remove("reduce_scatter_embeddings")

    # Initialize dictionary to hold attributes -> list of modules
    if _sequence_parallel_attr_cache is None:
        _sequence_parallel_attr_cache = {}
    _sequence_parallel_attr_cache[model_id] = {attr: [] for attr in sequence_parallel_attrs}

    # Get the model
    model_modules = model

    # Recursive function to find all modules with our target attributes
    def find_modules_with_attrs(module):
        if exclude_modules is None or module not in exclude_modules:
            # Check if this module has any of our target attributes
            for attr in sequence_parallel_attrs:
                if hasattr(module, attr):
                    _sequence_parallel_attr_cache[model_id][attr].append(module)

            # Check all children modules recursively
            for child in module._modules.values():
                if child is not None:
                    find_modules_with_attrs(child)

    # Start the search from each major component
    find_modules_with_attrs(model_modules)


def set_model_to_sequence_parallel(model, set_to=False, exclude_modules=None):
    """
    Set sequence parallel attributes for the model.

    Args:
        set_to: Value to set for sequence_parallel attributes
        exclude_modules: Modules to exclude from changing sequence parallelism
    """
    global _sequence_parallel_attr_cache
    model_id = id(model)

    # Initialize cache if needed
    if _sequence_parallel_attr_cache is None or model_id not in _sequence_parallel_attr_cache:
        _init_sequence_parallel_cache(model, exclude_modules)

    model.config.sequence_parallel = set_to

    # Set all cached attributes to desired value
    for attr, modules in _sequence_parallel_attr_cache[model_id].items():
        for module in modules:
            setattr(module, attr, set_to)


# Initialize cache for modules
cuda_graph_attr_cache = None


def init_cuda_graph_cache(model):
    """
    Initialize the cache of modules for cuda graphs
    """
    global cuda_graph_attr_cache
    model_id = id(model)
    if cuda_graph_attr_cache is not None and model_id in cuda_graph_attr_cache:
        return  # Cache already initialized

    cuda_graph_attrs = ["cuda_graph_impl", "flash_decode", "cudagraph_manager"]

    # Special case handling for activation recomputation
    if model.config.recompute_granularity is not None:
        cuda_graph_attrs.append("recompute_granularity")

    # Initialize dictionary to hold attributes -> list of modules
    if cuda_graph_attr_cache is None:
        cuda_graph_attr_cache = {}

    cuda_graph_attr_cache[model_id] = {attr: [] for attr in cuda_graph_attrs}

    # Get the model
    model_modules = model

    # Recursive function to find all modules with our target attributes
    def find_modules_with_attrs(module):
        # Check if this module has any of our target attributes
        for attr in ["cuda_graph_impl", "flash_decode"]:
            if hasattr(module, attr) and isinstance(getattr(module, attr), bool):
                cuda_graph_attr_cache[model_id][attr].append(module)

            # Check for config variables
            if hasattr(module, "config"):
                if hasattr(module.config, attr):
                    cuda_graph_attr_cache[model_id][attr].append(module.config)

        # Specific caching for cuda graph managers
        if hasattr(module, "cudagraph_manager"):
            cuda_graph_attr_cache[model_id]["cudagraph_manager"].append(
                [module, module.cudagraph_manager]
            )

        # Specific caching for recompute granularity
        if hasattr(module, "recompute_granularity"):
            cuda_graph_attr_cache[model_id]["recompute_granularity"].append(
                [module, module.recompute_granularity]
            )

        # Check all children modules recursively
        for child in module._modules.values():
            if child is not None:
                find_modules_with_attrs(child)

    # Start the search from each major component
    find_modules_with_attrs(model_modules)


def toggle_cuda_graphs(model, set_to="none"):
    """
    Toggle CUDA graph-related attributes for the model and its modules.

    Args:
        set_to (str): Value to set for CUDA graph-related attributes.
    """
    global cuda_graph_attr_cache
    model_id = id(model)

    # Initialize cache if needed
    if cuda_graph_attr_cache is None or model_id not in cuda_graph_attr_cache:
        init_cuda_graph_cache(model)

    assert set_to in ["none", "local"], f"Invalid CUDA graph implementation: {set_to}"
    model.config.cuda_graph_impl = set_to

    # Collect all modules that have any of the CUDA graph attributes
    for attribute, modules in cuda_graph_attr_cache[model_id].items():
        if attribute == "cuda_graph_impl":
            for module in modules:
                setattr(module, attribute, set_to)
        elif attribute == "recompute_granularity":
            for module in modules:
                if set_to == "local":
                    # If we are turning on cuda graphs we need to turn of activation recomputation
                    setattr(module[0], attribute, None)
                else:
                    # If we are turning off cuda graphs we can set it to the cached value
                    setattr(module[0], attribute, module[1])
        # Cuda Graph manager case
        elif attribute == "cudagraph_manager":
            for module in modules:
                if set_to == "local":
                    # If we are not resetting cuda graphs we set it to its cached cuda graph
                    setattr(module[0], attribute, module[1])
                else:
                    for module in modules:
                        # If we are deleting the cuda graph, we delete its attribute
                        if hasattr(module[0], "cudagraph_manager"):
                            delattr(module[0], "cudagraph_manager")


def transition_moe_cudagraphs(model, scope: str):
    """
    Switch MoE layers to the given cudagraph scope. Flips between 'partial' and 'full'.

    Args:
        model: The model with MoE layers which will be transitioned.
        scope: 'partial' for training (router + postprocess captured, expert dispatch eager)
               or 'full' for inference (full-layer graph capture).
    """
    from megatron.core.transformer.transformer_layer import MoETransformerLayer

    for module in model.modules():
        if isinstance(module, MoETransformerLayer):
            module.transition_cudagraph_scope(scope)


def is_layer_window_attention(
    window_size: Optional[Tuple[int, int]], window_attn_skip_freq: int | list, layer_number: int
) -> bool:
    # layer_number is 1-indexed
    if not window_size:
        return False
    if window_attn_skip_freq is None:
        return True
    if isinstance(window_attn_skip_freq, int):
        return layer_number % window_attn_skip_freq != 0
    if isinstance(window_attn_skip_freq, list):
        return bool(window_attn_skip_freq[layer_number - 1])

    raise ValueError(
        f"Invalid `window_attn_skip_freq`: {type(window_attn_skip_freq)}, "
        f"{window_attn_skip_freq}"
    )


================================================
FILE: megatron/core/typed_torch.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
"""Utilities for improved type hinting with torch interfaces."""
from __future__ import annotations

import inspect
from collections.abc import Callable
from typing import Any, Concatenate, Generic, Literal, ParamSpec, Protocol, TypeVar, overload

import torch

P = ParamSpec('P')
R_co = TypeVar('R_co', covariant=True)
T = TypeVar('T')


class _Module(Generic[P, R_co], Protocol):
    """Protocol allowing us to unwrap `forward`."""

    def forward(self, *args: P.args, **kwargs: P.kwargs) -> R_co:
        """Forward method of the matching torch.nn.Module."""
        ...


def apply_module(m: _Module[P, R_co], *, check_subclass: bool = True) -> Callable[P, R_co]:
    """Returns the provided module unchanged, but with correct type hints.

    Args:
      m: An instance of a subclass of `torch.nn.Module`.
      check_subclass: If `True`, checks that `m` is a subclass of
            `torch.nn.Module` and raises a `TypeError` if not.

    Returns:
      That module unchanged, but with correct type hints.
    """
    if check_subclass and not issubclass(type(m), torch.nn.Module):
        raise TypeError(f'{type(m)} is not a subclass of torch.nn.Module')
    return m  # type: ignore


def not_none(value: T | None) -> T:
    """Asserts that the provided value is not None and returns it.

    Args:
        value: An optional value.

    Returns:
        The provided value, guaranteed to be not None.
    """
    if value is None:
        raise ValueError('Expected value to be not None')
    return value


R_src = TypeVar('R_src')
R_dst = TypeVar('R_dst')
P_src = ParamSpec('P_src')
P_dst = ParamSpec('P_dst')
First_dst = TypeVar('First_dst')


@overload
def copy_signature(
    source: Callable[P_src, Any],
    /,
    *,
    handle_return_type: Literal['preserve'] = 'preserve',
    handle_first_src_param: Literal['copy'] = 'copy',
    handle_first_dst_param: Literal['drop'] = 'drop',
) -> Callable[[Callable[..., R_dst]], Callable[P_src, R_dst]]: ...


@overload
def copy_signature(
    source: Callable[P_src, R_src],
    /,
    *,
    handle_return_type: Literal['overwrite'],
    handle_first_src_param: Literal['copy'] = 'copy',
    handle_first_dst_param: Literal['drop'] = 'drop',
) -> Callable[[Callable[..., Any]], Callable[P_src, R_src]]: ...


@overload
def copy_signature(
    source: Callable[Concatenate[Any, P_src], Any],
    /,
    *,
    handle_return_type: Literal['preserve'] = 'preserve',
    handle_first_src_param: Literal['skip'],
    handle_first_dst_param: Literal['drop'] = 'drop',
) -> Callable[[Callable[..., R_dst]], Callable[P_src, R_dst]]: ...


@overload
def copy_signature(
    source: Callable[Concatenate[Any, P_src], R_src],
    /,
    *,
    handle_return_type: Literal['overwrite'],
    handle_first_src_param: Literal['skip'],
    handle_first_dst_param: Literal['drop'] = 'drop',
) -> Callable[[Callable[..., Any]], Callable[P_src, R_src]]: ...


@overload
def copy_signature(
    source: Callable[P_src, Any],
    /,
    *,
    handle_return_type: Literal['preserve'] = 'preserve',
    handle_first_src_param: Literal['copy'] = 'copy',
    handle_first_dst_param: Literal['preserve'],
) -> Callable[
    [Callable[Concatenate[First_dst, ...], R_dst]], Callable[Concatenate[First_dst, P_src], R_dst]
]: ...


@overload
def copy_signature(
    source: Callable[P_src, R_src],
    /,
    *,
    handle_return_type: Literal['overwrite'],
    handle_first_src_param: Literal['copy'] = 'copy',
    handle_first_dst_param: Literal['preserve'],
) -> Callable[
    [Callable[Concatenate[First_dst, ...], Any]], Callable[Concatenate[First_dst, P_src], R_src]
]: ...


@overload
def copy_signature(
    source: Callable[Concatenate[Any, P_src], Any],
    /,
    *,
    handle_return_type: Literal['preserve'] = 'preserve',
    handle_first_src_param: Literal['skip'],
    handle_first_dst_param: Literal['preserve'],
) -> Callable[
    [Callable[Concatenate[First_dst, ...], R_dst]], Callable[Concatenate[First_dst, P_src], R_dst]
]: ...


@overload
def copy_signature(
    source: Callable[Concatenate[Any, P_src], R_src],
    /,
    *,
    handle_return_type: Literal['overwrite'],
    handle_first_src_param: Literal['skip'],
    handle_first_dst_param: Literal['preserve'],
) -> Callable[
    [Callable[Concatenate[First_dst, ...], Any]], Callable[Concatenate[First_dst, P_src], R_src]
]: ...


def copy_signature(
    source: Callable[..., Any],
    /,
    *,
    handle_return_type: Literal['preserve', 'overwrite'] = 'preserve',
    handle_first_src_param: Literal['copy', 'skip'] = 'copy',
    handle_first_dst_param: Literal['preserve', 'drop'] = 'drop',
):
    """Decorator to copy the signature from one function to another.

      Similar to `functools.wraps`, but preserves the signature instead of the
      metadata. Useful when writing adapter/wrapper functions that forward arguments
      to another function, as in:

          def function_with_lots_of_args(
              a: int,
              b: str,
              c: float,
              ...
          ) -> BigObject:
              ...

          @copy_signature(function_with_lots_of_args)
          def convenient_wrapper(*args: Any, **kwargs: Any) -> str:
              return function_with_lots_of_args(*args, **kwargs).to_string()

    Args:
        source: The function or callable from which to copy the signature.
        handle_return_type: How to handle the return type of the decorated
          function. 'preserve' to keep the decorated function's return type
          (the default, since many wrappers are specifically written to return a
          different type), or 'overwrite' to copy the source function's return
          type as well.
        handle_first_src_param: Whether to include the first parameter of the
          source function. 'copy' to include it in the decorated function's
          signature (the default), 'skip' to exclude it (useful for removing
          'self' or 'cls').
        handle_first_dst_param: Whether to keep the first parameter of the
          decorated function. 'drop' to overwrite it just like any other parameter
          (the default), or 'preserve' to keep it in the decorated function's
          signature (useful for preserving 'self' or 'cls').

      Returns:
          A decorator that copies the signature from `source` to the decorated function.
    """
    source_signature = inspect.signature(source)

    def decorator(decorated: Callable[..., Any], /) -> Callable[..., Any]:
        dest_signature = inspect.signature(decorated)
        new_params = []
        if handle_first_dst_param == 'preserve':
            new_params.append(next(iter(dest_signature.parameters.values())))
        src_params_iter = iter(source_signature.parameters.values())
        if handle_first_src_param == 'skip':
            next(src_params_iter)
        new_params.extend(src_params_iter)
        new_signature = dest_signature.replace(parameters=new_params)
        if handle_return_type == 'overwrite':
            new_signature = new_signature.replace(
                return_annotation=source_signature.return_annotation
            )

        decorated.__signature__ = new_signature  # type: ignore
        return decorated

    return decorator


================================================
FILE: megatron/core/utils.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

"""Utility functions used throughout Megatron core"""

import array
import asyncio
import functools
import hashlib
import inspect
import logging
import math
import operator
import queue
import socket
import sys
import threading
import time
import traceback
import warnings
from collections import defaultdict
from contextlib import contextmanager, nullcontext
from dataclasses import dataclass
from datetime import datetime
from functools import lru_cache, reduce, wraps
from importlib.metadata import version
from types import TracebackType
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union

import numpy
import torch

from megatron.core import config
from megatron.core._rank_utils import log_single_rank
from megatron.core.package_info import __version__ as mcore_version
from megatron.core.packed_seq_params import PackedSeqParams

try:
    from torch.distributed._tensor import DTensor
    from torch.distributed.tensor.placement_types import Shard

    HAVE_DTENSOR = True
except ImportError:
    HAVE_DTENSOR = False

from megatron.core import parallel_state
from megatron.core.dist_checkpointing.mapping import ShardedTensor

try:
    from packaging.version import Version as PkgVersion

    HAVE_PACKAGING = True
except ImportError:
    HAVE_PACKAGING = False

try:
    import nvtx

    HAVE_NVTX = True
except ImportError:
    HAVE_NVTX = False

logger = logging.getLogger(__name__)

try:
    # Register the TE CUDA kernels
    import transformer_engine  # pylint: disable=unused-import

    # Alias the PyTorch wrapper so we can call tex.* APIs
    import transformer_engine_torch as tex
except ImportError:
    # TE isn’t installed or the torch wrapper is missing
    tex = None

try:
    _torch_version = PkgVersion(torch.__version__)
except Exception:
    # This is a WAR for building docs, where torch is not actually imported
    _torch_version = PkgVersion("0.0.0") if HAVE_PACKAGING else "0.0.0"
_te_version = None
_fa_version = None
_flashinfer_version = None
_mamba_ssm_version = None
_causal_conv1d_version = None


@contextmanager
def null_decorator(*args, **kwargs):
    """
    No-op decorator.
    """
    if len(kwargs) == 0 and len(args) == 1 and callable(args[0]):
        return args[0]
    else:

        def inner(func):
            return func

        return inner


class ExperimentalNotEnabledError(Exception):
    """Raised during calls to experimental code when ENABLE_EXPERIMENTAL not set."""


def experimental_fn(introduced_with_version: str):
    """A decorator that marks a function as experimental.
    Experimental functions may change quickly and do not guarantee backwards
    compatiblity.

    Experimental functions have a limited lifetime and should
    either be productionized or deprecated.

    Args:
        introduced_with_version (str): A version-like string of Mcore at time of
            introduction.

    Raises:
        ExperimentalNotEnabledError: Error raised when experimental function
            was called without enabling the experimental flag.
    """
    logged_functions = set()

    def validator(func: Callable, max_lifetime: int = 3) -> Callable:
        """Validates the request to the experimental function.

        Args:
            func (Callable): Callee
            max_lifetime (int, optional): Number of minor version that the experimental
                function is allowed to exist. Defaults to 3.

        Raises:
            ExperimentalNotEnabledError: Error raised when experimental function
                was called without enabling the experimental flag.

        Returns:
            Callable: The callee function.
        """
        if not HAVE_PACKAGING:
            raise ImportError(
                "packaging is not installed. Please install it with `pip install packaging`."
            )
        if (
            PkgVersion(introduced_with_version).minor + max_lifetime
            < PkgVersion(mcore_version).minor
        ):
            log_single_rank(
                logger,
                logging.WARNING,
                "%s has reached end of life. Please migrate to a non-experimental function.",
                func.__name__,
            )

        @wraps(func)
        def wrapped_func(*args, **kwargs):
            if config.is_experimental_enabled() is not True:
                raise ExperimentalNotEnabledError(f"Flag config.ENABLE_EXPERIMENTAL not enabled.")
            # log once on one rank
            if func.__name__ not in logged_functions:
                logged_functions.add(func.__name__)
                log_single_rank(
                    logger, logging.INFO, "ENABLE_EXPERIMENTAL is True, running experimental code."
                )

            return func(*args, **kwargs)

        return wrapped_func

    return validator


def experimental_cls(introduced_with_version: str):
    """A decorator that marks a Class as experimental.
    Experimental Classes may change quickly and do not guarantee backwards
    compatiblity.

    Experimental classes have a limited lifetime and should
    either be productionized or deprecated.

    Args:
        introduced_with_version (str): A version-like string of Mcore at time of
            introduction.

    Raises:
        ExperimentalNotEnabledError: Error raised when experimental class
            was called without enabling the experimental flag.
    """
    logged_classes = set()

    def validator(cls: Callable, max_lifetime: int = 3) -> Callable:
        """Validates the request to the experimental function.

        Args:
            func (Callable): Callee
            max_lifetime (int, optional): Number of minor version that the experimental
                function is allowed to exist. Defaults to 3.

        Raises:
            ExperimentalNotEnabledError: Error raised when experimental function
                was called without enabling the experimental flag.

        Returns:
            Callable: The callee function.
        """
        if not HAVE_PACKAGING:
            raise ImportError(
                "packaging is not installed. Please install it with `pip install packaging`."
            )

        if (
            PkgVersion(introduced_with_version).minor + max_lifetime
            < PkgVersion(mcore_version).minor
        ):
            log_single_rank(
                logger,
                logging.WARNING,
                "%s has reached end of life. Please migrate to a non-experimental function.",
                cls.__name__,
            )

        def wrapped_func(cls):
            def guard(super: super, attr: str):
                """Pass-through to callee attribute if experimental flag is enabled.

                Args:
                    super (super): Parent class of callee.
                    attr (str): Attribute of callee that is being called.

                Raises:
                    ExperimentalNotEnabledError: Raised if flag is not set.

                Returns:
                    Attribute of callee.
                """
                if attr == "is_experimental":
                    return config.is_experimental_enabled()

                if config.is_experimental_enabled() is not True:
                    raise ExperimentalNotEnabledError(
                        f"Flag config.ENABLE_EXPERIMENTAL not enabled."
                    )
                # log once on one rank
                if cls.__name__ not in logged_classes:
                    logged_classes.add(cls.__name__)
                    log_single_rank(
                        logger,
                        logging.INFO,
                        "ENABLE_EXPERIMENTAL is True, running experimental code.",
                    )
                return super.__getattribute__(attr)

            class ClassInterceptor(type):
                """Metaclass to intercept calls from the uninitialized class."""

                def __init__(self, *args, **kwargs):
                    super().__init__(*args, **kwargs)
                    self.__class__ = type(cls.__qualname__, (ClassInterceptor,), {})

                def __getattribute__(self, attr):
                    """Intercepts calls like A.hello_world()"""
                    return guard(super(), attr)

            class Proxy(cls, metaclass=ClassInterceptor):
                """Proxies calls from caller to the callee by relaying all
                attribute calls through a guarding mechanism.

                We use `__getattribute__` for relaying calls. Opposed to `__getattr__`,
                this is called regardless of whether the attribute exists or not.

                We need to distinguish two cases: callee is an instance vs. a class.

                If callee is an instance, `__getattribute__` will look and find attributes
                at the class level.

                If callee is a class, `__getattribute__` will look for attributes at
                _its_ class, which is `type`. Here, it won't find attributes.
                We solve this a metaclass mixin which swaps `type` with a custom class
                that supersets the callee's class. For mixins, any methods provided on
                parent classes will be provided to the metaclass. We add a
                `__getattribute__` to the metaclass as to allow it to fetch it from the
                callees class.

                """

                def __init__(self, *args, **kwargs):
                    super().__init__(*args, **kwargs)
                    self.__class__ = type(cls.__qualname__, (Proxy,), {})

                def __getattribute__(self, attr):
                    """Intercepts calls like a.hello_world()"""
                    return guard(super(), attr)

            return Proxy

        return wrapped_func(cls)

    return validator


def get_te_version():
    """Get TE version from __version__; if not available use pip's. Use caching."""
    if not HAVE_PACKAGING:
        raise ImportError(
            "packaging is not installed. Please install it with `pip install packaging`."
        )

    try:
        import transformer_engine as te

        HAVE_TE = True
    except ImportError:
        HAVE_TE = False

    def get_te_version_str():
        import transformer_engine as te

        if hasattr(te, "__version__"):
            return str(te.__version__)
        else:
            return version("transformer-engine")

    global _te_version
    if _te_version is None:
        if HAVE_TE:
            _te_version = PkgVersion(get_te_version_str())
        else:
            _te_version = PkgVersion("0.0.0")
    return _te_version


def is_te_min_version(version, check_equality=True):
    """Check if minimum version of `transformer-engine` is installed."""
    if not HAVE_PACKAGING:
        raise ImportError(
            "packaging is not installed. Please install it with `pip install packaging`."
        )

    if check_equality:
        return get_te_version() >= PkgVersion(version)
    return get_te_version() > PkgVersion(version)


def get_torch_version():
    """Get torch version from __version__."""

    global _torch_version
    return _torch_version


def is_torch_min_version(version, check_equality=True):
    """Check if minimum version of `torch` is installed."""
    if not HAVE_PACKAGING:
        raise ImportError(
            "packaging is not installed. Please install it with `pip install packaging`."
        )
    if check_equality:
        return get_torch_version() >= PkgVersion(version)
    return get_torch_version() > PkgVersion(version)


def get_fa_version():
    """Get Flash attention version from __version__; if not available use pip's. Use caching."""
    if not HAVE_PACKAGING:
        raise ImportError(
            "packaging is not installed. Please install it with `pip install packaging`."
        )

    def get_fa_version_str():
        import flash_attn as fa

        if hasattr(fa, "__version__"):
            return str(fa.__version__)
        else:
            return version("flash-attn")

    global _fa_version
    if _fa_version is None:
        _fa_version = PkgVersion(get_fa_version_str())
    return _fa_version


def is_fa_min_version(version, check_equality=True):
    """Check if minimum version of `flash-attn` is installed."""
    if not HAVE_PACKAGING:
        raise ImportError(
            "packaging is not installed. Please install it with `pip install packaging`."
        )
    if check_equality:
        return get_fa_version() >= PkgVersion(version)
    return get_fa_version() > PkgVersion(version)


def get_mamba_version():
    """Get mamba version from __version__; if not available use pip's. Use caching."""
    if not HAVE_PACKAGING:
        raise ImportError(
            "packaging is not installed. Please install it with `pip install packaging`."
        )

    def get_mamba_version_str():
        import mamba_ssm

        if hasattr(mamba_ssm, "__version__"):
            return str(mamba_ssm.__version__)
        else:
            return version("mamba_ssm")

    global _mamba_ssm_version
    if _mamba_ssm_version is None:
        _mamba_ssm_version = PkgVersion(get_mamba_version_str())
    return _mamba_ssm_version


def is_mamba_min_version(version, check_equality=True):
    """Check if minimum version of `mamba_ssm` is installed."""
    if not HAVE_PACKAGING:
        raise ImportError(
            "packaging is not installed. Please install it with `pip install packaging`."
        )
    if check_equality:
        return get_mamba_version() >= PkgVersion(version)
    return get_mamba_version() > PkgVersion(version)


def get_causal_conv1d_version():
    """Get causal_conv1d version from __version__; if not available use pip's. Use caching."""
    if not HAVE_PACKAGING:
        raise ImportError(
            "packaging is not installed. Please install it with `pip install packaging`."
        )

    def get_causal_conv1d_version_str():
        import causal_conv1d

        if hasattr(causal_conv1d, "__version__"):
            return str(causal_conv1d.__version__)
        else:
            return version("causal_conv1d")

    global _causal_conv1d_version
    if _causal_conv1d_version is None:
        _causal_conv1d_version = PkgVersion(get_causal_conv1d_version_str())
    return _causal_conv1d_version


def is_causal_conv1d_min_version(version, check_equality=True):
    """Check if minimum version of `causal_conv1d` is installed."""
    if not HAVE_PACKAGING:
        raise ImportError(
            "packaging is not installed. Please install it with `pip install packaging`."
        )
    if check_equality:
        return get_causal_conv1d_version() >= PkgVersion(version)
    return get_causal_conv1d_version() > PkgVersion(version)


def get_flashinfer_version():
    """Get flashinfer version from __version__; if not available use pip's. Use caching."""
    if not HAVE_PACKAGING:
        raise ImportError(
            "packaging is not installed. Please install it with `pip install packaging`."
        )

    def get_flashinfer_version_str():
        try:
            import flashinfer
        except ImportError:
            return None

        if hasattr(flashinfer, "__version__"):
            return str(flashinfer.__version__)
        else:
            return version("flashinfer")

    global _flashinfer_version
    if _flashinfer_version is None:
        if (flashinfer_version_str := get_flashinfer_version_str()) is not None:
            _flashinfer_version = PkgVersion(flashinfer_version_str)
    return _flashinfer_version


def is_flashinfer_min_version(version, check_equality=True):
    """Check if minimum version of `flashinfer` is installed."""
    if not HAVE_PACKAGING:
        raise ImportError(
            "packaging is not installed. Please install it with `pip install packaging`."
        )
    if (flashinfer_version := get_flashinfer_version()) is None:
        return False
    if check_equality:
        return flashinfer_version >= PkgVersion(version)
    return flashinver_version > PkgVersion(version)


def ensure_divisibility(numerator, denominator):
    """Ensure that numerator is divisible by the denominator."""
    assert numerator % denominator == 0, "{} is not divisible by {}".format(numerator, denominator)


def divide(numerator, denominator):
    """Ensure that numerator is divisible by the denominator and return
    the division value."""
    ensure_divisibility(numerator, denominator)
    return numerator // denominator


def get_tensor_model_parallel_group_if_none(tp_group, is_expert=False, check_initialized=True):
    """Issue a deprecation warning if tp_group is None and return the default tp group."""
    # TODO(zijiey): remove this function later.
    if not torch.distributed.is_initialized():
        return None

    # if parallel_state is not initialized, pass `tp_group` thru
    if not parallel_state.is_initialized():
        return tp_group

    if tp_group is None:
        if torch.distributed.is_initialized() and torch.distributed.get_rank() == 0:
            warnings.warn(
                "Warning: tp_group is None, using default tp group. "
                "Passing tp_group will be mandatory soon",
                DeprecationWarning,
                stacklevel=2,
            )
        if is_expert:
            tp_group = parallel_state.get_expert_tensor_parallel_group(
                check_initialized=check_initialized
            )
        else:
            tp_group = parallel_state.get_tensor_model_parallel_group(
                check_initialized=check_initialized
            )
    return tp_group


def get_pg_size(group=None):
    """Get world size for a distributed group.

    Args:
        group: Process group to get world size for. If None, uses default group.

    Returns:
        int: World size (1 if distributed not initialized or group is None, else group.size())
    """
    if not torch.distributed.is_initialized() or group is None:
        return 1
    return group.size()


def get_pg_rank(group=None):
    """Get rank for a distributed group.

    Args:
        group: Process group to get rank for. If None, uses default group.

    Returns:
        int: Rank (0 if distributed not initialized or group is None, else group.rank())
    """
    if not torch.distributed.is_initialized() or group is None:
        return 0
    return group.rank()


def get_pg_src_rank(group=None):
    """Calculate the global rank corresponding to the first local rank
    in the given process group.

    Args:
        group: Process group to query. If None or distributed is not initialized,
            returns 0.

    Returns:
        int: The first (source) global rank in the group.
    """
    if not torch.distributed.is_initialized() or group is None:
        return 0
    ranks = torch.distributed.get_process_group_ranks(group)
    return ranks[0]


def get_attr_wrapped_model(model, attr, allow_none=True, return_model_obj=False):
    """Get an attribute from a wrapped model.
    If return_model_obj is true, return the object that has the 'attr' attribute;
    otherwise, return the attribute directly."""
    if isinstance(model, list):
        raise RuntimeError("_get_attr_wrapped_model given a list of models")

    if allow_none:

        def condition(model, attr):
            return not hasattr(model, attr)

    else:

        def condition(model, attr):
            return getattr(model, attr, None) is None

    while condition(model, attr):
        if not hasattr(model, "module"):
            raise RuntimeError(f"_get_attr_wrapped_model couldn't find attribute {attr}")

        model = model.module

    if return_model_obj:
        return model
    return getattr(model, attr)


def get_model_type(model):
    """Returns model_type attribute"""
    return get_attr_wrapped_model(model, "model_type")


def get_model_xattn(model):
    """Returns whether the model has the xattn_needed attribute"""
    try:
        return get_attr_wrapped_model(model, "xattn_needed")
    except RuntimeError:
        return False


def get_model_config(model):
    """Returns the config attribute, allowed to return None"""
    return get_attr_wrapped_model(model, "config", allow_none=False)


class GlobalMemoryBuffer:
    """Global buffer to avoid dynamic memory allocations.
    Caller should ensure that buffers of the same name
    are not used concurrently."""

    def __init__(self):
        self.buffer = {}

    def get_tensor(self, tensor_shape, dtype, name, mem_alloc_context: Optional[Callable] = None):
        """
        Returns (potentially) a sub-tensor from the self.buffer for the given shape.
        """
        required_len = reduce(operator.mul, tensor_shape, 1)
        if (
            self.buffer.get((name, dtype), None) is None
            or self.buffer[(name, dtype)].numel() < required_len
        ):
            mem_alloc_context = mem_alloc_context if mem_alloc_context else nullcontext
            with mem_alloc_context():
                self.buffer[(name, dtype)] = torch.empty(
                    required_len,
                    dtype=dtype,
                    device=torch.cuda.current_device(),
                    requires_grad=False,
                )

        return self.buffer[(name, dtype)][0:required_len].view(*tensor_shape)


def _kernel_make_viewless_tensor(inp, requires_grad):
    """Make a viewless tensor.

    View tensors have the undesirable side-affect of retaining a reference
    to the originally-viewed tensor, even after manually setting the '.data'
    field. This method creates a new tensor that links to the old tensor's
    data, without linking the viewed tensor, referenced via the '._base'
    field.
    """
    out = torch.empty((1,), dtype=inp.dtype, device=inp.device, requires_grad=requires_grad)
    out.data = inp.data
    return out


class WrappedTensor:
    """
    A wrapper for tensors that enables caller functions to pass an indirect reference
    to callee functions. By wrapping the tensor, the caller's direct reference is removed,
    allowing the tensor to be garbage collected once the callee unwraps and frees it.
    """

    def __init__(self, tensor: torch.Tensor):
        self._wrapper = [tensor]

    def unwrap(self):
        """
        Returns the wrapped tensor while deleting the internal reference.
        Can only be called once.
        """
        if len(self._wrapper) == 0:
            raise RuntimeError(f"WrappedTensor has already been unwrapped")
        return self._wrapper.pop(0)


class MakeViewlessTensor(torch.autograd.Function):
    """
    Autograd function to make a viewless tensor.

    This function should be used in cases where the computation graph needs
    to be propagated, but we only want a viewless tensor (e.g.,
    ParallelTransformer's hidden_states). Call this function by passing
    'keep_graph = True' to 'make_viewless_tensor()'.
    """

    @staticmethod
    def forward(ctx, inp, requires_grad):
        """Runs the fwd pass of _kernel_make_viewless_tensor"""
        return _kernel_make_viewless_tensor(inp, requires_grad)

    @staticmethod
    def backward(ctx, grad_output):
        """No-op"""
        return grad_output, None


def make_viewless_tensor(inp, requires_grad, keep_graph):
    """
    Entry-point for creating viewless tensors.

    This method should be used, rather than calling 'MakeViewlessTensor'
    or '_kernel_make_viewless_tensor' directly. This method acts as a
    switch for determining if an autograd function or a regular method
    should be used to create the tensor.
    """

    # return tensor as-is, if not a 'view'
    if inp._base is None:
        return inp

    # create viewless tensor
    if keep_graph:
        return MakeViewlessTensor.apply(inp, requires_grad)
    else:
        return _kernel_make_viewless_tensor(inp, requires_grad)


def assert_viewless_tensor(tensor, extra_msg=None):
    """Assert that a tensor is not a view (i.e., its '._base' field is
    not set)."""
    if isinstance(tensor, list):
        [assert_viewless_tensor(t) for t in tensor]
        return tensor
    if not isinstance(tensor, torch.Tensor):
        return tensor
    assert tensor._base is None, (
        "Ensure tensor._base is None before setting tensor.data or storing "
        "tensor to memory buffer. Otherwise, a memory leak will occur (and "
        f"likely accumulate over iterations). {extra_msg}"
    )
    return tensor


def safely_set_viewless_tensor_data(tensor, new_data_tensor):
    """Safely set tensor's '.data' field.

    Check first that the tensor is viewless (i.e., '._base' not set). If not,
    raise an exception.
    """
    assert_viewless_tensor(
        tensor,
        extra_msg="FYI, tensor._base has shape %s, and new_data_tensor has shape %s."
        % ("--" if tensor._base is None else tensor._base.shape, new_data_tensor.shape),
    )
    tensor.data = new_data_tensor


def init_method_normal(sigma):
    """Init method based on N(0, sigma)."""
    return functools.partial(torch.nn.init.normal_, mean=0.0, std=sigma)


def scaled_init_method_normal(sigma, num_layers, multiplier=2.0):
    """Init method based on N(0, sigma/sqrt(2*num_layers)."""
    std = sigma / math.sqrt(multiplier * num_layers)

    return functools.partial(torch.nn.init.normal_, mean=0.0, std=std)


def mup_scaled_init_method_normal(sigma, num_layers, width_mult, multiplier=2.0):
    """MuP scaled init method for output layers: N(0, sigma / (sqrt(2*L) * sqrt(m))).

    Combines the standard scaled initialization (for output projection layers)
    with MuP width scaling. This ensures that both depth and width scaling
    are accounted for in the initialization.

    Args:
        sigma (float): Base standard deviation for initialization.
        num_layers (int): Number of transformer layers.
        width_mult (float): Width multiplier (hidden_size / base_hidden_size).
        multiplier (float): Multiplier for depth scaling (default: 2.0).

    Returns:
        Callable: Initialization function for torch.nn.init.
    """
    std = sigma / (math.sqrt(multiplier * num_layers) * math.sqrt(width_mult))
    return functools.partial(torch.nn.init.normal_, mean=0.0, std=std)


def log_on_each_pipeline_stage(
    logger: logging.Logger,
    *args: Any,
    tp_group: Optional[torch.distributed.ProcessGroup] = None,
    dp_cp_group: Optional[torch.distributed.ProcessGroup] = None,
    **kwargs: Any,
):
    """Log on first rank in each pipeline stage

    Args:
        logger (logging.Logger): The logger to write the logs

        args (Tuple[Any]): All logging.Logger.log positional arguments

        kwargs (Dict[str, Any]): All logging.Logger.log keyword arguments
    """
    assert torch.distributed.is_initialized()

    if tp_group is None and dp_cp_group is None:
        tp_rank = parallel_state.get_tensor_model_parallel_rank()
        dp_cp_rank = parallel_state.get_data_parallel_rank(with_context_parallel=True)
    elif tp_group is not None and dp_cp_group is not None:
        tp_rank = tp_group.rank()
        dp_cp_rank = dp_cp_group.rank()
    else:
        raise ValueError("tp_group and dp_cp_group must be provided or not provided together")

    if tp_rank == 0 and dp_cp_rank == 0:
        logger.log(*args, **kwargs)


def check_param_hashes_across_dp_replicas(
    model: List[torch.nn.Module], cross_check: bool = False
) -> bool:
    """Computes hashes of all parameters in model, all-gathers hashes across DP replicas,
    and then checks for equality between the locally-computed hashes and those of other ranks.

    NOTE: This function computes SHA-1 hashes on the CPU and thus needs to move all param
    tensors from GPU to CPU first; as a result, this function is not intended to be called
    very frequently in the main training loop.

    Args:
        model (List[torch.nn.Module]): List of model chunks whose parameter hashes need to
            be checked.
        cross_check (bool): If true, will check whether hashes match across all DP replicas.

    Returns:
        True if all param hashes match with corresponding hash on DP replica 0 or
        across all replicas if cross_check is enabled, False otherwise.
    """

    # Compute per-parameter hashes on this rank.
    # Keep track of expert and non-expert parameters separately since they need to be
    # all-gathered across different sets of ranks.
    non_expert_params, expert_params = [], []
    local_non_expert_param_hashes, local_expert_param_hashes = [], []
    for model_chunk_id, model_chunk in enumerate(model):
        for param_name, param in model_chunk.named_parameters():
            param_hash = torch.frombuffer(
                array.array(
                    "B", hashlib.sha1(param.data.to("cpu").float().numpy(force=True)).digest()
                ),
                dtype=torch.uint8,
            )
            if getattr(param, "allreduce", True):
                non_expert_params.append((model_chunk_id, param_name, param))
                local_non_expert_param_hashes.append(param_hash)
            else:
                expert_params.append((model_chunk_id, param_name, param))
                local_expert_param_hashes.append(param_hash)

    # Use data-modulo-expert parallel group to all-gather expert param hashes, regular
    # data-parallel group for non-expert param hashes.
    all_param_hashes_match = True
    for params, local_param_hashes, all_gather_group in zip(
        [non_expert_params, expert_params],
        [local_non_expert_param_hashes, local_expert_param_hashes],
        [parallel_state.get_data_parallel_group(), parallel_state.get_expert_data_parallel_group()],
    ):
        # Collect per-parameter hashes across all ranks in group.
        assert len(params) == len(local_param_hashes)
        if len(params) == 0:
            continue
        local_param_hashes = torch.stack(local_param_hashes).cuda()
        all_param_hashes = [
            torch.zeros_like(local_param_hashes) for _ in range(all_gather_group.size())
        ]
        torch.distributed.all_gather(all_param_hashes, local_param_hashes, group=all_gather_group)

        # Make sure local per-parameter hash matches DP rank 0.
        param_hashes_match = torch.equal(local_param_hashes, all_param_hashes[0])
        if not param_hashes_match:
            for i, (model_chunk_id, param_name, param) in enumerate(params):
                if not torch.equal(local_param_hashes[i], all_param_hashes[0][i]):
                    rank = torch.distributed.get_rank()
                    logger.info(
                        f"[Rank {rank}] Hash not matching for {param_name} in model chunk"
                        f"{model_chunk_id}"
                    )
        if cross_check:
            # Make sure all ranks have the same hash.
            all_param_hashes_match &= all(
                map(lambda x: torch.equal(local_param_hashes, x), all_param_hashes)
            )
        else:
            all_param_hashes_match &= param_hashes_match

    return all_param_hashes_match


def make_tp_sharded_tensor_for_checkpoint(
    tensor, key, tp_axis=0, replica_id=None, prepend_offsets=(), **kwargs
):
    """Helper for instantiating a ShardedTensor where the `tp_axis` dimension
    is sharded across TP group.

    Optionally, can provide offsets which prepend new dimensions to the tensor.

    Args:
        tensor: Tensor to shard
        key: Key for the sharded tensor
        tp_axis: Axis to shard across tensor parallel group (default: 0)
        replica_id: Replica ID for the tensor (default: None)
        prepend_offsets: Offsets to prepend to tensor dimensions (default: ())
        **kwargs: Additional arguments. May include:
            - tp_group: Tensor parallel group (default: None, falls back to parallel_state)
            - dp_cp_group: Data parallel + context parallel group
              (default: None, falls back to parallel_state)
    """
    # Pop group parameters from kwargs
    tp_group = kwargs.pop('tp_group', None)
    dp_cp_group = kwargs.pop('dp_cp_group', None)

    prepend_axis_num = len(prepend_offsets)

    new_offsets = []

    # Get groups with fallback to parallel_state
    if tp_group is None and dp_cp_group is None:
        tp_group = parallel_state.get_tensor_model_parallel_group()
        dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=True)

    # Use local get_pg_rank and get_pg_size functions
    tp_rank = get_pg_rank(tp_group)
    dp_rank = get_pg_rank(dp_cp_group)
    tp_size = get_pg_size(tp_group)
    dp_size = get_pg_size(dp_cp_group)
    dp_replica_id = get_pg_rank(dp_cp_group)

    new_offsets.append((tp_axis + prepend_axis_num, tp_rank, tp_size))

    if HAVE_DTENSOR and isinstance(tensor, DTensor):
        # TP + FSDP2 sharding
        dp_replica_id = 0
        tensor = tensor._local_tensor

        if tp_axis == 0:
            # both FSDP2 and TP shards axis 0
            # default MCore uses tp-cp-ep-dp-pp
            # FSDP2 is compatibile with TP, CP
            new_offsets[0] = (prepend_axis_num, tp_rank * dp_size + dp_rank, tp_size * dp_size)
        else:
            # FSDP2 shards axis 0 and TP shards some other axis
            new_offsets.append((prepend_axis_num, dp_rank, dp_size))

    if replica_id is None:
        replica_id = (0, 0, dp_replica_id)

    return ShardedTensor.from_rank_offsets(
        key,
        tensor,
        *prepend_offsets,
        *new_offsets,
        replica_id=replica_id,
        prepend_axis_num=prepend_axis_num,
        **kwargs,
    )


def make_sharded_tensor_for_checkpoint(tensor, key, prepend_offsets=(), replica_id=None, **kwargs):
    """Helper for instantiating a non-sharded ShardedTensor (replicated across TP and DP group).

    Optionally, can provide offsets which prepend new dimensions to the tensor.

    Keyword Args:
        tensor: Tensor to create sharded tensor for
        key: Key for the sharded tensor
        prepend_offsets: Offsets to prepend to tensor dimensions (default: ())
        replica_id: Replica ID for the tensor (default: None)
        **kwargs: Additional arguments. May include:
            - tp_group: Tensor parallel group (default: None, falls back to parallel_state)
            - dp_cp_group: Data parallel + context parallel group
              (default: None, falls back to parallel_state)
    """
    # Pop group parameters from kwargs
    tp_group = kwargs.pop('tp_group', None)
    dp_cp_group = kwargs.pop('dp_cp_group', None)

    prepend_axis_num = len(prepend_offsets)

    new_offsets = []

    # Get groups with fallback to parallel_state
    if tp_group is None and dp_cp_group is None:
        tp_group = parallel_state.get_tensor_model_parallel_group()
        dp_cp_group = parallel_state.get_data_parallel_group(with_context_parallel=True)

    # Use local get_pg_rank and get_pg_size functions
    dp_rank = get_pg_rank(dp_cp_group)
    dp_size = get_pg_size(dp_cp_group)
    dp_replica_id = get_pg_rank(dp_cp_group)

    if HAVE_DTENSOR and isinstance(tensor, DTensor):
        # FSDP2 sharding
        dp_replica_id = 0
        tensor = get_full_tensor_if_necessary(tensor)
        new_offsets.append((prepend_axis_num, dp_rank, dp_size))

    if replica_id is None:
        replica_id = (0, get_pg_rank(tp_group), dp_replica_id)

    return ShardedTensor.from_rank_offsets(
        key,
        tensor,
        *prepend_offsets,
        *new_offsets,
        replica_id=replica_id,
        prepend_axis_num=prepend_axis_num,
        **kwargs,
    )


def get_full_tensor_if_necessary(tensor):
    """For DTensor gets full tensor if some ranks will not have a local copy"""
    need_full_tensor = False
    for i in range(tensor.device_mesh.ndim):
        if (
            isinstance(tensor.placements[i], Shard)
            and tensor.device_mesh.shape[i] > tensor.shape[tensor.placements[i].dim]
        ):
            need_full_tensor = True
            break

    tensor = tensor.full_tensor() if need_full_tensor else tensor._local_tensor

    return tensor


def to_local_if_dtensor(tensor: Union[torch.Tensor, "DTensor"]) -> torch.Tensor:
    """Returns the local shard of the given tensor if it is a DTensor."""
    with torch.no_grad():
        return tensor.to_local() if HAVE_DTENSOR and isinstance(tensor, DTensor) else tensor


def get_data_parallel_group_if_dtensor(
    tensor: Union[torch.Tensor, "DTensor"], data_parallel_group: "ProcessGroup" = None
) -> Optional["ProcessGroup"]:
    """Gets the data parallel group of the given tensor if it is a DTensor."""
    if HAVE_DTENSOR and isinstance(tensor, DTensor):
        current_group = tensor.device_mesh.get_group()
        assert data_parallel_group is None or current_group == data_parallel_group
        return current_group
    return None


def prepare_input_tensors_for_wgrad_compute(grad_output, all_gathered_input):
    """Ensure grad_output is stored in a contiguous buffer."""
    # Doing gather + slicing during the NeMo forward pass can make this tensor
    # not be contiguous. PyTorch only checks if the tensor is contiguous, and only
    # clones it if it's not contiguous:
    # https://github.com/pytorch/pytorch/blob/c47cf9bc7f9e02f649ab4ed53fe4d35732c92ab6/torch/_refs/__init__.py#L2761
    grad_output = grad_output.contiguous()
    all_gathered_input = all_gathered_input.contiguous()
    # Convert the tensor shapes to 2D for execution compatibility
    if grad_output.dim() == 3:
        grad_output = grad_output.view(
            grad_output.shape[0] * grad_output.shape[1], grad_output.shape[2]
        )
        all_gathered_input = all_gathered_input.view(
            all_gathered_input.shape[0] * all_gathered_input.shape[1], all_gathered_input.shape[2]
        )

    return grad_output, all_gathered_input


try:
    if is_torch_min_version("1.13.0"):
        dist_all_gather_func = torch.distributed.all_gather_into_tensor
    else:
        dist_all_gather_func = torch.distributed._all_gather_base
except Exception:
    dist_all_gather_func = torch.distributed._all_gather_base


def drain_embedding_wgrad_compute(
    config, embedding_activation_buffer, grad_output_buffer, weight, tp_group
):
    """Helper for performing embedding wgrad GEMM's during the pipeline drain phase, pipelines the
    AllGather and GEMM's.

    Should only be used when pipeline model parallelism and gradient accumulation
    fusion are enabled.
    """

    assert len(embedding_activation_buffer) == len(
        grad_output_buffer
    ), "Length of activation and gradient buffers need to be equal!"

    import fused_weight_gradient_mlp_cuda

    from megatron.core.parallel_state import get_global_memory_buffer

    input = embedding_activation_buffer.pop(0)
    world_size = tp_group.size()
    dim_size = list(input.size())
    dim_size[0] = dim_size[0] * world_size

    all_gathered_input = [None, None]
    if config.sequence_parallel:
        all_gather_buffer = get_global_memory_buffer().get_tensor(dim_size, input.dtype, "mpu_0")
        handle = dist_all_gather_func(all_gather_buffer, input, group=tp_group, async_op=False)

        all_gathered_input[0] = all_gather_buffer
        all_gather_buffer = None
    else:
        all_gathered_input[0] = input

    input = None

    def wgrad_compute(all_gathered_input, grad_output, weight):
        grad_output, all_gathered_input = prepare_input_tensors_for_wgrad_compute(
            grad_output, all_gathered_input
        )

        if hasattr(weight, "__fsdp_param__"):
            weight.main_grad = weight.get_main_grad()

        if config.gradient_accumulation_fusion:
            if weight.main_grad.dtype == torch.float32:
                fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp32(
                    all_gathered_input, grad_output, weight.main_grad
                )
            elif weight.main_grad.dtype in (torch.float16, torch.bfloat16):
                fused_weight_gradient_mlp_cuda.wgrad_gemm_accum_fp16(
                    all_gathered_input, grad_output, weight.main_grad
                )
            else:
                raise RuntimeError("Unsupported gradient type for gradient accumulation fusion")

    # We have all_gathered_input list acting as a double buffer here,
    # since we are pipelining the AllGather and GEMM,one buffer all gathers
    # the input while the other buffer reads from it for the GEMM. We use i
    # and (i+1) for indexing to enable this double buffering.
    for i in range(len(embedding_activation_buffer)):
        input = embedding_activation_buffer.pop(0)
        if config.sequence_parallel:
            name = "mpu_" + str((i + 1) % 2)
            all_gather_buffer = get_global_memory_buffer().get_tensor(dim_size, input.dtype, name)
            handle = dist_all_gather_func(all_gather_buffer, input, group=tp_group, async_op=True)

            all_gathered_input[(i + 1) % 2] = all_gather_buffer
            all_gather_buffer = None
        else:
            all_gathered_input[(i + 1) % 2] = input

        grad_output = grad_output_buffer.pop(0)
        wgrad_compute(all_gathered_input[i % 2], grad_output, weight)
        drain_idx = (i + 1) % 2
        input, all_gathered_input[i % 2], grad_output = None, None, None

        if config.sequence_parallel:
            handle.wait()

    grad_output = grad_output_buffer.pop(0)
    wgrad_compute(all_gathered_input[drain_idx], grad_output, weight)
    input, all_gathered_input[drain_idx], grad_output = None, None, None


def local_multi_tensor_applier(op, noop_flag_buffer, tensor_lists, *args):
    """Multi tensor op applier"""
    return op(2048 * 32, noop_flag_buffer, tensor_lists, *args)


# computes l2 norm for a list of contiguous tensors
# works as a drop-in replacement for amp_C.multi_tensor_l2norm
def local_multi_tensor_l2_norm(chunk_size, noop_flag, tensor_lists, per_tensor, *args):
    """
    Computes l2 norm for a list of contiguous tensors
    works as a drop-in replacement for amp_C.multi_tensor_l2norm
    """
    l2 = [[(torch.norm(tensor)) for tensor in tensor_list] for tensor_list in tensor_lists]
    l2_reduced = torch.norm(torch.tensor(l2))
    l2_cuda = torch.tensor([float(l2_reduced)], dtype=torch.float, device="cuda")
    return l2_cuda, None


# works as a drop-in replacement for amp_C.multi_tensor_scale
def local_multi_tensor_scale(chunk_size, noop_flag, tensor_lists, scale):
    """Works as a drop-in replacement for amp_C.multi_tensor_scale."""
    for src, dst in zip(tensor_lists[0], tensor_lists[1]):
        dst.copy_(src * scale)


class _ValueWithRank:
    """This is an internal class, not for use outside this module

    Attributes:
        _rank (int): rank for the value
        _value (float) : the value it stores, eg elapsed time
        _unit (str) : unit for the value
    """

    def __init__(self, value: float, rank: int, unit: str = "") -> None:
        """Initializer

        Args:
            _value (float): the initial value with which it is inited
            _rank (int): the rank number
            _unit (str) : the unit of the value, eg ms or flops
        """
        self._rank = rank
        self._value = value
        self._unit = unit

    def __lt__(self, other) -> bool:
        """Check if value of self is smaller than other's value

        Args:
            other (_ValueWithRank): The other object to compare with

        Returns:
            bool: True if lhs._value of operand is less than rhs._value, else False
        """
        return self._value < other._value

    def __gt__(self, other) -> bool:
        """Check if value of self is larger than other's value

        Args:
            other (_ValueWithRank): The other object to compare with

        Returns:
            bool: True if lhs._value of operand is greater than rhs._value, else False
        """
        return self._value > other._value

    def __call__(self) -> Tuple[float, int, str]:
        """Returns the value, the rank, and unit as a Tuple

        Returns:
            Tuple[float, int, str]: value, rank, unit
        """
        return self._value, self._rank, self._unit

    def __str__(self) -> str:
        """String representation of the object

        Returns:
            str: strigified object
        """

        return f"{self._value:.2f}{self._unit}/{self._rank}"


@dataclass
class _StragglerData:
    """This is an internal dataclass, not for use outside this module

    Attributes:
        min_elapsed (_ValueWithRank) min iteration time across all ranks
        max_elapsed (_ValueWithRank) max iteration time across all ranks
        min_btime (_ValueWithRank) min cpu time across all ranks
        max_btime (_ValueWithRank) max cpu time across all ranks
        min_temp (_ValueWithRank): min gpu temp across all ranks
        max_temp (_ValueWithRank): max gpu temp across all ranks
        min_power (_ValueWithRank) min gpu power across all ranks
        max_power (_ValueWithRank) max gpu power across all ranks
        min_util (_ValueWithRank): min gpu util across all ranks
        max_util (_ValueWithRank): max gpu util across all ranks
        min_clock (_ValueWithRank): min gpu clock across all ranks
        max_clock (_ValueWithRank) max gpu clock across all ranks
        aflops (List[_ValueWithRank]): sorted array of (_ValueWithRank)
    """

    # gemm time
    min_elapsed = _ValueWithRank(sys.float_info.max, 0, "ms")
    max_elapsed = _ValueWithRank(sys.float_info.min, 0, "ms")
    # get_batch time
    min_btime = _ValueWithRank(sys.float_info.max, 0, "us")
    max_btime = _ValueWithRank(sys.float_info.min, 0, "us")
    # temp
    min_temp = _ValueWithRank(sys.float_info.max, 0, "C")
    max_temp = _ValueWithRank(sys.float_info.min, 0, "C")
    # power
    min_power = _ValueWithRank(sys.float_info.max, 0, "W")
    max_power = _ValueWithRank(sys.float_info.min, 0, "W")
    # util
    min_util = _ValueWithRank(sys.float_info.max, 0, "%")
    max_util = _ValueWithRank(sys.float_info.min, 0, "%")
    # clock
    min_clock = _ValueWithRank(sys.float_info.max, 0, "MHz")
    max_clock = _ValueWithRank(sys.float_info.min, 0, "MHz")
    aflops: Union[List[_ValueWithRank], None] = None


class StragglerDetector:
    """Singleton Class implementing per rank Straggler Detector

    It use cuda events to time operation of choice using the
    start and stop methods which can be directly invoked using
    the class instance or can be used like a python context.
    After collection, a report() method is available to display
    the collected metrics. It is only supported if CUDA is
    available. megatron/core/README_STRAGGLER.md for more info

    Note:
        The instance and class attributes mentioned below are all
        private to the class and has no use outside the class

    Attributes:
        _off (bool): current state of the toggle
        start (FunctionType): start method
        stop (FunctionType): stop method
        world (int): world size
        rank (int): rank for this instance
        mmcnt (int): number of ranks to report
        port (int): control port
        amp (float): amplification factor for TFLOPs, default 3.0
        toggle (bool): whether to start/stop detector collection
        bdata (bool): when true, just collect get_batch
        dev (int): cuda device
        evt_q (LifoQueue): cuda event queue
        start_gemm_ev (list[torch.cuda.Event]): cuda start event
        stop_gemm_ev (list[torch.cuda.Event]): cuda stop event
        start_data_ev (list[torch.cuda.Event]): cuda start event
        stop_data_ev (list[torch.cuda.Event]): cuda stop event
        start_gemm_tm (list[int]): start time (wallclock)
        stop_gemm_tm (list[int]): stop time (wallclock)
        start_data_tm (list[int]): start time for get_batch
        stop_data_tm (list[int]): stop time for get_batch
        sock (socket): the controller socket
        ctrlr (Thread): the controller thread
    """

    _configured = False
    """Indicates if the singleton instance is configured or not
    """

    def __new__(cls: Type["StragglerDetector"]) -> "StragglerDetector":
        """Constructor
        Creates an instance of the class if not created

        Args:
            cls (Type[&#39;StragglerDetector&#39;]): The class type

        Returns:
            StragglerDetector: the class instance
        """

        if not hasattr(cls, "_instance"):
            cls._instance = super(StragglerDetector, cls).__new__(cls)
        return cls._instance

    def __init__(self) -> None:
        """Initializer

        The inital state of the StragglerDetector instance is disabled.
        The enabled state is indicated using self._off member variable
        and the proerty enabled.
        """
        self._off: bool = True
        self.start = self.null_method
        self.stop = self.null_method
        self.world: int = 0
        self.rank: int = 0
        self.mmcnt: int = 1
        self.port: int = 0
        self.amp: float = 3.0
        self.toggle: bool = False
        self.bdata: bool = False
        self.dev: Union[torch.device, int, None] = None
        self.evt_q: Union[queue.LifoQueue, None] = None
        self.start_gemm_ev: List[torch.cuda.Event] = []
        self.stop_gemm_ev: List[torch.cuda.Event] = []
        self.start_data_ev: List[torch.cuda.Event] = []
        self.stop_data_ev: List[torch.cuda.Event] = []
        self.start_gemm_tm: List[int] = []
        self.stop_gemm_tm: List[int] = []
        self.start_data_tm: List[int] = []
        self.stop_data_tm: List[int] = []
        self.sock: Union[socket.socket, None] = None
        self.ctrlr: Union[threading.Thread, None] = None

    def configure(
        self,
        world: int,
        rank: int,
        mmcnt: int = 1,
        amp: float = 3.0,
        port: int = 65535,
        prefill: int = 1024,
        enabled: bool = False,
    ) -> None:
        """This method is called to configure the Singleton instance

        It should be called once per instantiation per process.

        Note:
            The constructor keeps the state of instance disabled
            i.e no collection will happen even when start/stop methods are
            called. Only when enabled is True (self._off is True), the
            start/stop method pointers get assigned the real collection
            methods, otherwise they are initialized with null_method

        Args:
            world (int): World Size
            rank (int): The rank of this trainer
            mmcnt (int, optional): Number of ranks to print for showing Min/Max Etpt.
                                   Defaults to 1.
            amp (float, optional): Set to 3.0 if we only use timers in fwd pass.
                                   Defaults to 3.0.
            port (int, optional): Control port, useful only for rank-0. Defaults to 65535.
            prefill (int, optional): How many Events to pre-populate. Defaults to 1024.
            enabled (bool, optional): Whether or not collection is enabled on startup.
                                      Defaults to False.
        """
        if StragglerDetector._configured:
            # don't throw
            return
        StragglerDetector._configured = True
        self.bdata = False
        self.start = self.null_method
        self.stop = self.null_method
        self._off = True
        # No CUDA, No Support
        if torch.cuda.is_available():
            self._off = not enabled
            self.world = world
            self.rank = rank
            self.mmcnt = mmcnt if mmcnt > 1 else 1
            self.amp = amp
            self.port = port
            self.toggle = False
            self.bdata = False
            self.evt_q = queue.LifoQueue()
            self.start_gemm_ev = []
            self.stop_gemm_ev = []
            self.start_data_ev = []
            self.stop_data_ev = []
            self.start_gemm_tm = []
            self.stop_gemm_tm = []
            self.start_data_tm = []
            self.stop_data_tm = []
            backend = torch.distributed.get_backend()
            if backend == "nccl":
                self.dev = torch.cuda.current_device()
            else:
                self.dev = torch.device("cpu")
            # cache some events
            for _ in range(prefill):
                self.evt_q.put(torch.cuda.Event(enable_timing=True))
            if self.rank == 0:
                # Start the controller
                self._controller()
            if not self._off:
                self.start = self.start_method
                self.stop = self.stop_method

    def reset(self) -> None:
        """This method is called to reset the metrics state of the instance

        It is generally called from within elapsed() after extracting per rank metrics.
        """
        if self._off:
            return
        # Pool them
        if self.evt_q is not None:
            _ = [self.evt_q.put(ev) for ev in self.start_gemm_ev]
            _ = [self.evt_q.put(ev) for ev in self.stop_gemm_ev]
            _ = [self.evt_q.put(ev) for ev in self.start_data_ev]
            _ = [self.evt_q.put(ev) for ev in self.stop_data_ev]
        self.start_gemm_ev = []
        self.stop_gemm_ev = []
        self.start_data_ev = []
        self.stop_data_ev = []
        # Use regular timers
        self.start_gemm_tm = []
        self.stop_gemm_tm = []
        self.start_data_tm = []
        self.stop_data_tm = []
        self.bdata = False

    def start_method(self) -> None:
        """This method adds the start timers.

        Both cuda event and perf_counter are added. If bdata is set to
        true from __call__, this method skips inserting cuda
        timer. This way it can be used to measure time spent on
        CPU - generally useful for timing get_batch()
        """
        # Not reentrant
        if self.evt_q is not None and self.evt_q.qsize() > 1:
            sev = self.evt_q.get()  # no try-catch
            eev = self.evt_q.get()  # no try-catch
        else:
            sev = torch.cuda.Event(enable_timing=True)
            eev = torch.cuda.Event(enable_timing=True)
        # First check if this start is for data
        if self.bdata:
            self.start_data_ev.append(sev)
            self.stop_data_ev.append(eev)
            self.start_data_tm.append(0)
            self.stop_data_tm.append(0)
            idx = len(self.stop_data_tm) - 1
            self.start_data_tm[idx] = time.perf_counter_ns()
            self.start_data_ev[idx].record()
            self.bdata = False
            return
        self.start_gemm_ev.append(sev)
        self.stop_gemm_ev.append(eev)
        self.start_gemm_tm.append(0)
        self.stop_gemm_tm.append(0)
        idx = len(self.stop_gemm_tm) - 1
        self.start_gemm_tm[idx] = time.perf_counter_ns()
        self.start_gemm_ev[idx].record()

    def stop_method(self) -> None:
        """This method adds the stop timers.

        Both cuda event and perf_counter are added. If bdata is set to
        true from __call__, this method skips inserting cuda
        timer. Also see start_method()
        """
        # Not reentrant
        # First check if this stop is for data
        idx = len(self.stop_data_tm) - 1
        if idx >= 0 and self.stop_data_tm[idx] == 0:
            self.stop_data_tm[idx] = time.perf_counter_ns()
            self.stop_data_ev[idx].record()
            return
        idx = len(self.stop_gemm_tm) - 1
        if idx >= 0 and self.stop_gemm_tm[idx] == 0:
            self.stop_gemm_tm[idx] = time.perf_counter_ns()
            self.stop_gemm_ev[idx].record()

    def elapsed(self) -> Tuple[float, float, int, int, int, int]:
        """This method is called from report(), or can be called directly

         It is called to collect all the elapsed time since last reset().
         It finally calls reset()

        Returns:
            Tuple[float, float, int, int, int, int]: see below for returns
                delta       : time spent in kernel
                batch_delta : time spent in get_batch
                temp        : observed gpu temp
                power       : observed gpu power
                util        : observed gpu utilization
                clock       : observed gpu clock
        """
        if self._off:
            # match with return below
            return 0, 0, 0, 0, 0, 0
        ls_ev = len(self.start_gemm_ev)
        le_ev = len(self.stop_gemm_ev)
        ls_bs = len(self.start_data_ev)
        ls_be = len(self.stop_data_ev)
        delta = 0.0
        batch_delta = 0.0
        temp = 0
        power = 0
        clock = 0
        if ls_ev != le_ev:
            logger.warning(f"Event Start/Stop out of sync {ls_ev}/{le_ev}")
        elif ls_bs != ls_be:
            logger.warning(f"get_batch Start/Stop out of sync {ls_bs}/{ls_be}")
        else:
            temp = torch.cuda.temperature()
            power = torch.cuda.power_draw()
            util = torch.cuda.utilization()
            clock = torch.cuda.clock_rate()
            torch.cuda.synchronize()
            # Process Events
            for i in range(ls_ev):
                e_ev = self.start_gemm_ev[i].elapsed_time(self.stop_gemm_ev[i])
                e_tm = (self.stop_gemm_tm[i] - self.start_gemm_tm[i]) / 1e6  # ns to ms
                # Pick the larger of Event and perf_counter time?
                delta += max(e_ev, e_tm)
            # Process get_batch
            for i in range(ls_bs):
                b_ev = self.start_data_ev[i].elapsed_time(self.stop_data_ev[i])
                b_tm = (self.stop_data_tm[i] - self.start_data_tm[i]) / 1e6  # ns to ms
                # data fetching has prefetch, hence take the max, instead of avg
                batch_delta = max(batch_delta, max(b_ev, b_tm))
        self.reset()  # Prepare for next round
        # time in ms, batch_delta in ms, check return above
        return delta, batch_delta, temp, power, util, clock

    def report(self, total_flops: float = 0.0, log_interval: int = 0) -> bool:
        """Function to log the min/max metircs and the associated rank over a time period

        It finds the slowest and fastest rank among all ranks. It should be
        called by all ranks, but only rank-0 prints the analysis
        At the end it checks, if the straggler detector should
        remain active or if it should be deactivated.

        Args:
            total_flops (float, optional): The theoretical flops over the period. Defaults to 0.0.
            log_interval (int, optional): The training interval over which reporting is called(ms)
                                          Defaults to 0.

        Returns:
            bool: True if reported, else False
        """
        ret = False
        if not self._off and total_flops > 0.0 and log_interval > 0:
            elapsed, btime, temp, power, util, clock = self.elapsed()  # get raw time
            # btime (get_batch time is max in the iteration)
            ptime = elapsed / (log_interval * 1.0)  # avg per iteration elapsed time, ms
            api_flops = total_flops / (log_interval * 1.0)  # avg per iteration flops, ms
            apir_flops = api_flops / (
                ptime * 10**9 * self.world
            )  # this is avg per iteration this rank's thruput, TFLOP/s (note 10**9),
            et_flops = apir_flops / self.amp  # Estimated TFLOPs, not tracing backward

            o_dt = self._min_max(
                ptime, btime, float(temp), float(power), float(util), float(clock), et_flops
            )
            if self.rank == 0 and o_dt is not None and o_dt.aflops is not None:
                now = f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}]"
                min_flops, min_frank, _ = o_dt.aflops[0]()
                max_flops, max_frank, _ = o_dt.aflops[-1]()
                logger.info(
                    f"{now} | "
                    f"MnRtt/Rnk: {o_dt.min_elapsed} | "
                    f"MxRtt/Rnk: {o_dt.max_elapsed} | "
                    f"MnPwr/Rnk: {o_dt.min_power} | "
                    f"MxPwr/Rnk: {o_dt.max_power} | "
                    f"MnTmp/Rnk: {o_dt.min_temp} | "
                    f"MxTmp/Rnk: {o_dt.max_temp} | "
                    f"MnUtl/Rnk: {o_dt.min_util} | "
                    f"MxUtl/Rnk: {o_dt.max_util} | "
                    f"MnClk/Rnk: {o_dt.min_clock} | "
                    f"MxClk/Rnk: {o_dt.max_clock} | "
                    f"MnDRtt/Rnk: {o_dt.min_btime} | "
                    f"MxDRtt/Rnk: {o_dt.max_btime} | "
                    f"MnEtpt/Rnk: {min_flops:.2f}TF/{min_frank} | "
                    f"MxEtpt/Rnk: {max_flops:.2f}TF/{max_frank}"
                )
                if self.mmcnt > 1 and self.mmcnt < self.world:
                    line = f"^^^^ Bottom {self.mmcnt} Ranks with lowest  Etpt(TF):"
                    for i in range(self.mmcnt):
                        line += f" {o_dt.aflops[i]},"
                    logger.info(line)
                    line = f"^^^^ Top    {self.mmcnt} Ranks with highest Etpt(TF):"
                    shift = self.world - self.mmcnt
                    for i in range(self.mmcnt):
                        line += f" {o_dt.aflops[i + shift]},"
                    logger.info(line)
                ret = True

        # Check/Communicate if tracking is turned off or on
        self._check_toggle()
        return ret

    def _check_toggle(self) -> None:
        """Helper method to check if a request to toggle the collection state was made

        It checks iof collection state toggle req was made via the server listening on
        rank-0 since last call to report(). Called by report(). Calling this method
        indirectly from report() is the only way to activate the change that is made
        via rank-0
        """
        # If no change just communicate the current
        off = self._off
        if self.rank == 0 and self.toggle:
            off = not self._off
            self.toggle = False
        st = torch.tensor(off, dtype=torch.bool, device=self.dev)
        torch.distributed.broadcast(st, 0)  # Blocking
        # save old switch
        off = self._off
        self._off = bool(st.item())
        if off != self._off:
            if not self._off:
                self.start = self.start_method
                self.stop = self.stop_method
                state = "ON"
            else:
                self.start = self.null_method
                self.stop = self.null_method
                state = "OFF"
            if self.rank == 0:
                logger.info(f"Toggling StragglerDetector State {state}")

    def _handler(self) -> None:
        """Thread function for the controller.

        It is a tcp-server that listens on a port. Uses HTTP protocol.
        If connected to it using curl, it indicates a toggle of the
        collection state. The actual toggling happens at the end of
        calling report() when _check_toggle() is called.
        """
        resp = r"HTTP/1.0 200 OK\r\nConnection: Close\r\nContent-length: "

        if self.rank == 0:
            state = "OFF" if self._off else "ON"
            logger.info(
                f"Controller ready to recv commands on port {self.port}. Current state {state}"
            )
            while True and self.sock is not None:
                try:
                    conn, _ = self.sock.accept()
                    _ = conn.recv(1024)
                    self.toggle = True
                    state = "ON" if self._off else "OFF"
                    msg = f"Will turn StragglerDetector {state} at next logging interval"
                    msg_len = len(msg)
                    final_resp = f"{resp}{msg_len}\r\n\r\n{msg}"
                    conn.send(final_resp.encode())
                    conn.close()
                    logger.info(msg)
                except Exception as err:
                    logger.error(f"Error in stragler handler.. {str(err)}")
                    return

    def _controller(self):
        """Installs a controller listener that is used to toggle collection state.

        Called from configure(). Ignored for all ranks other than rank-0
        """
        try:
            if self.rank == 0:
                neth = "0.0.0.0"
                netp = self.port
                self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                self.sock.bind((neth, netp))
                self.sock.listen(128)
                self.ctrlr = threading.Thread(
                    target=self._handler, args=(), name="straggler", daemon=True
                )
                self.ctrlr.start()
        except Exception as err:
            logger.warning(f"StragglerDetector cannot be controlled.. {str(err)}")

    def _min_max(
        self,
        ptime: float,
        btime: float,
        temp: float,
        power: float,
        util: float,
        clock: float,
        flops: float,
    ) -> Union[_StragglerData, None]:
        """Helper function to find the min/max values

        Args:
            ptime (float): avg per iteration gpu time
            btime (float): avg per iteration cpu time
            temp (float): gpu temp at the time of reporting
            power (float): gpu power at the time of reporting
            util (float): gpu util at the time of reporting
            clock (float): gpu clock at the time of reporting
            flops (float): estimated flops for the rank

        Returns:
            Union[_StragglerData, None]: It contains the min/max of few metrics and the
                                         corresponding rank it also has sorted list of
                                         all (flops, rank) sorted by flops (aflops)
                                         or returns None if collecton is disabled
        """
        if self._off:
            return None
        # initialize output data object
        o_dt = _StragglerData()

        prof_data: Dict[str, Union[int, float]] = {}
        data_list: List[Dict[str, Union[int, float]]] = []
        prof_data["rank"] = self.rank
        prof_data["time"] = ptime
        prof_data["btime"] = btime
        prof_data["temp"] = temp
        prof_data["power"] = power
        prof_data["util"] = util
        prof_data["clock"] = clock
        prof_data["flops"] = flops

        if self.rank == 0:
            data_list = [prof_data] * self.world

        # this is blocking by default
        torch.distributed.gather_object(prof_data, object_gather_list=data_list, dst=0)

        if self.rank == 0:
            min_ctime = min(data_list, key=lambda k: k["time"])  # elapsed
            max_ctime = max(data_list, key=lambda k: k["time"])  # elapsed

            min_cbatch = min(data_list, key=lambda k: k["btime"])  # batch time
            max_cbatch = max(data_list, key=lambda k: k["btime"])  # batch time

            min_ctemp = min(data_list, key=lambda k: k["temp"])  # temp
            max_ctemp = max(data_list, key=lambda k: k["temp"])  # temp

            min_cpower = min(data_list, key=lambda k: k["power"])  # power
            max_cpower = max(data_list, key=lambda k: k["power"])  # power

            min_cutil = min(data_list, key=lambda k: k["util"])  # gpu util
            max_cutil = max(data_list, key=lambda k: k["util"])  # gpu util

            min_cclock = min(data_list, key=lambda k: k["clock"])  # gpu clock
            max_cclock = max(data_list, key=lambda k: k["clock"])  # gpu clock

            min_val = min_ctime["time"]
            min_rank = min_ctime["rank"]
            max_val = max_ctime["time"]
            max_rank = max_ctime["rank"]
            o_dt.min_elapsed = _ValueWithRank(min_val, int(min_rank), "ms")
            o_dt.max_elapsed = _ValueWithRank(max_val, int(max_rank), "ms")

            min_val = min_cbatch["btime"]
            min_rank = min_cbatch["rank"]
            max_val = max_cbatch["btime"]
            max_rank = max_cbatch["rank"]
            o_dt.min_btime = _ValueWithRank(min_val, int(min_rank), "ms")
            o_dt.max_btime = _ValueWithRank(max_val, int(max_rank), "ms")

            min_val = min_ctemp["temp"]
            min_rank = min_ctemp["rank"]
            max_val = max_ctemp["temp"]
            max_rank = max_ctemp["rank"]
            o_dt.min_temp = _ValueWithRank(min_val, int(min_rank), "C")
            o_dt.max_temp = _ValueWithRank(max_val, int(max_rank), "C")

            min_val = min_cpower["power"]
            min_rank = min_cpower["rank"]
            max_val = max_cpower["power"]
            max_rank = max_cpower["rank"]
            o_dt.min_power = _ValueWithRank(min_val, int(min_rank), "W")
            o_dt.max_power = _ValueWithRank(max_val, int(max_rank), "W")

            min_val = min_cutil["util"]
            min_rank = min_cutil["rank"]
            max_val = max_cutil["util"]
            max_rank = max_cutil["rank"]
            o_dt.min_util = _ValueWithRank(min_val, int(min_rank), "%")
            o_dt.max_util = _ValueWithRank(max_val, int(max_rank), "%")

            min_val = min_cclock["clock"]
            min_rank = min_cclock["rank"]
            max_val = max_cclock["clock"]
            max_rank = max_cclock["rank"]
            o_dt.min_clock = _ValueWithRank(min_val, int(min_rank), "MHz")
            o_dt.max_clock = _ValueWithRank(max_val, int(max_rank), "MHz")

            o_dt.aflops = [
                _ValueWithRank(d.get("flops", 0.0), int(d.get("rank", -1)))
                for _, d in enumerate(data_list)
            ]
            o_dt.aflops.sort(key=lambda val_with_rank: val_with_rank()[0])
        # wait for everyone here
        torch.distributed.barrier()

        return o_dt

    @property
    def enabled(self) -> bool:
        """Can be called to check the enabled state of the instance

        Note:
            After the request to toggle the state, the
            actual state change happens at end of call
            to report()
        """
        return not self._off

    @property
    def configured(self) -> bool:
        """Can be called to check if the instance is already configured

        Returns:
            bool: returns True if configure was called and was a success, else False
        """
        return StragglerDetector._configured

    @property
    def my_rank(self):
        """Can be called to get configured rank of this instance

        Returns:
            int: Configured rank for this instance
        """
        return self.rank

    @property
    def world_size(self) -> int:
        """Can be called to get configured world of this instance

        Returns:
            int: World size configured for this instance
        """
        return self.world

    def null_method(self) -> None:
        """Default method to initialize start/stop method ptrs"""
        pass

    def __enter__(self) -> "StragglerDetector":
        """Define context/instance entry

        Returns:
            StragglerDetector: the instance
        """
        self.start()
        return self

    def __call__(self, bdata: bool = False) -> "StragglerDetector":
        """Callable for the instance. Set context state,

        Useful when the context is used for cpu timers only when bdata=True

        Args:
            bdata (bool, optional): when true, only enables cpu timers. Defaults to False.

        Returns:
            StragglerDetector: the instance
        """
        self.bdata = bdata
        return self

    def __exit__(
        self,
        ex_type: Optional[Type[BaseException]],
        ex_val: Optional[BaseException],
        ex_tb: Optional[TracebackType],
    ) -> bool:
        """Define context/instance exit, calls the stop method

        Args:
            ex_type (Optional[Type[BaseException]]): Exception type
            ex_val (Optional[BaseException]): _description_
            ex_tb (Optional[TracebackType]): _description_

        Returns:
            bool: True if the exception was handled
        """
        # Should not suppress errors even if turned off
        if ex_type is not None:
            err = traceback.format_exception(ex_type, ex_val, ex_tb)
            logger.warning(f"{str(ex_val)}\n{err}")
        self.stop()
        return False


# Singleton, global visibility
__straggler__ = StragglerDetector()
"""StragglerDetector: private module variable, not be directly accessed
"""


def is_submodule(module, parent_module, strict=True):
    """
    Check if a module is a submodule of another module.
    """
    if strict:
        if module is parent_module:
            return False
    for m in parent_module.modules():
        if m is module:
            return True
    return False


########################
### context parallel ###
########################


def get_batch_on_this_cp_rank(
    batch: Dict[str, Any], cp_group: Optional[torch.distributed.ProcessGroup] = None
):
    """Slice batch input along sequence dimension into multiple chunks,
    which are parallelized across GPUs in a context parallel group.

    Args:
        batch (Dict[str, Any]): Input batch tensors.
        cp_group (Optional[torch.distributed.ProcessGroup]): Context-parallel process group.
            If provided, uses this group's size and rank. Otherwise, falls back to
            the current context-parallel settings from parallel_state.
    """

    # With causal masking, each token only attends to its prior tokens. Simply split
    # sequence into CP chunks can result in severe load imbalance. That's to say, chunks
    # at the end of sequence have bigger workload than others. To address this issue,
    # we split sequence into 2*CP ranks. Assuming CP=2, we then get 4 chunks, chunk_0
    # and chunk_3 are assigned to GPU0, chunk_1 and chunk_2 are assigned to GPU1, so
    # that we can get balanced workload among GPUs in a context parallel group.
    # Determine CP topology either from provided group or from current context parallel state
    if cp_group is not None:
        cp_size = get_pg_size(cp_group)
        cp_rank = get_pg_rank(cp_group)
    else:
        cp_size = parallel_state.get_context_parallel_world_size()
        cp_rank = parallel_state.get_context_parallel_rank()

    if cp_size > 1:
        for key, val in batch.items():
            if val is not None:
                seq_dim = 1 if key != 'attention_mask' else 2
                val = val.view(
                    *val.shape[0:seq_dim],
                    2 * cp_size,
                    val.shape[seq_dim] // (2 * cp_size),
                    *val.shape[(seq_dim + 1) :],
                )
                index = torch.zeros(2, dtype=torch.int64, device=val.device)
                index[0].fill_(cp_rank)
                index[1].fill_(2 * cp_size - cp_rank - 1)
                val = val.index_select(seq_dim, index)
                val = val.view(*val.shape[0:seq_dim], -1, *val.shape[(seq_dim + 2) :])
                batch[key] = val

    return batch


def get_thd_batch_on_this_cp_rank(
    batch: Dict[str, Any],
    cu_seqlens: torch.Tensor,
    cu_seqlens_padded: torch.Tensor,
    max_seqlen: torch.Tensor,
    cp_size: Optional[int] = None,
    cp_rank: Optional[int] = None,
):
    """Slice each sub-sample in a packed sample batch input along
    sequence dimension into multiple chunks, which are parallelized
    across GPUs in a context parallel group.
    """
    packed_seq_params = PackedSeqParams(
        qkv_format="thd",
        cu_seqlens_q=cu_seqlens,
        cu_seqlens_kv=cu_seqlens,
        cu_seqlens_q_padded=cu_seqlens_padded,
        cu_seqlens_kv_padded=cu_seqlens_padded,
        max_seqlen_q=int(max_seqlen[0].item()),
        max_seqlen_kv=int(max_seqlen[0].item()),
    )

    cp_size = parallel_state.get_context_parallel_world_size() if cp_size is None else cp_size
    cp_rank = parallel_state.get_context_parallel_rank() if cp_rank is None else cp_rank
    if cp_size > 1:  # slice batch along sequence dimension for context parallelism
        assert tex is not None and is_te_min_version("1.10.0"), (
            "Please update Transformer Engine to >= 1.10 to use "
            "Context Parallel with THD format data"
        )
        index = tex.thd_get_partitioned_indices(
            cu_seqlens_padded, batch['tokens'].size(1), cp_size, cp_rank
        )
        for key, data in batch.items():
            if key in {'attention_mask', 'cu_seqlens', 'cu_seqlens_padded', 'max_seqlen'}:
                continue
            batch[key] = data.index_select(1, index)

    return batch, packed_seq_params


################################
### hybrid context parallel ###
################################


def get_batch_on_this_hybrid_cp_rank(
    batch: Dict[str, Any],
    local_cp_size: int,
    cp_group: Optional[torch.distributed.ProcessGroup] = None,
):
    """Slice batch input along sequence dimension into multiple chunks,
    which are parallelized across GPUs in a context parallel group.
    """
    assert local_cp_size is not None
    if cp_group is None:
        # Get the local cp group required for as defined by the HybridCPDataLoaderWrapper
        if local_cp_size > 1:
            cp_group = parallel_state.get_hybrid_data_context_parallel_groups(
                group_size=local_cp_size
            )
    else:
        # If cp group is provided, it must match the local cp size
        # as defined by the HybridCPDataLoaderWrapper
        assert cp_group.size() == local_cp_size

    # Convert [seqlen] to [1, seqlen] similar to default collate_fn
    # as hybrid_context_parallel dataloader wrapper does not go through default collate_fn
    for key, data in batch.items():
        if key in ['attention_mask']:
            continue
        batch[key] = torch.stack([data], 0)
    sample_length = batch['tokens'].shape[1]
    # TODO(pmannan): Take care of padding tokens here if not divisible by cp_size*2
    # Create packed_seq_params for SBHD format with cp group information.
    packed_seq_params = PackedSeqParams(
        qkv_format="sbhd",
        cu_seqlens_q=torch.tensor([0, sample_length], device="cuda", pin_memory=True),
        cu_seqlens_kv=torch.tensor([0, sample_length], device="cuda", pin_memory=True),
        cu_seqlens_q_padded=torch.tensor([0, sample_length], device="cuda", pin_memory=True),
        cu_seqlens_kv_padded=torch.tensor([0, sample_length], device="cuda", pin_memory=True),
        max_seqlen_q=sample_length,
        max_seqlen_kv=sample_length,
        local_cp_size=local_cp_size,
        cp_group=cp_group,
    )

    if cp_group is not None and cp_group.size() > 1:
        # When using hybrid_context_parallel, each sub-sample of a packed sample is
        # required to be divisible by CP*DP*2 or CP*DP*TP*2 (if using sequence parallel)
        batch = get_batch_on_this_cp_rank(batch, cp_group=cp_group)

    return batch, packed_seq_params


######################
### NVTX profiling ###
######################

_nvtx_enabled: bool = False  # Whether NVTX range profiling is enabled
_nvtx_range_messages: list[str] = []  # Messages associated with active NVTX ranges


def configure_nvtx_profiling(enabled: bool) -> None:
    """Configure NVTX range profiling to be enabled or disabled.

    Args:
        enabled (bool): Whether to enable NVTX range profiling
    """
    global _nvtx_enabled
    _nvtx_enabled = enabled


def _nvtx_range_get_func_path():
    """Get the path of a function. Assumes being called from nvtx_range_push/pop.

    Returns:
        str: Module path and function name joined by a dot
    """
    # Get the caller's caller frame (go back 2 frames)
    frame = inspect.currentframe().f_back.f_back
    caller_func = inspect.getframeinfo(frame).function
    module = inspect.getmodule(frame)

    return f"{module.__name__}.{caller_func}"


def nvtx_range_push(msg=None, suffix=None) -> None:
    """Push NVTX range onto stack. If msg is not provided, use the calling function's path.

    Args:
        msg (str, optional): Message to associate with range
        suffix (str, optional): Suffix to append to the message
    """
    if not _nvtx_enabled:
        return

    if msg is None:
        msg = _nvtx_range_get_func_path()
    if suffix is not None:
        msg = f"{msg}.{suffix}"

    # Track messages to ensure consistency when popping
    _nvtx_range_messages.append(msg)

    # Push NVTX range
    torch.cuda.nvtx.range_push(msg)


def nvtx_range_pop(msg=None, suffix=None) -> None:
    """Pop NVTX range from stack. If msg is not provided, use the calling function's path.

    Args:
        msg (str, optional): Message to associate with range
        suffix (str, optional): Suffix to append to the message
    """
    if not _nvtx_enabled:
        return

    if msg is None:
        msg = _nvtx_range_get_func_path()
    if suffix is not None:
        msg = f"{msg}.{suffix}"

    # Update list of NVTX range messages and check for consistency
    if not _nvtx_range_messages:
        raise RuntimeError("Attempted to pop NVTX range from empty stack")
    last_msg = _nvtx_range_messages.pop()
    if msg is not None and msg != last_msg:
        raise ValueError(
            f"Attempted to pop NVTX range from stack with msg={msg}, "
            f"but last range has msg={last_msg}"
        )

    # Pop NVTX range
    torch.cuda.nvtx.range_pop()


@lru_cache(maxsize=None)
def _nvtx_decorator_get_func_path(func):
    """Get the path of a function.

    Args:
        func (Callable): Function to get path for.

    Returns:
        str: Module path and function name joined by a dot
    """
    caller_func = func.__name__
    module = inspect.getmodule(func)

    return f"{module.__name__}.{caller_func}"


def nvtx_decorator(message: Optional[str] = None, color: Optional[str] = None):
    """Decorator to add NVTX range to a function.

    Args:
        message (str, optional): Custom message for the NVTX range. If None, uses function path
        color (str, optional): Color for the NVTX range. Defaults to None

    Returns:
        Callable: Decorated function with NVTX profiling if enabled

    Example:
        @nvtx_decorator()
        def my_function():
            pass

        @nvtx_decorator(message="Custom Range", color="blue")
        def another_function():
            pass
    """

    def decorator(func: Callable) -> Callable:
        if _nvtx_enabled:
            return nvtx.annotate(
                message=message or _nvtx_decorator_get_func_path(func), color=color
            )(func)
        return func

    return decorator


def unwrap_model(model, module_instances=None):
    """Unwrap_model to return the final model instance"""
    if module_instances is None:
        from megatron.core.distributed import DistributedDataParallel as DDP
        from megatron.core.distributed import TorchFullyShardedDataParallel as torch_FSDP
        from megatron.core.distributed.fsdp.mcore_fsdp_adapter import (
            FullyShardedDataParallel as megatron_FSDP,
        )
        from megatron.core.transformer.module import Float16Module

        module_instances = (DDP, torch_FSDP, megatron_FSDP, Float16Module)

    return_list = True
    if not isinstance(model, list):
        model = [model]
        return_list = False
    unwrapped_model = []
    for model_module in model:
        while isinstance(model_module, module_instances):
            model_module = model_module.module
        unwrapped_model.append(model_module)
    if not return_list:
        return unwrapped_model[0]
    return unwrapped_model


_ASYNC_IO_LOOP: asyncio.AbstractEventLoop | None = None


def get_asyncio_loop(loop: asyncio.AbstractEventLoop | None = None) -> asyncio.AbstractEventLoop:
    """Creates an asyncio loop if necessary and then returns the current asyncio loop."""
    global _ASYNC_IO_LOOP
    if loop is None:
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError as e:
            if _ASYNC_IO_LOOP is not None:
                return _ASYNC_IO_LOOP
            else:
                _ASYNC_IO_LOOP = loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
    return loop


def is_using_quantization_scales(config):
    """Returns whether the model is using quantization scales based on the config."""
    return getattr(config, "fp8", False) or getattr(config, "fp4", False)


_ASYNC_TASK_STATS = defaultdict(lambda: [0, 0.0])  # cnt, total_time


def trace_async_exceptions(func: Optional[Callable] = None, *, verbose: bool = False):
    """Decorator to be applied to every coroutine that runs in a separate task.

    This is needed because asyncio tasks do not propagate exceptions.
    Coroutines running inside separate tasks will fail silently if not decorated.

    Passing in `verbose=True` will print additional lifetime logging information about the task.
    Such functionality is relied on by some users, and can be enabled as shown below:
    ```
        @trace_async_exceptions(verbose=True)
        async def my_coroutine(...):
            ...
    ```
    """

    def _log_verbose(name: str, start: float) -> None:
        elapsed = (time.perf_counter() - start) * 1000.0
        cnt, tot = _ASYNC_TASK_STATS[name]
        _ASYNC_TASK_STATS[name] = [cnt + 1, tot + elapsed]
        avg = _ASYNC_TASK_STATS[name][1] / _ASYNC_TASK_STATS[name][0]

        log10 = numpy.log10(max(cnt, 1))
        if numpy.isclose(log10, round(log10)):
            logger.info(
                f"{name} completed in {elapsed:.3f} ms, "
                f"lifetime avg: {avg:.3f} ms, "
                f"lifetime cnt: {cnt + 1}"
            )

    def _decorate(fn: Callable):
        if asyncio.iscoroutinefunction(fn):

            @functools.wraps(fn)
            async def wrapper(*args, **kwargs):
                if verbose:
                    start = time.perf_counter()
                try:
                    return await fn(*args, **kwargs)
                except Exception as e:
                    logger.error(f"Exception in async function {fn.__name__}: {e}")
                    traceback.print_exc()
                    sys.exit(1)
                finally:
                    if verbose:
                        _log_verbose(fn.__qualname__, start)

        elif inspect.isasyncgenfunction(fn):

            @functools.wraps(fn)
            async def wrapper(*args, **kwargs):
                if verbose:
                    start = time.perf_counter()
                agen = fn(*args, **kwargs)
                try:
                    async for item in agen:
                        yield item
                except Exception as e:
                    logger.error(f"Exception in async generator {fn.__name__}: {e}")
                    traceback.print_exc()
                    sys.exit(1)
                finally:
                    if verbose:
                        _log_verbose(fn.__qualname__, start)

        else:
            raise TypeError("trace_async_exceptions must be used on async functions or generators")
        return wrapper

    return _decorate if func is None else _decorate(func)


# ============================================================================
# Backward Compatibility Decorators
# ============================================================================


def deprecated(
    version: str,
    removal_version: Optional[str] = None,
    alternative: Optional[str] = None,
    reason: Optional[str] = None,
) -> Callable:
    """
    Mark a function as deprecated.

    This decorator:
    1. Adds deprecation metadata to the function
    2. Issues a DeprecationWarning when the function is called
    3. Allows the compatibility checker to track deprecation lifecycle

    Args:
        version: Version where deprecation starts (e.g., "1.0.0")
        removal_version: Version where function will be removed (e.g., "2.0.0")
        alternative: Name of the recommended replacement function
        reason: Optional explanation for the deprecation

    Returns:
        Decorator function

    Example:
        @deprecated(
            version="1.0.0",
            removal_version="2.0.0",
            alternative="new_train_model",
            reason="Improved performance and cleaner API"
        )
        def old_train_model(config):
            pass
    """

    def decorator(func: Callable) -> Callable:
        # Add metadata
        func._deprecated = True
        func._deprecated_version = version
        func._removal_version = removal_version
        func._alternative = alternative
        func._deprecation_reason = reason

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Build warning message
            msg_parts = [f"{func.__name__} is deprecated since version {version}."]

            if alternative:
                msg_parts.append(f"Use {alternative} instead.")

            if removal_version:
                msg_parts.append(f"Will be removed in version {removal_version}.")

            if reason:
                msg_parts.append(f"Reason: {reason}")

            warnings.warn(" ".join(msg_parts), DeprecationWarning, stacklevel=2)

            return func(*args, **kwargs)

        return wrapper

    return decorator


def internal_api(func: Callable) -> Callable:
    """
    Mark a function or class as internal API (not for external use).

    Use this decorator for:
    - Internal APIs not intended for public consumption
    - Experimental features that may change without notice
    - Implementation details that are not part of the stable API

    Objects marked with this decorator will be exempt from backward
    compatibility checks.

    Args:
        func: The function or class to mark as internal

    Returns:
        The original function/class with an internal API marker

    Example:
        @internal_api
        def _internal_helper():
            '''For internal use only'''
            pass

        @internal_api
        class ExperimentalFeature:
            '''This API may change without notice'''
            pass
    """
    func._internal_api = True
    return func


def experimental_api(func: Callable) -> Callable:
    """
    Mark a function or class as experimental API.

    Use this decorator for:
    - Experimental features that may change without notice
    - New APIs under active development
    - Features that are not yet stable

    Objects marked with this decorator will be exempt from backward
    compatibility checks, allowing rapid iteration during development.

    Args:
        func: The function or class to mark as experimental

    Returns:
        The original function/class with an experimental API marker

    Example:
        @experimental_api
        def new_experimental_feature():
            '''This API is experimental and may change'''
            pass

        @experimental_api
        class ExperimentalModel:
            '''This model is under active development'''
            pass
    """
    func._experimental_api = True
    return func


def deprecate_args(
    *deprecated_keys, message="Argument '{name}' has been deprecated and should not be used."
):
    """
    Intercepts specific keyword arguments to raise a custom TypeError.

    Args:
        *deprecated_keys: Strings representing the argument names to block.
        message: Custom error message string. Use {name} as a placeholder.
    """

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Check if any deprecated key is present in kwargs
            found_deprecated = set(deprecated_keys) & set(kwargs.keys())

            if found_deprecated:
                bad_key = list(found_deprecated)[0]
                raise TypeError(message.format(name=bad_key))

            # Send args to the real function
            return func(*args, **kwargs)

        return wrapper

    return decorator


def deprecate_inference_params(inference_context, inference_params):
    """Print warning for deprecated `inference_params`."""
    if inference_context is None and inference_params is not None:
        warnings.warn(
            "`inference_params` renamed to `inference_context`, and will be "
            "removed in `megatron-core` 0.13."
        )
        return inference_params
    return inference_context


================================================
FILE: megatron/inference/__init__.py
================================================
# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/inference/utils.py
================================================
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import logging
from argparse import ArgumentParser
from functools import partial
from typing import Optional
import torch

from gpt_builders import gpt_builder
from mamba_builders import mamba_builder
from megatron.core.inference.config import (
    InferenceConfig,
    KVCacheManagementMode,
    MambaInferenceStateConfig,
    PrefixCachingCoordinatorPolicy,
    PrefixCachingEvictionPolicy,
)
from megatron.core.inference.contexts import DynamicInferenceContext
from megatron.core.inference.engines import DynamicInferenceEngine
from megatron.core.inference.model_inference_wrappers.gpt.gpt_inference_wrapper import (
    GPTInferenceWrapper,
)
from megatron.core.inference.quantization.utils import quantize_model_to_mxfp8
from megatron.core.inference.text_generation_controllers.text_generation_controller import (
    TextGenerationController,
)
from megatron.core.tokenizers.utils.build_tokenizer import build_tokenizer
from megatron.core.transformer.module import MegatronModule
from megatron.core.utils import get_attr_wrapped_model, log_single_rank, unwrap_model
from megatron.training import get_args
from megatron.training import get_model as _get_model
from megatron.training import get_tokenizer, get_wandb_writer
from megatron.training.checkpointing import load_checkpoint
from model_provider import model_provider

logger = logging.getLogger(__name__)


def get_model_for_inference() -> MegatronModule:
    """Initialize model and load checkpoint for inference."""

    args = get_args()

    if args.model_provider == "gpt":
        model_builder = gpt_builder
    elif args.model_provider == "mamba":
        model_builder = mamba_builder
    else:
        raise ValueError(f"Invalid model provider {args.model_provider}")

    # Build model.
    model = _get_model(partial(model_provider, model_builder), wrap_with_ddp=False)

    # Load checkpoint.
    assert args.load is not None
    args.exit_on_missing_checkpoint = True
    load_checkpoint(
        ddp_model=model,
        optimizer=None,
        opt_param_scheduler=None,
        strict=not args.inference_ckpt_non_strict,
    )

    # No virtual PP.
    assert len(model) == 1, "Above condition should have caught this"
    model = model[0]

    # Eval mode.
    model.eval()

    if args.transformer_impl == "inference_optimized" and args.fp8_recipe == "mxfp8":
        backend = args.inference_grouped_gemm_backend
        if backend == "auto":
            quant_backend = "flashinfer"
        elif backend == "torch":
            quant_backend = "triton"
        elif backend == "te":
            raise ValueError(
                "MXFP8 quantization is not supported with "
                "inference_grouped_gemm_backend='te'."
            )
        quantize_model_to_mxfp8(unwrap_model(model), backend=quant_backend)
    return model


def add_inference_args(parser: ArgumentParser) -> ArgumentParser:
    """Add inference command line arguments to the parser."""

    group = parser.add_argument_group(title='Inference')

    group.add_argument("--temperature", type=float, default=1.0, help='Sampling temperature.')
    group.add_argument("--top_k", type=int, default=1, help='Top k sampling.')
    group.add_argument("--top_p", type=float, default=0.0, help='Top p sampling.')
    group.add_argument(
        "--return-log-probs",
        action='store_true',
        default=False,
        help='Return the log probabilities of the final output tokens',
    )
    group.add_argument(
        "--prompts",
        metavar='N',
        type=str,
        nargs='+',
        help='Input prompts with each prompt within quotes and separated by space',
    )
    group.add_argument(
        "--num-tokens-to-prompt",
        type=int,
        nargs="+",
        default=[64, 1024],
        help='Number of tokens to use for simulated prompts. This should be a '
        'space-separated pair of integers, and the generated prompt lengths will '
        'be uniformly sampled within this range.',
    )
    group.add_argument(
        "--num-tokens-to-generate",
        type=int,
        default=30,
        help='Number of tokens to generate for each prompt',
    )
    group.add_argument(
        "--num-tokens-from-file",
        action='store_true',
        default=False,
        help='Use per-prompt num_tokens_to_generate from prompt file',
    )
    group.add_argument(
        "--top-n-logprobs",
        type=int,
        default=0,
        help=(
            "Return the top n logprobs for the generated tokens and their "
            "corresponding token as a dictionary"
        ),
    )
    group.add_argument(
        "--incoming-requests-per-step",
        type=int,
        default=None,
        help="Add a deterministic number of requests per step. This arg is "
        "prioritized over `--incoming-requests-per-sec` below (which is non-"
        "deterministic). Note that the number of requests added per step is "
        "additionally limited by the inference context's `max_requests`, "
        "`max_tokens`, and KV buffer size.",
    )
    group.add_argument(
        "--incoming-requests-per-sec",
        type=float,
        default=100.0,
        help="Simulated number of requests per second. Set to -1 to add all requests together.",
    )
    group.add_argument(
        "--incoming-requests-duration",
        type=float,
        default=10.0,
        help="Total amount of time to simulate that requests are "
        "arriving. Multiply this value with "
        "`--incoming-requests-per-sec` to get the approximate "
        "total number of requests. Set to -1 to add all requests together.",
    )
    group.add_argument(
        "--model-provider", choices=["mamba", "gpt"], default="gpt", help="Model provider"
    )
    group.add_argument(
        "--skip-prompt-log-probs", action='store_true', default=False, help='Skip prompt log probs.'
    )
    group.add_argument(
        "--stop-words",
        metavar='WORD',
        type=str,
        nargs='+',
        default=None,
        help='Stop words to terminate generation. Each word should be quoted and '
        'separated by space. Example: --stop-words "\\n\\n" "END" "###"',
    )
    group.add_argument(
        "--output-path", type=str, default=None, help="Path to save generations as JSON"
    )
    group.add_argument(
        "--output-every-n-results",
        type=int,
        default=1,
        help="To minimize the output file size of larger runs, only write the "
        "results of every `n` requests.",
    )
    group.add_argument(
        "--output-request-events",
        action='store_true',
        default=False,
        help="Include request events (lifecycle + per-token block allocator metrics) "
        "in the JSON output.",
    )
    group.add_argument(
        "--prompt-file",
        help='Jsonl file containing input prompts, where each item (i.e., line) '
        'contains the field \'text\' where the value is the prompt. All other '
        'fields within each item are ignored, and may be customized for each '
        'application.',
    )
    group.add_argument(
        "--prompt-file-num-truncate",
        type=int,
        help='Number of samples to use from the loaded prompt file (see '
        '`--prompt-file` above). The first `--prompt-file-num-truncate` samples '
        'will be used, in order.',
    )
    group.add_argument(
        "--use-flashinfer-fused-rope",
        action='store_true',
        default=False,
        help='Use flashinfer fused rope implementation.',
    )
    group.add_argument(
        "--no-record-throughput",
        action='store_false',
        dest="record_throughput",
        help="Disable throughput recording in --output-file",
    )
    group.add_argument(
        "--inference-ckpt-non-strict",
        action="store_true",
        help="Load checkpoint with `strict=False`.",
    )
    group.add_argument(
        "--termination-id",
        type=int,
        default=None,
        help="Termination ID that overrides `tokenizer.eod`.",
    )
    group.add_argument(
        "--suspend-resume-interval",
        type=int,
        default=None,
        help="Suspend and resume the dynamic engine every "
        "`suspend_resume_interval` requests. This is used to test the suspend/resume "
        "system.",
    )
    group.add_argument(
        "--suspend-timeout",
        type=float,
        default=0.0,
        help="Seconds to sleep while the engine is suspended (simulates a training step).",
    )
    group.add_argument(
        "--inference-repeat-n",
        type=int,
        default=1,
        help="Repeat inference iterations N times for benchmarking.",
    )
    group.add_argument(
        "--throughput-check-only",
        action='store_true',
        default=False,
        help="If true, only run throughput check without verifying outputs.",
    )
    group.add_argument(
        "--drain-between-batches",
        action='store_true',
        default=False,
        help="Process requests in batches, draining all active requests between batches.",
    )
    group.add_argument(
        "--batch-boundaries",
        type=str,
        default=None,
        help="Comma-separated list of request indices where each batch starts. "
        "Used with --drain-between-batches.",
    )
    group.add_argument(
        "--coordinator-schedule-output-path",
        type=str,
        default=None,
        help="Path to write coordinator request scheduling decisions as JSON",
    )
    return parser


def get_inference_config_from_model_and_args(model: MegatronModule, args):
    """Returns a `InferenceConfig` constructed from the model and command line arguments."""

    # Max sequence length.
    position_embedding_type = get_attr_wrapped_model(model, "position_embedding_type")
    model_max_seq_len = get_attr_wrapped_model(model, "max_sequence_length")
    inf_max_seq_len = args.inference_max_seq_length
    max_batch_size = args.inference_dynamic_batching_max_requests

    if position_embedding_type == "learned_absolute":
        # When using absolute position embeddings, it is critical that the
        # context's `max_sequence_length` is less than or equal to the model's
        # `max_sequence_length`. Otherwise, the context's `position_ids` will
        # contain ids greater than the dimension of the position embedding
        # tensor, which will result in an index error.
        if inf_max_seq_len:
            max_sequence_length = min(model_max_seq_len, inf_max_seq_len)
        else:
            max_sequence_length = model_max_seq_len
        assert max_batch_size is None or max_batch_size <= model_max_seq_len
    else:
        max_sequence_length = inf_max_seq_len
    if args.inference_dynamic_batching_max_requests is not None:
        max_sequence_length = max(max_sequence_length, max_batch_size)

    mamba_inference_state_config = MambaInferenceStateConfig.from_model(
        model,
        conv_states_dtype=args.mamba_inference_conv_states_dtype,
        ssm_states_dtype=args.mamba_inference_ssm_states_dtype,
    )
    pg_collection = get_attr_wrapped_model(model, "pg_collection")

    # Get inference logging configuration from args
    log_inference_wandb = args.inference_wandb_logging
    inference_logging_step_interval = args.inference_logging_step_interval

    # Get metrics writer if logging is enabled and on the logging rank
    # Use the same rank convention as training (last rank logs)
    metrics_writer = None
    if (
        inference_logging_step_interval > 0
        and log_inference_wandb
        and args.rank == (args.world_size - 1)
    ):
        metrics_writer = get_wandb_writer()
        if metrics_writer is None:
            log_single_rank(
                logger,
                logging.WARNING,
                "WARNING: --rl-inference-logging-step-interval is set but no metrics writer "
                "wandb module is available. Inference logging will be disabled.",
            )

    return InferenceConfig(
        block_size_tokens=args.inference_dynamic_batching_block_size,
        buffer_size_gb=args.inference_dynamic_batching_buffer_size_gb,
        paused_buffer_size_gb=args.inference_dynamic_batching_paused_buffer_size_gb,
        mamba_memory_ratio=args.inference_dynamic_batching_mamba_memory_ratio,
        num_cuda_graphs=(
            args.inference_dynamic_batching_num_cuda_graphs
            if args.cuda_graph_impl == "local"
            else None
        ),
        max_requests=args.inference_dynamic_batching_max_requests,
        max_tokens=args.inference_dynamic_batching_max_tokens,
        unified_memory_level=args.inference_dynamic_batching_unified_memory_level,
        kv_cache_management_mode=KVCacheManagementMode(args.rl_kv_cache_management_mode),
        cuda_graph_mixed_prefill_count=args.inference_dynamic_batching_cuda_graph_mixed_prefill_count,  # pylint: disable=line-too-long
        use_cuda_graphs_for_non_decode_steps=not args.decode_only_cuda_graphs,
        static_kv_memory_pointers=args.rl_persist_cuda_graphs,
        max_sequence_length=max_sequence_length,
        mamba_inference_state_config=mamba_inference_state_config,
        pg_collection=pg_collection,
        use_flashinfer_fused_rope=args.use_flashinfer_fused_rope,
        materialize_only_last_token_logits=(not args.return_log_probs and args.num_speculative_tokens == 0),
        track_generated_token_events=args.inference_dynamic_batching_track_generated_token_events,
        track_paused_request_events=args.inference_dynamic_batching_track_paused_request_events,
        enable_chunked_prefill=args.enable_chunked_prefill,
        enable_prefix_caching=args.inference_dynamic_batching_enable_prefix_caching,
        prefix_caching_eviction_policy=PrefixCachingEvictionPolicy(args.inference_dynamic_batching_prefix_caching_eviction_policy),
        prefix_caching_coordinator_policy=PrefixCachingCoordinatorPolicy(args.inference_dynamic_batching_prefix_caching_coordinator_policy),
        prefix_caching_mamba_gb=getattr(args, 'inference_dynamic_batching_prefix_caching_mamba_gb', None),
        use_triton_conv1d=getattr(args, 'inference_dynamic_batching_mamba_triton_conv1d', False),
        metrics_writer=metrics_writer,
        logging_step_interval=args.inference_logging_step_interval,
        num_speculative_tokens=args.num_speculative_tokens,
        use_synchronous_zmq_collectives=args.inference_use_synchronous_zmq_collectives,
    )


def get_dynamic_inference_engine(model: Optional[MegatronModule] = None) -> DynamicInferenceEngine:
    """Builds a `DynamicInferenceEngine`."""
    args = get_args()
    if model is None:
        model = get_model_for_inference()
    tokenizer = build_tokenizer(args)

    inference_config = get_inference_config_from_model_and_args(model, args)
    context = DynamicInferenceContext(model.config, inference_config)
    inference_wrapped_model = GPTInferenceWrapper(model, context)
    controller = TextGenerationController(inference_wrapped_model, tokenizer)
    engine = DynamicInferenceEngine(controller, context)
    return engine


================================================
FILE: megatron/legacy/fp16_deprecated/loss_scaler.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""For backward compatibility, we need the class definitions to deserialize."""

class LossScaler:
    def __init__(self, scale=1):
        self.cur_scale = scale

class DynamicLossScaler:
    def __init__(self,
                 init_scale=2**32,
                 scale_factor=2.,
                 scale_window=1000,
                 min_scale=1,
                 delayed_shift=1,
                 consecutive_hysteresis=False):
        self.cur_scale = init_scale
        self.cur_iter = 0
        self.last_overflow_iter = -1
        self.scale_factor = scale_factor
        self.scale_window = scale_window
        self.min_scale = min_scale
        self.delayed_shift = delayed_shift
        self.cur_hysteresis = delayed_shift
        self.consecutive_hysteresis = consecutive_hysteresis


================================================
FILE: megatron/legacy/fused_kernels/__init__.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

import os
import pathlib
import subprocess

from torch.utils import cpp_extension

# Setting this param to a list has a problem of generating different
# compilation commands (with diferent order of architectures) and
# leading to recompilation of fused kernels. Set it to empty string
# to avoid recompilation and assign arch flags explicity in
# extra_cuda_cflags below
os.environ["TORCH_CUDA_ARCH_LIST"] = ""


def load(args):

    # Check if cuda 11 is installed for compute capability 8.0
    cc_flag = []
    _, bare_metal_major, bare_metal_minor = _get_cuda_bare_metal_version(
        cpp_extension.CUDA_HOME
    )
    if int(bare_metal_major) >= 11:
        cc_flag.append('-gencode')
        cc_flag.append('arch=compute_80,code=sm_80')
        if int(bare_metal_minor) >= 8:
            cc_flag.append('-gencode')
            cc_flag.append('arch=compute_90,code=sm_90')

    # Build path
    srcpath = pathlib.Path(__file__).parent.absolute()
    buildpath = srcpath / "build"
    _create_build_dir(buildpath)

    # Helper function to build the kernels.
    def _cpp_extention_load_helper(name, sources, extra_cuda_flags):
        return cpp_extension.load(
            name=name,
            sources=sources,
            build_directory=buildpath,
            extra_cflags=[
                "-O3",
            ],
            extra_cuda_cflags=[
                "-O3",
                "-gencode",
                "arch=compute_70,code=sm_70",
                "--use_fast_math",
            ]
            + extra_cuda_flags
            + cc_flag,
            verbose=(args.rank == 0),
        )


def _get_cuda_bare_metal_version(cuda_dir):
    raw_output = subprocess.check_output(
        [cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True
    )
    output = raw_output.split()
    release_idx = output.index("release") + 1
    release = output[release_idx].split(".")
    bare_metal_major = release[0]
    bare_metal_minor = release[1][0]

    return raw_output, bare_metal_major, bare_metal_minor


def _create_build_dir(buildpath):
    try:
        os.mkdir(buildpath)
    except OSError:
        if not os.path.isdir(buildpath):
            print(f"Creation of the build directory {buildpath} failed")


================================================
FILE: megatron/legacy/fused_kernels/compat.h
================================================
/* Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved. */

/*This code is copied fron NVIDIA apex:
 *     https://github.com/NVIDIA/apex
 *     with minor changes. */


#ifndef TORCH_CHECK
#define TORCH_CHECK AT_CHECK
#endif

#ifdef VERSION_GE_1_3
#define DATA_PTR data_ptr
#else
#define DATA_PTR data
#endif


================================================
FILE: megatron/legacy/fused_kernels/tests/__init__.py
================================================


================================================
FILE: megatron/legacy/fused_kernels/tests/test_fused_kernels.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import math

import torch
from torch.nn import LayerNorm

from megatron.legacy.model.enums import AttnMaskType
from megatron.legacy.model.fused_layer_norm import MixedFusedLayerNorm
from megatron.legacy.model.fused_softmax import FusedScaleMaskSoftmax
from megatron.legacy.model.utils import attention_mask_func
from megatron.legacy.fused_kernels import load

def test_load_fused_kernels():
    try:
        import fused_layer_norm_cuda
        import scaled_masked_softmax_cuda
        import scaled_upper_triang_masked_softmax_cuda
        import torch

        print("[Success] load_fused_kernels")
    except ImportError as e:
        print("[Fail] load_fused_kernels")
        raise e

def test_fused_softmax():
    bert = BertModel.from_pretrained("bert-base-cased").cuda().half()
    tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
    test_text = (
        "Hello. How are you? I am fine thank you and you? yes Good. "
        "hi hi hi hi hi hi hi hi hi hi hi hi hi"  # 32
    )

    tokens = tokenizer(
        [test_text] * 4,
        return_tensors="pt",
    )

    embedding_output = bert.embeddings(
        input_ids=tokens["input_ids"].cuda(),
        position_ids=None,
        token_type_ids=tokens["token_type_ids"].cuda(),
        inputs_embeds=None,
        past_key_values_length=0,
    )

    # (bsz, 1, 1, seq_len)
    mask = bert.get_extended_attention_mask(
        attention_mask=tokens["attention_mask"].cuda(),
        input_shape=tokens["input_ids"].shape,
        device=bert.device,
    )
    # (bsz, 1, seq_len, seq_len)
    mask = mask.repeat(1, 1, mask.size()[-1], 1)

    attention = bert.encoder.layer[0].attention.self
    key_layer = attention.transpose_for_scores(attention.key(embedding_output))
    query_layer = attention.transpose_for_scores(attention.query(embedding_output))

    attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
    attention_scores /= math.sqrt(key_layer.size()[-1])

    fused_softmax = (
        FusedScaleMaskSoftmax(
            input_in_fp16=True,
            input_in_bf16=False,
            mask_func=attention_mask_func,
            scale=None,
            softmax_in_fp32=False,
            attn_mask_type=AttnMaskType.padding,
            scaled_masked_softmax_fusion=True,
        )
        .cuda()
        .half()
    )

    fused_softmax_output = fused_softmax(
        attention_scores,
        (mask != 0),
    )

    torch_softmax = (
        FusedScaleMaskSoftmax(
            input_in_fp16=True,
            input_in_bf16=False,
            mask_func=attention_mask_func,
            scale=None,
            softmax_in_fp32=False,
            attn_mask_type=AttnMaskType.padding,
            scaled_masked_softmax_fusion=False,
        )
        .cuda()
        .half()
    )

    torch_softmax_output = torch_softmax(
        attention_scores,
        (mask != 0),
    )

    test_result = (fused_softmax_output - torch_softmax_output).abs()

    while test_result.dim() != 1:
        test_result = test_result.mean(dim=-1)

    diff = test_result.mean(dim=-1)

    if diff <= 1e-3:
        print(
            f"\n[Success] test_fused_softmax"
            f"\n > mean_difference={diff}"
            f"\n > fused_values={fused_softmax_output[-1][-1][-1][:5].tolist()}"
            f"\n > torch_values={torch_softmax_output[-1][-1][-1][:5].tolist()}"
        )
    else:
        print(
            f"\n[Fail] test_fused_softmax"
            f"\n > mean_difference={diff}, "
            f"\n > fused_values={fused_softmax_output[-1][-1][-1][:5].tolist()}, "
            f"\n > torch_values={torch_softmax_output[-1][-1][-1][:5].tolist()}"
        )


def test_fused_upper_triangle_mask_softmax():
    gpt = GPT2Model.from_pretrained("gpt2").cuda().half()
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    test_text = (
        "Hello. How are you? I am fine thank you and you? yes Good. "
        "hi hi hi hi hi hi hi"  # 24
    )

    tokens = tokenizer(
        [test_text] * 4,
        return_tensors="pt",
    )

    attention_mask = tokens["attention_mask"].cuda()
    attention_mask = attention_mask.view(attention_mask.size(0), -1)
    attention_mask = attention_mask[:, None, None, :]
    attention_mask = (1.0 - attention_mask) * -10000.0
    attention_mask = attention_mask.repeat(1, 1, attention_mask.size()[-1], 1)
    attn = gpt.h[0]

    hidden_states = gpt.wte(tokens["input_ids"].cuda())
    q, k, v = attn.attn.c_attn(hidden_states).split(768, dim=-1)
    q = attn.attn._split_heads(q, attn.attn.num_heads, attn.attn.head_dim)
    k = attn.attn._split_heads(k, attn.attn.num_heads, attn.attn.head_dim)
    attn_weights = torch.matmul(q, k.transpose(-1, -2))

    sq, sk = q.size(-2), k.size(-2)
    causal_mask = attn.attn.bias[:, :, sk - sq : sk, :sk].bool()
    total_mask = ~(causal_mask & (attention_mask == 0))
    """
    tensor([[[[False,  True,  True,  ...,  True,  True,  True],
              [False, False,  True,  ...,  True,  True,  True],
              [False, False, False,  ...,  True,  True,  True],
              ...,
              [False, False, False,  ..., False,  True,  True],
              [False, False, False,  ..., False, False,  True],
              [False, False, False,  ..., False, False, False]]]
    """

    fused_softmax = (
        FusedScaleMaskSoftmax(
            input_in_fp16=True,
            input_in_bf16=False,
            mask_func=attention_mask_func,
            scale=None,
            softmax_in_fp32=False,
            attn_mask_type=AttnMaskType.causal,
            scaled_masked_softmax_fusion=True,
        )
        .cuda()
        .half()
    )

    fused_softmax_output = fused_softmax(
        attn_weights,
        total_mask,
    )

    torch_softmax = (
        FusedScaleMaskSoftmax(
            input_in_fp16=True,
            input_in_bf16=False,
            mask_func=attention_mask_func,
            scale=None,
            softmax_in_fp32=False,
            attn_mask_type=AttnMaskType.causal,
            scaled_masked_softmax_fusion=False,
        )
        .cuda()
        .half()
    )

    torch_softmax_output = torch_softmax(
        attn_weights,
        total_mask,
    )

    test_result = (fused_softmax_output - torch_softmax_output).abs()

    while test_result.dim() != 1:
        test_result = test_result.mean(dim=-1)

    diff = test_result.mean(dim=-1)

    if diff <= 1e-3:
        print(
            f"\n[Success] test_fused_upper_triangle_mask_softmax"
            f"\n > mean_difference={diff}"
            f"\n > fused_values={fused_softmax_output[-1][-1][-1][:5].tolist()}"
            f"\n > torch_values={torch_softmax_output[-1][-1][-1][:5].tolist()}"
        )
    else:
        print(
            f"\n[Fail] test_fused_upper_triangle_mask_softmax"
            f"\n > mean_difference={diff}, "
            f"\n > fused_values={fused_softmax_output[-1][-1][-1][:5].tolist()}, "
            f"\n > torch_values={torch_softmax_output[-1][-1][-1][:5].tolist()}"
        )


def test_layer_norm():
    bert = BertModel.from_pretrained("bert-base-cased").cuda().half()
    tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
    test_text = (
        "Hello. How are you? I am fine thank you and you? yes Good. "
        "hi hi hi hi hi hi hi hi hi hi hi hi hi"  # 32
    )

    tokens = tokenizer(
        [test_text] * 4,
        return_tensors="pt",
    )

    # [bsz, seq_len, d_model]
    embedding_output = (
        bert.embeddings(
            input_ids=tokens["input_ids"].cuda(),
            position_ids=None,
            token_type_ids=tokens["token_type_ids"].cuda(),
            inputs_embeds=None,
            past_key_values_length=0,
        )
        .cuda()
        .half()
    )

    fused_layernorm_layer = (
        MixedFusedLayerNorm(normalized_shape=embedding_output.size(-1)).cuda().half()
    )

    torch_layernorm_layer = (
        LayerNorm(normalized_shape=embedding_output.size(-1)).cuda().half()
    )

    fused_output = fused_layernorm_layer(embedding_output)
    torch_output = torch_layernorm_layer(embedding_output)
    test_result = (fused_output - torch_output).abs()

    while test_result.dim() != 1:
        test_result = test_result.mean(dim=-1)

    diff = test_result.mean(dim=-1)

    if diff <= 1e-3:
        print(
            f"\n[Success] test_layer_norm"
            f"\n > mean_difference={diff}"
            f"\n > fused_values={fused_output[-1][-1][:5].tolist()}"
            f"\n > torch_values={torch_output[-1][-1][:5].tolist()}"
        )
    else:
        print(
            f"\n[Fail] test_layer_norm"
            f"\n > mean_difference={diff}, "
            f"\n > fused_values={fused_output[-1][-1][:5].tolist()}, "
            f"\n > torch_values={torch_output[-1][-1][:5].tolist()}"
        )


def attention_mask_func(attention_scores, attention_mask):
    attention_scores.masked_fill_(attention_mask, -10000.0)
    return attention_scores


def forward_torch_softmax(input, mask, scale):
    input = input * scale
    mask_output = attention_mask_func(input, mask) if mask is not None else input
    probs = torch.nn.Softmax(dim=-1)(mask_output)
    return probs


def test_masked_softmax_forward():
    import scaled_masked_softmax_cuda

    batch = 2
    attn = 16
    scale_t = torch.tensor([1.0])
    for qlen in [128, 256, 1024, 2048, 4096]:
        for klen in [128, 256, 1024, 2048]:
            inputs = torch.normal(0, 2, (batch, attn, qlen, klen), dtype=torch.float16, device='cuda:0')
            masks = torch.randint(0, 2, (batch, 1, qlen, klen), dtype=torch.bool, device='cuda:0')
            softmax_results = scaled_masked_softmax_cuda.forward(inputs, masks, scale_t[0].item())
            softmax_results_torch = forward_torch_softmax(inputs, masks, scale_t[0].item())
            error = (softmax_results_torch - softmax_results).abs().max()
            assert error < 1e-3

def test_masked_softmax_backward():
    import scaled_masked_softmax_cuda

    batch = 2
    attn = 16
    scale_t = torch.tensor([1.0])
    for qlen in [128, 256, 1024, 2048, 4096]:
        for klen in [128, 256, 1024, 2048]:
            inputs = torch.normal(0, 2, (batch, attn, qlen, klen), dtype=torch.float16, device='cuda:0')
            backward = torch.rand_like(inputs, dtype=torch.float16, device='cuda:0')
            masks = torch.randint(0, 2, (batch, 1, qlen, klen), dtype=torch.bool, device='cuda:0')
            softmax_results = scaled_masked_softmax_cuda.forward(inputs, masks, scale_t[0].item())
            back_grad = scaled_masked_softmax_cuda.backward(backward, softmax_results, scale_t[0].item())

            inputs.requires_grad = True
            softmax_results_torch = forward_torch_softmax(inputs, masks, scale_t[0].item())
            softmax_results_torch.backward(backward)
            error = (back_grad - inputs.grad).abs().max()
            assert error < 1e-3


def test_allmasked_softmax_forward():
    import scaled_masked_softmax_cuda

    batch = 2
    attn = 16
    scale_t = torch.tensor([1.0])
    for qlen in [128, 256, 1024, 2048, 4096]:
        for klen in [128, 256, 1024, 2048]:
            inputs = torch.normal(0, 2, (batch, attn, qlen, klen), dtype=torch.float16, device='cuda:0')
            masks = torch.ones((batch, 1, qlen, klen), dtype=torch.bool, device='cuda:0')
            softmax_results = scaled_masked_softmax_cuda.forward(inputs, masks, scale_t[0].item())
            softmax_results_torch = torch.zeros_like(inputs)
            error = (softmax_results_torch - softmax_results).abs().max()
            assert error == 0.0


def test_allmasked_softmax_backward():
    import scaled_masked_softmax_cuda

    batch = 2
    attn = 16
    scale_t = torch.tensor([1.0])
    for qlen in [128, 256, 1024, 2048, 4096]:
        for klen in [128, 256, 1024, 2048]:
            inputs = torch.normal(0, 2, (batch, attn, qlen, klen), dtype=torch.float16, device='cuda:0')
            backward = torch.rand_like(inputs, dtype=torch.float16, device='cuda:0')
            masks = torch.ones((batch, 1, qlen, klen), dtype=torch.bool, device='cuda:0')
            softmax_results = scaled_masked_softmax_cuda.forward(inputs, masks, scale_t[0].item())
            back_grad = scaled_masked_softmax_cuda.backward(backward, softmax_results, scale_t[0].item())
            inputs.requires_grad = True
            softmax_results_torch = forward_torch_softmax(inputs, masks, scale_t[0].item())
            softmax_results_torch.backward(backward)
            error = (back_grad - inputs.grad).abs().max()
            assert error < 1e-3


if __name__ == "__main__":
    try:
        from transformers import BertTokenizer, GPT2Tokenizer
        from transformers.models.bert.modeling_bert import BertModel
        from transformers.models.gpt2.modeling_gpt2 import GPT2Model
        import transformers

        transformers.logging.set_verbosity(
            transformers.logging.FATAL,
        )

    except ImportError:
        print("\n[Fail] Please install `transformers` package to test fused kernels\n")
        exit(-1)

    load()
    test_masked_softmax_forward()
    test_masked_softmax_backward()
    test_allmasked_softmax_forward()
    test_allmasked_softmax_backward()
    test_load_fused_kernels()
    test_fused_softmax()
    test_fused_upper_triangle_mask_softmax()
    test_layer_norm()


================================================
FILE: megatron/legacy/fused_kernels/type_shim.h
================================================
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. */


#include <ATen/ATen.h>
#include "compat.h"


#define DISPATCH_HALF_AND_BFLOAT(TYPE, NAME, ...)			\
  switch(TYPE)								\
    {									\
    case at::ScalarType::Half:						\
      {									\
	using scalar_t = at::Half;					\
	__VA_ARGS__;							\
	break;								\
      }									\
    case at::ScalarType::BFloat16:					\
      {									\
	using scalar_t = at::BFloat16;					\
	__VA_ARGS__;							\
	break;								\
      }									\
    default:								\
      AT_ERROR(#NAME, " not implemented for '", toString(TYPE), "'");	\
      }


#define DISPATCH_HALF_BFLOAT_AND_FLOAT(TYPE, NAME, ...)			\
  switch(TYPE)								\
    {									\
    case at::ScalarType::Half:						\
      {									\
	using scalar_t = at::Half;					\
	__VA_ARGS__;							\
	break;								\
      }									\
    case at::ScalarType::BFloat16:					\
      {									\
	using scalar_t = at::BFloat16;					\
	__VA_ARGS__;							\
	break;								\
      }									\
    case at::ScalarType::Float:						\
      {									\
	using scalar_t = float;					\
	__VA_ARGS__;							\
	break;								\
      }									\
    default:								\
      AT_ERROR(#NAME, " not implemented for '", toString(TYPE), "'");	\
      }


#define DISPATCH_FLOAT_HALF_AND_BFLOAT_INOUT_TYPES(TYPEIN, TYPEOUT, NAME, ...) \
  switch(TYPEIN)							\
    {									\
    case at::ScalarType::Float:						\
      {									\
	using scalar_t_in = float;					\
	switch(TYPEOUT)							\
	  {								\
	  case at::ScalarType::Float:					\
	    {								\
	      using scalar_t_out = float;				\
	      __VA_ARGS__;						\
	      break;							\
	    }								\
	  case at::ScalarType::Half:					\
	    {								\
	      using scalar_t_out = at::Half;				\
	      __VA_ARGS__;						\
	      break;							\
	    }								\
	  case at::ScalarType::BFloat16:				\
	    {								\
	      using scalar_t_out = at::BFloat16;			\
	      __VA_ARGS__;						\
	      break;							\
	    }								\
	  default:							\
	    AT_ERROR(#NAME, " not implemented for '", toString(TYPEOUT), "'"); \
	  }								\
	break;								\
      }									\
    case at::ScalarType::Half:						\
      {									\
	using scalar_t_in = at::Half;					\
	using scalar_t_out = at::Half;					\
	__VA_ARGS__;							\
	break;								\
      }									\
    case at::ScalarType::BFloat16:					\
      {									\
	using scalar_t_in = at::BFloat16;				\
	using scalar_t_out = at::BFloat16;				\
	__VA_ARGS__;							\
	break;								\
      }									\
    default:								\
      AT_ERROR(#NAME, " not implemented for '", toString(TYPEIN), "'");	\
    }


================================================
FILE: megatron/legacy/model/__init__.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

from .fused_layer_norm import MixedFusedLayerNorm as LayerNorm
from .rms_norm import RMSNorm

from .bert_model import BertModel
from .gpt_model import GPTModel
from .t5_model import T5Model
from .language_model import get_language_model


================================================
FILE: megatron/legacy/model/bert_model.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

"""BERT model."""

import torch

from megatron.training import get_args
from megatron.core import tensor_parallel
from megatron.legacy.model.enums import AttnMaskType
from megatron.legacy.model.language_model import parallel_lm_logits
from megatron.legacy.model.language_model import get_language_model
from megatron.legacy.model.utils import get_norm
from megatron.legacy.model.utils import openai_gelu, erf_gelu
from megatron.legacy.model.utils import get_linear_layer
from megatron.legacy.model.utils import init_method_normal
from megatron.legacy.model.utils import scaled_init_method_normal
from .module import MegatronModule


def bert_extended_attention_mask(attention_mask):
    # We create a 3D attention mask from a 2D tensor mask.
    # [b, 1, s]
    attention_mask_b1s = attention_mask.unsqueeze(1)
    # [b, s, 1]
    attention_mask_bs1 = attention_mask.unsqueeze(2)
    # [b, s, s]
    attention_mask_bss = attention_mask_b1s * attention_mask_bs1
    # [b, 1, s, s]
    extended_attention_mask = attention_mask_bss.unsqueeze(1)

    # Convert attention mask to binary:
    extended_attention_mask = (extended_attention_mask < 0.5)

    return extended_attention_mask

def bert_position_ids(token_ids):
    # Create position ids
    seq_length = token_ids.size(1)
    position_ids = torch.arange(seq_length, dtype=torch.long,
                                device=token_ids.device)
    position_ids = position_ids.unsqueeze(0).expand_as(token_ids)

    return position_ids


class BertLMHead(MegatronModule):
    """Masked LM head for Bert

    Args:
        config: TransformerConfig object
        mpu_vocab_size: model parallel size of vocabulary.
        parallel_output: whether output logits being distributed or not.
    """

    def __init__(self, mpu_vocab_size, config, parallel_output):
        super().__init__(config=config)

        args = get_args()
        self.bias = torch.nn.Parameter(torch.zeros(mpu_vocab_size))
        tensor_parallel.set_tensor_model_parallel_attributes(self.bias, True, 0, 1)
        self.parallel_output = parallel_output

        self.dense = get_linear_layer(config.hidden_size, config.hidden_size, config.init_method)
        setattr(self.dense.weight, 'sequence_parallel', config.sequence_parallel)
        setattr(self.dense.bias, 'sequence_parallel', config.sequence_parallel)

        self.norm = get_norm(config)
        self.gelu = torch.nn.functional.gelu
        if args.openai_gelu:
            self.gelu = openai_gelu
        elif args.onnx_safe:
            self.gelu = erf_gelu

    def forward(self, hidden_states, word_embeddings_weight):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.gelu(hidden_states)
        hidden_states = self.norm(hidden_states)
        output = parallel_lm_logits(hidden_states,
                                    word_embeddings_weight,
                                    self.parallel_output,
                                    bias=self.bias)
        return output

    def load_state_dict(self, state_dict, strict=True):
        """Customize load."""

        # Handle renaming layernorm -> norm in component names
        state_dict_ = {}
        for key in state_dict.keys():
            newkey = key.replace("layernorm", "norm")
            state_dict_[newkey] = state_dict[key]

        super().load_state_dict(state_dict_, strict)


def post_language_model_processing(lm_output, pooled_output,
                                   lm_head, binary_head,
                                   lm_labels,
                                   logit_weights,
                                   fp16_lm_cross_entropy):
    # Output.
    lm_logits = lm_head(
        lm_output, logit_weights)

    binary_logits = None
    if binary_head is not None:
        binary_logits = binary_head(pooled_output)

    if lm_labels is None:
        # [s b h] => [b s h]
        return lm_logits.transpose(0,1).contiguous(), binary_logits
    else:
        # [b s] => [s b]
        lm_labels = lm_labels.transpose(0,1).contiguous()
        # lm_logits : [s, b, h] and lm_labels: [s, b]
        if fp16_lm_cross_entropy:
            assert lm_logits.dtype == torch.half
            lm_loss = tensor_parallel.vocab_parallel_cross_entropy(lm_logits, lm_labels)
        else:
            lm_loss = tensor_parallel.vocab_parallel_cross_entropy(lm_logits.float(),
                                                        lm_labels)
        # [s, b] => [b s]
        lm_loss = lm_loss.transpose(0,1).contiguous()
        return lm_loss, binary_logits


class BertModel(MegatronModule):
    """Bert Language model."""

    def __init__(self,
                 config,
                 num_tokentypes=2,
                 add_binary_head=True,
                 parallel_output=True,
                 pre_process=True,
                 post_process=True):
        super().__init__(config=config)
        args = get_args()

        # TODO this option is not yet implemented in BERT
        assert args.untie_embeddings_and_output_weights is False

        self.fp16_lm_cross_entropy = args.fp16_lm_cross_entropy
        self.add_binary_head = add_binary_head
        self.parallel_output = parallel_output
        self.pre_process = pre_process
        self.post_process = post_process

        self.return_embeddings = args.output_bert_embeddings
        if self.return_embeddings:
            assert self.post_process and self.add_binary_head

        self.language_model, self._language_model_key = get_language_model(
            config=config,
            num_tokentypes=num_tokentypes,
            add_pooler=self.add_binary_head,
            encoder_attn_mask_type=AttnMaskType.padding,
            pre_process=self.pre_process,
            post_process=self.post_process)

        self.initialize_word_embeddings()
        if self.post_process:
            self.lm_head = BertLMHead(self.shared_embedding_or_output_weight().size(0), config, parallel_output)
            self._lm_head_key = 'lm_head'
            self.binary_head = None
            if self.add_binary_head:
                self.binary_head = get_linear_layer(config.hidden_size, 2,
                                                    config.init_method)
                self._binary_head_key = 'binary_head'

    def set_input_tensor(self, input_tensor):
        """See megatron.legacy.model.transformer.set_input_tensor()"""
        self.language_model.set_input_tensor(input_tensor)

    def forward(self, bert_model_input, attention_mask,
                tokentype_ids=None, lm_labels=None, inference_context=None):

        extended_attention_mask = bert_extended_attention_mask(attention_mask)
        input_ids = bert_model_input
        position_ids = bert_position_ids(input_ids)

        lm_output = self.language_model(
            input_ids,
            position_ids,
            extended_attention_mask,
            tokentype_ids=tokentype_ids
        )

        if self.post_process and self.add_binary_head:
            lm_output, pooled_output = lm_output

            # Return pooled output (e.g., when computing Bert embeddings).
            if self.return_embeddings:

                # Sum attention mask.
                embeddings = torch.transpose(lm_output, 0, 1)
                masks = torch.sum(attention_mask, dim=1)

                # Collect masked embeddings.
                output = torch.zeros(
                    size=(embeddings.shape[0], embeddings.shape[2]),
                    dtype=torch.float32,
                    device=torch.cuda.current_device())
                for i, (embedding, mask) in enumerate(zip(embeddings, masks)):
                    output[i, :] = torch.mean(embedding[1: mask - 1], dim=0)

                return output

        else:
            pooled_output = None

        if self.post_process:
            return post_language_model_processing(lm_output, pooled_output,
                                                  self.lm_head, self.binary_head,
                                                  lm_labels,
                                                  self.shared_embedding_or_output_weight(),
                                                  self.fp16_lm_cross_entropy)
        else:
            return lm_output


    def state_dict_for_save_checkpoint(self, prefix='', keep_vars=False):
        """For easy load when model is combined with other heads,
        add an extra key."""

        state_dict_ = {}
        state_dict_[self._language_model_key] \
            = self.language_model.state_dict_for_save_checkpoint(prefix=prefix,
                                                                 keep_vars=keep_vars)
        if self.post_process:
            state_dict_[self._lm_head_key] \
                = self.lm_head.state_dict_for_save_checkpoint(prefix=prefix,
                                                              keep_vars=keep_vars)
        if self.post_process and self.add_binary_head:
            state_dict_[self._binary_head_key] \
                = self.binary_head.state_dict(prefix=prefix, keep_vars=keep_vars)
        # Save word_embeddings.
        if self.post_process and not self.pre_process:
            state_dict_[self._word_embeddings_for_head_key] \
                = self.word_embeddings.state_dict(prefix=prefix, keep_vars=keep_vars)
        return state_dict_

    def load_state_dict(self, state_dict, strict=True):
        """Customized load."""

        self.language_model.load_state_dict(
            state_dict[self._language_model_key], strict=strict)
        if self.post_process:
            self.lm_head.load_state_dict(
                state_dict[self._lm_head_key], strict=strict)
        if self.post_process and self.add_binary_head:
            self.binary_head.load_state_dict(
                state_dict[self._binary_head_key], strict=strict)
        # Load word_embeddings.
        if self.post_process and not self.pre_process:
            self.word_embeddings.load_state_dict(
                state_dict[self._word_embeddings_for_head_key], strict=strict)


================================================
FILE: megatron/legacy/model/biencoder_model.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import os
import torch
import sys

from megatron.training import get_args, print_rank_0, get_tokenizer
from megatron.core import mpu
from megatron.training.checkpointing import fix_query_key_value_ordering
from megatron.training.checkpointing import get_checkpoint_tracker_filename
from megatron.training.checkpointing import get_checkpoint_name
from megatron.legacy.model.bert_model import bert_position_ids
from megatron.legacy.model.enums import AttnMaskType
from megatron.legacy.model.language_model import get_language_model
from megatron.legacy.model.utils import get_linear_layer
from megatron.legacy.model.utils import init_method_normal
from megatron.legacy.model.utils import scaled_init_method_normal
from .module import MegatronModule

def get_model_provider(only_query_model=False, only_context_model=False,
        biencoder_shared_query_context_model=False):

    def model_provider(pre_process=True, post_process=True):
        """Build the model."""

        print_rank_0('building Bienoder model ...')
        model = biencoder_model_provider(only_query_model=only_query_model,
                only_context_model = only_context_model,
                biencoder_shared_query_context_model = \
                biencoder_shared_query_context_model,
                pre_process=pre_process, post_process=post_process)

        return model

    return model_provider


def biencoder_model_provider(only_query_model=False,
                             only_context_model=False,
                             biencoder_shared_query_context_model=False,
                             pre_process=True,
                             post_process=True):
    """Build the model."""

    assert mpu.get_tensor_model_parallel_world_size() == 1 and \
        mpu.get_pipeline_model_parallel_world_size() == 1, \
        "Model parallel size > 1 not supported for ICT"

    print_rank_0('building BiEncoderModel...')

    # simpler to just keep using 2 tokentypes since
    # the LM we initialize with has 2 tokentypes
    model = BiEncoderModel(
        num_tokentypes=2,
        parallel_output=False,
        only_query_model=only_query_model,
        only_context_model=only_context_model,
        biencoder_shared_query_context_model=\
        biencoder_shared_query_context_model,
        pre_process=pre_process,
        post_process=post_process)

    return model


class BiEncoderModel(MegatronModule):
    """Bert-based module for Biencoder model."""

    def __init__(self,
                 num_tokentypes=1,
                 parallel_output=True,
                 only_query_model=False,
                 only_context_model=False,
                 biencoder_shared_query_context_model=False,
                 pre_process=True,
                 post_process=True):
        super(BiEncoderModel, self).__init__()
        args = get_args()

        bert_kwargs = dict(
            num_tokentypes=num_tokentypes,
            parallel_output=parallel_output,
            pre_process=pre_process,
            post_process=post_process)

        self.biencoder_shared_query_context_model = \
            biencoder_shared_query_context_model
        assert not (only_context_model and only_query_model)
        self.use_context_model = not only_query_model
        self.use_query_model = not only_context_model
        self.biencoder_projection_dim = args.biencoder_projection_dim

        if self.biencoder_shared_query_context_model:
            self.model = PretrainedBertModel(**bert_kwargs)
            self._model_key = 'shared_model'
            self.query_model, self.context_model = self.model, self.model
        else:
            if self.use_query_model:
                # this model embeds (pseudo-)queries - Embed_input in the paper
                self.query_model = PretrainedBertModel(**bert_kwargs)
                self._query_key = 'query_model'

            if self.use_context_model:
                # this model embeds evidence blocks - Embed_doc in the paper
                self.context_model = PretrainedBertModel(**bert_kwargs)
                self._context_key = 'context_model'

    def set_input_tensor(self, input_tensor):
        """See megatron.legacy.model.transformer.set_input_tensor()"""
        # this is just a placeholder and will be needed when model
        # parallelism will be used
        # self.language_model.set_input_tensor(input_tensor)
        return

    def forward(self, query_tokens, query_attention_mask, query_types,
                context_tokens, context_attention_mask, context_types):
        """Run a forward pass for each of the models and
        return the respective embeddings."""

        if self.use_query_model:
            query_logits = self.embed_text(self.query_model,
                                           query_tokens,
                                           query_attention_mask,
                                           query_types)
        else:
            raise ValueError("Cannot embed query without the query model.")
        if self.use_context_model:
            context_logits = self.embed_text(self.context_model,
                                             context_tokens,
                                             context_attention_mask,
                                             context_types)
        else:
            raise ValueError("Cannot embed block without the block model.")
        return query_logits, context_logits

    @staticmethod
    def embed_text(model, tokens, attention_mask, token_types):
        """Embed a batch of tokens using the model"""
        logits = model(tokens,
                              attention_mask,
                              token_types)
        return logits

    def state_dict_for_save_checkpoint(self, prefix='', keep_vars=False):
        """Save dict with state dicts of each of the models."""
        state_dict_ = {}
        if self.biencoder_shared_query_context_model:
            state_dict_[self._model_key] = \
                self.model.state_dict_for_save_checkpoint(
                    prefix=prefix, keep_vars=keep_vars)
        else:
            if self.use_query_model:
                state_dict_[self._query_key] = \
                    self.query_model.state_dict_for_save_checkpoint(
                        prefix=prefix, keep_vars=keep_vars)

            if self.use_context_model:
                state_dict_[self._context_key] = \
                    self.context_model.state_dict_for_save_checkpoint(
                        prefix=prefix, keep_vars=keep_vars)

        return state_dict_

    def load_state_dict(self, state_dict, strict=True):
        """Load the state dicts of each of the models"""
        if self.biencoder_shared_query_context_model:
            print_rank_0("Loading shared query-context model")
            self.model.load_state_dict(state_dict[self._model_key], \
                strict=strict)
        else:
            if self.use_query_model:
                print_rank_0("Loading query model")
                self.query_model.load_state_dict( \
                    state_dict[self._query_key], strict=strict)

            if self.use_context_model:
                print_rank_0("Loading context model")
                self.context_model.load_state_dict( \
                    state_dict[self._context_key], strict=strict)

    def init_state_dict_from_bert(self):
        """Initialize the state from a pretrained BERT model
        on iteration zero of ICT pretraining"""
        args = get_args()

        if args.bert_load is None:
            print_rank_0("bert-load argument is None")
            return

        tracker_filename = get_checkpoint_tracker_filename(args.bert_load)
        if not os.path.isfile(tracker_filename):
            raise FileNotFoundError("Could not find BERT checkpoint")
        with open(tracker_filename, 'r') as f:
            iteration = int(f.read().strip())
            assert iteration > 0

        checkpoint_name = get_checkpoint_name(args.bert_load, iteration, False)
        if mpu.get_data_parallel_rank() == 0:
            print('global rank {} is loading BERT checkpoint {}'.format(
                torch.distributed.get_rank(), checkpoint_name))

        # Load the checkpoint.
        try:
            state_dict = torch.load(checkpoint_name, map_location='cpu')
        except ModuleNotFoundError:
            from megatron.legacy.fp16_deprecated import loss_scaler
            # For backward compatibility.
            print_rank_0(' > deserializing using the old code structure ...')
            sys.modules['fp16.loss_scaler'] = sys.modules[
                'megatron.fp16_deprecated.loss_scaler']
            sys.modules['megatron.fp16.loss_scaler'] = sys.modules[
                'megatron.fp16_deprecated.loss_scaler']
            state_dict = torch.load(checkpoint_name, map_location='cpu')
            sys.modules.pop('fp16.loss_scaler', None)
            sys.modules.pop('megatron.fp16.loss_scaler', None)
        except Exception:
            print_rank_0('could not load the BERT checkpoint')
            sys.exit()

        checkpoint_version = state_dict.get('checkpoint_version', 0)

        # load the LM state dict into each model
        model_dict = state_dict['model']['language_model']

        if self.biencoder_shared_query_context_model:
            self.model.language_model.load_state_dict(model_dict)
            fix_query_key_value_ordering(self.model, checkpoint_version)
        else:
            if self.use_query_model:
                self.query_model.language_model.load_state_dict(model_dict)
                # give each model the same ict_head to begin with as well
                if self.biencoder_projection_dim > 0:
                    query_proj_state_dict = \
                        self.state_dict_for_save_checkpoint()\
                        [self._query_key]['projection_enc']
                fix_query_key_value_ordering(self.query_model, checkpoint_version)

            if self.use_context_model:
                self.context_model.language_model.load_state_dict(model_dict)
                if self.query_model is not None and \
                    self.biencoder_projection_dim > 0:
                    self.context_model.projection_enc.load_state_dict\
                        (query_proj_state_dict)
                fix_query_key_value_ordering(self.context_model, checkpoint_version)


class PretrainedBertModel(MegatronModule):
    """BERT-based encoder for queries or contexts used for
    learned information retrieval."""

    def __init__(self, num_tokentypes=2,
            parallel_output=True, pre_process=True, post_process=True):
        super(PretrainedBertModel, self).__init__()

        args = get_args()
        tokenizer = get_tokenizer()
        self.pad_id = tokenizer.pad
        self.biencoder_projection_dim = args.biencoder_projection_dim
        self.parallel_output = parallel_output
        self.pre_process = pre_process
        self.post_process = post_process
        init_method = init_method_normal(args.init_method_std)
        scaled_init_method = scaled_init_method_normal(
            args.init_method_std, args.num_layers)

        self.language_model, self._language_model_key = get_language_model(
            num_tokentypes=num_tokentypes,
            add_pooler=False,
            encoder_attn_mask_type=AttnMaskType.padding,
            init_method=init_method,
            scaled_init_method=scaled_init_method,
            pre_process=self.pre_process,
            post_process=self.post_process)

        if args.biencoder_projection_dim > 0:
            self.projection_enc = get_linear_layer(args.hidden_size,
                                                   args.biencoder_projection_dim,
                                                   init_method)
            self._projection_enc_key = 'projection_enc'

    def forward(self, input_ids, attention_mask, tokentype_ids=None):
        extended_attention_mask = attention_mask.unsqueeze(1)
        #extended_attention_mask = bert_extended_attention_mask(attention_mask)
        position_ids = bert_position_ids(input_ids)

        lm_output = self.language_model(input_ids,
                                        position_ids,
                                        extended_attention_mask,
                                        tokentype_ids=tokentype_ids)
        # This mask will be used in average-pooling and max-pooling
        pool_mask = (input_ids == self.pad_id).unsqueeze(2)

        # Taking the representation of the [CLS] token of BERT
        pooled_output = lm_output[0, :, :]

        # Converting to float16 dtype
        pooled_output = pooled_output.to(lm_output.dtype)

        # Output.
        if self.biencoder_projection_dim:
            pooled_output = self.projection_enc(pooled_output)

        return pooled_output

    def state_dict_for_save_checkpoint(self, prefix='', keep_vars=False):
        """For easy load when model is combined with other heads,
        add an extra key."""

        state_dict_ = {}
        state_dict_[self._language_model_key] \
            = self.language_model.state_dict_for_save_checkpoint(
                prefix=prefix, keep_vars=keep_vars)

        if self.biencoder_projection_dim > 0:
            state_dict_[self._projection_enc_key] = \
                self.projection_enc.state_dict(prefix=prefix,
                                               keep_vars=keep_vars)

        return state_dict_

    def load_state_dict(self, state_dict, strict=True):
        """Customized load."""
        print_rank_0("loading pretrained weights")
        self.language_model.load_state_dict(
            state_dict[self._language_model_key], strict=strict)

        if self.biencoder_projection_dim > 0:
            print_rank_0("loading projection head weights")
            self.projection_enc.load_state_dict(
                state_dict[self._projection_enc_key], strict=strict)


================================================
FILE: megatron/legacy/model/classification.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""Classification model."""

import torch

from megatron.training import get_args, print_rank_last
from megatron.legacy.model.enums import AttnMaskType
from megatron.legacy.model.bert_model import bert_extended_attention_mask, bert_position_ids
from megatron.legacy.model.language_model import get_language_model
from megatron.legacy.model.utils import get_linear_layer
from megatron.legacy.model.utils import init_method_normal
from megatron.legacy.model.utils import scaled_init_method_normal
from .module import MegatronModule


class Classification(MegatronModule):

    def __init__(self,
                 config,
                 num_classes,
                 num_tokentypes=2,
                 pre_process=True,
                 post_process=True):
        super().__init__(config=config, share_embeddings_and_output_weights=False)
        args = get_args()

        self.num_classes = num_classes
        self.pre_process = pre_process
        self.post_process = post_process

        self.language_model, self._language_model_key = get_language_model(
            config=config,
            num_tokentypes=num_tokentypes,
            add_pooler=True,
            encoder_attn_mask_type=AttnMaskType.padding,
            pre_process=self.pre_process,
            post_process=self.post_process)

        # Multi-choice head.
        if self.post_process:
            self.classification_dropout = torch.nn.Dropout(args.hidden_dropout)
            self.classification_head = get_linear_layer(args.hidden_size,
                                                        self.num_classes,
                                                        config.init_method)
            self._classification_head_key = 'classification_head'

    def set_input_tensor(self, input_tensor):
        """See megatron.legacy.model.transformer.set_input_tensor()"""
        self.language_model.set_input_tensor(input_tensor)

    def forward(self, model_input, attention_mask, tokentype_ids=None):

        extended_attention_mask = bert_extended_attention_mask(attention_mask)
        input_ids = model_input
        position_ids = bert_position_ids(input_ids)

        lm_output = self.language_model(
            input_ids,
            position_ids,
            extended_attention_mask,
            tokentype_ids=tokentype_ids
        )

        if self.post_process:
            _, pooled_output = lm_output
            classification_output = self.classification_dropout(pooled_output)
            classification_logits = self.classification_head(classification_output)

            # Reshape back to separate choices.
            classification_logits = classification_logits.view(-1, self.num_classes)

            return classification_logits
        return lm_output

    def state_dict_for_save_checkpoint(self, prefix='', keep_vars=False):
        """For easy load when model is combined with other heads,
        add an extra key."""

        state_dict_ = {}
        state_dict_[self._language_model_key] \
            = self.language_model.state_dict_for_save_checkpoint(prefix=prefix,
                                                                 keep_vars=keep_vars)
        if self.post_process:
            state_dict_[self._classification_head_key] \
                = self.classification_head.state_dict(prefix=prefix, keep_vars=keep_vars)
        return state_dict_

    def load_state_dict(self, state_dict, strict=True):
        """Customized load."""

        self.language_model.load_state_dict(
            state_dict[self._language_model_key], strict=strict)
        if self.post_process:
            if self._classification_head_key in state_dict:
                self.classification_head.load_state_dict(
                    state_dict[self._classification_head_key], strict=strict)
            else:
                print_rank_last('***WARNING*** could not find {} in the checkpoint, '
                                'initializing to random'.format(
                                    self._classification_head_key))


================================================
FILE: megatron/legacy/model/enums.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

import enum

class LayerType(enum.Enum):
    encoder = 1
    decoder = 2
 
class AttnType(enum.Enum):
    self_attn = 1
    cross_attn = 2

class AttnMaskType(enum.Enum):
    padding = 1
    causal = 2

# For backward compatibility with old model checkpoints
from megatron.core.enums import ModelType


================================================
FILE: megatron/legacy/model/fused_bias_gelu.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

import torch
from megatron.core.jit import jit_fuser


###### BIAS GELU FUSION/ NO AUTOGRAD ################
# 1/sqrt(2*pi)-> 0.3989423
# 1/sqrt(2)   -> 0.70710678
# sqrt(2/pi)  -> 0.79788456
# this function is tanh approximation of gelu
# actual gelu is:
# x * 0.5 * (1.0 + torch.erf(x * 0.70710678))

@jit_fuser
def bias_gelu(bias, y):
    x = bias + y
    return  x * 0.5 * (1.0 + torch.tanh(0.79788456 * x * (1 + 0.044715 * x * x)))

# gradient of tanh approximation of gelu
# gradient of actual gelu is:
# 0.5 * (1. + torch.erf(x * 0.70710678)) + 0.3989423 * x * torch.exp(-0.5 * x * x)
@jit_fuser
def bias_gelu_back(g, bias, y):
    x = bias + y
    tanh_out = torch.tanh(0.79788456 * x * (1 + 0.044715 * x * x))
    # sqrt(2/pi) * 3 * 0.044715 -> 0.1070322243
    ff = 0.5 * x * ((1 - tanh_out * tanh_out) * (0.79788456 + 0.1070322243 * x * x)) + 0.5 * (1 + tanh_out)
    return ff*g

class GeLUFunction(torch.autograd.Function):
    @staticmethod
    # bias is an optional argument
    def forward(ctx, input, bias):
        ctx.save_for_backward(input, bias)
        return bias_gelu(bias, input)

    @staticmethod
    def backward(ctx, grad_output):
        input, bias = ctx.saved_tensors
        tmp = bias_gelu_back(grad_output, bias, input)
        return tmp, tmp

bias_gelu_impl = GeLUFunction.apply


================================================
FILE: megatron/legacy/model/fused_layer_norm.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""This code is copied fron NVIDIA apex:
      https://github.com/NVIDIA/apex
   with some changes. """

import inspect
import numbers
import torch
from torch.nn.parameter import Parameter
from torch.nn import init
import importlib

from megatron.core.utils import make_viewless_tensor

try:
    from apex.contrib.layer_norm.layer_norm import FastLayerNormFN
    HAVE_PERSIST_LAYER_NORM = True
except ImportError:
    HAVE_PERSIST_LAYER_NORM = False

try:
    from apex.normalization.fused_layer_norm import fused_layer_norm_affine
except ImportError:
    fused_layer_norm_affine = None

global fused_layer_norm_cuda
fused_layer_norm_cuda = None


class MixedFusedLayerNorm(torch.nn.Module):

  def __init__(self, normalized_shape, eps=1e-5,
               no_persist_layer_norm=True,
               sequence_parallel=False,
               apply_layernorm_1p=False):
        super(MixedFusedLayerNorm, self).__init__()

        self.apply_layernorm_1p = apply_layernorm_1p

        global fused_layer_norm_cuda
        fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")

        # List of hiddens sizes supported in the persistent layer norm kernel
        # If the hidden size is not supported, fall back to the non-persistent
        # kernel.
        persist_ln_hidden_sizes = [1024, 1536, 2048, 2304, 3072, 3840, 4096,
            5120, 6144, 8192, 10240, 12288, 12800, 15360, 16384, 18432, 20480,
            24576, 25600, 30720, 32768, 40960, 49152, 65536]
        if normalized_shape not in persist_ln_hidden_sizes or \
                not HAVE_PERSIST_LAYER_NORM:
            no_persist_layer_norm = True

        if isinstance(normalized_shape, numbers.Integral):
            normalized_shape = (normalized_shape,)
        self.normalized_shape = torch.Size(normalized_shape)
        self.eps = eps
        self.weight = Parameter(torch.Tensor(*normalized_shape))
        self.bias = Parameter(torch.Tensor(*normalized_shape))
        self.reset_parameters()
        self.no_persist_layer_norm = no_persist_layer_norm
        self.sequence_parallel = sequence_parallel

        # set sequence parallelism flag on weight and bias parameters
        setattr(self.weight, 'sequence_parallel', self.sequence_parallel)
        setattr(self.bias, 'sequence_parallel', self.sequence_parallel)


  def reset_parameters(self):

    if self.apply_layernorm_1p:
        init.zeros_(self.weight)
        init.zeros_(self.bias)
    else:
        init.ones_(self.weight)
        init.zeros_(self.bias)

  def forward(self, input):

    weight = self.weight + 1 if self.apply_layernorm_1p else self.weight

    if self.no_persist_layer_norm:
        assert fused_layer_norm_affine is not None, \
            "fused_layer_norm_affine is not available, please install apex from https://github.com/NVIDIA/apex"
        return fused_layer_norm_affine(input, weight, self.bias, self.normalized_shape, eps=self.eps)
    else:
        if 'memory_efficient' in inspect.getfullargspec(FastLayerNormFN.forward).args:
            output = FastLayerNormFN.apply(input, weight, self.bias, self.eps, False)
        else:
            output = FastLayerNormFN.apply(input, weight, self.bias, self.eps)
        # Apex's fast layer norm function outputs a 'view' tensor (i.e., has
        # a populated '_base' field). This will result in schedule.py's
        # deallocate_output_tensor() throwing an error, so a viewless tensor is
        # created to prevent this.
        output = make_viewless_tensor(inp = output,
                                      requires_grad = input.requires_grad,
                                      keep_graph = True)

        return output


================================================
FILE: megatron/legacy/model/fused_softmax.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.


import torch
import torch.nn as nn
from megatron.legacy.model.enums import AttnMaskType


class ScaledUpperTriangMaskedSoftmax(torch.autograd.Function):
    """
    Fused operation which performs following three operations in sequence
    1. Scale the tensor.
    2. Apply upper triangular mask (typically used in gpt models).
    3. Perform softmax.
    """

    @staticmethod
    def forward(ctx, inputs, scale):
        try:
            import scaled_upper_triang_masked_softmax_cuda
        except (ImportError, ModuleNotFoundError):
            print(f'Please install Apex to use fused_softmax')

        scale_t = torch.tensor([scale])
        softmax_results = scaled_upper_triang_masked_softmax_cuda.forward(
            inputs, scale_t[0]
        )

        ctx.save_for_backward(softmax_results, scale_t)
        return softmax_results

    @staticmethod
    def backward(ctx, output_grads):
        try:
            import scaled_upper_triang_masked_softmax_cuda
        except (ImportError, ModuleNotFoundError):
            print(f'Please install Apex to use fused_softmax')

        softmax_results, scale_t = ctx.saved_tensors
        input_grads = scaled_upper_triang_masked_softmax_cuda.backward(
            output_grads, softmax_results, scale_t[0]
        )

        return input_grads, None


class ScaledMaskedSoftmax(torch.autograd.Function):
    """
    Fused operation which performs following three operations in sequence
    1. Scale the tensor.
    2. Apply the mask.
    3. Perform softmax.
    """

    @staticmethod
    def forward(ctx, inputs, mask, scale):
        try:
            import scaled_masked_softmax_cuda
        except (ImportError, ModuleNotFoundError):
            print(f'Please install Apex to use fused_softmax')

        scale_t = torch.tensor([scale])

        softmax_results = scaled_masked_softmax_cuda.forward(inputs, mask, scale_t[0])
        ctx.save_for_backward(softmax_results, scale_t)
        return softmax_results

    @staticmethod
    def backward(ctx, output_grads):
        try:
            import scaled_masked_softmax_cuda
        except (ImportError, ModuleNotFoundError):
            print(f'Please install Apex to use fused_softmax')

        softmax_results, scale_t = ctx.saved_tensors

        input_grads = scaled_masked_softmax_cuda.backward(
            output_grads, softmax_results, scale_t[0]
        )
        return input_grads, None, None


class ScaledSoftmax(torch.autograd.Function):
    """
    Fused operation which performs following two operations in sequence
    1. Scale the tensor.
    2. Perform softmax.
    """

    @staticmethod
    def forward(ctx, inputs, scale):
        try:
            import scaled_softmax_cuda
        except (ImportError, ModuleNotFoundError):
            print(f'Please install Apex to use fused_softmax')

        scale_t = torch.tensor([scale])

        softmax_results = scaled_softmax_cuda.forward(
            inputs, scale_t[0]
        )
        ctx.save_for_backward(softmax_results, scale_t)
        return softmax_results

    @staticmethod
    def backward(ctx, output_grads):
        try:
            import scaled_softmax_cudaa
        except (ImportError, ModuleNotFoundError):
            print(f'Please install Apex to use fused_softmax')

        softmax_results, scale_t = ctx.saved_tensors

        input_grads = scaled_softmax_cuda.backward(
            output_grads, softmax_results, scale_t[0]
        )
        return input_grads, None, None


class FusedScaleMaskSoftmax(nn.Module):
    """
    fused operation: scaling + mask + softmax

    Args:
        input_in_fp16: flag to indicate if input in fp16 data format.
        input_in_bf16: flag to indicate if input in bf16 data format.
        attn_mask_type: attention mask type (pad or causal)
        scaled_masked_softmax_fusion: flag to indicate user want to use softmax fusion
        mask_func: mask function to be applied.
        softmax_in_fp32: if true, softmax in performed at fp32 precision.
        scale: scaling factor used in input tensor scaling.
    """

    def __init__(
        self,
        input_in_fp16,
        input_in_bf16,
        attn_mask_type,
        scaled_masked_softmax_fusion,
        mask_func,
        softmax_in_fp32,
        scale,
    ):
        super(FusedScaleMaskSoftmax, self).__init__()
        self.input_in_fp16 = input_in_fp16
        self.input_in_bf16 = input_in_bf16
        assert not (
            self.input_in_fp16 and self.input_in_bf16
        ), "both fp16 and bf16 flags cannot be active at the same time."
        self.input_in_float16 = self.input_in_fp16 or self.input_in_bf16
        self.attn_mask_type = attn_mask_type
        self.scaled_masked_softmax_fusion = scaled_masked_softmax_fusion
        self.mask_func = mask_func
        self.softmax_in_fp32 = softmax_in_fp32
        self.scale = scale

        assert (
            self.scale is None or softmax_in_fp32
        ), "softmax should be in fp32 when scaled"

    def forward(self, input, mask):
        # [b, np, sq, sk]
        assert input.dim() == 4

        if self.is_kernel_available(mask, *input.size()):
            return self.forward_fused_softmax(input, mask)
        else:
            return self.forward_torch_softmax(input, mask)

    def is_kernel_available(self, mask, b, np, sq, sk):
        attn_batches = b * np

        if (
            self.scaled_masked_softmax_fusion  # user want to fuse
            and self.input_in_float16  # input must be fp16
            and 16 < sk <= 16384  # sk must be 16 ~ 16384
            and sq % 4 == 0  # sq must be divisor of 4
            and sk % 4 == 0  # sk must be divisor of 4
            and attn_batches % 4 == 0  # np * b must be divisor of 4
        ):
            if 0 <= sk <= 16384:
                batch_per_block = self.get_batch_per_block(sq, sk, b, np)

                if self.attn_mask_type == AttnMaskType.causal:
                    if attn_batches % batch_per_block == 0:
                        return True
                else:
                    if sq % batch_per_block == 0:
                        return True
        return False

    def forward_fused_softmax(self, input, mask):
        b, np, sq, sk = input.size()
        scale = self.scale if self.scale is not None else 1.0

        if self.attn_mask_type == AttnMaskType.causal:
            assert sq == sk, "causal mask is only for self attention"

            # input is 3D tensor (attn_batches, sq, sk)
            input = input.view(-1, sq, sk)
            probs = ScaledUpperTriangMaskedSoftmax.apply(input, scale)
            return probs.view(b, np, sq, sk)
        else:
            # input is 4D tensor (b, np, sq, sk)
            if mask is not None:
                return ScaledMaskedSoftmax.apply(input, mask, scale)
            else:
                return ScaledSoftmax.apply(input, scale)

    def forward_torch_softmax(self, input, mask):
        if self.input_in_float16 and self.softmax_in_fp32:
            input = input.float()

        if self.scale is not None:
            input = input * self.scale
        mask_output = self.mask_func(input, mask) if mask is not None else input
        probs = torch.nn.Softmax(dim=-1)(mask_output)

        if self.input_in_float16 and self.softmax_in_fp32:
            if self.input_in_fp16:
                probs = probs.half()
            else:
                probs = probs.bfloat16()

        return probs

    @staticmethod
    def get_batch_per_block(sq, sk, b, np):
        try:
            import scaled_masked_softmax_cuda
        except (ImportError, ModuleNotFoundError):
            print(f'Please install Apex to use fused_softmax')

        return scaled_masked_softmax_cuda.get_batch_per_block(sq, sk, b, np)


================================================
FILE: megatron/legacy/model/gpt_model.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

"""GPT-2 model."""

import torch
from typing import Optional

from megatron.training import get_args
from megatron.core import tensor_parallel
from megatron.core.utils import deprecate_inference_params

from .enums import AttnMaskType
from .language_model import parallel_lm_logits
from .language_model import get_language_model
from .module import MegatronModule


def post_language_model_processing(lm_output, labels, logit_weights,
                                   parallel_output,
                                   fp16_lm_cross_entropy):

    # Output. Format [s b h]
    output = parallel_lm_logits(
        lm_output,
        logit_weights,
        parallel_output)

    if labels is None:
        # [s b h] => [b s h]
        return output.transpose(0,1).contiguous()
    else:
        # [b s] => [s b]
        labels = labels.transpose(0,1).contiguous()
        if fp16_lm_cross_entropy:
            assert output.dtype == torch.half
            loss = tensor_parallel.vocab_parallel_cross_entropy(output, labels)
        else:
            loss = tensor_parallel.vocab_parallel_cross_entropy(output.float(), labels)
        
        # [s b] => [b, s]
        loss = loss.transpose(0,1).contiguous()
        return loss


class GPTModel(MegatronModule):
    """GPT-2 Language model."""

    def __init__(self,
                 config,
                 num_tokentypes=0,
                 parallel_output=True,
                 pre_process=True,
                 post_process=True):
        args = get_args()
        super().__init__(config=config, share_embeddings_and_output_weights=not args.untie_embeddings_and_output_weights)

        self.parallel_output = parallel_output
        self.pre_process = pre_process
        self.post_process = post_process
        self.fp16_lm_cross_entropy = args.fp16_lm_cross_entropy
        self.untie_embeddings_and_output_weights = args.untie_embeddings_and_output_weights

        self.language_model, self._language_model_key = get_language_model(
            config=config,
            num_tokentypes=num_tokentypes,
            add_pooler=False,
            encoder_attn_mask_type=AttnMaskType.causal,
            pre_process=self.pre_process,
            post_process=self.post_process)
        
        if not args.untie_embeddings_and_output_weights:
            self.initialize_word_embeddings()

    def set_input_tensor(self, input_tensor):
        """See megatron.legacy.model.transformer.set_input_tensor()"""
        self.language_model.set_input_tensor(input_tensor)

    def forward(self, input_ids, position_ids, attention_mask,
                labels=None, tokentype_ids=None, inference_context=None, *, inference_params=None):

        inference_context = deprecate_inference_params(inference_context, inference_params)

        lm_output = self.language_model(
            input_ids,
            position_ids,
            attention_mask,
            inference_context=inference_context)

        if self.post_process:
            return post_language_model_processing(
                lm_output, labels,
                self.language_model.output_layer.weight if self.untie_embeddings_and_output_weights else self.shared_embedding_or_output_weight(),
                self.parallel_output,
                self.fp16_lm_cross_entropy)
        else:
            return lm_output

    def state_dict_for_save_checkpoint(self, prefix='', keep_vars=False):

        state_dict_ = {}
        state_dict_[self._language_model_key] \
            = self.language_model.state_dict_for_save_checkpoint(
                prefix=prefix, keep_vars=keep_vars)
        # Save word_embeddings.
        if self.post_process and not self.pre_process and not self.untie_embeddings_and_output_weights:
            state_dict_[self._word_embeddings_for_head_key] \
                = self.word_embeddings.state_dict(prefix=prefix,
                                                  keep_vars=keep_vars)
        return state_dict_

    def load_state_dict(self, state_dict, strict=True):
        """Customized load."""

        # Load word_embeddings.
        if self.post_process and not self.pre_process and not self.untie_embeddings_and_output_weights:
            self.word_embeddings.load_state_dict(
                state_dict[self._word_embeddings_for_head_key], strict=strict)
        if self._language_model_key in state_dict:
            state_dict = state_dict[self._language_model_key]
        self.language_model.load_state_dict(state_dict, strict=strict)


================================================
FILE: megatron/legacy/model/language_model.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

"""Transformer based language model."""

import torch
import torch.nn.functional as F
from typing import Optional

from megatron.core import mpu, tensor_parallel
from megatron.core.enums import ModelType
from megatron.core.inference.contexts import BaseInferenceContext
from megatron.core.models.common.embeddings.rotary_pos_embedding import RotaryEmbedding
from megatron.core.utils import deprecate_inference_params
from megatron.training import get_args

from .enums import AttnMaskType, LayerType
from .module import MegatronModule
from .transformer import ParallelTransformer
from .utils import get_linear_layer, init_method_normal, scaled_init_method_normal


def parallel_lm_logits(input_, word_embeddings_weight, parallel_output, bias=None):
    """LM logits using word embedding weights."""
    args = get_args()
    # Parallel logits.
    model_parallel = mpu.get_tensor_model_parallel_world_size() > 1
    if model_parallel or args.sequence_parallel:
        input_parallel = input_
        allreduce_dgrad = model_parallel and not args.sequence_parallel
    else:
        input_parallel = tensor_parallel.copy_to_tensor_model_parallel_region(input_)
        allreduce_dgrad = False

    # Matrix multiply.
    logits_parallel = tensor_parallel.linear_with_grad_accumulation_and_async_allreduce(
        input=input_parallel,
        weight=word_embeddings_weight,
        bias=bias,
        gradient_accumulation_fusion=args.gradient_accumulation_fusion,
        sequence_parallel=args.sequence_parallel,
        grad_output_buffer=None,
        allreduce_dgrad=allreduce_dgrad,
    )
    # Gather if needed.

    if parallel_output:
        return logits_parallel

    return tensor_parallel.gather_from_tensor_model_parallel_region(logits_parallel)


def get_language_model(
    config,
    num_tokentypes,
    add_pooler,
    encoder_attn_mask_type,
    add_encoder=True,
    add_decoder=False,
    decoder_attn_mask_type=AttnMaskType.causal,
    pre_process=True,
    post_process=True,
):
    """Build language model and return along with the key to save."""
    args = get_args()
    if config.init_method is None:
        config.init_method = init_method_normal(config.init_method_std)

    if config.output_layer_init_method is None:
        config.output_layer_init_method = scaled_init_method_normal(
            config.init_method_std, config.num_layers
        )

    # Language model.
    language_model = TransformerLanguageModel(
        config,
        encoder_attn_mask_type,
        num_tokentypes=num_tokentypes,
        add_encoder=add_encoder,
        add_decoder=add_decoder,
        decoder_attn_mask_type=decoder_attn_mask_type,
        add_pooler=add_pooler,
        pre_process=pre_process,
        post_process=post_process,
    )
    # key used for checkpoints.
    language_model_key = 'language_model'

    return language_model, language_model_key


class Pooler(MegatronModule):
    """Pooler layer.

    Pool hidden states of a specific token (for example start of the
    sequence) and add a linear transformation followed by a tanh.

    Args:
        hidden_size: hidden size
        init_method: weight initialization method for the linear layer.
            bias is set to zero.
    """

    def __init__(self, hidden_size, init_method):
        super(Pooler, self).__init__()
        args = get_args()
        self.dense = get_linear_layer(hidden_size, hidden_size, init_method)
        self.sequence_parallel = args.sequence_parallel

    def forward(self, hidden_states, sequence_index=0):
        # hidden_states: [s, b, h]
        # sequence_index: index of the token to pool.

        # gather data along sequence dimensions
        # same pooler is run on all tensor parallel nodes
        if self.sequence_parallel:
            hidden_states = tensor_parallel.gather_from_sequence_parallel_region(
                hidden_states, tensor_parallel_output_grad=False
            )

        pooled = hidden_states[sequence_index, :, :]
        pooled = self.dense(pooled)
        pooled = torch.tanh(pooled)
        return pooled


class Embedding(MegatronModule):
    """Language model embeddings.

    Args:
        hidden_size: hidden size
        vocab_size: vocabulary size
        max_sequence_length: maximum size of sequence. This
                             is used for positional embedding
        embedding_dropout_prob: dropout probability for embeddings
        init_method: weight initialization method
        num_tokentypes: size of the token-type embeddings. 0 value
                        will ignore this embedding
    """

    def __init__(
        self,
        hidden_size,
        vocab_size,
        max_sequence_length,
        embedding_dropout_prob,
        config,
        num_tokentypes=0,
    ):
        super(Embedding, self).__init__()

        self.hidden_size = hidden_size
        self.init_method = config.init_method
        self.num_tokentypes = num_tokentypes

        args = get_args()

        # Word embeddings (parallel).
        self.params_dtype = args.params_dtype
        self.word_embeddings = tensor_parallel.VocabParallelEmbedding(
            vocab_size, self.hidden_size, config=config, init_method=config.init_method
        )
        self._word_embeddings_key = 'word_embeddings'

        # Position embedding (serial).
        self.add_position_embedding = args.position_embedding_type == 'learned_absolute'
        if self.add_position_embedding:
            self.position_embeddings = torch.nn.Embedding(max_sequence_length, self.hidden_size)
            self._position_embeddings_key = 'position_embeddings'
            # Initialize the position embeddings.
            if args.perform_initialization:
                self.init_method(self.position_embeddings.weight)

        # Token type embedding.
        # Add this as an optional field that can be added through
        # method call so we can load a pretrain model without
        # token types and add them as needed.
        self._tokentype_embeddings_key = 'tokentype_embeddings'
        if self.num_tokentypes > 0:
            self.tokentype_embeddings = torch.nn.Embedding(self.num_tokentypes, self.hidden_size)
            # Initialize the token-type embeddings.
            if args.perform_initialization:
                self.init_method(self.tokentype_embeddings.weight)
        else:
            self.tokentype_embeddings = None

        self.fp32_residual_connection = args.fp32_residual_connection
        self.sequence_parallel = args.sequence_parallel
        self.clone_scatter_output_in_embedding = args.clone_scatter_output_in_embedding
        # Embeddings dropout
        self.embedding_dropout = torch.nn.Dropout(embedding_dropout_prob)

    def zero_parameters(self):
        """Zero out all parameters in embedding."""
        self.word_embeddings.weight.data.fill_(0)
        self.word_embeddings.weight.shared = True
        if self.add_position_embedding:
            self.position_embeddings.weight.data.fill_(0)
            self.position_embeddings.weight.shared = True
        if self.num_tokentypes > 0:
            self.tokentype_embeddings.weight.data.fill_(0)
            self.tokentype_embeddings.weight.shared = True

    def add_tokentype_embeddings(self, num_tokentypes):
        """Add token-type embedding. This function is provided so we can add
        token-type embeddings in case the pretrained model does not have it.
        This allows us to load the model normally and then add this embedding.
        """
        if self.tokentype_embeddings is not None:
            raise Exception('tokentype embeddings is already initialized')
        if torch.distributed.get_rank() == 0:
            print('adding embedding for {} tokentypes'.format(num_tokentypes), flush=True)
        self.num_tokentypes = num_tokentypes
        self.tokentype_embeddings = torch.nn.Embedding(num_tokentypes, self.hidden_size)
        # Initialize the token-type embeddings.
        args = get_args()
        self.init_method(self.tokentype_embeddings.weight)

    def forward(self, input_ids, position_ids, tokentype_ids=None):
        # Embeddings.
        words_embeddings = self.word_embeddings(input_ids)
        if self.add_position_embedding:
            position_embeddings = self.position_embeddings(position_ids)
            embeddings = words_embeddings + position_embeddings
        else:
            embeddings = words_embeddings

        if tokentype_ids is not None:
            assert self.tokentype_embeddings is not None
            embeddings = embeddings + self.tokentype_embeddings(tokentype_ids)
        else:
            assert self.tokentype_embeddings is None

        # Data format change to avoid explicit tranposes : [b s h] --> [s b h].
        embeddings = embeddings.transpose(0, 1).contiguous()

        # If the input flag for fp32 residual connection is set, convert for float.
        if self.fp32_residual_connection:
            embeddings = embeddings.float()

        # Dropout.
        if self.sequence_parallel:
            embeddings = tensor_parallel.scatter_to_sequence_parallel_region(embeddings)
            # `scatter_to_sequence_parallel_region` returns a view, which prevents
            # the original tensor from being garbage collected. Clone to facilitate GC.
            # Has a small runtime cost (~0.5%).
            if self.clone_scatter_output_in_embedding:
                embeddings = embeddings.clone()
            with tensor_parallel.get_cuda_rng_tracker().fork():
                embeddings = self.embedding_dropout(embeddings)
        else:
            embeddings = self.embedding_dropout(embeddings)

        return embeddings

    def state_dict_for_save_checkpoint(self, prefix='', keep_vars=False):
        """For easy load."""

        state_dict_ = {}
        state_dict_[self._word_embeddings_key] = self.word_embeddings.state_dict(
            prefix=prefix, keep_vars=keep_vars
        )
        if self.add_position_embedding:
            state_dict_[self._position_embeddings_key] = self.position_embeddings.state_dict(
                prefix=prefix, keep_vars=keep_vars
            )
        if self.num_tokentypes > 0:
            state_dict_[self._tokentype_embeddings_key] = self.tokentype_embeddings.state_dict(
                prefix=prefix, keep_vars=keep_vars
            )

        return state_dict_

    def load_state_dict(self, state_dict, strict=True):
        """Customized load."""

        # Word embedding.
        if self._word_embeddings_key in state_dict:
            state_dict_ = state_dict[self._word_embeddings_key]
        else:
            # for backward compatibility.
            state_dict_ = {}
            for key in state_dict.keys():
                if 'word_embeddings' in key:
                    state_dict_[key.split('word_embeddings.')[1]] = state_dict[key]
        self.word_embeddings.load_state_dict(state_dict_, strict=strict)

        # Position embedding.
        if self.add_position_embedding:
            if self._position_embeddings_key in state_dict:
                state_dict_ = state_dict[self._position_embeddings_key]
            else:
                # for backward compatibility.
                state_dict_ = {}
                for key in state_dict.keys():
                    if 'position_embeddings' in key:
                        state_dict_[key.split('position_embeddings.')[1]] = state_dict[key]
            self.position_embeddings.load_state_dict(state_dict_, strict=strict)

        # Tokentype embedding.
        if self.num_tokentypes > 0:
            state_dict_ = {}
            if self._tokentype_embeddings_key in state_dict:
                state_dict_ = state_dict[self._tokentype_embeddings_key]
            else:
                # for backward compatibility.
                for key in state_dict.keys():
                    if 'tokentype_embeddings' in key:
                        state_dict_[key.split('tokentype_embeddings.')[1]] = state_dict[key]
            if len(state_dict_.keys()) > 0:
                self.tokentype_embeddings.load_state_dict(state_dict_, strict=strict)
            else:
                print(
                    '***WARNING*** expected tokentype embeddings in the '
                    'checkpoint but could not find it',
                    flush=True,
                )


class TransformerLanguageModel(MegatronModule):
    """Transformer language model.

    Args:
        transformer_hparams: transformer hyperparameters
        vocab_size: vocabulary size
        max_sequence_length: maximum size of sequence. This
                             is used for positional embedding
        embedding_dropout_prob: dropout probability for embeddings
        num_tokentypes: size of the token-type embeddings. 0 value
                        will ignore this embedding
    """

    def __init__(
        self,
        config,
        encoder_attn_mask_type,
        num_tokentypes=0,
        add_encoder=True,
        add_decoder=False,
        decoder_attn_mask_type=AttnMaskType.causal,
        add_pooler=False,
        pre_process=True,
        post_process=True,
    ):
        args = get_args()
        # TODO: passing share_embeddings_and_output_weights=False will not work correctly for T5 and embeddings will not be synced. Fix later for T5.
        if args.untie_embeddings_and_output_weights:
            assert not add_decoder
        super(TransformerLanguageModel, self).__init__(
            share_embeddings_and_output_weights=not args.untie_embeddings_and_output_weights
        )

        self.pre_process = pre_process
        self.post_process = post_process
        self.hidden_size = config.hidden_size
        self.num_tokentypes = num_tokentypes
        self.init_method = config.init_method
        self.add_encoder = add_encoder
        self.encoder_attn_mask_type = encoder_attn_mask_type
        self.add_decoder = add_decoder
        self.decoder_attn_mask_type = decoder_attn_mask_type
        self.add_pooler = add_pooler
        self.encoder_hidden_state = None
        self.untie_embeddings_and_output_weights = args.untie_embeddings_and_output_weights

        # Embeddings.
        if self.pre_process:
            self.embedding = Embedding(
                self.hidden_size,
                args.padded_vocab_size,
                args.max_position_embeddings,
                args.hidden_dropout,
                config,
                self.num_tokentypes,
            )
            self._embedding_key = 'embedding'

        # Rotary positional embeddings
        self.use_rotary_position_embeddings = args.position_embedding_type == 'rope'
        if self.use_rotary_position_embeddings:
            self.seq_length = args.seq_length
            rotary_dim = (
                args.hidden_size // args.num_attention_heads
                if args.kv_channels is None
                else args.kv_channels
            )

            # partial rotary embeddings, which is better than full rotary
            # Wang and Komatsuzaki et al
            # https://github.com/kingoflolz/mesh-transformer-jax/
            self.rotary_pos_emb = RotaryEmbedding(
                kv_channels=rotary_dim,
                rotary_percent=args.rotary_percent,
                seq_len_interpolation_factor=args.rotary_seq_len_interpolation_factor,
            )

        # Encoder (usually set to True, False if part of an encoder-decoder
        # architecture and in encoder-only stage).
        if self.add_encoder:
            self.encoder = ParallelTransformer(
                config,
                model_type=args.model_type,
                self_attn_mask_type=self.encoder_attn_mask_type,
                pre_process=self.pre_process,
                post_process=self.post_process,
            )
            self._encoder_key = 'encoder'
        else:
            self.encoder = None

        # Decoder (usually set to False, True if part of an encoder-decoder
        # architecture and in decoder-only stage).
        if self.add_decoder:
            self.decoder = ParallelTransformer(
                config,
                model_type=args.model_type,
                layer_type=LayerType.decoder,
                self_attn_mask_type=self.decoder_attn_mask_type,
                pre_process=self.pre_process,
                post_process=self.post_process,
            )
            self._decoder_key = 'decoder'
        else:
            self.decoder = None

        if self.post_process:
            # Pooler.
            if self.add_pooler:
                self.pooler = Pooler(self.hidden_size, self.init_method)
                self._pooler_key = 'pooler'

            if self.untie_embeddings_and_output_weights:
                self.output_layer = tensor_parallel.ColumnParallelLinear(
                    args.hidden_size,
                    args.padded_vocab_size,
                    config=config,
                    init_method=self.init_method,
                    bias=False,
                )  # Setting bias to False always to keep it consistent with embedding tying that also does not have a bias.
                self._output_layer_key = 'output_layer'

    def set_input_tensor(self, input_tensor):
        """See megatron.legacy.model.transformer.set_input_tensor()"""

        # This is usually handled in schedules.py but some inference code still
        # gives us non-lists or None
        if not isinstance(input_tensor, list):
            input_tensor = [input_tensor]

        if self.add_encoder and self.add_decoder:
            assert (
                len(input_tensor) == 1
            ), 'input_tensor should only be length 1 for stage with both encoder and decoder'
            self.encoder.set_input_tensor(input_tensor[0])
        elif self.add_encoder:
            assert (
                len(input_tensor) == 1
            ), 'input_tensor should only be length 1 for stage with only encoder'
            self.encoder.set_input_tensor(input_tensor[0])
        elif self.add_decoder:
            if len(input_tensor) == 2:
                self.decoder.set_input_tensor(input_tensor[0])
                self.encoder_hidden_state = input_tensor[1]
            elif len(input_tensor) == 1:
                self.decoder.set_input_tensor(None)
                self.encoder_hidden_state = input_tensor[0]
            else:
                raise Exception('input_tensor must have either length 1 or 2')
        else:
            raise Exception('Stage must have at least either encoder or decoder')

    def forward(
        self,
        enc_input_ids,
        enc_position_ids,
        enc_attn_mask,
        dec_input_ids=None,
        dec_position_ids=None,
        dec_attn_mask=None,
        enc_dec_attn_mask=None,
        tokentype_ids=None,
        inference_context=None,
        pooling_sequence_index=0,
        enc_hidden_states=None,
        output_enc_hidden=False,
        *,
        inference_params: Optional[BaseInferenceContext] = None,
    ):

        inference_context = deprecate_inference_params(inference_context, inference_params)

        # Encoder embedding.
        if self.pre_process:
            encoder_input = self.embedding(
                enc_input_ids, enc_position_ids, tokentype_ids=tokentype_ids
            )
        else:
            encoder_input = None

        # Rotary positional embeddings
        rotary_pos_emb = None
        if self.use_rotary_position_embeddings:
            if inference_context is not None:
                rotary_pos_emb = self.rotary_pos_emb(inference_context.max_sequence_length)
            else:
                rotary_pos_emb = self.rotary_pos_emb(self.seq_length)

        # Run encoder.
        if enc_hidden_states is None:
            if self.encoder is not None:
                encoder_output = self.encoder(
                    encoder_input,
                    enc_attn_mask,
                    inference_context=inference_context,
                    rotary_pos_emb=rotary_pos_emb,
                )
            else:
                encoder_output = self.encoder_hidden_state
        else:
            encoder_output = enc_hidden_states.to(encoder_input.dtype)

        if self.post_process:
            if self.add_pooler:
                pooled_output = self.pooler(encoder_output, pooling_sequence_index)

        # output_enc_hidden refers to when we just need the encoder's
        # output. For example, it is helpful to compute
        # similarity between two sequences by average pooling
        if not self.add_decoder or output_enc_hidden:
            if self.add_pooler and self.post_process:
                return encoder_output, pooled_output
            else:
                return encoder_output

        # Decoder embedding.
        if self.pre_process:
            decoder_input = self.embedding(dec_input_ids, dec_position_ids)
        else:
            decoder_input = None

        # Run decoder.
        decoder_output = self.decoder(
            decoder_input,
            dec_attn_mask,
            encoder_output=encoder_output,
            enc_dec_attn_mask=enc_dec_attn_mask,
            inference_context=inference_context,
            rotary_pos_emb=rotary_pos_emb,
        )

        if self.add_pooler and self.post_process:
            return decoder_output, encoder_output, pooled_output
        else:
            return decoder_output, encoder_output

    def state_dict_for_save_checkpoint(self, prefix='', keep_vars=False):
        """For easy load."""

        state_dict_ = {}
        if self.pre_process:
            state_dict_[self._embedding_key] = self.embedding.state_dict_for_save_checkpoint(
                prefix=prefix, keep_vars=keep_vars
            )
        if self.add_encoder:
            state_dict_[self._encoder_key] = self.encoder.state_dict_for_save_checkpoint(
                prefix=prefix, keep_vars=keep_vars
            )
        if self.post_process:
            if self.add_pooler:
                state_dict_[self._pooler_key] = self.pooler.state_dict_for_save_checkpoint(
                    prefix=prefix, keep_vars=keep_vars
                )
            if self.untie_embeddings_and_output_weights:
                state_dict_[self._output_layer_key] = self.output_layer.state_dict(
                    prefix=prefix, keep_vars=keep_vars
                )

        if self.add_decoder:
            state_dict_[self._decoder_key] = self.decoder.state_dict_for_save_checkpoint(
                prefix=prefix, keep_vars=keep_vars
            )

        return state_dict_

    def load_state_dict(self, state_dict, strict=True):
        """Customized load."""

        # Embedding.
        if self.pre_process:
            if self._embedding_key in state_dict:
                state_dict_ = state_dict[self._embedding_key]
            else:
                # for backward compatibility.
                state_dict_ = {}
                for key in state_dict.keys():
                    if '_embeddings' in key:
                        state_dict_[key] = state_dict[key]
            self.embedding.load_state_dict(state_dict_, strict=strict)

        # Encoder.
        if self.add_encoder:
            if self._encoder_key in state_dict:
                state_dict_ = state_dict[self._encoder_key]
            # For backward compatibility.
            elif 'transformer' in state_dict:
                state_dict_ = state_dict['transformer']
            else:
                # For backward compatibility.
                state_dict_ = {}
                for key in state_dict.keys():
                    if 'transformer.' in key:
                        state_dict_[key.split('transformer.')[1]] = state_dict[key]

            # For backward compatibility.
            state_dict_self_attention = {}
            for key in state_dict_.keys():
                if '.attention.' in key:
                    state_dict_self_attention[key.replace(".attention.", ".self_attention.")] = (
                        state_dict_[key]
                    )
                else:
                    state_dict_self_attention[key] = state_dict_[key]
            state_dict_ = state_dict_self_attention

            self.encoder.load_state_dict(state_dict_, strict=strict)

        # Pooler.
        if self.post_process:
            if self.add_pooler:
                assert 'pooler' in state_dict, 'could not find data for pooler in the checkpoint'
                self.pooler.load_state_dict(state_dict[self._pooler_key], strict=strict)
            if self.untie_embeddings_and_output_weights:
                assert (
                    'output_layer' in state_dict
                ), 'could not find data for output_layer in the checkpoint'
                self.output_layer.load_state_dict(state_dict[self._output_layer_key], strict=strict)
        # Decoder.
        if self.add_decoder:
            assert 'decoder' in state_dict, 'could not find data for pooler in the checkpoint'
            self.decoder.load_state_dict(state_dict[self._decoder_key], strict=strict)


================================================
FILE: megatron/legacy/model/module.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""Megatron Module"""

import torch
from torch.autograd import Variable
from torch.nn.parameter import Parameter

from megatron.training import get_args
from megatron.core import mpu, tensor_parallel


_FLOAT_TYPES = (torch.FloatTensor, torch.cuda.FloatTensor)
_HALF_TYPES = (torch.HalfTensor, torch.cuda.HalfTensor)
_BF16_TYPES = (torch.BFloat16Tensor, torch.cuda.BFloat16Tensor)


def param_is_not_shared(param):
    return not hasattr(param, 'shared') or not param.shared


class MegatronModule(torch.nn.Module):
    """Megatron specific extensions of torch Module with support
    for pipelining."""

    def __init__(self, config=None, share_embeddings_and_output_weights=True):
        super(MegatronModule, self).__init__()
        self.config = config
        self.share_embeddings_and_output_weights = share_embeddings_and_output_weights

    def state_dict_for_save_checkpoint(self, prefix='', keep_vars=False):
        """Use this function to override the state dict for
        saving checkpoints."""
        return self.state_dict(prefix=prefix, keep_vars=keep_vars)

    def shared_embedding_or_output_weight(self):
        if self.pre_process:
            return self.language_model.embedding.word_embeddings.weight
        else:
            if not self.share_embeddings_and_output_weights:
                raise Exception(
                    'shared_embedding_or_output_weight() called for last '
                    'stage, but share_embeddings_and_output_weights is false'
                )
            return self.word_embeddings.weight

    def initialize_word_embeddings(self):
        args = get_args()
        if not self.share_embeddings_and_output_weights:
            raise Exception(
                'initialize_word_embeddings() was called but '
                'share_embeddings_and_output_weights is false'
            )

        # This function just initializes the word embeddings in the final stage
        # when we are using pipeline parallelism. Nothing to do if we aren't
        # using pipeline parallelism.
        if args.pipeline_model_parallel_size == 1:
            # Zero out wgrad if sharing embeddings between two layers on same
            # pipeline stage to make sure grad accumulation into main_grad is
            # correct and does not include garbage values (e.g., from torch.empty).
            self.shared_embedding_or_output_weight().zero_out_wgrad = True
            return

        if (
            mpu.is_pipeline_first_stage(ignore_virtual=False)
            and self.pre_process
            and not self.post_process
        ):
            self.shared_embedding_or_output_weight().shared_embedding = True

        # Parameters are shared between the word embeddings layers, and the
        # heads at the end of the model. In a pipelined setup with more than
        # one stage, the initial embedding layer and the head are on different
        # workers, so we do the following:
        # 1. Create a second copy of word_embeddings on the last stage, with
        #    initial parameters of 0.0.
        # 2. Do an all-reduce between the first and last stage to ensure that
        #    the two copies of word_embeddings start off with the same
        #    parameter values.
        # 3. In the training loop, before an all-reduce between the grads of
        #    the two word_embeddings layers to ensure that every applied weight
        #    update is the same on both stages.
        if mpu.is_pipeline_last_stage(ignore_virtual=False) and not self.pre_process:
            assert not mpu.is_pipeline_first_stage(ignore_virtual=False)
            self._word_embeddings_for_head_key = 'word_embeddings_for_head'
            # set word_embeddings weights to 0 here, then copy first
            # stage's weights using all_reduce below.
            self.word_embeddings = tensor_parallel.VocabParallelEmbedding(
                args.padded_vocab_size,
                self.config.hidden_size,
                config=self.config,
                init_method=self.config.init_method,
            )
            self.word_embeddings.weight.data.fill_(0)
            self.word_embeddings.weight.shared = True
            self.word_embeddings.weight.shared_embedding = True

        # Zero out initial weights for decoder embedding.
        # NOTE: We don't currently support T5 with the interleaved schedule.
        if not mpu.is_pipeline_first_stage(ignore_virtual=True) and self.pre_process:
            self.language_model.embedding.zero_parameters()

        if not torch.distributed.is_initialized():
            if not getattr(MegatronModule, "embedding_warning_printed", False):
                print(
                    "WARNING! Distributed processes aren't initialized, so "
                    "word embeddings in the last layer are not initialized. "
                    "If you are just manipulating a model this is fine, but "
                    "this needs to be handled manually. If you are training "
                    "something is definitely wrong."
                )
                MegatronModule.embedding_warning_printed = True
            return

        # Ensure that first and last stages have the same initial parameter
        # values.
        if mpu.is_rank_in_embedding_group(ignore_virtual=False):
            self.shared_embedding_or_output_weight().data = (
                self.shared_embedding_or_output_weight().data.cuda()
            )
            torch.distributed.all_reduce(
                self.shared_embedding_or_output_weight().data, group=mpu.get_embedding_group()
            )


def conversion_helper(val, conversion):
    """Apply conversion to val. Recursively apply conversion if `val`
    #is a nested tuple/list structure."""
    if not isinstance(val, (tuple, list)):
        return conversion(val)
    rtn = [conversion_helper(v, conversion) for v in val]
    if isinstance(val, tuple):
        rtn = tuple(rtn)
    return rtn


def fp32_to_float16(val, float16_convertor):
    """Convert fp32 `val` to fp16/bf16"""

    def half_conversion(val):
        val_typecheck = val
        if isinstance(val_typecheck, (Parameter, Variable)):
            val_typecheck = val.data
        if isinstance(val_typecheck, _FLOAT_TYPES):
            val = float16_convertor(val)
        return val

    return conversion_helper(val, half_conversion)


def float16_to_fp32(val):
    """Convert fp16/bf16 `val` to fp32"""

    def float_conversion(val):
        val_typecheck = val
        if isinstance(val_typecheck, (Parameter, Variable)):
            val_typecheck = val.data
        if isinstance(val_typecheck, (_BF16_TYPES, _HALF_TYPES)):
            val = val.float()
        return val

    return conversion_helper(val, float_conversion)


================================================
FILE: megatron/legacy/model/multiple_choice.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""Multiple choice model."""

import torch

from megatron.training import get_args, print_rank_last
from megatron.legacy.model.enums import AttnMaskType
from megatron.legacy.model.bert_model import bert_extended_attention_mask, bert_position_ids
from megatron.legacy.model.language_model import get_language_model
from megatron.legacy.model.utils import get_linear_layer
from megatron.legacy.model.utils import init_method_normal
from megatron.legacy.model.utils import scaled_init_method_normal
from .module import MegatronModule


class MultipleChoice(MegatronModule):

    def __init__(self,
                 config,
                 num_tokentypes=2,
                 pre_process=True,
                 post_process=True):
        super(MultipleChoice, self).__init__(share_embeddings_and_output_weights=False)
        args = get_args()

        self.pre_process = pre_process
        self.post_process = post_process

        self.language_model, self._language_model_key = get_language_model(
            config=config,
            num_tokentypes=num_tokentypes,
            add_pooler=True,
            encoder_attn_mask_type=AttnMaskType.padding,
            pre_process=self.pre_process,
            post_process=self.post_process)

        # Multi-choice head.
        if self.post_process:
            self.multichoice_dropout = torch.nn.Dropout(args.hidden_dropout)
            self.multichoice_head = get_linear_layer(args.hidden_size, 1,
                                                     init_method)
            self._multichoice_head_key = 'multichoice_head'

    def set_input_tensor(self, input_tensor):
        """See megatron.legacy.model.transformer.set_input_tensor()"""
        self.language_model.set_input_tensor(input_tensor)

    def forward(self, model_input, attention_mask, tokentype_ids=None):

        # [batch, choices, sequence] --> [batch * choices, sequence] -->
        #    transformer --> [batch, choices] --> softmax

        # Ensure the shape is [batch-size, choices, sequence]
        assert len(attention_mask.shape) == 3
        num_choices = attention_mask.shape[1]

        # Reshape and treat choice dimension the same as batch.
        attention_mask = attention_mask.view(-1, attention_mask.size(-1))
        extended_attention_mask = bert_extended_attention_mask(attention_mask)

        input_ids = model_input
        # Do the same as attention_mask for input_ids, tokentype_ids
        assert len(input_ids.shape) == 3
        assert len(tokentype_ids.shape) == 3
        input_ids = input_ids.view(-1, input_ids.size(-1))
        tokentype_ids = tokentype_ids.view(-1, tokentype_ids.size(-1))
        position_ids = bert_position_ids(input_ids)

        lm_output = self.language_model(
            input_ids,
            position_ids,
            extended_attention_mask,
            tokentype_ids=tokentype_ids
        )
        if self.post_process:
            _, pooled_output = lm_output
            multichoice_output = self.multichoice_dropout(pooled_output)
            multichoice_logits = self.multichoice_head(multichoice_output)

            # Reshape back to separate choices.
            multichoice_logits = multichoice_logits.view(-1, num_choices)

            return multichoice_logits
        return lm_output

    def state_dict_for_save_checkpoint(self, prefix='', keep_vars=False):
        """For easy load when model is combined with other heads,
        add an extra key."""

        state_dict_ = {}
        state_dict_[self._language_model_key] \
            = self.language_model.state_dict_for_save_checkpoint(prefix=prefix,
                                                                 keep_vars=keep_vars)
        if self.post_process:
            state_dict_[self._multichoice_head_key] \
                = self.multichoice_head.state_dict(prefix=prefix, keep_vars=keep_vars)
        return state_dict_

    def load_state_dict(self, state_dict, strict=True):
        """Customized load."""

        self.language_model.load_state_dict(
            state_dict[self._language_model_key], strict=strict)
        if self.post_process:
            if self._multichoice_head_key in state_dict:
                self.multichoice_head.load_state_dict(
                    state_dict[self._multichoice_head_key], strict=strict)
            else:
                print_rank_last('***WARNING*** could not find {} in the checkpoint, '
                                'initializing to random'.format(
                                    self._multichoice_head_key))


================================================
FILE: megatron/legacy/model/realm_model.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import os
import torch

from megatron.training import get_args, print_rank_0
from megatron.training.checkpointing import get_checkpoint_tracker_filename, get_checkpoint_name
from megatron.legacy.model import BertModel
from .module import MegatronModule
from megatron.core import mpu
from megatron.legacy.model.enums import AttnMaskType
from megatron.legacy.model.utils import get_linear_layer
from megatron.legacy.model.utils import init_method_normal
from megatron.legacy.model.language_model import get_language_model
from megatron.legacy.model.utils import scaled_init_method_normal
from megatron.legacy.model.bert_model import bert_extended_attention_mask, bert_position_ids


def general_ict_model_provider(only_query_model=False, only_block_model=False):
    """Build the model."""
    args = get_args()
    assert args.ict_head_size is not None, \
        "Need to specify --ict-head-size to provide an ICTBertModel"
    assert mpu.get_tensor_model_parallel_world_size() == 1 and mpu.get_pipeline_model_parallel_world_size() == 1, \
        "Model parallel size > 1 not supported for ICT"

    print_rank_0('building ICTBertModel...')

    # simpler to just keep using 2 tokentypes since the LM we initialize with has 2 tokentypes
    model = ICTBertModel(
        ict_head_size=args.ict_head_size,
        num_tokentypes=2,
        parallel_output=True,
        only_query_model=only_query_model,
        only_block_model=only_block_model)

    return model


class ICTBertModel(MegatronModule):
    """Bert-based module for Inverse Cloze task."""
    def __init__(self,
                 ict_head_size,
                 num_tokentypes=1,
                 parallel_output=True,
                 only_query_model=False,
                 only_block_model=False):
        super(ICTBertModel, self).__init__()
        bert_kwargs = dict(
            ict_head_size=ict_head_size,
            num_tokentypes=num_tokentypes,
            parallel_output=parallel_output
        )
        assert not (only_block_model and only_query_model)
        self.use_block_model = not only_query_model
        self.use_query_model = not only_block_model

        if self.use_query_model:
            # this model embeds (pseudo-)queries - Embed_input in the paper
            self.query_model = IREncoderBertModel(**bert_kwargs)
            self._query_key = 'question_model'

        if self.use_block_model:
            # this model embeds evidence blocks - Embed_doc in the paper
            self.block_model = IREncoderBertModel(**bert_kwargs)
            self._block_key = 'context_model'

    def forward(self, query_tokens, query_attention_mask, block_tokens, block_attention_mask):
        """Run a forward pass for each of the models and return the respective embeddings."""
        query_logits = self.embed_query(query_tokens, query_attention_mask)
        block_logits = self.embed_block(block_tokens, block_attention_mask)
        return query_logits, block_logits

    def embed_query(self, query_tokens, query_attention_mask):
        """Embed a batch of tokens using the query model"""
        if self.use_query_model:
            query_types = torch.cuda.LongTensor(*query_tokens.shape).fill_(0)
            query_ict_logits, _ = self.query_model.forward(query_tokens, query_attention_mask, query_types)
            return query_ict_logits
        else:
            raise ValueError("Cannot embed query without query model.")

    def embed_block(self, block_tokens, block_attention_mask):
        """Embed a batch of tokens using the block model"""
        if self.use_block_model:
            block_types = torch.cuda.LongTensor(*block_tokens.shape).fill_(0)
            block_ict_logits, _ = self.block_model.forward(block_tokens, block_attention_mask, block_types)
            return block_ict_logits
        else:
            raise ValueError("Cannot embed block without block model.")

    def state_dict_for_save_checkpoint(self, prefix='', keep_vars=False):
        """Save dict with state dicts of each of the models."""
        state_dict_ = {}
        if self.use_query_model:
            state_dict_[self._query_key] \
                = self.query_model.state_dict_for_save_checkpoint(
                    prefix=prefix, keep_vars=keep_vars)

        if self.use_block_model:
            state_dict_[self._block_key] \
                = self.block_model.state_dict_for_save_checkpoint(
                    prefix=prefix, keep_vars=keep_vars)

        return state_dict_

    def load_state_dict(self, state_dict, strict=True):
        """Load the state dicts of each of the models"""
        if self.use_query_model:
            print("Loading ICT query model", flush=True)
            self.query_model.load_state_dict(
                state_dict[self._query_key], strict=strict)

        if self.use_block_model:
            print("Loading ICT block model", flush=True)
            self.block_model.load_state_dict(
                state_dict[self._block_key], strict=strict)

    def init_state_dict_from_bert(self):
        """Initialize the state from a pretrained BERT model on iteration zero of ICT pretraining"""
        args = get_args()
        tracker_filename = get_checkpoint_tracker_filename(args.bert_load)
        if not os.path.isfile(tracker_filename):
            raise FileNotFoundError("Could not find BERT load for ICT")
        with open(tracker_filename, 'r') as f:
            iteration = int(f.read().strip())
            assert iteration > 0

        checkpoint_name = get_checkpoint_name(args.bert_load, iteration, False)
        if mpu.get_data_parallel_rank() == 0:
            print('global rank {} is loading checkpoint {}'.format(
                torch.distributed.get_rank(), checkpoint_name))

        try:
            state_dict = torch.load(checkpoint_name, map_location='cpu')
        except Exception:
            raise ValueError("Could not load checkpoint")

        # load the LM state dict into each model
        model_dict = state_dict['model']['language_model']
        self.query_model.language_model.load_state_dict(model_dict)
        self.block_model.language_model.load_state_dict(model_dict)

        # give each model the same ict_head to begin with as well
        query_ict_head_state_dict = self.state_dict_for_save_checkpoint()[self._query_key]['ict_head']
        self.block_model.ict_head.load_state_dict(query_ict_head_state_dict)


class IREncoderBertModel(MegatronModule):
    """BERT-based encoder for queries or blocks used for learned information retrieval."""
    def __init__(self, ict_head_size, num_tokentypes=2, parallel_output=True):
        super(IREncoderBertModel, self).__init__()
        args = get_args()

        self.ict_head_size = ict_head_size
        self.parallel_output = parallel_output
        init_method = init_method_normal(args.init_method_std)
        scaled_init_method = scaled_init_method_normal(args.init_method_std,
                                                       args.num_layers)

        self.language_model, self._language_model_key = get_language_model(
            num_tokentypes=num_tokentypes,
            add_pooler=True,
            encoder_attn_mask_type=AttnMaskType.padding,
            init_method=init_method,
            scaled_init_method=scaled_init_method)

        self.ict_head = get_linear_layer(args.hidden_size, ict_head_size, init_method)
        self._ict_head_key = 'ict_head'

    def forward(self, input_ids, attention_mask, tokentype_ids=None):
        extended_attention_mask = bert_extended_attention_mask(
            attention_mask, next(self.language_model.parameters()).dtype)
        position_ids = bert_position_ids(input_ids)

        lm_output, pooled_output = self.language_model(
            input_ids,
            position_ids,
            extended_attention_mask,
            tokentype_ids=tokentype_ids)

        # Output.
        ict_logits = self.ict_head(pooled_output)
        return ict_logits, None

    def state_dict_for_save_checkpoint(self, prefix='', keep_vars=False):
        """For easy load when model is combined with other heads,
        add an extra key."""

        state_dict_ = {}
        state_dict_[self._language_model_key] \
            = self.language_model.state_dict_for_save_checkpoint(prefix=prefix,
                                                                 keep_vars=keep_vars)
        state_dict_[self._ict_head_key] \
            = self.ict_head.state_dict(prefix=prefix,
                                       keep_vars=keep_vars)
        return state_dict_

    def load_state_dict(self, state_dict, strict=True):
        """Customized load."""
        self.language_model.load_state_dict(
            state_dict[self._language_model_key], strict=strict)
        self.ict_head.load_state_dict(
            state_dict[self._ict_head_key], strict=strict)


================================================
FILE: megatron/legacy/model/rms_norm.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

import torch
from torch import nn

class RMSNorm(torch.nn.Module):

    def __init__(self,
                 dim: int,
                 eps: float = 1e-6,
                 sequence_parallel: bool = False,
                 config: dict = None):
        """RMS Normaliation module

        Args:
            dim (int): The width of input, i.e. hidden size
            eps (float): epsilon to use for the norm, default to 1e-6
            sequence_parallel (bool): Set to true if sequence parallelism is being used,
              this marks the weights as needing to be allreduced.
        """
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(dim))

        setattr(self.weight, 'sequence_parallel', sequence_parallel)

    def _norm(self, x):
        return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)

    def forward(self, x):
        output = self._norm(x.float()).type_as(x)
        return output * self.weight


================================================
FILE: megatron/legacy/model/t5_model.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""T5 model."""

import torch

from megatron.training import get_args
from megatron.core import tensor_parallel
from megatron.legacy.model.enums import AttnMaskType
from megatron.legacy.model.language_model import parallel_lm_logits, get_language_model
from megatron.legacy.model import LayerNorm
from megatron.legacy.model.utils import (
    openai_gelu,
    get_linear_layer
)
from .module import MegatronModule


def t5_extended_attention_mask(attention_mask_list):

    def attn_mask_postprocess(attn_mask):
        # [b, 1, s, s]
        extended_attention_mask = attn_mask.unsqueeze(1)
        return extended_attention_mask

    return [attn_mask_postprocess(attn_mask) for attn_mask in attention_mask_list]


def t5_position_ids(token_ids):
    # Create position ids
    seq_length = token_ids.size(1)
    position_ids = torch.arange(seq_length, dtype=torch.long,
                                device=token_ids.device)
    position_ids = position_ids.unsqueeze(0).expand_as(token_ids)

    return position_ids


class T5LMHead(MegatronModule):
    """Masked LM head for T5

    Args:
        mpu_vocab_size: model parallel size of vocabulary.
        parallel_output: wether output logits being distributed or not.
    """

    def __init__(self, mpu_vocab_size, parallel_output):
        super(T5LMHead, self).__init__()

        self.bias = torch.nn.Parameter(torch.zeros(mpu_vocab_size))
        self.bias.model_parallel = True
        self.bias.partition_dim = 0
        self.bias.stride = 1
        self.parallel_output = parallel_output

    def forward(self, hidden_states, word_embeddings_weight):
        output = parallel_lm_logits(hidden_states,
                                    word_embeddings_weight,
                                    self.parallel_output,
                                    bias=self.bias)
        return output


class T5Model(MegatronModule):
    """T5 Language model."""

    def __init__(self,
                 config,
                 num_tokentypes=0,
                 parallel_output=True,
                 pre_process=True,
                 post_process=True,
                 add_encoder=True,
                 add_decoder=True):
        super().__init__(config=config)
        args = get_args()

        self.fp16_lm_cross_entropy = args.fp16_lm_cross_entropy
        self.parallel_output = parallel_output
        self.pre_process = pre_process
        self.post_process = post_process
        self.add_encoder = add_encoder
        self.add_decoder = add_decoder

        self.language_model, self._language_model_key = get_language_model(
            config=config,
            num_tokentypes=num_tokentypes,
            add_pooler=False,
            add_encoder=add_encoder,
            add_decoder=add_decoder,
            encoder_attn_mask_type=AttnMaskType.padding,
            pre_process=self.pre_process,
            post_process=self.post_process)

        self.initialize_word_embeddings()

        if self.pre_process:
            self.position_embeddings = self.language_model.embedding.position_embeddings
        else:
            self.position_embeddings = None

        if self.post_process and self.add_decoder:
            self.lm_head = T5LMHead(
                self.shared_embedding_or_output_weight().size(0),
                parallel_output)
            self._lm_head_key = 'lm_head'

        # Tells schedules.py that this model has a skip connection between the encoder's output and the decoder
        # (and hence both the encoder and decoder's tensors are required for correct backprop).
        self.xattn_needed = True

    def set_input_tensor(self, input_tensor):
        """See megatron.legacy.model.transformer.set_input_tensor()"""
        self.language_model.set_input_tensor(input_tensor)

    def forward(self, encoder_input_ids, decoder_input_ids, encoder_attn_mask,
                decoder_attn_mask, encoder_decoder_attn_mask,
                tokentype_ids=None, lm_labels=None, enc_hidden_states=None):

        # Converting the attention masks to proper parameter settings
        encoder_attn_mask, decoder_attn_mask, encoder_decoder_attn_mask = t5_extended_attention_mask(
            [encoder_attn_mask, decoder_attn_mask, encoder_decoder_attn_mask])

        encoder_position_ids = t5_position_ids(encoder_input_ids)
        decoder_position_ids = t5_position_ids(decoder_input_ids)

        lm_output = self.language_model(encoder_input_ids,
                                        encoder_position_ids,
                                        encoder_attn_mask,
                                        decoder_input_ids,
                                        decoder_position_ids,
                                        decoder_attn_mask,
                                        encoder_decoder_attn_mask,
                                        tokentype_ids=tokentype_ids,
                                        enc_hidden_states=enc_hidden_states)

        if self.post_process and self.add_decoder:
            decoder_output, encoder_output = lm_output
            # Output. [s, b, h]
            lm_logits = self.lm_head(decoder_output,
                                     self.shared_embedding_or_output_weight())

            if lm_labels is None:
                # [s b h] => [b s h]
                return lm_logits.transpose(0,1).contiguous()
            else:
                # [b s] => [s b]
                lm_labels = lm_labels.transpose(0,1).contiguous()
                if self.fp16_lm_cross_entropy:
                    assert lm_logits.dtype == torch.half
                    lm_loss = tensor_parallel.vocab_parallel_cross_entropy(lm_logits, lm_labels)
                else:
                    lm_loss = tensor_parallel.vocab_parallel_cross_entropy(lm_logits.float(),
                                                                                lm_labels)
                # [s b] => [b s]
                lm_loss = lm_loss.transpose(0,1).contiguous()
            return lm_loss
        elif self.add_decoder and not self.add_encoder:
            decoder_output, encoder_output = lm_output
            return decoder_output
        else:
            encoder_output = lm_output
            return encoder_output

    def state_dict_for_save_checkpoint(self, prefix='', keep_vars=False):
        """For easy load when model is combined with other heads,
        add an extra key."""

        state_dict_ = {}
        state_dict_[self._language_model_key] \
            = self.language_model.state_dict_for_save_checkpoint(prefix=prefix,
                                                                 keep_vars=keep_vars)
        if self.post_process and self.add_decoder:
            state_dict_[self._lm_head_key] \
                = self.lm_head.state_dict_for_save_checkpoint(prefix=prefix,
                                                              keep_vars=keep_vars)
         # Save word_embeddings.
        if self.post_process and not self.pre_process and self.add_decoder:
            state_dict_[self._word_embeddings_for_head_key] \
                = self.word_embeddings.state_dict(prefix=prefix,
                                                  keep_vars=keep_vars)
        return state_dict_

    def load_state_dict(self, state_dict, strict=True):
        """Customized load."""

        self.language_model.load_state_dict(
            state_dict[self._language_model_key], strict=strict)
        if self.post_process and self.add_decoder:
            self.lm_head.load_state_dict(state_dict[self._lm_head_key],
                                         strict=strict)
        # Load word embeddings.
        if self.post_process and not self.pre_process and self.add_decoder:
            self.word_embeddings.load_state_dict(
                state_dict[self._word_embeddings_for_head_key], strict=strict)


================================================
FILE: megatron/legacy/model/transformer.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Transformer."""
import math
import os
from contextlib import nullcontext
from typing import Optional

import numpy as np
import torch
import torch.nn.functional as F

from megatron import core
from megatron.core import mpu, tensor_parallel
from megatron.core.enums import ModelType
from megatron.core.utils import deprecate_inference_params
from megatron.legacy.model.enums import AttnMaskType, LayerType, AttnType
from megatron.legacy.model.fused_softmax import FusedScaleMaskSoftmax
from megatron.legacy.model.fused_bias_gelu import bias_gelu_impl
from megatron.core.models.common.embeddings import apply_rotary_pos_emb
from megatron.core.jit import jit_fuser
from megatron.core.num_microbatches_calculator import get_num_microbatches
from megatron.core.parallel_state import (
    get_expert_tensor_and_model_parallel_group,
    get_tensor_model_parallel_group,
)
from megatron.core.tensor_parallel import (
    gather_from_sequence_parallel_region,
    reduce_scatter_to_sequence_parallel_region,
    get_cuda_rng_tracker,
    get_data_parallel_rng_tracker_name,
)
from megatron.legacy.model.enums import AttnMaskType, AttnType, LayerType
from megatron.legacy.model.fused_bias_gelu import bias_gelu_impl
from megatron.legacy.model.fused_softmax import FusedScaleMaskSoftmax
from megatron.legacy.model.utils import (
    attention_mask_func,
    erf_gelu,
    get_norm,
    openai_gelu,
)
from megatron.training import get_args, get_timers

from .module import MegatronModule

try:
    from einops import rearrange
except ImportError:
    rearrange = None

try:
    from flash_attn.flash_attn_interface import flash_attn_unpadded_func
except ImportError:
    try:
        from flash_attn.flash_attn_interface import (
            flash_attn_varlen_func as flash_attn_unpadded_func,
        )
    except ImportError:
        flash_attn_unpadded_func = None

""" We use the following notation throughout this file:
     h: hidden size
     n: number of attention heads
     p: number of model parallel partitions
     np: n/p
     hp: h/p
     hn: h/n
     b: batch size
     s: sequence length
     l: number of layers
    Transformer takes input of size [s, b, h] and returns a
    tensor of the same size. We use the following arguments:
        hyperparameters: transformer hyperparameters
"""

class DropPath(MegatronModule):
    """Drop paths (Stochastic Depth) per sample
    (when applied in main path of residual blocks).
    """

    def __init__(self, drop_prob=0.):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, hidden_state):
        if self.drop_prob == 0. or not self.training:
            return hidden_state
        keep_prob = 1 - self.drop_prob
        # work with diff dim tensors, not just 2D ConvNets
        # hidden_state: [s, b, h]
        shape = (1,) + (hidden_state.shape[1],) + (1,) * (hidden_state.ndim - 2)
        random_tensor = keep_prob + \
            torch.rand(shape, dtype=hidden_state.dtype, device=hidden_state.device)
        random_tensor.floor_()  # binarize
        output = hidden_state.div(keep_prob) * random_tensor
        return output

class ParallelMLP(MegatronModule):
    """MLP.

    MLP will take the input with h hidden state, project it to 4*h
    hidden dimension, perform nonlinear transformation, and project the
    state back into h hidden dimension.
    """

    def __init__(self, config, is_expert=False):
        super(ParallelMLP, self).__init__()
        args = get_args()

        self.add_bias = config.add_bias_linear

        ffn_hidden_size = config.ffn_hidden_size
        if config.gated_linear_unit:
            ffn_hidden_size *= 2

        # Project to 4h. If using swiglu double the output width, see https://arxiv.org/pdf/2002.05202.pdf
        self.dense_h_to_4h = tensor_parallel.ColumnParallelLinear(
            config.hidden_size,
            ffn_hidden_size,
            config=config,
            init_method=config.init_method,
            bias=self.add_bias,
            gather_output=False,
            skip_bias_add=True,
            is_expert=is_expert,
        )

        self.bias_gelu_fusion = False
        self.activation_func = None
        self.swiglu = args.swiglu

        if args.openai_gelu:
            self.activation_func = openai_gelu
        elif args.onnx_safe:
            self.activation_func = erf_gelu
        elif args.swiglu:
            def swiglu(x):
                x = torch.chunk(x, 2, dim=-1)
                return F.silu(x[0]) * x[1]
            self.activation_func = swiglu
        elif args.squared_relu:
            def squared_relu(x):
                return torch.pow(F.relu(x), 2)
            self.activation_func = squared_relu
        else:
            self.bias_gelu_fusion = args.bias_gelu_fusion
            self.activation_func = F.gelu

        # Project back to h.
        self.dense_4h_to_h = tensor_parallel.RowParallelLinear(
            config.ffn_hidden_size,
            config.hidden_size,
            config=config,
            init_method=config.output_layer_init_method,
            bias=self.add_bias,
            skip_bias_add=True,
            input_is_parallel=True,
            is_expert=is_expert,
        )

    def forward(self, hidden_states):

        # [s, b, 4hp]
        intermediate_parallel, bias_parallel = self.dense_h_to_4h(hidden_states)

        if self.bias_gelu_fusion:
            assert self.add_bias is True
            assert self.activation_func == F.gelu
            intermediate_parallel = bias_gelu_impl(intermediate_parallel, bias_parallel)
        else:
            if bias_parallel is not None:
                intermediate_parallel = intermediate_parallel + bias_parallel
            intermediate_parallel = self.activation_func(intermediate_parallel)

        # [s, b, h]
        output, output_bias = self.dense_4h_to_h(intermediate_parallel)
        return output, output_bias

def sinkhorn(cost, tol=0.0001):
    cost = torch.exp(cost)
    d0 = torch.ones(cost.size(0), device=cost.device, dtype=cost.dtype)
    d1 = torch.ones(cost.size(1), device=cost.device, dtype=cost.dtype)

    eps = 0.00000001
    error = 1e9
    d1_old = d1
    while error > tol:
        d0 = (1/d0.size(0))*1/(torch.sum(d1*cost,1) + eps)
        d1 = (1/d1.size(0))*1/(torch.sum(d0.unsqueeze(1)*cost,0)+eps)
        error = torch.mean(torch.abs(d1_old-d1))
        d1_old = d1
    return d1*cost*d0.unsqueeze(1)


def get_router_linear_layer(config):
    args = get_args()
    router = torch.nn.Linear(args.hidden_size, args.num_experts, bias=False)
    with get_cuda_rng_tracker().fork(get_data_parallel_rng_tracker_name()):
        config.init_method(router.weight)
    setattr(router.weight, 'sequence_parallel',config.sequence_parallel)
    return router


class SwitchMLP(MegatronModule):
    """
    Routes input to one of N MLP "experts"
    """
    def __init__(self, config):
        super(SwitchMLP, self).__init__()
        args = get_args()
        self.router = get_router_linear_layer(config)
        self.expert_parallel_size = mpu.get_expert_model_parallel_world_size()
        self.sequence_parallel = config.sequence_parallel
        self.add_bias = config.add_bias_linear

        assert args.num_experts % self.expert_parallel_size == 0
        self.num_local_experts = args.num_experts // self.expert_parallel_size
        local_expert_indices_offset = mpu.get_expert_model_parallel_rank() * self.num_local_experts
        self.local_expert_indices = [local_expert_indices_offset + i for i in range(self.num_local_experts)]

        self.local_experts = torch.nn.ModuleList()
        for i in range(self.num_local_experts):
            self.local_experts.append(ParallelMLP(config, is_expert=True))

        self.tp_ep_group = get_expert_tensor_and_model_parallel_group()

    def gather_indices(self, local_indices):
        """ Gather tensors and concatinate along the first dimension."""
        world_size = self.tp_ep_group.size()
        # Bypass the function if we are using only 1 GPU.
        if world_size == 1:
            return local_indices

        dim_size = list(local_indices.size())
        dim_size[0] = dim_size[0] * world_size

        # TODO pre allocate memory
        output = torch.empty(dim_size, dtype=local_indices.dtype,
                             device=torch.cuda.current_device())
        torch.distributed._all_gather_base(
            output, local_indices.contiguous(), group=self.tp_ep_group
        )
        return output

    def forward(self, hidden_states):
        # hidden_states: [b, s, h]
        args = get_args()
        s = hidden_states.size(0)
        b = hidden_states.size(1)
        h = hidden_states.size(2)
        route = self.router(hidden_states).view(-1, args.num_experts)

        # TODO (rprenger) Right now we're just using the sinkhorn algorithm
        # for load balancing. There should be an option to do no load balancing
        # and the algorithm and parametets should be further tested
        if self.training:
            with torch.no_grad():
                sinkroute = sinkhorn(route.detach().to(dtype=torch.float32))
                _, max_ind = torch.max(sinkroute, dim=1)
            route = torch.sigmoid(route)
            max_prob = route[torch.arange(route.size(0)), max_ind]
        else:
            route = torch.sigmoid(route)
            max_prob, max_ind = torch.max(route, dim=1)

        max_prob = torch.unsqueeze(max_prob, 1)
        hidden_states = hidden_states.view(-1, hidden_states.size(2))

        # TODO (rprenger) TODO this could be made easier to read
        # Converting [s, b, h] to [s*b, h].
        # Each vector could be routed differently
        if self.sequence_parallel or (self.expert_parallel_size > 1):
            global_hidden_states = \
                gather_from_sequence_parallel_region(hidden_states, group=self.tp_ep_group)
            global_indices = self.gather_indices(max_ind)
        else:
            global_hidden_states = hidden_states
            global_indices = max_ind

        output_total = torch.zeros_like(global_hidden_states)
        if self.add_bias:
            output_bias_total = torch.zeros_like(global_hidden_states)

        for expert_num, expert in enumerate(self.local_experts):
            local_expert_index = self.local_expert_indices[expert_num]
            local_indices = (global_indices == local_expert_index).nonzero()
            hidden = global_hidden_states[local_indices, :]
            output, output_bias = expert(hidden)
            output_total[local_indices, :] = output
            if self.add_bias:
                output_bias = output_bias.expand_as(output)
                output_bias_total[local_indices, :] = output_bias

        if self.sequence_parallel or (self.expert_parallel_size > 1):
            output_total = \
                reduce_scatter_to_sequence_parallel_region(output_total, group=self.tp_ep_group)
            if self.add_bias:
                output_bias_total = \
                    reduce_scatter_to_sequence_parallel_region(output_bias_total, group=self.tp_ep_group)

                # bias is duplicated across tensor parallelism ranks;
                # reduce scatter reduces bias across tensor parallel_ranks
                output_bias_total = \
                    output_bias_total/mpu.get_tensor_model_parallel_world_size()

        output_total = output_total*max_prob
        output_total = output_total.view(s, b, h)
        if self.add_bias:
            output_bias_total = output_bias_total*max_prob
            output_bias_total = output_bias_total.view(s, b, h)
        else:
            output_bias_total = None

        return output_total, output_bias_total


class CoreAttention(MegatronModule):

    def __init__(self, layer_number, config,
                 attn_mask_type=AttnMaskType.padding):
        super(CoreAttention, self).__init__()
        self.fp16 = config.fp16
        self.bf16 = config.bf16

        self.apply_query_key_layer_scaling = config.apply_query_key_layer_scaling
        self.attention_softmax_in_fp32 = config.attention_softmax_in_fp32
        if self.apply_query_key_layer_scaling:
            self.attention_softmax_in_fp32 = True
        self.layer_number = max(1, layer_number)
        self.attn_mask_type = attn_mask_type
        self.sequence_parallel = config.sequence_parallel

        projection_size = config.kv_channels * config.num_attention_heads

        # Per attention head and per partition values.
        world_size = mpu.get_tensor_model_parallel_world_size()
        self.hidden_size_per_partition = core.utils.divide(projection_size,
                                                           world_size)
        self.hidden_size_per_attention_head = core.utils.divide(
            projection_size, config.num_attention_heads)
        self.num_attention_heads_per_partition = core.utils.divide(
            config.num_attention_heads, world_size)

        coeff = None
        self.norm_factor = math.sqrt(self.hidden_size_per_attention_head)
        if self.apply_query_key_layer_scaling:
            coeff = self.layer_number
            self.norm_factor *= coeff

        self.scale_mask_softmax = FusedScaleMaskSoftmax(
            self.fp16, self.bf16,
            self.attn_mask_type,
            config.masked_softmax_fusion,
            attention_mask_func,
            self.attention_softmax_in_fp32,
            coeff)

        # Dropout. Note that for a single iteration, this layer will generate
        # different outputs on different number of parallel partitions but
        # on average it should not be partition dependent.
        self.attention_dropout = torch.nn.Dropout(config.attention_dropout)

    def forward(self, query_layer, key_layer,
                value_layer, attention_mask):

        # ===================================
        # Raw attention scores. [b, np, s, s]
        # ===================================

        # [b, np, sq, sk]
        output_size = (query_layer.size(1),
                       query_layer.size(2),
                       query_layer.size(0),
                       key_layer.size(0))

        # [sq, b, np, hn] -> [sq, b * np, hn]
        query_layer = query_layer.reshape(output_size[2],
                                          output_size[0] * output_size[1], -1)
        # [sk, b, np, hn] -> [sk, b * np, hn]
        key_layer = key_layer.view(output_size[3],
                                   output_size[0] * output_size[1], -1)

        # preallocting input tensor: [b * np, sq, sk]
        matmul_input_buffer = mpu.get_global_memory_buffer().get_tensor(
            (output_size[0]*output_size[1], output_size[2], output_size[3]),
            query_layer.dtype, "mpu")

        # Raw attention scores. [b * np, sq, sk]
        matmul_result = torch.baddbmm(
            matmul_input_buffer,
            query_layer.transpose(0, 1),   # [b * np, sq, hn]
            key_layer.transpose(0, 1).transpose(1, 2),  # [b * np, hn, sk]
            beta=0.0, alpha=(1.0/self.norm_factor))

        # change view to [b, np, sq, sk]
        attention_scores = matmul_result.view(*output_size)

        # ===========================
        # Attention probs and dropout
        # ===========================

        # attention scores and attention mask [b, np, sq, sk]
        attention_probs = self.scale_mask_softmax(attention_scores,
                                                  attention_mask)

        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.
        if not self.sequence_parallel:
            with tensor_parallel.get_cuda_rng_tracker().fork():
                attention_probs = self.attention_dropout(attention_probs)
        else:
            attention_probs = self.attention_dropout(attention_probs)

        # =========================
        # Context layer. [sq, b, hp]
        # =========================

        # value_layer -> context layer.
        # [sk, b, np, hn] --> [b, np, sq, hn]

        # context layer shape: [b, np, sq, hn]
        output_size = (value_layer.size(1),
                       value_layer.size(2),
                       query_layer.size(0),
                       value_layer.size(3))

        # change view [sk, b * np, hn]
        value_layer = value_layer.view(value_layer.size(0),
                                       output_size[0] * output_size[1], -1)

        # change view [b * np, sq, sk]
        attention_probs = attention_probs.view(output_size[0] * output_size[1],
                                               output_size[2], -1)

        # matmul: [b * np, sq, hn]
        context_layer = torch.bmm(attention_probs, value_layer.transpose(0, 1))

        # change view [b, np, sq, hn]
        context_layer = context_layer.view(*output_size)

        # [b, np, sq, hn] --> [sq, b, np, hn]
        context_layer = context_layer.permute(2, 0, 1, 3).contiguous()

        # [sq, b, np, hn] --> [sq, b, hp]
        new_context_layer_shape = context_layer.size()[:-2] + \
            (self.hidden_size_per_partition,)
        context_layer = context_layer.view(*new_context_layer_shape)

        return context_layer


class FlashSelfAttention(torch.nn.Module):
    """Implement the scaled dot product attention with softmax.
    Arguments
    ---------
        softmax_scale: The temperature to use for the softmax attention.
                      (default: 1/sqrt(d_keys) where d_keys is computed at
                      runtime)
        attention_dropout: The dropout rate to apply to the attention
                           (default: 0.0)
    """
    def __init__(self, causal=False, softmax_scale=None, attention_dropout=0.0,
                 device=None, dtype=None):
        super().__init__()
        assert flash_attn_unpadded_func is not None, ('Please install FlashAttention first, '
                                                      'e.g., with pip install flash-attn')
        assert rearrange is not None, 'Please install einops first, e.g., with pip install einops'
        self.causal = causal
        self.softmax_scale = softmax_scale
        self.dropout_p = attention_dropout

    def forward(self, q, k, v):
        """Implements the multihead softmax attention.
        Arguments
        ---------
            q, k, v: The tensor containing the query, key, and value. (B, S, H, D)
        """

        assert all((i.dtype in [torch.float16, torch.bfloat16] for i in (q,k,v)))
        assert all((i.is_cuda for i in (q,k,v)))

        batch_size, seqlen_q = q.shape[0], q.shape[1]
        seqlen_k = k.shape[1]

        q, k, v = [rearrange(x, 'b s ... -> (b s) ...') for x in [q, k, v]]
        cu_seqlens_q = torch.arange(0, (batch_size + 1) * seqlen_q, step=seqlen_q, dtype=torch.int32,
                                    device=q.device)

        if self.training:
            # during training q,k,v always have same seqlen
            assert seqlen_k == seqlen_q

            is_causal = self.causal
            cu_seqlens_k = cu_seqlens_q
            dropout_p = self.dropout_p
        else:
            # turn off FA causal mask after first inference autoregressive iteration
            # only on first autoregressive step q,k,v have same seqlen
            is_causal = seqlen_q == seqlen_k
            cu_seqlens_k = torch.arange(0, (batch_size + 1) * seqlen_k, step=seqlen_k, dtype=torch.int32,
                        device=q.device)
            dropout_p = 0

        output = flash_attn_unpadded_func(
            q, k, v, cu_seqlens_q, cu_seqlens_k, seqlen_q, seqlen_k,
            dropout_p,
            softmax_scale=self.softmax_scale, causal=is_causal
        )

        output = rearrange(output, '(b s) ... -> b s ...', b=batch_size)
        return output


class ParallelAttention(MegatronModule):
    """Parallel self-attention layer abstract class.

    Self-attention layer takes input with size [s, b, h]
    and returns output of the same size.
    """

    def __init__(self, config, layer_number,
                 attention_type=AttnType.self_attn,
                 attn_mask_type=AttnMaskType.padding):
        super(ParallelAttention, self).__init__()
        args = get_args()
        self.layer_number = max(1, layer_number)
        self.attention_type = attention_type
        self.attn_mask_type = attn_mask_type
        self.params_dtype = config.params_dtype
        self.sequence_parallel = config.sequence_parallel
        self.config = config
        self.group_query_attention = args.group_query_attention
        self.num_query_groups = args.num_query_groups

        query_projection_size = config.kv_channels * config.num_attention_heads
        if self.group_query_attention:
            kv_projection_size = args.kv_channels * args.num_query_groups
        else:
            kv_projection_size = args.kv_channels * args.num_attention_heads

        self.use_flash_attn = args.use_flash_attn \
            and attention_type == AttnType.self_attn \
            and self.attn_mask_type == AttnMaskType.causal
        if self.use_flash_attn:
            if flash_attn_unpadded_func is None:
                raise ImportError('FlashAttention is not installed, please install with '
                                  'pip install flash-attn')
            assert attention_type == AttnType.self_attn, ('FlashAttention code path only supports '
                                                          'self-attention for now')
            assert self.attn_mask_type == AttnMaskType.causal, ('FlashAttention code path only '
                                                                'supports causal mask for now')
            if rearrange is None:
                raise ImportError('einops is not installed, please install with pip install einops')

        # Per attention head and per partition values.
        world_size = mpu.get_tensor_model_parallel_world_size()
        self.hidden_size_per_attention_head = core.utils.divide(
            query_projection_size, config.num_attention_heads)
        self.num_attention_heads_per_partition = core.utils.divide(
            config.num_attention_heads, world_size)

        if self.group_query_attention:
            if args.num_query_groups % world_size != 0:
                raise NotImplementedError('Currently the num_query_groups should be '
                                          'a multiple of the tensor parallel size')
            self.num_query_groups_per_partition = core.utils.divide(
                        args.num_query_groups, world_size)
        else:
            self.num_query_groups_per_partition = self.num_attention_heads_per_partition

        # Strided linear layer.
        if attention_type == AttnType.self_attn:
            self.query_key_value = tensor_parallel.ColumnParallelLinear(
                config.hidden_size,
                query_projection_size + 2 * kv_projection_size,
                config=config,
                init_method=config.init_method,
                bias=args.add_bias_linear or args.add_qkv_bias,
                gather_output=False)
        else:
            assert attention_type == AttnType.cross_attn

            if self.group_query_attention:
                raise NotImplementedError("Grouped query attention not implemented for cross-attention.")
            assert query_projection_size == kv_projection_size

            self.query = tensor_parallel.ColumnParallelLinear(
                config.hidden_size,
                query_projection_size,
                config=config,
                init_method=config.init_method,
                bias=config.add_bias_linear,
                gather_output=False)

            self.key_value = tensor_parallel.ColumnParallelLinear(
                config.hidden_size,
                2 * kv_projection_size,
                config=config,
                init_method=config.init_method,
                bias=config.add_bias_linear,
                gather_output=False)

        self.core_attention = CoreAttention(self.layer_number, config,
                                            self.attn_mask_type)
        self.checkpoint_core_attention = (
            config.recompute_granularity == 'selective'
            and "core_attn" in config.recompute_modules
        )

        if self.use_flash_attn:
            self.core_attention_flash = FlashSelfAttention(
                causal=True, attention_dropout=config.attention_dropout
            )

        # Output.
        self.dense = tensor_parallel.RowParallelLinear(
            query_projection_size,
            config.hidden_size,
            config=config,
            init_method=config.output_layer_init_method,
            bias=args.add_bias_linear,
            input_is_parallel=True,
            skip_bias_add=True)

    def _checkpointed_attention_forward(self, query_layer, key_layer,
                                        value_layer, attention_mask,
                                        rotary_pos_emb=None):
        """Forward method with activation checkpointing."""
        def custom_forward(*inputs):
            query_layer = inputs[0]
            key_layer = inputs[1]
            value_layer = inputs[2]
            attention_mask = inputs[3]
            output_ = self.core_attention(query_layer, key_layer,
                                          value_layer, attention_mask)
            return output_

        q_pos_emb, k_pos_emb = (None, None) if rotary_pos_emb is None \
            else rotary_pos_emb

        hidden_states = tensor_parallel.checkpoint(
            custom_forward,
            False, query_layer, key_layer, value_layer, attention_mask,
            q_pos_emb, k_pos_emb)

        return hidden_states

    def _allocate_memory(self, inference_max_sequence_len, batch_size, num_attention_heads):
        return torch.empty(
            inference_max_sequence_len,
            batch_size,
            num_attention_heads,
            self.hidden_size_per_attention_head,
            dtype=self.params_dtype,
            device=torch.cuda.current_device())

    def forward(self, hidden_states, attention_mask,
                encoder_output=None, inference_context=None,
                rotary_pos_emb=None, *, inference_params=None):
        # hidden_states: [sq, b, h]

        inference_context = deprecate_inference_params(inference_context, inference_params)

        # =================================================
        # Pre-allocate memory for key-values for inference.
        # =================================================
        is_first_step = False
        if inference_context:
            if self.layer_number not in inference_context.key_value_memory_dict:
                inf_max_seq_len = inference_context.max_sequence_length
                inf_max_batch_size = inference_context.max_batch_size
                inference_key_memory = self._allocate_memory(
                    inf_max_seq_len, inf_max_batch_size,
                    self.num_query_groups_per_partition)
                inference_value_memory = self._allocate_memory(
                    inf_max_seq_len, inf_max_batch_size,
                    self.num_query_groups_per_partition)

                inference_context.key_value_memory_dict[self.layer_number] = (
                    inference_key_memory, inference_value_memory)
                is_first_step = True
            else:
                inference_key_memory, inference_value_memory = \
                    inference_context.key_value_memory_dict[self.layer_number]

        # =====================
        # Query, Key, and Value
        # =====================
        if self.attention_type == AttnType.self_attn:

            # Attention heads [sq, b, h] --> [sq, b, ng * (np/ng + 2) * hn)]
            mixed_x_layer, _ = self.query_key_value(hidden_states)

            # [sq, b, hp] --> [sq, b, ng, (np/ng + 2) * hn]
            new_tensor_shape = mixed_x_layer.size()[:-1] + (
                self.num_query_groups_per_partition,
                (
                    (self.num_attention_heads_per_partition // self.num_query_groups_per_partition + 2)
                    * self.hidden_size_per_attention_head
                ),
            )
            mixed_x_layer = mixed_x_layer.view(*new_tensor_shape)

            # [sq, b, ng, (np/ng + 2) * hn] --> [sq, b, ng, np/ng * hn], [sq, b, ng, hn], [sq, b, ng, hn]
            (query_layer,
            key_layer,
            value_layer) = torch.split(
                mixed_x_layer,
                [
                    (
                        self.num_attention_heads_per_partition // self.num_query_groups_per_partition
                        * self.hidden_size_per_attention_head
                    ),
                    self.hidden_size_per_attention_head,
                    self.hidden_size_per_attention_head
                ],
                dim=3)

            # [sq, b, ng, np/ng * hn] -> [sq, b, np, hn] -
            query_layer = query_layer.view(query_layer.size(0), query_layer.size(1), -1, self.hidden_size_per_attention_head)
        else:
            # Attention heads [sk, b, h] --> [sk, b, (np * 2 * hn)]
            mixed_kv_layer, _ = self.key_value(encoder_output)

            # [sk, b, (np * 2 * hn)] --> [sk, b, np, 2 * hn]
            new_tensor_shape = mixed_kv_layer.size()[:-1] + \
                (self.num_attention_heads_per_partition,
                2 * self.hidden_size_per_attention_head)
            mixed_kv_layer = mixed_kv_layer.view(*new_tensor_shape)

            # [sk, b, np, 2 * hn] --> 2 [sk, b, np, hn]
            (key_layer,
            value_layer) = tensor_parallel.split_tensor_along_last_dim(mixed_kv_layer, 2)

            # Attention head [sq, b, h] --> [sq, b, hp]
            query_layer, _ = self.query(hidden_states)
            # [sq, b, hp] --> [sq, b, np, hn]
            new_tensor_shape = query_layer.size()[:-1] + \
                (self.num_attention_heads_per_partition,
                self.hidden_size_per_attention_head)
            query_layer = query_layer.view(*new_tensor_shape)

        # ==================================
        # Adjust key and value for inference
        # ==================================

        # duplicate the pos_emb for self attention
        if rotary_pos_emb is not None:
            if isinstance(rotary_pos_emb, tuple):
                rotary_pos_emb = rotary_pos_emb
            else:
                rotary_pos_emb = ((rotary_pos_emb,) * 2)

        if inference_context:
            batch_start = inference_context.batch_size_offset
            batch_end = batch_start + key_layer.size(1)
            assert batch_end <= inference_key_memory.size(1)
            sequence_start = inference_context.sequence_len_offset
            sequence_end = sequence_start + key_layer.size(0)
            assert sequence_end <= inference_key_memory.size(0), ("Current sequence length is "
            "longer than expected maximum sequence length! Increase inference_max_seq_length.")
            # Copy key and values.
            inference_key_memory[sequence_start:sequence_end,
                                 batch_start:batch_end, ...] = key_layer
            inference_value_memory[sequence_start:sequence_end,
                                   batch_start:batch_end, ...] = value_layer
            key_layer = inference_key_memory[
                :sequence_end, batch_start:batch_end, ...]
            value_layer = inference_value_memory[
                :sequence_end, batch_start:batch_end, ...]


            # adjust the key rotary positional embedding
            if rotary_pos_emb is not None:
                q_pos_emb, k_pos_emb = rotary_pos_emb
                # need to cross check this condition during inference
                # if not set_inference_key_value_memory:
                if not is_first_step:
                    # In inference, we compute one token at a time.
                    # Select the correct positional embedding
                    # (only the last token in the sequence)
                    q_pos_emb = q_pos_emb[sequence_end - 1 : sequence_end]
                else:
                    # In the first forward pass of inference,
                    # we use the entire provided prefix.
                    # q_pos_emb here has the rope embeddings of the entire
                    # prefix + to-be-generated output so
                    # we slice to just the prefix.
                    q_pos_emb = q_pos_emb[:sequence_end, :, :, :]
                k_pos_emb = k_pos_emb[:sequence_end, :, :, :]
                rotary_pos_emb = (q_pos_emb, k_pos_emb)

        # ==================================
        # core attention computation
        # ==================================

        # expand the key_layer and value_layer [sk, b, ng, hn] -> [sk, b, np, hn]
        if self.num_attention_heads_per_partition // self.num_query_groups_per_partition > 1:
            key_layer = key_layer.repeat_interleave(
                self.num_attention_heads_per_partition // self.num_query_groups_per_partition,
                dim = 2
            )
            value_layer = value_layer.repeat_interleave(
                self.num_attention_heads_per_partition // self.num_query_groups_per_partition,
                dim = 2
            )

        # apply relative positional encoding (rotary embedding)
        if rotary_pos_emb is not None:
            q_pos_emb, k_pos_emb = rotary_pos_emb
            query_layer = apply_rotary_pos_emb(query_layer, q_pos_emb,self.config)
            key_layer = apply_rotary_pos_emb(key_layer, k_pos_emb,self.config)
            # TODO, can apply positional embedding to value_layer so it has
            # absolute positional embedding.
            # otherwise, only relative positional embedding takes effect
            # value_layer = apply_rotary_pos_emb(value_layer, k_pos_emb)

        if not self.use_flash_attn:
            if self.checkpoint_core_attention:
                context_layer = self._checkpointed_attention_forward(
                    query_layer, key_layer, value_layer, attention_mask)
            else:
                context_layer = self.core_attention(
                    query_layer, key_layer, value_layer, attention_mask)
        else:
            q, k, v = [rearrange(x, 's b ... -> b s ...').contiguous()
                       for x in (query_layer, key_layer, value_layer)]
            if not self.sequence_parallel:
                with tensor_parallel.get_cuda_rng_tracker().fork():
                    context_layer = self.core_attention_flash(q, k, v)
            else:
                context_layer = self.core_attention_flash(q, k, v)
            context_layer = rearrange(context_layer, 'b s h d -> s b (h d)').contiguous()

        # =================
        # Output. [sq, b, h]
        # =================

        output, bias = self.dense(context_layer)

        return output, bias


def bias_dropout_add(x, bias, residual, prob, training):
    # type: (Tensor, Optional[Tensor], Tensor, float, bool) -> Tensor
    if bias is not None:
        x = x + bias
    out = torch.nn.functional.dropout(x, p=prob, training=training)
    out = residual + out
    return out


def get_bias_dropout_add(training):
    def _bias_dropout_add(x, bias, residual, prob):
        return bias_dropout_add(x, bias, residual, prob, training)
    return _bias_dropout_add


@jit_fuser
def bias_dropout_add_fused_train(x: torch.Tensor,
                                 bias: Optional[torch.Tensor],
                                 residual: torch.Tensor,
                                 prob: float) -> torch.Tensor:
    return bias_dropout_add(x, bias, residual, prob, True)


@jit_fuser
def bias_dropout_add_fused_inference(x: torch.Tensor,
                                     bias: Optional[torch.Tensor],
                                     residual: torch.Tensor,
                                     prob: float) -> torch.Tensor:
    return bias_dropout_add(x, bias, residual, prob, False)


class ParallelTransformerLayer(MegatronModule):
    """A single transformer layer.

    Transformer layer takes input with size [s, b, h] and returns an
    output of the same size.
    """

    def __init__(self, config,
                 layer_number, layer_type=LayerType.encoder,
                 self_attn_mask_type=AttnMaskType.padding,
                 drop_path_rate=0.):
        args = get_args()

        super(ParallelTransformerLayer, self).__init__()
        self.layer_number = layer_number
        self.layer_type = layer_type

        self.apply_residual_connection_post_norm \
            = config.apply_residual_connection_post_layernorm

        self.bf16 = config.bf16
        self.fp32_residual_connection = config.fp32_residual_connection

        # Normalize the input data.
        self.input_norm = get_norm(config)

        # Self attention.
        self.self_attention = ParallelAttention(
            config,
            layer_number,
            attention_type=AttnType.self_attn,
            attn_mask_type=self_attn_mask_type)
        self.hidden_dropout = config.hidden_dropout
        self.bias_dropout_fusion = config.bias_dropout_fusion
        self.drop_path = DropPath(drop_path_rate) if drop_path_rate > 0.0 else None

        # Normalize the attention output
        self.post_attention_norm = get_norm(config)

        # MLP
        if args.num_experts is not None:
            self.mlp = SwitchMLP(config)
        else:
            self.mlp = ParallelMLP(config)

        # Set bias+dropout+add fusion grad_enable execution handler.
        TORCH_MAJOR = int(torch.__version__.split('.')[0])
        TORCH_MINOR = int(torch.__version__.split('.')[1])
        use_nvfuser = TORCH_MAJOR > 1 or (TORCH_MAJOR == 1 and TORCH_MINOR >= 10)
        self.bias_dropout_add_exec_handler = \
                nullcontext if use_nvfuser else torch.enable_grad

    def default_decoder_cross_attention(self,
                                        encoder_output,
                                        enc_dec_attn_mask,
                                        norm_input,
                                        norm_output,
                                        bias_dropout_add_func):
        '''Cross attention for a standard encoder-decoder model.'''

        # Attention.
        attention_output, attention_bias = \
            self.inter_attention(norm_output,
                                 enc_dec_attn_mask,
                                 encoder_output=encoder_output)

        # Residual connection.
        if self.apply_residual_connection_post_norm:
            residual = norm_output
        else:
            residual = norm_input

        if attention_bias is not None:
            attention_bias = attention_bias.expand_as(residual)

        # Bias-dropout-add.
        with self.bias_dropout_add_exec_handler():
            norm_input = bias_dropout_add_func(
                attention_output,
                attention_bias,
                residual,
                self.hidden_dropout)

        # Normalize.
        norm_output = self.post_inter_attention_norm(norm_input)

        return norm_input, norm_output

    def forward(self, hidden_states, attention_mask,
                encoder_output=None, enc_dec_attn_mask=None,
                inference_context=None,
                rotary_pos_emb=None,
                *,
                inference_params=None):

        inference_context = deprecate_inference_params(inference_context, inference_params)

        # hidden_states: [s, b, h]

        # Layer norm at the beginning of the transformer layer.
        norm_output = self.input_norm(hidden_states)

        # Self attention.
        attention_output, attention_bias = \
            self.self_attention(
                norm_output,
                attention_mask,
                inference_context=inference_context,
                rotary_pos_emb=rotary_pos_emb)

        # Residual connection.
        if self.apply_residual_connection_post_norm:
            residual = norm_output
        else:
            residual = hidden_states

        if self.drop_path is None:
            # jit scripting for a nn.module (with dropout) is not
            # trigerring the fusion kernel. For now, we use two
            # different nn.functional routines to account for varying
            # dropout semantics during training and inference phases.
            if self.bias_dropout_fusion:
                if self.training:
                    bias_dropout_add_func = bias_dropout_add_fused_train
                else:
                    bias_dropout_add_func = bias_dropout_add_fused_inference
            else:
                bias_dropout_add_func = get_bias_dropout_add(self.training)

            if attention_bias is not None:
                attention_bias = attention_bias.expand_as(residual)
            with self.bias_dropout_add_exec_handler():
                norm_input = bias_dropout_add_func(
                    attention_output,
                    attention_bias,
                    residual,
                    self.hidden_dropout)
        else:
            out = torch.nn.functional.dropout(attention_output + attention_bias,
                                              p=self.hidden_dropout,
                                              training=self.training)
            norm_input = residual + self.drop_path(out)

        # Layer norm post the self attention.
        norm_output = self.post_attention_norm(norm_input)

        # Cross attention.
        if self.layer_type == LayerType.encoder:
            pass
        elif self.layer_type == LayerType.decoder:
            norm_input, norm_output = \
                self.default_decoder_cross_attention(
                    encoder_output,
                    enc_dec_attn_mask,
                    norm_input,
                    norm_output,
                    bias_dropout_add_func)
        else:
            raise Exception("Unsupported layer type, '%s'." %
                            self.layer_type.name)

        # MLP.
        mlp_output, mlp_bias = self.mlp(norm_output)

        # Second residual connection.
        if self.apply_residual_connection_post_norm:
            residual = norm_output
        else:
            residual = norm_input

        if self.drop_path is None:
            if mlp_bias is not None:
                mlp_bias = mlp_bias.expand_as(residual)
            with self.bias_dropout_add_exec_handler():
                output = bias_dropout_add_func(
                    mlp_output,
                    mlp_bias,
                    residual,
                    self.hidden_dropout)

            # Jit compiled function creates 'view' tensor. This tensor
            # potentially gets saved in the MPU checkpoint function context,
            # which rejects view tensors. While making a viewless tensor here
            # won't result in memory savings (like the data loader, or
            # p2p_communication), it serves to document the origin of this
            # 'view' tensor.
            output = core.utils.make_viewless_tensor(inp = output,
                                                     requires_grad = output.requires_grad,
                                                     keep_graph = True)

        else:
            if mlp_bias is not None:
                mlp_output = mlp_output + mlp_bias
            out = torch.nn.functional.dropout(mlp_output,
                                              p=self.hidden_dropout,
                                              training=self.training)
            output = residual + self.drop_path(out)

        return output


class NoopTransformerLayer(MegatronModule):
    """A single 'no-op' transformer layer.

    The sole purpose of this layer is for when a standalone embedding layer
    is used (i.e., args.account_for_embedding_in_pipeline_split == True). In this case,
    zero transformer layers are assigned when pipeline rank == 0. Additionally,
    when virtual pipeline rank >= 1, zero total model parameters are created
    (virtual rank 0 contains the input embedding). This results in the model's
    input and output tensors being the same, which causes an error when
    performing certain memory optimiations on the output tensor (e.g.,
    deallocating it). Thus, this layer disconnects the input from the output
    via a clone. Since ranks containing a no-op layer are generally under-
    utilized (both compute and memory), there's no worry of any performance
    degredation.
    """

    def __init__(self, layer_number):
        super().__init__()
        self.layer_number = layer_number

    def forward(self, hidden_states, attention_mask,
                encoder_output=None, enc_dec_attn_mask=None,
                inference_context=None, *, inference_params=None):
        return hidden_states.clone()


def _get_num_layers(args, model_type, is_decoder=False):
    """Compute the number of transformer layers resident on the current rank."""
    if mpu.get_pipeline_model_parallel_world_size() > 1:
        assert args.num_layers == args.encoder_num_layers
        assert args.num_layers % args.transformer_pipeline_model_parallel_size == 0, \
            'num_layers must be divisible by transformer_pipeline_model_parallel_size'

        # When a standalone embedding stage is used, all transformer layers
        # are divided among pipeline rank >= 1, while on pipeline rank 0,
        # ranks either contain the input embedding layer (virtual pp rank 0),
        # or no layers at all (virtual pp rank >= 1).
        num_layers = (
            0
            if args.account_for_embedding_in_pipeline_split
            and mpu.get_pipeline_model_parallel_rank() == 0 else
            args.num_layers // args.transformer_pipeline_model_parallel_size
        )
    else:
        if not is_decoder:
            num_layers = args.encoder_num_layers
        else:
            num_layers = args.decoder_num_layers
    return num_layers


class ParallelTransformer(MegatronModule):
    """Transformer class."""

    def __init__(self, config,
                 model_type, layer_type=LayerType.encoder,
                 self_attn_mask_type=AttnMaskType.padding,
                 post_norm=True,
                 pre_process=True,
                 post_process=True,
                 drop_path_rate=0.0):
        super(ParallelTransformer, self).__init__()
        args = get_args()

        self.layer_type = layer_type
        self.model_type = model_type
        self.bf16 = config.bf16
        self.fp32_residual_connection = config.fp32_residual_connection
        self.post_norm = post_norm
        self.pre_process = pre_process
        self.post_process = post_process
        self.input_tensor = None
        self.drop_path_rate = drop_path_rate
        self.transformer_impl = args.transformer_impl

        # Store activation checkpoiting flag.
        self.recompute_granularity = config.recompute_granularity
        self.recompute_method = config.recompute_method
        self.recompute_num_layers = config.recompute_num_layers
        self.distribute_saved_activations = \
            config.distribute_saved_activations and not config.sequence_parallel

        self.sequence_parallel = config.sequence_parallel

        # Transformer Engine Init.
        self.transformer_engine_v_0_10 = False
        self.transformer_engine_v_0_11 = False
        self.transformer_engine_v_0_8 = False
        if self.transformer_impl == 'transformer_engine':
            global transformer_engine
            import transformer_engine

            if core.utils.is_te_min_version("0.8.0"):
                self.transformer_engine_v_0_8 = True
            if core.utils.is_te_min_version("0.10.0"):
                self.transformer_engine_v_0_10 = True
            if core.utils.is_te_min_version("0.11.0"):
                self.transformer_engine_v_0_11 = True

            assert not args.squared_relu, ("TransformerEngine does not support squared "
                                           "relu activation.")

        self.use_fp8 = args.fp8 is not None
        self.fp8_recipe = None
        self.fp8_group = None
        if self.use_fp8:
            assert args.transformer_impl == 'transformer_engine', \
                'transformer-engine required for fp8 training and inference'
            self.fp8_group = mpu.get_amax_reduction_group(tp_only_amax_red=config.tp_only_amax_red)
            if args.fp8 == "e4m3":
                fp8_format = transformer_engine.common.recipe.Format.E4M3
            elif args.fp8 == "hybrid":
                fp8_format = transformer_engine.common.recipe.Format.HYBRID
            else:
                raise ValueError("The DelayedScaling recipe only supports E4M3 and HYBRID formats.")
            self.fp8_recipe = transformer_engine.common.recipe.DelayedScaling(
                margin=args.fp8_margin,
                interval=args.fp8_interval,
                fp8_format=fp8_format,
                amax_history_len=args.fp8_amax_history_len,
                amax_compute_algo=args.fp8_amax_compute_algo,
                override_linear_precision=(False, False, not args.fp8_wgrad),
            )

        self.num_microbatches_in_previous_step = -1
        self.microbatch_count = 0
        self.checkpoint_core_attention = (
            config.recompute_granularity == 'selective'
            and "core_attn" in config.recompute_modules
        )

        # Number of layers.
        self.num_layers = _get_num_layers(args, model_type,
                                          layer_type==LayerType.decoder)

        self.drop_path_rates = [
            rate.item() for rate in
            torch.linspace(0, self.drop_path_rate, config.num_layers)]

        def build_layer(layer_number):
            if args.transformer_impl == 'local':
                return ParallelTransformerLayer(
                    config,
                    layer_number,
                    layer_type=layer_type,
                    self_attn_mask_type=self_attn_mask_type,
                    drop_path_rate=self.drop_path_rates[layer_number - 1])
            else:
                # This argument is only available from TE v0.10 onwards.
                extra_transformer_engine_kwargs = {}
                if self.transformer_engine_v_0_8:
                    extra_transformer_engine_kwargs["bias"] = args.add_bias_linear
                if self.transformer_engine_v_0_10:
                    extra_transformer_engine_kwargs["activation"] = "swiglu" if args.swiglu else "gelu"
                if self.transformer_engine_v_0_11:
                    extra_transformer_engine_kwargs["normalization"] = args.normalization
                assert config.attention_softmax_in_fp32, "TransformerEngine only supports softmax compute in FP32."
                assert (
                    (bool(int(os.getenv("NVTE_APPLY_QK_LAYER_SCALING", "0"))) and args.fp16) == config.apply_query_key_layer_scaling
                ), ("Unsupported config for apply_query_key_layer_scaling in TransformerEngine. If --apply-query-key-layer-scaling is "
                    "provided, set env-var NVTE_APPLY_QK_LAYER_SCALING=1 and you must be using fp16.")
                return transformer_engine.pytorch.TransformerLayer(
                    config.hidden_size,
                    config.ffn_hidden_size,
                    config.num_attention_heads,
                    layernorm_epsilon=config.layernorm_epsilon,
                    hidden_dropout=config.hidden_dropout,
                    attention_dropout=config.attention_dropout,
                    init_method=config.init_method,
                    output_layer_init_method=config.output_layer_init_method,
                    layer_number=layer_number,
                    kv_channels=config.kv_channels,
                    self_attn_mask_type=self_attn_mask_type.name,
                    tp_group=mpu.get_tensor_model_parallel_group() if mpu.is_initialized() else None,
                    tp_size=mpu.get_tensor_model_parallel_world_size(),
                    get_rng_state_tracker=get_cuda_rng_tracker
                    if get_cuda_rng_tracker().is_initialized()
                    else None,
                    fuse_wgrad_accumulation=config.gradient_accumulation_fusion,
                    seq_length=args.seq_length,
                    micro_batch_size=args.micro_batch_size,
                    sequence_parallel=config.sequence_parallel,
                    params_dtype=config.params_dtype,
                    apply_residual_connection_post_layernorm=config.apply_residual_connection_post_layernorm,
                    output_layernorm=False,
                    layer_type="encoder",
                    drop_path_rate=self.drop_path_rates[layer_number - 1],
                    set_parallel_mode=True,
                    fuse_qkv_params=True,
                    **extra_transformer_engine_kwargs)

        if config.virtual_pipeline_model_parallel_size is not None:
            assert config.num_layers % config.virtual_pipeline_model_parallel_size == 0, \
                'num_layers_per_stage must be divisible by ' \
                'virtual_pipeline_model_parallel_size'
            # Number of layers in each model chunk is the number of layers in the stage,
            # divided by the number of model chunks in a stage.
            self.num_layers = self.num_layers // config.virtual_pipeline_model_parallel_size
            # With 8 layers, 2 stages, and 4 model chunks, we want an assignment of
            # layers to stages like (each list is a model chunk):
            # Stage 0: [0]  [2]  [4]  [6]
            # Stage 1: [1]  [3]  [5]  [7]
            # With 8 layers, 2 stages, and 2 virtual stages, we want an assignment of
            # layers to stages like (each list is a model chunk):
            # Stage 0: [0, 1]  [4, 5]
            # Stage 1: [2, 3]  [6, 7]
            offset = mpu.get_virtual_pipeline_model_parallel_rank() * (
                config.num_layers // config.virtual_pipeline_model_parallel_size) + \
                (mpu.get_pipeline_model_parallel_rank() * self.num_layers)
        else:
            # Each stage gets a contiguous set of layers.
            offset = mpu.get_pipeline_model_parallel_rank() * self.num_layers

        if self.num_layers == 0:
            # When a standalone embedding stage is used (e.g.,
            # args.account_for_embedding_in_pipeline_split == True), virtual pipeline ranks
            # on pipeline rank 0 will have zero transformer layers assigned to
            # them. This results in the model's input and output tensors to be
            # the same, which will cause failure for certain output tensor
            # optimizations (e.g., pipeline output deallocation). To remedy
            # this, we assign a 'no-op' layer on these ranks, which will
            # disconnect the input tensor from the output tensor.
            self.num_layers = 1
            self.layers = torch.nn.ModuleList([ NoopTransformerLayer(1) ])
        else:
            self.layers = torch.nn.ModuleList(
                [build_layer(i + 1 + offset) for i in range(self.num_layers)])

        if self.post_process and self.post_norm:
            # Final layer norm before output.
            self.final_norm = get_norm(config)

    def _get_layer(self, layer_number):
        return self.layers[layer_number]

    def _checkpointed_forward(self, hidden_states, attention_mask,
                              encoder_output, enc_dec_attn_mask,
                              rotary_pos_emb, is_first_microbatch):
        """Forward method with activation checkpointing."""
        def custom(start, end):
            def custom_forward(*args, **kwargs):
                x_, *args = args
                for index in range(start, end):
                    layer = self._get_layer(index)
                    x_ = layer(x_, *args, **kwargs)
                return x_
            return custom_forward

        te_forward_kwargs = {}
        if self.transformer_impl == 'transformer_engine':
            te_forward_kwargs['is_first_microbatch'] = is_first_microbatch
            if self.transformer_engine_v_0_10:
                te_forward_kwargs['rotary_pos_emb'] = rotary_pos_emb

        if self.recompute_method == 'uniform':
            # Uniformly divide the total number of Transformer layers and
            # checkpoint the input activation of each divided chunk.
            # A method to further reduce memory usage reducing checkpoints.
            l = 0
            while l < self.num_layers:
                if self.transformer_impl == 'transformer_engine':
                    hidden_states = transformer_engine.pytorch.checkpoint(
                        custom(l, l + self.recompute_num_layers),
                        self.distribute_saved_activations,
                        tensor_parallel.get_cuda_rng_tracker,
                        mpu.get_tensor_model_parallel_group(),
                        hidden_states, attention_mask, encoder_output,
                        enc_dec_attn_mask, **te_forward_kwargs)
                else:
                    hidden_states = tensor_parallel.checkpoint(
                        custom(l, l + self.recompute_num_layers),
                        self.distribute_saved_activations,
                        hidden_states, attention_mask,
                        encoder_output, enc_dec_attn_mask,
                        None, None, None, None, rotary_pos_emb)

                l += self.recompute_num_layers

        elif self.recompute_method == 'block':
            # Checkpoint the input activation of only a set number of individual
            # Transformer layers and skip the rest.
            # A method fully use the device memory removing redundant re-computation.
            for l in range(self.num_layers):
                if l < self.recompute_num_layers:
                    if self.transformer_impl == 'transformer_engine':
                        hidden_states = transformer_engine.pytorch.checkpoint(
                            custom(l, l + 1),
                            self.distribute_saved_activations,
                            tensor_parallel.get_cuda_rng_tracker,
                            mpu.get_tensor_model_parallel_group(),
                            hidden_states, attention_mask, encoder_output,
                            enc_dec_attn_mask, **te_forward_kwargs)
                    else:
                        hidden_states = tensor_parallel.checkpoint(
                            custom(l, l + 1),
                            self.distribute_saved_activations,
                            hidden_states, attention_mask,
                            encoder_output, enc_dec_attn_mask,
                            None, None, None, None, rotary_pos_emb)
                else:
                    if self.transformer_impl == 'transformer_engine':
                        hidden_states = custom(l, l + 1)(
                            hidden_states, attention_mask, encoder_output,
                            enc_dec_attn_mask, **te_forward_kwargs)
                    else:
                        hidden_states = custom(l, l + 1)(
                            hidden_states, attention_mask,
                            encoder_output, enc_dec_attn_mask,
                            None, None, None, None, rotary_pos_emb)
        else:
            raise ValueError("Invalid activation recompute method.")

        return hidden_states

    def set_input_tensor(self, input_tensor):
        """Set input tensor to be used instead of forward()'s input.

        When doing pipeline parallelism the input from the previous
        stage comes from communication, not from the input, so the
        model's forward_step_func won't have it. This function is thus
        used by internal code to bypass the input provided by the
        forward_step_func"""
        self.input_tensor = input_tensor

    def forward(self, hidden_states, attention_mask,
                encoder_output=None, enc_dec_attn_mask=None,
                inference_context=None,
                rotary_pos_emb=None,
                *,
                inference_params=None):
        # hidden_states: [s, b, h]

        inference_context = deprecate_inference_params(inference_context, inference_params)

        # Checks.
        if inference_context:
            assert self.recompute_granularity is None, \
                'inference does not work with activation checkpointing'

        if not self.pre_process:
            # See set_input_tensor()
            hidden_states = self.input_tensor

        # Viewless tensor.
        # - We only need to create a viewless tensor in the case of micro batch
        #   size (mbs) == 1, since in this case, 'hidden_states.transpose()'
        #   above creates a view tensor, and '.contiguous()' is a pass-through.
        #   For mbs >= 2, '.contiguous()' creates a new tensor, eliminating
        #   the need to make it viewless.
        #
        #   However, we don't explicitly check mbs == 1 here because
        #   make_viewless_tensor() has negligible overhead when its input
        #   is already viewless.
        #
        # - For the 'else' case above, calling make_viewless_tensor() here is
        #   likely redundant, since p2p_communication.py (likely originator)
        #   already creates viewless tensors. That said, make_viewless_tensor()
        #   is called here to be future-proof and corner-case-proof.
        hidden_states = core.utils.make_viewless_tensor(
            hidden_states,
            requires_grad=True,
            keep_graph=True,
        )

        # RNG context.
        if self.sequence_parallel:
            rng_context = tensor_parallel.get_cuda_rng_tracker().fork()
        else:
            rng_context = nullcontext()

        # Forward layers.
        with rng_context:
            # The fp8_autocast context manager is a no-op when enabled=True
            # The if...else serves to short circuit name resolution for fp8_autocast
            with transformer_engine.pytorch.fp8_autocast(
                enabled=self.use_fp8,
                fp8_recipe=self.fp8_recipe,
                fp8_group=self.fp8_group
            ) if self.use_fp8 else nullcontext():
                # Determine if the current iteration is first microbatch
                if self.num_microbatches_in_previous_step != get_num_microbatches():
                    self.microbatch_count = 0 # Reset count on new batch size rampup interval
                self.num_microbatches_in_previous_step = get_num_microbatches()
                is_first_microbatch = self.microbatch_count % get_num_microbatches() == 0

                # Forward pass.
                if self.recompute_granularity == 'full':
                    hidden_states = self._checkpointed_forward(hidden_states,
                                                               attention_mask,
                                                               encoder_output,
                                                               enc_dec_attn_mask,
                                                               rotary_pos_emb,
                                                               is_first_microbatch)
                else:
                    forward_kwargs = {
                        'encoder_output': encoder_output,
                        'enc_dec_attn_mask': enc_dec_attn_mask,
                    }
                    if self.transformer_impl == 'local':
                        forward_kwargs['inference_context'] = inference_context
                    else:
                        forward_kwargs['inference_params'] = inference_context

                    if self.transformer_impl == 'transformer_engine':
                        forward_kwargs['is_first_microbatch'] = is_first_microbatch
                        forward_kwargs['checkpoint_core_attention'] = self.checkpoint_core_attention
                        if self.transformer_engine_v_0_10:
                            forward_kwargs['rotary_pos_emb'] = rotary_pos_emb
                    else:
                        forward_kwargs['rotary_pos_emb'] = rotary_pos_emb

                    for index in range(self.num_layers):
                        layer = self._get_layer(index)

                        hidden_states = layer(
                            hidden_states,
                            attention_mask,
                            **forward_kwargs)

                # Skip counter update for eval and activation checkpointing
                if torch.is_grad_enabled() and self.training:
                    self.microbatch_count += 1

        # Final layer norm.
        if self.post_process and self.post_norm:
            hidden_states = self.final_norm(hidden_states)

        return hidden_states

    def load_state_dict(self, state_dict, strict=True):
        """Customize load."""

        # Handle renaming layernorm -> norm in component names
        state_dict_ = {}
        for key in state_dict.keys():
            # Bypass TransformerEngine module parameters.
            if "layernorm_qkv" in key or "layernorm_mlp" in key:
                state_dict_[key] = state_dict[key]
                continue
            newkey = key.replace("layernorm", "norm")
            state_dict_[newkey] = state_dict[key]

        super().load_state_dict(state_dict_, strict)


================================================
FILE: megatron/legacy/model/utils.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

"""Utilities for models."""

import math

import torch

from megatron.training import get_args
from megatron.legacy.model import LayerNorm, RMSNorm
from megatron.core.jit import jit_fuser

def init_method_normal(sigma):
    """Init method based on N(0, sigma)."""
    def init_(tensor):
        return torch.nn.init.normal_(tensor, mean=0.0, std=sigma)

    return init_


def scaled_init_method_normal(sigma, num_layers):
    """Init method based on N(0, sigma/sqrt(2*num_layers)."""
    std = sigma / math.sqrt(2.0 * num_layers)

    def init_(tensor):
        return torch.nn.init.normal_(tensor, mean=0.0, std=std)

    return init_


def attention_mask_func(attention_scores, attention_mask):
    attention_scores.masked_fill_(attention_mask, -10000.0)
    return attention_scores


def get_linear_layer(rows, columns, init_method):
    """Simple linear layer with weight initialization."""
    layer = torch.nn.Linear(rows, columns)
    if get_args().perform_initialization:
        init_method(layer.weight)
    with torch.no_grad():
        layer.bias.zero_()
    return layer


@jit_fuser
def gelu_impl(x):
    """OpenAI's gelu implementation."""
    return 0.5 * x * (1.0 + torch.tanh(0.7978845608028654 * x *

                                       (1.0 + 0.044715 * x * x)))
def openai_gelu(x):
    return gelu_impl(x)


#This is actually Python equivalent of torch.nn.functional.gelu(), also with type hints for ONNX exporter
@jit_fuser
def erf_gelu(x):
    return x * 0.5 * (torch.erf(x / 1.41421).to(dtype=x.dtype)+torch.ones_like(x).to(dtype=x.dtype))


def get_norm(config):
    args = get_args()
    if args.normalization == "LayerNorm":
        return LayerNorm(
            config.hidden_size,
            eps=config.layernorm_epsilon,
            no_persist_layer_norm=not config.persist_layer_norm,
            sequence_parallel=config.sequence_parallel,
            apply_layernorm_1p=args.apply_layernorm_1p)
    elif args.normalization == "RMSNorm":
        if "apply_layernorm_1p" in args and args.apply_layernorm_1p:
            raise NotImplementedError('RMSNorm does not currently support the layernorm_1p formulation.')

        return RMSNorm(dim=config.hidden_size,
                       eps=config.layernorm_epsilon,
                       sequence_parallel=config.sequence_parallel)
    else:
        raise Exception(f"unsupported norm type '{args.normalization}'.")


================================================
FILE: megatron/legacy/model/vision/classification.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""Vision Transformer(VIT) model."""

import torch
from torch.nn.init import trunc_normal_
from megatron.training import get_args
from megatron.legacy.model.utils import get_linear_layer
from megatron.legacy.model.vision.vit_backbone import VitBackbone, VitMlpHead
from megatron.legacy.model.vision.mit_backbone import mit_b3_avg
from megatron.legacy.model.module import MegatronModule

class VitClassificationModel(MegatronModule):
    """Vision Transformer Model."""

    def __init__(self, config, num_classes, finetune=False,
                 pre_process=True, post_process=True):
        super(VitClassificationModel, self).__init__()
        args = get_args()
        self.config = config

        self.hidden_size = args.hidden_size
        self.num_classes = num_classes
        self.finetune = finetune
        self.pre_process = pre_process
        self.post_process = post_process
        self.backbone = VitBackbone(
            config=config,
            pre_process=self.pre_process,
            post_process=self.post_process,
            single_token_output=True
        )

        if self.post_process:
            if not self.finetune:
                self.head = VitMlpHead(config, self.hidden_size, self.num_classes)
            else:
                self.head = get_linear_layer(
                    self.hidden_size,
                    self.num_classes,
                    torch.nn.init.zeros_
                )

    def set_input_tensor(self, input_tensor):
        """See megatron.legacy.model.transformer.set_input_tensor()"""
        self.backbone.set_input_tensor(input_tensor)

    def forward(self, input):
        hidden_states = self.backbone(input)

        if self.post_process:
            hidden_states = self.head(hidden_states)

        return hidden_states


class MitClassificationModel(MegatronModule):
    """Mix vision Transformer Model."""

    def __init__(self, num_classes,
                 pre_process=True, post_process=True):
        super(MitClassificationModel, self).__init__()
        args = get_args()

        self.hidden_size = args.hidden_size
        self.num_classes = num_classes

        self.backbone = mit_b3_avg()
        self.head = torch.nn.Linear(512, num_classes)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, torch.nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, torch.nn.Linear) and m.bias is not None:
                torch.nn.init.constant_(m.bias, 0)

    def set_input_tensor(self, input_tensor):
        """See megatron.legacy.model.transformer.set_input_tensor()"""
        pass

    def forward(self, input):
        hidden_states = self.backbone(input)
        hidden_states = self.head(hidden_states)

        return hidden_states


================================================
FILE: megatron/legacy/model/vision/dino.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the Apache license found in the
# LICENSE file in the root directory of this source tree.

# copied from https://github.com/facebookresearch/dino/blob/main/main_dino.py
# reworked/refactored some parts to make it run in Megatron.
import math
import apex
import einops
import torch
import numpy as np
import torch.nn.functional as F
from torch.nn.init import trunc_normal_
from megatron.training import get_args, print_rank_0
from megatron.legacy.model.utils import get_linear_layer
from megatron.legacy.model.vision.vit_backbone import VitBackbone
from megatron.legacy.model.module import MegatronModule
from megatron.legacy.model.vision.mit_backbone import mit_b5_avg
from megatron.legacy.model.vision.esvit_swin_backbone import get_swin


class DINOLoss(torch.nn.Module):
    def __init__(self, out_dim, ncrops, warmup_teacher_temp, teacher_temp,
                 warmup_teacher_temp_epochs, nepochs, student_temp=0.1,
                 center_momentum=0.9):
        super().__init__()
        self.student_temp = student_temp
        self.center_momentum = center_momentum
        self.ncrops = ncrops
        self.register_buffer("center", torch.zeros(1, out_dim))
        # we apply a warm up for the teacher temperature because
        # a too high temperature makes the training instable at the beginning
        self.teacher_temp_schedule = np.concatenate((
            np.linspace(warmup_teacher_temp,
                        teacher_temp, warmup_teacher_temp_epochs),
            np.ones(nepochs - warmup_teacher_temp_epochs) * teacher_temp
        ))
        self.teacher_temp = teacher_temp

    def forward(self, student_output, teacher_output, iteration):
        """
        Cross-entropy between softmax outputs of the teacher
        and student network.
        """
        args = get_args()
        student_out = student_output / self.student_temp
        student_out = student_out.chunk(self.ncrops)

        epoch = iteration // args.iter_per_epoch

        # teacher centering and sharpening
        temp = self.teacher_temp_schedule[epoch]
        teacher_out = F.softmax((teacher_output - self.center) / temp, dim=-1)

        teacher_out = teacher_out.detach().chunk(2)

        total_loss = 0
        n_loss_terms = 0
        for iq, q in enumerate(teacher_out):
            for v in range(len(student_out)):
                if v == iq:
                    # we skip cases where student and teacher operate on the same view
                    continue
                loss = torch.sum(-q * F.log_softmax(student_out[v], dim=-1), dim=-1)
                total_loss += loss.mean()
                n_loss_terms += 1
        total_loss /= n_loss_terms
        self.update_center(teacher_output)
        return total_loss

    @torch.no_grad()
    def update_center(self, teacher_output):
        """
        Update center used for teacher output.
        """
        batch_center = torch.sum(teacher_output, dim=0, keepdim=True)
        torch.distributed.all_reduce(batch_center)
        batch_center = batch_center / (len(teacher_output) * torch.distributed.get_world_size())
        self.center = self.center * self.center_momentum + batch_center * (1 - self.center_momentum)

class DINOHead(torch.nn.Module):
    def __init__(self, in_dim, out_dim, norm_last_layer=True, nlayers=3):
        super().__init__()
        args = get_args()
        hidden_dim = args.dino_head_hidden_size
        bottleneck_dim = args.dino_bottleneck_size
        nlayers = max(nlayers, 1)
        if nlayers == 1:
            self.mlp = torch.nn.Linear(in_dim, bottleneck_dim)
        else:
            layers = [torch.nn.Linear(in_dim, hidden_dim)]
            layers.append(torch.nn.GELU())
            for _ in range(nlayers - 2):
                layers.append(torch.nn.Linear(hidden_dim, hidden_dim))
                layers.append(torch.nn.GELU())
            layers.append(torch.nn.Linear(hidden_dim, bottleneck_dim))
            self.mlp = torch.nn.Sequential(*layers)
        self.apply(self._init_weights)
        self.last_layer = torch.nn.utils.weight_norm(torch.nn.Linear(bottleneck_dim, out_dim, bias=False))
        self.last_layer.weight_g.data.fill_(1)
        if norm_last_layer:
            self.last_layer.weight_g.requires_grad = False

    def _init_weights(self, m):
        if isinstance(m, torch.nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, torch.nn.Linear) and m.bias is not None:
                torch.nn.init.constant_(m.bias, 0)

    def forward(self, x):
        x = self.mlp(x)
        x = torch.nn.functional.normalize(x, dim=-1, p=2)
        x = self.last_layer(x)
        return x


class MultiCropWrapper(MegatronModule):

    """
    Perform forward pass separately on each resolution input.
    The inputs corresponding to a single resolution are clubbed and single
    forward is run on the same resolution inputs. Hence we do several
    forward passes = number of different resolutions used. We then
    concatenate all the output features and run the head forward on these
    concatenated features.
    """
    def __init__(self, backbone, head):
        super(MultiCropWrapper, self).__init__()
        # disable layers dedicated to ImageNet labels classification
        #backbone.fc, backbone.head = torch.nn.Identity(), torch.nn.Identity()
        self.backbone = backbone
        self.head = head

    def forward(self, x):
        # convert to list
        if not isinstance(x, list):
            x = [x]
        idx_crops = torch.cumsum(torch.unique_consecutive(
            torch.tensor([inp.shape[-1] for inp in x]),
            return_counts=True,
        )[1], 0)

        start_idx = 0
        for end_idx in idx_crops:
            _out = self.backbone(torch.cat(x[start_idx: end_idx]))
            if start_idx == 0:
                output = _out
            else:
                output = torch.cat((output, _out))
            start_idx = end_idx
        # Run the head forward on the concatenated features.
        if self.training:
            return self.head(output)
        else:
            return output


def cosine_scheduler(base_value, final_value, epochs, niter_per_ep,
                     warmup_epochs=0, start_warmup_value=0):
    warmup_schedule = np.array([])
    warmup_iters = warmup_epochs * niter_per_ep
    if warmup_epochs > 0:
        warmup_schedule = \
                np.linspace(start_warmup_value, base_value, warmup_iters)

    iters = np.arange(epochs * niter_per_ep - warmup_iters)
    schedule = final_value + 0.5 * (base_value - final_value) \
        * (1 + np.cos(np.pi * iters / len(iters)))

    schedule = np.concatenate((warmup_schedule, schedule))
    assert len(schedule) == epochs * niter_per_ep
    return schedule


def get_student_backbone_and_num_features(config, pre_process=True, post_process=True):
    args = get_args()

    if args.vision_backbone_type == 'vit':
        student = VitBackbone(config,
                              pre_process=pre_process,
                              post_process=post_process,
                              drop_path_rate=0.1,
                              single_token_output=True)
        num_features = args.hidden_size
    elif args.vision_backbone_type == 'mit':
        student = mit_b5_avg(drop_path_rate=0.1)
        num_features = 512
    elif args.vision_backbone_type == 'swin':
        student = get_swin()
        num_features = student.num_features
    else:
        raise Exception('{} vision backbone is not supported.'.format(
                              args.vision_backbone_type))

    return student, num_features

def get_teacher_backbone_and_num_features(config, pre_process=True, post_process=True):
    args = get_args()

    if args.vision_backbone_type == 'vit':
        teacher = VitBackbone(config,
                              pre_process=pre_process,
                              post_process=post_process,
                              single_token_output=True)
        num_features = args.hidden_size
    elif args.vision_backbone_type == 'mit':
        teacher = mit_b5_avg(drop_path_rate=0.0)
        num_features = 512
    elif args.vision_backbone_type == 'swin':
        teacher = get_swin(is_teacher=True)
        num_features = teacher.num_features
    else:
        raise Exception('{} vision backbone is not supported.'.format(
                              args.vision_backbone_type))
    return teacher, num_features


class DINOPretrainModel(MegatronModule):
    def __init__(self, config, pre_process=True, post_process=True):
        super(DINOPretrainModel, self).__init__()
        args = get_args()
        self.config = config
        self.out_dim = 65536

        self.dino_loss = DINOLoss(
            self.out_dim,
            args.dino_local_crops_number + 2,
            args.dino_warmup_teacher_temp,
            args.dino_teacher_temp,
            args.dino_warmup_teacher_temp_epochs,
            300,
        )

        self.pre_process = pre_process
        self.post_process = post_process
        self.momentum_teacher = 0.996

        student_backbone, num_features = \
            get_student_backbone_and_num_features(config, pre_process, post_process)

        self.student = MultiCropWrapper(
            student_backbone,
            DINOHead(num_features, self.out_dim,
                     norm_last_layer=args.dino_norm_last_layer)
        )

        self.momentum_schedule = cosine_scheduler(
            self.momentum_teacher, 1,
            args.train_iters // args.iter_per_epoch,
            args.iter_per_epoch
        )

        teacher_backbone, num_features = \
            get_teacher_backbone_and_num_features(config, pre_process, post_process)
        self.teacher = MultiCropWrapper(
            teacher_backbone,
            DINOHead(num_features, self.out_dim)
        )
        self.teacher.load_state_dict(self.student.state_dict())

        for p in self.teacher.parameters():
            if hasattr(p, "requires_grad") and p.requires_grad is not None:
                p.requires_grad = False

    def set_input_tensor(self, tensor):
        pass

    def forward(self, input):
        student_output = None
        if self.training:
            student_output = self.student(input)
            teacher_output = self.teacher(input[:2])
        else:
            teacher_output = self.teacher(input)
        return student_output, teacher_output

    def cancel_gradients_last_layer(self, iteration):
        args = get_args()
        epoch = iteration // args.iter_per_epoch
        if epoch < args.dino_freeze_last_layer:
            for n, p in self.student.named_parameters():
                if "last_layer" in n:
                    p.grad = None

    def update_momentum(self, iteration):
        with torch.no_grad():
            m = self.momentum_schedule[iteration]
            for param_q, param_k in zip(self.student.parameters(), self.teacher.parameters()):
                param_k.data.mul_(m).add_((1 - m) * param_q.detach().data)


================================================
FILE: megatron/legacy/model/vision/esvit_swin_backbone.py
================================================
# Copyright (c) 2021 Microsoft
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# --------------------------------------------------------
# Modified by Chunyuan Li (chunyl@microsoft.com)
# Swin Transformer
# --------------------------------------------------------

import os
import logging
import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import partial
import torch.distributed as dist
from torch.nn.init import trunc_normal_
from megatron.legacy.model.transformer import DropPath
from megatron.training import get_args
from megatron.legacy.model import LayerNorm
import numpy as np
from math import sqrt


class Mlp(nn.Module):
    def __init__(self, in_features, hidden_features=None,
                 out_features=None, act_layer=nn.GELU, drop=0.):
        super(Mlp, self).__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x


def window_partition(x, window_size):
    """
    Args:
        x: (B, H, W, C)
        window_size (int): window size
    Returns:
        windows: (num_windows*B, window_size, window_size, C)
    """
    B, H, W, C = x.shape
    x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)
    windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)
    return windows


def window_reverse(windows, window_size, H, W):
    """
    Args:
        windows: (num_windows*B, window_size, window_size, C)
        window_size (int): Window size
        H (int): Height of image
        W (int): Width of image
    Returns:
        x: (B, H, W, C)
    """
    B = int(windows.shape[0] / (H * W / window_size / window_size))
    x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1)
    x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
    return x


class WindowAttention(nn.Module):
    r"""Window based multi-head self attention (W-MSA) module with relative position bias.
    It supports both of shifted and non-shifted window.
    Args:
        dim (int): Number of input channels.
        window_size (tuple[int]): The height and width of the window.
        num_heads (int): Number of attention heads.
        qkv_bias (bool, optional):  If True, add a learnable bias to query, key, value. Default: True
        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set
        attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0
        proj_drop (float, optional): Dropout ratio of output. Default: 0.0
    """

    def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.):

        super(WindowAttention, self).__init__()
        self.dim = dim
        self.window_size = window_size  # Wh, Ww
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = qk_scale or head_dim ** -0.5

        # define a parameter table of relative position bias
        self.relative_position_bias_table = nn.Parameter(
            torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads))  # 2*Wh-1 * 2*Ww-1, nH

        # get pair-wise relative position index for each token inside the window
        coords_h = torch.arange(self.window_size[0])
        coords_w = torch.arange(self.window_size[1])
        coords = torch.stack(torch.meshgrid([coords_h, coords_w]))  # 2, Wh, Ww
        coords_flatten = torch.flatten(coords, 1)  # 2 Wh*Ww
        relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]  # 2, Wh*Ww, Wh*Ww
        relative_coords = relative_coords.permute(1, 2, 0).contiguous()  # Wh*Ww, Wh*Ww, 2
        relative_coords[:, :, 0] += self.window_size[0] - 1  # shift to start from 0
        relative_coords[:, :, 1] += self.window_size[1] - 1
        relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
        relative_position_index = relative_coords.sum(-1)  # Wh*Ww, Wh*Ww
        self.register_buffer("relative_position_index", relative_position_index)

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

        trunc_normal_(self.relative_position_bias_table, std=.02)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x, mask=None):
        """
        Args:
            x: input features with shape of (num_windows*B, N, C)
            mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
        """
        B_, N, C = x.shape
        qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]  # make torchscript happy (cannot use tensor as tuple)

        q = q * self.scale
        attn = (q @ k.transpose(-2, -1))

        relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view(
            self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1)  # Wh*Ww,Wh*Ww,nH
        relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous()  # nH, Wh*Ww, Wh*Ww
        attn = attn + relative_position_bias.unsqueeze(0)

        if mask is not None:
            nW = mask.shape[0]
            attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0).type(attn.type())
            attn = attn.view(-1, self.num_heads, N, N)
            attn = self.softmax(attn)
        else:
            attn = self.softmax(attn)

        attn_out = attn
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x, attn_out

    def extra_repr(self) -> str:
        return f'dim={self.dim}, window_size={self.window_size}, num_heads={self.num_heads}'

    def flops(self, N):
        # calculate flops for 1 window with token length of N
        flops = 0
        # qkv = self.qkv(x)
        flops += N * self.dim * 3 * self.dim
        # attn = (q @ k.transpose(-2, -1))
        flops += self.num_heads * N * (self.dim // self.num_heads) * N
        #  x = (attn @ v)
        flops += self.num_heads * N * N * (self.dim // self.num_heads)
        # x = self.proj(x)
        flops += N * self.dim * self.dim
        return flops

    @staticmethod
    def compute_macs(module, input, output):
        B, N, C = input[0].shape

        module.__flops__ += module.flops(N) * B


class SwinTransformerBlock(nn.Module):
    r"""Swin Transformer Block.
    Args:
        dim (int): Number of input channels.
        input_resolution (tuple[int]): Input resulotion.
        num_heads (int): Number of attention heads.
        window_size (int): Window size.
        shift_size (int): Shift size for SW-MSA.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
        drop (float, optional): Dropout rate. Default: 0.0
        attn_drop (float, optional): Attention dropout rate. Default: 0.0
        drop_path (float, optional): Stochastic depth rate. Default: 0.0
        act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
    """

    def __init__(self, dim, input_resolution, num_heads, window_size=7, shift_size=0,
                 mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0.,
                 act_layer=nn.GELU, norm_layer=nn.LayerNorm):
        super().__init__()
        self.dim = dim
        self.input_resolution = input_resolution
        self.num_heads = num_heads
        self.window_size = window_size
        self.shift_size = shift_size
        self.mlp_ratio = mlp_ratio
        if min(self.input_resolution) <= self.window_size:
            # if window size is larger than input resolution, we don't partition windows
            self.shift_size = 0
            self.window_size = min(self.input_resolution)
        assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size"

        self.norm1 = norm_layer(dim)
        self.attn = WindowAttention(
            dim, window_size=(self.window_size, self.window_size), num_heads=num_heads,
            qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)

        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)

        self.H = input_resolution[0]
        self.W = input_resolution[1]

        self.attn_mask_dict = {}


    def create_attn_mask(self, H, W):
        # calculate attention mask for SW-MSA

        Hp = int(np.ceil(H / self.window_size)) * self.window_size
        Wp = int(np.ceil(W / self.window_size)) * self.window_size
        img_mask = torch.zeros((1, Hp, Wp, 1))  # 1 Hp Wp 1
        h_slices = (slice(0, -self.window_size),
                    slice(-self.window_size, -self.shift_size),
                    slice(-self.shift_size, None))
        w_slices = (slice(0, -self.window_size),
                    slice(-self.window_size, -self.shift_size),
                    slice(-self.shift_size, None))
        cnt = 0
        for h in h_slices:
            for w in w_slices:
                img_mask[:, h, w, :] = cnt
                cnt += 1

        mask_windows = window_partition(img_mask, self.window_size)  # nW, window_size, window_size, 1
        mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
        attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
        attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))

        return attn_mask


    def forward(self, x):
        B, L, C = x.shape
        H = int(sqrt(L))
        W = H

        shortcut = x
        x = self.norm1(x)
        x = x.view(B, H, W, C)

        # pad feature maps to multiples of window size
        pad_l = pad_t = 0
        pad_r = (self.window_size - W % self.window_size) % self.window_size
        pad_b = (self.window_size - H % self.window_size) % self.window_size
        x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b))
        _, Hp, Wp, _ = x.shape

        # cyclic shift
        if self.shift_size > 0:
            shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))

            if H in self.attn_mask_dict.keys():
                attn_mask = self.attn_mask_dict[H]
            else:
                self.attn_mask_dict[H] = self.create_attn_mask(self.H, self.W).to(x.device)
                attn_mask = self.attn_mask_dict[H]

        else:
            shifted_x = x
            attn_mask = None

        # partition windows
        x_windows = window_partition(shifted_x, self.window_size)  # nW*B, window_size, window_size, C
        x_windows = x_windows.view(-1, self.window_size * self.window_size, C)  # nW*B, window_size*window_size, C

        # W-MSA/SW-MSA
        attn_windows, attn = self.attn(x_windows, attn_mask)  # nW*B, window_size*window_size, C

        # merge windows
        attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C)
        shifted_x = window_reverse(attn_windows, self.window_size, Hp, Wp)  # B H' W' C

        # reverse cyclic shift
        if self.shift_size > 0:
            x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2))
        else:
            x = shifted_x

        if pad_r > 0 or pad_b > 0:
            x = x[:, :H, :W, :].contiguous()

        x = x.view(B, H * W, C)

        # FFN
        x = shortcut + self.drop_path(x)
        x = x + self.drop_path(self.mlp(self.norm2(x)))

        return x, attn

    def extra_repr(self) -> str:
        return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \
               f"window_size={self.window_size}, shift_size={self.shift_size} mlp_ratio={self.mlp_ratio}"

    def flops(self):
        flops = 0
        H, W = self.input_resolution
        # norm1
        flops += self.dim * H * W
        # W-MSA/SW-MSA
        nW = H * W / self.window_size / self.window_size
        flops += nW * self.attn.flops(self.window_size * self.window_size)
        # mlp
        flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio
        # norm2
        flops += self.dim * H * W
        return flops


class PatchMerging(nn.Module):
    r"""Patch Merging Layer.
    Args:
        input_resolution (tuple[int]): Resolution of input feature.
        dim (int): Number of input channels.
        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
    """

    def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm):
        super().__init__()
        self.input_resolution = input_resolution
        self.dim = dim
        self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)
        self.norm = norm_layer(4 * dim)

    def forward(self, x):
        """ Forward function.
        Args:
            x: Input feature, tensor size (B, H*W, C).
            H, W: Spatial resolution of the input feature.
        """
        B, L, C = x.shape
        H = int(sqrt(L))
        W = H

        x = x.view(B, H, W, C)

        # padding
        pad_input = (H % 2 == 1) or (W % 2 == 1)
        if pad_input:
            x = F.pad(x, (0, 0, 0, W % 2, 0, H % 2))

        x0 = x[:, 0::2, 0::2, :]  # B H/2 W/2 C
        x1 = x[:, 1::2, 0::2, :]  # B H/2 W/2 C
        x2 = x[:, 0::2, 1::2, :]  # B H/2 W/2 C
        x3 = x[:, 1::2, 1::2, :]  # B H/2 W/2 C
        x = torch.cat([x0, x1, x2, x3], -1)  # B H/2 W/2 4*C
        x = x.view(B, -1, 4 * C)  # B H/2*W/2 4*C

        x = self.norm(x)
        x = self.reduction(x)

        return x


    def extra_repr(self) -> str:
        return f"input_resolution={self.input_resolution}, dim={self.dim}"

    def flops(self):
        H, W = self.input_resolution
        flops = H * W * self.dim
        flops += (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim
        return flops


class BasicLayer(nn.Module):
    """A basic Swin Transformer layer for one stage.
    Args:
        dim (int): Number of input channels.
        input_resolution (tuple[int]): Input resulotion.
        depth (int): Number of blocks.
        num_heads (int): Number of attention heads.
        window_size (int): Window size.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
        drop (float, optional): Dropout rate. Default: 0.0
        attn_drop (float, optional): Attention dropout rate. Default: 0.0
        drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
        downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
    """

    def __init__(self, dim, input_resolution, depth, num_heads, window_size,
                 mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0.,
                 drop_path=0., norm_layer=nn.LayerNorm, downsample=None):

        super().__init__()
        self.dim = dim
        self.input_resolution = input_resolution
        self.depth = depth

        self.blocks = nn.ModuleList([
            SwinTransformerBlock(dim=dim, input_resolution=input_resolution,
                                 num_heads=num_heads, window_size=window_size,
                                 shift_size=0 if (i % 2 == 0) else window_size // 2,
                                 mlp_ratio=mlp_ratio,
                                 qkv_bias=qkv_bias, qk_scale=qk_scale,
                                 drop=drop, attn_drop=attn_drop,
                                 drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path,
                                 norm_layer=norm_layer)
            for i in range(depth)])
        if downsample is not None:
            self.downsample = downsample(input_resolution, dim=dim, norm_layer=norm_layer)
        else:
            self.downsample = None

    def forward(self, x):
        for blk in self.blocks:
            x, _ = blk(x)
        if self.downsample is not None:
            x = self.downsample(x)
        return x

    def forward_with_features(self, x):
        fea = []
        for blk in self.blocks:
            x, _ = blk(x)
            fea.append(x)
        if self.downsample is not None:
            x = self.downsample(x)
        return x, fea

    def forward_with_attention(self, x):
        attns = []
        for blk in self.blocks:
            x, attn = blk(x)
            attns.append(attn)
        if self.downsample is not None:
            x = self.downsample(x)
        return x, attns


    def extra_repr(self) -> str:
        return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}"

    def flops(self):
        flops = 0
        for blk in self.blocks:
            flops += blk.flops()
        if self.downsample is not None:
            flops += self.downsample.flops()
        return flops


class PatchEmbed(nn.Module):
    """ Image to Patch Embedding
    """

    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, norm_layer=None):
        super().__init__()
        img_size = (img_size, img_size)
        patch_size = (patch_size, patch_size)
        patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]]
        self.img_size = img_size
        self.patch_size = patch_size
        self.patches_resolution = patches_resolution
        self.num_patches = patches_resolution[0] * patches_resolution[1]

        self.in_chans = in_chans
        self.embed_dim = embed_dim

        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
        if norm_layer is not None:
            self.norm = norm_layer(embed_dim)
        else:
            self.norm = None

    def forward(self, x):
        B, C, H, W = x.shape

        x = self.proj(x).flatten(2).transpose(1, 2)  # B Ph*Pw C
        if self.norm is not None:
            x = self.norm(x)
        return x


    def flops(self):
        Ho, Wo = self.patches_resolution
        flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1])
        if self.norm is not None:
            flops += Ho * Wo * self.embed_dim
        return flops

class SwinTransformer(nn.Module):
    r""" Swin Transformer
        A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows`  -
          https://arxiv.org/pdf/2103.14030
    Args:
        img_size (int | tuple(int)): Input image size.
        patch_size (int | tuple(int)): Patch size.
        in_chans (int): Number of input channels.
        num_classes (int): Number of classes for classification head.
        embed_dim (int): Embedding dimension.
        depths (tuple(int)): Depth of Swin Transformer layers.
        num_heads (tuple(int)): Number of attention heads in different layers.
        window_size (int): Window size.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: Truee
        qk_scale (float): Override default qk scale of head_dim ** -0.5 if set.
        drop_rate (float): Dropout rate.
        attn_drop_rate (float): Attention dropout rate.
        drop_path_rate (float): Stochastic depth rate.
        norm_layer (nn.Module): normalization layer.
        ape (bool): If True, add absolute position embedding to the patch embedding.
        patch_norm (bool): If True, add normalization after patch embedding.
    """

    def __init__(self, img_size=224, patch_size=4, in_chans=3, num_classes=1000,
                 embed_dim=96, depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24],
                 window_size=7, mlp_ratio=4., qkv_bias=True, qk_scale=None,
                 drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1,
                 norm_layer=nn.LayerNorm, ape=False, patch_norm=True, **kwargs):
        super().__init__()

        self.num_classes = num_classes
        self.num_layers = len(depths)
        self.embed_dim = embed_dim
        self.ape = ape
        self.patch_norm = patch_norm
        self.num_features = int(embed_dim * 2 ** (self.num_layers - 1))
        self.mlp_ratio = mlp_ratio

        self.patch_embed = PatchEmbed(
            img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim,
            norm_layer=norm_layer if self.patch_norm else None)
        num_patches = self.patch_embed.num_patches
        patches_resolution = self.patch_embed.patches_resolution
        self.patches_resolution = patches_resolution

        if self.ape:
            self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
            trunc_normal_(self.absolute_pos_embed, std=.02)

        self.pos_drop = nn.Dropout(p=drop_rate)

        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule
        self.layers = nn.ModuleList()
        for i_layer in range(self.num_layers):
            layer = BasicLayer(dim=int(embed_dim * 2 ** i_layer),
                               input_resolution=(patches_resolution[0] // (2 ** i_layer),
                                                 patches_resolution[1] // (2 ** i_layer)),
                               depth=depths[i_layer],
                               num_heads=num_heads[i_layer],
                               window_size=window_size,
                               mlp_ratio=self.mlp_ratio,
                               qkv_bias=qkv_bias, qk_scale=qk_scale,
                               drop=drop_rate, attn_drop=attn_drop_rate,
                               drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],
                               norm_layer=norm_layer,
                               downsample=PatchMerging if (i_layer < self.num_layers - 1) else None)
            self.layers.append(layer)

        self.norm = norm_layer(self.num_features)
        self.avgpool = nn.AdaptiveAvgPool1d(1)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    @torch.jit.ignore
    def no_weight_decay(self):
        return {'absolute_pos_embed'}

    @torch.jit.ignore
    def no_weight_decay_keywords(self):
        # todo: to be implemented
        return {'relative_position_bias_table'}

    def forward(self, x):
        x = self.patch_embed(x)
        if self.ape:
            x = x + self.absolute_pos_embed
        x = self.pos_drop(x)

        for layer in self.layers:
            x = layer(x)

        x_region = self.norm(x)  # B L C
        x = self.avgpool(x_region.transpose(1, 2))  # B C 1
        x = torch.flatten(x, 1)

        return x


    def forward_feature_maps(self, x):
        x = self.patch_embed(x)
        if self.ape:
            x = x + self.absolute_pos_embed
        x = self.pos_drop(x)

        for layer in self.layers:
            x = layer(x)

        x_grid = self.norm(x)  # B L C
        x = self.avgpool(x_grid.transpose(1, 2))  # B C 1
        x = torch.flatten(x, 1)

        return x, x_grid


    def forward_selfattention(self, x, n=1):
        # n=1 return the last layer attn map; otherwise return attn maps in all layers

        
        x = self.patch_embed(x)
        if self.ape:
            x = x + self.absolute_pos_embed
        x = self.pos_drop(x)

        if n==1:
            return self.forward_last_selfattention(x)
        else:
            return self.forward_all_selfattention(x)

    def forward_last_selfattention(self, x):

        for i, layer in enumerate(self.layers):
            if i < len(self.layers) - 1:
                x = layer(x)
            else:
                x, attns = layer.forward_with_attention(x)
                return attns[-1]

    def forward_all_selfattention(self, x):
        attn_out = []

        for layer in self.layers:
            x, attns = layer.forward_with_attention(x)
            attn_out += attns

        return attn_out


    def forward_return_n_last_blocks(self, x, n=1, return_patch_avgpool=False, depth=[]):

        num_blks = sum(depth)
        start_idx = num_blks - n

        sum_cur = 0
        for i, d in enumerate(depth):
            sum_cur_new = sum_cur + d
            if start_idx >= sum_cur and start_idx < sum_cur_new:
                start_stage = i
                start_blk = start_idx - sum_cur
            sum_cur = sum_cur_new


        x = self.patch_embed(x)
        if self.ape:
            x = x + self.absolute_pos_embed
        x = self.pos_drop(x)

        # we will return the averaged token features from the `n` last blocks
        # note: there is no [CLS] token in Swin Transformer
        output = []
        s = 0
        for i, layer in enumerate(self.layers):
            x, fea = layer.forward_with_features(x)

            if i >= start_stage:
                for x_ in fea[start_blk:]:

                    if i == len(self.layers)-1: # use the norm in the last stage
                        x_ = self.norm(x_)

                    x_avg = torch.flatten(self.avgpool(x_.transpose(1, 2)), 1)  # B C     
                    # print(f'Stage {i},  x_avg {x_avg.shape}')          
                    output.append(x_avg)

                start_blk = 0

        return torch.cat(output, dim=-1)


    def flops(self):
        flops = 0
        flops += self.patch_embed.flops()
        for i, layer in enumerate(self.layers):
            flops += layer.flops()
            if dist.get_rank() == 0:
                print(f"GFLOPs layer_{i}: {layer.flops() / 1e9}")
        flops += self.num_features * self.patches_resolution[0] * self.patches_resolution[1] // (2 ** self.num_layers)
        flops += self.num_features * self.num_classes
        return flops

    def init_weights(self, pretrained='', pretrained_layers=[], verbose=True):
        if os.path.isfile(pretrained):
            pretrained_dict = torch.load(pretrained, map_location='cpu')
            logging.info(f'=> loading pretrained model {pretrained}')
            model_dict = self.state_dict()
            pretrained_dict = {
                k: v for k, v in pretrained_dict.items()
                if k in model_dict.keys()
            }
            need_init_state_dict = {}
            for k, v in pretrained_dict.items():
                need_init = (
                        k.split('.')[0] in pretrained_layers
                        or pretrained_layers[0] == '*'
                        or 'relative_position_index' not in k
                        or 'attn_mask' not in k
                )

                if need_init:
                    if verbose:
                        logging.info(f'=> init {k} from {pretrained}')

                    if 'relative_position_bias_table' in k and v.size() != model_dict[k].size():
                        relative_position_bias_table_pretrained = v
                        relative_position_bias_table_current = model_dict[k]
                        L1, nH1 = relative_position_bias_table_pretrained.size()
                        L2, nH2 = relative_position_bias_table_current.size()
                        if nH1 != nH2:
                            logging.info(f"Error in loading {k}, passing")
                        else:
                            if L1 != L2:
                                logging.info(
                                    '=> load_pretrained: resized variant: {} to {}'
                                        .format((L1, nH1), (L2, nH2))
                                )
                                S1 = int(L1 ** 0.5)
                                S2 = int(L2 ** 0.5)
                                relative_position_bias_table_pretrained_resized = torch.nn.functional.interpolate(
                                    relative_position_bias_table_pretrained.permute(1, 0).view(1, nH1, S1, S1),
                                    size=(S2, S2),
                                    mode='bicubic')
                                v = relative_position_bias_table_pretrained_resized.view(nH2, L2).permute(1, 0)

                    if 'absolute_pos_embed' in k and v.size() != model_dict[k].size():
                        absolute_pos_embed_pretrained = v
                        absolute_pos_embed_current = model_dict[k]
                        _, L1, C1 = absolute_pos_embed_pretrained.size()
                        _, L2, C2 = absolute_pos_embed_current.size()
                        if C1 != C1:
                            logging.info(f"Error in loading {k}, passing")
                        else:
                            if L1 != L2:
                                logging.info(
                                    '=> load_pretrained: resized variant: {} to {}'
                                        .format((1, L1, C1), (1, L2, C2))
                                )
                                S1 = int(L1 ** 0.5)
                                S2 = int(L2 ** 0.5)
                                absolute_pos_embed_pretrained = absolute_pos_embed_pretrained.reshape(-1, S1, S1, C1)
                                absolute_pos_embed_pretrained = absolute_pos_embed_pretrained.permute(0, 3, 1, 2)
                                absolute_pos_embed_pretrained_resized = torch.nn.functional.interpolate(
                                    absolute_pos_embed_pretrained, size=(S2, S2), mode='bicubic')
                                v = absolute_pos_embed_pretrained_resized.permute(0, 2, 3, 1).flatten(1, 2)

                    need_init_state_dict[k] = v
            self.load_state_dict(need_init_state_dict, strict=False)

    def freeze_pretrained_layers(self, frozen_layers=[]):
        for name, module in self.named_modules():
            if (
                    name.split('.')[0] in frozen_layers
                    or '.'.join(name.split('.')[0:2]) in frozen_layers
                    or (len(frozen_layers) > 0 and frozen_layers[0] == '*')
            ):
                for _name, param in module.named_parameters():
                    param.requires_grad = False
                logging.info(
                    '=> set param {} requires grad to False'
                        .format(name)
                )
        for name, param in self.named_parameters():
            if (
                    name.split('.')[0] in frozen_layers
                    or (len(frozen_layers) > 0 and frozen_layers[0] == '*')
                    and param.requires_grad is True
            ):
                param.requires_grad = False
                logging.info(
                    '=> set param {} requires grad to False'
                        .format(name)
                )
        return self


def get_swin(is_teacher=False):
    args = get_args()

    if args.swin_backbone_type == "tiny":
        embed_dim = 96
        depths = [2, 2, 6, 2]
        num_heads = [3, 6, 12, 24]
        drop_path_rate = 0.1
    elif args.swin_backbone_type == 'h3':
        embed_dim = 384
        depths = [2, 2, 18, 2]
        num_heads = [6, 12, 24, 48]
        drop_path_rate = 0.2
    else:
        embed_dim = 128
        depths = [2, 2, 18, 2]
        num_heads = [4, 8, 16, 32]
        drop_path_rate = 0.2

    swin = SwinTransformer(
        img_size=224,
        in_chans=3,
        num_classes=1000,
        patch_size=4,
        embed_dim=embed_dim,
        depths=depths,
        num_heads=num_heads,
        window_size=7,
        mlp_ratio=4,
        qkv_bias=True,
        drop_rate=0,
        attn_drop_rate=0,
        drop_path_rate=(0.0 if is_teacher else drop_path_rate),
        norm_layer=partial(LayerNorm, eps=1e-6),
        ape=False,
        patch_norm=True,
    )

    return swin


================================================
FILE: megatron/legacy/model/vision/inpainting.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION.  All rights reserved.
#
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.

import math
import apex
import einops
import torch
import torch.nn.functional as F
from megatron.training import get_args, print_rank_0
from megatron.legacy.model.utils import get_linear_layer
from megatron.legacy.model.vision.vit_backbone import VitBackbone
from megatron.legacy.model.module import MegatronModule
from megatron.legacy.model.vision.mit_backbone import mit_b3
from megatron.legacy.model.vision.utils import resize


class VitInpaintingModel(MegatronModule):

    def __init__(self, config, pre_process=True, post_process=True):
        super(VitInpaintingModel, self).__init__()
        args = get_args()

        self.config = config
        self.pre_process = pre_process
        self.post_process = post_process
        self.hidden_size = config.hidden_size
        self.backbone = VitBackbone(
            config=config,
            pre_process=self.pre_process,
            post_process=self.post_process,
            class_token=False,
        )
        self.patch_dim = args.patch_dim
        self.img_h = args.img_h
        self.img_w = args.img_w
        self.seq_length = args.seq_length
        # full mask

        if self.post_process:
            self.linear_decoder = get_linear_layer(
                self.hidden_size,
                self.backbone.flatten_dim,
                torch.nn.init.zeros_
            )

    def set_input_tensor(self, input_tensor):
        self.backbone.set_input_tensor(input_tensor)

    def forward(self, input):

        hidden_states = self.backbone(input)

        if not self.post_process:
            return hidden_states
        decoded_output = self.linear_decoder(hidden_states)
        output = einops.rearrange(
                decoded_output,
                "b (h w) (p1 p2 c) -> b c (h p1) (w p2)",
                p1=self.patch_dim,
                p2=self.patch_dim,
                h=self.img_h//self.patch_dim,
                w=self.img_w//self.patch_dim,
            )

        return output


class MLP(torch.nn.Module):
    """
    Linear Embedding
    """
    def __init__(self, input_dim=2048, embed_dim=768):
        super().__init__()
        self.proj = torch.nn.Linear(input_dim, embed_dim)

    def forward(self, x):
        x = x.flatten(2).transpose(1, 2)
        x = self.proj(x)
        return x


class MitInpaintingModel(MegatronModule):
    """Mix vision Transformer Model."""

    def __init__(self, pre_process=True, post_process=True):
        super(MitInpaintingModel, self).__init__()
        self.pre_process = pre_process
        self.post_process = post_process

        args = get_args()
        self.patch_dim = args.patch_dim
        self.img_h = args.img_h
        self.img_w = args.img_w
        self.flatten_dim = self.patch_dim * self.patch_dim * 3
        self.backbone = mit_b3()

        self.in_channels = [64, 128, 320, 512]
        self.embedding_dim = 768

        c1_in_channels, c2_in_channels, c3_in_channels, c4_in_channels = self.in_channels

        self.linear_c4 = MLP(input_dim=c4_in_channels, embed_dim=self.embedding_dim)
        self.linear_c3 = MLP(input_dim=c3_in_channels, embed_dim=self.embedding_dim)
        self.linear_c2 = MLP(input_dim=c2_in_channels, embed_dim=self.embedding_dim)
        self.linear_c1 = MLP(input_dim=c1_in_channels, embed_dim=self.embedding_dim)

        self.conv_fuse = torch.nn.Conv2d(self.embedding_dim*4, self.embedding_dim, 1, 1, bias=False)
        self.norm = apex.parallel.SyncBatchNorm(self.embedding_dim)
        self.dropout = torch.nn.Dropout2d(0.1)

        self.linear_pred = torch.nn.Conv2d(self.embedding_dim, self.flatten_dim, kernel_size=1)

    def set_input_tensor(self, input_tensor):
        """See megatron.legacy.model.transformer.set_input_tensor()"""
        pass

    def forward(self, input):
        c1, c2, c3, c4 = self.backbone(input)

        n, _, h, w = c4.shape
        _c4 = self.linear_c4(c4).permute(0, 2, 1).reshape(n, -1, c4.shape[2], c4.shape[3])
        _c4 = resize(_c4, size=c1.size()[2:], mode='bilinear', align_corners=False)

        _c3 = self.linear_c3(c3).permute(0, 2, 1).reshape(n, -1, c3.shape[2], c3.shape[3])
        _c3 = resize(_c3, size=c1.size()[2:], mode='bilinear', align_corners=False)

        _c2 = self.linear_c2(c2).permute(0, 2, 1).reshape(n, -1, c2.shape[2], c2.shape[3])
        _c2 = resize(_c2, size=c1.size()[2:], mode='bilinear', align_corners=False)

        _c1 = self.linear_c1(c1).permute(0, 2, 1).reshape(n, -1, c1.shape[2], c1.shape[3])

        _c = torch.cat([_c4, _c3, _c2, _c1], dim=1)
        _c = self.conv_fuse(_c)

        x = self.norm(_c)
        x = F.relu(x, inplace=True)
        x = self.dropout(x)

        x = self.linear_pred(x)

        output = einops.rearrange(
            x,
            "b (c p1 p2) h w -> b c (h p1) (w p2)",
            p1=self.patch_dim,
            p2=self.patch_dim,
            h=self.img_h//self.patch_dim,
            w=self.img_w//self.patch_dim,
        )

        return output


================================================
FILE: megatron/legacy/model/vision/knn_monitor.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import torch.nn.functional as F
import torch
from megatron.training import print_rank_0, get_args
from megatron.core import mpu
from megatron.legacy.data.vit_dataset import ClassificationTransform
from megatron.legacy.data.image_folder import ImageFolder

_FEATURE_BANK = None


def build_data_loader(dataset, drop_last=True, shuffle=False):
    """Data loader. Note that batch-size is the local (per GPU) batch-size."""
    # Sampler.
    args = get_args()
    micro_batch_size = 16
    num_workers = args.num_workers
    world_size = mpu.get_data_parallel_world_size()
    rank = mpu.get_data_parallel_rank()
    sampler = torch.utils.data.distributed.DistributedSampler(
        dataset, num_replicas=world_size, rank=rank,
        drop_last=drop_last, shuffle=shuffle
    )

    # Data loader. Note that batch size is the per GPU batch size.
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=micro_batch_size,
        sampler=sampler,
        shuffle=False,
        num_workers=num_workers,
        drop_last=not drop_last,
        pin_memory=True,
    )
    return data_loader


def compute_feature_bank(model):
    args = get_args()
    global _FEATURE_BANK
    feature_bank = []
    feature_label = []

    train_ds = ImageFolder(
        root=args.data_path[0],
        transform=ClassificationTransform((args.img_h, args.img_w), train=False),
        data_per_class_fraction=1.0
    )
    classes = len(train_ds.classes)
    dataloader = build_data_loader(train_ds)
     
    for m in model:
        m.eval()

    with torch.no_grad():
        for i, batch in enumerate(dataloader):
            images = batch[0].cuda().contiguous()
            labels = batch[1].cuda().contiguous()
            student_feature, teacher_feature = model[0](images)
            feature = F.normalize(teacher_feature.float(), dim=1)
            feature_bank.append(feature)
            feature_label.append(labels)
    
    for m in model:
        m.train()

    # [N', D]
    feature_bank = torch.cat(feature_bank, dim=0).contiguous()
    feature_label = torch.cat(feature_label, dim=0).contiguous()

    feature_banks = [torch.zeros_like(feature_bank)
                     for i in range(mpu.get_data_parallel_world_size())]
    torch.distributed.all_gather(feature_banks,
                                 feature_bank,
                                 group=mpu.get_data_parallel_group())

    assert torch.all(torch.eq(feature_banks[mpu.get_data_parallel_rank()],
                              feature_bank))

    feature_labels = [torch.zeros_like(feature_label)
                      for i in range(mpu.get_data_parallel_world_size())]
    torch.distributed.all_gather(feature_labels,
                                 feature_label,
                                 group=mpu.get_data_parallel_group())

    # [D, N]
    feature_banks = torch.cat(feature_banks, dim=0).t().contiguous()
    # [N]
    feature_labels = torch.cat(feature_labels, dim=0).contiguous()
    print_rank_0("feature_banks size is {}".format(feature_banks.size()))
    print_rank_0("feature labels size is {}".format(feature_labels.size()))

    _FEATURE_BANK = (feature_banks, feature_labels, classes)


def get_feature_bank():
    global _FEATURE_BANK
    assert _FEATURE_BANK is not None
    return _FEATURE_BANK


# knn monitor as in InstDisc https://arxiv.org/abs/1805.01978
# implementation follows http://github.com/zhirongw/lemniscate.pytorch and
# https://github.com/leftthomas/SimCLR
def knn_predict(feature, feature_bank, feature_labels, classes, knn_k, knn_t):
    # compute cos similarity between each feature vector and feature bank ---> [B, N]
    sim_matrix = torch.mm(feature, feature_bank)
    # [B, K]
    sim_weight, sim_indices = sim_matrix.topk(k=knn_k, dim=-1)
    # [B, K]
    sim_labels = torch.gather(feature_labels.expand(feature.size(0), -1),
                              dim=-1,
                              index=sim_indices)
    sim_weight = (sim_weight / knn_t).exp()

    # counts for each class
    one_hot_label = torch.zeros(feature.size(0) * knn_k,
                                classes,
                                device=sim_labels.device)
    # [B*K, C]
    one_hot_label = one_hot_label.scatter(dim=-1,
                                          index=sim_labels.view(-1, 1),
                                          value=1.0)
    # weighted score ---> [B, C]
    pred_scores = torch.sum(
            one_hot_label.view(feature.size(0), -1, classes) * sim_weight.unsqueeze(dim=-1),
            dim=1)

    pred_labels = pred_scores.argsort(dim=-1, descending=True)
    return pred_labels


================================================
FILE: megatron/legacy/model/vision/mit_backbone.py
================================================
# Copyright (c) 2023, NVIDIA Corporation. All rights reserved.

import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import partial
from torch.nn.init import trunc_normal_
from megatron.legacy.model.transformer import DropPath
from megatron.legacy.model import LayerNorm


class Mlp(nn.Module):
    def __init__(self,
                 in_features,
                 hidden_features=None,
                 out_features=None,
                 act_layer=nn.GELU,
                 drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.dwconv = DWConv(hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x, H, W):
        x = self.fc1(x)
        x = self.dwconv(x, H, W)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x


class Attention(nn.Module):
    def __init__(self,
                 dim,
                 num_heads=8,
                 qkv_bias=False,
                 qk_scale=None,
                 attn_drop=0.,
                 proj_drop=0.,
                 sr_ratio=1):
        super().__init__()
        assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}."

        self.dim = dim
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = qk_scale or head_dim ** -0.5

        self.q = nn.Linear(dim, dim, bias=qkv_bias)
        self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

        self.sr_ratio = sr_ratio
        if sr_ratio > 1:
            self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
            self.norm = LayerNorm(dim)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x, H, W):
        B, N, C = x.shape
        q = self.q(x).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)

        if self.sr_ratio > 1:
            x_ = x.permute(0, 2, 1).reshape(B, C, H, W)
            x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1)
            x_ = self.norm(x_)
            kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        else:
            kv = self.kv(x).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        k, v = kv[0], kv[1]

        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)

        return x


class Block(nn.Module):

    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
                 drop_path=0., act_layer=nn.GELU, norm_layer=LayerNorm, sr_ratio=1):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention(
            dim,
            num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
            attn_drop=attn_drop, proj_drop=drop, sr_ratio=sr_ratio)
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x, H, W):
        x = x + self.drop_path(self.attn(self.norm1(x), H, W))
        x = x + self.drop_path(self.mlp(self.norm2(x), H, W))

        return x


class OverlapPatchEmbed(nn.Module):
    """ Image to Patch Embedding
    """

    def __init__(self, img_size=224, patch_size=7, stride=4, in_chans=3, embed_dim=768):
        super().__init__()
        img_size = (img_size, img_size)
        patch_size = (patch_size, patch_size)

        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=stride,
                              padding=(patch_size[0] // 2, patch_size[1] // 2))
        self.norm = LayerNorm(embed_dim)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x):
        x = self.proj(x)
        _, _, H, W = x.shape
        x = x.flatten(2).transpose(1, 2)
        x = self.norm(x)

        return x, H, W


class MixVisionTransformer(nn.Module):
    def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dims=[64, 128, 256, 512],
                 num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0.,
                 attn_drop_rate=0., drop_path_rate=0., norm_layer=LayerNorm,
                 depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], output_avg=False):
        super().__init__()
        self.num_classes = num_classes
        self.depths = depths
        self.output_avg = output_avg

        # patch_embed
        self.patch_embed1 = OverlapPatchEmbed(img_size=img_size, patch_size=7, stride=4, in_chans=in_chans,
                                              embed_dim=embed_dims[0])
        self.patch_embed2 = OverlapPatchEmbed(img_size=img_size // 4, patch_size=3, stride=2, in_chans=embed_dims[0],
                                              embed_dim=embed_dims[1])
        self.patch_embed3 = OverlapPatchEmbed(img_size=img_size // 8, patch_size=3, stride=2, in_chans=embed_dims[1],
                                              embed_dim=embed_dims[2])
        self.patch_embed4 = OverlapPatchEmbed(img_size=img_size // 16, patch_size=3, stride=2, in_chans=embed_dims[2],
                                              embed_dim=embed_dims[3])

        # transformer encoder
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule
        cur = 0
        self.block1 = nn.ModuleList([Block(
            dim=embed_dims[0], num_heads=num_heads[0], mlp_ratio=mlp_ratios[0], qkv_bias=qkv_bias, qk_scale=qk_scale,
            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
            sr_ratio=sr_ratios[0])
            for i in range(depths[0])])
        self.norm1 = norm_layer(embed_dims[0])

        cur += depths[0]
        self.block2 = nn.ModuleList([Block(
            dim=embed_dims[1], num_heads=num_heads[1], mlp_ratio=mlp_ratios[1], qkv_bias=qkv_bias, qk_scale=qk_scale,
            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
            sr_ratio=sr_ratios[1])
            for i in range(depths[1])])
        self.norm2 = norm_layer(embed_dims[1])

        cur += depths[1]
        self.block3 = nn.ModuleList([Block(
            dim=embed_dims[2], num_heads=num_heads[2], mlp_ratio=mlp_ratios[2], qkv_bias=qkv_bias, qk_scale=qk_scale,
            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
            sr_ratio=sr_ratios[2])
            for i in range(depths[2])])
        self.norm3 = norm_layer(embed_dims[2])

        cur += depths[2]
        self.block4 = nn.ModuleList([Block(
            dim=embed_dims[3], num_heads=num_heads[3], mlp_ratio=mlp_ratios[3], qkv_bias=qkv_bias, qk_scale=qk_scale,
            drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer,
            sr_ratio=sr_ratios[3])
            for i in range(depths[3])])
        self.norm4 = norm_layer(embed_dims[3])

        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def reset_drop_path(self, drop_path_rate):
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(self.depths))]
        cur = 0
        for i in range(self.depths[0]):
            self.block1[i].drop_path.drop_prob = dpr[cur + i]

        cur += self.depths[0]
        for i in range(self.depths[1]):
            self.block2[i].drop_path.drop_prob = dpr[cur + i]

        cur += self.depths[1]
        for i in range(self.depths[2]):
            self.block3[i].drop_path.drop_prob = dpr[cur + i]

        cur += self.depths[2]
        for i in range(self.depths[3]):
            self.block4[i].drop_path.drop_prob = dpr[cur + i]

    def freeze_patch_emb(self):
        self.patch_embed1.requires_grad = False

    def forward_features(self, x):
        B = x.shape[0]
        outs = []

        # stage 1
        x, H, W = self.patch_embed1(x)
        for i, blk in enumerate(self.block1):
            x = blk(x, H, W)
        x = self.norm1(x)
        x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
        outs.append(x)

        # stage 2
        x, H, W = self.patch_embed2(x)
        for i, blk in enumerate(self.block2):
            x = blk(x, H, W)
        x = self.norm2(x)
        x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
        outs.append(x)

        # stage 3
        x, H, W = self.patch_embed3(x)
        for i, blk in enumerate(self.block3):
            x = blk(x, H, W)
        x = self.norm3(x)
        x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
        outs.append(x)

        # stage 4
        x, H, W = self.patch_embed4(x)
        for i, blk in enumerate(self.block4):
            x = blk(x, H, W)
        x = self.norm4(x)
        if not self.output_avg:
            x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
        outs.append(x)

        return outs

    def forward(self, x):
        x = self.forward_features(x)
    
        if self.output_avg:
            x = x[3].mean(dim=1)

        return x


class DWConv(nn.Module):
    def __init__(self, dim=768):
        super(DWConv, self).__init__()
        self.dwconv = nn.Conv2d(dim, dim, 3, 1, 1, bias=True, groups=dim)

    def forward(self, x, H, W):
        B, N, C = x.shape
        x = x.transpose(1, 2).view(B, C, H, W)
        x = self.dwconv(x)
        x = x.flatten(2).transpose(1, 2)

        return x

class mit_b0(MixVisionTransformer):
    def __init__(self, **kwargs):
        super(mit_b0, self).__init__(
            patch_size=4, embed_dims=[32, 64, 160, 256], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
            qkv_bias=True, norm_layer=partial(LayerNorm, eps=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1],
            drop_rate=0.0, drop_path_rate=0.1)


class mit_b1(MixVisionTransformer):
    def __init__(self, **kwargs):
        super(mit_b1, self).__init__(
            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
            qkv_bias=True, norm_layer=partial(LayerNorm, eps=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1],
            drop_rate=0.0, drop_path_rate=0.1)


class mit_b2(MixVisionTransformer):
    def __init__(self, **kwargs):
        super(mit_b2, self).__init__(
            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
            qkv_bias=True, norm_layer=partial(LayerNorm, eps=1e-6), depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1],
            drop_rate=0.0, drop_path_rate=0.1)

 
class mit_b3(MixVisionTransformer):
    def __init__(self, **kwargs):
        super(mit_b3, self).__init__(
            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
            qkv_bias=True, norm_layer=partial(LayerNorm, eps=1e-6), depths=[3, 4, 18, 3], sr_ratios=[8, 4, 2, 1],
            drop_rate=0.0, drop_path_rate=0.1)

class mit_b3_avg(MixVisionTransformer):
    def __init__(self, drop_path_rate=0.1, **kwargs):
        super(mit_b3_avg, self).__init__(
            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
            qkv_bias=True, norm_layer=partial(LayerNorm, eps=1e-6), depths=[3, 4, 18, 3], sr_ratios=[8, 4, 2, 1],
            drop_rate=0.0, drop_path_rate=drop_path_rate, output_avg=True)

class mit_b4(MixVisionTransformer):
    def __init__(self, **kwargs):
        super(mit_b4, self).__init__(
            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
            qkv_bias=True, norm_layer=partial(LayerNorm, eps=1e-6), depths=[3, 8, 27, 3], sr_ratios=[8, 4, 2, 1],
            drop_rate=0.0, drop_path_rate=0.1)

class mit_b5(MixVisionTransformer):
    def __init__(self, **kwargs):
        super(mit_b5, self).__init__(
            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
            qkv_bias=True, norm_layer=partial(LayerNorm, eps=1e-6), depths=[3, 6, 40, 3], sr_ratios=[8, 4, 2, 1],
            drop_rate=0.0, drop_path_rate=0.1)

class mit_b5_avg(MixVisionTransformer):
    def __init__(self, drop_path_rate=0.1, **kwargs):
        super(mit_b5_avg, self).__init__(
            patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4],
            qkv_bias=True, norm_layer=partial(LayerNorm, eps=1e-6), depths=[3, 6, 40, 3], sr_ratios=[8, 4, 2, 1],
            drop_rate=0.0, drop_path_rate=drop_path_rate, output_avg=True)


================================================
FILE: megatron/legacy/model/vision/swin_backbone.py
================================================
# Copyright (c) 2021 Microsoft
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# --------------------------------------------------------
# Swin Transformer
# --------------------------------------------------------

import torch
import torch.nn as nn
import torch.utils.checkpoint as checkpoint
from timm.models.layers import DropPath, to_2tuple, trunc_normal_
from math import sqrt

from megatron.training import get_args
from functools import partial


class Mlp(nn.Module):
    def __init__(self, in_features, hidden_features=None,
                 out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x


def window_partition(x, window_size):
    """
    Args:
        x: (B, H, W, C)
        window_size (int): window size

    Returns:
        windows: (num_windows*B, window_size, window_size, C)
    """
    B, H, W, C = x.shape
    x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)
    windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)
    return windows


def window_reverse(windows, window_size, H, W):
    """
    Args:
        windows: (num_windows*B, window_size, window_size, C)
        window_size (int): Window size
        H (int): Height of image
        W (int): Width of image

    Returns:
        x: (B, H, W, C)
    """
    B = int(windows.shape[0] / (H * W / window_size / window_size))
    x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1)
    x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
    return x


class WindowAttention(nn.Module):
    r""" Window based multi-head self attention (W-MSA) module with relative position bias.
    It supports both of shifted and non-shifted window.

    Args:
        dim (int): Number of input channels.
        window_size (tuple[int]): The height and width of the window.
        num_heads (int): Number of attention heads.
        qkv_bias (bool, optional):  If True, add a learnable bias to query, key, value. Default: True
        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set
        attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0
        proj_drop (float, optional): Dropout ratio of output. Default: 0.0
    """

    def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.):

        super().__init__()
        self.dim = dim
        self.window_size = window_size  # Wh, Ww
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = qk_scale or head_dim ** -0.5

        # define a parameter table of relative position bias
        self.relative_position_bias_table = nn.Parameter(
            torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads))  # 2*Wh-1 * 2*Ww-1, nH

        # get pair-wise relative position index for each token inside the window
        coords_h = torch.arange(self.window_size[0])
        coords_w = torch.arange(self.window_size[1])
        coords = torch.stack(torch.meshgrid([coords_h, coords_w]))  # 2, Wh, Ww
        coords_flatten = torch.flatten(coords, 1)  # 2, Wh*Ww
        relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]  # 2, Wh*Ww, Wh*Ww
        relative_coords = relative_coords.permute(1, 2, 0).contiguous()  # Wh*Ww, Wh*Ww, 2
        relative_coords[:, :, 0] += self.window_size[0] - 1  # shift to start from 0
        relative_coords[:, :, 1] += self.window_size[1] - 1
        relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
        relative_position_index = relative_coords.sum(-1)  # Wh*Ww, Wh*Ww
        self.register_buffer("relative_position_index", relative_position_index)

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

        trunc_normal_(self.relative_position_bias_table, std=.02)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x, mask=None):
        """
        Args:
            x: input features with shape of (num_windows*B, N, C)
            mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
        """
        B_, N, C = x.shape
        qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]  # make torchscript happy (cannot use tensor as tuple)

        q = q * self.scale
        attn = (q @ k.transpose(-2, -1))

        relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view(
            self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1)  # Wh*Ww,Wh*Ww,nH
        relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous()  # nH, Wh*Ww, Wh*Ww
        attn = attn + relative_position_bias.unsqueeze(0)

        if mask is not None:
            nW = mask.shape[0]
            attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0)
            attn = attn.view(-1, self.num_heads, N, N)
            attn = self.softmax(attn)
        else:
            attn = self.softmax(attn)

        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x

    def extra_repr(self) -> str:
        return f'dim={self.dim}, window_size={self.window_size}, num_heads={self.num_heads}'

    def flops(self, N):
        # calculate flops for 1 window with token length of N
        flops = 0
        # qkv = self.qkv(x)
        flops += N * self.dim * 3 * self.dim
        # attn = (q @ k.transpose(-2, -1))
        flops += self.num_heads * N * (self.dim // self.num_heads) * N
        #  x = (attn @ v)
        flops += self.num_heads * N * N * (self.dim // self.num_heads)
        # x = self.proj(x)
        flops += N * self.dim * self.dim
        return flops


class SwinTransformerBlock(nn.Module):
    r""" Swin Transformer Block.

    Args:
        dim (int): Number of input channels.
        input_resolution (tuple[int]): Input resulotion.
        num_heads (int): Number of attention heads.
        window_size (int): Window size.
        shift_size (int): Shift size for SW-MSA.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
        drop (float, optional): Dropout rate. Default: 0.0
        attn_drop (float, optional): Attention dropout rate. Default: 0.0
        drop_path (float, optional): Stochastic depth rate. Default: 0.0
        act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
    """

    def __init__(self, dim, input_resolution, num_heads, window_size=7, shift_size=0,
                 mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0.,
                 act_layer=nn.GELU, norm_layer=nn.LayerNorm):
        super().__init__()
        self.dim = dim
        self.input_resolution = input_resolution
        self.num_heads = num_heads
        self.window_size = window_size
        self.shift_size = shift_size
        self.mlp_ratio = mlp_ratio
        if min(self.input_resolution) <= self.window_size:
            # if window size is larger than input resolution, we don't partition windows
            self.shift_size = 0
            self.window_size = min(self.input_resolution)
        assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size"

        self.norm1 = norm_layer(dim)
        self.attn = WindowAttention(
            dim, window_size=to_2tuple(self.window_size), num_heads=num_heads,
            qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)

        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)

        self.H = input_resolution[0]
        self.W = input_resolution[1]

        self.attn_mask_dict = {} 

    def create_attn_mask(self, H, W):
        # calculate attention mask for SW-MSA

        Hp = int(np.ceil(H / self.window_size)) * self.window_size
        Wp = int(np.ceil(W / self.window_size)) * self.window_size
        img_mask = torch.zeros((1, Hp, Wp, 1))  # 1 Hp Wp 1
        h_slices = (slice(0, -self.window_size),
                    slice(-self.window_size, -self.shift_size),
                    slice(-self.shift_size, None))
        w_slices = (slice(0, -self.window_size),
                    slice(-self.window_size, -self.shift_size),
                    slice(-self.shift_size, None))
        cnt = 0
        for h in h_slices:
            for w in w_slices:
                img_mask[:, h, w, :] = cnt
                cnt += 1

        mask_windows = window_partition(img_mask, self.window_size)  # nW, window_size, window_size, 1
        mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
        attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
        attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))

        return attn_mask


    def forward(self, x):
        B, L, C = x.shape
        H = int(sqrt(L))
        W = H

        shortcut = x
        x = self.norm1(x)
        x = x.view(B, H, W, C)

        # cyclic shift
        if self.shift_size > 0:
            shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))
        else:
            shifted_x = x

        # partition windows
        x_windows = window_partition(shifted_x, self.window_size)  # nW*B, window_size, window_size, C
        x_windows = x_windows.view(-1, self.window_size * self.window_size, C)  # nW*B, window_size*window_size, C

        # W-MSA/SW-MSA
        attn_windows = self.attn(x_windows, mask=self.attn_mask)  # nW*B, window_size*window_size, C

        # merge windows
        attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C)
        shifted_x = window_reverse(attn_windows, self.window_size, H, W)  # B H' W' C

        # reverse cyclic shift
        if self.shift_size > 0:
            x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2))
        else:
            x = shifted_x
        x = x.view(B, H * W, C)

        # FFN
        x = shortcut + self.drop_path(x)
        x = x + self.drop_path(self.mlp(self.norm2(x)))

        return x

    def extra_repr(self) -> str:
        return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \
               f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}"

    def flops(self):
        flops = 0
        H, W = self.input_resolution
        # norm1
        flops += self.dim * H * W
        # W-MSA/SW-MSA
        nW = H * W / self.window_size / self.window_size
        flops += nW * self.attn.flops(self.window_size * self.window_size)
        # mlp
        flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio
        # norm2
        flops += self.dim * H * W
        return flops


class PatchMerging(nn.Module):
    r""" Patch Merging Layer.

    Args:
        input_resolution (tuple[int]): Resolution of input feature.
        dim (int): Number of input channels.
        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
    """

    def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm):
        super().__init__()
        self.input_resolution = input_resolution
        self.dim = dim
        self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)
        self.norm = norm_layer(4 * dim)

    def forward(self, x):
        """
        x: B, H*W, C
        """
        H, W = self.input_resolution
        B, L, C = x.shape
        assert L == H * W, "input feature has wrong size"
        assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even."

        x = x.view(B, H, W, C)

        x0 = x[:, 0::2, 0::2, :]  # B H/2 W/2 C
        x1 = x[:, 1::2, 0::2, :]  # B H/2 W/2 C
        x2 = x[:, 0::2, 1::2, :]  # B H/2 W/2 C
        x3 = x[:, 1::2, 1::2, :]  # B H/2 W/2 C
        x = torch.cat([x0, x1, x2, x3], -1)  # B H/2 W/2 4*C
        x = x.view(B, -1, 4 * C)  # B H/2*W/2 4*C

        x = self.norm(x)
        x = self.reduction(x)

        return x

    def extra_repr(self) -> str:
        return f"input_resolution={self.input_resolution}, dim={self.dim}"

    def flops(self):
        H, W = self.input_resolution
        flops = H * W * self.dim
        flops += (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim
        return flops


class BasicLayer(nn.Module):
    """ A basic Swin Transformer layer for one stage.

    Args:
        dim (int): Number of input channels.
        input_resolution (tuple[int]): Input resolution.
        depth (int): Number of blocks.
        num_heads (int): Number of attention heads.
        window_size (int): Local window size.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
        qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
        drop (float, optional): Dropout rate. Default: 0.0
        attn_drop (float, optional): Attention dropout rate. Default: 0.0
        drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
        norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
        downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
    """

    def __init__(self, dim, input_resolution, depth, num_heads, window_size,
                 mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0.,
                 drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False):

        super().__init__()
        self.dim = dim
        self.input_resolution = input_resolution
        self.depth = depth
        self.use_checkpoint = use_checkpoint

        # build blocks
        self.blocks = nn.ModuleList([
            SwinTransformerBlock(dim=dim, input_resolution=input_resolution,
                                 num_heads=num_heads, window_size=window_size,
                                 shift_size=0 if (i % 2 == 0) else window_size // 2,
                                 mlp_ratio=mlp_ratio,
                                 qkv_bias=qkv_bias, qk_scale=qk_scale,
                                 drop=drop, attn_drop=attn_drop,
                                 drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path,
                                 norm_layer=norm_layer)
            for i in range(depth)])

        # patch merging layer
        if downsample is not None:
            self.downsample = downsample(input_resolution, dim=dim, norm_layer=norm_layer)
        else:
            self.downsample = None

    def forward(self, x):
        for blk in self.blocks:
            if self.use_checkpoint:
                x = checkpoint.checkpoint(blk, x)
            else:
                x = blk(x)
        x_b4_ds = x
        if self.downsample is not None:
            x = self.downsample(x)
        return x_b4_ds, x

    def extra_repr(self) -> str:
        return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}"

    def flops(self):
        flops = 0
        for blk in self.blocks:
            flops += blk.flops()
        if self.downsample is not None:
            flops += self.downsample.flops()
        return flops


class PatchEmbed(nn.Module):
    r""" Image to Patch Embedding

    Args:
        img_size (int): Image size.  Default: 224.
        patch_size (int): Patch token size. Default: 4.
        in_chans (int): Number of input image channels. Default: 3.
        embed_dim (int): Number of linear projection output channels. Default: 96.
        norm_layer (nn.Module, optional): Normalization layer. Default: None
    """

    def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None):
        super().__init__()
        img_size = to_2tuple(img_size)
        patch_size = to_2tuple(patch_size)
        patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]]
        self.img_size = img_size
        self.patch_size = patch_size
        self.patches_resolution = patches_resolution
        self.num_patches = patches_resolution[0] * patches_resolution[1]

        self.in_chans = in_chans
        self.embed_dim = embed_dim

        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
        if norm_layer is not None:
            self.norm = norm_layer(embed_dim)
        else:
            self.norm = None

    def forward(self, x):
        B, C, H, W = x.shape
        # FIXME look at relaxing size constraints
        assert H == self.img_size[0] and W == self.img_size[1], \
            f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
        x = self.proj(x).flatten(2).transpose(1, 2)  # B Ph*Pw C
        if self.norm is not None:
            x = self.norm(x)
        return x

    def flops(self):
        Ho, Wo = self.patches_resolution
        flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1])
        if self.norm is not None:
            flops += Ho * Wo * self.embed_dim
        return flops


class SwinTransformer(nn.Module):
    r""" Swin Transformer
        A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows`  -
          https://arxiv.org/pdf/2103.14030

    Args:
        img_size (int | tuple(int)): Input image size. Default 224
        patch_size (int | tuple(int)): Patch size. Default: 4
        in_chans (int): Number of input image channels. Default: 3
        embed_dim (int): Patch embedding dimension. Default: 96
        depths (tuple(int)): Depth of each Swin Transformer layer.
        num_heads (tuple(int)): Number of attention heads in different layers.
        window_size (int): Window size. Default: 7
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
        qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
        qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None
        drop_rate (float): Dropout rate. Default: 0
        attn_drop_rate (float): Attention dropout rate. Default: 0
        drop_path_rate (float): Stochastic depth rate. Default: 0.1
        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
        ape (bool): If True, add absolute position embedding to the patch embedding. Default: False
        patch_norm (bool): If True, add normalization after patch embedding. Default: True
        use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False
    """

    def __init__(self, img_size=224, patch_size=4, in_chans=3,
                 embed_dim=96, depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24],
                 window_size=7, mlp_ratio=4., qkv_bias=True, qk_scale=None,
                 drop_rate=0., attn_drop_rate=0., drop_path_rate=0.3,
                 norm_layer=partial(nn.LayerNorm, eps=1e-6), ape=False, patch_norm=True,
                 use_checkpoint=False, output_avg=False, **kwargs):
        super().__init__()

        self.num_layers = len(depths)
        self.embed_dim = embed_dim
        self.ape = ape
        self.patch_norm = patch_norm
        self.num_features = int(embed_dim * 2 ** (self.num_layers - 1))
        self.mlp_ratio = mlp_ratio
        self.img_size = to_2tuple(img_size)
        self.patch_size = to_2tuple(patch_size)
        self.output_avg = output_avg
        
        # split image into non-overlapping patches
        self.patch_embed = PatchEmbed(
            img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim,
            norm_layer=norm_layer if self.patch_norm else None)
        num_patches = self.patch_embed.num_patches
        patches_resolution = self.patch_embed.patches_resolution
        self.patches_resolution = patches_resolution

        # absolute position embedding
        if self.ape:
            self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
            trunc_normal_(self.absolute_pos_embed, std=.02)

        self.pos_drop = nn.Dropout(p=drop_rate)

        # stochastic depth
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule

        # build layers
        self.layers = nn.ModuleList()
        for i_layer in range(self.num_layers):
            layer = BasicLayer(dim=int(embed_dim * 2 ** i_layer),
                               input_resolution=(patches_resolution[0] // (2 ** i_layer),
                                                 patches_resolution[1] // (2 ** i_layer)),
                               depth=depths[i_layer],
                               num_heads=num_heads[i_layer],
                               window_size=window_size,
                               mlp_ratio=self.mlp_ratio,
                               qkv_bias=qkv_bias, qk_scale=qk_scale,
                               drop=drop_rate, attn_drop=attn_drop_rate,
                               drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],
                               norm_layer=norm_layer,
                               downsample=PatchMerging if (i_layer < self.num_layers - 1) else None,
                               use_checkpoint=use_checkpoint)
            self.layers.append(layer)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    @torch.jit.ignore
    def no_weight_decay(self):
        return {'absolute_pos_embed'}

    @torch.jit.ignore
    def no_weight_decay_keywords(self):
        return {'relative_position_bias_table'}

    def forward(self, x):
        x = self.patch_embed(x)
        if self.ape:
            x = x + self.absolute_pos_embed
        x = self.pos_drop(x)

        h = self.img_size[0] // self.patch_size[0]
        w = self.img_size[1] // self.patch_size[1]
        outs = []

        for i, layer in enumerate(self.layers):
            px, x = layer(x)
            b, n, c = px.shape

            if i != len(self.layers) - 1 or not self.output_avg:
                px = px.permute(0, 2, 1).contiguous()
                px = px.reshape(b, c, h, w)
            # is this a fair assumption ?? i think it's baked into the architecture
            h, w = h//2, w//2
            outs.append(px)

        if self.output_avg:
            return outs[-1].mean(dim=1)

        return outs

    def flops(self):
        flops = 0
        flops += self.patch_embed.flops()
        for i, layer in enumerate(self.layers):
            flops += layer.flops()
        flops += self.num_features * self.patches_resolution[0] * self.patches_resolution[1] // (2 ** self.num_layers)
        flops += self.num_features * self.num_classes
        return flops


def get_swin(drop_path_rate=0.3, output_avg=False):
    args = get_args()

    window_size = 7
    embed_dim = 128
    depths = [2, 2, 18, 2]
    num_heads = [4, 8, 16, 32]
    swin = SwinTransformer(
        img_size=(args.img_h, args.img_w,),
        in_chans=3,
        patch_size=args.patch_dim,
        embed_dim=embed_dim,
        depths=depths,
        num_heads=num_heads,
        window_size=window_size,
        drop_path_rate=drop_path_rate,
        output_avg=output_avg,
    )

    return swin


================================================
FILE: megatron/legacy/model/vision/utils.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import warnings
import torch
import torch.nn.functional as F


def resize(input,
           size=None,
           scale_factor=None,
           mode='nearest',
           align_corners=None,
           warning=True):
    if warning:
        if size is not None and align_corners:
            input_h, input_w = tuple(int(x) for x in input.shape[2:])
            output_h, output_w = tuple(int(x) for x in size)
            if output_h > input_h or output_w > output_h:
                if ((output_h > 1 and output_w > 1 and input_h > 1
                     and input_w > 1) and (output_h - 1) % (input_h - 1)
                        and (output_w - 1) % (input_w - 1)):
                    warnings.warn(
                        f'When align_corners={align_corners}, '
                        'the output would more aligned if '
                        f'input size {(input_h, input_w)} is `x+1` and '
                        f'out size {(output_h, output_w)} is `nx+1`')
    if isinstance(size, torch.Size):
        size = tuple(int(x) for x in size)
    return F.interpolate(input, size, scale_factor, mode, align_corners)


================================================
FILE: megatron/legacy/model/vision/vit_backbone.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""Vision Transformer(VIT) model."""

import math
import einops
import torch
import apex
import torch.nn.functional as F
from megatron.training import get_args
from megatron.legacy.model.transformer import ParallelTransformer
from megatron.legacy.model.utils import (
    get_linear_layer,
    init_method_normal,
    scaled_init_method_normal,
)
from megatron.legacy.model.module import MegatronModule

CLASS_TOKEN_LENGTH = 8

class VitMlpHead(MegatronModule):
    """Pooler layer.

    Pool hidden states of a specific token (for example start of the
    sequence) and add a linear transformation followed by a tanh.

    Args:
        hidden_size: hidden size
        init_method: weight initialization method for the linear layer.
            bias is set to zero.
    """

    def __init__(self, config, hidden_size, num_classes):
        super(VitMlpHead, self).__init__()
        self.config = config
        self.dense_in = torch.nn.Linear(hidden_size, hidden_size)
        self.relu = torch.nn.ReLU()
        self.dense_out = torch.nn.Linear(hidden_size, num_classes)
        torch.nn.init.constant_(self.dense_out.bias, -10)

    def forward(self, hidden_states):
        # hidden_states: [b, 1, h]
        # sequence_index: index of the token to pool.
        dense_in_result = self.dense_in(hidden_states)
        tanh_result = torch.tanh(dense_in_result)
        dense_out_result = self.dense_out(tanh_result)
        return dense_out_result


def isPerfectSquare(x):
    if(x >= 0):
        sr = math.sqrt(x)
        return (int(sr) * int(sr) == x)
    return False


def twod_interpolate_position_embeddings_hook(
    state_dict,
    prefix,
    local_metadata,
    strict,
    missing_keys,
    unexpected_keys,
    error_msgs,
):

    args = get_args()
    num_patches_per_dim_h = args.img_h // args.patch_dim
    num_patches_per_dim_w = args.img_w // args.patch_dim
    num_patches = num_patches_per_dim_h * num_patches_per_dim_w
    hidden_size = args.hidden_size

    key = prefix + "weight"

    assert key in state_dict
    if key in state_dict:
        input_param = state_dict[key]

        input_seq_len = input_param.shape[0]
        assert(isPerfectSquare(input_seq_len) or isPerfectSquare(input_seq_len - CLASS_TOKEN_LENGTH))
        input_has_class_token = not isPerfectSquare(input_seq_len)
        num_tok_input = input_seq_len - CLASS_TOKEN_LENGTH if input_has_class_token else input_seq_len
        num_tok_output = num_patches
        output_has_class_token = args.class_token_present

        # update input_param and load it to state_dict[key]
        if input_has_class_token:
            input_param_tok = input_param[:CLASS_TOKEN_LENGTH, :]
            input_param_grid = input_param[CLASS_TOKEN_LENGTH:, :]
        else:
            input_param_tok = torch.zeros(CLASS_TOKEN_LENGTH, hidden_size)
            input_param_grid = input_param

        assert input_param.shape[1] == hidden_size

        if num_tok_input != num_tok_output:

            gs_input = int(math.sqrt(num_tok_input))
            gs_new = (num_patches_per_dim_h, num_patches_per_dim_w)

            input_param_grid = input_param_grid.transpose(0, 1).contiguous()
            input_param_grid = input_param_grid.reshape(
                (1, -1, gs_input, gs_input)
            )
            input_param_grid = input_param_grid.float()
            scale_factor = (gs_new[0] / gs_input, gs_new[1] / gs_input)

            input_param_grid = F.interpolate(
                input_param_grid, scale_factor=scale_factor, mode="bilinear"
            )

            input_param_grid = input_param_grid.half()
            input_param_grid = input_param_grid.reshape((-1, num_tok_output))
            input_param_grid = input_param_grid.transpose(0, 1).contiguous()

            assert input_param_grid.shape[1] == hidden_size

        input_param = input_param_grid
        assert (
            input_param.shape[0] == num_tok_output
            and input_param.shape[1] == hidden_size
        )

        if output_has_class_token:
            input_param = torch.cat((input_param_tok, input_param), dim=0)

        state_dict[key] = input_param


class VitBackbone(MegatronModule):
    """Vision Transformer Model."""

    def __init__(self,
                 config,
                 pre_process=True,
                 post_process=True,
                 class_token=True,
                 single_token_output=False,
                 post_layer_norm=True,
                 drop_path_rate=0.0):
        super(VitBackbone, self).__init__(share_embeddings_and_output_weights=False)
        args = get_args()
        self.config = config

        self.fp16_lm_cross_entropy = args.fp16_lm_cross_entropy

        self.pre_process = pre_process
        self.post_process = post_process
        self.class_token = class_token
        self.post_layer_norm = post_layer_norm
        self.hidden_size = args.hidden_size
        self.patch_dim = args.patch_dim
        self.img_h = args.img_h
        self.img_w = args.img_w
        self.micro_batch_size = args.micro_batch_size
        self.single_token_output = single_token_output
        self.drop_path_rate = drop_path_rate

        assert self.img_h % self.patch_dim == 0
        assert self.img_w % self.patch_dim == 0
        self.num_patches_per_dim_h = self.img_h // self.patch_dim
        self.num_patches_per_dim_w = self.img_w // self.patch_dim
        self.num_patches = self.num_patches_per_dim_h * self.num_patches_per_dim_w
        self.seq_length = self.num_patches + (CLASS_TOKEN_LENGTH if self.class_token else 0)
        self.flatten_dim = self.patch_dim * self.patch_dim * args.num_channels
        self.input_tensor = None
        self.position_ids = None

        if self.pre_process:
            # cls_token
            if self.class_token:
                self.cls_token = torch.nn.Parameter(
                    torch.randn(1, CLASS_TOKEN_LENGTH, self.hidden_size)
                )
                torch.nn.init.zeros_(self.cls_token)
            self.position_ids = torch.arange(self.seq_length).expand(1, -1).cuda()

            # Linear encoder
            self.linear_encoder = torch.nn.Linear(
                self.flatten_dim, self.hidden_size
            )

            # embedding
            self.position_embeddings = torch.nn.Embedding(
                self.seq_length, self.hidden_size
            )
            init_method_normal(args.init_method_std)(
                self.position_embeddings.weight
            )

            args.class_token_present = self.class_token
            self.position_embeddings._register_load_state_dict_pre_hook(
                twod_interpolate_position_embeddings_hook
            )

            self.embedding_dropout = torch.nn.Dropout(args.hidden_dropout)

        # Transformer
        self.transformer = ParallelTransformer(
            config,
            model_type=args.model_type,
            pre_process=self.pre_process,
            post_process=self.post_process,
            post_layer_norm=self.post_layer_norm,
            drop_path_rate=self.drop_path_rate
        )

    def set_input_tensor(self, input_tensor):
        """See megatron.legacy.model.transformer.set_input_tensor()"""
        self.transformer.set_input_tensor(input_tensor)

    def forward(self, input):

        if self.pre_process:
            rearranged_input = einops.rearrange(
                input,
                "b c (h p1) (w p2) -> b (h w) (p1 p2 c)",
                p1=self.patch_dim,
                p2=self.patch_dim,
            )

            assert rearranged_input.dtype == torch.half
            encoder_output = self.linear_encoder(rearranged_input)

            concatenated_tokens = encoder_output
            if self.class_token:
                cls_tokens = self.cls_token.expand(encoder_output.shape[0], -1, -1)
                concatenated_tokens = torch.cat((cls_tokens, encoder_output), dim=1)

            token_embeddings = concatenated_tokens + \
                    self.position_embeddings(self.position_ids[:, :concatenated_tokens.shape[1]])
            # [b, s, h] => [s, b, h]
            token_embeddings = token_embeddings.transpose(0, 1).contiguous()
            hidden_states = self.embedding_dropout(token_embeddings)
        else:
            hidden_states = input

        hidden_states = self.transformer(hidden_states, None)

        if self.post_process:
            # [s b h] => [b s h]
            if self.single_token_output:
                hidden_states = hidden_states[0]
            else:
                hidden_states = hidden_states.transpose(0, 1).contiguous()

        return hidden_states


================================================
FILE: megatron/post_training/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/post_training/arguments.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.


def add_modelopt_args(parser):
    """Add additional arguments for using Model Optimizer (modelopt) features."""
    group = parser.add_argument_group(title="modelopt-generic")

    # Model and Checkpoint Compatibility
    group.add_argument(
        "--export-model-type",
        type=str,
        default="GPTModel",
        choices=["GPTModel", "MambaModel"],
        help="Model type to use in model_provider.",
    )
    group.add_argument(
        "--export-legacy-megatron",
        action="store_true",
        help="Export a legacy megatron-lm checkpoint.",
    )
    group.add_argument(
        "--export-te-mcore-model",
        action="store_true",
        help="Indicate the source checkpoint uses the fused Transformer-Engine mcore layer spec "
        "(where layernorms are fused into linear layers). Enables state_dict key remapping so the "
        "TE checkpoint can be loaded into the local ModelOpt spec for PTQ/export, and saved back "
        "in TE-compatible format. Mutually exclusive with --export-default-te-spec.",
    )
    group.add_argument(
        "--export-default-te-spec",
        action="store_true",
        help="Use the full Transformer-Engine layer spec for model building. "
        "This builds the model with TELayerNormColumnParallelLinear, TERowParallelLinear, "
        "TEGroupedMLP, TEDotProductAttention, etc., matching the canonical TE specs.",
    )
    group.add_argument(
        "--export-force-local-attention",
        action="store_true",
        help="Forcing local DotProductAttention; otherwise TEDotProductAttention is used.",
    )
    # Quantization
    group.add_argument(
        "--export-kv-cache-quant",
        help="Type of KV cache quantization to perform.",
        choices=["none", "fp8", "fp8_affine", "nvfp4", "nvfp4_affine", "nvfp4_rotate"],
        default="none",
    )
    group.add_argument(
        "--export-real-quant-cfg",
        type=str,
        default="None",
        choices=["fp8_real_quant", "fp8_blockwise_real_quant", "None"],
        help="Specify a real quantization config from the supported choices.",
    )
    group.add_argument(
        "--export-quant-cfg",
        type=str,
        default=None,
        # TODO replace choices with mtq.config.choices after deprecating the shorter aliases
        help="Specify a quantization config from mtq.config.choices.",
    )
    # Knowledge Distillation
    group.add_argument(
        '--export-kd-teacher-load',
        type=str,
        help='Path to checkpoint to load as distillation teacher. (Enables distillation mode automatically)',
    )
    group.add_argument(
        '--export-kd-teacher-model-config',
        type=str,
        default=None,
        help='Path to teacher model config for distillation. If not provided, defaults to ${export_kd_teacher_load}/model_config.yaml.',
    )
    group.add_argument(
        '--export-kd-teacher-ckpt-format',
        type=str,
        default=None,
        choices=['torch', 'torch_dist', 'torch_dcp'],
        help="Checkpoint format of teacher model, if different from student's.",
    )
    group.add_argument(
        '--export-kd-cfg',
        type=str,
        default=None,
        help='Path to distillation configuration yaml file, in order to use non-default settings.',
    )

    # Finetuning
    group.add_argument(
        "--finetune-hf-dataset", type=str, default=None, help="HF dataset used for finetuning."
    )
    group.add_argument(
        "--finetune-data-split", type=str, default="train", help="HF dataset split used for finetuning."
    )

    # Special model architecture option
    group.add_argument(
        '--export-qk-l2-norm',
        action="store_true",
        help='Use Llama-4 L2Norm instead of normal LayerNorm/RMSNorm for QK normalization.',
    )
    group.add_argument(
        '--export-moe-apply-probs-on-input',
        action="store_true",
        help='Use Llama-4 expert scaling on input instead of output.',
    )

    # Speculative decoding
    group.add_argument(
        '--export-offline-model',
        action="store_true",
        help='If set, the base model will have no decoder layer. Only the embedding layer and output layer are initialized.',
    )

    # Global state
    group.add_argument(
        '--modelopt-enabled',
        action="store_true",
        help='Will be set automatically when loading a ModelOpt checkpoint.',
    )

    # GPT-OSS YaRN RoPE support
    group.add_argument(
        '--enable-gpt-oss',
        action="store_true",
        help='Enable GPT-OSS mode with YaRN RoPE configuration. When enabled, automatically '
             'configures all YaRN parameters with GPT-OSS defaults.',
    )

    return parser


================================================
FILE: megatron/post_training/checkpointing.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import logging
from pathlib import Path
from typing import Optional, Tuple, Union

import modelopt.torch.opt as mto
import torch.nn as nn
from modelopt.torch.opt.plugins import restore_sharded_modelopt_state

from megatron.core import dist_checkpointing
from megatron.core.utils import get_torch_version, is_torch_min_version
from megatron.training import get_args
from megatron.training.checkpointing import _load_base_checkpoint, load_checkpoint
from megatron.training.utils import print_rank_0, unwrap_model
from .utils import print_distributed_quant_summary

logger = logging.getLogger(__name__)

NEMO_WEIGHT_DIR_NAMES = {"model_weights": "model.", "weights": "module."}


def has_modelopt_state(checkpoint_path: str) -> bool:
    """Check if modelopt_state folder exists inside the checkpoint.
    Args:
        checkpoint_path: Path to the checkpoint directory

    Returns:
        True if modelopt_state exists, False otherwise
    """
    args = get_args()

    try:
        if args.ckpt_format == "torch":
            # Non-sharded
            state_dict, _, _ = _load_base_checkpoint(checkpoint_path, rank0=False)
            if state_dict is None:
                return False
            if "modelopt_state" not in state_dict:
                return False
            return True
        else:
            # Sharded
            load_dir, _ = get_sharded_load_dir(checkpoint_path)
            if load_dir is None:
                return False
            if not (load_dir / "modelopt_state").is_dir():
                return False
            return True
    except Exception as e:
        print_rank_0(f"Failed to inspect checkpoint in {checkpoint_path}: {e}")
        return False


def get_sharded_load_dir(load_dir: str) -> Tuple[Union[Path, None], str]:
    """Helper to retrieve the sharded load directory and its prefix, if any."""
    load_dir = Path(load_dir)

    # Skip if load_dir is nonexistent or empty
    if not load_dir.is_dir() or not any(load_dir.iterdir()):
        return None, ""

    sharded_load_dir = None
    sharded_prefix = ""
    # Read the tracker file and set the iteration if this is a MLM sharded checkpoint.
    # If no tracker file, assume it is a NeMo sharded checkpoint.
    tracker_filename = load_dir / 'latest_checkpointed_iteration.txt'
    if tracker_filename.is_file():
        with open(tracker_filename, 'r') as f:
            metastring = f.read().strip()
            try:
                iteration = int(metastring)
                sharded_load_dir = Path(load_dir) / 'iter_{:07d}'.format(iteration)
            except ValueError:
                sharded_load_dir = Path(load_dir) / metastring
    else:
        for nemo_dir_name, prefix in NEMO_WEIGHT_DIR_NAMES.items():
            nemo_weight_dir = Path(load_dir) / nemo_dir_name
            if nemo_weight_dir.is_dir():
                sharded_load_dir = nemo_weight_dir
                sharded_prefix = prefix
                break

    if sharded_load_dir is None:
        raise ValueError(f"{load_dir} is not a MLM or NeMo sharded checkpoint!")
    if not sharded_load_dir.exists():
        return None, ""

    return sharded_load_dir, sharded_prefix


def load_modelopt_state(model: nn.Module, load_dir: Optional[str] = None) -> None:
    """Loading modelopt_state without loading the model.

    If distributed checkpointing in use, we try to load from the sharded modelopt_state. This will not
    load the model state_dict. Otherwise, if the checkpoint is not sharded, we load the base checkpoint
    (which contains the model state as well) and extract the modelopt_state.

    Args:
        model: the model to load the modelopt_state into
        load_dir: optionally provide a different loading path
    """
    args = get_args()
    load_dir = load_dir or args.load

    if args.ckpt_format == "torch":
        # Non-sharded
        print_rank_0(f"Loading ModelOpt state from base checkpoint ({load_dir})")
        try:
            state_dict, _, _ = _load_base_checkpoint(args.load, rank0=False)
        except Exception:
            print_rank_0("Failed to load base checkpoint via megatron _load_base_checkpoint!")
            return
        if state_dict is None:
            print_rank_0("No checkpoint state_dict found. Skipping loading ModelOpt state.")
            return
        modelopt_state = state_dict.get("modelopt_state", None)
        if modelopt_state is not None:
            mto.restore_from_modelopt_state(model, modelopt_state)
    else:
        # Sharded
        sharded_load_dir, _ = get_sharded_load_dir(load_dir)
        if sharded_load_dir is None:
            print_rank_0("No sharded checkpoint found. Skipping loading modelopt_state.")
            return
        restore_sharded_modelopt_state([model], sharded_load_dir)


def load_modelopt_checkpoint(
    model,
    optimizer=None,
    opt_param_scheduler=None,
    strict: bool = True,
    additional_sharded_prefix: str = "",
    load_arg: str = "load",
) -> None:
    """Load a sharded (untar .nemo or megatron --use-dist-ckpt) or unsharded checkpoint.

    Essentially, the function is detecting whether the checkpoint is a .nemo sharded checkpoint.
    If so, we load the sharded state_dict with additional_sharded_prefix `model.`.
    This additional prefix is tha artifact of the lightning module wrapper. Once the sharded
    state_dict is loaded, we use a state_dict pre_hook to pop this additional prefix (`model.`)
    from all state_dict keys.

    If this is not a .nemo sharded checkpoint, then this function will simply call
    load_checkpoint. See megatron.checkpointing.load_checkpoint for explanation.

    Args:
        additional_sharded_prefix: append additional prefix to align the sharded checkpoint keys.
            When loading an .nemo sharded checkpoint, this is usually `model.`. Otherwise, this is
            typically an empty string.
    """

    def _remove_prefix_state_dict_pre_hook(
        state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
    ):
        """Pytorch state_dict pre_hook to remove prefix of the state_dict keys."""
        if additional_sharded_prefix is None:
            return
        key_rewrite_list = []
        for key, _ in state_dict.items():
            if key.startswith(additional_sharded_prefix):
                key_rewrite_list.append(key)
        for old_key in key_rewrite_list:
            new_key = old_key[len(additional_sharded_prefix) :]
            state_dict[new_key] = state_dict.pop(old_key)

    args = get_args()
    load_dir = getattr(args, load_arg)
    sharded_load_dir, additional_sharded_prefix = get_sharded_load_dir(load_dir)

    unwrapped_model = unwrap_model(model)

    if args.ckpt_format == "torch":
        state_dict, checkpoint_name, release, ckpt_type = _load_base_checkpoint(
            load_dir, args, rank0=False
        )
        model_state_dict = state_dict["model"]
        unwrapped_model[0].load_state_dict(model_state_dict, strict=False)
        print_distributed_quant_summary(unwrapped_model[0])
    elif sharded_load_dir is not None and optimizer is None and opt_param_scheduler is None:
        sharded_state_dict_metadata = dist_checkpointing.load_content_metadata(sharded_load_dir)
        sharded_state_dict = unwrapped_model[0].sharded_state_dict(
            prefix=additional_sharded_prefix, metadata=sharded_state_dict_metadata
        )

        if additional_sharded_prefix:
            unwrapped_model[0]._register_load_state_dict_pre_hook(
                _remove_prefix_state_dict_pre_hook
            )
        model_state_dict = dist_checkpointing.load(
            sharded_state_dict, sharded_load_dir, strict=args.dist_ckpt_strictness
        )
        unwrapped_model[0].load_state_dict(model_state_dict, strict=False)
        print_distributed_quant_summary(unwrapped_model[0])
    else:
        _ = load_checkpoint(model, optimizer, opt_param_scheduler, strict=strict, load_arg=load_arg)

================================================
FILE: megatron/post_training/generate.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from typing import List, Optional

import torch
from tqdm import tqdm

from megatron.core import mpu
from megatron.core.inference.communication_utils import broadcast_from_last_pipeline_stage
from megatron.core.pipeline_parallel import get_forward_backward_func
from megatron.core.tensor_parallel.mappings import gather_from_tensor_model_parallel_region
from megatron.post_training.utils import get_current_memory_info


def simple_generate(
    model,
    input_ids: torch.Tensor,
    images: Optional[torch.Tensor] = None,
    osl: int = 32,
    eos_token_id: List[int] = [],
    disable_tqdm: bool = False,
    calibration_mode: bool = False,
):
    """A simple generate function without using KV-cache."""
    model.eval()

    def _dummy_loss_func(output_tensor, non_loss_data=True):
        return output_tensor

    def _forward_step_func(data, model):
        batch_size = data["tokens"].shape[0]
        seq_len = data["tokens"].shape[-1]
        device = data["tokens"].device

        attention_mask = (
            torch.triu(torch.ones((batch_size, seq_len, seq_len), device=device), diagonal=1)
            .bool()
            .view(batch_size, 1, seq_len, seq_len)
        )
        position_ids = torch.arange(
            data["tokens"].shape[0], dtype=torch.long, device=data["tokens"].device
        )
        output_tensor = model(data["tokens"], position_ids, attention_mask)
        return output_tensor, _dummy_loss_func

    disable_tqdm = disable_tqdm or torch.distributed.get_rank() > 0

    output_ids = None
    step_pbar = tqdm(range(osl), disable=disable_tqdm, leave=False)

    for step in step_pbar:
        step_pbar.set_description(get_current_memory_info())

        # When --sequence-parallel is used, sequence_len must be a multiple of
        # --tensor-parallel. We pad eos tokens on the left to be multiple of 32.
        num_pad_tokens = input_ids.shape[-1] % 32

        if num_pad_tokens > 0:
            num_pad_tokens = 32 - num_pad_tokens
            padding_shape = (input_ids.shape[0], num_pad_tokens)
            padded_tokens = torch.full(
                padding_shape, 0, dtype=input_ids.dtype, device=input_ids.device
            )
            tokens = torch.cat((input_ids, padded_tokens), dim=-1)
        else:
            tokens = input_ids

        logits_and_extras = get_forward_backward_func()(
            forward_step_func=_forward_step_func,
            data_iterator=[{"tokens": tokens}],
            model=model,
            num_microbatches=1,
            seq_length=tokens.shape[-1],
            micro_batch_size=tokens.shape[0],
            decoder_seq_length=tokens.shape[-1],
            forward_only=True,
            collect_non_loss_data=True,
        )

        if calibration_mode:
            continue  # avoid unnecessary computation

        if mpu.is_pipeline_last_stage():
            logits = gather_from_tensor_model_parallel_region(logits_and_extras[0])
            eager_ids = logits[:, input_ids.shape[-1] - 1, :].argmax(dim=-1, keepdim=True).detach()
        else:
            eager_ids = None

        eager_ids = broadcast_from_last_pipeline_stage(
            [input_ids.shape[0], 1], input_ids.dtype, eager_ids
        )

        input_ids = torch.cat([input_ids, eager_ids], dim=-1)

        if output_ids is None:
            output_ids = eager_ids
        else:
            output_ids = torch.cat([output_ids, eager_ids], dim=-1)

        if eager_ids.item() in eos_token_id:
            break

    return output_ids


def simple_speculative_generate(
    model,
    input_ids: torch.Tensor,
    images: Optional[torch.Tensor] = None,
    osl: int = 32,
    steps: int = 0,
    eos_token_id: List[int] = [],
    disable_tqdm: bool = False,
):
    """A simple generate function without using KV-cache."""
    output_ids = simple_generate(
        model,
        input_ids,
        images=images,
        osl=osl,
        eos_token_id=eos_token_id,
        disable_tqdm=disable_tqdm,
    )
    output_ids = torch.cat((input_ids, output_ids), dim=-1)
    actual_osl = output_ids.shape[-1] - input_ids.shape[-1]

    total_steps = 0
    while input_ids.shape[-1] < output_ids.shape[-1]:
        total_steps += 1
        offset = input_ids.shape[-1] + 1

        # Speculative decoding forward
        # NOTE: PP is not yet supported.
        new_token, draft_tokens = model.pseudo_speculative_generate(input_ids, steps=steps)

        # Always accept the first token.
        input_ids = output_ids[:, : offset]

        if input_ids.shape[-1] >= output_ids.shape[-1]:
            break

        for i in range(draft_tokens.shape[-1]):
            if torch.equal(draft_tokens[:, i : i + 1], output_ids[:, offset: offset + 1]):
                offset += 1
            else:
                break

        # Broadcast the accepted offset from the last rank.
        offset = [offset]
        torch.distributed.broadcast_object_list(
            offset,
            src=torch.distributed.get_world_size() - 1,
        )

        input_ids = output_ids[:, : offset[0]]

    return output_ids, actual_osl, total_steps


================================================
FILE: megatron/post_training/loss_func.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.

"""Pretrain GPT loss function(s)."""

import torch

from megatron.core import parallel_state
from megatron.core.models.gpt import GPTModel
from megatron.training import get_args
from megatron.training.utils import unwrap_model


def _mask_loss(output_tensor, loss_mask):
    """Apply mask to the unreduced loss tensor."""
    args = get_args()

    if isinstance(output_tensor, tuple):
        # Special distillation flags indicating whether to perform additional tensor-parallel adjustments.
        output_tensor, tp_reduce, is_sequence_parallel = output_tensor
    else:
        tp_reduce, is_sequence_parallel = False, False

    if is_sequence_parallel:
        # Sequence-parallel tensor derived from intermediate activation - need to split loss mask.
        idx = parallel_state.get_tensor_model_parallel_rank()
        loss_mask = torch.tensor_split(loss_mask, args.tensor_model_parallel_size, dim=1)[idx]

    losses = output_tensor.view(-1).float()
    loss_mask = loss_mask.reshape(-1).float()
    loss = torch.sum(losses * loss_mask)

    if tp_reduce or is_sequence_parallel:
        # Losses on parallel tensors require extra all-reduce to sync across MP ranks.
        torch.distributed.all_reduce(loss, group=parallel_state.get_tensor_model_parallel_group())

    return loss


def loss_func(loss_mask: torch.Tensor, output_tensor: torch.Tensor, model: GPTModel):
    """Loss function (with KD Loss support).

    Args:
        loss_mask (Tensor): Used to mask out some portions of the loss
        output_tensor (Tensor): The tensor with the losses
        model (GPTModel): The model (can be wrapped)
    """
    args = get_args()

    # Unwrap for both Distillation and LANA
    model = unwrap_model(model)

    # Standard lm loss
    loss_lm = _mask_loss(output_tensor, loss_mask)
    loss = loss_lm
    num_tokens = loss_mask.sum().clone().detach().to(torch.int)
    report = {'lm loss': torch.cat([loss_lm.clone().detach().view(1), num_tokens.view(1)])}

    if args.export_kd_teacher_load:
        # [ModelOpt]: Handle knowledge distillation
        losses = model.compute_kd_loss(
            student_loss=loss_lm,
            loss_reduction_fn=lambda x: _mask_loss(x, loss_mask),
        )

        report["total loss"] = torch.cat([losses["kd_loss"].clone().detach().view(1), num_tokens.view(1)])
        report["logits distillation loss"] = torch.cat([losses["logits_loss"].clone().detach().view(1), num_tokens.view(1)])
        report["intermediate distillation loss"] = torch.cat([losses["intermediate_loss"].clone().detach().view(1), num_tokens.view(1)])

        if model.training:
            loss = losses["kd_loss"]

    return loss, num_tokens, report


================================================
FILE: megatron/post_training/model_builder.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""ModelOpt GPT model provider."""

import logging
import os
from argparse import Namespace
from typing import Any, Dict

import modelopt.torch.distill as mtd
import modelopt.torch.distill.plugins.megatron as mtd_mcore
import modelopt.torch.opt as mto
import yaml

from megatron.core.models.gpt import GPTModel as MCoreGPTModel
from megatron.core.models.gpt.heterogeneous.heterogeneous_layer_specs import (
    get_gpt_heterogeneous_layer_spec,
)
from megatron.core.models.mamba import MambaModel as MCoreMambaModel
from megatron.core.post_training.modelopt.gpt.model_specs import get_gpt_modelopt_spec
from megatron.core.post_training.modelopt.gpt.state_dict_hooks import (
    mcore_gpt_load_te_state_dict_pre_hook,
)
from megatron.post_training.checkpointing import load_modelopt_checkpoint, load_modelopt_state
from megatron.training import get_args, print_rank_0
from megatron.training.arguments import core_transformer_config_from_args

from megatron.post_training.utils import print_distributed_quant_summary


def count_parameters_in_layer(model, layer_name):
    num_params = 0
    for name, param in model.named_parameters():
        if layer_name in name:
            num_params += param.numel()
            print_rank_0(f" - {name}: {param.numel()}")
    return num_params


def _add_load_convert_hooks(model: MCoreGPTModel):
    """Register some load_state_dict prehooks to handle some known state_dict key mismatch.
    """
    args = get_args()
    if args.export_te_mcore_model:
        model._register_load_state_dict_pre_hook(mcore_gpt_load_te_state_dict_pre_hook)


def _load_teacher_model_config(checkpoint_path: str) -> Namespace:
    """Reads teacher config from a file.

    The config provided, either in the teacher checkpoint dir or via `--export-kd-teacher-model-config`,
    should specify (in NeMo yaml config format) any model architecture settings which differ from the main student model's.
    This function will translate NeMo field names to MCore as needed.
    """
    required_teacher_fields = (
        "num_layers",
        "hidden_size",
        "ffn_hidden_size",
        "num_attention_heads",
    )

    args = get_args()
    if args.export_kd_teacher_model_config is not None:
        config_path = args.export_kd_teacher_model_config
    else:
        config_path = os.path.join(checkpoint_path, "model_config.yaml")
    if not os.path.exists(config_path):
        raise FileNotFoundError(
            f"Teacher model-config file {config_path} not found.\n"
            "Teacher checkpoint dir must contain a NeMo-format config named 'model_config.yaml'"
            " or provide it via --export-kd-teacher-model-config."
        )
    with open(config_path) as f:
        config = yaml.safe_load(f)

    if missing_keys := [k for k in required_teacher_fields if k not in config]:
        raise ValueError(
            f"Teacher model config file ({config_path}) missing the following required fields: {missing_keys}"
        )

    if "encoder_seq_length" in config:
        config["seq_length"] = config["encoder_seq_length"]
    if "bias" in config:
        config["disable_bias_linear"] = not config["bias"]
    if config.get("activation") == "swiglu":
        config["swiglu"] = True
    if config.get("position_embedding_type", False) is None:
        config["use_rotary_position_embeddings"] = config["no_position_embedding"] = True
    if "share_embeddings_and_output_weights" in config:
        config["untie_embeddings_and_output_weights"] = not config[
            "share_embeddings_and_output_weights"
        ]
    if "tokenizer" in config:
        config["tokenizer_type"] = config["tokenizer"]["type"]
        config["tokenizer_model"] = config["tokenizer"]["model"]
    if "masked_softmax_fusion" in config:
        config["no_masked_softmax_fusion"] = not config["masked_softmax_fusion"]
    if config.get("normalization") == "layernorm1p":
        config["apply_layernorm_1p"] = True
    if "precision" in config:
        config[config["precision"]] = True
    if "mcore_gpt" in config:
        config["use_mcore_models"] = config["mcore_gpt"]

    args_dict = vars(get_args()).copy()
    del args_dict["kv_channels"]  # not recalculated if present
    args_dict.update(config)

    # Backward compat: old checkpoints have hybrid_override_pattern but not hybrid_layer_pattern
    if (args_dict.get('hybrid_override_pattern') is not None
            and args_dict.get('hybrid_layer_pattern') is None):
        args_dict['hybrid_layer_pattern'] = args_dict['hybrid_override_pattern']

    return Namespace(**args_dict)


def _load_teacher_model(config, config_raw: Namespace, model_kwargs: Dict[str, Any]) -> MCoreGPTModel:
    """Teacher model creator."""
    args = get_args()

    if config.is_hybrid_model:
        # This parameter is not part of the TransformerConfig and needs to be passed separately.
        # Note: hybrid_override_pattern is remapped to hybrid_layer_pattern in
        # _load_teacher_model_config, so config_raw.hybrid_layer_pattern is always set here.
        model_kwargs["hybrid_layer_pattern"] = config_raw.hybrid_layer_pattern

        teacher = MCoreMambaModel(config=config, **model_kwargs)
    else:
        # GPT layer spec needs re-creation since it depends on number of model layers.
        if config.heterogeneous_block_specs:
            model_kwargs["transformer_layer_spec"] = get_gpt_heterogeneous_layer_spec(
                config=config,
                use_te=(args.transformer_impl == "transformer_engine"),
            )
        else:
            model_kwargs["transformer_layer_spec"] = get_gpt_modelopt_spec(
                config=config,
                local_core_attention=False if config.context_parallel_size > 1 else args.export_force_local_attention,
                remap_te_layernorm=args.export_te_mcore_model,
                real_quant_cfg=args.export_real_quant_cfg,
                use_arbitrary_attention_mask=False,
            )
        teacher = MCoreGPTModel(config=config, **model_kwargs)
    _add_load_convert_hooks(teacher)

    print_rank_0(f"Loading teacher as {type(teacher).__name__} from {args.export_kd_teacher_load} ...")
    # [WAR]: load checkpoint will check checkpoint's saved args and rng state if not finetune.
    # To avoid error out on loading teacher's checkpoint, we temporarily set args.finetune to
    # True while loading the teacher checkpoint.
    original_args_finetune, original_ckpt_format = args.finetune, args.ckpt_format
    args.finetune = True
    if args.export_kd_teacher_ckpt_format is not None:
        args.ckpt_format = args.export_kd_teacher_ckpt_format
    load_modelopt_checkpoint([teacher], load_arg='export_kd_teacher_load')
    args.finetune, args.ckpt_format = original_args_finetune, original_ckpt_format
    print_rank_0("...teacher loaded successfully.")

    return teacher


def modelopt_gpt_mamba_builder(
    args,
    pre_process,
    post_process,
    vp_stage=None,
    config=None,
    pg_collection=None,
) -> MCoreGPTModel | MCoreMambaModel:
    """Builds the model.

    Args:
        args (Namespace): The arguments namespace.
        pre_process (bool, optional): Set to true if you need to compute embedings. Defaults to True.
        post_process (bool, optional): Set to true if you need to want to compute output logits/loss. Defaults to True.
        vp_stage (int, optional): The virtual pipeline stage.
        config (TransformerConfig, optional): The configuration object.
        pg_collection (ProcessGroupCollection, optional): Collection of process groups
            used for tensor/context/pipeline/data parallelism. If provided, it will be
            attached to the returned model for downstream routing/resharding utilities.

    Returns:
        MCoreGPTModel | MCoreMambaModel: The returned model
    """
    print_rank_0("building GPT model ...")

    # ModelOpt by default assumes none homogenous layers. This affect the storage format of the sharded checkpoint.
    config = core_transformer_config_from_args(args)

    # Handle GPT-OSS mode with YaRN RoPE configuration
    if hasattr(args, 'enable_gpt_oss') and args.enable_gpt_oss:
        print_rank_0("GPT-OSS mode enabled: Configuring YaRN RoPE parameters")

        # Set GPT-OSS YaRN values directly on the config
        # These defaults are based on Huggingface GPT-OSS configurations
        config.position_embedding_type = "yarn"
        config.yarn_rotary_scaling_factor = 32.0
        config.yarn_original_max_position_embeddings = 131072
        config.yarn_beta_fast = 32.0
        config.yarn_beta_slow = 1.0
        config.yarn_mscale = 1.0
        config.yarn_mscale_all_dim = 0.0
        config.yarn_correction_range_round_to_int = False

    if vp_stage is not None:
        raise ValueError("ModelOpt integration does not currently support virtual pipeline parallel.")
    if args.use_legacy_models:
        raise ValueError(
            "ModelOpt integration only support MCore models. Use --use-mcore-modules instead."
        )
    if args.spec is not None:
        raise ValueError("ModelOpt integration does not support custom args.spec.")

    # Llama-4 Scout/Maverick support
    config.qk_l2_norm = args.export_qk_l2_norm
    config.moe_apply_probs_on_input = args.export_moe_apply_probs_on_input

    if args.export_model_type == "GPTModel":
        if args.export_offline_model:
            # Record the original num_layers. This is needed for _set_default_aux_hidden_state_layers
            config.original_num_layers = config.num_layers
            # Set num_layers to 0 for base model in offline mode
            config.num_layers = 0
            # SP is not used for offline
            # TODO: DSR1 MTP may require SP
            config.sequence_parallel = False
        if config.heterogeneous_block_specs:
            transformer_layer_spec = get_gpt_heterogeneous_layer_spec(
                config=config,
                use_te=args.transformer_impl == "transformer_engine",
            )
        else:
            if config.context_parallel_size > 1:
                print_rank_0("context_parallel_size > 1! Force using TEDotProductAttention!")
                local_core_attention=False
            else:
                local_core_attention=args.export_force_local_attention

            transformer_layer_spec = get_gpt_modelopt_spec(
                config=config,
                local_core_attention=local_core_attention,
                remap_te_layernorm=args.export_te_mcore_model,
                real_quant_cfg=args.export_real_quant_cfg,
                use_arbitrary_attention_mask=False,
            )

        model_kwargs = {
            "transformer_layer_spec": transformer_layer_spec,
            "vocab_size": args.padded_vocab_size,
            "max_sequence_length": args.max_position_embeddings,
            "pre_process": pre_process,
            "post_process": post_process,
            "fp16_lm_cross_entropy": args.fp16_lm_cross_entropy,
            "parallel_output": True,
            "share_embeddings_and_output_weights": not args.untie_embeddings_and_output_weights,
            "position_embedding_type": args.position_embedding_type,
            "rotary_percent": args.rotary_percent,
            "rotary_base": args.rotary_base,
            "rope_scaling": args.use_rope_scaling,
            "pg_collection": pg_collection,
        }
        model = MCoreGPTModel(config=config, **model_kwargs)
    elif args.export_model_type == "MambaModel" or getattr(args, 'hybrid_layer_pattern', None) is not None:
        from megatron.core.post_training.modelopt.mamba.model_specs import get_mamba_stack_modelopt_spec

        if args.export_default_te_spec and args.export_te_mcore_model:
            logging.getLogger(__name__).warning(
                "--export-default-te-spec and --export-te-mcore-model are mutually exclusive. "
                "Since --export-default-te-spec is given, --export-te-mcore-model will be disabled."
            )
            args.export_te_mcore_model = False

        mamba_stack_spec = get_mamba_stack_modelopt_spec(
            remap_te_layernorm=args.export_te_mcore_model,
            use_default_te_spec=args.export_default_te_spec,
        )
        model_kwargs = {
            "mamba_stack_spec": mamba_stack_spec,
            "vocab_size": args.padded_vocab_size,
            "max_sequence_length": args.max_position_embeddings,
            "hybrid_layer_pattern": args.hybrid_layer_pattern,
            "pre_process": pre_process,
            "post_process": post_process,
            "fp16_lm_cross_entropy": args.fp16_lm_cross_entropy,
            "parallel_output": True,
            "share_embeddings_and_output_weights": not args.untie_embeddings_and_output_weights,
            "position_embedding_type": args.position_embedding_type,
            "rotary_percent": args.rotary_percent,
            "rotary_base": args.rotary_base,
            "pg_collection": pg_collection,
        }

        model = MCoreMambaModel(config=config, **model_kwargs)

        for l in range(model.decoder.num_layers_per_pipeline_rank):
            layer_params = count_parameters_in_layer(model, f'decoder.layers.{l}.')
            print_rank_0(f" == params layer {l}: {layer_params}")

    else:
        raise ValueError("ModelOpt does not support model type {}".format(args.export_model_type))

    # [IMPORTANT] Load modelopt_state immediately before returning the model back to `get_model()`.
    #
    # ModelOpt can create additional trainable parameters (e.g. for online speculative
    # decoding training or PEFT). Hence resuming modelopt_state during checkpoint loading is already
    # too late since Megatron created the optimizer right after calling model_provider before loading
    # the checkpoint. To ensure all trainable parameters are reigistered, we try to resume the
    # modelopt_state (which transforms the model to have additional parameters) before returning.
    if args.load is not None:
        load_modelopt_state(model=model)

    _add_load_convert_hooks(model)

    # Distillation mode.
    if args.export_kd_teacher_load:
        print_rank_0("Distillation: Enabled.")

        # NOTE: Unknown memory leak occuring per fwd-bwd pass if model
        # is converted to a `modelopt.torch.opt.DynamicModule`.
        # Argument `--manual-gc` can result in an eventual OOM.
        assert (
            not args.manual_gc
        ), "ModelOpt Distillation currently incompatible with `--manual-gc` option."
        assert (
            not args.tp_comm_overlap
        ), "ModelOpt Distillation currently incompatible with `--tp-comm-overlap` option."
        assert (
            args.cross_entropy_fusion_impl != "te"
        ), "ModelOpt Distillation currently incompatible with TransformerEngine Cross-Entropy implementation."
        if args.pipeline_model_parallel_size > 1:
            assert (
                args.virtual_pipeline_model_parallel_size is None
            ), "ModelOpt Distillation currently incompatible with interleaved pipeline schedule."

        teacher_config_raw = _load_teacher_model_config(args.export_kd_teacher_load)
        teacher_config = core_transformer_config_from_args(teacher_config_raw)  # convert to TransformerConfig

        distill_cfg = mtd_mcore.setup_distillation_config(
            args.export_kd_cfg, student_cfg=config, teacher_cfg=teacher_config
        )
        kd_config = {
            "teacher_model": _load_teacher_model(teacher_config, teacher_config_raw, model_kwargs),
            "criterion": distill_cfg.criterion,
            "loss_balancer": distill_cfg.loss_balancer,
        }
        model = mtd.convert(model, mode=[("kd_loss", kd_config)])

        # Additional tweaks needed for MCore.
        # (accounts for sharded state, pipeline parallel, and potentially skipping LM loss)
        mtd_mcore.adjust_distillation_model_for_mcore(model, distill_cfg)
        # Also remove KD mode state to prevent issues with re-conversion after restore.
        mto.ModeloptStateManager(model).state_dict().pop()  # TODO(aanoosheh): remove once fixed in ModelOpt
    
    print_distributed_quant_summary(model)
    return model


================================================
FILE: megatron/post_training/non_loss_data_func.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import torch

from megatron.post_training.generate import simple_speculative_generate
from megatron.post_training.utils import get_mtbench_chat_data
from megatron.training import get_tokenizer
from megatron.training.utils import unwrap_model


def report_draft_acceptance_length(model, osl: int = 64, draft_steps: int = 7):
    """Report MTBench acceptance length."""
    tokenizer = get_tokenizer()._tokenizer
    unwrapped_model = unwrap_model(model)[0]
    parallel_draft_step = unwrapped_model.eagle_config.parallel_draft_step if hasattr(unwrapped_model, "eagle_config") else 1

    if unwrapped_model.training:
        return
    if not hasattr(unwrapped_model, "pseudo_speculative_generate"):
        return

    dataset = get_mtbench_chat_data()

    category_and_prompt = {}

    for example in dataset:
        if example["category"] not in category_and_prompt:
            category_and_prompt[example["category"]] = [example["conversations"][0]]

    total_osl = 0
    total_steps = 0
    for category, conversations in category_and_prompt.items():
        input_ids = tokenizer.apply_chat_template(
            conversations, return_tensors="pt", add_generation_prompt=True
        ).to(torch.cuda.current_device())
        output_ids, actual_osl, steps = simple_speculative_generate(
            unwrapped_model, input_ids, osl=osl, steps=draft_steps, disable_tqdm=True
        )
        total_osl += actual_osl
        total_steps += steps
        if torch.distributed.get_rank() == 0:
            al = actual_osl / steps
            ar = al / (draft_steps + parallel_draft_step)
            print(
                "Rank {:3}/{:3} {:12} AL {:.1f} AR {:.2f} STEPS {:5}/{:5} DRAFT {:2} PARALLEL {:2}".format(
                    torch.distributed.get_rank(),
                    torch.distributed.get_world_size(),
                    category,
                    al,
                    ar,
                    steps,
                    actual_osl,
                    draft_steps,
                    parallel_draft_step,
                ),
                flush=True,
            )
    if torch.distributed.get_rank() == 0:
        al = total_osl / total_steps
        ar = al / (draft_steps + parallel_draft_step)
        print(
            "Rank {:3}/{:3} {:12} AL {:.1f} AR {:.2f} STEPS {:5}/{:5} DRAFT {:2} PARALLEL {:2}".format(
                torch.distributed.get_rank(),
                torch.distributed.get_world_size(),
                "average",
                al,
                ar,
                total_steps,
                total_osl,
                draft_steps,
                parallel_draft_step,
            ),
            flush=True,
        )
    torch.distributed.barrier()


================================================
FILE: megatron/post_training/utils.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import inspect
import os

import modelopt
import modelopt.torch.quantization as mtq
import torch
from modelopt.torch.quantization.utils import is_quantized
from packaging.version import Version

from megatron.core import parallel_state
from megatron.training.utils import unwrap_model


def modelopt_version_higher_than(target_version: str):
    """Check if Model-Optimizer is greater than this version."""
    info = "rank {:3}/{:3} checking if nvidia-modelopt-{} is higher than {}".format(
        torch.distributed.get_rank(),
        torch.distributed.get_world_size(),
        str(modelopt.__version__),
        target_version,
    )
    print(info)
    return Version(modelopt.__version__) > Version(target_version)

def modelopt_version_at_least(target_version: str):
    """Check if Model-Optimizer is greater or equal than this version."""
    info = "rank {:3}/{:3} checking if nvidia-modelopt-{} is at least {}".format(
        torch.distributed.get_rank(),
        torch.distributed.get_world_size(),
        str(modelopt.__version__),
        target_version,
    )
    print(info)
    return Version(modelopt.__version__) >= Version(target_version)


def function_has_parameter(function, argument_name: str) -> bool:
    """Check if a function has a specific argument."""
    sig = inspect.signature(function)
    return argument_name in sig.parameters

def get_current_memory_info():
    """Get current memory usage."""
    remaining_mem, total_mem = torch.cuda.mem_get_info()
    info = "rank {:3}/{:3}  memory remaining {:03}% ({}/{} MB) ".format(
        torch.distributed.get_rank(),
        torch.distributed.get_world_size(),
        int(remaining_mem * 100 / total_mem),
        remaining_mem // 1048576,
        total_mem // 1048576,
    )
    return info


def report_current_memory_info():
    """Report current memory usage."""
    print(get_current_memory_info(), flush=True)
    torch.distributed.barrier()


def get_mtbench_chat_data():
    """Return a MTBench dataset."""
    from datasets import load_dataset

    def mtbench_to_oai_chat(example):
        """Convert MTBench data to OpenAI chat completion format."""
        conversations = []
        for prompt in example["prompt"]:
            conversations.append({"role": "user", "content": prompt})
        example["conversations"] = conversations
        return example

    dataset = load_dataset(
        "HuggingFaceH4/mt_bench_prompts", split="train", token=os.environ.get("HF_TOKEN", None)
    )
    return dataset.map(mtbench_to_oai_chat)


def to_empty_if_meta(module: torch.nn.Module, *, device: torch.device, recurse=True):
    """Move tensors to device if not meta device; otherwise materialize with empty_like().

    Args:
        module: The target module to apply this transformation.
        device: The desired device of the parameters
            and buffers in this module.
        recurse: Whether parameters and buffers of submodules should
            be recursively moved to the specified device.
    """

    def _empty_like_if_meta(tensor: torch.Tensor, *, device: torch.device):
        if tensor.device == torch.device("meta"):
            return torch.empty_like(tensor, device=device)
        else:
            return tensor.to(device)

    module._apply(lambda t: _empty_like_if_meta(t, device=device), recurse=recurse)


def print_distributed_quant_summary(model, msg=""):
    from megatron.core import parallel_state
    from megatron.training import print_rank_0
    from megatron.training.utils import unwrap_model

    unwrapped_model = unwrap_model(model)
    if isinstance(unwrapped_model, list):
        unwrapped_model = unwrapped_model[0]

    if not is_quantized(unwrapped_model):
        return

    print_rank_0(f"{msg}\nQuantization summary of unwrapped model: {unwrapped_model}\n{'_'*80}")

    if not torch.distributed.is_initialized():
        mtq.print_quant_summary(unwrapped_model)
        return

    # Only print from unique TP ranks of [0, 1]
    if parallel_state.get_data_parallel_rank(
        with_context_parallel=True
    ) == 0 and parallel_state.get_tensor_model_parallel_rank() in [0, 1]:
        TP_rank = parallel_state.get_tensor_model_parallel_rank()
        EP_rank = parallel_state.get_expert_model_parallel_rank()
        PP_rank = parallel_state.get_pipeline_model_parallel_rank()
        print(f"\nTP rank {TP_rank}, EP rank {EP_rank}, PP rank {PP_rank}")
        print("_" * 80)
        mtq.print_quant_summary(unwrapped_model)


================================================
FILE: megatron/rl/README.md
================================================
# Megatron-RL

## Status
08/27/2025: Megatron-RL is actively under development. While it is functional internally at NVIDIA, it is not yet usable by external users because not all required code has been released. The available code and examples may change as development progresses. For a current roadmap of planned Megatron-RL features please see [#1776](https://github.com/NVIDIA/Megatron-LM/issues/1776).

## Overview
Megatron-RL is adding native reinforcement learning (RL) based post-training to Megatron-LM. It provides a flexible library for defining RL environments and agents, extending the Megatron-LM training loop with RL algorithm support.

The bulk of the new library code is located in `megatron/rl`. However:
- Significant modifications have been made to the Megatron Core inference code.
- Minor changes were made to the Megatron-LM training loop to enable Megatron-RL.

Example environments for Megatron-RL can be found in `examples/rl`.

Megatron-RL is designed for research teams exploring RL post-training of LLMs at scale on state-of-the-art foundation models with cutting-edge performance on the latest NVIDIA hardware.

It is **not** intended as an enterprise framework and won't necessarily provide out-of-the-box support for any given open model. For those capabilities please refer to [Nemo RL](https://github.com/NVIDIA-NeMo/RL).

## Design

The design philosophy of Megatron RL is to keep the agent/environment design as decoupled as possible from the underlying RL implementation.
- The environment design defines the "Agent Environment" which takes as input a handle to an inference interface (i.e. something supporting `.generate(prompt, **generation_args)`) and must return experience rollouts along with rewards.
- The RL training system handles batching inference requests, hosting inference, training and other orchestration tasks.

Below we describe the different conceptual components and how they divide responsibility.

### Agent and Environment (referred to as an Agent)
- Takes a handle to an `InferenceInterface`.
- Returns `Rollout` or `EvaluationResponse` objects.
- Responsible for sampling parameters, custom generation arguments (e.g., stop conditions, inline evaluation), etc.

### Trainer/Evaluator
- Manages the control flow for rollout generation and evaluation.
- Coordinates with (or creates) the `InferenceInterface` and `Agent`s.

### Inference Interface
- Provides the endpoint the `AgenticEnvironment` uses to run `.generate(prompt, **generation_args)`.
- Can take many forms (e.g. Megatron, OpenAI, HF) and supports many configuration options.

## Examples
See `examples/rl` for demonstrations of:
- Custom `InferenceInterface` endpoints
- Example `Agents`

================================================
FILE: megatron/rl/__init__.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import asyncio
import functools
import importlib
import os
import sys
import time
import traceback
from typing import Callable, Coroutine, Type

from pydantic import BaseModel, ConfigDict, Field
from typing_extensions import Self, Type


def import_class(class_path: str) -> Type:
    """Import a class from a string path.

    Args:
        class_path: String path to the class (e.g. 'examples.rl.environments.countdown.countdown_agent.CountdownAgent' or '../environments.countdown.py:CountdownAgent')

    Returns:
        The class object
    """
    if '.py:' in class_path:
        # filepath.py:Classname branch.
        module_path, class_name = class_path.split(':')
        abs_path = os.path.abspath(module_path)
        spec = importlib.util.spec_from_file_location('acemath_agent', abs_path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
    else:
        module_path, class_name = class_path.rsplit('.', 1)
        module = importlib.import_module(module_path, package=__package__)
    return getattr(module, class_name)


class TypeLookupable(BaseModel, extra='allow'):
    """Supports 'unwrapping' of base class into subclasses."""

    type_name: str = Field('Null', frozen=True)

    def unwrap(self) -> Self:
        """Turn instance of base class into registered subclass."""
        return type(self).Library.type_names[self.type_name](**self.model_dump())

    @classmethod
    def register_subclass(cls, register_type: Type[Self]) -> Type[Self]:
        """Register subclass for unwrapping."""
        if 'Library' not in cls.__dict__:

            class Library:
                type_names = {}

            cls.Library = Library
        cls.Library.type_names[register_type.__fields__['type_name'].default] = register_type
        return register_type


class GenericGenerationArgs(BaseModel):
    """Generic generation arguments."""

    model_config = ConfigDict(frozen=True)
    temperature: float | None = None
    top_k: int | None = None
    top_p: float | None = None
    max_tokens: int | None = None

    def add(self, generation_args: 'GenericGenerationArgs') -> 'GenericGenerationArgs':
        return GenericGenerationArgs.model_validate(
            {**self.model_dump(), **generation_args.model_dump(exclude_unset=True)}
        )


class Request(BaseModel):
    """Generation Request."""

    generation_args: GenericGenerationArgs = GenericGenerationArgs()


================================================
FILE: megatron/rl/agent/__init__.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from .pass_at_evaluation_agent import PassAtEvaluationResponse
from .reward_only_agent import RewardOnlyEvaluationResponse


================================================
FILE: megatron/rl/agent/api.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import asyncio
import logging
from abc import ABC, abstractmethod
from collections.abc import AsyncIterable
from typing import Generic, TypeVar

import numpy as np
from pydantic import BaseModel

from megatron.core.inference.utils import asyncio_Queue, asyncio_QueueShutDown
from megatron.core.utils import trace_async_exceptions

from ..__init__ import Request, TypeLookupable
from ..inference import (
    InferenceInterface,
    LLMChatMessage,
    ReturnsRaw,
)


class AgentBaseModel(BaseModel, extra='allow'):
    pass


class RolloutRequest(Request):
    """Request to agent to generate Rollouts."""

    num_rollouts: int
    inference_interface: InferenceInterface
    validation: bool = False


class GroupedRolloutRequest(Request):
    """Request to agent to generate grouped Rollouts."""

    num_groups: int
    rollouts_per_group: int
    inference_interface: InferenceInterface
    validation: bool = False
    filter_groups_with_same_reward: bool = False
    streaming: bool = False
    enforce_order: bool = False


class Rollout(AgentBaseModel):
    """Data for language-based Rollout."""

    trajectory: list[str]
    prompt_length: list[int] | None = None
    reward: float = None
    env_id: str = ''
    problem_id: str | None = None
    policy_epoch: list[list[tuple[int, int]]]
    kv_cache_epoch: list[list[tuple[int, int]]]
    num_evictions: list[int]


class TokenRollout(AgentBaseModel):
    """Tokenized representation of a language-based Rollout."""

    trajectory: list[list[int]]
    reward: list[float] | float
    generation_mask: list[list[bool]] | None = None
    logprobs: list[list[float]] | None = None
    env_id: str = ''
    problem_id: str | None = None
    policy_epoch: list[list[tuple[int, int]]]
    kv_cache_epoch: list[list[tuple[int, int]]]
    num_evictions: list[int]


Rollouts = list[TokenRollout | Rollout]


class RolloutGroup(AgentBaseModel):
    """A group of rollouts (e.g. multiple completions for one prompt) with batch metadata."""

    rollouts: Rollouts
    batch_id: int = 0
    index_in_batch: int = 0

    def __iter__(self):
        return iter(self.rollouts)

    def __len__(self):
        return len(self.rollouts)

    def __getitem__(self, idx):
        return self.rollouts[idx]


GroupedRollouts = list[RolloutGroup]


class ContrastiveRollout(AgentBaseModel):
    """Contrastive/Preference data for language-based Rollout."""

    chosen_trajectory: list[str]
    rejected_trajectory: list[str]


class Head2HeadRolloutRequest(Request):
    num_rollouts: int
    inference_interface: list[InferenceInterface]
    validation: bool = False


class EvaluationRequest(Request):
    """Request to evaluate N prompts, optionally distributed across ranks."""

    inference_interface: InferenceInterface
    num_prompts: int
    rank_info: tuple[int, int] | None = (
        None  # (rank, total_ranks) if distributed, None for full evaluation
    )
    validation: bool = True


class EvaluationResult(AgentBaseModel):
    prompt: str | list[LLMChatMessage]
    response: str | LLMChatMessage


class RewardEvaluationResult(EvaluationResult):
    reward: float
    problem_id: str | None = None


T = TypeVar('T', bound=EvaluationResult)


class EvaluationResponse(AgentBaseModel, TypeLookupable, Generic[T]):
    env_id: str
    results: list[T]

    def metrics(self):
        raise NotImplementedError(f"{type(self)} did not provide metric aggregation.")


class Agent(ABC, AgentBaseModel):
    pass


class RolloutGenerator(Agent, ABC):
    """An agent that produces Rollout objects containing rollout string and associated reward."""

    @abstractmethod
    async def rollout(self, request: RolloutRequest) -> Rollout: ...

    async def get_reward_rollouts(self, request: RolloutRequest) -> list[Rollout]:
        assert isinstance(
            request.inference_interface, ReturnsRaw
        ), "InferenceInterface must support raw_text return to provide rollouts."

        return await asyncio.gather(
            *[self.rollout(request=request) for _ in range(request.num_rollouts)]
        )


class ContrastiveRolloutGenerator(Agent, ABC):
    """An agent that produces ContrastiveRollout objects containing two rollout strings, one chosen and one rejected."""

    @abstractmethod
    async def get_contrastive_rollouts(
        self, request: RolloutRequest
    ) -> list[ContrastiveRollout]: ...


class TokenizedRolloutGenerator(Agent, ABC):
    """An agent that produces TokenRollout objects containing rollout token ids and associated rewards.

    Optionally can also provide generation masks to indicate which tokens were generated and token masks to indicate which
    tokens were possible at any given step.
    """

    @abstractmethod
    async def rollout(self, request: RolloutRequest) -> TokenRollout: ...

    async def get_reward_rollouts(self, request: RolloutRequest) -> list[TokenRollout]:
        assert isinstance(
            request.inference_interface, ReturnsRaw
        ), "InferenceInterface must support raw_text return to provide rollouts."

        return await asyncio.gather(
            *[self.rollout(request=request) for _ in range(request.num_rollouts)]
        )


class GroupedRolloutGenerator(Agent, ABC):
    """An interface to return grouped Rollout objects to support algorithms like GRPO."""

    parallel_generation_tasks: int = 512
    buffer_size: int = 10

    def __init__(self, *, parallel_generation_tasks: int | None = None, **kwargs):
        super().__init__(**kwargs)
        if parallel_generation_tasks is not None:
            self.parallel_generation_tasks = parallel_generation_tasks

    @abstractmethod
    async def group_rollout(self, request: GroupedRolloutRequest) -> list[Rollout]: ...

    async def get_grouped_rollouts(self, request: GroupedRolloutRequest):
        assert isinstance(
            request.inference_interface, ReturnsRaw
        ), "InferenceInterface must support raw_text return to provide rollouts."

        # When streaming, use buffer_size to create backpressure
        # for balanced generation in a multi-task setting.
        grouped_rollouts: asyncio_Queue[RolloutGroup] = asyncio_Queue(
            maxsize=self.buffer_size if request.streaming else 0
        )
        submitted_groups = 0

        # num_groups controls how many groups each worker generates and yields together.
        # When it's 1, the semaphore is a no-op.
        groups_per_worker = request.num_groups
        if groups_per_worker > 1:
            assert not request.filter_groups_with_same_reward, \
                "Cannot use filter_groups_with_same_reward with num_groups > 1."
        assert self.parallel_generation_tasks >= groups_per_worker, \
            f"{self.parallel_generation_tasks=} must be >= {groups_per_worker=}"
        num_workers = self.parallel_generation_tasks // groups_per_worker
        unused = self.parallel_generation_tasks % groups_per_worker
        if unused:
            logging.warning(
                f"parallel_generation_tasks ({self.parallel_generation_tasks}) is not "
                f"divisible by num_groups ({groups_per_worker}); "
                f"{unused} generation task(s) will be unused."
            )
        submission_gate = asyncio.Semaphore(num_workers)

        async def generate_and_enqueue(batch_id, index_in_batch):
            group = await self.group_rollout(request=request)
            if (
                not request.filter_groups_with_same_reward
                or np.std([r.reward for r in group]) > 1e-6
            ):
                await grouped_rollouts.put(
                    RolloutGroup(rollouts=group, batch_id=batch_id, index_in_batch=index_in_batch)
                )
                return True
            return False

        @trace_async_exceptions(verbose=True)
        async def generate_task():
            nonlocal submitted_groups
            while request.streaming or submitted_groups < self.parallel_generation_tasks:
                await submission_gate.acquire()
                batch_id = submitted_groups // groups_per_worker
                submitted_groups += groups_per_worker
                if groups_per_worker > 1:
                    await asyncio.gather(*[
                        generate_and_enqueue(batch_id, i)
                        for i in range(groups_per_worker)
                    ])
                else:
                    if not await generate_and_enqueue(batch_id, 0):
                        submitted_groups -= groups_per_worker
                        submission_gate.release()

        tasks = [asyncio.create_task(generate_task()) for _ in range(num_workers)]

        async def shutdown_queue_when_done():
            """Wait for all workers to finish, then shut down the queue."""
            await asyncio.gather(*tasks)
            grouped_rollouts.shutdown()

        shutdown_task = asyncio.create_task(shutdown_queue_when_done())

        try:
            next_batch_id = 0
            pending: dict[int, GroupedRollouts] = {}
            while True:
                try:
                    group = await grouped_rollouts.get()
                except asyncio_QueueShutDown:
                    break
                if request.enforce_order:
                    # Accumulate groups and enforce submission order across batches.
                    pending.setdefault(group.batch_id, []).append(group)
                    while (l := len(pending.get(next_batch_id, []))) >= groups_per_worker:
                        assert l == groups_per_worker
                        batch = pending.pop(next_batch_id)
                        batch.sort(key=lambda g: g.index_in_batch)
                        next_batch_id += 1
                        for g in batch:
                            yield g
                        submission_gate.release()
                else:
                    # Yield groups as soon as they're completed.
                    yield group
                    submission_gate.release()
        finally:
            shutdown_task.cancel()
            for task in tasks:
                task.cancel()


class EvaluationAgent(Agent, ABC):
    """An agent that can take an inference interface and return a benchmark score."""

    @abstractmethod
    async def run_evaluation(self, request: EvaluationRequest) -> EvaluationResponse: ...


================================================
FILE: megatron/rl/agent/huggingface_dataset_agent.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from datasets import load_dataset
from pydantic import BaseModel


class HFDatasetAgent(BaseModel):
    """
    Agent base class for loading and accessing HuggingFace datasets.

    Uses either a local dataset file (Arrow format) or downloads from the HuggingFace Hub,
    depending on which initialization argument is provided.

    Attributes:
        dataset_file (str | None): Path to a local dataset file directory. If provided, loads dataset from here.
        hf_dataset_name (str | None): Name of the HuggingFace dataset to load from the hub, if no file provided.
    """

    dataset_file: str | None = None
    hf_dataset_name: str | None = None

    def __init__(self, **data):
        super().__init__(**data)
        self.dataset = self.load_hf_dataset()

    def load_hf_dataset(self):
        """
        Loads the dataset from either a local file or the HuggingFace Hub.
        """
        if self.dataset_file:
            return load_dataset("arrow", data_dir=self.dataset_file, split=self.split)
        else:
            return load_dataset(self.hf_dataset_name, split=self.split)


================================================
FILE: megatron/rl/agent/pass_at_evaluation_agent.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import asyncio
from abc import ABC, abstractmethod
from typing import Any

import numpy as np

from ..__init__ import GenericGenerationArgs
from ..inference import LLMChatMessage
from .api import EvaluationAgent, EvaluationRequest, EvaluationResponse, RewardEvaluationResult


def pass_at_k(n_samples: int, n_correct: int, k: int) -> float:
    """Lower variance estimator of pass@k."""
    assert n_samples >= 0, "n_samples should be non-negative"
    assert n_correct >= 0, "n_correct should be non-negative"
    assert k <= n_samples, "k should be less than or equal to n_samples"

    if n_samples - n_correct < k:
        return 1.0

    return 1.0 - np.prod(1.0 - k / np.arange(n_samples - n_correct + 1, n_samples + 1))


class PassAtEvaluationResult(RewardEvaluationResult):
    pass_at: dict[int, float]
    response: list[str] | list[LLMChatMessage]
    reward: list[float]
    greedy_response: str | LLMChatMessage
    greedy_reward: float


class PassAtEvaluationResponse(EvaluationResponse[PassAtEvaluationResult]):
    type_name: str = 'PassAtEvaluationResponse'

    def metrics(self):
        metrics = {}
        if self.results:
            pass_at_k_keys = self.results[0].pass_at.keys()
            for k in pass_at_k_keys:
                metrics[f'pass_at_{k}'] = [el.pass_at[k] for el in self.results]
            metrics['greedy_reward'] = [el.greedy_reward for el in self.results]
        return metrics


class PassAtEvaluationAgent(EvaluationAgent, ABC):

    def __init__(self, max_k=32, **kwargs):
        super().__init__(**kwargs)
        self.max_k = max_k

    @abstractmethod
    async def _evaluation(
        self, prompt: Any, golden: dict | None, request: EvaluationRequest
    ) -> EvaluationResponse[RewardEvaluationResult]: ...

    async def evaluation(
        self, prompt: Any, golden: dict | None, request: EvaluationRequest
    ) -> PassAtEvaluationResponse:

        evaluations = [self._evaluation(prompt, golden, request) for _ in range(self.max_k)]
        responses = await asyncio.gather(*evaluations)

        rewards = [
            result.reward for result in sum([response.results for response in responses], [])
        ]
        response_texts = [
            result.response for result in sum([response.results for response in responses], [])
        ]

        # Count number of passing solutions (reward == 1.0)
        pass_count = sum(1 for reward in rewards if reward == 1.0)
        total_count = len(rewards)

        # Calculate pass@N for different N values
        pass_at = {
            k: pass_at_k(total_count, pass_count, k)
            for k in [1, self.max_k]  # You can adjust these values as needed
        }

        greedy_generation_args = request.generation_args.add(
            GenericGenerationArgs(top_k=1, temperature=0.0, top_p=0.0)
        )
        greedy_request = request.model_copy(update={'generation_args': greedy_generation_args})
        greedy_responses = await self._evaluation(prompt, golden, greedy_request)
        assert (
            len(greedy_responses.results) == 1
        ), "Evaluation only requested a single response but got multiple responses"
        greedy_response = greedy_responses.results[0]
        result = PassAtEvaluationResult(
            prompt=greedy_response.prompt,
            problem_id=golden['problem_id'] if golden and 'problem_id' in golden else None,
            pass_at=pass_at,
            response=response_texts,
            reward=rewards,
            greedy_response=greedy_response.response,
            greedy_reward=greedy_response.reward,
        )
        return PassAtEvaluationResponse(results=[result], env_id=self.env_id)


================================================
FILE: megatron/rl/agent/remote_agent.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from ..server.agent.fastapi_env_server import FastAPIEnvServer
from .api import EvaluationAgent, GroupedRolloutGenerator, RolloutGenerator


class RemoteAgent(FastAPIEnvServer, RolloutGenerator, GroupedRolloutGenerator, EvaluationAgent):
    env_id: str = "remote"
    env_server_host_port: str


================================================
FILE: megatron/rl/agent/reward_only_agent.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import asyncio
from typing import Any

import numpy as np
from tqdm.asyncio import tqdm

from ..inference import (
    InferenceResponse,
    LLMChatMessage,
    ReturnsRaw,
    ReturnsTokens,
)
from .api import (
    EvaluationAgent,
    EvaluationRequest,
    EvaluationResponse,
    GroupedRolloutGenerator,
    GroupedRolloutRequest,
    RewardEvaluationResult,
    Rollout,
    RolloutGenerator,
    RolloutRequest,
    TokenRollout,
)
from .pass_at_evaluation_agent import PassAtEvaluationAgent


class RewardOnlyEvaluationResponse(EvaluationResponse[RewardEvaluationResult]):
    type_name: str = 'RewardOnlyEvaluationResponse'

    def metrics(self):
        return {'reward': [el.reward for el in self.results]}


class RewardOnlyAgent(RolloutGenerator, GroupedRolloutGenerator, PassAtEvaluationAgent):
    """Agent that returns rollouts generated via default inference with a fixed reward function."""

    env_id: str | None = None

    def get_dataset(self, validation: bool = False):
        """Return validation or train dataset."""
        raise NotImplementedError("Derived class must implement get_dataset.")

    async def get_reward(self, response: str, golden: Any) -> float:
        """Given the LLM response and the golden data, provide a reward."""
        raise NotImplementedError("Derived class must implement get_reward")

    async def get_prompt(self, validation: bool) -> tuple[str, Any]:
        """Return a tuple with the prompt string and the golden data."""
        raise NotImplementedError("Derived class must implement get_prompt")

    async def evaluation_prompts(
        self, num_prompts: int, validation: bool = False
    ) -> list[tuple[str, Any]]:
        """Get evaluation prompts for the agent. This method should be overridden by subclasses."""
        raise NotImplementedError

    def _get_rank_subset(
        self, prompts: list[tuple[str, Any]], num_prompts: int, rank: int, world_size: int
    ) -> list[tuple[str, Any]]:
        """Helper method to get the subset of prompts for a given rank.

        Args:
            prompts: List of all prompts
            num_prompts: Total number of prompts to use
            rank: Current process rank
            world_size: Total number of processes

        Returns:
            Subset of prompts for the current rank
        """
        # Take first num_prompts from all prompts
        prompts = prompts[:num_prompts]

        # Split prompts into chunks for each rank
        chunk_size = (len(prompts) + world_size - 1) // world_size
        start_idx = rank * chunk_size
        end_idx = min(start_idx + chunk_size, len(prompts))

        return prompts[start_idx:end_idx]

    async def rollout_from_response(
        self, request: RolloutRequest, response: InferenceResponse, golden: Any
    ) -> Rollout:
        assert isinstance(
            request.inference_interface, ReturnsRaw
        ), "InferenceInterface must support raw_text return to provide rollouts."
        raw_text = response.raw_text

        response_text = response.response.content

        if isinstance(request.inference_interface, ReturnsTokens):
            logprobs = response.logprobs
            generation_mask = [
                True if (x >= response.prompt_length) else False
                for x in range(len(response.token_ids))
            ]
            rollout = TokenRollout(
                trajectory=[response.token_ids],
                reward=await self.get_reward(response_text, golden),
                logprobs=[logprobs],
                generation_mask=[generation_mask],
                env_id=self.env_id,
                problem_id=golden['problem_id'] if 'problem_id' in golden else None,
                policy_epoch=[response.policy_epoch],
                kv_cache_epoch=[response.kv_cache_epoch],
                num_evictions=[response.num_evictions],
            )
        else:
            rollout = Rollout(
                trajectory=[raw_text],
                reward=await self.get_reward(response_text, golden),
                env_id=self.env_id,
                problem_id=golden['problem_id'] if 'problem_id' in golden else None,
                policy_epoch=[response.policy_epoch],
                kv_cache_epoch=[response.kv_cache_epoch],
                num_evictions=[response.num_evictions],
            )

        return rollout

    async def rollout(self, request: RolloutRequest) -> Rollout:

        prompt, golden = await self.get_prompt(validation=request.validation)

        inference_request = request.inference_interface.prepare_request(
            prompt, request.generation_args
        )

        response = await request.inference_interface.agenerate(inference_request)

        return await self.rollout_from_response(request, response, golden)

    async def group_rollout(self, request: GroupedRolloutRequest) -> list[Rollout]:

        prompt, golden = await self.get_prompt(validation=request.validation)

        inference_request = request.inference_interface.prepare_request(
            prompt, request.generation_args
        )

        responses = await asyncio.gather(*[request.inference_interface.agenerate(inference_request) for _ in range(request.rollouts_per_group)])
        return [await self.rollout_from_response(request, response, golden) for response in responses]

    async def _evaluation(
        self, prompt: str, golden: Any, request: EvaluationRequest
    ) -> RewardOnlyEvaluationResponse:

        inference_request = request.inference_interface.prepare_request(
            prompt, request.generation_args
        )

        response = await request.inference_interface.agenerate(inference_request)
        response_text = response.response.content

        result = RewardEvaluationResult(
            env_id=self.env_id,
            prompt=[prompt] if isinstance(prompt, LLMChatMessage) else prompt,
            response=response.response,
            reward=await self.get_reward(response_text, golden),
            problem_id=golden['problem_id'] if 'problem_id' in golden else None,
        )

        return RewardOnlyEvaluationResponse(results=[result], env_id=self.env_id)

    async def run_evaluation(self, request: EvaluationRequest):

        # Get all prompts first
        all_prompts = list(
            await self.evaluation_prompts(
                num_prompts=request.num_prompts, validation=request.validation
            )
        )

        # Then get this rank's subset if needed
        if request.rank_info is not None:
            prompts_to_evaluate = self._get_rank_subset(
                all_prompts, request.num_prompts, request.rank_info[0], request.rank_info[1]
            )
        else:
            prompts_to_evaluate = all_prompts

        results = await tqdm.gather(
            *[self.evaluation(p, g, request) for p, g in prompts_to_evaluate],
            desc="Evaluating prompts..",
        )
        return type(results[0])(
            results=sum([result.results for result in results], []), env_id=self.env_id
        )


================================================
FILE: megatron/rl/agent/weighted_multi_task.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import asyncio
from typing import Any, Optional, Type

import numpy as np

from .. import import_class
from .api import (
    AgentBaseModel,
    ContrastiveRollout,
    ContrastiveRolloutGenerator,
    EvaluationAgent,
    EvaluationRequest,
    EvaluationResponse,
    GroupedRolloutGenerator,
    GroupedRolloutRequest,
    Rollout,
    RolloutGenerator,
    RolloutRequest,
)


class AgentConfig(AgentBaseModel):
    """Configuration for a single agent in the weighted multi-agent setup."""

    agent_type: Type[RolloutGenerator]
    agent_args: dict
    weight: float
    evaluation_only: bool = False

    def __init__(self, **data):
        super().__init__(**data)
        if self.weight < 0:
            raise ValueError("Agent weight must be non-negative")


class WeightedMultiTask(
    RolloutGenerator, GroupedRolloutGenerator, ContrastiveRolloutGenerator, EvaluationAgent
):
    """An agent that manages multiple sub-agents and distributes rollouts according to weights."""

    def __init__(self, agent_configs: list[AgentConfig]):
        super().__init__()
        if not agent_configs:
            raise ValueError("Must provide at least one agent configuration")

        # Initialize all sub-agents
        self.agents = []
        self.weights = []
        self.agent_configs = agent_configs  # Store the configs for later use

        # Calculate total weight only among non-evaluation agents
        total_weight = sum(config.weight for config in agent_configs if not config.evaluation_only)
        if total_weight <= 0:
            raise ValueError("Total weight of non-evaluation agents must be positive")

        for config in agent_configs:
            # Initialize the agent with its arguments
            agent = config.agent_type(**config.agent_args)
            self.agents.append(agent)
            # Only normalize weights for non-evaluation agents
            if config.evaluation_only:
                self.weights.append(0.0)
            else:
                self.weights.append(config.weight / total_weight)

    @classmethod
    def from_config(
        cls, config: list[dict[str, Any]], *, parallel_generation_tasks: int | None = None
    ) -> 'WeightedMultiTask':
        """Create a WeightedMultiTask from a config list.

        Args:
            config: List of dicts with keys:
                - agent_type: String path to agent class
                - agent_args: Dict of arguments to pass to agent constructor
                - weight: Float weight for this agent

        Returns:
            A WeightedMultiTask instance
        """
        agent_configs = []
        for entry in config:
            if not all(k in entry for k in ['agent_type', 'agent_args', 'weight']):
                raise ValueError(f"Missing required keys in config entry: {entry}")
            agent_args = entry.get('agent_args', {})
            agent_args['parallel_generation_tasks'] = parallel_generation_tasks

            # Import and instantiate the agent class
            agent_type = import_class(entry['agent_type'])
            agent_configs.append(
                AgentConfig(
                    agent_type=agent_type,
                    agent_args=agent_args,
                    weight=float(entry['weight']),
                    evaluation_only=entry.get('evaluation_only', False),
                )
            )

        instance = cls(agent_configs)
        if parallel_generation_tasks is not None:
            instance.parallel_generation_tasks = parallel_generation_tasks
        return instance

    def _distribute_counts(self, total_count: int, distribute_remainder: bool = True) -> list[int]:
        """Helper method to distribute counts according to weights.

        This implementation ensures the most balanced distribution possible while
        maintaining the relative proportions specified by weights.

        Args:
            total_count: Total number of items to distribute
            distribute_remainder: Whether to distribute the remainder of the counts to the agents with the largest fractional parts

        Returns:
            List of counts for each agent, summing to total_count
        """
        # Filter out evaluation-only agents for rollout distribution
        rollout_weights = [
            w for w, config in zip(self.weights, self.agent_configs) if not config.evaluation_only
        ]
        if not rollout_weights:
            raise ValueError("No non-evaluation agents available for rollout generation")

        # Calculate exact fractional counts
        exact_counts = [total_count * w for w in rollout_weights]

        # Get integer part of each count
        base_counts = [int(count) for count in exact_counts]
        remaining = total_count - sum(base_counts)

        if distribute_remainder:
            # Sort indices by fractional parts to distribute remaining counts
            # to those with largest fractional parts first
            fractional_parts = [count - int(count) for count in exact_counts]
            indices = list(range(len(rollout_weights)))
            indices.sort(key=lambda i: fractional_parts[i], reverse=True)

            # Distribute remaining counts
            for i in range(remaining):
                base_counts[indices[i]] += 1

        # Map back to original indices, skipping evaluation-only agents
        final_counts = []
        rollout_idx = 0
        for config in self.agent_configs:
            if config.evaluation_only:
                final_counts.append(0)
            else:
                final_counts.append(base_counts[rollout_idx])
                rollout_idx += 1

        return final_counts

    async def group_rollout(self, request: GroupedRolloutRequest) -> list[Rollout]:
        raise NotImplementedError(
            "WeightedMultiTask is a collection of tasks and therefore doesn't implement this method directly. Use get_grouped_rollouts instead to generate grouped rollouts."
        )

    async def rollout(self, request: RolloutRequest) -> Rollout:
        raise NotImplementedError(
            "WeightedMultiTask is a collection of tasks and therefore doesn't implement this method directly. Use get_reward_rollouts instead to generate rollouts."
        )

    async def get_reward_rollouts(self, request: RolloutRequest) -> list[Rollout]:
        """Distribute rollouts across sub-agents according to weights."""
        agent_rollouts = self._distribute_counts(request.num_rollouts)

        # Create tasks for each agent with non-zero rollouts
        tasks = []
        for agent, num_rollouts in zip(self.agents, agent_rollouts):
            if num_rollouts > 0:
                agent_request = RolloutRequest(
                    num_rollouts=num_rollouts,
                    inference_interface=request.inference_interface,
                    validation=request.validation,
                    generation_args=request.generation_args,
                )
                tasks.append(agent.get_reward_rollouts(agent_request))

        # Run all tasks concurrently and gather results
        all_rollouts_lists = await asyncio.gather(*tasks)
        return [rollout for rollouts in all_rollouts_lists for rollout in rollouts]

    async def get_grouped_rollouts(self, request: GroupedRolloutRequest):
        """Distribute grouped rollouts across sub-agents according to weights."""
        agent_groups = self._distribute_counts(request.num_groups)
        agent_pgts = self._distribute_counts(self.parallel_generation_tasks)
        agent_slots = self._distribute_counts(request.num_groups, distribute_remainder=False)
        agent_slots = np.array(agent_slots) / np.gcd.reduce(agent_slots)

        # Create tasks for each agent with non-zero groups
        generators = []
        for agent, num_groups, pgt in zip(self.agents, agent_groups, agent_pgts, strict=True):
            if num_groups > 0:
                if not isinstance(agent, GroupedRolloutGenerator):
                    raise TypeError(
                        f"Agent of type {type(agent)} does not support grouped rollouts"
                    )
                agent.parallel_generation_tasks = pgt
                agent_request = GroupedRolloutRequest(
                    num_groups=num_groups,
                    streaming=request.streaming,
                    enforce_order=request.enforce_order,
                    rollouts_per_group=request.rollouts_per_group,
                    inference_interface=request.inference_interface,
                    validation=request.validation,
                    generation_args=request.generation_args,
                    filter_groups_with_same_reward=request.filter_groups_with_same_reward,
                )
                generators.append(agent.get_grouped_rollouts(agent_request))
            else:
                generators.append(None)

        while any(generators):
            balanced_rollouts = asyncio.Queue()

            async def get_balanced_rollouts_if_remaining(agent_id):
                generated_rollouts = 0
                while generated_rollouts < agent_slots[agent_id]:
                    if generators[agent_id] is None:
                        return
                    try:
                        await balanced_rollouts.put(await anext(generators[agent_id]))
                        generated_rollouts += 1
                    except StopAsyncIteration:
                        await balanced_rollouts.put(None)
                        generators[agent_id] = None
                        return

            tasks = [
                asyncio.create_task(get_balanced_rollouts_if_remaining(agent_id))
                for agent_id in range(len(generators))
            ]

            try:
                while balanced_rollouts.qsize() > 0 or not all(task.done() for task in tasks):
                    rollout = await balanced_rollouts.get()
                    if rollout is not None:
                        yield rollout
            finally:
                for task in tasks:
                    task.cancel()

    async def get_contrastive_rollouts(self, request: RolloutRequest) -> list[ContrastiveRollout]:
        """Distribute contrastive rollouts across sub-agents according to weights."""
        agent_rollouts = self._distribute_counts(request.num_rollouts)

        # Create tasks for each agent with non-zero rollouts
        tasks = []
        for agent, num_rollouts in zip(self.agents, agent_rollouts):
            if num_rollouts > 0:
                if not isinstance(agent, ContrastiveRolloutGenerator):
                    raise TypeError(
                        f"Agent of type {type(agent)} does not support contrastive rollouts"
                    )

                agent_request = RolloutRequest(
                    num_rollouts=num_rollouts,
                    inference_interface=request.inference_interface,
                    validation=request.validation,
                    generation_args=request.generation_args,
                )
                tasks.append(agent.get_contrastive_rollouts(agent_request))

        # Run all tasks concurrently and gather results
        all_rollouts_lists = await asyncio.gather(*tasks)
        return [rollout for rollouts in all_rollouts_lists for rollout in rollouts]

    async def run_evaluation(self, request: EvaluationRequest) -> list[EvaluationResponse]:
        """Run evaluation across all sub-agents."""
        # Create tasks for each agent
        tasks = []
        for agent in self.agents:
            if not isinstance(agent, EvaluationAgent):
                raise TypeError(f"Agent of type {type(agent)} does not support evaluation")

            agent_request = EvaluationRequest(
                num_prompts=request.num_prompts,  # For evaluation, we don't distribute prompts
                rank_info=request.rank_info,  # Pass through original rank info
                inference_interface=request.inference_interface,
                validation=request.validation,
                generation_args=request.generation_args,
            )
            tasks.append(agent.run_evaluation(agent_request))

        # Run all tasks concurrently and gather results
        all_responses = await asyncio.gather(*tasks)

        return all_responses


================================================
FILE: megatron/rl/inference/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
# ruff: noqa: F403

from .api import *
from .inference_interface import *


================================================
FILE: megatron/rl/inference/api.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from pydantic import BaseModel

from ..__init__ import Request


class LLMChatMessage(BaseModel):
    role: str
    content: str


class InferenceRequest(Request):
    prompt: list[LLMChatMessage]
    tools: list[dict] | None = None


class InferenceResponse(BaseModel):
    """The minimum required response for an inference interface."""

    response: LLMChatMessage
    raw_text: str | None = None
    token_ids: list[int] | None = None
    prompt_length: int | None = None
    logprobs: list[float] | None = None
    policy_epoch: list[tuple[int, int]]
    kv_cache_epoch: list[tuple[int, int]]
    num_evictions: int


================================================
FILE: megatron/rl/inference/inference_interface.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import asyncio

from pydantic import BaseModel

from ..__init__ import GenericGenerationArgs
from ..inference.api import (
    InferenceRequest,
    InferenceResponse,
    LLMChatMessage,
)


class InferenceInterface(BaseModel):
    """Inference interface for chat models."""

    class Config:
        arbitrary_types_allowed = True

    def prepare_request(
        self, prompt: str | list[LLMChatMessage], generation_args: GenericGenerationArgs
    ) -> InferenceRequest:
        prompt = [LLMChatMessage(role='user', content=prompt)] if isinstance(prompt, str) else prompt
        return InferenceRequest(prompt=prompt, generation_args=generation_args)

    async def base_generate(self, request: InferenceRequest) -> InferenceResponse:
        assert NotImplementedError("Direct Inference Classes must implement the base_generate method.")

    async def agenerate(
        self, request: InferenceRequest
    ) -> InferenceResponse:
        return await self.base_generate(request)

    def generate(
        self, request: InferenceRequest
    ) -> InferenceResponse:
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            return asyncio.run(self.agenerate(request))
        else:
            return loop.run_until_complete(self.agenerate(request))

class ReturnsRaw(InferenceInterface):
    """Mix-In for interface that supports returning complete string fed to the LLM."""

    # TODO: Should this be a mix-in or a class variable?


class ReturnsTokens(InferenceInterface):
    """Mix-In for interface that supports returning the complete list of tokens fed to the LLM."""

    # TODO: Should this be a mix-in or a class variable?


class ReturnsLogProbs(ReturnsTokens):
    """Mix-In for interface that supports returning the logprobs for a set of tokens."""

    # TODO: Should this be a mix-in or a class variable?


================================================
FILE: megatron/rl/inference/megatron.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import asyncio
import logging

import httpx
import torch.distributed as dist
from openai import AsyncOpenAI, DefaultAioHttpClient
from pydantic import PrivateAttr

try:
    import h2  # noqa: F401
    use_http2 = True
except ImportError:
    use_http2 = False

from megatron.core.inference.config import KVCacheManagementMode
from megatron.core.inference.engines.dynamic_engine import DynamicInferenceEngine, EngineState
from megatron.core.inference.inference_client import InferenceClient
from megatron.core.models.gpt.gpt_model import GPTModel
from megatron.core.utils import log_single_rank
from megatron.training.global_vars import get_args, get_tokenizer

from ..inference.inference_interface import (
    InferenceRequest,
    InferenceResponse,
    LLMChatMessage,
    ReturnsRaw,
    ReturnsTokens,
)
from ..server.api import InferenceServer

logger = logging.getLogger(__name__)
logging.getLogger("httpx").setLevel(logging.WARNING)

class MegatronLocal(InferenceServer, ReturnsTokens, ReturnsRaw):
    """Interface to use MCoreEngine directly as an inference engine."""

    host: str
    port: int

    _client: InferenceClient = PrivateAttr(None)
    _inference_engine: DynamicInferenceEngine = PrivateAttr(None)
    _rl_kv_cache_management_mode: KVCacheManagementMode = PrivateAttr(None)
    _openai_client: AsyncOpenAI = PrivateAttr(None)

    async def base_generate(self, request: InferenceRequest) -> InferenceResponse:
        tokenizer = get_tokenizer()
        args = get_args()

        # Use the shared, optimized client instead of spinning up a new one
        client = self._openai_client

        # Things that may be problematic when doing this switch
        # - Add BOS token
        # - Skip prompt logprobs
        response = await client.chat.completions.create(
            model="",
            messages=[message.model_dump() for message in request.prompt],
            temperature=request.generation_args.temperature or 1.0,
            top_p=request.generation_args.top_p or 0.0,
            n=1,
            logprobs=True,
            extra_body={
                "skip_prompt_log_probs": True,
                "add_BOS": (not args.rl_skip_bos_token and tokenizer.bos is not None),
            },
        )

        choice = response.choices[0]

        return InferenceResponse(
            # TODO: Handle tool calls and reasoning in LLMChatMessage
            response=LLMChatMessage(**choice.message.model_dump(include={'role', 'content'})),
            raw_text=choice.raw_text,
            token_ids=choice.prompt_token_ids + choice.generation_token_ids,
            logprobs=choice.generation_log_probs,
            prompt_length=len(choice.prompt_token_ids),
            policy_epoch=choice.policy_epoch,
            kv_cache_epoch=choice.kv_cache_epoch,
            num_evictions=getattr(choice, 'num_evictions', 0),
        )

    @classmethod
    async def launch(cls, model: GPTModel, **kwargs):
        # Import here to avoid circular imports
        from megatron.inference.utils import get_dynamic_inference_engine

        args = get_args()
        tokenizer = get_tokenizer()

        if tokenizer.bos is None:
            log_single_rank(
                logger,
                logging.WARNING,
                "WARNING: Tokenizer has no BOS token so prompt will not have BOS token",
            )

        inference_engine: DynamicInferenceEngine = get_dynamic_inference_engine(model=model)
        dp_addr = await inference_engine.start_listening_to_data_parallel_coordinator(
            inference_coordinator_port=41521, launch_inference_coordinator=True,
        )

        if dist.get_rank() == 0:
            from megatron.core.inference.text_generation_server.dynamic_text_gen_server import start_text_gen_server

            client = InferenceClient(inference_coordinator_address=dp_addr)
            client.start()

            start_text_gen_server(
                coordinator_addr=dp_addr,
                tokenizer=inference_engine.controller.tokenizer,
                rank=dist.get_rank(),
                server_port=kwargs.get('port', 8294),
                parsers=[],
                verbose=kwargs.get('verbose', False),
            )
        else:
            client = None

        launched_server = cls(**kwargs)
        launched_server._client = client
        launched_server._inference_engine = inference_engine
        launched_server._rl_kv_cache_management_mode = KVCacheManagementMode(
            args.rl_kv_cache_management_mode
        )

        concurrency_limit = args.grpo_prompts_per_step * args.grpo_group_size * args.rl_parallel_generation_tasks
        custom_limits = httpx.Limits(
            max_connections=concurrency_limit,
            max_keepalive_connections=concurrency_limit,
        )
        http_client = DefaultAioHttpClient(
            timeout=None,
            limits=custom_limits,
            http2=use_http2
        )

        launched_server._openai_client = AsyncOpenAI(
            base_url=f"http://{launched_server.host}:{launched_server.port}",
            api_key="NONE",
            http_client=http_client
        )

        return launched_server

    async def kill(self):
        # Gracefully close the shared OpenAI client connections
        if self._openai_client is not None:
            await self._openai_client.close()

        if dist.get_rank() == 0:
            self._client.pause_engines()
        await self._inference_engine.wait_until(EngineState.PAUSED)

        if dist.get_rank() == 0:
            self._client.stop_engines()
        await self._inference_engine.wait_until(EngineState.STOPPED)

        if dist.get_rank() == 0:
            self._client.shutdown_coordinator()
            self._client.stop()

        if dist.get_rank() == 0:
            from megatron.core.inference.text_generation_server.dynamic_text_gen_server import stop_text_gen_server
            stop_text_gen_server()

    def set_generation_epoch(self, generation_epoch: int):
        if dist.get_rank() == 0:
            self._client.set_generation_epoch(generation_epoch)

    async def suspend(self):
        if dist.get_rank() == 0:
            self._client.pause_engines()
        await self._inference_engine.wait_until(EngineState.PAUSED)

        if dist.get_rank() == 0:
            self._client.suspend_engines()
        await self._inference_engine.wait_until(EngineState.SUSPENDED)

    async def resume(self):
        if self._inference_engine._state_events[EngineState.RUNNING].is_set():
            return

        if dist.get_rank() == 0:
            self._client.resume_engines()
        await self._inference_engine.wait_until(EngineState.RESUMED)

        if dist.get_rank() == 0:
            self._client.unpause_engines()
        await self._inference_engine.wait_until(EngineState.RUNNING)


================================================
FILE: megatron/rl/logging.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import os
from datetime import datetime

LOG_DIR = os.environ.get("LANGRL_LOG_DIR", None)
LOG_PREFIX = os.environ.get("LANGRL_LOG_PREFIX", "LANG_RL")

print(f"{LOG_PREFIX} Log directory: {LOG_DIR}")

log_handle = None
if LOG_DIR:
    log_handle = open(LOG_DIR + '/lang_rl.log', "w")

prefix = f"{LOG_PREFIX}: "


def log(message):
    if log_handle:
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        log_handle.write(f"[{timestamp}] {prefix}{message}\n")
        log_handle.flush()


================================================
FILE: megatron/rl/parallel_utils.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""
Utilities for building process groups for RL inference models with custom parallelism.
"""

from typing import Optional

import torch.distributed as dist

from megatron.core import mpu
from megatron.core.hyper_comm_grid import HyperCommGrid
from megatron.core.process_groups_config import ProcessGroupCollection


def build_inference_pg_collection(
    world_size: int,
    tp_size: Optional[int] = None,
    pp_size: Optional[int] = None,
    cp_size: Optional[int] = None,
    ep_size: Optional[int] = None,
    expt_tp_size: Optional[int] = None,
    use_tp_pp_dp_mapping: bool = False,
    rank_offset: int = 0,
) -> ProcessGroupCollection:
    """
    Build a ProcessGroupCollection for an RL inference model with custom parallelism.

    Uses two HyperCommGrids matching the structure of mpu:
    - decoder_grid: for dense/attention layers (tp, cp, dp, pp)
    - expert_grid: for MoE expert layers (expt_tp, ep, expt_dp, pp)

    Args:
        world_size: Total world size (number of ranks).
        tp_size: Tensor model parallel size. Defaults to training's TP size.
        pp_size: Pipeline parallel size. Defaults to training's PP size.
        cp_size: Context parallel size. Defaults to training's CP size.
        ep_size: Expert parallel size. Defaults to training's EP size.
        expt_tp_size: Expert tensor parallel size. Defaults to training's expert TP size.
        use_tp_pp_dp_mapping: If True, use 'tp-pp-dp' order; otherwise 'tp-dp-pp'.
        rank_offset: Starting rank when the grid doesn't span the entire communication world.
                     Used in non-collocated mode where model ranks don't start from 0.

    Returns:
        ProcessGroupCollection configured for the inference model.
    """
    # Use current MPU values as defaults
    if tp_size is None:
        tp_size = mpu.get_tensor_model_parallel_world_size()
    if cp_size is None:
        cp_size = mpu.get_context_parallel_world_size()
    if pp_size is None:
        pp_size = mpu.get_pipeline_model_parallel_world_size()
    if ep_size is None:
        ep_size = mpu.get_expert_model_parallel_world_size()
    if expt_tp_size is None:
        expt_tp_size = mpu.get_expert_tensor_parallel_world_size()


    # Compute DP size for dense layers (same formula as mpu)
    # world = tp × cp × dp × pp
    dp_size = world_size // (tp_size * cp_size * pp_size)
    assert dp_size >= 1 and (tp_size * cp_size * dp_size * pp_size) == world_size, (
        f"World size ({world_size}) must be divisible by tp*cp*pp ({tp_size * cp_size * pp_size})"
    )

    # Compute expert DP size (same formula as mpu)
    # world = expt_tp × ep × expt_dp × pp
    expt_dp_size = world_size // (expt_tp_size * ep_size * pp_size)
    assert expt_dp_size >= 1 and (expt_tp_size * ep_size * expt_dp_size * pp_size) == world_size, (
        f"World size ({world_size}) must be divisible by expt_tp*ep*pp ({expt_tp_size * ep_size * pp_size})"
    )

    rank = dist.get_rank()

    # ====================
    # Create decoder grid for dense/attention layers
    # Matches mpu's decoder_rank_generator with ep=1
    # ====================
    if use_tp_pp_dp_mapping:
        # Order: tp-cp-pp-dp
        decoder_grid = HyperCommGrid(
            [tp_size, cp_size, pp_size, dp_size],
            ["tp", "cp", "pp", "dp"],
            rank_offset=rank_offset
        )
    else:
        # Order: tp-cp-dp-pp (default)
        decoder_grid = HyperCommGrid(
            [tp_size, cp_size, dp_size, pp_size],
            ["tp", "cp", "dp", "pp"],
            rank_offset=rank_offset
        )

    # Create dense layer groups from decoder_grid
    tp_group = decoder_grid.create_pg("tp")
    cp_group = decoder_grid.create_pg("cp")
    pp_group = decoder_grid.create_pg("pp")
    dp_group = decoder_grid.create_pg("dp")
    mp_group = decoder_grid.create_pg(["tp", "pp"])
    tp_cp_group = decoder_grid.create_pg(["tp", "cp"])
    dp_cp_group = decoder_grid.create_pg(["cp", "dp"])
    tp_dp_cp_group = decoder_grid.create_pg(["tp", "cp", "dp"])

    # ====================
    # Create expert grid for MoE expert layers
    # Matches mpu's expert_decoder_rank_generator with cp=1
    # ====================
    if use_tp_pp_dp_mapping:
        # Order: tp-ep-pp-dp
        expert_grid = HyperCommGrid(
            [expt_tp_size, ep_size, pp_size, expt_dp_size],
            ["tp", "ep", "pp", "dp"],
            rank_offset=rank_offset
        )
    else:
        # Order: tp-ep-dp-pp (default)
        expert_grid = HyperCommGrid(
            [expt_tp_size, ep_size, expt_dp_size, pp_size],
            ["tp", "ep", "dp", "pp"],
            rank_offset=rank_offset
        )

    # Verify PP groups match between decoder and expert grids (required by mpu)
    decoder_pp_enum = decoder_grid.get_rank_enum("pp")
    expert_pp_enum = expert_grid.get_rank_enum("pp")
    assert decoder_pp_enum == expert_pp_enum, (
        f"PP groups must match between decoder and expert grids. "
        f"Decoder: {decoder_pp_enum}, Expert: {expert_pp_enum}"
    )

    # Create expert layer groups from expert_grid
    ep_group = expert_grid.create_pg("ep")
    expt_tp_group = expert_grid.create_pg("tp")
    expt_dp_group = expert_grid.create_pg("dp")
    tp_ep_group = expert_grid.create_pg(["tp", "ep"])
    tp_ep_pp_group = expert_grid.create_pg(["tp", "ep", "pp"])

    # ====================
    # Embedding groups (derived from PP groups)
    # ====================
    embd_group = None
    pos_embd_group = None

    pp_rank_enum = decoder_grid.get_rank_enum("pp")
    for pp_ranks in pp_rank_enum:
        # Embedding is on first and last PP stage
        if len(pp_ranks) == 1:
            embd_ranks = [pp_ranks[0]]
        else:
            embd_ranks = [pp_ranks[0], pp_ranks[-1]]
        group = dist.new_group(ranks=embd_ranks)
        if rank in embd_ranks:
            embd_group = group

        # Position embedding is only on first PP stage
        pos_embd_ranks = [pp_ranks[0]]
        group = dist.new_group(ranks=pos_embd_ranks)
        if rank in pos_embd_ranks:
            pos_embd_group = group

    return ProcessGroupCollection(
        tp=tp_group,
        cp=cp_group,
        pp=pp_group,
        ep=ep_group,
        embd=embd_group,
        pos_embd=pos_embd_group,
        dp=dp_group,
        tp_cp=tp_cp_group,
        mp=mp_group,
        expt_tp=expt_tp_group,
        expt_dp=expt_dp_group,
        tp_ep=tp_ep_group,
        tp_ep_pp=tp_ep_pp_group,
        dp_cp=dp_cp_group,
        tp_dp_cp=tp_dp_cp_group,
    )


================================================
FILE: megatron/rl/rl_utils.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import gc

import copy
from functools import partial
# Keep this to make the env registered.
import itertools
import math
import logging
import json
import os
from collections import Counter, defaultdict
from contextlib import contextmanager, nullcontext
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional 

import numpy as np
import torch
import torch.distributed as dist
import yaml
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter

from megatron.core import mpu
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.full_cuda_graph import FullCudaGraphWrapper
from megatron.core.models.common.language_module.language_module import LanguageModule
from megatron.core.num_microbatches_calculator import reconfigure_num_microbatches_calculator
from megatron.core.optimizer import MegatronOptimizer
from megatron.core.pipeline_parallel import get_forward_backward_func
from megatron.core.pipeline_parallel.utils import is_pp_last_stage, get_pp_last_rank
from megatron.core.rerun_state_machine import RerunDataIterator
from megatron.core.tokenizers import MegatronTokenizer
from megatron.core.tokenizers.text.libraries.huggingface_tokenizer import HuggingFaceTokenizer
from megatron.core.transformer.cuda_graphs import _CudagraphGlobalRecord
from megatron.core.transformer.enums import CudaGraphScope
from megatron.core.transformer.utils import (
    toggle_cuda_graphs,
    transition_moe_cudagraphs,
)
from megatron.core.inference.utils import set_decode_expert_padding
from megatron.core.resharding.refit import swap_model_weights
from megatron.core.inference.unified_memory import (
    advise_managed_module_parameters_preferred_location,
    prefetch_managed_module_parameters,
)
from megatron.core.inference.utils import device_memory_summary
from megatron.core.utils import get_asyncio_loop, log_single_rank
from megatron.rl.sequence_packing_utils import (
    get_microbatch_dataloader,
    pack_inference_logprobs,
    compute_packed_inference_logprobs_stats,
    pack_all_trajectories,
    load_packed_data_by_index,
    get_sequence_packing_tensorboard_metrics,
    get_sequence_packing_log_info,
    get_default_packed_seq_params,
    update_microbatch_calculator,
)
from megatron.rl.agent.api import (
    EvaluationRequest,
    EvaluationResponse,
    GroupedRolloutRequest,
    GroupedRollouts,
    RewardEvaluationResult,
    Rollout,
    RolloutGroup,
    Rollouts,
    TokenRollout,
)
from megatron.rl.agent.weighted_multi_task import WeightedMultiTask
from megatron.rl.inference.megatron import MegatronLocal
from megatron.rl.logging import LOG_DIR as lang_rl_log_dir
from megatron.rl.logging import log as lang_rl_log
from megatron.rl.server.inference.inference_interface_server import InferenceInterfaceServer
from megatron.training.global_vars import (
    get_args,
    get_tensorboard_writer,
    get_tokenizer,
    get_wandb_writer,
)
from megatron.training.utils import (
    get_ltor_masks_and_position_ids,
    get_nvtx_range,
    print_rank_0,
    unwrap_model,
)
from megatron.core.utils import get_pg_rank, get_pg_size, get_attr_wrapped_model
from megatron.core.process_groups_config import ProcessGroupCollection
from wandb import wandb_run
from megatron.core.transformer.custom_layers.batch_invariant_kernels import (
    is_batch_invariant_mode_enabled,
)

from megatron.core.inference.contexts.dynamic_context import HAVE_TORCH_MEMORY_SAVER
if HAVE_TORCH_MEMORY_SAVER:
    from torch_memory_saver import torch_memory_saver

logger = logging.getLogger(__name__)

# Global variable to store packing context for forward_step
_GLOBAL_PACKING_CONTEXT = None


# Track whether the inference model is currently paused (offloaded to CPU).
# Model starts on GPU after creation and is used immediately, so starts as False.
_INFERENCE_MODEL_IS_PAUSED = False


def _torch_saver_swap_inference_model(*, to_cpu: bool) -> None:
    """Swap RL inference model weights between CPU and GPU using torch_memory_saver.

    Uses torch_memory_saver.pause()/resume() to transfer inference model weights
    that were allocated within a torch_memory_saver.region() context.

    Args:
        to_cpu: If True, move weights to CPU (pause). If False, restore weights to GPU (resume).
    """
    global _INFERENCE_MODEL_IS_PAUSED

    if not HAVE_TORCH_MEMORY_SAVER:
        raise RuntimeError(
            "torch_memory_saver is required for inference model offloading when not using UVM. "
            "Please install it: pip install torch_memory_saver "
            "(see https://github.com/fzyzcjy/torch_memory_saver)"
        )

    tag = "rl_inference_model"
    if to_cpu:
        if not _INFERENCE_MODEL_IS_PAUSED:
            print_rank_0(f"torch_memory_saver: pausing {tag}, before: {device_memory_summary()}")
            torch_memory_saver.pause(tag)
            _INFERENCE_MODEL_IS_PAUSED = True
            print_rank_0(f"torch_memory_saver: paused  {tag}, after:  {device_memory_summary()}")
    else:
        if _INFERENCE_MODEL_IS_PAUSED:
            print_rank_0(f"torch_memory_saver: resuming {tag}, before: {device_memory_summary()}")
            torch_memory_saver.resume(tag)
            _INFERENCE_MODEL_IS_PAUSED = False
            print_rank_0(f"torch_memory_saver: resumed  {tag}, after:  {device_memory_summary()}")


def _maybe_prefetch_separate_inference_model_weights(model_core, *, to_cpu: bool) -> None:
    """Prefetch RL *separate inference model* weights to CPU/GPU.

    Supports two modes:
    1. UVM-based offloading (when --rl-inference-model-unified-memory-level=1)
    2. torch_memory_saver-based offloading (when offloading is enabled but UVM is not)

    Gated by user args; this assumes the separate inference model was allocated
    with UVM or torch_memory_saver when enabled.
    """
    args = get_args()
    if not args.rl_offload_inference_model_weights_when_idle:
        return

    # Check for torch_memory_saver path (when offloading is enabled but UVM is not)
    if args.rl_inference_model_unified_memory_level != 1:
        _torch_saver_swap_inference_model(to_cpu=to_cpu)
        return

    # UVM-based path (when UVM level is 1)
    device = -1 if to_cpu else int(torch.cuda.current_device())
    # Note: include_buffers=False because buffers created with explicit device= in register_buffer()
    # are not allocated via the UVM mempool and will fail UVM operations. Only parameters are UVM-allocated.
    advise_managed_module_parameters_preferred_location(model_core, device=device, include_buffers=False)
    nbytes = prefetch_managed_module_parameters(model_core, device=device, include_buffers=False)
    # Ensure pages are resident before we enter CUDA-graph capture / inference, or before training continues.
    torch.cuda.synchronize()

    if to_cpu:
        print_rank_0(f"[Rank 0] offloaded {nbytes / 1024**2:.2f} MB of separate RL inference model weights to CPU (other ranks may vary)")
    else:
        print_rank_0(f"[Rank 0] prefetched {nbytes / 1024**2:.2f} MB of separate RL inference model weights to GPU (other ranks may vary)")


def verify_model_weights_swap(
    train_model: LanguageModule,
    inference_model: LanguageModule,
    seq_len: int = 8,
    batch_size: int = 2,
    atol: float = 1e-4,
    rtol: float = 1e-4,
) -> None:
    """Verify that the inference model produces the same forward pass outputs
    as the training model after the weights have been swapped.

    This function should be called after swap_model_weights to ensure the weight
    transfer was successful. It runs a forward pass on both models and asserts
    the outputs match.  This is meant for debugging purposes only.

    Args:
        train_model: The training model (source of weights).
        inference_model: The inference model (target of weights).
        seq_len: Sequence length for test input.
        batch_size: Batch size for test input.
        atol: Absolute tolerance for comparing outputs.
        rtol: Relative tolerance for comparing outputs.

    Raises:
        AssertionError: If forward pass outputs do not match within tolerance.
    """
    args = get_args()

    # Unwrap models to get the core module
    train_lm = train_model[0] if isinstance(train_model, (list, tuple)) else train_model
    inf_lm = inference_model[0] if isinstance(inference_model, (list, tuple)) else inference_model

    train_core = unwrap_model(train_lm)
    inf_core = unwrap_model(inf_lm)

    actual_vocab_size = getattr(args, 'padded_vocab_size', 128256)
    actual_seq_len = min(seq_len, getattr(args, 'seq_length', seq_len))
    device = torch.device(f"cuda:{torch.cuda.current_device()}")

    # Generate deterministic test input - same across ALL ranks
    torch.manual_seed(1234)
    test_tokens = torch.randint(
        low=0, high=actual_vocab_size, size=(batch_size, actual_seq_len),
        device=device, dtype=torch.long
    )
    test_position_ids = (
        torch.arange(actual_seq_len, device=device, dtype=torch.long)
        .unsqueeze(0)
        .expand(batch_size, -1)
    )
    test_attention_mask = torch.ones(
        (batch_size, 1, actual_seq_len, actual_seq_len), device=device, dtype=torch.bool
    )

    # Save and restore training state
    train_was_training = train_core.training
    inf_was_training = inf_core.training

    train_core.eval()
    inf_core.eval()

    try:
        with torch.no_grad():
            train_output = train_lm(
                test_tokens, test_position_ids, test_attention_mask,
                runtime_gather_output=True
            )

            inf_output = inf_lm(
                test_tokens, test_position_ids, test_attention_mask,
                runtime_gather_output=True
            )

        # Only check on ranks that have output (last PP stage)
        if train_output is not None and inf_output is not None:
            assert train_output.shape == inf_output.shape, (
                f"Output shape mismatch: train={train_output.shape}, infer={inf_output.shape}"
            )
            
            max_diff = (train_output - inf_output).abs().max().item()
            assert torch.allclose(train_output, inf_output, atol=atol, rtol=rtol), (
                f"Forward pass outputs do not match: max_diff={max_diff:.6e}, atol={atol}, rtol={rtol}"
            )

    finally:
        # Restore training state
        if train_was_training:
            train_core.train()
        if inf_was_training:
            inf_core.train()


@dataclass(slots=True)
class RolloutStats:
    rewards: list[list[float]] # inner list is for a group
    env_ids: list[str] # same length as len(rewards)
    turn_lens: list[list[int]] # token lengths of turns, grouped.
    traj_lens: list[list[int]] # all turns comprise one trajectory.
    num_turns: None | list[list[int]] # num_turns per traj
    advantages: None | list[list[float]]
    min_piold_to_inf_prob: None | float
    max_piold_to_inf_prob: None | float
    mean_piold_to_inf_prob: None | float
    min_inf_train_prob_abs_diff: None | float
    max_inf_train_prob_abs_diff: None | float
    mean_inf_train_prob_abs_diff: None | float
    min_inf_prob: None | float
    max_inf_prob: None | float
    mean_inf_prob: None | float
    policy_epoch: list[list[int]]
    kv_cache_epoch: list[list[int]]
    completed_epochs: list[list[int]]
    num_evictions: list[list[int]]


# Runtime state container for RL-specific data that shouldn't be checkpointed
class RLRuntimeState:
    """Container for runtime state that is not checkpointed, tracking state between rollout collections"""

    def __init__(self):
        self.packing_context = None
        self.last_collection_iteration = 0
        self.sequences_this_iteration_on_rank = 0
        self.latest_batch_num_sequences = 0

    def reset_iteration_counters(self, iteration):
        """Reset per-iteration counters."""
        self.sequences_this_iteration_on_rank = 0
        self.last_collection_iteration = iteration

    def increment_sequences(self, count):
        """Increment the sequence counter."""
        self.sequences_this_iteration_on_rank += count
        self.latest_batch_num_sequences = count


# Global runtime state instance
_rl_runtime_state = RLRuntimeState()


def get_rl_runtime_state():
    """Get the global RL runtime state."""
    return _rl_runtime_state


def update_inference_logprobs_group_stats(
    old_logprobs: torch.Tensor,
    inference_logprobs: torch.Tensor,
    mask: torch.Tensor,
    group_stats: Any,
) -> None:
    """Update group statistics with inference/train logprobs comparison metrics.

    This is the common statistics computation used by both packed and unpacked cases.

    Args:
        old_logprobs: Old logprobs tensor (train side)
        inference_logprobs: Inference logprobs tensor (aligned to match old_logprobs shape)
        mask: Boolean mask indicating valid positions for statistics
        group_stats: Statistics object to update with computed metrics
    """
    n_elems = mask.sum()
    if n_elems > 0:
        ratios = (old_logprobs - inference_logprobs).exp()[mask]
        abs_diffs = (old_logprobs.exp() - inference_logprobs.exp()).abs()[mask]

        group_stats.min_piold_to_inf_prob = ratios.min().item()
        group_stats.max_piold_to_inf_prob = ratios.max().item()
        group_stats.mean_piold_to_inf_prob = (ratios.sum() / n_elems).item()
        group_stats.min_inf_train_prob_abs_diff = abs_diffs.min().item()
        group_stats.max_inf_train_prob_abs_diff = abs_diffs.max().item()
        group_stats.mean_inf_train_prob_abs_diff = (abs_diffs.sum() / n_elems).item()

        inf_probs = inference_logprobs.exp()[mask]
        group_stats.min_inf_prob = inf_probs.min().item()
        group_stats.max_inf_prob = inf_probs.max().item()
        group_stats.mean_inf_prob = inf_probs.mean().item()


def align_unpacked_inference_logprobs(
    inference_logprobs: List[torch.Tensor],
    old_logprobs_for_data: torch.Tensor,
    generation_masks: torch.Tensor,
    group_stats: Any,
) -> torch.Tensor:
    """Align inference logprobs with old_logprobs for unpacked sequences and compute statistics.

    Args:
        inference_logprobs: List of inference logprobs tensors for each sequence
        old_logprobs_for_data: Template tensor with correct shape for alignment
        generation_masks: Tensor indicating which tokens were generated
        group_stats: Statistics object to update with computed metrics

    Returns:
        Aligned inference logprobs tensor
    """
    # Get first occurrence of a generation token
    # In get_logprobs() we chop off the first token -> the generation mask is shifted by one
    gen_masks_for_alignment = generation_masks
    first_gen_tok = gen_masks_for_alignment.int().argmax(dim=1) - 1

    # Align inference logprobs with old_logprobs
    # Note: We use old_logprobs_for_data as template since it has correct shape
    padded_inference_logprobs = old_logprobs_for_data.clone()

    # We need to align old_logprobs and inference logprobs as the latter are only for generations
    for i, inf_logprobs in enumerate(inference_logprobs):
        first_gen_idx = first_gen_tok[i]
        # We subtract -1 here because we append eod token on the train side, and we do not
        # get it from the inference. For the eod token, we reuse old_logprobs value.
        end_idx = min(first_gen_idx + len(inf_logprobs), padded_inference_logprobs.shape[1])
        actual_len = end_idx - first_gen_idx
        if actual_len > 0:
            padded_inference_logprobs[i, first_gen_idx:end_idx] = inf_logprobs[:actual_len]

    # Create truncated mask for statistics
    if old_logprobs_for_data.shape[1] + 1 < gen_masks_for_alignment.shape[1]:
        gen_masks_for_alignment = gen_masks_for_alignment[:, : old_logprobs_for_data.shape[1] + 1]

    truncated_mask = gen_masks_for_alignment[:, 1:].bool()

    # Final safety check
    if truncated_mask.shape != old_logprobs_for_data.shape:
        if truncated_mask.shape[1] > old_logprobs_for_data.shape[1]:
            truncated_mask = truncated_mask[:, : old_logprobs_for_data.shape[1]]
        elif truncated_mask.shape[1] < old_logprobs_for_data.shape[1]:
            pad_size = old_logprobs_for_data.shape[1] - truncated_mask.shape[1]
            truncated_mask = torch.nn.functional.pad(truncated_mask, (0, pad_size), value=False)

    # Sanity check: Two probability values cannot be more than 1.0 apart
    abs_diffs = (old_logprobs_for_data.exp() - padded_inference_logprobs.exp()).abs()[truncated_mask]
    assert all(abs_diffs <= 1.0)

    # Update group statistics using common helper
    update_inference_logprobs_group_stats(
        old_logprobs=old_logprobs_for_data,
        inference_logprobs=padded_inference_logprobs,
        mask=truncated_mask,
        group_stats=group_stats,
    )

    return padded_inference_logprobs


def get_agent(args, parallel_generation_tasks: int | None = None):
    """Get an agent based on environment configuration.

    If args.langrl_env_config is provided, uses weighted environment selection.
    Otherwise falls back to legacy single environment selection.
    """
    with open(args.langrl_env_config, 'r') as f:
        config = yaml.safe_load(f)

    return WeightedMultiTask.from_config(
        config,
        parallel_generation_tasks=parallel_generation_tasks,
    )


_INFERENCE_INTERFACE = None


def get_inference_interface(args, loop, model):
    global _INFERENCE_INTERFACE
    if _INFERENCE_INTERFACE is None:
        _INFERENCE_INTERFACE = loop.run_until_complete(
            MegatronLocal.launch(
                model[0],
                host='0.0.0.0',
                port=8294,
                verbose=args.inference_text_gen_server_logging)
        )
    return _INFERENCE_INTERFACE


_ROLLOUT_GENERATOR = None


def get_rollout_generator(args, inference_interface, n_prompts, samples_per_group):
    global _ROLLOUT_GENERATOR
    if not (streaming := args.rl_partial_rollouts) or _ROLLOUT_GENERATOR is None:
        agent = get_agent(args, parallel_generation_tasks=args.rl_parallel_generation_tasks)
        request = GroupedRolloutRequest(
            num_groups=args.rl_generation_batch_size,
            streaming=streaming,
            rollouts_per_group=samples_per_group,
            inference_interface=inference_interface,
            generation_args={
                'temperature': args.rl_default_temperature,
                'max_tokens': args.inference_max_seq_length,
                'top_p': args.rl_default_top_p,
                'top_k': args.rl_default_top_k,
            },
            filter_groups_with_same_reward=args.grpo_filter_groups_with_same_reward,
            enforce_order=args.rl_enforce_generation_order,
        )
        _ROLLOUT_GENERATOR = agent.get_grouped_rollouts(request)
    return _ROLLOUT_GENERATOR


def get_environment_rollouts(
    model: LanguageModule, inference_model: LanguageModule, optimizer: MegatronOptimizer, n_prompts: int, samples_per_group: int
):
    """Sample environment rollouts from an LLM.

    Args:
        model: Model to sample from.
        inference_model: Inference model to use for inference.
        n_prompts: Number of prompts to sample for across *all* data parallel workers.
        samples_per_group: Amount of trajectories per prompt.

    Returns:
        GroupedRollouts object which is a nested list with each element being a list of rollouts of a group.
    """
    args = get_args()
    nvtx_range = get_nvtx_range()

    if args.rl_offload_optimizer_during_inference:
        with nvtx_range("offload-optimizer-state-and-grad-buffers-during-inference"):
            if not args.rl_training_cuda_graphs:
                model[0].offload_grad_buffers()
            else:
                logger.warning(
                    "Gradient buffers will not be offloaded when training cudagraphs are used!"
                )
            optimizer.offload_to_cpu()

    # If we have separate training and inference models we to refit weights from the training model to the inference model.
    has_separate_inference_model = inference_model is not None
    if has_separate_inference_model:
        # If the separate inference model weights were prefetched to CPU while idle, bring them
        # back to GPU before refit/copy and before any CUDA-graph'd inference.
        with nvtx_range("prefetch-inference-model-weights-to-gpu"):
            inf_core = unwrap_model(inference_model[0])
            _maybe_prefetch_separate_inference_model_weights(inf_core, to_cpu=False)
        swap_model_weights(model, inference_model, args.refit_method)
        if args.rl_verify_model_weights_swap:
            verify_model_weights_swap(
                train_model=model,
                inference_model=inference_model,
                atol=.1,
                rtol=5e-4,
            )
    else:
        inference_model = model

    inference_pg_collection = get_attr_wrapped_model(inference_model[0], "pg_collection")
    pg_size = get_pg_size(inference_pg_collection.ep)
    assert (n_prompts % pg_size == 0), f"{n_prompts=} must be divisible by {pg_size=}"

    with nvtx_range("rollout-collection"):
        loop = get_asyncio_loop()
        with megatron_rl_inference_mode(
            inference_model,
            optimizer,
            args.cuda_graph_impl,
            False, # offload optimizer during rollout collection is handled above
            training_model=model if has_separate_inference_model else None,
        ) as inference_interface:

            with nvtx_range("inference-setup"):
                # Asyncronously run inference and rollout collection
                rollout_generator = get_rollout_generator(
                    args, inference_interface, n_prompts, samples_per_group
                )

            # NOTE(jbarker): we need to double check this when using PP>1
            rank = torch.distributed.get_rank()
            with nvtx_range("collect-rollouts"):
                if rank == 0:
                    log_single_rank(
                        logger,
                        logging.INFO,
                        f"Collecting rollouts, Iteration {args.curr_iteration}...",
                    )
                    rollouts = [
                        loop.run_until_complete(anext(rollout_generator)) for _ in range(n_prompts)
                    ]
                    # In deterministic mode, sort rollouts by problem_id for consistent ordering
                    # regardless of completion order due to system timing jitter.
                    if torch.are_deterministic_algorithms_enabled():
                        rollouts.sort(key=lambda group: group[0].problem_id if group and group[0].problem_id else "")
                    if not args.rl_partial_rollouts:
                        while True:
                            try:
                                loop.run_until_complete(anext(rollout_generator))
                                assert False, "Unexpected group left in generator."
                            except StopAsyncIteration:
                                break
                else:
                    # Just set up space to collect the rollouts
                    rollouts = [[None for _ in range(samples_per_group)] for _ in range(n_prompts)]

        with nvtx_range("sync-rollouts"):
            # Wait for Rollouts to be collected
            # TODO(jbarker): double check why this isn't causing rank 0 memory allocations
            torch.distributed.broadcast_object_list(rollouts, src=0)
        logger.debug(f"Got rollouts on rank {rank}")

    if args.rl_offload_optimizer_during_inference:
        with nvtx_range("restore-optimizer-state-and-grad-buffers-after-inference"):
            model[0].restore_grad_buffers()
            optimizer.restore_from_cpu()

    if lang_rl_log_dir and rank == get_pg_rank(inference_pg_collection.tp):
        with open(
            lang_rl_log_dir
            + f'/rollouts_rank{rank}_iteration{args.curr_iteration}_'
            + f'{Path(args.langrl_env_config).stem}.json',
            'w',
        ) as f:
            json.dump([[r.model_dump() for r in group] for group in rollouts], f)

    return rollouts


def selective_log_softmax(logits, index):
    """Taken from: https://github.com/huggingface/trl/blob/26d86757a7c7e24e397ea44f57ecce6031dfac01/trl/trainer/utils.py#L1659.

    A memory-efficient implementation of the common `log_softmax -> gather` operation.

    This function is equivalent to the following naive implementation:
    ```python
    logps = torch.gather(logits.log_softmax(-1), dim=-1, index=index.unsqueeze(-1)).squeeze(-1)
    ```

    Args:
        logits (`torch.Tensor`):
            Logits tensor of shape `(..., num_classes)`.
        index (`torch.Tensor`):
            Index tensor of shape `(...)`, specifying the positions to gather from the log-softmax output.

    Returns:
        `torch.Tensor`:
            Gathered log probabilities with the same shape as `index`.
    """
    use_bik_logsoftmax = is_batch_invariant_mode_enabled()
    if logits.dtype in [torch.float32, torch.float64] and not use_bik_logsoftmax:
        selected_logits = torch.gather(logits, dim=-1, index=index.unsqueeze(-1)).squeeze(-1)
        # loop to reduce peak mem consumption
        logsumexp_values = torch.stack([torch.logsumexp(lg, dim=-1) for lg in logits])
        per_token_logps = (
            selected_logits - logsumexp_values
        )  # log_softmax(x_i) = x_i - logsumexp(x)
    else:
        # logsumexp approach is unstable with bfloat16, fall back to slightly less efficent approach
        per_token_logps = []
        for row_logits, row_labels in zip(logits, index):  # loop to reduce peak mem consumption
            row_logps = torch.nn.functional.log_softmax(row_logits, dim=-1)
            row_per_token_logps = row_logps.gather(dim=-1, index=row_labels.unsqueeze(-1)).squeeze(
                -1
            )
            per_token_logps.append(row_per_token_logps)
        per_token_logps = torch.stack(per_token_logps)
    return per_token_logps


def get_logprobs(model, tokens, position_ids, no_grad=False, sequence_packing=False, packed_seq_params=None):
    """Get sequence logprobs from their token ids.

    Args:
        model: model to predict with.
        tokens: inputs for which we want to get logprobs.
        position_ids: position ids that come with tokens.
        attention_mask: attention mask that comes with tokens.
        no_grad: whether to run in no_grad mode.
        packed_seq_params: Optional PackedSeqParams for sequence packing with TE.
            When provided with qkv_format='thd', the input tokens are sliced to
            remove padding before the forward pass, and outputs are padded back.
        packed_seq_len: Optional length of the packed sequence (excluding padding).
            Required when packed_seq_params is provided to avoid CPU-GPU synchronization.

    Returns:
        Logprobs of input sequences.

    """

    args = get_args()
    # Ensure packed_seq_params is always provided for CUDA graph signature consistency.
    # When sequence_packing is enabled, construct from packing config (max_sequences_per_bin).
    # When sequence_packing is disabled, construct a single-sequence default so the CUDA
    # graph signature matches the training forward_step in train_rl.py.
    # This is necessary because reference logprobs steps will reuse the training forward graph.
    if packed_seq_params is None:
        if sequence_packing:
            packed_seq_params = get_default_packed_seq_params(
                seq_length=tokens.shape[1],
                max_sequences_per_bin=args.rl_sequence_packing_max_sequences_per_bin,
                device=tokens.device,
            )
        else:
            cu_seqlens = torch.tensor([0, tokens.shape[1]], dtype=torch.int32, device=tokens.device)
            packed_seq_params = PackedSeqParams(
                qkv_format='thd',
                cu_seqlens_q=cu_seqlens,
                cu_seqlens_kv=cu_seqlens,
                max_seqlen_q=tokens.shape[1],
                max_seqlen_kv=tokens.shape[1],
                total_tokens=tokens.shape[1],
            )

    nvtx_range = get_nvtx_range()

    with nvtx_range("get-logprobs", time=False):
        with nvtx_range("forward-pass", time=False):
            # TODO(vitalyk): use fp16/bf16 as a function argument. Do not use args.

            attention_mask_for_forward = None

            # This is a hack to fix megatron's behaviour when flash-decode affects the training code flow.
            flash_decode = model.config.flash_decode
            model.config.flash_decode = False
            fp32_output = not (args.fp16 or args.bf16)
            with torch.no_grad() if no_grad else nullcontext():
                logits_or_hidden_states = model(
                    tokens,
                    position_ids,
                    attention_mask_for_forward,
                    packed_seq_params=packed_seq_params,
                    runtime_gather_output=True,
                    fp32_output=fp32_output,
                )
            model.config.flash_decode = flash_decode

        pg_collection = get_attr_wrapped_model(model, "pg_collection")
        pp_group = pg_collection.pp

        if not is_pp_last_stage(pp_group):
            return logits_or_hidden_states
        else:
            logits = logits_or_hidden_states
            with nvtx_range("log-softmax", time=False):
                # We do not need logprobs for the n+1 token.
                logprobs = selective_log_softmax(logits[:, :-1, :], tokens[:, 1:])
            return logprobs


def calculate_grpo_advantages(rewards: list[list[float]], num_turns: list[list[int]]) -> np.ndarray:
    """Calculate GRPO advantages from rewards/num_turns.

    For multiturn rollouts, the logic is a bit more involved.
    # For training, we'll be turning each turn into a trajectory with the same reward
    # within a trajectory, e.g. if [[a,b],[c,d,e]] trajectory has reward 1.0, we will
    # get [a,b] with 1.0 and [c,d,e] with 1.0 when doing updates.
    """

    rewards = np.array(rewards)

    num_turns = np.array(num_turns)
    # Each outer dimension of num_turns is a group. Sum of those gives total num_turns per group.
    # Let's use this to calculate advantage.
    # mean/std should be repeated based on group lens
    group_turns = num_turns.sum(axis=-1)
    reward_means = rewards.mean(axis=1, keepdims=True).repeat(group_turns)
    reward_stds = rewards.std(axis=1, keepdims=True).repeat(group_turns)

    # rewards are originally [g, group_size]
    # Making an assumption that all groups are of the same size!
    # @vitalyk: this will go away when we start sending env-based sample reqs.
    rewards = rewards.flatten().repeat(num_turns.flatten())

    return ((rewards - reward_means) / (1e-4 + reward_stds)).tolist()


def compute_group_stats(
    rollouts: GroupedRollouts, tokenizer: MegatronTokenizer, seq_len: int,
) -> RolloutStats:
    """Add group-based rollout stats for logging.

    Args:
        rollouts: Rollouts to generate the stats for. Each inner list is a group (as in GRPO group), i.e. all rollouts are for the same prompt.
        tokenizer: Tokenizer to tokenize the rollouts in case they are raw strings.
        seq_len: Maximum sequence length.

    Returns:
       RolloutStats object containing all the stats.
    """
    # TODO (rkirby) Maybe do some of this after the tensor building
    group_reward_means = []
    group_reward_stds = []
    turn_lens = []
    traj_lens = []
    rewards = []
    env_ids = []
    group_reward_ids = []
    num_turns = [] # num_turns per traj
    all_policy_epoch = []
    all_kv_cache_epoch = []
    all_completed_epochs = []
    all_num_evictions = []
    for group in rollouts:
        group_rewards = []
        group_traj_lengths = []
        group_turn_lengths = []
        group_num_turns = []
        group_policy_epoch = []
        group_kv_epoch = []
        group_completed_epochs = []
        group_num_evictions = []
        for rollout in group:
            if isinstance(rollout, TokenRollout):
                for turn_traj in rollout.trajectory:
                    detokenized_traj = tokenizer.detokenize(turn_traj)
                    lang_rl_log(
                        f"Rollout: [{rollout.env_id}] [{rollout.reward} : {len(rollout.trajectory)} tokens] {detokenized_traj}"
                    )
                    # TODO(vitalyk): how does multiturn change EOD/EOT?
                    assert (len(turn_traj) == seq_len) or (
                        turn_traj[-1] == tokenizer.eod
                    ), f"Rollout is not the correct length: {len(turn_traj)} {turn_traj[-1]}\n{detokenized_traj}"
            else:
                lang_rl_log(
                    f"Rollout: [{rollout.env_id}] [{rollout.reward} : {len(rollout.trajectory)} chars] {rollout.trajectory}"
                )
            group_num_turns.append(len(rollout.trajectory))
            group_rewards.append(rollout.reward)
            roll_turn_lens = [len(t) for t in rollout.trajectory]
            group_turn_lengths.extend(roll_turn_lens)
            group_traj_lengths.append(sum(roll_turn_lens))
            assert rollout.policy_epoch, "Rollout has no policy_epoch data"
            assert rollout.kv_cache_epoch, "Rollout has no kv_cache_epoch data"
            group_policy_epoch.append(min(turn[0][1] for turn in rollout.policy_epoch))
            group_kv_epoch.append(min(turn[0][1] for turn in rollout.kv_cache_epoch))
            group_completed_epochs.extend(turn[-1][1] for turn in rollout.policy_epoch)
            group_num_evictions.append(sum(rollout.num_evictions))
        all_policy_epoch.append(group_policy_epoch)
        all_kv_cache_epoch.append(group_kv_epoch)
        all_completed_epochs.append(group_completed_epochs)
        all_num_evictions.append(group_num_evictions)
        traj_lens.append(group_traj_lengths)
        turn_lens.append(group_turn_lengths)
        env_ids.append(group[0].env_id) # All rollouts in a group share the env_id by design.
        rewards.append(group_rewards)
        # https://arxiv.org/abs/2504.21233 reports that lens variance hurts.
        # Let's track this.
        num_turns.append(group_num_turns)

    stats = RolloutStats(
        traj_lens=traj_lens,
        turn_lens=turn_lens,
        rewards=rewards,
        # --------
        # Everything above is per-group, i.e. it is a list of lists,
        # with the inner list being the group data.
        env_ids=env_ids,
        num_turns=num_turns,
        advantages=calculate_grpo_advantages(rewards, num_turns),
        min_piold_to_inf_prob=None,
        max_piold_to_inf_prob=None,
        mean_piold_to_inf_prob=None,
        min_inf_train_prob_abs_diff=None,
        max_inf_train_prob_abs_diff=None,
        mean_inf_train_prob_abs_diff=None,
        min_inf_prob=None,
        max_inf_prob=None,
        mean_inf_prob=None,
        policy_epoch=all_policy_epoch,
        kv_cache_epoch=all_kv_cache_epoch,
        completed_epochs=all_completed_epochs,
        num_evictions=all_num_evictions,
    )
    return stats


def prep_wandb_metrics(
        wandb_writer: wandb_run.Run,
        traj_lens: List[List[int]],
        turn_lens: List[List[int]],
        rewards: List[List[float]],
        num_turns: List[List[int]],
        advantages: List[float],
        policy_epoch: List[List[int]],
        kv_cache_epoch: List[List[int]],
        completed_epochs: List[List[int]],
        num_evictions: List[List[int]],
        current_iteration: int,
        example_group: list[TokenRollout | Rollout] | None = None,
        tokenizer: MegatronTokenizer | None = None,
    ):

    """Make a wandb-parseable dictionary of metrics for logging.

    Args:
        wandb_writer: Wandb run to log to.
        traj_lens: Grouped list of trajectory lengths.
        turn_lens: Grouped list of turn lengths.
        rewards: Grouped list of rewards.
        num_turns: Grouped list of number of turns in the trajectories.
        advantages: Flattened list of advantages.
        policy_epoch: Grouped list of per-rollout min policy epoch stamps.
        kv_cache_epoch: Grouped list of per-rollout min KV cache epoch stamps.
        completed_epochs: Grouped list of per-turn max policy epoch stamps.
        num_evictions: Grouped list of per-rollout number of evictions.
        current_iteration: Current training iteration.
        example_group: A list of rollouts of one group to log examples of trajectories.
        tokenizer: Tokenizer to untokenize trajectories for logging.
    """

    group_table = wandb_writer.Table(
        columns=['group_means', 'group_stds'],
        data=[[np.mean(g), np.std(g)] for g in rewards],
    )

    true_policy_staleness = [current_iteration - s for g in policy_epoch for s in g]
    true_kv_staleness = [current_iteration - s for g in kv_cache_epoch for s in g]

    metrics = {
            'group_means_hist': wandb_writer.plot.histogram(
                group_table, 'group_means', 'Group Means'
            ),
            'group_stds_hist': wandb_writer.plot.histogram(
                group_table, 'group_stds', 'Group STDs'
            ),
            'rewards_hist': wandb_writer.plot.histogram(
                wandb_writer.Table(
                    columns=['reward'], data=[[r] for g in rewards for r in g]
                ),
                'reward', 'All Rewards'
            ),
            'advantages_hist': wandb_writer.plot.histogram(
                wandb_writer.Table(
                    columns=['advantages'], data=[[x] for x in advantages]
                ),
                'advantages', 'Advantages'
            ),
            'rollout_table': wandb_writer.Table(
                columns=['reward', 'traj_length', 'num_evictions'],
                data=list(zip(
                    [r for g in rewards for r in g],
                    [l for g in traj_lens for l in g],
                    [e for g in num_evictions for e in g],
                )),
            ),
            'mean_turn_length': np.mean([np.mean(g) for g in turn_lens]),
            'mean_turn_length_std': np.mean([np.std(g) for g in turn_lens]),
            'max_turn_length': max([max(g) for g in turn_lens]),
            'min_turn_length': min([min(g) for g in turn_lens]),
            'mean_traj_length': np.mean([np.mean(g) for g in traj_lens]),
            'mean_traj_length_std': np.mean([np.std(g) for g in traj_lens]),
            'max_traj_length': max([max(g) for g in traj_lens]),
            'min_traj_length': min([min(g) for g in traj_lens]),
            'mean_num_turns': np.mean([np.mean(g) for g in num_turns]),
            'max_num_turns': max([max(g) for g in num_turns]),
            'min_num_turns': min([min(g) for g in num_turns]),
            'mean_reward': np.mean([np.mean(g) for g in rewards]),
            'mean_advantage': np.mean(advantages),
            'nonzero_groups_ratio': np.count_nonzero(advantages)
            / len(advantages),
            'mean_policy_staleness': np.mean(true_policy_staleness),
            'max_policy_staleness': max(true_policy_staleness),
            'min_policy_staleness': min(true_policy_staleness),
            'mean_kv_cache_staleness': np.mean(true_kv_staleness),
            'max_kv_cache_staleness': max(true_kv_staleness),
            'min_kv_cache_staleness': min(true_kv_staleness),
            'total_eviction_count': sum([sum(g) for g in num_evictions]),
            'max_num_evictions': max([max(g) for g in num_evictions]),
            'mean_completion_gap': np.mean([current_iteration - s for g in completed_epochs for s in g]),
    }
    if example_group:
        if tokenizer is None:
            raise ValueError("If you provide an example group to log, you need to provide a tokenizer too.")
        metrics['rollouts'] = wandb_writer.Table(
            columns=['Trajectories', 'Tokens', 'Rewards'],
            rows=[
                [
                    tokenizer.detokenize(turn) if isinstance(r, TokenRollout) else turn,
                    r.trajectory,
                    r.reward,
                ]
                for r in example_group for turn in r.trajectory
            ],
        )
    return metrics


def maybe_log_training_metrics(
    group_stats: RolloutStats,
    current_iteration: int,
    tokenizer: MegatronTokenizer,
    example_groups: dict[str, list[TokenRollout | Rollout]],
):
    """Log training metrics if writers are available.

    Args:
        group_stats: RolloutStats object to pass to writers.
        current_iteration: Current training iteration.
        tokenizer: Tokenizer to untokenize trajectories for logging.
        example_groups: A dict with values as list of rollouts of one group to log examples of trajectories. Keys are env names.
    """

    wandb_writer = get_wandb_writer()
    tb_writer = get_tensorboard_writer()
    if tb_writer:
        tb_writer.add_scalar('mean_reward', np.mean([np.mean(g) for g in group_stats.rewards]), current_iteration)
    if not wandb_writer:
        return

    # We log these metrics for the aggregated data, no split per env.
    metrics = {
        'min_piold_to_inf_prob': group_stats.min_piold_to_inf_prob,
        'max_piold_to_inf_prob': group_stats.max_piold_to_inf_prob,
        'mean_piold_to_inf_prob': group_stats.mean_piold_to_inf_prob,
        'min_inf_train_prob_abs_diff': group_stats.min_inf_train_prob_abs_diff,
        'max_inf_train_prob_abs_diff': group_stats.max_inf_train_prob_abs_diff,
        'mean_inf_train_prob_abs_diff': group_stats.mean_inf_train_prob_abs_diff,
        'min_inf_prob': group_stats.min_inf_prob,
        'max_inf_prob': group_stats.max_inf_prob,
        'mean_inf_prob': group_stats.mean_inf_prob,
    }

    traj_lens = group_stats.traj_lens
    turn_lens = group_stats.turn_lens
    rewards = group_stats.rewards
    num_turns = group_stats.num_turns
    advantages = group_stats.advantages
    policy_epoch = group_stats.policy_epoch
    kv_cache_epoch = group_stats.kv_cache_epoch
    completed_epochs = group_stats.completed_epochs
    num_evictions = group_stats.num_evictions

    metrics = metrics | prep_wandb_metrics(wandb_writer=wandb_writer,
        traj_lens=traj_lens, turn_lens=turn_lens, rewards=rewards, num_turns=num_turns, advantages=advantages,
        policy_epoch=policy_epoch, kv_cache_epoch=kv_cache_epoch, completed_epochs=completed_epochs,
        num_evictions=num_evictions, current_iteration=current_iteration)
    env_stats = lambda cont, idx: [cont[i] for i in idx]
    group_turn_counts = [sum(nt) for nt in num_turns]

    for env_id in set(group_stats.env_ids):
        env_idx = [i for i, eidx in enumerate(group_stats.env_ids) if eidx == env_id]

        # Advantages are flattened, we need to be more careful with those.
        env_advantages = []
        for i in env_idx:
            st = sum(group_turn_counts[:i])
            end = st + group_turn_counts[i]
            env_advantages.extend(advantages[st:end])

        env_metrics = prep_wandb_metrics(wandb_writer=wandb_writer, traj_lens=env_stats(traj_lens, env_idx),
            turn_lens=env_stats(turn_lens, env_idx),
            rewards=env_stats(rewards, env_idx),
            num_turns=env_stats(num_turns, env_idx),
            advantages=env_advantages,
            policy_epoch=env_stats(policy_epoch, env_idx),
            kv_cache_epoch=env_stats(kv_cache_epoch, env_idx),
            completed_epochs=env_stats(completed_epochs, env_idx),
            num_evictions=env_stats(num_evictions, env_idx),
            current_iteration=current_iteration,
            example_group=example_groups[env_id],
            tokenizer=tokenizer,
        )
        for k, v in env_metrics.items():
            metrics[f"{env_id}_{k}"] = v

    wandb_writer.log(metrics, step=current_iteration)


def prepare_trajectories(
    rollouts: Rollouts, tokenizer: MegatronTokenizer, seq_length: int, sequence_packing: bool, skip_bos_token: bool
):
    """Pad trajectories and extract the generation masks.
    Args:
        rollouts: Rollouts to extract trajectories from.
        tokenizer: Tokenizer to get the padding token and potentially tokenize.
        seq_length:  Maximum sequence length to pad to.

    Returns:
        Trajectories and their generation masks.

    Raises:
        ValueError:
    """
    # Track counts for each environment ID
    env_id_counts = Counter()

    DEFAULT_PAD_TOKENS = ['<|finetune_right_pad_id|>', '<SPECIAL_999>']

    if tokenizer.library == "huggingface":
        tokenizer : HuggingFaceTokenizer
        if not tokenizer.pad:
            for pad_token in DEFAULT_PAD_TOKENS:
                if pad_token in tokenizer._tokenizer.tokenizer.get_vocab():
                    log_single_rank(
                        logger, logging.INFO, f"Updating tokenizer pad token to {pad_token}"
                    )
                    tokenizer._tokenizer.pad_token = pad_token
                    break
            else:
                raise ValueError("No pad token found in tokenizer vocabulary")
    elif tokenizer.library == "tiktoken":
        assert "<SPECIAL_233>" in tokenizer.vocab, "Pad token is NOT in the tokenizer"
        tokenizer._pad_id = tokenizer.vocab["<SPECIAL_233>"]

    log_single_rank(logger, logging.INFO, f"Tokenizer vocab size: {tokenizer.vocab_size}")
    log_single_rank(
        logger,
        logging.INFO,
        f"Tokenizer PAD: '{tokenizer.detokenize([tokenizer.pad])} ({tokenizer.pad})'",
    )
    log_single_rank(
        logger,
        logging.INFO,
        f"Tokenizer EOD: '{tokenizer.detokenize([tokenizer.eod])} ({tokenizer.eod})'",
    )

    trajs = []
    generation_masks = []
    inference_logprobs = []
    for rollout in rollouts:
        # traj, gen mask and logprobs are lists now.
        # each list entry is a turn, single-turn environments just have a single-element list.
        # We assume that all lengths of the structs above have the same lengths (number of turns).

        all_turns_trajectories = (
            copy.deepcopy(rollout.trajectory)
            if isinstance(rollout, TokenRollout)
            else tokenizer.tokenize(rollout.trajectory)
        )
        for turn_idx, trajectory in enumerate(all_turns_trajectories):
            inf_logprobs = rollout.logprobs[turn_idx]
            generation_mask = rollout.generation_mask[turn_idx] if isinstance(rollout, TokenRollout) else None
            length = len(trajectory)
            assert length <= seq_length, "Rollout too long, how did this happen?"
            if len(trajectory) < seq_length:
                assert (
                    trajectory[-1] == tokenizer.eod
                ), "Trajectories under a seq_length limit should have eod token at the end."

            if length < seq_length:
                trajectory.extend([tokenizer.pad] * (seq_length - length))
                if generation_mask:
                    generation_mask.extend([False] * (seq_length - length))
            trajs.append(trajectory)
            generation_masks.append(generation_mask)

            if inf_logprobs is not None:
                inf_logprobs_tensor = torch.Tensor(inf_logprobs)
                # Don't pad individual logprobs here - padding happens later if needed
                inference_logprobs.append(inf_logprobs_tensor)
            else:
                inference_logprobs.append(None)

        env_id_counts[rollout.env_id] += 1

    if torch.distributed.is_initialized():
        logger.info(f"[{dist.get_rank()}] Rollout counts:")
        for env_id, count in env_id_counts.items():
            logger.info(f"[{dist.get_rank()}] \t{env_id}: {count}")

    generation_masks = torch.tensor(generation_masks, dtype=torch.bool, device='cpu')
    trajs = torch.tensor(trajs, device='cpu')

    # Only process if we have inference_logprobs
    if inference_logprobs and any(lp is not None for lp in inference_logprobs):
        # We need to pad all logprobs to the same size for sequence packing.
        # For non-packing mode, keep as list of tensors (unpadded)
        # This preserves the original behavior where each sequence can have different lengths
        if sequence_packing:
            inference_logprobs = _pad_nonnull_with_zeros(inference_logprobs, seq_length)
    else:
        inference_logprobs = None

    # Some sanity checks regarding the tokenization
    if not skip_bos_token:
        assert (
            tokenizer.bos is None or (trajs[:, 0] == tokenizer.bos).all()
        ), "First token should be bos"
    else:
        assert (
            tokenizer.bos is None or (trajs[:, 0] != tokenizer.bos).all()
        ), "First token should not be bos"  
    assert (
        tokenizer.bos is None or (trajs[:, 1] != tokenizer.bos).all()
    ), "Second token should not be bos"
    assert (
        (trajs * generation_masks.int() == tokenizer.eod).sum(axis=1) <= 1
    ).all(), "Only one eod per trajectory in generated tokens."
    # TODO(rkirby):
    # We should avoid the tokenizer pad token being the same as the eod token for proper loss masking,
    # But now the deepseek tokenizer has the pad token set to eod, we need to handle this.
    # assert (tokenizer.pad != tokenizer.eod), "Pad and eod should be different"
    return trajs, generation_masks, inference_logprobs


def logprobs_forward_step(data_iterator, model, is_correction, packing_context=None):
    # Avoid self.training checks which will trigger cudagraph capture; this path reuses
    # the forward pass from training after it has been captured on the 1st iteration.
    model.eval()

    if packing_context is not None:
        # When using sequence packing, the data iterator returns a tuple with a single element, the bin index.
        bin_tensor = next(data_iterator)[0]
        #TODO(jalbericiola): change for named tuple
        (b_trajs, _, _, _, b_posids, _, _, _, _, _, b_packed_seq_params) = (
            load_packed_data_by_index(bin_tensor.item(), packing_context, is_correction)
        )
    else:
        b_trajs, b_posids = next(data_iterator)
        b_packed_seq_params = None

    logprobs = (
        get_logprobs(
            model,
            b_trajs.cuda(),
            b_posids.cuda(),
            no_grad=True,
            sequence_packing=packing_context is not None,
            packed_seq_params=b_packed_seq_params,
        ),
        None,
    )
    model.train()
    return logprobs


def compute_logprobs_batch(
    model,
    data_loader,
    forward_backward_func,
    packing_context,
    trajs_batch_size, # n_bins for seq packing, and batch_size for non seq packing
    seq_length,
    logprobs_batch_size,
    decoder_seq_length,
    dtype,
    pp_group,
    is_correction,
    collect_non_loss_data=False,
):
    """Compute logprobs for all batches in the data loader."""
    logprobs_list = []
    data_iterator = iter(data_loader)
    for i in range(len(data_loader)):
        output_tensor = forward_backward_func(
            forward_step_func=partial(logprobs_forward_step, is_correction=is_correction, packing_context=packing_context),
            data_iterator=data_iterator,
            model=model,
            num_microbatches=1,
            seq_length=seq_length,
            micro_batch_size=logprobs_batch_size,
            decoder_seq_length=decoder_seq_length,
            forward_only=True,
            adjust_tensor_shapes_fn=None,
            collect_non_loss_data=collect_non_loss_data,
        )
        if is_pp_last_stage(pp_group):
            logprobs_list.append(output_tensor[0].detach())

    if is_pp_last_stage(pp_group):
        logprobs = torch.concat(logprobs_list, dim=0)
        assert logprobs.dtype == dtype
    else:
        logprobs = torch.empty(
            trajs_batch_size,
            seq_length-1,
            dtype=dtype,
            device=torch.cuda.current_device(),
        )

    # Only PP>1 needs a broadcast from the last stage; for PP=1 the output is already local.
    if get_pg_size(pp_group) > 1:
        dist.broadcast(logprobs, src=get_pp_last_rank(pp_group), group=pp_group)
    return logprobs.cpu()


def prepare_data_for_update(
    model: list[LanguageModule],
    ref_state_dict: Dict[str, Any],
    rollouts: GroupedRollouts,
    tokenizer: MegatronTokenizer,
    sequence_packing: bool,
    is_correction: bool,
) -> tuple[RerunDataIterator, RolloutStats, dict]:
    """Extract data for the update from raw rollouts.

    Args:
        model: Current policy as the zero-eth element.
        ref_state_dict: Reference policy state dict.
        rollouts: Rollouts to extract the data from.
        tokenizer: Tokenizer to pad/tokenize data.
        sequence_packing: Use sequence packing if True.
        is_correction: Prepare data for IS correction if True.

    Returns:
        Tuple of (cycled iterator over dataset batches, group stats, example groups per env).
    """
    args = get_args()
    nvtx_range = get_nvtx_range()
    runtime_state = get_rl_runtime_state()

    if args.cuda_graph_impl != "none" and not args.rl_training_cuda_graphs:
        lang_module = (
            model[0].module.module if hasattr(model[0].module, "module") else model[0].module
        )
        toggle_cuda_graphs(lang_module, "none")

    model = model[0]
    dtype = torch.bfloat16 if args.bf16 else (torch.float16 if args.fp16 else torch.float32)

    with nvtx_range("prepare-data-for-update"):
        with nvtx_range("compute-group-stats"):
            group_stats = compute_group_stats(rollouts, tokenizer, args.seq_length)
            # TODO(vitalyk): why do we need global_advantages here? go inside packing
            advantages = global_advantages = torch.tensor(group_stats.advantages, dtype=dtype).cuda()

        # Now split the rollouts across the data parallel ranks for training
        # This needs to be done at this point because we are about to calculate logprobs
        # Note :- For EP, do not use the expert data parallel group here. Always 
        # use the regular data parallel group. 

        # Get example group per environment to log their rollouts.
        example_groups = {}
        for g in rollouts:
            if g[0].env_id not in example_groups:
                example_groups[g[0].env_id] = g

        # Let's expand rollouts getting rid of the groups.
        # We need this to correctly split the rollouts across dp groups.
        # And we do not actually need them grouped in anything below anyways.
        rollouts = [r for g in rollouts for r in g]
        num_turns = [nt for g in group_stats.num_turns for nt in g]
        total_turns_sampled = len(rollouts)

        # We might sample more than we consume in one step.
        samples_ratio_per_step = args.global_batch_size / (args.grpo_prompts_per_step * args.grpo_group_size)
        assert samples_ratio_per_step <= 1, "You cannot use more data than you sampled."

        if (data_parallel_world_size := mpu.get_data_parallel_world_size()) > 0:
            data_split_size = len(rollouts) // data_parallel_world_size
            data_split_range = (
                mpu.get_data_parallel_rank() * data_split_size,
                (mpu.get_data_parallel_rank() + 1) * data_split_size,
            )
            rollouts = rollouts[data_split_range[0] : data_split_range[1]]
            local_num_turns = sum(num_turns[data_split_range[0] : data_split_range[1]])
            steps_before = sum(num_turns[:data_split_range[0]])
            advantages = advantages[steps_before:steps_before+local_num_turns]
            # First we calculate them on a global level and then we split and recalculate on a local level.
            # Sequence packing and reporting needs it global but non-packing wants it local.

        with nvtx_range("prepare_trajectories"):
            trajs, generation_masks, inference_logprobs = prepare_trajectories(
                rollouts, tokenizer, args.seq_length, sequence_packing, args.rl_skip_bos_token
            )

        packing_context = None
        # Build trajectories based on sequence packing or standard processing
        if sequence_packing:
            with nvtx_range("sequence_packing", time=True):
                runtime_state.packing_context = packing_context = pack_all_trajectories(
                    trajs, 
                    generation_masks, 
                    inference_logprobs, 
                    global_advantages, 
                    args.seq_length, 
                    args.rl_sequence_packing_max_sequences_per_bin,
                    args.rl_sequence_packing_algo
                    )
    
                compute_trajs = packing_context.packed_trajs
                compute_position_ids = packing_context.packed_position_ids
                # Use batch_size=1 for packed computation to enable proper attention masking
                # via PackedSeqParams (TE needs cu_seqlens per bin)
                dataset = TensorDataset(torch.arange(len(compute_trajs)))
                data_loader = DataLoader(dataset, batch_size=1)
                logprobs_batch_size = 1
        else:
            # Always compute standard masks for the original data (we'll need them later)
            with nvtx_range("get_ltor_masks_and_position_ids"):
                _, original_loss_mask, original_position_ids = get_ltor_masks_and_position_ids(
                    trajs,
                    tokenizer.eod,
                    tokenizer.pad,
                    args.reset_position_ids,
                    args.reset_attention_mask,
                    eod_mask_loss=False,
                    pad_mask_loss=True,
                )
                original_loss_mask[~generation_masks] = 0.0
                compute_trajs = trajs
                compute_position_ids = original_position_ids
                data_loader = DataLoader(
                    TensorDataset(compute_trajs, compute_position_ids),
                    batch_size=args.micro_batch_size,
                )
                logprobs_batch_size = args.micro_batch_size

        with torch.no_grad(), nvtx_range("compute_logprobs", time=True):
            # Before we can update the model, we need to get the logprobs for the \pi_{old} model.

            forward_backward_func = get_forward_backward_func()
            if args.cuda_graph_impl == "local" and CudaGraphScope.full_iteration in args.cuda_graph_scope:
                forward_backward_func = FullCudaGraphWrapper(
                    forward_backward_func, cuda_graph_warmup_steps=args.cuda_graph_warmup_steps
                )

            dtype = (
                torch.bfloat16 if args.bf16 else (torch.float16 if args.fp16 else torch.float32)
            )

            pg_collection = get_attr_wrapped_model(model, "pg_collection")
            pp_group = pg_collection.pp

            with torch.no_grad(), nvtx_range("compute_old_logprobs", time=True):
                old_logprobs = compute_logprobs_batch(
                    model=model,
                    data_loader=data_loader,
                    forward_backward_func=forward_backward_func,
                    packing_context=packing_context,
                    trajs_batch_size=len(compute_trajs),
                    seq_length=args.seq_length,
                    logprobs_batch_size=logprobs_batch_size,
                    decoder_seq_length=args.decoder_seq_length,
                    dtype=dtype,
                    pp_group=pp_group,
                    is_correction=args.rl_inference_logprobs_is_correction,
                )

            with torch.no_grad(), nvtx_range("compute_ref_logprobs", time=True):
                # We need to load the ref model state dict and compute the logprobs for the ref model
                cur_st_dict = {
                    k: (v.cpu() if v is not None else v) for k, v in model.state_dict().items()
                }
                model.load_state_dict(ref_state_dict)
                ref_logprobs = compute_logprobs_batch(
                    model=model,
                    data_loader=data_loader,
                    forward_backward_func=forward_backward_func,
                    packing_context=packing_context,
                    trajs_batch_size=len(compute_trajs),
                    seq_length=args.seq_length,
                    logprobs_batch_size=logprobs_batch_size,
                    decoder_seq_length=args.decoder_seq_length,
                    dtype=dtype,
                    pp_group=pp_group,
                    is_correction=args.rl_inference_logprobs_is_correction,
                )

                # logprobs are [b, seq, h] now.
                model.load_state_dict(cur_st_dict)

            torch.cuda.synchronize()
            gc.collect()
            torch.cuda.empty_cache()


        if sequence_packing:
            with nvtx_range("pack_logprobs", time=True):
                # Store logprobs on gpu in packing context
                # Since PackingContext is a dataclass, we add these as new attributes
                packing_context.old_logprobs = old_logprobs.cuda()
                packing_context.ref_logprobs = ref_logprobs.cuda()

                if inference_logprobs is not None:
                    # Pack the inference logprobs using the helper function
                    # We do this for logging purposes even if is_correction is disabled
                    packed_inference_logprobs = pack_inference_logprobs(
                        inference_logprobs=packing_context.original_inference_logprobs,
                        packing_info=packing_context.packing_info,
                        generation_masks=packing_context.original_generation_masks,
                        bin_size=args.seq_length,
                    )

                    # Compute statistics for logging using packed data
                    compute_packed_inference_logprobs_stats(
                        old_logprobs=old_logprobs,
                        packed_inference_logprobs=packed_inference_logprobs,
                        packed_loss_mask=packing_context.packed_loss_mask,
                        group_stats=group_stats,
                    )

                    # Store packed inference logprobs in packing context
                    packing_context.packed_inference_logprobs = packed_inference_logprobs.cuda()
                    # Only mark as having inference logprobs for IS correction if enabled
                    packing_context.has_inference_logprobs = args.rl_inference_logprobs_is_correction
            with nvtx_range("create_dataloader"):
                # @vitalyk: This function also reconfigures the data loader to count the
                # global_batch_size in the bins frame of reference.
                # I think it will be a better design if we split the data loader creating and logic
                # that reconfigures the microbatch calculator.

                update_microbatch_calculator(
                    samples_ratio_per_step=samples_ratio_per_step,
                    num_bins_this_rank = len(packing_context.packed_trajs),
                    bin_seq_indices = packing_context.packing_info.bin_seq_indices,
                    global_batch_size=args.global_batch_size, 
                    rampup_batch_size=args.rampup_batch_size, 
                    micro_batch_size=args.micro_batch_size, 
                    decrease_batch_size_if_needed=args.decrease_batch_size_if_needed,
               )
                loader = get_microbatch_dataloader(len(packing_context.packed_trajs), args.micro_batch_size)
        else:
            with nvtx_range("align_inference_logprobs", time=True):
                if inference_logprobs is not None:
                    inference_logprobs = align_unpacked_inference_logprobs(
                        inference_logprobs=inference_logprobs,
                        old_logprobs_for_data=old_logprobs,
                        generation_masks=generation_masks,
                        group_stats=group_stats,
                    )
                    # We run the above to fill in the inference/train side mismatch stats.
                    # We do the above for logging purposes.
                    # Nullify logprobs if not used in IS correction,
                    if not args.rl_inference_logprobs_is_correction:
                        inference_logprobs = None
            with nvtx_range("create_dataloader"):
                # Because of multiturn, our batch sizes for non-sequence packed trajectories are not fixed anymore.
                # As in sequence packing above, we need to reconfigure it too.
                runtime_state.packing_context = None

                reconfigure_num_microbatches_calculator(
                    rank=torch.distributed.get_rank() if torch.distributed.is_initialized() else 0,
                    global_batch_size=math.ceil(samples_ratio_per_step*total_turns_sampled), 
                    rampup_batch_size=args.rampup_batch_size, 
                    micro_batch_size=args.micro_batch_size, 
                    decrease_batch_size_if_needed=args.decrease_batch_size_if_needed,
                    data_parallel_size=mpu.get_data_parallel_world_size(),
                )

                dataset_tensors = [
                    compute_trajs,
                    advantages,
                    old_logprobs,
                    original_loss_mask,
                    original_position_ids,
                    ref_logprobs,
                ]
                if is_correction and inference_logprobs is not None:
                    dataset_tensors.append(inference_logprobs)
                else:
                    dataset_tensors.append(torch.zeros_like(old_logprobs))
                data = TensorDataset(*dataset_tensors)
                loader = DataLoader(data, batch_size=args.micro_batch_size)


    return RerunDataIterator(itertools.cycle(loader)), group_stats, example_groups


def get_grpo_data_iterator(
    model: LanguageModule,
    inference_model: LanguageModule | None,
    optimizer: MegatronOptimizer,
    iteration: int,
    ref_state_dict: Dict[str, torch.Tensor],
    grpo_iterations: int,
    grpo_prompts_per_step: int,
    grpo_group_size: int,
    global_batch_size: int,
    sequence_packing: bool,
    is_correction: bool,
    buffered_rollouts: RerunDataIterator | None = None,
) -> RerunDataIterator:
    """
    Get the data iterator for GRPO training.

    Depending on the sampling parameters either performs data collections or returns
    the buffered_rollouts as is.

    Args:
        model: The language model
        optimizer: The Megatron optimizer
        iteration: Current training iteration
        ref_state_dict: Reference model state dict for GRPO
        grpo_iterations: How many steps we reuse the sampled data for.
        grpo_prompts_per_step: How many prompts we sample per data collection.
        grpo_group_size: How many samples we do per prompt.
        global_batch_size: Global batch size.
        sequence_packing: Use sequence packing if True.
        is_correction: Use IS correction if True.
        buffered_rollouts: Previously collected rollouts (if any)

    Returns:
        RerunDataIterator for the current training step
    """
    runtime_state = get_rl_runtime_state()
    tokenizer = get_tokenizer()

    # We collect new rollouts when we've gone over the collected data 'grpo_iterations' times.
    global_batches_per_collection = (grpo_prompts_per_step * grpo_group_size) // global_batch_size
    if (
        buffered_rollouts is None or
        iteration == runtime_state.last_collection_iteration +
        (grpo_iterations * global_batches_per_collection)
    ):

        rollouts = get_environment_rollouts(
            model, inference_model, optimizer, grpo_prompts_per_step, grpo_group_size
        )
        buffered_rollouts, group_stats, example_groups = prepare_data_for_update(
            model=model,
            ref_state_dict=ref_state_dict,
            rollouts=rollouts,
            tokenizer=tokenizer,
            sequence_packing=sequence_packing,
            is_correction=is_correction,
        )
        runtime_state.group_stats = group_stats
        runtime_state.example_groups = example_groups
        runtime_state.reset_iteration_counters(iteration)

    maybe_log_training_metrics(
        group_stats=runtime_state.group_stats,
        current_iteration=iteration,
        tokenizer=tokenizer,
        example_groups=runtime_state.example_groups,
    )

    return buffered_rollouts


def evaluate_and_print_results_rl(
    data_iterator: Iterator[TensorDataset],
    model: list[LanguageModule],
    optimizer: MegatronOptimizer,
    iteration: int,
    write_to_tensorboard: bool = True,
    training_model: Optional[list[LanguageModule]] = None,
):
    """Helper function to evaluate and dump results on screen.

    Args:
        data_iterator: Iterator over batches of evaluation dataset.
        model: Model to evaluate with (may be separate inference model).
        iteration: Current training iteration.
        write_to_tensorboard: Dumpt stuff to tensorboard or not.
        training_model: Training model (if separate from inference model). Used to offload
            grad buffers and restore to train mode. If None, uses model parameter.
    """
    args = get_args()

    # TODO(vitalyk): I do not track eval loss as in training. We probably should.
    # megatron-lm uses forward_step_func to do the above.

    # Use context manager to temporarily disable sequence parallelism for evaluation

    with torch.no_grad():
        with megatron_rl_inference_mode(
            model,
            optimizer,
            args.cuda_graph_impl,
            args.rl_offload_optimizer_during_inference,
            training_model,
        ) as inference_interface:

            loop = get_asyncio_loop()

            rank = torch.distributed.get_rank()
            if rank == 0:
                logger.info("Collecting evaluation results...")
                agent = get_agent(args)
                request = EvaluationRequest(
                    inference_interface=inference_interface,
                    num_prompts=args.rl_prompts_per_eval,
                    validation=True,
                    rank_info=None,
                    generation_args={
                        'temperature': args.rl_default_temperature,
                        'max_tokens': args.seq_length,
                        'top_p': args.rl_default_top_p,
                        'top_k': args.rl_default_top_k,
                    },
                )
                evaluation_responses = loop.run_until_complete(agent.run_evaluation(request))
                if not isinstance(evaluation_responses, list):
                    evaluation_responses = [evaluation_responses]
            else:
                evaluation_responses = None

        dp_eval_results: list[None | list[EvaluationResponse]] = [
            None for _ in range(args.world_size)
        ]
        dist.gather_object(
            evaluation_responses,
            dp_eval_results if dist.get_rank() == (args.world_size - 1) else None,
            dst=args.world_size - 1,
        )

        if dist.get_rank() == args.world_size - 1:
            dp_eval_results = [x for x in dp_eval_results if x is not None]
            # TODO(rkirby): maybe factor this out into a function?
            eval_metrics = defaultdict(list)
            for responses in dp_eval_results:
                for response in responses:
                    if response is None:
                        continue
                    for k, v in response.metrics().items():
                        eval_metrics[f"{response.env_id}_eval_mean_{k}"].extend(v)
                    for result in response.results:
                        if isinstance(result, RewardEvaluationResult):
                            try:
                                lang_rl_log(
                                    f"Evaluation: [{response.env_id}] [{result.reward}] {result.prompt} {result.response}"
                                )
                            except Exception as e:
                                lang_rl_log(f"Error: {e}")
                                lang_rl_log(f"Result: {result}")
            logger.info(
                "Collected metrics:"
                + "".join([f"\n\t{k} count: {len(v)}" for k, v in eval_metrics.items()])
            )
            eval_metrics = {k: np.mean(v) for k, v in eval_metrics.items()}
            if write_to_tensorboard:
                tb_writer = get_tensorboard_writer()
                if tb_writer:
                    for k, v in eval_metrics.items():
                        tb_writer.add_scalar(k, v, iteration)
            wandb_writer = get_wandb_writer()
            if wandb_writer:
                wandb_writer.log(eval_metrics, step=iteration)
            logger.info(
                "Evaluation results:"
                + "".join([f"\n\t{k}: {v:0.4f}" for k, v in eval_metrics.items()])
            )
            if lang_rl_log_dir:
                with open(
                    lang_rl_log_dir
                    + f'/eval_rank{rank}_iteration{args.curr_iteration}_'
                    + f'{Path(args.langrl_env_config).stem}.json',
                    'w',
                ) as f:
                    json.dump([[r.model_dump() for r in group] for group in dp_eval_results], f)


def calculate_grpo_loss(
    current_logprobs: torch.Tensor,
    old_logprobs: torch.Tensor,
    ref_logprobs: torch.Tensor,
    advantages: torch.Tensor,
    clamp_eps_lower: float,
    clamp_eps_upper: float,
    kl_beta: float,
    entropy_weight: float,
    inference_logprobs: torch.Tensor | None = None,
    is_truncation_coef: float | None = None,
    seq_starts: list | None = None,
    seq_lengths: list | None = None,
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Get GRPO loss, the kl term of the loss and the pi/pi_{old} ratios.

    Args:
        current_logprobs: pi logprobs, [batch, seq] for unpacked or [1, bin_size] for packed.
        old_logprobs: pi_{old} logprobs, [batch, seq] for unpacked or [1, bin_size] for packed.
        ref_logprobs: pi_{ref} logprobs, [batch, seq] for unpacked or [1, bin_size] for packed.
        advantages: advantages tensor, [batch,] for unpacked or [num_sequences_in_bin,] for packed.
        clamp_eps_lower: eps to clamp ratios from below.
        clamp_eps_upper: eps to clamp ratios from above, if vanilla GRPO, this should be equal to clamp_eps_lower.
        kl_beta: weight for the KL penalty term measuring the distance between pi and pi_{ref}.
        entropy_weight: weight for the entropy term.
        inference_logprobs: pi_{old} logprobs calculated by the inference engine.
            If not None, importance sampling correction will be applied.
        is_truncation_coef: importance sampling truncation coefficient. Will be applied if it is not None and inference_logprobs are present.
        seq_starts: (optional) For packed sequences: start positions of each sequence in the bin.
        seq_lengths: (optional) For packed sequences: original lengths of each sequence.

    Returns:
        total per-token GRPO loss [batch, seq] or [1, bin_size],
        kl_term of the loss [batch, seq] or [1, bin_size],
        pi/pi_{old} ratios [batch, seq] or [1, bin_size],
        entropy_term of the loss [batch, seq] or [1, bin_size],
        truncated_from_above [batch, seq] or [1, bin_size] (whether we clamped the ratios or not),
        truncated_from_below [batch, seq] or [1, bin_size] (whether we clamped the ratios or not).
    """
    # Ensure shapes match before computation
    if current_logprobs.shape != old_logprobs.shape:
        log_single_rank(
            logger,
            logging.WARNING,
            f"WARNING: Shape mismatch - current_logprobs: {current_logprobs.shape}, old_logprobs: {old_logprobs.shape}",
        )

    ratios = (current_logprobs - old_logprobs).exp()
    clamped_ratios = ratios.clamp(1 - clamp_eps_lower, 1 + clamp_eps_upper)
    truncated_from_above = torch.gt(ratios, 1 + clamp_eps_upper)
    truncated_from_below = torch.lt(ratios, 1 - clamp_eps_lower)

    # Handle advantages based on whether this is packed or unpacked
    if seq_starts is not None and seq_lengths is not None:
        # Packed sequences: map each sequence's advantage to its tokens
        bin_size = current_logprobs.shape[1]
        packed_advantages = torch.zeros(
            (1, bin_size), device=current_logprobs.device, dtype=current_logprobs.dtype
        )

        for seq_idx, (start, seq_len) in enumerate(zip(seq_starts, seq_lengths)):
            # Logprobs are 1 token shorter than sequences
            end = min(start + seq_len - 1, bin_size)
            if end > start:
                packed_advantages[0, start:end] = advantages[seq_idx].item()

        advantages = packed_advantages
    else:
        # Unpacked sequences: broadcast single advantage per sequence
        # Reshape to [batch, 1] to match logprobs shape [batch, seq]
        advantages = advantages.view(-1, 1)

    ref_diff = ref_logprobs - current_logprobs
    kl_term = ref_diff.exp() - ref_diff - 1
    entropy_term = -current_logprobs.exp() * current_logprobs

    is_weights = torch.tensor(1.0, dtype=old_logprobs.dtype).to(old_logprobs.device)
    if inference_logprobs is not None:
        is_weights = (old_logprobs - inference_logprobs).exp()
        if is_truncation_coef is not None:
            is_weights = torch.min(
                is_weights,
                torch.tensor(is_truncation_coef, dtype=old_logprobs.dtype).to(old_logprobs.device),
            )

    loss = (
        -is_weights * torch.min(ratios * advantages, clamped_ratios * advantages)
        + kl_beta * kl_term
        - entropy_weight * entropy_term
    )

    return loss, kl_term, ratios, entropy_term, truncated_from_above, truncated_from_below


@contextmanager
def megatron_rl_inference_mode(
    model: list[LanguageModule],
    optimizer: MegatronOptimizer,
    cuda_graph_impl: str,
    offload_optimizer_during_inference: bool,
    training_model: Optional[list[LanguageModule]] = None,
):
    """Manage the model inference context when collecting rollouts.

    Args:
        model: model to prepare for inference (may be separate inference model).
        optimizer: optimizer used to train the model.
        cuda_graph_impl: which cuda graph implementation to use.
        offload_optimizer_during_inference: move optimizer to cpu during inference or not.
        training_model: training model (if separate from inference model). Used to offload
            grad buffers and restore to train mode. If None, uses model parameter.

    Yields:
        None: this context manager does not return a value.

    """
    args = get_args()
    loop = get_asyncio_loop()
    nvtx_range = get_nvtx_range()

    logger.debug(f"[{dist.get_rank()}] Entering inference mode")

    # Change cudagraph scope for inference (empty list = full-layer capture)
    model[0].config.cuda_graph_scope = []
    model[0].config.cuda_graph_impl = "local"

    # If we get a lower precision wrapper, we go one object deeper.
    lang_module = model[0].module.module if hasattr(model[0].module, "module") else model[0].module

    # Switch MoE layers to full CUDA graph capture for inference
    if args.rl_training_cuda_graphs and args.num_experts is not None:
        transition_moe_cudagraphs(lang_module, 'full')

    lang_module.eval()
    # If this is a separate RL inference model with offloading enabled, ensure weights are on GPU
    # before any CUDA-graph capture/replay or inference. This is a no-op if already on GPU.
    model_core = unwrap_model(model[0])
    with nvtx_range("prefetch-inference-model-weights-to-gpu"):
        _maybe_prefetch_separate_inference_model_weights(model_core, to_cpu=False)

    rotary_module = getattr(lang_module, "rotary_pos_emb", None)
    # Vanilla RotaryEmbedding module has lru_cache decorator which breaks RL training
    # as it tries to reuse frequences tensors cached in inference mode.
    has_lru_cache = rotary_module is not None and hasattr(rotary_module.forward, "cache_parameters")
    if has_lru_cache:
        rotary_module.forward.cache_clear()

    with torch.no_grad():

        if offload_optimizer_during_inference:
            with nvtx_range("offload-optimizer-state-and-grad-buffers-before-inference"):
                if not args.rl_training_cuda_graphs:
                    # Offload grad buffers from the training model (if separate inference model is used)
                    # or from the inference model (if they're the same model)
                    model_for_grad_offload = training_model if training_model is not None else model
                    model_for_grad_offload[0].offload_grad_buffers()
                else:
                    logger.warning(
                        "Gradient buffers will not be offloaded when training cudagraphs are used!"
                    )
                optimizer.offload_to_cpu()

        if cuda_graph_impl != "none" and not args.rl_training_cuda_graphs:
            toggle_cuda_graphs(lang_module, cuda_graph_impl)

        inference_interface = get_inference_interface(args, loop, model)
        inference_interface.set_generation_epoch(get_args().curr_iteration)
        loop.run_until_complete(inference_interface.resume())

        logger.debug(f"[{dist.get_rank()}] Entered inference mode")
        yield inference_interface

        with nvtx_range("suspend-engine"):
            loop.run_until_complete(inference_interface.suspend())

        if cuda_graph_impl != "none" and not args.rl_training_cuda_graphs:
            toggle_cuda_graphs(lang_module, 'none')

        # Reset drop_and_pad leaked from inference decode
        set_decode_expert_padding(unwrap_model(model[0]), set_to=False)

        # Restore partial capture cudagraph scope for training if this is MoE
        if args.num_experts is not None:
            model[0].config.cuda_graph_scope = [
                CudaGraphScope.mamba,
                CudaGraphScope.attn,
                CudaGraphScope.moe_router,
                CudaGraphScope.moe_preprocess,
            ]

        # Switch MoE layers to partial CUDA graph capture for training
        if args.rl_training_cuda_graphs and args.num_experts is not None:
            transition_moe_cudagraphs(lang_module, 'partial')

        # If this is a separate RL inference model, prefetch weights back to CPU so they
        # don't consume GPU memory during training.
        with nvtx_range("prefetch-inference-model-weights-to-cpu"):
            _maybe_prefetch_separate_inference_model_weights(model_core, to_cpu=True)

        if offload_optimizer_during_inference:
            with nvtx_range("onload-optimizer-state-and-grad-buffers-after-inference"):
                # Restore grad buffers to the training model (if separate inference model is used)
                # or to the inference model (if they're the same model)
                model_for_grad_offload = training_model if training_model is not None else model
                model_for_grad_offload[0].restore_grad_buffers()
                optimizer.restore_from_cpu()

        # Set training model back to train mode (not inference model if they're separate)
        training_lang_module = unwrap_model(training_model[0]) if training_model is not None else lang_module
        training_lang_module.train()

        if has_lru_cache:
            rotary_module.forward.cache_clear()

        logger.debug(f"[{dist.get_rank()}] Exiting inference mode")


def rl_inference_interface_shutdown():
    global _INFERENCE_INTERFACE
    global _ROLLOUT_GENERATOR

    if _ROLLOUT_GENERATOR is not None:
        loop = get_asyncio_loop()
        loop.run_until_complete(_ROLLOUT_GENERATOR.aclose())
        _ROLLOUT_GENERATOR = None

    if _INFERENCE_INTERFACE is not None:
        loop = get_asyncio_loop()
        loop.run_until_complete(_INFERENCE_INTERFACE.kill())
        _INFERENCE_INTERFACE = None
    else:
        logger.warning("No inference interface to shutdown. This should not happen.")

    # TODO(rkirby): This is a hack to hard exit. There is a bug that is preventing us from using sys.exit(0).
    # It seem the Flask server has non-daemon threads that are preventing the program from exiting.
    # We need to find a way to gracefully complete all in progress requests and shutdown the Flask server.
    import os
    os._exit(0)


def get_iteration_sequence_count(args):
    """Get the total number of sequences processed in this iteration across all ranks."""
    runtime_state = get_rl_runtime_state()
    sequences_tensor = torch.tensor(
        runtime_state.sequences_this_iteration_on_rank, device='cuda', dtype=torch.long
    )
    if torch.distributed.is_initialized():
        torch.distributed.all_reduce(sequences_tensor, group=mpu.get_data_parallel_group())
    return int(sequences_tensor.item())
    
def _pad_nonnull_with_zeros(data: list[Optional[torch.Tensor]], max_len: int) -> torch.Tensor:
    """Pad each element of a list of tensors to the length required.
    Args:
        data: List of tensors to pad.
        max_len: Maximum length to pad to. Must be higher or equal than the max len of the data tensors.
    Returns:
        A padded tensor which is a stacked list of padded input tensors.

    """
    if all([el is None for el in data]):
        raise ValueError("At least one element of the data list should be not None.")
    padded_data = []
    for chunk in data:
        if chunk is not None:
            padding_size = max_len - len(chunk)
            if padding_size > 0:
                # Pad with zeros (these positions will be masked anyway)
                padded = torch.nn.functional.pad(chunk, (0, padding_size), value=0.0)
                padded_data.append(padded)
            elif padding_size == 0:
                padded_data.append(chunk)
            else:
                raise ValueError("One of the input tensors has larger length than padding max len.")
        else:
            # Create zero tensor for None logprobs
            padded_data.append(torch.zeros(max_len))
    return torch.stack(padded_data)


================================================
FILE: megatron/rl/sequence_packing_utils.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import torch
import math
import numpy as np
from typing import List, Dict, Any, Tuple, Optional
from torch.utils.data import DataLoader, TensorDataset
from dataclasses import dataclass, field
from megatron.core.utils import log_single_rank
from megatron.training.global_vars import get_args, get_tokenizer
from megatron.training.utils import get_nvtx_range
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core import mpu
import logging
import typing
from megatron.core.num_microbatches_calculator import (
        get_num_microbatches,
        reconfigure_num_microbatches_calculator,
    )

logger = logging.getLogger(__name__)


@dataclass
class PackingInfo:
    """Information about how sequences are packed into bins.
    
    Attributes:
        bin_seq_indices: List where each element contains the global sequence indices in that bin
        seq_starts: Dict mapping bin index to list of start positions for each sequence in that bin
        seq_lengths: List of all original sequence lengths (indexed by global sequence index)
        seq_to_bin_idx: List mapping each global sequence index to its bin index
        packing_algo: Algorithm used for distributing bins ('fifo' or 'round-robin')
    """
    bin_seq_indices: List[List[int]]
    seq_starts: Dict[int, List[int]]
    seq_lengths: List[int]
    seq_to_bin_idx: List[Optional[int]]
    packing_algo: typing.Literal['fifo', 'round-robin']


@dataclass
class PackingContext:
    """Context containing all information needed for sequence packing during training.
    
    Attributes:
        bin_size: Maximum size of each bin (in tokens)
        packer: 'SequencePacker' instance used for packing
        packing_info: PackingInfo object with bin assignments and metadata
        original_generation_masks: Generation masks for all sequences before packing
        original_trajs: All trajectories before packing
        packed_trajs: Packed trajectories tensor [num_bins, bin_size]
        packed_position_ids: Position IDs for packed sequences [num_bins, bin_size]
        packed_attention_mask: Attention mask for packed sequences [num_bins, 1, bin_size, bin_size]
        packed_loss_mask: Loss mask for packed sequences [num_bins, bin_size]
        original_inference_logprobs: Inference logprobs for all sequences before packing (optional)
        bin_advantages: List of advantage tensors for each bin
        cached_packed_seq_params: Pre-computed PackedSeqParams for each bin
    """
    bin_size: int
    packer: 'SequencePacker'
    packing_info: PackingInfo
    original_generation_masks: torch.Tensor
    original_trajs: torch.Tensor
    packed_trajs: torch.Tensor
    packed_position_ids: torch.Tensor
    packed_attention_mask: torch.Tensor
    packed_loss_mask: torch.Tensor
    original_inference_logprobs: Optional[torch.Tensor] = None
    bin_advantages: List[torch.Tensor] = field(default_factory=list)
    cached_packed_seq_params: List[Optional[PackedSeqParams]] = field(default_factory=list)


def load_packed_data_by_index(bin_idx: int, packing_context: PackingContext, logprobs_is_correction: bool):
    """Load packed data by index.

    Args:
        bin_idx: Index of the bin to load.
    """
    # Get packing context (should always be available in packed mode)
    idx = slice(bin_idx, bin_idx + 1)

    # Get cached PackedSeqParams for proper attention masking in Transformer Engine
    # These were pre-computed in prepare_data_for_update to avoid repeated tensor allocations
    packed_seq_params = packing_context.cached_packed_seq_params[bin_idx]

    # Extract packed data for this bin (already on GPU)
    tokens = packing_context.packed_trajs[idx]
    position_ids = packing_context.packed_position_ids[idx]

    # Check if we have old_logprobs and ref_logprobs as attributes
    # These are set after logprobs computation, so they may not exist during initial forward pass
    old_logprobs = getattr(packing_context, 'old_logprobs', None)
    if old_logprobs is not None:
        old_logprobs = old_logprobs[idx]
    
    ref_logprobs = getattr(packing_context, 'ref_logprobs', None)
    if ref_logprobs is not None:
        ref_logprobs = ref_logprobs[idx]
        
    # Slice from position 1 because logprobs predict the next token, so they are
    # shifted by 1 relative to the input tokens (logprobs has shape [batch, seq_len-1])
    loss_mask = packing_context.packed_loss_mask[idx, 1:]

    # Get sequence-level data for this bin
    packing_info = packing_context.packing_info
    seq_starts = packing_info.seq_starts[bin_idx]
    seq_indices = packing_info.bin_seq_indices[bin_idx]

    # Handle empty bins (used for padding to ensure all ranks have same iterations)
    if not seq_indices:
        seq_lengths = []
        advantages = torch.tensor([], device='cuda')
    else:
        seq_lengths = [packing_info.seq_lengths[idx] for idx in seq_indices]
        advantages = packing_context.bin_advantages[bin_idx]

    # Extract packed inference_logprobs if available
    packed_inference_logprobs = getattr(packing_context, 'packed_inference_logprobs', None)
    if packed_inference_logprobs is not None and logprobs_is_correction:
        inference_logprobs = packed_inference_logprobs[idx]
    else:
        inference_logprobs = None

    return (
        tokens,
        advantages,
        old_logprobs,
        loss_mask,
        position_ids,
        ref_logprobs,
        inference_logprobs,
        seq_starts,
        seq_lengths,
        seq_indices,
        packed_seq_params,
    )


def log_packing_efficiency(packing_context: PackingContext):
    # Log packing efficiency (for this rank's bins)
    packing_info = packing_context.packing_info
    packed_trajs = packing_context.packed_trajs
    my_bin_seq_indices = packing_info.bin_seq_indices
    num_bins = len(packing_info.bin_seq_indices)
    total_tokens = sum(packing_info.seq_lengths)  # All sequences
    my_sequences = sum(len(indices) for indices in my_bin_seq_indices)
    my_tokens = sum(
        packing_info.seq_lengths[idx]
        for indices in my_bin_seq_indices
        for idx in indices
    )
    total_capacity = packed_trajs.shape[0] * packed_trajs.shape[1]
    packing_efficiency = my_tokens / total_capacity if total_capacity > 0 else 0
    avg_seq_length = total_tokens / len(packing_info.seq_lengths)
    rank = mpu.get_data_parallel_rank()

    log_single_rank(logger, logging.INFO, "[Sequence Packing] Statistics:")
    log_single_rank(
        logger,
        logging.INFO,
        f"[Sequence Packing]  - Total sequences: {len(packing_info.seq_lengths)}",
    )
    log_single_rank(
        logger, logging.INFO, f"[Sequence Packing]  - Total bins: {num_bins}"
    )
    log_single_rank(
        logger,
        logging.INFO,
        f"[Sequence Packing]  - Bin size: {packed_trajs.shape[1]} tokens",
    )
    log_single_rank(
        logger,
        logging.INFO,
        f"[Sequence Packing]  - Average sequence length: {avg_seq_length:.1f} tokens",
    )
    log_single_rank(
        logger,
        logging.INFO,
        f"[Sequence Packing]  - This rank: {my_sequences} sequences in {packed_trajs.shape[0]} bins",
    )
    log_single_rank(
        logger,
        logging.INFO,
        f"[Sequence Packing]  - Packing efficiency: {packing_efficiency:.1%} ({my_tokens:,} / {total_capacity:,} tokens)",
    )

    # Add detailed per-rank sequence distribution analysis
    if torch.distributed.is_initialized():
        # Gather sequence counts from all ranks
        seq_counts_per_bin = [len(indices) for indices in my_bin_seq_indices]
        non_empty_bins = [c for c in seq_counts_per_bin if c > 0]

        # Create tensor with rank statistics
        rank_stats = torch.tensor(
            [
                float(rank),
                float(len(my_bin_seq_indices)),  # total bins
                float(len(non_empty_bins)),  # non-empty bins
                float(my_sequences),  # total sequences
                (
                    float(min(non_empty_bins)) if non_empty_bins else 0.0
                ),  # min sequences per bin
                (
                    float(max(non_empty_bins)) if non_empty_bins else 0.0
                ),  # max sequences per bin
                (
                    float(my_sequences / len(non_empty_bins)) if non_empty_bins else 0.0
                ),  # avg sequences per non-empty bin
            ],
            device='cuda',
        )

        # Gather from all ranks
        world_size = mpu.get_data_parallel_world_size()
        all_rank_stats = [torch.zeros_like(rank_stats) for _ in range(world_size)]
        torch.distributed.all_gather(
            all_rank_stats, rank_stats, group=mpu.get_data_parallel_group()
        )

        # Print detailed statistics for each rank
        if rank == 0:
            log_single_rank(
                logger,
                logging.INFO,
                f"[Sequence Packing] Per-rank distribution ({packing_info.packing_algo} algorithm):",
            )
            log_single_rank(
                logger,
                logging.INFO,
                "[Sequence Packing]  Rank | Total Bins | Non-empty | Sequences | Min/Bin | Max/Bin | Avg/Bin",
            )
            log_single_rank(
                logger,
                logging.INFO,
                "[Sequence Packing]  -----|------------|-----------|-----------|---------|---------|--------",
            )
            for stats in all_rank_stats:
                r = int(stats[0].item())
                total_bins = int(stats[1].item())
                non_empty = int(stats[2].item())
                sequences = int(stats[3].item())
                min_seq = int(stats[4].item())
                max_seq = int(stats[5].item())
                avg_seq = stats[6].item()
                log_single_rank(
                    logger,
                    logging.INFO,
                    f"[Sequence Packing]   {r:3d} | {total_bins:10d} | {non_empty:9d} | {sequences:9d} | {min_seq:7d} | {max_seq:7d} | {avg_seq:6.1f}",
                )

            # Also show first few bins for rank 0 as example
            log_single_rank(
                logger,
                logging.INFO,
                f"[Sequence Packing]  Example (Rank 0 first 10 bins): {seq_counts_per_bin[:10]}",
            )

            # Show the improvement from round-robin
            total_seqs_all_ranks = sum(int(stats[3].item()) for stats in all_rank_stats)
            avg_seqs_per_rank = total_seqs_all_ranks / world_size
            max_deviation = max(
                abs(int(stats[3].item()) - avg_seqs_per_rank)
                for stats in all_rank_stats
            )
            log_single_rank(
                logger,
                logging.INFO,
                "[Sequence Packing]  Round-robin distribution quality:",
            )
            log_single_rank(
                logger,
                logging.INFO,
                f"[Sequence Packing]  - Average sequences per rank: {avg_seqs_per_rank:.1f}",
            )
            log_single_rank(
                logger,
                logging.INFO,
                f"[Sequence Packing]  - Max deviation from average: {max_deviation:.0f} sequences ({max_deviation/avg_seqs_per_rank*100:.1f}%)",
            )

def get_actual_sequence_lengths(sequences: torch.Tensor, pad_token: int) -> List[int]:
    """Get actual sequence lengths for pre-padded sequences.

    Args:
        sequences: Tensor of shape [batch_size, seq_len] with pre-padded sequences
        pad_token: The padding token ID

    Returns:
        List of actual sequence lengths (excluding padding)
    """
    if len(sequences.shape) != 2:
        raise ValueError(f"Expected 2D tensor, got shape {sequences.shape}")

    actual_lengths = []

    # Find actual length of each sequence by locating where padding starts
    for seq in sequences:
        # Find the last non-padding token
        non_pad_mask = seq != pad_token
        if non_pad_mask.any():
            # Get the position of the last non-padding token
            actual_length = non_pad_mask.nonzero(as_tuple=True)[0][-1].item() + 1
        else:
            actual_length = 0  # All padding
        actual_lengths.append(actual_length)

    return actual_lengths


def create_empty_bins(
    num_empty_bins : int,
    bin_size : int,
    packed_trajs : torch.Tensor,
    packed_position_ids : torch.Tensor,
    packed_loss_mask : torch.Tensor,
    packed_attention_mask : torch.Tensor,
    tokenizer,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, List[Dict[str, Any]]]:
    """Create empty bins for padding to ensure all ranks have the same number of bins.

    Args:
        num_empty_bins: Number of empty bins to create
        bin_size: Size of each bin
        packed_trajs: Packed trajectories tensor (for dtype/device reference)
        packed_position_ids: Packed position IDs tensor (for dtype/device reference)
        packed_loss_mask: Packed loss mask tensor (for dtype/device reference)
        packed_attention_mask: Packed attention mask tensor (can be None)
        tokenizer: Tokenizer for pad token

    Returns:
        Tuple of (empty_trajs, empty_position_ids, empty_loss_mask, empty_attention_mask, empty_packing_info_entries)
    """
    device = packed_trajs.device

    # Create empty bins with proper shape
    empty_bins = []
    empty_position_ids_list = []
    empty_loss_mask_list = []
    empty_attention_mask_list = []
    empty_packing_info_entries = []

    for i in range(num_empty_bins):
        # Trajectories filled with pad tokens
        empty_bin = torch.full(
            (1, bin_size), tokenizer.pad, dtype=packed_trajs.dtype, device=device
        )
        empty_bins.append(empty_bin)

        # Zero position IDs
        empty_pos_ids = torch.zeros(1, bin_size, dtype=packed_position_ids.dtype, device=device)
        empty_position_ids_list.append(empty_pos_ids)

        # Zero loss mask (so no loss contribution)
        empty_loss = torch.zeros(1, bin_size, dtype=packed_loss_mask.dtype, device=device)
        empty_loss_mask_list.append(empty_loss)

        # Zero attention mask if needed
        if packed_attention_mask is not None:
            # Attention mask is always 4D: [num_bins, 1, bin_size, bin_size]
            empty_attn = torch.zeros(
                1, 1, bin_size, bin_size, dtype=packed_attention_mask.dtype, device=device
            )
            empty_attention_mask_list.append(empty_attn)

        # Empty packing info entries
        empty_packing_info_entries.append(
            {
                'bin_seq_indices': [],  # No sequences in empty bin
                'seq_starts': [],  # No sequence starts
            }
        )

    # Concatenate all empty bins
    if num_empty_bins > 0:
        empty_trajs = torch.cat(empty_bins, dim=0)
        empty_position_ids = torch.cat(empty_position_ids_list, dim=0)
        empty_loss_mask = torch.cat(empty_loss_mask_list, dim=0)
        empty_attention_mask = (
            torch.cat(empty_attention_mask_list, dim=0)
            if packed_attention_mask is not None
            else None
        )
    else:
        empty_trajs = None
        empty_position_ids = None
        empty_loss_mask = None
        empty_attention_mask = None

    return (
        empty_trajs,
        empty_position_ids,
        empty_loss_mask,
        empty_attention_mask,
        empty_packing_info_entries,
    )

def get_default_packed_seq_params(seq_length: int, max_sequences_per_bin: int, device: torch.device) -> PackedSeqParams:
    """Create a default PackedSeqParams that acts as no-op for a single sequence.

    This ensures CUDA graph signature consistency when packed_seq_params
    would otherwise be None. A single sequence spanning the full length
    means no actual packing boundaries

    Args:
        seq_length: The sequence length 
        max_sequences_per_bin: Max sequences to pack in a bin.
        device: Device to create tensors on.

    Returns:
        PackedSeqParams configured as a single unpacked sequence.
    """

    args = get_args()

    # Pad to the maximum number of sequences in the bin for the attention kernel.
    # We add 2 to account for the initial 0 and the final bin_size.
    cu_seqlens = torch.full(
        (max_sequences_per_bin + 2,), seq_length, dtype=torch.int32, device=device,
    )
    cu_seqlens[0] = 0

    return PackedSeqParams(
        qkv_format='thd',
        cu_seqlens_q=cu_seqlens,
        cu_seqlens_kv=cu_seqlens,
        cu_seqlens_q_padded=None,
        cu_seqlens_kv_padded=None,
        max_seqlen_q=seq_length,
        max_seqlen_kv=seq_length,
        total_tokens=seq_length,
    )

def create_packed_seq_params(packing_context: PackingContext):
    cached_packed_seq_params = []
    packing_info = packing_context.packing_info
    bin_size = packing_context.bin_size
    max_sequences_per_bin = packing_context.packer.max_sequences_per_bin
    device = packing_context.packed_trajs.device
    for bin_idx in range(len(packing_context.packed_trajs)):
        params = create_packed_seq_params_for_bin(
            packing_info=packing_info,
            bin_idx=bin_idx,
            bin_size=bin_size,
            max_sequences_per_bin=max_sequences_per_bin,
            device=device,
        )
        cached_packed_seq_params.append(params)
    return cached_packed_seq_params

def create_packed_seq_params_for_bin(
    packing_info: PackingInfo,
    bin_idx: int,
    bin_size: int,
    max_sequences_per_bin: int,
    device: torch.device
) -> Optional[PackedSeqParams]:
    """Create PackedSeqParams for a single bin to enable proper attention masking in TE.

    When using Transformer Engine with sequence packing, we need to provide cu_seqlens
    (cumulative sequence lengths) so that TE knows the boundaries between sequences
    within a packed bin. This prevents attention leakage between unrelated sequences.

    Args:
        packing_info: PackingInfo object containing packing metadata from SequencePacker
        bin_idx: Index of the bin to create params for
        bin_size: Size of the bin (padded sequence length)
        max_sequences_per_bin: Maximum number of sequences per bin
        device: Device to create tensors on

    Returns:
        PackedSeqParams with cu_seqlens set for proper attention masking (or None if empty)
    """
    seq_indices = packing_info.bin_seq_indices[bin_idx]

    # Handle empty bins (padding bins with no sequences)
    if not seq_indices:
        return None

    # Get actual sequence lengths for sequences in this bin
    seq_lengths_in_bin = [packing_info.seq_lengths[idx] for idx in seq_indices]

    # Build cumulative sequence lengths for actual sequences
    # cu_seqlens should be [0, len(seq1), len(seq1)+len(seq2), ..., total_actual_len]
    cu_seqlens_list = np.append(np.cumsum([0] + seq_lengths_in_bin), bin_size)

    cu_seqlens = torch.tensor(cu_seqlens_list, dtype=torch.int32, device=device)

    # Pad cu_seqlens to bin_size by repeating the last value (creates zero-length ghost sequences)
    # This ensures a fixed tensor size for CUDA graph compatibility
    # We add 2 to account for the initial 0 and the final bin_size.
    if len(cu_seqlens) < max_sequences_per_bin + 2:
        out = cu_seqlens.new_full((max_sequences_per_bin + 2,), bin_size)
        out[:len(cu_seqlens)] = cu_seqlens
        cu_seqlens = out

    max_seqlen = bin_size

    return PackedSeqParams(
        qkv_format='thd',
        cu_seqlens_q=cu_seqlens,
        cu_seqlens_kv=cu_seqlens,
        cu_seqlens_q_padded=None,
        cu_seqlens_kv_padded=None,
        max_seqlen_q=max_seqlen,
        max_seqlen_kv=max_seqlen,
        total_tokens=bin_size,
    )


def pack_inference_logprobs(
    inference_logprobs: List[torch.Tensor],
    packing_info: PackingInfo,
    generation_masks: torch.Tensor,
    bin_size: int,
) -> torch.Tensor:
    """Pack inference logprobs into bins aligned with packed sequences.

    Args:
        inference_logprobs: List of inference logprobs tensors for each sequence
        packing_info: PackingInfo object containing bin assignments and sequence positions
        generation_masks: Tensor indicating which tokens were generated
        bin_size: Size of each bin

    Returns:
        Packed inference logprobs tensor of shape [num_bins, bin_size - 1]
    """
    num_bins = len(packing_info.bin_seq_indices)

    # Create packed inference logprobs tensor (logprobs are 1 token shorter than sequences)
    packed_inference_logprobs = torch.zeros(
        (num_bins, bin_size - 1), dtype=torch.float32, device='cpu'
    )

    # Create mapping from global sequence index to local bin index
    # This is needed because seq_to_bin_idx uses global bin indices,
    # but after distribution each rank only has a subset of bins
    seq_to_local_bin = {}
    for local_bin_idx, seq_indices in enumerate(packing_info.bin_seq_indices):
        for seq_idx in seq_indices:
            seq_to_local_bin[seq_idx] = local_bin_idx

    # Align and pack inference logprobs based on generation masks
    for seq_idx in range(len(inference_logprobs)):
        if seq_idx not in seq_to_local_bin:
            continue  # Skip sequences not on this rank

        local_bin_idx = seq_to_local_bin[seq_idx]

        # Get the position of this sequence within the bin
        seq_positions = packing_info.bin_seq_indices[local_bin_idx]
        seq_pos_in_bin = seq_positions.index(seq_idx)
        seq_start = packing_info.seq_starts[local_bin_idx][seq_pos_in_bin]

        # Get generation mask for this sequence to find where generation starts
        gen_mask = generation_masks[seq_idx]
        # Find first generation token (accounting for the shift in get_logprobs)
        first_gen_idx = gen_mask.int().argmax().item() - 1

        # Get the inference logprobs for this sequence
        if isinstance(inference_logprobs[seq_idx], torch.Tensor):
            seq_inf_logprobs = inference_logprobs[seq_idx]
        else:
            continue  # Skip if no inference logprobs

        # Calculate where to place inference logprobs in the packed tensor
        # The inference logprobs start at the first generated token position
        pack_start = seq_start + first_gen_idx
        pack_end = min(
            pack_start + len(seq_inf_logprobs), seq_start + packing_info.seq_lengths[seq_idx] - 1
        )
        actual_len = pack_end - pack_start

        if actual_len > 0 and pack_end <= bin_size - 1:
            packed_inference_logprobs[local_bin_idx, pack_start:pack_end] = seq_inf_logprobs[
                :actual_len
            ]

    return packed_inference_logprobs


def compute_packed_inference_logprobs_stats(
    old_logprobs: torch.Tensor,
    packed_inference_logprobs: torch.Tensor,
    packed_loss_mask: torch.Tensor,
    group_stats: Any,
) -> None:
    """Compute statistics for packed inference logprobs for logging purposes.

    Compares packed inference logprobs with old logprobs using the packed loss mask
    to identify valid positions. Updates group_stats with computed metrics.

    Args:
        old_logprobs: Old logprobs tensor in packed format [num_bins, seq_len-1]
        packed_inference_logprobs: Packed inference logprobs [num_bins, seq_len-1]
        packed_loss_mask: Loss mask indicating valid positions [num_bins, seq_len]
        group_stats: Statistics object to update with computed metrics
    """
    # Lazy import to avoid circular dependency (rl_utils imports from this module)
    from megatron.rl.rl_utils import update_inference_logprobs_group_stats

    # Ensure all tensors are on the same device (CPU for stats computation)
    old_logprobs = old_logprobs.cpu()
    packed_inference_logprobs = packed_inference_logprobs.cpu()
    packed_loss_mask = packed_loss_mask.cpu()

    # Use packed_loss_mask to identify valid positions for stats (shift by 1 for logprobs)
    mask = packed_loss_mask[:, 1:].bool()

    # Ensure shapes match
    if mask.shape != old_logprobs.shape:
        return

    # Update group statistics using common helper
    update_inference_logprobs_group_stats(
        old_logprobs=old_logprobs,
        inference_logprobs=packed_inference_logprobs,
        mask=mask,
        group_stats=group_stats,
    )


class SequencePacker:
    """Packs multiple sequences into bins to minimize padding and improve GPU utilization."""

    def __init__(self, bin_size: int, pad_token: int, max_sequences_per_bin: int = 16):
        self.bin_size = bin_size
        self.pad_token = pad_token
        self.max_sequences_per_bin = max_sequences_per_bin

    def pack_sequences(
        self, trajs: torch.Tensor, generation_masks: Optional[torch.Tensor] = None
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, PackingInfo]:
        """Pack sequences into bins using a greedy first-fit algorithm."""
        # Convert trajectories to list for packing
        sequences = [trajs[i] for i in range(trajs.shape[0])]

        sequences_tensor = torch.stack(sequences)

        seq_lengths = get_actual_sequence_lengths(sequences_tensor, self.pad_token)

        # Trim sequences to actual lengths
        sequences = [sequences_tensor[i, :length] for i, length in enumerate(seq_lengths)]

        sorted_indices = sorted(range(len(sequences)), key=lambda i: seq_lengths[i], reverse=True)

        bins = []
        bin_seq_indices = []  # Track which sequences are in each bin
        current_bin = []
        current_bin_indices = []
        current_bin_length = 0

        # Pack sequences into bins
        sequences_per_bin = []
        for idx in sorted_indices:
            seq = sequences[idx]
            seq_len = len(seq)

            if (
                current_bin_length + seq_len <= self.bin_size
                and len(current_bin) < self.max_sequences_per_bin
            ):
                current_bin.append(seq)
                current_bin_indices.append(idx)
                current_bin_length += seq_len
            else:
                # Start a new bin
                if current_bin:
                    bins.append(current_bin)
                    bin_seq_indices.append(current_bin_indices)
                    sequences_per_bin.append(len(current_bin))
                current_bin = [seq]
                current_bin_indices = [idx]
                current_bin_length = seq_len

        # Don't forget the last bin
        if current_bin:
            bins.append(current_bin)
            bin_seq_indices.append(current_bin_indices)
            sequences_per_bin.append(len(current_bin))

        # Create packed tensors
        num_bins = len(bins)
        device = sequences[0].device
        dtype = sequences[0].dtype

        # Log packing distribution
        if sequences_per_bin:
            avg_seqs_per_bin = sum(sequences_per_bin) / len(sequences_per_bin)
            min_seqs = min(sequences_per_bin)
            max_seqs = max(sequences_per_bin)
            log_single_rank(
                logger,
                logging.INFO,
                (
                    f"[SequencePacker] Packing distribution: {num_bins} bins, "
                    f"avg {avg_seqs_per_bin:.1f} seqs/bin, "
                    f"min {min_seqs}, max {max_seqs} seqs/bin "
                    f"(limit: {self.max_sequences_per_bin})"
                ),
            )
            # Store for later use
            self.last_avg_seqs_per_bin = avg_seqs_per_bin

        packed_sequences = torch.full(
            (num_bins, self.bin_size), self.pad_token, dtype=dtype, device=device
        )
        position_ids = torch.zeros(
            (num_bins, self.bin_size), dtype=torch.long, device=device, requires_grad=False
        )
        attention_mask = torch.zeros(
            (num_bins, 1, self.bin_size, self.bin_size), dtype=torch.bool, device=device
        )
        loss_mask = torch.zeros((num_bins, self.bin_size), dtype=torch.float, device=device)

        # Track packing information for unpacking later
        seq_starts_dict: Dict[int, List[int]] = {}
        seq_to_bin_idx: List[Optional[int]] = [None] * len(sequences)

        # Build seq_to_bin_idx mapping
        for bin_idx, seq_indices in enumerate(bin_seq_indices):
            for seq_idx in seq_indices:
                seq_to_bin_idx[seq_idx] = bin_idx

        # Fill bins
        for bin_idx, (bin_seqs, seq_indices) in enumerate(zip(bins, bin_seq_indices)):
            seq_starts = []
            current_pos = 0

            for seq_idx, seq in enumerate(bin_seqs):
                start = current_pos
                end = start + len(seq)
                seq_starts.append(start)
                current_pos = end

                # Pack sequence
                packed_sequences[bin_idx, start:end] = seq

                # Position IDs reset for each sequence
                position_ids[bin_idx, start:end] = torch.arange(
                    len(seq), device=device, requires_grad=False
                )

                # Causal attention mask within each sequence
                seq_len = end - start
                attention_mask[bin_idx, 0, start:end, start:end] = torch.tril(
                    torch.ones(seq_len, seq_len, dtype=torch.bool, device=device)
                )

                # Loss mask (excluding padding)
                loss_mask[bin_idx, start:end] = 1.0

                # Apply generation mask if provided
                if generation_masks is not None:
                    orig_idx = seq_indices[seq_idx]
                    gen_mask = generation_masks[orig_idx][
                        : len(seq)
                    ]  # Truncate to actual seq length
                    loss_mask[bin_idx, start:end] *= gen_mask.float()

            seq_starts.append(current_pos)
            seq_starts_dict[bin_idx] = seq_starts

        # Note: We'll store the actual padded length later when we know it
        # (it depends on the original trajectories passed to pack_sequences)

        # Invert attention mask, before inversion: (True = attend, False = mask)
        attention_mask.bitwise_not_()

        # Create the PackingInfo dataclass
        packing_info = PackingInfo(
            bin_seq_indices=bin_seq_indices,
            seq_starts=seq_starts_dict,
            seq_lengths=seq_lengths,
            seq_to_bin_idx=seq_to_bin_idx,
            packing_algo='fifo'
        )

        seq_per_bin = [len(indices) for indices in packing_info.bin_seq_indices]
        log_single_rank(
            logger, logging.DEBUG, ("Initial packing output (before distribution):")
        )
        log_single_rank(
            logger,
            logging.DEBUG,
            f"  - Total bins created: {len(packing_info.bin_seq_indices)}",
        )
        log_single_rank(
            logger, logging.DEBUG, f"  - Total sequences packed: {sum(seq_per_bin)}"
        )
        log_single_rank(
            logger,
            logging.DEBUG,
            f"  - Sequences per bin: min={min(seq_per_bin)}, max={max(seq_per_bin)}, avg={sum(seq_per_bin)/len(seq_per_bin):.1f}",
        )
        log_single_rank(logger, logging.DEBUG, f"  - First 20 bins: {seq_per_bin[:20]}")

        return packed_sequences, position_ids, attention_mask, loss_mask, packing_info

def distribute_packed_bins(
    packed_trajs: torch.Tensor,
    packed_position_ids: torch.Tensor,
    packed_attention_mask: torch.Tensor,
    packed_loss_mask: torch.Tensor,
    packing_info: PackingInfo,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, PackingInfo]:
    """Distribute packed bins across the data parallel ranks."""
    rank = mpu.get_data_parallel_rank()
    world_size = mpu.get_data_parallel_world_size()
    tokenizer = get_tokenizer()

    # Distribute packed bins across data parallel ranks
    num_bins, bin_size = packed_trajs.shape
    packing_algo = packing_info.packing_algo

    if packing_algo == 'round-robin':
        # Round-robin assignment: rank i gets bins [i, i+world_size, i+2*world_size, ...]
        my_bin_indices = list(range(rank, num_bins, world_size))
    else:  # fifo (default)
        world_size = world_size if world_size > 0 else 1
        # FIFO assignment: divide bins sequentially across ranks
        bins_per_rank = num_bins // world_size
        extra_bins = num_bins % world_size

        # Calculate start and end indices for this rank
        if rank < extra_bins:
            # Ranks with extra bins
            start_idx = rank * (bins_per_rank + 1)
            end_idx = start_idx + bins_per_rank + 1
        else:
            # Ranks without extra bins
            start_idx = rank * bins_per_rank + extra_bins
            end_idx = start_idx + bins_per_rank

        my_bin_indices = list(range(start_idx, end_idx))

    # Calculate the maximum bins any rank has (for synchronization)
    max_bins_per_rank = (num_bins + world_size - 1) // world_size

    # Extract this rank's bins
    my_packed_trajs = []
    my_packed_position_ids = []
    my_packed_attention_mask = []
    my_packed_loss_mask = []
    my_bin_seq_indices = []
    my_seq_starts = {}


    # Build the local data from the global indices
    for new_idx, old_idx in enumerate(my_bin_indices):
        my_packed_trajs.append(packed_trajs[old_idx])
        my_packed_position_ids.append(packed_position_ids[old_idx])
        if packed_attention_mask is not None:
            my_packed_attention_mask.append(packed_attention_mask[old_idx])
        my_packed_loss_mask.append(packed_loss_mask[old_idx])
        my_bin_seq_indices.append(packing_info.bin_seq_indices[old_idx])
        my_seq_starts[new_idx] = packing_info.seq_starts[old_idx]

    # Stack the selected bins
    packed_trajs = (
        torch.stack(my_packed_trajs)
        if my_packed_trajs
        else torch.empty(
            0,
            packed_trajs.shape[1],
            dtype=packed_trajs.dtype,
            device=packed_trajs.device,
        )
    )
    packed_position_ids = (
        torch.stack(my_packed_position_ids)
        if my_packed_position_ids
        else torch.empty(
            0,
            packed_position_ids.shape[1],
            dtype=packed_position_ids.dtype,
            device=packed_position_ids.device,
        )
    )
    packed_attention_mask = (
        torch.stack(my_packed_attention_mask) if my_packed_attention_mask else None
    )
    packed_loss_mask = (
        torch.stack(my_packed_loss_mask)
        if my_packed_loss_mask
        else torch.empty(
            0,
            packed_loss_mask.shape[1],
            dtype=packed_loss_mask.dtype,
            device=packed_loss_mask.device,
        )
    )

    # Debug: Check what we're extracting
    log_single_rank(logger, logging.DEBUG, (f"Rank 0 {packing_algo} bin assignment:"))
    log_single_rank(
        logger, logging.DEBUG, f"  - Total bins before distribution: {num_bins}"
    )
    log_single_rank(
        logger,
        logging.DEBUG,
        f"  - Bins assigned to rank 0: {my_bin_indices[:10]}... (showing first 10)",
    )
    log_single_rank(
        logger,
        logging.DEBUG,
        f"  - Number of bins for this rank: {len(my_bin_indices)}",
    )
    log_single_rank(
        logger,
        logging.DEBUG,
        f"  - Length of my_bin_seq_indices: {len(my_bin_seq_indices)}",
    )
    if len(my_bin_seq_indices) > 0:
        log_single_rank(
            logger,
            logging.DEBUG,
            f"  - Sequences in first 5 bins: {[len(indices) for indices in my_bin_seq_indices[:5]]}",
        )

    # Create updated packing info for this rank
    new_packing_info = PackingInfo(
        bin_seq_indices=my_bin_seq_indices,
        seq_starts=my_seq_starts,
        seq_lengths=packing_info.seq_lengths,  # Keep all sequence lengths
        seq_to_bin_idx=packing_info.seq_to_bin_idx,  # Keep mapping
        packing_algo=packing_algo,
    )

    # Add empty bins if this rank has fewer than max_bins_per_rank
    current_bins = len(my_bin_indices)
    if current_bins < max_bins_per_rank:
        num_empty_bins = max_bins_per_rank - current_bins

        # Create empty bins using the helper function
        (
            empty_trajs,
            empty_position_ids,
            empty_loss_mask,
            empty_attention_mask,
            empty_packing_entries,
        ) = create_empty_bins(
            num_empty_bins,
            bin_size,
            packed_trajs,
            packed_position_ids,
            packed_loss_mask,
            packed_attention_mask,
            tokenizer,
        )

        # Append empty bins to packed tensors
        packed_trajs = torch.cat([packed_trajs, empty_trajs], dim=0)
        packed_position_ids = torch.cat(
            [packed_position_ids, empty_position_ids], dim=0
        )
        packed_loss_mask = torch.cat([packed_loss_mask, empty_loss_mask], dim=0)

        if packed_attention_mask is not None and empty_attention_mask is not None:
            packed_attention_mask = torch.cat(
                [packed_attention_mask, empty_attention_mask], dim=0
            )

        # Add empty entries to packing_info
        for i, entry in enumerate(empty_packing_entries):
            bin_idx = current_bins + i
            new_packing_info.bin_seq_indices.append(entry['bin_seq_indices'])
            new_packing_info.seq_starts[bin_idx] = entry['seq_starts']

    return packed_trajs, packed_position_ids, packed_attention_mask, packed_loss_mask, new_packing_info


def pack_all_trajectories(trajs, generation_masks, inference_logprobs, global_advantages, bin_size, max_sequences_per_bin, packing_algo):
    tokenizer = get_tokenizer()
    data_parallel_world_size = mpu.get_data_parallel_world_size()
    data_parallel_group = mpu.get_data_parallel_group()
    nvtx_range = get_nvtx_range()

    with nvtx_range("regather_trajectories", time=True):
        def _gather(data):
            data = data.cuda()
            data_list = [torch.empty_like(data) for _ in range(data_parallel_world_size)]
            torch.distributed.all_gather(data_list, data, group=data_parallel_group)
            return torch.cat(data_list, dim=0)

        trajs = _gather(trajs)    
        generation_masks = _gather(generation_masks) 
        if inference_logprobs is not None:
            inference_logprobs = _gather(inference_logprobs)

    with nvtx_range("pack_sequences", time=True):
        # Create packer with max sequences per bin limit to prevent extreme imbalance
        packer = SequencePacker(
            bin_size=bin_size,
            pad_token=tokenizer.pad,
            max_sequences_per_bin=max_sequences_per_bin,
        )

        # Pack sequences with generation masks
        (
            packed_trajs,
            packed_position_ids,
            packed_attention_mask,
            packed_loss_mask,
            packing_info,
        ) = packer.pack_sequences(trajs, generation_masks)
        packing_info.packing_algo = packing_algo

        # Distribute packed bins across the data parallel ranks
        (
            packed_trajs,
            packed_position_ids,
            packed_attention_mask,
            packed_loss_mask,
            packing_info,
        ) = distribute_packed_bins(
            packed_trajs,
            packed_position_ids,
            packed_attention_mask,
            packed_loss_mask,
            packing_info,
        )

    # Create bin_advantages list
    bin_advantages = []
    for seq_indices in packing_info.bin_seq_indices:
        if seq_indices:
            bin_advantages.append(global_advantages[seq_indices])
        else:
            bin_advantages.append(
                torch.tensor([], dtype=global_advantages.dtype, device=global_advantages.device)
            )

    # Pre-compute all PackedSeqParams for all bins ONCE to avoid repeated
    # tensor allocations that cause CUDA memory fragmentation and periodic spikes
    # Create a temporary packing context to pass to create_packed_seq_params
    cached_packed_seq_params = [
        create_packed_seq_params_for_bin(
                packing_info=packing_info,
                bin_idx=bin_idx,
                bin_size=bin_size,
                max_sequences_per_bin=max_sequences_per_bin,
                device=packed_trajs.device,
            ) for bin_idx in range(len(packed_trajs))
    ]

    # Create the final PackingContext
    packing_context = PackingContext(
        bin_size=bin_size,
        packer=packer,
        packing_info=packing_info,
        original_generation_masks=generation_masks,
        original_trajs=trajs,
        packed_trajs=packed_trajs,
        packed_position_ids=packed_position_ids,
        packed_attention_mask=packed_attention_mask,
        packed_loss_mask=packed_loss_mask,
        original_inference_logprobs=inference_logprobs,
        bin_advantages=bin_advantages,
        cached_packed_seq_params=cached_packed_seq_params,
    )

    log_packing_efficiency(packing_context)

    return packing_context

def update_microbatch_calculator(
    samples_ratio_per_step: float,
    num_bins_this_rank: int,
    bin_seq_indices: List[List[int]],
    global_batch_size: int, 
    rampup_batch_size: int, 
    micro_batch_size: int, 
    decrease_batch_size_if_needed: bool,
):
    """Return a data loader with seqpacked indices with microbatches in bins frame of reference.
    Args:
        samples_ratio_per_step: Fraction of sampled trajectories to use per iteration.
        num_bins_this_rank: Amount of packing bins that belongs to current rank.
        bin_seq_indices: Global seq indices in the bin, see PackingInfo.
        global_batch_size: Current global batch size.
        rampup_batch_size: Rampup batch size. See num_microbatches_calculator.py for more.
        micro_batch_size: Micro batch size at init.
        decrease_batch_size_if_needed: Scale down batch size. See num_microbatches_calculator.py for more.

    As a side effect, we calculate the global batch size in the bins frame of reference.
    In sequence packing, our batch dimension shrinks as we move some trajs onto free
    space in sequence dimension. The resulting batch size is what we return here.
    """

    dp_world_size = mpu.get_data_parallel_world_size()

    # Ceiling division means we will reuse some bins
    # If we did floor we would leave some behind
    local_bins_per_step = math.ceil(samples_ratio_per_step * num_bins_this_rank)

    bins_bs = local_bins_per_step * dp_world_size

    old_num_microbatches = get_num_microbatches()
    reconfigure_num_microbatches_calculator(
        rank=torch.distributed.get_rank() if torch.distributed.is_initialized() else 0,
        rampup_batch_size=rampup_batch_size,
        global_batch_size=bins_bs,
        micro_batch_size=micro_batch_size,
        data_parallel_size=dp_world_size,
        decrease_batch_size_if_needed=decrease_batch_size_if_needed,
    )
    new_num_microbatches = get_num_microbatches()

    log_single_rank(
        logger, logging.INFO, "[Sequence Packing] Multi-step training plan:"
    )

    log_single_rank(
        logger,
        logging.INFO,
        f"[Sequence Packing]  - Bins per rank per step: {samples_ratio_per_step}*{num_bins_this_rank}={local_bins_per_step}",
    )

    log_single_rank(
        logger,
        logging.INFO,
        f"[Sequence Packing]  - Target sequences per step: {global_batch_size}",
    )
    log_single_rank(
        logger,
        logging.INFO,
        f"[Sequence Packing]  - Microbatches per step: {new_num_microbatches} (was {old_num_microbatches})",
    )

    # Opt steps only depends on how much we sample and how much we consume.
    # We make sure this is an integer division, check validate_args in arguments.py for details.
    opt_steps = int(1 / samples_ratio_per_step)
    for step in range(min(3, opt_steps)):
        start_idx = step * local_bins_per_step
        end_idx = min(start_idx + local_bins_per_step, num_bins_this_rank)
        step_bins = end_idx - start_idx

        actual_seqs = sum(
            len(bin_seq_indices[bin_idx])
            for bin_idx in range(start_idx, end_idx)
            if bin_idx < len(bin_seq_indices)
        )
        est_global_seqs = actual_seqs * dp_world_size
        log_single_rank(
            logger,
            logging.INFO,
            f"[Sequence Packing]  - Step {step + 1}: {step_bins} bins, ~{est_global_seqs} sequences globally",
        )

    if opt_steps > 3:
        log_single_rank(logger, logging.INFO, f"  - ... ({opt_steps - 3} more steps)")

def get_microbatch_dataloader(num_bins_this_rank, micro_batch_size):
    bin_indices = torch.arange(num_bins_this_rank)
    dataset = TensorDataset(bin_indices)
    return DataLoader(dataset, batch_size=micro_batch_size, shuffle=False, collate_fn=lambda x: x[0])

def get_sequence_packing_log_info(args):
    """Get logging information for sequence packing mode."""
    if args.consumed_train_bins > 0:
        return f' consumed bins: {args.consumed_train_bins:12d} |'
    return ''


def get_sequence_packing_tensorboard_metrics(args):
    """Get tensorboard metrics for sequence packing mode."""
    metrics = {}
    if args.consumed_train_bins > 0:
        bin_batch_size = (
            mpu.get_data_parallel_world_size() * args.micro_batch_size * get_num_microbatches()
        )
        metrics['bin-batch-size'] = bin_batch_size
        metrics['consumed-bins'] = args.consumed_train_bins
    return metrics


================================================
FILE: megatron/rl/server/__init__.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
from .inference.inference_interface_server import *


================================================
FILE: megatron/rl/server/agent/__init__.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.


================================================
FILE: megatron/rl/server/agent/fastapi_env_server.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import asyncio
import socket
from typing import AsyncGenerator

import httpx
import yaml
from fastapi import FastAPI
from pydantic import Field, PrivateAttr
from typing_extensions import Self
from uvicorn import Config, Server
from uvicorn.config import LOGGING_CONFIG

LOGGING_CONFIG['root'] = {"handlers": ["default"], "level": "INFO"}

from ... import import_class, inference
from ...agent.api import (
    Agent,
    ContrastiveRollout,
    ContrastiveRolloutGenerator,
    EvaluationAgent,
    EvaluationRequest,
    EvaluationResponse,
    GroupedRolloutGenerator,
    GroupedRolloutRequest,
    RolloutGenerator,
    RolloutRequest,
    TokenRollout,
)
from ...server.api import (
    EnvironmentServer,
    InferenceServer,
    RemoteEvaluationRequest,
    RemoteGroupedRolloutRequest,
    RemoteRolloutRequest,
)
from .. import agent
from ..api import EnvironmentServer, InferenceServer, RemoteEvaluationRequest, RemoteRolloutRequest


@EnvironmentServer.register_subclass
class FastAPIEnvServer(EnvironmentServer):
    server_type: str = Field('FastAPIEnvServer', frozen=True, Literal=True)
    env_server_host_port: str
    _server_task: asyncio.Task = PrivateAttr(None)

    @classmethod
    async def launch(cls, env_cls: type[Agent], cls_args: dict, port: int, **kwargs) -> Self:

        app = FastAPI()

        if issubclass(env_cls, GroupedRolloutGenerator):

            @app.post("/grouped_rollouts/")
            async def grouped_rollouts(
                request: RemoteGroupedRolloutRequest,
            ) -> list[list[TokenRollout]]:
                env = env_cls(**cls_args)
                request.inference_interface = request.inference_interface.unwrap()
                return await env.get_grouped_rollouts(request)

        if issubclass(env_cls, ContrastiveRolloutGenerator):

            @app.post("/contrastive_rollouts/")
            async def contrastive_rollouts(
                request: RemoteRolloutRequest,
            ) -> list[ContrastiveRollout]:
                env = env_cls(**cls_args)
                request.inference_interface = request.inference_interface.unwrap()
                return await env.get_contrastive_rollouts(request)

        if issubclass(env_cls, RolloutGenerator):

            @app.post("/rollouts/")
            async def rollouts(request: RemoteRolloutRequest) -> list[TokenRollout]:
                env = env_cls(**cls_args)
                request.inference_interface = request.inference_interface.unwrap()
                return await env.get_reward_rollouts(request)

        if issubclass(env_cls, EvaluationAgent):

            @app.post("/evaluation/")
            async def run_evaluation(request: RemoteEvaluationRequest):
                env = env_cls(**cls_args)
                request.inference_interface = request.inference_interface.unwrap()
                return await env.run_evaluation(request)

        loop = asyncio.get_event_loop()
        config = Config(app=app, loop=loop, host='0.0.0.0', port=port)
        server = Server(config)
        server_task = loop.create_task(server.serve())

        ip = socket.gethostbyname(socket.gethostname())

        launched_server = cls(env_server_host_port=f"{ip}:{config.port}", **kwargs)
        launched_server._server_task = server_task

        return launched_server

    def kill(self):
        return self._server_task.cancel()

    async def get_contrastive_rollouts(self, request: RolloutRequest) -> list[ContrastiveRollout]:
        assert isinstance(
            request.inference_interface, InferenceServer
        ), "Rollout requests to remote server must contain an InferenceServer object"
        payload = request.model_dump()
        payload["inference_interface"] = request.inference_interface.model_dump()
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"http://{self.env_server_host_port}/contrastive_rollouts/",
                json=payload,
                timeout=None,
            )
        rollouts = [ContrastiveRollout.model_validate(r) for r in response.json()]
        return rollouts

    async def group_rollout(self, request: GroupedRolloutRequest):
        assert (
            False
        ), "Calling group_rollout on FastAPIEnvServer is not supported, use get_grouped_rollouts"

    async def get_grouped_rollouts(
        self, request: GroupedRolloutRequest
    ) -> AsyncGenerator[list[TokenRollout], None]:
        assert isinstance(
            request.inference_interface, InferenceServer
        ), "Rollout requests to remote server must contain an InferenceServer object"
        assert not request.streaming, "FastAPIEnvServer does not support group rollout streaming"
        payload = request.model_dump()
        payload["inference_interface"] = request.inference_interface.model_dump()
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"http://{self.env_server_host_port}/grouped_rollouts/", json=payload, timeout=None
            )
        rollouts = [[TokenRollout.model_validate(r) for r in group] for group in response.json()]
        for rollout in rollouts:
            yield rollout

    async def rollout(self, request: RolloutRequest) -> TokenRollout:
        assert (
            False
        ), "Calling rollout on FastAPIEnvServer is not supported, use get_reward_rollouts"

    async def get_reward_rollouts(self, request: RolloutRequest) -> list[TokenRollout]:
        assert isinstance(
            request.inference_interface, InferenceServer
        ), "Rollout requests to remote server must contain an InferenceServer object"
        payload = request.model_dump()
        payload["inference_interface"] = request.inference_interface.model_dump()
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"http://{self.env_server_host_port}/rollouts/", json=payload, timeout=None
            )
        rollouts = [TokenRollout.model_validate(r) for r in response.json()]
        return rollouts

    async def run_evaluation(self, request: EvaluationRequest) -> EvaluationResponse:
        assert isinstance(
            request.inference_interface, InferenceServer
        ), "Evaluation requests to remote server must contain an InferenceServer object"
        payload = request.model_dump()
        payload["inference_interface"] = request.inference_interface.model_dump()
        async with httpx.AsyncClient(timeout=None) as client:
            response = await client.post(
                f"http://{self.env_server_host_port}/evaluation/", json=payload, timeout=None
            )
        response = EvaluationResponse.model_validate(response.json()).unwrap()
        return response


def run(agent_cls: type[Agent], cls_args: dict, port: int):
    loop = asyncio.new_event_loop()

    async def run_server():
        server: FastAPIEnvServer = await FastAPIEnvServer.launch(
            env_cls=agent_cls, cls_args=cls_args, port=port
        )
        print(server.model_dump(exclude={'_server_task'}))
        await server._server_task

    loop.run_until_complete(run_server())


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--env-config", type=str, required=True)
    parser.add_argument("--port", type=int, default=8000)
    args = parser.parse_args()
    with open(args.env_config, 'r') as f:
        config = yaml.safe_load(f)[0]
    agent_cls = import_class(config['agent_type'])
    cls_args = config['agent_args']
    run(agent_cls, cls_args, port=args.port)


================================================
FILE: megatron/rl/server/api.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

from pydantic import BaseModel, Field
from typing_extensions import Self, Type

from .. import TypeLookupable
from ..agent.api import EvaluationRequest, GroupedRolloutRequest, RolloutRequest
from ..inference import InferenceInterface


class Server(TypeLookupable):
    """Server interface class. Implements launch and kill control methods."""

    @classmethod
    async def launch(cls) -> Self:
        raise NotImplementedError

    async def suspend(self):
        pass

    async def resume(self):
        pass

    async def kill(self):
        raise NotImplementedError


class InferenceServer(Server, InferenceInterface):
    """Base Inference Server."""

    ...


class EnvironmentServer(Server):
    """Base Environment Server."""

    ...


class RemoteRolloutRequest(RolloutRequest):
    inference_interface: InferenceServer


class RemoteGroupedRolloutRequest(GroupedRolloutRequest):
    inference_interface: InferenceServer


class RemoteEvaluationRequest(EvaluationRequest):
    inference_interface: InferenceServer


================================================
FILE: megatron/rl/server/inference/__init__.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.


================================================
FILE: megatron/rl/server/inference/inference_interface_server.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import asyncio
import os
import socket
import weakref

import httpx
from fastapi import FastAPI
from pydantic import Field, PrivateAttr
from typing_extensions import Self
from uvicorn import Config, Server

from ...inference.inference_interface import (
    InferenceInterface,
    InferenceRequest,
    InferenceResponse,
    ReturnsRaw,
    ReturnsTokens,
)
from ...server.api import InferenceServer


@InferenceServer.register_subclass
class InferenceInterfaceClient(InferenceServer):
    type_name: str = Field(default='InferenceInterfaceClient', frozen=True)
    env_server_host_port: str
    conversation_template: None = None

    async def base_generate(self, request: InferenceRequest) -> InferenceResponse:
        async with httpx.AsyncClient(timeout=None) as client:
            response = await client.post(
                f"http://{self.env_server_host_port}/base_generate/", json=request.model_dump()
            )
            return InferenceResponse.model_validate(response.json())


@InferenceServer.register_subclass
class InferenceInterfaceServer(InferenceInterfaceClient, ReturnsRaw, ReturnsTokens):
    type_name: str = Field(default='InferenceInterfaceServer', frozen=True)
    _server: Server
    _server_task: asyncio.Task
    _inference_interface: InferenceInterface
    _interface_launched: bool = PrivateAttr(False)

    @classmethod
    async def launch(cls, interface_cls: type[InferenceInterface], **kwargs) -> Self:
        app = FastAPI()
        loop = asyncio.get_event_loop()
        config = Config(
            app=app,
            loop=loop,
            host='0.0.0.0',
            port=os.getenv('MEGATRON_RL_INFERENCE_SERVER_PORT', 8294),
        )
        ip = socket.gethostbyname(socket.gethostname())
        launched_server = cls(env_server_host_port=f"{ip}:{config.port}")

        if issubclass(interface_cls, InferenceServer):
            launched_server._inference_interface = await interface_cls.launch(**kwargs)
            launched_server._interface_launched = True
        else:
            launched_server._inference_interface = interface_cls(**kwargs)

        # Use a weak reference to avoid circular reference
        server_ref = weakref.ref(launched_server)

        @app.post("/base_generate/")
        async def base_generate(request: InferenceRequest) -> InferenceResponse:
            server = server_ref()
            if server is None:
                raise RuntimeError("Server has been garbage collected")
            return await server._inference_interface.base_generate(request)

        server = Server(config)
        launched_server._server = server
        launched_server._server_task = loop.create_task(server.serve())

        print(f"Launched server on {ip}:{config.port}")
        return launched_server

    async def kill(self):
        self._server.should_exit = True
        if isinstance(self._inference_interface, InferenceServer) and self._interface_launched:
            self._interface_launched = False
            await self._inference_interface.kill()
        await self._server_task

    async def suspend(self):
        if isinstance(self._inference_interface, InferenceServer):
            await self._inference_interface.suspend()

    async def resume(self):
        if isinstance(self._inference_interface, InferenceServer):
            await self._inference_interface.resume()


================================================
FILE: megatron/training/__init__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import torch

from .global_vars import get_args
from .global_vars import get_signal_handler
from .global_vars import get_tokenizer
from .global_vars import get_tensorboard_writer
from .global_vars import get_wandb_writer
from .global_vars import get_one_logger
from .global_vars import get_adlr_autoresume
from .global_vars import get_timers
from .initialize  import initialize_megatron
from .training import pretrain, get_model, get_train_valid_test_num_samples, set_startup_timestamps

from .utils import (print_rank_0,
                    is_last_rank,
                    print_rank_last)


================================================
FILE: megatron/training/argument_utils.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import dataclasses
import typing
import types
from typing import Any, Optional
from argparse import ArgumentParser, _ArgumentGroup
import inspect
import itertools
import builtins
import ast
import enum
from dataclasses import Field, fields

# TODO: support arg renames

class TypeInferenceError(Exception):
    """Custom exception type to be conditionally handled by ArgumentGroupFactory."""
    pass

class ArgumentGroupFactory:
    """Utility that adds an argument group to an ArgumentParser based on the attributes of a dataclass.

    This utility uses dataclass metadata including type annotations and docstrings to automatically
        infer the type, default, and other argparse keyword arguments.

    You can override or supplement the automatically inferred argparse kwargs for any 
        dataclass field by providing an "argparse_meta" key in the field's metadata dict.
        The value should be a dict of kwargs that will be passed to ArgumentParser.add_argument().
        These metadata kwargs take precedence over the automatically inferred values.

        Example:
            @dataclass
            class YourConfig:
                your_attribute: int | str | None = field(
                    default=None,
                    metadata={
                        "argparse_meta": {
                            "arg_names": ["--your-arg-name1", "--your-arg-name2"],
                            "type": str,
                            "nargs": "+",
                            "default": "foo",
                        }
                    },
                )

        In this example, inferring the type automatically would fail, as Unions are
        not supported. However the metadata is present, so that takes precedence.
        Any keyword arguments to `ArgumentParser.add_argument()` can be included in
        the "argparse_meta" dict, as well as "arg_names" for the argument flag name.

    This class can also be used as a base class and extended as needed to support dataclasses
        that require some customized or additional handling.

    Args:
        src_cfg_class: The source dataclass type (not instance) whose fields will be 
            converted into command-line arguments. Each field's type annotation determines 
            the argument type, default values become argument defaults, and field-level 
            docstrings are extracted to populate argument help text.
        exclude: Optional list of attribute names from `src_cfg_class` to exclude from 
            argument generation. Useful for omitting internal fields, computed properties,
            or attributes that should be configured through other means. If None, all 
            dataclass fields will be converted to command-line arguments. Default: None.
    """

    def __init__(self, src_cfg_class: type, exclude: Optional[list[str]] = None) -> None:
        self.src_cfg_class = src_cfg_class
        self.field_docstrings = self._get_field_docstrings(src_cfg_class)
        self.exclude = set(exclude) if exclude is not None else set()

    def _format_arg_name(self, config_attr_name: str, prefix: Optional[str] = None) -> str:
        """Convert dataclass name into appropriate argparse flag name.

        Args:
            config_attr_name: dataclass attribute name
            prefix: prefix string to add to the dataclass attribute name. e.g. 'no' for bool 
                settings that are default True. A hyphen is added after the prefix. Default: None
        """
        arg_name = config_attr_name
        if prefix:
            arg_name = prefix + '_' + arg_name
        arg_name = "--" + arg_name.replace("_", "-")
        return arg_name

    def _get_enum_kwargs(self, config_type: enum.EnumMeta) -> dict[str, Any]:
        """Build kwargs for Enums.

        With these settings, the user must provide a valid enum value, e.g.
            'flash', for `AttnBackend.flash`.
        """
        def enum_type_handler(cli_arg):
            return config_type[cli_arg]

        return {"type": enum_type_handler, "choices": list(config_type)}

    def _extract_type(self, config_type: type) -> dict[str, Any]:
        """Determine the type, nargs, and choices settings for this argument.

        Args:
            config_type: attribute type from dataclass
        """
        origin = typing.get_origin(config_type)
        type_tuple = typing.get_args(config_type)

        if isinstance(config_type, type) and issubclass(config_type, enum.Enum):
            return self._get_enum_kwargs(config_type)

        # Primitive type
        if origin is None:
            return {"type": config_type}

        if origin in [types.UnionType, typing.Union]:
            # Handle Optional and Union
            if type_tuple[1] == type(None): # Optional type. First element is value inside Optional[]
                return self._extract_type(type_tuple[0])
            else:
                raise TypeInferenceError(f"Unions not supported by argparse: {config_type}")

        elif origin is list:
            if len(type_tuple) == 1:
                kwargs = self._extract_type(type_tuple[0])
                kwargs["nargs"] = "+"
                return kwargs
            else:
                raise TypeInferenceError(f"Multi-type lists not supported by argparse: {config_type}")

        elif origin is typing.Literal:
            choices_types = [type(choice) for choice in type_tuple]
            assert all([t == choices_types[0] for t in choices_types]), "Type of each choice in a Literal type should all be the same."
            kwargs = {"type": choices_types[0], "choices": type_tuple}
            return kwargs
        else:
            raise TypeInferenceError(f"Unsupported type: {config_type}")


    def _build_argparse_kwargs_from_field(self, attribute: Field) -> dict[str, Any]:
        """Assemble kwargs for add_argument().

        Args:
            attribute: dataclass attribute
        """
        argparse_kwargs = {}
        argparse_kwargs["arg_names"] = [self._format_arg_name(attribute.name)]
        argparse_kwargs["dest"] = attribute.name
        argparse_kwargs["help"] = self.field_docstrings[attribute.name] if attribute.name in self.field_docstrings else ""

        # dataclasses specifies that both should not be set
        if isinstance(attribute.default, type(dataclasses.MISSING)):
            # dataclasses specified default_factory must be a zero-argument callable
            argparse_kwargs["default"] = attribute.default_factory()
        else:
            argparse_kwargs["default"] = attribute.default

        attr_argparse_meta = None
        if attribute.metadata != {} and "argparse_meta" in attribute.metadata:
            # save metadata here, but update at the end so the metadata has highest precedence
            attr_argparse_meta = attribute.metadata["argparse_meta"]


        # if we cannot infer the argparse type, all of this logic may fail. we try to defer
        # to the developer-specified metadata if present
        try:
            argparse_kwargs.update(self._extract_type(attribute.type))

            # use store_true or store_false action for enable/disable flags, which doesn't accept a 'type'
            if argparse_kwargs["type"] == bool:
                argparse_kwargs["action"] = "store_true" if attribute.default == False else "store_false"
                argparse_kwargs.pop("type")

                # add '--no-*' and '--disable-*' prefix if this is a store_false argument
                if argparse_kwargs["action"] == "store_false":
                    argparse_kwargs["arg_names"] = [self._format_arg_name(attribute.name, prefix="no"), self._format_arg_name(attribute.name, prefix="disable")] 
        except TypeInferenceError as e:
            if attr_argparse_meta is not None:
                print(
                    f"WARNING: Inferring the appropriate argparse argument type from {self.src_cfg_class} "
                    f"failed for {attribute.name}: {attribute.type}.\n"
                    "Deferring to attribute metadata. If the metadata is incomplete, 'parser.add_argument()' may fail.\n"
                    f"Original failure: {e}"
                )
            else:
                raise e

        # metadata provided by field takes precedence 
        if attr_argparse_meta is not None:
            argparse_kwargs.update(attr_argparse_meta)

        return argparse_kwargs

    def build_group(self, parser: ArgumentParser, title: Optional[str] = None) -> _ArgumentGroup:
        """Entrypoint method that adds the argument group to the parser.

        Args:
            parser: The parser to add arguments to
            title: Title for the argument group
        """
        arg_group = parser.add_argument_group(title=title, description=self.src_cfg_class.__doc__)
        for attr in fields(self.src_cfg_class):
            if attr.name in self.exclude or attr.init is False:
                continue

            add_arg_kwargs = self._build_argparse_kwargs_from_field(attr)

            arg_names = add_arg_kwargs.pop("arg_names")
            arg_group.add_argument(*arg_names, **add_arg_kwargs)

        return arg_group

    def _get_field_docstrings(self, src_cfg_class: type) -> dict[str, str]:
        """Extract field-level docstrings from a dataclass by inspecting its AST.

        Recurses on parent classes of `src_cfg_class`.

        Args:
            src_cfg_class: Dataclass to get docstrings from.
        """
        source = inspect.getsource(src_cfg_class)
        tree = ast.parse(source)
        root_node = tree.body[0]

        assert isinstance(root_node, ast.ClassDef), "Provided object must be a class."

        field_docstrings = {}

        # Iterate over body of the dataclass using 2-width sliding window.
        # When 'a' is an assignment expression and 'b' is a constant, the window is
        # lined up with an attribute-docstring pair. The pair can be saved to our dict.
        for a, b in itertools.pairwise(root_node.body):
            a_cond = isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name)
            b_cond = isinstance(b, ast.Expr) and isinstance(b.value, ast.Constant)

            if a_cond and b_cond:
                # These should be guaranteed by typechecks above, but assert just in case
                assert isinstance(a.target.id, str), "Dataclass attribute not in the expected format. Name is not a string."
                assert isinstance(b.value.value, str), "Dataclass attribute docstring is not a string."

                # Formatting
                docstring = inspect.cleandoc(b.value.value)
                docstring = ' '.join(docstring.split())

                field_docstrings[a.target.id] = docstring

        # recurse on parent class
        base_classes = src_cfg_class.__bases__
        if len(base_classes) > 0:
            parent_class = base_classes[0]
            if parent_class.__name__ not in builtins.__dict__:
                field_docstrings.update(self._get_field_docstrings(base_classes[0]))

        return field_docstrings


================================================
FILE: megatron/training/arguments.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

"""Megatron arguments."""

import argparse
import dataclasses
import json
import os
from pathlib import Path
import re
import types

import torch
import torch.nn.functional as F
from packaging.version import Version as PkgVersion

from megatron.core.dist_checkpointing.validation import StrictHandling
from megatron.core.rerun_state_machine import RerunStateMachine
from megatron.core.transformer import MLATransformerConfig, TransformerConfig
from megatron.core.transformer.pipeline_parallel_layer_layout import PipelineParallelLayerLayout
from megatron.core.transformer.enums import AttnBackend, CudaGraphScope
from megatron.core.transformer.heterogeneous.heterogeneous_config import (
    HeterogeneousTransformerConfig,
    MLPConfig,
)
from megatron.core.utils import (
    get_torch_version,
    is_flashinfer_min_version,
    is_te_min_version,
    is_torch_min_version,
)
from megatron.core.activations import squared_relu
from megatron.core.fusions.fused_bias_geglu import quick_gelu
from megatron.training.utils import (
    get_device_arch_version,
    update_use_dist_ckpt,
    print_rank_0,
    warn_rank_0,
)
from megatron.core.msc_utils import MultiStorageClientFeature

from megatron.core.quantization.utils import (
    kitchen_quantization_recipe_config,
    load_quantization_recipe,
)

from megatron.training.argument_utils import ArgumentGroupFactory

def add_megatron_arguments(parser: argparse.ArgumentParser):
    """"Add Megatron-LM arguments to the given parser."""

    # Standard arguments.
    parser = _add_network_size_args(parser)
    parser = _add_regularization_args(parser)
    parser = _add_training_args(parser)
    parser = _add_rl_args(parser)
    parser = _add_initialization_args(parser)
    parser = _add_learning_rate_args(parser)
    parser = _add_checkpointing_args(parser)
    parser = _add_mixed_precision_args(parser)
    parser = _add_distributed_args(parser)
    parser = _add_validation_args(parser)
    parser = _add_data_args(parser)
    parser = _add_tokenizer_args(parser)
    parser = _add_autoresume_args(parser)
    parser = _add_biencoder_args(parser)
    parser = _add_vision_args(parser)
    parser = _add_moe_args(parser)
    parser = _add_mla_args(parser)
    parser = _add_experimental_attention_variant_args(parser)
    parser = _add_heterogeneous_args(parser)
    parser = _add_logging_args(parser)
    parser = _add_straggler_detector_args(parser)
    parser = _add_workload_inspector_server_args(parser)
    parser = _add_inference_args(parser)
    parser = _add_transformer_engine_args(parser)
    parser = _add_experimental_args(parser)
    parser = _add_one_logger_args(parser)
    parser = _add_inprocess_restart_args(parser)
    parser = _add_ft_package_args(parser)
    parser = _add_rerun_machine_args(parser)
    parser = _add_msc_args(parser)
    parser = _add_kitchen_quantization_arguments(parser)
    parser = _add_sft_args(parser)

    return parser

def parse_args(extra_args_provider=None, ignore_unknown_args=False):
    """Parse all arguments."""
    parser = argparse.ArgumentParser(description='Megatron-LM Arguments',
                                     allow_abbrev=False)

    parser = add_megatron_arguments(parser)

    # Custom arguments.
    if extra_args_provider is not None:
        parser = extra_args_provider(parser)

    # Parse.
    if ignore_unknown_args:
        args, _ = parser.parse_known_args()
    else:
        args = parser.parse_args()

    # Experimental yaml
    if args.yaml_cfg is not None:
        from .yaml_arguments import load_yaml
        assert args.yaml_cfg and not args.use_legacy_models, \
            "Yaml config is not supported with legacy models."
        args = load_yaml(args.yaml_cfg)


    # Args from environment
    args.rank = int(os.getenv('RANK', '0'))
    args.world_size = int(os.getenv("WORLD_SIZE", '1'))

    # Args to disable MSC
    if not args.enable_msc:
        MultiStorageClientFeature.disable()
        assert MultiStorageClientFeature.is_enabled() is False
        warn_rank_0('The MSC feature is disabled.')

    return args


def validate_model_config_args_from_heterogeneous_config(args):
    """Validate model config arguments from heterogeneous config.

    This function takes model arguments and validates them based on a heterogeneous layer configuration.
    The heterogeneous config can be provided either as a path to a JSON file or as an encoded JSON string.

    The function enforces certain model architecture choices like SiLU activation, RMSNorm, grouped query attention,
    and RoPE positional embeddings. It also sets model dimensions like number of layers, hidden size, and attention heads
    based on the heterogeneous config.

    Args:
        args: Model configuration arguments to be overridden. Expected to have attributes:
            - heterogeneous_layers_config_path (str): Path to JSON config file
            - heterogeneous_layers_config_encoded_json (str): Encoded JSON config string

    Returns:
        None
    """
    if (
        args.heterogeneous_layers_config_path is None
        and args.heterogeneous_layers_config_encoded_json is None
    ):
        return

    if args.heterogeneous_layers_config_encoded_json is None:
        args.heterogeneous_layers_config_encoded_json = Path(
            args.heterogeneous_layers_config_path
        ).read_text()

    hf_config_dict = types.SimpleNamespace(**json.loads(args.heterogeneous_layers_config_encoded_json))

    assert hf_config_dict.hidden_act == "silu", (
        f"hidden_act in heterogeneous config is {hf_config_dict.hidden_act}, should be silu"
    )

    n_kv_heads_in_group = [
        config["attention"]["n_heads_in_group"] for config in hf_config_dict.block_configs
        if config["attention"]["n_heads_in_group"] is not None
    ]
    assert all(num == n_kv_heads_in_group[0] for num in n_kv_heads_in_group), "num query head must be consistent across all layers"

    args_to_validate = {
        "swiglu": True,
        "normalization": "RMSNorm",
        "group_query_attention": True,
        "position_embedding_type": "rope",
        "rotary_percent": 1.0,
        "use_rope_scaling": True,
        "use_rotary_position_embeddings": True,
        "num_layers": hf_config_dict.num_hidden_layers,
        "hidden_size": hf_config_dict.hidden_size,
        "num_attention_heads": hf_config_dict.num_attention_heads,
        "untie_embeddings_and_output_weights": not hf_config_dict.tie_word_embeddings,
        "rotary_base": hf_config_dict.rope_theta,
        "rope_scaling_factor": hf_config_dict.rope_scaling["factor"],
        "num_query_groups": hf_config_dict.num_attention_heads // n_kv_heads_in_group[0],
    }

    incompatible_args = {}
    for key, value in args_to_validate.items():
        provided_value = getattr(args, key, None)
        if provided_value != value:
            incompatible_args[key] = (provided_value, value)

    if incompatible_args:
        incompatible_args_str = ', '.join([
            f"{k}: {provided_value} (provided) != {value} (expected)"
            for k, (provided_value, value) in incompatible_args.items()
        ])
        raise ValueError(
            f"Arguments differ from heterogeneous config: {incompatible_args_str}"
        )

def _eval_pattern(pattern):
    """ Validate and evaluate a string containing a Python list expression """
    assert isinstance(pattern, str)

    # validate input, only allow comma, digits, [, ], (, ), +, and *
    if bool(re.compile(r'[^,\d\[\]\(\)\+\*]').search(pattern)):
        raise ValueError(f"Invalid pattern: {pattern}")

    return eval(pattern)

def no_rope_freq_type(x):
    """ Controls which layers to skip performing Rotary Position Embedding.
    - An integer N: Represents a 1:N ratio, meaning RoPE is skipped every N-1 layers.
    - A string "N": Same as above, but provided as a string
    - A string containing a Python list expression that defines a custom pattern, e.g.:
      "([0]*3+[1]*1)*3" evaluates to [0,0,0,1,0,0,0,1,0,0,0,1]
      where 1 indicates rope is skipped on the layer.
      This allows defining arbitrary patterns of rope skipping.
      The pattern length must match the total number of transformer layers.
      Examples:
          "([1]+[0]*23)": Only first layer has rope skipped for a 24-layer network.
          "([0]*3+[1]*1)*2": Every 4 layers the rope is skipped on the last layer. Repeat twice.
    """
    if x is None or isinstance(x, int):
        return x
    assert isinstance(x, str)
    if '[' in x:
        # it's a custom pattern
        return _eval_pattern(x)
    else:
        # it's a single int but in str
        return int(x)

def moe_freq_type(x):
    """Frequency between MoE layers and Dense layers.

    Accepts either:
    - An integer N: Represents a 1:N ratio, meaning one expert layer for every N-1 dense layers
    - A string "N": Same as above, but provided as a string
    - A string containing a Python list expression that defines a custom pattern, e.g.:
      "([1]*3+[0]*1)*3" evaluates to [1,1,1,0,1,1,1,0,1,1,1,0]
      where 1 indicates an expert layer and 0 indicates a dense layer.
      This allows defining arbitrary patterns of expert and dense layers.
      The pattern length must match the total number of transformer layers.
      Examples:
          "([0]+[1]*23)": 1 dense layer followed by 23 expert layers
          "([1]*3+[0]*2)*2": Three expert layers followed by two dense layers, repeated twice.
    """
    if isinstance(x, int):
        return x
    assert isinstance(x, str)
    if '[' in x:
        # it's a custom pattern
        return _eval_pattern(x)
    else:
        # it's a single int but in str
        return int(x)

def la_freq_type(x):
    """Frequency between LA (linear attention) layers and SDPA (scaled dot-product attention) layers.

    Accepts either:
    - An integer N: Represents a (N-1):N ratio, meaning (N-1) LA layers for every 1 SDPA layer
    - A string "N": Same as above, but provided as a string
    - A string containing a Python list expression that defines a custom pattern, e.g.:
      "([1]*3+[0]*1)*3" evaluates to [1,1,1,0,1,1,1,0,1,1,1,0]
      where 1 indicates an LA layer and 0 indicates a SDPA layer.
      This allows defining arbitrary patterns of LA and SDPA layers.
      The pattern length must match the total number of transformer layers.
      Examples:
          "([0]+[1]*23)": 1 SDPA layer followed by 23 LA layers
          "([1]*3+[0]*2)*2": Three LA layers followed by two SDPA layers, repeated twice.
    """
    if x is None or isinstance(x, int):
        return x
    assert isinstance(x, str)
    if '[' in x:
        # it's a custom pattern
        return _eval_pattern(x)
    else:
        # it's a single int but in str
        return int(x)

def tuple_type(x):
    """
    Convert a string to a tuple of integers.
    Examples:
        "1,2,3" -> (1, 2, 3)
        "(1,2,3)" -> (1, 2, 3)
    """
    if x is None or isinstance(x, tuple):
        return x
    assert isinstance(x, str)
    return tuple(int(i) for i in x.strip('()').split(','))

def validate_args(args, defaults={}):

    # Temporary
    assert args.non_persistent_ckpt_type in ['global', 'local', None], \
        'Currently only global and local checkpoints are supported'
    if args.non_persistent_ckpt_type == 'local':
        try:
            from nvidia_resiliency_ext.checkpointing.local.ckpt_managers.local_manager import \
                LocalCheckpointManager
        except ModuleNotFoundError as e:
            raise RuntimeError('nvidia_resiliency_ext is required for local checkpointing') from e

    # validate model config args from heterogeneous config (if provided).
    validate_model_config_args_from_heterogeneous_config(args)

    # Set args.use_dist_ckpt from args.ckpt_format.
    if args.use_legacy_models:
        assert args.ckpt_format == "torch", \
            "legacy model format only supports the 'torch' checkpoint format."
    update_use_dist_ckpt(args)

    total_model_size = args.tensor_model_parallel_size * args.pipeline_model_parallel_size * args.context_parallel_size

    # Total model size.
    assert args.world_size % total_model_size == 0, (
        f"world size ({args.world_size}) is not divisible by total_model_size ({total_model_size=})"
    )

    if args.attention_backend == AttnBackend.local:
        assert args.spec[0] == 'local' , '--attention-backend local is only supported with --spec local'

    # Pipeline model parallel size.
    args.transformer_pipeline_model_parallel_size = args.pipeline_model_parallel_size

    total_model_size = args.tensor_model_parallel_size * args.pipeline_model_parallel_size * args.context_parallel_size
    args.data_parallel_size = args.world_size // total_model_size

    if args.perform_rl_step:
        # ----------------------------------------------------------------
        # CUDA graphs
        #
        #   --cuda-graph-impl controls whether CUDA graphs are built.
        #   The sweep of various inference CUDA graphs is built inside inference, not the RL loop.
        #   Both training and inference CUDA graphs are gated by this flag.
        #
        #   --rl-training-cuda-graphs controls whether CUDA graphs are used during training.
        #   Toggling CUDA graphs on and off is done inside the RL loop.
        #
        #   --rl-persist-cuda-graphs controls whether CUDA graphs are built once, or repeatedly.
        #   When this flag is True, inference requires static memory pointers for the KV cache.
        #   When this flag is False, inference is in charge of deleting/rebuilding CUDA graphs.
        #
        # KV cache management (--rl-kv-cache-management-mode)
        #
        #   Inference initializes the KV cache, inside either a normal memory pool, UVM, or TMS.
        #
        #   On suspend (inference -> training):
        #     "persist"   — no-op; KV cache stays on GPU.
        #     "offload"   — KV cache is offloaded to CPU.
        #     "recompute" — KV cache is deleted entirely.
        #
        #   On resume (training → inference):
        #     "persist"   — no-op; KV cache is already on GPU.
        #     "offload"   — KV cache is restored from CPU.
        #     "recompute" — KV cache is recomputed from scratch.
        # ----------------------------------------------------------------

        # Persisting CGs only makes sense if we build any CGs.
        assert not args.rl_persist_cuda_graphs or args.cuda_graph_impl != "none", (
            "--rl-persist-cuda-graphs is set but no CUDA graphs are being built."
        )
        # Training CGs only makes sense if we build any CGs.
        assert not args.rl_training_cuda_graphs or args.cuda_graph_impl != "none", (
            "--rl-training-cuda-graphs is set but no CUDA graphs are being built."
        )
        # If CUDA graphs persist and KV cache memory address is not static, we need
        # either UVM or torch_memory_saver to maintain memory address stability for CGs.
        if args.rl_persist_cuda_graphs and args.rl_kv_cache_management_mode != "persist":
            try:
                from torch_memory_saver import torch_memory_saver
            except ImportError:
                assert args.inference_dynamic_batching_unified_memory_level > 0, (
                    "Persisting CUDA graphs requires static KV cache memory. Use "
                    "--rl-kv-cache-management-mode=persist, UVM, or install torch_memory_saver."
                )

        # Offload mode requires CG persistence: CG recapture runs dummy forward
        # passes that corrupt the preserved KV data.
        assert (
            (not args.rl_kv_cache_management_mode == "offload") or (args.rl_persist_cuda_graphs)
        ), "--rl-kv-cache-management-mode=offload requires --rl-persist-cuda-graphs"

        # There's no need to manually offload the KV cache with UVM.
        assert not (
            args.inference_dynamic_batching_unified_memory_level > 0
            and args.rl_kv_cache_management_mode == "offload"
        ), "--rl-kv-cache-management-mode=offload is incompatible with UVM"
        # We currently cannot recapture CGs in offload mode.
        assert not(
            not args.rl_persist_cuda_graphs and args.rl_kv_cache_management_mode == "offload"
        ), "Cannot recapture CUDA graphs while offloading KV cache."

        # Validate inference model offloading - requires either UVM or torch_memory_saver
        if args.rl_offload_inference_model_weights_when_idle:
            if args.rl_inference_model_unified_memory_level != 1:
                # Not using UVM, so we need torch_memory_saver
                try:
                    from torch_memory_saver import torch_memory_saver
                except ImportError:
                    raise AssertionError(
                        "To use --rl-offload-inference-model-weights-when-idle without UVM "
                        "(--rl-inference-model-unified-memory-level=1), `torch_memory_saver` must be "
                        "installed. See https://github.com/fzyzcjy/torch_memory_saver."
                    )

        # Resolve deprecated --rl-parallel-generation-tasks -> --rl-num-parallel-generations.
        assert args.rl_num_parallel_generations is None \
            or args.rl_parallel_generation_tasks is None, \
            "Cannot specify both --rl-num-parallel-generations and " \
            "--rl-parallel-generation-tasks. Use --rl-num-parallel-generations " \
            "(--rl-parallel-generation-tasks is deprecated)."
        if args.rl_parallel_generation_tasks is not None:
            print_rank_0(
                "WARNING: --rl-parallel-generation-tasks is deprecated, "
                "use --rl-num-parallel-generations instead.")
            args.rl_num_parallel_generations = (
                args.rl_parallel_generation_tasks * args.grpo_group_size)

        # Resolve --rl-num-parallel-generations / --rl-num-parallel-generation-batches.
        assert args.rl_num_parallel_generations is None \
            or args.rl_num_parallel_generation_batches is None, \
            "--rl-num-parallel-generations and --rl-num-parallel-generation-batches " \
            "are mutually exclusive."
        if args.rl_num_parallel_generations is not None:
            assert args.rl_partial_rollouts, \
                "--rl-num-parallel-generations requires --rl-partial-rollouts."
            assert args.rl_num_parallel_generations % args.grpo_group_size == 0, \
                f"--rl-num-parallel-generations ({args.rl_num_parallel_generations}) " \
                f"must be divisible by --grpo-group-size ({args.grpo_group_size})."
            args.rl_parallel_generation_tasks = (
                args.rl_num_parallel_generations // args.grpo_group_size)
            if args.rl_generation_batch_size is None:
                args.rl_generation_batch_size = 1
        elif args.rl_num_parallel_generation_batches is not None:
            assert args.rl_partial_rollouts, \
                "--rl-num-parallel-generation-batches requires --rl-partial-rollouts."
            if args.rl_generation_batch_size is None:
                args.rl_generation_batch_size = args.grpo_prompts_per_step
            args.rl_parallel_generation_tasks = (
                args.rl_num_parallel_generation_batches * args.rl_generation_batch_size)
        else:
            if args.rl_generation_batch_size is None:
                args.rl_generation_batch_size = 1
            args.rl_parallel_generation_tasks = 512

        # Derive enforce_order after all resolution is complete.
        args.rl_enforce_generation_order = (args.rl_generation_batch_size > 1)

        args.grpo_samples_per_iteration = args.grpo_prompts_per_step * args.grpo_group_size

        if args.rl_use_sequence_packing:
            assert args.micro_batch_size == 1, \
                "micro_batch_size must be 1 when using sequence packing. To increase compute per micro batch increase the sequence length."

    print_rank_0('using world size: {}, data-parallel size: {}, '
                 'context-parallel size: {}, '
                 'hierarchical context-parallel sizes: {}, '
                 'tensor-model-parallel size: {}, '
                 'pipeline-model-parallel size: {}'.format(
                     args.world_size, args.data_parallel_size,
                     args.context_parallel_size,
                     args.hierarchical_context_parallel_sizes,
                     args.tensor_model_parallel_size,
                     args.pipeline_model_parallel_size))

    # Checks.

    if args.hierarchical_context_parallel_sizes:
        from numpy import prod
        assert args.context_parallel_size == prod(args.hierarchical_context_parallel_sizes)
    if "a2a+p2p" in args.cp_comm_type:
        assert args.hierarchical_context_parallel_sizes is not None, \
        "--hierarchical-context-parallel-sizes must be set when a2a+p2p is used in cp comm"

    if args.expert_tensor_parallel_size is None:
        args.expert_tensor_parallel_size = args.tensor_model_parallel_size

    # Deprecated arguments.
    assert args.batch_size is None, '--batch-size argument is no longer ' \
        'valid, use --micro-batch-size instead'
    del args.batch_size
    assert args.warmup is None, '--warmup argument is no longer valid, use ' \
        '--lr-warmup-fraction instead'
    del args.warmup
    assert args.model_parallel_size is None, '--model-parallel-size is no ' \
        'longer valid, use --tensor-model-parallel-size instead'
    del args.model_parallel_size

    if args.checkpoint_activations:
        print_rank_0('--checkpoint-activations is no longer valid, use --recompute-activations, '
                     'or, for more control, --recompute-granularity and --recompute-method.')
        exit()
    del args.checkpoint_activations

    if args.recompute_activations:
        args.recompute_granularity = 'selective'
    del args.recompute_activations

    if args.enable_cuda_graph or args.external_cuda_graph:
        assert (
            args.cuda_graph_impl == "none"
        ), "Do not use --enable-cuda-graph or --external-cuda-graph with --cuda-graph-impl."
        assert (
            not args.enable_cuda_graph or not args.external_cuda_graph
        ), "--enable-cuda-graph and --external-cuda-graph cannot be enabled at the same time."

        if args.enable_cuda_graph:
            print_rank_0(
                '--enable-cuda-graph is deprecated, use --cuda-graph-impl=local instead.', args.rank
            )
            args.cuda_graph_impl = "local"
            del args.enable_cuda_graph
        if args.external_cuda_graph:
            print_rank_0(
                '--external-cuda-graph is deprecated, use --cuda-graph-impl=transformer_engine instead.',
                args.rank,
            )
            args.cuda_graph_impl = "transformer_engine"
            del args.external_cuda_graph

    # Set input defaults.
    for key in defaults:
        # For default to be valid, it should not be provided in the
        # arguments that are passed to the program. We check this by
        # ensuring the arg is set to None.
        if getattr(args, key, None) is not None:
            warn_rank_0('Overriding default arguments for {key}:{v} '
                        'with {key}:{v2}'.format(key=key, v=defaults[key],
                                                 v2=getattr(args, key)))
        else:
            setattr(args, key, defaults[key])

    if args.data_path is not None and args.split is None:
        legacy_default_split_value = '969, 30, 1'
        warn_rank_0('Please specify --split when using --data-path. Using legacy default value '
                    f'of "{legacy_default_split_value}"')
        args.split = legacy_default_split_value

    use_data_path = (args.data_path is not None) or (args.data_args_path is not None)
    if use_data_path:
        # Exactly one of the two has to be None if we use it.
        assert (args.data_path is None) or (args.data_args_path is None)
    use_per_split_data_path = any(
        elt is not None
        for elt in [args.train_data_path, args.valid_data_path, args.test_data_path]) or \
            args.per_split_data_args_path is not None
    if use_per_split_data_path:
         # Exactly one of the two has to be None if we use it.
        assert any(elt is not None
                   for elt in [args.train_data_path, args.valid_data_path, args.test_data_path]) is False or \
            args.per_split_data_args_path is None

    if args.phase_transition_iterations:
        args.phase_transition_iterations = sorted(
            int(x.strip()) for x in args.phase_transition_iterations.split(",")
        )
        assert args.rampup_batch_size is None, "multi-phase training does not support batch size ramp-up"

    # Batch size.
    assert args.micro_batch_size is not None
    assert args.micro_batch_size > 0
    if args.global_batch_size is None:
        args.global_batch_size = args.micro_batch_size * args.data_parallel_size
        print_rank_0('setting global batch size to {}'.format(args.global_batch_size))
    assert args.global_batch_size > 0

    if args.perform_rl_step:
        num_generated_samples_per_inference_iteration = (
            args.grpo_samples_per_iteration * args.grpo_iterations)

        # Ensure that the number of prompts we collect is a multiple of the global batch size.
        # TODO: Make this account for batch size rampup?
        assert num_generated_samples_per_inference_iteration % args.global_batch_size == 0, \
            f"grpo_group_size * grpo_prompts_per_step * grpo_iterations should be divisible by global_batch_size"

        # For now only exit/checkpoint on iterations where we generate data. We don't currently
        # have a way to checkpoint the generated data.
        num_training_iterations_per_inference_iteration = (
            num_generated_samples_per_inference_iteration // args.global_batch_size)
        if args.exit_interval is not None:
            assert args.exit_interval % num_training_iterations_per_inference_iteration == 0, \
                f"exit_interval should be divisible by number of global batches per inference iteration."
        if args.save_interval is not None:
            assert args.save_interval % num_training_iterations_per_inference_iteration == 0, \
                f"save_interval should be divisible by number of global batches per inference iteration."

    # === Hybrid layer pattern: deprecation handling and validation ===

    # Backward compat: --hybrid-override-pattern is deprecated in favor of --hybrid-layer-pattern
    used_hybrid_override_pattern = False
    if args.hybrid_override_pattern is not None:
        assert args.hybrid_layer_pattern is None, (
            '--hybrid-override-pattern and --hybrid-layer-pattern cannot both be specified. '
            '--hybrid-override-pattern is deprecated; use --hybrid-layer-pattern instead.'
        )
        warn_rank_0(
            "--hybrid-override-pattern is deprecated. Use --hybrid-layer-pattern instead.",
            args.rank,
        )
        args.hybrid_layer_pattern = args.hybrid_override_pattern
        used_hybrid_override_pattern = True

    if args.mtp_hybrid_override_pattern is not None:
        warn_rank_0(
            "--mtp-hybrid-override-pattern is deprecated. "
            "For new hybrid models with MTP, use unified --hybrid-layer-pattern instead. "
            "Example: 'M*M*/MM/MM' means main='M*M*', MTP pattern='MM' with 2 depths. "
            "This argument is kept only for loading old checkpoints.",
            args.rank,
        )

    from megatron.core.ssm.mamba_hybrid_layer_allocation import (
        Symbols, parse_hybrid_pattern, get_hybrid_total_layer_count,
        get_hybrid_total_pipeline_segment_count,
    )
    sep = Symbols.MTP_SEPARATOR

    # Backward compat: convert legacy mtp_hybrid_override_pattern to unified format
    if (
        args.mtp_hybrid_override_pattern is not None
        and args.mtp_num_layers is not None
        and args.mtp_num_layers > 0
        and (args.hybrid_layer_pattern is None or sep not in args.hybrid_layer_pattern)
    ):
        main_pattern = args.hybrid_layer_pattern or ''
        mtp_pattern = args.mtp_hybrid_override_pattern
        args.hybrid_layer_pattern = main_pattern + sep + sep.join([mtp_pattern] * args.mtp_num_layers)
        args.mtp_hybrid_override_pattern = None
        print_rank_0(f"Converted legacy MTP pattern to unified: {args.hybrid_layer_pattern}")

    if args.hybrid_layer_pattern is not None:
        # Derive num_layers from pattern; hybrid_layer_pattern always overrides --num-layers when
        # both are present (e.g. when loading from checkpoint with --use-checkpoint-args).
        num_layers_in_pattern = get_hybrid_total_layer_count(args.hybrid_layer_pattern)
        if args.num_layers is not None and args.num_layers != num_layers_in_pattern:
            warn_rank_0(
                f'--hybrid-layer-pattern is set; ignoring --num-layers ({args.num_layers}) and '
                f'using the layer count derived from the pattern ({num_layers_in_pattern}).',
                args.rank,
            )
        args.num_layers = num_layers_in_pattern

        # first/last pipeline num layers are incompatible with pipe-separated patterns
        # (the pipe separators already define the pipeline layout explicitly), but are
        # allowed for pipe-free patterns where they control uneven PP splitting.
        has_pipes = Symbols.PIPE in args.hybrid_layer_pattern.split(sep)[0]
        if has_pipes:
            assert args.decoder_first_pipeline_num_layers is None, (
                'If --hybrid-layer-pattern contains pipe separators, '
                '--decoder-first-pipeline-num-layers should not be specified '
                'as the pipeline layout is explicitly defined.'
            )
            assert args.decoder_last_pipeline_num_layers is None, (
                'If --hybrid-layer-pattern contains pipe separators, '
                '--decoder-last-pipeline-num-layers should not be specified '
                'as the pipeline layout is explicitly defined.'
            )
        assert args.num_layers_per_virtual_pipeline_stage is None, (
            '--num-layers-per-virtual-pipeline-stage should not be used with '
            '--hybrid-layer-pattern. To specify virtual pipelining, describe a number of '
            'pipeline segments in --hybrid-layer-pattern that is a multiple of '
            '--pipeline-model-parallel-size greater than 1'
        )
        assert args.num_virtual_stages_per_pipeline_rank is None, (
            '--num-virtual-stages-per-pipeline-rank should not be used with '
            '--hybrid-layer-pattern. Virtual pipeline stages are derived from the '
            'number of | segments in the pattern.'
        )
        assert args.pipeline_model_parallel_layout is None, (
            '--pipeline-model-parallel-layout should not be used with --hybrid-layer-pattern. '
            'Pipeline stage layout is defined by | separators in the pattern.'
        )
        assert not args.account_for_embedding_in_pipeline_split, (
            '--account-for-embedding-in-pipeline-split should not be used with '
            '--hybrid-layer-pattern. Pipeline stage layout is defined by | separators '
            'in the pattern.'
        )
        assert not args.account_for_loss_in_pipeline_split, (
            '--account-for-loss-in-pipeline-split should not be used with '
            '--hybrid-layer-pattern. Pipeline stage layout is defined by | separators '
            'in the pattern.'
        )

        # Derive VPP from pipe segments in the pattern
        hybrid_pipeline_segments = get_hybrid_total_pipeline_segment_count(
            args.hybrid_layer_pattern
        )
        if hybrid_pipeline_segments == 1 and args.transformer_pipeline_model_parallel_size > 1:
            # No pipes in pattern -- PP will be handled by select_pipeline_segment
            # at model init time (for backwards compatibility).
            args.virtual_pipeline_model_parallel_size = None
        else:
            assert hybrid_pipeline_segments % args.transformer_pipeline_model_parallel_size == 0, (
                'The number of hybrid pipeline segments described by --hybrid-layer-pattern must '
                'be evenly divisible by --pipeline-model-parallel-size. '
                f'Got {hybrid_pipeline_segments} segments and '
                f'{args.transformer_pipeline_model_parallel_size} pipeline parallel size.'
            )
            if hybrid_pipeline_segments > args.transformer_pipeline_model_parallel_size:
                # Must be set here in order to assign virtual parallel ranks in
                # training.py/get_model
                args.virtual_pipeline_model_parallel_size = (
                    hybrid_pipeline_segments // args.transformer_pipeline_model_parallel_size
                )
            else:
                args.virtual_pipeline_model_parallel_size = None

    # Infer mtp_num_layers from unified pattern
    if args.hybrid_layer_pattern and sep in args.hybrid_layer_pattern:
        parsed = parse_hybrid_pattern(args.hybrid_layer_pattern)
        if parsed.mtp_pattern and parsed.mtp_num_depths > 0:
            inferred_mtp_num_layers = parsed.mtp_num_depths
            if args.mtp_num_layers is None:
                args.mtp_num_layers = inferred_mtp_num_layers
            elif args.mtp_num_layers != inferred_mtp_num_layers:
                warn_rank_0(
                    f"--mtp-num-layers ({args.mtp_num_layers}) conflicts with "
                    f"MTP depth count ({inferred_mtp_num_layers}) in pattern "
                    f"'{args.hybrid_layer_pattern}'. "
                    f"Using the inferred value ({inferred_mtp_num_layers}).",
                    args.rank
                )
                args.mtp_num_layers = inferred_mtp_num_layers

    # MTP validation
    if args.mtp_num_layers:
        assert not args.use_legacy_models, "The legacy Megatron models does not support Multi-Token Prediction (MTP)."
        assert args.position_embedding_type == "rope" or args.position_embedding_type == "none", (
            f"Multi-Token Prediction (MTP) is not supported with {args.position_embedding_type} position embedding type."
            + f"The supported position embedding types are rope and none."
        )

    # Validate MTP args for hybrid vs non-hybrid models
    if args.hybrid_layer_pattern is not None:
        # Mamba/hybrid model MTP validation
        if args.mtp_num_layers and not (args.hybrid_layer_pattern and sep in args.hybrid_layer_pattern):
            # Hybrid model wants MTP but no unified pattern - check for legacy args
            if args.mtp_hybrid_override_pattern is None:
                warn_rank_0(
                    "Hybrid model with --mtp-num-layers but no MTP pattern. "
                    "Use unified --hybrid-layer-pattern with '/' separator (e.g., 'M*M*/MM/MM') "
                    "or legacy --mtp-hybrid-override-pattern for old checkpoints.",
                    args.rank
                )
    else:
        # Non-hybrid (GPT) model MTP validation
        if args.mtp_hybrid_override_pattern is not None:
            warn_rank_0(
                "--mtp-hybrid-override-pattern is for Mamba/hybrid models only. "
                "For GPT models, MTP replicates the main transformer layer structure. "
                "This argument will be ignored.",
                args.rank
            )
    # === End of hybrid layer pattern: deprecation handling and validation ===

    # Uneven virtual pipeline parallelism
    assert (
        int(args.num_layers_per_virtual_pipeline_stage is not None)
        + int(args.num_virtual_stages_per_pipeline_rank is not None)
        + int(args.pipeline_model_parallel_layout is not None)
    ) <= 1, (
        'No more than one of the following arguments can be set at the same time: '
        '--num-layers-per-virtual-pipeline-stage, --num-virtual-stages-per-pipeline-rank,'
        '--pipeline-model-parallel-layout. '
        f'{args.num_layers_per_virtual_pipeline_stage=}, '
        f'{args.num_virtual_stages_per_pipeline_rank=}, '
        f'{args.pipeline_model_parallel_layout=}.'
    )

    if args.pipeline_model_parallel_layout is not None:
        # Parse the input flattened layout to a list and get the vpp size.
        # We will validate the layout more carefully in the TransformerConfig constructor.
        num_stages = PipelineParallelLayerLayout.get_num_stages_from_str(args.pipeline_model_parallel_layout)
        assert num_stages % args.pipeline_model_parallel_size == 0, (
            f"The length of pipeline_model_parallel_layout must be divisible"
            f" by pipeline_model_parallel_size ({num_stages=},"
            f" {args.pipeline_model_parallel_size=})"
        )
        args.virtual_pipeline_model_parallel_size = num_stages // args.pipeline_model_parallel_size
        if args.virtual_pipeline_model_parallel_size == 1:
            args.virtual_pipeline_model_parallel_size = None
    elif args.num_layers_per_virtual_pipeline_stage is not None or args.num_virtual_stages_per_pipeline_rank is not None:
        if args.num_virtual_stages_per_pipeline_rank is None:
            assert args.decoder_first_pipeline_num_layers is None and args.decoder_last_pipeline_num_layers is None, \
                'please use --num-virtual-stages-per-pipeline-rank to specify virtual pipeline parallel degree when enable uneven pipeline parallelism'
            if args.num_layers is not None:
                num_layers = args.num_layers
            else:
                num_layers = args.decoder_num_layers

            if args.account_for_embedding_in_pipeline_split:
                num_layers += 1

            if args.account_for_loss_in_pipeline_split:
                num_layers += 1

            assert num_layers % args.transformer_pipeline_model_parallel_size == 0, \
                'number of layers of the model must be divisible pipeline model parallel size'
            num_layers_per_pipeline_stage = num_layers // args.transformer_pipeline_model_parallel_size

            assert num_layers_per_pipeline_stage % args.num_layers_per_virtual_pipeline_stage == 0, \
                'number of layers per pipeline stage must be divisible number of layers per virtual pipeline stage'
            args.virtual_pipeline_model_parallel_size = num_layers_per_pipeline_stage // \
                args.num_layers_per_virtual_pipeline_stage
        else:
            args.virtual_pipeline_model_parallel_size = args.num_virtual_stages_per_pipeline_rank
        if args.virtual_pipeline_model_parallel_size == 1:
            args.virtual_pipeline_model_parallel_size = None
    else:
        # Only set VPP to None if it wasn't already derived from --hybrid-layer-pattern
        if args.hybrid_layer_pattern is None:
            args.virtual_pipeline_model_parallel_size = None

        if args.decoder_first_pipeline_num_layers is None and args.decoder_last_pipeline_num_layers is None:
            # Divisibility check not applicable for T5 models which specify encoder_num_layers
            # and decoder_num_layers, or for hybrid models using --hybrid-layer-pattern.
            if args.num_layers is not None and args.hybrid_layer_pattern is None:
                num_layers = args.num_layers

                if args.account_for_embedding_in_pipeline_split:
                    num_layers += 1

                if args.account_for_loss_in_pipeline_split:
                    num_layers += 1

                assert num_layers % args.transformer_pipeline_model_parallel_size == 0, \
                    'Number of layers should be divisible by the pipeline-model-parallel size'

    if args.virtual_pipeline_model_parallel_size is not None:
        if args.overlap_p2p_comm:
            assert args.pipeline_model_parallel_size > 1, \
                'When interleaved schedule is used, pipeline-model-parallel size '\
                'should be greater than 1'
        else:
            assert args.pipeline_model_parallel_size > 2, \
                'When interleaved schedule is used and p2p communication overlap is disabled, '\
                'pipeline-model-parallel size should be greater than 2 to avoid having multiple '\
                'p2p sends and recvs between same 2 ranks per communication batch'
    else:
        # Overlap P2P communication is disabled if not using the interleaved schedule.
        args.overlap_p2p_comm = False
        args.align_param_gather = False
        # Only print warning if PP size > 1.
        if args.rank == 0 and args.pipeline_model_parallel_size > 1:
            print('WARNING: Setting args.overlap_p2p_comm and args.align_param_gather to False '
                'since non-interleaved schedule does not support overlapping p2p communication '
                'and aligned param AG')

    print_rank_0(
        f"Number of virtual stages per pipeline stage: {args.virtual_pipeline_model_parallel_size}"
    )

    if args.overlap_param_gather:
        assert args.use_distributed_optimizer or args.use_megatron_fsdp \
            or args.optimizer == 'dist_muon', \
            '--overlap-param-gather only supported with distributed optimizer, megatron fsdp, or dist_muon'
        assert args.overlap_grad_reduce, \
            'Must use --overlap-param-gather with --overlap-grad-reduce'
        assert not args.use_legacy_models, \
            '--overlap-param-gather only supported with MCore models'

    if args.use_torch_fsdp2:
        assert is_torch_min_version("2.4.0"), \
            'FSDP2 requires PyTorch >= 2.4.0 with FSDP 2 support.'
        assert args.pipeline_model_parallel_size == 1, \
            '--use-torch-fsdp2 is not supported with pipeline parallelism'
        assert args.expert_model_parallel_size == 1, \
            '--use-torch-fsdp2 is not supported with expert parallelism'
        assert not args.use_distributed_optimizer, \
            "--use-torch-fsdp2 is not supported with MCore's distributed optimizer"
        assert not args.gradient_accumulation_fusion, \
            '--use-torch-fsdp2 is not supported with gradient accumulation fusion'
        assert args.ckpt_format in ('torch_dist', 'torch_dcp'), \
            '--use-torch-fsdp2 requires --ckpt-format torch_dist or torch_dcp'
        assert args.untie_embeddings_and_output_weights, \
            '--use-torch-fsdp2 requires --untie-embeddings-and-output-weights'
        assert not args.fp16, \
            '--use-torch-fsdp2 not supported with fp16 yet'
        assert os.environ.get('CUDA_DEVICE_MAX_CONNECTIONS') != "1", \
            'FSDP always requires CUDA_DEVICE_MAX_CONNECTIONS value large than one'

        if args.fp8_param_gather and is_te_min_version("2.0.0"):
            args.fp8_param_gather = False
            warn_rank_0(
                'FSDP2 FP8 param gather is not supported yet in TE 2.0, will fallback to bf16'
                'all_gather instead, turning off fp8_param_gather',
                args.rank,
            )
        if args.fp4_param and not is_te_min_version("2.7.0.dev0"):
            raise ValueError("--fp4-param requires Transformer Engine >= 2.7.0.dev0.")

    if args.overlap_param_gather_with_optimizer_step:
        assert args.use_distributed_optimizer, \
            '--overlap-param-gather-with-optimizer-step only supported with distributed optimizer'
        assert args.overlap_param_gather, \
            'Must use --overlap-param-gather-with-optimizer-step with --overlap-param-gather'
        assert args.virtual_pipeline_model_parallel_size is not None, \
            '--overlap-param-gather-with-optimizer-step only supported with interleaved pipeline parallelism'
        assert not args.use_dist_ckpt, \
            '--overlap-param-gather-with-optimizer-step not supported with distributed checkpointing yet'

    # Map string data-type to torch.dtype.
    dtype_map = {
        'fp32': torch.float32, 'bf16': torch.bfloat16, 'fp16': torch.float16, 'fp8': torch.uint8, 'auto': None,
    }
    map_dtype = lambda d: d if isinstance(d, torch.dtype) else dtype_map[d]

    args.main_grads_dtype = map_dtype(args.main_grads_dtype)
    args.main_params_dtype = map_dtype(args.main_params_dtype)
    args.exp_avg_dtype = map_dtype(args.exp_avg_dtype)
    args.exp_avg_sq_dtype = map_dtype(args.exp_avg_sq_dtype)
    args.mamba_inference_conv_states_dtype = map_dtype(args.mamba_inference_conv_states_dtype)
    args.mamba_inference_ssm_states_dtype = map_dtype(args.mamba_inference_ssm_states_dtype)

    args.megatron_fsdp_main_params_dtype = map_dtype(args.megatron_fsdp_main_params_dtype)
    args.megatron_fsdp_main_grads_dtype = map_dtype(args.megatron_fsdp_main_grads_dtype)
    args.megatron_fsdp_grad_comm_dtype = map_dtype(args.megatron_fsdp_grad_comm_dtype)
    if args.grad_reduce_in_bf16:
        args.megatron_fsdp_grad_comm_dtype = torch.bfloat16

    if args.fp8_param_gather:
        assert args.use_distributed_optimizer or args.use_torch_fsdp2 or args.use_megatron_fsdp or not torch.is_grad_enabled(), \
            '--fp8-param-gather only supported with distributed optimizer, torch fsdp2, megatron fsdp, or inference mode'

    # FP4 and FP8 are mutually exclusive
    if args.fp4 and args.fp8:
        raise ValueError("--fp4-format and --fp8-format cannot be used simultaneously. Please choose one.")

    # FP4 param requires FP4 mode
    if args.fp4_param and not args.fp4:
        raise ValueError("--fp4-param-gather must be used together with --fp4-format.")

    # FP4 requires TE >= 2.7.0.dev0
    if args.fp4 and not is_te_min_version("2.7.0.dev0"):
        raise ValueError("--fp4-format requires Transformer Engine >= 2.7.0.dev0 for NVFP4BlockScaling support.")

    if (
        args.fp8_recipe == 'mxfp8'
        and args.transformer_impl == 'inference_optimized'
        and not is_flashinfer_min_version("0.6.4")
    ):
        raise ValueError("MXFP8 with inference optimized layers requires FlashInfer >= 0.6.4")

    if args.use_megatron_fsdp:
        # NOTE: The flag `use_custom_fsdp` is deprecated and will be removed in future versions.
        #       Please use `use_megatron_fsdp` instead, as all functionality will be migrated there.
        #       Future updates will drop support for `use_custom_fsdp` to avoid confusion.
        args.use_custom_fsdp = True

        if args.data_parallel_sharding_strategy in ["optim_grads_params", "optim_grads"]:
            warn_rank_0(
                'Please make sure your TransformerEngine support FSDP + gradient accumulation fusion',
                args.rank,
            )

        if args.data_parallel_sharding_strategy == "optim_grads_params":
            assert args.check_weight_hash_across_dp_replicas_interval is None, \
                'check_weight_hash_across_dp_replicas_interval is not supported with optim_grads_params'

        assert os.environ.get('CUDA_DEVICE_MAX_CONNECTIONS') != "1", \
            'FSDP always requires CUDA_DEVICE_MAX_CONNECTIONS value large than one'

        assert args.ckpt_format == "fsdp_dtensor", \
            "Megatron FSDP only supports fsdp_dtensor checkpoint format"
        
    if args.fsdp_manual_registration:
        assert args.use_megatron_fsdp, "FSDP manual registration is only supported with Megatron FSDP"
        assert args.nccl_ub, "FSDP manual registration is only supported with nccl-ub option"

        if args.use_megatron_fsdp:
            args.reuse_grad_buf_for_mxfp8_param_ag = False

    # Parameters dtype.
    args.params_dtype = torch.float
    if args.fp16:
        assert not args.bf16
        args.params_dtype = torch.half
        # Turn off checking for NaNs in loss and grads if using dynamic loss scaling,
        # where NaNs in grads / loss are signal to the loss scaler.
        if not args.loss_scale:
            args.check_for_nan_in_loss_and_grad = False
            warn_rank_0('Setting args.check_for_nan_in_loss_and_grad to False since '
                        'dynamic loss scaling is being used')
    if args.bf16:
        assert not args.fp16
        args.params_dtype = torch.bfloat16
        # bfloat16 requires gradient accumulation and all-reduce to
        # be done in fp32.
        if args.accumulate_allreduce_grads_in_fp32:
            assert args.main_grads_dtype == torch.float32, \
                "--main-grads-dtype can only be fp32 when --accumulate-allreduce-grads-in-fp32 is set"

        if args.grad_reduce_in_bf16:
            args.accumulate_allreduce_grads_in_fp32 = False
        elif not args.accumulate_allreduce_grads_in_fp32 and args.main_grads_dtype == torch.float32:
            args.accumulate_allreduce_grads_in_fp32 = True
            print_rank_0('accumulate and all-reduce gradients in fp32 for bfloat16 data type.')
    if args.cuda_graph_impl == "local" and CudaGraphScope.full_iteration in args.cuda_graph_scope:
        assert not args.check_for_nan_in_loss_and_grad, \
        "--no-check-for-nan-in-loss-and-grad should be set with --cuda-graph-scope=full_iteration for training. Note: If you are trying to use full_iteration CUDA graphs for inference, please use --cuda-graph-scope full_iteration_inference instead"
    
    if args.cuda_graph_impl == "local" and CudaGraphScope.full_iteration_inference in args.cuda_graph_scope:
        if args.fp8 is not None:
            assert args.transformer_impl == "inference_optimized", \
                "fp8 with full_iteration_inference CUDA graphs is only supported with " \
                "--transformer-impl=inference_optimized"
            assert args.fp8_recipe == "mxfp8", \
                "Only --fp8-recipe=mxfp8 is supported with full_iteration_inference CUDA graphs"

    if args.cuda_graph_impl == 'local':
        assert args.inference_dynamic_batching_num_cuda_graphs > 0 or args.inference_dynamic_batching_num_cuda_graphs == -1, \
            'inference_dynamic_batching_num_cuda_graphs should be a positive integer or -1' \
            '-1 means that we will automatically determine the number of CUDA graphs to capture based on the `max_requests` value.'

    print_rank_0('using {} for parameters ...'.format(args.params_dtype))

    if args.dataloader_type is None:
        args.dataloader_type = 'single'

    # data
    assert args.num_dataset_builder_threads > 0

    # Consumed tokens.
    args.consumed_train_samples = 0
    args.skipped_train_samples = 0
    args.consumed_valid_samples = 0
    if args.rl_use_sequence_packing:
        args.consumed_train_bins = 0

    # Support for variable sequence lengths across batches/microbatches.
    # set it if the dataloader supports generation of variable sequence lengths
    # across batches/microbatches. Due to additional communication overhead
    # during pipeline parallelism, it should not be set if sequence length
    # is constant during training.
    args.variable_seq_lengths = False

    # Iteration-based training.
    # Skip these checks when skip_train is set: LR config is irrelevant.
    if args.train_iters and not args.skip_train:
        # If we use iteration-based training, make sure the
        # sample-based options are off.
        assert args.train_samples is None, \
            'expected iteration-based training'
        assert args.lr_decay_samples is None, \
            'expected iteration-based learning rate decay'
        assert args.lr_warmup_samples == 0, \
            'expected iteration-based learning rate warmup'
        assert args.rampup_batch_size is None, \
            'expected no batch-size rampup for iteration-based training'
        if args.lr_warmup_fraction is not None:
            assert args.lr_warmup_iters == 0, \
                'can only specify one of lr-warmup-fraction and lr-warmup-iters'

    # Sample-based training.
    if args.train_samples and not args.skip_train:
        # If we use sample-based training, make sure the
        # iteration-based options are off.
        assert args.train_iters is None, \
            'expected sample-based training'
        assert args.lr_decay_iters is None, \
            'expected sample-based learning rate decay'
        assert args.lr_warmup_iters == 0, \
            'expected sample-based learnig rate warmup'
        if args.lr_warmup_fraction is not None:
            assert args.lr_warmup_samples == 0, \
                'can only specify one of lr-warmup-fraction ' \
                'and lr-warmup-samples'

    if args.num_layers is not None:
        assert args.encoder_num_layers is None, \
            'cannot have both num-layers and encoder-num-layers specified'
        args.encoder_num_layers = args.num_layers
    else:
        assert args.encoder_num_layers is not None, \
            'either num-layers or encoder-num-layers should be specified'
        args.num_layers = args.encoder_num_layers

    # Check required arguments.
    required_args = ['num_layers', 'hidden_size', 'num_attention_heads',
                     'max_position_embeddings']
    for req_arg in required_args:
        _check_arg_is_not_none(args, req_arg)

    # Checks.
    if args.ffn_hidden_size is None:
        if args.swiglu:
            # reduce the dimnesion for MLP since projections happens on
            # two linear layers. this keeps the number of paramters in
            # the same ballpark as the counterpart with 4*h size
            # we keep it a multiple of 64, which means the actual tensor size
            # will be a multiple of 64 / tp_size
            args.ffn_hidden_size = int((4 * args.hidden_size * 2 / 3) / 64) * 64
        else:
            args.ffn_hidden_size = 4 * args.hidden_size

    if args.kv_channels is None:
        assert args.hidden_size % args.num_attention_heads == 0
        args.kv_channels = args.hidden_size // args.num_attention_heads

    if args.seq_length is not None and args.context_parallel_size > 1:
        assert args.seq_length % (args.context_parallel_size * 2) == 0, \
            'seq-length should be a multiple of 2 * context-parallel-size ' \
            'if context-parallel-size > 1.'

    if args.seq_length is not None:
        assert args.encoder_seq_length is None
        args.encoder_seq_length = args.seq_length
    else:
        assert args.encoder_seq_length is not None
        args.seq_length = args.encoder_seq_length

    if args.seq_length is not None:
        assert args.max_position_embeddings >= args.seq_length, \
            f"max_position_embeddings ({args.max_position_embeddings}) must be greater than " \
            f"or equal to seq_length ({args.seq_length})."
    if args.decoder_seq_length is not None:
        assert args.max_position_embeddings >= args.decoder_seq_length
    if args.lr is not None:
        assert args.min_lr <= args.lr
    if args.save is not None:
        assert args.save_interval is not None
        assert args.save_interval > 0
        if args.save_retain_interval is not None:
            assert args.save_retain_interval > 0
            assert args.save_retain_interval % args.save_interval == 0
    if args.log_memory_interval is not None:
        assert args.log_memory_interval % args.log_interval == 0
    # Mixed precision checks.
    if args.fp16_lm_cross_entropy:
        assert args.fp16, 'lm cross entropy in fp16 only support in fp16 mode.'
    if args.fp32_residual_connection:
        assert args.fp16 or args.bf16, \
            'residual connection in fp32 only supported when using fp16 or bf16.'

    if args.moe_grouped_gemm:
        dc = torch.cuda.get_device_capability()
        assert dc[0] >= 8, "Unsupported compute capability for GroupedGEMM kernels."

    if args.no_weight_decay_cond_type is not None:
        print_rank_0(
            'WARNING: --no-weight-decay-cond-type is deprecated. Please use --apply-wd-to-qk-layernorm instead.',
            args.rank,
        )
        if args.no_weight_decay_cond_type == "apply_wd_to_qk_layernorm":
            args.apply_wd_to_qk_layernorm = True
        else:
            raise ValueError(f"Invalid no_weight_decay_cond_type: {args.no_weight_decay_cond_type}")
        args.no_weight_decay_cond_type = None

    if args.weight_decay_incr_style == 'constant':
        assert args.start_weight_decay is None
        assert args.end_weight_decay is None
        args.start_weight_decay = args.weight_decay
        args.end_weight_decay = args.weight_decay
    else:
        assert args.start_weight_decay is not None
        assert args.end_weight_decay is not None

    # Persistent fused layer norm.
    if not is_torch_min_version("1.11.0a0"):
        args.no_persist_layer_norm = True
        print_rank_0('Persistent fused layer norm kernel is supported from '
                     'pytorch v1.11 (nvidia pytorch container paired with v1.11). '
                     'Defaulting to no_persist_layer_norm=True')

    # Activation recomputing.
    if args.distribute_saved_activations:
        assert args.tensor_model_parallel_size > 1, 'can distribute ' \
            'recomputed activations only across tensor model ' \
            'parallel groups'
        assert args.recompute_granularity == 'full', \
            'distributed recompute activations is only '\
            'application to full recompute granularity'
        assert args.recompute_method is not None, \
            'for distributed recompute activations to work you '\
            'need to use a recompute method '
        assert is_torch_min_version("1.10.0a0"), \
            'distributed recompute activations are supported for pytorch ' \
            'v1.10 and above (Nvidia Pytorch container >= 21.07). Current ' \
            f'pytorch version is v{get_torch_version()}.'

    if args.recompute_granularity == 'selective':
        assert args.recompute_method is None, \
            'recompute method is not yet supported for ' \
            'selective recomputing granularity'

    # disable sequence parallelism when tp=1
    # to avoid change in numerics when
    # sequence_parallelism is enabled.
    if args.tensor_model_parallel_size == 1:
        if args.sequence_parallel:
            warn_rank_0(
                "Disabling sequence parallelism because tensor model parallelism is disabled",
                args.rank,
            )
        args.sequence_parallel = False

    if args.tp_comm_overlap:
        assert args.sequence_parallel == True, 'Tensor parallel communication/GEMM overlap can happen only when sequence parallelism is enabled'

    if args.hybrid_context_parallel:
        assert not args.pipeline_model_parallel_size > 1, 'Hybrid context parallelism not supported with pipeline parallelism'
        assert not args.enable_cuda_graph, 'Hybrid context parallelism not supported with CUDA Graph'
        assert not args.use_megatron_fsdp, 'Hybrid context parallelism not supported with Megatron FSDP'
        assert args.dataloader_type == 'single', 'Hybrid context parallelism only supported with single dataloader type'
        assert args.calculate_per_token_loss, 'Hybrid context parallelism must be used with --calculate-per-token-loss'

    # disable async_tensor_model_parallel_allreduce when
    # model parallel memory optimization is enabled
    if (args.tensor_model_parallel_size > 1 or args.context_parallel_size > 1) \
        and get_device_arch_version() < 10:
        # CUDA_DEVICE_MAX_CONNECTIONS requirement no longer exists since the Blackwell architecture
        if args.use_torch_fsdp2 or args.use_megatron_fsdp:
            fsdp_impl = "Torch-FSDP2" if args.use_torch_fsdp2 else "Megatron-FSDP"
            warn_rank_0(
                f"Using tensor model parallelism or context parallelism with {fsdp_impl} together. "
                "Try not to using them together since they require different CUDA_MAX_CONNECTIONS "
                "settings for best performance. sequence parallelism requires setting the "
                f"environment variable CUDA_DEVICE_MAX_CONNECTIONS to 1 while {fsdp_impl} "
                "requires not setting CUDA_DEVICE_MAX_CONNECTIONS=1 for better parallelization.",
                args.rank,
            )
        elif args.overlap_moe_expert_parallel_comm:
            warn_rank_0(
                "For Hopper and before, try not to use tensor model parallelism or context parallelism with overlap_moe_expert_parallel_comm. "
                "Using tensor/context model parallelism requires setting the environment "
                "variable CUDA_DEVICE_MAX_CONNECTIONS to 1 to maximize the performance. "
                "While overlap_moe_expert_parallel_comm requires setting a larger CUDA_DEVICE_MAX_CONNECTIONS "
                "for better parallelization. If you want to use both, you can set CUDA_DEVICE_MAX_CONNECTIONS to 1 or 32, "
                "which depends on which parallelization you want to prioritize.",
                args.rank,
            )
        else:
            assert os.environ.get('CUDA_DEVICE_MAX_CONNECTIONS') == "1", \
                "Using tensor model parallelism or context parallelism require setting the environment variable " \
                "CUDA_DEVICE_MAX_CONNECTIONS to 1"

    # Setting FSDP communication groups for high priority streams for Blackwell and later architectures
    # Assigning high priority to communication streams ensures that communication kernels are scheduled
    # with higher priority, minimizing the exposed communication when it is overlapped with other computation kernels.
    if args.use_torch_fsdp2 or args.use_megatron_fsdp and get_device_arch_version() >= 10:
        if 'dp_cp' not in args.high_priority_stream_groups:
            args.high_priority_stream_groups.append('dp_cp')
        if args.expert_model_parallel_size  > 1 and 'ep_dp' not in args.high_priority_stream_groups:
            args.high_priority_stream_groups.append('ep_dp')

    # Disable bias gelu fusion if we are disabling bias altogether
    if not args.add_bias_linear:
        args.bias_gelu_fusion = False

    # Keep the 'add bias' args in sync; add_qkv_bias is more targeted.
    if args.add_bias_linear:
        args.add_qkv_bias = True

    if args.qk_clip:
        assert is_te_min_version("2.9.0"), \
            '--qk-clip is only supported with TE >= 2.9.0.'
        assert 0.0 < args.qk_clip_alpha < 1.0, \
            '--qk-clip-alpha must be between 0.0 and 1.0 when using --qk-clip.'
        assert args.qk_clip_threshold > 0, \
            '--qk-clip-threshold must be greater than 0 when using --qk-clip.'

    # decoupled log max attention logit check
    if args.log_max_attention_logit:
        assert is_te_min_version("2.9.0"), \
            '--log-max-attention-logit is only supported with TE >= 2.9.0.'

    if args.decoupled_lr is not None or args.decoupled_min_lr is not None:
        assert not args.use_legacy_models, \
            '--decoupled-lr and --decoupled-min-lr is not supported in legacy models.'

    # Legacy RoPE arguments
    if args.use_rotary_position_embeddings:
        args.position_embedding_type = 'rope'
    if args.rotary_interleaved and args.use_legacy_models:
        raise RuntimeError('--rotary-interleaved is not supported in legacy models.')
    if args.position_embedding_type != 'rope':
        args.apply_rope_fusion = False

    # Would just need to add 'NoPE' as a position_embedding_type to support this, but for now
    # don't allow it to keep things simple
    if not args.add_position_embedding and args.position_embedding_type != 'rope':
        raise RuntimeError('--no-position-embedding is deprecated, use --position-embedding-type')

    # Relative position embeddings arguments
    if args.position_embedding_type == 'relative':
        assert (
            args.transformer_impl == "transformer_engine"
        ), 'Local transformer implementation currently does not support attention bias-based position embeddings.'

    # MultiModal rotary embeddings arguments
    if args.position_embedding_type == "mrope":
        assert args.mrope_section is not None, \
            '--mrope-section should be set when using --position-embedding-type mrope.'

    # MoE Spec check
    if args.num_experts == 0:
        args.num_experts = None
    if args.num_experts is not None and args.moe_ffn_hidden_size is None:
        args.moe_ffn_hidden_size = args.ffn_hidden_size
        warn_rank_0("moe_ffn_hidden_size is not set, using ffn_hidden_size for MoE instead.")

    # Context parallel
    if args.context_parallel_size > 1:
        assert not args.use_legacy_models, "Context parallelism is not supported in legacy models."

    # Expert parallelism check
    if args.expert_model_parallel_size  > 1:
        assert args.num_experts is not None, "num_experts must be non None to use expert model parallelism"
        assert args.num_experts % args.expert_model_parallel_size == 0, \
            "Number of experts should be a multiple of expert model parallel_size."

    # MoE router check
    if isinstance(args.moe_router_load_balancing_type, list) and len(args.moe_router_load_balancing_type) == 1:
        args.moe_router_load_balancing_type = args.moe_router_load_balancing_type[0]
    if isinstance(args.moe_aux_loss_coeff, list) and len(args.moe_aux_loss_coeff) == 1:
        args.moe_aux_loss_coeff = args.moe_aux_loss_coeff[0]

    # Distributed checkpointing checks
    if args.use_dist_ckpt and args.use_legacy_models:
        raise RuntimeError('--use-dist-ckpt is not supported in legacy models.')

    # torch_dcp (torch.distributed.checkpoint) checkpointing format checks.
    if args.ckpt_format == "torch_dcp":
        assert args.use_torch_fsdp2, "--ckpt-format torch_dcp is only tested with FSDP."
        assert args.tensor_model_parallel_size <= 1, \
            "--ckpt-format torch_dcp is not tested with megatron tensor parallelism."
        assert args.pipeline_model_parallel_size <= 1, \
            "--ckpt-format torch_dcp is not tested with megatron pipeline parallelism."

    # fsdp_dtensor checkpointing format checks.
    if args.ckpt_format == "fsdp_dtensor":
        assert args.use_megatron_fsdp, "--ckpt-format fsdp_dtensor is only tested with Megatron FSDP."

    # Data blend checks
    assert args.mock_data + \
           bool(args.data_path) + \
           any([args.train_data_path, args.valid_data_path, args.test_data_path]) \
           <= 1, "A single data source must be provided in training mode, else None"

    if args.fim_data:
        extra_tokens = [
            args.fim_prefix_token,
            args.fim_middle_token,
            args.fim_suffix_token,
            args.fim_pad_token,
            args.fim_eod_token,
        ]
        assert not args.mock_data, "Mock dataset is not supported with FIM dataset."
        assert args.fim_rate, "--fim-rate should be specified."
        assert args.fim_spm_rate, "--fim-spm-rate should be specified."
        assert all(token is not None for token in extra_tokens), "FIM extra tokens should be specified."

    # Deterministic mode
    if args.deterministic_mode:
        assert not args.use_flash_attn, "Flash attention can not be used in deterministic mode."
        assert not args.cross_entropy_loss_fusion, "Cross Entropy Fusion is currently not deterministic."

        all_reduce_choices = ["Tree", "Ring", "CollnetDirect", "CollnetChain", "^NVLS"]
        assert os.getenv("NCCL_ALGO", -1) != -1 and os.getenv("NCCL_ALGO") in all_reduce_choices, \
            f"NCCL_ALGO must be one of {all_reduce_choices}."

        torch.use_deterministic_algorithms(True)

    # Update the printed args to reflect that `apply_query_key_layer_scaling` also controls `attention_softmax_in_fp32`
    if args.apply_query_key_layer_scaling:
        args.attention_softmax_in_fp32 = True

    if args.result_rejected_tracker_filename is not None:
        # Append to passed-in args.iterations_to_skip.
        iterations_to_skip_from_file = RerunStateMachine.get_skipped_iterations_from_tracker_file(
            args.result_rejected_tracker_filename
        )
        args.iterations_to_skip.extend(iterations_to_skip_from_file)

    # Make sure all functionality that requires Gloo process groups is disabled.
    if not args.enable_gloo_process_groups:
        if args.use_distributed_optimizer:
            # If using distributed optimizer, must use distributed checkpointing.
            # Legacy checkpointing uses Gloo process groups to collect full distributed
            # optimizer state in the CPU memory of DP rank 0.
            assert args.use_dist_ckpt

            if args.dist_ckpt_optim_fully_reshardable:
                assert not args.distrib_optim_fully_reshardable_mem_efficient, \
                    '--distrib-optim-fully-reshardable-mem-efficient requires -enable-gloo-process-groups'

    if args.fake_process_group:
        assert args.moe_token_dispatcher_type != "flex", "Fake process group is not supported with flex token dispatcher."
        # Disable nan check for fake process group
        args.check_for_nan_in_loss_and_grad = False
        warn_rank_0('check_for_nan_in_loss_and_grad is set to False for fake process group.')
        # Disable gloo process groups for fake process group
        args.enable_gloo_process_groups = False
        warn_rank_0('enable_gloo_process_groups is set to False for fake process group.')

    # Checkpointing
    if args.ckpt_fully_parallel_save_deprecated and args.rank == 0:
        print('--ckpt-fully-parallel-save flag is deprecated and has no effect.'
              ' Use --no-ckpt-fully-parallel-save to disable parallel save.')
    if (
        args.use_dist_ckpt
        and not args.ckpt_fully_parallel_save
        and args.use_distributed_optimizer
        and args.rank == 0
    ):
        print('Warning: With non-parallel ckpt save and DistributedOptimizer,'
              ' it will be impossible to resume training with different parallelism.'
              ' Consider removing flag --no-ckpt-fully-parallel-save.')
    if args.use_dist_ckpt_deprecated and args.rank == 0:
        print('--use-dist-ckpt is deprecated and has no effect.'
              ' Use --ckpt-format to select the checkpoint format.')
    if args.dist_ckpt_format_deprecated and args.rank == 0:
        print('--dist-ckpt-format is deprecated and has no effect.'
              ' Use --ckpt-format to select the checkpoint format.')

    if args.use_dist_ckpt and args.ckpt_fully_parallel_load:
        if args.ckpt_fully_parallel_load_exchange_algo != "broadcast":
            warn_rank_0(
                "Currently only the 'broadcast' exchange algorithm is supported for fully parallel load. "
                "Other algorithms cannot guarantee numerical stability yet."
            )

    if args.load_main_params_from_ckpt:
        assert args.no_load_optim, '--load-main-params-from-ckpt must be used with --no-load-optim.'

    if args.use_dist_ckpt and args.async_save:
        if not args.use_persistent_ckpt_worker:
            warn_rank_0(
                '--async-save is not supported without --use-persistent-ckpt-worker. '
                'Disabling --async-save.'
            )
            args.async_save = False
        
    # Inference args
    if args.inference_batch_times_seqlen_threshold > -1:
        assert args.pipeline_model_parallel_size > 1, \
            "--inference-batch-times-seqlen-threshold requires setting --pipeline-model-parallel-size > 1."
        assert (
            args.cuda_graph_impl == "none"
        ), "Pipeline-parallel microbatched inference is incompatible with CUDA graphs"

    if args.inference_dynamic_batching:
        assert args.inference_dynamic_batching_buffer_size_gb is not None
        assert args.inference_dynamic_batching_block_size % 256 == 0, "block size should be a multiple of 256"

    if args.cuda_graph_impl == "local" and args.expert_model_parallel_size > 1 and args.transformer_impl != "inference_optimized":
       assert args.moe_pad_experts_for_cuda_graph_inference, \
        "--moe-pad-experts-for-cuda-graph-inference must be set when using CUDA graphs with expert parallelism"

    # MoE upcycling check
    if args.moe_use_upcycling:
        assert args.save is not None, "When using upcycling, the --save option must be specified."
        if not args.no_load_optim:
            args.no_load_optim = True
            warn_rank_0('enabling --no-load-optim for upcycling.')
        if not args.no_load_rng:
            args.no_load_rng = True
            warn_rank_0('enabling --no-load-rng for upcycling.')

    # --skip-train checks.
    # In RL inference-only mode, --no-load-optim is user-controlled: it determines whether the
    # optimizer is created (needed for --rl-offload-optimizer-during-inference) or skipped entirely.
    if args.skip_train and not args.perform_rl_step and not args.no_load_optim:
        args.no_load_optim = True
        warn_rank_0('enabling --no-load-optim when skipping training.')
    if args.skip_train and args.perform_rl_step and args.no_load_optim and args.rl_offload_optimizer_during_inference:
        assert False, \
            '--no-load-optim with --skip-train --perform-rl-step skips the optimizer; ' \
            '--rl-offload-optimizer-during-inference is incompatible (no optimizer to offload).'

    # Muon optimizer check
    if 'muon' in args.optimizer:

        if args.optimizer == 'muon':
            assert not args.overlap_grad_reduce, "Muon optimizer does not support overlap grad reduce. Use dist_muon instead."
            assert not args.overlap_param_gather, "Muon optimizer does not support overlap param gather. Use dist_muon instead."

        assert not args.use_distributed_optimizer, "Muon optimizer does not support distributed optimizer for now."
        assert not args.use_torch_fsdp2, "Muon optimizer does not support Torch-FSDP2 for now."
        assert not args.use_megatron_fsdp, "Muon optimizer does not support Megatron-FSDP for now."
        assert args.ckpt_format in ["torch", "torch_dist"], "Muon optimizer supports torch and torch_dist checkpoint format."

    # Optimizer CPU offload check
    if args.optimizer_cpu_offload:
        assert args.use_precision_aware_optimizer, (
            "The optimizer cpu offload must be used in conjunction with `--use-precision-aware-optimizer`, "
            "as the hybrid device optimizer reuses the code path of this flag."
        )
        assert not args.fp8_param_gather or args.fp8_recipe == "delayed", (
            "When `--fp8-param-gather` is enabled, the optimizer cpu offload "
            "must be used in conjunction with `--fp8-recipe delayed`."
        )

    if args.non_persistent_ckpt_type == "local":
        assert args.non_persistent_local_ckpt_dir is not None, "Tried to use local checkpointing without specifying --local-ckpt-dir!"
    if args.replication:
        assert args.replication_jump is not None, "--replication requires the value of --replication-jump!"
        assert args.non_persistent_ckpt_type == "local", f"--replication requires args.non_persistent_ckpt_type == 'local', but got: {args.non_persistent_ckpt_type}"
    elif args.replication_jump:
        warn_rank_0("--replication-jump was specified despite not using replication. Ignoring.")
        args.replication_jump = None

    if args.delay_wgrad_compute:
        assert args.transformer_impl == 'transformer_engine', \
            "Delaying wgrad compute is only supported with transformer_engine implementation"
        if args.overlap_grad_reduce:
            assert is_te_min_version("2.8.0"), (
                "overlap_grad_reduce is only supported with TE >= 2.8.0 when enabling delay_wgrad_compute"
            )
            wgrad_in_graph_scope = CudaGraphScope.attn in args.cuda_graph_scope or (
                CudaGraphScope.moe_router in args.cuda_graph_scope
                and args.moe_shared_expert_intermediate_size is not None
                and not args.moe_shared_expert_overlap
            )
            if wgrad_in_graph_scope:
                assert is_te_min_version(
                    "2.12.0"
                ), "CUDA graph with delay_wgrad_compute requires TE version >= 2.12.0."
                assert args.gradient_accumulation_fusion, (
                    'CUDA graph with delay_wgrad_compute requires gradient_accumulation_fusion '
                    'to be enabled. This is because the default gradient accumulation does not '
                    'use static memory addresses, which breaks CUDA graph requirements.'
                )
                if CudaGraphScope.attn in args.cuda_graph_scope:
                    assert (
                        not args.add_bias_linear and not args.add_qkv_bias
                    ), "CUDA graph with delay_wgrad_compute doesn't support attn bias for now."

        if not args.gradient_accumulation_fusion:
            assert is_te_min_version("2.7.0"), (
                "disabling gradient_accumulation_fusion is only supported with TE >= 2.7.0 "
                "when enabling delay_wgrad_compute"
            )

    if args.fine_grained_activation_offloading:
        assert args.transformer_impl == 'transformer_engine', \
            "Fine-grained activation offloading is only supported with transformer_engine implementation"
        if is_te_min_version("2.10.0"):
            assert os.getenv("NVTE_CPU_OFFLOAD_V1", "0") == "1", \
                "For fine-grained activation offloading with TE >= 2.10.0, NVTE_CPU_OFFLOAD_V1 should be set to 1 to avoid offloading weights."

    if args.mtp_num_layers:
        assert not args.use_legacy_models, "The legacy Megatron models does not support Multi-Token Prediction (MTP)."
        # MTP is compatible with position embedding types that use position_ids.
        supported_position_types = ["learned_absolute", "rope", "mrope", "none"]
        assert args.position_embedding_type in supported_position_types, (
            f"Multi-Token Prediction (MTP) is not supported with '{args.position_embedding_type}' position embedding type. "
            f"The supported position embedding types are: {', '.join(supported_position_types)}."
        )

    if args.cpu_offloading_num_layers > 0:
        args.cpu_offloading = True

    # CUDA Graphs
    if args.cuda_graph_impl != "none":
        if (
            "transformer_engine" in (args.transformer_impl, args.cuda_graph_impl)
            and not args.te_rng_tracker
        ):
            args.te_rng_tracker = True
            warn_rank_0("te_rng_tracker is not enabled, enabling it for CUDA graphs.", args.rank)
        if args.cuda_graph_impl == "transformer_engine":
            assert (
                "expandable_segments:True" not in os.getenv("PYTORCH_CUDA_ALLOC_CONF", "")
                or os.getenv("NCCL_GRAPH_REGISTER", "") == "0"
            ), (
                "Setting NCCL_GRAPH_REGISTER=0 to avoid illegal memory access when using "
                "CUDA Graph with PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True."
            )
    if args.cuda_graph_scope == "full" or (
        isinstance(args.cuda_graph_scope, list) and "full" in args.cuda_graph_scope
    ):
        if isinstance(args.cuda_graph_scope, list):
            assert args.cuda_graph_scope == ["full"], "full scope cannot be used with other scopes."
        args.cuda_graph_scope = []
        warn_rank_0(
            'full scope is deprecated. Use empty cuda_graph_scope to capture the whole layer.'
        )
    
    if args.multi_latent_attention:
        assert not args.group_query_attention, "Group query attention is mutually exclusive with multi latent attention."
        
    if args.mla_down_proj_fusion:
        assert args.multi_latent_attention, "--mla-down-proj-fusion requires --multi-latent-attention"

    # MoE latent projections
    if args.moe_latent_size is not None:
        assert args.moe_latent_size > 0, "MoE latent projection dimension has to be greater than zero."
        assert args.num_experts is not None, "MoE latent projections are applicable only for MoE models."
        assert not args.use_legacy_models, "MoE latent projections are only supported for mcore models."

    if args.tiktoken_special_tokens and not args.tokenizer_special_tokens:
        warn_rank_0(
            "--tiktoken-special-tokens argument is deprecated and will be removed soon. "
            "Use --tokenizer-special-tokens instead."
        )
        args.tokenizer_special_tokens = args.tiktoken_special_tokens
    
    if args.tokenizer_hf_use_fast:
        warn_rank_0(
            "--tokenizer-hf-use-fast argument is deprecated and will be removed soon. "
            "`use_fast` is set to True by default for HF tokenizers."
            "Use --tokenizer-hf-no-use-fast if you want to disable `use_fast`."
        )

    if args.tokenizer_hf_include_special_tokens:
        warn_rank_0(
            "--tokenizer-hf-include-special-tokens argument is deprecated and will be removed soon. "
            "`include_special_tokens` is set to True by default for HF tokenizers."
            "Use --tokenizer-hf-no-include-special-tokens if you want to disable `include_special_tokens`."
        )

    # Print arguments.
    _print_args("arguments", args)

    return args


def _print_args(title, args):
    """Print arguments."""
    from megatron.training.utils import is_rank0
    if is_rank0():
        print(f'------------------------ {title} ------------------------', flush=True)
        str_list = []
        for arg in vars(args):
            dots = '.' * (48 - len(arg))
            str_list.append('  {} {} {}'.format(arg, dots, getattr(args, arg)))
        for arg in sorted(str_list, key=lambda x: x.lower()):
            print(arg, flush=True)
        print(f'-------------------- end of {title} ---------------------', flush=True)


def _check_arg_is_not_none(args, arg):
    assert getattr(args, arg) is not None, '{} argument is None'.format(arg)


def core_transformer_config_from_args(args, config_class=None):

    # Config class.
    config_class = config_class or TransformerConfig

    if args.multi_latent_attention:
        config_class = MLATransformerConfig

    if args.heterogeneous_layers_config_path is not None:
        assert not args.multi_latent_attention, "Multi latent attention with heterogeneous layers is not supported."
        config_class = HeterogeneousTransformerConfig

    # Translate args to core transformer configuration
    kw_args = {}
    for f in dataclasses.fields(config_class):
        if hasattr(args, f.name):
            kw_args[f.name] = getattr(args, f.name)
    kw_args['persist_layer_norm'] = not args.no_persist_layer_norm
    kw_args['deallocate_pipeline_outputs'] = True
    kw_args['pipeline_dtype'] = args.params_dtype
    kw_args['batch_p2p_comm'] = not args.overlap_p2p_comm
    kw_args['num_moe_experts'] = args.num_experts
    kw_args['rotary_interleaved'] = args.rotary_interleaved
    kw_args['num_layers_in_first_pipeline_stage']= args.decoder_first_pipeline_num_layers
    kw_args['num_layers_in_last_pipeline_stage']= args.decoder_last_pipeline_num_layers
    kw_args['fp8_param'] = args.fp8_param_gather
    if args.swiglu:
        kw_args['activation_func'] = F.silu
        kw_args['gated_linear_unit'] = True
        kw_args['bias_activation_fusion'] = args.bias_swiglu_fusion
    else:
        kw_args['bias_activation_fusion'] = args.bias_gelu_fusion
    if args.squared_relu:
        assert not args.swiglu
        kw_args['activation_func'] = squared_relu
    elif args.quick_geglu:
        assert not args.swiglu
        kw_args['gated_linear_unit'] = True
        kw_args['activation_func'] = quick_gelu
    if args.init_method_xavier_uniform:
        kw_args['init_method'] = torch.nn.init.xavier_uniform_
        kw_args['scaled_init_method'] = torch.nn.init.xavier_uniform_
    if args.group_query_attention:
        kw_args['num_query_groups'] = args.num_query_groups
    else:
        kw_args['num_query_groups'] = None
    kw_args['config_logger_dir'] = args.config_logger_dir
    if args.rope_type is None:
        # Pop 'rope_type' to let the config class use the default value.
        kw_args.pop('rope_type', None)
    else:
        assert (args.multi_latent_attention or args.rope_type == 'rope'), (
            f'Common attention only support rope_type="rope", but got {args.rope_type}.'
        )

    if len(args.cp_comm_type) == 1:
        kw_args['cp_comm_type'] = args.cp_comm_type[0]
    if args.hybrid_layer_pattern is not None:
        kw_args['is_hybrid_model'] = True

    kw_args['inference_sampling_seed'] = args.seed

    # handle quantization config
    # NOTE: Kitchen arguments are only added to the namespace when
    # Kitchen library is available.
    if hasattr(args, "kitchen_config_file") and args.kitchen_config_file is not None:
        kw_args['use_kitchen'] = True
        kw_args['quant_recipe'] = load_quantization_recipe(args.kitchen_config_file)
    elif hasattr(args, 'kitchen_recipe_number') and args.kitchen_recipe_number is not None:
        kw_args['use_kitchen'] = True
        kw_args['quant_recipe'] = kitchen_quantization_recipe_config(args.kitchen_recipe_number)

    kw_args['moe_latent_size'] = args.moe_latent_size

    if args.te_precision_config_file:
        assert not 'quant_recipe' in kw_args, "Quantization recipe already configured."
        # TODO(kwyss): Prohibit fp8_params or fp4_params with this flexibility
        kw_args['quant_recipe'] = load_quantization_recipe(args.te_precision_config_file)

    if hasattr(args, "use_kitchen_attention"):
        kw_args['use_kitchen_attention'] = args.use_kitchen_attention
    if hasattr(args, "kitchen_attention_backend"):
        kw_args['kitchen_attention_backend'] = args.kitchen_attention_backend

    # Return config.
    return config_class(**kw_args)


def _add_transformer_engine_args(parser):
    group = parser.add_argument_group(title='Transformer-Engine')

    # delayed scaling only configs
    group.add_argument('--fp8-param-gather', action='store_true',
                       help='Keep the compute param in fp8 (do not use any other intermediate '
                            'dtype) and perform the param all-gather in fp8.')

    # FP4 related arguments
    group.add_argument('--te-precision-config-file', default=None,
                       help='Configuration file to select per-module precision overrides. '
                       'See TransformerEngineMixedPrecision.md')
    return parser

def _add_inference_args(parser):
    group = parser.add_argument_group(title='inference')

    group.add_argument('--inference-batch-times-seqlen-threshold',
                       type=int, default=-1,
                       help='If (batch-size * sequence-length) is smaller than this threshold'
                       'then batches will not be split up for pipelining.'
                       'Requires setting --pipeline-model-parallel-size > 1.'
                       'Setting this to -1 indicates that batch pipelining is not used.')
    group.add_argument('--max-tokens-to-oom',
                       type=int, default=12000,
                       help='Maximum number of tokens during inference'
                       'tokens here is # in prompt + # to generate'
                       'Allows us to throw an error before OOM crashes server')
    group.add_argument('--output-bert-embeddings', action='store_true',
                       help='Output Bert embeddings (via mean pooling) from '
                       'model, rather than its binary head output or entire '
                       'hidden batch.')
    group.add_argument('--bert-embedder-type', default="megatron",
                       choices=["megatron", "huggingface"],
                       help='Select either Megatron or Huggingface as the '
                       'Bert embedder.')
    group.add_argument('--cuda-graph-scope', nargs='+', type=lambda scope: CudaGraphScope[scope] if scope != "full" else scope, default=[],
                       help='Determines the CUDA graphs capturing scope. '
                       'choices: "attn", "mlp", "moe", "moe_router", "moe_preprocess", "mamba", "full_iteration". '
                       '"attn": captures operations in TransformerLayer._forward_attention(). '
                       '"mlp": captures operations in TransformerLayer._forward_mlp() for a dense layer. '
                       '"moe": captures operations in TransformerLayer._forward_mlp() for a MoE layer. '
                       '"moe_router": captures operations in TransformerLayer._forward_mlp() up to MoELayer.router(), '
                       'including the shared experts if they are not overlapped with EP comm. '
                       '"moe_preprocess": captures operations in MoELayer.preprocess(). Must be used together with "moe_router". '
                       '"mamba": captures the mamba layer. '
                       '"full_iteration": captures a whole training iteration. '
                       '"full_iteration_inference": captures a whole inference iteration. '
                       'full_iteration and full_iteration_inference scopes are only supported with --cuda-graph-impl=local, other scopes are only supported with --cuda-graph-impl=transformer_engine. '
                       'If not specified, the default scope is to capture the whole Transformer layer. '
                       'For backward compatibility, we still allow passing "full" to specify capturing the whole layer, and convert it to an empty list.')
    group.add_argument('--use-legacy-static-engine', action='store_true', default=False,
                       help='Use legacy static engine. (Current static engine uses dynamic engine under the hood)',
                       dest='use_legacy_static_engine')
    group.add_argument('--inference-max-requests', type=int, default=8,
                       help='Maximum number of requests for inference.',
                       dest='inference_max_requests')
    group.add_argument('--inference-max-seq-length', type=int, default=2560,
                       help='Maximum sequence length expected for inference (prefill + decode).',
                       dest='inference_max_seq_length')
    group.add_argument('--inference-dynamic-batching',
                       action='store_true', default=False,
                       help='Enable dynamic batching mode.')
    group.add_argument('--inference-dynamic-batching-buffer-size-gb',
                       type=float, default=40.,
                       help='Amount of on-GPU memory allocated for the KV cache. '
                       'The total amount of memory allocated for the KV cache '
                       '(CPU + GPU memory) depends on the value set for the '
                       'unified virtual memory (UVM) level (via '
                       '`--inference-dynamic-batching-unified-memory-level`).'
                       'If the UVM level is 0, then only GPU memory is used and '
                       'the total memory equals `buffer_size_gb`. If the UVM '
                       'level is 1, then additional memory is utilized on the '
                       'CPU and the total memory equals `buffer_size_gb + '
                       'paused_buffer_size_gb`.')
    group.add_argument('--inference-dynamic-batching-paused-buffer-size-gb',
                       type=float, default=None,
                       help='Amount of memory reserved for paused requests in '
                       'the dynamic inference context. Active requests are '
                       'paused when there are not enough active blocks available '
                       'to continue generating a request.')
    group.add_argument('--inference-dynamic-batching-mamba-memory-ratio', type=float, default=None,
                       help='Percentage of memory buffer to allocate for Mamba states. '
                       'If not specified, allocates Mamba state tensors for each KV cache block. '
                       'Only used for hybrid models.')
    group.add_argument('--inference-dynamic-batching-block-size',
                       type=int, default=256,
                       help='KV cache block size. '
                       'It should be a multiple of 256')
    group.add_argument('--inference-dynamic-batching-max-requests',
                       type=int, default=None,
                       help='Override the inference context\'s `max_requests`. '
                       'By default, `max_requests` is set to the number of '
                       'blocks in the context\'s memory buffer.')
    group.add_argument('--inference-dynamic-batching-max-tokens',
                       type=int, default=None,
                       help='Override the inference context\'s default `max_tokens`.')
    group.add_argument('--inference-dynamic-batching-num-cuda-graphs',
                       type=int, default=16,
                       help='Maximum number of cuda graphs to capture, where the '
                       'cuda graph batch sizes range from 1 to `max_requests`. '
                       '(See `dynamic_context.py` for details on how '
                       '`max_requests` is computed). Due to rounding, the actual '
                       'number of cuda graphs may not equal this argument.'
                       'The user can also pass -1, in which case we automatically determine the number of graphs ' \
                       'to capture based on the `max_requests`.')
    group.add_argument('--inference-dynamic-batching-track-paused-request-events',
                       action='store_true',
                       help='Track paused request ids by adding \'paused\' events '
                       'to each request\'s event history. This has a very minor '
                       'impact on latency.')
    group.add_argument('--inference-dynamic-batching-track-generated-token-events',
                       action='store_true',
                       help='Track per-token events with timestamps for each generated token. '
                       'When enabled, each generated token creates a GENERATED_TOKEN event '
                       'with a timestamp, useful for per-token latency analysis.')
    group.add_argument('--decode-only-cuda-graphs',
                       action='store_true', default=False,
                       help='Only use cuda graphs for decode-only steps, not prefill and mixed steps.')
    group.add_argument('--inference-dynamic-batching-unified-memory-level',
                       type=int, default=0, choices=[0, 1],
                       help='Set unified memory usage within the dynamic '
                       'inference context. The levels are: 0) no unified memory, '
                       '1) allocate `memory_buffer` in unified memory. '
                       'Eventually, additional levels will be included to '
                       'control other tensors within the context.')
    group.add_argument('--enable-chunked-prefill', dest='enable_chunked_prefill',
                       action='store_true', default=False,
                       help="Enable chunked prefill (disabled by default)")
    group.add_argument('--num-speculative-tokens', type=int, default=0,
                       help='Number of speculative tokens generated during decode')
    group.add_argument('--inference-dynamic-batching-prefix-caching',
                       dest='inference_dynamic_batching_enable_prefix_caching',
                       action=argparse.BooleanOptionalAction,
                       default=False,
                       help='Enable/disable prefix caching for dynamic batching inference. '
                       'When disabled, KV cache blocks cannot be shared between '
                       'requests with identical prompt prefixes.')
    group.add_argument('--inference-dynamic-batching-prefix-caching-eviction-policy',
                       type=str, default='ref_zero',
                       choices=['ref_zero', 'lru'],
                       dest='inference_dynamic_batching_prefix_caching_eviction_policy',
                       help='Eviction policy for prefix caching blocks. '
                       '"ref_zero" (default) immediately returns blocks to the '
                       'free pool when ref_count hits 0. "lru" keeps blocks '
                       'cached and evicts via LRU only when space is needed.')
    group.add_argument('--inference-dynamic-batching-prefix-caching-coordinator-policy',
                       type=str, default='first_prefix_block',
                       choices=['longest_prefix', 'first_prefix_block', 'round_robin'],
                       dest='inference_dynamic_batching_prefix_caching_coordinator_policy',
                       help='Coordinator routing policy for prefix caching. '
                       '"first_prefix_block" (default) routes based on the first '
                       'block hash only. "longest_prefix" routes to the rank with '
                       'the longest matching prefix. "round_robin" ignores prefix '
                       'affinity and cycles through ranks.')
    group.add_argument('--inference-dynamic-batching-prefix-caching-mamba-gb',
                       type=float, default=None,
                       dest='inference_dynamic_batching_prefix_caching_mamba_gb',
                       help='GPU memory budget (in GB) for the Mamba state cache '
                       'used by prefix caching on hybrid models. When set, Mamba '
                       'states at block boundaries are cached for reuse.')
    group.add_argument('--inference-dynamic-batching-mamba-triton-conv1d',
                       action='store_true', default=False,
                       dest='inference_dynamic_batching_mamba_triton_conv1d',
                       help='Use Triton varlen conv1d kernel for Mamba prefill '
                       'instead of per-request causal_conv1d_fn calls.')
    group.add_argument('--inference-dynamic-batching-cuda-graph-max-tokens',
                       type=int, default=16384,
                       help='Maximum number of tokens to capture in a cuda graph.')
    group.add_argument('--inference-dynamic-batching-cuda-graph-mixed-prefill-count',
                       type=int, default=16,
                       help='Number of mixed prefill requests to capture in a cuda graph.')
    group.add_argument('--inference-logging-step-interval', type=int, default=0,
                       help='Step interval for logging inference metrics. '
                            'Default to 0 to disable inference logging.')
    group.add_argument('--inference-text-gen-server-logging', action=argparse.BooleanOptionalAction,
                       required=False, default=False,
                       help='Enable per-request logging in the inference text generation server.')
    group.add_argument('--inference-wandb-logging', action=argparse.BooleanOptionalAction,
                       required=False, default=False, help='Enable inference wandb logging.')
    group.add_argument("--inference-coordinator-port", type=int,
                       help="This port will be used to setup the inference coordinator on node-0")
    group.add_argument('--mamba-inference-conv-states-dtype', type=str,
                       choices=['bf16', 'fp16', 'fp32'], default='bf16',
                       help='Dtype for the Mamba inference conv states tensor')
    group.add_argument('--mamba-inference-ssm-states-dtype', type=str,
                       choices=['bf16', 'fp16', 'fp32'], default='bf16',
                       help='Dtype for the Mamba inference SSM states tensor')
    group.add_argument('--inference-use-synchronous-zmq-collectives', action=argparse.BooleanOptionalAction,
                       required=False, default=False, help='Use synchronous ZMQ collectives for inference. Helps in reducing performance variability for MoEs.')
    return parser


def _add_network_size_args(parser):
    exclude = [
        # cannot provide callables over CLI
        "timers",
        "finalize_model_grads_func",
        "grad_scale_func",
        "no_sync_func",
        "grad_sync_func",
        "param_sync_func",
        "_cpu_offloading_context",
        "init_method",
        "output_layer_init_method",
        "embedding_init_method",
        "activation_func",
        # types affect docstring
        "pipeline_model_parallel_layout",
        "window_size",
        "window_attn_skip_freq",
        "no_rope_freq",
        "moe_layer_freq",
        "linear_attention_freq",
        "moe_router_load_balancing_type",
        "moe_aux_loss_coeff",
        "cp_comm_type",
        "cuda_graph_scope",
        # no CLI argument exists for these
        "virtual_pipeline_model_parallel_size",
        "params_dtype",
        "enable_autocast",
        "autocast_dtype",
        "num_microbatches_with_partial_activation_checkpoints",
        "tp_comm_overlap_disable_qkv",
        "tp_comm_overlap_disable_fc1",
        "pipeline_dtype",
        "variable_seq_lengths",
        "batch_p2p_comm",
        "batch_p2p_sync",
        "deallocate_pipeline_outputs",
        "cpu_offloading",
        "cpu_offloading_activations",
        "cpu_offloading_weights",
        "cpu_offloading_double_buffering",
        "num_layers_in_first_pipeline_stage",
        "num_layers_in_last_pipeline_stage",
        "softmax_scale",
        "gated_linear_unit",
        "bias_activation_fusion",
        "activation_func_fp8_input_store",
        "test_mode",
        "memory_efficient_layer_norm",
        "fused_single_qkv_rope",
        "fp8_dot_product_attention",
        "fp8_multi_head_attention",
        "tp_only_amax_red",
        "use_kitchen",
        "moe_token_dropping",
        "cuda_graph_use_single_mempool",
        "cuda_graph_retain_backward_graph",
        "disable_parameter_transpose_cache",
        "inference_sampling_seed",
        "use_inference_optimized_layers",
        "heterogeneous_block_specs",
        "hetereogenous_dist_checkpoint",
        "quant_recipe",
        # deprecated and no CLI arg exists
        "tp_comm_atomic_ag",
        "tp_comm_atomic_rs",
        "moe_router_topk_limited_devices",
        # already generated by another config
        "inference_rng_tracker",
        "use_te_rng_tracker",
        "log_max_attention_logit",
        "barrier_with_L1_time",
        # args uses same var with a different name
        "num_moe_experts",
        "fp8_param",
        # incompatible defaults in dataclass
        "gradient_accumulation_fusion",
        "overlap_p2p_comm",
        "attention_softmax_in_fp32",
        "masked_softmax_fusion",
        "persist_layer_norm",
        "bias_dropout_fusion",
        "apply_rope_fusion",
    ]
    transformer_factory = ArgumentGroupFactory(TransformerConfig, exclude=exclude)
    transformer_group = transformer_factory.build_group(parser, "transformer configuration")

    group = parser.add_argument_group(title='network size')

    group.add_argument('--encoder-num-layers', type=int, default=None,
                       help='Number of encoder transformer layers.')
    group.add_argument('--decoder-num-layers', type=int, default=None,
                       help='Number of decoder transformer layers.')
    group.add_argument('--group-query-attention', action='store_true',
                          help='Use group-query attention.')
    group.add_argument('--window-size', type=tuple_type, default=None,
                       help='Window size for window attention. If not provided, '
                            'window attention will be disabled.')
    group.add_argument('--window-attn-skip-freq', type=moe_freq_type, default=None,
                       help='Frequency of layers to skip window attention. Accepts either: '
                            '- An integer N: Represents a (N-1):1 ratio, meaning one full attention layer '
                            'after (N-1) SWA layers. '
                            '- A string containing a Python list expression that defines a custom pattern, '
                            'e.g.: "[1,1,1,0]*3" evaluates to [1,1,1,0,1,1,1,0,1,1,1,0] '
                            'where 1 indicates SWA and 0 indicates full attention. ')
    group.add_argument('--max-position-embeddings', type=int, default=None,
                       help='Maximum number of position embeddings to use. '
                       'This is the size of position embedding.')
    group.add_argument('--position-embedding-type', type=str, default='learned_absolute',
                        choices=['learned_absolute', 'rope', 'mrope', 'relative', 'none'],
                        help='Position embedding type.')
    group.add_argument('--relative-attention-num-buckets', type=int, default=32,
                        help='Number of buckets for relative position embeddings.')
    group.add_argument('--relative-attention-max-distance', type=int, default=128,
                        help='Maximum distance for relative position embeddings calculation.')
    group.add_argument('--use-rotary-position-embeddings', action='store_true',
                       help='Use rotary positional embeddings or not. '
                       'Deprecated: use --position-embedding-type')
    group.add_argument('--rotary-base', type=int, default=10000,
                       help='Base to use for rotary positional embeddings, default 10000')
    group.add_argument('--rotary-percent', type=float, default=1.0,
                       help='Percent of rotary dimension to use, default 100%%')
    group.add_argument('--rotary-seq-len-interpolation-factor', type=int, default=None,
                       help='Sequence length interpolation factor for rotary embeddings.')
    group.add_argument('--use-rope-scaling', action='store_true',
                       help='Apply rope scaling as used in llama3.x')
    group.add_argument('--rope-scaling-factor', type=float, default=8.0,
                       help='Rope scaling factor in llama3.x models')
    group.add_argument('--no-rope-freq', type=no_rope_freq_type, default=None,
                       help='Controls which layers to skip performing Rotary Position Embedding. Accepts either: '
                            '- An integer N: Represents a 1:N ratio, meaning RoPE is skipped every N-1 layers. '
                            '- A string containing a Python list expression that defines a custom pattern, e.g.: '
                            '"([0]*3+[1]*1)*3" evaluates to [0,0,0,1,0,0,0,1,0,0,0,1] '
                            'where 1 indicates no-rope layer. This patten is equivalent to --no-rope-freq=4.'
                            'By default this is disabled and set to None, indicating RoPE will be performed'
                            'on every layer.'
                       )
    group.add_argument('--no-position-embedding',
                       action='store_false',
                       help='Disable position embedding. Deprecated: use --position-embedding-type',
                       dest='add_position_embedding')
    group.add_argument('--make-vocab-size-divisible-by', type=int, default=128,
                       help='Pad the vocab size to be divisible by this value.'
                       'This is added for computational efficieny reasons.')
    group.add_argument('--openai-gelu', action='store_true',
                       help='Use OpenAIs GeLU implementation. This option'
                       'should not be used unless for backward compatibility'
                       'reasons.')
    group.add_argument('--squared-relu', action='store_true',
                       help='Use squared relu activation instead of default gelu')
    group.add_argument('--swiglu', action='store_true',
                       help='Use gated linear units and SiLU activation instead of default gelu')
    group.add_argument('--quick-geglu', action='store_true',
                       help='Use quick geglu activation instead of default gelu')
    group.add_argument('--onnx-safe', type=bool, required=False,
                       help='Use workarounds for known problems with '
                       'Torch ONNX exporter')
    group.add_argument('--bert-no-binary-head', action='store_false',
                       help='Disable BERT binary head.',
                       dest='bert_binary_head')
    group.add_argument('--untie-embeddings-and-output-weights', action='store_true',
                       help='Untie embeddings and output weights.')
    return parser

def _add_straggler_detector_args(parser):
    from megatron.training.config import StragglerDetectionConfig

    straggler_factory = ArgumentGroupFactory(StragglerDetectionConfig)
    group = straggler_factory.build_group(parser, "straggler")

    return parser

def _add_workload_inspector_server_args(parser):
    group = parser.add_argument_group(title='workload inspector')
    group.add_argument('--run-workload-inspector-server', action='store_true',
                       help='If set, enables workload inspector server for on-demand profiling.')
    return parser

def _add_inprocess_restart_args(parser):
    group = parser.add_argument_group(title='In-process restart')

    group.add_argument('--inprocess-restart', action='store_true',
                       help='Enables in-process restart.')

    group.add_argument('--inprocess-max-iterations', default=None, type=int,
                       help='Maximum number of in-process restart iterations.')
    group.add_argument('--inprocess-monitor-thread-interval', default=1.0, type=float,
                       help='Monitoring interval (in seconds) for the monitoring thread.')
    group.add_argument('--inprocess-monitor-process-interval', default=1.0, type=float,
                       help='Monitoring interval (in seconds) for the monitoring process.')
    group.add_argument('--inprocess-progress-watchdog-interval', default=1.0, type=float,
                       help='Interval (in seconds) for automatic progress watchdog timestamp '
                       'updates.')
    group.add_argument('--inprocess-heartbeat-interval', default=30, type=float,
                       help='Monitoring interval (in seconds) for detecting unresponsive ranks.')

    group.add_argument('--inprocess-soft-timeout', default=60, type=float,
                       help='Soft progress timeout (in seconds).')
    group.add_argument('--inprocess-hard-timeout', default=90, type=float,
                       help='Hard progress timeout (in seconds).')
    group.add_argument('--inprocess-heartbeat-timeout', default=60, type=float,
                       help='Timeout (in seconds) for a missing rank detection heartbeat.')

    group.add_argument('--inprocess-barrier-timeout', default=120, type=float,
                       help='Timeout (in seconds) for internal distributed barrier')
    group.add_argument('--inprocess-completion-timeout', default=120, type=float,
                       help='Timeout (in seconds) for barrier on completion on all ranks')

    group.add_argument('--inprocess-last-call-wait', default=1, type=float,
                       help='Time interval (in seconds) for other ranks to report concurrent '
                       'terminal failures.')
    group.add_argument('--inprocess-termination-grace-time', default=1, type=float,
                       help='Interval (in seconds) between SIGTERM and SIGKILL issued on hard '
                       'timeout')

    group.add_argument('--inprocess-granularity', default='node', type=str,
                       choices=['node', 'rank'],
                       help='Granularity for in-process restart.')
    group.add_argument('--inprocess-active-world-size',
                       default=int(os.getenv('WORLD_SIZE', '1')), type=int,
                       help='The number of ranks initially executing the workload. '
                       'The remaining ranks from the allocation are set aside '
                       'as warm reserve.')
    group.add_argument('--inprocess-empty-cuda-cache', action='store_true',
                       help='Release all unoccupied cached GPU memory on every in-process restart.')
    return parser

def _add_one_logger_args(parser):
    group = parser.add_argument_group(title='one logger')
    group.add_argument('--no-one-logger', action='store_false',
                       help='If set, disable using one_logger to track E2E metrics'
                       'Note that one_logger is an internal tool and not '
                       'available externally. For installation, please go to '
                       'https://confluence.nvidia.com/display/MLWFO/Package+Repositories'
                       'for more details',
                       dest='enable_one_logger')
    group.add_argument('--one-logger-project', type=str, default='megatron-lm',
                       help='The one-logger project name. Will ignore if '
                       '--no-one-logger is set')
    group.add_argument('--one-logger-run-name', type=str, default=None,
                       help='The one-logger run name displayed. Will ignore if '
                       '--no-one-logger is set')
    group.add_argument('--one-logger-async', action='store_true',
                       help='If set, forces one_logger to use async mode.')
    group.add_argument('--app-tag-run-name', type=str, default=None,
                       help='Jobs belonging to same training run, suppose to '
                       'have the same name. It will be used to track progress of '
                       'a training done over multiple different jobs')
    group.add_argument('--app-tag-run-version', type=str, default='0.0.0',
                       help='The version of the training of which current job is '
                       'part of. It will be used to track the changes in the '
                       'application side which might change the performance '
                       'baseline')
    return parser


def _add_ft_package_args(parser):
    group = parser.add_argument_group(title='ft_package')
    group.add_argument('--enable-ft-package', action='store_true',
                       help='If set, Fault Tolerance package is enabled. '
                       'Note: This feature is for Nvidia internal use only.')
    group.add_argument('--calc-ft-timeouts', action='store_true',
                       help='If set, FT package will try to automatically compute the timeouts. '
                       'Note: This feature is for Nvidia internal use only.')
    group.add_argument('--ft-num-warmup-iters', type=int, default=5,
                       help='Number of warmup iterations before monitoring step section and '
                       'out-of-section timeouts. The first N iterations are excluded from '
                       'timeout monitoring as they can be significantly slower than steady-state. '
                       'Default: 5. Note: This feature is for Nvidia internal use only.')
    return parser


def _add_logging_args(parser):
    from megatron.training.config import LoggerConfig

    log_factory = ArgumentGroupFactory(LoggerConfig, exclude = ["log_throughput_to_tensorboard", "throughput_window_size", "memory_keys", "log_l2_norm_grad_to_tensorboard", "log_runtime_to_tensorboard", "runtime_time_unit", "filter_warnings", "modules_to_filter", "set_level_for_all_loggers", "save_config_filepath"])
    group = log_factory.build_group(parser, title="logging")

    return parser


def _add_regularization_args(parser):
    group = parser.add_argument_group(title='regularization')

    group.add_argument('--weight-decay', type=float, default=0.01,
                       help='Weight decay coefficient for L2 regularization.')
    group.add_argument('--apply-wd-to-qk-layernorm', action='store_true',
                       help='Apply weight decay to qk layernorm as a special case.')
    group.add_argument('--clip-grad', type=float, default=1.0,
                       help='Gradient clipping based on global L2 norm.')
    group.add_argument('--adam-beta1', type=float, default=0.9,
                       help='First coefficient for computing running averages '
                       'of gradient and its square')
    group.add_argument('--adam-beta2', type=float, default=0.999,
                       help='Second coefficient for computing running averages '
                       'of gradient and its square')
    group.add_argument('--adam-eps', type=float, default=1e-08,
                       help='Term added to the denominator to improve'
                       'numerical stability')
    group.add_argument('--sgd-momentum', type=float, default=0.9,
                       help='Momentum factor for sgd')
    group.add_argument('--muon-momentum', type=float, default=0.9,
                       help='Momentum factor for Muon optimizer')
    group.add_argument('--muon-no-split-qkv', action='store_false', default=True,
                       dest='muon_split_qkv',
                       help='Whether to split QKV parameters for Muon optimizer')
    group.add_argument('--muon-use-nesterov', action='store_true',
                       help='Whether to use Nesterov-style momentum in the internal SGD')
    group.add_argument('--muon-scale-mode', type=str, default='spectral',
                       choices=['spectral', 'unit_rms_norm', 'shape_scaling'],
                       help='Scale mode for Muon optimizer')
    group.add_argument('--muon-fp32-matmul-prec', type=str, default='medium',
                       choices=['low', 'medium', 'high'],
                       help='FP32 matmul precision for Newton-Schulz iteration')
    group.add_argument('--muon-num-ns-steps', type=int, default=5,
                       help='Number of Newton-Schulz steps for Muon optimizer')
    group.add_argument('--muon-tp-mode', type=str, default='blockwise',
                       choices=['blockwise', 'duplicated', 'distributed'],
                       help='How to perform NS calculation for tensor model parallel weights')
    group.add_argument('--muon-extra-scale-factor', type=float, default=1.0,
                       help='Additional scale factor for the muon update')
    group.add_argument('--muon-scalar-optimizer', type=str, default='adam',
                       choices=['adam', 'lion'],
                       help='Optimizer for scalar parameters (embeddings, biases, norms) '
                       'when using muon. Defaults to adam.')
    group.add_argument('--lion-beta1', type=float, default=0.95,
                       help='First beta coefficient for Lion optimizer '
                       '(used in sign update). Default: 0.95.')
    group.add_argument('--lion-beta2', type=float, default=0.98,
                       help='Second beta coefficient for Lion optimizer '
                       '(used in momentum EMA update). Default: 0.98.')

    group.add_argument('--no-weight-decay-cond-type', type=str, choices=['apply_wd_to_qk_layernorm'],
                       help='Type of no weight decay condition. Choices: '
                       'None (default): apply weight decay to 1D weights and biases.'
                       '"apply_wd_to_qk_layernorm": additionally apply weight decay to '
                       'qk layernorm as a special case.'
                       'DEPRECATED. Please use --apply-wd-to-qk-layernorm instead. ')
    return parser


def _add_rl_args(parser):
    group = parser.add_argument_group(title='rl')
    group.add_argument('--perform-rl-step', action='store_true',
                       help="Use the RL training step.")
    group.add_argument('--rl-prompts-per-eval', type=int, default=32,
                       help='Number of prompts to evaluate for for each RL task.'
                        'This evaluation can be very expensive when using environments'
                        'that evaluate pass@k so we default to a lower number.')
    # TODO(rkirby): allow for "complete" evaluation when --rl-prompts-per-eval is set to -1
    group.add_argument('--grpo-prompts-per-step', type=int, default=32,
                       help="Number of GRPO groups (G in the paper).")
    group.add_argument('--grpo-group-size', type=int, default=2,
                       help="Number of samples per a GRPO group.")
    group.add_argument('--rl-num-parallel-generations', type=int, default=None,
                       help='Number of rollouts being generated by the inference engine simultaneously. '
                            'Internally divided by grpo_group_size. '
                            'Requires --rl-partial-rollouts. '
                            'Mutually exclusive with --rl-num-parallel-generation-batches.')
    group.add_argument('--rl-num-parallel-generation-batches', type=int, default=None,
                       help='Number of generation batches in flight. '
                            'Set to L+1 to allow for L steps of staleness between the inference and training policies. '
                            'Each batch contains grpo_prompts_per_step groups by default. '
                            'Requires --rl-partial-rollouts. '
                            'Mutually exclusive with --rl-num-parallel-generations.')
    group.add_argument('--rl-generation-batch-size', type=int, default=None,
                       help='Override the number of groups per generation batch. '
                            'Defaults to grpo_prompts_per_step when '
                            '--rl-num-parallel-generation-batches is set.')
    group.add_argument('--grpo-iterations', type=int, default=2,
                       help="Number of iterations per a GRPO implementation.")
    # As in DAPO, we keep upper/lower eps different.
    # To have a vanilla GRPO, set them to be the same.
    group.add_argument('--grpo-clamp-eps-lower', type=float, default=0.01,
                       help="Lower GRPO clipping bound.")
    group.add_argument('--grpo-clamp-eps-upper', type=float, default=0.01,
                       help="Upper GRPO clipping bound. In vanilla implementation, equals to the lower one.")
    group.add_argument('--grpo-kl-beta', type=float, default=0.001,
                       help="KL term weight in the GRPO loss.")
    group.add_argument('--grpo-entropy-term-weight', type=float, default=0.0,
                       help="Entropy term weight in GRPO loss.")
    group.add_argument('--grpo-filter-groups-with-same-reward', action='store_true',
                       help="Filter groups with same reward.")
    group.add_argument('--langrl-env-config', type=str, default=None,
                       help="Path to YAML config file for RL environment configuration.")
    group.add_argument('--rl-default-temperature', type=float, default=1.0,
                       help="Default temperature for model inference.")
    group.add_argument('--rl-default-top-p', type=float, default=0,
                       help="Default top-p for model inference.")
    group.add_argument('--rl-default-top-k', type=int, default=-1,
                       help="Default top-k for model inference.")
    group.add_argument('--rl-offload-optimizer-during-inference', action='store_true',
                       help='Offload optimizer state to CPU during inference/rollout to save GPU memory')
    group.add_argument('--rl-kv-cache-management-mode', type=str, default='persist',
                       choices=['persist', 'offload', 'recompute'],
                       help='KV cache management mode during RL training: '
                            'persist: leave KV cache in GPU memory (default), '
                            'offload: offload KV cache to CPU during training, '
                            'recompute: deallocate KV cache and recompute from scratch each cycle')
    group.add_argument('--rl-persist-cuda-graphs', action=argparse.BooleanOptionalAction, type=bool, default=False,
                       help='Persist CUDA graphs when the inference engine is suspended. '
                            'If False, CUDA graphs are deleted on suspend and re-captured on resume.')
    group.add_argument('--rl-partial-rollouts', action=argparse.BooleanOptionalAction, default=False,
                       help='Allow inference to continue generating rollouts while training updates '
                            'the policy weights. This enables off-policy training where rollouts may '
                            'be generated with a stale version of the policy. Use '
                            '--rl-num-parallel-generations or --rl-num-parallel-generation-batches '
                            'to control the degree of staleness.')
    group.add_argument('--rl-inference-logprobs-is-correction', action=argparse.BooleanOptionalAction, type=bool, default=False,
                       help='If set, use inference logprobs in importance sampling correction of the loss.')
    group.add_argument('--rl-importance-sampling-truncation-coef', type=float, default=None,
                       help="If --inference-logprobs-is-correction is on and this coefficient is set, apply truncation for the IS correction at GRPO loss.")
    group.add_argument('--rl-use-sequence-packing', action=argparse.BooleanOptionalAction, type=bool, default=False,
                       help='Enable sequence packing')
    group.add_argument('--rl-sequence-packing-max-sequences-per-bin', type=int, default=50,
                       help='Maximum number of sequences that can be packed into a single bin. ')
    group.add_argument('--rl-sequence-packing-algo', type=str, default='fifo',
                       choices=['fifo', 'round-robin'],
                       help='Algorithm for distributing packed bins across ranks. '
                            'fifo: first-in-first-out sequential distribution, '
                            'round-robin: distribute bins cyclically across ranks for better load balancing')
    group.add_argument('--rl-training-cuda-graphs', action=argparse.BooleanOptionalAction, type=bool,
                       default=False,
                       help='If set, do not toggle CUDA graphs on/off between inference and training phases.')
    group.add_argument('--rl-inference-tensor-model-parallel-size', type=int, default=None,
                       help='Degree of tensor model parallelism for inference for RL.')     
    group.add_argument(
        '--rl-inference-pipeline-model-parallel-size',
        type=int,
        default=None,
        help='Degree of pipeline model parallelism for inference for RL.',
    )
    group.add_argument(
        '--rl-inference-expert-model-parallel-size',
        type=int,
        default=None,
        help='Degree of expert model parallelism for inference for RL.',
    )
    group.add_argument(
        '--rl-inference-expert-tensor-model-parallel-size',
        type=int,
        default=None,
        help='Degree of expert tensor model parallelism for inference for RL. '
             'For MoE models, this controls the TP size for expert layers specifically. '
             'Defaults to training expert_tensor_parallel_size if not specified.',
    )
    group.add_argument(
        '--rl-inference-model-unified-memory-level',
        type=int,
        default=0,
        choices=[0, 1],
        help=(
            'Allocate the separate RL inference model parameters from a unified virtual memory (UVM) '
            'CUDA mempool. Level 0 disables UVM (default). Level 1 enables UVM allocation so the '
            'inference model weights can be prefetched to CPU when idle while keeping CUDA-graph-safe '
            'device pointers.'
        ),
    )
    group.add_argument(
        '--rl-offload-inference-model-weights-when-idle',
        action=argparse.BooleanOptionalAction,
        required=False,
        default=False,
        help=(
            'When using a separate RL inference model, offload its weights to CPU when not doing rollout '
            'inference, and restore to GPU right before inference. Works with two backends: '
            '1) UVM (when --rl-inference-model-unified-memory-level=1), or '
            '2) torch_memory_saver (when UVM is not enabled; requires torch_memory_saver to be installed).'
        ),
    )
    group.add_argument('--refit-method', type=str, default='gloo',
                       choices=['nccl', 'gloo', 'nvshmem'],
                       help=('Method to refit the model weights between training and inference models during RL. '
                             'nccl: use NCCLCopyService to refit using NCCL; '
                             'gloo: use GlooCopyService over CPU; '
                             'nvshmem: use NVSHMEMCopyService to refit using the NVSHMEM.'))
    group.add_argument('--rl-verify-model-weights-swap', action=argparse.BooleanOptionalAction, default=False,
                       help='If set, verify that the model weights were correctly transferred by comparing forward pass outputs on'
                       'the first swap of model weights.')

    group.add_argument('--rl-parallel-generation-tasks', type=int, default=None,
                       help='Deprecated: use --rl-num-parallel-generations instead.')
    group.add_argument('--rl-skip-bos-token', action=argparse.BooleanOptionalAction, type=bool, default=False,
                        help='Skip BOS token at the beginning of the sequences. Default is False.')
    group.add_argument('--rl-inference-parsers', nargs='*', default=[],
                       help='List of response parsers to enable for RL inference '
                            '(e.g. --rl-inference-parsers deepseek-r1-reasoning qwen3-coder-tool).')
    return parser

def _add_training_args(parser):
    from megatron.training.config import TrainingConfig
    from megatron.training.config import ProfilingConfig

    prof_factory = ArgumentGroupFactory(ProfilingConfig, exclude=["record_shapes", "nvtx_ranges"])
    prof_group = prof_factory.build_group(parser, "profiling")

    train_factory = ArgumentGroupFactory(TrainingConfig)
    group = train_factory.build_group(parser, "training")

    group.add_argument('--batch-size', type=int, default=None,
                       help='Old batch size parameter, do not use. '
                       'Use --micro-batch-size instead')
    group.add_argument('--recompute-activations', action='store_true',
                       help='recompute activation to allow for training '
                       'with larger models, sequences, and batch sizes.')
    group.add_argument('--no-check-for-nan-in-loss-and-grad', action='store_false',
                       help='Check for NaNs in loss and grad',
                       dest='check_for_nan_in_loss_and_grad')
    group.add_argument('--check-for-large-grads', action='store_true',
                       help='Check for unexpectedly large grads',
                       dest='check_for_large_grads')
    group.add_argument('--result-rejected-tracker-filename', type=str, default=None,
                       help='Optional name of file tracking `result_rejected` events.')
    group.add_argument('--tp-comm-overlap-cfg', type=str, default=None,
                       help='Config file when tp_comm_overlap is enabled.')

    # deprecated
    group.add_argument('--checkpoint-activations', action='store_true',
                       help='Checkpoint activation to allow for training '
                       'with larger models, sequences, and batch sizes.')
    group.add_argument('--no-masked-softmax-fusion',
                       action='store_false',
                       help='Disable fusion of query_key_value scaling, '
                       'masking, and softmax.',
                       dest='masked_softmax_fusion')
    group.add_argument('--no-bias-gelu-fusion', action='store_false',
                       help='Disable bias and gelu fusion.',
                       dest='bias_gelu_fusion')
    group.add_argument('--no-bias-swiglu-fusion', action='store_false',
                       help='Disable bias and swiglu fusion, the fusion is '
                       'available only when using megatron-core.',
                       dest='bias_swiglu_fusion')
    group.add_argument('--no-bias-dropout-fusion', action='store_false',
                       help='Disable bias and dropout fusion.',
                       dest='bias_dropout_fusion')
    group.add_argument('--no-rope-fusion', action='store_false',
                       help='Disable rope fusion, the fusion is available '
                       'only when using megatron-core.',
                       dest='apply_rope_fusion')
    group.add_argument('--rope-type', type=str, default=None,
                      choices=['rope', 'yarn'],
                      help='Type of rope to use. Note that MLA takes yarn by default, '
                      'and common attention takes rope by default.')
    group.add_argument('--use-flash-attn', action='store_true',
                       help='use FlashAttention implementation of attention. '
                       'https://arxiv.org/abs/2205.14135')
    group.add_argument('--optimizer', type=str, default='adam',
                       choices=['adam', 'sgd', 'muon', 'dist_muon', 'lion'],
                       help='Optimizer function')
    group.add_argument('--optimizer-cpu-offload', action='store_true',
                       help='Offload optimizer state to CPU')
    group.add_argument('--optimizer-offload-fraction', type=float, default=1.0,
                          help='Ratio of optimizer state to offload to CPU')
    group.add_argument('--use-torch-optimizer-for-cpu-offload', action='store_true',
                       help="Use torch.optim.Optimizer instead of Megatron's optimizer in optimizer cpu offload mode.")
    group.add_argument('--overlap-cpu-optimizer-d2h-h2d', action='store_true', default=False,
                       help='Overlap CPU optimizer step, gradients D2H and updated parameters H2D.')
    group.add_argument('--dump-param-to-param-group-map', type=str, default=None,
                        help="Path to a file containing parameter-to-parameter-group mapping. "
                        "Provide a JSON file that specifies which parameters belong to which "
                        "parameter group for global coordination.")
    group.add_argument('--no-pin-cpu-grads', action='store_false', dest='pin_cpu_grads',
                       help='Disable pinning of CPU memory for gradients.')
    group.add_argument('--no-pin-cpu-params', action='store_false', dest='pin_cpu_params',
                       help='Disable pinning of CPU memory for parameters.')
    group.add_argument('--dataloader-type', type=str, default=None,
                       choices=['single', 'cyclic', 'external'],
                       help='Single pass vs multiple pass data loader')
    group.add_argument('--no-persist-layer-norm', action='store_true',
                       help='Disable using persistent fused layer norm kernel. '
                       'This kernel supports only a set of hidden sizes. Please '
                       'check persist_ln_hidden_sizes if your hidden '
                       'size is supported.')
    group.add_argument('--no-gradient-accumulation-fusion',
                       action='store_false',
                       help='Disable fusing gradient accumulation to weight '
                       'gradient computation of linear layers',
                       dest='gradient_accumulation_fusion')
    group.add_argument('--use-mcore-models', action='store_true',
                       dest='deprecated_use_mcore_models',
                       help='DEPRECATED. Use the implementation from megatron core.'
                       'Now ignored and mcore models are the default, use '
                       '--use-legacy-models to not use core models.')
    group.add_argument('--use-legacy-models', action='store_true',
                       help='Use the legacy Megatron models, not Megatron-Core models.')

    return parser


def _add_rerun_machine_args(parser):
    from megatron.training.config import RerunStateMachineConfig

    rerun_factory = ArgumentGroupFactory(RerunStateMachineConfig, exclude=["check_for_nan_in_loss"])
    group = rerun_factory.build_group(parser, "rerun engine")

    return parser


def _add_initialization_args(parser):
    from megatron.training.config import RNGConfig

    rng_factory = ArgumentGroupFactory(RNGConfig)
    group = rng_factory.build_group(parser, "RNG and initialization")

    group.add_argument('--init-method-xavier-uniform', action='store_true',
                       help='Enable Xavier uniform parameter initialization')

    return parser


def _add_learning_rate_args(parser):
    from megatron.training.config import SchedulerConfig

    sched_factory = ArgumentGroupFactory(SchedulerConfig, exclude=["no_weight_decay_cond_type"])
    group = sched_factory.build_group(parser, title="learning rate and weight decay")

    group.add_argument('--lr', type=float, default=None,
                       help='Initial learning rate. Depending on decay style '
                       'and initial warmup, the learning rate at each '
                       'iteration would be different.')
    group.add_argument('--warmup', type=int, default=None,
                       help='Old lr warmup argument, do not use. Use one of the'
                       '--lr-warmup-* arguments above')
    group.add_argument('--min-lr', type=float, default=0.0,
                       help='Minimum value for learning rate. The scheduler'
                       'clip values below this threshold.')
    group.add_argument('--decoupled-lr', type=float, default=None,
                       help='Separate learning rate for the input and output layer')
    group.add_argument('--decoupled-min-lr', type=float, default=None,
                       help='Minimum value for learning rate for the input and output layer. The scheduler'
                       'clip values below this threshold')

    return parser


def _add_checkpointing_args(parser):
    from megatron.training.config import CheckpointConfig

    ckpt_factory = ArgumentGroupFactory(CheckpointConfig, exclude=["most_recent_k", "save_tokenizer_assets", "save_optim", "save_rng", "load_optim", "load_rng"])
    group = ckpt_factory.build_group(parser, "checkpointing")

    group.add_argument('--no-save-optim', action='store_true', default=None,
                       help='Do not save current optimizer.')
    group.add_argument('--no-save-rng', action='store_true', default=None,
                       help='Do not save current rng state.')
    group.add_argument('--no-load-optim', action='store_true', default=None,
                       help='Do not load optimizer when loading checkpoint.')
    group.add_argument('--no-load-rng', action='store_true', default=None,
                       help='Do not load rng state when loading checkpoint.')
    group.add_argument('--use-dist-ckpt', action='store_true',
                       dest='use_dist_ckpt_deprecated',
                       help='Deprecated: see --ckpt-format.')
    group.add_argument('--dist-ckpt-format',
                       dest='dist_ckpt_format_deprecated',
                       help='Deprecated: see --ckpt-format.')
    group.add_argument('--dist-ckpt-workers', type=int, default=1,
                       help='Number of workers for distributed checkpointing. '
                       'Only used for async save. '
                       'If set to 1, the checkpointing is performed in a single process.')
    group.add_argument('--ckpt-fully-parallel-save', action='store_true',
                       dest='ckpt_fully_parallel_save_deprecated',
                       help='Deprecated: see --no-ckpt-fully-parallel-save.')
    return parser


def _add_mixed_precision_args(parser):
    group = parser.add_argument_group(title='mixed precision')

    group.add_argument('--grad-reduce-in-bf16', action='store_true',
                       help='Reduce gradients in bfloat16.')
    group.add_argument('--loss-scale', type=float, default=None,
                       help='Static loss scaling, positive power of 2 '
                       'values can improve fp16 convergence. If None, dynamic'
                       'loss scaling is used.')
    group.add_argument('--initial-loss-scale', type=float, default=2**32,
                       help='Initial loss-scale for dynamic loss scaling.')
    group.add_argument('--min-loss-scale', type=float, default=1.0,
                       help='Minimum loss scale for dynamic loss scaling.')
    group.add_argument('--loss-scale-window', type=float, default=1000,
                       help='Window over which to raise/lower dynamic scale.')
    group.add_argument('--hysteresis', type=int, default=2,
                       help='hysteresis for dynamic loss scaling')
    group.add_argument('--attention-softmax-in-fp32', action='store_true',
                       help='Run attention masking and softmax in fp32.')
    group.add_argument('--accumulate-allreduce-grads-in-fp32',
                       action='store_true',
                       help='Gradient accumulation and all-reduce in fp32.')
    group.add_argument('--fp16-lm-cross-entropy', action='store_true',
                       help='Move the cross entropy unreduced loss calculation'
                       'for lm head to fp16.')
    group.add_argument('--reuse-grad-buf-for-mxfp8-param-ag', action='store_true',
                       help='If True, reuse the grad buffer for MXFP8 parameter all-gather.')

    return parser


def _add_distributed_args(parser):
    from megatron.training.config import DistributedInitConfig

    dist_init_factory = ArgumentGroupFactory(DistributedInitConfig)
    group = dist_init_factory.build_group(parser, "distributed init")

    group.add_argument('--decoder-first-pipeline-num-layers',
                       type=int, default=None,
                       help=('The number of transformer layers on the first pipeline stage of the decoder. '
                       'Default None is even split of transformer layers across all pipeline stages'))
    group.add_argument('--decoder-last-pipeline-num-layers',
                       type=int, default=None,
                       help=('The number of transformer layers on the last pipeline stage of the decoder. '
                       'Default None is even split of transformer layers across all pipeline stages'))
    group.add_argument('--pipeline-model-parallel-layout',
                       type=str, default=None,
                       help=('A string that describes a custom pipeline model parallel layout. '
                       'e.g., "E|(t|)*3,m|m||L". E, L, t, m denotes embedding, loss, transformer '
                       'decoder layer, and mtp layer, respectively. Stages are split by "|". '
                       'Replicated stages or layers can be described with multiplication. '
                       'Commas can be used cosmetically. '
                       'Default None is not using this argument to set the layout.'))
    group.add_argument('--model-parallel-size', type=int, default=None,
                       help='Old model parallel argument, do not use. Use '
                       '--tensor-model-parallel-size instead.')
    group.add_argument('--num-layers-per-virtual-pipeline-stage', type=int, default=None,
                       help='Number of layers per virtual pipeline stage')
    group.add_argument('--num-virtual-stages-per-pipeline-rank', type=int, default=None,
                       help='Number of virtual pipeline stages per pipeline parallelism rank')
    group.add_argument('--no-overlap-p2p-communication', action='store_false',
                       help='overlap pipeline parallel communication with forward and backward chunks in 1F1B',
                       dest='overlap_p2p_comm')
    group.add_argument('--overlap-grad-reduce', action='store_true',
                       default=False, help='If set, overlap DDP grad reduce.')
    group.add_argument('--ddp-num-buckets', type=int, default=None,
                       help='Number of buckets for data-parallel communication')
    group.add_argument('--ddp-bucket-size', type=int, default=None,
                       help='Bucket size for data-parallel communication')
    group.add_argument('--ddp-pad-buckets-for-high-nccl-busbw', action='store_true',
                       default=False, help='If set, make sure the bucket size is divisible by a large power '
                       'of 2 (2^16) to ensure NCCL collectives have high bus bandwidth at large DP counts, '
                       'since NCCL message size (which for ring algorithms is bucket_size / dp_size) '
                       'apparently needs to be divisible by a power of 2 for high busbw.')
    group.add_argument('--ddp-reduce-scatter-with-fp32-accumulation', action='store_true',
                       default=False, help='If set, use a reduce-scatter implementation which sends lower-precision '
                       'values over the wire (using an all-to-all to keep total communication overhead in line '
                       'with the standard ring implementation) but performs accumulation locally in FP32.')
    group.add_argument('--ddp-average-in-collective', action='store_true',
                       default=False, help='If set, average directly in data-parallel communication collective.')
    group.add_argument('--overlap-param-gather', action='store_true',
                       default=False, help='If set, overlap param all-gather in distributed optimizer.')
    group.add_argument('--overlap-param-gather-with-optimizer-step', action='store_true',
                       default=False, help='If set, overlap param all-gather of first bucket with optimizer step.')
    group.add_argument('--no-align-param-gather', action='store_false',
                       help='If not set, all PP stages will launch param all-gathers simultaneously. '
                       'Otherwise, each PP stage will independently launch as needed.',
                       dest='align_param_gather')
    group.add_argument('--no-scatter-gather-tensors-in-pipeline', action='store_false',
                       help='If not set, use scatter/gather to optimize communication of tensors in pipeline.',
                       dest='scatter_gather_tensors_in_pipeline')
    group.add_argument('--use-distributed-optimizer', action='store_true',
                       help='Use distributed optimizer.')
    group.add_argument('--use-nccl-ub', action='store_true', dest='nccl_ub',
                       help='Use the userbuffer registration for DP/FSDP communication buffers.'
                       'This option will reduce GPU SM usage for the DP/FSDP communication,'
                       'which is improving the performance of the overlapped computation.')
    group.add_argument('--disable-symmetric-registration', action='store_true', dest='disable_symmetric_registration',
                       default=False, help='Disable symmetric (window) registration for NCCL userbuffer registration.'
                       'This option will force to use conventional (local) userbuffer registration when use-nccl-ub is set.')
    group.add_argument('--fsdp-manual-registration', action='store_true', dest='fsdp_manual_registration',
                       default=False, help='Manually register the FSDP communication buffers to NCCL user buffer.'
                       'This option is only effective when use-megatron-fsdp and use-nccl-ub is set.')
    group.add_argument('--create-all-gather-group', action='store_true',
                   help='Create a separate process group for all-gather operations '
                   'to overlap reduce-scatter and all-gather operations.')
    group.add_argument('--data-parallel-sharding-strategy', type=str, default='no_shard',
                       choices=['no_shard', 'optim', 'optim_grads', 'optim_grads_params'],
                       help='Sharding strategy of data parallelism.')
    group.add_argument('--outer-dp-sharding-strategy', type=str, default='no_shard',
                       choices=['no_shard', 'optim'],
                       help='Sharding strategy for outer data parallel group in Hybrid Sharded Data Parallel (HSDP) mode. '
                            'Valid values are "no_shard" (DP Replication) and "optim" (Optimizer State Hybrid Sharding). '
                            'The "optim" option is only supported when --data-parallel-sharding-strategy is "optim_grads_params". '
                            'This option is only effective when Hybrid FSDP is enabled (i.e., when dp_outer_dim is not None). '
                            'Default: "no_shard".')
    group.add_argument('--no-gradient-reduce-div-fusion', action='store_false', dest='gradient_reduce_div_fusion',
                       help='If not set, fuse the division in gradient reduce.')
    group.add_argument('--fsdp-double-buffer', action='store_true',
                       help="Enable double buffering for temporary memory needed for Megatron FSDP communications. "
                        "Double-buffering the communication memory improves memory management efficiency by "
                        "reusing previously allocated buffers, rather than creating new buffers for each FSDP communication. "
                        "This is required for user buffer registration and is enabled by default when using NCCL user buffers.")
    group.add_argument('--suggested-communication-unit-size', type=int, default=None,
                   help='Specifies the number of elements to communicate at once during FSDP (Fully Sharded Data Parallel) operations. '
                        'This flag also affects FSDP all-gather prefetch behavior. Setting a larger value increases the communication buffer size, '
                        'while a smaller value disables prefetching and may degrade performance. Adjust this value based on your system\'s memory '
                        'and performance requirements.')
    group.add_argument('--keep-fp8-transpose-cache', action='store_true',
                       help='If set, keep the fp8 transpose cache when using Megatron FSDP.')
    group.add_argument('--enable-full-sharding-in-hsdp', action='store_true',
                       help='If set, enable full sharding in megatron-fsdp Hybrid Sharded Data Parallel (HSDP) mode.')
    group.add_argument('--num-distributed-optimizer-instances', type=int, default=1,
                       help='Number of Distributed Optimizer copies across Data Parallel domain.')
    group.add_argument('--torch-fsdp2-no-reshard-after-forward', action='store_false', dest='torch_fsdp2_reshard_after_forward',
                       help='Whether to reshard weights after forward pass when using PyTorch FSDP2. '
                       'Set to enable FSDP ZeRO-2.')
    group.add_argument('--cp-comm-type', nargs='+', type=str, default=["p2p"],
                       help='Inter-gpu communication type for context parallelism: '
                       'p2p, a2a, allgather or a2a+p2p. If a single string is provided, '
                       'all layers will share the same communication type. Users can also '
                       'specify separated types for each layer like '
                       '--cp-comm-type p2p p2p a2a a2a a2a+p2p a2a+p2p')
    group.add_argument('--fake-process-group', action='store_true', default=False,
                       help='If set, initialize with fake distributed process group and all distributed communication operations will be skipped. \
                       This is quite useful for profiling memory usage of distributed training with just one GPU. \
                       Setting WORLD_SIZE and RANK to the specific values for target distribtued scale.')
    return parser


def _add_validation_args(parser):
    from megatron.training.config import ValidationConfig

    val_factory = ArgumentGroupFactory(ValidationConfig)
    group = val_factory.build_group(parser, "validation")

    return parser


def _add_tokenizer_args(parser):
    group = parser.add_argument_group(title='tokenizer')
    group.add_argument('--vocab-size', type=int, default=None,
                       help='Size of vocab before EOD or padding.')
    group.add_argument('--padded-vocab-size', type=int, default=None,
                       help='Vocabulary size of the model (padded to be divisible by '
                       'tensor model parallel size). If not provided, it will be '
                       'automatically calculated from vocab-size.')
    group.add_argument('--vocab-file', type=str, default=None,
                       help='Path to the vocab file.')
    group.add_argument('--merge-file', type=str, default=None,
                       help='Path to the BPE merge file.')
    group.add_argument('--vocab-extra-ids', type=int, default=0,
                       help='Number of additional vocabulary tokens. '
                            'They are used for span masking in the T5 model')
    group.add_argument('--tokenizer-type', type=str,
                       default=None,
                       choices=['BertWordPieceLowerCase',
                                'BertWordPieceCase',
                                'GPT2BPETokenizer',
                                'SentencePieceTokenizer',
                                'GPTSentencePieceTokenizer',
                                'HuggingFaceTokenizer',
                                'Llama2Tokenizer',
                                'TikTokenizer',
                                'MultimodalTokenizer',
                                'NullTokenizer',
                                'NullMultimodalTokenizer',
                                'SFTTokenizer'],
                       help='What type of tokenizer to use.')
    group.add_argument('--tokenizer-model', type=str, default=None,
                       help='Sentencepiece tokenizer model.')
    group.add_argument('--tokenizer-metadata', type=str, default=None,
                       help='Path to tokenizer metadata in json format.')
    group.add_argument('--tokenizer-special-tokens', type=str, nargs='+', default=None,
                       help='List of special tokens. For TikTokenizer needs to have '
                            '["<unk>", "<s>", "</s>", "<mask>", "<pad>", "<cls>", "<sep>"]')
    group.add_argument('--tiktoken-pattern', type=str, default=None,
                       help='Which tiktoken pattern to use. Options: [v1, v2]')
    group.add_argument('--tiktoken-num-special-tokens', type=int, default=1000,
                       help='Number of special tokens in tiktoken tokenizer')
    group.add_argument('--tiktoken-special-tokens', type=str, nargs='+', default=None,
                       help='List of tiktoken special tokens, needs to have '
                            '["<unk>", "<s>", "</s>", "<mask>", "<pad>", "<cls>", "<sep>"]')
    group.add_argument('--tokenizer-sentencepiece-legacy', action='store_true', default=False,
                       help='SentencePiece tokenizer wrapper legacy behavior. Allows special tokens usage.')
    group.add_argument('--tokenizer-hf-use-fast', action='store_true', default=True,
                       help='Whether to use fast HuggingFace tokenizer.')
    group.add_argument('--tokenizer-hf-include-special-tokens', action='store_true', default=True,
                       help='Converting text to ids will include special for HuggingFace tokenizer.')
    group.add_argument('--tokenizer-hf-no-use-fast', action='store_true', default=False,
                       help='Whether to use fast HuggingFace tokenizer.')
    group.add_argument('--tokenizer-hf-no-include-special-tokens', action='store_true', default=False,
                       help='Converting text to ids will not include special for HuggingFace tokenizer.')
    group.add_argument("--trust-remote-code", action="store_true", default=False,
                       help='Whether or not to allow PreTrainedTokenizer to execute remote code')
    return parser


def _add_data_args(parser):
    group = parser.add_argument_group(title='data and dataloader')

    group.add_argument('--data-path', nargs='*', default=None,
                       help='The weight and prefix list for a set of train, validation, and test'
                       'datasets which split according to --split. The accepted formats are: '
                       '(1) a single prefix, '
                       '(2) a list of weight prefix pairs e.g. weight1 prefix1 weight2 prefix2, '
                       '(3) a list of prefixes e.g. prefix1 prefix2. '
                       'For (3), weights are inferred from the lengths of the contributing datasets. '
                       'This argument is exclusive to the other independent --*-data-path arguments.')
    group.add_argument('--phase-transition-iterations', type=str, default=None,
                       help='Comma-separated list of iterations where phase '
                       'transitions occur. Requires fixed global batch size across phases. '
                       'Does not support batch size ramp-up.')
    group.add_argument('--split', type=str, default=None,
                       help='Comma-separated list of proportions for training,'
                       ' validation, and test split. For example the split '
                       '`90,5,5` will use 90%% of data for training, 5%% for '
                       'validation and 5%% for test.')
    group.add_argument('--train-data-path', nargs='*', default=None,
                       help='The weight and prefix list for an independent train dataset. '
                       'Follows the same pattern rules as --data-path.')
    group.add_argument('--valid-data-path', nargs='*', default=None,
                       help='The weight and prefix list for an independent validation dataset. '
                       'Follows the same pattern rules as --data-path.')
    group.add_argument('--test-data-path', nargs='*', default=None,
                       help='The weight and prefix list for an independent test dataset. '
                       'Follows the same pattern rules as --data-path.')
    group.add_argument('--data-args-path', type=str, default=None,
                       help='Path to data-args. Instead of feeding `--data-path` '
                       'with weighted dataset, we pass in a file path from which '
                       'we read that argument. This is useful when the list of data is '
                       'too big.')
    group.add_argument('--per-split-data-args-path', type=str, default=None,
                       help='Path to per-split-data-args. Instead of feeding '
                       '`--(train|valid|test)-data-path` with weighted dataset, '
                       'we pass in a file path from which we read those arguments. '
                       'This is useful when the list of data is too big. Format is a '
                       'json file with `train`, `valid, `test` keys')
    group.add_argument('--per-dataset-sequences-path', default=None,
                       help='Path to a json file with the sequences per dataset. Check the tools/build_sequences_per_dataset.py script to build this file.')
    group.add_argument('--dataloader-fast-cache-load', action='store_true',
                       help='Option to use the fast cache loading path when building the datasets. Requires all the dataset caches to be built and stored in --data-cache-path.')
    group.add_argument('--dataloader-defer-npy-index-mmap', action='store_true',
                       help='Defer the mmap of the dataset indexes (.npy files) until the first access. Requires all the dataset caches to be built and stored in --data-cache-path.')
    group.add_argument('--data-cache-path', default=None,
                       help='Path to a directory to hold cached index files.')
    group.add_argument('--no-mmap-bin-files', action='store_false',
                       help='Disable mmap-ing of .bin files.',
                       dest='mmap_bin_files')
    group.add_argument('--mock-data', action='store_true',
                       help='Skip data loading and validation and opt for artificial '
                       'generation of mock data when an implementation is available.')
    group.add_argument('--seq-length', type=int, default=None,
                       help='Maximum sequence length to process.')
    group.add_argument('--encoder-seq-length', type=int, default=None,
                       help='Maximum encoder sequence length to process.'
                       'This should be exclusive of --seq-length')
    group.add_argument('--decoder-seq-length', type=int, default=None,
                       help="Maximum decoder sequence length to process.")
    group.add_argument('--sample-rate', type=float, default=1.0,
                       help='sample rate for training data. Supposed to be 0 '
                            ' < sample_rate < 1')
    group.add_argument('--mask-prob', type=float, default=0.15,
                       help='Probability of replacing a token with mask.')
    group.add_argument('--short-seq-prob', type=float, default=0.1,
                       help='Probability of producing a short sequence.')
    group.add_argument('--num-workers', type=int, default=2,
                       help="Dataloader number of workers.")
    group.add_argument('--reset-position-ids', action='store_true',
                       help='Reset posistion ids after end-of-document token.')
    group.add_argument('--reset-attention-mask', action='store_true',
                       help='Reset self attention mask after '
                       'end-of-document token.')
    group.add_argument('--eod-mask-loss', action='store_true',
                       help='Mask loss for the end of document tokens.')
    group.add_argument('--no-create-attention-mask-in-dataloader', action='store_false',
                       help='If set, do not create attention_masks in dataloader.',
                       dest='create_attention_mask_in_dataloader')
    group.add_argument('--num-dataset-builder-threads', type=int, default=1,
                       help='Number of parallel threads per rank for dataset builder')
    group.add_argument('--object-storage-cache-path', type=str, default=None,
                       help='Path to cache index files when using s3 or msc dataloader')
    group.add_argument('--mid-level-dataset-surplus', type=float, default=0.005,
                       help='The sample surplus to build for the mid-level datasets(s)')
    group.add_argument('--allow-ambiguous-pad-tokens', action='store_true',
                       help='Whether to prevent pad tokens already present in the dataset '
                       'from being masked out when the pad token incorrectly shares the same id '
                       'with other special tokens in the tokenizer. Note that this argument has '
                       'no effect when the tokenizer correctly provides a unique id for the pad. '
                       'Masking out such ambiguous pad tokens results in training instability. '
                       'Such a scenario is best resolved by fixing the tokenizer; leaving this '
                       'option as False provides a workaround. '
                       'When left to the default of False, any token ids that collide with the '
                       'pad token id - as provided by the tokenizer - will not be masked out of '
                       'the loss calculation: it cannot be determined whether they are truly pad. '
                       'If instead this argument is set, the training flow will treat all tokens '
                       'that share the same id as the pad token as true pad tokens, potentially '
                       'causing severe training instability.')
    group.add_argument('--fim-data', action='store_true', help='Whether to use the FIM dataset.')
    group.add_argument('--fim-rate', type=float, default=0.5,
                       help='Probability to convert a training sample into a FIM format.')
    group.add_argument('--fim-spm-rate', type=float, default=0.5,
                       help='Probability that the a FIM sample uses the SPM format over the PSM format.')
    group.add_argument('--fim-split-sample', type=str, default=None,
                       help='String around which to split the sample for FIM.')
    group.add_argument('--fim-fragment-rate', type=float, default=None,
                       help='Rate of FIM on each fragment when --fim-split-sample is not None.')
    group.add_argument('--fim-no-prefix', type=str, default=None,
                       help='Do not apply FIM to fragments that start with this prefix')
    group.add_argument('--fim-prefix-token', type=str, default='<fim_prefix>',
                       help='FIM prefix token')
    group.add_argument('--fim-middle-token', type=str, default='<fim_middle>',
                       help='FIM middle token')
    group.add_argument('--fim-suffix-token', type=str, default='<fim_suffix>',
                       help='FIM suffix token')
    group.add_argument('--fim-pad-token', type=str, default='<fim_pad>',
                       help='FIM PAD token')
    group.add_argument('--fim-eod-token', type=str, default='<|endoftext|>',
                       help='FIM EOD token')
    return parser


def _add_autoresume_args(parser):
    group = parser.add_argument_group(title='autoresume')

    group.add_argument('--adlr-autoresume', action='store_true',
                       help='Enable autoresume on adlr cluster.')
    group.add_argument('--adlr-autoresume-interval', type=int, default=1000,
                       help='Intervals over which check for autoresume'
                       'termination signal')

    return parser


def _add_biencoder_args(parser):
    group = parser.add_argument_group(title='biencoder')

    # network size
    group.add_argument('--ict-head-size', type=int, default=None,
                       help='Size of block embeddings to be used in ICT and '
                        'REALM (paper default: 128)')
    group.add_argument('--biencoder-projection-dim', type=int, default=0,
                       help='Size of projection head used in biencoder (paper'
                        ' default: 128)')
    group.add_argument('--biencoder-shared-query-context-model', action='store_true',
                        help='Whether to share the parameters of the query '
                        'and context models or not')

    # checkpointing
    group.add_argument('--ict-load', type=str, default=None,
                       help='Directory containing an ICTBertModel checkpoint')
    group.add_argument('--bert-load', type=str, default=None,
                       help='Directory containing an BertModel checkpoint '
                       '(needed to start ICT and REALM)')

    # data
    group.add_argument('--titles-data-path', type=str, default=None,
                       help='Path to titles dataset used for ICT')
    group.add_argument('--query-in-block-prob', type=float, default=0.1,
                       help='Probability of keeping query in block for '
                       'ICT dataset')
    group.add_argument('--use-one-sent-docs', action='store_true',
                       help='Whether to use one sentence documents in ICT')
    group.add_argument('--evidence-data-path', type=str, default=None,
                       help='Path to Wikipedia Evidence frm DPR paper')

    # training
    group.add_argument('--retriever-report-topk-accuracies', nargs='+', type=int,
                        default=[], help="Which top-k accuracies to report "
                        "(e.g. '1 5 20')")
    group.add_argument('--retriever-score-scaling', action='store_true',
                       help='Whether to scale retriever scores by inverse '
                        'square root of hidden size')

    # faiss index
    group.add_argument('--block-data-path', type=str, default=None,
                       help='Where to save/load BlockData to/from')
    group.add_argument('--embedding-path', type=str, default=None,
                       help='Where to save/load Open-Retrieval Embedding'
                        ' data to/from')

    # indexer
    group.add_argument('--indexer-batch-size', type=int, default=128,
                       help='How large of batches to use when doing indexing '
                       'jobs')
    group.add_argument('--indexer-log-interval', type=int, default=1000,
                       help='After how many batches should the indexer '
                       'report progress')
    return parser


def _add_vision_args(parser):
    group = parser.add_argument_group(title="vision")

    # general vision arguements
    group.add_argument('--num-classes', type=int, default=1000,
                       help='num of classes in vision classificaiton task')
    group.add_argument('--img-h', type=int, default=224,
                       help='Image height for vision classification task')
    group.add_argument('--img-w', type=int, default=224,
                       help='Image height for vision classification task')
    group.add_argument('--num-channels', type=int, default=3,
                       help='Number of channels in input image data')
    group.add_argument('--patch-dim', type=int, default=16,
                       help='patch dimension')
    group.add_argument('--classes-fraction', type=float, default=1.0,
                       help='training with fraction of classes.')
    group.add_argument('--data-per-class-fraction', type=float, default=1.0,
                       help='training with fraction of data per class.')
    group.add_argument('--no-data-sharding', action='store_false',
                       help='Disable data sharding.',
                       dest='data_sharding')
    group.add_argument('--head-lr-mult', type=float, default=1.0,
                       help='learning rate multiplier for head during finetuning')

    # pretraining type and backbone selection`
    group.add_argument('--vision-pretraining', action='store_true',
                       help='flag to indicate vision pretraining')
    group.add_argument('--vision-pretraining-type', type=str, default='classify',
                       choices=['classify', 'inpaint', 'dino'],
                       help='pretraining objectives')
    group.add_argument('--vision-backbone-type', type=str, default='vit',
                       choices=['vit', 'mit', 'swin'],
                       help='backbone types types')
    group.add_argument('--swin-backbone-type', type=str, default='tiny',
                       choices=['tiny', 'base', 'h3'],
                       help='pretraining objectives')
    # inpainting arguments
    group.add_argument('--mask-type', type=str, default='random',
                       choices=['random', 'row'],
                       help='mask types')
    group.add_argument('--mask-factor', type=float, default=1.0,
                       help='mask size scaling parameter')

    # dino arguments
    group.add_argument('--iter-per-epoch', type=int, default=1250,
                       help='iterations per epoch')
    group.add_argument('--dino-local-img-size', type=int, default=96,
                       help='Image size for vision classification task')
    group.add_argument('--dino-local-crops-number', type=int, default=10,
                       help='Number of local crops')
    group.add_argument('--dino-head-hidden-size', type=int, default=2048,
                       help='Hidden dimension size in dino head')
    group.add_argument('--dino-bottleneck-size', type=int, default=256,
                       help='Bottle neck dimension in dino head ')
    group.add_argument('--dino-freeze-last-layer', type=float, default=1,
                       help='Freezing last layer weights')
    group.add_argument('--dino-norm-last-layer', action='store_true',
                       help='Disable Norm in last layer.')
    group.add_argument('--dino-warmup-teacher-temp', type=float, default=0.04,
                       help='warump teacher temperature')
    group.add_argument('--dino-teacher-temp', type=float, default=0.07,
                       help='teacher temperature')
    group.add_argument('--dino-warmup-teacher-temp-epochs', type=int, default=30,
                       help='warmup teacher temperaure epochs')

    return parser

def _add_moe_args(parser):
    group = parser.add_argument_group(title="moe")
    # General arguments
    group.add_argument('--num-experts', type=int, default=None,
                       help='Number of Experts in MoE (None means no MoE)')
    group.add_argument('--moe-layer-freq', type=moe_freq_type, default=1,
                       help='Frequency between MoE layers and Dense layers. Accepts either: '
                            '- An integer N: Represents a 1:N ratio, meaning one expert layer for every N-1 dense layers '
                            '- A string containing a Python list expression that defines a custom pattern, e.g.: '
                            '"([1]*3+[0]*1)*3" evaluates to [1,1,1,0,1,1,1,0,1,1,1,0] '
                            'where 1 indicates an expert layer and 0 indicates a dense layer. '
                            'Examples: "([0]+[1]*23)": 1 dense layer followed by 23 expert layers, '
                            '"([1]*3+[0]*2)*2": Three expert layers followed by two dense layers, repeated twice.')
    group.add_argument('--moe-use-upcycling', action='store_true',
                       help='Load a checkpoint of a dense model, convert it into an MoE model, and save the converted model to the path specified by --save. '
                       'Upcycling is implemented on the top of distributed checkpointing, so it supports parallel modes different from the dense model.')
    # Router arguments
    group.add_argument('--moe-router-load-balancing-type', nargs='+', type=str,
                       choices=['aux_loss', 'seq_aux_loss', 'global_aux_loss', 'sinkhorn', 'none'],
                       default='aux_loss',
                       help='Determines the load balancing strategy for the router. "aux_loss" corresponds to the load balancing loss used in GShard and SwitchTransformer; "seq_aux_loss" corresponds to the load balancing loss used in DeepSeekV2, which computes the loss for each individual sample; "sinkhorn" corresponds to the balancing algorithm used in S-BASE, and "none" implies no load balancing. The default is "aux_loss".')
    group.add_argument('--moe-aux-loss-coeff', type=float, nargs='+', default=0.0,
                       help='Scaling coefficient for the aux loss: a starting value of 1e-2 is recommended.')
    # Token dispatcher arguments
    # MoE communication overlap arguments

    group.add_argument('--moe-upcycling-granularity', type=int, default=1,
                       help='This param sepecifics how many times smaller is the expert hidden size compared with the original dense FFN hidden size. '
                       'For using granular upcycling strategy, please set this param as a positive integer. If this param is set to 1, it means using the default upcycling strategy.')
    return parser

def _add_mla_args(parser):
    group = parser.add_argument_group(title="mla")
    group.add_argument('--q-lora-rank', type=int, default=None,
                       help="Rank of Query tensor's low rank representation.")
    group.add_argument('--kv-lora-rank', type=int, default=32,
                       help="Rank of Key and Value tensors' low rank representation.")
    group.add_argument('--qk-head-dim', type=int, default=128,
                       help="Dimension of the head in the QK projection. q_head_dim = qk_head_dim + qk_pos_emb_head_dim")
    group.add_argument('--qk-pos-emb-head-dim', type=int, default=64,
                       help="Dimension of the position embedding in the QK projection.")
    group.add_argument('--v-head-dim', type=int, default=128,
                       help="Dimension of the head in the V projection.")
    group.add_argument('--rotary-scaling-factor', type=float, default=1.0,
                       help="Rotary scaling factor for the rotary embeddings.")
    group.add_argument('--mscale', type=float, default=1.0,
                       help="Mscale for YaRN RoPE in multi-latent attention.")
    group.add_argument('--mscale-all-dim', type=float, default=0.0,
                       help="Mscale all dimensions for YaRN RoPE in multi-latent attention.")
    group.add_argument('--cache-mla-latents', action='store_true', default=False,
                       help="If set caches the mla down projected latents with mla flash decode.")
    group.add_argument(
        '--mla-down-proj-fusion',
        action='store_true',
        default=False,
        help="Enable fused q/kv down-projection and fused input layernorm when backend supports. "
             "Otherwise fall back to the unfused MLA.",
    )

    return parser

def _add_experimental_attention_variant_args(parser):
    group = parser.add_argument_group(title="experimental_attention_variant")
    # Linear attention
    group.add_argument('--linear-attention-freq', type=la_freq_type, default=None,
                       help='Frequency between LA (linear attention) layers and'
                            ' SDPA (scaled dot-product attention) layers. Accepts either: '
                            '- An integer N: Represents a (N-1):N ratio, meaning (N-1) LA layers for every 1 SDPA layer '
                            '- A string containing a Python list expression that defines a custom pattern, e.g.: '
                            '"([1]*3+[0]*1)*3" evaluates to [1,1,1,0,1,1,1,0,1,1,1,0] '
                            'where 1 indicates an LA layer and 0 indicates a SDPA layer. '
                            'Examples: "([0]+[1]*23)": 1 SDPA layer followed by 23 LA layers, '
                            '"([1]*3+[0]*2)*2": Three LA layers followed by two SDPA layers, repeated twice.')
    return parser

def _add_heterogeneous_args(parser):
    """
    Heterogeneous models refer to transformer architectures where individual layers can differ
    in configuration. Specifically:
        - Attention or MLP layers can be replaced with either a linear layer or a no-op
        - MLP intermediate dimensions can vary between layers
    We use the format of the HuggingFace config files in llama nemotron models to define the architecture.
    For example, https://huggingface.co/nvidia/Llama-3_3-Nemotron-Super-49B-v1/resolve/main/config.json

    Most notably, the "block_config" maps to a list of attention and mlp configurations for each layer.
    For example, the "block_config" for a 2 layer model is:
     "block_configs": [
        {
            "attention": {
                "n_heads_in_group": 8,
                "no_op": false,
                "replace_with_linear": false,
            },
            "ffn": {
                "ffn_mult": 2.625,
                "no_op": false,
                "replace_with_linear": false,
            }
        },
        {
            "attention": {
                "n_heads_in_group": null,
                "no_op": true,
                "replace_with_linear": false,
            },
            "ffn": {
                "ffn_mult": 2.625,
                "no_op": false,
                "replace_with_linear": false,
            }
        }
    ]
    """
    group = parser.add_argument_group(title="heterogeneous architecture")
    group.add_argument('--heterogeneous-layers-config-path', type=str, default=None,
                       help='Path to json file containing heterogeneous model configuration. '
                       'Use the format of the HuggingFace config files in llama nemotron '
                       'models, e.g. https://huggingface.co/nvidia/Llama-3_3-Nemotron-Super-49B-v1/resolve/main/config.json.')
    group.add_argument('--heterogeneous-layers-config-encoded-json', type=str, default=None,
                       help='This is encoded json string of the heterogeneous model configuration. Used to keep the content '
                       'of the heterogeneous model specification in args when the model is loaded from a checkpoint. '
                       'Use the format of the HuggingFace config files in llama nemotron '
                       'models, e.g. https://huggingface.co/nvidia/Llama-3_3-Nemotron-Super-49B-v1/resolve/main/config.json.')
    return parser

def _add_experimental_args(parser):
    group = parser.add_argument_group(title='experimental')

    group.add_argument('--enable-experimental', action='store_true',
                       help='Enable experimental features.')
    group.add_argument('--spec', type=str, default=None, nargs='*',
                       help='Specify the <module_location function_name> pair '
                       'that returns a spec to customize a model, transformer '
                       'block, or transformer layer, depending on the use case.'
                       'To use local spec specify local as the argument.'
                       'For more details, see the model class, '
                       '`transformer_block.py`, or `transformer_layer.py`')
    group.add_argument('--hybrid-layer-pattern', type=str, default=None,
                       help='Specify a hybrid layer pattern using M (mamba), * (attention), '
                       '- (mlp), E (moe). Use | to define pipeline stage boundaries for '
                       'flexible virtual pipeline parallel (fVPP). Use / to separate MTP '
                       'patterns. Example: "M-M-|M-M*-|M-M-|M-M*-" or "M-M-|M-M*-/MM/MM". '
                       'When this flag is used, it is the sole indicator that a hybrid model '
                       'is being run.')
    group.add_argument('--hybrid-override-pattern', type=str, default=None,
                       help='Deprecated. Use --hybrid-layer-pattern instead. '
                       'If specified, its value will be forwarded to --hybrid-layer-pattern.')
    group.add_argument('--yaml-cfg', type=str, default=None,
                       help = 'Config file to add additional arguments')

    # Args of precision-aware optimizer.
    group.add_argument('--use-precision-aware-optimizer', action='store_true',
                       help='Use the precision-aware optimizer in TransformerEngine, which allows '
                       'setting the main params and optimizer states to lower precision, such as '
                       'fp16, bf16 and fp8.')
    group.add_argument('--main-grads-dtype', default='fp32', choices=['fp32', 'bf16'],
                       help='Dtype of main grads when enabling precision-aware-optimizer.')
    group.add_argument('--main-params-dtype', default='fp32', choices=['fp32', 'fp16'],
                       help='Dtype of main params when enabling precision-aware-optimizer.')
    group.add_argument('--exp-avg-dtype', default='fp32', choices=['fp32', 'fp16', 'bf16', 'fp8'],
                       help='Dtype of exp_avg (1st moment in adam optimizer) when enabling '
                            'precision-aware-optimizer. This dtype is used for storing the '
                            'optimizer state in memory during training but does not affect '
                            'the precision in the kernel computation.')
    group.add_argument('--exp-avg-sq-dtype', default='fp32', choices=['fp32', 'fp16', 'bf16', 'fp8'],
                       help='Dtype of exp_avg_sq (2nd moment in adam optimizer) when enabling '
                            'precision-aware-optimizer. This dtype is used for storing the '
                            'optimizer state in memory during training but does not affect '
                            'the precision in the kernel computation.')

    # Megatron-FSDP Arguments
    group.add_argument('--megatron-fsdp-main-params-dtype', default='fp32', choices=['fp32', 'bf16', 'fp16', 'auto'],
                       help="Data type for the main weight buffer utilized for distributed optimization "
                            "and quantization with Megatron-FSDP. If 'auto', then the native model parameter "
                            "data-type will be used for the main weight data-type.")
    group.add_argument('--megatron-fsdp-main-grads-dtype', default='auto', choices=['fp32', 'bf16', 'fp16', 'auto'],
                       help="Data type for the main gradient buffer utilized for distributed optimization "
                            "with Megatron-FSDP. If 'auto', then the native model gradient data-type will "
                            "be used for the main gradient / accumulation data-type.")
    group.add_argument("--megatron-fsdp-grad-comm-dtype", default='auto', choices=['fp32', 'fp16', 'bf16', 'auto'],
                        help="When using Megatron-FSDP, this controls the data-type used when communicating "
                             "model gradients during FSDP. If 'auto', then the main gradient data-type will "
                             "be used for the gradient communication / reduction data-type. When using NCCL "
                             "v2.27+, reduction is always computed in FP32 if using NCCL Symmetric kernels.")
    
    return parser


def _add_msc_args(parser):
    group = parser.add_argument_group(title="msc")
    group.add_argument('--disable-msc', default=True, action='store_false', dest='enable_msc',
                       help='Disable the usage of Multi-Storage Client (MSC) in Megatron Core.')
    return parser

def _add_kitchen_quantization_arguments(parser: argparse.ArgumentParser):
    """Add quant-specific arguments to the main parser

    If kitchen isn't available, nothing to do here, return unchanged parser
    """
    try:
        from megatron.core.extensions.kitchen import KitchenSpecProvider, HAVE_KITCHEN

    except (ImportError, ModuleNotFoundError):
        HAVE_KITCHEN = False

    if HAVE_KITCHEN:
        group = parser.add_argument_group(title="kitchen")
        recipe_or_config_group = group.add_mutually_exclusive_group(required=False)
        recipe_or_config_group.add_argument(
            '--kitchen-config-file',
            type=str,
            default=None,
            help="Use the config .yaml file at the specified location to "
            "configure kitchen quantization.",
        )
        recipe_or_config_group.add_argument(
            '--kitchen-recipe-number',
            type=int,
            default=None,
            help="Use a default kitchen recipe for all linear layers as defined by QAT_PARAMS index. "
            "The argument has no effect on attention layers.",
        )
    return parser

def _add_sft_args(parser):
    group = parser.add_argument_group(title='sft')
    group.add_argument('--sft', action="store_true", help='Megatron SFT training')
    group.add_argument('--sft-tokenizer-prompt-format', type=str, default="nemotron-h-aligned",
                       help='SFT prompt format.')
    return parser


================================================
FILE: megatron/training/async_utils.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""
This module provides a singleton instance of AsyncCallsQueue which manages
the async checkpoint save calls.
"""
import logging
import time

from megatron.core.dist_checkpointing.strategies.async_utils import AsyncCallsQueue, AsyncRequest
from megatron.core.dist_checkpointing.strategies.cached_metadata_filesystem_reader import (
    CachedMetadataFileSystemReader,
)
from megatron.core.dist_checkpointing.strategies.filesystem_async import _results_queue, get_write_results_queue
from megatron.training import get_args
from megatron.training.utils import print_rank_0

logger = logging.getLogger(__name__)

# Singleton manager of async calls
# The default is `TemporalAsyncCaller`
_async_calls_queue = AsyncCallsQueue()


def init_persistent_async_worker(rank: int, mp_mode: str = 'spawn'):
    global _async_calls_queue
    args = get_args()
    # Recreate the async_calls_queue for persistent worker
    # This duplicate step is for backward compatiblity
    time_start = time.time()
    if rank == 0:
        print(f"init_persistent_async_worker: {rank}, Starting Async Caller", flush=True)
    _async_calls_queue = AsyncCallsQueue(persistent=True)
    # initialize the persistent caller with QoS priorities from args
    AsyncCallsQueue.warmup_persistent_caller(
        rank,
        mp_mode,
        cpu_priority=args.async_ckpt_cpu_priority,
        io_priority=args.async_ckpt_io_priority,
    )
    # initialize ckpt write results queue
    get_write_results_queue('fork')
    if rank == 0:
        print(f"init_persistent_async_worker: rank {rank}, Async Caller Started in {time.time() - time_start} seconds", flush=True)


def schedule_async_save(async_request: AsyncRequest):
    """Schedule the async save request.

    Args:
        async_request (AsyncRequest): the async save request.
    """
    _async_calls_queue.schedule_async_request(async_request)


def maybe_finalize_async_save(blocking: bool = False, terminate=False):
    """Finalizes active async save calls and cleans up deletion processes.

    Args:
        blocking (bool, optional): if True, will wait until all active requests
            are done. Otherwise, finalizes only the async request that already
            finished. Defaults to False.
        terminate (bool, optional): if True, the asynchronous queue will
                be closed as the last action of this function.
    """
    args = get_args()
    if not args.async_save:
        return

    if blocking and not is_empty_async_queue():
        print_rank_0('Unfinalized async checkpoint saves. Finalizing them synchronously now.')

    _async_calls_queue.maybe_finalize_async_calls(blocking, no_dist=False)

    # Clean up finished deletion processes to prevent zombies
    # Import here to avoid circular dependency
    from .checkpointing import finalize_deletion_processes
    finalize_deletion_processes(blocking=blocking or terminate)

    if terminate:
        _async_calls_queue.close()


def is_empty_async_queue() -> bool:
    """Check if async calls queue is empty. This result is consistent across ranks.

    Returns:
        bool: True if there is any ongoing async call.
    """
    return _async_calls_queue.get_num_unfinalized_calls() == 0


def reset_persistent_async_worker():
    global _async_calls_queue, _results_queue
    
    if _async_calls_queue is not None:
        _async_calls_queue.close(abort=True)
        del _async_calls_queue
    if _results_queue is not None:
        _results_queue._manager.shutdown()
        del _results_queue
    _results_queue = None
    _async_calls_queue = None
    CachedMetadataFileSystemReader.clear_metadata_cache()


================================================
FILE: megatron/training/checkpointing.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Input/output checkpointing."""

import contextlib
import multiprocessing
import os
import random
import shutil
import sys
import threading
import types
from argparse import Namespace
from datetime import datetime
from enum import Enum, auto
from logging import getLogger
from pathlib import Path
from time import time

import numpy as np
import torch
from typing import Optional, Union, List, Dict, Any
from torch.distributed.checkpoint import FileSystemReader, default_planner

from megatron.core import dist_checkpointing, mpu, tensor_parallel
from megatron.core.dist_checkpointing.mapping import ShardedObject
from megatron.core.dist_checkpointing.serialization import get_default_load_sharded_strategy
from megatron.core.dist_checkpointing.strategies.fully_parallel import (
    FullyParallelLoadStrategyWrapper,
    FullyParallelSaveStrategyWrapper,
)
from megatron.core.msc_utils import MultiStorageClientFeature, open_file
from megatron.core.num_microbatches_calculator import update_num_microbatches
from megatron.core.utils import get_pg_rank, get_pg_size
from megatron.core.optimizer import DistributedOptimizer
from megatron.core.rerun_state_machine import get_rerun_state_machine
from megatron.core.utils import get_torch_version, is_torch_min_version

from ..core.dist_checkpointing.serialization import get_default_save_sharded_strategy
from ..core.dist_checkpointing.utils import _clean_metadata_for_serialization
from . import ft_integration, wandb_utils
from .async_utils import is_empty_async_queue, schedule_async_save
from megatron.core.dist_checkpointing.strategies.async_utils import AsyncRequest, _disable_gc
from .global_vars import get_args
from .one_logger_utils import on_save_checkpoint_start, on_save_checkpoint_success
from .utils import append_to_progress_log, is_last_rank, print_rank_0, unwrap_model

try:
    from megatron.core.distributed.fsdp.src.megatron_fsdp.uneven_dtensor import preprocess_state_dict_for_uneven_dtensor
    from megatron.core.transformer.fsdp_dtensor_checkpoint import (
        print_diff_in_state_dicts,
        handle_fp8_extra_state_case,
        handle_swiglu_in_state_dict,
        handle_experts_in_state_dict,
    )
    HAVE_MEGATRON_FSDP = True
except ImportError:
    HAVE_MEGATRON_FSDP = False


# [ModelOpt]: Import
try:
    from modelopt.torch.opt.plugins import save_modelopt_state, save_sharded_modelopt_state
    from megatron.post_training.utils import print_distributed_quant_summary
    has_nvidia_modelopt = True
except Exception:
    has_nvidia_modelopt = False

_CHECKPOINT_VERSION = None
_LOADED_ITERATION = None

logger = getLogger(__name__)
_NON_PERSISTENT_CKPT_SUBDIR = 'non_persistent'

# Track deletion processes to prevent zombies
_deletion_processes = []

def finalize_deletion_processes(blocking=False):
    """Clean up deletion processes to prevent zombie processes.

    Args:
        blocking (bool): If True, waits for all deletion processes to complete.
                        If False, only joins processes that have already finished.

    Note: Deletion processes are daemon processes (auto-terminate if parent dies),
    but we still need to join() them to reap zombie processes when they complete normally.
    The daemon flag and join() serve different purposes:
    - daemon=True: Auto-terminate if parent process dies abruptly
    - join(): Reap zombie processes after normal completion
    """
    global _deletion_processes
    finished = []
    for proc in _deletion_processes:
        if not proc.is_alive() or blocking:
            logger.debug(f"Joining deletion process {proc.pid} (blocking={blocking}, is_alive={proc.is_alive()})")
            proc.join()
            finished.append(proc)
    for proc in finished:
        _deletion_processes.remove(proc)

def set_checkpoint_version(value):
    global _CHECKPOINT_VERSION
    if _CHECKPOINT_VERSION is not None:
        assert _CHECKPOINT_VERSION == value, \
            "checkpoint versions do not match"
    _CHECKPOINT_VERSION = value


def get_checkpoint_version():
    global _CHECKPOINT_VERSION
    return _CHECKPOINT_VERSION


def set_loaded_iteration(value):
    """Set the iteration that was loaded from checkpoint.

    This is stored separately from args to avoid polluting the checkpoint
    with runtime state (args is saved in checkpoints).
    """
    global _LOADED_ITERATION
    _LOADED_ITERATION = value


def get_loaded_iteration():
    """Get the iteration that was loaded from checkpoint, or None if no checkpoint was loaded."""
    global _LOADED_ITERATION
    return _LOADED_ITERATION


def check_checkpoint_args(checkpoint_args):
    """Ensure fixed arguments for a model are the same for the input
    arguments and the one retrieved from checkpoint."""
    args = get_args()

    def _compare(arg_name, old_arg_name=None, default=None):
        if old_arg_name is not None:
            ckpt_arg_name = old_arg_name
        else:
            ckpt_arg_name = arg_name
        if default is not None:
            checkpoint_value = getattr(checkpoint_args, ckpt_arg_name, default)
        else:
            checkpoint_value = getattr(checkpoint_args, ckpt_arg_name)
        args_value = getattr(args, arg_name)
        error_message = '{} value from checkpoint ({}) is not equal to the ' \
                        'input argument value ({}).'.format(
                            arg_name, checkpoint_value, args_value)
        assert checkpoint_value == args_value, error_message

    _compare('num_layers')
    _compare('hidden_size')
    _compare('num_attention_heads')
    _compare('add_position_embedding', default=True)
    if args.vocab_file:
        _compare('max_position_embeddings')
        _compare('make_vocab_size_divisible_by')
        if not args.use_dist_ckpt:
            _compare('padded_vocab_size')
        _compare('tokenizer_type')
    if args.data_parallel_random_init:
        _compare('data_parallel_random_init')
    if args.phase_transition_iterations:
        _compare('global_batch_size')
    if get_checkpoint_version() < 3.0:
        _compare('tensor_model_parallel_size',
                 old_arg_name='model_parallel_size')
    if get_checkpoint_version() >= 3.0 and not args.use_dist_ckpt:
        _compare('tensor_model_parallel_size')
        _compare('pipeline_model_parallel_size')


def isfile(filename) -> bool:
    if MultiStorageClientFeature.is_enabled():
        msc = MultiStorageClientFeature.import_package()
        return msc.os.path.isfile(filename)
    else:
        return os.path.isfile(filename)


def ensure_directory_exists(filename, check_parent=True):
    """Build filename's path if it does not already exists."""
    dirname = os.path.dirname(filename) if check_parent else filename
    if MultiStorageClientFeature.is_enabled():
        msc = MultiStorageClientFeature.import_package()
        msc.os.makedirs(dirname, exist_ok=True)
    else:
        os.makedirs(dirname, exist_ok=True)


def get_checkpoint_name(checkpoints_path, iteration, release=False,
                        pipeline_parallel=None,
                        tensor_rank=None, pipeline_rank=None,
                        expert_parallel=None, expert_rank=None,
                        return_base_dir=False, basename="model_optim_rng.pt"):
    """Determine the directory name for this rank's checkpoint."""
    if release:
        directory = 'release'
    else:
        directory = 'iter_{:07d}'.format(iteration)
    if return_base_dir:
        common_path = os.path.join(checkpoints_path, directory)
        return common_path

    # Use both the tensor and pipeline MP rank.
    if pipeline_parallel is None:
        pipeline_parallel = (mpu.get_pipeline_model_parallel_world_size() > 1)
    if tensor_rank is None:
        tensor_rank = mpu.get_tensor_model_parallel_rank()
    if pipeline_rank is None:
        pipeline_rank = mpu.get_pipeline_model_parallel_rank()
    if expert_parallel is None:
        expert_parallel = (mpu.get_expert_model_parallel_world_size() > 1)
    if expert_rank is None:
        expert_rank = mpu.get_expert_model_parallel_rank()

    # Use both the tensor and pipeline MP rank. If using the distributed
    # optimizer, then the optimizer's path must additionally include the
    # data parallel rank.
    if not pipeline_parallel:
        common_path = os.path.join(checkpoints_path, directory,
                            f'mp_rank_{tensor_rank:02d}')
    else:
        common_path = os.path.join(checkpoints_path, directory,
                f'mp_rank_{tensor_rank:02d}_{pipeline_rank:03d}')

    if expert_parallel:
        common_path = common_path + f'_{expert_rank:03d}'

    return os.path.join(common_path, basename)


def get_load_checkpoint_path_by_args(args, load_arg="load"):
    """Get the checkpoint path based on the arguments."""
    load_dir = getattr(args, load_arg)
    iteration, release = -1, False
    tracker_filename = 'because load directory is not defined'
    if load_dir is not None:
        tracker_filename = get_checkpoint_tracker_filename(load_dir)
        if isfile(tracker_filename):
            iteration, release = read_metadata(tracker_filename)

    # Allow user to specify the loaded iteration.
    if getattr(args, "ckpt_step", None):
        iteration = args.ckpt_step

    return get_checkpoint_name(load_dir, iteration, release, return_base_dir=True)


def get_distributed_optimizer_checkpoint_name(model_checkpoint_name):
    return os.path.join(os.path.dirname(model_checkpoint_name),
                        "distrib_optim.pt")


def find_checkpoint_rank_0(checkpoints_path, iteration, release=False):
    """Finds the checkpoint for rank 0 without knowing if we are using
    pipeline parallelism/expert parallelism or not.

    Since the checkpoint naming scheme changes if pipeline or expert
    parallelism is present, we need to look for both naming schemes if
    we don't know if the checkpoint has pipeline or expert parallelism.
    """

    # Look for checkpoint with no pipelining and no expert parallelism
    filename = get_checkpoint_name(checkpoints_path, iteration, release,
                                   pipeline_parallel=False,
                                   tensor_rank=0, pipeline_rank=0,
                                   expert_parallel=False, expert_rank=0)
    if isfile(filename):
        return filename

    # Look for checkpoint with no pipelining and expert parallelism
    filename = get_checkpoint_name(checkpoints_path, iteration, release,
                                   pipeline_parallel=False,
                                   tensor_rank=0, pipeline_rank=0,
                                   expert_parallel=True, expert_rank=0)
    if isfile(filename):
        return filename

    # Look for checkpoint with pipelining and no expert parallelism
    filename = get_checkpoint_name(checkpoints_path, iteration, release,
                                   pipeline_parallel=True,
                                   tensor_rank=0, pipeline_rank=0,
                                   expert_parallel=False, expert_rank=0)
    if isfile(filename):
        return filename

    # Look for checkpoint with pipelining and expert parallelism
    filename = get_checkpoint_name(checkpoints_path, iteration, release,
                                   pipeline_parallel=True,
                                   tensor_rank=0, pipeline_rank=0,
                                   expert_parallel=True, expert_rank=0)
    if isfile(filename):
        return filename

    # Look for a distributed checkpoint
    filename = get_checkpoint_name(checkpoints_path, iteration, release,
                                   pipeline_parallel=True,
                                   return_base_dir=True)
    if dist_checkpointing.check_is_distributed_checkpoint(filename):
        return filename

    return None


def get_checkpoint_tracker_filename(checkpoints_path):

    """Tracker file rescords the latest chckpoint during
    training to restart from."""
    return os.path.join(checkpoints_path, 'latest_checkpointed_iteration.txt')


def checkpoint_exists(checkpoints_path):
    if checkpoints_path is None:
        return False
    path = get_checkpoint_tracker_filename(checkpoints_path)
    return isfile(path)


def read_metadata(tracker_filename):
    # Read the tracker file and either set the iteration or
    # mark it as a release checkpoint.
    iteration = -1
    release = False

    with open_file(tracker_filename, 'r') as f:
        metastring = f.read().strip()
        try:
            iteration = int(metastring)
        except ValueError:
            release = metastring == 'release'
            if not release:
                print_rank_0('ERROR: Invalid metadata file {}. Exiting'.format(
                    tracker_filename))
                sys.exit()
            else:
                # Set iteration to 0 for release checkpoints
                iteration = 0
    assert iteration > -1 or release, 'error parsing metadata file {}'.format(
        tracker_filename)

    # Get the max iteration retrieved across the ranks.
    if torch.distributed.is_initialized():
        iters_cuda = torch.tensor([iteration], dtype=torch.long, device='cuda')
        torch.distributed.all_reduce(iters_cuda, op=torch.distributed.ReduceOp.MAX)
        max_iter = iters_cuda[0].item()

        # We should now have all the same iteration.
        # If not, print a warning and chose the maximum
        # iteration across all ranks.
        if iteration != max_iter:
            rank = torch.distributed.get_rank()
            print('WARNING: on rank {} found iteration {} in the '
                  'metadata while max iteration across the ranks '
                  'is {}, replacing it with max iteration.'.format(
                      rank, iteration, max_iter), flush=True)
    else:
        # When loading a checkpoint outside of training (for example,
        # when editing it), we might not have torch distributed
        # initialized, in this case, just assume we have the latest
        max_iter = iteration
    return max_iter, release


def get_rng_state(ckpt_format: str, tp_group: torch.distributed.ProcessGroup, pp_group: torch.distributed.ProcessGroup) -> Union[List[Dict[str, Any]], ShardedObject]:
    """Collect rng state across data parallel ranks."""
    args = get_args()
    rng_state = {
        'random_rng_state': random.getstate(),
        'np_rng_state': np.random.get_state(),
        'torch_rng_state': torch.get_rng_state(),
        'cuda_rng_state': torch.cuda.get_rng_state(),
        'rng_tracker_states': tensor_parallel.get_cuda_rng_tracker().get_states()}

    rng_state_list = None
    if args.data_parallel_random_init and torch.distributed.is_initialized() and \
            mpu.get_data_parallel_world_size() > 1:
        rng_state_list = \
            [None for i in range(mpu.get_data_parallel_world_size())]
        torch.distributed.all_gather_object(
            rng_state_list,
            rng_state,
            group=mpu.get_data_parallel_group())
    else:
        rng_state_list = [rng_state]

    if ckpt_format == "torch_dist":
        pp_rank = get_pg_rank(pp_group)
        pp_size = get_pg_size(pp_group)
        tp_rank = get_pg_rank(tp_group)
        tp_size = get_pg_size(tp_group)
        rng_state_list = ShardedObject('rng_state', rng_state_list, (pp_size, tp_size), (pp_rank, tp_rank),
                                       replica_id=mpu.get_data_parallel_rank(with_context_parallel=True))
    elif ckpt_format == "fsdp_dtensor":
        pp_rank = mpu.get_pipeline_model_parallel_rank()
        tp_rank = mpu.get_tensor_model_parallel_rank()
        rng_state_list = {
            f"({pp_rank}, {tp_rank})": rng_state_list
        }

    return rng_state_list

class CheckpointType(Enum):
    LEGACY = auto()
    LOCAL = auto()
    GLOBAL = auto()
    TORCH_DCP = auto()
    FSDP_DTENSOR = auto()


def _build_sharded_state_dict_metadata(args: Namespace, dp_cp_group: Optional[torch.distributed.ProcessGroup] = None) -> dict:
    """Builds metadata used for sharded_state_dict versioning.

    The whole content metadata is passed to ``shared_state_dict`` model and optimizer methods
    and therefore affects only the logic behind sharded_state_dict creation.
    The content metadata should be minimalistic, ideally flat (or with a single nesting level)
    and with semantically meaningful flag names (e.g. `distrib_optim_sharding_type`).
    In particular, a simple integer (or SemVer) versioning flag (e.g. `metadata['version'] = 3.4`)
    is discouraged, because the metadata serves for all models and optimizers and it's practically
    impossible to enforce a linearly increasing versioning for this whole space.

    Args:
        args: Arguments namespace
        dp_cp_group: Data parallel + context parallel group (default: None, falls back to mpu API)
    """
    metadata = {}

    if args.use_distributed_optimizer and args.ckpt_format == "fsdp_dtensor":
        metadata['distrib_optim_sharding_type'] = 'fsdp_dtensor'

    if args.use_distributed_optimizer and args.ckpt_format != "fsdp_dtensor":
        if args.dist_ckpt_optim_fully_reshardable:
            metadata['distrib_optim_sharding_type'] = 'fully_reshardable'
            metadata['distrib_optim_fully_reshardable_mem_efficient'] = args.distrib_optim_fully_reshardable_mem_efficient
        else:
            metadata['distrib_optim_sharding_type'] = 'dp_reshardable'

    metadata['singleton_local_shards'] = False
    metadata['chained_optim_avoid_prefix'] = True
    # Add dp_cp_group to metadata. If not provided, fallback to global parallel state.
    if dp_cp_group is None:
        dp_cp_group = mpu.get_data_parallel_group(with_context_parallel=True)
    metadata['dp_cp_group'] = dp_cp_group
    return metadata


def save_grads(save_dir, state_dict, iteration, grad_label):
    """Persist state_dict of grads onto disk. In case of wgrads, this collection should
    be performed before the grads are cleared but after they are reduced.

    NOTE: wgrads for non-expert layers will be duplicated if using expert parallelism, but
    this can be handled in postprocessing."""

    print_rank_0(f"  [{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')}] saving {grad_label} "
                 f"from iteration {iteration:7d}")

    if mpu.get_expert_data_parallel_rank() == 0:
        # Create saving directory.
        ep_rank = mpu.get_expert_model_parallel_rank()
        pp_rank = mpu.get_pipeline_model_parallel_rank()
        tp_rank = mpu.get_tensor_model_parallel_rank()
        assert save_dir is not None
        assert iteration is not None
        save_dir = os.path.join(save_dir, grad_label, f"iter_{iteration:07d}")
        os.makedirs(save_dir, exist_ok=True)

        # Save state_dict.
        checkpoint_name = f"mp_rank_{tp_rank:02d}"
        if mpu.get_pipeline_model_parallel_world_size() > 1:
            checkpoint_name += f"_{pp_rank:03d}"
        if mpu.get_expert_model_parallel_world_size() > 1:
            checkpoint_name += f"_{ep_rank:03d}"
        full_save_path = os.path.join(save_dir, f"{checkpoint_name}.pth")
        # Convert back to dict (e.g., from collections.defaultdict) for easy loading later.
        torch.save(dict(state_dict), full_save_path)

    print_rank_0(f"  [{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')}] saved {grad_label} "
                 f"from iteration {iteration:7d}")


def save_checkpoint(iteration, model, optimizer, opt_param_scheduler, num_floating_point_operations_so_far,
                    checkpointing_context=None, pipeline_rank=None, expert_rank=None, tensor_rank=None, pipeline_parallel=None, expert_parallel=None, non_persistent_ckpt=False,
                    train_data_iterator=None, preprocess_common_state_dict_fn = None, release=False, tp_group: Optional[torch.distributed.ProcessGroup] = None, pp_group: Optional[torch.distributed.ProcessGroup] = None, dp_cp_group: Optional[torch.distributed.ProcessGroup] = None):
    """Save a model, optimizer and optionally dataloader checkpoint.

    Checkpointing context is used to persist some checkpointing state
    throughout a single job. Must be initialized externally (not used if None).

    If non_persistent_ckpt is True,
    the checkpoint will be saved with special functionality for removing old checkpoints.
    There are several types of non-persistent checkpoints:
    "global" - Saved as a standard checkpoint (e.g., on Lustre) with old checkpoints being removed.
    "local" - Each rank saves a portion of the checkpoint locally (e.g., on SSD/ramdisk).

    Dataloader checkpoint is only saved if the dataloader supports it. Currently this applies only
    to the Megatron Energon dataloader (multimodal) and not the built-in Megatron dataloader (text-only).

    Args:
        dp_cp_group: Data parallel + context parallel group (default: None, falls back to mpu API)
    """
    start_ckpt = time()
    args = get_args()

    if args.async_save and not is_empty_async_queue():
        print_rank_0('WARNING: Starting a checkpoint save before previous has finished. Consider increasing the checkpoint interval.')

    # Prepare E2E metrics at start of save checkpoint
    productive_metrics = on_save_checkpoint_start(args.async_save)

    # Monitor for the checkpointing timeout (no-op if FT is not enabled)
    ft_integration.on_checkpointing_start()

    # Only rank zero of the data parallel writes to the disk.
    model = unwrap_model(model)

    # Handle non_persistent_ckpt flag. Besides overwriting `args.save` and
    # `args.use_dist_ckpt`, non-persistent global ckpt requires no additional logic
    ckpt_type = CheckpointType.GLOBAL if args.use_dist_ckpt else CheckpointType.LEGACY
    save_dir = args.save
    if non_persistent_ckpt:
        if args.non_persistent_ckpt_type == 'global':
            ckpt_type = CheckpointType.GLOBAL
            save_dir = (
                args.non_persistent_global_ckpt_dir
                if args.non_persistent_global_ckpt_dir
                else os.path.join(save_dir, _NON_PERSISTENT_CKPT_SUBDIR)
            )
            # TODO Can we ensure the previous checkpoint is saved? We don't want to allow two saves in parallel.
            cleanup_old_non_persistent_checkpoint(
                save_dir, leave_ckpt_num=1, do_async=args.async_save
            )
        elif args.non_persistent_ckpt_type == 'local':
            ckpt_type = CheckpointType.LOCAL
            save_dir = checkpointing_context['local_checkpoint_manager'].local_ckpt_dir
        else:
            raise NotImplementedError(f"Please use local or global non-persistent checkpoints (got: {args.non_persistent_ckpt_type})")

    ckpt_format = args.ckpt_format if ckpt_type == CheckpointType.GLOBAL else 'torch'
    print_rank_0(f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')}] saving checkpoint "
                 f"at iteration {iteration:7d} to {save_dir} in {ckpt_format} format")

    # Collect rng state across data parallel ranks.
    if tp_group is None and pp_group is None:
        tp_group = mpu.get_tensor_model_parallel_group()
        pp_group = mpu.get_pipeline_model_parallel_group()
    rng_state = get_rng_state(args.ckpt_format, tp_group, pp_group)

    # Collect rerun state across all ranks
    rerun_state_machine = get_rerun_state_machine()
    rerun_state = rerun_state_machine.state_dict(
        data_iterator=train_data_iterator, ckpt_format=args.ckpt_format,
    )

    # Checkpoint name.
    return_base_dir = (ckpt_type != CheckpointType.LEGACY)
    checkpoint_name = get_checkpoint_name(save_dir, iteration, release=release, pipeline_parallel=pipeline_parallel,
        tensor_rank=tensor_rank, pipeline_rank=pipeline_rank, expert_parallel=expert_parallel, expert_rank=expert_rank, return_base_dir=return_base_dir)

    # Save dataloader state if the dataloader supports it (currently only Megatron Energon).
    maybe_save_dataloader_state(train_data_iterator, iteration, getattr(args, "dataloader_save", None))

    # Save distributed optimizer's custom parameter state.
    if (
        args.use_distributed_optimizer
        and not args.no_save_optim
        and optimizer is not None
        and ckpt_type == CheckpointType.LEGACY
    ):
        optim_checkpoint_name = \
            get_distributed_optimizer_checkpoint_name(checkpoint_name)
        ensure_directory_exists(optim_checkpoint_name)
        if not optimizer.is_stub_optimizer:
            optimizer.save_parameter_state(optim_checkpoint_name)

    # LayerWiseDistributedOptimizer save optimizer state to file on different ranks
    if getattr(args, "optimizer", "adam").startswith("dist_") and args.ckpt_format == 'torch':
        dp_rank = mpu.get_data_parallel_rank()
        optim_checkpoint_name = os.path.join(os.path.dirname(checkpoint_name), f"layer_wise_optimizer_{dp_rank}.pt")
        ensure_directory_exists(optim_checkpoint_name)
        if not optimizer.is_stub_optimizer:
            optimizer.save_state_dict_to_file(optim_checkpoint_name)

    async_save_request = None
    if args.async_save:
        if ckpt_type == CheckpointType.LEGACY:
            raise NotImplementedError('Async checkpoint save not implemented for legacy checkpoints')
        elif ckpt_type == CheckpointType.GLOBAL and args.ckpt_format != 'torch_dist':
            raise NotImplementedError(f'Async checkpoint save not implemented for {args.ckpt_format} distributed checkpoint format')

    rank = torch.distributed.get_rank() if torch.distributed.is_initialized() else 0

    # Collect args, model, RNG.
    if not torch.distributed.is_initialized() \
            or mpu.get_expert_data_parallel_rank() == 0 \
            or ckpt_type != CheckpointType.LEGACY:
        if ckpt_type != CheckpointType.LEGACY:
            sharded_sd_metadata = _build_sharded_state_dict_metadata(args, dp_cp_group=dp_cp_group)
            if args.use_distributed_optimizer:
                print_rank_0(f'Storing distributed optimizer sharded state of type'
                             f' {sharded_sd_metadata["distrib_optim_sharding_type"]}')
        else:
            sharded_sd_metadata = None
        state_dict = generate_state_dict(
            args,
            model,
            optimizer,
            opt_param_scheduler,
            rng_state,
            iteration=iteration,
            optim_sd_kwargs=dict(metadata=sharded_sd_metadata),
            model_sd_kwargs=dict(metadata=sharded_sd_metadata),
            rerun_state=rerun_state,
        )

        state_dict['num_floating_point_operations_so_far'] = num_floating_point_operations_so_far
        if ckpt_type == CheckpointType.GLOBAL and ckpt_format == "torch_dist":
            if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0:
                # TODO Handle non-empty directories (e.g., after a crash during saving).
                ensure_directory_exists(checkpoint_name, check_parent=False)
            if checkpointing_context is not None and 'save_strategy' in checkpointing_context:
                save_strategy = checkpointing_context['save_strategy']
                # Already saved once before - don't need to rerun sharding validation
                validate_sharding_integrity = not args.ckpt_assume_constant_structure
            else:
                validate_sharding_integrity = True
                save_strategy = get_default_save_sharded_strategy(args.ckpt_format)
                if args.ckpt_assume_constant_structure and args.ckpt_format == 'torch_dist':
                    save_strategy.use_cached_ckpt_structure = args.ckpt_assume_constant_structure
                    if args.async_save:
                        save_strategy.thread_count = args.dist_ckpt_workers
                    else:
                        # We don't allow per-rank parallel save for sync save
                        logger.warning('Per-rank parallel save is not supported for sync save. '
                                       'Setting args.dist_ckpt_workers to 1')
                        save_strategy.thread_count = 1
                    if checkpointing_context is not None and 'load_strategy' in checkpointing_context:
                        cached_global_metadata = getattr(checkpointing_context['load_strategy'], 'cached_global_metadata', None)
                        if cached_global_metadata is not None:
                            logger.debug("Plugging in the read metadata from the load strategy...")
                            save_strategy.cached_global_metadata = cached_global_metadata
                        else:
                            logger.debug("Failed to plug in the read metadata from the load strategy...")

                if args.ckpt_fully_parallel_save:
                    if args.ckpt_fully_parallel_save_process_group == 'dp':
                        process_group = mpu.get_data_parallel_group(with_context_parallel=True)
                    elif args.ckpt_fully_parallel_save_process_group == 'ep_dp':
                        process_group = mpu.get_expert_data_parallel_group()
                    save_strategy = FullyParallelSaveStrategyWrapper(save_strategy, process_group,
                                                                     args.ckpt_assume_constant_structure)
            # Store save strategy for future checkpoint saves
            if checkpointing_context is not None:
                checkpointing_context['save_strategy'] = save_strategy
            end_ckpt = time()
            logger.debug(f"rank: {rank}, takes {end_ckpt - start_ckpt} to prepare state dict for ckpt ")
            async_save_request = dist_checkpointing.save(state_dict, checkpoint_name, save_strategy,
                                                         async_sharded_save=args.async_save,
                                                         validate_access_integrity=validate_sharding_integrity,
                                                         preprocess_common_before_consistancy_check=preprocess_common_state_dict_fn,
                                                         content_metadata=_clean_metadata_for_serialization(sharded_sd_metadata))
            # [ModelOpt]: save sharded modelopt_state
            if has_nvidia_modelopt:
                save_sharded_modelopt_state(model, checkpoint_name, (args.ckpt_format, 1))
        elif ckpt_type == CheckpointType.GLOBAL and ckpt_format in ["torch_dcp", "fsdp_dtensor"]:
            if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0:
                # TODO Handle non-empty directories (e.g., after a crash during saving).
                ensure_directory_exists(checkpoint_name, check_parent=False)

            if ckpt_format == "fsdp_dtensor":
                state_dict = preprocess_fsdp_dtensor_state_dict(args, state_dict, model[0])

            fs_storage_writer = torch.distributed.checkpoint.FileSystemWriter(checkpoint_name)
            torch.distributed.checkpoint.save(
                state_dict=state_dict,
                storage_writer=fs_storage_writer,
            )
        else:
            # [ModelOpt]: Inject modelopt_state into state_dict
            if has_nvidia_modelopt:
                if ckpt_type == CheckpointType.LOCAL:
                    print_rank_0('WARNING: Local checkpointing does not support nvidia_modelopt.')
                else:
                    save_modelopt_state(model, state_dict)

            end_ckpt = time()
            logger.debug(f"rank: {rank}, takes {end_ckpt - start_ckpt} to prepare state dict for ckpt ")
            if ckpt_type == CheckpointType.LOCAL:
                try:
                    from megatron.core.dist_checkpointing.tensor_aware_state_dict import MCoreTensorAwareStateDict
                except ModuleNotFoundError:
                    raise RuntimeError("The 'nvidia_resiliency_ext' module is required for local "
                                       "checkpointing but was not found. Please ensure it is installed.")
                if (sharded_sd_metadata or {}).get('distrib_optim_sharding_type') in ['fully_reshardable', 'dp_zero_gather_scatter']:
                    # Note: Currently full reshardabilty is not supported when local checkpoints are used.
                    raise RuntimeError(
                        f"Local checkpointing does not support optimizer sharding type '{sharded_sd_metadata['distrib_optim_sharding_type']}'. "
                        "Don't use '--dist-ckpt-optim-fully-reshardable' when saving local checkpoints."
                    )
                algo = args.non_persistent_local_ckpt_algo
                cached_metadata = None
                if args.ckpt_assume_constant_structure and 'local_checkpoint_cache' in checkpointing_context:
                    cached_metadata = checkpointing_context['local_checkpoint_cache']
                state_dict_for_save, cacheable_metadata = MCoreTensorAwareStateDict.from_state_dict(
                    state_dict, algo=algo, cached_metadata=cached_metadata,
                    parallelization_group=mpu.get_data_parallel_group(with_context_parallel=True)
                )
                async_save_request = checkpointing_context['local_checkpoint_manager'].save(
                    state_dict_for_save, iteration, is_async=bool(args.async_save)
                )
                checkpointing_context['local_checkpoint_cache'] = cacheable_metadata
            else:
                assert ckpt_type == CheckpointType.LEGACY
                # Save.
                ensure_directory_exists(checkpoint_name)
                torch.save(state_dict, checkpoint_name)
    start_misc = time()
    if ckpt_type != CheckpointType.LOCAL:
        if not args.async_save:
            assert async_save_request is None
            # Wait so everyone is done (necessary)
            if torch.distributed.is_initialized():
                torch.distributed.barrier()

    # And update the latest iteration
    if not torch.distributed.is_initialized() \
            or torch.distributed.get_rank() == 0:
        tracker_filename = get_checkpoint_tracker_filename(save_dir)

        if ckpt_type == CheckpointType.LOCAL:
            def iter_finalize_fn():
                print_rank_0(f"  [{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')}] successfully "
                             f"saved local checkpoint from iteration {iteration:7d}")
                if args.log_progress and args.async_save:
                    append_to_progress_log(f'Saved async local checkpoint\tIteration: {iteration}',
                                           barrier=False)
        else:
            def iter_finalize_fn():
                prev_iteration = 0
                save_retain_interval = getattr(args, 'save_retain_interval', None)  # For backwards compatibility of tests.
                if save_retain_interval is not None:
                    if os.path.exists(tracker_filename):  # TODO: Make this work with MSC remote paths?
                        with open_file(tracker_filename, 'r') as f:
                            prev_iteration = int(f.read().strip())
                with open_file(tracker_filename, 'w') as f:
                    f.write("release" if release else str(iteration))
                tensor_rank_to_print = (tensor_rank if tensor_rank is not None else mpu.get_tensor_model_parallel_rank()) + 1
                pipeline_rank_to_print = (pipeline_rank if pipeline_rank is not None else mpu.get_pipeline_model_parallel_rank()) + 1
                print_rank_0(f"  [{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')}] successfully saved "
                             f"checkpoint from iteration {int(iteration):7d} to {args.save} "
                             f"[ t {tensor_rank_to_print}/{mpu.get_tensor_model_parallel_world_size()}, "
                             f"p {pipeline_rank_to_print}/{mpu.get_pipeline_model_parallel_world_size()} ]")
                if args.log_progress and args.async_save:
                    append_to_progress_log(f'Saved async checkpoint\tIteration: {iteration}',
                                           barrier=False)

                if save_retain_interval is not None:
                    if prev_iteration > 0 and prev_iteration != iteration and prev_iteration % save_retain_interval != 0:
                        checkpoint_name = get_checkpoint_name(args.save, iteration=prev_iteration,
                                                              return_base_dir=True)
                        # Don't delete if `checkpoint_name` is a symbolic link.
                        if os.path.islink(checkpoint_name):  # TODO: Make this work with MSC remote paths?
                            print_rank_0(f'  skipping deleting checkpoint from iteration {prev_iteration:7d} '
                                         f'at {args.save} since it is a symbolic link')
                        else:
                            # Asynchronous version of delete_checkpoint(args, iteration_to_delete=prev_iteration).
                            # Use multiprocessing to delete checkpoint in background
                            if args.async_save:
                                # Clean up any finished deletion processes before starting a new one
                                finalize_deletion_processes(blocking=False)
                                ctx = multiprocessing.get_context('fork')
                                delete_process = ctx.Process(
                                    target=_async_delete_checkpoint_impl,
                                    args=(args.save, prev_iteration, args.log_progress, True,
                                          args.async_ckpt_cpu_priority, args.async_ckpt_io_priority),
                                    daemon=True
                                )
                                delete_process.start()
                                # Track the process so we can join it later to prevent zombies
                                _deletion_processes.append(delete_process)
                            else:
                                th = threading.Thread(target=_async_delete_checkpoint_impl, args=(args.save, prev_iteration, args.log_progress))
                                th.start()

        if args.async_save:
            assert async_save_request is not None
            async_save_request.add_finalize_fn(iter_finalize_fn)
        else:
            iter_finalize_fn()

    # Additional callback for one_logger (last rank)
    if not torch.distributed.is_initialized() \
       or is_last_rank():
        def onelogger_finalize_fn():
            on_save_checkpoint_success(productive_metrics, args.async_save)
        if args.async_save:
            assert async_save_request is not None
            async_save_request.add_finalize_fn(onelogger_finalize_fn)
        else:
            onelogger_finalize_fn()

    # Additional callback for wandb (last rank)
    if not torch.distributed.is_initialized() \
       or is_last_rank():
        def wandb_finalize_fn():
            wandb_utils.on_save_checkpoint_success(checkpoint_name, get_checkpoint_tracker_filename(save_dir), save_dir, iteration)
        if args.async_save:
            assert async_save_request is not None
            async_save_request.add_finalize_fn(wandb_finalize_fn)
        else:
            wandb_finalize_fn()

    if args.async_save:
        schedule_async_save(async_save_request)
        print_rank_0(f"  [{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')}] scheduled "
                     f"an async checkpoint save at iteration {iteration:7d} to {save_dir}")

    # Wait so everyone is done (not necessary)
    if torch.distributed.is_initialized():
        torch.distributed.barrier()

    end_misc = time()
    logger.debug(f"rank: {rank}, takes {end_misc - start_misc} to finalize ckpt save ")

    ft_integration.on_checkpointing_end(is_async_finalization=False)

@_disable_gc()
def _async_delete_checkpoint_impl(save_path, iteration_to_delete, log_progress=False, lower_priority=False,
                                  cpu_priority=None, io_priority=None):
    """Module-level function for async checkpoint deletion.
    
    This function can be pickled and executed by the async worker process.
    Note: This is only called from rank 0, so we use regular print() instead of print_rank_0()
    since torch.distributed won't be initialized in the async worker process.
    
    Args:
        save_path (str): Path to the checkpoints directory
        iteration_to_delete (int): Iteration number of checkpoint to delete
        log_progress (bool): Whether to log progress
        lower_priority (bool): If True, set process QoS (e.g. nice, ionice) so deletion doesn't contend with training.
        cpu_priority (int): Nice value for CPU when lower_priority is True (from args.async_ckpt_cpu_priority).
        io_priority (int): I/O class when lower_priority is True (from args.async_ckpt_io_priority).
    """
    if lower_priority:
        from megatron.core.dist_checkpointing.strategies.async_utils import _set_process_qos
        _set_process_qos(cpu_priority=cpu_priority, io_priority=io_priority)

    checkpoint_name = get_checkpoint_name(save_path, iteration=iteration_to_delete,
                                         return_base_dir=True)
    try:
        shutil.rmtree(checkpoint_name)  # TODO: Make this work with MSC remote paths?
        print(f'  successfully deleted checkpoint from iteration {iteration_to_delete:7d} '
              f'at {save_path}', flush=True)
        if log_progress:
            append_to_progress_log(f'Deleted checkpoint\tIteration: {iteration_to_delete}', barrier=False)
    except Exception as e:
        print(f'  encountered exception "{e}" when trying to delete checkpoint from '
              f'iteration {iteration_to_delete:7d} at {save_path}', flush=True)
        # Any exception encountered in checkpoint deletion can be ignored and is not fatal.
        pass


def cleanup_old_non_persistent_checkpoint(save_dir, leave_ckpt_num=1, do_async=False):
    if torch.distributed.is_initialized() and torch.distributed.get_rank() != 0:
        return
    save_dir = Path(save_dir)

    iter_prefix = "iter_"
    iter_ckpts = save_dir.rglob(f'{iter_prefix}*')
    sorted_iter_ckpts = sorted(iter_ckpts, key=lambda ckpt_name: int(ckpt_name.name[len(iter_prefix):]))
    if not sorted_iter_ckpts:
        return
    rm_iter_ckpts = sorted_iter_ckpts[:-leave_ckpt_num]
    print_rank_0(f'Non-persistent checkpoints scheduled for removal: {rm_iter_ckpts}')
    print_rank_0(f'Non-persistent checkpoints to be kept: {sorted_iter_ckpts[-leave_ckpt_num:]}')

    def remove_iter_ckpts(_iter_ckpts):
        for ckpt in _iter_ckpts:
            shutil.rmtree(ckpt)
    if do_async:
        threading.Thread(target=remove_iter_ckpts, args=(rm_iter_ckpts,)).start()
    else:
        remove_iter_ckpts(rm_iter_ckpts)


def maybe_save_dataloader_state(train_iterator, iteration, dataloader_save_path):
    """Saves dataloader state if the dataloader supports it.

    Currently, this is only used by Megatron Energon dataloader (multimodal) to store its state at a
    specific iteration. The Megatron built-in dataloader (text-only) creates index files upfront
    to track its state.

    If the provided dataloader has `save_state` method, then it is called to save the state.
    Otherwise, no state is saved.

    Args:
        train_iterator (iterable): Train dataloader.
        iteration (int): Current iteration.
        dataloader_save_path (str): Path where the dataloader state is saved.
    """
    # If no dataloader or saving path is provided, exit early, otherwise, raise an error.
    if train_iterator is None or dataloader_save_path is None or dataloader_save_path == "":
        return

    # If dataloader doesn't support saving state, raise an error.
    if not hasattr(train_iterator.iterable, "save_state"):
        raise RuntimeError(f"Could not find a save_state for the train_iterator of type {type(train_iterator)}")

    # Save dataloader state for each data parallel rank only once.
    first_rank = mpu.is_pipeline_first_stage(ignore_virtual=True) and mpu.get_tensor_model_parallel_rank() == 0
    if not first_rank:
        return

    dp_rank = mpu.get_data_parallel_rank()
    if dp_rank == 0:
        print(f"saving dataloader checkpoint at iteration {iteration} to {dataloader_save_path}")
    train_dataloader_state_dict = train_iterator.iterable.save_state()
    data_state_save_path = get_checkpoint_name(
        dataloader_save_path, iteration,
        basename=f'train_dataloader_dprank{dp_rank:03d}.pt'
    )

    torch.distributed.barrier(group=mpu.get_data_parallel_group())

    if mpu.get_data_parallel_rank() == 0:
        ensure_directory_exists(data_state_save_path)

    torch.distributed.barrier(group=mpu.get_data_parallel_group())

    dataloader_save_dict = {}
    dataloader_save_dict['dataloader_state_dict'] = train_dataloader_state_dict
    torch.save(dataloader_save_dict, data_state_save_path)


def generate_state_dict(
    args,
    model,
    optimizer,
    opt_param_scheduler,
    rng_state,
    iteration=None,
    optim_sd_kwargs=None,
    model_sd_kwargs=None,
    rerun_state=None,
):
    """Generate a state dict from given model, optimizer, scheduler, rng state and others. """

    # Arguments, iteration, and model.
    state_dict = {}
    state_dict['args'] = args
    state_dict['checkpoint_version'] = 3.0
    if iteration is not None:
        state_dict['iteration'] = iteration

    for i in range(len(model)):
        key = "model"
        if len(model) > 1:
            key = f"model{i}"

        if args.ckpt_format == "torch_dist":
            model_sd = model[i].sharded_state_dict(
                **(model_sd_kwargs or {
                    "metadata": {
                        "dp_cp_group": mpu.get_data_parallel_group(with_context_parallel=True)
                    }
                })
            )
        else:   # torch, torch_dcp, fsdp_dtensor
            model_sd = model[i].state_dict_for_save_checkpoint()

        state_dict[key] = model_sd

    # Optimizer stuff.
    if not args.no_save_optim:
        if optimizer is not None and not optimizer.is_stub_optimizer:

            if args.ckpt_format == "torch_dist":
                optimizer_sd = optimizer.sharded_state_dict(
                    state_dict,
                    **(optim_sd_kwargs or {
                        "metadata": {
                            "dp_cp_group": mpu.get_data_parallel_group(with_context_parallel=True)
                        }
                    })
                )
            elif args.ckpt_format == "fsdp_dtensor":
                if optim_sd_kwargs is None:
                    optim_sd_kwargs = {}
                if "metadata" not in optim_sd_kwargs:
                    optim_sd_kwargs["metadata"] = {}
                optim_sd_kwargs['metadata'].update(_build_sharded_state_dict_metadata(args))
                optimizer_sd = optimizer.sharded_state_dict(state_dict, **optim_sd_kwargs)
            else:
                optimizer_sd = optimizer.state_dict()

            state_dict['optimizer'] = optimizer_sd

        if opt_param_scheduler is not None:
            state_dict['opt_param_scheduler'] = \
                opt_param_scheduler.state_dict()

    # Rerun state
    if rerun_state:
        state_dict['rerun_state_machine'] = rerun_state

    # RNG states.
    if not args.no_save_rng and rng_state:
        state_dict["rng_state"] = rng_state

    return state_dict


def preprocess_fsdp_dtensor_state_dict(args, raw_state_dict, model):
    state_dict = raw_state_dict.copy()
    handle_fp8_extra_state_case(state_dict["model"])
    if args.swiglu:
        if "optimizer" in state_dict:
            model_state_dict, optimizer_state_dict = handle_swiglu_in_state_dict(
                model, state_dict["model"], state_dict["optimizer"]
            )
            state_dict["model"] = model_state_dict
            state_dict["optimizer"] = optimizer_state_dict
        else:
            model_state_dict, _ = handle_swiglu_in_state_dict(
                model, state_dict["model"], None
            )
            state_dict["model"] = model_state_dict
    if args.num_experts:
        state_dict["model"] = handle_experts_in_state_dict(state_dict["model"], args.num_experts)
    preprocess_state_dict_for_uneven_dtensor(state_dict)

    return state_dict


def _transpose_first_dim(t, num_splits, num_splits_first, model):
    input_shape = t.size()
    # We use a self_attention module but the values extracted aren't
    # specific to self attention so should work for cross attention as well
    while hasattr(model, 'module'):
        model = model.module
    attention_module = model.language_model.encoder.layers[0].self_attention
    hidden_size_per_attention_head = attention_module.hidden_size_per_attention_head
    num_attention_heads_per_partition = attention_module.num_attention_heads_per_partition
    if num_splits_first:
        """[num_splits * np * hn, h]
        -->(view) [num_splits, np, hn, h]
        -->(tranpose) [np, num_splits, hn, h]
        -->(view) [np * num_splits * hn, h] """

        intermediate_shape = \
            (num_splits, num_attention_heads_per_partition,
             hidden_size_per_attention_head) + input_shape[1:]

        t = t.view(*intermediate_shape)
        t = t.transpose(0, 1).contiguous()
    else:
        """[np * hn * num_splits, h]
        -->(view) [np, hn, num_splits, h]
        -->(tranpose) [np, num_splits, hn, h]
        -->(view) [np * num_splits * hn, h] """

        intermediate_shape = \
            (num_attention_heads_per_partition,
             hidden_size_per_attention_head, num_splits) +\
             input_shape[1:]

        t = t.view(*intermediate_shape)
        t = t.transpose(1, 2).contiguous()
    t = t.view(*input_shape)

    return t


def fix_query_key_value_ordering(model, checkpoint_version):
    """Fix up query/key/value matrix ordering if checkpoint
    version is smaller than 2.0
    """
    if checkpoint_version < 2.0:
        if isinstance(model, list):
            assert len(model)==1
            model = model[0]
        for name, param in model.named_parameters():
            if name.endswith(('.query_key_value.weight', '.query_key_value.bias')):
                if checkpoint_version == 0:
                    fixed_param = _transpose_first_dim(param.data, 3, True, model)
                elif checkpoint_version == 1.0:
                    fixed_param = _transpose_first_dim(param.data, 3, False, model)
                else:
                    print_rank_0(f"Invalid checkpoint version {checkpoint_version}.")
                    sys.exit()
                param.data.copy_(fixed_param)
            if name.endswith(('.key_value.weight', '.key_value.bias')):
                if checkpoint_version == 0:
                    fixed_param = _transpose_first_dim(param.data, 2, True, model)
                elif checkpoint_version == 1.0:
                    fixed_param = _transpose_first_dim(param.data, 2, False, model)
                else:
                    print_rank_0(f"Invalid checkpoint version {checkpoint_version}.")
                    sys.exit()
                param.data.copy_(fixed_param)
        print_rank_0(" successfully fixed query-key-values ordering for"
                     " checkpoint version {}".format(checkpoint_version))


def _get_non_persistent_iteration(non_persistent_global_dir, args, checkpointing_context=None):
    if args.non_persistent_ckpt_type is None:
        return -1
    elif args.non_persistent_ckpt_type == "global":
        tracker_filename = get_checkpoint_tracker_filename(non_persistent_global_dir)
        if isfile(tracker_filename):
            iteration, release = read_metadata(tracker_filename)
            if release:
                raise RuntimeError('Non-persistent checkpoint can\'t be a release checkpoint')
        else:
            iteration = -1
            print_rank_0('WARNING: could not find the metadata file {}'.format(tracker_filename))
            print_rank_0('    will not load any non-persistent checkpoint')
        return iteration
    elif args.non_persistent_ckpt_type == "local":
        return checkpointing_context['local_checkpoint_manager'].find_latest()
    else:
        assert False, 'Please use local or global non-persistent checkpoints' \
            f'(got: {args.non_persistent_ckpt_type})'


def _load_non_persistent_base_checkpoint(
    non_persistent_global_dir,
    args,
    rank0,
    sharded_state_dict,
    non_persistent_iteration,
    checkpointing_context=None,
):
    """ Load the base state_dict from a non-persistent distributed checkpoint.
    Depending on the non_persistent_ckpt_type, different logic may be required.
    """
    assert args.non_persistent_ckpt_type is not None
    if args.non_persistent_ckpt_type == "global":
        if not rank0:
            print_rank_0(
                f'Loading from a non-persistent checkpoint (non-persistent iter {non_persistent_iteration})'
            )
        return _load_global_dist_base_checkpoint(
            non_persistent_global_dir, args, rank0, sharded_state_dict, non_persistent_iteration, False,
            checkpointing_context=checkpointing_context
        )
    elif args.non_persistent_ckpt_type == "local":
        intermediate_state_dict, checkpoint_name = checkpointing_context[
            'local_checkpoint_manager'
        ].load()
        state_dict = intermediate_state_dict.to_state_dict(
            sharded_state_dict,
            algo=args.non_persistent_local_ckpt_algo,
            parallelization_group = mpu.get_data_parallel_group(with_context_parallel=True)
        )
        return state_dict, checkpoint_name, False, CheckpointType.LOCAL
    else:
        raise NotImplementedError(f"Please use local or global non-persistent checkpoints (got: {args.non_persistent_ckpt_type})")


def _load_global_dist_base_checkpoint(
    load_dir, args, rank0, sharded_state_dict, iteration, release, checkpointing_context=None
):
    """ Load the base state_dict from the given directory containing the global distributed checkpoint """
    if rank0:
        checkpoint_name = find_checkpoint_rank_0(load_dir, iteration, release)
        state_dict = dist_checkpointing.load_common_state_dict(checkpoint_name)
        return state_dict, checkpoint_name, release, CheckpointType.GLOBAL

    if sharded_state_dict is None:
        assert not args.auto_detect_ckpt_format and not args.use_dist_ckpt, (
            args.auto_detect_ckpt_format,
            args.use_dist_ckpt,
        )
        raise RuntimeError(
            'Detected load from a distributed checkpoint, but neither --use-dist-ckpt nor --auto-detect-ckpt-format is set.'
        )

    checkpoint_name = get_checkpoint_name(load_dir, iteration, release, return_base_dir=True)
    load_strategy = get_default_load_sharded_strategy(
        checkpoint_name, cache_metadata=args.ckpt_assume_constant_structure
    )
    # NOTE: `args.ckpt_fully_parallel_load` applies to both persistent and non-persistent checkpoints.
    if args.ckpt_fully_parallel_load:
        if args.ckpt_fully_parallel_load_process_group == 'dp':
            process_group = mpu.get_data_parallel_group(with_context_parallel=True)
        elif args.ckpt_fully_parallel_load_process_group == 'ep_dp':
            process_group = mpu.get_expert_data_parallel_group()
        else:
            raise ValueError(f"Invalid load process group: {args.ckpt_fully_parallel_load_process_group}")

        load_strategy = FullyParallelLoadStrategyWrapper(
            load_strategy, process_group, exchange_algo=args.ckpt_fully_parallel_load_exchange_algo
        )
    if checkpointing_context is not None:
        checkpointing_context["load_strategy"] = load_strategy
    state_dict = dist_checkpointing.load(sharded_state_dict, checkpoint_name, load_strategy, strict=args.dist_ckpt_strictness)
    return state_dict, checkpoint_name, release, CheckpointType.GLOBAL


def _get_checkpoint_format(checkpoint_name, args):
    """Get the format of an existing checkpoint."""
    if MultiStorageClientFeature.is_enabled():
        msc = MultiStorageClientFeature.import_package()
        checkpoint_dir = msc.Path(checkpoint_name)
        is_torch_ckpt = any([f.name.startswith("mp_rank_0") for f in checkpoint_dir.iterdir()])
        is_torch_dcp = checkpoint_dir.joinpath(".metadata").exists()
    else:
        is_torch_ckpt = any([f.startswith("mp_rank_0") for f in os.listdir(checkpoint_name)])
        is_torch_dcp = os.path.exists(os.path.join(checkpoint_name, ".metadata"))

    ckpt_format = None
    if dist_checkpointing.check_is_distributed_checkpoint(checkpoint_name):
        ckpt_format = "torch_dist"
    elif is_torch_ckpt:
        ckpt_format = "torch"
    elif is_torch_dcp:
        ckpt_format = "torch_dcp"
        if getattr(args, "use_megatron_fsdp", False):
            ckpt_format = "fsdp_dtensor"
    else:
        raise NotImplementedError(f"unknown checkpoint format in {checkpoint_name}")

    return ckpt_format


def _load_base_checkpoint(
    load_dir,
    args,
    rank0=False,
    sharded_state_dict=None,
    checkpointing_context=None,
):
    """ Load the base state_dict from the given directory

    If rank0 is true, just loads rank 0 checkpoint, ignoring arguments.
    """
    # Try to load non-persistent checkpoint first
    non_persistent_global_dir = (
        args.non_persistent_global_ckpt_dir
        if args.non_persistent_global_ckpt_dir or load_dir is None
        else os.path.join(load_dir, _NON_PERSISTENT_CKPT_SUBDIR)
    )
    non_persistent_iteration = _get_non_persistent_iteration(
        non_persistent_global_dir, args, checkpointing_context
    )
    iteration, release = -1, False
    tracker_filename = 'because load directory is not defined'
    if load_dir is not None:
        tracker_filename = get_checkpoint_tracker_filename(load_dir)
        if isfile(tracker_filename):
            iteration, release = read_metadata(tracker_filename)

    # Allow user to specify the loaded iteration.
    if getattr(args, "ckpt_step", None):
        iteration = args.ckpt_step

    # Record the iteration loaded (stored separately from args to avoid
    # polluting checkpoints, since args is saved in checkpoints).
    set_loaded_iteration(iteration)

    if non_persistent_iteration != -1:  # there is a non-persistent checkpoint
        if non_persistent_iteration >= iteration:
            return _load_non_persistent_base_checkpoint(
                non_persistent_global_dir,
                args,
                rank0,
                sharded_state_dict,
                non_persistent_iteration,
                checkpointing_context,
            )
        else:
            print_rank_0('WARNING: non-persistent checkpoints are older than persistent checkpoint')

    # Otherwise we are dealing with global checkpoints
    # If no tracker file, return nothing
    if iteration == -1:
        if not rank0:
            print_rank_0('WARNING: could not find the metadata file {}'.format(tracker_filename))
            print_rank_0('    will not load any checkpoints and will start from random')
        # Conditionally exit if checkpoint not found.
        if args.exit_on_missing_checkpoint:
            print_rank_0(">> '--exit-on-missing-checkpoint' set ... exiting. <<")
            if torch.distributed.is_initialized():
                torch.distributed.barrier()
            sys.exit()

        return None, "", False, None

    # Determine the type of the checkpoint on disk.
    checkpoint_name = get_checkpoint_name(load_dir, iteration, release, return_base_dir=True)
    ckpt_format = _get_checkpoint_format(checkpoint_name, args)

    if not rank0:
        dist_infix = "distributed " if ckpt_format == "torch_dist" else ""
        if release:
            print_rank_0(f' loading release {dist_infix}checkpoint from {load_dir}')
        else:
            print_rank_0(
                f' loading {dist_infix}checkpoint from {load_dir} at iteration {iteration}'
            )

    ckpt_type = None

    # Handle global distributed checkpoint
    if ckpt_format == "torch_dist":
        return _load_global_dist_base_checkpoint(
            load_dir, args, rank0, sharded_state_dict, iteration, release, checkpointing_context=checkpointing_context
        )
    elif ckpt_format == "torch":
        ckpt_type = CheckpointType.LEGACY
        # Handle global legacy checkpoint
        if rank0:
            checkpoint_name = find_checkpoint_rank_0(load_dir, iteration, release)
        else:
            checkpoint_name = get_checkpoint_name(load_dir, iteration, release, return_base_dir=False)
        try:
            state_dict = torch.load(checkpoint_name, map_location='cpu')
        except ModuleNotFoundError:
            from megatron.legacy.fp16_deprecated import loss_scaler

            # For backward compatibility.
            if not rank0:
                print_rank_0(' > deserializing using the old code structure ...')
            sys.modules['fp16.loss_scaler'] = sys.modules['megatron.legacy.fp16_deprecated.loss_scaler']
            sys.modules['megatron.fp16.loss_scaler'] = sys.modules[
                'megatron.legacy.fp16_deprecated.loss_scaler'
            ]
            sys.modules['megatron.model'] = sys.modules['megatron.legacy.model']
            state_dict = torch.load(checkpoint_name, map_location='cpu')
            sys.modules.pop('fp16.loss_scaler', None)
            sys.modules.pop('megatron.fp16.loss_scaler', None)
            sys.modules.pop('megatron.model', None)
        except Exception as e:
            print('could not load the checkpoint')
            print(e)
            sys.exit()
    elif ckpt_format == "torch_dcp":
        ckpt_type = CheckpointType.TORCH_DCP

        if rank0:
            # _load_base_checkpoint is called from load_args_from_checkpoint. torch.distributed is not initialized.
            # Load only metadata.
            state_dict = {"args": None, "iteration": None}
            torch.distributed.checkpoint.load(
                state_dict=state_dict,
                checkpoint_id=checkpoint_name,
            )
        else:
            # _load_base_checkpoint is called from load_checkpoint with a proper state dict.
            state_dict = sharded_state_dict

            fs_storage_reader = torch.distributed.checkpoint.FileSystemReader(checkpoint_name)

            torch.distributed.checkpoint.load_state_dict(
                state_dict=state_dict,
                storage_reader=fs_storage_reader,
            )
    elif ckpt_format == "fsdp_dtensor":
        assert HAVE_MEGATRON_FSDP, "Should not be called if Megatron-FSDP is not available."
        if rank0:
            return {}, checkpoint_name, release, CheckpointType.FSDP_DTENSOR

        state_dict = sharded_state_dict
        raw_optimizer_state_dict = state_dict["optimizer"].copy() if "optimizer" in state_dict else None
        raw_model_state_dict = state_dict["model"].copy() if "model" in state_dict else None
        model = state_dict.pop("_model")
        state_dict = preprocess_fsdp_dtensor_state_dict(args, state_dict, model[0])

        ckpt_type = CheckpointType.FSDP_DTENSOR
        fs_storage_reader = torch.distributed.checkpoint.FileSystemReader(checkpoint_name)
        allow_partial_load = not getattr(args, 'strict_fsdp_dtensor_load', False)
        if allow_partial_load:
            state_dict_metadata = fs_storage_reader.read_metadata().state_dict_metadata
            rank = torch.distributed.get_rank()
            import time as _time
            _time.sleep(rank * 0.001)  # Make that logs of different ranks do not overlap
            print_diff_in_state_dicts(state_dict_metadata, state_dict)

        planner = default_planner.DefaultLoadPlanner(allow_partial_load=allow_partial_load)
        torch.distributed.checkpoint.load_state_dict(
            state_dict=state_dict,
            storage_reader=fs_storage_reader,
            planner=planner,
        )

        if raw_optimizer_state_dict is not None:
            state_dict["optimizer"] = raw_optimizer_state_dict

        if raw_model_state_dict is not None:
            state_dict["model"] = raw_model_state_dict
    else:
        raise NotImplementedError(f"checkpoint format {ckpt_format} not supported")

    return state_dict, checkpoint_name, release, ckpt_type


def load_args_from_checkpoint(
    args, load_arg='load', checkpointing_context=None
):
    """Set required arguments from the checkpoint specified in the
    arguments.

    Will overwrite arguments that have a non-None default value, but
    will leave any arguments that default to None as set.

    Returns the same args NameSpace with the new values added/updated.

    If no checkpoint is specified in args, or if the checkpoint is
    there but invalid, the arguments will not be modified

    """
    load_dir = getattr(args, load_arg)

    if load_dir is None:
        print_rank_0('No load directory specified, using provided arguments.')
        return args

    state_dict, checkpoint_name, release, ckpt_type = _load_base_checkpoint(
        load_dir,
        args,
        rank0=True,
        checkpointing_context=checkpointing_context,
    )

    # Args.
    if not state_dict:
        print_rank_0('Checkpoint not found to provide arguments, using provided arguments.')
        return args

    if 'args' not in state_dict:
        print_rank_0('Checkpoint provided does not have arguments saved, using provided arguments.')
        return args

    checkpoint_args = state_dict['args']
    checkpoint_version = state_dict.get('checkpoint_version', 0)
    args.iteration = state_dict['iteration']

    # One-off conversion for foundation models
    if hasattr(checkpoint_args, 'disable_bias_linear'):
        setattr(
            checkpoint_args, 'add_bias_linear', not getattr(checkpoint_args, 'disable_bias_linear')
        )

    # Backward compat: old checkpoints have hybrid_override_pattern but not hybrid_layer_pattern
    if (getattr(checkpoint_args, 'hybrid_override_pattern', None) is not None
            and getattr(checkpoint_args, 'hybrid_layer_pattern', None) is None):
        setattr(
            checkpoint_args, 'hybrid_layer_pattern',
            getattr(checkpoint_args, 'hybrid_override_pattern'),
        )
        # num_layers is now derived from hybrid_layer_pattern in validate_args, and should not be
        # set at the same time as hybrid_layer_pattern.
        if hasattr(checkpoint_args, 'num_layers'):
            setattr(checkpoint_args, 'num_layers', None)

    def _set_arg(arg_name, old_arg_name=None, force=False):
        if not force and getattr(args, arg_name, None) is not None:
            return

        if old_arg_name is not None:
            checkpoint_value = getattr(checkpoint_args, old_arg_name, None)
        else:
            checkpoint_value = getattr(checkpoint_args, arg_name, None)

        if checkpoint_value is not None:
            print_rank_0(f"Setting {arg_name} to {checkpoint_value} from checkpoint")
            setattr(args, arg_name, checkpoint_value)
        else:
            print_rank_0(f"Checkpoint did not provide arguments {arg_name}")

    # Model args.
    _set_arg('num_layers')
    _set_arg('hidden_size')
    _set_arg('ffn_hidden_size')
    _set_arg('seq_length')
    _set_arg('num_attention_heads')
    _set_arg('num_query_groups', force=True)
    _set_arg('group_query_attention', force=True)
    _set_arg('kv_channels')
    _set_arg('max_position_embeddings')
    _set_arg('position_embedding_type', force=True)
    _set_arg('add_position_embedding', force=True)
    _set_arg('use_rotary_position_embeddings', force=True)
    _set_arg('rotary_base', force=True)
    _set_arg('rotary_percent', force=True)
    _set_arg('rotary_interleaved', force=True)
    _set_arg('add_bias_linear', force=True)
    _set_arg('add_qkv_bias', force=True)
    _set_arg('squared_relu', force=True)
    _set_arg('swiglu', force=True)
    _set_arg('untie_embeddings_and_output_weights', force=True)
    _set_arg('apply_layernorm_1p', force=True)
    _set_arg('normalization', force=True)
    _set_arg('apply_query_key_layer_scaling', force=True)
    _set_arg('attention_dropout', force=True)
    _set_arg('hidden_dropout', force=True)

    # Legacy MTP pattern for old checkpoints
    _set_arg('mtp_hybrid_override_pattern', force=True)
    _set_arg('mtp_num_layers', force=True)
    _set_arg('mtp_use_repeated_layer', force=True)

    _set_arg('spec', force=True)

    _set_arg('num_experts', force=True)
    _set_arg('mtp_num_layers', force=True)
    _set_arg('moe_layer_freq', force=True)
    if getattr(checkpoint_args, 'num_experts', None) is not None:
        _set_arg('moe_ffn_hidden_size', force=True)
    else:
        setattr(args, 'moe_ffn_hidden_size', None)
    _set_arg('moe_router_topk', force=True)
    _set_arg('moe_token_dispatcher_type', force=False)
    _set_arg('moe_router_pre_softmax', force=True)
    _set_arg('moe_grouped_gemm', force=True)
    _set_arg('moe_shared_expert_intermediate_size', force=True)
    _set_arg('moe_router_score_function', force=True)
    _set_arg('moe_router_enable_expert_bias', force=True)
    _set_arg('moe_router_topk_scaling_factor', force=True)

    # Mamba args.
    _set_arg('mamba_state_dim', force=True)
    _set_arg('mamba_head_dim', force=True)
    _set_arg('mamba_num_groups', force=True)
    _set_arg('mamba_num_heads', force=True)
    # We need to be able to override hybrid_layer_pattern from the command-line so that different
    # pipelining can be specified when re-loading a model (e.g. for inference or post-training).
    _set_arg('hybrid_layer_pattern')

    # Heterogeneous args.
    _set_arg('heterogeneous_layers_config_path', force=True)
    _set_arg('heterogeneous_layers_config_encoded_json', force=True)

    # MoE latent projection.
    _set_arg('moe_latent_size', force=True)

    # Tokenizer args.
    if args.use_tokenizer_model_from_checkpoint_args:
        # Using checkpoint version might not always be safe (e.g., if running on different cluster).
        _set_arg('tokenizer_model', force=True)
        _set_arg('tokenizer_type', force=True)
    _set_arg('tiktoken_pattern', force=True)
    _set_arg('padded_vocab_size')

    # Checkpoint args.
    _set_arg('ckpt_format')

    # Model parallelism args.
    if args.use_mp_args_from_checkpoint_args:
        if checkpoint_version < 3.0:
            _set_arg('tensor_model_parallel_size', 'model_parallel_size')
        else:
            _set_arg('tensor_model_parallel_size', force=True)
            _set_arg('pipeline_model_parallel_size', force=True)
            _set_arg('virtual_pipeline_model_parallel_size', force=True)
            _set_arg('num_layers_per_virtual_pipeline_stage')
            _set_arg('expert_model_parallel_size', force=True)

    return args, checkpoint_args


def load_checkpoint(ddp_model, optimizer, opt_param_scheduler, load_arg='load', strict=True,
                    checkpointing_context=None, skip_load_to_model_and_opt=False, tp_group: Optional[torch.distributed.ProcessGroup] = None, pp_group: Optional[torch.distributed.ProcessGroup] = None, dp_cp_group: Optional[torch.distributed.ProcessGroup] = None):
    """Load a model checkpoint and return the iteration.
    strict (bool): whether to strictly enforce that the keys in
        :attr:`state_dict` of the checkpoint match the names of
        parameters and buffers in model.
    skip_load_to_model_and_opt (bool): whether to call `load_state_dict`
        for :attr:`model` and :attr:`optimizer`. In case of running FSDP2 with mcore distributed
        checkpointing, the tensors are already loaded in-place by `_load_base_checkpoint`.
    dp_cp_group: Data parallel + context parallel group (default: None, falls back to mpu API)
    """
    args = get_args()
    load_dir = getattr(args, load_arg)

    # Finetuning directories
    pretrained_dir = getattr(args, 'pretrained_checkpoint', None)
    if pretrained_dir is not None and not checkpoint_exists(load_dir):
        print_rank_0(
            f'Checkpoint file not found in load directory {load_dir} attempting to finetune with checkpoint in {pretrained_dir}'
        )
        load_dir = pretrained_dir
        if not checkpoint_exists(load_dir):
            raise FileNotFoundError("No checkpoint found in load directory or pretrained directory")
        args.finetune = True

    model = unwrap_model(ddp_model)

    ckpt_format = args.ckpt_format
    if args.auto_detect_ckpt_format or ckpt_format == "torch_dist":
        state_dict, checkpoint_name, release, ckpt_type = _load_base_checkpoint(
            load_dir,
            args,
            rank0=True,
            checkpointing_context=checkpointing_context,
        )

        ckpt_format = None
        if ckpt_type == CheckpointType.TORCH_DCP:
            ckpt_format = "torch_dcp"
        elif ckpt_type == CheckpointType.FSDP_DTENSOR:
            ckpt_format = "fsdp_dtensor"
        elif ckpt_type == CheckpointType.LEGACY:
            ckpt_format = "torch"
        elif ckpt_type in [CheckpointType.LOCAL, CheckpointType.GLOBAL]:
            ckpt_format = "torch_dist"
        elif ckpt_type == None:
            pass    # Not loaded.
        else:
            raise NotImplementedError(f"checkpoint format {ckpt_format} not supported")

    load_kwargs = {}
    ignore_rng_state = False
    ignore_rerun_state = True
    if ckpt_format == "torch_dist":
        ckpt_args = types.SimpleNamespace()
        if state_dict is not None and "args" in state_dict:
            ckpt_args = state_dict.get("args")

        if not hasattr(ckpt_args, "tensor_model_parallel_size"):
            print_rank_0("WARNING: TP size not found in checkpoint args, using 1 as default.")
        if not hasattr(ckpt_args, "pipeline_model_parallel_size"):
            print_rank_0("WARNING: PP size not found in checkpoint args, using 1 as default.")

        ckpt_tp_pp = (
            getattr(ckpt_args, "tensor_model_parallel_size", 1),
            getattr(ckpt_args, "pipeline_model_parallel_size", 1),
        )
        run_tp_pp = (
            args.tensor_model_parallel_size,
            args.pipeline_model_parallel_size,
        )

        ckpt_world_size = getattr(ckpt_args, 'world_size', 0)
        run_world_size = getattr(args, 'world_size', 0)
        ckpt_dp = getattr(ckpt_args, 'data_parallel_size', 0)
        run_dp = getattr(args, 'data_parallel_size', 0)
        mismatch_msg = "(TP, PP) mismatch after resume ({} vs {} from checkpoint)".format(
            run_tp_pp, ckpt_tp_pp
        )

        # Determine if RNG state will be loaded
        if (ckpt_tp_pp == run_tp_pp and not release and not args.finetune and not args.no_load_rng
                and not getattr(ckpt_args, 'no_save_rng', False)):
            if tp_group is None and pp_group is None:
                tp_group = mpu.get_tensor_model_parallel_group()
                pp_group = mpu.get_pipeline_model_parallel_group()
            gen_sd_rng_state = get_rng_state(args.ckpt_format, tp_group, pp_group)  # we can load the rng state
        else:
            ignore_rng_state = True
            gen_sd_rng_state = None
            if ckpt_tp_pp != run_tp_pp:
                print_rank_0("{}: RNG state will be ignored".format(mismatch_msg))

        if ckpt_type == CheckpointType.LOCAL:
            sharded_sd_metadata = _build_sharded_state_dict_metadata(args)
        else:
            sharded_sd_metadata = dist_checkpointing.load_content_metadata(preloaded_state_dict=state_dict)
        print_rank_0(f'sharded_state_dict metadata loaded from the checkpoint: {sharded_sd_metadata}')

        # Determine if optimizer state will be loaded
        if (not release and not args.finetune and not args.no_load_optim
                and not getattr(ckpt_args, 'no_save_optim', False)):
            gen_sd_optim = optimizer
            gen_sd_opt_param_scheduler = opt_param_scheduler

            if args.use_distributed_optimizer:
                if sharded_sd_metadata is None:
                    # Backward-compatibility with old checkpoints which don't have content versioning
                    # Can be removed after ending support for MLM optimizer checkpoints with MCore < v0.13
                    # (for MCore v0.13+ checkpoints `sharded_sd_metadata is not None`)
                    sharded_sd_metadata = {
                        'distrib_optim_sharding_type': ('fully_sharded_model_space'
                                                        if getattr(ckpt_args, 'ckpt_fully_parallel_save', False)
                                                        else 'dp_zero_gather_scatter'),
                    }
                if (
                    ckpt_tp_pp != run_tp_pp
                    and sharded_sd_metadata['distrib_optim_sharding_type']
                    not in DistributedOptimizer.checkpoint_fully_reshardable_formats
                ):
                    raise RuntimeError(f"{mismatch_msg}: not supported for DistributedOptimizer with sharding type"
                                       f" {sharded_sd_metadata['distrib_optim_sharding_type']}."
                                       f" Please use `--ckpt-fully-parallel-save` flag during checkpoint saving.")

                # Check if fully parallel load is compatible with sharding type
                if args.ckpt_fully_parallel_load and sharded_sd_metadata['distrib_optim_sharding_type'] == 'dp_zero_gather_scatter':
                    raise RuntimeError("Fully parallel load is not supported for dp_zero_gather_scatter checkpoints. "
                                       "Please remove --ckpt-fully-parallel-load flag")
        else:
            gen_sd_optim = None
            gen_sd_opt_param_scheduler = None

        if dp_cp_group is None:
            dp_cp_group = mpu.get_data_parallel_group(with_context_parallel=True)

        # dist_checkpointing.load_content_metadata(...) may return None.
        # Ensure we have a dict before updating to avoid NoneType AttributeError.
        if sharded_sd_metadata is None:
            sharded_sd_metadata = {}
        sharded_sd_metadata["dp_cp_group"] = dp_cp_group

        optim_sd_kwargs = dict(metadata=sharded_sd_metadata, is_loading=True)
        model_sd_kwargs = dict(metadata=sharded_sd_metadata)

        # Determine if rerun state will be loaded
        gen_sd_rerun_state = None
        if (
            ckpt_world_size == run_world_size
            and ckpt_tp_pp == run_tp_pp
            and ckpt_dp == run_dp
            and not release
            and not args.finetune
            and 'rerun_state_machine' in state_dict
        ):
            rerun_state_machine = get_rerun_state_machine()
            if rerun_state_machine.validate_state_dict(state_dict['rerun_state_machine']):
                gen_sd_rerun_state = rerun_state_machine.state_dict(
                    data_iterator=None, ckpt_format=ckpt_format, force=True,
                )
                ignore_rerun_state = False
        if (
            ckpt_world_size != run_world_size
            or ckpt_tp_pp != run_tp_pp
            or ckpt_dp != run_dp
        ):
            print_rank_0("Job sharding has changed: Rerun state will be ignored")

        # [ModelOpt]: Initial loading from non-resume sharded checkpoint to a Distillation Model
        # will result in key mismatch with loss modules potentially containing parameters, since
        # it requires generating a state_dict before loading. Here we hide those modules if present.
        with contextlib.ExitStack() as stack:  # Allows multiple context managers for each model shard
            if args.finetune and hasattr(model[0], "hide_loss_modules"):
                for m in model:
                    stack.enter_context(m.hide_loss_modules())
            load_kwargs['sharded_state_dict'] = generate_state_dict(
                args, model, gen_sd_optim, gen_sd_opt_param_scheduler, gen_sd_rng_state,
                optim_sd_kwargs=optim_sd_kwargs, model_sd_kwargs=model_sd_kwargs,
                rerun_state=gen_sd_rerun_state
            )
    elif args.ckpt_format == "torch_dcp":
        model_sd = model[0].state_dict()
        optimizer_sd = optimizer.state_dict(is_loading=True)
        if tp_group is None and pp_group is None:
            tp_group = mpu.get_tensor_model_parallel_group()
            pp_group = mpu.get_pipeline_model_parallel_group()
        sharded_state_dict = {
            "model": model_sd,
            "optimizer": optimizer_sd,
            "args": None,
            "iteration": 1,
            "rng_state": get_rng_state(args.ckpt_format, tp_group, pp_group),
            "checkpoint_version": None,
            "opt_param_scheduler": opt_param_scheduler.state_dict(),
            "num_floating_point_operations_so_far": 0,
        }
        load_kwargs["sharded_state_dict"] = sharded_state_dict
    elif args.ckpt_format == "fsdp_dtensor":
        reader = FileSystemReader(get_load_checkpoint_path_by_args(args))
        try:
            state_dict_metadata = reader.read_metadata().state_dict_metadata
        except FileNotFoundError:
            state_dict_metadata = {}

        gen_sd_rerun_state = {}
        gen_sd_opt_param_scheduler = None
        gen_sd_rng_state = None
        gen_sd_optim = None
        if not args.finetune:
            if "rerun_state_machine" in state_dict_metadata:
                gen_sd_rerun_state = get_rerun_state_machine().state_dict(
                    data_iterator=None, ckpt_format=ckpt_format, force=True,
                )
            if not args.no_load_rng:
                gen_sd_rng_state = get_rng_state(args.ckpt_format, tp_group, pp_group)
            if not args.no_load_optim:
                gen_sd_optim = optimizer
                gen_sd_opt_param_scheduler = opt_param_scheduler

        optim_sd_kwargs = dict(metadata=_build_sharded_state_dict_metadata(args), is_loading=True)

        state_dict = generate_state_dict(
            args,
            model=model,
            optimizer=gen_sd_optim,
            opt_param_scheduler=gen_sd_opt_param_scheduler,
            rng_state=gen_sd_rng_state,
            optim_sd_kwargs=optim_sd_kwargs,
            rerun_state=gen_sd_rerun_state,
            iteration=1,
        )
        state_dict["_model"] = model
        load_kwargs["sharded_state_dict"] = state_dict

    state_dict, checkpoint_name, release, ckpt_type = _load_base_checkpoint(
        load_dir, args, rank0=False, checkpointing_context=checkpointing_context,
        **load_kwargs
    )

    # Checkpoint not loaded.
    if state_dict is None:
        # Iteration and num_floating_point_operations_so_far default to 0.
        return 0, 0

    # Set checkpoint version.
    set_checkpoint_version(state_dict.get('checkpoint_version', 0))

    # Convert to regular torch tensor to DTensor.
    if ckpt_type == CheckpointType.LEGACY and args.ckpt_format == "torch_dcp":
        dtensor_state_dict = _to_dtensor(ddp_model, state_dict["model"])
        state_dict["model"] = dtensor_state_dict

    # Set iteration.
    if args.finetune or release:
        iteration = 0
    else:
        try:
            iteration = state_dict['iteration']
        except KeyError:
            try:  # Backward compatible with older checkpoints
                iteration = state_dict['total_iters']
            except KeyError:
                print_rank_0('A metadata file exists but unable to load '
                             'iteration from checkpoint {}, exiting'.format(checkpoint_name))
                sys.exit()
    num_floating_point_operations_so_far = state_dict.get('num_floating_point_operations_so_far', 0)

    # Check arguments.
    if 'args' in state_dict and not args.finetune:
        checkpoint_args = state_dict['args']
        check_checkpoint_args(checkpoint_args)
        args.consumed_train_samples = getattr(checkpoint_args,
                                              'consumed_train_samples', 0)
        args.skipped_train_samples = getattr(checkpoint_args,
                                             'skipped_train_samples', 0)
        update_num_microbatches(consumed_samples=args.consumed_train_samples, verbose=True)
        args.consumed_valid_samples = getattr(checkpoint_args,
                                              'consumed_valid_samples', 0)
    else:
        print_rank_0('could not find arguments in the checkpoint ...')

    def load_model_state_dict(module, state_dict, strict: bool):
        """Helper function to load state dict with fallback for missing extra states."""
        try:
            module.load_state_dict(state_dict, strict=strict)
        except Exception as e:
            if strict:
                # Fallback support for backward compatibility breaking changes in TransformerEngine
                load_return = module.load_state_dict(state_dict, strict=False)
                print(f"load_return: {load_return}")
    # Model.
    if not skip_load_to_model_and_opt:
        if len(ddp_model) == 1:
            load_model_state_dict(ddp_model[0], state_dict['model'], strict)
        else:
            for i in range(len(ddp_model)):
                # If there is no corresponding model in the state_dict, it will be ignored.
                # It means that this is an empty stage.
                if 'model%d' % i not in state_dict:
                    continue
                load_model_state_dict(ddp_model[i], state_dict['model%d' % i], strict)
    # Fix up query/key/value matrix ordering if needed.
    checkpoint_version = get_checkpoint_version()
    print_rank_0(f' checkpoint version {checkpoint_version}')
    fix_query_key_value_ordering(model, checkpoint_version)

    # Optimizer.
    if not release and not args.finetune and not args.no_load_optim:
        try:
            # Load state dict.
            if getattr(args, "optimizer", "adam").startswith("dist_") and args.ckpt_format == 'torch':
                # LayerWiseDistributedOptimizer load optimizer state from file on different ranks
                dp_rank = mpu.get_data_parallel_rank()
                optim_checkpoint_name = os.path.join(os.path.dirname(checkpoint_name), f"layer_wise_optimizer_{dp_rank}.pt")
                optimizer.load_state_dict_from_file(optim_checkpoint_name)
            elif not skip_load_to_model_and_opt and optimizer is not None and not optimizer.is_stub_optimizer:
                optimizer.load_state_dict(state_dict['optimizer'])

            # Load distributed optimizer's custom parameter state.
            # For distributed checkpoint it's already loaded in load_state_dict above
            is_torch_dist = ckpt_format == "torch_dist"
            if args.use_distributed_optimizer and not is_torch_dist and ckpt_format not in ["torch_dcp", "fsdp_dtensor"]:
                # NOTE: this is a manual read of the tracker file.
                # This code should not be reached when reading from a non_persistent checkpoint
                assert not is_torch_dist
                tracker_filename = get_checkpoint_tracker_filename(load_dir)
                iteration, release = read_metadata(tracker_filename)
                model_checkpoint_name = \
                    get_checkpoint_name(load_dir, iteration, release)
                optim_checkpoint_name = \
                    get_distributed_optimizer_checkpoint_name(
                        model_checkpoint_name)
                optimizer.load_parameter_state(optim_checkpoint_name,
                                               update_legacy_format=args.ckpt_convert_update_legacy_dist_opt_format)

            # Load scheduler.
            if opt_param_scheduler is not None:
                if 'lr_scheduler' in state_dict: # backward compatbility
                    opt_param_scheduler.load_state_dict(state_dict['lr_scheduler'])
                else:
                    opt_param_scheduler.load_state_dict(state_dict['opt_param_scheduler'])
        except KeyError as e:
            print_rank_0('Unable to load optimizer from checkpoint {}. '
                         'Specify --no-load-optim or --finetune to prevent '
                         'attempting to load the optimizer state, '
                         'exiting ...'.format(checkpoint_name))
            raise e
    else:
        if (args.fp16 or args.bf16) and optimizer is not None:
            if args.load_main_params_from_ckpt:
                optimizer.reload_model_params(state_dict=state_dict)
            else:
                optimizer.reload_model_params()

    # rerun state
    if not ignore_rerun_state:
        try:
            if 'rerun_state_machine' in state_dict:
                get_rerun_state_machine().load_state_dict(state_dict['rerun_state_machine'])
        except Exception as e:
            print_rank_0(f"Unable to restore RerunMachine from checkpoint: {e}. Skipping.")

    # rng states.
    if not release and not args.finetune and not args.no_load_rng and not ignore_rng_state:
        try:
            cuda_rng_tracker = tensor_parallel.get_cuda_rng_tracker()
            graph_safe_rng = tensor_parallel.is_graph_safe_cuda_rng_tracker(cuda_rng_tracker)
            if 'rng_state' in state_dict:
                if args.ckpt_format == "fsdp_dtensor":
                    # FSDP DTensor checkpoints store rng_state in a different format.
                    tp_rank = mpu.get_tensor_model_parallel_rank()
                    pp_rank = mpu.get_pipeline_model_parallel_rank()
                    if f"({pp_rank}, {tp_rank})" in state_dict['rng_state']:
                        rng_state = state_dict['rng_state'][f"({pp_rank}, {tp_rank})"]
                    else:
                        print_rank_0("WARNING: RNG state not found for current TP/PP rank")
                        rng_state = next(iter(state_dict['rng_state'].values()))
                else:
                    rng_state = state_dict['rng_state']

                # access rng_state for data parallel rank
                if args.data_parallel_random_init:
                    rng_state = rng_state[mpu.get_data_parallel_rank()]
                else:
                    rng_state = rng_state[0]
                random.setstate(rng_state['random_rng_state'])
                np.random.set_state(rng_state['np_rng_state'])
                torch.set_rng_state(rng_state['torch_rng_state'])
                torch.cuda.set_rng_state(rng_state['cuda_rng_state'])
                # Check for empty states array
                if not rng_state['rng_tracker_states']:
                    raise KeyError
                rng_tracker_states = {
                    k: tensor_parallel.convert_cuda_rng_state(v, to_graphable=graph_safe_rng)
                    for k, v in rng_state['rng_tracker_states'].items()
                }
            else:  # backward compatability
                random.setstate(state_dict['random_rng_state'])
                np.random.set_state(state_dict['np_rng_state'])
                torch.set_rng_state(state_dict['torch_rng_state'])
                torch.cuda.set_rng_state(state_dict['cuda_rng_state'])
                # Check for empty states array
                if not state_dict['rng_tracker_states']:
                    raise KeyError
                rng_tracker_states = {
                    k: tensor_parallel.convert_cuda_rng_state(v, to_graphable=graph_safe_rng)
                    for k, v in state_dict['rng_tracker_states'].items()
                }
            cuda_rng_tracker.set_states(rng_tracker_states)
        except KeyError:
            print_rank_0('Unable to load rng state from checkpoint {}. '
                         'Specify --no-load-rng or --finetune to prevent '
                         'attempting to load the rng state, '
                         'exiting ...'.format(checkpoint_name))
            sys.exit()

    # Some utilities want to load a checkpoint without distributed being initialized
    if torch.distributed.is_initialized():
        torch.distributed.barrier()

    print_rank_0(f'  successfully loaded checkpoint from {load_dir} '
                 f'[ t {mpu.get_tensor_model_parallel_rank() + 1}/{mpu.get_tensor_model_parallel_world_size()}, '
                 f'p {mpu.get_pipeline_model_parallel_rank() + 1}/{mpu.get_pipeline_model_parallel_world_size()} ] '
                 f'at iteration {iteration}')
                 
    if has_nvidia_modelopt:
        print_distributed_quant_summary(model, msg="After loading checkpoint")
        
    # Additional callback for wandb (last rank)
    if not torch.distributed.is_initialized() \
       or is_last_rank():
        wandb_utils.on_load_checkpoint_success(checkpoint_name, load_dir)

    torch.cuda.empty_cache()

    if iteration > 0:
        # Notify FT that a checkpoint was loaded.
        is_local_chkpt = (ckpt_type == CheckpointType.LOCAL)
        ft_integration.on_checkpoint_loaded(is_local_chkpt=is_local_chkpt)

    # Patch checkpoint as needed if required field is not found.
    if optimizer is not None:
        log_printed = False
        for param_group in optimizer.param_groups:
            if 'default_config' not in param_group:
                param_group['default_config'] = True
                if not log_printed:
                    print_rank_0(">>> Inserting 'default_config' field into optimizer.param_groups...")
                log_printed = True

    return iteration, num_floating_point_operations_so_far


def _to_dtensor(wrapped_model, model_state_dict):
    device_mesh = wrapped_model[0].device_mesh

    new_model_sd = dict()
    for k, v in model_state_dict.items():
        # FP8 extra state cannot be converted to dtensor yet.
        if "_extra_state" in k:
            new_model_sd[k] = v
        else:
            new_model_sd[k] = torch.distributed.tensor.distribute_tensor(v, device_mesh)

    return new_model_sd


def load_biencoder_checkpoint(model, only_query_model=False,
                              only_context_model=False, custom_load_path=None):
    """
    selectively load retrieval models for indexing/retrieving
    from saved checkpoints
    """

    args = get_args()

    model = unwrap_model(model)

    load_path = custom_load_path if custom_load_path is not None else args.load

    tracker_filename = get_checkpoint_tracker_filename(load_path)

    with open_file(tracker_filename, 'r') as f:
        iteration = int(f.read().strip())

    checkpoint_name = get_checkpoint_name(load_path, iteration,
                                          args.use_distributed_optimizer,
                                          release=False)

    if mpu.get_data_parallel_rank() == 0:
        print('global rank {} is loading checkpoint {}'.format(
            torch.distributed.get_rank(), checkpoint_name))

    state_dict = torch.load(checkpoint_name, map_location='cpu')
    ret_state_dict = state_dict['model']

    if only_query_model:
        ret_state_dict.pop('context_model')
    if only_context_model:
        ret_state_dict.pop('query_model')

    assert len(model) == 1
    model[0].load_state_dict(ret_state_dict)
    torch.distributed.barrier()

    if mpu.get_data_parallel_rank() == 0:
        print(' successfully loaded {}'.format(checkpoint_name))

    return model


================================================
FILE: megatron/training/config/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

from megatron.training.config.common_config import (
    RNGConfig,
    ProfilingConfig,
    DistributedInitConfig,
)
from megatron.training.config.training_config import (
    TrainingConfig,
    ValidationConfig,
    SchedulerConfig,
    LoggerConfig,
    CheckpointConfig,
)
from megatron.training.config.resilience_config import (
    RerunStateMachineConfig,
    StragglerDetectionConfig,
)


================================================
FILE: megatron/training/config/common_config.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
from dataclasses import dataclass, field
from typing import Literal
import os

@dataclass(kw_only=True)
class RNGConfig:
    """Configuration settings for random number generation."""

    seed: int = 1234
    """Random seed used for python, numpy, pytorch, and cuda."""

    te_rng_tracker: bool = False
    """Use the Transformer Engine version of the random number generator.
    Required for CUDA graphs support."""

    inference_rng_tracker: bool = False
    """Use a random number generator configured for inference."""

    data_parallel_random_init: bool = False
    """Enable random initialization of params across data parallel ranks"""


@dataclass(kw_only=True)
class ProfilingConfig:
    """Configuration settings for profiling the training process."""

    use_nsys_profiler: bool = field(default=False, metadata={"argparse_meta": {"arg_names": ["--profile"], "dest": "profile"}})
    """Enable nsys profiling. When using this option, nsys options should be specified in
    commandline. An example nsys commandline is
    `nsys profile -s none -t nvtx,cuda -o <path/to/output_file> --force-overwrite true
    --capture-range=cudaProfilerApi --capture-range-end=stop`.
    """

    profile_step_start: int = 10
    """Global step to start profiling."""

    profile_step_end: int = 12
    """Global step to stop profiling."""

    use_pytorch_profiler: bool = False
    """Use the built-in pytorch profiler. Useful if you wish to view profiles in tensorboard."""

    pytorch_profiler_collect_shapes: bool = False
    """Collect tensor shape in pytorch profiler."""
  
    pytorch_profiler_collect_callstack: bool = False
    """Collect callstack in pytorch profiler."""
  
    pytorch_profiler_collect_chakra: bool = False                
    """Collect chakra trace in pytorch profiler."""

    profile_ranks: list[int] = field(default_factory=lambda: [])
    """Global ranks to profile."""

    record_memory_history: bool = False
    """Record memory history in last rank."""

    memory_snapshot_path: str = "snapshot.pickle"
    """Specifies where to dump the memory history pickle."""

    record_shapes: bool = False
    """Record shapes of tensors."""

    nvtx_ranges: bool = False
    """Enable NVTX range annotations for profiling. When enabled, inserts NVTX markers
    to categorize execution in profiler output."""


@dataclass(kw_only=True)
class DistributedInitConfig:
    """Configuration settings for distributed training initialization."""

    distributed_backend: Literal["nccl", "gloo"] = "nccl"
    """Which backend to use for distributed training."""

    distributed_timeout_minutes: int = 10
    """Timeout minutes for torch.distributed."""

    align_grad_reduce: bool = True
    """If not set, all PP stages will launch gradient reduces simultaneously.
    Otherwise, each PP stage will independently launch as needed.
    """

    local_rank: int = field(default_factory=lambda: int(os.getenv("LOCAL_RANK", "0")))
    """local rank passed from distributed launcher."""

    lazy_mpu_init: bool = False
    """If set to True, initialize_megatron() skips DDP initialization and returns function to complete it instead.
    Also turns on --use-cpu-initialization flag. This is for external DDP manager."""

    use_megatron_fsdp: bool = False
    """Use Megatron's Fully Sharded Data Parallel. Cannot be used together with use_torch_fsdp2."""

    use_torch_fsdp2: bool = False
    """Use the torch FSDP2 implementation. FSDP2 is not currently working with Pipeline Parallel.
    It is still not in a stable release stage, and may therefore contain bugs or other
    potential issues."""

    nccl_communicator_config_path: str | None = None
    """Path to the yaml file with NCCL communicator configurations. The number of min/max thread
    groups and thread group cluster size of each communicator can be configured by setting
    `min_ctas`, `max_ctas`, and `cga_cluster_size`."""

    use_tp_pp_dp_mapping: bool = False
    """If set, distributed ranks initialize order is changed from tp-cp-ep-dp-pp to tp-cp-ep-pp-dp.
    """

    enable_gloo_process_groups: bool = field(default=True, metadata={"argparse_meta": {"arg_names": ["--disable-gloo-process-groups"]}})
    """If enabled, create Gloo process groups for communications."""

    use_sharp: bool = False
    """Set the use of SHARP for the collective communications of data-parallel process groups.
    When `True`, run barrier within each data-parallel process group,
    which specifies the SHARP application target groups.
    """

    sharp_enabled_group: Literal["dp", "dp_replica"] | None = None
    """IB SHARP can be enabled from only one communication group.
    By default, it is enabled from dp group if not specified and use_sharp=True.
    Available options: [dp, dp_replica]
    """

    high_priority_stream_groups: list[str] | None = field(default_factory=list)
    """Specify which communicator groups should use high priority streams during creation.
    Assigning high priority to communication streams ensures that communication kernels
    are scheduled with higher priority, minimizing the exposed communication when it is
    overlapped with other computation kernels.
    """

    distributed_timeout_seconds_after_init: int | None = None
    """Timeout in seconds for process groups after initialization. This timeout is applied to all process groups after initialization and the first iteration completes."""

    flight_recorder_dump_path: str | None = None
    """Path for NCCL flight recorder trace dumps. Sets TORCH_FR_DUMP_TEMP_FILE and TORCH_NCCL_DEBUG_INFO_TEMP_FILE env variables before distributed init."""

    flight_recorder_trace_buffer_size: int = 2000
    """Size of the NCCL flight recorder trace buffer (TORCH_NCCL_TRACE_BUFFER_SIZE)."""

    flight_recorder_dump_on_timeout: bool = True
    """Dump flight recorder traces on NCCL timeout (TORCH_NCCL_DUMP_ON_TIMEOUT)."""

    flight_recorder_include_stack_trace: bool = False
    """Include stack traces in flight recorder dumps (TORCH_INCLUDE_STACK_TRACE)."""

    flight_recorder_include_only_active: bool = True
    """Include only active operations in flight recorder dumps (TORCH_INCLUDE_ONLY_ACTIVE)."""

    flight_recorder_extra_dump_on_exec: bool = True
    """Enable extra flight recorder dump on execution (TORCH_NCCL_EXTRA_DUMP_ON_EXEC)."""

    disable_jit_fuser: bool = False
    """Disable the JIT fuser."""


================================================
FILE: megatron/training/config/resilience_config.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
from dataclasses import dataclass
from typing import Literal

@dataclass(kw_only=True)
class RerunStateMachineConfig:
    """Configuration for the rerun state machine used for result validation or stats."""

    error_injection_rate: int = 0
    """Rate at which to inject unexpected results, e.g. 1000 means
    once every 1000 result validations"""

    error_injection_type: Literal["correct_result", "transient_error", "persistent_error"] = "transient_error"
    """Type of error to inject. """

    rerun_mode: Literal["disabled", "validate_results", "report_stats"] = "validate_results"
    """Use re-run engine to validate results (default) or to emit stats
    on variability of computations due to non-deterministic algorithms."""

    check_for_nan_in_loss: bool = True
    """Check for NaN in the loss."""

    check_for_spiky_loss: bool = False
    """Check for spiky loss."""


@dataclass(kw_only=True)
class StragglerDetectionConfig:
    """Configuration settings for detecting and logging GPU stragglers."""

    log_straggler: bool = False
    """If set, tracks and logs straggler per GPU."""

    straggler_ctrlr_port: int = 65535
    """Port number to toggle StragglerDetector on/off at runtime"""

    straggler_minmax_count: int = 1
    """Number of ranks to report with high/low estimated throughput"""

    disable_straggler_on_startup: bool = False
    """If set, StragglerDetector is disabled on startup."""


================================================
FILE: megatron/training/config/training_config.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
from dataclasses import dataclass, field
import signal
from typing import Literal, Optional

@dataclass(kw_only=True)
class TrainingConfig:
    """Configuration settings related to the training loop."""

    micro_batch_size: int | None = None
    """Batch size per model instance (local batch size). Global batch size is local batch size times
    data parallel size times number of micro batches."""

    global_batch_size: int | None = None
    """Training batch size. If set, it should be a multiple of micro-batch-size times
    data-parallel-size. If this value is None, then use micro-batch-size * data-parallel-size
    as the global batch size. This choice will result in 1 for number of micro-batches."""

    rampup_batch_size: list[int] | None = field(default=None, metadata={"argparse_meta": {"nargs": 3}})
    """Batch size ramp up with the following values: <start batch size>, <batch size increment>,
    <ramp-up samples>
    For example:
        rampup-batch-size = [16, 8, 300000]
        global-batch-size 1024
    will start with global batch size 16 and over (1024 - 16) / 8 = 126 intervals will increase
    the batch size linearly to 1024. In each interval we will use approximately
    300000 / 126 = 2380 samples.
    """

    decrease_batch_size_if_needed: bool = False
    """If set, decrease batch size if microbatch_size * dp_size does not 
    divide batch_size. Old batch_size will be restored if training is re-started 
    with dp_size that divides batch_size // microbatch_size."""

    empty_unused_memory_level: Literal[0, 1, 2] = 0
    """Call torch.cuda.empty_cache() each iteration (training and eval), to reduce fragmentation.
    0=off, 1=moderate, 2=aggressive.
    """

    check_weight_hash_across_dp_replicas_interval: int | None = None
    """Interval to check weight hashes are same across DP replicas. If not specified, weight hashes not checked."""

    train_sync_interval: int | None = None
    """Training CPU-GPU synchronization interval, to ensure that CPU is not running too far ahead of GPU."""

    train_iters: int | None = None
    """Total number of iterations to train over all training runs.
    Note that either train_iters or train_samples should be provided.
    """

    train_samples: int | None = None
    """Total number of samples to train over all training runs.
    Note that either train_iters or train_samples should be provided."""

    exit_interval: int | None = None
    """Exit the program after the iteration is divisible by this value."""

    exit_duration_in_mins: int | None = None
    """Exit the program after this many minutes."""

    exit_signal_handler: bool = False
    """Dynamically save the checkpoint and shutdown the training if SIGTERM is received"""

    exit_signal: signal.Signals = signal.SIGTERM
    """Signal for the signal handler to detect."""

    exit_signal_handler_for_dataloader: bool = False
    """Use signal handler for dataloader workers"""

    manual_gc: bool = False
    """Disable the threshold-based default garbage collector and trigger the garbage collection
    manually. Manual garbage collection helps to align the timing of the collection across ranks
    which mitigates the impact of CPU-associated jitters. When the manual gc is enabled, garbage
    collection is performed only at the start and the end of the validation routine by default."""

    manual_gc_interval: int = 0
    """Training step interval to trigger manual garbage collection. Values > 0 will trigger garbage
    collections between training steps.
    """

    manual_gc_eval: bool = True
    """When using manual garbage collection, this controls garbage collection at the start and the
    end of each evaluation run.
    """

    iterations_to_skip: list[int] = field(default_factory=list)
    """List of iterations to skip during training, empty by default."""


@dataclass(kw_only=True)
class ValidationConfig:
    """Configuration settings related to validation during or after model training."""

    eval_iters: int | None = 100
    """Number of iterations to run for evaluation. Used for both validation and test. If not set,
    evaluation will not run."""

    eval_interval: int | None = None
    """Interval between running evaluation on validation set. If not set, evaluation will not run
    during training.
    """

    skip_train: bool = False
    """If set, bypass the training loop, perform evaluation for validation/test, and exit."""

    test_mode: bool = False
    """Run all real-time test alongside the experiment."""

    full_validation: bool = False
    """If set, each time validation occurs it uses the full validation dataset(s). This currently only works for GPT datasets!"""

    multiple_validation_sets: bool = False
    """If set, multiple datasets listed in the validation split are evaluated independently with a
       separate loss for each dataset in the list. This argument requires that no weights are 
       included in the list.
    """


@dataclass(kw_only=True)
class SchedulerConfig:
    """Configuration settings for the learning rate scheduler and weight decay."""

    # ---------------- Learning rate config. ----------------
    lr_decay_style: Literal["constant", "linear", "cosine", "inverse-square-root", "WSD"] = "linear"
    """Learning rate decay function."""

    lr_wsd_decay_style: Literal["exponential", "linear", "cosine", "minus_sqrt"] = "exponential"
    """Decay style for the annealing phase of WSD"""

    lr_decay_iters: int | None = None
    """number of iterations to decay learning rate over, If None defaults to train iters"""

    lr_decay_samples: int | None = None
    """number of samples to decay learning rate over, If None defaults to train samples"""

    lr_wsd_decay_iters: int | None = None
    """number of iterations for the annealing phase in the wsd schedule"""

    lr_wsd_decay_samples: int | None = None
    """number of samples for the annealing phase in the wsd schedule"""

    lr_warmup_fraction: float | None = None
    """fraction of lr-warmup-(iters/samples) to use for warmup (as a float)"""

    lr_warmup_iters: int = 0
    """number of iterations to linearly warmup learning rate over."""

    lr_warmup_samples: int = 0
    """number of samples to linearly warmup learning rate over."""

    lr_warmup_init: float = 0.0
    """Initial value for learning rate warmup. The scheduler starts warmup from this value."""

    lr_decay_steps: int | None = field(init=False, default=None)
    """number of samples to decay learning rate over. Calculated at runtime from 
    lr_decay_iters or lr_decay_samples.
    """

    lr_warmup_steps: int | None = field(init=False, default=None)
    """number of samples to warmup learning rate over. Calculated at runtime from
    lr_warmup_fraction, lr_warmup_iters, or lr_warmup_samples.
    """
    
    override_opt_param_scheduler: bool = field(default=False, metadata={"argparse_meta": {"arg_names": ["--override-opt_param-scheduler", "--override-opt-param-scheduler"]}})
    """Reset the values of the scheduler (learning rate, warmup iterations, minimum learning rate,
    maximum number of iterations, and decay style) from input arguments and ignore values from
    checkpoints. Note that all the above values will be reset."""

    use_checkpoint_opt_param_scheduler: bool = field(default=False, metadata={"argparse_meta": {"arg_names": ["--use-checkpoint-opt_param-scheduler", "--use-checkpoint-opt-param-scheduler"]}})
    """Use checkpoint to set the values of the scheduler (learning rate, warmup iterations,
    minimum learning rate, maximum number of iterations, and decay style) from checkpoint
    and ignore input arguments."""

    # ---------------- Regularization config. ----------------

    start_weight_decay: float | None = None
    """Initial weight decay coefficient for L2 regularization."""

    end_weight_decay: float | None = None
    """End of run weight decay coefficient for L2 regularization."""

    weight_decay_incr_style: Literal["constant", "linear", "cosine"] = "constant"
    """Weight decay increment function."""

    no_weight_decay_cond_type: Literal["qwen3_next"] | None = None
    """Type of no weight decay condition. Choices:
    None (default): param no weight decay if and only if it is 1D; or it is bias;
    or it is embedding and embedding_init_method_std is not None.
    "qwen3_next": In addition to the default rules, apply weight decay to qk layernorm as a special case."""

    wd_incr_steps: int | None = field(init=False, default=None)
    """Number of samples to increment weight decay over. Calculated at runtime."""

    wsd_decay_steps: int | None = field(init=False, default=None)
    """Number of samples to decay WSD weight decay. Calculated at runtime."""


@dataclass(kw_only=True)
class LoggerConfig:
    """Configuration settings for logging, including TensorBoard and WandB."""

    log_interval: int = 100
    """Report loss and timing interval."""

    log_params_norm: bool = False
    """If set, calculate and log parameters norm."""

    log_throughput: bool = False
    """If set, calculate and log throughput per GPU."""

    log_throughput_to_tensorboard: bool = False
    """Enable throughput logging to tensorboard."""

    throughput_window_size: int = 100
    """Number of batches to use for a rolling average of throughput."""

    log_progress: bool = False
    """If set, log progress (in terms of number of processed tokens and number of floating-point operations)
    to progress.txt file in checkpoint directory.
    """

    timing_log_level: Literal[0, 1, 2] = 0
    """Granularity level to measure and report timing.
    0: report only iteration time and make sure timing does not introduce extra overhead.
    1: report timing for operations that are executed very limited times (basically once) during each iteration
        (such as gradient all-reduce)
    2: report timing for operations that migh be executed numerous times during each iteration.
    Note that setting the level to 1 or 2 might cause increase in iteration time.
    """

    timing_log_option: Literal["max", "minmax", "all"] = "minmax"
    """Options for logging timing:
    max: report the max timing across all ranks
    minmax: report min and max timings across all ranks
    all: report timings of all ranks.
    """

    tensorboard_dir: str | None = None
    """Write TensorBoard logs to this directory."""

    tensorboard_log_interval: int = 1
    """Report to tensorboard interval."""

    tensorboard_queue_size: int = 1000
    """Size of the tensorboard queue for pending events and summaries
    before one of the 'add' calls forces a flush to disk.
    """

    log_timers_to_tensorboard: bool = False
    """If set, write timers to tensorboard."""

    log_loss_scale_to_tensorboard: bool = True
    """Disable loss-scale logging to tensorboard."""

    log_validation_ppl_to_tensorboard: bool = False
    """If set, write validation perplexity to tensorboard."""

    log_memory_to_tensorboard: bool = False
    """Enable memory logging to tensorboard."""

    memory_keys: dict[str, str] | None = None
    """Names of memory statistics to log from `torch.cuda.memory_stats()`"""

    log_memory_interval: int | None = None
    """Report memory interval."""

    log_device_memory_used: bool = False
    """Log device memory used (as reported by nvidia-smi)."""

    log_l2_norm_grad_to_tensorboard: bool = False
    """Enable gradients logging to tensorboard."""

    log_num_zeros_in_grad: bool = False
    """If set, calculate and log the number of zeros in gradient."""

    log_max_attention_logit: bool = False
    """Enable max attention logit logging to tensorboard."""

    log_runtime_to_tensorboard: bool = False
    """Enable runtime metrics logging to tensorboard."""

    runtime_time_unit: str = "hours"
    """Time unit to use for time logging. """

    barrier_with_L1_time: bool = field(default=True, metadata={"argparse_meta": {"arg_names": ["--no-barrier-with-level-1-timing"]}})
    """If not disabled, use barrier with level 1 time measurements. Note that this is up to the user to
    make sure calling barrier with their timers will not result in hangs. This can happen if for
    example the user adds a level 1 timer that is not called by all ranks.
    """

    log_world_size_to_tensorboard: bool = False
    """Enable world size logging to tensorboard."""

    wandb_project: str | None = None
    """The wandb project name. Ignore wandb by default."""

    wandb_exp_name: str | None = None
    """The wandb experiment name."""

    wandb_save_dir: str | None = None
    """Path to save the wandb results locally."""

    wandb_entity: str | None = None
    """The wandb entity name. It is useful when there are multiple sub-projects in a project."""

    logging_level: int | None = None
    """Set default logging level"""

    filter_warnings: bool = True
    """Filter out warning messages"""

    modules_to_filter: list[str] | None = None
    """List of modules to filter out from the logs"""

    set_level_for_all_loggers: bool = False
    """Set the logging level for all loggers. If False, only level for NeMo loggers will be set."""

    log_energy: bool = False
    """If set, log energy consumption (in Joules)."""

    save_config_filepath: str | None = None
    """If set, save the task configuration (ConfigContainer) to this file."""


@dataclass(kw_only=True)
class CheckpointConfig:
    """Configuration settings for model checkpointing (saving and loading)."""

    save: str | None = None
    """Output directory to save checkpoints to."""

    save_interval: int | None = field(default=None, metadata={"argparse_meta": {"arg_names": ["--save-interval", "--persistent-save-interval"]}})
    """Number of iterations between persistent checkpoint saves."""

    save_wgrads_interval: int | None = None
    """Number of iterations between wgrad (main_grad) saves."""

    save_dgrads_interval: int | None = None
    """Number of iterations between dgrad saves."""

    save_retain_interval: int | None = None
    """Number of iterations between retained checkpoints
    (other checkpoints except the last checkpoint are automatically deleted).
    """

    most_recent_k: int | None = -1
    """Number of latest checkpoint to be saved."""

    save_optim: bool = True
    """Do not save current optimizer."""

    save_rng: bool = True
    """Do not save current rng state."""

    load: str | None = None
    """Directory containing a model checkpoint."""

    load_optim: bool = True
    """Do not load optimizer when loading checkpoint."""

    load_main_params_from_ckpt: bool = False
    """Load main parameters from checkpoint. When loading a model from a checkpoint without loading
    the optimizer, the model parameters are updated but for fp16 optimizer with main parameters,
    the main parameters need to also be updated.
    """

    load_rng: bool = True
    """Do not load rng state when loading checkpoint."""

    non_persistent_save_interval: int | None = None
    """Number of iterations between non-persistent saves."""

    non_persistent_ckpt_type: Literal["global", "local", "in_memory"] | None = None
    """Type of non-persistent model checkpoints.
    "global" - Saved as a standard checkpoint (e.g., on Lustre) with old checkpoints being removed.
    "local" - [TBD] Each rank saves a portion of the checkpoint locally (e.g., on SSD/ramdisk).
    "in_memory" - [TBD] A special kind of local checkpoint that avoids serialization.
    None - No non-persistent checkpointing (default option)."""

    non_persistent_global_ckpt_dir: str | None = None
    """Directory containing global non-persistent model checkpoints."""

    non_persistent_local_ckpt_dir: str | None = None
    """Directory containing local non-persistent model checkpoints."""

    non_persistent_local_ckpt_algo: Literal["fully_parallel", "atomic"] = "fully_parallel"
    """Algorithm for local non-persistent checkpointing."""

    finetune: bool = False
    """Load model for finetuning. Do not load optimizer or rng state from checkpoint and set iteration to 0.
    Assumed when loading a release checkpoint."""

    pretrained_checkpoint: str | None = None
    """Directory containing a pretrained model checkpoint for finetuning."""

    ckpt_step: int | None = None
    """Checkpoint step to load model from."""

    use_checkpoint_args: bool = False
    """Override model-related command-line arguments with arguments from checkpoint"""

    use_mp_args_from_checkpoint_args: bool = False
    """Copy model parallelism command-line arguments from checkpoint"""

    use_tokenizer_model_from_checkpoint_args: bool = True
    """If set, do not use tokenizer model path from checkpoint"""

    exit_on_missing_checkpoint: bool = False
    """If 'load' is set, but checkpoint is not found (e.g., path typo), then exit instead of random initialization."""

    ckpt_format: Literal["torch", "torch_dist", "torch_dcp", "fsdp_dtensor"] = "torch_dist"
    """ Checkpoint format to use. torch is the format used by torch.save/load.
    torch_dist is a megatron built-in distributed checkpointing format.
    torch_dcp is the torch.distributed.checkpoint format.
    fsdp_dtensor is a torch DCP native, Megatron FSDP training-specific checkpoint format.
    """

    auto_detect_ckpt_format: bool = False
    """Determine if the checkpoint format is in legacy or distributed format. If False,
    expects distributed checkpoint iff args.ckpt_format != "torch". Might slow down 
    loading a bit (double rank0 ckpt load).
    """

    ckpt_convert_format: Literal["torch", "torch_dist"] | None = None
    """Checkpoint format for conversion."""

    ckpt_convert_save: str | None = None
    """Save directory for converted checkpoint."""

    ckpt_convert_update_legacy_dist_opt_format: bool = False
    """When loading a checkpoint, update the legacy format for the distributed optimizer,
    which previously used a merged param/grad buffer and a different bucket mapping.
    The legacy format was deprecated on Feb 13, 2024.
    """

    ckpt_fully_parallel_save: bool = True
    """Disable applying full save parallelization across DP for distributed checkpoints.
    Depending on ckpt format might decrease the number of files in the checkpoint.
    Makes DistributedOptimizer checkpoint non-reshardable."""

    async_save: bool = False
    """Apply async checkpointing save. Currently works only with `torch_dist` distributed checkpoint format."""

    use_persistent_ckpt_worker: bool = False
    """Use a persistent background worker for async checkpoint saves. When enabled, creates a dedicated
    worker thread/process for handling async saves. When disabled, uses temporal workers that are
    created and destroyed for each save operation."""

    async_ckpt_cpu_priority: int = 10
    """CPU nice value target (0-19, higher = lower priority) for the async checkpoint writer process.
    If it exceeds 19, it will be set to 19. If the current nice value is greater than the target, it will be left unchanged.
    Only applies when using persistent ckpt worker."""

    async_ckpt_io_priority: Optional[int] = 3
    """I/O scheduling class (0-3, 3=idle) for the async checkpoint writer process."""

    ckpt_fully_parallel_load: bool = False
    """Apply full load parallelization across DP for distributed checkpoints."""

    ckpt_fully_parallel_load_exchange_algo: Literal["broadcast", "gather_rounds", "gather_object"] = "broadcast"
    """Algorithm for fully parallel load of distributed checkpoints.
    "broadcast"(default): Broadcast the checkpoint from rank 0 to all other ranks.
    "gather_rounds": Gather the checkpoint from all ranks in rounds.
    "gather_object": Gather the checkpoint from all ranks in a single operation.
    """

    ckpt_fully_parallel_save_process_group: Literal["dp", "ep_dp"] = "dp"
    """Process group for fully parallel save of distributed checkpoints.
    "dp"(default): Data parallel process group.
    "ep_dp": Expert data parallel process group.
    """

    ckpt_fully_parallel_load_process_group: Literal["dp", "ep_dp"] = "dp"
    """Process group for fully parallel load of distributed checkpoints.
    "dp"(default): Data parallel process group.
    "ep_dp": Expert data parallel process group.
    """

    ckpt_assume_constant_structure: bool = False
    """Assume the checkpoint structure is constant across saves to enable optimizations."""

    strict_fsdp_dtensor_load: bool = True
    """Whether to enforce strict loading for FSDP DTensor checkpoints. When False, allows partial loading."""

    dist_ckpt_strictness: Literal[
        "assume_ok_unexpected",
        "log_unexpected",
        "log_all",
        "raise_unexpected",
        "raise_all",
        "return_unexpected",
        "return_all",
        "ignore_all",
    ] = "assume_ok_unexpected"
    """Determine handling of key mismatch during checkpoint load. Check StrictHandling docs for flags meaning.
    NOTE: This flag controls only distributed checkpoint load from storage, not loading state dict into the model."""

    dist_ckpt_save_pre_mcore_014: bool = False
    """Revert checkpointing simplifications introduced in Megatron-Core v0.14.
    This option affects only checkpoint saving format and will be removed soon
    (checkpoint load format is determined based on checkpoint metadata)."""

    dist_ckpt_optim_fully_reshardable: bool = False
    """Make optimizer distributed checkpoint fully reshardable (TP/PP/EP/DP) as opposed to plain DP reshardability."""

    distrib_optim_fully_reshardable_mem_efficient: bool = False
    """During distributed optimizer checkpoint save and load tries to use as little memory as possible
    by using Gloo (instead of NCCL) and only one rank for saving. Turn on only if experiencing host or device memory
    issues. Has affect only with `dist_ckpt_optim_fully_reshardable` flag."""

    save_tokenizer_assets: bool = True
    """Save tokenizer files to checkpoint directory. When enabled, saves all tokenizer artifacts
    (vocab files, special tokens, tokenizer config) to make checkpoints self-contained and portable.
    Set to False for performance-sensitive scenarios where tokenizer files are not needed."""

    replication: bool = False
    """If set, replication of local checkpoints is enabled. Needs to be enabled on all ranks."""

    replication_jump: int | None = None
    """Specifies `J`, the spacing between ranks storing replicas of a given rank's data. Replicas
    for rank `n` may be on ranks `n+J`, `n+2J`, ..., or `n-J`, `n-2J`, etc. This flag has an
    effect only if --replication is used. and must be consistent across all ranks."""

    replication_factor: int = 2
    """Number of machines storing the replica of a given rank's data."""


================================================
FILE: megatron/training/datasets/README.md
================================================
# Data Pipeline

## FIM dataset

`GPTFIMDataset` extends Megatron-Core’s `GPTDataset` to support **Fill-in-the-Middle (FIM)** data augmentation.
It probabilistically converts samples into FIM format using configurable rates, with support for both PSM and SPM patterns, fragment-level splitting, and length-preserving output.

`GPTFIMDatasetConfig` provides the configuration needed to enable this behavior.
`GPTFIMDatasetConfig` configuration object extending `GPTDatasetConfig` to enable FIM preprocessing.

**Attributes**

- `rate`: Probability of converting a sample into a FIM example. A value of `1.0` means FIM is always applied. a value of `0.0` means FIM is never applied.
- `spm_rate`: Probability of using the SPM FIM pattern (vs PSM). The remaining probability (`1 - spm_rate`) selects the PSM (prefix-suffix-middle) pattern instead. For example, if `spm_rate = 0.3`: 30% SPM, 70% PSM.
- `extra_tokens`: Dictionary containing the FIM special tokens: {"prefix", "middle", "suffix", "pad", "eod"}.
- `split_sample`: Optional token around which samples are split before applying FIM. If provided, the input sequence is divided at every occurrence of this token, and FIM is applied independently to each fragment. `A B C <SPLI_SAMPLE> D E F <SPLIT_SAMPLE> G H` -> `FIM(Fragment 1) <SPLI_SAMPLE> FIM(Fragment 2) <SPLI_SAMPLE> FIM(Fragment 3)`.
- `fragment_rate`: Probability of applying FIM to each fragment when split_sample is used.
- `no_prefix`: If the decoded sequence starts with this prefix, FIM is skipped.
`GPTFIMDataset` dataset class that loads token sequences from an `IndexedDataset` and applies FIM transformations before returning each sample.

**PSM Format**
```
[prefix_tok] prefix [suffix_tok] suffix [middle_tok] middle
```

**SPM Format**
```
[prefix_tok, suffix_tok] suffix [middle_tok] prefix middle
```

**Special cases:**

- If the sequence starts with no_prefix, FIM is skipped.
- If FIM is not applied, the sample is returned unchanged.

================================================
FILE: megatron/training/datasets/__init__.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.


================================================
FILE: megatron/training/datasets/data_samplers.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""Dataloaders."""


import random

import numpy as np
import torch
from torch.utils.data import Dataset

from megatron.core import mpu
from megatron.core.datasets.utils import Split

from megatron.training import get_args
from megatron.training.dist_signal_handler import DistributedSignalHandler


def build_pretraining_data_loader(dataset, consumed_samples):
    """Build dataloader given an input dataset."""

    if dataset is None:
        return None
    args = get_args()

    if hasattr(dataset, 'split'):
        split = dataset.split
    elif hasattr(dataset, 'index_split'):
        split = dataset.index_split
    else:
        split = None

    if split == Split.valid and args.full_validation:
        batch_sampler = MegatronPretrainingSampler(
            total_samples=len(dataset),
            consumed_samples=0,
            micro_batch_size=args.micro_batch_size,
            data_parallel_rank=mpu.get_data_parallel_rank(),
            data_parallel_size=mpu.get_data_parallel_world_size(),
        )
    elif args.dataloader_type == 'single':
        if args.hybrid_context_parallel:
            batch_sampler = HybridCPMegatronPretrainingSampler(
                total_samples=len(dataset),
                consumed_samples=consumed_samples,
                micro_batch_size=args.micro_batch_size,
                global_batch_size=args.global_batch_size,
                data_parallel_rank=mpu.get_data_parallel_rank(),
                data_parallel_size=mpu.get_data_parallel_world_size())
        else:
            # Megatron sampler
            batch_sampler = MegatronPretrainingSampler(
                total_samples=len(dataset),
                consumed_samples=consumed_samples,
                micro_batch_size=args.micro_batch_size,
                data_parallel_rank=mpu.get_data_parallel_rank(),
                data_parallel_size=mpu.get_data_parallel_world_size())
    elif args.dataloader_type == 'cyclic':
        batch_sampler = MegatronPretrainingRandomSampler(
            dataset,
            total_samples=len(dataset),
            consumed_samples=consumed_samples,
            micro_batch_size=args.micro_batch_size,
            data_parallel_rank=mpu.get_data_parallel_rank(),
            data_parallel_size=mpu.get_data_parallel_world_size(),
            data_sharding=args.data_sharding,
        )
    elif args.dataloader_type == "external":
        # External dataloaders are passed through. User is expected to provide a
        # torch-compatible dataloader and define samplers, if needed.
        return dataset
    else:
        raise Exception('{} dataloader type is not supported.'.format(args.dataloader_type))

    def worker_init_fn(_):
        import os

        # Defensively close GPU device FDs in worker processes so workers do not
        # keep references into NVIDIA memory space. This helps ensure GPU memory
        # can be reclaimed even if a dataloader worker is delayed or fails to exit.
        def close_nvidia_fds():
            for fd in os.listdir("/proc/self/fd"):
                try:
                    path = os.readlink(f"/proc/self/fd/{fd}")
                    if path.startswith("/dev/nvidia"):
                        os.close(int(fd))
                except OSError:
                    pass

        close_nvidia_fds()
        if args.exit_signal_handler:
            DistributedSignalHandler(args.exit_signal).__enter__()

    maybe_worker_init_fn = (
        worker_init_fn if args.num_workers > 0 else None
    )
    # Torch dataloader.
    if args.hybrid_context_parallel:
        extra_kwargs = {"collate_fn": lambda x: x,}
    else:
        extra_kwargs = {}
    return torch.utils.data.DataLoader(
        dataset,
        batch_sampler=batch_sampler,
        num_workers=args.num_workers,
        pin_memory=True,
        persistent_workers=True if args.num_workers > 0 else False,
        worker_init_fn=maybe_worker_init_fn,
        **extra_kwargs,
    )

class MegatronPretrainingSampler:
    """
    Sampler for Megatron pretraining dataloaders that divides data samples across
    data parallel workers. Each worker receives a contiguous chunk of data determined by
    its rank and the micro batch size. Supports dropping the last incomplete batch if
    specified, and keeps track of total and consumed samples. Designed to work with
    distributed training using Megatron's data parallelism.
    """

    def __init__(
        self,
        total_samples,
        consumed_samples,
        micro_batch_size,
        data_parallel_rank,
        data_parallel_size,
        drop_last=True,
    ):
        # Keep a copy of input params for later use.
        self.total_samples = total_samples
        self.consumed_samples = consumed_samples
        self.micro_batch_size = micro_batch_size
        self.data_parallel_rank = data_parallel_rank
        self.micro_batch_times_data_parallel_size = self.micro_batch_size * data_parallel_size
        self.drop_last = drop_last

        # Sanity checks.
        assert self.total_samples > 0, 'no sample to consume: {}'.format(self.total_samples)
        assert (
            self.consumed_samples < self.total_samples
        ), 'no samples left to consume: {}, {}'.format(self.consumed_samples, self.total_samples)
        assert self.micro_batch_size > 0
        assert data_parallel_size > 0
        assert (
            self.data_parallel_rank < data_parallel_size
        ), 'data_parallel_rank should be smaller than data size: {}, ' '{}'.format(
            self.data_parallel_rank, data_parallel_size
        )

    def __len__(self):
        return self.total_samples

    def get_start_end_idx(self):
        """
        Calculate the start and end indices for the current data parallel worker's
        chunk within a batch.

        Returns:
            tuple: (start_idx, end_idx) indicating the slice of the batch for this worker.
        """
        start_idx = self.data_parallel_rank * self.micro_batch_size
        end_idx = start_idx + self.micro_batch_size
        return start_idx, end_idx

    def __iter__(self):
        batch = []
        # Last batch will be dropped if drop_last is not set False
        for idx in range(self.consumed_samples, self.total_samples):
            batch.append(idx)
            if len(batch) == self.micro_batch_times_data_parallel_size:
                start_idx, end_idx = self.get_start_end_idx()
                yield batch[start_idx:end_idx]
                batch = []

        # Check the last partial batch and see drop_last is set
        if len(batch) > 0 and not self.drop_last:
            start_idx, end_idx = self.get_start_end_idx()
            yield batch[start_idx:end_idx]

class HybridCPMegatronPretrainingSampler(MegatronPretrainingSampler):
    """
    Data sampler for hybrid context parallel (Hybrid CP) format.
    This data sampler pulls in the entire global batch at once across all data parallel ranks.
    This helps provide the Hybrid CP Dataloader Wrapper to schedule and load balance sub-samples
    of the entire global batch.
    """

    def __init__(self, total_samples, consumed_samples, micro_batch_size, global_batch_size,
                 data_parallel_rank, data_parallel_size, drop_last=True):
        super().__init__(total_samples, consumed_samples, micro_batch_size, data_parallel_rank, data_parallel_size, drop_last)
        self.global_batch_size = global_batch_size
        self.data_parallel_size = data_parallel_size
        self.num_micro_batches = self.global_batch_size // self.micro_batch_times_data_parallel_size

    def __len__(self):
        return self.total_samples

    def get_start_end_idx_global_batch(self):
        start_idx = [self.data_parallel_rank * self.micro_batch_size + i * self.micro_batch_size * self.data_parallel_size for i in range(self.num_micro_batches)]
        end_idx = [start_idx[i] + self.micro_batch_size for i in range(self.num_micro_batches)]
        return start_idx, end_idx

    def __iter__(self):
        batch = []
        # Last batch will be dropped if drop_last is not set False
        for idx in range(self.consumed_samples, self.total_samples):
            batch.append(idx)
            if len(batch) == self.micro_batch_times_data_parallel_size * self.num_micro_batches:
                start_idx, end_idx = self.get_start_end_idx_global_batch()
                global_batch_idx = []
                for i in range(self.num_micro_batches):
                    global_batch_idx.extend(batch[start_idx[i]:end_idx[i]])
                yield global_batch_idx
                batch = []

        # Check the last partial batch and see drop_last is set
        if len(batch) > 0 and not self.drop_last:
            start_idx, end_idx = self.get_start_end_idx_global_batch()
            global_batch_idx = []
            for i in range(self.num_micro_batches):
                global_batch_idx.extend(batch[start_idx[i]:end_idx[i]])
            yield global_batch_idx

class RandomSeedDataset(Dataset):
    """
    A dataset wrapper that resets the random seed before each sample.

    This ensures deterministic behavior per sample by setting the RNG state
    for torch, numpy, and random before accessing each underlying data sample.
    The base seed is retrieved from training arguments, and can be varied per epoch
    using the set_epoch method to ensure different shuffling or augmentation each epoch.

    Args:
        dataset: The underlying dataset to wrap.

    Methods:
        set_epoch(epoch): Change the seed offset so each epoch produces different randomization.
        __getitem__(idx): Sets the seed based on the sample index and current epoch.
    """

    def __init__(self, dataset, seed):
        self.base_seed = seed
        self.curr_seed = seed
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def set_epoch(self, epoch):
        """
        Change the seed offset so each epoch produces different randomization.

        Args:
            epoch: The epoch number to use as the seed offset.
        """
        self.curr_seed = self.base_seed + epoch

    def __getitem__(self, idx):
        seed = idx + self.curr_seed
        torch.manual_seed(seed)
        random.seed(seed)
        np.random.seed(seed)
        return self.dataset[idx]


class MegatronPretrainingRandomSampler:
    """
    Sampler for Megatron pretraining dataloaders that performs random sampling
    across data parallel workers. Supports data sharding to divide the dataset
    into buckets and shuffle within each bucket. Designed to work with distributed
    training using Megatron's data parallelism.
    """

    def __init__(
        self,
        dataset,
        total_samples,
        consumed_samples,
        micro_batch_size,
        data_parallel_rank,
        data_parallel_size,
        data_sharding,
    ):
        # Keep a copy of input params for later use.
        self.dataset = dataset
        self.total_samples = total_samples
        self.consumed_samples = consumed_samples
        self.micro_batch_size = micro_batch_size
        self.data_parallel_rank = data_parallel_rank
        self.data_parallel_size = data_parallel_size
        self.data_sharding = data_sharding
        self.micro_batch_times_data_parallel_size = self.micro_batch_size * data_parallel_size
        self.last_batch_size = self.total_samples % self.micro_batch_times_data_parallel_size

        # Sanity checks.
        assert self.total_samples > 0, 'no sample to consume: {}'.format(self.total_samples)
        assert self.micro_batch_size > 0
        assert data_parallel_size > 0
        assert (
            self.data_parallel_rank < data_parallel_size
        ), 'data_parallel_rank should be smaller than data size: {}, ' '{}'.format(
            self.data_parallel_rank, data_parallel_size
        )

    def __len__(self):
        return self.total_samples

    def __iter__(self):
        active_total_samples = self.total_samples - self.last_batch_size
        self.epoch = self.consumed_samples // active_total_samples
        current_epoch_samples = self.consumed_samples % active_total_samples
        assert current_epoch_samples % self.micro_batch_times_data_parallel_size == 0

        if isinstance(self.dataset, RandomSeedDataset):
            self.dataset.set_epoch(self.epoch)

        # data sharding and random sampling
        if self.data_sharding:
            bucket_size = (
                self.total_samples // self.micro_batch_times_data_parallel_size
            ) * self.micro_batch_size
            bucket_offset = current_epoch_samples // self.data_parallel_size
            start_idx = self.data_parallel_rank * bucket_size

            g = torch.Generator()
            g.manual_seed(self.epoch)
            random_idx = torch.randperm(bucket_size, generator=g).tolist()
            idx_range = [start_idx + x for x in random_idx[bucket_offset:]]
        else:
            full_bucket_size = (self.total_samples // self.micro_batch_size) * self.micro_batch_size
            full_bucket_offset = current_epoch_samples
            g = torch.Generator()
            g.manual_seed(self.epoch)
            idx_range_total = torch.randperm(full_bucket_size, generator=g).tolist()
            idx_range_active = idx_range_total[full_bucket_offset:]
            idx_range = idx_range_active[self.data_parallel_rank :: self.data_parallel_size]

        batch = []
        # Last batch if not complete will be dropped.
        for idx in idx_range:
            batch.append(idx)
            if len(batch) == self.micro_batch_size:
                self.consumed_samples += self.micro_batch_times_data_parallel_size
                yield batch
                batch = []


================================================
FILE: megatron/training/datasets/fim_dataset.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.

from typing import Dict, Tuple, Optional
from dataclasses import dataclass, field

import numpy as np
import logging
from megatron.core.datasets.gpt_dataset import GPTDataset, GPTDatasetConfig
from megatron.core.datasets.indexed_dataset import IndexedDataset
from megatron.core.datasets.utils import Split

logger = logging.getLogger(__name__)


@dataclass
class GPTFIMDatasetConfig(GPTDatasetConfig):
    """Configuration object for Megatron Core GPT FIM datasets"""

    fim_rate: float = None
    """Probability to convert a training sample into a FIM format"""

    fim_spm_rate: float = None
    """Probability that the a FIM sample uses the SPM format over the PSM format"""

    fim_extra_tokens: Dict = None
    """FIM extra tokens. Should consist of prefix, middle, suffix, PAD, and EOD tokens."""

    fim_split_sample: Optional[str] = None
    """String around which to split the sample for FIM"""

    fim_fragment_rate: Optional[float] = None
    """Rate of FIM on each fragment when split_sample is not None"""

    fim_no_prefix: Optional[str] = None
    """Do not apply FIM to fragments that start with this prefix"""


class GPTFIMDataset(GPTDataset):
    """The base GPT dataset

    Args:
        indexed_dataset (IndexedDataset): The IndexedDataset around which to build the
        MegatronDataset

        indexed_indices (np.ndarray): The set of the documents indices to expose

        num_samples (int): The number of samples to draw from the indexed dataset

        index_split (Split): The indexed_indices Split

        config (GPTFIMDatasetConfig): The GPT-specific container for all config sourced parameters
    """

    def __init__(
        self,
        indexed_dataset: IndexedDataset,
        dataset_path: str,
        indexed_indices: np.ndarray,
        num_samples: int,
        index_split: Split,
        config: GPTFIMDatasetConfig,
    ) -> None:
        super().__init__(
            indexed_dataset, dataset_path, indexed_indices, num_samples, index_split, config
        )

        self.np_rng = np.random.RandomState(seed=self.config.random_seed)
        logger.info(f"Initialized FIM RNG with seed = {self.config.random_seed}")
        # get FIM params
        self.fim_rate = self.config.fim_rate
        self.fim_spm_rate = self.config.fim_spm_rate
        self.fragment_fim_rate = self.config.fim_fragment_rate
        fim_split_sample = self.config.fim_split_sample
        self.no_fim_prefix = self.config.fim_no_prefix
        if fim_split_sample:
            fim_split_sample_ids = self.config.tokenizer._tokenizer.tokens_to_ids(fim_split_sample)
            assert isinstance(fim_split_sample_ids, int) or len(fim_split_sample_ids) == 1
            self.fim_split_sample = (
                fim_split_sample_ids
                if isinstance(fim_split_sample_ids, int)
                else fim_split_sample_ids[0]
            )
        else:
            self.fim_split_sample = None

        # get extra tokens ids
        fim_tokens = self.config.fim_extra_tokens
        fim_tokens = [
            fim_tokens["prefix"],
            fim_tokens["middle"],
            fim_tokens["suffix"],
            fim_tokens["pad"],
            fim_tokens["eod"],
        ]
        fim_tokens_ids = self.config.tokenizer._tokenizer.tokens_to_ids(fim_tokens)
        (
            self.prefix_tok_id,
            self.middle_tok_id,
            self.suffix_tok_id,
            self.pad_tok_id,
            self.eod_tok_id,
        ) = fim_tokens_ids

    def _query_document_sample_shuffle_indices(self, idx: int) -> Tuple[np.ndarray, np.ndarray]:
        """Get the text (token ids) and document ids for a given index

        Args:
            idx (int): The index into the dataset

        Returns:
            Tuple[np.ndarray, np.ndarray]: The text ids and document ids
        """
        # Do the shuffle mapping
        idx = self.shuffle_index[idx]

        # Get the beginning and end documents and offsets
        doc_index_beg, doc_index_beg_offset = self.sample_index[idx]
        doc_index_end, doc_index_end_offset = self.sample_index[idx + 1]

        document_ids = []
        sample_parts = []

        # Sample spans a single document
        if doc_index_beg == doc_index_end:
            # Add the document id
            document_ids.append(self.document_index[doc_index_beg])

            # Add the entire sample
            sample_parts.append(
                self.dataset.get(
                    self.document_index[doc_index_beg],
                    offset=doc_index_beg_offset,
                    length=doc_index_end_offset - doc_index_beg_offset + 1,
                )
            )

        # Sample spans multiple documents
        else:
            for i in range(doc_index_beg, doc_index_end + 1):
                # Add the document id
                document_ids.append(self.document_index[i])

                # Add the sample part
                offset = 0 if i > doc_index_beg else doc_index_beg_offset
                length = None if i < doc_index_end else doc_index_end_offset + 1
                sample_parts.append(
                    self.dataset.get(self.document_index[i], offset=offset, length=length)
                )

        sample = np.concatenate(sample_parts)

        sample_len = sample.shape[0]
        segment_breaks = np.argwhere(sample == self.eod_tok_id)

        if segment_breaks.shape != (0, 1):  # then there is an EOD token in this example
            curr_start_position = 0
            new_samples = []
            for loc in np.nditer(segment_breaks):
                # Only permute non-empty segments.
                if loc - curr_start_position > 0:
                    # permute {prefix, suffix, middle} or {suffix, prefix, middle}
                    permuted = self._fim_split_and_permute_sequence(sample[curr_start_position:loc])
                    new_samples += [permuted, [self.eod_tok_id]]

                curr_start_position = loc + 1  # jump over the EOD token
            # Permute the segment after the last EOD
            permuted = self._fim_split_and_permute_sequence(sample[curr_start_position:])
            new_samples.append(permuted)

            sample = np.concatenate(new_samples)
        else:
            sample = self._fim_split_and_permute_sequence(sample)

        diff = sample.shape[0] - sample_len
        if diff > 0:  # too long
            sample = sample[:sample_len]
        elif diff < 0:  # too short
            sample = np.concatenate([sample, np.full((-1 * diff), self.pad_tok_id)])

        assert sample.shape[0] == sample_len

        return (np.array(sample, dtype=np.int64), np.array(document_ids, dtype=np.int64))

    def _fim_permute_sequence(self, sequence, rate):
        return self._permute(
            sequence,
            rate,
            self.fim_spm_rate,
            self.config.tokenizer,
            truncate_or_pad=False,
            suffix_tok_id=self.suffix_tok_id,
            prefix_tok_id=self.prefix_tok_id,
            middle_tok_id=self.middle_tok_id,
            pad_tok_id=self.pad_tok_id,
            no_fim_prefix=self.no_fim_prefix,
        )

    def _fim_split_and_permute_sequence(self, sequence):
        """
        If self.fim_split_sample is not None, split the sequence.
        Then apply FIM on the fragments, or the whole sequence if self.fim_split_sample is None.
        """
        if self.fim_split_sample is None:
            return self._fim_permute_sequence(sequence, self.fim_rate)
        # fim_split_sample is set: split the sample on this token and permute each fragment separately.
        # Typically, if each sample is a repository, then we split again on the file level.
        # Each fragment is a file, and we permute the files.
        fragment_breaks = np.argwhere(sequence == self.fim_split_sample)
        if fragment_breaks.shape == (0, 1):
            # no split token in this sample
            return self._fim_permute_sequence(sequence, self.fim_rate)
        if not self.np_rng.binomial(1, self.fim_rate):
            # don't do FIM preproc
            return sequence
        # Do FIM on each fragment
        curr_start_position = 0
        new_samples = []
        for loc in np.nditer(fragment_breaks):
            if loc - curr_start_position > 0:
                permuted = self._fim_permute_sequence(
                    sequence[curr_start_position:loc], self.fragment_fim_rate
                )
                new_samples += [permuted, [self.fim_split_sample]]
            curr_start_position = loc + 1  # Jump over the split token
        # Permute the segment after the last split token
        permuted = self._fim_permute_sequence(
            sequence[curr_start_position:], self.fragment_fim_rate
        )
        new_samples.append(permuted)

        return np.concatenate(new_samples)

    def _permute(
        self,
        sample,
        fim_rate,
        fim_spm_rate,
        tokenizer,
        truncate_or_pad=True,
        suffix_tok_id=None,
        prefix_tok_id=None,
        middle_tok_id=None,
        pad_tok_id=None,
        no_fim_prefix=None,
    ):
        """
        Take in a sample (np array w/ size (0,chunklength)) and perform a FIM transformation on it.
        Maintain the same sample length (if transform creates a few extra tokens, drop them).
        """
        if self.np_rng.binomial(1, fim_rate):  # sample bernoulli dist

            # Use remove_special_tokens=True so character-level boundaries and re-tokenization
            # are consistent; otherwise ids_to_text(..., None) keeps special tokens when
            # include_special_tokens=True, changing contents and breaking e.g. split_sample.
            contents = tokenizer._tokenizer.ids_to_text(sample, remove_special_tokens=True)

            # Do not apply FIM if the sample starts with no_fim_prefix
            if no_fim_prefix is not None and contents.startswith(no_fim_prefix):
                return sample

            try:
                # A boundary can be =0 (prefix will be empty)
                # a boundary can be =len(contents) (suffix will be empty)
                # The two boundaries can be equal (middle will be empty)
                boundaries = list(self.np_rng.randint(low=0, high=len(contents) + 1, size=2))
                boundaries.sort()
            except ValueError as e:
                print(len(contents), contents)
                print(e)
                raise e

            prefix = contents[: boundaries[0]]
            middle = contents[boundaries[0] : boundaries[1]]
            suffix = contents[boundaries[1] :]

            prefix = np.array([*tokenizer._tokenizer.text_to_ids(prefix)], dtype=np.int64)
            middle = np.array([*tokenizer._tokenizer.text_to_ids(middle)], dtype=np.int64)
            suffix = np.array([*tokenizer._tokenizer.text_to_ids(suffix)], dtype=np.int64)

            # here we truncate each given segment to fit the same length as it was before
            # A consequence is that we never reach the end of a file?
            # we should rather truncate at the context-level
            if truncate_or_pad:
                # need to make same length as the input. Take the 3 sentinel tokens into account
                new_length = suffix.shape[0] + prefix.shape[0] + middle.shape[0] + 3
                diff = new_length - sample.shape[0]
                if diff > 0:  # too long
                    if (
                        suffix.shape[0] <= diff
                    ):  # if there's no space to truncate the suffix: stop and report it. atm i should have stopped this from happening
                        return sample
                    suffix = suffix[: suffix.shape[0] - diff]
                elif diff < 0:  # too short
                    suffix = np.concatenate([suffix, np.full((-1 * diff), pad_tok_id)])

            if self.np_rng.binomial(1, fim_spm_rate):
                # SPM (variant 2 from FIM paper)
                new_sample = np.concatenate(
                    [[prefix_tok_id, suffix_tok_id], suffix, [middle_tok_id], prefix, middle]
                )
            else:
                # PSM
                new_sample = np.concatenate(
                    [[prefix_tok_id], prefix, [suffix_tok_id], suffix, [middle_tok_id], middle]
                )

        else:
            # don't do FIM preproc
            new_sample = sample

        return new_sample


================================================
FILE: megatron/training/datasets/sft_dataset.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.

import atexit, json
from collections import Counter
from typing import Any, Dict, Optional

import numpy as np
import torch

from megatron.core.datasets.gpt_dataset import GPTDatasetConfig
from megatron.core.datasets.megatron_dataset import LowLevelDataset, MegatronDataset
from megatron.core.datasets.utils import Split

IGNORE_INDEX = -100


class SFTLowLevelDataset:
    """The low-level dataset loading jsonl data for SFT

    Args:
        dataset_path (str): The path to jsonl data
            Each line of the jsonl must have key "messages" (List[Dict]),
            which is a sequence of system/user/assistant messages.
            Must be in the following format:
            [
                {"role": "system", "content": "something"},
                {"role": "user", "content": "something1"},
                {"role": "assistant", "content": "something2"},
            ]
            A jsonl line can contain multiple conversations packed together into on list. Each
            conversation starts with the system role, and conversations can have multiple turns
            of the user and assistant roles.
    """

    def __init__(self, dataset_path: str) -> None:
        try:
            from datasets import load_dataset
        except ImportError:
            raise ImportError(
                "SFTDataset currently requires datasets library to be installed"
            )
        self.dataset = load_dataset("json", data_files=dataset_path, split="all")

    def __len__(self) -> int:
        return len(self.dataset)

    def __getitem__(self, idx: int) -> list:
        return self.dataset[idx]["messages"]


class SFTDataset(MegatronDataset):
    """The dataset used during SFT"""

    def __init__(
        self,
        dataset: LowLevelDataset,
        dataset_path: Optional[str],
        indices: np.ndarray,
        num_samples: Optional[int],
        index_split: Split,
        config: GPTDatasetConfig,
    ) -> None:
        super().__init__(dataset, dataset_path, indices, num_samples, index_split, config)

    @staticmethod
    def numel_low_level_dataset(low_level_dataset: LowLevelDataset) -> int:
        return len(low_level_dataset)

    @staticmethod
    def build_low_level_dataset(dataset_path: str, config: GPTDatasetConfig) -> LowLevelDataset:
        return SFTLowLevelDataset(dataset_path)

    def __len__(self) -> int:
        return self.num_samples

    def _split_conversations(self, merged_conversations):
        split_conversations = []
        current = []
        for msg in merged_conversations:
            # Whenever we see a new system message, start a new conversation
            if msg["role"] == "system":
                if current:  # If previously accumulating a conversation, then store it
                    split_conversations.append(current)
                current = [msg]  # Then start the new conversation
            else:
                current.append(msg) # Continue accumulating the current conversation
        if current:  # Store any remaining conversation
            split_conversations.append(current)
        return split_conversations

    def __getitem__(self, idx: int) -> Dict[str, Any]:

        tokenizer = self.config.tokenizer
        pack_length = self.config.sequence_length

        merged_conversations = self.dataset[int(self.indices[idx % len(self.indices)])]
        split_conversations = self._split_conversations(merged_conversations)

        def extend_with_padding(tokens, targets, positions, pad_len):
            tokens.extend([pad] * pad_len)
            targets.extend([pad] * pad_len)
            positions.extend(range(positions[-1]+1, positions[-1]+1+pad_len))

        pack_tokens = []
        pack_targets = []
        pack_positions = []
        cu_seqlens = [0]
        eod = tokenizer.eod
        pad = tokenizer.pad
        # TODO(duncan): Track number of convs dropped and/or truncated and amount of end-padding
        for conversation in split_conversations:

            tokens, targets = tokenizer.tokenize_conversation(
                conversation, return_target=True, add_generation_prompt=False
            )

            tokens_list = tokens.tolist()
            targets_list = targets.tolist()


            pack_tokens.extend(tokens_list)
            pack_targets.extend(targets_list)

            assert not self.config.reset_position_ids
            pack_positions.extend(range(len(tokens_list)))

            if self.config.context_parallel_size > 1:
                pad_granularity = self.config.context_parallel_size * 2
                mod_token_count = len(pack_tokens) % pad_granularity
                if mod_token_count != 0:
                    pad_len = pad_granularity - mod_token_count
                    extend_with_padding(pack_tokens, pack_targets, pack_positions, pad_len)

            # TODO(duncan): Consider also padding to multiple of number of tokens here. This might
            # be needed for efficiency (and potentially set via command-line argument).

            cu_seqlens.append(len(pack_tokens))

            # Handle any necessary truncation
            if len(pack_tokens) >= pack_length + 1:  # +1 here to account for later alignment
                # Truncate on the right
                max_body = pack_length
                pack_tokens = pack_tokens[:max_body]
                pack_targets = pack_targets[:max_body]
                pack_tokens.append(pad)
                pack_targets.append(pad)
                pack_positions = pack_positions[:pack_length+1]
                # Note len({pack_tokens, pack_targets, pack_positions}) should be pack_length + 1
                cu_seqlens[-1] = len(pack_tokens) - 1
                break

        # Handle any necessary padding
        if len(pack_tokens) < pack_length + 1:  # +1 here to account for later alignment
            pad_len = pack_length + 1 - len(pack_tokens)
            extend_with_padding(pack_tokens, pack_targets, pack_positions, pad_len)
            # Note len({pack_tokens, pack_targets, pack_positions}) should be pack_length + 1
            cu_seqlens[-1] = len(pack_tokens) - 1

        assert len(pack_tokens) == pack_length + 1
        assert len(pack_targets) == pack_length + 1
        assert len(pack_positions) == pack_length + 1

        # Align and convert to tensors
        input_ids    = torch.tensor(pack_tokens[:-1],  dtype=torch.int64)
        labels       = torch.tensor(pack_targets[1:], dtype=torch.int64)
        position_ids = torch.tensor(pack_positions[:-1], dtype=torch.int64)

        # Loss mask.
        loss_mask = torch.ones(pack_length, dtype=torch.float32)
        loss_mask[labels == pad] = 0.0  # Mask paddings
        loss_mask[labels == IGNORE_INDEX] = 0.0  # mask prompts

        # TODO(duncan): Optionally create an attention mask
        assert not self.config.create_attention_mask and not self.config.reset_attention_mask
        # attention_mask = None

        assert len(cu_seqlens) >= 2
        cu_seqlens = torch.tensor(cu_seqlens, dtype=torch.int32)
        # Calculating max_seqlen here, rather than incrementally above, because of possible
        # effects of truncation and padding
        adjacent_diffs = cu_seqlens[1:] - cu_seqlens[:-1]
        max_seqlen = adjacent_diffs.max()  # max_seqlen is a 0-D tensor

        return {
            'tokens': input_ids,
            'labels': labels,
            # 'attention_mask': attention_mask,  # PyTorch collate cannot handle NoneType
            'loss_mask': loss_mask,
            'position_ids': position_ids,
            'cu_seqlens': cu_seqlens,
            'max_seqlen': max_seqlen,
        }


================================================
FILE: megatron/training/dgrad_logging.py
================================================
# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.

"""dgrad logging using backward hooks."""

from collections import defaultdict
import torch
import torch.nn as nn

from megatron.core.tensor_parallel.layers import ColumnParallelLinear, RowParallelLinear

from .checkpointing import save_grads
from .utils import unwrap_model


def _get_linear_types():
    """Build tuple of linear layer types to capture gradients from."""
    types = [nn.Linear, nn.Embedding, ColumnParallelLinear, RowParallelLinear]

    # Add Transformer Engine layers if available.
    try:
        from megatron.core.extensions.transformer_engine import (
            TELinear,
            TEColumnParallelLinear,
            TERowParallelLinear,
            TELayerNormColumnParallelLinear,
        )
        types.extend([TELinear, TEColumnParallelLinear, TERowParallelLinear,
                      TELayerNormColumnParallelLinear])
    except ImportError:
        pass

    try:
        from megatron.core.extensions.transformer_engine import (
            TEGroupedLinear,
            TEColumnParallelGroupedLinear,
            TERowParallelGroupedLinear,
        )
        if TEGroupedLinear is not None:
            types.extend([TEGroupedLinear, TEColumnParallelGroupedLinear,
                          TERowParallelGroupedLinear])
    except ImportError:
        pass

    return tuple(types)


LINEAR_TYPES = _get_linear_types()


class DataGradLogger:
    """Captures and saves gradients from all linear layers using backward hooks.
    
    NOTE: Right now, we only save the dgrads for the last microbatch in a batch on DP replica 0.
    The code below would need to be extended to save dgrads for all microbatches in a batch."""

    def __init__(self, save_dir: str):
        self._save_dir = save_dir
        self._dgrads_state_dict = defaultdict(dict)
        self._hooks = []

    def _make_hook(self, model_chunk_name: str, module_name: str):
        """Create a backward hook for a named module."""
        def hook(_, grad_input, grad_output):
            for idx, grad in enumerate(grad_output):
                if grad is not None:
                    grad_name = f"{module_name}/output{idx}"
                    self._dgrads_state_dict[model_chunk_name][grad_name] = grad.detach().cpu()
            for idx, grad in enumerate(grad_input):
                if grad is not None:
                    grad_name = f"{module_name}/input{idx}"
                    self._dgrads_state_dict[model_chunk_name][grad_name] = grad.detach().cpu()
        return hook

    def save(self, iteration: int):
        """Save captured gradients to disk and clear the buffer."""
        if not self._dgrads_state_dict:
            return
        save_grads(self._save_dir, self._dgrads_state_dict, iteration, "dgrads")
        self._dgrads_state_dict.clear()

    def register_hooks(self, model: torch.nn.Module):
        """Find and register hooks on all linear layers."""
        assert len(self._hooks) == 0
        for model_chunk_id, model_chunk in enumerate(model):
            unwrapped_model_chunk = unwrap_model(model_chunk)
            for module_name, module in unwrapped_model_chunk.named_modules():
                if isinstance(module, LINEAR_TYPES):
                    model_chunk_name = f"model_chunk{model_chunk_id}"
                    handle = module.register_full_backward_hook(
                        self._make_hook(model_chunk_name, module_name)
                    )
                    self._hooks.append(handle)

    def remove_hooks(self):
        """Remove all registered hooks."""
        for handle in self._hooks:
            handle.remove()
        self._hooks.clear()


_LOGGER = None


def enable_dgrad_logging(model: torch.nn.Module, save_dir: str):
    """Enable dgrad logging on a model."""
    global _LOGGER
    if _LOGGER is None:
        _LOGGER = DataGradLogger(save_dir)
    _LOGGER.register_hooks(model)


def disable_dgrad_logging():
    """Disable dgrad logging on a model."""
    global _LOGGER
    assert _LOGGER is not None
    _LOGGER.remove_hooks()


def save_dgrads(iteration: int):
    """Save dgrads to disk."""
    global _LOGGER
    assert _LOGGER is not None
    _LOGGER.save(iteration)


================================================
FILE: megatron/training/dist_signal_handler.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import signal

import torch

def get_world_size():
    if torch.distributed.is_available() and torch.distributed.is_initialized():
        world_size = torch.distributed.get_world_size()
    else:
        world_size = 1
    return world_size


def get_device(local_rank=None):
    backend = torch.distributed.get_backend()
    if backend == 'nccl':
        if local_rank is None:
            device = torch.device('cuda')
        else:
            device = torch.device(f'cuda:{local_rank}')
    elif backend == 'gloo':
        device = torch.device('cpu')
    else:
        raise RuntimeError
    return device


def all_gather_item(item, dtype, group=None, async_op=False, local_rank=None):
    if not torch.distributed.is_available() or \
       not torch.distributed.is_initialized():
        return [item]

    device = get_device(local_rank)

    if group is not None:
        group_size = group.size()
    else:
        group_size = get_world_size()

    tensor = torch.tensor([item], device=device, dtype=dtype)
    output_tensors = [
        torch.zeros(1, dtype=tensor.dtype, device=tensor.device)
        for _ in range(group_size)
    ]
    torch.distributed.all_gather(output_tensors, tensor, group, async_op)
    output = [elem.item() for elem in output_tensors]
    return output


class DistributedSignalHandler:
    def __init__(self, sig: signal.Signals = signal.SIGTERM):
        self.sig = sig

    def signals_received(self):
        all_received = all_gather_item(
            self._signal_received, dtype=torch.int32
        )
        return all_received

    def __enter__(self):
        self._signal_received = False
        self.released = False
        self.original_handler = signal.getsignal(self.sig)

        def handler(signum, frame):
            self._signal_received = True

        signal.signal(self.sig, handler)

        return self

    def __exit__(self, type, value, tb):
        self.release()

    def release(self):
        if self.released:
            return False

        signal.signal(self.sig, self.original_handler)
        self.released = True
        return True


================================================
FILE: megatron/training/ft_integration.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

"""
Fault Tolerance (FT) package integration for Megatron-LM, using the FT section-based API.

The FT package is included in "nvidia-resiliency-ext" 
(https://github.com/NVIDIA/nvidia-resiliency-ext).

NOTE: The workload must be run using the `ft_launcher` tool provided by `nvidia-resiliency-ext.`
NOTE: Calls to the public API of this module are no-ops if FT is not initialized 
(`ft_integration.setup` was not called).
NOTE: Default distributed process group should be initialized before calling `ft_integration.setup`

The "setup" FT section is opened during FT initialization and closed before the first training or 
eval iteration. Training and evaluation steps are wrapped in the "step" section, but only after a 
few warmup iterations. This is because the initial iterations may be slower, and we want the "step" 
timeout to be short. These warmup steps, which are not wrapped in the "step" section, will fall into
the out-of-section area. All checkpoint-saving-related operations (including asynchronous 
checkpointing finalization) are wrapped in the "checkpointing" section.

If timeout calculation is enabled (--calc-ft-timeouts), 
FT timeouts are updated after each checkpoint and at the end of the run.
Updated values are based on observed intervals.

`ft_launcher` command example:
```
ft_launcher \
    --rdzv_backend=c10d --rdzv_endpoint=${MASTER_ADDR}:${MASTER_PORT} \
    --nnodes=${NUM_NODES} --nproc-per-node=${NUM_GPUS_PER_NODE} \
    --ft-param-rank_section_timeouts=setup:600,step:180,checkpointing:420 \
    --ft-param-rank_out_of_section_timeout=300 \
    train_script_with_ft.py
```
"""

import argparse
import json
import os
import random
import signal
import sys
import threading
import time
from typing import Any, Optional

import torch

from . import arguments
from . import global_vars
from .utils import is_rank0, print_rank_0

_GLOBAL_RANK_MONITOR_CLIENT = None

_ft_state_path = None
_is_persistent_chkpt_loaded = False
_is_async_chkpt_enabled = False
_is_calculating_timeouts = False
_is_setup_section_open = False
_seen_checkpoints_cnt = 0
_seen_tr_iters_cnt = 0
_curr_eval_iter_idx = 0

_NUM_WARMUP_ITERS = 1  # Will be set by --ft-num-warmup-iters (default: 5)
_MIN_ITERS_FOR_STEP_TIMEOUT_UPDATE = 16


def get_rank_monitor_client() -> Optional[Any]:
    """Returns the underlying fault tolerance client instance

    Returns:
        RankMonitorClient: rank monitor client instance, or None if FT was not initialized
    """
    return _GLOBAL_RANK_MONITOR_CLIENT


def setup() -> None:
    """Initialize fault tolerance before initialize_megatron"""
    args = arguments.parse_args(ignore_unknown_args=True)
    if not args.enable_ft_package:
        return

    # Initialize fault tolerance
    from nvidia_resiliency_ext.fault_tolerance import RankMonitorClient

    if os.environ.get("RANK") == "0":
        print("FT: initializing...", flush=True)

    checkpoint_dir = args.save
    if not checkpoint_dir:
        raise ValueError("checkpointing save dir must be set to enable fault tolerance")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir, exist_ok=True)

    cli = RankMonitorClient()
    global _GLOBAL_RANK_MONITOR_CLIENT
    global_vars._ensure_var_is_not_initialized(_GLOBAL_RANK_MONITOR_CLIENT, 'rank monitor client')
    _GLOBAL_RANK_MONITOR_CLIENT = cli

    global _ft_state_path
    _ft_state_path = os.path.join(checkpoint_dir, "ft_state.json")

    global _is_async_chkpt_enabled
    _is_async_chkpt_enabled = args.async_save

    global _is_calculating_timeouts
    _is_calculating_timeouts = args.calc_ft_timeouts

    global _NUM_WARMUP_ITERS
    _NUM_WARMUP_ITERS = args.ft_num_warmup_iters

    cli.init_workload_monitoring(num_warmup_iters=_NUM_WARMUP_ITERS)
    _load_state_if_exists()
    if os.environ.get("RANK") == "0":
        print(f"FT: initialized. Timeouts={cli.section_timeouts}", flush=True)

    cli.start_section("setup")
    global _is_setup_section_open
    _is_setup_section_open = True


def on_training_step_start() -> None:
    """Should be called before each training step"""
    rmon_cli = get_rank_monitor_client()
    if rmon_cli is not None:
        global _is_setup_section_open
        if _is_setup_section_open:
            rmon_cli.end_section("setup")
            _is_setup_section_open = False
        if _seen_tr_iters_cnt >= _NUM_WARMUP_ITERS:
            rmon_cli.start_section("step")
        # reset eval step index. we started training, so evaluation is done
        global _curr_eval_iter_idx
        _curr_eval_iter_idx = 0


def on_training_step_end() -> None:
    """Should be called after each training step"""
    rmon_cli = get_rank_monitor_client()
    if rmon_cli is not None:
        global _seen_tr_iters_cnt
        if _seen_tr_iters_cnt >= _NUM_WARMUP_ITERS:
            rmon_cli.end_section("step")
        _seen_tr_iters_cnt += 1


def on_eval_step_start() -> None:
    """Should be called before each validation step"""
    rmon_cli = get_rank_monitor_client()
    if rmon_cli is not None:
        global _is_setup_section_open
        if _is_setup_section_open:
            # setup section can be open if there were no training iters before evaluation
            rmon_cli.end_section("setup")
            _is_setup_section_open = False
        if _curr_eval_iter_idx >= _NUM_WARMUP_ITERS:
            rmon_cli.start_section("step")


def on_eval_step_end() -> None:
    """Should be called after each validation step"""
    rmon_cli = get_rank_monitor_client()
    if rmon_cli is not None:
        global _curr_eval_iter_idx
        if _curr_eval_iter_idx >= _NUM_WARMUP_ITERS:
            rmon_cli.end_section("step")
        _curr_eval_iter_idx += 1


def on_checkpointing_start() -> None:
    """Should be called before each checkpoint-saving-related operation."""
    rmon_cli = get_rank_monitor_client()
    if rmon_cli is not None:
        rmon_cli.start_section("checkpointing")


def on_checkpointing_end(is_async_finalization: bool) -> None:
    """Should be called after each checkpoint-saving-related operation.

    Args:
        is_async_finalization (bool): true if called after an async checkpointing finalization
    """
    rmon_cli = get_rank_monitor_client()
    if rmon_cli is not None:
        rmon_cli.end_section("checkpointing")
    # async checkpointing finalization is called before each training iter, it can be no-op.
    # let's try to update the timeouts only on the `save_checkpoint`
    if not is_async_finalization:
        global _seen_checkpoints_cnt
        _seen_checkpoints_cnt += 1
        _maybe_update_timeouts()


def on_checkpoint_loaded(is_local_chkpt: bool) -> None:
    """Should be called after a checkpoint was loaded

    Args:
        is_local_chkpt (bool): true if it was a local checkpoint, false if global
    """
    # checkpoint can be loaded during "setup"
    # check if persistent checkpoint was loaded,
    # in-memory checkpoint reading can be very fast,
    # so we could underestimate the "setup" timeout
    global _is_persistent_chkpt_loaded
    _is_persistent_chkpt_loaded = not is_local_chkpt


def shutdown() -> None:
    """Shutdowns fault folerance, updates the FT timeouts if possible"""
    global _GLOBAL_RANK_MONITOR_CLIENT
    rmon_cli = get_rank_monitor_client()
    if rmon_cli is not None:
        print_rank_0("FT: closing...")
        _maybe_update_timeouts(is_closing_ft=True)
        rmon_cli.shutdown_workload_monitoring()
        print_rank_0("FT: closed.")
    _GLOBAL_RANK_MONITOR_CLIENT = None


def _load_state_if_exists():
    rmon_cli = get_rank_monitor_client()
    if os.path.exists(_ft_state_path):
        with open(_ft_state_path, "r") as f:
            ft_state = json.load(f)
        rmon_cli.load_state_dict(ft_state)
        print_rank_0(f"FT: loaded timeouts from {_ft_state_path}. {rmon_cli.section_timeouts}")


def _update_timeouts(selected_sections, calc_out_of_section):
    print_rank_0(
        f"FT: updating timeouts for: {selected_sections} "
        + f"update out-of-section: {calc_out_of_section} ..."
    )
    rmon_cli = get_rank_monitor_client()
    rmon_cli.calculate_and_set_section_timeouts(
        selected_sections=selected_sections, calc_out_of_section=calc_out_of_section
    )
    if is_rank0():
        ft_state = rmon_cli.state_dict()
        with open(_ft_state_path, "w") as f:
            json.dump(ft_state, f)
        print_rank_0(f"FT: updated timeouts saved to {_ft_state_path}. {rmon_cli.section_timeouts}")


def _maybe_update_timeouts(is_closing_ft=False):
    rmon_cli = get_rank_monitor_client()
    if rmon_cli is None:
        return
    if not _is_calculating_timeouts:
        return

    # Decide which section timeouts can be updated
    sections_to_update = []

    if _is_persistent_chkpt_loaded:
        sections_to_update.append("setup")
    else:
        print_rank_0(
            "FT: can't update the setup section timeout until persistent checkpoint is loaded"
        )

    if _seen_tr_iters_cnt >= _MIN_ITERS_FOR_STEP_TIMEOUT_UPDATE:
        sections_to_update.append("step")
    else:
        print_rank_0("FT: need to see more training iterations to update the step section timeout")

    if _seen_checkpoints_cnt > 0:
        if not _is_async_chkpt_enabled:
            sections_to_update.append("checkpointing")
        else:
            # There can be too much checkpointing section time variability
            # across runs with the async checkpointing, e.g. in some runs all checkpointing
            # work can be parallelized (=short checkpointing sections) while in others we can
            # hit a costly finalization.
            print_rank_0(
                "FT: can't update the checkpointing section timeout with async checkpointing"
            )
    else:
        print_rank_0("FT: checkpointing section is not updated until a checkpoint was saved")

    update_out_of_section = False
    if is_closing_ft:
        # with async checkpointing, "checkpointing" section is not updated,
        # but still we want to see some checkpointing to ensure that is was a complete run
        if {'setup', 'step'}.issubset(sections_to_update) and _seen_checkpoints_cnt > 0:
            update_out_of_section = True
        else:
            print_rank_0(
                "FT: the out-of-section timeout won't be updated until all FT sections were seen"
            )

    else:
        print_rank_0("FT: the out-of-section timeout won't be updated as the FT is not closing yet")

    if sections_to_update or update_out_of_section:
        _update_timeouts(
            selected_sections=sections_to_update, calc_out_of_section=update_out_of_section
        )


def maybe_setup_simulated_fault() -> None:
    """Sets a simulated fault, based on `FT_SIM_FAULT_DESC` env variable.
    Simulated fault description format:
    rank_hung|rank_killed;rank_to_fail|"";base_delay
    NOTE: This if for FT testing only
    """

    simulated_fault_desc = os.environ.get('FT_SIM_FAULT_DESC', None)
    if not simulated_fault_desc:
        return
    fault_type: Any  # silence mypy
    rank_to_fail: Any  # silence mypy
    base_delay: Any  # silence mypy
    fault_type, rank_to_fail, base_delay = simulated_fault_desc.split(';')
    fault_type = fault_type.strip()
    rank_to_fail = rank_to_fail.strip()
    rank_to_fail = int(rank_to_fail) if rank_to_fail else None
    base_delay = float(base_delay.strip())

    rng = random.Random()

    print_rank_0(
        f"FT: Initializing simulated fault: {fault_type},"
        + f"rank to fail: {rank_to_fail}, base delay: {base_delay}"
    )

    # rank that simulates a fault can be explicitly specified in the `rank_to_fail` field
    # if not specified, it just picks a random rank
    rank = torch.distributed.get_rank()
    rand_rank = rng.randint(0, torch.distributed.get_world_size() - 1)
    rank_to_fail = rank_to_fail if rank_to_fail is not None else rand_rank
    rank_to_fail = torch.tensor([rank_to_fail], device=torch.cuda.current_device())
    torch.distributed.broadcast(rank_to_fail, 0)
    rank_to_fail = int(rank_to_fail.item())

    if rank != rank_to_fail:
        # this rank is not going to simulate a fault, nothing more to do
        return

    if fault_type == 'random':
        fault_type = rng.choice(['rank_killed', 'rank_hung'])

    if fault_type == 'rank_killed':
        target_pid = os.getpid()
    elif fault_type == 'rank_hung':
        target_pid = os.getpid()
    else:
        raise Exception(f"Unknown fault type {fault_type} expected one of: rank_killed, rank_hung.")

    # add some randomness to the delay
    delay = base_delay + 0.2 * rng.random() * base_delay

    print_rank_0(f"FT: Selected fault={fault_type}; target rank={rank_to_fail}; delay={delay}")

    def __fault_thread():
        time.sleep(delay)
        for of in [sys.stdout, sys.stderr]:
            print(
                f"\n####\nFT: Simulating fault: {fault_type}; rank to fail: {rank_to_fail}\n####\n",
                file=of,
                flush=True,
            )
        if fault_type == 'rank_hung':
            os.kill(target_pid, signal.SIGSTOP)
        else:
            os.kill(target_pid, signal.SIGKILL)

    fault_sim_thread = threading.Thread(target=__fault_thread)
    fault_sim_thread.daemon = True
    fault_sim_thread.start()


================================================
FILE: megatron/training/global_vars.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Megatron global variables."""

import os
import sys
import torch

from megatron.core import Timers
from megatron.core.config import set_experimental_flag
from megatron.core.energy_monitor import EnergyMonitor
from megatron.core.jit import disable_jit_fuser
from megatron.core.num_microbatches_calculator import init_num_microbatches_calculator, unset_num_microbatches_calculator
from megatron.core.tokenizers.utils.build_tokenizer import build_tokenizer
from megatron.training.dist_signal_handler import DistributedSignalHandler

_GLOBAL_ARGS = None
_GLOBAL_TOKENIZER = None
_GLOBAL_TENSORBOARD_WRITER = None
_GLOBAL_WANDB_WRITER = None
_GLOBAL_ONE_LOGGER = None
_GLOBAL_ADLR_AUTORESUME = None
_GLOBAL_TIMERS = None
_GLOBAL_ENERGY_MONITOR = None
_GLOBAL_SIGNAL_HANDLER = None

def get_args():
    """Return arguments."""
    _ensure_var_is_initialized(_GLOBAL_ARGS, 'args')
    return _GLOBAL_ARGS


def get_tokenizer():
    """Return tokenizer."""
    _ensure_var_is_initialized(_GLOBAL_TOKENIZER, 'tokenizer')
    return _GLOBAL_TOKENIZER


def get_tensorboard_writer():
    """Return tensorboard writer. It can be None so no need
    to check if it is initialized."""
    return _GLOBAL_TENSORBOARD_WRITER


def get_wandb_writer():
    """Return tensorboard writer. It can be None so no need
    to check if it is initialized."""
    return _GLOBAL_WANDB_WRITER


def get_one_logger():
    """Return one logger. It can be None so no need
    to check if it is initialized."""
    return _GLOBAL_ONE_LOGGER

def get_adlr_autoresume():
    """ADLR autoresume object. It can be None so no need
    to check if it is initialized."""
    return _GLOBAL_ADLR_AUTORESUME


def get_timers():
    """Return timers."""
    _ensure_var_is_initialized(_GLOBAL_TIMERS, 'timers')
    return _GLOBAL_TIMERS

def get_energy_monitor():
    """Return energy monitor."""
    _ensure_var_is_initialized(_GLOBAL_ENERGY_MONITOR, 'energy monitor')
    return _GLOBAL_ENERGY_MONITOR

def get_signal_handler():
    _ensure_var_is_initialized(_GLOBAL_SIGNAL_HANDLER, 'signal handler')
    return _GLOBAL_SIGNAL_HANDLER


def _set_signal_handler(exit_signal):

    global _GLOBAL_SIGNAL_HANDLER
    _ensure_var_is_not_initialized(_GLOBAL_SIGNAL_HANDLER, 'signal handler')
    _GLOBAL_SIGNAL_HANDLER = DistributedSignalHandler(exit_signal).__enter__()


def set_global_variables(args, build_tokenizer=True):
    """Set args, tokenizer, tensorboard-writer, adlr-autoresume, and timers."""

    assert args is not None

    _ensure_var_is_not_initialized(_GLOBAL_ARGS, 'args')
    set_args(args)

    init_num_microbatches_calculator(
        args.rank,
        args.rampup_batch_size,
        args.global_batch_size,
        args.micro_batch_size,
        args.data_parallel_size,
        args.decrease_batch_size_if_needed,
    )
    if build_tokenizer:
        _ = _build_tokenizer(args)
    _set_tensorboard_writer(args)
    _set_wandb_writer(args)
    _set_one_logger(args)
    _set_adlr_autoresume(args)
    _set_timers(args)
    _set_energy_monitor(args)

    if args.enable_experimental:
        set_experimental_flag(True)

    if args.exit_signal_handler:
        _set_signal_handler(args.exit_signal)

    if args.disable_jit_fuser:
        disable_jit_fuser()


def unset_global_variables():
    """Unset global vars.

    Useful for multiple runs. See `tests/unit_tests/ckpt_converter/test_ckpt_converter.py` for an example.
    """

    global _GLOBAL_ARGS
    global _GLOBAL_NUM_MICROBATCHES_CALCULATOR
    global _GLOBAL_TOKENIZER
    global _GLOBAL_TENSORBOARD_WRITER
    global _GLOBAL_WANDB_WRITER
    global _GLOBAL_ONE_LOGGER
    global _GLOBAL_ADLR_AUTORESUME
    global _GLOBAL_TIMERS
    global _GLOBAL_ENERGY_MONITOR
    global _GLOBAL_SIGNAL_HANDLER

    _GLOBAL_ARGS = None
    _GLOBAL_NUM_MICROBATCHES_CALCULATOR = None
    _GLOBAL_TOKENIZER = None
    _GLOBAL_TENSORBOARD_WRITER = None
    _GLOBAL_WANDB_WRITER = None
    _GLOBAL_ONE_LOGGER = None
    _GLOBAL_ADLR_AUTORESUME = None
    _GLOBAL_TIMERS = None
    _GLOBAL_ENERGY_MONITOR = None
    _GLOBAL_SIGNAL_HANDLER = None

    unset_num_microbatches_calculator()


def set_args(args):
    global _GLOBAL_ARGS
    _GLOBAL_ARGS = args


def _build_tokenizer(args):
    """Initialize tokenizer."""
    global _GLOBAL_TOKENIZER
    _ensure_var_is_not_initialized(_GLOBAL_TOKENIZER, 'tokenizer')
    _GLOBAL_TOKENIZER = build_tokenizer(args)
    return _GLOBAL_TOKENIZER


def rebuild_tokenizer(args):
    global _GLOBAL_TOKENIZER
    _GLOBAL_TOKENIZER = None
    return _build_tokenizer(args)


def _set_tensorboard_writer(args):
    """Set tensorboard writer."""
    global _GLOBAL_TENSORBOARD_WRITER
    _ensure_var_is_not_initialized(_GLOBAL_TENSORBOARD_WRITER,
                                   'tensorboard writer')

    if hasattr(args, 'tensorboard_dir') and \
       args.tensorboard_dir and args.rank == (args.world_size - 1):
        try:
            from torch.utils.tensorboard import SummaryWriter
            print('> setting tensorboard ...')
            _GLOBAL_TENSORBOARD_WRITER = SummaryWriter(
                log_dir=args.tensorboard_dir,
                max_queue=args.tensorboard_queue_size)
        except ModuleNotFoundError:
            print('WARNING: TensorBoard writing requested but is not '
                  'available (are you using PyTorch 1.1.0 or later?), '
                  'no TensorBoard logs will be written.', flush=True)


def _set_wandb_writer(args):
    global _GLOBAL_WANDB_WRITER
    _ensure_var_is_not_initialized(_GLOBAL_WANDB_WRITER,
                                   'wandb writer')
    if getattr(args, 'wandb_project', '') and args.rank == (args.world_size - 1):
        if args.wandb_exp_name == '':
            raise ValueError("Please specify the wandb experiment name!")

        import wandb
        if args.wandb_save_dir:
            save_dir = args.wandb_save_dir
        else:
            # Defaults to the save dir.
            save_dir = os.path.join(args.save, 'wandb')
        wandb_config = vars(args)
        if 'kitchen_config_file' in wandb_config and wandb_config['kitchen_config_file'] is not None:
            # Log the contents of the config for discovery of what the quantization
            # settings were.
            with open(wandb_config['kitchen_config_file'], "r") as f:
                wandb_config['kitchen_config_file_contents'] = f.read()
        wandb_kwargs = {
            'dir': save_dir,
            'name': args.wandb_exp_name,
            'project': args.wandb_project,
            'config': wandb_config}
        if args.wandb_entity:
            wandb_kwargs['entity'] = args.wandb_entity
        os.makedirs(wandb_kwargs['dir'], exist_ok=True)
        wandb.init(**wandb_kwargs)
        _GLOBAL_WANDB_WRITER = wandb


def _set_one_logger(args):
    global _GLOBAL_ONE_LOGGER
    _ensure_var_is_not_initialized(_GLOBAL_ONE_LOGGER, 'one logger')

    if args.enable_one_logger and args.rank == (args.world_size - 1):
        if args.one_logger_async or getattr(args, 'wandb_project', ''):
            one_logger_async = True
        else:
            one_logger_async = False
        try:
            from one_logger import OneLogger
            config = {
               'project': args.one_logger_project,
               'name': args.one_logger_run_name,
               'async': one_logger_async,
            }
            one_logger = OneLogger(config=config)
            _GLOBAL_ONE_LOGGER = one_logger
        except Exception:
            print('WARNING: one_logger package is required to enable e2e metrics '
                  'tracking. please go to '
                  'https://confluence.nvidia.com/display/MLWFO/Package+Repositories'
                  ' for details to install it')

def _set_adlr_autoresume(args):
    """Initialize ADLR autoresume."""
    global _GLOBAL_ADLR_AUTORESUME
    _ensure_var_is_not_initialized(_GLOBAL_ADLR_AUTORESUME, 'adlr autoresume')

    if args.adlr_autoresume:
        from megatron.training.utils import print_rank_0
        print_rank_0('enabling autoresume ...')
        sys.path.append(os.environ.get('SUBMIT_SCRIPTS', '.'))
        try:
            from userlib.auto_resume import AutoResume
        except ImportError:
            print_rank_0('ADLR autoresume is not available, exiting ...')
            sys.exit()

        _GLOBAL_ADLR_AUTORESUME = AutoResume


def _set_timers(args):
    """Initialize timers."""
    global _GLOBAL_TIMERS
    _ensure_var_is_not_initialized(_GLOBAL_TIMERS, 'timers')
    _GLOBAL_TIMERS = Timers(args.timing_log_level, args.timing_log_option)

def _set_energy_monitor(args):
    """Initialize energy monitor."""
    global _GLOBAL_ENERGY_MONITOR
    _ensure_var_is_not_initialized(_GLOBAL_ENERGY_MONITOR, 'energy monitor')
    _GLOBAL_ENERGY_MONITOR = EnergyMonitor()


def _ensure_var_is_initialized(var, name):
    """Make sure the input variable is not None."""
    assert var is not None, '{} is not initialized.'.format(name)


def _ensure_var_is_not_initialized(var, name):
    """Make sure the input variable is not None."""
    assert var is None, '{} is already initialized.'.format(name)

def destroy_global_vars():
    global _GLOBAL_ARGS
    _GLOBAL_ARGS = None

    global _GLOBAL_TOKENIZER
    _GLOBAL_TOKENIZER = None

    global _GLOBAL_TENSORBOARD_WRITER
    _GLOBAL_TENSORBOARD_WRITER = None

    global _GLOBAL_WANDB_WRITER
    _GLOBAL_WANDB_WRITER = None

    global _GLOBAL_ONE_LOGGER
    _GLOBAL_ONE_LOGGER = None

    global _GLOBAL_ADLR_AUTORESUME
    _GLOBAL_ADLR_AUTORESUME = None

    global _GLOBAL_TIMERS
    _GLOBAL_TIMERS = None

    global _GLOBAL_ENERGY_MONITOR
    _GLOBAL_ENERGY_MONITOR = None

    global _GLOBAL_SIGNAL_HANDLER
    _GLOBAL_SIGNAL_HANDLER = None


================================================
FILE: megatron/training/initialize.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

"""Megatron initialization."""
import logging
import os
import random
import time
import warnings
from datetime import timedelta

import numpy as np
import torch

from megatron.core import mpu, tensor_parallel
from megatron.core.fusions.fused_bias_dropout import bias_dropout_add_fused_train
from megatron.core.fusions.fused_bias_gelu import bias_gelu
from megatron.core.fusions.fused_bias_swiglu import bias_swiglu
from megatron.core.parallel_state import create_group
from megatron.core.rerun_state_machine import (
    RerunDiagnostic,
    RerunErrorInjector,
    RerunMode,
    initialize_rerun_state_machine,
)
from megatron.core.transformer.custom_layers.batch_invariant_kernels import enable_batch_invariant_mode
from megatron.core.utils import get_te_version, is_te_min_version, is_torch_min_version
from megatron.legacy import fused_kernels
from megatron.training import get_adlr_autoresume, get_args, get_tensorboard_writer
from megatron.training.utils import print_rank_0, warn_rank_0
from megatron.training import inprocess_restart
from megatron.training.arguments import parse_args, validate_args
from megatron.training.async_utils import init_persistent_async_worker
from megatron.training.checkpointing import load_args_from_checkpoint
from megatron.training.global_vars import set_global_variables
from megatron.training.utils import is_rank0
from megatron.training.yaml_arguments import validate_yaml

logger = logging.getLogger(__name__)


def initialize_megatron(
    extra_args_provider=None,
    args_defaults={},
    ignore_unknown_args=False,
    allow_no_cuda=False,
    skip_mpu_initialization=False,
    get_embedding_ranks=None,
    get_position_embedding_ranks=None,
    parsed_args=None,
    store=None,
):
    """Set global variables, initialize distributed, and
    set autoresume and random seeds.
    `allow_no_cuda` should not be set unless using megatron for cpu only
    data processing. In general this arg should not be set unless you know
    what you are doing.
    Returns a function to finalize distributed env initialization
    (optionally, only when args.lazy_mpu_init == True)
    """
    if not allow_no_cuda:
        # Make sure cuda is available.
        assert torch.cuda.is_available(), "Megatron requires CUDA."

    # Parse arguments
    if parsed_args is None:
        args = parse_args(extra_args_provider, ignore_unknown_args)
    else:
        args = parsed_args

    # Prep for checkpoint conversion.
    if args.ckpt_convert_format is not None:
        assert args.ckpt_convert_save is not None
        assert args.load is not None
        args.exit_on_missing_checkpoint = True

    if args.use_checkpoint_args or args_defaults.get("use_checkpoint_args", False):
        assert args.load is not None or args.pretrained_checkpoint is not None, "--use-checkpoint-args requires --load or --pretrained-checkpoint argument"
        assert args.non_persistent_ckpt_type != "local", (
            "--use-checkpoint-args is not supported with --non_persistent_ckpt_type=local. "
            "Two-stage checkpoint loading is not implemented, and all arguments must be defined "
            "before initializing LocalCheckpointManager."
        )
        load_args_from_checkpoint(args, load_arg='pretrained_checkpoint')
        load_args_from_checkpoint(args)

    if args.yaml_cfg is not None:
        args = validate_yaml(args, args_defaults)
    else:
        validate_args(args, args_defaults)

    # set global args, build tokenizer, and set adlr-autoresume,
    # tensorboard-writer, and timers.
    set_global_variables(args)

    # set logging level
    setup_logging()

    if args.async_save and args.use_persistent_ckpt_worker:
        init_persistent_async_worker(args.rank, 'forkserver')

    # init rerun state
    def state_save_func():
        return {'rng_tracker_states': tensor_parallel.get_cuda_rng_tracker().get_states()}

    def state_restore_func(state_dict):
        if state_dict['rng_tracker_states']:
            tensor_parallel.get_cuda_rng_tracker().set_states(state_dict['rng_tracker_states'])

    args = get_args()
    initialize_rerun_state_machine(
        state_save_func=state_save_func,
        state_restore_func=state_restore_func,
        mode=RerunMode(args.rerun_mode),
        error_injector=RerunErrorInjector(
            error_injection_rate=args.error_injection_rate,
            error_injection_type=RerunDiagnostic(args.error_injection_type),
        ),
        result_rejected_tracker_filename=args.result_rejected_tracker_filename,
    )
    
    if args.batch_invariant_mode:
        print_rank_0("Enabling batch invariant mode globally")
        enable_batch_invariant_mode()

    # torch.distributed initialization
    def finish_mpu_init():
        args = get_args()
        # Pytorch distributed.
        _initialize_distributed(get_embedding_ranks, get_position_embedding_ranks, store)

        # Random seeds for reproducibility.
        print_rank_0("> setting random seeds to {} ...".format(args.seed))
        _set_random_seed(
            args.seed,
            args.data_parallel_random_init,
            args.te_rng_tracker,
            args.inference_rng_tracker,
            use_cudagraphable_rng=args.cuda_graph_impl != "none",
        )

        # Setup MoE aux loss scale value.
        if args.num_experts is not None:
            from megatron.core.transformer.moe.router import MoEAuxLossAutoScaler

            MoEAuxLossAutoScaler.set_loss_scale(torch.ones(1, device=torch.cuda.current_device()))

    if skip_mpu_initialization:
        return None

    args = get_args()
    if args.lazy_mpu_init:
        # TODO is this still a necessary option?
        args.use_cpu_initialization = True
        # delayed initialization of DDP-related stuff
        # We only set basic DDP globals
        mpu.set_tensor_model_parallel_world_size(args.tensor_model_parallel_size)
        # and return function for external DDP manager
        # to call when it has DDP initialized
        mpu.set_tensor_model_parallel_rank(args.rank)
        return finish_mpu_init
    else:
        # Megatron's MPU is the master. Complete initialization right away.
        finish_mpu_init()

        # Autoresume.
        _init_autoresume()

        # Compile dependencies.
        _compile_dependencies()

        if args.tp_comm_overlap:
            # TODO: Should this be activated with just decoder-tp-comm-overlap too?
            _initialize_tp_communicators()

        # No continuation function
        return None


def _compile_dependencies():

    args = get_args()

    # =========================
    # Compile dataset C++ code.
    # =========================
    # TODO: move this to ninja
    if torch.distributed.get_rank() == 0:
        start_time = time.time()
        print("> compiling dataset index builder ...")
        from megatron.core.datasets.utils import compile_helpers

        compile_helpers()
        print(
            ">>> done with dataset index builder. Compilation time: {:.3f} "
            "seconds".format(time.time() - start_time),
            flush=True,
        )

    # ==================
    # Load fused kernels
    # ==================

    # Custom kernel constraints check.
    seq_len = args.seq_length
    attn_batch_size = (
        args.num_attention_heads / args.tensor_model_parallel_size
    ) * args.micro_batch_size
    # Constraints on sequence length and attn_batch_size to enable warp based
    # optimization and upper triangular optimization (for causal mask)
    custom_kernel_constraint = (
        seq_len > 16 and seq_len <= 16384 and seq_len % 4 == 0 and attn_batch_size % 4 == 0
    )
    # Print a warning.
    if not ((args.fp16 or args.bf16) and custom_kernel_constraint and args.masked_softmax_fusion):
        warn_rank_0(
            "Constraints for invoking optimized fused softmax kernel are not met. "
            "We default back to unfused kernel invocations."
        )

    # Always build on rank zero first.
    if torch.distributed.get_rank() == 0:
        start_time = time.time()
        print("> compiling and loading fused kernels ...", flush=True)
        fused_kernels.load(args)
        torch.distributed.barrier()
    else:
        torch.distributed.barrier()
        fused_kernels.load(args)
    # Simple barrier to make sure all ranks have passed the
    # compilation phase successfully before moving on to the
    # rest of the program. We think this might ensure that
    # the lock is released.
    torch.distributed.barrier()
    if torch.distributed.get_rank() == 0:
        print(
            ">>> done with compiling and loading fused kernels. "
            "Compilation time: {:.3f} seconds".format(time.time() - start_time),
            flush=True,
        )


def _initialize_tp_communicators():
    """initializing the communicators with user buffers for high-performance tensor-model-parallel
    communication overlap"""

    try:
        import transformer_engine
        import yaml
        from transformer_engine.pytorch import module as te_module

    except ImportError:
        raise RuntimeError(
            "Tensor Parallel Communication/GEMM Overlap optimization needs 'yaml' and "
            "'transformer_engine' packages"
        )

    args = get_args()

    if args.tp_comm_overlap_cfg is not None:
        with open(args.tp_comm_overlap_cfg, "r") as stream:
            ub_cfgs = yaml.safe_load(stream)
    else:
        ub_cfgs = {}

    if getattr(args, 'decoder_tp_comm_overlap', False):
        input_shape = [
            (args.decoder_seq_length * args.micro_batch_size) // args.context_parallel_size,
            args.hidden_size,
        ]
    else:
        input_shape = [
            (args.seq_length * args.micro_batch_size) // args.context_parallel_size,
            args.hidden_size,
        ]


    if is_te_min_version("2.7.0"):
        UserBufferQuantizationMode = te_module.base.UserBufferQuantizationMode
        quantization_modes = [UserBufferQuantizationMode.FP8 if args.fp8 else UserBufferQuantizationMode.NONE]
        if args.fp8 is not None and args.first_last_layers_bf16 and (args.num_layers_at_start_in_bf16 > 0 or args.num_layers_at_end_in_bf16 > 0):
            quantization_modes.append(UserBufferQuantizationMode.NONE)
        # The process group with the target bootstrap backend is created in Transformer Engine.
        te_module.base.initialize_ub(
            shape=input_shape,
            tp_size=args.tensor_model_parallel_size,
            quantization_modes=quantization_modes,
            ub_cfgs=ub_cfgs,
            bootstrap_backend=args.tp_comm_bootstrap_backend,
        )
    elif is_te_min_version("1.9.0"):
        # The process group with the target bootstrap backend is created in Transformer Engine.
        te_module.base.initialize_ub(
            shape=input_shape,
            tp_size=args.tensor_model_parallel_size,
            use_fp8=(args.fp8 is not None),
            ub_cfgs=ub_cfgs,
            bootstrap_backend=args.tp_comm_bootstrap_backend,
        )
    else:
        if args.tp_comm_bootstrap_backend != 'mpi':
            warnings.warn(
                f"Transformer Engine v{get_te_version()} supports only MPI bootstrap backend."
            )
        # Create a MPI process group to help with TP communication overlap bootstrap.
        create_group(backend='mpi', group_desc='TP_BOOTSTRAP_GROUP_MPI')

        te_module.base.initialize_ub(
            shape=input_shape,
            tp_size=args.tensor_model_parallel_size,
            use_fp8=(args.fp8 is not None),
            ub_cfgs=ub_cfgs,
        )


def _initialize_distributed(get_embedding_ranks, get_position_embedding_ranks, store):
    """Initialize torch.distributed and core model parallel."""
    args = get_args()

    device_count = torch.cuda.device_count()
    if torch.distributed.is_initialized():

        print_rank_0("torch distributed is already initialized, skipping initialization ...")
        args.rank = torch.distributed.get_rank()
        args.world_size = torch.distributed.get_world_size()

    else:

        print_rank_0("> initializing torch distributed ...")
        # Manually set the device ids.
        if device_count > 0:
            torch.cuda.set_device(args.local_rank)
            device_id = torch.device(f'cuda:{args.local_rank}')
        else:
            device_id = None

        # Set to non-default stream for cudagraph capturing.
        if args.cuda_graph_impl == "transformer_engine":
            torch.cuda.set_stream(torch.cuda.Stream())

        # Set flight recorder env vars if specified.
        # Priority: pre-existing environment variable > MLM argument.
        # All vars follow the same setdefault semantics: if already set in the
        # environment we warn and keep the user's value; otherwise we apply the
        # value derived from the MLM argument / flag.
        # The block is also triggered when either path env var is already set
        # so that the remaining defaults are applied consistently.
        _fr_path = (
            args.flight_recorder_dump_path
            or os.environ.get('TORCH_FR_DUMP_TEMP_FILE')
            or os.environ.get('TORCH_NCCL_DEBUG_INFO_TEMP_FILE')
        )
        if _fr_path is not None:
            _fr_env_defaults = {
                'TORCH_FR_DUMP_TEMP_FILE': _fr_path,
                'TORCH_NCCL_DEBUG_INFO_TEMP_FILE': _fr_path,
                'TORCH_NCCL_TRACE_BUFFER_SIZE': str(args.flight_recorder_trace_buffer_size),
                'TORCH_NCCL_DUMP_ON_TIMEOUT': str(int(args.flight_recorder_dump_on_timeout)),
                'TORCH_INCLUDE_STACK_TRACE': str(int(args.flight_recorder_include_stack_trace)),
                'TORCH_INCLUDE_ONLY_ACTIVE': str(int(args.flight_recorder_include_only_active)),
                'TORCH_NCCL_EXTRA_DUMP_ON_EXEC': str(int(args.flight_recorder_extra_dump_on_exec)),
            }
            for _var, _default in _fr_env_defaults.items():
                if _var in os.environ:
                    warn_rank_0(
                        f"Flight recorder: environment variable {_var} is already set to "
                        f"'{os.environ[_var]}'; ignoring config value '{_default}'."
                    )
                else:
                    os.environ[_var] = _default
            print_rank_0(
                "Flight recorder env vars:\n"
                + "\n".join(f"  {k}={os.environ[k]}" for k in _fr_env_defaults)
            )

        # Call the init process
        init_process_group_kwargs = {
            'backend': args.distributed_backend,
            'store': store,
            'world_size': args.world_size,
            'rank': args.rank,
            'timeout': timedelta(minutes=args.distributed_timeout_minutes),
        }
        if args.fake_process_group:
            assert is_torch_min_version("2.3.0"), "Fake process group is only supported with PyTorch 2.3.0 and above."
            from torch.testing._internal.distributed.fake_pg import FakeStore
            store = FakeStore()
            init_process_group_kwargs['backend'] = 'fake'
            init_process_group_kwargs['store'] = store

        torch.distributed.init_process_group(**init_process_group_kwargs)
        inprocess_restart.maybe_force_nccl_backend_init(device_id)

    # Set the tensor model-parallel, pipeline model-parallel, and
    # data-parallel communicators.
    if device_count > 0:
        if mpu.model_parallel_is_initialized():
            print("model parallel is already initialized")
        else:
            mpu.initialize_model_parallel(
                args.tensor_model_parallel_size,
                args.pipeline_model_parallel_size,
                args.virtual_pipeline_model_parallel_size,
                pipeline_model_parallel_comm_backend=args.pipeline_model_parallel_comm_backend,
                use_sharp=args.use_sharp,
                context_parallel_size=args.context_parallel_size,
                hierarchical_context_parallel_sizes=args.hierarchical_context_parallel_sizes,
                hybrid_context_parallel=args.hybrid_context_parallel,
                expert_model_parallel_size=args.expert_model_parallel_size,
                num_distributed_optimizer_instances=args.num_distributed_optimizer_instances,
                expert_tensor_parallel_size=args.expert_tensor_parallel_size,
                distributed_timeout_minutes=args.distributed_timeout_minutes,
                nccl_communicator_config_path=args.nccl_communicator_config_path,
                order='tp-cp-ep-dp-pp' if not args.use_tp_pp_dp_mapping else 'tp-cp-ep-pp-dp',
                get_embedding_ranks=get_embedding_ranks,
                get_position_embedding_ranks=get_position_embedding_ranks,
                create_gloo_process_groups=args.enable_gloo_process_groups,
                high_priority_stream_groups=args.high_priority_stream_groups,
                sharp_enabled_group=args.sharp_enabled_group,
                create_all_gather_group=args.create_all_gather_group,
            )
            print_rank_0(
                f"> initialized tensor model parallel with size "
                f"{mpu.get_tensor_model_parallel_world_size()}"
            )
            print_rank_0(
                f"> initialized pipeline model parallel with size "
                f"{mpu.get_pipeline_model_parallel_world_size()}"
            )


def _init_autoresume():
    """Set autoresume start time."""
    autoresume = get_adlr_autoresume()
    if autoresume:
        torch.distributed.barrier()
        autoresume.init()
        torch.distributed.barrier()


def _set_random_seed(
    seed_: int,
    data_parallel_random_init: bool = False,
    te_rng_tracker: bool = False,
    inference_rng_tracker: bool = False,
    use_cudagraphable_rng: bool = False,
):
    """Set random seed for reproducability."""
    if seed_ is not None and seed_ > 0:
        # Ensure that different pipeline MP stages get different seeds.
        seed = seed_ + (100 * mpu.get_pipeline_model_parallel_rank())
        # Ensure different data parallel ranks get different seeds
        if data_parallel_random_init:
            seed = seed + (10 * mpu.get_data_parallel_rank())
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if torch.cuda.device_count() > 0:
            tensor_parallel.model_parallel_cuda_manual_seed(
                seed, te_rng_tracker, inference_rng_tracker, use_cudagraphable_rng
            )
    else:
        raise ValueError("Seed ({}) should be a positive integer.".format(seed_))


def write_args_to_tensorboard():
    """Write arguments to tensorboard."""
    args = get_args()
    writer = get_tensorboard_writer()
    if writer:
        for arg in vars(args):
            writer.add_text(arg, str(getattr(args, arg)), global_step=args.iteration)


def set_jit_fusion_options():
    """Set PyTorch JIT layer fusion options."""
    # flags required to enable jit fusion kernels
    if is_torch_min_version("2.2.0a0"):
        pass  # we're using torch.compile for jit fusion
    elif is_torch_min_version("1.10.0a0"):
        # nvfuser
        torch._C._jit_set_profiling_executor(True)
        torch._C._jit_set_profiling_mode(True)
        torch._C._jit_override_can_fuse_on_cpu(False)
        torch._C._jit_override_can_fuse_on_gpu(False)
        torch._C._jit_set_texpr_fuser_enabled(False)
        torch._C._jit_set_nvfuser_enabled(True)
        torch._C._debug_set_autodiff_subgraph_inlining(False)
    else:
        # legacy pytorch fuser
        torch._C._jit_set_profiling_mode(False)
        torch._C._jit_set_profiling_executor(False)
        torch._C._jit_override_can_fuse_on_cpu(True)
        torch._C._jit_override_can_fuse_on_gpu(True)

    _warmup_jit_function()


def _warmup_jit_function():
    """Compilie JIT functions before the main training steps"""
    args = get_args()
    if args.bf16:
        dtype = torch.bfloat16
    elif args.fp16:
        dtype = torch.float16
    else:
        dtype = torch.float32

    # Warmup fused bias+gelu
    bias = torch.rand(
        args.ffn_hidden_size // args.tensor_model_parallel_size, dtype=dtype, device="cuda"
    )
    input = torch.rand(
        (
            args.seq_length // args.context_parallel_size,
            args.micro_batch_size,
            args.ffn_hidden_size // args.tensor_model_parallel_size,
        ),
        dtype=dtype,
        device="cuda",
    )
    # Warmup JIT fusions with the input grad_enable state of both forward
    # prop and recomputation
    for bias_grad, input_grad in zip([True, True], [False, True]):
        bias.requires_grad, input.requires_grad = bias_grad, input_grad
        for _ in range(5):
            if args.swiglu:
                output = bias_swiglu(input, bias)
            else:
                output = bias_gelu(bias, input)
    del bias, input, output

    # Warmup fused bias+dropout+add
    if args.sequence_parallel:
        seq_length = args.seq_length // mpu.get_tensor_model_parallel_world_size()
    else:
        seq_length = args.seq_length
    input = torch.rand(
        (seq_length // args.context_parallel_size, args.micro_batch_size, args.hidden_size),
        dtype=dtype,
        device="cuda",
    )
    residual = torch.rand(
        (seq_length // args.context_parallel_size, args.micro_batch_size, args.hidden_size),
        dtype=dtype,
        device="cuda",
    )
    bias = torch.rand((args.hidden_size), dtype=dtype, device="cuda").expand_as(residual)
    dropout_rate = 0.1
    # Warmup JIT fusions with the input grad_enable state of both forward
    # prop and recomputation
    for input_grad, bias_grad, residual_grad in zip([False, True], [True, True], [True, True]):
        input.requires_grad = input_grad
        bias.requires_grad = bias_grad
        residual.requires_grad = residual_grad
        for _ in range(5):
            output = bias_dropout_add_fused_train([input, bias], residual, dropout_rate)
    del bias, input, residual, output
    torch.cuda.empty_cache()


def setup_logging() -> None:
    """Sets the default logging level based on cmdline args and env vars.

    Precedence:
    1. Command line argument `--logging-level`
    2. Env var `MEGATRON_LOGGING_LEVEL`
    3. Default logging level (INFO)

    Returns: None
    """
    args = get_args()
    logging_level = None
    env_logging_level = os.getenv('MEGATRON_LOGGING_LEVEL', None)
    if env_logging_level is not None:
        logging_level = int(env_logging_level)
    if args.logging_level is not None:
        logging_level = args.logging_level

    if logging_level is not None:
        if is_rank0():
            logger.info(f'Setting logging level to {logging_level}')
        logging.getLogger().setLevel(logging_level)


================================================
FILE: megatron/training/inprocess_restart.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import os
import socket
from datetime import timedelta

try:
    import nvidia_resiliency_ext.inprocess as inprocess
except ImportError:
    inprocess = None

import warnings

import torch

from megatron.core import rerun_state_machine
from megatron.training import get_args
from megatron.training.async_utils import (
    reset_persistent_async_worker,
)

from . import arguments


def destroy_state():
    from . import training
    training.destroy_global_state()
    rerun_state_machine.destroy_rerun_state_machine()

def inprocess_restart(train, args):
    if inprocess is None:
        warnings.warn('In-process restart is not available')
        return train

    if 'TORCH_CPP_LOG_LEVEL' not in os.environ or os.environ['TORCH_CPP_LOG_LEVEL'] not in (
        'error',
        'fatal',
    ):
        warnings.warn(
            'Set TORCH_CPP_LOG_LEVEL=error to suppress c10d waitForInput timeout warning messages'
        )

    # Layers represents a configuration for a layer of branches at a certain
    # depth in a topology tree constructed by inprocess.rank_assignment.Tree.
    # First layer contains all ranks and it's the root of the topology tree,
    # the second optional layer groups ranks by nodes.
    layers = [
        inprocess.rank_assignment.Layer(
            min_ranks=args.inprocess_active_world_size,
            max_ranks=args.inprocess_active_world_size,
            flag=inprocess.rank_assignment.LayerFlag.RESERVE,
        )
    ]
    if args.inprocess_granularity == 'node':
        device_count = torch.cuda.device_count()

        layers.append(
            inprocess.rank_assignment.Layer(
                min_ranks=device_count,
                max_ranks=device_count,
                key_or_fn=lambda _: socket.gethostname(),
                flag=inprocess.rank_assignment.LayerFlag.RESERVE,
            )
        )

    finalize = [
        inprocess.finalize.ThreadedFinalize(timeout=timedelta(seconds=10), fn=destroy_state)
    ]

    if args.inprocess_empty_cuda_cache:
        finalize.append(
            inprocess.finalize.ThreadedFinalize(
                timeout=timedelta(seconds=10), fn=torch.cuda.empty_cache
            )
        )

    initialize = inprocess.Compose(
        inprocess.initialize.RetryController(min_world_size=args.inprocess_active_world_size),
        inprocess.nested_restarter.NestedRestarterHandlingCompleted(),
    )

    class AbortCheckpoint(inprocess.abort.Abort):
        def __call__(
            self, state: inprocess.state.FrozenState
        ) -> inprocess.state.FrozenState:
            reset_persistent_async_worker()
            return state

    abort = inprocess.Compose(
        inprocess.abort.AbortTransformerEngine(),
        inprocess.abort.AbortTorchDistributed(),
        AbortCheckpoint(),
        inprocess.nested_restarter.NestedRestarterHandlingStarting(),
    )
    completion = inprocess.nested_restarter.NestedRestarterFinalized()
    terminate = inprocess.nested_restarter.NestedRestarterAborted()

    train = inprocess.Wrapper(
        store_kwargs={
            'timeout': timedelta(seconds=300),
            'port': int(os.environ['MASTER_PORT']) + 2,
        },
        initialize=initialize,
        abort=abort,
        completion=completion,
        terminate=terminate,
        health_check=inprocess.health_check.CudaHealthCheck(timeout=timedelta(seconds=10)),
        rank_assignment=inprocess.rank_assignment.Tree(layers=layers),
        finalize=inprocess.Compose(*finalize),
        heartbeat_interval=timedelta(seconds=args.inprocess_heartbeat_interval),
        heartbeat_timeout=timedelta(seconds=args.inprocess_heartbeat_timeout),
        barrier_timeout=timedelta(seconds=args.inprocess_barrier_timeout),
        completion_timeout=timedelta(seconds=args.inprocess_completion_timeout),
        monitor_process_interval=timedelta(seconds=args.inprocess_monitor_process_interval),
        monitor_thread_interval=timedelta(seconds=args.inprocess_monitor_thread_interval),
        last_call_wait=timedelta(seconds=args.inprocess_last_call_wait),
        soft_timeout=timedelta(seconds=args.inprocess_soft_timeout),
        hard_timeout=timedelta(seconds=args.inprocess_hard_timeout),
        termination_grace_time=timedelta(seconds=args.inprocess_termination_grace_time),
        enabled=True,
    )(train)

    return train


def maybe_wrap_for_inprocess_restart(pretrain):

    args = arguments.parse_args(ignore_unknown_args=True)

    if args.inprocess_restart:
        pretrain = inprocess_restart(pretrain, args)

        store = torch.distributed.TCPStore(
            host_name=os.environ['MASTER_ADDR'],
            port=int(os.environ['MASTER_PORT'])+1,
            world_size=int(os.getenv('WORLD_SIZE', '1')),
            is_master=(int(os.getenv('RANK', '0')) == 0),
            timeout=timedelta(seconds=300),
            wait_for_workers=True,
            use_libuv=True,
        )
    else:
        store = None

    return pretrain, store


def maybe_force_nccl_backend_init(device_id):

    args = get_args()

    # Inprocess uses destroy_process_group to terminate NCCL backend, which
    # does not terminate NCCL kernels if NCCL backend wasn't fully initialized
    # before additional distributed subgroups are created. This forces initialization
    # of the NCCL backend.
    if args.inprocess_restart:
        tensor = torch.ones(128, device=device_id)
        torch.distributed.all_reduce(tensor)
        torch.cuda.synchronize()


================================================
FILE: megatron/training/log_handler.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

import sys
from logging import LogRecord, StreamHandler

BLACKLISTED_MODULES = ["torch.distributed"]


class CustomHandler(StreamHandler):
    """
    Custom handler to filter out logging from code outside of
    Megatron Core, and dump to stdout.
    """

    def __init__(self):
        super().__init__(stream=sys.stdout)

    def filter(self, record: LogRecord) -> bool:
        # Prevent log entries that come from the blacklisted modules
        # through (e.g., PyTorch Distributed).
        for blacklisted_module in BLACKLISTED_MODULES:
            if record.name.startswith(blacklisted_module):
                return False
        return True


================================================
FILE: megatron/training/one_logger_utils.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
import time, os

from .global_vars import get_one_logger, get_args

_one_logger_utils_version = "1.2.0-mlm"


def get_timestamp_in_ms():
    """Helper function to get timestamp in ms

    Returns:
        [int]: [timestamp in ms]
    """
    return round(time.time() * 1000.0)


def on_train_start(iteration, consumed_train_samples, train_samples, seq_length,
                   train_iters, save, async_save, log_throughput,
                   num_floating_point_operations_so_far):
    """Function will be called at the start of train function to prepare and track E2E metrics.

    Args:
        iteration (int): current iteration number
        consumed_train_samples (int): consumed sample numbers so far
        train_samples (int): total train sample number
        seq_length (int): sequence length
        train_iters (int): target iteration
        save (str): output directory to save checkpoints to
        async_save (bool): apply async checkpointing save
        log_throughput (bool): log throughput or not
        num_floating_point_operations_so_far (int): flops so far
    """
    args = get_args()
    one_logger = get_one_logger()

    if one_logger:
        with one_logger.get_context_manager():
            # Get app train loop start time
            app_train_loop_start_time = get_timestamp_in_ms()
            one_logger.store_set('app_train_loop_start_time', app_train_loop_start_time)

            # Set up initial values in store
            one_logger.store_set('iteration_start', iteration)
            one_logger.store_set('train_samples_start', consumed_train_samples)

            # Init accumulative metric values in one-logger store
            one_logger.store_set('train_iterations_time_msecs_total', 0)
            one_logger.store_set('tracked_train_iterations', iteration)
            one_logger.store_set('validation_iterations_time_msecs_total', 0)
            one_logger.store_set('tracked_validation_iterations', 0)
            one_logger.store_set('save_checkpoint_count', 0)
            one_logger.store_set('save_checkpoint_sync_time_total', 0.0)

            # Derive train_samples from iters for iteration-based training
            train_samples_target = train_samples or train_iters * args.global_batch_size

            train_tokens_target = seq_length * train_samples_target
            e2e_metrics = {
                'train_samples_start': consumed_train_samples,
                'train_iterations_start': iteration,
                'train_samples_target': train_samples_target,
                'train_iterations_target': train_iters,
                'train_tokens_target': train_tokens_target,
                'app_train_loop_start_time': app_train_loop_start_time,
                'is_save_checkpoint_enabled': save is not None,
                'save_checkpoint_strategy': 'async' if async_save else 'sync',
            }
            if log_throughput:
                e2e_metrics.update({
                    'train_tflop_start': float(num_floating_point_operations_so_far) / (10**12),
                })
            one_logger.log_metrics(e2e_metrics)


def _produce_e2e_metrics(log_throughput=False, throughput=None):
    """ Generate APP metrics for E2E tracking
    NOTE: always call this function after barrier call

    Args:
        log_throughput (bool, optional): if log throughput or not. Defaults to False.
        throughput (int, optional): throughput value to log. Defaults to None.

    Returns:
        dict: all E2E metrics
    """
    one_logger = get_one_logger()
    
    if one_logger:
        with one_logger.get_context_manager():
            # Unpack and assign local vars
            base_metrics = one_logger.store_get('get_e2e_base_metrics')()
            (iteration, train_duration, eval_duration, eval_iterations,
             total_flops_since_current_train_start, num_floating_point_operations_so_far,
             consumed_train_samples, world_size, seq_length) = base_metrics.values()

            iteration_start = one_logger.store_get('iteration_start')
            train_samples_start = one_logger.store_get('train_samples_start')

            train_samples = consumed_train_samples - train_samples_start
            train_iterations = iteration - iteration_start
            train_iterations_time_msecs_avg = (train_duration * 1000.0) / train_iterations
            if eval_iterations:
                validation_iterations_time_msecs_avg = (eval_duration * 1000.0) / eval_iterations
            else:
                validation_iterations_time_msecs_avg = None

            if not one_logger.store_has_key('first_logged_train_iterations_finish_time'):
                one_logger.store_set(
                    'first_logged_train_iterations_finish_time',
                    get_timestamp_in_ms()
                )

            train_tokens = train_samples * seq_length

            e2e_metrics = {
                'first_logged_train_iterations_finish_time': \
                    one_logger.store_get('first_logged_train_iterations_finish_time'),
                'train_iterations_end': iteration,
                'train_samples_end': consumed_train_samples,
                'train_iterations': train_iterations,
                'train_samples': train_samples,
                'train_iterations_time_msecs_avg': train_iterations_time_msecs_avg,
                'validation_iterations_time_total': eval_duration,
                'validation_iterations_time_msecs_avg': validation_iterations_time_msecs_avg,
                'train_tokens': train_tokens,
                'train_iterations_time_total': train_duration,
                'last_logged_train_iterations_finish_time': get_timestamp_in_ms(),
            }

            if log_throughput:
                if train_duration:
                    train_throughput_per_gpu = total_flops_since_current_train_start / (train_duration * 10**12 * world_size)
                else:
                    train_throughput_per_gpu = 0.0

                train_throughput_per_gpu_max = one_logger.store_get('train_throughput_per_gpu_max')
                if throughput:
                    train_throughput_per_gpu_max = max(throughput, train_throughput_per_gpu_max)
                    one_logger.store_set('train_throughput_per_gpu_max', train_throughput_per_gpu_max)

                throughput_metrics = {
                    'train_tflop_end': float(num_floating_point_operations_so_far) / (10**12),
                    'train_tflop': float(total_flops_since_current_train_start) / (10**12),
                    'train_throughput_per_gpu': train_throughput_per_gpu,
                    'train_throughput_per_gpu_max': train_throughput_per_gpu_max,
                }
                e2e_metrics.update(throughput_metrics)

            # Tracking minimal train/validation iteration duration metrics
            # Minimal train iteration duration
            current_train_iterations_time_msecs_total = train_duration * 1000.0
            current_train_iteration = iteration
            prev_train_iterations_time_msecs_total = one_logger.store_get('train_iterations_time_msecs_total')
            tracked_train_iterations = one_logger.store_get('tracked_train_iterations')

            if current_train_iteration > tracked_train_iterations:
                train_iterations_time_msecs = (
                    (current_train_iterations_time_msecs_total - prev_train_iterations_time_msecs_total) /
                    (current_train_iteration - tracked_train_iterations)
                )

                if not one_logger.store_has_key('train_iterations_time_msecs_min'):
                    train_iterations_time_msecs_min = train_iterations_time_msecs
                else:
                    train_iterations_time_msecs_min = min(
                        one_logger.store_get('train_iterations_time_msecs_min'),
                        train_iterations_time_msecs
                    )
                one_logger.store_set('train_iterations_time_msecs_min', train_iterations_time_msecs_min)
                one_logger.store_set('train_iterations_time_msecs_total', current_train_iterations_time_msecs_total)
                one_logger.store_set('tracked_train_iterations', current_train_iteration)

                e2e_metrics.update({
                    'train_iterations_time_msecs_min': train_iterations_time_msecs_min
                })

            # Minimal validation iteration duration
            current_validation_iterations_time_msecs_total = eval_duration * 1000.0
            current_validation_iteration = eval_iterations
            prev_validation_iterations_time_msecs_total = \
                one_logger.store_get('validation_iterations_time_msecs_total')
            tracked_validation_iterations = one_logger.store_get('tracked_validation_iterations')

            if current_validation_iteration > tracked_validation_iterations:
                validation_iterations_time_msecs = (
                    (current_validation_iterations_time_msecs_total - prev_validation_iterations_time_msecs_total) /
                    (current_validation_iteration - tracked_validation_iterations)
                )

                # Cache minimal validation iteration duration
                if not one_logger.store_has_key('validation_iterations_time_msecs_min'):
                    validation_iterations_time_msecs_min = validation_iterations_time_msecs
                else:
                    validation_iterations_time_msecs_min = min(
                        one_logger.store_get('validation_iterations_time_msecs_min'),
                        validation_iterations_time_msecs
                    )
                one_logger.store_set('validation_iterations_time_msecs_min', validation_iterations_time_msecs_min)
                one_logger.store_set('validation_iterations_time_msecs_total', current_validation_iterations_time_msecs_total)
                one_logger.store_set('tracked_validation_iterations', current_validation_iteration)

                e2e_metrics.update({
                    'validation_iterations_time_msecs_min': validation_iterations_time_msecs_min
                })
            return e2e_metrics


def track_e2e_metrics(log_throughput=False, throughput=None):
    """Track E2E application metrics with one-logger

    NOTE: the function should be called after barrier call.

    Args:
        log_throughput (bool, optional): if log throughput or not. Defaults to False.
        throughput (int, optional): throughput value to log. Defaults to None.
    """
    one_logger = get_one_logger()

    if one_logger:
        with one_logger.get_context_manager():
            e2e_metrics = _produce_e2e_metrics(log_throughput, throughput)
            one_logger.log_metrics(e2e_metrics)


def on_save_checkpoint_start(async_save):
    """Function to be called before save-checkpoint start to generate productive metrics to log after ckpt succeeds.

    Args:
        async_save (bool): apply async checkpointing save

    Returns:
        dict: productive metrics to be stored to DB after ckpt succeeds
    """
    one_logger = get_one_logger()
    
    if one_logger:
        with one_logger.get_context_manager():
            # Unpack and assign local vars
            base_metrics = one_logger.store_get('get_e2e_base_metrics')()
            (iteration, train_duration, eval_duration, eval_iterations,
             total_flops_since_current_train_start, num_floating_point_operations_so_far,
             consumed_train_samples, world_size, seq_length) = base_metrics.values()

            save_checkpoint_count = one_logger.store_get('save_checkpoint_count') + 1
            one_logger.store_set('save_checkpoint_count', save_checkpoint_count)
            one_logger.log_metrics({
                'train_iterations_save_checkpoint_end': iteration,
                'save_checkpoint_count': save_checkpoint_count,
            })
            productive_metrics = {
                'train_tflop_productive_end': float(num_floating_point_operations_so_far) / (10**12),
                'train_iterations_productive_end': iteration,
                'train_samples_productive_end': consumed_train_samples,
                'train_iterations_time_total_productive': train_duration,
                'validation_iterations_time_total_productive': eval_duration,
            }
            if async_save:
                productive_metrics.update({
                    'save_checkpoint_async_count': save_checkpoint_count,
                })
            return productive_metrics

            
def on_pretrain_start():
    """ Function to be called at the start of pretrain function to track E2E meta data
    """
    args = get_args()
    one_logger = get_one_logger()

    if one_logger:
        with one_logger.get_context_manager():
            job_name = os.environ.get('SLURM_JOB_NAME', None)
            app_tag_run_name =  job_name if not args.app_tag_run_name else args.app_tag_run_name
            app_tag_run_version = args.app_tag_run_version
            one_logger.store_set('app_tag_run_name', app_tag_run_name)
            one_logger.store_set('app_tag_run_version', app_tag_run_version)
            one_logger.store_set('train_throughput_per_gpu_max', 0.0)

            one_logger.log_metrics({
                'train_iterations_warmup': 5,
                'data_parallel_size' : args.data_parallel_size,
                'context_parallel_size': args.context_parallel_size,
                'global_batch_size': args.global_batch_size,
                'micro_batch_size': args.micro_batch_size,
                'pipeline_model_parallel_size': args.pipeline_model_parallel_size,
                'tensor_model_parallel_size': args.tensor_model_parallel_size,
                'expert_model_parallel_size' : args.expert_model_parallel_size,
                'world_size': args.world_size,
                'model_seq_length': args.seq_length,
                'app_tag_run_name': app_tag_run_name,
                'app_tag_run_version': app_tag_run_version,
                'is_log_throughput_enabled': args.log_throughput,
                'app_run_type': 'training',
                'summary_data_schema_version': '1.0.0',
                'app_metrics_feature_tags': 'full',
                'one_logger_utils_version': _one_logger_utils_version,
            })

def track_config_flags(train_iters, skip_train, do_train, do_valid, do_test, dataloader_type):
    """Track flags about train/validation/test enablement

    Args:
        train_iters (int): target train iteration number
        skip_train (bool): flag to skip train iterations
        do_train (bool): flags to do train
        do_valid (bool): flags to do validation
        do_test (bool): flags to do test
        dataloader_type (str): dataloader type
    """
    one_logger = get_one_logger()
    if one_logger:
        with one_logger.get_context_manager():
            # Track if training is enabled. Can only be done once args.do_train is assigned after dataloader is built.
            train_enabled = train_iters and (not skip_train) and do_train and train_iters > 0
            one_logger.log_metrics({
                'is_train_iterations_enabled': train_enabled,
                'is_validation_iterations_enabled': bool(do_valid),
                'is_test_iterations_enabled': bool(do_test),
            })

def on_save_checkpoint_success(productive_metrics, async_save):
    """Function to be called after checkpointing succeeds and checkpoint is persisted for storing productive metrics

    Args:
        productive_metrics (dict): productive related E2E metrics generated at the start of save checkpoint
        async_save (bool): apply async checkpointing save
    """
    one_logger = get_one_logger()

    if one_logger:
        with one_logger.get_context_manager():
            # Accumulate train_iterations_time_total_productive for current iteration
            prod_iteration = productive_metrics['train_iterations_productive_end']

            # Log start timestamp of first iteration that was successfully checkpointed
            if not one_logger.store_has_key('first_checkpoint_success'):
                app_train_loop_start_time = one_logger.store_get('app_train_loop_start_time')
                one_logger.store_set('first_checkpoint_success', True)
                one_logger.log_metrics({
                    'first_saved_train_iterations_start_time': app_train_loop_start_time
                })

            # Handle possible out-of-order async checkpoint callbacks
            need_update = True
            if one_logger.store_has_key('iters_prod_max'):
                need_update = prod_iteration > one_logger.store_get('iters_prod_max')

            if need_update:
                # Update cache
                one_logger.store_set('iters_prod_max', prod_iteration)

                if async_save:
                    save_checkpoint_sync_time_total_productive = \
                        one_logger.store_pop(f'save_checkpoint_sync_time_total_productive:{prod_iteration}')
                    last_successful_save_checkpoint_sync_finish_time = \
                        one_logger.store_pop(f'save_checkpoint_sync_finish_time:{prod_iteration}')
                    # Update productive metrics and log to DB
                    productive_metrics.update({
                        'save_checkpoint_sync_time_total_productive': save_checkpoint_sync_time_total_productive,
                        'last_successful_save_checkpoint_sync_finish_time': last_successful_save_checkpoint_sync_finish_time
                    })
                one_logger.log_metrics(productive_metrics)


def on_save_checkpoint_end(save_checkpoint_duration, current_iteration, async_save):
    """Function to be called after checkpointing ends
    
    Args:
        save_checkpoint_duration (float): duration of current save checkpoint process
        current_iteration (int): current train iteration step number
        async_save (bool): apply async checkpointing save
    """
    one_logger = get_one_logger()
    if one_logger:
        with one_logger.get_context_manager():
            save_checkpoint_sync_finish_time = get_timestamp_in_ms()

            # Track finish timestamp of the sync part of first successful save checkpoint
            if (one_logger.store_has_key('first_checkpoint_success') 
                    and not one_logger.store_has_key('first_successful_checkpoint_end')):
                one_logger.store_set('first_successful_checkpoint_end', True)
                one_logger.log_metrics({
                    'first_successful_save_checkpoint_sync_finish_time': save_checkpoint_sync_finish_time
                })

            save_checkpoint_sync_count = one_logger.store_get('save_checkpoint_count')

            # accumulate total sync checkpointing duration
            save_checkpoint_sync_time_total = \
                one_logger.store_get('save_checkpoint_sync_time_total') + save_checkpoint_duration
            one_logger.store_set('save_checkpoint_sync_time_total', save_checkpoint_sync_time_total)

            e2e_metrics = {}
            if async_save:
                # Cache total sync checkpointing duration
                one_logger.store_set(
                    f'save_checkpoint_sync_time_total_productive:{current_iteration}',
                    save_checkpoint_sync_time_total
                )
                # Cache finish time for current iteration
                one_logger.store_set(f'save_checkpoint_sync_finish_time:{current_iteration}',
                                     save_checkpoint_sync_finish_time)
            else:
                e2e_metrics.update({
                    # Track productive total time directly for sync ckpt
                    'save_checkpoint_sync_time_total_productive': save_checkpoint_sync_time_total,
                    'last_successful_save_checkpoint_sync_finish_time': save_checkpoint_sync_finish_time,
                })

            # Tracking min & max value sync checkpointing duration
            # For the first comparison
            if not one_logger.store_has_key('save_checkpoint_sync_time_max'):
                one_logger.store_set('save_checkpoint_sync_time_max', save_checkpoint_duration)
            if not one_logger.store_has_key('save_checkpoint_sync_time_min'):
                one_logger.store_set('save_checkpoint_sync_time_min', save_checkpoint_duration)

            save_checkpoint_sync_time_max = max(
                one_logger.store_get('save_checkpoint_sync_time_max'),
                save_checkpoint_duration
            )
            save_checkpoint_sync_time_min = min(
                one_logger.store_get('save_checkpoint_sync_time_min'),
                save_checkpoint_duration
            )
            one_logger.store_set('save_checkpoint_sync_time_max', save_checkpoint_sync_time_max)
            one_logger.store_set('save_checkpoint_sync_time_min', save_checkpoint_sync_time_min)
            e2e_metrics.update({
                'save_checkpoint_sync_count': save_checkpoint_sync_count,
                'save_checkpoint_sync_time_max': save_checkpoint_sync_time_max,
                'save_checkpoint_sync_time_min': save_checkpoint_sync_time_min,
                'save_checkpoint_sync_time_total': save_checkpoint_sync_time_total,
            })
            one_logger.log_metrics(e2e_metrics)


def track_app_tag(batch_size, world_size, seq_length):
    """Track app_tag and app_tag ID

    Args:
        batch_size (int): current batch size
        world_size (int): the number of processes of current job
        seq_length (int): current sequence length
    """
    # Track app tag & app tag ID
    one_logger = get_one_logger()
    if one_logger:
        with one_logger.get_context_manager():
            app_tag_run_name = one_logger.store_get('app_tag_run_name')
            app_tag_run_version = one_logger.store_get('app_tag_run_version')
            current_app_tag = (f'{app_tag_run_name}_{app_tag_run_version}_{batch_size}'
                            f'_{world_size}_{seq_length}')
            one_logger.log_app_tag(current_app_tag)


def finish():
    """Flush E2E metrics to remote server
    """
    one_logger = get_one_logger()
    if one_logger:
        with one_logger.get_context_manager():
            one_logger.finish()


================================================
FILE: megatron/training/theoretical_memory_usage.py
================================================
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

"""Computes theoretical memory footprint for model training."""


import math
from .utils import is_hybrid_model, print_rank_0

NUM_BYTES_IN_MEGABYTE = 1024 * 1024


def compute_weight_and_optimizer_memory(args, verbose=False):
    # Attention projection size.
    query_projection_size = args.kv_channels * args.num_attention_heads
    query_projection_to_hidden_size_ratio = query_projection_size / args.hidden_size
    # Group Query Attention.
    if not args.group_query_attention:
        args.num_query_groups = args.num_attention_heads
    # MoE.
    num_experts = 1 if args.num_experts is None else args.num_experts
    gated_linear_multiplier = 3 / 2 if args.swiglu else 1
    
    shared_expert_ffn_hidden_size = (
        0
        if args.moe_shared_expert_intermediate_size is None
        else args.moe_shared_expert_intermediate_size
    )

    if args.num_experts is not None:
        if isinstance(args.moe_layer_freq, int):
            moe_layer_pattern = [
                1 if (i % args.moe_layer_freq == 0) else 0 for i in range(args.num_layers)
            ]
        elif isinstance(args.moe_layer_freq, list):
            moe_layer_pattern = args.moe_layer_freq
            assert len(moe_layer_pattern) == args.num_layers, (
                f"Invalid length of moe_layer_pattern: {len(moe_layer_pattern)}, "
                f"expected {args.num_layers}, "
                f"current moe layer pattern: {args.moe_layer_freq}"
            )

        num_dense_layers = args.num_layers - sum(moe_layer_pattern)
        num_moe_layers = sum(moe_layer_pattern)
        moe_ffn_hidden_size = args.moe_ffn_hidden_size
    else:
        moe_layer_pattern = [0] * args.num_layers
        num_dense_layers = args.num_layers
        num_moe_layers = 0
        moe_ffn_hidden_size = 0
    assert num_dense_layers + num_moe_layers == args.num_layers
    if args.mtp_num_layers is not None:
        mtp_layer_is_moe = moe_layer_pattern[-1]
        mtp_num_moe_layers = mtp_layer_is_moe * args.mtp_num_layers
        mtp_num_dense_layers = (1 - mtp_layer_is_moe) * args.mtp_num_layers
    else:
        mtp_num_moe_layers = 0
        mtp_num_dense_layers = 0

    # RMSNorm does not have bias, but LayerNorm has.
    norm_size = 1 if args.normalization == "RMSNorm" else 2

    if args.multi_latent_attention:
        assert not args.group_query_attention
        if args.q_lora_rank is None:
            q_term = args.hidden_size * args.num_attention_heads * (args.qk_head_dim + args.qk_pos_emb_head_dim)
        else:
            ## q lora + rope + q norm
            q_term = args.q_lora_rank * (args.hidden_size + args.num_attention_heads * (args.qk_head_dim + args.qk_pos_emb_head_dim) + norm_size) 
        
        self_attn_term = (
            q_term

            ## kv lora + rope + kv norm
            + args.kv_lora_rank
            * (args.hidden_size + args.num_attention_heads * (args.qk_head_dim + args.v_head_dim) + norm_size)
            + args.hidden_size * args.qk_pos_emb_head_dim

            ## o proj
            + (args.num_attention_heads * args.v_head_dim) * args.hidden_size
        )
    else:
        self_attn_term = (
            2
            * args.hidden_size
            * args.hidden_size
            * (
                # Attention.
                (
                    (1 + (args.num_query_groups / args.num_attention_heads))
                    * query_projection_to_hidden_size_ratio
                )
            )
        )

    num_parameters_in_transformer_layer_dense = (
        2
        * args.hidden_size
        * (
            # Dense MoE MLP.
            (args.ffn_hidden_size * gated_linear_multiplier)
            # Transformer layernorms.
            + norm_size
        )
        + self_attn_term
    )
    num_parameters_in_transformer_layer_moe = (
        2
        * args.hidden_size
        * (
            # MoE MLP.
            + (moe_ffn_hidden_size * num_experts * gated_linear_multiplier)
            # Shared MoE MLP.
            + (shared_expert_ffn_hidden_size * gated_linear_multiplier)
            # Transformer layernorms.
            + norm_size
        )
        + self_attn_term
    )
    embedding_size = args.hidden_size * args.padded_vocab_size
    final_layernorm = norm_size * args.hidden_size
    if args.untie_embeddings_and_output_weights:
        num_parameters_in_embedding_layers = 2 * embedding_size
    else:
        num_parameters_in_embedding_layers = embedding_size
    num_parameters_in_transformer_block = (
        num_parameters_in_transformer_layer_dense * num_dense_layers
        + num_parameters_in_transformer_layer_moe * num_moe_layers
        + final_layernorm
    )
    num_parameters_in_mtp_block = (
        num_parameters_in_transformer_layer_dense * mtp_num_dense_layers
        + num_parameters_in_transformer_layer_moe * mtp_num_moe_layers
    )
    num_total_parameters = (
        num_parameters_in_transformer_block
        + num_parameters_in_mtp_block
        + num_parameters_in_embedding_layers
    )
    if verbose:
        print(
            f"Number of parameters in transformer block in billions: "
            f"{num_parameters_in_transformer_block / 10**9: .2f}"
        )
        if args.mtp_num_layers is not None:
            print(
                f"Number of parameters in mtp block in billions: "
                f"{num_parameters_in_mtp_block / 10**9: .2f}"
            )
        print(
            f"Number of parameters in embedding layers in billions: "
            f"{num_parameters_in_embedding_layers / 10**9:.2f}"
        )
        print(f"Total number of parameters in billions: {num_total_parameters / 10**9:.2f}")

    # Most loaded model shard has (1/pp_size transformer layers + 1 mtp block + 1 embedding layer) / tp_size.
    num_parameters_on_most_loaded_model_shard = (
        (num_parameters_in_transformer_block / args.pipeline_model_parallel_size)
        + num_parameters_in_mtp_block
        + embedding_size
    ) / args.tensor_model_parallel_size
    if args.untie_embeddings_and_output_weights and args.pipeline_model_parallel_size == 1:
        num_parameters_on_most_loaded_model_shard += (
            embedding_size / args.tensor_model_parallel_size
        )
    if verbose:
        print(
            f"Number of parameters in most loaded shard in billions: "
            f"{num_parameters_on_most_loaded_model_shard / 10**9:.4f}"
        )

    if args.pipeline_model_parallel_size > 1:
        # Other shards just have (1/pp_size transformer layers) / tp_size.
        num_parameters_on_other_model_shards = num_parameters_in_transformer_block / (
            args.pipeline_model_parallel_size * args.tensor_model_parallel_size
        )
        if verbose:
            print(
                f"Number of parameters in other shards in billions: "
                f"{num_parameters_on_other_model_shards / 10**9:.4f}"
            )

    num_bytes_per_parameter = (
        18 if not args.use_distributed_optimizer else 6 + (12 / args.data_parallel_size)
    )
    weight_and_optimizer_memory = (
        num_parameters_on_most_loaded_model_shard * num_bytes_per_parameter
    )

    return weight_and_optimizer_memory


def compute_activation_memory(args, num_microbatches, verbose=False):
    # Using formula in Table 2 of https://arxiv.org/pdf/2205.05198.pdf.
    # We are trying to compute the maximum activation footprint, so all calculations in this
    # function are for the first pipeline stage.

    # TODO: This function needs to take into account query_projection_size potentially being
    # different from hidden_size.

    # Memory footprint from transformer layer (self-attention and MLP).
    activation_memory = (args.seq_length * args.micro_batch_size * args.hidden_size) * (
        18 + (4 * (args.ffn_hidden_size / args.hidden_size))
    )
    if verbose:
        print(
            f"Activation memory footprint per transformer layer: "
            f"{activation_memory / NUM_BYTES_IN_MEGABYTE / args.tensor_model_parallel_size:.1f} MB"
        )
    activation_memory *= args.num_layers

    # Now add activation memory required for input embeddings, last LayerNorm and output layer.

    # Input to embedding (pp_size microbatches in flight).
    activation_memory += (
        8 * args.seq_length * args.micro_batch_size * args.pipeline_model_parallel_size
    )
    # Dropout in embedding layer (pp_size microbatches in flight).
    activation_memory += (
        args.seq_length
        * args.micro_batch_size
        * args.hidden_size
        * args.pipeline_model_parallel_size
    )

    # Multiply by interleaved PP memory factor.
    if args.virtual_pipeline_model_parallel_size is not None:
        interleaved_schedule_memory_penalty = 1 + (
            (args.pipeline_model_parallel_size - 1)
            / (args.pipeline_model_parallel_size * args.virtual_pipeline_model_parallel_size)
        )
        in_flight_microbatches = math.ceil(
            interleaved_schedule_memory_penalty * args.pipeline_model_parallel_size
        )
        if verbose:
            print(
                f"Memory penalty from interleaved schedule: {interleaved_schedule_memory_penalty:.2f}"
            )
            print(f"Number of in-flight microbatches: {in_flight_microbatches}")
        activation_memory *= interleaved_schedule_memory_penalty

    # If using non-interleaved schedule, number of microbatches in pipeline can be less than pp_size,
    # so discount accordingly.
    if args.virtual_pipeline_model_parallel_size is None and args.pipeline_model_parallel_size > 1:
        if num_microbatches is not None:
            activation_memory *= min(1, num_microbatches / args.pipeline_model_parallel_size)
            in_flight_microbatches = min(num_microbatches, args.pipeline_model_parallel_size)
        else:
            in_flight_microbatches = args.pipeline_model_parallel_size
        if verbose:
            print(f"Number of in-flight microbatches: {in_flight_microbatches}")

    if args.pipeline_model_parallel_size == 1:
        # Inputs to output layer and CE loss.
        activation_memory += (
            args.seq_length
            * args.micro_batch_size
            * args.hidden_size
            * 4
            * (1 + (args.padded_vocab_size / args.hidden_size))
        )

    # Activation memory is partitioned by TP size due to tensor and sequence model parallelism.
    return activation_memory / args.tensor_model_parallel_size


def compute_activation_memory_without_sp(args, num_microbatches, verbose=False):
    """Compute activation memory without sequence parallelism"""

    # 4. Compute per-layer memory
    per_layer_memory = args.seq_length * args.micro_batch_size * args.hidden_size * (10 + (24 / args.tensor_model_parallel_size))

    if verbose:
        print(
            f"Activation memory footprint per transformer layer (precise, without SP): "
            f"{per_layer_memory / NUM_BYTES_IN_MEGABYTE:.1f} MB"
        )

    # 5. Multiply by number of layers
    total_activation_memory = per_layer_memory * args.num_layers

    # 6. Add embedding activations
    # Input to embedding (pp_size microbatches in flight)
    total_activation_memory += (
        8 * args.seq_length * args.micro_batch_size * args.pipeline_model_parallel_size
    )
    # Dropout in embedding layer (pp_size microbatches in flight)
    total_activation_memory += (
        args.seq_length
        * args.micro_batch_size
        * args.hidden_size
        * args.pipeline_model_parallel_size
    )

    # 7. Handle pipeline parallelism schedules
    # Multiply by interleaved PP memory factor
    if args.virtual_pipeline_model_parallel_size is not None:
        interleaved_schedule_memory_penalty = 1 + (
            (args.pipeline_model_parallel_size - 1)
            / (args.pipeline_model_parallel_size * args.virtual_pipeline_model_parallel_size)
        )
        in_flight_microbatches = math.ceil(
            interleaved_schedule_memory_penalty * args.pipeline_model_parallel_size
        )
        if verbose:
            print(
                f"Memory penalty from interleaved schedule: {interleaved_schedule_memory_penalty:.2f}"
            )
            print(f"Number of in-flight microbatches: {in_flight_microbatches}")
        total_activation_memory *= interleaved_schedule_memory_penalty

    # If using non-interleaved schedule, number of microbatches in pipeline can be less than pp_size
    if args.virtual_pipeline_model_parallel_size is None and args.pipeline_model_parallel_size > 1:
        if num_microbatches is not None:
            total_activation_memory *= min(1, num_microbatches / args.pipeline_model_parallel_size)
            in_flight_microbatches = min(num_microbatches, args.pipeline_model_parallel_size)
        else:
            in_flight_microbatches = args.pipeline_model_parallel_size
        if verbose:
            print(f"Number of in-flight microbatches: {in_flight_microbatches}")

    # 8. Add output layer memory if needed
    if args.pipeline_model_parallel_size == 1:
        # Logits calculation
        logits_size = args.seq_length * args.micro_batch_size * args.padded_vocab_size
        # The output projection is partitioned across TP
        logits_size /= args.tensor_model_parallel_size

        # Outputs from final layer norm
        final_ln_output = args.seq_length * args.micro_batch_size * args.hidden_size

        total_activation_memory += (logits_size + final_ln_output) * 2  # multiply by 2 for bytes

    # 9. Add buffer for optimizer and miscellaneous temporaries (5% overhead)
    overhead_factor = 1.05
    total_activation_memory *= overhead_factor

    return total_activation_memory


def report_theoretical_memory(args, num_microbatches=None, verbose=False):
    if is_hybrid_model(args):
        print("Theoretical memory footprints not yet supported for hybrid Mamba-Transformer models.")
        return

    weight_and_optimizer_memory = (
        compute_weight_and_optimizer_memory(args, verbose=verbose) / NUM_BYTES_IN_MEGABYTE
    )

    # Choose the appropriate activation memory calculation based on parallelism strategy
    if args.sequence_parallel and args.recompute_granularity == 'selective':
        print_rank_0("compute_activation_memory with SP")
        activation_memory = (
            compute_activation_memory(args, num_microbatches=num_microbatches, verbose=verbose)
            / NUM_BYTES_IN_MEGABYTE
        )
    else:
        print_rank_0("compute_activation_memory_without_sp")
        activation_memory = (
            compute_activation_memory_without_sp(args, num_microbatches=num_microbatches, verbose=verbose)
            / NUM_BYTES_IN_MEGABYTE
        )

    total_memory = weight_and_optimizer_memory + activation_memory

    print(
        f"Theoretical memory footprints: weight and optimizer={weight_and_optimizer_memory:.2f} MB, "
        f"activation={activation_memory:.2f} MB, total={total_memory:.2f} MB\n"
    )

    return weight_and_optimizer_memory, activation_memory, total_memory


================================================
FILE: megatron/training/training.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

"""Pretrain utilities."""
import time
# The earliest we can measure the start time.
_TRAIN_START_TIME = time.time()

# Startup timestamps for tracking program initialization phases
_STARTUP_TIMESTAMPS = {
    'program_start': None,  # Set by entry script before imports
    'main_entry': None,     # Set by entry script at start of __main__
    'pretrain_entry': None, # Set at top of pretrain()
}


def set_startup_timestamps(program_start=None, main_entry=None):
    """Set startup timestamps from the entry script.

    Call this after imports but before calling pretrain() to register
    the program start time and main entry time.

    Args:
        program_start: Timestamp captured at very start of program, before any imports.
        main_entry: Timestamp captured right after entering __main__ block.
    """
    global _TRAIN_START_TIME, _STARTUP_TIMESTAMPS
    if program_start is not None:
        _TRAIN_START_TIME = program_start
        _STARTUP_TIMESTAMPS['program_start'] = program_start
    if main_entry is not None:
        _STARTUP_TIMESTAMPS['main_entry'] = main_entry


from collections import defaultdict
import copy
import dataclasses
from datetime import datetime, timedelta
import functools
import gc
import inspect
import logging
import math
import os
import sys
from contextlib import nullcontext
from pathlib import Path
from typing import Any, Optional, Dict

import torch.distributed

from megatron.core.optimizer.distrib_optimizer import DistributedOptimizer
from megatron.core.optimizer_param_scheduler import get_canonical_lr_for_logging
from .log_handler import CustomHandler

# Make default logging level INFO, but filter out all log messages not from MCore.
logging.basicConfig(handlers=[CustomHandler()], level=logging.INFO)
from .theoretical_memory_usage import report_theoretical_memory

_LEGACY_TRAIN_START_TIME = time.time() # NOTE(asolergi-nv): Legacy timestamp

import torch

try:
    from megatron.rl import rl_utils
    has_rl_utils = True
except ImportError:
    has_rl_utils = False
from megatron.rl.parallel_utils import build_inference_pg_collection
try:
    from modelopt.torch.distill.plugins.megatron import (
        get_tensor_shapes_adjust_fn_for_distillation,
    )

    has_nvidia_modelopt = True
except ImportError:
    has_nvidia_modelopt = False

try:
    from nvidia_resiliency_ext.inprocess import CallWrapper
except ImportError:
    CallWrapper = type(None)


from megatron.core import mpu, tensor_parallel
from megatron.core.models.gpt.experimental_attention_variant_module_specs import (
    is_linear_attention_variant,
)
from megatron.core.utils import (
    check_param_hashes_across_dp_replicas,
    get_attr_wrapped_model,
    get_model_config,
    get_pg_size,
    get_pg_rank,
    StragglerDetector,
)
from megatron.core.fp8_utils import correct_amax_history_if_needed
from megatron.core.process_groups_config import ProcessGroupCollection
from megatron.core.pipeline_parallel.utils import (
    is_pp_first_stage,
    is_pp_last_stage,
    is_vp_first_stage,
    is_vp_last_stage,
)
from megatron.core.optimizer import get_mup_config_overrides, get_standard_config_overrides
from megatron.training.checkpointing import load_checkpoint
from megatron.training.checkpointing import save_checkpoint, save_grads
from megatron.training.checkpointing import checkpoint_exists
from megatron.training.checkpointing import get_loaded_iteration
from megatron.core.full_cuda_graph import FullCudaGraphWrapper
from megatron.core.transformer.cuda_graphs import TECudaGraphHelper
from megatron.core.transformer.enums import CudaGraphScope
from megatron.core.transformer.module import Float16Module
from megatron.core.distributed import DistributedDataParallelConfig, TorchFullyShardedDataParallelConfig
from megatron.core.distributed import DistributedDataParallel as DDP
from megatron.core.distributed.fsdp.mcore_fsdp_adapter import FullyShardedDataParallel as megatron_FSDP
from megatron.core.optimizer.optimizer import param_group_identifier_keys

from megatron.core.optimizer.qk_clip import clip_qk

try:
    from megatron.core.distributed import TorchFullyShardedDataParallel as torch_FSDP

    HAVE_FSDP2 = True
except ImportError:
    HAVE_FSDP2 = False

from megatron.core.distributed import finalize_model_grads
from megatron.core.enums import ModelType
from megatron.core.optimizer import get_megatron_optimizer, AdamOptimizerConfig, SGDOptimizerConfig, OptimizerConfig, ParamKey
from megatron.core.optimizer.muon import get_megatron_muon_optimizer
from megatron.core.rerun_state_machine import (
    get_rerun_state_machine,
    destroy_rerun_state_machine,
    RerunDataIterator,
    RerunMode,
)
from megatron.training.initialize import initialize_megatron
from megatron.training.initialize import write_args_to_tensorboard
from megatron.training.initialize import set_jit_fusion_options
from megatron.training.utils import get_batch_on_this_cp_rank, get_batch_on_this_tp_rank, is_hybrid_model
from megatron.training.datasets.data_samplers import build_pretraining_data_loader
from megatron.core.datasets.data_schedule import HybridCPDataLoaderWrapper
from megatron.core.optimizer_param_scheduler import OptimizerParamScheduler
from megatron.core.transformer.moe import upcycling_utils
from megatron.core.transformer.moe.moe_utils import track_moe_metrics, clear_aux_losses_tracker
from megatron.core.transformer.experimental_attention_variant.dsa import DSAIndexerLossLoggingHelper
from megatron.core.transformer.multi_token_prediction import MTPLossLoggingHelper
from megatron.core.parallel_state import (
    destroy_global_memory_buffer,
    destroy_model_parallel,
    update_pg_timeout
)
from megatron.core.inference.symmetric_memory import SymmetricMemoryManager
from megatron.core.inference.unified_memory import create_unified_mempool
from megatron.core.resharding.refit import swap_model_weights

try:
    from torch_memory_saver import torch_memory_saver
    torch_memory_saver.hook_mode = "torch"
    HAVE_TORCH_MEMORY_SAVER = True
except ImportError:
    HAVE_TORCH_MEMORY_SAVER = False

from megatron.core.pipeline_parallel import get_forward_backward_func
from megatron.core.num_microbatches_calculator import (
    destroy_num_microbatches_calculator,
    get_current_global_batch_size,
    get_current_running_global_batch_size,
    get_num_microbatches,
    update_num_microbatches
)

from .async_utils import maybe_finalize_async_save
from .utils import (
    append_to_progress_log,
    calc_params_l2_norm,
    check_adlr_autoresume_termination,
    logical_and_across_model_parallel_group,
    reduce_max_stat_across_model_parallel_group,
    is_last_rank,
    print_rank_0,
    print_rank_last,
    report_memory,
    unwrap_model,
    update_use_dist_ckpt,
    to_empty_if_meta_device,
)
from .global_vars import (
    destroy_global_vars,
    get_args,
    get_signal_handler,
    get_timers,
    get_tensorboard_writer,
    get_wandb_writer,
    get_one_logger,
    get_tokenizer,
    get_energy_monitor,
)
from . import one_logger_utils
from .dgrad_logging import enable_dgrad_logging, disable_dgrad_logging, save_dgrads

from . import ft_integration

stimer = StragglerDetector()

from megatron.core.msc_utils import MultiStorageClientFeature, open_file


def destroy_global_state():
    destroy_global_vars()
    destroy_num_microbatches_calculator()
    destroy_global_memory_buffer()
    SymmetricMemoryManager.destroy()
    destroy_model_parallel()
    destroy_rerun_state_machine()


def print_datetime(string, override_timestamp=None):
    """Note that this call will sync across all ranks. Use override_timestamp if provided;
       otherwise use current timestamp."""
    torch.distributed.barrier()
    if override_timestamp is None:
        time_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
    else:
        time_str = datetime.fromtimestamp(override_timestamp).strftime('%Y-%m-%d %H:%M:%S.%f')
    print_rank_0(f'[{string}] datetime: {time_str} ')

def num_floating_point_operations(args, batch_size):
    def mlp_layer_flops(batch_size, seq_len, hidden_size, expansion=4.0, swiglu=False):
        """Calculate FLOPs for an MLP layer."""
        scale_factor = 3.0 / 2.0 if swiglu else 1.0
        return 4 * expansion * scale_factor * batch_size * seq_len * hidden_size**2

    def moe_layer_flops(batch_size, seq_len, hidden_size, moe_ffn_hidden_size,
                        shared_expert_ffn_hidden_size, num_experts_routed_to,
                        moe_latent_size=None, swiglu=False):
        """Calculate FLOPs for an MoE layer."""
        scale_factor = 3.0 / 2.0 if swiglu else 1.0
        if moe_latent_size is None:
            routed_flops = (4 * batch_size * seq_len * hidden_size *
                            moe_ffn_hidden_size * num_experts_routed_to * scale_factor)
        else:
            # Routed experts run on moe_latent_size.
            routed_flops = (4 * batch_size * seq_len * moe_latent_size *
                            moe_ffn_hidden_size * num_experts_routed_to * scale_factor)
            # Up proj and down proj.
            routed_flops += (4 * batch_size * seq_len * hidden_size * moe_latent_size)
        shared_flops = 4 * batch_size * seq_len * hidden_size * shared_expert_ffn_hidden_size * scale_factor
        return routed_flops + shared_flops

    def attn_layer_flops(
        batch_size, seq_len, hidden_size, num_heads, gqa=True, gqa_groups=8, kv_channels=None
    ):
        """Calculate FLOPs for an attention layer."""
        p = (kv_channels * num_heads / hidden_size) if kv_channels else 1
        g = gqa_groups if gqa else num_heads
        return (
            4
            * batch_size
            * seq_len
            * hidden_size
            * p
            * (hidden_size + (hidden_size * (g / num_heads)) + (seq_len / 2))
        )

    def mamba_layer_flops(batch_size, seq_len, hidden_size, state_dim=16,
                          head_dim=64, num_groups=1, num_heads=128):
        """Calculate FLOPs for a Mamba layer."""
        # Note (rwaleffe): flops estimate for scan should be updated based on new SSD kernels,
        # but small percent of overall layer flops
        d_in = 2 * hidden_size
        if num_heads:
            nheads = num_heads
        else:
            nheads = d_in // head_dim
        return (
            (
                2
                * batch_size
                * seq_len
                * hidden_size
                * (2 * d_in + 2 * num_groups * state_dim + nheads)
            )  # in_proj
            + (7 * batch_size * seq_len * d_in * state_dim)  # scan
            + (2 * batch_size * seq_len * d_in * hidden_size)  # out_proj
        )

    def hybrid_flops(batch_size, seq_len, hidden_size,
                     num_attn_layers, num_mamba_layers, num_mlp_layers, num_moe_layers,
                     mamba_state_dim=128, mamba_head_dim=64,
                     mamba_num_groups=8, mamba_num_heads=128,
                     num_attn_heads=32, gqa=True,
                     gqa_groups=8, kv_channels=None,
                     mlp_expansion=4.0, swiglu=False,
                     moe_latent_size=None,
                     moe_ffn_hidden_size=2048, shared_expert_ffn_hidden_size=2048, num_experts_routed_to=1,
                     vocab_size=256000, mtp_num_layers=0):
        """Calculate total FLOPs for the hybrid model."""
        flops_fwd = (
                num_attn_layers * attn_layer_flops(batch_size, seq_len, hidden_size,
                                                   num_attn_heads, gqa, gqa_groups, kv_channels) +
                num_mlp_layers * mlp_layer_flops(batch_size, seq_len, hidden_size,
                                                 mlp_expansion, swiglu) +
                num_mamba_layers * mamba_layer_flops(batch_size, seq_len, hidden_size,
                                                     mamba_state_dim, mamba_head_dim,
                                                     mamba_num_groups, mamba_num_heads) +
                num_moe_layers * moe_layer_flops(batch_size, seq_len, hidden_size, moe_ffn_hidden_size,
                                                 shared_expert_ffn_hidden_size, num_experts_routed_to,
                                                 moe_latent_size, swiglu) +
                (2 * batch_size * seq_len * hidden_size * vocab_size * (1 + mtp_num_layers))  # logits computation
        )
        return flops_fwd * 3

    def transformer_flops():
        """Calculate FLOPs for a standard Transformer model."""
        # TODO(helenn/dnarayanan): Refactor this to reuse the helper methods.
        # Group Query Attention.
        if not args.group_query_attention:
            args.num_query_groups = args.num_attention_heads
        # MoE.
        if args.num_experts is None:
            # Every Transformer MLP is dense.
            num_dense_layers = args.num_layers
            num_moe_layers = 0
            num_experts_routed_to = 0
            last_layer_is_moe = 0
        else:
            # Calculate number of dense and MoE Transformer MLPs.
            if isinstance(args.moe_layer_freq, int):
                moe_layer_pattern = [
                    1 if (i % args.moe_layer_freq == 0) else 0 for i in range(args.num_layers)
                ]
            elif isinstance(args.moe_layer_freq, list):
                moe_layer_pattern = args.moe_layer_freq
            else:
                raise RuntimeError("Illegal --moe-layer-freq argument provided!")
            assert len(moe_layer_pattern) == args.num_layers, (
                f"Invalid length of moe_layer_pattern: {len(moe_layer_pattern)}, "
                f"expected {args.num_layers}, "
                f"current moe layer pattern: {args.moe_layer_freq}"
            )
            num_moe_layers = sum(moe_layer_pattern)  # Number of 1s in `moe_layer_pattern`.
            num_dense_layers = args.num_layers - num_moe_layers
            num_experts_routed_to = args.moe_router_topk
            last_layer_is_moe = moe_layer_pattern[-1]

        if args.mtp_num_layers is not None:
            mtp_num_layers = args.mtp_num_layers
            num_moe_layers += last_layer_is_moe * mtp_num_layers
            num_dense_layers += (1 - last_layer_is_moe) * mtp_num_layers
            num_layers = args.num_layers + mtp_num_layers
        else:
            mtp_num_layers = 0
            num_layers = args.num_layers

        moe_ffn_hidden_size = (
            args.moe_ffn_hidden_size
            if args.moe_ffn_hidden_size is not None
            else args.ffn_hidden_size
        )
        moe_latent_size = args.moe_latent_size
        shared_expert_ffn_hidden_size = (
            0
            if args.moe_shared_expert_intermediate_size is None
            else args.moe_shared_expert_intermediate_size
        )

        # - 3x: Each GEMM in the model needs to be performed 3 times (forward pass,
        #       backward wgrad [weight gradient], backward dgrad [data gradient]).
        forward_backward_expansion_factor = 3
        # - 2x: A GEMM of a m*n tensor with a n*k tensor requires 2mnk floating-point operations.
        fma_expansion_factor = 2
        # - 3x (SwiGLU enabled): h->2*ffn_h GEMM and ffn_h->h GEMM are stacked.
        # - 2x (SwiGLU disabled): h->ffn_h GEMM and ffn_h->h GEMM are stacked.
        ffn_expansion_factor = 3 if args.swiglu else 2

        if args.multi_latent_attention:
            assert not args.group_query_attention
            '''
            Basic arithmetic
            let B is batch size, s is seq_len, h is embedding dim,
            for one self_attnetion block (prenorm is not included)
            qkv projection:  6Bsh^2
            attn:            2Bs^2h
            attn over value: 2Bs^2h
            oproj:           2Bsh^2

            references
            https://arxiv.org/abs/2305.10403
            https://arxiv.org/abs/2205.05198
            '''
            ## MLA
            if args.q_lora_rank is None:
                q_term = (
                    args.hidden_size
                    * args.num_attention_heads
                    * (args.qk_head_dim + args.qk_pos_emb_head_dim)
                )
            else:
                q_term = args.q_lora_rank * (
                    args.hidden_size
                    + args.num_attention_heads * (args.qk_head_dim + args.qk_pos_emb_head_dim)
                    + 1
                )
            standard_self_attn_term = (
                forward_backward_expansion_factor
                * fma_expansion_factor
                * (
                    ## q lora + rope + q norm
                    q_term
                    ## kv lora + rope + kv norm
                    + args.kv_lora_rank
                    * (
                        args.hidden_size
                        + args.num_attention_heads * (args.qk_head_dim + args.v_head_dim)
                        + 1
                    )
                    + args.hidden_size * args.qk_pos_emb_head_dim
                    ## o proj
                    + (args.num_attention_heads * args.v_head_dim) * args.hidden_size
                    ## core attn
                    + args.seq_length
                    * (args.num_attention_heads * (args.qk_head_dim + args.qk_pos_emb_head_dim))
                    / 2  # causal mask (only half of the mask is non-zero)
                    + args.seq_length * args.num_attention_heads * args.v_head_dim / 2
                )
            )

        else:
            ## MHA or GQA
            query_projection_size = args.kv_channels * args.num_attention_heads
            key_projection_size = args.kv_channels * args.num_query_groups
            value_projection_size = args.kv_channels * args.num_query_groups
            gate_projection_size = query_projection_size if args.attention_output_gate else 0
            standard_self_attn_term = (
                forward_backward_expansion_factor
                * fma_expansion_factor
                * (
                    ## qkv proj
                    args.hidden_size
                    * (
                        query_projection_size
                        + key_projection_size
                        + value_projection_size
                        + gate_projection_size
                    )
                    ## core attention
                    + query_projection_size
                    * args.seq_length
                    / 2  # causal mask (only half of the mask is non-zero)
                    * 2  # QK^T and (QK^T)V
                    ## out proj
                    + query_projection_size
                    * args.hidden_size
                )
            )

        if is_linear_attention_variant(args.experimental_attention_variant):
            # Calculate number of dense and MoE Transformer MLPs.
            if isinstance(args.linear_attention_freq, int):
                linear_attention_pattern = [
                    # [1,1,...,1,0,1,1,...,1,0,...]
                    0 if ((i + 1) % args.linear_attention_freq == 0)
                    else 1 for i in range(num_layers)
                ]
            elif isinstance(args.linear_attention_freq, list):
                linear_attention_pattern = args.linear_attention_freq
                assert len(linear_attention_pattern) == num_layers, (
                    f"Invalid length of linear_attention_pattern: {len(linear_attention_pattern)}, "
                    f"expected {num_layers}, "
                    f"current linear attention pattern: {args.linear_attention_freq}"
                )
            elif args.linear_attention_freq is None:
                # This should be caught by config validation, but raise here as a safety check
                raise ValueError(
                    f"Linear attention type {args.experimental_attention_variant} is specified "
                    "but linear_attention_freq is None. "
                    "Please set linear_attention_freq to specify the LA/SDPA layer pattern."
                )
            else:
                raise ValueError(
                    f"Invalid linear_attention_freq: {type(args.linear_attention_freq)},"
                    f" {args.linear_attention_freq}"
                )
            num_linear_attention_layers = sum(linear_attention_pattern)
            num_standard_attention_layers = num_layers - num_linear_attention_layers

            if args.experimental_attention_variant == "gated_delta_net":
                # Calculate the FLOPs for the gated delta net attention.
                qk_head_dim = args.linear_key_head_dim
                v_head_dim = args.linear_value_head_dim
                num_qk_heads = args.linear_num_key_heads
                num_v_heads = args.linear_num_value_heads
                qk_dim = qk_head_dim * num_qk_heads
                v_dim = v_head_dim * num_v_heads
                linear_self_attn_term = (
                    forward_backward_expansion_factor
                    * fma_expansion_factor
                    * (
                        ## in proj
                        args.hidden_size
                        * (2 * qk_dim + 2 * v_dim + 2 * num_v_heads)
                        ## conv1d
                        + args.linear_conv_kernel_dim
                        * (2 * qk_dim + v_dim)
                        ## gated delta rule
                        + num_v_heads
                        * (v_head_dim ** 2)
                        * 4  # KK^T, VK^T, S(a(I-bKK^T)), and SQ
                        ## out proj
                        + args.hidden_size
                        * v_dim
                    )
                )
            else:
                raise ValueError(
                    "Invalid experimental_attention_variant: "
                    f"{args.experimental_attention_variant}"
                )
        else:
            num_linear_attention_layers = 0
            linear_self_attn_term = 0
            num_standard_attention_layers = num_layers

        self_attn_term = (
            linear_self_attn_term * num_linear_attention_layers
            + standard_self_attn_term * num_standard_attention_layers
        )

        total_floating_point_operations = (
            batch_size
            * args.seq_length
            * (
                # MLP
                forward_backward_expansion_factor
                * fma_expansion_factor
                * args.hidden_size
                * (
                    # dense layer (deepseek v2, v3 style)
                    (args.ffn_hidden_size * ffn_expansion_factor)
                    * num_dense_layers
                    # routed experts
                    + (
                        (moe_ffn_hidden_size * num_experts_routed_to * ffn_expansion_factor)
                        if moe_latent_size is None
                        else (
                            (
                                moe_ffn_hidden_size
                                * num_experts_routed_to
                                * ffn_expansion_factor
                                * moe_latent_size
                                / args.hidden_size
                            )  # Routed experts run on moe_latent_size.
                            + 2 * moe_latent_size  # Up proj and down proj.
                        )
                    )
                    * num_moe_layers
                    # Shared Experts.
                    + (shared_expert_ffn_hidden_size * ffn_expansion_factor)
                    * num_moe_layers
                )
                # Self Attention
                + self_attn_term
                # MTP norms and proj
                + forward_backward_expansion_factor
                * fma_expansion_factor
                * mtp_num_layers
                * (
                    # MTP eh norm + final nrom
                    3 * args.hidden_size
                    # MTH eh proj
                    + 2 * args.hidden_size * args.hidden_size
                )
                # Logit.
                + forward_backward_expansion_factor
                * fma_expansion_factor
                * args.hidden_size
                * args.padded_vocab_size
                * (mtp_num_layers + 1)  # MTP + final logit
            )
        )
        return total_floating_point_operations

    # Main entrypoint for FLOPs calculation.
    if is_hybrid_model(args):
        # Calculate the number of each type of layer.
        from operator import itemgetter

        from megatron.core.ssm.mamba_hybrid_layer_allocation import Symbols, get_hybrid_layer_counts
        num_attn_layers, num_mamba_layers, num_mlp_layers, num_moe_layers = itemgetter(
            Symbols.ATTENTION, Symbols.MAMBA, Symbols.MLP, Symbols.MOE
        )(get_hybrid_layer_counts(args.hybrid_layer_pattern))

        mtp_num_layers = args.mtp_num_layers
        if mtp_num_layers is None:
            mtp_num_layers = 0
        # Compute hybrid model FLOPs.
        return hybrid_flops(
            batch_size=batch_size,
            seq_len=args.seq_length,
            hidden_size=args.hidden_size,
            num_attn_layers=num_attn_layers,
            num_mamba_layers=num_mamba_layers,
            num_mlp_layers=num_mlp_layers,
            num_moe_layers=num_moe_layers,
            mamba_state_dim=args.mamba_state_dim,
            mamba_head_dim=args.mamba_head_dim,
            mamba_num_groups=args.mamba_num_groups,
            mamba_num_heads=args.mamba_num_heads,
            num_attn_heads=args.num_attention_heads,
            gqa=args.group_query_attention,
            gqa_groups=args.num_query_groups,
            kv_channels=args.kv_channels,
            mlp_expansion=args.ffn_hidden_size / args.hidden_size,
            swiglu=args.swiglu,
            moe_latent_size=args.moe_latent_size,
            moe_ffn_hidden_size=(args.moe_ffn_hidden_size if args.moe_ffn_hidden_size is not None
                                 else args.ffn_hidden_size),
            shared_expert_ffn_hidden_size=(0 if args.moe_shared_expert_intermediate_size is None
                                           else args.moe_shared_expert_intermediate_size),
            num_experts_routed_to=args.moe_router_topk,
            vocab_size=args.padded_vocab_size,
            mtp_num_layers=mtp_num_layers,
        )
    else:
        # Compute standard Transformer model FLOPs.
        return transformer_flops()


def get_start_time_from_progress_log():
    """
    Gets start time of earliest job with same world size. Also returns the number
    of floating-point operations completed in last saved checkpoint.
    """
    args = get_args()
    assert args.save is not None
    progress_log_filename = os.path.join(args.save, "progress.txt")

    # start_time is time when job with same world size started.
    # start_num_floating_point_operations is the number of floating-point operations
    # completed when this job started.
    # latest_num_floating_point_operations is the number of floating-point operations
    # completed in most recent saved checkpoint.
    start_time = None
    start_num_floating_point_operations = None
    latest_num_floating_point_operations = 0

    def _get_field(string, type):
        return type(string.split(': ')[1])

    with open_file(progress_log_filename, 'r') as f:
        for line in f:
            line = line.strip()
            line_tokens = line.split('\t')
            world_size_in_line = _get_field(line_tokens[2], int)
            if line_tokens[3] == "Saved checkpoint":
                latest_num_floating_point_operations = _get_field(line_tokens[7], float)
            if world_size_in_line != args.world_size:
                # Re-start search if we see a different world size.
                start_time = None
                start_num_floating_point_operations = None
                continue
            if line_tokens[3] == "Starting job":
                if start_time is None:
                    start_time = line_tokens[0]
                    start_num_floating_point_operations = latest_num_floating_point_operations
    assert (
        start_time is not None and start_num_floating_point_operations is not None
    ), "Should have seen at least one 'Starting job' entry with same world_size"
    return datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S'), start_num_floating_point_operations


def preprocess_common_state_dict(common_state_dict):
    import copy

    # Convert args key of type namespace to dictionary
    preprocessed_common_state_dict = copy.deepcopy(common_state_dict)
    preprocessed_common_state_dict['args'] = vars(preprocessed_common_state_dict['args'])
    # Remove rank and local rank from state dict if it exists, since they are expected to be different
    preprocessed_common_state_dict['args'].pop('local_rank', None)
    preprocessed_common_state_dict['args'].pop('rank', None)
    if (
        preprocessed_common_state_dict['args']['use_distributed_optimizer']
        and "optimizer" in preprocessed_common_state_dict
    ):
        def reorder_inner_param_groups(optimizer_state_dict):
            # When distributed optimizer loading, source param groups will be reordered,
            # so we reorder the param groups here to prevent warning.

            # Pop empty param_state.
            if "param_state" in optimizer_state_dict and not optimizer_state_dict["param_state"]:
                optimizer_state_dict.pop("param_state")

            # Reorder param groups.
            if "optimizer" not in optimizer_state_dict:
                return
            inner_optimizer = optimizer_state_dict["optimizer"]
            if "param_groups" not in inner_optimizer:
                return
            param_groups = inner_optimizer["param_groups"]
            key_fn = lambda pg: [pg[key] for key in param_group_identifier_keys]
            param_groups.sort(key=key_fn)
            inner_optimizer["param_groups"] = param_groups

        optimizer_state_dict = preprocessed_common_state_dict['optimizer']
        if "optimizer" in optimizer_state_dict:
            # Only 1 optimizer in chained optimizer.
            reorder_inner_param_groups(optimizer_state_dict)
        else:
            # Multiple optimizers in chained optimizer.
            for i in range(len(optimizer_state_dict)):
                if i in optimizer_state_dict.keys():
                    reorder_inner_param_groups(optimizer_state_dict[i])

    return preprocessed_common_state_dict


def pretrain(
    train_valid_test_dataset_provider,
    model_provider,
    model_type,
    forward_step_func,
    process_non_loss_data_func=None,
    extra_args_provider=None,
    args_defaults={},
    get_embedding_ranks=None,
    get_position_embedding_ranks=None,
    non_loss_data_func=None,
    store=None,
    inprocess_call_wrapper: Optional[CallWrapper] = None,
):
    """Main training program.

    This function will run the followings in the order provided:
        1) initialize Megatron.
        2) setup model, optimizer and lr schedule using the model_provider.
        3) call train_val_test_data_provider to get train/val/test datasets.
        4) train the model using the forward_step_func.

    Args:
        train_valid_test_dataset_provider: a function that takes the size of
            train/valid/test dataset and returns `train, valid, test` datasets.
        model_provider: a function that returns a vanilla version of the
            model. By vanilla we mean a simple model on cpu with no fp16 or ddp.
        model_type: an enum that specifies the type of model being trained.
        forward_step_func: a function that takes a `data iterator` and `model`,
            and returns a `loss` scalar with a dictionary with key:values being
            the info we would like to monitor during training, for example
            `lm-loss: value`. We also require that this function add
            `batch generator` to the timers class.
        process_non_loss_data_func: a function to post process outputs of the
            network. It can be used for dumping output tensors (e.g images) to
            tensorboard. It takes `collected data`(list of tensors),
            `current iteration index` and `tensorboard writer` as arguments.
        extra_args_provider: a function that takes a parser and adds arguments
            to it. It is used for programs to add their own arguments.
        args_defaults: a dictionary from argument-name to argument-value. It
            to set already parse arguments.
        get_embedding_ranks: a function that takes a list of ranks for a pipeline
            group and returns those ranks that should have word embeddings.
            For most models, these are the first and last pipeline stages.
            If None, defaults to returning the first and last pipeline stages.
        get_position_embedding_ranks: a function that takes a list of ranks for
            a pipeline group and returns those ranks that should have position
            embeddings. For most models, this is only the first pipeline stage.
            If None, defaults to returning only the first pipeline stage.
        non_loss_data_func (callable): A custom function to call during evaluation.
            It can run e.g. benchmarks.
        store: an optional instance of torch.distributed.Store, to be used by
            torch.distributed.init_process_group
        inprocess_call_wrapper: an optional instance of inprocess.CallWrapper,
            it is automatically injected when in-process restart is in use
    """
    # Capture timestamp right at top of pretrain, before initialize_megatron
    global _STARTUP_TIMESTAMPS
    _STARTUP_TIMESTAMPS['pretrain_entry'] = time.time()

    if inprocess_call_wrapper is not None:
        iteration = inprocess_call_wrapper.iteration
        store = torch.distributed.PrefixStore(str(iteration), store)

    timestamp_after_inprocess_setup = time.time()

    # Early fault tolerance setup - must be done before initialize_megatron
    # to enable monitoring of the initialization process
    ft_integration.setup()
    timestamp_after_in_job_setup = time.time()

    # Initalize and get arguments, timers, and Tensorboard writer.
    initialize_megatron(
        extra_args_provider=extra_args_provider,
        args_defaults=args_defaults,
        get_embedding_ranks=get_embedding_ranks,
        get_position_embedding_ranks=get_position_embedding_ranks,
        store=store,
    )

    timestamp_after_initialize_megatron = time.time()

    args = get_args()
    timers = get_timers()

    if args.fine_grained_activation_offloading:
        from megatron.core.pipeline_parallel.utils import (
            set_ideal_affinity_for_current_gpu
        )
        set_ideal_affinity_for_current_gpu()


    if args.log_progress:
        append_to_progress_log("Starting job")

    # Set pytorch JIT layer fusion options and warmup JIT functions.
    set_jit_fusion_options()

    timestamp_after_set_jit_fusion_options = time.time()

    # Adjust the startup time so it reflects the global minimum.
    # This will be closer to what scheduler will see (outside of
    # image ... launches).
    program_start = _STARTUP_TIMESTAMPS.get('program_start')
    main_entry = _STARTUP_TIMESTAMPS.get('main_entry')
    pretrain_entry = _STARTUP_TIMESTAMPS.get('pretrain_entry')

    # Initialize program_start_global with a fallback value in case set_startup_timestamps() wasn't called
    program_start_global = _TRAIN_START_TIME
    if _STARTUP_TIMESTAMPS['program_start'] is not None:
        program_start_global = torch.tensor([_STARTUP_TIMESTAMPS['program_start']], dtype=torch.double, device='cuda')
        torch.distributed.all_reduce(program_start_global, op=torch.distributed.ReduceOp.MIN)
        program_start_global = program_start_global.item()
    set_startup_timestamps(program_start=program_start_global)

    global _LEGACY_TRAIN_START_TIME
    start_time_tensor = torch.tensor([_LEGACY_TRAIN_START_TIME], dtype=torch.double, device='cuda')
    torch.distributed.all_reduce(start_time_tensor, op=torch.distributed.ReduceOp.MIN)
    _LEGACY_TRAIN_START_TIME = start_time_tensor.item()

    # Capture megatron init end time (matches original time.time() placement)
    megatron_init_end = time.time()

    app_metrics = {}
    app_metrics['app_start_time'] = round(program_start_global * 1000.0)
    app_metrics['app_model_init_start_time'] = round(program_start_global * 1000.0)

    # Print basic megatron init time (using global min start)
    # NOTE(asolergi-nv): This is not entirely accurate, but we keep it for backwards compatibility.
    print_rank_0(
        'time to initialize megatron (seconds): {:.3f}'.format(megatron_init_end - _LEGACY_TRAIN_START_TIME)
    )

    # Note, not entirely accurate as rank 0 might not be the first or last to hit these timestamps
    print_datetime('after in-process setup and before initialize_megatron', timestamp_after_inprocess_setup)
    print_datetime('after in-job setup and before initialize_megatron', timestamp_after_in_job_setup)

    if program_start is not None and main_entry is not None and pretrain_entry is not None:
        # Inject startup deltas into timers
        startup_timers = {
            'startup-program-entry-spread': program_start - program_start_global, # Local program start timestamp vs the global earliest program start timestamp
            'startup-library-setup': main_entry - program_start, # Local library imports
            'startup-program-setup': pretrain_entry - main_entry, # Local __main__ entry to pretrain entry
            'startup-in-process-setup': timestamp_after_inprocess_setup - pretrain_entry, # Local in-process setup
            'startup-in-job-setup': timestamp_after_in_job_setup - timestamp_after_inprocess_setup, # Local in-job setup
            'startup-initialize-megatron': timestamp_after_initialize_megatron - timestamp_after_in_job_setup, # Local initialize megatron
            'startup-set-jit-fusion-options': timestamp_after_set_jit_fusion_options - timestamp_after_initialize_megatron, # Local set JIT fusion options
            'all-reduce-start-timestamps-tensor': megatron_init_end - timestamp_after_set_jit_fusion_options, # 2x All-reduce, first collective call
            'startup-megatron-init-local': megatron_init_end - pretrain_entry, # Local megatron init
            'startup-megatron-init-global': megatron_init_end - program_start_global, # Local megatron init vs the global earliest program start timestamp
        }
        for name, delta in startup_timers.items():
            timers(name, log_level=0).set_elapsed(delta)
        timers.log(list(startup_timers.keys()), barrier=True)

        # Print rank 0's absolute timestamps
        startup_timestamps = {
            'before library-setup': program_start,
            'after library-setup': main_entry,
            'before megatron-init': pretrain_entry,
        }
        for name, ts in startup_timestamps.items():
            ts_str = datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S.%f')
            print_rank_0(f'[{name}] datetime: {ts_str}')

    print_datetime('after megatron is initialized')
    app_metrics['app_model_init_finish_time'] = one_logger_utils.get_timestamp_in_ms()

    # Track E2E metrics on pretrain start
    one_logger_utils.on_pretrain_start()

    # Context used for persisting some state between checkpoint saves.
    if args.non_persistent_ckpt_type == 'local':
        try:
            from nvidia_resiliency_ext.checkpointing.local.ckpt_managers.local_manager import (
                LocalCheckpointManager,
            )
            from nvidia_resiliency_ext.checkpointing.local.replication.group_utils import (
                parse_group_sequence,
                GroupWrapper,
            )
            from nvidia_resiliency_ext.checkpointing.local.replication.strategies import (
                CliqueReplicationStrategy,
            )
        except ModuleNotFoundError:
            raise RuntimeError(
                "The 'nvidia_resiliency_ext' module is required for local "
                "checkpointing but was not found. Please ensure it is installed."
            )

        if args.replication:
            repl_strategy = CliqueReplicationStrategy.from_replication_params(
                args.replication_jump, args.replication_factor
            )
        else:
            repl_strategy = None

        checkpointing_context = {
            'local_checkpoint_manager': LocalCheckpointManager(
                args.non_persistent_local_ckpt_dir, repl_strategy=repl_strategy
            )
        }
    else:
        checkpointing_context = {}

    # Model, optimizer, and learning rate.
    timers('model-and-optimizer-setup', log_level=0).start(barrier=True)
    model, optimizer, opt_param_scheduler = setup_model_and_optimizer(
        model_provider, model_type, checkpointing_context=checkpointing_context
    )

    timers('model-and-optimizer-setup').stop()
    print_datetime('after model, optimizer, and learning rate ' 'scheduler are built')
    config = get_model_config(model[0])

    # Build a separate inference model for RL if requested.
    inference_model = None
    if args.perform_rl_step:
        if (
            args.rl_inference_tensor_model_parallel_size is not None
            or args.rl_inference_pipeline_model_parallel_size is not None
            or args.rl_inference_expert_model_parallel_size is not None
            or args.rl_inference_expert_tensor_model_parallel_size is not None
        ):
            print_rank_0(
                "Building separate RL inference model with custom parallelism: "
                f"TP={args.rl_inference_tensor_model_parallel_size}, "
                f"PP={args.rl_inference_pipeline_model_parallel_size}, "
                f"EP={args.rl_inference_expert_model_parallel_size}, "
                f"ExptTP={args.rl_inference_expert_tensor_model_parallel_size}"
            )
            inference_pg_collection = build_inference_pg_collection(
                args.world_size,
                tp_size=args.rl_inference_tensor_model_parallel_size,
                pp_size=args.rl_inference_pipeline_model_parallel_size,
                ep_size=args.rl_inference_expert_model_parallel_size,
                expt_tp_size=args.rl_inference_expert_tensor_model_parallel_size,
                use_tp_pp_dp_mapping=args.use_tp_pp_dp_mapping,
            )

            # Build an isolated inference config so training config remains unchanged
            inference_config = copy.deepcopy(config)
            if args.rl_inference_tensor_model_parallel_size is not None:
                inference_config.tensor_model_parallel_size = args.rl_inference_tensor_model_parallel_size
            if args.rl_inference_pipeline_model_parallel_size is not None:
                inference_config.pipeline_model_parallel_size = (
                    args.rl_inference_pipeline_model_parallel_size
                )
            if args.rl_inference_expert_model_parallel_size is not None:
                inference_config.expert_model_parallel_size = (
                    args.rl_inference_expert_model_parallel_size
                )
            if args.rl_inference_expert_tensor_model_parallel_size is not None:
                inference_config.expert_tensor_parallel_size = (
                    args.rl_inference_expert_tensor_model_parallel_size
                )

            # Optionally allocate the RL inference model weights from a unified virtual memory (UVM)
            # mempool so we can prefetch weights to CPU when idle while keeping CUDA-graph-safe pointers.
            # Alternatively, use torch_memory_saver to offload the weights to CPU when idle.
            uvm_mempool = None
            uvm_level = args.rl_inference_model_unified_memory_level
            if uvm_level and uvm_level > 0:
                uvm_mempool = create_unified_mempool()

            # Determine which context manager to use for model allocation
            # Use torch_memory_saver if offloading is requested but UVM is not enabled
            use_torch_saver_for_inference_model = (
                args.rl_offload_inference_model_weights_when_idle
                and uvm_level == 0
                and HAVE_TORCH_MEMORY_SAVER
            )
            if use_torch_saver_for_inference_model:
                # Use torch_memory_saver for offloading - allocate within a tagged region
                model_alloc_ctx = torch_memory_saver.region(
                    tag="rl_inference_model", enable_cpu_backup=True
                )
            elif uvm_mempool is not None:
                model_alloc_ctx = torch.cuda.use_mem_pool(uvm_mempool)
            else:
                model_alloc_ctx = nullcontext()

            with model_alloc_ctx:
                inference_model = get_model(
                    model_provider,
                    model_type,
                    wrap_with_ddp=False,
                    pg_collection=inference_pg_collection,
                    config=inference_config,
                )
            inference_model[0].eval()

        # Validate: offloading flag requires a separate inference model
        if args.rl_offload_inference_model_weights_when_idle and inference_model is None:
            raise ValueError(
                "--rl-offload-inference-model-weights-when-idle requires a separate inference model. "
                "This flag is only useful when doing refit since the weights are shared with the training model."
            )

    # Data stuff.
    app_metrics['app_build_dataiters_start_time'] = one_logger_utils.get_timestamp_in_ms()
    timers('train/valid/test-data-iterators-setup', log_level=0).start(barrier=True)
    if args.virtual_pipeline_model_parallel_size is not None:
        train_data_iterator = []
        valid_data_iterator = []
        test_data_iterator = []
        for vp_stage in range(len(model)):
            dataset_provider_parameters = inspect.signature(train_valid_test_dataset_provider).parameters
            assert "vp_stage" in dataset_provider_parameters, \
                "vp_stage must be a kwarg in train_valid_test_dataset_provider when using virtual pipeline parallelism"
            vp_stage_train_valid_test_dataset_provider = \
                functools.partial(train_valid_test_dataset_provider, vp_stage=vp_stage)
            if getattr(train_valid_test_dataset_provider, 'is_distributed', False):
                vp_stage_train_valid_test_dataset_provider.is_distributed = True
            iterators = build_train_valid_test_data_iterators(
                vp_stage_train_valid_test_dataset_provider
            )
            train_data_iterator.append(iterators[0])
            valid_data_iterator.append(iterators[1])
            test_data_iterator.append(iterators[2])
    else:
        train_data_iterator, valid_data_iterator, test_data_iterator = (
            build_train_valid_test_data_iterators(train_valid_test_dataset_provider)
        )
    timers('train/valid/test-data-iterators-setup').stop()
    print_datetime('after dataloaders are built')
    app_metrics['app_build_dataiters_finish_time'] = one_logger_utils.get_timestamp_in_ms()

    # Track if training is enabled. Can only be done once args.do_train is assigned after dataloader is built.
    one_logger_utils.track_config_flags(
        args.train_iters,
        args.skip_train,
        args.do_train,
        args.do_valid,
        args.do_test,
        args.dataloader_type,
    )

    # Print setup timing.
    print_rank_0('done with setup ...')
    timers.log(['model-and-optimizer-setup', 'train/valid/test-data-iterators-setup'], barrier=True)

    one_logger = get_one_logger()
    one_logger and one_logger.log_metrics(app_metrics)

    wandb_writer = get_wandb_writer()
    if wandb_writer:
        # Add job name to the wandb config to make it easier to run more singleton dependency jobs.
        wandb_writer.config.update({'slurm_job_name': os.getenv("SLURM_JOB_NAME", "N/A")})

    if not args.skip_train or args.perform_rl_step:
        if args.skip_train:
            print_rank_0('RL inference-only mode (--skip-train --perform-rl-step) ...')
        else:
            print_rank_0('training ...')

        iteration = 0
        args.curr_iteration = iteration
        if args.do_train and (args.train_iters or 0) > 0:
            iteration, num_floating_point_operations_so_far = train(
                forward_step_func,
                model,
                optimizer,
                opt_param_scheduler,
                train_data_iterator,
                valid_data_iterator,
                process_non_loss_data_func,
                config,
                checkpointing_context,
                non_loss_data_func,
                inference_model,
            )

        print_datetime('after training is done')

        if not args.skip_train and args.save and iteration != 0 and iteration % args.save_interval != 0:
            save_checkpoint(
                iteration,
                model,
                optimizer,
                opt_param_scheduler,
                num_floating_point_operations_so_far,
                checkpointing_context,
                train_data_iterator=train_data_iterator,
                preprocess_common_state_dict_fn=preprocess_common_state_dict,
            )

        one_logger and one_logger.log_metrics(
            {'app_train_loop_finish_time': one_logger_utils.get_timestamp_in_ms()}
        )

    else:
        print_rank_0('skipping training (--skip-train is on) ...')

        iteration = args.iteration

    if args.do_valid:
        prefix = f'iteration {iteration} on validation set'
        if args.perform_rl_step:
            rl_eval_model = model
            rl_training_model = None
            if inference_model is not None:
                inf_core = unwrap_model(inference_model[0])
                # If separate inference and training models, swap training weights
                # back to the inference model for RL evaluation.
                rl_utils._maybe_prefetch_separate_inference_model_weights(inf_core, to_cpu=False)
                swap_model_weights(model, inference_model, args.refit_method)
                rl_eval_model = inference_model
                rl_training_model = model
            rl_utils.evaluate_and_print_results_rl(
                valid_data_iterator,
                rl_eval_model,
                optimizer,
                iteration,
                write_to_tensorboard=not args.skip_train,
                training_model=rl_training_model,
            )
        else:
            evaluate_and_print_results(
                prefix, forward_step_func,
                valid_data_iterator, model,
                iteration, process_non_loss_data_func, config,
                verbose=True, write_to_tensorboard=not args.skip_train,
                non_loss_data_func=non_loss_data_func
            )

    if args.do_test:
        prefix = f'iteration {iteration} on test set'
        evaluate_and_print_results(
            prefix,
            forward_step_func,
            test_data_iterator,
            model,
            iteration,
            process_non_loss_data_func,
            config,
            verbose=True,
            write_to_tensorboard=not args.skip_train,
            non_loss_data_func=non_loss_data_func,
        )

    wandb_writer = get_wandb_writer()
    if wandb_writer:
        wandb_writer.finish()

    ft_integration.on_checkpointing_start()
    maybe_finalize_async_save(blocking=True, terminate=True)
    ft_integration.on_checkpointing_end(is_async_finalization=True)

    one_logger and one_logger.log_metrics(
        {'app_finish_time': one_logger_utils.get_timestamp_in_ms()}
    )

    if args.perform_rl_step:
        rl_utils.rl_inference_interface_shutdown()

    ft_integration.shutdown()
    one_logger_utils.finish()


def update_train_iters(args):

    # For iteration-based training, we don't need to do anything
    if args.train_iters:
        return

    # Constant batch size with sample-based training.
    if args.rampup_batch_size is None:
        args.train_iters = args.train_samples // args.global_batch_size

    else:
        # Sample based training with rampup batch size.
        iterations = 0
        consumed_samples = 0
        # Rampup phase.
        while (
            consumed_samples <= int(args.rampup_batch_size[2])
            and consumed_samples <= args.train_samples
        ):
            update_num_microbatches(consumed_samples, consistency_check=False)
            consumed_samples += get_current_global_batch_size()
            iterations += 1
        # Reset
        update_num_microbatches(0, consistency_check=False)
        # Constant phase
        # Note that we throw away any partial last batch.
        if args.train_samples > consumed_samples:
            iterations += (args.train_samples - consumed_samples) // args.global_batch_size
        args.train_iters = iterations

    print_rank_0(f'setting training iterations to {args.train_iters}')


def get_model(model_provider_func, model_type=ModelType.encoder_or_decoder, wrap_with_ddp=True, config=None, pg_collection=None):
    """Build the model."""
    args = get_args()
    args.model_type = model_type
    if pg_collection is None:
        pg_collection = ProcessGroupCollection.use_mpu_process_groups()

    if has_nvidia_modelopt:
        from megatron.post_training.checkpointing import has_modelopt_state
        # [ModelOpt]: Check if the checkpoint is a ModelOpt checkpoint and
        # set a flag to use our model provider if so.
        if args.load is not None and has_modelopt_state(args.load):
            print_rank_0(f'ModelOpt checkpoint detected')
            args.modelopt_enabled = True
        elif getattr(args, "export_kd_teacher_load", None):
            # For distillation ckpts without ModelOpt state
            args.modelopt_enabled = True

    # Build model.
    def build_model():
        if (
            get_pg_size(pg_collection.pp) > 1
            and args.virtual_pipeline_model_parallel_size is not None
        ):
            model = []
            vp_size = args.virtual_pipeline_model_parallel_size
            for i in range(vp_size):
                # Set pre_process and post_process only after virtual rank is set.
                pre_process = is_pp_first_stage(pg_collection.pp) and is_vp_first_stage(
                    vp_stage=i, vp_size=vp_size
                )
                post_process = is_pp_last_stage(pg_collection.pp) and is_vp_last_stage(
                    vp_stage=i, vp_size=vp_size
                )
                this_model = model_provider_func(
                    pre_process=pre_process,
                    post_process=post_process,
                    vp_stage=i,
                    config=config,
                    pg_collection=pg_collection,
                )
                this_model.model_type = model_type
                this_model.vp_stage = i
                model.append(this_model)
        else:
            pre_process = is_pp_first_stage(pg_collection.pp)
            post_process = is_pp_last_stage(pg_collection.pp)
            model = model_provider_func(
                pre_process=pre_process,
                post_process=post_process,
                config=config,
                pg_collection=pg_collection,
            )
            model.model_type = model_type
        return model


    if args.init_model_with_meta_device:
        with torch.device('meta'):
            model = build_model()
    else:
        model = build_model()

    if not isinstance(model, list):
        model = [model]

    # Set tensor model parallel attributes if not set.
    # Only parameters that are already tensor model parallel have these
    # attributes set for them. We should make sure the default attributes
    # are set for all params so the optimizer can use them.
    for model_module in model:
        for param in model_module.parameters():
            tensor_parallel.set_defaults_if_not_set_tensor_model_parallel_attributes(param)

    # Print number of parameters.
    num_parameters = sum(
        [sum([p.nelement() for p in model_module.parameters()]) for model_module in model]
    )
    if get_pg_rank(pg_collection.dp) == 0 and get_pg_rank(pg_collection.cp) == 0:
        print(
            ' > number of parameters on (tensor, pipeline) '
            'model parallel rank ({}, {}): {}'.format(
                get_pg_rank(pg_collection.tp),
                get_pg_rank(pg_collection.pp),
                num_parameters,
            ),
            flush=True,
        )

    # GPU allocation.
    # For FSDP2, we don't allocate GPU memory here. We allocate GPU memory
    # in the fully_shard function of FSDP2 instead.
    if (
        not (args.use_torch_fsdp2 and args.use_cpu_initialization)
        and not args.init_model_with_meta_device
    ):
        for model_module in model:
            model_module.cuda(torch.cuda.current_device())

    # Fp16 conversion.
    if args.fp16 or args.bf16:
        config = get_model_config(model[0])
        model = [Float16Module(config, model_module) for model_module in model]

    # Materialize tensors on meta device (GPU allocation) if not using FSDP2 and not using Megatron FSDP.
    if args.init_model_with_meta_device and not args.use_torch_fsdp2 and not args.use_megatron_fsdp:
        model = [to_empty_if_meta_device(model_module, device=torch.device("cuda")) for model_module in model]

    # Before TE2.x: The model_module.bfloat16()/model_module.half() above will call the inplace
    #               copy of TE's Float8Tensor, which will write an unwanted value (amax calculated
    #               from the current fp8 param) to its amax_history. The below function will correct
    #               the amax_history back.
    # After TE2.x: Below function is an empty function and does nothing.
    correct_amax_history_if_needed(model)

    if wrap_with_ddp:
        if args.use_torch_fsdp2:
            assert HAVE_FSDP2, "Torch FSDP2 requires torch>=2.4.0"
            DP = torch_FSDP
        elif args.use_megatron_fsdp:
            DP = megatron_FSDP
        else:
            DP = DDP

        config = get_model_config(model[0])

        if getattr(args, "use_torch_fsdp2", False):
            reshard_after_forward = getattr(args, "torch_fsdp2_reshard_after_forward", True)
            ddp_config = TorchFullyShardedDataParallelConfig(reshard_after_forward=reshard_after_forward)
        else:
            kwargs = {}
            for f in dataclasses.fields(DistributedDataParallelConfig):
                if hasattr(args, f.name):
                    kwargs[f.name] = getattr(args, f.name)
            kwargs['grad_reduce_in_fp32'] = args.accumulate_allreduce_grads_in_fp32
            kwargs['check_for_nan_in_grad'] = args.check_for_nan_in_loss_and_grad
            kwargs['check_for_large_grads'] = args.check_for_large_grads
            if args.ddp_num_buckets is not None:
                assert args.ddp_bucket_size is None, \
                    "Cannot specify both --ddp-num-buckets and --ddp-bucket-size"
                assert args.ddp_num_buckets > 0, \
                    "--ddp-num-buckets must be greater than 0"
                kwargs['bucket_size'] = num_parameters // args.ddp_num_buckets
            else:
                kwargs['bucket_size'] = args.ddp_bucket_size
            kwargs['pad_buckets_for_high_nccl_busbw'] = args.ddp_pad_buckets_for_high_nccl_busbw
            kwargs['reduce_scatter_with_fp32_accumulation'] = args.ddp_reduce_scatter_with_fp32_accumulation
            kwargs['average_in_collective'] = args.ddp_average_in_collective
            ddp_config = DistributedDataParallelConfig(**kwargs)

            # In the Megatron FSDP and DDP use path, we need to initialize the bucket size.
            # If bucket_size is not provided as an input, use sane default.
            # If using very large dp_sizes, make buckets larger to ensure that chunks used in NCCL
            # ring-reduce implementations are large enough to remain bandwidth-bound rather than
            # latency-bound.
            if ddp_config.bucket_size is None:
                ddp_config.bucket_size = max(
                    40000000, 1000000 * mpu.get_data_parallel_world_size(with_context_parallel=True)
                )
            # Set bucket_size to infinity if overlap_grad_reduce is False.
            if not ddp_config.overlap_grad_reduce:
                ddp_config.bucket_size = None

        # Setup stream for ddp initialization. The side-stream may be necessary for cuda graph
        #  capture support with DDP, but we sync it with the current stream to avoid races.
        ddp_stream = torch.cuda.Stream()
        # Wait for the default stream to complete before starting ddp_stream
        ddp_stream.wait_stream(torch.cuda.current_stream())
        # Make ddp_stream start after whatever the default stream already queued
        with torch.cuda.stream(ddp_stream):
            model = [
                DP(
                    config=config,
                    ddp_config=ddp_config,
                    module=model_chunk,
                    # Turn off bucketing for model_chunk 2 onwards, since communication
                    # for these model chunks is overlapped with compute anyway.
                    disable_bucketing=(model_chunk_idx > 0) or args.overlap_param_gather_with_optimizer_step,
                )
                for (model_chunk_idx, model_chunk) in enumerate(model)
            ]
        # End of setup_stream
        # Critical: ensure side-stream work completes before touching params on default stream
        torch.cuda.current_stream().wait_stream(ddp_stream)

        # Broadcast params from data parallel src rank to other data parallel ranks.
        if args.data_parallel_random_init:
            for model_module in model:
                model_module.broadcast_params()

    return model


def get_optimizer_param_scheduler(optimizer):
    """Build the learning rate scheduler."""
    args = get_args()

    # Iteration-based training.
    if args.train_iters:
        if args.lr_decay_iters is None:
            args.lr_decay_iters = args.train_iters
        lr_decay_steps = args.lr_decay_iters * args.global_batch_size
        wd_incr_steps = args.train_iters * args.global_batch_size
        wsd_decay_steps = None
        if args.lr_wsd_decay_iters is not None:
            wsd_decay_steps = args.lr_wsd_decay_iters * args.global_batch_size
        if args.lr_warmup_fraction is not None:
            lr_warmup_steps = args.lr_warmup_fraction * lr_decay_steps
        else:
            lr_warmup_steps = args.lr_warmup_iters * args.global_batch_size
    # Sample-based training.
    elif args.train_samples:
        # We need to set training iters for later use. Technically
        # we need to adjust the training samples too (due to last
        # batch being incomplete) but we leave it as is for now.
        update_train_iters(args)
        if args.lr_decay_samples is None:
            args.lr_decay_samples = args.train_samples
        lr_decay_steps = args.lr_decay_samples
        wd_incr_steps = args.train_samples
        wsd_decay_steps = args.lr_wsd_decay_samples
        if args.lr_warmup_fraction is not None:
            lr_warmup_steps = args.lr_warmup_fraction * lr_decay_steps
        else:
            lr_warmup_steps = args.lr_warmup_samples
    else:
        raise Exception('either train-iters or train-samples should be provided.')

    opt_param_scheduler = OptimizerParamScheduler(
        optimizer,
        init_lr=args.lr_warmup_init,
        max_lr=args.lr,
        min_lr=args.min_lr,
        lr_warmup_steps=lr_warmup_steps,
        lr_decay_steps=lr_decay_steps,
        lr_decay_style=args.lr_decay_style,
        start_wd=args.start_weight_decay,
        end_wd=args.end_weight_decay,
        wd_incr_steps=wd_incr_steps,
        wd_incr_style=args.weight_decay_incr_style,
        use_checkpoint_opt_param_scheduler=args.use_checkpoint_opt_param_scheduler,
        override_opt_param_scheduler=args.override_opt_param_scheduler,
        wsd_decay_steps=wsd_decay_steps,
        lr_wsd_decay_style=args.lr_wsd_decay_style,
    )

    return opt_param_scheduler


def get_megatron_optimizer_config(args: Any) -> OptimizerConfig:
    """Return a Megatron optimizer config object from Megatron's arguments."""

    config = None
    if args.optimizer == 'adam' or 'muon' in args.optimizer:
        # TODO(deyuf): Muon needs both adam + muon but get() only receive one config
        # So for now we keep using adam config that's back compat with old way
        kwargs = {}
        for f in dataclasses.fields(AdamOptimizerConfig):
            if hasattr(args, f.name):
                kwargs[f.name] = getattr(args, f.name)
        config = AdamOptimizerConfig(**kwargs)
    elif args.optimizer == 'sgd':
        kwargs = {}
        for f in dataclasses.fields(SGDOptimizerConfig):
            if hasattr(args, f.name):
                kwargs[f.name] = getattr(args, f.name)
        config = SGDOptimizerConfig(**kwargs)
    else:
        raise ValueError("Invalid optimizer type!")

    # Construct the appropriate config_overrides object. This default handles many cases, but
    #  can be added to as needed by the user, or replaced entirely with a custom override.
    config_overrides = get_standard_config_overrides(config=config)

    return config, config_overrides


def setup_model_and_optimizer(
    model_provider_func,
    model_type,
    checkpointing_context=None,
):
    """Setup model and optimizer."""
    args = get_args()
    timers = get_timers()
    one_logger = get_one_logger()

    # Skip optimizer when not training. In RL inference-only mode (skip_train + perform_rl_step),
    # --no-load-optim controls whether the optimizer is skipped (saving memory) or created
    # (required for --rl-offload-optimizer-during-inference).
    skip_optimizer = args.skip_train and (not args.perform_rl_step or args.no_load_optim)
    wrap_with_ddp = not skip_optimizer
    model = get_model(model_provider_func, model_type, wrap_with_ddp=wrap_with_ddp)
    unwrapped_model = unwrap_model(model)

    one_logger and one_logger.log_metrics({"app_build_optimzer_start_time": one_logger_utils.get_timestamp_in_ms()})
    if skip_optimizer:
        optimizer, opt_param_scheduler = None, None
        # In RL inference-only mode, train_iters must still be set despite having no optimizer.
        if args.perform_rl_step:
            update_train_iters(args)
    else:
        config, config_overrides = get_megatron_optimizer_config(args)
        config.timers = timers
        if getattr(args, "use_mup", False):
            model_config_source = (
                unwrapped_model[0] if isinstance(unwrapped_model, list) else unwrapped_model
            )
            model_config = get_model_config(model_config_source)
            mup_overrides = get_mup_config_overrides(
                config=config,
                mup_width_mult=model_config.mup_width_mult,
                optimizer_type=config.optimizer,
            )
            if mup_overrides:
                config_overrides = {**(config_overrides or {}), **mup_overrides}

        if 'muon' not in config.optimizer:
            # If the user is asking for a non-zero embedding init std, skip weight decay for embeddings
            # to avoid embeddings from shrinking to zero as recommended in https://arxiv.org/abs/2312.16903
            # default_skip_embedding_weight_decay=args.embedding_init_method_std is not None,
            optimizer = get_megatron_optimizer(
                config,
                model,
                config_overrides=config_overrides,
                use_gloo_process_groups=args.enable_gloo_process_groups,
                dump_param_to_param_group_map=args.dump_param_to_param_group_map,
            )
        else:
            optimizer = get_megatron_muon_optimizer(
                config,
                model,
                config_overrides=config_overrides,
                use_gloo_process_groups=args.enable_gloo_process_groups,
                layer_wise_distributed_optimizer='dist' in config.optimizer,
            )
        opt_param_scheduler = get_optimizer_param_scheduler(optimizer)

    one_logger and one_logger.log_metrics({"app_build_optimzer_finish_time": one_logger_utils.get_timestamp_in_ms()})

    if args.moe_use_upcycling:
        torch.distributed.barrier()
        assert not checkpoint_exists(args.save), (
            "The upcycling destination directory already exists. "
            "Please check if --moe-use-upcycling is mistakenly enabled. "
            "Upcycling should only be set for the first run when converting the dense model. "
            "All subsequent runs should remove this flag. "
        )
        # before changing moe related global args, save them in local variables
        num_experts = args.num_experts
        expert_model_parallel_size = args.expert_model_parallel_size
        moe_ffn_hidden_size = args.ffn_hidden_size

        # set dense model related args in to global args before getting dense model
        args.num_experts = None
        args.expert_model_parallel_size = 1
        args.ffn_hidden_size = moe_ffn_hidden_size * args.moe_upcycling_granularity

        # get dense model
        dense_model_for_upcycling = get_model(model_provider_func, model_type)

        # recover moe upcycling related args in global args before executing upcycling
        args.num_experts = num_experts
        args.expert_model_parallel_size = expert_model_parallel_size
        args.ffn_hidden_size = moe_ffn_hidden_size

        # execute upcycling
        _, args.num_floating_point_operations_so_far = upcycling_utils.load_and_upcycle_model(
            load_checkpoint,
            unwrapped_model,
            dense_model_for_upcycling,
            load_kwargs={
                'model': dense_model_for_upcycling,
                'optimizer': None,
                'opt_param_scheduler': None,
            },
        )
        args.iteration = 1
        save_checkpoint(
            args.iteration, model, None, None, args.num_floating_point_operations_so_far
        )
        torch.distributed.barrier()
        del dense_model_for_upcycling
        if (args.fp16 or args.bf16) and optimizer is not None:
            optimizer.reload_model_params()
        print_rank_0(f'Upcycled checkpoint saved to {args.save}')

    if (
        args.load is not None or args.pretrained_checkpoint is not None
    ) and not args.moe_use_upcycling:
        one_logger and one_logger.log_metrics(
            {'load_checkpoint_start_time': one_logger_utils.get_timestamp_in_ms()}
        )
        timers('load-checkpoint', log_level=0).start(barrier=True)

        args.iteration, args.num_floating_point_operations_so_far = load_checkpoint(
            model,
            optimizer,
            opt_param_scheduler,
            checkpointing_context=checkpointing_context,
            skip_load_to_model_and_opt=HAVE_FSDP2
            and getattr(args, "use_torch_fsdp2", False)
            and args.ckpt_format == "torch_dist",
        )
        timers('load-checkpoint').stop(barrier=True)
        timers.log(['load-checkpoint'])
        one_logger and one_logger.log_metrics(
            {
                'load_checkpoint_finish_time': one_logger_utils.get_timestamp_in_ms(),
                'load_checkpoint_time': timers('load-checkpoint').active_time(),
            }
        )
    else:
        args.iteration = 0
        args.num_floating_point_operations_so_far = 0

    # get model without FP16 and/or DDP wrappers
    if (
        args.iteration == 0
        and len(unwrapped_model) == 1
        and hasattr(unwrapped_model[0], 'init_state_dict_from_bert')
    ):
        print_rank_0("Initializing ICT from pretrained BERT model")
        unwrapped_model[0].init_state_dict_from_bert()
        if args.fp16:
            optimizer.reload_model_params()

    # Convert checkpoint format.
    if args.ckpt_convert_format is not None:
        load_ckpt_format = args.ckpt_format
        args.ckpt_format = args.ckpt_convert_format
        args.save = os.path.join(args.ckpt_convert_save, args.ckpt_convert_format)
        update_use_dist_ckpt(args)

        save_checkpoint(
            args.iteration,
            model,
            optimizer,
            opt_param_scheduler,
            args.num_floating_point_operations_so_far,
            preprocess_common_state_dict_fn=preprocess_common_state_dict,
        )

        print_rank_0("> converted checkpoint: %s -> %s." % (load_ckpt_format, args.ckpt_format))
        torch.distributed.barrier()
        exit()

    return model, optimizer, opt_param_scheduler


def dummy_train_step(data_iterator):
    """Single dummy training step."""
    num_microbatches = get_num_microbatches()
    rerun_state_machine = get_rerun_state_machine()
    while rerun_state_machine.should_run_forward_backward(data_iterator):
        for _ in range(num_microbatches):
            # Re-use methods used in get_batch() from pretrain_{gpt, mamba}.py.
            batch = get_batch_on_this_tp_rank(data_iterator)
            batch = get_batch_on_this_cp_rank(batch)


def train_step(forward_step_func, data_iterator, model, optimizer, opt_param_scheduler, config, forward_backward_func, iteration=None):
    """Single training step."""
    args = get_args()
    timers = get_timers()

    rerun_state_machine = get_rerun_state_machine()
    save_dgrads_in_this_iteration = (args.save_dgrads_interval is not None and
                                     (iteration + 1) % args.save_dgrads_interval == 0)
    save_wgrads_in_this_iteration = (args.save_wgrads_interval is not None and
                                     (iteration + 1) % args.save_wgrads_interval == 0)
    while rerun_state_machine.should_run_forward_backward(data_iterator):
        # Set grad to zero.
        for model_chunk in model:
            model_chunk.zero_grad_buffer()
            # If saving main_grads in this iteration, then all-reduce instead of reduce-scatter.
            model_chunk.force_all_reduce = save_wgrads_in_this_iteration
        optimizer.zero_grad()

        if has_nvidia_modelopt:
            # [ModelOpt]: Pipeline-parallel Distillation stacks student and teacher tensors
            adjust_tensor_shapes_fn = get_tensor_shapes_adjust_fn_for_distillation(
                model,
                seq_length=args.seq_length,
                micro_batch_size=args.micro_batch_size,
                decoder_seq_length=args.decoder_seq_length,
            )
        else:
            adjust_tensor_shapes_fn = None

        # For the mxfp8_param with reuse_grad_buf_for_mxfp8_param_ag and dp_ag_overlap,
        # we need to call the _copy_main_params_to_param_buffer() after the grad buffer
        # is zeroed by zero_grad_buffer() because param and grad buffer are shared.
        #
        # However, we should skip this on the first iteration when forward_pre_hook is disabled,
        # because:
        # 1. The first iteration's params are already in param.data (from init or checkpoint).
        # 2. Without forward_pre_hook, finish_param_sync() won't be called to zero the grad buffer,
        #    so the main grads will be polluted by the main params.
        if args.reuse_grad_buf_for_mxfp8_param_ag and args.overlap_param_gather:
            # Check if forward_pre_hook is enabled by checking if hooks are registered.
            forward_pre_hook_enabled = len(model[0].remove_forward_pre_hook_handles) > 0
            if forward_pre_hook_enabled:
                for optim_instance in optimizer.chained_optimizers:
                    if isinstance(optim_instance, DistributedOptimizer):
                        optim_instance._copy_main_params_to_param_buffer()

        # Forward pass.
        if save_dgrads_in_this_iteration:
            enable_dgrad_logging(model, args.save)
        losses_reduced = forward_backward_func(
            forward_step_func=forward_step_func,
            data_iterator=data_iterator,
            model=model,
            num_microbatches=get_num_microbatches(),
            seq_length=args.seq_length,
            micro_batch_size=args.micro_batch_size,
            decoder_seq_length=args.decoder_seq_length,
            forward_only=False,
            adjust_tensor_shapes_fn=adjust_tensor_shapes_fn,
            force_all_reduce=save_wgrads_in_this_iteration,
        )
        if save_dgrads_in_this_iteration:
            save_dgrads(iteration + 1)
            disable_dgrad_logging()

        # Reset force_all_reduce field.
        for model_chunk in model:
            model_chunk.force_all_reduce = False

    # Checkpoint main_grads.
    if save_wgrads_in_this_iteration:
        # Collect state_dict of wgrads (each param's .main_grad field).
        state_dict = defaultdict(dict)
        for model_chunk_id, model_chunk in enumerate(model):
            model_chunk_name = f"model_chunk{model_chunk_id}"
            unwrapped_model_chunk = unwrap_model(model_chunk)
            for param_name, param in unwrapped_model_chunk.named_parameters():
                if getattr(param, "main_grad", None) is not None:
                    main_grad_on_cpu = param.main_grad.cpu()
                    state_dict[model_chunk_name][param_name] = main_grad_on_cpu

        # iteration is 0-indexed, move to 1-indexed for checkpoint name and logging.
        save_grads(args.save, state_dict, iteration + 1, "wgrads")

    should_checkpoint, should_exit, exit_code = rerun_state_machine.should_checkpoint_and_exit()
    if should_exit:
        return {}, True, should_checkpoint, should_exit, exit_code, None, None, 0

    # Empty unused memory.
    if args.empty_unused_memory_level >= 1:
        torch.cuda.empty_cache()

    # Vision gradients.
    if args.vision_pretraining and args.vision_pretraining_type == "dino":
        unwrapped_model = unwrap_model(model[0])
        unwrapped_model.cancel_gradients_last_layer(args.curr_iteration)

    # Update parameters.

    timers('optimizer', log_level=1).start(barrier=args.barrier_with_L1_time)
    update_successful, grad_norm, num_zeros_in_grad = optimizer.step()

    # get max attention logit for logging and run clip_qk()
    # Part of MuonClip Optimizer step
    log_max_attention_logit = 0
    if args.qk_clip or args.log_max_attention_logit:
        log_max_attention_logit = clip_qk(model, log_max_only=not args.qk_clip)

    timers('optimizer').stop()

    # when freezing sub-models we may have a mixture of successful and unsucessful ranks,
    # so we must gather across mp ranks
    update_successful = logical_and_across_model_parallel_group(update_successful)
    # grad_norm and num_zeros_in_grad will be None on ranks without trainable params,
    # so we must gather across mp ranks
    grad_norm = reduce_max_stat_across_model_parallel_group(grad_norm)
    if args.log_num_zeros_in_grad:
        num_zeros_in_grad = reduce_max_stat_across_model_parallel_group(num_zeros_in_grad)

    # Vision momentum.
    if args.vision_pretraining and args.vision_pretraining_type == "dino":
        unwrapped_model = unwrap_model(model[0])
        unwrapped_model.update_momentum(args.curr_iteration)

    # Update learning rate.
    if update_successful:
        increment = get_num_microbatches() * args.micro_batch_size * args.data_parallel_size
        opt_param_scheduler.step(increment=increment)
        skipped_iter = 0
    else:
        skipped_iter = 1

    # Empty unused memory.
    if args.empty_unused_memory_level >= 2:
        torch.cuda.empty_cache()

    if mpu.is_pipeline_last_stage(ignore_virtual=True):
        # Average loss across microbatches.
        loss_reduced = {}

        for key in losses_reduced[0].keys():
            val = [x[key].view(-1) for x in losses_reduced]
            if val[0].numel() == 2:
                # there is one dict per microbatch. in new reporting, we average
                # over the total number of tokens across the global batch.
                val = torch.vstack(val).sum(dim=0)
                torch.distributed.all_reduce(
                    val,
                    group=mpu.get_data_parallel_group(with_context_parallel=True)
                )
                loss_reduced[key] = val[0] / val[1]
            elif val[0].numel() == 1:
                # legacy behavior, we average over the number of microbatches
                val = torch.cat(val).mean()
                loss_reduced[key] = val
            else:
                raise ValueError(f"Invalid value shape: {val[0].shape} for key {key}")
        return (
            loss_reduced,
            skipped_iter,
            should_checkpoint,
            should_exit,
            exit_code,
            grad_norm,
            num_zeros_in_grad,
            log_max_attention_logit,
        )
    return {}, skipped_iter, should_checkpoint, should_exit, exit_code, grad_norm, num_zeros_in_grad, log_max_attention_logit


def training_log(
    loss_dict,
    total_loss_dict,
    learning_rate: float | None,
    iteration,
    loss_scale,
    report_memory_flag,
    skipped_iter,
    grad_norm,
    params_norm,
    num_zeros_in_grad,
    max_attention_logit,
    pg_collection=None,
    is_first_iteration=False,
):
    """Log training information such as losses, timing, ...."""
    args = get_args()
    timers = get_timers()
    writer = get_tensorboard_writer()
    wandb_writer = get_wandb_writer()
    one_logger = get_one_logger()
    energy_monitor = get_energy_monitor()

    # On first iteration, log stats but don't reset accumulators so normal interval stats remain accurate.
    should_reset = not is_first_iteration

    # Advanced, skipped, and Nan iterations.
    advanced_iters_key = 'advanced iterations'
    skipped_iters_key = 'skipped iterations'
    nan_iters_key = 'nan iterations'
    # Advanced iterations.
    if not skipped_iter:
        total_loss_dict[advanced_iters_key] = total_loss_dict.get(advanced_iters_key, 0) + 1
    else:
        if advanced_iters_key not in total_loss_dict:
            total_loss_dict[advanced_iters_key] = 0
    # Skipped iterations.
    total_loss_dict[skipped_iters_key] = total_loss_dict.get(skipped_iters_key, 0) + skipped_iter
    # Update losses and set nan iterations
    got_nan = False
    for key in loss_dict:
        if not skipped_iter:
            total_loss_dict[key] = (
                total_loss_dict.get(key, torch.tensor([0.0], dtype=torch.float, device='cuda'))
                + loss_dict[key]
            )
        else:
            value = loss_dict[key].float().sum().item()
            is_nan = value == float('inf') or value == -float('inf') or value != value
            got_nan = got_nan or is_nan
    total_loss_dict[nan_iters_key] = total_loss_dict.get(nan_iters_key, 0) + int(got_nan)

    # Logging.
    timers_to_log = []
    if args.timing_log_level >= 1:
        timers_to_log.extend([
            'forward-backward',
            'layernorm-grads-all-reduce',
            'embedding-grads-all-reduce',
            'all-grads-sync',
            'params-all-gather',
            'optimizer-copy-to-main-grad',
            'optimizer-unscale-and-check-inf',
            'optimizer-clip-main-grad',
            'optimizer-count-zeros',
            'optimizer-inner-step',
            'optimizer-copy-main-to-model-params',
            'optimizer',
        ])
    if args.timing_log_level >= 2:
        timers_to_log.extend([
            'batch-generator',
            'forward-compute',
            'backward-compute',
            'forward-recv',
            'forward-send',
            'backward-recv',
            'backward-send',
            'forward-send-forward-recv',
            'forward-send-backward-recv',
            'backward-send-forward-recv',
            'backward-send-backward-recv',
            'forward-backward-send-forward-backward-recv',
        ])
    # Add timers from RL loop if needed.
    if args.perform_rl_step:
        timers_to_log.extend(['rollout-collection', 'inference-setup', 'collect-rollouts', 'postrollout-gc-collect',
                              'sync-rollouts', 'prepare-data-for-update', 'compute-group-stats',
                              'prepare-trajectories', 'get-ltor-masks-and-position-ids', 'create-logprobs-dataloader',
                              'compute-logprobs', 'compute-ref-logprobs', 'compute-prob-stats',
                              'prepare-advantages', 'create-dataloader', 'log-wandb-tb',
                              'offload-optimizer-before-inference', 'onload-kv-cache-before-inference',
                              'wait-for-decode-only', 'build-cuda-graphs', 'suspend-engine',
                              'offload-kv-cache-after-inference', 'onload-optimizer-after-inference'])

    # Calculate batch size.
    batch_size = args.micro_batch_size * args.data_parallel_size * get_num_microbatches()

    # Track app tag & app tag ID
    one_logger_utils.track_app_tag(batch_size, args.world_size, args.seq_length)

    total_iterations = total_loss_dict[advanced_iters_key] + total_loss_dict[skipped_iters_key]

    # learning rate will be None on ranks without trainable params, so we must gather across mp ranks
    learning_rate: float | None = reduce_max_stat_across_model_parallel_group(learning_rate)
    # Tensorboard values.
    if writer and (iteration % args.tensorboard_log_interval == 0):
        if wandb_writer:
            wandb_writer.log({'samples vs steps': args.consumed_train_samples}, iteration)
        if learning_rate is not None:
            writer.add_scalar('learning-rate', learning_rate, iteration)
            writer.add_scalar('learning-rate vs samples', learning_rate, args.consumed_train_samples)
            if wandb_writer:
                wandb_writer.log({'learning-rate': learning_rate}, iteration)
        if args.skipped_train_samples > 0:
            writer.add_scalar('skipped-train-samples', args.skipped_train_samples, iteration)
            if wandb_writer:
                wandb_writer.log({'skipped-train-samples': args.skipped_train_samples}, iteration)
        writer.add_scalar('batch-size', batch_size, iteration)
        writer.add_scalar('batch-size vs samples', batch_size, args.consumed_train_samples)
        if wandb_writer:
            wandb_writer.log({'batch-size': batch_size}, iteration)
        # Log bins for packed mode
        if has_rl_utils and args.rl_use_sequence_packing:
            packing_metrics = rl_utils.get_sequence_packing_tensorboard_metrics(args)
            for metric_name, metric_value in packing_metrics.items():
                writer.add_scalar(metric_name, metric_value, iteration)
            if wandb_writer and packing_metrics:
                wandb_writer.log(packing_metrics, iteration)
        for key in loss_dict:
            writer.add_scalar(key, loss_dict[key], iteration)
            writer.add_scalar(key + ' vs samples', loss_dict[key], args.consumed_train_samples)
            if wandb_writer:
                wandb_writer.log({key: loss_dict[key]}, iteration)
        if args.log_loss_scale_to_tensorboard:
            writer.add_scalar('loss-scale', loss_scale, iteration)
            writer.add_scalar('loss-scale vs samples', loss_scale, args.consumed_train_samples)
            if wandb_writer:
                wandb_writer.log({'loss-scale': loss_scale}, iteration)
        if args.log_world_size_to_tensorboard:
            writer.add_scalar('world-size', args.world_size, iteration)
            writer.add_scalar('world-size vs samples', args.world_size, args.consumed_train_samples)
            if wandb_writer:
                wandb_writer.log({'world-size': args.world_size}, iteration)
        if grad_norm is not None:
            writer.add_scalar('grad-norm', grad_norm, iteration)
            writer.add_scalar('grad-norm vs samples', grad_norm, args.consumed_train_samples)
            if wandb_writer:
                wandb_writer.log({'grad-norm': grad_norm}, iteration)
        if num_zeros_in_grad is not None:
            writer.add_scalar('num-zeros', num_zeros_in_grad, iteration)
            writer.add_scalar(
                'num-zeros vs samples', num_zeros_in_grad, args.consumed_train_samples
            )
            if wandb_writer:
                wandb_writer.log({'num-zeros': num_zeros_in_grad}, iteration)
        if params_norm is not None:
            writer.add_scalar('params-norm', params_norm, iteration)
            writer.add_scalar('params-norm vs samples', params_norm, args.consumed_train_samples)
            if wandb_writer:
                wandb_writer.log({'params-norm': params_norm}, iteration)
        if args.perform_rl_step:
            grpo_collection_iteration = iteration // (args.grpo_iterations * ( ( args.grpo_samples_per_iteration )// args.global_batch_size ))
            writer.add_scalar('grpo_collection_iteration', grpo_collection_iteration, iteration)
            if wandb_writer:
                wandb_writer.log({'grpo_collection_iteration': grpo_collection_iteration}, iteration)
        if args.log_memory_to_tensorboard:
            mem_stats = torch.cuda.memory_stats()
            writer.add_scalar(
                "mem-reserved-bytes", mem_stats["reserved_bytes.all.current"], iteration
            )
            writer.add_scalar(
                "mem-allocated-bytes", mem_stats["allocated_bytes.all.current"], iteration
            )
            writer.add_scalar(
                "mem-max-allocated-bytes", mem_stats["allocated_bytes.all.peak"], iteration
            )
            writer.add_scalar("mem-allocated-count", mem_stats["allocation.all.current"], iteration)
        if args.log_max_attention_logit:
            writer.add_scalar('max_attention_logit', max_attention_logit, iteration)
            if wandb_writer:
                wandb_writer.log({'max_attention_logit': max_attention_logit}, iteration)

    # Log MoE metrics.
    if args.num_experts is not None:
        moe_loss_scale = 1 / get_num_microbatches()
        track_names = []
        if "aux_loss" in args.moe_router_load_balancing_type:
            track_names.append("load_balancing_loss")
        if "seq_aux_loss" in args.moe_router_load_balancing_type:
            track_names.append("seq_load_balancing_loss")
        if "global_aux_loss" in args.moe_router_load_balancing_type:
            track_names.append("global_load_balancing_loss")
        if args.moe_z_loss_coeff is not None:
            track_names.append("z_loss")

        if is_hybrid_model(args):
            from operator import itemgetter

            from megatron.core.ssm.mamba_hybrid_layer_allocation import (
                Symbols, get_hybrid_layer_counts,
            )
            layers = itemgetter(Symbols.MOE)(get_hybrid_layer_counts(args.hybrid_layer_pattern))
        else:
            layers = args.num_layers

        track_moe_metrics(
            loss_scale=moe_loss_scale,
            iteration=iteration,
            writer=writer,
            wandb_writer=wandb_writer,
            total_loss_dict=total_loss_dict,
            per_layer_logging=args.moe_per_layer_logging,
            force_initialize=True,
            track_names=track_names,
            num_layers=layers,
            moe_layer_freq=args.moe_layer_freq,
            mtp_num_layers=args.mtp_num_layers,
            pg_collection=pg_collection,
        )

    # Log MTP metrics.
    if args.mtp_num_layers is not None:
        mtp_loss_scale = 1 / get_num_microbatches()
        MTPLossLoggingHelper.track_mtp_metrics(
            mtp_loss_scale, iteration, writer, wandb_writer, total_loss_dict
        )

    # Track sparse attention indexer loss.
    if args.dsa_indexer_loss_coeff is not None and args.dsa_indexer_loss_coeff > 0:
        indexer_loss_scale = 1 / get_num_microbatches()
        DSAIndexerLossLoggingHelper.track_indexer_metrics(
            loss_scale=indexer_loss_scale,
            iteration=iteration,
            writer=writer,
            wandb_writer=wandb_writer,
            total_loss_dict=total_loss_dict,
        )

    # Dump memory snapshot and print metrics to stdout.
    if iteration % args.log_interval == 0 or is_first_iteration:
        if args.record_memory_history and (is_last_rank() or torch.distributed.get_backend() == 'fake'):
            snapshot = torch.cuda.memory._snapshot()
            from pickle import dump

            with open(args.memory_snapshot_path, 'wb') as f:
                dump(snapshot, f)

        elapsed_time = timers('interval-time').elapsed(barrier=True, reset=should_reset)
        elapsed_time_per_iteration = elapsed_time / total_iterations

        throughput = num_floating_point_operations(args, batch_size) / (
            elapsed_time_per_iteration * 10**12 * args.world_size
        )

        one_logger_utils.track_e2e_metrics(args.log_throughput, throughput)

        # We log to stdout after the first iteration (controlled by `is_first_iteration`)
        # to document initialization overhead. Log statistics to TensorBoard and
        # WandB according to the regular schedule.
        if args.log_timers_to_tensorboard and not is_first_iteration:
            if writer:
                writer.add_scalar('iteration-time', elapsed_time_per_iteration, iteration)
            if wandb_writer:
                wandb_writer.log({'iteration-time': elapsed_time_per_iteration}, iteration)
        log_string = f" [{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')}]"
        log_string += ' iteration {:8d}/{:8d} |'.format(iteration, args.train_iters)
        log_string += ' consumed samples: {:12d} |'.format(args.consumed_train_samples)
        if has_rl_utils and args.rl_use_sequence_packing:
            log_string += rl_utils.get_sequence_packing_log_info(args)
        if args.skipped_train_samples > 0:
            log_string += ' skipped samples: {:12d} |'.format(args.skipped_train_samples)
        log_string += ' elapsed time per iteration (ms): {:.1f} |'.format(
            elapsed_time_per_iteration * 1000.0
        )
        if args.log_throughput:
            log_string += f' throughput per GPU (TFLOP/s/GPU): {throughput:.1f} |'
            if args.log_timers_to_tensorboard:
                if writer:
                    writer.add_scalar('throughput', throughput, iteration)
                if wandb_writer:
                    wandb_writer.log({'throughput': throughput}, iteration)
        if args.log_energy:
            energy = (energy_monitor.lap() / total_iterations) / args.world_size
            power = energy / elapsed_time_per_iteration
            log_string += f' energy per GPU (J/iter/GPU): {energy:.1f} |'
            log_string += f' power per GPU (W/GPU): {power:.1f} |'
            if writer:
                writer.add_scalar('iter-energy/gpu', energy, iteration)
                writer.add_scalar('power/gpu', power, iteration)
            if wandb_writer:
                wandb_writer.log({'iter-energy/gpu': energy}, iteration)
                wandb_writer.log({'power/gpu': power}, iteration)
        # Decoupled_learning_rate should be not None only on first and last pipeline stage.
        if learning_rate is not None:
            log_string += f' learning rate: {learning_rate:.6E} |'
        log_string += f' global batch size: {batch_size:5d} |'
        for key in total_loss_dict:
            if key not in [advanced_iters_key, skipped_iters_key, nan_iters_key]:
                avg = total_loss_dict[key].item() / float(
                    max(1, total_loss_dict[advanced_iters_key])
                )
                if avg >= 0.0:
                    log_string += ' {}: {:.6E} |'.format(key, avg)
                if should_reset:
                    total_loss_dict[key] = torch.tensor([0.0], dtype=torch.float, device='cuda')
        log_string += f' loss scale: {loss_scale:.1f} |'
        if grad_norm is not None:
            log_string += f' grad norm: {grad_norm:.3f} |'
        if num_zeros_in_grad is not None:
            log_string += f' num zeros: {num_zeros_in_grad} |'
        if params_norm is not None:
            log_string += f' params norm: {params_norm:.3f} |'
        log_string += ' number of skipped iterations: {:3d} |'.format(
            total_loss_dict[skipped_iters_key]
        )
        log_string += ' number of nan iterations: {:3d} |'.format(total_loss_dict[nan_iters_key])
        if should_reset:
            total_loss_dict[advanced_iters_key] = 0
            total_loss_dict[skipped_iters_key] = 0
            total_loss_dict[nan_iters_key] = 0
        print_rank_last(log_string)
        reported_memory_in_this_iteration = False
        if report_memory_flag:
            # Report memory after optimizer state has been initialized.
            if torch.distributed.get_rank() == 0:
                num_microbatches = get_num_microbatches()
                report_theoretical_memory(args, num_microbatches=num_microbatches, verbose=True)
            report_memory(f'(after {iteration} iterations)')
            reported_memory_in_this_iteration = True
            loaded_iteration = max(get_loaded_iteration() or 0, 0)
            if iteration > (loaded_iteration + 1):
                # Make sure the memory after the second iteration is reported to include optimizer state memory.
                report_memory_flag = False
        if args.log_memory_interval is not None and iteration % args.log_memory_interval == 0 and \
            not reported_memory_in_this_iteration:
            report_memory(f'(after {iteration} iterations)')
        # Write timers to wandb, don't reset the counts.
        if args.log_timers_to_tensorboard:
            timers.write(timers_to_log, writer, iteration, normalizer=args.log_interval, reset=False)
            timers.write(timers_to_log, wandb_writer, iteration, normalizer=args.log_interval, reset=False)
        # Log timers to stdout
        timers.log(timers_to_log, normalizer=args.log_interval, reset=should_reset)

    return report_memory_flag


def compute_throughputs_and_append_to_progress_log(iteration, num_floating_point_operations_so_far):
    args = get_args()
    if args.save is None:
        return

    # Compute job throughput.
    # args.num_floating_point_operations_so_far keeps track of floating-point operations
    # completed at the start of job.
    global _TRAIN_START_TIME
    job_throughput = (
        num_floating_point_operations_so_far - args.num_floating_point_operations_so_far
    ) / ((time.time() - _TRAIN_START_TIME) * 10**12 * args.world_size)

    # Compute cumulative throughput since jobs of this world size were launched.
    # `get_start_time_from_progress_log` returns start time and number of floating-point
    # operations of first job of this world size.
    start_time, start_num_floating_point_operations = get_start_time_from_progress_log()
    elapsed_time = (datetime.now() - start_time).total_seconds()
    cumulative_throughput = (
        num_floating_point_operations_so_far - start_num_floating_point_operations
    ) / (elapsed_time * 10**12 * args.world_size)

    tokens_so_far = args.consumed_train_samples * args.seq_length
    saved_ckpt_prefix = 'Saving async checkpoint' if args.async_save else 'Saved checkpoint'
    append_to_progress_log(
        f"{saved_ckpt_prefix}\tIteration: {iteration}\t"
        f"Job throughput: {job_throughput:.1f} TFLOP/s/GPU\t"
        f"Cumulative throughput: {cumulative_throughput:.1f} TFLOP/s/GPU\t"
        f"Floating-point operations: {num_floating_point_operations_so_far:.2e}\t"
        f"Tokens (in billions): {tokens_so_far / 10**9:.2f}"
    )


def enable_forward_pre_hook(model_chunks):
    for model_chunk in model_chunks:
        assert isinstance(model_chunk, DDP)
        model_chunk.enable_forward_pre_hook()


def disable_forward_pre_hook(model_chunks, param_sync=True):
    for model_chunk in model_chunks:
        assert isinstance(model_chunk, DDP)
        model_chunk.disable_forward_pre_hook(param_sync=param_sync)


def force_param_sync(model_chunks: list[DDP]) -> None:
    for model_chunk in model_chunks:
        assert isinstance(model_chunk, DDP)
        model_chunk.start_param_sync(force_sync=True)

# Only report memory for first 3 checkpoint saves.
num_checkpoints_memory_reported = 0
MAX_NUM_CHECKPOINTS_MEMORY_REPORTED = 3

def save_checkpoint_and_time(
    iteration,
    model,
    optimizer,
    opt_param_scheduler,
    num_floating_point_operations_so_far,
    checkpointing_context,
    non_persistent_ckpt=False,
    train_data_iterator=None,
):
    args = get_args()
    timers = get_timers()
    energy_monitor = get_energy_monitor()

    # Synchronize forward pre-hook state before checkpoint save to avoid race conditions
    if should_disable_forward_pre_hook(args):
        force_param_sync(model)

    # Stop timer to get accurate train interval time and exclude checkpointing duration
    timers('interval-time').stop()
    energy_monitor.pause()

    # Extra barrier is added to make sure all ranks report the max time.
    timer_key = 'save-checkpoint-non-persistent' if non_persistent_ckpt else 'save-checkpoint'
    timers(timer_key, log_level=0).start(barrier=True)

    # Log E2E metrics before save-checkpoint
    one_logger_utils.track_e2e_metrics()
    # Free overlap param-gather buffers and release cached GPU memory so
    # that the async checkpoint worker process has enough GPU headroom for
    # D2H tensor transfers.
    for model_chunk in model:
        if hasattr(model_chunk, 'free_overlap_buffers'):
            model_chunk.free_overlap_buffers()
    torch.cuda.empty_cache()

    global num_checkpoints_memory_reported, MAX_NUM_CHECKPOINTS_MEMORY_REPORTED
    should_report_memory = num_checkpoints_memory_reported < MAX_NUM_CHECKPOINTS_MEMORY_REPORTED

    if should_report_memory:
        # Track memory before checkpoint save.
        report_memory(f"(before save_checkpoint for iteration {iteration})")
    # Save checkpoint.
    save_checkpoint(
        iteration,
        model,
        optimizer,
        opt_param_scheduler,
        num_floating_point_operations_so_far,
        checkpointing_context,
        non_persistent_ckpt=non_persistent_ckpt,
        train_data_iterator=train_data_iterator,
        preprocess_common_state_dict_fn=preprocess_common_state_dict,
    )
    if should_report_memory:
        # Track memory after checkpoint save.
        report_memory(f"(after save_checkpoint for iteration {iteration})")
    num_checkpoints_memory_reported += 1

    if args.fp8:
        # Run garbage collection after checkpoint saving to free memory from
        # dequantized bf16 tensors that were temporarily created during fp8
        # model checkpoint saving.
        gc.collect()
    timers(timer_key).stop(barrier=True)
    timers.log([timer_key])

    # Log E2E metrics after save-checkpoint
    one_logger_utils.track_e2e_metrics()
    save_checkpoint_duration = timers(timer_key).elapsed()
    one_logger_utils.on_save_checkpoint_end(save_checkpoint_duration, iteration, args.async_save)

    if args.log_progress and not non_persistent_ckpt:
        compute_throughputs_and_append_to_progress_log(
            iteration, num_floating_point_operations_so_far
        )

    # Recover timing
    energy_monitor.resume()
    timers('interval-time', log_level=0).start(barrier=True)


def post_training_step_callbacks(
    model,
    optimizer,
    opt_param_scheduler,
    iteration,
    prof,
    num_floating_point_operations_since_last_log_event,
    nsys_nvtx_context = None,
):
    """Run all post-training-step functions (e.g., FT heartbeats, GC)."""
    args = get_args()

    # Bring CPU and GPU back in sync if on right iteration.
    if args.train_sync_interval and iteration % args.train_sync_interval == 0:
        torch.cuda.synchronize()

    # Straggler detector.
    if iteration % args.log_interval == 0 and args.log_straggler:
        # Use FLOPs accumulated since last log event and then reset the counter
        stimer.report(num_floating_point_operations_since_last_log_event, args.log_interval)
        num_floating_point_operations_since_last_log_event = 0.0

    # Check weight hash across DP replicas.
    if (
        args.check_weight_hash_across_dp_replicas_interval is not None
        and iteration % args.check_weight_hash_across_dp_replicas_interval == 0
    ):
        if should_disable_forward_pre_hook(args):
            disable_forward_pre_hook(model)
        assert check_param_hashes_across_dp_replicas(
            model, cross_check=True
        ), "Parameter hashes not matching across DP replicas"
        torch.distributed.barrier()
        print_rank_0(f">>> Weight hashes match after {iteration} iterations...")
        if should_disable_forward_pre_hook(args):
            enable_forward_pre_hook(model)

    # Autoresume.
    if args.adlr_autoresume and (iteration % args.adlr_autoresume_interval == 0):
        check_adlr_autoresume_termination(iteration, model, optimizer, opt_param_scheduler)

    # Profiling.
    if (
        args.profile
        and iteration == args.profile_step_end
        and (len(args.profile_ranks) == 0 or
             torch.distributed.get_rank() in args.profile_ranks)
    ):
        if args.use_pytorch_profiler:
            assert prof is not None
            prof.stop()
            if prof.execution_trace_observer is not None:
                prof.execution_trace_observer.unregister_callback()
        else:
            torch.cuda.check_error(torch.cuda.cudart().cudaProfilerStop())
            if nsys_nvtx_context is not None:
                nsys_nvtx_context.__exit__(None, None, None)

    # Manual garbage collection.
    if args.manual_gc:
        if args.manual_gc_interval != 0 and iteration % args.manual_gc_interval == 0:
            gc.collect()

    # Return updated FLOPs accumulator so caller can persist the reset
    return num_floating_point_operations_since_last_log_event


def checkpoint_and_decide_exit(
    model,
    optimizer,
    opt_param_scheduler,
    iteration,
    num_floating_point_operations_so_far,
    checkpointing_context,
    train_data_iterator,
):
    """Save checkpoint and decide whether to exit based on arguments (e.g., if
    --exit-duration-in-mins is set). Actual exit happens in main training loop
    based on the return value of this function."""
    args = get_args()
    timers = get_timers()

    # Exit based on signal handler.
    saved_checkpoint = False
    if args.exit_signal_handler:
        signal_handler = get_signal_handler()
        if any(signal_handler.signals_received()):
            if args.save:
                save_checkpoint_and_time(
                    iteration,
                    model,
                    optimizer,
                    opt_param_scheduler,
                    num_floating_point_operations_so_far,
                    checkpointing_context,
                    train_data_iterator=train_data_iterator,
                )
            print_datetime('exiting program after receiving SIGTERM.')

            return True

    # Regular save (persistent and non-persistent).
    if args.save and args.save_interval and iteration % args.save_interval == 0:
        save_checkpoint_and_time(
            iteration,
            model,
            optimizer,
            opt_param_scheduler,
            num_floating_point_operations_so_far,
            checkpointing_context,
            train_data_iterator=train_data_iterator,
        )
        saved_checkpoint = True

    elif (
        args.save
        and args.non_persistent_save_interval
        and iteration % args.non_persistent_save_interval == 0
    ):
        save_checkpoint_and_time(
            iteration,
            model,
            optimizer,
            opt_param_scheduler,
            num_floating_point_operations_so_far,
            checkpointing_context,
            non_persistent_ckpt=True,
            train_data_iterator=train_data_iterator,
        )
        saved_checkpoint = True

    # Exit based on duration.
    if args.exit_duration_in_mins:
        train_time = (time.time() - _TRAIN_START_TIME) / 60.0
        done_cuda = torch.tensor(
            [train_time > args.exit_duration_in_mins], dtype=torch.int, device='cuda'
        )
        torch.distributed.all_reduce(done_cuda, op=torch.distributed.ReduceOp.MAX)
        done = done_cuda.item()
        if done:
            if args.save and not saved_checkpoint:
                save_checkpoint_and_time(
                    iteration,
                    model,
                    optimizer,
                    opt_param_scheduler,
                    num_floating_point_operations_so_far,
                    checkpointing_context,
                    train_data_iterator=train_data_iterator,
                )
            print_datetime(f'exiting program after {train_time} minutes')

            return True

    # Exit based on iterations.
    if (
        args.exit_interval
        and iteration % args.exit_interval == 0
    ) or (
        args.phase_transition_iterations
        and iteration in args.phase_transition_iterations
    ):
        if args.save and not saved_checkpoint:
            save_checkpoint_and_time(
                iteration,
                model,
                optimizer,
                opt_param_scheduler,
                num_floating_point_operations_so_far,
                checkpointing_context,
                train_data_iterator=train_data_iterator,
            )
        print_datetime(f'exiting program at iteration {iteration}')

        return True

    return False


def train(
    forward_step_func,
    model,
    optimizer,
    opt_param_scheduler,
    train_data_iterator,
    valid_data_iterator,
    process_non_loss_data_func,
    config,
    checkpointing_context,
    non_loss_data_func,
    inference_model=None,
):
    """Training function: run train_step desired number of times, run validation, checkpoint."""
    args = get_args()
    timers = get_timers()

    if args.perform_rl_step:
        assert has_rl_utils, "RL cannot run without the megatron.rl package"

    # Additional variable initialization for RL training
    if args.perform_rl_step:
        if args.skip_train:
            # In inference-only mode, use current weights as reference.
            print_rank_0("> RL inference-only: using current weights as reference.")
            ref_state_dict = {
                k: (v.cpu() if v is not None else v) for k, v in model[0].state_dict().items()
            }
        else:
            print_rank_0("> Loading pretrained checkpoint for reference weights in RL training...")
            load, finetune, no_load_optim = args.load, args.finetune, args.no_load_optim
            args.no_load_optim = True

            # Load pretrained checkpoint
            args.load = None
            args.finetune = True
            load_checkpoint(
                    model,
                    None,  # Don't load optimizer state
                    None,  # Don't load scheduler state
                    checkpointing_context=checkpointing_context,
                    skip_load_to_model_and_opt=HAVE_FSDP2
                    and getattr(args, "use_torch_fsdp2", False)
                    and args.ckpt_format == "torch_dist",
                )
            ref_state_dict = {k: (v.cpu() if v is not None else v) for k, v in model[0].state_dict().items()}

            # Reload RL training checkpoint weights
            args.load = load
            args.finetune = finetune
            print_rank_0("> Reloading RL training checkpoint...")
            load_checkpoint(
                    model,
                    None,
                    None,
                    checkpointing_context=checkpointing_context,
                    skip_load_to_model_and_opt=HAVE_FSDP2
                    and getattr(args, "use_torch_fsdp2", False)
                    and args.ckpt_format == "torch_dist",
                )

            args.no_load_optim = no_load_optim

    # IMPORTANT FIX: For RL training, reinitialize the microbatch calculator with the correct configuration
    if args.perform_rl_step:
        print_rank_0("> Reinitializing microbatch calculator for GRPO training...")
        from megatron.core.num_microbatches_calculator import (
            destroy_num_microbatches_calculator,
            init_num_microbatches_calculator
        )
        # First destroy the existing calculator
        destroy_num_microbatches_calculator()
        # Then initialize with the correct perform_rl_step=True context
        init_num_microbatches_calculator(
            args.rank,
            args.rampup_batch_size,
            args.global_batch_size,
            args.micro_batch_size,
            mpu.get_data_parallel_world_size(),
            args.decrease_batch_size_if_needed
        )
        print_rank_0(f"> GRPO training: num_microbatches set to {get_num_microbatches()}")

    energy_monitor = get_energy_monitor()
    one_logger = get_one_logger()

    if args.hybrid_context_parallel:
        train_data_iterator = iter(HybridCPDataLoaderWrapper(train_data_iterator, config))

    if args.run_workload_inspector_server:
        try:
            from workload_inspector.utils.webserver import run_server
            import threading

            threading.Thread(
                target=run_server, daemon=True, args=(torch.distributed.get_rank(),)
            ).start()
        except ModuleNotFoundError:
            print_rank_0("workload inspector module not found.")

    # Write args to tensorboard
    write_args_to_tensorboard()

    # Turn on training mode which enables dropout.
    for model_module in model:
        model_module.train()

    model_pg_collection = get_attr_wrapped_model(model[0], "pg_collection")

    # Tracking loss.
    total_loss_dict = {}

    # Iterations.
    iteration = args.iteration
    # Make sure rerun_state_machine has the right iteration loaded from checkpoint.
    rerun_state_machine = get_rerun_state_machine()
    if rerun_state_machine.current_iteration != iteration:
        print_rank_0(f"Overwriting rerun_state_machine.current_iteration from "
                     f"{rerun_state_machine.current_iteration} to {iteration}...")
        rerun_state_machine.current_iteration = iteration

    # Track E2E metrics at the start of training.
    one_logger_utils.on_train_start(
        iteration=iteration,
        consumed_train_samples=args.consumed_train_samples,
        train_samples=args.train_samples,
        seq_length=args.seq_length,
        train_iters=args.train_iters,
        save=args.save,
        async_save=args.async_save,
        log_throughput=args.log_throughput,
        num_floating_point_operations_so_far=args.num_floating_point_operations_so_far,
    )

    num_floating_point_operations_so_far = args.num_floating_point_operations_so_far

    # Setup some training config params.
    config.grad_scale_func = optimizer.scale_loss if optimizer is not None else None
    config.timers = timers
    if isinstance(model[0], (megatron_FSDP, DDP)) and args.overlap_grad_reduce:
        assert config.no_sync_func is None, (
            'When overlap_grad_reduce is True, config.no_sync_func must be None; '
            'a custom no_sync_func is not supported when overlapping grad-reduce'
        )
        config.no_sync_func = [model_chunk.no_sync for model_chunk in model]
        if len(model) == 1:
            config.no_sync_func = config.no_sync_func[0]
        if args.align_grad_reduce:
            config.grad_sync_func = [model_chunk.start_grad_sync for model_chunk in model]
            if len(model) == 1:
                config.grad_sync_func = config.grad_sync_func[0]
    if args.overlap_param_gather and args.align_param_gather:
        config.param_sync_func = [model_chunk.start_param_sync for model_chunk in model]
        if len(model) == 1:
            config.param_sync_func = config.param_sync_func[0]
    config.finalize_model_grads_func = finalize_model_grads

    if args.log_energy:
        energy_monitor.setup()
        energy_monitor.resume()

    timers('interval-time', log_level=0).start(barrier=True)
    print_datetime('before the start of training step')
    report_memory_flag = True
    pre_hook_enabled = False
    should_exit = False
    exit_code = 0
    is_first_iteration = True

    if args.manual_gc:
        # Disable the default garbage collector and perform the collection manually.
        # This is to align the timing of garbage collection across ranks.
        assert (
            args.manual_gc_interval >= 0
        ), 'Manual garbage collection interval should be larger than or equal to 0'
        gc.disable()
        gc.collect()

    # Singleton initialization of straggler detector.
    if args.log_straggler:
        global stimer
        world = torch.distributed.get_world_size()
        rank = torch.distributed.get_rank()
        mmcnt = args.straggler_minmax_count
        stimer.configure(
            world,
            rank,
            mmcnt=mmcnt,
            enabled=not args.disable_straggler_on_startup,
            port=args.straggler_ctrlr_port,
        )
    num_floating_point_operations_since_last_log_event = 0.0

    num_microbatches = get_num_microbatches()
    eval_duration = 0.0
    eval_iterations = 0
    # Wrap forward_backward_func for Full iteration CUDA graph
    forward_backward_func = get_forward_backward_func()
    if args.cuda_graph_impl == "local" and CudaGraphScope.full_iteration in args.cuda_graph_scope:
        forward_backward_func = FullCudaGraphWrapper(forward_backward_func, cuda_graph_warmup_steps=args.cuda_graph_warmup_steps)

    def get_e2e_base_metrics():
        """Get base metrics values for one-logger to calculate E2E tracking metrics."""
        num_floating_point_operations_since_current_train_start = (
            num_floating_point_operations_so_far - args.num_floating_point_operations_so_far
        )
        return {
            'iteration': iteration,
            'train_duration': timers('interval-time').active_time(),
            'eval_duration': eval_duration,
            'eval_iterations': eval_iterations,
            'total_flops_since_current_train_start': num_floating_point_operations_since_current_train_start,
            'num_floating_point_operations_so_far': num_floating_point_operations_so_far,
            'consumed_train_samples': args.consumed_train_samples,
            'world_size': args.world_size,
            'seq_length': args.seq_length,
        }

    # Cache into one-logger for callback.
    if one_logger:
        with one_logger.get_context_manager():
            one_logger.store_set('get_e2e_base_metrics', get_e2e_base_metrics)

    prof = None
    nsys_nvtx_context = None # reference to context for nsys profiling, so it can be cleaned up
    if (
        args.profile
        and (len(args.profile_ranks) == 0 or
             torch.distributed.get_rank() in args.profile_ranks)
        and args.use_pytorch_profiler
    ):
        if args.pytorch_profiler_collect_chakra:
            et_dir = Path(f"{args.tensorboard_dir}/../chakra")
            et_dir.mkdir(parents=True, exist_ok=True)
            et = torch.profiler.ExecutionTraceObserver().register_callback(f"{et_dir}/rank-{torch.distributed.get_rank()}.json.gz")
        else:
            et = None
        def trace_handler(p):
            profile_dir = Path(f"{args.tensorboard_dir}/../torch_profile")
            profile_dir.mkdir(parents=True, exist_ok=True)
            p.export_chrome_trace(f"{profile_dir}/rank-{torch.distributed.get_rank()}.json.gz")
        prof = torch.profiler.profile(
            schedule=torch.profiler.schedule(
                wait=max(args.profile_step_start - 1, 0),
                warmup=1 if args.profile_step_start > 0 else 0,
                active=args.profile_step_end - args.profile_step_start,
                repeat=1,
            ),
            on_trace_ready=trace_handler,
            record_shapes=args.pytorch_profiler_collect_shapes,
            with_stack=args.pytorch_profiler_collect_callstack,
            execution_trace_observer=et,
        )
        prof.start()

    start_iteration = iteration
    # Disable forward pre-hook to start training to ensure that errors in checkpoint loading
    # or random initialization don't propagate to all ranks in first all-gather (which is a
    # no-op if things work correctly).
    if should_disable_forward_pre_hook(args):
        disable_forward_pre_hook(model, param_sync=False)
        # Also remove param_sync_func temporarily so that sync calls made in
        # `forward_backward_func` are no-ops.
        param_sync_func = config.param_sync_func
        config.param_sync_func = None
        pre_hook_enabled = False
    # Also, check weight hash across DP replicas to be very pedantic.
    if args.check_weight_hash_across_dp_replicas_interval is not None:
        assert check_param_hashes_across_dp_replicas(
            model, cross_check=True
        ), "Parameter hashes not matching across DP replicas"
        torch.distributed.barrier()
        print_rank_0(f">>> Weight hashes match after {iteration} iterations...")

    # Initialize CUDA Graphs helper.
    if args.cuda_graph_impl == "transformer_engine":
        cuda_graph_helper = TECudaGraphHelper(
            model=model,
            config=config,
            seq_length=args.seq_length,
            micro_batch_size=args.micro_batch_size,
            optimizers=[optimizer],
        )

    # Run training iterations till done.
    buffered_rollouts = None
    while iteration < args.train_iters:
        if (args.profile 
            and (len(args.profile_ranks) == 0 or
                 torch.distributed.get_rank() in args.profile_ranks)):
            if args.use_pytorch_profiler:
                prof.step()
            elif iteration == args.profile_step_start:
                torch.cuda.check_error(torch.cuda.cudart().cudaProfilerStart())
                nsys_nvtx_context = torch.autograd.profiler.emit_nvtx(record_shapes=True)
                nsys_nvtx_context.__enter__()

        ft_integration.on_checkpointing_start()
        maybe_finalize_async_save(blocking=False)
        ft_integration.on_checkpointing_end(is_async_finalization=True)
        # Update the timeout for all process groups after initialization
        # We update the timeout after the first successful iteration,
        # which takes longer than others usually
        if args.distributed_timeout_seconds_after_init is not None and iteration == start_iteration+1:
            # TODO: some dynamic timeout setting is required
            # based on the iteration time considering interval-based steps (e.g. eval, checkpoint)
            # e.g. timeout for normal iterations vs timeout for iterations with checkpoint
            # this timeout is triggered when there's no collective communication
            # for the duration of timeout
            update_pg_timeout(timedelta(seconds=args.distributed_timeout_seconds_after_init))
        # Update number of microbatches first without consistency check to decide if a
        # checkpoint should be saved. If the number of microbatches is different
        # from the previous iteration, save a checkpoint. Then run consistency check
        # to make sure training configuration is still valid.
        # Standard microbatch update (sequence packing overrides this in rl_utils.py)
        update_num_microbatches(args.consumed_train_samples, consistency_check=False, verbose=True)
        # Skip automatic checkpoint on microbatch changes when sequence packing is active
        # as it intentionally reconfigures microbatches
        if get_num_microbatches() != num_microbatches and iteration != 0:
            if args.rl_use_sequence_packing:
                print_rank_0(
                    f"[Sequence Packing] Skipping automatic checkpoint at iteration {iteration} "
                    f"(microbatch change: {num_microbatches} -> {get_num_microbatches()})"
                )
            else:
                assert get_num_microbatches() > num_microbatches, (
                    f"Number of microbatches should be increasing due to batch size rampup; "
                    f"instead going from {num_microbatches} to {get_num_microbatches()}"
                )
                if args.save is not None:
                    save_checkpoint_and_time(
                        iteration,
                        model,
                        optimizer,
                        opt_param_scheduler,
                        num_floating_point_operations_so_far,
                        checkpointing_context,
                        train_data_iterator=train_data_iterator,
                    )
        num_microbatches = get_num_microbatches()
        update_num_microbatches(args.consumed_train_samples, consistency_check=True, verbose=True)

        # Capture CUDA Graphs.
        if (
            args.cuda_graph_impl == "transformer_engine"
            and not cuda_graph_helper.capture_finished()
            and iteration - start_iteration == args.cuda_graph_warmup_steps
        ):
            if args.cuda_graph_warmup_steps > 0 and should_disable_forward_pre_hook(args):
                disable_forward_pre_hook(model, param_sync=False)
            cuda_graph_helper.create_cudagraphs()
            if args.cuda_graph_warmup_steps > 0 and should_disable_forward_pre_hook(args):
                enable_forward_pre_hook(model)
                cuda_graph_helper.cuda_graph_set_manual_hooks()

        # Completely skip iteration if needed.
        if iteration in args.iterations_to_skip:
            # Dummy train_step to fast forward train_data_iterator.
            dummy_train_step(train_data_iterator)
            if iteration == start_iteration:
                start_iteration = iteration + 1
            iteration += 1
            batch_size = (
                mpu.get_data_parallel_world_size() * args.micro_batch_size * get_num_microbatches()
            )
            args.consumed_train_samples += batch_size
            args.skipped_train_samples += batch_size
            continue

        args.curr_iteration = iteration
        # For GRPO, we keep the data for a few epochs. DeepSeekMath paper calls this number $\mu$.
        # It is similar to a PPO epoch.

        if args.perform_rl_step:
            if optimizer is None:
                # Release stale CUDA cached memory before inference.
                torch.cuda.empty_cache()
            with torch.no_grad():
                train_data_iterator = rl_utils.get_grpo_data_iterator(
                    model, inference_model, optimizer, iteration, ref_state_dict,
                    grpo_iterations=args.grpo_iterations,
                    grpo_prompts_per_step=args.grpo_prompts_per_step,
                    grpo_group_size=args.grpo_group_size,
                    global_batch_size=args.global_batch_size,
                    sequence_packing=args.rl_use_sequence_packing,
                    buffered_rollouts=buffered_rollouts,
                    is_correction=args.rl_inference_logprobs_is_correction,
                )
                # Buffered rollouts are used as a state container for setups when
                # we use previously-generated data for an update.
                buffered_rollouts = train_data_iterator

        if args.skip_train:
            # RL inference-only mode: skip gradient updates, just collect rollouts.
            loss_dict = {}
            skipped_iter = 0
            should_checkpoint = False
            should_exit = False
            exit_code = 0
            grad_norm = 0.0
            num_zeros_in_grad = 0
            max_attention_logit = None
        else:
            ft_integration.on_training_step_start()
            (
                loss_dict,
                skipped_iter,
                should_checkpoint,
                should_exit,
                exit_code,
                grad_norm,
                num_zeros_in_grad,
                max_attention_logit,
            ) = train_step(
                forward_step_func, train_data_iterator, model, optimizer, opt_param_scheduler, config, forward_backward_func, iteration=iteration
            )
            ft_integration.on_training_step_end()
        if should_checkpoint:
            save_checkpoint_and_time(
                iteration,
                model,
                optimizer,
                opt_param_scheduler,
                num_floating_point_operations_so_far,
                checkpointing_context,
                train_data_iterator=train_data_iterator,
            )
        if should_exit:
            break

        # Enable forward pre-hooks after first set of forward and backward passes.
        # When running in fp16, skip all NaN iterations until steady-state loss scaling value
        # is reached.
        if iteration == start_iteration:
            if skipped_iter:
                # Only enable forward pre-hook after a training step has successfully run. Relevant
                # for fp16 codepath where first XX iterations are skipped until steady-state loss
                # scale value is reached.
                start_iteration = iteration + 1
            else:
                # Enable forward pre-hook after training step has successfully run. All subsequent
                # forward passes will use the forward pre-hook / `param_sync_func` in
                # `forward_backward_func`.
                if should_disable_forward_pre_hook(args):
                    enable_forward_pre_hook(model)
                    config.param_sync_func = param_sync_func
                    pre_hook_enabled = True
                    # Set the manual hooks here since it's not set right after the capturing.
                    if (
                        args.cuda_graph_impl == "transformer_engine"
                        and args.cuda_graph_warmup_steps == 0
                    ):
                        assert (
                            cuda_graph_helper.capture_finished()
                        ), "CUDA Graph capture should have been finished."
                        cuda_graph_helper.cuda_graph_set_manual_hooks()

        iteration += 1

        # If requested, manually register FSDP communication buffers after a short warmup.
        if (
            getattr(args, "fsdp_manual_registration", False)
            and getattr(args, "use_megatron_fsdp", False)
            and iteration ==  start_iteration + 1
        ):
            for model_chunk in model:
                if isinstance(model_chunk, megatron_FSDP) and getattr(
                    model_chunk.ddp_config, "fsdp_manual_registration", False
                ):
                    pad_buf = getattr(model_chunk, "param_and_grad_buffer", None)
                    if pad_buf is not None:
                        pad_buf.manual_buffer_registration()

        if args.perform_rl_step and args.rl_use_sequence_packing:
            iteration_sequences = rl_utils.get_iteration_sequence_count(args)
            # Track bins separately for packed mode
            bin_count = (
                mpu.get_data_parallel_world_size() * args.micro_batch_size * get_num_microbatches()
            )
            args.consumed_train_bins += bin_count
        else:
            batch_size = (
                mpu.get_data_parallel_world_size() * args.micro_batch_size * get_num_microbatches()
            )
            iteration_sequences = batch_size

        # Update consumed samples (always means sequences now)
        args.consumed_train_samples += iteration_sequences

        # Use iteration_sequences as batch_size for floating point operations
        batch_size = iteration_sequences

        num_skipped_samples_in_batch = (
            get_current_global_batch_size() - get_current_running_global_batch_size()
        )
        if args.decrease_batch_size_if_needed:
            assert num_skipped_samples_in_batch >= 0
        else:
            assert num_skipped_samples_in_batch == 0
        args.skipped_train_samples += num_skipped_samples_in_batch
        num_floating_point_operations_in_batch = num_floating_point_operations(args, batch_size)
        num_floating_point_operations_so_far += num_floating_point_operations_in_batch
        num_floating_point_operations_since_last_log_event += num_floating_point_operations_in_batch

        # Logging.
        if optimizer is not None and not optimizer.is_stub_optimizer:
            loss_scale = optimizer.get_loss_scale().item()
        else:
            loss_scale = 1.0
        params_norm = None

        if args.log_params_norm:
            params_norm = calc_params_l2_norm(model)
        if optimizer is not None:
            learning_rate = get_canonical_lr_for_logging(optimizer.param_groups)
        else:
            learning_rate = None
        report_memory_flag = training_log(
            loss_dict,
            total_loss_dict,
            learning_rate,
            iteration,
            loss_scale,
            report_memory_flag,
            skipped_iter,
            grad_norm,
            params_norm,
            num_zeros_in_grad,
            max_attention_logit,
            pg_collection=model_pg_collection,
            is_first_iteration=is_first_iteration,
        )
        is_first_iteration = False

        # Evaluation.
        if args.eval_interval and iteration % args.eval_interval == 0 and args.do_valid:
            if args.log_energy:
                energy_monitor.pause()
            timers('interval-time').stop()
            if should_disable_forward_pre_hook(args):
                disable_forward_pre_hook(model)
                pre_hook_enabled = False
            if args.manual_gc and args.manual_gc_eval:
                # Collect all objects.
                gc.collect()
            prefix = f'iteration {iteration}'
            timers('eval-time', log_level=0).start(barrier=True)
            if args.perform_rl_step:
                rl_eval_model = model
                rl_training_model = None
                # If separate inference and training models, swap training weights
                # back to the inference model for RL evaluation.
                if inference_model is not None:
                    inf_core = unwrap_model(inference_model[0])
                    rl_utils._maybe_prefetch_separate_inference_model_weights(
                        inf_core, to_cpu=False
                    )
                    swap_model_weights(model, inference_model, args.refit_method)
                    rl_eval_model = inference_model
                    rl_training_model = model
                rl_utils.evaluate_and_print_results_rl(
                    valid_data_iterator,
                    rl_eval_model,
                    optimizer,
                    iteration,
                    write_to_tensorboard=True,
                    training_model=rl_training_model,
                )
            else:
                evaluate_and_print_results(prefix, forward_step_func,
                                       valid_data_iterator, model,
                                       iteration, process_non_loss_data_func,
                                       config, verbose=False, write_to_tensorboard=True,
                                       non_loss_data_func=non_loss_data_func)

            eval_duration += timers('eval-time').elapsed()
            eval_iterations += sum(args.eval_iters) if isinstance(args.eval_iters, list) else args.eval_iters
            timers('eval-time').stop()
            one_logger_utils.track_e2e_metrics()

            if args.manual_gc and args.manual_gc_eval:
                # Collect only the objects created and used in evaluation.
                gc.collect(generation=0)
            if should_disable_forward_pre_hook(args):
                enable_forward_pre_hook(model)
                pre_hook_enabled = True
            timers('interval-time', log_level=0).start(barrier=True)
            if args.log_energy:
                energy_monitor.resume()
            if args.num_experts is not None:
                clear_aux_losses_tracker()

        # Miscellaneous post-training-step functions (e.g., FT heartbeats, GC).
        # Some of these only happen at specific iterations. Capture updated FLOPs accumulator
        # (it is reset inside the callback after logging).
        num_floating_point_operations_since_last_log_event = post_training_step_callbacks(
            model,
            optimizer,
            opt_param_scheduler,
            iteration,
            prof,
            num_floating_point_operations_since_last_log_event,
            nsys_nvtx_context,
        )

        # Checkpoint and decide whether to exit.
        should_exit = checkpoint_and_decide_exit(
            model,
            optimizer,
            opt_param_scheduler,
            iteration,
            num_floating_point_operations_so_far,
            checkpointing_context,
            train_data_iterator,
        )
        if should_exit:
            break

    # Destroy CUDA Graphs.
    if args.cuda_graph_impl == "transformer_engine" and cuda_graph_helper.graphs_created():
        cuda_graph_helper.delete_cuda_graphs()

    one_logger_utils.track_e2e_metrics()

    # Flush TensorBoard, WandB writers and one-logger.
    writer = get_tensorboard_writer()
    if writer:
        writer.flush()

    # Close out pre-hooks if using distributed optimizer and overlapped param gather.
    if pre_hook_enabled:
        disable_forward_pre_hook(model)

    ft_integration.on_checkpointing_start()
    # This will finalize all unfinalized async request and terminate
    # a persistent async worker if persistent ckpt worker is enabled
    maybe_finalize_async_save(blocking=True, terminate=True)
    ft_integration.on_checkpointing_end(is_async_finalization=True)

    if args.log_energy:
        energy_monitor.lap()
        total_energy = energy_monitor.get_total()
        print_rank_0(f"Total training energy (GPU): {total_energy / 1e6:.3f} MJ")
        energy_monitor.shutdown()

    # If any exit conditions (signal handler, duration, iterations) have been reached, exit.
    if should_exit:
        wandb_writer = get_wandb_writer()
        if wandb_writer:
            wandb_writer.finish()
        ft_integration.shutdown()
        one_logger_utils.finish()
        if args.perform_rl_step:
            rl_utils.rl_inference_interface_shutdown()
        sys.exit(exit_code)

    return iteration, num_floating_point_operations_so_far


def evaluate(
    forward_step_func,
    data_iterator,
    model,
    process_non_loss_data_func,
    config,
    verbose=False,
    non_loss_data_func=None,
    eval_iters=None,
):
    """Evaluation."""
    args = get_args()
    timers = get_timers()

    timers('evaluate', log_level=0).start(barrier=True)

    if args.vision_pretraining and args.vision_pretraining_type == "dino":
        from megatron.legacy.model.vision.knn_monitor import compute_feature_bank

        compute_feature_bank(model)

    # Turn on evaluation mode which disables dropout.
    for model_module in model:
        model_module.eval()

    # Disable result validation during evaluation
    rerun_state_machine = get_rerun_state_machine()
    rerun_mode = rerun_state_machine.get_mode()
    rerun_state_machine.set_mode(RerunMode.DISABLED)

    total_loss_dict = {}

    # make validation batch size independent from training batch size
    eval_batch_size = args.global_batch_size
    eval_num_microbatches = eval_batch_size // (args.micro_batch_size * args.data_parallel_size)
    forward_backward_func = get_forward_backward_func()
    if args.cuda_graph_impl == "local" and CudaGraphScope.full_iteration in args.cuda_graph_scope:
        forward_backward_func = FullCudaGraphWrapper(forward_backward_func, cuda_graph_warmup_steps=args.cuda_graph_warmup_steps)

    if has_nvidia_modelopt:
        # [ModelOpt]: Pipeline-parallel Distillation stacks student and teacher tensors
        adjust_tensor_shapes_fn = get_tensor_shapes_adjust_fn_for_distillation(
            model,
            seq_length=args.seq_length,
            micro_batch_size=args.micro_batch_size,
            decoder_seq_length=args.decoder_seq_length,
        )
    else:
        adjust_tensor_shapes_fn = None

    if eval_iters is None:
        eval_iters = args.eval_iters

    with torch.no_grad():
        iteration = 0
        if verbose:
            print_rank_0(f'Evaluating on {eval_iters * eval_batch_size} samples')
        while iteration < eval_iters:
            iteration += 1
            if verbose:
                print_rank_0(f'Evaluating iter {iteration}/{eval_iters}')

            # Don't care about timing during evaluation
            config.timers = None
            ft_integration.on_eval_step_start()
            loss_dicts = forward_backward_func(
                forward_step_func=forward_step_func,
                data_iterator=data_iterator,
                model=model,
                num_microbatches=eval_num_microbatches,
                seq_length=args.seq_length,
                micro_batch_size=args.micro_batch_size,
                decoder_seq_length=args.decoder_seq_length,
                forward_only=True,
                adjust_tensor_shapes_fn=adjust_tensor_shapes_fn,
            )
            ft_integration.on_eval_step_end()
            config.timers = get_timers()

            # Empty unused memory
            if args.empty_unused_memory_level >= 1:
                torch.cuda.empty_cache()

            if mpu.is_pipeline_last_stage(ignore_virtual=True):
                # Reduce across processes.
                for key in loss_dicts[0].keys():
                    if key not in total_loss_dict:
                        total_loss_dict[key] = torch.tensor([0.0, 0.0], dtype=torch.float, device='cuda')
                    val = [x[key].view(-1) for x in loss_dicts]

                    if val[0].numel() == 2:
                        if args.sft:
                            # normalize over micro batch instead of global
                            val = torch.vstack(val)
                            val = val[:, 0] / val[:, 1].clamp(min=1)
                            val = val.mean()
                            torch.distributed.all_reduce(
                                val,
                                group=mpu.get_data_parallel_group(with_context_parallel=True)
                            )
                            val /= torch.distributed.get_world_size(
                                group=mpu.get_data_parallel_group(with_context_parallel=True)
                            )
                            total_loss_dict[key][0] += val
                            total_loss_dict[key][1] += 1
                        else :
                            val = torch.vstack(val).sum(dim=0)
                            torch.distributed.all_reduce(
                                val,
                                group=mpu.get_data_parallel_group(with_context_parallel=True)
                            )
                            total_loss_dict[key] += val
                    elif val[0].numel() == 1:
                        val = torch.cat(val).sum()
                        total_loss_dict[key][0] += val
                        total_loss_dict[key][1] += len(loss_dicts)
                    else:
                        raise ValueError(f"Invalid value shape: {val[0].shape} for key {key}")

            args.consumed_valid_samples += eval_batch_size

            if args.exit_duration_in_mins:
                train_time = (time.time() - _TRAIN_START_TIME) / 60.0
                done_cuda = torch.tensor(
                    [train_time > args.exit_duration_in_mins], dtype=torch.int, device='cuda'
                )
                torch.distributed.all_reduce(done_cuda, op=torch.distributed.ReduceOp.MAX)
                done = done_cuda.item()
                if done:
                    rerun_state_machine.set_mode(rerun_mode)
                    print_rank_0('Exiting during evaluation, timelimit reached')
                    return None, None, True

        collected_non_loss_data = None
        if non_loss_data_func is not None:
            collected_non_loss_data = non_loss_data_func(model)
        elif process_non_loss_data_func is not None and is_last_rank():
            collected_non_loss_data = forward_backward_func(
                forward_step_func=forward_step_func,
                data_iterator=data_iterator,
                model=model,
                num_microbatches=get_num_microbatches(),
                seq_length=args.seq_length,
                micro_batch_size=args.micro_batch_size,
                decoder_seq_length=args.decoder_seq_length,
                forward_only=True,
                collect_non_loss_data=True,
            )

    # Move model back to the train mode.
    for model_module in model:
        model_module.train()

    for key in total_loss_dict:
        numerator, denominator = total_loss_dict[key]
        total_loss_dict[key] = numerator / denominator

    timers('evaluate').stop()
    timers.log(['evaluate'])

    rerun_state_machine.set_mode(rerun_mode)

    return total_loss_dict, collected_non_loss_data, False


def evaluate_and_print_results(
    prefix,
    forward_step_func,
    data_iterator,
    model,
    iteration,
    process_non_loss_data_func,
    config,
    verbose=False,
    write_to_tensorboard=True,
    non_loss_data_func=None,
):
    """Helper function to evaluate and dump results on screen."""
    args = get_args()
    if write_to_tensorboard:
        writer = get_tensorboard_writer()
    else:
        writer = None

    wandb_writer = get_wandb_writer()

    data_iterators = data_iterator if args.multiple_validation_sets else [data_iterator]

    if not args.multiple_validation_sets:
        eval_iters = [args.eval_iters]
    else:
        eval_iters = args.eval_iters

    if args.full_validation:
        assert len(eval_iters) == len(data_iterators)

        # with full validation we need to distribute eval_iters to all ranks
        if mpu.get_tensor_model_parallel_rank() == 0:
            eval_iters = torch.tensor(args.eval_iters, dtype=torch.long, device='cuda')
        else:
            eval_iters = torch.tensor([0] * len(eval_iters), dtype=torch.long, device='cuda')
        torch.distributed.broadcast(eval_iters, 0)
        eval_iters = eval_iters.tolist()
        args.eval_iters = eval_iters[0] if not args.multiple_validation_sets else eval_iters
    elif not args.multiple_validation_sets:
        eval_iters = [args.eval_iters]
    else:
        eval_iters = args.eval_iters

    for index, (iterator, iterations) in enumerate(zip(data_iterators, eval_iters)):
        suffix = ""
        if args.multiple_validation_sets:
            suffix = f"-{index}"
        total_loss_dict, collected_non_loss_data, timelimit = evaluate(
            forward_step_func,
            iterator,
            model,
            process_non_loss_data_func,
            config,
            verbose,
            non_loss_data_func,
            eval_iters=iterations,
        )
        # Timelimit hit during evaluation
        if timelimit:
            return
        string = f' validation{suffix} loss at {prefix} | '
        for key in total_loss_dict:
            string += '{} value: {:.6E} | '.format(key, total_loss_dict[key].item())
            ppl = math.exp(min(20, total_loss_dict[key].item()))
            string += '{} PPL: {:.6E} | '.format(key, ppl)
            if writer:
                writer.add_scalar('{} validation{}'.format(key, suffix), total_loss_dict[key].item(), iteration)
                writer.add_scalar(
                    '{} validation{} vs samples'.format(key, suffix),
                    total_loss_dict[key].item(),
                    args.consumed_train_samples,
                )
                if args.log_validation_ppl_to_tensorboard:
                    writer.add_scalar('{} validation{} ppl'.format(key, suffix), ppl, iteration)
                    writer.add_scalar(
                        '{} validation{} ppl vs samples'.format(key, suffix), ppl, args.consumed_train_samples
                    )
                if wandb_writer and is_last_rank():
                    wandb_writer.log(
                        {'{} validation{}'.format(key, suffix): total_loss_dict[key].item()}, iteration
                    )

        if process_non_loss_data_func is not None and writer and is_last_rank():
            process_non_loss_data_func(collected_non_loss_data, iteration, writer)

        length = len(string) + 1
        print_rank_last('-' * length)
        print_rank_last(string)
        print_rank_last('-' * length)


def cyclic_iter(iter):
    while True:
        for x in iter:
            yield x


def get_train_valid_test_num_samples():
    """Train/valid/test num samples."""

    args = get_args()

    # Number of train/valid/test samples.
    if args.train_samples:
        train_samples = args.train_samples
    else:
        train_samples = args.train_iters * args.global_batch_size
    if args.full_validation:
        eval_samples = None
    else:
        if args.skip_train:
            eval_iters = args.eval_iters
        else:
            assert args.train_iters is not None
            eval_iters = (args.train_iters // args.eval_interval + 1) * args.eval_iters
        eval_samples = eval_iters * args.global_batch_size
    test_samples = args.eval_iters * args.global_batch_size

    # Get train_samples in current phase.
    if args.phase_transition_iterations:
        phase_transition_samples = [0] + [t * args.global_batch_size for t in args.phase_transition_iterations] + [args.train_samples]
        current_sample = args.iteration * args.global_batch_size
        last_transition_sample = max(s for s in phase_transition_samples if s <= current_sample)
        next_transition_sample = min(s for s in phase_transition_samples if s > current_sample)
        train_samples_in_current_phase = next_transition_sample - last_transition_sample
    else:
        train_samples_in_current_phase = train_samples

    return (train_samples_in_current_phase, eval_samples, test_samples)


def build_train_valid_test_datasets(build_train_valid_test_datasets_provider, train_valid_test_num_samples=None):
    """Build pretraining datasets."""
    if train_valid_test_num_samples is None:
        train_valid_test_num_samples = get_train_valid_test_num_samples()
    print_rank_0(' > datasets target sizes (minimum size):')
    print_rank_0('    train:      {}'.format(train_valid_test_num_samples[0]))
    print_rank_0('    validation: {}'.format(train_valid_test_num_samples[1]))
    print_rank_0('    test:       {}'.format(train_valid_test_num_samples[2]))
    return build_train_valid_test_datasets_provider(train_valid_test_num_samples)


def build_train_valid_test_data_loaders(build_train_valid_test_datasets_provider):
    """Build pretraining data loaders."""

    args = get_args()

    (train_dataloader, valid_dataloaders, test_dataloader) = (None, None, None)

    print_rank_0('> building train, validation, and test datasets ...')

    # Backward compatibility, assume fixed batch size.
    if args.iteration > 0 and args.consumed_train_samples == 0:
        assert (
            args.train_samples is None
        ), 'Only backward compatiblity support for iteration-based training'

        args.consumed_train_samples = args.iteration * args.global_batch_size
    if args.iteration > 0 and args.consumed_valid_samples == 0:
        if args.train_samples is None:
            args.consumed_valid_samples = (
                (args.iteration // args.eval_interval) * args.eval_iters * args.global_batch_size
            )

    # Get consumed train samples in this phase.
    if args.phase_transition_iterations:
        last_transition = max(iteration for iteration in (0, *args.phase_transition_iterations) if iteration <= args.iteration)
        consumed_train_samples_in_current_phase = (args.iteration - last_transition) * args.global_batch_size
    else:
        consumed_train_samples_in_current_phase = args.consumed_train_samples

    # Rely on distributed-aware core datasets, temporary
    is_distributed = getattr(build_train_valid_test_datasets_provider, "is_distributed", False)

    # Construct the data pipeline
    if is_distributed or mpu.get_tensor_model_parallel_rank() == 0:

        # Build datasets and dataloders.
        if args.perform_rl_step:
            # we don't need to build any dataloaders for RL training
            train_dataloader = None
            valid_dataloaders = None
            test_dataloader = None
            do_train = (args.train_iters or 0) > 0
            do_valid = (args.full_validation or args.eval_iters > 0)
            do_test = (args.full_validation or args.eval_iters > 0)

        else:
            # Build datasets.
            train_ds, valid_ds, test_ds = build_train_valid_test_datasets(build_train_valid_test_datasets_provider)
            valid_ds = [valid_ds] if not isinstance(valid_ds, list) else valid_ds
            if args.skip_train:
                train_dataloader = None
            else:
                train_dataloader = build_pretraining_data_loader(train_ds, consumed_train_samples_in_current_phase)
            valid_dataloaders = []
            for valid_d in valid_ds:
                if args.skip_train or args.full_validation:
                    valid_dataloaders.append(build_pretraining_data_loader(valid_d, 0))
                else:
                    if args.multiple_validation_sets:
                        # TODO(bnorick): for multiple validation sets without full validation, args.consumed_valid_samples is not
                        # correct and needs to be calculated/set per validation set
                        raise NotImplementedError("--multiple-validation-sets currently requires --full-validation")
                    valid_dataloaders.append(build_pretraining_data_loader(valid_d, args.consumed_valid_samples))
            if not args.multiple_validation_sets:
                assert len(valid_dataloaders) == 1
            test_dataloader = build_pretraining_data_loader(test_ds, 0)
            do_train = train_dataloader is not None and (args.skip_train or args.train_iters > 0)
            do_valid = valid_dataloaders is not None and (args.full_validation or args.eval_iters > 0)
            do_test = test_dataloader is not None and (args.full_validation or args.eval_iters > 0)

        flags = torch.tensor(
            [int(do_train), int(do_valid), int(do_test)], dtype=torch.long, device='cuda'
        )
    else:
        flags = torch.tensor([0, 0, 0], dtype=torch.long, device='cuda')

    torch.distributed.broadcast(flags, 0)

    args.do_train = getattr(args, "do_train", False) or flags[0].item()
    args.do_valid = getattr(args, "do_valid", False) or flags[1].item()
    args.do_test = getattr(args, "do_test", False) or flags[2].item()
    return train_dataloader, valid_dataloaders, test_dataloader


def build_train_valid_test_data_iterators(build_train_valid_test_datasets_provider):
    """Build pretraining data iterators."""

    args = get_args()

    # Build loaders.
    train_dataloader, valid_dataloaders, test_dataloader = build_train_valid_test_data_loaders(
        build_train_valid_test_datasets_provider
    )

    # Build iterators.
    dl_type = args.dataloader_type
    assert dl_type in ['single', 'cyclic', 'external']

    def _get_iterator(dataloader_type, dataloader):
        """Return dataset iterator."""
        if dataloader_type == "single":
            return RerunDataIterator(iter(dataloader))
        elif dataloader_type == "cyclic":
            return RerunDataIterator(iter(cyclic_iter(dataloader)))
        elif dataloader_type == "external":
            # External dataloader is passed through. User is expected to define how to iterate.
            if isinstance(dataloader, list):
                return [RerunDataIterator(d) for d in dataloader]
            else:
                return RerunDataIterator(dataloader)
        else:
            raise RuntimeError("unexpected dataloader type")

    if train_dataloader is not None:
        train_data_iterator = _get_iterator(dl_type, train_dataloader)
    else:
        train_data_iterator = None

    if valid_dataloaders is not None:
        # when using full validation, we need to override eval iters with the correct
        # number of iterations on tp rank 0 so that it can be distributed to the other
        # ranks later
        if args.full_validation:
            if args.multiple_validation_sets:
                if valid_dataloaders[0] is None:
                    args.eval_iters = [None]*len(valid_dataloaders)
                else:
                    args.eval_iters = [len(dl) for dl in valid_dataloaders]
            else:
                args.eval_iters = len(valid_dataloaders[0])

        if args.multiple_validation_sets:
            if valid_dataloaders[0] is None:
                valid_data_iterators = [None] * len(valid_dataloaders)
            else:
                valid_dl_type = "cyclic" if args.full_validation else dl_type
                print(
                    f"[VALID DATA LOADER LENGTHS] "
                    ", ".join(f"{idx}: {len(dl)}" for idx, dl in enumerate(valid_dataloaders))
                )
                valid_data_iterators = [
                    _get_iterator(valid_dl_type, dl) for dl in valid_dataloaders
                ]
        elif valid_dataloaders[0] is not None:
            valid_data_iterators = _get_iterator(dl_type, valid_dataloaders[0])
        else:
            valid_data_iterators = None
    else:
        valid_data_iterators = None

    if test_dataloader is not None:
        test_data_iterator = _get_iterator(dl_type, test_dataloader)
    else:
        test_data_iterator = None

    return train_data_iterator, valid_data_iterators, test_data_iterator


def should_disable_forward_pre_hook(args):
    """Block forward pre-hook for certain configurations."""
    return (
        not args.use_megatron_fsdp
        and (args.use_distributed_optimizer or 'dist' in args.optimizer)
        and args.overlap_param_gather
    )


================================================
FILE: megatron/training/utils.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

"""General utilities."""
import json
import os
import sys
import warnings
from contextlib import contextmanager
from datetime import datetime
from collections import defaultdict

import torch

from megatron.core.msc_utils import MultiStorageClientFeature, open_file
from megatron.core._rank_utils import safe_get_rank as _safe_get_rank

try:
    from transformer_engine.pytorch.optimizers import multi_tensor_applier, multi_tensor_l2norm
except ImportError:
    try:
        from amp_C import multi_tensor_l2norm
        from apex.multi_tensor_apply import multi_tensor_applier
    except ImportError:
        warnings.warn(
            f'Transformer Engine and Apex are not installed. '
            'Falling back to local implementations of '
            'multi_tensor_applier and multi_tensor_l2norm'
        )

        from megatron.core.utils import (
            local_multi_tensor_l2_norm as multi_tensor_l2norm,
            local_multi_tensor_applier as multi_tensor_applier,
        )

from megatron.training import get_args, get_timers, get_adlr_autoresume
from megatron.core import mpu
from megatron.core.datasets.utils import get_blend_from_list
from megatron.core.tensor_parallel import param_is_not_tensor_parallel_duplicate
from megatron.core.utils import (
    get_batch_on_this_cp_rank,
    get_data_parallel_group_if_dtensor,
    to_local_if_dtensor,
    unwrap_model,
)
from megatron.legacy.model.module import param_is_not_shared


def calc_params_l2_norm(model, force_create_fp32_copy=False):
    """Calculate l2 norm of parameters"""
    args = get_args()
    if not isinstance(model, list):
        model = [model]

    if getattr(args, 'use_megatron_fsdp', False):
        # All Megatron FSDP parameters are expected to be PyTorch DTensor.
        # params_data is a dict of device_mesh -> list of local tensors.
        params = []
        for model_chunk in model:
            model_chunk.stop_communication()
            for name, param in model_chunk.named_parameters():
                if not hasattr(param, "_local_tensor"):
                    raise RuntimeError(
                        f"Megatron FSDP requires parameters are PyTorch DTensor. "
                        f"Parameter {name} is not a DTensor."
                    )
                params.append(param)

        return calc_dtensor_params_l2_norm(params)

    # Seperate moe and dense params
    params_data = []
    moe_params_data = []
    sharded_params_data = []
    data_parallel_group = None

    for model_chunk in model:
        for param in model_chunk.parameters():
            data_parallel_group = get_data_parallel_group_if_dtensor(param, data_parallel_group)
            is_not_tp_duplicate = param_is_not_tensor_parallel_duplicate(param)
            if not is_not_tp_duplicate:
                continue
            assert is_not_tp_duplicate
            if not getattr(param, 'allreduce', True):
                assert param_is_not_shared(param)
                param = to_local_if_dtensor(param)
                if args.bf16:
                    if not force_create_fp32_copy and hasattr(param, 'main_param'):
                        if getattr(param, 'main_param_sharded', False):
                            if param.main_param is not None:
                                sharded_params_data.append(param.main_param)
                        else:
                            moe_params_data.append(param.main_param)
                    else:
                        # Fallback to original logic of making a fp32 copy of the
                        # parameter if `.main_param` attribute is not available.
                        moe_params_data.append(param.data.float())
                else:
                    moe_params_data.append(param.data)
            else:
                if param_is_not_shared(param):
                    param = to_local_if_dtensor(param)
                    if args.bf16:
                        if not force_create_fp32_copy and hasattr(param, 'main_param'):
                            if getattr(param, 'main_param_sharded', False):
                                if param.main_param is not None:
                                    sharded_params_data.append(param.main_param)
                            else:
                                params_data.append(param.main_param)
                        else:
                            # Fallback to original logic of making a fp32 copy of the
                            # parameter if `.main_param` attribute is not available.
                            params_data.append(param.data.float())
                    else:
                        params_data.append(param.data)

    # Calculate norm.
    dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device='cuda')
    if len(params_data) > 0:
        norm, _ = multi_tensor_applier(
            multi_tensor_l2norm, dummy_overflow_buf, [params_data], False  # no per-parameter norm.
        )
        norm_2 = norm * norm
    else:
        norm_2 = torch.zeros((1,), dtype=torch.float32, device='cuda')

    if data_parallel_group is not None:
        torch.distributed.all_reduce(
            norm_2, op=torch.distributed.ReduceOp.SUM, group=data_parallel_group
        )

    # Add norm contribution from params with sharded main_params. These norms need to be
    # accumulated across the DP group since the main parameters are sharded because
    # of distributed optimizer.
    if len(sharded_params_data) > 0:
        dummy_overflow_buf = torch.tensor([0], dtype=torch.int, device='cuda')
        sharded_norm, _ = multi_tensor_applier(
            multi_tensor_l2norm,
            dummy_overflow_buf,
            [sharded_params_data],
            False,  # no per-parameter norm.
        )
        sharded_norm_2 = sharded_norm * sharded_norm
    else:
        sharded_norm_2 = torch.zeros((1,), dtype=torch.float32, device='cuda')
    # Sum over all DP groups, including CP since distributed optimizer state is
    # sharded jointly over DP+CP.
    torch.distributed.all_reduce(
        sharded_norm_2,
        op=torch.distributed.ReduceOp.SUM,
        group=mpu.get_data_parallel_group(with_context_parallel=True)
    )
    norm_2 += sharded_norm_2

    # Add norm contribution from expert layers in MoEs.
    if len(moe_params_data) > 0:
        moe_norm, _ = multi_tensor_applier(
            multi_tensor_l2norm,
            dummy_overflow_buf,
            [moe_params_data],
            False,  # no per-parameter norm.
        )
        moe_norm_2 = moe_norm * moe_norm

    # Account for MoE norm even if current rank doesn't have any expert params to prevent
    # hang in models with un-even numbers of MoE layers.
    # See details in https://gitlab-master.nvidia.com/ADLR/megatron-lm/-/issues/409
    else:
        moe_norm_2 = torch.zeros_like(norm_2)

    # Reduce norm across model parallel groups (dense and expert).
    # Dense params should sum across all model-parallel GPUs (tensor + pipeline).
    dense_reduce_group = mpu.get_model_parallel_group()
    ranks_in_dense_reduce_group = torch.distributed.get_process_group_ranks(dense_reduce_group)
    # Expert params should sum across all model-parallel GPUs (expert + tensor + pipeline).
    expert_reduce_group = mpu.get_expert_tensor_model_pipeline_parallel_group()
    ranks_in_expert_reduce_group = torch.distributed.get_process_group_ranks(expert_reduce_group)

    # If dense and expert reduce groups are the same, sum then reduce.
    if ranks_in_dense_reduce_group == ranks_in_expert_reduce_group:
        norm_2 += moe_norm_2
        torch.distributed.all_reduce(
            norm_2, op=torch.distributed.ReduceOp.SUM, group=dense_reduce_group
        )
    # If dense and expert reduce groups are different, reduce then sum.
    else:
        torch.distributed.all_reduce(
            norm_2, op=torch.distributed.ReduceOp.SUM, group=dense_reduce_group
        )
        torch.distributed.all_reduce(
            moe_norm_2, op=torch.distributed.ReduceOp.SUM, group=expert_reduce_group
        )
        norm_2 += moe_norm_2

    return norm_2.item() ** 0.5


def calc_dtensor_params_l2_norm(params):
    """Calculate l2 norm of DTensor parameters."""
    params_data = defaultdict(list)
    for param in params:
        params_data[param._spec].append(param._local_tensor)

    total_norm_2 = torch.zeros((1,), dtype=torch.float32, device='cuda')
    dummy_overflow_buf = torch.zeros((1,), dtype=torch.int, device='cuda')
    for dtensor_spec, local_tensors in params_data.items():
        local_tensors = [t for t in local_tensors if t.numel() > 0]
        if len(local_tensors) == 0:
            norm = torch.zeros((1,), dtype=torch.float32, device='cuda')
        else:
            norm, _ = multi_tensor_applier(
                multi_tensor_l2norm, dummy_overflow_buf, [local_tensors], False  # no per-parameter norm.
            )
        norm_2 = norm * norm
        for pg, placement in zip(
            dtensor_spec.device_mesh.get_all_groups(),
            dtensor_spec.placements,
        ):
            if placement.is_shard():
                torch.distributed.all_reduce(
                    norm_2, op=torch.distributed.ReduceOp.SUM, group=pg
                )
            elif placement.is_replicate():
                # Replicated parameters are already summed across all ranks.
                pass
            else:
                raise RuntimeError(
                    f"Unsupported placement {placement} for Megatron FSDP."
                )
        total_norm_2 += norm_2

    return total_norm_2.item() ** 0.5


def average_losses_across_data_parallel_group(losses):
    """Reduce a tensor of losses across all GPUs."""
    averaged_losses = torch.cat([loss.clone().detach().view(1) for loss in losses])
    torch.distributed.all_reduce(averaged_losses, group=mpu.get_data_parallel_group())
    averaged_losses = averaged_losses / mpu.get_data_parallel_group().size()

    return averaged_losses


def reduce_max_stat_across_model_parallel_group(stat: float) -> float | None:
    """
    Ranks without an optimizer will have no grad_norm or num_zeros_in_grad stats.
    We need to ensure the logging and writer rank has those values.
    This function reduces a stat tensor across the model parallel group.

    We use an all_reduce max since the values have already been summed across optimizer ranks where possible
    """
    if stat is None:
        stat = -1.0
    stat = torch.tensor([stat], dtype=torch.float32, device=torch.cuda.current_device())
    torch.distributed.all_reduce(
        stat, op=torch.distributed.ReduceOp.MAX, group=mpu.get_model_parallel_group()
    )
    if stat.item() == -1.0:
        # No rank has a valid stat, so return None to indicate that it is None across all ranks.
        return None
    else:
        return stat.item()


def logical_and_across_model_parallel_group(input: bool) -> bool:
    """
    This function gathers a bool value across the model parallel group
    """
    if input is True:
        input = 1
    else:
        input = 0
    input = torch.tensor([input], dtype=torch.int, device=torch.cuda.current_device())
    torch.distributed.all_reduce(
        input, op=torch.distributed.ReduceOp.MIN, group=mpu.get_model_parallel_group()
    )
    return bool(input.item())


def report_memory(name):
    """Simple GPU memory report."""
    args = get_args()
    mega_bytes = 1024.0 * 1024.0
    string = name + ' memory (MB)'
    string += f" | allocated: {torch.cuda.memory_allocated() / mega_bytes:.2f}"
    string += f" | max allocated: {torch.cuda.max_memory_allocated() / mega_bytes:.2f}"
    string += f" | reserved: {torch.cuda.memory_reserved() / mega_bytes:.2f}"
    string += f" | max reserved: {torch.cuda.max_memory_reserved() / mega_bytes:.2f}"
    if args.log_device_memory_used:
        string += f" | total device memory used: {torch.cuda.device_memory_used() / mega_bytes:.2f}"
    if mpu.get_data_parallel_rank() == 0:
        print("[Rank {}] {}".format(torch.distributed.get_rank(), string), flush=True)


def print_params_min_max_norm(optimizer, iteration):
    """Print min, max, and norm of all parameters."""
    index = 0
    rank = torch.distributed.get_rank()
    string = 'iteration, rank, index, tensor-model-parallel, min, max, norm\n'
    optimizer_ = optimizer.optimizer
    for param_group in optimizer_.param_groups:
        for param in param_group['params']:
            index += 1
            min_ = param.data.min()
            max_ = param.data.max()
            norm = torch.linalg.norm(param.data)
            string += '{:7d}, {:4d}, {:4d}, {:2d}, '.format(
                iteration, rank, index, int(param.tensor_model_parallel)
            )
            string += '{:.6E}, {:.6E}, {:.6E}\n'.format(min_, max_, norm)
    print(string, flush=True)


def check_adlr_autoresume_termination(iteration, model, optimizer, opt_param_scheduler):
    """Check for autoresume signal and exit if it is received."""
    from megatron.training.checkpointing import save_checkpoint

    args = get_args()
    autoresume = get_adlr_autoresume()
    # Add barrier to ensure consistnecy.
    torch.distributed.barrier()
    if autoresume.termination_requested():
        if args.save:
            save_checkpoint(iteration, model, optimizer, opt_param_scheduler)
        print_rank_0(">>> autoresume termination request found!")
        if torch.distributed.get_rank() == 0:
            autoresume.request_resume()
        print_rank_0(">>> training terminated. Returning")
        sys.exit(0)


def get_ltor_masks_and_position_ids(data,
                                    eod_token,
                                    pad_token,
                                    reset_position_ids,
                                    reset_attention_mask,
                                    eod_mask_loss,
                                    pad_mask_loss):
    """Build masks and position id for left to right model."""

    # Extract batch size and sequence length.
    micro_batch_size, seq_length = data.size()

    # Attention mask (lower triangular).
    if reset_attention_mask:
        att_mask_batch = micro_batch_size
    else:
        att_mask_batch = 1
    attention_mask = torch.tril(
        torch.ones((att_mask_batch, seq_length, seq_length), device=data.device)
    ).view(att_mask_batch, 1, seq_length, seq_length)

    # Loss mask.
    loss_mask = torch.ones(data.size(), dtype=torch.float, device=data.device)
    if eod_mask_loss:
        loss_mask[data == eod_token] = 0.0
    if pad_mask_loss:
        loss_mask[data == pad_token] = 0.0

    # Position ids.
    position_ids = torch.arange(seq_length, dtype=torch.long, device=data.device)
    position_ids = position_ids.unsqueeze(0).expand_as(data)
    # We need to clone as the ids will be modifed based on batch index.
    if reset_position_ids:
        position_ids = position_ids.clone()

    if reset_position_ids or reset_attention_mask:
        # Loop through the batches:
        for b in range(micro_batch_size):

            # Find indecies where EOD token is.
            eod_index = position_ids[b, data[b] == eod_token] & position_ids[b, data[b] == pad_token]
            # Detach indecies from positions if going to modify positions.
            if reset_position_ids:
                eod_index = eod_index.clone()

            # Loop through EOD indecies:
            prev_index = 0
            for j in range(eod_index.size()[0]):
                i = eod_index[j]
                # Mask attention loss.
                if reset_attention_mask:
                    attention_mask[b, 0, (i + 1) :, : (i + 1)] = 0
                # Reset positions.
                if reset_position_ids:
                    position_ids[b, (i + 1) :] -= i + 1 - prev_index
                    prev_index = i + 1

    # Convert attention mask to binary:
    attention_mask = attention_mask < 0.5

    return attention_mask, loss_mask, position_ids


def print_rank_0(message, rank=None):
    """If distributed is initialized or rank is specified, print only on rank 0."""
    if rank is not None:
        if rank == 0:
            print(message, flush=True)
    else:
        if _safe_get_rank() == 0:
            print(message, flush=True)


def warn_rank_0(message, rank=None):
    """If distributed is initialized or rank is specified, warn only on rank 0."""
    if rank is not None:
        if rank == 0:
            warnings.warn(message)
    else:
        if _safe_get_rank() == 0:
            warnings.warn(message)


def is_rank0():
    """Returns true if called in the rank0, false otherwise."""
    return _safe_get_rank() == 0


def is_last_rank():
    """Returns true if called on last rank, false otherwise."""
    assert torch.distributed.is_initialized()
    return _safe_get_rank() == (torch.distributed.get_world_size() - 1)


def print_rank_last(message):
    """If distributed is initialized, print only on last rank."""
    if torch.distributed.is_initialized() and torch.distributed.get_backend() != 'fake':
        if is_last_rank():
            print(message, flush=True)
    else:
        print(message, flush=True)


def is_hybrid_model(args):
    """Returns True if the model is a hybrid Mamba-Transformer model."""
    return args.hybrid_layer_pattern is not None


def is_first_or_last_pipeline_stage(vp_stage):
    """Return True if on first or last pipeline stage, taking into account virtual
    pipeline parallelism."""
    ignore_virtual = True
    if vp_stage is not None:
        ignore_virtual = False
    return (
        mpu.is_pipeline_first_stage(ignore_virtual=ignore_virtual, vp_stage=vp_stage)
        or mpu.is_pipeline_last_stage(ignore_virtual=ignore_virtual, vp_stage=vp_stage)
    )


def get_device_arch_version():
    """Returns GPU arch version (8: Ampere, 9: Hopper, 10: Blackwell, ...)"""
    return torch.cuda.get_device_properties(torch.device("cuda:0")).major


def append_to_progress_log(string, barrier=True):
    """Append given string to progress log."""
    args = get_args()
    if args.save is None:
        return
    progress_log_filename = os.path.join(args.save, "progress.txt")
    if barrier:
        torch.distributed.barrier()
    if torch.distributed.get_rank() == 0:
        with open_file(progress_log_filename, 'a') as f:
            job_id = os.getenv('SLURM_JOB_ID', '')
            num_gpus = args.world_size
            f.write(
                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\tJob ID: {job_id}\t"
                f"# GPUs: {num_gpus}\t{string}\n"
            )


def get_blend_and_blend_per_split(args):
    """Get blend and blend_per_split from passed-in arguments."""
    use_data_path = args.data_path is not None or args.data_args_path is not None
    use_per_split_data_path = (
        any(
            elt is not None
            for elt in [args.train_data_path, args.valid_data_path, args.test_data_path]
        )
        or args.per_split_data_args_path is not None
    )

    blend = None
    blend_per_split = None
    if use_data_path:
        if args.data_args_path is not None:
            assert args.data_path is None
            with open_file(args.data_args_path, 'r') as f:
                blend = get_blend_from_list(f.read().split())
        else:
            assert args.data_path is not None
            blend = get_blend_from_list(args.data_path)
    elif use_per_split_data_path:
        if args.per_split_data_args_path is not None:
            with open_file(args.per_split_data_args_path, 'r') as f:
                per_split_data_args = json.load(f)
                # Each element in blend_per_split should be a list of files (and optional
                # weights), so split string if needed.
                for split in ["train", "valid", "test"]:
                    if isinstance(per_split_data_args[split], str):
                        per_split_data_args[split] = per_split_data_args[split].split()

                blend_per_split = [
                    get_blend_from_list(per_split_data_args["train"]),
                    get_blend_from_list(per_split_data_args["valid"]),
                    get_blend_from_list(per_split_data_args["test"]),
                ]
        else:
            blend_per_split = [
                get_blend_from_list(args.train_data_path),
                get_blend_from_list(args.valid_data_path),
                get_blend_from_list(args.test_data_path),
            ]
    else:
        blend, blend_per_split = None, None

    return blend, blend_per_split


def get_batch_on_this_tp_rank(data_iterator, mtp_on_this_rank: bool = False):

    args = get_args()

    def _broadcast(item):
        if item is not None:
            torch.distributed.broadcast(
                item,
                mpu.get_tensor_model_parallel_src_rank(),
                group=mpu.get_tensor_model_parallel_group(),
            )

    if mpu.get_tensor_model_parallel_rank() == 0:

        assert data_iterator is not None
        data = next(data_iterator)
        batch = {
            'tokens': data["tokens"].cuda(non_blocking=True),
            'labels': data["labels"].cuda(non_blocking=True),
            'loss_mask': data["loss_mask"].cuda(non_blocking=True),
            'attention_mask': (
                None
                if "attention_mask" not in data
                else data["attention_mask"].cuda(non_blocking=True)
            ),
            'position_ids': data["position_ids"].cuda(non_blocking=True),
            'cu_seqlens': (
                None
                if "cu_seqlens" not in data
                else data["cu_seqlens"].cuda(non_blocking=True)
            ),
            'max_seqlen': (
                None
                if "max_seqlen" not in data
                else data["max_seqlen"].cuda(non_blocking=True)
            ),
            'local_cp_size': (
                None
                if "local_cp_size" not in data
                else data["local_cp_size"].cuda(non_blocking=True)
            ),
        }

        def _broadcast_cu_seqlens(cu_seqlens):
            dev = torch.cuda.current_device()
            n = 0 if cu_seqlens is None else int(cu_seqlens.numel())
            n_tensor = torch.tensor(n, dtype=torch.int64, device=dev)
            _broadcast(n_tensor)

            if n == 0:
                buf = torch.empty(0, dtype=torch.int32, device=dev)
            else:
                assert isinstance(cu_seqlens, torch.Tensor)
                assert cu_seqlens.dtype == torch.int32
                assert cu_seqlens.shape[0] == 1, "micro-batch-size must be 1 for packing"
                buf = cu_seqlens.to(device=dev, non_blocking=True).contiguous()
            _broadcast(buf)

        if args.hybrid_context_parallel:
            seq_len = torch.tensor(batch['tokens'].shape[0], dtype=torch.int32, device=torch.cuda.current_device())
            _broadcast(seq_len)
            
        if args.pipeline_model_parallel_size == 1 or mtp_on_this_rank:
            _broadcast(batch['tokens'])
            _broadcast(batch['labels'])
            _broadcast(batch['loss_mask'])
            _broadcast(batch['attention_mask'])
            _broadcast(batch['position_ids'])
            _broadcast_cu_seqlens(batch['cu_seqlens'])
            _broadcast(batch['max_seqlen'])
            _broadcast(batch['local_cp_size'])

        elif mpu.is_pipeline_first_stage():
            _broadcast(batch['tokens'])
            _broadcast(batch['attention_mask'])
            _broadcast(batch['position_ids'])
            _broadcast_cu_seqlens(batch['cu_seqlens'])
            _broadcast(batch['max_seqlen'])

        elif mpu.is_pipeline_last_stage():
            # Multi-Token Prediction (MTP) layers need tokens and position_ids to calculate embedding.
            # Currently the Multi-Token Prediction (MTP) layers is fixed on the last stage, so we need
            # to broadcast tokens and position_ids to all of the tensor parallel ranks on the last stage.
            _broadcast(batch['labels'])
            _broadcast(batch['loss_mask'])
            _broadcast(batch['attention_mask'])

    else:
        if args.hybrid_context_parallel:
            seq_len = torch.tensor(0, dtype=torch.int32, device=torch.cuda.current_device())
            _broadcast(seq_len)
            shape = (seq_len.item())
        else:
            shape = (args.micro_batch_size, args.seq_length)
            
        tokens = torch.empty(
            shape,
            dtype=torch.int64,
            device=torch.cuda.current_device(),
        )
        labels = torch.empty(
            shape,
            dtype=torch.int64,
            device=torch.cuda.current_device(),
        )
        loss_mask = torch.empty(
            shape,
            dtype=torch.float32,
            device=torch.cuda.current_device(),
        )
        if args.create_attention_mask_in_dataloader:
            shape_attention_mask = (args.micro_batch_size, 1, args.seq_length, args.seq_length) if not args.hybrid_context_parallel else (1, 1, shape[0], shape[0])
            attention_mask = torch.empty(
                shape_attention_mask,
                dtype=torch.bool,
                device=torch.cuda.current_device(),
            )
        else:
            attention_mask = None
        position_ids = torch.empty(
            shape,
            dtype=torch.int64,
            device=torch.cuda.current_device(),
        )
        cu_seqlens = None
        if args.hybrid_context_parallel or args.sft:
            max_seqlen = torch.empty(
                1,
                dtype=torch.int32,
                device=torch.cuda.current_device(),
            )
        else:
            max_seqlen = None
        
        local_cp_size = torch.empty(
            1,
            dtype=torch.int32,
            device=torch.cuda.current_device(),
        ) if args.hybrid_context_parallel else None

        def _broadcast_cu_seqlens():
            dev = torch.cuda.current_device()

            n = torch.empty((), dtype=torch.int64, device=dev)
            _broadcast(n)
            n = int(n.item())

            if n == 0:
                cu_seqlens = torch.empty(0, dtype=torch.int32, device=dev)
            else:
                cu_seqlens = torch.empty((args.micro_batch_size, n), dtype=torch.int32, device=dev)
            _broadcast(cu_seqlens)

            return cu_seqlens if n > 0 else None

        if args.pipeline_model_parallel_size == 1 or mtp_on_this_rank:
            _broadcast(tokens)
            _broadcast(labels)
            _broadcast(loss_mask)
            _broadcast(attention_mask)
            _broadcast(position_ids)
            cu_seqlens = _broadcast_cu_seqlens()
            _broadcast(max_seqlen)
            _broadcast(local_cp_size)

        elif mpu.is_pipeline_first_stage():
            labels = None
            loss_mask = None

            _broadcast(tokens)
            _broadcast(attention_mask)
            _broadcast(position_ids)
            cu_seqlens = _broadcast_cu_seqlens()
            _broadcast(max_seqlen)

        elif mpu.is_pipeline_last_stage():
            # Multi-Token Prediction (MTP) layers need tokens and position_ids to calculate embedding.
            # Currently the Multi-Token Prediction (MTP) layers is fixed on the last stage, so we need
            # to broadcast tokens and position_ids to all of the tensor parallel ranks on the last stage.
            tokens = None
            position_ids = None
            cu_seqlens = None
            max_seqlen = None

            _broadcast(labels)
            _broadcast(loss_mask)
            _broadcast(attention_mask)

        batch = {
            'tokens': tokens,
            'labels': labels,
            'loss_mask': loss_mask,
            'attention_mask': attention_mask,
            'position_ids': position_ids,
            'cu_seqlens': cu_seqlens,
            'max_seqlen': max_seqlen,
            'local_cp_size': local_cp_size,
        }

    return batch


def update_use_dist_ckpt(args):
    args.use_dist_ckpt = args.ckpt_format != "torch"


def to_empty_if_meta_device(module: torch.nn.Module, *, device: torch.device, recurse=True):
    """Move tensors to device if not meta device; otherwise materialize with empty_like().

    Officially, torch suggests to_empty() for meta device materialization. Under the hood,
    torch.empty_like() is applied to all parameters or buffers (see _apply). This may
    accidently overwrite buffers with precomputed values during construction. Given the
    goal is to only materialize those tensors on meta device, this function checks the
    device first and only move the tensor to the destination if it is not on meta device.
   
    Args:
        module: The target module to apply this transformation.
        device: The desired device of the parameters
            and buffers in this module.
        recurse: Whether parameters and buffers of submodules should
            be recursively moved to the specified device.
    """

    def _empty_like_if_meta(tensor: torch.Tensor, *, device: torch.device):
        if tensor.device == torch.device("meta"):
            return torch.empty_like(tensor, device=device)
        else:
            return tensor.to(device)

    return module._apply(
        lambda t: _empty_like_if_meta(t, device=device), recurse=recurse
    )


def get_nvtx_range():
    """Create an NVTX range context manager."""
    try:
        from torch.cuda import nvtx

        @contextmanager
        def nvtx_range(msg, time=False):
            if time:
                timers = get_timers()
                timers(msg, log_level=0).start()
            try:
                nvtx.range_push(msg)
                yield
            finally:
                nvtx.range_pop()
                if time:
                    timers(msg, log_level=0).stop()

        return nvtx_range
    except:
        @contextmanager
        def dummy_range(msg):
            yield
        return dummy_range


================================================
FILE: megatron/training/wandb_utils.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

from pathlib import Path
from typing import Tuple

from megatron.training.global_vars import get_wandb_writer
from megatron.training.utils import print_rank_last


def _get_wandb_artifact_tracker_filename(save_dir: str) -> Path:
    """Wandb artifact tracker file records the latest artifact wandb entity and project"""
    return Path(save_dir) / "latest_wandb_artifact_path.txt"


def _get_artifact_name_and_version(save_dir: Path, checkpoint_path: Path) -> Tuple[str, str]:
    return save_dir.stem, checkpoint_path.stem


def on_save_checkpoint_success(checkpoint_path: str, tracker_filename: str, save_dir: str, iteration: int) -> None:
    """Function to be called after checkpointing succeeds and checkpoint is persisted for logging it as an artifact in W&B

    Args:
        checkpoint_path (str): path of the saved checkpoint
        tracker_filename (str): path of the tracker filename for the checkpoint iteration
        save_dir (str): path of the root save folder for all checkpoints
        iteration (int): iteration of the checkpoint
    """

    wandb_writer = get_wandb_writer()

    if wandb_writer:
        metadata = {"iteration": iteration}
        artifact_name, artifact_version = _get_artifact_name_and_version(Path(save_dir), Path(checkpoint_path))
        artifact = wandb_writer.Artifact(artifact_name, type="model", metadata=metadata)
        # wandb's artifact.add_reference requires absolute paths
        checkpoint_path = str(Path(checkpoint_path).resolve())
        artifact.add_reference(f"file://{checkpoint_path}", checksum=False)
        artifact.add_file(tracker_filename)
        wandb_writer.run.log_artifact(artifact, aliases=[artifact_version])
        wandb_tracker_filename = _get_wandb_artifact_tracker_filename(save_dir)
        wandb_tracker_filename.write_text(f"{wandb_writer.run.entity}/{wandb_writer.run.project}")


def on_load_checkpoint_success(checkpoint_path: str, load_dir: str) -> None:
    """Function to be called after succesful loading of a checkpoint, for aggregation and logging it to W&B

    Args:
        checkpoint_path (str): path of the loaded checkpoint
        load_dir (str): path of the root save folder for all checkpoints
        iteration (int): iteration of the checkpoint
    """

    wandb_writer = get_wandb_writer()
    
    if wandb_writer:
        try:
            artifact_name, artifact_version = _get_artifact_name_and_version(Path(load_dir), Path(checkpoint_path))
            wandb_tracker_filename = _get_wandb_artifact_tracker_filename(load_dir)
            artifact_path = ""
            if wandb_tracker_filename.is_file():
                artifact_path = wandb_tracker_filename.read_text().strip()
                artifact_path = f"{artifact_path}/"
            wandb_writer.run.use_artifact(f"{artifact_path}{artifact_name}:{artifact_version}")
        except Exception:
            print_rank_last(f"  failed to find checkpoint {checkpoint_path} in wandb")


================================================
FILE: megatron/training/yaml_arguments.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

"""Megatron arguments."""

import argparse
import dataclasses
import json
import os
import re
import torch
import types
import yaml

from itertools import chain, starmap
from types import SimpleNamespace

import torch.nn.functional as F

from megatron.core.transformer import TransformerConfig, MLATransformerConfig
from megatron.core.utils import get_torch_version, is_torch_min_version

# Taken from https://stackoverflow.com/questions/65414773/parse-environment-variable-from-yaml-with-pyyaml
# Allows for yaml to use environment variables
env_pattern = re.compile(r".*?\${(.*?)}.*?")
def env_constructor(loader, node):
    value = loader.construct_scalar(node)
    for group in env_pattern.findall(value):
        assert os.environ.get(group) is not None, f"environment variable {group} in yaml not found"
        value = value.replace(f"${{{group}}}", os.environ.get(group))
    return value
yaml.add_implicit_resolver("!pathex", env_pattern)
yaml.add_constructor("!pathex", env_constructor)


str_dtype_to_torch = {
    "float32" : torch.float32,
    "float16" : torch.float16,
    "bfloat16" : torch.bfloat16
}

def validate_yaml(args, defaults={}):
    
    # This is for legacy script env var setting
    if type(args.data_path) is str:
        # If no white space its a single path
        split_data_path = args.data_path.split()
        if len(split_data_path) != 1:
            args.data_path = split_data_path

    # Tensor model parallel size.
    args.model_parallel.tensor_model_parallel_size = min(
        args.model_parallel.tensor_model_parallel_size, args.world_size)
    assert args.world_size % args.model_parallel.tensor_model_parallel_size == 0, 'world size'\
        ' ({}) is not divisible by tensor model parallel size ({})'.format(
            args.world_size, args.model_parallel.tensor_model_parallel_size)
    # Pipeline model parallel size.
    args.model_parallel.pipeline_model_parallel_size = min(
        args.model_parallel.pipeline_model_parallel_size,
        (args.world_size // args.model_parallel.tensor_model_parallel_size))
    args.model_parallel.transformer_pipeline_model_parallel_size = (
        args.model_parallel.pipeline_model_parallel_size - 1
        if args.account_for_embedding_in_pipeline_split else
        args.model_parallel.pipeline_model_parallel_size
    )
    # Checks.
    model_parallel_size = args.model_parallel.pipeline_model_parallel_size * \
                          args.model_parallel.tensor_model_parallel_size
    assert args.world_size % (model_parallel_size * args.model_parallel.context_parallel_size) == 0, \
        'world size ({}) is not divisible by tensor parallel size ({}) times ' \
        'pipeline parallel size ({}) times context parallel size ({})'.format(
        args.world_size, args.model_parallel.tensor_model_parallel_size,
        args.model_parallel.pipeline_model_parallel_size, args.model_parallel.context_parallel_size)
    
    # data_parallel_size is not in model parallel config
    args.data_parallel_size = args.world_size // (model_parallel_size * args.model_parallel.context_parallel_size)
    if args.rank == 0:
        print('using world size: {}, data-parallel size: {}, '
              'context-parallel size: {}, '
              'tensor-model-parallel size: {}, '
              'pipeline-model-parallel size: {}'.format(
                  args.world_size, args.data_parallel_size,
                  args.model_parallel.context_parallel_size,
                  args.model_parallel.tensor_model_parallel_size,
                  args.model_parallel.pipeline_model_parallel_size), flush=True)

    if args.model_parallel.tp_comm_overlap:
        assert args.model_parallel.sequence_parallel == True, 'Tensor parallel communication/GEMM overlap can happen only when sequence parallelism is enabled'

    # Set input defaults.
    for key in defaults:
        # For default to be valid, it should not be provided in the
        # arguments that are passed to the program. We check this by
        # ensuring the arg is set to None.
        if getattr(args, key, None) is not None:
            if args.rank == 0:
                print('WARNING: overriding default arguments for {key}:{v} \
                       with {key}:{v2}'.format(key=key, v=defaults[key],
                                               v2=getattr(args, key)),
                                               flush=True)
        else:
            setattr(args, key, defaults[key])

    # Batch size.
    assert args.micro_batch_size is not None
    assert args.micro_batch_size > 0
    if args.global_batch_size is None:
        args.global_batch_size = args.micro_batch_size * args.data_parallel_size
        if args.rank == 0:
            print('setting global batch size to {}'.format(
                args.global_batch_size), flush=True)
    assert args.global_batch_size > 0

    # num_layers_per_virtual_pipeline_stage is not insde model parallel for checkpointing
    if args.num_layers_per_virtual_pipeline_stage is not None:
        assert args.model_parallel.pipeline_model_parallel_size > 2, \
            'pipeline-model-parallel size should be greater than 2 with ' \
            'interleaved schedule'
        assert args.language_model.num_layers % args.model_parallel.transformer_pipeline_model_parallel_size == 0, \
            'number of layers should be divisible by the pipeline parallel size'
        num_layers_per_pipeline_stage = args.language_model.num_layers // args.model_parallel.transformer_pipeline_model_parallel_size
        assert num_layers_per_pipeline_stage % args.num_layers_per_virtual_pipeline_stage == 0, \
            'number of layers per pipeline stage must be divisible number of layers per virtual pipeline stage'
        args.model_parallel.virtual_pipeline_model_parallel_size = num_layers_per_pipeline_stage // \
            args.num_layers_per_virtual_pipeline_stage
    else:
        args.model_parallel.virtual_pipeline_model_parallel_size = None
        # Overlap P2P communication is disabled if not using the interleaved schedule.
        args.model_parallel.overlap_p2p_comm = False
        if args.rank == 0:
            print('WARNING: Setting args.overlap_p2p_comm to False since non-interleaved '
                  'schedule does not support overlapping p2p communication')

    if args.overlap_param_gather:
        assert args.use_distributed_optimizer, \
            '--overlap-param-gather only supported with distributed optimizer'
        assert args.overlap_grad_reduce, \
            '--overlap-grad-reduce should be turned on when using --overlap-param-gather'

    # Parameters dtype.
    if args.model_parallel.fp16:
        assert not args.model_parallel.bf16
        args.model_parallel.params_dtype = torch.half
    if args.model_parallel.bf16:
        assert not args.model_parallel.fp16
        args.model_parallel.params_dtype = torch.bfloat16
        # bfloat16 requires gradient accumulation and all-reduce to
        # be done in fp32.
        if not args.accumulate_allreduce_grads_in_fp32:
            args.accumulate_allreduce_grads_in_fp32 = True
            if args.rank == 0:
                print('accumulate and all-reduce gradients in fp32 for '
                      'bfloat16 data type.', flush=True)

    if args.rank == 0:
        print('using {} for parameters ...'.format(args.model_parallel.params_dtype),
              flush=True)

    if args.dataloader_type is None:
        args.dataloader_type = 'single'

    # Consumed tokens.
    args.consumed_train_samples = 0
    args.consumed_valid_samples = 0

    # Support for variable sequence lengths across batches/microbatches.
    # set it if the dataloader supports generation of variable sequence lengths
    # across batches/microbatches. Due to additional communication overhead
    # during pipeline parallelism, it should not be set if sequence length
    # is constant during training.
    args.model_parallel.variable_seq_lengths = False

    # Iteration-based training.
    if args.train_iters:
        # If we use iteration-based training, make sure the
        # sample-based options are off.
        assert args.train_samples is None, \
            'expected iteration-based training'
        assert args.lr_decay_samples is None, \
            'expected iteration-based learning rate decay'
        assert args.lr_warmup_samples == 0, \
            'expected iteration-based learning rate warmup'
        assert args.rampup_batch_size is None, \
            'expected no batch-size rampup for iteration-based training'
        if args.lr_warmup_fraction is not None:
            assert args.lr_warmup_iters == 0, \
                'can only specify one of lr-warmup-fraction and lr-warmup-iters'

    # Sample-based training.
    if args.train_samples:
        # If we use sample-based training, make sure the
        # iteration-based options are off.
        assert args.train_iters is None, \
            'expected sample-based training'
        assert args.lr_decay_iters is None, \
            'expected sample-based learning rate decay'
        assert args.lr_warmup_iters == 0, \
            'expected sample-based learnig rate warmup'
        if args.lr_warmup_fraction is not None:
            assert args.lr_warmup_samples == 0, \
                'can only specify one of lr-warmup-fraction ' \
                'and lr-warmup-samples'

    # How to handle this better
    if args.language_model.num_layers is not None:
        assert args.encoder_num_layers is None, \
            'cannot have both num-layers and encoder-num-layers specified'
        args.encoder_num_layers = args.language_model.num_layers
    else:
        assert args.encoder_num_layers is not None, \
            'either num-layers or encoder-num-layers should be specified'
        args.language_model.num_layers = args.encoder_num_layers

    # Check required arguments.
    # removed max_position_embeddings from reqs
    required_args = ['num_layers', 'hidden_size', 'num_attention_heads']
    for req_arg in required_args:
        _check_arg_is_not_none(args.language_model, req_arg)

    # Checks.
    if args.language_model.ffn_hidden_size is None:
        if args.language_model.activation_func == "swiglu":
            # reduce the dimnesion for MLP since projections happens on
            # two linear layers. this keeps the number of paramters in
            # the same ballpark as the counterpart with 4*h size
            # we keep it a multiple of 64, which means the actual tensor size
            # will be a multiple of 64 / tp_size
            args.language_model.ffn_hidden_size = int((4 * args.language_model.hidden_size * 2 / 3) / 64) * 64
        else:
            args.language_model.ffn_hidden_size = 4 * args.language_model.hidden_size

    if args.language_model.kv_channels is None:
        assert args.language_model.hidden_size % args.language_model.num_attention_heads == 0
        args.language_model.kv_channels = args.language_model.hidden_size // args.language_model.num_attention_heads

    #TODO: Implement arguments for encoder-decoder
    if args.seq_length is not None:
        assert args.encoder_seq_length is None
        args.encoder_seq_length = args.seq_length
    else:
        assert args.encoder_seq_length is not None
        args.seq_length = args.encoder_seq_length

    if args.seq_length is not None:
        assert args.max_position_embeddings >= args.seq_length
    if args.decoder_seq_length is not None:
        assert args.max_position_embeddings >= args.decoder_seq_length
    if args.lr is not None:
        assert args.min_lr <= args.lr
    if args.save is not None:
        assert args.save_interval is not None
    # Mixed precision checks.
    if args.fp16_lm_cross_entropy:
        assert args.fp16, 'lm cross entropy in fp16 only support in fp16 mode.'
    if args.language_model.fp32_residual_connection:
        assert args.model_parallel.fp16 or args.model_parallel.bf16, \
            'residual connection in fp32 only supported when using fp16 or bf16.'

    if args.language_model.moe_grouped_gemm:
        assert args.model_parallel.bf16, 'Currently GroupedGEMM for MoE only supports bf16 dtype.'
        dc = torch.cuda.get_device_capability()
        assert dc[0] >= 8, "Unsupported compute capability for GroupedGEMM kernels."

    if args.weight_decay_incr_style == 'constant':
        assert args.start_weight_decay is None
        assert args.end_weight_decay is None
        args.start_weight_decay = args.weight_decay
        args.end_weight_decay = args.weight_decay
    else:
        assert args.start_weight_decay is not None
        assert args.end_weight_decay is not None

    # Persistent fused layer norm.
    if not is_torch_min_version("1.11.0a0"):
        args.language_model.persist_layer_norm = False
        if args.rank == 0:
            print('Persistent fused layer norm kernel is supported from '
                  'pytorch v1.11 (nvidia pytorch container paired with v1.11). '
                  'Defaulting to no_persist_layer_norm=True')

    # Activation recomputing.
    if args.language_model.distribute_saved_activations:
        assert args.model_parallel.tensor_model_parallel_size > 1, 'can distribute ' \
            'recomputed activations only across tensor model ' \
            'parallel groups'
        assert args.language_model.recompute_granularity == 'full', \
            'distributed recompute activations is only '\
            'application to full recompute granularity'
        assert args.language_model.recompute_method is not None, \
            'for distributed recompute activations to work you '\
            'need to use a recompute method '
        assert is_torch_min_version("1.10.0a0"), \
            'distributed recompute activations are supported for pytorch ' \
            'v1.10 and above (Nvidia Pytorch container >= 21.07). Current ' \
            f'pytorch version is v{get_torch_version()}.'

    if args.language_model.recompute_granularity == 'selective':
        assert args.language_model.recompute_method is None, \
            'recompute method is not yet supported for ' \
            'selective recomputing granularity'

    # disable sequence parallelism when tp=1
    # to avoid change in numerics when
    # sequence_parallelism is enabled.
    if args.model_parallel.tensor_model_parallel_size == 1:
        args.model_parallel.sequence_parallel = False

    if os.environ.get('CUDA_DEVICE_MAX_CONNECTIONS') != "1":
        if args.model_parallel.sequence_parallel:
            raise RuntimeError(
                "Using sequence parallelism requires setting the environment variable "
                "CUDA_DEVICE_MAX_CONNECTIONS to 1")
    
    # MoE Spec check
    if args.language_model.num_moe_experts is not None:
        assert args.spec is None, "Model Spec must be None when using MoEs"
        if args.model_parallel.tensor_model_parallel_size > 1:
            assert args.model_parallel.sequence_parallel, \
                "When using MoE and tensor parallelism, sequence parallelism must be used."

    # Expert parallelism check
    if args.model_parallel.expert_model_parallel_size  > 1:
        assert args.language_model.num_moe_experts is not None, "num_experts must be non None to use expert model parallelism"
        assert args.language_model.num_moe_experts % args.model_parallel.expert_model_parallel_size == 0, \
            "Number of experts should be a multiple of expert model parallel_size."
        assert not args.model_parallel.fp16, \
            "Expert parallelism is not supported with fp16 training."

    # Print arguments.
    _print_args("arguments", args)

    #TODO: Added as much of the global initialization requires the model parallel arguments
    args = SimpleNamespace(**args.__dict__, **args.model_parallel.__dict__)
    args = SimpleNamespace(**args.__dict__, **args.language_model.__dict__)
    # For GPT Layer spec in pretrain_gpt
    args.num_experts = args.language_model.num_moe_experts

    return args

def _print_args(title, args):
    """Print arguments."""
    if args.rank == 0:
        print(f'------------------------ {title} ------------------------',
              flush=True)
        str_list = []
        for arg in vars(args):
            dots = '.' * (48 - len(arg))
            str_list.append('  {} {} {}'.format(arg, dots, getattr(args, arg)))
        for arg in sorted(str_list, key=lambda x: x.lower()):
            print(arg, flush=True)
        print(f'-------------------- end of {title} ---------------------',
              flush=True)

def core_config_from_args(args, dataclass=TransformerConfig):
    """Builds core config object from namespace args from given dataclass

    Raises exception if argument missing in args

    Args:
        args(SimpleNamespace, optional): Namespace to pull argument values from 
        dataclass (dataclass, optional): Core dataclass config to pull argument names from


    Returns:
        SimpleNamespace: The returned namespace to build core config from
    """
    kw_args = {}
    for f in dataclasses.fields(dataclass):
        if hasattr(args, f.name):
            kw_args[f.name] = getattr(args, f.name)
        else:
            raise Exception(f"Missing argument {f.name} for {str(dataclass)} config")
    return kw_args

def _check_arg_is_not_none(args, arg):
    assert getattr(args, arg) is not None, '{} argument is None'.format(arg)

def core_transformer_config_from_yaml(args, transfomer_key = "language_model"):    
    # Combine transfomer config with model parallel args
    args = SimpleNamespace(**vars(getattr(args, transfomer_key)), **vars(args.model_parallel))
    # Translate args to core transformer configuration
    kw_args = core_config_from_args(args, TransformerConfig)    
    
    # Hardcoded 
    kw_args['deallocate_pipeline_outputs'] = True
    kw_args['pipeline_dtype'] = kw_args['params_dtype']
    kw_args['batch_p2p_comm'] = not args.overlap_p2p_comm 
    
    assert args.activation_func in ["swiglu","squaredrelu","gelu"], f"{args.activation_func} is not a supported activation function"
    if args.activation_func == "swiglu":
        kw_args['activation_func'] = F.silu
        kw_args['gated_linear_unit'] = True
        kw_args['bias_activation_fusion'] = args.bias_swiglu_fusion
    elif args.activation_func == "squaredrelu":
        def squared_relu(x):
            return torch.pow(F.relu(x), 2)
        kw_args['activation_func'] = squared_relu
    elif args.activation_func == "gelu":
        kw_args['activation_func'] = F.gelu
        if args.add_bias_linear:
            kw_args['bias_activation_fusion'] = False
        else:
            kw_args['bias_activation_fusion'] = args.bias_activation_fusion
    
    if args.init_method == "xavier_uniform":
        kw_args['init_method'] = torch.nn.init.xavier_uniform_
        kw_args['scaled_init_method'] = torch.nn.init.xavier_uniform_
    if args.embedding_init_method == "xavier_uniform":
        kw_args['embedding_init_method'] = torch.nn.init.xavier_uniform_
    
    # Return Transformer config.
    if getattr(args, "multi_latent_attention", False):
        return MLATransformerConfig(**kw_args)
    else:
        return TransformerConfig(**kw_args)

def load_yaml(yaml_path):
    print(f"warning using experimental yaml arguments feature, argparse arguments will be ignored")
    with open(yaml_path, "r") as f:
        config = yaml.safe_load(f)
        # Convert to nested namespace
        config_namespace = json.loads(json.dumps(config), object_hook=lambda item: SimpleNamespace(**item))
        # Add config location to namespace
        config_namespace.yaml_cfg = yaml_path
        return config_namespace


================================================
FILE: model_provider.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.

"""Common functions used in train_*.py and pretrain_*.py scripts."""

from typing import Callable, Optional, Union

import torch

from megatron.core.models.gpt import GPTModel
from megatron.core.models.mamba import MambaModel
from megatron.training import get_args, print_rank_0

try:
    from megatron.post_training.model_builder import modelopt_gpt_mamba_builder
    has_nvidia_modelopt = True
except ImportError:
    has_nvidia_modelopt = False

import megatron.legacy.model  # isort: skip

# NOTE: Loading `megatron.legacy.model` earlier fails due to circular import


def model_provider(
    model_builder: Callable, pre_process=True, post_process=True, vp_stage: Optional[int] = None, config=None, pg_collection=None,
) -> Union[GPTModel, megatron.legacy.model.GPTModel, MambaModel]:
    """Builds the model.

    If you set the use_legacy_models to True, it will return the legacy GPT model and if not the mcore GPT model.

    Args:
        model_builder: A callable that builds the actual model, its signature is the same as model_provider's with an exception of the first argument which is a builder itself. In addition might take a config passed from outside to skip its own config loading. See gpt_builder or mamba_builder for an example, see _gpt_model_builder in train_rl.py to see how to augment a default gpt builder and pass the config from outside
        pre_process (bool, optional): Set to true if you need to compute embedings. Defaults to True.
        post_process (bool, optional): Set to true if you need to compute output logits/loss. Defaults to True.

    Returns:
        Union[GPTModel, megatron.legacy.model.GPTModel, MambaModel]: The returned model
    """
    args = get_args()

    if args.record_memory_history:
        torch.cuda.memory._record_memory_history(
            True,
            # keep 100,000 alloc/free events from before the snapshot
            trace_alloc_max_entries=100000,
            # record stack information for the trace events
            trace_alloc_record_context=True,
        )

        def oom_observer(device, alloc, device_alloc, device_free):
            # snapshot right after an OOM happened
            print('saving allocated state during OOM')

            filename = f"oom_rank-{torch.distributed.get_rank()}_{args.memory_snapshot_path}"
            torch.cuda.memory._dump_snapshot(filename)

        torch._C._cuda_attach_out_of_memory_observer(oom_observer)

    if has_nvidia_modelopt and getattr(args, 'modelopt_enabled', False):
        # [ModelOpt]: Use custom builder + spec when modelopt is enabled
        model_builder = modelopt_gpt_mamba_builder

    return model_builder(args, pre_process, post_process, vp_stage, config=config, pg_collection=pg_collection)


def count_parameters_in_layer(model, layer_name):
    num_params = 0
    for name, param in model.named_parameters():
        if layer_name in name:
            num_params += param.numel()
            print_rank_0(f" - {name}: {param.numel()}")
    return num_params


================================================
FILE: pretrain_bert.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.

"""Pretrain BERT"""

from functools import partial

import torch
import torch.nn.functional as F

from megatron.training import get_args
from megatron.training import print_rank_0
from megatron.training import get_timers
from megatron.core import tensor_parallel
from megatron.core.enums import ModelType
import megatron.legacy.model
from megatron.core.models.bert.bert_model import BertModel
from megatron.training import pretrain
from megatron.training.utils import average_losses_across_data_parallel_group
from megatron.training.arguments import core_transformer_config_from_args
from megatron.core.transformer.spec_utils import import_module
from megatron.core.models.bert.bert_layer_specs import bert_layer_with_transformer_engine_spec, bert_layer_local_spec
from megatron.core.tokenizers.utils.build_tokenizer import build_tokenizer
from megatron.core.datasets.blended_megatron_dataset_builder import BlendedMegatronDatasetBuilder
from megatron.core.datasets.bert_dataset import BERTMaskedWordPieceDataset, BERTMaskedWordPieceDatasetConfig
from megatron.core.datasets.utils import get_blend_from_list
from megatron.core import mpu, tensor_parallel


def model_provider(pre_process=True, post_process=True, vp_stage=None, config=None, pg_collection=None):
    """Build the model."""

    print_rank_0('building BERT model ...')

    args = get_args()
    if config is None:
        config = core_transformer_config_from_args(args)
    num_tokentypes = 2 if args.bert_binary_head else 0

    if args.use_legacy_models:
        model = megatron.legacy.model.BertModel(
            config=config,
            num_tokentypes=num_tokentypes,
            add_binary_head=args.bert_binary_head,
            parallel_output=True,
            pre_process=pre_process,
            post_process=post_process)
    else:
        if args.spec is None:
            transformer_layer_spec = bert_layer_with_transformer_engine_spec #default spec
        elif args.spec[0] == 'local':
            print_rank_0('Using Local spec for transformer layers')
            transformer_layer_spec = bert_layer_local_spec
        else :
            transformer_layer_spec = import_module(args.spec)

        model = BertModel(
            config=config,
            transformer_layer_spec=transformer_layer_spec,
            vocab_size=args.padded_vocab_size,
            max_sequence_length=args.max_position_embeddings,
            num_tokentypes=num_tokentypes,
            add_binary_head=args.bert_binary_head,
            share_embeddings_and_output_weights=not args.untie_embeddings_and_output_weights,
            parallel_output=True,
            pre_process=pre_process,
            post_process=post_process,
            vp_stage=vp_stage)

    return model


def get_batch(data_iterator):
    """Build the batch."""

    # Items and their type.
    keys = ['text', 'types', 'labels',
            'is_random', 'loss_mask', 'padding_mask']
    datatype = torch.int64

    # Broadcast data.
    if data_iterator is not None:
        data = next(data_iterator)
    else:
        data = None
    data_b = tensor_parallel.broadcast_data(keys, data, datatype)

    # Unpack.
    tokens = data_b['text'].long()
    types = data_b['types'].long()
    sentence_order = data_b['is_random'].long()
    loss_mask = data_b['loss_mask'].float()
    lm_labels = data_b['labels'].long()
    padding_mask = data_b['padding_mask'].long()

    return tokens, types, sentence_order, loss_mask, lm_labels, padding_mask


def loss_func(loss_mask, sentence_order, output_tensor):
    lm_loss_, sop_logits = output_tensor

    lm_loss_ = lm_loss_.float()
    loss_mask = loss_mask.float()
    lm_loss = torch.sum(
        lm_loss_.view(-1) * loss_mask.reshape(-1)) / loss_mask.sum()

    if sop_logits is not None:
        sop_loss = F.cross_entropy(sop_logits.view(-1, 2).float(),
                                   sentence_order.view(-1),
                                   ignore_index=-1)
        sop_loss = sop_loss.float()
        loss = lm_loss + sop_loss
        averaged_losses = average_losses_across_data_parallel_group(
            [lm_loss, sop_loss])
        return loss, {'lm loss': averaged_losses[0],
                      'sop loss': averaged_losses[1]}
    else:
        loss = lm_loss
        averaged_losses = average_losses_across_data_parallel_group(
            [lm_loss])
        return loss, {'lm loss': averaged_losses[0]}


def forward_step(data_iterator, model):
    """Forward step."""
    args = get_args()
    timers = get_timers()

    # Get the batch.
    timers('batch-generator', log_level=2).start()
    tokens, types, sentence_order, loss_mask, lm_labels, padding_mask = get_batch(
        data_iterator)
    timers('batch-generator').stop()

    if not args.bert_binary_head:
        types = None

    # Forward pass through the model.
    output_tensor = model(tokens, padding_mask,
                          tokentype_ids=types, lm_labels=lm_labels)

    return output_tensor, partial(loss_func, loss_mask, sentence_order)


def train_valid_test_datasets_provider(train_val_test_num_samples, vp_stage=None):
    """Build train, valid, and test datasets."""
    args = get_args()

    tokenizer = build_tokenizer(args)

    config = BERTMaskedWordPieceDatasetConfig(
        random_seed=args.seed,
        sequence_length=args.seq_length,
        blend=get_blend_from_list(args.data_path),
        blend_per_split=[
            get_blend_from_list(args.train_data_path),
            get_blend_from_list(args.valid_data_path),
            get_blend_from_list(args.test_data_path)
        ],
        split=args.split,
        path_to_cache=args.data_cache_path,
        tokenizer=tokenizer,
        masking_probability=args.mask_prob,
        short_sequence_probability=args.short_seq_prob,
        masking_max_ngram=3,
        masking_do_full_word=True,
        masking_do_permutation=False,
        masking_use_longer_ngrams=False,
        masking_use_geometric_distribution=False,
        classification_head=args.bert_binary_head,
        mid_level_dataset_surplus=args.mid_level_dataset_surplus,
        allow_ambiguous_pad_tokens=args.allow_ambiguous_pad_tokens,
    )

    print_rank_0('> building train, validation, and test datasets '
                 'for BERT ...')

    train_ds, valid_ds, test_ds = BlendedMegatronDatasetBuilder(
        BERTMaskedWordPieceDataset,
        train_val_test_num_samples,
        lambda: mpu.get_tensor_model_parallel_rank() == 0,
        config,
    ).build()

    print_rank_0("> finished creating BERT datasets ...")

    return train_ds, valid_ds, test_ds


if __name__ == "__main__":

    # Temporary for transition to core datasets
    train_valid_test_datasets_provider.is_distributed = True

    pretrain(train_valid_test_datasets_provider, model_provider,
             ModelType.encoder_or_decoder,
             forward_step, args_defaults={'tokenizer_type': 'BertWordPieceLowerCase'})


================================================
FILE: pretrain_gpt.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

"""Pretrain and SFT GPT."""

# Capture the true program start time BEFORE any heavy imports.
import time
_PROGRAM_START_TIME = time.time()

import json

# Suppress warnings on all ranks but rank 0.
import os
import warnings
rank = int(os.environ.get('RANK', 0))
if rank != 0:
    warnings.filterwarnings("ignore", category=UserWarning)
    warnings.filterwarnings("ignore", category=FutureWarning)

from functools import partial
from typing import List, Optional, Tuple

import torch

from gpt_builders import gpt_builder
from megatron.core import parallel_state
from megatron.core.datasets.blended_megatron_dataset_builder import BlendedMegatronDatasetBuilder
from megatron.core.datasets.gpt_dataset import GPTDataset, GPTDatasetConfig, MockGPTDataset
from megatron.core.enums import ModelType
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.models.gpt import GPTModel
from megatron.core.rerun_state_machine import get_rerun_state_machine
from megatron.core.tokenizers.utils.build_tokenizer import build_tokenizer
from megatron.core.utils import get_attr_wrapped_model, get_thd_batch_on_this_cp_rank, get_batch_on_this_hybrid_cp_rank, StragglerDetector
from megatron.training import (
    get_args,
    get_timers,
    inprocess_restart,
    pretrain,
    print_rank_0,
    set_startup_timestamps,
)
from megatron.training.datasets.sft_dataset import SFTDataset
from megatron.core.transformer.multi_token_prediction import mtp_on_this_rank, get_mtp_ranks
from megatron.training.arguments import core_transformer_config_from_args
from megatron.training.datasets.fim_dataset import GPTFIMDataset, GPTFIMDatasetConfig
from megatron.training.utils import (
    get_batch_on_this_cp_rank,
    get_batch_on_this_tp_rank,
    get_blend_and_blend_per_split,
    is_first_or_last_pipeline_stage,
)
from model_provider import model_provider

try:
    from megatron.post_training.arguments import add_modelopt_args
    from megatron.post_training.loss_func import loss_func as loss_func_modelopt

    has_nvidia_modelopt = True
except ImportError:
    has_nvidia_modelopt = False

stimer = StragglerDetector()


def get_batch(data_iterator, vp_stage: Optional[int] = None):
    """Generate a batch.

    Packed sequence support (SFT / ``--sft`` flag):
        When ``args.sft`` is True, the dataset emits THD-format batches where
        multiple sequences are concatenated into a single flat token tensor.
        The batch includes ``cu_seqlens`` (cumulative sequence lengths, shape
        ``[1, S+1]``) and ``max_seqlen`` (shape ``[1]``) that describe the
        individual sequence boundaries.

        This function validates and squeezes those fields:
          - ``cu_seqlens``:  asserted to have shape ``[1, S+1]`` (micro-batch
            size must be 1 for packing), then squeezed to ``[S+1]``.
          - ``max_seqlen``:  asserted to be 1-D; kept as a tensor and passed
            to ``get_thd_batch_on_this_cp_rank`` which performs the final
            scalar conversion internally.

        Pipeline stage handling:
          - First/last PP stages: fetch the full batch (tokens + labels) and
            route through ``get_thd_batch_on_this_cp_rank`` to produce a
            ``PackedSeqParams`` object that carries ``cu_seqlens`` and
            ``max_seqlen`` to the attention kernel.
          - Middle PP stages: only ``cu_seqlens`` and ``max_seqlen`` are
            needed for attention masking; all other fields are returned as
            ``None`` with a ``PackedSeqParams`` built directly here.
          - MTP ranks (``mtp_on_this_rank``) also receive the full batch,
            regardless of pipeline stage.

        Difference from ``pretrain_mamba.py``:
          - Return format: GPT returns a 6-tuple
            ``(tokens, labels, loss_mask, attention_mask, position_ids,
            packed_seq_params)`` where ``packed_seq_params`` is a
            ``PackedSeqParams`` dataclass.  Mamba returns 7 values via
            ``batch.values()`` with ``cu_seqlens`` and ``max_seqlen`` as
            separate dict entries (no ``PackedSeqParams`` wrapper).
          - Middle-stage return: GPT returns ``(None×5, PackedSeqParams)``;
            Mamba returns an ``empty_batch`` dict with ``cu_seqlens`` and
            ``max_seqlen`` set.
          - CP with packed sequences: GPT delegates to
            ``get_thd_batch_on_this_cp_rank`` (MCore utility); Mamba
            implements the ``tex.thd_get_partitioned_indices`` CP slicing
            inline and does not call that helper.
          - MTP: GPT passes ``mtp_on_this_rank`` to ``get_batch_on_this_tp_rank``
            and uses it to gate the early-return; Mamba has no MTP support.
          - ``max_seqlen`` conversion: Mamba converts to a Python int scalar
            before returning (``int(max_seqlen[0].item())``); GPT keeps it as
            a tensor and lets ``get_thd_batch_on_this_cp_rank`` convert it,
            except for the middle-stage ``PackedSeqParams`` where conversion
            is done inline.
    """
    args = get_args()
    config = core_transformer_config_from_args(args)
    # TODO: this is pretty hacky, find a better way
    is_packed_sequence = get_args().sft  # SFT always uses packed sequence
    if not is_first_or_last_pipeline_stage(vp_stage) and not is_packed_sequence and (
    (not mtp_on_this_rank(config, ignore_virtual=False, vp_stage=vp_stage))):
        return None, None, None, None, None, None

    # get batches based on the TP rank you are on
    batch = get_batch_on_this_tp_rank(
        data_iterator,
        mtp_on_this_rank=mtp_on_this_rank(config, ignore_virtual=False, vp_stage=vp_stage)
        )

    cu_seqlens = batch.pop('cu_seqlens', None)
    cu_seqlens_padded = batch.pop('cu_seqlens_padded', None)
    max_seqlen = batch.pop('max_seqlen', None)
    local_cp_size = batch.pop('local_cp_size', None)
    if local_cp_size is not None:
        local_cp_size = int(local_cp_size.item())

    if cu_seqlens is not None:
        assert (
            cu_seqlens.dim() == 2 and cu_seqlens.shape[0] == 1
        ), "micro-batch-size must be 1 for packing"
        cu_seqlens = cu_seqlens[0]
        assert max_seqlen.dim() == 1

    # For middle pipeline stages with packed sequences, only cu_seqlens and
    # max_seqlen are needed (for attention masking); skip the full batch.
    if not is_first_or_last_pipeline_stage(vp_stage) and is_packed_sequence:
        return None, None, None, None, None, PackedSeqParams(
            cu_seqlens_q=cu_seqlens,
            cu_seqlens_kv=cu_seqlens,
            max_seqlen_q=int(max_seqlen[0].item()),
            max_seqlen_kv=int(max_seqlen[0].item()),
            qkv_format='thd',
        )

    if cu_seqlens is None and local_cp_size is None:
        # slice batch along sequence dimension for context parallelism
        batch = get_batch_on_this_cp_rank(batch)  # The implementation of this function is in MCore
        packed_seq_params = None
    elif local_cp_size is None:  # Packed THD format
        batch, packed_seq_params = get_thd_batch_on_this_cp_rank(batch, cu_seqlens, cu_seqlens_padded, max_seqlen)
    else: # Hybrid CP format
        batch, packed_seq_params = get_batch_on_this_hybrid_cp_rank(batch, local_cp_size)

    return (*batch.values(), packed_seq_params)


# define spiky loss as a loss that's 10x the max loss observed
SPIKY_LOSS_FACTOR = 10


def loss_func(
    loss_mask: torch.Tensor, output_tensor: torch.Tensor, model: Optional[GPTModel] = None
):
    """Loss function.

    Args:
        loss_mask (torch.Tensor): Used to mask out some portions of the loss
        output_tensor (torch.Tensor): The tensor with the losses
        model (GPTModel, optional): The model (can be wrapped)

    Returns:
        the loss scalar for this micro-batch
        the number of non-padded tokens in this microbatch
        a dict containing reporting metrics on the loss and number of tokens across
            the data parallel ranks
    """
    args = get_args()

    if has_nvidia_modelopt and getattr(args, 'modelopt_enabled', False):  # [ModelOpt]
        loss, num_tokens, report = loss_func_modelopt(loss_mask, output_tensor, model=model)
    else:
        losses = output_tensor.view(-1).float()
        loss_mask = loss_mask.view(-1).float()
        loss = torch.sum(losses * loss_mask)

        num_tokens = loss_mask.sum().clone().detach().to(torch.int)
        report = {'lm loss': torch.cat([loss.clone().detach().view(1), num_tokens.view(1)])}

    # Check individual rank losses are not NaN prior to DP all-reduce.
    rerun_state_machine = get_rerun_state_machine()
    if args.check_for_nan_in_loss_and_grad:
        rerun_state_machine.validate_result(
            result=loss,
            rejection_func=torch.isnan,
            message="found NaN in local forward loss calculation",
            tolerance=0.0,  # forward pass calculations are determinisic
            fatal=True,
        )
        rerun_state_machine.validate_result(
            result=loss,
            rejection_func=torch.isinf,
            message="found Inf in local forward loss calculation",
            tolerance=0.0,  # forward pass calculations are determinisic
            fatal=True,
        )
    # Check for spiky loss
    if args.check_for_spiky_loss:
        rerun_state_machine.validate_result(
            result=loss,
            rejection_func=partial(
                rerun_state_machine.is_unexpectedly_large,
                threshold=SPIKY_LOSS_FACTOR,
                context="loss",
            ),
            message="Spiky loss",
            tolerance=0.0,  # forward pass calculations are determinisic
            fatal=False,
        )

    return loss, num_tokens, report


def forward_step(data_iterator, model: GPTModel, return_schedule_plan: bool = False):
    """Forward training step.

    Args:
        data_iterator : Input data iterator
        model (GPTModel): The GPT Model
        return_schedule_plan (bool): Whether to return the schedule plan instead of the output tensor
    """
    args = get_args()
    timers = get_timers()

    # Get the batch.
    timers('batch-generator', log_level=2).start()
    global stimer
    with stimer(bdata=True):
        vp_stage = get_attr_wrapped_model(model, "vp_stage")
        tokens, labels, loss_mask, attention_mask, position_ids, packed_seq_params = get_batch(data_iterator, vp_stage)
    timers('batch-generator').stop()

    with stimer:
        if args.use_legacy_models:
            output_tensor = model(tokens, position_ids, attention_mask, labels=labels)
        else:
            if return_schedule_plan:
                assert args.overlap_moe_expert_parallel_comm, \
                    "overlap_moe_expert_parallel_comm must be enabled to return the schedule plan"
                schedule_plan = model.build_schedule_plan(
                    tokens, position_ids, attention_mask, labels=labels, loss_mask=loss_mask
                )
                return schedule_plan, partial(loss_func, loss_mask, model=model)
            else:
                output_tensor = model(
                    tokens, position_ids, attention_mask, labels=labels, loss_mask=loss_mask, packed_seq_params=packed_seq_params
                )

    # [ModelOpt]: model is needed to access ModelOpt distillation losses
    return output_tensor, partial(loss_func, loss_mask, model=model)


def is_dataset_built_on_rank(vp_stage=None, is_packed_sequence=False):
    args = get_args()
    config = core_transformer_config_from_args(args)
    if parallel_state.get_tensor_model_parallel_rank() != 0:
        return False
    elif is_packed_sequence:
        return True
    return (
        is_first_or_last_pipeline_stage(vp_stage)
        or mtp_on_this_rank(config, ignore_virtual=False, vp_stage=vp_stage)
    )


def core_gpt_dataset_config_from_args(args):
    tokenizer = build_tokenizer(args)

    # Sometimes --data-path is too long, instead we parse it from a file.
    blend: Optional[Tuple[List[str], Optional[List[float]]]]
    blend_per_split: Optional[List[Optional[Tuple[List[str], Optional[List[float]]]]]]
    blend, blend_per_split = get_blend_and_blend_per_split(args)

    sequences_per_dataset = None
    if args.per_dataset_sequences_path is not None:
        with open(args.per_dataset_sequences_path, "r") as f:
            sequences_per_dataset = json.load(f)

    data_args = {
        "random_seed": args.seed,
        "sequence_length": args.seq_length,
        "blend": blend,
        "blend_per_split": blend_per_split,
        "split": args.split,
        "multiple_validation_sets": args.multiple_validation_sets,
        "full_validation": args.full_validation,
        "num_dataset_builder_threads": args.num_dataset_builder_threads,
        "path_to_cache": args.data_cache_path,
        "mmap_bin_files": args.mmap_bin_files,
        "tokenizer": tokenizer,
        "reset_position_ids": args.reset_position_ids,
        "reset_attention_mask": args.reset_attention_mask,
        "eod_mask_loss": args.eod_mask_loss,
        "create_attention_mask": args.create_attention_mask_in_dataloader,
        "object_storage_cache_path": args.object_storage_cache_path,
        "mid_level_dataset_surplus": args.mid_level_dataset_surplus,
        "allow_ambiguous_pad_tokens": args.allow_ambiguous_pad_tokens,
        "fast_cache_load": args.dataloader_fast_cache_load,
        "sequences_per_dataset": sequences_per_dataset,
        "defer_npy_index_mmap": args.dataloader_defer_npy_index_mmap,
        "context_parallel_size": args.context_parallel_size,
        "data_parallel_size": args.data_parallel_size,
        "sequence_parallel_size": args.tensor_model_parallel_size*args.sequence_parallel,
        "hybrid_context_parallel": args.hybrid_context_parallel,
    }

    # add FIM args to the config
    if args.fim_data:
        extra_tokens = {
            "prefix": args.fim_prefix_token,
            "middle": args.fim_middle_token,
            "suffix": args.fim_suffix_token,
            "pad": args.fim_pad_token,
            "eod": args.fim_eod_token,
        }
        data_args.update(
            {
                "fim_rate": args.fim_rate,
                "fim_spm_rate": args.fim_spm_rate,
                "fim_extra_tokens": extra_tokens,
                "fim_split_sample": args.fim_split_sample,
                "fim_fragment_rate": args.fim_fragment_rate,
                "fim_no_prefix": args.fim_no_prefix,
            }
        )
        return GPTFIMDatasetConfig(**data_args)

    return GPTDatasetConfig(**data_args)


def train_valid_test_datasets_provider(train_val_test_num_samples, vp_stage=None):
    """Build the train test and validation datasets.

    Args:
        train_val_test_num_samples : A list containing the number of samples in train test and validation.
    """
    args = get_args()

    config = core_gpt_dataset_config_from_args(args)


    is_packed_sequence = False
    if args.sft:
        dataset_type = SFTDataset
        is_packed_sequence = True  # SFT always uses packed sequence
    else:
        if args.mock_data:
            dataset_type = MockGPTDataset
        elif args.fim_data:
            dataset_type = GPTFIMDataset
        else:
            dataset_type = GPTDataset

    print_rank_0("> building train, validation, and test datasets for GPT ...")

    is_dataset_built = partial(is_dataset_built_on_rank, vp_stage=vp_stage, is_packed_sequence=is_packed_sequence)
    train_ds, valid_ds, test_ds = BlendedMegatronDatasetBuilder(
        dataset_type, train_val_test_num_samples, is_dataset_built, config
    ).build()

    print_rank_0("> finished creating GPT datasets ...")

    return train_ds, valid_ds, test_ds


def get_embedding_ranks(pp_ranks: List[int]):
    """Get the embedding ranks."""
    embedding_ranks = [pp_ranks[0]]
    if len(pp_ranks) > 1:
        args = get_args()
        if not args.untie_embeddings_and_output_weights:
            embedding_ranks.append(pp_ranks[-1])
        config = core_transformer_config_from_args(args)
        mtp_ranks = get_mtp_ranks(pp_ranks, config)
        embedding_ranks.extend(mtp_ranks)
    embedding_ranks = list(set(embedding_ranks))
    embedding_ranks = sorted(embedding_ranks)
    return embedding_ranks


if __name__ == "__main__":
    # Timestamp right after entering __main__ block (after all imports/library setup)
    _MAIN_ENTRY_TIME = time.time()

    # Register startup timestamps for timing report in pretrain()
    set_startup_timestamps(program_start=_PROGRAM_START_TIME, main_entry=_MAIN_ENTRY_TIME)

    # Temporary for transition to core datasets
    train_valid_test_datasets_provider.is_distributed = True

    # Optionally enable inprocess restart on pretrain
    pretrain, store = inprocess_restart.maybe_wrap_for_inprocess_restart(pretrain)

    pretrain(
        train_valid_test_datasets_provider,
        partial(model_provider, gpt_builder),
        ModelType.encoder_or_decoder,
        forward_step,
        args_defaults={'tokenizer_type': 'GPT2BPETokenizer'},
        extra_args_provider=add_modelopt_args if has_nvidia_modelopt else None,
        store=store,
        get_embedding_ranks=get_embedding_ranks,
    )


================================================
FILE: pretrain_mamba.py
================================================
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
"""Pretrain and SFT Mamba."""

# Capture the true program start time BEFORE any heavy imports.
import time
_PROGRAM_START_TIME = time.time()

import json

# Suppress warnings on all ranks but rank 0.
import os
import warnings
rank = int(os.environ.get('RANK', 0))
if rank != 0:
    warnings.filterwarnings("ignore", category=UserWarning)
    warnings.filterwarnings("ignore", category=FutureWarning)

from functools import partial
from typing import List, Optional, Tuple

import torch

from mamba_builders import mamba_builder
from megatron.core import mpu
from megatron.core.datasets.blended_megatron_dataset_builder import BlendedMegatronDatasetBuilder
from megatron.core.datasets.gpt_dataset import GPTDataset, GPTDatasetConfig, MockGPTDataset
from megatron.core.enums import ModelType
from megatron.core.packed_seq_params import PackedSeqParams
from megatron.core.parallel_state import (
    get_context_parallel_rank,
    get_context_parallel_world_size,
)
from megatron.core.models.mamba import MambaModel
from megatron.core.rerun_state_machine import get_rerun_state_machine
from megatron.core.tokenizers.utils.build_tokenizer import build_tokenizer
from megatron.core.utils import get_attr_wrapped_model, is_te_min_version, StragglerDetector
from megatron.training import (
    get_args,
    get_timers,
    inprocess_restart,
    pretrain,
    print_rank_0,
    set_startup_timestamps,
)
from megatron.training.datasets.sft_dataset import SFTDataset
from megatron.training.utils import (
    get_batch_on_this_cp_rank,
    get_batch_on_this_tp_rank,
    get_blend_and_blend_per_split,
    is_first_or_last_pipeline_stage,
)
from model_provider import model_provider

try:
    from megatron.post_training.arguments import add_modelopt_args
    from megatron.post_training.loss_func import loss_func as loss_func_modelopt
    has_nvidia_modelopt = True
except ImportError:
    has_nvidia_modelopt = False

try:
    # Register the TE CUDA kernels
    import transformer_engine  # pylint: disable=unused-import

    # Alias the PyTorch wrapper so we can call tex.* APIs
    import transformer_engine_torch as tex
except ImportError:
    # TE isn’t installed or the torch wrapper is missing
    tex = None

stimer = StragglerDetector()


def get_batch(data_iterator, vp_stage=None):
    """Generate a batch."""

    empty_batch = {
        'tokens': None,
        'labels': None,
        'loss_mask': None,
        'attention_mask': None,
        'position_ids': None,
        'cu_seqlens': None,
        'max_seqlen': None,
    }

    # TODO(duncan): Is there a more efficient way to access is_packed_sequence here?
    is_packed_sequence = get_args().sft  # SFT always uses packed sequence
    if not is_first_or_last_pipeline_stage(vp_stage) and not is_packed_sequence:
        return empty_batch.values()

    batch = get_batch_on_this_tp_rank(data_iterator)
    
    cu_seqlens = batch['cu_seqlens']
    # Unused at the moment
    cu_seqlens_padded = batch.pop('cu_seqlens_padded', None)
    # Support for Hybrid Context Parallel (Unused in this script)
    local_cp_size = batch.pop('local_cp_size', None)

    if cu_seqlens is not None:
        assert (
            cu_seqlens.dim() == 2 and cu_seqlens.shape[0] == 1
        ), "micro-batch-size must be 1 for packing"
        cu_seqlens = cu_seqlens[0]
        batch['cu_seqlens'] = cu_seqlens

        max_seqlen = batch['max_seqlen']
        assert max_seqlen.dim() == 1
        # TODO(duncan): can this be kept as a 0-D tensor?
        batch['max_seqlen'] = int(max_seqlen[0].item())

    if mpu.is_pipeline_first_stage(ignore_virtual=(vp_stage is None), vp_stage=vp_stage):
        total_tokens = batch['tokens'].size(1)
    elif mpu.is_pipeline_last_stage(ignore_virtual=(vp_stage is None), vp_stage=vp_stage):
        total_tokens = batch['labels'].size(1)
    else:  # packed sequence
        empty_batch['cu_seqlens'] = cu_seqlens
        empty_batch['max_seqlen'] = max_seqlen
        return empty_batch.values()

    if cu_seqlens is None:
        # slice batch along sequence dimension for context parallelism
        batch = get_batch_on_this_cp_rank(batch)  # The implementation of this function is in MCore
    else:  # Packed THD format
        cp_size = get_context_parallel_world_size()
        if cp_size > 1:  # slice batch along sequence dimension for context parallelism
            assert tex is not None and is_te_min_version("1.10.0"), (
                "Please update Transformer Engine to >= 1.10 to use "
                "Context Parallel with THD format data"
            )
            cp_rank = get_context_parallel_rank()
            index = tex.thd_get_partitioned_indices(
                cu_seqlens,
                total_tokens,
                cp_size,
                cp_rank,
            )
            for key, data in batch.items():
                if key in {'attention_mask', 'cu_seqlens', 'max_seqlen'}:
                    continue
                if data is not None:
                    # On first PP rank, labels and loss_mask can be None.
                    # On last PP rank, tokens and position_ids can be None.
                    batch[key] = data.index_select(1, index)

    return batch.values()


# define spiky loss as a loss that's 10x the max loss observed
SPIKY_LOSS_FACTOR = 10

def loss_func(loss_mask: torch.Tensor, output_tensor: torch.Tensor, model: Optional[MambaModel] = None):
    """Loss function.

    Args:
        loss_mask (torch.Tensor): Used to mask out some portions of the loss
        output_tensor (torch.Tensor): The tensor with the losses

    Returns:
        the loss scalar for this micro-batch
        the number of non-padded tokens in this microbatch
        a dict containing reporting metrics on the loss and number of tokens across
            the data parallel ranks
    """
    args = get_args()
    if has_nvidia_modelopt and getattr(args, 'modelopt_enabled', False):  # [ModelOpt]
        loss, num_tokens, report = loss_func_modelopt(loss_mask, output_tensor, model=model)
    else:
        losses = output_tensor.view(-1).float()
        loss_mask = loss_mask.view(-1).float()
        loss = torch.sum(losses * loss_mask)

        num_tokens = loss_mask.sum().clone().detach().to(torch.int)
        report = {'lm loss': torch.cat([loss.clone().detach().view(1), num_tokens.view(1)])}

    # Check individual rank losses are not NaN prior to DP all-reduce.
    rerun_state_machine = get_rerun_state_machine()
    if args.check_for_nan_in_loss_and_grad:
        rerun_state_machine.validate_result(
            result=loss,
            rejection_func=torch.isnan,
            message="found NaN in local forward loss calculation",
            tolerance=0.0,        # forward pass calculations are deterministic
            fatal=True,
        )
        rerun_state_machine.validate_result(
            result=loss,
            rejection_func=torch.isinf,
            message="found Inf in local forward loss calculation",
            tolerance=0.0,        # forward pass calculations are deterministic
            fatal=True,
        )
    # Check for spiky loss
    if args.check_for_spiky_loss:
        rerun_state_machine.validate_result(
            result=loss,
            rejection_func=partial(
                rerun_state_machine.is_unexpectedly_large,
                threshold=SPIKY_LOSS_FACTOR,
                context="loss",
            ),
            message="Spiky loss",
            tolerance=0.0,        # forward pass calculations are deterministic
            fatal=False,
        )

    return loss, num_tokens, report


def forward_step(data_iterator, model: MambaModel):
    """Forward training step.

    Args:
        data_iterator : Input data iterator
        model (MambaModel): The GPT Model
    """
    timers = get_timers()

    # Get the batch.
    timers('batch-generator', log_level=2).start()

    global stimer

    with stimer(bdata=True):
        vp_stage = get_attr_wrapped_model(model, "vp_stage")
        (
            tokens,
            labels,
            loss_mask,
            attention_mask,
            position_ids,
            cu_seqlens,
            max_seqlen,
        ) = get_batch(data_iterator, vp_stage)

    if cu_seqlens is None:
        packed_seq_params = None
    else:
        total_tokens = tokens.size(1) if tokens is not None else labels.size(1)
        packed_seq_params = PackedSeqParams(
            qkv_format="thd",
            cu_seqlens_q=cu_seqlens,
            cu_seqlens_kv=cu_seqlens,
            cu_seqlens_q_padded=None,
            cu_seqlens_kv_padded=None,
            max_seqlen_q=max_seqlen,
            max_seqlen_kv=max_seqlen,
            total_tokens=total_tokens,
        )

    timers('batch-generator').stop()

    with stimer:
        output_tensor = model(
            tokens,
            position_ids,
            attention_mask,
            labels=labels,
            packed_seq_params=packed_seq_params,
            loss_mask=loss_mask
        )

    # [ModelOpt]: model is needed to access ModelOpt distillation losses
    return output_tensor, partial(loss_func, loss_mask, model=model)


def is_dataset_built_on_rank(vp_stage=None, is_packed_sequence=False):
    if mpu.get_tensor_model_parallel_rank() != 0:
        return False
    elif is_packed_sequence:
        return True
    else:
        return is_first_or_last_pipeline_stage(vp_stage)


def core_gpt_dataset_config_from_args(args):
    tokenizer = build_tokenizer(args)

    # Sometimes --data-path is too long, instead we parse it from a file.
    blend: Optional[Tuple[List[str], Optional[List[float]]]]
    blend_per_split: Optional[List[Optional[Tuple[List[str], Optional[List[float]]]]]]
    blend, blend_per_split = get_blend_and_blend_per_split(args)

    sequences_per_dataset = None
    if args.per_dataset_sequences_path is not None:
        with open(args.per_dataset_sequences_path, "r") as f:
            sequences_per_dataset = json.load(f)

    return GPTDatasetConfig(
        random_seed=args.seed,
        sequence_length=args.seq_length,
        blend=blend,
        blend_per_split=blend_per_split,
        split=args.split,
        num_dataset_builder_threads=args.num_dataset_builder_threads,
        path_to_cache=args.data_cache_path,
        mmap_bin_files=args.mmap_bin_files,
        tokenizer=tokenizer,
        reset_position_ids=args.reset_position_ids,
        reset_attention_mask=args.reset_attention_mask,
        eod_mask_loss=args.eod_mask_loss,
        create_attention_mask=args.create_attention_mask_in_dataloader,
        object_storage_cache_path=args.object_storage_cache_path,
        mid_level_dataset_surplus=args.mid_level_dataset_surplus,
        allow_ambiguous_pad_tokens=args.allow_ambiguous_pad_tokens,
        fast_cache_load=args.dataloader_fast_cache_load,
        sequences_per_dataset=sequences_per_dataset,
        defer_npy_index_mmap=args.dataloader_defer_npy_index_mmap,
        context_parallel_size=args.context_parallel_size,
    )


def train_valid_test_datasets_provider(train_val_test_num_samples, vp_stage=None):
    """Build the train test and validation datasets.

    Args:
        train_val_test_num_samples : A list containing the number of samples in train test and validation.
    """
    args = get_args()
    config = core_gpt_dataset_config_from_args(args)

    is_packed_sequence = False
    if args.sft:
        dataset_type = SFTDataset
        is_packed_sequence = True  # SFT always uses packed sequence
    else:
        if args.mock_data:
            dataset_type = MockGPTDataset
        else:
            dataset_type = GPTDataset

    print_rank_0("> building train, validation, and test datasets for GPT ...")

    train_ds, valid_ds, test_ds = BlendedMegatronDatasetBuilder(
        dataset_type,
        train_val_test_num_samples,
        partial(is_dataset_built_on_rank, vp_stage=vp_stage, is_packed_sequence=is_packed_sequence),
        config
    ).build()

    print_rank_0("> finished creating GPT datasets ...")

    return train_ds, valid_ds, test_ds


if __name__ == "__main__":
    # Timestamp right after entering __main__ block (after all imports/library setup)
    _MAIN_ENTRY_TIME = time.time()

    # Register startup timestamps for timing report in pretrain()
    set_startup_timestamps(program_start=_PROGRAM_START_TIME, main_entry=_MAIN_ENTRY_TIME)

    # Temporary for transition to core datasets
    train_valid_test_datasets_provider.is_distributed = True

    # Optionally enable inprocess restart on pretrain
    pretrain, store = inprocess_restart.maybe_wrap_for_inprocess_restart(pretrain)

    pretrain(train_valid_test_datasets_provider,
             partial(model_provider, mamba_builder),
             ModelType.encoder_or_decoder,
             forward_step,
             args_defaults={'tokenizer_type': 'GPT2BPETokenizer'},
             store=store,
             extra_args_provider=add_modelopt_args if has_nvidia_modelopt else None,
             )


================================================
FILE: pretrain_t5.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.

"""Pretrain T5"""

from copy import deepcopy
from functools import partial
from typing import Union

import torch

import megatron
from megatron.core import mpu, tensor_parallel
from megatron.core.tokenizers.utils.build_tokenizer import build_tokenizer
from megatron.core.datasets.blended_megatron_dataset_builder import BlendedMegatronDatasetBuilder
from megatron.core.datasets.t5_dataset import (
    T5MaskedWordPieceDataset,
    T5MaskedWordPieceDatasetConfig,
)
from megatron.core.datasets.utils import get_blend_from_list
from megatron.core.enums import ModelType
from megatron.core.models.T5 import T5Model
from megatron.core.models.T5.t5_spec import (
    get_t5_decoder_with_local_block_spec,
    get_t5_decoder_with_transformer_engine_block_spec,
    get_t5_encoder_with_local_block_spec,
    get_t5_encoder_with_transformer_engine_block_spec,
)
from megatron.training import get_args, get_timers, pretrain, print_rank_0
from megatron.training.arguments import core_transformer_config_from_args
from pretrain_gpt import loss_func

"""
Pipeline parallelism for T5

T5 is a model architecture with both encoder and decoder blocks.
Consequently, pipeline parallelism is implemented slightly differently
compared to architectures like GPT and BERT.

In particular, when pipeline_model_parallel_world_size > 1, each stage
either executes an encoder block or a decoder block. The
--pipeline-model-parallel-split-rank argument controls the rank at which
the split happens: all ranks lower than this argument execute the
encoder block, and all ranks equal to or higher than this argument value
execute the decoder block.

In the encoder section of the model, only one tensor is sent downstream:
the intermediate encoder_hidden_state. In the decoder section of the
model, two tensors are sent downstream in the forward pass: the fully
computed encoder_hidden_state, and the intermediate decoder_hidden_state.

In particular, these are the shapes of the tensors sent between
different workers:
    If rank is in decoder section:
        intermediate decoder_hidden_state (pre-transpose),
        complete encoder_hidden_state (post-transpose).
    If rank is at boundary between encoder and decoder sections:
        complete encoder_hidden_state (post-transpose).
    If rank is in encoder section:
        intermediate encoder_hidden_state (pre-transpose).

Additionally, we have code in the backward_step function in schedules.py
to accumulate the encoder_hidden_state gradient across skip connections
(encoder_hidden_state fed in as input to each layer in the decoder).
"""


def model_provider(
    pre_process=True,
    post_process=True,
    add_encoder=True,
    add_decoder=True,
    config=None,
    pg_collection=None,
) -> Union[megatron.legacy.model.T5Model, T5Model]:
    """Builds the model.

    Args:
        pre_process (bool, optional): Set to true if you need to
            compute embedings. Defaults to True.
        post_process (bool, optional): Set to true if you need to want to
            compute output logits/loss. Defaults to True.
        add_encoder (bool, optional): Defaults to True
        add_decoder (bool, optional): Defaults to True
    Returns:
        T5Model: The returned T5 model
    """

    args = get_args()
    
    if config is None:
        config = core_transformer_config_from_args(args)
    if args.use_legacy_models:
        model = megatron.legacy.model.T5Model(
            config=config,
            num_tokentypes=0,
            parallel_output=True,
            pre_process=pre_process,
            post_process=post_process,
            add_encoder=add_encoder,
            add_decoder=add_decoder,
        )
    else:
        encoder_config = deepcopy(config)
        encoder_config.num_layers = args.encoder_num_layers

        if args.pipeline_model_parallel_size > 1:
            raise ValueError("Pipeline parallelism is not supported for T5.")

        encoder_layers_per_pipeline = (
            encoder_config.num_layers // encoder_config.pipeline_model_parallel_size
        )
        decoder_layers_per_pipeline = config.num_layers // config.pipeline_model_parallel_size

        if args.transformer_impl == "local":
            en_block_spec = get_t5_encoder_with_local_block_spec(encoder_layers_per_pipeline)
            de_block_spec = get_t5_decoder_with_local_block_spec(decoder_layers_per_pipeline)
        elif args.transformer_impl == "transformer_engine":
            en_block_spec = get_t5_encoder_with_transformer_engine_block_spec(
                encoder_layers_per_pipeline
            )
            de_block_spec = get_t5_decoder_with_transformer_engine_block_spec(
                decoder_layers_per_pipeline
            )

        print_rank_0('building T5 model ...')
        model = T5Model(
            config=config,
            encoder_config=encoder_config,
            transformer_encoder_layer_spec=en_block_spec,
            transformer_decoder_layer_spec=de_block_spec,
            vocab_size=args.padded_vocab_size,
            max_sequence_length=args.max_position_embeddings,
            pre_process=pre_process,
            post_process=post_process,
            fp16_lm_cross_entropy=args.fp16_lm_cross_entropy,
            parallel_output=True,
            share_embeddings_and_output_weights=not args.untie_embeddings_and_output_weights,
            position_embedding_type=args.position_embedding_type,
            rotary_percent=args.rotary_percent,
            relative_attention_num_buckets=args.relative_attention_num_buckets,
            relative_attention_max_distance=args.relative_attention_max_distance,
            add_encoder=add_encoder,
            add_decoder=add_decoder,
        )

    return model


def get_batch(data_iterator, use_local):
    """Build the batch."""

    keys = ['text_enc', 'text_dec', 'labels', 'loss_mask', 'enc_mask', 'dec_mask']
    datatype = torch.int64

    # Broadcast data.
    if data_iterator is not None:
        data = next(data_iterator)
    else:
        data = None
    data_b = tensor_parallel.broadcast_data(keys, data, datatype)

    # Unpack.
    tokens_enc = data_b['text_enc'].long()
    tokens_dec = data_b['text_dec'].long()
    labels = data_b['labels'].long()
    loss_mask = data_b['loss_mask'].float()
    enc_mask = data_b['enc_mask'] < 0.5
    dec_mask = data_b['dec_mask'] < 0.5

    # Configure attention mask based on different conditions
    # (e.g., transformer-impl, TE versions, TE backends)
    enc_mask, dec_mask, enc_dec_mask = T5MaskedWordPieceDataset.config_attention_mask(
        tokens_enc, tokens_dec, enc_mask, dec_mask, use_local
    )

    return tokens_enc, tokens_dec, loss_mask, labels, enc_mask, dec_mask, enc_dec_mask


def forward_step(data_iterator, model: T5Model):
    """Forward training step.

    Args:
        data_iterator : Input data iterator
        model (T5Model): The T5 Model
    """

    args = get_args()
    timers = get_timers()

    # Get the batch.
    timers('batch generator', log_level=2).start()
    use_local = args.transformer_impl == "local"
    tokens_enc, tokens_dec, loss_mask, lm_labels, enc_mask, dec_mask, enc_dec_mask = get_batch(
        data_iterator, use_local
    )
    timers('batch generator').stop()

    # Forward model lm_labels
    output_tensor = model(
        tokens_enc, tokens_dec, enc_mask, dec_mask, enc_dec_mask, lm_labels=lm_labels
    )

    return output_tensor, partial(loss_func, loss_mask)


def train_valid_test_datasets_provider(train_val_test_num_samples: int):
    """Build the train test and validation datasets.

    Args:
        train_val_test_num_samples : A list containing the number of samples
            in train test and validation.
    """
    args = get_args()

    tokenizer = build_tokenizer(args)

    config = T5MaskedWordPieceDatasetConfig(
        random_seed=args.seed,
        sequence_length=args.encoder_seq_length,
        sequence_length_decoder=args.decoder_seq_length,
        blend=get_blend_from_list(args.data_path),
        blend_per_split=[
            get_blend_from_list(args.train_data_path),
            get_blend_from_list(args.valid_data_path),
            get_blend_from_list(args.test_data_path),
        ],
        split=args.split,
        path_to_cache=args.data_cache_path,
        tokenizer=tokenizer,
        masking_probability=args.mask_prob,
        short_sequence_probability=args.short_seq_prob,
        masking_max_ngram=10,
        masking_do_full_word=True,
        masking_do_permutation=False,
        masking_use_longer_ngrams=False,
        masking_use_geometric_distribution=True,
        mid_level_dataset_surplus=args.mid_level_dataset_surplus,
        allow_ambiguous_pad_tokens=args.allow_ambiguous_pad_tokens,
    )

    print_rank_0('> building train, validation, and test datasets for T5 ...')

    train_ds, valid_ds, test_ds = BlendedMegatronDatasetBuilder(
        T5MaskedWordPieceDataset,
        train_val_test_num_samples,
        lambda: mpu.get_tensor_model_parallel_rank() == 0,
        config,
    ).build()

    print_rank_0("> finished creating T5 datasets ...")

    return train_ds, valid_ds, test_ds


def t5_embedding_ranks(pp_ranks):
    """T5's embedding ranks consist of the first and last ranks of the pipeline.
    Args:
        pp_ranks: A list of global ranks that constitute a pipeline group.
    """
    first_rank = pp_ranks[0]
    last_rank = pp_ranks[-1]

    if len(pp_ranks) == 1:
        return [first_rank]
    else:
        return [first_rank, last_rank]


def t5_position_embedding_ranks(pp_ranks):
    """T5's positional embeddings are on the first rank stage
    Args:
        pp_ranks: A list of global ranks that constitute a pipeline group.
    """
    return [pp_ranks[0]]


if __name__ == "__main__":

    # Temporary for transition to core datasets
    train_valid_test_datasets_provider.is_distributed = True

    pretrain(
        train_valid_test_datasets_provider,
        model_provider,
        ModelType.encoder_or_decoder,
        forward_step,
        args_defaults={'tokenizer_type': 'BertWordPieceLowerCase'},
        get_embedding_ranks=t5_embedding_ranks,
        get_position_embedding_ranks=t5_position_embedding_ranks,
    )


================================================
FILE: pretrain_vlm.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
"""Pretrain vision language model."""
import warnings
from copy import deepcopy
from functools import partial

import torch

from megatron.core import mpu, parallel_state, tensor_parallel
from megatron.core.datasets.blended_megatron_dataset_builder import (
    BlendedMegatronDatasetBuilder,
)
from megatron.core.datasets.multimodal_dataset import (
    MockMultimodalDataset,
    MultimodalDatasetConfig,
)
from megatron.core.enums import ModelType
from megatron.core.models.multimodal import context_parallel
from megatron.core.models.multimodal.llava_model import (
    DEFAULT_IMAGE_TOKEN_INDEX,
    LLaVAModel,
)
from megatron.core.models.multimodal.llava_spec import (
    decoder_model_with_local_default_spec,
    decoder_model_with_transformer_engine_default_spec,
)
from megatron.core.models.vision.clip_vit_model import get_num_image_embeddings
from megatron.core.models.vision.vit_layer_specs import (
    get_vit_layer_with_local_spec,
    get_vit_layer_with_transformer_engine_spec,
)
from megatron.core.transformer.enums import AttnMaskType
from megatron.core.transformer.spec_utils import import_module
from megatron.training import (
    get_args,
    get_timers,
    get_tokenizer,
    pretrain,
    print_rank_0,
)
from megatron.training.arguments import core_transformer_config_from_args
from pretrain_gpt import loss_func


def model_provider(
    pre_process=True,
    post_process=True,
    add_encoder=True,
    add_decoder=True,
    parallel_output=True,
    config=None,
    pg_collection=None,
) -> LLaVAModel:
    """Builds the model.

    Note: currently, only LLaVA model is supported. Follow-up changes will make this configurable.

    Args:
        pre_process (bool): Include the embedding layer in the gpt decoder (used with pipeline parallelism). Defaults to True.
        post_process (bool): Include an output layer and a layernorm in the gpt decoder (used with pipeline parallelism). Defaults to True.
        add_encoder (bool): Construct the encoder module (used with pipeline parallelism). Defaults to True. When we use pipelining, the encoder
            will live on only a subset of the pipeline stages (specifically, only the first stage).
        add_decoder (bool): Construct the decoder module (used with pipeline parallelism). Defaults to True. When we use pipelining, the decoder
            will live on only a subset of the pipeline stages (specifically, every stage after the first one).
        parallel_output (bool): Enable model parallel output.

    Returns:
        model (megatron.core.models.multimodal.llava_model.LLaVAModel): A multimodal model
    """
    args = get_args()
    vision_model_type = "clip"

    assert args.ckpt_format == 'torch', "Only ckpt-format torch is supported for VLM training currently."
    assert not (args.context_parallel_size > 1 and args.pipeline_model_parallel_size > 1), "PP+CP is not yet supported by this script. \
    Current mock dataset does not support natively packed sequence dataset required for correct PP comm shapes."

    num_image_embeddings = get_num_image_embeddings(
        args.img_h, args.img_w, args.patch_dim, vision_model_type, args.disable_vision_class_token,
        class_token_len=1, pixel_shuffle=False, use_tile_tags=False
    )

    old_seq_length = args.seq_length
    # dataloader-seq-length is required to determine the length of text seq len
    if args.dataloader_seq_length is None:
        args.dataloader_seq_length = args.seq_length

    # decoder_seq_len denotes the language model sequence length.
    decoder_seq_len = args.dataloader_seq_length + num_image_embeddings

    # seq_length and encoder_seq_length denote the vision model sequence length. Override if the user provided something else.
    args.seq_length = args.encoder_seq_length = num_image_embeddings
    if torch.distributed.get_rank() == 0 and old_seq_length != args.seq_length:
        warnings.warn(
            f"Changed seq_length and encoder_seq_length (vision model sequence length) from {old_seq_length} to num_image_tokens ({num_image_embeddings})"
        )
    mp_padding_needed = context_parallel.get_padding(
        decoder_seq_len,
        args.context_parallel_size,
        args.tensor_model_parallel_size,
        args.sequence_parallel,
        args.decoder_tp_comm_overlap,
        args.decoder_seq_length
    )
    args.decoder_seq_length = decoder_seq_len + mp_padding_needed

    args.max_position_embeddings = max(args.max_position_embeddings, args.decoder_seq_length)

    print_rank_0('building a multimodal model ...')
    if config is None:
        language_transformer_config = core_transformer_config_from_args(get_args())
    else:
        language_transformer_config = config
    if args.decoder_num_layers is not None:
        language_transformer_config.num_layers = args.decoder_num_layers
    else:
        language_transformer_config.num_layers = args.num_layers
    if args.decoder_tp_comm_overlap:
        assert args.transformer_impl == "transformer_engine", \
            "TransformerEngine is needed to support Decoder TP Comm overlap"
        language_transformer_config.tp_comm_overlap = args.decoder_tp_comm_overlap

    if args.spec is not None:
        language_transformer_layer_spec = import_module(args.spec)
    elif args.transformer_impl == "transformer_engine":
        language_transformer_layer_spec = decoder_model_with_transformer_engine_default_spec(
            args.num_experts, args.moe_grouped_gemm
        )
    else:  # transformer_impl == "local"
        language_transformer_layer_spec = decoder_model_with_local_default_spec(
            args.num_experts, args.moe_grouped_gemm
        )

    # Prepare mask type for any required padding to support CP/SP sequence sharding.
    if mp_padding_needed > 0:
        if language_transformer_layer_spec.submodules.self_attention.params.get('attn_mask_type', '') == AttnMaskType.causal:
            language_transformer_layer_spec.submodules.self_attention.params['attn_mask_type'] = AttnMaskType.padding_causal
        elif language_transformer_layer_spec.submodules.self_attention.params.get('attn_mask_type', '') == AttnMaskType.no_mask:
            language_transformer_layer_spec.submodules.self_attention.params['attn_mask_type'] = AttnMaskType.padding

    if args.transformer_impl == "transformer_engine":
        vision_transformer_layer_spec = get_vit_layer_with_transformer_engine_spec()
    else:  # transformer_impl == "local"
        vision_transformer_layer_spec = get_vit_layer_with_local_spec()

    # TODO: Make these configurable via input .yaml config.
    vision_transformer_config = deepcopy(language_transformer_config)
    vision_transformer_config.num_layers = args.encoder_num_layers
    vision_transformer_config.first_pipeline_num_layers = None
    vision_transformer_config.last_pipeline_num_layers = None
    vision_transformer_config.vision_model_type = vision_model_type
    vision_transformer_config.context_parallel_size = 1 # Force CP=1 for Vision Transformer
    if vision_transformer_config.sequence_parallel:
        print_rank_0("> Disabling Sequence parallelism in Vision Transformer. Not yet supported")
        vision_transformer_config.sequence_parallel = False
    if vision_transformer_config.tp_comm_overlap:
        print_rank_0("> Disabling TP Comm overlap in Vision Transformer. Not yet supported")
        vision_transformer_config.tp_comm_overlap = False

    vision_projection_type = "mlp"
    vision_projection_config = deepcopy(language_transformer_config)
    vision_projection_config.context_parallel_size = 1 # Force CP=1 for Vision Projection
    if vision_projection_config.sequence_parallel:
        print_rank_0("> Disabling Sequence parallelism in Vision Projection. Not yet supported")
        vision_projection_config.sequence_parallel = False
    if vision_projection_config.tp_comm_overlap:
        print_rank_0("> Disabling TP Comm overlap in Vision Projection. Not yet supported")
        vision_projection_config.tp_comm_overlap = False

    # Vision Encoder and Projection should live on PP rank0
    vision_transformer_config.pipeline_model_parallel_size = 1
    vision_projection_config.pipeline_model_parallel_size = 1

    vision_projection_modules = deepcopy(language_transformer_layer_spec.submodules.mlp.submodules)

    language_max_sequence_length = args.decoder_seq_length
    if args.context_parallel_size > 1:
        if args.use_packed_sequence or mp_padding_needed > 0:
            # Use THD data format
            language_max_sequence_length = args.decoder_seq_length * args.micro_batch_size
    model = LLaVAModel(
        language_transformer_config=language_transformer_config,
        language_transformer_layer_spec=language_transformer_layer_spec,
        language_vocab_size=args.padded_vocab_size,
        language_max_sequence_length=language_max_sequence_length,
        vision_transformer_config=vision_transformer_config,
        vision_transformer_layer_spec=vision_transformer_layer_spec,
        drop_vision_class_token=args.disable_vision_class_token,
        vision_projection_config=vision_projection_config,
        vision_projection_layer_spec=vision_projection_modules,
        vision_projection_type=vision_projection_type,
        parallel_output=parallel_output,
        language_position_embedding_type=args.position_embedding_type,
        language_rotary_percent=args.rotary_percent,
        language_rope_scaling=args.use_rope_scaling,
        pre_process=parallel_state.is_pipeline_first_stage(),
        post_process=parallel_state.is_pipeline_last_stage(),
        add_encoder=parallel_state.is_pipeline_first_stage(),
        add_decoder=True,
        img_h=args.img_h,
        img_w=args.img_w,
        patch_dim=args.patch_dim,
    )

    model.freeze(
        freeze_language_model=args.freeze_LM,
        freeze_vision_model=args.freeze_ViT,
        freeze_vision_projection=False,
    )

    return model


def train_valid_test_datasets_provider(train_val_test_num_samples):
    """Build the train test and validation datasets.

    Args:
        train_val_test_num_samples : A list containing the number of samples in train, validation, and test sets.

    Returns:
        train_ds, val_ds, test_ds (megatron.core.datasets.multimodal_dataset.MockMultimodalDataset): Train, validation, and test datasets, respectively.
    """
    args = get_args()

    config = MultimodalDatasetConfig(
        random_seed=args.seed,
        split=args.split,
        sequence_length=args.dataloader_seq_length,
        tokenizer=get_tokenizer(),
        reset_position_ids=args.reset_position_ids,
        reset_attention_mask=args.reset_attention_mask,
        eod_mask_loss=args.eod_mask_loss,
        image_h=args.img_h,
        image_w=args.img_w,
        preprocess_func=_preprocess_data_for_llava,
        mid_level_dataset_surplus=args.mid_level_dataset_surplus,
        allow_ambiguous_pad_tokens=args.allow_ambiguous_pad_tokens,
    )

    print_rank_0("> building train, validation, and test datasets for multimodal ...")

    train_ds, valid_ds, test_ds = BlendedMegatronDatasetBuilder(
        MockMultimodalDataset,
        train_val_test_num_samples,
        lambda: parallel_state.get_tensor_model_parallel_rank() == 0,
        config,
    ).build()

    print_rank_0("> finished creating multimodal datasets ...")

    return train_ds, valid_ds, test_ds


def _preprocess_data_for_llava(data):
    """Preprocess data sample to the format expected by a LLaVA model.

    Note: This doesn't support all the different modes in the official LLaVA repo yet.

    Args:
        data (dict): Data sample with keys like 'image', 'tokens', etc.

    Returns:
        data (dict): Processed data sample suitable for the model.
    """
    # Prepend image token index to tokens.
    data["tokens"] = torch.cat(
        [
            DEFAULT_IMAGE_TOKEN_INDEX
            * torch.ones(1, dtype=data["tokens"].dtype, device=data["tokens"].device),
            data["tokens"],
        ]
    )
    # Prepend labels accordingly.
    data["labels"] = torch.cat([data["tokens"][1].unsqueeze(0), data["labels"]])
    # Zero loss mask for the image token index.
    data["loss_mask"] = torch.cat(
        [
            torch.zeros(1, dtype=data["loss_mask"].dtype, device=data["loss_mask"].device),
            data["loss_mask"],
        ]
    )
    # Add one more position id.
    data["position_ids"] = torch.cat(
        [data["position_ids"], data["position_ids"][-1].unsqueeze(0) + 1]
    )

    return data


def get_batch(data_iterator):
    """Generate a batch.

    Args:
        data_iterator: Iterable dataset.

    Returns:
        sample: A data sample with images, tokens, etc.
    """
    args = get_args()
    cp_size = args.context_parallel_size
    # Broadcast data.
    if data_iterator is not None:
        data = next(data_iterator)
    else:
        data = None

    data_i = tensor_parallel.broadcast_data(["tokens", "position_ids", "labels"], data, torch.int64)
    data_f = tensor_parallel.broadcast_data(["image", "loss_mask"], data, torch.float32)

    batch = dict()
    packed_seq_params = None
    image_token_mask = None
    # Create batch with tokens and position_ids for CP sharding.
    tokens = data_i["tokens"].long()
    position_ids = data_i["position_ids"].long()
    labels = data_i["labels"].long()
    loss_mask = data_f["loss_mask"].float()
    images = data_f["image"].float()

    if cp_size > 1 or args.sequence_parallel:
        vision_model_type = "clip"
        # Calculate the number of image embedding tokens will be added to text tokens
        num_image_embeddings_per_tile = get_num_image_embeddings(
            args.img_h, args.img_w, args.patch_dim, vision_model_type,
            args.disable_vision_class_token, 1, False
        )
        # Pad to make sure the text sequence can be sharded equally by CP chunks.
        image_token_mask = tokens == DEFAULT_IMAGE_TOKEN_INDEX
        num_images_per_sample = torch.sum(image_token_mask, dim=-1)
        img_seq_len = (num_image_embeddings_per_tile * num_images_per_sample - num_images_per_sample).max()
        mp_padding_needed_for_text = context_parallel.get_padding(
            tokens.shape[1] + img_seq_len,
            args.context_parallel_size,
            args.tensor_model_parallel_size,
            args.sequence_parallel,
            args.decoder_tp_comm_overlap,
            args.decoder_seq_length
        )
        if mp_padding_needed_for_text > 0:
            tokens, position_ids, labels, loss_mask = [torch.nn.functional.pad(item, (0, mp_padding_needed_for_text)) for item in (tokens, position_ids, labels, loss_mask)]
        packed_seq_params = context_parallel.get_packed_seq_params(tokens, img_seq_len, mp_padding_needed_for_text, cp_size, args.use_packed_sequence)

        if packed_seq_params.qkv_format == 'thd':
            # Reshape from [B,S] to [T,1]
            tokens = (
                tokens.contiguous()
                .view(tokens.shape[0] * tokens.shape[1])
                .unsqueeze(0)
            )
            position_ids = (
                position_ids.contiguous()
                .view(position_ids.shape[0] * position_ids.shape[1])
                .unsqueeze(0)
            )
            labels = labels.view(labels.shape[0] * labels.shape[1]).unsqueeze(0)
            loss_mask = loss_mask.view(
                loss_mask.shape[0] * loss_mask.shape[1]
            ).unsqueeze(0)

    attention_mask = None  # Use the attention mask type defined in layer spec. Typically no mask for the vision model and causal mask for the vision model.

    return tokens, position_ids, labels, images, loss_mask, attention_mask, packed_seq_params


def forward_step(data_iterator, model: LLaVAModel):
    """Forward training step.

    Args:
        data_iterator: Iterable dataset.
        model (megatron.core.models.multimodal.llava_model.LLaVAModel): Multimodal model

    Returns:
        output_tensor (torch.Tensor): Loss of shape [b, s] if labels are provided, otherwise logits of shape [b, s, vocab_size].
        loss_func (callable): Loss function with a loss mask specified.
    """
    timers = get_timers()

    # Get the batch.
    timers('batch-generator', log_level=2).start()
    tokens, position_ids, labels, images, loss_mask, attention_mask, packed_seq_params = get_batch(data_iterator)
    timers('batch-generator').stop()

    output_tensor, loss_mask = model(
        images, tokens, position_ids, attention_mask, labels, loss_mask, packed_seq_params=packed_seq_params
    )

    return output_tensor, partial(loss_func, loss_mask)


def add_vlm_extra_args(parser):
    """Extra arguments."""
    group = parser.add_argument_group(title='vision language model specific arguments')
    group.add_argument(
        '--freeze-LM', action='store_true', default=False, help="Freeze language model weights"
    )
    group.add_argument(
        '--freeze-ViT', action='store_true', default=False, help="Freeze vision model (ViT) weights"
    )
    group.add_argument(
        "--disable-vision-class-token",
        action="store_true",
        default=False,
        help="Drop vision model class token",
    )
    group.add_argument("--dataloader-seq-length", type=int, help="Make dataloader to produce sequences of specific length.")
    group.add_argument("--decoder-tp-comm-overlap", action="store_true", default=False, help="Enables the overlap of "
                        "Tensor parallel communication and GEMM kernels in Decoder only. "
                        "Please provide decoder-seq-length when using this feature.")
    group.add_argument(
        "--use-packed-sequence",
        action="store_true",
        default=False,
        help="Use packed sequence",
    )
    return parser


def llava_embedding_ranks(pp_ranks):
    """LLaVA's embedding ranks consist of the first and last ranks of the pipeline.
    Args:
        pp_ranks: A list of global ranks that constitute a pipeline group.
    """
    first_rank = pp_ranks[0]
    last_rank = pp_ranks[-1]

    if len(pp_ranks) == 1:
        return [first_rank]
    else:
        return [first_rank, last_rank]


def llava_position_embedding_ranks(pp_ranks):
    """LLaVA's positional embeddings are on the first rank stage
    Args:
        pp_ranks: A list of global ranks that constitute a pipeline group.
    """
    return [pp_ranks[0]]


if __name__ == "__main__":
    train_valid_test_datasets_provider.is_distributed = True

    pretrain(
        train_valid_test_datasets_provider,
        model_provider,
        ModelType.encoder_or_decoder,
        forward_step,
        args_defaults={'tokenizer_type': 'GPT2BPETokenizer'},
        extra_args_provider=add_vlm_extra_args,
        get_embedding_ranks=llava_embedding_ranks,
        get_position_embedding_ranks=llava_position_embedding_ranks,
    )


================================================
FILE: pyproject.toml
================================================
# Copyright (c) 2023, NVIDIA CORPORATION.  All rights reserved.

[build-system]
requires = ["setuptools<80.0.0", "pybind11", "packaging>=24.2"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
include-package-data = true

[tool.setuptools.packages.find]
include = [
    "megatron.core",
    "megatron.core.*",
    "megatron.training",
    "megatron.training.*",
]

[tool.setuptools.dynamic]
version = { attr = "megatron.core.package_info.__version__" }
readme = { file = "README.md", content-type = "text/markdown" }

[project]
name = "megatron-core"
dynamic = ["version", "readme"]
description = "Megatron Core - a library for efficient and scalable training of transformer based models"
requires-python = ">=3.10"
license = { text = "Apache 2.0" }
dependencies = ["torch>=2.6.0", "numpy", "packaging>=24.2"]
authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }]
maintainers = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }]
keywords = [
    "NLP",
    "NLU",
    "deep",
    "gpu",
    "language",
    "learning",
    "machine",
    "nvidia",
    "pytorch",
    "torch",
    "transformer",
]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Environment :: Console",
    "Intended Audience :: Developers",
    "Intended Audience :: Information Technology",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: BSD License",
    "Natural Language :: English",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Scientific/Engineering :: Image Recognition",
    "Topic :: Scientific/Engineering :: Mathematics",
    "Topic :: Scientific/Engineering",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Software Development :: Libraries",
    "Topic :: Utilities",
]

[project.urls]
Download = "https://github.com/NVIDIA/Megatron-LM/releases"
Homepage = "https://github.com/NVIDIA/Megatron-LM"

[project.optional-dependencies]
training = [
    "flask-restful",
    "sentencepiece",
    "tiktoken",
    "wandb",
    "transformers",
    "accelerate",
]

### 'mlm' group is deprecated. please use 'training' instead ###
mlm = [
    "flask-restful",
    "sentencepiece",
    "tiktoken",
    "wandb",
    "transformers",
    "accelerate",
]

dev = [
    "nvidia-modelopt[torch]; sys_platform != 'darwin'",
    "transformer-engine[pytorch,core_cu13]",
    "nvidia-resiliency-ext",
    "tqdm",
    "einops~=0.8",
    "tensorstore~=0.1,!=0.1.46,!=0.1.72",
    "nvtx~=0.2",
    "multi-storage-client~=0.27",
    "opentelemetry-api~=1.33.1",
    "mamba-ssm~=2.2",
    "causal-conv1d~=1.5",
    "flash-linear-attention~=0.4.0",
    "megatron-energon[av_decode]~=6.0",
    "av",
    "flashinfer-python~=0.5.0",
    "wget",
    "onnxscript",
    "fastapi~=0.50",                                    # Forcing a little bit more recent version of fastapi to be compatible with pydantic 2.0
    "datasets",
    "emerging_optimizers",
    "hypercorn",
    "quart",
    "openai[aiohttp]",
    "orjson",
]

lts = [
    "tqdm",
    "einops~=0.8",
    "tensorstore~=0.1,!=0.1.46,!=0.1.72",
    "nvtx~=0.2",
    "multi-storage-client~=0.27",
    "opentelemetry-api~=1.33.1",
    "mamba-ssm~=2.2",
    "causal-conv1d~=1.5",
    "megatron-energon[av_decode]~=6.0",
    "av",
    "flashinfer-python~=0.5.0",
    "wget",
    "onnxscript",
    "fastapi~=0.50",                      # Forcing a little bit more recent version of fastapi to be compatible with pydantic 2.0
    "datasets",
    "emerging_optimizers",
]

[dependency-groups]
test = [
    "coverage",
    "nltk",
    "wrapt",
    "pytest==8.3.5",
    "pytest-mock",
    "mock",
    "pytest-cov",
    "pytest-random-order",
    "pytest-asyncio",
    "pygithub",
    "pydantic",
    "tensorboard",
    "pyyaml",
    "nemo-run",
]
docs = [
    "sphinx",
    "sphinx-autobuild",    # For live doc serving while editing docs
    "sphinx-autodoc2",     # For documenting Python API
    "sphinx-copybutton",   # Adds a copy button for code blocks
    "myst_parser",         # For our markdown docs
    "nvidia-sphinx-theme", # Our NVIDIA theme
]
build = [
    "setuptools<80.0.0,>=77.0.0",
    "packaging>=24.2",
    "hatchling",
    "pybind11",
    "Cython>=3.0.0",
    "torch",
    "nvidia-mathdx",              # for TE
]
linting = [
    "ruff~=0.9.0",
    "black==24.4.2",
    "isort==5.13.2",
    "flake8==7.1.0",
    "pylint==3.2.6",
]
ci = ["python-gitlab", "slack-sdk", "pandas"]
no_pypi_wheels = ["flash_mla", "emerging_optimizers"]

[tool.uv]
default-groups = ["linting", "build", "test"]
no-build-isolation-package = [
    "causal-conv1d",
    "flash_mla",
    "mamba-ssm",
    "transformer-engine",
    "transformer-engine-torch",
]
link-mode = "copy"
conflicts = [[{ extra = "lts" }, { extra = "dev" }]]
# We don't define override-dependencies globally but rather locally where we need it.
# For instance, when installing into a PyTorch base image, we don't want to install torch, 
# torchvision, and triton.
override-dependencies = [
    "torch; sys_platform == 'never'",
    "torchvision; sys_platform == 'never'",
    "triton; sys_platform == 'never'",
]

[tool.uv.sources]

flash_mla = [
    { git = "https://github.com/deepseek-ai/FlashMLA", rev = "9edee0c022cd0938148a18e334203b0aab43aa19" },
]
transformer-engine = { git = "https://github.com/NVIDIA/TransformerEngine.git", rev = "287770466f0f4433052260a765db5ff7b8be1320" }
nemo-run = { git = "https://github.com/NVIDIA-NeMo/Run.git", rev = "01a9a8ba360f7b2908728ad0516e0ad9d936966d" }
emerging_optimizers = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git", rev = "v0.1.0" }
nvidia-resiliency-ext = { git = "https://github.com/NVIDIA/nvidia-resiliency-ext.git", rev = "v0.5.0" }

[tool.isort]
profile = "black"                                                          # black-compatible
line_length = 100                                                          # should match black parameters
py_version = 310                                                           # python 3.10 as a target version
known_first_party = ["megatron"]                                           # FIRSTPARTY section
known_third_party = ["transformer_engine"]                                 # THIRDPARTY section
sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"]
default_section = "THIRDPARTY"
extend_skip = ["setup.py"]

[tool.black]
line_length = 100
skip_string_normalization = true
# recognized by future versions, disallows to reformat code with incompatible versions
# Matches NeMO version so people working on both codebases don't need two different version of black installed
required_version = "24"
skip_magic_trailing_comma = true
include = '\.pyi?$'
exclude = '''
/(
    \.git
  | \.venv
  | build
)/
'''

[tool.pytest.ini_options]
addopts = "--durations=15 -s -rA -x"
testpaths = ["tests"]
python_files = "test_*.py"
markers = [
    "internal: mark a test as a test to private/internal functions.",
    "flaky: mark flaky tests for LTS environment",
    "flaky_in_dev: mark flaky tests for DEV environment",
]

[tool.coverage.run]
concurrency = ["thread", "multiprocessing"]
omit = [
    "/tmp/*",
    "/opt/megatron-lm/tests/*",
    "/opt/megatron-lm/tools/*",
    "/usr/local/lib/python3.12/dist-packages/*",
    "/opt/megatron-lm/_remote_module_non_scriptable",
]
parallel = true
sigterm = false

[tool.coverage.paths]
source = [
    ".",
    "/opt/megatron-lm/",
    "/opt/megatron-lm-legacy/",
    "/home/runner/work/Megatron-LM/Megatron-LM/",
]

[tool.ruff.lint]
# Enable all `pydocstyle` rules, limiting to those that adhere to the
# Google convention via `convention = "google"`, below.
# select = ["D", "F"]
select = ["S506"]

# - On top of the Google convention, disable `D417`, which requires
#   documentation for every function parameter.
# - F841: local variable assigned but never used (exluced to favor readability)
# TODO: Remove D10 once we are about to release to get all the docstrings written
ignore = ["D417", "D10", "F841"]

[tool.ruff.lint.pydocstyle]
convention = "google"

# Section to exclude errors for different file types
[tool.ruff.per-file-ignores]
# Ignore all directories named `tests`.
"tests/**" = ["D"]
# Ignore all files that end in `_test.py`.
"*_test.py" = ["D"]
# Ignore F401 (import but unused) in __init__.py
"__init__.py" = ["F401"]


================================================
FILE: scripts/check_api_backwards_compatibility.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#!/usr/bin/env python3
"""
Megatron Core API Compatibility Checker

Simple checker using Griffe to find breaking changes between two versions.
Objects decorated with @internal_api, @experimental_api, or @deprecated are excluded from checks.

Usage:
    python scripts/check_api_backwards_compatibility.py --baseline core_v0.14.0
"""

import argparse
import logging
import os
import re
import sys
from collections import Counter

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s: %(message)s',
    handlers=[logging.StreamHandler(sys.stderr)]
)
logger = logging.getLogger(__name__)

try:
    import griffe
    try:
        from griffe.dataclasses import Object
    except (ImportError, AttributeError):
        from griffe import Object
except ImportError as e:
    logger.error(f"griffe not installed: {e}")
    logger.error("Install with: pip install griffe")
    sys.exit(2)

# Configure UTF-8 for Windows
if sys.platform == 'win32':
    import io
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')


# Decorators that exempt objects from compatibility checks
EXEMPT_DECORATORS = ['internal_api', 'deprecated', 'experimental_api']

# Breakage kinds to ignore globally (not actual API signature changes)
# AttributeChangedValueBreakage: Changing constant values (e.g., VERSION = "1.0" -> "2.0")
#   is not a breaking API change - the constant still exists with the same name
IGNORED_BREAKAGE_KINDS = [
    'AttributeChangedValueBreakage',
]

# Breakage kinds to ignore only for __init__ methods
# ParameterMovedBreakage: Reordering parameters in __init__ is generally safe because:
#   - Config dataclasses should always be initialized with keyword arguments
#   - Adding fields to parent dataclasses shifts child __init__ params (inheritance artifact)
#   - Nobody should call Config(4096, 32, ...) with positional args
IGNORED_FOR_INIT_METHODS = [
    'ParameterMovedBreakage',
]


def has_exempt_decorator(obj: Object) -> bool:
    """Check if a Griffe object has any exempt decorator.
    
    Args:
        obj: A Griffe Object to check for exempt decorators
        
    Returns:
        bool: True if the object has any decorator matching EXEMPT_DECORATORS list
    """
    if not hasattr(obj, 'decorators'):
        return False
    if not obj.decorators:
        return False
    for decorator in obj.decorators:
        # Get the actual decorator name from the value attribute
        dec_value = str(getattr(decorator, 'value', ''))
        if any(exempt in dec_value for exempt in EXEMPT_DECORATORS):
            return True
    return False


def get_filtered_paths(package: Object, package_name: str) -> set:
    """Recursively collect all object paths with exempt decorators from a package.
    
    This function traverses the entire package tree and identifies objects that are
    decorated with any of the EXEMPT_DECORATORS, building a set of their full paths.
    
    Args:
        package: The Griffe package object to traverse
        package_name: The full package name (e.g., "megatron.core") for path construction
        
    Returns:
        set: A set of full object paths (e.g., "megatron.core.ModelParallelConfig") 
             that should be filtered from compatibility checks
    """
    filtered = set()
    visited = set()
    
    def visit(obj, path, depth=0, is_root=False):
        # Prevent infinite recursion
        if depth > 20 or id(obj) in visited:
            return
        visited.add(id(obj))
        
        # For root object, use the provided path; for children, append obj.name
        if is_root:
            current_path = path
        else:
            current_path = f"{path}.{obj.name}" if path else obj.name
        
        # Skip aliases (imported objects)
        if hasattr(obj, 'is_alias') and obj.is_alias:
            return
        
        # Skip private members
        if obj.name.startswith('_') and not obj.name.startswith('__'):
            return
            
        # Check for exempt decorator
        if has_exempt_decorator(obj):
            filtered.add(current_path)
            logger.info(f"  ⏭️  Exempt: {current_path}")
        
        # Visit children
        if hasattr(obj, 'members'):
            for member in obj.members.values():
                visit(member, current_path, depth + 1, is_root=False)
    
    # Start with the full package name (e.g., "megatron.core")
    visit(package, package_name, is_root=True)
    return filtered


def strip_ansi_codes(text):
    """Remove ANSI escape codes (terminal formatting) from text.
    
    Griffe includes ANSI codes for terminal formatting in some strings,
    which breaks string matching. This strips them out.
    
    Args:
        text: String potentially containing ANSI escape codes
        
    Returns:
        str: Clean text with ANSI codes removed
    """
    if not text:
        return text
    # Pattern to match ANSI escape codes
    ansi_escape = re.compile(r'\x1b\[[0-9;]*m')
    return ansi_escape.sub('', text)


def get_object_path(change) -> str:
    """Extract the full object path from a Griffe breaking change.
    
    Tries multiple sources to get the object path:
    1. Direct path attributes (new_path, old_path, path)
    2. Path from new_value or old_value objects
    3. Parse from the explanation string as last resort
    
    Args:
        change: A Griffe breaking change object
        
    Returns:
        str: The full object path (e.g., "megatron.core.ModelParallelConfig.__init__")
             or None if unable to extract
    """
    # Try different attributes
    path = (getattr(change, 'new_path', None) or 
            getattr(change, 'old_path', None) or
            getattr(change, 'path', None))
    
    if path:
        return strip_ansi_codes(path)
    
    # Try from values
    if hasattr(change, 'new_value') and change.new_value:
        path = getattr(change.new_value, 'path', None)
        if path:
            return strip_ansi_codes(path)
    
    if hasattr(change, 'old_value') and change.old_value:
        path = getattr(change.old_value, 'path', None)
        if path:
            return strip_ansi_codes(path)
    
    # Last resort: parse from explanation
    # Format: "filepath:line: object_path: description"
    # Example: "megatron/core/model_parallel_config.py:338: ModelParallelConfig.cpu_offloading_weights: Attribute value was changed"
    try:
        explanation = change.explain()
        # Split by ": " and get the second part (object path)
        parts = explanation.split(': ')
        if len(parts) >= 2:
            # Get the part after "filepath:line" but before the description
            # It's usually the second part
            object_path = parts[1]
            
            # Extract the module path from file path (first part)
            file_part = parts[0].split(':')[0]  # Get just the file path, remove line number
            
            # Convert file path to module path
            # e.g., "megatron/core/model_parallel_config.py" -> "megatron.core.model_parallel_config"
            module_path = file_part.replace('/', '.').replace('\\', '.').replace('.py', '')
            
            # If object_path doesn't start with module, prepend it
            if not object_path.startswith(module_path):
                full_path = f"{module_path}.{object_path}"
            else:
                full_path = object_path
            
            return strip_ansi_codes(full_path)
    except Exception:
        pass
    
    return None


def should_skip_change(change, filtered_paths: set) -> bool:
    """Determine if a breaking change should be skipped.
    
    A change is skipped if:
    - The change kind is in IGNORED_BREAKAGE_KINDS (not a signature change)
    - The change kind is in IGNORED_FOR_INIT_METHODS and affects an __init__ method
    - The changed object itself is in filtered_paths (exact match)
    - The changed object is a child of an exempt object (prefix match)
    
    Args:
        change: A Griffe breaking change object
        filtered_paths: Set of paths with exempt decorators
        
    Returns:
        bool: True if the change should be skipped (filtered out)
    """
    # Check if this breakage kind should be ignored globally (not a signature change)
    change_kind = type(change).__name__
    if change_kind in IGNORED_BREAKAGE_KINDS:
        return True
    
    path = get_object_path(change)
    if not path:
        return False
    
    # Strip parameter names from path for matching
    # e.g., "Class.__init__(param)" -> "Class.__init__"
    clean_path = path.split('(')[0] if '(' in path else path
    
    # Check if this is a breakage kind we ignore for __init__ methods
    # Config dataclasses should use keyword args, so parameter reordering is safe
    if change_kind in IGNORED_FOR_INIT_METHODS:
        if '.__init__' in clean_path:
            return True
    
    # Check exact match
    if clean_path in filtered_paths or path in filtered_paths:
        return True
    
    # Check if it's a child of a filtered object
    # e.g., MyClass.__init__ is child of MyClass, MyClass.attr is child of MyClass
    for filtered_path in filtered_paths:
        if clean_path.startswith(filtered_path + '.'):
            return True
        # Also check the original path in case parameter names matter
        if path.startswith(filtered_path + '.'):
            return True
    
    return False


def main():
    parser = argparse.ArgumentParser(description='Check API backwards compatibility')
    parser.add_argument('--baseline', required=True, help='Baseline git ref (tag/branch/commit)')
    parser.add_argument('--current', default=None, help='Current git ref (default: working directory)')
    parser.add_argument('--package', default='megatron.core', help='Package to check')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    args = parser.parse_args()
    
    try:
        package_name = args.package
        
        logger.info(f"\n{'='*80}\nAPI COMPATIBILITY CHECK: {package_name}\n{'='*80}\n")
        
        # Load baseline
        logger.info(f"📦 Loading baseline @ {args.baseline}...")
        baseline = griffe.load_git(
            package_name, ref=args.baseline, resolve_aliases=False, 
            resolve_external=False, allow_inspection=False)
        logger.info(f"   ✓ Loaded")
        
        # Load current
        logger.info(f"\n📦 Loading current @ {args.current or 'working directory'}...")
        if args.current:
            current = griffe.load_git(
                package_name, ref=args.current, resolve_aliases=False,
                resolve_external=False, allow_inspection=False)
        else:
            current = griffe.load(
                package_name, search_paths=[os.getcwd()], resolve_aliases=False,
                resolve_external=False, allow_inspection=False)
        logger.info(f"   ✓ Loaded")
        
        # Get filtered paths from CURRENT version only
        logger.info(f"\n🔍 Finding exempt objects in current version...")
        filtered_paths = get_filtered_paths(current, package_name)
        logger.info(f"   Found {len(filtered_paths)} exempt objects")
        
        # Find breaking changes
        logger.info(f"\n🔍 Comparing versions...")
        all_changes = list(griffe.find_breaking_changes(baseline, current))
        logger.info(f"   Found {len(all_changes)} potential breaking changes")
        
        # Filter out exempt changes  
        breaking_changes = []
        skipped_count = 0
        
        # DEBUG: Print first 5 breaking changes for debugging
        print("\n===TEST DEBUG (first 5 changes)===")
        print(f"Filtered paths: {filtered_paths}")
        for i, change in enumerate(all_changes[:5]):
            path = get_object_path(change)
            clean_path = path.split('(')[0] if path and '(' in path else path
            print(f"\nChange {i+1}: {path}")
            print(f"  Clean: {clean_path}")
            print(f"  Clean repr: {repr(clean_path)}")
            
            # Test matching
            matched = False
            for fpath in filtered_paths:
                if clean_path and (clean_path == fpath or clean_path.startswith(fpath + '.')):
                    print(f"  ✓ MATCH with: {fpath}")
                    matched = True
                    break
            if not matched:
                print(f"  ✗ NO MATCH")
        print("\n===END TEST DEBUG===\n")
        
        for change in all_changes:
            if should_skip_change(change, filtered_paths):
                skipped_count += 1
            else:
                breaking_changes.append(change)
        
        logger.info(f"\n   Skipped {skipped_count} exempt | Reporting {len(breaking_changes)} breaking changes")
        
        # Print results
        if not breaking_changes:
            logger.info(f"\n✅ No breaking changes detected!")
            return 0
        
        # Count by type
        change_types = Counter(change.kind.value for change in breaking_changes)
        logger.info(f"\n📊 Breaking changes by type:")
        for change_type, count in sorted(change_types.items(), key=lambda x: -x[1]):
            logger.info(f"   • {change_type}: {count}")
        
        # Print detailed changes
        print(f"\n❌ Found {len(breaking_changes)} breaking change(s):\n{'='*80}")
        
        for i, change in enumerate(breaking_changes, 1):
            path = get_object_path(change)
            path_info = f"\n   Object: {path}" if path else ""
            print(f"\n{i}. {change.kind.value}\n   Package: {package_name}{path_info}\n   → {change.explain()}\n{'-'*80}")
        
        print(f"\n{'='*80}\nSUMMARY\n{'='*80}\nTotal breaking changes: {len(breaking_changes)}\n{'='*80}\n")
        
        return 1
        
    except Exception as e:
        logger.error(f"\n❌ Error: {e}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        return 2


if __name__ == "__main__":
    sys.exit(main())


================================================
FILE: setup.py
================================================
import subprocess

from setuptools import Extension, setup

setup_args = dict(
    ext_modules=[
        Extension(
            "megatron.core.datasets.helpers_cpp",
            sources=["megatron/core/datasets/helpers.cpp"],
            language="c++",
            extra_compile_args=(
                subprocess.check_output(["python3", "-m", "pybind11", "--includes"])
                .decode("utf-8")
                .strip()
                .split()
            )
            + ["-O3", "-Wall", "-std=c++17"],
            optional=True,
        )
    ]
)
setup(**setup_args)


================================================
FILE: tasks/data_utils.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

""" Tasks data utility."""

import re
import numpy as np


def clean_text(text):
    """Remove new lines and multiple spaces and adjust end of sentence dot."""

    text = text.replace("\n", " ")
    text = re.sub(r'\s+', ' ', text)
    for _ in range(3):
        text = text.replace(' . ', '. ')

    return text


def build_sample(ids, types, paddings, label, unique_id):
    """Convert to numpy and return a sample consumed by the batch producer."""

    ids_np = np.array(ids, dtype=np.int64)
    types_np = np.array(types, dtype=np.int64)
    paddings_np = np.array(paddings, dtype=np.int64)
    sample = ({'text': ids_np,
               'types': types_np,
               'padding_mask': paddings_np,
               'label': int(label),
               'uid': int(unique_id)})

    return sample


def build_tokens_types_paddings_from_text(text_a, text_b,
                                          tokenizer, max_seq_length):
    """Build token types and paddings, trim if needed, and pad if needed."""

    text_a_ids = tokenizer.tokenize(text_a)
    text_b_ids = None
    if text_b is not None:
        text_b_ids = tokenizer.tokenize(text_b)

    return build_tokens_types_paddings_from_ids(text_a_ids, text_b_ids,
                                                max_seq_length, tokenizer.cls,
                                                tokenizer.sep, tokenizer.pad)


def build_tokens_types_paddings_from_ids(text_a_ids, text_b_ids, max_seq_length,
                                         cls_id, sep_id, pad_id):
    """Build token types and paddings, trim if needed, and pad if needed."""

    ids = []
    types = []
    paddings = []

    # [CLS].
    ids.append(cls_id)
    types.append(0)
    paddings.append(1)

    # A.
    len_text_a = len(text_a_ids)
    ids.extend(text_a_ids)
    types.extend([0] * len_text_a)
    paddings.extend([1] * len_text_a)

    # [SEP].
    ids.append(sep_id)
    types.append(0)
    paddings.append(1)

    # B.
    if text_b_ids is not None:
        len_text_b = len(text_b_ids)
        ids.extend(text_b_ids)
        types.extend([1] * len_text_b)
        paddings.extend([1] * len_text_b)

    # Cap the size.
    trimmed = False
    if len(ids) >= max_seq_length:
        max_seq_length_m1 = max_seq_length - 1
        ids = ids[0:max_seq_length_m1]
        types = types[0:max_seq_length_m1]
        paddings = paddings[0:max_seq_length_m1]
        trimmed = True

    # [SEP].
    if (text_b_ids is not None) or trimmed:
        ids.append(sep_id)
        if text_b_ids is None:
            types.append(0)
        else:
            types.append(1)
        paddings.append(1)

    # Padding.
    padding_length = max_seq_length - len(ids)
    if padding_length > 0:
        ids.extend([pad_id] * padding_length)
        types.extend([pad_id] * padding_length)
        paddings.extend([0] * padding_length)

    return ids, types, paddings


================================================
FILE: tasks/eval_utils.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""Evaluation utilities."""

import os
import time
from functools import partial

import torch

from megatron.training import get_args
from megatron.training import print_rank_last, is_last_rank
from megatron.core import mpu
from megatron.schedules import get_forward_backward_func
from tasks.finetune_utils import build_data_loader
from tasks.finetune_utils import process_batch


def accuracy_func_provider(single_dataset_provider):
    """Provide function that calculates accuracies."""
    args = get_args()

    # Build dataloaders.
    datapaths = args.valid_data
    dataloaders = []
    for datapath in datapaths:
        dataset = single_dataset_provider(datapath)
        dataloader = build_data_loader(
            dataset, args.orig_micro_batch_size, num_workers=args.num_workers,
            drop_last=(mpu.get_data_parallel_world_size() > 1))
        dataloaders.append((dataset.dataset_name, dataloader))

    def metrics_func(model, epoch, output_predictions=False):
        print_rank_last('calculating metrics ...')
        correct = 0
        total = 0
        if output_predictions:
            assert mpu.get_data_parallel_world_size() == 1
            named_predictions = []
            names = 'predictions'
        for name, dataloader in dataloaders:
            output = calculate_correct_answers(name, model, dataloader,
                                               epoch, output_predictions)
            if not output_predictions:
                correct_ans, total_count = output
            else:
                correct_ans, total_count, predictions = output
                named_predictions.append((name, predictions))
                names += '_' + name
            correct += correct_ans
            total += total_count
        if is_last_rank():
            percent = float(correct) * 100.0 / float(total)
            print(' >> |epoch: {}| overall: correct / total = {} / {} = '
                  '{:.4f} %'.format(epoch, correct, total, percent))

        if output_predictions and is_last_rank():
            assert args.load is not None
            filename = os.path.join(args.load, names + '.pt')
            torch.save(named_predictions, filename)

    return metrics_func


def calculate_correct_answers(name, model, dataloader,
                              epoch, output_predictions):
    """Calculate correct over total answers and return prediction if the
    `output_predictions` is true."""
    args = get_args()
    forward_backward_func = get_forward_backward_func()
    start_time = time.time()
    for m in model:
        m.eval()
    saved_micro_batch_size = args.micro_batch_size
    saved_global_batch_size = args.global_batch_size

    ds = dataloader.dataset
    if hasattr(ds, 'sample_multiplier'):
        # If our dataset as a sample_multiplier attribute that means
        # each "sample" from the dataset actually has multiple samples
        # that will collapse into the batch dimension (for example in
        # the RACE dataset that has several options), we need to
        # account for that when setting the micro batch size.
        sample_multiplier = ds.sample_multiplier
    else:
        sample_multiplier = 1
    micro_batch_size_times_data_parallel = args.orig_micro_batch_size * args.data_parallel_size
    num_micro_batches = args.orig_global_batch_size // micro_batch_size_times_data_parallel

    def loss_func(output_predictions, labels, output_tensor):
        logits = output_tensor

        loss_dict = {}
        # Add output predictions.
        if output_predictions:
            assert False
            loss_dict['softmaxes'] = torch.nn.Softmax(dim=-1)(
                logits.float()).data.cpu().numpy().tolist()
            loss_dict['labels'] = labels.data.cpu().numpy().tolist()
            loss_dict['ids'] = batch['uid'].cpu().numpy().tolist()
        # Compute the correct answers.
        predicted = torch.argmax(logits, dim=-1)
        corrects = (predicted == labels)
        # Add to the counters.
        loss_dict['total'] = labels.size(0)
        loss_dict['correct'] = corrects.sum().item()

        return 0, loss_dict

    # defined inside to capture output_predictions
    def correct_answers_forward_step(batch, model):
        try:
            batch_ = next(batch)
        except Exception:
            batch_ = batch
        tokens, types, labels, attention_mask = process_batch(batch_)

        # Forward model.
        args = get_args()
        output_tensor = model(tokens, attention_mask, tokentype_ids=types)

        return output_tensor, partial(loss_func, output_predictions, labels)

    with torch.no_grad():
        # For all the batches in the dataset.
        total = 0
        correct = 0
        if output_predictions:
            # This option is only possible when data parallel size is 1.
            assert mpu.get_data_parallel_world_size() == 1
            softmaxes = []
            labels = []
            ids = []
        for _, batch in enumerate(dataloader):
            # For evaluation only mode we use drop_last = False to get all the
            # samples, which means we might not have a full batch, so we
            # adjust batch_size here to actual batch size of data
            actual_batch_size = len(batch['label'])
            # ... applying sample_multiplier if necessary
            args.micro_batch_size = actual_batch_size * sample_multiplier
            args.global_batch_size = actual_batch_size * sample_multiplier * num_micro_batches

            loss_dicts = forward_backward_func(correct_answers_forward_step, batch, model,
                                               optimizer=None, timers=None, forward_only=True)

            for loss_dict in loss_dicts:
                if output_predictions:
                    softmaxes.extend(loss_dict['softmaxes'])
                    labels.extend(loss_dict['labels'])
                    ids.extend(loss_dict['ids'])
                total += loss_dict['total']
                correct += loss_dict['correct']


    for m in model:
        m.train()
    args.micro_batch_size = saved_micro_batch_size
    args.global_batch_size = saved_global_batch_size

    # Reduce.
    if mpu.is_pipeline_last_stage():
        unreduced = torch.tensor([correct, total], dtype=torch.long, device='cuda')
        torch.distributed.all_reduce(unreduced,
                                     group=mpu.get_data_parallel_group())

        # Print on screen.

        correct_ans = unreduced[0].item()
        total_count = unreduced[1].item()
        percent = float(correct_ans) * 100.0 / float(total_count)
        elapsed_time = time.time() - start_time
        print_rank_last(' > |epoch: {}| metrics for {}: correct / total '
                        '= {} / {} = {:.4f} %, elapsed time (sec): {:.3f}'.format(
                            epoch, name, correct_ans, total_count,
                            percent, elapsed_time))

        if output_predictions:
            return correct_ans, total_count, (softmaxes, labels, ids)
        return correct_ans, total_count
    if output_predictions:
        return 0, 0, ()
    return 0, 0


================================================
FILE: tasks/finetune_utils.py
================================================
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""Finetune utilities."""

from functools import partial
import sys
import torch

from megatron.training import get_args
from megatron.core.num_microbatches_calculator import get_num_microbatches
from megatron.training import print_rank_0
from megatron.training import get_timers
from megatron.core import mpu
from megatron.core.enums import ModelType
from megatron.training.checkpointing import load_checkpoint
from megatron.training.checkpointing import save_checkpoint
from megatron.training.training import evaluate_and_print_results
from megatron.training.training import setup_model_and_optimizer
from megatron.training.training import train_step
from megatron.training.training import training_log
from megatron.training.utils import average_losses_across_data_parallel_group
from megatron.training.utils import calc_params_l2_norm
from megatron.training.utils import check_adlr_autoresume_termination


def process_batch(batch):
    """Process batch and produce inputs for the model."""
    args = get_args()

    tokens = batch['text'].long().cuda().contiguous()
    types = batch['types'].long().cuda().contiguous()
    labels = batch['label'].long().cuda().contiguous()
    attention_mask = batch['padding_mask'].float().cuda().contiguous()
    if args.fp16:
        attention_mask = attention_mask.half()

    return tokens, types, labels, attention_mask


def cross_entropy_loss_func(labels, output_tensor):
    logits = output_tensor

    # Cross-entropy loss.
    loss_func = torch.nn.CrossEntropyLoss()
    loss = loss_func(logits.contiguous().float(), labels)

    # Reduce loss for logging.
    averaged_loss = average_losses_across_data_parallel_group([loss])

    return loss, {'lm loss': averaged_loss[0]}


def _cross_entropy_forward_step(batch, model):
    """Simple forward step with cross-entropy loss."""
    timers = get_timers()

    # Get the batch.
    timers('batch-generator', log_level=2).start()
    try:
        batch_ = next(batch)
    except Exception:
        batch_ = batch
    tokens, types, labels, attention_mask = process_batch(batch_)
    timers('batch-generator').stop()

    # Forward model.
    output_tensor = model(tokens, attention_mask, tokentype_ids=types)

    return output_tensor, partial(cross_entropy_loss_func, labels)


def build_data_loader(dataset, micro_batch_size, num_workers, drop_last,
        task_collate_fn=None):
    """Data loader. Note that batch-size is the local (per GPU) batch-size."""

    # Sampler.
    world_size = mpu.get_data_parallel_world_size()
    rank = mpu.get_data_parallel_rank()
    sampler = torch.utils.data.distributed.DistributedSampler(
        dataset, num_replicas=world_size, rank=rank)

    # Data loader. Note that batch size is the per GPU batch size.
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=micro_batch_size,
                                              sampler=sampler,
                                              shuffle=False,
                                              num_workers=num_workers,
                                              drop_last=drop_last,
                                              pin_memory=True,
                                              collate_fn=task_collate_fn)

    return data_loader


def _build_infinite_size_dataloader(dataloader):
    """Build a looped dataloader with infinite size."""

    iterator = dataloader.__iter__()
    while True:
        try:
            yield iterator.__next__()
        except StopIteration:
            iterator = dataloader.__iter__()


def _build_train_valid_dataloaders(train_dataset, valid_dataset, 
    task_collate_fn=None):
    """Traing and validation dataloaders."""
    args = get_args()

    print_rank_0('building train and validation dataloaders ...')
    # Training dataset.
    train_dataloader = build_data_loader(train_dataset, args.micro_batch_size,
                                         args.num_workers, not args.keep_last,
                                         task_collate_fn)
    # Set the training iterations.
    args.train_iters_per_epoch = len(train_dataloader)
    args.train_iters = args.epochs * args.train_iters_per_epoch
    # Validation dataset. For this dataset, we do not need to set up
    # shuffling so we can just use a simple infinite loop.
    valid_dataloader_ = build_data_loader(valid_dataset, args.micro_batch_size,
                                          args.num_workers, not args.keep_last,
                                          task_collate_fn)
    valid_dataloader = _build_infinite_size_dataloader(valid_dataloader_)

    # Now that we've built the data loaders, set batch_size arguments
    # to the actual batch size the model will see for this dataset.
    # This is necessary so pipeline transfers know what size they are
    # and the LR schedule, which is based on samples seen, gets set
    # correctly.
    args.orig_micro_batch_size = args.micro_batch_size
    args.orig_global_batch_size = args.global_batch_size
    if hasattr(train_dataset, 'sample_multiplier'):
        # If our dataset as a sample_multiplier attribute that means
        # each "sample" from the dataset actually has multiple samples
        # that will collapse into the batch dimension (for example in
        # the RACE dataset that has several options), we need to
        # account for that when setting the micro batch size.
        args.micro_batch_size *= train_dataset.sample_multiplier
        args.global_batch_size *= train_dataset.sample_multiplier

    return train_dataloader, valid_dataloader


def _train(model, optimizer, opt_param_scheduler, forward_step,
           train_dataloader, valid_dataloader, end_of_epoch_callback):
    """Train the model."""
    args = get_args()
    timers = get_timers()

    assert get_num_microbatches() == 1, "finetuning with gradient accumulation doesn't currently work"

    # Turn on training mode which enables dropout.
    for m in model:
        m.train()

    # Tracking loss.
    losses_dict_sum = {}

    # Starting epoch and iteration
    start_epoch = args.iteration // args.train_iters_per_epoch
    start_iteration = args.iteration % args.train_iters_per_epoch
    iteration = args.iteration

    # Memory reporting flag.
    report_memory_flag = True

    # For each remaining epoch
    timers('interval-time', log_level=0).start(barrier=True)
    for epoch in range(start_epoch, args.epochs):
        print_rank_0('working on epoch {} ...'.format(epoch + 1))

        # Set the data loader epoch to shuffle the index iterator.
        train_dataloader.sampler.set_epoch(args.seed + epoch)

        # For all the batches in the dataset.
        for iteration_, batch in enumerate(train_dataloader):

            # Ignore the iterations before starting value
            if iteration_ < start_iteration:
                continue
            # Set to zero so the next epoch does not skip any batches.
            start_iteration = 0

            # Train for one step.
            out = train_step(forward_step, batch, model, optimizer, opt_param_scheduler)

            losses_dict, skipped_iter, grad_norm, num_zeros_in_grad = out
            iteration += 1

            # Logging.
            params_norm = None
            if args.log_params_norm:
                params_norm = calc_params_l2_norm(model)
            report_memory_flag = training_log(losses_dict, losses_dict_sum,
                                              optimizer.param_groups[0]['lr'],
                                              iteration,
                                              optimizer.get_loss_scale().item(),
                                              report_memory_flag, skipped_iter,
                                              grad_norm, params_norm, num_zeros_in_grad)

            # Autoresume
            if args.adlr_autoresume and \
               (iteration % args.adlr_autoresume_interval == 0):
                check_adlr_autoresume_termination(iteration, model,
                                                  optimizer, opt_param_scheduler)

            # Checkpointing
            saved_checkpoint = False
            if args.save and args.save_interval and \
               iteration % args.save_interval == 0:
                save_checkpoint(iteration, model, optimizer, opt_param_scheduler)
                saved_checkpoint = True

            # Evaluation
            if args.eval_interval and iteration % args.eval_interval == 0:
                prefix = 'iteration {}'.format(iteration)
                evaluate_and_print_results(prefix, forward_step,
                                           valid_dataloader, model,
                                           iteration, None, False)

            # Exiting based on iterations
            if args.exit_interval and iteration % args.exit_interval == 0:
                if not saved_checkpoint:
                    save_checkpoint(iteration, model, optimizer, opt_param_scheduler)
                torch.distributed.barrier()
                print_rank_0('exiting program at iteration {}'.format(iteration))
                sys.exit()

        # Checkpointing at the end of each epoch.
        if args.save:
            save_checkpoint(iteration, model, optimizer, opt_param_scheduler)

        # Callback at the end of each epoch.
        if end_of_epoch_callback is not None:
            end_of_epoch_callback(model, epoch)


def finetune(train_valid_datasets_provider, model_provider,
             model_type=ModelType.encoder_or_decoder,
             forward_step=_cross_entropy_forward_step,
             end_of_epoch_callback_provider=None,
             task_collate_fn=None):
    """Main finetune function used across all tasks."""
    args = get_args()
    timers = get_timers()

    assert args.rampup_batch_size is None, \
        'batch size scaling is not supported for finetuning'

    # Train and validation data loaders.
    timers('train/valid/test dataset/dataloder', log_level=0).start()
    if args.epochs > 0:
        train_dataset, valid_dataset = train_valid_datasets_provider()
        train_dataloader, valid_dataloader = _build_train_valid_dataloaders(
            train_dataset, valid_dataset, task_collate_fn)
    else:
        args.train_iters = 0
    timers('train/valid/test dataset/dataloder').stop()

    # Build calback function.
    timers('callback function', log_level=0).start()
    end_of_epoch_callback = None
    if end_of_epoch_callback_provider is not None:
        end_of_epoch_callback = end_of_epoch_callback_provider()
    timers('callback function').stop()

    # Build model, optimizer and learning rate scheduler.
    timers('model and optimizer', log_level=0).start()
    model, optimizer, opt_param_scheduler = setup_model_and_optimizer(model_provider, model_type)
    timers('model and optimizer').stop()

    # If pretrained checkpoint is provided and we have not trained for
    # any iteration (i.e., iteration is zero), then load the pretrained
    # checkpoint.
    timers('pretrained checkpoint', log_level=0).start(barrier=True)
    if args.iteration == 0 and args.pretrained_checkpoint is not None:
        original_load = args.load
        args.load = args.pretrained_checkpoint
        original_rng = args.no_load_rng
        args.no_load_rng = True
        _ = load_checkpoint(model, None, None)
        args.load = original_load
        args.no_load_rng = original_rng
        # This is critical when only model is loaded. We should make sure
        # main parameters are also updated.
        optimizer.reload_model_params()
    timers('pretrained checkpoint').stop()

    # Print setup timing.
    print_rank_0('done with setups ...')
    timers.log(['train/valid/test dataset/dataloder', 'callback function',
                'model and optimizer', 'pretrained checkpoint'], barrier=True)
    print_rank_0('training ...')

    # Finetune the model.
    if args.epochs > 0:
        _train(model, optimizer, opt_param_scheduler, forward_step,
               train_dataloader, valid_dataloader, end_of_epoch_callback)
    # Or just evaluate.
    else:
        if end_of_epoch_callback is not None:
            print_rank_0('evaluation only mode, setting epoch to -1')
            end_of_epoch_callback(model, epoch=-1, output_predictions=True)
    print_rank_0('done :-)')


================================================
FILE: tests/README.md
================================================
# Megatron-LM Tests

## Updating Functional Test Golden Values

When adding new functional tests, it may be necessary to update the golden values used to verify if the test is
passing as expected.

1. Add the new functional test case with the scope set to `mr-github`
2. Open a PR with the new test. Ensure the label `Run functional tests` is added
3. Run the PR CI tests
4. Run the script to download golden values from a Github CI run
    a. Ensure click, requests, and python-gitlab are installed in your environment
    b. Ensure a Github access token is set as an environment variable `GITHUB_TOKEN`
    c. Run the script `python tests/test_utils/python_scripts/download_golden_values.py --source github --pipeline-id <github-workflow-run-id>`
    d. Optionally pass in `--only-failing` to only download golden values for failing tests only
    e. Ensure you are only checking-in golden values for tests are you updating

The Github CI infra may not be appropriate for Perf tests. Perf tests may be more appropriate for nightly jobs on other infra.


================================================
FILE: tests/__init__.py
================================================


================================================
FILE: tests/functional_tests/__init__.py
================================================


================================================
FILE: tests/functional_tests/python_test_utils/__init__.py
================================================


================================================
FILE: tests/functional_tests/python_test_utils/common.py
================================================
import enum
import glob
import json
import logging
import os
import pathlib
from typing import Callable, Dict, List, Optional, Union

import numpy as np
import pydantic
from tensorboard.backend.event_processing import event_accumulator

# By default TB tries to be smart about what to load in memory to avoid OOM
# Since we expect every step to be there when we do our comparisons, we explicitly
# set the size guidance to 0 so that we load everything. It's okay given our tests
# are small/short.
SIZE_GUIDANCE = {event_accumulator.TENSORS: 0, event_accumulator.SCALARS: 0}

logger = logging.getLogger(__name__)


class TypeOfTestResult(enum.Enum):
    APPROXIMATE = 1
    DETERMINISTIC = 2


class Test(pydantic.BaseModel):
    pass


class NotApproximateError(Exception):
    """Raised if comparison is not within approximate bounds"""


class NotDeterminsticError(Exception):
    """Raised if comparison is not within approximate bounds"""


class ApproximateTest(Test):
    atol: Union[int, float] = 0
    rtol: float = 1e-5

    @property
    def type_of_test_result(self) -> TypeOfTestResult:
        return TypeOfTestResult.APPROXIMATE

    def error_message(self, metric_name: str) -> NotApproximateError:
        return NotApproximateError(f"Approximate comparison of {metric_name}: FAILED")


class DeterministicTest(Test):
    @property
    def rtol(self) -> float:
        return 0.0

    @property
    def atol(self) -> Union[int, float]:
        return 0

    @property
    def type_of_test_result(self) -> TypeOfTestResult:
        return TypeOfTestResult.DETERMINISTIC

    def error_message(self, metric_name: str) -> NotDeterminsticError:
        return NotDeterminsticError(f"Exact comparison of {metric_name}: FAILED")


class GoldenValueMetric(pydantic.BaseModel):
    start_step: int
    end_step: int
    step_interval: int
    values: Dict[int, Union[int, float, str]]

    def __repr__(self):
        return f"Values ({self.start_step},{self.end_step},{self.step_interval}): {', '.join([str(f'({step}, {value})') for step, value in self.values.items()])}"


class GoldenValues(pydantic.RootModel):
    root: Dict[str, GoldenValueMetric]


class MissingTensorboardLogsError(Exception):
    """Raised if TensorboardLogs not found"""


class UndefinedMetricError(Exception):
    """Raised of golden values metric has no test definition"""


class SkipMetricError(Exception):
    """Raised if metric shall be skipped"""


def read_tb_logs_as_list(
    path, index: int = 0, train_iters: int = 50, start_idx: int = 1, step_size: int = 5
) -> Optional[Dict[str, GoldenValueMetric]]:
    """Reads a TensorBoard Events file from the input path, and returns the
    summary specified as input as a list.

    Args:
        path: str, path to the dir where the events file is located.
        summary_name: str, name of the summary to read from the TB logs.

    Returns:
        summary_list: list, the values in the read summary list, formatted as a list.
    """
    files = glob.glob(f"{path}/events*tfevents*")
    files += glob.glob(f"{path}/results/events*tfevents*")

    if not files:
        logger.error(f"File not found matching: {path}/events* || {path}/results/events*")
        return None

    files.sort(key=lambda x: os.path.getmtime(os.path.join(path, pathlib.Path(x).name)))
    accumulators = []

    if index == -1:
        for event_file in files:
            ea = event_accumulator.EventAccumulator(event_file, size_guidance=SIZE_GUIDANCE)
            ea.Reload()
            accumulators.append(ea)
    else:
        event_file = files[index]
        ea = event_accumulator.EventAccumulator(event_file, size_guidance=SIZE_GUIDANCE)
        ea.Reload()
        accumulators.append(ea)

    summaries = {}
    for ea in accumulators:
        for scalar_name in ea.Tags()["scalars"]:
            if scalar_name in summaries:
                for x in ea.Scalars(scalar_name):
                    if x.step not in summaries[scalar_name]:
                        summaries[scalar_name][x.step] = round(x.value, 5)

            else:
                summaries[scalar_name] = {
                    x.step: round(x.value, 5) for x in ea.Scalars(scalar_name)
                }

    golden_values = {}

    for metric, values in summaries.items():
        # Add missing values
        values = {
            k: (values[k] if k in values else "nan")
            for k in range(1, train_iters + 1)
            if k == start_idx or (k > start_idx and int(k) % step_size == 0)
        }

        golden_values[metric] = GoldenValueMetric(
            start_step=min(values.keys()),
            end_step=max(values.keys()),
            step_interval=step_size,
            values=values,
        )

    return golden_values


def read_golden_values_from_json(
    golden_values_path: Union[str, pathlib.Path]
) -> Dict[str, GoldenValueMetric]:
    with open(golden_values_path) as f:
        if os.path.exists(golden_values_path):
            with open(golden_values_path) as f:
                return GoldenValues(**json.load(f)).root

        raise ValueError(f"File {golden_values_path} not found!")


def _filter_checks(
    checks: List[Union[ApproximateTest, DeterministicTest]], filter_for_type_of_check
):
    return [test for test in checks if test.type_of_test_result == filter_for_type_of_check]


def pipeline(
    compare_approximate_results: bool,
    golden_values: Dict[str, GoldenValueMetric],
    actual_values: Dict[str, GoldenValueMetric],
    checks: Dict[str, List[Union[ApproximateTest, DeterministicTest]]],
):
    all_test_passed = True
    failed_metrics = []

    for metric_name, metric_thresholds in checks.items():
        if metric_name not in list(actual_values.keys()):
            raise MissingTensorboardLogsError(
                f"Metric {metric_name} not found in Tensorboard logs! Please modify `model_config.yaml` to record it."
            )

        for test in metric_thresholds:
            if (
                compare_approximate_results
                and test.type_of_test_result == TypeOfTestResult.DETERMINISTIC
            ):
                continue

            try:
                golden_value = golden_values[metric_name]
                golden_value_list = list(golden_value.values.values())
                actual_value_list = [
                    value
                    for value_step, value in actual_values[metric_name].values.items()
                    if value_step in golden_value.values.keys()
                ]

                if metric_name == "iteration-time":
                    actual_value_list = [
                        np.median([np.inf if type(v) is str else v for v in actual_value_list])
                    ]
                    golden_value_list = [
                        np.median([np.inf if type(v) is str else v for v in golden_value_list])
                    ]
                    total_steps_evaluated = 1
                else:
                    total_steps_evaluated = golden_value.end_step / golden_value.step_interval + 1

                    actual_value_list = [np.inf if type(v) is str else v for v in actual_value_list]
                    golden_value_list = [np.inf if type(v) is str else v for v in golden_value_list]

                actual = np.array(actual_value_list)
                golden = np.array(golden_value_list)

                # Tolerance check
                is_close = np.isclose(actual, golden, rtol=test.rtol, atol=test.atol)

                num_failing_steps_allowed = min(max(total_steps_evaluated // 100, 1), 50)
                passing = np.mean(is_close) >= (num_failing_steps_allowed / total_steps_evaluated)

                if not passing:
                    logger.info(
                        "Actual values: %s", ", ".join([str(v) for v in (*actual_value_list,)])
                    )
                    logger.info(
                        "Golden values: %s", ", ".join([str(v) for v in (*golden_value_list,)])
                    )
                    raise test.error_message(metric_name)

                result = f"{test.type_of_test_result.name} test for metric {metric_name}: PASSED"
                result_code = 0

            except (NotApproximateError, NotDeterminsticError, MissingTensorboardLogsError) as e:
                result = str(e)
                result_code = 1
            except SkipMetricError:
                logger.info(f"{test.type_of_test_result.name} test for {metric_name}: SKIPPED")
                continue

            log_emitter = logger.info if result_code == 0 else logger.error
            log_emitter(result)
            if result_code == 1:
                all_test_passed = False
                failed_metrics.append(metric_name)

    assert all_test_passed, f"The following metrics failed: {', '.join(failed_metrics)}"


================================================
FILE: tests/functional_tests/python_test_utils/compute_golden_statistics.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

"""
Compute statistical bounds for golden values from multiple test runs.

This script aggregates results from multiple parallel runs of a functional test
and computes statistics (min, max, mean, std) for each metric at each step.
The output can be used to determine appropriate tolerances for test validation.

Usage:
    # Step 1: Run batch tests (from megatron-rl directory):
    ./tests/functional_tests/shell_test_utils/run_batch_ci_tests.sh \\
        test_cases/gpt/gpt_grpo_tp4_pp1_dp2_8b_correctness_and_throughput.sh 10

    # Step 2: Wait for jobs to complete, then compute statistics:
    python tests/functional_tests/python_test_utils/compute_golden_statistics.py \\
        --results-dir batch_test_logs_gpt_grpo_*/ \\
        --output golden_values_stats.json \\
        --recommend-tolerances

    # The script parses .out log files to find where each run wrote its results.
    # Each .out file should contain: "This test wrote results into /opt/megatron-lm/runs/<uuid>"
    # The container path /opt/megatron-lm maps to the workspace root on the host.

    # Or specify individual JSON files directly:
    python compute_golden_statistics.py \\
        --result-files runs/abc123/golden_values.json runs/def456/golden_values.json \\
        --output golden_values_stats.json
"""

import argparse
import glob
import json
import logging
import math
import os
import sys
from pathlib import Path
from statistics import mean, median, stdev
from typing import Any, Dict, List, Optional, Tuple

logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logger = logging.getLogger(__name__)


def find_result_json_files(results_dir: str, workspace_root: Optional[str] = None) -> List[str]:
    """
    Find all result JSON files from a batch test run.

    The batch test infrastructure (run_batch_ci_tests.sh) writes .out log files
    to the results directory. Each .out file contains a line like:
        "This test wrote results into /opt/megatron-lm/runs/<uuid>"

    The container path /opt/megatron-lm maps to the workspace root on the host.
    This function parses the .out files to find where the JSON results are.

    Args:
        results_dir: Path to batch_test_logs_* directory containing .out files
        workspace_root: Root of the megatron workspace (defaults to cwd)
    """
    result_files = []
    results_path = Path(results_dir)

    if not results_path.exists():
        logger.error(f"Results directory not found: {results_dir}")
        return []

    if workspace_root is None:
        # Try to find workspace root by looking for common markers
        workspace_root = os.getcwd()

    # Find all .out files from batch test runs
    out_files = list(results_path.glob("*.out"))

    if not out_files:
        logger.warning(f"No .out files found in {results_dir}")
        # Fall back to searching for JSON files directly
        return _find_json_files_directly(results_dir)

    logger.info(f"Found {len(out_files)} .out files to parse")

    for out_file in out_files:
        json_path = _extract_result_path_from_log(out_file, workspace_root)
        if json_path and os.path.exists(json_path):
            result_files.append(json_path)
        elif json_path:
            logger.warning(f"Result file not found: {json_path} (from {out_file.name})")

    return result_files


def _extract_result_path_from_log(out_file: Path, workspace_root: str) -> Optional[str]:
    """
    Parse a .out log file to find the result JSON path.

    Looks for the line: "This test wrote results into /opt/megatron-lm/runs/<uuid>"
    and converts the container path to the host path.
    """
    try:
        with open(out_file, 'r', errors='ignore') as f:
            content = f.read()
    except IOError as e:
        logger.warning(f"Failed to read {out_file}: {e}")
        return None

    # Look for the output path marker
    marker = "This test wrote results into "
    for line in content.split('\n'):
        if marker in line:
            # Extract the path after the marker
            idx = line.find(marker)
            output_path = line[idx + len(marker) :].strip()

            # Convert container path to host path
            # /opt/megatron-lm/... -> <workspace_root>/...
            if output_path.startswith("/opt/megatron-lm/"):
                host_path = output_path.replace("/opt/megatron-lm/", "")
                output_path = os.path.join(workspace_root, host_path)

            # Find JSON result files in this directory (search recursively)
            output_dir = Path(output_path)
            if output_dir.exists() and output_dir.is_dir():
                # Look for result JSON files with various naming patterns
                # Search recursively since files may be in subdirectories (e.g., 1/, 2/)
                patterns = [
                    "**/golden_values*.json",
                    "**/generations*.json",
                    "**/test_results*.json",
                ]

                for pattern in patterns:
                    json_files = list(output_dir.glob(pattern))
                    if json_files:
                        # Return the first match
                        logger.debug(f"Found result file: {json_files[0]}")
                        return str(json_files[0])

                # Fallback: any JSON file in subdirectories
                json_files = list(output_dir.glob("**/*.json"))
                if json_files:
                    logger.debug(f"Found result file (fallback): {json_files[0]}")
                    return str(json_files[0])

            logger.debug(f"Output directory not found or empty: {output_path}")
            return None

    logger.debug(f"No output path marker found in {out_file.name}")
    return None


def _find_json_files_directly(results_dir: str) -> List[str]:
    """
    Fallback: search for JSON files directly in the results directory.

    This is used when .out files don't contain the expected markers.
    """
    result_files = []
    results_path = Path(results_dir)

    # Look for golden_values*.json files in subdirectories
    patterns = ["**/golden_values*.json", "**/test_results*.json", "**/*_output.json"]

    for pattern in patterns:
        matches = list(results_path.glob(pattern))
        result_files.extend([str(p) for p in matches])

    # Remove duplicates while preserving order
    seen = set()
    unique_files = []
    for f in result_files:
        if f not in seen:
            seen.add(f)
            unique_files.append(f)

    return unique_files


def load_result_file(filepath: str) -> Optional[Dict[str, Any]]:
    """Load a single result JSON file."""
    try:
        with open(filepath, 'r') as f:
            content = f.read()

        data = json.loads(content)

        # Handle JSONL format (single line)
        if isinstance(data, str):
            data = json.loads(data)

        return data
    except (json.JSONDecodeError, IOError) as e:
        logger.warning(f"Failed to load {filepath}: {e}")
        return None


def _detect_result_format(data: Dict[str, Any]) -> str:
    """
    Detect whether the result file is from a training test or inference test.

    Returns:
        "training" - TensorBoard metrics format: {"metric_name": {"values": {...}}}
        "inference" - Generation output format: {"request_id": {"latency": ..., ...}}
        "unknown" - Unrecognized format
    """
    if not data:
        return "unknown"

    # Check first key's value structure
    first_key = next(iter(data.keys()))
    first_value = data[first_key]

    if isinstance(first_value, dict):
        if 'values' in first_value:
            return "training"
        if 'latency' in first_value or 'generated_text' in first_value:
            return "inference"

    return "unknown"


def _is_valid_numeric(value) -> bool:
    """Check if a value is a valid (non-NaN) numeric value."""
    if isinstance(value, str):
        try:
            value = float(value)
        except ValueError:
            return False

    if isinstance(value, (int, float)):
        return not math.isnan(value)

    return False


def _to_float(value) -> Optional[float]:
    """Convert value to float, returning None for invalid/NaN values."""
    if isinstance(value, str):
        try:
            value = float(value)
        except ValueError:
            return None

    if isinstance(value, (int, float)):
        if math.isnan(value):
            return None
        return float(value)

    return None


def _aggregate_training_results(
    data: Dict[str, Any], aggregated: Dict[str, Dict[str, List[float]]], run_index: int
) -> None:
    """Aggregate results from training test format."""
    for metric_name, metric_data in data.items():
        if not isinstance(metric_data, dict) or 'values' not in metric_data:
            continue

        if metric_name not in aggregated:
            aggregated[metric_name] = {}

        values = metric_data['values']
        for step, value in values.items():
            # Skip non-numeric or NaN values
            float_val = _to_float(value)
            if float_val is None:
                continue

            if step not in aggregated[metric_name]:
                aggregated[metric_name][step] = []

            aggregated[metric_name][step].append(float_val)

        # For metrics that use median-based comparison in the test (iteration-time,
        # mem-allocated-bytes, mem-max-allocated-bytes), also store all values from
        # this run so we can compute per-run medians later.
        # IMPORTANT: Store values in step order to match the test's index-based slicing.
        if metric_name in ['iteration-time', 'mem-allocated-bytes', 'mem-max-allocated-bytes']:
            all_values_key = f"_all_values_run_{run_index}"
            if all_values_key not in aggregated[metric_name]:
                aggregated[metric_name][all_values_key] = []

            # Sort by step number to ensure consistent ordering for index-based slicing
            sorted_steps = sorted(
                values.keys(), key=lambda x: int(x) if x.isdigit() else float('inf')
            )
            for step in sorted_steps:
                float_val = _to_float(values[step])
                if float_val is None:
                    continue
                aggregated[metric_name][all_values_key].append(
                    float_val
                )  # Just the value, not tuple


def _aggregate_inference_results(
    data: Dict[str, Any], aggregated: Dict[str, Dict[str, List[float]]], run_index: int
) -> None:
    """
    Aggregate results from inference test format.

    Extracts metrics like latency, step_count, and logprob statistics
    from generation outputs.
    """
    # Metrics to extract per request
    latencies = []
    step_counts = []
    prompt_logprob_means = []
    generated_logprob_means = []

    for request_id, request_data in data.items():
        if not isinstance(request_data, dict):
            continue

        # Extract latency
        if 'latency' in request_data:
            latencies.append(float(request_data['latency']))

        # Extract step count
        if 'step_count' in request_data:
            step_counts.append(float(request_data['step_count']))

        # Extract mean of prompt logprobs (as a consistency metric)
        if 'prompt_logprobs' in request_data and request_data['prompt_logprobs']:
            logprobs = request_data['prompt_logprobs']
            if isinstance(logprobs, list) and len(logprobs) > 0:
                prompt_logprob_means.append(sum(logprobs) / len(logprobs))

        # Extract mean of generated logprobs
        if 'generated_log_probs' in request_data and request_data['generated_log_probs']:
            logprobs = request_data['generated_log_probs']
            if isinstance(logprobs, list) and len(logprobs) > 0:
                generated_logprob_means.append(sum(logprobs) / len(logprobs))

    # Store aggregated metrics using run_index as the "step"
    run_key = str(run_index)

    if latencies:
        if 'latency' not in aggregated:
            aggregated['latency'] = {}
        if 'mean' not in aggregated['latency']:
            aggregated['latency']['mean'] = []
        aggregated['latency']['mean'].append(sum(latencies) / len(latencies))

        if 'total' not in aggregated['latency']:
            aggregated['latency']['total'] = []
        aggregated['latency']['total'].append(sum(latencies))

    if step_counts:
        if 'step_count' not in aggregated:
            aggregated['step_count'] = {}
        if 'mean' not in aggregated['step_count']:
            aggregated['step_count']['mean'] = []
        aggregated['step_count']['mean'].append(sum(step_counts) / len(step_counts))

    if prompt_logprob_means:
        if 'prompt_logprob_mean' not in aggregated:
            aggregated['prompt_logprob_mean'] = {}
        if 'mean' not in aggregated['prompt_logprob_mean']:
            aggregated['prompt_logprob_mean']['mean'] = []
        aggregated['prompt_logprob_mean']['mean'].append(
            sum(prompt_logprob_means) / len(prompt_logprob_means)
        )

    if generated_logprob_means:
        if 'generated_logprob_mean' not in aggregated:
            aggregated['generated_logprob_mean'] = {}
        if 'mean' not in aggregated['generated_logprob_mean']:
            aggregated['generated_logprob_mean']['mean'] = []
        aggregated['generated_logprob_mean']['mean'].append(
            sum(generated_logprob_means) / len(generated_logprob_means)
        )


def aggregate_results(result_files: List[str]) -> Dict[str, Dict[str, List[float]]]:
    """
    Aggregate results from multiple JSON files.

    Supports both training test format (TensorBoard metrics) and
    inference test format (generation outputs).

    Returns:
        Dict mapping metric_name -> step/key -> list of values across all runs
    """
    aggregated: Dict[str, Dict[str, List[float]]] = {}
    loaded_count = 0
    detected_format = None

    for idx, filepath in enumerate(result_files):
        data = load_result_file(filepath)
        if data is None:
            continue

        loaded_count += 1

        # Detect format from first file
        file_format = _detect_result_format(data)
        if detected_format is None:
            detected_format = file_format
            logger.info(f"Detected result format: {file_format}")

        if file_format == "training":
            _aggregate_training_results(data, aggregated, idx)
        elif file_format == "inference":
            _aggregate_inference_results(data, aggregated, idx)
        else:
            logger.warning(f"Unknown format in {filepath}, skipping")

    logger.info(f"Successfully loaded {loaded_count} of {len(result_files)} result files")
    return aggregated


def compute_statistics(aggregated: Dict[str, Dict[str, List[float]]]) -> Dict[str, Any]:
    """
    Compute statistics for each metric at each step.

    Returns:
        Dict with structure:
        {
            "metric_name": {
                "num_samples": N,
                "values": {
                    "step": {
                        "min": ...,
                        "max": ...,
                        "mean": ...,
                        "std": ...,
                        "samples": [...]  # original values
                    }
                }
            }
        }
    """
    stats: Dict[str, Any] = {}

    for metric_name, step_values in aggregated.items():
        # Determine number of samples (should be consistent across steps)
        # Skip internal keys used for median calculations
        regular_steps = {k: v for k, v in step_values.items() if not k.startswith("_")}
        sample_counts = [len(vals) for vals in regular_steps.values()]
        num_samples = max(sample_counts) if sample_counts else 0

        metric_stats = {"num_samples": num_samples, "values": {}}

        for step, values in regular_steps.items():
            if len(values) == 0:
                continue

            step_stats = {
                "min": min(values),
                "max": max(values),
                "mean": mean(values),
                "std": stdev(values) if len(values) > 1 else 0.0,
                "count": len(values),
            }

            # Include original samples for debugging
            step_stats["samples"] = values

            metric_stats["values"][step] = step_stats

        stats[metric_name] = metric_stats

    return stats


def compute_recommended_tolerances(
    stats: Dict[str, Any],
    aggregated: Dict[str, Dict[str, List[float]]],
    confidence_multiplier: float = 3.0,
    start_step: int = 1,
) -> Dict[str, Dict[str, float]]:
    """
    Compute recommended tolerances for each metric based on observed variance.

    For metrics that use median-based comparison in the test (iteration-time,
    mem-allocated-bytes, mem-max-allocated-bytes), computes variance of per-run
    medians rather than per-step variance.

    Args:
        stats: Output from compute_statistics()
        aggregated: Raw aggregated data (needed for median calculations)
        confidence_multiplier: Number of standard deviations for bounds (default 3.0 for ~99.7% coverage)
        start_step: First step to include in tolerance calculation (skips warmup steps)

    Returns:
        Dict mapping metric_name -> {
            "relative_tolerance": recommended relative tolerance,
            "absolute_tolerance": recommended absolute tolerance (for near-zero values),
            "max_observed_relative_variance": max(|value - mean| / |mean|) across all samples
        }
    """
    tolerances = {}

    # Metrics that use median-based comparison in the test (iteration-time)
    median_based_metrics = ['iteration-time']
    # Metrics that use max-based comparison in the test (memory)
    max_based_metrics = ['mem-allocated-bytes', 'mem-max-allocated-bytes']

    for metric_name, metric_data in stats.items():
        max_relative_variance = 0.0
        max_absolute_variance = 0.0
        steps_included = 0

        # For median-based metrics, compute variance of per-run medians
        if metric_name in median_based_metrics and metric_name in aggregated:
            run_medians = []

            # Find all run data keys
            for key in aggregated[metric_name].keys():
                if key.startswith("_all_values_run_"):
                    run_data = aggregated[metric_name][key]
                    # Use index-based slicing to match test behavior:
                    # [start_step:] skips the first `start_step` items
                    filtered_values = run_data[start_step:]

                    if filtered_values:
                        run_median = median(filtered_values)
                        run_medians.append(run_median)

            if run_medians:
                median_mean = mean(run_medians)

                # Compute relative variance of medians
                if abs(median_mean) > 1e-9:
                    for m in run_medians:
                        rel_var = abs(m - median_mean) / abs(median_mean)
                        max_relative_variance = max(max_relative_variance, rel_var)
                else:
                    for m in run_medians:
                        max_absolute_variance = max(max_absolute_variance, abs(m))

                steps_included = len(run_medians)

                logger.debug(
                    f"{metric_name}: computed variance from {len(run_medians)} run medians, "
                    f"mean={median_mean:.4f}, max_rel_var={max_relative_variance:.4%}"
                )

        # For max-based metrics (memory), compute variance of per-run max values
        elif metric_name in max_based_metrics and metric_name in aggregated:
            run_maxes = []

            # Find all run data keys
            for key in aggregated[metric_name].keys():
                if key.startswith("_all_values_run_"):
                    run_data = aggregated[metric_name][key]
                    # Skip first value (warmup), take max of rest
                    filtered_values = run_data[1:] if len(run_data) > 1 else run_data

                    if filtered_values:
                        run_max = max(filtered_values)
                        run_maxes.append(run_max)

            if run_maxes:
                max_mean = mean(run_maxes)

                # Compute relative variance of max values
                if abs(max_mean) > 1e-9:
                    for m in run_maxes:
                        rel_var = abs(m - max_mean) / abs(max_mean)
                        max_relative_variance = max(max_relative_variance, rel_var)
                else:
                    for m in run_maxes:
                        max_absolute_variance = max(max_absolute_variance, abs(m))

                steps_included = len(run_maxes)

                logger.debug(
                    f"{metric_name}: computed variance from {len(run_maxes)} run maxes, "
                    f"mean={max_mean:.4f}, max_rel_var={max_relative_variance:.4%}"
                )
        else:
            # Standard per-step variance calculation for other metrics
            for step, step_stats in metric_data["values"].items():
                # Skip warmup steps - try to parse step as int, skip if < start_step
                try:
                    step_num = int(step)
                    if step_num < start_step:
                        continue
                except (ValueError, TypeError):
                    # Non-numeric step key (e.g., "mean" for inference metrics) - include it
                    pass

                steps_included += 1
                mean_val = step_stats["mean"]

                # Compute observed relative variance
                if abs(mean_val) > 1e-9:
                    # For non-zero means, compute relative variance
                    for sample in step_stats["samples"]:
                        rel_var = abs(sample - mean_val) / abs(mean_val)
                        max_relative_variance = max(max_relative_variance, rel_var)
                else:
                    # For near-zero means, track absolute variance
                    for sample in step_stats["samples"]:
                        max_absolute_variance = max(max_absolute_variance, abs(sample))

        # Recommend tolerance with safety margin
        # Use observed variance * confidence_multiplier, with a minimum of 0.1%
        recommended_relative = max(max_relative_variance * confidence_multiplier, 0.001)

        # Round to reasonable precision
        recommended_relative = round(recommended_relative, 4)

        tolerances[metric_name] = {
            "relative_tolerance": recommended_relative,
            "absolute_tolerance": max(max_absolute_variance * confidence_multiplier, 1e-6),
            "max_observed_relative_variance": round(max_relative_variance, 6),
            "max_observed_absolute_variance": round(max_absolute_variance, 6),
            "steps_included": steps_included,
        }

    return tolerances


def format_summary(stats: Dict[str, Any], tolerances: Dict[str, Dict[str, float]]) -> str:
    """Format a human-readable summary of the statistics."""
    lines = []
    lines.append("=" * 70)
    lines.append("Golden Values Statistics Summary")
    lines.append("=" * 70)

    for metric_name in sorted(stats.keys()):
        metric_data = stats[metric_name]
        tol = tolerances.get(metric_name, {})

        lines.append(f"\n{metric_name}:")
        lines.append(f"  Samples: {metric_data['num_samples']}")
        lines.append(f"  Steps: {len(metric_data['values'])}")

        if tol:
            lines.append(
                f"  Max observed relative variance: {tol.get('max_observed_relative_variance', 'N/A'):.4%}"
            )
            lines.append(
                f"  Recommended relative tolerance: {tol.get('relative_tolerance', 'N/A'):.2%}"
            )
            lines.append(
                f"  Recommended absolute tolerance: {tol.get('absolute_tolerance', 'N/A'):.2e}"
            )

        # Show a few example steps
        values = metric_data["values"]
        example_steps = list(values.keys())[:3]
        if example_steps:
            lines.append("  Example steps:")
            for step in example_steps:
                s = values[step]
                lines.append(
                    f"    Step {step}: mean={s['mean']:.6g}, std={s['std']:.6g}, "
                    f"range=[{s['min']:.6g}, {s['max']:.6g}]"
                )

    lines.append("\n" + "=" * 70)
    return "\n".join(lines)


def main():
    parser = argparse.ArgumentParser(
        description="Compute statistical bounds for golden values from multiple test runs.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )

    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument(
        "--results-dir",
        type=str,
        help="Directory containing batch test results (searches for JSON files)",
    )
    input_group.add_argument(
        "--result-files",
        type=str,
        nargs="+",
        help="Explicit list of result JSON files to aggregate",
    )

    parser.add_argument(
        "--output", "-o", type=str, required=True, help="Output path for statistics JSON file"
    )

    parser.add_argument(
        "--recommend-tolerances",
        action="store_true",
        help="Compute and display recommended tolerances based on observed variance",
    )

    parser.add_argument(
        "--confidence-multiplier",
        type=float,
        default=1.5,
        help="Multiplier for observed max variance when computing recommended tolerance. "
        "Example: if max observed variance is 5%% and multiplier is 1.5, recommended tolerance is 7.5%%. "
        "Use higher values (2-3) for more safety margin. Default: 1.5",
    )

    parser.add_argument(
        "--min-samples",
        type=int,
        default=2,
        help="Minimum number of samples required to compute statistics (default: 2)",
    )

    parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose output")

    parser.add_argument(
        "--workspace-root",
        type=str,
        default=None,
        help="Root of the megatron workspace (where runs/ directory is located). "
        "Defaults to current working directory.",
    )

    parser.add_argument(
        "--start-step",
        type=int,
        default=0,
        help="Number of initial steps to skip (index-based, matching test behavior). "
        "Uses Python slicing [start_step:] so --start-step 10 skips first 10 items. "
        "Default: 0 (include all). Set to match THROUGHPUT_TEST_PARAMS.--start_step from model_config.yaml.",
    )

    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Find or use result files
    if args.results_dir:
        result_files = find_result_json_files(args.results_dir, args.workspace_root)
        if not result_files:
            logger.error(f"No result JSON files found in {args.results_dir}")
            logger.info("Make sure the batch tests have completed and results are available.")
            logger.info(
                "The script looks for .out files and parses them to find the result JSON paths."
            )
            logger.info(
                "Each .out file should contain: 'This test wrote results into /opt/megatron-lm/runs/<uuid>'"
            )
            sys.exit(1)
        logger.info(f"Found {len(result_files)} result files from {args.results_dir}")
    else:
        result_files = args.result_files
        # Verify files exist
        for f in result_files:
            if not os.path.exists(f):
                logger.error(f"Result file not found: {f}")
                sys.exit(1)

    if args.verbose:
        for f in result_files:
            logger.debug(f"  - {f}")

    # Aggregate results
    aggregated = aggregate_results(result_files)

    if not aggregated:
        logger.error("No valid results found to aggregate")
        sys.exit(1)

    # Check minimum samples
    for metric_name, step_values in aggregated.items():
        for step, values in step_values.items():
            if len(values) < args.min_samples:
                logger.warning(
                    f"{metric_name} step {step}: only {len(values)} samples "
                    f"(minimum {args.min_samples} recommended)"
                )

    # Compute statistics
    stats = compute_statistics(aggregated)

    # Compute recommended tolerances (excluding warmup steps)
    if args.start_step > 1:
        logger.info(f"Excluding steps < {args.start_step} from tolerance calculation (warmup)")
    tolerances = compute_recommended_tolerances(
        stats, aggregated, args.confidence_multiplier, start_step=args.start_step
    )

    # Build output
    output = {
        "metadata": {
            "num_runs": len(result_files),
            "result_files": result_files,
            "confidence_multiplier": args.confidence_multiplier,
            "start_step": args.start_step,
        },
        "statistics": stats,
        "recommended_tolerances": tolerances,
    }

    # Write output
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'w') as f:
        json.dump(output, f, indent=2)

    logger.info(f"Statistics written to {args.output}")

    # Print summary
    if args.recommend_tolerances or args.verbose:
        print(format_summary(stats, tolerances))

        print("\nRecommended tolerance settings:")
        print("-" * 50)
        # Training test metrics
        training_metrics = [
            "lm-loss",
            "lm loss",
            "iteration-time",
            "mem-allocated-bytes",
            "mem-max-allocated-bytes",
        ]
        # Inference test metrics
        inference_metrics = [
            "latency",
            "step_count",
            "prompt_logprob_mean",
            "generated_logprob_mean",
        ]

        for metric_name in training_metrics + inference_metrics:
            if metric_name in tolerances:
                tol = tolerances[metric_name]
                var_name = metric_name.upper().replace('-', '_').replace(' ', '_')
                print(
                    f"{var_name}_RELATIVE_TOLERANCE = "
                    f"{tol['relative_tolerance']}  # {tol['relative_tolerance']:.2%}"
                )
                print(f"{var_name}_ABSOLUTE_TOLERANCE = " f"{tol['absolute_tolerance']:.2e}")


if __name__ == "__main__":
    main()


================================================
FILE: tests/functional_tests/python_test_utils/conftest.py
================================================
import pytest

from tests.functional_tests.python_test_utils import common


def pytest_addoption(parser):
    """
    Additional command-line arguments passed to pytest.
    """
    parser.addoption(
        "--allow-nondeterministic-algo",
        action="store_true",
        default=False,
        help="If set, test system checks for approximate results.",
    )
    parser.addoption("--golden-values-path", action="store", help="Path to golden values")
    parser.addoption("--actual-values-path", action="store", help="Path to actual values")
    parser.addoption("--actual-values-first-run-path", action="store", help="Path to actual values")
    parser.addoption(
        "--actual-values-second-run-path", action="store", help="Path to actual values"
    )
    parser.addoption("--scope", action="store", help="Test scope (MR, weekly, prerelease, release)")
    parser.addoption(
        "--train-iters", action="store", default=100, help="Number of train iters", type=int
    )
    parser.addoption("--test-values-path", action="store", help="Path to tensorboard records")
    parser.addoption("--tensorboard-path", action="store", help="Path to tensorboard records")
    parser.addoption("--model-config-path", action="store", help="Path to model_config.yaml")


@pytest.fixture
def compare_approximate_results(request) -> bool:
    """Simple fixture returning whether to check against results approximately."""
    return request.config.getoption("--allow-nondeterministic-algo") is True


@pytest.fixture
def golden_values_path(request):
    """Simple fixture returning golden values."""
    return request.config.getoption("--golden-values-path")


@pytest.fixture
def golden_values(request):
    """Simple fixture returning golden values."""
    return common.read_golden_values_from_json(request.config.getoption("--golden-values-path"))


@pytest.fixture
def actual_values(request):
    """Simple fixture returning golden values."""
    return common.read_golden_values_from_json(request.config.getoption("--actual-values-path"))


@pytest.fixture
def actual_values_first_run(request):
    """Simple fixture returning actual values."""
    return common.read_golden_values_from_json(
        request.config.getoption("--actual-values-first-run-path")
    )


@pytest.fixture
def actual_values_second_run(request):
    """Simple fixture returning actual values."""
    return common.read_golden_values_from_json(
        request.config.getoption("--actual-values-second-run-path")
    )


@pytest.fixture
def scope(request):
    """Simple fixture returning golden values."""
    return request.config.getoption("--scope")


@pytest.fixture
def train_iters(request):
    """Simple fixture returning number of train iters."""
    return request.config.getoption("--train-iters")


@pytest.fixture
def tensorboard_logs(request, train_iters):
    """Simple fixture returning tensorboard metrics."""
    return common.read_tb_logs_as_list(
        request.config.getoption("--tensorboard-path"), train_iters=train_iters
    )


@pytest.fixture
def test_values_path(request):
    return request.config.getoption("--test-values-path")


@pytest.fixture
def tensorboard_path(request):
    """Simple fixture returning path to tensorboard logs."""
    return request.config.getoption("--tensorboard-path")


@pytest.fixture
def model_config_path(request):
    """Simple fixture returning path to model_config.yaml."""
    return request.config.getoption("--model-config-path")


================================================
FILE: tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py
================================================
import os

os.environ["OPENBLAS_NUM_THREADS"] = "1"
import json
import logging

import click

from tests.functional_tests.python_test_utils import common

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


@click.command()
@click.option("--logs-dir", required=True, type=str, help="Path to Tensorboard logs")
@click.option("--train-iters", required=True, type=int, help="Number of train iters")
@click.option("--output-path", required=False, type=str, help="Path to write golden values")
@click.option(
    "--is-convergence-test/--is-normal-test",
    type=bool,
    help="Use first or all tensorboard logs",
    default=False,
)
@click.option(
    "--is-second-run/--is-not-second-run",
    type=bool,
    help="Use second run of tensorboard logs",
    default=False,
)
@click.option("--step-size", required=False, default=5, type=int, help="Step size of sampling")
def collect_train_test_metrics(
    logs_dir: str,
    train_iters: str,
    output_path: str,
    is_convergence_test: bool,
    is_second_run: bool,
    step_size: int,
):
    if is_convergence_test and is_second_run:
        raise ValueError("Convergence test cannot be run on second run of tensorboard logs")

    summaries = common.read_tb_logs_as_list(
        logs_dir,
        index=(-1 if is_convergence_test else (1 if is_second_run else 0)),
        train_iters=train_iters,
        start_idx=1,
        step_size=step_size,
    )

    if summaries is None:
        logger.warning("No tensorboard logs found, no golden values created.")
        return

    summaries = {
        golden_value_key: golden_value
        for (golden_value_key, golden_value) in summaries.items()
        if golden_value_key
        in [
            "iteration-time",
            "mem-allocated-bytes",
            "mem-max-allocated-bytes",
            "lm loss",
            "num-zeros",
            "mtp_1 loss",
        ]
    }

    if output_path is not None:
        with open(output_path, "w") as fh:
            json.dump(
                {
                    golden_value_key: golden_values.model_dump()
                    for golden_value_key, golden_values in summaries.items()
                },
                fh,
                indent=4,
            )


if __name__ == "__main__":
    collect_train_test_metrics()


================================================
FILE: tests/functional_tests/python_test_utils/test_grpo_training_loop.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import json
import logging
from statistics import median
from typing import Any, Dict, List, Tuple

import yaml

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Tolerance settings for all metrics.
# These tolerances account for hardware variance (different GPU silicon,
# driver versions, CUDA/cuDNN differences) while still catching real regressions.
# Tolerances can be tuned using compute_golden_statistics.py to analyze variance
# across multiple runs on different hardware.

# LM Loss tolerances
LM_LOSS_RELATIVE_TOLERANCE = 0.01  # 1% relative tolerance
LM_LOSS_ABSOLUTE_TOLERANCE = 1e-6  # For values near zero

# Iteration time tolerances (performance metric, higher variance expected)
ITERATION_TIME_RELATIVE_TOLERANCE = 0.15  # 15% relative tolerance

# Memory allocation tolerances
MEM_ALLOCATED_BYTES_RELATIVE_TOLERANCE = 0.10  # 10% relative tolerance
MEM_MAX_ALLOCATED_BYTES_RELATIVE_TOLERANCE = 0.10  # 10% relative tolerance


def validate_with_tolerance(
    golden_values: Dict[str, Any],
    current_values: Dict[str, Any],
    relative_tolerance: float,
    absolute_tolerance: float = 1e-9,
    metric_name: str = "metric",
) -> Tuple[bool, List[str]]:
    """
    Validate that current values are within tolerance of golden values.

    Args:
        golden_values: Dict mapping step -> expected value
        current_values: Dict mapping step -> actual value
        relative_tolerance: Maximum allowed relative difference (e.g., 0.01 for 1%)
        absolute_tolerance: Tolerance for values near zero
        metric_name: Name of metric for error messages

    Returns:
        Tuple of (passed: bool, mismatches: List[str])
    """
    mismatches = []

    for step, golden_val in golden_values.items():
        if step not in current_values:
            mismatches.append(f"Step {step}: missing in current run")
            continue

        current_val = current_values[step]

        # Handle the case where golden value is zero or near-zero
        if golden_val == 0 or abs(golden_val) < absolute_tolerance:
            if abs(current_val) > absolute_tolerance:
                mismatches.append(f"Step {step}: expected ~0, got {current_val}")
        else:
            # Calculate relative difference
            rel_diff = abs(current_val - golden_val) / abs(golden_val)
            if rel_diff > relative_tolerance:
                mismatches.append(
                    f"Step {step}: {current_val} differs from golden {golden_val} "
                    f"by {rel_diff:.4%} (tolerance: {relative_tolerance:.2%})"
                )

    # Check for extra steps in current that aren't in golden
    extra_steps = set(current_values.keys()) - set(golden_values.keys())
    if extra_steps:
        logger.info(f"{metric_name}: Ignoring extra steps in current run: {extra_steps}")

    return len(mismatches) == 0, mismatches


def test_grpo_training_loop(
    golden_values_path: str, test_values_path: str, model_config_path: str
) -> None:
    with open(model_config_path, 'r') as f:
        model_config = yaml.safe_load(f)
        metrics = model_config["METRICS"]
        if "ENV_VARS" in model_config and "THROUGHPUT_START_STEP" in model_config["ENV_VARS"]:
            start_step = model_config["ENV_VARS"]["THROUGHPUT_START_STEP"]
        else:
            start_step = 1

    with open(golden_values_path, 'r') as f1, open(test_values_path, 'r') as f2:
        golden_values_content = f1.read()
        tensorboard_content = f2.read()

    output_groundtruth = json.loads(golden_values_content)

    if isinstance(output_groundtruth, str):
        # Handle JSONL output, assume only one line in this case.
        output_groundtruth = json.loads(output_groundtruth)

    output_current = json.loads(tensorboard_content)
    if isinstance(output_current, str):
        # Handle JSONL output, assume only one line in this case.
        output_current = json.loads(output_current)

    # Allow current run to have extra metrics not in golden values
    # (only compare metrics defined in golden values)
    extra_in_current = set(output_current.keys()) - set(output_groundtruth.keys())
    if extra_in_current:
        logger.info(f"Ignoring extra metrics in current run: {extra_in_current}")

    assert set(output_groundtruth.keys()).issubset(
        set(output_current.keys())
    ), f"Some IDs from groundtruth are missing in current: {output_groundtruth.keys()} vs {output_current.keys()}"
    if set(output_groundtruth.keys()) != set(output_current.keys()):
        logger.warning(
            f"Some IDs from groundtruth are missing in output, only the subset of ids in groundtruth will be tested: {output_groundtruth.keys()} vs {output_current.keys()}"
        )
    assert len(output_groundtruth) > 0, "No test performed for output"

    if "iteration-time" in metrics and "iteration-time" in output_current:

        # First warmup iteration is excluded from iteration-time statistics.
        iteration_time_sampled = median(
            [l for l in output_current["iteration-time"]['values'].values()][start_step:]
        )
        iteration_time_golden = median(
            [l for l in output_groundtruth["iteration-time"]['values'].values()][start_step:]
        )

        lower_bound = (1 - ITERATION_TIME_RELATIVE_TOLERANCE) * iteration_time_golden
        upper_bound = (1 + ITERATION_TIME_RELATIVE_TOLERANCE) * iteration_time_golden
        assert lower_bound <= iteration_time_sampled <= upper_bound, (
            f"Iteration time {iteration_time_sampled} ms not within "
            f"{ITERATION_TIME_RELATIVE_TOLERANCE:.0%} of golden value ~{iteration_time_golden} ms. "
            f"Sampled: {output_current['iteration-time']} ms. "
            f"Please update golden values in the functional tests if this is expected."
        )

        output_groundtruth.pop('iteration-time')

    if "lm-loss" in metrics and "lm-loss" in output_current:

        # Validate lm-loss values with tolerance to account for hardware variance.
        # Previously required exact matching, but this caused flaky failures due to
        # floating-point differences across different GPU hardware.
        golden_lm_loss_values = output_groundtruth["lm-loss"]['values']
        current_lm_loss_values = output_current["lm-loss"]['values']

        passed, mismatches = validate_with_tolerance(
            golden_lm_loss_values,
            current_lm_loss_values,
            relative_tolerance=LM_LOSS_RELATIVE_TOLERANCE,
            absolute_tolerance=LM_LOSS_ABSOLUTE_TOLERANCE,
            metric_name="lm-loss",
        )

        if not passed:
            error_msg = (
                f"LM loss values outside tolerance ({LM_LOSS_RELATIVE_TOLERANCE:.1%}):\n"
                + "\n".join(f"  - {m}" for m in mismatches)
                + f"\n\nGolden: {golden_lm_loss_values}\n"
                + f"Current: {current_lm_loss_values}\n"
                + "Please update golden values in the functional tests if this is expected."
            )
            assert False, error_msg

        output_groundtruth.pop('lm-loss')

    if "mem-allocated-bytes" in metrics and "mem-allocated-bytes" in output_current:

        # Use max instead of median - we care about worst-case memory usage
        # Skip first step (warmup) which may have different memory characteristics
        current_values = [l for l in output_current["mem-allocated-bytes"]['values'].values()][1:]
        golden_values = [l for l in output_groundtruth["mem-allocated-bytes"]['values'].values()][
            1:
        ]

        mem_allocated_bytes_sampled = max(current_values)
        mem_allocated_bytes_golden = max(golden_values)

        upper_bound = (1 + MEM_ALLOCATED_BYTES_RELATIVE_TOLERANCE) * mem_allocated_bytes_golden
        assert mem_allocated_bytes_sampled <= upper_bound, (
            f"Max mem allocated bytes {mem_allocated_bytes_sampled} bytes exceeds "
            f"{MEM_ALLOCATED_BYTES_RELATIVE_TOLERANCE:.0%} above golden max {mem_allocated_bytes_golden} bytes. "
            f"Upper bound: {upper_bound} bytes. "
            f"Please update golden values in the functional tests if this is expected."
        )

        output_groundtruth.pop('mem-allocated-bytes')

    if "mem-max-allocated-bytes" in metrics and "mem-max-allocated-bytes" in output_current:

        # Use max - we care that peak memory doesn't exceed the golden peak
        # Skip first step (warmup) which may have different memory characteristics
        current_values = [l for l in output_current["mem-max-allocated-bytes"]['values'].values()][
            1:
        ]
        golden_values = [
            l for l in output_groundtruth["mem-max-allocated-bytes"]['values'].values()
        ][1:]

        mem_max_allocated_bytes_sampled = max(current_values)
        mem_max_allocated_bytes_golden = max(golden_values)

        upper_bound = (
            1 + MEM_MAX_ALLOCATED_BYTES_RELATIVE_TOLERANCE
        ) * mem_max_allocated_bytes_golden
        assert mem_max_allocated_bytes_sampled <= upper_bound, (
            f"Max mem-max-allocated bytes {mem_max_allocated_bytes_sampled} bytes exceeds "
            f"{MEM_MAX_ALLOCATED_BYTES_RELATIVE_TOLERANCE:.0%} above golden max {mem_max_allocated_bytes_golden} bytes. "
            f"Upper bound: {upper_bound} bytes. "
            f"Please update golden values in the functional tests if this is expected."
        )

        output_groundtruth.pop('mem-max-allocated-bytes')


================================================
FILE: tests/functional_tests/python_test_utils/test_inference_regular_pipeline.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import json
import logging
import math
import os
from statistics import median

import pytest
import yaml

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

_NON_REQUEST_TOP_LEVEL_KEYS = {
    # System-level metrics
    "throughput",
    "lifetime_prefill_token_count",
    # Peak memory metrics (added by inference scripts; optionally checked if present in golden values)
    "mem-max-allocated-bytes",
}


def _median_as_float(value):
    """Convert scalar or list metric to a single float (median).

    For list metrics (e.g., per-request throughput), treat the first element as
    warmup if length > 1, matching existing throughput behavior.
    """
    if isinstance(value, list):
        assert len(value) > 0, "Metric list is empty."
        values = [float(v) for v in value]
        if len(values) > 1:
            values = values[1:]
        return float(median(values))
    return float(value)


def _bytes_to_gib(num_bytes: float) -> float:
    return float(num_bytes) / (1024.0**3)


def test_inference_pipeline(
    golden_values_path: str, test_values_path: str, model_config_path: str
) -> None:
    if os.getenv("ENABLE_LIGHTWEIGHT_MODE") == "true":
        pytest.skip("Lightweight mode enabled. Skipping test.")

    with (
        open(golden_values_path, 'r') as f1,
        open(test_values_path, 'r') as f2,
        open(model_config_path, 'r') as f3,
    ):
        golden_values_content = f1.read()
        tensorboard_content = f2.read()
        model_config_content = f3.read()

    metrics = yaml.safe_load(model_config_content)["METRICS"]
    if not metrics:
        print("No metrics defined in model_config.yaml, skipping validation.")
        return

    output_groundtruth = json.loads(golden_values_content)

    if isinstance(output_groundtruth, str):
        # Handle JSONL output, assume only one line in this case.
        output_groundtruth = json.loads(output_groundtruth)

    output_current = json.loads(tensorboard_content)
    if isinstance(output_current, str):
        # Handle JSONL output, assume only one line in this case.
        output_current = json.loads(output_current)

    groundtruth_request_ids = set(output_groundtruth.keys()) - _NON_REQUEST_TOP_LEVEL_KEYS
    current_request_ids = set(output_current.keys()) - _NON_REQUEST_TOP_LEVEL_KEYS

    assert groundtruth_request_ids.issuperset(current_request_ids), (
        "Some request IDs from groundtruth are missing in current or current has unexpected IDs: "
        f"{sorted(groundtruth_request_ids)} vs {sorted(current_request_ids)}"
    )
    if groundtruth_request_ids != current_request_ids:
        logger.warning(
            "Some request IDs from groundtruth are missing in output; only the subset of ids in groundtruth will be tested: "
            f"{sorted(groundtruth_request_ids)} vs {sorted(current_request_ids)}"
        )
    assert len(output_groundtruth) > 0, "No test performed for output"

    # Throughput assertions.
    if "throughput" in output_groundtruth.keys():
        if "throughput" in metrics:
            # First warmup iteration is excluded from throughput statistics.
            throughput_sampled = median(output_current["throughput"][1:])
            throughput_golden = median(output_groundtruth["throughput"][1:])

            # 10% is empirically observed to be within hardware variance.
            assert (
                throughput_sampled >= 0.9 * throughput_golden
            ), f"Throughput is slower than expected! Expected to be within 10% of ~{throughput_golden} tok/s but benchmarked {output_current['throughput']} tok/s"

            # If throughput is significantly improved (> 20%), update golden values accordingly.
            assert (
                throughput_sampled < throughput_golden * 1.2
            ), f"Throughput has been improved from expected ~{throughput_golden} tok/s to {output_current['throughput']} tok/s. Please update golden values in the functional tests."

        output_groundtruth.pop('throughput')

    # Peak memory regression checks (optional: only if present in golden values).
    if "mem-max-allocated-bytes" in output_groundtruth:
        if "mem-max-allocated-bytes" in metrics:
            assert "mem-max-allocated-bytes" in output_current, (
                f"Golden values include mem-max-allocated-bytes but current output does not. "
                "Ensure the inference script records memory metrics to the output JSON."
            )
            sampled = _median_as_float(output_current["mem-max-allocated-bytes"])
            golden = _median_as_float(output_groundtruth["mem-max-allocated-bytes"])
            assert golden > 0, f"Golden mem_max_allocated_bytes must be > 0, got {golden}."

            low = 0.95 * golden
            high = 1.05 * golden

            if sampled < low:
                raise AssertionError(
                    f"Memory is too low for mem-max-allocated-bytes: "
                    f"expected within 5% of {golden:.0f} bytes ({_bytes_to_gib(golden):.3f} GiB) "
                    f"but got {sampled:.0f} bytes ({_bytes_to_gib(sampled):.3f} GiB). "
                    "This is >5% lower than expected; please update golden values in the functional tests."
                )
            if sampled > high:
                raise AssertionError(
                    f"Memory is too high for mem-max-allocated-bytes: "
                    f"expected within ±5% of {golden:.0f} bytes ({_bytes_to_gib(golden):.3f} GiB) "
                    f"but got {sampled:.0f} bytes ({_bytes_to_gib(sampled):.3f} GiB). "
                    "This is >5% higher than expected; this is likely a regression."
                )
        output_groundtruth.pop("mem-max-allocated-bytes")

    lptc_key = "lifetime_prefill_token_count"
    if lptc_key in output_groundtruth and lptc_key not in metrics:
        # metrics does not have lifetime_prefill_token_count, so ignore it
        output_groundtruth.pop(lptc_key)
    elif lptc_key in metrics:
        # Ground truth does not have lifetime_prefill_token_count, so ignore it
        metrics.pop(lptc_key)
    elif lptc_key in output_groundtruth and lptc_key in metrics:
        # TODO: Compare liftime_prefill_token_count to groundtruth
        pass

    for request_id, groundtruth_results in output_groundtruth.items():
        current_results = output_current[request_id]

        at_least_one_test_loop = False
        if "generated_tokens" in groundtruth_results and "generated_tokens" in metrics:
            at_least_one_test_loop = True
            tokens_groundtruth = groundtruth_results["generated_tokens"]
            tokens_current = current_results["generated_tokens"]
            # Check token equality
            assert (
                tokens_groundtruth == tokens_current
            ), f"Token mismatch:\nGround truth: {tokens_groundtruth}\nCurrent: {tokens_current}"

        if "logprobs" in groundtruth_results and "logprobs" in metrics:
            at_least_one_test_loop = True
            logprobs_groundtruth = groundtruth_results["logprobs"]
            logprobs_current = current_results["logprobs"]
            # Check logprobs length and tolerance
            assert len(logprobs_groundtruth) == len(
                logprobs_current
            ), f"Logprobs length mismatch: {len(logprobs_groundtruth)} vs {len(logprobs_current)}"

            for i, (lp1, lp2) in enumerate(zip(logprobs_groundtruth, logprobs_current)):
                assert math.isclose(
                    lp1, lp2, abs_tol=0.001
                ), f"Logprobs differ at index {i}: {lp1:.5f} vs {lp2:.5f}"

        if "generated_text" in groundtruth_results and "generated_text" in metrics:
            at_least_one_test_loop = True
            generated_text_groundtruth = groundtruth_results["generated_text"]
            generated_text_current = current_results["generated_text"]
            min_len = min(len(generated_text_groundtruth), len(generated_text_current))
            assert min_len > 0, (
                "Generated text mismatch:"
                f"\nGround truth: {generated_text_groundtruth}\nCurrent: {generated_text_current}"
            )
            assert generated_text_groundtruth[:min_len] == generated_text_current[:min_len], (
                "Generated text mismatch:"
                f"\nGround truth (truncated to {min_len} chars): {generated_text_groundtruth[:min_len]}"
                f"\nCurrent (truncated to {min_len} chars): {generated_text_current[:min_len]}"
            )

        if "routing_indices" in groundtruth_results and "routing_indices" in metrics:
            at_least_one_test_loop = True
            routing_indices_groundtruth = groundtruth_results["routing_indices"]
            routing_indices_current = current_results["routing_indices"]
            assert (
                routing_indices_groundtruth == routing_indices_current
            ), f"Routing indices mismatch:\nGround truth: {routing_indices_groundtruth}\nCurrent: {routing_indices_current}"

        if not at_least_one_test_loop:
            raise AssertionError(f"No test performed for output {groundtruth_results}")


================================================
FILE: tests/functional_tests/python_test_utils/test_optimizer_grads_match.py
================================================
import re
from argparse import ArgumentParser
from pathlib import Path
from typing import Dict, Iterable, Optional, Tuple, Union

import torch
from torch.distributed.checkpoint.filesystem import FileSystemReader
from torch.distributed.checkpoint.state_dict_loader import load

TensorLike = Union[torch.Tensor, Iterable[torch.Tensor]]


def _as_iter(x: TensorLike):
    return x if (isinstance(x, Iterable) and not isinstance(x, torch.Tensor)) else [x]


def _fro_norm(x: TensorLike) -> torch.Tensor:
    """Frobenius norm; supports sharded tensors (sum of shard ||·||_F^2)."""
    it = _as_iter(x)
    s = torch.tensor(0.0, device=next(iter(it)).device if it else "cpu")
    for t in it:
        s = s + t.float().pow(2).sum()
    return torch.sqrt(s)


def machine_epsilon_for_dtype(dtype: torch.dtype) -> float:
    """Return machine epsilon for dtype. For FP8, use BF16 epsilon per paper."""
    # Standard types
    if dtype in (torch.float32, torch.float16, torch.bfloat16):
        return float(torch.finfo(dtype).eps)
    # FP8 recipes: accum/store typically BF16/FP32; use BF16 epsilon
    if hasattr(torch, "float8_e4m3fn") and dtype in (
        torch.float8_e4m3fn,
        getattr(torch, "float8_e5m2fn", None),
    ):
        return float(torch.finfo(torch.bfloat16).eps)
    # Fallback
    return float(torch.finfo(torch.float32).eps)


def relative_grad_diff(g_hat: TensorLike, g_ref: TensorLike, eps_den: float = 1e-30) -> float:
    """
    Relative difference ||g_hat - g_ref||_F / ||g_ref||_F.
    Accepts a single tensor or an iterable of shards for each argument.
    """
    # If sharded, assume shards align 1:1; otherwise pass the merged tensors.
    gh_iter, gr_iter = _as_iter(g_hat), _as_iter(g_ref)
    if len(list(gh_iter)) != len(list(gr_iter)):
        # Re-materialize since we consumed generators above:
        gh_iter, gr_iter = _as_iter(g_hat), _as_iter(g_ref)
    num_sq = torch.tensor(0.0, device=next(iter(gh_iter)).device)
    for a, b in zip(_as_iter(g_hat), _as_iter(g_ref)):
        num_sq = num_sq + (a.float() - b.float()).pow(2).sum()
    num = torch.sqrt(num_sq)
    den = _fro_norm(g_ref)
    return float(num / (den + eps_den))


def expected_rel_bound(
    l: int,
    *,
    L: int = 32,
    C: float = 1.03,
    dtype: Optional[torch.dtype] = torch.bfloat16,
    k: float = 4.0,
) -> float:
    """
    Bound ~ k * (C ** (L + 1 - l)) * eps_mch, with 1-based layer index l.
    - L is hard-coded default to 32 per your request.
    - C is 'close to 1'; 1.01–1.05 are reasonable defaults.
    - k absorbs the hidden constant in big-O; 2–8 are common choices.
    - dtype controls eps_mch; for FP8 use BF16 epsilon (see https://www.arxiv.org/pdf/2506.09280 theorem 5.3).
    """
    eps_mch = machine_epsilon_for_dtype(dtype or torch.bfloat16)
    depth = L + 1 - l  # 1-based depth from the top (as in the theorem)
    depth = max(depth, 0)
    return float(k * (C**depth) * eps_mch)


def check_gradient(
    g_hat: TensorLike,
    g_ref: TensorLike,
    l: int,
    *,
    L: int = 32,
    C: float = 1.03,
    dtype: Optional[torch.dtype] = None,
    k: float = 4.0,
) -> Tuple[float, float, bool]:
    """
    Compute (rel_error, bound, ok) for layer l.
    - If dtype is None, infer from g_ref (or g_hat if needed).
    # See https://www.arxiv.org/pdf/2506.09280 theorem 5.3
    """
    # Infer dtype if not provided
    if dtype is None:
        t0 = next(iter(_as_iter(g_ref)))
        dtype = t0.dtype
    rel = relative_grad_diff(g_hat, g_ref)
    bnd = expected_rel_bound(l, L=L, C=C, dtype=dtype, k=k)
    return rel, bnd, (rel <= bnd)


def _filter_optimizer_tensors(plain_tensors: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
    """Return only optimizer-related tensors from a flat checkpoint tensor dict."""
    return {
        k: v for k, v in plain_tensors.items() if k.startswith("optimizer.") and ".exp_avg." in k
    }


def assert_grads_close(left: torch.Tensor, right: torch.Tensor):
    # Implement theorem 5.3 of https://www.arxiv.org/pdf/2506.09280

    # This is the real test:
    rel, bnd, ok = check_gradient(
        left, right, l=0, dtype=torch.bfloat16
    )  # hard code to layer 0 since that's the most permissive

    # If the real test above fails, run an assert close for the useful diagnostics and raise either way.
    if not ok:
        rel_shuff, _, ok_shuff = check_gradient(
            left, torch.roll(right, shifts=-1, dims=-1), l=0, dtype=torch.bfloat16
        )

        try:
            torch.testing.assert_close(left, right)
            msg = (
                "AssertionError on relative norm magnitude "
                f"(rel={rel}, bnd={bnd}, ok={ok}, rel_shuff={rel_shuff}, ok_shuff={ok_shuff}) "
                "but torch.testing.assert_close(left, right) passes. \n"
                f"Left: {left.shape}/{left.dtype} {left}\n"
                f"Right: {right.shape}/{right.dtype} {right}"
            )
        except AssertionError as e:
            msg = (
                "AssertionError on relative norm magnitude "
                f"(rel={rel}, bnd={bnd}, ok={ok}, rel_shuff={rel_shuff}, ok_shuff={ok_shuff}): {e}\n"
                f"Left: {left.shape}/{left.dtype} {left}\n"
                f"Right: {right.shape}/{right.dtype} {right}"
            )
        raise AssertionError(msg)


def unshard_row_parallel_state(saved_state, out_features, in_features, tp):
    # saved_state: [..., tp, out_features * (in_features // tp)]
    prefix = saved_state.shape[:-2]
    per = in_features // tp
    x = saved_state.view(*prefix, tp, out_features, per)  # [..., tp, O, I_shard]
    x = x.permute(*range(len(prefix)), -2, -3, -1)  # [..., O, tp, I_shard]
    x = x.reshape(*prefix, out_features, in_features)  # [..., O, I]
    return x


def _assert_optimizer_tensors_equal(
    left: Dict[str, torch.Tensor],
    right: Dict[str, torch.Tensor],
    left_empty: Dict[str, torch.Tensor],
    right_empty: Dict[str, torch.Tensor],
    eps=1e-4,
):
    left_keys = set(left.keys())
    right_keys = set(right.keys())

    only_left = sorted(left_keys - right_keys)
    only_right = sorted(right_keys - left_keys)
    assert (
        not only_left and not only_right
    ), f"Optimizer tensor keys mismatch.\nOnly in left: {only_left}\nOnly in right: {only_right}"
    some_non_zero = False
    assertions = []
    for key in sorted(left_keys):
        lt, rt = left[key], right[key]
        rt_colpar, rt_rowpar = None, None
        if lt.shape != rt.shape:
            # "Tensor shape mismatch for {key}: {lt.shape} vs {rt.shape}, trying simple reshape
            original_key = key.replace("optimizer.state.exp_avg.", "")
            # Unsharded shape
            # {'decoder.layers.self_attention.linear_proj.weight': torch.Size([32, 3072, 4096]), 'optimizer.state.exp_avg.decoder.layers.self_attention.linear_proj.weight': torch.Size([32, 1, 1, 12582912]), 'optimizer.state.exp_avg_sq.decoder.layers.self_attention.linear_proj.weight': torch.Size([32, 1, 1, 12582912]), 'optimizer.state.fp32_param.decoder.layers.self_attention.linear_proj.weight': torch.Size([32, 1, 1, 12582912])}
            # Sharded shape
            # {'decoder.layers.self_attention.linear_proj.weight': torch.Size([32, 3072, 4096]), 'optimizer.state.exp_avg.decoder.layers.self_attention.linear_proj.weight': torch.Size([32, 1, 2, 6291456]), 'optimizer.state.exp_avg_sq.decoder.layers.self_attention.linear_proj.weight': torch.Size([32, 1, 2, 6291456]), 'optimizer.state.fp32_param.decoder.layers.self_attention.linear_proj.weight': torch.Size([32, 1, 2, 6291456])}
            left_shape = left_empty[original_key].shape
            right_shape = right_empty[original_key].shape
            skip_tp_check = False

            if left_shape != right_shape:
                if "embedding.word_embeddings.weight" in key or ".output_layer.weight" in key:
                    # First handle different padding on the input/output dimensions.
                    lt = lt.reshape(left_shape)
                    rt = rt.reshape(right_shape)
                    min_dim = min(left_shape[0], right_shape[0])
                    lt = lt[:min_dim, ...]
                    rt = rt[:min_dim, ...]
                    left_shape = lt.shape
                    right_shape = rt.shape
                    skip_tp_check = True
                else:
                    raise AssertionError(
                        f"Tensor shape mismatch for {key}: {left_shape} vs {right_shape}"
                    )
            # problem: we do not know the TP axis for this tensor. We can guess though.
            if len(left_shape) == 3 and not skip_tp_check:
                # TP axis is 1
                lt = lt.reshape(left_shape[0], 1, left_shape[1], left_shape[2])
            elif len(left_shape) == 2 and not skip_tp_check:
                # TP axis is 2
                lt = lt.reshape(left_shape[0], 1, left_shape[1])

            if (
                key.endswith("mlp.linear_fc2.weight")
                or key.endswith("self_attention.linear_proj.weight")
            ) and not skip_tp_check:
                # Handle row parallel linear layers.
                # TODO come up with a better way to determine row parallel linear layers.
                rt = unshard_row_parallel_state(
                    rt, out_features=left_shape[1], in_features=left_shape[2], tp=rt.shape[2]
                )
            else:
                try:
                    rt = rt.reshape(lt.shape)
                except Exception as e:
                    msg = f"Tensor shape mismatch for {key}: {lt.shape} vs {rt.shape}, simple reshape failed: {e}"
                    if "embedding.word_embeddings.weight" in key or ".output_layer.weight" in key:
                        print(
                            f"FIXME: Skipping {key} because it's a word embedding or output layer,"
                            "and something about padding changes under TP."
                        )
                        continue
                    raise AssertionError(msg)

        assert (
            lt.shape == rt.shape and lt.dtype == rt.dtype
        ), f"Tensor meta mismatch for {key}: {lt.shape}/{lt.dtype} vs {rt.shape}/{rt.dtype}"
        # Reduce the rate of 0 vs near 0 rtol failures by adding a small epsilon
        left_scale = torch.max(torch.abs(lt))
        right_scale = torch.max(torch.abs(rt))
        if left_scale <= eps and right_scale <= eps:
            print(
                f"WARNING: zero-ish scale tensors ({left_scale=} vs {right_scale=}) "
                f"so they will trivially pass comparing {key=}"
            )
        else:
            some_non_zero = True
        try:
            assert_grads_close(lt, rt)
            print(f"Optimizer tensors match for {key}")
        except AssertionError as e:
            assertions.append(AssertionError(f"AssertionError for {key}: {e}"))
    assert not assertions, f"Assertion Errors found comparing keys: {assertions}"
    assert some_non_zero, "No non-zero tensors found in this comparison"


def load_dist_checkpoint_pt(
    ckpt_dir,
    metadata_ckpt_dir=None,
    pattern=r"optimizer",
    device="cpu",
    return_full_empty: bool = False,
):
    """Return {full_key: tensor} for every tensor whose key matches *pattern*."""
    meta_ckpt_dir = Path(metadata_ckpt_dir or ckpt_dir)
    meta_reader = FileSystemReader(str(meta_ckpt_dir))

    # --- fast metadata pass (no tensor data yet) -----------------------------
    meta = meta_reader.read_metadata()  # tiny JSON read
    tmeta = meta.state_dict_metadata  # key ➜ TensorMetadata
    if return_full_empty:
        wanted = [k for k in tmeta if hasattr(tmeta[k], "size")]
    else:
        wanted = [k for k in tmeta if re.search(pattern, k) and hasattr(tmeta[k], "size")]
    if not wanted:
        raise ValueError(f"No keys matching /{pattern}/ in {ckpt_dir}")

    # --- build "empty" placeholders -----------------------------------------
    placeholders = {
        k: torch.empty(tuple(tmeta[k].size), dtype=tmeta[k].properties.dtype, device=device)
        for k in wanted
    }
    if return_full_empty:
        return placeholders
    # --- stream just those tensors (no process-group needed) -----------------
    data_reader = FileSystemReader(str(ckpt_dir))

    load(
        state_dict=placeholders,
        storage_reader=data_reader,
        no_dist=True,  # switches off all collectives
    )
    return placeholders  # dict[str, Tensor]


def test_optimizer_states_match(checkpoint_dirs):
    """
    Compare optimizer state across provided torch_dist checkpoints:
    - Keys: ensure the set of optimizer tensor keys match across checkpoints
    - Values: ensure corresponding tensors are equal (allclose)
    - Structure (non-tensor common state): ensure common optimizer structures match
    """
    assert len(checkpoint_dirs) > 1, "This test requires 2 or more checkpoints <dir1> [<dir2> ...]."

    base_dir = checkpoint_dirs[0]

    # Compare optimizer tensors
    base_plain = load_dist_checkpoint_pt(base_dir)
    base_empty = load_dist_checkpoint_pt(base_dir, return_full_empty=True, device="meta")
    base_opt_tensors = _filter_optimizer_tensors(base_plain)
    assert base_opt_tensors, f"No optimizer tensors found in checkpoint: {base_dir}"
    assertions = []
    for other_dir in checkpoint_dirs[1:]:
        try:
            other_plain = load_dist_checkpoint_pt(other_dir)
            other_empty = load_dist_checkpoint_pt(other_dir, return_full_empty=True, device="meta")
            other_opt_tensors = _filter_optimizer_tensors(other_plain)
            assert other_opt_tensors, f"No optimizer tensors found in checkpoint: {other_dir}"
            _assert_optimizer_tensors_equal(
                base_opt_tensors, other_opt_tensors, base_empty, other_empty
            )
            print(f"Optimizer tensors match for {base_dir} and {other_dir}")
            del other_plain
            del other_opt_tensors
        except AssertionError as e:
            msg = f"AssertionError comparing {base_dir} to {other_dir}:\n{e}"
            print(f"Optimizer tensors mismatch for {base_dir} and {other_dir}:\n{msg}")
            assertions.append(AssertionError(msg))
    assert not assertions, f"AssertionErrors comparing {checkpoint_dirs}:\n{assertions}"


def main():
    parser = ArgumentParser(
        description="Given checkpoints saved with adam b1,b2=0 trained for one step, "
        "we can check that the gradients match under different training configurations. "
        "Currently this test script has some hard-coded assumptions for GPT style models, "
        "namely which layers are RowParallel and require different unsharding logic."
    )
    parser.add_argument(
        "checkpoints", nargs="+", type=Path, help="Path to the checkpoints to compare"
    )
    args = parser.parse_args()
    test_optimizer_states_match(args.checkpoints)


if __name__ == "__main__":
    main()


================================================
FILE: tests/functional_tests/python_test_utils/test_pretraining_regular_pipeline.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import logging
from typing import Dict, List, Optional

import yaml

from tests.functional_tests.python_test_utils import common

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

CHECK_THRESHOLDS = {
    "iteration-time": [common.ApproximateTest(atol=0, rtol=0.05)],
    "mem-allocated-bytes": [common.ApproximateTest(atol=0, rtol=0.05)],
    "mem-max-allocated-bytes": [common.ApproximateTest(atol=0, rtol=0.05)],
    "lm loss": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)],
    "mtp_1 loss": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)],
    "num-zeros": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)],
    "generated_tokens": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)],
    "logprobs": [common.DeterministicTest(), common.ApproximateTest(atol=0, rtol=0.05)],
}


def test_regular_pipeline(
    compare_approximate_results: bool,
    golden_values: Dict[str, common.GoldenValueMetric],
    actual_values: Dict[str, common.GoldenValueMetric],
    model_config_path: str,
    checks: Optional[Dict[str, List[common.Test]]] = None,
):
    if checks is None:
        with open(model_config_path) as f:
            model_config = yaml.safe_load(f)

        checks_types = (
            model_config["METRICS"] if "METRICS" in model_config else ["lm loss", "num-zeros"]
        )
        checks = {metric: CHECK_THRESHOLDS[metric] for metric in checks_types}

        if (
            len(
                missing_metrics := [
                    golden_metric
                    for golden_metric in checks.keys()
                    if golden_metric not in golden_values.keys()
                ]
            )
            > 0
        ):
            logger.error(
                f"The following metrics are required but not provided in golden values: {', '.join(missing_metrics)}"
            )
            assert False

    common.pipeline(
        compare_approximate_results=compare_approximate_results,
        golden_values=golden_values,
        actual_values=actual_values,
        checks=checks,
    )


================================================
FILE: tests/functional_tests/python_test_utils/test_pretraining_resume_checkpoint_pipeline.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

import logging
from typing import Dict

import yaml

from tests.functional_tests.python_test_utils import common, test_pretraining_regular_pipeline

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def test_resume_checkpoint_pipeline(
    compare_approximate_results: bool,
    actual_values_first_run: Dict[str, common.GoldenValueMetric],
    actual_values_second_run: Dict[str, common.GoldenValueMetric],
    train_iters: int,
    model_config_path: str,
):
    with open(model_config_path) as f:
        model_config = yaml.safe_load(f)

    checks_types = (
        model_config["METRICS"] if "METRICS" in model_config else ["lm loss", "num-zeros"]
    )
    checks = {
        metric: test_pretraining_regular_pipeline.CHECK_THRESHOLDS[metric]
        for metric in checks_types
    }

    if (
        len(
            missing_metrics := [
                golden_metric
                for golden_metric in checks.keys()
                if golden_metric not in actual_values_first_run.keys()
            ]
        )
        > 0
    ):
        logger.error(
            f"The following metrics are required but not logged during training: {', '.join(missing_metrics)}"
        )
        assert False

    # actual_values_second_run is NaN for the first 50 steps. We want to replace those
    # with the first 50 steps of actual_values_first_run

    actual_values_first_run = {
        metric_name: metric_values
        for (metric_name, metric_values) in actual_values_first_run.items()
        if metric_name in checks.keys()
    }

    actual_values_second_run = {
        metric_name: metric_values
        for (metric_name, metric_values) in actual_values_second_run.items()
        if metric_name in checks.keys()
    }

    for metric_name in checks.keys():
        actual_values_first_run[metric_name].start_step = train_iters // 2 + 1
        actual_values_first_run[metric_name].values = {
            k: v
            for k, v in actual_values_first_run[metric_name].values.items()
            if k > train_iters // 2
        }

        actual_values_second_run[metric_name].start_step = train_iters // 2 + 1
        actual_values_second_run[metric_name].values = {
            k: v
            for k, v in actual_values_second_run[metric_name].values.items()
            if k > train_iters // 2
        }

    logger.info(actual_values_first_run)
    logger.info(actual_values_second_run)

    test_pretraining_regular_pipeline.test_regular_pipeline(
        compare_approximate_results=compare_approximate_results,
        golden_values=actual_values_first_run,
        actual_values=actual_values_second_run,
        checks=checks,
        model_config_path=model_config_path,
    )


================================================
FILE: tests/functional_tests/shell_test_utils/_run_training.sh
================================================
#!/bin/bash

# This script can be used for model onboarding and testing.

# For onboarding, it extract scalars from Tensorboard logs only.
# For testing, it compares extracted Tensorboard scalars against
# a set of `GOLDEN_VALUES`.

set -euxo pipefail

set +x
for ARGUMENT in "$@"; do
    KEY=$(echo $ARGUMENT | cut -f1 -d=)

    KEY_LENGTH=${#KEY}
    VALUE="${ARGUMENT:$KEY_LENGTH+1}"

    export "$KEY"="$VALUE"
    echo "$KEY=$VALUE"
done
set -x

# Check that mandatory vars are set
MANDATORY_VARS=(
    "TRAINING_SCRIPT_PATH"
    "TRAINING_PARAMS_PATH"
    "OUTPUT_PATH"
    "TENSORBOARD_PATH"
    "CHECKPOINT_SAVE_PATH"
    "CHECKPOINT_LOAD_PATH"
    "DATA_PATH"
    "RUN_NUMBER"
    "REPEAT"
)
for mandatory_var in "${MANDATORY_VARS[@]}"; do
    if [[ -z "${!mandatory_var}" ]]; then
        echo 'Providing $'$mandatory_var' is mandatory.'
        exit 1
    fi
done

set +x
# Envsubst model_params
cat $TRAINING_PARAMS_PATH | envsubst "$(env | cut -d= -f1 | sed -e 's/^/$/')" >$TRAINING_PARAMS_PATH.tmp
TRAINING_PARAMS_PATH="$TRAINING_PARAMS_PATH.tmp"
set -x

# Pull env vars to export
ENV_VARS=$(/usr/local/bin/yq '... comments="" | .ENV_VARS | to_entries | .[] | [.key + "=" + .value] | join(" ")' "$TRAINING_PARAMS_PATH")
while IFS= read -r ARGUMENT; do
    KEY=$(echo $ARGUMENT | cut -f1 -d=)

    KEY_LENGTH=${#KEY}
    VALUE="${ARGUMENT:$KEY_LENGTH+1}"

    export "$KEY"="$VALUE"
    echo "$KEY=$VALUE"
done <<<"$ENV_VARS"

# Run before script
BEFORE_SCRIPT=$(cat "$TRAINING_PARAMS_PATH" | /usr/local/bin/yq '.BEFORE_SCRIPT')
if [[ "$BEFORE_SCRIPT" != null ]]; then
    eval "$BEFORE_SCRIPT"
fi

# Exit earlier to leave time for properly saving checkpoint
if [[ "$IS_NEMO_TEST" == "true" ]]; then
    PARAMS=()
    # Store the output in a variable first
    TRAINING_PARAMS_STR=$(/usr/local/bin/yq '... comments="" | .MODEL_ARGS | to_entries | .[] | with(select(.value == true); .value = "true") | .key + "=" + (select(.value != "") | .value | tostring)' "$TRAINING_PARAMS_PATH")
    # Build space-separated string while preserving quotes
    TRAINING_PARAMS_FROM_CONFIG=""
    while IFS= read -r line; do
        if [[ -n "$line" ]]; then
            # If value is "true", just use the key
            if [[ "$line" =~ =true$ ]]; then
                TRAINING_PARAMS_FROM_CONFIG+="${line%=true} "
            # If value contains spaces, wrap it in quotes
            elif [[ "$line" =~ .*=.*[[:space:]].* ]]; then
                key="${line%%=*}"
                value="${line#*=}"
                TRAINING_PARAMS_FROM_CONFIG+="$key=\"$value\" "
            else
                TRAINING_PARAMS_FROM_CONFIG+="$line "
            fi
        fi
    done <<<"$TRAINING_PARAMS_STR"
    # Remove trailing space
    TRAINING_PARAMS_FROM_CONFIG=${TRAINING_PARAMS_FROM_CONFIG% }
    # Split into array while preserving quotes
    eval "TRAINING_PARAMS_ARRAY=($TRAINING_PARAMS_FROM_CONFIG)"

else
    # If this is a second run (of checkpoint-resume), we might want to use a
    # different model configuration than during first time. So if key `MODEL_ARGS_2`
    # exists we use it, otherwise we use the same as for the first run.
    if [[ $RUN_NUMBER -gt 1 && $(/usr/local/bin/yq 'has("MODEL_ARGS_'$RUN_NUMBER'")' "$TRAINING_PARAMS_PATH") == true ]]; then
        export KEY="MODEL_ARGS_$RUN_NUMBER"
    else
        export KEY="MODEL_ARGS"
    fi

    # Store the output in a variable first
    TRAINING_PARAMS_STR=$(/usr/local/bin/yq 'explode(.) | ... comments="" | .[env(KEY)] | to_entries | .[] | with(select(.value == true); .value = "true") | .key + ": " + (select(.value != "") | .value | tostring)' "$TRAINING_PARAMS_PATH")
    # Build space-separated string while preserving quotes
    TRAINING_PARAMS_FROM_CONFIG=""
    while IFS= read -r line; do
        if [[ -n "$line" ]]; then

            key="${line%%:*}"
            value="${line#*: }"
            value="$(echo "$value" | xargs)" # trim whitespace
            # Case: true
            if [[ "$value" == "true" ]]; then
                TRAINING_PARAMS_FROM_CONFIG+="${key} "

            # Case: value is wrapped in ( )
            elif echo "$value" | grep -Eq '^\([^)]+\)$'; then
                TRAINING_PARAMS_FROM_CONFIG+="$key \"$value\" "

            # Case: value is wrapped in [ ]
            elif echo "$value" | grep -Eq '^\[[^]]+\]$'; then
                # Strip square brackets from value using sed
                value=$(echo "$value" | sed 's/^\[//;s/\]$//')
                TRAINING_PARAMS_FROM_CONFIG+="$key $value "

            # Case: contains spaces or shell metacharacters
            elif [[ "$value" == *" "* || "$value" == *"|"* || "$value" == *"("* || "$value" == *")"* ]]; then
                TRAINING_PARAMS_FROM_CONFIG+="$key \"$value\" "
            # Case: default
            else
                TRAINING_PARAMS_FROM_CONFIG+="$key $value "
            fi
        fi
    done <<<"$TRAINING_PARAMS_STR"
    # Remove trailing space
    TRAINING_PARAMS_FROM_CONFIG=${TRAINING_PARAMS_FROM_CONFIG% }
    # Split into array while preserving quotes
    eval "TRAINING_PARAMS_ARRAY=($TRAINING_PARAMS_FROM_CONFIG)"
    if [[ -n "${SLURM_JOB_END_TIME:-}" && -n "${SLURM_JOB_START_TIME:-}" ]]; then
        PARAMS=(
            "--exit-duration-in-mins"
            $((($SLURM_JOB_END_TIME - $SLURM_JOB_START_TIME) / 60 - 15))
        )
    fi
fi

# Extract training params
PARAMS=("${PARAMS[@]}" "${TRAINING_PARAMS_ARRAY[@]}")

# Set PYTHONPATH
export PYTHONPATH="$(pwd):${PYTHONPATH:-}"
export WANDB_API_KEY="${WANDB_API_KEY:-}"

######## Distributed training settings. ########
echo "------ARGUMENTS for SLURM ---"
MASTER_ADDR=${MASTER_ADDR:-localhost}
MASTER_PORT=${MASTER_PORT:-6000}
NUM_NODES=${NUM_NODES:-${SLURM_NNODES:-1}}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
NODE_RANK=${SLURM_NODEID:-${SLURM_NODEID:-0}}
LAST_RANK=$((GPUS_PER_NODE - 1)) 
export LOG_DIR=$OUTPUT_PATH/logs/$REPEAT
mkdir -p $LOG_DIR

DISTRIBUTED_ARGS=(
    --nproc_per_node $GPUS_PER_NODE
    --nnodes $NUM_NODES
    --master_addr $MASTER_ADDR
    --master_port $MASTER_PORT
    --node_rank $NODE_RANK
    --log-dir $LOG_DIR
    --tee "0:3,$LAST_RANK:3"
    --redirects "3"
)

# Start training
if [[ "$IS_NEMO_TEST" == "true" ]]; then
    uv run --no-sync python -m torch.distributed.run ${DISTRIBUTED_ARGS[@]} \
        --no-python /opt/venv/bin/$TRAINING_SCRIPT_PATH "${PARAMS[@]}" && EXIT_CODE=0 || EXIT_CODE=$?
else
    uv run --no-sync python -m torch.distributed.run ${DISTRIBUTED_ARGS[@]}  \
        $TRAINING_SCRIPT_PATH "${PARAMS[@]}" && EXIT_CODE=0 || EXIT_CODE=$?
fi

# Run after script
AFTER_SCRIPT=$(cat "$TRAINING_PARAMS_PATH" | /usr/local/bin/yq '.AFTER_SCRIPT')
if [[ "$AFTER_SCRIPT" != null ]]; then
    eval "$AFTER_SCRIPT"
fi 

# Set permissions
chmod -R g+w $OUTPUT_PATH

if [[ ${RECORD_CHECKPOINTS} == "true" ]]; then
    echo "Suppressing errors during checkpoint recording."
    exit 0
fi

exit ${EXIT_CODE:-0}


================================================
FILE: tests/functional_tests/shell_test_utils/run_batch_ci_tests.sh
================================================
#!/bin/bash
#
# Script to submit batch jobs to run test scripts across different compute nodes
#
# Usage:
#   ./run_batch_ci_tests.sh <test_script> [num_jobs] [partition]
#
# Arguments:
#   test_script  - Path to test script in test_cases/ (required)
#   num_jobs     - Number of jobs to submit (default: 10)
#   partition    - Slurm partition to use (default: interactive)
#
# Examples:
#   ./run_batch_ci_tests.sh test_cases/moe/gpt_grpo_tp4tp2_pp1_ep4ep2_dp8_throughputtest.sh
#   ./run_batch_ci_tests.sh test_cases/gpt/gpt3_mcore_te_tp2_pp2.sh 5
#   ./run_batch_ci_tests.sh test_cases/bert/bert_mcore_tp2_pp2.sh 10 batch_block1
#
# To list available test scripts:
#   ./run_batch_ci_tests.sh --list
#   ./run_batch_ci_tests.sh --list moe      # List only moe tests
#   ./run_batch_ci_tests.sh --list gpt      # List only gpt tests
#

set -e

# Function to list available test scripts
list_tests() {
    local filter="${1:-}"
    echo "Available test scripts in test_cases/:"
    echo
    if [ -n "$filter" ]; then
        # List tests in specific subdirectory
        if [ -d "test_cases/$filter" ]; then
            find "test_cases/$filter" -name "*.sh" -type f | sort
        else
            echo "No test_cases/$filter directory found."
            echo "Available subdirectories:"
            ls -d test_cases/*/ 2>/dev/null | sed 's|test_cases/||g; s|/||g' | xargs -I {} echo "  {}"
            exit 1
        fi
    else
        # List all tests grouped by subdirectory
        for dir in test_cases/*/; do
            if [ -d "$dir" ]; then
                subdir=$(basename "$dir")
                echo "=== $subdir ==="
                find "$dir" -name "*.sh" -type f | sort | sed 's|^|  |'
                echo
            fi
        done
    fi
    exit 0
}

# Handle --list option
if [ "${1:-}" = "--list" ]; then
    list_tests "${2:-}"
fi

# Configuration (same as start_ci_interactive.sh)
export DATASET_DIR=/lustre/fsw/portfolios/coreai/projects/coreai_dlalgo_mcore/mcore_ci
export TGT_IMAGE=gitlab-master.nvidia.com/adlr/megatron-lm/mcore_ci_dev:main
export ACCOUNT=llmservice_fm_text

# The test script to run inside the container (first argument, required)
TEST_SCRIPT="${1:-}"

if [ -z "$TEST_SCRIPT" ]; then
    echo "ERROR: Test script path is required"
    echo
    echo "Usage: $0 <test_script> [num_jobs] [partition]"
    echo
    echo "Run '$0 --list' to see available test scripts"
    exit 1
fi

# Number of jobs to submit (second argument, default 10)
NUM_JOBS=${2:-10}

# Partition (third argument, default to same as interactive - change if needed)
# Common batch partition names: batch, batch_block1, dgx_batch, etc.
export PARTITION=${3:-interactive}

# Verify test script exists
if [ ! -f "$TEST_SCRIPT" ]; then
    echo "ERROR: Test script not found: $TEST_SCRIPT"
    echo "Make sure you run this from the megatron-rl directory"
    echo
    echo "Run '$0 --list' to see available test scripts"
    exit 1
fi

# Extract test name from script path for job naming
# e.g., "test_cases/moe/gpt_grpo_tp4tp2_pp1_ep4ep2_dp8_throughputtest.sh" -> "gpt_grpo_tp4tp2_pp1_ep4ep2_dp8_throughputtest"
TEST_NAME=$(basename "$TEST_SCRIPT" .sh)

# Output directory for logs (include test name for clarity)
LOG_DIR="$(pwd)/batch_test_logs_${TEST_NAME}_$(date +%Y%m%d_%H%M%S)"
mkdir -p "$LOG_DIR"

# Container mounts
CONTAINER_MOUNTS="$DATASET_DIR:/mnt/artifacts,$(pwd):/opt/megatron-lm"

echo "============================================="
echo "Batch CI Test Submission"
echo "============================================="
echo "Test Script:  $TEST_SCRIPT"
echo "Test Name:    $TEST_NAME"
echo "Partition:    $PARTITION"
echo "Account:      $ACCOUNT"
echo "Image:        $TGT_IMAGE"
echo "Dataset Dir:  $DATASET_DIR"
echo "Num Jobs:     $NUM_JOBS"
echo "Log Dir:      $LOG_DIR"
echo "============================================="
echo

# Submit jobs
# Truncate test name if too long for job name (max ~64 chars typically)
SHORT_TEST_NAME="${TEST_NAME:0:50}"

for i in $(seq 1 $NUM_JOBS); do
    JOB_NAME="${SHORT_TEST_NAME}_run_${i}"
    
    sbatch \
        --job-name="$JOB_NAME" \
        --partition="$PARTITION" \
        --account="$ACCOUNT" \
        --nodes=1 \
        --gpus-per-task=8 \
        --time=1:00:00 \
        --exclusive \
        --output="$LOG_DIR/${JOB_NAME}_%j.out" \
        --error="$LOG_DIR/${JOB_NAME}_%j.err" \
        --export=ALL \
        --wrap="srun \
            --container-image=$TGT_IMAGE \
            --container-workdir=/opt/megatron-lm \
            --container-mounts=$CONTAINER_MOUNTS \
            --no-container-mount-home \
            bash -c 'cd /opt/megatron-lm && time bash $TEST_SCRIPT'"
    
    echo "Submitted job $i: $JOB_NAME"
done

echo
echo "============================================="
echo "All $NUM_JOBS jobs submitted!"
echo "Monitor with: squeue -u \$USER"
echo "Logs will be written to: $LOG_DIR"
echo "============================================="

# Create a helper script to check results
cat > "$LOG_DIR/check_results.sh" << 'CHECKEOF'
#!/bin/bash
# Check the results of all batch test runs

LOG_DIR="$(dirname "$0")"
echo "Checking results in: $LOG_DIR"
echo

total=0
passed=0
failed=0
pending=0

# Match any .out file that ends with _run_N_JOBID.out pattern
for outfile in "$LOG_DIR"/*_run_*.out; do
    if [ -f "$outfile" ]; then
        total=$((total + 1))
        jobname=$(basename "$outfile" .out)
        
        # Check if file is empty (job still running or not started)
        if [ ! -s "$outfile" ]; then
            echo "PENDING: $jobname (no output yet)"
            pending=$((pending + 1))
            continue
        fi
        
        # Check for success: look for "This test wrote results into" which indicates completion
        if grep -q "This test wrote results into" "$outfile" 2>/dev/null; then
            # Check for errors/failures
            if grep -Ei "FAILED|AssertionError|Exception:|Traceback" "$outfile" 2>/dev/null | grep -v "grep" > /dev/null; then
                echo "FAILED:  $jobname"
                failed=$((failed + 1))
            else
                # Extract timing info
                timing=$(grep -E "^real\s" "$outfile" 2>/dev/null | head -1 || echo "")
                echo "PASSED:  $jobname $timing"
                passed=$((passed + 1))
            fi
        else
            # Job might still be running or crashed early
            if grep -qi "error\|failed\|exception\|traceback" "$outfile" 2>/dev/null; then
                echo "FAILED:  $jobname (error in output)"
                failed=$((failed + 1))
            else
                echo "RUNNING: $jobname (incomplete output)"
                pending=$((pending + 1))
            fi
        fi
    fi
done

echo
echo "============================================="
echo "Summary:"
echo "  Passed:  $passed"
echo "  Failed:  $failed"
echo "  Pending: $pending"
echo "  Total:   $total"
echo "============================================="

if [ $failed -gt 0 ]; then
    exit 1
elif [ $pending -gt 0 ]; then
    exit 2
else
    exit 0
fi
CHECKEOF
chmod +x "$LOG_DIR/check_results.sh"

# Create a script to show node info for each job
cat > "$LOG_DIR/show_nodes.sh" << 'NODEEOF'
#!/bin/bash
# Show which node each job ran on

LOG_DIR="$(dirname "$0")"
echo "Node assignments for batch tests:"
echo

# Match any .out file that ends with _run_N_JOBID.out pattern
for outfile in "$LOG_DIR"/*_run_*.out; do
    if [ -f "$outfile" ]; then
        jobname=$(basename "$outfile" .out)
        jobid=$(echo "$outfile" | grep -oP '\d+(?=\.out)')
        
        # Try to get node from sacct or from output file
        node=$(sacct -j "$jobid" --format=NodeList --noheader 2>/dev/null | head -1 | tr -d ' ')
        if [ -z "$node" ]; then
            node="unknown"
        fi
        
        echo "$jobname (job $jobid): $node"
    fi
done
NODEEOF
chmod +x "$LOG_DIR/show_nodes.sh"

echo "After jobs complete:"
echo "  - Run '$LOG_DIR/check_results.sh' to check results"
echo "  - Run '$LOG_DIR/show_nodes.sh' to see which nodes were used"
echo
echo "To run other tests, use: $0 --list to see available test scripts"


================================================
FILE: tests/functional_tests/shell_test_utils/run_ci_test.sh
================================================
#!/bin/bash

set -exo pipefail

# Increase soft limit for number of open files to match hard limit
ulimit -Sn $(ulimit -Hn)

# Increase soft limit for number of processes to match hard limit
ulimit -Su $(ulimit -Hu)

# Set umask to 0002 to allow group read/write permissions
umask 0002

set +x
for ARGUMENT in "$@"; do
    # Split on first = only, preserving any subsequent = signs in the value
    KEY="${ARGUMENT%%=*}"
    VALUE="${ARGUMENT#*=}"

    # Remove any surrounding quotes from the value if they exist
    VALUE="${VALUE%\"}"
    VALUE="${VALUE#\"}"
    VALUE="${VALUE%\'}"
    VALUE="${VALUE#\'}"

    # Properly quote the value to preserve spaces and special characters
    export "$KEY"="$(eval echo $VALUE)"
    echo "$KEY=$VALUE"
done
set -x

# Check that mandatory vars are set
MANDATORY_VARS=(
    "TRAINING_SCRIPT_PATH"
    "TRAINING_PARAMS_PATH"
    "GOLDEN_VALUES_PATH"
    "OUTPUT_PATH"
    "TENSORBOARD_PATH"
    "CHECKPOINT_SAVE_PATH"
    "CHECKPOINT_LOAD_PATH"
    "DATA_PATH"
    "DATA_CACHE_PATH"
    "ENABLE_LIGHTWEIGHT_MODE"
)
for mandatory_var in "${MANDATORY_VARS[@]}"; do
    if [[ -z "${!mandatory_var}" ]]; then
        echo 'Providing $'$mandatory_var' is mandatory.'
        exit 1
    fi
done

set -exo pipefail

# Extract settings from params file
TEST_TYPE=$(cat $TRAINING_PARAMS_PATH |
    /usr/local/bin/yq '.TEST_TYPE')
ENABLE_LIGHTWEIGHT_MODE=$(cat $TRAINING_PARAMS_PATH |
    /usr/local/bin/yq '.ENV_VARS.ENABLE_LIGHTWEIGHT_MODE // "false"')
N_REPEAT=$(cat $TRAINING_PARAMS_PATH |
    /usr/local/bin/yq '.ENV_VARS.N_REPEAT // "'$N_REPEAT'"')
MODE=$(cat $TRAINING_PARAMS_PATH |
    /usr/local/bin/yq '.MODE // "pretraining"')

MODES=("pretraining" "inference")
TEST_TYPES=("regular" "ckpt-resume" "frozen-resume" "frozen-start" "checkpoint-consistency" "release")

if [[ "$TEST_TYPE" == "release" ]]; then
    export ONE_LOGGER_JOB_CATEGORY=production
else
    export ONE_LOGGER_JOB_CATEGORY=test
fi

mkdir -p $CHECKPOINT_SAVE_PATH
mkdir -p $CHECKPOINT_LOAD_PATH || true
_CHECKPOINT_LOAD_PATH=$CHECKPOINT_LOAD_PATH
_CHECKPOINT_SAVE_PATH=$CHECKPOINT_SAVE_PATH
_TENSORBOARD_PATH=$TENSORBOARD_PATH

SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
ROOT_DIR=$(realpath $SCRIPT_DIR/../../../)

IS_NEMO_TEST=$([[ $(echo "$TRAINING_SCRIPT_PATH" | tr '[:upper:]' '[:lower:]') == *nemo* ]] && echo "true" || echo "false")
export IS_NEMO_TEST

# Adjust model_config for lightweight mode
if [[ "$MODE" == "pretraining" && "$TEST_TYPE" != "release" ]]; then
    if [[ "$ENABLE_LIGHTWEIGHT_MODE" == "true" && "$IS_NEMO_TEST" == "true" ]]; then
        /usr/local/bin/yq -i '.MODEL_ARGS."trainer.max_steps" = 2' $TRAINING_PARAMS_PATH
        TRAIN_ITERS=$(cat $TRAINING_PARAMS_PATH |
            /usr/local/bin/yq '.MODEL_ARGS."trainer.max_steps // "100"')

        N_REPEAT=1

    elif [[ "$ENABLE_LIGHTWEIGHT_MODE" == "true" && "$IS_NEMO_TEST" == "false" ]]; then
        /usr/local/bin/yq -i '.ENV_VARS."SKIP_PYTEST" = 1' $TRAINING_PARAMS_PATH
        /usr/local/bin/yq -i '.MODEL_ARGS."--exit-interval" = 4' $TRAINING_PARAMS_PATH
        TRAIN_ITERS=$(cat $TRAINING_PARAMS_PATH |
            /usr/local/bin/yq '.MODEL_ARGS."--exit-interval" // "100"')
        N_REPEAT=1

        if [[ "$TEST_TYPE" == "ckpt-resume" || "$TEST_TYPE" == "frozen-resume" ]]; then
            /usr/local/bin/yq -i '.MODEL_ARGS."--save-interval" = 2' $TRAINING_PARAMS_PATH
        fi

    elif [[ "$ENABLE_LIGHTWEIGHT_MODE" == "false" && "$IS_NEMO_TEST" == "true" ]]; then
        TRAIN_ITERS=$(cat $TRAINING_PARAMS_PATH |
            /usr/local/bin/yq '.MODEL_ARGS."trainer.max_steps" // "100"')

    elif [[ "$ENABLE_LIGHTWEIGHT_MODE" == "false" && "$IS_NEMO_TEST" == "false" ]]; then
        /usr/local/bin/yq -i '.MODEL_ARGS."--exit-interval" = .MODEL_ARGS."--train-iters"' $TRAINING_PARAMS_PATH
        TRAIN_ITERS=$(cat $TRAINING_PARAMS_PATH |
            /usr/local/bin/yq '.MODEL_ARGS."--exit-interval" // "100"')
    fi
elif [[ "$MODE" == "inference" && "$TEST_TYPE" != "release" ]]; then
    if [[ "$ENABLE_LIGHTWEIGHT_MODE" == "true" && "$IS_NEMO_TEST" == "false" ]]; then
        /usr/local/bin/yq -i '.ENV_VARS."SKIP_PYTEST" = 1' $TRAINING_PARAMS_PATH
    fi
fi

if [[ "$MODE" == "pretraining" && "$TEST_TYPE" = "release" ]]; then
    TRAIN_ITERS=$(cat $TRAINING_PARAMS_PATH |
        /usr/local/bin/yq '.MODEL_ARGS."--exit-interval" // "100"')
fi

# Extract settings from params file
NVTE_ALLOW_NONDETERMINISTIC_ALGO=$(cat $TRAINING_PARAMS_PATH |
    /usr/local/bin/yq '.ENV_VARS.NVTE_ALLOW_NONDETERMINISTIC_ALGO')
NON_DETERMINSTIC_RESULTS=$(cat $TRAINING_PARAMS_PATH |
    /usr/local/bin/yq '.ENV_VARS.NON_DETERMINSTIC_RESULTS // "0"')
SKIP_PYTEST=$(cat $TRAINING_PARAMS_PATH |
    /usr/local/bin/yq '.ENV_VARS.SKIP_PYTEST')

export RECORD_CHECKPOINTS=${RECORD_CHECKPOINTS:-"false"}

NODE_RANK=${SLURM_NODEID:-${SLURM_NODEID:-0}}

for i in $(seq 1 $N_REPEAT); do
    # Move TB logs into a repeat-specific directory
    DIR=$(dirname "$_TENSORBOARD_PATH")
    FILE=$(basename "$_TENSORBOARD_PATH")
    export TENSORBOARD_PATH=$DIR/$i/$FILE
    mkdir -p $(dirname $TENSORBOARD_PATH)

    if [[ $i -gt 1 ]]; then
        rm -rf $CHECKPOINT_SAVE_PATH/* || true
        rm -rf /tmp/checkpoints/* || true   
        rm -rf $TENSORBOARD_PATH/* || true
    fi

    # First run never loads from a checkpoint
    export RUN_NUMBER=1
    export REPEAT=$i
    export CHECKPOINT_SAVE_PATH=$_CHECKPOINT_SAVE_PATH
    export TRAINING_EXIT_CODE=0
    declare -a ITER_CHECKPOINT_DIRS=()  # for the grad-test check if we're doing it

    if [[ "$TEST_TYPE" = "frozen-start" || "$TEST_TYPE" = "checkpoint-consistency" ]]; then
        export CHECKPOINT_LOAD_PATH=$_CHECKPOINT_LOAD_PATH
    else
        export CHECKPOINT_LOAD_PATH=/tmp/checkpoints/
    fi

    if [[ "$TEST_TYPE" = "release" ]]; then
        export CHECKPOINT_LOAD_PATH=$_CHECKPOINT_LOAD_PATH
        export CHECKPOINT_SAVE_PATH=$_CHECKPOINT_SAVE_PATH
    fi

    if [[ "$TEST_TYPE" = "checkpoint-consistency" ]]; then
        ## Loop over the list of model configs in the params file and run each one in sequence, collecting
        #  the checkpoints. Assume that we do a single step for this test.

        # 1. Loop over the runs in the params file
        # Get all MODEL_ARGS keys from the params file
        mapfile -t MODEL_ARGS_KEYS < <(/usr/local/bin/yq 'keys | .[] | select(test("^MODEL_ARGS(_\\d+)?$"))' "$TRAINING_PARAMS_PATH")
        

        # For-loop over the keys
        for KEY in "${MODEL_ARGS_KEYS[@]}"; do
            [[ -z "$KEY" ]] && continue

            if [[ "$KEY" =~ ^MODEL_ARGS_([0-9]+)$ ]]; then
                export LOOP_RN="${BASH_REMATCH[1]}"
            elif [[ "$KEY" == "MODEL_ARGS" ]]; then
                export LOOP_RN=1
            else
                echo "Unexpected KEY: $KEY" >&2; exit 1
            fi
            export RUN_NUMBER=$LOOP_RN

            # Get the number of GPUs from this run. Do not export this so it clashes with the other runs.
            N_GPUS=$(cat $TRAINING_PARAMS_PATH |
                /usr/local/bin/yq '.MODEL_ENV_VARS.'$KEY'.GPUS_PER_NODE')
            echo "Running $KEY with RUN_NUMBER=$RUN_NUMBER and GPUS_PER_NODE=$N_GPUS"
            
            ITER_CHECKPOINT_SAVE_PATH="$_CHECKPOINT_SAVE_PATH/repeat_${REPEAT}_key_${KEY}"
            mkdir -p $ITER_CHECKPOINT_SAVE_PATH

            # Save a checkpoint for this run
            GPUS_PER_NODE=$N_GPUS KEY=$KEY CHECKPOINT_SAVE_PATH=$ITER_CHECKPOINT_SAVE_PATH \
            bash $ROOT_DIR/tests/functional_tests/shell_test_utils/_run_training.sh || TRAINING_EXIT_CODE=$?

            # TODO find out the final iter and put that at the end rather than hardcoding 1
            ITER_CHECKPOINT_DIRS+=("$ITER_CHECKPOINT_SAVE_PATH/iter_0000001")
        done
    else
        # The standard single-run test that otherwise runs
        bash $ROOT_DIR/tests/functional_tests/shell_test_utils/_run_training.sh || TRAINING_EXIT_CODE=$?
    fi

    if [[ "$TEST_TYPE" = "frozen-resume" && -z "$(ls -A "$_CHECKPOINT_LOAD_PATH" 2>/dev/null)" ]]; then
        echo "No frozen checkpoint found. Will skip second run."

        export CHECKPOINT_SAVE_PATH=$_CHECKPOINT_SAVE_PATH
        if [[ $NODE_RANK -eq 0 ]]; then
            rm -rf "$CHECKPOINT_SAVE_PATH/iter_0000$TRAIN_ITERS"
        fi
        echo $((TRAIN_ITERS / 2)) >$CHECKPOINT_SAVE_PATH/latest_checkpointed_iteration.txt
        break
    fi

    if [[ "$TEST_TYPE" == "ckpt-resume" && "$TRAINING_EXIT_CODE" -eq 0 ]]; then
        export CHECKPOINT_LOAD_PATH=$CHECKPOINT_SAVE_PATH
        if [[ $NODE_RANK -eq 0 ]]; then
            rm -rf "$CHECKPOINT_LOAD_PATH/iter_$(printf "%07d\n" "$TRAIN_ITERS")"
        fi
        echo $((TRAIN_ITERS / 2)) >$CHECKPOINT_LOAD_PATH/latest_checkpointed_iteration.txt

        export RUN_NUMBER=2
        bash $ROOT_DIR/tests/functional_tests/shell_test_utils/_run_training.sh || TRAINING_EXIT_CODE=$?
    fi

    if [[ "$TEST_TYPE" == "frozen-resume" && "$TRAINING_EXIT_CODE" -eq 0 ]]; then

        # Checkpoint-resume tests load from prev run
        export CHECKPOINT_LOAD_PATH=$_CHECKPOINT_LOAD_PATH
        export CHECKPOINT_SAVE_PATH=/tmp/checkpoints/

        export RUN_NUMBER=2
        bash $ROOT_DIR/tests/functional_tests/shell_test_utils/_run_training.sh || TRAINING_EXIT_CODE=$?

        export CHECKPOINT_SAVE_PATH=$_CHECKPOINT_SAVE_PATH
        if [[ $NODE_RANK -eq 0 ]]; then
            rm -rf "$CHECKPOINT_SAVE_PATH/iter_0000$TRAIN_ITERS"
        fi
        echo $((TRAIN_ITERS / 2)) >$CHECKPOINT_SAVE_PATH/latest_checkpointed_iteration.txt
    fi

    if [[ ${RECORD_CHECKPOINTS} == "true" ]]; then
        echo "Skipping Pytest during checkpoint recording."
        SKIP_PYTEST=1
    fi

    if [[ ${SKIP_PYTEST:-0} != 1 || "$TEST_TYPE" == "release" ]]; then
        # Save run results
        export PYTHONPATH=$ROOT_DIR
        if [[ "$TEST_TYPE" == "release" ]]; then
            EXTRACT_ARGS=("--is-convergence-test")
        else
            EXTRACT_ARGS=("--is-normal-test" "--step-size" "1")
        fi

        # Read test values from Tensorboard for non-inference tests.
        # Inference tests will load from JSON instead.
        if [[ "$MODE" == "pretraining" ]]; then
            uv run --no-sync python $ROOT_DIR/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py \
                --logs-dir $TENSORBOARD_PATH \
                --train-iters $TRAIN_ITERS \
                --output-path ${OUTPUT_PATH}/$(basename $GOLDEN_VALUES_PATH) \
                "${EXTRACT_ARGS[@]}"
        fi
    fi

    # Maybe run tests
    if [[ ${SKIP_PYTEST:-0} == 1 ]]; then
        echo Skipping Pytest checks.
        exit ${TRAINING_EXIT_CODE}
    fi

    if [[ ! " ${TEST_TYPES[*]} " =~ " ${TEST_TYPE} " ]]; then
        echo "Test type $TEST_TYPE not yet implemented."
    fi

    if [[ ! " ${MODES[*]} " =~ " ${MODE} " ]]; then
        echo "Mode $MODE not yet implemented."
    fi

    export NVTE_ALLOW_NONDETERMINISTIC_ALGO
    if [[ "${NVTE_ALLOW_NONDETERMINISTIC_ALGO}" == "1" || "${NON_DETERMINSTIC_RESULTS}" == "1" ]]; then
        ALLOW_NONDETERMINISTIC_ALGO_ARG="--allow-nondeterministic-algo"
    fi

    if [[ "$SLURM_NODEID" -eq 0 ]]; then
        echo "Running pytest checks against golden values"

        # For pretraining jobs
        if [[ "$MODE" == "pretraining" && ("$TRAINING_EXIT_CODE" -eq 0 || "$TEST_TYPE" == "release") ]]; then
            if [[ "$TEST_TYPE" == "checkpoint-consistency" ]]; then
                echo "Running checkpoint consistency check"
                uv run --no-sync python $ROOT_DIR/tests/functional_tests/python_test_utils/test_optimizer_grads_match.py "${ITER_CHECKPOINT_DIRS[@]}"
            else
                uv run --no-sync pytest -s -o log_cli=true --log-cli-level=info $ROOT_DIR/tests/functional_tests/python_test_utils/test_pretraining_regular_pipeline.py \
                    --golden-values-path $GOLDEN_VALUES_PATH \
                    --actual-values-path ${OUTPUT_PATH}/$(basename $GOLDEN_VALUES_PATH) \
                    --train-iters $TRAIN_ITERS \
                    --model-config-path ${TRAINING_PARAMS_PATH} \
                    $ALLOW_NONDETERMINISTIC_ALGO_ARG

                if [[ "$TEST_TYPE" == "ckpt-resume" || "$TEST_TYPE" == "frozen-resume" ]]; then
                    uv run --no-sync python $ROOT_DIR/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py \
                        --logs-dir $TENSORBOARD_PATH \
                        --train-iters $TRAIN_ITERS \
                        --output-path "${OUTPUT_PATH}/$(basename $GOLDEN_VALUES_PATH .json)_2nd.json" \
                        --is-second-run \
                        "${EXTRACT_ARGS[@]}"
                            
                    echo "Running pytest 1st vs 2nd run comparison"
                    uv run --no-sync pytest -s -o log_cli=true --log-cli-level=info $ROOT_DIR/tests/functional_tests/python_test_utils/test_pretraining_resume_checkpoint_pipeline.py \
                        --actual-values-first-run-path ${OUTPUT_PATH}/$(basename $GOLDEN_VALUES_PATH) \
                        --actual-values-second-run-path "${OUTPUT_PATH}/$(basename $GOLDEN_VALUES_PATH .json)_2nd.json" \
                        --train-iters $TRAIN_ITERS \
                        --model-config-path ${TRAINING_PARAMS_PATH} \
                        $ALLOW_NONDETERMINISTIC_ALGO_ARG
                fi
            fi
        fi

        # For inference jobs
        if [[ "$MODE" == "inference" && ("$TRAINING_EXIT_CODE" -eq 0 || "$TEST_TYPE" == "release") ]]; then
            if [[ "$TEST_TYPE" == "frozen-start" ]]; then
                uv run --no-sync pytest -s -o log_cli=true --log-cli-level=info $ROOT_DIR/tests/functional_tests/python_test_utils/test_inference_regular_pipeline.py \
                    --golden-values-path $GOLDEN_VALUES_PATH \
                    --test-values-path $INFERENCE_OUTPUT_PATH \
                    --model-config-path ${TRAINING_PARAMS_PATH} \
                    $ALLOW_NONDETERMINISTIC_ALGO_ARG
            fi
        fi

        # For rl jobs
        if [[ "$MODE" == "rl" && ("$TRAINING_EXIT_CODE" -eq 0 || "$TEST_TYPE" == "release") ]]; then
            if [[ "$TEST_TYPE" == "frozen-start" ]]; then
                TRAIN_ITERS=$(cat $TRAINING_PARAMS_PATH |
                    /usr/local/bin/yq '.MODEL_ARGS."--exit-interval" // "50"')
                uv run --no-sync python $ROOT_DIR/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py \
                    --logs-dir $TENSORBOARD_PATH \
                    --train-iters $TRAIN_ITERS \
                    --output-path ${OUTPUT_PATH}/$(basename $GOLDEN_VALUES_PATH) \
                    "${EXTRACT_ARGS[@]}"
                uv run --no-sync pytest -s -o log_cli=true --log-cli-level=info $ROOT_DIR/tests/functional_tests/python_test_utils/test_grpo_training_loop.py \
                    --golden-values-path $GOLDEN_VALUES_PATH \
                    --test-values-path ${OUTPUT_PATH}/$(basename $GOLDEN_VALUES_PATH) \
                    --model-config-path ${TRAINING_PARAMS_PATH} \
                    $ALLOW_NONDETERMINISTIC_ALGO_ARG
            fi
        fi

        # Abort if training failed
        if [[ "$TRAINING_EXIT_CODE" -ne 0 && "$TEST_TYPE" != "release" ]]; then
            echo "Training failed. Aborting."
            exit 1
        fi
    fi
done


================================================
FILE: tests/functional_tests/shell_test_utils/start_interactive_job.sh
================================================
#!/bin/bash

# Function to print usage
print_usage() {
    echo "Usage: $0 [OPTIONS]"
    echo
    echo "Required options:"
    echo "  --partition PARTITION    Slurm partition"
    echo "  --slurm-account ACCOUNT  Slurm account/PPP"
    echo "  --image IMAGE           Container image"
    echo "  --dataset-dir DIR       Dataset root directory"
    echo
    echo "Optional options:"
    echo "  --time TIME             Job time limit (default: 1:00:00)"
    echo
    echo "Example:"
    echo "  $0 --partition dgx --slurm-account nvidia --image nvcr.io/nvidia/pytorch:23.10-py3 --dataset-dir /datasets"
    echo "  $0 --partition dgx --slurm-account nvidia --image nvcr.io/nvidia/pytorch:23.10-py3 --dataset-dir /datasets --time 2:00:00"
}

# Initialize variables
PARTITION=""
SLURM_ACCOUNT=""
IMAGE=""
DATASET_DIR=""
TIME="1:00:00"
RECIPES_DIR="tests/test_utils/recipes"
CONTAINER_MOUNTS=""
NO_GPUS_PER_TASK="FALSE"

# Declare associative array for tracking unique mounts
declare -A seen_mounts

# Parse command line arguments
while [[ $# -gt 0 ]]; do
    case $1 in
    --partition)
        PARTITION="$2"
        shift 2
        ;;
    --slurm-account)
        SLURM_ACCOUNT="$2"
        shift 2
        ;;
    --image)
        IMAGE="$2"
        shift 2
        ;;
    --dataset-dir)
        DATASET_DIR="$2"
        shift 2
        ;;
    --time)
        TIME="$2"
        shift 2
        ;;
    --no-gpus-per-task)
        NO_GPUS_PER_TASK="TRUE"
        shift 1
        ;;
    --help)
        print_usage
        exit 0
        ;;
    *)
        echo "Error: Unknown option '$1'"
        print_usage
        exit 1
        ;;
    esac
done

# Validate required arguments
if [ -z "$PARTITION" ] || [ -z "$SLURM_ACCOUNT" ] || [ -z "$IMAGE" ] || [ -z "$DATASET_DIR" ]; then
    echo "Error: Missing required arguments"
    print_usage
    exit 1
fi

# Add current directory to container mounts
CONTAINER_MOUNTS="$DATASET_DIR:/mnt/artifacts,$(pwd):/opt/megatron-lm"

# Build the final srun command
SRUN_CMD="srun \
    --partition=$PARTITION \
    --account=$SLURM_ACCOUNT \
    --container-image=$IMAGE \
    --container-workdir=/opt/megatron-lm \
    --container-mounts=$CONTAINER_MOUNTS \
    --no-container-mount-home \
    --nodes=1 \
    $(if [ "$NO_GPUS_PER_TASK" = "FALSE" ]; then echo "--gpus-per-task=8"; fi) \
    --time=$TIME \
    --pty bash"

printf "Generated srun command with all container mounts:\n\n"
echo "$SRUN_CMD"
echo
read -p "Execute this command? (y/n): " response
if [[ "$response" =~ ^[Yy]$ ]]; then
    echo "Executing command..."
    eval "$SRUN_CMD"
else
    echo "Command not executed."
fi


================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.49226, "5": 10.49833, "10": 10.49375, "15": 10.48886, "20": 10.46612, "25": 10.39219, "30": 10.20812, "35": 10.06926, "40": 9.93854, "45": 9.75472, "50": 9.6868}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2098.0, "5": 2869.0, "10": 2322.0, "15": 2605.0, "20": 2299.0, "25": 2583.0, "30": 2637.0, "35": 3051.0, "40": 1841.0, "45": 3921.0, "50": 3392.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3375511040.0, "5": 3375511040.0, "10": 3375511040.0, "15": 3375511040.0, "20": 3375511040.0, "25": 3375511040.0, "30": 3375511040.0, "35": 3375511040.0, "40": 3375511040.0, "45": 3375511040.0, "50": 3375511040.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4165166080.0, "5": 5631605760.0, "10": 5631605760.0, "15": 5631605760.0, "20": 5631605760.0, "25": 5631605760.0, "30": 5631605760.0, "35": 5631605760.0, "40": 5631605760.0, "45": 5631605760.0, "50": 5631605760.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 7.75804, "5": 0.68618, "10": 0.68574, "15": 0.71793, "20": 0.79578, "25": 0.68652, "30": 0.69897, "35": 0.68192, "40": 0.69111, "45": 0.68688, "50": 0.79338}}}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.60669,
            "2": 10.59353,
            "3": 10.60584,
            "4": 10.59649,
            "5": 10.60864,
            "6": 10.59356,
            "7": 10.59007,
            "8": 10.59863,
            "9": 10.59751,
            "10": 10.58406,
            "11": 10.5871,
            "12": 10.5868,
            "13": 10.58798,
            "14": 10.584,
            "15": 10.59476,
            "16": 10.57583,
            "17": 10.56453,
            "18": 10.57247,
            "19": 10.57124,
            "20": 10.5692,
            "21": 10.55506,
            "22": 10.50706,
            "23": 10.49008,
            "24": 10.48057,
            "25": 10.46669,
            "26": 10.47293,
            "27": 10.46421,
            "28": 10.45935,
            "29": 10.41081,
            "30": 10.31751,
            "31": 10.27443,
            "32": 10.23404,
            "33": 10.24168,
            "34": 10.18728,
            "35": 10.21275,
            "36": 10.17155,
            "37": 10.15185,
            "38": 10.12989,
            "39": 10.09684,
            "40": 10.05497,
            "41": 9.99663,
            "42": 9.94055,
            "43": 9.92416,
            "44": 9.8772,
            "45": 9.85131,
            "46": 9.79438,
            "47": 9.77933,
            "48": 9.7565,
            "49": 9.79733,
            "50": 9.7547
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 2234.0,
            "2": 2602.0,
            "3": 2388.0,
            "4": 2437.0,
            "5": 2787.0,
            "6": 2606.0,
            "7": 1584.0,
            "8": 2304.0,
            "9": 2266.0,
            "10": 2268.0,
            "11": 2359.0,
            "12": 2624.0,
            "13": 2299.0,
            "14": 2110.0,
            "15": 2760.0,
            "16": 2401.0,
            "17": 2626.0,
            "18": 2654.0,
            "19": 2723.0,
            "20": 2332.0,
            "21": 2381.0,
            "22": 2719.0,
            "23": 2332.0,
            "24": 2495.0,
            "25": 2429.0,
            "26": 2493.0,
            "27": 2682.0,
            "28": 2441.0,
            "29": 2649.0,
            "30": 2567.0,
            "31": 3215.0,
            "32": 3027.0,
            "33": 2690.0,
            "34": 2936.0,
            "35": 3006.0,
            "36": 3090.0,
            "37": 3011.0,
            "38": 2635.0,
            "39": 2474.0,
            "40": 2176.0,
            "41": 3476.0,
            "42": 3588.0,
            "43": 3497.0,
            "44": 3612.0,
            "45": 3666.0,
            "46": 2814.0,
            "47": 2046.0,
            "48": 3135.0,
            "49": 3297.0,
            "50": 3666.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 3434522112.0,
            "2": 3434522112.0,
            "3": 3434522112.0,
            "4": 3434522112.0,
            "5": 3434522112.0,
            "6": 3435308544.0,
            "7": 3435308544.0,
            "8": 3435308544.0,
            "9": 3434522112.0,
            "10": 3435308544.0,
            "11": 3435308544.0,
            "12": 3435308544.0,
            "13": 3435308544.0,
            "14": 3435308544.0,
            "15": 3435308544.0,
            "16": 3435308544.0,
            "17": 3435308544.0,
            "18": 3434522112.0,
            "19": 3435308544.0,
            "20": 3435308544.0,
            "21": 3434522112.0,
            "22": 3434522112.0,
            "23": 3435308544.0,
            "24": 3435308544.0,
            "25": 3435308544.0,
            "26": 3435308544.0,
            "27": 3435308544.0,
            "28": 3435308544.0,
            "29": 3434522112.0,
            "30": 3434522112.0,
            "31": 3435308544.0,
            "32": 3435308544.0,
            "33": 3434522112.0,
            "34": 3435308544.0,
            "35": 3434522112.0,
            "36": 3435308544.0,
            "37": 3435308544.0,
            "38": 3435308544.0,
            "39": 3435308544.0,
            "40": 3434522112.0,
            "41": 3435308544.0,
            "42": 3434522112.0,
            "43": 3434522112.0,
            "44": 3435308544.0,
            "45": 3435308544.0,
            "46": 3435308544.0,
            "47": 3435308544.0,
            "48": 3434522112.0,
            "49": 3435308544.0,
            "50": 3434522112.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 4230456320.0,
            "2": 5709228032.0,
            "3": 5709228032.0,
            "4": 5709228032.0,
            "5": 5709229056.0,
            "6": 5709229056.0,
            "7": 5709229056.0,
            "8": 5709229056.0,
            "9": 5709229056.0,
            "10": 5709229056.0,
            "11": 5709229056.0,
            "12": 5709229056.0,
            "13": 5709229056.0,
            "14": 5709229056.0,
            "15": 5709229056.0,
            "16": 5709229056.0,
            "17": 5709229056.0,
            "18": 5709229056.0,
            "19": 5709229056.0,
            "20": 5709229056.0,
            "21": 5709229056.0,
            "22": 5709229056.0,
            "23": 5709229056.0,
            "24": 5709229056.0,
            "25": 5709229056.0,
            "26": 5709229056.0,
            "27": 5709229056.0,
            "28": 5709229056.0,
            "29": 5709229056.0,
            "30": 5709229056.0,
            "31": 5709229056.0,
            "32": 5709229056.0,
            "33": 5709229056.0,
            "34": 5709229056.0,
            "35": 5709229056.0,
            "36": 5709229056.0,
            "37": 5709229056.0,
            "38": 5709229056.0,
            "39": 5709229056.0,
            "40": 5709229056.0,
            "41": 5709229056.0,
            "42": 5709229056.0,
            "43": 5709229056.0,
            "44": 5709229056.0,
            "45": 5709229056.0,
            "46": 5709229056.0,
            "47": 5709229056.0,
            "48": 5709229056.0,
            "49": 5709229056.0,
            "50": 5709229056.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 6.3648,
            "3": 0.50773,
            "4": 0.48828,
            "5": 0.46858,
            "6": 0.46849,
            "7": 0.46721,
            "8": 0.47128,
            "9": 0.46839,
            "10": 0.46898,
            "11": 0.46864,
            "12": 0.47069,
            "13": 0.46803,
            "14": 0.47103,
            "15": 0.46722,
            "16": 0.46802,
            "17": 0.4671,
            "18": 0.44126,
            "19": 0.44637,
            "20": 0.44305,
            "21": 0.44231,
            "22": 0.45492,
            "23": 0.45474,
            "24": 0.46736,
            "25": 0.45949,
            "26": 0.46093,
            "27": 0.47387,
            "28": 0.45804,
            "29": 0.46041,
            "30": 0.45964,
            "31": 0.45448,
            "32": 0.4596,
            "33": 0.46132,
            "34": 0.4511,
            "35": 0.44276,
            "36": 0.44321,
            "37": 0.44297,
            "38": 0.44333,
            "39": 0.443,
            "40": 0.78942,
            "41": 0.44803,
            "42": 0.82322,
            "43": 0.72677,
            "44": 0.9334,
            "45": 1.01619,
            "46": 0.44666,
            "47": 0.88907,
            "48": 0.44404,
            "49": 0.77042,
            "50": 0.88736
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.49405, "5": 10.49933, "10": 10.49631, "15": 10.4873, "20": 10.46572, "25": 10.39496, "30": 10.2104, "35": 10.07333, "40": 9.94011, "45": 9.75651, "50": 9.69025}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2018.0, "5": 2740.0, "10": 2260.0, "15": 2649.0, "20": 2205.0, "25": 2675.0, "30": 2687.0, "35": 2930.0, "40": 1853.0, "45": 4016.0, "50": 2978.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3375511040.0, "5": 3375511040.0, "10": 3375511040.0, "15": 3375511040.0, "20": 3375511040.0, "25": 3375511040.0, "30": 3375511040.0, "35": 3375511040.0, "40": 3375511040.0, "45": 3375511040.0, "50": 3375511040.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4153629696.0, "5": 5620069376.0, "10": 5620069376.0, "15": 5620069376.0, "20": 5620069376.0, "25": 5620069376.0, "30": 5620069376.0, "35": 5620069376.0, "40": 5620069376.0, "45": 5620069376.0, "50": 5620069376.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 14.9519, "5": 0.61548, "10": 0.60778, "15": 0.60342, "20": 0.59844, "25": 0.60331, "30": 0.60426, "35": 0.59982, "40": 0.59928, "45": 0.80076, "50": 0.64239}}}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 24
  --hidden-size: 1024
  --num-attention-heads: 16
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 128
  --seq-length: 512
  --max-position-embeddings: 512
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 990000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence
  --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.0001
  --min-lr: 0.00001
  --lr-warmup-fraction: 0.01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 2
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --ckpt-format: torch
  --attention-backend: unfused
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.46352, "5": 10.45321, "10": 10.4481, "15": 10.45891, "20": 10.41677, "25": 10.34598, "30": 10.1814, "35": 10.03992, "40": 9.90206, "45": 9.74954, "50": 9.66818}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2485.0, "5": 2824.0, "10": 2427.0, "15": 2767.0, "20": 2412.0, "25": 2691.0, "30": 2807.0, "35": 3077.0, "40": 2363.0, "45": 3744.0, "50": 3526.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2032164352.0, "5": 2032164352.0, "10": 2032164352.0, "15": 2032164352.0, "20": 2032164352.0, "25": 2032164352.0, "30": 2032164352.0, "35": 2032164352.0, "40": 2032164352.0, "45": 2032164352.0, "50": 2032164352.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4360259072.0, "5": 5220507136.0, "10": 5220507136.0, "15": 5220507136.0, "20": 5220507136.0, "25": 5220507136.0, "30": 5220507136.0, "35": 5220507136.0, "40": 5220507136.0, "45": 5220507136.0, "50": 5220507136.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 12.15656, "5": 0.90105, "10": 0.87495, "15": 0.87775, "20": 0.99829, "25": 0.90462, "30": 0.89264, "35": 0.90859, "40": 1.22654, "45": 0.98086, "50": 0.99661}}}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.50372,
            "2": 10.50518,
            "3": 10.50375,
            "4": 10.5122,
            "5": 10.49605,
            "6": 10.50635,
            "7": 10.50842,
            "8": 10.49712,
            "9": 10.50322,
            "10": 10.5124,
            "11": 10.51046,
            "12": 10.50236,
            "13": 10.50045,
            "14": 10.49454,
            "15": 10.49733,
            "16": 10.46631,
            "17": 10.47165,
            "18": 10.48801,
            "19": 10.47165,
            "20": 10.47134,
            "21": 10.45835,
            "22": 10.42035,
            "23": 10.41806,
            "24": 10.41512,
            "25": 10.37515,
            "26": 10.38894,
            "27": 10.35288,
            "28": 10.37449,
            "29": 10.32003,
            "30": 10.22366,
            "31": 10.17914,
            "32": 10.1594,
            "33": 10.16322,
            "34": 10.12997,
            "35": 10.10282,
            "36": 10.08198,
            "37": 10.07429,
            "38": 10.08263,
            "39": 10.02137,
            "40": 9.97876,
            "41": 9.93118,
            "42": 9.87334,
            "43": 9.87304,
            "44": 9.83579,
            "45": 9.81241,
            "46": 9.74637,
            "47": 9.73201,
            "48": 9.71622,
            "49": 9.76511,
            "50": 9.72575
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 2517.0,
            "2": 1916.0,
            "3": 1789.0,
            "4": 2431.0,
            "5": 2093.0,
            "6": 1896.0,
            "7": 2326.0,
            "8": 2342.0,
            "9": 2220.0,
            "10": 2183.0,
            "11": 2757.0,
            "12": 2022.0,
            "13": 2145.0,
            "14": 2737.0,
            "15": 2215.0,
            "16": 2683.0,
            "17": 2675.0,
            "18": 2661.0,
            "19": 2489.0,
            "20": 2625.0,
            "21": 2081.0,
            "22": 2656.0,
            "23": 2660.0,
            "24": 2796.0,
            "25": 2638.0,
            "26": 2650.0,
            "27": 2770.0,
            "28": 2834.0,
            "29": 2491.0,
            "30": 2958.0,
            "31": 2507.0,
            "32": 2724.0,
            "33": 2451.0,
            "34": 2602.0,
            "35": 2690.0,
            "36": 3159.0,
            "37": 3123.0,
            "38": 2775.0,
            "39": 2712.0,
            "40": 3571.0,
            "41": 1909.0,
            "42": 1523.0,
            "43": 1765.0,
            "44": 3233.0,
            "45": 3818.0,
            "46": 3184.0,
            "47": 2878.0,
            "48": 3078.0,
            "49": 2752.0,
            "50": 2196.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 2091169280.0,
            "2": 2091169280.0,
            "3": 2091169280.0,
            "4": 2091169280.0,
            "5": 2091169280.0,
            "6": 2091169280.0,
            "7": 2091169280.0,
            "8": 2091169280.0,
            "9": 2091169280.0,
            "10": 2091169280.0,
            "11": 2091169280.0,
            "12": 2091169280.0,
            "13": 2091169280.0,
            "14": 2091169280.0,
            "15": 2091169280.0,
            "16": 2091169280.0,
            "17": 2091169280.0,
            "18": 2091169280.0,
            "19": 2091169280.0,
            "20": 2091169280.0,
            "21": 2091169280.0,
            "22": 2091169280.0,
            "23": 2091169280.0,
            "24": 2091169280.0,
            "25": 2091169280.0,
            "26": 2091169280.0,
            "27": 2091169280.0,
            "28": 2091169280.0,
            "29": 2091169280.0,
            "30": 2091169280.0,
            "31": 2091169280.0,
            "32": 2091169280.0,
            "33": 2091169280.0,
            "34": 2091169280.0,
            "35": 2091169280.0,
            "36": 2091169280.0,
            "37": 2091169280.0,
            "38": 2091169280.0,
            "39": 2091169280.0,
            "40": 2091169280.0,
            "41": 2091169280.0,
            "42": 2091169280.0,
            "43": 2091169280.0,
            "44": 2091169280.0,
            "45": 2091169280.0,
            "46": 2091169280.0,
            "47": 2091169280.0,
            "48": 2091169280.0,
            "49": 2091169280.0,
            "50": 2091169280.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 4421607424.0,
            "2": 5294192128.0,
            "3": 5294192128.0,
            "4": 5294192128.0,
            "5": 5294192128.0,
            "6": 5294192128.0,
            "7": 5294192128.0,
            "8": 5294192128.0,
            "9": 5294192128.0,
            "10": 5294192128.0,
            "11": 5294192128.0,
            "12": 5294192128.0,
            "13": 5294977024.0,
            "14": 5294977024.0,
            "15": 5294977024.0,
            "16": 5294977024.0,
            "17": 5294977024.0,
            "18": 5294977024.0,
            "19": 5294977024.0,
            "20": 5294977024.0,
            "21": 5294977024.0,
            "22": 5294977024.0,
            "23": 5294977024.0,
            "24": 5294977024.0,
            "25": 5294977024.0,
            "26": 5294977024.0,
            "27": 5294977024.0,
            "28": 5294977024.0,
            "29": 5294977024.0,
            "30": 5294977024.0,
            "31": 5294977024.0,
            "32": 5294977024.0,
            "33": 5294977024.0,
            "34": 5294977024.0,
            "35": 5294977024.0,
            "36": 5294977024.0,
            "37": 5294977024.0,
            "38": 5294977024.0,
            "39": 5294977024.0,
            "40": 5294977024.0,
            "41": 5294977024.0,
            "42": 5294977024.0,
            "43": 5294977024.0,
            "44": 5294977024.0,
            "45": 5294977024.0,
            "46": 5294977024.0,
            "47": 5294977024.0,
            "48": 5294977024.0,
            "49": 5294977024.0,
            "50": 5294977024.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 8.80363,
            "3": 0.56371,
            "4": 0.55956,
            "5": 0.57722,
            "6": 0.5559,
            "7": 0.55109,
            "8": 0.56338,
            "9": 0.55675,
            "10": 0.54615,
            "11": 0.56947,
            "12": 0.54313,
            "13": 0.53579,
            "14": 0.5348,
            "15": 0.55059,
            "16": 0.58449,
            "17": 1.72193,
            "18": 1.00286,
            "19": 1.00508,
            "20": 1.37568,
            "21": 1.0004,
            "22": 1.61847,
            "23": 1.37176,
            "24": 1.86248,
            "25": 1.3993,
            "26": 0.56478,
            "27": 0.54444,
            "28": 0.55376,
            "29": 0.5687,
            "30": 0.58734,
            "31": 0.57557,
            "32": 0.58734,
            "33": 0.58598,
            "34": 0.57746,
            "35": 0.57728,
            "36": 0.55998,
            "37": 0.54419,
            "38": 0.54307,
            "39": 0.54541,
            "40": 0.56237,
            "41": 0.55314,
            "42": 0.55352,
            "43": 0.57847,
            "44": 0.5425,
            "45": 0.83687,
            "46": 0.56484,
            "47": 1.43437,
            "48": 1.15295,
            "49": 1.09052,
            "50": 0.85934
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.4681, "5": 10.45367, "10": 10.45093, "15": 10.45833, "20": 10.42029, "25": 10.3405, "30": 10.18378, "35": 10.03886, "40": 9.89837, "45": 9.75107, "50": 9.67018}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2373.0, "5": 2811.0, "10": 2502.0, "15": 2556.0, "20": 2392.0, "25": 2764.0, "30": 2957.0, "35": 3046.0, "40": 2373.0, "45": 3854.0, "50": 3568.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2032164352.0, "5": 2032164352.0, "10": 2032164352.0, "15": 2032164352.0, "20": 2032164352.0, "25": 2032164352.0, "30": 2032164352.0, "35": 2032164352.0, "40": 2032164352.0, "45": 2032164352.0, "50": 2032164352.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4341384704.0, "5": 5201632768.0, "10": 5201632768.0, "15": 5201632768.0, "20": 5201632768.0, "25": 5201632768.0, "30": 5201632768.0, "35": 5201632768.0, "40": 5201632768.0, "45": 5201632768.0, "50": 5201632768.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 22.0069, "5": 0.82183, "10": 1.0945, "15": 0.82371, "20": 0.84695, "25": 1.04803, "30": 0.79308, "35": 0.77873, "40": 0.98672, "45": 0.84816, "50": 0.7713}}}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 24
  --hidden-size: 1024
  --num-attention-heads: 16
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 128
  --seq-length: 512
  --max-position-embeddings: 512
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 990000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence
  --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.0001
  --min-lr: 0.00001
  --lr-warmup-fraction: 0.01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --num-layers-per-virtual-pipeline-stage: 2
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --ckpt-format: torch
  --attention-backend: unfused
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.49689, "5": 10.48187, "10": 10.50191, "15": 10.45897, "20": 10.44608, "25": 10.35095, "30": 10.16631, "35": 10.04387, "40": 9.90911, "45": 9.75816, "50": 9.67525}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2071.0, "5": 2593.0, "10": 2168.0, "15": 2415.0, "20": 2201.0, "25": 2577.0, "30": 2948.0, "35": 2983.0, "40": 2260.0, "45": 3953.0, "50": 3549.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1754654208.0, "5": 1754654208.0, "10": 1754654208.0, "15": 1754654208.0, "20": 1754654208.0, "25": 1754654208.0, "30": 1754654208.0, "35": 1754654208.0, "40": 1754654208.0, "45": 1754654208.0, "50": 1754654208.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2313432064.0, "5": 3055894528.0, "10": 3055894528.0, "15": 3055894528.0, "20": 3055894528.0, "25": 3055894528.0, "30": 3055894528.0, "35": 3055894528.0, "40": 3055894528.0, "45": 3055894528.0, "50": 3055894528.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 8.01702, "5": 1.63485, "10": 1.62917, "15": 1.63494, "20": 1.8032, "25": 1.83901, "30": 1.67596, "35": 1.66247, "40": 1.67062, "45": 1.66391, "50": 1.67494}}}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.52582,
            "2": 10.54385,
            "3": 10.53319,
            "4": 10.53921,
            "5": 10.53684,
            "6": 10.54381,
            "7": 10.54794,
            "8": 10.54122,
            "9": 10.53291,
            "10": 10.52971,
            "11": 10.5324,
            "12": 10.5384,
            "13": 10.5393,
            "14": 10.52549,
            "15": 10.52273,
            "16": 10.50512,
            "17": 10.51237,
            "18": 10.50762,
            "19": 10.50413,
            "20": 10.51125,
            "21": 10.51037,
            "22": 10.44522,
            "23": 10.43219,
            "24": 10.44115,
            "25": 10.40728,
            "26": 10.41482,
            "27": 10.39808,
            "28": 10.41186,
            "29": 10.36611,
            "30": 10.28309,
            "31": 10.2298,
            "32": 10.20571,
            "33": 10.21465,
            "34": 10.17029,
            "35": 10.14644,
            "36": 10.12745,
            "37": 10.11693,
            "38": 10.11809,
            "39": 10.07868,
            "40": 10.01082,
            "41": 9.96752,
            "42": 9.92742,
            "43": 9.91995,
            "44": 9.86345,
            "45": 9.83762,
            "46": 9.7791,
            "47": 9.77306,
            "48": 9.74906,
            "49": 9.77865,
            "50": 9.7532
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 2713.0,
            "2": 2105.0,
            "3": 1711.0,
            "4": 2467.0,
            "5": 2378.0,
            "6": 2049.0,
            "7": 2371.0,
            "8": 2546.0,
            "9": 2514.0,
            "10": 2558.0,
            "11": 2696.0,
            "12": 2058.0,
            "13": 2341.0,
            "14": 2864.0,
            "15": 2242.0,
            "16": 2780.0,
            "17": 2769.0,
            "18": 2705.0,
            "19": 2663.0,
            "20": 2799.0,
            "21": 2296.0,
            "22": 2828.0,
            "23": 2685.0,
            "24": 2823.0,
            "25": 2678.0,
            "26": 2723.0,
            "27": 2650.0,
            "28": 2920.0,
            "29": 2640.0,
            "30": 2945.0,
            "31": 2442.0,
            "32": 2849.0,
            "33": 2386.0,
            "34": 2613.0,
            "35": 2676.0,
            "36": 3018.0,
            "37": 3312.0,
            "38": 2732.0,
            "39": 2933.0,
            "40": 3341.0,
            "41": 1746.0,
            "42": 1496.0,
            "43": 1694.0,
            "44": 2861.0,
            "45": 3761.0,
            "46": 3341.0,
            "47": 3180.0,
            "48": 2692.0,
            "49": 2538.0,
            "50": 2128.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1796646400.0,
            "2": 1796646400.0,
            "3": 1796646400.0,
            "4": 1796646400.0,
            "5": 1796646400.0,
            "6": 1796646400.0,
            "7": 1796646400.0,
            "8": 1796646400.0,
            "9": 1796646400.0,
            "10": 1796646400.0,
            "11": 1796646400.0,
            "12": 1796646400.0,
            "13": 1796646400.0,
            "14": 1796646400.0,
            "15": 1796646400.0,
            "16": 1796646400.0,
            "17": 1796646400.0,
            "18": 1797694976.0,
            "19": 1796646400.0,
            "20": 1796646400.0,
            "21": 1796646400.0,
            "22": 1796646400.0,
            "23": 1796646400.0,
            "24": 1796646400.0,
            "25": 1796646400.0,
            "26": 1796646400.0,
            "27": 1796646400.0,
            "28": 1796646400.0,
            "29": 1796646400.0,
            "30": 1796646400.0,
            "31": 1796646400.0,
            "32": 1796646400.0,
            "33": 1796646400.0,
            "34": 1796646400.0,
            "35": 1796646400.0,
            "36": 1796646400.0,
            "37": 1796646400.0,
            "38": 1796646400.0,
            "39": 1796646400.0,
            "40": 1796646400.0,
            "41": 1796646400.0,
            "42": 1796646400.0,
            "43": 1796646400.0,
            "44": 1796646400.0,
            "45": 1796646400.0,
            "46": 1796646400.0,
            "47": 1796646400.0,
            "48": 1796646400.0,
            "49": 1796646400.0,
            "50": 1796646400.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 2376915456.0,
            "2": 3124626944.0,
            "3": 3124626944.0,
            "4": 3126723584.0,
            "5": 3126723584.0,
            "6": 3126723584.0,
            "7": 3126723584.0,
            "8": 3126723584.0,
            "9": 3126723584.0,
            "10": 3126723584.0,
            "11": 3126723584.0,
            "12": 3126723584.0,
            "13": 3126723584.0,
            "14": 3126723584.0,
            "15": 3126723584.0,
            "16": 3126723584.0,
            "17": 3126723584.0,
            "18": 3126724096.0,
            "19": 3126724096.0,
            "20": 3126724096.0,
            "21": 3126724096.0,
            "22": 3126724096.0,
            "23": 3126724096.0,
            "24": 3126724096.0,
            "25": 3126724096.0,
            "26": 3126724096.0,
            "27": 3126724096.0,
            "28": 3126724096.0,
            "29": 3126724096.0,
            "30": 3126724096.0,
            "31": 3128818688.0,
            "32": 3128818688.0,
            "33": 3128818688.0,
            "34": 3128818688.0,
            "35": 3128818688.0,
            "36": 3128818688.0,
            "37": 3128818688.0,
            "38": 3128818688.0,
            "39": 3128818688.0,
            "40": 3128818688.0,
            "41": 3128818688.0,
            "42": 3128818688.0,
            "43": 3128818688.0,
            "44": 3128818688.0,
            "45": 3128818688.0,
            "46": 3128818688.0,
            "47": 3128818688.0,
            "48": 3128818688.0,
            "49": 3128818688.0,
            "50": 3128818688.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 7.37428,
            "3": 0.92991,
            "4": 0.9078,
            "5": 0.90709,
            "6": 0.90742,
            "7": 0.90431,
            "8": 0.90471,
            "9": 0.90657,
            "10": 0.90486,
            "11": 0.90481,
            "12": 0.90401,
            "13": 0.90933,
            "14": 0.90583,
            "15": 0.90584,
            "16": 0.90548,
            "17": 0.90677,
            "18": 0.90774,
            "19": 1.62221,
            "20": 0.90756,
            "21": 0.90392,
            "22": 1.30376,
            "23": 1.19849,
            "24": 1.18039,
            "25": 1.24273,
            "26": 0.90511,
            "27": 0.90514,
            "28": 0.90501,
            "29": 0.90556,
            "30": 0.90459,
            "31": 0.90475,
            "32": 0.90442,
            "33": 0.90462,
            "34": 0.90382,
            "35": 0.90348,
            "36": 0.90487,
            "37": 0.90596,
            "38": 0.90271,
            "39": 0.90357,
            "40": 0.90215,
            "41": 0.9009,
            "42": 0.9012,
            "43": 0.90294,
            "44": 0.90111,
            "45": 0.90371,
            "46": 0.9033,
            "47": 1.10365,
            "48": 0.90243,
            "49": 1.04847,
            "50": 0.90108
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.49574, "5": 10.48398, "10": 10.49943, "15": 10.4663, "20": 10.44775, "25": 10.34954, "30": 10.17283, "35": 10.0427, "40": 9.9076, "45": 9.7577, "50": 9.67688}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2182.0, "5": 2568.0, "10": 2108.0, "15": 2533.0, "20": 2166.0, "25": 2639.0, "30": 2769.0, "35": 3080.0, "40": 2282.0, "45": 3831.0, "50": 3519.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1754654208.0, "5": 1754654208.0, "10": 1754654208.0, "15": 1754654208.0, "20": 1754654208.0, "25": 1754654208.0, "30": 1754654208.0, "35": 1754654208.0, "40": 1754654208.0, "45": 1754654208.0, "50": 1754654208.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2300849152.0, "5": 3043311616.0, "10": 3043311616.0, "15": 3043311616.0, "20": 3043311616.0, "25": 3043311616.0, "30": 3043311616.0, "35": 3043311616.0, "40": 3043311616.0, "45": 3043311616.0, "50": 3043311616.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 11.92253, "5": 1.17517, "10": 1.16204, "15": 1.1534, "20": 1.15142, "25": 1.1777, "30": 1.14956, "35": 1.15257, "40": 1.14342, "45": 1.14293, "50": 1.14651}}}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 24
  --hidden-size: 1024
  --num-attention-heads: 16
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 128
  --seq-length: 512
  --max-position-embeddings: 512
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 990000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence
  --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.0001
  --min-lr: 0.00001
  --lr-warmup-fraction: 0.01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --tensor-model-parallel-size: 2
  --pipeline-model-parallel-size: 2
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --ckpt-format: torch
  --attention-backend: unfused
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/golden_values_dev_dgx_a100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 5,
        "values": {
            "1": 10.49689,
            "5": 10.48165,
            "10": 10.50192,
            "15": 10.45891,
            "20": 10.44599,
            "25": 10.35067,
            "30": 10.16617,
            "35": 10.04377,
            "40": 9.90903,
            "45": 9.75804,
            "50": 9.67525,
            "55": 9.55381,
            "60": 9.45437,
            "65": 9.42265,
            "70": 9.30033,
            "75": 9.3248,
            "80": 9.26115,
            "85": 9.29647,
            "90": 9.23205,
            "95": 9.23789,
            "100": 9.106
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 5,
        "values": {
            "1": 2071.0,
            "5": 2603.0,
            "10": 2120.0,
            "15": 2502.0,
            "20": 2235.0,
            "25": 2509.0,
            "30": 2938.0,
            "35": 2948.0,
            "40": 2197.0,
            "45": 3921.0,
            "50": 3479.0,
            "55": 3577.0,
            "60": 2699.0,
            "65": 3580.0,
            "70": 3903.0,
            "75": 4779.0,
            "80": 3441.0,
            "85": 4133.0,
            "90": 4705.0,
            "95": 4363.0,
            "100": 3205.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 5,
        "values": {
            "1": 1754654208.0,
            "5": 1754654208.0,
            "10": 1754654208.0,
            "15": 1754654208.0,
            "20": 1754654208.0,
            "25": 1754654208.0,
            "30": 1754654208.0,
            "35": 1754654208.0,
            "40": 1754654208.0,
            "45": 1754654208.0,
            "50": 1754654208.0,
            "55": 1754654208.0,
            "60": 1754654208.0,
            "65": 1754654208.0,
            "70": 1754654208.0,
            "75": 1754654208.0,
            "80": 1754654208.0,
            "85": 1754654208.0,
            "90": 1754654208.0,
            "95": 1754654208.0,
            "100": 1754654208.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 5,
        "values": {
            "1": 2313432064.0,
            "5": 3055894528.0,
            "10": 3055894528.0,
            "15": 3055894528.0,
            "20": 3055894528.0,
            "25": 3055894528.0,
            "30": 3055894528.0,
            "35": 3055894528.0,
            "40": 3055894528.0,
            "45": 3055894528.0,
            "50": 3055894528.0,
            "55": 3055894528.0,
            "60": 3055894528.0,
            "65": 3055894528.0,
            "70": 3055894528.0,
            "75": 3055894528.0,
            "80": 3055894528.0,
            "85": 3055894528.0,
            "90": 3055894528.0,
            "95": 3055894528.0,
            "100": 3055894528.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 5,
        "values": {
            "1": 8.5415,
            "5": 1.31601,
            "10": 1.30283,
            "15": 1.30113,
            "20": 1.36717,
            "25": 1.30303,
            "30": 1.3046,
            "35": 1.30812,
            "40": 1.33609,
            "45": 1.29932,
            "50": 1.29774,
            "55": 1.3003,
            "60": 1.30422,
            "65": 1.29772,
            "70": 1.29885,
            "75": 1.30735,
            "80": 1.3284,
            "85": 1.30253,
            "90": 1.3315,
            "95": 1.30266,
            "100": 1.3038
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/golden_values_dev_dgx_h100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.48367, "5": 10.4764, "10": 10.47268, "15": 10.47925, "20": 10.45448, "25": 10.38182, "30": 10.21159, "35": 10.10576, "40": 9.98131, "45": 9.82324, "50": 9.72977, "55": 9.61012, "60": 9.51845, "65": 9.4581, "70": 9.37599, "75": 9.37873, "80": 9.31495, "85": 9.35008, "90": 9.2849, "95": 9.27998, "100": 9.14808}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2570.0, "5": 2033.0, "10": 2635.0, "15": 2027.0, "20": 2850.0, "25": 2514.0, "30": 2858.0, "35": 2396.0, "40": 3386.0, "45": 3575.0, "50": 2149.0, "55": 3552.0, "60": 2461.0, "65": 3090.0, "70": 4409.0, "75": 4761.0, "80": 3795.0, "85": 4392.0, "90": 4389.0, "95": 4668.0, "100": 3371.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1784014336.0, "5": 1784014336.0, "10": 1784014336.0, "15": 1784014336.0, "20": 1784014336.0, "25": 1784014336.0, "30": 1784014336.0, "35": 1784014336.0, "40": 1784014336.0, "45": 1784014336.0, "50": 1784014336.0, "55": 1784014336.0, "60": 1784014336.0, "65": 1784014336.0, "70": 1784014336.0, "75": 1784014336.0, "80": 1784014336.0, "85": 1784014336.0, "90": 1784014336.0, "95": 1784014336.0, "100": 1784014336.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2365860864.0, "5": 3108323328.0, "10": 3108323328.0, "15": 3108323328.0, "20": 3108323328.0, "25": 3108323328.0, "30": 3108323328.0, "35": 3108323328.0, "40": 3108323328.0, "45": 3108323328.0, "50": 3108323328.0, "55": 3108323328.0, "60": 3108323328.0, "65": 3108323328.0, "70": 3108323328.0, "75": 3108323328.0, "80": 3108323328.0, "85": 3108323328.0, "90": 3108323328.0, "95": 3108323328.0, "100": 3108323328.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.952, "5": 0.9152, "10": 0.90016, "15": 1.02052, "20": 0.83245, "25": 0.81421, "30": 0.82286, "35": 0.81792, "40": 0.87247, "45": 0.83604, "50": 0.8256, "55": 0.8313, "60": 1.12795, "65": 0.82458, "70": 0.83513, "75": 0.82314, "80": 0.96015, "85": 0.89432, "90": 0.8203, "95": 0.82739, "100": 0.88667}}}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.49574, "5": 10.48398, "10": 10.49943, "15": 10.4663, "20": 10.44775, "25": 10.34964, "30": 10.1728, "35": 10.04262, "40": 9.90767, "45": 9.75792, "50": 9.67684, "55": 9.55378, "60": 9.45458, "65": 9.42133, "70": 9.30109, "75": 9.32203, "80": 9.26184, "85": 9.29667, "90": 9.23332, "95": 9.23793, "100": 9.10611}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2182.0, "5": 2568.0, "10": 2108.0, "15": 2533.0, "20": 2151.0, "25": 2601.0, "30": 2801.0, "35": 3107.0, "40": 2294.0, "45": 3909.0, "50": 3482.0, "55": 3606.0, "60": 2653.0, "65": 3341.0, "70": 3849.0, "75": 5090.0, "80": 3613.0, "85": 4194.0, "90": 4618.0, "95": 4439.0, "100": 3224.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1754654208.0, "5": 1754654208.0, "10": 1754654208.0, "15": 1754654208.0, "20": 1754654208.0, "25": 1754654208.0, "30": 1754654208.0, "35": 1754654208.0, "40": 1754654208.0, "45": 1754654208.0, "50": 1754654208.0, "55": 1754654208.0, "60": 1754654208.0, "65": 1754654208.0, "70": 1754654208.0, "75": 1754654208.0, "80": 1754654208.0, "85": 1754654208.0, "90": 1754654208.0, "95": 1754654208.0, "100": 1754654208.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 12.95742, "5": 1.16734, "10": 2.45473, "15": 1.45839, "20": 1.51474, "25": 1.15989, "30": 1.14801, "35": 1.14584, "40": 1.15517, "45": 1.14468, "50": 1.14969, "55": 1.15684, "60": 1.14892, "65": 1.14737, "70": 1.30233, "75": 1.37176, "80": 1.1466, "85": 1.24468, "90": 1.15157, "95": 1.15026, "100": 1.15254}}}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_frozen_resume_torch_dist/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 24
  --hidden-size: 1024
  --num-attention-heads: 16
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 128
  --seq-length: 512
  --max-position-embeddings: 512
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 990000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence
  --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.0001
  --min-lr: 0.00001
  --lr-warmup-fraction: 0.01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --tensor-model-parallel-size: 2
  --pipeline-model-parallel-size: 2
  --deterministic-mode: true
  --use-checkpoint-args: true
  --use-checkpoint-opt_param-scheduler: true
  --no-gradient-accumulation-fusion: true
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --ckpt-format: torch
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --attention-backend: unfused
TEST_TYPE: frozen-resume


================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.49711, "5": 10.48154, "10": 10.50197, "15": 10.45909, "20": 10.44614, "25": 10.35085, "30": 10.16654, "35": 10.04394, "40": 9.9092, "45": 9.75814, "50": 9.67518}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2095.0, "5": 2660.0, "10": 2235.0, "15": 2491.0, "20": 2216.0, "25": 2531.0, "30": 2718.0, "35": 2945.0, "40": 2310.0, "45": 3831.0, "50": 3502.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1767237120.0, "5": 1767237120.0, "10": 1767237120.0, "15": 1767237120.0, "20": 1767237120.0, "25": 1767237120.0, "30": 1767237120.0, "35": 1767237120.0, "40": 1767237120.0, "45": 1767237120.0, "50": 1767237120.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2313432064.0, "5": 3055894528.0, "10": 3055894528.0, "15": 3055894528.0, "20": 3055894528.0, "25": 3055894528.0, "30": 3055894528.0, "35": 3055894528.0, "40": 3055894528.0, "45": 3055894528.0, "50": 3055894528.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 8.74232, "5": 1.24548, "10": 1.13693, "15": 1.14447, "20": 1.37773, "25": 1.13543, "30": 1.13581, "35": 1.13373, "40": 1.13802, "45": 1.1302, "50": 1.13618}}}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.52551,
            "2": 10.54472,
            "3": 10.53112,
            "4": 10.53996,
            "5": 10.5371,
            "6": 10.54388,
            "7": 10.54852,
            "8": 10.5417,
            "9": 10.5317,
            "10": 10.52961,
            "11": 10.52976,
            "12": 10.53761,
            "13": 10.53871,
            "14": 10.52447,
            "15": 10.52205,
            "16": 10.50381,
            "17": 10.51177,
            "18": 10.50815,
            "19": 10.50379,
            "20": 10.51035,
            "21": 10.5095,
            "22": 10.44506,
            "23": 10.43405,
            "24": 10.44154,
            "25": 10.40759,
            "26": 10.41686,
            "27": 10.39998,
            "28": 10.41047,
            "29": 10.36664,
            "30": 10.28289,
            "31": 10.23013,
            "32": 10.20411,
            "33": 10.21551,
            "34": 10.17208,
            "35": 10.14779,
            "36": 10.12817,
            "37": 10.11674,
            "38": 10.11981,
            "39": 10.08045,
            "40": 10.01189,
            "41": 9.96762,
            "42": 9.92708,
            "43": 9.92037,
            "44": 9.8642,
            "45": 9.83846,
            "46": 9.7798,
            "47": 9.77341,
            "48": 9.74902,
            "49": 9.77849,
            "50": 9.75354
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 2667.0,
            "2": 2111.0,
            "3": 1710.0,
            "4": 2527.0,
            "5": 2170.0,
            "6": 2007.0,
            "7": 2321.0,
            "8": 2548.0,
            "9": 2544.0,
            "10": 2545.0,
            "11": 2696.0,
            "12": 1990.0,
            "13": 2361.0,
            "14": 2930.0,
            "15": 2273.0,
            "16": 2839.0,
            "17": 2683.0,
            "18": 2587.0,
            "19": 2795.0,
            "20": 2732.0,
            "21": 2188.0,
            "22": 2794.0,
            "23": 2638.0,
            "24": 2839.0,
            "25": 2636.0,
            "26": 2794.0,
            "27": 2727.0,
            "28": 2852.0,
            "29": 2479.0,
            "30": 2902.0,
            "31": 2482.0,
            "32": 2871.0,
            "33": 2457.0,
            "34": 2512.0,
            "35": 2744.0,
            "36": 2997.0,
            "37": 3284.0,
            "38": 2786.0,
            "39": 2916.0,
            "40": 3534.0,
            "41": 1715.0,
            "42": 1502.0,
            "43": 1776.0,
            "44": 2874.0,
            "45": 3603.0,
            "46": 3510.0,
            "47": 3195.0,
            "48": 2695.0,
            "49": 2483.0,
            "50": 2041.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1779868160.0,
            "2": 1779868160.0,
            "3": 1779868160.0,
            "4": 1779868160.0,
            "5": 1779868160.0,
            "6": 1779868160.0,
            "7": 1779868160.0,
            "8": 1779868160.0,
            "9": 1779868160.0,
            "10": 1779868160.0,
            "11": 1779868160.0,
            "12": 1779868160.0,
            "13": 1779868160.0,
            "14": 1779868160.0,
            "15": 1779868160.0,
            "16": 1779868160.0,
            "17": 1779868160.0,
            "18": 1779868160.0,
            "19": 1779868160.0,
            "20": 1779868160.0,
            "21": 1779868160.0,
            "22": 1779868160.0,
            "23": 1779868160.0,
            "24": 1779868160.0,
            "25": 1779868160.0,
            "26": 1779868160.0,
            "27": 1779868160.0,
            "28": 1779868160.0,
            "29": 1779868160.0,
            "30": 1779868160.0,
            "31": 1779868160.0,
            "32": 1779868160.0,
            "33": 1779868160.0,
            "34": 1779868160.0,
            "35": 1779868160.0,
            "36": 1779868160.0,
            "37": 1779868160.0,
            "38": 1779868160.0,
            "39": 1779868160.0,
            "40": 1779868160.0,
            "41": 1779868160.0,
            "42": 1779868160.0,
            "43": 1779868160.0,
            "44": 1779868160.0,
            "45": 1779868160.0,
            "46": 1779868160.0,
            "47": 1779868160.0,
            "48": 1779868160.0,
            "49": 1779868160.0,
            "50": 1779868160.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 2347554304.0,
            "2": 3095265792.0,
            "3": 3097362944.0,
            "4": 3097362944.0,
            "5": 3097362944.0,
            "6": 3097362944.0,
            "7": 3097362944.0,
            "8": 3097362944.0,
            "9": 3097362944.0,
            "10": 3097362944.0,
            "11": 3097362944.0,
            "12": 3097362944.0,
            "13": 3097362944.0,
            "14": 3097362944.0,
            "15": 3097362944.0,
            "16": 3097362944.0,
            "17": 3097362944.0,
            "18": 3097362944.0,
            "19": 3097362944.0,
            "20": 3097362944.0,
            "21": 3097362944.0,
            "22": 3097362944.0,
            "23": 3097362944.0,
            "24": 3097362944.0,
            "25": 3097362944.0,
            "26": 3097362944.0,
            "27": 3097362944.0,
            "28": 3097362944.0,
            "29": 3097362944.0,
            "30": 3097362944.0,
            "31": 3097362944.0,
            "32": 3097362944.0,
            "33": 3097362944.0,
            "34": 3097362944.0,
            "35": 3097362944.0,
            "36": 3097362944.0,
            "37": 3097362944.0,
            "38": 3097362944.0,
            "39": 3097362944.0,
            "40": 3097362944.0,
            "41": 3097362944.0,
            "42": 3097362944.0,
            "43": 3097362944.0,
            "44": 3097362944.0,
            "45": 3097362944.0,
            "46": 3097362944.0,
            "47": 3097362944.0,
            "48": 3097362944.0,
            "49": 3097362944.0,
            "50": 3097362944.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 6.8133,
            "3": 0.78889,
            "4": 0.76348,
            "5": 0.76062,
            "6": 0.7603,
            "7": 0.75938,
            "8": 0.75976,
            "9": 0.75832,
            "10": 0.7602,
            "11": 0.75916,
            "12": 0.75773,
            "13": 0.76091,
            "14": 0.75933,
            "15": 0.76144,
            "16": 0.75858,
            "17": 0.76001,
            "18": 0.75869,
            "19": 0.75878,
            "20": 0.75808,
            "21": 1.13983,
            "22": 1.05768,
            "23": 1.04222,
            "24": 1.20576,
            "25": 1.08454,
            "26": 1.32941,
            "27": 1.04313,
            "28": 0.75803,
            "29": 0.75938,
            "30": 0.75902,
            "31": 0.75689,
            "32": 0.75871,
            "33": 0.75866,
            "34": 0.75745,
            "35": 0.76139,
            "36": 0.75892,
            "37": 0.79114,
            "38": 0.76162,
            "39": 0.7674,
            "40": 0.75973,
            "41": 0.77146,
            "42": 0.76048,
            "43": 0.75813,
            "44": 0.76005,
            "45": 0.75842,
            "46": 0.76133,
            "47": 0.75955,
            "48": 0.75994,
            "49": 1.01962,
            "50": 0.76007
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.49566, "5": 10.48418, "10": 10.49947, "15": 10.46646, "20": 10.44788, "25": 10.34978, "30": 10.17275, "35": 10.04282, "40": 9.90773, "45": 9.75781, "50": 9.67689}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2183.0, "5": 2533.0, "10": 2162.0, "15": 2548.0, "20": 2191.0, "25": 2557.0, "30": 2864.0, "35": 2979.0, "40": 2332.0, "45": 3931.0, "50": 3611.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1767237120.0, "5": 1767237120.0, "10": 1767237120.0, "15": 1767237120.0, "20": 1767237120.0, "25": 1767237120.0, "30": 1767237120.0, "35": 1767237120.0, "40": 1767237120.0, "45": 1767237120.0, "50": 1767237120.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2313432064.0, "5": 3055894528.0, "10": 3055894528.0, "15": 3055894528.0, "20": 3055894528.0, "25": 3055894528.0, "30": 3055894528.0, "35": 3055894528.0, "40": 3055894528.0, "45": 3055894528.0, "50": 3055894528.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 12.99544, "5": 1.22265, "10": 1.24105, "15": 1.21376, "20": 1.20754, "25": 1.21517, "30": 1.19626, "35": 1.22975, "40": 1.1839, "45": 1.17092, "50": 1.17649}}}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 24
  --hidden-size: 1024
  --num-attention-heads: 16
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 128
  --seq-length: 512
  --max-position-embeddings: 512
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 990000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence
  --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.0001
  --min-lr: 0.00001
  --lr-warmup-fraction: 0.01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --tensor-model-parallel-size: 2
  --pipeline-model-parallel-size: 2
  --spec: local
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --ckpt-format: torch
  --attention-backend: local
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.49689, "5": 10.48187, "10": 10.50191, "15": 10.45897, "20": 10.44608, "25": 10.35095, "30": 10.16631, "35": 10.04387, "40": 9.90911, "45": 9.75816, "50": 9.67525, "55": 9.55379, "60": 9.45432, "65": 9.42258, "70": 9.30031, "75": 9.32482, "80": 9.26124, "85": 9.29638, "90": 9.23211, "95": 9.23802, "100": 9.106}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2071.0, "5": 2593.0, "10": 2168.0, "15": 2415.0, "20": 2201.0, "25": 2577.0, "30": 2948.0, "35": 2983.0, "40": 2260.0, "45": 3953.0, "50": 3549.0, "55": 3586.0, "60": 2638.0, "65": 3507.0, "70": 3826.0, "75": 5012.0, "80": 3497.0, "85": 4326.0, "90": 4683.0, "95": 4357.0, "100": 3233.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1754654208.0, "5": 1754654208.0, "10": 1754654208.0, "15": 1754654208.0, "20": 1754654208.0, "25": 1754654208.0, "30": 1754654208.0, "35": 1754654208.0, "40": 1754654208.0, "45": 1754654208.0, "50": 1754654208.0, "55": 1754654208.0, "60": 1754654208.0, "65": 1754654208.0, "70": 1754654208.0, "75": 1754654208.0, "80": 1754654208.0, "85": 1754654208.0, "90": 1754654208.0, "95": 1754654208.0, "100": 1754654208.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2313432064.0, "5": 3055894528.0, "10": 3055894528.0, "15": 3055894528.0, "20": 3055894528.0, "25": 3055894528.0, "30": 3055894528.0, "35": 3055894528.0, "40": 3055894528.0, "45": 3055894528.0, "50": 3055894528.0, "55": 3055894528.0, "60": 3055894528.0, "65": 3055894528.0, "70": 3055894528.0, "75": 3055894528.0, "80": 3055894528.0, "85": 3055894528.0, "90": 3055894528.0, "95": 3055894528.0, "100": 3055894528.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 11.38046, "5": 1.68311, "10": 1.68974, "15": 1.6798, "20": 1.68022, "25": 1.71075, "30": 1.67873, "35": 1.68199, "40": 1.68158, "45": 1.68705, "50": 1.68278, "55": 1.68023, "60": 1.67589, "65": 1.68002, "70": 1.67968, "75": 1.68272, "80": 1.70105, "85": 1.68925, "90": 1.70082, "95": 1.68015, "100": 1.68441}}}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.5255,
            "2": 10.54543,
            "3": 10.53296,
            "4": 10.54172,
            "5": 10.53695,
            "6": 10.54202,
            "7": 10.54756,
            "8": 10.54057,
            "9": 10.5332,
            "10": 10.52931,
            "11": 10.53027,
            "12": 10.53716,
            "13": 10.53856,
            "14": 10.52431,
            "15": 10.52246,
            "16": 10.50461,
            "17": 10.51158,
            "18": 10.50818,
            "19": 10.50562,
            "20": 10.51022,
            "21": 10.5106,
            "22": 10.44632,
            "23": 10.43227,
            "24": 10.44226,
            "25": 10.40713,
            "26": 10.41644,
            "27": 10.39817,
            "28": 10.41053,
            "29": 10.36603,
            "30": 10.28268,
            "31": 10.23034,
            "32": 10.20526,
            "33": 10.21539,
            "34": 10.17031,
            "35": 10.14776,
            "36": 10.12828,
            "37": 10.1162,
            "38": 10.11801,
            "39": 10.07925,
            "40": 10.01108,
            "41": 9.96785,
            "42": 9.92765,
            "43": 9.92013,
            "44": 9.86313,
            "45": 9.8384,
            "46": 9.77939,
            "47": 9.77261,
            "48": 9.74972,
            "49": 9.77876,
            "50": 9.75325,
            "51": 9.75982,
            "52": 9.70938,
            "53": 9.67157,
            "54": 9.69254,
            "55": 9.68023,
            "56": 9.6724,
            "57": 9.60149,
            "58": 9.61869,
            "59": 9.54613,
            "60": 9.61183,
            "61": 9.5485,
            "62": 9.53684,
            "63": 9.5229,
            "64": 9.51064,
            "65": 9.52236,
            "66": 9.49106,
            "67": 9.458,
            "68": 9.44145,
            "69": 9.44261,
            "70": 9.44,
            "71": 9.47046,
            "72": 9.45683,
            "73": 9.40592,
            "74": 9.45691,
            "75": 9.40451,
            "76": 9.37381,
            "77": 9.34376,
            "78": 9.37822,
            "79": 9.41047,
            "80": 9.34538,
            "81": 9.33084,
            "82": 9.34567,
            "83": 9.31601,
            "84": 9.29953,
            "85": 9.33611,
            "86": 9.26941,
            "87": 9.3142,
            "88": 9.29905,
            "89": 9.26883,
            "90": 9.34101,
            "91": 9.25606,
            "92": 9.29678,
            "93": 9.2997,
            "94": 9.27544,
            "95": 9.28063,
            "96": 9.1827,
            "97": 9.26469,
            "98": 9.19697,
            "99": 9.21958,
            "100": 9.22941
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2627.0,
            "2": 2077.0,
            "3": 1787.0,
            "4": 2543.0,
            "5": 2299.0,
            "6": 2005.0,
            "7": 2340.0,
            "8": 2609.0,
            "9": 2445.0,
            "10": 2549.0,
            "11": 2729.0,
            "12": 1973.0,
            "13": 2344.0,
            "14": 2815.0,
            "15": 2138.0,
            "16": 2695.0,
            "17": 2734.0,
            "18": 2618.0,
            "19": 2684.0,
            "20": 2774.0,
            "21": 2232.0,
            "22": 2741.0,
            "23": 2766.0,
            "24": 2765.0,
            "25": 2603.0,
            "26": 2752.0,
            "27": 2710.0,
            "28": 2850.0,
            "29": 2548.0,
            "30": 2860.0,
            "31": 2455.0,
            "32": 2905.0,
            "33": 2370.0,
            "34": 2633.0,
            "35": 2616.0,
            "36": 2951.0,
            "37": 3244.0,
            "38": 2689.0,
            "39": 2961.0,
            "40": 3435.0,
            "41": 1681.0,
            "42": 1506.0,
            "43": 1708.0,
            "44": 2844.0,
            "45": 3578.0,
            "46": 3385.0,
            "47": 3166.0,
            "48": 2665.0,
            "49": 2581.0,
            "50": 2070.0,
            "51": 1804.0,
            "52": 2468.0,
            "53": 3818.0,
            "54": 3687.0,
            "55": 3358.0,
            "56": 4355.0,
            "57": 3881.0,
            "58": 4252.0,
            "59": 1801.0,
            "60": 2582.0,
            "61": 2222.0,
            "62": 4043.0,
            "63": 4021.0,
            "64": 4455.0,
            "65": 3146.0,
            "66": 1989.0,
            "67": 2145.0,
            "68": 4101.0,
            "69": 4371.0,
            "70": 4063.0,
            "71": 2082.0,
            "72": 4105.0,
            "73": 3483.0,
            "74": 2475.0,
            "75": 5400.0,
            "76": 2650.0,
            "77": 4081.0,
            "78": 4434.0,
            "79": 2230.0,
            "80": 3435.0,
            "81": 3996.0,
            "82": 3714.0,
            "83": 4848.0,
            "84": 5477.0,
            "85": 4379.0,
            "86": 3982.0,
            "87": 3568.0,
            "88": 4496.0,
            "89": 3904.0,
            "90": 4523.0,
            "91": 4699.0,
            "92": 3994.0,
            "93": 3782.0,
            "94": 3063.0,
            "95": 4124.0,
            "96": 3709.0,
            "97": 3391.0,
            "98": 4634.0,
            "99": 3766.0,
            "100": 3345.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1796646400.0,
            "2": 1796646400.0,
            "3": 1796646400.0,
            "4": 1796646400.0,
            "5": 1796646400.0,
            "6": 1796646400.0,
            "7": 1796646400.0,
            "8": 1796646400.0,
            "9": 1796646400.0,
            "10": 1796646400.0,
            "11": 1796646400.0,
            "12": 1796646400.0,
            "13": 1796646400.0,
            "14": 1796646400.0,
            "15": 1796646400.0,
            "16": 1796646400.0,
            "17": 1796646400.0,
            "18": 1797694976.0,
            "19": 1796646400.0,
            "20": 1796646400.0,
            "21": 1796646400.0,
            "22": 1796646400.0,
            "23": 1796646400.0,
            "24": 1796646400.0,
            "25": 1796646400.0,
            "26": 1796646400.0,
            "27": 1796646400.0,
            "28": 1796646400.0,
            "29": 1796646400.0,
            "30": 1796646400.0,
            "31": 1796646400.0,
            "32": 1796646400.0,
            "33": 1796646400.0,
            "34": 1796646400.0,
            "35": 1796646400.0,
            "36": 1796646400.0,
            "37": 1796646400.0,
            "38": 1796646400.0,
            "39": 1796646400.0,
            "40": 1796646400.0,
            "41": 1796646400.0,
            "42": 1796646400.0,
            "43": 1796646400.0,
            "44": 1796646400.0,
            "45": 1796646400.0,
            "46": 1796646400.0,
            "47": 1796646400.0,
            "48": 1796646400.0,
            "49": 1796646400.0,
            "50": 1796646400.0,
            "51": 1796646400.0,
            "52": 1796646400.0,
            "53": 1796646400.0,
            "54": 1796646400.0,
            "55": 1796646400.0,
            "56": 1796646400.0,
            "57": 1796646400.0,
            "58": 1796646400.0,
            "59": 1796646400.0,
            "60": 1796646400.0,
            "61": 1796646400.0,
            "62": 1796646400.0,
            "63": 1796646400.0,
            "64": 1796646400.0,
            "65": 1796646400.0,
            "66": 1796646400.0,
            "67": 1796646400.0,
            "68": 1796646400.0,
            "69": 1796646400.0,
            "70": 1796646400.0,
            "71": 1796646400.0,
            "72": 1796646400.0,
            "73": 1796646400.0,
            "74": 1796646400.0,
            "75": 1796646400.0,
            "76": 1796646400.0,
            "77": 1796646400.0,
            "78": 1796646400.0,
            "79": 1796646400.0,
            "80": 1796646400.0,
            "81": 1796646400.0,
            "82": 1796646400.0,
            "83": 1796646400.0,
            "84": 1796646400.0,
            "85": 1796646400.0,
            "86": 1796646400.0,
            "87": 1796646400.0,
            "88": 1796646400.0,
            "89": 1796646400.0,
            "90": 1796646400.0,
            "91": 1796646400.0,
            "92": 1796646400.0,
            "93": 1796646400.0,
            "94": 1796646400.0,
            "95": 1796646400.0,
            "96": 1796646400.0,
            "97": 1796646400.0,
            "98": 1796646400.0,
            "99": 1796646400.0,
            "100": 1796646400.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2376915456.0,
            "2": 3126718464.0,
            "3": 3126723584.0,
            "4": 3126723584.0,
            "5": 3126723584.0,
            "6": 3126723584.0,
            "7": 3126723584.0,
            "8": 3126723584.0,
            "9": 3126723584.0,
            "10": 3126723584.0,
            "11": 3126723584.0,
            "12": 3126723584.0,
            "13": 3126724096.0,
            "14": 3126724096.0,
            "15": 3126724096.0,
            "16": 3126724096.0,
            "17": 3126724096.0,
            "18": 3126724096.0,
            "19": 3126724096.0,
            "20": 3126724096.0,
            "21": 3126724096.0,
            "22": 3126724096.0,
            "23": 3126724096.0,
            "24": 3126724096.0,
            "25": 3126724096.0,
            "26": 3126724096.0,
            "27": 3126724096.0,
            "28": 3126724096.0,
            "29": 3126724096.0,
            "30": 3126724096.0,
            "31": 3126724096.0,
            "32": 3126724096.0,
            "33": 3126724096.0,
            "34": 3126724096.0,
            "35": 3128817152.0,
            "36": 3128817152.0,
            "37": 3128817152.0,
            "38": 3128817152.0,
            "39": 3128817152.0,
            "40": 3128817152.0,
            "41": 3128817152.0,
            "42": 3128817152.0,
            "43": 3128817152.0,
            "44": 3128817152.0,
            "45": 3128817152.0,
            "46": 3128817152.0,
            "47": 3128817152.0,
            "48": 3128817152.0,
            "49": 3128817152.0,
            "50": 3128817152.0,
            "51": 3128817152.0,
            "52": 3128817152.0,
            "53": 3128817152.0,
            "54": 3128817152.0,
            "55": 3128817152.0,
            "56": 3128817152.0,
            "57": 3128817152.0,
            "58": 3128817152.0,
            "59": 3128817152.0,
            "60": 3128817152.0,
            "61": 3128817152.0,
            "62": 3128817152.0,
            "63": 3128817152.0,
            "64": 3128817152.0,
            "65": 3128817152.0,
            "66": 3128817152.0,
            "67": 3128817152.0,
            "68": 3128817152.0,
            "69": 3128817152.0,
            "70": 3128817152.0,
            "71": 3128817152.0,
            "72": 3128817152.0,
            "73": 3128817152.0,
            "74": 3128817152.0,
            "75": 3128817152.0,
            "76": 3128817152.0,
            "77": 3128817152.0,
            "78": 3128817152.0,
            "79": 3128817152.0,
            "80": 3128817152.0,
            "81": 3128817152.0,
            "82": 3128817152.0,
            "83": 3128817152.0,
            "84": 3128817152.0,
            "85": 3128817152.0,
            "86": 3128817152.0,
            "87": 3128817152.0,
            "88": 3128817152.0,
            "89": 3128817152.0,
            "90": 3128817152.0,
            "91": 3128817152.0,
            "92": 3128817152.0,
            "93": 3128817152.0,
            "94": 3128817152.0,
            "95": 3128817152.0,
            "96": 3128817152.0,
            "97": 3128817152.0,
            "98": 3128817152.0,
            "99": 3128817152.0,
            "100": 3128817152.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 6.76631,
            "3": 0.95984,
            "4": 0.94975,
            "5": 0.93831,
            "6": 0.945,
            "7": 0.93711,
            "8": 0.94821,
            "9": 0.94811,
            "10": 0.90992,
            "11": 0.9045,
            "12": 0.95433,
            "13": 0.91015,
            "14": 0.9083,
            "15": 0.90314,
            "16": 0.90971,
            "17": 0.8884,
            "18": 0.88846,
            "19": 0.89125,
            "20": 0.88742,
            "21": 1.19785,
            "22": 1.20611,
            "23": 1.55135,
            "24": 1.48332,
            "25": 0.90669,
            "26": 0.90463,
            "27": 0.89318,
            "28": 0.89164,
            "29": 0.88961,
            "30": 0.90018,
            "31": 0.90462,
            "32": 0.90641,
            "33": 0.90556,
            "34": 0.98589,
            "35": 1.1539,
            "36": 0.88875,
            "37": 0.89282,
            "38": 0.89295,
            "39": 0.8889,
            "40": 0.88835,
            "41": 0.95698,
            "42": 0.89214,
            "43": 0.89368,
            "44": 0.89819,
            "45": 0.89685,
            "46": 0.89476,
            "47": 0.89421,
            "48": 0.89614,
            "49": 0.89461,
            "50": 0.89638,
            "51": 1.25608,
            "52": 1.33601,
            "53": 1.33532,
            "54": 1.10995,
            "55": 1.16395,
            "56": 0.89218,
            "57": 0.89161,
            "58": 0.89187,
            "59": 0.89198,
            "60": 0.88939,
            "61": 0.8924,
            "62": 0.89611,
            "63": 0.90486,
            "64": 0.89408,
            "65": 0.88917,
            "66": 0.89096,
            "67": 0.93479,
            "68": 0.88888,
            "69": 0.89018,
            "70": 1.29881,
            "71": 1.34537,
            "72": 0.89411,
            "73": 0.89067,
            "74": 1.35116,
            "75": 0.90202,
            "76": 0.92355,
            "77": 0.9139,
            "78": 0.90763,
            "79": 1.13089,
            "80": 0.91411,
            "81": 0.91094,
            "82": 1.55434,
            "83": 1.12496,
            "84": 1.09892,
            "85": 1.10172,
            "86": 0.89499,
            "87": 0.89706,
            "88": 0.89478,
            "89": 1.30503,
            "90": 0.89391,
            "91": 0.89533,
            "92": 0.89547,
            "93": 0.89634,
            "94": 0.89647,
            "95": 0.89626,
            "96": 0.8957,
            "97": 1.35845,
            "98": 0.89865,
            "99": 0.89531,
            "100": 0.89447
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.76001,
            "52": 9.70981,
            "53": 9.67192,
            "54": 9.69327,
            "55": 9.67994,
            "56": 9.67301,
            "57": 9.60209,
            "58": 9.61874,
            "59": 9.54659,
            "60": 9.61259,
            "61": 9.54836,
            "62": 9.53716,
            "63": 9.52333,
            "64": 9.51044,
            "65": 9.52202,
            "66": 9.4896,
            "67": 9.4575,
            "68": 9.44091,
            "69": 9.44314,
            "70": 9.43974,
            "71": 9.4698,
            "72": 9.45651,
            "73": 9.40468,
            "74": 9.45623,
            "75": 9.40499,
            "76": 9.37331,
            "77": 9.34347,
            "78": 9.37859,
            "79": 9.41089,
            "80": 9.34502,
            "81": 9.33074,
            "82": 9.34623,
            "83": 9.31635,
            "84": 9.29926,
            "85": 9.33611,
            "86": 9.26905,
            "87": 9.31456,
            "88": 9.29844,
            "89": 9.26888,
            "90": 9.34061,
            "91": 9.25718,
            "92": 9.29635,
            "93": 9.29969,
            "94": 9.2754,
            "95": 9.28117,
            "96": 9.18254,
            "97": 9.26445,
            "98": 9.1966,
            "99": 9.21992,
            "100": 9.22929
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 1818.0,
            "52": 2620.0,
            "53": 3744.0,
            "54": 3604.0,
            "55": 3334.0,
            "56": 4304.0,
            "57": 4040.0,
            "58": 4220.0,
            "59": 1787.0,
            "60": 2678.0,
            "61": 2295.0,
            "62": 3929.0,
            "63": 3820.0,
            "64": 4560.0,
            "65": 3100.0,
            "66": 2039.0,
            "67": 2163.0,
            "68": 4135.0,
            "69": 4393.0,
            "70": 4091.0,
            "71": 2120.0,
            "72": 4062.0,
            "73": 3510.0,
            "74": 2614.0,
            "75": 5305.0,
            "76": 2601.0,
            "77": 4058.0,
            "78": 4315.0,
            "79": 2234.0,
            "80": 3448.0,
            "81": 4090.0,
            "82": 3752.0,
            "83": 4925.0,
            "84": 5349.0,
            "85": 4450.0,
            "86": 4011.0,
            "87": 3738.0,
            "88": 4415.0,
            "89": 3811.0,
            "90": 4620.0,
            "91": 4703.0,
            "92": 4036.0,
            "93": 3711.0,
            "94": 3059.0,
            "95": 4017.0,
            "96": 3793.0,
            "97": 3300.0,
            "98": 4562.0,
            "99": 3832.0,
            "100": 3458.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 1796646400.0,
            "52": 1796646400.0,
            "53": 1796646400.0,
            "54": 1796646400.0,
            "55": 1796646400.0,
            "56": 1796646400.0,
            "57": 1796646400.0,
            "58": 1796646400.0,
            "59": 1796646400.0,
            "60": 1796646400.0,
            "61": 1796646400.0,
            "62": 1796646400.0,
            "63": 1796646400.0,
            "64": 1796646400.0,
            "65": 1796646400.0,
            "66": 1796646400.0,
            "67": 1796646400.0,
            "68": 1796646400.0,
            "69": 1796646400.0,
            "70": 1796646400.0,
            "71": 1796646400.0,
            "72": 1796646400.0,
            "73": 1796646400.0,
            "74": 1796646400.0,
            "75": 1796646400.0,
            "76": 1796646400.0,
            "77": 1796646400.0,
            "78": 1796646400.0,
            "79": 1796646400.0,
            "80": 1796646400.0,
            "81": 1796646400.0,
            "82": 1796646400.0,
            "83": 1796646400.0,
            "84": 1796646400.0,
            "85": 1796646400.0,
            "86": 1796646400.0,
            "87": 1796646400.0,
            "88": 1796646400.0,
            "89": 1796646400.0,
            "90": 1796646400.0,
            "91": 1796646400.0,
            "92": 1796646400.0,
            "93": 1796646400.0,
            "94": 1796646400.0,
            "95": 1796646400.0,
            "96": 1796646400.0,
            "97": 1796646400.0,
            "98": 1796646400.0,
            "99": 1796646400.0,
            "100": 1796646400.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 3124624896.0,
            "52": 3124626944.0,
            "53": 3124626944.0,
            "54": 3124626944.0,
            "55": 3124626944.0,
            "56": 3124626944.0,
            "57": 3124626944.0,
            "58": 3124626944.0,
            "59": 3124626944.0,
            "60": 3124626944.0,
            "61": 3124626944.0,
            "62": 3124626944.0,
            "63": 3124626944.0,
            "64": 3124626944.0,
            "65": 3124626944.0,
            "66": 3124626944.0,
            "67": 3124626944.0,
            "68": 3124626944.0,
            "69": 3124626944.0,
            "70": 3124626944.0,
            "71": 3124626944.0,
            "72": 3124626944.0,
            "73": 3124626944.0,
            "74": 3124626944.0,
            "75": 3124626944.0,
            "76": 3124626944.0,
            "77": 3124626944.0,
            "78": 3124626944.0,
            "79": 3124626944.0,
            "80": 3124626944.0,
            "81": 3124626944.0,
            "82": 3124626944.0,
            "83": 3124626944.0,
            "84": 3124626944.0,
            "85": 3124626944.0,
            "86": 3124626944.0,
            "87": 3124626944.0,
            "88": 3124626944.0,
            "89": 3124626944.0,
            "90": 3124626944.0,
            "91": 3124626944.0,
            "92": 3124626944.0,
            "93": 3124626944.0,
            "94": 3124626944.0,
            "95": 3126723584.0,
            "96": 3126723584.0,
            "97": 3126723584.0,
            "98": 3126723584.0,
            "99": 3126723584.0,
            "100": 3126723584.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": "nan",
            "52": 6.03911,
            "53": 0.97416,
            "54": 0.94779,
            "55": 0.9496,
            "56": 0.94003,
            "57": 0.95104,
            "58": 0.9583,
            "59": 0.94134,
            "60": 0.94902,
            "61": 0.94593,
            "62": 0.93544,
            "63": 0.94434,
            "64": 0.94553,
            "65": 0.92679,
            "66": 0.90378,
            "67": 0.90184,
            "68": 0.91076,
            "69": 0.90954,
            "70": 0.91372,
            "71": 0.91281,
            "72": 0.91175,
            "73": 0.91046,
            "74": 0.91357,
            "75": 0.90873,
            "76": 0.91033,
            "77": 0.91092,
            "78": 1.19718,
            "79": 1.185,
            "80": 1.2732,
            "81": 0.90464,
            "82": 0.90482,
            "83": 0.90412,
            "84": 0.90648,
            "85": 0.9074,
            "86": 0.91479,
            "87": 0.91427,
            "88": 0.91177,
            "89": 0.91209,
            "90": 0.913,
            "91": 0.9133,
            "92": 0.98243,
            "93": 0.91047,
            "94": 0.91069,
            "95": 0.91618,
            "96": 0.91277,
            "97": 0.90968,
            "98": 0.91034,
            "99": 0.9131,
            "100": 0.91106
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.49574, "5": 10.48398, "10": 10.49943, "15": 10.4663, "20": 10.44775, "25": 10.34954, "30": 10.17283, "35": 10.0427, "40": 9.9076, "45": 9.7577, "50": 9.67688, "55": 9.55375, "60": 9.4546, "65": 9.42141, "70": 9.30109, "75": 9.32202, "80": 9.26199, "85": 9.29667, "90": 9.2334, "95": 9.23801, "100": 9.10601}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2182.0, "5": 2568.0, "10": 2108.0, "15": 2533.0, "20": 2166.0, "25": 2639.0, "30": 2769.0, "35": 3080.0, "40": 2282.0, "45": 3831.0, "50": 3519.0, "55": 3692.0, "60": 2614.0, "65": 3344.0, "70": 4018.0, "75": 4983.0, "80": 3679.0, "85": 4082.0, "90": 4634.0, "95": 4487.0, "100": 3079.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1754654208.0, "5": 1754654208.0, "10": 1754654208.0, "15": 1754654208.0, "20": 1754654208.0, "25": 1754654208.0, "30": 1754654208.0, "35": 1754654208.0, "40": 1754654208.0, "45": 1754654208.0, "50": 1754654208.0, "55": 1754654208.0, "60": 1754654208.0, "65": 1754654208.0, "70": 1754654208.0, "75": 1755702784.0, "80": 1754654208.0, "85": 1754654208.0, "90": 1754654208.0, "95": 1754654208.0, "100": 1754654208.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2300849152.0, "5": 3043311616.0, "10": 3043311616.0, "15": 3043311616.0, "20": 3043311616.0, "25": 3043311616.0, "30": 3043311616.0, "35": 3043311616.0, "40": 3043311616.0, "45": 3043311616.0, "50": 3043311616.0, "55": 3043311616.0, "60": 3043311616.0, "65": 3043311616.0, "70": 3043311616.0, "75": 3043311616.0, "80": 3043311616.0, "85": 3043311616.0, "90": 3043311616.0, "95": 3043311616.0, "100": 3043311616.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 13.04253, "5": 1.18237, "10": 1.19035, "15": 1.15172, "20": 1.13338, "25": 1.24747, "30": 1.14325, "35": 1.14281, "40": 1.15392, "45": 1.16568, "50": 1.16303, "55": 1.18009, "60": 1.17624, "65": 1.17621, "70": 1.1845, "75": 1.19129, "80": 1.19627, "85": 1.18614, "90": 1.18685, "95": 1.20386, "100": 1.40621}}}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 24
  --hidden-size: 1024
  --num-attention-heads: 16
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 128
  --seq-length: 512
  --max-position-embeddings: 512
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 990000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence
  --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.0001
  --min-lr: 0.00001
  --lr-warmup-fraction: 0.01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --tensor-model-parallel-size: 2
  --pipeline-model-parallel-size: 2
  --deterministic-mode: true
  --use-checkpoint-args: true
  --use-checkpoint-opt_param-scheduler: true
  --no-gradient-accumulation-fusion: true
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --ckpt-format: torch
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --attention-backend: unfused
TEST_TYPE: ckpt-resume


================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.49711, "5": 10.48154, "10": 10.50197, "15": 10.45909, "20": 10.44614, "25": 10.35085, "30": 10.16654, "35": 10.04394, "40": 9.9092, "45": 9.75814, "50": 9.67518, "55": 9.55395, "60": 9.45445, "65": 9.42273, "70": 9.30042, "75": 9.32469, "80": 9.26123, "85": 9.29645, "90": 9.23225, "95": 9.23791, "100": 9.10605}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2095.0, "5": 2660.0, "10": 2235.0, "15": 2491.0, "20": 2216.0, "25": 2531.0, "30": 2718.0, "35": 2945.0, "40": 2310.0, "45": 3831.0, "50": 3502.0, "55": 3624.0, "60": 2637.0, "65": 3554.0, "70": 4003.0, "75": 5020.0, "80": 3514.0, "85": 4269.0, "90": 4632.0, "95": 4445.0, "100": 3153.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1767237120.0, "5": 1767237120.0, "10": 1767237120.0, "15": 1767237120.0, "20": 1767237120.0, "25": 1767237120.0, "30": 1767237120.0, "35": 1767237120.0, "40": 1767237120.0, "45": 1767237120.0, "50": 1767237120.0, "55": 1767237120.0, "60": 1767237120.0, "65": 1768285696.0, "70": 1767237120.0, "75": 1767237120.0, "80": 1767237120.0, "85": 1767237120.0, "90": 1767237120.0, "95": 1767237120.0, "100": 1767237120.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2313432064.0, "5": 3055894528.0, "10": 3055894528.0, "15": 3055894528.0, "20": 3055894528.0, "25": 3055894528.0, "30": 3055894528.0, "35": 3055894528.0, "40": 3055894528.0, "45": 3055894528.0, "50": 3055894528.0, "55": 3055894528.0, "60": 3055894528.0, "65": 3055894528.0, "70": 3055894528.0, "75": 3055894528.0, "80": 3055894528.0, "85": 3055894528.0, "90": 3055894528.0, "95": 3055894528.0, "100": 3055894528.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 5.52043, "5": 1.15556, "10": 1.16775, "15": 1.15032, "20": 1.40753, "25": 1.14433, "30": 1.14733, "35": 1.14257, "40": 1.15327, "45": 1.13603, "50": 1.13335, "55": 1.1443, "60": 1.16534, "65": 1.14076, "70": 1.12574, "75": 1.12809, "80": 1.17823, "85": 1.60747, "90": 1.19608, "95": 1.15652, "100": 1.1273}}}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.52538,
            "2": 10.54405,
            "3": 10.53279,
            "4": 10.53976,
            "5": 10.53691,
            "6": 10.543,
            "7": 10.54588,
            "8": 10.5404,
            "9": 10.5336,
            "10": 10.52908,
            "11": 10.5317,
            "12": 10.53731,
            "13": 10.53878,
            "14": 10.52481,
            "15": 10.52268,
            "16": 10.50433,
            "17": 10.513,
            "18": 10.50762,
            "19": 10.50404,
            "20": 10.51124,
            "21": 10.50937,
            "22": 10.44494,
            "23": 10.43154,
            "24": 10.44097,
            "25": 10.40599,
            "26": 10.41479,
            "27": 10.40001,
            "28": 10.41072,
            "29": 10.36647,
            "30": 10.28229,
            "31": 10.22963,
            "32": 10.20468,
            "33": 10.21549,
            "34": 10.1706,
            "35": 10.14875,
            "36": 10.12819,
            "37": 10.11663,
            "38": 10.11882,
            "39": 10.08011,
            "40": 10.01123,
            "41": 9.96741,
            "42": 9.92753,
            "43": 9.9211,
            "44": 9.86398,
            "45": 9.8389,
            "46": 9.77921,
            "47": 9.77269,
            "48": 9.74952,
            "49": 9.77861,
            "50": 9.75389,
            "51": 9.76089,
            "52": 9.71033,
            "53": 9.67207,
            "54": 9.69277,
            "55": 9.68065,
            "56": 9.67265,
            "57": 9.60244,
            "58": 9.61924,
            "59": 9.54677,
            "60": 9.61248,
            "61": 9.54764,
            "62": 9.53711,
            "63": 9.52351,
            "64": 9.51103,
            "65": 9.52359,
            "66": 9.49117,
            "67": 9.45813,
            "68": 9.44041,
            "69": 9.44341,
            "70": 9.43985,
            "71": 9.46988,
            "72": 9.45668,
            "73": 9.40509,
            "74": 9.45601,
            "75": 9.40461,
            "76": 9.37348,
            "77": 9.34344,
            "78": 9.37804,
            "79": 9.41146,
            "80": 9.34475,
            "81": 9.3307,
            "82": 9.34695,
            "83": 9.31574,
            "84": 9.29963,
            "85": 9.3365,
            "86": 9.26935,
            "87": 9.31414,
            "88": 9.29826,
            "89": 9.26944,
            "90": 9.34131,
            "91": 9.25633,
            "92": 9.29676,
            "93": 9.29998,
            "94": 9.27507,
            "95": 9.28001,
            "96": 9.18223,
            "97": 9.26464,
            "98": 9.19677,
            "99": 9.21997,
            "100": 9.22913
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2706.0,
            "2": 2115.0,
            "3": 1731.0,
            "4": 2535.0,
            "5": 2330.0,
            "6": 1911.0,
            "7": 2378.0,
            "8": 2557.0,
            "9": 2479.0,
            "10": 2466.0,
            "11": 2681.0,
            "12": 1962.0,
            "13": 2320.0,
            "14": 2872.0,
            "15": 2205.0,
            "16": 2769.0,
            "17": 2689.0,
            "18": 2529.0,
            "19": 2708.0,
            "20": 2798.0,
            "21": 2344.0,
            "22": 2776.0,
            "23": 2692.0,
            "24": 2751.0,
            "25": 2705.0,
            "26": 2818.0,
            "27": 2692.0,
            "28": 2763.0,
            "29": 2618.0,
            "30": 2963.0,
            "31": 2474.0,
            "32": 2882.0,
            "33": 2355.0,
            "34": 2505.0,
            "35": 2616.0,
            "36": 2980.0,
            "37": 3228.0,
            "38": 2763.0,
            "39": 2986.0,
            "40": 3365.0,
            "41": 1718.0,
            "42": 1468.0,
            "43": 1770.0,
            "44": 2838.0,
            "45": 3671.0,
            "46": 3406.0,
            "47": 3197.0,
            "48": 2714.0,
            "49": 2486.0,
            "50": 2073.0,
            "51": 1798.0,
            "52": 2521.0,
            "53": 3896.0,
            "54": 3642.0,
            "55": 3344.0,
            "56": 4315.0,
            "57": 4027.0,
            "58": 4215.0,
            "59": 1770.0,
            "60": 2626.0,
            "61": 2300.0,
            "62": 3964.0,
            "63": 4018.0,
            "64": 4477.0,
            "65": 3066.0,
            "66": 2008.0,
            "67": 2066.0,
            "68": 4065.0,
            "69": 4448.0,
            "70": 4129.0,
            "71": 2066.0,
            "72": 4050.0,
            "73": 3466.0,
            "74": 2479.0,
            "75": 5317.0,
            "76": 2644.0,
            "77": 4044.0,
            "78": 4339.0,
            "79": 2270.0,
            "80": 3473.0,
            "81": 3809.0,
            "82": 3643.0,
            "83": 4759.0,
            "84": 5407.0,
            "85": 4356.0,
            "86": 4011.0,
            "87": 3620.0,
            "88": 4507.0,
            "89": 3831.0,
            "90": 4585.0,
            "91": 4698.0,
            "92": 3942.0,
            "93": 3759.0,
            "94": 3238.0,
            "95": 3976.0,
            "96": 3651.0,
            "97": 3461.0,
            "98": 4613.0,
            "99": 3821.0,
            "100": 3238.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1779868160.0,
            "2": 1779868160.0,
            "3": 1779868160.0,
            "4": 1779868160.0,
            "5": 1779868160.0,
            "6": 1779868160.0,
            "7": 1779868160.0,
            "8": 1779868160.0,
            "9": 1779868160.0,
            "10": 1779868160.0,
            "11": 1779868160.0,
            "12": 1779868160.0,
            "13": 1779868160.0,
            "14": 1779868160.0,
            "15": 1779868160.0,
            "16": 1779868160.0,
            "17": 1779868160.0,
            "18": 1779868160.0,
            "19": 1779868160.0,
            "20": 1779868160.0,
            "21": 1779868160.0,
            "22": 1779868160.0,
            "23": 1779868160.0,
            "24": 1779868160.0,
            "25": 1779868160.0,
            "26": 1779868160.0,
            "27": 1779868160.0,
            "28": 1779868160.0,
            "29": 1779868160.0,
            "30": 1779868160.0,
            "31": 1779868160.0,
            "32": 1779868160.0,
            "33": 1779868160.0,
            "34": 1779868160.0,
            "35": 1779868160.0,
            "36": 1779868160.0,
            "37": 1779868160.0,
            "38": 1779868160.0,
            "39": 1779868160.0,
            "40": 1779868160.0,
            "41": 1779868160.0,
            "42": 1779868160.0,
            "43": 1779868160.0,
            "44": 1779868160.0,
            "45": 1779868160.0,
            "46": 1779868160.0,
            "47": 1779868160.0,
            "48": 1779868160.0,
            "49": 1779868160.0,
            "50": 1779868160.0,
            "51": 1779868160.0,
            "52": 1779868160.0,
            "53": 1779868160.0,
            "54": 1779868160.0,
            "55": 1779868160.0,
            "56": 1779868160.0,
            "57": 1779868160.0,
            "58": 1779868160.0,
            "59": 1779868160.0,
            "60": 1779868160.0,
            "61": 1779868160.0,
            "62": 1779868160.0,
            "63": 1779868160.0,
            "64": 1779868160.0,
            "65": 1779868160.0,
            "66": 1779868160.0,
            "67": 1779868160.0,
            "68": 1779868160.0,
            "69": 1779868160.0,
            "70": 1779868160.0,
            "71": 1779868160.0,
            "72": 1779868160.0,
            "73": 1779868160.0,
            "74": 1779868160.0,
            "75": 1779868160.0,
            "76": 1779868160.0,
            "77": 1779868160.0,
            "78": 1779868160.0,
            "79": 1779868160.0,
            "80": 1779868160.0,
            "81": 1779868160.0,
            "82": 1779868160.0,
            "83": 1779868160.0,
            "84": 1779868160.0,
            "85": 1779868160.0,
            "86": 1779868160.0,
            "87": 1779868160.0,
            "88": 1779868160.0,
            "89": 1779868160.0,
            "90": 1779868160.0,
            "91": 1779868160.0,
            "92": 1779868160.0,
            "93": 1779868160.0,
            "94": 1779868160.0,
            "95": 1779868160.0,
            "96": 1779868160.0,
            "97": 1779868160.0,
            "98": 1779868160.0,
            "99": 1779868160.0,
            "100": 1779868160.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2347554304.0,
            "2": 3095265792.0,
            "3": 3095265792.0,
            "4": 3095265792.0,
            "5": 3097362432.0,
            "6": 3097362432.0,
            "7": 3097362432.0,
            "8": 3097362432.0,
            "9": 3097362432.0,
            "10": 3097362432.0,
            "11": 3097362432.0,
            "12": 3097362432.0,
            "13": 3097362432.0,
            "14": 3097362432.0,
            "15": 3097362432.0,
            "16": 3097362432.0,
            "17": 3097362432.0,
            "18": 3097362432.0,
            "19": 3097362432.0,
            "20": 3097362432.0,
            "21": 3097362432.0,
            "22": 3097362432.0,
            "23": 3097362432.0,
            "24": 3097362432.0,
            "25": 3097362432.0,
            "26": 3097362432.0,
            "27": 3097362432.0,
            "28": 3097362432.0,
            "29": 3097362432.0,
            "30": 3097362432.0,
            "31": 3097362432.0,
            "32": 3097362432.0,
            "33": 3097362432.0,
            "34": 3097362432.0,
            "35": 3097362432.0,
            "36": 3097362432.0,
            "37": 3097362432.0,
            "38": 3097362432.0,
            "39": 3097362432.0,
            "40": 3097362432.0,
            "41": 3097362432.0,
            "42": 3097362432.0,
            "43": 3097362432.0,
            "44": 3097362432.0,
            "45": 3097362432.0,
            "46": 3097362432.0,
            "47": 3097362432.0,
            "48": 3097362432.0,
            "49": 3097362432.0,
            "50": 3097362432.0,
            "51": 3097362432.0,
            "52": 3097362432.0,
            "53": 3097362432.0,
            "54": 3097362432.0,
            "55": 3097362432.0,
            "56": 3097362432.0,
            "57": 3097362432.0,
            "58": 3097362432.0,
            "59": 3097362432.0,
            "60": 3097362432.0,
            "61": 3097362432.0,
            "62": 3097362432.0,
            "63": 3097362432.0,
            "64": 3097362432.0,
            "65": 3097362432.0,
            "66": 3097362432.0,
            "67": 3097362432.0,
            "68": 3097362432.0,
            "69": 3097362432.0,
            "70": 3097362432.0,
            "71": 3097362432.0,
            "72": 3097362432.0,
            "73": 3097362432.0,
            "74": 3097362432.0,
            "75": 3097362432.0,
            "76": 3097362432.0,
            "77": 3097362432.0,
            "78": 3097362432.0,
            "79": 3097362432.0,
            "80": 3097362944.0,
            "81": 3097362944.0,
            "82": 3097362944.0,
            "83": 3097362944.0,
            "84": 3097362944.0,
            "85": 3097362944.0,
            "86": 3097362944.0,
            "87": 3097362944.0,
            "88": 3097362944.0,
            "89": 3097362944.0,
            "90": 3097362944.0,
            "91": 3097362944.0,
            "92": 3097362944.0,
            "93": 3097362944.0,
            "94": 3097362944.0,
            "95": 3097362944.0,
            "96": 3097362944.0,
            "97": 3097362944.0,
            "98": 3097362944.0,
            "99": 3097362944.0,
            "100": 3097362944.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 6.58997,
            "3": 0.78114,
            "4": 0.7669,
            "5": 0.76339,
            "6": 0.76181,
            "7": 0.76359,
            "8": 0.76853,
            "9": 0.76406,
            "10": 0.76522,
            "11": 0.76932,
            "12": 0.77188,
            "13": 1.03387,
            "14": 0.77087,
            "15": 0.76243,
            "16": 0.77006,
            "17": 0.78137,
            "18": 0.78189,
            "19": 0.78203,
            "20": 1.36049,
            "21": 0.77951,
            "22": 1.30811,
            "23": 1.06829,
            "24": 1.08426,
            "25": 0.77253,
            "26": 1.09007,
            "27": 0.76186,
            "28": 0.76255,
            "29": 0.76136,
            "30": 0.76279,
            "31": 0.7638,
            "32": 0.76071,
            "33": 0.77361,
            "34": 0.76883,
            "35": 0.76901,
            "36": 0.76148,
            "37": 0.76069,
            "38": 0.7629,
            "39": 0.76414,
            "40": 0.75813,
            "41": 0.75661,
            "42": 0.75626,
            "43": 0.75805,
            "44": 0.75655,
            "45": 0.757,
            "46": 0.76864,
            "47": 0.78359,
            "48": 0.95931,
            "49": 1.04485,
            "50": 1.23733,
            "51": 0.89192,
            "52": 1.03607,
            "53": 0.76537,
            "54": 1.19702,
            "55": 0.76311,
            "56": 0.75511,
            "57": 0.75574,
            "58": 0.79074,
            "59": 0.94641,
            "60": 0.7597,
            "61": 0.75771,
            "62": 0.75846,
            "63": 0.76455,
            "64": 0.76221,
            "65": 0.76078,
            "66": 0.76248,
            "67": 0.76124,
            "68": 0.75924,
            "69": 0.76384,
            "70": 0.76354,
            "71": 0.76298,
            "72": 0.76259,
            "73": 0.76264,
            "74": 0.75954,
            "75": 1.15842,
            "76": 0.96979,
            "77": 0.75738,
            "78": 0.95879,
            "79": 0.98573,
            "80": 0.98464,
            "81": 0.98381,
            "82": 0.75507,
            "83": 1.39042,
            "84": 0.75587,
            "85": 0.75366,
            "86": 0.75456,
            "87": 0.75427,
            "88": 1.47865,
            "89": 0.76061,
            "90": 0.75856,
            "91": 0.75496,
            "92": 0.75483,
            "93": 1.3078,
            "94": 1.22248,
            "95": 0.75751,
            "96": 0.75614,
            "97": 0.75908,
            "98": 0.75746,
            "99": 0.75722,
            "100": 0.76347
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.76077,
            "52": 9.71037,
            "53": 9.67209,
            "54": 9.69289,
            "55": 9.67976,
            "56": 9.67195,
            "57": 9.60235,
            "58": 9.62054,
            "59": 9.54681,
            "60": 9.61233,
            "61": 9.54777,
            "62": 9.53717,
            "63": 9.523,
            "64": 9.51039,
            "65": 9.52262,
            "66": 9.49082,
            "67": 9.45855,
            "68": 9.44092,
            "69": 9.44399,
            "70": 9.43963,
            "71": 9.46958,
            "72": 9.45728,
            "73": 9.40619,
            "74": 9.45672,
            "75": 9.40517,
            "76": 9.37395,
            "77": 9.34295,
            "78": 9.37775,
            "79": 9.41154,
            "80": 9.34542,
            "81": 9.33056,
            "82": 9.34678,
            "83": 9.31539,
            "84": 9.2998,
            "85": 9.33665,
            "86": 9.26983,
            "87": 9.31445,
            "88": 9.29902,
            "89": 9.27005,
            "90": 9.34069,
            "91": 9.25634,
            "92": 9.29607,
            "93": 9.29997,
            "94": 9.27642,
            "95": 9.28119,
            "96": 9.18249,
            "97": 9.26425,
            "98": 9.19693,
            "99": 9.22054,
            "100": 9.23002
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 1745.0,
            "52": 2480.0,
            "53": 3940.0,
            "54": 3685.0,
            "55": 3360.0,
            "56": 4255.0,
            "57": 3929.0,
            "58": 4275.0,
            "59": 1739.0,
            "60": 2646.0,
            "61": 2218.0,
            "62": 3977.0,
            "63": 3950.0,
            "64": 4525.0,
            "65": 3019.0,
            "66": 1895.0,
            "67": 2133.0,
            "68": 4062.0,
            "69": 4413.0,
            "70": 4046.0,
            "71": 2204.0,
            "72": 4079.0,
            "73": 3476.0,
            "74": 2504.0,
            "75": 5344.0,
            "76": 2574.0,
            "77": 3970.0,
            "78": 4325.0,
            "79": 2280.0,
            "80": 3423.0,
            "81": 3927.0,
            "82": 3691.0,
            "83": 4786.0,
            "84": 5548.0,
            "85": 4400.0,
            "86": 3970.0,
            "87": 3605.0,
            "88": 4496.0,
            "89": 3951.0,
            "90": 4531.0,
            "91": 4630.0,
            "92": 4008.0,
            "93": 3810.0,
            "94": 3128.0,
            "95": 4023.0,
            "96": 3596.0,
            "97": 3401.0,
            "98": 4733.0,
            "99": 3799.0,
            "100": 3329.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 1779868160.0,
            "52": 1779868160.0,
            "53": 1779868160.0,
            "54": 1779868160.0,
            "55": 1779868160.0,
            "56": 1779868160.0,
            "57": 1779868160.0,
            "58": 1779868160.0,
            "59": 1779868160.0,
            "60": 1779868160.0,
            "61": 1779868160.0,
            "62": 1779868160.0,
            "63": 1779868160.0,
            "64": 1779868160.0,
            "65": 1779868160.0,
            "66": 1779868160.0,
            "67": 1779868160.0,
            "68": 1779868160.0,
            "69": 1779868160.0,
            "70": 1779868160.0,
            "71": 1779868160.0,
            "72": 1779868160.0,
            "73": 1779868160.0,
            "74": 1779868160.0,
            "75": 1779868160.0,
            "76": 1779868160.0,
            "77": 1779868160.0,
            "78": 1779868160.0,
            "79": 1779868160.0,
            "80": 1779868160.0,
            "81": 1779868160.0,
            "82": 1779868160.0,
            "83": 1779868160.0,
            "84": 1779868160.0,
            "85": 1779868160.0,
            "86": 1779868160.0,
            "87": 1779868160.0,
            "88": 1779868160.0,
            "89": 1779868160.0,
            "90": 1779868160.0,
            "91": 1779868160.0,
            "92": 1779868160.0,
            "93": 1779868160.0,
            "94": 1779868160.0,
            "95": 1779868160.0,
            "96": 1779868160.0,
            "97": 1779868160.0,
            "98": 1779868160.0,
            "99": 1779868160.0,
            "100": 1779868160.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 3095263744.0,
            "52": 3095265792.0,
            "53": 3095265792.0,
            "54": 3095265792.0,
            "55": 3095265792.0,
            "56": 3095265792.0,
            "57": 3095265792.0,
            "58": 3095265792.0,
            "59": 3095265792.0,
            "60": 3095265792.0,
            "61": 3095265792.0,
            "62": 3095265792.0,
            "63": 3095265792.0,
            "64": 3095265792.0,
            "65": 3095265792.0,
            "66": 3095265792.0,
            "67": 3095265792.0,
            "68": 3095265792.0,
            "69": 3095265792.0,
            "70": 3095265792.0,
            "71": 3095265792.0,
            "72": 3095265792.0,
            "73": 3095265792.0,
            "74": 3095265792.0,
            "75": 3095265792.0,
            "76": 3095265792.0,
            "77": 3095265792.0,
            "78": 3095265792.0,
            "79": 3095265792.0,
            "80": 3095265792.0,
            "81": 3095265792.0,
            "82": 3095265792.0,
            "83": 3095265792.0,
            "84": 3095265792.0,
            "85": 3095265792.0,
            "86": 3095265792.0,
            "87": 3095265792.0,
            "88": 3095265792.0,
            "89": 3095265792.0,
            "90": 3095265792.0,
            "91": 3095265792.0,
            "92": 3095265792.0,
            "93": 3095265792.0,
            "94": 3095265792.0,
            "95": 3095265792.0,
            "96": 3095265792.0,
            "97": 3095265792.0,
            "98": 3095265792.0,
            "99": 3095265792.0,
            "100": 3095265792.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": "nan",
            "52": 5.73605,
            "53": 0.77824,
            "54": 0.75725,
            "55": 0.7532,
            "56": 0.7616,
            "57": 0.75553,
            "58": 0.75296,
            "59": 0.75491,
            "60": 0.75523,
            "61": 0.75657,
            "62": 0.75208,
            "63": 0.75617,
            "64": 0.75276,
            "65": 0.75464,
            "66": 0.75206,
            "67": 0.75496,
            "68": 0.74988,
            "69": 0.75191,
            "70": 0.75373,
            "71": 0.75115,
            "72": 0.75283,
            "73": 0.75392,
            "74": 0.81749,
            "75": 1.57609,
            "76": 1.0807,
            "77": 1.03733,
            "78": 0.75769,
            "79": 0.76457,
            "80": 0.76184,
            "81": 0.75954,
            "82": 0.76037,
            "83": 0.74979,
            "84": 0.74721,
            "85": 0.75789,
            "86": 0.75931,
            "87": 0.75373,
            "88": 0.75519,
            "89": 0.77826,
            "90": 0.77393,
            "91": 0.7716,
            "92": 0.77446,
            "93": 0.77897,
            "94": 0.77838,
            "95": 0.7783,
            "96": 0.77066,
            "97": 0.77269,
            "98": 0.78271,
            "99": 0.76978,
            "100": 0.77044
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.49566, "5": 10.48418, "10": 10.49947, "15": 10.46646, "20": 10.44788, "25": 10.34978, "30": 10.17275, "35": 10.04282, "40": 9.90773, "45": 9.75781, "50": 9.67689, "55": 9.55382, "60": 9.45468, "65": 9.42164, "70": 9.30116, "75": 9.32219, "80": 9.26192, "85": 9.29673, "90": 9.23346, "95": 9.238, "100": 9.1061}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2183.0, "5": 2533.0, "10": 2162.0, "15": 2548.0, "20": 2191.0, "25": 2557.0, "30": 2864.0, "35": 2979.0, "40": 2332.0, "45": 3931.0, "50": 3611.0, "55": 3750.0, "60": 2647.0, "65": 3396.0, "70": 3869.0, "75": 4912.0, "80": 3687.0, "85": 4182.0, "90": 4677.0, "95": 4397.0, "100": 3212.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1767237120.0, "5": 1767237120.0, "10": 1767237120.0, "15": 1767237120.0, "20": 1767237120.0, "25": 1767237120.0, "30": 1767237120.0, "35": 1767237120.0, "40": 1767237120.0, "45": 1767237120.0, "50": 1767237120.0, "55": 1767237120.0, "60": 1767237120.0, "65": 1767237120.0, "70": 1767237120.0, "75": 1767237120.0, "80": 1767237120.0, "85": 1767237120.0, "90": 1767237120.0, "95": 1767237120.0, "100": 1767237120.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2313432064.0, "5": 3056413696.0, "10": 3056416768.0, "15": 3056416768.0, "20": 3056416768.0, "25": 3056416768.0, "30": 3056416768.0, "35": 3056416768.0, "40": 3056416768.0, "45": 3056416768.0, "50": 3056416768.0, "55": 3056416768.0, "60": 3056416768.0, "65": 3056416768.0, "70": 3056416768.0, "75": 3056416768.0, "80": 3056416768.0, "85": 3056416768.0, "90": 3056416768.0, "95": 3056416768.0, "100": 3056416768.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 13.11825, "5": 1.26245, "10": 1.24684, "15": 1.21439, "20": 1.21303, "25": 1.60409, "30": 1.21786, "35": 1.21519, "40": 1.21559, "45": 1.21147, "50": 1.21024, "55": 1.22818, "60": 1.19382, "65": 1.20118, "70": 1.20129, "75": 1.40636, "80": 1.1941, "85": 1.18992, "90": 1.19216, "95": 1.18942, "100": 1.19526}}}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 24
  --hidden-size: 1024
  --num-attention-heads: 16
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 128
  --seq-length: 512
  --max-position-embeddings: 512
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 990000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence
  --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.0001
  --min-lr: 0.00001
  --lr-warmup-fraction: 0.01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --tensor-model-parallel-size: 2
  --pipeline-model-parallel-size: 2
  --spec: local
  --deterministic-mode: true
  --use-checkpoint-args: true
  --use-checkpoint-opt_param-scheduler: true
  --no-gradient-accumulation-fusion: true
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --ckpt-format: torch
  --attention-backend: local
TEST_TYPE: ckpt-resume


================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.42004, "5": 10.44687, "10": 10.44032, "15": 10.43081, "20": 10.40841, "25": 10.32605, "30": 10.18604, "35": 10.03131, "40": 9.91274, "45": 9.75116, "50": 9.66124}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3226.0, "5": 3843.0, "10": 2475.0, "15": 2700.0, "20": 3443.0, "25": 2788.0, "30": 2821.0, "35": 4077.0, "40": 3244.0, "45": 4769.0, "50": 3733.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1632405504.0, "5": 1632405504.0, "10": 1632405504.0, "15": 1632405504.0, "20": 1632405504.0, "25": 1632405504.0, "30": 1632405504.0, "35": 1632405504.0, "40": 1632405504.0, "45": 1632405504.0, "50": 1632405504.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2477577728.0, "5": 3175497216.0, "10": 3175497216.0, "15": 3175497216.0, "20": 3178637312.0, "25": 3178637312.0, "30": 3178637312.0, "35": 3178637312.0, "40": 3178637312.0, "45": 3178637312.0, "50": 3178637312.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 7.09431, "5": 2.352, "10": 2.3669, "15": 2.36187, "20": 2.34867, "25": 2.34813, "30": 2.35284, "35": 2.36644, "40": 2.35505, "45": 2.34778, "50": 2.35217}}}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.51871,
            "2": 10.4956,
            "3": 10.51255,
            "4": 10.50998,
            "5": 10.49587,
            "6": 10.48692,
            "7": 10.49277,
            "8": 10.50311,
            "9": 10.50245,
            "10": 10.49512,
            "11": 10.49932,
            "12": 10.51527,
            "13": 10.49255,
            "14": 10.48631,
            "15": 10.50134,
            "16": 10.4797,
            "17": 10.47762,
            "18": 10.48507,
            "19": 10.47929,
            "20": 10.47261,
            "21": 10.47354,
            "22": 10.42928,
            "23": 10.41817,
            "24": 10.41344,
            "25": 10.40719,
            "26": 10.3839,
            "27": 10.37341,
            "28": 10.37565,
            "29": 10.32619,
            "30": 10.24178,
            "31": 10.23444,
            "32": 10.19319,
            "33": 10.20664,
            "34": 10.16671,
            "35": 10.15801,
            "36": 10.12188,
            "37": 10.11365,
            "38": 10.10678,
            "39": 10.06551,
            "40": 10.02612,
            "41": 9.98665,
            "42": 9.92401,
            "43": 9.90815,
            "44": 9.88451,
            "45": 9.84976,
            "46": 9.81212,
            "47": 9.79634,
            "48": 9.76934,
            "49": 9.82731,
            "50": 9.78659
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 3597.0,
            "2": 3019.0,
            "3": 2834.0,
            "4": 3501.0,
            "5": 3175.0,
            "6": 2954.0,
            "7": 3807.0,
            "8": 3593.0,
            "9": 3344.0,
            "10": 3244.0,
            "11": 3905.0,
            "12": 1810.0,
            "13": 3393.0,
            "14": 4129.0,
            "15": 3308.0,
            "16": 3989.0,
            "17": 3605.0,
            "18": 3496.0,
            "19": 3861.0,
            "20": 3809.0,
            "21": 2265.0,
            "22": 2920.0,
            "23": 3855.0,
            "24": 3841.0,
            "25": 3679.0,
            "26": 3938.0,
            "27": 2749.0,
            "28": 4080.0,
            "29": 3641.0,
            "30": 3920.0,
            "31": 3465.0,
            "32": 3678.0,
            "33": 3345.0,
            "34": 3549.0,
            "35": 3593.0,
            "36": 4149.0,
            "37": 4239.0,
            "38": 4040.0,
            "39": 4130.0,
            "40": 4490.0,
            "41": 2672.0,
            "42": 2521.0,
            "43": 2574.0,
            "44": 3379.0,
            "45": 3999.0,
            "46": 3953.0,
            "47": 2708.0,
            "48": 4246.0,
            "49": 3920.0,
            "50": 3314.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1670994432.0,
            "2": 1670994432.0,
            "3": 1670994432.0,
            "4": 1670994432.0,
            "5": 1670994432.0,
            "6": 1670994432.0,
            "7": 1670994432.0,
            "8": 1670994432.0,
            "9": 1670994432.0,
            "10": 1670994432.0,
            "11": 1670994432.0,
            "12": 1670994432.0,
            "13": 1670994432.0,
            "14": 1670994432.0,
            "15": 1670994432.0,
            "16": 1670994432.0,
            "17": 1670994432.0,
            "18": 1670994432.0,
            "19": 1670994432.0,
            "20": 1670994432.0,
            "21": 1670994432.0,
            "22": 1670994432.0,
            "23": 1670994432.0,
            "24": 1670994432.0,
            "25": 1670994432.0,
            "26": 1670994432.0,
            "27": 1670994432.0,
            "28": 1670994432.0,
            "29": 1670994432.0,
            "30": 1670994432.0,
            "31": 1670994432.0,
            "32": 1670994432.0,
            "33": 1670994432.0,
            "34": 1670994432.0,
            "35": 1670994432.0,
            "36": 1670994432.0,
            "37": 1670994432.0,
            "38": 1670994432.0,
            "39": 1670994432.0,
            "40": 1670994432.0,
            "41": 1670994432.0,
            "42": 1670994432.0,
            "43": 1670994432.0,
            "44": 1670994432.0,
            "45": 1670994432.0,
            "46": 1670994432.0,
            "47": 1670994432.0,
            "48": 1670994432.0,
            "49": 1670994432.0,
            "50": 1670994432.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 2521210368.0,
            "2": 3221232128.0,
            "3": 3221232128.0,
            "4": 3221232128.0,
            "5": 3221232128.0,
            "6": 3221232128.0,
            "7": 3221232128.0,
            "8": 3221232128.0,
            "9": 3221232128.0,
            "10": 3221232128.0,
            "11": 3221232128.0,
            "12": 3221232128.0,
            "13": 3221232128.0,
            "14": 3221232128.0,
            "15": 3221232128.0,
            "16": 3221232128.0,
            "17": 3221232128.0,
            "18": 3221232128.0,
            "19": 3221232128.0,
            "20": 3221232128.0,
            "21": 3221232128.0,
            "22": 3221232128.0,
            "23": 3221232128.0,
            "24": 3221232128.0,
            "25": 3221232128.0,
            "26": 3221232128.0,
            "27": 3221232128.0,
            "28": 3221232128.0,
            "29": 3221232128.0,
            "30": 3221232128.0,
            "31": 3221232128.0,
            "32": 3221232128.0,
            "33": 3221232128.0,
            "34": 3221232128.0,
            "35": 3221232128.0,
            "36": 3221232128.0,
            "37": 3221232128.0,
            "38": 3221232128.0,
            "39": 3221232128.0,
            "40": 3221232128.0,
            "41": 3221232128.0,
            "42": 3221232128.0,
            "43": 3221232128.0,
            "44": 3221232128.0,
            "45": 3221232128.0,
            "46": 3221232128.0,
            "47": 3221232128.0,
            "48": 3221232128.0,
            "49": 3221232128.0,
            "50": 3221232128.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 6.15099,
            "3": 1.65293,
            "4": 1.63949,
            "5": 1.64244,
            "6": 1.6438,
            "7": 1.67622,
            "8": 1.6456,
            "9": 1.654,
            "10": 1.6418,
            "11": 1.6412,
            "12": 1.63799,
            "13": 1.63405,
            "14": 1.63852,
            "15": 1.64249,
            "16": 1.63757,
            "17": 1.63078,
            "18": 1.62993,
            "19": 1.72481,
            "20": 1.63412,
            "21": 1.74828,
            "22": 1.7055,
            "23": 2.19945,
            "24": 1.63616,
            "25": 1.63702,
            "26": 1.64273,
            "27": 1.64261,
            "28": 1.6368,
            "29": 2.16015,
            "30": 2.43465,
            "31": 2.08006,
            "32": 1.64052,
            "33": 2.41778,
            "34": 2.04121,
            "35": 2.02716,
            "36": 1.64041,
            "37": 1.64053,
            "38": 1.64108,
            "39": 1.64424,
            "40": 1.63863,
            "41": 1.63966,
            "42": 1.63523,
            "43": 1.64084,
            "44": 1.64004,
            "45": 1.64161,
            "46": 1.65273,
            "47": 1.6401,
            "48": 1.64477,
            "49": 1.65398,
            "50": 1.64766
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.42107, "5": 10.44497, "10": 10.44241, "15": 10.43152, "20": 10.40907, "25": 10.3264, "30": 10.18328, "35": 10.03461, "40": 9.91258, "45": 9.74932, "50": 9.66168}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2229.0, "5": 2848.0, "10": 2437.0, "15": 3644.0, "20": 3449.0, "25": 3783.0, "30": 2913.0, "35": 4128.0, "40": 2230.0, "45": 4790.0, "50": 4716.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1632405504.0, "5": 1632405504.0, "10": 1632405504.0, "15": 1632405504.0, "20": 1632405504.0, "25": 1632405504.0, "30": 1632405504.0, "35": 1632405504.0, "40": 1632405504.0, "45": 1632405504.0, "50": 1632405504.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2458703360.0, "5": 3155576320.0, "10": 3155576320.0, "15": 3155576320.0, "20": 3155576320.0, "25": 3155576320.0, "30": 3155576320.0, "35": 3155576320.0, "40": 3155576320.0, "45": 3155576320.0, "50": 3155576320.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.46827, "5": 2.04207, "10": 2.0714, "15": 2.06559, "20": 2.04371, "25": 2.04465, "30": 2.0474, "35": 2.21838, "40": 2.04636, "45": 2.05719, "50": 2.04581}}}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 24
  --hidden-size: 1024
  --num-attention-heads: 16
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 128
  --seq-length: 512
  --max-position-embeddings: 512
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 990000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/bert_shard00/my-bert_00_text_sentence
  --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.0001
  --min-lr: 0.00001
  --lr-warmup-fraction: 0.01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --tensor-model-parallel-size: 4
  --pipeline-model-parallel-size: 1
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --ckpt-format: torch
  --attention-backend: unfused
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/bert/bert_release/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 20000,
        "step_interval": 5,
        "values": {
            "1": 10.50978,
            "5": 10.52127,
            "10": 10.54466,
            "15": 10.53034,
            "20": 10.14173,
            "25": 9.56311,
            "30": 9.41201,
            "35": 9.14043,
            "40": 9.03793,
            "45": 8.97945,
            "50": 8.74475,
            "55": 8.78099,
            "60": 8.43348,
            "65": 8.48725,
            "70": 8.14828,
            "75": 8.25803,
            "80": 7.88134,
            "85": 7.77081,
            "90": 7.61632,
            "95": 7.63765,
            "100": 7.62594,
            "105": 7.38758,
            "110": 7.51315,
            "115": 7.15652,
            "120": 6.98262,
            "125": 7.15266,
            "130": 6.93329,
            "135": 6.84929,
            "140": 7.07327,
            "145": 6.98775,
            "150": 6.9769,
            "155": 6.93349,
            "160": 6.9003,
            "165": 7.00237,
            "170": 6.82439,
            "175": 6.88854,
            "180": 6.58099,
            "185": 7.00284,
            "190": 6.97452,
            "195": 6.85067,
            "200": 6.86475,
            "205": 6.7906,
            "210": 6.82238,
            "215": 6.86867,
            "220": 6.64438,
            "225": 6.77745,
            "230": 6.84208,
            "235": 6.63336,
            "240": 6.57281,
            "245": 6.91337,
            "250": 6.70348,
            "255": 6.59624,
            "260": 6.70215,
            "265": 6.56059,
            "270": 6.47804,
            "275": 6.76849,
            "280": 6.60685,
            "285": 6.48901,
            "290": 6.58768,
            "295": 6.68338,
            "300": 6.33115,
            "305": 6.36339,
            "310": 6.95861,
            "315": 6.77054,
            "320": 6.73592,
            "325": 6.48057,
            "330": 6.69734,
            "335": 6.55731,
            "340": 6.43017,
            "345": 6.60688,
            "350": 6.48479,
            "355": 6.65201,
            "360": 6.5877,
            "365": 6.68503,
            "370": 6.53352,
            "375": 6.64235,
            "380": 6.35286,
            "385": 6.48193,
            "390": 6.5151,
            "395": 6.30702,
            "400": 6.59551,
            "405": 6.51309,
            "410": 6.59776,
            "415": 6.55058,
            "420": 6.5084,
            "425": 6.43579,
            "430": 6.55671,
            "435": 6.39106,
            "440": 6.53549,
            "445": 6.47319,
            "450": 6.61633,
            "455": 6.51197,
            "460": 6.82305,
            "465": 6.74201,
            "470": 6.42743,
            "475": 6.19014,
            "480": 6.49394,
            "485": 6.53743,
            "490": 6.20306,
            "495": 6.30694,
            "500": 6.48972,
            "505": 6.4621,
            "510": 6.5147,
            "515": 6.16072,
            "520": 6.57651,
            "525": 6.50925,
            "530": 6.32426,
            "535": 6.13235,
            "540": 6.61292,
            "545": 6.41906,
            "550": 6.42204,
            "555": 6.40691,
            "560": 6.41834,
            "565": 6.31847,
            "570": 6.3755,
            "575": 6.56663,
            "580": 6.47157,
            "585": 6.48295,
            "590": 6.21692,
            "595": 6.26575,
            "600": 6.46368,
            "605": 6.38135,
            "610": 6.44998,
            "615": 6.43694,
            "620": 6.47964,
            "625": 6.4062,
            "630": 6.46739,
            "635": 6.42306,
            "640": 6.35796,
            "645": 6.21266,
            "650": 6.30068,
            "655": 6.58562,
            "660": 6.34484,
            "665": 6.31661,
            "670": 6.41515,
            "675": 6.30345,
            "680": 6.36892,
            "685": 6.52142,
            "690": 6.29281,
            "695": 6.17693,
            "700": 6.12029,
            "705": 6.33282,
            "710": 6.17325,
            "715": 6.41845,
            "720": 6.40365,
            "725": 6.27661,
            "730": 6.36336,
            "735": 6.50301,
            "740": 6.6408,
            "745": 6.21239,
            "750": 6.22903,
            "755": 6.01962,
            "760": 6.54194,
            "765": 6.30887,
            "770": 6.42465,
            "775": 6.1418,
            "780": 6.2635,
            "785": 6.27722,
            "790": 6.44489,
            "795": 6.40784,
            "800": 6.10817,
            "805": 6.36587,
            "810": 6.24281,
            "815": 6.14724,
            "820": 6.35222,
            "825": 6.62956,
            "830": 6.35037,
            "835": 6.19158,
            "840": 6.1428,
            "845": 6.42518,
            "850": 6.43734,
            "855": 6.48145,
            "860": 6.22078,
            "865": 6.35995,
            "870": 6.30855,
            "875": 6.24704,
            "880": 6.29342,
            "885": 6.33952,
            "890": 6.19924,
            "895": 6.28849,
            "900": 6.33032,
            "905": 6.23994,
            "910": 6.15103,
            "915": 6.46643,
            "920": 6.42428,
            "925": 6.16715,
            "930": 6.045,
            "935": 6.42457,
            "940": 6.30709,
            "945": 6.05385,
            "950": 6.24124,
            "955": 6.04244,
            "960": 6.32627,
            "965": 6.17532,
            "970": 6.33015,
            "975": 6.08915,
            "980": 6.04081,
            "985": 6.3183,
            "990": 6.19724,
            "995": 6.31644,
            "1000": 6.23701,
            "1005": 6.40623,
            "1010": 6.20177,
            "1015": 6.10831,
            "1020": 6.09185,
            "1025": 6.3262,
            "1030": 6.1247,
            "1035": 6.02977,
            "1040": 6.00148,
            "1045": 6.59925,
            "1050": 6.33093,
            "1055": 6.08568,
            "1060": 6.14131,
            "1065": 5.85939,
            "1070": 6.24407,
            "1075": 5.95453,
            "1080": 6.11996,
            "1085": 6.1268,
            "1090": 6.04556,
            "1095": 6.23926,
            "1100": 6.07186,
            "1105": 6.2921,
            "1110": 6.21297,
            "1115": 6.08982,
            "1120": 6.15687,
            "1125": 6.11832,
            "1130": 5.94971,
            "1135": 6.31535,
            "1140": 6.39315,
            "1145": 6.20465,
            "1150": 6.10843,
            "1155": 6.03022,
            "1160": 6.2351,
            "1165": 6.23326,
            "1170": 6.37467,
            "1175": 6.0531,
            "1180": 6.12652,
            "1185": 6.24238,
            "1190": 6.36964,
            "1195": 6.27203,
            "1200": 6.0221,
            "1205": 6.25623,
            "1210": 6.24566,
            "1215": 6.10324,
            "1220": 6.0707,
            "1225": 6.39464,
            "1230": 6.21985,
            "1235": 6.00045,
            "1240": 6.5414,
            "1245": 6.09369,
            "1250": 5.95764,
            "1255": 6.08614,
            "1260": 6.15207,
            "1265": 6.04879,
            "1270": 5.93994,
            "1275": 6.0892,
            "1280": 5.86241,
            "1285": 5.9135,
            "1290": 6.11586,
            "1295": 6.09778,
            "1300": 6.24353,
            "1305": 5.95114,
            "1310": 6.02127,
            "1315": 6.01787,
            "1320": 6.08447,
            "1325": 6.26262,
            "1330": 6.08214,
            "1335": 6.02861,
            "1340": 6.21578,
            "1345": 6.05392,
            "1350": 5.92473,
            "1355": 6.26256,
            "1360": 6.31524,
            "1365": 6.05939,
            "1370": 6.00931,
            "1375": 6.30432,
            "1380": 6.13717,
            "1385": 5.91633,
            "1390": 6.0165,
            "1395": 6.02754,
            "1400": 6.23111,
            "1405": 6.00511,
            "1410": 6.12461,
            "1415": 6.33097,
            "1420": 6.14531,
            "1425": 6.15913,
            "1430": 5.99409,
            "1435": 6.369,
            "1440": 6.29075,
            "1445": 6.28371,
            "1450": 6.07842,
            "1455": 5.76615,
            "1460": 6.22168,
            "1465": 5.95006,
            "1470": 5.98994,
            "1475": 6.17676,
            "1480": 6.21702,
            "1485": 6.0273,
            "1490": 6.20639,
            "1495": 6.17929,
            "1500": 6.16936,
            "1505": 6.25671,
            "1510": 6.33012,
            "1515": 6.10774,
            "1520": 5.87779,
            "1525": 5.97556,
            "1530": 5.96883,
            "1535": 6.19055,
            "1540": 6.4155,
            "1545": 6.28752,
            "1550": 6.05849,
            "1555": 5.96582,
            "1560": 6.1972,
            "1565": 6.15321,
            "1570": 6.08372,
            "1575": 6.07103,
            "1580": 6.03693,
            "1585": 6.02378,
            "1590": 6.30795,
            "1595": 6.25445,
            "1600": 5.89585,
            "1605": 5.95341,
            "1610": 5.99693,
            "1615": 6.18964,
            "1620": 6.09966,
            "1625": 6.0184,
            "1630": 6.04511,
            "1635": 6.35301,
            "1640": 6.09002,
            "1645": 6.18054,
            "1650": 6.26055,
            "1655": 6.26873,
            "1660": 6.25826,
            "1665": 5.87738,
            "1670": 6.1647,
            "1675": 5.91026,
            "1680": 6.13663,
            "1685": 6.30892,
            "1690": 6.27804,
            "1695": 5.99948,
            "1700": 6.25646,
            "1705": 6.11808,
            "1710": 6.20222,
            "1715": 5.97646,
            "1720": 6.31462,
            "1725": 6.22429,
            "1730": 6.20496,
            "1735": 6.1343,
            "1740": 6.05348,
            "1745": 5.93513,
            "1750": 6.33739,
            "1755": 6.37528,
            "1760": 5.86269,
            "1765": 6.07216,
            "1770": 6.23357,
            "1775": 5.96561,
            "1780": 6.26714,
            "1785": 5.82641,
            "1790": 6.03373,
            "1795": 6.255,
            "1800": 6.12279,
            "1805": 5.9765,
            "1810": 6.63002,
            "1815": 6.219,
            "1820": 6.24733,
            "1825": 6.12365,
            "1830": 6.1671,
            "1835": 5.97481,
            "1840": 5.99782,
            "1845": 6.15354,
            "1850": 6.22021,
            "1855": 6.12722,
            "1860": 6.12755,
            "1865": 5.95807,
            "1870": 6.22601,
            "1875": 6.15834,
            "1880": 5.91109,
            "1885": 6.18952,
            "1890": 6.12636,
            "1895": 6.06292,
            "1900": 6.00214,
            "1905": 6.34379,
            "1910": 6.39854,
            "1915": 6.27321,
            "1920": 6.10121,
            "1925": 5.85579,
            "1930": 5.9871,
            "1935": 6.11977,
            "1940": 6.02412,
            "1945": 5.86019,
            "1950": 6.08761,
            "1955": 5.95415,
            "1960": 6.11389,
            "1965": 6.0503,
            "1970": 6.04376,
            "1975": 6.08327,
            "1980": 6.41602,
            "1985": 6.3055,
            "1990": 5.98697,
            "1995": 6.08656,
            "2000": 5.98795,
            "2005": 6.173,
            "2010": 6.28376,
            "2015": 6.05629,
            "2020": 6.19819,
            "2025": 6.23823,
            "2030": 6.10615,
            "2035": 6.16811,
            "2040": 5.80791,
            "2045": 5.98188,
            "2050": 6.2018,
            "2055": 6.03742,
            "2060": 6.32661,
            "2065": 6.18935,
            "2070": 5.96647,
            "2075": 6.04736,
            "2080": 6.11137,
            "2085": 6.03968,
            "2090": 6.02187,
            "2095": 6.23785,
            "2100": 6.13127,
            "2105": 6.09832,
            "2110": 6.02575,
            "2115": 6.15655,
            "2120": 5.99329,
            "2125": 6.25235,
            "2130": 6.19745,
            "2135": 5.94229,
            "2140": 6.07954,
            "2145": 5.749,
            "2150": 6.16774,
            "2155": 6.07575,
            "2160": 5.96929,
            "2165": 5.96396,
            "2170": 6.02488,
            "2175": 5.99406,
            "2180": 5.98873,
            "2185": 5.90066,
            "2190": 5.85869,
            "2195": 5.99928,
            "2200": 6.25176,
            "2205": 6.09569,
            "2210": 5.9385,
            "2215": 6.11442,
            "2220": 6.37571,
            "2225": 5.97098,
            "2230": 6.16859,
            "2235": 6.06329,
            "2240": 5.97345,
            "2245": 6.23139,
            "2250": 5.95368,
            "2255": 5.78713,
            "2260": 6.14571,
            "2265": 5.92416,
            "2270": 5.9742,
            "2275": 6.0561,
            "2280": 6.10853,
            "2285": 5.9609,
            "2290": 6.00926,
            "2295": 5.98689,
            "2300": 6.22828,
            "2305": 5.77386,
            "2310": 6.07112,
            "2315": 6.09636,
            "2320": 6.02308,
            "2325": 5.77792,
            "2330": 5.90006,
            "2335": 6.00477,
            "2340": 6.04988,
            "2345": 5.87943,
            "2350": 5.9455,
            "2355": 6.15292,
            "2360": 5.98856,
            "2365": 6.17572,
            "2370": 6.11409,
            "2375": 6.17821,
            "2380": 6.06473,
            "2385": 6.16272,
            "2390": 6.18619,
            "2395": 6.08761,
            "2400": 5.77243,
            "2405": 5.94605,
            "2410": 6.20816,
            "2415": 6.0388,
            "2420": 6.07585,
            "2425": 5.79995,
            "2430": 6.3696,
            "2435": 6.17437,
            "2440": 5.93429,
            "2445": 5.84607,
            "2450": 6.03347,
            "2455": 5.68925,
            "2460": 5.98785,
            "2465": 6.28467,
            "2470": 5.83379,
            "2475": 5.92314,
            "2480": 5.94645,
            "2485": 6.06727,
            "2490": 6.0579,
            "2495": 6.14306,
            "2500": 6.07794,
            "2505": 5.9618,
            "2510": 5.81741,
            "2515": 6.04262,
            "2520": 6.22326,
            "2525": 5.80973,
            "2530": 5.9709,
            "2535": 5.98911,
            "2540": 6.18465,
            "2545": 5.97127,
            "2550": 6.00245,
            "2555": 5.72017,
            "2560": 6.18913,
            "2565": 5.86481,
            "2570": 6.09136,
            "2575": 5.69286,
            "2580": 5.90757,
            "2585": 5.91095,
            "2590": 6.11604,
            "2595": 6.10858,
            "2600": 6.04358,
            "2605": 6.03382,
            "2610": 6.29485,
            "2615": 6.08114,
            "2620": 5.98273,
            "2625": 6.11247,
            "2630": 6.16681,
            "2635": 6.15853,
            "2640": 5.92667,
            "2645": 6.02411,
            "2650": 6.03993,
            "2655": 5.86835,
            "2660": 5.84915,
            "2665": 6.16659,
            "2670": 6.01992,
            "2675": 5.97334,
            "2680": 6.14047,
            "2685": 5.98695,
            "2690": 6.05779,
            "2695": 5.95816,
            "2700": 6.30411,
            "2705": 6.09839,
            "2710": 6.13429,
            "2715": 5.8388,
            "2720": 5.97832,
            "2725": 6.17393,
            "2730": 6.06026,
            "2735": 6.15802,
            "2740": 5.88136,
            "2745": 6.10355,
            "2750": 5.98925,
            "2755": 5.96378,
            "2760": 5.86853,
            "2765": 6.17326,
            "2770": 6.04686,
            "2775": 6.24488,
            "2780": 6.1769,
            "2785": 6.02252,
            "2790": 6.028,
            "2795": 5.85762,
            "2800": 6.12219,
            "2805": 5.99124,
            "2810": 6.12423,
            "2815": 6.09528,
            "2820": 6.00802,
            "2825": 5.88195,
            "2830": 6.27177,
            "2835": 5.95029,
            "2840": 6.0893,
            "2845": 5.8773,
            "2850": 5.8596,
            "2855": 6.14924,
            "2860": 5.81183,
            "2865": 6.08935,
            "2870": 6.05999,
            "2875": 6.11965,
            "2880": 5.89185,
            "2885": 5.92228,
            "2890": 6.044,
            "2895": 6.10215,
            "2900": 5.77353,
            "2905": 6.08914,
            "2910": 6.0114,
            "2915": 6.16499,
            "2920": 5.90654,
            "2925": 5.76641,
            "2930": 6.02525,
            "2935": 6.03343,
            "2940": 6.17916,
            "2945": 5.91933,
            "2950": 6.09969,
            "2955": 5.99275,
            "2960": 5.90625,
            "2965": 5.88884,
            "2970": 5.89181,
            "2975": 5.94191,
            "2980": 6.16419,
            "2985": 6.17411,
            "2990": 5.97356,
            "2995": 6.27941,
            "3000": 6.11256,
            "3005": 5.87221,
            "3010": 6.3364,
            "3015": 5.87806,
            "3020": 5.81857,
            "3025": 5.853,
            "3030": 5.99115,
            "3035": 5.96123,
            "3040": 6.24968,
            "3045": 5.82178,
            "3050": 6.23337,
            "3055": 5.8939,
            "3060": 5.88487,
            "3065": 5.92822,
            "3070": 6.12952,
            "3075": 5.83377,
            "3080": 5.90756,
            "3085": 6.07445,
            "3090": 6.1671,
            "3095": 6.05015,
            "3100": 6.11522,
            "3105": 5.97822,
            "3110": 5.84571,
            "3115": 6.05431,
            "3120": 6.28212,
            "3125": 6.18012,
            "3130": 5.98664,
            "3135": 6.13297,
            "3140": 5.85043,
            "3145": 6.35467,
            "3150": 6.01208,
            "3155": 6.22476,
            "3160": 6.04016,
            "3165": 6.08051,
            "3170": 6.21385,
            "3175": 6.04457,
            "3180": 6.01898,
            "3185": 6.03264,
            "3190": 5.99449,
            "3195": 5.87052,
            "3200": 6.05184,
            "3205": 5.9353,
            "3210": 5.97018,
            "3215": 5.96903,
            "3220": 5.89072,
            "3225": 5.80244,
            "3230": 6.12997,
            "3235": 5.85196,
            "3240": 6.11383,
            "3245": 6.14527,
            "3250": 5.95086,
            "3255": 6.06106,
            "3260": 5.98062,
            "3265": 6.07303,
            "3270": 6.06225,
            "3275": 6.04822,
            "3280": 6.00888,
            "3285": 5.83469,
            "3290": 6.06418,
            "3295": 5.75779,
            "3300": 5.95064,
            "3305": 5.97935,
            "3310": 5.83699,
            "3315": 5.45177,
            "3320": 5.95406,
            "3325": 5.87031,
            "3330": 5.90009,
            "3335": 6.02673,
            "3340": 6.00609,
            "3345": 5.97018,
            "3350": 5.88523,
            "3355": 5.93077,
            "3360": 5.76949,
            "3365": 6.27226,
            "3370": 6.08555,
            "3375": 5.82864,
            "3380": 6.17215,
            "3385": 5.92664,
            "3390": 5.85654,
            "3395": 5.99022,
            "3400": 6.02132,
            "3405": 5.98712,
            "3410": 6.2533,
            "3415": 5.9641,
            "3420": 5.76721,
            "3425": 6.25807,
            "3430": 5.86702,
            "3435": 5.896,
            "3440": 5.77806,
            "3445": 5.93952,
            "3450": 6.05672,
            "3455": 5.90924,
            "3460": 5.96039,
            "3465": 6.01595,
            "3470": 5.88352,
            "3475": 5.97563,
            "3480": 5.84986,
            "3485": 5.9187,
            "3490": 5.97846,
            "3495": 6.08206,
            "3500": 6.00425,
            "3505": 6.12916,
            "3510": 6.04774,
            "3515": 6.08829,
            "3520": 6.02516,
            "3525": 6.07695,
            "3530": 5.91115,
            "3535": 6.10843,
            "3540": 6.01169,
            "3545": 6.24094,
            "3550": 6.01702,
            "3555": 6.23271,
            "3560": 6.02274,
            "3565": 5.99809,
            "3570": 5.99479,
            "3575": 5.75468,
            "3580": 6.09507,
            "3585": 5.86544,
            "3590": 5.98315,
            "3595": 5.85808,
            "3600": 5.67863,
            "3605": 6.05424,
            "3610": 6.14856,
            "3615": 5.9446,
            "3620": 5.96901,
            "3625": 6.15804,
            "3630": 5.87917,
            "3635": 6.06303,
            "3640": 6.22118,
            "3645": 5.81263,
            "3650": 6.06533,
            "3655": 5.86796,
            "3660": 5.97358,
            "3665": 5.80527,
            "3670": 6.11461,
            "3675": 5.92948,
            "3680": 5.99149,
            "3685": 5.88701,
            "3690": 5.89312,
            "3695": 6.06007,
            "3700": 5.96269,
            "3705": 6.04204,
            "3710": 5.93027,
            "3715": 5.77244,
            "3720": 6.11438,
            "3725": 6.21321,
            "3730": 5.98738,
            "3735": 5.89429,
            "3740": 5.8646,
            "3745": 5.99594,
            "3750": 6.12476,
            "3755": 5.81869,
            "3760": 6.07677,
            "3765": 5.73443,
            "3770": 6.12293,
            "3775": 5.91663,
            "3780": 6.22376,
            "3785": 6.02782,
            "3790": 5.89494,
            "3795": 6.37162,
            "3800": 5.69486,
            "3805": 6.05985,
            "3810": 5.98003,
            "3815": 5.96771,
            "3820": 5.89768,
            "3825": 6.00724,
            "3830": 5.67211,
            "3835": 5.81791,
            "3840": 5.92655,
            "3845": 5.86602,
            "3850": 5.75022,
            "3855": 6.10025,
            "3860": 6.09023,
            "3865": 5.79756,
            "3870": 5.85898,
            "3875": 5.91542,
            "3880": 6.03141,
            "3885": 6.06076,
            "3890": 5.9103,
            "3895": 6.03542,
            "3900": 5.93195,
            "3905": 5.79682,
            "3910": 6.00266,
            "3915": 5.92411,
            "3920": 6.04708,
            "3925": 6.04212,
            "3930": 5.81825,
            "3935": 6.011,
            "3940": 5.96532,
            "3945": 5.78432,
            "3950": 5.70522,
            "3955": 5.66836,
            "3960": 5.70374,
            "3965": 5.8349,
            "3970": 5.92531,
            "3975": 6.02866,
            "3980": 6.03837,
            "3985": 5.87189,
            "3990": 5.90754,
            "3995": 6.07567,
            "4000": 5.9133,
            "4005": 5.8454,
            "4010": 5.89856,
            "4015": 6.06073,
            "4020": 5.72817,
            "4025": 5.98624,
            "4030": 5.86059,
            "4035": 5.60016,
            "4040": 6.1592,
            "4045": 6.00769,
            "4050": 5.92849,
            "4055": 6.1625,
            "4060": 5.85082,
            "4065": 5.87984,
            "4070": 5.95859,
            "4075": 5.87458,
            "4080": 5.83097,
            "4085": 6.04063,
            "4090": 6.10215,
            "4095": 5.76431,
            "4100": 5.76182,
            "4105": 6.137,
            "4110": 5.68234,
            "4115": 6.01339,
            "4120": 5.97184,
            "4125": 5.83483,
            "4130": 5.90794,
            "4135": 6.1168,
            "4140": 5.86355,
            "4145": 6.03874,
            "4150": 5.89801,
            "4155": 5.89877,
            "4160": 5.70674,
            "4165": 5.8545,
            "4170": 5.97487,
            "4175": 5.57132,
            "4180": 6.10157,
            "4185": 5.96884,
            "4190": 5.84876,
            "4195": 6.09226,
            "4200": 5.57539,
            "4205": 5.81751,
            "4210": 6.00172,
            "4215": 5.88576,
            "4220": 5.81668,
            "4225": 5.76344,
            "4230": 5.80367,
            "4235": 5.79075,
            "4240": 5.85309,
            "4245": 5.83807,
            "4250": 6.13289,
            "4255": 6.01369,
            "4260": 5.90699,
            "4265": 6.08868,
            "4270": 6.04465,
            "4275": 5.78601,
            "4280": 5.97173,
            "4285": 5.46478,
            "4290": 5.86685,
            "4295": 5.748,
            "4300": 6.0853,
            "4305": 5.78761,
            "4310": 5.88712,
            "4315": 6.01107,
            "4320": 5.61422,
            "4325": 5.91794,
            "4330": 5.87459,
            "4335": 6.07593,
            "4340": 5.6688,
            "4345": 5.99754,
            "4350": 6.19321,
            "4355": 6.06599,
            "4360": 5.97654,
            "4365": 5.95108,
            "4370": 6.12663,
            "4375": 5.82126,
            "4380": 6.22436,
            "4385": 5.94671,
            "4390": 5.8993,
            "4395": 5.85622,
            "4400": 5.98778,
            "4405": 6.07064,
            "4410": 5.69236,
            "4415": 5.68123,
            "4420": 5.98994,
            "4425": 5.98668,
            "4430": 5.74239,
            "4435": 5.5895,
            "4440": 5.9505,
            "4445": 5.99552,
            "4450": 5.96823,
            "4455": 5.96749,
            "4460": 5.92396,
            "4465": 5.74691,
            "4470": 6.00221,
            "4475": 5.99346,
            "4480": 5.8476,
            "4485": 5.84491,
            "4490": 5.7077,
            "4495": 6.09006,
            "4500": 5.97956,
            "4505": 6.0659,
            "4510": 5.72935,
            "4515": 6.05372,
            "4520": 5.99636,
            "4525": 5.64998,
            "4530": 5.84471,
            "4535": 5.77654,
            "4540": 5.93657,
            "4545": 5.84363,
            "4550": 6.06019,
            "4555": 5.72448,
            "4560": 5.99553,
            "4565": 5.96005,
            "4570": 5.8384,
            "4575": 6.02108,
            "4580": 5.69714,
            "4585": 5.97564,
            "4590": 5.97911,
            "4595": 5.74646,
            "4600": 5.94023,
            "4605": 5.9249,
            "4610": 5.83473,
            "4615": 5.9766,
            "4620": 6.07956,
            "4625": 5.7305,
            "4630": 5.82516,
            "4635": 5.84749,
            "4640": 5.77706,
            "4645": 5.81713,
            "4650": 5.78677,
            "4655": 5.95996,
            "4660": 6.05126,
            "4665": 5.75165,
            "4670": 6.05384,
            "4675": 6.23328,
            "4680": 5.8878,
            "4685": 5.67165,
            "4690": 5.87085,
            "4695": 5.69028,
            "4700": 5.97537,
            "4705": 5.86269,
            "4710": 5.95239,
            "4715": 6.12431,
            "4720": 5.98965,
            "4725": 5.91333,
            "4730": 6.03899,
            "4735": 5.81888,
            "4740": 5.95605,
            "4745": 5.92445,
            "4750": 5.85119,
            "4755": 5.65009,
            "4760": 5.9188,
            "4765": 5.9452,
            "4770": 6.09227,
            "4775": 5.88258,
            "4780": 6.07833,
            "4785": 5.8651,
            "4790": 5.93889,
            "4795": 5.83477,
            "4800": 5.64375,
            "4805": 5.69828,
            "4810": 5.91983,
            "4815": 6.00755,
            "4820": 5.49614,
            "4825": 5.98527,
            "4830": 5.93281,
            "4835": 5.81396,
            "4840": 6.10633,
            "4845": 6.03498,
            "4850": 5.91842,
            "4855": 6.1681,
            "4860": 6.05597,
            "4865": 5.81532,
            "4870": 5.95613,
            "4875": 5.96721,
            "4880": 5.7983,
            "4885": 6.07921,
            "4890": 5.78296,
            "4895": 5.9505,
            "4900": 5.96643,
            "4905": 5.87355,
            "4910": 5.75351,
            "4915": 5.93393,
            "4920": 6.00762,
            "4925": 6.09524,
            "4930": 5.95182,
            "4935": 6.02575,
            "4940": 5.864,
            "4945": 5.8716,
            "4950": 5.78527,
            "4955": 5.99416,
            "4960": 5.69035,
            "4965": 6.06079,
            "4970": 5.84489,
            "4975": 6.04867,
            "4980": 6.08676,
            "4985": 5.59181,
            "4990": 5.82774,
            "4995": 5.92434,
            "5000": 6.07698,
            "5005": 5.93579,
            "5010": 5.95001,
            "5015": 5.81211,
            "5020": 6.0789,
            "5025": 5.90382,
            "5030": 6.06042,
            "5035": 5.86897,
            "5040": 5.9012,
            "5045": 6.08214,
            "5050": 5.76079,
            "5055": 5.97771,
            "5060": 6.13892,
            "5065": 5.74586,
            "5070": 5.86642,
            "5075": 5.82248,
            "5080": 5.90904,
            "5085": 5.88283,
            "5090": 5.88469,
            "5095": 6.027,
            "5100": 5.67291,
            "5105": 5.81827,
            "5110": 5.87732,
            "5115": 5.92739,
            "5120": 5.90208,
            "5125": 6.02336,
            "5130": 6.1172,
            "5135": 5.95008,
            "5140": 5.82097,
            "5145": 5.67856,
            "5150": 5.88811,
            "5155": 5.82725,
            "5160": 5.94091,
            "5165": 5.85219,
            "5170": 6.01697,
            "5175": 6.09359,
            "5180": 6.08305,
            "5185": 6.10649,
            "5190": 6.15402,
            "5195": 5.99635,
            "5200": 6.05918,
            "5205": 5.87399,
            "5210": 5.84363,
            "5215": 5.97844,
            "5220": 6.01161,
            "5225": 5.68631,
            "5230": 6.11981,
            "5235": 5.71546,
            "5240": 5.93066,
            "5245": 6.05824,
            "5250": 5.76055,
            "5255": 6.03223,
            "5260": 5.71627,
            "5265": 5.90464,
            "5270": 5.84764,
            "5275": 5.68181,
            "5280": 5.75863,
            "5285": 6.0724,
            "5290": 6.01235,
            "5295": 5.94181,
            "5300": 5.97401,
            "5305": 5.99277,
            "5310": 5.65951,
            "5315": 5.75001,
            "5320": 5.6386,
            "5325": 5.87625,
            "5330": 5.65919,
            "5335": 5.81241,
            "5340": 5.72697,
            "5345": 5.77559,
            "5350": 5.92637,
            "5355": 5.92603,
            "5360": 6.07992,
            "5365": 5.78604,
            "5370": 5.85472,
            "5375": 5.63649,
            "5380": 5.8982,
            "5385": 6.12646,
            "5390": 5.80802,
            "5395": 5.89257,
            "5400": 5.93987,
            "5405": 5.68171,
            "5410": 5.47232,
            "5415": 6.07172,
            "5420": 5.98733,
            "5425": 5.88452,
            "5430": 5.63228,
            "5435": 5.97596,
            "5440": 5.85637,
            "5445": 5.80796,
            "5450": 6.23544,
            "5455": 6.05377,
            "5460": 5.96801,
            "5465": 5.74808,
            "5470": 6.14954,
            "5475": 5.91515,
            "5480": 5.8191,
            "5485": 5.80624,
            "5490": 6.05013,
            "5495": 5.83847,
            "5500": 5.95286,
            "5505": 5.6971,
            "5510": 5.90304,
            "5515": 6.03199,
            "5520": 6.06134,
            "5525": 5.74713,
            "5530": 5.98389,
            "5535": 5.65611,
            "5540": 5.67357,
            "5545": 5.70374,
            "5550": 5.91782,
            "5555": 5.85741,
            "5560": 5.8256,
            "5565": 5.55801,
            "5570": 5.92635,
            "5575": 6.08739,
            "5580": 5.96595,
            "5585": 6.08513,
            "5590": 5.9707,
            "5595": 6.12765,
            "5600": 5.99767,
            "5605": 6.07742,
            "5610": 5.98991,
            "5615": 5.70049,
            "5620": 6.24344,
            "5625": 6.03771,
            "5630": 6.14801,
            "5635": 5.88534,
            "5640": 5.76422,
            "5645": 6.13908,
            "5650": 5.86035,
            "5655": 5.69625,
            "5660": 6.12533,
            "5665": 5.81862,
            "5670": 5.72885,
            "5675": 5.93292,
            "5680": 5.83489,
            "5685": 5.83447,
            "5690": 5.74155,
            "5695": 5.8536,
            "5700": 5.81914,
            "5705": 5.8858,
            "5710": 6.11703,
            "5715": 5.80666,
            "5720": 5.68007,
            "5725": 5.85008,
            "5730": 5.64173,
            "5735": 5.79353,
            "5740": 5.66721,
            "5745": 5.99017,
            "5750": 6.04119,
            "5755": 5.862,
            "5760": 6.24023,
            "5765": 5.71757,
            "5770": 5.81909,
            "5775": 5.71088,
            "5780": 6.12669,
            "5785": 6.11558,
            "5790": 6.08197,
            "5795": 5.92237,
            "5800": 6.07773,
            "5805": 5.66323,
            "5810": 5.5611,
            "5815": 5.92778,
            "5820": 5.67742,
            "5825": 5.84299,
            "5830": 6.03477,
            "5835": 6.03871,
            "5840": 5.8794,
            "5845": 5.81815,
            "5850": 5.90177,
            "5855": 5.97375,
            "5860": 5.99436,
            "5865": 5.96317,
            "5870": 5.64651,
            "5875": 5.83133,
            "5880": 5.94834,
            "5885": 5.96674,
            "5890": 5.98563,
            "5895": 5.66984,
            "5900": 6.01495,
            "5905": 6.08352,
            "5910": 5.78805,
            "5915": 5.97641,
            "5920": 5.74395,
            "5925": 5.82793,
            "5930": 5.74695,
            "5935": 5.87957,
            "5940": 5.84082,
            "5945": 5.90503,
            "5950": 5.90527,
            "5955": 5.8254,
            "5960": 5.87073,
            "5965": 5.81172,
            "5970": 5.89204,
            "5975": 5.7955,
            "5980": 6.00817,
            "5985": 5.7193,
            "5990": 5.82529,
            "5995": 5.99112,
            "6000": 5.92348,
            "6005": 6.03366,
            "6010": 5.89691,
            "6015": 5.8933,
            "6020": 6.06883,
            "6025": 5.89103,
            "6030": 5.91443,
            "6035": 5.82379,
            "6040": 5.83304,
            "6045": 6.00192,
            "6050": 5.81129,
            "6055": 5.95391,
            "6060": 5.8171,
            "6065": 5.99975,
            "6070": 5.7831,
            "6075": 5.59593,
            "6080": 5.76179,
            "6085": 5.87687,
            "6090": 6.03011,
            "6095": 5.97702,
            "6100": 5.61804,
            "6105": 5.72225,
            "6110": 6.09274,
            "6115": 6.01397,
            "6120": 6.01759,
            "6125": 5.70215,
            "6130": 5.9337,
            "6135": 5.5358,
            "6140": 5.87096,
            "6145": 5.74275,
            "6150": 5.79393,
            "6155": 5.85795,
            "6160": 5.80031,
            "6165": 5.98103,
            "6170": 5.99153,
            "6175": 5.9781,
            "6180": 5.96022,
            "6185": 5.60066,
            "6190": 5.87777,
            "6195": 5.90184,
            "6200": 5.69397,
            "6205": 5.4556,
            "6210": 5.75823,
            "6215": 5.63252,
            "6220": 5.90273,
            "6225": 5.87178,
            "6230": 5.67458,
            "6235": 5.86232,
            "6240": 5.80303,
            "6245": 5.92534,
            "6250": 5.96294,
            "6255": 5.7412,
            "6260": 5.90903,
            "6265": 5.7987,
            "6270": 5.92134,
            "6275": 5.95146,
            "6280": 5.77518,
            "6285": 5.62948,
            "6290": 5.85273,
            "6295": 5.716,
            "6300": 5.72259,
            "6305": 5.94773,
            "6310": 5.62063,
            "6315": 5.59872,
            "6320": 5.87178,
            "6325": 5.95542,
            "6330": 5.87126,
            "6335": 5.89559,
            "6340": 6.05352,
            "6345": 5.86676,
            "6350": 5.83997,
            "6355": 5.81968,
            "6360": 5.75341,
            "6365": 6.00964,
            "6370": 5.78048,
            "6375": 5.64682,
            "6380": 5.91737,
            "6385": 5.82485,
            "6390": 5.70655,
            "6395": 6.04647,
            "6400": 5.77103,
            "6405": 5.9899,
            "6410": 5.80822,
            "6415": 5.95253,
            "6420": 5.87857,
            "6425": 5.97471,
            "6430": 5.80223,
            "6435": 5.60326,
            "6440": 5.89968,
            "6445": 5.93652,
            "6450": 5.98944,
            "6455": 5.90762,
            "6460": 5.958,
            "6465": 5.74596,
            "6470": 5.73367,
            "6475": 5.79494,
            "6480": 6.08245,
            "6485": 5.85529,
            "6490": 5.71498,
            "6495": 5.7895,
            "6500": 6.0034,
            "6505": 5.76055,
            "6510": 5.7645,
            "6515": 5.92902,
            "6520": 6.0899,
            "6525": 5.78329,
            "6530": 5.95975,
            "6535": 5.78587,
            "6540": 6.02548,
            "6545": 5.87562,
            "6550": 5.92859,
            "6555": 5.65474,
            "6560": 5.91311,
            "6565": 5.89179,
            "6570": 5.91107,
            "6575": 5.91625,
            "6580": 5.83753,
            "6585": 6.04759,
            "6590": 5.70117,
            "6595": 6.0058,
            "6600": 5.90438,
            "6605": 6.11084,
            "6610": 6.03094,
            "6615": 5.7118,
            "6620": 5.75283,
            "6625": 5.9522,
            "6630": 5.69935,
            "6635": 5.84934,
            "6640": 5.70983,
            "6645": 5.65642,
            "6650": 5.81813,
            "6655": 6.06195,
            "6660": 5.8348,
            "6665": 5.87236,
            "6670": 6.06463,
            "6675": 5.7501,
            "6680": 5.92726,
            "6685": 5.91132,
            "6690": 5.78325,
            "6695": 5.94339,
            "6700": 5.65606,
            "6705": 6.02323,
            "6710": 5.70003,
            "6715": 5.82179,
            "6720": 5.98177,
            "6725": 5.9901,
            "6730": 5.56335,
            "6735": 5.80813,
            "6740": 5.93822,
            "6745": 5.74147,
            "6750": 5.63666,
            "6755": 5.67431,
            "6760": 5.8599,
            "6765": 5.94307,
            "6770": 5.88272,
            "6775": 5.73587,
            "6780": 5.77524,
            "6785": 5.88231,
            "6790": 5.60721,
            "6795": 5.85158,
            "6800": 5.67624,
            "6805": 5.73236,
            "6810": 5.82755,
            "6815": 5.95412,
            "6820": 5.57441,
            "6825": 6.03122,
            "6830": 6.0482,
            "6835": 5.85559,
            "6840": 6.05801,
            "6845": 5.79033,
            "6850": 5.62256,
            "6855": 5.84221,
            "6860": 6.02341,
            "6865": 5.67526,
            "6870": 5.81619,
            "6875": 5.88666,
            "6880": 6.09726,
            "6885": 5.96126,
            "6890": 5.90717,
            "6895": 5.78897,
            "6900": 5.90797,
            "6905": 6.01068,
            "6910": 5.8045,
            "6915": 5.89249,
            "6920": 6.11334,
            "6925": 5.8205,
            "6930": 5.96635,
            "6935": 5.92457,
            "6940": 5.81981,
            "6945": 5.63397,
            "6950": 5.91039,
            "6955": 5.61677,
            "6960": 5.82424,
            "6965": 5.68932,
            "6970": 5.87399,
            "6975": 5.84955,
            "6980": 5.90257,
            "6985": 5.91065,
            "6990": 5.73719,
            "6995": 5.9054,
            "7000": 5.82523,
            "7005": 5.90228,
            "7010": 5.73003,
            "7015": 5.96845,
            "7020": 5.6446,
            "7025": 5.86577,
            "7030": 5.88152,
            "7035": 5.9189,
            "7040": 5.81787,
            "7045": 5.89815,
            "7050": 5.84115,
            "7055": 6.17361,
            "7060": 5.72015,
            "7065": 5.52182,
            "7070": 5.86789,
            "7075": 5.79244,
            "7080": 5.80958,
            "7085": 6.05439,
            "7090": 5.93379,
            "7095": 5.70727,
            "7100": 5.86268,
            "7105": 5.85154,
            "7110": 5.78458,
            "7115": 5.80091,
            "7120": 5.82934,
            "7125": 5.78886,
            "7130": 5.90033,
            "7135": 5.65941,
            "7140": 5.93427,
            "7145": 5.72986,
            "7150": 5.86317,
            "7155": 5.7065,
            "7160": 5.52225,
            "7165": 5.80018,
            "7170": 5.92623,
            "7175": 5.75375,
            "7180": 5.82535,
            "7185": 6.0224,
            "7190": 5.78273,
            "7195": 6.02501,
            "7200": 5.37935,
            "7205": 5.82061,
            "7210": 5.74915,
            "7215": 5.72093,
            "7220": 5.98546,
            "7225": 5.87261,
            "7230": 5.83835,
            "7235": 5.80851,
            "7240": 5.8259,
            "7245": 5.75648,
            "7250": 5.96252,
            "7255": 5.76334,
            "7260": 5.92925,
            "7265": 5.84815,
            "7270": 5.68179,
            "7275": 5.59586,
            "7280": 5.67849,
            "7285": 6.10472,
            "7290": 5.76924,
            "7295": 5.81937,
            "7300": 6.03509,
            "7305": 5.80934,
            "7310": 5.74695,
            "7315": 5.6909,
            "7320": 5.83675,
            "7325": 5.9337,
            "7330": 5.89108,
            "7335": 5.80577,
            "7340": 6.1136,
            "7345": 5.90458,
            "7350": 5.91356,
            "7355": 5.78722,
            "7360": 5.87141,
            "7365": 5.97614,
            "7370": 5.92979,
            "7375": 6.00716,
            "7380": 5.63211,
            "7385": 5.61113,
            "7390": 5.39419,
            "7395": 5.83256,
            "7400": 6.03245,
            "7405": 5.69148,
            "7410": 5.51928,
            "7415": 5.75375,
            "7420": 5.75417,
            "7425": 5.733,
            "7430": 5.65427,
            "7435": 5.92054,
            "7440": 5.78542,
            "7445": 5.99713,
            "7450": 5.65725,
            "7455": 5.72995,
            "7460": 5.95379,
            "7465": 5.6808,
            "7470": 5.90469,
            "7475": 5.89319,
            "7480": 6.11195,
            "7485": 6.02039,
            "7490": 5.8168,
            "7495": 5.93647,
            "7500": 5.74991,
            "7505": 5.46619,
            "7510": 5.62244,
            "7515": 5.78286,
            "7520": 5.55278,
            "7525": 6.16463,
            "7530": 5.65317,
            "7535": 5.87694,
            "7540": 5.65259,
            "7545": 5.79488,
            "7550": 6.12441,
            "7555": 5.61567,
            "7560": 5.50782,
            "7565": 5.75094,
            "7570": 5.88931,
            "7575": 5.85557,
            "7580": 5.87125,
            "7585": 5.8267,
            "7590": 5.71931,
            "7595": 5.92462,
            "7600": 5.96136,
            "7605": 5.7615,
            "7610": 5.89452,
            "7615": 5.75747,
            "7620": 5.96421,
            "7625": 5.5843,
            "7630": 5.94658,
            "7635": 5.75732,
            "7640": 5.64897,
            "7645": 5.8923,
            "7650": 5.90412,
            "7655": 5.87425,
            "7660": 5.9548,
            "7665": 5.71915,
            "7670": 5.97542,
            "7675": 5.76036,
            "7680": 5.78599,
            "7685": 5.61497,
            "7690": 6.03077,
            "7695": 5.73811,
            "7700": 5.8856,
            "7705": 5.84047,
            "7710": 5.97112,
            "7715": 5.99982,
            "7720": 5.86584,
            "7725": 5.88482,
            "7730": 5.82361,
            "7735": 5.94931,
            "7740": 5.83895,
            "7745": 5.77861,
            "7750": 5.84336,
            "7755": 5.98554,
            "7760": 6.13849,
            "7765": 5.60055,
            "7770": 5.75449,
            "7775": 5.74563,
            "7780": 5.85808,
            "7785": 5.61474,
            "7790": 5.88932,
            "7795": 5.9397,
            "7800": 5.70444,
            "7805": 5.82042,
            "7810": 5.42653,
            "7815": 5.94527,
            "7820": 5.98621,
            "7825": 5.73926,
            "7830": 5.66154,
            "7835": 5.64296,
            "7840": 5.80263,
            "7845": 5.91715,
            "7850": 5.77327,
            "7855": 5.80417,
            "7860": 5.5592,
            "7865": 5.28087,
            "7870": 5.94979,
            "7875": 5.69521,
            "7880": 5.7958,
            "7885": 5.67319,
            "7890": 5.76311,
            "7895": 5.58053,
            "7900": 5.96658,
            "7905": 5.80605,
            "7910": 5.8507,
            "7915": 5.62842,
            "7920": 5.67666,
            "7925": 5.69388,
            "7930": 5.91819,
            "7935": 5.92707,
            "7940": 5.64731,
            "7945": 5.78385,
            "7950": 6.01815,
            "7955": 5.75835,
            "7960": 5.79433,
            "7965": 5.73401,
            "7970": 5.75277,
            "7975": 5.9152,
            "7980": 5.44967,
            "7985": 5.92115,
            "7990": 5.69904,
            "7995": 5.65286,
            "8000": 5.73876,
            "8005": 5.87459,
            "8010": 5.50372,
            "8015": 5.88044,
            "8020": 5.82876,
            "8025": 5.41576,
            "8030": 5.74489,
            "8035": 5.65915,
            "8040": 5.74206,
            "8045": 5.57857,
            "8050": 5.83125,
            "8055": 5.9538,
            "8060": 5.72828,
            "8065": 5.88003,
            "8070": 5.83935,
            "8075": 5.87361,
            "8080": 5.65938,
            "8085": 5.78199,
            "8090": 5.69121,
            "8095": 5.58829,
            "8100": 5.85115,
            "8105": 5.6859,
            "8110": 5.60742,
            "8115": 5.61724,
            "8120": 5.8228,
            "8125": 5.53465,
            "8130": 5.9203,
            "8135": 5.68091,
            "8140": 5.87214,
            "8145": 5.86119,
            "8150": 6.07219,
            "8155": 6.00603,
            "8160": 5.73629,
            "8165": 5.68047,
            "8170": 5.84905,
            "8175": 6.04437,
            "8180": 5.70515,
            "8185": 5.66947,
            "8190": 5.72771,
            "8195": 5.62106,
            "8200": 5.78321,
            "8205": 5.61152,
            "8210": 5.78623,
            "8215": 5.55602,
            "8220": 5.69585,
            "8225": 5.7736,
            "8230": 5.89412,
            "8235": 5.66204,
            "8240": 5.61382,
            "8245": 5.57757,
            "8250": 5.91166,
            "8255": 5.7693,
            "8260": 5.75568,
            "8265": 5.74082,
            "8270": 5.82245,
            "8275": 5.65643,
            "8280": 5.81317,
            "8285": 5.5483,
            "8290": 5.70726,
            "8295": 5.86414,
            "8300": 5.7416,
            "8305": 5.70269,
            "8310": 5.85378,
            "8315": 5.50195,
            "8320": 5.74655,
            "8325": 5.94475,
            "8330": 5.85837,
            "8335": 5.88778,
            "8340": 5.72264,
            "8345": 5.82839,
            "8350": 5.53832,
            "8355": 5.83572,
            "8360": 5.79885,
            "8365": 5.67132,
            "8370": 5.80413,
            "8375": 5.71747,
            "8380": 5.85216,
            "8385": 5.81102,
            "8390": 5.61993,
            "8395": 5.69102,
            "8400": 5.85713,
            "8405": 5.6995,
            "8410": 5.74118,
            "8415": 5.90187,
            "8420": 5.84151,
            "8425": 5.84913,
            "8430": 5.49558,
            "8435": 5.52918,
            "8440": 5.7633,
            "8445": 5.44117,
            "8450": 5.84666,
            "8455": 5.71495,
            "8460": 5.76773,
            "8465": 5.43224,
            "8470": 5.9523,
            "8475": 5.78663,
            "8480": 5.48642,
            "8485": 5.90595,
            "8490": 5.77827,
            "8495": 5.54943,
            "8500": 5.86529,
            "8505": 5.48231,
            "8510": 5.963,
            "8515": 5.66701,
            "8520": 5.80127,
            "8525": 5.40307,
            "8530": 5.68808,
            "8535": 5.95814,
            "8540": 5.57377,
            "8545": 5.81558,
            "8550": 5.64348,
            "8555": 5.81956,
            "8560": 5.83793,
            "8565": 5.86858,
            "8570": 5.4327,
            "8575": 5.7496,
            "8580": 5.6916,
            "8585": 5.48663,
            "8590": 5.70748,
            "8595": 5.5836,
            "8600": 5.85816,
            "8605": 5.77138,
            "8610": 5.73719,
            "8615": 5.85218,
            "8620": 5.48443,
            "8625": 5.64338,
            "8630": 5.8253,
            "8635": 5.52197,
            "8640": 5.67824,
            "8645": 5.76686,
            "8650": 5.74379,
            "8655": 5.62157,
            "8660": 5.64496,
            "8665": 5.75088,
            "8670": 5.44835,
            "8675": 5.56863,
            "8680": 5.8322,
            "8685": 5.89414,
            "8690": 5.72245,
            "8695": 5.89752,
            "8700": 5.6815,
            "8705": 5.75873,
            "8710": 5.75225,
            "8715": 5.63715,
            "8720": 5.67331,
            "8725": 5.78481,
            "8730": 5.81521,
            "8735": 5.84676,
            "8740": 5.78851,
            "8745": 5.64737,
            "8750": 5.95051,
            "8755": 5.64348,
            "8760": 5.51875,
            "8765": 5.60277,
            "8770": 6.04023,
            "8775": 5.56903,
            "8780": 6.01286,
            "8785": 5.71062,
            "8790": 5.82622,
            "8795": 5.83736,
            "8800": 5.72499,
            "8805": 5.85817,
            "8810": 5.82446,
            "8815": 5.6432,
            "8820": 5.68449,
            "8825": 5.51728,
            "8830": 5.7538,
            "8835": 5.75181,
            "8840": 5.5402,
            "8845": 5.62024,
            "8850": 5.74495,
            "8855": 5.44479,
            "8860": 5.54647,
            "8865": 5.67279,
            "8870": 5.58194,
            "8875": 5.73709,
            "8880": 5.66083,
            "8885": 5.72858,
            "8890": 5.72609,
            "8895": 5.6752,
            "8900": 5.44787,
            "8905": 5.71791,
            "8910": 5.73477,
            "8915": 5.9619,
            "8920": 5.21852,
            "8925": 5.72635,
            "8930": 5.7187,
            "8935": 5.10783,
            "8940": 5.89046,
            "8945": 5.82739,
            "8950": 5.71231,
            "8955": 5.77992,
            "8960": 5.29486,
            "8965": 6.00662,
            "8970": 5.64547,
            "8975": 5.445,
            "8980": 5.75674,
            "8985": 5.69857,
            "8990": 5.90019,
            "8995": 5.51495,
            "9000": 5.53064,
            "9005": 5.54408,
            "9010": 5.80778,
            "9015": 5.64714,
            "9020": 5.78242,
            "9025": 5.70604,
            "9030": 5.51491,
            "9035": 5.82403,
            "9040": 5.95281,
            "9045": 5.7711,
            "9050": 5.77995,
            "9055": 5.76613,
            "9060": 5.79819,
            "9065": 5.60729,
            "9070": 5.70823,
            "9075": 5.70876,
            "9080": 5.71489,
            "9085": 5.39612,
            "9090": 5.72192,
            "9095": 5.45181,
            "9100": 5.59871,
            "9105": 5.79926,
            "9110": 5.81817,
            "9115": 5.64734,
            "9120": 5.72522,
            "9125": 5.60967,
            "9130": 5.60517,
            "9135": 5.76904,
            "9140": 5.90524,
            "9145": 5.70938,
            "9150": 5.86336,
            "9155": 5.62471,
            "9160": 5.43685,
            "9165": 5.60061,
            "9170": 5.66335,
            "9175": 5.79239,
            "9180": 5.46575,
            "9185": 5.67669,
            "9190": 5.65585,
            "9195": 5.68894,
            "9200": 5.62991,
            "9205": 5.7736,
            "9210": 5.78597,
            "9215": 5.69625,
            "9220": 5.50698,
            "9225": 5.72776,
            "9230": 5.91243,
            "9235": 5.693,
            "9240": 5.53852,
            "9245": 5.83328,
            "9250": 5.75418,
            "9255": 5.61255,
            "9260": 5.40615,
            "9265": 5.79094,
            "9270": 5.69612,
            "9275": 5.58171,
            "9280": 5.42069,
            "9285": 5.85854,
            "9290": 5.64179,
            "9295": 5.94107,
            "9300": 5.75516,
            "9305": 5.74535,
            "9310": 5.37089,
            "9315": 5.63947,
            "9320": 5.60158,
            "9325": 5.52865,
            "9330": 5.61313,
            "9335": 5.87542,
            "9340": 5.51997,
            "9345": 5.7969,
            "9350": 5.55185,
            "9355": 5.4676,
            "9360": 5.56715,
            "9365": 5.32066,
            "9370": 5.42417,
            "9375": 5.71939,
            "9380": 5.63188,
            "9385": 5.57203,
            "9390": 5.67483,
            "9395": 5.63884,
            "9400": 5.62555,
            "9405": 5.34531,
            "9410": 5.49428,
            "9415": 5.51842,
            "9420": 5.65113,
            "9425": 5.81622,
            "9430": 5.42515,
            "9435": 5.15815,
            "9440": 5.65546,
            "9445": 5.68333,
            "9450": 5.56855,
            "9455": 5.50595,
            "9460": 5.69174,
            "9465": 5.54387,
            "9470": 5.72237,
            "9475": 5.66723,
            "9480": 5.39031,
            "9485": 5.71229,
            "9490": 5.8039,
            "9495": 5.66766,
            "9500": 5.65917,
            "9505": 5.91223,
            "9510": 5.67015,
            "9515": 5.491,
            "9520": 5.54917,
            "9525": 5.88717,
            "9530": 5.73492,
            "9535": 5.55035,
            "9540": 5.44362,
            "9545": 5.52124,
            "9550": 5.47734,
            "9555": 5.77347,
            "9560": 5.57476,
            "9565": 5.54522,
            "9570": 5.56865,
            "9575": 5.68483,
            "9580": 5.55023,
            "9585": 5.37571,
            "9590": 5.66923,
            "9595": 5.51291,
            "9600": 5.61893,
            "9605": 5.42208,
            "9610": 5.36858,
            "9615": 5.1738,
            "9620": 5.57292,
            "9625": 5.56276,
            "9630": 5.74891,
            "9635": 5.60448,
            "9640": 5.65865,
            "9645": 5.46567,
            "9650": 5.8263,
            "9655": 5.66172,
            "9660": 5.42609,
            "9665": 5.37612,
            "9670": 5.77941,
            "9675": 5.28757,
            "9680": 5.67447,
            "9685": 5.81802,
            "9690": 5.55945,
            "9695": 5.56876,
            "9700": 5.46237,
            "9705": 5.68306,
            "9710": 5.7503,
            "9715": 5.57529,
            "9720": 5.71964,
            "9725": 5.42982,
            "9730": 5.49329,
            "9735": 5.47437,
            "9740": 5.82746,
            "9745": 5.53323,
            "9750": 5.3978,
            "9755": 5.49486,
            "9760": 5.67233,
            "9765": 5.66204,
            "9770": 5.28461,
            "9775": 5.65074,
            "9780": 5.63553,
            "9785": 5.39063,
            "9790": 5.52031,
            "9795": 5.6687,
            "9800": 5.59313,
            "9805": 5.54004,
            "9810": 5.3068,
            "9815": 5.53845,
            "9820": 5.48029,
            "9825": 5.63366,
            "9830": 5.43813,
            "9835": 5.56667,
            "9840": 5.44976,
            "9845": 5.83278,
            "9850": 5.41057,
            "9855": 5.69693,
            "9860": 5.52581,
            "9865": 5.37342,
            "9870": 5.63476,
            "9875": 5.62792,
            "9880": 5.07157,
            "9885": 5.59073,
            "9890": 5.71451,
            "9895": 5.42128,
            "9900": 5.57593,
            "9905": 5.38308,
            "9910": 5.57024,
            "9915": 5.32519,
            "9920": 5.74191,
            "9925": 5.62492,
            "9930": 5.51144,
            "9935": 5.57037,
            "9940": 5.43671,
            "9945": 5.56075,
            "9950": 5.60099,
            "9955": 5.34464,
            "9960": 5.76242,
            "9965": 5.65119,
            "9970": 5.56217,
            "9975": 5.56718,
            "9980": 5.71847,
            "9985": 5.63539,
            "9990": 5.81639,
            "9995": 5.45572,
            "10000": 5.45448,
            "10005": 5.39738,
            "10010": 5.61267,
            "10015": 5.77036,
            "10020": 5.79586,
            "10025": 5.66482,
            "10030": 5.0993,
            "10035": 5.69617,
            "10040": 5.52017,
            "10045": 5.82837,
            "10050": 5.76266,
            "10055": 5.76404,
            "10060": 5.46895,
            "10065": 5.51487,
            "10070": 5.44653,
            "10075": 5.41801,
            "10080": 5.60768,
            "10085": 5.42292,
            "10090": 5.73001,
            "10095": 5.51278,
            "10100": 5.44884,
            "10105": 5.46976,
            "10110": 5.69028,
            "10115": 5.66776,
            "10120": 5.10063,
            "10125": 5.37123,
            "10130": 5.42838,
            "10135": 5.54553,
            "10140": 5.70131,
            "10145": 5.43609,
            "10150": 5.44624,
            "10155": 5.53869,
            "10160": 5.4444,
            "10165": 5.64172,
            "10170": 5.50237,
            "10175": 5.50398,
            "10180": 5.33184,
            "10185": 5.44414,
            "10190": 5.32168,
            "10195": 5.46099,
            "10200": 5.40905,
            "10205": 5.4104,
            "10210": 5.67789,
            "10215": 5.40208,
            "10220": 5.65033,
            "10225": 5.48748,
            "10230": 5.58442,
            "10235": 5.49678,
            "10240": 5.59106,
            "10245": 5.50753,
            "10250": 5.19275,
            "10255": 5.17126,
            "10260": 5.56055,
            "10265": 5.55627,
            "10270": 5.50719,
            "10275": 5.29993,
            "10280": 5.43355,
            "10285": 5.3053,
            "10290": 5.29106,
            "10295": 5.61733,
            "10300": 5.46832,
            "10305": 5.43472,
            "10310": 5.52056,
            "10315": 5.27456,
            "10320": 5.19648,
            "10325": 5.41177,
            "10330": 5.44446,
            "10335": 5.43569,
            "10340": 5.38265,
            "10345": 5.3178,
            "10350": 5.63552,
            "10355": 5.29351,
            "10360": 5.45518,
            "10365": 5.28537,
            "10370": 5.5167,
            "10375": 5.6331,
            "10380": 5.59594,
            "10385": 5.64026,
            "10390": 5.46343,
            "10395": 5.51573,
            "10400": 5.49574,
            "10405": 5.44791,
            "10410": 5.77322,
            "10415": 5.44851,
            "10420": 5.51067,
            "10425": 5.63306,
            "10430": 5.41187,
            "10435": 5.30508,
            "10440": 5.39228,
            "10445": 5.42104,
            "10450": 5.61992,
            "10455": 5.43156,
            "10460": 5.67333,
            "10465": 5.56768,
            "10470": 5.25081,
            "10475": 5.44858,
            "10480": 5.47308,
            "10485": 5.40542,
            "10490": 5.17619,
            "10495": 5.25768,
            "10500": 5.48614,
            "10505": 5.65487,
            "10510": 5.40859,
            "10515": 5.4972,
            "10520": 5.4824,
            "10525": 5.44435,
            "10530": 5.5799,
            "10535": 5.5906,
            "10540": 5.40192,
            "10545": 5.66677,
            "10550": 5.29417,
            "10555": 5.59713,
            "10560": 5.54221,
            "10565": 5.0689,
            "10570": 5.49013,
            "10575": 5.43053,
            "10580": 5.48708,
            "10585": 5.50442,
            "10590": 5.60713,
            "10595": 5.38045,
            "10600": 5.31301,
            "10605": 5.37475,
            "10610": 5.49841,
            "10615": 5.41668,
            "10620": 5.34542,
            "10625": 5.68083,
            "10630": 5.38239,
            "10635": 5.49239,
            "10640": 5.26628,
            "10645": 5.41784,
            "10650": 5.56582,
            "10655": 5.49684,
            "10660": 5.5114,
            "10665": 5.42752,
            "10670": 5.42578,
            "10675": 5.42272,
            "10680": 5.30839,
            "10685": 5.35009,
            "10690": 5.72623,
            "10695": 5.39676,
            "10700": 5.63301,
            "10705": 5.39246,
            "10710": 5.26285,
            "10715": 4.85122,
            "10720": 5.41457,
            "10725": 5.13264,
            "10730": 5.4398,
            "10735": 5.28846,
            "10740": 5.56171,
            "10745": 5.39146,
            "10750": 4.981,
            "10755": 5.48363,
            "10760": 5.35974,
            "10765": 5.46424,
            "10770": 5.29479,
            "10775": 5.43827,
            "10780": 5.46618,
            "10785": 5.15981,
            "10790": 5.15144,
            "10795": 5.48996,
            "10800": 5.28698,
            "10805": 5.41302,
            "10810": 5.22487,
            "10815": 5.25558,
            "10820": 5.50332,
            "10825": 5.59768,
            "10830": 5.2898,
            "10835": 5.44857,
            "10840": 5.23103,
            "10845": 5.23178,
            "10850": 5.54621,
            "10855": 5.31229,
            "10860": 5.44692,
            "10865": 5.57098,
            "10870": 5.72651,
            "10875": 5.5894,
            "10880": 5.15458,
            "10885": 5.43015,
            "10890": 5.41961,
            "10895": 5.27356,
            "10900": 5.33399,
            "10905": 5.45975,
            "10910": 5.52123,
            "10915": 5.38522,
            "10920": 5.4865,
            "10925": 5.38967,
            "10930": 5.32343,
            "10935": 5.56203,
            "10940": 5.48646,
            "10945": 5.50133,
            "10950": 5.09878,
            "10955": 5.21281,
            "10960": 5.31994,
            "10965": 5.30069,
            "10970": 5.21482,
            "10975": 5.32144,
            "10980": 5.00466,
            "10985": 5.3417,
            "10990": 5.29457,
            "10995": 5.38489,
            "11000": 5.10342,
            "11005": 5.29629,
            "11010": 5.41226,
            "11015": 5.18733,
            "11020": 5.3114,
            "11025": 5.12156,
            "11030": 5.38006,
            "11035": 5.27404,
            "11040": 5.38953,
            "11045": 5.09468,
            "11050": 5.2871,
            "11055": 5.54488,
            "11060": 5.38812,
            "11065": 5.31649,
            "11070": 5.29049,
            "11075": 5.42471,
            "11080": 5.18478,
            "11085": 5.25859,
            "11090": 5.31518,
            "11095": 5.25785,
            "11100": 5.35766,
            "11105": 5.31621,
            "11110": 5.31336,
            "11115": 5.35389,
            "11120": 4.8879,
            "11125": 5.39122,
            "11130": 5.5925,
            "11135": 5.21331,
            "11140": 5.20276,
            "11145": 5.34978,
            "11150": 5.62584,
            "11155": 5.33386,
            "11160": 5.07946,
            "11165": 5.33424,
            "11170": 5.31754,
            "11175": 5.23341,
            "11180": 5.06347,
            "11185": 5.16813,
            "11190": 5.43057,
            "11195": 5.12904,
            "11200": 5.10636,
            "11205": 5.33595,
            "11210": 5.27203,
            "11215": 5.54721,
            "11220": 5.13841,
            "11225": 5.3839,
            "11230": 5.22825,
            "11235": 5.28994,
            "11240": 5.12826,
            "11245": 5.53477,
            "11250": 5.42918,
            "11255": 5.22503,
            "11260": 5.35958,
            "11265": 5.03432,
            "11270": 5.26427,
            "11275": 5.29353,
            "11280": 4.90851,
            "11285": 5.332,
            "11290": 5.11437,
            "11295": 5.23963,
            "11300": 5.05146,
            "11305": 5.48215,
            "11310": 5.39256,
            "11315": 5.35606,
            "11320": 5.4758,
            "11325": 5.32336,
            "11330": 5.26573,
            "11335": 5.29563,
            "11340": 5.49047,
            "11345": 5.064,
            "11350": 5.42964,
            "11355": 5.409,
            "11360": 5.64607,
            "11365": 5.41525,
            "11370": 5.48275,
            "11375": 5.33048,
            "11380": 5.4322,
            "11385": 5.23009,
            "11390": 5.23261,
            "11395": 5.33241,
            "11400": 4.98025,
            "11405": 5.14005,
            "11410": 5.30966,
            "11415": 5.53229,
            "11420": 5.01369,
            "11425": 5.19819,
            "11430": 5.25316,
            "11435": 5.07545,
            "11440": 5.3081,
            "11445": 5.31049,
            "11450": 5.44217,
            "11455": 5.36096,
            "11460": 5.18508,
            "11465": 5.4933,
            "11470": 5.26596,
            "11475": 5.35233,
            "11480": 5.15707,
            "11485": 5.32001,
            "11490": 5.27723,
            "11495": 5.63986,
            "11500": 5.32274,
            "11505": 5.14223,
            "11510": 5.24366,
            "11515": 5.11207,
            "11520": 5.24546,
            "11525": 5.26959,
            "11530": 5.12797,
            "11535": 5.38311,
            "11540": 5.16295,
            "11545": 5.22043,
            "11550": 5.17458,
            "11555": 5.43137,
            "11560": 5.29562,
            "11565": 5.10289,
            "11570": 5.14037,
            "11575": 5.19189,
            "11580": 5.64014,
            "11585": 5.35689,
            "11590": 5.11402,
            "11595": 5.33476,
            "11600": 5.26225,
            "11605": 5.33697,
            "11610": 5.22122,
            "11615": 5.29457,
            "11620": 5.08412,
            "11625": 5.26554,
            "11630": 5.18587,
            "11635": 5.50489,
            "11640": 5.02885,
            "11645": 5.14616,
            "11650": 5.10291,
            "11655": 5.4499,
            "11660": 5.17108,
            "11665": 5.29299,
            "11670": 5.31567,
            "11675": 5.02157,
            "11680": 5.32732,
            "11685": 5.43318,
            "11690": 5.31331,
            "11695": 5.35129,
            "11700": 5.27996,
            "11705": 5.1872,
            "11710": 5.06668,
            "11715": 5.04935,
            "11720": 5.30572,
            "11725": 5.19926,
            "11730": 5.21538,
            "11735": 5.24918,
            "11740": 5.23986,
            "11745": 5.16815,
            "11750": 5.20491,
            "11755": 5.20858,
            "11760": 5.3352,
            "11765": 5.31609,
            "11770": 5.07868,
            "11775": 5.57919,
            "11780": 5.34556,
            "11785": 5.57429,
            "11790": 5.30551,
            "11795": 5.35404,
            "11800": 5.1852,
            "11805": 5.54461,
            "11810": 5.15202,
            "11815": 5.42878,
            "11820": 5.05403,
            "11825": 4.92266,
            "11830": 5.1751,
            "11835": 5.19978,
            "11840": 5.21071,
            "11845": 5.21284,
            "11850": 5.35221,
            "11855": 5.05445,
            "11860": 5.4881,
            "11865": 5.26874,
            "11870": 5.44665,
            "11875": 5.09483,
            "11880": 5.30766,
            "11885": 5.06698,
            "11890": 5.2941,
            "11895": 4.95694,
            "11900": 5.39039,
            "11905": 5.21316,
            "11910": 5.41004,
            "11915": 5.17113,
            "11920": 5.29021,
            "11925": 5.17261,
            "11930": 5.19505,
            "11935": 5.14587,
            "11940": 4.96064,
            "11945": 5.39723,
            "11950": 5.15321,
            "11955": 5.04998,
            "11960": 4.90464,
            "11965": 5.08488,
            "11970": 5.22544,
            "11975": 5.04572,
            "11980": 5.08519,
            "11985": 5.21241,
            "11990": 5.16536,
            "11995": 5.13573,
            "12000": 5.12979,
            "12005": 5.07596,
            "12010": 5.25888,
            "12015": 4.9802,
            "12020": 5.25228,
            "12025": 5.12879,
            "12030": 5.0002,
            "12035": 4.95417,
            "12040": 5.15502,
            "12045": 5.38994,
            "12050": 5.32919,
            "12055": 4.98881,
            "12060": 5.22609,
            "12065": 5.27515,
            "12070": 5.21791,
            "12075": 5.30929,
            "12080": 5.2683,
            "12085": 5.11686,
            "12090": 5.03402,
            "12095": 5.5121,
            "12100": 5.02606,
            "12105": 5.14792,
            "12110": 5.16713,
            "12115": 4.9891,
            "12120": 4.83721,
            "12125": 5.08818,
            "12130": 5.26764,
            "12135": 5.16808,
            "12140": 5.29189,
            "12145": 5.07014,
            "12150": 4.89415,
            "12155": 5.12496,
            "12160": 5.16956,
            "12165": 5.24424,
            "12170": 5.20623,
            "12175": 5.27477,
            "12180": 5.34031,
            "12185": 5.24286,
            "12190": 4.84026,
            "12195": 5.03932,
            "12200": 5.21533,
            "12205": 5.11828,
            "12210": 5.0704,
            "12215": 5.06061,
            "12220": 5.27456,
            "12225": 5.18531,
            "12230": 4.99911,
            "12235": 5.3344,
            "12240": 5.16608,
            "12245": 5.13135,
            "12250": 5.25773,
            "12255": 4.99895,
            "12260": 4.90166,
            "12265": 4.88798,
            "12270": 5.06482,
            "12275": 5.16599,
            "12280": 4.88225,
            "12285": 5.00774,
            "12290": 5.19427,
            "12295": 5.3016,
            "12300": 5.50281,
            "12305": 5.20903,
            "12310": 5.29527,
            "12315": 5.10033,
            "12320": 5.05608,
            "12325": 5.29877,
            "12330": 5.24693,
            "12335": 5.40298,
            "12340": 4.98491,
            "12345": 5.34706,
            "12350": 4.99138,
            "12355": 5.08985,
            "12360": 5.04893,
            "12365": 4.80144,
            "12370": 5.16001,
            "12375": 4.87835,
            "12380": 5.26154,
            "12385": 5.23267,
            "12390": 5.10358,
            "12395": 5.20581,
            "12400": 5.16312,
            "12405": 5.34574,
            "12410": 5.20455,
            "12415": 5.36077,
            "12420": 5.4197,
            "12425": 5.29184,
            "12430": 4.9743,
            "12435": 5.0261,
            "12440": 5.05464,
            "12445": 5.26597,
            "12450": 5.17156,
            "12455": 5.07688,
            "12460": 4.84462,
            "12465": 4.99389,
            "12470": 5.4686,
            "12475": 5.15648,
            "12480": 4.96068,
            "12485": 5.28532,
            "12490": 5.07565,
            "12495": 5.40759,
            "12500": 5.51297,
            "12505": 5.36523,
            "12510": 4.83111,
            "12515": 5.09706,
            "12520": 5.14827,
            "12525": 5.11051,
            "12530": 4.9552,
            "12535": 5.33979,
            "12540": 4.94899,
            "12545": 4.98309,
            "12550": 5.45815,
            "12555": 5.02911,
            "12560": 4.96913,
            "12565": 5.31427,
            "12570": 5.04353,
            "12575": 5.16667,
            "12580": 4.96238,
            "12585": 5.28564,
            "12590": 4.88176,
            "12595": 5.26059,
            "12600": 5.26961,
            "12605": 5.09481,
            "12610": 5.2393,
            "12615": 5.17465,
            "12620": 5.36414,
            "12625": 5.07887,
            "12630": 5.15511,
            "12635": 5.3095,
            "12640": 4.98755,
            "12645": 5.29194,
            "12650": 4.98171,
            "12655": 5.11037,
            "12660": 5.17499,
            "12665": 5.13245,
            "12670": 5.04414,
            "12675": 5.34225,
            "12680": 4.94198,
            "12685": 4.94431,
            "12690": 5.23339,
            "12695": 4.94219,
            "12700": 5.05057,
            "12705": 5.17004,
            "12710": 4.98399,
            "12715": 5.01338,
            "12720": 4.9894,
            "12725": 5.07118,
            "12730": 4.95014,
            "12735": 4.92364,
            "12740": 5.15494,
            "12745": 4.83207,
            "12750": 4.8335,
            "12755": 5.07641,
            "12760": 4.78878,
            "12765": 5.19536,
            "12770": 5.02771,
            "12775": 5.09088,
            "12780": 5.30458,
            "12785": 5.15053,
            "12790": 5.03509,
            "12795": 5.10603,
            "12800": 5.2602,
            "12805": 4.79487,
            "12810": 5.13006,
            "12815": 4.92708,
            "12820": 4.91318,
            "12825": 5.15114,
            "12830": 5.01884,
            "12835": 5.28631,
            "12840": 5.07012,
            "12845": 5.17174,
            "12850": 4.79319,
            "12855": 5.07071,
            "12860": 5.15044,
            "12865": 5.10644,
            "12870": 4.99855,
            "12875": 5.18966,
            "12880": 5.03667,
            "12885": 5.10731,
            "12890": 5.47021,
            "12895": 5.08455,
            "12900": 4.85157,
            "12905": 5.20059,
            "12910": 4.90778,
            "12915": 5.01349,
            "12920": 5.27106,
            "12925": 5.08203,
            "12930": 5.04628,
            "12935": 5.02986,
            "12940": 5.27007,
            "12945": 4.78308,
            "12950": 5.21789,
            "12955": 4.76084,
            "12960": 5.10211,
            "12965": 5.05105,
            "12970": 4.75266,
            "12975": 5.0794,
            "12980": 4.966,
            "12985": 4.95098,
            "12990": 4.9252,
            "12995": 5.12385,
            "13000": 5.11248,
            "13005": 5.12747,
            "13010": 4.82895,
            "13015": 5.02726,
            "13020": 5.02576,
            "13025": 5.10289,
            "13030": 5.1175,
            "13035": 4.93431,
            "13040": 5.12657,
            "13045": 5.07393,
            "13050": 5.05855,
            "13055": 4.97631,
            "13060": 5.22803,
            "13065": 5.322,
            "13070": 5.17059,
            "13075": 5.26351,
            "13080": 5.01174,
            "13085": 5.24894,
            "13090": 5.08493,
            "13095": 5.15063,
            "13100": 5.07629,
            "13105": 5.13653,
            "13110": 5.06927,
            "13115": 4.86174,
            "13120": 5.1056,
            "13125": 4.95536,
            "13130": 5.15732,
            "13135": 5.01685,
            "13140": 4.90769,
            "13145": 5.42067,
            "13150": 5.15937,
            "13155": 5.24065,
            "13160": 4.89614,
            "13165": 5.02408,
            "13170": 5.26076,
            "13175": 5.05786,
            "13180": 4.69677,
            "13185": 5.30718,
            "13190": 4.97852,
            "13195": 5.26809,
            "13200": 4.90724,
            "13205": 5.335,
            "13210": 4.80096,
            "13215": 5.17831,
            "13220": 4.8429,
            "13225": 5.22175,
            "13230": 5.02105,
            "13235": 5.27301,
            "13240": 5.04417,
            "13245": 5.22899,
            "13250": 4.86866,
            "13255": 4.71847,
            "13260": 5.03377,
            "13265": 5.13709,
            "13270": 5.10824,
            "13275": 5.04779,
            "13280": 4.77767,
            "13285": 5.30808,
            "13290": 5.1795,
            "13295": 5.2129,
            "13300": 5.11053,
            "13305": 4.84262,
            "13310": 4.87306,
            "13315": 5.1051,
            "13320": 5.13564,
            "13325": 4.97561,
            "13330": 4.9481,
            "13335": 5.06769,
            "13340": 4.95857,
            "13345": 4.7664,
            "13350": 5.10203,
            "13355": 4.96381,
            "13360": 4.9395,
            "13365": 4.97848,
            "13370": 4.94301,
            "13375": 5.07502,
            "13380": 5.08495,
            "13385": 4.95,
            "13390": 5.24515,
            "13395": 5.15438,
            "13400": 4.8467,
            "13405": 4.97624,
            "13410": 4.7682,
            "13415": 4.85975,
            "13420": 5.00666,
            "13425": 4.93687,
            "13430": 5.0022,
            "13435": 4.9429,
            "13440": 4.88489,
            "13445": 4.88338,
            "13450": 5.0071,
            "13455": 4.83097,
            "13460": 5.01697,
            "13465": 4.99229,
            "13470": 5.05682,
            "13475": 4.94808,
            "13480": 5.10327,
            "13485": 4.9782,
            "13490": 5.254,
            "13495": 5.10716,
            "13500": 4.75951,
            "13505": 4.96739,
            "13510": 5.19627,
            "13515": 4.99345,
            "13520": 5.10446,
            "13525": 4.93902,
            "13530": 4.79818,
            "13535": 4.8274,
            "13540": 5.08758,
            "13545": 4.85313,
            "13550": 5.08194,
            "13555": 5.06478,
            "13560": 5.2769,
            "13565": 5.15907,
            "13570": 4.98919,
            "13575": 5.01379,
            "13580": 4.722,
            "13585": 5.39355,
            "13590": 5.11328,
            "13595": 5.01027,
            "13600": 4.99172,
            "13605": 5.02832,
            "13610": 5.09815,
            "13615": 4.86443,
            "13620": 4.96652,
            "13625": 5.17104,
            "13630": 4.781,
            "13635": 4.72384,
            "13640": 4.99692,
            "13645": 4.9662,
            "13650": 4.73196,
            "13655": 4.77385,
            "13660": 4.96103,
            "13665": 5.06142,
            "13670": 4.88411,
            "13675": 5.22164,
            "13680": 5.261,
            "13685": 4.96337,
            "13690": 4.94919,
            "13695": 5.08498,
            "13700": 4.93178,
            "13705": 4.98195,
            "13710": 4.92215,
            "13715": 4.83701,
            "13720": 4.88323,
            "13725": 4.89242,
            "13730": 4.86392,
            "13735": 4.95529,
            "13740": 5.17702,
            "13745": 5.09037,
            "13750": 4.93248,
            "13755": 4.80564,
            "13760": 4.69774,
            "13765": 5.06104,
            "13770": 5.10683,
            "13775": 4.85811,
            "13780": 4.72964,
            "13785": 5.09235,
            "13790": 4.78355,
            "13795": 4.78215,
            "13800": 4.92289,
            "13805": 4.89862,
            "13810": 4.99093,
            "13815": 4.79145,
            "13820": 4.81419,
            "13825": 4.93429,
            "13830": 5.12809,
            "13835": 4.94071,
            "13840": 5.23972,
            "13845": 4.97817,
            "13850": 4.83487,
            "13855": 4.88312,
            "13860": 5.00925,
            "13865": 4.8067,
            "13870": 5.15022,
            "13875": 4.84755,
            "13880": 4.97071,
            "13885": 5.0433,
            "13890": 4.93346,
            "13895": 4.81559,
            "13900": 4.59343,
            "13905": 4.93006,
            "13910": 4.9035,
            "13915": 5.01645,
            "13920": 4.76686,
            "13925": 5.17285,
            "13930": 4.64886,
            "13935": 5.06608,
            "13940": 5.22218,
            "13945": 4.9803,
            "13950": 5.01774,
            "13955": 4.70892,
            "13960": 4.95331,
            "13965": 5.10571,
            "13970": 5.08811,
            "13975": 4.92444,
            "13980": 4.89253,
            "13985": 4.94743,
            "13990": 4.98804,
            "13995": 4.92367,
            "14000": 5.06732,
            "14005": 4.88276,
            "14010": 4.89361,
            "14015": 4.80059,
            "14020": 5.07983,
            "14025": 4.9182,
            "14030": 4.95291,
            "14035": 4.78155,
            "14040": 4.7617,
            "14045": 4.8668,
            "14050": 4.88373,
            "14055": 5.07894,
            "14060": 4.93126,
            "14065": 5.05625,
            "14070": 4.8199,
            "14075": 5.01674,
            "14080": 4.68468,
            "14085": 5.12159,
            "14090": 5.0869,
            "14095": 4.87877,
            "14100": 4.9705,
            "14105": 4.8864,
            "14110": 4.82484,
            "14115": 5.05346,
            "14120": 5.04623,
            "14125": 4.45693,
            "14130": 5.0172,
            "14135": 4.84494,
            "14140": 5.07627,
            "14145": 4.65086,
            "14150": 4.87095,
            "14155": 4.84402,
            "14160": 4.77875,
            "14165": 4.95744,
            "14170": 4.99254,
            "14175": 4.84822,
            "14180": 4.81615,
            "14185": 4.63245,
            "14190": 5.03679,
            "14195": 4.97512,
            "14200": 4.58781,
            "14205": 5.08179,
            "14210": 5.0628,
            "14215": 4.92802,
            "14220": 4.7389,
            "14225": 5.30477,
            "14230": 4.94586,
            "14235": 4.75904,
            "14240": 4.86831,
            "14245": 4.89419,
            "14250": 5.19553,
            "14255": 5.22694,
            "14260": 4.86205,
            "14265": 4.96135,
            "14270": 5.2651,
            "14275": 5.01835,
            "14280": 4.99108,
            "14285": 4.69233,
            "14290": 5.157,
            "14295": 4.96129,
            "14300": 4.73314,
            "14305": 4.84197,
            "14310": 5.18251,
            "14315": 4.86545,
            "14320": 5.10792,
            "14325": 5.07603,
            "14330": 4.58363,
            "14335": 5.02189,
            "14340": 4.67099,
            "14345": 4.86248,
            "14350": 4.98567,
            "14355": 4.65625,
            "14360": 4.87177,
            "14365": 4.85035,
            "14370": 4.90377,
            "14375": 4.96268,
            "14380": 4.81463,
            "14385": 5.15815,
            "14390": 4.95826,
            "14395": 4.69235,
            "14400": 4.83745,
            "14405": 4.92114,
            "14410": 4.83264,
            "14415": 5.0625,
            "14420": 5.21412,
            "14425": 4.97503,
            "14430": 5.01114,
            "14435": 5.15086,
            "14440": 4.54716,
            "14445": 4.58051,
            "14450": 5.0477,
            "14455": 4.58494,
            "14460": 4.8713,
            "14465": 4.97262,
            "14470": 4.90007,
            "14475": 4.78199,
            "14480": 4.90689,
            "14485": 4.90936,
            "14490": 4.98819,
            "14495": 4.7845,
            "14500": 4.91488,
            "14505": 5.15281,
            "14510": 4.6078,
            "14515": 4.89414,
            "14520": 4.90056,
            "14525": 5.06847,
            "14530": 4.54613,
            "14535": 5.2252,
            "14540": 4.97574,
            "14545": 4.87428,
            "14550": 4.88586,
            "14555": 4.73355,
            "14560": 4.59163,
            "14565": 4.79432,
            "14570": 4.93756,
            "14575": 5.09785,
            "14580": 4.96759,
            "14585": 4.75999,
            "14590": 4.79689,
            "14595": 4.95524,
            "14600": 4.77796,
            "14605": 4.7761,
            "14610": 4.82441,
            "14615": 5.0521,
            "14620": 4.77713,
            "14625": 4.79255,
            "14630": 5.07408,
            "14635": 4.85126,
            "14640": 4.8002,
            "14645": 5.12059,
            "14650": 4.95293,
            "14655": 4.84797,
            "14660": 5.02873,
            "14665": 4.88839,
            "14670": 4.47402,
            "14675": 5.0046,
            "14680": 4.71185,
            "14685": 4.63711,
            "14690": 4.62325,
            "14695": 4.69572,
            "14700": 4.93487,
            "14705": 4.86596,
            "14710": 4.80614,
            "14715": 5.06867,
            "14720": 4.70206,
            "14725": 4.83637,
            "14730": 4.59291,
            "14735": 4.66946,
            "14740": 4.90847,
            "14745": 4.87165,
            "14750": 4.9452,
            "14755": 4.89775,
            "14760": 5.1262,
            "14765": 4.98213,
            "14770": 4.92312,
            "14775": 4.47141,
            "14780": 4.82405,
            "14785": 4.84707,
            "14790": 4.73498,
            "14795": 4.78284,
            "14800": 4.70781,
            "14805": 4.63459,
            "14810": 4.91404,
            "14815": 4.62215,
            "14820": 4.58473,
            "14825": 4.68746,
            "14830": 5.10846,
            "14835": 4.53125,
            "14840": 5.05128,
            "14845": 5.19247,
            "14850": 4.55808,
            "14855": 4.52084,
            "14860": 5.05563,
            "14865": 4.74425,
            "14870": 5.03344,
            "14875": 4.73217,
            "14880": 4.98246,
            "14885": 4.94209,
            "14890": 4.78201,
            "14895": 4.73843,
            "14900": 4.81477,
            "14905": 4.66706,
            "14910": 5.09996,
            "14915": 4.88285,
            "14920": 5.01675,
            "14925": 4.73942,
            "14930": 4.86807,
            "14935": 5.12806,
            "14940": 4.67223,
            "14945": 4.89615,
            "14950": 4.99014,
            "14955": 4.89293,
            "14960": 4.81479,
            "14965": 4.82249,
            "14970": 4.74157,
            "14975": 4.90845,
            "14980": 4.76388,
            "14985": 4.99903,
            "14990": 4.62563,
            "14995": 5.13266,
            "15000": 4.62498,
            "15005": 4.52218,
            "15010": 4.7871,
            "15015": 4.74455,
            "15020": 4.89398,
            "15025": 4.91434,
            "15030": 4.8599,
            "15035": 4.62418,
            "15040": 4.65693,
            "15045": 4.77515,
            "15050": 4.85562,
            "15055": 4.46231,
            "15060": 4.99464,
            "15065": 4.66208,
            "15070": 4.71317,
            "15075": 4.77616,
            "15080": 4.74703,
            "15085": 4.76255,
            "15090": 5.09646,
            "15095": 4.79939,
            "15100": 4.85981,
            "15105": 4.74989,
            "15110": 4.68363,
            "15115": 5.0107,
            "15120": 4.78472,
            "15125": 4.89988,
            "15130": 4.73603,
            "15135": 4.80516,
            "15140": 4.87103,
            "15145": 4.83148,
            "15150": 5.04261,
            "15155": 4.79253,
            "15160": 4.63838,
            "15165": 4.63616,
            "15170": 4.69095,
            "15175": 4.35958,
            "15180": 4.94989,
            "15185": 4.73968,
            "15190": 4.83404,
            "15195": 4.72368,
            "15200": 4.99464,
            "15205": 4.73751,
            "15210": 5.0314,
            "15215": 5.0337,
            "15220": 4.93225,
            "15225": 4.8974,
            "15230": 4.58442,
            "15235": 4.93022,
            "15240": 4.70614,
            "15245": 4.78421,
            "15250": 4.61867,
            "15255": 5.02811,
            "15260": 4.63778,
            "15265": 4.826,
            "15270": 4.64498,
            "15275": 5.07568,
            "15280": 4.87832,
            "15285": 4.75493,
            "15290": 4.88497,
            "15295": 5.11284,
            "15300": 4.67729,
            "15305": 4.89242,
            "15310": 4.77497,
            "15315": 4.66587,
            "15320": 4.93642,
            "15325": 5.13245,
            "15330": 4.7233,
            "15335": 5.04781,
            "15340": 4.76029,
            "15345": 4.74928,
            "15350": 4.57791,
            "15355": 4.86277,
            "15360": 4.64135,
            "15365": 4.94992,
            "15370": 4.88006,
            "15375": 4.90519,
            "15380": 4.51043,
            "15385": 4.63896,
            "15390": 4.89707,
            "15395": 4.53516,
            "15400": 4.80658,
            "15405": 4.50289,
            "15410": 4.60417,
            "15415": 4.76783,
            "15420": 4.99019,
            "15425": 4.77254,
            "15430": 4.66523,
            "15435": 4.6477,
            "15440": 4.86964,
            "15445": 4.85381,
            "15450": 4.617,
            "15455": 5.09657,
            "15460": 4.60491,
            "15465": 4.86977,
            "15470": 4.65755,
            "15475": 4.67083,
            "15480": 4.43437,
            "15485": 4.82445,
            "15490": 4.88238,
            "15495": 4.77175,
            "15500": 4.69489,
            "15505": 4.72311,
            "15510": 4.81122,
            "15515": 4.90513,
            "15520": 4.76229,
            "15525": 4.86479,
            "15530": 4.49282,
            "15535": 4.9159,
            "15540": 4.75554,
            "15545": 4.49215,
            "15550": 4.69815,
            "15555": 4.96397,
            "15560": 4.7907,
            "15565": 4.74977,
            "15570": 4.89822,
            "15575": 4.80497,
            "15580": 4.58905,
            "15585": 4.86616,
            "15590": 4.7745,
            "15595": 4.97573,
            "15600": 4.84517,
            "15605": 4.92525,
            "15610": 4.99658,
            "15615": 4.89825,
            "15620": 4.77826,
            "15625": 4.66244,
            "15630": 4.3159,
            "15635": 4.96955,
            "15640": 4.76836,
            "15645": 4.70358,
            "15650": 4.79252,
            "15655": 4.88939,
            "15660": 4.88048,
            "15665": 4.88021,
            "15670": 4.69952,
            "15675": 4.63279,
            "15680": 4.91851,
            "15685": 4.69419,
            "15690": 4.9997,
            "15695": 4.9379,
            "15700": 4.90601,
            "15705": 4.72114,
            "15710": 4.93068,
            "15715": 4.84024,
            "15720": 4.69986,
            "15725": 4.67042,
            "15730": 4.78352,
            "15735": 4.5362,
            "15740": 4.9068,
            "15745": 4.76769,
            "15750": 4.90403,
            "15755": 4.83083,
            "15760": 4.58091,
            "15765": 4.83142,
            "15770": 4.78481,
            "15775": 4.90601,
            "15780": 4.72524,
            "15785": 4.84635,
            "15790": 4.9457,
            "15795": 4.78133,
            "15800": 5.02767,
            "15805": 4.98891,
            "15810": 4.74932,
            "15815": 4.56872,
            "15820": 4.42049,
            "15825": 5.20357,
            "15830": 4.71547,
            "15835": 4.71912,
            "15840": 4.84135,
            "15845": 5.08675,
            "15850": 4.6542,
            "15855": 4.76943,
            "15860": 4.8623,
            "15865": 4.65139,
            "15870": 4.67308,
            "15875": 4.88833,
            "15880": 4.83878,
            "15885": 4.58143,
            "15890": 4.83384,
            "15895": 4.64788,
            "15900": 4.92605,
            "15905": 4.74661,
            "15910": 4.72983,
            "15915": 5.13453,
            "15920": 4.73263,
            "15925": 5.00441,
            "15930": 4.71277,
            "15935": 4.75444,
            "15940": 4.68556,
            "15945": 4.70267,
            "15950": 4.62207,
            "15955": 4.57956,
            "15960": 4.98056,
            "15965": 4.43255,
            "15970": 4.76459,
            "15975": 4.75011,
            "15980": 4.53546,
            "15985": 4.76038,
            "15990": 4.45186,
            "15995": 4.92147,
            "16000": 4.7459,
            "16005": 4.80806,
            "16010": 4.7528,
            "16015": 4.76945,
            "16020": 4.9481,
            "16025": 4.55741,
            "16030": 5.09834,
            "16035": 4.77601,
            "16040": 4.98811,
            "16045": 4.61088,
            "16050": 4.70176,
            "16055": 4.246,
            "16060": 4.7392,
            "16065": 5.01197,
            "16070": 4.29634,
            "16075": 4.60971,
            "16080": 4.87669,
            "16085": 4.50092,
            "16090": 4.85481,
            "16095": 4.41033,
            "16100": 4.74244,
            "16105": 4.79886,
            "16110": 4.57907,
            "16115": 4.80292,
            "16120": 4.59931,
            "16125": 4.46984,
            "16130": 4.68608,
            "16135": 4.45596,
            "16140": 4.93383,
            "16145": 4.79622,
            "16150": 4.92972,
            "16155": 4.56959,
            "16160": 4.98203,
            "16165": 4.69214,
            "16170": 5.23683,
            "16175": 4.70115,
            "16180": 4.86225,
            "16185": 4.74615,
            "16190": 4.69609,
            "16195": 4.85049,
            "16200": 4.6588,
            "16205": 4.93358,
            "16210": 4.88647,
            "16215": 4.6899,
            "16220": 4.76998,
            "16225": 4.73628,
            "16230": 5.02674,
            "16235": 4.70021,
            "16240": 4.58127,
            "16245": 4.72301,
            "16250": 4.90064,
            "16255": 4.91686,
            "16260": 4.74714,
            "16265": 4.82977,
            "16270": 4.14977,
            "16275": 4.77769,
            "16280": 4.77102,
            "16285": 4.67064,
            "16290": 4.76758,
            "16295": 4.62512,
            "16300": 4.81077,
            "16305": 4.69385,
            "16310": 4.56324,
            "16315": 4.79997,
            "16320": 5.01764,
            "16325": 4.4735,
            "16330": 4.32462,
            "16335": 4.76811,
            "16340": 4.70211,
            "16345": 4.58746,
            "16350": 4.62345,
            "16355": 4.67771,
            "16360": 4.46525,
            "16365": 4.82515,
            "16370": 4.61769,
            "16375": 4.59517,
            "16380": 4.72728,
            "16385": 4.85951,
            "16390": 4.55669,
            "16395": 4.79013,
            "16400": 4.64139,
            "16405": 4.70564,
            "16410": 4.64782,
            "16415": 4.83937,
            "16420": 4.67874,
            "16425": 4.81889,
            "16430": 4.72496,
            "16435": 4.51167,
            "16440": 4.84832,
            "16445": 4.74967,
            "16450": 4.60547,
            "16455": 4.6557,
            "16460": 4.70271,
            "16465": 4.72711,
            "16470": 4.67806,
            "16475": 4.66669,
            "16480": 4.80073,
            "16485": 5.07715,
            "16490": 4.60733,
            "16495": 4.60449,
            "16500": 4.86489,
            "16505": 4.99936,
            "16510": 4.8794,
            "16515": 4.81559,
            "16520": 4.57084,
            "16525": 4.66139,
            "16530": 4.79939,
            "16535": 4.97921,
            "16540": 4.65811,
            "16545": 4.73213,
            "16550": 4.58101,
            "16555": 4.61155,
            "16560": 4.65191,
            "16565": 4.5346,
            "16570": 4.59594,
            "16575": 4.83777,
            "16580": 4.75075,
            "16585": 4.57193,
            "16590": 4.80515,
            "16595": 4.65087,
            "16600": 4.74129,
            "16605": 4.49328,
            "16610": 4.92633,
            "16615": 4.79931,
            "16620": 4.98398,
            "16625": 4.75839,
            "16630": 4.69992,
            "16635": 4.92962,
            "16640": 4.9069,
            "16645": 4.48487,
            "16650": 4.33718,
            "16655": 4.76321,
            "16660": 4.69801,
            "16665": 4.69798,
            "16670": 4.61929,
            "16675": 4.80499,
            "16680": 4.58275,
            "16685": 4.60053,
            "16690": 4.97035,
            "16695": 4.85917,
            "16700": 4.61937,
            "16705": 4.91783,
            "16710": 4.74267,
            "16715": 4.52884,
            "16720": 4.74543,
            "16725": 4.63266,
            "16730": 4.54819,
            "16735": 4.53013,
            "16740": 4.60813,
            "16745": 4.88693,
            "16750": 4.06426,
            "16755": 4.33869,
            "16760": 4.65942,
            "16765": 4.52146,
            "16770": 4.95872,
            "16775": 4.66946,
            "16780": 4.89475,
            "16785": 4.46213,
            "16790": 4.60863,
            "16795": 4.73938,
            "16800": 4.65498,
            "16805": 4.52798,
            "16810": 4.50415,
            "16815": 4.37713,
            "16820": 4.58763,
            "16825": 4.46719,
            "16830": 4.834,
            "16835": 4.75285,
            "16840": 4.72075,
            "16845": 4.75785,
            "16850": 4.81304,
            "16855": 4.66356,
            "16860": 4.8475,
            "16865": 4.68936,
            "16870": 4.44779,
            "16875": 4.59173,
            "16880": 4.75886,
            "16885": 4.86107,
            "16890": 4.55498,
            "16895": 4.5525,
            "16900": 4.73939,
            "16905": 4.62733,
            "16910": 4.56552,
            "16915": 4.48919,
            "16920": 4.87261,
            "16925": 4.82187,
            "16930": 4.71978,
            "16935": 4.89883,
            "16940": 4.54186,
            "16945": 4.6665,
            "16950": 4.68932,
            "16955": 4.35388,
            "16960": 4.66289,
            "16965": 4.63855,
            "16970": 4.54481,
            "16975": 4.63565,
            "16980": 4.72426,
            "16985": 4.49631,
            "16990": 4.49158,
            "16995": 4.81208,
            "17000": 4.81587,
            "17005": 4.58197,
            "17010": 4.77911,
            "17015": 4.5962,
            "17020": 4.93642,
            "17025": 4.60978,
            "17030": 4.67155,
            "17035": 4.65959,
            "17040": 4.78206,
            "17045": 4.63812,
            "17050": 4.49213,
            "17055": 4.61738,
            "17060": 4.82748,
            "17065": 4.52788,
            "17070": 4.74378,
            "17075": 4.69414,
            "17080": 4.86931,
            "17085": 4.5391,
            "17090": 4.32375,
            "17095": 4.4591,
            "17100": 4.9009,
            "17105": 4.75297,
            "17110": 4.75194,
            "17115": 4.87058,
            "17120": 4.72655,
            "17125": 4.59249,
            "17130": 4.67227,
            "17135": 4.5668,
            "17140": 4.60474,
            "17145": 4.81179,
            "17150": 4.75672,
            "17155": 4.66812,
            "17160": 4.33974,
            "17165": 4.74413,
            "17170": 4.45015,
            "17175": 4.91618,
            "17180": 4.49876,
            "17185": 4.60362,
            "17190": 4.78273,
            "17195": 5.0283,
            "17200": 4.67986,
            "17205": 4.80526,
            "17210": 4.58875,
            "17215": 4.72074,
            "17220": 4.50428,
            "17225": 4.56697,
            "17230": 4.90317,
            "17235": 4.58125,
            "17240": 4.68439,
            "17245": 4.94341,
            "17250": 4.84728,
            "17255": 4.70231,
            "17260": 4.7444,
            "17265": 4.60532,
            "17270": 5.11259,
            "17275": 4.83118,
            "17280": 4.73296,
            "17285": 4.52787,
            "17290": 4.77533,
            "17295": 4.54969,
            "17300": 4.6355,
            "17305": 4.91287,
            "17310": 4.71691,
            "17315": 4.66218,
            "17320": 4.58749,
            "17325": 4.47276,
            "17330": 4.537,
            "17335": 4.5909,
            "17340": 4.65382,
            "17345": 4.85272,
            "17350": 4.77971,
            "17355": 4.41532,
            "17360": 4.73823,
            "17365": 4.78177,
            "17370": 4.46393,
            "17375": 4.69638,
            "17380": 4.63385,
            "17385": 4.69353,
            "17390": 4.69696,
            "17395": 4.5718,
            "17400": 4.62144,
            "17405": 4.9217,
            "17410": 4.63549,
            "17415": 5.08726,
            "17420": 4.48903,
            "17425": 4.52313,
            "17430": 4.65975,
            "17435": 4.78332,
            "17440": 4.74121,
            "17445": 4.87354,
            "17450": 4.63195,
            "17455": 4.48713,
            "17460": 4.54245,
            "17465": 4.58356,
            "17470": 4.61408,
            "17475": 4.70205,
            "17480": 4.6592,
            "17485": 4.73304,
            "17490": 4.68383,
            "17495": 4.4587,
            "17500": 4.46526,
            "17505": 4.69284,
            "17510": 4.36979,
            "17515": 4.73021,
            "17520": 4.42841,
            "17525": 4.67404,
            "17530": 4.79274,
            "17535": 4.4912,
            "17540": 4.79785,
            "17545": 4.79455,
            "17550": 4.91777,
            "17555": 4.63474,
            "17560": 4.39423,
            "17565": 4.47411,
            "17570": 4.57156,
            "17575": 4.77131,
            "17580": 4.65766,
            "17585": 4.54508,
            "17590": 4.78686,
            "17595": 4.19836,
            "17600": 4.35212,
            "17605": 4.75226,
            "17610": 4.38136,
            "17615": 4.50828,
            "17620": 4.74362,
            "17625": 4.50715,
            "17630": 4.60184,
            "17635": 4.87734,
            "17640": 4.6823,
            "17645": 4.50923,
            "17650": 4.6062,
            "17655": 4.71994,
            "17660": 4.36395,
            "17665": 4.53441,
            "17670": 4.90477,
            "17675": 4.85214,
            "17680": 4.87408,
            "17685": 4.7295,
            "17690": 4.90385,
            "17695": 4.57973,
            "17700": 4.47619,
            "17705": 4.82624,
            "17710": 4.65688,
            "17715": 4.84776,
            "17720": 4.94579,
            "17725": 4.43957,
            "17730": 4.75527,
            "17735": 4.51082,
            "17740": 4.63748,
            "17745": 4.65774,
            "17750": 4.29934,
            "17755": 4.7572,
            "17760": 4.43678,
            "17765": 4.55728,
            "17770": 4.59943,
            "17775": 4.56312,
            "17780": 4.71899,
            "17785": 4.49493,
            "17790": 4.63046,
            "17795": 4.87592,
            "17800": 4.57996,
            "17805": 4.77812,
            "17810": 4.20754,
            "17815": 4.87316,
            "17820": 4.71158,
            "17825": 4.80666,
            "17830": 4.7525,
            "17835": 4.65203,
            "17840": 4.80468,
            "17845": 4.62685,
            "17850": 4.63864,
            "17855": 4.71369,
            "17860": 4.75982,
            "17865": 4.80594,
            "17870": 4.67825,
            "17875": 4.76269,
            "17880": 4.78253,
            "17885": 4.69667,
            "17890": 4.67013,
            "17895": 4.36915,
            "17900": 4.28983,
            "17905": 4.59742,
            "17910": 4.72964,
            "17915": 4.53207,
            "17920": 4.60842,
            "17925": 4.50191,
            "17930": 4.84168,
            "17935": 4.64564,
            "17940": 4.65895,
            "17945": 4.57645,
            "17950": 4.75122,
            "17955": 4.51601,
            "17960": 4.51356,
            "17965": 4.52073,
            "17970": 4.60128,
            "17975": 4.51017,
            "17980": 4.41691,
            "17985": 4.64467,
            "17990": 4.64913,
            "17995": 4.97743,
            "18000": 4.58228,
            "18005": 4.60254,
            "18010": 4.75284,
            "18015": 4.70704,
            "18020": 4.5964,
            "18025": 4.79816,
            "18030": 4.6411,
            "18035": 4.43558,
            "18040": 4.72911,
            "18045": 4.62394,
            "18050": 4.68322,
            "18055": 4.36441,
            "18060": 4.72576,
            "18065": 4.61994,
            "18070": 4.76827,
            "18075": 4.62801,
            "18080": 4.26296,
            "18085": 4.67669,
            "18090": 4.50888,
            "18095": 4.73468,
            "18100": 4.85642,
            "18105": 4.64084,
            "18110": 4.7183,
            "18115": 4.67467,
            "18120": 4.60958,
            "18125": 4.76424,
            "18130": 4.69411,
            "18135": 4.83908,
            "18140": 4.71754,
            "18145": 4.3845,
            "18150": 4.65017,
            "18155": 4.48233,
            "18160": 4.84826,
            "18165": 4.55814,
            "18170": 4.53745,
            "18175": 4.68028,
            "18180": 4.57131,
            "18185": 4.5905,
            "18190": 4.50883,
            "18195": 4.60504,
            "18200": 4.70063,
            "18205": 4.52242,
            "18210": 4.44437,
            "18215": 4.81469,
            "18220": 4.39784,
            "18225": 4.75,
            "18230": 4.48278,
            "18235": 4.64329,
            "18240": 4.85086,
            "18245": 4.60176,
            "18250": 4.61558,
            "18255": 4.66843,
            "18260": 4.60638,
            "18265": 4.51874,
            "18270": 4.7893,
            "18275": 4.50199,
            "18280": 4.62968,
            "18285": 4.84819,
            "18290": 4.65167,
            "18295": 4.78086,
            "18300": 4.61435,
            "18305": 4.32573,
            "18310": 4.69768,
            "18315": 4.18116,
            "18320": 4.33477,
            "18325": 4.49775,
            "18330": 4.52442,
            "18335": 4.81296,
            "18340": 4.97932,
            "18345": 4.57733,
            "18350": 4.62815,
            "18355": 4.51447,
            "18360": 4.48932,
            "18365": 4.45975,
            "18370": 4.59487,
            "18375": 4.5655,
            "18380": 4.70773,
            "18385": 4.66241,
            "18390": 4.79674,
            "18395": 4.62293,
            "18400": 4.54809,
            "18405": 4.6569,
            "18410": 4.519,
            "18415": 4.56398,
            "18420": 4.66431,
            "18425": 4.64807,
            "18430": 4.76195,
            "18435": 4.27349,
            "18440": 4.44892,
            "18445": 4.77236,
            "18450": 4.42669,
            "18455": 4.648,
            "18460": 4.34656,
            "18465": 4.5645,
            "18470": 4.309,
            "18475": 4.85372,
            "18480": 4.58978,
            "18485": 4.57969,
            "18490": 4.76833,
            "18495": 4.67035,
            "18500": 4.72338,
            "18505": 4.74882,
            "18510": 4.46801,
            "18515": 4.42237,
            "18520": 4.60198,
            "18525": 4.35961,
            "18530": 4.70974,
            "18535": 4.76495,
            "18540": 4.49215,
            "18545": 4.66909,
            "18550": 4.91165,
            "18555": 4.71997,
            "18560": 4.35662,
            "18565": 4.63607,
            "18570": 4.72607,
            "18575": 4.57576,
            "18580": 4.82911,
            "18585": 4.60005,
            "18590": 4.83785,
            "18595": 4.64342,
            "18600": 4.42395,
            "18605": 4.7446,
            "18610": 4.51976,
            "18615": 4.71801,
            "18620": 4.40364,
            "18625": 4.45746,
            "18630": 4.3472,
            "18635": 4.36917,
            "18640": 4.86928,
            "18645": 4.67193,
            "18650": 4.51173,
            "18655": 4.51465,
            "18660": 4.6994,
            "18665": 4.62566,
            "18670": 4.52361,
            "18675": 4.54438,
            "18680": 4.3761,
            "18685": 4.43294,
            "18690": 4.6037,
            "18695": 4.54047,
            "18700": 4.81296,
            "18705": 4.48927,
            "18710": 4.34223,
            "18715": 4.44542,
            "18720": 4.52251,
            "18725": 4.54177,
            "18730": 4.50473,
            "18735": 4.33861,
            "18740": 4.58553,
            "18745": 4.46256,
            "18750": 4.59959,
            "18755": 4.49781,
            "18760": 4.71967,
            "18765": 4.63972,
            "18770": 4.62481,
            "18775": 4.30034,
            "18780": 4.47087,
            "18785": 4.8987,
            "18790": 4.50937,
            "18795": 4.8856,
            "18800": 4.67557,
            "18805": 4.27578,
            "18810": 4.706,
            "18815": 4.71354,
            "18820": 4.51947,
            "18825": 4.80085,
            "18830": 4.52687,
            "18835": 4.59144,
            "18840": 4.43681,
            "18845": 4.54676,
            "18850": 4.75405,
            "18855": 4.46891,
            "18860": 4.53346,
            "18865": 4.75976,
            "18870": 4.62768,
            "18875": 4.38074,
            "18880": 4.80513,
            "18885": 4.6356,
            "18890": 4.5538,
            "18895": 4.59307,
            "18900": 4.62087,
            "18905": 4.57833,
            "18910": 4.66721,
            "18915": 4.77333,
            "18920": 4.43086,
            "18925": 4.28249,
            "18930": 4.68255,
            "18935": 4.75492,
            "18940": 4.5639,
            "18945": 4.70177,
            "18950": 4.54525,
            "18955": 4.67803,
            "18960": 4.8443,
            "18965": 4.07386,
            "18970": 4.24213,
            "18975": 4.78565,
            "18980": 4.69905,
            "18985": 4.3731,
            "18990": 4.66503,
            "18995": 4.82676,
            "19000": 4.6225,
            "19005": 4.46129,
            "19010": 4.61163,
            "19015": 4.50512,
            "19020": 4.64305,
            "19025": 4.62295,
            "19030": 4.454,
            "19035": 4.5366,
            "19040": 4.60021,
            "19045": 4.76493,
            "19050": 4.46545,
            "19055": 4.40844,
            "19060": 4.53487,
            "19065": 4.81016,
            "19070": 4.68719,
            "19075": 4.59732,
            "19080": 4.7292,
            "19085": 4.5314,
            "19090": 4.589,
            "19095": 4.60859,
            "19100": 4.66854,
            "19105": 4.82439,
            "19110": 4.43348,
            "19115": 4.44115,
            "19120": 4.14421,
            "19125": 4.40973,
            "19130": 4.71963,
            "19135": 4.53203,
            "19140": 4.6135,
            "19145": 4.66073,
            "19150": 4.5769,
            "19155": 4.39117,
            "19160": 4.72376,
            "19165": 4.67529,
            "19170": 4.40564,
            "19175": 4.43033,
            "19180": 4.76606,
            "19185": 4.72398,
            "19190": 4.55824,
            "19195": 4.70678,
            "19200": 4.4623,
            "19205": 4.48976,
            "19210": 4.63309,
            "19215": 4.50118,
            "19220": 4.63016,
            "19225": 4.73247,
            "19230": 4.31444,
            "19235": 4.70501,
            "19240": 4.6426,
            "19245": 4.5939,
            "19250": 4.25354,
            "19255": 4.64854,
            "19260": 4.57848,
            "19265": 4.46031,
            "19270": 4.61578,
            "19275": 4.43833,
            "19280": 4.5869,
            "19285": 4.66495,
            "19290": 4.45297,
            "19295": 4.69172,
            "19300": 4.6158,
            "19305": 4.6902,
            "19310": 4.29722,
            "19315": 4.20295,
            "19320": 4.85395,
            "19325": 4.46266,
            "19330": 4.71449,
            "19335": 4.35437,
            "19340": 4.67676,
            "19345": 4.59857,
            "19350": 4.68159,
            "19355": 4.5756,
            "19360": 4.58754,
            "19365": 4.89733,
            "19370": 4.7604,
            "19375": 4.68848,
            "19380": 4.40422,
            "19385": 4.39114,
            "19390": 4.5088,
            "19395": 4.44562,
            "19400": 4.65054,
            "19405": 4.65788,
            "19410": 4.45117,
            "19415": 4.69358,
            "19420": 4.65999,
            "19425": 4.75182,
            "19430": 4.69321,
            "19435": 4.67592,
            "19440": 4.54452,
            "19445": 4.36871,
            "19450": 4.17698,
            "19455": 4.63272,
            "19460": 4.51241,
            "19465": 4.61512,
            "19470": 4.57366,
            "19475": 4.23703,
            "19480": 4.88882,
            "19485": 4.50198,
            "19490": 4.32005,
            "19495": 4.65571,
            "19500": 4.45988,
            "19505": 4.65682,
            "19510": 4.48595,
            "19515": 4.51166,
            "19520": 4.60804,
            "19525": 4.60808,
            "19530": 4.4181,
            "19535": 4.51527,
            "19540": 4.7436,
            "19545": 4.70435,
            "19550": 4.52721,
            "19555": 4.49473,
            "19560": 4.79977,
            "19565": 4.87307,
            "19570": 4.55569,
            "19575": 4.73853,
            "19580": 4.73813,
            "19585": 4.43278,
            "19590": 4.3982,
            "19595": 4.65843,
            "19600": 4.42959,
            "19605": 4.78195,
            "19610": 4.7514,
            "19615": 4.41102,
            "19620": 4.7986,
            "19625": 4.17544,
            "19630": 4.759,
            "19635": 4.67621,
            "19640": 4.62679,
            "19645": 4.6048,
            "19650": 4.30813,
            "19655": 4.64078,
            "19660": 4.28004,
            "19665": 4.64354,
            "19670": 4.57552,
            "19675": 4.48172,
            "19680": 4.47487,
            "19685": 4.69323,
            "19690": 4.82985,
            "19695": 4.40075,
            "19700": 4.70831,
            "19705": 4.51262,
            "19710": 4.61664,
            "19715": 4.57855,
            "19720": 4.53342,
            "19725": 4.62414,
            "19730": 4.61544,
            "19735": 4.4335,
            "19740": 4.50871,
            "19745": 4.55547,
            "19750": 4.4954,
            "19755": 4.40028,
            "19760": 4.38656,
            "19765": 4.30652,
            "19770": 4.63164,
            "19775": 4.44442,
            "19780": 4.49305,
            "19785": 4.8723,
            "19790": 4.57208,
            "19795": 4.64533,
            "19800": 4.56496,
            "19805": 4.46666,
            "19810": 4.65357,
            "19815": 4.61917,
            "19820": 4.92892,
            "19825": 4.53079,
            "19830": 4.88318,
            "19835": 4.7888,
            "19840": 4.41074,
            "19845": 4.72204,
            "19850": 4.76925,
            "19855": 4.49885,
            "19860": 4.4836,
            "19865": 4.53348,
            "19870": 4.60764,
            "19875": 4.51326,
            "19880": 4.51473,
            "19885": 4.66989,
            "19890": 4.54255,
            "19895": 4.51675,
            "19900": 4.4321,
            "19905": 4.48592,
            "19910": 4.71583,
            "19915": 4.32545,
            "19920": 4.55167,
            "19925": 4.44676,
            "19930": 4.36274,
            "19935": 4.87672,
            "19940": 4.74455,
            "19945": 4.5509,
            "19950": 4.78494,
            "19955": 4.52144,
            "19960": 4.41994,
            "19965": 4.40707,
            "19970": 4.39352,
            "19975": 4.53713,
            "19980": 4.64754,
            "19985": 4.48783,
            "19990": 4.65518,
            "19995": 4.29062,
            "20000": 4.85772
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 20000,
        "step_interval": 5,
        "values": {
            "1": 225487360.0,
            "5": 226011648.0,
            "10": 225487360.0,
            "15": 225487360.0,
            "20": 303322112.0,
            "25": 303846400.0,
            "30": 302797824.0,
            "35": 303846400.0,
            "40": 303846400.0,
            "45": 303846400.0,
            "50": 302797824.0,
            "55": 303322112.0,
            "60": 303846400.0,
            "65": 303322112.0,
            "70": 303846400.0,
            "75": 303846400.0,
            "80": 302797824.0,
            "85": 303846400.0,
            "90": 303846400.0,
            "95": 303846400.0,
            "100": 303322112.0,
            "105": 303846400.0,
            "110": 303846400.0,
            "115": 303846400.0,
            "120": 303846400.0,
            "125": 303846400.0,
            "130": 303846400.0,
            "135": 303846400.0,
            "140": 303322112.0,
            "145": 303322112.0,
            "150": 303322112.0,
            "155": 303846400.0,
            "160": 303322112.0,
            "165": 303322112.0,
            "170": 302797824.0,
            "175": 302797824.0,
            "180": 303846400.0,
            "185": 303322112.0,
            "190": 303322112.0,
            "195": 303846400.0,
            "200": 303322112.0,
            "205": 303322112.0,
            "210": 302797824.0,
            "215": 302797824.0,
            "220": 303322112.0,
            "225": 302797824.0,
            "230": 303322112.0,
            "235": 303322112.0,
            "240": 303846400.0,
            "245": 303322112.0,
            "250": 303322112.0,
            "255": 303322112.0,
            "260": 303322112.0,
            "265": 303846400.0,
            "270": 303322112.0,
            "275": 303846400.0,
            "280": 303846400.0,
            "285": 303322112.0,
            "290": 303846400.0,
            "295": 303322112.0,
            "300": 303846400.0,
            "305": 303846400.0,
            "310": 303322112.0,
            "315": 303846400.0,
            "320": 303322112.0,
            "325": 303322112.0,
            "330": 303846400.0,
            "335": 303322112.0,
            "340": 303846400.0,
            "345": 303322112.0,
            "350": 303322112.0,
            "355": 303322112.0,
            "360": 303322112.0,
            "365": 303322112.0,
            "370": 303322112.0,
            "375": 303322112.0,
            "380": 303322112.0,
            "385": 303846400.0,
            "390": 303322112.0,
            "395": 302797824.0,
            "400": 303322112.0,
            "405": 303322112.0,
            "410": 303322112.0,
            "415": 302797824.0,
            "420": 303322112.0,
            "425": 303846400.0,
            "430": 303322112.0,
            "435": 303322112.0,
            "440": 303322112.0,
            "445": 303846400.0,
            "450": 303846400.0,
            "455": 303322112.0,
            "460": 303846400.0,
            "465": 303322112.0,
            "470": 302797824.0,
            "475": 303322112.0,
            "480": 303322112.0,
            "485": 303322112.0,
            "490": 302797824.0,
            "495": 303846400.0,
            "500": 303322112.0,
            "505": 303322112.0,
            "510": 303846400.0,
            "515": 303846400.0,
            "520": 302797824.0,
            "525": 303322112.0,
            "530": 303846400.0,
            "535": 303322112.0,
            "540": 303322112.0,
            "545": 303322112.0,
            "550": 303846400.0,
            "555": 303846400.0,
            "560": 303846400.0,
            "565": 303846400.0,
            "570": 303846400.0,
            "575": 303322112.0,
            "580": 303322112.0,
            "585": 303322112.0,
            "590": 303846400.0,
            "595": 303846400.0,
            "600": 303846400.0,
            "605": 303322112.0,
            "610": 303322112.0,
            "615": 303322112.0,
            "620": 303322112.0,
            "625": 303322112.0,
            "630": 303322112.0,
            "635": 303846400.0,
            "640": 303846400.0,
            "645": 303846400.0,
            "650": 303322112.0,
            "655": 302797824.0,
            "660": 302797824.0,
            "665": 303322112.0,
            "670": 303322112.0,
            "675": 303322112.0,
            "680": 303322112.0,
            "685": 303846400.0,
            "690": 303322112.0,
            "695": 303846400.0,
            "700": 303846400.0,
            "705": 302797824.0,
            "710": 303322112.0,
            "715": 303322112.0,
            "720": 303322112.0,
            "725": 303322112.0,
            "730": 303322112.0,
            "735": 303322112.0,
            "740": 303846400.0,
            "745": 303322112.0,
            "750": 303322112.0,
            "755": 302797824.0,
            "760": 302797824.0,
            "765": 303322112.0,
            "770": 303322112.0,
            "775": 303322112.0,
            "780": 302797824.0,
            "785": 303322112.0,
            "790": 302797824.0,
            "795": 303846400.0,
            "800": 303322112.0,
            "805": 303322112.0,
            "810": 303846400.0,
            "815": 303322112.0,
            "820": 303322112.0,
            "825": 303322112.0,
            "830": 303322112.0,
            "835": 303846400.0,
            "840": 303322112.0,
            "845": 303322112.0,
            "850": 302797824.0,
            "855": 303846400.0,
            "860": 303322112.0,
            "865": 303846400.0,
            "870": 303322112.0,
            "875": 303846400.0,
            "880": 303846400.0,
            "885": 303322112.0,
            "890": 302797824.0,
            "895": 302797824.0,
            "900": 303846400.0,
            "905": 303846400.0,
            "910": 303322112.0,
            "915": 302797824.0,
            "920": 303322112.0,
            "925": 303322112.0,
            "930": 303846400.0,
            "935": 303322112.0,
            "940": 303322112.0,
            "945": 303322112.0,
            "950": 303322112.0,
            "955": 303322112.0,
            "960": 303322112.0,
            "965": 303322112.0,
            "970": 303322112.0,
            "975": 302797824.0,
            "980": 303322112.0,
            "985": 303322112.0,
            "990": 303846400.0,
            "995": 302797824.0,
            "1000": 303322112.0,
            "1005": 303846400.0,
            "1010": 302797824.0,
            "1015": 303322112.0,
            "1020": 303846400.0,
            "1025": 303322112.0,
            "1030": 303846400.0,
            "1035": 303846400.0,
            "1040": 303322112.0,
            "1045": 303846400.0,
            "1050": 302797824.0,
            "1055": 302797824.0,
            "1060": 303322112.0,
            "1065": 303846400.0,
            "1070": 302797824.0,
            "1075": 303846400.0,
            "1080": 303322112.0,
            "1085": 303846400.0,
            "1090": 302797824.0,
            "1095": 303322112.0,
            "1100": 303322112.0,
            "1105": 303846400.0,
            "1110": 303322112.0,
            "1115": 303322112.0,
            "1120": 303846400.0,
            "1125": 303846400.0,
            "1130": 303846400.0,
            "1135": 303322112.0,
            "1140": 303322112.0,
            "1145": 303846400.0,
            "1150": 302797824.0,
            "1155": 302797824.0,
            "1160": 303846400.0,
            "1165": 302797824.0,
            "1170": 303322112.0,
            "1175": 303846400.0,
            "1180": 303322112.0,
            "1185": 303846400.0,
            "1190": 303322112.0,
            "1195": 302797824.0,
            "1200": 303846400.0,
            "1205": 303846400.0,
            "1210": 302797824.0,
            "1215": 303322112.0,
            "1220": 303846400.0,
            "1225": 303322112.0,
            "1230": 302797824.0,
            "1235": 303322112.0,
            "1240": 303846400.0,
            "1245": 303322112.0,
            "1250": 303846400.0,
            "1255": 303846400.0,
            "1260": 303322112.0,
            "1265": 303322112.0,
            "1270": 303846400.0,
            "1275": 303846400.0,
            "1280": 303322112.0,
            "1285": 303846400.0,
            "1290": 303322112.0,
            "1295": 303322112.0,
            "1300": 303846400.0,
            "1305": 303846400.0,
            "1310": 303322112.0,
            "1315": 303322112.0,
            "1320": 303846400.0,
            "1325": 303846400.0,
            "1330": 303322112.0,
            "1335": 303322112.0,
            "1340": 303322112.0,
            "1345": 302797824.0,
            "1350": 303322112.0,
            "1355": 303322112.0,
            "1360": 303846400.0,
            "1365": 302797824.0,
            "1370": 302797824.0,
            "1375": 302797824.0,
            "1380": 302797824.0,
            "1385": 302797824.0,
            "1390": 303846400.0,
            "1395": 303322112.0,
            "1400": 303322112.0,
            "1405": 303322112.0,
            "1410": 303846400.0,
            "1415": 303322112.0,
            "1420": 303322112.0,
            "1425": 302797824.0,
            "1430": 302797824.0,
            "1435": 303322112.0,
            "1440": 303322112.0,
            "1445": 303846400.0,
            "1450": 302797824.0,
            "1455": 303846400.0,
            "1460": 303846400.0,
            "1465": 303322112.0,
            "1470": 303322112.0,
            "1475": 303322112.0,
            "1480": 303846400.0,
            "1485": 303322112.0,
            "1490": 303322112.0,
            "1495": 303322112.0,
            "1500": 303322112.0,
            "1505": 303846400.0,
            "1510": 303846400.0,
            "1515": 303322112.0,
            "1520": 303322112.0,
            "1525": 303846400.0,
            "1530": 303322112.0,
            "1535": 303322112.0,
            "1540": 302797824.0,
            "1545": 303322112.0,
            "1550": 303846400.0,
            "1555": 302797824.0,
            "1560": 302797824.0,
            "1565": 303846400.0,
            "1570": 303322112.0,
            "1575": 302797824.0,
            "1580": 303846400.0,
            "1585": 303322112.0,
            "1590": 303846400.0,
            "1595": 303846400.0,
            "1600": 303846400.0,
            "1605": 303322112.0,
            "1610": 302797824.0,
            "1615": 303846400.0,
            "1620": 303846400.0,
            "1625": 303322112.0,
            "1630": 302797824.0,
            "1635": 303846400.0,
            "1640": 303846400.0,
            "1645": 303322112.0,
            "1650": 303322112.0,
            "1655": 303846400.0,
            "1660": 303846400.0,
            "1665": 303846400.0,
            "1670": 303846400.0,
            "1675": 303322112.0,
            "1680": 303322112.0,
            "1685": 303846400.0,
            "1690": 302797824.0,
            "1695": 303846400.0,
            "1700": 302797824.0,
            "1705": 303846400.0,
            "1710": 303846400.0,
            "1715": 303322112.0,
            "1720": 302797824.0,
            "1725": 303846400.0,
            "1730": 302797824.0,
            "1735": 302797824.0,
            "1740": 303846400.0,
            "1745": 303846400.0,
            "1750": 303322112.0,
            "1755": 303846400.0,
            "1760": 303322112.0,
            "1765": 303322112.0,
            "1770": 302797824.0,
            "1775": 303846400.0,
            "1780": 303846400.0,
            "1785": 303322112.0,
            "1790": 302797824.0,
            "1795": 303846400.0,
            "1800": 303322112.0,
            "1805": 303322112.0,
            "1810": 303322112.0,
            "1815": 303322112.0,
            "1820": 303846400.0,
            "1825": 302797824.0,
            "1830": 303846400.0,
            "1835": 303846400.0,
            "1840": 302797824.0,
            "1845": 303846400.0,
            "1850": 303846400.0,
            "1855": 302797824.0,
            "1860": 303846400.0,
            "1865": 302797824.0,
            "1870": 302797824.0,
            "1875": 303322112.0,
            "1880": 303322112.0,
            "1885": 303322112.0,
            "1890": 303322112.0,
            "1895": 303322112.0,
            "1900": 303322112.0,
            "1905": 303846400.0,
            "1910": 303322112.0,
            "1915": 303846400.0,
            "1920": 303846400.0,
            "1925": 302797824.0,
            "1930": 302797824.0,
            "1935": 303322112.0,
            "1940": 303322112.0,
            "1945": 303322112.0,
            "1950": 302797824.0,
            "1955": 303846400.0,
            "1960": 302797824.0,
            "1965": 303322112.0,
            "1970": 303846400.0,
            "1975": 303846400.0,
            "1980": 303322112.0,
            "1985": 303846400.0,
            "1990": 304370688.0,
            "1995": 303322112.0,
            "2000": 302797824.0,
            "2005": 303322112.0,
            "2010": 303846400.0,
            "2015": 302797824.0,
            "2020": 302797824.0,
            "2025": 302797824.0,
            "2030": 303322112.0,
            "2035": 303322112.0,
            "2040": 303846400.0,
            "2045": 303846400.0,
            "2050": 303846400.0,
            "2055": 303322112.0,
            "2060": 303846400.0,
            "2065": 303846400.0,
            "2070": 303322112.0,
            "2075": 303846400.0,
            "2080": 303846400.0,
            "2085": 303322112.0,
            "2090": 303322112.0,
            "2095": 303846400.0,
            "2100": 303846400.0,
            "2105": 303846400.0,
            "2110": 303846400.0,
            "2115": 302797824.0,
            "2120": 302797824.0,
            "2125": 302797824.0,
            "2130": 303846400.0,
            "2135": 303846400.0,
            "2140": 302797824.0,
            "2145": 302797824.0,
            "2150": 302797824.0,
            "2155": 303322112.0,
            "2160": 303846400.0,
            "2165": 303322112.0,
            "2170": 303846400.0,
            "2175": 303322112.0,
            "2180": 303322112.0,
            "2185": 303846400.0,
            "2190": 303846400.0,
            "2195": 303846400.0,
            "2200": 303846400.0,
            "2205": 302797824.0,
            "2210": 302797824.0,
            "2215": 303846400.0,
            "2220": 303846400.0,
            "2225": 303322112.0,
            "2230": 303322112.0,
            "2235": 303322112.0,
            "2240": 303846400.0,
            "2245": 302797824.0,
            "2250": 303322112.0,
            "2255": 303846400.0,
            "2260": 302797824.0,
            "2265": 302797824.0,
            "2270": 303846400.0,
            "2275": 303322112.0,
            "2280": 303846400.0,
            "2285": 302797824.0,
            "2290": 303846400.0,
            "2295": 303846400.0,
            "2300": 303322112.0,
            "2305": 302797824.0,
            "2310": 303322112.0,
            "2315": 303846400.0,
            "2320": 302797824.0,
            "2325": 303322112.0,
            "2330": 302797824.0,
            "2335": 303846400.0,
            "2340": 303846400.0,
            "2345": 303846400.0,
            "2350": 303322112.0,
            "2355": 303846400.0,
            "2360": 303322112.0,
            "2365": 302797824.0,
            "2370": 303846400.0,
            "2375": 303322112.0,
            "2380": 302797824.0,
            "2385": 303846400.0,
            "2390": 303322112.0,
            "2395": 302797824.0,
            "2400": 303846400.0,
            "2405": 303846400.0,
            "2410": 303846400.0,
            "2415": 303846400.0,
            "2420": 303322112.0,
            "2425": 303322112.0,
            "2430": 303846400.0,
            "2435": 303846400.0,
            "2440": 303846400.0,
            "2445": 303846400.0,
            "2450": 303846400.0,
            "2455": 302797824.0,
            "2460": 303846400.0,
            "2465": 303846400.0,
            "2470": 303846400.0,
            "2475": 302797824.0,
            "2480": 303322112.0,
            "2485": 303846400.0,
            "2490": 303322112.0,
            "2495": 303322112.0,
            "2500": 303322112.0,
            "2505": 302797824.0,
            "2510": 303846400.0,
            "2515": 303846400.0,
            "2520": 303846400.0,
            "2525": 303322112.0,
            "2530": 303846400.0,
            "2535": 303846400.0,
            "2540": 303846400.0,
            "2545": 303846400.0,
            "2550": 303846400.0,
            "2555": 303846400.0,
            "2560": 303322112.0,
            "2565": 303846400.0,
            "2570": 303322112.0,
            "2575": 303846400.0,
            "2580": 303322112.0,
            "2585": 302797824.0,
            "2590": 303846400.0,
            "2595": 303846400.0,
            "2600": 303322112.0,
            "2605": 303846400.0,
            "2610": 303846400.0,
            "2615": 303846400.0,
            "2620": 303846400.0,
            "2625": 302797824.0,
            "2630": 302797824.0,
            "2635": 303322112.0,
            "2640": 302797824.0,
            "2645": 303846400.0,
            "2650": 303846400.0,
            "2655": 303322112.0,
            "2660": 303846400.0,
            "2665": 303846400.0,
            "2670": 302797824.0,
            "2675": 303846400.0,
            "2680": 303846400.0,
            "2685": 303846400.0,
            "2690": 303846400.0,
            "2695": 303846400.0,
            "2700": 303846400.0,
            "2705": 303846400.0,
            "2710": 302797824.0,
            "2715": 303846400.0,
            "2720": 302797824.0,
            "2725": 303846400.0,
            "2730": 303846400.0,
            "2735": 303322112.0,
            "2740": 302797824.0,
            "2745": 303846400.0,
            "2750": 303846400.0,
            "2755": 303846400.0,
            "2760": 302797824.0,
            "2765": 303846400.0,
            "2770": 303322112.0,
            "2775": 303846400.0,
            "2780": 303846400.0,
            "2785": 302797824.0,
            "2790": 303846400.0,
            "2795": 303322112.0,
            "2800": 302797824.0,
            "2805": 303846400.0,
            "2810": 303846400.0,
            "2815": 303322112.0,
            "2820": 303322112.0,
            "2825": 303322112.0,
            "2830": 303846400.0,
            "2835": 302797824.0,
            "2840": 303846400.0,
            "2845": 302797824.0,
            "2850": 303846400.0,
            "2855": 303846400.0,
            "2860": 302797824.0,
            "2865": 303846400.0,
            "2870": 303322112.0,
            "2875": 303846400.0,
            "2880": 303846400.0,
            "2885": 302797824.0,
            "2890": 303846400.0,
            "2895": 303846400.0,
            "2900": 303322112.0,
            "2905": 303322112.0,
            "2910": 303322112.0,
            "2915": 303846400.0,
            "2920": 303846400.0,
            "2925": 302797824.0,
            "2930": 303322112.0,
            "2935": 303322112.0,
            "2940": 303846400.0,
            "2945": 303322112.0,
            "2950": 303322112.0,
            "2955": 303846400.0,
            "2960": 303846400.0,
            "2965": 303322112.0,
            "2970": 303322112.0,
            "2975": 303846400.0,
            "2980": 303322112.0,
            "2985": 303322112.0,
            "2990": 303846400.0,
            "2995": 302797824.0,
            "3000": 303846400.0,
            "3005": 303322112.0,
            "3010": 303322112.0,
            "3015": 303322112.0,
            "3020": 303322112.0,
            "3025": 303322112.0,
            "3030": 302797824.0,
            "3035": 303322112.0,
            "3040": 303846400.0,
            "3045": 303322112.0,
            "3050": 303322112.0,
            "3055": 303322112.0,
            "3060": 303846400.0,
            "3065": 302797824.0,
            "3070": 303846400.0,
            "3075": 303322112.0,
            "3080": 303846400.0,
            "3085": 303846400.0,
            "3090": 302797824.0,
            "3095": 303846400.0,
            "3100": 303846400.0,
            "3105": 303322112.0,
            "3110": 302797824.0,
            "3115": 302797824.0,
            "3120": 303322112.0,
            "3125": 303322112.0,
            "3130": 303846400.0,
            "3135": 303846400.0,
            "3140": 303846400.0,
            "3145": 303322112.0,
            "3150": 303846400.0,
            "3155": 302797824.0,
            "3160": 302797824.0,
            "3165": 303322112.0,
            "3170": 303846400.0,
            "3175": 302797824.0,
            "3180": 302797824.0,
            "3185": 303846400.0,
            "3190": 303846400.0,
            "3195": 303846400.0,
            "3200": 303846400.0,
            "3205": 303322112.0,
            "3210": 303322112.0,
            "3215": 303846400.0,
            "3220": 303846400.0,
            "3225": 303846400.0,
            "3230": 303846400.0,
            "3235": 303322112.0,
            "3240": 303846400.0,
            "3245": 303846400.0,
            "3250": 303322112.0,
            "3255": 302797824.0,
            "3260": 303846400.0,
            "3265": 303846400.0,
            "3270": 303322112.0,
            "3275": 303322112.0,
            "3280": 303322112.0,
            "3285": 303322112.0,
            "3290": 302797824.0,
            "3295": 303322112.0,
            "3300": 303846400.0,
            "3305": 303846400.0,
            "3310": 303846400.0,
            "3315": 303846400.0,
            "3320": 303846400.0,
            "3325": 303322112.0,
            "3330": 302797824.0,
            "3335": 303322112.0,
            "3340": 303322112.0,
            "3345": 303322112.0,
            "3350": 303322112.0,
            "3355": 303322112.0,
            "3360": 302797824.0,
            "3365": 303846400.0,
            "3370": 303846400.0,
            "3375": 303846400.0,
            "3380": 302797824.0,
            "3385": 303846400.0,
            "3390": 303846400.0,
            "3395": 303322112.0,
            "3400": 303846400.0,
            "3405": 302797824.0,
            "3410": 303846400.0,
            "3415": 302797824.0,
            "3420": 302797824.0,
            "3425": 303322112.0,
            "3430": 303322112.0,
            "3435": 302797824.0,
            "3440": 303846400.0,
            "3445": 303846400.0,
            "3450": 302797824.0,
            "3455": 303322112.0,
            "3460": 303322112.0,
            "3465": 303846400.0,
            "3470": 303322112.0,
            "3475": 303322112.0,
            "3480": 303846400.0,
            "3485": 303322112.0,
            "3490": 302797824.0,
            "3495": 302797824.0,
            "3500": 303322112.0,
            "3505": 303322112.0,
            "3510": 303322112.0,
            "3515": 302797824.0,
            "3520": 302797824.0,
            "3525": 303322112.0,
            "3530": 303322112.0,
            "3535": 302797824.0,
            "3540": 303846400.0,
            "3545": 303322112.0,
            "3550": 303322112.0,
            "3555": 303846400.0,
            "3560": 303846400.0,
            "3565": 303846400.0,
            "3570": 302797824.0,
            "3575": 303322112.0,
            "3580": 303322112.0,
            "3585": 303846400.0,
            "3590": 303846400.0,
            "3595": 303846400.0,
            "3600": 302797824.0,
            "3605": 303846400.0,
            "3610": 303322112.0,
            "3615": 303322112.0,
            "3620": 303846400.0,
            "3625": 303322112.0,
            "3630": 303846400.0,
            "3635": 303322112.0,
            "3640": 302797824.0,
            "3645": 303846400.0,
            "3650": 303846400.0,
            "3655": 303322112.0,
            "3660": 303322112.0,
            "3665": 302797824.0,
            "3670": 303846400.0,
            "3675": 302797824.0,
            "3680": 303846400.0,
            "3685": 303322112.0,
            "3690": 303846400.0,
            "3695": 302797824.0,
            "3700": 302797824.0,
            "3705": 303846400.0,
            "3710": 303322112.0,
            "3715": 303846400.0,
            "3720": 303322112.0,
            "3725": 303846400.0,
            "3730": 302797824.0,
            "3735": 303846400.0,
            "3740": 303846400.0,
            "3745": 303322112.0,
            "3750": 303322112.0,
            "3755": 303846400.0,
            "3760": 303322112.0,
            "3765": 302797824.0,
            "3770": 303846400.0,
            "3775": 303846400.0,
            "3780": 303846400.0,
            "3785": 302797824.0,
            "3790": 302797824.0,
            "3795": 303322112.0,
            "3800": 302797824.0,
            "3805": 303846400.0,
            "3810": 303322112.0,
            "3815": 303846400.0,
            "3820": 303322112.0,
            "3825": 303846400.0,
            "3830": 303322112.0,
            "3835": 303846400.0,
            "3840": 302797824.0,
            "3845": 303846400.0,
            "3850": 302797824.0,
            "3855": 303846400.0,
            "3860": 302797824.0,
            "3865": 303846400.0,
            "3870": 303322112.0,
            "3875": 302797824.0,
            "3880": 302797824.0,
            "3885": 302797824.0,
            "3890": 303846400.0,
            "3895": 303846400.0,
            "3900": 303846400.0,
            "3905": 303322112.0,
            "3910": 302797824.0,
            "3915": 303846400.0,
            "3920": 303846400.0,
            "3925": 303846400.0,
            "3930": 303322112.0,
            "3935": 303846400.0,
            "3940": 303846400.0,
            "3945": 303322112.0,
            "3950": 303322112.0,
            "3955": 302797824.0,
            "3960": 302797824.0,
            "3965": 303322112.0,
            "3970": 303322112.0,
            "3975": 303846400.0,
            "3980": 303846400.0,
            "3985": 303846400.0,
            "3990": 302797824.0,
            "3995": 302797824.0,
            "4000": 303846400.0,
            "4005": 302797824.0,
            "4010": 303846400.0,
            "4015": 303322112.0,
            "4020": 303846400.0,
            "4025": 303322112.0,
            "4030": 303846400.0,
            "4035": 303846400.0,
            "4040": 303846400.0,
            "4045": 303846400.0,
            "4050": 303322112.0,
            "4055": 303846400.0,
            "4060": 302797824.0,
            "4065": 302797824.0,
            "4070": 302797824.0,
            "4075": 303322112.0,
            "4080": 303846400.0,
            "4085": 302797824.0,
            "4090": 303322112.0,
            "4095": 303322112.0,
            "4100": 303322112.0,
            "4105": 303846400.0,
            "4110": 303322112.0,
            "4115": 303846400.0,
            "4120": 303846400.0,
            "4125": 303846400.0,
            "4130": 302797824.0,
            "4135": 302797824.0,
            "4140": 303322112.0,
            "4145": 303846400.0,
            "4150": 302797824.0,
            "4155": 303846400.0,
            "4160": 303846400.0,
            "4165": 302797824.0,
            "4170": 302797824.0,
            "4175": 302797824.0,
            "4180": 302797824.0,
            "4185": 303322112.0,
            "4190": 302797824.0,
            "4195": 303322112.0,
            "4200": 303846400.0,
            "4205": 303846400.0,
            "4210": 303846400.0,
            "4215": 303322112.0,
            "4220": 303322112.0,
            "4225": 303322112.0,
            "4230": 303322112.0,
            "4235": 302797824.0,
            "4240": 303846400.0,
            "4245": 303846400.0,
            "4250": 303846400.0,
            "4255": 303846400.0,
            "4260": 303322112.0,
            "4265": 303322112.0,
            "4270": 303322112.0,
            "4275": 303846400.0,
            "4280": 303322112.0,
            "4285": 303322112.0,
            "4290": 303322112.0,
            "4295": 303322112.0,
            "4300": 303322112.0,
            "4305": 302797824.0,
            "4310": 303322112.0,
            "4315": 303322112.0,
            "4320": 302797824.0,
            "4325": 303846400.0,
            "4330": 303322112.0,
            "4335": 303322112.0,
            "4340": 303846400.0,
            "4345": 303322112.0,
            "4350": 303846400.0,
            "4355": 303846400.0,
            "4360": 303846400.0,
            "4365": 302797824.0,
            "4370": 303846400.0,
            "4375": 303846400.0,
            "4380": 303846400.0,
            "4385": 302797824.0,
            "4390": 303846400.0,
            "4395": 303322112.0,
            "4400": 302797824.0,
            "4405": 303322112.0,
            "4410": 303846400.0,
            "4415": 303846400.0,
            "4420": 303846400.0,
            "4425": 302797824.0,
            "4430": 303846400.0,
            "4435": 302797824.0,
            "4440": 303322112.0,
            "4445": 303846400.0,
            "4450": 303846400.0,
            "4455": 303846400.0,
            "4460": 303322112.0,
            "4465": 302797824.0,
            "4470": 303846400.0,
            "4475": 303322112.0,
            "4480": 303846400.0,
            "4485": 303322112.0,
            "4490": 302797824.0,
            "4495": 303846400.0,
            "4500": 303846400.0,
            "4505": 303846400.0,
            "4510": 303846400.0,
            "4515": 303322112.0,
            "4520": 303846400.0,
            "4525": 303846400.0,
            "4530": 302797824.0,
            "4535": 303846400.0,
            "4540": 303322112.0,
            "4545": 303322112.0,
            "4550": 302797824.0,
            "4555": 302797824.0,
            "4560": 303846400.0,
            "4565": 303322112.0,
            "4570": 303322112.0,
            "4575": 303322112.0,
            "4580": 303322112.0,
            "4585": 303322112.0,
            "4590": 303846400.0,
            "4595": 303846400.0,
            "4600": 302797824.0,
            "4605": 303846400.0,
            "4610": 303846400.0,
            "4615": 303846400.0,
            "4620": 303322112.0,
            "4625": 303322112.0,
            "4630": 303322112.0,
            "4635": 303846400.0,
            "4640": 303846400.0,
            "4645": 303322112.0,
            "4650": 303846400.0,
            "4655": 303322112.0,
            "4660": 303846400.0,
            "4665": 303846400.0,
            "4670": 303846400.0,
            "4675": 302797824.0,
            "4680": 302797824.0,
            "4685": 303322112.0,
            "4690": 303846400.0,
            "4695": 302797824.0,
            "4700": 303322112.0,
            "4705": 303846400.0,
            "4710": 303322112.0,
            "4715": 303846400.0,
            "4720": 303322112.0,
            "4725": 303846400.0,
            "4730": 302797824.0,
            "4735": 302797824.0,
            "4740": 303322112.0,
            "4745": 303846400.0,
            "4750": 302797824.0,
            "4755": 303322112.0,
            "4760": 303846400.0,
            "4765": 303846400.0,
            "4770": 303322112.0,
            "4775": 303846400.0,
            "4780": 303846400.0,
            "4785": 303846400.0,
            "4790": 302797824.0,
            "4795": 303846400.0,
            "4800": 302797824.0,
            "4805": 303846400.0,
            "4810": 303322112.0,
            "4815": 302797824.0,
            "4820": 303846400.0,
            "4825": 303322112.0,
            "4830": 303322112.0,
            "4835": 303846400.0,
            "4840": 302797824.0,
            "4845": 303846400.0,
            "4850": 303322112.0,
            "4855": 303846400.0,
            "4860": 303322112.0,
            "4865": 302797824.0,
            "4870": 303322112.0,
            "4875": 303322112.0,
            "4880": 303846400.0,
            "4885": 303322112.0,
            "4890": 303846400.0,
            "4895": 303846400.0,
            "4900": 303322112.0,
            "4905": 303322112.0,
            "4910": 303322112.0,
            "4915": 303846400.0,
            "4920": 303322112.0,
            "4925": 302797824.0,
            "4930": 302797824.0,
            "4935": 302797824.0,
            "4940": 302797824.0,
            "4945": 302797824.0,
            "4950": 303322112.0,
            "4955": 303846400.0,
            "4960": 303322112.0,
            "4965": 303846400.0,
            "4970": 303322112.0,
            "4975": 303846400.0,
            "4980": 303322112.0,
            "4985": 303322112.0,
            "4990": 303322112.0,
            "4995": 303846400.0,
            "5000": 303322112.0,
            "5005": 303846400.0,
            "5010": 303322112.0,
            "5015": 303846400.0,
            "5020": 303322112.0,
            "5025": 303846400.0,
            "5030": 303322112.0,
            "5035": 303846400.0,
            "5040": 303846400.0,
            "5045": 303322112.0,
            "5050": 303322112.0,
            "5055": 303322112.0,
            "5060": 303846400.0,
            "5065": 303322112.0,
            "5070": 303322112.0,
            "5075": 303322112.0,
            "5080": 303322112.0,
            "5085": 303322112.0,
            "5090": 303322112.0,
            "5095": 303322112.0,
            "5100": 302797824.0,
            "5105": 303322112.0,
            "5110": 303846400.0,
            "5115": 303846400.0,
            "5120": 303322112.0,
            "5125": 303846400.0,
            "5130": 303322112.0,
            "5135": 303322112.0,
            "5140": 303846400.0,
            "5145": 303322112.0,
            "5150": 303846400.0,
            "5155": 303846400.0,
            "5160": 303846400.0,
            "5165": 302797824.0,
            "5170": 303846400.0,
            "5175": 303322112.0,
            "5180": 302797824.0,
            "5185": 302797824.0,
            "5190": 303322112.0,
            "5195": 303322112.0,
            "5200": 302797824.0,
            "5205": 303322112.0,
            "5210": 303322112.0,
            "5215": 303846400.0,
            "5220": 303846400.0,
            "5225": 303322112.0,
            "5230": 303846400.0,
            "5235": 302797824.0,
            "5240": 303322112.0,
            "5245": 303322112.0,
            "5250": 303846400.0,
            "5255": 303322112.0,
            "5260": 303322112.0,
            "5265": 303846400.0,
            "5270": 303846400.0,
            "5275": 303846400.0,
            "5280": 302797824.0,
            "5285": 303322112.0,
            "5290": 303846400.0,
            "5295": 302797824.0,
            "5300": 303322112.0,
            "5305": 302797824.0,
            "5310": 303322112.0,
            "5315": 303846400.0,
            "5320": 303322112.0,
            "5325": 303322112.0,
            "5330": 303322112.0,
            "5335": 302797824.0,
            "5340": 302797824.0,
            "5345": 303322112.0,
            "5350": 303846400.0,
            "5355": 303846400.0,
            "5360": 302797824.0,
            "5365": 302797824.0,
            "5370": 303322112.0,
            "5375": 302797824.0,
            "5380": 303846400.0,
            "5385": 303322112.0,
            "5390": 303846400.0,
            "5395": 303322112.0,
            "5400": 303846400.0,
            "5405": 303846400.0,
            "5410": 303322112.0,
            "5415": 303322112.0,
            "5420": 303322112.0,
            "5425": 303322112.0,
            "5430": 303322112.0,
            "5435": 302797824.0,
            "5440": 303322112.0,
            "5445": 303846400.0,
            "5450": 303322112.0,
            "5455": 303322112.0,
            "5460": 303322112.0,
            "5465": 303846400.0,
            "5470": 303846400.0,
            "5475": 303322112.0,
            "5480": 303322112.0,
            "5485": 303846400.0,
            "5490": 303846400.0,
            "5495": 303322112.0,
            "5500": 303322112.0,
            "5505": 303322112.0,
            "5510": 303846400.0,
            "5515": 302797824.0,
            "5520": 303322112.0,
            "5525": 303322112.0,
            "5530": 303322112.0,
            "5535": 302797824.0,
            "5540": 302797824.0,
            "5545": 302797824.0,
            "5550": 303322112.0,
            "5555": 303322112.0,
            "5560": 303846400.0,
            "5565": 303322112.0,
            "5570": 303846400.0,
            "5575": 303322112.0,
            "5580": 303322112.0,
            "5585": 302797824.0,
            "5590": 303846400.0,
            "5595": 302797824.0,
            "5600": 303322112.0,
            "5605": 303846400.0,
            "5610": 303846400.0,
            "5615": 302797824.0,
            "5620": 303846400.0,
            "5625": 303322112.0,
            "5630": 302797824.0,
            "5635": 303846400.0,
            "5640": 303322112.0,
            "5645": 303322112.0,
            "5650": 303322112.0,
            "5655": 303322112.0,
            "5660": 303846400.0,
            "5665": 303846400.0,
            "5670": 303322112.0,
            "5675": 302797824.0,
            "5680": 303846400.0,
            "5685": 303322112.0,
            "5690": 303322112.0,
            "5695": 303322112.0,
            "5700": 303322112.0,
            "5705": 303322112.0,
            "5710": 303322112.0,
            "5715": 303846400.0,
            "5720": 303322112.0,
            "5725": 303322112.0,
            "5730": 303322112.0,
            "5735": 303322112.0,
            "5740": 303846400.0,
            "5745": 303322112.0,
            "5750": 303846400.0,
            "5755": 303846400.0,
            "5760": 303846400.0,
            "5765": 303846400.0,
            "5770": 303846400.0,
            "5775": 303322112.0,
            "5780": 302797824.0,
            "5785": 303322112.0,
            "5790": 303846400.0,
            "5795": 302797824.0,
            "5800": 303846400.0,
            "5805": 303846400.0,
            "5810": 302797824.0,
            "5815": 303846400.0,
            "5820": 303846400.0,
            "5825": 303322112.0,
            "5830": 303322112.0,
            "5835": 303846400.0,
            "5840": 302797824.0,
            "5845": 303322112.0,
            "5850": 303846400.0,
            "5855": 303322112.0,
            "5860": 303322112.0,
            "5865": 303322112.0,
            "5870": 303846400.0,
            "5875": 303846400.0,
            "5880": 303846400.0,
            "5885": 303846400.0,
            "5890": 303322112.0,
            "5895": 303846400.0,
            "5900": 303322112.0,
            "5905": 303846400.0,
            "5910": 303322112.0,
            "5915": 303846400.0,
            "5920": 303322112.0,
            "5925": 303322112.0,
            "5930": 303846400.0,
            "5935": 303322112.0,
            "5940": 303846400.0,
            "5945": 303322112.0,
            "5950": 303846400.0,
            "5955": 303322112.0,
            "5960": 303846400.0,
            "5965": 303846400.0,
            "5970": 303322112.0,
            "5975": 302797824.0,
            "5980": 303322112.0,
            "5985": 302797824.0,
            "5990": 303846400.0,
            "5995": 303846400.0,
            "6000": 303846400.0,
            "6005": 303322112.0,
            "6010": 303322112.0,
            "6015": 303846400.0,
            "6020": 302797824.0,
            "6025": 303322112.0,
            "6030": 303846400.0,
            "6035": 303846400.0,
            "6040": 303846400.0,
            "6045": 303846400.0,
            "6050": 302797824.0,
            "6055": 303846400.0,
            "6060": 303322112.0,
            "6065": 303846400.0,
            "6070": 303322112.0,
            "6075": 303322112.0,
            "6080": 302797824.0,
            "6085": 302797824.0,
            "6090": 303846400.0,
            "6095": 303846400.0,
            "6100": 302797824.0,
            "6105": 302797824.0,
            "6110": 303846400.0,
            "6115": 303322112.0,
            "6120": 303846400.0,
            "6125": 303846400.0,
            "6130": 303846400.0,
            "6135": 303846400.0,
            "6140": 303846400.0,
            "6145": 303846400.0,
            "6150": 303322112.0,
            "6155": 303322112.0,
            "6160": 303322112.0,
            "6165": 303322112.0,
            "6170": 303846400.0,
            "6175": 303322112.0,
            "6180": 302797824.0,
            "6185": 303322112.0,
            "6190": 303846400.0,
            "6195": 303846400.0,
            "6200": 303846400.0,
            "6205": 302797824.0,
            "6210": 302797824.0,
            "6215": 303846400.0,
            "6220": 303846400.0,
            "6225": 302797824.0,
            "6230": 303846400.0,
            "6235": 303322112.0,
            "6240": 302797824.0,
            "6245": 303846400.0,
            "6250": 303846400.0,
            "6255": 303846400.0,
            "6260": 303322112.0,
            "6265": 302797824.0,
            "6270": 303846400.0,
            "6275": 303846400.0,
            "6280": 302797824.0,
            "6285": 303322112.0,
            "6290": 303846400.0,
            "6295": 302797824.0,
            "6300": 303322112.0,
            "6305": 303322112.0,
            "6310": 303846400.0,
            "6315": 303846400.0,
            "6320": 303322112.0,
            "6325": 302797824.0,
            "6330": 303846400.0,
            "6335": 303846400.0,
            "6340": 303846400.0,
            "6345": 303846400.0,
            "6350": 303322112.0,
            "6355": 303846400.0,
            "6360": 303322112.0,
            "6365": 302797824.0,
            "6370": 302797824.0,
            "6375": 302797824.0,
            "6380": 303846400.0,
            "6385": 302797824.0,
            "6390": 302797824.0,
            "6395": 303846400.0,
            "6400": 302797824.0,
            "6405": 303846400.0,
            "6410": 303846400.0,
            "6415": 303322112.0,
            "6420": 303846400.0,
            "6425": 303846400.0,
            "6430": 303846400.0,
            "6435": 302797824.0,
            "6440": 303846400.0,
            "6445": 302797824.0,
            "6450": 303846400.0,
            "6455": 303846400.0,
            "6460": 303322112.0,
            "6465": 302797824.0,
            "6470": 303322112.0,
            "6475": 303846400.0,
            "6480": 303846400.0,
            "6485": 303846400.0,
            "6490": 303322112.0,
            "6495": 302797824.0,
            "6500": 303322112.0,
            "6505": 302797824.0,
            "6510": 303846400.0,
            "6515": 302797824.0,
            "6520": 303846400.0,
            "6525": 302797824.0,
            "6530": 303322112.0,
            "6535": 303846400.0,
            "6540": 303322112.0,
            "6545": 302797824.0,
            "6550": 303846400.0,
            "6555": 303846400.0,
            "6560": 303846400.0,
            "6565": 303846400.0,
            "6570": 303322112.0,
            "6575": 302797824.0,
            "6580": 302797824.0,
            "6585": 303322112.0,
            "6590": 303846400.0,
            "6595": 303846400.0,
            "6600": 303846400.0,
            "6605": 303846400.0,
            "6610": 303322112.0,
            "6615": 303322112.0,
            "6620": 303846400.0,
            "6625": 303322112.0,
            "6630": 302797824.0,
            "6635": 303322112.0,
            "6640": 303846400.0,
            "6645": 303846400.0,
            "6650": 303846400.0,
            "6655": 303846400.0,
            "6660": 303846400.0,
            "6665": 302797824.0,
            "6670": 302797824.0,
            "6675": 303322112.0,
            "6680": 303846400.0,
            "6685": 303846400.0,
            "6690": 302797824.0,
            "6695": 303322112.0,
            "6700": 303846400.0,
            "6705": 302797824.0,
            "6710": 302797824.0,
            "6715": 303322112.0,
            "6720": 303322112.0,
            "6725": 303846400.0,
            "6730": 303846400.0,
            "6735": 303322112.0,
            "6740": 302797824.0,
            "6745": 302797824.0,
            "6750": 303322112.0,
            "6755": 303846400.0,
            "6760": 303846400.0,
            "6765": 303846400.0,
            "6770": 302797824.0,
            "6775": 303322112.0,
            "6780": 303846400.0,
            "6785": 303846400.0,
            "6790": 303846400.0,
            "6795": 303846400.0,
            "6800": 303846400.0,
            "6805": 303846400.0,
            "6810": 303322112.0,
            "6815": 302797824.0,
            "6820": 303846400.0,
            "6825": 303322112.0,
            "6830": 303846400.0,
            "6835": 303846400.0,
            "6840": 303846400.0,
            "6845": 302797824.0,
            "6850": 303846400.0,
            "6855": 303322112.0,
            "6860": 303846400.0,
            "6865": 303322112.0,
            "6870": 303322112.0,
            "6875": 303322112.0,
            "6880": 303846400.0,
            "6885": 303846400.0,
            "6890": 303322112.0,
            "6895": 303322112.0,
            "6900": 303846400.0,
            "6905": 302797824.0,
            "6910": 302797824.0,
            "6915": 303846400.0,
            "6920": 303846400.0,
            "6925": 303322112.0,
            "6930": 302797824.0,
            "6935": 303322112.0,
            "6940": 303846400.0,
            "6945": 303322112.0,
            "6950": 302797824.0,
            "6955": 303846400.0,
            "6960": 303846400.0,
            "6965": 302797824.0,
            "6970": 303846400.0,
            "6975": 303846400.0,
            "6980": 303322112.0,
            "6985": 302797824.0,
            "6990": 302797824.0,
            "6995": 303322112.0,
            "7000": 303846400.0,
            "7005": 303846400.0,
            "7010": 302797824.0,
            "7015": 303322112.0,
            "7020": 303846400.0,
            "7025": 302797824.0,
            "7030": 302797824.0,
            "7035": 303846400.0,
            "7040": 303322112.0,
            "7045": 303846400.0,
            "7050": 303322112.0,
            "7055": 302797824.0,
            "7060": 303846400.0,
            "7065": 302797824.0,
            "7070": 303846400.0,
            "7075": 303322112.0,
            "7080": 303846400.0,
            "7085": 303846400.0,
            "7090": 303846400.0,
            "7095": 303322112.0,
            "7100": 303322112.0,
            "7105": 303846400.0,
            "7110": 303322112.0,
            "7115": 303846400.0,
            "7120": 303846400.0,
            "7125": 303846400.0,
            "7130": 302797824.0,
            "7135": 303846400.0,
            "7140": 303846400.0,
            "7145": 303322112.0,
            "7150": 303846400.0,
            "7155": 303322112.0,
            "7160": 303322112.0,
            "7165": 303846400.0,
            "7170": 303322112.0,
            "7175": 303322112.0,
            "7180": 303322112.0,
            "7185": 302797824.0,
            "7190": 303322112.0,
            "7195": 303322112.0,
            "7200": 303846400.0,
            "7205": 302797824.0,
            "7210": 302797824.0,
            "7215": 303322112.0,
            "7220": 303322112.0,
            "7225": 303846400.0,
            "7230": 303846400.0,
            "7235": 303322112.0,
            "7240": 303322112.0,
            "7245": 303322112.0,
            "7250": 303322112.0,
            "7255": 303846400.0,
            "7260": 303846400.0,
            "7265": 303846400.0,
            "7270": 303322112.0,
            "7275": 303846400.0,
            "7280": 303846400.0,
            "7285": 302797824.0,
            "7290": 302797824.0,
            "7295": 302797824.0,
            "7300": 303846400.0,
            "7305": 303846400.0,
            "7310": 302797824.0,
            "7315": 303322112.0,
            "7320": 302797824.0,
            "7325": 302797824.0,
            "7330": 302797824.0,
            "7335": 302797824.0,
            "7340": 303322112.0,
            "7345": 303322112.0,
            "7350": 302797824.0,
            "7355": 303322112.0,
            "7360": 303846400.0,
            "7365": 303322112.0,
            "7370": 303846400.0,
            "7375": 303846400.0,
            "7380": 303322112.0,
            "7385": 303322112.0,
            "7390": 303846400.0,
            "7395": 302797824.0,
            "7400": 303846400.0,
            "7405": 303846400.0,
            "7410": 303322112.0,
            "7415": 303322112.0,
            "7420": 303846400.0,
            "7425": 303322112.0,
            "7430": 303846400.0,
            "7435": 303846400.0,
            "7440": 302797824.0,
            "7445": 303846400.0,
            "7450": 303846400.0,
            "7455": 303846400.0,
            "7460": 303846400.0,
            "7465": 302797824.0,
            "7470": 302797824.0,
            "7475": 303846400.0,
            "7480": 303322112.0,
            "7485": 303322112.0,
            "7490": 303846400.0,
            "7495": 303846400.0,
            "7500": 303846400.0,
            "7505": 303846400.0,
            "7510": 303846400.0,
            "7515": 303322112.0,
            "7520": 303846400.0,
            "7525": 303846400.0,
            "7530": 302797824.0,
            "7535": 302797824.0,
            "7540": 303322112.0,
            "7545": 302797824.0,
            "7550": 303846400.0,
            "7555": 303846400.0,
            "7560": 302797824.0,
            "7565": 302797824.0,
            "7570": 303846400.0,
            "7575": 303322112.0,
            "7580": 303846400.0,
            "7585": 303322112.0,
            "7590": 303322112.0,
            "7595": 303322112.0,
            "7600": 303846400.0,
            "7605": 303322112.0,
            "7610": 303846400.0,
            "7615": 303322112.0,
            "7620": 303846400.0,
            "7625": 303322112.0,
            "7630": 303322112.0,
            "7635": 302797824.0,
            "7640": 303846400.0,
            "7645": 303322112.0,
            "7650": 303846400.0,
            "7655": 303846400.0,
            "7660": 303846400.0,
            "7665": 303846400.0,
            "7670": 303322112.0,
            "7675": 303846400.0,
            "7680": 303846400.0,
            "7685": 302797824.0,
            "7690": 302797824.0,
            "7695": 303322112.0,
            "7700": 303846400.0,
            "7705": 303846400.0,
            "7710": 303846400.0,
            "7715": 303322112.0,
            "7720": 302797824.0,
            "7725": 303846400.0,
            "7730": 303846400.0,
            "7735": 303846400.0,
            "7740": 303846400.0,
            "7745": 303322112.0,
            "7750": 303846400.0,
            "7755": 303322112.0,
            "7760": 303846400.0,
            "7765": 303846400.0,
            "7770": 303322112.0,
            "7775": 303322112.0,
            "7780": 302797824.0,
            "7785": 303846400.0,
            "7790": 302797824.0,
            "7795": 302797824.0,
            "7800": 303846400.0,
            "7805": 303846400.0,
            "7810": 303846400.0,
            "7815": 303846400.0,
            "7820": 303846400.0,
            "7825": 302797824.0,
            "7830": 303846400.0,
            "7835": 302797824.0,
            "7840": 303846400.0,
            "7845": 303846400.0,
            "7850": 303322112.0,
            "7855": 303846400.0,
            "7860": 303846400.0,
            "7865": 302797824.0,
            "7870": 303322112.0,
            "7875": 302797824.0,
            "7880": 303322112.0,
            "7885": 303846400.0,
            "7890": 303322112.0,
            "7895": 303322112.0,
            "7900": 303846400.0,
            "7905": 303322112.0,
            "7910": 303322112.0,
            "7915": 303322112.0,
            "7920": 303322112.0,
            "7925": 303846400.0,
            "7930": 303846400.0,
            "7935": 303846400.0,
            "7940": 303846400.0,
            "7945": 303846400.0,
            "7950": 302797824.0,
            "7955": 303846400.0,
            "7960": 303322112.0,
            "7965": 303846400.0,
            "7970": 303846400.0,
            "7975": 303846400.0,
            "7980": 303846400.0,
            "7985": 303846400.0,
            "7990": 303846400.0,
            "7995": 303322112.0,
            "8000": 303846400.0,
            "8005": 302797824.0,
            "8010": 302797824.0,
            "8015": 303846400.0,
            "8020": 303846400.0,
            "8025": 303846400.0,
            "8030": 303846400.0,
            "8035": 303322112.0,
            "8040": 302797824.0,
            "8045": 303846400.0,
            "8050": 303322112.0,
            "8055": 303322112.0,
            "8060": 303846400.0,
            "8065": 303846400.0,
            "8070": 302797824.0,
            "8075": 303322112.0,
            "8080": 302797824.0,
            "8085": 303846400.0,
            "8090": 302797824.0,
            "8095": 303322112.0,
            "8100": 302797824.0,
            "8105": 303846400.0,
            "8110": 303322112.0,
            "8115": 303846400.0,
            "8120": 303322112.0,
            "8125": 303322112.0,
            "8130": 302797824.0,
            "8135": 303322112.0,
            "8140": 303846400.0,
            "8145": 302797824.0,
            "8150": 302797824.0,
            "8155": 303322112.0,
            "8160": 303846400.0,
            "8165": 303322112.0,
            "8170": 303322112.0,
            "8175": 303846400.0,
            "8180": 302797824.0,
            "8185": 303846400.0,
            "8190": 303322112.0,
            "8195": 303322112.0,
            "8200": 303322112.0,
            "8205": 302797824.0,
            "8210": 303846400.0,
            "8215": 303846400.0,
            "8220": 302797824.0,
            "8225": 303322112.0,
            "8230": 303322112.0,
            "8235": 302797824.0,
            "8240": 303846400.0,
            "8245": 302797824.0,
            "8250": 303846400.0,
            "8255": 303322112.0,
            "8260": 303846400.0,
            "8265": 303846400.0,
            "8270": 302797824.0,
            "8275": 302797824.0,
            "8280": 303846400.0,
            "8285": 303322112.0,
            "8290": 303322112.0,
            "8295": 303846400.0,
            "8300": 303322112.0,
            "8305": 303322112.0,
            "8310": 303846400.0,
            "8315": 303846400.0,
            "8320": 302797824.0,
            "8325": 302797824.0,
            "8330": 303322112.0,
            "8335": 303846400.0,
            "8340": 302797824.0,
            "8345": 303322112.0,
            "8350": 303846400.0,
            "8355": 303846400.0,
            "8360": 302797824.0,
            "8365": 303846400.0,
            "8370": 303846400.0,
            "8375": 303846400.0,
            "8380": 303846400.0,
            "8385": 302797824.0,
            "8390": 303322112.0,
            "8395": 303322112.0,
            "8400": 303322112.0,
            "8405": 303322112.0,
            "8410": 302797824.0,
            "8415": 303846400.0,
            "8420": 303846400.0,
            "8425": 302797824.0,
            "8430": 303846400.0,
            "8435": 303322112.0,
            "8440": 302797824.0,
            "8445": 303322112.0,
            "8450": 303322112.0,
            "8455": 303846400.0,
            "8460": 303322112.0,
            "8465": 303846400.0,
            "8470": 302797824.0,
            "8475": 303846400.0,
            "8480": 303322112.0,
            "8485": 302797824.0,
            "8490": 302797824.0,
            "8495": 303846400.0,
            "8500": 303322112.0,
            "8505": 303322112.0,
            "8510": 303846400.0,
            "8515": 302797824.0,
            "8520": 303846400.0,
            "8525": 303846400.0,
            "8530": 302797824.0,
            "8535": 303846400.0,
            "8540": 303846400.0,
            "8545": 303322112.0,
            "8550": 303322112.0,
            "8555": 302797824.0,
            "8560": 302797824.0,
            "8565": 303322112.0,
            "8570": 303846400.0,
            "8575": 302797824.0,
            "8580": 303322112.0,
            "8585": 303846400.0,
            "8590": 303322112.0,
            "8595": 303322112.0,
            "8600": 303846400.0,
            "8605": 302797824.0,
            "8610": 303846400.0,
            "8615": 302797824.0,
            "8620": 303322112.0,
            "8625": 303846400.0,
            "8630": 303322112.0,
            "8635": 303322112.0,
            "8640": 302797824.0,
            "8645": 303322112.0,
            "8650": 304370688.0,
            "8655": 303322112.0,
            "8660": 302797824.0,
            "8665": 303322112.0,
            "8670": 303322112.0,
            "8675": 302797824.0,
            "8680": 303846400.0,
            "8685": 303846400.0,
            "8690": 303322112.0,
            "8695": 302797824.0,
            "8700": 303322112.0,
            "8705": 303846400.0,
            "8710": 302797824.0,
            "8715": 302797824.0,
            "8720": 303846400.0,
            "8725": 303846400.0,
            "8730": 303846400.0,
            "8735": 303846400.0,
            "8740": 303846400.0,
            "8745": 303846400.0,
            "8750": 303322112.0,
            "8755": 303846400.0,
            "8760": 303846400.0,
            "8765": 303846400.0,
            "8770": 303846400.0,
            "8775": 303846400.0,
            "8780": 302797824.0,
            "8785": 303322112.0,
            "8790": 302797824.0,
            "8795": 303846400.0,
            "8800": 303322112.0,
            "8805": 303322112.0,
            "8810": 302797824.0,
            "8815": 303846400.0,
            "8820": 303846400.0,
            "8825": 303322112.0,
            "8830": 303846400.0,
            "8835": 303322112.0,
            "8840": 302797824.0,
            "8845": 303846400.0,
            "8850": 303846400.0,
            "8855": 303322112.0,
            "8860": 303322112.0,
            "8865": 303322112.0,
            "8870": 303322112.0,
            "8875": 303322112.0,
            "8880": 302797824.0,
            "8885": 302797824.0,
            "8890": 303322112.0,
            "8895": 303846400.0,
            "8900": 303846400.0,
            "8905": 303846400.0,
            "8910": 302797824.0,
            "8915": 303846400.0,
            "8920": 303846400.0,
            "8925": 302797824.0,
            "8930": 303846400.0,
            "8935": 303846400.0,
            "8940": 303846400.0,
            "8945": 302797824.0,
            "8950": 303846400.0,
            "8955": 303322112.0,
            "8960": 303846400.0,
            "8965": 303322112.0,
            "8970": 303846400.0,
            "8975": 303846400.0,
            "8980": 303322112.0,
            "8985": 303322112.0,
            "8990": 303846400.0,
            "8995": 303322112.0,
            "9000": 303322112.0,
            "9005": 303322112.0,
            "9010": 303846400.0,
            "9015": 303846400.0,
            "9020": 303846400.0,
            "9025": 302797824.0,
            "9030": 303846400.0,
            "9035": 302797824.0,
            "9040": 302797824.0,
            "9045": 303846400.0,
            "9050": 302797824.0,
            "9055": 302797824.0,
            "9060": 303846400.0,
            "9065": 302797824.0,
            "9070": 303846400.0,
            "9075": 303322112.0,
            "9080": 303846400.0,
            "9085": 302797824.0,
            "9090": 303846400.0,
            "9095": 303846400.0,
            "9100": 303846400.0,
            "9105": 303846400.0,
            "9110": 303322112.0,
            "9115": 303846400.0,
            "9120": 303846400.0,
            "9125": 303846400.0,
            "9130": 303846400.0,
            "9135": 303322112.0,
            "9140": 303846400.0,
            "9145": 303846400.0,
            "9150": 302797824.0,
            "9155": 303846400.0,
            "9160": 303322112.0,
            "9165": 303846400.0,
            "9170": 303322112.0,
            "9175": 303846400.0,
            "9180": 303846400.0,
            "9185": 302797824.0,
            "9190": 303322112.0,
            "9195": 303846400.0,
            "9200": 303846400.0,
            "9205": 302797824.0,
            "9210": 303322112.0,
            "9215": 303846400.0,
            "9220": 303322112.0,
            "9225": 302797824.0,
            "9230": 302797824.0,
            "9235": 303322112.0,
            "9240": 303322112.0,
            "9245": 303846400.0,
            "9250": 302797824.0,
            "9255": 303322112.0,
            "9260": 303322112.0,
            "9265": 302797824.0,
            "9270": 303846400.0,
            "9275": 303846400.0,
            "9280": 303322112.0,
            "9285": 303322112.0,
            "9290": 303322112.0,
            "9295": 303846400.0,
            "9300": 303322112.0,
            "9305": 303322112.0,
            "9310": 302797824.0,
            "9315": 303322112.0,
            "9320": 303322112.0,
            "9325": 302797824.0,
            "9330": 303846400.0,
            "9335": 302797824.0,
            "9340": 303322112.0,
            "9345": 302797824.0,
            "9350": 303846400.0,
            "9355": 303322112.0,
            "9360": 303846400.0,
            "9365": 302797824.0,
            "9370": 303322112.0,
            "9375": 303846400.0,
            "9380": 303846400.0,
            "9385": 303846400.0,
            "9390": 303322112.0,
            "9395": 303322112.0,
            "9400": 303846400.0,
            "9405": 302797824.0,
            "9410": 303322112.0,
            "9415": 302797824.0,
            "9420": 303846400.0,
            "9425": 303322112.0,
            "9430": 303846400.0,
            "9435": 303322112.0,
            "9440": 302797824.0,
            "9445": 303322112.0,
            "9450": 303846400.0,
            "9455": 303846400.0,
            "9460": 303322112.0,
            "9465": 303846400.0,
            "9470": 302797824.0,
            "9475": 303322112.0,
            "9480": 303322112.0,
            "9485": 302797824.0,
            "9490": 303846400.0,
            "9495": 303322112.0,
            "9500": 302797824.0,
            "9505": 303846400.0,
            "9510": 303322112.0,
            "9515": 303322112.0,
            "9520": 303322112.0,
            "9525": 302797824.0,
            "9530": 303846400.0,
            "9535": 303322112.0,
            "9540": 303322112.0,
            "9545": 303846400.0,
            "9550": 303846400.0,
            "9555": 303846400.0,
            "9560": 303846400.0,
            "9565": 303846400.0,
            "9570": 303846400.0,
            "9575": 303322112.0,
            "9580": 303846400.0,
            "9585": 303846400.0,
            "9590": 303846400.0,
            "9595": 303846400.0,
            "9600": 302797824.0,
            "9605": 303846400.0,
            "9610": 303846400.0,
            "9615": 303846400.0,
            "9620": 303322112.0,
            "9625": 302797824.0,
            "9630": 303322112.0,
            "9635": 303846400.0,
            "9640": 303846400.0,
            "9645": 303846400.0,
            "9650": 303322112.0,
            "9655": 303322112.0,
            "9660": 303322112.0,
            "9665": 302797824.0,
            "9670": 303846400.0,
            "9675": 303846400.0,
            "9680": 303846400.0,
            "9685": 303846400.0,
            "9690": 303846400.0,
            "9695": 302797824.0,
            "9700": 303846400.0,
            "9705": 303846400.0,
            "9710": 303846400.0,
            "9715": 302797824.0,
            "9720": 303322112.0,
            "9725": 303846400.0,
            "9730": 302797824.0,
            "9735": 303322112.0,
            "9740": 303846400.0,
            "9745": 303846400.0,
            "9750": 303846400.0,
            "9755": 303846400.0,
            "9760": 303846400.0,
            "9765": 303846400.0,
            "9770": 303846400.0,
            "9775": 302797824.0,
            "9780": 302797824.0,
            "9785": 303322112.0,
            "9790": 303846400.0,
            "9795": 303846400.0,
            "9800": 303846400.0,
            "9805": 303322112.0,
            "9810": 303846400.0,
            "9815": 302797824.0,
            "9820": 303846400.0,
            "9825": 303322112.0,
            "9830": 303322112.0,
            "9835": 303322112.0,
            "9840": 303322112.0,
            "9845": 303322112.0,
            "9850": 303846400.0,
            "9855": 303846400.0,
            "9860": 303846400.0,
            "9865": 303846400.0,
            "9870": 303846400.0,
            "9875": 302797824.0,
            "9880": 303846400.0,
            "9885": 303846400.0,
            "9890": 302797824.0,
            "9895": 303846400.0,
            "9900": 303322112.0,
            "9905": 303846400.0,
            "9910": 303322112.0,
            "9915": 303322112.0,
            "9920": 303846400.0,
            "9925": 303322112.0,
            "9930": 303846400.0,
            "9935": 303322112.0,
            "9940": 303846400.0,
            "9945": 303322112.0,
            "9950": 303846400.0,
            "9955": 303846400.0,
            "9960": 303322112.0,
            "9965": 303846400.0,
            "9970": 303846400.0,
            "9975": 303846400.0,
            "9980": 303846400.0,
            "9985": 303846400.0,
            "9990": 303846400.0,
            "9995": 303322112.0,
            "10000": 303322112.0,
            "10005": 303846400.0,
            "10010": 303846400.0,
            "10015": 303322112.0,
            "10020": 303322112.0,
            "10025": 302797824.0,
            "10030": 302797824.0,
            "10035": 303322112.0,
            "10040": 303846400.0,
            "10045": 302797824.0,
            "10050": 303322112.0,
            "10055": 303322112.0,
            "10060": 303846400.0,
            "10065": 303846400.0,
            "10070": 303846400.0,
            "10075": 303846400.0,
            "10080": 302797824.0,
            "10085": 303846400.0,
            "10090": 303322112.0,
            "10095": 303322112.0,
            "10100": 303846400.0,
            "10105": 302797824.0,
            "10110": 302797824.0,
            "10115": 303322112.0,
            "10120": 303322112.0,
            "10125": 302797824.0,
            "10130": 303322112.0,
            "10135": 302797824.0,
            "10140": 303846400.0,
            "10145": 303846400.0,
            "10150": 303846400.0,
            "10155": 302797824.0,
            "10160": 303322112.0,
            "10165": 303846400.0,
            "10170": 303846400.0,
            "10175": 302797824.0,
            "10180": 302797824.0,
            "10185": 302797824.0,
            "10190": 303322112.0,
            "10195": 303846400.0,
            "10200": 303846400.0,
            "10205": 303322112.0,
            "10210": 303322112.0,
            "10215": 303846400.0,
            "10220": 303322112.0,
            "10225": 303846400.0,
            "10230": 302797824.0,
            "10235": 303322112.0,
            "10240": 302797824.0,
            "10245": 302797824.0,
            "10250": 303322112.0,
            "10255": 303846400.0,
            "10260": 303322112.0,
            "10265": 302797824.0,
            "10270": 303846400.0,
            "10275": 303846400.0,
            "10280": 303322112.0,
            "10285": 302797824.0,
            "10290": 303846400.0,
            "10295": 302797824.0,
            "10300": 303322112.0,
            "10305": 303322112.0,
            "10310": 302797824.0,
            "10315": 303846400.0,
            "10320": 303846400.0,
            "10325": 303846400.0,
            "10330": 302797824.0,
            "10335": 302797824.0,
            "10340": 303322112.0,
            "10345": 303846400.0,
            "10350": 303846400.0,
            "10355": 303846400.0,
            "10360": 302797824.0,
            "10365": 302797824.0,
            "10370": 303322112.0,
            "10375": 303846400.0,
            "10380": 303846400.0,
            "10385": 303846400.0,
            "10390": 303322112.0,
            "10395": 303846400.0,
            "10400": 303322112.0,
            "10405": 303322112.0,
            "10410": 302797824.0,
            "10415": 303322112.0,
            "10420": 302797824.0,
            "10425": 303846400.0,
            "10430": 303846400.0,
            "10435": 303846400.0,
            "10440": 303322112.0,
            "10445": 303846400.0,
            "10450": 303846400.0,
            "10455": 303322112.0,
            "10460": 303846400.0,
            "10465": 303322112.0,
            "10470": 303846400.0,
            "10475": 303322112.0,
            "10480": 302797824.0,
            "10485": 302797824.0,
            "10490": 303322112.0,
            "10495": 303322112.0,
            "10500": 303322112.0,
            "10505": 303846400.0,
            "10510": 303846400.0,
            "10515": 303322112.0,
            "10520": 303322112.0,
            "10525": 302797824.0,
            "10530": 303846400.0,
            "10535": 303846400.0,
            "10540": 303322112.0,
            "10545": 303846400.0,
            "10550": 302797824.0,
            "10555": 303846400.0,
            "10560": 303322112.0,
            "10565": 303322112.0,
            "10570": 303846400.0,
            "10575": 303846400.0,
            "10580": 303846400.0,
            "10585": 303322112.0,
            "10590": 303322112.0,
            "10595": 303322112.0,
            "10600": 303846400.0,
            "10605": 302797824.0,
            "10610": 303846400.0,
            "10615": 302797824.0,
            "10620": 302797824.0,
            "10625": 303846400.0,
            "10630": 303322112.0,
            "10635": 303846400.0,
            "10640": 303322112.0,
            "10645": 303846400.0,
            "10650": 303846400.0,
            "10655": 303322112.0,
            "10660": 303322112.0,
            "10665": 303322112.0,
            "10670": 303846400.0,
            "10675": 303846400.0,
            "10680": 302797824.0,
            "10685": 303322112.0,
            "10690": 303322112.0,
            "10695": 303846400.0,
            "10700": 303846400.0,
            "10705": 303846400.0,
            "10710": 303846400.0,
            "10715": 303846400.0,
            "10720": 303846400.0,
            "10725": 303846400.0,
            "10730": 302797824.0,
            "10735": 303322112.0,
            "10740": 302797824.0,
            "10745": 303846400.0,
            "10750": 303322112.0,
            "10755": 302797824.0,
            "10760": 302797824.0,
            "10765": 303322112.0,
            "10770": 303846400.0,
            "10775": 302797824.0,
            "10780": 303322112.0,
            "10785": 303322112.0,
            "10790": 303322112.0,
            "10795": 302797824.0,
            "10800": 303322112.0,
            "10805": 303322112.0,
            "10810": 303322112.0,
            "10815": 303322112.0,
            "10820": 303322112.0,
            "10825": 303846400.0,
            "10830": 303322112.0,
            "10835": 303322112.0,
            "10840": 303846400.0,
            "10845": 302797824.0,
            "10850": 303322112.0,
            "10855": 303846400.0,
            "10860": 303322112.0,
            "10865": 303846400.0,
            "10870": 303846400.0,
            "10875": 303846400.0,
            "10880": 303846400.0,
            "10885": 303846400.0,
            "10890": 303846400.0,
            "10895": 303322112.0,
            "10900": 303322112.0,
            "10905": 303846400.0,
            "10910": 303846400.0,
            "10915": 303846400.0,
            "10920": 303322112.0,
            "10925": 303322112.0,
            "10930": 303322112.0,
            "10935": 303846400.0,
            "10940": 302797824.0,
            "10945": 303846400.0,
            "10950": 303846400.0,
            "10955": 303846400.0,
            "10960": 302797824.0,
            "10965": 303846400.0,
            "10970": 303322112.0,
            "10975": 303846400.0,
            "10980": 303846400.0,
            "10985": 303322112.0,
            "10990": 303846400.0,
            "10995": 303846400.0,
            "11000": 302797824.0,
            "11005": 302797824.0,
            "11010": 303322112.0,
            "11015": 303322112.0,
            "11020": 303322112.0,
            "11025": 302797824.0,
            "11030": 303846400.0,
            "11035": 303846400.0,
            "11040": 302797824.0,
            "11045": 302797824.0,
            "11050": 303846400.0,
            "11055": 303322112.0,
            "11060": 303322112.0,
            "11065": 303322112.0,
            "11070": 302797824.0,
            "11075": 302797824.0,
            "11080": 303846400.0,
            "11085": 303846400.0,
            "11090": 303322112.0,
            "11095": 302797824.0,
            "11100": 303846400.0,
            "11105": 302797824.0,
            "11110": 303322112.0,
            "11115": 303322112.0,
            "11120": 303322112.0,
            "11125": 303322112.0,
            "11130": 303322112.0,
            "11135": 303846400.0,
            "11140": 303846400.0,
            "11145": 303846400.0,
            "11150": 303322112.0,
            "11155": 303846400.0,
            "11160": 303846400.0,
            "11165": 303322112.0,
            "11170": 303322112.0,
            "11175": 303322112.0,
            "11180": 303322112.0,
            "11185": 303322112.0,
            "11190": 303846400.0,
            "11195": 303846400.0,
            "11200": 302797824.0,
            "11205": 303322112.0,
            "11210": 302797824.0,
            "11215": 303322112.0,
            "11220": 303322112.0,
            "11225": 302797824.0,
            "11230": 303322112.0,
            "11235": 302797824.0,
            "11240": 303322112.0,
            "11245": 303322112.0,
            "11250": 302797824.0,
            "11255": 303322112.0,
            "11260": 303846400.0,
            "11265": 303322112.0,
            "11270": 303322112.0,
            "11275": 303846400.0,
            "11280": 303322112.0,
            "11285": 302797824.0,
            "11290": 303322112.0,
            "11295": 302797824.0,
            "11300": 302797824.0,
            "11305": 303322112.0,
            "11310": 303846400.0,
            "11315": 303846400.0,
            "11320": 303846400.0,
            "11325": 303846400.0,
            "11330": 303846400.0,
            "11335": 303846400.0,
            "11340": 303846400.0,
            "11345": 303322112.0,
            "11350": 303846400.0,
            "11355": 303846400.0,
            "11360": 303846400.0,
            "11365": 302797824.0,
            "11370": 303846400.0,
            "11375": 303322112.0,
            "11380": 303322112.0,
            "11385": 303322112.0,
            "11390": 302797824.0,
            "11395": 303846400.0,
            "11400": 302797824.0,
            "11405": 303846400.0,
            "11410": 303846400.0,
            "11415": 302797824.0,
            "11420": 303322112.0,
            "11425": 302797824.0,
            "11430": 303322112.0,
            "11435": 303322112.0,
            "11440": 303322112.0,
            "11445": 303322112.0,
            "11450": 303322112.0,
            "11455": 303846400.0,
            "11460": 303846400.0,
            "11465": 303846400.0,
            "11470": 303846400.0,
            "11475": 302797824.0,
            "11480": 303846400.0,
            "11485": 303846400.0,
            "11490": 303846400.0,
            "11495": 303846400.0,
            "11500": 303322112.0,
            "11505": 303846400.0,
            "11510": 303846400.0,
            "11515": 303846400.0,
            "11520": 303846400.0,
            "11525": 303322112.0,
            "11530": 303846400.0,
            "11535": 303322112.0,
            "11540": 303322112.0,
            "11545": 303322112.0,
            "11550": 302797824.0,
            "11555": 302797824.0,
            "11560": 302797824.0,
            "11565": 302797824.0,
            "11570": 303322112.0,
            "11575": 302797824.0,
            "11580": 303322112.0,
            "11585": 303846400.0,
            "11590": 303322112.0,
            "11595": 303846400.0,
            "11600": 302797824.0,
            "11605": 303846400.0,
            "11610": 303846400.0,
            "11615": 303322112.0,
            "11620": 303322112.0,
            "11625": 303846400.0,
            "11630": 303322112.0,
            "11635": 302797824.0,
            "11640": 303322112.0,
            "11645": 303846400.0,
            "11650": 302797824.0,
            "11655": 302797824.0,
            "11660": 303322112.0,
            "11665": 303322112.0,
            "11670": 302797824.0,
            "11675": 303846400.0,
            "11680": 303846400.0,
            "11685": 303322112.0,
            "11690": 303846400.0,
            "11695": 302797824.0,
            "11700": 303846400.0,
            "11705": 302797824.0,
            "11710": 303322112.0,
            "11715": 303846400.0,
            "11720": 303846400.0,
            "11725": 302797824.0,
            "11730": 303846400.0,
            "11735": 303846400.0,
            "11740": 303846400.0,
            "11745": 302797824.0,
            "11750": 303846400.0,
            "11755": 302797824.0,
            "11760": 302797824.0,
            "11765": 303846400.0,
            "11770": 303322112.0,
            "11775": 303322112.0,
            "11780": 302797824.0,
            "11785": 303322112.0,
            "11790": 303322112.0,
            "11795": 303322112.0,
            "11800": 303846400.0,
            "11805": 303846400.0,
            "11810": 303846400.0,
            "11815": 303846400.0,
            "11820": 303322112.0,
            "11825": 302797824.0,
            "11830": 303846400.0,
            "11835": 302797824.0,
            "11840": 303846400.0,
            "11845": 303322112.0,
            "11850": 303322112.0,
            "11855": 303846400.0,
            "11860": 303322112.0,
            "11865": 303846400.0,
            "11870": 303322112.0,
            "11875": 303846400.0,
            "11880": 303846400.0,
            "11885": 302797824.0,
            "11890": 303846400.0,
            "11895": 302797824.0,
            "11900": 303322112.0,
            "11905": 302797824.0,
            "11910": 302797824.0,
            "11915": 303846400.0,
            "11920": 303846400.0,
            "11925": 302797824.0,
            "11930": 303846400.0,
            "11935": 303322112.0,
            "11940": 303846400.0,
            "11945": 302797824.0,
            "11950": 303846400.0,
            "11955": 302797824.0,
            "11960": 303846400.0,
            "11965": 303846400.0,
            "11970": 303846400.0,
            "11975": 303846400.0,
            "11980": 302797824.0,
            "11985": 303846400.0,
            "11990": 303322112.0,
            "11995": 303322112.0,
            "12000": 303846400.0,
            "12005": 303846400.0,
            "12010": 303846400.0,
            "12015": 303846400.0,
            "12020": 303846400.0,
            "12025": 303846400.0,
            "12030": 303322112.0,
            "12035": 303846400.0,
            "12040": 303846400.0,
            "12045": 302797824.0,
            "12050": 303322112.0,
            "12055": 303846400.0,
            "12060": 303846400.0,
            "12065": 302797824.0,
            "12070": 303846400.0,
            "12075": 303322112.0,
            "12080": 303322112.0,
            "12085": 303846400.0,
            "12090": 303846400.0,
            "12095": 303322112.0,
            "12100": 303846400.0,
            "12105": 303846400.0,
            "12110": 302797824.0,
            "12115": 303846400.0,
            "12120": 302797824.0,
            "12125": 303322112.0,
            "12130": 303846400.0,
            "12135": 303846400.0,
            "12140": 303322112.0,
            "12145": 303846400.0,
            "12150": 302797824.0,
            "12155": 303846400.0,
            "12160": 302797824.0,
            "12165": 303846400.0,
            "12170": 302797824.0,
            "12175": 303846400.0,
            "12180": 303846400.0,
            "12185": 302797824.0,
            "12190": 303846400.0,
            "12195": 302797824.0,
            "12200": 303322112.0,
            "12205": 303322112.0,
            "12210": 303322112.0,
            "12215": 303322112.0,
            "12220": 303322112.0,
            "12225": 303846400.0,
            "12230": 303322112.0,
            "12235": 303846400.0,
            "12240": 303322112.0,
            "12245": 302797824.0,
            "12250": 303846400.0,
            "12255": 302797824.0,
            "12260": 302797824.0,
            "12265": 303322112.0,
            "12270": 303846400.0,
            "12275": 302797824.0,
            "12280": 302797824.0,
            "12285": 303846400.0,
            "12290": 303846400.0,
            "12295": 303322112.0,
            "12300": 303846400.0,
            "12305": 303846400.0,
            "12310": 303846400.0,
            "12315": 303322112.0,
            "12320": 303322112.0,
            "12325": 303322112.0,
            "12330": 303846400.0,
            "12335": 303846400.0,
            "12340": 303846400.0,
            "12345": 303322112.0,
            "12350": 302797824.0,
            "12355": 302797824.0,
            "12360": 303846400.0,
            "12365": 303846400.0,
            "12370": 303846400.0,
            "12375": 303322112.0,
            "12380": 303846400.0,
            "12385": 303846400.0,
            "12390": 303846400.0,
            "12395": 302797824.0,
            "12400": 303846400.0,
            "12405": 303846400.0,
            "12410": 303846400.0,
            "12415": 303322112.0,
            "12420": 303846400.0,
            "12425": 302797824.0,
            "12430": 303322112.0,
            "12435": 303846400.0,
            "12440": 303846400.0,
            "12445": 302797824.0,
            "12450": 302797824.0,
            "12455": 303846400.0,
            "12460": 302797824.0,
            "12465": 303322112.0,
            "12470": 303846400.0,
            "12475": 303322112.0,
            "12480": 303322112.0,
            "12485": 303846400.0,
            "12490": 303322112.0,
            "12495": 303846400.0,
            "12500": 303322112.0,
            "12505": 303322112.0,
            "12510": 303322112.0,
            "12515": 303322112.0,
            "12520": 302797824.0,
            "12525": 303322112.0,
            "12530": 303322112.0,
            "12535": 303846400.0,
            "12540": 303322112.0,
            "12545": 302797824.0,
            "12550": 302797824.0,
            "12555": 303322112.0,
            "12560": 302797824.0,
            "12565": 303846400.0,
            "12570": 303846400.0,
            "12575": 303846400.0,
            "12580": 303322112.0,
            "12585": 303322112.0,
            "12590": 303322112.0,
            "12595": 303846400.0,
            "12600": 303322112.0,
            "12605": 303846400.0,
            "12610": 303322112.0,
            "12615": 303846400.0,
            "12620": 302797824.0,
            "12625": 303846400.0,
            "12630": 302797824.0,
            "12635": 303322112.0,
            "12640": 302797824.0,
            "12645": 303846400.0,
            "12650": 303322112.0,
            "12655": 302797824.0,
            "12660": 302797824.0,
            "12665": 303846400.0,
            "12670": 303322112.0,
            "12675": 303322112.0,
            "12680": 303846400.0,
            "12685": 303322112.0,
            "12690": 303322112.0,
            "12695": 303846400.0,
            "12700": 303846400.0,
            "12705": 302797824.0,
            "12710": 303322112.0,
            "12715": 302797824.0,
            "12720": 302797824.0,
            "12725": 303846400.0,
            "12730": 303322112.0,
            "12735": 303846400.0,
            "12740": 303846400.0,
            "12745": 303846400.0,
            "12750": 303846400.0,
            "12755": 303846400.0,
            "12760": 303846400.0,
            "12765": 303322112.0,
            "12770": 303322112.0,
            "12775": 302797824.0,
            "12780": 303846400.0,
            "12785": 302797824.0,
            "12790": 303322112.0,
            "12795": 303322112.0,
            "12800": 303846400.0,
            "12805": 303322112.0,
            "12810": 303846400.0,
            "12815": 303322112.0,
            "12820": 303846400.0,
            "12825": 303322112.0,
            "12830": 303322112.0,
            "12835": 302797824.0,
            "12840": 303322112.0,
            "12845": 303322112.0,
            "12850": 302797824.0,
            "12855": 303322112.0,
            "12860": 303846400.0,
            "12865": 303846400.0,
            "12870": 302797824.0,
            "12875": 303322112.0,
            "12880": 302797824.0,
            "12885": 303846400.0,
            "12890": 303322112.0,
            "12895": 302797824.0,
            "12900": 303846400.0,
            "12905": 303322112.0,
            "12910": 303846400.0,
            "12915": 303846400.0,
            "12920": 303322112.0,
            "12925": 303322112.0,
            "12930": 303322112.0,
            "12935": 303322112.0,
            "12940": 303846400.0,
            "12945": 303322112.0,
            "12950": 303322112.0,
            "12955": 302797824.0,
            "12960": 303322112.0,
            "12965": 302797824.0,
            "12970": 303322112.0,
            "12975": 303846400.0,
            "12980": 303322112.0,
            "12985": 303846400.0,
            "12990": 302797824.0,
            "12995": 302797824.0,
            "13000": 303846400.0,
            "13005": 303846400.0,
            "13010": 303846400.0,
            "13015": 302797824.0,
            "13020": 303322112.0,
            "13025": 302797824.0,
            "13030": 302797824.0,
            "13035": 303846400.0,
            "13040": 303322112.0,
            "13045": 303322112.0,
            "13050": 302797824.0,
            "13055": 303846400.0,
            "13060": 302797824.0,
            "13065": 302797824.0,
            "13070": 303846400.0,
            "13075": 303322112.0,
            "13080": 303846400.0,
            "13085": 303846400.0,
            "13090": 303846400.0,
            "13095": 303846400.0,
            "13100": 303846400.0,
            "13105": 303846400.0,
            "13110": 303846400.0,
            "13115": 303846400.0,
            "13120": 303322112.0,
            "13125": 303322112.0,
            "13130": 302797824.0,
            "13135": 303846400.0,
            "13140": 303846400.0,
            "13145": 303846400.0,
            "13150": 303322112.0,
            "13155": 303846400.0,
            "13160": 303322112.0,
            "13165": 302797824.0,
            "13170": 303322112.0,
            "13175": 302797824.0,
            "13180": 303322112.0,
            "13185": 302797824.0,
            "13190": 303322112.0,
            "13195": 302797824.0,
            "13200": 303846400.0,
            "13205": 302797824.0,
            "13210": 303322112.0,
            "13215": 303322112.0,
            "13220": 303846400.0,
            "13225": 303846400.0,
            "13230": 302797824.0,
            "13235": 303846400.0,
            "13240": 302797824.0,
            "13245": 303322112.0,
            "13250": 303846400.0,
            "13255": 303846400.0,
            "13260": 303322112.0,
            "13265": 303322112.0,
            "13270": 303322112.0,
            "13275": 302797824.0,
            "13280": 303846400.0,
            "13285": 303322112.0,
            "13290": 303322112.0,
            "13295": 303846400.0,
            "13300": 303846400.0,
            "13305": 303846400.0,
            "13310": 303846400.0,
            "13315": 303322112.0,
            "13320": 303846400.0,
            "13325": 302797824.0,
            "13330": 303846400.0,
            "13335": 303846400.0,
            "13340": 303846400.0,
            "13345": 303322112.0,
            "13350": 303846400.0,
            "13355": 303846400.0,
            "13360": 303846400.0,
            "13365": 303846400.0,
            "13370": 303322112.0,
            "13375": 303846400.0,
            "13380": 303322112.0,
            "13385": 302797824.0,
            "13390": 303846400.0,
            "13395": 302797824.0,
            "13400": 302797824.0,
            "13405": 303846400.0,
            "13410": 303846400.0,
            "13415": 303322112.0,
            "13420": 303322112.0,
            "13425": 302797824.0,
            "13430": 302797824.0,
            "13435": 303322112.0,
            "13440": 303322112.0,
            "13445": 303846400.0,
            "13450": 302797824.0,
            "13455": 303322112.0,
            "13460": 302797824.0,
            "13465": 303322112.0,
            "13470": 302797824.0,
            "13475": 303846400.0,
            "13480": 302797824.0,
            "13485": 303846400.0,
            "13490": 303846400.0,
            "13495": 302797824.0,
            "13500": 303322112.0,
            "13505": 303846400.0,
            "13510": 303322112.0,
            "13515": 302797824.0,
            "13520": 302797824.0,
            "13525": 303846400.0,
            "13530": 303322112.0,
            "13535": 303846400.0,
            "13540": 303322112.0,
            "13545": 302797824.0,
            "13550": 303846400.0,
            "13555": 303846400.0,
            "13560": 303846400.0,
            "13565": 303322112.0,
            "13570": 302797824.0,
            "13575": 303846400.0,
            "13580": 303846400.0,
            "13585": 303322112.0,
            "13590": 303322112.0,
            "13595": 303846400.0,
            "13600": 303322112.0,
            "13605": 303846400.0,
            "13610": 303846400.0,
            "13615": 303846400.0,
            "13620": 303846400.0,
            "13625": 303322112.0,
            "13630": 303846400.0,
            "13635": 303322112.0,
            "13640": 303322112.0,
            "13645": 302797824.0,
            "13650": 302797824.0,
            "13655": 303322112.0,
            "13660": 303846400.0,
            "13665": 303846400.0,
            "13670": 303846400.0,
            "13675": 303846400.0,
            "13680": 302797824.0,
            "13685": 303846400.0,
            "13690": 302797824.0,
            "13695": 303846400.0,
            "13700": 303846400.0,
            "13705": 303846400.0,
            "13710": 303322112.0,
            "13715": 303322112.0,
            "13720": 303846400.0,
            "13725": 303846400.0,
            "13730": 303322112.0,
            "13735": 303846400.0,
            "13740": 303846400.0,
            "13745": 303322112.0,
            "13750": 303322112.0,
            "13755": 302797824.0,
            "13760": 303322112.0,
            "13765": 303322112.0,
            "13770": 303846400.0,
            "13775": 302797824.0,
            "13780": 302797824.0,
            "13785": 302797824.0,
            "13790": 303322112.0,
            "13795": 303322112.0,
            "13800": 303846400.0,
            "13805": 302797824.0,
            "13810": 303846400.0,
            "13815": 303846400.0,
            "13820": 303846400.0,
            "13825": 303846400.0,
            "13830": 303322112.0,
            "13835": 302797824.0,
            "13840": 303322112.0,
            "13845": 303846400.0,
            "13850": 303846400.0,
            "13855": 303322112.0,
            "13860": 302797824.0,
            "13865": 302797824.0,
            "13870": 303846400.0,
            "13875": 303846400.0,
            "13880": 303846400.0,
            "13885": 303846400.0,
            "13890": 302797824.0,
            "13895": 303846400.0,
            "13900": 302797824.0,
            "13905": 303846400.0,
            "13910": 303846400.0,
            "13915": 303846400.0,
            "13920": 303846400.0,
            "13925": 303322112.0,
            "13930": 303846400.0,
            "13935": 302797824.0,
            "13940": 303322112.0,
            "13945": 303846400.0,
            "13950": 303846400.0,
            "13955": 303846400.0,
            "13960": 303322112.0,
            "13965": 303322112.0,
            "13970": 303846400.0,
            "13975": 302797824.0,
            "13980": 303846400.0,
            "13985": 303846400.0,
            "13990": 302797824.0,
            "13995": 302797824.0,
            "14000": 303846400.0,
            "14005": 303322112.0,
            "14010": 303846400.0,
            "14015": 303322112.0,
            "14020": 302797824.0,
            "14025": 302797824.0,
            "14030": 303846400.0,
            "14035": 302797824.0,
            "14040": 303322112.0,
            "14045": 303846400.0,
            "14050": 302797824.0,
            "14055": 303846400.0,
            "14060": 303846400.0,
            "14065": 303322112.0,
            "14070": 303322112.0,
            "14075": 303846400.0,
            "14080": 303846400.0,
            "14085": 302797824.0,
            "14090": 303846400.0,
            "14095": 302797824.0,
            "14100": 303322112.0,
            "14105": 303322112.0,
            "14110": 303322112.0,
            "14115": 303846400.0,
            "14120": 303322112.0,
            "14125": 302797824.0,
            "14130": 303846400.0,
            "14135": 303322112.0,
            "14140": 303846400.0,
            "14145": 303846400.0,
            "14150": 303846400.0,
            "14155": 302797824.0,
            "14160": 303846400.0,
            "14165": 303846400.0,
            "14170": 303846400.0,
            "14175": 303846400.0,
            "14180": 303322112.0,
            "14185": 303846400.0,
            "14190": 303846400.0,
            "14195": 302797824.0,
            "14200": 302797824.0,
            "14205": 303322112.0,
            "14210": 302797824.0,
            "14215": 303846400.0,
            "14220": 302797824.0,
            "14225": 302797824.0,
            "14230": 302797824.0,
            "14235": 303846400.0,
            "14240": 302797824.0,
            "14245": 303322112.0,
            "14250": 303322112.0,
            "14255": 303322112.0,
            "14260": 303846400.0,
            "14265": 303322112.0,
            "14270": 303846400.0,
            "14275": 303322112.0,
            "14280": 303322112.0,
            "14285": 303846400.0,
            "14290": 303846400.0,
            "14295": 302797824.0,
            "14300": 303846400.0,
            "14305": 303846400.0,
            "14310": 302797824.0,
            "14315": 303846400.0,
            "14320": 303846400.0,
            "14325": 303322112.0,
            "14330": 303846400.0,
            "14335": 303846400.0,
            "14340": 303322112.0,
            "14345": 303322112.0,
            "14350": 303846400.0,
            "14355": 303846400.0,
            "14360": 303846400.0,
            "14365": 303846400.0,
            "14370": 303322112.0,
            "14375": 303322112.0,
            "14380": 303846400.0,
            "14385": 303846400.0,
            "14390": 302797824.0,
            "14395": 303846400.0,
            "14400": 302797824.0,
            "14405": 302797824.0,
            "14410": 303322112.0,
            "14415": 303322112.0,
            "14420": 303846400.0,
            "14425": 302797824.0,
            "14430": 303322112.0,
            "14435": 303846400.0,
            "14440": 303322112.0,
            "14445": 303846400.0,
            "14450": 303846400.0,
            "14455": 303846400.0,
            "14460": 303322112.0,
            "14465": 303846400.0,
            "14470": 303322112.0,
            "14475": 303322112.0,
            "14480": 303322112.0,
            "14485": 302797824.0,
            "14490": 303846400.0,
            "14495": 303846400.0,
            "14500": 302797824.0,
            "14505": 303322112.0,
            "14510": 302797824.0,
            "14515": 302797824.0,
            "14520": 303846400.0,
            "14525": 303846400.0,
            "14530": 303322112.0,
            "14535": 303322112.0,
            "14540": 303322112.0,
            "14545": 303846400.0,
            "14550": 303846400.0,
            "14555": 303322112.0,
            "14560": 303846400.0,
            "14565": 303322112.0,
            "14570": 303322112.0,
            "14575": 303846400.0,
            "14580": 303846400.0,
            "14585": 302797824.0,
            "14590": 303846400.0,
            "14595": 303846400.0,
            "14600": 302797824.0,
            "14605": 302797824.0,
            "14610": 303322112.0,
            "14615": 302797824.0,
            "14620": 303846400.0,
            "14625": 303846400.0,
            "14630": 303846400.0,
            "14635": 303846400.0,
            "14640": 302797824.0,
            "14645": 302797824.0,
            "14650": 303322112.0,
            "14655": 302797824.0,
            "14660": 303846400.0,
            "14665": 302797824.0,
            "14670": 303846400.0,
            "14675": 302797824.0,
            "14680": 303322112.0,
            "14685": 303322112.0,
            "14690": 303322112.0,
            "14695": 303322112.0,
            "14700": 303846400.0,
            "14705": 303846400.0,
            "14710": 302797824.0,
            "14715": 303846400.0,
            "14720": 303322112.0,
            "14725": 303322112.0,
            "14730": 303846400.0,
            "14735": 302797824.0,
            "14740": 303322112.0,
            "14745": 303322112.0,
            "14750": 302797824.0,
            "14755": 303846400.0,
            "14760": 302797824.0,
            "14765": 303846400.0,
            "14770": 303322112.0,
            "14775": 302797824.0,
            "14780": 303846400.0,
            "14785": 303846400.0,
            "14790": 303846400.0,
            "14795": 303846400.0,
            "14800": 303846400.0,
            "14805": 302797824.0,
            "14810": 303846400.0,
            "14815": 303846400.0,
            "14820": 303846400.0,
            "14825": 302797824.0,
            "14830": 303322112.0,
            "14835": 303322112.0,
            "14840": 303846400.0,
            "14845": 303322112.0,
            "14850": 303846400.0,
            "14855": 303846400.0,
            "14860": 303846400.0,
            "14865": 303322112.0,
            "14870": 302797824.0,
            "14875": 302797824.0,
            "14880": 303846400.0,
            "14885": 303322112.0,
            "14890": 303846400.0,
            "14895": 302797824.0,
            "14900": 303846400.0,
            "14905": 303322112.0,
            "14910": 302797824.0,
            "14915": 303846400.0,
            "14920": 303846400.0,
            "14925": 303322112.0,
            "14930": 303322112.0,
            "14935": 303322112.0,
            "14940": 302797824.0,
            "14945": 302797824.0,
            "14950": 302797824.0,
            "14955": 302797824.0,
            "14960": 303846400.0,
            "14965": 303846400.0,
            "14970": 303846400.0,
            "14975": 302797824.0,
            "14980": 302797824.0,
            "14985": 302797824.0,
            "14990": 303846400.0,
            "14995": 303322112.0,
            "15000": 303322112.0,
            "15005": 303846400.0,
            "15010": 303846400.0,
            "15015": 303846400.0,
            "15020": 303322112.0,
            "15025": 303322112.0,
            "15030": 303846400.0,
            "15035": 303322112.0,
            "15040": 303846400.0,
            "15045": 303846400.0,
            "15050": 303322112.0,
            "15055": 303846400.0,
            "15060": 303846400.0,
            "15065": 303322112.0,
            "15070": 303322112.0,
            "15075": 303322112.0,
            "15080": 303846400.0,
            "15085": 303322112.0,
            "15090": 303846400.0,
            "15095": 303322112.0,
            "15100": 303322112.0,
            "15105": 303846400.0,
            "15110": 303846400.0,
            "15115": 303846400.0,
            "15120": 303846400.0,
            "15125": 303846400.0,
            "15130": 302797824.0,
            "15135": 303322112.0,
            "15140": 303322112.0,
            "15145": 302797824.0,
            "15150": 303846400.0,
            "15155": 303322112.0,
            "15160": 303322112.0,
            "15165": 303846400.0,
            "15170": 303846400.0,
            "15175": 303846400.0,
            "15180": 303846400.0,
            "15185": 303846400.0,
            "15190": 302797824.0,
            "15195": 303322112.0,
            "15200": 303846400.0,
            "15205": 303322112.0,
            "15210": 302797824.0,
            "15215": 303322112.0,
            "15220": 302797824.0,
            "15225": 303322112.0,
            "15230": 302797824.0,
            "15235": 303322112.0,
            "15240": 303322112.0,
            "15245": 303846400.0,
            "15250": 303846400.0,
            "15255": 303322112.0,
            "15260": 302797824.0,
            "15265": 303846400.0,
            "15270": 303846400.0,
            "15275": 302797824.0,
            "15280": 303322112.0,
            "15285": 303322112.0,
            "15290": 303322112.0,
            "15295": 303322112.0,
            "15300": 303846400.0,
            "15305": 303846400.0,
            "15310": 303846400.0,
            "15315": 302797824.0,
            "15320": 303322112.0,
            "15325": 303322112.0,
            "15330": 303846400.0,
            "15335": 303846400.0,
            "15340": 303322112.0,
            "15345": 302797824.0,
            "15350": 303846400.0,
            "15355": 303846400.0,
            "15360": 303846400.0,
            "15365": 303846400.0,
            "15370": 303322112.0,
            "15375": 303846400.0,
            "15380": 303846400.0,
            "15385": 302797824.0,
            "15390": 303322112.0,
            "15395": 303322112.0,
            "15400": 303322112.0,
            "15405": 303846400.0,
            "15410": 303846400.0,
            "15415": 303322112.0,
            "15420": 303846400.0,
            "15425": 302797824.0,
            "15430": 302797824.0,
            "15435": 303846400.0,
            "15440": 303846400.0,
            "15445": 303846400.0,
            "15450": 303322112.0,
            "15455": 303846400.0,
            "15460": 302797824.0,
            "15465": 303846400.0,
            "15470": 303846400.0,
            "15475": 303846400.0,
            "15480": 303322112.0,
            "15485": 302797824.0,
            "15490": 302797824.0,
            "15495": 303846400.0,
            "15500": 303322112.0,
            "15505": 302797824.0,
            "15510": 303846400.0,
            "15515": 303322112.0,
            "15520": 303846400.0,
            "15525": 303322112.0,
            "15530": 303322112.0,
            "15535": 303322112.0,
            "15540": 303846400.0,
            "15545": 302797824.0,
            "15550": 303322112.0,
            "15555": 303846400.0,
            "15560": 303322112.0,
            "15565": 303846400.0,
            "15570": 303846400.0,
            "15575": 302797824.0,
            "15580": 303846400.0,
            "15585": 303846400.0,
            "15590": 303846400.0,
            "15595": 303322112.0,
            "15600": 303322112.0,
            "15605": 303322112.0,
            "15610": 302797824.0,
            "15615": 303846400.0,
            "15620": 302797824.0,
            "15625": 302797824.0,
            "15630": 302797824.0,
            "15635": 303846400.0,
            "15640": 302797824.0,
            "15645": 303846400.0,
            "15650": 303322112.0,
            "15655": 303322112.0,
            "15660": 303322112.0,
            "15665": 303846400.0,
            "15670": 303846400.0,
            "15675": 303846400.0,
            "15680": 303846400.0,
            "15685": 303846400.0,
            "15690": 303846400.0,
            "15695": 303846400.0,
            "15700": 303846400.0,
            "15705": 303322112.0,
            "15710": 302797824.0,
            "15715": 303846400.0,
            "15720": 302797824.0,
            "15725": 303846400.0,
            "15730": 302797824.0,
            "15735": 303846400.0,
            "15740": 303322112.0,
            "15745": 303322112.0,
            "15750": 303846400.0,
            "15755": 303846400.0,
            "15760": 303846400.0,
            "15765": 303846400.0,
            "15770": 303322112.0,
            "15775": 302797824.0,
            "15780": 303846400.0,
            "15785": 303322112.0,
            "15790": 303846400.0,
            "15795": 303322112.0,
            "15800": 303846400.0,
            "15805": 303846400.0,
            "15810": 303846400.0,
            "15815": 303846400.0,
            "15820": 303322112.0,
            "15825": 303846400.0,
            "15830": 303846400.0,
            "15835": 303322112.0,
            "15840": 303846400.0,
            "15845": 303846400.0,
            "15850": 303846400.0,
            "15855": 303846400.0,
            "15860": 303322112.0,
            "15865": 303846400.0,
            "15870": 303846400.0,
            "15875": 303846400.0,
            "15880": 303846400.0,
            "15885": 302797824.0,
            "15890": 303846400.0,
            "15895": 303322112.0,
            "15900": 303322112.0,
            "15905": 303846400.0,
            "15910": 302797824.0,
            "15915": 303322112.0,
            "15920": 303846400.0,
            "15925": 303322112.0,
            "15930": 303846400.0,
            "15935": 303322112.0,
            "15940": 302797824.0,
            "15945": 303322112.0,
            "15950": 303846400.0,
            "15955": 303846400.0,
            "15960": 303846400.0,
            "15965": 302797824.0,
            "15970": 303322112.0,
            "15975": 302797824.0,
            "15980": 303322112.0,
            "15985": 303322112.0,
            "15990": 302797824.0,
            "15995": 303846400.0,
            "16000": 303846400.0,
            "16005": 303846400.0,
            "16010": 303846400.0,
            "16015": 303322112.0,
            "16020": 303846400.0,
            "16025": 303322112.0,
            "16030": 303322112.0,
            "16035": 303846400.0,
            "16040": 302797824.0,
            "16045": 303846400.0,
            "16050": 303846400.0,
            "16055": 303846400.0,
            "16060": 303846400.0,
            "16065": 303322112.0,
            "16070": 302797824.0,
            "16075": 303846400.0,
            "16080": 303322112.0,
            "16085": 303322112.0,
            "16090": 303322112.0,
            "16095": 303846400.0,
            "16100": 303322112.0,
            "16105": 303846400.0,
            "16110": 303846400.0,
            "16115": 303846400.0,
            "16120": 302797824.0,
            "16125": 303846400.0,
            "16130": 303322112.0,
            "16135": 303322112.0,
            "16140": 303322112.0,
            "16145": 302797824.0,
            "16150": 302797824.0,
            "16155": 303846400.0,
            "16160": 303322112.0,
            "16165": 303846400.0,
            "16170": 303846400.0,
            "16175": 303322112.0,
            "16180": 303846400.0,
            "16185": 303846400.0,
            "16190": 303846400.0,
            "16195": 302797824.0,
            "16200": 303322112.0,
            "16205": 302797824.0,
            "16210": 303846400.0,
            "16215": 302797824.0,
            "16220": 302797824.0,
            "16225": 303322112.0,
            "16230": 303846400.0,
            "16235": 303846400.0,
            "16240": 303846400.0,
            "16245": 303322112.0,
            "16250": 303846400.0,
            "16255": 303846400.0,
            "16260": 302797824.0,
            "16265": 303322112.0,
            "16270": 303322112.0,
            "16275": 303846400.0,
            "16280": 303846400.0,
            "16285": 302797824.0,
            "16290": 303846400.0,
            "16295": 303322112.0,
            "16300": 303322112.0,
            "16305": 303322112.0,
            "16310": 303846400.0,
            "16315": 303846400.0,
            "16320": 303322112.0,
            "16325": 302797824.0,
            "16330": 302797824.0,
            "16335": 303322112.0,
            "16340": 303322112.0,
            "16345": 303846400.0,
            "16350": 303846400.0,
            "16355": 302797824.0,
            "16360": 302797824.0,
            "16365": 302797824.0,
            "16370": 303846400.0,
            "16375": 303322112.0,
            "16380": 302797824.0,
            "16385": 303846400.0,
            "16390": 302797824.0,
            "16395": 303322112.0,
            "16400": 303846400.0,
            "16405": 303322112.0,
            "16410": 303846400.0,
            "16415": 303846400.0,
            "16420": 302797824.0,
            "16425": 303322112.0,
            "16430": 303846400.0,
            "16435": 303322112.0,
            "16440": 303322112.0,
            "16445": 303846400.0,
            "16450": 303322112.0,
            "16455": 303846400.0,
            "16460": 303322112.0,
            "16465": 302797824.0,
            "16470": 303846400.0,
            "16475": 303846400.0,
            "16480": 303846400.0,
            "16485": 303322112.0,
            "16490": 303322112.0,
            "16495": 302797824.0,
            "16500": 303322112.0,
            "16505": 303846400.0,
            "16510": 303846400.0,
            "16515": 303322112.0,
            "16520": 302797824.0,
            "16525": 303846400.0,
            "16530": 303846400.0,
            "16535": 303846400.0,
            "16540": 303846400.0,
            "16545": 303846400.0,
            "16550": 302797824.0,
            "16555": 303846400.0,
            "16560": 302797824.0,
            "16565": 302797824.0,
            "16570": 303846400.0,
            "16575": 303846400.0,
            "16580": 303846400.0,
            "16585": 303846400.0,
            "16590": 302797824.0,
            "16595": 303322112.0,
            "16600": 303846400.0,
            "16605": 303322112.0,
            "16610": 303322112.0,
            "16615": 303322112.0,
            "16620": 303846400.0,
            "16625": 303322112.0,
            "16630": 303846400.0,
            "16635": 303322112.0,
            "16640": 303322112.0,
            "16645": 302797824.0,
            "16650": 303322112.0,
            "16655": 303846400.0,
            "16660": 302797824.0,
            "16665": 303322112.0,
            "16670": 303846400.0,
            "16675": 302797824.0,
            "16680": 302797824.0,
            "16685": 303846400.0,
            "16690": 303322112.0,
            "16695": 302797824.0,
            "16700": 303846400.0,
            "16705": 303322112.0,
            "16710": 303846400.0,
            "16715": 303322112.0,
            "16720": 303322112.0,
            "16725": 303322112.0,
            "16730": 303322112.0,
            "16735": 303846400.0,
            "16740": 303846400.0,
            "16745": 303846400.0,
            "16750": 303322112.0,
            "16755": 303322112.0,
            "16760": 303846400.0,
            "16765": 302797824.0,
            "16770": 303846400.0,
            "16775": 303322112.0,
            "16780": 303846400.0,
            "16785": 303846400.0,
            "16790": 303322112.0,
            "16795": 303846400.0,
            "16800": 302797824.0,
            "16805": 303846400.0,
            "16810": 303846400.0,
            "16815": 303846400.0,
            "16820": 303846400.0,
            "16825": 303322112.0,
            "16830": 303846400.0,
            "16835": 303322112.0,
            "16840": 303846400.0,
            "16845": 303322112.0,
            "16850": 302797824.0,
            "16855": 303846400.0,
            "16860": 303322112.0,
            "16865": 302797824.0,
            "16870": 303846400.0,
            "16875": 303846400.0,
            "16880": 303322112.0,
            "16885": 303846400.0,
            "16890": 303322112.0,
            "16895": 303846400.0,
            "16900": 303322112.0,
            "16905": 303846400.0,
            "16910": 303846400.0,
            "16915": 303322112.0,
            "16920": 303322112.0,
            "16925": 303322112.0,
            "16930": 303846400.0,
            "16935": 302797824.0,
            "16940": 303846400.0,
            "16945": 303846400.0,
            "16950": 303322112.0,
            "16955": 303322112.0,
            "16960": 302797824.0,
            "16965": 303322112.0,
            "16970": 303846400.0,
            "16975": 303846400.0,
            "16980": 302797824.0,
            "16985": 303846400.0,
            "16990": 303846400.0,
            "16995": 303322112.0,
            "17000": 303322112.0,
            "17005": 302797824.0,
            "17010": 302797824.0,
            "17015": 303846400.0,
            "17020": 302797824.0,
            "17025": 303846400.0,
            "17030": 303846400.0,
            "17035": 303846400.0,
            "17040": 302797824.0,
            "17045": 303846400.0,
            "17050": 303846400.0,
            "17055": 303322112.0,
            "17060": 302797824.0,
            "17065": 303322112.0,
            "17070": 303322112.0,
            "17075": 303322112.0,
            "17080": 303846400.0,
            "17085": 303846400.0,
            "17090": 303846400.0,
            "17095": 303846400.0,
            "17100": 303322112.0,
            "17105": 303322112.0,
            "17110": 303846400.0,
            "17115": 303846400.0,
            "17120": 303322112.0,
            "17125": 303846400.0,
            "17130": 303846400.0,
            "17135": 302797824.0,
            "17140": 302797824.0,
            "17145": 303322112.0,
            "17150": 303846400.0,
            "17155": 303322112.0,
            "17160": 303322112.0,
            "17165": 303846400.0,
            "17170": 303322112.0,
            "17175": 303322112.0,
            "17180": 303846400.0,
            "17185": 303846400.0,
            "17190": 302797824.0,
            "17195": 303846400.0,
            "17200": 303322112.0,
            "17205": 303322112.0,
            "17210": 303322112.0,
            "17215": 303322112.0,
            "17220": 303846400.0,
            "17225": 303846400.0,
            "17230": 303322112.0,
            "17235": 302797824.0,
            "17240": 302797824.0,
            "17245": 303322112.0,
            "17250": 303322112.0,
            "17255": 303846400.0,
            "17260": 303322112.0,
            "17265": 303322112.0,
            "17270": 302797824.0,
            "17275": 303322112.0,
            "17280": 303322112.0,
            "17285": 302797824.0,
            "17290": 302797824.0,
            "17295": 303322112.0,
            "17300": 303322112.0,
            "17305": 303846400.0,
            "17310": 303846400.0,
            "17315": 303322112.0,
            "17320": 303846400.0,
            "17325": 303322112.0,
            "17330": 303322112.0,
            "17335": 302797824.0,
            "17340": 303322112.0,
            "17345": 303846400.0,
            "17350": 303846400.0,
            "17355": 303322112.0,
            "17360": 303846400.0,
            "17365": 303846400.0,
            "17370": 302797824.0,
            "17375": 303846400.0,
            "17380": 303322112.0,
            "17385": 303846400.0,
            "17390": 303322112.0,
            "17395": 302797824.0,
            "17400": 302797824.0,
            "17405": 303322112.0,
            "17410": 303322112.0,
            "17415": 303846400.0,
            "17420": 303846400.0,
            "17425": 303846400.0,
            "17430": 303846400.0,
            "17435": 303322112.0,
            "17440": 303846400.0,
            "17445": 303846400.0,
            "17450": 302797824.0,
            "17455": 303322112.0,
            "17460": 302797824.0,
            "17465": 302797824.0,
            "17470": 303846400.0,
            "17475": 302797824.0,
            "17480": 303846400.0,
            "17485": 303322112.0,
            "17490": 303846400.0,
            "17495": 303322112.0,
            "17500": 303846400.0,
            "17505": 303846400.0,
            "17510": 303846400.0,
            "17515": 303846400.0,
            "17520": 303846400.0,
            "17525": 303846400.0,
            "17530": 303846400.0,
            "17535": 303846400.0,
            "17540": 302797824.0,
            "17545": 303322112.0,
            "17550": 302797824.0,
            "17555": 303322112.0,
            "17560": 303322112.0,
            "17565": 303322112.0,
            "17570": 303322112.0,
            "17575": 303846400.0,
            "17580": 303322112.0,
            "17585": 302797824.0,
            "17590": 303846400.0,
            "17595": 302797824.0,
            "17600": 303322112.0,
            "17605": 302797824.0,
            "17610": 303846400.0,
            "17615": 303846400.0,
            "17620": 303322112.0,
            "17625": 302797824.0,
            "17630": 303846400.0,
            "17635": 303846400.0,
            "17640": 303846400.0,
            "17645": 303846400.0,
            "17650": 303322112.0,
            "17655": 303322112.0,
            "17660": 303322112.0,
            "17665": 302797824.0,
            "17670": 303846400.0,
            "17675": 303322112.0,
            "17680": 303846400.0,
            "17685": 303846400.0,
            "17690": 302797824.0,
            "17695": 303846400.0,
            "17700": 303846400.0,
            "17705": 303322112.0,
            "17710": 303846400.0,
            "17715": 303322112.0,
            "17720": 302797824.0,
            "17725": 303846400.0,
            "17730": 303846400.0,
            "17735": 303322112.0,
            "17740": 303846400.0,
            "17745": 302797824.0,
            "17750": 302797824.0,
            "17755": 303322112.0,
            "17760": 303322112.0,
            "17765": 303322112.0,
            "17770": 303322112.0,
            "17775": 303322112.0,
            "17780": 303846400.0,
            "17785": 303846400.0,
            "17790": 303846400.0,
            "17795": 303846400.0,
            "17800": 302797824.0,
            "17805": 303322112.0,
            "17810": 302797824.0,
            "17815": 303846400.0,
            "17820": 303846400.0,
            "17825": 303846400.0,
            "17830": 303846400.0,
            "17835": 303322112.0,
            "17840": 303322112.0,
            "17845": 302797824.0,
            "17850": 303846400.0,
            "17855": 302797824.0,
            "17860": 303846400.0,
            "17865": 303846400.0,
            "17870": 303322112.0,
            "17875": 302797824.0,
            "17880": 303846400.0,
            "17885": 302797824.0,
            "17890": 303322112.0,
            "17895": 303322112.0,
            "17900": 303846400.0,
            "17905": 303846400.0,
            "17910": 303322112.0,
            "17915": 303846400.0,
            "17920": 303846400.0,
            "17925": 303846400.0,
            "17930": 303846400.0,
            "17935": 302797824.0,
            "17940": 303846400.0,
            "17945": 303846400.0,
            "17950": 303322112.0,
            "17955": 303322112.0,
            "17960": 302797824.0,
            "17965": 302797824.0,
            "17970": 303322112.0,
            "17975": 303846400.0,
            "17980": 303846400.0,
            "17985": 303322112.0,
            "17990": 302797824.0,
            "17995": 303322112.0,
            "18000": 302797824.0,
            "18005": 302797824.0,
            "18010": 303846400.0,
            "18015": 303846400.0,
            "18020": 302797824.0,
            "18025": 303846400.0,
            "18030": 303846400.0,
            "18035": 303322112.0,
            "18040": 302797824.0,
            "18045": 302797824.0,
            "18050": 302797824.0,
            "18055": 302797824.0,
            "18060": 303846400.0,
            "18065": 302797824.0,
            "18070": 303322112.0,
            "18075": 303846400.0,
            "18080": 303846400.0,
            "18085": 303846400.0,
            "18090": 303846400.0,
            "18095": 303322112.0,
            "18100": 303322112.0,
            "18105": 303322112.0,
            "18110": 303846400.0,
            "18115": 303846400.0,
            "18120": 303846400.0,
            "18125": 303846400.0,
            "18130": 302797824.0,
            "18135": 303846400.0,
            "18140": 302797824.0,
            "18145": 303846400.0,
            "18150": 303846400.0,
            "18155": 302797824.0,
            "18160": 302797824.0,
            "18165": 302797824.0,
            "18170": 303846400.0,
            "18175": 303322112.0,
            "18180": 302797824.0,
            "18185": 303322112.0,
            "18190": 303322112.0,
            "18195": 303846400.0,
            "18200": 302797824.0,
            "18205": 303846400.0,
            "18210": 303846400.0,
            "18215": 303322112.0,
            "18220": 303322112.0,
            "18225": 303846400.0,
            "18230": 303322112.0,
            "18235": 303322112.0,
            "18240": 303322112.0,
            "18245": 303846400.0,
            "18250": 302797824.0,
            "18255": 302797824.0,
            "18260": 303322112.0,
            "18265": 303846400.0,
            "18270": 303846400.0,
            "18275": 303322112.0,
            "18280": 303322112.0,
            "18285": 303322112.0,
            "18290": 303322112.0,
            "18295": 303846400.0,
            "18300": 303846400.0,
            "18305": 303846400.0,
            "18310": 302797824.0,
            "18315": 303846400.0,
            "18320": 302797824.0,
            "18325": 303846400.0,
            "18330": 303846400.0,
            "18335": 302797824.0,
            "18340": 302797824.0,
            "18345": 302797824.0,
            "18350": 303322112.0,
            "18355": 303322112.0,
            "18360": 302797824.0,
            "18365": 303846400.0,
            "18370": 303322112.0,
            "18375": 303846400.0,
            "18380": 302797824.0,
            "18385": 303846400.0,
            "18390": 303322112.0,
            "18395": 302797824.0,
            "18400": 302797824.0,
            "18405": 303322112.0,
            "18410": 303322112.0,
            "18415": 303322112.0,
            "18420": 302797824.0,
            "18425": 302797824.0,
            "18430": 303322112.0,
            "18435": 302797824.0,
            "18440": 303322112.0,
            "18445": 303846400.0,
            "18450": 302797824.0,
            "18455": 303846400.0,
            "18460": 302797824.0,
            "18465": 302797824.0,
            "18470": 303322112.0,
            "18475": 303846400.0,
            "18480": 303846400.0,
            "18485": 303846400.0,
            "18490": 302797824.0,
            "18495": 303846400.0,
            "18500": 303846400.0,
            "18505": 303846400.0,
            "18510": 303846400.0,
            "18515": 303846400.0,
            "18520": 303846400.0,
            "18525": 302797824.0,
            "18530": 303846400.0,
            "18535": 303846400.0,
            "18540": 303846400.0,
            "18545": 303846400.0,
            "18550": 303846400.0,
            "18555": 303846400.0,
            "18560": 303846400.0,
            "18565": 303846400.0,
            "18570": 303322112.0,
            "18575": 302797824.0,
            "18580": 303846400.0,
            "18585": 303322112.0,
            "18590": 303322112.0,
            "18595": 303846400.0,
            "18600": 303846400.0,
            "18605": 303846400.0,
            "18610": 303846400.0,
            "18615": 303322112.0,
            "18620": 302797824.0,
            "18625": 303322112.0,
            "18630": 303322112.0,
            "18635": 302797824.0,
            "18640": 302797824.0,
            "18645": 303322112.0,
            "18650": 303846400.0,
            "18655": 303846400.0,
            "18660": 303846400.0,
            "18665": 303846400.0,
            "18670": 303846400.0,
            "18675": 303846400.0,
            "18680": 302797824.0,
            "18685": 303846400.0,
            "18690": 302797824.0,
            "18695": 302797824.0,
            "18700": 303846400.0,
            "18705": 303846400.0,
            "18710": 303846400.0,
            "18715": 303846400.0,
            "18720": 301749248.0,
            "18725": 301749248.0,
            "18730": 302273536.0,
            "18735": 302273536.0,
            "18740": 301749248.0,
            "18745": 302273536.0,
            "18750": 302273536.0,
            "18755": 302273536.0,
            "18760": 301749248.0,
            "18765": 302273536.0,
            "18770": 302273536.0,
            "18775": 301749248.0,
            "18780": 302273536.0,
            "18785": 302273536.0,
            "18790": 302273536.0,
            "18795": 301749248.0,
            "18800": 302273536.0,
            "18805": 301749248.0,
            "18810": 301749248.0,
            "18815": 301749248.0,
            "18820": 301749248.0,
            "18825": 301749248.0,
            "18830": 302273536.0,
            "18835": 302273536.0,
            "18840": 302273536.0,
            "18845": 302273536.0,
            "18850": 301749248.0,
            "18855": 302273536.0,
            "18860": 301749248.0,
            "18865": 301749248.0,
            "18870": 301749248.0,
            "18875": 301749248.0,
            "18880": 302273536.0,
            "18885": 302273536.0,
            "18890": 302273536.0,
            "18895": 302273536.0,
            "18900": 301749248.0,
            "18905": 301749248.0,
            "18910": 301749248.0,
            "18915": 301749248.0,
            "18920": 301749248.0,
            "18925": 301749248.0,
            "18930": 302273536.0,
            "18935": 302273536.0,
            "18940": 302273536.0,
            "18945": 302273536.0,
            "18950": 301749248.0,
            "18955": 301749248.0,
            "18960": 301749248.0,
            "18965": 301749248.0,
            "18970": 302273536.0,
            "18975": 302273536.0,
            "18980": 301749248.0,
            "18985": 302273536.0,
            "18990": 302273536.0,
            "18995": 301749248.0,
            "19000": 302273536.0,
            "19005": 301749248.0,
            "19010": 301749248.0,
            "19015": 302273536.0,
            "19020": 301749248.0,
            "19025": 302273536.0,
            "19030": 301749248.0,
            "19035": 301749248.0,
            "19040": 302273536.0,
            "19045": 301749248.0,
            "19050": 302273536.0,
            "19055": 301749248.0,
            "19060": 301749248.0,
            "19065": 301749248.0,
            "19070": 301749248.0,
            "19075": 302273536.0,
            "19080": 302273536.0,
            "19085": 301749248.0,
            "19090": 301749248.0,
            "19095": 302273536.0,
            "19100": 301749248.0,
            "19105": 301749248.0,
            "19110": 302273536.0,
            "19115": 301749248.0,
            "19120": 302273536.0,
            "19125": 301749248.0,
            "19130": 301749248.0,
            "19135": 301749248.0,
            "19140": 302273536.0,
            "19145": 302273536.0,
            "19150": 301749248.0,
            "19155": 302273536.0,
            "19160": 301749248.0,
            "19165": 302273536.0,
            "19170": 301749248.0,
            "19175": 301749248.0,
            "19180": 301749248.0,
            "19185": 301749248.0,
            "19190": 302273536.0,
            "19195": 302273536.0,
            "19200": 301749248.0,
            "19205": 302273536.0,
            "19210": 301749248.0,
            "19215": 302273536.0,
            "19220": 301749248.0,
            "19225": 301749248.0,
            "19230": 301749248.0,
            "19235": 302273536.0,
            "19240": 301749248.0,
            "19245": 302273536.0,
            "19250": 302273536.0,
            "19255": 301749248.0,
            "19260": 301749248.0,
            "19265": 302273536.0,
            "19270": 302273536.0,
            "19275": 301749248.0,
            "19280": 301749248.0,
            "19285": 301749248.0,
            "19290": 301749248.0,
            "19295": 301749248.0,
            "19300": 302273536.0,
            "19305": 301749248.0,
            "19310": 301749248.0,
            "19315": 301749248.0,
            "19320": 301749248.0,
            "19325": 301749248.0,
            "19330": 302273536.0,
            "19335": 301749248.0,
            "19340": 301749248.0,
            "19345": 301749248.0,
            "19350": 302273536.0,
            "19355": 301749248.0,
            "19360": 301749248.0,
            "19365": 302273536.0,
            "19370": 301749248.0,
            "19375": 301749248.0,
            "19380": 301749248.0,
            "19385": 302273536.0,
            "19390": 301749248.0,
            "19395": 301749248.0,
            "19400": 301749248.0,
            "19405": 302273536.0,
            "19410": 301749248.0,
            "19415": 302273536.0,
            "19420": 301749248.0,
            "19425": 301749248.0,
            "19430": 302273536.0,
            "19435": 301749248.0,
            "19440": 301749248.0,
            "19445": 301749248.0,
            "19450": 302273536.0,
            "19455": 301749248.0,
            "19460": 302273536.0,
            "19465": 302273536.0,
            "19470": 301749248.0,
            "19475": 301749248.0,
            "19480": 302273536.0,
            "19485": 301749248.0,
            "19490": 302273536.0,
            "19495": 302273536.0,
            "19500": 302273536.0,
            "19505": 301749248.0,
            "19510": 302273536.0,
            "19515": 301749248.0,
            "19520": 302273536.0,
            "19525": 301749248.0,
            "19530": 302273536.0,
            "19535": 302273536.0,
            "19540": 302273536.0,
            "19545": 302273536.0,
            "19550": 302273536.0,
            "19555": 301749248.0,
            "19560": 301749248.0,
            "19565": 302273536.0,
            "19570": 301749248.0,
            "19575": 301749248.0,
            "19580": 302273536.0,
            "19585": 302273536.0,
            "19590": 302273536.0,
            "19595": 301749248.0,
            "19600": 301749248.0,
            "19605": 302273536.0,
            "19610": 301749248.0,
            "19615": 302273536.0,
            "19620": 301749248.0,
            "19625": 302273536.0,
            "19630": 301749248.0,
            "19635": 301749248.0,
            "19640": 301749248.0,
            "19645": 302273536.0,
            "19650": 301749248.0,
            "19655": 301749248.0,
            "19660": 301749248.0,
            "19665": 301749248.0,
            "19670": 301749248.0,
            "19675": 302273536.0,
            "19680": 301749248.0,
            "19685": 301749248.0,
            "19690": 301749248.0,
            "19695": 301749248.0,
            "19700": 301749248.0,
            "19705": 302273536.0,
            "19710": 302273536.0,
            "19715": 302273536.0,
            "19720": 302273536.0,
            "19725": 302273536.0,
            "19730": 301749248.0,
            "19735": 302273536.0,
            "19740": 301749248.0,
            "19745": 301749248.0,
            "19750": 301749248.0,
            "19755": 302273536.0,
            "19760": 301749248.0,
            "19765": 301749248.0,
            "19770": 301749248.0,
            "19775": 302273536.0,
            "19780": 302273536.0,
            "19785": 301749248.0,
            "19790": 302273536.0,
            "19795": 301749248.0,
            "19800": 301749248.0,
            "19805": 301749248.0,
            "19810": 302273536.0,
            "19815": 302273536.0,
            "19820": 301749248.0,
            "19825": 302273536.0,
            "19830": 302273536.0,
            "19835": 301749248.0,
            "19840": 301749248.0,
            "19845": 301749248.0,
            "19850": 301749248.0,
            "19855": 302273536.0,
            "19860": 301749248.0,
            "19865": 301749248.0,
            "19870": 302273536.0,
            "19875": 302273536.0,
            "19880": 301749248.0,
            "19885": 302273536.0,
            "19890": 302273536.0,
            "19895": 301749248.0,
            "19900": 301749248.0,
            "19905": 302273536.0,
            "19910": 302273536.0,
            "19915": 302273536.0,
            "19920": 301749248.0,
            "19925": 302273536.0,
            "19930": 302273536.0,
            "19935": 302273536.0,
            "19940": 301749248.0,
            "19945": 302273536.0,
            "19950": 302273536.0,
            "19955": 301749248.0,
            "19960": 302273536.0,
            "19965": 301749248.0,
            "19970": 302273536.0,
            "19975": 302273536.0,
            "19980": 302273536.0,
            "19985": 301749248.0,
            "19990": 302273536.0,
            "19995": 301749248.0,
            "20000": 301749248.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 20000,
        "step_interval": 5,
        "values": {
            "1": 408750592.0,
            "5": 408751104.0,
            "10": 408751104.0,
            "15": 408751104.0,
            "20": 487634432.0,
            "25": 487634432.0,
            "30": 487634432.0,
            "35": 487634432.0,
            "40": 487634432.0,
            "45": 487634432.0,
            "50": 487634432.0,
            "55": 487634432.0,
            "60": 487634432.0,
            "65": 487634432.0,
            "70": 487634432.0,
            "75": 487634432.0,
            "80": 487634432.0,
            "85": 487634432.0,
            "90": 487634432.0,
            "95": 487634432.0,
            "100": 487634432.0,
            "105": 487634432.0,
            "110": 487634432.0,
            "115": 487634432.0,
            "120": 487634432.0,
            "125": 487634432.0,
            "130": 487634432.0,
            "135": 487634432.0,
            "140": 487634432.0,
            "145": 487634432.0,
            "150": 487634432.0,
            "155": 487634432.0,
            "160": 487634432.0,
            "165": 487634432.0,
            "170": 487634432.0,
            "175": 487634432.0,
            "180": 487634432.0,
            "185": 487634432.0,
            "190": 487634432.0,
            "195": 487634432.0,
            "200": 487634432.0,
            "205": 487634432.0,
            "210": 487634432.0,
            "215": 487634432.0,
            "220": 487634432.0,
            "225": 487634432.0,
            "230": 487634432.0,
            "235": 487634432.0,
            "240": 487634432.0,
            "245": 487634432.0,
            "250": 487634432.0,
            "255": 487634432.0,
            "260": 487634432.0,
            "265": 487634432.0,
            "270": 487634432.0,
            "275": 487634432.0,
            "280": 487634432.0,
            "285": 487634432.0,
            "290": 487634432.0,
            "295": 487634432.0,
            "300": 487634432.0,
            "305": 487634432.0,
            "310": 487634432.0,
            "315": 487634432.0,
            "320": 487634432.0,
            "325": 487634432.0,
            "330": 487634432.0,
            "335": 487634432.0,
            "340": 487634432.0,
            "345": 487634432.0,
            "350": 487634432.0,
            "355": 487634432.0,
            "360": 487634432.0,
            "365": 487634432.0,
            "370": 487634432.0,
            "375": 487634432.0,
            "380": 487634432.0,
            "385": 487634432.0,
            "390": 487634432.0,
            "395": 487634432.0,
            "400": 487634432.0,
            "405": 487634432.0,
            "410": 487634432.0,
            "415": 487634432.0,
            "420": 487634432.0,
            "425": 487634432.0,
            "430": 487634432.0,
            "435": 487634432.0,
            "440": 487634432.0,
            "445": 487634432.0,
            "450": 487634432.0,
            "455": 487634432.0,
            "460": 487634432.0,
            "465": 487634432.0,
            "470": 487634432.0,
            "475": 487634432.0,
            "480": 487634432.0,
            "485": 487634432.0,
            "490": 487634432.0,
            "495": 487634432.0,
            "500": 487634432.0,
            "505": 487634432.0,
            "510": 487634432.0,
            "515": 487634432.0,
            "520": 487634432.0,
            "525": 487634432.0,
            "530": 487634432.0,
            "535": 487634432.0,
            "540": 487634432.0,
            "545": 487634432.0,
            "550": 487634432.0,
            "555": 487634432.0,
            "560": 487634432.0,
            "565": 487634432.0,
            "570": 487634432.0,
            "575": 487634432.0,
            "580": 487634432.0,
            "585": 487634432.0,
            "590": 487634432.0,
            "595": 487634432.0,
            "600": 487634432.0,
            "605": 487634432.0,
            "610": 487634432.0,
            "615": 487634432.0,
            "620": 487634432.0,
            "625": 487634432.0,
            "630": 487634432.0,
            "635": 487634432.0,
            "640": 487634432.0,
            "645": 487634432.0,
            "650": 487634432.0,
            "655": 487634432.0,
            "660": 487634432.0,
            "665": 487634432.0,
            "670": 487634432.0,
            "675": 487634432.0,
            "680": 487634432.0,
            "685": 487634432.0,
            "690": 487634432.0,
            "695": 487634432.0,
            "700": 487634432.0,
            "705": 487634432.0,
            "710": 487634432.0,
            "715": 487634432.0,
            "720": 487634432.0,
            "725": 487634432.0,
            "730": 487634432.0,
            "735": 487634432.0,
            "740": 487634432.0,
            "745": 487634432.0,
            "750": 487634432.0,
            "755": 487634432.0,
            "760": 487634432.0,
            "765": 487634432.0,
            "770": 487634432.0,
            "775": 487634432.0,
            "780": 487634432.0,
            "785": 487634432.0,
            "790": 487634432.0,
            "795": 487634432.0,
            "800": 487634432.0,
            "805": 487634432.0,
            "810": 487634432.0,
            "815": 487634432.0,
            "820": 487634432.0,
            "825": 487634432.0,
            "830": 487634432.0,
            "835": 487634432.0,
            "840": 487634432.0,
            "845": 487634432.0,
            "850": 487634432.0,
            "855": 487634432.0,
            "860": 487634432.0,
            "865": 487634432.0,
            "870": 487634432.0,
            "875": 487634432.0,
            "880": 487634432.0,
            "885": 487634432.0,
            "890": 487634432.0,
            "895": 487634432.0,
            "900": 487634432.0,
            "905": 487634432.0,
            "910": 487634432.0,
            "915": 487634432.0,
            "920": 487634432.0,
            "925": 487634432.0,
            "930": 487634432.0,
            "935": 487634432.0,
            "940": 487634432.0,
            "945": 487634432.0,
            "950": 487634432.0,
            "955": 487634432.0,
            "960": 487634432.0,
            "965": 487634432.0,
            "970": 487634432.0,
            "975": 487634432.0,
            "980": 487634432.0,
            "985": 487634432.0,
            "990": 487634432.0,
            "995": 487634432.0,
            "1000": 487634432.0,
            "1005": 487634432.0,
            "1010": 487634432.0,
            "1015": 487634432.0,
            "1020": 487634432.0,
            "1025": 487634432.0,
            "1030": 487634432.0,
            "1035": 487634432.0,
            "1040": 487634432.0,
            "1045": 487634432.0,
            "1050": 487634432.0,
            "1055": 487634432.0,
            "1060": 487634432.0,
            "1065": 487634432.0,
            "1070": 487634432.0,
            "1075": 487634432.0,
            "1080": 487634432.0,
            "1085": 487634432.0,
            "1090": 487634432.0,
            "1095": 487634432.0,
            "1100": 487634432.0,
            "1105": 487634432.0,
            "1110": 487634432.0,
            "1115": 487634432.0,
            "1120": 487634432.0,
            "1125": 487634432.0,
            "1130": 487634432.0,
            "1135": 487634432.0,
            "1140": 487634432.0,
            "1145": 487634432.0,
            "1150": 487634432.0,
            "1155": 487634432.0,
            "1160": 487634432.0,
            "1165": 487634432.0,
            "1170": 487634432.0,
            "1175": 487634432.0,
            "1180": 487634432.0,
            "1185": 487634432.0,
            "1190": 487634432.0,
            "1195": 487634432.0,
            "1200": 487634432.0,
            "1205": 487634432.0,
            "1210": 487634432.0,
            "1215": 487634432.0,
            "1220": 487634432.0,
            "1225": 487634432.0,
            "1230": 487634432.0,
            "1235": 487634432.0,
            "1240": 487634432.0,
            "1245": 487634432.0,
            "1250": 487634432.0,
            "1255": 487634432.0,
            "1260": 487634432.0,
            "1265": 487634432.0,
            "1270": 487634432.0,
            "1275": 487634432.0,
            "1280": 487634432.0,
            "1285": 487634432.0,
            "1290": 487634432.0,
            "1295": 487634432.0,
            "1300": 487634432.0,
            "1305": 487634432.0,
            "1310": 487634432.0,
            "1315": 487634432.0,
            "1320": 487634432.0,
            "1325": 487634432.0,
            "1330": 487634432.0,
            "1335": 487634432.0,
            "1340": 487634432.0,
            "1345": 487634432.0,
            "1350": 487634432.0,
            "1355": 487634432.0,
            "1360": 487634432.0,
            "1365": 487634432.0,
            "1370": 487634432.0,
            "1375": 487634432.0,
            "1380": 487634432.0,
            "1385": 487634432.0,
            "1390": 487634432.0,
            "1395": 487634432.0,
            "1400": 487634432.0,
            "1405": 487634432.0,
            "1410": 487634432.0,
            "1415": 487634432.0,
            "1420": 487634432.0,
            "1425": 487634432.0,
            "1430": 487634432.0,
            "1435": 487634432.0,
            "1440": 487634432.0,
            "1445": 487634432.0,
            "1450": 487634432.0,
            "1455": 487634432.0,
            "1460": 487634432.0,
            "1465": 487634432.0,
            "1470": 487634432.0,
            "1475": 487634432.0,
            "1480": 487634432.0,
            "1485": 487634432.0,
            "1490": 487634432.0,
            "1495": 487634432.0,
            "1500": 487634432.0,
            "1505": 487634432.0,
            "1510": 487634432.0,
            "1515": 487634432.0,
            "1520": 487634432.0,
            "1525": 487634432.0,
            "1530": 487634432.0,
            "1535": 487634432.0,
            "1540": 487634432.0,
            "1545": 487634432.0,
            "1550": 487634432.0,
            "1555": 487634432.0,
            "1560": 487634432.0,
            "1565": 487634432.0,
            "1570": 487634432.0,
            "1575": 487634432.0,
            "1580": 487634432.0,
            "1585": 487634432.0,
            "1590": 487634432.0,
            "1595": 487634432.0,
            "1600": 487634432.0,
            "1605": 487634432.0,
            "1610": 487634432.0,
            "1615": 487634432.0,
            "1620": 487634432.0,
            "1625": 487634432.0,
            "1630": 487634432.0,
            "1635": 487634432.0,
            "1640": 487634432.0,
            "1645": 487634432.0,
            "1650": 487634432.0,
            "1655": 487634432.0,
            "1660": 487634432.0,
            "1665": 487634432.0,
            "1670": 487634432.0,
            "1675": 487634432.0,
            "1680": 487634432.0,
            "1685": 487634432.0,
            "1690": 487634432.0,
            "1695": 487634432.0,
            "1700": 487634432.0,
            "1705": 487634432.0,
            "1710": 487634432.0,
            "1715": 487634432.0,
            "1720": 487634432.0,
            "1725": 487634432.0,
            "1730": 487634432.0,
            "1735": 487634432.0,
            "1740": 487634432.0,
            "1745": 487634432.0,
            "1750": 487634432.0,
            "1755": 487634432.0,
            "1760": 487634432.0,
            "1765": 487634432.0,
            "1770": 487634432.0,
            "1775": 487634432.0,
            "1780": 487634432.0,
            "1785": 487634432.0,
            "1790": 487634432.0,
            "1795": 487634432.0,
            "1800": 487634432.0,
            "1805": 487634432.0,
            "1810": 487634432.0,
            "1815": 487634432.0,
            "1820": 487634432.0,
            "1825": 487634432.0,
            "1830": 487634432.0,
            "1835": 487634432.0,
            "1840": 487634432.0,
            "1845": 487634432.0,
            "1850": 487634432.0,
            "1855": 487634432.0,
            "1860": 487634432.0,
            "1865": 487634432.0,
            "1870": 487634432.0,
            "1875": 487634432.0,
            "1880": 487634432.0,
            "1885": 487634432.0,
            "1890": 487634432.0,
            "1895": 487634432.0,
            "1900": 487634432.0,
            "1905": 487634432.0,
            "1910": 487634432.0,
            "1915": 487634432.0,
            "1920": 487634432.0,
            "1925": 487634432.0,
            "1930": 487634432.0,
            "1935": 487634432.0,
            "1940": 487634432.0,
            "1945": 487634432.0,
            "1950": 487634432.0,
            "1955": 487634432.0,
            "1960": 487634432.0,
            "1965": 487634432.0,
            "1970": 487634432.0,
            "1975": 487634432.0,
            "1980": 487634432.0,
            "1985": 487634432.0,
            "1990": 487634432.0,
            "1995": 487634432.0,
            "2000": 487634432.0,
            "2005": 487634432.0,
            "2010": 487634432.0,
            "2015": 487634432.0,
            "2020": 487634432.0,
            "2025": 487634432.0,
            "2030": 487634432.0,
            "2035": 487634432.0,
            "2040": 487634432.0,
            "2045": 487634432.0,
            "2050": 487634432.0,
            "2055": 487634432.0,
            "2060": 487634432.0,
            "2065": 487634432.0,
            "2070": 487634432.0,
            "2075": 487634432.0,
            "2080": 487634432.0,
            "2085": 487634432.0,
            "2090": 487634432.0,
            "2095": 487634432.0,
            "2100": 487634432.0,
            "2105": 487634432.0,
            "2110": 487634432.0,
            "2115": 487634432.0,
            "2120": 487634432.0,
            "2125": 487634432.0,
            "2130": 487634432.0,
            "2135": 487634432.0,
            "2140": 487634432.0,
            "2145": 487634432.0,
            "2150": 487634432.0,
            "2155": 487634432.0,
            "2160": 487634432.0,
            "2165": 487634432.0,
            "2170": 487634432.0,
            "2175": 487634432.0,
            "2180": 487634432.0,
            "2185": 487634432.0,
            "2190": 487634432.0,
            "2195": 487634432.0,
            "2200": 487634432.0,
            "2205": 487634432.0,
            "2210": 487634432.0,
            "2215": 487634432.0,
            "2220": 487634432.0,
            "2225": 487634432.0,
            "2230": 487634432.0,
            "2235": 487634432.0,
            "2240": 487634432.0,
            "2245": 487634432.0,
            "2250": 487634432.0,
            "2255": 487634432.0,
            "2260": 487634432.0,
            "2265": 487634432.0,
            "2270": 487634432.0,
            "2275": 487634432.0,
            "2280": 487634432.0,
            "2285": 487634432.0,
            "2290": 487634432.0,
            "2295": 487634432.0,
            "2300": 487634432.0,
            "2305": 487634432.0,
            "2310": 487634432.0,
            "2315": 487634432.0,
            "2320": 487634432.0,
            "2325": 487634432.0,
            "2330": 487634432.0,
            "2335": 487634432.0,
            "2340": 487634432.0,
            "2345": 487634432.0,
            "2350": 487634432.0,
            "2355": 487634432.0,
            "2360": 487634432.0,
            "2365": 487634432.0,
            "2370": 487634432.0,
            "2375": 487634432.0,
            "2380": 487634432.0,
            "2385": 487634432.0,
            "2390": 487634432.0,
            "2395": 487634432.0,
            "2400": 487634432.0,
            "2405": 487634432.0,
            "2410": 487634432.0,
            "2415": 487634432.0,
            "2420": 487634432.0,
            "2425": 487634432.0,
            "2430": 487634432.0,
            "2435": 487634432.0,
            "2440": 487634432.0,
            "2445": 487634432.0,
            "2450": 487634432.0,
            "2455": 487634432.0,
            "2460": 487634432.0,
            "2465": 487634432.0,
            "2470": 487634432.0,
            "2475": 487634432.0,
            "2480": 487634432.0,
            "2485": 487634432.0,
            "2490": 487634432.0,
            "2495": 487634432.0,
            "2500": 487634432.0,
            "2505": 487634432.0,
            "2510": 487634432.0,
            "2515": 487634432.0,
            "2520": 487634432.0,
            "2525": 487634432.0,
            "2530": 487634432.0,
            "2535": 487634432.0,
            "2540": 487634432.0,
            "2545": 487634432.0,
            "2550": 487634432.0,
            "2555": 487634432.0,
            "2560": 487634432.0,
            "2565": 487634432.0,
            "2570": 487634432.0,
            "2575": 487634432.0,
            "2580": 487634432.0,
            "2585": 487634432.0,
            "2590": 487634432.0,
            "2595": 487634432.0,
            "2600": 487634432.0,
            "2605": 487634432.0,
            "2610": 487634432.0,
            "2615": 487634432.0,
            "2620": 487634432.0,
            "2625": 487634432.0,
            "2630": 487634432.0,
            "2635": 487634432.0,
            "2640": 487634432.0,
            "2645": 487634432.0,
            "2650": 487634432.0,
            "2655": 487634432.0,
            "2660": 487634432.0,
            "2665": 487634432.0,
            "2670": 487634432.0,
            "2675": 487634432.0,
            "2680": 487634432.0,
            "2685": 487634432.0,
            "2690": 487634432.0,
            "2695": 487634432.0,
            "2700": 487634432.0,
            "2705": 487634432.0,
            "2710": 487634432.0,
            "2715": 487634432.0,
            "2720": 487634432.0,
            "2725": 487634432.0,
            "2730": 487634432.0,
            "2735": 487634432.0,
            "2740": 487634432.0,
            "2745": 487634432.0,
            "2750": 487634432.0,
            "2755": 487634432.0,
            "2760": 487634432.0,
            "2765": 487634432.0,
            "2770": 487634432.0,
            "2775": 487634432.0,
            "2780": 487634432.0,
            "2785": 487634432.0,
            "2790": 487634432.0,
            "2795": 487634432.0,
            "2800": 487634432.0,
            "2805": 487634432.0,
            "2810": 487634432.0,
            "2815": 487634432.0,
            "2820": 487634432.0,
            "2825": 487634432.0,
            "2830": 487634432.0,
            "2835": 487634432.0,
            "2840": 487634432.0,
            "2845": 487634432.0,
            "2850": 487634432.0,
            "2855": 487634432.0,
            "2860": 487634432.0,
            "2865": 487634432.0,
            "2870": 487634432.0,
            "2875": 487634432.0,
            "2880": 487634432.0,
            "2885": 487634432.0,
            "2890": 487634432.0,
            "2895": 487634432.0,
            "2900": 487634432.0,
            "2905": 487634432.0,
            "2910": 487634432.0,
            "2915": 487634432.0,
            "2920": 487634432.0,
            "2925": 487634432.0,
            "2930": 487634432.0,
            "2935": 487634432.0,
            "2940": 487634432.0,
            "2945": 487634432.0,
            "2950": 487634432.0,
            "2955": 487634432.0,
            "2960": 487634432.0,
            "2965": 487634432.0,
            "2970": 487634432.0,
            "2975": 487634432.0,
            "2980": 487634432.0,
            "2985": 487634432.0,
            "2990": 487634432.0,
            "2995": 487634432.0,
            "3000": 487634432.0,
            "3005": 487634432.0,
            "3010": 487634432.0,
            "3015": 487634432.0,
            "3020": 487634432.0,
            "3025": 487634432.0,
            "3030": 487634432.0,
            "3035": 487634432.0,
            "3040": 487634432.0,
            "3045": 487634432.0,
            "3050": 487634432.0,
            "3055": 487634432.0,
            "3060": 487634432.0,
            "3065": 487634432.0,
            "3070": 487634432.0,
            "3075": 487634432.0,
            "3080": 487634432.0,
            "3085": 487634432.0,
            "3090": 487634432.0,
            "3095": 487634432.0,
            "3100": 487634432.0,
            "3105": 487634432.0,
            "3110": 487634432.0,
            "3115": 487634432.0,
            "3120": 487634432.0,
            "3125": 487634432.0,
            "3130": 487634432.0,
            "3135": 487634432.0,
            "3140": 487634432.0,
            "3145": 487634432.0,
            "3150": 487634432.0,
            "3155": 487634432.0,
            "3160": 487634432.0,
            "3165": 487634432.0,
            "3170": 487634432.0,
            "3175": 487634432.0,
            "3180": 487634432.0,
            "3185": 487634432.0,
            "3190": 487634432.0,
            "3195": 487634432.0,
            "3200": 487634432.0,
            "3205": 487634432.0,
            "3210": 487634432.0,
            "3215": 487634432.0,
            "3220": 487634432.0,
            "3225": 487634432.0,
            "3230": 487634432.0,
            "3235": 487634432.0,
            "3240": 487634432.0,
            "3245": 487634432.0,
            "3250": 487634432.0,
            "3255": 487634432.0,
            "3260": 487634432.0,
            "3265": 487634432.0,
            "3270": 487634432.0,
            "3275": 487634432.0,
            "3280": 487634432.0,
            "3285": 487634432.0,
            "3290": 487634432.0,
            "3295": 487634432.0,
            "3300": 487634432.0,
            "3305": 487634432.0,
            "3310": 487634432.0,
            "3315": 487634432.0,
            "3320": 487634432.0,
            "3325": 487634432.0,
            "3330": 487634432.0,
            "3335": 487634432.0,
            "3340": 487634432.0,
            "3345": 487634432.0,
            "3350": 487634432.0,
            "3355": 487634432.0,
            "3360": 487634432.0,
            "3365": 487634432.0,
            "3370": 487634432.0,
            "3375": 487634432.0,
            "3380": 487634432.0,
            "3385": 487634432.0,
            "3390": 487634432.0,
            "3395": 487634432.0,
            "3400": 487634432.0,
            "3405": 487634432.0,
            "3410": 487634432.0,
            "3415": 487634432.0,
            "3420": 487634432.0,
            "3425": 487634432.0,
            "3430": 487634432.0,
            "3435": 487634432.0,
            "3440": 487634432.0,
            "3445": 487634432.0,
            "3450": 487634432.0,
            "3455": 487634432.0,
            "3460": 487634432.0,
            "3465": 487634432.0,
            "3470": 487634432.0,
            "3475": 487634432.0,
            "3480": 487634432.0,
            "3485": 487634432.0,
            "3490": 487634432.0,
            "3495": 487634432.0,
            "3500": 487634432.0,
            "3505": 487634432.0,
            "3510": 487634432.0,
            "3515": 487634432.0,
            "3520": 487634432.0,
            "3525": 487634432.0,
            "3530": 487634432.0,
            "3535": 487634432.0,
            "3540": 487634432.0,
            "3545": 487634432.0,
            "3550": 487634432.0,
            "3555": 487634432.0,
            "3560": 487634432.0,
            "3565": 487634432.0,
            "3570": 487634432.0,
            "3575": 487634432.0,
            "3580": 487634432.0,
            "3585": 487634432.0,
            "3590": 487634432.0,
            "3595": 487634432.0,
            "3600": 487634432.0,
            "3605": 487634432.0,
            "3610": 487634432.0,
            "3615": 487634432.0,
            "3620": 487634432.0,
            "3625": 487634432.0,
            "3630": 487634432.0,
            "3635": 487634432.0,
            "3640": 487634432.0,
            "3645": 487634432.0,
            "3650": 487634432.0,
            "3655": 487634432.0,
            "3660": 487634432.0,
            "3665": 487634432.0,
            "3670": 487634432.0,
            "3675": 487634432.0,
            "3680": 487634432.0,
            "3685": 487634432.0,
            "3690": 487634432.0,
            "3695": 487634432.0,
            "3700": 487634432.0,
            "3705": 487634432.0,
            "3710": 487634432.0,
            "3715": 487634432.0,
            "3720": 487634432.0,
            "3725": 487634432.0,
            "3730": 487634432.0,
            "3735": 487634432.0,
            "3740": 487634432.0,
            "3745": 487634432.0,
            "3750": 487634432.0,
            "3755": 487634432.0,
            "3760": 487634432.0,
            "3765": 487634432.0,
            "3770": 487634432.0,
            "3775": 487634432.0,
            "3780": 487634432.0,
            "3785": 487634432.0,
            "3790": 487634432.0,
            "3795": 487634432.0,
            "3800": 487634432.0,
            "3805": 487634432.0,
            "3810": 487634432.0,
            "3815": 487634432.0,
            "3820": 487634432.0,
            "3825": 487634432.0,
            "3830": 487634432.0,
            "3835": 487634432.0,
            "3840": 487634432.0,
            "3845": 487634432.0,
            "3850": 487634432.0,
            "3855": 487634432.0,
            "3860": 487634432.0,
            "3865": 487634432.0,
            "3870": 487634432.0,
            "3875": 487634432.0,
            "3880": 487634432.0,
            "3885": 487634432.0,
            "3890": 487634432.0,
            "3895": 487634432.0,
            "3900": 487634432.0,
            "3905": 487634432.0,
            "3910": 487634432.0,
            "3915": 487634432.0,
            "3920": 487634432.0,
            "3925": 487634432.0,
            "3930": 487634432.0,
            "3935": 487634432.0,
            "3940": 487634432.0,
            "3945": 487634432.0,
            "3950": 487634432.0,
            "3955": 487634432.0,
            "3960": 487634432.0,
            "3965": 487634432.0,
            "3970": 487634432.0,
            "3975": 487634432.0,
            "3980": 487634432.0,
            "3985": 487634432.0,
            "3990": 487634432.0,
            "3995": 487634432.0,
            "4000": 487634432.0,
            "4005": 487634432.0,
            "4010": 487634432.0,
            "4015": 487634432.0,
            "4020": 487634432.0,
            "4025": 487634432.0,
            "4030": 487634432.0,
            "4035": 487634432.0,
            "4040": 487634432.0,
            "4045": 487634432.0,
            "4050": 487634432.0,
            "4055": 487634432.0,
            "4060": 487634432.0,
            "4065": 487634432.0,
            "4070": 487634432.0,
            "4075": 487634432.0,
            "4080": 487634432.0,
            "4085": 487634432.0,
            "4090": 487634432.0,
            "4095": 487634432.0,
            "4100": 487634432.0,
            "4105": 487634432.0,
            "4110": 487634432.0,
            "4115": 487634432.0,
            "4120": 487634432.0,
            "4125": 487634432.0,
            "4130": 487634432.0,
            "4135": 487634432.0,
            "4140": 487634432.0,
            "4145": 487634432.0,
            "4150": 487634432.0,
            "4155": 487634432.0,
            "4160": 487634432.0,
            "4165": 487634432.0,
            "4170": 487634432.0,
            "4175": 487634432.0,
            "4180": 487634432.0,
            "4185": 487634432.0,
            "4190": 487634432.0,
            "4195": 487634432.0,
            "4200": 487634432.0,
            "4205": 487634432.0,
            "4210": 487634432.0,
            "4215": 487634432.0,
            "4220": 487634432.0,
            "4225": 487634432.0,
            "4230": 487634432.0,
            "4235": 487634432.0,
            "4240": 487634432.0,
            "4245": 487634432.0,
            "4250": 487634432.0,
            "4255": 488419840.0,
            "4260": 488419840.0,
            "4265": 488419840.0,
            "4270": 488419840.0,
            "4275": 488419840.0,
            "4280": 488419840.0,
            "4285": 488419840.0,
            "4290": 488419840.0,
            "4295": 488419840.0,
            "4300": 488419840.0,
            "4305": 488419840.0,
            "4310": 488419840.0,
            "4315": 488419840.0,
            "4320": 488419840.0,
            "4325": 488419840.0,
            "4330": 488419840.0,
            "4335": 488419840.0,
            "4340": 488419840.0,
            "4345": 488419840.0,
            "4350": 488419840.0,
            "4355": 488419840.0,
            "4360": 488419840.0,
            "4365": 488419840.0,
            "4370": 488419840.0,
            "4375": 488419840.0,
            "4380": 488419840.0,
            "4385": 488419840.0,
            "4390": 488419840.0,
            "4395": 488419840.0,
            "4400": 488419840.0,
            "4405": 488419840.0,
            "4410": 488419840.0,
            "4415": 488419840.0,
            "4420": 488419840.0,
            "4425": 488419840.0,
            "4430": 488419840.0,
            "4435": 488419840.0,
            "4440": 488419840.0,
            "4445": 488419840.0,
            "4450": 488419840.0,
            "4455": 488419840.0,
            "4460": 488419840.0,
            "4465": 488419840.0,
            "4470": 488419840.0,
            "4475": 488419840.0,
            "4480": 488419840.0,
            "4485": 488419840.0,
            "4490": 488419840.0,
            "4495": 488419840.0,
            "4500": 488419840.0,
            "4505": 488419840.0,
            "4510": 488419840.0,
            "4515": 488419840.0,
            "4520": 488419840.0,
            "4525": 488419840.0,
            "4530": 488419840.0,
            "4535": 488419840.0,
            "4540": 488419840.0,
            "4545": 488419840.0,
            "4550": 488419840.0,
            "4555": 488419840.0,
            "4560": 488419840.0,
            "4565": 488419840.0,
            "4570": 488419840.0,
            "4575": 488419840.0,
            "4580": 488419840.0,
            "4585": 488419840.0,
            "4590": 488419840.0,
            "4595": 488419840.0,
            "4600": 488419840.0,
            "4605": 488419840.0,
            "4610": 488419840.0,
            "4615": 488419840.0,
            "4620": 488419840.0,
            "4625": 488419840.0,
            "4630": 488419840.0,
            "4635": 488419840.0,
            "4640": 488419840.0,
            "4645": 488419840.0,
            "4650": 488419840.0,
            "4655": 488419840.0,
            "4660": 488419840.0,
            "4665": 488419840.0,
            "4670": 488419840.0,
            "4675": 488419840.0,
            "4680": 488419840.0,
            "4685": 488419840.0,
            "4690": 488419840.0,
            "4695": 488419840.0,
            "4700": 488419840.0,
            "4705": 488419840.0,
            "4710": 488419840.0,
            "4715": 488419840.0,
            "4720": 488419840.0,
            "4725": 488419840.0,
            "4730": 488419840.0,
            "4735": 488419840.0,
            "4740": 488419840.0,
            "4745": 488419840.0,
            "4750": 488419840.0,
            "4755": 488419840.0,
            "4760": 488419840.0,
            "4765": 488419840.0,
            "4770": 488419840.0,
            "4775": 488419840.0,
            "4780": 488419840.0,
            "4785": 488419840.0,
            "4790": 488419840.0,
            "4795": 488419840.0,
            "4800": 488419840.0,
            "4805": 488419840.0,
            "4810": 488419840.0,
            "4815": 488419840.0,
            "4820": 488419840.0,
            "4825": 488419840.0,
            "4830": 488419840.0,
            "4835": 488419840.0,
            "4840": 488419840.0,
            "4845": 488419840.0,
            "4850": 488419840.0,
            "4855": 488419840.0,
            "4860": 488419840.0,
            "4865": 488419840.0,
            "4870": 488419840.0,
            "4875": 488419840.0,
            "4880": 488419840.0,
            "4885": 488419840.0,
            "4890": 488419840.0,
            "4895": 488419840.0,
            "4900": 488419840.0,
            "4905": 488419840.0,
            "4910": 488419840.0,
            "4915": 488419840.0,
            "4920": 488419840.0,
            "4925": 488419840.0,
            "4930": 488419840.0,
            "4935": 488419840.0,
            "4940": 488419840.0,
            "4945": 488419840.0,
            "4950": 488419840.0,
            "4955": 488419840.0,
            "4960": 488419840.0,
            "4965": 488419840.0,
            "4970": 488419840.0,
            "4975": 488419840.0,
            "4980": 488419840.0,
            "4985": 488419840.0,
            "4990": 488419840.0,
            "4995": 488419840.0,
            "5000": 488419840.0,
            "5005": 488419840.0,
            "5010": 488419840.0,
            "5015": 488419840.0,
            "5020": 488419840.0,
            "5025": 488419840.0,
            "5030": 488419840.0,
            "5035": 488419840.0,
            "5040": 488419840.0,
            "5045": 488419840.0,
            "5050": 488419840.0,
            "5055": 488419840.0,
            "5060": 488419840.0,
            "5065": 488419840.0,
            "5070": 488419840.0,
            "5075": 488419840.0,
            "5080": 488419840.0,
            "5085": 488419840.0,
            "5090": 488419840.0,
            "5095": 488419840.0,
            "5100": 488419840.0,
            "5105": 488419840.0,
            "5110": 488419840.0,
            "5115": 488419840.0,
            "5120": 488419840.0,
            "5125": 488419840.0,
            "5130": 488419840.0,
            "5135": 488420352.0,
            "5140": 488420352.0,
            "5145": 488420352.0,
            "5150": 488420352.0,
            "5155": 488420352.0,
            "5160": 488420352.0,
            "5165": 488420352.0,
            "5170": 488420352.0,
            "5175": 488420352.0,
            "5180": 488420352.0,
            "5185": 488420352.0,
            "5190": 488420352.0,
            "5195": 488420352.0,
            "5200": 488420352.0,
            "5205": 488420352.0,
            "5210": 488420352.0,
            "5215": 488420352.0,
            "5220": 488420352.0,
            "5225": 488420352.0,
            "5230": 488420352.0,
            "5235": 488420352.0,
            "5240": 488420352.0,
            "5245": 488420352.0,
            "5250": 488420352.0,
            "5255": 488420352.0,
            "5260": 488420352.0,
            "5265": 488420352.0,
            "5270": 488420352.0,
            "5275": 488420352.0,
            "5280": 488420352.0,
            "5285": 488420352.0,
            "5290": 488420352.0,
            "5295": 488420352.0,
            "5300": 488420352.0,
            "5305": 488420352.0,
            "5310": 488420352.0,
            "5315": 488420352.0,
            "5320": 488420352.0,
            "5325": 488420352.0,
            "5330": 488420352.0,
            "5335": 488420352.0,
            "5340": 488420352.0,
            "5345": 488420352.0,
            "5350": 488420352.0,
            "5355": 488420352.0,
            "5360": 488420352.0,
            "5365": 488420352.0,
            "5370": 488420352.0,
            "5375": 488420352.0,
            "5380": 488420352.0,
            "5385": 488420352.0,
            "5390": 488420352.0,
            "5395": 488420352.0,
            "5400": 488420352.0,
            "5405": 488420352.0,
            "5410": 488420352.0,
            "5415": 488420352.0,
            "5420": 488420352.0,
            "5425": 488420352.0,
            "5430": 488420352.0,
            "5435": 488420352.0,
            "5440": 488420352.0,
            "5445": 488420352.0,
            "5450": 488420352.0,
            "5455": 488420352.0,
            "5460": 488420352.0,
            "5465": 488420352.0,
            "5470": 488420352.0,
            "5475": 488420352.0,
            "5480": 488420352.0,
            "5485": 488420352.0,
            "5490": 488420352.0,
            "5495": 488420352.0,
            "5500": 488420352.0,
            "5505": 488420352.0,
            "5510": 488420352.0,
            "5515": 488420352.0,
            "5520": 488420352.0,
            "5525": 488420352.0,
            "5530": 488420352.0,
            "5535": 488420352.0,
            "5540": 488420352.0,
            "5545": 488420352.0,
            "5550": 488420352.0,
            "5555": 488420352.0,
            "5560": 488420352.0,
            "5565": 488420352.0,
            "5570": 488420352.0,
            "5575": 488420352.0,
            "5580": 488420352.0,
            "5585": 488420352.0,
            "5590": 488420352.0,
            "5595": 488420352.0,
            "5600": 488420352.0,
            "5605": 488420352.0,
            "5610": 488420352.0,
            "5615": 488420352.0,
            "5620": 488420352.0,
            "5625": 488420352.0,
            "5630": 488420352.0,
            "5635": 488420352.0,
            "5640": 488420352.0,
            "5645": 488420352.0,
            "5650": 488420352.0,
            "5655": 488420352.0,
            "5660": 488420352.0,
            "5665": 488420352.0,
            "5670": 488420352.0,
            "5675": 488420352.0,
            "5680": 488420352.0,
            "5685": 488420352.0,
            "5690": 488420352.0,
            "5695": 488420352.0,
            "5700": 488420352.0,
            "5705": 488420352.0,
            "5710": 488420352.0,
            "5715": 488420352.0,
            "5720": 488420352.0,
            "5725": 488420352.0,
            "5730": 488420352.0,
            "5735": 488420352.0,
            "5740": 488420352.0,
            "5745": 488420352.0,
            "5750": 488420352.0,
            "5755": 488420352.0,
            "5760": 488420352.0,
            "5765": 488420352.0,
            "5770": 488420352.0,
            "5775": 488420352.0,
            "5780": 488420352.0,
            "5785": 488420352.0,
            "5790": 488420352.0,
            "5795": 488420352.0,
            "5800": 488420352.0,
            "5805": 488420352.0,
            "5810": 488420352.0,
            "5815": 488420352.0,
            "5820": 488420352.0,
            "5825": 488420352.0,
            "5830": 488420352.0,
            "5835": 488420352.0,
            "5840": 488420352.0,
            "5845": 488420352.0,
            "5850": 488420352.0,
            "5855": 488420352.0,
            "5860": 488420352.0,
            "5865": 488420352.0,
            "5870": 488420352.0,
            "5875": 488420352.0,
            "5880": 488420352.0,
            "5885": 488420352.0,
            "5890": 488420352.0,
            "5895": 488420352.0,
            "5900": 488420352.0,
            "5905": 488420352.0,
            "5910": 488420352.0,
            "5915": 488420352.0,
            "5920": 488420352.0,
            "5925": 488420352.0,
            "5930": 488420352.0,
            "5935": 488420352.0,
            "5940": 488420352.0,
            "5945": 488420352.0,
            "5950": 488420352.0,
            "5955": 488420352.0,
            "5960": 488420352.0,
            "5965": 488420352.0,
            "5970": 488420352.0,
            "5975": 488420352.0,
            "5980": 488420352.0,
            "5985": 488420352.0,
            "5990": 488420352.0,
            "5995": 488420352.0,
            "6000": 488420352.0,
            "6005": 488420352.0,
            "6010": 488420352.0,
            "6015": 488420352.0,
            "6020": 488420352.0,
            "6025": 488420352.0,
            "6030": 488420352.0,
            "6035": 488420352.0,
            "6040": 488420352.0,
            "6045": 488420352.0,
            "6050": 488420352.0,
            "6055": 488420352.0,
            "6060": 488420352.0,
            "6065": 488420352.0,
            "6070": 488420352.0,
            "6075": 488420352.0,
            "6080": 488420352.0,
            "6085": 488420352.0,
            "6090": 488420352.0,
            "6095": 488420352.0,
            "6100": 488420352.0,
            "6105": 488420352.0,
            "6110": 488420352.0,
            "6115": 488420352.0,
            "6120": 488420352.0,
            "6125": 488420352.0,
            "6130": 488420352.0,
            "6135": 488420352.0,
            "6140": 488420352.0,
            "6145": 488420352.0,
            "6150": 488420352.0,
            "6155": 488420352.0,
            "6160": 488420352.0,
            "6165": 488420352.0,
            "6170": 488420352.0,
            "6175": 488420352.0,
            "6180": 488420352.0,
            "6185": 488420352.0,
            "6190": 488420352.0,
            "6195": 488420352.0,
            "6200": 488420352.0,
            "6205": 488420352.0,
            "6210": 488420352.0,
            "6215": 488420352.0,
            "6220": 488420352.0,
            "6225": 488420352.0,
            "6230": 488420352.0,
            "6235": 488420352.0,
            "6240": 488420352.0,
            "6245": 488420352.0,
            "6250": 488420352.0,
            "6255": 488420352.0,
            "6260": 488420352.0,
            "6265": 488420352.0,
            "6270": 488420352.0,
            "6275": 488420352.0,
            "6280": 488420352.0,
            "6285": 488420352.0,
            "6290": 488420352.0,
            "6295": 488420352.0,
            "6300": 488420352.0,
            "6305": 488420352.0,
            "6310": 488420352.0,
            "6315": 488420352.0,
            "6320": 488420352.0,
            "6325": 488420352.0,
            "6330": 488420352.0,
            "6335": 488420352.0,
            "6340": 488420352.0,
            "6345": 488420352.0,
            "6350": 488420352.0,
            "6355": 488420352.0,
            "6360": 488420352.0,
            "6365": 488420352.0,
            "6370": 488420352.0,
            "6375": 488420352.0,
            "6380": 488420352.0,
            "6385": 488420352.0,
            "6390": 488420352.0,
            "6395": 488420352.0,
            "6400": 488420352.0,
            "6405": 488420352.0,
            "6410": 488420352.0,
            "6415": 488420352.0,
            "6420": 488420352.0,
            "6425": 488420352.0,
            "6430": 488420352.0,
            "6435": 488420352.0,
            "6440": 488420352.0,
            "6445": 488420352.0,
            "6450": 488420352.0,
            "6455": 488420352.0,
            "6460": 488420352.0,
            "6465": 488420352.0,
            "6470": 488420352.0,
            "6475": 488420352.0,
            "6480": 488420352.0,
            "6485": 488420352.0,
            "6490": 488420352.0,
            "6495": 488420352.0,
            "6500": 488420352.0,
            "6505": 488420352.0,
            "6510": 488420352.0,
            "6515": 488420352.0,
            "6520": 488420352.0,
            "6525": 488420352.0,
            "6530": 488420352.0,
            "6535": 488420352.0,
            "6540": 488420352.0,
            "6545": 488420352.0,
            "6550": 488420352.0,
            "6555": 488420352.0,
            "6560": 488420352.0,
            "6565": 488420352.0,
            "6570": 488420352.0,
            "6575": 488420352.0,
            "6580": 488420352.0,
            "6585": 488420352.0,
            "6590": 488420352.0,
            "6595": 488420352.0,
            "6600": 488420352.0,
            "6605": 488420352.0,
            "6610": 488420352.0,
            "6615": 488420352.0,
            "6620": 488420352.0,
            "6625": 488420352.0,
            "6630": 488420352.0,
            "6635": 488420352.0,
            "6640": 488420352.0,
            "6645": 488420352.0,
            "6650": 488420352.0,
            "6655": 488420352.0,
            "6660": 488420352.0,
            "6665": 488420352.0,
            "6670": 488420352.0,
            "6675": 488420352.0,
            "6680": 488420352.0,
            "6685": 488420352.0,
            "6690": 488420352.0,
            "6695": 488420352.0,
            "6700": 488420352.0,
            "6705": 488420352.0,
            "6710": 488420352.0,
            "6715": 488420352.0,
            "6720": 488420352.0,
            "6725": 488420352.0,
            "6730": 488420352.0,
            "6735": 488420352.0,
            "6740": 488420352.0,
            "6745": 488420352.0,
            "6750": 488420352.0,
            "6755": 488420352.0,
            "6760": 488420352.0,
            "6765": 488420352.0,
            "6770": 488420352.0,
            "6775": 488420352.0,
            "6780": 488420352.0,
            "6785": 488420352.0,
            "6790": 488420352.0,
            "6795": 488420352.0,
            "6800": 488420352.0,
            "6805": 488420352.0,
            "6810": 488420352.0,
            "6815": 488420352.0,
            "6820": 488420352.0,
            "6825": 488420352.0,
            "6830": 488420352.0,
            "6835": 488420352.0,
            "6840": 488420352.0,
            "6845": 488420352.0,
            "6850": 488420352.0,
            "6855": 488420352.0,
            "6860": 488420352.0,
            "6865": 488420352.0,
            "6870": 488420352.0,
            "6875": 488420352.0,
            "6880": 488420352.0,
            "6885": 488420352.0,
            "6890": 488420352.0,
            "6895": 488420352.0,
            "6900": 488420352.0,
            "6905": 488420352.0,
            "6910": 488420352.0,
            "6915": 488420352.0,
            "6920": 488420352.0,
            "6925": 488420352.0,
            "6930": 488420352.0,
            "6935": 488420352.0,
            "6940": 488420352.0,
            "6945": 488420352.0,
            "6950": 488420352.0,
            "6955": 488420352.0,
            "6960": 488420352.0,
            "6965": 488420352.0,
            "6970": 488420352.0,
            "6975": 488420352.0,
            "6980": 488420352.0,
            "6985": 488420352.0,
            "6990": 488420352.0,
            "6995": 488420352.0,
            "7000": 488420352.0,
            "7005": 488420352.0,
            "7010": 488420352.0,
            "7015": 488420352.0,
            "7020": 488420352.0,
            "7025": 488420352.0,
            "7030": 488420352.0,
            "7035": 488420352.0,
            "7040": 488420352.0,
            "7045": 488420352.0,
            "7050": 488420352.0,
            "7055": 488420352.0,
            "7060": 488420352.0,
            "7065": 488420352.0,
            "7070": 488420352.0,
            "7075": 488420352.0,
            "7080": 488420352.0,
            "7085": 488420352.0,
            "7090": 488420352.0,
            "7095": 488420352.0,
            "7100": 488420352.0,
            "7105": 488420352.0,
            "7110": 488420352.0,
            "7115": 488420352.0,
            "7120": 488420352.0,
            "7125": 488420352.0,
            "7130": 488420352.0,
            "7135": 488420352.0,
            "7140": 488420352.0,
            "7145": 488420352.0,
            "7150": 488420352.0,
            "7155": 488420352.0,
            "7160": 488420352.0,
            "7165": 488420352.0,
            "7170": 488420352.0,
            "7175": 488420352.0,
            "7180": 488420352.0,
            "7185": 488420352.0,
            "7190": 488420352.0,
            "7195": 488420352.0,
            "7200": 488420352.0,
            "7205": 488420352.0,
            "7210": 488420352.0,
            "7215": 488420352.0,
            "7220": 488420352.0,
            "7225": 488420352.0,
            "7230": 488420352.0,
            "7235": 488420352.0,
            "7240": 488420352.0,
            "7245": 488420352.0,
            "7250": 488420352.0,
            "7255": 488420352.0,
            "7260": 488420352.0,
            "7265": 488420352.0,
            "7270": 488420352.0,
            "7275": 488420352.0,
            "7280": 488420352.0,
            "7285": 488420352.0,
            "7290": 488420352.0,
            "7295": 488420352.0,
            "7300": 488420352.0,
            "7305": 488420352.0,
            "7310": 488420352.0,
            "7315": 488420352.0,
            "7320": 488420352.0,
            "7325": 488420352.0,
            "7330": 488420352.0,
            "7335": 488420352.0,
            "7340": 488420352.0,
            "7345": 488420352.0,
            "7350": 488420352.0,
            "7355": 488420352.0,
            "7360": 488420352.0,
            "7365": 488420352.0,
            "7370": 488420352.0,
            "7375": 488420352.0,
            "7380": 488420352.0,
            "7385": 488420352.0,
            "7390": 488420352.0,
            "7395": 488420352.0,
            "7400": 488420352.0,
            "7405": 488420352.0,
            "7410": 488420352.0,
            "7415": 488420352.0,
            "7420": 488420352.0,
            "7425": 488420352.0,
            "7430": 488420352.0,
            "7435": 488420352.0,
            "7440": 488420352.0,
            "7445": 488420352.0,
            "7450": 488420352.0,
            "7455": 488420352.0,
            "7460": 488420352.0,
            "7465": 488420352.0,
            "7470": 488420352.0,
            "7475": 488420352.0,
            "7480": 488420352.0,
            "7485": 488420352.0,
            "7490": 488420352.0,
            "7495": 488420352.0,
            "7500": 488420352.0,
            "7505": 488420352.0,
            "7510": 488420352.0,
            "7515": 488420352.0,
            "7520": 488420352.0,
            "7525": 488420352.0,
            "7530": 488420352.0,
            "7535": 488420352.0,
            "7540": 488420352.0,
            "7545": 488420352.0,
            "7550": 488420352.0,
            "7555": 488420352.0,
            "7560": 488420352.0,
            "7565": 488420352.0,
            "7570": 488420352.0,
            "7575": 488420352.0,
            "7580": 488420352.0,
            "7585": 488420352.0,
            "7590": 488420352.0,
            "7595": 488420352.0,
            "7600": 488420352.0,
            "7605": 488420352.0,
            "7610": 488420352.0,
            "7615": 488420352.0,
            "7620": 488420352.0,
            "7625": 488420352.0,
            "7630": 488420352.0,
            "7635": 488420352.0,
            "7640": 488420352.0,
            "7645": 488420352.0,
            "7650": 488420352.0,
            "7655": 488420352.0,
            "7660": 488420352.0,
            "7665": 488420352.0,
            "7670": 488420352.0,
            "7675": 488420352.0,
            "7680": 488420352.0,
            "7685": 488420352.0,
            "7690": 488420352.0,
            "7695": 488420352.0,
            "7700": 488420352.0,
            "7705": 488420352.0,
            "7710": 488420352.0,
            "7715": 488420352.0,
            "7720": 488420352.0,
            "7725": 488420352.0,
            "7730": 488420352.0,
            "7735": 488420352.0,
            "7740": 488420352.0,
            "7745": 488420352.0,
            "7750": 488420352.0,
            "7755": 488420352.0,
            "7760": 488420352.0,
            "7765": 488420352.0,
            "7770": 488420352.0,
            "7775": 488420352.0,
            "7780": 488420352.0,
            "7785": 488420352.0,
            "7790": 488420352.0,
            "7795": 488420352.0,
            "7800": 488420352.0,
            "7805": 488420352.0,
            "7810": 488420352.0,
            "7815": 488420352.0,
            "7820": 488420352.0,
            "7825": 488420352.0,
            "7830": 488420352.0,
            "7835": 488420352.0,
            "7840": 488420352.0,
            "7845": 488420352.0,
            "7850": 488420352.0,
            "7855": 488420352.0,
            "7860": 488420352.0,
            "7865": 488420352.0,
            "7870": 488420352.0,
            "7875": 488420352.0,
            "7880": 488420352.0,
            "7885": 488420352.0,
            "7890": 488420352.0,
            "7895": 488420352.0,
            "7900": 488420352.0,
            "7905": 488420352.0,
            "7910": 488420352.0,
            "7915": 488420352.0,
            "7920": 488420352.0,
            "7925": 488420352.0,
            "7930": 488420352.0,
            "7935": 488420352.0,
            "7940": 488420352.0,
            "7945": 488420352.0,
            "7950": 488420352.0,
            "7955": 488420352.0,
            "7960": 488420352.0,
            "7965": 488420352.0,
            "7970": 488420352.0,
            "7975": 488420352.0,
            "7980": 488420352.0,
            "7985": 488420352.0,
            "7990": 488420352.0,
            "7995": 488420352.0,
            "8000": 488420352.0,
            "8005": 488420352.0,
            "8010": 488420352.0,
            "8015": 488420352.0,
            "8020": 488420352.0,
            "8025": 488420352.0,
            "8030": 488420352.0,
            "8035": 488420352.0,
            "8040": 488420352.0,
            "8045": 488420352.0,
            "8050": 488420352.0,
            "8055": 488420352.0,
            "8060": 488420352.0,
            "8065": 488420352.0,
            "8070": 488420352.0,
            "8075": 488420352.0,
            "8080": 488420352.0,
            "8085": 488420352.0,
            "8090": 488420352.0,
            "8095": 488420352.0,
            "8100": 488420352.0,
            "8105": 488420352.0,
            "8110": 488420352.0,
            "8115": 488420352.0,
            "8120": 488420352.0,
            "8125": 488420352.0,
            "8130": 488420352.0,
            "8135": 488420352.0,
            "8140": 488420352.0,
            "8145": 488420352.0,
            "8150": 488420352.0,
            "8155": 488420352.0,
            "8160": 488420352.0,
            "8165": 488420352.0,
            "8170": 488420352.0,
            "8175": 488420352.0,
            "8180": 488420352.0,
            "8185": 488420352.0,
            "8190": 488420352.0,
            "8195": 488420352.0,
            "8200": 488420352.0,
            "8205": 488420352.0,
            "8210": 488420352.0,
            "8215": 488420352.0,
            "8220": 488420352.0,
            "8225": 488420352.0,
            "8230": 488420352.0,
            "8235": 488420352.0,
            "8240": 488420352.0,
            "8245": 488420352.0,
            "8250": 488420352.0,
            "8255": 488420352.0,
            "8260": 488420352.0,
            "8265": 488420352.0,
            "8270": 488420352.0,
            "8275": 488420352.0,
            "8280": 488420352.0,
            "8285": 488420352.0,
            "8290": 488420352.0,
            "8295": 488420352.0,
            "8300": 488420352.0,
            "8305": 488420352.0,
            "8310": 488420352.0,
            "8315": 488420352.0,
            "8320": 488420352.0,
            "8325": 488420352.0,
            "8330": 488420352.0,
            "8335": 488420352.0,
            "8340": 488420352.0,
            "8345": 488420352.0,
            "8350": 488420352.0,
            "8355": 488420352.0,
            "8360": 488420352.0,
            "8365": 488420352.0,
            "8370": 488420352.0,
            "8375": 488420352.0,
            "8380": 488420352.0,
            "8385": 488420352.0,
            "8390": 488420352.0,
            "8395": 488420352.0,
            "8400": 488420352.0,
            "8405": 488420352.0,
            "8410": 488420352.0,
            "8415": 488420352.0,
            "8420": 488420352.0,
            "8425": 488420352.0,
            "8430": 488420352.0,
            "8435": 488420352.0,
            "8440": 488420352.0,
            "8445": 488420352.0,
            "8450": 488420352.0,
            "8455": 488420352.0,
            "8460": 488420352.0,
            "8465": 488420352.0,
            "8470": 488420352.0,
            "8475": 488420352.0,
            "8480": 488420352.0,
            "8485": 488420352.0,
            "8490": 488420352.0,
            "8495": 488420352.0,
            "8500": 488420352.0,
            "8505": 488420352.0,
            "8510": 488420352.0,
            "8515": 488420352.0,
            "8520": 488420352.0,
            "8525": 488420352.0,
            "8530": 488420352.0,
            "8535": 488420352.0,
            "8540": 488420352.0,
            "8545": 488420352.0,
            "8550": 488420352.0,
            "8555": 488420352.0,
            "8560": 488420352.0,
            "8565": 488420352.0,
            "8570": 488420352.0,
            "8575": 488420352.0,
            "8580": 488420352.0,
            "8585": 488420352.0,
            "8590": 488420352.0,
            "8595": 488420352.0,
            "8600": 488420352.0,
            "8605": 488420352.0,
            "8610": 488420352.0,
            "8615": 488420352.0,
            "8620": 488420352.0,
            "8625": 488420352.0,
            "8630": 488420352.0,
            "8635": 488420352.0,
            "8640": 488420352.0,
            "8645": 488420352.0,
            "8650": 488420352.0,
            "8655": 488420352.0,
            "8660": 488420352.0,
            "8665": 488420352.0,
            "8670": 488420352.0,
            "8675": 488420352.0,
            "8680": 488420352.0,
            "8685": 488420352.0,
            "8690": 488420352.0,
            "8695": 488420352.0,
            "8700": 488420352.0,
            "8705": 488420352.0,
            "8710": 488420352.0,
            "8715": 488420352.0,
            "8720": 488420352.0,
            "8725": 488420352.0,
            "8730": 488420352.0,
            "8735": 488420352.0,
            "8740": 488420352.0,
            "8745": 488420352.0,
            "8750": 488420352.0,
            "8755": 488420352.0,
            "8760": 488420352.0,
            "8765": 488420352.0,
            "8770": 488420352.0,
            "8775": 488420352.0,
            "8780": 488420352.0,
            "8785": 488420352.0,
            "8790": 488420352.0,
            "8795": 488420352.0,
            "8800": 488420352.0,
            "8805": 488420352.0,
            "8810": 488420352.0,
            "8815": 488420352.0,
            "8820": 488420352.0,
            "8825": 488420352.0,
            "8830": 488420352.0,
            "8835": 488420352.0,
            "8840": 488420352.0,
            "8845": 488420352.0,
            "8850": 488420352.0,
            "8855": 488420352.0,
            "8860": 488420352.0,
            "8865": 488420352.0,
            "8870": 488420352.0,
            "8875": 488420352.0,
            "8880": 488420352.0,
            "8885": 488420352.0,
            "8890": 488420352.0,
            "8895": 488420352.0,
            "8900": 488420352.0,
            "8905": 488420352.0,
            "8910": 488420352.0,
            "8915": 488420352.0,
            "8920": 488420352.0,
            "8925": 488420352.0,
            "8930": 488420352.0,
            "8935": 488420352.0,
            "8940": 488420352.0,
            "8945": 488420352.0,
            "8950": 488420352.0,
            "8955": 488420352.0,
            "8960": 488420352.0,
            "8965": 488420352.0,
            "8970": 488420352.0,
            "8975": 488420352.0,
            "8980": 488420352.0,
            "8985": 488420352.0,
            "8990": 488420352.0,
            "8995": 488420352.0,
            "9000": 488420352.0,
            "9005": 488420352.0,
            "9010": 488420352.0,
            "9015": 488420352.0,
            "9020": 488420352.0,
            "9025": 488420352.0,
            "9030": 488420352.0,
            "9035": 488420352.0,
            "9040": 488420352.0,
            "9045": 488420352.0,
            "9050": 488420352.0,
            "9055": 488420352.0,
            "9060": 488420352.0,
            "9065": 488420352.0,
            "9070": 488420352.0,
            "9075": 488420352.0,
            "9080": 488420352.0,
            "9085": 488420352.0,
            "9090": 488420352.0,
            "9095": 488420352.0,
            "9100": 488420352.0,
            "9105": 488420352.0,
            "9110": 488420352.0,
            "9115": 488420352.0,
            "9120": 488420352.0,
            "9125": 488420352.0,
            "9130": 488420352.0,
            "9135": 488420352.0,
            "9140": 488420352.0,
            "9145": 488420352.0,
            "9150": 488420352.0,
            "9155": 488420352.0,
            "9160": 488420352.0,
            "9165": 488420352.0,
            "9170": 488420352.0,
            "9175": 488420352.0,
            "9180": 488420352.0,
            "9185": 488420352.0,
            "9190": 488420352.0,
            "9195": 488420352.0,
            "9200": 488420352.0,
            "9205": 488420352.0,
            "9210": 488420352.0,
            "9215": 488420352.0,
            "9220": 488420352.0,
            "9225": 488420352.0,
            "9230": 488420352.0,
            "9235": 488420352.0,
            "9240": 488420352.0,
            "9245": 488420352.0,
            "9250": 488420352.0,
            "9255": 488420352.0,
            "9260": 488420352.0,
            "9265": 488420352.0,
            "9270": 488420352.0,
            "9275": 488420352.0,
            "9280": 488420352.0,
            "9285": 488420352.0,
            "9290": 488420352.0,
            "9295": 488420352.0,
            "9300": 488420352.0,
            "9305": 488420352.0,
            "9310": 488420352.0,
            "9315": 488420352.0,
            "9320": 488420352.0,
            "9325": 488420352.0,
            "9330": 488420352.0,
            "9335": 488420352.0,
            "9340": 488420352.0,
            "9345": 488420352.0,
            "9350": 488420352.0,
            "9355": 488420352.0,
            "9360": 488420352.0,
            "9365": 488420352.0,
            "9370": 488420352.0,
            "9375": 488420352.0,
            "9380": 488420352.0,
            "9385": 488420352.0,
            "9390": 488420352.0,
            "9395": 488420352.0,
            "9400": 488420352.0,
            "9405": 488420352.0,
            "9410": 488420352.0,
            "9415": 488420352.0,
            "9420": 488420352.0,
            "9425": 488420352.0,
            "9430": 488420352.0,
            "9435": 488420352.0,
            "9440": 488420352.0,
            "9445": 488420352.0,
            "9450": 488420352.0,
            "9455": 488420352.0,
            "9460": 488420352.0,
            "9465": 488420352.0,
            "9470": 488420352.0,
            "9475": 488420352.0,
            "9480": 488420352.0,
            "9485": 488420352.0,
            "9490": 488420352.0,
            "9495": 488420352.0,
            "9500": 488420352.0,
            "9505": 488420352.0,
            "9510": 488420352.0,
            "9515": 488420352.0,
            "9520": 488420352.0,
            "9525": 488420352.0,
            "9530": 488420352.0,
            "9535": 488420352.0,
            "9540": 488420352.0,
            "9545": 488420352.0,
            "9550": 488420352.0,
            "9555": 488420352.0,
            "9560": 488420352.0,
            "9565": 488420352.0,
            "9570": 488420352.0,
            "9575": 488420352.0,
            "9580": 488420352.0,
            "9585": 488420352.0,
            "9590": 488420352.0,
            "9595": 488420352.0,
            "9600": 488420352.0,
            "9605": 488420352.0,
            "9610": 488420352.0,
            "9615": 488420352.0,
            "9620": 488420352.0,
            "9625": 488420352.0,
            "9630": 488420352.0,
            "9635": 488420352.0,
            "9640": 488420352.0,
            "9645": 488420352.0,
            "9650": 488420352.0,
            "9655": 488420352.0,
            "9660": 488420352.0,
            "9665": 488420352.0,
            "9670": 488420352.0,
            "9675": 488420352.0,
            "9680": 488420352.0,
            "9685": 488420352.0,
            "9690": 488420352.0,
            "9695": 488420352.0,
            "9700": 488420352.0,
            "9705": 488420352.0,
            "9710": 488420352.0,
            "9715": 488420352.0,
            "9720": 488420352.0,
            "9725": 488420352.0,
            "9730": 488420352.0,
            "9735": 488420352.0,
            "9740": 488420352.0,
            "9745": 488420352.0,
            "9750": 488420352.0,
            "9755": 488420352.0,
            "9760": 488420352.0,
            "9765": 488420352.0,
            "9770": 488420352.0,
            "9775": 488420352.0,
            "9780": 488420352.0,
            "9785": 488420352.0,
            "9790": 488420352.0,
            "9795": 488420352.0,
            "9800": 488420352.0,
            "9805": 488420352.0,
            "9810": 488420352.0,
            "9815": 488420352.0,
            "9820": 488420352.0,
            "9825": 488420352.0,
            "9830": 488420352.0,
            "9835": 488420352.0,
            "9840": 488420864.0,
            "9845": 488420864.0,
            "9850": 488420864.0,
            "9855": 488420864.0,
            "9860": 488420864.0,
            "9865": 488420864.0,
            "9870": 488420864.0,
            "9875": 488420864.0,
            "9880": 488420864.0,
            "9885": 488420864.0,
            "9890": 488420864.0,
            "9895": 488420864.0,
            "9900": 488420864.0,
            "9905": 488420864.0,
            "9910": 488420864.0,
            "9915": 488420864.0,
            "9920": 488420864.0,
            "9925": 488420864.0,
            "9930": 488420864.0,
            "9935": 488420864.0,
            "9940": 488420864.0,
            "9945": 488420864.0,
            "9950": 488420864.0,
            "9955": 488420864.0,
            "9960": 488420864.0,
            "9965": 488420864.0,
            "9970": 488420864.0,
            "9975": 488420864.0,
            "9980": 488420864.0,
            "9985": 488420864.0,
            "9990": 488420864.0,
            "9995": 488420864.0,
            "10000": 488420864.0,
            "10005": 488420864.0,
            "10010": 488420864.0,
            "10015": 488420864.0,
            "10020": 488420864.0,
            "10025": 488420864.0,
            "10030": 488420864.0,
            "10035": 488420864.0,
            "10040": 488420864.0,
            "10045": 488420864.0,
            "10050": 488420864.0,
            "10055": 488420864.0,
            "10060": 488420864.0,
            "10065": 488420864.0,
            "10070": 488420864.0,
            "10075": 488420864.0,
            "10080": 488420864.0,
            "10085": 488420864.0,
            "10090": 488420864.0,
            "10095": 488420864.0,
            "10100": 488420864.0,
            "10105": 488420864.0,
            "10110": 488420864.0,
            "10115": 488420864.0,
            "10120": 488420864.0,
            "10125": 488420864.0,
            "10130": 488420864.0,
            "10135": 488420864.0,
            "10140": 488420864.0,
            "10145": 488420864.0,
            "10150": 488420864.0,
            "10155": 488420864.0,
            "10160": 488420864.0,
            "10165": 488420864.0,
            "10170": 488420864.0,
            "10175": 488420864.0,
            "10180": 488420864.0,
            "10185": 488420864.0,
            "10190": 488420864.0,
            "10195": 488420864.0,
            "10200": 488420864.0,
            "10205": 488420864.0,
            "10210": 488420864.0,
            "10215": 488420864.0,
            "10220": 488420864.0,
            "10225": 488420864.0,
            "10230": 488420864.0,
            "10235": 488420864.0,
            "10240": 488420864.0,
            "10245": 488420864.0,
            "10250": 488420864.0,
            "10255": 488420864.0,
            "10260": 488420864.0,
            "10265": 488420864.0,
            "10270": 488420864.0,
            "10275": 488420864.0,
            "10280": 488420864.0,
            "10285": 488420864.0,
            "10290": 488420864.0,
            "10295": 488420864.0,
            "10300": 488420864.0,
            "10305": 488420864.0,
            "10310": 488420864.0,
            "10315": 488420864.0,
            "10320": 488420864.0,
            "10325": 488420864.0,
            "10330": 488420864.0,
            "10335": 488420864.0,
            "10340": 488420864.0,
            "10345": 488420864.0,
            "10350": 488420864.0,
            "10355": 488420864.0,
            "10360": 488420864.0,
            "10365": 488420864.0,
            "10370": 488420864.0,
            "10375": 488420864.0,
            "10380": 488420864.0,
            "10385": 488420864.0,
            "10390": 488420864.0,
            "10395": 488420864.0,
            "10400": 488420864.0,
            "10405": 488420864.0,
            "10410": 488420864.0,
            "10415": 488420864.0,
            "10420": 488420864.0,
            "10425": 488420864.0,
            "10430": 488420864.0,
            "10435": 488420864.0,
            "10440": 488420864.0,
            "10445": 488420864.0,
            "10450": 488420864.0,
            "10455": 488420864.0,
            "10460": 488420864.0,
            "10465": 488420864.0,
            "10470": 488420864.0,
            "10475": 488420864.0,
            "10480": 488420864.0,
            "10485": 488420864.0,
            "10490": 488420864.0,
            "10495": 488420864.0,
            "10500": 488420864.0,
            "10505": 488420864.0,
            "10510": 488420864.0,
            "10515": 488420864.0,
            "10520": 488420864.0,
            "10525": 488420864.0,
            "10530": 488420864.0,
            "10535": 488420864.0,
            "10540": 488420864.0,
            "10545": 488420864.0,
            "10550": 488420864.0,
            "10555": 488420864.0,
            "10560": 488420864.0,
            "10565": 488420864.0,
            "10570": 488420864.0,
            "10575": 488420864.0,
            "10580": 488420864.0,
            "10585": 488420864.0,
            "10590": 488420864.0,
            "10595": 488420864.0,
            "10600": 488420864.0,
            "10605": 488420864.0,
            "10610": 488420864.0,
            "10615": 488420864.0,
            "10620": 488420864.0,
            "10625": 488420864.0,
            "10630": 488420864.0,
            "10635": 488420864.0,
            "10640": 488420864.0,
            "10645": 488420864.0,
            "10650": 488420864.0,
            "10655": 488420864.0,
            "10660": 488420864.0,
            "10665": 488420864.0,
            "10670": 488420864.0,
            "10675": 488420864.0,
            "10680": 488420864.0,
            "10685": 488420864.0,
            "10690": 488420864.0,
            "10695": 488420864.0,
            "10700": 488420864.0,
            "10705": 488420864.0,
            "10710": 488420864.0,
            "10715": 488420864.0,
            "10720": 488420864.0,
            "10725": 488420864.0,
            "10730": 488420864.0,
            "10735": 488420864.0,
            "10740": 488420864.0,
            "10745": 488420864.0,
            "10750": 488420864.0,
            "10755": 488420864.0,
            "10760": 488420864.0,
            "10765": 488420864.0,
            "10770": 488420864.0,
            "10775": 488420864.0,
            "10780": 488420864.0,
            "10785": 488420864.0,
            "10790": 488420864.0,
            "10795": 488420864.0,
            "10800": 488420864.0,
            "10805": 488420864.0,
            "10810": 488420864.0,
            "10815": 488420864.0,
            "10820": 488420864.0,
            "10825": 488420864.0,
            "10830": 488420864.0,
            "10835": 488420864.0,
            "10840": 488420864.0,
            "10845": 488420864.0,
            "10850": 488420864.0,
            "10855": 488420864.0,
            "10860": 488420864.0,
            "10865": 488420864.0,
            "10870": 488420864.0,
            "10875": 488420864.0,
            "10880": 488420864.0,
            "10885": 488420864.0,
            "10890": 488420864.0,
            "10895": 488420864.0,
            "10900": 488420864.0,
            "10905": 488420864.0,
            "10910": 488420864.0,
            "10915": 488420864.0,
            "10920": 488420864.0,
            "10925": 488420864.0,
            "10930": 488420864.0,
            "10935": 488420864.0,
            "10940": 488420864.0,
            "10945": 488420864.0,
            "10950": 488420864.0,
            "10955": 488420864.0,
            "10960": 488420864.0,
            "10965": 488420864.0,
            "10970": 488420864.0,
            "10975": 488420864.0,
            "10980": 488420864.0,
            "10985": 488420864.0,
            "10990": 488420864.0,
            "10995": 488420864.0,
            "11000": 488420864.0,
            "11005": 488420864.0,
            "11010": 488420864.0,
            "11015": 488420864.0,
            "11020": 488420864.0,
            "11025": 488420864.0,
            "11030": 488420864.0,
            "11035": 488420864.0,
            "11040": 488420864.0,
            "11045": 488420864.0,
            "11050": 488420864.0,
            "11055": 488420864.0,
            "11060": 488420864.0,
            "11065": 488420864.0,
            "11070": 488420864.0,
            "11075": 488420864.0,
            "11080": 488420864.0,
            "11085": 488420864.0,
            "11090": 488420864.0,
            "11095": 488420864.0,
            "11100": 488420864.0,
            "11105": 488420864.0,
            "11110": 488420864.0,
            "11115": 488420864.0,
            "11120": 488420864.0,
            "11125": 488420864.0,
            "11130": 488420864.0,
            "11135": 488420864.0,
            "11140": 488420864.0,
            "11145": 488420864.0,
            "11150": 488420864.0,
            "11155": 488420864.0,
            "11160": 488420864.0,
            "11165": 488420864.0,
            "11170": 488420864.0,
            "11175": 488420864.0,
            "11180": 488420864.0,
            "11185": 488420864.0,
            "11190": 488420864.0,
            "11195": 488420864.0,
            "11200": 488420864.0,
            "11205": 488420864.0,
            "11210": 488420864.0,
            "11215": 488420864.0,
            "11220": 488420864.0,
            "11225": 488420864.0,
            "11230": 488420864.0,
            "11235": 488420864.0,
            "11240": 488420864.0,
            "11245": 488420864.0,
            "11250": 488420864.0,
            "11255": 488420864.0,
            "11260": 488420864.0,
            "11265": 488420864.0,
            "11270": 488420864.0,
            "11275": 488420864.0,
            "11280": 488420864.0,
            "11285": 488420864.0,
            "11290": 488420864.0,
            "11295": 488420864.0,
            "11300": 488420864.0,
            "11305": 488420864.0,
            "11310": 488420864.0,
            "11315": 488420864.0,
            "11320": 488420864.0,
            "11325": 488420864.0,
            "11330": 488420864.0,
            "11335": 488420864.0,
            "11340": 488420864.0,
            "11345": 488420864.0,
            "11350": 488420864.0,
            "11355": 488420864.0,
            "11360": 488420864.0,
            "11365": 488420864.0,
            "11370": 488420864.0,
            "11375": 488420864.0,
            "11380": 488420864.0,
            "11385": 488420864.0,
            "11390": 488420864.0,
            "11395": 488420864.0,
            "11400": 488420864.0,
            "11405": 488420864.0,
            "11410": 488420864.0,
            "11415": 488420864.0,
            "11420": 488420864.0,
            "11425": 488420864.0,
            "11430": 488420864.0,
            "11435": 488420864.0,
            "11440": 488420864.0,
            "11445": 488420864.0,
            "11450": 488420864.0,
            "11455": 488420864.0,
            "11460": 488420864.0,
            "11465": 488420864.0,
            "11470": 488420864.0,
            "11475": 488420864.0,
            "11480": 488420864.0,
            "11485": 488420864.0,
            "11490": 488420864.0,
            "11495": 488420864.0,
            "11500": 488420864.0,
            "11505": 488420864.0,
            "11510": 488420864.0,
            "11515": 488420864.0,
            "11520": 488420864.0,
            "11525": 488420864.0,
            "11530": 488420864.0,
            "11535": 488420864.0,
            "11540": 488420864.0,
            "11545": 488420864.0,
            "11550": 488420864.0,
            "11555": 488420864.0,
            "11560": 488420864.0,
            "11565": 488420864.0,
            "11570": 488420864.0,
            "11575": 488420864.0,
            "11580": 488420864.0,
            "11585": 488420864.0,
            "11590": 488420864.0,
            "11595": 488420864.0,
            "11600": 488420864.0,
            "11605": 488420864.0,
            "11610": 488420864.0,
            "11615": 488420864.0,
            "11620": 488420864.0,
            "11625": 488420864.0,
            "11630": 488420864.0,
            "11635": 488420864.0,
            "11640": 488420864.0,
            "11645": 488420864.0,
            "11650": 488420864.0,
            "11655": 488420864.0,
            "11660": 488420864.0,
            "11665": 488420864.0,
            "11670": 488420864.0,
            "11675": 488420864.0,
            "11680": 488420864.0,
            "11685": 488420864.0,
            "11690": 488420864.0,
            "11695": 488420864.0,
            "11700": 488420864.0,
            "11705": 488420864.0,
            "11710": 488420864.0,
            "11715": 488420864.0,
            "11720": 488420864.0,
            "11725": 488420864.0,
            "11730": 488420864.0,
            "11735": 488420864.0,
            "11740": 488420864.0,
            "11745": 488420864.0,
            "11750": 488420864.0,
            "11755": 488420864.0,
            "11760": 488420864.0,
            "11765": 488420864.0,
            "11770": 488420864.0,
            "11775": 488420864.0,
            "11780": 488420864.0,
            "11785": 488420864.0,
            "11790": 488420864.0,
            "11795": 488420864.0,
            "11800": 488420864.0,
            "11805": 488420864.0,
            "11810": 488420864.0,
            "11815": 488420864.0,
            "11820": 488420864.0,
            "11825": 488420864.0,
            "11830": 488420864.0,
            "11835": 488420864.0,
            "11840": 488420864.0,
            "11845": 488420864.0,
            "11850": 488420864.0,
            "11855": 488420864.0,
            "11860": 488420864.0,
            "11865": 488420864.0,
            "11870": 488420864.0,
            "11875": 488420864.0,
            "11880": 488420864.0,
            "11885": 488420864.0,
            "11890": 488420864.0,
            "11895": 488420864.0,
            "11900": 488420864.0,
            "11905": 488420864.0,
            "11910": 488420864.0,
            "11915": 488420864.0,
            "11920": 488420864.0,
            "11925": 488420864.0,
            "11930": 488420864.0,
            "11935": 488420864.0,
            "11940": 488420864.0,
            "11945": 488420864.0,
            "11950": 488420864.0,
            "11955": 488420864.0,
            "11960": 488420864.0,
            "11965": 488420864.0,
            "11970": 488420864.0,
            "11975": 488420864.0,
            "11980": 488420864.0,
            "11985": 488420864.0,
            "11990": 488420864.0,
            "11995": 488420864.0,
            "12000": 488420864.0,
            "12005": 488420864.0,
            "12010": 488420864.0,
            "12015": 488420864.0,
            "12020": 488420864.0,
            "12025": 488420864.0,
            "12030": 488420864.0,
            "12035": 488420864.0,
            "12040": 488420864.0,
            "12045": 488420864.0,
            "12050": 488420864.0,
            "12055": 488420864.0,
            "12060": 488420864.0,
            "12065": 488420864.0,
            "12070": 488420864.0,
            "12075": 488420864.0,
            "12080": 488420864.0,
            "12085": 488420864.0,
            "12090": 488420864.0,
            "12095": 488420864.0,
            "12100": 488420864.0,
            "12105": 488420864.0,
            "12110": 488420864.0,
            "12115": 488420864.0,
            "12120": 488420864.0,
            "12125": 488420864.0,
            "12130": 488420864.0,
            "12135": 488420864.0,
            "12140": 488420864.0,
            "12145": 488420864.0,
            "12150": 488420864.0,
            "12155": 488420864.0,
            "12160": 488420864.0,
            "12165": 488420864.0,
            "12170": 488420864.0,
            "12175": 488420864.0,
            "12180": 488420864.0,
            "12185": 488420864.0,
            "12190": 488420864.0,
            "12195": 488420864.0,
            "12200": 488420864.0,
            "12205": 488420864.0,
            "12210": 488420864.0,
            "12215": 488420864.0,
            "12220": 488420864.0,
            "12225": 488420864.0,
            "12230": 488420864.0,
            "12235": 488420864.0,
            "12240": 488420864.0,
            "12245": 488420864.0,
            "12250": 488420864.0,
            "12255": 488420864.0,
            "12260": 488420864.0,
            "12265": 488420864.0,
            "12270": 488420864.0,
            "12275": 488420864.0,
            "12280": 488420864.0,
            "12285": 488420864.0,
            "12290": 488420864.0,
            "12295": 488420864.0,
            "12300": 488420864.0,
            "12305": 488420864.0,
            "12310": 488420864.0,
            "12315": 488420864.0,
            "12320": 488420864.0,
            "12325": 488420864.0,
            "12330": 488420864.0,
            "12335": 488420864.0,
            "12340": 488420864.0,
            "12345": 488420864.0,
            "12350": 488420864.0,
            "12355": 488420864.0,
            "12360": 488420864.0,
            "12365": 488420864.0,
            "12370": 488420864.0,
            "12375": 488420864.0,
            "12380": 488420864.0,
            "12385": 488420864.0,
            "12390": 488420864.0,
            "12395": 488420864.0,
            "12400": 488420864.0,
            "12405": 488420864.0,
            "12410": 488420864.0,
            "12415": 488420864.0,
            "12420": 488420864.0,
            "12425": 488420864.0,
            "12430": 488420864.0,
            "12435": 488420864.0,
            "12440": 488420864.0,
            "12445": 488420864.0,
            "12450": 488420864.0,
            "12455": 488420864.0,
            "12460": 488420864.0,
            "12465": 488420864.0,
            "12470": 488420864.0,
            "12475": 488420864.0,
            "12480": 488420864.0,
            "12485": 488420864.0,
            "12490": 488420864.0,
            "12495": 488420864.0,
            "12500": 488420864.0,
            "12505": 488420864.0,
            "12510": 488420864.0,
            "12515": 488420864.0,
            "12520": 488420864.0,
            "12525": 488420864.0,
            "12530": 488420864.0,
            "12535": 488420864.0,
            "12540": 488420864.0,
            "12545": 488420864.0,
            "12550": 488420864.0,
            "12555": 488420864.0,
            "12560": 488420864.0,
            "12565": 488420864.0,
            "12570": 488420864.0,
            "12575": 488420864.0,
            "12580": 488420864.0,
            "12585": 488420864.0,
            "12590": 488420864.0,
            "12595": 488420864.0,
            "12600": 488420864.0,
            "12605": 488420864.0,
            "12610": 488420864.0,
            "12615": 488420864.0,
            "12620": 488420864.0,
            "12625": 488420864.0,
            "12630": 488420864.0,
            "12635": 488420864.0,
            "12640": 488420864.0,
            "12645": 488420864.0,
            "12650": 488420864.0,
            "12655": 488420864.0,
            "12660": 488420864.0,
            "12665": 488420864.0,
            "12670": 488420864.0,
            "12675": 488420864.0,
            "12680": 488420864.0,
            "12685": 488420864.0,
            "12690": 488420864.0,
            "12695": 488420864.0,
            "12700": 488420864.0,
            "12705": 488420864.0,
            "12710": 488420864.0,
            "12715": 488420864.0,
            "12720": 488420864.0,
            "12725": 488420864.0,
            "12730": 488420864.0,
            "12735": 488420864.0,
            "12740": 488420864.0,
            "12745": 488420864.0,
            "12750": 488420864.0,
            "12755": 488420864.0,
            "12760": 488420864.0,
            "12765": 488420864.0,
            "12770": 488420864.0,
            "12775": 488420864.0,
            "12780": 488420864.0,
            "12785": 488420864.0,
            "12790": 488420864.0,
            "12795": 488420864.0,
            "12800": 488420864.0,
            "12805": 488420864.0,
            "12810": 488420864.0,
            "12815": 488420864.0,
            "12820": 488420864.0,
            "12825": 488420864.0,
            "12830": 488420864.0,
            "12835": 488420864.0,
            "12840": 488420864.0,
            "12845": 488420864.0,
            "12850": 488420864.0,
            "12855": 488420864.0,
            "12860": 488420864.0,
            "12865": 488420864.0,
            "12870": 488420864.0,
            "12875": 488420864.0,
            "12880": 488420864.0,
            "12885": 488420864.0,
            "12890": 488420864.0,
            "12895": 488420864.0,
            "12900": 488420864.0,
            "12905": 488420864.0,
            "12910": 488420864.0,
            "12915": 488420864.0,
            "12920": 488420864.0,
            "12925": 488420864.0,
            "12930": 488420864.0,
            "12935": 488420864.0,
            "12940": 488420864.0,
            "12945": 488420864.0,
            "12950": 488420864.0,
            "12955": 488420864.0,
            "12960": 488420864.0,
            "12965": 488420864.0,
            "12970": 488420864.0,
            "12975": 488420864.0,
            "12980": 488420864.0,
            "12985": 488420864.0,
            "12990": 488420864.0,
            "12995": 488420864.0,
            "13000": 488420864.0,
            "13005": 488420864.0,
            "13010": 488420864.0,
            "13015": 488420864.0,
            "13020": 488420864.0,
            "13025": 488420864.0,
            "13030": 488420864.0,
            "13035": 488420864.0,
            "13040": 488420864.0,
            "13045": 488420864.0,
            "13050": 488420864.0,
            "13055": 488420864.0,
            "13060": 488420864.0,
            "13065": 488420864.0,
            "13070": 488420864.0,
            "13075": 488420864.0,
            "13080": 488420864.0,
            "13085": 488420864.0,
            "13090": 488420864.0,
            "13095": 488420864.0,
            "13100": 488420864.0,
            "13105": 488420864.0,
            "13110": 488420864.0,
            "13115": 488420864.0,
            "13120": 488420864.0,
            "13125": 488420864.0,
            "13130": 488420864.0,
            "13135": 488420864.0,
            "13140": 488420864.0,
            "13145": 488420864.0,
            "13150": 488420864.0,
            "13155": 488420864.0,
            "13160": 488420864.0,
            "13165": 488420864.0,
            "13170": 488420864.0,
            "13175": 488420864.0,
            "13180": 488420864.0,
            "13185": 488420864.0,
            "13190": 488420864.0,
            "13195": 488420864.0,
            "13200": 488420864.0,
            "13205": 488420864.0,
            "13210": 488420864.0,
            "13215": 488420864.0,
            "13220": 488420864.0,
            "13225": 488420864.0,
            "13230": 488420864.0,
            "13235": 488420864.0,
            "13240": 488420864.0,
            "13245": 488420864.0,
            "13250": 488420864.0,
            "13255": 488420864.0,
            "13260": 488420864.0,
            "13265": 488420864.0,
            "13270": 488420864.0,
            "13275": 488420864.0,
            "13280": 488420864.0,
            "13285": 488420864.0,
            "13290": 488420864.0,
            "13295": 488420864.0,
            "13300": 488420864.0,
            "13305": 488420864.0,
            "13310": 488420864.0,
            "13315": 488420864.0,
            "13320": 488420864.0,
            "13325": 488420864.0,
            "13330": 488420864.0,
            "13335": 488420864.0,
            "13340": 488420864.0,
            "13345": 488420864.0,
            "13350": 488420864.0,
            "13355": 488420864.0,
            "13360": 488420864.0,
            "13365": 488420864.0,
            "13370": 488420864.0,
            "13375": 488420864.0,
            "13380": 488420864.0,
            "13385": 488420864.0,
            "13390": 488420864.0,
            "13395": 488420864.0,
            "13400": 488420864.0,
            "13405": 488420864.0,
            "13410": 488420864.0,
            "13415": 488420864.0,
            "13420": 488420864.0,
            "13425": 488420864.0,
            "13430": 488420864.0,
            "13435": 488420864.0,
            "13440": 488420864.0,
            "13445": 488420864.0,
            "13450": 488420864.0,
            "13455": 488420864.0,
            "13460": 488420864.0,
            "13465": 488420864.0,
            "13470": 488420864.0,
            "13475": 488420864.0,
            "13480": 488420864.0,
            "13485": 488420864.0,
            "13490": 488420864.0,
            "13495": 488420864.0,
            "13500": 488420864.0,
            "13505": 488420864.0,
            "13510": 488420864.0,
            "13515": 488420864.0,
            "13520": 488420864.0,
            "13525": 488420864.0,
            "13530": 488420864.0,
            "13535": 488420864.0,
            "13540": 488420864.0,
            "13545": 488420864.0,
            "13550": 488420864.0,
            "13555": 488420864.0,
            "13560": 488420864.0,
            "13565": 488420864.0,
            "13570": 488420864.0,
            "13575": 488420864.0,
            "13580": 488420864.0,
            "13585": 488420864.0,
            "13590": 488420864.0,
            "13595": 488420864.0,
            "13600": 488420864.0,
            "13605": 488420864.0,
            "13610": 488420864.0,
            "13615": 488420864.0,
            "13620": 488420864.0,
            "13625": 488420864.0,
            "13630": 488420864.0,
            "13635": 488420864.0,
            "13640": 488420864.0,
            "13645": 488420864.0,
            "13650": 488420864.0,
            "13655": 488420864.0,
            "13660": 488420864.0,
            "13665": 488420864.0,
            "13670": 488420864.0,
            "13675": 488420864.0,
            "13680": 488420864.0,
            "13685": 488420864.0,
            "13690": 488420864.0,
            "13695": 488420864.0,
            "13700": 488420864.0,
            "13705": 488420864.0,
            "13710": 488420864.0,
            "13715": 488420864.0,
            "13720": 488420864.0,
            "13725": 488420864.0,
            "13730": 488420864.0,
            "13735": 488420864.0,
            "13740": 488420864.0,
            "13745": 488420864.0,
            "13750": 488420864.0,
            "13755": 488420864.0,
            "13760": 488420864.0,
            "13765": 488420864.0,
            "13770": 488420864.0,
            "13775": 488420864.0,
            "13780": 488420864.0,
            "13785": 488420864.0,
            "13790": 488420864.0,
            "13795": 488420864.0,
            "13800": 488420864.0,
            "13805": 488420864.0,
            "13810": 488420864.0,
            "13815": 488420864.0,
            "13820": 488420864.0,
            "13825": 488420864.0,
            "13830": 488420864.0,
            "13835": 488420864.0,
            "13840": 488420864.0,
            "13845": 488420864.0,
            "13850": 488420864.0,
            "13855": 488420864.0,
            "13860": 488420864.0,
            "13865": 488420864.0,
            "13870": 488420864.0,
            "13875": 488420864.0,
            "13880": 488420864.0,
            "13885": 488420864.0,
            "13890": 488420864.0,
            "13895": 488420864.0,
            "13900": 488420864.0,
            "13905": 488420864.0,
            "13910": 488420864.0,
            "13915": 488420864.0,
            "13920": 488420864.0,
            "13925": 488420864.0,
            "13930": 488420864.0,
            "13935": 488420864.0,
            "13940": 488420864.0,
            "13945": 488420864.0,
            "13950": 488420864.0,
            "13955": 488420864.0,
            "13960": 488420864.0,
            "13965": 488420864.0,
            "13970": 488420864.0,
            "13975": 488420864.0,
            "13980": 488420864.0,
            "13985": 488420864.0,
            "13990": 488420864.0,
            "13995": 488420864.0,
            "14000": 488420864.0,
            "14005": 488420864.0,
            "14010": 488420864.0,
            "14015": 488420864.0,
            "14020": 488420864.0,
            "14025": 488420864.0,
            "14030": 488420864.0,
            "14035": 488420864.0,
            "14040": 488420864.0,
            "14045": 488420864.0,
            "14050": 488420864.0,
            "14055": 488420864.0,
            "14060": 488420864.0,
            "14065": 488420864.0,
            "14070": 488420864.0,
            "14075": 488420864.0,
            "14080": 488420864.0,
            "14085": 488420864.0,
            "14090": 488420864.0,
            "14095": 488420864.0,
            "14100": 488420864.0,
            "14105": 488420864.0,
            "14110": 488420864.0,
            "14115": 488420864.0,
            "14120": 488420864.0,
            "14125": 488420864.0,
            "14130": 488420864.0,
            "14135": 488420864.0,
            "14140": 488420864.0,
            "14145": 488420864.0,
            "14150": 488420864.0,
            "14155": 488420864.0,
            "14160": 488420864.0,
            "14165": 488420864.0,
            "14170": 488420864.0,
            "14175": 488420864.0,
            "14180": 488420864.0,
            "14185": 488420864.0,
            "14190": 488420864.0,
            "14195": 488420864.0,
            "14200": 488420864.0,
            "14205": 488420864.0,
            "14210": 488420864.0,
            "14215": 488420864.0,
            "14220": 488420864.0,
            "14225": 488420864.0,
            "14230": 488420864.0,
            "14235": 488420864.0,
            "14240": 488420864.0,
            "14245": 488420864.0,
            "14250": 488420864.0,
            "14255": 488420864.0,
            "14260": 488420864.0,
            "14265": 488420864.0,
            "14270": 488420864.0,
            "14275": 488420864.0,
            "14280": 488420864.0,
            "14285": 488420864.0,
            "14290": 488420864.0,
            "14295": 488420864.0,
            "14300": 488420864.0,
            "14305": 488420864.0,
            "14310": 488420864.0,
            "14315": 488420864.0,
            "14320": 488420864.0,
            "14325": 488420864.0,
            "14330": 488420864.0,
            "14335": 488420864.0,
            "14340": 488420864.0,
            "14345": 488420864.0,
            "14350": 488420864.0,
            "14355": 488420864.0,
            "14360": 488420864.0,
            "14365": 488420864.0,
            "14370": 488420864.0,
            "14375": 488420864.0,
            "14380": 488420864.0,
            "14385": 488420864.0,
            "14390": 488420864.0,
            "14395": 488420864.0,
            "14400": 488420864.0,
            "14405": 488420864.0,
            "14410": 488420864.0,
            "14415": 488420864.0,
            "14420": 488420864.0,
            "14425": 488420864.0,
            "14430": 488420864.0,
            "14435": 488420864.0,
            "14440": 488420864.0,
            "14445": 488420864.0,
            "14450": 488420864.0,
            "14455": 488420864.0,
            "14460": 488420864.0,
            "14465": 488420864.0,
            "14470": 488420864.0,
            "14475": 488420864.0,
            "14480": 488420864.0,
            "14485": 488420864.0,
            "14490": 488420864.0,
            "14495": 488420864.0,
            "14500": 488420864.0,
            "14505": 488420864.0,
            "14510": 488420864.0,
            "14515": 488420864.0,
            "14520": 488420864.0,
            "14525": 488420864.0,
            "14530": 488420864.0,
            "14535": 488420864.0,
            "14540": 488420864.0,
            "14545": 488420864.0,
            "14550": 488420864.0,
            "14555": 488420864.0,
            "14560": 488420864.0,
            "14565": 488420864.0,
            "14570": 488420864.0,
            "14575": 488420864.0,
            "14580": 488420864.0,
            "14585": 488420864.0,
            "14590": 488420864.0,
            "14595": 488420864.0,
            "14600": 488420864.0,
            "14605": 488420864.0,
            "14610": 488420864.0,
            "14615": 488420864.0,
            "14620": 488420864.0,
            "14625": 488420864.0,
            "14630": 488420864.0,
            "14635": 488420864.0,
            "14640": 488420864.0,
            "14645": 488420864.0,
            "14650": 488420864.0,
            "14655": 488420864.0,
            "14660": 488420864.0,
            "14665": 488420864.0,
            "14670": 488420864.0,
            "14675": 488420864.0,
            "14680": 488420864.0,
            "14685": 488420864.0,
            "14690": 488420864.0,
            "14695": 488420864.0,
            "14700": 488420864.0,
            "14705": 488420864.0,
            "14710": 488420864.0,
            "14715": 488420864.0,
            "14720": 488420864.0,
            "14725": 488420864.0,
            "14730": 488420864.0,
            "14735": 488420864.0,
            "14740": 488420864.0,
            "14745": 488420864.0,
            "14750": 488420864.0,
            "14755": 488420864.0,
            "14760": 488420864.0,
            "14765": 488420864.0,
            "14770": 488420864.0,
            "14775": 488420864.0,
            "14780": 488420864.0,
            "14785": 488420864.0,
            "14790": 488420864.0,
            "14795": 488420864.0,
            "14800": 488420864.0,
            "14805": 488420864.0,
            "14810": 488420864.0,
            "14815": 488420864.0,
            "14820": 488420864.0,
            "14825": 488420864.0,
            "14830": 488420864.0,
            "14835": 488420864.0,
            "14840": 488420864.0,
            "14845": 488420864.0,
            "14850": 488420864.0,
            "14855": 488420864.0,
            "14860": 488420864.0,
            "14865": 488420864.0,
            "14870": 488420864.0,
            "14875": 488420864.0,
            "14880": 488420864.0,
            "14885": 488420864.0,
            "14890": 488420864.0,
            "14895": 488420864.0,
            "14900": 488420864.0,
            "14905": 488420864.0,
            "14910": 488420864.0,
            "14915": 488420864.0,
            "14920": 488420864.0,
            "14925": 488420864.0,
            "14930": 488420864.0,
            "14935": 488420864.0,
            "14940": 488420864.0,
            "14945": 488420864.0,
            "14950": 488420864.0,
            "14955": 488420864.0,
            "14960": 488420864.0,
            "14965": 488420864.0,
            "14970": 488420864.0,
            "14975": 488420864.0,
            "14980": 488420864.0,
            "14985": 488420864.0,
            "14990": 488420864.0,
            "14995": 488420864.0,
            "15000": 488420864.0,
            "15005": 488420864.0,
            "15010": 488420864.0,
            "15015": 488420864.0,
            "15020": 488420864.0,
            "15025": 488420864.0,
            "15030": 488420864.0,
            "15035": 488420864.0,
            "15040": 488420864.0,
            "15045": 488420864.0,
            "15050": 488420864.0,
            "15055": 488420864.0,
            "15060": 488420864.0,
            "15065": 488420864.0,
            "15070": 488420864.0,
            "15075": 488420864.0,
            "15080": 488420864.0,
            "15085": 488420864.0,
            "15090": 488420864.0,
            "15095": 488420864.0,
            "15100": 488420864.0,
            "15105": 488420864.0,
            "15110": 488420864.0,
            "15115": 488420864.0,
            "15120": 488420864.0,
            "15125": 488420864.0,
            "15130": 488420864.0,
            "15135": 488420864.0,
            "15140": 488420864.0,
            "15145": 488420864.0,
            "15150": 488420864.0,
            "15155": 488420864.0,
            "15160": 488420864.0,
            "15165": 488420864.0,
            "15170": 488420864.0,
            "15175": 488420864.0,
            "15180": 488420864.0,
            "15185": 488420864.0,
            "15190": 488420864.0,
            "15195": 488420864.0,
            "15200": 488420864.0,
            "15205": 488420864.0,
            "15210": 488420864.0,
            "15215": 488420864.0,
            "15220": 488420864.0,
            "15225": 488420864.0,
            "15230": 488420864.0,
            "15235": 488420864.0,
            "15240": 488420864.0,
            "15245": 488420864.0,
            "15250": 488420864.0,
            "15255": 488420864.0,
            "15260": 488420864.0,
            "15265": 488420864.0,
            "15270": 488420864.0,
            "15275": 488420864.0,
            "15280": 488420864.0,
            "15285": 488420864.0,
            "15290": 488420864.0,
            "15295": 488420864.0,
            "15300": 488420864.0,
            "15305": 488420864.0,
            "15310": 488420864.0,
            "15315": 488420864.0,
            "15320": 488420864.0,
            "15325": 488420864.0,
            "15330": 488420864.0,
            "15335": 488420864.0,
            "15340": 488420864.0,
            "15345": 488420864.0,
            "15350": 488420864.0,
            "15355": 488420864.0,
            "15360": 488420864.0,
            "15365": 488420864.0,
            "15370": 488420864.0,
            "15375": 488420864.0,
            "15380": 488420864.0,
            "15385": 488420864.0,
            "15390": 488420864.0,
            "15395": 488420864.0,
            "15400": 488420864.0,
            "15405": 488420864.0,
            "15410": 488420864.0,
            "15415": 488420864.0,
            "15420": 488420864.0,
            "15425": 488420864.0,
            "15430": 488420864.0,
            "15435": 488420864.0,
            "15440": 488420864.0,
            "15445": 488420864.0,
            "15450": 488420864.0,
            "15455": 488420864.0,
            "15460": 488420864.0,
            "15465": 488420864.0,
            "15470": 488420864.0,
            "15475": 488420864.0,
            "15480": 488420864.0,
            "15485": 488420864.0,
            "15490": 488420864.0,
            "15495": 488420864.0,
            "15500": 488420864.0,
            "15505": 488420864.0,
            "15510": 488420864.0,
            "15515": 488420864.0,
            "15520": 488420864.0,
            "15525": 488420864.0,
            "15530": 488420864.0,
            "15535": 488420864.0,
            "15540": 488420864.0,
            "15545": 488420864.0,
            "15550": 488420864.0,
            "15555": 488420864.0,
            "15560": 488420864.0,
            "15565": 488420864.0,
            "15570": 488420864.0,
            "15575": 488420864.0,
            "15580": 488420864.0,
            "15585": 488420864.0,
            "15590": 488420864.0,
            "15595": 488420864.0,
            "15600": 488420864.0,
            "15605": 488420864.0,
            "15610": 488420864.0,
            "15615": 488420864.0,
            "15620": 488420864.0,
            "15625": 488420864.0,
            "15630": 488420864.0,
            "15635": 488420864.0,
            "15640": 488420864.0,
            "15645": 488420864.0,
            "15650": 488420864.0,
            "15655": 488420864.0,
            "15660": 488420864.0,
            "15665": 488420864.0,
            "15670": 488420864.0,
            "15675": 488420864.0,
            "15680": 488420864.0,
            "15685": 488420864.0,
            "15690": 488420864.0,
            "15695": 488420864.0,
            "15700": 488420864.0,
            "15705": 488420864.0,
            "15710": 488420864.0,
            "15715": 488420864.0,
            "15720": 488420864.0,
            "15725": 488420864.0,
            "15730": 488420864.0,
            "15735": 488420864.0,
            "15740": 488420864.0,
            "15745": 488420864.0,
            "15750": 488420864.0,
            "15755": 488420864.0,
            "15760": 488420864.0,
            "15765": 488420864.0,
            "15770": 488420864.0,
            "15775": 488420864.0,
            "15780": 488420864.0,
            "15785": 488420864.0,
            "15790": 488420864.0,
            "15795": 488420864.0,
            "15800": 488420864.0,
            "15805": 488420864.0,
            "15810": 488420864.0,
            "15815": 488420864.0,
            "15820": 488420864.0,
            "15825": 488420864.0,
            "15830": 488420864.0,
            "15835": 488420864.0,
            "15840": 488420864.0,
            "15845": 488420864.0,
            "15850": 488420864.0,
            "15855": 488420864.0,
            "15860": 488420864.0,
            "15865": 488420864.0,
            "15870": 488420864.0,
            "15875": 488420864.0,
            "15880": 488420864.0,
            "15885": 488420864.0,
            "15890": 488420864.0,
            "15895": 488420864.0,
            "15900": 488420864.0,
            "15905": 488420864.0,
            "15910": 488420864.0,
            "15915": 488420864.0,
            "15920": 488420864.0,
            "15925": 488420864.0,
            "15930": 488420864.0,
            "15935": 488420864.0,
            "15940": 488420864.0,
            "15945": 488420864.0,
            "15950": 488420864.0,
            "15955": 488420864.0,
            "15960": 488420864.0,
            "15965": 488420864.0,
            "15970": 488420864.0,
            "15975": 488420864.0,
            "15980": 488420864.0,
            "15985": 488420864.0,
            "15990": 488420864.0,
            "15995": 488420864.0,
            "16000": 488420864.0,
            "16005": 488420864.0,
            "16010": 488420864.0,
            "16015": 488420864.0,
            "16020": 488420864.0,
            "16025": 488420864.0,
            "16030": 488420864.0,
            "16035": 488420864.0,
            "16040": 488420864.0,
            "16045": 488420864.0,
            "16050": 488420864.0,
            "16055": 488420864.0,
            "16060": 488420864.0,
            "16065": 488420864.0,
            "16070": 488420864.0,
            "16075": 488420864.0,
            "16080": 488420864.0,
            "16085": 488420864.0,
            "16090": 488420864.0,
            "16095": 488420864.0,
            "16100": 488420864.0,
            "16105": 488420864.0,
            "16110": 488420864.0,
            "16115": 488420864.0,
            "16120": 488420864.0,
            "16125": 488420864.0,
            "16130": 488420864.0,
            "16135": 488420864.0,
            "16140": 488420864.0,
            "16145": 488420864.0,
            "16150": 488420864.0,
            "16155": 488420864.0,
            "16160": 488420864.0,
            "16165": 488420864.0,
            "16170": 488420864.0,
            "16175": 488420864.0,
            "16180": 488420864.0,
            "16185": 488420864.0,
            "16190": 488420864.0,
            "16195": 488420864.0,
            "16200": 488420864.0,
            "16205": 488420864.0,
            "16210": 488420864.0,
            "16215": 488420864.0,
            "16220": 488420864.0,
            "16225": 488420864.0,
            "16230": 488420864.0,
            "16235": 488420864.0,
            "16240": 488420864.0,
            "16245": 488420864.0,
            "16250": 488420864.0,
            "16255": 488420864.0,
            "16260": 488420864.0,
            "16265": 488420864.0,
            "16270": 488420864.0,
            "16275": 488420864.0,
            "16280": 488420864.0,
            "16285": 488420864.0,
            "16290": 488420864.0,
            "16295": 488420864.0,
            "16300": 488420864.0,
            "16305": 488420864.0,
            "16310": 488420864.0,
            "16315": 488420864.0,
            "16320": 488420864.0,
            "16325": 488420864.0,
            "16330": 488420864.0,
            "16335": 488420864.0,
            "16340": 488420864.0,
            "16345": 488420864.0,
            "16350": 488420864.0,
            "16355": 488420864.0,
            "16360": 488420864.0,
            "16365": 488420864.0,
            "16370": 488420864.0,
            "16375": 488420864.0,
            "16380": 488420864.0,
            "16385": 488420864.0,
            "16390": 488420864.0,
            "16395": 488420864.0,
            "16400": 488420864.0,
            "16405": 488420864.0,
            "16410": 488420864.0,
            "16415": 488420864.0,
            "16420": 488420864.0,
            "16425": 488420864.0,
            "16430": 488420864.0,
            "16435": 488420864.0,
            "16440": 488420864.0,
            "16445": 488420864.0,
            "16450": 488420864.0,
            "16455": 488420864.0,
            "16460": 488420864.0,
            "16465": 488420864.0,
            "16470": 488420864.0,
            "16475": 488420864.0,
            "16480": 488420864.0,
            "16485": 488420864.0,
            "16490": 488420864.0,
            "16495": 488420864.0,
            "16500": 488420864.0,
            "16505": 488420864.0,
            "16510": 488420864.0,
            "16515": 488420864.0,
            "16520": 488420864.0,
            "16525": 488420864.0,
            "16530": 488420864.0,
            "16535": 488420864.0,
            "16540": 488420864.0,
            "16545": 488420864.0,
            "16550": 488420864.0,
            "16555": 488420864.0,
            "16560": 488420864.0,
            "16565": 488420864.0,
            "16570": 488420864.0,
            "16575": 488420864.0,
            "16580": 488420864.0,
            "16585": 488420864.0,
            "16590": 488420864.0,
            "16595": 488420864.0,
            "16600": 488420864.0,
            "16605": 488420864.0,
            "16610": 488420864.0,
            "16615": 488420864.0,
            "16620": 488420864.0,
            "16625": 488420864.0,
            "16630": 488420864.0,
            "16635": 488420864.0,
            "16640": 488420864.0,
            "16645": 488420864.0,
            "16650": 488420864.0,
            "16655": 488420864.0,
            "16660": 488420864.0,
            "16665": 488420864.0,
            "16670": 488420864.0,
            "16675": 488420864.0,
            "16680": 488420864.0,
            "16685": 488420864.0,
            "16690": 488420864.0,
            "16695": 488420864.0,
            "16700": 488420864.0,
            "16705": 488420864.0,
            "16710": 488420864.0,
            "16715": 488420864.0,
            "16720": 488420864.0,
            "16725": 488420864.0,
            "16730": 488420864.0,
            "16735": 488420864.0,
            "16740": 488420864.0,
            "16745": 488420864.0,
            "16750": 488420864.0,
            "16755": 488420864.0,
            "16760": 488420864.0,
            "16765": 488420864.0,
            "16770": 488420864.0,
            "16775": 488420864.0,
            "16780": 488420864.0,
            "16785": 488420864.0,
            "16790": 488420864.0,
            "16795": 488420864.0,
            "16800": 488420864.0,
            "16805": 488420864.0,
            "16810": 488420864.0,
            "16815": 488420864.0,
            "16820": 488420864.0,
            "16825": 488420864.0,
            "16830": 488420864.0,
            "16835": 488420864.0,
            "16840": 488420864.0,
            "16845": 488420864.0,
            "16850": 488420864.0,
            "16855": 488420864.0,
            "16860": 488420864.0,
            "16865": 488420864.0,
            "16870": 488420864.0,
            "16875": 488420864.0,
            "16880": 488420864.0,
            "16885": 488420864.0,
            "16890": 488420864.0,
            "16895": 488420864.0,
            "16900": 488420864.0,
            "16905": 488420864.0,
            "16910": 488420864.0,
            "16915": 488420864.0,
            "16920": 488420864.0,
            "16925": 488420864.0,
            "16930": 488420864.0,
            "16935": 488420864.0,
            "16940": 488420864.0,
            "16945": 488420864.0,
            "16950": 488420864.0,
            "16955": 488420864.0,
            "16960": 488420864.0,
            "16965": 488420864.0,
            "16970": 488420864.0,
            "16975": 488420864.0,
            "16980": 488420864.0,
            "16985": 488420864.0,
            "16990": 488420864.0,
            "16995": 488420864.0,
            "17000": 488420864.0,
            "17005": 488420864.0,
            "17010": 488420864.0,
            "17015": 488420864.0,
            "17020": 488420864.0,
            "17025": 488420864.0,
            "17030": 488420864.0,
            "17035": 488420864.0,
            "17040": 488420864.0,
            "17045": 488420864.0,
            "17050": 488420864.0,
            "17055": 488420864.0,
            "17060": 488420864.0,
            "17065": 488420864.0,
            "17070": 488420864.0,
            "17075": 488420864.0,
            "17080": 488420864.0,
            "17085": 488420864.0,
            "17090": 488420864.0,
            "17095": 488420864.0,
            "17100": 488420864.0,
            "17105": 488420864.0,
            "17110": 488420864.0,
            "17115": 488420864.0,
            "17120": 488420864.0,
            "17125": 488420864.0,
            "17130": 488420864.0,
            "17135": 488420864.0,
            "17140": 488420864.0,
            "17145": 488420864.0,
            "17150": 488420864.0,
            "17155": 488420864.0,
            "17160": 488420864.0,
            "17165": 488420864.0,
            "17170": 488420864.0,
            "17175": 488420864.0,
            "17180": 488420864.0,
            "17185": 488420864.0,
            "17190": 488420864.0,
            "17195": 488420864.0,
            "17200": 488420864.0,
            "17205": 488420864.0,
            "17210": 488420864.0,
            "17215": 488420864.0,
            "17220": 488420864.0,
            "17225": 488420864.0,
            "17230": 488420864.0,
            "17235": 488420864.0,
            "17240": 488420864.0,
            "17245": 488420864.0,
            "17250": 488420864.0,
            "17255": 488420864.0,
            "17260": 488420864.0,
            "17265": 488420864.0,
            "17270": 488420864.0,
            "17275": 488420864.0,
            "17280": 488420864.0,
            "17285": 488420864.0,
            "17290": 488420864.0,
            "17295": 488420864.0,
            "17300": 488420864.0,
            "17305": 488420864.0,
            "17310": 488420864.0,
            "17315": 488420864.0,
            "17320": 488420864.0,
            "17325": 488420864.0,
            "17330": 488420864.0,
            "17335": 488420864.0,
            "17340": 488420864.0,
            "17345": 488420864.0,
            "17350": 488420864.0,
            "17355": 488420864.0,
            "17360": 488420864.0,
            "17365": 488420864.0,
            "17370": 488420864.0,
            "17375": 488420864.0,
            "17380": 488420864.0,
            "17385": 488420864.0,
            "17390": 488420864.0,
            "17395": 488420864.0,
            "17400": 488420864.0,
            "17405": 488420864.0,
            "17410": 488420864.0,
            "17415": 488420864.0,
            "17420": 488420864.0,
            "17425": 488420864.0,
            "17430": 488420864.0,
            "17435": 488420864.0,
            "17440": 488420864.0,
            "17445": 488420864.0,
            "17450": 488420864.0,
            "17455": 488420864.0,
            "17460": 488420864.0,
            "17465": 488420864.0,
            "17470": 488420864.0,
            "17475": 488420864.0,
            "17480": 488420864.0,
            "17485": 488420864.0,
            "17490": 488420864.0,
            "17495": 488420864.0,
            "17500": 488420864.0,
            "17505": 488420864.0,
            "17510": 488420864.0,
            "17515": 488420864.0,
            "17520": 488420864.0,
            "17525": 488420864.0,
            "17530": 488420864.0,
            "17535": 488420864.0,
            "17540": 488420864.0,
            "17545": 488420864.0,
            "17550": 488420864.0,
            "17555": 488420864.0,
            "17560": 488420864.0,
            "17565": 488420864.0,
            "17570": 488420864.0,
            "17575": 488420864.0,
            "17580": 488420864.0,
            "17585": 488420864.0,
            "17590": 488420864.0,
            "17595": 488420864.0,
            "17600": 488420864.0,
            "17605": 488420864.0,
            "17610": 488420864.0,
            "17615": 488420864.0,
            "17620": 488420864.0,
            "17625": 488420864.0,
            "17630": 488420864.0,
            "17635": 488420864.0,
            "17640": 488420864.0,
            "17645": 488420864.0,
            "17650": 488420864.0,
            "17655": 488420864.0,
            "17660": 488420864.0,
            "17665": 488420864.0,
            "17670": 488420864.0,
            "17675": 488420864.0,
            "17680": 488420864.0,
            "17685": 488420864.0,
            "17690": 488420864.0,
            "17695": 488420864.0,
            "17700": 488420864.0,
            "17705": 488420864.0,
            "17710": 488420864.0,
            "17715": 488420864.0,
            "17720": 488420864.0,
            "17725": 488420864.0,
            "17730": 488420864.0,
            "17735": 488420864.0,
            "17740": 488420864.0,
            "17745": 488420864.0,
            "17750": 488420864.0,
            "17755": 488420864.0,
            "17760": 488420864.0,
            "17765": 488420864.0,
            "17770": 488420864.0,
            "17775": 488420864.0,
            "17780": 488420864.0,
            "17785": 488420864.0,
            "17790": 488420864.0,
            "17795": 488420864.0,
            "17800": 488420864.0,
            "17805": 488420864.0,
            "17810": 488420864.0,
            "17815": 488420864.0,
            "17820": 488420864.0,
            "17825": 488420864.0,
            "17830": 488420864.0,
            "17835": 488420864.0,
            "17840": 488420864.0,
            "17845": 488420864.0,
            "17850": 488420864.0,
            "17855": 488420864.0,
            "17860": 488420864.0,
            "17865": 488420864.0,
            "17870": 488420864.0,
            "17875": 488420864.0,
            "17880": 488420864.0,
            "17885": 488420864.0,
            "17890": 488420864.0,
            "17895": 488420864.0,
            "17900": 488420864.0,
            "17905": 488420864.0,
            "17910": 488420864.0,
            "17915": 488420864.0,
            "17920": 488420864.0,
            "17925": 488420864.0,
            "17930": 488420864.0,
            "17935": 488420864.0,
            "17940": 488420864.0,
            "17945": 488420864.0,
            "17950": 488420864.0,
            "17955": 488420864.0,
            "17960": 488420864.0,
            "17965": 488420864.0,
            "17970": 488420864.0,
            "17975": 488420864.0,
            "17980": 488420864.0,
            "17985": 488420864.0,
            "17990": 488420864.0,
            "17995": 488420864.0,
            "18000": 488420864.0,
            "18005": 488420864.0,
            "18010": 488420864.0,
            "18015": 488420864.0,
            "18020": 488420864.0,
            "18025": 488420864.0,
            "18030": 488420864.0,
            "18035": 488420864.0,
            "18040": 488420864.0,
            "18045": 488420864.0,
            "18050": 488420864.0,
            "18055": 488420864.0,
            "18060": 488420864.0,
            "18065": 488420864.0,
            "18070": 488420864.0,
            "18075": 488420864.0,
            "18080": 488420864.0,
            "18085": 488420864.0,
            "18090": 488420864.0,
            "18095": 488420864.0,
            "18100": 488420864.0,
            "18105": 488420864.0,
            "18110": 488420864.0,
            "18115": 488420864.0,
            "18120": 488420864.0,
            "18125": 488420864.0,
            "18130": 488420864.0,
            "18135": 488420864.0,
            "18140": 488420864.0,
            "18145": 488420864.0,
            "18150": 488420864.0,
            "18155": 488420864.0,
            "18160": 488420864.0,
            "18165": 488420864.0,
            "18170": 488420864.0,
            "18175": 488420864.0,
            "18180": 488420864.0,
            "18185": 488420864.0,
            "18190": 488420864.0,
            "18195": 488420864.0,
            "18200": 488420864.0,
            "18205": 488420864.0,
            "18210": 488420864.0,
            "18215": 488420864.0,
            "18220": 488420864.0,
            "18225": 488420864.0,
            "18230": 488420864.0,
            "18235": 488420864.0,
            "18240": 488420864.0,
            "18245": 488420864.0,
            "18250": 488420864.0,
            "18255": 488420864.0,
            "18260": 488420864.0,
            "18265": 488420864.0,
            "18270": 488420864.0,
            "18275": 488420864.0,
            "18280": 488420864.0,
            "18285": 488420864.0,
            "18290": 488420864.0,
            "18295": 488420864.0,
            "18300": 488420864.0,
            "18305": 488420864.0,
            "18310": 488420864.0,
            "18315": 488420864.0,
            "18320": 488420864.0,
            "18325": 488420864.0,
            "18330": 488420864.0,
            "18335": 488420864.0,
            "18340": 488420864.0,
            "18345": 488420864.0,
            "18350": 488420864.0,
            "18355": 488420864.0,
            "18360": 488420864.0,
            "18365": 488420864.0,
            "18370": 488420864.0,
            "18375": 488420864.0,
            "18380": 488420864.0,
            "18385": 488420864.0,
            "18390": 488420864.0,
            "18395": 488420864.0,
            "18400": 488420864.0,
            "18405": 488420864.0,
            "18410": 488420864.0,
            "18415": 488420864.0,
            "18420": 488420864.0,
            "18425": 488420864.0,
            "18430": 488420864.0,
            "18435": 488420864.0,
            "18440": 488420864.0,
            "18445": 488420864.0,
            "18450": 488420864.0,
            "18455": 488420864.0,
            "18460": 488420864.0,
            "18465": 488420864.0,
            "18470": 488420864.0,
            "18475": 488420864.0,
            "18480": 488420864.0,
            "18485": 488420864.0,
            "18490": 488420864.0,
            "18495": 488420864.0,
            "18500": 488420864.0,
            "18505": 488420864.0,
            "18510": 488420864.0,
            "18515": 488420864.0,
            "18520": 488420864.0,
            "18525": 488420864.0,
            "18530": 488420864.0,
            "18535": 488420864.0,
            "18540": 488420864.0,
            "18545": 488420864.0,
            "18550": 488420864.0,
            "18555": 488420864.0,
            "18560": 488420864.0,
            "18565": 488420864.0,
            "18570": 488420864.0,
            "18575": 488420864.0,
            "18580": 488420864.0,
            "18585": 488420864.0,
            "18590": 488420864.0,
            "18595": 488420864.0,
            "18600": 488420864.0,
            "18605": 488420864.0,
            "18610": 488420864.0,
            "18615": 488420864.0,
            "18620": 488420864.0,
            "18625": 488420864.0,
            "18630": 488420864.0,
            "18635": 488420864.0,
            "18640": 488420864.0,
            "18645": 488420864.0,
            "18650": 488420864.0,
            "18655": 488420864.0,
            "18660": 488420864.0,
            "18665": 488420864.0,
            "18670": 488420864.0,
            "18675": 488420864.0,
            "18680": 488420864.0,
            "18685": 488420864.0,
            "18690": 488420864.0,
            "18695": 488420864.0,
            "18700": 488420864.0,
            "18705": 488420864.0,
            "18710": 488420864.0,
            "18715": 488420864.0,
            "18720": 486061568.0,
            "18725": 486061568.0,
            "18730": 486061568.0,
            "18735": 486061568.0,
            "18740": 486061568.0,
            "18745": 486061568.0,
            "18750": 486061568.0,
            "18755": 486061568.0,
            "18760": 486061568.0,
            "18765": 486061568.0,
            "18770": 486061568.0,
            "18775": 486061568.0,
            "18780": 486061568.0,
            "18785": 486061568.0,
            "18790": 486061568.0,
            "18795": 486061568.0,
            "18800": 486061568.0,
            "18805": 486061568.0,
            "18810": 486061568.0,
            "18815": 486061568.0,
            "18820": 486061568.0,
            "18825": 486061568.0,
            "18830": 486061568.0,
            "18835": 486061568.0,
            "18840": 486061568.0,
            "18845": 486061568.0,
            "18850": 486061568.0,
            "18855": 486061568.0,
            "18860": 486061568.0,
            "18865": 486061568.0,
            "18870": 486061568.0,
            "18875": 486061568.0,
            "18880": 486061568.0,
            "18885": 486061568.0,
            "18890": 486061568.0,
            "18895": 486061568.0,
            "18900": 486061568.0,
            "18905": 486061568.0,
            "18910": 486061568.0,
            "18915": 486061568.0,
            "18920": 486061568.0,
            "18925": 486061568.0,
            "18930": 486061568.0,
            "18935": 486061568.0,
            "18940": 486061568.0,
            "18945": 486061568.0,
            "18950": 486061568.0,
            "18955": 486061568.0,
            "18960": 486061568.0,
            "18965": 486061568.0,
            "18970": 486061568.0,
            "18975": 486061568.0,
            "18980": 486061568.0,
            "18985": 486061568.0,
            "18990": 486061568.0,
            "18995": 486061568.0,
            "19000": 486061568.0,
            "19005": 486061568.0,
            "19010": 486061568.0,
            "19015": 486061568.0,
            "19020": 486061568.0,
            "19025": 486061568.0,
            "19030": 486061568.0,
            "19035": 486061568.0,
            "19040": 486061568.0,
            "19045": 486061568.0,
            "19050": 486061568.0,
            "19055": 486061568.0,
            "19060": 486061568.0,
            "19065": 486061568.0,
            "19070": 486061568.0,
            "19075": 486061568.0,
            "19080": 486061568.0,
            "19085": 486061568.0,
            "19090": 486061568.0,
            "19095": 486061568.0,
            "19100": 486061568.0,
            "19105": 486061568.0,
            "19110": 486061568.0,
            "19115": 486061568.0,
            "19120": 486061568.0,
            "19125": 486061568.0,
            "19130": 486061568.0,
            "19135": 486061568.0,
            "19140": 486061568.0,
            "19145": 486061568.0,
            "19150": 486061568.0,
            "19155": 486061568.0,
            "19160": 486061568.0,
            "19165": 486061568.0,
            "19170": 486061568.0,
            "19175": 486061568.0,
            "19180": 486061568.0,
            "19185": 486061568.0,
            "19190": 486061568.0,
            "19195": 486061568.0,
            "19200": 486061568.0,
            "19205": 486061568.0,
            "19210": 486061568.0,
            "19215": 486061568.0,
            "19220": 486061568.0,
            "19225": 486061568.0,
            "19230": 486061568.0,
            "19235": 486061568.0,
            "19240": 486061568.0,
            "19245": 486061568.0,
            "19250": 486061568.0,
            "19255": 486061568.0,
            "19260": 486061568.0,
            "19265": 486061568.0,
            "19270": 486061568.0,
            "19275": 486061568.0,
            "19280": 486061568.0,
            "19285": 486061568.0,
            "19290": 486061568.0,
            "19295": 486061568.0,
            "19300": 486061568.0,
            "19305": 486061568.0,
            "19310": 486061568.0,
            "19315": 486061568.0,
            "19320": 486061568.0,
            "19325": 486061568.0,
            "19330": 486061568.0,
            "19335": 486061568.0,
            "19340": 486061568.0,
            "19345": 486061568.0,
            "19350": 486061568.0,
            "19355": 486061568.0,
            "19360": 486061568.0,
            "19365": 486061568.0,
            "19370": 486061568.0,
            "19375": 486061568.0,
            "19380": 486061568.0,
            "19385": 486061568.0,
            "19390": 486061568.0,
            "19395": 486061568.0,
            "19400": 486061568.0,
            "19405": 486061568.0,
            "19410": 486061568.0,
            "19415": 486061568.0,
            "19420": 486061568.0,
            "19425": 486061568.0,
            "19430": 486061568.0,
            "19435": 486061568.0,
            "19440": 486061568.0,
            "19445": 486061568.0,
            "19450": 486061568.0,
            "19455": 486061568.0,
            "19460": 486061568.0,
            "19465": 486061568.0,
            "19470": 486061568.0,
            "19475": 486061568.0,
            "19480": 486061568.0,
            "19485": 486061568.0,
            "19490": 486061568.0,
            "19495": 486061568.0,
            "19500": 486061568.0,
            "19505": 486061568.0,
            "19510": 486061568.0,
            "19515": 486061568.0,
            "19520": 486061568.0,
            "19525": 486061568.0,
            "19530": 486061568.0,
            "19535": 486061568.0,
            "19540": 486061568.0,
            "19545": 486061568.0,
            "19550": 486061568.0,
            "19555": 486061568.0,
            "19560": 486061568.0,
            "19565": 486061568.0,
            "19570": 486061568.0,
            "19575": 486061568.0,
            "19580": 486061568.0,
            "19585": 486061568.0,
            "19590": 486061568.0,
            "19595": 486061568.0,
            "19600": 486061568.0,
            "19605": 486061568.0,
            "19610": 486061568.0,
            "19615": 486061568.0,
            "19620": 486061568.0,
            "19625": 486061568.0,
            "19630": 486061568.0,
            "19635": 486061568.0,
            "19640": 486061568.0,
            "19645": 486061568.0,
            "19650": 486061568.0,
            "19655": 486061568.0,
            "19660": 486061568.0,
            "19665": 486061568.0,
            "19670": 486061568.0,
            "19675": 486061568.0,
            "19680": 486061568.0,
            "19685": 486061568.0,
            "19690": 486061568.0,
            "19695": 486061568.0,
            "19700": 486061568.0,
            "19705": 486061568.0,
            "19710": 486061568.0,
            "19715": 486061568.0,
            "19720": 486061568.0,
            "19725": 486061568.0,
            "19730": 486061568.0,
            "19735": 486061568.0,
            "19740": 486061568.0,
            "19745": 486061568.0,
            "19750": 486061568.0,
            "19755": 486061568.0,
            "19760": 486061568.0,
            "19765": 486061568.0,
            "19770": 486061568.0,
            "19775": 486061568.0,
            "19780": 486061568.0,
            "19785": 486061568.0,
            "19790": 486061568.0,
            "19795": 486061568.0,
            "19800": 486061568.0,
            "19805": 486061568.0,
            "19810": 486061568.0,
            "19815": 486061568.0,
            "19820": 486061568.0,
            "19825": 486061568.0,
            "19830": 486061568.0,
            "19835": 486061568.0,
            "19840": 486061568.0,
            "19845": 486061568.0,
            "19850": 486061568.0,
            "19855": 486061568.0,
            "19860": 486061568.0,
            "19865": 486061568.0,
            "19870": 486061568.0,
            "19875": 486061568.0,
            "19880": 486061568.0,
            "19885": 486061568.0,
            "19890": 486061568.0,
            "19895": 486061568.0,
            "19900": 486061568.0,
            "19905": 486061568.0,
            "19910": 486061568.0,
            "19915": 486061568.0,
            "19920": 486061568.0,
            "19925": 486061568.0,
            "19930": 486061568.0,
            "19935": 486061568.0,
            "19940": 486061568.0,
            "19945": 486061568.0,
            "19950": 486061568.0,
            "19955": 486061568.0,
            "19960": 486061568.0,
            "19965": 486061568.0,
            "19970": 486061568.0,
            "19975": 486061568.0,
            "19980": 486061568.0,
            "19985": 486061568.0,
            "19990": 486061568.0,
            "19995": 486061568.0,
            "20000": 486061568.0
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 20000,
        "step_interval": 5,
        "values": {
            "1": "nan",
            "5": "nan",
            "10": "nan",
            "15": "nan",
            "20": 7137.0,
            "25": 7629.0,
            "30": 10879.0,
            "35": 7277.0,
            "40": 7853.0,
            "45": 8748.0,
            "50": 7127.0,
            "55": 8612.0,
            "60": 8666.0,
            "65": 8674.0,
            "70": 6577.0,
            "75": 8450.0,
            "80": 7542.0,
            "85": 10158.0,
            "90": 6406.0,
            "95": 7635.0,
            "100": 6887.0,
            "105": 7525.0,
            "110": 8434.0,
            "115": 6996.0,
            "120": 9499.0,
            "125": 6486.0,
            "130": 6976.0,
            "135": 7417.0,
            "140": 7405.0,
            "145": 8978.0,
            "150": 7925.0,
            "155": 7494.0,
            "160": 6827.0,
            "165": 8469.0,
            "170": 6201.0,
            "175": 8171.0,
            "180": 7359.0,
            "185": 7895.0,
            "190": 8148.0,
            "195": 10109.0,
            "200": 8108.0,
            "205": 8397.0,
            "210": 7372.0,
            "215": 7709.0,
            "220": 8983.0,
            "225": 9082.0,
            "230": 8315.0,
            "235": 8549.0,
            "240": 7485.0,
            "245": 7817.0,
            "250": 8397.0,
            "255": 7572.0,
            "260": 9009.0,
            "265": 9116.0,
            "270": 8137.0,
            "275": 8936.0,
            "280": 8179.0,
            "285": 8850.0,
            "290": 10306.0,
            "295": 10819.0,
            "300": 8537.0,
            "305": 7120.0,
            "310": 9519.0,
            "315": 8386.0,
            "320": 9228.0,
            "325": 8538.0,
            "330": 8791.0,
            "335": 8348.0,
            "340": 9193.0,
            "345": 9687.0,
            "350": 10351.0,
            "355": 8878.0,
            "360": 8255.0,
            "365": 8726.0,
            "370": 10505.0,
            "375": 8758.0,
            "380": 9437.0,
            "385": 8740.0,
            "390": 7355.0,
            "395": 9287.0,
            "400": 9923.0,
            "405": 10540.0,
            "410": 8608.0,
            "415": 9937.0,
            "420": 8077.0,
            "425": 12841.0,
            "430": 8689.0,
            "435": 9148.0,
            "440": 9822.0,
            "445": 8835.0,
            "450": 9309.0,
            "455": 7800.0,
            "460": 8815.0,
            "465": 10805.0,
            "470": 8889.0,
            "475": 9838.0,
            "480": 10094.0,
            "485": 9428.0,
            "490": 9795.0,
            "495": 10447.0,
            "500": 8150.0,
            "505": 9626.0,
            "510": 9402.0,
            "515": 9123.0,
            "520": 8647.0,
            "525": 8578.0,
            "530": 8244.0,
            "535": 9306.0,
            "540": 9956.0,
            "545": 7232.0,
            "550": 8953.0,
            "555": 10977.0,
            "560": 9585.0,
            "565": 13573.0,
            "570": 10135.0,
            "575": 10007.0,
            "580": 9375.0,
            "585": 8799.0,
            "590": 8499.0,
            "595": 11102.0,
            "600": 10341.0,
            "605": 8297.0,
            "610": 8746.0,
            "615": 9174.0,
            "620": 10349.0,
            "625": 9504.0,
            "630": 10875.0,
            "635": 10567.0,
            "640": 9185.0,
            "645": 8452.0,
            "650": 10868.0,
            "655": 9129.0,
            "660": 8994.0,
            "665": 8677.0,
            "670": 9763.0,
            "675": 8821.0,
            "680": 10203.0,
            "685": 8025.0,
            "690": 9248.0,
            "695": 10293.0,
            "700": 9223.0,
            "705": 10536.0,
            "710": 9146.0,
            "715": 9363.0,
            "720": 10524.0,
            "725": 11908.0,
            "730": 9030.0,
            "735": 9787.0,
            "740": 8688.0,
            "745": 8559.0,
            "750": 9447.0,
            "755": 9221.0,
            "760": 10250.0,
            "765": 9131.0,
            "770": 9650.0,
            "775": 8547.0,
            "780": 9093.0,
            "785": 9244.0,
            "790": 11192.0,
            "795": 9435.0,
            "800": 8869.0,
            "805": 8601.0,
            "810": 10115.0,
            "815": 11359.0,
            "820": 9763.0,
            "825": 8691.0,
            "830": 10005.0,
            "835": 8724.0,
            "840": 10474.0,
            "845": 7803.0,
            "850": 9752.0,
            "855": 8137.0,
            "860": 9126.0,
            "865": 9955.0,
            "870": 8519.0,
            "875": 10291.0,
            "880": 9214.0,
            "885": 8948.0,
            "890": 10261.0,
            "895": 10457.0,
            "900": 9854.0,
            "905": 10878.0,
            "910": 9955.0,
            "915": 8910.0,
            "920": 8944.0,
            "925": 9521.0,
            "930": 8918.0,
            "935": 9679.0,
            "940": 9854.0,
            "945": 9018.0,
            "950": 9094.0,
            "955": 9339.0,
            "960": 10867.0,
            "965": 8878.0,
            "970": 8814.0,
            "975": 9160.0,
            "980": 9336.0,
            "985": 10275.0,
            "990": 9402.0,
            "995": 9325.0,
            "1000": 10308.0,
            "1005": 9038.0,
            "1010": 9023.0,
            "1015": 10114.0,
            "1020": 8956.0,
            "1025": 11065.0,
            "1030": 9483.0,
            "1035": 8262.0,
            "1040": 10210.0,
            "1045": 10172.0,
            "1050": 10898.0,
            "1055": 10197.0,
            "1060": 9162.0,
            "1065": 8741.0,
            "1070": 8577.0,
            "1075": 8340.0,
            "1080": 12776.0,
            "1085": 9884.0,
            "1090": 10036.0,
            "1095": 9416.0,
            "1100": 9992.0,
            "1105": 9866.0,
            "1110": 9088.0,
            "1115": 9340.0,
            "1120": 9597.0,
            "1125": 10675.0,
            "1130": 9856.0,
            "1135": 10628.0,
            "1140": 9742.0,
            "1145": 11204.0,
            "1150": 10118.0,
            "1155": 9843.0,
            "1160": 11443.0,
            "1165": 12017.0,
            "1170": 12744.0,
            "1175": 9366.0,
            "1180": 9108.0,
            "1185": 11573.0,
            "1190": 11507.0,
            "1195": 8826.0,
            "1200": 11890.0,
            "1205": 9030.0,
            "1210": 8517.0,
            "1215": 10892.0,
            "1220": 8715.0,
            "1225": 10127.0,
            "1230": 11240.0,
            "1235": 9300.0,
            "1240": 9778.0,
            "1245": 10897.0,
            "1250": 9397.0,
            "1255": 8580.0,
            "1260": 10293.0,
            "1265": 9405.0,
            "1270": 8851.0,
            "1275": 9389.0,
            "1280": 9519.0,
            "1285": 9715.0,
            "1290": 9167.0,
            "1295": 11554.0,
            "1300": 10482.0,
            "1305": 9951.0,
            "1310": 10642.0,
            "1315": 10650.0,
            "1320": 10485.0,
            "1325": 10451.0,
            "1330": 9789.0,
            "1335": 9723.0,
            "1340": 9785.0,
            "1345": 9675.0,
            "1350": 11037.0,
            "1355": 9338.0,
            "1360": 9442.0,
            "1365": 10070.0,
            "1370": 9462.0,
            "1375": 8737.0,
            "1380": 9610.0,
            "1385": 10364.0,
            "1390": 12992.0,
            "1395": 10162.0,
            "1400": 8835.0,
            "1405": 10201.0,
            "1410": 11209.0,
            "1415": 9948.0,
            "1420": 10932.0,
            "1425": 10821.0,
            "1430": 10932.0,
            "1435": 12203.0,
            "1440": 9394.0,
            "1445": 11539.0,
            "1450": 9544.0,
            "1455": 9710.0,
            "1460": 9552.0,
            "1465": 9332.0,
            "1470": 10780.0,
            "1475": 10119.0,
            "1480": 8878.0,
            "1485": 11059.0,
            "1490": 10135.0,
            "1495": 9845.0,
            "1500": 8946.0,
            "1505": 9116.0,
            "1510": 12739.0,
            "1515": 10318.0,
            "1520": 9231.0,
            "1525": 10180.0,
            "1530": 10313.0,
            "1535": 9466.0,
            "1540": 9157.0,
            "1545": 9693.0,
            "1550": 9927.0,
            "1555": 8693.0,
            "1560": 9832.0,
            "1565": 12309.0,
            "1570": 10738.0,
            "1575": 9474.0,
            "1580": 9382.0,
            "1585": 8693.0,
            "1590": 8354.0,
            "1595": 8610.0,
            "1600": 11693.0,
            "1605": 9146.0,
            "1610": 9465.0,
            "1615": 11132.0,
            "1620": 9257.0,
            "1625": 10002.0,
            "1630": 10467.0,
            "1635": 11419.0,
            "1640": 8724.0,
            "1645": 11430.0,
            "1650": 10011.0,
            "1655": 10622.0,
            "1660": 12192.0,
            "1665": 11487.0,
            "1670": 8549.0,
            "1675": 10313.0,
            "1680": 9414.0,
            "1685": 9511.0,
            "1690": 10299.0,
            "1695": 9578.0,
            "1700": 10543.0,
            "1705": 9383.0,
            "1710": 12888.0,
            "1715": 10069.0,
            "1720": 10690.0,
            "1725": 9948.0,
            "1730": 9373.0,
            "1735": 9325.0,
            "1740": 9392.0,
            "1745": 9605.0,
            "1750": 8766.0,
            "1755": 10299.0,
            "1760": 9606.0,
            "1765": 10742.0,
            "1770": 10598.0,
            "1775": 11967.0,
            "1780": 9692.0,
            "1785": 9571.0,
            "1790": 9638.0,
            "1795": 10128.0,
            "1800": 10136.0,
            "1805": 8623.0,
            "1810": 10079.0,
            "1815": 11685.0,
            "1820": 9805.0,
            "1825": 10507.0,
            "1830": 12474.0,
            "1835": 8837.0,
            "1840": 9116.0,
            "1845": 9473.0,
            "1850": 9609.0,
            "1855": 10607.0,
            "1860": 10040.0,
            "1865": 11582.0,
            "1870": 9232.0,
            "1875": 11684.0,
            "1880": 12530.0,
            "1885": 9121.0,
            "1890": 9753.0,
            "1895": 9306.0,
            "1900": 8627.0,
            "1905": 9668.0,
            "1910": 11325.0,
            "1915": 11522.0,
            "1920": 10343.0,
            "1925": 9303.0,
            "1930": 10190.0,
            "1935": 11108.0,
            "1940": 11020.0,
            "1945": 10541.0,
            "1950": 9917.0,
            "1955": 9484.0,
            "1960": 9582.0,
            "1965": 9188.0,
            "1970": 10651.0,
            "1975": 10219.0,
            "1980": 9806.0,
            "1985": 10754.0,
            "1990": 10125.0,
            "1995": 8977.0,
            "2000": 9880.0,
            "2005": 10108.0,
            "2010": 8232.0,
            "2015": 9040.0,
            "2020": 11701.0,
            "2025": 9313.0,
            "2030": 11267.0,
            "2035": 9812.0,
            "2040": 11329.0,
            "2045": 9045.0,
            "2050": 9460.0,
            "2055": 9458.0,
            "2060": 9170.0,
            "2065": 9333.0,
            "2070": 8178.0,
            "2075": 11393.0,
            "2080": 10831.0,
            "2085": 10930.0,
            "2090": 11309.0,
            "2095": 10429.0,
            "2100": 10524.0,
            "2105": 10501.0,
            "2110": 8198.0,
            "2115": 9195.0,
            "2120": 9536.0,
            "2125": 9988.0,
            "2130": 9740.0,
            "2135": 11330.0,
            "2140": 10987.0,
            "2145": 10867.0,
            "2150": 8593.0,
            "2155": 10296.0,
            "2160": 11976.0,
            "2165": 9951.0,
            "2170": 9342.0,
            "2175": 11645.0,
            "2180": 12190.0,
            "2185": 11839.0,
            "2190": 11337.0,
            "2195": 9574.0,
            "2200": 7619.0,
            "2205": 10274.0,
            "2210": 8676.0,
            "2215": 9789.0,
            "2220": 9059.0,
            "2225": 10822.0,
            "2230": 9658.0,
            "2235": 9570.0,
            "2240": 9475.0,
            "2245": 8405.0,
            "2250": 11318.0,
            "2255": 11797.0,
            "2260": 9602.0,
            "2265": 10922.0,
            "2270": 8970.0,
            "2275": 10074.0,
            "2280": 11087.0,
            "2285": 10333.0,
            "2290": 9667.0,
            "2295": 11395.0,
            "2300": 9068.0,
            "2305": 9353.0,
            "2310": 8065.0,
            "2315": 8888.0,
            "2320": 9319.0,
            "2325": 7532.0,
            "2330": 10274.0,
            "2335": 10640.0,
            "2340": 9329.0,
            "2345": 10054.0,
            "2350": 8738.0,
            "2355": 8856.0,
            "2360": 9458.0,
            "2365": 9606.0,
            "2370": 8537.0,
            "2375": 10110.0,
            "2380": 10378.0,
            "2385": 11208.0,
            "2390": 11180.0,
            "2395": 9726.0,
            "2400": 10845.0,
            "2405": 10722.0,
            "2410": 9215.0,
            "2415": 10924.0,
            "2420": 8968.0,
            "2425": 10403.0,
            "2430": 8765.0,
            "2435": 10361.0,
            "2440": 9867.0,
            "2445": 9315.0,
            "2450": 9890.0,
            "2455": 9943.0,
            "2460": 10094.0,
            "2465": 7871.0,
            "2470": 9074.0,
            "2475": 10706.0,
            "2480": 8703.0,
            "2485": 11684.0,
            "2490": 9317.0,
            "2495": 10255.0,
            "2500": 9651.0,
            "2505": 9566.0,
            "2510": 9154.0,
            "2515": 8949.0,
            "2520": 9636.0,
            "2525": 11407.0,
            "2530": 10334.0,
            "2535": 10377.0,
            "2540": 9346.0,
            "2545": 9235.0,
            "2550": 8493.0,
            "2555": 13004.0,
            "2560": 10270.0,
            "2565": 9898.0,
            "2570": 11394.0,
            "2575": 7864.0,
            "2580": 9011.0,
            "2585": 9696.0,
            "2590": 8185.0,
            "2595": 10678.0,
            "2600": 11299.0,
            "2605": 12605.0,
            "2610": 9442.0,
            "2615": 10480.0,
            "2620": 10990.0,
            "2625": 10422.0,
            "2630": 9846.0,
            "2635": 8893.0,
            "2640": 11602.0,
            "2645": 9457.0,
            "2650": 9763.0,
            "2655": 11074.0,
            "2660": 10315.0,
            "2665": 9199.0,
            "2670": 8932.0,
            "2675": 10798.0,
            "2680": 10597.0,
            "2685": 12237.0,
            "2690": 10100.0,
            "2695": 9530.0,
            "2700": 9723.0,
            "2705": 10858.0,
            "2710": 11955.0,
            "2715": 9664.0,
            "2720": 12514.0,
            "2725": 10681.0,
            "2730": 9211.0,
            "2735": 10985.0,
            "2740": 9205.0,
            "2745": 9576.0,
            "2750": 10049.0,
            "2755": 9675.0,
            "2760": 9678.0,
            "2765": 8855.0,
            "2770": 10135.0,
            "2775": 8604.0,
            "2780": 9912.0,
            "2785": 9327.0,
            "2790": 9921.0,
            "2795": 11444.0,
            "2800": 11866.0,
            "2805": 10913.0,
            "2810": 11254.0,
            "2815": 9624.0,
            "2820": 9700.0,
            "2825": 8963.0,
            "2830": 10128.0,
            "2835": 10061.0,
            "2840": 11307.0,
            "2845": 10415.0,
            "2850": 8650.0,
            "2855": 9257.0,
            "2860": 8858.0,
            "2865": 9206.0,
            "2870": 9219.0,
            "2875": 9700.0,
            "2880": 11365.0,
            "2885": 11167.0,
            "2890": 11884.0,
            "2895": 8651.0,
            "2900": 8237.0,
            "2905": 10410.0,
            "2910": 9645.0,
            "2915": 11373.0,
            "2920": 10063.0,
            "2925": 9677.0,
            "2930": 9308.0,
            "2935": 8790.0,
            "2940": 8925.0,
            "2945": 8009.0,
            "2950": 9989.0,
            "2955": 9927.0,
            "2960": 9682.0,
            "2965": 9427.0,
            "2970": 8996.0,
            "2975": 10013.0,
            "2980": 11093.0,
            "2985": 9161.0,
            "2990": 10777.0,
            "2995": 12125.0,
            "3000": 11869.0,
            "3005": 8221.0,
            "3010": 10316.0,
            "3015": 8569.0,
            "3020": 11904.0,
            "3025": 9055.0,
            "3030": 9808.0,
            "3035": 9912.0,
            "3040": 12510.0,
            "3045": 11833.0,
            "3050": 9846.0,
            "3055": 8456.0,
            "3060": 10203.0,
            "3065": 10743.0,
            "3070": 10281.0,
            "3075": 10394.0,
            "3080": 9592.0,
            "3085": 7893.0,
            "3090": 10363.0,
            "3095": 9670.0,
            "3100": 10017.0,
            "3105": 10670.0,
            "3110": 10533.0,
            "3115": 10817.0,
            "3120": 9081.0,
            "3125": 12243.0,
            "3130": 9596.0,
            "3135": 9747.0,
            "3140": 9648.0,
            "3145": 10810.0,
            "3150": 10554.0,
            "3155": 9545.0,
            "3160": 11768.0,
            "3165": 9032.0,
            "3170": 9057.0,
            "3175": 11515.0,
            "3180": 10119.0,
            "3185": 10721.0,
            "3190": 8646.0,
            "3195": 11754.0,
            "3200": 9280.0,
            "3205": 9864.0,
            "3210": 10291.0,
            "3215": 10558.0,
            "3220": 9588.0,
            "3225": 10639.0,
            "3230": 10530.0,
            "3235": 9851.0,
            "3240": 8930.0,
            "3245": 11910.0,
            "3250": 10931.0,
            "3255": 8505.0,
            "3260": 8723.0,
            "3265": 9976.0,
            "3270": 10423.0,
            "3275": 10980.0,
            "3280": 10141.0,
            "3285": 10478.0,
            "3290": 11118.0,
            "3295": 8476.0,
            "3300": 10065.0,
            "3305": 8693.0,
            "3310": 10548.0,
            "3315": 9554.0,
            "3320": 9493.0,
            "3325": 10507.0,
            "3330": 8977.0,
            "3335": 9126.0,
            "3340": 8876.0,
            "3345": 8807.0,
            "3350": 11275.0,
            "3355": 12723.0,
            "3360": 11252.0,
            "3365": 11364.0,
            "3370": 10484.0,
            "3375": 10419.0,
            "3380": 10811.0,
            "3385": 10708.0,
            "3390": 9573.0,
            "3395": 10278.0,
            "3400": 9247.0,
            "3405": 8975.0,
            "3410": 9214.0,
            "3415": 8662.0,
            "3420": 9840.0,
            "3425": 9979.0,
            "3430": 9611.0,
            "3435": 9402.0,
            "3440": 9732.0,
            "3445": 9734.0,
            "3450": 9229.0,
            "3455": 10904.0,
            "3460": 10777.0,
            "3465": 10544.0,
            "3470": 10259.0,
            "3475": 10809.0,
            "3480": 11826.0,
            "3485": 10254.0,
            "3490": 9050.0,
            "3495": 9234.0,
            "3500": 10007.0,
            "3505": 10618.0,
            "3510": 10368.0,
            "3515": 8588.0,
            "3520": 9230.0,
            "3525": 10324.0,
            "3530": 11007.0,
            "3535": 10036.0,
            "3540": 9740.0,
            "3545": 10970.0,
            "3550": 10133.0,
            "3555": 11761.0,
            "3560": 9435.0,
            "3565": 9456.0,
            "3570": 9522.0,
            "3575": 11616.0,
            "3580": 9725.0,
            "3585": 10861.0,
            "3590": 11316.0,
            "3595": 11946.0,
            "3600": 12416.0,
            "3605": 11830.0,
            "3610": 10025.0,
            "3615": 11507.0,
            "3620": 9571.0,
            "3625": 12706.0,
            "3630": 11859.0,
            "3635": 10758.0,
            "3640": 9077.0,
            "3645": 12021.0,
            "3650": 10945.0,
            "3655": 9390.0,
            "3660": 9273.0,
            "3665": 10238.0,
            "3670": 8937.0,
            "3675": 12182.0,
            "3680": 10069.0,
            "3685": 8389.0,
            "3690": 8292.0,
            "3695": 10743.0,
            "3700": 9394.0,
            "3705": 9460.0,
            "3710": 9236.0,
            "3715": 10394.0,
            "3720": 10161.0,
            "3725": 10150.0,
            "3730": 9884.0,
            "3735": 8802.0,
            "3740": 8692.0,
            "3745": 10237.0,
            "3750": 9888.0,
            "3755": 9450.0,
            "3760": 9280.0,
            "3765": 9182.0,
            "3770": 9374.0,
            "3775": 9896.0,
            "3780": 9288.0,
            "3785": 9153.0,
            "3790": 9843.0,
            "3795": 11621.0,
            "3800": 9612.0,
            "3805": 12459.0,
            "3810": 10834.0,
            "3815": 9601.0,
            "3820": 10410.0,
            "3825": 9163.0,
            "3830": 10859.0,
            "3835": 8132.0,
            "3840": 10363.0,
            "3845": 10389.0,
            "3850": 12555.0,
            "3855": 11207.0,
            "3860": 9140.0,
            "3865": 11427.0,
            "3870": 9624.0,
            "3875": 10851.0,
            "3880": 9399.0,
            "3885": 10165.0,
            "3890": 12048.0,
            "3895": 9678.0,
            "3900": 9336.0,
            "3905": 9677.0,
            "3910": 8030.0,
            "3915": 8786.0,
            "3920": 8113.0,
            "3925": 9211.0,
            "3930": 9256.0,
            "3935": 8576.0,
            "3940": 10771.0,
            "3945": 9505.0,
            "3950": 10073.0,
            "3955": 9097.0,
            "3960": 10608.0,
            "3965": 8799.0,
            "3970": 9632.0,
            "3975": 10286.0,
            "3980": 9683.0,
            "3985": 11011.0,
            "3990": 10021.0,
            "3995": 10937.0,
            "4000": 9770.0,
            "4005": 9605.0,
            "4010": 9901.0,
            "4015": 9226.0,
            "4020": 9607.0,
            "4025": 9981.0,
            "4030": 10952.0,
            "4035": 10968.0,
            "4040": 10159.0,
            "4045": 10725.0,
            "4050": 8560.0,
            "4055": 12881.0,
            "4060": 8838.0,
            "4065": 10737.0,
            "4070": 11220.0,
            "4075": 10471.0,
            "4080": 10650.0,
            "4085": 9681.0,
            "4090": 10083.0,
            "4095": 9616.0,
            "4100": 8668.0,
            "4105": 10680.0,
            "4110": 13119.0,
            "4115": 10000.0,
            "4120": 10281.0,
            "4125": 9242.0,
            "4130": 8043.0,
            "4135": 10872.0,
            "4140": 9908.0,
            "4145": 9710.0,
            "4150": 11902.0,
            "4155": 11533.0,
            "4160": 12632.0,
            "4165": 9445.0,
            "4170": 9046.0,
            "4175": 9732.0,
            "4180": 10676.0,
            "4185": 9272.0,
            "4190": 12143.0,
            "4195": 10761.0,
            "4200": 12481.0,
            "4205": 11582.0,
            "4210": 12482.0,
            "4215": 11687.0,
            "4220": 9077.0,
            "4225": 10025.0,
            "4230": 9946.0,
            "4235": 11473.0,
            "4240": 9456.0,
            "4245": 12335.0,
            "4250": 10583.0,
            "4255": 9268.0,
            "4260": 11535.0,
            "4265": 9668.0,
            "4270": 9935.0,
            "4275": 10206.0,
            "4280": 9294.0,
            "4285": 9179.0,
            "4290": 9024.0,
            "4295": 9959.0,
            "4300": 9017.0,
            "4305": 12122.0,
            "4310": 10384.0,
            "4315": 11521.0,
            "4320": 10211.0,
            "4325": 9415.0,
            "4330": 10682.0,
            "4335": 9536.0,
            "4340": 10331.0,
            "4345": 9552.0,
            "4350": 9640.0,
            "4355": 9385.0,
            "4360": 9879.0,
            "4365": 9598.0,
            "4370": 10025.0,
            "4375": 10846.0,
            "4380": 9763.0,
            "4385": 9744.0,
            "4390": 10368.0,
            "4395": 10636.0,
            "4400": 9738.0,
            "4405": 9659.0,
            "4410": 11579.0,
            "4415": 8957.0,
            "4420": 10122.0,
            "4425": 9434.0,
            "4430": 10800.0,
            "4435": 9603.0,
            "4440": 8708.0,
            "4445": 9322.0,
            "4450": 9851.0,
            "4455": 10677.0,
            "4460": 11938.0,
            "4465": 10323.0,
            "4470": 9576.0,
            "4475": 9425.0,
            "4480": 9028.0,
            "4485": 11121.0,
            "4490": 10632.0,
            "4495": 9656.0,
            "4500": 10407.0,
            "4505": 10299.0,
            "4510": 12222.0,
            "4515": 10430.0,
            "4520": 10517.0,
            "4525": 8367.0,
            "4530": 11066.0,
            "4535": 9173.0,
            "4540": 10584.0,
            "4545": 9756.0,
            "4550": 9236.0,
            "4555": 12376.0,
            "4560": 9391.0,
            "4565": 8823.0,
            "4570": 8789.0,
            "4575": 10972.0,
            "4580": 8701.0,
            "4585": 10408.0,
            "4590": 11359.0,
            "4595": 9442.0,
            "4600": 10384.0,
            "4605": 9654.0,
            "4610": 11092.0,
            "4615": 9243.0,
            "4620": 12656.0,
            "4625": 9683.0,
            "4630": 9993.0,
            "4635": 10474.0,
            "4640": 11711.0,
            "4645": 12107.0,
            "4650": 10401.0,
            "4655": 10482.0,
            "4660": 8822.0,
            "4665": 10117.0,
            "4670": 11662.0,
            "4675": 8957.0,
            "4680": 9192.0,
            "4685": 9717.0,
            "4690": 11466.0,
            "4695": 9948.0,
            "4700": 11143.0,
            "4705": 11582.0,
            "4710": 10071.0,
            "4715": 10089.0,
            "4720": 10639.0,
            "4725": 10682.0,
            "4730": 10399.0,
            "4735": 9472.0,
            "4740": 9727.0,
            "4745": 11685.0,
            "4750": 8455.0,
            "4755": 8736.0,
            "4760": 9560.0,
            "4765": 9882.0,
            "4770": 9516.0,
            "4775": 9224.0,
            "4780": 11383.0,
            "4785": 9315.0,
            "4790": 10256.0,
            "4795": 13840.0,
            "4800": 10070.0,
            "4805": 9637.0,
            "4810": 9118.0,
            "4815": 12788.0,
            "4820": 9696.0,
            "4825": 8612.0,
            "4830": 10713.0,
            "4835": 9328.0,
            "4840": 12516.0,
            "4845": 10430.0,
            "4850": 10556.0,
            "4855": 12324.0,
            "4860": 8535.0,
            "4865": 14372.0,
            "4870": 10659.0,
            "4875": 11246.0,
            "4880": 11119.0,
            "4885": 10601.0,
            "4890": 12410.0,
            "4895": 9492.0,
            "4900": 10495.0,
            "4905": 10592.0,
            "4910": 10738.0,
            "4915": 10477.0,
            "4920": 12077.0,
            "4925": 10969.0,
            "4930": 10107.0,
            "4935": 11494.0,
            "4940": 9935.0,
            "4945": 11153.0,
            "4950": 9920.0,
            "4955": 11682.0,
            "4960": 12951.0,
            "4965": 9046.0,
            "4970": 11135.0,
            "4975": 9662.0,
            "4980": 11331.0,
            "4985": 10004.0,
            "4990": 12313.0,
            "4995": 10364.0,
            "5000": 11107.0,
            "5005": 11706.0,
            "5010": 10820.0,
            "5015": 9274.0,
            "5020": 10299.0,
            "5025": 10047.0,
            "5030": 9507.0,
            "5035": 10795.0,
            "5040": 10982.0,
            "5045": 11429.0,
            "5050": 10342.0,
            "5055": 10201.0,
            "5060": 10733.0,
            "5065": 10003.0,
            "5070": 10359.0,
            "5075": 10358.0,
            "5080": 11594.0,
            "5085": 10034.0,
            "5090": 8991.0,
            "5095": 9229.0,
            "5100": 9678.0,
            "5105": 10110.0,
            "5110": 9692.0,
            "5115": 8788.0,
            "5120": 11927.0,
            "5125": 10609.0,
            "5130": 9894.0,
            "5135": 9155.0,
            "5140": 9936.0,
            "5145": 7987.0,
            "5150": 11812.0,
            "5155": 10279.0,
            "5160": 10772.0,
            "5165": 9975.0,
            "5170": 10409.0,
            "5175": 10649.0,
            "5180": 10498.0,
            "5185": 10132.0,
            "5190": 11667.0,
            "5195": 12228.0,
            "5200": 11091.0,
            "5205": 9705.0,
            "5210": 7795.0,
            "5215": 10395.0,
            "5220": 9011.0,
            "5225": 10773.0,
            "5230": 9463.0,
            "5235": 9790.0,
            "5240": 11114.0,
            "5245": 11743.0,
            "5250": 10240.0,
            "5255": 9106.0,
            "5260": 9550.0,
            "5265": 9805.0,
            "5270": 13443.0,
            "5275": 9865.0,
            "5280": 9112.0,
            "5285": 10238.0,
            "5290": 11959.0,
            "5295": 10300.0,
            "5300": 9631.0,
            "5305": 9032.0,
            "5310": 9386.0,
            "5315": 10111.0,
            "5320": 9722.0,
            "5325": 9390.0,
            "5330": 9077.0,
            "5335": 9539.0,
            "5340": 9681.0,
            "5345": 9811.0,
            "5350": 10935.0,
            "5355": 11415.0,
            "5360": 9555.0,
            "5365": 10060.0,
            "5370": 10293.0,
            "5375": 9345.0,
            "5380": 9750.0,
            "5385": 12037.0,
            "5390": 10379.0,
            "5395": 10635.0,
            "5400": 10932.0,
            "5405": 10717.0,
            "5410": 9477.0,
            "5415": 10569.0,
            "5420": 10824.0,
            "5425": 9983.0,
            "5430": 9979.0,
            "5435": 11504.0,
            "5440": 10631.0,
            "5445": 10505.0,
            "5450": "nan",
            "5455": 12096.0,
            "5460": 11914.0,
            "5465": 12690.0,
            "5470": 10364.0,
            "5475": 12602.0,
            "5480": 9756.0,
            "5485": 9481.0,
            "5490": 10153.0,
            "5495": 10884.0,
            "5500": 10044.0,
            "5505": 8195.0,
            "5510": 10727.0,
            "5515": 10925.0,
            "5520": 9994.0,
            "5525": 10331.0,
            "5530": 10436.0,
            "5535": 9602.0,
            "5540": 9635.0,
            "5545": 11183.0,
            "5550": 10142.0,
            "5555": 11696.0,
            "5560": 10624.0,
            "5565": 8707.0,
            "5570": 9672.0,
            "5575": 10710.0,
            "5580": 9837.0,
            "5585": 11008.0,
            "5590": 9814.0,
            "5595": 12228.0,
            "5600": 9294.0,
            "5605": 10239.0,
            "5610": 12331.0,
            "5615": 9440.0,
            "5620": 10065.0,
            "5625": 9160.0,
            "5630": 9244.0,
            "5635": 9723.0,
            "5640": 10228.0,
            "5645": 10643.0,
            "5650": 12845.0,
            "5655": 10536.0,
            "5660": 11424.0,
            "5665": 10680.0,
            "5670": 9190.0,
            "5675": 12826.0,
            "5680": 8704.0,
            "5685": 11517.0,
            "5690": 8612.0,
            "5695": 10561.0,
            "5700": 10022.0,
            "5705": 12272.0,
            "5710": 10850.0,
            "5715": 10718.0,
            "5720": 10055.0,
            "5725": 9910.0,
            "5730": 10432.0,
            "5735": 9142.0,
            "5740": 10061.0,
            "5745": 9044.0,
            "5750": 10526.0,
            "5755": 12047.0,
            "5760": 8818.0,
            "5765": 10047.0,
            "5770": 8725.0,
            "5775": 9559.0,
            "5780": 10892.0,
            "5785": 9501.0,
            "5790": 12451.0,
            "5795": 10696.0,
            "5800": 9086.0,
            "5805": 11218.0,
            "5810": 9585.0,
            "5815": 11933.0,
            "5820": 9675.0,
            "5825": 9696.0,
            "5830": 10765.0,
            "5835": 9599.0,
            "5840": 9211.0,
            "5845": 11027.0,
            "5850": 10985.0,
            "5855": 9110.0,
            "5860": 11150.0,
            "5865": 9110.0,
            "5870": 10261.0,
            "5875": 11042.0,
            "5880": 10409.0,
            "5885": 9902.0,
            "5890": 9318.0,
            "5895": 9041.0,
            "5900": 8501.0,
            "5905": 10094.0,
            "5910": 10921.0,
            "5915": 11849.0,
            "5920": 9120.0,
            "5925": 9012.0,
            "5930": 8287.0,
            "5935": 10419.0,
            "5940": 9055.0,
            "5945": 11416.0,
            "5950": 10276.0,
            "5955": 8469.0,
            "5960": 11685.0,
            "5965": 9129.0,
            "5970": 12178.0,
            "5975": 9909.0,
            "5980": 9862.0,
            "5985": 10678.0,
            "5990": 10671.0,
            "5995": 9854.0,
            "6000": 9131.0,
            "6005": 10305.0,
            "6010": 13670.0,
            "6015": 11597.0,
            "6020": 10619.0,
            "6025": 9699.0,
            "6030": 9325.0,
            "6035": 11041.0,
            "6040": 10944.0,
            "6045": 9843.0,
            "6050": 10688.0,
            "6055": 9910.0,
            "6060": 8981.0,
            "6065": 9660.0,
            "6070": 9197.0,
            "6075": 10585.0,
            "6080": 12807.0,
            "6085": 9821.0,
            "6090": 11798.0,
            "6095": 10769.0,
            "6100": 12788.0,
            "6105": 10360.0,
            "6110": 9855.0,
            "6115": 9476.0,
            "6120": 10657.0,
            "6125": 11882.0,
            "6130": 8497.0,
            "6135": 10519.0,
            "6140": 10172.0,
            "6145": 8602.0,
            "6150": 11273.0,
            "6155": 10709.0,
            "6160": 10356.0,
            "6165": 10300.0,
            "6170": 9013.0,
            "6175": 9815.0,
            "6180": 9107.0,
            "6185": 12447.0,
            "6190": 10583.0,
            "6195": 10227.0,
            "6200": 9872.0,
            "6205": 10982.0,
            "6210": 9485.0,
            "6215": 9777.0,
            "6220": 8962.0,
            "6225": 10999.0,
            "6230": 11488.0,
            "6235": 10860.0,
            "6240": 10703.0,
            "6245": 9826.0,
            "6250": 10115.0,
            "6255": 8942.0,
            "6260": 9807.0,
            "6265": 8319.0,
            "6270": 10988.0,
            "6275": 9300.0,
            "6280": 9978.0,
            "6285": 11804.0,
            "6290": 8999.0,
            "6295": 10825.0,
            "6300": 9924.0,
            "6305": 9683.0,
            "6310": 11450.0,
            "6315": 8291.0,
            "6320": 10869.0,
            "6325": 11208.0,
            "6330": 10963.0,
            "6335": 10248.0,
            "6340": 10315.0,
            "6345": 8743.0,
            "6350": 10388.0,
            "6355": 11312.0,
            "6360": 9677.0,
            "6365": 9631.0,
            "6370": 10386.0,
            "6375": 9718.0,
            "6380": 10236.0,
            "6385": 12538.0,
            "6390": 10973.0,
            "6395": 11126.0,
            "6400": 8797.0,
            "6405": 10650.0,
            "6410": 11953.0,
            "6415": 10436.0,
            "6420": 9594.0,
            "6425": 11904.0,
            "6430": 10357.0,
            "6435": 10797.0,
            "6440": 9544.0,
            "6445": 9358.0,
            "6450": 9661.0,
            "6455": 9519.0,
            "6460": 10918.0,
            "6465": 9945.0,
            "6470": 10846.0,
            "6475": 10657.0,
            "6480": 9793.0,
            "6485": 9696.0,
            "6490": 11558.0,
            "6495": 8042.0,
            "6500": 9887.0,
            "6505": 11655.0,
            "6510": 10329.0,
            "6515": 9798.0,
            "6520": 10108.0,
            "6525": 10159.0,
            "6530": 10577.0,
            "6535": 8954.0,
            "6540": 9987.0,
            "6545": 10845.0,
            "6550": 11965.0,
            "6555": 10947.0,
            "6560": 11795.0,
            "6565": 10399.0,
            "6570": 10727.0,
            "6575": 9444.0,
            "6580": 11223.0,
            "6585": 10788.0,
            "6590": 10714.0,
            "6595": 11109.0,
            "6600": 10490.0,
            "6605": 10339.0,
            "6610": 11926.0,
            "6615": 8549.0,
            "6620": 8847.0,
            "6625": 10975.0,
            "6630": 9168.0,
            "6635": 11513.0,
            "6640": 9491.0,
            "6645": 11428.0,
            "6650": 11353.0,
            "6655": 10050.0,
            "6660": 10732.0,
            "6665": 9637.0,
            "6670": 9959.0,
            "6675": 10171.0,
            "6680": 10457.0,
            "6685": 9848.0,
            "6690": 9726.0,
            "6695": 10516.0,
            "6700": 10235.0,
            "6705": 11331.0,
            "6710": 12250.0,
            "6715": 13037.0,
            "6720": 10883.0,
            "6725": 10142.0,
            "6730": 9524.0,
            "6735": 11773.0,
            "6740": 9851.0,
            "6745": 10606.0,
            "6750": 10274.0,
            "6755": 10749.0,
            "6760": 9454.0,
            "6765": 10009.0,
            "6770": 9885.0,
            "6775": 8525.0,
            "6780": 9974.0,
            "6785": 10192.0,
            "6790": 10698.0,
            "6795": 10565.0,
            "6800": 9866.0,
            "6805": 10106.0,
            "6810": 12154.0,
            "6815": 11699.0,
            "6820": 9089.0,
            "6825": 9545.0,
            "6830": 10361.0,
            "6835": 11723.0,
            "6840": 8038.0,
            "6845": 9283.0,
            "6850": 9001.0,
            "6855": 9459.0,
            "6860": 11377.0,
            "6865": 10287.0,
            "6870": 11013.0,
            "6875": 9015.0,
            "6880": 10254.0,
            "6885": 10911.0,
            "6890": 12849.0,
            "6895": 9158.0,
            "6900": 10319.0,
            "6905": 12186.0,
            "6910": 11332.0,
            "6915": 8613.0,
            "6920": 10018.0,
            "6925": 12181.0,
            "6930": 12131.0,
            "6935": 11121.0,
            "6940": 10487.0,
            "6945": 11717.0,
            "6950": 8808.0,
            "6955": 10514.0,
            "6960": 10899.0,
            "6965": 8703.0,
            "6970": 10368.0,
            "6975": 11930.0,
            "6980": 10739.0,
            "6985": 10418.0,
            "6990": 9471.0,
            "6995": 10486.0,
            "7000": 11867.0,
            "7005": 8752.0,
            "7010": 12157.0,
            "7015": 10193.0,
            "7020": 9923.0,
            "7025": 9842.0,
            "7030": 12444.0,
            "7035": 10075.0,
            "7040": 10831.0,
            "7045": 10018.0,
            "7050": 9727.0,
            "7055": 12034.0,
            "7060": 11049.0,
            "7065": 10419.0,
            "7070": 11669.0,
            "7075": 10354.0,
            "7080": 11891.0,
            "7085": 9350.0,
            "7090": 11350.0,
            "7095": 9553.0,
            "7100": 8673.0,
            "7105": 10362.0,
            "7110": 10847.0,
            "7115": 11498.0,
            "7120": 9654.0,
            "7125": 9659.0,
            "7130": 10747.0,
            "7135": 10712.0,
            "7140": 11326.0,
            "7145": 9169.0,
            "7150": 13471.0,
            "7155": 8895.0,
            "7160": 9799.0,
            "7165": 10365.0,
            "7170": 8917.0,
            "7175": "nan",
            "7180": 9220.0,
            "7185": 10795.0,
            "7190": 10170.0,
            "7195": 12110.0,
            "7200": 10962.0,
            "7205": 10311.0,
            "7210": 10186.0,
            "7215": 9862.0,
            "7220": 10592.0,
            "7225": 10651.0,
            "7230": 12576.0,
            "7235": 9476.0,
            "7240": 10120.0,
            "7245": 9958.0,
            "7250": 12531.0,
            "7255": 9891.0,
            "7260": 10703.0,
            "7265": 11218.0,
            "7270": 10865.0,
            "7275": 8962.0,
            "7280": 9199.0,
            "7285": 11110.0,
            "7290": 11740.0,
            "7295": 9036.0,
            "7300": 9068.0,
            "7305": 10310.0,
            "7310": 10106.0,
            "7315": 13269.0,
            "7320": 10201.0,
            "7325": 10558.0,
            "7330": 10794.0,
            "7335": 8794.0,
            "7340": 10881.0,
            "7345": 10987.0,
            "7350": 10402.0,
            "7355": 10338.0,
            "7360": 10374.0,
            "7365": 10910.0,
            "7370": 10717.0,
            "7375": 9151.0,
            "7380": 10079.0,
            "7385": 9128.0,
            "7390": 9188.0,
            "7395": 11865.0,
            "7400": 8772.0,
            "7405": 10776.0,
            "7410": 10442.0,
            "7415": 9656.0,
            "7420": 9519.0,
            "7425": 9358.0,
            "7430": 11516.0,
            "7435": 11123.0,
            "7440": 10278.0,
            "7445": 12105.0,
            "7450": 10132.0,
            "7455": 11151.0,
            "7460": 11580.0,
            "7465": 9890.0,
            "7470": 11376.0,
            "7475": 8670.0,
            "7480": 10587.0,
            "7485": 9810.0,
            "7490": 7738.0,
            "7495": 9827.0,
            "7500": 10405.0,
            "7505": 10002.0,
            "7510": 9890.0,
            "7515": 8886.0,
            "7520": 9174.0,
            "7525": 10154.0,
            "7530": 9019.0,
            "7535": 9883.0,
            "7540": 9742.0,
            "7545": 8512.0,
            "7550": 9497.0,
            "7555": 9385.0,
            "7560": 11112.0,
            "7565": 9767.0,
            "7570": 9329.0,
            "7575": 10671.0,
            "7580": 9720.0,
            "7585": 11645.0,
            "7590": 10385.0,
            "7595": 10709.0,
            "7600": 10647.0,
            "7605": 7902.0,
            "7610": 9640.0,
            "7615": 10877.0,
            "7620": 9781.0,
            "7625": 10252.0,
            "7630": 9897.0,
            "7635": 9317.0,
            "7640": 8998.0,
            "7645": 14148.0,
            "7650": 9720.0,
            "7655": 8808.0,
            "7660": 10876.0,
            "7665": 10696.0,
            "7670": 11003.0,
            "7675": 9537.0,
            "7680": 10660.0,
            "7685": 9910.0,
            "7690": 11391.0,
            "7695": 10556.0,
            "7700": 9819.0,
            "7705": 9793.0,
            "7710": 10393.0,
            "7715": 9162.0,
            "7720": 10598.0,
            "7725": 10076.0,
            "7730": 9994.0,
            "7735": 10584.0,
            "7740": 12909.0,
            "7745": 10626.0,
            "7750": 11941.0,
            "7755": 10397.0,
            "7760": 9734.0,
            "7765": 11697.0,
            "7770": 9230.0,
            "7775": 10721.0,
            "7780": 9645.0,
            "7785": 9793.0,
            "7790": 10530.0,
            "7795": 9653.0,
            "7800": 10846.0,
            "7805": 10154.0,
            "7810": 10114.0,
            "7815": 10486.0,
            "7820": 10474.0,
            "7825": 9955.0,
            "7830": 9866.0,
            "7835": 9498.0,
            "7840": 10510.0,
            "7845": 9089.0,
            "7850": 8927.0,
            "7855": 9929.0,
            "7860": 12504.0,
            "7865": 9024.0,
            "7870": 8899.0,
            "7875": 12749.0,
            "7880": 11925.0,
            "7885": 12711.0,
            "7890": 11655.0,
            "7895": 11091.0,
            "7900": 9215.0,
            "7905": 12289.0,
            "7910": 9620.0,
            "7915": 10128.0,
            "7920": 8873.0,
            "7925": 10101.0,
            "7930": 12212.0,
            "7935": 10900.0,
            "7940": 10181.0,
            "7945": 12221.0,
            "7950": 11252.0,
            "7955": 12044.0,
            "7960": 10901.0,
            "7965": 9246.0,
            "7970": 10820.0,
            "7975": 9569.0,
            "7980": 10803.0,
            "7985": 10977.0,
            "7990": 10074.0,
            "7995": 8541.0,
            "8000": 10504.0,
            "8005": 10071.0,
            "8010": 10067.0,
            "8015": 9441.0,
            "8020": 7831.0,
            "8025": 12278.0,
            "8030": 8978.0,
            "8035": 10561.0,
            "8040": 10459.0,
            "8045": 10731.0,
            "8050": 8837.0,
            "8055": 11165.0,
            "8060": 11013.0,
            "8065": 9140.0,
            "8070": 11239.0,
            "8075": 9665.0,
            "8080": 10774.0,
            "8085": 9339.0,
            "8090": 9317.0,
            "8095": 9783.0,
            "8100": 11948.0,
            "8105": 9816.0,
            "8110": 10354.0,
            "8115": 10619.0,
            "8120": 10413.0,
            "8125": 9757.0,
            "8130": 9179.0,
            "8135": 9531.0,
            "8140": 9484.0,
            "8145": 11785.0,
            "8150": 11008.0,
            "8155": 11241.0,
            "8160": 7647.0,
            "8165": 10345.0,
            "8170": 11019.0,
            "8175": 9087.0,
            "8180": 11544.0,
            "8185": 9947.0,
            "8190": 9892.0,
            "8195": 11338.0,
            "8200": 11494.0,
            "8205": 10892.0,
            "8210": 10573.0,
            "8215": 11331.0,
            "8220": 10605.0,
            "8225": 9133.0,
            "8230": 13447.0,
            "8235": 9711.0,
            "8240": 8530.0,
            "8245": 8584.0,
            "8250": 9597.0,
            "8255": 9076.0,
            "8260": 12519.0,
            "8265": 9697.0,
            "8270": 10182.0,
            "8275": 10346.0,
            "8280": 10236.0,
            "8285": 10283.0,
            "8290": 10413.0,
            "8295": 10282.0,
            "8300": 10641.0,
            "8305": 9507.0,
            "8310": 9714.0,
            "8315": 9176.0,
            "8320": 10879.0,
            "8325": 9019.0,
            "8330": 10120.0,
            "8335": 10889.0,
            "8340": 11038.0,
            "8345": 8848.0,
            "8350": 10407.0,
            "8355": 9015.0,
            "8360": 8618.0,
            "8365": 9662.0,
            "8370": 10717.0,
            "8375": 10718.0,
            "8380": 12517.0,
            "8385": 12823.0,
            "8390": 9540.0,
            "8395": 10525.0,
            "8400": 10242.0,
            "8405": 9273.0,
            "8410": 10261.0,
            "8415": 9656.0,
            "8420": 10796.0,
            "8425": 11083.0,
            "8430": 9972.0,
            "8435": 9864.0,
            "8440": 9396.0,
            "8445": 8861.0,
            "8450": 13051.0,
            "8455": 10104.0,
            "8460": 9449.0,
            "8465": 9607.0,
            "8470": 10754.0,
            "8475": 10768.0,
            "8480": 9839.0,
            "8485": 10540.0,
            "8490": 12184.0,
            "8495": 9135.0,
            "8500": 10528.0,
            "8505": 9683.0,
            "8510": 9089.0,
            "8515": 11775.0,
            "8520": 10932.0,
            "8525": 9478.0,
            "8530": 9422.0,
            "8535": 10877.0,
            "8540": 10337.0,
            "8545": 10570.0,
            "8550": 13479.0,
            "8555": 10807.0,
            "8560": 10617.0,
            "8565": 11290.0,
            "8570": 12088.0,
            "8575": 12300.0,
            "8580": 11586.0,
            "8585": 10189.0,
            "8590": 12822.0,
            "8595": 11589.0,
            "8600": 10054.0,
            "8605": 10135.0,
            "8610": 9288.0,
            "8615": 10012.0,
            "8620": 8974.0,
            "8625": 10725.0,
            "8630": 11880.0,
            "8635": 10063.0,
            "8640": 10020.0,
            "8645": 9087.0,
            "8650": 9002.0,
            "8655": 11052.0,
            "8660": 10717.0,
            "8665": 8605.0,
            "8670": 10943.0,
            "8675": 10330.0,
            "8680": 11757.0,
            "8685": 10725.0,
            "8690": 10799.0,
            "8695": 9251.0,
            "8700": 10851.0,
            "8705": 11503.0,
            "8710": 10907.0,
            "8715": 10944.0,
            "8720": 9024.0,
            "8725": 9685.0,
            "8730": 8553.0,
            "8735": 10003.0,
            "8740": 12282.0,
            "8745": 10421.0,
            "8750": 9604.0,
            "8755": 9351.0,
            "8760": 9408.0,
            "8765": 10744.0,
            "8770": 8774.0,
            "8775": 10670.0,
            "8780": 9824.0,
            "8785": 10617.0,
            "8790": 10710.0,
            "8795": 9398.0,
            "8800": 11092.0,
            "8805": 10953.0,
            "8810": 10314.0,
            "8815": 11169.0,
            "8820": 9555.0,
            "8825": 9905.0,
            "8830": 8984.0,
            "8835": 9699.0,
            "8840": 10635.0,
            "8845": 12676.0,
            "8850": 9559.0,
            "8855": 9588.0,
            "8860": 11105.0,
            "8865": 9704.0,
            "8870": 9316.0,
            "8875": 10459.0,
            "8880": 9118.0,
            "8885": 10607.0,
            "8890": 9961.0,
            "8895": 11666.0,
            "8900": 10666.0,
            "8905": 9281.0,
            "8910": 10384.0,
            "8915": 11541.0,
            "8920": 11786.0,
            "8925": 11486.0,
            "8930": 11040.0,
            "8935": 11054.0,
            "8940": 9307.0,
            "8945": 12556.0,
            "8950": 9678.0,
            "8955": 9801.0,
            "8960": 9997.0,
            "8965": 11996.0,
            "8970": 9785.0,
            "8975": 9583.0,
            "8980": 9182.0,
            "8985": 10135.0,
            "8990": 10662.0,
            "8995": 10097.0,
            "9000": 10249.0,
            "9005": 9675.0,
            "9010": 9679.0,
            "9015": 11037.0,
            "9020": 10190.0,
            "9025": 10681.0,
            "9030": 9905.0,
            "9035": 10743.0,
            "9040": 10660.0,
            "9045": 10761.0,
            "9050": 9473.0,
            "9055": 8174.0,
            "9060": 10682.0,
            "9065": 9541.0,
            "9070": 10488.0,
            "9075": 11429.0,
            "9080": 10949.0,
            "9085": 9994.0,
            "9090": 9926.0,
            "9095": 11016.0,
            "9100": 10036.0,
            "9105": 10014.0,
            "9110": 10641.0,
            "9115": 9344.0,
            "9120": 10034.0,
            "9125": 10986.0,
            "9130": 10402.0,
            "9135": 9577.0,
            "9140": 9727.0,
            "9145": 8785.0,
            "9150": 11546.0,
            "9155": 10483.0,
            "9160": 10552.0,
            "9165": 13570.0,
            "9170": 9831.0,
            "9175": 10668.0,
            "9180": 9454.0,
            "9185": 10770.0,
            "9190": 10895.0,
            "9195": 9939.0,
            "9200": 9567.0,
            "9205": 11056.0,
            "9210": 9347.0,
            "9215": 10572.0,
            "9220": "nan",
            "9225": 10828.0,
            "9230": 9226.0,
            "9235": 9642.0,
            "9240": 10633.0,
            "9245": 9917.0,
            "9250": 10460.0,
            "9255": 12301.0,
            "9260": 8727.0,
            "9265": 10638.0,
            "9270": 10403.0,
            "9275": 10013.0,
            "9280": 9499.0,
            "9285": 12060.0,
            "9290": 9363.0,
            "9295": 10671.0,
            "9300": 10266.0,
            "9305": 10098.0,
            "9310": 11940.0,
            "9315": 10967.0,
            "9320": 9461.0,
            "9325": 11107.0,
            "9330": 11070.0,
            "9335": 9842.0,
            "9340": 8492.0,
            "9345": 12394.0,
            "9350": 10563.0,
            "9355": 10031.0,
            "9360": 10809.0,
            "9365": 9915.0,
            "9370": 10766.0,
            "9375": 12107.0,
            "9380": 10586.0,
            "9385": 9823.0,
            "9390": 9322.0,
            "9395": 9783.0,
            "9400": 11366.0,
            "9405": 9569.0,
            "9410": 9703.0,
            "9415": 9754.0,
            "9420": 9747.0,
            "9425": 11389.0,
            "9430": 10325.0,
            "9435": 10599.0,
            "9440": 10217.0,
            "9445": 11655.0,
            "9450": 9883.0,
            "9455": 10240.0,
            "9460": 11197.0,
            "9465": 11777.0,
            "9470": 10168.0,
            "9475": 10107.0,
            "9480": 10562.0,
            "9485": 10751.0,
            "9490": 9214.0,
            "9495": 9820.0,
            "9500": 9181.0,
            "9505": 11403.0,
            "9510": 11962.0,
            "9515": 8348.0,
            "9520": 10209.0,
            "9525": 9241.0,
            "9530": 10512.0,
            "9535": 9094.0,
            "9540": 10960.0,
            "9545": 8888.0,
            "9550": 9329.0,
            "9555": 9628.0,
            "9560": 10246.0,
            "9565": 9260.0,
            "9570": 12577.0,
            "9575": 10405.0,
            "9580": 11321.0,
            "9585": 11266.0,
            "9590": 10629.0,
            "9595": 10307.0,
            "9600": 11111.0,
            "9605": 11625.0,
            "9610": 9565.0,
            "9615": 10018.0,
            "9620": 12168.0,
            "9625": 9586.0,
            "9630": 13008.0,
            "9635": 10843.0,
            "9640": 8790.0,
            "9645": 10454.0,
            "9650": 9905.0,
            "9655": 10850.0,
            "9660": 10460.0,
            "9665": 9791.0,
            "9670": 12962.0,
            "9675": 10025.0,
            "9680": 9608.0,
            "9685": 8518.0,
            "9690": 9871.0,
            "9695": 10168.0,
            "9700": 10575.0,
            "9705": 10522.0,
            "9710": 10745.0,
            "9715": 9180.0,
            "9720": 10687.0,
            "9725": 9624.0,
            "9730": 10438.0,
            "9735": 10036.0,
            "9740": 10619.0,
            "9745": 9231.0,
            "9750": 10385.0,
            "9755": 9790.0,
            "9760": 10339.0,
            "9765": 10005.0,
            "9770": 11237.0,
            "9775": 10811.0,
            "9780": 11699.0,
            "9785": 11255.0,
            "9790": 10142.0,
            "9795": 9503.0,
            "9800": 10365.0,
            "9805": 9123.0,
            "9810": 10307.0,
            "9815": 8726.0,
            "9820": 11100.0,
            "9825": 10654.0,
            "9830": 9409.0,
            "9835": 11022.0,
            "9840": 8559.0,
            "9845": 9979.0,
            "9850": 10384.0,
            "9855": 9171.0,
            "9860": 9893.0,
            "9865": 10158.0,
            "9870": 9734.0,
            "9875": 10992.0,
            "9880": 9101.0,
            "9885": 12196.0,
            "9890": 9061.0,
            "9895": 11972.0,
            "9900": 9730.0,
            "9905": 11644.0,
            "9910": 10574.0,
            "9915": 9669.0,
            "9920": 10106.0,
            "9925": 9800.0,
            "9930": 11310.0,
            "9935": 10308.0,
            "9940": 12992.0,
            "9945": 11100.0,
            "9950": 9686.0,
            "9955": 9575.0,
            "9960": 10746.0,
            "9965": 10207.0,
            "9970": 11116.0,
            "9975": 9566.0,
            "9980": 9617.0,
            "9985": 11345.0,
            "9990": 10495.0,
            "9995": 11836.0,
            "10000": 10155.0,
            "10005": 10688.0,
            "10010": 9453.0,
            "10015": 9390.0,
            "10020": 9027.0,
            "10025": 10842.0,
            "10030": 10428.0,
            "10035": 10062.0,
            "10040": 9896.0,
            "10045": 10331.0,
            "10050": 8749.0,
            "10055": 11235.0,
            "10060": 11029.0,
            "10065": 11574.0,
            "10070": 8546.0,
            "10075": 9728.0,
            "10080": 10110.0,
            "10085": 9901.0,
            "10090": 10392.0,
            "10095": 11461.0,
            "10100": 9483.0,
            "10105": 10241.0,
            "10110": 10711.0,
            "10115": 10687.0,
            "10120": 10365.0,
            "10125": 9189.0,
            "10130": 9362.0,
            "10135": 9729.0,
            "10140": 9890.0,
            "10145": 9645.0,
            "10150": 10400.0,
            "10155": 9825.0,
            "10160": 9696.0,
            "10165": 9579.0,
            "10170": 10547.0,
            "10175": 9508.0,
            "10180": 10948.0,
            "10185": 10746.0,
            "10190": 10911.0,
            "10195": 11120.0,
            "10200": 9246.0,
            "10205": 9944.0,
            "10210": 12957.0,
            "10215": 10613.0,
            "10220": 10383.0,
            "10225": 10097.0,
            "10230": 10462.0,
            "10235": 10025.0,
            "10240": 9834.0,
            "10245": 12372.0,
            "10250": 13575.0,
            "10255": 9740.0,
            "10260": 10200.0,
            "10265": 10092.0,
            "10270": 9240.0,
            "10275": 9753.0,
            "10280": 9622.0,
            "10285": 10587.0,
            "10290": 10300.0,
            "10295": 8883.0,
            "10300": 11066.0,
            "10305": 12614.0,
            "10310": 11065.0,
            "10315": 9236.0,
            "10320": 10138.0,
            "10325": 11330.0,
            "10330": 10243.0,
            "10335": 10119.0,
            "10340": 9927.0,
            "10345": 10550.0,
            "10350": 10404.0,
            "10355": 10373.0,
            "10360": 10029.0,
            "10365": 10083.0,
            "10370": 9447.0,
            "10375": 10788.0,
            "10380": 11087.0,
            "10385": 9689.0,
            "10390": 10883.0,
            "10395": 12231.0,
            "10400": 10049.0,
            "10405": 10749.0,
            "10410": 11350.0,
            "10415": 10319.0,
            "10420": 11324.0,
            "10425": 10238.0,
            "10430": 10071.0,
            "10435": 9983.0,
            "10440": 11466.0,
            "10445": 10255.0,
            "10450": 10547.0,
            "10455": 10824.0,
            "10460": 11017.0,
            "10465": 11247.0,
            "10470": 9391.0,
            "10475": 11164.0,
            "10480": 10382.0,
            "10485": 10269.0,
            "10490": 11860.0,
            "10495": 10453.0,
            "10500": 10032.0,
            "10505": 10490.0,
            "10510": 9433.0,
            "10515": 10845.0,
            "10520": 10360.0,
            "10525": 9835.0,
            "10530": 10948.0,
            "10535": 9868.0,
            "10540": 10541.0,
            "10545": 10070.0,
            "10550": 11567.0,
            "10555": 10731.0,
            "10560": 10743.0,
            "10565": 10005.0,
            "10570": 10379.0,
            "10575": 10109.0,
            "10580": 11362.0,
            "10585": 10804.0,
            "10590": 9702.0,
            "10595": 9806.0,
            "10600": 9486.0,
            "10605": 10888.0,
            "10610": 11375.0,
            "10615": 10910.0,
            "10620": 10795.0,
            "10625": 10942.0,
            "10630": 9836.0,
            "10635": 9615.0,
            "10640": 10417.0,
            "10645": 12964.0,
            "10650": 9155.0,
            "10655": 9598.0,
            "10660": 9996.0,
            "10665": 10144.0,
            "10670": 10417.0,
            "10675": 10110.0,
            "10680": 9745.0,
            "10685": 10357.0,
            "10690": 9723.0,
            "10695": 9582.0,
            "10700": 11110.0,
            "10705": 11465.0,
            "10710": 10497.0,
            "10715": 11782.0,
            "10720": 9672.0,
            "10725": 12421.0,
            "10730": 9127.0,
            "10735": 9029.0,
            "10740": 11100.0,
            "10745": 10757.0,
            "10750": 10224.0,
            "10755": 9873.0,
            "10760": 9280.0,
            "10765": 11486.0,
            "10770": 10708.0,
            "10775": 12283.0,
            "10780": 10188.0,
            "10785": 10123.0,
            "10790": 11096.0,
            "10795": 10847.0,
            "10800": 10572.0,
            "10805": 12310.0,
            "10810": 10631.0,
            "10815": 10297.0,
            "10820": 9735.0,
            "10825": 11745.0,
            "10830": 10503.0,
            "10835": 10616.0,
            "10840": 10681.0,
            "10845": 9903.0,
            "10850": 9840.0,
            "10855": 10945.0,
            "10860": 9919.0,
            "10865": 10542.0,
            "10870": 9798.0,
            "10875": 12653.0,
            "10880": 9605.0,
            "10885": 10095.0,
            "10890": 11476.0,
            "10895": 9603.0,
            "10900": 10818.0,
            "10905": 9516.0,
            "10910": 9886.0,
            "10915": 9720.0,
            "10920": 10940.0,
            "10925": 10204.0,
            "10930": 9218.0,
            "10935": 10958.0,
            "10940": 11625.0,
            "10945": 9675.0,
            "10950": 8864.0,
            "10955": 10960.0,
            "10960": 10164.0,
            "10965": 9925.0,
            "10970": 10139.0,
            "10975": 11345.0,
            "10980": 9868.0,
            "10985": 9783.0,
            "10990": 11290.0,
            "10995": 10116.0,
            "11000": 10012.0,
            "11005": 8943.0,
            "11010": 10006.0,
            "11015": 9545.0,
            "11020": 11131.0,
            "11025": 10270.0,
            "11030": 11184.0,
            "11035": 10601.0,
            "11040": 9783.0,
            "11045": 12868.0,
            "11050": 10194.0,
            "11055": 11264.0,
            "11060": 10959.0,
            "11065": 10368.0,
            "11070": 10388.0,
            "11075": 11458.0,
            "11080": 10404.0,
            "11085": 10323.0,
            "11090": 10198.0,
            "11095": 10575.0,
            "11100": 11475.0,
            "11105": 9824.0,
            "11110": 10741.0,
            "11115": 10293.0,
            "11120": 10267.0,
            "11125": 10504.0,
            "11130": 12000.0,
            "11135": 10687.0,
            "11140": 12005.0,
            "11145": 10892.0,
            "11150": 11335.0,
            "11155": 9167.0,
            "11160": 12399.0,
            "11165": 10359.0,
            "11170": 11115.0,
            "11175": 10882.0,
            "11180": 11068.0,
            "11185": 10495.0,
            "11190": 9029.0,
            "11195": 10720.0,
            "11200": 10132.0,
            "11205": 9407.0,
            "11210": 10558.0,
            "11215": 10074.0,
            "11220": 9676.0,
            "11225": 10635.0,
            "11230": 14220.0,
            "11235": 10233.0,
            "11240": 10164.0,
            "11245": 10961.0,
            "11250": 9671.0,
            "11255": 11196.0,
            "11260": 9922.0,
            "11265": 9667.0,
            "11270": 9380.0,
            "11275": 10972.0,
            "11280": 9974.0,
            "11285": 9980.0,
            "11290": 9702.0,
            "11295": 11156.0,
            "11300": 10343.0,
            "11305": 11135.0,
            "11310": 11122.0,
            "11315": 11420.0,
            "11320": 10663.0,
            "11325": 10426.0,
            "11330": 12457.0,
            "11335": 10432.0,
            "11340": 9815.0,
            "11345": 9199.0,
            "11350": 10192.0,
            "11355": 11844.0,
            "11360": 10511.0,
            "11365": 10620.0,
            "11370": 10611.0,
            "11375": 8813.0,
            "11380": 9564.0,
            "11385": 10553.0,
            "11390": 9819.0,
            "11395": 10627.0,
            "11400": 10190.0,
            "11405": 9596.0,
            "11410": 10842.0,
            "11415": 9589.0,
            "11420": 10500.0,
            "11425": 11097.0,
            "11430": 12371.0,
            "11435": 11275.0,
            "11440": 9239.0,
            "11445": 11601.0,
            "11450": 9258.0,
            "11455": 9324.0,
            "11460": 10250.0,
            "11465": 9920.0,
            "11470": 11767.0,
            "11475": 11039.0,
            "11480": 10614.0,
            "11485": 9259.0,
            "11490": 10670.0,
            "11495": 10196.0,
            "11500": 10740.0,
            "11505": 9647.0,
            "11510": 10025.0,
            "11515": 11025.0,
            "11520": 10377.0,
            "11525": 9389.0,
            "11530": 10605.0,
            "11535": 10437.0,
            "11540": 10271.0,
            "11545": 9613.0,
            "11550": 10509.0,
            "11555": 9963.0,
            "11560": 11424.0,
            "11565": 9416.0,
            "11570": 10329.0,
            "11575": 11260.0,
            "11580": 11605.0,
            "11585": 10851.0,
            "11590": 10282.0,
            "11595": 9684.0,
            "11600": 9463.0,
            "11605": 10460.0,
            "11610": 11212.0,
            "11615": 9737.0,
            "11620": 9798.0,
            "11625": 10954.0,
            "11630": 9265.0,
            "11635": 11369.0,
            "11640": 8794.0,
            "11645": 11161.0,
            "11650": 10096.0,
            "11655": 9680.0,
            "11660": 9783.0,
            "11665": 9700.0,
            "11670": 10960.0,
            "11675": 8481.0,
            "11680": 10078.0,
            "11685": 11592.0,
            "11690": 10704.0,
            "11695": 10504.0,
            "11700": 11493.0,
            "11705": 10456.0,
            "11710": 9312.0,
            "11715": 11202.0,
            "11720": 12056.0,
            "11725": 9927.0,
            "11730": 9740.0,
            "11735": 10917.0,
            "11740": 12017.0,
            "11745": 9130.0,
            "11750": 10546.0,
            "11755": 11021.0,
            "11760": 9842.0,
            "11765": 10553.0,
            "11770": 9962.0,
            "11775": 10900.0,
            "11780": 10658.0,
            "11785": 9848.0,
            "11790": 10124.0,
            "11795": 10718.0,
            "11800": 10740.0,
            "11805": 11403.0,
            "11810": 9843.0,
            "11815": 9792.0,
            "11820": 9287.0,
            "11825": 9782.0,
            "11830": 9793.0,
            "11835": 10556.0,
            "11840": 11615.0,
            "11845": 10640.0,
            "11850": 10803.0,
            "11855": 9833.0,
            "11860": 10650.0,
            "11865": 11670.0,
            "11870": 9017.0,
            "11875": 12642.0,
            "11880": 10417.0,
            "11885": 9728.0,
            "11890": 12411.0,
            "11895": 10654.0,
            "11900": 10374.0,
            "11905": 9052.0,
            "11910": 11867.0,
            "11915": 10269.0,
            "11920": 10706.0,
            "11925": 12395.0,
            "11930": 11133.0,
            "11935": 9302.0,
            "11940": 10396.0,
            "11945": 10765.0,
            "11950": 10121.0,
            "11955": 10423.0,
            "11960": 10614.0,
            "11965": 10470.0,
            "11970": 9702.0,
            "11975": 9408.0,
            "11980": 11468.0,
            "11985": 10106.0,
            "11990": 10393.0,
            "11995": 11347.0,
            "12000": 10399.0,
            "12005": 10670.0,
            "12010": 10012.0,
            "12015": 10943.0,
            "12020": 11692.0,
            "12025": 10349.0,
            "12030": 9510.0,
            "12035": 10629.0,
            "12040": 11401.0,
            "12045": 11018.0,
            "12050": 8959.0,
            "12055": 11761.0,
            "12060": 10745.0,
            "12065": 10026.0,
            "12070": 10756.0,
            "12075": 11237.0,
            "12080": 12116.0,
            "12085": 10923.0,
            "12090": 10709.0,
            "12095": 10984.0,
            "12100": 10270.0,
            "12105": 10621.0,
            "12110": 9142.0,
            "12115": 11304.0,
            "12120": 11092.0,
            "12125": 10696.0,
            "12130": 9900.0,
            "12135": 10346.0,
            "12140": 9438.0,
            "12145": 10886.0,
            "12150": 10501.0,
            "12155": 9751.0,
            "12160": 11286.0,
            "12165": 9261.0,
            "12170": 10497.0,
            "12175": 10556.0,
            "12180": 11276.0,
            "12185": 10407.0,
            "12190": 10207.0,
            "12195": 12342.0,
            "12200": 11149.0,
            "12205": 8927.0,
            "12210": 10833.0,
            "12215": 11211.0,
            "12220": 10704.0,
            "12225": 10390.0,
            "12230": 9987.0,
            "12235": 10142.0,
            "12240": 9768.0,
            "12245": 11485.0,
            "12250": 9760.0,
            "12255": 9288.0,
            "12260": 9814.0,
            "12265": 10319.0,
            "12270": 10809.0,
            "12275": 11001.0,
            "12280": 9770.0,
            "12285": 10779.0,
            "12290": 10287.0,
            "12295": 9866.0,
            "12300": 9457.0,
            "12305": 12069.0,
            "12310": 9689.0,
            "12315": 10314.0,
            "12320": 10895.0,
            "12325": 11245.0,
            "12330": 9102.0,
            "12335": 9552.0,
            "12340": 10717.0,
            "12345": 11374.0,
            "12350": 10997.0,
            "12355": 11981.0,
            "12360": 9653.0,
            "12365": 10560.0,
            "12370": 11208.0,
            "12375": 10029.0,
            "12380": 10075.0,
            "12385": 11080.0,
            "12390": 9310.0,
            "12395": 10878.0,
            "12400": 11061.0,
            "12405": 11668.0,
            "12410": 10249.0,
            "12415": 11959.0,
            "12420": 10981.0,
            "12425": 9554.0,
            "12430": 11019.0,
            "12435": 11511.0,
            "12440": 10744.0,
            "12445": 10651.0,
            "12450": 11924.0,
            "12455": 10785.0,
            "12460": 10227.0,
            "12465": 9962.0,
            "12470": 11315.0,
            "12475": 10444.0,
            "12480": 12431.0,
            "12485": 10601.0,
            "12490": 10637.0,
            "12495": 9693.0,
            "12500": 11030.0,
            "12505": 9691.0,
            "12510": 11476.0,
            "12515": 9826.0,
            "12520": 11608.0,
            "12525": 10624.0,
            "12530": 9682.0,
            "12535": 9836.0,
            "12540": 10835.0,
            "12545": 9340.0,
            "12550": 11141.0,
            "12555": 12188.0,
            "12560": 10408.0,
            "12565": 9550.0,
            "12570": 9459.0,
            "12575": 10210.0,
            "12580": 9605.0,
            "12585": 9820.0,
            "12590": 9669.0,
            "12595": 12488.0,
            "12600": 11579.0,
            "12605": 11957.0,
            "12610": 9737.0,
            "12615": 10323.0,
            "12620": 10576.0,
            "12625": 10613.0,
            "12630": 10551.0,
            "12635": 11909.0,
            "12640": 11754.0,
            "12645": 9728.0,
            "12650": 10154.0,
            "12655": 11546.0,
            "12660": 10396.0,
            "12665": 8881.0,
            "12670": 10910.0,
            "12675": 10077.0,
            "12680": 10265.0,
            "12685": 11172.0,
            "12690": 10894.0,
            "12695": 10834.0,
            "12700": 9999.0,
            "12705": 9327.0,
            "12710": 9993.0,
            "12715": 9198.0,
            "12720": 9314.0,
            "12725": 10587.0,
            "12730": 9769.0,
            "12735": 10154.0,
            "12740": 11877.0,
            "12745": 9589.0,
            "12750": 12428.0,
            "12755": 10705.0,
            "12760": 10603.0,
            "12765": 11224.0,
            "12770": 10096.0,
            "12775": 9548.0,
            "12780": 10610.0,
            "12785": 11764.0,
            "12790": 9814.0,
            "12795": 11036.0,
            "12800": 9247.0,
            "12805": 9785.0,
            "12810": 11134.0,
            "12815": 10997.0,
            "12820": 10918.0,
            "12825": 9680.0,
            "12830": 9505.0,
            "12835": 9598.0,
            "12840": 10050.0,
            "12845": 10484.0,
            "12850": 9327.0,
            "12855": 9515.0,
            "12860": 10621.0,
            "12865": 10242.0,
            "12870": 11627.0,
            "12875": 10096.0,
            "12880": 9819.0,
            "12885": 9947.0,
            "12890": 10114.0,
            "12895": 9838.0,
            "12900": 11347.0,
            "12905": 11265.0,
            "12910": 11732.0,
            "12915": 10373.0,
            "12920": 10620.0,
            "12925": 10909.0,
            "12930": 10335.0,
            "12935": 10910.0,
            "12940": 9706.0,
            "12945": 10215.0,
            "12950": 13101.0,
            "12955": 9018.0,
            "12960": 10620.0,
            "12965": 10265.0,
            "12970": 10405.0,
            "12975": 12679.0,
            "12980": 11143.0,
            "12985": 11186.0,
            "12990": 10572.0,
            "12995": 10533.0,
            "13000": 11068.0,
            "13005": 10078.0,
            "13010": 10772.0,
            "13015": 10231.0,
            "13020": 10521.0,
            "13025": 10326.0,
            "13030": 10490.0,
            "13035": 10313.0,
            "13040": 9816.0,
            "13045": 14145.0,
            "13050": 9109.0,
            "13055": 10168.0,
            "13060": 10127.0,
            "13065": 8802.0,
            "13070": 10751.0,
            "13075": 10554.0,
            "13080": 10447.0,
            "13085": 10238.0,
            "13090": 11165.0,
            "13095": 11639.0,
            "13100": 10690.0,
            "13105": 9598.0,
            "13110": 10539.0,
            "13115": 10423.0,
            "13120": 10753.0,
            "13125": 10343.0,
            "13130": 9631.0,
            "13135": 11901.0,
            "13140": 10891.0,
            "13145": 9511.0,
            "13150": 10407.0,
            "13155": 10113.0,
            "13160": 10766.0,
            "13165": 10635.0,
            "13170": 10832.0,
            "13175": 10716.0,
            "13180": 9939.0,
            "13185": 10520.0,
            "13190": 11123.0,
            "13195": 9265.0,
            "13200": 11076.0,
            "13205": 10322.0,
            "13210": 10119.0,
            "13215": 10667.0,
            "13220": 9078.0,
            "13225": 12815.0,
            "13230": 10366.0,
            "13235": 10010.0,
            "13240": 9960.0,
            "13245": 11150.0,
            "13250": 10850.0,
            "13255": 9613.0,
            "13260": 11274.0,
            "13265": 10178.0,
            "13270": 10131.0,
            "13275": 10612.0,
            "13280": 11023.0,
            "13285": 9689.0,
            "13290": 9681.0,
            "13295": 10875.0,
            "13300": 10259.0,
            "13305": 11712.0,
            "13310": 9869.0,
            "13315": 13069.0,
            "13320": 11868.0,
            "13325": 10501.0,
            "13330": 9131.0,
            "13335": 11453.0,
            "13340": 11197.0,
            "13345": 9175.0,
            "13350": 10435.0,
            "13355": 11884.0,
            "13360": 12466.0,
            "13365": 10165.0,
            "13370": 10072.0,
            "13375": 9153.0,
            "13380": 9663.0,
            "13385": 11635.0,
            "13390": 10404.0,
            "13395": 9678.0,
            "13400": 10491.0,
            "13405": 9671.0,
            "13410": 9815.0,
            "13415": 10715.0,
            "13420": 10047.0,
            "13425": 10209.0,
            "13430": 10463.0,
            "13435": 11600.0,
            "13440": 11121.0,
            "13445": 11028.0,
            "13450": 10237.0,
            "13455": 9907.0,
            "13460": 9342.0,
            "13465": 10080.0,
            "13470": 10487.0,
            "13475": 10503.0,
            "13480": 9878.0,
            "13485": 10460.0,
            "13490": 11184.0,
            "13495": 11576.0,
            "13500": 10512.0,
            "13505": 9752.0,
            "13510": 10843.0,
            "13515": 10810.0,
            "13520": 11168.0,
            "13525": 9475.0,
            "13530": 10403.0,
            "13535": 11443.0,
            "13540": 9753.0,
            "13545": 11210.0,
            "13550": 9522.0,
            "13555": 10347.0,
            "13560": 10946.0,
            "13565": 9619.0,
            "13570": 11231.0,
            "13575": 10109.0,
            "13580": 10279.0,
            "13585": 9748.0,
            "13590": 10972.0,
            "13595": 11125.0,
            "13600": 11070.0,
            "13605": 11053.0,
            "13610": 9529.0,
            "13615": 9726.0,
            "13620": 9735.0,
            "13625": 10620.0,
            "13630": 10272.0,
            "13635": 9650.0,
            "13640": 10045.0,
            "13645": 11377.0,
            "13650": 11100.0,
            "13655": 9818.0,
            "13660": 10823.0,
            "13665": 9906.0,
            "13670": 10000.0,
            "13675": 9980.0,
            "13680": 10799.0,
            "13685": 10267.0,
            "13690": 9936.0,
            "13695": 10092.0,
            "13700": 11839.0,
            "13705": 11077.0,
            "13710": 10438.0,
            "13715": 9575.0,
            "13720": 10984.0,
            "13725": 11355.0,
            "13730": 10191.0,
            "13735": 10130.0,
            "13740": 10250.0,
            "13745": 12366.0,
            "13750": 10012.0,
            "13755": 10809.0,
            "13760": 10799.0,
            "13765": 10141.0,
            "13770": 10408.0,
            "13775": 11360.0,
            "13780": 9241.0,
            "13785": 11668.0,
            "13790": 9673.0,
            "13795": 9878.0,
            "13800": 10166.0,
            "13805": 9748.0,
            "13810": 11443.0,
            "13815": 10276.0,
            "13820": 9305.0,
            "13825": 10042.0,
            "13830": 10683.0,
            "13835": 10701.0,
            "13840": 10398.0,
            "13845": 12124.0,
            "13850": 10473.0,
            "13855": 9859.0,
            "13860": 10317.0,
            "13865": 10340.0,
            "13870": 10561.0,
            "13875": 11860.0,
            "13880": 11523.0,
            "13885": 10258.0,
            "13890": 11282.0,
            "13895": 10385.0,
            "13900": 10086.0,
            "13905": 10947.0,
            "13910": 10367.0,
            "13915": 11534.0,
            "13920": 9738.0,
            "13925": 10159.0,
            "13930": 10107.0,
            "13935": 9209.0,
            "13940": 10410.0,
            "13945": 10119.0,
            "13950": 8804.0,
            "13955": 9563.0,
            "13960": 10027.0,
            "13965": 12168.0,
            "13970": 10664.0,
            "13975": 9431.0,
            "13980": 9877.0,
            "13985": 10104.0,
            "13990": 10446.0,
            "13995": 11620.0,
            "14000": 9693.0,
            "14005": 10495.0,
            "14010": 10808.0,
            "14015": 10608.0,
            "14020": 9822.0,
            "14025": 10662.0,
            "14030": 11895.0,
            "14035": 11884.0,
            "14040": 9238.0,
            "14045": 9750.0,
            "14050": 10541.0,
            "14055": 11164.0,
            "14060": 10186.0,
            "14065": 12692.0,
            "14070": 11608.0,
            "14075": 11860.0,
            "14080": 11169.0,
            "14085": 10845.0,
            "14090": 10575.0,
            "14095": 10756.0,
            "14100": 10763.0,
            "14105": 11596.0,
            "14110": 9676.0,
            "14115": 10997.0,
            "14120": 11596.0,
            "14125": 11019.0,
            "14130": 11831.0,
            "14135": 11740.0,
            "14140": 10634.0,
            "14145": 10138.0,
            "14150": 10619.0,
            "14155": 10365.0,
            "14160": 11219.0,
            "14165": 9318.0,
            "14170": 10673.0,
            "14175": 9547.0,
            "14180": 11993.0,
            "14185": 10347.0,
            "14190": 8543.0,
            "14195": 10345.0,
            "14200": 9443.0,
            "14205": 10760.0,
            "14210": 12113.0,
            "14215": 11346.0,
            "14220": 10384.0,
            "14225": 11607.0,
            "14230": 12141.0,
            "14235": 11379.0,
            "14240": 9408.0,
            "14245": 9830.0,
            "14250": 10431.0,
            "14255": 9559.0,
            "14260": 10354.0,
            "14265": 9875.0,
            "14270": 10337.0,
            "14275": 10291.0,
            "14280": 10523.0,
            "14285": 11639.0,
            "14290": 10697.0,
            "14295": 11016.0,
            "14300": 9608.0,
            "14305": 11752.0,
            "14310": 10781.0,
            "14315": 10912.0,
            "14320": 9190.0,
            "14325": 9708.0,
            "14330": 10959.0,
            "14335": 11882.0,
            "14340": 10019.0,
            "14345": 11044.0,
            "14350": 9592.0,
            "14355": 11034.0,
            "14360": 10391.0,
            "14365": 11632.0,
            "14370": 10997.0,
            "14375": 10895.0,
            "14380": 10885.0,
            "14385": 10328.0,
            "14390": 8946.0,
            "14395": 12044.0,
            "14400": 10901.0,
            "14405": 10707.0,
            "14410": 11905.0,
            "14415": 9842.0,
            "14420": 9464.0,
            "14425": 10121.0,
            "14430": 10062.0,
            "14435": 10194.0,
            "14440": 10898.0,
            "14445": 10985.0,
            "14450": 10031.0,
            "14455": 9686.0,
            "14460": 10319.0,
            "14465": 9678.0,
            "14470": 10736.0,
            "14475": 12335.0,
            "14480": 10606.0,
            "14485": 11211.0,
            "14490": 10544.0,
            "14495": 9916.0,
            "14500": 9550.0,
            "14505": 10176.0,
            "14510": 9943.0,
            "14515": 9834.0,
            "14520": 9885.0,
            "14525": 10488.0,
            "14530": 13103.0,
            "14535": 9869.0,
            "14540": 11616.0,
            "14545": 10834.0,
            "14550": 10606.0,
            "14555": 12083.0,
            "14560": 9864.0,
            "14565": 10578.0,
            "14570": 10758.0,
            "14575": 11910.0,
            "14580": 10188.0,
            "14585": 12812.0,
            "14590": 10922.0,
            "14595": 11075.0,
            "14600": 10844.0,
            "14605": 11149.0,
            "14610": 12872.0,
            "14615": 10154.0,
            "14620": 10089.0,
            "14625": 11668.0,
            "14630": 11120.0,
            "14635": 11405.0,
            "14640": 10753.0,
            "14645": 10739.0,
            "14650": 10530.0,
            "14655": 10554.0,
            "14660": 9449.0,
            "14665": 10115.0,
            "14670": 9682.0,
            "14675": 10979.0,
            "14680": 10079.0,
            "14685": 12559.0,
            "14690": 10621.0,
            "14695": 9976.0,
            "14700": 12792.0,
            "14705": 11909.0,
            "14710": 10056.0,
            "14715": 10465.0,
            "14720": 10730.0,
            "14725": 8871.0,
            "14730": 11015.0,
            "14735": 11805.0,
            "14740": 11543.0,
            "14745": 8697.0,
            "14750": 11059.0,
            "14755": 10122.0,
            "14760": 9677.0,
            "14765": 10470.0,
            "14770": 10011.0,
            "14775": 12445.0,
            "14780": 11744.0,
            "14785": 11002.0,
            "14790": 10734.0,
            "14795": 9761.0,
            "14800": 10020.0,
            "14805": 9145.0,
            "14810": 10081.0,
            "14815": 9715.0,
            "14820": 9961.0,
            "14825": 11169.0,
            "14830": 10578.0,
            "14835": 8976.0,
            "14840": 10521.0,
            "14845": 9650.0,
            "14850": 9330.0,
            "14855": 10746.0,
            "14860": 10082.0,
            "14865": 10804.0,
            "14870": 9887.0,
            "14875": 12732.0,
            "14880": 9326.0,
            "14885": 11152.0,
            "14890": 10791.0,
            "14895": 10198.0,
            "14900": 10154.0,
            "14905": 9625.0,
            "14910": 9351.0,
            "14915": 9621.0,
            "14920": 10810.0,
            "14925": 9455.0,
            "14930": 9322.0,
            "14935": 13042.0,
            "14940": 9712.0,
            "14945": 9564.0,
            "14950": 10193.0,
            "14955": 10143.0,
            "14960": 10780.0,
            "14965": 9892.0,
            "14970": 10884.0,
            "14975": 10117.0,
            "14980": 11008.0,
            "14985": 9490.0,
            "14990": 10434.0,
            "14995": 11352.0,
            "15000": 9715.0,
            "15005": 11504.0,
            "15010": 11062.0,
            "15015": 11610.0,
            "15020": 9593.0,
            "15025": 10811.0,
            "15030": 10960.0,
            "15035": 10768.0,
            "15040": 10644.0,
            "15045": 10562.0,
            "15050": 9890.0,
            "15055": 11883.0,
            "15060": 10172.0,
            "15065": 12028.0,
            "15070": 11771.0,
            "15075": 9531.0,
            "15080": 9537.0,
            "15085": 10951.0,
            "15090": 11400.0,
            "15095": 11407.0,
            "15100": 10510.0,
            "15105": 10644.0,
            "15110": 10717.0,
            "15115": 9956.0,
            "15120": 11214.0,
            "15125": 10674.0,
            "15130": 11407.0,
            "15135": 10384.0,
            "15140": 12343.0,
            "15145": 10772.0,
            "15150": 12206.0,
            "15155": 11645.0,
            "15160": 10611.0,
            "15165": 10918.0,
            "15170": 10616.0,
            "15175": 11228.0,
            "15180": 10589.0,
            "15185": 10965.0,
            "15190": 9277.0,
            "15195": 10419.0,
            "15200": 11059.0,
            "15205": 10647.0,
            "15210": 11825.0,
            "15215": 10127.0,
            "15220": 11082.0,
            "15225": 10781.0,
            "15230": 9987.0,
            "15235": 11557.0,
            "15240": 11305.0,
            "15245": 10694.0,
            "15250": 8700.0,
            "15255": 11100.0,
            "15260": 10020.0,
            "15265": 10646.0,
            "15270": 10398.0,
            "15275": 9710.0,
            "15280": 10859.0,
            "15285": 10819.0,
            "15290": 10099.0,
            "15295": 9314.0,
            "15300": 10640.0,
            "15305": 10693.0,
            "15310": 10810.0,
            "15315": 10217.0,
            "15320": 11828.0,
            "15325": 10632.0,
            "15330": 10646.0,
            "15335": 10250.0,
            "15340": 9883.0,
            "15345": 10849.0,
            "15350": 10985.0,
            "15355": 11830.0,
            "15360": 11706.0,
            "15365": 12890.0,
            "15370": 9108.0,
            "15375": 10106.0,
            "15380": 10072.0,
            "15385": 10158.0,
            "15390": 12270.0,
            "15395": 10789.0,
            "15400": 11463.0,
            "15405": 11027.0,
            "15410": 11461.0,
            "15415": 10410.0,
            "15420": 11163.0,
            "15425": 9994.0,
            "15430": 10218.0,
            "15435": 9979.0,
            "15440": 10235.0,
            "15445": 11593.0,
            "15450": 9666.0,
            "15455": 10057.0,
            "15460": 10632.0,
            "15465": 10547.0,
            "15470": 9147.0,
            "15475": 10065.0,
            "15480": 9866.0,
            "15485": 10575.0,
            "15490": 10666.0,
            "15495": 11857.0,
            "15500": 10437.0,
            "15505": 12018.0,
            "15510": 10106.0,
            "15515": 10280.0,
            "15520": 12015.0,
            "15525": 10003.0,
            "15530": 9800.0,
            "15535": 9728.0,
            "15540": 9895.0,
            "15545": 9617.0,
            "15550": 10434.0,
            "15555": 9790.0,
            "15560": 9986.0,
            "15565": 8993.0,
            "15570": 10090.0,
            "15575": 10106.0,
            "15580": 12253.0,
            "15585": 9683.0,
            "15590": 10037.0,
            "15595": 10055.0,
            "15600": 10744.0,
            "15605": 10200.0,
            "15610": 9347.0,
            "15615": 11480.0,
            "15620": 10100.0,
            "15625": 10044.0,
            "15630": 13159.0,
            "15635": 11304.0,
            "15640": 11045.0,
            "15645": 11239.0,
            "15650": 10123.0,
            "15655": 10848.0,
            "15660": 10560.0,
            "15665": 11138.0,
            "15670": 10107.0,
            "15675": 11462.0,
            "15680": 9766.0,
            "15685": 10650.0,
            "15690": 9859.0,
            "15695": 11519.0,
            "15700": 10365.0,
            "15705": 12085.0,
            "15710": 11426.0,
            "15715": 9329.0,
            "15720": 9355.0,
            "15725": 10807.0,
            "15730": 8884.0,
            "15735": 12453.0,
            "15740": 10101.0,
            "15745": 10183.0,
            "15750": 12836.0,
            "15755": 10170.0,
            "15760": 11531.0,
            "15765": 10592.0,
            "15770": 10800.0,
            "15775": 11628.0,
            "15780": 11195.0,
            "15785": 9367.0,
            "15790": 12334.0,
            "15795": 11257.0,
            "15800": 9442.0,
            "15805": 9606.0,
            "15810": 10896.0,
            "15815": 10172.0,
            "15820": 9814.0,
            "15825": 10175.0,
            "15830": 9981.0,
            "15835": 11043.0,
            "15840": 9142.0,
            "15845": 10836.0,
            "15850": 11379.0,
            "15855": 10075.0,
            "15860": 11368.0,
            "15865": 10258.0,
            "15870": 10459.0,
            "15875": 10054.0,
            "15880": 10136.0,
            "15885": 11631.0,
            "15890": 10241.0,
            "15895": 10687.0,
            "15900": 11545.0,
            "15905": 9144.0,
            "15910": 10974.0,
            "15915": 10979.0,
            "15920": 9763.0,
            "15925": 10779.0,
            "15930": 10184.0,
            "15935": 10218.0,
            "15940": 10892.0,
            "15945": 11579.0,
            "15950": 12766.0,
            "15955": 10602.0,
            "15960": 10409.0,
            "15965": 10541.0,
            "15970": 11799.0,
            "15975": 9589.0,
            "15980": 10256.0,
            "15985": 10399.0,
            "15990": 12269.0,
            "15995": 9792.0,
            "16000": 10511.0,
            "16005": 12027.0,
            "16010": 10251.0,
            "16015": 10731.0,
            "16020": 11577.0,
            "16025": 11973.0,
            "16030": 11707.0,
            "16035": 10936.0,
            "16040": 10201.0,
            "16045": 10578.0,
            "16050": 10440.0,
            "16055": 9912.0,
            "16060": 10561.0,
            "16065": 9638.0,
            "16070": 12340.0,
            "16075": 9727.0,
            "16080": 10655.0,
            "16085": 10423.0,
            "16090": 9752.0,
            "16095": 9887.0,
            "16100": 11249.0,
            "16105": 12092.0,
            "16110": 11044.0,
            "16115": 11140.0,
            "16120": 11312.0,
            "16125": 10046.0,
            "16130": 10767.0,
            "16135": 10907.0,
            "16140": 10560.0,
            "16145": 11666.0,
            "16150": 9504.0,
            "16155": 9952.0,
            "16160": 11120.0,
            "16165": 9440.0,
            "16170": 10498.0,
            "16175": 12548.0,
            "16180": 11077.0,
            "16185": 10925.0,
            "16190": 10830.0,
            "16195": 10525.0,
            "16200": 10086.0,
            "16205": 10524.0,
            "16210": 10437.0,
            "16215": 10497.0,
            "16220": 11019.0,
            "16225": 9866.0,
            "16230": 9590.0,
            "16235": 12705.0,
            "16240": 11113.0,
            "16245": 11707.0,
            "16250": 10760.0,
            "16255": 9543.0,
            "16260": 13091.0,
            "16265": 10484.0,
            "16270": 10419.0,
            "16275": 10375.0,
            "16280": 10605.0,
            "16285": 9547.0,
            "16290": 9995.0,
            "16295": 10097.0,
            "16300": 10543.0,
            "16305": 11005.0,
            "16310": 10511.0,
            "16315": 10773.0,
            "16320": 10926.0,
            "16325": 10105.0,
            "16330": 12096.0,
            "16335": 9626.0,
            "16340": 9080.0,
            "16345": 11544.0,
            "16350": 10296.0,
            "16355": 11556.0,
            "16360": 10143.0,
            "16365": 11593.0,
            "16370": 11705.0,
            "16375": 11968.0,
            "16380": 12430.0,
            "16385": 10085.0,
            "16390": 9466.0,
            "16395": 11947.0,
            "16400": 11046.0,
            "16405": 9828.0,
            "16410": 12076.0,
            "16415": 10249.0,
            "16420": 10027.0,
            "16425": 9930.0,
            "16430": 10540.0,
            "16435": 9559.0,
            "16440": 10499.0,
            "16445": 11898.0,
            "16450": 11226.0,
            "16455": 11220.0,
            "16460": 9533.0,
            "16465": 11753.0,
            "16470": 9435.0,
            "16475": 10949.0,
            "16480": 10294.0,
            "16485": 11539.0,
            "16490": 10477.0,
            "16495": 11006.0,
            "16500": 10799.0,
            "16505": 10428.0,
            "16510": 11908.0,
            "16515": 10159.0,
            "16520": 11169.0,
            "16525": 9938.0,
            "16530": 12228.0,
            "16535": 13848.0,
            "16540": 12188.0,
            "16545": 9780.0,
            "16550": 10502.0,
            "16555": 11126.0,
            "16560": 11250.0,
            "16565": 11625.0,
            "16570": 10925.0,
            "16575": 10827.0,
            "16580": 10962.0,
            "16585": 10160.0,
            "16590": 10337.0,
            "16595": 10421.0,
            "16600": 10780.0,
            "16605": 12546.0,
            "16610": 9846.0,
            "16615": 11214.0,
            "16620": 10215.0,
            "16625": 11103.0,
            "16630": 10628.0,
            "16635": 11972.0,
            "16640": 10599.0,
            "16645": 10571.0,
            "16650": 11114.0,
            "16655": 11791.0,
            "16660": 10584.0,
            "16665": 11934.0,
            "16670": 10457.0,
            "16675": 12838.0,
            "16680": 10676.0,
            "16685": 11061.0,
            "16690": 9758.0,
            "16695": 11027.0,
            "16700": 11206.0,
            "16705": 11097.0,
            "16710": 11135.0,
            "16715": 12063.0,
            "16720": 10385.0,
            "16725": 11746.0,
            "16730": 12490.0,
            "16735": 11718.0,
            "16740": 10756.0,
            "16745": 11314.0,
            "16750": 12065.0,
            "16755": 9963.0,
            "16760": 10013.0,
            "16765": 10938.0,
            "16770": 10093.0,
            "16775": 11957.0,
            "16780": 10502.0,
            "16785": 10278.0,
            "16790": 10655.0,
            "16795": 10922.0,
            "16800": 9969.0,
            "16805": 10886.0,
            "16810": 11846.0,
            "16815": 11023.0,
            "16820": 11940.0,
            "16825": 12355.0,
            "16830": 10514.0,
            "16835": 9909.0,
            "16840": 9333.0,
            "16845": 10157.0,
            "16850": 9887.0,
            "16855": 11924.0,
            "16860": 11690.0,
            "16865": 10634.0,
            "16870": 10485.0,
            "16875": 10349.0,
            "16880": 10664.0,
            "16885": 11478.0,
            "16890": 10999.0,
            "16895": 10120.0,
            "16900": 11567.0,
            "16905": 10413.0,
            "16910": 11053.0,
            "16915": 10066.0,
            "16920": 11555.0,
            "16925": 11284.0,
            "16930": 11066.0,
            "16935": 10089.0,
            "16940": 11399.0,
            "16945": 8753.0,
            "16950": 10240.0,
            "16955": 11974.0,
            "16960": 10297.0,
            "16965": 10397.0,
            "16970": 10602.0,
            "16975": 9714.0,
            "16980": 10086.0,
            "16985": 11536.0,
            "16990": 10167.0,
            "16995": 10538.0,
            "17000": 11796.0,
            "17005": 10950.0,
            "17010": 10606.0,
            "17015": 9933.0,
            "17020": 10505.0,
            "17025": 10132.0,
            "17030": 10293.0,
            "17035": 10838.0,
            "17040": 11282.0,
            "17045": 9957.0,
            "17050": 10596.0,
            "17055": 11445.0,
            "17060": 11503.0,
            "17065": 10018.0,
            "17070": 10691.0,
            "17075": 10213.0,
            "17080": 10400.0,
            "17085": 10856.0,
            "17090": 10369.0,
            "17095": 10359.0,
            "17100": 9854.0,
            "17105": 12038.0,
            "17110": 10126.0,
            "17115": 10913.0,
            "17120": 11044.0,
            "17125": 10315.0,
            "17130": 11872.0,
            "17135": 11550.0,
            "17140": 11064.0,
            "17145": 11675.0,
            "17150": 11101.0,
            "17155": 9331.0,
            "17160": 12444.0,
            "17165": 10930.0,
            "17170": 11416.0,
            "17175": 10018.0,
            "17180": 10442.0,
            "17185": 9839.0,
            "17190": 11025.0,
            "17195": 11402.0,
            "17200": 11507.0,
            "17205": 10830.0,
            "17210": 11941.0,
            "17215": 11177.0,
            "17220": 11004.0,
            "17225": 9941.0,
            "17230": 10803.0,
            "17235": 9849.0,
            "17240": 10455.0,
            "17245": 10644.0,
            "17250": 10310.0,
            "17255": 9648.0,
            "17260": 9157.0,
            "17265": 10136.0,
            "17270": 13671.0,
            "17275": 10718.0,
            "17280": 10407.0,
            "17285": 10510.0,
            "17290": 10871.0,
            "17295": 10379.0,
            "17300": 11168.0,
            "17305": 12050.0,
            "17310": 10834.0,
            "17315": 11510.0,
            "17320": 12140.0,
            "17325": 11581.0,
            "17330": 11680.0,
            "17335": 11918.0,
            "17340": 10923.0,
            "17345": 10435.0,
            "17350": 10527.0,
            "17355": 10238.0,
            "17360": 11209.0,
            "17365": 11874.0,
            "17370": 11610.0,
            "17375": 10193.0,
            "17380": 10351.0,
            "17385": 10825.0,
            "17390": 10176.0,
            "17395": 10861.0,
            "17400": 10623.0,
            "17405": 11379.0,
            "17410": 10686.0,
            "17415": 11423.0,
            "17420": 11433.0,
            "17425": 10724.0,
            "17430": 10103.0,
            "17435": 11020.0,
            "17440": 10892.0,
            "17445": 9815.0,
            "17450": 11401.0,
            "17455": 9398.0,
            "17460": 10926.0,
            "17465": 10265.0,
            "17470": 11083.0,
            "17475": 11012.0,
            "17480": 10180.0,
            "17485": 11660.0,
            "17490": 10073.0,
            "17495": 10699.0,
            "17500": 9942.0,
            "17505": 10333.0,
            "17510": 9659.0,
            "17515": 11579.0,
            "17520": 10792.0,
            "17525": 9282.0,
            "17530": 11171.0,
            "17535": 10662.0,
            "17540": 9177.0,
            "17545": 10343.0,
            "17550": 11040.0,
            "17555": 10466.0,
            "17560": 10581.0,
            "17565": 10427.0,
            "17570": 11527.0,
            "17575": 10031.0,
            "17580": 10630.0,
            "17585": 10752.0,
            "17590": 10539.0,
            "17595": 9366.0,
            "17600": 10326.0,
            "17605": 11334.0,
            "17610": 11575.0,
            "17615": 11047.0,
            "17620": 9467.0,
            "17625": 9084.0,
            "17630": 9930.0,
            "17635": 10084.0,
            "17640": 12016.0,
            "17645": 10940.0,
            "17650": 9949.0,
            "17655": 10460.0,
            "17660": 9469.0,
            "17665": 10618.0,
            "17670": 8867.0,
            "17675": 11751.0,
            "17680": 9452.0,
            "17685": 10829.0,
            "17690": 8752.0,
            "17695": 11329.0,
            "17700": 11508.0,
            "17705": 9559.0,
            "17710": 10775.0,
            "17715": 11417.0,
            "17720": 11335.0,
            "17725": 9731.0,
            "17730": 12381.0,
            "17735": 11893.0,
            "17740": 11603.0,
            "17745": 10884.0,
            "17750": 12263.0,
            "17755": 11508.0,
            "17760": 11226.0,
            "17765": 11749.0,
            "17770": 9898.0,
            "17775": 10915.0,
            "17780": 11097.0,
            "17785": 9615.0,
            "17790": 9333.0,
            "17795": 10947.0,
            "17800": 9997.0,
            "17805": 11795.0,
            "17810": 10324.0,
            "17815": 11453.0,
            "17820": 10721.0,
            "17825": 9806.0,
            "17830": 10748.0,
            "17835": 9723.0,
            "17840": 11207.0,
            "17845": 11363.0,
            "17850": 11237.0,
            "17855": 12508.0,
            "17860": 11694.0,
            "17865": 10741.0,
            "17870": 10708.0,
            "17875": 10401.0,
            "17880": 9927.0,
            "17885": 10831.0,
            "17890": 10077.0,
            "17895": 9760.0,
            "17900": 11107.0,
            "17905": 9785.0,
            "17910": 11099.0,
            "17915": 12293.0,
            "17920": 10709.0,
            "17925": 11536.0,
            "17930": 9647.0,
            "17935": 10110.0,
            "17940": 12181.0,
            "17945": 8906.0,
            "17950": 10248.0,
            "17955": 10814.0,
            "17960": 10461.0,
            "17965": 10438.0,
            "17970": 10610.0,
            "17975": 9620.0,
            "17980": 9704.0,
            "17985": 10724.0,
            "17990": 10783.0,
            "17995": 10640.0,
            "18000": 10553.0,
            "18005": 11417.0,
            "18010": 11000.0,
            "18015": 10363.0,
            "18020": 10201.0,
            "18025": 11668.0,
            "18030": 9371.0,
            "18035": 11310.0,
            "18040": 10448.0,
            "18045": 11877.0,
            "18050": 9784.0,
            "18055": 9798.0,
            "18060": 11609.0,
            "18065": 10229.0,
            "18070": 12199.0,
            "18075": 12285.0,
            "18080": 11225.0,
            "18085": 11982.0,
            "18090": 9832.0,
            "18095": 10136.0,
            "18100": 10881.0,
            "18105": 10424.0,
            "18110": 10914.0,
            "18115": 10369.0,
            "18120": 10952.0,
            "18125": 10312.0,
            "18130": 10041.0,
            "18135": 11897.0,
            "18140": 11031.0,
            "18145": 12707.0,
            "18150": 10407.0,
            "18155": 10300.0,
            "18160": 10316.0,
            "18165": 10884.0,
            "18170": 9936.0,
            "18175": 8751.0,
            "18180": 11468.0,
            "18185": 12857.0,
            "18190": 12023.0,
            "18195": 10030.0,
            "18200": 11476.0,
            "18205": 10406.0,
            "18210": 9679.0,
            "18215": 11921.0,
            "18220": 10592.0,
            "18225": 10804.0,
            "18230": 9986.0,
            "18235": 9865.0,
            "18240": 10808.0,
            "18245": 10413.0,
            "18250": 11285.0,
            "18255": 12217.0,
            "18260": 10225.0,
            "18265": 11278.0,
            "18270": 10396.0,
            "18275": 11030.0,
            "18280": 10790.0,
            "18285": 9296.0,
            "18290": 11517.0,
            "18295": 10811.0,
            "18300": 12235.0,
            "18305": 11989.0,
            "18310": 9962.0,
            "18315": 9901.0,
            "18320": 10612.0,
            "18325": 10657.0,
            "18330": 10419.0,
            "18335": 9529.0,
            "18340": 9484.0,
            "18345": 11340.0,
            "18350": 14213.0,
            "18355": 10636.0,
            "18360": 11114.0,
            "18365": 12229.0,
            "18370": 9887.0,
            "18375": 11738.0,
            "18380": 10703.0,
            "18385": 10003.0,
            "18390": 10018.0,
            "18395": 11113.0,
            "18400": 9549.0,
            "18405": 11329.0,
            "18410": 10433.0,
            "18415": 11021.0,
            "18420": 10389.0,
            "18425": 9751.0,
            "18430": 11352.0,
            "18435": 9732.0,
            "18440": 13026.0,
            "18445": 10175.0,
            "18450": 11454.0,
            "18455": 10785.0,
            "18460": 10980.0,
            "18465": 10597.0,
            "18470": 12162.0,
            "18475": 10667.0,
            "18480": 10689.0,
            "18485": 12394.0,
            "18490": 11510.0,
            "18495": 10601.0,
            "18500": 10511.0,
            "18505": 10400.0,
            "18510": 12663.0,
            "18515": 11408.0,
            "18520": 10783.0,
            "18525": 10597.0,
            "18530": 11030.0,
            "18535": 9287.0,
            "18540": 10342.0,
            "18545": 9751.0,
            "18550": 10441.0,
            "18555": 12136.0,
            "18560": 12421.0,
            "18565": "nan",
            "18570": 11187.0,
            "18575": 9924.0,
            "18580": 10935.0,
            "18585": 9372.0,
            "18590": 10318.0,
            "18595": 10104.0,
            "18600": 10710.0,
            "18605": 9307.0,
            "18610": 9609.0,
            "18615": 9374.0,
            "18620": 10600.0,
            "18625": 10734.0,
            "18630": 10106.0,
            "18635": 11161.0,
            "18640": 9744.0,
            "18645": 11067.0,
            "18650": 10082.0,
            "18655": 9555.0,
            "18660": 10472.0,
            "18665": 12673.0,
            "18670": 11082.0,
            "18675": 10374.0,
            "18680": 10370.0,
            "18685": 11299.0,
            "18690": 10638.0,
            "18695": 10384.0,
            "18700": 11283.0,
            "18705": 12105.0,
            "18710": 9527.0,
            "18715": 11161.0,
            "18720": 10263.0,
            "18725": 12090.0,
            "18730": 10039.0,
            "18735": 9248.0,
            "18740": 11729.0,
            "18745": 12584.0,
            "18750": 11070.0,
            "18755": 10916.0,
            "18760": 11222.0,
            "18765": 10687.0,
            "18770": 10343.0,
            "18775": 12320.0,
            "18780": 11131.0,
            "18785": 10947.0,
            "18790": 11009.0,
            "18795": 11725.0,
            "18800": 8909.0,
            "18805": 10857.0,
            "18810": 10593.0,
            "18815": 10911.0,
            "18820": 10374.0,
            "18825": 12181.0,
            "18830": 9872.0,
            "18835": 11014.0,
            "18840": 11950.0,
            "18845": 10999.0,
            "18850": 10485.0,
            "18855": 10258.0,
            "18860": 10289.0,
            "18865": 9705.0,
            "18870": 10596.0,
            "18875": 11176.0,
            "18880": 10163.0,
            "18885": 13382.0,
            "18890": 11822.0,
            "18895": 10407.0,
            "18900": 12571.0,
            "18905": 12023.0,
            "18910": 12189.0,
            "18915": 9784.0,
            "18920": 11152.0,
            "18925": 9688.0,
            "18930": 10034.0,
            "18935": 11168.0,
            "18940": 11273.0,
            "18945": 10597.0,
            "18950": 11367.0,
            "18955": 9966.0,
            "18960": 10287.0,
            "18965": 11225.0,
            "18970": 12279.0,
            "18975": 10354.0,
            "18980": 10605.0,
            "18985": 9928.0,
            "18990": 10080.0,
            "18995": 11543.0,
            "19000": 10579.0,
            "19005": 9816.0,
            "19010": 12884.0,
            "19015": 9998.0,
            "19020": 12183.0,
            "19025": 9830.0,
            "19030": 10086.0,
            "19035": 11382.0,
            "19040": 10686.0,
            "19045": 10829.0,
            "19050": 9223.0,
            "19055": 12381.0,
            "19060": 11110.0,
            "19065": 10016.0,
            "19070": 11223.0,
            "19075": 10151.0,
            "19080": 11072.0,
            "19085": 9944.0,
            "19090": 10073.0,
            "19095": 9878.0,
            "19100": 13643.0,
            "19105": 11191.0,
            "19110": 10914.0,
            "19115": 10107.0,
            "19120": 9595.0,
            "19125": 11347.0,
            "19130": 11161.0,
            "19135": 11181.0,
            "19140": 11434.0,
            "19145": 9526.0,
            "19150": 9731.0,
            "19155": 9976.0,
            "19160": 11966.0,
            "19165": 10606.0,
            "19170": 11105.0,
            "19175": 11438.0,
            "19180": 11739.0,
            "19185": 10841.0,
            "19190": 11044.0,
            "19195": 11216.0,
            "19200": 9326.0,
            "19205": 10242.0,
            "19210": 12579.0,
            "19215": 13157.0,
            "19220": 11819.0,
            "19225": 12088.0,
            "19230": 10196.0,
            "19235": 10074.0,
            "19240": 10901.0,
            "19245": 9625.0,
            "19250": 10048.0,
            "19255": 11326.0,
            "19260": 11074.0,
            "19265": 10695.0,
            "19270": 13737.0,
            "19275": 11299.0,
            "19280": 11339.0,
            "19285": 10890.0,
            "19290": 9739.0,
            "19295": 10055.0,
            "19300": 10465.0,
            "19305": 10256.0,
            "19310": 10481.0,
            "19315": 10620.0,
            "19320": 11147.0,
            "19325": 9711.0,
            "19330": 9934.0,
            "19335": 9599.0,
            "19340": 10089.0,
            "19345": 11015.0,
            "19350": 12549.0,
            "19355": 10669.0,
            "19360": 9753.0,
            "19365": 8608.0,
            "19370": 10087.0,
            "19375": 11338.0,
            "19380": 10471.0,
            "19385": 11612.0,
            "19390": 9862.0,
            "19395": 11126.0,
            "19400": 10856.0,
            "19405": 9906.0,
            "19410": 10100.0,
            "19415": 10353.0,
            "19420": 11609.0,
            "19425": 11885.0,
            "19430": 11709.0,
            "19435": 10083.0,
            "19440": 10872.0,
            "19445": 11327.0,
            "19450": 10908.0,
            "19455": 10460.0,
            "19460": 8811.0,
            "19465": 10549.0,
            "19470": 10444.0,
            "19475": 10190.0,
            "19480": 11093.0,
            "19485": 10127.0,
            "19490": 10076.0,
            "19495": 10524.0,
            "19500": 10915.0,
            "19505": 10691.0,
            "19510": 10811.0,
            "19515": 10639.0,
            "19520": 11500.0,
            "19525": 10731.0,
            "19530": 9871.0,
            "19535": 11966.0,
            "19540": 9373.0,
            "19545": 9923.0,
            "19550": 10515.0,
            "19555": 11406.0,
            "19560": 10370.0,
            "19565": 10394.0,
            "19570": 10946.0,
            "19575": 11293.0,
            "19580": 11103.0,
            "19585": 10523.0,
            "19590": 9262.0,
            "19595": 10764.0,
            "19600": 11574.0,
            "19605": 10680.0,
            "19610": 11332.0,
            "19615": 9828.0,
            "19620": 10078.0,
            "19625": 10095.0,
            "19630": 10731.0,
            "19635": 11046.0,
            "19640": 10606.0,
            "19645": 10433.0,
            "19650": 10944.0,
            "19655": 10245.0,
            "19660": 11403.0,
            "19665": 10803.0,
            "19670": 11135.0,
            "19675": 11173.0,
            "19680": 9453.0,
            "19685": 10949.0,
            "19690": 10106.0,
            "19695": 10032.0,
            "19700": 10642.0,
            "19705": 10536.0,
            "19710": 10088.0,
            "19715": 11065.0,
            "19720": 10315.0,
            "19725": 10039.0,
            "19730": 9249.0,
            "19735": 10295.0,
            "19740": 10575.0,
            "19745": 11072.0,
            "19750": 10562.0,
            "19755": 9308.0,
            "19760": 10443.0,
            "19765": 11288.0,
            "19770": 11026.0,
            "19775": 11632.0,
            "19780": 11377.0,
            "19785": 10427.0,
            "19790": 11408.0,
            "19795": 11073.0,
            "19800": 9552.0,
            "19805": 9956.0,
            "19810": 10915.0,
            "19815": 10304.0,
            "19820": 10866.0,
            "19825": 10888.0,
            "19830": 11462.0,
            "19835": 9501.0,
            "19840": 10563.0,
            "19845": 10041.0,
            "19850": 12615.0,
            "19855": 11685.0,
            "19860": 12997.0,
            "19865": 10169.0,
            "19870": 10032.0,
            "19875": 11184.0,
            "19880": 9984.0,
            "19885": 10176.0,
            "19890": 11873.0,
            "19895": 10715.0,
            "19900": 9399.0,
            "19905": 11108.0,
            "19910": 11826.0,
            "19915": 10722.0,
            "19920": 11516.0,
            "19925": 12052.0,
            "19930": 11207.0,
            "19935": 10748.0,
            "19940": 10362.0,
            "19945": 10943.0,
            "19950": 10952.0,
            "19955": 10788.0,
            "19960": 11834.0,
            "19965": 11037.0,
            "19970": 9944.0,
            "19975": 11620.0,
            "19980": 10554.0,
            "19985": 9749.0,
            "19990": 8536.0,
            "19995": 8926.0,
            "20000": 11390.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 20000,
        "step_interval": 5,
        "values": {
            "1": "nan",
            "5": "nan",
            "10": "nan",
            "15": "nan",
            "20": "nan",
            "25": "nan",
            "30": "nan",
            "35": "nan",
            "40": "nan",
            "45": "nan",
            "50": "nan",
            "55": "nan",
            "60": "nan",
            "65": "nan",
            "70": "nan",
            "75": "nan",
            "80": "nan",
            "85": "nan",
            "90": "nan",
            "95": "nan",
            "100": 0.64539,
            "105": "nan",
            "110": "nan",
            "115": "nan",
            "120": "nan",
            "125": "nan",
            "130": "nan",
            "135": "nan",
            "140": "nan",
            "145": "nan",
            "150": "nan",
            "155": "nan",
            "160": "nan",
            "165": "nan",
            "170": "nan",
            "175": "nan",
            "180": "nan",
            "185": "nan",
            "190": "nan",
            "195": "nan",
            "200": 0.29039,
            "205": "nan",
            "210": "nan",
            "215": "nan",
            "220": "nan",
            "225": "nan",
            "230": "nan",
            "235": "nan",
            "240": "nan",
            "245": "nan",
            "250": "nan",
            "255": "nan",
            "260": "nan",
            "265": "nan",
            "270": "nan",
            "275": "nan",
            "280": "nan",
            "285": "nan",
            "290": "nan",
            "295": "nan",
            "300": 0.2969,
            "305": "nan",
            "310": "nan",
            "315": "nan",
            "320": "nan",
            "325": "nan",
            "330": "nan",
            "335": "nan",
            "340": "nan",
            "345": "nan",
            "350": "nan",
            "355": "nan",
            "360": "nan",
            "365": "nan",
            "370": "nan",
            "375": "nan",
            "380": "nan",
            "385": "nan",
            "390": "nan",
            "395": "nan",
            "400": 0.31248,
            "405": "nan",
            "410": "nan",
            "415": "nan",
            "420": "nan",
            "425": "nan",
            "430": "nan",
            "435": "nan",
            "440": "nan",
            "445": "nan",
            "450": "nan",
            "455": "nan",
            "460": "nan",
            "465": "nan",
            "470": "nan",
            "475": "nan",
            "480": "nan",
            "485": "nan",
            "490": "nan",
            "495": "nan",
            "500": 0.30005,
            "505": "nan",
            "510": "nan",
            "515": "nan",
            "520": "nan",
            "525": "nan",
            "530": "nan",
            "535": "nan",
            "540": "nan",
            "545": "nan",
            "550": "nan",
            "555": "nan",
            "560": "nan",
            "565": "nan",
            "570": "nan",
            "575": "nan",
            "580": "nan",
            "585": "nan",
            "590": "nan",
            "595": "nan",
            "600": 0.30025,
            "605": "nan",
            "610": "nan",
            "615": "nan",
            "620": "nan",
            "625": "nan",
            "630": "nan",
            "635": "nan",
            "640": "nan",
            "645": "nan",
            "650": "nan",
            "655": "nan",
            "660": "nan",
            "665": "nan",
            "670": "nan",
            "675": "nan",
            "680": "nan",
            "685": "nan",
            "690": "nan",
            "695": "nan",
            "700": 0.29942,
            "705": "nan",
            "710": "nan",
            "715": "nan",
            "720": "nan",
            "725": "nan",
            "730": "nan",
            "735": "nan",
            "740": "nan",
            "745": "nan",
            "750": "nan",
            "755": "nan",
            "760": "nan",
            "765": "nan",
            "770": "nan",
            "775": "nan",
            "780": "nan",
            "785": "nan",
            "790": "nan",
            "795": "nan",
            "800": 0.29104,
            "805": "nan",
            "810": "nan",
            "815": "nan",
            "820": "nan",
            "825": "nan",
            "830": "nan",
            "835": "nan",
            "840": "nan",
            "845": "nan",
            "850": "nan",
            "855": "nan",
            "860": "nan",
            "865": "nan",
            "870": "nan",
            "875": "nan",
            "880": "nan",
            "885": "nan",
            "890": "nan",
            "895": "nan",
            "900": 0.30737,
            "905": "nan",
            "910": "nan",
            "915": "nan",
            "920": "nan",
            "925": "nan",
            "930": "nan",
            "935": "nan",
            "940": "nan",
            "945": "nan",
            "950": "nan",
            "955": "nan",
            "960": "nan",
            "965": "nan",
            "970": "nan",
            "975": "nan",
            "980": "nan",
            "985": "nan",
            "990": "nan",
            "995": "nan",
            "1000": 0.29366,
            "1005": "nan",
            "1010": "nan",
            "1015": "nan",
            "1020": "nan",
            "1025": "nan",
            "1030": "nan",
            "1035": "nan",
            "1040": "nan",
            "1045": "nan",
            "1050": "nan",
            "1055": "nan",
            "1060": "nan",
            "1065": "nan",
            "1070": "nan",
            "1075": "nan",
            "1080": "nan",
            "1085": "nan",
            "1090": "nan",
            "1095": "nan",
            "1100": 0.30117,
            "1105": "nan",
            "1110": "nan",
            "1115": "nan",
            "1120": "nan",
            "1125": "nan",
            "1130": "nan",
            "1135": "nan",
            "1140": "nan",
            "1145": "nan",
            "1150": "nan",
            "1155": "nan",
            "1160": "nan",
            "1165": "nan",
            "1170": "nan",
            "1175": "nan",
            "1180": "nan",
            "1185": "nan",
            "1190": "nan",
            "1195": "nan",
            "1200": 0.30515,
            "1205": "nan",
            "1210": "nan",
            "1215": "nan",
            "1220": "nan",
            "1225": "nan",
            "1230": "nan",
            "1235": "nan",
            "1240": "nan",
            "1245": "nan",
            "1250": "nan",
            "1255": "nan",
            "1260": "nan",
            "1265": "nan",
            "1270": "nan",
            "1275": "nan",
            "1280": "nan",
            "1285": "nan",
            "1290": "nan",
            "1295": "nan",
            "1300": 0.29886,
            "1305": "nan",
            "1310": "nan",
            "1315": "nan",
            "1320": "nan",
            "1325": "nan",
            "1330": "nan",
            "1335": "nan",
            "1340": "nan",
            "1345": "nan",
            "1350": "nan",
            "1355": "nan",
            "1360": "nan",
            "1365": "nan",
            "1370": "nan",
            "1375": "nan",
            "1380": "nan",
            "1385": "nan",
            "1390": "nan",
            "1395": "nan",
            "1400": 0.31284,
            "1405": "nan",
            "1410": "nan",
            "1415": "nan",
            "1420": "nan",
            "1425": "nan",
            "1430": "nan",
            "1435": "nan",
            "1440": "nan",
            "1445": "nan",
            "1450": "nan",
            "1455": "nan",
            "1460": "nan",
            "1465": "nan",
            "1470": "nan",
            "1475": "nan",
            "1480": "nan",
            "1485": "nan",
            "1490": "nan",
            "1495": "nan",
            "1500": 0.30994,
            "1505": "nan",
            "1510": "nan",
            "1515": "nan",
            "1520": "nan",
            "1525": "nan",
            "1530": "nan",
            "1535": "nan",
            "1540": "nan",
            "1545": "nan",
            "1550": "nan",
            "1555": "nan",
            "1560": "nan",
            "1565": "nan",
            "1570": "nan",
            "1575": "nan",
            "1580": "nan",
            "1585": "nan",
            "1590": "nan",
            "1595": "nan",
            "1600": 0.29741,
            "1605": "nan",
            "1610": "nan",
            "1615": "nan",
            "1620": "nan",
            "1625": "nan",
            "1630": "nan",
            "1635": "nan",
            "1640": "nan",
            "1645": "nan",
            "1650": "nan",
            "1655": "nan",
            "1660": "nan",
            "1665": "nan",
            "1670": "nan",
            "1675": "nan",
            "1680": "nan",
            "1685": "nan",
            "1690": "nan",
            "1695": "nan",
            "1700": 0.29425,
            "1705": "nan",
            "1710": "nan",
            "1715": "nan",
            "1720": "nan",
            "1725": "nan",
            "1730": "nan",
            "1735": "nan",
            "1740": "nan",
            "1745": "nan",
            "1750": "nan",
            "1755": "nan",
            "1760": "nan",
            "1765": "nan",
            "1770": "nan",
            "1775": "nan",
            "1780": "nan",
            "1785": "nan",
            "1790": "nan",
            "1795": "nan",
            "1800": 0.29731,
            "1805": "nan",
            "1810": "nan",
            "1815": "nan",
            "1820": "nan",
            "1825": "nan",
            "1830": "nan",
            "1835": "nan",
            "1840": "nan",
            "1845": "nan",
            "1850": "nan",
            "1855": "nan",
            "1860": "nan",
            "1865": "nan",
            "1870": "nan",
            "1875": "nan",
            "1880": "nan",
            "1885": "nan",
            "1890": "nan",
            "1895": "nan",
            "1900": 0.29135,
            "1905": "nan",
            "1910": "nan",
            "1915": "nan",
            "1920": "nan",
            "1925": "nan",
            "1930": "nan",
            "1935": "nan",
            "1940": "nan",
            "1945": "nan",
            "1950": "nan",
            "1955": "nan",
            "1960": "nan",
            "1965": "nan",
            "1970": "nan",
            "1975": "nan",
            "1980": "nan",
            "1985": "nan",
            "1990": "nan",
            "1995": "nan",
            "2000": 0.29396,
            "2005": "nan",
            "2010": "nan",
            "2015": "nan",
            "2020": "nan",
            "2025": "nan",
            "2030": "nan",
            "2035": "nan",
            "2040": "nan",
            "2045": "nan",
            "2050": "nan",
            "2055": "nan",
            "2060": "nan",
            "2065": "nan",
            "2070": "nan",
            "2075": "nan",
            "2080": "nan",
            "2085": "nan",
            "2090": "nan",
            "2095": "nan",
            "2100": 0.29868,
            "2105": "nan",
            "2110": "nan",
            "2115": "nan",
            "2120": "nan",
            "2125": "nan",
            "2130": "nan",
            "2135": "nan",
            "2140": "nan",
            "2145": "nan",
            "2150": "nan",
            "2155": "nan",
            "2160": "nan",
            "2165": "nan",
            "2170": "nan",
            "2175": "nan",
            "2180": "nan",
            "2185": "nan",
            "2190": "nan",
            "2195": "nan",
            "2200": 0.29659,
            "2205": "nan",
            "2210": "nan",
            "2215": "nan",
            "2220": "nan",
            "2225": "nan",
            "2230": "nan",
            "2235": "nan",
            "2240": "nan",
            "2245": "nan",
            "2250": "nan",
            "2255": "nan",
            "2260": "nan",
            "2265": "nan",
            "2270": "nan",
            "2275": "nan",
            "2280": "nan",
            "2285": "nan",
            "2290": "nan",
            "2295": "nan",
            "2300": 0.30668,
            "2305": "nan",
            "2310": "nan",
            "2315": "nan",
            "2320": "nan",
            "2325": "nan",
            "2330": "nan",
            "2335": "nan",
            "2340": "nan",
            "2345": "nan",
            "2350": "nan",
            "2355": "nan",
            "2360": "nan",
            "2365": "nan",
            "2370": "nan",
            "2375": "nan",
            "2380": "nan",
            "2385": "nan",
            "2390": "nan",
            "2395": "nan",
            "2400": 0.29147,
            "2405": "nan",
            "2410": "nan",
            "2415": "nan",
            "2420": "nan",
            "2425": "nan",
            "2430": "nan",
            "2435": "nan",
            "2440": "nan",
            "2445": "nan",
            "2450": "nan",
            "2455": "nan",
            "2460": "nan",
            "2465": "nan",
            "2470": "nan",
            "2475": "nan",
            "2480": "nan",
            "2485": "nan",
            "2490": "nan",
            "2495": "nan",
            "2500": 0.30306,
            "2505": "nan",
            "2510": "nan",
            "2515": "nan",
            "2520": "nan",
            "2525": "nan",
            "2530": "nan",
            "2535": "nan",
            "2540": "nan",
            "2545": "nan",
            "2550": "nan",
            "2555": "nan",
            "2560": "nan",
            "2565": "nan",
            "2570": "nan",
            "2575": "nan",
            "2580": "nan",
            "2585": "nan",
            "2590": "nan",
            "2595": "nan",
            "2600": 0.29399,
            "2605": "nan",
            "2610": "nan",
            "2615": "nan",
            "2620": "nan",
            "2625": "nan",
            "2630": "nan",
            "2635": "nan",
            "2640": "nan",
            "2645": "nan",
            "2650": "nan",
            "2655": "nan",
            "2660": "nan",
            "2665": "nan",
            "2670": "nan",
            "2675": "nan",
            "2680": "nan",
            "2685": "nan",
            "2690": "nan",
            "2695": "nan",
            "2700": 0.30369,
            "2705": "nan",
            "2710": "nan",
            "2715": "nan",
            "2720": "nan",
            "2725": "nan",
            "2730": "nan",
            "2735": "nan",
            "2740": "nan",
            "2745": "nan",
            "2750": "nan",
            "2755": "nan",
            "2760": "nan",
            "2765": "nan",
            "2770": "nan",
            "2775": "nan",
            "2780": "nan",
            "2785": "nan",
            "2790": "nan",
            "2795": "nan",
            "2800": 0.30933,
            "2805": "nan",
            "2810": "nan",
            "2815": "nan",
            "2820": "nan",
            "2825": "nan",
            "2830": "nan",
            "2835": "nan",
            "2840": "nan",
            "2845": "nan",
            "2850": "nan",
            "2855": "nan",
            "2860": "nan",
            "2865": "nan",
            "2870": "nan",
            "2875": "nan",
            "2880": "nan",
            "2885": "nan",
            "2890": "nan",
            "2895": "nan",
            "2900": 0.29764,
            "2905": "nan",
            "2910": "nan",
            "2915": "nan",
            "2920": "nan",
            "2925": "nan",
            "2930": "nan",
            "2935": "nan",
            "2940": "nan",
            "2945": "nan",
            "2950": "nan",
            "2955": "nan",
            "2960": "nan",
            "2965": "nan",
            "2970": "nan",
            "2975": "nan",
            "2980": "nan",
            "2985": "nan",
            "2990": "nan",
            "2995": "nan",
            "3000": 0.29713,
            "3005": "nan",
            "3010": "nan",
            "3015": "nan",
            "3020": "nan",
            "3025": "nan",
            "3030": "nan",
            "3035": "nan",
            "3040": "nan",
            "3045": "nan",
            "3050": "nan",
            "3055": "nan",
            "3060": "nan",
            "3065": "nan",
            "3070": "nan",
            "3075": "nan",
            "3080": "nan",
            "3085": "nan",
            "3090": "nan",
            "3095": "nan",
            "3100": 0.30356,
            "3105": "nan",
            "3110": "nan",
            "3115": "nan",
            "3120": "nan",
            "3125": "nan",
            "3130": "nan",
            "3135": "nan",
            "3140": "nan",
            "3145": "nan",
            "3150": "nan",
            "3155": "nan",
            "3160": "nan",
            "3165": "nan",
            "3170": "nan",
            "3175": "nan",
            "3180": "nan",
            "3185": "nan",
            "3190": "nan",
            "3195": "nan",
            "3200": 0.29333,
            "3205": "nan",
            "3210": "nan",
            "3215": "nan",
            "3220": "nan",
            "3225": "nan",
            "3230": "nan",
            "3235": "nan",
            "3240": "nan",
            "3245": "nan",
            "3250": "nan",
            "3255": "nan",
            "3260": "nan",
            "3265": "nan",
            "3270": "nan",
            "3275": "nan",
            "3280": "nan",
            "3285": "nan",
            "3290": "nan",
            "3295": "nan",
            "3300": 0.28855,
            "3305": "nan",
            "3310": "nan",
            "3315": "nan",
            "3320": "nan",
            "3325": "nan",
            "3330": "nan",
            "3335": "nan",
            "3340": "nan",
            "3345": "nan",
            "3350": "nan",
            "3355": "nan",
            "3360": "nan",
            "3365": "nan",
            "3370": "nan",
            "3375": "nan",
            "3380": "nan",
            "3385": "nan",
            "3390": "nan",
            "3395": "nan",
            "3400": 0.30944,
            "3405": "nan",
            "3410": "nan",
            "3415": "nan",
            "3420": "nan",
            "3425": "nan",
            "3430": "nan",
            "3435": "nan",
            "3440": "nan",
            "3445": "nan",
            "3450": "nan",
            "3455": "nan",
            "3460": "nan",
            "3465": "nan",
            "3470": "nan",
            "3475": "nan",
            "3480": "nan",
            "3485": "nan",
            "3490": "nan",
            "3495": "nan",
            "3500": 0.29328,
            "3505": "nan",
            "3510": "nan",
            "3515": "nan",
            "3520": "nan",
            "3525": "nan",
            "3530": "nan",
            "3535": "nan",
            "3540": "nan",
            "3545": "nan",
            "3550": "nan",
            "3555": "nan",
            "3560": "nan",
            "3565": "nan",
            "3570": "nan",
            "3575": "nan",
            "3580": "nan",
            "3585": "nan",
            "3590": "nan",
            "3595": "nan",
            "3600": 0.2964,
            "3605": "nan",
            "3610": "nan",
            "3615": "nan",
            "3620": "nan",
            "3625": "nan",
            "3630": "nan",
            "3635": "nan",
            "3640": "nan",
            "3645": "nan",
            "3650": "nan",
            "3655": "nan",
            "3660": "nan",
            "3665": "nan",
            "3670": "nan",
            "3675": "nan",
            "3680": "nan",
            "3685": "nan",
            "3690": "nan",
            "3695": "nan",
            "3700": 0.30483,
            "3705": "nan",
            "3710": "nan",
            "3715": "nan",
            "3720": "nan",
            "3725": "nan",
            "3730": "nan",
            "3735": "nan",
            "3740": "nan",
            "3745": "nan",
            "3750": "nan",
            "3755": "nan",
            "3760": "nan",
            "3765": "nan",
            "3770": "nan",
            "3775": "nan",
            "3780": "nan",
            "3785": "nan",
            "3790": "nan",
            "3795": "nan",
            "3800": 0.29341,
            "3805": "nan",
            "3810": "nan",
            "3815": "nan",
            "3820": "nan",
            "3825": "nan",
            "3830": "nan",
            "3835": "nan",
            "3840": "nan",
            "3845": "nan",
            "3850": "nan",
            "3855": "nan",
            "3860": "nan",
            "3865": "nan",
            "3870": "nan",
            "3875": "nan",
            "3880": "nan",
            "3885": "nan",
            "3890": "nan",
            "3895": "nan",
            "3900": 0.28771,
            "3905": "nan",
            "3910": "nan",
            "3915": "nan",
            "3920": "nan",
            "3925": "nan",
            "3930": "nan",
            "3935": "nan",
            "3940": "nan",
            "3945": "nan",
            "3950": "nan",
            "3955": "nan",
            "3960": "nan",
            "3965": "nan",
            "3970": "nan",
            "3975": "nan",
            "3980": "nan",
            "3985": "nan",
            "3990": "nan",
            "3995": "nan",
            "4000": 0.29046,
            "4005": "nan",
            "4010": "nan",
            "4015": "nan",
            "4020": "nan",
            "4025": "nan",
            "4030": "nan",
            "4035": "nan",
            "4040": "nan",
            "4045": "nan",
            "4050": "nan",
            "4055": "nan",
            "4060": "nan",
            "4065": "nan",
            "4070": "nan",
            "4075": "nan",
            "4080": "nan",
            "4085": "nan",
            "4090": "nan",
            "4095": "nan",
            "4100": 0.30601,
            "4105": "nan",
            "4110": "nan",
            "4115": "nan",
            "4120": "nan",
            "4125": "nan",
            "4130": "nan",
            "4135": "nan",
            "4140": "nan",
            "4145": "nan",
            "4150": "nan",
            "4155": "nan",
            "4160": "nan",
            "4165": "nan",
            "4170": "nan",
            "4175": "nan",
            "4180": "nan",
            "4185": "nan",
            "4190": "nan",
            "4195": "nan",
            "4200": 0.29169,
            "4205": "nan",
            "4210": "nan",
            "4215": "nan",
            "4220": "nan",
            "4225": "nan",
            "4230": "nan",
            "4235": "nan",
            "4240": "nan",
            "4245": "nan",
            "4250": "nan",
            "4255": "nan",
            "4260": "nan",
            "4265": "nan",
            "4270": "nan",
            "4275": "nan",
            "4280": "nan",
            "4285": "nan",
            "4290": "nan",
            "4295": "nan",
            "4300": 0.29779,
            "4305": "nan",
            "4310": "nan",
            "4315": "nan",
            "4320": "nan",
            "4325": "nan",
            "4330": "nan",
            "4335": "nan",
            "4340": "nan",
            "4345": "nan",
            "4350": "nan",
            "4355": "nan",
            "4360": "nan",
            "4365": "nan",
            "4370": "nan",
            "4375": "nan",
            "4380": "nan",
            "4385": "nan",
            "4390": "nan",
            "4395": "nan",
            "4400": 0.31312,
            "4405": "nan",
            "4410": "nan",
            "4415": "nan",
            "4420": "nan",
            "4425": "nan",
            "4430": "nan",
            "4435": "nan",
            "4440": "nan",
            "4445": "nan",
            "4450": "nan",
            "4455": "nan",
            "4460": "nan",
            "4465": "nan",
            "4470": "nan",
            "4475": "nan",
            "4480": "nan",
            "4485": "nan",
            "4490": "nan",
            "4495": "nan",
            "4500": 0.29557,
            "4505": "nan",
            "4510": "nan",
            "4515": "nan",
            "4520": "nan",
            "4525": "nan",
            "4530": "nan",
            "4535": "nan",
            "4540": "nan",
            "4545": "nan",
            "4550": "nan",
            "4555": "nan",
            "4560": "nan",
            "4565": "nan",
            "4570": "nan",
            "4575": "nan",
            "4580": "nan",
            "4585": "nan",
            "4590": "nan",
            "4595": "nan",
            "4600": 0.29602,
            "4605": "nan",
            "4610": "nan",
            "4615": "nan",
            "4620": "nan",
            "4625": "nan",
            "4630": "nan",
            "4635": "nan",
            "4640": "nan",
            "4645": "nan",
            "4650": "nan",
            "4655": "nan",
            "4660": "nan",
            "4665": "nan",
            "4670": "nan",
            "4675": "nan",
            "4680": "nan",
            "4685": "nan",
            "4690": "nan",
            "4695": "nan",
            "4700": 0.29316,
            "4705": "nan",
            "4710": "nan",
            "4715": "nan",
            "4720": "nan",
            "4725": "nan",
            "4730": "nan",
            "4735": "nan",
            "4740": "nan",
            "4745": "nan",
            "4750": "nan",
            "4755": "nan",
            "4760": "nan",
            "4765": "nan",
            "4770": "nan",
            "4775": "nan",
            "4780": "nan",
            "4785": "nan",
            "4790": "nan",
            "4795": "nan",
            "4800": 0.29488,
            "4805": "nan",
            "4810": "nan",
            "4815": "nan",
            "4820": "nan",
            "4825": "nan",
            "4830": "nan",
            "4835": "nan",
            "4840": "nan",
            "4845": "nan",
            "4850": "nan",
            "4855": "nan",
            "4860": "nan",
            "4865": "nan",
            "4870": "nan",
            "4875": "nan",
            "4880": "nan",
            "4885": "nan",
            "4890": "nan",
            "4895": "nan",
            "4900": 0.29017,
            "4905": "nan",
            "4910": "nan",
            "4915": "nan",
            "4920": "nan",
            "4925": "nan",
            "4930": "nan",
            "4935": "nan",
            "4940": "nan",
            "4945": "nan",
            "4950": "nan",
            "4955": "nan",
            "4960": "nan",
            "4965": "nan",
            "4970": "nan",
            "4975": "nan",
            "4980": "nan",
            "4985": "nan",
            "4990": "nan",
            "4995": "nan",
            "5000": 0.29473,
            "5005": "nan",
            "5010": "nan",
            "5015": "nan",
            "5020": "nan",
            "5025": "nan",
            "5030": "nan",
            "5035": "nan",
            "5040": "nan",
            "5045": "nan",
            "5050": "nan",
            "5055": "nan",
            "5060": "nan",
            "5065": "nan",
            "5070": "nan",
            "5075": "nan",
            "5080": "nan",
            "5085": "nan",
            "5090": "nan",
            "5095": "nan",
            "5100": 0.2914,
            "5105": "nan",
            "5110": "nan",
            "5115": "nan",
            "5120": "nan",
            "5125": "nan",
            "5130": "nan",
            "5135": "nan",
            "5140": "nan",
            "5145": "nan",
            "5150": "nan",
            "5155": "nan",
            "5160": "nan",
            "5165": "nan",
            "5170": "nan",
            "5175": "nan",
            "5180": "nan",
            "5185": "nan",
            "5190": "nan",
            "5195": "nan",
            "5200": 0.30349,
            "5205": "nan",
            "5210": "nan",
            "5215": "nan",
            "5220": "nan",
            "5225": "nan",
            "5230": "nan",
            "5235": "nan",
            "5240": "nan",
            "5245": "nan",
            "5250": "nan",
            "5255": "nan",
            "5260": "nan",
            "5265": "nan",
            "5270": "nan",
            "5275": "nan",
            "5280": "nan",
            "5285": "nan",
            "5290": "nan",
            "5295": "nan",
            "5300": 0.29229,
            "5305": "nan",
            "5310": "nan",
            "5315": "nan",
            "5320": "nan",
            "5325": "nan",
            "5330": "nan",
            "5335": "nan",
            "5340": "nan",
            "5345": "nan",
            "5350": "nan",
            "5355": "nan",
            "5360": "nan",
            "5365": "nan",
            "5370": "nan",
            "5375": "nan",
            "5380": "nan",
            "5385": "nan",
            "5390": "nan",
            "5395": "nan",
            "5400": 0.30084,
            "5405": "nan",
            "5410": "nan",
            "5415": "nan",
            "5420": "nan",
            "5425": "nan",
            "5430": "nan",
            "5435": "nan",
            "5440": "nan",
            "5445": "nan",
            "5450": "nan",
            "5455": "nan",
            "5460": "nan",
            "5465": "nan",
            "5470": "nan",
            "5475": "nan",
            "5480": "nan",
            "5485": "nan",
            "5490": "nan",
            "5495": "nan",
            "5500": 0.30346,
            "5505": "nan",
            "5510": "nan",
            "5515": "nan",
            "5520": "nan",
            "5525": "nan",
            "5530": "nan",
            "5535": "nan",
            "5540": "nan",
            "5545": "nan",
            "5550": "nan",
            "5555": "nan",
            "5560": "nan",
            "5565": "nan",
            "5570": "nan",
            "5575": "nan",
            "5580": "nan",
            "5585": "nan",
            "5590": "nan",
            "5595": "nan",
            "5600": 0.29987,
            "5605": "nan",
            "5610": "nan",
            "5615": "nan",
            "5620": "nan",
            "5625": "nan",
            "5630": "nan",
            "5635": "nan",
            "5640": "nan",
            "5645": "nan",
            "5650": "nan",
            "5655": "nan",
            "5660": "nan",
            "5665": "nan",
            "5670": "nan",
            "5675": "nan",
            "5680": "nan",
            "5685": "nan",
            "5690": "nan",
            "5695": "nan",
            "5700": 0.29203,
            "5705": "nan",
            "5710": "nan",
            "5715": "nan",
            "5720": "nan",
            "5725": "nan",
            "5730": "nan",
            "5735": "nan",
            "5740": "nan",
            "5745": "nan",
            "5750": "nan",
            "5755": "nan",
            "5760": "nan",
            "5765": "nan",
            "5770": "nan",
            "5775": "nan",
            "5780": "nan",
            "5785": "nan",
            "5790": "nan",
            "5795": "nan",
            "5800": 0.29203,
            "5805": "nan",
            "5810": "nan",
            "5815": "nan",
            "5820": "nan",
            "5825": "nan",
            "5830": "nan",
            "5835": "nan",
            "5840": "nan",
            "5845": "nan",
            "5850": "nan",
            "5855": "nan",
            "5860": "nan",
            "5865": "nan",
            "5870": "nan",
            "5875": "nan",
            "5880": "nan",
            "5885": "nan",
            "5890": "nan",
            "5895": "nan",
            "5900": 0.29442,
            "5905": "nan",
            "5910": "nan",
            "5915": "nan",
            "5920": "nan",
            "5925": "nan",
            "5930": "nan",
            "5935": "nan",
            "5940": "nan",
            "5945": "nan",
            "5950": "nan",
            "5955": "nan",
            "5960": "nan",
            "5965": "nan",
            "5970": "nan",
            "5975": "nan",
            "5980": "nan",
            "5985": "nan",
            "5990": "nan",
            "5995": "nan",
            "6000": 0.29503,
            "6005": "nan",
            "6010": "nan",
            "6015": "nan",
            "6020": "nan",
            "6025": "nan",
            "6030": "nan",
            "6035": "nan",
            "6040": "nan",
            "6045": "nan",
            "6050": "nan",
            "6055": "nan",
            "6060": "nan",
            "6065": "nan",
            "6070": "nan",
            "6075": "nan",
            "6080": "nan",
            "6085": "nan",
            "6090": "nan",
            "6095": "nan",
            "6100": 0.30417,
            "6105": "nan",
            "6110": "nan",
            "6115": "nan",
            "6120": "nan",
            "6125": "nan",
            "6130": "nan",
            "6135": "nan",
            "6140": "nan",
            "6145": "nan",
            "6150": "nan",
            "6155": "nan",
            "6160": "nan",
            "6165": "nan",
            "6170": "nan",
            "6175": "nan",
            "6180": "nan",
            "6185": "nan",
            "6190": "nan",
            "6195": "nan",
            "6200": 0.29847,
            "6205": "nan",
            "6210": "nan",
            "6215": "nan",
            "6220": "nan",
            "6225": "nan",
            "6230": "nan",
            "6235": "nan",
            "6240": "nan",
            "6245": "nan",
            "6250": "nan",
            "6255": "nan",
            "6260": "nan",
            "6265": "nan",
            "6270": "nan",
            "6275": "nan",
            "6280": "nan",
            "6285": "nan",
            "6290": "nan",
            "6295": "nan",
            "6300": 0.2933,
            "6305": "nan",
            "6310": "nan",
            "6315": "nan",
            "6320": "nan",
            "6325": "nan",
            "6330": "nan",
            "6335": "nan",
            "6340": "nan",
            "6345": "nan",
            "6350": "nan",
            "6355": "nan",
            "6360": "nan",
            "6365": "nan",
            "6370": "nan",
            "6375": "nan",
            "6380": "nan",
            "6385": "nan",
            "6390": "nan",
            "6395": "nan",
            "6400": 0.28914,
            "6405": "nan",
            "6410": "nan",
            "6415": "nan",
            "6420": "nan",
            "6425": "nan",
            "6430": "nan",
            "6435": "nan",
            "6440": "nan",
            "6445": "nan",
            "6450": "nan",
            "6455": "nan",
            "6460": "nan",
            "6465": "nan",
            "6470": "nan",
            "6475": "nan",
            "6480": "nan",
            "6485": "nan",
            "6490": "nan",
            "6495": "nan",
            "6500": 0.29461,
            "6505": "nan",
            "6510": "nan",
            "6515": "nan",
            "6520": "nan",
            "6525": "nan",
            "6530": "nan",
            "6535": "nan",
            "6540": "nan",
            "6545": "nan",
            "6550": "nan",
            "6555": "nan",
            "6560": "nan",
            "6565": "nan",
            "6570": "nan",
            "6575": "nan",
            "6580": "nan",
            "6585": "nan",
            "6590": "nan",
            "6595": "nan",
            "6600": 0.29601,
            "6605": "nan",
            "6610": "nan",
            "6615": "nan",
            "6620": "nan",
            "6625": "nan",
            "6630": "nan",
            "6635": "nan",
            "6640": "nan",
            "6645": "nan",
            "6650": "nan",
            "6655": "nan",
            "6660": "nan",
            "6665": "nan",
            "6670": "nan",
            "6675": "nan",
            "6680": "nan",
            "6685": "nan",
            "6690": "nan",
            "6695": "nan",
            "6700": 0.2975,
            "6705": "nan",
            "6710": "nan",
            "6715": "nan",
            "6720": "nan",
            "6725": "nan",
            "6730": "nan",
            "6735": "nan",
            "6740": "nan",
            "6745": "nan",
            "6750": "nan",
            "6755": "nan",
            "6760": "nan",
            "6765": "nan",
            "6770": "nan",
            "6775": "nan",
            "6780": "nan",
            "6785": "nan",
            "6790": "nan",
            "6795": "nan",
            "6800": 0.29903,
            "6805": "nan",
            "6810": "nan",
            "6815": "nan",
            "6820": "nan",
            "6825": "nan",
            "6830": "nan",
            "6835": "nan",
            "6840": "nan",
            "6845": "nan",
            "6850": "nan",
            "6855": "nan",
            "6860": "nan",
            "6865": "nan",
            "6870": "nan",
            "6875": "nan",
            "6880": "nan",
            "6885": "nan",
            "6890": "nan",
            "6895": "nan",
            "6900": 0.29481,
            "6905": "nan",
            "6910": "nan",
            "6915": "nan",
            "6920": "nan",
            "6925": "nan",
            "6930": "nan",
            "6935": "nan",
            "6940": "nan",
            "6945": "nan",
            "6950": "nan",
            "6955": "nan",
            "6960": "nan",
            "6965": "nan",
            "6970": "nan",
            "6975": "nan",
            "6980": "nan",
            "6985": "nan",
            "6990": "nan",
            "6995": "nan",
            "7000": 0.29695,
            "7005": "nan",
            "7010": "nan",
            "7015": "nan",
            "7020": "nan",
            "7025": "nan",
            "7030": "nan",
            "7035": "nan",
            "7040": "nan",
            "7045": "nan",
            "7050": "nan",
            "7055": "nan",
            "7060": "nan",
            "7065": "nan",
            "7070": "nan",
            "7075": "nan",
            "7080": "nan",
            "7085": "nan",
            "7090": "nan",
            "7095": "nan",
            "7100": 0.29247,
            "7105": "nan",
            "7110": "nan",
            "7115": "nan",
            "7120": "nan",
            "7125": "nan",
            "7130": "nan",
            "7135": "nan",
            "7140": "nan",
            "7145": "nan",
            "7150": "nan",
            "7155": "nan",
            "7160": "nan",
            "7165": "nan",
            "7170": "nan",
            "7175": "nan",
            "7180": "nan",
            "7185": "nan",
            "7190": "nan",
            "7195": "nan",
            "7200": 0.29084,
            "7205": "nan",
            "7210": "nan",
            "7215": "nan",
            "7220": "nan",
            "7225": "nan",
            "7230": "nan",
            "7235": "nan",
            "7240": "nan",
            "7245": "nan",
            "7250": "nan",
            "7255": "nan",
            "7260": "nan",
            "7265": "nan",
            "7270": "nan",
            "7275": "nan",
            "7280": "nan",
            "7285": "nan",
            "7290": "nan",
            "7295": "nan",
            "7300": 0.29315,
            "7305": "nan",
            "7310": "nan",
            "7315": "nan",
            "7320": "nan",
            "7325": "nan",
            "7330": "nan",
            "7335": "nan",
            "7340": "nan",
            "7345": "nan",
            "7350": "nan",
            "7355": "nan",
            "7360": "nan",
            "7365": "nan",
            "7370": "nan",
            "7375": "nan",
            "7380": "nan",
            "7385": "nan",
            "7390": "nan",
            "7395": "nan",
            "7400": 0.298,
            "7405": "nan",
            "7410": "nan",
            "7415": "nan",
            "7420": "nan",
            "7425": "nan",
            "7430": "nan",
            "7435": "nan",
            "7440": "nan",
            "7445": "nan",
            "7450": "nan",
            "7455": "nan",
            "7460": "nan",
            "7465": "nan",
            "7470": "nan",
            "7475": "nan",
            "7480": "nan",
            "7485": "nan",
            "7490": "nan",
            "7495": "nan",
            "7500": 0.30765,
            "7505": "nan",
            "7510": "nan",
            "7515": "nan",
            "7520": "nan",
            "7525": "nan",
            "7530": "nan",
            "7535": "nan",
            "7540": "nan",
            "7545": "nan",
            "7550": "nan",
            "7555": "nan",
            "7560": "nan",
            "7565": "nan",
            "7570": "nan",
            "7575": "nan",
            "7580": "nan",
            "7585": "nan",
            "7590": "nan",
            "7595": "nan",
            "7600": 0.29214,
            "7605": "nan",
            "7610": "nan",
            "7615": "nan",
            "7620": "nan",
            "7625": "nan",
            "7630": "nan",
            "7635": "nan",
            "7640": "nan",
            "7645": "nan",
            "7650": "nan",
            "7655": "nan",
            "7660": "nan",
            "7665": "nan",
            "7670": "nan",
            "7675": "nan",
            "7680": "nan",
            "7685": "nan",
            "7690": "nan",
            "7695": "nan",
            "7700": 0.29013,
            "7705": "nan",
            "7710": "nan",
            "7715": "nan",
            "7720": "nan",
            "7725": "nan",
            "7730": "nan",
            "7735": "nan",
            "7740": "nan",
            "7745": "nan",
            "7750": "nan",
            "7755": "nan",
            "7760": "nan",
            "7765": "nan",
            "7770": "nan",
            "7775": "nan",
            "7780": "nan",
            "7785": "nan",
            "7790": "nan",
            "7795": "nan",
            "7800": 0.29279,
            "7805": "nan",
            "7810": "nan",
            "7815": "nan",
            "7820": "nan",
            "7825": "nan",
            "7830": "nan",
            "7835": "nan",
            "7840": "nan",
            "7845": "nan",
            "7850": "nan",
            "7855": "nan",
            "7860": "nan",
            "7865": "nan",
            "7870": "nan",
            "7875": "nan",
            "7880": "nan",
            "7885": "nan",
            "7890": "nan",
            "7895": "nan",
            "7900": 0.29941,
            "7905": "nan",
            "7910": "nan",
            "7915": "nan",
            "7920": "nan",
            "7925": "nan",
            "7930": "nan",
            "7935": "nan",
            "7940": "nan",
            "7945": "nan",
            "7950": "nan",
            "7955": "nan",
            "7960": "nan",
            "7965": "nan",
            "7970": "nan",
            "7975": "nan",
            "7980": "nan",
            "7985": "nan",
            "7990": "nan",
            "7995": "nan",
            "8000": 0.28757,
            "8005": "nan",
            "8010": "nan",
            "8015": "nan",
            "8020": "nan",
            "8025": "nan",
            "8030": "nan",
            "8035": "nan",
            "8040": "nan",
            "8045": "nan",
            "8050": "nan",
            "8055": "nan",
            "8060": "nan",
            "8065": "nan",
            "8070": "nan",
            "8075": "nan",
            "8080": "nan",
            "8085": "nan",
            "8090": "nan",
            "8095": "nan",
            "8100": 0.29861,
            "8105": "nan",
            "8110": "nan",
            "8115": "nan",
            "8120": "nan",
            "8125": "nan",
            "8130": "nan",
            "8135": "nan",
            "8140": "nan",
            "8145": "nan",
            "8150": "nan",
            "8155": "nan",
            "8160": "nan",
            "8165": "nan",
            "8170": "nan",
            "8175": "nan",
            "8180": "nan",
            "8185": "nan",
            "8190": "nan",
            "8195": "nan",
            "8200": 0.29215,
            "8205": "nan",
            "8210": "nan",
            "8215": "nan",
            "8220": "nan",
            "8225": "nan",
            "8230": "nan",
            "8235": "nan",
            "8240": "nan",
            "8245": "nan",
            "8250": "nan",
            "8255": "nan",
            "8260": "nan",
            "8265": "nan",
            "8270": "nan",
            "8275": "nan",
            "8280": "nan",
            "8285": "nan",
            "8290": "nan",
            "8295": "nan",
            "8300": 0.28995,
            "8305": "nan",
            "8310": "nan",
            "8315": "nan",
            "8320": "nan",
            "8325": "nan",
            "8330": "nan",
            "8335": "nan",
            "8340": "nan",
            "8345": "nan",
            "8350": "nan",
            "8355": "nan",
            "8360": "nan",
            "8365": "nan",
            "8370": "nan",
            "8375": "nan",
            "8380": "nan",
            "8385": "nan",
            "8390": "nan",
            "8395": "nan",
            "8400": 0.29279,
            "8405": "nan",
            "8410": "nan",
            "8415": "nan",
            "8420": "nan",
            "8425": "nan",
            "8430": "nan",
            "8435": "nan",
            "8440": "nan",
            "8445": "nan",
            "8450": "nan",
            "8455": "nan",
            "8460": "nan",
            "8465": "nan",
            "8470": "nan",
            "8475": "nan",
            "8480": "nan",
            "8485": "nan",
            "8490": "nan",
            "8495": "nan",
            "8500": 0.29218,
            "8505": "nan",
            "8510": "nan",
            "8515": "nan",
            "8520": "nan",
            "8525": "nan",
            "8530": "nan",
            "8535": "nan",
            "8540": "nan",
            "8545": "nan",
            "8550": "nan",
            "8555": "nan",
            "8560": "nan",
            "8565": "nan",
            "8570": "nan",
            "8575": "nan",
            "8580": "nan",
            "8585": "nan",
            "8590": "nan",
            "8595": "nan",
            "8600": 0.28848,
            "8605": "nan",
            "8610": "nan",
            "8615": "nan",
            "8620": "nan",
            "8625": "nan",
            "8630": "nan",
            "8635": "nan",
            "8640": "nan",
            "8645": "nan",
            "8650": "nan",
            "8655": "nan",
            "8660": "nan",
            "8665": "nan",
            "8670": "nan",
            "8675": "nan",
            "8680": "nan",
            "8685": "nan",
            "8690": "nan",
            "8695": "nan",
            "8700": 0.29589,
            "8705": "nan",
            "8710": "nan",
            "8715": "nan",
            "8720": "nan",
            "8725": "nan",
            "8730": "nan",
            "8735": "nan",
            "8740": "nan",
            "8745": "nan",
            "8750": "nan",
            "8755": "nan",
            "8760": "nan",
            "8765": "nan",
            "8770": "nan",
            "8775": "nan",
            "8780": "nan",
            "8785": "nan",
            "8790": "nan",
            "8795": "nan",
            "8800": 0.29437,
            "8805": "nan",
            "8810": "nan",
            "8815": "nan",
            "8820": "nan",
            "8825": "nan",
            "8830": "nan",
            "8835": "nan",
            "8840": "nan",
            "8845": "nan",
            "8850": "nan",
            "8855": "nan",
            "8860": "nan",
            "8865": "nan",
            "8870": "nan",
            "8875": "nan",
            "8880": "nan",
            "8885": "nan",
            "8890": "nan",
            "8895": "nan",
            "8900": 0.29367,
            "8905": "nan",
            "8910": "nan",
            "8915": "nan",
            "8920": "nan",
            "8925": "nan",
            "8930": "nan",
            "8935": "nan",
            "8940": "nan",
            "8945": "nan",
            "8950": "nan",
            "8955": "nan",
            "8960": "nan",
            "8965": "nan",
            "8970": "nan",
            "8975": "nan",
            "8980": "nan",
            "8985": "nan",
            "8990": "nan",
            "8995": "nan",
            "9000": 0.29567,
            "9005": "nan",
            "9010": "nan",
            "9015": "nan",
            "9020": "nan",
            "9025": "nan",
            "9030": "nan",
            "9035": "nan",
            "9040": "nan",
            "9045": "nan",
            "9050": "nan",
            "9055": "nan",
            "9060": "nan",
            "9065": "nan",
            "9070": "nan",
            "9075": "nan",
            "9080": "nan",
            "9085": "nan",
            "9090": "nan",
            "9095": "nan",
            "9100": 0.29096,
            "9105": "nan",
            "9110": "nan",
            "9115": "nan",
            "9120": "nan",
            "9125": "nan",
            "9130": "nan",
            "9135": "nan",
            "9140": "nan",
            "9145": "nan",
            "9150": "nan",
            "9155": "nan",
            "9160": "nan",
            "9165": "nan",
            "9170": "nan",
            "9175": "nan",
            "9180": "nan",
            "9185": "nan",
            "9190": "nan",
            "9195": "nan",
            "9200": 0.28403,
            "9205": "nan",
            "9210": "nan",
            "9215": "nan",
            "9220": "nan",
            "9225": "nan",
            "9230": "nan",
            "9235": "nan",
            "9240": "nan",
            "9245": "nan",
            "9250": "nan",
            "9255": "nan",
            "9260": "nan",
            "9265": "nan",
            "9270": "nan",
            "9275": "nan",
            "9280": "nan",
            "9285": "nan",
            "9290": "nan",
            "9295": "nan",
            "9300": 0.29074,
            "9305": "nan",
            "9310": "nan",
            "9315": "nan",
            "9320": "nan",
            "9325": "nan",
            "9330": "nan",
            "9335": "nan",
            "9340": "nan",
            "9345": "nan",
            "9350": "nan",
            "9355": "nan",
            "9360": "nan",
            "9365": "nan",
            "9370": "nan",
            "9375": "nan",
            "9380": "nan",
            "9385": "nan",
            "9390": "nan",
            "9395": "nan",
            "9400": 0.2955,
            "9405": "nan",
            "9410": "nan",
            "9415": "nan",
            "9420": "nan",
            "9425": "nan",
            "9430": "nan",
            "9435": "nan",
            "9440": "nan",
            "9445": "nan",
            "9450": "nan",
            "9455": "nan",
            "9460": "nan",
            "9465": "nan",
            "9470": "nan",
            "9475": "nan",
            "9480": "nan",
            "9485": "nan",
            "9490": "nan",
            "9495": "nan",
            "9500": 0.30297,
            "9505": "nan",
            "9510": "nan",
            "9515": "nan",
            "9520": "nan",
            "9525": "nan",
            "9530": "nan",
            "9535": "nan",
            "9540": "nan",
            "9545": "nan",
            "9550": "nan",
            "9555": "nan",
            "9560": "nan",
            "9565": "nan",
            "9570": "nan",
            "9575": "nan",
            "9580": "nan",
            "9585": "nan",
            "9590": "nan",
            "9595": "nan",
            "9600": 0.29919,
            "9605": "nan",
            "9610": "nan",
            "9615": "nan",
            "9620": "nan",
            "9625": "nan",
            "9630": "nan",
            "9635": "nan",
            "9640": "nan",
            "9645": "nan",
            "9650": "nan",
            "9655": "nan",
            "9660": "nan",
            "9665": "nan",
            "9670": "nan",
            "9675": "nan",
            "9680": "nan",
            "9685": "nan",
            "9690": "nan",
            "9695": "nan",
            "9700": 0.30329,
            "9705": "nan",
            "9710": "nan",
            "9715": "nan",
            "9720": "nan",
            "9725": "nan",
            "9730": "nan",
            "9735": "nan",
            "9740": "nan",
            "9745": "nan",
            "9750": "nan",
            "9755": "nan",
            "9760": "nan",
            "9765": "nan",
            "9770": "nan",
            "9775": "nan",
            "9780": "nan",
            "9785": "nan",
            "9790": "nan",
            "9795": "nan",
            "9800": 0.2953,
            "9805": "nan",
            "9810": "nan",
            "9815": "nan",
            "9820": "nan",
            "9825": "nan",
            "9830": "nan",
            "9835": "nan",
            "9840": "nan",
            "9845": "nan",
            "9850": "nan",
            "9855": "nan",
            "9860": "nan",
            "9865": "nan",
            "9870": "nan",
            "9875": "nan",
            "9880": "nan",
            "9885": "nan",
            "9890": "nan",
            "9895": "nan",
            "9900": 0.29335,
            "9905": "nan",
            "9910": "nan",
            "9915": "nan",
            "9920": "nan",
            "9925": "nan",
            "9930": "nan",
            "9935": "nan",
            "9940": "nan",
            "9945": "nan",
            "9950": "nan",
            "9955": "nan",
            "9960": "nan",
            "9965": "nan",
            "9970": "nan",
            "9975": "nan",
            "9980": "nan",
            "9985": "nan",
            "9990": "nan",
            "9995": "nan",
            "10000": 0.29575,
            "10005": "nan",
            "10010": "nan",
            "10015": "nan",
            "10020": "nan",
            "10025": "nan",
            "10030": "nan",
            "10035": "nan",
            "10040": "nan",
            "10045": "nan",
            "10050": "nan",
            "10055": "nan",
            "10060": "nan",
            "10065": "nan",
            "10070": "nan",
            "10075": "nan",
            "10080": "nan",
            "10085": "nan",
            "10090": "nan",
            "10095": "nan",
            "10100": 0.30017,
            "10105": "nan",
            "10110": "nan",
            "10115": "nan",
            "10120": "nan",
            "10125": "nan",
            "10130": "nan",
            "10135": "nan",
            "10140": "nan",
            "10145": "nan",
            "10150": "nan",
            "10155": "nan",
            "10160": "nan",
            "10165": "nan",
            "10170": "nan",
            "10175": "nan",
            "10180": "nan",
            "10185": "nan",
            "10190": "nan",
            "10195": "nan",
            "10200": 0.28788,
            "10205": "nan",
            "10210": "nan",
            "10215": "nan",
            "10220": "nan",
            "10225": "nan",
            "10230": "nan",
            "10235": "nan",
            "10240": "nan",
            "10245": "nan",
            "10250": "nan",
            "10255": "nan",
            "10260": "nan",
            "10265": "nan",
            "10270": "nan",
            "10275": "nan",
            "10280": "nan",
            "10285": "nan",
            "10290": "nan",
            "10295": "nan",
            "10300": 0.29328,
            "10305": "nan",
            "10310": "nan",
            "10315": "nan",
            "10320": "nan",
            "10325": "nan",
            "10330": "nan",
            "10335": "nan",
            "10340": "nan",
            "10345": "nan",
            "10350": "nan",
            "10355": "nan",
            "10360": "nan",
            "10365": "nan",
            "10370": "nan",
            "10375": "nan",
            "10380": "nan",
            "10385": "nan",
            "10390": "nan",
            "10395": "nan",
            "10400": 0.29073,
            "10405": "nan",
            "10410": "nan",
            "10415": "nan",
            "10420": "nan",
            "10425": "nan",
            "10430": "nan",
            "10435": "nan",
            "10440": "nan",
            "10445": "nan",
            "10450": "nan",
            "10455": "nan",
            "10460": "nan",
            "10465": "nan",
            "10470": "nan",
            "10475": "nan",
            "10480": "nan",
            "10485": "nan",
            "10490": "nan",
            "10495": "nan",
            "10500": 0.2916,
            "10505": "nan",
            "10510": "nan",
            "10515": "nan",
            "10520": "nan",
            "10525": "nan",
            "10530": "nan",
            "10535": "nan",
            "10540": "nan",
            "10545": "nan",
            "10550": "nan",
            "10555": "nan",
            "10560": "nan",
            "10565": "nan",
            "10570": "nan",
            "10575": "nan",
            "10580": "nan",
            "10585": "nan",
            "10590": "nan",
            "10595": "nan",
            "10600": 0.29251,
            "10605": "nan",
            "10610": "nan",
            "10615": "nan",
            "10620": "nan",
            "10625": "nan",
            "10630": "nan",
            "10635": "nan",
            "10640": "nan",
            "10645": "nan",
            "10650": "nan",
            "10655": "nan",
            "10660": "nan",
            "10665": "nan",
            "10670": "nan",
            "10675": "nan",
            "10680": "nan",
            "10685": "nan",
            "10690": "nan",
            "10695": "nan",
            "10700": 0.29815,
            "10705": "nan",
            "10710": "nan",
            "10715": "nan",
            "10720": "nan",
            "10725": "nan",
            "10730": "nan",
            "10735": "nan",
            "10740": "nan",
            "10745": "nan",
            "10750": "nan",
            "10755": "nan",
            "10760": "nan",
            "10765": "nan",
            "10770": "nan",
            "10775": "nan",
            "10780": "nan",
            "10785": "nan",
            "10790": "nan",
            "10795": "nan",
            "10800": 0.2964,
            "10805": "nan",
            "10810": "nan",
            "10815": "nan",
            "10820": "nan",
            "10825": "nan",
            "10830": "nan",
            "10835": "nan",
            "10840": "nan",
            "10845": "nan",
            "10850": "nan",
            "10855": "nan",
            "10860": "nan",
            "10865": "nan",
            "10870": "nan",
            "10875": "nan",
            "10880": "nan",
            "10885": "nan",
            "10890": "nan",
            "10895": "nan",
            "10900": 0.29354,
            "10905": "nan",
            "10910": "nan",
            "10915": "nan",
            "10920": "nan",
            "10925": "nan",
            "10930": "nan",
            "10935": "nan",
            "10940": "nan",
            "10945": "nan",
            "10950": "nan",
            "10955": "nan",
            "10960": "nan",
            "10965": "nan",
            "10970": "nan",
            "10975": "nan",
            "10980": "nan",
            "10985": "nan",
            "10990": "nan",
            "10995": "nan",
            "11000": 0.29398,
            "11005": "nan",
            "11010": "nan",
            "11015": "nan",
            "11020": "nan",
            "11025": "nan",
            "11030": "nan",
            "11035": "nan",
            "11040": "nan",
            "11045": "nan",
            "11050": "nan",
            "11055": "nan",
            "11060": "nan",
            "11065": "nan",
            "11070": "nan",
            "11075": "nan",
            "11080": "nan",
            "11085": "nan",
            "11090": "nan",
            "11095": "nan",
            "11100": 0.28933,
            "11105": "nan",
            "11110": "nan",
            "11115": "nan",
            "11120": "nan",
            "11125": "nan",
            "11130": "nan",
            "11135": "nan",
            "11140": "nan",
            "11145": "nan",
            "11150": "nan",
            "11155": "nan",
            "11160": "nan",
            "11165": "nan",
            "11170": "nan",
            "11175": "nan",
            "11180": "nan",
            "11185": "nan",
            "11190": "nan",
            "11195": "nan",
            "11200": 0.29307,
            "11205": "nan",
            "11210": "nan",
            "11215": "nan",
            "11220": "nan",
            "11225": "nan",
            "11230": "nan",
            "11235": "nan",
            "11240": "nan",
            "11245": "nan",
            "11250": "nan",
            "11255": "nan",
            "11260": "nan",
            "11265": "nan",
            "11270": "nan",
            "11275": "nan",
            "11280": "nan",
            "11285": "nan",
            "11290": "nan",
            "11295": "nan",
            "11300": 0.29257,
            "11305": "nan",
            "11310": "nan",
            "11315": "nan",
            "11320": "nan",
            "11325": "nan",
            "11330": "nan",
            "11335": "nan",
            "11340": "nan",
            "11345": "nan",
            "11350": "nan",
            "11355": "nan",
            "11360": "nan",
            "11365": "nan",
            "11370": "nan",
            "11375": "nan",
            "11380": "nan",
            "11385": "nan",
            "11390": "nan",
            "11395": "nan",
            "11400": 0.29479,
            "11405": "nan",
            "11410": "nan",
            "11415": "nan",
            "11420": "nan",
            "11425": "nan",
            "11430": "nan",
            "11435": "nan",
            "11440": "nan",
            "11445": "nan",
            "11450": "nan",
            "11455": "nan",
            "11460": "nan",
            "11465": "nan",
            "11470": "nan",
            "11475": "nan",
            "11480": "nan",
            "11485": "nan",
            "11490": "nan",
            "11495": "nan",
            "11500": 0.30464,
            "11505": "nan",
            "11510": "nan",
            "11515": "nan",
            "11520": "nan",
            "11525": "nan",
            "11530": "nan",
            "11535": "nan",
            "11540": "nan",
            "11545": "nan",
            "11550": "nan",
            "11555": "nan",
            "11560": "nan",
            "11565": "nan",
            "11570": "nan",
            "11575": "nan",
            "11580": "nan",
            "11585": "nan",
            "11590": "nan",
            "11595": "nan",
            "11600": 0.29822,
            "11605": "nan",
            "11610": "nan",
            "11615": "nan",
            "11620": "nan",
            "11625": "nan",
            "11630": "nan",
            "11635": "nan",
            "11640": "nan",
            "11645": "nan",
            "11650": "nan",
            "11655": "nan",
            "11660": "nan",
            "11665": "nan",
            "11670": "nan",
            "11675": "nan",
            "11680": "nan",
            "11685": "nan",
            "11690": "nan",
            "11695": "nan",
            "11700": 0.29887,
            "11705": "nan",
            "11710": "nan",
            "11715": "nan",
            "11720": "nan",
            "11725": "nan",
            "11730": "nan",
            "11735": "nan",
            "11740": "nan",
            "11745": "nan",
            "11750": "nan",
            "11755": "nan",
            "11760": "nan",
            "11765": "nan",
            "11770": "nan",
            "11775": "nan",
            "11780": "nan",
            "11785": "nan",
            "11790": "nan",
            "11795": "nan",
            "11800": 0.3027,
            "11805": "nan",
            "11810": "nan",
            "11815": "nan",
            "11820": "nan",
            "11825": "nan",
            "11830": "nan",
            "11835": "nan",
            "11840": "nan",
            "11845": "nan",
            "11850": "nan",
            "11855": "nan",
            "11860": "nan",
            "11865": "nan",
            "11870": "nan",
            "11875": "nan",
            "11880": "nan",
            "11885": "nan",
            "11890": "nan",
            "11895": "nan",
            "11900": 0.29833,
            "11905": "nan",
            "11910": "nan",
            "11915": "nan",
            "11920": "nan",
            "11925": "nan",
            "11930": "nan",
            "11935": "nan",
            "11940": "nan",
            "11945": "nan",
            "11950": "nan",
            "11955": "nan",
            "11960": "nan",
            "11965": "nan",
            "11970": "nan",
            "11975": "nan",
            "11980": "nan",
            "11985": "nan",
            "11990": "nan",
            "11995": "nan",
            "12000": 0.29865,
            "12005": "nan",
            "12010": "nan",
            "12015": "nan",
            "12020": "nan",
            "12025": "nan",
            "12030": "nan",
            "12035": "nan",
            "12040": "nan",
            "12045": "nan",
            "12050": "nan",
            "12055": "nan",
            "12060": "nan",
            "12065": "nan",
            "12070": "nan",
            "12075": "nan",
            "12080": "nan",
            "12085": "nan",
            "12090": "nan",
            "12095": "nan",
            "12100": 0.29085,
            "12105": "nan",
            "12110": "nan",
            "12115": "nan",
            "12120": "nan",
            "12125": "nan",
            "12130": "nan",
            "12135": "nan",
            "12140": "nan",
            "12145": "nan",
            "12150": "nan",
            "12155": "nan",
            "12160": "nan",
            "12165": "nan",
            "12170": "nan",
            "12175": "nan",
            "12180": "nan",
            "12185": "nan",
            "12190": "nan",
            "12195": "nan",
            "12200": 0.29165,
            "12205": "nan",
            "12210": "nan",
            "12215": "nan",
            "12220": "nan",
            "12225": "nan",
            "12230": "nan",
            "12235": "nan",
            "12240": "nan",
            "12245": "nan",
            "12250": "nan",
            "12255": "nan",
            "12260": "nan",
            "12265": "nan",
            "12270": "nan",
            "12275": "nan",
            "12280": "nan",
            "12285": "nan",
            "12290": "nan",
            "12295": "nan",
            "12300": 0.28632,
            "12305": "nan",
            "12310": "nan",
            "12315": "nan",
            "12320": "nan",
            "12325": "nan",
            "12330": "nan",
            "12335": "nan",
            "12340": "nan",
            "12345": "nan",
            "12350": "nan",
            "12355": "nan",
            "12360": "nan",
            "12365": "nan",
            "12370": "nan",
            "12375": "nan",
            "12380": "nan",
            "12385": "nan",
            "12390": "nan",
            "12395": "nan",
            "12400": 0.29291,
            "12405": "nan",
            "12410": "nan",
            "12415": "nan",
            "12420": "nan",
            "12425": "nan",
            "12430": "nan",
            "12435": "nan",
            "12440": "nan",
            "12445": "nan",
            "12450": "nan",
            "12455": "nan",
            "12460": "nan",
            "12465": "nan",
            "12470": "nan",
            "12475": "nan",
            "12480": "nan",
            "12485": "nan",
            "12490": "nan",
            "12495": "nan",
            "12500": 0.29211,
            "12505": "nan",
            "12510": "nan",
            "12515": "nan",
            "12520": "nan",
            "12525": "nan",
            "12530": "nan",
            "12535": "nan",
            "12540": "nan",
            "12545": "nan",
            "12550": "nan",
            "12555": "nan",
            "12560": "nan",
            "12565": "nan",
            "12570": "nan",
            "12575": "nan",
            "12580": "nan",
            "12585": "nan",
            "12590": "nan",
            "12595": "nan",
            "12600": 0.2955,
            "12605": "nan",
            "12610": "nan",
            "12615": "nan",
            "12620": "nan",
            "12625": "nan",
            "12630": "nan",
            "12635": "nan",
            "12640": "nan",
            "12645": "nan",
            "12650": "nan",
            "12655": "nan",
            "12660": "nan",
            "12665": "nan",
            "12670": "nan",
            "12675": "nan",
            "12680": "nan",
            "12685": "nan",
            "12690": "nan",
            "12695": "nan",
            "12700": 0.2851,
            "12705": "nan",
            "12710": "nan",
            "12715": "nan",
            "12720": "nan",
            "12725": "nan",
            "12730": "nan",
            "12735": "nan",
            "12740": "nan",
            "12745": "nan",
            "12750": "nan",
            "12755": "nan",
            "12760": "nan",
            "12765": "nan",
            "12770": "nan",
            "12775": "nan",
            "12780": "nan",
            "12785": "nan",
            "12790": "nan",
            "12795": "nan",
            "12800": 0.29696,
            "12805": "nan",
            "12810": "nan",
            "12815": "nan",
            "12820": "nan",
            "12825": "nan",
            "12830": "nan",
            "12835": "nan",
            "12840": "nan",
            "12845": "nan",
            "12850": "nan",
            "12855": "nan",
            "12860": "nan",
            "12865": "nan",
            "12870": "nan",
            "12875": "nan",
            "12880": "nan",
            "12885": "nan",
            "12890": "nan",
            "12895": "nan",
            "12900": 0.29112,
            "12905": "nan",
            "12910": "nan",
            "12915": "nan",
            "12920": "nan",
            "12925": "nan",
            "12930": "nan",
            "12935": "nan",
            "12940": "nan",
            "12945": "nan",
            "12950": "nan",
            "12955": "nan",
            "12960": "nan",
            "12965": "nan",
            "12970": "nan",
            "12975": "nan",
            "12980": "nan",
            "12985": "nan",
            "12990": "nan",
            "12995": "nan",
            "13000": 0.29889,
            "13005": "nan",
            "13010": "nan",
            "13015": "nan",
            "13020": "nan",
            "13025": "nan",
            "13030": "nan",
            "13035": "nan",
            "13040": "nan",
            "13045": "nan",
            "13050": "nan",
            "13055": "nan",
            "13060": "nan",
            "13065": "nan",
            "13070": "nan",
            "13075": "nan",
            "13080": "nan",
            "13085": "nan",
            "13090": "nan",
            "13095": "nan",
            "13100": 0.29889,
            "13105": "nan",
            "13110": "nan",
            "13115": "nan",
            "13120": "nan",
            "13125": "nan",
            "13130": "nan",
            "13135": "nan",
            "13140": "nan",
            "13145": "nan",
            "13150": "nan",
            "13155": "nan",
            "13160": "nan",
            "13165": "nan",
            "13170": "nan",
            "13175": "nan",
            "13180": "nan",
            "13185": "nan",
            "13190": "nan",
            "13195": "nan",
            "13200": 0.29419,
            "13205": "nan",
            "13210": "nan",
            "13215": "nan",
            "13220": "nan",
            "13225": "nan",
            "13230": "nan",
            "13235": "nan",
            "13240": "nan",
            "13245": "nan",
            "13250": "nan",
            "13255": "nan",
            "13260": "nan",
            "13265": "nan",
            "13270": "nan",
            "13275": "nan",
            "13280": "nan",
            "13285": "nan",
            "13290": "nan",
            "13295": "nan",
            "13300": 0.288,
            "13305": "nan",
            "13310": "nan",
            "13315": "nan",
            "13320": "nan",
            "13325": "nan",
            "13330": "nan",
            "13335": "nan",
            "13340": "nan",
            "13345": "nan",
            "13350": "nan",
            "13355": "nan",
            "13360": "nan",
            "13365": "nan",
            "13370": "nan",
            "13375": "nan",
            "13380": "nan",
            "13385": "nan",
            "13390": "nan",
            "13395": "nan",
            "13400": 0.29757,
            "13405": "nan",
            "13410": "nan",
            "13415": "nan",
            "13420": "nan",
            "13425": "nan",
            "13430": "nan",
            "13435": "nan",
            "13440": "nan",
            "13445": "nan",
            "13450": "nan",
            "13455": "nan",
            "13460": "nan",
            "13465": "nan",
            "13470": "nan",
            "13475": "nan",
            "13480": "nan",
            "13485": "nan",
            "13490": "nan",
            "13495": "nan",
            "13500": 0.29383,
            "13505": "nan",
            "13510": "nan",
            "13515": "nan",
            "13520": "nan",
            "13525": "nan",
            "13530": "nan",
            "13535": "nan",
            "13540": "nan",
            "13545": "nan",
            "13550": "nan",
            "13555": "nan",
            "13560": "nan",
            "13565": "nan",
            "13570": "nan",
            "13575": "nan",
            "13580": "nan",
            "13585": "nan",
            "13590": "nan",
            "13595": "nan",
            "13600": 0.29566,
            "13605": "nan",
            "13610": "nan",
            "13615": "nan",
            "13620": "nan",
            "13625": "nan",
            "13630": "nan",
            "13635": "nan",
            "13640": "nan",
            "13645": "nan",
            "13650": "nan",
            "13655": "nan",
            "13660": "nan",
            "13665": "nan",
            "13670": "nan",
            "13675": "nan",
            "13680": "nan",
            "13685": "nan",
            "13690": "nan",
            "13695": "nan",
            "13700": 0.2974,
            "13705": "nan",
            "13710": "nan",
            "13715": "nan",
            "13720": "nan",
            "13725": "nan",
            "13730": "nan",
            "13735": "nan",
            "13740": "nan",
            "13745": "nan",
            "13750": "nan",
            "13755": "nan",
            "13760": "nan",
            "13765": "nan",
            "13770": "nan",
            "13775": "nan",
            "13780": "nan",
            "13785": "nan",
            "13790": "nan",
            "13795": "nan",
            "13800": 0.28893,
            "13805": "nan",
            "13810": "nan",
            "13815": "nan",
            "13820": "nan",
            "13825": "nan",
            "13830": "nan",
            "13835": "nan",
            "13840": "nan",
            "13845": "nan",
            "13850": "nan",
            "13855": "nan",
            "13860": "nan",
            "13865": "nan",
            "13870": "nan",
            "13875": "nan",
            "13880": "nan",
            "13885": "nan",
            "13890": "nan",
            "13895": "nan",
            "13900": 0.28373,
            "13905": "nan",
            "13910": "nan",
            "13915": "nan",
            "13920": "nan",
            "13925": "nan",
            "13930": "nan",
            "13935": "nan",
            "13940": "nan",
            "13945": "nan",
            "13950": "nan",
            "13955": "nan",
            "13960": "nan",
            "13965": "nan",
            "13970": "nan",
            "13975": "nan",
            "13980": "nan",
            "13985": "nan",
            "13990": "nan",
            "13995": "nan",
            "14000": 0.33844,
            "14005": "nan",
            "14010": "nan",
            "14015": "nan",
            "14020": "nan",
            "14025": "nan",
            "14030": "nan",
            "14035": "nan",
            "14040": "nan",
            "14045": "nan",
            "14050": "nan",
            "14055": "nan",
            "14060": "nan",
            "14065": "nan",
            "14070": "nan",
            "14075": "nan",
            "14080": "nan",
            "14085": "nan",
            "14090": "nan",
            "14095": "nan",
            "14100": 0.30085,
            "14105": "nan",
            "14110": "nan",
            "14115": "nan",
            "14120": "nan",
            "14125": "nan",
            "14130": "nan",
            "14135": "nan",
            "14140": "nan",
            "14145": "nan",
            "14150": "nan",
            "14155": "nan",
            "14160": "nan",
            "14165": "nan",
            "14170": "nan",
            "14175": "nan",
            "14180": "nan",
            "14185": "nan",
            "14190": "nan",
            "14195": "nan",
            "14200": 0.29411,
            "14205": "nan",
            "14210": "nan",
            "14215": "nan",
            "14220": "nan",
            "14225": "nan",
            "14230": "nan",
            "14235": "nan",
            "14240": "nan",
            "14245": "nan",
            "14250": "nan",
            "14255": "nan",
            "14260": "nan",
            "14265": "nan",
            "14270": "nan",
            "14275": "nan",
            "14280": "nan",
            "14285": "nan",
            "14290": "nan",
            "14295": "nan",
            "14300": 0.288,
            "14305": "nan",
            "14310": "nan",
            "14315": "nan",
            "14320": "nan",
            "14325": "nan",
            "14330": "nan",
            "14335": "nan",
            "14340": "nan",
            "14345": "nan",
            "14350": "nan",
            "14355": "nan",
            "14360": "nan",
            "14365": "nan",
            "14370": "nan",
            "14375": "nan",
            "14380": "nan",
            "14385": "nan",
            "14390": "nan",
            "14395": "nan",
            "14400": 0.29446,
            "14405": "nan",
            "14410": "nan",
            "14415": "nan",
            "14420": "nan",
            "14425": "nan",
            "14430": "nan",
            "14435": "nan",
            "14440": "nan",
            "14445": "nan",
            "14450": "nan",
            "14455": "nan",
            "14460": "nan",
            "14465": "nan",
            "14470": "nan",
            "14475": "nan",
            "14480": "nan",
            "14485": "nan",
            "14490": "nan",
            "14495": "nan",
            "14500": 0.28518,
            "14505": "nan",
            "14510": "nan",
            "14515": "nan",
            "14520": "nan",
            "14525": "nan",
            "14530": "nan",
            "14535": "nan",
            "14540": "nan",
            "14545": "nan",
            "14550": "nan",
            "14555": "nan",
            "14560": "nan",
            "14565": "nan",
            "14570": "nan",
            "14575": "nan",
            "14580": "nan",
            "14585": "nan",
            "14590": "nan",
            "14595": "nan",
            "14600": 0.28954,
            "14605": "nan",
            "14610": "nan",
            "14615": "nan",
            "14620": "nan",
            "14625": "nan",
            "14630": "nan",
            "14635": "nan",
            "14640": "nan",
            "14645": "nan",
            "14650": "nan",
            "14655": "nan",
            "14660": "nan",
            "14665": "nan",
            "14670": "nan",
            "14675": "nan",
            "14680": "nan",
            "14685": "nan",
            "14690": "nan",
            "14695": "nan",
            "14700": 0.29251,
            "14705": "nan",
            "14710": "nan",
            "14715": "nan",
            "14720": "nan",
            "14725": "nan",
            "14730": "nan",
            "14735": "nan",
            "14740": "nan",
            "14745": "nan",
            "14750": "nan",
            "14755": "nan",
            "14760": "nan",
            "14765": "nan",
            "14770": "nan",
            "14775": "nan",
            "14780": "nan",
            "14785": "nan",
            "14790": "nan",
            "14795": "nan",
            "14800": 0.29451,
            "14805": "nan",
            "14810": "nan",
            "14815": "nan",
            "14820": "nan",
            "14825": "nan",
            "14830": "nan",
            "14835": "nan",
            "14840": "nan",
            "14845": "nan",
            "14850": "nan",
            "14855": "nan",
            "14860": "nan",
            "14865": "nan",
            "14870": "nan",
            "14875": "nan",
            "14880": "nan",
            "14885": "nan",
            "14890": "nan",
            "14895": "nan",
            "14900": 0.29061,
            "14905": "nan",
            "14910": "nan",
            "14915": "nan",
            "14920": "nan",
            "14925": "nan",
            "14930": "nan",
            "14935": "nan",
            "14940": "nan",
            "14945": "nan",
            "14950": "nan",
            "14955": "nan",
            "14960": "nan",
            "14965": "nan",
            "14970": "nan",
            "14975": "nan",
            "14980": "nan",
            "14985": "nan",
            "14990": "nan",
            "14995": "nan",
            "15000": 0.29414,
            "15005": "nan",
            "15010": "nan",
            "15015": "nan",
            "15020": "nan",
            "15025": "nan",
            "15030": "nan",
            "15035": "nan",
            "15040": "nan",
            "15045": "nan",
            "15050": "nan",
            "15055": "nan",
            "15060": "nan",
            "15065": "nan",
            "15070": "nan",
            "15075": "nan",
            "15080": "nan",
            "15085": "nan",
            "15090": "nan",
            "15095": "nan",
            "15100": 0.28785,
            "15105": "nan",
            "15110": "nan",
            "15115": "nan",
            "15120": "nan",
            "15125": "nan",
            "15130": "nan",
            "15135": "nan",
            "15140": "nan",
            "15145": "nan",
            "15150": "nan",
            "15155": "nan",
            "15160": "nan",
            "15165": "nan",
            "15170": "nan",
            "15175": "nan",
            "15180": "nan",
            "15185": "nan",
            "15190": "nan",
            "15195": "nan",
            "15200": 0.28665,
            "15205": "nan",
            "15210": "nan",
            "15215": "nan",
            "15220": "nan",
            "15225": "nan",
            "15230": "nan",
            "15235": "nan",
            "15240": "nan",
            "15245": "nan",
            "15250": "nan",
            "15255": "nan",
            "15260": "nan",
            "15265": "nan",
            "15270": "nan",
            "15275": "nan",
            "15280": "nan",
            "15285": "nan",
            "15290": "nan",
            "15295": "nan",
            "15300": 0.29015,
            "15305": "nan",
            "15310": "nan",
            "15315": "nan",
            "15320": "nan",
            "15325": "nan",
            "15330": "nan",
            "15335": "nan",
            "15340": "nan",
            "15345": "nan",
            "15350": "nan",
            "15355": "nan",
            "15360": "nan",
            "15365": "nan",
            "15370": "nan",
            "15375": "nan",
            "15380": "nan",
            "15385": "nan",
            "15390": "nan",
            "15395": "nan",
            "15400": 0.29171,
            "15405": "nan",
            "15410": "nan",
            "15415": "nan",
            "15420": "nan",
            "15425": "nan",
            "15430": "nan",
            "15435": "nan",
            "15440": "nan",
            "15445": "nan",
            "15450": "nan",
            "15455": "nan",
            "15460": "nan",
            "15465": "nan",
            "15470": "nan",
            "15475": "nan",
            "15480": "nan",
            "15485": "nan",
            "15490": "nan",
            "15495": "nan",
            "15500": 0.28957,
            "15505": "nan",
            "15510": "nan",
            "15515": "nan",
            "15520": "nan",
            "15525": "nan",
            "15530": "nan",
            "15535": "nan",
            "15540": "nan",
            "15545": "nan",
            "15550": "nan",
            "15555": "nan",
            "15560": "nan",
            "15565": "nan",
            "15570": "nan",
            "15575": "nan",
            "15580": "nan",
            "15585": "nan",
            "15590": "nan",
            "15595": "nan",
            "15600": 0.29499,
            "15605": "nan",
            "15610": "nan",
            "15615": "nan",
            "15620": "nan",
            "15625": "nan",
            "15630": "nan",
            "15635": "nan",
            "15640": "nan",
            "15645": "nan",
            "15650": "nan",
            "15655": "nan",
            "15660": "nan",
            "15665": "nan",
            "15670": "nan",
            "15675": "nan",
            "15680": "nan",
            "15685": "nan",
            "15690": "nan",
            "15695": "nan",
            "15700": 0.30239,
            "15705": "nan",
            "15710": "nan",
            "15715": "nan",
            "15720": "nan",
            "15725": "nan",
            "15730": "nan",
            "15735": "nan",
            "15740": "nan",
            "15745": "nan",
            "15750": "nan",
            "15755": "nan",
            "15760": "nan",
            "15765": "nan",
            "15770": "nan",
            "15775": "nan",
            "15780": "nan",
            "15785": "nan",
            "15790": "nan",
            "15795": "nan",
            "15800": 0.29441,
            "15805": "nan",
            "15810": "nan",
            "15815": "nan",
            "15820": "nan",
            "15825": "nan",
            "15830": "nan",
            "15835": "nan",
            "15840": "nan",
            "15845": "nan",
            "15850": "nan",
            "15855": "nan",
            "15860": "nan",
            "15865": "nan",
            "15870": "nan",
            "15875": "nan",
            "15880": "nan",
            "15885": "nan",
            "15890": "nan",
            "15895": "nan",
            "15900": 0.28379,
            "15905": "nan",
            "15910": "nan",
            "15915": "nan",
            "15920": "nan",
            "15925": "nan",
            "15930": "nan",
            "15935": "nan",
            "15940": "nan",
            "15945": "nan",
            "15950": "nan",
            "15955": "nan",
            "15960": "nan",
            "15965": "nan",
            "15970": "nan",
            "15975": "nan",
            "15980": "nan",
            "15985": "nan",
            "15990": "nan",
            "15995": "nan",
            "16000": 0.30864,
            "16005": "nan",
            "16010": "nan",
            "16015": "nan",
            "16020": "nan",
            "16025": "nan",
            "16030": "nan",
            "16035": "nan",
            "16040": "nan",
            "16045": "nan",
            "16050": "nan",
            "16055": "nan",
            "16060": "nan",
            "16065": "nan",
            "16070": "nan",
            "16075": "nan",
            "16080": "nan",
            "16085": "nan",
            "16090": "nan",
            "16095": "nan",
            "16100": 0.28342,
            "16105": "nan",
            "16110": "nan",
            "16115": "nan",
            "16120": "nan",
            "16125": "nan",
            "16130": "nan",
            "16135": "nan",
            "16140": "nan",
            "16145": "nan",
            "16150": "nan",
            "16155": "nan",
            "16160": "nan",
            "16165": "nan",
            "16170": "nan",
            "16175": "nan",
            "16180": "nan",
            "16185": "nan",
            "16190": "nan",
            "16195": "nan",
            "16200": 0.28317,
            "16205": "nan",
            "16210": "nan",
            "16215": "nan",
            "16220": "nan",
            "16225": "nan",
            "16230": "nan",
            "16235": "nan",
            "16240": "nan",
            "16245": "nan",
            "16250": "nan",
            "16255": "nan",
            "16260": "nan",
            "16265": "nan",
            "16270": "nan",
            "16275": "nan",
            "16280": "nan",
            "16285": "nan",
            "16290": "nan",
            "16295": "nan",
            "16300": 0.28554,
            "16305": "nan",
            "16310": "nan",
            "16315": "nan",
            "16320": "nan",
            "16325": "nan",
            "16330": "nan",
            "16335": "nan",
            "16340": "nan",
            "16345": "nan",
            "16350": "nan",
            "16355": "nan",
            "16360": "nan",
            "16365": "nan",
            "16370": "nan",
            "16375": "nan",
            "16380": "nan",
            "16385": "nan",
            "16390": "nan",
            "16395": "nan",
            "16400": 0.32254,
            "16405": "nan",
            "16410": "nan",
            "16415": "nan",
            "16420": "nan",
            "16425": "nan",
            "16430": "nan",
            "16435": "nan",
            "16440": "nan",
            "16445": "nan",
            "16450": "nan",
            "16455": "nan",
            "16460": "nan",
            "16465": "nan",
            "16470": "nan",
            "16475": "nan",
            "16480": "nan",
            "16485": "nan",
            "16490": "nan",
            "16495": "nan",
            "16500": 0.29199,
            "16505": "nan",
            "16510": "nan",
            "16515": "nan",
            "16520": "nan",
            "16525": "nan",
            "16530": "nan",
            "16535": "nan",
            "16540": "nan",
            "16545": "nan",
            "16550": "nan",
            "16555": "nan",
            "16560": "nan",
            "16565": "nan",
            "16570": "nan",
            "16575": "nan",
            "16580": "nan",
            "16585": "nan",
            "16590": "nan",
            "16595": "nan",
            "16600": 0.28704,
            "16605": "nan",
            "16610": "nan",
            "16615": "nan",
            "16620": "nan",
            "16625": "nan",
            "16630": "nan",
            "16635": "nan",
            "16640": "nan",
            "16645": "nan",
            "16650": "nan",
            "16655": "nan",
            "16660": "nan",
            "16665": "nan",
            "16670": "nan",
            "16675": "nan",
            "16680": "nan",
            "16685": "nan",
            "16690": "nan",
            "16695": "nan",
            "16700": 0.29115,
            "16705": "nan",
            "16710": "nan",
            "16715": "nan",
            "16720": "nan",
            "16725": "nan",
            "16730": "nan",
            "16735": "nan",
            "16740": "nan",
            "16745": "nan",
            "16750": "nan",
            "16755": "nan",
            "16760": "nan",
            "16765": "nan",
            "16770": "nan",
            "16775": "nan",
            "16780": "nan",
            "16785": "nan",
            "16790": "nan",
            "16795": "nan",
            "16800": 0.2992,
            "16805": "nan",
            "16810": "nan",
            "16815": "nan",
            "16820": "nan",
            "16825": "nan",
            "16830": "nan",
            "16835": "nan",
            "16840": "nan",
            "16845": "nan",
            "16850": "nan",
            "16855": "nan",
            "16860": "nan",
            "16865": "nan",
            "16870": "nan",
            "16875": "nan",
            "16880": "nan",
            "16885": "nan",
            "16890": "nan",
            "16895": "nan",
            "16900": 0.29073,
            "16905": "nan",
            "16910": "nan",
            "16915": "nan",
            "16920": "nan",
            "16925": "nan",
            "16930": "nan",
            "16935": "nan",
            "16940": "nan",
            "16945": "nan",
            "16950": "nan",
            "16955": "nan",
            "16960": "nan",
            "16965": "nan",
            "16970": "nan",
            "16975": "nan",
            "16980": "nan",
            "16985": "nan",
            "16990": "nan",
            "16995": "nan",
            "17000": 0.29589,
            "17005": "nan",
            "17010": "nan",
            "17015": "nan",
            "17020": "nan",
            "17025": "nan",
            "17030": "nan",
            "17035": "nan",
            "17040": "nan",
            "17045": "nan",
            "17050": "nan",
            "17055": "nan",
            "17060": "nan",
            "17065": "nan",
            "17070": "nan",
            "17075": "nan",
            "17080": "nan",
            "17085": "nan",
            "17090": "nan",
            "17095": "nan",
            "17100": 0.29431,
            "17105": "nan",
            "17110": "nan",
            "17115": "nan",
            "17120": "nan",
            "17125": "nan",
            "17130": "nan",
            "17135": "nan",
            "17140": "nan",
            "17145": "nan",
            "17150": "nan",
            "17155": "nan",
            "17160": "nan",
            "17165": "nan",
            "17170": "nan",
            "17175": "nan",
            "17180": "nan",
            "17185": "nan",
            "17190": "nan",
            "17195": "nan",
            "17200": 0.29774,
            "17205": "nan",
            "17210": "nan",
            "17215": "nan",
            "17220": "nan",
            "17225": "nan",
            "17230": "nan",
            "17235": "nan",
            "17240": "nan",
            "17245": "nan",
            "17250": "nan",
            "17255": "nan",
            "17260": "nan",
            "17265": "nan",
            "17270": "nan",
            "17275": "nan",
            "17280": "nan",
            "17285": "nan",
            "17290": "nan",
            "17295": "nan",
            "17300": 0.29168,
            "17305": "nan",
            "17310": "nan",
            "17315": "nan",
            "17320": "nan",
            "17325": "nan",
            "17330": "nan",
            "17335": "nan",
            "17340": "nan",
            "17345": "nan",
            "17350": "nan",
            "17355": "nan",
            "17360": "nan",
            "17365": "nan",
            "17370": "nan",
            "17375": "nan",
            "17380": "nan",
            "17385": "nan",
            "17390": "nan",
            "17395": "nan",
            "17400": 0.2873,
            "17405": "nan",
            "17410": "nan",
            "17415": "nan",
            "17420": "nan",
            "17425": "nan",
            "17430": "nan",
            "17435": "nan",
            "17440": "nan",
            "17445": "nan",
            "17450": "nan",
            "17455": "nan",
            "17460": "nan",
            "17465": "nan",
            "17470": "nan",
            "17475": "nan",
            "17480": "nan",
            "17485": "nan",
            "17490": "nan",
            "17495": "nan",
            "17500": 0.28948,
            "17505": "nan",
            "17510": "nan",
            "17515": "nan",
            "17520": "nan",
            "17525": "nan",
            "17530": "nan",
            "17535": "nan",
            "17540": "nan",
            "17545": "nan",
            "17550": "nan",
            "17555": "nan",
            "17560": "nan",
            "17565": "nan",
            "17570": "nan",
            "17575": "nan",
            "17580": "nan",
            "17585": "nan",
            "17590": "nan",
            "17595": "nan",
            "17600": 0.28878,
            "17605": "nan",
            "17610": "nan",
            "17615": "nan",
            "17620": "nan",
            "17625": "nan",
            "17630": "nan",
            "17635": "nan",
            "17640": "nan",
            "17645": "nan",
            "17650": "nan",
            "17655": "nan",
            "17660": "nan",
            "17665": "nan",
            "17670": "nan",
            "17675": "nan",
            "17680": "nan",
            "17685": "nan",
            "17690": "nan",
            "17695": "nan",
            "17700": 0.29628,
            "17705": "nan",
            "17710": "nan",
            "17715": "nan",
            "17720": "nan",
            "17725": "nan",
            "17730": "nan",
            "17735": "nan",
            "17740": "nan",
            "17745": "nan",
            "17750": "nan",
            "17755": "nan",
            "17760": "nan",
            "17765": "nan",
            "17770": "nan",
            "17775": "nan",
            "17780": "nan",
            "17785": "nan",
            "17790": "nan",
            "17795": "nan",
            "17800": 0.30409,
            "17805": "nan",
            "17810": "nan",
            "17815": "nan",
            "17820": "nan",
            "17825": "nan",
            "17830": "nan",
            "17835": "nan",
            "17840": "nan",
            "17845": "nan",
            "17850": "nan",
            "17855": "nan",
            "17860": "nan",
            "17865": "nan",
            "17870": "nan",
            "17875": "nan",
            "17880": "nan",
            "17885": "nan",
            "17890": "nan",
            "17895": "nan",
            "17900": 0.28961,
            "17905": "nan",
            "17910": "nan",
            "17915": "nan",
            "17920": "nan",
            "17925": "nan",
            "17930": "nan",
            "17935": "nan",
            "17940": "nan",
            "17945": "nan",
            "17950": "nan",
            "17955": "nan",
            "17960": "nan",
            "17965": "nan",
            "17970": "nan",
            "17975": "nan",
            "17980": "nan",
            "17985": "nan",
            "17990": "nan",
            "17995": "nan",
            "18000": 0.29833,
            "18005": "nan",
            "18010": "nan",
            "18015": "nan",
            "18020": "nan",
            "18025": "nan",
            "18030": "nan",
            "18035": "nan",
            "18040": "nan",
            "18045": "nan",
            "18050": "nan",
            "18055": "nan",
            "18060": "nan",
            "18065": "nan",
            "18070": "nan",
            "18075": "nan",
            "18080": "nan",
            "18085": "nan",
            "18090": "nan",
            "18095": "nan",
            "18100": 0.29411,
            "18105": "nan",
            "18110": "nan",
            "18115": "nan",
            "18120": "nan",
            "18125": "nan",
            "18130": "nan",
            "18135": "nan",
            "18140": "nan",
            "18145": "nan",
            "18150": "nan",
            "18155": "nan",
            "18160": "nan",
            "18165": "nan",
            "18170": "nan",
            "18175": "nan",
            "18180": "nan",
            "18185": "nan",
            "18190": "nan",
            "18195": "nan",
            "18200": 0.29609,
            "18205": "nan",
            "18210": "nan",
            "18215": "nan",
            "18220": "nan",
            "18225": "nan",
            "18230": "nan",
            "18235": "nan",
            "18240": "nan",
            "18245": "nan",
            "18250": "nan",
            "18255": "nan",
            "18260": "nan",
            "18265": "nan",
            "18270": "nan",
            "18275": "nan",
            "18280": "nan",
            "18285": "nan",
            "18290": "nan",
            "18295": "nan",
            "18300": 0.2835,
            "18305": "nan",
            "18310": "nan",
            "18315": "nan",
            "18320": "nan",
            "18325": "nan",
            "18330": "nan",
            "18335": "nan",
            "18340": "nan",
            "18345": "nan",
            "18350": "nan",
            "18355": "nan",
            "18360": "nan",
            "18365": "nan",
            "18370": "nan",
            "18375": "nan",
            "18380": "nan",
            "18385": "nan",
            "18390": "nan",
            "18395": "nan",
            "18400": 0.29893,
            "18405": "nan",
            "18410": "nan",
            "18415": "nan",
            "18420": "nan",
            "18425": "nan",
            "18430": "nan",
            "18435": "nan",
            "18440": "nan",
            "18445": "nan",
            "18450": "nan",
            "18455": "nan",
            "18460": "nan",
            "18465": "nan",
            "18470": "nan",
            "18475": "nan",
            "18480": "nan",
            "18485": "nan",
            "18490": "nan",
            "18495": "nan",
            "18500": 0.29928,
            "18505": "nan",
            "18510": "nan",
            "18515": "nan",
            "18520": "nan",
            "18525": "nan",
            "18530": "nan",
            "18535": "nan",
            "18540": "nan",
            "18545": "nan",
            "18550": "nan",
            "18555": "nan",
            "18560": "nan",
            "18565": "nan",
            "18570": "nan",
            "18575": "nan",
            "18580": "nan",
            "18585": "nan",
            "18590": "nan",
            "18595": "nan",
            "18600": 0.28746,
            "18605": "nan",
            "18610": "nan",
            "18615": "nan",
            "18620": "nan",
            "18625": "nan",
            "18630": "nan",
            "18635": "nan",
            "18640": "nan",
            "18645": "nan",
            "18650": "nan",
            "18655": "nan",
            "18660": "nan",
            "18665": "nan",
            "18670": "nan",
            "18675": "nan",
            "18680": "nan",
            "18685": "nan",
            "18690": "nan",
            "18695": "nan",
            "18700": 0.29459,
            "18705": "nan",
            "18710": "nan",
            "18715": "nan",
            "18720": "nan",
            "18725": "nan",
            "18730": "nan",
            "18735": "nan",
            "18740": "nan",
            "18745": "nan",
            "18750": "nan",
            "18755": "nan",
            "18760": "nan",
            "18765": "nan",
            "18770": "nan",
            "18775": "nan",
            "18780": "nan",
            "18785": "nan",
            "18790": "nan",
            "18795": "nan",
            "18800": 0.29505,
            "18805": "nan",
            "18810": "nan",
            "18815": "nan",
            "18820": "nan",
            "18825": "nan",
            "18830": "nan",
            "18835": "nan",
            "18840": "nan",
            "18845": "nan",
            "18850": "nan",
            "18855": "nan",
            "18860": "nan",
            "18865": "nan",
            "18870": "nan",
            "18875": "nan",
            "18880": "nan",
            "18885": "nan",
            "18890": "nan",
            "18895": "nan",
            "18900": 0.29133,
            "18905": "nan",
            "18910": "nan",
            "18915": "nan",
            "18920": "nan",
            "18925": "nan",
            "18930": "nan",
            "18935": "nan",
            "18940": "nan",
            "18945": "nan",
            "18950": "nan",
            "18955": "nan",
            "18960": "nan",
            "18965": "nan",
            "18970": "nan",
            "18975": "nan",
            "18980": "nan",
            "18985": "nan",
            "18990": "nan",
            "18995": "nan",
            "19000": 0.30679,
            "19005": "nan",
            "19010": "nan",
            "19015": "nan",
            "19020": "nan",
            "19025": "nan",
            "19030": "nan",
            "19035": "nan",
            "19040": "nan",
            "19045": "nan",
            "19050": "nan",
            "19055": "nan",
            "19060": "nan",
            "19065": "nan",
            "19070": "nan",
            "19075": "nan",
            "19080": "nan",
            "19085": "nan",
            "19090": "nan",
            "19095": "nan",
            "19100": 0.29257,
            "19105": "nan",
            "19110": "nan",
            "19115": "nan",
            "19120": "nan",
            "19125": "nan",
            "19130": "nan",
            "19135": "nan",
            "19140": "nan",
            "19145": "nan",
            "19150": "nan",
            "19155": "nan",
            "19160": "nan",
            "19165": "nan",
            "19170": "nan",
            "19175": "nan",
            "19180": "nan",
            "19185": "nan",
            "19190": "nan",
            "19195": "nan",
            "19200": 0.30812,
            "19205": "nan",
            "19210": "nan",
            "19215": "nan",
            "19220": "nan",
            "19225": "nan",
            "19230": "nan",
            "19235": "nan",
            "19240": "nan",
            "19245": "nan",
            "19250": "nan",
            "19255": "nan",
            "19260": "nan",
            "19265": "nan",
            "19270": "nan",
            "19275": "nan",
            "19280": "nan",
            "19285": "nan",
            "19290": "nan",
            "19295": "nan",
            "19300": 0.29739,
            "19305": "nan",
            "19310": "nan",
            "19315": "nan",
            "19320": "nan",
            "19325": "nan",
            "19330": "nan",
            "19335": "nan",
            "19340": "nan",
            "19345": "nan",
            "19350": "nan",
            "19355": "nan",
            "19360": "nan",
            "19365": "nan",
            "19370": "nan",
            "19375": "nan",
            "19380": "nan",
            "19385": "nan",
            "19390": "nan",
            "19395": "nan",
            "19400": 0.2924,
            "19405": "nan",
            "19410": "nan",
            "19415": "nan",
            "19420": "nan",
            "19425": "nan",
            "19430": "nan",
            "19435": "nan",
            "19440": "nan",
            "19445": "nan",
            "19450": "nan",
            "19455": "nan",
            "19460": "nan",
            "19465": "nan",
            "19470": "nan",
            "19475": "nan",
            "19480": "nan",
            "19485": "nan",
            "19490": "nan",
            "19495": "nan",
            "19500": 0.30931,
            "19505": "nan",
            "19510": "nan",
            "19515": "nan",
            "19520": "nan",
            "19525": "nan",
            "19530": "nan",
            "19535": "nan",
            "19540": "nan",
            "19545": "nan",
            "19550": "nan",
            "19555": "nan",
            "19560": "nan",
            "19565": "nan",
            "19570": "nan",
            "19575": "nan",
            "19580": "nan",
            "19585": "nan",
            "19590": "nan",
            "19595": "nan",
            "19600": 0.3085,
            "19605": "nan",
            "19610": "nan",
            "19615": "nan",
            "19620": "nan",
            "19625": "nan",
            "19630": "nan",
            "19635": "nan",
            "19640": "nan",
            "19645": "nan",
            "19650": "nan",
            "19655": "nan",
            "19660": "nan",
            "19665": "nan",
            "19670": "nan",
            "19675": "nan",
            "19680": "nan",
            "19685": "nan",
            "19690": "nan",
            "19695": "nan",
            "19700": 0.30079,
            "19705": "nan",
            "19710": "nan",
            "19715": "nan",
            "19720": "nan",
            "19725": "nan",
            "19730": "nan",
            "19735": "nan",
            "19740": "nan",
            "19745": "nan",
            "19750": "nan",
            "19755": "nan",
            "19760": "nan",
            "19765": "nan",
            "19770": "nan",
            "19775": "nan",
            "19780": "nan",
            "19785": "nan",
            "19790": "nan",
            "19795": "nan",
            "19800": 0.30041,
            "19805": "nan",
            "19810": "nan",
            "19815": "nan",
            "19820": "nan",
            "19825": "nan",
            "19830": "nan",
            "19835": "nan",
            "19840": "nan",
            "19845": "nan",
            "19850": "nan",
            "19855": "nan",
            "19860": "nan",
            "19865": "nan",
            "19870": "nan",
            "19875": "nan",
            "19880": "nan",
            "19885": "nan",
            "19890": "nan",
            "19895": "nan",
            "19900": 0.29519,
            "19905": "nan",
            "19910": "nan",
            "19915": "nan",
            "19920": "nan",
            "19925": "nan",
            "19930": "nan",
            "19935": "nan",
            "19940": "nan",
            "19945": "nan",
            "19950": "nan",
            "19955": "nan",
            "19960": "nan",
            "19965": "nan",
            "19970": "nan",
            "19975": "nan",
            "19980": "nan",
            "19985": "nan",
            "19990": "nan",
            "19995": "nan",
            "20000": 0.29517
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_release/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 25000, "step_interval": 5, "values": {"1": 10.51817, "5": 10.5175, "10": 10.51541, "15": 10.51677, "20": 10.13032, "25": 9.5518, "30": 9.44404, "35": 9.15174, "40": 9.05764, "45": 8.99256, "50": 8.75433, "55": 8.8141, "60": 8.47097, "65": 8.53559, "70": 8.20228, "75": 8.31011, "80": 7.96546, "85": 7.87162, "90": 7.66496, "95": 7.67741, "100": 7.63736, "105": 7.37164, "110": 7.52373, "115": 7.16816, "120": 6.99674, "125": 7.13817, "130": 6.93339, "135": 6.84533, "140": 7.05697, "145": 6.99313, "150": 6.99189, "155": 6.93579, "160": 6.91743, "165": 7.02675, "170": 6.84859, "175": 6.89494, "180": 6.58084, "185": 6.98964, "190": 6.97544, "195": 6.83777, "200": 6.86351, "205": 6.77972, "210": 6.8278, "215": 6.85589, "220": 6.64778, "225": 6.77656, "230": 6.83728, "235": 6.6403, "240": 6.56299, "245": 6.92769, "250": 6.67212, "255": 6.59089, "260": 6.6886, "265": 6.55814, "270": 6.47987, "275": 6.77552, "280": 6.61728, "285": 6.49133, "290": 6.58778, "295": 6.67183, "300": 6.35487, "305": 6.35101, "310": 6.95761, "315": 6.78118, "320": 6.73969, "325": 6.52394, "330": 6.67967, "335": 6.58673, "340": 6.44122, "345": 6.60091, "350": 6.4961, "355": 6.65303, "360": 6.55435, "365": 6.69325, "370": 6.52082, "375": 6.63467, "380": 6.35463, "385": 6.47242, "390": 6.52105, "395": 6.31308, "400": 6.58662, "405": 6.48288, "410": 6.58296, "415": 6.5595, "420": 6.49457, "425": 6.41838, "430": 6.56902, "435": 6.39676, "440": 6.56238, "445": 6.46871, "450": 6.62489, "455": 6.51513, "460": 6.82933, "465": 6.75526, "470": 6.4396, "475": 6.18229, "480": 6.48381, "485": 6.54946, "490": 6.21873, "495": 6.28572, "500": 6.52573, "505": 6.45653, "510": 6.50992, "515": 6.172, "520": 6.55684, "525": 6.50698, "530": 6.31881, "535": 6.13755, "540": 6.58936, "545": 6.42254, "550": 6.42606, "555": 6.39976, "560": 6.42189, "565": 6.30133, "570": 6.38344, "575": 6.587, "580": 6.46785, "585": 6.46954, "590": 6.21249, "595": 6.2525, "600": 6.44785, "605": 6.37774, "610": 6.45558, "615": 6.43486, "620": 6.47888, "625": 6.38285, "630": 6.47552, "635": 6.43607, "640": 6.34837, "645": 6.20656, "650": 6.30271, "655": 6.55191, "660": 6.33696, "665": 6.3171, "670": 6.42225, "675": 6.31522, "680": 6.38141, "685": 6.51535, "690": 6.29353, "695": 6.18153, "700": 6.09304, "705": 6.34929, "710": 6.1791, "715": 6.4177, "720": 6.42039, "725": 6.29226, "730": 6.38785, "735": 6.4945, "740": 6.63836, "745": 6.21078, "750": 6.22274, "755": 6.00822, "760": 6.54247, "765": 6.29186, "770": 6.43211, "775": 6.12405, "780": 6.26062, "785": 6.28638, "790": 6.45124, "795": 6.40875, "800": 6.1263, "805": 6.376, "810": 6.23893, "815": 6.16663, "820": 6.3575, "825": 6.62729, "830": 6.36179, "835": 6.19003, "840": 6.14768, "845": 6.4116, "850": 6.45332, "855": 6.48173, "860": 6.19826, "865": 6.36353, "870": 6.31679, "875": 6.23294, "880": 6.28484, "885": 6.35442, "890": 6.20864, "895": 6.2911, "900": 6.33959, "905": 6.23518, "910": 6.16488, "915": 6.44247, "920": 6.39225, "925": 6.17239, "930": 6.07364, "935": 6.44404, "940": 6.30443, "945": 6.06343, "950": 6.23702, "955": 6.02012, "960": 6.35362, "965": 6.18563, "970": 6.35383, "975": 6.08907, "980": 6.04181, "985": 6.33339, "990": 6.18815, "995": 6.33018, "1000": 6.23352, "1005": 6.4058, "1010": 6.19362, "1015": 6.11686, "1020": 6.08203, "1025": 6.35785, "1030": 6.11803, "1035": 6.03195, "1040": 6.01018, "1045": 6.60093, "1050": 6.34873, "1055": 6.09027, "1060": 6.16403, "1065": 5.85887, "1070": 6.25593, "1075": 5.94946, "1080": 6.10403, "1085": 6.13551, "1090": 6.02268, "1095": 6.25209, "1100": 6.08705, "1105": 6.29611, "1110": 6.19967, "1115": 6.1011, "1120": 6.16046, "1125": 6.14261, "1130": 5.95602, "1135": 6.32195, "1140": 6.40249, "1145": 6.20263, "1150": 6.09901, "1155": 6.03405, "1160": 6.24143, "1165": 6.22108, "1170": 6.39871, "1175": 6.05912, "1180": 6.13279, "1185": 6.24595, "1190": 6.3738, "1195": 6.2675, "1200": 6.02483, "1205": 6.24352, "1210": 6.24381, "1215": 6.08485, "1220": 6.06558, "1225": 6.41096, "1230": 6.22266, "1235": 6.00361, "1240": 6.57248, "1245": 6.07717, "1250": 5.98109, "1255": 6.08225, "1260": 6.1592, "1265": 6.05607, "1270": 5.94447, "1275": 6.10328, "1280": 5.88732, "1285": 5.91185, "1290": 6.11708, "1295": 6.10313, "1300": 6.24041, "1305": 5.96596, "1310": 6.03315, "1315": 6.02144, "1320": 6.07709, "1325": 6.25622, "1330": 6.06937, "1335": 6.03641, "1340": 6.21518, "1345": 6.06063, "1350": 5.92036, "1355": 6.26228, "1360": 6.32347, "1365": 6.07062, "1370": 6.03043, "1375": 6.2884, "1380": 6.14084, "1385": 5.91489, "1390": 5.98972, "1395": 6.00301, "1400": 6.21583, "1405": 5.99994, "1410": 6.1255, "1415": 6.33421, "1420": 6.14871, "1425": 6.1415, "1430": 5.9813, "1435": 6.35489, "1440": 6.31621, "1445": 6.28693, "1450": 6.07265, "1455": 5.78189, "1460": 6.23124, "1465": 5.9566, "1470": 6.00064, "1475": 6.16614, "1480": 6.20798, "1485": 6.02411, "1490": 6.20953, "1495": 6.17788, "1500": 6.16957, "1505": 6.25605, "1510": 6.35487, "1515": 6.12147, "1520": 5.88284, "1525": 5.97354, "1530": 5.97133, "1535": 6.18283, "1540": 6.42884, "1545": 6.28868, "1550": 6.06395, "1555": 5.97533, "1560": 6.20183, "1565": 6.15308, "1570": 6.08513, "1575": 6.07142, "1580": 6.05003, "1585": 6.03634, "1590": 6.30695, "1595": 6.23795, "1600": 5.93011, "1605": 5.94669, "1610": 6.00829, "1615": 6.17491, "1620": 6.09426, "1625": 6.02182, "1630": 6.05703, "1635": 6.34675, "1640": 6.08607, "1645": 6.16877, "1650": 6.27363, "1655": 6.26811, "1660": 6.27462, "1665": 5.87312, "1670": 6.17929, "1675": 5.91525, "1680": 6.13344, "1685": 6.32443, "1690": 6.2644, "1695": 6.01757, "1700": 6.26122, "1705": 6.12344, "1710": 6.19754, "1715": 5.98887, "1720": 6.29412, "1725": 6.22684, "1730": 6.20743, "1735": 6.15486, "1740": 6.04205, "1745": 5.94038, "1750": 6.35924, "1755": 6.40246, "1760": 5.86366, "1765": 6.07505, "1770": 6.23461, "1775": 5.94154, "1780": 6.26637, "1785": 5.81716, "1790": 6.02314, "1795": 6.26227, "1800": 6.14013, "1805": 5.98584, "1810": 6.63134, "1815": 6.21314, "1820": 6.25555, "1825": 6.14411, "1830": 6.15875, "1835": 5.99699, "1840": 5.99038, "1845": 6.1556, "1850": 6.20895, "1855": 6.12019, "1860": 6.11647, "1865": 5.97461, "1870": 6.24845, "1875": 6.15245, "1880": 5.91023, "1885": 6.19165, "1890": 6.13825, "1895": 6.04321, "1900": 6.00342, "1905": 6.33723, "1910": 6.37854, "1915": 6.26141, "1920": 6.11687, "1925": 5.85322, "1930": 5.98664, "1935": 6.09483, "1940": 6.00604, "1945": 5.875, "1950": 6.09782, "1955": 5.96109, "1960": 6.13107, "1965": 6.07042, "1970": 6.04534, "1975": 6.09995, "1980": 6.39683, "1985": 6.30799, "1990": 5.99772, "1995": 6.07316, "2000": 6.00425, "2005": 6.16907, "2010": 6.27124, "2015": 6.05479, "2020": 6.20438, "2025": 6.249, "2030": 6.10151, "2035": 6.14185, "2040": 5.82018, "2045": 5.99301, "2050": 6.18173, "2055": 6.02603, "2060": 6.33486, "2065": 6.18433, "2070": 5.95901, "2075": 6.05091, "2080": 6.11619, "2085": 6.03563, "2090": 6.0318, "2095": 6.24495, "2100": 6.14167, "2105": 6.11567, "2110": 6.02763, "2115": 6.15262, "2120": 6.02194, "2125": 6.25424, "2130": 6.20629, "2135": 5.96906, "2140": 6.07103, "2145": 5.75309, "2150": 6.16524, "2155": 6.07608, "2160": 5.98145, "2165": 5.97813, "2170": 6.01644, "2175": 6.01627, "2180": 5.99747, "2185": 5.89013, "2190": 5.86032, "2195": 6.00796, "2200": 6.26774, "2205": 6.1098, "2210": 5.94775, "2215": 6.14702, "2220": 6.37079, "2225": 5.97553, "2230": 6.16571, "2235": 6.05938, "2240": 5.95049, "2245": 6.23155, "2250": 5.96877, "2255": 5.79235, "2260": 6.13931, "2265": 5.93813, "2270": 5.98911, "2275": 6.05952, "2280": 6.1092, "2285": 5.95013, "2290": 6.02738, "2295": 5.97596, "2300": 6.21482, "2305": 5.7741, "2310": 6.07717, "2315": 6.07771, "2320": 6.02548, "2325": 5.79078, "2330": 5.91772, "2335": 5.99312, "2340": 6.05866, "2345": 5.87771, "2350": 5.93828, "2355": 6.16529, "2360": 5.97877, "2365": 6.16995, "2370": 6.11135, "2375": 6.18199, "2380": 6.05936, "2385": 6.1578, "2390": 6.19628, "2395": 6.07546, "2400": 5.78213, "2405": 5.94596, "2410": 6.21064, "2415": 6.03963, "2420": 6.08969, "2425": 5.83476, "2430": 6.3565, "2435": 6.17395, "2440": 5.93125, "2445": 5.83191, "2450": 6.03301, "2455": 5.69234, "2460": 5.98659, "2465": 6.25034, "2470": 5.84446, "2475": 5.93569, "2480": 5.9655, "2485": 6.0591, "2490": 6.0859, "2495": 6.12826, "2500": 6.07619, "2505": 5.95908, "2510": 5.81342, "2515": 6.04185, "2520": 6.2213, "2525": 5.81758, "2530": 5.96947, "2535": 5.96991, "2540": 6.16668, "2545": 5.96674, "2550": 6.00418, "2555": 5.72836, "2560": 6.20934, "2565": 5.85917, "2570": 6.08596, "2575": 5.69813, "2580": 5.91079, "2585": 5.9093, "2590": 6.12244, "2595": 6.13138, "2600": 6.04809, "2605": 6.04506, "2610": 6.30458, "2615": 6.07116, "2620": 5.98128, "2625": 6.11299, "2630": 6.1626, "2635": 6.16181, "2640": 5.92204, "2645": 6.01467, "2650": 6.03531, "2655": 5.86098, "2660": 5.84779, "2665": 6.17657, "2670": 6.00478, "2675": 5.97899, "2680": 6.13153, "2685": 5.98755, "2690": 6.06885, "2695": 5.96494, "2700": 6.31447, "2705": 6.07933, "2710": 6.13432, "2715": 5.84604, "2720": 5.97065, "2725": 6.19304, "2730": 6.05715, "2735": 6.17752, "2740": 5.88714, "2745": 6.11119, "2750": 5.98743, "2755": 5.99078, "2760": 5.87443, "2765": 6.18089, "2770": 6.03467, "2775": 6.23017, "2780": 6.20071, "2785": 6.02831, "2790": 6.03936, "2795": 5.87056, "2800": 6.12169, "2805": 5.99708, "2810": 6.1354, "2815": 6.09528, "2820": 6.0085, "2825": 5.87472, "2830": 6.28475, "2835": 5.95398, "2840": 6.09487, "2845": 5.88595, "2850": 5.85459, "2855": 6.16742, "2860": 5.83636, "2865": 6.09151, "2870": 6.06759, "2875": 6.11959, "2880": 5.91152, "2885": 5.93185, "2890": 6.05905, "2895": 6.07588, "2900": 5.78771, "2905": 6.10237, "2910": 6.00624, "2915": 6.16915, "2920": 5.9086, "2925": 5.77926, "2930": 6.03226, "2935": 6.03469, "2940": 6.19466, "2945": 5.90452, "2950": 6.10891, "2955": 6.00063, "2960": 5.9171, "2965": 5.88471, "2970": 5.88766, "2975": 5.92895, "2980": 6.14647, "2985": 6.1873, "2990": 5.9706, "2995": 6.26542, "3000": 6.1188, "3005": 5.8684, "3010": 6.31907, "3015": 5.86517, "3020": 5.82006, "3025": 5.88243, "3030": 6.00027, "3035": 5.98462, "3040": 6.25786, "3045": 5.8475, "3050": 6.22786, "3055": 5.88956, "3060": 5.9048, "3065": 5.94588, "3070": 6.14207, "3075": 5.84503, "3080": 5.89853, "3085": 6.08275, "3090": 6.17043, "3095": 6.03701, "3100": 6.10486, "3105": 5.99793, "3110": 5.84414, "3115": 6.06988, "3120": 6.28283, "3125": 6.19285, "3130": 5.98218, "3135": 6.12629, "3140": 5.85226, "3145": 6.36395, "3150": 6.01258, "3155": 6.25503, "3160": 6.04938, "3165": 6.07205, "3170": 6.20961, "3175": 6.02655, "3180": 6.02716, "3185": 6.06282, "3190": 5.99221, "3195": 5.8809, "3200": 6.06466, "3205": 5.9382, "3210": 5.98764, "3215": 5.97029, "3220": 5.89104, "3225": 5.79917, "3230": 6.12085, "3235": 5.8384, "3240": 6.12442, "3245": 6.14385, "3250": 5.97425, "3255": 6.0545, "3260": 5.94933, "3265": 6.07255, "3270": 6.06938, "3275": 6.07655, "3280": 6.0017, "3285": 5.8481, "3290": 6.07231, "3295": 5.77201, "3300": 5.9592, "3305": 5.98806, "3310": 5.84933, "3315": 5.45612, "3320": 5.9375, "3325": 5.86696, "3330": 5.9198, "3335": 6.04057, "3340": 6.01008, "3345": 5.96425, "3350": 5.88267, "3355": 5.92749, "3360": 5.78791, "3365": 6.28219, "3370": 6.08834, "3375": 5.85299, "3380": 6.17452, "3385": 5.93284, "3390": 5.88271, "3395": 5.99314, "3400": 6.03594, "3405": 5.98793, "3410": 6.2627, "3415": 5.96437, "3420": 5.76598, "3425": 6.257, "3430": 5.9041, "3435": 5.93149, "3440": 5.78047, "3445": 5.95834, "3450": 6.04761, "3455": 5.89934, "3460": 5.95712, "3465": 6.02859, "3470": 5.87991, "3475": 5.97779, "3480": 5.83089, "3485": 5.92359, "3490": 5.9974, "3495": 6.074, "3500": 6.00408, "3505": 6.12925, "3510": 6.03277, "3515": 6.05753, "3520": 6.02408, "3525": 6.07911, "3530": 5.91587, "3535": 6.10122, "3540": 6.01707, "3545": 6.23434, "3550": 6.02742, "3555": 6.20687, "3560": 6.03194, "3565": 6.01322, "3570": 6.01305, "3575": 5.75022, "3580": 6.11734, "3585": 5.8682, "3590": 5.97368, "3595": 5.85319, "3600": 5.66878, "3605": 6.05195, "3610": 6.13052, "3615": 5.94939, "3620": 5.97577, "3625": 6.17555, "3630": 5.85973, "3635": 6.06535, "3640": 6.22585, "3645": 5.82602, "3650": 6.05082, "3655": 5.85874, "3660": 5.98228, "3665": 5.82464, "3670": 6.1001, "3675": 5.95103, "3680": 5.99762, "3685": 5.89004, "3690": 5.91176, "3695": 6.05488, "3700": 5.9664, "3705": 6.02399, "3710": 5.93288, "3715": 5.78374, "3720": 6.0928, "3725": 6.20757, "3730": 5.97971, "3735": 5.90662, "3740": 5.87716, "3745": 5.99158, "3750": 6.1208, "3755": 5.81412, "3760": 6.07629, "3765": 5.72966, "3770": 6.11711, "3775": 5.92014, "3780": 6.22326, "3785": 6.01246, "3790": 5.90445, "3795": 6.38234, "3800": 5.71407, "3805": 6.04038, "3810": 5.99639, "3815": 5.97388, "3820": 5.91052, "3825": 6.01581, "3830": 5.69454, "3835": 5.82222, "3840": 5.92453, "3845": 5.86473, "3850": 5.75107, "3855": 6.0997, "3860": 6.0792, "3865": 5.80667, "3870": 5.8698, "3875": 5.91752, "3880": 6.05322, "3885": 6.07776, "3890": 5.93372, "3895": 6.02571, "3900": 5.93583, "3905": 5.77554, "3910": 5.98079, "3915": 5.93836, "3920": 6.0384, "3925": 6.05052, "3930": 5.82468, "3935": 6.00309, "3940": 5.97644, "3945": 5.80364, "3950": 5.71242, "3955": 5.64709, "3960": 5.6928, "3965": 5.8392, "3970": 5.95118, "3975": 6.03905, "3980": 6.01592, "3985": 5.88058, "3990": 5.89601, "3995": 6.06335, "4000": 5.92695, "4005": 5.83767, "4010": 5.90926, "4015": 6.05247, "4020": 5.72979, "4025": 5.9811, "4030": 5.85282, "4035": 5.61899, "4040": 6.1771, "4045": 5.99571, "4050": 5.91783, "4055": 6.17885, "4060": 5.84221, "4065": 5.8833, "4070": 5.96066, "4075": 5.86633, "4080": 5.83322, "4085": 6.04471, "4090": 6.10456, "4095": 5.74723, "4100": 5.75289, "4105": 6.14218, "4110": 5.64252, "4115": 6.01125, "4120": 5.9786, "4125": 5.84296, "4130": 5.94813, "4135": 6.13013, "4140": 5.88369, "4145": 6.04204, "4150": 5.89849, "4155": 5.90384, "4160": 5.70369, "4165": 5.86758, "4170": 5.9657, "4175": 5.57474, "4180": 6.11062, "4185": 5.98628, "4190": 5.84044, "4195": 6.09738, "4200": 5.57131, "4205": 5.80155, "4210": 6.02393, "4215": 5.88891, "4220": 5.82996, "4225": 5.7451, "4230": 5.8151, "4235": 5.78634, "4240": 5.8465, "4245": 5.84784, "4250": 6.11465, "4255": 6.01487, "4260": 5.89099, "4265": 6.08233, "4270": 6.03876, "4275": 5.78635, "4280": 5.96225, "4285": 5.44958, "4290": 5.85094, "4295": 5.75537, "4300": 6.0828, "4305": 5.79049, "4310": 5.88714, "4315": 6.00792, "4320": 5.61666, "4325": 5.91629, "4330": 5.87369, "4335": 6.09393, "4340": 5.64895, "4345": 5.98922, "4350": 6.18923, "4355": 6.0726, "4360": 5.94713, "4365": 5.9624, "4370": 6.12777, "4375": 5.81442, "4380": 6.2151, "4385": 5.95352, "4390": 5.90547, "4395": 5.85397, "4400": 5.96785, "4405": 6.06758, "4410": 5.69651, "4415": 5.68279, "4420": 5.97437, "4425": 5.99099, "4430": 5.73795, "4435": 5.6077, "4440": 5.94604, "4445": 5.98559, "4450": 5.98021, "4455": 5.98509, "4460": 5.91287, "4465": 5.75576, "4470": 6.01444, "4475": 5.97226, "4480": 5.8727, "4485": 5.83316, "4490": 5.72116, "4495": 6.08876, "4500": 5.99656, "4505": 6.06873, "4510": 5.7272, "4515": 6.04657, "4520": 6.02537, "4525": 5.66054, "4530": 5.84621, "4535": 5.78212, "4540": 5.94376, "4545": 5.86302, "4550": 6.06617, "4555": 5.7309, "4560": 5.987, "4565": 5.96833, "4570": 5.8168, "4575": 6.02794, "4580": 5.7049, "4585": 5.98809, "4590": 5.97186, "4595": 5.7722, "4600": 5.96165, "4605": 5.89799, "4610": 5.82583, "4615": 5.9817, "4620": 6.10493, "4625": 5.71283, "4630": 5.83466, "4635": 5.85632, "4640": 5.76916, "4645": 5.82717, "4650": 5.78336, "4655": 5.95854, "4660": 6.04608, "4665": 5.75261, "4670": 6.06464, "4675": 6.22357, "4680": 5.9045, "4685": 5.673, "4690": 5.86325, "4695": 5.70763, "4700": 5.98168, "4705": 5.85174, "4710": 5.96109, "4715": 6.11571, "4720": 5.99887, "4725": 5.93275, "4730": 6.04045, "4735": 5.80809, "4740": 5.94978, "4745": 5.91324, "4750": 5.85717, "4755": 5.65827, "4760": 5.92101, "4765": 5.93362, "4770": 6.09067, "4775": 5.87135, "4780": 6.08819, "4785": 5.8731, "4790": 5.94972, "4795": 5.84614, "4800": 5.64225, "4805": 5.70128, "4810": 5.89968, "4815": 5.9997, "4820": 5.49148, "4825": 5.98043, "4830": 5.94247, "4835": 5.81849, "4840": 6.12999, "4845": 6.01971, "4850": 5.91519, "4855": 6.16703, "4860": 6.06059, "4865": 5.81255, "4870": 5.94657, "4875": 5.96162, "4880": 5.8084, "4885": 6.05684, "4890": 5.7932, "4895": 5.95185, "4900": 5.96264, "4905": 5.88461, "4910": 5.77821, "4915": 5.92909, "4920": 5.9975, "4925": 6.12044, "4930": 5.94594, "4935": 6.02831, "4940": 5.86724, "4945": 5.86159, "4950": 5.78799, "4955": 5.98601, "4960": 5.66078, "4965": 6.06929, "4970": 5.8495, "4975": 6.06051, "4980": 6.09698, "4985": 5.59213, "4990": 5.81326, "4995": 5.91457, "5000": 6.07364, "5005": 5.94825, "5010": 5.95511, "5015": 5.83173, "5020": 6.07714, "5025": 5.91783, "5030": 6.08646, "5035": 5.86503, "5040": 5.89568, "5045": 6.08986, "5050": 5.78338, "5055": 5.95993, "5060": 6.13316, "5065": 5.74849, "5070": 5.8501, "5075": 5.81988, "5080": 5.89304, "5085": 5.87833, "5090": 5.88168, "5095": 6.03774, "5100": 5.67416, "5105": 5.82695, "5110": 5.87161, "5115": 5.91142, "5120": 5.89405, "5125": 6.02768, "5130": 6.11376, "5135": 5.95289, "5140": 5.84183, "5145": 5.67579, "5150": 5.89657, "5155": 5.82823, "5160": 5.93903, "5165": 5.84558, "5170": 6.02827, "5175": 6.09318, "5180": 6.08378, "5185": 6.10424, "5190": 6.16258, "5195": 5.99145, "5200": 6.05603, "5205": 5.86883, "5210": 5.81461, "5215": 5.96669, "5220": 6.03105, "5225": 5.66095, "5230": 6.12338, "5235": 5.72441, "5240": 5.92626, "5245": 6.08622, "5250": 5.75723, "5255": 6.03679, "5260": 5.72606, "5265": 5.90968, "5270": 5.84854, "5275": 5.68685, "5280": 5.75602, "5285": 6.06916, "5290": 6.02692, "5295": 5.94059, "5300": 5.95885, "5305": 6.00005, "5310": 5.67134, "5315": 5.748, "5320": 5.63422, "5325": 5.87397, "5330": 5.66023, "5335": 5.8339, "5340": 5.73971, "5345": 5.78845, "5350": 5.9025, "5355": 5.92206, "5360": 6.07336, "5365": 5.77985, "5370": 5.85376, "5375": 5.6049, "5380": 5.91661, "5385": 6.12394, "5390": 5.82189, "5395": 5.87105, "5400": 5.95236, "5405": 5.68995, "5410": 5.46309, "5415": 6.06397, "5420": 5.9973, "5425": 5.90584, "5430": 5.67683, "5435": 5.977, "5440": 5.87155, "5445": 5.80642, "5450": 6.23539, "5455": 6.06937, "5460": 5.96546, "5465": 5.7541, "5470": 6.10511, "5475": 5.91961, "5480": 5.81743, "5485": 5.80802, "5490": 6.05039, "5495": 5.82617, "5500": 5.93446, "5505": 5.74063, "5510": 5.89965, "5515": 6.04098, "5520": 6.0738, "5525": 5.73201, "5530": 5.97738, "5535": 5.64904, "5540": 5.67747, "5545": 5.69992, "5550": 5.92799, "5555": 5.85669, "5560": 5.83134, "5565": 5.56101, "5570": 5.92158, "5575": 6.0932, "5580": 5.95349, "5585": 6.11276, "5590": 5.98373, "5595": 6.11635, "5600": 6.00274, "5605": 6.05319, "5610": 5.97314, "5615": 5.71298, "5620": 6.24565, "5625": 6.06485, "5630": 6.1334, "5635": 5.88619, "5640": 5.75622, "5645": 6.13513, "5650": 5.876, "5655": 5.69305, "5660": 6.13191, "5665": 5.81365, "5670": 5.73347, "5675": 5.924, "5680": 5.84027, "5685": 5.84329, "5690": 5.75277, "5695": 5.84447, "5700": 5.83425, "5705": 5.87433, "5710": 6.12193, "5715": 5.80536, "5720": 5.68856, "5725": 5.85027, "5730": 5.63776, "5735": 5.81068, "5740": 5.67376, "5745": 5.98577, "5750": 6.02996, "5755": 5.86208, "5760": 6.25156, "5765": 5.71894, "5770": 5.82406, "5775": 5.71575, "5780": 6.12775, "5785": 6.09879, "5790": 6.08178, "5795": 5.93148, "5800": 6.08462, "5805": 5.65564, "5810": 5.55791, "5815": 5.93041, "5820": 5.68461, "5825": 5.84204, "5830": 6.03798, "5835": 6.05667, "5840": 5.86697, "5845": 5.83712, "5850": 5.88625, "5855": 5.96867, "5860": 6.00277, "5865": 5.98086, "5870": 5.63804, "5875": 5.84283, "5880": 5.96442, "5885": 5.97583, "5890": 5.99021, "5895": 5.64368, "5900": 6.02987, "5905": 6.089, "5910": 5.78788, "5915": 5.95501, "5920": 5.71454, "5925": 5.81768, "5930": 5.78305, "5935": 5.9069, "5940": 5.85167, "5945": 5.86709, "5950": 5.90808, "5955": 5.81667, "5960": 5.90225, "5965": 5.79852, "5970": 5.91004, "5975": 5.77538, "5980": 5.99056, "5985": 5.7286, "5990": 5.8351, "5995": 5.97124, "6000": 5.92089, "6005": 6.03179, "6010": 5.88793, "6015": 5.88359, "6020": 6.07171, "6025": 5.89094, "6030": 5.94989, "6035": 5.82931, "6040": 5.82518, "6045": 6.00943, "6050": 5.80604, "6055": 5.95161, "6060": 5.8181, "6065": 6.00115, "6070": 5.75933, "6075": 5.59026, "6080": 5.78149, "6085": 5.87118, "6090": 6.03671, "6095": 5.97082, "6100": 5.57972, "6105": 5.72684, "6110": 6.0764, "6115": 6.0097, "6120": 6.03518, "6125": 5.69707, "6130": 5.95461, "6135": 5.54152, "6140": 5.87637, "6145": 5.75229, "6150": 5.8015, "6155": 5.85735, "6160": 5.78699, "6165": 5.97731, "6170": 5.99949, "6175": 5.98108, "6180": 5.94716, "6185": 5.59701, "6190": 5.87248, "6195": 5.89317, "6200": 5.69626, "6205": 5.45686, "6210": 5.7563, "6215": 5.63617, "6220": 5.88549, "6225": 5.88844, "6230": 5.69624, "6235": 5.86791, "6240": 5.82125, "6245": 5.92873, "6250": 5.96425, "6255": 5.7482, "6260": 5.91595, "6265": 5.79466, "6270": 5.93485, "6275": 5.95373, "6280": 5.7759, "6285": 5.60686, "6290": 5.84971, "6295": 5.71141, "6300": 5.74117, "6305": 5.92574, "6310": 5.61886, "6315": 5.60544, "6320": 5.86826, "6325": 5.93603, "6330": 5.84852, "6335": 5.91784, "6340": 6.0349, "6345": 5.87701, "6350": 5.83306, "6355": 5.81394, "6360": 5.72538, "6365": 6.01883, "6370": 5.77839, "6375": 5.63804, "6380": 5.91633, "6385": 5.82156, "6390": 5.69607, "6395": 6.04616, "6400": 5.77809, "6405": 5.97584, "6410": 5.80263, "6415": 5.97836, "6420": 5.87948, "6425": 5.98962, "6430": 5.79481, "6435": 5.58722, "6440": 5.90316, "6445": 5.9463, "6450": 5.97537, "6455": 5.91002, "6460": 5.972, "6465": 5.75623, "6470": 5.7302, "6475": 5.79067, "6480": 6.09147, "6485": 5.85549, "6490": 5.7152, "6495": 5.79304, "6500": 6.00552, "6505": 5.77009, "6510": 5.78143, "6515": 5.90694, "6520": 6.09193, "6525": 5.81564, "6530": 5.98241, "6535": 5.79532, "6540": 6.03136, "6545": 5.86131, "6550": 5.94544, "6555": 5.66157, "6560": 5.90597, "6565": 5.87107, "6570": 5.8972, "6575": 5.9105, "6580": 5.83622, "6585": 6.03871, "6590": 5.71132, "6595": 6.00204, "6600": 5.90645, "6605": 6.09848, "6610": 6.04391, "6615": 5.71854, "6620": 5.75878, "6625": 5.9494, "6630": 5.70529, "6635": 5.85265, "6640": 5.72095, "6645": 5.66608, "6650": 5.79589, "6655": 6.06189, "6660": 5.82669, "6665": 5.89547, "6670": 6.07109, "6675": 5.74193, "6680": 5.93431, "6685": 5.89928, "6690": 5.79495, "6695": 5.91007, "6700": 5.65257, "6705": 6.01783, "6710": 5.71358, "6715": 5.82411, "6720": 5.98228, "6725": 5.98447, "6730": 5.54247, "6735": 5.82771, "6740": 5.94344, "6745": 5.72387, "6750": 5.65983, "6755": 5.68804, "6760": 5.8599, "6765": 5.94824, "6770": 5.89592, "6775": 5.71643, "6780": 5.75446, "6785": 5.89789, "6790": 5.60939, "6795": 5.84857, "6800": 5.66142, "6805": 5.72656, "6810": 5.83857, "6815": 5.9619, "6820": 5.58571, "6825": 6.01492, "6830": 6.04983, "6835": 5.84127, "6840": 6.04584, "6845": 5.79189, "6850": 5.62788, "6855": 5.83524, "6860": 6.03239, "6865": 5.67083, "6870": 5.79229, "6875": 5.85901, "6880": 6.10503, "6885": 5.96496, "6890": 5.91272, "6895": 5.77253, "6900": 5.90441, "6905": 6.00789, "6910": 5.78713, "6915": 5.90119, "6920": 6.10898, "6925": 5.82724, "6930": 5.98957, "6935": 5.94344, "6940": 5.82049, "6945": 5.63854, "6950": 5.91236, "6955": 5.63333, "6960": 5.81133, "6965": 5.69925, "6970": 5.85376, "6975": 5.85359, "6980": 5.89083, "6985": 5.91882, "6990": 5.73713, "6995": 5.89793, "7000": 5.82103, "7005": 5.90244, "7010": 5.72638, "7015": 5.95834, "7020": 5.64784, "7025": 5.8768, "7030": 5.90527, "7035": 5.91034, "7040": 5.82568, "7045": 5.90077, "7050": 5.82944, "7055": 6.15177, "7060": 5.72798, "7065": 5.52932, "7070": 5.87274, "7075": 5.82944, "7080": 5.81256, "7085": 6.05009, "7090": 5.91977, "7095": 5.70034, "7100": 5.87272, "7105": 5.82099, "7110": 5.79679, "7115": 5.78455, "7120": 5.80729, "7125": 5.78187, "7130": 5.90432, "7135": 5.65231, "7140": 5.95303, "7145": 5.74481, "7150": 5.84274, "7155": 5.70761, "7160": 5.54263, "7165": 5.78246, "7170": 5.92449, "7175": 5.75226, "7180": 5.83466, "7185": 6.01277, "7190": 5.7504, "7195": 6.00854, "7200": 5.36623, "7205": 5.82657, "7210": 5.75722, "7215": 5.70763, "7220": 5.98623, "7225": 5.87521, "7230": 5.83534, "7235": 5.78719, "7240": 5.81129, "7245": 5.75627, "7250": 5.94062, "7255": 5.74688, "7260": 5.90828, "7265": 5.80926, "7270": 5.65987, "7275": 5.61302, "7280": 5.68157, "7285": 6.09783, "7290": 5.78166, "7295": 5.82733, "7300": 6.03172, "7305": 5.80954, "7310": 5.75934, "7315": 5.67646, "7320": 5.82747, "7325": 5.94145, "7330": 5.89161, "7335": 5.81415, "7340": 6.12372, "7345": 5.89585, "7350": 5.89517, "7355": 5.76747, "7360": 5.86277, "7365": 5.97572, "7370": 5.92695, "7375": 5.9998, "7380": 5.62649, "7385": 5.60713, "7390": 5.37832, "7395": 5.84424, "7400": 6.0214, "7405": 5.6846, "7410": 5.53331, "7415": 5.74369, "7420": 5.74541, "7425": 5.71479, "7430": 5.66435, "7435": 5.90399, "7440": 5.78136, "7445": 5.99807, "7450": 5.6349, "7455": 5.73046, "7460": 5.92914, "7465": 5.66417, "7470": 5.90493, "7475": 5.893, "7480": 6.11112, "7485": 6.01242, "7490": 5.78887, "7495": 5.91944, "7500": 5.7222, "7505": 5.46517, "7510": 5.63186, "7515": 5.80544, "7520": 5.56263, "7525": 6.17304, "7530": 5.62589, "7535": 5.87513, "7540": 5.64947, "7545": 5.82008, "7550": 6.10426, "7555": 5.61303, "7560": 5.50947, "7565": 5.72512, "7570": 5.87354, "7575": 5.82948, "7580": 5.85046, "7585": 5.82411, "7590": 5.72754, "7595": 5.91624, "7600": 5.95791, "7605": 5.7625, "7610": 5.86581, "7615": 5.7302, "7620": 5.93153, "7625": 5.5793, "7630": 5.95246, "7635": 5.74829, "7640": 5.63638, "7645": 5.8871, "7650": 5.90615, "7655": 5.87861, "7660": 5.94713, "7665": 5.70738, "7670": 5.96559, "7675": 5.74314, "7680": 5.78888, "7685": 5.58587, "7690": 6.02031, "7695": 5.72888, "7700": 5.88408, "7705": 5.83287, "7710": 5.95595, "7715": 5.99481, "7720": 5.86884, "7725": 5.8895, "7730": 5.81375, "7735": 5.93829, "7740": 5.83309, "7745": 5.77619, "7750": 5.83994, "7755": 5.97219, "7760": 6.1147, "7765": 5.59473, "7770": 5.76657, "7775": 5.74115, "7780": 5.85874, "7785": 5.6001, "7790": 5.884, "7795": 5.94862, "7800": 5.70014, "7805": 5.81943, "7810": 5.41855, "7815": 5.92261, "7820": 5.99822, "7825": 5.73806, "7830": 5.67513, "7835": 5.61757, "7840": 5.78784, "7845": 5.94562, "7850": 5.77884, "7855": 5.80756, "7860": 5.54746, "7865": 5.25609, "7870": 5.94755, "7875": 5.6905, "7880": 5.79617, "7885": 5.68094, "7890": 5.7579, "7895": 5.58516, "7900": 5.95009, "7905": 5.7974, "7910": 5.83403, "7915": 5.62173, "7920": 5.65485, "7925": 5.67705, "7930": 5.90316, "7935": 5.89119, "7940": 5.64913, "7945": 5.76924, "7950": 6.01473, "7955": 5.71644, "7960": 5.77164, "7965": 5.72695, "7970": 5.74712, "7975": 5.90801, "7980": 5.44652, "7985": 5.91901, "7990": 5.69615, "7995": 5.6298, "8000": 5.72284, "8005": 5.86332, "8010": 5.48412, "8015": 5.8511, "8020": 5.80825, "8025": 5.40491, "8030": 5.74312, "8035": 5.64714, "8040": 5.72874, "8045": 5.56588, "8050": 5.79171, "8055": 5.92568, "8060": 5.69134, "8065": 5.86486, "8070": 5.82576, "8075": 5.84657, "8080": 5.64556, "8085": 5.76015, "8090": 5.69718, "8095": 5.57212, "8100": 5.82403, "8105": 5.71188, "8110": 5.59603, "8115": 5.60957, "8120": 5.80626, "8125": 5.516, "8130": 5.93733, "8135": 5.6502, "8140": 5.8428, "8145": 5.80404, "8150": 6.05433, "8155": 5.9796, "8160": 5.73751, "8165": 5.66863, "8170": 5.83506, "8175": 6.02733, "8180": 5.68482, "8185": 5.63858, "8190": 5.71248, "8195": 5.62369, "8200": 5.75792, "8205": 5.5708, "8210": 5.75834, "8215": 5.54481, "8220": 5.67746, "8225": 5.73509, "8230": 5.90221, "8235": 5.63434, "8240": 5.60042, "8245": 5.55364, "8250": 5.87727, "8255": 5.77999, "8260": 5.7375, "8265": 5.69489, "8270": 5.80426, "8275": 5.6306, "8280": 5.81055, "8285": 5.53062, "8290": 5.67358, "8295": 5.81196, "8300": 5.73593, "8305": 5.69522, "8310": 5.84368, "8315": 5.48173, "8320": 5.73258, "8325": 5.9263, "8330": 5.80573, "8335": 5.83229, "8340": 5.67704, "8345": 5.79635, "8350": 5.50639, "8355": 5.82299, "8360": 5.77546, "8365": 5.66248, "8370": 5.76869, "8375": 5.69422, "8380": 5.85131, "8385": 5.78815, "8390": 5.60328, "8395": 5.67423, "8400": 5.81522, "8405": 5.68575, "8410": 5.71193, "8415": 5.84812, "8420": 5.82082, "8425": 5.81672, "8430": 5.43725, "8435": 5.49064, "8440": 5.73602, "8445": 5.40615, "8450": 5.83461, "8455": 5.6964, "8460": 5.71148, "8465": 5.41237, "8470": 5.91572, "8475": 5.74038, "8480": 5.44003, "8485": 5.86751, "8490": 5.75691, "8495": 5.50735, "8500": 5.85486, "8505": 5.46488, "8510": 5.94808, "8515": 5.609, "8520": 5.76642, "8525": 5.38807, "8530": 5.65796, "8535": 5.92774, "8540": 5.55606, "8545": 5.79748, "8550": 5.61649, "8555": 5.75795, "8560": 5.81342, "8565": 5.83405, "8570": 5.38373, "8575": 5.71012, "8580": 5.66105, "8585": 5.4942, "8590": 5.68779, "8595": 5.56707, "8600": 5.82228, "8605": 5.74586, "8610": 5.70183, "8615": 5.86348, "8620": 5.45154, "8625": 5.6085, "8630": 5.79859, "8635": 5.49512, "8640": 5.64539, "8645": 5.745, "8650": 5.70007, "8655": 5.59801, "8660": 5.6, "8665": 5.74373, "8670": 5.40236, "8675": 5.54804, "8680": 5.79269, "8685": 5.81167, "8690": 5.69646, "8695": 5.84499, "8700": 5.66007, "8705": 5.70066, "8710": 5.70913, "8715": 5.60484, "8720": 5.64372, "8725": 5.75667, "8730": 5.7823, "8735": 5.84955, "8740": 5.74703, "8745": 5.61787, "8750": 5.94007, "8755": 5.61608, "8760": 5.46402, "8765": 5.59276, "8770": 6.02898, "8775": 5.51498, "8780": 5.9845, "8785": 5.70372, "8790": 5.82905, "8795": 5.81061, "8800": 5.71435, "8805": 5.83535, "8810": 5.80174, "8815": 5.61371, "8820": 5.66594, "8825": 5.46885, "8830": 5.75579, "8835": 5.74373, "8840": 5.49764, "8845": 5.60756, "8850": 5.73135, "8855": 5.44014, "8860": 5.51977, "8865": 5.62161, "8870": 5.54434, "8875": 5.71215, "8880": 5.63476, "8885": 5.69293, "8890": 5.7104, "8895": 5.62832, "8900": 5.43094, "8905": 5.70281, "8910": 5.70314, "8915": 5.91589, "8920": 5.16861, "8925": 5.67876, "8930": 5.6559, "8935": 5.07527, "8940": 5.86749, "8945": 5.78208, "8950": 5.65786, "8955": 5.74818, "8960": 5.25266, "8965": 5.9926, "8970": 5.60364, "8975": 5.39638, "8980": 5.721, "8985": 5.67142, "8990": 5.8837, "8995": 5.45353, "9000": 5.50147, "9005": 5.49486, "9010": 5.75017, "9015": 5.63327, "9020": 5.75668, "9025": 5.67242, "9030": 5.4956, "9035": 5.80081, "9040": 5.90551, "9045": 5.70986, "9050": 5.76715, "9055": 5.72592, "9060": 5.73708, "9065": 5.5525, "9070": 5.65479, "9075": 5.66817, "9080": 5.65021, "9085": 5.34984, "9090": 5.66204, "9095": 5.40914, "9100": 5.55722, "9105": 5.76811, "9110": 5.78093, "9115": 5.59328, "9120": 5.66293, "9125": 5.57456, "9130": 5.5401, "9135": 5.73938, "9140": 5.86389, "9145": 5.66871, "9150": 5.82171, "9155": 5.56699, "9160": 5.41264, "9165": 5.57659, "9170": 5.64255, "9175": 5.73597, "9180": 5.43397, "9185": 5.63406, "9190": 5.6363, "9195": 5.6396, "9200": 5.58592, "9205": 5.75986, "9210": 5.72434, "9215": 5.68457, "9220": 5.48039, "9225": 5.67313, "9230": 5.86744, "9235": 5.65768, "9240": 5.51386, "9245": 5.79919, "9250": 5.73313, "9255": 5.55845, "9260": 5.35994, "9265": 5.75772, "9270": 5.66636, "9275": 5.53637, "9280": 5.36932, "9285": 5.79772, "9290": 5.61365, "9295": 5.90657, "9300": 5.70808, "9305": 5.7012, "9310": 5.33009, "9315": 5.62003, "9320": 5.58207, "9325": 5.49885, "9330": 5.54335, "9335": 5.82413, "9340": 5.50917, "9345": 5.77417, "9350": 5.48905, "9355": 5.42847, "9360": 5.51044, "9365": 5.29568, "9370": 5.35347, "9375": 5.65943, "9380": 5.59457, "9385": 5.55737, "9390": 5.63691, "9395": 5.57395, "9400": 5.58915, "9405": 5.29538, "9410": 5.4566, "9415": 5.48807, "9420": 5.56795, "9425": 5.75716, "9430": 5.37538, "9435": 5.09477, "9440": 5.59755, "9445": 5.63519, "9450": 5.5206, "9455": 5.44631, "9460": 5.62906, "9465": 5.48316, "9470": 5.68383, "9475": 5.6122, "9480": 5.33545, "9485": 5.65824, "9490": 5.72671, "9495": 5.63357, "9500": 5.63213, "9505": 5.84218, "9510": 5.61837, "9515": 5.42445, "9520": 5.50514, "9525": 5.8456, "9530": 5.70762, "9535": 5.51983, "9540": 5.42291, "9545": 5.48033, "9550": 5.41643, "9555": 5.70902, "9560": 5.55505, "9565": 5.46654, "9570": 5.54315, "9575": 5.64194, "9580": 5.48847, "9585": 5.33709, "9590": 5.60695, "9595": 5.4822, "9600": 5.58624, "9605": 5.37234, "9610": 5.31541, "9615": 5.09623, "9620": 5.54282, "9625": 5.49912, "9630": 5.71081, "9635": 5.52619, "9640": 5.62244, "9645": 5.41501, "9650": 5.75509, "9655": 5.62307, "9660": 5.3647, "9665": 5.30849, "9670": 5.72415, "9675": 5.23017, "9680": 5.5966, "9685": 5.77998, "9690": 5.51237, "9695": 5.48578, "9700": 5.37464, "9705": 5.61373, "9710": 5.68007, "9715": 5.54804, "9720": 5.65807, "9725": 5.3914, "9730": 5.46214, "9735": 5.40781, "9740": 5.72982, "9745": 5.48765, "9750": 5.33607, "9755": 5.42646, "9760": 5.58826, "9765": 5.62524, "9770": 5.19389, "9775": 5.58636, "9780": 5.57133, "9785": 5.33364, "9790": 5.45077, "9795": 5.58703, "9800": 5.50592, "9805": 5.47202, "9810": 5.23286, "9815": 5.49554, "9820": 5.43161, "9825": 5.60899, "9830": 5.36394, "9835": 5.50149, "9840": 5.40274, "9845": 5.74392, "9850": 5.35515, "9855": 5.6459, "9860": 5.44274, "9865": 5.31234, "9870": 5.57249, "9875": 5.5532, "9880": 5.00493, "9885": 5.5462, "9890": 5.65539, "9895": 5.37119, "9900": 5.51365, "9905": 5.33651, "9910": 5.50684, "9915": 5.24271, "9920": 5.66817, "9925": 5.54969, "9930": 5.4118, "9935": 5.54992, "9940": 5.37945, "9945": 5.50179, "9950": 5.5705, "9955": 5.28958, "9960": 5.69402, "9965": 5.57838, "9970": 5.49323, "9975": 5.50238, "9980": 5.62863, "9985": 5.5802, "9990": 5.76479, "9995": 5.37461, "10000": 5.37217, "10005": 5.36176, "10010": 5.54772, "10015": 5.73754, "10020": 5.7272, "10025": 5.60862, "10030": 5.0291, "10035": 5.60087, "10040": 5.441, "10045": 5.77326, "10050": 5.69889, "10055": 5.69791, "10060": 5.36611, "10065": 5.4376, "10070": 5.39917, "10075": 5.34005, "10080": 5.51841, "10085": 5.34771, "10090": 5.6845, "10095": 5.46646, "10100": 5.39041, "10105": 5.40078, "10110": 5.65632, "10115": 5.60608, "10120": 5.02873, "10125": 5.30725, "10130": 5.37288, "10135": 5.5169, "10140": 5.61937, "10145": 5.3742, "10150": 5.34816, "10155": 5.47273, "10160": 5.38916, "10165": 5.58045, "10170": 5.45593, "10175": 5.42438, "10180": 5.2709, "10185": 5.39061, "10190": 5.23709, "10195": 5.39072, "10200": 5.34009, "10205": 5.37003, "10210": 5.57952, "10215": 5.35967, "10220": 5.59501, "10225": 5.41379, "10230": 5.4957, "10235": 5.42919, "10240": 5.52993, "10245": 5.46694, "10250": 5.13013, "10255": 5.08857, "10260": 5.48043, "10265": 5.50391, "10270": 5.4464, "10275": 5.23887, "10280": 5.38853, "10285": 5.2312, "10290": 5.21898, "10295": 5.53407, "10300": 5.41463, "10305": 5.33047, "10310": 5.43449, "10315": 5.21006, "10320": 5.14259, "10325": 5.3647, "10330": 5.41698, "10335": 5.35589, "10340": 5.35679, "10345": 5.26156, "10350": 5.58519, "10355": 5.2316, "10360": 5.39321, "10365": 5.20598, "10370": 5.48835, "10375": 5.55115, "10380": 5.52903, "10385": 5.61277, "10390": 5.38487, "10395": 5.46827, "10400": 5.44774, "10405": 5.4081, "10410": 5.70945, "10415": 5.35008, "10420": 5.4518, "10425": 5.55035, "10430": 5.34628, "10435": 5.25202, "10440": 5.27857, "10445": 5.39387, "10450": 5.59736, "10455": 5.3822, "10460": 5.61593, "10465": 5.52211, "10470": 5.2085, "10475": 5.39076, "10480": 5.43431, "10485": 5.37379, "10490": 5.11247, "10495": 5.17833, "10500": 5.44333, "10505": 5.61455, "10510": 5.37368, "10515": 5.4714, "10520": 5.43458, "10525": 5.3523, "10530": 5.48889, "10535": 5.54636, "10540": 5.33585, "10545": 5.60668, "10550": 5.22712, "10555": 5.57587, "10560": 5.4946, "10565": 5.0012, "10570": 5.4399, "10575": 5.3784, "10580": 5.43564, "10585": 5.4542, "10590": 5.57402, "10595": 5.34212, "10600": 5.28003, "10605": 5.31644, "10610": 5.41609, "10615": 5.35182, "10620": 5.25325, "10625": 5.66255, "10630": 5.35798, "10635": 5.43567, "10640": 5.20932, "10645": 5.35234, "10650": 5.52953, "10655": 5.43858, "10660": 5.45034, "10665": 5.39874, "10670": 5.356, "10675": 5.39505, "10680": 5.26005, "10685": 5.30895, "10690": 5.67238, "10695": 5.3382, "10700": 5.61352, "10705": 5.36639, "10710": 5.2546, "10715": 4.7648, "10720": 5.36798, "10725": 5.08581, "10730": 5.38774, "10735": 5.2315, "10740": 5.50009, "10745": 5.31682, "10750": 4.95337, "10755": 5.46252, "10760": 5.3463, "10765": 5.39233, "10770": 5.26877, "10775": 5.36279, "10780": 5.40003, "10785": 5.13531, "10790": 5.09205, "10795": 5.41714, "10800": 5.24182, "10805": 5.37759, "10810": 5.1841, "10815": 5.20587, "10820": 5.45114, "10825": 5.54565, "10830": 5.24614, "10835": 5.36473, "10840": 5.19786, "10845": 5.18242, "10850": 5.5072, "10855": 5.30986, "10860": 5.40324, "10865": 5.52718, "10870": 5.68078, "10875": 5.53167, "10880": 5.12654, "10885": 5.38662, "10890": 5.39428, "10895": 5.24526, "10900": 5.30257, "10905": 5.38606, "10910": 5.46742, "10915": 5.36187, "10920": 5.4652, "10925": 5.31216, "10930": 5.27235, "10935": 5.52601, "10940": 5.4506, "10945": 5.45866, "10950": 5.04867, "10955": 5.18984, "10960": 5.27339, "10965": 5.26806, "10970": 5.1956, "10975": 5.28983, "10980": 4.98698, "10985": 5.25205, "10990": 5.28065, "10995": 5.32792, "11000": 5.06033, "11005": 5.24549, "11010": 5.37188, "11015": 5.19974, "11020": 5.24522, "11025": 5.08564, "11030": 5.3484, "11035": 5.22237, "11040": 5.34708, "11045": 5.07465, "11050": 5.27395, "11055": 5.5143, "11060": 5.3757, "11065": 5.27853, "11070": 5.26735, "11075": 5.35381, "11080": 5.13287, "11085": 5.22699, "11090": 5.28493, "11095": 5.19245, "11100": 5.34048, "11105": 5.28978, "11110": 5.24824, "11115": 5.31027, "11120": 4.82308, "11125": 5.3662, "11130": 5.54704, "11135": 5.19661, "11140": 5.17157, "11145": 5.3315, "11150": 5.59279, "11155": 5.27752, "11160": 5.03615, "11165": 5.30773, "11170": 5.28313, "11175": 5.21376, "11180": 5.01974, "11185": 5.11332, "11190": 5.4275, "11195": 5.08001, "11200": 5.05585, "11205": 5.27463, "11210": 5.24983, "11215": 5.51707, "11220": 5.10348, "11225": 5.34594, "11230": 5.19582, "11235": 5.23772, "11240": 5.1086, "11245": 5.47357, "11250": 5.38475, "11255": 5.17473, "11260": 5.3059, "11265": 4.97653, "11270": 5.26496, "11275": 5.265, "11280": 4.87211, "11285": 5.3036, "11290": 5.06537, "11295": 5.1824, "11300": 5.01714, "11305": 5.4564, "11310": 5.35135, "11315": 5.30915, "11320": 5.43378, "11325": 5.27055, "11330": 5.24777, "11335": 5.2721, "11340": 5.46113, "11345": 5.04206, "11350": 5.37593, "11355": 5.41239, "11360": 5.60221, "11365": 5.38167, "11370": 5.4196, "11375": 5.30141, "11380": 5.37361, "11385": 5.20055, "11390": 5.18606, "11395": 5.28461, "11400": 4.95284, "11405": 5.07589, "11410": 5.26203, "11415": 5.48228, "11420": 5.00538, "11425": 5.14265, "11430": 5.23302, "11435": 5.02339, "11440": 5.24131, "11445": 5.2604, "11450": 5.3917, "11455": 5.3178, "11460": 5.1791, "11465": 5.45179, "11470": 5.23893, "11475": 5.3124, "11480": 5.09538, "11485": 5.27877, "11490": 5.23733, "11495": 5.5879, "11500": 5.28776, "11505": 5.11349, "11510": 5.17614, "11515": 5.09869, "11520": 5.22095, "11525": 5.2381, "11530": 5.07422, "11535": 5.30826, "11540": 5.13735, "11545": 5.16093, "11550": 5.14439, "11555": 5.36028, "11560": 5.27581, "11565": 5.08411, "11570": 5.09748, "11575": 5.13256, "11580": 5.5915, "11585": 5.27239, "11590": 5.0515, "11595": 5.28831, "11600": 5.22951, "11605": 5.29821, "11610": 5.1794, "11615": 5.27238, "11620": 5.07543, "11625": 5.2532, "11630": 5.16624, "11635": 5.48732, "11640": 4.97976, "11645": 5.09481, "11650": 5.07, "11655": 5.4163, "11660": 5.14729, "11665": 5.22332, "11670": 5.29066, "11675": 5.01149, "11680": 5.24745, "11685": 5.3955, "11690": 5.2877, "11695": 5.31718, "11700": 5.27337, "11705": 5.17844, "11710": 5.0163, "11715": 5.01964, "11720": 5.24626, "11725": 5.18448, "11730": 5.16792, "11735": 5.21466, "11740": 5.22501, "11745": 5.16252, "11750": 5.16248, "11755": 5.16407, "11760": 5.2844, "11765": 5.28437, "11770": 5.04897, "11775": 5.53847, "11780": 5.28538, "11785": 5.53002, "11790": 5.2765, "11795": 5.32103, "11800": 5.13019, "11805": 5.51958, "11810": 5.1034, "11815": 5.38569, "11820": 5.02823, "11825": 4.87016, "11830": 5.12661, "11835": 5.18867, "11840": 5.19562, "11845": 5.17948, "11850": 5.30315, "11855": 5.01859, "11860": 5.42594, "11865": 5.214, "11870": 5.4507, "11875": 5.04925, "11880": 5.28351, "11885": 4.99344, "11890": 5.27355, "11895": 4.94409, "11900": 5.33673, "11905": 5.16021, "11910": 5.36325, "11915": 5.11683, "11920": 5.27474, "11925": 5.14645, "11930": 5.14292, "11935": 5.10027, "11940": 4.93739, "11945": 5.37039, "11950": 5.11549, "11955": 5.03135, "11960": 4.90919, "11965": 5.11583, "11970": 5.20048, "11975": 4.99678, "11980": 5.04864, "11985": 5.16687, "11990": 5.12482, "11995": 5.12138, "12000": 5.08971, "12005": 5.04863, "12010": 5.23243, "12015": 4.9521, "12020": 5.26004, "12025": 5.12761, "12030": 4.93221, "12035": 4.90509, "12040": 5.11885, "12045": 5.35792, "12050": 5.33254, "12055": 4.95158, "12060": 5.24892, "12065": 5.25555, "12070": 5.161, "12075": 5.27294, "12080": 5.24702, "12085": 5.09961, "12090": 5.0043, "12095": 5.48174, "12100": 4.97703, "12105": 5.10722, "12110": 5.18597, "12115": 4.94918, "12120": 4.7691, "12125": 5.01242, "12130": 5.23206, "12135": 5.15475, "12140": 5.28888, "12145": 5.06497, "12150": 4.85407, "12155": 5.08306, "12160": 5.1481, "12165": 5.23872, "12170": 5.19311, "12175": 5.23483, "12180": 5.36383, "12185": 5.20117, "12190": 4.82771, "12195": 5.02846, "12200": 5.1913, "12205": 5.09558, "12210": 5.0232, "12215": 5.07948, "12220": 5.20316, "12225": 5.17347, "12230": 4.99102, "12235": 5.31941, "12240": 5.10751, "12245": 5.1069, "12250": 5.22126, "12255": 4.96663, "12260": 4.85554, "12265": 4.87446, "12270": 4.98725, "12275": 5.14349, "12280": 4.882, "12285": 4.92647, "12290": 5.17059, "12295": 5.25714, "12300": 5.48431, "12305": 5.1721, "12310": 5.27149, "12315": 5.03535, "12320": 5.00285, "12325": 5.28719, "12330": 5.2087, "12335": 5.37795, "12340": 4.96675, "12345": 5.3169, "12350": 4.93664, "12355": 5.1021, "12360": 5.01402, "12365": 4.77975, "12370": 5.10945, "12375": 4.87665, "12380": 5.21343, "12385": 5.22589, "12390": 5.09046, "12395": 5.15768, "12400": 5.13948, "12405": 5.32145, "12410": 5.16139, "12415": 5.30719, "12420": 5.39248, "12425": 5.24358, "12430": 4.92588, "12435": 5.02191, "12440": 4.98595, "12445": 5.24576, "12450": 5.08259, "12455": 5.068, "12460": 4.80591, "12465": 4.96585, "12470": 5.43363, "12475": 5.09508, "12480": 4.94249, "12485": 5.25178, "12490": 5.03435, "12495": 5.36914, "12500": 5.47981, "12505": 5.29396, "12510": 4.79058, "12515": 5.0575, "12520": 5.14624, "12525": 5.11066, "12530": 4.89231, "12535": 5.31974, "12540": 4.99776, "12545": 4.93234, "12550": 5.42742, "12555": 5.0198, "12560": 4.95947, "12565": 5.30194, "12570": 5.011, "12575": 5.15742, "12580": 4.9487, "12585": 5.24504, "12590": 4.81696, "12595": 5.20341, "12600": 5.21134, "12605": 5.10257, "12610": 5.21869, "12615": 5.14651, "12620": 5.32386, "12625": 5.02724, "12630": 5.11801, "12635": 5.30002, "12640": 4.92816, "12645": 5.29811, "12650": 4.97494, "12655": 5.0749, "12660": 5.13519, "12665": 5.07599, "12670": 5.01249, "12675": 5.32137, "12680": 4.90018, "12685": 4.95909, "12690": 5.22862, "12695": 4.88605, "12700": 5.00287, "12705": 5.13113, "12710": 4.94577, "12715": 4.98293, "12720": 4.9516, "12725": 5.04837, "12730": 4.94009, "12735": 4.8895, "12740": 5.11633, "12745": 4.78201, "12750": 4.80699, "12755": 5.05351, "12760": 4.73109, "12765": 5.18477, "12770": 5.01334, "12775": 5.05904, "12780": 5.25674, "12785": 5.08532, "12790": 5.03848, "12795": 5.07792, "12800": 5.26545, "12805": 4.7628, "12810": 5.0439, "12815": 4.87974, "12820": 4.89723, "12825": 5.11194, "12830": 4.99866, "12835": 5.2359, "12840": 5.049, "12845": 5.09335, "12850": 4.74596, "12855": 5.05245, "12860": 5.0862, "12865": 5.10011, "12870": 4.93434, "12875": 5.19139, "12880": 5.00966, "12885": 5.10951, "12890": 5.40403, "12895": 5.06375, "12900": 4.86895, "12905": 5.12256, "12910": 4.89725, "12915": 4.9582, "12920": 5.22713, "12925": 5.02956, "12930": 5.04264, "12935": 4.97126, "12940": 5.28037, "12945": 4.75445, "12950": 5.21552, "12955": 4.77142, "12960": 5.04675, "12965": 5.0651, "12970": 4.74546, "12975": 5.05552, "12980": 4.95216, "12985": 4.91902, "12990": 4.87467, "12995": 5.09789, "13000": 5.05835, "13005": 5.07946, "13010": 4.7969, "13015": 4.97847, "13020": 5.01629, "13025": 5.07514, "13030": 5.07913, "13035": 4.91507, "13040": 5.06975, "13045": 5.06224, "13050": 5.03068, "13055": 4.9495, "13060": 5.23862, "13065": 5.28355, "13070": 5.14095, "13075": 5.22139, "13080": 4.97788, "13085": 5.24489, "13090": 5.06377, "13095": 5.12901, "13100": 5.01727, "13105": 5.12282, "13110": 5.02922, "13115": 4.85394, "13120": 5.09122, "13125": 4.96556, "13130": 5.12758, "13135": 4.98896, "13140": 4.86273, "13145": 5.41116, "13150": 5.15503, "13155": 5.26178, "13160": 4.88213, "13165": 4.9631, "13170": 5.2502, "13175": 5.05872, "13180": 4.64755, "13185": 5.27215, "13190": 4.96128, "13195": 5.25958, "13200": 4.85133, "13205": 5.31655, "13210": 4.77255, "13215": 5.14284, "13220": 4.82247, "13225": 5.19207, "13230": 5.00187, "13235": 5.20956, "13240": 5.00193, "13245": 5.26294, "13250": 4.83095, "13255": 4.68528, "13260": 5.00094, "13265": 5.18161, "13270": 5.10141, "13275": 5.07983, "13280": 4.78838, "13285": 5.3192, "13290": 5.12347, "13295": 5.19099, "13300": 5.11485, "13305": 4.8425, "13310": 4.8633, "13315": 5.11963, "13320": 5.12848, "13325": 4.96432, "13330": 4.91523, "13335": 5.05602, "13340": 4.91728, "13345": 4.73624, "13350": 5.09215, "13355": 4.9043, "13360": 4.95717, "13365": 4.93766, "13370": 4.93088, "13375": 5.02886, "13380": 5.07135, "13385": 4.8992, "13390": 5.25579, "13395": 5.11737, "13400": 4.84342, "13405": 4.94273, "13410": 4.77126, "13415": 4.85958, "13420": 4.98369, "13425": 4.9368, "13430": 4.97047, "13435": 4.98269, "13440": 4.8267, "13445": 4.82577, "13450": 5.0193, "13455": 4.81057, "13460": 4.99756, "13465": 4.97396, "13470": 5.0439, "13475": 4.95065, "13480": 5.07169, "13485": 4.96041, "13490": 5.24316, "13495": 5.12133, "13500": 4.77398, "13505": 4.95219, "13510": 5.20853, "13515": 4.94971, "13520": 5.11479, "13525": 4.92216, "13530": 4.79978, "13535": 4.82208, "13540": 5.08482, "13545": 4.83079, "13550": 5.0817, "13555": 5.05558, "13560": 5.23019, "13565": 5.10773, "13570": 4.94405, "13575": 4.98796, "13580": 4.67198, "13585": 5.37584, "13590": 5.09156, "13595": 4.98963, "13600": 4.9818, "13605": 4.9783, "13610": 5.12268, "13615": 4.82499, "13620": 4.96206, "13625": 5.17516, "13630": 4.76413, "13635": 4.74155, "13640": 5.00152, "13645": 4.9424, "13650": 4.77218, "13655": 4.72743, "13660": 4.97736, "13665": 5.05784, "13670": 4.84851, "13675": 5.19533, "13680": 5.26388, "13685": 4.96761, "13690": 4.94108, "13695": 5.07516, "13700": 4.92774, "13705": 5.00406, "13710": 4.95611, "13715": 4.85472, "13720": 4.85383, "13725": 4.87021, "13730": 4.84225, "13735": 4.98071, "13740": 5.17429, "13745": 5.05733, "13750": 4.89094, "13755": 4.74427, "13760": 4.68823, "13765": 5.02159, "13770": 5.08746, "13775": 4.83846, "13780": 4.7199, "13785": 5.05337, "13790": 4.78727, "13795": 4.78073, "13800": 4.89405, "13805": 4.84388, "13810": 4.96631, "13815": 4.77642, "13820": 4.79512, "13825": 4.96673, "13830": 5.10604, "13835": 4.92983, "13840": 5.18486, "13845": 4.92708, "13850": 4.81692, "13855": 4.84876, "13860": 5.03127, "13865": 4.80171, "13870": 5.14411, "13875": 4.83777, "13880": 4.9312, "13885": 4.98524, "13890": 4.90116, "13895": 4.81853, "13900": 4.58189, "13905": 4.94366, "13910": 4.9113, "13915": 5.04097, "13920": 4.75519, "13925": 5.17589, "13930": 4.63194, "13935": 5.07544, "13940": 5.21085, "13945": 4.96179, "13950": 5.03088, "13955": 4.68391, "13960": 4.93965, "13965": 5.09069, "13970": 5.08755, "13975": 4.92924, "13980": 4.90532, "13985": 4.91967, "13990": 4.97455, "13995": 4.92017, "14000": 5.06041, "14005": 4.90539, "14010": 4.88075, "14015": 4.79011, "14020": 5.06071, "14025": 4.91145, "14030": 4.96213, "14035": 4.78011, "14040": 4.735, "14045": 4.8533, "14050": 4.88, "14055": 5.07218, "14060": 4.9295, "14065": 5.07337, "14070": 4.80278, "14075": 5.03373, "14080": 4.69606, "14085": 5.08593, "14090": 5.07326, "14095": 4.84672, "14100": 4.95517, "14105": 4.87061, "14110": 4.78297, "14115": 5.07097, "14120": 5.01941, "14125": 4.49577, "14130": 5.03963, "14135": 4.84274, "14140": 5.06925, "14145": 4.66266, "14150": 4.85154, "14155": 4.85915, "14160": 4.77968, "14165": 4.95106, "14170": 4.97778, "14175": 4.84399, "14180": 4.80436, "14185": 4.62534, "14190": 5.05832, "14195": 5.00631, "14200": 4.54448, "14205": 5.09607, "14210": 5.00415, "14215": 4.92459, "14220": 4.71498, "14225": 5.27886, "14230": 4.92925, "14235": 4.74359, "14240": 4.85047, "14245": 4.92229, "14250": 5.19985, "14255": 5.22076, "14260": 4.82777, "14265": 4.96885, "14270": 5.24391, "14275": 5.01143, "14280": 4.96848, "14285": 4.67474, "14290": 5.16947, "14295": 5.00669, "14300": 4.69678, "14305": 4.87495, "14310": 5.19659, "14315": 4.86804, "14320": 5.11845, "14325": 5.10999, "14330": 4.60944, "14335": 5.04777, "14340": 4.68584, "14345": 4.89273, "14350": 4.97276, "14355": 4.68719, "14360": 4.87797, "14365": 4.82528, "14370": 4.89646, "14375": 4.94693, "14380": 4.79239, "14385": 5.1627, "14390": 4.91135, "14395": 4.67264, "14400": 4.85019, "14405": 4.94017, "14410": 4.80152, "14415": 5.07025, "14420": 5.18188, "14425": 4.96092, "14430": 4.96924, "14435": 5.17383, "14440": 4.5748, "14445": 4.61065, "14450": 5.03842, "14455": 4.55462, "14460": 4.84223, "14465": 4.96105, "14470": 4.91723, "14475": 4.82672, "14480": 4.89362, "14485": 4.89321, "14490": 4.99969, "14495": 4.79335, "14500": 4.91907, "14505": 5.15574, "14510": 4.59256, "14515": 4.90456, "14520": 4.90639, "14525": 5.0542, "14530": 4.56362, "14535": 5.22447, "14540": 4.95248, "14545": 4.8606, "14550": 4.86175, "14555": 4.74854, "14560": 4.60689, "14565": 4.78215, "14570": 4.96199, "14575": 5.07844, "14580": 4.9694, "14585": 4.75212, "14590": 4.79211, "14595": 4.94406, "14600": 4.76458, "14605": 4.75717, "14610": 4.79664, "14615": 5.04495, "14620": 4.7861, "14625": 4.80258, "14630": 5.0808, "14635": 4.81526, "14640": 4.76814, "14645": 5.06811, "14650": 4.95967, "14655": 4.82524, "14660": 5.02712, "14665": 4.93405, "14670": 4.44961, "14675": 5.00453, "14680": 4.66128, "14685": 4.63629, "14690": 4.65834, "14695": 4.70234, "14700": 4.97001, "14705": 4.84886, "14710": 4.77302, "14715": 5.06561, "14720": 4.73985, "14725": 4.82684, "14730": 4.597, "14735": 4.66732, "14740": 4.95551, "14745": 4.91261, "14750": 4.98104, "14755": 4.91475, "14760": 5.13584, "14765": 4.98991, "14770": 4.94256, "14775": 4.49032, "14780": 4.78737, "14785": 4.83908, "14790": 4.72766, "14795": 4.79291, "14800": 4.69019, "14805": 4.64297, "14810": 4.92113, "14815": 4.58727, "14820": 4.57895, "14825": 4.63825, "14830": 5.11037, "14835": 4.49824, "14840": 5.0666, "14845": 5.14354, "14850": 4.49585, "14855": 4.47509, "14860": 5.02477, "14865": 4.73656, "14870": 5.03937, "14875": 4.78677, "14880": 4.97573, "14885": 4.94523, "14890": 4.7547, "14895": 4.711, "14900": 4.80398, "14905": 4.66544, "14910": 5.11436, "14915": 4.8928, "14920": 5.02061, "14925": 4.69705, "14930": 4.85289, "14935": 5.09179, "14940": 4.66701, "14945": 4.91429, "14950": 4.96155, "14955": 4.88416, "14960": 4.81647, "14965": 4.82959, "14970": 4.72983, "14975": 4.8859, "14980": 4.7443, "14985": 5.01679, "14990": 4.62507, "14995": 5.1501, "15000": 4.61726, "15005": 4.46448, "15010": 4.77228, "15015": 4.78616, "15020": 4.86662, "15025": 4.91126, "15030": 4.82187, "15035": 4.61415, "15040": 4.67773, "15045": 4.7885, "15050": 4.90907, "15055": 4.46094, "15060": 5.03479, "15065": 4.67986, "15070": 4.71613, "15075": 4.80371, "15080": 4.69624, "15085": 4.76377, "15090": 5.10905, "15095": 4.80407, "15100": 4.83957, "15105": 4.78592, "15110": 4.66904, "15115": 4.96919, "15120": 4.75555, "15125": 4.94584, "15130": 4.7302, "15135": 4.81139, "15140": 4.84251, "15145": 4.84664, "15150": 5.04692, "15155": 4.79099, "15160": 4.63616, "15165": 4.67299, "15170": 4.64371, "15175": 4.33728, "15180": 4.93077, "15185": 4.76129, "15190": 4.81275, "15195": 4.732, "15200": 5.00335, "15205": 4.68967, "15210": 5.00343, "15215": 4.99915, "15220": 4.91845, "15225": 4.88304, "15230": 4.56211, "15235": 4.90379, "15240": 4.70754, "15245": 4.79589, "15250": 4.57129, "15255": 5.02666, "15260": 4.63088, "15265": 4.8357, "15270": 4.65458, "15275": 5.09927, "15280": 4.84823, "15285": 4.71025, "15290": 4.88109, "15295": 5.11724, "15300": 4.62948, "15305": 4.87938, "15310": 4.76728, "15315": 4.64962, "15320": 4.91794, "15325": 5.11232, "15330": 4.74389, "15335": 5.06915, "15340": 4.76123, "15345": 4.73613, "15350": 4.57708, "15355": 4.86128, "15360": 4.61814, "15365": 4.91632, "15370": 4.86305, "15375": 4.91495, "15380": 4.56651, "15385": 4.66439, "15390": 4.88144, "15395": 4.53516, "15400": 4.82358, "15405": 4.51013, "15410": 4.59654, "15415": 4.7378, "15420": 4.98025, "15425": 4.78413, "15430": 4.6208, "15435": 4.64564, "15440": 4.88049, "15445": 4.83509, "15450": 4.60014, "15455": 5.06714, "15460": 4.63317, "15465": 4.86895, "15470": 4.67476, "15475": 4.69375, "15480": 4.44911, "15485": 4.82973, "15490": 4.87481, "15495": 4.73048, "15500": 4.73209, "15505": 4.70153, "15510": 4.80897, "15515": 4.91657, "15520": 4.74986, "15525": 4.86181, "15530": 4.49376, "15535": 4.89904, "15540": 4.71343, "15545": 4.42941, "15550": 4.66939, "15555": 4.98194, "15560": 4.83922, "15565": 4.71441, "15570": 4.87976, "15575": 4.76321, "15580": 4.61043, "15585": 4.85345, "15590": 4.75028, "15595": 4.96181, "15600": 4.82499, "15605": 4.94746, "15610": 5.02294, "15615": 4.90828, "15620": 4.75538, "15625": 4.63064, "15630": 4.35107, "15635": 4.95641, "15640": 4.74162, "15645": 4.72694, "15650": 4.838, "15655": 4.89808, "15660": 4.89726, "15665": 4.86149, "15670": 4.68214, "15675": 4.61003, "15680": 4.91538, "15685": 4.67333, "15690": 4.99196, "15695": 4.94638, "15700": 4.87047, "15705": 4.70504, "15710": 4.95721, "15715": 4.8336, "15720": 4.71177, "15725": 4.66986, "15730": 4.80267, "15735": 4.5262, "15740": 4.89773, "15745": 4.76169, "15750": 4.89063, "15755": 4.80249, "15760": 4.58412, "15765": 4.78709, "15770": 4.73682, "15775": 4.89219, "15780": 4.74371, "15785": 4.80648, "15790": 4.94953, "15795": 4.77252, "15800": 5.00844, "15805": 4.97006, "15810": 4.77161, "15815": 4.52479, "15820": 4.49663, "15825": 5.21024, "15830": 4.70069, "15835": 4.7152, "15840": 4.83244, "15845": 5.05392, "15850": 4.65416, "15855": 4.78907, "15860": 4.87932, "15865": 4.59653, "15870": 4.67887, "15875": 4.82706, "15880": 4.81861, "15885": 4.55304, "15890": 4.83067, "15895": 4.67164, "15900": 4.89679, "15905": 4.73252, "15910": 4.67523, "15915": 5.10474, "15920": 4.71633, "15925": 4.98675, "15930": 4.72251, "15935": 4.76102, "15940": 4.65008, "15945": 4.72186, "15950": 4.59063, "15955": 4.59168, "15960": 4.97179, "15965": 4.43611, "15970": 4.76462, "15975": 4.75878, "15980": 4.51832, "15985": 4.75727, "15990": 4.42976, "15995": 4.90496, "16000": 4.73644, "16005": 4.75611, "16010": 4.77518, "16015": 4.74127, "16020": 4.96216, "16025": 4.52848, "16030": 5.10113, "16035": 4.76771, "16040": 4.9631, "16045": 4.59155, "16050": 4.69177, "16055": 4.21813, "16060": 4.79882, "16065": 5.03091, "16070": 4.26489, "16075": 4.57745, "16080": 4.82988, "16085": 4.51359, "16090": 4.8553, "16095": 4.38598, "16100": 4.76893, "16105": 4.74054, "16110": 4.59574, "16115": 4.82994, "16120": 4.61522, "16125": 4.46617, "16130": 4.65964, "16135": 4.42344, "16140": 4.9383, "16145": 4.75281, "16150": 4.90806, "16155": 4.58389, "16160": 5.01088, "16165": 4.67459, "16170": 5.24962, "16175": 4.71061, "16180": 4.85859, "16185": 4.78086, "16190": 4.72136, "16195": 4.86534, "16200": 4.63469, "16205": 4.90568, "16210": 4.87132, "16215": 4.66195, "16220": 4.73631, "16225": 4.71388, "16230": 5.01414, "16235": 4.69234, "16240": 4.57211, "16245": 4.75379, "16250": 4.88843, "16255": 4.91551, "16260": 4.72797, "16265": 4.78156, "16270": 4.13823, "16275": 4.7422, "16280": 4.75245, "16285": 4.64212, "16290": 4.74042, "16295": 4.61102, "16300": 4.84722, "16305": 4.69841, "16310": 4.52174, "16315": 4.76673, "16320": 4.98793, "16325": 4.46588, "16330": 4.33829, "16335": 4.77882, "16340": 4.69208, "16345": 4.51736, "16350": 4.60563, "16355": 4.69158, "16360": 4.41956, "16365": 4.79952, "16370": 4.64817, "16375": 4.59589, "16380": 4.69301, "16385": 4.81637, "16390": 4.57045, "16395": 4.77635, "16400": 4.61618, "16405": 4.71955, "16410": 4.63093, "16415": 4.83192, "16420": 4.70967, "16425": 4.83068, "16430": 4.71843, "16435": 4.51644, "16440": 4.79729, "16445": 4.78431, "16450": 4.60779, "16455": 4.66745, "16460": 4.70261, "16465": 4.73449, "16470": 4.67175, "16475": 4.64561, "16480": 4.82412, "16485": 5.03595, "16490": 4.57843, "16495": 4.5867, "16500": 4.89045, "16505": 5.02283, "16510": 4.8717, "16515": 4.76853, "16520": 4.5538, "16525": 4.65332, "16530": 4.79114, "16535": 4.9551, "16540": 4.65485, "16545": 4.73293, "16550": 4.55559, "16555": 4.57983, "16560": 4.6274, "16565": 4.55136, "16570": 4.58413, "16575": 4.8398, "16580": 4.77564, "16585": 4.56099, "16590": 4.81681, "16595": 4.64738, "16600": 4.72925, "16605": 4.46543, "16610": 4.91362, "16615": 4.80961, "16620": 4.99331, "16625": 4.744, "16630": 4.66241, "16635": 4.91067, "16640": 4.95584, "16645": 4.48154, "16650": 4.3473, "16655": 4.76362, "16660": 4.70514, "16665": 4.70789, "16670": 4.66806, "16675": 4.80162, "16680": 4.57499, "16685": 4.61219, "16690": 4.99249, "16695": 4.85615, "16700": 4.60476, "16705": 4.87509, "16710": 4.75305, "16715": 4.50295, "16720": 4.72607, "16725": 4.61441, "16730": 4.53768, "16735": 4.56662, "16740": 4.58874, "16745": 4.88318, "16750": 4.07585, "16755": 4.30116, "16760": 4.63713, "16765": 4.50832, "16770": 4.90641, "16775": 4.63917, "16780": 4.85792, "16785": 4.43596, "16790": 4.58465, "16795": 4.72904, "16800": 4.65113, "16805": 4.53219, "16810": 4.47866, "16815": 4.3967, "16820": 4.59255, "16825": 4.4347, "16830": 4.83586, "16835": 4.71485, "16840": 4.71498, "16845": 4.74316, "16850": 4.79782, "16855": 4.66274, "16860": 4.82589, "16865": 4.67853, "16870": 4.47921, "16875": 4.56143, "16880": 4.71357, "16885": 4.85956, "16890": 4.54368, "16895": 4.57018, "16900": 4.73456, "16905": 4.64634, "16910": 4.5792, "16915": 4.54175, "16920": 4.87765, "16925": 4.7918, "16930": 4.70895, "16935": 4.90053, "16940": 4.56292, "16945": 4.61775, "16950": 4.68105, "16955": 4.35182, "16960": 4.60685, "16965": 4.63398, "16970": 4.53511, "16975": 4.64009, "16980": 4.72335, "16985": 4.47582, "16990": 4.51653, "16995": 4.79228, "17000": 4.81006, "17005": 4.55551, "17010": 4.76457, "17015": 4.57893, "17020": 4.89882, "17025": 4.54143, "17030": 4.6781, "17035": 4.64487, "17040": 4.83278, "17045": 4.62344, "17050": 4.46731, "17055": 4.57201, "17060": 4.79965, "17065": 4.5098, "17070": 4.71259, "17075": 4.68352, "17080": 4.88659, "17085": 4.51092, "17090": 4.3399, "17095": 4.44552, "17100": 4.87553, "17105": 4.76108, "17110": 4.75337, "17115": 4.88472, "17120": 4.70485, "17125": 4.58564, "17130": 4.6705, "17135": 4.51344, "17140": 4.61783, "17145": 4.78291, "17150": 4.69412, "17155": 4.62375, "17160": 4.26359, "17165": 4.73242, "17170": 4.44998, "17175": 4.90387, "17180": 4.48798, "17185": 4.63893, "17190": 4.82657, "17195": 5.01016, "17200": 4.67327, "17205": 4.80176, "17210": 4.57979, "17215": 4.70195, "17220": 4.47825, "17225": 4.56226, "17230": 4.92622, "17235": 4.57845, "17240": 4.64112, "17245": 4.93738, "17250": 4.84863, "17255": 4.69749, "17260": 4.69931, "17265": 4.57799, "17270": 5.10435, "17275": 4.80572, "17280": 4.71771, "17285": 4.57033, "17290": 4.80859, "17295": 4.54175, "17300": 4.63263, "17305": 4.89761, "17310": 4.71247, "17315": 4.63245, "17320": 4.52882, "17325": 4.4364, "17330": 4.51325, "17335": 4.52212, "17340": 4.66336, "17345": 4.82936, "17350": 4.75599, "17355": 4.42479, "17360": 4.71834, "17365": 4.75938, "17370": 4.48265, "17375": 4.66737, "17380": 4.62505, "17385": 4.65761, "17390": 4.66028, "17395": 4.59545, "17400": 4.59905, "17405": 4.90184, "17410": 4.6109, "17415": 5.06896, "17420": 4.46752, "17425": 4.5227, "17430": 4.67853, "17435": 4.75048, "17440": 4.74094, "17445": 4.83501, "17450": 4.62396, "17455": 4.5253, "17460": 4.51618, "17465": 4.57263, "17470": 4.57913, "17475": 4.71038, "17480": 4.6467, "17485": 4.74419, "17490": 4.70918, "17495": 4.4527, "17500": 4.42808, "17505": 4.61633, "17510": 4.3621, "17515": 4.67811, "17520": 4.42217, "17525": 4.68344, "17530": 4.79629, "17535": 4.47512, "17540": 4.77905, "17545": 4.79439, "17550": 4.89005, "17555": 4.62477, "17560": 4.37778, "17565": 4.43796, "17570": 4.53317, "17575": 4.79207, "17580": 4.62344, "17585": 4.56632, "17590": 4.78663, "17595": 4.20756, "17600": 4.33947, "17605": 4.77035, "17610": 4.40628, "17615": 4.49643, "17620": 4.69887, "17625": 4.48489, "17630": 4.59626, "17635": 4.85978, "17640": 4.68308, "17645": 4.49584, "17650": 4.58199, "17655": 4.70881, "17660": 4.36156, "17665": 4.54974, "17670": 4.88606, "17675": 4.85542, "17680": 4.83807, "17685": 4.72655, "17690": 4.87652, "17695": 4.55257, "17700": 4.4832, "17705": 4.79245, "17710": 4.66351, "17715": 4.8224, "17720": 4.90719, "17725": 4.3819, "17730": 4.75448, "17735": 4.50075, "17740": 4.6142, "17745": 4.66771, "17750": 4.29231, "17755": 4.71925, "17760": 4.43914, "17765": 4.57809, "17770": 4.58769, "17775": 4.55025, "17780": 4.71631, "17785": 4.49097, "17790": 4.62366, "17795": 4.88061, "17800": 4.56311, "17805": 4.7476, "17810": 4.22324, "17815": 4.88838, "17820": 4.69264, "17825": 4.80549, "17830": 4.7681, "17835": 4.63043, "17840": 4.80906, "17845": 4.63118, "17850": 4.62579, "17855": 4.69438, "17860": 4.77084, "17865": 4.75619, "17870": 4.68389, "17875": 4.76077, "17880": 4.73793, "17885": 4.69999, "17890": 4.63289, "17895": 4.36349, "17900": 4.2935, "17905": 4.62394, "17910": 4.70538, "17915": 4.46943, "17920": 4.57111, "17925": 4.50366, "17930": 4.82982, "17935": 4.62689, "17940": 4.62205, "17945": 4.51045, "17950": 4.75139, "17955": 4.48931, "17960": 4.51317, "17965": 4.53564, "17970": 4.57989, "17975": 4.46925, "17980": 4.41075, "17985": 4.59817, "17990": 4.64773, "17995": 4.9858, "18000": 4.54267, "18005": 4.62839, "18010": 4.75788, "18015": 4.67409, "18020": 4.60056, "18025": 4.78308, "18030": 4.5976, "18035": 4.42175, "18040": 4.71128, "18045": 4.6728, "18050": 4.69279, "18055": 4.31758, "18060": 4.67305, "18065": 4.59785, "18070": 4.78982, "18075": 4.61025, "18080": 4.24851, "18085": 4.67895, "18090": 4.54553, "18095": 4.67201, "18100": 4.85044, "18105": 4.63047, "18110": 4.74025, "18115": 4.62119, "18120": 4.60827, "18125": 4.74019, "18130": 4.67008, "18135": 4.81161, "18140": 4.64753, "18145": 4.369, "18150": 4.62485, "18155": 4.46126, "18160": 4.84904, "18165": 4.51582, "18170": 4.53564, "18175": 4.64274, "18180": 4.55416, "18185": 4.58186, "18190": 4.48279, "18195": 4.59479, "18200": 4.74886, "18205": 4.49389, "18210": 4.41001, "18215": 4.84476, "18220": 4.39071, "18225": 4.71667, "18230": 4.49112, "18235": 4.62935, "18240": 4.79786, "18245": 4.60775, "18250": 4.63901, "18255": 4.6543, "18260": 4.58442, "18265": 4.51652, "18270": 4.80473, "18275": 4.48737, "18280": 4.56113, "18285": 4.89253, "18290": 4.64952, "18295": 4.81316, "18300": 4.60972, "18305": 4.33753, "18310": 4.69129, "18315": 4.18635, "18320": 4.28598, "18325": 4.50003, "18330": 4.46752, "18335": 4.76026, "18340": 5.00781, "18345": 4.58459, "18350": 4.60438, "18355": 4.52475, "18360": 4.49551, "18365": 4.44677, "18370": 4.60457, "18375": 4.50868, "18380": 4.67696, "18385": 4.62345, "18390": 4.74967, "18395": 4.65603, "18400": 4.5379, "18405": 4.65347, "18410": 4.52271, "18415": 4.5702, "18420": 4.63743, "18425": 4.64645, "18430": 4.75553, "18435": 4.24213, "18440": 4.44301, "18445": 4.7483, "18450": 4.39981, "18455": 4.63998, "18460": 4.31669, "18465": 4.54673, "18470": 4.28561, "18475": 4.87112, "18480": 4.56955, "18485": 4.57785, "18490": 4.76414, "18495": 4.6237, "18500": 4.73452, "18505": 4.7449, "18510": 4.47383, "18515": 4.36314, "18520": 4.60693, "18525": 4.35145, "18530": 4.74158, "18535": 4.74229, "18540": 4.44775, "18545": 4.6846, "18550": 4.90053, "18555": 4.70875, "18560": 4.31332, "18565": 4.59618, "18570": 4.67911, "18575": 4.55279, "18580": 4.82963, "18585": 4.5419, "18590": 4.81632, "18595": 4.64786, "18600": 4.36163, "18605": 4.73325, "18610": 4.51279, "18615": 4.69515, "18620": 4.36364, "18625": 4.42725, "18630": 4.33219, "18635": 4.40085, "18640": 4.856, "18645": 4.64445, "18650": 4.4747, "18655": 4.46273, "18660": 4.71444, "18665": 4.61387, "18670": 4.52895, "18675": 4.54565, "18680": 4.33753, "18685": 4.43054, "18690": 4.60592, "18695": 4.52619, "18700": 4.74638, "18705": 4.42479, "18710": 4.32703, "18715": 4.4658, "18720": 4.5123, "18725": 4.57057, "18730": 4.47144, "18735": 4.36268, "18740": 4.57819, "18745": 4.44156, "18750": 4.58138, "18755": 4.50182, "18760": 4.75272, "18765": 4.63178, "18770": 4.65315, "18775": 4.26451, "18780": 4.47413, "18785": 4.90897, "18790": 4.48637, "18795": 4.80837, "18800": 4.6657, "18805": 4.28007, "18810": 4.7012, "18815": 4.71722, "18820": 4.54938, "18825": 4.79182, "18830": 4.56919, "18835": 4.59487, "18840": 4.44581, "18845": 4.56059, "18850": 4.72333, "18855": 4.44977, "18860": 4.52308, "18865": 4.75003, "18870": 4.60174, "18875": 4.40419, "18880": 4.78091, "18885": 4.62416, "18890": 4.5369, "18895": 4.54541, "18900": 4.5916, "18905": 4.56475, "18910": 4.62544, "18915": 4.74832, "18920": 4.4313, "18925": 4.26308, "18930": 4.64015, "18935": 4.71589, "18940": 4.5718, "18945": 4.67173, "18950": 4.5568, "18955": 4.72252, "18960": 4.83922, "18965": 4.04581, "18970": 4.24623, "18975": 4.75319, "18980": 4.67134, "18985": 4.38808, "18990": 4.64328, "18995": 4.78835, "19000": 4.61253, "19005": 4.47447, "19010": 4.58417, "19015": 4.49477, "19020": 4.60018, "19025": 4.60011, "19030": 4.46917, "19035": 4.51305, "19040": 4.58187, "19045": 4.74315, "19050": 4.43168, "19055": 4.38998, "19060": 4.50199, "19065": 4.80845, "19070": 4.65877, "19075": 4.60243, "19080": 4.73796, "19085": 4.52475, "19090": 4.57921, "19095": 4.5585, "19100": 4.656, "19105": 4.80028, "19110": 4.4344, "19115": 4.43435, "19120": 4.13201, "19125": 4.37658, "19130": 4.70057, "19135": 4.53089, "19140": 4.58025, "19145": 4.59107, "19150": 4.53357, "19155": 4.37327, "19160": 4.65978, "19165": 4.64166, "19170": 4.42109, "19175": 4.36964, "19180": 4.74881, "19185": 4.70015, "19190": 4.52469, "19195": 4.70496, "19200": 4.44202, "19205": 4.43192, "19210": 4.6448, "19215": 4.49806, "19220": 4.58907, "19225": 4.68636, "19230": 4.31844, "19235": 4.6502, "19240": 4.58176, "19245": 4.59193, "19250": 4.26414, "19255": 4.63363, "19260": 4.5978, "19265": 4.44427, "19270": 4.62637, "19275": 4.42743, "19280": 4.58868, "19285": 4.71063, "19290": 4.43136, "19295": 4.64557, "19300": 4.5755, "19305": 4.72014, "19310": 4.2826, "19315": 4.22894, "19320": 4.83209, "19325": 4.46836, "19330": 4.7038, "19335": 4.35192, "19340": 4.64478, "19345": 4.5561, "19350": 4.67583, "19355": 4.62954, "19360": 4.58004, "19365": 4.87397, "19370": 4.773, "19375": 4.6667, "19380": 4.40798, "19385": 4.3882, "19390": 4.5454, "19395": 4.41139, "19400": 4.6563, "19405": 4.74907, "19410": 4.47842, "19415": 4.66354, "19420": 4.64515, "19425": 4.77727, "19430": 4.67154, "19435": 4.67746, "19440": 4.51631, "19445": 4.34048, "19450": 4.19112, "19455": 4.57009, "19460": 4.52358, "19465": 4.63305, "19470": 4.59146, "19475": 4.25064, "19480": 4.89052, "19485": 4.49461, "19490": 4.35244, "19495": 4.62929, "19500": 4.44557, "19505": 4.61865, "19510": 4.47805, "19515": 4.52399, "19520": 4.64291, "19525": 4.59414, "19530": 4.40508, "19535": 4.53949, "19540": 4.71058, "19545": 4.68448, "19550": 4.52389, "19555": 4.4384, "19560": 4.81034, "19565": 4.91339, "19570": 4.52993, "19575": 4.71901, "19580": 4.77623, "19585": 4.41676, "19590": 4.34956, "19595": 4.68046, "19600": 4.40916, "19605": 4.77361, "19610": 4.7495, "19615": 4.41667, "19620": 4.79909, "19625": 4.19617, "19630": 4.73191, "19635": 4.6578, "19640": 4.60394, "19645": 4.55133, "19650": 4.31026, "19655": 4.59461, "19660": 4.21976, "19665": 4.67517, "19670": 4.5575, "19675": 4.4969, "19680": 4.43156, "19685": 4.64169, "19690": 4.83713, "19695": 4.36072, "19700": 4.65441, "19705": 4.53829, "19710": 4.6117, "19715": 4.54582, "19720": 4.47752, "19725": 4.63546, "19730": 4.57761, "19735": 4.44372, "19740": 4.52101, "19745": 4.55598, "19750": 4.47943, "19755": 4.37928, "19760": 4.38406, "19765": 4.36144, "19770": 4.61172, "19775": 4.42726, "19780": 4.48209, "19785": 4.85932, "19790": 4.5821, "19795": 4.6069, "19800": 4.55334, "19805": 4.457, "19810": 4.72266, "19815": 4.58852, "19820": 4.88717, "19825": 4.54052, "19830": 4.83609, "19835": 4.81761, "19840": 4.37338, "19845": 4.74161, "19850": 4.76964, "19855": 4.46719, "19860": 4.45521, "19865": 4.51713, "19870": 4.5979, "19875": 4.46938, "19880": 4.50257, "19885": 4.69777, "19890": 4.5317, "19895": 4.48675, "19900": 4.44091, "19905": 4.47098, "19910": 4.73032, "19915": 4.30771, "19920": 4.56773, "19925": 4.42538, "19930": 4.33667, "19935": 4.87762, "19940": 4.71203, "19945": 4.5584, "19950": 4.80008, "19955": 4.50588, "19960": 4.43657, "19965": 4.38298, "19970": 4.37515, "19975": 4.50591, "19980": 4.64319, "19985": 4.49542, "19990": 4.64373, "19995": 4.30228, "20000": 4.81103, "20005": "nan", "20010": "nan", "20015": "nan", "20020": "nan", "20025": "nan", "20030": "nan", "20035": "nan", "20040": "nan", "20045": "nan", "20050": "nan", "20055": "nan", "20060": "nan", "20065": "nan", "20070": "nan", "20075": "nan", "20080": "nan", "20085": "nan", "20090": "nan", "20095": "nan", "20100": "nan", "20105": "nan", "20110": "nan", "20115": "nan", "20120": "nan", "20125": "nan", "20130": "nan", "20135": "nan", "20140": "nan", "20145": "nan", "20150": "nan", "20155": "nan", "20160": "nan", "20165": "nan", "20170": "nan", "20175": "nan", "20180": "nan", "20185": "nan", "20190": "nan", "20195": "nan", "20200": "nan", "20205": "nan", "20210": "nan", "20215": "nan", "20220": "nan", "20225": "nan", "20230": "nan", "20235": "nan", "20240": "nan", "20245": "nan", "20250": "nan", "20255": "nan", "20260": "nan", "20265": "nan", "20270": "nan", "20275": "nan", "20280": "nan", "20285": "nan", "20290": "nan", "20295": "nan", "20300": "nan", "20305": "nan", "20310": "nan", "20315": "nan", "20320": "nan", "20325": "nan", "20330": "nan", "20335": "nan", "20340": "nan", "20345": "nan", "20350": "nan", "20355": "nan", "20360": "nan", "20365": "nan", "20370": "nan", "20375": "nan", "20380": "nan", "20385": "nan", "20390": "nan", "20395": "nan", "20400": "nan", "20405": "nan", "20410": "nan", "20415": "nan", "20420": "nan", "20425": "nan", "20430": "nan", "20435": "nan", "20440": "nan", "20445": "nan", "20450": "nan", "20455": "nan", "20460": "nan", "20465": "nan", "20470": "nan", "20475": "nan", "20480": "nan", "20485": "nan", "20490": "nan", "20495": "nan", "20500": "nan", "20505": "nan", "20510": "nan", "20515": "nan", "20520": "nan", "20525": "nan", "20530": "nan", "20535": "nan", "20540": "nan", "20545": "nan", "20550": "nan", "20555": "nan", "20560": "nan", "20565": "nan", "20570": "nan", "20575": "nan", "20580": "nan", "20585": "nan", "20590": "nan", "20595": "nan", "20600": "nan", "20605": "nan", "20610": "nan", "20615": "nan", "20620": "nan", "20625": "nan", "20630": "nan", "20635": "nan", "20640": "nan", "20645": "nan", "20650": "nan", "20655": "nan", "20660": "nan", "20665": "nan", "20670": "nan", "20675": "nan", "20680": "nan", "20685": "nan", "20690": "nan", "20695": "nan", "20700": "nan", "20705": "nan", "20710": "nan", "20715": "nan", "20720": "nan", "20725": "nan", "20730": "nan", "20735": "nan", "20740": "nan", "20745": "nan", "20750": "nan", "20755": "nan", "20760": "nan", "20765": "nan", "20770": "nan", "20775": "nan", "20780": "nan", "20785": "nan", "20790": "nan", "20795": "nan", "20800": "nan", "20805": "nan", "20810": "nan", "20815": "nan", "20820": "nan", "20825": "nan", "20830": "nan", "20835": "nan", "20840": "nan", "20845": "nan", "20850": "nan", "20855": "nan", "20860": "nan", "20865": "nan", "20870": "nan", "20875": "nan", "20880": "nan", "20885": "nan", "20890": "nan", "20895": "nan", "20900": "nan", "20905": "nan", "20910": "nan", "20915": "nan", "20920": "nan", "20925": "nan", "20930": "nan", "20935": "nan", "20940": "nan", "20945": "nan", "20950": "nan", "20955": "nan", "20960": "nan", "20965": "nan", "20970": "nan", "20975": "nan", "20980": "nan", "20985": "nan", "20990": "nan", "20995": "nan", "21000": "nan", "21005": "nan", "21010": "nan", "21015": "nan", "21020": "nan", "21025": "nan", "21030": "nan", "21035": "nan", "21040": "nan", "21045": "nan", "21050": "nan", "21055": "nan", "21060": "nan", "21065": "nan", "21070": "nan", "21075": "nan", "21080": "nan", "21085": "nan", "21090": "nan", "21095": "nan", "21100": "nan", "21105": "nan", "21110": "nan", "21115": "nan", "21120": "nan", "21125": "nan", "21130": "nan", "21135": "nan", "21140": "nan", "21145": "nan", "21150": "nan", "21155": "nan", "21160": "nan", "21165": "nan", "21170": "nan", "21175": "nan", "21180": "nan", "21185": "nan", "21190": "nan", "21195": "nan", "21200": "nan", "21205": "nan", "21210": "nan", "21215": "nan", "21220": "nan", "21225": "nan", "21230": "nan", "21235": "nan", "21240": "nan", "21245": "nan", "21250": "nan", "21255": "nan", "21260": "nan", "21265": "nan", "21270": "nan", "21275": "nan", "21280": "nan", "21285": "nan", "21290": "nan", "21295": "nan", "21300": "nan", "21305": "nan", "21310": "nan", "21315": "nan", "21320": "nan", "21325": "nan", "21330": "nan", "21335": "nan", "21340": "nan", "21345": "nan", "21350": "nan", "21355": "nan", "21360": "nan", "21365": "nan", "21370": "nan", "21375": "nan", "21380": "nan", "21385": "nan", "21390": "nan", "21395": "nan", "21400": "nan", "21405": "nan", "21410": "nan", "21415": "nan", "21420": "nan", "21425": "nan", "21430": "nan", "21435": "nan", "21440": "nan", "21445": "nan", "21450": "nan", "21455": "nan", "21460": "nan", "21465": "nan", "21470": "nan", "21475": "nan", "21480": "nan", "21485": "nan", "21490": "nan", "21495": "nan", "21500": "nan", "21505": "nan", "21510": "nan", "21515": "nan", "21520": "nan", "21525": "nan", "21530": "nan", "21535": "nan", "21540": "nan", "21545": "nan", "21550": "nan", "21555": "nan", "21560": "nan", "21565": "nan", "21570": "nan", "21575": "nan", "21580": "nan", "21585": "nan", "21590": "nan", "21595": "nan", "21600": "nan", "21605": "nan", "21610": "nan", "21615": "nan", "21620": "nan", "21625": "nan", "21630": "nan", "21635": "nan", "21640": "nan", "21645": "nan", "21650": "nan", "21655": "nan", "21660": "nan", "21665": "nan", "21670": "nan", "21675": "nan", "21680": "nan", "21685": "nan", "21690": "nan", "21695": "nan", "21700": "nan", "21705": "nan", "21710": "nan", "21715": "nan", "21720": "nan", "21725": "nan", "21730": "nan", "21735": "nan", "21740": "nan", "21745": "nan", "21750": "nan", "21755": "nan", "21760": "nan", "21765": "nan", "21770": "nan", "21775": "nan", "21780": "nan", "21785": "nan", "21790": "nan", "21795": "nan", "21800": "nan", "21805": "nan", "21810": "nan", "21815": "nan", "21820": "nan", "21825": "nan", "21830": "nan", "21835": "nan", "21840": "nan", "21845": "nan", "21850": "nan", "21855": "nan", "21860": "nan", "21865": "nan", "21870": "nan", "21875": "nan", "21880": "nan", "21885": "nan", "21890": "nan", "21895": "nan", "21900": "nan", "21905": "nan", "21910": "nan", "21915": "nan", "21920": "nan", "21925": "nan", "21930": "nan", "21935": "nan", "21940": "nan", "21945": "nan", "21950": "nan", "21955": "nan", "21960": "nan", "21965": "nan", "21970": "nan", "21975": "nan", "21980": "nan", "21985": "nan", "21990": "nan", "21995": "nan", "22000": "nan", "22005": "nan", "22010": "nan", "22015": "nan", "22020": "nan", "22025": "nan", "22030": "nan", "22035": "nan", "22040": "nan", "22045": "nan", "22050": "nan", "22055": "nan", "22060": "nan", "22065": "nan", "22070": "nan", "22075": "nan", "22080": "nan", "22085": "nan", "22090": "nan", "22095": "nan", "22100": "nan", "22105": "nan", "22110": "nan", "22115": "nan", "22120": "nan", "22125": "nan", "22130": "nan", "22135": "nan", "22140": "nan", "22145": "nan", "22150": "nan", "22155": "nan", "22160": "nan", "22165": "nan", "22170": "nan", "22175": "nan", "22180": "nan", "22185": "nan", "22190": "nan", "22195": "nan", "22200": "nan", "22205": "nan", "22210": "nan", "22215": "nan", "22220": "nan", "22225": "nan", "22230": "nan", "22235": "nan", "22240": "nan", "22245": "nan", "22250": "nan", "22255": "nan", "22260": "nan", "22265": "nan", "22270": "nan", "22275": "nan", "22280": "nan", "22285": "nan", "22290": "nan", "22295": "nan", "22300": "nan", "22305": "nan", "22310": "nan", "22315": "nan", "22320": "nan", "22325": "nan", "22330": "nan", "22335": "nan", "22340": "nan", "22345": "nan", "22350": "nan", "22355": "nan", "22360": "nan", "22365": "nan", "22370": "nan", "22375": "nan", "22380": "nan", "22385": "nan", "22390": "nan", "22395": "nan", "22400": "nan", "22405": "nan", "22410": "nan", "22415": "nan", "22420": "nan", "22425": "nan", "22430": "nan", "22435": "nan", "22440": "nan", "22445": "nan", "22450": "nan", "22455": "nan", "22460": "nan", "22465": "nan", "22470": "nan", "22475": "nan", "22480": "nan", "22485": "nan", "22490": "nan", "22495": "nan", "22500": "nan", "22505": "nan", "22510": "nan", "22515": "nan", "22520": "nan", "22525": "nan", "22530": "nan", "22535": "nan", "22540": "nan", "22545": "nan", "22550": "nan", "22555": "nan", "22560": "nan", "22565": "nan", "22570": "nan", "22575": "nan", "22580": "nan", "22585": "nan", "22590": "nan", "22595": "nan", "22600": "nan", "22605": "nan", "22610": "nan", "22615": "nan", "22620": "nan", "22625": "nan", "22630": "nan", "22635": "nan", "22640": "nan", "22645": "nan", "22650": "nan", "22655": "nan", "22660": "nan", "22665": "nan", "22670": "nan", "22675": "nan", "22680": "nan", "22685": "nan", "22690": "nan", "22695": "nan", "22700": "nan", "22705": "nan", "22710": "nan", "22715": "nan", "22720": "nan", "22725": "nan", "22730": "nan", "22735": "nan", "22740": "nan", "22745": "nan", "22750": "nan", "22755": "nan", "22760": "nan", "22765": "nan", "22770": "nan", "22775": "nan", "22780": "nan", "22785": "nan", "22790": "nan", "22795": "nan", "22800": "nan", "22805": "nan", "22810": "nan", "22815": "nan", "22820": "nan", "22825": "nan", "22830": "nan", "22835": "nan", "22840": "nan", "22845": "nan", "22850": "nan", "22855": "nan", "22860": "nan", "22865": "nan", "22870": "nan", "22875": "nan", "22880": "nan", "22885": "nan", "22890": "nan", "22895": "nan", "22900": "nan", "22905": "nan", "22910": "nan", "22915": "nan", "22920": "nan", "22925": "nan", "22930": "nan", "22935": "nan", "22940": "nan", "22945": "nan", "22950": "nan", "22955": "nan", "22960": "nan", "22965": "nan", "22970": "nan", "22975": "nan", "22980": "nan", "22985": "nan", "22990": "nan", "22995": "nan", "23000": "nan", "23005": "nan", "23010": "nan", "23015": "nan", "23020": "nan", "23025": "nan", "23030": "nan", "23035": "nan", "23040": "nan", "23045": "nan", "23050": "nan", "23055": "nan", "23060": "nan", "23065": "nan", "23070": "nan", "23075": "nan", "23080": "nan", "23085": "nan", "23090": "nan", "23095": "nan", "23100": "nan", "23105": "nan", "23110": "nan", "23115": "nan", "23120": "nan", "23125": "nan", "23130": "nan", "23135": "nan", "23140": "nan", "23145": "nan", "23150": "nan", "23155": "nan", "23160": "nan", "23165": "nan", "23170": "nan", "23175": "nan", "23180": "nan", "23185": "nan", "23190": "nan", "23195": "nan", "23200": "nan", "23205": "nan", "23210": "nan", "23215": "nan", "23220": "nan", "23225": "nan", "23230": "nan", "23235": "nan", "23240": "nan", "23245": "nan", "23250": "nan", "23255": "nan", "23260": "nan", "23265": "nan", "23270": "nan", "23275": "nan", "23280": "nan", "23285": "nan", "23290": "nan", "23295": "nan", "23300": "nan", "23305": "nan", "23310": "nan", "23315": "nan", "23320": "nan", "23325": "nan", "23330": "nan", "23335": "nan", "23340": "nan", "23345": "nan", "23350": "nan", "23355": "nan", "23360": "nan", "23365": "nan", "23370": "nan", "23375": "nan", "23380": "nan", "23385": "nan", "23390": "nan", "23395": "nan", "23400": "nan", "23405": "nan", "23410": "nan", "23415": "nan", "23420": "nan", "23425": "nan", "23430": "nan", "23435": "nan", "23440": "nan", "23445": "nan", "23450": "nan", "23455": "nan", "23460": "nan", "23465": "nan", "23470": "nan", "23475": "nan", "23480": "nan", "23485": "nan", "23490": "nan", "23495": "nan", "23500": "nan", "23505": "nan", "23510": "nan", "23515": "nan", "23520": "nan", "23525": "nan", "23530": "nan", "23535": "nan", "23540": "nan", "23545": "nan", "23550": "nan", "23555": "nan", "23560": "nan", "23565": "nan", "23570": "nan", "23575": "nan", "23580": "nan", "23585": "nan", "23590": "nan", "23595": "nan", "23600": "nan", "23605": "nan", "23610": "nan", "23615": "nan", "23620": "nan", "23625": "nan", "23630": "nan", "23635": "nan", "23640": "nan", "23645": "nan", "23650": "nan", "23655": "nan", "23660": "nan", "23665": "nan", "23670": "nan", "23675": "nan", "23680": "nan", "23685": "nan", "23690": "nan", "23695": "nan", "23700": "nan", "23705": "nan", "23710": "nan", "23715": "nan", "23720": "nan", "23725": "nan", "23730": "nan", "23735": "nan", "23740": "nan", "23745": "nan", "23750": "nan", "23755": "nan", "23760": "nan", "23765": "nan", "23770": "nan", "23775": "nan", "23780": "nan", "23785": "nan", "23790": "nan", "23795": "nan", "23800": "nan", "23805": "nan", "23810": "nan", "23815": "nan", "23820": "nan", "23825": "nan", "23830": "nan", "23835": "nan", "23840": "nan", "23845": "nan", "23850": "nan", "23855": "nan", "23860": "nan", "23865": "nan", "23870": "nan", "23875": "nan", "23880": "nan", "23885": "nan", "23890": "nan", "23895": "nan", "23900": "nan", "23905": "nan", "23910": "nan", "23915": "nan", "23920": "nan", "23925": "nan", "23930": "nan", "23935": "nan", "23940": "nan", "23945": "nan", "23950": "nan", "23955": "nan", "23960": "nan", "23965": "nan", "23970": "nan", "23975": "nan", "23980": "nan", "23985": "nan", "23990": "nan", "23995": "nan", "24000": "nan", "24005": "nan", "24010": "nan", "24015": "nan", "24020": "nan", "24025": "nan", "24030": "nan", "24035": "nan", "24040": "nan", "24045": "nan", "24050": "nan", "24055": "nan", "24060": "nan", "24065": "nan", "24070": "nan", "24075": "nan", "24080": "nan", "24085": "nan", "24090": "nan", "24095": "nan", "24100": "nan", "24105": "nan", "24110": "nan", "24115": "nan", "24120": "nan", "24125": "nan", "24130": "nan", "24135": "nan", "24140": "nan", "24145": "nan", "24150": "nan", "24155": "nan", "24160": "nan", "24165": "nan", "24170": "nan", "24175": "nan", "24180": "nan", "24185": "nan", "24190": "nan", "24195": "nan", "24200": "nan", "24205": "nan", "24210": "nan", "24215": "nan", "24220": "nan", "24225": "nan", "24230": "nan", "24235": "nan", "24240": "nan", "24245": "nan", "24250": "nan", "24255": "nan", "24260": "nan", "24265": "nan", "24270": "nan", "24275": "nan", "24280": "nan", "24285": "nan", "24290": "nan", "24295": "nan", "24300": "nan", "24305": "nan", "24310": "nan", "24315": "nan", "24320": "nan", "24325": "nan", "24330": "nan", "24335": "nan", "24340": "nan", "24345": "nan", "24350": "nan", "24355": "nan", "24360": "nan", "24365": "nan", "24370": "nan", "24375": "nan", "24380": "nan", "24385": "nan", "24390": "nan", "24395": "nan", "24400": "nan", "24405": "nan", "24410": "nan", "24415": "nan", "24420": "nan", "24425": "nan", "24430": "nan", "24435": "nan", "24440": "nan", "24445": "nan", "24450": "nan", "24455": "nan", "24460": "nan", "24465": "nan", "24470": "nan", "24475": "nan", "24480": "nan", "24485": "nan", "24490": "nan", "24495": "nan", "24500": "nan", "24505": "nan", "24510": "nan", "24515": "nan", "24520": "nan", "24525": "nan", "24530": "nan", "24535": "nan", "24540": "nan", "24545": "nan", "24550": "nan", "24555": "nan", "24560": "nan", "24565": "nan", "24570": "nan", "24575": "nan", "24580": "nan", "24585": "nan", "24590": "nan", "24595": "nan", "24600": "nan", "24605": "nan", "24610": "nan", "24615": "nan", "24620": "nan", "24625": "nan", "24630": "nan", "24635": "nan", "24640": "nan", "24645": "nan", "24650": "nan", "24655": "nan", "24660": "nan", "24665": "nan", "24670": "nan", "24675": "nan", "24680": "nan", "24685": "nan", "24690": "nan", "24695": "nan", "24700": "nan", "24705": "nan", "24710": "nan", "24715": "nan", "24720": "nan", "24725": "nan", "24730": "nan", "24735": "nan", "24740": "nan", "24745": "nan", "24750": "nan", "24755": "nan", "24760": "nan", "24765": "nan", "24770": "nan", "24775": "nan", "24780": "nan", "24785": "nan", "24790": "nan", "24795": "nan", "24800": "nan", "24805": "nan", "24810": "nan", "24815": "nan", "24820": "nan", "24825": "nan", "24830": "nan", "24835": "nan", "24840": "nan", "24845": "nan", "24850": "nan", "24855": "nan", "24860": "nan", "24865": "nan", "24870": "nan", "24875": "nan", "24880": "nan", "24885": "nan", "24890": "nan", "24895": "nan", "24900": "nan", "24905": "nan", "24910": "nan", "24915": "nan", "24920": "nan", "24925": "nan", "24930": "nan", "24935": "nan", "24940": "nan", "24945": "nan", "24950": "nan", "24955": "nan", "24960": "nan", "24965": "nan", "24970": "nan", "24975": "nan", "24980": "nan", "24985": "nan", "24990": "nan", "24995": "nan", "25000": "nan"}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 25000, "step_interval": 5, "values": {"1": 146582016.0, "5": 146450944.0, "10": 145926656.0, "15": 146450944.0, "20": 224285696.0, "25": 224285696.0, "30": 224285696.0, "35": 224285696.0, "40": 224285696.0, "45": 224285696.0, "50": 224285696.0, "55": 224285696.0, "60": 224285696.0, "65": 224285696.0, "70": 224285696.0, "75": 224285696.0, "80": 224285696.0, "85": 224285696.0, "90": 224285696.0, "95": 224285696.0, "100": 224285696.0, "105": 224285696.0, "110": 224285696.0, "115": 224285696.0, "120": 224285696.0, "125": 224285696.0, "130": 224285696.0, "135": 224285696.0, "140": 224285696.0, "145": 224285696.0, "150": 224285696.0, "155": 224285696.0, "160": 224285696.0, "165": 224809984.0, "170": 224285696.0, "175": 224285696.0, "180": 224285696.0, "185": 224285696.0, "190": 224809984.0, "195": 224285696.0, "200": 224285696.0, "205": 224285696.0, "210": 224285696.0, "215": 224285696.0, "220": 224285696.0, "225": 224285696.0, "230": 224285696.0, "235": 224285696.0, "240": 224285696.0, "245": 224285696.0, "250": 224809984.0, "255": 224285696.0, "260": 224285696.0, "265": 224285696.0, "270": 224285696.0, "275": 224285696.0, "280": 224285696.0, "285": 224285696.0, "290": 224285696.0, "295": 225858560.0, "300": 224285696.0, "305": 224285696.0, "310": 224285696.0, "315": 224285696.0, "320": 224809984.0, "325": 224285696.0, "330": 224285696.0, "335": 224285696.0, "340": 224285696.0, "345": 224285696.0, "350": 224285696.0, "355": 224285696.0, "360": 224285696.0, "365": 224285696.0, "370": 224285696.0, "375": 224285696.0, "380": 224809984.0, "385": 224285696.0, "390": 224285696.0, "395": 224285696.0, "400": 224285696.0, "405": 224285696.0, "410": 224285696.0, "415": 224285696.0, "420": 224285696.0, "425": 224285696.0, "430": 224285696.0, "435": 224285696.0, "440": 224285696.0, "445": 224285696.0, "450": 224285696.0, "455": 224285696.0, "460": 224285696.0, "465": 224285696.0, "470": 224285696.0, "475": 224285696.0, "480": 224285696.0, "485": 224285696.0, "490": 224285696.0, "495": 224285696.0, "500": 224285696.0, "505": 224285696.0, "510": 224285696.0, "515": 224285696.0, "520": 224285696.0, "525": 224285696.0, "530": 224285696.0, "535": 224285696.0, "540": 224285696.0, "545": 224285696.0, "550": 224285696.0, "555": 224285696.0, "560": 224285696.0, "565": 224285696.0, "570": 224285696.0, "575": 224285696.0, "580": 224809984.0, "585": 224285696.0, "590": 224809984.0, "595": 224285696.0, "600": 224678912.0, "605": 224809984.0, "610": 224285696.0, "615": 224285696.0, "620": 224285696.0, "625": 224809984.0, "630": 224285696.0, "635": 224285696.0, "640": 224285696.0, "645": 224285696.0, "650": 224285696.0, "655": 224285696.0, "660": 224285696.0, "665": 224285696.0, "670": 224285696.0, "675": 224285696.0, "680": 224285696.0, "685": 224285696.0, "690": 224285696.0, "695": 224285696.0, "700": 224285696.0, "705": 224285696.0, "710": 224285696.0, "715": 224285696.0, "720": 224285696.0, "725": 224285696.0, "730": 224285696.0, "735": 224285696.0, "740": 224285696.0, "745": 224285696.0, "750": 224285696.0, "755": 224285696.0, "760": 224285696.0, "765": 224285696.0, "770": 224285696.0, "775": 224285696.0, "780": 224285696.0, "785": 224285696.0, "790": 224285696.0, "795": 224285696.0, "800": 224285696.0, "805": 224285696.0, "810": 224285696.0, "815": 224285696.0, "820": 224285696.0, "825": 225334272.0, "830": 224285696.0, "835": 224285696.0, "840": 224285696.0, "845": 224285696.0, "850": 224285696.0, "855": 224285696.0, "860": 224285696.0, "865": 224285696.0, "870": 224285696.0, "875": 224285696.0, "880": 224678912.0, "885": 224285696.0, "890": 224809984.0, "895": 224809984.0, "900": 224285696.0, "905": 224285696.0, "910": 224285696.0, "915": 224285696.0, "920": 224285696.0, "925": 224285696.0, "930": 224285696.0, "935": 224285696.0, "940": 224285696.0, "945": 224285696.0, "950": 224809984.0, "955": 224285696.0, "960": 224285696.0, "965": 224285696.0, "970": 224285696.0, "975": 224285696.0, "980": 224285696.0, "985": 224285696.0, "990": 224809984.0, "995": 224285696.0, "1000": 224285696.0, "1005": 224285696.0, "1010": 224285696.0, "1015": 224285696.0, "1020": 224285696.0, "1025": 224285696.0, "1030": 224285696.0, "1035": 224285696.0, "1040": 224285696.0, "1045": 224285696.0, "1050": 224285696.0, "1055": 224285696.0, "1060": 224285696.0, "1065": 224285696.0, "1070": 224285696.0, "1075": 224285696.0, "1080": 224285696.0, "1085": 224285696.0, "1090": 224285696.0, "1095": 224285696.0, "1100": 224285696.0, "1105": 224809984.0, "1110": 224285696.0, "1115": 224285696.0, "1120": 224285696.0, "1125": 224285696.0, "1130": 224285696.0, "1135": 224285696.0, "1140": 224285696.0, "1145": 224285696.0, "1150": 225334272.0, "1155": 224285696.0, "1160": 224285696.0, "1165": 224285696.0, "1170": 225334272.0, "1175": 224285696.0, "1180": 224285696.0, "1185": 224285696.0, "1190": 224809984.0, "1195": 224285696.0, "1200": 224285696.0, "1205": 224285696.0, "1210": 224285696.0, "1215": 224285696.0, "1220": 224285696.0, "1225": 224285696.0, "1230": 224285696.0, "1235": 224285696.0, "1240": 224285696.0, "1245": 224285696.0, "1250": 224285696.0, "1255": 224285696.0, "1260": 225334272.0, "1265": 224285696.0, "1270": 224285696.0, "1275": 224285696.0, "1280": 224285696.0, "1285": 224285696.0, "1290": 224285696.0, "1295": 224285696.0, "1300": 224285696.0, "1305": 224285696.0, "1310": 224285696.0, "1315": 224285696.0, "1320": 224285696.0, "1325": 224285696.0, "1330": 224809984.0, "1335": 224285696.0, "1340": 224285696.0, "1345": 224285696.0, "1350": 224285696.0, "1355": 224285696.0, "1360": 224285696.0, "1365": 224285696.0, "1370": 224285696.0, "1375": 224809984.0, "1380": 224285696.0, "1385": 224285696.0, "1390": 224285696.0, "1395": 224285696.0, "1400": 224285696.0, "1405": 224285696.0, "1410": 224809984.0, "1415": 224285696.0, "1420": 224285696.0, "1425": 224285696.0, "1430": 224285696.0, "1435": 224285696.0, "1440": 224285696.0, "1445": 224285696.0, "1450": 224285696.0, "1455": 224809984.0, "1460": 224809984.0, "1465": 224285696.0, "1470": 224285696.0, "1475": 224285696.0, "1480": 224285696.0, "1485": 224285696.0, "1490": 224285696.0, "1495": 224285696.0, "1500": 224285696.0, "1505": 224285696.0, "1510": 224285696.0, "1515": 224285696.0, "1520": 224285696.0, "1525": 224285696.0, "1530": 224285696.0, "1535": 224809984.0, "1540": 224285696.0, "1545": 224285696.0, "1550": 224285696.0, "1555": 224285696.0, "1560": 224285696.0, "1565": 224285696.0, "1570": 224285696.0, "1575": 224285696.0, "1580": 224285696.0, "1585": 224809984.0, "1590": 224285696.0, "1595": 224285696.0, "1600": 224285696.0, "1605": 224285696.0, "1610": 224285696.0, "1615": 224285696.0, "1620": 224809984.0, "1625": 224285696.0, "1630": 224285696.0, "1635": 224285696.0, "1640": 224285696.0, "1645": 224285696.0, "1650": 224285696.0, "1655": 224285696.0, "1660": 224285696.0, "1665": 224285696.0, "1670": 224285696.0, "1675": 224285696.0, "1680": 224285696.0, "1685": 224285696.0, "1690": 224809984.0, "1695": 224285696.0, "1700": 224285696.0, "1705": 224285696.0, "1710": 224285696.0, "1715": 224809984.0, "1720": 224285696.0, "1725": 224809984.0, "1730": 224285696.0, "1735": 224285696.0, "1740": 224285696.0, "1745": 224285696.0, "1750": 224285696.0, "1755": 224285696.0, "1760": 224809984.0, "1765": 224285696.0, "1770": 224809984.0, "1775": 224285696.0, "1780": 224285696.0, "1785": 224285696.0, "1790": 224285696.0, "1795": 224285696.0, "1800": 224285696.0, "1805": 224285696.0, "1810": 224285696.0, "1815": 224285696.0, "1820": 224285696.0, "1825": 224285696.0, "1830": 224285696.0, "1835": 224285696.0, "1840": 224285696.0, "1845": 224285696.0, "1850": 224285696.0, "1855": 224285696.0, "1860": 224285696.0, "1865": 224285696.0, "1870": 224285696.0, "1875": 224285696.0, "1880": 224285696.0, "1885": 224285696.0, "1890": 224285696.0, "1895": 224285696.0, "1900": 224285696.0, "1905": 225334272.0, "1910": 224285696.0, "1915": 224285696.0, "1920": 224285696.0, "1925": 224285696.0, "1930": 224285696.0, "1935": 224285696.0, "1940": 224285696.0, "1945": 224285696.0, "1950": 224285696.0, "1955": 224285696.0, "1960": 224285696.0, "1965": 224285696.0, "1970": 224285696.0, "1975": 224285696.0, "1980": 224285696.0, "1985": 224285696.0, "1990": 224285696.0, "1995": 224285696.0, "2000": 224285696.0, "2005": 224285696.0, "2010": 224285696.0, "2015": 224285696.0, "2020": 224285696.0, "2025": 224285696.0, "2030": 224285696.0, "2035": 224285696.0, "2040": 224285696.0, "2045": 224285696.0, "2050": 224285696.0, "2055": 224285696.0, "2060": 224285696.0, "2065": 224285696.0, "2070": 224285696.0, "2075": 224285696.0, "2080": 224285696.0, "2085": 224285696.0, "2090": 224285696.0, "2095": 224285696.0, "2100": 224285696.0, "2105": 224285696.0, "2110": 224285696.0, "2115": 224285696.0, "2120": 224285696.0, "2125": 224285696.0, "2130": 224285696.0, "2135": 224285696.0, "2140": 224285696.0, "2145": 224285696.0, "2150": 224285696.0, "2155": 224285696.0, "2160": 224285696.0, "2165": 224285696.0, "2170": 224285696.0, "2175": 224285696.0, "2180": 224285696.0, "2185": 224285696.0, "2190": 224285696.0, "2195": 224285696.0, "2200": 224285696.0, "2205": 224285696.0, "2210": 224285696.0, "2215": 224285696.0, "2220": 224285696.0, "2225": 224285696.0, "2230": 224285696.0, "2235": 224285696.0, "2240": 224285696.0, "2245": 224285696.0, "2250": 224285696.0, "2255": 224285696.0, "2260": 224285696.0, "2265": 224285696.0, "2270": 224285696.0, "2275": 224285696.0, "2280": 224285696.0, "2285": 224285696.0, "2290": 224285696.0, "2295": 224285696.0, "2300": 224285696.0, "2305": 224285696.0, "2310": 224285696.0, "2315": 224285696.0, "2320": 224285696.0, "2325": 224285696.0, "2330": 224285696.0, "2335": 224285696.0, "2340": 224285696.0, "2345": 224285696.0, "2350": 224285696.0, "2355": 224285696.0, "2360": 224285696.0, "2365": 224285696.0, "2370": 224285696.0, "2375": 224285696.0, "2380": 224285696.0, "2385": 224285696.0, "2390": 224285696.0, "2395": 224285696.0, "2400": 224285696.0, "2405": 224285696.0, "2410": 224285696.0, "2415": 224285696.0, "2420": 224285696.0, "2425": 224285696.0, "2430": 224285696.0, "2435": 225334272.0, "2440": 224285696.0, "2445": 224285696.0, "2450": 224285696.0, "2455": 224285696.0, "2460": 224285696.0, "2465": 224285696.0, "2470": 225334272.0, "2475": 224285696.0, "2480": 224285696.0, "2485": 224285696.0, "2490": 224809984.0, "2495": 224285696.0, "2500": 224285696.0, "2505": 224285696.0, "2510": 224285696.0, "2515": 224285696.0, "2520": 224285696.0, "2525": 224285696.0, "2530": 224285696.0, "2535": 224285696.0, "2540": 224285696.0, "2545": 224285696.0, "2550": 224285696.0, "2555": 224285696.0, "2560": 224285696.0, "2565": 224809984.0, "2570": 224285696.0, "2575": 224285696.0, "2580": 224285696.0, "2585": 224285696.0, "2590": 224285696.0, "2595": 224285696.0, "2600": 224285696.0, "2605": 224285696.0, "2610": 224285696.0, "2615": 224285696.0, "2620": 224285696.0, "2625": 224285696.0, "2630": 224285696.0, "2635": 224285696.0, "2640": 224285696.0, "2645": 224285696.0, "2650": 224285696.0, "2655": 224285696.0, "2660": 224285696.0, "2665": 224285696.0, "2670": 224285696.0, "2675": 224285696.0, "2680": 224285696.0, "2685": 224285696.0, "2690": 224285696.0, "2695": 224285696.0, "2700": 224285696.0, "2705": 224285696.0, "2710": 224285696.0, "2715": 224809984.0, "2720": 224285696.0, "2725": 224285696.0, "2730": 224285696.0, "2735": 224285696.0, "2740": 224285696.0, "2745": 224285696.0, "2750": 224285696.0, "2755": 224285696.0, "2760": 224285696.0, "2765": 224285696.0, "2770": 224285696.0, "2775": 224809984.0, "2780": 224285696.0, "2785": 224285696.0, "2790": 224285696.0, "2795": 224285696.0, "2800": 224285696.0, "2805": 224285696.0, "2810": 224285696.0, "2815": 224285696.0, "2820": 224285696.0, "2825": 224285696.0, "2830": 224285696.0, "2835": 224285696.0, "2840": 224285696.0, "2845": 224285696.0, "2850": 224285696.0, "2855": 224285696.0, "2860": 224285696.0, "2865": 224285696.0, "2870": 224285696.0, "2875": 224285696.0, "2880": 224285696.0, "2885": 224285696.0, "2890": 224285696.0, "2895": 224285696.0, "2900": 224285696.0, "2905": 224809984.0, "2910": 224285696.0, "2915": 224285696.0, "2920": 224285696.0, "2925": 224285696.0, "2930": 224285696.0, "2935": 224285696.0, "2940": 224285696.0, "2945": 224285696.0, "2950": 224285696.0, "2955": 224285696.0, "2960": 224285696.0, "2965": 224809984.0, "2970": 224285696.0, "2975": 224285696.0, "2980": 224285696.0, "2985": 224285696.0, "2990": 224285696.0, "2995": 224285696.0, "3000": 224809984.0, "3005": 224285696.0, "3010": 224285696.0, "3015": 224285696.0, "3020": 224285696.0, "3025": 224285696.0, "3030": 224285696.0, "3035": 224285696.0, "3040": 224285696.0, "3045": 224285696.0, "3050": 224285696.0, "3055": 224285696.0, "3060": 224809984.0, "3065": 224285696.0, "3070": 224285696.0, "3075": 224285696.0, "3080": 224285696.0, "3085": 224285696.0, "3090": 224285696.0, "3095": 224285696.0, "3100": 224285696.0, "3105": 224285696.0, "3110": 224285696.0, "3115": 224285696.0, "3120": 224809984.0, "3125": 224809984.0, "3130": 224285696.0, "3135": 224285696.0, "3140": 224285696.0, "3145": 224285696.0, "3150": 224285696.0, "3155": 224285696.0, "3160": 224285696.0, "3165": 224285696.0, "3170": 224285696.0, "3175": 224285696.0, "3180": 224285696.0, "3185": 224285696.0, "3190": 224285696.0, "3195": 224285696.0, "3200": 224285696.0, "3205": 224285696.0, "3210": 224285696.0, "3215": 224285696.0, "3220": 224809984.0, "3225": 224285696.0, "3230": 224285696.0, "3235": 224285696.0, "3240": 224285696.0, "3245": 224285696.0, "3250": 224285696.0, "3255": 224285696.0, "3260": 224285696.0, "3265": 224285696.0, "3270": 224285696.0, "3275": 224285696.0, "3280": 224285696.0, "3285": 224285696.0, "3290": 224285696.0, "3295": 224285696.0, "3300": 224285696.0, "3305": 224809984.0, "3310": 224285696.0, "3315": 224285696.0, "3320": 224285696.0, "3325": 224285696.0, "3330": 224285696.0, "3335": 224285696.0, "3340": 224285696.0, "3345": 224285696.0, "3350": 224285696.0, "3355": 224285696.0, "3360": 224285696.0, "3365": 224285696.0, "3370": 224285696.0, "3375": 224285696.0, "3380": 224285696.0, "3385": 224809984.0, "3390": 224285696.0, "3395": 224285696.0, "3400": 224285696.0, "3405": 224809984.0, "3410": 224809984.0, "3415": 224285696.0, "3420": 224285696.0, "3425": 224285696.0, "3430": 224285696.0, "3435": 224285696.0, "3440": 224285696.0, "3445": 224809984.0, "3450": 224285696.0, "3455": 224285696.0, "3460": 224285696.0, "3465": 224285696.0, "3470": 224285696.0, "3475": 224285696.0, "3480": 224285696.0, "3485": 224285696.0, "3490": 224285696.0, "3495": 224285696.0, "3500": 224285696.0, "3505": 224285696.0, "3510": 224285696.0, "3515": 224285696.0, "3520": 224285696.0, "3525": 224285696.0, "3530": 224285696.0, "3535": 224285696.0, "3540": 224285696.0, "3545": 224285696.0, "3550": 224285696.0, "3555": 224285696.0, "3560": 224285696.0, "3565": 224285696.0, "3570": 224285696.0, "3575": 224285696.0, "3580": 224285696.0, "3585": 224285696.0, "3590": 224285696.0, "3595": 224285696.0, "3600": 224285696.0, "3605": 224285696.0, "3610": 224285696.0, "3615": 224285696.0, "3620": 224285696.0, "3625": 224285696.0, "3630": 224285696.0, "3635": 224285696.0, "3640": 224285696.0, "3645": 224285696.0, "3650": 224285696.0, "3655": 224285696.0, "3660": 224285696.0, "3665": 224285696.0, "3670": 224285696.0, "3675": 224809984.0, "3680": 224809984.0, "3685": 224285696.0, "3690": 224809984.0, "3695": 224285696.0, "3700": 225334272.0, "3705": 224809984.0, "3710": 224285696.0, "3715": 224285696.0, "3720": 224285696.0, "3725": 224285696.0, "3730": 224285696.0, "3735": 224809984.0, "3740": 224285696.0, "3745": 224285696.0, "3750": 224285696.0, "3755": 224285696.0, "3760": 224285696.0, "3765": 224809984.0, "3770": 224809984.0, "3775": 224285696.0, "3780": 224285696.0, "3785": 224285696.0, "3790": 224678912.0, "3795": 224285696.0, "3800": 224285696.0, "3805": 224285696.0, "3810": 224285696.0, "3815": 224285696.0, "3820": 224285696.0, "3825": 225334272.0, "3830": 224285696.0, "3835": 224285696.0, "3840": 224285696.0, "3845": 224285696.0, "3850": 224285696.0, "3855": 224285696.0, "3860": 224285696.0, "3865": 224285696.0, "3870": 224285696.0, "3875": 224285696.0, "3880": 224285696.0, "3885": 224285696.0, "3890": 224809984.0, "3895": 224809984.0, "3900": 224285696.0, "3905": 225334272.0, "3910": 224285696.0, "3915": 224285696.0, "3920": 224285696.0, "3925": 224285696.0, "3930": 224285696.0, "3935": 224285696.0, "3940": 224285696.0, "3945": 224285696.0, "3950": 224285696.0, "3955": 224285696.0, "3960": 224285696.0, "3965": 224285696.0, "3970": 224285696.0, "3975": 224285696.0, "3980": 224285696.0, "3985": 224809984.0, "3990": 225334272.0, "3995": 224285696.0, "4000": 224809984.0, "4005": 224285696.0, "4010": 224809984.0, "4015": 224285696.0, "4020": 224809984.0, "4025": 224285696.0, "4030": 224285696.0, "4035": 224285696.0, "4040": 224285696.0, "4045": 224285696.0, "4050": 224285696.0, "4055": 224809984.0, "4060": 224285696.0, "4065": 224285696.0, "4070": 224285696.0, "4075": 224285696.0, "4080": 224285696.0, "4085": 224285696.0, "4090": 224285696.0, "4095": 224285696.0, "4100": 224285696.0, "4105": 224285696.0, "4110": 224285696.0, "4115": 224285696.0, "4120": 224285696.0, "4125": 224285696.0, "4130": 225334272.0, "4135": 224285696.0, "4140": 224285696.0, "4145": 224285696.0, "4150": 224285696.0, "4155": 224285696.0, "4160": 224285696.0, "4165": 224285696.0, "4170": 224285696.0, "4175": 224285696.0, "4180": 224285696.0, "4185": 224285696.0, "4190": 224809984.0, "4195": 224285696.0, "4200": 224285696.0, "4205": 224285696.0, "4210": 224285696.0, "4215": 224285696.0, "4220": 224678912.0, "4225": 224285696.0, "4230": 224285696.0, "4235": 224285696.0, "4240": 224285696.0, "4245": 224678912.0, "4250": 224809984.0, "4255": 224285696.0, "4260": 224285696.0, "4265": 224809984.0, "4270": 224285696.0, "4275": 224809984.0, "4280": 224285696.0, "4285": 224285696.0, "4290": 224809984.0, "4295": 224285696.0, "4300": 224285696.0, "4305": 224809984.0, "4310": 224285696.0, "4315": 224809984.0, "4320": 224285696.0, "4325": 224285696.0, "4330": 224285696.0, "4335": 224285696.0, "4340": 224285696.0, "4345": 224285696.0, "4350": 224285696.0, "4355": 224285696.0, "4360": 224285696.0, "4365": 224285696.0, "4370": 224285696.0, "4375": 224285696.0, "4380": 224285696.0, "4385": 224285696.0, "4390": 224285696.0, "4395": 224285696.0, "4400": 224285696.0, "4405": 224285696.0, "4410": 224285696.0, "4415": 224285696.0, "4420": 225334272.0, "4425": 224285696.0, "4430": 224285696.0, "4435": 224285696.0, "4440": 224285696.0, "4445": 224285696.0, "4450": 224285696.0, "4455": 224285696.0, "4460": 225334272.0, "4465": 224285696.0, "4470": 224285696.0, "4475": 224285696.0, "4480": 224285696.0, "4485": 224285696.0, "4490": 224285696.0, "4495": 224285696.0, "4500": 224285696.0, "4505": 224809984.0, "4510": 224285696.0, "4515": 224285696.0, "4520": 224285696.0, "4525": 224809984.0, "4530": 224809984.0, "4535": 224285696.0, "4540": 224285696.0, "4545": 224285696.0, "4550": 224285696.0, "4555": 224285696.0, "4560": 224285696.0, "4565": 224285696.0, "4570": 225334272.0, "4575": 224285696.0, "4580": 224809984.0, "4585": 224285696.0, "4590": 224809984.0, "4595": 224285696.0, "4600": 224285696.0, "4605": 224285696.0, "4610": 224285696.0, "4615": 224285696.0, "4620": 224285696.0, "4625": 224809984.0, "4630": 224809984.0, "4635": 224285696.0, "4640": 224285696.0, "4645": 224285696.0, "4650": 224285696.0, "4655": 224809984.0, "4660": 224285696.0, "4665": 224285696.0, "4670": 224285696.0, "4675": 224285696.0, "4680": 225334272.0, "4685": 224285696.0, "4690": 224285696.0, "4695": 224285696.0, "4700": 224285696.0, "4705": 224285696.0, "4710": 224285696.0, "4715": 224285696.0, "4720": 224285696.0, "4725": 224285696.0, "4730": 224809984.0, "4735": 224285696.0, "4740": 224285696.0, "4745": 225334272.0, "4750": 224809984.0, "4755": 224285696.0, "4760": 224285696.0, "4765": 224285696.0, "4770": 224285696.0, "4775": 224285696.0, "4780": 224809984.0, "4785": 224285696.0, "4790": 224285696.0, "4795": 224285696.0, "4800": 225334272.0, "4805": 224285696.0, "4810": 224285696.0, "4815": 224285696.0, "4820": 224285696.0, "4825": 224285696.0, "4830": 224809984.0, "4835": 224285696.0, "4840": 224285696.0, "4845": 224285696.0, "4850": 224285696.0, "4855": 224285696.0, "4860": 224285696.0, "4865": 224285696.0, "4870": 224285696.0, "4875": 224285696.0, "4880": 224285696.0, "4885": 224285696.0, "4890": 224285696.0, "4895": 224285696.0, "4900": 224285696.0, "4905": 224285696.0, "4910": 224285696.0, "4915": 224285696.0, "4920": 224285696.0, "4925": 224285696.0, "4930": 224285696.0, "4935": 224285696.0, "4940": 224285696.0, "4945": 224285696.0, "4950": 224285696.0, "4955": 224285696.0, "4960": 224285696.0, "4965": 224285696.0, "4970": 224285696.0, "4975": 224809984.0, "4980": 224285696.0, "4985": 224285696.0, "4990": 224285696.0, "4995": 224285696.0, "5000": 224285696.0, "5005": 224285696.0, "5010": 224285696.0, "5015": 224285696.0, "5020": 224285696.0, "5025": 224285696.0, "5030": 224285696.0, "5035": 224285696.0, "5040": 224809984.0, "5045": 224285696.0, "5050": 224285696.0, "5055": 224285696.0, "5060": 224285696.0, "5065": 225334272.0, "5070": 224809984.0, "5075": 224285696.0, "5080": 224285696.0, "5085": 224285696.0, "5090": 224678912.0, "5095": 224285696.0, "5100": 224285696.0, "5105": 224285696.0, "5110": 224285696.0, "5115": 224285696.0, "5120": 224285696.0, "5125": 224809984.0, "5130": 224285696.0, "5135": 224285696.0, "5140": 224285696.0, "5145": 224285696.0, "5150": 224285696.0, "5155": 224809984.0, "5160": 224809984.0, "5165": 224285696.0, "5170": 224285696.0, "5175": 224285696.0, "5180": 224285696.0, "5185": 224809984.0, "5190": 224285696.0, "5195": 224285696.0, "5200": 224285696.0, "5205": 224285696.0, "5210": 224285696.0, "5215": 224285696.0, "5220": 224285696.0, "5225": 224285696.0, "5230": 224285696.0, "5235": 224285696.0, "5240": 224285696.0, "5245": 224285696.0, "5250": 224285696.0, "5255": 224285696.0, "5260": 224285696.0, "5265": 224285696.0, "5270": 225334272.0, "5275": 224285696.0, "5280": 224285696.0, "5285": 224285696.0, "5290": 224285696.0, "5295": 224285696.0, "5300": 224285696.0, "5305": 224285696.0, "5310": 224285696.0, "5315": 224285696.0, "5320": 224285696.0, "5325": 225334272.0, "5330": 224285696.0, "5335": 224285696.0, "5340": 224285696.0, "5345": 224285696.0, "5350": 224285696.0, "5355": 224285696.0, "5360": 224285696.0, "5365": 224285696.0, "5370": 224285696.0, "5375": 224285696.0, "5380": 224285696.0, "5385": 224285696.0, "5390": 224285696.0, "5395": 224285696.0, "5400": 224285696.0, "5405": 224285696.0, "5410": 224285696.0, "5415": 224285696.0, "5420": 224809984.0, "5425": 224285696.0, "5430": 224285696.0, "5435": 224285696.0, "5440": 225334272.0, "5445": 224809984.0, "5450": 224285696.0, "5455": 224285696.0, "5460": 224285696.0, "5465": 224285696.0, "5470": 224285696.0, "5475": 224285696.0, "5480": 224285696.0, "5485": 224285696.0, "5490": 224285696.0, "5495": 224285696.0, "5500": 224285696.0, "5505": 224285696.0, "5510": 224285696.0, "5515": 224285696.0, "5520": 224285696.0, "5525": 224285696.0, "5530": 224809984.0, "5535": 224285696.0, "5540": 224285696.0, "5545": 224285696.0, "5550": 224285696.0, "5555": 224285696.0, "5560": 224285696.0, "5565": 224285696.0, "5570": 224809984.0, "5575": 224285696.0, "5580": 224809984.0, "5585": 224809984.0, "5590": 224678912.0, "5595": 224285696.0, "5600": 224809984.0, "5605": 224285696.0, "5610": 224809984.0, "5615": 224809984.0, "5620": 224285696.0, "5625": 224285696.0, "5630": 224285696.0, "5635": 224285696.0, "5640": 224285696.0, "5645": 224285696.0, "5650": 224285696.0, "5655": 224285696.0, "5660": 224285696.0, "5665": 224285696.0, "5670": 224285696.0, "5675": 224285696.0, "5680": 224285696.0, "5685": 224285696.0, "5690": 224809984.0, "5695": 224285696.0, "5700": 224285696.0, "5705": 224285696.0, "5710": 224809984.0, "5715": 224285696.0, "5720": 224285696.0, "5725": 224285696.0, "5730": 224285696.0, "5735": 225334272.0, "5740": 224285696.0, "5745": 224285696.0, "5750": 224285696.0, "5755": 224285696.0, "5760": 224285696.0, "5765": 224285696.0, "5770": 224285696.0, "5775": 224285696.0, "5780": 224285696.0, "5785": 224285696.0, "5790": 224285696.0, "5795": 224285696.0, "5800": 224285696.0, "5805": 224285696.0, "5810": 224285696.0, "5815": 224285696.0, "5820": 224285696.0, "5825": 224285696.0, "5830": 224285696.0, "5835": 224285696.0, "5840": 224285696.0, "5845": 224285696.0, "5850": 224285696.0, "5855": 224285696.0, "5860": 224285696.0, "5865": 224285696.0, "5870": 225334272.0, "5875": 225334272.0, "5880": 224285696.0, "5885": 224285696.0, "5890": 224285696.0, "5895": 224285696.0, "5900": 224285696.0, "5905": 224285696.0, "5910": 224285696.0, "5915": 224809984.0, "5920": 224285696.0, "5925": 224285696.0, "5930": 224285696.0, "5935": 224285696.0, "5940": 224285696.0, "5945": 224285696.0, "5950": 224285696.0, "5955": 224809984.0, "5960": 224678912.0, "5965": 224285696.0, "5970": 224285696.0, "5975": 224285696.0, "5980": 224285696.0, "5985": 224285696.0, "5990": 224285696.0, "5995": 224285696.0, "6000": 224285696.0, "6005": 224285696.0, "6010": 224285696.0, "6015": 224285696.0, "6020": 224285696.0, "6025": 224285696.0, "6030": 224285696.0, "6035": 224285696.0, "6040": 225334272.0, "6045": 225334272.0, "6050": 224809984.0, "6055": 224285696.0, "6060": 224285696.0, "6065": 224285696.0, "6070": 224285696.0, "6075": 224285696.0, "6080": 224285696.0, "6085": 224285696.0, "6090": 224285696.0, "6095": 224285696.0, "6100": 224285696.0, "6105": 224285696.0, "6110": 224285696.0, "6115": 224285696.0, "6120": 224285696.0, "6125": 224285696.0, "6130": 224285696.0, "6135": 224285696.0, "6140": 224285696.0, "6145": 224285696.0, "6150": 224285696.0, "6155": 224285696.0, "6160": 224285696.0, "6165": 224285696.0, "6170": 224285696.0, "6175": 224285696.0, "6180": 224809984.0, "6185": 224285696.0, "6190": 224285696.0, "6195": 224285696.0, "6200": 224285696.0, "6205": 224285696.0, "6210": 224285696.0, "6215": 224285696.0, "6220": 224285696.0, "6225": 224285696.0, "6230": 224285696.0, "6235": 224285696.0, "6240": 224285696.0, "6245": 224285696.0, "6250": 224809984.0, "6255": 224285696.0, "6260": 224285696.0, "6265": 224285696.0, "6270": 224285696.0, "6275": 224809984.0, "6280": 224285696.0, "6285": 224285696.0, "6290": 224285696.0, "6295": 224809984.0, "6300": 224285696.0, "6305": 224285696.0, "6310": 224285696.0, "6315": 224285696.0, "6320": 224285696.0, "6325": 224285696.0, "6330": 224285696.0, "6335": 224285696.0, "6340": 224285696.0, "6345": 224285696.0, "6350": 224285696.0, "6355": 224285696.0, "6360": 224285696.0, "6365": 224285696.0, "6370": 224809984.0, "6375": 224285696.0, "6380": 224285696.0, "6385": 224285696.0, "6390": 224285696.0, "6395": 224285696.0, "6400": 224285696.0, "6405": 224285696.0, "6410": 224285696.0, "6415": 224285696.0, "6420": 224285696.0, "6425": 224285696.0, "6430": 224285696.0, "6435": 224285696.0, "6440": 224285696.0, "6445": 224285696.0, "6450": 224285696.0, "6455": 224285696.0, "6460": 224285696.0, "6465": 224285696.0, "6470": 224285696.0, "6475": 225334272.0, "6480": 224285696.0, "6485": 224285696.0, "6490": 224285696.0, "6495": 224285696.0, "6500": 224285696.0, "6505": 224285696.0, "6510": 224285696.0, "6515": 224285696.0, "6520": 224285696.0, "6525": 224809984.0, "6530": 224285696.0, "6535": 224809984.0, "6540": 224285696.0, "6545": 224285696.0, "6550": 224285696.0, "6555": 224285696.0, "6560": 225334272.0, "6565": 224285696.0, "6570": 224285696.0, "6575": 224809984.0, "6580": 224285696.0, "6585": 224285696.0, "6590": 224285696.0, "6595": 224285696.0, "6600": 224809984.0, "6605": 224285696.0, "6610": 224285696.0, "6615": 224285696.0, "6620": 224285696.0, "6625": 224285696.0, "6630": 225334272.0, "6635": 225334272.0, "6640": 224285696.0, "6645": 224285696.0, "6650": 224285696.0, "6655": 224285696.0, "6660": 225334272.0, "6665": 224809984.0, "6670": 224285696.0, "6675": 225334272.0, "6680": 224285696.0, "6685": 224285696.0, "6690": 224285696.0, "6695": 224285696.0, "6700": 224285696.0, "6705": 224285696.0, "6710": 224285696.0, "6715": 224285696.0, "6720": 224285696.0, "6725": 224285696.0, "6730": 224285696.0, "6735": 224285696.0, "6740": 224285696.0, "6745": 224285696.0, "6750": 224285696.0, "6755": 224285696.0, "6760": 224285696.0, "6765": 224285696.0, "6770": 224809984.0, "6775": 224285696.0, "6780": 224285696.0, "6785": 224285696.0, "6790": 224285696.0, "6795": 224285696.0, "6800": 224285696.0, "6805": 224285696.0, "6810": 224285696.0, "6815": 224285696.0, "6820": 224285696.0, "6825": 224285696.0, "6830": 224285696.0, "6835": 224285696.0, "6840": 224285696.0, "6845": 224285696.0, "6850": 224285696.0, "6855": 224285696.0, "6860": 224285696.0, "6865": 224285696.0, "6870": 224285696.0, "6875": 224285696.0, "6880": 224285696.0, "6885": 224285696.0, "6890": 224285696.0, "6895": 224285696.0, "6900": 224285696.0, "6905": 224285696.0, "6910": 224285696.0, "6915": 224285696.0, "6920": 224285696.0, "6925": 224285696.0, "6930": 224285696.0, "6935": 224285696.0, "6940": 224285696.0, "6945": 224285696.0, "6950": 224285696.0, "6955": 224285696.0, "6960": 224285696.0, "6965": 224285696.0, "6970": 224285696.0, "6975": 224285696.0, "6980": 224285696.0, "6985": 224285696.0, "6990": 224285696.0, "6995": 224285696.0, "7000": 224285696.0, "7005": 224285696.0, "7010": 224285696.0, "7015": 224285696.0, "7020": 224285696.0, "7025": 224285696.0, "7030": 224809984.0, "7035": 224285696.0, "7040": 224285696.0, "7045": 224285696.0, "7050": 224285696.0, "7055": 225334272.0, "7060": 224285696.0, "7065": 224285696.0, "7070": 224285696.0, "7075": 224285696.0, "7080": 224285696.0, "7085": 224285696.0, "7090": 224285696.0, "7095": 224285696.0, "7100": 224285696.0, "7105": 224285696.0, "7110": 224285696.0, "7115": 224285696.0, "7120": 224285696.0, "7125": 224285696.0, "7130": 225334272.0, "7135": 224285696.0, "7140": 224285696.0, "7145": 224285696.0, "7150": 224285696.0, "7155": 224285696.0, "7160": 224285696.0, "7165": 224285696.0, "7170": 224285696.0, "7175": 224285696.0, "7180": 224285696.0, "7185": 224285696.0, "7190": 224809984.0, "7195": 224285696.0, "7200": 224285696.0, "7205": 224285696.0, "7210": 224285696.0, "7215": 224285696.0, "7220": 224678912.0, "7225": 224285696.0, "7230": 224285696.0, "7235": 224285696.0, "7240": 224809984.0, "7245": 225334272.0, "7250": 224285696.0, "7255": 224285696.0, "7260": 224285696.0, "7265": 224285696.0, "7270": 224678912.0, "7275": 224285696.0, "7280": 224285696.0, "7285": 224285696.0, "7290": 224285696.0, "7295": 224285696.0, "7300": 224285696.0, "7305": 224285696.0, "7310": 224285696.0, "7315": 224285696.0, "7320": 224285696.0, "7325": 224285696.0, "7330": 224809984.0, "7335": 224809984.0, "7340": 224678912.0, "7345": 224678912.0, "7350": 224285696.0, "7355": 224285696.0, "7360": 224285696.0, "7365": 224285696.0, "7370": 224285696.0, "7375": 224285696.0, "7380": 224285696.0, "7385": 224285696.0, "7390": 224285696.0, "7395": 224285696.0, "7400": 224285696.0, "7405": 224809984.0, "7410": 224285696.0, "7415": 224285696.0, "7420": 224809984.0, "7425": 224285696.0, "7430": 224285696.0, "7435": 224285696.0, "7440": 224285696.0, "7445": 224285696.0, "7450": 224285696.0, "7455": 224809984.0, "7460": 224285696.0, "7465": 224285696.0, "7470": 224285696.0, "7475": 224285696.0, "7480": 224285696.0, "7485": 224285696.0, "7490": 224285696.0, "7495": 224285696.0, "7500": 224285696.0, "7505": 224285696.0, "7510": 224809984.0, "7515": 224285696.0, "7520": 224285696.0, "7525": 224285696.0, "7530": 224809984.0, "7535": 224809984.0, "7540": 224285696.0, "7545": 224285696.0, "7550": 224809984.0, "7555": 224285696.0, "7560": 224285696.0, "7565": 224285696.0, "7570": 224285696.0, "7575": 224678912.0, "7580": 224285696.0, "7585": 224285696.0, "7590": 224285696.0, "7595": 224285696.0, "7600": 224285696.0, "7605": 224285696.0, "7610": 224285696.0, "7615": 224809984.0, "7620": 224285696.0, "7625": 224285696.0, "7630": 224285696.0, "7635": 224285696.0, "7640": 224285696.0, "7645": 224285696.0, "7650": 224285696.0, "7655": 224285696.0, "7660": 224285696.0, "7665": 224809984.0, "7670": 224285696.0, "7675": 224285696.0, "7680": 224285696.0, "7685": 224809984.0, "7690": 224285696.0, "7695": 224285696.0, "7700": 224285696.0, "7705": 224285696.0, "7710": 224285696.0, "7715": 224285696.0, "7720": 225334272.0, "7725": 224285696.0, "7730": 224285696.0, "7735": 224285696.0, "7740": 224285696.0, "7745": 224285696.0, "7750": 224285696.0, "7755": 224285696.0, "7760": 224285696.0, "7765": 225334272.0, "7770": 224285696.0, "7775": 224285696.0, "7780": 224285696.0, "7785": 224285696.0, "7790": 224285696.0, "7795": 224285696.0, "7800": 224285696.0, "7805": 224285696.0, "7810": 224285696.0, "7815": 224285696.0, "7820": 224285696.0, "7825": 224285696.0, "7830": 225334272.0, "7835": 224285696.0, "7840": 224285696.0, "7845": 224285696.0, "7850": 224285696.0, "7855": 224809984.0, "7860": 224809984.0, "7865": 225334272.0, "7870": 224285696.0, "7875": 224285696.0, "7880": 224285696.0, "7885": 224285696.0, "7890": 224285696.0, "7895": 224285696.0, "7900": 224285696.0, "7905": 224285696.0, "7910": 224285696.0, "7915": 224285696.0, "7920": 224285696.0, "7925": 224809984.0, "7930": 224285696.0, "7935": 224285696.0, "7940": 224285696.0, "7945": 224285696.0, "7950": 224285696.0, "7955": 224285696.0, "7960": 224285696.0, "7965": 225334272.0, "7970": 224285696.0, "7975": 224285696.0, "7980": 224285696.0, "7985": 224285696.0, "7990": 224809984.0, "7995": 224285696.0, "8000": 224285696.0, "8005": 224285696.0, "8010": 224285696.0, "8015": 224285696.0, "8020": 224285696.0, "8025": 224285696.0, "8030": 224285696.0, "8035": 224285696.0, "8040": 224285696.0, "8045": 224285696.0, "8050": 224285696.0, "8055": 224285696.0, "8060": 224285696.0, "8065": 224285696.0, "8070": 224285696.0, "8075": 224285696.0, "8080": 224285696.0, "8085": 224285696.0, "8090": 224285696.0, "8095": 224285696.0, "8100": 224285696.0, "8105": 224285696.0, "8110": 224285696.0, "8115": 224809984.0, "8120": 224285696.0, "8125": 224285696.0, "8130": 224285696.0, "8135": 224285696.0, "8140": 224285696.0, "8145": 224285696.0, "8150": 224285696.0, "8155": 224285696.0, "8160": 224285696.0, "8165": 224285696.0, "8170": 224285696.0, "8175": 224285696.0, "8180": 224285696.0, "8185": 224285696.0, "8190": 224678912.0, "8195": 224285696.0, "8200": 224285696.0, "8205": 224809984.0, "8210": 224285696.0, "8215": 224285696.0, "8220": 224285696.0, "8225": 224285696.0, "8230": 224285696.0, "8235": 224285696.0, "8240": 224285696.0, "8245": 225334272.0, "8250": 224285696.0, "8255": 224285696.0, "8260": 224285696.0, "8265": 224285696.0, "8270": 224285696.0, "8275": 224285696.0, "8280": 224285696.0, "8285": 224285696.0, "8290": 224285696.0, "8295": 224285696.0, "8300": 225334272.0, "8305": 224285696.0, "8310": 224285696.0, "8315": 224285696.0, "8320": 224285696.0, "8325": 224285696.0, "8330": 224285696.0, "8335": 224285696.0, "8340": 224285696.0, "8345": 224285696.0, "8350": 224285696.0, "8355": 224285696.0, "8360": 224285696.0, "8365": 224285696.0, "8370": 224809984.0, "8375": 224285696.0, "8380": 224285696.0, "8385": 224285696.0, "8390": 224285696.0, "8395": 224285696.0, "8400": 224285696.0, "8405": 224285696.0, "8410": 224285696.0, "8415": 224285696.0, "8420": 224285696.0, "8425": 224285696.0, "8430": 224285696.0, "8435": 224285696.0, "8440": 224285696.0, "8445": 224285696.0, "8450": 224285696.0, "8455": 224285696.0, "8460": 224285696.0, "8465": 224285696.0, "8470": 224285696.0, "8475": 224285696.0, "8480": 224285696.0, "8485": 224285696.0, "8490": 224285696.0, "8495": 224285696.0, "8500": 224285696.0, "8505": 224285696.0, "8510": 224285696.0, "8515": 224809984.0, "8520": 224285696.0, "8525": 224285696.0, "8530": 224285696.0, "8535": 224285696.0, "8540": 224285696.0, "8545": 224285696.0, "8550": 224285696.0, "8555": 224285696.0, "8560": 224285696.0, "8565": 224285696.0, "8570": 224285696.0, "8575": 224285696.0, "8580": 224285696.0, "8585": 224285696.0, "8590": 224285696.0, "8595": 224285696.0, "8600": 224285696.0, "8605": 224285696.0, "8610": 224285696.0, "8615": 224285696.0, "8620": 224285696.0, "8625": 224285696.0, "8630": 224285696.0, "8635": 224809984.0, "8640": 224285696.0, "8645": 225334272.0, "8650": 224285696.0, "8655": 224285696.0, "8660": 224285696.0, "8665": 224285696.0, "8670": 224285696.0, "8675": 224285696.0, "8680": 224285696.0, "8685": 224285696.0, "8690": 224285696.0, "8695": 224285696.0, "8700": 224285696.0, "8705": 224285696.0, "8710": 224285696.0, "8715": 224285696.0, "8720": 224285696.0, "8725": 224285696.0, "8730": 224285696.0, "8735": 224285696.0, "8740": 224285696.0, "8745": 224285696.0, "8750": 224285696.0, "8755": 224285696.0, "8760": 224809984.0, "8765": 224285696.0, "8770": 224285696.0, "8775": 224285696.0, "8780": 224285696.0, "8785": 224285696.0, "8790": 224285696.0, "8795": 224285696.0, "8800": 224285696.0, "8805": 224285696.0, "8810": 224809984.0, "8815": 224285696.0, "8820": 224809984.0, "8825": 224285696.0, "8830": 224285696.0, "8835": 224285696.0, "8840": 224285696.0, "8845": 224285696.0, "8850": 224285696.0, "8855": 224285696.0, "8860": 224285696.0, "8865": 224285696.0, "8870": 224285696.0, "8875": 224285696.0, "8880": 225334272.0, "8885": 224285696.0, "8890": 224285696.0, "8895": 224285696.0, "8900": 224285696.0, "8905": 224285696.0, "8910": 224809984.0, "8915": 224285696.0, "8920": 224285696.0, "8925": 224285696.0, "8930": 224285696.0, "8935": 224285696.0, "8940": 224285696.0, "8945": 224285696.0, "8950": 224285696.0, "8955": 224285696.0, "8960": 224285696.0, "8965": 224285696.0, "8970": 224285696.0, "8975": 224285696.0, "8980": 224285696.0, "8985": 224809984.0, "8990": 224809984.0, "8995": 224285696.0, "9000": 224285696.0, "9005": 224285696.0, "9010": 224285696.0, "9015": 224285696.0, "9020": 224285696.0, "9025": 224809984.0, "9030": 224285696.0, "9035": 224285696.0, "9040": 224285696.0, "9045": 224285696.0, "9050": 224285696.0, "9055": 224285696.0, "9060": 224285696.0, "9065": 224285696.0, "9070": 224285696.0, "9075": 224285696.0, "9080": 224285696.0, "9085": 224285696.0, "9090": 224285696.0, "9095": 224285696.0, "9100": 224285696.0, "9105": 225334272.0, "9110": 224285696.0, "9115": 224285696.0, "9120": 224285696.0, "9125": 224285696.0, "9130": 225334272.0, "9135": 224285696.0, "9140": 224285696.0, "9145": 224285696.0, "9150": 224285696.0, "9155": 224285696.0, "9160": 224285696.0, "9165": 224285696.0, "9170": 224285696.0, "9175": 224285696.0, "9180": 224285696.0, "9185": 224285696.0, "9190": 224285696.0, "9195": 224285696.0, "9200": 224285696.0, "9205": 224285696.0, "9210": 224285696.0, "9215": 224285696.0, "9220": 224285696.0, "9225": 224285696.0, "9230": 224285696.0, "9235": 224285696.0, "9240": 224285696.0, "9245": 224285696.0, "9250": 224285696.0, "9255": 224285696.0, "9260": 224285696.0, "9265": 224285696.0, "9270": 224285696.0, "9275": 224285696.0, "9280": 224285696.0, "9285": 224285696.0, "9290": 224285696.0, "9295": 224809984.0, "9300": 224285696.0, "9305": 224285696.0, "9310": 224285696.0, "9315": 224285696.0, "9320": 224285696.0, "9325": 224285696.0, "9330": 224285696.0, "9335": 224285696.0, "9340": 224285696.0, "9345": 225334272.0, "9350": 224285696.0, "9355": 224285696.0, "9360": 224285696.0, "9365": 224285696.0, "9370": 224285696.0, "9375": 224285696.0, "9380": 224285696.0, "9385": 224285696.0, "9390": 224285696.0, "9395": 224285696.0, "9400": 224285696.0, "9405": 224285696.0, "9410": 224285696.0, "9415": 224285696.0, "9420": 224285696.0, "9425": 225334272.0, "9430": 224285696.0, "9435": 224285696.0, "9440": 224285696.0, "9445": 224285696.0, "9450": 224285696.0, "9455": 224285696.0, "9460": 224285696.0, "9465": 224285696.0, "9470": 224678912.0, "9475": 224285696.0, "9480": 224285696.0, "9485": 224809984.0, "9490": 224285696.0, "9495": 224285696.0, "9500": 224285696.0, "9505": 224285696.0, "9510": 224285696.0, "9515": 224285696.0, "9520": 224285696.0, "9525": 224285696.0, "9530": 224285696.0, "9535": 224285696.0, "9540": 224285696.0, "9545": 224285696.0, "9550": 224809984.0, "9555": 224809984.0, "9560": 224285696.0, "9565": 224285696.0, "9570": 224285696.0, "9575": 224285696.0, "9580": 224809984.0, "9585": 224285696.0, "9590": 224285696.0, "9595": 224285696.0, "9600": 224285696.0, "9605": 224285696.0, "9610": 224285696.0, "9615": 224285696.0, "9620": 224285696.0, "9625": 224285696.0, "9630": 224285696.0, "9635": 224285696.0, "9640": 224285696.0, "9645": 224285696.0, "9650": 224285696.0, "9655": 224285696.0, "9660": 224285696.0, "9665": 224285696.0, "9670": 224285696.0, "9675": 224285696.0, "9680": 224285696.0, "9685": 224285696.0, "9690": 224809984.0, "9695": 224809984.0, "9700": 224285696.0, "9705": 224285696.0, "9710": 224285696.0, "9715": 224285696.0, "9720": 224285696.0, "9725": 224285696.0, "9730": 224285696.0, "9735": 224285696.0, "9740": 224285696.0, "9745": 224285696.0, "9750": 224285696.0, "9755": 224285696.0, "9760": 224285696.0, "9765": 224285696.0, "9770": 224285696.0, "9775": 224809984.0, "9780": 224285696.0, "9785": 224285696.0, "9790": 224285696.0, "9795": 224285696.0, "9800": 224285696.0, "9805": 224285696.0, "9810": 224285696.0, "9815": 224809984.0, "9820": 224285696.0, "9825": 224285696.0, "9830": 224285696.0, "9835": 224285696.0, "9840": 224285696.0, "9845": 224285696.0, "9850": 224285696.0, "9855": 224809984.0, "9860": 224285696.0, "9865": 224285696.0, "9870": 224285696.0, "9875": 224285696.0, "9880": 224285696.0, "9885": 224285696.0, "9890": 224285696.0, "9895": 224285696.0, "9900": 224285696.0, "9905": 224285696.0, "9910": 224285696.0, "9915": 224285696.0, "9920": 224285696.0, "9925": 224285696.0, "9930": 224285696.0, "9935": 224285696.0, "9940": 224285696.0, "9945": 224285696.0, "9950": 224285696.0, "9955": 224285696.0, "9960": 224285696.0, "9965": 224285696.0, "9970": 224285696.0, "9975": 224285696.0, "9980": 224285696.0, "9985": 224285696.0, "9990": 224285696.0, "9995": 224285696.0, "10000": 225334272.0, "10005": 224285696.0, "10010": 224285696.0, "10015": 224285696.0, "10020": 224285696.0, "10025": 224285696.0, "10030": 224285696.0, "10035": 224285696.0, "10040": 224285696.0, "10045": 224285696.0, "10050": 224285696.0, "10055": 224809984.0, "10060": 224285696.0, "10065": 224285696.0, "10070": 224285696.0, "10075": 224285696.0, "10080": 224285696.0, "10085": 224285696.0, "10090": 224285696.0, "10095": 224285696.0, "10100": 224285696.0, "10105": 224285696.0, "10110": 224285696.0, "10115": 224285696.0, "10120": 224285696.0, "10125": 224285696.0, "10130": 224285696.0, "10135": 224285696.0, "10140": 224809984.0, "10145": 224285696.0, "10150": 225334272.0, "10155": 224285696.0, "10160": 224285696.0, "10165": 224285696.0, "10170": 224285696.0, "10175": 224809984.0, "10180": 224809984.0, "10185": 224285696.0, "10190": 224285696.0, "10195": 224285696.0, "10200": 224285696.0, "10205": 224285696.0, "10210": 224285696.0, "10215": 224285696.0, "10220": 224285696.0, "10225": 224285696.0, "10230": 224285696.0, "10235": 224285696.0, "10240": 224285696.0, "10245": 224285696.0, "10250": 224678912.0, "10255": 224285696.0, "10260": 224285696.0, "10265": 224285696.0, "10270": 224285696.0, "10275": 224285696.0, "10280": 224285696.0, "10285": 224285696.0, "10290": 224285696.0, "10295": 224285696.0, "10300": 224285696.0, "10305": 224285696.0, "10310": 224285696.0, "10315": 224285696.0, "10320": 224678912.0, "10325": 224285696.0, "10330": 224285696.0, "10335": 224285696.0, "10340": 224285696.0, "10345": 224285696.0, "10350": 224285696.0, "10355": 224678912.0, "10360": 224285696.0, "10365": 224285696.0, "10370": 224285696.0, "10375": 224809984.0, "10380": 224285696.0, "10385": 224285696.0, "10390": 224285696.0, "10395": 224285696.0, "10400": 224285696.0, "10405": 224678912.0, "10410": 224285696.0, "10415": 224285696.0, "10420": 224285696.0, "10425": 224285696.0, "10430": 224285696.0, "10435": 224285696.0, "10440": 224285696.0, "10445": 224285696.0, "10450": 224285696.0, "10455": 224285696.0, "10460": 224285696.0, "10465": 224285696.0, "10470": 224285696.0, "10475": 224285696.0, "10480": 224285696.0, "10485": 224285696.0, "10490": 224809984.0, "10495": 224809984.0, "10500": 224285696.0, "10505": 224285696.0, "10510": 224285696.0, "10515": 224285696.0, "10520": 224285696.0, "10525": 224285696.0, "10530": 224285696.0, "10535": 224285696.0, "10540": 224285696.0, "10545": 224809984.0, "10550": 224285696.0, "10555": 224285696.0, "10560": 224285696.0, "10565": 224285696.0, "10570": 224285696.0, "10575": 224809984.0, "10580": 224285696.0, "10585": 224285696.0, "10590": 224285696.0, "10595": 224285696.0, "10600": 224285696.0, "10605": 224285696.0, "10610": 224285696.0, "10615": 224285696.0, "10620": 224285696.0, "10625": 224285696.0, "10630": 224285696.0, "10635": 224285696.0, "10640": 224285696.0, "10645": 224285696.0, "10650": 224809984.0, "10655": 225334272.0, "10660": 224285696.0, "10665": 224285696.0, "10670": 224285696.0, "10675": 224285696.0, "10680": 224285696.0, "10685": 224285696.0, "10690": 224285696.0, "10695": 224285696.0, "10700": 224285696.0, "10705": 224285696.0, "10710": 224285696.0, "10715": 224285696.0, "10720": 224285696.0, "10725": 224285696.0, "10730": 224285696.0, "10735": 224285696.0, "10740": 224809984.0, "10745": 224809984.0, "10750": 224285696.0, "10755": 224285696.0, "10760": 224285696.0, "10765": 224285696.0, "10770": 224285696.0, "10775": 224285696.0, "10780": 224285696.0, "10785": 224285696.0, "10790": 224285696.0, "10795": 224285696.0, "10800": 224285696.0, "10805": 224285696.0, "10810": 224285696.0, "10815": 224285696.0, "10820": 224285696.0, "10825": 224285696.0, "10830": 224285696.0, "10835": 224285696.0, "10840": 224285696.0, "10845": 224285696.0, "10850": 224285696.0, "10855": 224285696.0, "10860": 224285696.0, "10865": 224285696.0, "10870": 224285696.0, "10875": 224285696.0, "10880": 224285696.0, "10885": 224285696.0, "10890": 224809984.0, "10895": 224285696.0, "10900": 224285696.0, "10905": 224285696.0, "10910": 225334272.0, "10915": 224285696.0, "10920": 224285696.0, "10925": 224285696.0, "10930": 225334272.0, "10935": 224285696.0, "10940": 224285696.0, "10945": 224285696.0, "10950": 224285696.0, "10955": 224285696.0, "10960": 224285696.0, "10965": 224678912.0, "10970": 224809984.0, "10975": 224678912.0, "10980": 224809984.0, "10985": 224809984.0, "10990": 224285696.0, "10995": 224285696.0, "11000": 224285696.0, "11005": 224285696.0, "11010": 224285696.0, "11015": 224285696.0, "11020": 224809984.0, "11025": 224809984.0, "11030": 224285696.0, "11035": 224285696.0, "11040": 224809984.0, "11045": 224285696.0, "11050": 224285696.0, "11055": 224285696.0, "11060": 224285696.0, "11065": 224285696.0, "11070": 224285696.0, "11075": 224809984.0, "11080": 224809984.0, "11085": 224285696.0, "11090": 224285696.0, "11095": 224285696.0, "11100": 224285696.0, "11105": 224285696.0, "11110": 224285696.0, "11115": 224285696.0, "11120": 224285696.0, "11125": 224285696.0, "11130": 224285696.0, "11135": 224285696.0, "11140": 224285696.0, "11145": 224285696.0, "11150": 224285696.0, "11155": 224285696.0, "11160": 224285696.0, "11165": 224285696.0, "11170": 224285696.0, "11175": 224285696.0, "11180": 224285696.0, "11185": 224285696.0, "11190": 224285696.0, "11195": 224809984.0, "11200": 224285696.0, "11205": 224285696.0, "11210": 224285696.0, "11215": 224285696.0, "11220": 224285696.0, "11225": 224285696.0, "11230": 224285696.0, "11235": 224285696.0, "11240": 224285696.0, "11245": 224285696.0, "11250": 224285696.0, "11255": 224285696.0, "11260": 224285696.0, "11265": 224285696.0, "11270": 224678912.0, "11275": 224285696.0, "11280": 224285696.0, "11285": 224285696.0, "11290": 224285696.0, "11295": 225334272.0, "11300": 224285696.0, "11305": 224285696.0, "11310": 224285696.0, "11315": 224285696.0, "11320": 224809984.0, "11325": 224285696.0, "11330": 224809984.0, "11335": 224285696.0, "11340": 224285696.0, "11345": 224285696.0, "11350": 224285696.0, "11355": 224285696.0, "11360": 224285696.0, "11365": 224285696.0, "11370": 224285696.0, "11375": 224285696.0, "11380": 224285696.0, "11385": 224285696.0, "11390": 224285696.0, "11395": 224285696.0, "11400": 224285696.0, "11405": 224285696.0, "11410": 224285696.0, "11415": 224285696.0, "11420": 224285696.0, "11425": 224285696.0, "11430": 224285696.0, "11435": 224285696.0, "11440": 224285696.0, "11445": 224285696.0, "11450": 224285696.0, "11455": 224285696.0, "11460": 224285696.0, "11465": 224285696.0, "11470": 224285696.0, "11475": 224285696.0, "11480": 224285696.0, "11485": 224285696.0, "11490": 224285696.0, "11495": 224285696.0, "11500": 224285696.0, "11505": 224285696.0, "11510": 224285696.0, "11515": 224285696.0, "11520": 224285696.0, "11525": 224285696.0, "11530": 224285696.0, "11535": 224285696.0, "11540": 224285696.0, "11545": 224285696.0, "11550": 224809984.0, "11555": 224285696.0, "11560": 225334272.0, "11565": 224285696.0, "11570": 224285696.0, "11575": 224285696.0, "11580": 224285696.0, "11585": 224285696.0, "11590": 224285696.0, "11595": 224285696.0, "11600": 224809984.0, "11605": 224285696.0, "11610": 224285696.0, "11615": 224285696.0, "11620": 224809984.0, "11625": 224285696.0, "11630": 224285696.0, "11635": 224285696.0, "11640": 224285696.0, "11645": 224809984.0, "11650": 224285696.0, "11655": 224285696.0, "11660": 224285696.0, "11665": 224285696.0, "11670": 224285696.0, "11675": 224285696.0, "11680": 224285696.0, "11685": 224285696.0, "11690": 224285696.0, "11695": 224285696.0, "11700": 224285696.0, "11705": 224285696.0, "11710": 224285696.0, "11715": 224285696.0, "11720": 224285696.0, "11725": 224285696.0, "11730": 224285696.0, "11735": 224285696.0, "11740": 224285696.0, "11745": 224285696.0, "11750": 224809984.0, "11755": 225334272.0, "11760": 224285696.0, "11765": 224285696.0, "11770": 224285696.0, "11775": 224285696.0, "11780": 224285696.0, "11785": 224809984.0, "11790": 224285696.0, "11795": 224285696.0, "11800": 224285696.0, "11805": 224809984.0, "11810": 224285696.0, "11815": 224285696.0, "11820": 224285696.0, "11825": 224285696.0, "11830": 224809984.0, "11835": 224285696.0, "11840": 224285696.0, "11845": 224285696.0, "11850": 224285696.0, "11855": 224285696.0, "11860": 224285696.0, "11865": 224285696.0, "11870": 224285696.0, "11875": 224285696.0, "11880": 224285696.0, "11885": 224285696.0, "11890": 224285696.0, "11895": 224285696.0, "11900": 224285696.0, "11905": 224285696.0, "11910": 224285696.0, "11915": 224285696.0, "11920": 224285696.0, "11925": 224285696.0, "11930": 224285696.0, "11935": 224285696.0, "11940": 224285696.0, "11945": 224285696.0, "11950": 224285696.0, "11955": 224285696.0, "11960": 224285696.0, "11965": 224285696.0, "11970": 224285696.0, "11975": 224285696.0, "11980": 224285696.0, "11985": 224285696.0, "11990": 224285696.0, "11995": 224285696.0, "12000": 224285696.0, "12005": 224285696.0, "12010": 224285696.0, "12015": 224285696.0, "12020": 224285696.0, "12025": 224285696.0, "12030": 224285696.0, "12035": 224285696.0, "12040": 224285696.0, "12045": 224285696.0, "12050": 224285696.0, "12055": 224285696.0, "12060": 224285696.0, "12065": 225334272.0, "12070": 224285696.0, "12075": 224285696.0, "12080": 224285696.0, "12085": 224285696.0, "12090": 224285696.0, "12095": 224285696.0, "12100": 224285696.0, "12105": 224285696.0, "12110": 224285696.0, "12115": 224285696.0, "12120": 224285696.0, "12125": 224285696.0, "12130": 224285696.0, "12135": 224809984.0, "12140": 224285696.0, "12145": 224678912.0, "12150": 224285696.0, "12155": 224285696.0, "12160": 224285696.0, "12165": 224285696.0, "12170": 224285696.0, "12175": 224285696.0, "12180": 224285696.0, "12185": 224285696.0, "12190": 224285696.0, "12195": 224285696.0, "12200": 225334272.0, "12205": 224285696.0, "12210": 224285696.0, "12215": 224285696.0, "12220": 224285696.0, "12225": 224285696.0, "12230": 224285696.0, "12235": 224285696.0, "12240": 224809984.0, "12245": 224285696.0, "12250": 224285696.0, "12255": 224285696.0, "12260": 224285696.0, "12265": 224285696.0, "12270": 224285696.0, "12275": 224285696.0, "12280": 224285696.0, "12285": 224285696.0, "12290": 224285696.0, "12295": 224285696.0, "12300": 224285696.0, "12305": 224285696.0, "12310": 224809984.0, "12315": 224285696.0, "12320": 224285696.0, "12325": 224285696.0, "12330": 224285696.0, "12335": 224285696.0, "12340": 224809984.0, "12345": 224285696.0, "12350": 224285696.0, "12355": 224285696.0, "12360": 224285696.0, "12365": 224285696.0, "12370": 224285696.0, "12375": 224285696.0, "12380": 224285696.0, "12385": 224809984.0, "12390": 224285696.0, "12395": 224285696.0, "12400": 224285696.0, "12405": 224285696.0, "12410": 224285696.0, "12415": 224285696.0, "12420": 224285696.0, "12425": 224285696.0, "12430": 224285696.0, "12435": 224285696.0, "12440": 224285696.0, "12445": 225334272.0, "12450": 224285696.0, "12455": 224285696.0, "12460": 224285696.0, "12465": 224285696.0, "12470": 224285696.0, "12475": 224285696.0, "12480": 224285696.0, "12485": 224285696.0, "12490": 224285696.0, "12495": 224285696.0, "12500": 224285696.0, "12505": 224285696.0, "12510": 224285696.0, "12515": 224285696.0, "12520": 224285696.0, "12525": 224285696.0, "12530": 224285696.0, "12535": 224285696.0, "12540": 224285696.0, "12545": 224285696.0, "12550": 224285696.0, "12555": 224285696.0, "12560": 224285696.0, "12565": 224809984.0, "12570": 224285696.0, "12575": 224285696.0, "12580": 224285696.0, "12585": 224285696.0, "12590": 224285696.0, "12595": 224285696.0, "12600": 224285696.0, "12605": 224285696.0, "12610": 224285696.0, "12615": 224285696.0, "12620": 224285696.0, "12625": 224285696.0, "12630": 224285696.0, "12635": 224285696.0, "12640": 224285696.0, "12645": 224285696.0, "12650": 224285696.0, "12655": 224285696.0, "12660": 225334272.0, "12665": 224285696.0, "12670": 224809984.0, "12675": 224285696.0, "12680": 224809984.0, "12685": 224285696.0, "12690": 224285696.0, "12695": 224285696.0, "12700": 224285696.0, "12705": 224285696.0, "12710": 224285696.0, "12715": 224285696.0, "12720": 224285696.0, "12725": 224285696.0, "12730": 224285696.0, "12735": 224285696.0, "12740": 224285696.0, "12745": 224285696.0, "12750": 224285696.0, "12755": 224809984.0, "12760": 224285696.0, "12765": 224285696.0, "12770": 224285696.0, "12775": 224285696.0, "12780": 224285696.0, "12785": 224285696.0, "12790": 224285696.0, "12795": 224285696.0, "12800": 224285696.0, "12805": 224285696.0, "12810": 224285696.0, "12815": 224285696.0, "12820": 224285696.0, "12825": 224285696.0, "12830": 224285696.0, "12835": 224285696.0, "12840": 224285696.0, "12845": 224285696.0, "12850": 224285696.0, "12855": 224285696.0, "12860": 224285696.0, "12865": 224285696.0, "12870": 224285696.0, "12875": 224285696.0, "12880": 224285696.0, "12885": 224285696.0, "12890": 224285696.0, "12895": 224285696.0, "12900": 224285696.0, "12905": 224285696.0, "12910": 224285696.0, "12915": 224285696.0, "12920": 224285696.0, "12925": 224285696.0, "12930": 224285696.0, "12935": 224285696.0, "12940": 224285696.0, "12945": 224285696.0, "12950": 224285696.0, "12955": 224285696.0, "12960": 224809984.0, "12965": 224285696.0, "12970": 224285696.0, "12975": 224285696.0, "12980": 224285696.0, "12985": 224285696.0, "12990": 224285696.0, "12995": 224285696.0, "13000": 224285696.0, "13005": 224285696.0, "13010": 224285696.0, "13015": 225334272.0, "13020": 224285696.0, "13025": 224285696.0, "13030": 224285696.0, "13035": 224285696.0, "13040": 224285696.0, "13045": 224285696.0, "13050": 224285696.0, "13055": 224285696.0, "13060": 224285696.0, "13065": 224285696.0, "13070": 224285696.0, "13075": 224285696.0, "13080": 224285696.0, "13085": 224285696.0, "13090": 224285696.0, "13095": 224285696.0, "13100": 225334272.0, "13105": 225334272.0, "13110": 224285696.0, "13115": 224809984.0, "13120": 224285696.0, "13125": 224285696.0, "13130": 224285696.0, "13135": 224285696.0, "13140": 224285696.0, "13145": 224285696.0, "13150": 224285696.0, "13155": 224285696.0, "13160": 224285696.0, "13165": 224285696.0, "13170": 224285696.0, "13175": 224285696.0, "13180": 224285696.0, "13185": 224285696.0, "13190": 224285696.0, "13195": 224285696.0, "13200": 224285696.0, "13205": 224285696.0, "13210": 224285696.0, "13215": 224285696.0, "13220": 224285696.0, "13225": 224285696.0, "13230": 224809984.0, "13235": 224285696.0, "13240": 224285696.0, "13245": 224285696.0, "13250": 224285696.0, "13255": 224285696.0, "13260": 224285696.0, "13265": 224285696.0, "13270": 224285696.0, "13275": 224285696.0, "13280": 224285696.0, "13285": 224285696.0, "13290": 224285696.0, "13295": 224285696.0, "13300": 224285696.0, "13305": 224285696.0, "13310": 224285696.0, "13315": 224285696.0, "13320": 224285696.0, "13325": 224285696.0, "13330": 224285696.0, "13335": 224285696.0, "13340": 224285696.0, "13345": 224285696.0, "13350": 224285696.0, "13355": 224285696.0, "13360": 224285696.0, "13365": 224285696.0, "13370": 224285696.0, "13375": 224285696.0, "13380": 224285696.0, "13385": 224285696.0, "13390": 224285696.0, "13395": 224285696.0, "13400": 224285696.0, "13405": 224285696.0, "13410": 224285696.0, "13415": 224285696.0, "13420": 224285696.0, "13425": 224285696.0, "13430": 224809984.0, "13435": 224285696.0, "13440": 224285696.0, "13445": 224285696.0, "13450": 224285696.0, "13455": 224285696.0, "13460": 224285696.0, "13465": 224285696.0, "13470": 224285696.0, "13475": 224285696.0, "13480": 224285696.0, "13485": 224285696.0, "13490": 224285696.0, "13495": 224285696.0, "13500": 224285696.0, "13505": 224285696.0, "13510": 224285696.0, "13515": 224809984.0, "13520": 224285696.0, "13525": 224285696.0, "13530": 224285696.0, "13535": 224285696.0, "13540": 224285696.0, "13545": 224285696.0, "13550": 224285696.0, "13555": 224285696.0, "13560": 224285696.0, "13565": 224285696.0, "13570": 224285696.0, "13575": 224285696.0, "13580": 224285696.0, "13585": 224285696.0, "13590": 224285696.0, "13595": 224285696.0, "13600": 224285696.0, "13605": 225334272.0, "13610": 224285696.0, "13615": 224285696.0, "13620": 224285696.0, "13625": 224285696.0, "13630": 224285696.0, "13635": 224285696.0, "13640": 224285696.0, "13645": 224285696.0, "13650": 224285696.0, "13655": 224285696.0, "13660": 224285696.0, "13665": 224285696.0, "13670": 224285696.0, "13675": 224285696.0, "13680": 224285696.0, "13685": 224285696.0, "13690": 224285696.0, "13695": 224285696.0, "13700": 224285696.0, "13705": 224285696.0, "13710": 224678912.0, "13715": 224285696.0, "13720": 224285696.0, "13725": 224285696.0, "13730": 224285696.0, "13735": 224285696.0, "13740": 224285696.0, "13745": 224285696.0, "13750": 224285696.0, "13755": 224285696.0, "13760": 225334272.0, "13765": 224285696.0, "13770": 224285696.0, "13775": 224285696.0, "13780": 224285696.0, "13785": 224285696.0, "13790": 224285696.0, "13795": 224285696.0, "13800": 224285696.0, "13805": 224285696.0, "13810": 224285696.0, "13815": 224285696.0, "13820": 224285696.0, "13825": 224285696.0, "13830": 224809984.0, "13835": 224285696.0, "13840": 224285696.0, "13845": 224285696.0, "13850": 224285696.0, "13855": 224285696.0, "13860": 224285696.0, "13865": 224285696.0, "13870": 224285696.0, "13875": 224285696.0, "13880": 224285696.0, "13885": 225334272.0, "13890": 224285696.0, "13895": 224285696.0, "13900": 224285696.0, "13905": 224285696.0, "13910": 224285696.0, "13915": 224285696.0, "13920": 224285696.0, "13925": 224285696.0, "13930": 224285696.0, "13935": 224285696.0, "13940": 224809984.0, "13945": 224285696.0, "13950": 224285696.0, "13955": 224285696.0, "13960": 224285696.0, "13965": 224285696.0, "13970": 224285696.0, "13975": 224285696.0, "13980": 224285696.0, "13985": 224285696.0, "13990": 224285696.0, "13995": 224285696.0, "14000": 224285696.0, "14005": 224285696.0, "14010": 224285696.0, "14015": 224285696.0, "14020": 224285696.0, "14025": 224809984.0, "14030": 224285696.0, "14035": 224285696.0, "14040": 224285696.0, "14045": 225334272.0, "14050": 225334272.0, "14055": 224285696.0, "14060": 224285696.0, "14065": 224285696.0, "14070": 224285696.0, "14075": 224285696.0, "14080": 224285696.0, "14085": 224285696.0, "14090": 224285696.0, "14095": 224285696.0, "14100": 224809984.0, "14105": 224285696.0, "14110": 224285696.0, "14115": 224285696.0, "14120": 224285696.0, "14125": 224285696.0, "14130": 224285696.0, "14135": 224285696.0, "14140": 224285696.0, "14145": 224285696.0, "14150": 224285696.0, "14155": 224285696.0, "14160": 224285696.0, "14165": 224285696.0, "14170": 224285696.0, "14175": 224285696.0, "14180": 225334272.0, "14185": 224285696.0, "14190": 225334272.0, "14195": 224285696.0, "14200": 224285696.0, "14205": 224285696.0, "14210": 224285696.0, "14215": 224285696.0, "14220": 224285696.0, "14225": 224285696.0, "14230": 224285696.0, "14235": 224285696.0, "14240": 224285696.0, "14245": 224285696.0, "14250": 224285696.0, "14255": 224285696.0, "14260": 224285696.0, "14265": 224285696.0, "14270": 224285696.0, "14275": 224285696.0, "14280": 225334272.0, "14285": 224285696.0, "14290": 224809984.0, "14295": 224809984.0, "14300": 224285696.0, "14305": 224285696.0, "14310": 224285696.0, "14315": 225334272.0, "14320": 224809984.0, "14325": 224285696.0, "14330": 224285696.0, "14335": 224285696.0, "14340": 224285696.0, "14345": 224809984.0, "14350": 224809984.0, "14355": 224809984.0, "14360": 225334272.0, "14365": 224285696.0, "14370": 224285696.0, "14375": 224285696.0, "14380": 225334272.0, "14385": 224285696.0, "14390": 224285696.0, "14395": 224285696.0, "14400": 224285696.0, "14405": 224285696.0, "14410": 224285696.0, "14415": 224285696.0, "14420": 224285696.0, "14425": 224285696.0, "14430": 224285696.0, "14435": 224285696.0, "14440": 224809984.0, "14445": 225334272.0, "14450": 224285696.0, "14455": 224809984.0, "14460": 224285696.0, "14465": 225334272.0, "14470": 224285696.0, "14475": 224285696.0, "14480": 224285696.0, "14485": 224285696.0, "14490": 224285696.0, "14495": 224285696.0, "14500": 224285696.0, "14505": 224285696.0, "14510": 224809984.0, "14515": 224285696.0, "14520": 224285696.0, "14525": 224285696.0, "14530": 224285696.0, "14535": 224285696.0, "14540": 224285696.0, "14545": 224285696.0, "14550": 224285696.0, "14555": 224809984.0, "14560": 224285696.0, "14565": 224285696.0, "14570": 224285696.0, "14575": 224285696.0, "14580": 224285696.0, "14585": 224285696.0, "14590": 224285696.0, "14595": 224285696.0, "14600": 224285696.0, "14605": 224809984.0, "14610": 224285696.0, "14615": 224285696.0, "14620": 224285696.0, "14625": 224285696.0, "14630": 224285696.0, "14635": 224285696.0, "14640": 224285696.0, "14645": 224285696.0, "14650": 224285696.0, "14655": 224285696.0, "14660": 224285696.0, "14665": 224285696.0, "14670": 224285696.0, "14675": 224285696.0, "14680": 224285696.0, "14685": 224285696.0, "14690": 224285696.0, "14695": 224809984.0, "14700": 224285696.0, "14705": 224285696.0, "14710": 224285696.0, "14715": 224809984.0, "14720": 224285696.0, "14725": 224285696.0, "14730": 224285696.0, "14735": 224809984.0, "14740": 224285696.0, "14745": 224285696.0, "14750": 224285696.0, "14755": 224285696.0, "14760": 224285696.0, "14765": 224285696.0, "14770": 224285696.0, "14775": 224285696.0, "14780": 224285696.0, "14785": 224285696.0, "14790": 224285696.0, "14795": 224285696.0, "14800": 224285696.0, "14805": 224285696.0, "14810": 224285696.0, "14815": 224285696.0, "14820": 224285696.0, "14825": 224285696.0, "14830": 224285696.0, "14835": 224285696.0, "14840": 224285696.0, "14845": 224809984.0, "14850": 224809984.0, "14855": 224285696.0, "14860": 224285696.0, "14865": 224285696.0, "14870": 224285696.0, "14875": 225334272.0, "14880": 224285696.0, "14885": 224285696.0, "14890": 224285696.0, "14895": 224285696.0, "14900": 224285696.0, "14905": 224285696.0, "14910": 224285696.0, "14915": 224285696.0, "14920": 224285696.0, "14925": 224285696.0, "14930": 224285696.0, "14935": 224809984.0, "14940": 224285696.0, "14945": 224285696.0, "14950": 224809984.0, "14955": 225334272.0, "14960": 224285696.0, "14965": 224285696.0, "14970": 224285696.0, "14975": 224285696.0, "14980": 225334272.0, "14985": 224285696.0, "14990": 224285696.0, "14995": 224285696.0, "15000": 224285696.0, "15005": 224285696.0, "15010": 224285696.0, "15015": 224285696.0, "15020": 224285696.0, "15025": 224285696.0, "15030": 224678912.0, "15035": 224285696.0, "15040": 224285696.0, "15045": 224285696.0, "15050": 224809984.0, "15055": 224285696.0, "15060": 224285696.0, "15065": 224285696.0, "15070": 224809984.0, "15075": 224285696.0, "15080": 224809984.0, "15085": 225334272.0, "15090": 224809984.0, "15095": 224285696.0, "15100": 224285696.0, "15105": 224285696.0, "15110": 224285696.0, "15115": 224285696.0, "15120": 224809984.0, "15125": 224285696.0, "15130": 224285696.0, "15135": 224285696.0, "15140": 224285696.0, "15145": 224285696.0, "15150": 224285696.0, "15155": 224285696.0, "15160": 224285696.0, "15165": 224285696.0, "15170": 224285696.0, "15175": 224285696.0, "15180": 224678912.0, "15185": 224285696.0, "15190": 224285696.0, "15195": 224285696.0, "15200": 224285696.0, "15205": 224285696.0, "15210": 224285696.0, "15215": 224285696.0, "15220": 224809984.0, "15225": 224285696.0, "15230": 224678912.0, "15235": 224285696.0, "15240": 224285696.0, "15245": 224285696.0, "15250": 224285696.0, "15255": 224285696.0, "15260": 224285696.0, "15265": 224285696.0, "15270": 224285696.0, "15275": 224285696.0, "15280": 224285696.0, "15285": 224285696.0, "15290": 224285696.0, "15295": 224285696.0, "15300": 224809984.0, "15305": 224285696.0, "15310": 224809984.0, "15315": 224809984.0, "15320": 224285696.0, "15325": 224285696.0, "15330": 225334272.0, "15335": 225334272.0, "15340": 224285696.0, "15345": 224285696.0, "15350": 224285696.0, "15355": 224285696.0, "15360": 224285696.0, "15365": 224809984.0, "15370": 224285696.0, "15375": 224809984.0, "15380": 224285696.0, "15385": 224285696.0, "15390": 224285696.0, "15395": 224809984.0, "15400": 224285696.0, "15405": 224285696.0, "15410": 224285696.0, "15415": 224285696.0, "15420": 224809984.0, "15425": 224285696.0, "15430": 224285696.0, "15435": 224285696.0, "15440": 224809984.0, "15445": 224285696.0, "15450": 224285696.0, "15455": 224285696.0, "15460": 224285696.0, "15465": 224285696.0, "15470": 224285696.0, "15475": 224285696.0, "15480": 224285696.0, "15485": 224285696.0, "15490": 224285696.0, "15495": 224285696.0, "15500": 224285696.0, "15505": 224285696.0, "15510": 224285696.0, "15515": 224285696.0, "15520": 224285696.0, "15525": 224285696.0, "15530": 224809984.0, "15535": 224809984.0, "15540": 224285696.0, "15545": 224285696.0, "15550": 224285696.0, "15555": 224285696.0, "15560": 224285696.0, "15565": 224285696.0, "15570": 224285696.0, "15575": 224285696.0, "15580": 224285696.0, "15585": 224285696.0, "15590": 224285696.0, "15595": 224285696.0, "15600": 224285696.0, "15605": 224809984.0, "15610": 224285696.0, "15615": 224285696.0, "15620": 224285696.0, "15625": 224285696.0, "15630": 224285696.0, "15635": 224809984.0, "15640": 224285696.0, "15645": 224285696.0, "15650": 224285696.0, "15655": 224285696.0, "15660": 224285696.0, "15665": 224285696.0, "15670": 224285696.0, "15675": 224285696.0, "15680": 225334272.0, "15685": 224285696.0, "15690": 224285696.0, "15695": 224285696.0, "15700": 224285696.0, "15705": 224285696.0, "15710": 224809984.0, "15715": 224809984.0, "15720": 224809984.0, "15725": 224285696.0, "15730": 224809984.0, "15735": 224285696.0, "15740": 224285696.0, "15745": 224285696.0, "15750": 224285696.0, "15755": 224285696.0, "15760": 224285696.0, "15765": 224285696.0, "15770": 224285696.0, "15775": 224285696.0, "15780": 224285696.0, "15785": 224285696.0, "15790": 224285696.0, "15795": 224285696.0, "15800": 224285696.0, "15805": 224285696.0, "15810": 224285696.0, "15815": 224285696.0, "15820": 224285696.0, "15825": 224285696.0, "15830": 224809984.0, "15835": 224285696.0, "15840": 224285696.0, "15845": 224285696.0, "15850": 224285696.0, "15855": 224285696.0, "15860": 224285696.0, "15865": 224285696.0, "15870": 224285696.0, "15875": 224285696.0, "15880": 224285696.0, "15885": 224285696.0, "15890": 224285696.0, "15895": 224285696.0, "15900": 224285696.0, "15905": 224285696.0, "15910": 224285696.0, "15915": 224285696.0, "15920": 224809984.0, "15925": 224809984.0, "15930": 224285696.0, "15935": 224285696.0, "15940": 224285696.0, "15945": 224285696.0, "15950": 224285696.0, "15955": 224285696.0, "15960": 224285696.0, "15965": 224285696.0, "15970": 224678912.0, "15975": 224285696.0, "15980": 224285696.0, "15985": 224285696.0, "15990": 224285696.0, "15995": 224285696.0, "16000": 224285696.0, "16005": 224285696.0, "16010": 224285696.0, "16015": 224285696.0, "16020": 224285696.0, "16025": 224285696.0, "16030": 224285696.0, "16035": 224285696.0, "16040": 224285696.0, "16045": 224285696.0, "16050": 224285696.0, "16055": 224285696.0, "16060": 224285696.0, "16065": 224285696.0, "16070": 224285696.0, "16075": 224285696.0, "16080": 224285696.0, "16085": 224285696.0, "16090": 224285696.0, "16095": 224285696.0, "16100": 224285696.0, "16105": 224285696.0, "16110": 224809984.0, "16115": 224809984.0, "16120": 224285696.0, "16125": 224285696.0, "16130": 224285696.0, "16135": 224285696.0, "16140": 224285696.0, "16145": 224809984.0, "16150": 224285696.0, "16155": 224285696.0, "16160": 224285696.0, "16165": 224285696.0, "16170": 224285696.0, "16175": 224285696.0, "16180": 224285696.0, "16185": 224285696.0, "16190": 224285696.0, "16195": 224809984.0, "16200": 225334272.0, "16205": 224285696.0, "16210": 224285696.0, "16215": 224285696.0, "16220": 224285696.0, "16225": 224285696.0, "16230": 224285696.0, "16235": 224285696.0, "16240": 224285696.0, "16245": 224285696.0, "16250": 224285696.0, "16255": 224285696.0, "16260": 225334272.0, "16265": 224285696.0, "16270": 224678912.0, "16275": 224285696.0, "16280": 224285696.0, "16285": 224285696.0, "16290": 224285696.0, "16295": 224285696.0, "16300": 224285696.0, "16305": 224285696.0, "16310": 224285696.0, "16315": 224285696.0, "16320": 224285696.0, "16325": 224285696.0, "16330": 224285696.0, "16335": 224809984.0, "16340": 224285696.0, "16345": 224285696.0, "16350": 224285696.0, "16355": 224285696.0, "16360": 224285696.0, "16365": 224678912.0, "16370": 224285696.0, "16375": 225334272.0, "16380": 224285696.0, "16385": 224285696.0, "16390": 224285696.0, "16395": 224285696.0, "16400": 224285696.0, "16405": 224285696.0, "16410": 224285696.0, "16415": 224809984.0, "16420": 224285696.0, "16425": 224809984.0, "16430": 224285696.0, "16435": 224285696.0, "16440": 224285696.0, "16445": 224678912.0, "16450": 224285696.0, "16455": 224285696.0, "16460": 224285696.0, "16465": 224285696.0, "16470": 224285696.0, "16475": 224809984.0, "16480": 224285696.0, "16485": 224285696.0, "16490": 224285696.0, "16495": 224809984.0, "16500": 224809984.0, "16505": 224285696.0, "16510": 224285696.0, "16515": 224285696.0, "16520": 224285696.0, "16525": 224285696.0, "16530": 224285696.0, "16535": 224285696.0, "16540": 224809984.0, "16545": 224285696.0, "16550": 224285696.0, "16555": 224285696.0, "16560": 224285696.0, "16565": 224285696.0, "16570": 224285696.0, "16575": 224285696.0, "16580": 224809984.0, "16585": 224809984.0, "16590": 224678912.0, "16595": 224285696.0, "16600": 224285696.0, "16605": 224285696.0, "16610": 224285696.0, "16615": 224285696.0, "16620": 224285696.0, "16625": 224285696.0, "16630": 224285696.0, "16635": 224285696.0, "16640": 224285696.0, "16645": 224809984.0, "16650": 224809984.0, "16655": 224285696.0, "16660": 224285696.0, "16665": 224285696.0, "16670": 224285696.0, "16675": 224285696.0, "16680": 224285696.0, "16685": 224285696.0, "16690": 224285696.0, "16695": 224285696.0, "16700": 224285696.0, "16705": 224285696.0, "16710": 224285696.0, "16715": 224285696.0, "16720": 224809984.0, "16725": 224285696.0, "16730": 224285696.0, "16735": 224285696.0, "16740": 224285696.0, "16745": 224285696.0, "16750": 224809984.0, "16755": 224809984.0, "16760": 224285696.0, "16765": 224285696.0, "16770": 224285696.0, "16775": 224285696.0, "16780": 224285696.0, "16785": 224285696.0, "16790": 224285696.0, "16795": 224285696.0, "16800": 224285696.0, "16805": 224285696.0, "16810": 224285696.0, "16815": 224285696.0, "16820": 224285696.0, "16825": 224809984.0, "16830": 224285696.0, "16835": 224285696.0, "16840": 224285696.0, "16845": 224285696.0, "16850": 224285696.0, "16855": 224285696.0, "16860": 224285696.0, "16865": 224285696.0, "16870": 224285696.0, "16875": 224285696.0, "16880": 224285696.0, "16885": 224285696.0, "16890": 224285696.0, "16895": 224809984.0, "16900": 224285696.0, "16905": 224285696.0, "16910": 224285696.0, "16915": 224285696.0, "16920": 224285696.0, "16925": 224285696.0, "16930": 224285696.0, "16935": 224285696.0, "16940": 224285696.0, "16945": 224285696.0, "16950": 224285696.0, "16955": 224809984.0, "16960": 224809984.0, "16965": 224285696.0, "16970": 224285696.0, "16975": 224285696.0, "16980": 224285696.0, "16985": 224285696.0, "16990": 224285696.0, "16995": 224285696.0, "17000": 224809984.0, "17005": 224285696.0, "17010": 224285696.0, "17015": 224809984.0, "17020": 224285696.0, "17025": 224285696.0, "17030": 224285696.0, "17035": 224285696.0, "17040": 224285696.0, "17045": 224285696.0, "17050": 224285696.0, "17055": 224285696.0, "17060": 224285696.0, "17065": 224285696.0, "17070": 224285696.0, "17075": 224285696.0, "17080": 224285696.0, "17085": 224285696.0, "17090": 224285696.0, "17095": 224285696.0, "17100": 224285696.0, "17105": 224285696.0, "17110": 224285696.0, "17115": 224285696.0, "17120": 224285696.0, "17125": 224285696.0, "17130": 224678912.0, "17135": 224285696.0, "17140": 224285696.0, "17145": 224285696.0, "17150": 224285696.0, "17155": 224285696.0, "17160": 224285696.0, "17165": 224285696.0, "17170": 224285696.0, "17175": 224285696.0, "17180": 224285696.0, "17185": 224285696.0, "17190": 224809984.0, "17195": 224285696.0, "17200": 224285696.0, "17205": 224285696.0, "17210": 224285696.0, "17215": 224285696.0, "17220": 224285696.0, "17225": 224285696.0, "17230": 224285696.0, "17235": 224285696.0, "17240": 224285696.0, "17245": 224285696.0, "17250": 224285696.0, "17255": 224809984.0, "17260": 224285696.0, "17265": 224285696.0, "17270": 224285696.0, "17275": 224809984.0, "17280": 224285696.0, "17285": 224285696.0, "17290": 224285696.0, "17295": 224285696.0, "17300": 224809984.0, "17305": 224285696.0, "17310": 224285696.0, "17315": 224678912.0, "17320": 224285696.0, "17325": 224285696.0, "17330": 224285696.0, "17335": 224285696.0, "17340": 224285696.0, "17345": 224285696.0, "17350": 224285696.0, "17355": 224285696.0, "17360": 224285696.0, "17365": 224809984.0, "17370": 224285696.0, "17375": 225334272.0, "17380": 224809984.0, "17385": 224809984.0, "17390": 224285696.0, "17395": 224285696.0, "17400": 224285696.0, "17405": 224285696.0, "17410": 224285696.0, "17415": 224285696.0, "17420": 224285696.0, "17425": 224285696.0, "17430": 224285696.0, "17435": 224285696.0, "17440": 224809984.0, "17445": 224285696.0, "17450": 224809984.0, "17455": 224285696.0, "17460": 224809984.0, "17465": 224285696.0, "17470": 224285696.0, "17475": 224809984.0, "17480": 224809984.0, "17485": 224285696.0, "17490": 224285696.0, "17495": 224285696.0, "17500": 224285696.0, "17505": 224285696.0, "17510": 224285696.0, "17515": 224285696.0, "17520": 224285696.0, "17525": 224285696.0, "17530": 224285696.0, "17535": 224809984.0, "17540": 224285696.0, "17545": 224285696.0, "17550": 224285696.0, "17555": 224285696.0, "17560": 224285696.0, "17565": 224285696.0, "17570": 224285696.0, "17575": 224285696.0, "17580": 224809984.0, "17585": 225334272.0, "17590": 224285696.0, "17595": 224285696.0, "17600": 224285696.0, "17605": 224285696.0, "17610": 224285696.0, "17615": 224285696.0, "17620": 224285696.0, "17625": 224285696.0, "17630": 224809984.0, "17635": 224285696.0, "17640": 224285696.0, "17645": 224285696.0, "17650": 224285696.0, "17655": 224285696.0, "17660": 224285696.0, "17665": 224285696.0, "17670": 224285696.0, "17675": 224809984.0, "17680": 224809984.0, "17685": 224285696.0, "17690": 224285696.0, "17695": 224285696.0, "17700": 224285696.0, "17705": 224285696.0, "17710": 224285696.0, "17715": 224285696.0, "17720": 224809984.0, "17725": 224285696.0, "17730": 224285696.0, "17735": 224285696.0, "17740": 224285696.0, "17745": 224285696.0, "17750": 224285696.0, "17755": 224285696.0, "17760": 224285696.0, "17765": 224285696.0, "17770": 224285696.0, "17775": 224809984.0, "17780": 224285696.0, "17785": 224809984.0, "17790": 224285696.0, "17795": 224285696.0, "17800": 224285696.0, "17805": 224285696.0, "17810": 224285696.0, "17815": 224285696.0, "17820": 224809984.0, "17825": 224285696.0, "17830": 224285696.0, "17835": 224285696.0, "17840": 224285696.0, "17845": 224809984.0, "17850": 224285696.0, "17855": 224285696.0, "17860": 224809984.0, "17865": 224285696.0, "17870": 224285696.0, "17875": 224285696.0, "17880": 224285696.0, "17885": 224285696.0, "17890": 224285696.0, "17895": 224285696.0, "17900": 224285696.0, "17905": 224285696.0, "17910": 224285696.0, "17915": 224285696.0, "17920": 224285696.0, "17925": 224285696.0, "17930": 224285696.0, "17935": 224285696.0, "17940": 224809984.0, "17945": 224285696.0, "17950": 224285696.0, "17955": 224285696.0, "17960": 224285696.0, "17965": 224285696.0, "17970": 224285696.0, "17975": 224285696.0, "17980": 224285696.0, "17985": 224285696.0, "17990": 224809984.0, "17995": 224285696.0, "18000": 224678912.0, "18005": 224285696.0, "18010": 224285696.0, "18015": 224285696.0, "18020": 224285696.0, "18025": 224285696.0, "18030": 224285696.0, "18035": 224678912.0, "18040": 224285696.0, "18045": 224285696.0, "18050": 224809984.0, "18055": 224285696.0, "18060": 224285696.0, "18065": 224809984.0, "18070": 224285696.0, "18075": 224285696.0, "18080": 224285696.0, "18085": 224809984.0, "18090": 224285696.0, "18095": 224285696.0, "18100": 225334272.0, "18105": 224285696.0, "18110": 224809984.0, "18115": 224285696.0, "18120": 224285696.0, "18125": 224285696.0, "18130": 224809984.0, "18135": 224285696.0, "18140": 224285696.0, "18145": 224285696.0, "18150": 224285696.0, "18155": 224285696.0, "18160": 224285696.0, "18165": 224285696.0, "18170": 224285696.0, "18175": 224285696.0, "18180": 224285696.0, "18185": 224285696.0, "18190": 224285696.0, "18195": 224285696.0, "18200": 224285696.0, "18205": 224285696.0, "18210": 224285696.0, "18215": 224285696.0, "18220": 224285696.0, "18225": 224285696.0, "18230": 224809984.0, "18235": 224285696.0, "18240": 224809984.0, "18245": 224285696.0, "18250": 224285696.0, "18255": 224285696.0, "18260": 224285696.0, "18265": 224285696.0, "18270": 224809984.0, "18275": 224285696.0, "18280": 224678912.0, "18285": 224285696.0, "18290": 224285696.0, "18295": 224285696.0, "18300": 224285696.0, "18305": 224678912.0, "18310": 224809984.0, "18315": 224285696.0, "18320": 224285696.0, "18325": 224285696.0, "18330": 224285696.0, "18335": 224285696.0, "18340": 224285696.0, "18345": 224285696.0, "18350": 224809984.0, "18355": 224285696.0, "18360": 224285696.0, "18365": 224285696.0, "18370": 224285696.0, "18375": 224285696.0, "18380": 224809984.0, "18385": 224285696.0, "18390": 224285696.0, "18395": 224285696.0, "18400": 224285696.0, "18405": 224285696.0, "18410": 224285696.0, "18415": 224285696.0, "18420": 224809984.0, "18425": 224285696.0, "18430": 224285696.0, "18435": 224285696.0, "18440": 224285696.0, "18445": 224285696.0, "18450": 224285696.0, "18455": 224285696.0, "18460": 224285696.0, "18465": 224285696.0, "18470": 224809984.0, "18475": 224285696.0, "18480": 224285696.0, "18485": 224809984.0, "18490": 224285696.0, "18495": 224285696.0, "18500": 224809984.0, "18505": 224285696.0, "18510": 224285696.0, "18515": 224809984.0, "18520": 224285696.0, "18525": 224285696.0, "18530": 224285696.0, "18535": 224285696.0, "18540": 225334272.0, "18545": 224285696.0, "18550": 224285696.0, "18555": 224285696.0, "18560": 224285696.0, "18565": 224285696.0, "18570": 224678912.0, "18575": 224285696.0, "18580": 224285696.0, "18585": 224285696.0, "18590": 224285696.0, "18595": 224809984.0, "18600": 224285696.0, "18605": 224285696.0, "18610": 224285696.0, "18615": 224285696.0, "18620": 224285696.0, "18625": 224285696.0, "18630": 224285696.0, "18635": 224809984.0, "18640": 224285696.0, "18645": 225334272.0, "18650": 224809984.0, "18655": 224285696.0, "18660": 225334272.0, "18665": 224285696.0, "18670": 224678912.0, "18675": 224285696.0, "18680": 224809984.0, "18685": 224809984.0, "18690": 224285696.0, "18695": 224285696.0, "18700": 224285696.0, "18705": 224285696.0, "18710": 225334272.0, "18715": 224285696.0, "18720": 224285696.0, "18725": 224285696.0, "18730": 224285696.0, "18735": 224285696.0, "18740": 224285696.0, "18745": 224285696.0, "18750": 224678912.0, "18755": 224285696.0, "18760": 224678912.0, "18765": 224285696.0, "18770": 224285696.0, "18775": 224285696.0, "18780": 224285696.0, "18785": 224285696.0, "18790": 224809984.0, "18795": 224285696.0, "18800": 224285696.0, "18805": 224285696.0, "18810": 224285696.0, "18815": 224285696.0, "18820": 224285696.0, "18825": 224285696.0, "18830": 224285696.0, "18835": 224809984.0, "18840": 224285696.0, "18845": 224285696.0, "18850": 224285696.0, "18855": 224809984.0, "18860": 225334272.0, "18865": 224809984.0, "18870": 224285696.0, "18875": 224285696.0, "18880": 224285696.0, "18885": 224285696.0, "18890": 224285696.0, "18895": 224285696.0, "18900": 224285696.0, "18905": 224285696.0, "18910": 224678912.0, "18915": 224285696.0, "18920": 224285696.0, "18925": 224285696.0, "18930": 224285696.0, "18935": 224285696.0, "18940": 224285696.0, "18945": 224285696.0, "18950": 224809984.0, "18955": 224285696.0, "18960": 224285696.0, "18965": 224285696.0, "18970": 224285696.0, "18975": 224285696.0, "18980": 224285696.0, "18985": 224285696.0, "18990": 224678912.0, "18995": 224285696.0, "19000": 224285696.0, "19005": 224285696.0, "19010": 224285696.0, "19015": 224285696.0, "19020": 224285696.0, "19025": 224285696.0, "19030": 224285696.0, "19035": 224285696.0, "19040": 224285696.0, "19045": 224285696.0, "19050": 224285696.0, "19055": 224285696.0, "19060": 224285696.0, "19065": 224285696.0, "19070": 224285696.0, "19075": 224285696.0, "19080": 224285696.0, "19085": 224809984.0, "19090": 224285696.0, "19095": 224285696.0, "19100": 224285696.0, "19105": 224285696.0, "19110": 224809984.0, "19115": 224285696.0, "19120": 224285696.0, "19125": 224285696.0, "19130": 224285696.0, "19135": 224809984.0, "19140": 224285696.0, "19145": 224809984.0, "19150": 224285696.0, "19155": 224285696.0, "19160": 224285696.0, "19165": 224285696.0, "19170": 224285696.0, "19175": 224285696.0, "19180": 224678912.0, "19185": 224285696.0, "19190": 224285696.0, "19195": 224285696.0, "19200": 224285696.0, "19205": 224285696.0, "19210": 224285696.0, "19215": 224285696.0, "19220": 224285696.0, "19225": 224285696.0, "19230": 224285696.0, "19235": 224285696.0, "19240": 224678912.0, "19245": 224809984.0, "19250": 224285696.0, "19255": 224285696.0, "19260": 224285696.0, "19265": 224285696.0, "19270": 224809984.0, "19275": 224285696.0, "19280": 224285696.0, "19285": 224285696.0, "19290": 224285696.0, "19295": 224285696.0, "19300": 224285696.0, "19305": 224285696.0, "19310": 224285696.0, "19315": 224285696.0, "19320": 225334272.0, "19325": 224285696.0, "19330": 224809984.0, "19335": 224809984.0, "19340": 224285696.0, "19345": 224285696.0, "19350": 224809984.0, "19355": 224285696.0, "19360": 224285696.0, "19365": 224285696.0, "19370": 224285696.0, "19375": 224285696.0, "19380": 224285696.0, "19385": 224285696.0, "19390": 224285696.0, "19395": 224809984.0, "19400": 224285696.0, "19405": 224809984.0, "19410": 224285696.0, "19415": 224285696.0, "19420": 224285696.0, "19425": 224285696.0, "19430": 224285696.0, "19435": 224285696.0, "19440": 224285696.0, "19445": 224285696.0, "19450": 224285696.0, "19455": 224285696.0, "19460": 224285696.0, "19465": 224285696.0, "19470": 224285696.0, "19475": 224809984.0, "19480": 224285696.0, "19485": 224285696.0, "19490": 224285696.0, "19495": 224285696.0, "19500": 224285696.0, "19505": 224285696.0, "19510": 224285696.0, "19515": 224809984.0, "19520": 224285696.0, "19525": 224285696.0, "19530": 224285696.0, "19535": 224285696.0, "19540": 224285696.0, "19545": 224809984.0, "19550": 224285696.0, "19555": 224285696.0, "19560": 224285696.0, "19565": 224285696.0, "19570": 224809984.0, "19575": 224285696.0, "19580": 224285696.0, "19585": 224285696.0, "19590": 224285696.0, "19595": 224285696.0, "19600": 224809984.0, "19605": 224285696.0, "19610": 224285696.0, "19615": 224285696.0, "19620": 224285696.0, "19625": 224809984.0, "19630": 224285696.0, "19635": 224285696.0, "19640": 224285696.0, "19645": 224285696.0, "19650": 224285696.0, "19655": 224809984.0, "19660": 224285696.0, "19665": 224285696.0, "19670": 224285696.0, "19675": 224809984.0, "19680": 224285696.0, "19685": 224285696.0, "19690": 224285696.0, "19695": 224809984.0, "19700": 224285696.0, "19705": 224809984.0, "19710": 224285696.0, "19715": 224285696.0, "19720": 224285696.0, "19725": 224285696.0, "19730": 224285696.0, "19735": 224285696.0, "19740": 224285696.0, "19745": 224285696.0, "19750": 224285696.0, "19755": 224285696.0, "19760": 224285696.0, "19765": 224809984.0, "19770": 224285696.0, "19775": 224809984.0, "19780": 224285696.0, "19785": 224285696.0, "19790": 224285696.0, "19795": 224285696.0, "19800": 224285696.0, "19805": 224285696.0, "19810": 224285696.0, "19815": 224809984.0, "19820": 224809984.0, "19825": 224809984.0, "19830": 224809984.0, "19835": 224285696.0, "19840": 224285696.0, "19845": 224285696.0, "19850": 224285696.0, "19855": 224809984.0, "19860": 224285696.0, "19865": 224285696.0, "19870": 224809984.0, "19875": 224809984.0, "19880": 224285696.0, "19885": 224285696.0, "19890": 224285696.0, "19895": 224285696.0, "19900": 224809984.0, "19905": 224285696.0, "19910": 224285696.0, "19915": 224285696.0, "19920": 224285696.0, "19925": 224285696.0, "19930": 224285696.0, "19935": 224285696.0, "19940": 224809984.0, "19945": 224285696.0, "19950": 224809984.0, "19955": 224285696.0, "19960": 224285696.0, "19965": 224285696.0, "19970": 224285696.0, "19975": 224285696.0, "19980": 224809984.0, "19985": 225334272.0, "19990": 224285696.0, "19995": 224285696.0, "20000": 224285696.0, "20005": "nan", "20010": "nan", "20015": "nan", "20020": "nan", "20025": "nan", "20030": "nan", "20035": "nan", "20040": "nan", "20045": "nan", "20050": "nan", "20055": "nan", "20060": "nan", "20065": "nan", "20070": "nan", "20075": "nan", "20080": "nan", "20085": "nan", "20090": "nan", "20095": "nan", "20100": "nan", "20105": "nan", "20110": "nan", "20115": "nan", "20120": "nan", "20125": "nan", "20130": "nan", "20135": "nan", "20140": "nan", "20145": "nan", "20150": "nan", "20155": "nan", "20160": "nan", "20165": "nan", "20170": "nan", "20175": "nan", "20180": "nan", "20185": "nan", "20190": "nan", "20195": "nan", "20200": "nan", "20205": "nan", "20210": "nan", "20215": "nan", "20220": "nan", "20225": "nan", "20230": "nan", "20235": "nan", "20240": "nan", "20245": "nan", "20250": "nan", "20255": "nan", "20260": "nan", "20265": "nan", "20270": "nan", "20275": "nan", "20280": "nan", "20285": "nan", "20290": "nan", "20295": "nan", "20300": "nan", "20305": "nan", "20310": "nan", "20315": "nan", "20320": "nan", "20325": "nan", "20330": "nan", "20335": "nan", "20340": "nan", "20345": "nan", "20350": "nan", "20355": "nan", "20360": "nan", "20365": "nan", "20370": "nan", "20375": "nan", "20380": "nan", "20385": "nan", "20390": "nan", "20395": "nan", "20400": "nan", "20405": "nan", "20410": "nan", "20415": "nan", "20420": "nan", "20425": "nan", "20430": "nan", "20435": "nan", "20440": "nan", "20445": "nan", "20450": "nan", "20455": "nan", "20460": "nan", "20465": "nan", "20470": "nan", "20475": "nan", "20480": "nan", "20485": "nan", "20490": "nan", "20495": "nan", "20500": "nan", "20505": "nan", "20510": "nan", "20515": "nan", "20520": "nan", "20525": "nan", "20530": "nan", "20535": "nan", "20540": "nan", "20545": "nan", "20550": "nan", "20555": "nan", "20560": "nan", "20565": "nan", "20570": "nan", "20575": "nan", "20580": "nan", "20585": "nan", "20590": "nan", "20595": "nan", "20600": "nan", "20605": "nan", "20610": "nan", "20615": "nan", "20620": "nan", "20625": "nan", "20630": "nan", "20635": "nan", "20640": "nan", "20645": "nan", "20650": "nan", "20655": "nan", "20660": "nan", "20665": "nan", "20670": "nan", "20675": "nan", "20680": "nan", "20685": "nan", "20690": "nan", "20695": "nan", "20700": "nan", "20705": "nan", "20710": "nan", "20715": "nan", "20720": "nan", "20725": "nan", "20730": "nan", "20735": "nan", "20740": "nan", "20745": "nan", "20750": "nan", "20755": "nan", "20760": "nan", "20765": "nan", "20770": "nan", "20775": "nan", "20780": "nan", "20785": "nan", "20790": "nan", "20795": "nan", "20800": "nan", "20805": "nan", "20810": "nan", "20815": "nan", "20820": "nan", "20825": "nan", "20830": "nan", "20835": "nan", "20840": "nan", "20845": "nan", "20850": "nan", "20855": "nan", "20860": "nan", "20865": "nan", "20870": "nan", "20875": "nan", "20880": "nan", "20885": "nan", "20890": "nan", "20895": "nan", "20900": "nan", "20905": "nan", "20910": "nan", "20915": "nan", "20920": "nan", "20925": "nan", "20930": "nan", "20935": "nan", "20940": "nan", "20945": "nan", "20950": "nan", "20955": "nan", "20960": "nan", "20965": "nan", "20970": "nan", "20975": "nan", "20980": "nan", "20985": "nan", "20990": "nan", "20995": "nan", "21000": "nan", "21005": "nan", "21010": "nan", "21015": "nan", "21020": "nan", "21025": "nan", "21030": "nan", "21035": "nan", "21040": "nan", "21045": "nan", "21050": "nan", "21055": "nan", "21060": "nan", "21065": "nan", "21070": "nan", "21075": "nan", "21080": "nan", "21085": "nan", "21090": "nan", "21095": "nan", "21100": "nan", "21105": "nan", "21110": "nan", "21115": "nan", "21120": "nan", "21125": "nan", "21130": "nan", "21135": "nan", "21140": "nan", "21145": "nan", "21150": "nan", "21155": "nan", "21160": "nan", "21165": "nan", "21170": "nan", "21175": "nan", "21180": "nan", "21185": "nan", "21190": "nan", "21195": "nan", "21200": "nan", "21205": "nan", "21210": "nan", "21215": "nan", "21220": "nan", "21225": "nan", "21230": "nan", "21235": "nan", "21240": "nan", "21245": "nan", "21250": "nan", "21255": "nan", "21260": "nan", "21265": "nan", "21270": "nan", "21275": "nan", "21280": "nan", "21285": "nan", "21290": "nan", "21295": "nan", "21300": "nan", "21305": "nan", "21310": "nan", "21315": "nan", "21320": "nan", "21325": "nan", "21330": "nan", "21335": "nan", "21340": "nan", "21345": "nan", "21350": "nan", "21355": "nan", "21360": "nan", "21365": "nan", "21370": "nan", "21375": "nan", "21380": "nan", "21385": "nan", "21390": "nan", "21395": "nan", "21400": "nan", "21405": "nan", "21410": "nan", "21415": "nan", "21420": "nan", "21425": "nan", "21430": "nan", "21435": "nan", "21440": "nan", "21445": "nan", "21450": "nan", "21455": "nan", "21460": "nan", "21465": "nan", "21470": "nan", "21475": "nan", "21480": "nan", "21485": "nan", "21490": "nan", "21495": "nan", "21500": "nan", "21505": "nan", "21510": "nan", "21515": "nan", "21520": "nan", "21525": "nan", "21530": "nan", "21535": "nan", "21540": "nan", "21545": "nan", "21550": "nan", "21555": "nan", "21560": "nan", "21565": "nan", "21570": "nan", "21575": "nan", "21580": "nan", "21585": "nan", "21590": "nan", "21595": "nan", "21600": "nan", "21605": "nan", "21610": "nan", "21615": "nan", "21620": "nan", "21625": "nan", "21630": "nan", "21635": "nan", "21640": "nan", "21645": "nan", "21650": "nan", "21655": "nan", "21660": "nan", "21665": "nan", "21670": "nan", "21675": "nan", "21680": "nan", "21685": "nan", "21690": "nan", "21695": "nan", "21700": "nan", "21705": "nan", "21710": "nan", "21715": "nan", "21720": "nan", "21725": "nan", "21730": "nan", "21735": "nan", "21740": "nan", "21745": "nan", "21750": "nan", "21755": "nan", "21760": "nan", "21765": "nan", "21770": "nan", "21775": "nan", "21780": "nan", "21785": "nan", "21790": "nan", "21795": "nan", "21800": "nan", "21805": "nan", "21810": "nan", "21815": "nan", "21820": "nan", "21825": "nan", "21830": "nan", "21835": "nan", "21840": "nan", "21845": "nan", "21850": "nan", "21855": "nan", "21860": "nan", "21865": "nan", "21870": "nan", "21875": "nan", "21880": "nan", "21885": "nan", "21890": "nan", "21895": "nan", "21900": "nan", "21905": "nan", "21910": "nan", "21915": "nan", "21920": "nan", "21925": "nan", "21930": "nan", "21935": "nan", "21940": "nan", "21945": "nan", "21950": "nan", "21955": "nan", "21960": "nan", "21965": "nan", "21970": "nan", "21975": "nan", "21980": "nan", "21985": "nan", "21990": "nan", "21995": "nan", "22000": "nan", "22005": "nan", "22010": "nan", "22015": "nan", "22020": "nan", "22025": "nan", "22030": "nan", "22035": "nan", "22040": "nan", "22045": "nan", "22050": "nan", "22055": "nan", "22060": "nan", "22065": "nan", "22070": "nan", "22075": "nan", "22080": "nan", "22085": "nan", "22090": "nan", "22095": "nan", "22100": "nan", "22105": "nan", "22110": "nan", "22115": "nan", "22120": "nan", "22125": "nan", "22130": "nan", "22135": "nan", "22140": "nan", "22145": "nan", "22150": "nan", "22155": "nan", "22160": "nan", "22165": "nan", "22170": "nan", "22175": "nan", "22180": "nan", "22185": "nan", "22190": "nan", "22195": "nan", "22200": "nan", "22205": "nan", "22210": "nan", "22215": "nan", "22220": "nan", "22225": "nan", "22230": "nan", "22235": "nan", "22240": "nan", "22245": "nan", "22250": "nan", "22255": "nan", "22260": "nan", "22265": "nan", "22270": "nan", "22275": "nan", "22280": "nan", "22285": "nan", "22290": "nan", "22295": "nan", "22300": "nan", "22305": "nan", "22310": "nan", "22315": "nan", "22320": "nan", "22325": "nan", "22330": "nan", "22335": "nan", "22340": "nan", "22345": "nan", "22350": "nan", "22355": "nan", "22360": "nan", "22365": "nan", "22370": "nan", "22375": "nan", "22380": "nan", "22385": "nan", "22390": "nan", "22395": "nan", "22400": "nan", "22405": "nan", "22410": "nan", "22415": "nan", "22420": "nan", "22425": "nan", "22430": "nan", "22435": "nan", "22440": "nan", "22445": "nan", "22450": "nan", "22455": "nan", "22460": "nan", "22465": "nan", "22470": "nan", "22475": "nan", "22480": "nan", "22485": "nan", "22490": "nan", "22495": "nan", "22500": "nan", "22505": "nan", "22510": "nan", "22515": "nan", "22520": "nan", "22525": "nan", "22530": "nan", "22535": "nan", "22540": "nan", "22545": "nan", "22550": "nan", "22555": "nan", "22560": "nan", "22565": "nan", "22570": "nan", "22575": "nan", "22580": "nan", "22585": "nan", "22590": "nan", "22595": "nan", "22600": "nan", "22605": "nan", "22610": "nan", "22615": "nan", "22620": "nan", "22625": "nan", "22630": "nan", "22635": "nan", "22640": "nan", "22645": "nan", "22650": "nan", "22655": "nan", "22660": "nan", "22665": "nan", "22670": "nan", "22675": "nan", "22680": "nan", "22685": "nan", "22690": "nan", "22695": "nan", "22700": "nan", "22705": "nan", "22710": "nan", "22715": "nan", "22720": "nan", "22725": "nan", "22730": "nan", "22735": "nan", "22740": "nan", "22745": "nan", "22750": "nan", "22755": "nan", "22760": "nan", "22765": "nan", "22770": "nan", "22775": "nan", "22780": "nan", "22785": "nan", "22790": "nan", "22795": "nan", "22800": "nan", "22805": "nan", "22810": "nan", "22815": "nan", "22820": "nan", "22825": "nan", "22830": "nan", "22835": "nan", "22840": "nan", "22845": "nan", "22850": "nan", "22855": "nan", "22860": "nan", "22865": "nan", "22870": "nan", "22875": "nan", "22880": "nan", "22885": "nan", "22890": "nan", "22895": "nan", "22900": "nan", "22905": "nan", "22910": "nan", "22915": "nan", "22920": "nan", "22925": "nan", "22930": "nan", "22935": "nan", "22940": "nan", "22945": "nan", "22950": "nan", "22955": "nan", "22960": "nan", "22965": "nan", "22970": "nan", "22975": "nan", "22980": "nan", "22985": "nan", "22990": "nan", "22995": "nan", "23000": "nan", "23005": "nan", "23010": "nan", "23015": "nan", "23020": "nan", "23025": "nan", "23030": "nan", "23035": "nan", "23040": "nan", "23045": "nan", "23050": "nan", "23055": "nan", "23060": "nan", "23065": "nan", "23070": "nan", "23075": "nan", "23080": "nan", "23085": "nan", "23090": "nan", "23095": "nan", "23100": "nan", "23105": "nan", "23110": "nan", "23115": "nan", "23120": "nan", "23125": "nan", "23130": "nan", "23135": "nan", "23140": "nan", "23145": "nan", "23150": "nan", "23155": "nan", "23160": "nan", "23165": "nan", "23170": "nan", "23175": "nan", "23180": "nan", "23185": "nan", "23190": "nan", "23195": "nan", "23200": "nan", "23205": "nan", "23210": "nan", "23215": "nan", "23220": "nan", "23225": "nan", "23230": "nan", "23235": "nan", "23240": "nan", "23245": "nan", "23250": "nan", "23255": "nan", "23260": "nan", "23265": "nan", "23270": "nan", "23275": "nan", "23280": "nan", "23285": "nan", "23290": "nan", "23295": "nan", "23300": "nan", "23305": "nan", "23310": "nan", "23315": "nan", "23320": "nan", "23325": "nan", "23330": "nan", "23335": "nan", "23340": "nan", "23345": "nan", "23350": "nan", "23355": "nan", "23360": "nan", "23365": "nan", "23370": "nan", "23375": "nan", "23380": "nan", "23385": "nan", "23390": "nan", "23395": "nan", "23400": "nan", "23405": "nan", "23410": "nan", "23415": "nan", "23420": "nan", "23425": "nan", "23430": "nan", "23435": "nan", "23440": "nan", "23445": "nan", "23450": "nan", "23455": "nan", "23460": "nan", "23465": "nan", "23470": "nan", "23475": "nan", "23480": "nan", "23485": "nan", "23490": "nan", "23495": "nan", "23500": "nan", "23505": "nan", "23510": "nan", "23515": "nan", "23520": "nan", "23525": "nan", "23530": "nan", "23535": "nan", "23540": "nan", "23545": "nan", "23550": "nan", "23555": "nan", "23560": "nan", "23565": "nan", "23570": "nan", "23575": "nan", "23580": "nan", "23585": "nan", "23590": "nan", "23595": "nan", "23600": "nan", "23605": "nan", "23610": "nan", "23615": "nan", "23620": "nan", "23625": "nan", "23630": "nan", "23635": "nan", "23640": "nan", "23645": "nan", "23650": "nan", "23655": "nan", "23660": "nan", "23665": "nan", "23670": "nan", "23675": "nan", "23680": "nan", "23685": "nan", "23690": "nan", "23695": "nan", "23700": "nan", "23705": "nan", "23710": "nan", "23715": "nan", "23720": "nan", "23725": "nan", "23730": "nan", "23735": "nan", "23740": "nan", "23745": "nan", "23750": "nan", "23755": "nan", "23760": "nan", "23765": "nan", "23770": "nan", "23775": "nan", "23780": "nan", "23785": "nan", "23790": "nan", "23795": "nan", "23800": "nan", "23805": "nan", "23810": "nan", "23815": "nan", "23820": "nan", "23825": "nan", "23830": "nan", "23835": "nan", "23840": "nan", "23845": "nan", "23850": "nan", "23855": "nan", "23860": "nan", "23865": "nan", "23870": "nan", "23875": "nan", "23880": "nan", "23885": "nan", "23890": "nan", "23895": "nan", "23900": "nan", "23905": "nan", "23910": "nan", "23915": "nan", "23920": "nan", "23925": "nan", "23930": "nan", "23935": "nan", "23940": "nan", "23945": "nan", "23950": "nan", "23955": "nan", "23960": "nan", "23965": "nan", "23970": "nan", "23975": "nan", "23980": "nan", "23985": "nan", "23990": "nan", "23995": "nan", "24000": "nan", "24005": "nan", "24010": "nan", "24015": "nan", "24020": "nan", "24025": "nan", "24030": "nan", "24035": "nan", "24040": "nan", "24045": "nan", "24050": "nan", "24055": "nan", "24060": "nan", "24065": "nan", "24070": "nan", "24075": "nan", "24080": "nan", "24085": "nan", "24090": "nan", "24095": "nan", "24100": "nan", "24105": "nan", "24110": "nan", "24115": "nan", "24120": "nan", "24125": "nan", "24130": "nan", "24135": "nan", "24140": "nan", "24145": "nan", "24150": "nan", "24155": "nan", "24160": "nan", "24165": "nan", "24170": "nan", "24175": "nan", "24180": "nan", "24185": "nan", "24190": "nan", "24195": "nan", "24200": "nan", "24205": "nan", "24210": "nan", "24215": "nan", "24220": "nan", "24225": "nan", "24230": "nan", "24235": "nan", "24240": "nan", "24245": "nan", "24250": "nan", "24255": "nan", "24260": "nan", "24265": "nan", "24270": "nan", "24275": "nan", "24280": "nan", "24285": "nan", "24290": "nan", "24295": "nan", "24300": "nan", "24305": "nan", "24310": "nan", "24315": "nan", "24320": "nan", "24325": "nan", "24330": "nan", "24335": "nan", "24340": "nan", "24345": "nan", "24350": "nan", "24355": "nan", "24360": "nan", "24365": "nan", "24370": "nan", "24375": "nan", "24380": "nan", "24385": "nan", "24390": "nan", "24395": "nan", "24400": "nan", "24405": "nan", "24410": "nan", "24415": "nan", "24420": "nan", "24425": "nan", "24430": "nan", "24435": "nan", "24440": "nan", "24445": "nan", "24450": "nan", "24455": "nan", "24460": "nan", "24465": "nan", "24470": "nan", "24475": "nan", "24480": "nan", "24485": "nan", "24490": "nan", "24495": "nan", "24500": "nan", "24505": "nan", "24510": "nan", "24515": "nan", "24520": "nan", "24525": "nan", "24530": "nan", "24535": "nan", "24540": "nan", "24545": "nan", "24550": "nan", "24555": "nan", "24560": "nan", "24565": "nan", "24570": "nan", "24575": "nan", "24580": "nan", "24585": "nan", "24590": "nan", "24595": "nan", "24600": "nan", "24605": "nan", "24610": "nan", "24615": "nan", "24620": "nan", "24625": "nan", "24630": "nan", "24635": "nan", "24640": "nan", "24645": "nan", "24650": "nan", "24655": "nan", "24660": "nan", "24665": "nan", "24670": "nan", "24675": "nan", "24680": "nan", "24685": "nan", "24690": "nan", "24695": "nan", "24700": "nan", "24705": "nan", "24710": "nan", "24715": "nan", "24720": "nan", "24725": "nan", "24730": "nan", "24735": "nan", "24740": "nan", "24745": "nan", "24750": "nan", "24755": "nan", "24760": "nan", "24765": "nan", "24770": "nan", "24775": "nan", "24780": "nan", "24785": "nan", "24790": "nan", "24795": "nan", "24800": "nan", "24805": "nan", "24810": "nan", "24815": "nan", "24820": "nan", "24825": "nan", "24830": "nan", "24835": "nan", "24840": "nan", "24845": "nan", "24850": "nan", "24855": "nan", "24860": "nan", "24865": "nan", "24870": "nan", "24875": "nan", "24880": "nan", "24885": "nan", "24890": "nan", "24895": "nan", "24900": "nan", "24905": "nan", "24910": "nan", "24915": "nan", "24920": "nan", "24925": "nan", "24930": "nan", "24935": "nan", "24940": "nan", "24945": "nan", "24950": "nan", "24955": "nan", "24960": "nan", "24965": "nan", "24970": "nan", "24975": "nan", "24980": "nan", "24985": "nan", "24990": "nan", "24995": "nan", "25000": "nan"}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 25000, "step_interval": 5, "values": {"1": 477404160.0, "5": 477404160.0, "10": 477404160.0, "15": 477404160.0, "20": 477404160.0, "25": 477404160.0, "30": 477404160.0, "35": 477404160.0, "40": 477404160.0, "45": 477404160.0, "50": 477404160.0, "55": 477404160.0, "60": 477404160.0, "65": 477404160.0, "70": 477404160.0, "75": 477404160.0, "80": 477404160.0, "85": 477404160.0, "90": 477404160.0, "95": 477404160.0, "100": 477404160.0, "105": 477404160.0, "110": 477404160.0, "115": 477404160.0, "120": 477404160.0, "125": 477404160.0, "130": 477404160.0, "135": 477404160.0, "140": 477404160.0, "145": 477404160.0, "150": 477404160.0, "155": 477404160.0, "160": 477404160.0, "165": 477404160.0, "170": 477404160.0, "175": 477404160.0, "180": 477404160.0, "185": 477404160.0, "190": 477404160.0, "195": 477404160.0, "200": 477404160.0, "205": 477404160.0, "210": 477404160.0, "215": 477404160.0, "220": 477404160.0, "225": 477404160.0, "230": 477404160.0, "235": 477404160.0, "240": 477404160.0, "245": 477404160.0, "250": 477404160.0, "255": 477404160.0, "260": 477404160.0, "265": 477404160.0, "270": 477404160.0, "275": 477404160.0, "280": 477404160.0, "285": 477404160.0, "290": 477404160.0, "295": 477404160.0, "300": 477404160.0, "305": 477404160.0, "310": 477404160.0, "315": 477404160.0, "320": 477404160.0, "325": 477404160.0, "330": 477404160.0, "335": 477404160.0, "340": 477404160.0, "345": 477404160.0, "350": 477404160.0, "355": 477404160.0, "360": 477404160.0, "365": 477404160.0, "370": 477404160.0, "375": 477404160.0, "380": 477404160.0, "385": 477404160.0, "390": 477404160.0, "395": 477404160.0, "400": 477404160.0, "405": 477404160.0, "410": 477404160.0, "415": 477404160.0, "420": 477404160.0, "425": 477404160.0, "430": 477404160.0, "435": 477404160.0, "440": 477404160.0, "445": 477404160.0, "450": 477404160.0, "455": 477404160.0, "460": 477404160.0, "465": 477404160.0, "470": 477404160.0, "475": 477404160.0, "480": 477404160.0, "485": 477404160.0, "490": 477404160.0, "495": 477404160.0, "500": 477404160.0, "505": 477404160.0, "510": 477404160.0, "515": 477404160.0, "520": 477404160.0, "525": 477404160.0, "530": 477404160.0, "535": 477404160.0, "540": 477404160.0, "545": 477404160.0, "550": 477404160.0, "555": 477404160.0, "560": 477404160.0, "565": 477404160.0, "570": 477404160.0, "575": 477404160.0, "580": 477404160.0, "585": 477404160.0, "590": 477404160.0, "595": 477404160.0, "600": 477404160.0, "605": 477404160.0, "610": 477404160.0, "615": 477404160.0, "620": 477404160.0, "625": 477404160.0, "630": 477404160.0, "635": 477404160.0, "640": 477404160.0, "645": 477404160.0, "650": 477404160.0, "655": 477404160.0, "660": 477404160.0, "665": 477404160.0, "670": 477404160.0, "675": 477404160.0, "680": 477404160.0, "685": 477404160.0, "690": 477404160.0, "695": 477404160.0, "700": 477404160.0, "705": 477404160.0, "710": 477404160.0, "715": 477404160.0, "720": 477404160.0, "725": 477404160.0, "730": 477404160.0, "735": 477404160.0, "740": 477404160.0, "745": 477404160.0, "750": 477404160.0, "755": 477404160.0, "760": 477404160.0, "765": 477404160.0, "770": 477404160.0, "775": 477404160.0, "780": 477404160.0, "785": 477404160.0, "790": 477404160.0, "795": 477404160.0, "800": 477404160.0, "805": 477404160.0, "810": 477404160.0, "815": 477404160.0, "820": 477404160.0, "825": 477404160.0, "830": 477404160.0, "835": 477404160.0, "840": 477404160.0, "845": 477404160.0, "850": 477404160.0, "855": 477404160.0, "860": 477404160.0, "865": 477404160.0, "870": 477404160.0, "875": 477404160.0, "880": 477404160.0, "885": 477404160.0, "890": 477404160.0, "895": 477404160.0, "900": 477404160.0, "905": 477404160.0, "910": 477404160.0, "915": 477404160.0, "920": 477404160.0, "925": 477404160.0, "930": 477404160.0, "935": 477404160.0, "940": 477404160.0, "945": 477404160.0, "950": 477404160.0, "955": 477404160.0, "960": 477404160.0, "965": 477404160.0, "970": 477404160.0, "975": 477404160.0, "980": 477404160.0, "985": 477404160.0, "990": 477404160.0, "995": 477404160.0, "1000": 477404160.0, "1005": 486144000.0, "1010": 486144000.0, "1015": 486144000.0, "1020": 486144000.0, "1025": 486144000.0, "1030": 486144000.0, "1035": 486144000.0, "1040": 486144000.0, "1045": 486144000.0, "1050": 486144000.0, "1055": 486144000.0, "1060": 486144000.0, "1065": 486144000.0, "1070": 486144000.0, "1075": 486144000.0, "1080": 486144000.0, "1085": 486144000.0, "1090": 486144000.0, "1095": 486144000.0, "1100": 486144000.0, "1105": 486144000.0, "1110": 486144000.0, "1115": 486144000.0, "1120": 486144000.0, "1125": 486144000.0, "1130": 486144000.0, "1135": 486144000.0, "1140": 486144000.0, "1145": 486144000.0, "1150": 486144000.0, "1155": 486144000.0, "1160": 486144000.0, "1165": 486144000.0, "1170": 486144000.0, "1175": 486144000.0, "1180": 486144000.0, "1185": 486144000.0, "1190": 486144000.0, "1195": 486144000.0, "1200": 486144000.0, "1205": 486144000.0, "1210": 486144000.0, "1215": 486144000.0, "1220": 486144000.0, "1225": 486144000.0, "1230": 486144000.0, "1235": 486144000.0, "1240": 486144000.0, "1245": 486144000.0, "1250": 486144000.0, "1255": 486144000.0, "1260": 486144000.0, "1265": 486144000.0, "1270": 486144000.0, "1275": 486144000.0, "1280": 486144000.0, "1285": 486144000.0, "1290": 486144000.0, "1295": 486144000.0, "1300": 486144000.0, "1305": 486144000.0, "1310": 486144000.0, "1315": 486144000.0, "1320": 486144000.0, "1325": 486144000.0, "1330": 486144000.0, "1335": 486144000.0, "1340": 486144000.0, "1345": 486144000.0, "1350": 486144000.0, "1355": 486144000.0, "1360": 486144000.0, "1365": 486144000.0, "1370": 486144000.0, "1375": 486144000.0, "1380": 486144000.0, "1385": 486144000.0, "1390": 486144000.0, "1395": 486144000.0, "1400": 486144000.0, "1405": 486144000.0, "1410": 486144000.0, "1415": 486144000.0, "1420": 486144000.0, "1425": 486144000.0, "1430": 486144000.0, "1435": 486144000.0, "1440": 486144000.0, "1445": 486144000.0, "1450": 486144000.0, "1455": 486144000.0, "1460": 486144000.0, "1465": 486144000.0, "1470": 486144000.0, "1475": 486144000.0, "1480": 486144000.0, "1485": 486144000.0, "1490": 486144000.0, "1495": 486144000.0, "1500": 486144000.0, "1505": 486144000.0, "1510": 486144000.0, "1515": 486144000.0, "1520": 486144000.0, "1525": 486144000.0, "1530": 486144000.0, "1535": 486144000.0, "1540": 486144000.0, "1545": 486144000.0, "1550": 486144000.0, "1555": 486144000.0, "1560": 486144000.0, "1565": 486144000.0, "1570": 486144000.0, "1575": 486144000.0, "1580": 486144000.0, "1585": 486144000.0, "1590": 486144000.0, "1595": 486144000.0, "1600": 486144000.0, "1605": 486144000.0, "1610": 486144000.0, "1615": 486144000.0, "1620": 486144000.0, "1625": 486144000.0, "1630": 486144000.0, "1635": 486144000.0, "1640": 486144000.0, "1645": 486144000.0, "1650": 486144000.0, "1655": 486144000.0, "1660": 486144000.0, "1665": 486144000.0, "1670": 486144000.0, "1675": 486144000.0, "1680": 486144000.0, "1685": 486144000.0, "1690": 486144000.0, "1695": 486144000.0, "1700": 486144000.0, "1705": 486144000.0, "1710": 486144000.0, "1715": 486144000.0, "1720": 486144000.0, "1725": 486144000.0, "1730": 486144000.0, "1735": 486144000.0, "1740": 486144000.0, "1745": 486144000.0, "1750": 486144000.0, "1755": 486144000.0, "1760": 486144000.0, "1765": 486144000.0, "1770": 486144000.0, "1775": 486144000.0, "1780": 486144000.0, "1785": 486144000.0, "1790": 486144000.0, "1795": 486144000.0, "1800": 486144000.0, "1805": 486144000.0, "1810": 486144000.0, "1815": 486144000.0, "1820": 486144000.0, "1825": 486144000.0, "1830": 486144000.0, "1835": 486144000.0, "1840": 486144000.0, "1845": 486144000.0, "1850": 486144000.0, "1855": 486144000.0, "1860": 486144000.0, "1865": 486144000.0, "1870": 486144000.0, "1875": 486144000.0, "1880": 486144000.0, "1885": 486144000.0, "1890": 486144000.0, "1895": 486144000.0, "1900": 486144000.0, "1905": 486144000.0, "1910": 486144000.0, "1915": 486144000.0, "1920": 486144000.0, "1925": 486144000.0, "1930": 486144000.0, "1935": 486144000.0, "1940": 486144000.0, "1945": 486144000.0, "1950": 486144000.0, "1955": 486144000.0, "1960": 486144000.0, "1965": 486144000.0, "1970": 486144000.0, "1975": 486144000.0, "1980": 486144000.0, "1985": 486144000.0, "1990": 486144000.0, "1995": 486144000.0, "2000": 486144000.0, "2005": 486144000.0, "2010": 486144000.0, "2015": 486144000.0, "2020": 486144000.0, "2025": 486144000.0, "2030": 486144000.0, "2035": 486144000.0, "2040": 486144000.0, "2045": 486144000.0, "2050": 486144000.0, "2055": 486144000.0, "2060": 486144000.0, "2065": 486144000.0, "2070": 486144000.0, "2075": 486144000.0, "2080": 486144000.0, "2085": 486144000.0, "2090": 486144000.0, "2095": 486144000.0, "2100": 486144000.0, "2105": 486144000.0, "2110": 486144000.0, "2115": 486144000.0, "2120": 486144000.0, "2125": 486144000.0, "2130": 486144000.0, "2135": 486144000.0, "2140": 486144000.0, "2145": 486144000.0, "2150": 486144000.0, "2155": 486144000.0, "2160": 486144000.0, "2165": 486144000.0, "2170": 486144000.0, "2175": 486144000.0, "2180": 486144000.0, "2185": 486144000.0, "2190": 486144000.0, "2195": 486144000.0, "2200": 486144000.0, "2205": 486144000.0, "2210": 486144000.0, "2215": 486144000.0, "2220": 486144000.0, "2225": 486144000.0, "2230": 486144000.0, "2235": 486144000.0, "2240": 486144000.0, "2245": 486144000.0, "2250": 486144000.0, "2255": 486144000.0, "2260": 486144000.0, "2265": 486144000.0, "2270": 486144000.0, "2275": 486144000.0, "2280": 486144000.0, "2285": 486144000.0, "2290": 486144000.0, "2295": 486144000.0, "2300": 486144000.0, "2305": 486144000.0, "2310": 486144000.0, "2315": 486144000.0, "2320": 486144000.0, "2325": 486144000.0, "2330": 486144000.0, "2335": 486144000.0, "2340": 486144000.0, "2345": 486144000.0, "2350": 486144000.0, "2355": 486144000.0, "2360": 486144000.0, "2365": 486144000.0, "2370": 486144000.0, "2375": 486144000.0, "2380": 486144000.0, "2385": 486144000.0, "2390": 486144000.0, "2395": 486144000.0, "2400": 486144000.0, "2405": 486144000.0, "2410": 486144000.0, "2415": 486144000.0, "2420": 486144000.0, "2425": 486144000.0, "2430": 486144000.0, "2435": 486144000.0, "2440": 486144000.0, "2445": 486144000.0, "2450": 486144000.0, "2455": 486144000.0, "2460": 486144000.0, "2465": 486144000.0, "2470": 486144000.0, "2475": 486144000.0, "2480": 486144000.0, "2485": 486144000.0, "2490": 486144000.0, "2495": 486144000.0, "2500": 486144000.0, "2505": 486144000.0, "2510": 486144000.0, "2515": 486144000.0, "2520": 486144000.0, "2525": 486144000.0, "2530": 486144000.0, "2535": 486144000.0, "2540": 486144000.0, "2545": 486144000.0, "2550": 486144000.0, "2555": 486144000.0, "2560": 486144000.0, "2565": 486144000.0, "2570": 486144000.0, "2575": 486144000.0, "2580": 486144000.0, "2585": 486144000.0, "2590": 486144000.0, "2595": 486144000.0, "2600": 486144000.0, "2605": 486144000.0, "2610": 486144000.0, "2615": 486144000.0, "2620": 486144000.0, "2625": 486144000.0, "2630": 486144000.0, "2635": 486144000.0, "2640": 486144000.0, "2645": 486144000.0, "2650": 486144000.0, "2655": 486144000.0, "2660": 486144000.0, "2665": 486144000.0, "2670": 486144000.0, "2675": 486144000.0, "2680": 486144000.0, "2685": 486144000.0, "2690": 486144000.0, "2695": 486144000.0, "2700": 486144000.0, "2705": 486144000.0, "2710": 486144000.0, "2715": 486144000.0, "2720": 486144000.0, "2725": 486144000.0, "2730": 486144000.0, "2735": 486144000.0, "2740": 486144000.0, "2745": 486144000.0, "2750": 486144000.0, "2755": 486144000.0, "2760": 486144000.0, "2765": 486144000.0, "2770": 486144000.0, "2775": 486144000.0, "2780": 486144000.0, "2785": 486144000.0, "2790": 486144000.0, "2795": 486144000.0, "2800": 486144000.0, "2805": 486144000.0, "2810": 486144000.0, "2815": 486144000.0, "2820": 486144000.0, "2825": 486144000.0, "2830": 486144000.0, "2835": 486144000.0, "2840": 486144000.0, "2845": 486144000.0, "2850": 486144000.0, "2855": 486144000.0, "2860": 486144000.0, "2865": 486144000.0, "2870": 486144000.0, "2875": 486144000.0, "2880": 486144000.0, "2885": 486144000.0, "2890": 486144000.0, "2895": 486144000.0, "2900": 486144000.0, "2905": 486144000.0, "2910": 486144000.0, "2915": 486144000.0, "2920": 486144000.0, "2925": 486144000.0, "2930": 486144000.0, "2935": 486144000.0, "2940": 486144000.0, "2945": 486144000.0, "2950": 486144000.0, "2955": 486144000.0, "2960": 486144000.0, "2965": 486144000.0, "2970": 486144000.0, "2975": 486144000.0, "2980": 486144000.0, "2985": 486144000.0, "2990": 486144000.0, "2995": 486144000.0, "3000": 486144000.0, "3005": 486144000.0, "3010": 486144000.0, "3015": 486144000.0, "3020": 486144000.0, "3025": 486144000.0, "3030": 486144000.0, "3035": 486144000.0, "3040": 486144000.0, "3045": 486144000.0, "3050": 486144000.0, "3055": 486144000.0, "3060": 486144000.0, "3065": 486144000.0, "3070": 486144000.0, "3075": 486144000.0, "3080": 486144000.0, "3085": 486144000.0, "3090": 486144000.0, "3095": 486144000.0, "3100": 486144000.0, "3105": 486144000.0, "3110": 486144000.0, "3115": 486144000.0, "3120": 486144000.0, "3125": 486144000.0, "3130": 486144000.0, "3135": 486144000.0, "3140": 486144000.0, "3145": 486144000.0, "3150": 486144000.0, "3155": 486144000.0, "3160": 486144000.0, "3165": 486144000.0, "3170": 486144000.0, "3175": 486144000.0, "3180": 486144000.0, "3185": 486144000.0, "3190": 486144000.0, "3195": 486144000.0, "3200": 486144000.0, "3205": 486144000.0, "3210": 486144000.0, "3215": 486144000.0, "3220": 486144000.0, "3225": 486144000.0, "3230": 486144000.0, "3235": 486144000.0, "3240": 486144000.0, "3245": 486144000.0, "3250": 486144000.0, "3255": 486144000.0, "3260": 486144000.0, "3265": 486144000.0, "3270": 486144000.0, "3275": 486144000.0, "3280": 486144000.0, "3285": 486144000.0, "3290": 486144000.0, "3295": 486144000.0, "3300": 486144000.0, "3305": 486144000.0, "3310": 486144000.0, "3315": 486144000.0, "3320": 486144000.0, "3325": 486144000.0, "3330": 486144000.0, "3335": 486144000.0, "3340": 486144000.0, "3345": 486144000.0, "3350": 486144000.0, "3355": 486144000.0, "3360": 486144000.0, "3365": 486144000.0, "3370": 486144000.0, "3375": 486144000.0, "3380": 486144000.0, "3385": 486144000.0, "3390": 486144000.0, "3395": 486144000.0, "3400": 486144000.0, "3405": 486144000.0, "3410": 486144000.0, "3415": 486144000.0, "3420": 486144000.0, "3425": 486144000.0, "3430": 486144000.0, "3435": 486144000.0, "3440": 486144000.0, "3445": 486144000.0, "3450": 486144000.0, "3455": 486144000.0, "3460": 486144000.0, "3465": 486144000.0, "3470": 486144000.0, "3475": 486144000.0, "3480": 486144000.0, "3485": 486144000.0, "3490": 486144000.0, "3495": 486144000.0, "3500": 486144000.0, "3505": 486144000.0, "3510": 486144000.0, "3515": 486144000.0, "3520": 486144000.0, "3525": 486144000.0, "3530": 486144000.0, "3535": 486144000.0, "3540": 486144000.0, "3545": 486144000.0, "3550": 486144000.0, "3555": 486144000.0, "3560": 486144000.0, "3565": 486144000.0, "3570": 486144000.0, "3575": 486144000.0, "3580": 486144000.0, "3585": 486144000.0, "3590": 486144000.0, "3595": 486144000.0, "3600": 486144000.0, "3605": 486144000.0, "3610": 486144000.0, "3615": 486144000.0, "3620": 486144000.0, "3625": 486144000.0, "3630": 486144000.0, "3635": 486144000.0, "3640": 486144000.0, "3645": 486144000.0, "3650": 486144000.0, "3655": 486144000.0, "3660": 486144000.0, "3665": 486144000.0, "3670": 486144000.0, "3675": 486144000.0, "3680": 486144000.0, "3685": 486144000.0, "3690": 486144000.0, "3695": 486144000.0, "3700": 486144000.0, "3705": 486144000.0, "3710": 486144000.0, "3715": 486144000.0, "3720": 486144000.0, "3725": 486144000.0, "3730": 486144000.0, "3735": 486144000.0, "3740": 486144000.0, "3745": 486144000.0, "3750": 486144000.0, "3755": 486144000.0, "3760": 486144000.0, "3765": 486144000.0, "3770": 486144000.0, "3775": 486144000.0, "3780": 486144000.0, "3785": 486144000.0, "3790": 486144000.0, "3795": 486144000.0, "3800": 486144000.0, "3805": 486144000.0, "3810": 486144000.0, "3815": 486144000.0, "3820": 486144000.0, "3825": 486144000.0, "3830": 486144000.0, "3835": 486144000.0, "3840": 486144000.0, "3845": 486144000.0, "3850": 486144000.0, "3855": 486144000.0, "3860": 486144000.0, "3865": 486144000.0, "3870": 486144000.0, "3875": 486144000.0, "3880": 486144000.0, "3885": 486144000.0, "3890": 486144000.0, "3895": 486144000.0, "3900": 486144000.0, "3905": 486144000.0, "3910": 486144000.0, "3915": 486144000.0, "3920": 486144000.0, "3925": 486144000.0, "3930": 486144000.0, "3935": 486144000.0, "3940": 486144000.0, "3945": 486144000.0, "3950": 486144000.0, "3955": 486144000.0, "3960": 486144000.0, "3965": 486144000.0, "3970": 486144000.0, "3975": 486144000.0, "3980": 486144000.0, "3985": 486144000.0, "3990": 486144000.0, "3995": 486144000.0, "4000": 486144000.0, "4005": 486144000.0, "4010": 486144000.0, "4015": 486144000.0, "4020": 486144000.0, "4025": 486144000.0, "4030": 486144000.0, "4035": 486144000.0, "4040": 486144000.0, "4045": 486144000.0, "4050": 486144000.0, "4055": 486144000.0, "4060": 486144000.0, "4065": 486144000.0, "4070": 486144000.0, "4075": 486144000.0, "4080": 486144000.0, "4085": 486144000.0, "4090": 486144000.0, "4095": 486144000.0, "4100": 486144000.0, "4105": 486144000.0, "4110": 486144000.0, "4115": 486144000.0, "4120": 486144000.0, "4125": 486144000.0, "4130": 486144000.0, "4135": 486144000.0, "4140": 486144000.0, "4145": 486144000.0, "4150": 486144000.0, "4155": 486144000.0, "4160": 486144000.0, "4165": 486144000.0, "4170": 486144000.0, "4175": 486144000.0, "4180": 486144000.0, "4185": 486144000.0, "4190": 486144000.0, "4195": 486144000.0, "4200": 486144000.0, "4205": 486144000.0, "4210": 486144000.0, "4215": 486144000.0, "4220": 486144000.0, "4225": 486144000.0, "4230": 486144000.0, "4235": 486144000.0, "4240": 486144000.0, "4245": 486144000.0, "4250": 486144000.0, "4255": 486144000.0, "4260": 486144000.0, "4265": 486144000.0, "4270": 486144000.0, "4275": 486144000.0, "4280": 486144000.0, "4285": 486144000.0, "4290": 486144000.0, "4295": 486144000.0, "4300": 486144000.0, "4305": 486144000.0, "4310": 486144000.0, "4315": 486144000.0, "4320": 486144000.0, "4325": 486144000.0, "4330": 486144000.0, "4335": 486144000.0, "4340": 486144000.0, "4345": 486144000.0, "4350": 486144000.0, "4355": 486144000.0, "4360": 486144000.0, "4365": 486144000.0, "4370": 486144000.0, "4375": 486144000.0, "4380": 486144000.0, "4385": 486144000.0, "4390": 486144000.0, "4395": 486144000.0, "4400": 486144000.0, "4405": 486144000.0, "4410": 486144000.0, "4415": 486144000.0, "4420": 486144000.0, "4425": 486144000.0, "4430": 486144000.0, "4435": 486144000.0, "4440": 486144000.0, "4445": 486144000.0, "4450": 486144000.0, "4455": 486144000.0, "4460": 486144000.0, "4465": 486144000.0, "4470": 486144000.0, "4475": 486144000.0, "4480": 486144000.0, "4485": 486144000.0, "4490": 486144000.0, "4495": 486144000.0, "4500": 486144000.0, "4505": 486144000.0, "4510": 486144000.0, "4515": 486144000.0, "4520": 486144000.0, "4525": 486144000.0, "4530": 486144000.0, "4535": 486144000.0, "4540": 486144000.0, "4545": 486144000.0, "4550": 486144000.0, "4555": 486144000.0, "4560": 486144000.0, "4565": 486144000.0, "4570": 486144000.0, "4575": 486144000.0, "4580": 486144000.0, "4585": 486144000.0, "4590": 486144000.0, "4595": 486144000.0, "4600": 486144000.0, "4605": 486144000.0, "4610": 486144000.0, "4615": 486144000.0, "4620": 486144000.0, "4625": 486144000.0, "4630": 486144000.0, "4635": 486144000.0, "4640": 486144000.0, "4645": 486144000.0, "4650": 486144000.0, "4655": 486144000.0, "4660": 486144000.0, "4665": 486144000.0, "4670": 486144000.0, "4675": 486144000.0, "4680": 486144000.0, "4685": 486144000.0, "4690": 486144000.0, "4695": 486144000.0, "4700": 486144000.0, "4705": 486144000.0, "4710": 486144000.0, "4715": 486144000.0, "4720": 486144000.0, "4725": 486144000.0, "4730": 486144000.0, "4735": 486144000.0, "4740": 486144000.0, "4745": 486144000.0, "4750": 486144000.0, "4755": 486144000.0, "4760": 486144000.0, "4765": 486144000.0, "4770": 486144000.0, "4775": 486144000.0, "4780": 486144000.0, "4785": 486144000.0, "4790": 486144000.0, "4795": 486144000.0, "4800": 486144000.0, "4805": 486144000.0, "4810": 486144000.0, "4815": 486144000.0, "4820": 486144000.0, "4825": 486144000.0, "4830": 486144000.0, "4835": 486144000.0, "4840": 486144000.0, "4845": 486144000.0, "4850": 486144000.0, "4855": 486144000.0, "4860": 486144000.0, "4865": 486144000.0, "4870": 486144000.0, "4875": 486144000.0, "4880": 486144000.0, "4885": 486144000.0, "4890": 486144000.0, "4895": 486144000.0, "4900": 486144000.0, "4905": 486144000.0, "4910": 486144000.0, "4915": 486144000.0, "4920": 486144000.0, "4925": 486144000.0, "4930": 486144000.0, "4935": 486144000.0, "4940": 486144000.0, "4945": 486144000.0, "4950": 486144000.0, "4955": 486144000.0, "4960": 486144000.0, "4965": 486144000.0, "4970": 486144000.0, "4975": 486144000.0, "4980": 486144000.0, "4985": 486144000.0, "4990": 486144000.0, "4995": 486144000.0, "5000": 486144000.0, "5005": 486144000.0, "5010": 486144000.0, "5015": 486144000.0, "5020": 486144000.0, "5025": 486144000.0, "5030": 486144000.0, "5035": 486144000.0, "5040": 486144000.0, "5045": 486144000.0, "5050": 486144000.0, "5055": 486144000.0, "5060": 486144000.0, "5065": 486144000.0, "5070": 486144000.0, "5075": 486144000.0, "5080": 486144000.0, "5085": 486144000.0, "5090": 486144000.0, "5095": 486144000.0, "5100": 486144000.0, "5105": 486144000.0, "5110": 486144000.0, "5115": 486144000.0, "5120": 486144000.0, "5125": 486144000.0, "5130": 486144000.0, "5135": 486144000.0, "5140": 486144000.0, "5145": 486144000.0, "5150": 486144000.0, "5155": 486144000.0, "5160": 486144000.0, "5165": 486144000.0, "5170": 486144000.0, "5175": 486144000.0, "5180": 486144000.0, "5185": 486144000.0, "5190": 486144000.0, "5195": 486144000.0, "5200": 486144000.0, "5205": 486144000.0, "5210": 486144000.0, "5215": 486144000.0, "5220": 486144000.0, "5225": 486144000.0, "5230": 486144000.0, "5235": 486144000.0, "5240": 486144000.0, "5245": 486144000.0, "5250": 486144000.0, "5255": 486144000.0, "5260": 486144000.0, "5265": 486144000.0, "5270": 486144000.0, "5275": 486144000.0, "5280": 486144000.0, "5285": 486144000.0, "5290": 486144000.0, "5295": 486144000.0, "5300": 486144000.0, "5305": 486144000.0, "5310": 486144000.0, "5315": 486144000.0, "5320": 486144000.0, "5325": 486144000.0, "5330": 486144000.0, "5335": 486144000.0, "5340": 486144000.0, "5345": 486144000.0, "5350": 486144000.0, "5355": 486144000.0, "5360": 486144000.0, "5365": 486144000.0, "5370": 486144000.0, "5375": 486144000.0, "5380": 486144000.0, "5385": 486144000.0, "5390": 486144000.0, "5395": 486144000.0, "5400": 486144000.0, "5405": 486144000.0, "5410": 486144000.0, "5415": 486144000.0, "5420": 486144000.0, "5425": 486144000.0, "5430": 486144000.0, "5435": 486144000.0, "5440": 486144000.0, "5445": 486144000.0, "5450": 486144000.0, "5455": 486144000.0, "5460": 486144000.0, "5465": 486144000.0, "5470": 486144000.0, "5475": 486144000.0, "5480": 486144000.0, "5485": 486144000.0, "5490": 486144000.0, "5495": 486144000.0, "5500": 486144000.0, "5505": 486144000.0, "5510": 486144000.0, "5515": 486144000.0, "5520": 486144000.0, "5525": 486144000.0, "5530": 486144000.0, "5535": 486144000.0, "5540": 486144000.0, "5545": 486144000.0, "5550": 486144000.0, "5555": 486144000.0, "5560": 486144000.0, "5565": 486144000.0, "5570": 486144000.0, "5575": 486144000.0, "5580": 486144000.0, "5585": 486144000.0, "5590": 486144000.0, "5595": 486144000.0, "5600": 486144000.0, "5605": 486144000.0, "5610": 486144000.0, "5615": 486144000.0, "5620": 486144000.0, "5625": 486144000.0, "5630": 486144000.0, "5635": 486144000.0, "5640": 486144000.0, "5645": 486144000.0, "5650": 486144000.0, "5655": 486144000.0, "5660": 486144000.0, "5665": 486144000.0, "5670": 486144000.0, "5675": 486144000.0, "5680": 486144000.0, "5685": 486144000.0, "5690": 486144000.0, "5695": 486144000.0, "5700": 486144000.0, "5705": 486144000.0, "5710": 486144000.0, "5715": 486144000.0, "5720": 486144000.0, "5725": 486144000.0, "5730": 486144000.0, "5735": 486144000.0, "5740": 486144000.0, "5745": 486144000.0, "5750": 486144000.0, "5755": 486144000.0, "5760": 486144000.0, "5765": 486144000.0, "5770": 486144000.0, "5775": 486144000.0, "5780": 486144000.0, "5785": 486144000.0, "5790": 486144000.0, "5795": 486144000.0, "5800": 486144000.0, "5805": 486144000.0, "5810": 486144000.0, "5815": 486144000.0, "5820": 486144000.0, "5825": 486144000.0, "5830": 486144000.0, "5835": 486144000.0, "5840": 486144000.0, "5845": 486144000.0, "5850": 486144000.0, "5855": 486144000.0, "5860": 486144000.0, "5865": 486144000.0, "5870": 486144000.0, "5875": 486144000.0, "5880": 486144000.0, "5885": 486144000.0, "5890": 486144000.0, "5895": 486144000.0, "5900": 486144000.0, "5905": 486144000.0, "5910": 486144000.0, "5915": 486144000.0, "5920": 486144000.0, "5925": 486144000.0, "5930": 486144000.0, "5935": 486144000.0, "5940": 486144000.0, "5945": 486144000.0, "5950": 486144000.0, "5955": 486144000.0, "5960": 486144000.0, "5965": 486144000.0, "5970": 486144000.0, "5975": 486144000.0, "5980": 486144000.0, "5985": 486144000.0, "5990": 486144000.0, "5995": 486144000.0, "6000": 486144000.0, "6005": 486144000.0, "6010": 486144000.0, "6015": 486144000.0, "6020": 486144000.0, "6025": 486144000.0, "6030": 486144000.0, "6035": 486144000.0, "6040": 486144000.0, "6045": 486144000.0, "6050": 486144000.0, "6055": 486144000.0, "6060": 486144000.0, "6065": 486144000.0, "6070": 486144000.0, "6075": 486144000.0, "6080": 486144000.0, "6085": 486144000.0, "6090": 486144000.0, "6095": 486144000.0, "6100": 486144000.0, "6105": 486144000.0, "6110": 486144000.0, "6115": 486144000.0, "6120": 486144000.0, "6125": 486144000.0, "6130": 486144000.0, "6135": 486144000.0, "6140": 486144000.0, "6145": 486144000.0, "6150": 486144000.0, "6155": 486144000.0, "6160": 486144000.0, "6165": 486144000.0, "6170": 486144000.0, "6175": 486144000.0, "6180": 486144000.0, "6185": 486144000.0, "6190": 486144000.0, "6195": 486144000.0, "6200": 486144000.0, "6205": 486144000.0, "6210": 486144000.0, "6215": 486144000.0, "6220": 486144000.0, "6225": 486144000.0, "6230": 486144000.0, "6235": 486144000.0, "6240": 486144000.0, "6245": 486144000.0, "6250": 486144000.0, "6255": 486144000.0, "6260": 486144000.0, "6265": 486144000.0, "6270": 486144000.0, "6275": 486144000.0, "6280": 486144000.0, "6285": 486144000.0, "6290": 486144000.0, "6295": 486144000.0, "6300": 486144000.0, "6305": 486144000.0, "6310": 486144000.0, "6315": 486144000.0, "6320": 486144000.0, "6325": 486144000.0, "6330": 486144000.0, "6335": 486144000.0, "6340": 486144000.0, "6345": 486144000.0, "6350": 486144000.0, "6355": 486144000.0, "6360": 486144000.0, "6365": 486144000.0, "6370": 486144000.0, "6375": 486144000.0, "6380": 486144000.0, "6385": 486144000.0, "6390": 486144000.0, "6395": 486144000.0, "6400": 486144000.0, "6405": 486144000.0, "6410": 486144000.0, "6415": 486144000.0, "6420": 486144000.0, "6425": 486144000.0, "6430": 486144000.0, "6435": 486144000.0, "6440": 486144000.0, "6445": 486144000.0, "6450": 486144000.0, "6455": 486144000.0, "6460": 486144000.0, "6465": 486144000.0, "6470": 486144000.0, "6475": 486144000.0, "6480": 486144000.0, "6485": 486144000.0, "6490": 486144000.0, "6495": 486144000.0, "6500": 486144000.0, "6505": 486144000.0, "6510": 486144000.0, "6515": 486144000.0, "6520": 486144000.0, "6525": 486144000.0, "6530": 486144000.0, "6535": 486144000.0, "6540": 486144000.0, "6545": 486144000.0, "6550": 486144000.0, "6555": 486144000.0, "6560": 486144000.0, "6565": 486144000.0, "6570": 486144000.0, "6575": 486144000.0, "6580": 486144000.0, "6585": 486144000.0, "6590": 486144000.0, "6595": 486144000.0, "6600": 486144000.0, "6605": 486144000.0, "6610": 486144000.0, "6615": 486144000.0, "6620": 486144000.0, "6625": 486144000.0, "6630": 486144000.0, "6635": 486144000.0, "6640": 486144000.0, "6645": 486144000.0, "6650": 486144000.0, "6655": 486144000.0, "6660": 486144000.0, "6665": 486144000.0, "6670": 486144000.0, "6675": 486144000.0, "6680": 486144000.0, "6685": 486144000.0, "6690": 486144000.0, "6695": 486144000.0, "6700": 486144000.0, "6705": 486144000.0, "6710": 486144000.0, "6715": 486144000.0, "6720": 486144000.0, "6725": 486144000.0, "6730": 486144000.0, "6735": 486144000.0, "6740": 486144000.0, "6745": 486144000.0, "6750": 486144000.0, "6755": 486144000.0, "6760": 486144000.0, "6765": 486144000.0, "6770": 486144000.0, "6775": 486144000.0, "6780": 486144000.0, "6785": 486144000.0, "6790": 486144000.0, "6795": 486144000.0, "6800": 486144000.0, "6805": 486144000.0, "6810": 486144000.0, "6815": 486144000.0, "6820": 486144000.0, "6825": 486144000.0, "6830": 486144000.0, "6835": 486144000.0, "6840": 486144000.0, "6845": 486144000.0, "6850": 486144000.0, "6855": 486144000.0, "6860": 486144000.0, "6865": 486144000.0, "6870": 486144000.0, "6875": 486144000.0, "6880": 486144000.0, "6885": 486144000.0, "6890": 486144000.0, "6895": 486144000.0, "6900": 486144000.0, "6905": 486144000.0, "6910": 486144000.0, "6915": 486144000.0, "6920": 486144000.0, "6925": 486144000.0, "6930": 486144000.0, "6935": 486144000.0, "6940": 486144000.0, "6945": 486144000.0, "6950": 486144000.0, "6955": 486144000.0, "6960": 486144000.0, "6965": 486144000.0, "6970": 486144000.0, "6975": 486144000.0, "6980": 486144000.0, "6985": 486144000.0, "6990": 486144000.0, "6995": 486144000.0, "7000": 486144000.0, "7005": 486144000.0, "7010": 486144000.0, "7015": 486144000.0, "7020": 486144000.0, "7025": 486144000.0, "7030": 486144000.0, "7035": 486144000.0, "7040": 486144000.0, "7045": 486144000.0, "7050": 486144000.0, "7055": 486144000.0, "7060": 486144000.0, "7065": 486144000.0, "7070": 486144000.0, "7075": 486144000.0, "7080": 486144000.0, "7085": 486144000.0, "7090": 486144000.0, "7095": 486144000.0, "7100": 486144000.0, "7105": 486144000.0, "7110": 486144000.0, "7115": 486144000.0, "7120": 486144000.0, "7125": 486144000.0, "7130": 486144000.0, "7135": 486144000.0, "7140": 486144000.0, "7145": 486144000.0, "7150": 486144000.0, "7155": 486144000.0, "7160": 486144000.0, "7165": 486144000.0, "7170": 486144000.0, "7175": 486144000.0, "7180": 486144000.0, "7185": 486144000.0, "7190": 486144000.0, "7195": 486144000.0, "7200": 486144000.0, "7205": 486144000.0, "7210": 486144000.0, "7215": 486144000.0, "7220": 486144000.0, "7225": 486144000.0, "7230": 486144000.0, "7235": 486144000.0, "7240": 486144000.0, "7245": 486144000.0, "7250": 486144000.0, "7255": 486144000.0, "7260": 486144000.0, "7265": 486144000.0, "7270": 486144000.0, "7275": 486144000.0, "7280": 486144000.0, "7285": 486144000.0, "7290": 486144000.0, "7295": 486144000.0, "7300": 486144000.0, "7305": 486144000.0, "7310": 486144000.0, "7315": 486144000.0, "7320": 486144000.0, "7325": 486144000.0, "7330": 486144000.0, "7335": 486144000.0, "7340": 486144000.0, "7345": 486144000.0, "7350": 486144000.0, "7355": 486144000.0, "7360": 486144000.0, "7365": 486144000.0, "7370": 486144000.0, "7375": 486144000.0, "7380": 486144000.0, "7385": 486144000.0, "7390": 486144000.0, "7395": 486144000.0, "7400": 486144000.0, "7405": 486144000.0, "7410": 486144000.0, "7415": 486144000.0, "7420": 486144000.0, "7425": 486144000.0, "7430": 486144000.0, "7435": 486144000.0, "7440": 486144000.0, "7445": 486144000.0, "7450": 486144000.0, "7455": 486144000.0, "7460": 486144000.0, "7465": 486144000.0, "7470": 486144000.0, "7475": 486144000.0, "7480": 486144000.0, "7485": 486144000.0, "7490": 486144000.0, "7495": 486144000.0, "7500": 486144000.0, "7505": 486144000.0, "7510": 486144000.0, "7515": 486144000.0, "7520": 486144000.0, "7525": 486144000.0, "7530": 486144000.0, "7535": 486144000.0, "7540": 486144000.0, "7545": 486144000.0, "7550": 486144000.0, "7555": 486144000.0, "7560": 486144000.0, "7565": 486144000.0, "7570": 486144000.0, "7575": 486144000.0, "7580": 486144000.0, "7585": 486144000.0, "7590": 486144000.0, "7595": 486144000.0, "7600": 486144000.0, "7605": 486144000.0, "7610": 486144000.0, "7615": 486144000.0, "7620": 486144000.0, "7625": 486144000.0, "7630": 486144000.0, "7635": 486144000.0, "7640": 486144000.0, "7645": 486144000.0, "7650": 486144000.0, "7655": 486144000.0, "7660": 486144000.0, "7665": 486144000.0, "7670": 486144000.0, "7675": 486144000.0, "7680": 486144000.0, "7685": 486144000.0, "7690": 486144000.0, "7695": 486144000.0, "7700": 486144000.0, "7705": 486144000.0, "7710": 486144000.0, "7715": 486144000.0, "7720": 486144000.0, "7725": 486144000.0, "7730": 486144000.0, "7735": 486144000.0, "7740": 486144000.0, "7745": 486144000.0, "7750": 486144000.0, "7755": 486144000.0, "7760": 486144000.0, "7765": 486144000.0, "7770": 486144000.0, "7775": 486144000.0, "7780": 486144000.0, "7785": 486144000.0, "7790": 486144000.0, "7795": 486144000.0, "7800": 486144000.0, "7805": 486144000.0, "7810": 486144000.0, "7815": 486144000.0, "7820": 486144000.0, "7825": 486144000.0, "7830": 486144000.0, "7835": 486144000.0, "7840": 486144000.0, "7845": 486144000.0, "7850": 486144000.0, "7855": 486144000.0, "7860": 486144000.0, "7865": 486144000.0, "7870": 486144000.0, "7875": 486144000.0, "7880": 486144000.0, "7885": 486144000.0, "7890": 486144000.0, "7895": 486144000.0, "7900": 486144000.0, "7905": 486144000.0, "7910": 486144000.0, "7915": 486144000.0, "7920": 486144000.0, "7925": 486144000.0, "7930": 486144000.0, "7935": 486144000.0, "7940": 486144000.0, "7945": 486144000.0, "7950": 486144000.0, "7955": 486144000.0, "7960": 486144000.0, "7965": 486144000.0, "7970": 486144000.0, "7975": 486144000.0, "7980": 486144000.0, "7985": 486144000.0, "7990": 486144000.0, "7995": 486144000.0, "8000": 486144000.0, "8005": 486144000.0, "8010": 486144000.0, "8015": 486144000.0, "8020": 486144000.0, "8025": 486144000.0, "8030": 486144000.0, "8035": 486144000.0, "8040": 486144000.0, "8045": 486144000.0, "8050": 486144000.0, "8055": 486144000.0, "8060": 486144000.0, "8065": 486144000.0, "8070": 486144000.0, "8075": 486144000.0, "8080": 486144000.0, "8085": 486144000.0, "8090": 486144000.0, "8095": 486144000.0, "8100": 486144000.0, "8105": 486144000.0, "8110": 486144000.0, "8115": 486144000.0, "8120": 486144000.0, "8125": 486144000.0, "8130": 486144000.0, "8135": 486144000.0, "8140": 486144000.0, "8145": 486144000.0, "8150": 486144000.0, "8155": 486144000.0, "8160": 486144000.0, "8165": 486144000.0, "8170": 486144000.0, "8175": 486144000.0, "8180": 486144000.0, "8185": 486144000.0, "8190": 486144000.0, "8195": 486144000.0, "8200": 486144000.0, "8205": 486144000.0, "8210": 486144000.0, "8215": 486144000.0, "8220": 486144000.0, "8225": 486144000.0, "8230": 486144000.0, "8235": 486144000.0, "8240": 486144000.0, "8245": 486144000.0, "8250": 486144000.0, "8255": 486144000.0, "8260": 486144000.0, "8265": 486144000.0, "8270": 486144000.0, "8275": 486144000.0, "8280": 486144000.0, "8285": 486144000.0, "8290": 486144000.0, "8295": 486144000.0, "8300": 486144000.0, "8305": 486144000.0, "8310": 486144000.0, "8315": 486144000.0, "8320": 486144000.0, "8325": 486144000.0, "8330": 486144000.0, "8335": 486144000.0, "8340": 486144000.0, "8345": 486144000.0, "8350": 486144000.0, "8355": 486144000.0, "8360": 486144000.0, "8365": 486144000.0, "8370": 486144000.0, "8375": 486144000.0, "8380": 486144000.0, "8385": 486144000.0, "8390": 486144000.0, "8395": 486144000.0, "8400": 486144000.0, "8405": 486144000.0, "8410": 486144000.0, "8415": 486144000.0, "8420": 486144000.0, "8425": 486144000.0, "8430": 486144000.0, "8435": 486144000.0, "8440": 486144000.0, "8445": 486144000.0, "8450": 486144000.0, "8455": 486144000.0, "8460": 486144000.0, "8465": 486144000.0, "8470": 486144000.0, "8475": 486144000.0, "8480": 486144000.0, "8485": 486144000.0, "8490": 486144000.0, "8495": 486144000.0, "8500": 486144000.0, "8505": 486144000.0, "8510": 486144000.0, "8515": 486144000.0, "8520": 486144000.0, "8525": 486144000.0, "8530": 486144000.0, "8535": 486144000.0, "8540": 486144000.0, "8545": 486144000.0, "8550": 486144000.0, "8555": 486144000.0, "8560": 486144000.0, "8565": 486144000.0, "8570": 486144000.0, "8575": 486144000.0, "8580": 486144000.0, "8585": 486144000.0, "8590": 486144000.0, "8595": 486144000.0, "8600": 486144000.0, "8605": 486144000.0, "8610": 486144000.0, "8615": 486144000.0, "8620": 486144000.0, "8625": 486144000.0, "8630": 486144000.0, "8635": 486144000.0, "8640": 486144000.0, "8645": 486144000.0, "8650": 486144000.0, "8655": 486144000.0, "8660": 486144000.0, "8665": 486144000.0, "8670": 486144000.0, "8675": 486144000.0, "8680": 486144000.0, "8685": 486144000.0, "8690": 486144000.0, "8695": 486144000.0, "8700": 486144000.0, "8705": 486144000.0, "8710": 486144000.0, "8715": 486144000.0, "8720": 486144000.0, "8725": 486144000.0, "8730": 486144000.0, "8735": 486144000.0, "8740": 486144000.0, "8745": 486144000.0, "8750": 486144000.0, "8755": 486144000.0, "8760": 486144000.0, "8765": 486144000.0, "8770": 486144000.0, "8775": 486144000.0, "8780": 486144000.0, "8785": 486144000.0, "8790": 486144000.0, "8795": 486144000.0, "8800": 486144000.0, "8805": 486144000.0, "8810": 486144000.0, "8815": 486144000.0, "8820": 486144000.0, "8825": 486144000.0, "8830": 486144000.0, "8835": 486144000.0, "8840": 486144000.0, "8845": 486144000.0, "8850": 486144000.0, "8855": 486144000.0, "8860": 486144000.0, "8865": 486144000.0, "8870": 486144000.0, "8875": 486144000.0, "8880": 486144000.0, "8885": 486144000.0, "8890": 486144000.0, "8895": 486144000.0, "8900": 486144000.0, "8905": 486144000.0, "8910": 486144000.0, "8915": 486144000.0, "8920": 486144000.0, "8925": 486144000.0, "8930": 486144000.0, "8935": 486144000.0, "8940": 486144000.0, "8945": 486144000.0, "8950": 486144000.0, "8955": 486144000.0, "8960": 486144000.0, "8965": 486144000.0, "8970": 486144000.0, "8975": 486144000.0, "8980": 486144000.0, "8985": 486144000.0, "8990": 486144000.0, "8995": 486144000.0, "9000": 486144000.0, "9005": 486144000.0, "9010": 486144000.0, "9015": 486144000.0, "9020": 486144000.0, "9025": 486144000.0, "9030": 486144000.0, "9035": 486144000.0, "9040": 486144000.0, "9045": 486144000.0, "9050": 486144000.0, "9055": 486144000.0, "9060": 486144000.0, "9065": 486144000.0, "9070": 486144000.0, "9075": 486144000.0, "9080": 486144000.0, "9085": 486144000.0, "9090": 486144000.0, "9095": 486144000.0, "9100": 486144000.0, "9105": 486144000.0, "9110": 486144000.0, "9115": 486144000.0, "9120": 486144000.0, "9125": 486144000.0, "9130": 486144000.0, "9135": 486144000.0, "9140": 486144000.0, "9145": 486144000.0, "9150": 486144000.0, "9155": 486144000.0, "9160": 486144000.0, "9165": 486144000.0, "9170": 486144000.0, "9175": 486144000.0, "9180": 486144000.0, "9185": 486144000.0, "9190": 486144000.0, "9195": 486144000.0, "9200": 486144000.0, "9205": 486144000.0, "9210": 486144000.0, "9215": 486144000.0, "9220": 486144000.0, "9225": 486144000.0, "9230": 486144000.0, "9235": 486144000.0, "9240": 486144000.0, "9245": 486144000.0, "9250": 486144000.0, "9255": 486144000.0, "9260": 486144000.0, "9265": 486144000.0, "9270": 486144000.0, "9275": 486144000.0, "9280": 486144000.0, "9285": 486144000.0, "9290": 486144000.0, "9295": 486144000.0, "9300": 486144000.0, "9305": 486144000.0, "9310": 486144000.0, "9315": 486144000.0, "9320": 486144000.0, "9325": 486144000.0, "9330": 486144000.0, "9335": 486144000.0, "9340": 486144000.0, "9345": 486144000.0, "9350": 486144000.0, "9355": 486144000.0, "9360": 486144000.0, "9365": 486144000.0, "9370": 486144000.0, "9375": 486144000.0, "9380": 486144000.0, "9385": 486144000.0, "9390": 486144000.0, "9395": 486144000.0, "9400": 486144000.0, "9405": 486144000.0, "9410": 486144000.0, "9415": 486144000.0, "9420": 486144000.0, "9425": 486144000.0, "9430": 486144000.0, "9435": 486144000.0, "9440": 486144000.0, "9445": 486144000.0, "9450": 486144000.0, "9455": 486144000.0, "9460": 486144000.0, "9465": 486144000.0, "9470": 486144000.0, "9475": 486144000.0, "9480": 486144000.0, "9485": 486144000.0, "9490": 486144000.0, "9495": 486144000.0, "9500": 486144000.0, "9505": 486144000.0, "9510": 486144000.0, "9515": 486144000.0, "9520": 486144000.0, "9525": 486144000.0, "9530": 486144000.0, "9535": 486144000.0, "9540": 486144000.0, "9545": 486144000.0, "9550": 486144000.0, "9555": 486144000.0, "9560": 486144000.0, "9565": 486144000.0, "9570": 486144000.0, "9575": 486144000.0, "9580": 486144000.0, "9585": 486144000.0, "9590": 486144000.0, "9595": 486144000.0, "9600": 486144000.0, "9605": 486144000.0, "9610": 486144000.0, "9615": 486144000.0, "9620": 486144000.0, "9625": 486144000.0, "9630": 486144000.0, "9635": 486144000.0, "9640": 486144000.0, "9645": 486144000.0, "9650": 486144000.0, "9655": 486144000.0, "9660": 486144000.0, "9665": 486144000.0, "9670": 486144000.0, "9675": 486144000.0, "9680": 486144000.0, "9685": 486144000.0, "9690": 486144000.0, "9695": 486144000.0, "9700": 486144000.0, "9705": 486144000.0, "9710": 486144000.0, "9715": 486144000.0, "9720": 486144000.0, "9725": 486144000.0, "9730": 486144000.0, "9735": 486144000.0, "9740": 486144000.0, "9745": 486144000.0, "9750": 486144000.0, "9755": 486144000.0, "9760": 486144000.0, "9765": 486144000.0, "9770": 486144000.0, "9775": 486144000.0, "9780": 486144000.0, "9785": 486144000.0, "9790": 486144000.0, "9795": 486144000.0, "9800": 486144000.0, "9805": 486144000.0, "9810": 486144000.0, "9815": 486144000.0, "9820": 486144000.0, "9825": 486144000.0, "9830": 486144000.0, "9835": 486144000.0, "9840": 486144000.0, "9845": 486144000.0, "9850": 486144000.0, "9855": 486144000.0, "9860": 486144000.0, "9865": 486144000.0, "9870": 486144000.0, "9875": 486144000.0, "9880": 486144000.0, "9885": 486144000.0, "9890": 486144000.0, "9895": 486144000.0, "9900": 486144000.0, "9905": 486144000.0, "9910": 486144000.0, "9915": 486144000.0, "9920": 486144000.0, "9925": 486144000.0, "9930": 486144000.0, "9935": 486144000.0, "9940": 486144000.0, "9945": 486144000.0, "9950": 486144000.0, "9955": 486144000.0, "9960": 486144000.0, "9965": 486144000.0, "9970": 486144000.0, "9975": 486144000.0, "9980": 486144000.0, "9985": 486144000.0, "9990": 486144000.0, "9995": 486144000.0, "10000": 486144000.0, "10005": 486144000.0, "10010": 486144000.0, "10015": 486144000.0, "10020": 486144000.0, "10025": 486144000.0, "10030": 486144000.0, "10035": 486144000.0, "10040": 486144000.0, "10045": 486144000.0, "10050": 486144000.0, "10055": 486144000.0, "10060": 486144000.0, "10065": 486144000.0, "10070": 486144000.0, "10075": 486144000.0, "10080": 486144000.0, "10085": 486144000.0, "10090": 486144000.0, "10095": 486144000.0, "10100": 486144000.0, "10105": 486144000.0, "10110": 486144000.0, "10115": 486144000.0, "10120": 486144000.0, "10125": 486144000.0, "10130": 486144000.0, "10135": 486144000.0, "10140": 486144000.0, "10145": 486144000.0, "10150": 486144000.0, "10155": 486144000.0, "10160": 486144000.0, "10165": 486144000.0, "10170": 486144000.0, "10175": 486144000.0, "10180": 486144000.0, "10185": 486144000.0, "10190": 486144000.0, "10195": 486144000.0, "10200": 486144000.0, "10205": 486144000.0, "10210": 486144000.0, "10215": 486144000.0, "10220": 486144000.0, "10225": 486144000.0, "10230": 486144000.0, "10235": 486144000.0, "10240": 486144000.0, "10245": 486144000.0, "10250": 486144000.0, "10255": 486144000.0, "10260": 486144000.0, "10265": 486144000.0, "10270": 486144000.0, "10275": 486144000.0, "10280": 486144000.0, "10285": 486144000.0, "10290": 486144000.0, "10295": 486144000.0, "10300": 486144000.0, "10305": 486144000.0, "10310": 486144000.0, "10315": 486144000.0, "10320": 486144000.0, "10325": 486144000.0, "10330": 486144000.0, "10335": 486144000.0, "10340": 486144000.0, "10345": 486144000.0, "10350": 486144000.0, "10355": 486144000.0, "10360": 486144000.0, "10365": 486144000.0, "10370": 486144000.0, "10375": 486144000.0, "10380": 486144000.0, "10385": 486144000.0, "10390": 486144000.0, "10395": 486144000.0, "10400": 486144000.0, "10405": 486144000.0, "10410": 486144000.0, "10415": 486144000.0, "10420": 486144000.0, "10425": 486144000.0, "10430": 486144000.0, "10435": 486144000.0, "10440": 486144000.0, "10445": 486144000.0, "10450": 486144000.0, "10455": 486144000.0, "10460": 486144000.0, "10465": 486144000.0, "10470": 486144000.0, "10475": 486144000.0, "10480": 486144000.0, "10485": 486144000.0, "10490": 486144000.0, "10495": 486144000.0, "10500": 486144000.0, "10505": 486144000.0, "10510": 486144000.0, "10515": 486144000.0, "10520": 486144000.0, "10525": 486144000.0, "10530": 486144000.0, "10535": 486144000.0, "10540": 486144000.0, "10545": 486144000.0, "10550": 486144000.0, "10555": 486144000.0, "10560": 486144000.0, "10565": 486144000.0, "10570": 486144000.0, "10575": 486144000.0, "10580": 486144000.0, "10585": 486144000.0, "10590": 486144000.0, "10595": 486144000.0, "10600": 486144000.0, "10605": 486144000.0, "10610": 486144000.0, "10615": 486144000.0, "10620": 486144000.0, "10625": 486144000.0, "10630": 486144000.0, "10635": 486144000.0, "10640": 486144000.0, "10645": 486144000.0, "10650": 486144000.0, "10655": 486144000.0, "10660": 486144000.0, "10665": 486144000.0, "10670": 486144000.0, "10675": 486144000.0, "10680": 486144000.0, "10685": 486144000.0, "10690": 486144000.0, "10695": 486144000.0, "10700": 486144000.0, "10705": 486144000.0, "10710": 486144000.0, "10715": 486144000.0, "10720": 486144000.0, "10725": 486144000.0, "10730": 486144000.0, "10735": 486144000.0, "10740": 486144000.0, "10745": 486144000.0, "10750": 486144000.0, "10755": 486144000.0, "10760": 486144000.0, "10765": 486144000.0, "10770": 486144000.0, "10775": 486144000.0, "10780": 486144000.0, "10785": 486144000.0, "10790": 486144000.0, "10795": 486144000.0, "10800": 486144000.0, "10805": 486144000.0, "10810": 486144000.0, "10815": 486144000.0, "10820": 486144000.0, "10825": 486144000.0, "10830": 486144000.0, "10835": 486144000.0, "10840": 486144000.0, "10845": 486144000.0, "10850": 486144000.0, "10855": 486144000.0, "10860": 486144000.0, "10865": 486144000.0, "10870": 486144000.0, "10875": 486144000.0, "10880": 486144000.0, "10885": 486144000.0, "10890": 486144000.0, "10895": 486144000.0, "10900": 486144000.0, "10905": 486144000.0, "10910": 486144000.0, "10915": 486144000.0, "10920": 486144000.0, "10925": 486144000.0, "10930": 486144000.0, "10935": 486144000.0, "10940": 486144000.0, "10945": 486144000.0, "10950": 486144000.0, "10955": 486144000.0, "10960": 486144000.0, "10965": 486144000.0, "10970": 486144000.0, "10975": 486144000.0, "10980": 486144000.0, "10985": 486144000.0, "10990": 486144000.0, "10995": 486144000.0, "11000": 486144000.0, "11005": 486144000.0, "11010": 486144000.0, "11015": 486144000.0, "11020": 486144000.0, "11025": 486144000.0, "11030": 486144000.0, "11035": 486144000.0, "11040": 486144000.0, "11045": 486144000.0, "11050": 486144000.0, "11055": 486144000.0, "11060": 486144000.0, "11065": 486144000.0, "11070": 486144000.0, "11075": 486144000.0, "11080": 486144000.0, "11085": 486144000.0, "11090": 486144000.0, "11095": 486144000.0, "11100": 486144000.0, "11105": 486144000.0, "11110": 486144000.0, "11115": 486144000.0, "11120": 486144000.0, "11125": 486144000.0, "11130": 486144000.0, "11135": 486144000.0, "11140": 486144000.0, "11145": 486144000.0, "11150": 486144000.0, "11155": 486144000.0, "11160": 486144000.0, "11165": 486144000.0, "11170": 486144000.0, "11175": 486144000.0, "11180": 486144000.0, "11185": 486144000.0, "11190": 486144000.0, "11195": 486144000.0, "11200": 486144000.0, "11205": 486144000.0, "11210": 486144000.0, "11215": 486144000.0, "11220": 486144000.0, "11225": 486144000.0, "11230": 486144000.0, "11235": 486144000.0, "11240": 486144000.0, "11245": 486144000.0, "11250": 486144000.0, "11255": 486144000.0, "11260": 486144000.0, "11265": 486144000.0, "11270": 486144000.0, "11275": 486144000.0, "11280": 486144000.0, "11285": 486144000.0, "11290": 486144000.0, "11295": 486144000.0, "11300": 486144000.0, "11305": 486144000.0, "11310": 486144000.0, "11315": 486144000.0, "11320": 486144000.0, "11325": 486144000.0, "11330": 486144000.0, "11335": 486144000.0, "11340": 486144000.0, "11345": 486144000.0, "11350": 486144000.0, "11355": 486144000.0, "11360": 486144000.0, "11365": 486144000.0, "11370": 486144000.0, "11375": 486144000.0, "11380": 486144000.0, "11385": 486144000.0, "11390": 486144000.0, "11395": 486144000.0, "11400": 486144000.0, "11405": 486144000.0, "11410": 486144000.0, "11415": 486144000.0, "11420": 486144000.0, "11425": 486144000.0, "11430": 486144000.0, "11435": 486144000.0, "11440": 486144000.0, "11445": 486144000.0, "11450": 486144000.0, "11455": 486144000.0, "11460": 486144000.0, "11465": 486144000.0, "11470": 486144000.0, "11475": 486144000.0, "11480": 486144000.0, "11485": 486144000.0, "11490": 486144000.0, "11495": 486144000.0, "11500": 486144000.0, "11505": 486144000.0, "11510": 486144000.0, "11515": 486144000.0, "11520": 486144000.0, "11525": 486144000.0, "11530": 486144000.0, "11535": 486144000.0, "11540": 486144000.0, "11545": 486144000.0, "11550": 486144000.0, "11555": 486144000.0, "11560": 486144000.0, "11565": 486144000.0, "11570": 486144000.0, "11575": 486144000.0, "11580": 486144000.0, "11585": 486144000.0, "11590": 486144000.0, "11595": 486144000.0, "11600": 486144000.0, "11605": 486144000.0, "11610": 486144000.0, "11615": 486144000.0, "11620": 486144000.0, "11625": 486144000.0, "11630": 486144000.0, "11635": 486144000.0, "11640": 486144000.0, "11645": 486144000.0, "11650": 486144000.0, "11655": 486144000.0, "11660": 486144000.0, "11665": 486144000.0, "11670": 486144000.0, "11675": 486144000.0, "11680": 486144000.0, "11685": 486144000.0, "11690": 486144000.0, "11695": 486144000.0, "11700": 486144000.0, "11705": 486144000.0, "11710": 486144000.0, "11715": 486144000.0, "11720": 486144000.0, "11725": 486144000.0, "11730": 486144000.0, "11735": 486144000.0, "11740": 486144000.0, "11745": 486144000.0, "11750": 486144000.0, "11755": 486144000.0, "11760": 486144000.0, "11765": 486144000.0, "11770": 486144000.0, "11775": 486144000.0, "11780": 486144000.0, "11785": 486144000.0, "11790": 486144000.0, "11795": 486144000.0, "11800": 486144000.0, "11805": 486144000.0, "11810": 486144000.0, "11815": 486144000.0, "11820": 486144000.0, "11825": 486144000.0, "11830": 486144000.0, "11835": 486144000.0, "11840": 486144000.0, "11845": 486144000.0, "11850": 486144000.0, "11855": 486144000.0, "11860": 486144000.0, "11865": 486144000.0, "11870": 486144000.0, "11875": 486144000.0, "11880": 486144000.0, "11885": 486144000.0, "11890": 486144000.0, "11895": 486144000.0, "11900": 486144000.0, "11905": 486144000.0, "11910": 486144000.0, "11915": 486144000.0, "11920": 486144000.0, "11925": 486144000.0, "11930": 486144000.0, "11935": 486144000.0, "11940": 486144000.0, "11945": 486144000.0, "11950": 486144000.0, "11955": 486144000.0, "11960": 486144000.0, "11965": 486144000.0, "11970": 486144000.0, "11975": 486144000.0, "11980": 486144000.0, "11985": 486144000.0, "11990": 486144000.0, "11995": 486144000.0, "12000": 486144000.0, "12005": 486144000.0, "12010": 486144000.0, "12015": 486144000.0, "12020": 486144000.0, "12025": 486144000.0, "12030": 486144000.0, "12035": 486144000.0, "12040": 486144000.0, "12045": 486144000.0, "12050": 486144000.0, "12055": 486144000.0, "12060": 486144000.0, "12065": 486144000.0, "12070": 486144000.0, "12075": 486144000.0, "12080": 486144000.0, "12085": 486144000.0, "12090": 486144000.0, "12095": 486144000.0, "12100": 486144000.0, "12105": 486144000.0, "12110": 486144000.0, "12115": 486144000.0, "12120": 486144000.0, "12125": 486144000.0, "12130": 486144000.0, "12135": 486144000.0, "12140": 486144000.0, "12145": 486144000.0, "12150": 486144000.0, "12155": 486144000.0, "12160": 486144000.0, "12165": 486144000.0, "12170": 486144000.0, "12175": 486144000.0, "12180": 486144000.0, "12185": 486144000.0, "12190": 486144000.0, "12195": 486144000.0, "12200": 486144000.0, "12205": 486144000.0, "12210": 486144000.0, "12215": 486144000.0, "12220": 486144000.0, "12225": 486144000.0, "12230": 486144000.0, "12235": 486144000.0, "12240": 486144000.0, "12245": 486144000.0, "12250": 486144000.0, "12255": 486144000.0, "12260": 486144000.0, "12265": 486144000.0, "12270": 486144000.0, "12275": 486144000.0, "12280": 486144000.0, "12285": 486144000.0, "12290": 486144000.0, "12295": 486144000.0, "12300": 486144000.0, "12305": 486144000.0, "12310": 486144000.0, "12315": 486144000.0, "12320": 486144000.0, "12325": 486144000.0, "12330": 486144000.0, "12335": 486144000.0, "12340": 486144000.0, "12345": 486144000.0, "12350": 486144000.0, "12355": 486144000.0, "12360": 486144000.0, "12365": 486144000.0, "12370": 486144000.0, "12375": 486144000.0, "12380": 486144000.0, "12385": 486144000.0, "12390": 486144000.0, "12395": 486144000.0, "12400": 486144000.0, "12405": 486144000.0, "12410": 486144000.0, "12415": 486144000.0, "12420": 486144000.0, "12425": 486144000.0, "12430": 486144000.0, "12435": 486144000.0, "12440": 486144000.0, "12445": 486144000.0, "12450": 486144000.0, "12455": 486144000.0, "12460": 486144000.0, "12465": 486144000.0, "12470": 486144000.0, "12475": 486144000.0, "12480": 486144000.0, "12485": 486144000.0, "12490": 486144000.0, "12495": 486144000.0, "12500": 486144000.0, "12505": 486144000.0, "12510": 486144000.0, "12515": 486144000.0, "12520": 486144000.0, "12525": 486144000.0, "12530": 486144000.0, "12535": 486144000.0, "12540": 486144000.0, "12545": 486144000.0, "12550": 486144000.0, "12555": 486144000.0, "12560": 486144000.0, "12565": 486144000.0, "12570": 486144000.0, "12575": 486144000.0, "12580": 486144000.0, "12585": 486144000.0, "12590": 486144000.0, "12595": 486144000.0, "12600": 486144000.0, "12605": 486144000.0, "12610": 486144000.0, "12615": 486144000.0, "12620": 486144000.0, "12625": 486144000.0, "12630": 486144000.0, "12635": 486144000.0, "12640": 486144000.0, "12645": 486144000.0, "12650": 486144000.0, "12655": 486144000.0, "12660": 486144000.0, "12665": 486144000.0, "12670": 486144000.0, "12675": 486144000.0, "12680": 486144000.0, "12685": 486144000.0, "12690": 486144000.0, "12695": 486144000.0, "12700": 486144000.0, "12705": 486144000.0, "12710": 486144000.0, "12715": 486144000.0, "12720": 486144000.0, "12725": 486144000.0, "12730": 486144000.0, "12735": 486144000.0, "12740": 486144000.0, "12745": 486144000.0, "12750": 486144000.0, "12755": 486144000.0, "12760": 486144000.0, "12765": 486144000.0, "12770": 486144000.0, "12775": 486144000.0, "12780": 486144000.0, "12785": 486144000.0, "12790": 486144000.0, "12795": 486144000.0, "12800": 486144000.0, "12805": 486144000.0, "12810": 486144000.0, "12815": 486144000.0, "12820": 486144000.0, "12825": 486144000.0, "12830": 486144000.0, "12835": 486144000.0, "12840": 486144000.0, "12845": 486144000.0, "12850": 486144000.0, "12855": 486144000.0, "12860": 486144000.0, "12865": 486144000.0, "12870": 486144000.0, "12875": 486144000.0, "12880": 486144000.0, "12885": 486144000.0, "12890": 486144000.0, "12895": 486144000.0, "12900": 486144000.0, "12905": 486144000.0, "12910": 486144000.0, "12915": 486144000.0, "12920": 486144000.0, "12925": 486144000.0, "12930": 486144000.0, "12935": 486144000.0, "12940": 486144000.0, "12945": 486144000.0, "12950": 486144000.0, "12955": 486144000.0, "12960": 486144000.0, "12965": 486144000.0, "12970": 486144000.0, "12975": 486144000.0, "12980": 486144000.0, "12985": 486144000.0, "12990": 486144000.0, "12995": 486144000.0, "13000": 486144000.0, "13005": 486144000.0, "13010": 486144000.0, "13015": 486144000.0, "13020": 486144000.0, "13025": 486144000.0, "13030": 486144000.0, "13035": 486144000.0, "13040": 486144000.0, "13045": 486144000.0, "13050": 486144000.0, "13055": 486144000.0, "13060": 486144000.0, "13065": 486144000.0, "13070": 486144000.0, "13075": 486144000.0, "13080": 486144000.0, "13085": 486144000.0, "13090": 486144000.0, "13095": 486144000.0, "13100": 486144000.0, "13105": 486144000.0, "13110": 486144000.0, "13115": 486144000.0, "13120": 486144000.0, "13125": 486144000.0, "13130": 486144000.0, "13135": 486144000.0, "13140": 486144000.0, "13145": 486144000.0, "13150": 486144000.0, "13155": 486144000.0, "13160": 486144000.0, "13165": 486144000.0, "13170": 486144000.0, "13175": 486144000.0, "13180": 486144000.0, "13185": 486144000.0, "13190": 486144000.0, "13195": 486144000.0, "13200": 486144000.0, "13205": 486144000.0, "13210": 486144000.0, "13215": 486144000.0, "13220": 486144000.0, "13225": 486144000.0, "13230": 486144000.0, "13235": 486144000.0, "13240": 486144000.0, "13245": 486144000.0, "13250": 486144000.0, "13255": 486144000.0, "13260": 486144000.0, "13265": 486144000.0, "13270": 486144000.0, "13275": 486144000.0, "13280": 486144000.0, "13285": 486144000.0, "13290": 486144000.0, "13295": 486144000.0, "13300": 486144000.0, "13305": 486144000.0, "13310": 486144000.0, "13315": 486144000.0, "13320": 486144000.0, "13325": 486144000.0, "13330": 486144000.0, "13335": 486144000.0, "13340": 486144000.0, "13345": 486144000.0, "13350": 486144000.0, "13355": 486144000.0, "13360": 486144000.0, "13365": 486144000.0, "13370": 486144000.0, "13375": 486144000.0, "13380": 486144000.0, "13385": 486144000.0, "13390": 486144000.0, "13395": 486144000.0, "13400": 486144000.0, "13405": 486144000.0, "13410": 486144000.0, "13415": 486144000.0, "13420": 486144000.0, "13425": 486144000.0, "13430": 486144000.0, "13435": 486144000.0, "13440": 486144000.0, "13445": 486144000.0, "13450": 486144000.0, "13455": 486144000.0, "13460": 486144000.0, "13465": 486144000.0, "13470": 486144000.0, "13475": 486144000.0, "13480": 486144000.0, "13485": 486144000.0, "13490": 486144000.0, "13495": 486144000.0, "13500": 486144000.0, "13505": 486144000.0, "13510": 486144000.0, "13515": 486144000.0, "13520": 486144000.0, "13525": 486144000.0, "13530": 486144000.0, "13535": 486144000.0, "13540": 486144000.0, "13545": 486144000.0, "13550": 486144000.0, "13555": 486144000.0, "13560": 486144000.0, "13565": 486144000.0, "13570": 486144000.0, "13575": 486144000.0, "13580": 486144000.0, "13585": 486144000.0, "13590": 486144000.0, "13595": 486144000.0, "13600": 486144000.0, "13605": 486144000.0, "13610": 486144000.0, "13615": 486144000.0, "13620": 486144000.0, "13625": 486144000.0, "13630": 486144000.0, "13635": 486144000.0, "13640": 486144000.0, "13645": 486144000.0, "13650": 486144000.0, "13655": 486144000.0, "13660": 486144000.0, "13665": 486144000.0, "13670": 486144000.0, "13675": 486144000.0, "13680": 486144000.0, "13685": 486144000.0, "13690": 486144000.0, "13695": 486144000.0, "13700": 486144000.0, "13705": 486144000.0, "13710": 486144000.0, "13715": 486144000.0, "13720": 486144000.0, "13725": 486144000.0, "13730": 486144000.0, "13735": 486144000.0, "13740": 486144000.0, "13745": 486144000.0, "13750": 486144000.0, "13755": 486144000.0, "13760": 486144000.0, "13765": 486144000.0, "13770": 486144000.0, "13775": 486144000.0, "13780": 486144000.0, "13785": 486144000.0, "13790": 486144000.0, "13795": 486144000.0, "13800": 486144000.0, "13805": 486144000.0, "13810": 486144000.0, "13815": 486144000.0, "13820": 486144000.0, "13825": 486144000.0, "13830": 486144000.0, "13835": 486144000.0, "13840": 486144000.0, "13845": 486144000.0, "13850": 486144000.0, "13855": 486144000.0, "13860": 486144000.0, "13865": 486144000.0, "13870": 486144000.0, "13875": 486144000.0, "13880": 486144000.0, "13885": 486144000.0, "13890": 486144000.0, "13895": 486144000.0, "13900": 486144000.0, "13905": 486144000.0, "13910": 486144000.0, "13915": 486144000.0, "13920": 486144000.0, "13925": 486144000.0, "13930": 486144000.0, "13935": 486144000.0, "13940": 486144000.0, "13945": 486144000.0, "13950": 486144000.0, "13955": 486144000.0, "13960": 486144000.0, "13965": 486144000.0, "13970": 486144000.0, "13975": 486144000.0, "13980": 486144000.0, "13985": 486144000.0, "13990": 486144000.0, "13995": 486144000.0, "14000": 486144000.0, "14005": 486144000.0, "14010": 486144000.0, "14015": 486144000.0, "14020": 486144000.0, "14025": 486144000.0, "14030": 486144000.0, "14035": 486144000.0, "14040": 486144000.0, "14045": 486144000.0, "14050": 486144000.0, "14055": 486144000.0, "14060": 486144000.0, "14065": 486144000.0, "14070": 486144000.0, "14075": 486144000.0, "14080": 486144000.0, "14085": 486144000.0, "14090": 486144000.0, "14095": 486144000.0, "14100": 486144000.0, "14105": 486144000.0, "14110": 486144000.0, "14115": 486144000.0, "14120": 486144000.0, "14125": 486144000.0, "14130": 486144000.0, "14135": 486144000.0, "14140": 486144000.0, "14145": 486144000.0, "14150": 486144000.0, "14155": 486144000.0, "14160": 486144000.0, "14165": 486144000.0, "14170": 486144000.0, "14175": 486144000.0, "14180": 486144000.0, "14185": 486144000.0, "14190": 486144000.0, "14195": 486144000.0, "14200": 486144000.0, "14205": 486144000.0, "14210": 486144000.0, "14215": 486144000.0, "14220": 486144000.0, "14225": 486144000.0, "14230": 486144000.0, "14235": 486144000.0, "14240": 486144000.0, "14245": 486144000.0, "14250": 486144000.0, "14255": 486144000.0, "14260": 486144000.0, "14265": 486144000.0, "14270": 486144000.0, "14275": 486144000.0, "14280": 486144000.0, "14285": 486144000.0, "14290": 486144000.0, "14295": 486144000.0, "14300": 486144000.0, "14305": 486144000.0, "14310": 486144000.0, "14315": 486144000.0, "14320": 486144000.0, "14325": 486144000.0, "14330": 486144000.0, "14335": 486144000.0, "14340": 486144000.0, "14345": 486144000.0, "14350": 486144000.0, "14355": 486144000.0, "14360": 486144000.0, "14365": 486144000.0, "14370": 486144000.0, "14375": 486144000.0, "14380": 486144000.0, "14385": 486144000.0, "14390": 486144000.0, "14395": 486144000.0, "14400": 486144000.0, "14405": 486144000.0, "14410": 486144000.0, "14415": 486144000.0, "14420": 486144000.0, "14425": 486144000.0, "14430": 486144000.0, "14435": 486144000.0, "14440": 486144000.0, "14445": 486144000.0, "14450": 486144000.0, "14455": 486144000.0, "14460": 486144000.0, "14465": 486144000.0, "14470": 486144000.0, "14475": 486144000.0, "14480": 486144000.0, "14485": 486144000.0, "14490": 486144000.0, "14495": 486144000.0, "14500": 486144000.0, "14505": 486144000.0, "14510": 486144000.0, "14515": 486144000.0, "14520": 486144000.0, "14525": 486144000.0, "14530": 486144000.0, "14535": 486144000.0, "14540": 486144000.0, "14545": 486144000.0, "14550": 486144000.0, "14555": 486144000.0, "14560": 486144000.0, "14565": 486144000.0, "14570": 486144000.0, "14575": 486144000.0, "14580": 486144000.0, "14585": 486144000.0, "14590": 486144000.0, "14595": 486144000.0, "14600": 486144000.0, "14605": 486144000.0, "14610": 486144000.0, "14615": 486144000.0, "14620": 486144000.0, "14625": 486144000.0, "14630": 486144000.0, "14635": 486144000.0, "14640": 486144000.0, "14645": 486144000.0, "14650": 486144000.0, "14655": 486144000.0, "14660": 486144000.0, "14665": 486144000.0, "14670": 486144000.0, "14675": 486144000.0, "14680": 486144000.0, "14685": 486144000.0, "14690": 486144000.0, "14695": 486144000.0, "14700": 486144000.0, "14705": 486144000.0, "14710": 486144000.0, "14715": 486144000.0, "14720": 486144000.0, "14725": 486144000.0, "14730": 486144000.0, "14735": 486144000.0, "14740": 486144000.0, "14745": 486144000.0, "14750": 486144000.0, "14755": 486144000.0, "14760": 486144000.0, "14765": 486144000.0, "14770": 486144000.0, "14775": 486144000.0, "14780": 486144000.0, "14785": 486144000.0, "14790": 486144000.0, "14795": 486144000.0, "14800": 486144000.0, "14805": 486144000.0, "14810": 486144000.0, "14815": 486144000.0, "14820": 486144000.0, "14825": 486144000.0, "14830": 486144000.0, "14835": 486144000.0, "14840": 486144000.0, "14845": 486144000.0, "14850": 486144000.0, "14855": 486144000.0, "14860": 486144000.0, "14865": 486144000.0, "14870": 486144000.0, "14875": 486144000.0, "14880": 486144000.0, "14885": 486144000.0, "14890": 486144000.0, "14895": 486144000.0, "14900": 486144000.0, "14905": 486144000.0, "14910": 486144000.0, "14915": 486144000.0, "14920": 486144000.0, "14925": 486144000.0, "14930": 486144000.0, "14935": 486144000.0, "14940": 486144000.0, "14945": 486144000.0, "14950": 486144000.0, "14955": 486144000.0, "14960": 486144000.0, "14965": 486144000.0, "14970": 486144000.0, "14975": 486144000.0, "14980": 486144000.0, "14985": 486144000.0, "14990": 486144000.0, "14995": 486144000.0, "15000": 486144000.0, "15005": 486144000.0, "15010": 486144000.0, "15015": 486144000.0, "15020": 486144000.0, "15025": 486144000.0, "15030": 486144000.0, "15035": 486144000.0, "15040": 486144000.0, "15045": 486144000.0, "15050": 486144000.0, "15055": 486144000.0, "15060": 486144000.0, "15065": 486144000.0, "15070": 486144000.0, "15075": 486144000.0, "15080": 486144000.0, "15085": 486144000.0, "15090": 486144000.0, "15095": 486144000.0, "15100": 486144000.0, "15105": 486144000.0, "15110": 486144000.0, "15115": 486144000.0, "15120": 486144000.0, "15125": 486144000.0, "15130": 486144000.0, "15135": 486144000.0, "15140": 486144000.0, "15145": 486144000.0, "15150": 486144000.0, "15155": 486144000.0, "15160": 486144000.0, "15165": 486144000.0, "15170": 486144000.0, "15175": 486144000.0, "15180": 486144000.0, "15185": 486144000.0, "15190": 486144000.0, "15195": 486144000.0, "15200": 486144000.0, "15205": 486144000.0, "15210": 486144000.0, "15215": 486144000.0, "15220": 486144000.0, "15225": 486144000.0, "15230": 486144000.0, "15235": 486144000.0, "15240": 486144000.0, "15245": 486144000.0, "15250": 486144000.0, "15255": 486144000.0, "15260": 486144000.0, "15265": 486144000.0, "15270": 486144000.0, "15275": 486144000.0, "15280": 486144000.0, "15285": 486144000.0, "15290": 486144000.0, "15295": 486144000.0, "15300": 486144000.0, "15305": 486144000.0, "15310": 486144000.0, "15315": 486144000.0, "15320": 486144000.0, "15325": 486144000.0, "15330": 486144000.0, "15335": 486144000.0, "15340": 486144000.0, "15345": 486144000.0, "15350": 486144000.0, "15355": 486144000.0, "15360": 486144000.0, "15365": 486144000.0, "15370": 486144000.0, "15375": 486144000.0, "15380": 486144000.0, "15385": 486144000.0, "15390": 486144000.0, "15395": 486144000.0, "15400": 486144000.0, "15405": 486144000.0, "15410": 486144000.0, "15415": 486144000.0, "15420": 486144000.0, "15425": 486144000.0, "15430": 486144000.0, "15435": 486144000.0, "15440": 486144000.0, "15445": 486144000.0, "15450": 486144000.0, "15455": 486144000.0, "15460": 486144000.0, "15465": 486144000.0, "15470": 486144000.0, "15475": 486144000.0, "15480": 486144000.0, "15485": 486144000.0, "15490": 486144000.0, "15495": 486144000.0, "15500": 486144000.0, "15505": 486144000.0, "15510": 486144000.0, "15515": 486144000.0, "15520": 486144000.0, "15525": 486144000.0, "15530": 486144000.0, "15535": 486144000.0, "15540": 486144000.0, "15545": 486144000.0, "15550": 486144000.0, "15555": 486144000.0, "15560": 486144000.0, "15565": 486144000.0, "15570": 486144000.0, "15575": 486144000.0, "15580": 486144000.0, "15585": 486144000.0, "15590": 486144000.0, "15595": 486144000.0, "15600": 486144000.0, "15605": 486144000.0, "15610": 486144000.0, "15615": 486144000.0, "15620": 486144000.0, "15625": 486144000.0, "15630": 486144000.0, "15635": 486144000.0, "15640": 486144000.0, "15645": 486144000.0, "15650": 486144000.0, "15655": 486144000.0, "15660": 486144000.0, "15665": 486144000.0, "15670": 486144000.0, "15675": 486144000.0, "15680": 486144000.0, "15685": 486144000.0, "15690": 486144000.0, "15695": 486144000.0, "15700": 486144000.0, "15705": 486144000.0, "15710": 486144000.0, "15715": 486144000.0, "15720": 486144000.0, "15725": 486144000.0, "15730": 486144000.0, "15735": 486144000.0, "15740": 486144000.0, "15745": 486144000.0, "15750": 486144000.0, "15755": 486144000.0, "15760": 486144000.0, "15765": 486144000.0, "15770": 486144000.0, "15775": 486144000.0, "15780": 486144000.0, "15785": 486144000.0, "15790": 486144000.0, "15795": 486144000.0, "15800": 486144000.0, "15805": 486144000.0, "15810": 486144000.0, "15815": 486144000.0, "15820": 486144000.0, "15825": 486144000.0, "15830": 486144000.0, "15835": 486144000.0, "15840": 486144000.0, "15845": 486144000.0, "15850": 486144000.0, "15855": 486144000.0, "15860": 486144000.0, "15865": 486144000.0, "15870": 486144000.0, "15875": 486144000.0, "15880": 486144000.0, "15885": 486144000.0, "15890": 486144000.0, "15895": 486144000.0, "15900": 486144000.0, "15905": 486144000.0, "15910": 486144000.0, "15915": 486144000.0, "15920": 486144000.0, "15925": 486144000.0, "15930": 486144000.0, "15935": 486144000.0, "15940": 486144000.0, "15945": 486144000.0, "15950": 486144000.0, "15955": 486144000.0, "15960": 486144000.0, "15965": 486144000.0, "15970": 486144000.0, "15975": 486144000.0, "15980": 486144000.0, "15985": 486144000.0, "15990": 486144000.0, "15995": 486144000.0, "16000": 486144000.0, "16005": 486144000.0, "16010": 486144000.0, "16015": 486144000.0, "16020": 486144000.0, "16025": 486144000.0, "16030": 486144000.0, "16035": 486144000.0, "16040": 486144000.0, "16045": 486144000.0, "16050": 486144000.0, "16055": 486144000.0, "16060": 486144000.0, "16065": 486144000.0, "16070": 486144000.0, "16075": 486144000.0, "16080": 486144000.0, "16085": 486144000.0, "16090": 486144000.0, "16095": 486144000.0, "16100": 486144000.0, "16105": 486144000.0, "16110": 486144000.0, "16115": 486144000.0, "16120": 486144000.0, "16125": 486144000.0, "16130": 486144000.0, "16135": 486144000.0, "16140": 486144000.0, "16145": 486144000.0, "16150": 486144000.0, "16155": 486144000.0, "16160": 486144000.0, "16165": 486144000.0, "16170": 486144000.0, "16175": 486144000.0, "16180": 486144000.0, "16185": 486144000.0, "16190": 486144000.0, "16195": 486144000.0, "16200": 486144000.0, "16205": 486144000.0, "16210": 486144000.0, "16215": 486144000.0, "16220": 486144000.0, "16225": 486144000.0, "16230": 486144000.0, "16235": 486144000.0, "16240": 486144000.0, "16245": 486144000.0, "16250": 486144000.0, "16255": 486144000.0, "16260": 486144000.0, "16265": 486144000.0, "16270": 486144000.0, "16275": 486144000.0, "16280": 486144000.0, "16285": 486144000.0, "16290": 486144000.0, "16295": 486144000.0, "16300": 486144000.0, "16305": 486144000.0, "16310": 486144000.0, "16315": 486144000.0, "16320": 486144000.0, "16325": 486144000.0, "16330": 486144000.0, "16335": 486144000.0, "16340": 486144000.0, "16345": 486144000.0, "16350": 486144000.0, "16355": 486144000.0, "16360": 486144000.0, "16365": 486144000.0, "16370": 486144000.0, "16375": 486144000.0, "16380": 486144000.0, "16385": 486144000.0, "16390": 486144000.0, "16395": 486144000.0, "16400": 486144000.0, "16405": 486144000.0, "16410": 486144000.0, "16415": 486144000.0, "16420": 486144000.0, "16425": 486144000.0, "16430": 486144000.0, "16435": 486144000.0, "16440": 486144000.0, "16445": 486144000.0, "16450": 486144000.0, "16455": 486144000.0, "16460": 486144000.0, "16465": 486144000.0, "16470": 486144000.0, "16475": 486144000.0, "16480": 486144000.0, "16485": 486144000.0, "16490": 486144000.0, "16495": 486144000.0, "16500": 486144000.0, "16505": 486144000.0, "16510": 486144000.0, "16515": 486144000.0, "16520": 486144000.0, "16525": 486144000.0, "16530": 486144000.0, "16535": 486144000.0, "16540": 486144000.0, "16545": 486144000.0, "16550": 486144000.0, "16555": 486144000.0, "16560": 486144000.0, "16565": 486144000.0, "16570": 486144000.0, "16575": 486144000.0, "16580": 486144000.0, "16585": 486144000.0, "16590": 486144000.0, "16595": 486144000.0, "16600": 486144000.0, "16605": 486144000.0, "16610": 486144000.0, "16615": 486144000.0, "16620": 486144000.0, "16625": 486144000.0, "16630": 486144000.0, "16635": 486144000.0, "16640": 486144000.0, "16645": 486144000.0, "16650": 486144000.0, "16655": 486144000.0, "16660": 486144000.0, "16665": 486144000.0, "16670": 486144000.0, "16675": 486144000.0, "16680": 486144000.0, "16685": 486144000.0, "16690": 486144000.0, "16695": 486144000.0, "16700": 486144000.0, "16705": 486144000.0, "16710": 486144000.0, "16715": 486144000.0, "16720": 486144000.0, "16725": 486144000.0, "16730": 486144000.0, "16735": 486144000.0, "16740": 486144000.0, "16745": 486144000.0, "16750": 486144000.0, "16755": 486144000.0, "16760": 486144000.0, "16765": 486144000.0, "16770": 486144000.0, "16775": 486144000.0, "16780": 486144000.0, "16785": 486144000.0, "16790": 486144000.0, "16795": 486144000.0, "16800": 486144000.0, "16805": 486144000.0, "16810": 486144000.0, "16815": 486144000.0, "16820": 486144000.0, "16825": 486144000.0, "16830": 486144000.0, "16835": 486144000.0, "16840": 486144000.0, "16845": 486144000.0, "16850": 486144000.0, "16855": 486144000.0, "16860": 486144000.0, "16865": 486144000.0, "16870": 486144000.0, "16875": 486144000.0, "16880": 486144000.0, "16885": 486144000.0, "16890": 486144000.0, "16895": 486144000.0, "16900": 486144000.0, "16905": 486144000.0, "16910": 486144000.0, "16915": 486144000.0, "16920": 486144000.0, "16925": 486144000.0, "16930": 486144000.0, "16935": 486144000.0, "16940": 486144000.0, "16945": 486144000.0, "16950": 486144000.0, "16955": 486144000.0, "16960": 486144000.0, "16965": 486144000.0, "16970": 486144000.0, "16975": 486144000.0, "16980": 486144000.0, "16985": 486144000.0, "16990": 486144000.0, "16995": 486144000.0, "17000": 486144000.0, "17005": 486144000.0, "17010": 486144000.0, "17015": 486144000.0, "17020": 486144000.0, "17025": 486144000.0, "17030": 486144000.0, "17035": 486144000.0, "17040": 486144000.0, "17045": 486144000.0, "17050": 486144000.0, "17055": 486144000.0, "17060": 486144000.0, "17065": 486144000.0, "17070": 486144000.0, "17075": 486144000.0, "17080": 486144000.0, "17085": 486144000.0, "17090": 486144000.0, "17095": 486144000.0, "17100": 486144000.0, "17105": 486144000.0, "17110": 486144000.0, "17115": 486144000.0, "17120": 486144000.0, "17125": 486144000.0, "17130": 486144000.0, "17135": 486144000.0, "17140": 486144000.0, "17145": 486144000.0, "17150": 486144000.0, "17155": 486144000.0, "17160": 486144000.0, "17165": 486144000.0, "17170": 486144000.0, "17175": 486144000.0, "17180": 486144000.0, "17185": 486144000.0, "17190": 486144000.0, "17195": 486144000.0, "17200": 486144000.0, "17205": 486144000.0, "17210": 486144000.0, "17215": 486144000.0, "17220": 486144000.0, "17225": 486144000.0, "17230": 486144000.0, "17235": 486144000.0, "17240": 486144000.0, "17245": 486144000.0, "17250": 486144000.0, "17255": 486144000.0, "17260": 486144000.0, "17265": 486144000.0, "17270": 486144000.0, "17275": 486144000.0, "17280": 486144000.0, "17285": 486144000.0, "17290": 486144000.0, "17295": 486144000.0, "17300": 486144000.0, "17305": 486144000.0, "17310": 486144000.0, "17315": 486144000.0, "17320": 486144000.0, "17325": 486144000.0, "17330": 486144000.0, "17335": 486144000.0, "17340": 486144000.0, "17345": 486144000.0, "17350": 486144000.0, "17355": 486144000.0, "17360": 486144000.0, "17365": 486144000.0, "17370": 486144000.0, "17375": 486144000.0, "17380": 486144000.0, "17385": 486144000.0, "17390": 486144000.0, "17395": 486144000.0, "17400": 486144000.0, "17405": 486144000.0, "17410": 486144000.0, "17415": 486144000.0, "17420": 486144000.0, "17425": 486144000.0, "17430": 486144000.0, "17435": 486144000.0, "17440": 486144000.0, "17445": 486144000.0, "17450": 486144000.0, "17455": 486144000.0, "17460": 486144000.0, "17465": 486144000.0, "17470": 486144000.0, "17475": 486144000.0, "17480": 486144000.0, "17485": 486144000.0, "17490": 486144000.0, "17495": 486144000.0, "17500": 486144000.0, "17505": 486144000.0, "17510": 486144000.0, "17515": 486144000.0, "17520": 486144000.0, "17525": 486144000.0, "17530": 486144000.0, "17535": 486144000.0, "17540": 486144000.0, "17545": 486144000.0, "17550": 486144000.0, "17555": 486144000.0, "17560": 486144000.0, "17565": 486144000.0, "17570": 486144000.0, "17575": 486144000.0, "17580": 486144000.0, "17585": 486144000.0, "17590": 486144000.0, "17595": 486144000.0, "17600": 486144000.0, "17605": 486144000.0, "17610": 486144000.0, "17615": 486144000.0, "17620": 486144000.0, "17625": 486144000.0, "17630": 486144000.0, "17635": 486144000.0, "17640": 486144000.0, "17645": 486144000.0, "17650": 486144000.0, "17655": 486144000.0, "17660": 486144000.0, "17665": 486144000.0, "17670": 486144000.0, "17675": 486144000.0, "17680": 486144000.0, "17685": 486144000.0, "17690": 486144000.0, "17695": 486144000.0, "17700": 486144000.0, "17705": 486144000.0, "17710": 486144000.0, "17715": 486144000.0, "17720": 486144000.0, "17725": 486144000.0, "17730": 486144000.0, "17735": 486144000.0, "17740": 486144000.0, "17745": 486144000.0, "17750": 486144000.0, "17755": 486144000.0, "17760": 486144000.0, "17765": 486144000.0, "17770": 486144000.0, "17775": 486144000.0, "17780": 486144000.0, "17785": 486144000.0, "17790": 486144000.0, "17795": 486144000.0, "17800": 486144000.0, "17805": 486144000.0, "17810": 486144000.0, "17815": 486144000.0, "17820": 486144000.0, "17825": 486144000.0, "17830": 486144000.0, "17835": 486144000.0, "17840": 486144000.0, "17845": 486144000.0, "17850": 486144000.0, "17855": 486144000.0, "17860": 486144000.0, "17865": 486144000.0, "17870": 486144000.0, "17875": 486144000.0, "17880": 486144000.0, "17885": 486144000.0, "17890": 486144000.0, "17895": 486144000.0, "17900": 486144000.0, "17905": 486144000.0, "17910": 486144000.0, "17915": 486144000.0, "17920": 486144000.0, "17925": 486144000.0, "17930": 486144000.0, "17935": 486144000.0, "17940": 486144000.0, "17945": 486144000.0, "17950": 486144000.0, "17955": 486144000.0, "17960": 486144000.0, "17965": 486144000.0, "17970": 486144000.0, "17975": 486144000.0, "17980": 486144000.0, "17985": 486144000.0, "17990": 486144000.0, "17995": 486144000.0, "18000": 486144000.0, "18005": 486144000.0, "18010": 486144000.0, "18015": 486144000.0, "18020": 486144000.0, "18025": 486144000.0, "18030": 486144000.0, "18035": 486144000.0, "18040": 486144000.0, "18045": 486144000.0, "18050": 486144000.0, "18055": 486144000.0, "18060": 486144000.0, "18065": 486144000.0, "18070": 486144000.0, "18075": 486144000.0, "18080": 486144000.0, "18085": 486144000.0, "18090": 486144000.0, "18095": 486144000.0, "18100": 486144000.0, "18105": 486144000.0, "18110": 486144000.0, "18115": 486144000.0, "18120": 486144000.0, "18125": 486144000.0, "18130": 486144000.0, "18135": 486144000.0, "18140": 486144000.0, "18145": 486144000.0, "18150": 486144000.0, "18155": 486144000.0, "18160": 486144000.0, "18165": 486144000.0, "18170": 486144000.0, "18175": 486144000.0, "18180": 486144000.0, "18185": 486144000.0, "18190": 486144000.0, "18195": 486144000.0, "18200": 486144000.0, "18205": 486144000.0, "18210": 486144000.0, "18215": 486144000.0, "18220": 486144000.0, "18225": 486144000.0, "18230": 486144000.0, "18235": 486144000.0, "18240": 486144000.0, "18245": 486144000.0, "18250": 486144000.0, "18255": 486144000.0, "18260": 486144000.0, "18265": 486144000.0, "18270": 486144000.0, "18275": 486144000.0, "18280": 486144000.0, "18285": 486144000.0, "18290": 486144000.0, "18295": 486144000.0, "18300": 486144000.0, "18305": 486144000.0, "18310": 486144000.0, "18315": 486144000.0, "18320": 486144000.0, "18325": 486144000.0, "18330": 486144000.0, "18335": 486144000.0, "18340": 486144000.0, "18345": 486144000.0, "18350": 486144000.0, "18355": 486144000.0, "18360": 486144000.0, "18365": 486144000.0, "18370": 486144000.0, "18375": 486144000.0, "18380": 486144000.0, "18385": 486144000.0, "18390": 486144000.0, "18395": 486144000.0, "18400": 486144000.0, "18405": 486144000.0, "18410": 486144000.0, "18415": 486144000.0, "18420": 486144000.0, "18425": 486144000.0, "18430": 486144000.0, "18435": 486144000.0, "18440": 486144000.0, "18445": 486144000.0, "18450": 486144000.0, "18455": 486144000.0, "18460": 486144000.0, "18465": 486144000.0, "18470": 486144000.0, "18475": 486144000.0, "18480": 486144000.0, "18485": 486144000.0, "18490": 486144000.0, "18495": 486144000.0, "18500": 486144000.0, "18505": 486144000.0, "18510": 486144000.0, "18515": 486144000.0, "18520": 486144000.0, "18525": 486144000.0, "18530": 486144000.0, "18535": 486144000.0, "18540": 486144000.0, "18545": 486144000.0, "18550": 486144000.0, "18555": 486144000.0, "18560": 486144000.0, "18565": 486144000.0, "18570": 486144000.0, "18575": 486144000.0, "18580": 486144000.0, "18585": 486144000.0, "18590": 486144000.0, "18595": 486144000.0, "18600": 486144000.0, "18605": 486144000.0, "18610": 486144000.0, "18615": 486144000.0, "18620": 486144000.0, "18625": 486144000.0, "18630": 486144000.0, "18635": 486144000.0, "18640": 486144000.0, "18645": 486144000.0, "18650": 486144000.0, "18655": 486144000.0, "18660": 486144000.0, "18665": 486144000.0, "18670": 486144000.0, "18675": 486144000.0, "18680": 486144000.0, "18685": 486144000.0, "18690": 486144000.0, "18695": 486144000.0, "18700": 486144000.0, "18705": 486144000.0, "18710": 486144000.0, "18715": 486144000.0, "18720": 486144000.0, "18725": 486144000.0, "18730": 486144000.0, "18735": 486144000.0, "18740": 486144000.0, "18745": 486144000.0, "18750": 486144000.0, "18755": 486144000.0, "18760": 486144000.0, "18765": 486144000.0, "18770": 486144000.0, "18775": 486144000.0, "18780": 486144000.0, "18785": 486144000.0, "18790": 486144000.0, "18795": 486144000.0, "18800": 486144000.0, "18805": 486144000.0, "18810": 486144000.0, "18815": 486144000.0, "18820": 486144000.0, "18825": 486144000.0, "18830": 486144000.0, "18835": 486144000.0, "18840": 486144000.0, "18845": 486144000.0, "18850": 486144000.0, "18855": 486144000.0, "18860": 486144000.0, "18865": 486144000.0, "18870": 486144000.0, "18875": 486144000.0, "18880": 486144000.0, "18885": 486144000.0, "18890": 486144000.0, "18895": 486144000.0, "18900": 486144000.0, "18905": 486144000.0, "18910": 486144000.0, "18915": 486144000.0, "18920": 486144000.0, "18925": 486144000.0, "18930": 486144000.0, "18935": 486144000.0, "18940": 486144000.0, "18945": 486144000.0, "18950": 486144000.0, "18955": 486144000.0, "18960": 486144000.0, "18965": 486144000.0, "18970": 486144000.0, "18975": 486144000.0, "18980": 486144000.0, "18985": 486144000.0, "18990": 486144000.0, "18995": 486144000.0, "19000": 486144000.0, "19005": 486144000.0, "19010": 486144000.0, "19015": 486144000.0, "19020": 486144000.0, "19025": 486144000.0, "19030": 486144000.0, "19035": 486144000.0, "19040": 486144000.0, "19045": 486144000.0, "19050": 486144000.0, "19055": 486144000.0, "19060": 486144000.0, "19065": 486144000.0, "19070": 486144000.0, "19075": 486144000.0, "19080": 486144000.0, "19085": 486144000.0, "19090": 486144000.0, "19095": 486144000.0, "19100": 486144000.0, "19105": 486144000.0, "19110": 486144000.0, "19115": 486144000.0, "19120": 486144000.0, "19125": 486144000.0, "19130": 486144000.0, "19135": 486144000.0, "19140": 486144000.0, "19145": 486144000.0, "19150": 486144000.0, "19155": 486144000.0, "19160": 486144000.0, "19165": 486144000.0, "19170": 486144000.0, "19175": 486144000.0, "19180": 486144000.0, "19185": 486144000.0, "19190": 486144000.0, "19195": 486144000.0, "19200": 486144000.0, "19205": 486144000.0, "19210": 486144000.0, "19215": 486144000.0, "19220": 486144000.0, "19225": 486144000.0, "19230": 486144000.0, "19235": 486144000.0, "19240": 486144000.0, "19245": 486144000.0, "19250": 486144000.0, "19255": 486144000.0, "19260": 486144000.0, "19265": 486144000.0, "19270": 486144000.0, "19275": 486144000.0, "19280": 486144000.0, "19285": 486144000.0, "19290": 486144000.0, "19295": 486144000.0, "19300": 486144000.0, "19305": 486144000.0, "19310": 486144000.0, "19315": 486144000.0, "19320": 486144000.0, "19325": 486144000.0, "19330": 486144000.0, "19335": 486144000.0, "19340": 486144000.0, "19345": 486144000.0, "19350": 486144000.0, "19355": 486144000.0, "19360": 486144000.0, "19365": 486144000.0, "19370": 486144000.0, "19375": 486144000.0, "19380": 486144000.0, "19385": 486144000.0, "19390": 486144000.0, "19395": 486144000.0, "19400": 486144000.0, "19405": 486144000.0, "19410": 486144000.0, "19415": 486144000.0, "19420": 486144000.0, "19425": 486144000.0, "19430": 486144000.0, "19435": 486144000.0, "19440": 486144000.0, "19445": 486144000.0, "19450": 486144000.0, "19455": 486144000.0, "19460": 486144000.0, "19465": 486144000.0, "19470": 486144000.0, "19475": 486144000.0, "19480": 486144000.0, "19485": 486144000.0, "19490": 486144000.0, "19495": 486144000.0, "19500": 486144000.0, "19505": 486144000.0, "19510": 486144000.0, "19515": 486144000.0, "19520": 486144000.0, "19525": 486144000.0, "19530": 486144000.0, "19535": 486144000.0, "19540": 486144000.0, "19545": 486144000.0, "19550": 486144000.0, "19555": 486144000.0, "19560": 486144000.0, "19565": 486144000.0, "19570": 486144000.0, "19575": 486144000.0, "19580": 486144000.0, "19585": 486144000.0, "19590": 486144000.0, "19595": 486144000.0, "19600": 486144000.0, "19605": 486144000.0, "19610": 486144000.0, "19615": 486144000.0, "19620": 486144000.0, "19625": 486144000.0, "19630": 486144000.0, "19635": 486144000.0, "19640": 486144000.0, "19645": 486144000.0, "19650": 486144000.0, "19655": 486144000.0, "19660": 486144000.0, "19665": 486144000.0, "19670": 486144000.0, "19675": 486144000.0, "19680": 486144000.0, "19685": 486144000.0, "19690": 486144000.0, "19695": 486144000.0, "19700": 486144000.0, "19705": 486144000.0, "19710": 486144000.0, "19715": 486144000.0, "19720": 486144000.0, "19725": 486144000.0, "19730": 486144000.0, "19735": 486144000.0, "19740": 486144000.0, "19745": 486144000.0, "19750": 486144000.0, "19755": 486144000.0, "19760": 486144000.0, "19765": 486144000.0, "19770": 486144000.0, "19775": 486144000.0, "19780": 486144000.0, "19785": 486144000.0, "19790": 486144000.0, "19795": 486144000.0, "19800": 486144000.0, "19805": 486144000.0, "19810": 486144000.0, "19815": 486144000.0, "19820": 486144000.0, "19825": 486144000.0, "19830": 486144000.0, "19835": 486144000.0, "19840": 486144000.0, "19845": 486144000.0, "19850": 486144000.0, "19855": 486144000.0, "19860": 486144000.0, "19865": 486144000.0, "19870": 486144000.0, "19875": 486144000.0, "19880": 486144000.0, "19885": 486144000.0, "19890": 486144000.0, "19895": 486144000.0, "19900": 486144000.0, "19905": 486144000.0, "19910": 486144000.0, "19915": 486144000.0, "19920": 486144000.0, "19925": 486144000.0, "19930": 486144000.0, "19935": 486144000.0, "19940": 486144000.0, "19945": 486144000.0, "19950": 486144000.0, "19955": 486144000.0, "19960": 486144000.0, "19965": 486144000.0, "19970": 486144000.0, "19975": 486144000.0, "19980": 486144000.0, "19985": 486144000.0, "19990": 486144000.0, "19995": 486144000.0, "20000": 486144000.0, "20005": "nan", "20010": "nan", "20015": "nan", "20020": "nan", "20025": "nan", "20030": "nan", "20035": "nan", "20040": "nan", "20045": "nan", "20050": "nan", "20055": "nan", "20060": "nan", "20065": "nan", "20070": "nan", "20075": "nan", "20080": "nan", "20085": "nan", "20090": "nan", "20095": "nan", "20100": "nan", "20105": "nan", "20110": "nan", "20115": "nan", "20120": "nan", "20125": "nan", "20130": "nan", "20135": "nan", "20140": "nan", "20145": "nan", "20150": "nan", "20155": "nan", "20160": "nan", "20165": "nan", "20170": "nan", "20175": "nan", "20180": "nan", "20185": "nan", "20190": "nan", "20195": "nan", "20200": "nan", "20205": "nan", "20210": "nan", "20215": "nan", "20220": "nan", "20225": "nan", "20230": "nan", "20235": "nan", "20240": "nan", "20245": "nan", "20250": "nan", "20255": "nan", "20260": "nan", "20265": "nan", "20270": "nan", "20275": "nan", "20280": "nan", "20285": "nan", "20290": "nan", "20295": "nan", "20300": "nan", "20305": "nan", "20310": "nan", "20315": "nan", "20320": "nan", "20325": "nan", "20330": "nan", "20335": "nan", "20340": "nan", "20345": "nan", "20350": "nan", "20355": "nan", "20360": "nan", "20365": "nan", "20370": "nan", "20375": "nan", "20380": "nan", "20385": "nan", "20390": "nan", "20395": "nan", "20400": "nan", "20405": "nan", "20410": "nan", "20415": "nan", "20420": "nan", "20425": "nan", "20430": "nan", "20435": "nan", "20440": "nan", "20445": "nan", "20450": "nan", "20455": "nan", "20460": "nan", "20465": "nan", "20470": "nan", "20475": "nan", "20480": "nan", "20485": "nan", "20490": "nan", "20495": "nan", "20500": "nan", "20505": "nan", "20510": "nan", "20515": "nan", "20520": "nan", "20525": "nan", "20530": "nan", "20535": "nan", "20540": "nan", "20545": "nan", "20550": "nan", "20555": "nan", "20560": "nan", "20565": "nan", "20570": "nan", "20575": "nan", "20580": "nan", "20585": "nan", "20590": "nan", "20595": "nan", "20600": "nan", "20605": "nan", "20610": "nan", "20615": "nan", "20620": "nan", "20625": "nan", "20630": "nan", "20635": "nan", "20640": "nan", "20645": "nan", "20650": "nan", "20655": "nan", "20660": "nan", "20665": "nan", "20670": "nan", "20675": "nan", "20680": "nan", "20685": "nan", "20690": "nan", "20695": "nan", "20700": "nan", "20705": "nan", "20710": "nan", "20715": "nan", "20720": "nan", "20725": "nan", "20730": "nan", "20735": "nan", "20740": "nan", "20745": "nan", "20750": "nan", "20755": "nan", "20760": "nan", "20765": "nan", "20770": "nan", "20775": "nan", "20780": "nan", "20785": "nan", "20790": "nan", "20795": "nan", "20800": "nan", "20805": "nan", "20810": "nan", "20815": "nan", "20820": "nan", "20825": "nan", "20830": "nan", "20835": "nan", "20840": "nan", "20845": "nan", "20850": "nan", "20855": "nan", "20860": "nan", "20865": "nan", "20870": "nan", "20875": "nan", "20880": "nan", "20885": "nan", "20890": "nan", "20895": "nan", "20900": "nan", "20905": "nan", "20910": "nan", "20915": "nan", "20920": "nan", "20925": "nan", "20930": "nan", "20935": "nan", "20940": "nan", "20945": "nan", "20950": "nan", "20955": "nan", "20960": "nan", "20965": "nan", "20970": "nan", "20975": "nan", "20980": "nan", "20985": "nan", "20990": "nan", "20995": "nan", "21000": "nan", "21005": "nan", "21010": "nan", "21015": "nan", "21020": "nan", "21025": "nan", "21030": "nan", "21035": "nan", "21040": "nan", "21045": "nan", "21050": "nan", "21055": "nan", "21060": "nan", "21065": "nan", "21070": "nan", "21075": "nan", "21080": "nan", "21085": "nan", "21090": "nan", "21095": "nan", "21100": "nan", "21105": "nan", "21110": "nan", "21115": "nan", "21120": "nan", "21125": "nan", "21130": "nan", "21135": "nan", "21140": "nan", "21145": "nan", "21150": "nan", "21155": "nan", "21160": "nan", "21165": "nan", "21170": "nan", "21175": "nan", "21180": "nan", "21185": "nan", "21190": "nan", "21195": "nan", "21200": "nan", "21205": "nan", "21210": "nan", "21215": "nan", "21220": "nan", "21225": "nan", "21230": "nan", "21235": "nan", "21240": "nan", "21245": "nan", "21250": "nan", "21255": "nan", "21260": "nan", "21265": "nan", "21270": "nan", "21275": "nan", "21280": "nan", "21285": "nan", "21290": "nan", "21295": "nan", "21300": "nan", "21305": "nan", "21310": "nan", "21315": "nan", "21320": "nan", "21325": "nan", "21330": "nan", "21335": "nan", "21340": "nan", "21345": "nan", "21350": "nan", "21355": "nan", "21360": "nan", "21365": "nan", "21370": "nan", "21375": "nan", "21380": "nan", "21385": "nan", "21390": "nan", "21395": "nan", "21400": "nan", "21405": "nan", "21410": "nan", "21415": "nan", "21420": "nan", "21425": "nan", "21430": "nan", "21435": "nan", "21440": "nan", "21445": "nan", "21450": "nan", "21455": "nan", "21460": "nan", "21465": "nan", "21470": "nan", "21475": "nan", "21480": "nan", "21485": "nan", "21490": "nan", "21495": "nan", "21500": "nan", "21505": "nan", "21510": "nan", "21515": "nan", "21520": "nan", "21525": "nan", "21530": "nan", "21535": "nan", "21540": "nan", "21545": "nan", "21550": "nan", "21555": "nan", "21560": "nan", "21565": "nan", "21570": "nan", "21575": "nan", "21580": "nan", "21585": "nan", "21590": "nan", "21595": "nan", "21600": "nan", "21605": "nan", "21610": "nan", "21615": "nan", "21620": "nan", "21625": "nan", "21630": "nan", "21635": "nan", "21640": "nan", "21645": "nan", "21650": "nan", "21655": "nan", "21660": "nan", "21665": "nan", "21670": "nan", "21675": "nan", "21680": "nan", "21685": "nan", "21690": "nan", "21695": "nan", "21700": "nan", "21705": "nan", "21710": "nan", "21715": "nan", "21720": "nan", "21725": "nan", "21730": "nan", "21735": "nan", "21740": "nan", "21745": "nan", "21750": "nan", "21755": "nan", "21760": "nan", "21765": "nan", "21770": "nan", "21775": "nan", "21780": "nan", "21785": "nan", "21790": "nan", "21795": "nan", "21800": "nan", "21805": "nan", "21810": "nan", "21815": "nan", "21820": "nan", "21825": "nan", "21830": "nan", "21835": "nan", "21840": "nan", "21845": "nan", "21850": "nan", "21855": "nan", "21860": "nan", "21865": "nan", "21870": "nan", "21875": "nan", "21880": "nan", "21885": "nan", "21890": "nan", "21895": "nan", "21900": "nan", "21905": "nan", "21910": "nan", "21915": "nan", "21920": "nan", "21925": "nan", "21930": "nan", "21935": "nan", "21940": "nan", "21945": "nan", "21950": "nan", "21955": "nan", "21960": "nan", "21965": "nan", "21970": "nan", "21975": "nan", "21980": "nan", "21985": "nan", "21990": "nan", "21995": "nan", "22000": "nan", "22005": "nan", "22010": "nan", "22015": "nan", "22020": "nan", "22025": "nan", "22030": "nan", "22035": "nan", "22040": "nan", "22045": "nan", "22050": "nan", "22055": "nan", "22060": "nan", "22065": "nan", "22070": "nan", "22075": "nan", "22080": "nan", "22085": "nan", "22090": "nan", "22095": "nan", "22100": "nan", "22105": "nan", "22110": "nan", "22115": "nan", "22120": "nan", "22125": "nan", "22130": "nan", "22135": "nan", "22140": "nan", "22145": "nan", "22150": "nan", "22155": "nan", "22160": "nan", "22165": "nan", "22170": "nan", "22175": "nan", "22180": "nan", "22185": "nan", "22190": "nan", "22195": "nan", "22200": "nan", "22205": "nan", "22210": "nan", "22215": "nan", "22220": "nan", "22225": "nan", "22230": "nan", "22235": "nan", "22240": "nan", "22245": "nan", "22250": "nan", "22255": "nan", "22260": "nan", "22265": "nan", "22270": "nan", "22275": "nan", "22280": "nan", "22285": "nan", "22290": "nan", "22295": "nan", "22300": "nan", "22305": "nan", "22310": "nan", "22315": "nan", "22320": "nan", "22325": "nan", "22330": "nan", "22335": "nan", "22340": "nan", "22345": "nan", "22350": "nan", "22355": "nan", "22360": "nan", "22365": "nan", "22370": "nan", "22375": "nan", "22380": "nan", "22385": "nan", "22390": "nan", "22395": "nan", "22400": "nan", "22405": "nan", "22410": "nan", "22415": "nan", "22420": "nan", "22425": "nan", "22430": "nan", "22435": "nan", "22440": "nan", "22445": "nan", "22450": "nan", "22455": "nan", "22460": "nan", "22465": "nan", "22470": "nan", "22475": "nan", "22480": "nan", "22485": "nan", "22490": "nan", "22495": "nan", "22500": "nan", "22505": "nan", "22510": "nan", "22515": "nan", "22520": "nan", "22525": "nan", "22530": "nan", "22535": "nan", "22540": "nan", "22545": "nan", "22550": "nan", "22555": "nan", "22560": "nan", "22565": "nan", "22570": "nan", "22575": "nan", "22580": "nan", "22585": "nan", "22590": "nan", "22595": "nan", "22600": "nan", "22605": "nan", "22610": "nan", "22615": "nan", "22620": "nan", "22625": "nan", "22630": "nan", "22635": "nan", "22640": "nan", "22645": "nan", "22650": "nan", "22655": "nan", "22660": "nan", "22665": "nan", "22670": "nan", "22675": "nan", "22680": "nan", "22685": "nan", "22690": "nan", "22695": "nan", "22700": "nan", "22705": "nan", "22710": "nan", "22715": "nan", "22720": "nan", "22725": "nan", "22730": "nan", "22735": "nan", "22740": "nan", "22745": "nan", "22750": "nan", "22755": "nan", "22760": "nan", "22765": "nan", "22770": "nan", "22775": "nan", "22780": "nan", "22785": "nan", "22790": "nan", "22795": "nan", "22800": "nan", "22805": "nan", "22810": "nan", "22815": "nan", "22820": "nan", "22825": "nan", "22830": "nan", "22835": "nan", "22840": "nan", "22845": "nan", "22850": "nan", "22855": "nan", "22860": "nan", "22865": "nan", "22870": "nan", "22875": "nan", "22880": "nan", "22885": "nan", "22890": "nan", "22895": "nan", "22900": "nan", "22905": "nan", "22910": "nan", "22915": "nan", "22920": "nan", "22925": "nan", "22930": "nan", "22935": "nan", "22940": "nan", "22945": "nan", "22950": "nan", "22955": "nan", "22960": "nan", "22965": "nan", "22970": "nan", "22975": "nan", "22980": "nan", "22985": "nan", "22990": "nan", "22995": "nan", "23000": "nan", "23005": "nan", "23010": "nan", "23015": "nan", "23020": "nan", "23025": "nan", "23030": "nan", "23035": "nan", "23040": "nan", "23045": "nan", "23050": "nan", "23055": "nan", "23060": "nan", "23065": "nan", "23070": "nan", "23075": "nan", "23080": "nan", "23085": "nan", "23090": "nan", "23095": "nan", "23100": "nan", "23105": "nan", "23110": "nan", "23115": "nan", "23120": "nan", "23125": "nan", "23130": "nan", "23135": "nan", "23140": "nan", "23145": "nan", "23150": "nan", "23155": "nan", "23160": "nan", "23165": "nan", "23170": "nan", "23175": "nan", "23180": "nan", "23185": "nan", "23190": "nan", "23195": "nan", "23200": "nan", "23205": "nan", "23210": "nan", "23215": "nan", "23220": "nan", "23225": "nan", "23230": "nan", "23235": "nan", "23240": "nan", "23245": "nan", "23250": "nan", "23255": "nan", "23260": "nan", "23265": "nan", "23270": "nan", "23275": "nan", "23280": "nan", "23285": "nan", "23290": "nan", "23295": "nan", "23300": "nan", "23305": "nan", "23310": "nan", "23315": "nan", "23320": "nan", "23325": "nan", "23330": "nan", "23335": "nan", "23340": "nan", "23345": "nan", "23350": "nan", "23355": "nan", "23360": "nan", "23365": "nan", "23370": "nan", "23375": "nan", "23380": "nan", "23385": "nan", "23390": "nan", "23395": "nan", "23400": "nan", "23405": "nan", "23410": "nan", "23415": "nan", "23420": "nan", "23425": "nan", "23430": "nan", "23435": "nan", "23440": "nan", "23445": "nan", "23450": "nan", "23455": "nan", "23460": "nan", "23465": "nan", "23470": "nan", "23475": "nan", "23480": "nan", "23485": "nan", "23490": "nan", "23495": "nan", "23500": "nan", "23505": "nan", "23510": "nan", "23515": "nan", "23520": "nan", "23525": "nan", "23530": "nan", "23535": "nan", "23540": "nan", "23545": "nan", "23550": "nan", "23555": "nan", "23560": "nan", "23565": "nan", "23570": "nan", "23575": "nan", "23580": "nan", "23585": "nan", "23590": "nan", "23595": "nan", "23600": "nan", "23605": "nan", "23610": "nan", "23615": "nan", "23620": "nan", "23625": "nan", "23630": "nan", "23635": "nan", "23640": "nan", "23645": "nan", "23650": "nan", "23655": "nan", "23660": "nan", "23665": "nan", "23670": "nan", "23675": "nan", "23680": "nan", "23685": "nan", "23690": "nan", "23695": "nan", "23700": "nan", "23705": "nan", "23710": "nan", "23715": "nan", "23720": "nan", "23725": "nan", "23730": "nan", "23735": "nan", "23740": "nan", "23745": "nan", "23750": "nan", "23755": "nan", "23760": "nan", "23765": "nan", "23770": "nan", "23775": "nan", "23780": "nan", "23785": "nan", "23790": "nan", "23795": "nan", "23800": "nan", "23805": "nan", "23810": "nan", "23815": "nan", "23820": "nan", "23825": "nan", "23830": "nan", "23835": "nan", "23840": "nan", "23845": "nan", "23850": "nan", "23855": "nan", "23860": "nan", "23865": "nan", "23870": "nan", "23875": "nan", "23880": "nan", "23885": "nan", "23890": "nan", "23895": "nan", "23900": "nan", "23905": "nan", "23910": "nan", "23915": "nan", "23920": "nan", "23925": "nan", "23930": "nan", "23935": "nan", "23940": "nan", "23945": "nan", "23950": "nan", "23955": "nan", "23960": "nan", "23965": "nan", "23970": "nan", "23975": "nan", "23980": "nan", "23985": "nan", "23990": "nan", "23995": "nan", "24000": "nan", "24005": "nan", "24010": "nan", "24015": "nan", "24020": "nan", "24025": "nan", "24030": "nan", "24035": "nan", "24040": "nan", "24045": "nan", "24050": "nan", "24055": "nan", "24060": "nan", "24065": "nan", "24070": "nan", "24075": "nan", "24080": "nan", "24085": "nan", "24090": "nan", "24095": "nan", "24100": "nan", "24105": "nan", "24110": "nan", "24115": "nan", "24120": "nan", "24125": "nan", "24130": "nan", "24135": "nan", "24140": "nan", "24145": "nan", "24150": "nan", "24155": "nan", "24160": "nan", "24165": "nan", "24170": "nan", "24175": "nan", "24180": "nan", "24185": "nan", "24190": "nan", "24195": "nan", "24200": "nan", "24205": "nan", "24210": "nan", "24215": "nan", "24220": "nan", "24225": "nan", "24230": "nan", "24235": "nan", "24240": "nan", "24245": "nan", "24250": "nan", "24255": "nan", "24260": "nan", "24265": "nan", "24270": "nan", "24275": "nan", "24280": "nan", "24285": "nan", "24290": "nan", "24295": "nan", "24300": "nan", "24305": "nan", "24310": "nan", "24315": "nan", "24320": "nan", "24325": "nan", "24330": "nan", "24335": "nan", "24340": "nan", "24345": "nan", "24350": "nan", "24355": "nan", "24360": "nan", "24365": "nan", "24370": "nan", "24375": "nan", "24380": "nan", "24385": "nan", "24390": "nan", "24395": "nan", "24400": "nan", "24405": "nan", "24410": "nan", "24415": "nan", "24420": "nan", "24425": "nan", "24430": "nan", "24435": "nan", "24440": "nan", "24445": "nan", "24450": "nan", "24455": "nan", "24460": "nan", "24465": "nan", "24470": "nan", "24475": "nan", "24480": "nan", "24485": "nan", "24490": "nan", "24495": "nan", "24500": "nan", "24505": "nan", "24510": "nan", "24515": "nan", "24520": "nan", "24525": "nan", "24530": "nan", "24535": "nan", "24540": "nan", "24545": "nan", "24550": "nan", "24555": "nan", "24560": "nan", "24565": "nan", "24570": "nan", "24575": "nan", "24580": "nan", "24585": "nan", "24590": "nan", "24595": "nan", "24600": "nan", "24605": "nan", "24610": "nan", "24615": "nan", "24620": "nan", "24625": "nan", "24630": "nan", "24635": "nan", "24640": "nan", "24645": "nan", "24650": "nan", "24655": "nan", "24660": "nan", "24665": "nan", "24670": "nan", "24675": "nan", "24680": "nan", "24685": "nan", "24690": "nan", "24695": "nan", "24700": "nan", "24705": "nan", "24710": "nan", "24715": "nan", "24720": "nan", "24725": "nan", "24730": "nan", "24735": "nan", "24740": "nan", "24745": "nan", "24750": "nan", "24755": "nan", "24760": "nan", "24765": "nan", "24770": "nan", "24775": "nan", "24780": "nan", "24785": "nan", "24790": "nan", "24795": "nan", "24800": "nan", "24805": "nan", "24810": "nan", "24815": "nan", "24820": "nan", "24825": "nan", "24830": "nan", "24835": "nan", "24840": "nan", "24845": "nan", "24850": "nan", "24855": "nan", "24860": "nan", "24865": "nan", "24870": "nan", "24875": "nan", "24880": "nan", "24885": "nan", "24890": "nan", "24895": "nan", "24900": "nan", "24905": "nan", "24910": "nan", "24915": "nan", "24920": "nan", "24925": "nan", "24930": "nan", "24935": "nan", "24940": "nan", "24945": "nan", "24950": "nan", "24955": "nan", "24960": "nan", "24965": "nan", "24970": "nan", "24975": "nan", "24980": "nan", "24985": "nan", "24990": "nan", "24995": "nan", "25000": "nan"}}, "num-zeros": {"start_step": 1, "end_step": 25000, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": 1398.0, "25": 1453.0, "30": 1525.0, "35": 1484.0, "40": 1567.0, "45": 1637.0, "50": 1613.0, "55": 1689.0, "60": 1587.0, "65": 1635.0, "70": 1667.0, "75": 1619.0, "80": 1591.0, "85": 1608.0, "90": 1435.0, "95": 1509.0, "100": 1488.0, "105": 1475.0, "110": 1490.0, "115": 1442.0, "120": 1527.0, "125": 1499.0, "130": 1458.0, "135": 1491.0, "140": 1506.0, "145": 1543.0, "150": 1472.0, "155": 1511.0, "160": 1480.0, "165": 1569.0, "170": 1430.0, "175": 1449.0, "180": 1511.0, "185": 1528.0, "190": 1509.0, "195": 1569.0, "200": 1441.0, "205": 1583.0, "210": 1447.0, "215": 1494.0, "220": 1509.0, "225": 1473.0, "230": 1473.0, "235": 1590.0, "240": 1505.0, "245": 1470.0, "250": 1531.0, "255": 1512.0, "260": 1580.0, "265": 1491.0, "270": 1524.0, "275": 1578.0, "280": 1504.0, "285": 1555.0, "290": 1629.0, "295": 1605.0, "300": 1568.0, "305": 1458.0, "310": 1518.0, "315": 1488.0, "320": 1596.0, "325": 1503.0, "330": 1502.0, "335": 1477.0, "340": 1540.0, "345": 1534.0, "350": 1526.0, "355": 1533.0, "360": 1492.0, "365": 1562.0, "370": 1532.0, "375": 1518.0, "380": 1589.0, "385": 1543.0, "390": 1438.0, "395": 1572.0, "400": 1554.0, "405": 1585.0, "410": 1563.0, "415": 1553.0, "420": 1471.0, "425": 1629.0, "430": 1491.0, "435": 1482.0, "440": 1548.0, "445": 1546.0, "450": 1513.0, "455": 1529.0, "460": 1549.0, "465": 1602.0, "470": 1468.0, "475": 1635.0, "480": 1580.0, "485": 1505.0, "490": 1560.0, "495": 1494.0, "500": 1437.0, "505": 1490.0, "510": 1557.0, "515": 1528.0, "520": 1530.0, "525": 1463.0, "530": 1528.0, "535": 1583.0, "540": 1582.0, "545": 1514.0, "550": 1634.0, "555": 1572.0, "560": 1554.0, "565": 1658.0, "570": 1692.0, "575": 1548.0, "580": 1551.0, "585": 1584.0, "590": 1513.0, "595": 1595.0, "600": 1600.0, "605": 1514.0, "610": 1507.0, "615": 1533.0, "620": 1590.0, "625": 1538.0, "630": 1599.0, "635": 1585.0, "640": 1481.0, "645": 1551.0, "650": 1570.0, "655": 1588.0, "660": 1569.0, "665": 1536.0, "670": 1568.0, "675": 1530.0, "680": 1601.0, "685": 1513.0, "690": 1495.0, "695": 1566.0, "700": 1500.0, "705": 1594.0, "710": 1493.0, "715": 1532.0, "720": 1553.0, "725": 1411.0, "730": 1501.0, "735": 1522.0, "740": 1548.0, "745": 1560.0, "750": 1596.0, "755": 1545.0, "760": 1638.0, "765": 1553.0, "770": 1567.0, "775": 1558.0, "780": 1575.0, "785": 1494.0, "790": 1650.0, "795": 1564.0, "800": 1547.0, "805": 1488.0, "810": 1584.0, "815": 1634.0, "820": 1552.0, "825": 1506.0, "830": 1612.0, "835": 1566.0, "840": 1629.0, "845": 1492.0, "850": 1536.0, "855": 1545.0, "860": 1552.0, "865": 1546.0, "870": 1487.0, "875": 1572.0, "880": 1550.0, "885": 1537.0, "890": 1589.0, "895": 1608.0, "900": 1564.0, "905": 1651.0, "910": 1562.0, "915": 1506.0, "920": 1598.0, "925": 1550.0, "930": 1550.0, "935": 1529.0, "940": 1531.0, "945": 1555.0, "950": 1538.0, "955": 1560.0, "960": 1597.0, "965": 1549.0, "970": 1505.0, "975": 1515.0, "980": 1551.0, "985": 1565.0, "990": 1521.0, "995": 1519.0, "1000": 1600.0, "1005": 1571.0, "1010": 1534.0, "1015": 1585.0, "1020": 1530.0, "1025": 1582.0, "1030": 1518.0, "1035": 1526.0, "1040": 1540.0, "1045": 1563.0, "1050": 1532.0, "1055": 1521.0, "1060": 1562.0, "1065": 1480.0, "1070": 1530.0, "1075": 1405.0, "1080": 1625.0, "1085": 1585.0, "1090": 1557.0, "1095": 1487.0, "1100": 1512.0, "1105": 1538.0, "1110": 1583.0, "1115": 1351.0, "1120": 1542.0, "1125": 1566.0, "1130": 1581.0, "1135": 1538.0, "1140": 1518.0, "1145": 1600.0, "1150": 1585.0, "1155": 1603.0, "1160": 1565.0, "1165": 1535.0, "1170": 1708.0, "1175": 1651.0, "1180": 1492.0, "1185": 1594.0, "1190": 1591.0, "1195": 1503.0, "1200": 1649.0, "1205": 1496.0, "1210": 1543.0, "1215": 1565.0, "1220": 1506.0, "1225": 1559.0, "1230": 1587.0, "1235": 1484.0, "1240": 1497.0, "1245": 1617.0, "1250": 1471.0, "1255": 1510.0, "1260": 1589.0, "1265": 1545.0, "1270": 1522.0, "1275": 1565.0, "1280": 1519.0, "1285": 1506.0, "1290": 1532.0, "1295": 1596.0, "1300": 1662.0, "1305": 1572.0, "1310": 1492.0, "1315": 1589.0, "1320": 1568.0, "1325": 1540.0, "1330": 1542.0, "1335": 1541.0, "1340": 1568.0, "1345": 1479.0, "1350": 1622.0, "1355": 1478.0, "1360": 1539.0, "1365": 1580.0, "1370": 1513.0, "1375": 1495.0, "1380": 1561.0, "1385": 1561.0, "1390": 1691.0, "1395": 1578.0, "1400": 1532.0, "1405": 1547.0, "1410": 1586.0, "1415": 1515.0, "1420": 1706.0, "1425": 1669.0, "1430": 1560.0, "1435": 1600.0, "1440": 1565.0, "1445": 1537.0, "1450": 1570.0, "1455": 1481.0, "1460": 1554.0, "1465": 1505.0, "1470": 1599.0, "1475": 1603.0, "1480": 1587.0, "1485": 1577.0, "1490": 1622.0, "1495": 1624.0, "1500": 1607.0, "1505": 1567.0, "1510": 1801.0, "1515": 1549.0, "1520": 1480.0, "1525": 1520.0, "1530": 1566.0, "1535": 1513.0, "1540": 1527.0, "1545": 1575.0, "1550": 1517.0, "1555": 1528.0, "1560": 1523.0, "1565": 1529.0, "1570": 1642.0, "1575": 1502.0, "1580": 1495.0, "1585": 1466.0, "1590": 1552.0, "1595": 1562.0, "1600": 1601.0, "1605": 1527.0, "1610": 1554.0, "1615": 1613.0, "1620": 1564.0, "1625": 1541.0, "1630": 1594.0, "1635": 1573.0, "1640": 1583.0, "1645": 1592.0, "1650": 1534.0, "1655": 1565.0, "1660": 1525.0, "1665": 1532.0, "1670": 1462.0, "1675": 1578.0, "1680": 1550.0, "1685": 1618.0, "1690": 1637.0, "1695": 1495.0, "1700": 1644.0, "1705": 1587.0, "1710": 1948.0, "1715": 1517.0, "1720": 1652.0, "1725": 1626.0, "1730": 1585.0, "1735": 1520.0, "1740": 1572.0, "1745": 1560.0, "1750": 1501.0, "1755": 1605.0, "1760": 1643.0, "1765": 1552.0, "1770": 1554.0, "1775": 1362.0, "1780": 1488.0, "1785": 1485.0, "1790": 1517.0, "1795": 1610.0, "1800": 1535.0, "1805": 1505.0, "1810": 1576.0, "1815": 1568.0, "1820": 1495.0, "1825": 1534.0, "1830": 1686.0, "1835": 1540.0, "1840": 1483.0, "1845": 1560.0, "1850": 1532.0, "1855": 1604.0, "1860": 1556.0, "1865": 1555.0, "1870": 1518.0, "1875": 1579.0, "1880": 1700.0, "1885": 1469.0, "1890": 1554.0, "1895": 1548.0, "1900": 1492.0, "1905": 1511.0, "1910": 1670.0, "1915": 1638.0, "1920": 1544.0, "1925": 1573.0, "1930": 1505.0, "1935": 1581.0, "1940": 1587.0, "1945": 1590.0, "1950": 1595.0, "1955": 1572.0, "1960": 1511.0, "1965": 1478.0, "1970": 1611.0, "1975": 1553.0, "1980": 1527.0, "1985": 1578.0, "1990": 1550.0, "1995": 1495.0, "2000": 1520.0, "2005": 1563.0, "2010": 1533.0, "2015": 1523.0, "2020": 1611.0, "2025": 1544.0, "2030": 1595.0, "2035": 1581.0, "2040": 1672.0, "2045": 1547.0, "2050": 1546.0, "2055": 1535.0, "2060": 1547.0, "2065": 1540.0, "2070": 1489.0, "2075": 1649.0, "2080": 1562.0, "2085": 1611.0, "2090": 1514.0, "2095": 1605.0, "2100": 1599.0, "2105": 1548.0, "2110": 1531.0, "2115": 1573.0, "2120": 1563.0, "2125": 1553.0, "2130": 1537.0, "2135": 1639.0, "2140": 1576.0, "2145": 1519.0, "2150": 1462.0, "2155": 1551.0, "2160": 1665.0, "2165": 1535.0, "2170": 1664.0, "2175": 1604.0, "2180": 1621.0, "2185": 1535.0, "2190": 1556.0, "2195": 1490.0, "2200": 1462.0, "2205": 1548.0, "2210": 1495.0, "2215": 1495.0, "2220": 1563.0, "2225": 1605.0, "2230": 1488.0, "2235": 1575.0, "2240": 1628.0, "2245": 1523.0, "2250": 1630.0, "2255": 1548.0, "2260": 1559.0, "2265": 1592.0, "2270": 1518.0, "2275": 1601.0, "2280": 1596.0, "2285": 1595.0, "2290": 1564.0, "2295": 1581.0, "2300": 1496.0, "2305": 1475.0, "2310": 1586.0, "2315": 1530.0, "2320": 1522.0, "2325": 1354.0, "2330": 1557.0, "2335": 1473.0, "2340": 1525.0, "2345": 1466.0, "2350": 1535.0, "2355": 1583.0, "2360": 1548.0, "2365": 1557.0, "2370": 1503.0, "2375": 1589.0, "2380": 1583.0, "2385": 1627.0, "2390": 1517.0, "2395": 1585.0, "2400": 1585.0, "2405": 1627.0, "2410": 1510.0, "2415": 1534.0, "2420": 1546.0, "2425": 1557.0, "2430": 1489.0, "2435": 1616.0, "2440": 1595.0, "2445": 1553.0, "2450": 1552.0, "2455": 1525.0, "2460": 1616.0, "2465": 1552.0, "2470": 1626.0, "2475": 1603.0, "2480": 1516.0, "2485": 1608.0, "2490": 1565.0, "2495": 1565.0, "2500": 1569.0, "2505": 1489.0, "2510": 1539.0, "2515": 1520.0, "2520": 1578.0, "2525": 1574.0, "2530": 1624.0, "2535": 1577.0, "2540": 1570.0, "2545": 1527.0, "2550": 1508.0, "2555": 1653.0, "2560": 1598.0, "2565": 1626.0, "2570": 1651.0, "2575": 1468.0, "2580": 1545.0, "2585": 1540.0, "2590": 1481.0, "2595": 1603.0, "2600": 1570.0, "2605": 1691.0, "2610": 1546.0, "2615": 1514.0, "2620": 1677.0, "2625": 1615.0, "2630": 1613.0, "2635": 1566.0, "2640": 1574.0, "2645": 1552.0, "2650": 1559.0, "2655": 1550.0, "2660": 1549.0, "2665": 1617.0, "2670": 1614.0, "2675": 1502.0, "2680": 1667.0, "2685": 1639.0, "2690": 1579.0, "2695": 1534.0, "2700": 1534.0, "2705": 1633.0, "2710": 1644.0, "2715": 1460.0, "2720": 1692.0, "2725": 1639.0, "2730": 1605.0, "2735": 1581.0, "2740": 1533.0, "2745": 1537.0, "2750": 1548.0, "2755": 1521.0, "2760": 1545.0, "2765": 1536.0, "2770": 1601.0, "2775": 1526.0, "2780": 1582.0, "2785": 1571.0, "2790": 1600.0, "2795": 1576.0, "2800": 1586.0, "2805": 1675.0, "2810": 1578.0, "2815": 1596.0, "2820": 1574.0, "2825": 1541.0, "2830": 1545.0, "2835": 1597.0, "2840": 1641.0, "2845": 1651.0, "2850": 1470.0, "2855": 1516.0, "2860": 1451.0, "2865": 1546.0, "2870": 1543.0, "2875": 1510.0, "2880": 1600.0, "2885": 1588.0, "2890": 1695.0, "2895": 1586.0, "2900": 1502.0, "2905": 1646.0, "2910": 1635.0, "2915": 1637.0, "2920": 1583.0, "2925": 1511.0, "2930": 1556.0, "2935": 1518.0, "2940": 1532.0, "2945": 1513.0, "2950": 1557.0, "2955": 1562.0, "2960": 1600.0, "2965": 1521.0, "2970": 1543.0, "2975": 1617.0, "2980": 1630.0, "2985": 1549.0, "2990": 1665.0, "2995": 1605.0, "3000": 1559.0, "3005": 1498.0, "3010": 1576.0, "3015": 1498.0, "3020": 1654.0, "3025": 1521.0, "3030": 1681.0, "3035": 1615.0, "3040": 1611.0, "3045": 1580.0, "3050": 1589.0, "3055": 1515.0, "3060": 1561.0, "3065": 1650.0, "3070": 1585.0, "3075": 1607.0, "3080": 1567.0, "3085": 1456.0, "3090": 1563.0, "3095": 1600.0, "3100": 1562.0, "3105": 1573.0, "3110": 1623.0, "3115": 1661.0, "3120": 1562.0, "3125": 1640.0, "3130": 1519.0, "3135": 1621.0, "3140": 1542.0, "3145": 1654.0, "3150": 1588.0, "3155": 1577.0, "3160": 1583.0, "3165": 1527.0, "3170": 1532.0, "3175": 1715.0, "3180": 1563.0, "3185": 1606.0, "3190": 1508.0, "3195": 1698.0, "3200": 1641.0, "3205": 1568.0, "3210": 1562.0, "3215": 1589.0, "3220": 1582.0, "3225": 1585.0, "3230": 1617.0, "3235": 1569.0, "3240": 1510.0, "3245": 1631.0, "3250": 1656.0, "3255": 1543.0, "3260": 1587.0, "3265": 1578.0, "3270": 1578.0, "3275": 1589.0, "3280": 1549.0, "3285": 1535.0, "3290": 1601.0, "3295": 1497.0, "3300": 1616.0, "3305": 1526.0, "3310": 1539.0, "3315": 1551.0, "3320": 1532.0, "3325": 1591.0, "3330": 1602.0, "3335": 1577.0, "3340": 1494.0, "3345": 1557.0, "3350": 1655.0, "3355": 1716.0, "3360": 1578.0, "3365": 1718.0, "3370": 1605.0, "3375": 1653.0, "3380": 1605.0, "3385": 1587.0, "3390": 1553.0, "3395": 1580.0, "3400": 1503.0, "3405": 1506.0, "3410": 1555.0, "3415": 1489.0, "3420": 1552.0, "3425": 1536.0, "3430": 1619.0, "3435": 1543.0, "3440": 1563.0, "3445": 1602.0, "3450": 1578.0, "3455": 1581.0, "3460": 1579.0, "3465": 1632.0, "3470": 1660.0, "3475": 1567.0, "3480": 1683.0, "3485": 1651.0, "3490": 1509.0, "3495": 1578.0, "3500": 1584.0, "3505": 1603.0, "3510": 1578.0, "3515": 1490.0, "3520": 1535.0, "3525": 1593.0, "3530": 1636.0, "3535": 1637.0, "3540": 1571.0, "3545": 1660.0, "3550": 1533.0, "3555": 1623.0, "3560": 1551.0, "3565": 1547.0, "3570": 1538.0, "3575": 1592.0, "3580": 1549.0, "3585": 1616.0, "3590": 1619.0, "3595": 1604.0, "3600": 1726.0, "3605": 1708.0, "3610": 1621.0, "3615": 1640.0, "3620": 1547.0, "3625": 1598.0, "3630": 1595.0, "3635": 1655.0, "3640": 1612.0, "3645": 1631.0, "3650": 1670.0, "3655": 1491.0, "3660": 1536.0, "3665": 1542.0, "3670": 1605.0, "3675": 1693.0, "3680": 1641.0, "3685": 1551.0, "3690": 1543.0, "3695": 1553.0, "3700": 1562.0, "3705": 1615.0, "3710": 1605.0, "3715": 1629.0, "3720": 1572.0, "3725": 1556.0, "3730": 1574.0, "3735": 1573.0, "3740": 1561.0, "3745": 1569.0, "3750": 1555.0, "3755": 1501.0, "3760": 1555.0, "3765": 1558.0, "3770": 1555.0, "3775": 1558.0, "3780": 1602.0, "3785": 1481.0, "3790": 1547.0, "3795": 1548.0, "3800": 1576.0, "3805": 1643.0, "3810": 1639.0, "3815": 1624.0, "3820": 1583.0, "3825": 1624.0, "3830": 1601.0, "3835": 1506.0, "3840": 1621.0, "3845": 1624.0, "3850": 1658.0, "3855": 1679.0, "3860": 1536.0, "3865": 1733.0, "3870": 1592.0, "3875": 1645.0, "3880": 1566.0, "3885": 1539.0, "3890": 1576.0, "3895": 1577.0, "3900": 1571.0, "3905": 1531.0, "3910": 1590.0, "3915": 1575.0, "3920": 1587.0, "3925": 1567.0, "3930": 1505.0, "3935": 1521.0, "3940": 1652.0, "3945": 1638.0, "3950": 1592.0, "3955": 1512.0, "3960": 1538.0, "3965": 1548.0, "3970": 1635.0, "3975": 1633.0, "3980": 1553.0, "3985": 1638.0, "3990": 1549.0, "3995": 1639.0, "4000": 1617.0, "4005": 1599.0, "4010": 1595.0, "4015": 1537.0, "4020": 1546.0, "4025": 1608.0, "4030": 1649.0, "4035": 1629.0, "4040": 1566.0, "4045": 1609.0, "4050": 1529.0, "4055": 1483.0, "4060": 1623.0, "4065": 1596.0, "4070": 1667.0, "4075": 1602.0, "4080": 1494.0, "4085": 1535.0, "4090": 1604.0, "4095": 1572.0, "4100": 1542.0, "4105": 1669.0, "4110": 1694.0, "4115": 1582.0, "4120": 1591.0, "4125": 1615.0, "4130": 1486.0, "4135": 1635.0, "4140": 1593.0, "4145": 1570.0, "4150": 1631.0, "4155": 1579.0, "4160": 1662.0, "4165": 1562.0, "4170": 1578.0, "4175": 1524.0, "4180": 1578.0, "4185": 1586.0, "4190": 1665.0, "4195": 1622.0, "4200": 1641.0, "4205": 1679.0, "4210": 1694.0, "4215": 1698.0, "4220": 1529.0, "4225": 1528.0, "4230": 1552.0, "4235": 1660.0, "4240": 1612.0, "4245": 1609.0, "4250": 1639.0, "4255": 1600.0, "4260": 1602.0, "4265": "nan", "4270": 1584.0, "4275": 1602.0, "4280": 1569.0, "4285": 1626.0, "4290": 1560.0, "4295": 1511.0, "4300": 1595.0, "4305": 1608.0, "4310": 1671.0, "4315": 1715.0, "4320": 1609.0, "4325": 1606.0, "4330": 1704.0, "4335": 1561.0, "4340": 1598.0, "4345": 1608.0, "4350": 1612.0, "4355": 1617.0, "4360": 1627.0, "4365": 1593.0, "4370": 1607.0, "4375": 1610.0, "4380": 1612.0, "4385": 1643.0, "4390": 1598.0, "4395": 1674.0, "4400": 1565.0, "4405": 1523.0, "4410": 1686.0, "4415": 1572.0, "4420": 1599.0, "4425": 1592.0, "4430": 1593.0, "4435": 1585.0, "4440": 1542.0, "4445": 1538.0, "4450": 1550.0, "4455": 1626.0, "4460": 1628.0, "4465": 1563.0, "4470": 1624.0, "4475": 1542.0, "4480": 1556.0, "4485": 1695.0, "4490": 1622.0, "4495": 1636.0, "4500": 1673.0, "4505": 1644.0, "4510": 1749.0, "4515": 1672.0, "4520": 1561.0, "4525": 1560.0, "4530": 1629.0, "4535": 1532.0, "4540": 1680.0, "4545": 1596.0, "4550": 1622.0, "4555": 1697.0, "4560": 1532.0, "4565": 1520.0, "4570": 1541.0, "4575": 1600.0, "4580": 1511.0, "4585": 1633.0, "4590": 1601.0, "4595": 1542.0, "4600": 1581.0, "4605": 1613.0, "4610": 1643.0, "4615": 1607.0, "4620": 1628.0, "4625": 1582.0, "4630": 1617.0, "4635": 1645.0, "4640": 1598.0, "4645": 1664.0, "4650": 1615.0, "4655": 1669.0, "4660": 1626.0, "4665": 1585.0, "4670": 1610.0, "4675": 1528.0, "4680": 1555.0, "4685": 1576.0, "4690": 1757.0, "4695": 1537.0, "4700": 1658.0, "4705": 1643.0, "4710": 1677.0, "4715": 1582.0, "4720": 1563.0, "4725": 1691.0, "4730": 1641.0, "4735": 1628.0, "4740": 1628.0, "4745": 1541.0, "4750": 1563.0, "4755": 1568.0, "4760": 1554.0, "4765": 1584.0, "4770": 1600.0, "4775": 1563.0, "4780": 1655.0, "4785": 1583.0, "4790": 1597.0, "4795": 1767.0, "4800": 1633.0, "4805": 1570.0, "4810": 1554.0, "4815": 1670.0, "4820": 1548.0, "4825": 1527.0, "4830": 1583.0, "4835": 1527.0, "4840": 1598.0, "4845": 1622.0, "4850": 1704.0, "4855": 1655.0, "4860": 1564.0, "4865": 1623.0, "4870": 1546.0, "4875": 1581.0, "4880": 1564.0, "4885": 1584.0, "4890": 1655.0, "4895": 1624.0, "4900": 1618.0, "4905": 1643.0, "4910": 1608.0, "4915": 1619.0, "4920": 1729.0, "4925": 1689.0, "4930": 1576.0, "4935": 1647.0, "4940": 1616.0, "4945": 1668.0, "4950": 1535.0, "4955": 1665.0, "4960": 1832.0, "4965": 1543.0, "4970": 1585.0, "4975": 1602.0, "4980": 1622.0, "4985": 1563.0, "4990": 1976.0, "4995": 1669.0, "5000": 1652.0, "5005": 1652.0, "5010": 1594.0, "5015": 1585.0, "5020": 1643.0, "5025": 1601.0, "5030": 1582.0, "5035": 1616.0, "5040": 1635.0, "5045": 1671.0, "5050": 1661.0, "5055": 1608.0, "5060": 1644.0, "5065": 1601.0, "5070": 1643.0, "5075": 1625.0, "5080": 1634.0, "5085": 1656.0, "5090": 1546.0, "5095": 1599.0, "5100": 1582.0, "5105": 1607.0, "5110": 1560.0, "5115": 1587.0, "5120": 1582.0, "5125": 1642.0, "5130": 1626.0, "5135": 1606.0, "5140": 1613.0, "5145": 1521.0, "5150": 1695.0, "5155": 1571.0, "5160": 1678.0, "5165": 1679.0, "5170": 1623.0, "5175": 1671.0, "5180": 1668.0, "5185": 1633.0, "5190": 1696.0, "5195": 1686.0, "5200": 1712.0, "5205": 1525.0, "5210": 1526.0, "5215": 1598.0, "5220": 1521.0, "5225": 1683.0, "5230": 1579.0, "5235": 1652.0, "5240": 1631.0, "5245": 1611.0, "5250": 1669.0, "5255": 1631.0, "5260": 1611.0, "5265": 1670.0, "5270": 1750.0, "5275": 1572.0, "5280": 1507.0, "5285": 1613.0, "5290": 1607.0, "5295": 1592.0, "5300": 1687.0, "5305": 1587.0, "5310": 1704.0, "5315": 1603.0, "5320": 1523.0, "5325": 1642.0, "5330": 1561.0, "5335": 1554.0, "5340": 1591.0, "5345": 1609.0, "5350": 1626.0, "5355": 1619.0, "5360": 1648.0, "5365": 1634.0, "5370": 1615.0, "5375": 1625.0, "5380": 1600.0, "5385": 1653.0, "5390": 1670.0, "5395": 1619.0, "5400": 1652.0, "5405": 1648.0, "5410": 1585.0, "5415": 1627.0, "5420": 1666.0, "5425": 1661.0, "5430": 1566.0, "5435": 1696.0, "5440": 1678.0, "5445": 1606.0, "5450": 1604.0, "5455": 1643.0, "5460": 1709.0, "5465": 1628.0, "5470": 1614.0, "5475": 1669.0, "5480": 1556.0, "5485": 1677.0, "5490": 1664.0, "5495": 1577.0, "5500": 1658.0, "5505": 1556.0, "5510": 1606.0, "5515": 1641.0, "5520": 1577.0, "5525": 1565.0, "5530": 1642.0, "5535": 1623.0, "5540": 1604.0, "5545": 1608.0, "5550": 1535.0, "5555": 1637.0, "5560": 1621.0, "5565": 1525.0, "5570": 1630.0, "5575": 1639.0, "5580": 1610.0, "5585": 1615.0, "5590": 1620.0, "5595": 1565.0, "5600": 1606.0, "5605": 1668.0, "5610": 1657.0, "5615": 1649.0, "5620": 1656.0, "5625": 1574.0, "5630": 1580.0, "5635": 1622.0, "5640": 1679.0, "5645": 1640.0, "5650": 1681.0, "5655": 1620.0, "5660": 1693.0, "5665": 1681.0, "5670": 1619.0, "5675": 1753.0, "5680": 1628.0, "5685": 1623.0, "5690": 1545.0, "5695": 1622.0, "5700": 1622.0, "5705": 1737.0, "5710": 1639.0, "5715": 1605.0, "5720": 1545.0, "5725": 1658.0, "5730": 1703.0, "5735": 1569.0, "5740": 1624.0, "5745": 1635.0, "5750": 1607.0, "5755": 1621.0, "5760": 1628.0, "5765": 1663.0, "5770": 1512.0, "5775": 1604.0, "5780": 1664.0, "5785": 1658.0, "5790": 1773.0, "5795": 1625.0, "5800": 1604.0, "5805": 1767.0, "5810": 1641.0, "5815": 1681.0, "5820": 1620.0, "5825": 1656.0, "5830": 1657.0, "5835": 1617.0, "5840": 1637.0, "5845": 1673.0, "5850": 1639.0, "5855": 1657.0, "5860": 1736.0, "5865": 1620.0, "5870": 1588.0, "5875": 1600.0, "5880": 1664.0, "5885": 1657.0, "5890": 1610.0, "5895": 1665.0, "5900": 1534.0, "5905": 1605.0, "5910": 1678.0, "5915": 1639.0, "5920": 1666.0, "5925": 1567.0, "5930": 1542.0, "5935": 1679.0, "5940": 1544.0, "5945": 1646.0, "5950": 1741.0, "5955": 1623.0, "5960": 1681.0, "5965": 1626.0, "5970": 1671.0, "5975": 1659.0, "5980": 1668.0, "5985": 1639.0, "5990": 1684.0, "5995": 1676.0, "6000": 1559.0, "6005": 1662.0, "6010": 1810.0, "6015": 1586.0, "6020": 1661.0, "6025": 1617.0, "6030": 1613.0, "6035": 1657.0, "6040": 1632.0, "6045": 1671.0, "6050": 1627.0, "6055": 1582.0, "6060": 1552.0, "6065": 1597.0, "6070": 1583.0, "6075": 1674.0, "6080": 1549.0, "6085": 1569.0, "6090": 1554.0, "6095": 1579.0, "6100": 1628.0, "6105": 1609.0, "6110": 1611.0, "6115": 1618.0, "6120": 1624.0, "6125": 1724.0, "6130": 1556.0, "6135": 1623.0, "6140": 1608.0, "6145": 1632.0, "6150": 1741.0, "6155": 1605.0, "6160": 1639.0, "6165": 1618.0, "6170": 1655.0, "6175": 1687.0, "6180": 1592.0, "6185": 1635.0, "6190": 1638.0, "6195": 1644.0, "6200": 1657.0, "6205": 1628.0, "6210": 1556.0, "6215": 1581.0, "6220": 1608.0, "6225": 1635.0, "6230": 1620.0, "6235": 1700.0, "6240": 1655.0, "6245": 1608.0, "6250": 1716.0, "6255": 1552.0, "6260": 1650.0, "6265": 1539.0, "6270": 1675.0, "6275": 1613.0, "6280": 1640.0, "6285": 1659.0, "6290": 1578.0, "6295": 1586.0, "6300": "nan", "6305": 1566.0, "6310": 1645.0, "6315": 1508.0, "6320": 1758.0, "6325": 1673.0, "6330": 1594.0, "6335": 1622.0, "6340": 1696.0, "6345": 1604.0, "6350": 1608.0, "6355": 1698.0, "6360": 1619.0, "6365": 1627.0, "6370": 1705.0, "6375": 1660.0, "6380": 1653.0, "6385": 1667.0, "6390": 1675.0, "6395": 1779.0, "6400": 1595.0, "6405": 1702.0, "6410": 1726.0, "6415": 1647.0, "6420": 1634.0, "6425": 1725.0, "6430": 1566.0, "6435": 1614.0, "6440": 1569.0, "6445": 1637.0, "6450": 1642.0, "6455": 1655.0, "6460": 1654.0, "6465": 1695.0, "6470": 1697.0, "6475": 1661.0, "6480": 1676.0, "6485": 1616.0, "6490": 1716.0, "6495": 1542.0, "6500": 1734.0, "6505": 1622.0, "6510": 1635.0, "6515": 1568.0, "6520": 1632.0, "6525": 1599.0, "6530": 1627.0, "6535": 1562.0, "6540": 1651.0, "6545": 1593.0, "6550": 1723.0, "6555": 1683.0, "6560": 1847.0, "6565": 1631.0, "6570": 1713.0, "6575": 1610.0, "6580": 1653.0, "6585": 1668.0, "6590": 1644.0, "6595": 1694.0, "6600": 1613.0, "6605": 1553.0, "6610": 1701.0, "6615": 1516.0, "6620": 1607.0, "6625": 1646.0, "6630": 1591.0, "6635": 1694.0, "6640": 1631.0, "6645": 1703.0, "6650": 1720.0, "6655": 1625.0, "6660": 1650.0, "6665": 1674.0, "6670": 1670.0, "6675": 1646.0, "6680": 1679.0, "6685": 1638.0, "6690": 1667.0, "6695": 1672.0, "6700": 1703.0, "6705": 1656.0, "6710": 1715.0, "6715": 1688.0, "6720": 1695.0, "6725": 1725.0, "6730": 1571.0, "6735": 1720.0, "6740": 1542.0, "6745": 1709.0, "6750": 1688.0, "6755": 1628.0, "6760": 1621.0, "6765": 1672.0, "6770": 1600.0, "6775": 1521.0, "6780": 1583.0, "6785": 1561.0, "6790": 1634.0, "6795": 1615.0, "6800": 1562.0, "6805": 1645.0, "6810": 1663.0, "6815": 1688.0, "6820": 1620.0, "6825": 1655.0, "6830": 1633.0, "6835": 1599.0, "6840": 1558.0, "6845": 1667.0, "6850": 1633.0, "6855": 1643.0, "6860": 1741.0, "6865": 1682.0, "6870": 1787.0, "6875": 1583.0, "6880": 1721.0, "6885": 1672.0, "6890": 1693.0, "6895": 1694.0, "6900": 1671.0, "6905": 1661.0, "6910": 1664.0, "6915": 1587.0, "6920": 1665.0, "6925": 1724.0, "6930": 1736.0, "6935": 1644.0, "6940": 1661.0, "6945": 1685.0, "6950": 1686.0, "6955": 1624.0, "6960": 1667.0, "6965": 1675.0, "6970": 1550.0, "6975": 1748.0, "6980": 1663.0, "6985": 1634.0, "6990": 1623.0, "6995": 1662.0, "7000": 1699.0, "7005": 1692.0, "7010": 1654.0, "7015": 1705.0, "7020": 1659.0, "7025": 1630.0, "7030": 1714.0, "7035": 1596.0, "7040": 1637.0, "7045": 1682.0, "7050": 1631.0, "7055": 1668.0, "7060": 1689.0, "7065": 1640.0, "7070": 1729.0, "7075": 1752.0, "7080": 1720.0, "7085": 1719.0, "7090": 1645.0, "7095": 1588.0, "7100": 1549.0, "7105": 1650.0, "7110": 1717.0, "7115": 1688.0, "7120": 1619.0, "7125": 1570.0, "7130": 1706.0, "7135": 1712.0, "7140": 1690.0, "7145": 1640.0, "7150": 1712.0, "7155": 1619.0, "7160": 1654.0, "7165": 1643.0, "7170": 1545.0, "7175": "nan", "7180": 1637.0, "7185": 1773.0, "7190": 1704.0, "7195": 2029.0, "7200": 1826.0, "7205": 1829.0, "7210": 1612.0, "7215": 1811.0, "7220": 1722.0, "7225": 1824.0, "7230": 1807.0, "7235": 1732.0, "7240": 1695.0, "7245": 1824.0, "7250": 1829.0, "7255": 1803.0, "7260": 1769.0, "7265": 1788.0, "7270": 1844.0, "7275": 1644.0, "7280": 1774.0, "7285": 1668.0, "7290": 1954.0, "7295": 1794.0, "7300": 1764.0, "7305": 1851.0, "7310": 1624.0, "7315": 1773.0, "7320": 1792.0, "7325": 1764.0, "7330": 1681.0, "7335": 1710.0, "7340": 1834.0, "7345": 1847.0, "7350": 1818.0, "7355": 1736.0, "7360": 1767.0, "7365": 1829.0, "7370": 1757.0, "7375": 1808.0, "7380": 1950.0, "7385": 1718.0, "7390": 1854.0, "7395": 1933.0, "7400": 1798.0, "7405": 1744.0, "7410": 1758.0, "7415": 1723.0, "7420": 1792.0, "7425": 1894.0, "7430": 1697.0, "7435": 1816.0, "7440": 1864.0, "7445": 1743.0, "7450": 1783.0, "7455": 1934.0, "7460": 1743.0, "7465": 1665.0, "7470": 1809.0, "7475": 1686.0, "7480": 1728.0, "7485": 1787.0, "7490": 1705.0, "7495": 1665.0, "7500": 1803.0, "7505": 1816.0, "7510": 1887.0, "7515": 1697.0, "7520": 1707.0, "7525": 1825.0, "7530": 1772.0, "7535": 1779.0, "7540": 1819.0, "7545": 1716.0, "7550": 1771.0, "7555": 1792.0, "7560": 1658.0, "7565": 1795.0, "7570": 1648.0, "7575": 1801.0, "7580": 1763.0, "7585": 1858.0, "7590": 1846.0, "7595": 1779.0, "7600": 1766.0, "7605": 1496.0, "7610": 1759.0, "7615": 1821.0, "7620": 1801.0, "7625": 1737.0, "7630": 1838.0, "7635": 1746.0, "7640": 1767.0, "7645": 2283.0, "7650": 1754.0, "7655": 1746.0, "7660": 1815.0, "7665": 1825.0, "7670": 1738.0, "7675": 1719.0, "7680": 1806.0, "7685": 1853.0, "7690": 1779.0, "7695": 1733.0, "7700": 1818.0, "7705": 1804.0, "7710": 1860.0, "7715": 1859.0, "7720": 1897.0, "7725": 1764.0, "7730": 1894.0, "7735": 1809.0, "7740": 1859.0, "7745": 1808.0, "7750": 1794.0, "7755": 1873.0, "7760": 1871.0, "7765": 1879.0, "7770": 1697.0, "7775": 1832.0, "7780": 1742.0, "7785": 1779.0, "7790": 1760.0, "7795": 1836.0, "7800": 1941.0, "7805": 1898.0, "7810": 1637.0, "7815": 1783.0, "7820": 1815.0, "7825": 1755.0, "7830": 1733.0, "7835": 1782.0, "7840": 1835.0, "7845": 1767.0, "7850": 1754.0, "7855": 1658.0, "7860": 1868.0, "7865": 1741.0, "7870": 1862.0, "7875": 1747.0, "7880": 1903.0, "7885": 2026.0, "7890": 1873.0, "7895": 1893.0, "7900": 1732.0, "7905": 2023.0, "7910": 1803.0, "7915": 1759.0, "7920": 1772.0, "7925": 1662.0, "7930": 1877.0, "7935": 1771.0, "7940": 1831.0, "7945": 1951.0, "7950": 1821.0, "7955": 1864.0, "7960": 1772.0, "7965": 1767.0, "7970": 1723.0, "7975": 1794.0, "7980": 1820.0, "7985": 1910.0, "7990": 1796.0, "7995": 1705.0, "8000": 1943.0, "8005": 1977.0, "8010": 1732.0, "8015": 1706.0, "8020": 1796.0, "8025": 1981.0, "8030": 1696.0, "8035": 1795.0, "8040": 1850.0, "8045": 1901.0, "8050": 1750.0, "8055": 1933.0, "8060": 1776.0, "8065": 1547.0, "8070": 1710.0, "8075": 1873.0, "8080": 1874.0, "8085": 1922.0, "8090": 1797.0, "8095": 1838.0, "8100": 1813.0, "8105": 1757.0, "8110": 1750.0, "8115": 1900.0, "8120": 1795.0, "8125": 1663.0, "8130": 1871.0, "8135": 1764.0, "8140": 1801.0, "8145": 1975.0, "8150": 1815.0, "8155": 1834.0, "8160": 1684.0, "8165": 1934.0, "8170": 1868.0, "8175": 1844.0, "8180": 1734.0, "8185": 1622.0, "8190": 1702.0, "8195": 1668.0, "8200": 1713.0, "8205": 1715.0, "8210": 1694.0, "8215": 1669.0, "8220": 1757.0, "8225": 1655.0, "8230": 1838.0, "8235": 1716.0, "8240": 1594.0, "8245": 1633.0, "8250": 1707.0, "8255": 1614.0, "8260": 1791.0, "8265": 1680.0, "8270": 1734.0, "8275": 1711.0, "8280": 1642.0, "8285": 1641.0, "8290": 1733.0, "8295": 1686.0, "8300": 1673.0, "8305": 1586.0, "8310": 1742.0, "8315": 1533.0, "8320": 1679.0, "8325": 1642.0, "8330": 1647.0, "8335": 1644.0, "8340": 1738.0, "8345": 1660.0, "8350": 1700.0, "8355": 1665.0, "8360": 1598.0, "8365": 1648.0, "8370": 1747.0, "8375": 1620.0, "8380": 1677.0, "8385": 1654.0, "8390": 1742.0, "8395": 1704.0, "8400": 1710.0, "8405": 1593.0, "8410": 1627.0, "8415": 1715.0, "8420": 1676.0, "8425": 1658.0, "8430": 1694.0, "8435": 1641.0, "8440": 1685.0, "8445": 1641.0, "8450": 1718.0, "8455": 1697.0, "8460": 1642.0, "8465": 1660.0, "8470": 1613.0, "8475": 1779.0, "8480": 1671.0, "8485": 1846.0, "8490": 1774.0, "8495": 1619.0, "8500": 1740.0, "8505": 1662.0, "8510": 1652.0, "8515": 1735.0, "8520": 1719.0, "8525": 1636.0, "8530": 1726.0, "8535": 1683.0, "8540": 1663.0, "8545": 1694.0, "8550": 1943.0, "8555": 1692.0, "8560": 1735.0, "8565": 1788.0, "8570": 1697.0, "8575": 1841.0, "8580": 1682.0, "8585": 1765.0, "8590": 1798.0, "8595": 1695.0, "8600": 1806.0, "8605": 1682.0, "8610": 1643.0, "8615": 1680.0, "8620": 1736.0, "8625": 1711.0, "8630": 1728.0, "8635": 1714.0, "8640": 1702.0, "8645": 1613.0, "8650": 1676.0, "8655": 1671.0, "8660": 1599.0, "8665": 1662.0, "8670": 1684.0, "8675": 1752.0, "8680": 1806.0, "8685": 1682.0, "8690": 1732.0, "8695": 1668.0, "8700": 1696.0, "8705": 1745.0, "8710": 1741.0, "8715": 1748.0, "8720": 1606.0, "8725": 1641.0, "8730": 1652.0, "8735": 1732.0, "8740": 1690.0, "8745": 1727.0, "8750": 1686.0, "8755": 1666.0, "8760": 1728.0, "8765": 1798.0, "8770": 1661.0, "8775": 1706.0, "8780": 1699.0, "8785": 1661.0, "8790": 1702.0, "8795": 1636.0, "8800": 1694.0, "8805": 1786.0, "8810": 1792.0, "8815": 1817.0, "8820": 1627.0, "8825": 1699.0, "8830": 1591.0, "8835": 1626.0, "8840": 1826.0, "8845": 1817.0, "8850": 1690.0, "8855": 1559.0, "8860": 1776.0, "8865": 1631.0, "8870": 1687.0, "8875": 1650.0, "8880": 1606.0, "8885": 1711.0, "8890": 1591.0, "8895": 1676.0, "8900": 1644.0, "8905": 1694.0, "8910": 1711.0, "8915": 1612.0, "8920": 1585.0, "8925": 1743.0, "8930": 1731.0, "8935": 1703.0, "8940": 1717.0, "8945": 1872.0, "8950": 1682.0, "8955": 1656.0, "8960": 1524.0, "8965": 1636.0, "8970": 1665.0, "8975": 1715.0, "8980": 1731.0, "8985": 1721.0, "8990": 1645.0, "8995": 1678.0, "9000": 1699.0, "9005": 1724.0, "9010": 1701.0, "9015": 1678.0, "9020": 1660.0, "9025": 1720.0, "9030": 1732.0, "9035": 1777.0, "9040": 1727.0, "9045": 1749.0, "9050": 1652.0, "9055": 1591.0, "9060": 1692.0, "9065": 1653.0, "9070": 1681.0, "9075": 1620.0, "9080": 1652.0, "9085": 1659.0, "9090": 1740.0, "9095": 1691.0, "9100": 1678.0, "9105": 1643.0, "9110": 1683.0, "9115": 1763.0, "9120": 1722.0, "9125": 1743.0, "9130": 1737.0, "9135": 1762.0, "9140": 1694.0, "9145": 1667.0, "9150": 1716.0, "9155": 1729.0, "9160": 1606.0, "9165": 2067.0, "9170": 1611.0, "9175": 1687.0, "9180": 1545.0, "9185": 1661.0, "9190": 1645.0, "9195": 1563.0, "9200": 1629.0, "9205": 1765.0, "9210": 1655.0, "9215": 1786.0, "9220": 1710.0, "9225": 1750.0, "9230": 1684.0, "9235": 1594.0, "9240": 1774.0, "9245": 1738.0, "9250": 1743.0, "9255": 1809.0, "9260": 1703.0, "9265": 1782.0, "9270": 1656.0, "9275": 1723.0, "9280": 1662.0, "9285": 1793.0, "9290": 1661.0, "9295": 1797.0, "9300": 1719.0, "9305": 1732.0, "9310": 1793.0, "9315": 1717.0, "9320": 1652.0, "9325": 1732.0, "9330": 1834.0, "9335": 1684.0, "9340": 1538.0, "9345": 1740.0, "9350": 1642.0, "9355": 1619.0, "9360": 1722.0, "9365": 1699.0, "9370": 1674.0, "9375": 1649.0, "9380": 1762.0, "9385": 1599.0, "9390": 1636.0, "9395": 1650.0, "9400": 1712.0, "9405": 1680.0, "9410": 1715.0, "9415": 1743.0, "9420": 1734.0, "9425": 1685.0, "9430": 1655.0, "9435": 1726.0, "9440": 1675.0, "9445": 1786.0, "9450": 1714.0, "9455": 1635.0, "9460": 1707.0, "9465": 1772.0, "9470": 1725.0, "9475": 1730.0, "9480": 1882.0, "9485": 1772.0, "9490": 1762.0, "9495": 1679.0, "9500": 1701.0, "9505": 1787.0, "9510": 1725.0, "9515": 1626.0, "9520": 1668.0, "9525": 1699.0, "9530": 1728.0, "9535": 1676.0, "9540": 1780.0, "9545": 1624.0, "9550": 1557.0, "9555": 1651.0, "9560": 1772.0, "9565": 1626.0, "9570": 1842.0, "9575": 1790.0, "9580": 1693.0, "9585": 1736.0, "9590": 1702.0, "9595": 1642.0, "9600": 1551.0, "9605": 1784.0, "9610": 1792.0, "9615": 1624.0, "9620": 1741.0, "9625": 1659.0, "9630": 1792.0, "9635": 1700.0, "9640": 1704.0, "9645": 1701.0, "9650": 1733.0, "9655": 1757.0, "9660": 1682.0, "9665": 1686.0, "9670": 1803.0, "9675": 1706.0, "9680": 1712.0, "9685": 1607.0, "9690": 1714.0, "9695": 1620.0, "9700": 1715.0, "9705": 1804.0, "9710": 1694.0, "9715": 1634.0, "9720": 1636.0, "9725": 1602.0, "9730": 1644.0, "9735": 1605.0, "9740": 1696.0, "9745": 1625.0, "9750": 1735.0, "9755": 1701.0, "9760": 1723.0, "9765": 1710.0, "9770": 1664.0, "9775": 1774.0, "9780": 1785.0, "9785": 1671.0, "9790": 1723.0, "9795": 1689.0, "9800": 1709.0, "9805": 1631.0, "9810": 1726.0, "9815": 1701.0, "9820": 1630.0, "9825": 1697.0, "9830": 1623.0, "9835": 1735.0, "9840": 1523.0, "9845": 1766.0, "9850": 1712.0, "9855": 1645.0, "9860": 1657.0, "9865": 1681.0, "9870": 1627.0, "9875": 1725.0, "9880": 1559.0, "9885": 1863.0, "9890": 1671.0, "9895": 1775.0, "9900": 1723.0, "9905": 1794.0, "9910": 1690.0, "9915": 1715.0, "9920": 1687.0, "9925": 1676.0, "9930": 1671.0, "9935": 1780.0, "9940": 1824.0, "9945": 1823.0, "9950": 1674.0, "9955": 1700.0, "9960": 1717.0, "9965": 1664.0, "9970": 1700.0, "9975": 1536.0, "9980": 1630.0, "9985": 1710.0, "9990": 1705.0, "9995": 1744.0, "10000": 1677.0, "10005": 1688.0, "10010": 1642.0, "10015": 1618.0, "10020": 1624.0, "10025": 1674.0, "10030": 1716.0, "10035": 1733.0, "10040": 1659.0, "10045": 1717.0, "10050": 1708.0, "10055": 1713.0, "10060": 1634.0, "10065": 1738.0, "10070": 1574.0, "10075": 1650.0, "10080": 1685.0, "10085": 1638.0, "10090": 1695.0, "10095": 1644.0, "10100": 1713.0, "10105": 1671.0, "10110": 1629.0, "10115": 1682.0, "10120": 1709.0, "10125": 1719.0, "10130": 1749.0, "10135": 1716.0, "10140": 1628.0, "10145": 1691.0, "10150": 1683.0, "10155": 1713.0, "10160": 1658.0, "10165": 1698.0, "10170": 1720.0, "10175": 1682.0, "10180": 1710.0, "10185": 1739.0, "10190": 1695.0, "10195": 1695.0, "10200": 1719.0, "10205": 1707.0, "10210": 2053.0, "10215": 1608.0, "10220": 1608.0, "10225": 1576.0, "10230": 1603.0, "10235": 1645.0, "10240": 1581.0, "10245": 1697.0, "10250": 2210.0, "10255": 1553.0, "10260": 1591.0, "10265": 1604.0, "10270": 1597.0, "10275": 1611.0, "10280": 1567.0, "10285": 1608.0, "10290": 1607.0, "10295": 1547.0, "10300": 1650.0, "10305": 1729.0, "10310": 1620.0, "10315": 1587.0, "10320": 1572.0, "10325": 1643.0, "10330": 1612.0, "10335": 1585.0, "10340": 1600.0, "10345": 1649.0, "10350": 1619.0, "10355": 1592.0, "10360": 1630.0, "10365": 1587.0, "10370": 1611.0, "10375": 1626.0, "10380": 1591.0, "10385": 1581.0, "10390": 1593.0, "10395": 1550.0, "10400": 1506.0, "10405": 1711.0, "10410": 1633.0, "10415": 1585.0, "10420": 1611.0, "10425": 1640.0, "10430": 1625.0, "10435": 1599.0, "10440": 1604.0, "10445": 1555.0, "10450": 1601.0, "10455": 1673.0, "10460": 1615.0, "10465": 1604.0, "10470": 1569.0, "10475": 1639.0, "10480": 1540.0, "10485": 1668.0, "10490": 1685.0, "10495": 1591.0, "10500": 1627.0, "10505": 1659.0, "10510": 1590.0, "10515": 1661.0, "10520": 1637.0, "10525": 1554.0, "10530": 1662.0, "10535": 1572.0, "10540": 1676.0, "10545": 1581.0, "10550": 1711.0, "10555": 1658.0, "10560": 1586.0, "10565": 1639.0, "10570": 1620.0, "10575": 1567.0, "10580": 1632.0, "10585": 1573.0, "10590": 1561.0, "10595": 1595.0, "10600": 1524.0, "10605": 1629.0, "10610": 1659.0, "10615": 1665.0, "10620": 1625.0, "10625": 1602.0, "10630": 1591.0, "10635": 1649.0, "10640": 1615.0, "10645": 1662.0, "10650": 1560.0, "10655": 1575.0, "10660": 1602.0, "10665": 1642.0, "10670": 1668.0, "10675": 1615.0, "10680": 1555.0, "10685": 1636.0, "10690": 1543.0, "10695": 1662.0, "10700": 1648.0, "10705": 1625.0, "10710": 1669.0, "10715": 1668.0, "10720": 1647.0, "10725": 1689.0, "10730": 1614.0, "10735": 1635.0, "10740": 1602.0, "10745": 1678.0, "10750": 1693.0, "10755": 1643.0, "10760": 1545.0, "10765": 1783.0, "10770": 1669.0, "10775": 1744.0, "10780": 1684.0, "10785": 1648.0, "10790": 1691.0, "10795": 1653.0, "10800": 1748.0, "10805": 1671.0, "10810": 1615.0, "10815": 1713.0, "10820": 1711.0, "10825": 1715.0, "10830": 1703.0, "10835": 1669.0, "10840": 1663.0, "10845": 1652.0, "10850": 1715.0, "10855": 1660.0, "10860": 1622.0, "10865": 1697.0, "10870": 1696.0, "10875": 1727.0, "10880": 1682.0, "10885": 1637.0, "10890": 1701.0, "10895": 1704.0, "10900": 1713.0, "10905": 1707.0, "10910": 1668.0, "10915": 1724.0, "10920": 1678.0, "10925": 1689.0, "10930": 1656.0, "10935": 1747.0, "10940": 1595.0, "10945": 1722.0, "10950": 1588.0, "10955": 1682.0, "10960": 1642.0, "10965": 1674.0, "10970": 1651.0, "10975": 1661.0, "10980": 1679.0, "10985": 1659.0, "10990": 1738.0, "10995": 1718.0, "11000": 1733.0, "11005": 1619.0, "11010": 1682.0, "11015": 1710.0, "11020": 1673.0, "11025": 1689.0, "11030": 1760.0, "11035": 1622.0, "11040": 1604.0, "11045": 1858.0, "11050": 1705.0, "11055": 1719.0, "11060": 1739.0, "11065": 1642.0, "11070": 1648.0, "11075": 1732.0, "11080": 1732.0, "11085": 1668.0, "11090": 1769.0, "11095": 1815.0, "11100": 1718.0, "11105": 1616.0, "11110": 1652.0, "11115": 1690.0, "11120": 1683.0, "11125": 1655.0, "11130": 1779.0, "11135": 1619.0, "11140": 1790.0, "11145": 1754.0, "11150": 1679.0, "11155": 1623.0, "11160": 1616.0, "11165": 1761.0, "11170": 1708.0, "11175": 1695.0, "11180": 1599.0, "11185": 1677.0, "11190": 1503.0, "11195": 1644.0, "11200": 1684.0, "11205": 1684.0, "11210": 1740.0, "11215": 1758.0, "11220": 1662.0, "11225": 1631.0, "11230": 2238.0, "11235": 1669.0, "11240": 1625.0, "11245": 1695.0, "11250": 1613.0, "11255": 1748.0, "11260": 1664.0, "11265": 1663.0, "11270": 1640.0, "11275": 1730.0, "11280": 1662.0, "11285": 1694.0, "11290": 1666.0, "11295": 1762.0, "11300": 1723.0, "11305": 1753.0, "11310": 1644.0, "11315": 1761.0, "11320": 1688.0, "11325": 1740.0, "11330": 1776.0, "11335": 1663.0, "11340": 1690.0, "11345": 1592.0, "11350": 1641.0, "11355": 1615.0, "11360": 1617.0, "11365": 1774.0, "11370": 1601.0, "11375": 1605.0, "11380": 1698.0, "11385": 1680.0, "11390": 1676.0, "11395": 1619.0, "11400": 1680.0, "11405": 1613.0, "11410": 1739.0, "11415": 1628.0, "11420": 1640.0, "11425": 1751.0, "11430": 1709.0, "11435": 1675.0, "11440": 1661.0, "11445": 1671.0, "11450": 1632.0, "11455": 1619.0, "11460": 1690.0, "11465": 1623.0, "11470": 1753.0, "11475": 1726.0, "11480": 1732.0, "11485": 1652.0, "11490": 1643.0, "11495": 1721.0, "11500": 1706.0, "11505": 1704.0, "11510": 1692.0, "11515": 1770.0, "11520": 1707.0, "11525": 1634.0, "11530": 1706.0, "11535": 1751.0, "11540": 1773.0, "11545": 1679.0, "11550": 1727.0, "11555": 1767.0, "11560": 1729.0, "11565": 1663.0, "11570": 1711.0, "11575": 1812.0, "11580": 1716.0, "11585": 1703.0, "11590": 1660.0, "11595": 1630.0, "11600": 1670.0, "11605": 1769.0, "11610": 1761.0, "11615": 1667.0, "11620": 1722.0, "11625": 1781.0, "11630": 1803.0, "11635": 1688.0, "11640": 1601.0, "11645": 1727.0, "11650": 1708.0, "11655": 1673.0, "11660": 1675.0, "11665": 1691.0, "11670": 1831.0, "11675": 1570.0, "11680": 1643.0, "11685": 1672.0, "11690": 1807.0, "11695": 1690.0, "11700": 1707.0, "11705": 1693.0, "11710": 1681.0, "11715": 1667.0, "11720": 1599.0, "11725": 1652.0, "11730": 1525.0, "11735": 1644.0, "11740": 1753.0, "11745": 1638.0, "11750": 1653.0, "11755": 1654.0, "11760": 1610.0, "11765": 1652.0, "11770": 1635.0, "11775": 1611.0, "11780": 1547.0, "11785": 1632.0, "11790": 1701.0, "11795": 1692.0, "11800": 1627.0, "11805": 1667.0, "11810": 1616.0, "11815": 1620.0, "11820": 1641.0, "11825": 1626.0, "11830": 1638.0, "11835": 1666.0, "11840": 1681.0, "11845": 1600.0, "11850": 1591.0, "11855": 1634.0, "11860": 1645.0, "11865": 1615.0, "11870": 1521.0, "11875": 1696.0, "11880": 1612.0, "11885": 1569.0, "11890": 1634.0, "11895": 1647.0, "11900": 1586.0, "11905": 1637.0, "11910": 1721.0, "11915": 1622.0, "11920": 1640.0, "11925": 1666.0, "11930": 1655.0, "11935": 1566.0, "11940": 1661.0, "11945": 1545.0, "11950": 1614.0, "11955": 1607.0, "11960": 1611.0, "11965": 1619.0, "11970": 1589.0, "11975": 1598.0, "11980": 1724.0, "11985": 1644.0, "11990": 1743.0, "11995": 1689.0, "12000": 1692.0, "12005": 1701.0, "12010": 1703.0, "12015": 1735.0, "12020": 1776.0, "12025": 1757.0, "12030": 1592.0, "12035": 1692.0, "12040": 1818.0, "12045": 1730.0, "12050": 1602.0, "12055": 1650.0, "12060": 1737.0, "12065": 1594.0, "12070": 1775.0, "12075": 1773.0, "12080": 1768.0, "12085": 1787.0, "12090": 1806.0, "12095": 1741.0, "12100": 1788.0, "12105": 1788.0, "12110": 1681.0, "12115": 1808.0, "12120": 1910.0, "12125": 1735.0, "12130": 1722.0, "12135": 1818.0, "12140": 1699.0, "12145": 1717.0, "12150": 1657.0, "12155": 1675.0, "12160": 1719.0, "12165": 1649.0, "12170": 1654.0, "12175": 1728.0, "12180": 1802.0, "12185": 1669.0, "12190": 1627.0, "12195": 1694.0, "12200": 1682.0, "12205": 1686.0, "12210": 1799.0, "12215": 1694.0, "12220": 1651.0, "12225": 1721.0, "12230": 1677.0, "12235": 1626.0, "12240": 1663.0, "12245": 1779.0, "12250": 1732.0, "12255": 1646.0, "12260": 1661.0, "12265": 1737.0, "12270": 1737.0, "12275": 1780.0, "12280": 1656.0, "12285": 1673.0, "12290": 1741.0, "12295": 1645.0, "12300": 1613.0, "12305": 1739.0, "12310": 1696.0, "12315": 1699.0, "12320": 1712.0, "12325": 1744.0, "12330": 1680.0, "12335": 1708.0, "12340": 1679.0, "12345": 1778.0, "12350": 1740.0, "12355": 1839.0, "12360": 1636.0, "12365": 1707.0, "12370": 1720.0, "12375": 1729.0, "12380": 1708.0, "12385": 1732.0, "12390": 1701.0, "12395": 1690.0, "12400": 1821.0, "12405": 1751.0, "12410": 1727.0, "12415": 1779.0, "12420": 1740.0, "12425": 1639.0, "12430": 1713.0, "12435": 1740.0, "12440": 1642.0, "12445": 1760.0, "12450": 1905.0, "12455": 1645.0, "12460": 1783.0, "12465": 1747.0, "12470": 1723.0, "12475": 1707.0, "12480": 1668.0, "12485": 1754.0, "12490": 1672.0, "12495": 1692.0, "12500": 1749.0, "12505": 1642.0, "12510": 1651.0, "12515": 1710.0, "12520": 1705.0, "12525": 1693.0, "12530": 1818.0, "12535": 1637.0, "12540": 1746.0, "12545": 1663.0, "12550": 1772.0, "12555": 1747.0, "12560": 1715.0, "12565": 1749.0, "12570": 1727.0, "12575": 1553.0, "12580": 1720.0, "12585": 1630.0, "12590": 1652.0, "12595": 1819.0, "12600": 1705.0, "12605": 1780.0, "12610": 1797.0, "12615": 1689.0, "12620": 1707.0, "12625": 1708.0, "12630": 1693.0, "12635": 1829.0, "12640": 1845.0, "12645": 1708.0, "12650": 1668.0, "12655": 1784.0, "12660": 1700.0, "12665": 1562.0, "12670": 1737.0, "12675": 1675.0, "12680": 1777.0, "12685": 1793.0, "12690": 1736.0, "12695": 1738.0, "12700": 1713.0, "12705": 1631.0, "12710": 1686.0, "12715": 1662.0, "12720": 1668.0, "12725": 1744.0, "12730": 1665.0, "12735": 1735.0, "12740": 1872.0, "12745": 1664.0, "12750": 1751.0, "12755": 1725.0, "12760": 1781.0, "12765": 1739.0, "12770": 1639.0, "12775": 1619.0, "12780": 1734.0, "12785": 1745.0, "12790": 1673.0, "12795": 1659.0, "12800": 1748.0, "12805": 1746.0, "12810": 1734.0, "12815": 1717.0, "12820": 1626.0, "12825": 1623.0, "12830": 1677.0, "12835": 1700.0, "12840": 1704.0, "12845": 1667.0, "12850": 1705.0, "12855": 1664.0, "12860": 1687.0, "12865": 1732.0, "12870": 1723.0, "12875": 1713.0, "12880": 1733.0, "12885": 1712.0, "12890": 1644.0, "12895": 1693.0, "12900": 1718.0, "12905": 1749.0, "12910": 1757.0, "12915": 1720.0, "12920": 1772.0, "12925": 1753.0, "12930": 1643.0, "12935": 1819.0, "12940": 1734.0, "12945": 1736.0, "12950": 2288.0, "12955": 1646.0, "12960": 1759.0, "12965": 1782.0, "12970": 1624.0, "12975": 1757.0, "12980": 1772.0, "12985": 1742.0, "12990": 1644.0, "12995": 1622.0, "13000": 1664.0, "13005": 1636.0, "13010": 1649.0, "13015": 1657.0, "13020": 1617.0, "13025": 1636.0, "13030": 1657.0, "13035": 1640.0, "13040": 1615.0, "13045": 2056.0, "13050": 1596.0, "13055": 1677.0, "13060": 1588.0, "13065": 1598.0, "13070": 1602.0, "13075": 1593.0, "13080": 1657.0, "13085": 1697.0, "13090": 1594.0, "13095": 1740.0, "13100": 1638.0, "13105": 1601.0, "13110": 1611.0, "13115": 1603.0, "13120": 1556.0, "13125": 1640.0, "13130": 1568.0, "13135": 1695.0, "13140": 1676.0, "13145": 1653.0, "13150": 1619.0, "13155": 1639.0, "13160": 1643.0, "13165": 1626.0, "13170": "nan", "13175": 1686.0, "13180": 1627.0, "13185": 1671.0, "13190": 1669.0, "13195": 1609.0, "13200": 1764.0, "13205": 1518.0, "13210": 1636.0, "13215": 1727.0, "13220": 1592.0, "13225": 1652.0, "13230": 1651.0, "13235": 1584.0, "13240": 1642.0, "13245": 1674.0, "13250": 1709.0, "13255": 1624.0, "13260": 1670.0, "13265": 1598.0, "13270": 1599.0, "13275": 1654.0, "13280": 1735.0, "13285": 1631.0, "13290": 1616.0, "13295": 1742.0, "13300": 1683.0, "13305": 1751.0, "13310": 1628.0, "13315": 2158.0, "13320": 1679.0, "13325": 1623.0, "13330": 1538.0, "13335": 1683.0, "13340": 1711.0, "13345": 1627.0, "13350": 1640.0, "13355": 1703.0, "13360": 1675.0, "13365": 1638.0, "13370": 1593.0, "13375": 1554.0, "13380": 1579.0, "13385": 1747.0, "13390": 1659.0, "13395": 1655.0, "13400": 1658.0, "13405": 1607.0, "13410": 1619.0, "13415": 1640.0, "13420": 1619.0, "13425": 1604.0, "13430": 1688.0, "13435": 1652.0, "13440": 1729.0, "13445": 1643.0, "13450": 1629.0, "13455": 1603.0, "13460": 1602.0, "13465": 1623.0, "13470": 1662.0, "13475": 1658.0, "13480": 1654.0, "13485": 1647.0, "13490": 1654.0, "13495": 1679.0, "13500": 1637.0, "13505": 1613.0, "13510": 1684.0, "13515": 1604.0, "13520": 1578.0, "13525": 1615.0, "13530": 1638.0, "13535": 1675.0, "13540": 1622.0, "13545": 1644.0, "13550": 1589.0, "13555": 1640.0, "13560": 1595.0, "13565": 1570.0, "13570": 1616.0, "13575": 1579.0, "13580": 1603.0, "13585": 1627.0, "13590": 1661.0, "13595": 1636.0, "13600": 1660.0, "13605": 1689.0, "13610": 1589.0, "13615": 1604.0, "13620": 1596.0, "13625": 1588.0, "13630": 1624.0, "13635": 1636.0, "13640": 1607.0, "13645": 1765.0, "13650": 1732.0, "13655": 1616.0, "13660": 1633.0, "13665": 1682.0, "13670": 1647.0, "13675": 1597.0, "13680": 1611.0, "13685": 1593.0, "13690": 1619.0, "13695": 1595.0, "13700": 1740.0, "13705": 1574.0, "13710": 1673.0, "13715": 1589.0, "13720": 1658.0, "13725": 1613.0, "13730": 1674.0, "13735": 1648.0, "13740": 1619.0, "13745": 1626.0, "13750": 1611.0, "13755": 1629.0, "13760": 1603.0, "13765": 1641.0, "13770": 1561.0, "13775": 1696.0, "13780": 1577.0, "13785": 1668.0, "13790": 1563.0, "13795": 1677.0, "13800": 1622.0, "13805": 1621.0, "13810": 1662.0, "13815": 1577.0, "13820": 1624.0, "13825": 1592.0, "13830": 1684.0, "13835": 1649.0, "13840": 1588.0, "13845": 1628.0, "13850": 1585.0, "13855": 1648.0, "13860": 1693.0, "13865": 1634.0, "13870": 1655.0, "13875": 2121.0, "13880": 1768.0, "13885": 1622.0, "13890": 1727.0, "13895": 1642.0, "13900": 1600.0, "13905": 1625.0, "13910": 1557.0, "13915": 1565.0, "13920": 1655.0, "13925": 1615.0, "13930": 1664.0, "13935": 1551.0, "13940": 1634.0, "13945": 1685.0, "13950": 1626.0, "13955": 1632.0, "13960": 1725.0, "13965": 1581.0, "13970": 1690.0, "13975": 1638.0, "13980": 1569.0, "13985": 1632.0, "13990": 1675.0, "13995": 1721.0, "14000": 1656.0, "14005": 1611.0, "14010": 1595.0, "14015": 1644.0, "14020": 1601.0, "14025": 1647.0, "14030": 1741.0, "14035": 1698.0, "14040": 1550.0, "14045": 1654.0, "14050": 1681.0, "14055": 1687.0, "14060": 1680.0, "14065": 1666.0, "14070": 1559.0, "14075": 1642.0, "14080": 1706.0, "14085": 1659.0, "14090": 1577.0, "14095": 1667.0, "14100": 1693.0, "14105": 1681.0, "14110": 1549.0, "14115": 1612.0, "14120": 1616.0, "14125": 1757.0, "14130": 1683.0, "14135": 1688.0, "14140": 1560.0, "14145": 1523.0, "14150": 1611.0, "14155": 1625.0, "14160": 1700.0, "14165": 1658.0, "14170": 1638.0, "14175": 1590.0, "14180": 1569.0, "14185": 1645.0, "14190": 1589.0, "14195": 1614.0, "14200": 1546.0, "14205": 1629.0, "14210": 1592.0, "14215": 1643.0, "14220": 1638.0, "14225": 1670.0, "14230": 1615.0, "14235": 1722.0, "14240": 1625.0, "14245": 1531.0, "14250": 1672.0, "14255": 1617.0, "14260": 1640.0, "14265": 1638.0, "14270": 1647.0, "14275": 1605.0, "14280": 1673.0, "14285": 1743.0, "14290": 1705.0, "14295": 1622.0, "14300": 1594.0, "14305": 1638.0, "14310": 1687.0, "14315": 1660.0, "14320": 1537.0, "14325": 1577.0, "14330": 1752.0, "14335": 1693.0, "14340": 1640.0, "14345": 1700.0, "14350": 1618.0, "14355": 1630.0, "14360": 1677.0, "14365": 1721.0, "14370": 1721.0, "14375": 1724.0, "14380": 1793.0, "14385": 1779.0, "14390": 1631.0, "14395": 1773.0, "14400": 1716.0, "14405": 1792.0, "14410": 1802.0, "14415": 1748.0, "14420": 1786.0, "14425": 1746.0, "14430": 1719.0, "14435": 1711.0, "14440": 1671.0, "14445": 1748.0, "14450": 1671.0, "14455": 1655.0, "14460": 1766.0, "14465": 1744.0, "14470": 1800.0, "14475": 1672.0, "14480": 1788.0, "14485": 1779.0, "14490": 1765.0, "14495": 1595.0, "14500": 1726.0, "14505": 1724.0, "14510": 1711.0, "14515": 1694.0, "14520": 1642.0, "14525": 1629.0, "14530": 1826.0, "14535": 1687.0, "14540": 1781.0, "14545": 1788.0, "14550": 1869.0, "14555": 1769.0, "14560": 1766.0, "14565": 1760.0, "14570": 1698.0, "14575": 1796.0, "14580": 1756.0, "14585": 1722.0, "14590": 1738.0, "14595": 1837.0, "14600": 1786.0, "14605": 1740.0, "14610": 1818.0, "14615": 1756.0, "14620": 1694.0, "14625": 1852.0, "14630": 1759.0, "14635": 1783.0, "14640": 1826.0, "14645": 1783.0, "14650": 1740.0, "14655": 1794.0, "14660": 1681.0, "14665": 1773.0, "14670": 1931.0, "14675": 1899.0, "14680": 1826.0, "14685": 1864.0, "14690": 1568.0, "14695": 1716.0, "14700": 1797.0, "14705": 1730.0, "14710": 1753.0, "14715": 1772.0, "14720": 1724.0, "14725": 1731.0, "14730": 1783.0, "14735": 1947.0, "14740": 1716.0, "14745": 1650.0, "14750": 1807.0, "14755": 1726.0, "14760": 1697.0, "14765": 1885.0, "14770": 1800.0, "14775": 1806.0, "14780": 1781.0, "14785": 1790.0, "14790": 1715.0, "14795": 1757.0, "14800": 1747.0, "14805": 1841.0, "14810": 1755.0, "14815": 1727.0, "14820": 1718.0, "14825": 1725.0, "14830": 1796.0, "14835": 1713.0, "14840": 1707.0, "14845": 1664.0, "14850": 1682.0, "14855": 1767.0, "14860": 1771.0, "14865": 1733.0, "14870": 1708.0, "14875": 1841.0, "14880": 1661.0, "14885": 1873.0, "14890": 1673.0, "14895": 1763.0, "14900": 1718.0, "14905": 1732.0, "14910": 1673.0, "14915": 1590.0, "14920": 1817.0, "14925": 1767.0, "14930": 1701.0, "14935": 1900.0, "14940": 1760.0, "14945": 1624.0, "14950": 1628.0, "14955": 1623.0, "14960": 1733.0, "14965": 1690.0, "14970": 1721.0, "14975": 1606.0, "14980": 1805.0, "14985": 1681.0, "14990": 1790.0, "14995": 1895.0, "15000": 1750.0, "15005": 1844.0, "15010": 1761.0, "15015": 1873.0, "15020": 1736.0, "15025": 1610.0, "15030": 1853.0, "15035": 1792.0, "15040": 1630.0, "15045": 1737.0, "15050": 1701.0, "15055": 1771.0, "15060": 1769.0, "15065": 1729.0, "15070": 1836.0, "15075": 1646.0, "15080": 1738.0, "15085": 1748.0, "15090": 1841.0, "15095": 1810.0, "15100": 1767.0, "15105": 1745.0, "15110": 1831.0, "15115": 1790.0, "15120": 1761.0, "15125": 1871.0, "15130": 1737.0, "15135": 1716.0, "15140": 1926.0, "15145": 1734.0, "15150": 1888.0, "15155": 1794.0, "15160": 1712.0, "15165": 1808.0, "15170": 1763.0, "15175": 1787.0, "15180": 1812.0, "15185": 1751.0, "15190": 1760.0, "15195": 1774.0, "15200": 1653.0, "15205": 1770.0, "15210": 1782.0, "15215": 1801.0, "15220": 1822.0, "15225": 1851.0, "15230": 1717.0, "15235": 1701.0, "15240": 1800.0, "15245": 1760.0, "15250": 1653.0, "15255": 1726.0, "15260": 1789.0, "15265": 1810.0, "15270": 1847.0, "15275": 1718.0, "15280": 1748.0, "15285": 1767.0, "15290": 1772.0, "15295": 1664.0, "15300": 1776.0, "15305": 1788.0, "15310": 1862.0, "15315": 1835.0, "15320": 1819.0, "15325": 1770.0, "15330": 1787.0, "15335": 1774.0, "15340": 1840.0, "15345": 1724.0, "15350": 1735.0, "15355": 1861.0, "15360": 1761.0, "15365": 1719.0, "15370": 1628.0, "15375": 1638.0, "15380": 1655.0, "15385": 1582.0, "15390": 1655.0, "15395": 1675.0, "15400": 1605.0, "15405": 1680.0, "15410": 1837.0, "15415": 1660.0, "15420": 1774.0, "15425": 1705.0, "15430": 1728.0, "15435": 1622.0, "15440": 1631.0, "15445": 1664.0, "15450": 1619.0, "15455": 1645.0, "15460": 1594.0, "15465": 1693.0, "15470": 1606.0, "15475": 1640.0, "15480": 1681.0, "15485": 1694.0, "15490": 1678.0, "15495": 1704.0, "15500": 1702.0, "15505": 1692.0, "15510": 1599.0, "15515": 1725.0, "15520": 1666.0, "15525": 1652.0, "15530": 1653.0, "15535": 1607.0, "15540": 1669.0, "15545": 1620.0, "15550": 1754.0, "15555": 1593.0, "15560": 1613.0, "15565": 1646.0, "15570": 1739.0, "15575": 1647.0, "15580": 1664.0, "15585": 1620.0, "15590": 1652.0, "15595": 1673.0, "15600": 1607.0, "15605": 1639.0, "15610": 1524.0, "15615": 1696.0, "15620": 1559.0, "15625": 1702.0, "15630": 1841.0, "15635": 1628.0, "15640": 1647.0, "15645": 1644.0, "15650": 1663.0, "15655": 1633.0, "15660": 1699.0, "15665": 1682.0, "15670": 1597.0, "15675": 1571.0, "15680": 1627.0, "15685": 1651.0, "15690": 1724.0, "15695": 1722.0, "15700": 1625.0, "15705": "nan", "15710": 1578.0, "15715": 1623.0, "15720": 1626.0, "15725": 1608.0, "15730": 1567.0, "15735": 1792.0, "15740": 1672.0, "15745": 1693.0, "15750": 1775.0, "15755": 1608.0, "15760": 1638.0, "15765": 1676.0, "15770": 1689.0, "15775": 1603.0, "15780": 1641.0, "15785": 1600.0, "15790": 1760.0, "15795": 1675.0, "15800": 1559.0, "15805": 1654.0, "15810": 1680.0, "15815": 1615.0, "15820": 1669.0, "15825": 1649.0, "15830": 1626.0, "15835": 1757.0, "15840": 1591.0, "15845": 1608.0, "15850": 1617.0, "15855": 1616.0, "15860": 1678.0, "15865": 1722.0, "15870": 1629.0, "15875": 1667.0, "15880": 1660.0, "15885": 1661.0, "15890": 1647.0, "15895": 1731.0, "15900": 1726.0, "15905": 1572.0, "15910": 1713.0, "15915": 1613.0, "15920": 1594.0, "15925": 1670.0, "15930": 1606.0, "15935": 1649.0, "15940": 1684.0, "15945": 1657.0, "15950": 1719.0, "15955": 1655.0, "15960": 1587.0, "15965": 1657.0, "15970": 1728.0, "15975": 1573.0, "15980": 1610.0, "15985": 1688.0, "15990": 1729.0, "15995": 1633.0, "16000": 1619.0, "16005": 1689.0, "16010": 1670.0, "16015": 1725.0, "16020": 1734.0, "16025": 1755.0, "16030": 1745.0, "16035": 1652.0, "16040": 1674.0, "16045": 1724.0, "16050": 1703.0, "16055": 1655.0, "16060": 1650.0, "16065": 1598.0, "16070": 1703.0, "16075": 1642.0, "16080": 1694.0, "16085": 1687.0, "16090": 1706.0, "16095": 1645.0, "16100": 1741.0, "16105": 1573.0, "16110": 1641.0, "16115": 1681.0, "16120": 1657.0, "16125": 1673.0, "16130": 1718.0, "16135": 1716.0, "16140": 1756.0, "16145": 1665.0, "16150": 1638.0, "16155": 1649.0, "16160": 1725.0, "16165": 1591.0, "16170": 1669.0, "16175": 1733.0, "16180": 1639.0, "16185": 1707.0, "16190": 1665.0, "16195": 1549.0, "16200": 1669.0, "16205": 1714.0, "16210": 1736.0, "16215": 1636.0, "16220": 1719.0, "16225": 1717.0, "16230": 1552.0, "16235": 1660.0, "16240": 1751.0, "16245": 1690.0, "16250": 1615.0, "16255": 1593.0, "16260": 1726.0, "16265": 1588.0, "16270": 1652.0, "16275": 1608.0, "16280": 1678.0, "16285": 1470.0, "16290": 1643.0, "16295": 1648.0, "16300": 1673.0, "16305": 1658.0, "16310": 1756.0, "16315": 1629.0, "16320": 1654.0, "16325": 1633.0, "16330": 1868.0, "16335": 1636.0, "16340": 1605.0, "16345": 1749.0, "16350": 1673.0, "16355": 1578.0, "16360": 1626.0, "16365": 1668.0, "16370": 2085.0, "16375": 1693.0, "16380": 1800.0, "16385": 1630.0, "16390": 1601.0, "16395": 1819.0, "16400": 1731.0, "16405": 1572.0, "16410": 1690.0, "16415": 1631.0, "16420": 1672.0, "16425": 1677.0, "16430": 1732.0, "16435": 1656.0, "16440": 1686.0, "16445": 1750.0, "16450": 1690.0, "16455": 1732.0, "16460": 1628.0, "16465": 1731.0, "16470": 1652.0, "16475": 1721.0, "16480": 1674.0, "16485": 1768.0, "16490": 1619.0, "16495": 1753.0, "16500": 1750.0, "16505": 1679.0, "16510": 1735.0, "16515": 1650.0, "16520": 1665.0, "16525": 1778.0, "16530": 1772.0, "16535": 1892.0, "16540": 1690.0, "16545": 1613.0, "16550": 1595.0, "16555": 1847.0, "16560": 1703.0, "16565": 1885.0, "16570": 1772.0, "16575": 1818.0, "16580": 1829.0, "16585": 1843.0, "16590": 1957.0, "16595": 1769.0, "16600": 1779.0, "16605": 1982.0, "16610": 1653.0, "16615": 1774.0, "16620": 1837.0, "16625": 1891.0, "16630": 1626.0, "16635": 1736.0, "16640": 1840.0, "16645": 1872.0, "16650": 1669.0, "16655": 1885.0, "16660": 1820.0, "16665": 1745.0, "16670": 1728.0, "16675": 1654.0, "16680": 1750.0, "16685": 1971.0, "16690": 1789.0, "16695": 1848.0, "16700": 1897.0, "16705": 1780.0, "16710": 1906.0, "16715": 1906.0, "16720": 1782.0, "16725": 1784.0, "16730": 1961.0, "16735": 1878.0, "16740": 1769.0, "16745": 1781.0, "16750": 1744.0, "16755": 1834.0, "16760": 1710.0, "16765": 1789.0, "16770": 1758.0, "16775": 1868.0, "16780": 1795.0, "16785": 1737.0, "16790": 1788.0, "16795": 1751.0, "16800": 1767.0, "16805": 1820.0, "16810": 1818.0, "16815": 1861.0, "16820": 1931.0, "16825": 1712.0, "16830": 1885.0, "16835": 1752.0, "16840": 1719.0, "16845": 1728.0, "16850": 1766.0, "16855": 1794.0, "16860": 1730.0, "16865": 1760.0, "16870": 1838.0, "16875": 1734.0, "16880": 1828.0, "16885": 1847.0, "16890": 1677.0, "16895": 1656.0, "16900": 1865.0, "16905": 1939.0, "16910": 1836.0, "16915": 1743.0, "16920": 1870.0, "16925": 1865.0, "16930": 1812.0, "16935": 1802.0, "16940": 1756.0, "16945": 1694.0, "16950": 1787.0, "16955": 1967.0, "16960": 1752.0, "16965": 1712.0, "16970": 1830.0, "16975": 1850.0, "16980": 1852.0, "16985": 1578.0, "16990": 1677.0, "16995": 1825.0, "17000": 1806.0, "17005": 1947.0, "17010": 1763.0, "17015": 1840.0, "17020": 1861.0, "17025": 1891.0, "17030": 1750.0, "17035": 1851.0, "17040": 1807.0, "17045": 1820.0, "17050": 1935.0, "17055": 1756.0, "17060": 1841.0, "17065": 1604.0, "17070": 1902.0, "17075": 1728.0, "17080": 1601.0, "17085": 1879.0, "17090": 1940.0, "17095": 1833.0, "17100": 1821.0, "17105": 1897.0, "17110": 1710.0, "17115": 1913.0, "17120": 1860.0, "17125": 1780.0, "17130": 1819.0, "17135": 1889.0, "17140": 1889.0, "17145": 1863.0, "17150": 1851.0, "17155": 1728.0, "17160": 1869.0, "17165": 1928.0, "17170": 1910.0, "17175": 1906.0, "17180": 1832.0, "17185": 1881.0, "17190": 1767.0, "17195": 1771.0, "17200": 1776.0, "17205": 1696.0, "17210": 1718.0, "17215": 1814.0, "17220": 1702.0, "17225": 1774.0, "17230": 1861.0, "17235": 1698.0, "17240": 1876.0, "17245": 1880.0, "17250": 1812.0, "17255": 1726.0, "17260": 1537.0, "17265": 1794.0, "17270": 2016.0, "17275": 1845.0, "17280": 1871.0, "17285": 1842.0, "17290": 1797.0, "17295": 1862.0, "17300": 1784.0, "17305": 1898.0, "17310": 1804.0, "17315": 1963.0, "17320": 1906.0, "17325": 1907.0, "17330": 1664.0, "17335": 1886.0, "17340": 1897.0, "17345": 2005.0, "17350": 1765.0, "17355": 1898.0, "17360": 1848.0, "17365": 1783.0, "17370": 1737.0, "17375": 1847.0, "17380": 1701.0, "17385": 1841.0, "17390": 1796.0, "17395": 1923.0, "17400": 1774.0, "17405": 1832.0, "17410": 1887.0, "17415": 1852.0, "17420": 1926.0, "17425": 1876.0, "17430": 1766.0, "17435": 1790.0, "17440": 1733.0, "17445": 1745.0, "17450": 1949.0, "17455": 1646.0, "17460": 1755.0, "17465": 1965.0, "17470": 1932.0, "17475": 1813.0, "17480": 1857.0, "17485": 1873.0, "17490": 1760.0, "17495": 1880.0, "17500": 1834.0, "17505": 1775.0, "17510": 1846.0, "17515": 1853.0, "17520": 1782.0, "17525": 1735.0, "17530": 1810.0, "17535": 1780.0, "17540": 1791.0, "17545": 1697.0, "17550": 1837.0, "17555": 1756.0, "17560": 1763.0, "17565": 1633.0, "17570": 1620.0, "17575": 1669.0, "17580": 1709.0, "17585": 1691.0, "17590": 1701.0, "17595": 1667.0, "17600": 1724.0, "17605": 1615.0, "17610": 1715.0, "17615": 1663.0, "17620": 1636.0, "17625": 1539.0, "17630": 1652.0, "17635": 1624.0, "17640": 1743.0, "17645": 1733.0, "17650": 1670.0, "17655": 1691.0, "17660": 1591.0, "17665": 1689.0, "17670": 1705.0, "17675": 1842.0, "17680": 1588.0, "17685": 1643.0, "17690": 1599.0, "17695": 1766.0, "17700": 1768.0, "17705": 1758.0, "17710": 1748.0, "17715": 1655.0, "17720": 1760.0, "17725": 1706.0, "17730": 1740.0, "17735": 1726.0, "17740": 1564.0, "17745": 1657.0, "17750": 1794.0, "17755": 1628.0, "17760": 1747.0, "17765": 1719.0, "17770": 1730.0, "17775": 1697.0, "17780": 1805.0, "17785": 1645.0, "17790": 1639.0, "17795": 1779.0, "17800": 1631.0, "17805": 1711.0, "17810": 1676.0, "17815": 1710.0, "17820": 1726.0, "17825": 1671.0, "17830": 1767.0, "17835": 1656.0, "17840": 1784.0, "17845": 1663.0, "17850": 1791.0, "17855": 1743.0, "17860": 1711.0, "17865": 1598.0, "17870": 1687.0, "17875": 1650.0, "17880": 1643.0, "17885": 1701.0, "17890": 1711.0, "17895": 1658.0, "17900": 1675.0, "17905": 1620.0, "17910": 1777.0, "17915": 1681.0, "17920": 1709.0, "17925": 1795.0, "17930": 1535.0, "17935": 1635.0, "17940": 1658.0, "17945": 1558.0, "17950": 1679.0, "17955": 1681.0, "17960": 1698.0, "17965": 1667.0, "17970": 1706.0, "17975": 1552.0, "17980": 1662.0, "17985": 1710.0, "17990": 1629.0, "17995": 1708.0, "18000": 1634.0, "18005": 1724.0, "18010": 1694.0, "18015": 1659.0, "18020": 1679.0, "18025": 1793.0, "18030": 1611.0, "18035": 1743.0, "18040": 1626.0, "18045": 1698.0, "18050": 1641.0, "18055": 1643.0, "18060": 1698.0, "18065": 1629.0, "18070": 1742.0, "18075": 1708.0, "18080": 1711.0, "18085": 1699.0, "18090": 1645.0, "18095": 1682.0, "18100": 1733.0, "18105": 1681.0, "18110": 1534.0, "18115": 1647.0, "18120": 1685.0, "18125": 1681.0, "18130": 1589.0, "18135": 1757.0, "18140": 1746.0, "18145": 1672.0, "18150": 1744.0, "18155": 1703.0, "18160": 1729.0, "18165": 1694.0, "18170": 1656.0, "18175": 1541.0, "18180": 1757.0, "18185": 1814.0, "18190": 1792.0, "18195": 1659.0, "18200": 1736.0, "18205": 1675.0, "18210": 1614.0, "18215": 1758.0, "18220": 1696.0, "18225": 1668.0, "18230": 1634.0, "18235": 1605.0, "18240": 1700.0, "18245": 1656.0, "18250": 1757.0, "18255": 1590.0, "18260": 1745.0, "18265": 1671.0, "18270": 1720.0, "18275": 1669.0, "18280": 1627.0, "18285": 1607.0, "18290": 1733.0, "18295": 1752.0, "18300": 1684.0, "18305": 1741.0, "18310": 1701.0, "18315": 1702.0, "18320": 1701.0, "18325": 1760.0, "18330": 1730.0, "18335": 1664.0, "18340": 1597.0, "18345": 1679.0, "18350": 1582.0, "18355": 1736.0, "18360": 1691.0, "18365": 1606.0, "18370": 1658.0, "18375": 1663.0, "18380": 1669.0, "18385": 1685.0, "18390": 1688.0, "18395": 1745.0, "18400": 1574.0, "18405": 1681.0, "18410": 1562.0, "18415": 1702.0, "18420": 1715.0, "18425": 1686.0, "18430": 1672.0, "18435": 1697.0, "18440": 1847.0, "18445": 1657.0, "18450": 1778.0, "18455": 1658.0, "18460": 1679.0, "18465": 1700.0, "18470": 1702.0, "18475": 1606.0, "18480": 1600.0, "18485": 1721.0, "18490": 1724.0, "18495": 1658.0, "18500": 1681.0, "18505": 1708.0, "18510": 1745.0, "18515": 1659.0, "18520": 1692.0, "18525": 1735.0, "18530": 1728.0, "18535": 1572.0, "18540": 1725.0, "18545": 1615.0, "18550": 1740.0, "18555": 1758.0, "18560": 1750.0, "18565": 1656.0, "18570": 1658.0, "18575": 1613.0, "18580": 1647.0, "18585": 1742.0, "18590": 1640.0, "18595": 1648.0, "18600": 1690.0, "18605": 1632.0, "18610": 1650.0, "18615": 1593.0, "18620": 1643.0, "18625": 1623.0, "18630": 1690.0, "18635": 1798.0, "18640": 1743.0, "18645": 1699.0, "18650": 1695.0, "18655": 1653.0, "18660": 1726.0, "18665": 1711.0, "18670": 1658.0, "18675": 1713.0, "18680": 1773.0, "18685": 1761.0, "18690": 1690.0, "18695": 1727.0, "18700": 1746.0, "18705": 1661.0, "18710": 1654.0, "18715": 1623.0, "18720": "nan", "18725": 1694.0, "18730": 1710.0, "18735": 1601.0, "18740": 1701.0, "18745": 1732.0, "18750": 1712.0, "18755": 1737.0, "18760": 1743.0, "18765": 1689.0, "18770": 1641.0, "18775": 1618.0, "18780": 1806.0, "18785": 1681.0, "18790": 1649.0, "18795": 1690.0, "18800": 1658.0, "18805": 1733.0, "18810": 1650.0, "18815": 1692.0, "18820": 1699.0, "18825": 1726.0, "18830": 1758.0, "18835": 1812.0, "18840": 1631.0, "18845": 1700.0, "18850": 1661.0, "18855": 1669.0, "18860": 1575.0, "18865": 1669.0, "18870": 1728.0, "18875": 1742.0, "18880": 1734.0, "18885": 1948.0, "18890": 1791.0, "18895": 1595.0, "18900": 1803.0, "18905": 1627.0, "18910": 1690.0, "18915": 1667.0, "18920": 1694.0, "18925": 1591.0, "18930": 1650.0, "18935": 1649.0, "18940": 1606.0, "18945": 1619.0, "18950": 1721.0, "18955": 1702.0, "18960": 1762.0, "18965": 1697.0, "18970": 1787.0, "18975": 1620.0, "18980": 1652.0, "18985": 1660.0, "18990": 1618.0, "18995": 1689.0, "19000": 1705.0, "19005": 1568.0, "19010": 1744.0, "19015": 1658.0, "19020": 1646.0, "19025": 1639.0, "19030": 1670.0, "19035": 1737.0, "19040": 1749.0, "19045": 1621.0, "19050": 1584.0, "19055": 1700.0, "19060": 1711.0, "19065": 1690.0, "19070": 1687.0, "19075": 1708.0, "19080": 1724.0, "19085": 1661.0, "19090": 1704.0, "19095": 1606.0, "19100": 1776.0, "19105": 1751.0, "19110": 1680.0, "19115": 1682.0, "19120": 1598.0, "19125": 1571.0, "19130": 1764.0, "19135": 1706.0, "19140": 1668.0, "19145": 1683.0, "19150": 1656.0, "19155": 1615.0, "19160": 1680.0, "19165": 1689.0, "19170": 1709.0, "19175": 1603.0, "19180": 1828.0, "19185": 1665.0, "19190": 1772.0, "19195": 1681.0, "19200": 1663.0, "19205": 1652.0, "19210": 1796.0, "19215": 1934.0, "19220": 1844.0, "19225": 1728.0, "19230": 1753.0, "19235": 1681.0, "19240": 1692.0, "19245": 1684.0, "19250": 1708.0, "19255": 1729.0, "19260": 1722.0, "19265": 1730.0, "19270": 2319.0, "19275": 1782.0, "19280": 1720.0, "19285": 1690.0, "19290": 1753.0, "19295": 1622.0, "19300": 1787.0, "19305": 1655.0, "19310": 1656.0, "19315": 1597.0, "19320": 1728.0, "19325": 1684.0, "19330": 1693.0, "19335": 1617.0, "19340": 1808.0, "19345": 1723.0, "19350": 1727.0, "19355": 1777.0, "19360": 1748.0, "19365": 1618.0, "19370": 1620.0, "19375": 1802.0, "19380": 1590.0, "19385": 1799.0, "19390": 1677.0, "19395": 1721.0, "19400": 1734.0, "19405": 1636.0, "19410": 1589.0, "19415": 1736.0, "19420": 1675.0, "19425": 1732.0, "19430": 1688.0, "19435": 1730.0, "19440": 1730.0, "19445": 1686.0, "19450": 1696.0, "19455": 1654.0, "19460": 1682.0, "19465": 1665.0, "19470": 1741.0, "19475": 1706.0, "19480": 1695.0, "19485": 1723.0, "19490": 1647.0, "19495": 1618.0, "19500": 1772.0, "19505": 1723.0, "19510": 1682.0, "19515": 1778.0, "19520": 1803.0, "19525": 1714.0, "19530": 1734.0, "19535": 1722.0, "19540": 1625.0, "19545": 1717.0, "19550": 1678.0, "19555": 1766.0, "19560": 1664.0, "19565": 1676.0, "19570": 1702.0, "19575": 1690.0, "19580": 1755.0, "19585": 1562.0, "19590": 1624.0, "19595": 1696.0, "19600": 1725.0, "19605": 1708.0, "19610": 1720.0, "19615": 1671.0, "19620": 1629.0, "19625": 1704.0, "19630": 1642.0, "19635": 1684.0, "19640": 1695.0, "19645": 1733.0, "19650": 1649.0, "19655": 1721.0, "19660": 1704.0, "19665": 1751.0, "19670": 1631.0, "19675": 1628.0, "19680": 1618.0, "19685": 1718.0, "19690": 1697.0, "19695": 1651.0, "19700": 1538.0, "19705": 1748.0, "19710": 1700.0, "19715": 1726.0, "19720": 1712.0, "19725": 1695.0, "19730": 1653.0, "19735": 1728.0, "19740": 1701.0, "19745": 1679.0, "19750": 1722.0, "19755": 1546.0, "19760": 1670.0, "19765": 1715.0, "19770": 1726.0, "19775": 1719.0, "19780": 1811.0, "19785": 1585.0, "19790": 1685.0, "19795": 1781.0, "19800": 1612.0, "19805": 1755.0, "19810": 1712.0, "19815": 1695.0, "19820": 1787.0, "19825": 1724.0, "19830": 1675.0, "19835": 1662.0, "19840": 1626.0, "19845": 1645.0, "19850": 1805.0, "19855": 1614.0, "19860": 1692.0, "19865": 1735.0, "19870": 1663.0, "19875": 1726.0, "19880": 1641.0, "19885": 1608.0, "19890": 1715.0, "19895": 1562.0, "19900": 1608.0, "19905": 1762.0, "19910": 1666.0, "19915": 1682.0, "19920": 1753.0, "19925": 1793.0, "19930": 1686.0, "19935": 1681.0, "19940": 1662.0, "19945": 1710.0, "19950": 1599.0, "19955": 1724.0, "19960": 1841.0, "19965": 1705.0, "19970": 1750.0, "19975": 1729.0, "19980": 1709.0, "19985": 1707.0, "19990": 1544.0, "19995": 1630.0, "20000": 1782.0, "20005": "nan", "20010": "nan", "20015": "nan", "20020": "nan", "20025": "nan", "20030": "nan", "20035": "nan", "20040": "nan", "20045": "nan", "20050": "nan", "20055": "nan", "20060": "nan", "20065": "nan", "20070": "nan", "20075": "nan", "20080": "nan", "20085": "nan", "20090": "nan", "20095": "nan", "20100": "nan", "20105": "nan", "20110": "nan", "20115": "nan", "20120": "nan", "20125": "nan", "20130": "nan", "20135": "nan", "20140": "nan", "20145": "nan", "20150": "nan", "20155": "nan", "20160": "nan", "20165": "nan", "20170": "nan", "20175": "nan", "20180": "nan", "20185": "nan", "20190": "nan", "20195": "nan", "20200": "nan", "20205": "nan", "20210": "nan", "20215": "nan", "20220": "nan", "20225": "nan", "20230": "nan", "20235": "nan", "20240": "nan", "20245": "nan", "20250": "nan", "20255": "nan", "20260": "nan", "20265": "nan", "20270": "nan", "20275": "nan", "20280": "nan", "20285": "nan", "20290": "nan", "20295": "nan", "20300": "nan", "20305": "nan", "20310": "nan", "20315": "nan", "20320": "nan", "20325": "nan", "20330": "nan", "20335": "nan", "20340": "nan", "20345": "nan", "20350": "nan", "20355": "nan", "20360": "nan", "20365": "nan", "20370": "nan", "20375": "nan", "20380": "nan", "20385": "nan", "20390": "nan", "20395": "nan", "20400": "nan", "20405": "nan", "20410": "nan", "20415": "nan", "20420": "nan", "20425": "nan", "20430": "nan", "20435": "nan", "20440": "nan", "20445": "nan", "20450": "nan", "20455": "nan", "20460": "nan", "20465": "nan", "20470": "nan", "20475": "nan", "20480": "nan", "20485": "nan", "20490": "nan", "20495": "nan", "20500": "nan", "20505": "nan", "20510": "nan", "20515": "nan", "20520": "nan", "20525": "nan", "20530": "nan", "20535": "nan", "20540": "nan", "20545": "nan", "20550": "nan", "20555": "nan", "20560": "nan", "20565": "nan", "20570": "nan", "20575": "nan", "20580": "nan", "20585": "nan", "20590": "nan", "20595": "nan", "20600": "nan", "20605": "nan", "20610": "nan", "20615": "nan", "20620": "nan", "20625": "nan", "20630": "nan", "20635": "nan", "20640": "nan", "20645": "nan", "20650": "nan", "20655": "nan", "20660": "nan", "20665": "nan", "20670": "nan", "20675": "nan", "20680": "nan", "20685": "nan", "20690": "nan", "20695": "nan", "20700": "nan", "20705": "nan", "20710": "nan", "20715": "nan", "20720": "nan", "20725": "nan", "20730": "nan", "20735": "nan", "20740": "nan", "20745": "nan", "20750": "nan", "20755": "nan", "20760": "nan", "20765": "nan", "20770": "nan", "20775": "nan", "20780": "nan", "20785": "nan", "20790": "nan", "20795": "nan", "20800": "nan", "20805": "nan", "20810": "nan", "20815": "nan", "20820": "nan", "20825": "nan", "20830": "nan", "20835": "nan", "20840": "nan", "20845": "nan", "20850": "nan", "20855": "nan", "20860": "nan", "20865": "nan", "20870": "nan", "20875": "nan", "20880": "nan", "20885": "nan", "20890": "nan", "20895": "nan", "20900": "nan", "20905": "nan", "20910": "nan", "20915": "nan", "20920": "nan", "20925": "nan", "20930": "nan", "20935": "nan", "20940": "nan", "20945": "nan", "20950": "nan", "20955": "nan", "20960": "nan", "20965": "nan", "20970": "nan", "20975": "nan", "20980": "nan", "20985": "nan", "20990": "nan", "20995": "nan", "21000": "nan", "21005": "nan", "21010": "nan", "21015": "nan", "21020": "nan", "21025": "nan", "21030": "nan", "21035": "nan", "21040": "nan", "21045": "nan", "21050": "nan", "21055": "nan", "21060": "nan", "21065": "nan", "21070": "nan", "21075": "nan", "21080": "nan", "21085": "nan", "21090": "nan", "21095": "nan", "21100": "nan", "21105": "nan", "21110": "nan", "21115": "nan", "21120": "nan", "21125": "nan", "21130": "nan", "21135": "nan", "21140": "nan", "21145": "nan", "21150": "nan", "21155": "nan", "21160": "nan", "21165": "nan", "21170": "nan", "21175": "nan", "21180": "nan", "21185": "nan", "21190": "nan", "21195": "nan", "21200": "nan", "21205": "nan", "21210": "nan", "21215": "nan", "21220": "nan", "21225": "nan", "21230": "nan", "21235": "nan", "21240": "nan", "21245": "nan", "21250": "nan", "21255": "nan", "21260": "nan", "21265": "nan", "21270": "nan", "21275": "nan", "21280": "nan", "21285": "nan", "21290": "nan", "21295": "nan", "21300": "nan", "21305": "nan", "21310": "nan", "21315": "nan", "21320": "nan", "21325": "nan", "21330": "nan", "21335": "nan", "21340": "nan", "21345": "nan", "21350": "nan", "21355": "nan", "21360": "nan", "21365": "nan", "21370": "nan", "21375": "nan", "21380": "nan", "21385": "nan", "21390": "nan", "21395": "nan", "21400": "nan", "21405": "nan", "21410": "nan", "21415": "nan", "21420": "nan", "21425": "nan", "21430": "nan", "21435": "nan", "21440": "nan", "21445": "nan", "21450": "nan", "21455": "nan", "21460": "nan", "21465": "nan", "21470": "nan", "21475": "nan", "21480": "nan", "21485": "nan", "21490": "nan", "21495": "nan", "21500": "nan", "21505": "nan", "21510": "nan", "21515": "nan", "21520": "nan", "21525": "nan", "21530": "nan", "21535": "nan", "21540": "nan", "21545": "nan", "21550": "nan", "21555": "nan", "21560": "nan", "21565": "nan", "21570": "nan", "21575": "nan", "21580": "nan", "21585": "nan", "21590": "nan", "21595": "nan", "21600": "nan", "21605": "nan", "21610": "nan", "21615": "nan", "21620": "nan", "21625": "nan", "21630": "nan", "21635": "nan", "21640": "nan", "21645": "nan", "21650": "nan", "21655": "nan", "21660": "nan", "21665": "nan", "21670": "nan", "21675": "nan", "21680": "nan", "21685": "nan", "21690": "nan", "21695": "nan", "21700": "nan", "21705": "nan", "21710": "nan", "21715": "nan", "21720": "nan", "21725": "nan", "21730": "nan", "21735": "nan", "21740": "nan", "21745": "nan", "21750": "nan", "21755": "nan", "21760": "nan", "21765": "nan", "21770": "nan", "21775": "nan", "21780": "nan", "21785": "nan", "21790": "nan", "21795": "nan", "21800": "nan", "21805": "nan", "21810": "nan", "21815": "nan", "21820": "nan", "21825": "nan", "21830": "nan", "21835": "nan", "21840": "nan", "21845": "nan", "21850": "nan", "21855": "nan", "21860": "nan", "21865": "nan", "21870": "nan", "21875": "nan", "21880": "nan", "21885": "nan", "21890": "nan", "21895": "nan", "21900": "nan", "21905": "nan", "21910": "nan", "21915": "nan", "21920": "nan", "21925": "nan", "21930": "nan", "21935": "nan", "21940": "nan", "21945": "nan", "21950": "nan", "21955": "nan", "21960": "nan", "21965": "nan", "21970": "nan", "21975": "nan", "21980": "nan", "21985": "nan", "21990": "nan", "21995": "nan", "22000": "nan", "22005": "nan", "22010": "nan", "22015": "nan", "22020": "nan", "22025": "nan", "22030": "nan", "22035": "nan", "22040": "nan", "22045": "nan", "22050": "nan", "22055": "nan", "22060": "nan", "22065": "nan", "22070": "nan", "22075": "nan", "22080": "nan", "22085": "nan", "22090": "nan", "22095": "nan", "22100": "nan", "22105": "nan", "22110": "nan", "22115": "nan", "22120": "nan", "22125": "nan", "22130": "nan", "22135": "nan", "22140": "nan", "22145": "nan", "22150": "nan", "22155": "nan", "22160": "nan", "22165": "nan", "22170": "nan", "22175": "nan", "22180": "nan", "22185": "nan", "22190": "nan", "22195": "nan", "22200": "nan", "22205": "nan", "22210": "nan", "22215": "nan", "22220": "nan", "22225": "nan", "22230": "nan", "22235": "nan", "22240": "nan", "22245": "nan", "22250": "nan", "22255": "nan", "22260": "nan", "22265": "nan", "22270": "nan", "22275": "nan", "22280": "nan", "22285": "nan", "22290": "nan", "22295": "nan", "22300": "nan", "22305": "nan", "22310": "nan", "22315": "nan", "22320": "nan", "22325": "nan", "22330": "nan", "22335": "nan", "22340": "nan", "22345": "nan", "22350": "nan", "22355": "nan", "22360": "nan", "22365": "nan", "22370": "nan", "22375": "nan", "22380": "nan", "22385": "nan", "22390": "nan", "22395": "nan", "22400": "nan", "22405": "nan", "22410": "nan", "22415": "nan", "22420": "nan", "22425": "nan", "22430": "nan", "22435": "nan", "22440": "nan", "22445": "nan", "22450": "nan", "22455": "nan", "22460": "nan", "22465": "nan", "22470": "nan", "22475": "nan", "22480": "nan", "22485": "nan", "22490": "nan", "22495": "nan", "22500": "nan", "22505": "nan", "22510": "nan", "22515": "nan", "22520": "nan", "22525": "nan", "22530": "nan", "22535": "nan", "22540": "nan", "22545": "nan", "22550": "nan", "22555": "nan", "22560": "nan", "22565": "nan", "22570": "nan", "22575": "nan", "22580": "nan", "22585": "nan", "22590": "nan", "22595": "nan", "22600": "nan", "22605": "nan", "22610": "nan", "22615": "nan", "22620": "nan", "22625": "nan", "22630": "nan", "22635": "nan", "22640": "nan", "22645": "nan", "22650": "nan", "22655": "nan", "22660": "nan", "22665": "nan", "22670": "nan", "22675": "nan", "22680": "nan", "22685": "nan", "22690": "nan", "22695": "nan", "22700": "nan", "22705": "nan", "22710": "nan", "22715": "nan", "22720": "nan", "22725": "nan", "22730": "nan", "22735": "nan", "22740": "nan", "22745": "nan", "22750": "nan", "22755": "nan", "22760": "nan", "22765": "nan", "22770": "nan", "22775": "nan", "22780": "nan", "22785": "nan", "22790": "nan", "22795": "nan", "22800": "nan", "22805": "nan", "22810": "nan", "22815": "nan", "22820": "nan", "22825": "nan", "22830": "nan", "22835": "nan", "22840": "nan", "22845": "nan", "22850": "nan", "22855": "nan", "22860": "nan", "22865": "nan", "22870": "nan", "22875": "nan", "22880": "nan", "22885": "nan", "22890": "nan", "22895": "nan", "22900": "nan", "22905": "nan", "22910": "nan", "22915": "nan", "22920": "nan", "22925": "nan", "22930": "nan", "22935": "nan", "22940": "nan", "22945": "nan", "22950": "nan", "22955": "nan", "22960": "nan", "22965": "nan", "22970": "nan", "22975": "nan", "22980": "nan", "22985": "nan", "22990": "nan", "22995": "nan", "23000": "nan", "23005": "nan", "23010": "nan", "23015": "nan", "23020": "nan", "23025": "nan", "23030": "nan", "23035": "nan", "23040": "nan", "23045": "nan", "23050": "nan", "23055": "nan", "23060": "nan", "23065": "nan", "23070": "nan", "23075": "nan", "23080": "nan", "23085": "nan", "23090": "nan", "23095": "nan", "23100": "nan", "23105": "nan", "23110": "nan", "23115": "nan", "23120": "nan", "23125": "nan", "23130": "nan", "23135": "nan", "23140": "nan", "23145": "nan", "23150": "nan", "23155": "nan", "23160": "nan", "23165": "nan", "23170": "nan", "23175": "nan", "23180": "nan", "23185": "nan", "23190": "nan", "23195": "nan", "23200": "nan", "23205": "nan", "23210": "nan", "23215": "nan", "23220": "nan", "23225": "nan", "23230": "nan", "23235": "nan", "23240": "nan", "23245": "nan", "23250": "nan", "23255": "nan", "23260": "nan", "23265": "nan", "23270": "nan", "23275": "nan", "23280": "nan", "23285": "nan", "23290": "nan", "23295": "nan", "23300": "nan", "23305": "nan", "23310": "nan", "23315": "nan", "23320": "nan", "23325": "nan", "23330": "nan", "23335": "nan", "23340": "nan", "23345": "nan", "23350": "nan", "23355": "nan", "23360": "nan", "23365": "nan", "23370": "nan", "23375": "nan", "23380": "nan", "23385": "nan", "23390": "nan", "23395": "nan", "23400": "nan", "23405": "nan", "23410": "nan", "23415": "nan", "23420": "nan", "23425": "nan", "23430": "nan", "23435": "nan", "23440": "nan", "23445": "nan", "23450": "nan", "23455": "nan", "23460": "nan", "23465": "nan", "23470": "nan", "23475": "nan", "23480": "nan", "23485": "nan", "23490": "nan", "23495": "nan", "23500": "nan", "23505": "nan", "23510": "nan", "23515": "nan", "23520": "nan", "23525": "nan", "23530": "nan", "23535": "nan", "23540": "nan", "23545": "nan", "23550": "nan", "23555": "nan", "23560": "nan", "23565": "nan", "23570": "nan", "23575": "nan", "23580": "nan", "23585": "nan", "23590": "nan", "23595": "nan", "23600": "nan", "23605": "nan", "23610": "nan", "23615": "nan", "23620": "nan", "23625": "nan", "23630": "nan", "23635": "nan", "23640": "nan", "23645": "nan", "23650": "nan", "23655": "nan", "23660": "nan", "23665": "nan", "23670": "nan", "23675": "nan", "23680": "nan", "23685": "nan", "23690": "nan", "23695": "nan", "23700": "nan", "23705": "nan", "23710": "nan", "23715": "nan", "23720": "nan", "23725": "nan", "23730": "nan", "23735": "nan", "23740": "nan", "23745": "nan", "23750": "nan", "23755": "nan", "23760": "nan", "23765": "nan", "23770": "nan", "23775": "nan", "23780": "nan", "23785": "nan", "23790": "nan", "23795": "nan", "23800": "nan", "23805": "nan", "23810": "nan", "23815": "nan", "23820": "nan", "23825": "nan", "23830": "nan", "23835": "nan", "23840": "nan", "23845": "nan", "23850": "nan", "23855": "nan", "23860": "nan", "23865": "nan", "23870": "nan", "23875": "nan", "23880": "nan", "23885": "nan", "23890": "nan", "23895": "nan", "23900": "nan", "23905": "nan", "23910": "nan", "23915": "nan", "23920": "nan", "23925": "nan", "23930": "nan", "23935": "nan", "23940": "nan", "23945": "nan", "23950": "nan", "23955": "nan", "23960": "nan", "23965": "nan", "23970": "nan", "23975": "nan", "23980": "nan", "23985": "nan", "23990": "nan", "23995": "nan", "24000": "nan", "24005": "nan", "24010": "nan", "24015": "nan", "24020": "nan", "24025": "nan", "24030": "nan", "24035": "nan", "24040": "nan", "24045": "nan", "24050": "nan", "24055": "nan", "24060": "nan", "24065": "nan", "24070": "nan", "24075": "nan", "24080": "nan", "24085": "nan", "24090": "nan", "24095": "nan", "24100": "nan", "24105": "nan", "24110": "nan", "24115": "nan", "24120": "nan", "24125": "nan", "24130": "nan", "24135": "nan", "24140": "nan", "24145": "nan", "24150": "nan", "24155": "nan", "24160": "nan", "24165": "nan", "24170": "nan", "24175": "nan", "24180": "nan", "24185": "nan", "24190": "nan", "24195": "nan", "24200": "nan", "24205": "nan", "24210": "nan", "24215": "nan", "24220": "nan", "24225": "nan", "24230": "nan", "24235": "nan", "24240": "nan", "24245": "nan", "24250": "nan", "24255": "nan", "24260": "nan", "24265": "nan", "24270": "nan", "24275": "nan", "24280": "nan", "24285": "nan", "24290": "nan", "24295": "nan", "24300": "nan", "24305": "nan", "24310": "nan", "24315": "nan", "24320": "nan", "24325": "nan", "24330": "nan", "24335": "nan", "24340": "nan", "24345": "nan", "24350": "nan", "24355": "nan", "24360": "nan", "24365": "nan", "24370": "nan", "24375": "nan", "24380": "nan", "24385": "nan", "24390": "nan", "24395": "nan", "24400": "nan", "24405": "nan", "24410": "nan", "24415": "nan", "24420": "nan", "24425": "nan", "24430": "nan", "24435": "nan", "24440": "nan", "24445": "nan", "24450": "nan", "24455": "nan", "24460": "nan", "24465": "nan", "24470": "nan", "24475": "nan", "24480": "nan", "24485": "nan", "24490": "nan", "24495": "nan", "24500": "nan", "24505": "nan", "24510": "nan", "24515": "nan", "24520": "nan", "24525": "nan", "24530": "nan", "24535": "nan", "24540": "nan", "24545": "nan", "24550": "nan", "24555": "nan", "24560": "nan", "24565": "nan", "24570": "nan", "24575": "nan", "24580": "nan", "24585": "nan", "24590": "nan", "24595": "nan", "24600": "nan", "24605": "nan", "24610": "nan", "24615": "nan", "24620": "nan", "24625": "nan", "24630": "nan", "24635": "nan", "24640": "nan", "24645": "nan", "24650": "nan", "24655": "nan", "24660": "nan", "24665": "nan", "24670": "nan", "24675": "nan", "24680": "nan", "24685": "nan", "24690": "nan", "24695": "nan", "24700": "nan", "24705": "nan", "24710": "nan", "24715": "nan", "24720": "nan", "24725": "nan", "24730": "nan", "24735": "nan", "24740": "nan", "24745": "nan", "24750": "nan", "24755": "nan", "24760": "nan", "24765": "nan", "24770": "nan", "24775": "nan", "24780": "nan", "24785": "nan", "24790": "nan", "24795": "nan", "24800": "nan", "24805": "nan", "24810": "nan", "24815": "nan", "24820": "nan", "24825": "nan", "24830": "nan", "24835": "nan", "24840": "nan", "24845": "nan", "24850": "nan", "24855": "nan", "24860": "nan", "24865": "nan", "24870": "nan", "24875": "nan", "24880": "nan", "24885": "nan", "24890": "nan", "24895": "nan", "24900": "nan", "24905": "nan", "24910": "nan", "24915": "nan", "24920": "nan", "24925": "nan", "24930": "nan", "24935": "nan", "24940": "nan", "24945": "nan", "24950": "nan", "24955": "nan", "24960": "nan", "24965": "nan", "24970": "nan", "24975": "nan", "24980": "nan", "24985": "nan", "24990": "nan", "24995": "nan", "25000": "nan"}}, "iteration-time": {"start_step": 1, "end_step": 25000, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": "nan", "25": "nan", "30": "nan", "35": "nan", "40": "nan", "45": "nan", "50": "nan", "55": "nan", "60": "nan", "65": "nan", "70": "nan", "75": "nan", "80": "nan", "85": "nan", "90": "nan", "95": "nan", "100": 0.89451, "105": "nan", "110": "nan", "115": "nan", "120": "nan", "125": "nan", "130": "nan", "135": "nan", "140": "nan", "145": "nan", "150": "nan", "155": "nan", "160": "nan", "165": "nan", "170": "nan", "175": "nan", "180": "nan", "185": "nan", "190": "nan", "195": "nan", "200": 0.3366, "205": "nan", "210": "nan", "215": "nan", "220": "nan", "225": "nan", "230": "nan", "235": "nan", "240": "nan", "245": "nan", "250": "nan", "255": "nan", "260": "nan", "265": "nan", "270": "nan", "275": "nan", "280": "nan", "285": "nan", "290": "nan", "295": "nan", "300": 0.34828, "305": "nan", "310": "nan", "315": "nan", "320": "nan", "325": "nan", "330": "nan", "335": "nan", "340": "nan", "345": "nan", "350": "nan", "355": "nan", "360": "nan", "365": "nan", "370": "nan", "375": "nan", "380": "nan", "385": "nan", "390": "nan", "395": "nan", "400": 0.33982, "405": "nan", "410": "nan", "415": "nan", "420": "nan", "425": "nan", "430": "nan", "435": "nan", "440": "nan", "445": "nan", "450": "nan", "455": "nan", "460": "nan", "465": "nan", "470": "nan", "475": "nan", "480": "nan", "485": "nan", "490": "nan", "495": "nan", "500": 0.33692, "505": "nan", "510": "nan", "515": "nan", "520": "nan", "525": "nan", "530": "nan", "535": "nan", "540": "nan", "545": "nan", "550": "nan", "555": "nan", "560": "nan", "565": "nan", "570": "nan", "575": "nan", "580": "nan", "585": "nan", "590": "nan", "595": "nan", "600": 0.34051, "605": "nan", "610": "nan", "615": "nan", "620": "nan", "625": "nan", "630": "nan", "635": "nan", "640": "nan", "645": "nan", "650": "nan", "655": "nan", "660": "nan", "665": "nan", "670": "nan", "675": "nan", "680": "nan", "685": "nan", "690": "nan", "695": "nan", "700": 0.34721, "705": "nan", "710": "nan", "715": "nan", "720": "nan", "725": "nan", "730": "nan", "735": "nan", "740": "nan", "745": "nan", "750": "nan", "755": "nan", "760": "nan", "765": "nan", "770": "nan", "775": "nan", "780": "nan", "785": "nan", "790": "nan", "795": "nan", "800": 0.35055, "805": "nan", "810": "nan", "815": "nan", "820": "nan", "825": "nan", "830": "nan", "835": "nan", "840": "nan", "845": "nan", "850": "nan", "855": "nan", "860": "nan", "865": "nan", "870": "nan", "875": "nan", "880": "nan", "885": "nan", "890": "nan", "895": "nan", "900": 0.34479, "905": "nan", "910": "nan", "915": "nan", "920": "nan", "925": "nan", "930": "nan", "935": "nan", "940": "nan", "945": "nan", "950": "nan", "955": "nan", "960": "nan", "965": "nan", "970": "nan", "975": "nan", "980": "nan", "985": "nan", "990": "nan", "995": "nan", "1000": 0.345, "1005": "nan", "1010": "nan", "1015": "nan", "1020": "nan", "1025": "nan", "1030": "nan", "1035": "nan", "1040": "nan", "1045": "nan", "1050": "nan", "1055": "nan", "1060": "nan", "1065": "nan", "1070": "nan", "1075": "nan", "1080": "nan", "1085": "nan", "1090": "nan", "1095": "nan", "1100": 0.34021, "1105": "nan", "1110": "nan", "1115": "nan", "1120": "nan", "1125": "nan", "1130": "nan", "1135": "nan", "1140": "nan", "1145": "nan", "1150": "nan", "1155": "nan", "1160": "nan", "1165": "nan", "1170": "nan", "1175": "nan", "1180": "nan", "1185": "nan", "1190": "nan", "1195": "nan", "1200": 0.36831, "1205": "nan", "1210": "nan", "1215": "nan", "1220": "nan", "1225": "nan", "1230": "nan", "1235": "nan", "1240": "nan", "1245": "nan", "1250": "nan", "1255": "nan", "1260": "nan", "1265": "nan", "1270": "nan", "1275": "nan", "1280": "nan", "1285": "nan", "1290": "nan", "1295": "nan", "1300": 0.3576, "1305": "nan", "1310": "nan", "1315": "nan", "1320": "nan", "1325": "nan", "1330": "nan", "1335": "nan", "1340": "nan", "1345": "nan", "1350": "nan", "1355": "nan", "1360": "nan", "1365": "nan", "1370": "nan", "1375": "nan", "1380": "nan", "1385": "nan", "1390": "nan", "1395": "nan", "1400": 0.34793, "1405": "nan", "1410": "nan", "1415": "nan", "1420": "nan", "1425": "nan", "1430": "nan", "1435": "nan", "1440": "nan", "1445": "nan", "1450": "nan", "1455": "nan", "1460": "nan", "1465": "nan", "1470": "nan", "1475": "nan", "1480": "nan", "1485": "nan", "1490": "nan", "1495": "nan", "1500": 0.35052, "1505": "nan", "1510": "nan", "1515": "nan", "1520": "nan", "1525": "nan", "1530": "nan", "1535": "nan", "1540": "nan", "1545": "nan", "1550": "nan", "1555": "nan", "1560": "nan", "1565": "nan", "1570": "nan", "1575": "nan", "1580": "nan", "1585": "nan", "1590": "nan", "1595": "nan", "1600": 0.34763, "1605": "nan", "1610": "nan", "1615": "nan", "1620": "nan", "1625": "nan", "1630": "nan", "1635": "nan", "1640": "nan", "1645": "nan", "1650": "nan", "1655": "nan", "1660": "nan", "1665": "nan", "1670": "nan", "1675": "nan", "1680": "nan", "1685": "nan", "1690": "nan", "1695": "nan", "1700": 0.34595, "1705": "nan", "1710": "nan", "1715": "nan", "1720": "nan", "1725": "nan", "1730": "nan", "1735": "nan", "1740": "nan", "1745": "nan", "1750": "nan", "1755": "nan", "1760": "nan", "1765": "nan", "1770": "nan", "1775": "nan", "1780": "nan", "1785": "nan", "1790": "nan", "1795": "nan", "1800": 0.341, "1805": "nan", "1810": "nan", "1815": "nan", "1820": "nan", "1825": "nan", "1830": "nan", "1835": "nan", "1840": "nan", "1845": "nan", "1850": "nan", "1855": "nan", "1860": "nan", "1865": "nan", "1870": "nan", "1875": "nan", "1880": "nan", "1885": "nan", "1890": "nan", "1895": "nan", "1900": 0.34998, "1905": "nan", "1910": "nan", "1915": "nan", "1920": "nan", "1925": "nan", "1930": "nan", "1935": "nan", "1940": "nan", "1945": "nan", "1950": "nan", "1955": "nan", "1960": "nan", "1965": "nan", "1970": "nan", "1975": "nan", "1980": "nan", "1985": "nan", "1990": "nan", "1995": "nan", "2000": 0.34448, "2005": "nan", "2010": "nan", "2015": "nan", "2020": "nan", "2025": "nan", "2030": "nan", "2035": "nan", "2040": "nan", "2045": "nan", "2050": "nan", "2055": "nan", "2060": "nan", "2065": "nan", "2070": "nan", "2075": "nan", "2080": "nan", "2085": "nan", "2090": "nan", "2095": "nan", "2100": 0.35055, "2105": "nan", "2110": "nan", "2115": "nan", "2120": "nan", "2125": "nan", "2130": "nan", "2135": "nan", "2140": "nan", "2145": "nan", "2150": "nan", "2155": "nan", "2160": "nan", "2165": "nan", "2170": "nan", "2175": "nan", "2180": "nan", "2185": "nan", "2190": "nan", "2195": "nan", "2200": 0.33653, "2205": "nan", "2210": "nan", "2215": "nan", "2220": "nan", "2225": "nan", "2230": "nan", "2235": "nan", "2240": "nan", "2245": "nan", "2250": "nan", "2255": "nan", "2260": "nan", "2265": "nan", "2270": "nan", "2275": "nan", "2280": "nan", "2285": "nan", "2290": "nan", "2295": "nan", "2300": 0.34657, "2305": "nan", "2310": "nan", "2315": "nan", "2320": "nan", "2325": "nan", "2330": "nan", "2335": "nan", "2340": "nan", "2345": "nan", "2350": "nan", "2355": "nan", "2360": "nan", "2365": "nan", "2370": "nan", "2375": "nan", "2380": "nan", "2385": "nan", "2390": "nan", "2395": "nan", "2400": 0.34502, "2405": "nan", "2410": "nan", "2415": "nan", "2420": "nan", "2425": "nan", "2430": "nan", "2435": "nan", "2440": "nan", "2445": "nan", "2450": "nan", "2455": "nan", "2460": "nan", "2465": "nan", "2470": "nan", "2475": "nan", "2480": "nan", "2485": "nan", "2490": "nan", "2495": "nan", "2500": 0.34552, "2505": "nan", "2510": "nan", "2515": "nan", "2520": "nan", "2525": "nan", "2530": "nan", "2535": "nan", "2540": "nan", "2545": "nan", "2550": "nan", "2555": "nan", "2560": "nan", "2565": "nan", "2570": "nan", "2575": "nan", "2580": "nan", "2585": "nan", "2590": "nan", "2595": "nan", "2600": 0.3493, "2605": "nan", "2610": "nan", "2615": "nan", "2620": "nan", "2625": "nan", "2630": "nan", "2635": "nan", "2640": "nan", "2645": "nan", "2650": "nan", "2655": "nan", "2660": "nan", "2665": "nan", "2670": "nan", "2675": "nan", "2680": "nan", "2685": "nan", "2690": "nan", "2695": "nan", "2700": 0.34881, "2705": "nan", "2710": "nan", "2715": "nan", "2720": "nan", "2725": "nan", "2730": "nan", "2735": "nan", "2740": "nan", "2745": "nan", "2750": "nan", "2755": "nan", "2760": "nan", "2765": "nan", "2770": "nan", "2775": "nan", "2780": "nan", "2785": "nan", "2790": "nan", "2795": "nan", "2800": 0.35795, "2805": "nan", "2810": "nan", "2815": "nan", "2820": "nan", "2825": "nan", "2830": "nan", "2835": "nan", "2840": "nan", "2845": "nan", "2850": "nan", "2855": "nan", "2860": "nan", "2865": "nan", "2870": "nan", "2875": "nan", "2880": "nan", "2885": "nan", "2890": "nan", "2895": "nan", "2900": 0.34303, "2905": "nan", "2910": "nan", "2915": "nan", "2920": "nan", "2925": "nan", "2930": "nan", "2935": "nan", "2940": "nan", "2945": "nan", "2950": "nan", "2955": "nan", "2960": "nan", "2965": "nan", "2970": "nan", "2975": "nan", "2980": "nan", "2985": "nan", "2990": "nan", "2995": "nan", "3000": 0.34631, "3005": "nan", "3010": "nan", "3015": "nan", "3020": "nan", "3025": "nan", "3030": "nan", "3035": "nan", "3040": "nan", "3045": "nan", "3050": "nan", "3055": "nan", "3060": "nan", "3065": "nan", "3070": "nan", "3075": "nan", "3080": "nan", "3085": "nan", "3090": "nan", "3095": "nan", "3100": 0.33863, "3105": "nan", "3110": "nan", "3115": "nan", "3120": "nan", "3125": "nan", "3130": "nan", "3135": "nan", "3140": "nan", "3145": "nan", "3150": "nan", "3155": "nan", "3160": "nan", "3165": "nan", "3170": "nan", "3175": "nan", "3180": "nan", "3185": "nan", "3190": "nan", "3195": "nan", "3200": 0.3559, "3205": "nan", "3210": "nan", "3215": "nan", "3220": "nan", "3225": "nan", "3230": "nan", "3235": "nan", "3240": "nan", "3245": "nan", "3250": "nan", "3255": "nan", "3260": "nan", "3265": "nan", "3270": "nan", "3275": "nan", "3280": "nan", "3285": "nan", "3290": "nan", "3295": "nan", "3300": 0.34853, "3305": "nan", "3310": "nan", "3315": "nan", "3320": "nan", "3325": "nan", "3330": "nan", "3335": "nan", "3340": "nan", "3345": "nan", "3350": "nan", "3355": "nan", "3360": "nan", "3365": "nan", "3370": "nan", "3375": "nan", "3380": "nan", "3385": "nan", "3390": "nan", "3395": "nan", "3400": 0.35036, "3405": "nan", "3410": "nan", "3415": "nan", "3420": "nan", "3425": "nan", "3430": "nan", "3435": "nan", "3440": "nan", "3445": "nan", "3450": "nan", "3455": "nan", "3460": "nan", "3465": "nan", "3470": "nan", "3475": "nan", "3480": "nan", "3485": "nan", "3490": "nan", "3495": "nan", "3500": 0.35371, "3505": "nan", "3510": "nan", "3515": "nan", "3520": "nan", "3525": "nan", "3530": "nan", "3535": "nan", "3540": "nan", "3545": "nan", "3550": "nan", "3555": "nan", "3560": "nan", "3565": "nan", "3570": "nan", "3575": "nan", "3580": "nan", "3585": "nan", "3590": "nan", "3595": "nan", "3600": 0.35289, "3605": "nan", "3610": "nan", "3615": "nan", "3620": "nan", "3625": "nan", "3630": "nan", "3635": "nan", "3640": "nan", "3645": "nan", "3650": "nan", "3655": "nan", "3660": "nan", "3665": "nan", "3670": "nan", "3675": "nan", "3680": "nan", "3685": "nan", "3690": "nan", "3695": "nan", "3700": 0.34956, "3705": "nan", "3710": "nan", "3715": "nan", "3720": "nan", "3725": "nan", "3730": "nan", "3735": "nan", "3740": "nan", "3745": "nan", "3750": "nan", "3755": "nan", "3760": "nan", "3765": "nan", "3770": "nan", "3775": "nan", "3780": "nan", "3785": "nan", "3790": "nan", "3795": "nan", "3800": 0.34134, "3805": "nan", "3810": "nan", "3815": "nan", "3820": "nan", "3825": "nan", "3830": "nan", "3835": "nan", "3840": "nan", "3845": "nan", "3850": "nan", "3855": "nan", "3860": "nan", "3865": "nan", "3870": "nan", "3875": "nan", "3880": "nan", "3885": "nan", "3890": "nan", "3895": "nan", "3900": 0.34275, "3905": "nan", "3910": "nan", "3915": "nan", "3920": "nan", "3925": "nan", "3930": "nan", "3935": "nan", "3940": "nan", "3945": "nan", "3950": "nan", "3955": "nan", "3960": "nan", "3965": "nan", "3970": "nan", "3975": "nan", "3980": "nan", "3985": "nan", "3990": "nan", "3995": "nan", "4000": 0.34755, "4005": "nan", "4010": "nan", "4015": "nan", "4020": "nan", "4025": "nan", "4030": "nan", "4035": "nan", "4040": "nan", "4045": "nan", "4050": "nan", "4055": "nan", "4060": "nan", "4065": "nan", "4070": "nan", "4075": "nan", "4080": "nan", "4085": "nan", "4090": "nan", "4095": "nan", "4100": 0.33908, "4105": "nan", "4110": "nan", "4115": "nan", "4120": "nan", "4125": "nan", "4130": "nan", "4135": "nan", "4140": "nan", "4145": "nan", "4150": "nan", "4155": "nan", "4160": "nan", "4165": "nan", "4170": "nan", "4175": "nan", "4180": "nan", "4185": "nan", "4190": "nan", "4195": "nan", "4200": 0.34685, "4205": "nan", "4210": "nan", "4215": "nan", "4220": "nan", "4225": "nan", "4230": "nan", "4235": "nan", "4240": "nan", "4245": "nan", "4250": "nan", "4255": "nan", "4260": "nan", "4265": "nan", "4270": "nan", "4275": "nan", "4280": "nan", "4285": "nan", "4290": "nan", "4295": "nan", "4300": 0.34629, "4305": "nan", "4310": "nan", "4315": "nan", "4320": "nan", "4325": "nan", "4330": "nan", "4335": "nan", "4340": "nan", "4345": "nan", "4350": "nan", "4355": "nan", "4360": "nan", "4365": "nan", "4370": "nan", "4375": "nan", "4380": "nan", "4385": "nan", "4390": "nan", "4395": "nan", "4400": 0.33903, "4405": "nan", "4410": "nan", "4415": "nan", "4420": "nan", "4425": "nan", "4430": "nan", "4435": "nan", "4440": "nan", "4445": "nan", "4450": "nan", "4455": "nan", "4460": "nan", "4465": "nan", "4470": "nan", "4475": "nan", "4480": "nan", "4485": "nan", "4490": "nan", "4495": "nan", "4500": 0.33946, "4505": "nan", "4510": "nan", "4515": "nan", "4520": "nan", "4525": "nan", "4530": "nan", "4535": "nan", "4540": "nan", "4545": "nan", "4550": "nan", "4555": "nan", "4560": "nan", "4565": "nan", "4570": "nan", "4575": "nan", "4580": "nan", "4585": "nan", "4590": "nan", "4595": "nan", "4600": 0.34554, "4605": "nan", "4610": "nan", "4615": "nan", "4620": "nan", "4625": "nan", "4630": "nan", "4635": "nan", "4640": "nan", "4645": "nan", "4650": "nan", "4655": "nan", "4660": "nan", "4665": "nan", "4670": "nan", "4675": "nan", "4680": "nan", "4685": "nan", "4690": "nan", "4695": "nan", "4700": 0.33748, "4705": "nan", "4710": "nan", "4715": "nan", "4720": "nan", "4725": "nan", "4730": "nan", "4735": "nan", "4740": "nan", "4745": "nan", "4750": "nan", "4755": "nan", "4760": "nan", "4765": "nan", "4770": "nan", "4775": "nan", "4780": "nan", "4785": "nan", "4790": "nan", "4795": "nan", "4800": 0.33784, "4805": "nan", "4810": "nan", "4815": "nan", "4820": "nan", "4825": "nan", "4830": "nan", "4835": "nan", "4840": "nan", "4845": "nan", "4850": "nan", "4855": "nan", "4860": "nan", "4865": "nan", "4870": "nan", "4875": "nan", "4880": "nan", "4885": "nan", "4890": "nan", "4895": "nan", "4900": 0.3333, "4905": "nan", "4910": "nan", "4915": "nan", "4920": "nan", "4925": "nan", "4930": "nan", "4935": "nan", "4940": "nan", "4945": "nan", "4950": "nan", "4955": "nan", "4960": "nan", "4965": "nan", "4970": "nan", "4975": "nan", "4980": "nan", "4985": "nan", "4990": "nan", "4995": "nan", "5000": 0.33317, "5005": "nan", "5010": "nan", "5015": "nan", "5020": "nan", "5025": "nan", "5030": "nan", "5035": "nan", "5040": "nan", "5045": "nan", "5050": "nan", "5055": "nan", "5060": "nan", "5065": "nan", "5070": "nan", "5075": "nan", "5080": "nan", "5085": "nan", "5090": "nan", "5095": "nan", "5100": 0.33707, "5105": "nan", "5110": "nan", "5115": "nan", "5120": "nan", "5125": "nan", "5130": "nan", "5135": "nan", "5140": "nan", "5145": "nan", "5150": "nan", "5155": "nan", "5160": "nan", "5165": "nan", "5170": "nan", "5175": "nan", "5180": "nan", "5185": "nan", "5190": "nan", "5195": "nan", "5200": 0.3415, "5205": "nan", "5210": "nan", "5215": "nan", "5220": "nan", "5225": "nan", "5230": "nan", "5235": "nan", "5240": "nan", "5245": "nan", "5250": "nan", "5255": "nan", "5260": "nan", "5265": "nan", "5270": "nan", "5275": "nan", "5280": "nan", "5285": "nan", "5290": "nan", "5295": "nan", "5300": 0.32662, "5305": "nan", "5310": "nan", "5315": "nan", "5320": "nan", "5325": "nan", "5330": "nan", "5335": "nan", "5340": "nan", "5345": "nan", "5350": "nan", "5355": "nan", "5360": "nan", "5365": "nan", "5370": "nan", "5375": "nan", "5380": "nan", "5385": "nan", "5390": "nan", "5395": "nan", "5400": 0.34336, "5405": "nan", "5410": "nan", "5415": "nan", "5420": "nan", "5425": "nan", "5430": "nan", "5435": "nan", "5440": "nan", "5445": "nan", "5450": "nan", "5455": "nan", "5460": "nan", "5465": "nan", "5470": "nan", "5475": "nan", "5480": "nan", "5485": "nan", "5490": "nan", "5495": "nan", "5500": 0.33785, "5505": "nan", "5510": "nan", "5515": "nan", "5520": "nan", "5525": "nan", "5530": "nan", "5535": "nan", "5540": "nan", "5545": "nan", "5550": "nan", "5555": "nan", "5560": "nan", "5565": "nan", "5570": "nan", "5575": "nan", "5580": "nan", "5585": "nan", "5590": "nan", "5595": "nan", "5600": 0.35258, "5605": "nan", "5610": "nan", "5615": "nan", "5620": "nan", "5625": "nan", "5630": "nan", "5635": "nan", "5640": "nan", "5645": "nan", "5650": "nan", "5655": "nan", "5660": "nan", "5665": "nan", "5670": "nan", "5675": "nan", "5680": "nan", "5685": "nan", "5690": "nan", "5695": "nan", "5700": 0.3447, "5705": "nan", "5710": "nan", "5715": "nan", "5720": "nan", "5725": "nan", "5730": "nan", "5735": "nan", "5740": "nan", "5745": "nan", "5750": "nan", "5755": "nan", "5760": "nan", "5765": "nan", "5770": "nan", "5775": "nan", "5780": "nan", "5785": "nan", "5790": "nan", "5795": "nan", "5800": 0.33976, "5805": "nan", "5810": "nan", "5815": "nan", "5820": "nan", "5825": "nan", "5830": "nan", "5835": "nan", "5840": "nan", "5845": "nan", "5850": "nan", "5855": "nan", "5860": "nan", "5865": "nan", "5870": "nan", "5875": "nan", "5880": "nan", "5885": "nan", "5890": "nan", "5895": "nan", "5900": 0.33975, "5905": "nan", "5910": "nan", "5915": "nan", "5920": "nan", "5925": "nan", "5930": "nan", "5935": "nan", "5940": "nan", "5945": "nan", "5950": "nan", "5955": "nan", "5960": "nan", "5965": "nan", "5970": "nan", "5975": "nan", "5980": "nan", "5985": "nan", "5990": "nan", "5995": "nan", "6000": 0.33548, "6005": "nan", "6010": "nan", "6015": "nan", "6020": "nan", "6025": "nan", "6030": "nan", "6035": "nan", "6040": "nan", "6045": "nan", "6050": "nan", "6055": "nan", "6060": "nan", "6065": "nan", "6070": "nan", "6075": "nan", "6080": "nan", "6085": "nan", "6090": "nan", "6095": "nan", "6100": 0.3354, "6105": "nan", "6110": "nan", "6115": "nan", "6120": "nan", "6125": "nan", "6130": "nan", "6135": "nan", "6140": "nan", "6145": "nan", "6150": "nan", "6155": "nan", "6160": "nan", "6165": "nan", "6170": "nan", "6175": "nan", "6180": "nan", "6185": "nan", "6190": "nan", "6195": "nan", "6200": 0.33693, "6205": "nan", "6210": "nan", "6215": "nan", "6220": "nan", "6225": "nan", "6230": "nan", "6235": "nan", "6240": "nan", "6245": "nan", "6250": "nan", "6255": "nan", "6260": "nan", "6265": "nan", "6270": "nan", "6275": "nan", "6280": "nan", "6285": "nan", "6290": "nan", "6295": "nan", "6300": 0.34098, "6305": "nan", "6310": "nan", "6315": "nan", "6320": "nan", "6325": "nan", "6330": "nan", "6335": "nan", "6340": "nan", "6345": "nan", "6350": "nan", "6355": "nan", "6360": "nan", "6365": "nan", "6370": "nan", "6375": "nan", "6380": "nan", "6385": "nan", "6390": "nan", "6395": "nan", "6400": 0.34338, "6405": "nan", "6410": "nan", "6415": "nan", "6420": "nan", "6425": "nan", "6430": "nan", "6435": "nan", "6440": "nan", "6445": "nan", "6450": "nan", "6455": "nan", "6460": "nan", "6465": "nan", "6470": "nan", "6475": "nan", "6480": "nan", "6485": "nan", "6490": "nan", "6495": "nan", "6500": 0.34601, "6505": "nan", "6510": "nan", "6515": "nan", "6520": "nan", "6525": "nan", "6530": "nan", "6535": "nan", "6540": "nan", "6545": "nan", "6550": "nan", "6555": "nan", "6560": "nan", "6565": "nan", "6570": "nan", "6575": "nan", "6580": "nan", "6585": "nan", "6590": "nan", "6595": "nan", "6600": 0.35083, "6605": "nan", "6610": "nan", "6615": "nan", "6620": "nan", "6625": "nan", "6630": "nan", "6635": "nan", "6640": "nan", "6645": "nan", "6650": "nan", "6655": "nan", "6660": "nan", "6665": "nan", "6670": "nan", "6675": "nan", "6680": "nan", "6685": "nan", "6690": "nan", "6695": "nan", "6700": 0.3536, "6705": "nan", "6710": "nan", "6715": "nan", "6720": "nan", "6725": "nan", "6730": "nan", "6735": "nan", "6740": "nan", "6745": "nan", "6750": "nan", "6755": "nan", "6760": "nan", "6765": "nan", "6770": "nan", "6775": "nan", "6780": "nan", "6785": "nan", "6790": "nan", "6795": "nan", "6800": 0.35651, "6805": "nan", "6810": "nan", "6815": "nan", "6820": "nan", "6825": "nan", "6830": "nan", "6835": "nan", "6840": "nan", "6845": "nan", "6850": "nan", "6855": "nan", "6860": "nan", "6865": "nan", "6870": "nan", "6875": "nan", "6880": "nan", "6885": "nan", "6890": "nan", "6895": "nan", "6900": 0.34319, "6905": "nan", "6910": "nan", "6915": "nan", "6920": "nan", "6925": "nan", "6930": "nan", "6935": "nan", "6940": "nan", "6945": "nan", "6950": "nan", "6955": "nan", "6960": "nan", "6965": "nan", "6970": "nan", "6975": "nan", "6980": "nan", "6985": "nan", "6990": "nan", "6995": "nan", "7000": 0.34405, "7005": "nan", "7010": "nan", "7015": "nan", "7020": "nan", "7025": "nan", "7030": "nan", "7035": "nan", "7040": "nan", "7045": "nan", "7050": "nan", "7055": "nan", "7060": "nan", "7065": "nan", "7070": "nan", "7075": "nan", "7080": "nan", "7085": "nan", "7090": "nan", "7095": "nan", "7100": 0.33972, "7105": "nan", "7110": "nan", "7115": "nan", "7120": "nan", "7125": "nan", "7130": "nan", "7135": "nan", "7140": "nan", "7145": "nan", "7150": "nan", "7155": "nan", "7160": "nan", "7165": "nan", "7170": "nan", "7175": "nan", "7180": "nan", "7185": "nan", "7190": "nan", "7195": "nan", "7200": 0.3584, "7205": "nan", "7210": "nan", "7215": "nan", "7220": "nan", "7225": "nan", "7230": "nan", "7235": "nan", "7240": "nan", "7245": "nan", "7250": "nan", "7255": "nan", "7260": "nan", "7265": "nan", "7270": "nan", "7275": "nan", "7280": "nan", "7285": "nan", "7290": "nan", "7295": "nan", "7300": 0.34266, "7305": "nan", "7310": "nan", "7315": "nan", "7320": "nan", "7325": "nan", "7330": "nan", "7335": "nan", "7340": "nan", "7345": "nan", "7350": "nan", "7355": "nan", "7360": "nan", "7365": "nan", "7370": "nan", "7375": "nan", "7380": "nan", "7385": "nan", "7390": "nan", "7395": "nan", "7400": 0.3555, "7405": "nan", "7410": "nan", "7415": "nan", "7420": "nan", "7425": "nan", "7430": "nan", "7435": "nan", "7440": "nan", "7445": "nan", "7450": "nan", "7455": "nan", "7460": "nan", "7465": "nan", "7470": "nan", "7475": "nan", "7480": "nan", "7485": "nan", "7490": "nan", "7495": "nan", "7500": 0.34298, "7505": "nan", "7510": "nan", "7515": "nan", "7520": "nan", "7525": "nan", "7530": "nan", "7535": "nan", "7540": "nan", "7545": "nan", "7550": "nan", "7555": "nan", "7560": "nan", "7565": "nan", "7570": "nan", "7575": "nan", "7580": "nan", "7585": "nan", "7590": "nan", "7595": "nan", "7600": 0.34031, "7605": "nan", "7610": "nan", "7615": "nan", "7620": "nan", "7625": "nan", "7630": "nan", "7635": "nan", "7640": "nan", "7645": "nan", "7650": "nan", "7655": "nan", "7660": "nan", "7665": "nan", "7670": "nan", "7675": "nan", "7680": "nan", "7685": "nan", "7690": "nan", "7695": "nan", "7700": 0.34545, "7705": "nan", "7710": "nan", "7715": "nan", "7720": "nan", "7725": "nan", "7730": "nan", "7735": "nan", "7740": "nan", "7745": "nan", "7750": "nan", "7755": "nan", "7760": "nan", "7765": "nan", "7770": "nan", "7775": "nan", "7780": "nan", "7785": "nan", "7790": "nan", "7795": "nan", "7800": 0.3513, "7805": "nan", "7810": "nan", "7815": "nan", "7820": "nan", "7825": "nan", "7830": "nan", "7835": "nan", "7840": "nan", "7845": "nan", "7850": "nan", "7855": "nan", "7860": "nan", "7865": "nan", "7870": "nan", "7875": "nan", "7880": "nan", "7885": "nan", "7890": "nan", "7895": "nan", "7900": 0.35688, "7905": "nan", "7910": "nan", "7915": "nan", "7920": "nan", "7925": "nan", "7930": "nan", "7935": "nan", "7940": "nan", "7945": "nan", "7950": "nan", "7955": "nan", "7960": "nan", "7965": "nan", "7970": "nan", "7975": "nan", "7980": "nan", "7985": "nan", "7990": "nan", "7995": "nan", "8000": 0.34838, "8005": "nan", "8010": "nan", "8015": "nan", "8020": "nan", "8025": "nan", "8030": "nan", "8035": "nan", "8040": "nan", "8045": "nan", "8050": "nan", "8055": "nan", "8060": "nan", "8065": "nan", "8070": "nan", "8075": "nan", "8080": "nan", "8085": "nan", "8090": "nan", "8095": "nan", "8100": 0.35174, "8105": "nan", "8110": "nan", "8115": "nan", "8120": "nan", "8125": "nan", "8130": "nan", "8135": "nan", "8140": "nan", "8145": "nan", "8150": "nan", "8155": "nan", "8160": "nan", "8165": "nan", "8170": "nan", "8175": "nan", "8180": "nan", "8185": "nan", "8190": "nan", "8195": "nan", "8200": 0.34295, "8205": "nan", "8210": "nan", "8215": "nan", "8220": "nan", "8225": "nan", "8230": "nan", "8235": "nan", "8240": "nan", "8245": "nan", "8250": "nan", "8255": "nan", "8260": "nan", "8265": "nan", "8270": "nan", "8275": "nan", "8280": "nan", "8285": "nan", "8290": "nan", "8295": "nan", "8300": 0.3485, "8305": "nan", "8310": "nan", "8315": "nan", "8320": "nan", "8325": "nan", "8330": "nan", "8335": "nan", "8340": "nan", "8345": "nan", "8350": "nan", "8355": "nan", "8360": "nan", "8365": "nan", "8370": "nan", "8375": "nan", "8380": "nan", "8385": "nan", "8390": "nan", "8395": "nan", "8400": 0.35133, "8405": "nan", "8410": "nan", "8415": "nan", "8420": "nan", "8425": "nan", "8430": "nan", "8435": "nan", "8440": "nan", "8445": "nan", "8450": "nan", "8455": "nan", "8460": "nan", "8465": "nan", "8470": "nan", "8475": "nan", "8480": "nan", "8485": "nan", "8490": "nan", "8495": "nan", "8500": 0.34243, "8505": "nan", "8510": "nan", "8515": "nan", "8520": "nan", "8525": "nan", "8530": "nan", "8535": "nan", "8540": "nan", "8545": "nan", "8550": "nan", "8555": "nan", "8560": "nan", "8565": "nan", "8570": "nan", "8575": "nan", "8580": "nan", "8585": "nan", "8590": "nan", "8595": "nan", "8600": 0.3353, "8605": "nan", "8610": "nan", "8615": "nan", "8620": "nan", "8625": "nan", "8630": "nan", "8635": "nan", "8640": "nan", "8645": "nan", "8650": "nan", "8655": "nan", "8660": "nan", "8665": "nan", "8670": "nan", "8675": "nan", "8680": "nan", "8685": "nan", "8690": "nan", "8695": "nan", "8700": 0.3475, "8705": "nan", "8710": "nan", "8715": "nan", "8720": "nan", "8725": "nan", "8730": "nan", "8735": "nan", "8740": "nan", "8745": "nan", "8750": "nan", "8755": "nan", "8760": "nan", "8765": "nan", "8770": "nan", "8775": "nan", "8780": "nan", "8785": "nan", "8790": "nan", "8795": "nan", "8800": 0.34708, "8805": "nan", "8810": "nan", "8815": "nan", "8820": "nan", "8825": "nan", "8830": "nan", "8835": "nan", "8840": "nan", "8845": "nan", "8850": "nan", "8855": "nan", "8860": "nan", "8865": "nan", "8870": "nan", "8875": "nan", "8880": "nan", "8885": "nan", "8890": "nan", "8895": "nan", "8900": 0.36238, "8905": "nan", "8910": "nan", "8915": "nan", "8920": "nan", "8925": "nan", "8930": "nan", "8935": "nan", "8940": "nan", "8945": "nan", "8950": "nan", "8955": "nan", "8960": "nan", "8965": "nan", "8970": "nan", "8975": "nan", "8980": "nan", "8985": "nan", "8990": "nan", "8995": "nan", "9000": 0.35474, "9005": "nan", "9010": "nan", "9015": "nan", "9020": "nan", "9025": "nan", "9030": "nan", "9035": "nan", "9040": "nan", "9045": "nan", "9050": "nan", "9055": "nan", "9060": "nan", "9065": "nan", "9070": "nan", "9075": "nan", "9080": "nan", "9085": "nan", "9090": "nan", "9095": "nan", "9100": 0.3526, "9105": "nan", "9110": "nan", "9115": "nan", "9120": "nan", "9125": "nan", "9130": "nan", "9135": "nan", "9140": "nan", "9145": "nan", "9150": "nan", "9155": "nan", "9160": "nan", "9165": "nan", "9170": "nan", "9175": "nan", "9180": "nan", "9185": "nan", "9190": "nan", "9195": "nan", "9200": 0.34123, "9205": "nan", "9210": "nan", "9215": "nan", "9220": "nan", "9225": "nan", "9230": "nan", "9235": "nan", "9240": "nan", "9245": "nan", "9250": "nan", "9255": "nan", "9260": "nan", "9265": "nan", "9270": "nan", "9275": "nan", "9280": "nan", "9285": "nan", "9290": "nan", "9295": "nan", "9300": 0.34814, "9305": "nan", "9310": "nan", "9315": "nan", "9320": "nan", "9325": "nan", "9330": "nan", "9335": "nan", "9340": "nan", "9345": "nan", "9350": "nan", "9355": "nan", "9360": "nan", "9365": "nan", "9370": "nan", "9375": "nan", "9380": "nan", "9385": "nan", "9390": "nan", "9395": "nan", "9400": 0.33745, "9405": "nan", "9410": "nan", "9415": "nan", "9420": "nan", "9425": "nan", "9430": "nan", "9435": "nan", "9440": "nan", "9445": "nan", "9450": "nan", "9455": "nan", "9460": "nan", "9465": "nan", "9470": "nan", "9475": "nan", "9480": "nan", "9485": "nan", "9490": "nan", "9495": "nan", "9500": 0.34262, "9505": "nan", "9510": "nan", "9515": "nan", "9520": "nan", "9525": "nan", "9530": "nan", "9535": "nan", "9540": "nan", "9545": "nan", "9550": "nan", "9555": "nan", "9560": "nan", "9565": "nan", "9570": "nan", "9575": "nan", "9580": "nan", "9585": "nan", "9590": "nan", "9595": "nan", "9600": 0.34471, "9605": "nan", "9610": "nan", "9615": "nan", "9620": "nan", "9625": "nan", "9630": "nan", "9635": "nan", "9640": "nan", "9645": "nan", "9650": "nan", "9655": "nan", "9660": "nan", "9665": "nan", "9670": "nan", "9675": "nan", "9680": "nan", "9685": "nan", "9690": "nan", "9695": "nan", "9700": 0.35302, "9705": "nan", "9710": "nan", "9715": "nan", "9720": "nan", "9725": "nan", "9730": "nan", "9735": "nan", "9740": "nan", "9745": "nan", "9750": "nan", "9755": "nan", "9760": "nan", "9765": "nan", "9770": "nan", "9775": "nan", "9780": "nan", "9785": "nan", "9790": "nan", "9795": "nan", "9800": 0.3499, "9805": "nan", "9810": "nan", "9815": "nan", "9820": "nan", "9825": "nan", "9830": "nan", "9835": "nan", "9840": "nan", "9845": "nan", "9850": "nan", "9855": "nan", "9860": "nan", "9865": "nan", "9870": "nan", "9875": "nan", "9880": "nan", "9885": "nan", "9890": "nan", "9895": "nan", "9900": 0.35103, "9905": "nan", "9910": "nan", "9915": "nan", "9920": "nan", "9925": "nan", "9930": "nan", "9935": "nan", "9940": "nan", "9945": "nan", "9950": "nan", "9955": "nan", "9960": "nan", "9965": "nan", "9970": "nan", "9975": "nan", "9980": "nan", "9985": "nan", "9990": "nan", "9995": "nan", "10000": 0.34747, "10005": "nan", "10010": "nan", "10015": "nan", "10020": "nan", "10025": "nan", "10030": "nan", "10035": "nan", "10040": "nan", "10045": "nan", "10050": "nan", "10055": "nan", "10060": "nan", "10065": "nan", "10070": "nan", "10075": "nan", "10080": "nan", "10085": "nan", "10090": "nan", "10095": "nan", "10100": 0.34992, "10105": "nan", "10110": "nan", "10115": "nan", "10120": "nan", "10125": "nan", "10130": "nan", "10135": "nan", "10140": "nan", "10145": "nan", "10150": "nan", "10155": "nan", "10160": "nan", "10165": "nan", "10170": "nan", "10175": "nan", "10180": "nan", "10185": "nan", "10190": "nan", "10195": "nan", "10200": 0.34397, "10205": "nan", "10210": "nan", "10215": "nan", "10220": "nan", "10225": "nan", "10230": "nan", "10235": "nan", "10240": "nan", "10245": "nan", "10250": "nan", "10255": "nan", "10260": "nan", "10265": "nan", "10270": "nan", "10275": "nan", "10280": "nan", "10285": "nan", "10290": "nan", "10295": "nan", "10300": 0.34528, "10305": "nan", "10310": "nan", "10315": "nan", "10320": "nan", "10325": "nan", "10330": "nan", "10335": "nan", "10340": "nan", "10345": "nan", "10350": "nan", "10355": "nan", "10360": "nan", "10365": "nan", "10370": "nan", "10375": "nan", "10380": "nan", "10385": "nan", "10390": "nan", "10395": "nan", "10400": 0.33426, "10405": "nan", "10410": "nan", "10415": "nan", "10420": "nan", "10425": "nan", "10430": "nan", "10435": "nan", "10440": "nan", "10445": "nan", "10450": "nan", "10455": "nan", "10460": "nan", "10465": "nan", "10470": "nan", "10475": "nan", "10480": "nan", "10485": "nan", "10490": "nan", "10495": "nan", "10500": 0.34663, "10505": "nan", "10510": "nan", "10515": "nan", "10520": "nan", "10525": "nan", "10530": "nan", "10535": "nan", "10540": "nan", "10545": "nan", "10550": "nan", "10555": "nan", "10560": "nan", "10565": "nan", "10570": "nan", "10575": "nan", "10580": "nan", "10585": "nan", "10590": "nan", "10595": "nan", "10600": 0.3571, "10605": "nan", "10610": "nan", "10615": "nan", "10620": "nan", "10625": "nan", "10630": "nan", "10635": "nan", "10640": "nan", "10645": "nan", "10650": "nan", "10655": "nan", "10660": "nan", "10665": "nan", "10670": "nan", "10675": "nan", "10680": "nan", "10685": "nan", "10690": "nan", "10695": "nan", "10700": 0.3446, "10705": "nan", "10710": "nan", "10715": "nan", "10720": "nan", "10725": "nan", "10730": "nan", "10735": "nan", "10740": "nan", "10745": "nan", "10750": "nan", "10755": "nan", "10760": "nan", "10765": "nan", "10770": "nan", "10775": "nan", "10780": "nan", "10785": "nan", "10790": "nan", "10795": "nan", "10800": 0.37022, "10805": "nan", "10810": "nan", "10815": "nan", "10820": "nan", "10825": "nan", "10830": "nan", "10835": "nan", "10840": "nan", "10845": "nan", "10850": "nan", "10855": "nan", "10860": "nan", "10865": "nan", "10870": "nan", "10875": "nan", "10880": "nan", "10885": "nan", "10890": "nan", "10895": "nan", "10900": 0.3379, "10905": "nan", "10910": "nan", "10915": "nan", "10920": "nan", "10925": "nan", "10930": "nan", "10935": "nan", "10940": "nan", "10945": "nan", "10950": "nan", "10955": "nan", "10960": "nan", "10965": "nan", "10970": "nan", "10975": "nan", "10980": "nan", "10985": "nan", "10990": "nan", "10995": "nan", "11000": 0.34316, "11005": "nan", "11010": "nan", "11015": "nan", "11020": "nan", "11025": "nan", "11030": "nan", "11035": "nan", "11040": "nan", "11045": "nan", "11050": "nan", "11055": "nan", "11060": "nan", "11065": "nan", "11070": "nan", "11075": "nan", "11080": "nan", "11085": "nan", "11090": "nan", "11095": "nan", "11100": 0.34369, "11105": "nan", "11110": "nan", "11115": "nan", "11120": "nan", "11125": "nan", "11130": "nan", "11135": "nan", "11140": "nan", "11145": "nan", "11150": "nan", "11155": "nan", "11160": "nan", "11165": "nan", "11170": "nan", "11175": "nan", "11180": "nan", "11185": "nan", "11190": "nan", "11195": "nan", "11200": 0.33693, "11205": "nan", "11210": "nan", "11215": "nan", "11220": "nan", "11225": "nan", "11230": "nan", "11235": "nan", "11240": "nan", "11245": "nan", "11250": "nan", "11255": "nan", "11260": "nan", "11265": "nan", "11270": "nan", "11275": "nan", "11280": "nan", "11285": "nan", "11290": "nan", "11295": "nan", "11300": 0.34654, "11305": "nan", "11310": "nan", "11315": "nan", "11320": "nan", "11325": "nan", "11330": "nan", "11335": "nan", "11340": "nan", "11345": "nan", "11350": "nan", "11355": "nan", "11360": "nan", "11365": "nan", "11370": "nan", "11375": "nan", "11380": "nan", "11385": "nan", "11390": "nan", "11395": "nan", "11400": 0.34447, "11405": "nan", "11410": "nan", "11415": "nan", "11420": "nan", "11425": "nan", "11430": "nan", "11435": "nan", "11440": "nan", "11445": "nan", "11450": "nan", "11455": "nan", "11460": "nan", "11465": "nan", "11470": "nan", "11475": "nan", "11480": "nan", "11485": "nan", "11490": "nan", "11495": "nan", "11500": 0.3372, "11505": "nan", "11510": "nan", "11515": "nan", "11520": "nan", "11525": "nan", "11530": "nan", "11535": "nan", "11540": "nan", "11545": "nan", "11550": "nan", "11555": "nan", "11560": "nan", "11565": "nan", "11570": "nan", "11575": "nan", "11580": "nan", "11585": "nan", "11590": "nan", "11595": "nan", "11600": 0.33546, "11605": "nan", "11610": "nan", "11615": "nan", "11620": "nan", "11625": "nan", "11630": "nan", "11635": "nan", "11640": "nan", "11645": "nan", "11650": "nan", "11655": "nan", "11660": "nan", "11665": "nan", "11670": "nan", "11675": "nan", "11680": "nan", "11685": "nan", "11690": "nan", "11695": "nan", "11700": 0.35039, "11705": "nan", "11710": "nan", "11715": "nan", "11720": "nan", "11725": "nan", "11730": "nan", "11735": "nan", "11740": "nan", "11745": "nan", "11750": "nan", "11755": "nan", "11760": "nan", "11765": "nan", "11770": "nan", "11775": "nan", "11780": "nan", "11785": "nan", "11790": "nan", "11795": "nan", "11800": 0.33743, "11805": "nan", "11810": "nan", "11815": "nan", "11820": "nan", "11825": "nan", "11830": "nan", "11835": "nan", "11840": "nan", "11845": "nan", "11850": "nan", "11855": "nan", "11860": "nan", "11865": "nan", "11870": "nan", "11875": "nan", "11880": "nan", "11885": "nan", "11890": "nan", "11895": "nan", "11900": 0.3508, "11905": "nan", "11910": "nan", "11915": "nan", "11920": "nan", "11925": "nan", "11930": "nan", "11935": "nan", "11940": "nan", "11945": "nan", "11950": "nan", "11955": "nan", "11960": "nan", "11965": "nan", "11970": "nan", "11975": "nan", "11980": "nan", "11985": "nan", "11990": "nan", "11995": "nan", "12000": 0.33876, "12005": "nan", "12010": "nan", "12015": "nan", "12020": "nan", "12025": "nan", "12030": "nan", "12035": "nan", "12040": "nan", "12045": "nan", "12050": "nan", "12055": "nan", "12060": "nan", "12065": "nan", "12070": "nan", "12075": "nan", "12080": "nan", "12085": "nan", "12090": "nan", "12095": "nan", "12100": 0.34625, "12105": "nan", "12110": "nan", "12115": "nan", "12120": "nan", "12125": "nan", "12130": "nan", "12135": "nan", "12140": "nan", "12145": "nan", "12150": "nan", "12155": "nan", "12160": "nan", "12165": "nan", "12170": "nan", "12175": "nan", "12180": "nan", "12185": "nan", "12190": "nan", "12195": "nan", "12200": 0.34482, "12205": "nan", "12210": "nan", "12215": "nan", "12220": "nan", "12225": "nan", "12230": "nan", "12235": "nan", "12240": "nan", "12245": "nan", "12250": "nan", "12255": "nan", "12260": "nan", "12265": "nan", "12270": "nan", "12275": "nan", "12280": "nan", "12285": "nan", "12290": "nan", "12295": "nan", "12300": 0.33683, "12305": "nan", "12310": "nan", "12315": "nan", "12320": "nan", "12325": "nan", "12330": "nan", "12335": "nan", "12340": "nan", "12345": "nan", "12350": "nan", "12355": "nan", "12360": "nan", "12365": "nan", "12370": "nan", "12375": "nan", "12380": "nan", "12385": "nan", "12390": "nan", "12395": "nan", "12400": 0.35622, "12405": "nan", "12410": "nan", "12415": "nan", "12420": "nan", "12425": "nan", "12430": "nan", "12435": "nan", "12440": "nan", "12445": "nan", "12450": "nan", "12455": "nan", "12460": "nan", "12465": "nan", "12470": "nan", "12475": "nan", "12480": "nan", "12485": "nan", "12490": "nan", "12495": "nan", "12500": 0.3427, "12505": "nan", "12510": "nan", "12515": "nan", "12520": "nan", "12525": "nan", "12530": "nan", "12535": "nan", "12540": "nan", "12545": "nan", "12550": "nan", "12555": "nan", "12560": "nan", "12565": "nan", "12570": "nan", "12575": "nan", "12580": "nan", "12585": "nan", "12590": "nan", "12595": "nan", "12600": 0.34141, "12605": "nan", "12610": "nan", "12615": "nan", "12620": "nan", "12625": "nan", "12630": "nan", "12635": "nan", "12640": "nan", "12645": "nan", "12650": "nan", "12655": "nan", "12660": "nan", "12665": "nan", "12670": "nan", "12675": "nan", "12680": "nan", "12685": "nan", "12690": "nan", "12695": "nan", "12700": 0.34086, "12705": "nan", "12710": "nan", "12715": "nan", "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": 0.34462, "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": 0.34945, "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": 0.34618, "13005": "nan", "13010": "nan", "13015": "nan", "13020": "nan", "13025": "nan", "13030": "nan", "13035": "nan", "13040": "nan", "13045": "nan", "13050": "nan", "13055": "nan", "13060": "nan", "13065": "nan", "13070": "nan", "13075": "nan", "13080": "nan", "13085": "nan", "13090": "nan", "13095": "nan", "13100": 0.34852, "13105": "nan", "13110": "nan", "13115": "nan", "13120": "nan", "13125": "nan", "13130": "nan", "13135": "nan", "13140": "nan", "13145": "nan", "13150": "nan", "13155": "nan", "13160": "nan", "13165": "nan", "13170": "nan", "13175": "nan", "13180": "nan", "13185": "nan", "13190": "nan", "13195": "nan", "13200": 0.34315, "13205": "nan", "13210": "nan", "13215": "nan", "13220": "nan", "13225": "nan", "13230": "nan", "13235": "nan", "13240": "nan", "13245": "nan", "13250": "nan", "13255": "nan", "13260": "nan", "13265": "nan", "13270": "nan", "13275": "nan", "13280": "nan", "13285": "nan", "13290": "nan", "13295": "nan", "13300": 0.34491, "13305": "nan", "13310": "nan", "13315": "nan", "13320": "nan", "13325": "nan", "13330": "nan", "13335": "nan", "13340": "nan", "13345": "nan", "13350": "nan", "13355": "nan", "13360": "nan", "13365": "nan", "13370": "nan", "13375": "nan", "13380": "nan", "13385": "nan", "13390": "nan", "13395": "nan", "13400": 0.34835, "13405": "nan", "13410": "nan", "13415": "nan", "13420": "nan", "13425": "nan", "13430": "nan", "13435": "nan", "13440": "nan", "13445": "nan", "13450": "nan", "13455": "nan", "13460": "nan", "13465": "nan", "13470": "nan", "13475": "nan", "13480": "nan", "13485": "nan", "13490": "nan", "13495": "nan", "13500": 0.34281, "13505": "nan", "13510": "nan", "13515": "nan", "13520": "nan", "13525": "nan", "13530": "nan", "13535": "nan", "13540": "nan", "13545": "nan", "13550": "nan", "13555": "nan", "13560": "nan", "13565": "nan", "13570": "nan", "13575": "nan", "13580": "nan", "13585": "nan", "13590": "nan", "13595": "nan", "13600": 0.35384, "13605": "nan", "13610": "nan", "13615": "nan", "13620": "nan", "13625": "nan", "13630": "nan", "13635": "nan", "13640": "nan", "13645": "nan", "13650": "nan", "13655": "nan", "13660": "nan", "13665": "nan", "13670": "nan", "13675": "nan", "13680": "nan", "13685": "nan", "13690": "nan", "13695": "nan", "13700": 0.34594, "13705": "nan", "13710": "nan", "13715": "nan", "13720": "nan", "13725": "nan", "13730": "nan", "13735": "nan", "13740": "nan", "13745": "nan", "13750": "nan", "13755": "nan", "13760": "nan", "13765": "nan", "13770": "nan", "13775": "nan", "13780": "nan", "13785": "nan", "13790": "nan", "13795": "nan", "13800": 0.34222, "13805": "nan", "13810": "nan", "13815": "nan", "13820": "nan", "13825": "nan", "13830": "nan", "13835": "nan", "13840": "nan", "13845": "nan", "13850": "nan", "13855": "nan", "13860": "nan", "13865": "nan", "13870": "nan", "13875": "nan", "13880": "nan", "13885": "nan", "13890": "nan", "13895": "nan", "13900": 0.34618, "13905": "nan", "13910": "nan", "13915": "nan", "13920": "nan", "13925": "nan", "13930": "nan", "13935": "nan", "13940": "nan", "13945": "nan", "13950": "nan", "13955": "nan", "13960": "nan", "13965": "nan", "13970": "nan", "13975": "nan", "13980": "nan", "13985": "nan", "13990": "nan", "13995": "nan", "14000": 0.34336, "14005": "nan", "14010": "nan", "14015": "nan", "14020": "nan", "14025": "nan", "14030": "nan", "14035": "nan", "14040": "nan", "14045": "nan", "14050": "nan", "14055": "nan", "14060": "nan", "14065": "nan", "14070": "nan", "14075": "nan", "14080": "nan", "14085": "nan", "14090": "nan", "14095": "nan", "14100": 0.34495, "14105": "nan", "14110": "nan", "14115": "nan", "14120": "nan", "14125": "nan", "14130": "nan", "14135": "nan", "14140": "nan", "14145": "nan", "14150": "nan", "14155": "nan", "14160": "nan", "14165": "nan", "14170": "nan", "14175": "nan", "14180": "nan", "14185": "nan", "14190": "nan", "14195": "nan", "14200": 0.33531, "14205": "nan", "14210": "nan", "14215": "nan", "14220": "nan", "14225": "nan", "14230": "nan", "14235": "nan", "14240": "nan", "14245": "nan", "14250": "nan", "14255": "nan", "14260": "nan", "14265": "nan", "14270": "nan", "14275": "nan", "14280": "nan", "14285": "nan", "14290": "nan", "14295": "nan", "14300": 0.3344, "14305": "nan", "14310": "nan", "14315": "nan", "14320": "nan", "14325": "nan", "14330": "nan", "14335": "nan", "14340": "nan", "14345": "nan", "14350": "nan", "14355": "nan", "14360": "nan", "14365": "nan", "14370": "nan", "14375": "nan", "14380": "nan", "14385": "nan", "14390": "nan", "14395": "nan", "14400": 0.34732, "14405": "nan", "14410": "nan", "14415": "nan", "14420": "nan", "14425": "nan", "14430": "nan", "14435": "nan", "14440": "nan", "14445": "nan", "14450": "nan", "14455": "nan", "14460": "nan", "14465": "nan", "14470": "nan", "14475": "nan", "14480": "nan", "14485": "nan", "14490": "nan", "14495": "nan", "14500": 0.33925, "14505": "nan", "14510": "nan", "14515": "nan", "14520": "nan", "14525": "nan", "14530": "nan", "14535": "nan", "14540": "nan", "14545": "nan", "14550": "nan", "14555": "nan", "14560": "nan", "14565": "nan", "14570": "nan", "14575": "nan", "14580": "nan", "14585": "nan", "14590": "nan", "14595": "nan", "14600": 0.34014, "14605": "nan", "14610": "nan", "14615": "nan", "14620": "nan", "14625": "nan", "14630": "nan", "14635": "nan", "14640": "nan", "14645": "nan", "14650": "nan", "14655": "nan", "14660": "nan", "14665": "nan", "14670": "nan", "14675": "nan", "14680": "nan", "14685": "nan", "14690": "nan", "14695": "nan", "14700": 0.34128, "14705": "nan", "14710": "nan", "14715": "nan", "14720": "nan", "14725": "nan", "14730": "nan", "14735": "nan", "14740": "nan", "14745": "nan", "14750": "nan", "14755": "nan", "14760": "nan", "14765": "nan", "14770": "nan", "14775": "nan", "14780": "nan", "14785": "nan", "14790": "nan", "14795": "nan", "14800": 0.34873, "14805": "nan", "14810": "nan", "14815": "nan", "14820": "nan", "14825": "nan", "14830": "nan", "14835": "nan", "14840": "nan", "14845": "nan", "14850": "nan", "14855": "nan", "14860": "nan", "14865": "nan", "14870": "nan", "14875": "nan", "14880": "nan", "14885": "nan", "14890": "nan", "14895": "nan", "14900": 0.35357, "14905": "nan", "14910": "nan", "14915": "nan", "14920": "nan", "14925": "nan", "14930": "nan", "14935": "nan", "14940": "nan", "14945": "nan", "14950": "nan", "14955": "nan", "14960": "nan", "14965": "nan", "14970": "nan", "14975": "nan", "14980": "nan", "14985": "nan", "14990": "nan", "14995": "nan", "15000": 0.35193, "15005": "nan", "15010": "nan", "15015": "nan", "15020": "nan", "15025": "nan", "15030": "nan", "15035": "nan", "15040": "nan", "15045": "nan", "15050": "nan", "15055": "nan", "15060": "nan", "15065": "nan", "15070": "nan", "15075": "nan", "15080": "nan", "15085": "nan", "15090": "nan", "15095": "nan", "15100": 0.34434, "15105": "nan", "15110": "nan", "15115": "nan", "15120": "nan", "15125": "nan", "15130": "nan", "15135": "nan", "15140": "nan", "15145": "nan", "15150": "nan", "15155": "nan", "15160": "nan", "15165": "nan", "15170": "nan", "15175": "nan", "15180": "nan", "15185": "nan", "15190": "nan", "15195": "nan", "15200": 0.34422, "15205": "nan", "15210": "nan", "15215": "nan", "15220": "nan", "15225": "nan", "15230": "nan", "15235": "nan", "15240": "nan", "15245": "nan", "15250": "nan", "15255": "nan", "15260": "nan", "15265": "nan", "15270": "nan", "15275": "nan", "15280": "nan", "15285": "nan", "15290": "nan", "15295": "nan", "15300": 0.3426, "15305": "nan", "15310": "nan", "15315": "nan", "15320": "nan", "15325": "nan", "15330": "nan", "15335": "nan", "15340": "nan", "15345": "nan", "15350": "nan", "15355": "nan", "15360": "nan", "15365": "nan", "15370": "nan", "15375": "nan", "15380": "nan", "15385": "nan", "15390": "nan", "15395": "nan", "15400": 0.33816, "15405": "nan", "15410": "nan", "15415": "nan", "15420": "nan", "15425": "nan", "15430": "nan", "15435": "nan", "15440": "nan", "15445": "nan", "15450": "nan", "15455": "nan", "15460": "nan", "15465": "nan", "15470": "nan", "15475": "nan", "15480": "nan", "15485": "nan", "15490": "nan", "15495": "nan", "15500": 0.34503, "15505": "nan", "15510": "nan", "15515": "nan", "15520": "nan", "15525": "nan", "15530": "nan", "15535": "nan", "15540": "nan", "15545": "nan", "15550": "nan", "15555": "nan", "15560": "nan", "15565": "nan", "15570": "nan", "15575": "nan", "15580": "nan", "15585": "nan", "15590": "nan", "15595": "nan", "15600": 0.34602, "15605": "nan", "15610": "nan", "15615": "nan", "15620": "nan", "15625": "nan", "15630": "nan", "15635": "nan", "15640": "nan", "15645": "nan", "15650": "nan", "15655": "nan", "15660": "nan", "15665": "nan", "15670": "nan", "15675": "nan", "15680": "nan", "15685": "nan", "15690": "nan", "15695": "nan", "15700": 0.33829, "15705": "nan", "15710": "nan", "15715": "nan", "15720": "nan", "15725": "nan", "15730": "nan", "15735": "nan", "15740": "nan", "15745": "nan", "15750": "nan", "15755": "nan", "15760": "nan", "15765": "nan", "15770": "nan", "15775": "nan", "15780": "nan", "15785": "nan", "15790": "nan", "15795": "nan", "15800": 0.34816, "15805": "nan", "15810": "nan", "15815": "nan", "15820": "nan", "15825": "nan", "15830": "nan", "15835": "nan", "15840": "nan", "15845": "nan", "15850": "nan", "15855": "nan", "15860": "nan", "15865": "nan", "15870": "nan", "15875": "nan", "15880": "nan", "15885": "nan", "15890": "nan", "15895": "nan", "15900": 0.33962, "15905": "nan", "15910": "nan", "15915": "nan", "15920": "nan", "15925": "nan", "15930": "nan", "15935": "nan", "15940": "nan", "15945": "nan", "15950": "nan", "15955": "nan", "15960": "nan", "15965": "nan", "15970": "nan", "15975": "nan", "15980": "nan", "15985": "nan", "15990": "nan", "15995": "nan", "16000": 0.35952, "16005": "nan", "16010": "nan", "16015": "nan", "16020": "nan", "16025": "nan", "16030": "nan", "16035": "nan", "16040": "nan", "16045": "nan", "16050": "nan", "16055": "nan", "16060": "nan", "16065": "nan", "16070": "nan", "16075": "nan", "16080": "nan", "16085": "nan", "16090": "nan", "16095": "nan", "16100": 0.33635, "16105": "nan", "16110": "nan", "16115": "nan", "16120": "nan", "16125": "nan", "16130": "nan", "16135": "nan", "16140": "nan", "16145": "nan", "16150": "nan", "16155": "nan", "16160": "nan", "16165": "nan", "16170": "nan", "16175": "nan", "16180": "nan", "16185": "nan", "16190": "nan", "16195": "nan", "16200": 0.33479, "16205": "nan", "16210": "nan", "16215": "nan", "16220": "nan", "16225": "nan", "16230": "nan", "16235": "nan", "16240": "nan", "16245": "nan", "16250": "nan", "16255": "nan", "16260": "nan", "16265": "nan", "16270": "nan", "16275": "nan", "16280": "nan", "16285": "nan", "16290": "nan", "16295": "nan", "16300": 0.34353, "16305": "nan", "16310": "nan", "16315": "nan", "16320": "nan", "16325": "nan", "16330": "nan", "16335": "nan", "16340": "nan", "16345": "nan", "16350": "nan", "16355": "nan", "16360": "nan", "16365": "nan", "16370": "nan", "16375": "nan", "16380": "nan", "16385": "nan", "16390": "nan", "16395": "nan", "16400": 0.34664, "16405": "nan", "16410": "nan", "16415": "nan", "16420": "nan", "16425": "nan", "16430": "nan", "16435": "nan", "16440": "nan", "16445": "nan", "16450": "nan", "16455": "nan", "16460": "nan", "16465": "nan", "16470": "nan", "16475": "nan", "16480": "nan", "16485": "nan", "16490": "nan", "16495": "nan", "16500": 0.33645, "16505": "nan", "16510": "nan", "16515": "nan", "16520": "nan", "16525": "nan", "16530": "nan", "16535": "nan", "16540": "nan", "16545": "nan", "16550": "nan", "16555": "nan", "16560": "nan", "16565": "nan", "16570": "nan", "16575": "nan", "16580": "nan", "16585": "nan", "16590": "nan", "16595": "nan", "16600": 0.33585, "16605": "nan", "16610": "nan", "16615": "nan", "16620": "nan", "16625": "nan", "16630": "nan", "16635": "nan", "16640": "nan", "16645": "nan", "16650": "nan", "16655": "nan", "16660": "nan", "16665": "nan", "16670": "nan", "16675": "nan", "16680": "nan", "16685": "nan", "16690": "nan", "16695": "nan", "16700": 0.33605, "16705": "nan", "16710": "nan", "16715": "nan", "16720": "nan", "16725": "nan", "16730": "nan", "16735": "nan", "16740": "nan", "16745": "nan", "16750": "nan", "16755": "nan", "16760": "nan", "16765": "nan", "16770": "nan", "16775": "nan", "16780": "nan", "16785": "nan", "16790": "nan", "16795": "nan", "16800": 0.34357, "16805": "nan", "16810": "nan", "16815": "nan", "16820": "nan", "16825": "nan", "16830": "nan", "16835": "nan", "16840": "nan", "16845": "nan", "16850": "nan", "16855": "nan", "16860": "nan", "16865": "nan", "16870": "nan", "16875": "nan", "16880": "nan", "16885": "nan", "16890": "nan", "16895": "nan", "16900": 0.34793, "16905": "nan", "16910": "nan", "16915": "nan", "16920": "nan", "16925": "nan", "16930": "nan", "16935": "nan", "16940": "nan", "16945": "nan", "16950": "nan", "16955": "nan", "16960": "nan", "16965": "nan", "16970": "nan", "16975": "nan", "16980": "nan", "16985": "nan", "16990": "nan", "16995": "nan", "17000": 0.33832, "17005": "nan", "17010": "nan", "17015": "nan", "17020": "nan", "17025": "nan", "17030": "nan", "17035": "nan", "17040": "nan", "17045": "nan", "17050": "nan", "17055": "nan", "17060": "nan", "17065": "nan", "17070": "nan", "17075": "nan", "17080": "nan", "17085": "nan", "17090": "nan", "17095": "nan", "17100": 0.34075, "17105": "nan", "17110": "nan", "17115": "nan", "17120": "nan", "17125": "nan", "17130": "nan", "17135": "nan", "17140": "nan", "17145": "nan", "17150": "nan", "17155": "nan", "17160": "nan", "17165": "nan", "17170": "nan", "17175": "nan", "17180": "nan", "17185": "nan", "17190": "nan", "17195": "nan", "17200": 0.33808, "17205": "nan", "17210": "nan", "17215": "nan", "17220": "nan", "17225": "nan", "17230": "nan", "17235": "nan", "17240": "nan", "17245": "nan", "17250": "nan", "17255": "nan", "17260": "nan", "17265": "nan", "17270": "nan", "17275": "nan", "17280": "nan", "17285": "nan", "17290": "nan", "17295": "nan", "17300": 0.34152, "17305": "nan", "17310": "nan", "17315": "nan", "17320": "nan", "17325": "nan", "17330": "nan", "17335": "nan", "17340": "nan", "17345": "nan", "17350": "nan", "17355": "nan", "17360": "nan", "17365": "nan", "17370": "nan", "17375": "nan", "17380": "nan", "17385": "nan", "17390": "nan", "17395": "nan", "17400": 0.34091, "17405": "nan", "17410": "nan", "17415": "nan", "17420": "nan", "17425": "nan", "17430": "nan", "17435": "nan", "17440": "nan", "17445": "nan", "17450": "nan", "17455": "nan", "17460": "nan", "17465": "nan", "17470": "nan", "17475": "nan", "17480": "nan", "17485": "nan", "17490": "nan", "17495": "nan", "17500": 0.34222, "17505": "nan", "17510": "nan", "17515": "nan", "17520": "nan", "17525": "nan", "17530": "nan", "17535": "nan", "17540": "nan", "17545": "nan", "17550": "nan", "17555": "nan", "17560": "nan", "17565": "nan", "17570": "nan", "17575": "nan", "17580": "nan", "17585": "nan", "17590": "nan", "17595": "nan", "17600": 0.3337, "17605": "nan", "17610": "nan", "17615": "nan", "17620": "nan", "17625": "nan", "17630": "nan", "17635": "nan", "17640": "nan", "17645": "nan", "17650": "nan", "17655": "nan", "17660": "nan", "17665": "nan", "17670": "nan", "17675": "nan", "17680": "nan", "17685": "nan", "17690": "nan", "17695": "nan", "17700": 0.34481, "17705": "nan", "17710": "nan", "17715": "nan", "17720": "nan", "17725": "nan", "17730": "nan", "17735": "nan", "17740": "nan", "17745": "nan", "17750": "nan", "17755": "nan", "17760": "nan", "17765": "nan", "17770": "nan", "17775": "nan", "17780": "nan", "17785": "nan", "17790": "nan", "17795": "nan", "17800": 0.34491, "17805": "nan", "17810": "nan", "17815": "nan", "17820": "nan", "17825": "nan", "17830": "nan", "17835": "nan", "17840": "nan", "17845": "nan", "17850": "nan", "17855": "nan", "17860": "nan", "17865": "nan", "17870": "nan", "17875": "nan", "17880": "nan", "17885": "nan", "17890": "nan", "17895": "nan", "17900": 0.34, "17905": "nan", "17910": "nan", "17915": "nan", "17920": "nan", "17925": "nan", "17930": "nan", "17935": "nan", "17940": "nan", "17945": "nan", "17950": "nan", "17955": "nan", "17960": "nan", "17965": "nan", "17970": "nan", "17975": "nan", "17980": "nan", "17985": "nan", "17990": "nan", "17995": "nan", "18000": 0.33674, "18005": "nan", "18010": "nan", "18015": "nan", "18020": "nan", "18025": "nan", "18030": "nan", "18035": "nan", "18040": "nan", "18045": "nan", "18050": "nan", "18055": "nan", "18060": "nan", "18065": "nan", "18070": "nan", "18075": "nan", "18080": "nan", "18085": "nan", "18090": "nan", "18095": "nan", "18100": 0.33896, "18105": "nan", "18110": "nan", "18115": "nan", "18120": "nan", "18125": "nan", "18130": "nan", "18135": "nan", "18140": "nan", "18145": "nan", "18150": "nan", "18155": "nan", "18160": "nan", "18165": "nan", "18170": "nan", "18175": "nan", "18180": "nan", "18185": "nan", "18190": "nan", "18195": "nan", "18200": 0.3376, "18205": "nan", "18210": "nan", "18215": "nan", "18220": "nan", "18225": "nan", "18230": "nan", "18235": "nan", "18240": "nan", "18245": "nan", "18250": "nan", "18255": "nan", "18260": "nan", "18265": "nan", "18270": "nan", "18275": "nan", "18280": "nan", "18285": "nan", "18290": "nan", "18295": "nan", "18300": 0.32857, "18305": "nan", "18310": "nan", "18315": "nan", "18320": "nan", "18325": "nan", "18330": "nan", "18335": "nan", "18340": "nan", "18345": "nan", "18350": "nan", "18355": "nan", "18360": "nan", "18365": "nan", "18370": "nan", "18375": "nan", "18380": "nan", "18385": "nan", "18390": "nan", "18395": "nan", "18400": 0.34329, "18405": "nan", "18410": "nan", "18415": "nan", "18420": "nan", "18425": "nan", "18430": "nan", "18435": "nan", "18440": "nan", "18445": "nan", "18450": "nan", "18455": "nan", "18460": "nan", "18465": "nan", "18470": "nan", "18475": "nan", "18480": "nan", "18485": "nan", "18490": "nan", "18495": "nan", "18500": 0.33872, "18505": "nan", "18510": "nan", "18515": "nan", "18520": "nan", "18525": "nan", "18530": "nan", "18535": "nan", "18540": "nan", "18545": "nan", "18550": "nan", "18555": "nan", "18560": "nan", "18565": "nan", "18570": "nan", "18575": "nan", "18580": "nan", "18585": "nan", "18590": "nan", "18595": "nan", "18600": 0.33619, "18605": "nan", "18610": "nan", "18615": "nan", "18620": "nan", "18625": "nan", "18630": "nan", "18635": "nan", "18640": "nan", "18645": "nan", "18650": "nan", "18655": "nan", "18660": "nan", "18665": "nan", "18670": "nan", "18675": "nan", "18680": "nan", "18685": "nan", "18690": "nan", "18695": "nan", "18700": 0.33981, "18705": "nan", "18710": "nan", "18715": "nan", "18720": "nan", "18725": "nan", "18730": "nan", "18735": "nan", "18740": "nan", "18745": "nan", "18750": "nan", "18755": "nan", "18760": "nan", "18765": "nan", "18770": "nan", "18775": "nan", "18780": "nan", "18785": "nan", "18790": "nan", "18795": "nan", "18800": 0.34527, "18805": "nan", "18810": "nan", "18815": "nan", "18820": "nan", "18825": "nan", "18830": "nan", "18835": "nan", "18840": "nan", "18845": "nan", "18850": "nan", "18855": "nan", "18860": "nan", "18865": "nan", "18870": "nan", "18875": "nan", "18880": "nan", "18885": "nan", "18890": "nan", "18895": "nan", "18900": 0.33842, "18905": "nan", "18910": "nan", "18915": "nan", "18920": "nan", "18925": "nan", "18930": "nan", "18935": "nan", "18940": "nan", "18945": "nan", "18950": "nan", "18955": "nan", "18960": "nan", "18965": "nan", "18970": "nan", "18975": "nan", "18980": "nan", "18985": "nan", "18990": "nan", "18995": "nan", "19000": 0.33565, "19005": "nan", "19010": "nan", "19015": "nan", "19020": "nan", "19025": "nan", "19030": "nan", "19035": "nan", "19040": "nan", "19045": "nan", "19050": "nan", "19055": "nan", "19060": "nan", "19065": "nan", "19070": "nan", "19075": "nan", "19080": "nan", "19085": "nan", "19090": "nan", "19095": "nan", "19100": 0.33298, "19105": "nan", "19110": "nan", "19115": "nan", "19120": "nan", "19125": "nan", "19130": "nan", "19135": "nan", "19140": "nan", "19145": "nan", "19150": "nan", "19155": "nan", "19160": "nan", "19165": "nan", "19170": "nan", "19175": "nan", "19180": "nan", "19185": "nan", "19190": "nan", "19195": "nan", "19200": 0.33651, "19205": "nan", "19210": "nan", "19215": "nan", "19220": "nan", "19225": "nan", "19230": "nan", "19235": "nan", "19240": "nan", "19245": "nan", "19250": "nan", "19255": "nan", "19260": "nan", "19265": "nan", "19270": "nan", "19275": "nan", "19280": "nan", "19285": "nan", "19290": "nan", "19295": "nan", "19300": 0.33963, "19305": "nan", "19310": "nan", "19315": "nan", "19320": "nan", "19325": "nan", "19330": "nan", "19335": "nan", "19340": "nan", "19345": "nan", "19350": "nan", "19355": "nan", "19360": "nan", "19365": "nan", "19370": "nan", "19375": "nan", "19380": "nan", "19385": "nan", "19390": "nan", "19395": "nan", "19400": 0.33776, "19405": "nan", "19410": "nan", "19415": "nan", "19420": "nan", "19425": "nan", "19430": "nan", "19435": "nan", "19440": "nan", "19445": "nan", "19450": "nan", "19455": "nan", "19460": "nan", "19465": "nan", "19470": "nan", "19475": "nan", "19480": "nan", "19485": "nan", "19490": "nan", "19495": "nan", "19500": 0.34034, "19505": "nan", "19510": "nan", "19515": "nan", "19520": "nan", "19525": "nan", "19530": "nan", "19535": "nan", "19540": "nan", "19545": "nan", "19550": "nan", "19555": "nan", "19560": "nan", "19565": "nan", "19570": "nan", "19575": "nan", "19580": "nan", "19585": "nan", "19590": "nan", "19595": "nan", "19600": 0.34159, "19605": "nan", "19610": "nan", "19615": "nan", "19620": "nan", "19625": "nan", "19630": "nan", "19635": "nan", "19640": "nan", "19645": "nan", "19650": "nan", "19655": "nan", "19660": "nan", "19665": "nan", "19670": "nan", "19675": "nan", "19680": "nan", "19685": "nan", "19690": "nan", "19695": "nan", "19700": 0.34229, "19705": "nan", "19710": "nan", "19715": "nan", "19720": "nan", "19725": "nan", "19730": "nan", "19735": "nan", "19740": "nan", "19745": "nan", "19750": "nan", "19755": "nan", "19760": "nan", "19765": "nan", "19770": "nan", "19775": "nan", "19780": "nan", "19785": "nan", "19790": "nan", "19795": "nan", "19800": 0.35, "19805": "nan", "19810": "nan", "19815": "nan", "19820": "nan", "19825": "nan", "19830": "nan", "19835": "nan", "19840": "nan", "19845": "nan", "19850": "nan", "19855": "nan", "19860": "nan", "19865": "nan", "19870": "nan", "19875": "nan", "19880": "nan", "19885": "nan", "19890": "nan", "19895": "nan", "19900": 0.3377, "19905": "nan", "19910": "nan", "19915": "nan", "19920": "nan", "19925": "nan", "19930": "nan", "19935": "nan", "19940": "nan", "19945": "nan", "19950": "nan", "19955": "nan", "19960": "nan", "19965": "nan", "19970": "nan", "19975": "nan", "19980": "nan", "19985": "nan", "19990": "nan", "19995": "nan", "20000": 0.33554, "20005": "nan", "20010": "nan", "20015": "nan", "20020": "nan", "20025": "nan", "20030": "nan", "20035": "nan", "20040": "nan", "20045": "nan", "20050": "nan", "20055": "nan", "20060": "nan", "20065": "nan", "20070": "nan", "20075": "nan", "20080": "nan", "20085": "nan", "20090": "nan", "20095": "nan", "20100": "nan", "20105": "nan", "20110": "nan", "20115": "nan", "20120": "nan", "20125": "nan", "20130": "nan", "20135": "nan", "20140": "nan", "20145": "nan", "20150": "nan", "20155": "nan", "20160": "nan", "20165": "nan", "20170": "nan", "20175": "nan", "20180": "nan", "20185": "nan", "20190": "nan", "20195": "nan", "20200": "nan", "20205": "nan", "20210": "nan", "20215": "nan", "20220": "nan", "20225": "nan", "20230": "nan", "20235": "nan", "20240": "nan", "20245": "nan", "20250": "nan", "20255": "nan", "20260": "nan", "20265": "nan", "20270": "nan", "20275": "nan", "20280": "nan", "20285": "nan", "20290": "nan", "20295": "nan", "20300": "nan", "20305": "nan", "20310": "nan", "20315": "nan", "20320": "nan", "20325": "nan", "20330": "nan", "20335": "nan", "20340": "nan", "20345": "nan", "20350": "nan", "20355": "nan", "20360": "nan", "20365": "nan", "20370": "nan", "20375": "nan", "20380": "nan", "20385": "nan", "20390": "nan", "20395": "nan", "20400": "nan", "20405": "nan", "20410": "nan", "20415": "nan", "20420": "nan", "20425": "nan", "20430": "nan", "20435": "nan", "20440": "nan", "20445": "nan", "20450": "nan", "20455": "nan", "20460": "nan", "20465": "nan", "20470": "nan", "20475": "nan", "20480": "nan", "20485": "nan", "20490": "nan", "20495": "nan", "20500": "nan", "20505": "nan", "20510": "nan", "20515": "nan", "20520": "nan", "20525": "nan", "20530": "nan", "20535": "nan", "20540": "nan", "20545": "nan", "20550": "nan", "20555": "nan", "20560": "nan", "20565": "nan", "20570": "nan", "20575": "nan", "20580": "nan", "20585": "nan", "20590": "nan", "20595": "nan", "20600": "nan", "20605": "nan", "20610": "nan", "20615": "nan", "20620": "nan", "20625": "nan", "20630": "nan", "20635": "nan", "20640": "nan", "20645": "nan", "20650": "nan", "20655": "nan", "20660": "nan", "20665": "nan", "20670": "nan", "20675": "nan", "20680": "nan", "20685": "nan", "20690": "nan", "20695": "nan", "20700": "nan", "20705": "nan", "20710": "nan", "20715": "nan", "20720": "nan", "20725": "nan", "20730": "nan", "20735": "nan", "20740": "nan", "20745": "nan", "20750": "nan", "20755": "nan", "20760": "nan", "20765": "nan", "20770": "nan", "20775": "nan", "20780": "nan", "20785": "nan", "20790": "nan", "20795": "nan", "20800": "nan", "20805": "nan", "20810": "nan", "20815": "nan", "20820": "nan", "20825": "nan", "20830": "nan", "20835": "nan", "20840": "nan", "20845": "nan", "20850": "nan", "20855": "nan", "20860": "nan", "20865": "nan", "20870": "nan", "20875": "nan", "20880": "nan", "20885": "nan", "20890": "nan", "20895": "nan", "20900": "nan", "20905": "nan", "20910": "nan", "20915": "nan", "20920": "nan", "20925": "nan", "20930": "nan", "20935": "nan", "20940": "nan", "20945": "nan", "20950": "nan", "20955": "nan", "20960": "nan", "20965": "nan", "20970": "nan", "20975": "nan", "20980": "nan", "20985": "nan", "20990": "nan", "20995": "nan", "21000": "nan", "21005": "nan", "21010": "nan", "21015": "nan", "21020": "nan", "21025": "nan", "21030": "nan", "21035": "nan", "21040": "nan", "21045": "nan", "21050": "nan", "21055": "nan", "21060": "nan", "21065": "nan", "21070": "nan", "21075": "nan", "21080": "nan", "21085": "nan", "21090": "nan", "21095": "nan", "21100": "nan", "21105": "nan", "21110": "nan", "21115": "nan", "21120": "nan", "21125": "nan", "21130": "nan", "21135": "nan", "21140": "nan", "21145": "nan", "21150": "nan", "21155": "nan", "21160": "nan", "21165": "nan", "21170": "nan", "21175": "nan", "21180": "nan", "21185": "nan", "21190": "nan", "21195": "nan", "21200": "nan", "21205": "nan", "21210": "nan", "21215": "nan", "21220": "nan", "21225": "nan", "21230": "nan", "21235": "nan", "21240": "nan", "21245": "nan", "21250": "nan", "21255": "nan", "21260": "nan", "21265": "nan", "21270": "nan", "21275": "nan", "21280": "nan", "21285": "nan", "21290": "nan", "21295": "nan", "21300": "nan", "21305": "nan", "21310": "nan", "21315": "nan", "21320": "nan", "21325": "nan", "21330": "nan", "21335": "nan", "21340": "nan", "21345": "nan", "21350": "nan", "21355": "nan", "21360": "nan", "21365": "nan", "21370": "nan", "21375": "nan", "21380": "nan", "21385": "nan", "21390": "nan", "21395": "nan", "21400": "nan", "21405": "nan", "21410": "nan", "21415": "nan", "21420": "nan", "21425": "nan", "21430": "nan", "21435": "nan", "21440": "nan", "21445": "nan", "21450": "nan", "21455": "nan", "21460": "nan", "21465": "nan", "21470": "nan", "21475": "nan", "21480": "nan", "21485": "nan", "21490": "nan", "21495": "nan", "21500": "nan", "21505": "nan", "21510": "nan", "21515": "nan", "21520": "nan", "21525": "nan", "21530": "nan", "21535": "nan", "21540": "nan", "21545": "nan", "21550": "nan", "21555": "nan", "21560": "nan", "21565": "nan", "21570": "nan", "21575": "nan", "21580": "nan", "21585": "nan", "21590": "nan", "21595": "nan", "21600": "nan", "21605": "nan", "21610": "nan", "21615": "nan", "21620": "nan", "21625": "nan", "21630": "nan", "21635": "nan", "21640": "nan", "21645": "nan", "21650": "nan", "21655": "nan", "21660": "nan", "21665": "nan", "21670": "nan", "21675": "nan", "21680": "nan", "21685": "nan", "21690": "nan", "21695": "nan", "21700": "nan", "21705": "nan", "21710": "nan", "21715": "nan", "21720": "nan", "21725": "nan", "21730": "nan", "21735": "nan", "21740": "nan", "21745": "nan", "21750": "nan", "21755": "nan", "21760": "nan", "21765": "nan", "21770": "nan", "21775": "nan", "21780": "nan", "21785": "nan", "21790": "nan", "21795": "nan", "21800": "nan", "21805": "nan", "21810": "nan", "21815": "nan", "21820": "nan", "21825": "nan", "21830": "nan", "21835": "nan", "21840": "nan", "21845": "nan", "21850": "nan", "21855": "nan", "21860": "nan", "21865": "nan", "21870": "nan", "21875": "nan", "21880": "nan", "21885": "nan", "21890": "nan", "21895": "nan", "21900": "nan", "21905": "nan", "21910": "nan", "21915": "nan", "21920": "nan", "21925": "nan", "21930": "nan", "21935": "nan", "21940": "nan", "21945": "nan", "21950": "nan", "21955": "nan", "21960": "nan", "21965": "nan", "21970": "nan", "21975": "nan", "21980": "nan", "21985": "nan", "21990": "nan", "21995": "nan", "22000": "nan", "22005": "nan", "22010": "nan", "22015": "nan", "22020": "nan", "22025": "nan", "22030": "nan", "22035": "nan", "22040": "nan", "22045": "nan", "22050": "nan", "22055": "nan", "22060": "nan", "22065": "nan", "22070": "nan", "22075": "nan", "22080": "nan", "22085": "nan", "22090": "nan", "22095": "nan", "22100": "nan", "22105": "nan", "22110": "nan", "22115": "nan", "22120": "nan", "22125": "nan", "22130": "nan", "22135": "nan", "22140": "nan", "22145": "nan", "22150": "nan", "22155": "nan", "22160": "nan", "22165": "nan", "22170": "nan", "22175": "nan", "22180": "nan", "22185": "nan", "22190": "nan", "22195": "nan", "22200": "nan", "22205": "nan", "22210": "nan", "22215": "nan", "22220": "nan", "22225": "nan", "22230": "nan", "22235": "nan", "22240": "nan", "22245": "nan", "22250": "nan", "22255": "nan", "22260": "nan", "22265": "nan", "22270": "nan", "22275": "nan", "22280": "nan", "22285": "nan", "22290": "nan", "22295": "nan", "22300": "nan", "22305": "nan", "22310": "nan", "22315": "nan", "22320": "nan", "22325": "nan", "22330": "nan", "22335": "nan", "22340": "nan", "22345": "nan", "22350": "nan", "22355": "nan", "22360": "nan", "22365": "nan", "22370": "nan", "22375": "nan", "22380": "nan", "22385": "nan", "22390": "nan", "22395": "nan", "22400": "nan", "22405": "nan", "22410": "nan", "22415": "nan", "22420": "nan", "22425": "nan", "22430": "nan", "22435": "nan", "22440": "nan", "22445": "nan", "22450": "nan", "22455": "nan", "22460": "nan", "22465": "nan", "22470": "nan", "22475": "nan", "22480": "nan", "22485": "nan", "22490": "nan", "22495": "nan", "22500": "nan", "22505": "nan", "22510": "nan", "22515": "nan", "22520": "nan", "22525": "nan", "22530": "nan", "22535": "nan", "22540": "nan", "22545": "nan", "22550": "nan", "22555": "nan", "22560": "nan", "22565": "nan", "22570": "nan", "22575": "nan", "22580": "nan", "22585": "nan", "22590": "nan", "22595": "nan", "22600": "nan", "22605": "nan", "22610": "nan", "22615": "nan", "22620": "nan", "22625": "nan", "22630": "nan", "22635": "nan", "22640": "nan", "22645": "nan", "22650": "nan", "22655": "nan", "22660": "nan", "22665": "nan", "22670": "nan", "22675": "nan", "22680": "nan", "22685": "nan", "22690": "nan", "22695": "nan", "22700": "nan", "22705": "nan", "22710": "nan", "22715": "nan", "22720": "nan", "22725": "nan", "22730": "nan", "22735": "nan", "22740": "nan", "22745": "nan", "22750": "nan", "22755": "nan", "22760": "nan", "22765": "nan", "22770": "nan", "22775": "nan", "22780": "nan", "22785": "nan", "22790": "nan", "22795": "nan", "22800": "nan", "22805": "nan", "22810": "nan", "22815": "nan", "22820": "nan", "22825": "nan", "22830": "nan", "22835": "nan", "22840": "nan", "22845": "nan", "22850": "nan", "22855": "nan", "22860": "nan", "22865": "nan", "22870": "nan", "22875": "nan", "22880": "nan", "22885": "nan", "22890": "nan", "22895": "nan", "22900": "nan", "22905": "nan", "22910": "nan", "22915": "nan", "22920": "nan", "22925": "nan", "22930": "nan", "22935": "nan", "22940": "nan", "22945": "nan", "22950": "nan", "22955": "nan", "22960": "nan", "22965": "nan", "22970": "nan", "22975": "nan", "22980": "nan", "22985": "nan", "22990": "nan", "22995": "nan", "23000": "nan", "23005": "nan", "23010": "nan", "23015": "nan", "23020": "nan", "23025": "nan", "23030": "nan", "23035": "nan", "23040": "nan", "23045": "nan", "23050": "nan", "23055": "nan", "23060": "nan", "23065": "nan", "23070": "nan", "23075": "nan", "23080": "nan", "23085": "nan", "23090": "nan", "23095": "nan", "23100": "nan", "23105": "nan", "23110": "nan", "23115": "nan", "23120": "nan", "23125": "nan", "23130": "nan", "23135": "nan", "23140": "nan", "23145": "nan", "23150": "nan", "23155": "nan", "23160": "nan", "23165": "nan", "23170": "nan", "23175": "nan", "23180": "nan", "23185": "nan", "23190": "nan", "23195": "nan", "23200": "nan", "23205": "nan", "23210": "nan", "23215": "nan", "23220": "nan", "23225": "nan", "23230": "nan", "23235": "nan", "23240": "nan", "23245": "nan", "23250": "nan", "23255": "nan", "23260": "nan", "23265": "nan", "23270": "nan", "23275": "nan", "23280": "nan", "23285": "nan", "23290": "nan", "23295": "nan", "23300": "nan", "23305": "nan", "23310": "nan", "23315": "nan", "23320": "nan", "23325": "nan", "23330": "nan", "23335": "nan", "23340": "nan", "23345": "nan", "23350": "nan", "23355": "nan", "23360": "nan", "23365": "nan", "23370": "nan", "23375": "nan", "23380": "nan", "23385": "nan", "23390": "nan", "23395": "nan", "23400": "nan", "23405": "nan", "23410": "nan", "23415": "nan", "23420": "nan", "23425": "nan", "23430": "nan", "23435": "nan", "23440": "nan", "23445": "nan", "23450": "nan", "23455": "nan", "23460": "nan", "23465": "nan", "23470": "nan", "23475": "nan", "23480": "nan", "23485": "nan", "23490": "nan", "23495": "nan", "23500": "nan", "23505": "nan", "23510": "nan", "23515": "nan", "23520": "nan", "23525": "nan", "23530": "nan", "23535": "nan", "23540": "nan", "23545": "nan", "23550": "nan", "23555": "nan", "23560": "nan", "23565": "nan", "23570": "nan", "23575": "nan", "23580": "nan", "23585": "nan", "23590": "nan", "23595": "nan", "23600": "nan", "23605": "nan", "23610": "nan", "23615": "nan", "23620": "nan", "23625": "nan", "23630": "nan", "23635": "nan", "23640": "nan", "23645": "nan", "23650": "nan", "23655": "nan", "23660": "nan", "23665": "nan", "23670": "nan", "23675": "nan", "23680": "nan", "23685": "nan", "23690": "nan", "23695": "nan", "23700": "nan", "23705": "nan", "23710": "nan", "23715": "nan", "23720": "nan", "23725": "nan", "23730": "nan", "23735": "nan", "23740": "nan", "23745": "nan", "23750": "nan", "23755": "nan", "23760": "nan", "23765": "nan", "23770": "nan", "23775": "nan", "23780": "nan", "23785": "nan", "23790": "nan", "23795": "nan", "23800": "nan", "23805": "nan", "23810": "nan", "23815": "nan", "23820": "nan", "23825": "nan", "23830": "nan", "23835": "nan", "23840": "nan", "23845": "nan", "23850": "nan", "23855": "nan", "23860": "nan", "23865": "nan", "23870": "nan", "23875": "nan", "23880": "nan", "23885": "nan", "23890": "nan", "23895": "nan", "23900": "nan", "23905": "nan", "23910": "nan", "23915": "nan", "23920": "nan", "23925": "nan", "23930": "nan", "23935": "nan", "23940": "nan", "23945": "nan", "23950": "nan", "23955": "nan", "23960": "nan", "23965": "nan", "23970": "nan", "23975": "nan", "23980": "nan", "23985": "nan", "23990": "nan", "23995": "nan", "24000": "nan", "24005": "nan", "24010": "nan", "24015": "nan", "24020": "nan", "24025": "nan", "24030": "nan", "24035": "nan", "24040": "nan", "24045": "nan", "24050": "nan", "24055": "nan", "24060": "nan", "24065": "nan", "24070": "nan", "24075": "nan", "24080": "nan", "24085": "nan", "24090": "nan", "24095": "nan", "24100": "nan", "24105": "nan", "24110": "nan", "24115": "nan", "24120": "nan", "24125": "nan", "24130": "nan", "24135": "nan", "24140": "nan", "24145": "nan", "24150": "nan", "24155": "nan", "24160": "nan", "24165": "nan", "24170": "nan", "24175": "nan", "24180": "nan", "24185": "nan", "24190": "nan", "24195": "nan", "24200": "nan", "24205": "nan", "24210": "nan", "24215": "nan", "24220": "nan", "24225": "nan", "24230": "nan", "24235": "nan", "24240": "nan", "24245": "nan", "24250": "nan", "24255": "nan", "24260": "nan", "24265": "nan", "24270": "nan", "24275": "nan", "24280": "nan", "24285": "nan", "24290": "nan", "24295": "nan", "24300": "nan", "24305": "nan", "24310": "nan", "24315": "nan", "24320": "nan", "24325": "nan", "24330": "nan", "24335": "nan", "24340": "nan", "24345": "nan", "24350": "nan", "24355": "nan", "24360": "nan", "24365": "nan", "24370": "nan", "24375": "nan", "24380": "nan", "24385": "nan", "24390": "nan", "24395": "nan", "24400": "nan", "24405": "nan", "24410": "nan", "24415": "nan", "24420": "nan", "24425": "nan", "24430": "nan", "24435": "nan", "24440": "nan", "24445": "nan", "24450": "nan", "24455": "nan", "24460": "nan", "24465": "nan", "24470": "nan", "24475": "nan", "24480": "nan", "24485": "nan", "24490": "nan", "24495": "nan", "24500": "nan", "24505": "nan", "24510": "nan", "24515": "nan", "24520": "nan", "24525": "nan", "24530": "nan", "24535": "nan", "24540": "nan", "24545": "nan", "24550": "nan", "24555": "nan", "24560": "nan", "24565": "nan", "24570": "nan", "24575": "nan", "24580": "nan", "24585": "nan", "24590": "nan", "24595": "nan", "24600": "nan", "24605": "nan", "24610": "nan", "24615": "nan", "24620": "nan", "24625": "nan", "24630": "nan", "24635": "nan", "24640": "nan", "24645": "nan", "24650": "nan", "24655": "nan", "24660": "nan", "24665": "nan", "24670": "nan", "24675": "nan", "24680": "nan", "24685": "nan", "24690": "nan", "24695": "nan", "24700": "nan", "24705": "nan", "24710": "nan", "24715": "nan", "24720": "nan", "24725": "nan", "24730": "nan", "24735": "nan", "24740": "nan", "24745": "nan", "24750": "nan", "24755": "nan", "24760": "nan", "24765": "nan", "24770": "nan", "24775": "nan", "24780": "nan", "24785": "nan", "24790": "nan", "24795": "nan", "24800": "nan", "24805": "nan", "24810": "nan", "24815": "nan", "24820": "nan", "24825": "nan", "24830": "nan", "24835": "nan", "24840": "nan", "24845": "nan", "24850": "nan", "24855": "nan", "24860": "nan", "24865": "nan", "24870": "nan", "24875": "nan", "24880": "nan", "24885": "nan", "24890": "nan", "24895": "nan", "24900": "nan", "24905": "nan", "24910": "nan", "24915": "nan", "24920": "nan", "24925": "nan", "24930": "nan", "24935": "nan", "24940": "nan", "24945": "nan", "24950": "nan", "24955": "nan", "24960": "nan", "24965": "nan", "24970": "nan", "24975": "nan", "24980": "nan", "24985": "nan", "24990": "nan", "24995": "nan", "25000": "nan"}}}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_release/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: "1"
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: "1"
  NON_DETERMINSTIC_RESULTS: 1
TEST_TYPE: "release"
MODEL_ARGS:
  # Bert model args
  --num-layers: 24
  --hidden-size: 1024
  --num-attention-heads: 16
  --seq-length: 512
  --max-position-embeddings: 512
  # Training args
  --micro-batch-size: 4
  --global-batch-size: 32
  --train-iters: 20000
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --fp16: true
  --lr: 0.0001
  --lr-decay-style: linear
  --min-lr: 1.0e-5
  --lr-warmup-fraction: .01
  --bert-no-binary-head: true
  # Model parallel
  --tensor-model-parallel-size: 8
  --pipeline-model-parallel-size: 8
  # Data args
  --data-path: ${DATA_BLEND}
  --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt
  --split: 949,50,1
  --data-cache-path: ${DATA_CACHE_PATH}
  # EVAL_AND_LOGGING_ARGS
  --log-interval: 100
  --save-interval: 2000
  --save-retain-interval: 10000
  --eval-interval: 1000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --eval-iters: 10
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  --log-num-zeros-in-grad: true
  --log-params-norm: true
  --log-validation-ppl-to-tensorboard: true
  --wandb-project: megatron-core-release-runs
  --wandb-entity: adlr
  --wandb-exp-name: ${WANDB_EXPERIMENT}
  --attention-backend: unfused
  --exit-interval: 20000
  --wandb-save-dir: ${WANDB_SAVE_PATH}
METRICS:
  - "iteration-time"
  - "lm loss"
  - "mem-allocated-bytes"
  - "mem-max-allocated-bytes"


================================================
FILE: tests/functional_tests/test_cases/bert/bert_release_sm/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 2000,
        "step_interval": 5,
        "values": {
            "1": 10.57016,
            "5": 10.57457,
            "10": 10.5895,
            "15": 10.5701,
            "20": 10.13778,
            "25": 9.49357,
            "30": 9.4056,
            "35": 9.1618,
            "40": 9.08637,
            "45": 8.96435,
            "50": 8.67005,
            "55": 8.77614,
            "60": 8.51912,
            "65": 8.52143,
            "70": 8.20591,
            "75": 8.31295,
            "80": 8.00603,
            "85": 7.79543,
            "90": 7.70836,
            "95": 7.57266,
            "100": 7.53591,
            "105": 7.35517,
            "110": 7.44978,
            "115": 7.20792,
            "120": 6.95942,
            "125": 7.27887,
            "130": 7.01311,
            "135": 6.80831,
            "140": 7.21623,
            "145": 7.00076,
            "150": 7.08728,
            "155": 6.98439,
            "160": 6.99868,
            "165": 7.03626,
            "170": 6.92491,
            "175": 6.87684,
            "180": 6.7349,
            "185": 6.92182,
            "190": 7.09627,
            "195": 6.95652,
            "200": 6.88737,
            "205": 6.88212,
            "210": 6.75061,
            "215": 6.9828,
            "220": 6.66761,
            "225": 6.77153,
            "230": 6.73327,
            "235": 6.84111,
            "240": 6.58632,
            "245": 7.08674,
            "250": 6.60849,
            "255": 6.65796,
            "260": 6.74518,
            "265": 6.62708,
            "270": 6.59842,
            "275": 6.82877,
            "280": 6.53927,
            "285": 6.5668,
            "290": 6.74581,
            "295": 6.73208,
            "300": 6.43994,
            "305": 6.34549,
            "310": 6.95999,
            "315": 6.93016,
            "320": 6.86096,
            "325": 6.66038,
            "330": 6.63878,
            "335": 6.64048,
            "340": 6.58574,
            "345": 6.53944,
            "350": 6.396,
            "355": 6.51173,
            "360": 6.56525,
            "365": 6.61797,
            "370": 6.52094,
            "375": 6.64693,
            "380": 6.45276,
            "385": 6.40779,
            "390": 6.46608,
            "395": 6.25858,
            "400": 6.64783,
            "405": 6.47696,
            "410": 6.36974,
            "415": 6.56753,
            "420": 6.58633,
            "425": 6.53258,
            "430": 6.56715,
            "435": 6.51055,
            "440": 6.54104,
            "445": 6.57032,
            "450": 6.60459,
            "455": 6.64806,
            "460": 6.74269,
            "465": 6.71717,
            "470": 6.49106,
            "475": 6.21387,
            "480": 6.46245,
            "485": 6.5334,
            "490": 6.18455,
            "495": 6.21143,
            "500": 6.61963,
            "505": 6.60945,
            "510": 6.51889,
            "515": 6.20099,
            "520": 6.42318,
            "525": 6.43891,
            "530": 6.41375,
            "535": 6.0017,
            "540": 6.46755,
            "545": 6.34931,
            "550": 6.42553,
            "555": 6.51621,
            "560": 6.39803,
            "565": 6.19623,
            "570": 6.28741,
            "575": 6.63905,
            "580": 6.42899,
            "585": 6.42321,
            "590": 6.18413,
            "595": 6.26483,
            "600": 6.48357,
            "605": 6.49681,
            "610": 6.29344,
            "615": 6.42897,
            "620": 6.50728,
            "625": 6.42103,
            "630": 6.45875,
            "635": 6.48144,
            "640": 6.34499,
            "645": 6.27084,
            "650": 6.24999,
            "655": 6.55351,
            "660": 6.34522,
            "665": 6.27813,
            "670": 6.35548,
            "675": 6.2648,
            "680": 6.36674,
            "685": 6.53052,
            "690": 6.54112,
            "695": 6.21604,
            "700": 5.99887,
            "705": 6.28967,
            "710": 6.17574,
            "715": 6.38195,
            "720": 6.25111,
            "725": 6.43415,
            "730": 6.53264,
            "735": 6.61096,
            "740": 6.5891,
            "745": 6.29544,
            "750": 6.26264,
            "755": 6.00057,
            "760": 6.57044,
            "765": 6.37664,
            "770": 6.44316,
            "775": 6.14596,
            "780": 6.24057,
            "785": 6.29758,
            "790": 6.48672,
            "795": 6.375,
            "800": 6.34408,
            "805": 6.4855,
            "810": 6.17661,
            "815": 6.24969,
            "820": 6.46937,
            "825": 6.49875,
            "830": 6.41187,
            "835": 6.38815,
            "840": 6.33676,
            "845": 6.19728,
            "850": 6.37738,
            "855": 6.40839,
            "860": 6.27525,
            "865": 6.28608,
            "870": 6.36816,
            "875": 6.40001,
            "880": 6.29208,
            "885": 6.49235,
            "890": 6.2164,
            "895": 6.25901,
            "900": 6.25059,
            "905": 6.36206,
            "910": 6.07771,
            "915": 6.50532,
            "920": 6.34417,
            "925": 6.2606,
            "930": 6.21949,
            "935": 6.22412,
            "940": 6.36739,
            "945": 6.03716,
            "950": 6.16067,
            "955": 6.16405,
            "960": 6.2354,
            "965": 6.26383,
            "970": 6.28899,
            "975": 6.18494,
            "980": 6.07856,
            "985": 6.26264,
            "990": 6.16384,
            "995": 6.40483,
            "1000": 6.21386,
            "1005": 6.42444,
            "1010": 6.10073,
            "1015": 6.18252,
            "1020": 6.09469,
            "1025": 6.37207,
            "1030": 6.16964,
            "1035": 5.92432,
            "1040": 6.02639,
            "1045": 6.33766,
            "1050": 6.45656,
            "1055": 6.09236,
            "1060": 6.09897,
            "1065": 5.97221,
            "1070": 6.1916,
            "1075": 5.97914,
            "1080": 6.14591,
            "1085": 6.01944,
            "1090": 6.1107,
            "1095": 6.41051,
            "1100": 6.07832,
            "1105": 6.28098,
            "1110": 6.17453,
            "1115": 6.06118,
            "1120": 6.04548,
            "1125": 6.14937,
            "1130": 5.92201,
            "1135": 6.33183,
            "1140": 6.4784,
            "1145": 6.2523,
            "1150": 6.1203,
            "1155": 5.99932,
            "1160": 6.08841,
            "1165": 6.13199,
            "1170": 6.32067,
            "1175": 6.09731,
            "1180": 6.2554,
            "1185": 6.28789,
            "1190": 6.33396,
            "1195": 6.22166,
            "1200": 6.18768,
            "1205": 6.32714,
            "1210": 6.14663,
            "1215": 6.06385,
            "1220": 6.20885,
            "1225": 6.33596,
            "1230": 6.29505,
            "1235": 6.07554,
            "1240": 6.4444,
            "1245": 6.12619,
            "1250": 5.92236,
            "1255": 6.24939,
            "1260": 6.21401,
            "1265": 5.99277,
            "1270": 6.05496,
            "1275": 6.11892,
            "1280": 5.80143,
            "1285": 5.97887,
            "1290": 6.03117,
            "1295": 6.1527,
            "1300": 6.3507,
            "1305": 5.92647,
            "1310": 6.00999,
            "1315": 6.0662,
            "1320": 6.10065,
            "1325": 6.1658,
            "1330": 6.11215,
            "1335": 6.06255,
            "1340": 6.1044,
            "1345": 6.19047,
            "1350": 5.94001,
            "1355": 6.08141,
            "1360": 6.43968,
            "1365": 5.9775,
            "1370": 6.06872,
            "1375": 6.26164,
            "1380": 6.0897,
            "1385": 5.99699,
            "1390": 5.86771,
            "1395": 5.84369,
            "1400": 6.14356,
            "1405": 6.1545,
            "1410": 6.05662,
            "1415": 6.24539,
            "1420": 6.19344,
            "1425": 6.19564,
            "1430": 6.01059,
            "1435": 6.4846,
            "1440": 6.33193,
            "1445": 6.31451,
            "1450": 6.0827,
            "1455": 5.78442,
            "1460": 6.19138,
            "1465": 6.18086,
            "1470": 6.25935,
            "1475": 6.23589,
            "1480": 6.33248,
            "1485": 6.05732,
            "1490": 6.23182,
            "1495": 6.26111,
            "1500": 6.18782,
            "1505": 6.2482,
            "1510": 6.39892,
            "1515": 6.05634,
            "1520": 5.82788,
            "1525": 5.91483,
            "1530": 5.88868,
            "1535": 6.46938,
            "1540": 6.40193,
            "1545": 6.33528,
            "1550": 6.12725,
            "1555": 6.06753,
            "1560": 6.1525,
            "1565": 6.06122,
            "1570": 6.01284,
            "1575": 6.12295,
            "1580": 6.24347,
            "1585": 6.0764,
            "1590": 6.24692,
            "1595": 6.19689,
            "1600": 6.10773,
            "1605": 6.05819,
            "1610": 6.13265,
            "1615": 6.14311,
            "1620": 6.04295,
            "1625": 6.03832,
            "1630": 6.09542,
            "1635": 6.31795,
            "1640": 6.14846,
            "1645": 6.19578,
            "1650": 6.29627,
            "1655": 6.35313,
            "1660": 6.22447,
            "1665": 6.05814,
            "1670": 6.181,
            "1675": 5.87418,
            "1680": 6.10235,
            "1685": 6.37124,
            "1690": 6.17879,
            "1695": 6.0453,
            "1700": 6.23077,
            "1705": 6.07609,
            "1710": 6.03402,
            "1715": 6.05391,
            "1720": 6.18997,
            "1725": 6.39643,
            "1730": 6.19031,
            "1735": 6.02531,
            "1740": 6.10795,
            "1745": 6.09931,
            "1750": 6.32244,
            "1755": 6.27191,
            "1760": 5.98249,
            "1765": 5.99969,
            "1770": 6.1287,
            "1775": 5.99984,
            "1780": 6.02211,
            "1785": 5.8221,
            "1790": 5.96942,
            "1795": 6.30099,
            "1800": 6.17733,
            "1805": 5.81159,
            "1810": 6.48243,
            "1815": 6.16965,
            "1820": 6.48046,
            "1825": 6.13125,
            "1830": 6.05526,
            "1835": 5.95849,
            "1840": 5.90028,
            "1845": 6.29489,
            "1850": 6.23845,
            "1855": 6.13384,
            "1860": 6.0453,
            "1865": 6.00502,
            "1870": 6.32861,
            "1875": 6.09238,
            "1880": 5.91293,
            "1885": 6.30309,
            "1890": 6.11072,
            "1895": 5.96072,
            "1900": 6.09184,
            "1905": 6.28475,
            "1910": 6.29417,
            "1915": 6.33553,
            "1920": 6.16605,
            "1925": 5.96987,
            "1930": 6.10131,
            "1935": 6.00775,
            "1940": 6.13005,
            "1945": 6.08146,
            "1950": 6.03616,
            "1955": 6.03111,
            "1960": 6.01663,
            "1965": 5.98703,
            "1970": 6.07541,
            "1975": 6.25125,
            "1980": 6.39044,
            "1985": 6.12075,
            "1990": 5.97305,
            "1995": 5.92903,
            "2000": 6.09087
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 2000,
        "step_interval": 5,
        "values": {
            "1": 192387584.0,
            "5": 192387584.0,
            "10": 191994368.0,
            "15": 192387584.0,
            "20": 270223360.0,
            "25": 270223360.0,
            "30": 270223360.0,
            "35": 270223360.0,
            "40": 270223360.0,
            "45": 270223360.0,
            "50": 270223360.0,
            "55": 270223360.0,
            "60": 270223360.0,
            "65": 270223360.0,
            "70": 270223360.0,
            "75": 270223360.0,
            "80": 270223360.0,
            "85": 270223360.0,
            "90": 270223360.0,
            "95": 270223360.0,
            "100": 270223360.0,
            "105": 270223360.0,
            "110": 270223360.0,
            "115": 270223360.0,
            "120": 270223360.0,
            "125": 270223360.0,
            "130": 270223360.0,
            "135": 270223360.0,
            "140": 270223360.0,
            "145": 270223360.0,
            "150": 270223360.0,
            "155": 270223360.0,
            "160": 270223360.0,
            "165": 270223360.0,
            "170": 270223360.0,
            "175": 270223360.0,
            "180": 270223360.0,
            "185": 270223360.0,
            "190": 270223360.0,
            "195": 270223360.0,
            "200": 270223360.0,
            "205": 270223360.0,
            "210": 270223360.0,
            "215": 270223360.0,
            "220": 270223360.0,
            "225": 270223360.0,
            "230": 270223360.0,
            "235": 270223360.0,
            "240": 270223360.0,
            "245": 270223360.0,
            "250": 270223360.0,
            "255": 270223360.0,
            "260": 270223360.0,
            "265": 270223360.0,
            "270": 270223360.0,
            "275": 270223360.0,
            "280": 270223360.0,
            "285": 270223360.0,
            "290": 270223360.0,
            "295": 270223360.0,
            "300": 270223360.0,
            "305": 270223360.0,
            "310": 270223360.0,
            "315": 270223360.0,
            "320": 270223360.0,
            "325": 270223360.0,
            "330": 270223360.0,
            "335": 270223360.0,
            "340": 270223360.0,
            "345": 270223360.0,
            "350": 270223360.0,
            "355": 270223360.0,
            "360": 270223360.0,
            "365": 270223360.0,
            "370": 270223360.0,
            "375": 270223360.0,
            "380": 270223360.0,
            "385": 270223360.0,
            "390": 270223360.0,
            "395": 270223360.0,
            "400": 270223360.0,
            "405": 270223360.0,
            "410": 270223360.0,
            "415": 270223360.0,
            "420": 270223360.0,
            "425": 270223360.0,
            "430": 270223360.0,
            "435": 270223360.0,
            "440": 270223360.0,
            "445": 270223360.0,
            "450": 270223360.0,
            "455": 270223360.0,
            "460": 270223360.0,
            "465": 270223360.0,
            "470": 270223360.0,
            "475": 270223360.0,
            "480": 270223360.0,
            "485": 270223360.0,
            "490": 270223360.0,
            "495": 270223360.0,
            "500": 270223360.0,
            "505": 270223360.0,
            "510": 270223360.0,
            "515": 270223360.0,
            "520": 270223360.0,
            "525": 270223360.0,
            "530": 270223360.0,
            "535": 270223360.0,
            "540": 270223360.0,
            "545": 270223360.0,
            "550": 270223360.0,
            "555": 270223360.0,
            "560": 270223360.0,
            "565": 270223360.0,
            "570": 270223360.0,
            "575": 270223360.0,
            "580": 270223360.0,
            "585": 270223360.0,
            "590": 270223360.0,
            "595": 270223360.0,
            "600": 270223360.0,
            "605": 270223360.0,
            "610": 270223360.0,
            "615": 270223360.0,
            "620": 270223360.0,
            "625": 270223360.0,
            "630": 270223360.0,
            "635": 270223360.0,
            "640": 270223360.0,
            "645": 270223360.0,
            "650": 270223360.0,
            "655": 270223360.0,
            "660": 270223360.0,
            "665": 270223360.0,
            "670": 270223360.0,
            "675": 270223360.0,
            "680": 270223360.0,
            "685": 270223360.0,
            "690": 270223360.0,
            "695": 270223360.0,
            "700": 270223360.0,
            "705": 270223360.0,
            "710": 270223360.0,
            "715": 270223360.0,
            "720": 270223360.0,
            "725": 270223360.0,
            "730": 270223360.0,
            "735": 270223360.0,
            "740": 270223360.0,
            "745": 270223360.0,
            "750": 270223360.0,
            "755": 270223360.0,
            "760": 270223360.0,
            "765": 270223360.0,
            "770": 270223360.0,
            "775": 270223360.0,
            "780": 270223360.0,
            "785": 270223360.0,
            "790": 270223360.0,
            "795": 270223360.0,
            "800": 270223360.0,
            "805": 270223360.0,
            "810": 270223360.0,
            "815": 270223360.0,
            "820": 270223360.0,
            "825": 270223360.0,
            "830": 270223360.0,
            "835": 270223360.0,
            "840": 270223360.0,
            "845": 270223360.0,
            "850": 270223360.0,
            "855": 270223360.0,
            "860": 270223360.0,
            "865": 270223360.0,
            "870": 270223360.0,
            "875": 270223360.0,
            "880": 270223360.0,
            "885": 270223360.0,
            "890": 270223360.0,
            "895": 270223360.0,
            "900": 270223360.0,
            "905": 270223360.0,
            "910": 270223360.0,
            "915": 270223360.0,
            "920": 270223360.0,
            "925": 270223360.0,
            "930": 270223360.0,
            "935": 270223360.0,
            "940": 270223360.0,
            "945": 270223360.0,
            "950": 270223360.0,
            "955": 270223360.0,
            "960": 270223360.0,
            "965": 270223360.0,
            "970": 270223360.0,
            "975": 270223360.0,
            "980": 270223360.0,
            "985": 270223360.0,
            "990": 270223360.0,
            "995": 270223360.0,
            "1000": 270223360.0,
            "1005": 270223360.0,
            "1010": 270223360.0,
            "1015": 270223360.0,
            "1020": 270223360.0,
            "1025": 270223360.0,
            "1030": 270223360.0,
            "1035": 270223360.0,
            "1040": 270223360.0,
            "1045": 270223360.0,
            "1050": 270223360.0,
            "1055": 270223360.0,
            "1060": 270223360.0,
            "1065": 270223360.0,
            "1070": 270223360.0,
            "1075": 270223360.0,
            "1080": 270223360.0,
            "1085": 270223360.0,
            "1090": 270223360.0,
            "1095": 270223360.0,
            "1100": 270223360.0,
            "1105": 270223360.0,
            "1110": 270223360.0,
            "1115": 270223360.0,
            "1120": 270223360.0,
            "1125": 270223360.0,
            "1130": 270223360.0,
            "1135": 270223360.0,
            "1140": 270223360.0,
            "1145": 270223360.0,
            "1150": 270223360.0,
            "1155": 270223360.0,
            "1160": 270223360.0,
            "1165": 270223360.0,
            "1170": 270223360.0,
            "1175": 270223360.0,
            "1180": 270223360.0,
            "1185": 270223360.0,
            "1190": 270223360.0,
            "1195": 270223360.0,
            "1200": 270223360.0,
            "1205": 270223360.0,
            "1210": 270223360.0,
            "1215": 270223360.0,
            "1220": 270223360.0,
            "1225": 270223360.0,
            "1230": 270223360.0,
            "1235": 270223360.0,
            "1240": 270223360.0,
            "1245": 270223360.0,
            "1250": 270223360.0,
            "1255": 270223360.0,
            "1260": 270223360.0,
            "1265": 270223360.0,
            "1270": 270223360.0,
            "1275": 270223360.0,
            "1280": 270223360.0,
            "1285": 270223360.0,
            "1290": 270223360.0,
            "1295": 270223360.0,
            "1300": 270223360.0,
            "1305": 270223360.0,
            "1310": 270223360.0,
            "1315": 270223360.0,
            "1320": 270223360.0,
            "1325": 270223360.0,
            "1330": 270223360.0,
            "1335": 270223360.0,
            "1340": 270223360.0,
            "1345": 270223360.0,
            "1350": 270223360.0,
            "1355": 270223360.0,
            "1360": 270223360.0,
            "1365": 270223360.0,
            "1370": 270223360.0,
            "1375": 270223360.0,
            "1380": 270223360.0,
            "1385": 270223360.0,
            "1390": 270223360.0,
            "1395": 270223360.0,
            "1400": 270223360.0,
            "1405": 270223360.0,
            "1410": 270223360.0,
            "1415": 270223360.0,
            "1420": 270223360.0,
            "1425": 270223360.0,
            "1430": 270223360.0,
            "1435": 270223360.0,
            "1440": 270223360.0,
            "1445": 270223360.0,
            "1450": 270223360.0,
            "1455": 270223360.0,
            "1460": 270223360.0,
            "1465": 270223360.0,
            "1470": 270223360.0,
            "1475": 270223360.0,
            "1480": 270223360.0,
            "1485": 270223360.0,
            "1490": 270223360.0,
            "1495": 270223360.0,
            "1500": 270223360.0,
            "1505": 270223360.0,
            "1510": 270223360.0,
            "1515": 270223360.0,
            "1520": 270223360.0,
            "1525": 270223360.0,
            "1530": 270223360.0,
            "1535": 270223360.0,
            "1540": 270223360.0,
            "1545": 270223360.0,
            "1550": 270223360.0,
            "1555": 270223360.0,
            "1560": 270223360.0,
            "1565": 270223360.0,
            "1570": 270223360.0,
            "1575": 270223360.0,
            "1580": 270223360.0,
            "1585": 270223360.0,
            "1590": 270223360.0,
            "1595": 270223360.0,
            "1600": 270223360.0,
            "1605": 270223360.0,
            "1610": 270223360.0,
            "1615": 270223360.0,
            "1620": 270223360.0,
            "1625": 270223360.0,
            "1630": 270223360.0,
            "1635": 270223360.0,
            "1640": 270223360.0,
            "1645": 270223360.0,
            "1650": 270223360.0,
            "1655": 270223360.0,
            "1660": 270223360.0,
            "1665": 270223360.0,
            "1670": 270223360.0,
            "1675": 270223360.0,
            "1680": 270223360.0,
            "1685": 270223360.0,
            "1690": 270223360.0,
            "1695": 270223360.0,
            "1700": 270223360.0,
            "1705": 270223360.0,
            "1710": 270223360.0,
            "1715": 270223360.0,
            "1720": 270223360.0,
            "1725": 270223360.0,
            "1730": 270223360.0,
            "1735": 270223360.0,
            "1740": 270223360.0,
            "1745": 270223360.0,
            "1750": 270223360.0,
            "1755": 270223360.0,
            "1760": 270223360.0,
            "1765": 270223360.0,
            "1770": 270223360.0,
            "1775": 270223360.0,
            "1780": 270223360.0,
            "1785": 270223360.0,
            "1790": 270223360.0,
            "1795": 270223360.0,
            "1800": 270223360.0,
            "1805": 270223360.0,
            "1810": 270223360.0,
            "1815": 270223360.0,
            "1820": 270223360.0,
            "1825": 270223360.0,
            "1830": 270223360.0,
            "1835": 270223360.0,
            "1840": 270223360.0,
            "1845": 270223360.0,
            "1850": 270223360.0,
            "1855": 270223360.0,
            "1860": 270223360.0,
            "1865": 270223360.0,
            "1870": 270223360.0,
            "1875": 270223360.0,
            "1880": 270223360.0,
            "1885": 270223360.0,
            "1890": 270223360.0,
            "1895": 270223360.0,
            "1900": 270223360.0,
            "1905": 270223360.0,
            "1910": 270223360.0,
            "1915": 270223360.0,
            "1920": 270223360.0,
            "1925": 270223360.0,
            "1930": 270223360.0,
            "1935": 270223360.0,
            "1940": 270223360.0,
            "1945": 270223360.0,
            "1950": 270223360.0,
            "1955": 270223360.0,
            "1960": 270223360.0,
            "1965": 270223360.0,
            "1970": 270223360.0,
            "1975": 270223360.0,
            "1980": 270223360.0,
            "1985": 270223360.0,
            "1990": 270223360.0,
            "1995": 270223360.0,
            "2000": 270223360.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 2000,
        "step_interval": 5,
        "values": {
            "1": 402606592.0,
            "5": 402606592.0,
            "10": 402606592.0,
            "15": 402606592.0,
            "20": 450471936.0,
            "25": 450471936.0,
            "30": 450471936.0,
            "35": 450471936.0,
            "40": 450471936.0,
            "45": 450471936.0,
            "50": 450471936.0,
            "55": 450471936.0,
            "60": 450471936.0,
            "65": 450471936.0,
            "70": 450471936.0,
            "75": 450471936.0,
            "80": 450471936.0,
            "85": 450471936.0,
            "90": 450471936.0,
            "95": 450471936.0,
            "100": 450471936.0,
            "105": 450471936.0,
            "110": 450471936.0,
            "115": 450471936.0,
            "120": 450471936.0,
            "125": 450471936.0,
            "130": 450471936.0,
            "135": 450471936.0,
            "140": 450471936.0,
            "145": 450471936.0,
            "150": 450471936.0,
            "155": 450471936.0,
            "160": 450471936.0,
            "165": 450471936.0,
            "170": 450471936.0,
            "175": 450471936.0,
            "180": 450471936.0,
            "185": 450471936.0,
            "190": 450471936.0,
            "195": 450471936.0,
            "200": 450471936.0,
            "205": 450471936.0,
            "210": 450471936.0,
            "215": 450471936.0,
            "220": 450471936.0,
            "225": 450471936.0,
            "230": 450471936.0,
            "235": 450471936.0,
            "240": 450471936.0,
            "245": 450471936.0,
            "250": 450471936.0,
            "255": 450471936.0,
            "260": 450471936.0,
            "265": 450471936.0,
            "270": 450471936.0,
            "275": 450471936.0,
            "280": 450471936.0,
            "285": 450471936.0,
            "290": 450471936.0,
            "295": 450471936.0,
            "300": 450471936.0,
            "305": 450471936.0,
            "310": 450471936.0,
            "315": 450471936.0,
            "320": 450471936.0,
            "325": 450471936.0,
            "330": 450471936.0,
            "335": 450471936.0,
            "340": 450471936.0,
            "345": 450471936.0,
            "350": 450471936.0,
            "355": 450471936.0,
            "360": 450471936.0,
            "365": 450471936.0,
            "370": 450471936.0,
            "375": 450471936.0,
            "380": 450471936.0,
            "385": 450471936.0,
            "390": 450471936.0,
            "395": 450471936.0,
            "400": 450471936.0,
            "405": 450471936.0,
            "410": 450471936.0,
            "415": 450471936.0,
            "420": 450471936.0,
            "425": 450471936.0,
            "430": 450471936.0,
            "435": 450471936.0,
            "440": 450471936.0,
            "445": 450471936.0,
            "450": 450471936.0,
            "455": 450471936.0,
            "460": 450471936.0,
            "465": 450471936.0,
            "470": 450471936.0,
            "475": 450471936.0,
            "480": 450471936.0,
            "485": 450471936.0,
            "490": 450471936.0,
            "495": 450471936.0,
            "500": 450471936.0,
            "505": 450471936.0,
            "510": 450471936.0,
            "515": 450471936.0,
            "520": 450471936.0,
            "525": 450471936.0,
            "530": 450471936.0,
            "535": 450471936.0,
            "540": 450471936.0,
            "545": 450471936.0,
            "550": 450471936.0,
            "555": 450471936.0,
            "560": 450471936.0,
            "565": 450471936.0,
            "570": 450471936.0,
            "575": 450471936.0,
            "580": 450471936.0,
            "585": 450471936.0,
            "590": 450471936.0,
            "595": 450471936.0,
            "600": 450471936.0,
            "605": 450471936.0,
            "610": 450471936.0,
            "615": 450471936.0,
            "620": 450471936.0,
            "625": 450471936.0,
            "630": 450471936.0,
            "635": 450471936.0,
            "640": 450471936.0,
            "645": 450471936.0,
            "650": 450471936.0,
            "655": 450471936.0,
            "660": 450471936.0,
            "665": 450471936.0,
            "670": 450471936.0,
            "675": 450471936.0,
            "680": 450471936.0,
            "685": 450471936.0,
            "690": 450471936.0,
            "695": 450471936.0,
            "700": 450471936.0,
            "705": 450471936.0,
            "710": 450471936.0,
            "715": 450471936.0,
            "720": 450471936.0,
            "725": 450471936.0,
            "730": 450471936.0,
            "735": 450471936.0,
            "740": 450471936.0,
            "745": 450471936.0,
            "750": 450471936.0,
            "755": 450471936.0,
            "760": 450471936.0,
            "765": 450471936.0,
            "770": 450471936.0,
            "775": 450471936.0,
            "780": 450471936.0,
            "785": 450471936.0,
            "790": 450471936.0,
            "795": 450471936.0,
            "800": 450471936.0,
            "805": 450471936.0,
            "810": 450471936.0,
            "815": 450471936.0,
            "820": 450471936.0,
            "825": 450471936.0,
            "830": 450471936.0,
            "835": 450471936.0,
            "840": 450471936.0,
            "845": 450471936.0,
            "850": 450471936.0,
            "855": 450471936.0,
            "860": 450471936.0,
            "865": 450471936.0,
            "870": 450471936.0,
            "875": 450471936.0,
            "880": 450471936.0,
            "885": 450471936.0,
            "890": 450471936.0,
            "895": 450471936.0,
            "900": 450471936.0,
            "905": 450471936.0,
            "910": 450471936.0,
            "915": 450471936.0,
            "920": 450471936.0,
            "925": 450471936.0,
            "930": 450471936.0,
            "935": 450471936.0,
            "940": 450471936.0,
            "945": 450471936.0,
            "950": 450471936.0,
            "955": 450471936.0,
            "960": 450471936.0,
            "965": 450471936.0,
            "970": 450471936.0,
            "975": 450471936.0,
            "980": 450471936.0,
            "985": 450471936.0,
            "990": 450471936.0,
            "995": 450471936.0,
            "1000": 450471936.0,
            "1005": 450471936.0,
            "1010": 450471936.0,
            "1015": 450471936.0,
            "1020": 450471936.0,
            "1025": 450471936.0,
            "1030": 450471936.0,
            "1035": 450471936.0,
            "1040": 450471936.0,
            "1045": 450471936.0,
            "1050": 450471936.0,
            "1055": 450471936.0,
            "1060": 450471936.0,
            "1065": 450471936.0,
            "1070": 450471936.0,
            "1075": 450471936.0,
            "1080": 450471936.0,
            "1085": 450471936.0,
            "1090": 450471936.0,
            "1095": 450471936.0,
            "1100": 450471936.0,
            "1105": 450471936.0,
            "1110": 450471936.0,
            "1115": 450471936.0,
            "1120": 450471936.0,
            "1125": 450471936.0,
            "1130": 450471936.0,
            "1135": 450471936.0,
            "1140": 450471936.0,
            "1145": 450471936.0,
            "1150": 450471936.0,
            "1155": 450471936.0,
            "1160": 450471936.0,
            "1165": 450471936.0,
            "1170": 450471936.0,
            "1175": 450471936.0,
            "1180": 450471936.0,
            "1185": 450471936.0,
            "1190": 450471936.0,
            "1195": 450471936.0,
            "1200": 450471936.0,
            "1205": 450471936.0,
            "1210": 450471936.0,
            "1215": 450471936.0,
            "1220": 450471936.0,
            "1225": 450471936.0,
            "1230": 450471936.0,
            "1235": 450471936.0,
            "1240": 450471936.0,
            "1245": 450471936.0,
            "1250": 450471936.0,
            "1255": 450471936.0,
            "1260": 450471936.0,
            "1265": 450471936.0,
            "1270": 450471936.0,
            "1275": 450471936.0,
            "1280": 450471936.0,
            "1285": 450471936.0,
            "1290": 450471936.0,
            "1295": 450471936.0,
            "1300": 450471936.0,
            "1305": 450471936.0,
            "1310": 450471936.0,
            "1315": 450471936.0,
            "1320": 450471936.0,
            "1325": 450471936.0,
            "1330": 450471936.0,
            "1335": 450471936.0,
            "1340": 450471936.0,
            "1345": 450471936.0,
            "1350": 450471936.0,
            "1355": 450471936.0,
            "1360": 450471936.0,
            "1365": 450471936.0,
            "1370": 450471936.0,
            "1375": 450471936.0,
            "1380": 450471936.0,
            "1385": 450471936.0,
            "1390": 450471936.0,
            "1395": 450471936.0,
            "1400": 450471936.0,
            "1405": 450471936.0,
            "1410": 450471936.0,
            "1415": 450471936.0,
            "1420": 450471936.0,
            "1425": 450471936.0,
            "1430": 450471936.0,
            "1435": 450471936.0,
            "1440": 450471936.0,
            "1445": 450471936.0,
            "1450": 450471936.0,
            "1455": 450471936.0,
            "1460": 450471936.0,
            "1465": 450471936.0,
            "1470": 450471936.0,
            "1475": 450471936.0,
            "1480": 450471936.0,
            "1485": 450471936.0,
            "1490": 450471936.0,
            "1495": 450471936.0,
            "1500": 450471936.0,
            "1505": 450471936.0,
            "1510": 450471936.0,
            "1515": 450471936.0,
            "1520": 450471936.0,
            "1525": 450471936.0,
            "1530": 450471936.0,
            "1535": 450471936.0,
            "1540": 450471936.0,
            "1545": 450471936.0,
            "1550": 450471936.0,
            "1555": 450471936.0,
            "1560": 450471936.0,
            "1565": 450471936.0,
            "1570": 450471936.0,
            "1575": 450471936.0,
            "1580": 450471936.0,
            "1585": 450471936.0,
            "1590": 450471936.0,
            "1595": 450471936.0,
            "1600": 450471936.0,
            "1605": 450471936.0,
            "1610": 450471936.0,
            "1615": 450471936.0,
            "1620": 450471936.0,
            "1625": 450471936.0,
            "1630": 450471936.0,
            "1635": 450471936.0,
            "1640": 450471936.0,
            "1645": 450471936.0,
            "1650": 450471936.0,
            "1655": 450471936.0,
            "1660": 450471936.0,
            "1665": 450471936.0,
            "1670": 450471936.0,
            "1675": 450471936.0,
            "1680": 450471936.0,
            "1685": 450471936.0,
            "1690": 450471936.0,
            "1695": 450471936.0,
            "1700": 450471936.0,
            "1705": 450471936.0,
            "1710": 450471936.0,
            "1715": 450471936.0,
            "1720": 450471936.0,
            "1725": 450471936.0,
            "1730": 450471936.0,
            "1735": 450471936.0,
            "1740": 450471936.0,
            "1745": 450471936.0,
            "1750": 450471936.0,
            "1755": 450471936.0,
            "1760": 450471936.0,
            "1765": 450471936.0,
            "1770": 450471936.0,
            "1775": 450471936.0,
            "1780": 450471936.0,
            "1785": 450471936.0,
            "1790": 450471936.0,
            "1795": 450471936.0,
            "1800": 450471936.0,
            "1805": 450471936.0,
            "1810": 450471936.0,
            "1815": 450471936.0,
            "1820": 450471936.0,
            "1825": 450471936.0,
            "1830": 450471936.0,
            "1835": 450471936.0,
            "1840": 450471936.0,
            "1845": 450471936.0,
            "1850": 450471936.0,
            "1855": 450471936.0,
            "1860": 450471936.0,
            "1865": 450471936.0,
            "1870": 450471936.0,
            "1875": 450471936.0,
            "1880": 450471936.0,
            "1885": 450471936.0,
            "1890": 450471936.0,
            "1895": 450471936.0,
            "1900": 450471936.0,
            "1905": 450471936.0,
            "1910": 450471936.0,
            "1915": 450471936.0,
            "1920": 450471936.0,
            "1925": 450471936.0,
            "1930": 450471936.0,
            "1935": 450471936.0,
            "1940": 450471936.0,
            "1945": 450471936.0,
            "1950": 450471936.0,
            "1955": 450471936.0,
            "1960": 450471936.0,
            "1965": 450471936.0,
            "1970": 450471936.0,
            "1975": 450471936.0,
            "1980": 450471936.0,
            "1985": 450471936.0,
            "1990": 450471936.0,
            "1995": 450471936.0,
            "2000": 450471936.0
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 2000,
        "step_interval": 5,
        "values": {
            "1": "nan",
            "5": "nan",
            "10": "nan",
            "15": "nan",
            "20": 11992.0,
            "25": 12791.0,
            "30": 17318.0,
            "35": 13313.0,
            "40": 13889.0,
            "45": 15256.0,
            "50": 11370.0,
            "55": 14792.0,
            "60": 13572.0,
            "65": 14618.0,
            "70": 10636.0,
            "75": 12479.0,
            "80": 11546.0,
            "85": 17330.0,
            "90": 10222.0,
            "95": 12866.0,
            "100": 10240.0,
            "105": 12091.0,
            "110": 15229.0,
            "115": 13316.0,
            "120": 15329.0,
            "125": 10543.0,
            "130": 13341.0,
            "135": 11373.0,
            "140": 12167.0,
            "145": 16571.0,
            "150": 13896.0,
            "155": 16085.0,
            "160": 12143.0,
            "165": 16313.0,
            "170": 10658.0,
            "175": 13633.0,
            "180": 10238.0,
            "185": 12445.0,
            "190": 11320.0,
            "195": 15702.0,
            "200": 14683.0,
            "205": 15921.0,
            "210": 13240.0,
            "215": 13450.0,
            "220": 18456.0,
            "225": 12420.0,
            "230": 13176.0,
            "235": 13557.0,
            "240": 14613.0,
            "245": 13040.0,
            "250": 14162.0,
            "255": 14512.0,
            "260": 13823.0,
            "265": 18504.0,
            "270": 15208.0,
            "275": 13893.0,
            "280": 11858.0,
            "285": 14474.0,
            "290": 15059.0,
            "295": 15372.0,
            "300": 12492.0,
            "305": 13020.0,
            "310": 16763.0,
            "315": 16313.0,
            "320": 14840.0,
            "325": 14424.0,
            "330": 14796.0,
            "335": 13147.0,
            "340": 13996.0,
            "345": 16086.0,
            "350": 16038.0,
            "355": 11910.0,
            "360": 13482.0,
            "365": 15937.0,
            "370": 15123.0,
            "375": 11569.0,
            "380": 18286.0,
            "385": 15464.0,
            "390": 13894.0,
            "395": 14113.0,
            "400": 16376.0,
            "405": 17310.0,
            "410": 13934.0,
            "415": 19325.0,
            "420": 14179.0,
            "425": 20879.0,
            "430": 12685.0,
            "435": 14301.0,
            "440": 16235.0,
            "445": 17108.0,
            "450": 13875.0,
            "455": 17519.0,
            "460": 15208.0,
            "465": 15818.0,
            "470": 14442.0,
            "475": 16949.0,
            "480": 15538.0,
            "485": 16196.0,
            "490": 15341.0,
            "495": 16051.0,
            "500": 13419.0,
            "505": 15552.0,
            "510": 16935.0,
            "515": 16824.0,
            "520": 12877.0,
            "525": 14026.0,
            "530": 13620.0,
            "535": 13850.0,
            "540": 14720.0,
            "545": 12288.0,
            "550": 15016.0,
            "555": 16905.0,
            "560": 15507.0,
            "565": 17081.0,
            "570": 16282.0,
            "575": 15907.0,
            "580": 14680.0,
            "585": 17563.0,
            "590": 14132.0,
            "595": 17477.0,
            "600": 17725.0,
            "605": 14044.0,
            "610": 15590.0,
            "615": 16606.0,
            "620": 17061.0,
            "625": 14453.0,
            "630": 17025.0,
            "635": 17716.0,
            "640": 15419.0,
            "645": 13890.0,
            "650": 16352.0,
            "655": 15647.0,
            "660": 15850.0,
            "665": 15369.0,
            "670": 16123.0,
            "675": 17523.0,
            "680": 14561.0,
            "685": 13141.0,
            "690": 14711.0,
            "695": 15253.0,
            "700": 14874.0,
            "705": 15332.0,
            "710": 14777.0,
            "715": 16931.0,
            "720": 15833.0,
            "725": 19244.0,
            "730": 14076.0,
            "735": 16320.0,
            "740": 16859.0,
            "745": 13481.0,
            "750": 14972.0,
            "755": 15488.0,
            "760": 15341.0,
            "765": 16116.0,
            "770": 15517.0,
            "775": 14491.0,
            "780": 15819.0,
            "785": 14058.0,
            "790": 17767.0,
            "795": 13865.0,
            "800": 15919.0,
            "805": 15144.0,
            "810": 16693.0,
            "815": 18203.0,
            "820": 16243.0,
            "825": 13355.0,
            "830": 17683.0,
            "835": 14729.0,
            "840": 18498.0,
            "845": 16504.0,
            "850": 16146.0,
            "855": 15142.0,
            "860": 15373.0,
            "865": 15746.0,
            "870": 16742.0,
            "875": 15626.0,
            "880": 13812.0,
            "885": 14653.0,
            "890": 16362.0,
            "895": 15496.0,
            "900": 16213.0,
            "905": 16081.0,
            "910": 17628.0,
            "915": 15995.0,
            "920": 15730.0,
            "925": 15026.0,
            "930": 15889.0,
            "935": 15551.0,
            "940": 17077.0,
            "945": 15037.0,
            "950": 17784.0,
            "955": 16426.0,
            "960": 18232.0,
            "965": 13735.0,
            "970": 12534.0,
            "975": 14917.0,
            "980": 15858.0,
            "985": 16929.0,
            "990": 16031.0,
            "995": 14929.0,
            "1000": 18652.0,
            "1005": 14100.0,
            "1010": 14165.0,
            "1015": 16922.0,
            "1020": 15414.0,
            "1025": 17850.0,
            "1030": 15709.0,
            "1035": 14603.0,
            "1040": 14928.0,
            "1045": 18600.0,
            "1050": 15802.0,
            "1055": 19174.0,
            "1060": 15022.0,
            "1065": 14666.0,
            "1070": 15402.0,
            "1075": 14816.0,
            "1080": 15724.0,
            "1085": 14881.0,
            "1090": 18898.0,
            "1095": 16011.0,
            "1100": 16835.0,
            "1105": 18298.0,
            "1110": 13161.0,
            "1115": 19553.0,
            "1120": 15198.0,
            "1125": 15939.0,
            "1130": 16832.0,
            "1135": 16160.0,
            "1140": 17129.0,
            "1145": 18492.0,
            "1150": 13452.0,
            "1155": 15636.0,
            "1160": 15013.0,
            "1165": 17731.0,
            "1170": 21903.0,
            "1175": 16121.0,
            "1180": 15737.0,
            "1185": 19161.0,
            "1190": 18360.0,
            "1195": 15405.0,
            "1200": 17015.0,
            "1205": 12343.0,
            "1210": 14123.0,
            "1215": 15810.0,
            "1220": 13957.0,
            "1225": 14139.0,
            "1230": 17362.0,
            "1235": 15869.0,
            "1240": 15904.0,
            "1245": 18478.0,
            "1250": 16019.0,
            "1255": 14828.0,
            "1260": 14875.0,
            "1265": 14493.0,
            "1270": 14007.0,
            "1275": 13660.0,
            "1280": 14056.0,
            "1285": 17708.0,
            "1290": 15145.0,
            "1295": 18088.0,
            "1300": 17203.0,
            "1305": 16560.0,
            "1310": 15669.0,
            "1315": 17341.0,
            "1320": 16307.0,
            "1325": 17612.0,
            "1330": 13539.0,
            "1335": 13802.0,
            "1340": 16415.0,
            "1345": 17711.0,
            "1350": 17117.0,
            "1355": 14693.0,
            "1360": 17885.0,
            "1365": 17267.0,
            "1370": 16646.0,
            "1375": 16270.0,
            "1380": 17787.0,
            "1385": 20402.0,
            "1390": 22226.0,
            "1395": 16008.0,
            "1400": 13993.0,
            "1405": 16304.0,
            "1410": 16616.0,
            "1415": 14189.0,
            "1420": 20514.0,
            "1425": 19277.0,
            "1430": 20527.0,
            "1435": 20429.0,
            "1440": 15174.0,
            "1445": 18024.0,
            "1450": 15190.0,
            "1455": 15578.0,
            "1460": 15951.0,
            "1465": 15106.0,
            "1470": 17513.0,
            "1475": 15760.0,
            "1480": 16046.0,
            "1485": 19743.0,
            "1490": 14777.0,
            "1495": 17049.0,
            "1500": 14367.0,
            "1505": 15647.0,
            "1510": 21215.0,
            "1515": 16621.0,
            "1520": 15834.0,
            "1525": 16291.0,
            "1530": 17942.0,
            "1535": 14546.0,
            "1540": 15021.0,
            "1545": 16479.0,
            "1550": 13428.0,
            "1555": 17283.0,
            "1560": 14157.0,
            "1565": 22260.0,
            "1570": 17688.0,
            "1575": 14820.0,
            "1580": 15551.0,
            "1585": 17850.0,
            "1590": 14709.0,
            "1595": 13862.0,
            "1600": 18194.0,
            "1605": 14203.0,
            "1610": 15675.0,
            "1615": 19634.0,
            "1620": 18751.0,
            "1625": 15551.0,
            "1630": 16515.0,
            "1635": 15822.0,
            "1640": 15486.0,
            "1645": 19133.0,
            "1650": 15387.0,
            "1655": 15879.0,
            "1660": 17098.0,
            "1665": 20649.0,
            "1670": 15996.0,
            "1675": 17422.0,
            "1680": 16103.0,
            "1685": 15754.0,
            "1690": 15361.0,
            "1695": 14877.0,
            "1700": 16444.0,
            "1705": 15040.0,
            "1710": 22005.0,
            "1715": 16108.0,
            "1720": 17863.0,
            "1725": 17126.0,
            "1730": 15137.0,
            "1735": 16200.0,
            "1740": 16536.0,
            "1745": 17812.0,
            "1750": 12662.0,
            "1755": 17016.0,
            "1760": 17337.0,
            "1765": 16694.0,
            "1770": 15580.0,
            "1775": 20158.0,
            "1780": 15690.0,
            "1785": 17227.0,
            "1790": 16492.0,
            "1795": 16192.0,
            "1800": 15733.0,
            "1805": 15477.0,
            "1810": 15899.0,
            "1815": 19125.0,
            "1820": 16134.0,
            "1825": 14656.0,
            "1830": 17925.0,
            "1835": 15461.0,
            "1840": 15620.0,
            "1845": 17909.0,
            "1850": 16805.0,
            "1855": 16469.0,
            "1860": 16604.0,
            "1865": 18986.0,
            "1870": 14919.0,
            "1875": 16415.0,
            "1880": 18604.0,
            "1885": 15624.0,
            "1890": 18572.0,
            "1895": 16082.0,
            "1900": 13936.0,
            "1905": 15561.0,
            "1910": 18143.0,
            "1915": 15272.0,
            "1920": 14559.0,
            "1925": 16145.0,
            "1930": 14397.0,
            "1935": 18377.0,
            "1940": 16762.0,
            "1945": 16837.0,
            "1950": 18209.0,
            "1955": 17811.0,
            "1960": 18479.0,
            "1965": 16120.0,
            "1970": 18199.0,
            "1975": 16385.0,
            "1980": 15824.0,
            "1985": 18034.0,
            "1990": 16429.0,
            "1995": 15744.0,
            "2000": 16481.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 2000,
        "step_interval": 5,
        "values": {
            "1": "nan",
            "5": "nan",
            "10": "nan",
            "15": "nan",
            "20": "nan",
            "25": "nan",
            "30": "nan",
            "35": "nan",
            "40": "nan",
            "45": "nan",
            "50": "nan",
            "55": "nan",
            "60": "nan",
            "65": "nan",
            "70": "nan",
            "75": "nan",
            "80": "nan",
            "85": "nan",
            "90": "nan",
            "95": "nan",
            "100": 0.7866,
            "105": "nan",
            "110": "nan",
            "115": "nan",
            "120": "nan",
            "125": "nan",
            "130": "nan",
            "135": "nan",
            "140": "nan",
            "145": "nan",
            "150": "nan",
            "155": "nan",
            "160": "nan",
            "165": "nan",
            "170": "nan",
            "175": "nan",
            "180": "nan",
            "185": "nan",
            "190": "nan",
            "195": "nan",
            "200": 0.46824,
            "205": "nan",
            "210": "nan",
            "215": "nan",
            "220": "nan",
            "225": "nan",
            "230": "nan",
            "235": "nan",
            "240": "nan",
            "245": "nan",
            "250": "nan",
            "255": "nan",
            "260": "nan",
            "265": "nan",
            "270": "nan",
            "275": "nan",
            "280": "nan",
            "285": "nan",
            "290": "nan",
            "295": "nan",
            "300": 0.39004,
            "305": "nan",
            "310": "nan",
            "315": "nan",
            "320": "nan",
            "325": "nan",
            "330": "nan",
            "335": "nan",
            "340": "nan",
            "345": "nan",
            "350": "nan",
            "355": "nan",
            "360": "nan",
            "365": "nan",
            "370": "nan",
            "375": "nan",
            "380": "nan",
            "385": "nan",
            "390": "nan",
            "395": "nan",
            "400": 0.38578,
            "405": "nan",
            "410": "nan",
            "415": "nan",
            "420": "nan",
            "425": "nan",
            "430": "nan",
            "435": "nan",
            "440": "nan",
            "445": "nan",
            "450": "nan",
            "455": "nan",
            "460": "nan",
            "465": "nan",
            "470": "nan",
            "475": "nan",
            "480": "nan",
            "485": "nan",
            "490": "nan",
            "495": "nan",
            "500": 0.38197,
            "505": "nan",
            "510": "nan",
            "515": "nan",
            "520": "nan",
            "525": "nan",
            "530": "nan",
            "535": "nan",
            "540": "nan",
            "545": "nan",
            "550": "nan",
            "555": "nan",
            "560": "nan",
            "565": "nan",
            "570": "nan",
            "575": "nan",
            "580": "nan",
            "585": "nan",
            "590": "nan",
            "595": "nan",
            "600": 0.40009,
            "605": "nan",
            "610": "nan",
            "615": "nan",
            "620": "nan",
            "625": "nan",
            "630": "nan",
            "635": "nan",
            "640": "nan",
            "645": "nan",
            "650": "nan",
            "655": "nan",
            "660": "nan",
            "665": "nan",
            "670": "nan",
            "675": "nan",
            "680": "nan",
            "685": "nan",
            "690": "nan",
            "695": "nan",
            "700": 0.4229,
            "705": "nan",
            "710": "nan",
            "715": "nan",
            "720": "nan",
            "725": "nan",
            "730": "nan",
            "735": "nan",
            "740": "nan",
            "745": "nan",
            "750": "nan",
            "755": "nan",
            "760": "nan",
            "765": "nan",
            "770": "nan",
            "775": "nan",
            "780": "nan",
            "785": "nan",
            "790": "nan",
            "795": "nan",
            "800": 0.41403,
            "805": "nan",
            "810": "nan",
            "815": "nan",
            "820": "nan",
            "825": "nan",
            "830": "nan",
            "835": "nan",
            "840": "nan",
            "845": "nan",
            "850": "nan",
            "855": "nan",
            "860": "nan",
            "865": "nan",
            "870": "nan",
            "875": "nan",
            "880": "nan",
            "885": "nan",
            "890": "nan",
            "895": "nan",
            "900": 0.43862,
            "905": "nan",
            "910": "nan",
            "915": "nan",
            "920": "nan",
            "925": "nan",
            "930": "nan",
            "935": "nan",
            "940": "nan",
            "945": "nan",
            "950": "nan",
            "955": "nan",
            "960": "nan",
            "965": "nan",
            "970": "nan",
            "975": "nan",
            "980": "nan",
            "985": "nan",
            "990": "nan",
            "995": "nan",
            "1000": 0.40449,
            "1005": "nan",
            "1010": "nan",
            "1015": "nan",
            "1020": "nan",
            "1025": "nan",
            "1030": "nan",
            "1035": "nan",
            "1040": "nan",
            "1045": "nan",
            "1050": "nan",
            "1055": "nan",
            "1060": "nan",
            "1065": "nan",
            "1070": "nan",
            "1075": "nan",
            "1080": "nan",
            "1085": "nan",
            "1090": "nan",
            "1095": "nan",
            "1100": 0.37487,
            "1105": "nan",
            "1110": "nan",
            "1115": "nan",
            "1120": "nan",
            "1125": "nan",
            "1130": "nan",
            "1135": "nan",
            "1140": "nan",
            "1145": "nan",
            "1150": "nan",
            "1155": "nan",
            "1160": "nan",
            "1165": "nan",
            "1170": "nan",
            "1175": "nan",
            "1180": "nan",
            "1185": "nan",
            "1190": "nan",
            "1195": "nan",
            "1200": 0.38459,
            "1205": "nan",
            "1210": "nan",
            "1215": "nan",
            "1220": "nan",
            "1225": "nan",
            "1230": "nan",
            "1235": "nan",
            "1240": "nan",
            "1245": "nan",
            "1250": "nan",
            "1255": "nan",
            "1260": "nan",
            "1265": "nan",
            "1270": "nan",
            "1275": "nan",
            "1280": "nan",
            "1285": "nan",
            "1290": "nan",
            "1295": "nan",
            "1300": 0.40402,
            "1305": "nan",
            "1310": "nan",
            "1315": "nan",
            "1320": "nan",
            "1325": "nan",
            "1330": "nan",
            "1335": "nan",
            "1340": "nan",
            "1345": "nan",
            "1350": "nan",
            "1355": "nan",
            "1360": "nan",
            "1365": "nan",
            "1370": "nan",
            "1375": "nan",
            "1380": "nan",
            "1385": "nan",
            "1390": "nan",
            "1395": "nan",
            "1400": 0.37795,
            "1405": "nan",
            "1410": "nan",
            "1415": "nan",
            "1420": "nan",
            "1425": "nan",
            "1430": "nan",
            "1435": "nan",
            "1440": "nan",
            "1445": "nan",
            "1450": "nan",
            "1455": "nan",
            "1460": "nan",
            "1465": "nan",
            "1470": "nan",
            "1475": "nan",
            "1480": "nan",
            "1485": "nan",
            "1490": "nan",
            "1495": "nan",
            "1500": 0.38043,
            "1505": "nan",
            "1510": "nan",
            "1515": "nan",
            "1520": "nan",
            "1525": "nan",
            "1530": "nan",
            "1535": "nan",
            "1540": "nan",
            "1545": "nan",
            "1550": "nan",
            "1555": "nan",
            "1560": "nan",
            "1565": "nan",
            "1570": "nan",
            "1575": "nan",
            "1580": "nan",
            "1585": "nan",
            "1590": "nan",
            "1595": "nan",
            "1600": 0.38498,
            "1605": "nan",
            "1610": "nan",
            "1615": "nan",
            "1620": "nan",
            "1625": "nan",
            "1630": "nan",
            "1635": "nan",
            "1640": "nan",
            "1645": "nan",
            "1650": "nan",
            "1655": "nan",
            "1660": "nan",
            "1665": "nan",
            "1670": "nan",
            "1675": "nan",
            "1680": "nan",
            "1685": "nan",
            "1690": "nan",
            "1695": "nan",
            "1700": 0.38993,
            "1705": "nan",
            "1710": "nan",
            "1715": "nan",
            "1720": "nan",
            "1725": "nan",
            "1730": "nan",
            "1735": "nan",
            "1740": "nan",
            "1745": "nan",
            "1750": "nan",
            "1755": "nan",
            "1760": "nan",
            "1765": "nan",
            "1770": "nan",
            "1775": "nan",
            "1780": "nan",
            "1785": "nan",
            "1790": "nan",
            "1795": "nan",
            "1800": 0.37943,
            "1805": "nan",
            "1810": "nan",
            "1815": "nan",
            "1820": "nan",
            "1825": "nan",
            "1830": "nan",
            "1835": "nan",
            "1840": "nan",
            "1845": "nan",
            "1850": "nan",
            "1855": "nan",
            "1860": "nan",
            "1865": "nan",
            "1870": "nan",
            "1875": "nan",
            "1880": "nan",
            "1885": "nan",
            "1890": "nan",
            "1895": "nan",
            "1900": 0.38578,
            "1905": "nan",
            "1910": "nan",
            "1915": "nan",
            "1920": "nan",
            "1925": "nan",
            "1930": "nan",
            "1935": "nan",
            "1940": "nan",
            "1945": "nan",
            "1950": "nan",
            "1955": "nan",
            "1960": "nan",
            "1965": "nan",
            "1970": "nan",
            "1975": "nan",
            "1980": "nan",
            "1985": "nan",
            "1990": "nan",
            "1995": "nan",
            "2000": 0.44172
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_release_sm/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 2000,
        "step_interval": 5,
        "values": {
            "1": 10.52518,
            "5": 10.54711,
            "10": 10.57625,
            "15": 10.53228,
            "20": 10.1977,
            "25": 9.60859,
            "30": 9.42961,
            "35": 9.13438,
            "40": 9.11757,
            "45": 8.9598,
            "50": 8.69439,
            "55": 8.74614,
            "60": 8.5262,
            "65": 8.48894,
            "70": 8.18786,
            "75": 8.30128,
            "80": 7.98015,
            "85": 7.75995,
            "90": 7.67797,
            "95": 7.58098,
            "100": 7.53685,
            "105": 7.37115,
            "110": 7.47741,
            "115": 7.16871,
            "120": 6.95404,
            "125": 7.22744,
            "130": 6.98847,
            "135": 6.78308,
            "140": 7.19909,
            "145": 6.9713,
            "150": 7.073,
            "155": 6.97305,
            "160": 7.00146,
            "165": 7.0568,
            "170": 6.9259,
            "175": 6.8686,
            "180": 6.70845,
            "185": 6.92037,
            "190": 7.10711,
            "195": 6.94684,
            "200": 6.84551,
            "205": 6.89554,
            "210": 6.74732,
            "215": 6.98353,
            "220": 6.66662,
            "225": 6.73845,
            "230": 6.68939,
            "235": 6.80476,
            "240": 6.57309,
            "245": 7.09103,
            "250": 6.60586,
            "255": 6.6257,
            "260": 6.71981,
            "265": 6.60514,
            "270": 6.58499,
            "275": 6.84108,
            "280": 6.51956,
            "285": 6.5881,
            "290": 6.77888,
            "295": 6.73633,
            "300": 6.45158,
            "305": 6.35295,
            "310": 6.95462,
            "315": 6.92967,
            "320": 6.85163,
            "325": 6.65274,
            "330": 6.63877,
            "335": 6.6328,
            "340": 6.58326,
            "345": 6.55319,
            "350": 6.39654,
            "355": 6.48845,
            "360": 6.55577,
            "365": 6.60764,
            "370": 6.50698,
            "375": 6.60894,
            "380": 6.43397,
            "385": 6.40337,
            "390": 6.45377,
            "395": 6.25318,
            "400": 6.64296,
            "405": 6.48078,
            "410": 6.36101,
            "415": 6.56149,
            "420": 6.58523,
            "425": 6.51312,
            "430": 6.53315,
            "435": 6.51013,
            "440": 6.53088,
            "445": 6.54469,
            "450": 6.58192,
            "455": 6.61907,
            "460": 6.7284,
            "465": 6.7023,
            "470": 6.4933,
            "475": 6.22155,
            "480": 6.44352,
            "485": 6.51276,
            "490": 6.16963,
            "495": 6.18146,
            "500": 6.58793,
            "505": 6.61849,
            "510": 6.51771,
            "515": 6.19808,
            "520": 6.38782,
            "525": 6.4449,
            "530": 6.38794,
            "535": 5.9946,
            "540": 6.43932,
            "545": 6.34319,
            "550": 6.42034,
            "555": 6.48463,
            "560": 6.41058,
            "565": 6.18975,
            "570": 6.26475,
            "575": 6.60776,
            "580": 6.40861,
            "585": 6.44091,
            "590": 6.17827,
            "595": 6.26133,
            "600": 6.48062,
            "605": 6.48987,
            "610": 6.30567,
            "615": 6.41793,
            "620": 6.50242,
            "625": 6.41944,
            "630": 6.42269,
            "635": 6.48993,
            "640": 6.3142,
            "645": 6.26425,
            "650": 6.24968,
            "655": 6.54216,
            "660": 6.32704,
            "665": 6.28362,
            "670": 6.34635,
            "675": 6.25007,
            "680": 6.36366,
            "685": 6.51787,
            "690": 6.52306,
            "695": 6.19164,
            "700": 6.02684,
            "705": 6.27937,
            "710": 6.15186,
            "715": 6.37122,
            "720": 6.24474,
            "725": 6.4324,
            "730": 6.54322,
            "735": 6.59039,
            "740": 6.56551,
            "745": 6.25838,
            "750": 6.26025,
            "755": 5.99142,
            "760": 6.56569,
            "765": 6.36209,
            "770": 6.41801,
            "775": 6.14492,
            "780": 6.22431,
            "785": 6.2837,
            "790": 6.49366,
            "795": 6.36524,
            "800": 6.31734,
            "805": 6.50546,
            "810": 6.18094,
            "815": 6.23859,
            "820": 6.44193,
            "825": 6.50405,
            "830": 6.39936,
            "835": 6.3749,
            "840": 6.32574,
            "845": 6.18599,
            "850": 6.37284,
            "855": 6.39409,
            "860": 6.25521,
            "865": 6.28276,
            "870": 6.36092,
            "875": 6.40236,
            "880": 6.28502,
            "885": 6.48567,
            "890": 6.18932,
            "895": 6.27216,
            "900": 6.26478,
            "905": 6.36788,
            "910": 6.06337,
            "915": 6.49909,
            "920": 6.33728,
            "925": 6.27291,
            "930": 6.20875,
            "935": 6.2218,
            "940": 6.36607,
            "945": 6.03875,
            "950": 6.16958,
            "955": 6.1498,
            "960": 6.23218,
            "965": 6.27167,
            "970": 6.27182,
            "975": 6.18992,
            "980": 6.0871,
            "985": 6.23515,
            "990": 6.15401,
            "995": 6.39715,
            "1000": 6.199,
            "1005": 6.41564,
            "1010": 6.10307,
            "1015": 6.18253,
            "1020": 6.0715,
            "1025": 6.34874,
            "1030": 6.15345,
            "1035": 5.91993,
            "1040": 6.00182,
            "1045": 6.32773,
            "1050": 6.44289,
            "1055": 6.09205,
            "1060": 6.11626,
            "1065": 5.94789,
            "1070": 6.18822,
            "1075": 5.96807,
            "1080": 6.14028,
            "1085": 6.00403,
            "1090": 6.07878,
            "1095": 6.39724,
            "1100": 6.07371,
            "1105": 6.27733,
            "1110": 6.17767,
            "1115": 6.05213,
            "1120": 6.03505,
            "1125": 6.14642,
            "1130": 5.91905,
            "1135": 6.33701,
            "1140": 6.4673,
            "1145": 6.24062,
            "1150": 6.11148,
            "1155": 6.00366,
            "1160": 6.07106,
            "1165": 6.12458,
            "1170": 6.31238,
            "1175": 6.06133,
            "1180": 6.23575,
            "1185": 6.28767,
            "1190": 6.32616,
            "1195": 6.20409,
            "1200": 6.1787,
            "1205": 6.27853,
            "1210": 6.124,
            "1215": 6.02269,
            "1220": 6.21049,
            "1225": 6.31462,
            "1230": 6.27525,
            "1235": 6.07179,
            "1240": 6.44848,
            "1245": 6.08257,
            "1250": 5.89585,
            "1255": 6.22473,
            "1260": 6.21804,
            "1265": 5.98499,
            "1270": 6.05378,
            "1275": 6.11328,
            "1280": 5.79212,
            "1285": 5.96076,
            "1290": 6.02109,
            "1295": 6.15922,
            "1300": 6.33887,
            "1305": 5.93778,
            "1310": 5.98927,
            "1315": 6.04956,
            "1320": 6.11263,
            "1325": 6.15042,
            "1330": 6.10207,
            "1335": 6.03588,
            "1340": 6.09098,
            "1345": 6.17077,
            "1350": 5.94377,
            "1355": 6.10134,
            "1360": 6.44012,
            "1365": 5.97453,
            "1370": 6.08617,
            "1375": 6.26098,
            "1380": 6.08712,
            "1385": 5.99329,
            "1390": 5.83651,
            "1395": 5.87328,
            "1400": 6.13648,
            "1405": 6.12933,
            "1410": 6.0632,
            "1415": 6.25026,
            "1420": 6.196,
            "1425": 6.18042,
            "1430": 6.01123,
            "1435": 6.51218,
            "1440": 6.34508,
            "1445": 6.3179,
            "1450": 6.0905,
            "1455": 5.80082,
            "1460": 6.19257,
            "1465": 6.16862,
            "1470": 6.26836,
            "1475": 6.25295,
            "1480": 6.30886,
            "1485": 6.05106,
            "1490": 6.25044,
            "1495": 6.25631,
            "1500": 6.17856,
            "1505": 6.24903,
            "1510": 6.38972,
            "1515": 6.04357,
            "1520": 5.83213,
            "1525": 5.90258,
            "1530": 5.87533,
            "1535": 6.49284,
            "1540": 6.40956,
            "1545": 6.36558,
            "1550": 6.13092,
            "1555": 6.06429,
            "1560": 6.15867,
            "1565": 6.04309,
            "1570": 5.99586,
            "1575": 6.10514,
            "1580": 6.22456,
            "1585": 6.04057,
            "1590": 6.25663,
            "1595": 6.16142,
            "1600": 6.07455,
            "1605": 6.05807,
            "1610": 6.14498,
            "1615": 6.12337,
            "1620": 6.03388,
            "1625": 6.07928,
            "1630": 6.09847,
            "1635": 6.32276,
            "1640": 6.14014,
            "1645": 6.19561,
            "1650": 6.29657,
            "1655": 6.36122,
            "1660": 6.224,
            "1665": 6.06213,
            "1670": 6.16801,
            "1675": 5.86985,
            "1680": 6.09215,
            "1685": 6.38653,
            "1690": 6.14996,
            "1695": 5.99868,
            "1700": 6.23211,
            "1705": 6.07235,
            "1710": 6.00732,
            "1715": 6.02437,
            "1720": 6.17613,
            "1725": 6.38947,
            "1730": 6.18961,
            "1735": 6.02104,
            "1740": 6.11735,
            "1745": 6.1043,
            "1750": 6.32512,
            "1755": 6.27834,
            "1760": 5.98423,
            "1765": 6.01155,
            "1770": 6.1114,
            "1775": 5.9844,
            "1780": 6.04965,
            "1785": 5.822,
            "1790": 5.96921,
            "1795": 6.31597,
            "1800": 6.16889,
            "1805": 5.81951,
            "1810": 6.48012,
            "1815": 6.15535,
            "1820": 6.46764,
            "1825": 6.1539,
            "1830": 6.08742,
            "1835": 5.96482,
            "1840": 5.88116,
            "1845": 6.30056,
            "1850": 6.25113,
            "1855": 6.11407,
            "1860": 6.04051,
            "1865": 6.0148,
            "1870": 6.34421,
            "1875": 6.09027,
            "1880": 5.9285,
            "1885": 6.28984,
            "1890": 6.10867,
            "1895": 5.95919,
            "1900": 6.07489,
            "1905": 6.31557,
            "1910": 6.30316,
            "1915": 6.31794,
            "1920": 6.18453,
            "1925": 5.99466,
            "1930": 6.09093,
            "1935": 5.9949,
            "1940": 6.13025,
            "1945": 6.08214,
            "1950": 6.02806,
            "1955": 6.03901,
            "1960": 6.00841,
            "1965": 6.00114,
            "1970": 6.05624,
            "1975": 6.24372,
            "1980": 6.3997,
            "1985": 6.11374,
            "1990": 5.97077,
            "1995": 5.92911,
            "2000": 6.07324
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 2000,
        "step_interval": 5,
        "values": {
            "1": 244161024.0,
            "5": 244161024.0,
            "10": 244161024.0,
            "15": 244161024.0,
            "20": 322521088.0,
            "25": 320948224.0,
            "30": 320948224.0,
            "35": 320948224.0,
            "40": 322521088.0,
            "45": 320948224.0,
            "50": 320948224.0,
            "55": 320948224.0,
            "60": 320948224.0,
            "65": 320948224.0,
            "70": 320948224.0,
            "75": 320948224.0,
            "80": 322521088.0,
            "85": 320948224.0,
            "90": 322521088.0,
            "95": 322521088.0,
            "100": 322521088.0,
            "105": 322521088.0,
            "110": 322521088.0,
            "115": 322521088.0,
            "120": 322521088.0,
            "125": 322521088.0,
            "130": 322521088.0,
            "135": 320948224.0,
            "140": 320948224.0,
            "145": 322521088.0,
            "150": 322521088.0,
            "155": 322521088.0,
            "160": 322521088.0,
            "165": 322521088.0,
            "170": 322521088.0,
            "175": 320948224.0,
            "180": 320948224.0,
            "185": 320948224.0,
            "190": 322521088.0,
            "195": 322521088.0,
            "200": 322521088.0,
            "205": 320948224.0,
            "210": 320948224.0,
            "215": 322521088.0,
            "220": 322521088.0,
            "225": 320948224.0,
            "230": 320948224.0,
            "235": 322521088.0,
            "240": 322521088.0,
            "245": 320948224.0,
            "250": 320948224.0,
            "255": 320948224.0,
            "260": 322521088.0,
            "265": 322521088.0,
            "270": 322521088.0,
            "275": 322521088.0,
            "280": 322521088.0,
            "285": 322521088.0,
            "290": 320948224.0,
            "295": 320948224.0,
            "300": 320948224.0,
            "305": 320948224.0,
            "310": 320948224.0,
            "315": 320948224.0,
            "320": 320948224.0,
            "325": 320948224.0,
            "330": 320948224.0,
            "335": 320948224.0,
            "340": 320948224.0,
            "345": 320948224.0,
            "350": 320948224.0,
            "355": 320948224.0,
            "360": 320948224.0,
            "365": 320948224.0,
            "370": 320948224.0,
            "375": 320948224.0,
            "380": 320948224.0,
            "385": 320948224.0,
            "390": 320948224.0,
            "395": 320948224.0,
            "400": 320948224.0,
            "405": 320948224.0,
            "410": 322521088.0,
            "415": 322521088.0,
            "420": 322521088.0,
            "425": 322521088.0,
            "430": 322521088.0,
            "435": 320948224.0,
            "440": 320948224.0,
            "445": 320948224.0,
            "450": 320948224.0,
            "455": 322521088.0,
            "460": 320948224.0,
            "465": 322521088.0,
            "470": 322521088.0,
            "475": 322521088.0,
            "480": 322521088.0,
            "485": 320948224.0,
            "490": 322521088.0,
            "495": 322521088.0,
            "500": 322521088.0,
            "505": 322521088.0,
            "510": 320948224.0,
            "515": 322521088.0,
            "520": 322521088.0,
            "525": 322521088.0,
            "530": 320948224.0,
            "535": 320948224.0,
            "540": 320948224.0,
            "545": 320948224.0,
            "550": 320948224.0,
            "555": 320948224.0,
            "560": 320948224.0,
            "565": 320948224.0,
            "570": 320948224.0,
            "575": 320948224.0,
            "580": 320948224.0,
            "585": 320948224.0,
            "590": 320948224.0,
            "595": 320948224.0,
            "600": 320948224.0,
            "605": 320948224.0,
            "610": 320948224.0,
            "615": 320948224.0,
            "620": 320948224.0,
            "625": 320948224.0,
            "630": 320948224.0,
            "635": 320948224.0,
            "640": 320948224.0,
            "645": 320948224.0,
            "650": 320948224.0,
            "655": 320948224.0,
            "660": 322521088.0,
            "665": 320948224.0,
            "670": 320948224.0,
            "675": 320948224.0,
            "680": 322521088.0,
            "685": 322521088.0,
            "690": 322521088.0,
            "695": 322521088.0,
            "700": 322521088.0,
            "705": 322521088.0,
            "710": 322521088.0,
            "715": 322521088.0,
            "720": 322521088.0,
            "725": 322521088.0,
            "730": 320948224.0,
            "735": 320948224.0,
            "740": 320948224.0,
            "745": 320948224.0,
            "750": 320948224.0,
            "755": 320948224.0,
            "760": 320948224.0,
            "765": 320948224.0,
            "770": 320948224.0,
            "775": 320948224.0,
            "780": 320948224.0,
            "785": 322521088.0,
            "790": 320948224.0,
            "795": 320948224.0,
            "800": 320948224.0,
            "805": 320948224.0,
            "810": 320948224.0,
            "815": 322521088.0,
            "820": 322521088.0,
            "825": 322521088.0,
            "830": 322521088.0,
            "835": 322521088.0,
            "840": 322521088.0,
            "845": 322521088.0,
            "850": 320948224.0,
            "855": 320948224.0,
            "860": 320948224.0,
            "865": 320948224.0,
            "870": 320948224.0,
            "875": 320948224.0,
            "880": 322521088.0,
            "885": 320948224.0,
            "890": 320948224.0,
            "895": 322521088.0,
            "900": 322521088.0,
            "905": 320948224.0,
            "910": 320948224.0,
            "915": 320948224.0,
            "920": 320948224.0,
            "925": 320948224.0,
            "930": 320948224.0,
            "935": 320948224.0,
            "940": 320948224.0,
            "945": 320948224.0,
            "950": 322521088.0,
            "955": 320948224.0,
            "960": 320948224.0,
            "965": 320948224.0,
            "970": 320948224.0,
            "975": 320948224.0,
            "980": 320948224.0,
            "985": 320948224.0,
            "990": 322521088.0,
            "995": 322521088.0,
            "1000": 322521088.0,
            "1005": 322521088.0,
            "1010": 322521088.0,
            "1015": 322521088.0,
            "1020": 322521088.0,
            "1025": 322521088.0,
            "1030": 322521088.0,
            "1035": 322521088.0,
            "1040": 322521088.0,
            "1045": 322521088.0,
            "1050": 322521088.0,
            "1055": 322521088.0,
            "1060": 320948224.0,
            "1065": 320948224.0,
            "1070": 320948224.0,
            "1075": 320948224.0,
            "1080": 320948224.0,
            "1085": 320948224.0,
            "1090": 320948224.0,
            "1095": 320948224.0,
            "1100": 320948224.0,
            "1105": 320948224.0,
            "1110": 320948224.0,
            "1115": 320948224.0,
            "1120": 320948224.0,
            "1125": 320948224.0,
            "1130": 320948224.0,
            "1135": 320948224.0,
            "1140": 320948224.0,
            "1145": 320948224.0,
            "1150": 320948224.0,
            "1155": 320948224.0,
            "1160": 320948224.0,
            "1165": 320948224.0,
            "1170": 320948224.0,
            "1175": 322521088.0,
            "1180": 322521088.0,
            "1185": 322521088.0,
            "1190": 322521088.0,
            "1195": 322521088.0,
            "1200": 322521088.0,
            "1205": 322521088.0,
            "1210": 322521088.0,
            "1215": 322521088.0,
            "1220": 322521088.0,
            "1225": 322521088.0,
            "1230": 322521088.0,
            "1235": 320948224.0,
            "1240": 322521088.0,
            "1245": 320948224.0,
            "1250": 322521088.0,
            "1255": 322521088.0,
            "1260": 320948224.0,
            "1265": 320948224.0,
            "1270": 320948224.0,
            "1275": 320948224.0,
            "1280": 322521088.0,
            "1285": 320948224.0,
            "1290": 320948224.0,
            "1295": 320948224.0,
            "1300": 322521088.0,
            "1305": 322521088.0,
            "1310": 322521088.0,
            "1315": 322521088.0,
            "1320": 322521088.0,
            "1325": 322521088.0,
            "1330": 322521088.0,
            "1335": 322521088.0,
            "1340": 322521088.0,
            "1345": 322521088.0,
            "1350": 322521088.0,
            "1355": 322521088.0,
            "1360": 322521088.0,
            "1365": 320948224.0,
            "1370": 320948224.0,
            "1375": 320948224.0,
            "1380": 320948224.0,
            "1385": 320948224.0,
            "1390": 320948224.0,
            "1395": 320948224.0,
            "1400": 320948224.0,
            "1405": 320948224.0,
            "1410": 322521088.0,
            "1415": 322521088.0,
            "1420": 320948224.0,
            "1425": 322521088.0,
            "1430": 322521088.0,
            "1435": 320948224.0,
            "1440": 322521088.0,
            "1445": 322521088.0,
            "1450": 322521088.0,
            "1455": 322521088.0,
            "1460": 322521088.0,
            "1465": 320948224.0,
            "1470": 320948224.0,
            "1475": 320948224.0,
            "1480": 322521088.0,
            "1485": 322521088.0,
            "1490": 322521088.0,
            "1495": 322521088.0,
            "1500": 322521088.0,
            "1505": 320948224.0,
            "1510": 320948224.0,
            "1515": 320948224.0,
            "1520": 320948224.0,
            "1525": 320948224.0,
            "1530": 320948224.0,
            "1535": 320948224.0,
            "1540": 320948224.0,
            "1545": 322521088.0,
            "1550": 320948224.0,
            "1555": 320948224.0,
            "1560": 320948224.0,
            "1565": 320948224.0,
            "1570": 320948224.0,
            "1575": 322521088.0,
            "1580": 322521088.0,
            "1585": 320948224.0,
            "1590": 320948224.0,
            "1595": 320948224.0,
            "1600": 322521088.0,
            "1605": 320948224.0,
            "1610": 320948224.0,
            "1615": 320948224.0,
            "1620": 320948224.0,
            "1625": 320948224.0,
            "1630": 320948224.0,
            "1635": 320948224.0,
            "1640": 320948224.0,
            "1645": 320948224.0,
            "1650": 320948224.0,
            "1655": 320948224.0,
            "1660": 322521088.0,
            "1665": 322521088.0,
            "1670": 322521088.0,
            "1675": 322521088.0,
            "1680": 322521088.0,
            "1685": 320948224.0,
            "1690": 322521088.0,
            "1695": 322521088.0,
            "1700": 322521088.0,
            "1705": 322521088.0,
            "1710": 322521088.0,
            "1715": 322521088.0,
            "1720": 322521088.0,
            "1725": 322521088.0,
            "1730": 322521088.0,
            "1735": 322521088.0,
            "1740": 322521088.0,
            "1745": 322521088.0,
            "1750": 322521088.0,
            "1755": 322521088.0,
            "1760": 322521088.0,
            "1765": 322521088.0,
            "1770": 322521088.0,
            "1775": 320948224.0,
            "1780": 322521088.0,
            "1785": 322521088.0,
            "1790": 320948224.0,
            "1795": 320948224.0,
            "1800": 320948224.0,
            "1805": 320948224.0,
            "1810": 320948224.0,
            "1815": 320948224.0,
            "1820": 322521088.0,
            "1825": 322521088.0,
            "1830": 322521088.0,
            "1835": 322521088.0,
            "1840": 320948224.0,
            "1845": 320948224.0,
            "1850": 322521088.0,
            "1855": 322521088.0,
            "1860": 320948224.0,
            "1865": 320948224.0,
            "1870": 320948224.0,
            "1875": 320948224.0,
            "1880": 320948224.0,
            "1885": 320948224.0,
            "1890": 322521088.0,
            "1895": 320948224.0,
            "1900": 320948224.0,
            "1905": 320948224.0,
            "1910": 322521088.0,
            "1915": 320948224.0,
            "1920": 322521088.0,
            "1925": 322521088.0,
            "1930": 320948224.0,
            "1935": 320948224.0,
            "1940": 322521088.0,
            "1945": 320948224.0,
            "1950": 320948224.0,
            "1955": 320948224.0,
            "1960": 322521088.0,
            "1965": 322521088.0,
            "1970": 322521088.0,
            "1975": 322521088.0,
            "1980": 322521088.0,
            "1985": 322521088.0,
            "1990": 322521088.0,
            "1995": 322521088.0,
            "2000": 322521088.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 2000,
        "step_interval": 5,
        "values": {
            "1": 420345856.0,
            "5": 420346368.0,
            "10": 420346368.0,
            "15": 420346368.0,
            "20": 501327360.0,
            "25": 501327872.0,
            "30": 501327872.0,
            "35": 501327872.0,
            "40": 501327872.0,
            "45": 501327872.0,
            "50": 501327872.0,
            "55": 501327872.0,
            "60": 501327872.0,
            "65": 501327872.0,
            "70": 501327872.0,
            "75": 501327872.0,
            "80": 501327872.0,
            "85": 501327872.0,
            "90": 501327872.0,
            "95": 501327872.0,
            "100": 501327872.0,
            "105": 501327872.0,
            "110": 501327872.0,
            "115": 501327872.0,
            "120": 501327872.0,
            "125": 501327872.0,
            "130": 501327872.0,
            "135": 501327872.0,
            "140": 501327872.0,
            "145": 501327872.0,
            "150": 501327872.0,
            "155": 501327872.0,
            "160": 501327872.0,
            "165": 501327872.0,
            "170": 501327872.0,
            "175": 501327872.0,
            "180": 501327872.0,
            "185": 501327872.0,
            "190": 501327872.0,
            "195": 501327872.0,
            "200": 501327872.0,
            "205": 501327872.0,
            "210": 501327872.0,
            "215": 501327872.0,
            "220": 501327872.0,
            "225": 501327872.0,
            "230": 501327872.0,
            "235": 501327872.0,
            "240": 501327872.0,
            "245": 501327872.0,
            "250": 501327872.0,
            "255": 501327872.0,
            "260": 501327872.0,
            "265": 501327872.0,
            "270": 501327872.0,
            "275": 501327872.0,
            "280": 501327872.0,
            "285": 501327872.0,
            "290": 501327872.0,
            "295": 501327872.0,
            "300": 501327872.0,
            "305": 501327872.0,
            "310": 501327872.0,
            "315": 501327872.0,
            "320": 501327872.0,
            "325": 501327872.0,
            "330": 501327872.0,
            "335": 501327872.0,
            "340": 501327872.0,
            "345": 501327872.0,
            "350": 501327872.0,
            "355": 501327872.0,
            "360": 501327872.0,
            "365": 501327872.0,
            "370": 501327872.0,
            "375": 501327872.0,
            "380": 501327872.0,
            "385": 501327872.0,
            "390": 501327872.0,
            "395": 501327872.0,
            "400": 501327872.0,
            "405": 501327872.0,
            "410": 501327872.0,
            "415": 501327872.0,
            "420": 501327872.0,
            "425": 501327872.0,
            "430": 501327872.0,
            "435": 501327872.0,
            "440": 501327872.0,
            "445": 501327872.0,
            "450": 501327872.0,
            "455": 501327872.0,
            "460": 501327872.0,
            "465": 501327872.0,
            "470": 501327872.0,
            "475": 501327872.0,
            "480": 501327872.0,
            "485": 501327872.0,
            "490": 501327872.0,
            "495": 501327872.0,
            "500": 501327872.0,
            "505": 501327872.0,
            "510": 501327872.0,
            "515": 501327872.0,
            "520": 501327872.0,
            "525": 501327872.0,
            "530": 501327872.0,
            "535": 501327872.0,
            "540": 501327872.0,
            "545": 501327872.0,
            "550": 501327872.0,
            "555": 501327872.0,
            "560": 501327872.0,
            "565": 501327872.0,
            "570": 501327872.0,
            "575": 501327872.0,
            "580": 501327872.0,
            "585": 501327872.0,
            "590": 501327872.0,
            "595": 501327872.0,
            "600": 501327872.0,
            "605": 501327872.0,
            "610": 501327872.0,
            "615": 501327872.0,
            "620": 501327872.0,
            "625": 501327872.0,
            "630": 501327872.0,
            "635": 501327872.0,
            "640": 501327872.0,
            "645": 501327872.0,
            "650": 501327872.0,
            "655": 501327872.0,
            "660": 501327872.0,
            "665": 501327872.0,
            "670": 501327872.0,
            "675": 501327872.0,
            "680": 501327872.0,
            "685": 501327872.0,
            "690": 501327872.0,
            "695": 501327872.0,
            "700": 501327872.0,
            "705": 501327872.0,
            "710": 501327872.0,
            "715": 501327872.0,
            "720": 501327872.0,
            "725": 501327872.0,
            "730": 501327872.0,
            "735": 501327872.0,
            "740": 501327872.0,
            "745": 501327872.0,
            "750": 501327872.0,
            "755": 501327872.0,
            "760": 501327872.0,
            "765": 501327872.0,
            "770": 501327872.0,
            "775": 501327872.0,
            "780": 501327872.0,
            "785": 501327872.0,
            "790": 501327872.0,
            "795": 501327872.0,
            "800": 501327872.0,
            "805": 501327872.0,
            "810": 501327872.0,
            "815": 501327872.0,
            "820": 501327872.0,
            "825": 501327872.0,
            "830": 501327872.0,
            "835": 501327872.0,
            "840": 501327872.0,
            "845": 501327872.0,
            "850": 501327872.0,
            "855": 501327872.0,
            "860": 501327872.0,
            "865": 501327872.0,
            "870": 501327872.0,
            "875": 501327872.0,
            "880": 501327872.0,
            "885": 501327872.0,
            "890": 501327872.0,
            "895": 501327872.0,
            "900": 501327872.0,
            "905": 501327872.0,
            "910": 501327872.0,
            "915": 501327872.0,
            "920": 501327872.0,
            "925": 501327872.0,
            "930": 501327872.0,
            "935": 501327872.0,
            "940": 501327872.0,
            "945": 501327872.0,
            "950": 501327872.0,
            "955": 501327872.0,
            "960": 501327872.0,
            "965": 501327872.0,
            "970": 501327872.0,
            "975": 501327872.0,
            "980": 501327872.0,
            "985": 501327872.0,
            "990": 501327872.0,
            "995": 501327872.0,
            "1000": 501327872.0,
            "1005": 501327872.0,
            "1010": 501327872.0,
            "1015": 501327872.0,
            "1020": 501327872.0,
            "1025": 501327872.0,
            "1030": 501327872.0,
            "1035": 501327872.0,
            "1040": 501327872.0,
            "1045": 501327872.0,
            "1050": 501327872.0,
            "1055": 501327872.0,
            "1060": 501327872.0,
            "1065": 501327872.0,
            "1070": 501327872.0,
            "1075": 501327872.0,
            "1080": 501327872.0,
            "1085": 501327872.0,
            "1090": 501327872.0,
            "1095": 501327872.0,
            "1100": 501327872.0,
            "1105": 501327872.0,
            "1110": 501327872.0,
            "1115": 501327872.0,
            "1120": 501327872.0,
            "1125": 501327872.0,
            "1130": 501327872.0,
            "1135": 501327872.0,
            "1140": 501327872.0,
            "1145": 501327872.0,
            "1150": 501327872.0,
            "1155": 501327872.0,
            "1160": 501327872.0,
            "1165": 501327872.0,
            "1170": 501327872.0,
            "1175": 501327872.0,
            "1180": 501327872.0,
            "1185": 501327872.0,
            "1190": 501327872.0,
            "1195": 501327872.0,
            "1200": 501327872.0,
            "1205": 501327872.0,
            "1210": 501327872.0,
            "1215": 501327872.0,
            "1220": 501327872.0,
            "1225": 501327872.0,
            "1230": 501327872.0,
            "1235": 501327872.0,
            "1240": 501327872.0,
            "1245": 501327872.0,
            "1250": 501327872.0,
            "1255": 501327872.0,
            "1260": 501327872.0,
            "1265": 501327872.0,
            "1270": 501327872.0,
            "1275": 501327872.0,
            "1280": 501327872.0,
            "1285": 501327872.0,
            "1290": 501327872.0,
            "1295": 501327872.0,
            "1300": 501327872.0,
            "1305": 501327872.0,
            "1310": 501327872.0,
            "1315": 501327872.0,
            "1320": 501327872.0,
            "1325": 501327872.0,
            "1330": 501327872.0,
            "1335": 501327872.0,
            "1340": 501327872.0,
            "1345": 501327872.0,
            "1350": 501327872.0,
            "1355": 501327872.0,
            "1360": 501327872.0,
            "1365": 501327872.0,
            "1370": 501327872.0,
            "1375": 501327872.0,
            "1380": 501327872.0,
            "1385": 501327872.0,
            "1390": 501327872.0,
            "1395": 501327872.0,
            "1400": 501327872.0,
            "1405": 501327872.0,
            "1410": 501327872.0,
            "1415": 501327872.0,
            "1420": 501327872.0,
            "1425": 501327872.0,
            "1430": 501327872.0,
            "1435": 501327872.0,
            "1440": 501327872.0,
            "1445": 501327872.0,
            "1450": 501327872.0,
            "1455": 501327872.0,
            "1460": 501327872.0,
            "1465": 501327872.0,
            "1470": 501327872.0,
            "1475": 501327872.0,
            "1480": 501327872.0,
            "1485": 501327872.0,
            "1490": 501327872.0,
            "1495": 501327872.0,
            "1500": 501327872.0,
            "1505": 501327872.0,
            "1510": 501327872.0,
            "1515": 501327872.0,
            "1520": 501327872.0,
            "1525": 501327872.0,
            "1530": 501327872.0,
            "1535": 501327872.0,
            "1540": 501327872.0,
            "1545": 501327872.0,
            "1550": 501327872.0,
            "1555": 501327872.0,
            "1560": 501327872.0,
            "1565": 501327872.0,
            "1570": 501327872.0,
            "1575": 501327872.0,
            "1580": 501327872.0,
            "1585": 501327872.0,
            "1590": 501327872.0,
            "1595": 501327872.0,
            "1600": 501327872.0,
            "1605": 501327872.0,
            "1610": 501327872.0,
            "1615": 501327872.0,
            "1620": 501327872.0,
            "1625": 501327872.0,
            "1630": 501327872.0,
            "1635": 501327872.0,
            "1640": 501327872.0,
            "1645": 501327872.0,
            "1650": 501327872.0,
            "1655": 501327872.0,
            "1660": 501327872.0,
            "1665": 501327872.0,
            "1670": 501327872.0,
            "1675": 501327872.0,
            "1680": 501327872.0,
            "1685": 501327872.0,
            "1690": 501327872.0,
            "1695": 501327872.0,
            "1700": 501327872.0,
            "1705": 501327872.0,
            "1710": 501327872.0,
            "1715": 501327872.0,
            "1720": 501327872.0,
            "1725": 501327872.0,
            "1730": 501327872.0,
            "1735": 501327872.0,
            "1740": 501327872.0,
            "1745": 501327872.0,
            "1750": 501327872.0,
            "1755": 501327872.0,
            "1760": 501327872.0,
            "1765": 501327872.0,
            "1770": 501327872.0,
            "1775": 501327872.0,
            "1780": 501327872.0,
            "1785": 501327872.0,
            "1790": 501327872.0,
            "1795": 501327872.0,
            "1800": 501327872.0,
            "1805": 501327872.0,
            "1810": 501327872.0,
            "1815": 501327872.0,
            "1820": 501327872.0,
            "1825": 501327872.0,
            "1830": 501327872.0,
            "1835": 501327872.0,
            "1840": 501327872.0,
            "1845": 501327872.0,
            "1850": 501327872.0,
            "1855": 501327872.0,
            "1860": 501327872.0,
            "1865": 501327872.0,
            "1870": 501327872.0,
            "1875": 501327872.0,
            "1880": 501327872.0,
            "1885": 501327872.0,
            "1890": 501327872.0,
            "1895": 501327872.0,
            "1900": 501327872.0,
            "1905": 501327872.0,
            "1910": 501327872.0,
            "1915": 501327872.0,
            "1920": 501327872.0,
            "1925": 501327872.0,
            "1930": 501327872.0,
            "1935": 501327872.0,
            "1940": 501327872.0,
            "1945": 501327872.0,
            "1950": 501327872.0,
            "1955": 501327872.0,
            "1960": 501327872.0,
            "1965": 501327872.0,
            "1970": 501327872.0,
            "1975": 501327872.0,
            "1980": 501327872.0,
            "1985": 501327872.0,
            "1990": 501327872.0,
            "1995": 501327872.0,
            "2000": 501327872.0
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 2000,
        "step_interval": 5,
        "values": {
            "1": "nan",
            "5": "nan",
            "10": "nan",
            "15": "nan",
            "20": 12399.0,
            "25": 12768.0,
            "30": 16641.0,
            "35": 12861.0,
            "40": 13048.0,
            "45": 13826.0,
            "50": 10792.0,
            "55": 12500.0,
            "60": 13751.0,
            "65": 15066.0,
            "70": 10815.0,
            "75": 11843.0,
            "80": 10739.0,
            "85": 17210.0,
            "90": 12167.0,
            "95": 12456.0,
            "100": 11589.0,
            "105": 13445.0,
            "110": 14807.0,
            "115": 11882.0,
            "120": 12425.0,
            "125": 11696.0,
            "130": 12874.0,
            "135": 11719.0,
            "140": 12660.0,
            "145": 17034.0,
            "150": 12668.0,
            "155": 14247.0,
            "160": 10069.0,
            "165": 16144.0,
            "170": 9903.0,
            "175": 14183.0,
            "180": 12261.0,
            "185": 12523.0,
            "190": 10611.0,
            "195": 15090.0,
            "200": 13729.0,
            "205": 13604.0,
            "210": 12246.0,
            "215": 11912.0,
            "220": 16876.0,
            "225": 14473.0,
            "230": 14743.0,
            "235": 14900.0,
            "240": 14588.0,
            "245": 13489.0,
            "250": 15237.0,
            "255": 15391.0,
            "260": 13542.0,
            "265": 17355.0,
            "270": 14088.0,
            "275": 13259.0,
            "280": 14138.0,
            "285": 13254.0,
            "290": 14182.0,
            "295": 15382.0,
            "300": 13137.0,
            "305": 12597.0,
            "310": 16148.0,
            "315": 16526.0,
            "320": 15265.0,
            "325": 15245.0,
            "330": 14420.0,
            "335": 12278.0,
            "340": 15325.0,
            "345": 14203.0,
            "350": 15636.0,
            "355": 13620.0,
            "360": 12816.0,
            "365": 15947.0,
            "370": 16936.0,
            "375": 13675.0,
            "380": 17271.0,
            "385": 14395.0,
            "390": 13666.0,
            "395": 14357.0,
            "400": 18807.0,
            "405": 18024.0,
            "410": 15076.0,
            "415": 18044.0,
            "420": 13713.0,
            "425": 19144.0,
            "430": 13480.0,
            "435": 13479.0,
            "440": 16351.0,
            "445": 16067.0,
            "450": 15695.0,
            "455": 16439.0,
            "460": 15762.0,
            "465": 16117.0,
            "470": 14570.0,
            "475": 16073.0,
            "480": 16836.0,
            "485": 15823.0,
            "490": 17428.0,
            "495": 15714.0,
            "500": 15473.0,
            "505": 14010.0,
            "510": 17357.0,
            "515": 16311.0,
            "520": 15036.0,
            "525": 13085.0,
            "530": 12049.0,
            "535": 15795.0,
            "540": 15218.0,
            "545": 13095.0,
            "550": 14667.0,
            "555": 16625.0,
            "560": 14978.0,
            "565": 16884.0,
            "570": 17396.0,
            "575": 15771.0,
            "580": 14244.0,
            "585": 17594.0,
            "590": 14149.0,
            "595": 16441.0,
            "600": 16689.0,
            "605": 14090.0,
            "610": 13665.0,
            "615": 15655.0,
            "620": 15122.0,
            "625": 14692.0,
            "630": 18197.0,
            "635": 16871.0,
            "640": 16280.0,
            "645": 15121.0,
            "650": 17339.0,
            "655": 15328.0,
            "660": 15865.0,
            "665": 11765.0,
            "670": 14995.0,
            "675": 17923.0,
            "680": 14080.0,
            "685": 14368.0,
            "690": 13369.0,
            "695": 15366.0,
            "700": 13216.0,
            "705": 15326.0,
            "710": 15574.0,
            "715": 15741.0,
            "720": 16788.0,
            "725": 18627.0,
            "730": 15154.0,
            "735": 15522.0,
            "740": 16941.0,
            "745": 15349.0,
            "750": 16371.0,
            "755": 14916.0,
            "760": 16842.0,
            "765": 15406.0,
            "770": 16940.0,
            "775": 15582.0,
            "780": 16231.0,
            "785": 15112.0,
            "790": 17318.0,
            "795": 14480.0,
            "800": 14988.0,
            "805": 14551.0,
            "810": 14601.0,
            "815": 17625.0,
            "820": 16695.0,
            "825": 12130.0,
            "830": 16119.0,
            "835": 15071.0,
            "840": 18768.0,
            "845": 14302.0,
            "850": 15455.0,
            "855": 15110.0,
            "860": 15268.0,
            "865": 15904.0,
            "870": 15028.0,
            "875": 16093.0,
            "880": 12820.0,
            "885": 14034.0,
            "890": 15560.0,
            "895": 16321.0,
            "900": 15868.0,
            "905": 15995.0,
            "910": 15372.0,
            "915": 15635.0,
            "920": 15878.0,
            "925": 17142.0,
            "930": 16179.0,
            "935": 14544.0,
            "940": 16970.0,
            "945": 15322.0,
            "950": 16045.0,
            "955": 15838.0,
            "960": 19384.0,
            "965": 14166.0,
            "970": 13564.0,
            "975": 13604.0,
            "980": 15645.0,
            "985": 16705.0,
            "990": 16844.0,
            "995": 15594.0,
            "1000": 18440.0,
            "1005": 13331.0,
            "1010": 15473.0,
            "1015": 17161.0,
            "1020": 15060.0,
            "1025": 16612.0,
            "1030": 14790.0,
            "1035": 14648.0,
            "1040": 15360.0,
            "1045": 19850.0,
            "1050": 14983.0,
            "1055": 18180.0,
            "1060": 15044.0,
            "1065": 15459.0,
            "1070": 16779.0,
            "1075": 14457.0,
            "1080": 14730.0,
            "1085": 14956.0,
            "1090": 18878.0,
            "1095": 17996.0,
            "1100": 15911.0,
            "1105": 18119.0,
            "1110": 14685.0,
            "1115": 18203.0,
            "1120": 14214.0,
            "1125": 16461.0,
            "1130": 17344.0,
            "1135": 16215.0,
            "1140": 17530.0,
            "1145": 17567.0,
            "1150": 14462.0,
            "1155": 15942.0,
            "1160": 14909.0,
            "1165": 18851.0,
            "1170": 22578.0,
            "1175": 14920.0,
            "1180": 16032.0,
            "1185": 18810.0,
            "1190": 16783.0,
            "1195": 14758.0,
            "1200": 18966.0,
            "1205": 12641.0,
            "1210": 14496.0,
            "1215": 16522.0,
            "1220": 14614.0,
            "1225": 14367.0,
            "1230": 17478.0,
            "1235": 15641.0,
            "1240": 15478.0,
            "1245": 18552.0,
            "1250": 16757.0,
            "1255": 15297.0,
            "1260": 15694.0,
            "1265": 14704.0,
            "1270": 14225.0,
            "1275": 14655.0,
            "1280": 14686.0,
            "1285": 17248.0,
            "1290": 16072.0,
            "1295": 18917.0,
            "1300": 17183.0,
            "1305": 17118.0,
            "1310": 15283.0,
            "1315": 17086.0,
            "1320": 16520.0,
            "1325": 18610.0,
            "1330": 14080.0,
            "1335": 14118.0,
            "1340": 17189.0,
            "1345": 16845.0,
            "1350": 17053.0,
            "1355": 14369.0,
            "1360": 16477.0,
            "1365": 17514.0,
            "1370": 15038.0,
            "1375": 17672.0,
            "1380": 18278.0,
            "1385": 16726.0,
            "1390": 22921.0,
            "1395": 15140.0,
            "1400": 15183.0,
            "1405": 14967.0,
            "1410": 16606.0,
            "1415": 15109.0,
            "1420": 19431.0,
            "1425": 17505.0,
            "1430": 18660.0,
            "1435": 20507.0,
            "1440": 15585.0,
            "1445": 18728.0,
            "1450": 14608.0,
            "1455": 16242.0,
            "1460": 15575.0,
            "1465": 15658.0,
            "1470": 17025.0,
            "1475": 16266.0,
            "1480": 15608.0,
            "1485": 17129.0,
            "1490": 15290.0,
            "1495": 16548.0,
            "1500": 15479.0,
            "1505": 15923.0,
            "1510": 19409.0,
            "1515": 13514.0,
            "1520": 16060.0,
            "1525": 15836.0,
            "1530": 18468.0,
            "1535": 13807.0,
            "1540": 14881.0,
            "1545": 15470.0,
            "1550": 12872.0,
            "1555": 17358.0,
            "1560": 16032.0,
            "1565": 23723.0,
            "1570": 19253.0,
            "1575": 14498.0,
            "1580": 16635.0,
            "1585": 16477.0,
            "1590": 14446.0,
            "1595": 13727.0,
            "1600": 19013.0,
            "1605": 16523.0,
            "1610": 15293.0,
            "1615": 19549.0,
            "1620": 17366.0,
            "1625": 14557.0,
            "1630": 16260.0,
            "1635": 17512.0,
            "1640": 15433.0,
            "1645": 19414.0,
            "1650": 17363.0,
            "1655": 15694.0,
            "1660": 16322.0,
            "1665": 19927.0,
            "1670": 15723.0,
            "1675": 17732.0,
            "1680": 16517.0,
            "1685": 16166.0,
            "1690": 14234.0,
            "1695": 15753.0,
            "1700": 15548.0,
            "1705": 15450.0,
            "1710": 20463.0,
            "1715": 15973.0,
            "1720": 17542.0,
            "1725": 18316.0,
            "1730": 15131.0,
            "1735": 15759.0,
            "1740": 16241.0,
            "1745": 15234.0,
            "1750": 14370.0,
            "1755": 16531.0,
            "1760": 16839.0,
            "1765": 17326.0,
            "1770": 15857.0,
            "1775": 21195.0,
            "1780": 14469.0,
            "1785": 16678.0,
            "1790": 16776.0,
            "1795": 17759.0,
            "1800": 15019.0,
            "1805": 14821.0,
            "1810": 18102.0,
            "1815": 20126.0,
            "1820": 14974.0,
            "1825": 14647.0,
            "1830": 15838.0,
            "1835": 14329.0,
            "1840": 14177.0,
            "1845": 18461.0,
            "1850": 15969.0,
            "1855": 16030.0,
            "1860": 17732.0,
            "1865": 20004.0,
            "1870": 14896.0,
            "1875": 16398.0,
            "1880": 17982.0,
            "1885": 15943.0,
            "1890": 17442.0,
            "1895": 15338.0,
            "1900": 12685.0,
            "1905": 14507.0,
            "1910": 17727.0,
            "1915": 16421.0,
            "1920": 15923.0,
            "1925": 15230.0,
            "1930": 15692.0,
            "1935": 18265.0,
            "1940": 16992.0,
            "1945": 16655.0,
            "1950": 16376.0,
            "1955": 17354.0,
            "1960": 19155.0,
            "1965": 15851.0,
            "1970": 16382.0,
            "1975": 17019.0,
            "1980": 17139.0,
            "1985": 17579.0,
            "1990": 17860.0,
            "1995": 16695.0,
            "2000": 15874.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 2000,
        "step_interval": 5,
        "values": {
            "1": "nan",
            "5": "nan",
            "10": "nan",
            "15": "nan",
            "20": "nan",
            "25": "nan",
            "30": "nan",
            "35": "nan",
            "40": "nan",
            "45": "nan",
            "50": "nan",
            "55": "nan",
            "60": "nan",
            "65": "nan",
            "70": "nan",
            "75": "nan",
            "80": "nan",
            "85": "nan",
            "90": "nan",
            "95": "nan",
            "100": 0.58989,
            "105": "nan",
            "110": "nan",
            "115": "nan",
            "120": "nan",
            "125": "nan",
            "130": "nan",
            "135": "nan",
            "140": "nan",
            "145": "nan",
            "150": "nan",
            "155": "nan",
            "160": "nan",
            "165": "nan",
            "170": "nan",
            "175": "nan",
            "180": "nan",
            "185": "nan",
            "190": "nan",
            "195": "nan",
            "200": 0.26173,
            "205": "nan",
            "210": "nan",
            "215": "nan",
            "220": "nan",
            "225": "nan",
            "230": "nan",
            "235": "nan",
            "240": "nan",
            "245": "nan",
            "250": "nan",
            "255": "nan",
            "260": "nan",
            "265": "nan",
            "270": "nan",
            "275": "nan",
            "280": "nan",
            "285": "nan",
            "290": "nan",
            "295": "nan",
            "300": 0.26925,
            "305": "nan",
            "310": "nan",
            "315": "nan",
            "320": "nan",
            "325": "nan",
            "330": "nan",
            "335": "nan",
            "340": "nan",
            "345": "nan",
            "350": "nan",
            "355": "nan",
            "360": "nan",
            "365": "nan",
            "370": "nan",
            "375": "nan",
            "380": "nan",
            "385": "nan",
            "390": "nan",
            "395": "nan",
            "400": 0.2606,
            "405": "nan",
            "410": "nan",
            "415": "nan",
            "420": "nan",
            "425": "nan",
            "430": "nan",
            "435": "nan",
            "440": "nan",
            "445": "nan",
            "450": "nan",
            "455": "nan",
            "460": "nan",
            "465": "nan",
            "470": "nan",
            "475": "nan",
            "480": "nan",
            "485": "nan",
            "490": "nan",
            "495": "nan",
            "500": 0.25904,
            "505": "nan",
            "510": "nan",
            "515": "nan",
            "520": "nan",
            "525": "nan",
            "530": "nan",
            "535": "nan",
            "540": "nan",
            "545": "nan",
            "550": "nan",
            "555": "nan",
            "560": "nan",
            "565": "nan",
            "570": "nan",
            "575": "nan",
            "580": "nan",
            "585": "nan",
            "590": "nan",
            "595": "nan",
            "600": 0.27358,
            "605": "nan",
            "610": "nan",
            "615": "nan",
            "620": "nan",
            "625": "nan",
            "630": "nan",
            "635": "nan",
            "640": "nan",
            "645": "nan",
            "650": "nan",
            "655": "nan",
            "660": "nan",
            "665": "nan",
            "670": "nan",
            "675": "nan",
            "680": "nan",
            "685": "nan",
            "690": "nan",
            "695": "nan",
            "700": 0.30298,
            "705": "nan",
            "710": "nan",
            "715": "nan",
            "720": "nan",
            "725": "nan",
            "730": "nan",
            "735": "nan",
            "740": "nan",
            "745": "nan",
            "750": "nan",
            "755": "nan",
            "760": "nan",
            "765": "nan",
            "770": "nan",
            "775": "nan",
            "780": "nan",
            "785": "nan",
            "790": "nan",
            "795": "nan",
            "800": 0.29186,
            "805": "nan",
            "810": "nan",
            "815": "nan",
            "820": "nan",
            "825": "nan",
            "830": "nan",
            "835": "nan",
            "840": "nan",
            "845": "nan",
            "850": "nan",
            "855": "nan",
            "860": "nan",
            "865": "nan",
            "870": "nan",
            "875": "nan",
            "880": "nan",
            "885": "nan",
            "890": "nan",
            "895": "nan",
            "900": 0.29862,
            "905": "nan",
            "910": "nan",
            "915": "nan",
            "920": "nan",
            "925": "nan",
            "930": "nan",
            "935": "nan",
            "940": "nan",
            "945": "nan",
            "950": "nan",
            "955": "nan",
            "960": "nan",
            "965": "nan",
            "970": "nan",
            "975": "nan",
            "980": "nan",
            "985": "nan",
            "990": "nan",
            "995": "nan",
            "1000": 0.27405,
            "1005": "nan",
            "1010": "nan",
            "1015": "nan",
            "1020": "nan",
            "1025": "nan",
            "1030": "nan",
            "1035": "nan",
            "1040": "nan",
            "1045": "nan",
            "1050": "nan",
            "1055": "nan",
            "1060": "nan",
            "1065": "nan",
            "1070": "nan",
            "1075": "nan",
            "1080": "nan",
            "1085": "nan",
            "1090": "nan",
            "1095": "nan",
            "1100": 0.27684,
            "1105": "nan",
            "1110": "nan",
            "1115": "nan",
            "1120": "nan",
            "1125": "nan",
            "1130": "nan",
            "1135": "nan",
            "1140": "nan",
            "1145": "nan",
            "1150": "nan",
            "1155": "nan",
            "1160": "nan",
            "1165": "nan",
            "1170": "nan",
            "1175": "nan",
            "1180": "nan",
            "1185": "nan",
            "1190": "nan",
            "1195": "nan",
            "1200": 0.27559,
            "1205": "nan",
            "1210": "nan",
            "1215": "nan",
            "1220": "nan",
            "1225": "nan",
            "1230": "nan",
            "1235": "nan",
            "1240": "nan",
            "1245": "nan",
            "1250": "nan",
            "1255": "nan",
            "1260": "nan",
            "1265": "nan",
            "1270": "nan",
            "1275": "nan",
            "1280": "nan",
            "1285": "nan",
            "1290": "nan",
            "1295": "nan",
            "1300": 0.27282,
            "1305": "nan",
            "1310": "nan",
            "1315": "nan",
            "1320": "nan",
            "1325": "nan",
            "1330": "nan",
            "1335": "nan",
            "1340": "nan",
            "1345": "nan",
            "1350": "nan",
            "1355": "nan",
            "1360": "nan",
            "1365": "nan",
            "1370": "nan",
            "1375": "nan",
            "1380": "nan",
            "1385": "nan",
            "1390": "nan",
            "1395": "nan",
            "1400": 0.25948,
            "1405": "nan",
            "1410": "nan",
            "1415": "nan",
            "1420": "nan",
            "1425": "nan",
            "1430": "nan",
            "1435": "nan",
            "1440": "nan",
            "1445": "nan",
            "1450": "nan",
            "1455": "nan",
            "1460": "nan",
            "1465": "nan",
            "1470": "nan",
            "1475": "nan",
            "1480": "nan",
            "1485": "nan",
            "1490": "nan",
            "1495": "nan",
            "1500": 0.27252,
            "1505": "nan",
            "1510": "nan",
            "1515": "nan",
            "1520": "nan",
            "1525": "nan",
            "1530": "nan",
            "1535": "nan",
            "1540": "nan",
            "1545": "nan",
            "1550": "nan",
            "1555": "nan",
            "1560": "nan",
            "1565": "nan",
            "1570": "nan",
            "1575": "nan",
            "1580": "nan",
            "1585": "nan",
            "1590": "nan",
            "1595": "nan",
            "1600": 0.27844,
            "1605": "nan",
            "1610": "nan",
            "1615": "nan",
            "1620": "nan",
            "1625": "nan",
            "1630": "nan",
            "1635": "nan",
            "1640": "nan",
            "1645": "nan",
            "1650": "nan",
            "1655": "nan",
            "1660": "nan",
            "1665": "nan",
            "1670": "nan",
            "1675": "nan",
            "1680": "nan",
            "1685": "nan",
            "1690": "nan",
            "1695": "nan",
            "1700": 0.27607,
            "1705": "nan",
            "1710": "nan",
            "1715": "nan",
            "1720": "nan",
            "1725": "nan",
            "1730": "nan",
            "1735": "nan",
            "1740": "nan",
            "1745": "nan",
            "1750": "nan",
            "1755": "nan",
            "1760": "nan",
            "1765": "nan",
            "1770": "nan",
            "1775": "nan",
            "1780": "nan",
            "1785": "nan",
            "1790": "nan",
            "1795": "nan",
            "1800": 0.27527,
            "1805": "nan",
            "1810": "nan",
            "1815": "nan",
            "1820": "nan",
            "1825": "nan",
            "1830": "nan",
            "1835": "nan",
            "1840": "nan",
            "1845": "nan",
            "1850": "nan",
            "1855": "nan",
            "1860": "nan",
            "1865": "nan",
            "1870": "nan",
            "1875": "nan",
            "1880": "nan",
            "1885": "nan",
            "1890": "nan",
            "1895": "nan",
            "1900": 0.27903,
            "1905": "nan",
            "1910": "nan",
            "1915": "nan",
            "1920": "nan",
            "1925": "nan",
            "1930": "nan",
            "1935": "nan",
            "1940": "nan",
            "1945": "nan",
            "1950": "nan",
            "1955": "nan",
            "1960": "nan",
            "1965": "nan",
            "1970": "nan",
            "1975": "nan",
            "1980": "nan",
            "1985": "nan",
            "1990": "nan",
            "1995": "nan",
            "2000": 0.31051
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/bert/bert_release_sm/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: "1"
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: "1"
  NON_DETERMINSTIC_RESULTS: 1
TEST_TYPE: "release"
MODEL_ARGS:
  # Bert model args
  --num-layers: 24
  --hidden-size: 1024
  --num-attention-heads: 16
  --seq-length: 512
  --max-position-embeddings: 512
  # Training args
  --micro-batch-size: 4
  --global-batch-size: 32
  --train-iters: 20000
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --fp16: true
  --lr: 0.0001
  --lr-decay-style: linear
  --min-lr: 1.0e-5
  --lr-warmup-fraction: .01
  --bert-no-binary-head: true
  # Model parallel
  --tensor-model-parallel-size: 8
  --pipeline-model-parallel-size: 8
  # Data args
  --data-path: ${DATA_BLEND}
  --vocab-file: ${DATA_PATH}/text/the_pile/bert_shard00/vocab.txt
  --split: 949,50,1
  --data-cache-path: ${DATA_CACHE_PATH}
  # EVAL_AND_LOGGING_ARGS
  --log-interval: 100
  --save-interval: 2000
  --save-retain-interval: 10000
  --eval-interval: 1000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --eval-iters: 10
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  --log-num-zeros-in-grad: true
  --log-params-norm: true
  --log-validation-ppl-to-tensorboard: true
  --wandb-project: megatron-core-release-runs
  --wandb-entity: adlr
  --wandb-exp-name: ${WANDB_EXPERIMENT}
  --attention-backend: unfused
  --exit-interval: 2000
  --wandb-save-dir: ${WANDB_SAVE_PATH}
METRICS:
  - "iteration-time"
  - "lm loss"
  - "mem-allocated-bytes"
  - "mem-max-allocated-bytes"


================================================
FILE: tests/functional_tests/test_cases/common/ckpt_converter/__main__.py
================================================
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.

import os
import shutil
import subprocess
import sys
import time
import types
import typing as T
from collections import namedtuple
from functools import partial

import numpy as np
import torch
from tqdm import tqdm

from gpt_builders import gpt_builder
from megatron.core import parallel_state
from megatron.core.datasets.gpt_dataset import _get_ltor_masks_and_position_ids
from megatron.core.enums import ModelType
from megatron.core.models.multimodal.llava_model import DEFAULT_IMAGE_TOKEN_INDEX
from megatron.core.pipeline_parallel import get_forward_backward_func
from megatron.core.tensor_parallel.mappings import gather_from_tensor_model_parallel_region
from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed
from megatron.training import get_args, get_tokenizer
from megatron.training.arguments import parse_args, validate_args
from megatron.training.checkpointing import load_checkpoint as _load_checkpoint
from megatron.training.checkpointing import save_checkpoint as _save_checkpoint
from megatron.training.global_vars import set_global_variables, unset_global_variables
from megatron.training.training import get_model
from model_provider import model_provider
from tests.unit_tests.test_utilities import Utils

CHECKPOINTS_DIR = "/tmp/ckpt-converter-tests"
FORWARD_ITERS = 1  # *3
SKIP_CONVERSION = False


def is_model_parallel_rank_0():
    return (
        parallel_state.get_tensor_model_parallel_rank() == 0
        and parallel_state.get_pipeline_model_parallel_rank() == 0
    )


def broadcast(item):
    """Broadcast data from TP rank 0 to other ranks."""
    if item is not None:
        torch.distributed.broadcast(
            item,
            parallel_state.get_tensor_model_parallel_src_rank(),
            group=parallel_state.get_tensor_model_parallel_group(),
        )


class TempSharedDir:
    """Context that makes & removes a directory to hold the checkpoints."""

    def __enter__(self):
        """Make checkpoint directory."""
        torch.distributed.barrier()
        if torch.distributed.get_rank() == 0:
            shutil.rmtree(CHECKPOINTS_DIR, ignore_errors=True)
            os.mkdir(CHECKPOINTS_DIR)
        torch.distributed.barrier()

    def __exit__(self, exc_type, exc_value, exc_tb):
        """Remove checkpoint directory."""
        torch.distributed.barrier()
        if torch.distributed.get_rank() == 0:
            shutil.rmtree(CHECKPOINTS_DIR, ignore_errors=True)
        torch.distributed.barrier()


_ModelParallelState = namedtuple("_ModelParallelState", "tp pp ep")


class ModelParallelState(_ModelParallelState):
    """Parallel state struct, that contains TP, PP, and EP."""

    def __new__(cls, tp=1, pp=1, ep=1):
        return super(ModelParallelState, cls).__new__(cls, tp, pp, ep)

    def __str__(self):
        return f"t{self.tp},p{self.pp},e{self.ep}"


class ModelMeta:
    """Basic information about a model.

    Args:
        format (str): 'core', 'meta', 'hf', or 'llava'.
        mp (ModelParallelState): Defines TP, PP, EP.
        transformer_impl (str): 'transformer_engine' or 'local'.
    """

    def __init__(self, format: str, mp: ModelParallelState, transformer_impl: str = None):

        if isinstance(mp, tuple):
            mp = ModelParallelState(*mp)
        if transformer_impl is None:
            transformer_impl = "transformer_engine" if format in ("core", "llava") else "local"

        assert format in ("core", "meta", "hf", "llava")
        assert isinstance(mp, ModelParallelState)
        assert transformer_impl in ("transformer_engine", "local")

        self.format = format
        self.mp = mp
        self.transformer_impl = transformer_impl

    def __str__(self):
        return f"{self.format}|({self.mp})|{self.transformer_impl}"


class Pipeline:
    """A pipeline manages a single conversion and validation.

    The pipeline consists of the following steps:
    - Initialize model & inference pass.
    - Save model.
    - Convert model.
    - Load model & inference pass.
    - Validate before/after output tensors.

    Args:
        src (ModelMeta): Model meta for loading.
        dst (ModelMeta): Model meta for storing.
    """

    def __init__(self, src: ModelMeta, dst: ModelMeta):
        """Source & destination metas."""
        assert isinstance(src, ModelMeta)
        assert isinstance(dst, ModelMeta)
        self.src = src
        self.dst = dst

    def __str__(self):
        return f"src <{self.src}>; dst <{self.dst}>"

    def get_model_argv(self):
        """Get argv list for customizing initialization."""
        raise NotImplementedError(self.__class__.__name__ + ".get_model_argv()")

    def get_converter_model_type(self):
        """Get converter type: 'GPT' or 'Bert'."""
        raise NotImplementedError(self.__class__.__name__ + ".get_converter_model_type()")

    def get_meta(self, key):
        """Get meta from key, which must be either 'src' or 'dst'."""
        assert key in ("src", "dst")
        return getattr(self, f"{key}")

    def init_args_and_model(self, key):
        """Initialize Megatron and build model."""

        meta = self.get_meta(key)

        # Destroy & initialize new parallel state.
        unset_global_variables()
        Utils.destroy_model_parallel()
        Utils.initialize_model_parallel(
            tensor_model_parallel_size=meta.mp.tp,
            pipeline_model_parallel_size=meta.mp.pp,
            expert_model_parallel_size=meta.mp.ep,
        )

        # Environment vars.
        os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "1"
        os.environ["NVTE_ALLOW_NONDETERMINISTIC_ALGO"] = "0"

        # Command line args.
        sys.argv = [
            "[script]",
            *self.get_model_argv(),
            "--tensor-model-parallel-size",
            str(meta.mp.tp),
            "--pipeline-model-parallel-size",
            str(meta.mp.pp),
            "--expert-model-parallel-size",
            str(meta.mp.ep),
            "--save-interval",
            "2",
            "--save",
            os.path.join(CHECKPOINTS_DIR, "src"),
            "--load",
            os.path.join(CHECKPOINTS_DIR, "dst" if not SKIP_CONVERSION else "src"),
            "--ckpt-format",
            "torch",
            "--use-checkpoint-args",
            "--no-save-optim",
            "--no-save-rng",
            "--no-load-optim",
            "--no-load-rng",
            "--bf16",
            "--use-cpu-initialization",
            "--no-one-logger",
            "--transformer-impl",
            meta.transformer_impl,
        ]

        # Fail on missing checkpoint.
        if key == "dst":
            sys.argv.append("--exit-on-missing-checkpoint")

        # Parse args.
        args = parse_args()
        validate_args(args)

        # Set global args, build tokenizer.
        unset_global_variables()
        set_global_variables(args)

        # Random seed.
        torch.manual_seed(123)
        model_parallel_cuda_manual_seed(123)

        # Model.
        models = self.build_model()

        return args, models

    @staticmethod
    def build_model():
        model_provider_func = partial(model_provider, gpt_builder)
        models = get_model(
            model_provider_func=model_provider_func, model_type=ModelType.encoder_or_decoder
        )
        [m.eval() for m in models]

        return models

    @staticmethod
    def get_input_ids():
        """Randomly initialize input token IDs."""
        if is_model_parallel_rank_0():
            # Generate different data on each DP rank.
            args = get_args()

            orig_numpy_seed = np.random.get_state()[1][0]
            temp_numpy_seed = orig_numpy_seed + torch.distributed.get_rank()

            np.random.seed(temp_numpy_seed)
            numpy_input_ids = np.random.randint(
                low=0, high=args.vocab_size, size=(args.seq_length,), dtype=np.int64
            )
            np.random.seed(orig_numpy_seed)

            torch_input_ids = torch.from_numpy(numpy_input_ids).to("cuda")

            return torch_input_ids
        else:
            return None

    @staticmethod
    def get_batch(input_ids):
        """Get batch of data, from input token IDs."""

        args = get_args()

        # TP rank 0, PP rank 0.
        # (Note: mimics megatron/training/utils.py:get_batch_on_this_tp_rank().)
        if is_model_parallel_rank_0():

            tokenizer = get_tokenizer()

            attention_mask, loss_mask, position_ids = _get_ltor_masks_and_position_ids(
                data=input_ids,
                eod_token=tokenizer.eod,
                reset_position_ids=args.reset_position_ids,
                reset_attention_mask=args.reset_attention_mask,
                eod_mask_loss=args.eod_mask_loss,
                create_attention_mask=args.create_attention_mask_in_dataloader,
            )
            input_ids = input_ids.unsqueeze(0)
            position_ids = position_ids.unsqueeze(0)
            attention_mask = attention_mask.unsqueeze(0)

        # Other TP ranks on PP rank 0.
        elif parallel_state.is_pipeline_first_stage():
            input_ids = torch.empty(
                (args.micro_batch_size, args.seq_length),
                dtype=torch.int64,
                device=torch.cuda.current_device(),
            )
            position_ids = torch.empty(
                (args.micro_batch_size, args.seq_length),
                dtype=torch.int64,
                device=torch.cuda.current_device(),
            )
            if args.create_attention_mask_in_dataloader:
                attention_mask = torch.empty(
                    (args.micro_batch_size, 1, args.seq_length, args.seq_length),
                    dtype=torch.bool,
                    device=torch.cuda.current_device(),
                )
            else:
                attention_mask = None

        # Other PP ranks.
        # (Note: mimics pretrain_gpt.py:get_batch().)
        else:
            input_ids = None
            position_ids = None
            attention_mask = None

        # Broadcast.
        if parallel_state.is_pipeline_first_stage():
            broadcast(input_ids)
            broadcast(attention_mask)
            broadcast(position_ids)

        return input_ids, position_ids, attention_mask

    @classmethod
    def forward_step(cls, orig_input_ids: T.Iterator, model: torch.nn.Module):
        """Forward step.

        Args:
            orig_input_ids (T.Iterator): Input token IDs.
            model (GPTModel): The GPT Model.
        """

        # Unpack input ids.
        orig_input_ids = list(orig_input_ids)[0]

        # Get batch.
        input_ids, position_ids, attention_mask = cls.get_batch(orig_input_ids)

        # Forward pass test data (multi iters for JIT warm-up).
        for _ in range(FORWARD_ITERS):
            output_tensor = model(input_ids, position_ids, attention_mask)

        # Aggregate data, for validation.
        data = {
            "orig_input_ids": orig_input_ids,
            "input_ids": input_ids,
            "position_ids": position_ids,
            "attention_mask": attention_mask,
            "output_tensor": output_tensor,
        }

        return output_tensor, lambda _, non_loss_data: data

    @classmethod
    def forward_model(cls, models, orig_input_ids):
        """Forward pass data, and gather parallel output tensors."""

        args = get_args()

        # Forward pass.
        forward_backward_func = get_forward_backward_func()
        data = forward_backward_func(
            forward_step_func=cls.forward_step,
            data_iterator=iter([orig_input_ids]),
            model=models,
            num_microbatches=1,
            seq_length=args.seq_length,
            micro_batch_size=args.micro_batch_size,
            forward_only=True,
            collect_non_loss_data=True,
        )
        if parallel_state.is_pipeline_last_stage():
            output_tensor = data[0]["output_tensor"]
        else:
            output_tensor = None

        # All-gather across the partitions.
        if parallel_state.is_pipeline_last_stage():
            output_tensor_gathered = gather_from_tensor_model_parallel_region(output_tensor)
        else:
            output_tensor_gathered = None

        return output_tensor_gathered

    def rand_init_model_params(self, key, models):
        """Randomly initialize model params."""

        meta = self.get_meta(key)

        # The test is only designed to work with single model
        assert len(models) == 1
        model = models[0]

        with torch.no_grad():

            # Randomly initialize all params.
            for m in models:
                for p in m.parameters():
                    p.normal_(0, 0.1)

            # Synchronize embeddings.
            if meta.mp.pp != 1:
                emb = model.module.module.shared_embedding_or_output_weight()
                # Make embedding the same on ranks that has is
                if emb is not None:
                    torch.distributed.all_reduce(emb, group=parallel_state.get_embedding_group())

    def save_checkpoint(self):
        """Initialize params, forward pass data, and save checkpoint."""

        args, models = self.init_args_and_model("src")

        # Init params.
        self.rand_init_model_params("src", models)

        # Test data.
        orig_input_ids = self.get_input_ids()
        output_tensor = self.forward_model(models, orig_input_ids)

        # Save checkpoint.
        _save_checkpoint(
            iteration=2,
            model=models,
            optimizer=None,
            opt_param_scheduler=None,
            num_floating_point_operations_so_far=None,
        )

        return output_tensor, orig_input_ids

    def load_checkpoint(self, orig_input_ids):
        """Load checkpoint, and forward pass data."""

        args, models = self.init_args_and_model("dst")

        # Load checkpoint.
        args.iteration, args.num_floating_point_operations_so_far = _load_checkpoint(
            models, optimizer=None, opt_param_scheduler=None
        )

        # Test data.
        output_tensor_real = self.forward_model(models, orig_input_ids)

        # Random output tensor.
        # Note: need two random initializations to differ from `save_checkpoint()` above.
        self.rand_init_model_params("dst", models)
        self.rand_init_model_params("dst", models)
        self.rand_init_model_params("dst", models)
        output_tensor_fake = self.forward_model(models, orig_input_ids)

        return output_tensor_real, output_tensor_fake

    def convert_checkpoint(self):
        """Convert checkpoint"""

        args = get_args()

        torch.distributed.barrier()

        # Convert.
        if torch.distributed.get_rank() == 0:

            cmd = [
                "python",
                "tools/checkpoint/convert.py",
                "--model-type",
                self.get_converter_model_type(),
                "--loader",
                self.src.format,
                "--load-dir",
                args.save,
                "--loader-transformer-impl",
                self.src.transformer_impl,
                "--saver",
                self.dst.format,
                "--save-dir",
                args.load,
                "--saver-transformer-impl",
                self.dst.transformer_impl,
                "--target-tensor-parallel-size",
                str(self.dst.mp.tp),
                "--target-pipeline-parallel-size",
                str(self.dst.mp.pp),
                "--megatron-path",
                os.getcwd(),
            ]
            print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
            print("convert checkpoint cmd: %s" % " ".join(cmd))
            print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

            result = subprocess.run(cmd)

            assert result.returncode == 0, "checkpoint conversion failed."

        torch.distributed.barrier()

    def run(self):
        """Run pipeline.

        Running a pipeline consists of:

        - Save checkpoint (includes initializing params & forward passing data).
        - Convert checkpoint.
        - Load checkpoint (includes forward passing data).
        - Validate before/after output tensors.
        """

        Utils.initialize_model_parallel(
            tensor_model_parallel_size=self.src.mp.tp,
            pipeline_model_parallel_size=self.src.mp.pp,
            expert_model_parallel_size=self.src.mp.ep,
        )
        with TempSharedDir():

            # Save checkpoint.
            src_output_tensor, input_ids = self.save_checkpoint()

            # Convert checkpoint.
            if not SKIP_CONVERSION:
                self.convert_checkpoint()

            # Load checkpoint.
            dst_output_tensor_real, dst_output_tensor_fake = self.load_checkpoint(input_ids)

            # Validate output tensor.
            torch.distributed.barrier()
            rank = torch.distributed.get_rank()
            world_size = torch.distributed.get_world_size()
            if rank == world_size - 1:
                args = get_args()
                get_mse = lambda dst_output_tensor: torch.nn.MSELoss()(
                    src_output_tensor[:, :, : args.vocab_size],
                    dst_output_tensor[:, :, : args.vocab_size],
                ).item()
                mse_real = get_mse(dst_output_tensor_real)
                mse_fake = get_mse(dst_output_tensor_fake)
            torch.distributed.barrier()

            # Teardown.
            unset_global_variables()
            Utils.destroy_model_parallel()

            # Broadcast MSE's.
            mses = torch.zeros((2,), dtype=torch.float, device="cuda")
            if rank == world_size - 1:
                mses[0] = mse_real
                mses[1] = mse_fake
            torch.distributed.broadcast(mses, world_size - 1)

            return mses.tolist()


class GPTPipeline(Pipeline):
    """GPT-specific pipeline customizations.

    Args:
        src (Union[ModelMeta, Tuple]): Model meta for loading.
        dst (Union[ModelMeta, Tuple]): Model meta for storing.
        num_moe_experts (Optional[int]): Number of MoE experts.
    """

    def __init__(self, src: ModelMeta, dst: ModelMeta, num_moe_experts: T.Optional[int] = None):
        super().__init__(ModelMeta(*src), ModelMeta(*dst))
        assert isinstance(num_moe_experts, (int, types.NoneType))
        self.num_moe_experts = num_moe_experts

    def __str__(self):
        return "%s; moe %s" % (
            super().__str__(),
            "--" if self.num_moe_experts is None else self.num_moe_experts,
        )

    def get_model_argv(self):
        """GPT model args."""
        args = [
            "--num-layers",
            "8",
            "--hidden-size",
            "16",
            "--num-attention-heads",
            "8",
            "--seq-length",
            "16",
            "--max-position-embeddings",
            "16",
            "--micro-batch-size",
            "1",  # single sample generated.
            "--tokenizer-type",
            "NullTokenizer",
            "--vocab-size",
            "127",  # ... NullTokenizer adds +1 EOD token.
            "--make-vocab-size-divisible-by",
            "1",
        ]
        if self.num_moe_experts is not None and self.num_moe_experts > 1:
            args.extend(["--num-experts", str(self.num_moe_experts or 1), "--sequence-parallel"])
        return args

    def get_converter_model_type(self):
        return "GPT"


class LLaVAPipeline(Pipeline):
    def __init__(
        self, src: ModelMeta, dst: ModelMeta, language_model_type: str, vision_model_type: str
    ):
        super().__init__(ModelMeta(*src), ModelMeta(*dst))
        self.language_model_type = language_model_type
        self.vision_model_type = vision_model_type
        sys.path.insert(0, './examples/multimodal')

    def __str__(self):
        return "%s; lang %s; vis %s" % (
            super().__str__(),
            self.language_model_type,
            self.vision_model_type,
        )

    def get_model_argv(self):
        """LLaVA model args."""
        args = [
            "--use-te",
            "--num-layers",
            "8",
            "--hidden-size",
            "64",
            "--num-attention-heads",
            "8",
            "--seq-length",
            "128",
            "--max-position-embeddings",
            "1024",
            "--micro-batch-size",
            "1",  # single sample generated.
            "--tokenizer-type",
            "NullMultimodalTokenizer",
            "--vocab-size",
            "127",  # ... NullTokenizer adds +1 EOD token.
            "--make-vocab-size-divisible-by",
            "1",
            "--language-model-type",
            self.language_model_type,
            "--vision-model-type",
            self.vision_model_type,
            "--tokenizer-prompt-format",
            "llama3",  # dummy value since using NullMultimodalTokenizer. maybe need actual dummy value
            "--decoder-seq-length",
            "1024",
            "--img-w",
            "140",
            "--img-h",
            "140",
            "--patch-dim",
            "14",
        ]
        return args

    @staticmethod
    def get_test_image():
        args = get_args()
        test_image = torch.ones((1, 3, args.img_h, args.img_w)).to("cuda")
        return test_image

    @staticmethod
    def get_input_ids():
        """Randomly initialize input token IDs."""
        if is_model_parallel_rank_0():
            # Generate different data on each DP rank.
            args = get_args()

            orig_numpy_seed = np.random.get_state()[1][0]
            temp_numpy_seed = orig_numpy_seed + torch.distributed.get_rank()

            np.random.seed(temp_numpy_seed)
            # TODO: CHANGE TEMP SIZE TO SOMETHING REAL
            numpy_input_ids = np.random.randint(
                low=0, high=args.vocab_size, size=(args.seq_length,), dtype=np.int64
            )
            np.random.seed(orig_numpy_seed)

            numpy_input_ids[0] = DEFAULT_IMAGE_TOKEN_INDEX

            torch_input_ids = torch.from_numpy(numpy_input_ids).to("cuda")

            return torch_input_ids
        else:
            return None

    @classmethod
    def forward_step(cls, orig_input_ids: T.Iterator, model: torch.nn.Module):
        """Forward step.

        Args:
            orig_input_ids (T.Iterator): Input token IDs.
            model (GPTModel): The GPT Model.
        """

        # Unpack input ids.
        orig_input_ids = list(orig_input_ids)[0]

        # Get batch.
        input_ids, position_ids, _ = cls.get_batch(orig_input_ids)

        # Forward pass test data (multi iters for JIT warm-up).
        for _ in range(FORWARD_ITERS):
            output_tensor = model(cls.get_test_image(), input_ids, position_ids, None)

        # Aggregate data, for validation.
        data = {
            "orig_input_ids": orig_input_ids,
            "input_ids": input_ids,
            "position_ids": position_ids,
            "attention_mask": None,
            "output_tensor": output_tensor,
        }

        return output_tensor, lambda _, non_loss_data: data

    @classmethod
    def forward_model(cls, models, orig_input_ids):
        """Forward pass data, and gather parallel output tensors."""

        args = get_args()

        # Forward pass.
        forward_backward_func = get_forward_backward_func()
        data = forward_backward_func(
            forward_step_func=cls.forward_step,
            data_iterator=iter([orig_input_ids]),
            model=models,
            num_microbatches=1,
            seq_length=args.seq_length,
            micro_batch_size=args.micro_batch_size,
            forward_only=True,
            collect_non_loss_data=True,
        )

        if parallel_state.is_pipeline_last_stage():
            output_tensor = data[0]["output_tensor"][0]
        else:
            output_tensor = None

        # All-gather across the partitions.
        if parallel_state.is_pipeline_last_stage():
            output_tensor_gathered = gather_from_tensor_model_parallel_region(output_tensor)
        else:
            output_tensor_gathered = None

        return output_tensor_gathered

    @staticmethod
    def build_model():
        from examples.multimodal.model import model_provider

        models = get_model(
            model_provider_func=model_provider, model_type=ModelType.encoder_or_decoder
        )
        [m.eval() for m in models]

        return models

    def get_converter_model_type(self):
        return "GPT"

    def init_args_and_model(self, key):
        """Initialize Megatron and build model."""

        meta = self.get_meta(key)

        # Destroy & initialize new parallel state.
        unset_global_variables()
        Utils.destroy_model_parallel()
        Utils.initialize_model_parallel(
            tensor_model_parallel_size=meta.mp.tp,
            pipeline_model_parallel_size=meta.mp.pp,
            expert_model_parallel_size=meta.mp.ep,
        )

        # Environment vars.
        os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "1"
        os.environ["NVTE_ALLOW_NONDETERMINISTIC_ALGO"] = "0"

        # Command line args.
        sys.argv = [
            "[script]",
            *self.get_model_argv(),
            "--tensor-model-parallel-size",
            str(meta.mp.tp),
            "--pipeline-model-parallel-size",
            str(meta.mp.pp),
            "--expert-model-parallel-size",
            str(meta.mp.ep),
            "--save-interval",
            "2",
            "--save",
            os.path.join(CHECKPOINTS_DIR, "src"),
            "--load",
            os.path.join(CHECKPOINTS_DIR, "dst" if not SKIP_CONVERSION else "src"),
            "--ckpt-format",
            "torch",
            "--use-checkpoint-args",
            "--no-save-optim",
            "--no-save-rng",
            "--no-load-optim",
            "--no-load-rng",
            "--bf16",
            "--use-cpu-initialization",
            "--no-one-logger",
            "--transformer-impl",
            meta.transformer_impl,
        ]

        # Fail on missing checkpoint.
        if key == "dst":
            sys.argv.append("--exit-on-missing-checkpoint")

        # Parse args.
        from examples.multimodal.multimodal_args import add_multimodal_extra_args

        args = parse_args(extra_args_provider=add_multimodal_extra_args)
        validate_args(args)

        # Set global args, build tokenizer.
        unset_global_variables()
        set_global_variables(args)

        # Random seed.
        torch.manual_seed(123)
        model_parallel_cuda_manual_seed(123)

        # Model.
        models = self.build_model()

        return args, models


def get_gpt_pipelines():
    """Get GPT (non-MoE) pipelines."""
    return [
        GPTPipeline(("core", (8, 1)), ("core", (1, 8))),
        GPTPipeline(("core", (4, 2)), ("core", (2, 4))),
        GPTPipeline(("core", (2, 4)), ("core", (4, 2))),
        GPTPipeline(("core", (1, 8)), ("core", (8, 1))),
        GPTPipeline(("core", (4, 2)), ("core", (2, 4), "local")),
        GPTPipeline(("core", (4, 2), "local"), ("core", (2, 4), "local")),
        GPTPipeline(("core", (4, 2), "local"), ("core", (2, 4))),
        # [todo] GPTPipeline("meta", "core", None, (8, 1)),
        # [todo] GPTPipeline("hf", "core", None, (8, 1)),
    ]


def get_moe_pipelines():
    """Get MoE pipelines."""
    return [
        GPTPipeline(("core", (2, 1, 2)), ("core", (1, 4, 1)), num_moe_experts=8),
        GPTPipeline(("core", (1, 4, 1)), ("core", (2, 1, 2)), num_moe_experts=4),
    ]


def get_llava_pipelines():
    return [
        LLaVAPipeline(
            ("llava", (8, 1)),
            ("llava", (8, 1)),
            language_model_type="llama3.2_1b",
            vision_model_type="siglip",
        ),
        LLaVAPipeline(
            ("llava", (8, 1)),
            ("llava", (8, 1)),
            language_model_type="llama3.2_1b",
            vision_model_type="radio",
        ),
        LLaVAPipeline(
            ("llava", (8, 1)),
            ("llava", (8, 1)),
            language_model_type="llama3.2_1b",
            vision_model_type="clip",
        ),
    ]


def test_all_pipelines():
    """Run all pipelines."""

    # Collect pipelines.
    pipelines = [
        *get_gpt_pipelines(),
        # *get_llava_pipelines(), #TODO: add these back on once working on CI
        # [todo] *get_moe_pipelines(), # todo: MoE support in loader_core.py.
        # [todo] *get_bert_pipelines(),
        # [todo] *get_t5_pipelines(),
    ]

    # Run pipelines.
    results = []
    for pipeline in tqdm(pipelines, "ckpt pipelines"):
        t = time.time()
        mses = pipeline.run()
        latency = time.time() - t
        results.append((latency, *mses))
        torch.cuda.empty_cache()

    # Print results.
    if int(os.environ["RANK"]) == 0:
        print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
        print("checkpoint converter results:")
        success = []
        for result_id, (latency, mse_real, mse_fake) in enumerate(results):
            success.append(mse_real < 0.05 * mse_fake)
            print(
                "  %d. mse: real %.1e, fake %.1e%s ... time %.1f sec | %s"
                % (
                    result_id,
                    mse_real,
                    mse_fake,
                    "" if success[-1] else " (failed)",
                    latency,
                    pipelines[result_id],
                )
            )
        print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

        num_failures = sum(not s for s in success)
        assert num_failures == 0, "mse_real >= mse_fake, for %d test(s)." % num_failures


if __name__ == "__main__":
    test_all_pipelines()


================================================
FILE: tests/functional_tests/test_cases/common/ckpt_converter/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Tree
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/common/moe_perf/__main__.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
"""GPU performance regression tests for the MoE layer."""

from __future__ import annotations

import gc
import json
import os
import statistics
from contextlib import nullcontext
from pathlib import Path
from typing import Any, Dict, Mapping, Sequence, cast

import pytest  # type: ignore[import]
import torch

from megatron.core.config import set_experimental_flag
from megatron.core.fp8_utils import get_fp8_context
from megatron.core.models.gpt.gpt_layer_specs import (
    get_gpt_layer_with_transformer_engine_submodules,
)
from megatron.core.transformer.moe.fused_a2a import HAVE_DEEP_EP, HAVE_HYBRIDEP
from megatron.core.transformer.moe.moe_layer import MoELayer
from megatron.core.transformer.moe.moe_utils import RandomSTE
from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.utils import is_te_min_version
from megatron.training.initialize import _set_random_seed
from tests.unit_tests.test_utilities import Utils

from .test_cases import PERFORMANCE_CASES, MoEPerformanceCase

# NOTE: Performance regression threshold
DEFAULT_MAX_REGRESSION_RATIO = 1.02
DEFAULT_MAX_VARIANCE_RATIO = 0.02  # The std/mean should be less than 2%
WARMUP_ITERS = 5
MEASURE_ITERS = 20


BASELINES_PATH = Path(__file__).resolve().parent / "baseline.json"
UPDATE_BASELINES_ENV = "MEGATRON_UPDATE_PERF_BASELINES"


def _build_transformer_config(case: MoEPerformanceCase) -> TransformerConfig:
    model = case.model
    config_kwargs = dict(
        num_layers=1,
        hidden_size=model.hidden_size,
        moe_ffn_hidden_size=model.moe_ffn_hidden_size,
        num_attention_heads=model.num_attention_heads,
        # MoE Arguments
        num_moe_experts=model.num_experts,
        moe_router_topk=model.router_topk,
        moe_router_load_balancing_type="aux_loss",
        moe_aux_loss_coeff=1.0,
        moe_token_dispatcher_type=case.token_dispatcher,
        moe_flex_dispatcher_backend=case.moe_flex_dispatcher_backend,
        use_cpu_initialization=True,
        add_bias_linear=False,
        # Router Arguments
        moe_router_num_groups=model.moe_router_num_groups,
        moe_router_group_topk=model.moe_router_group_topk,
        moe_router_score_function=model.moe_router_score_function,
        moe_router_dtype=model.moe_router_dtype,
        moe_router_enable_expert_bias=model.moe_router_enable_expert_bias,
        # Parallelism Arguments
        sequence_parallel=case.tensor_model_parallel_size > 1,
        tensor_model_parallel_size=case.tensor_model_parallel_size,
        pipeline_model_parallel_size=case.pipeline_model_parallel_size,
        expert_model_parallel_size=case.expert_model_parallel_size,
        expert_tensor_parallel_size=case.expert_tensor_parallel_size,
        context_parallel_size=case.context_parallel_size,
        params_dtype=case.input_dtype,
        bf16=True,
        fp8=case.fp8,
        moe_permute_fusion=case.moe_permute_fusion,
        moe_router_fusion=case.moe_router_fusion,
        moe_router_force_load_balancing=case.moe_router_force_load_balancing,
    )

    if case.fp8:
        config_kwargs.update(
            dict(fp8="hybrid", fp8_margin=0, fp8_interval=1, fp8_recipe="blockwise")
        )

    return TransformerConfig(**config_kwargs)


# NOTE: Only TE backend is covered in this test.
def _resolve_moe_submodules(case: MoEPerformanceCase):
    return get_gpt_layer_with_transformer_engine_submodules(
        num_experts=case.model.num_experts, moe_grouped_gemm=True
    ).mlp.submodules


def _load_baselines() -> Dict[str, Dict[str, float]]:
    if not BASELINES_PATH.exists():
        return {}
    with BASELINES_PATH.open("r", encoding="utf-8") as fh:
        return json.load(fh)


def _persist_baselines(data: Dict[str, Dict[str, float]]) -> None:
    BASELINES_PATH.parent.mkdir(parents=True, exist_ok=True)
    with BASELINES_PATH.open("w", encoding="utf-8") as fh:
        json.dump(data, fh, indent=2, sort_keys=True)
        fh.write("\n")


def _serialize_metrics(metrics: Dict[str, float]) -> Dict[str, float]:
    forward_ms = metrics["forward_ms"]
    backward_ms = metrics["backward_ms"]
    return {
        "forward_ms": forward_ms,
        "backward_ms": backward_ms,
        "max_allocated_bytes": metrics["max_allocated_bytes"],
        "max_regression_ratio": DEFAULT_MAX_REGRESSION_RATIO,
    }


def _assert_within_baseline(
    case_name: str, metrics: Mapping[str, Any], baselines: Dict[str, Dict[str, float]]
):
    baseline = baselines.get(case_name)
    if baseline is None:
        pytest.fail(
            f"Missing baseline data for {case_name}. Set {UPDATE_BASELINES_ENV}=1 to record."
        )

    max_ratio = baseline.get("max_regression_ratio", DEFAULT_MAX_REGRESSION_RATIO)

    def _limit(metric_name: str) -> float:
        baseline_value = baseline.get(metric_name)
        if baseline_value is None:
            return float("inf")
        ratio_limit = baseline_value * max_ratio
        return ratio_limit

    fwd_limit = _limit("forward_ms")
    bwd_limit = _limit("backward_ms")
    mem_limit = _limit("max_allocated_bytes")

    forward_ms = cast(float, metrics["forward_ms"])
    backward_ms = cast(float, metrics["backward_ms"])
    max_allocated_bytes = cast(float, metrics["max_allocated_bytes"])

    forward_std_ms = cast(float, metrics.get("forward_std_ms", 0.0))
    backward_std_ms = cast(float, metrics.get("backward_std_ms", 0.0))
    forward_timings = cast(Sequence[float], metrics.get("forward_timings", ()))
    backward_timings = cast(Sequence[float], metrics.get("backward_timings", ()))

    assert (
        forward_ms <= fwd_limit
    ), f"Forward pass for {case_name} regressed: {forward_ms:.3f} ms (limit {fwd_limit:.3f} ms)."
    assert (
        backward_ms <= bwd_limit
    ), f"Backward pass for {case_name} regressed: {backward_ms:.3f} ms (limit {bwd_limit:.3f} ms)."

    if forward_ms > 0.0:
        assert forward_std_ms / forward_ms <= DEFAULT_MAX_VARIANCE_RATIO, (
            "Forward pass for "
            f"{case_name} has high variance: {forward_std_ms:.3f} ms "
            f"(limit {DEFAULT_MAX_VARIANCE_RATIO:.3f} of {forward_ms:.3f} ms). "
            f"The full timings are {list(forward_timings)}."
        )
    if backward_ms > 0.0:
        assert backward_std_ms / backward_ms <= DEFAULT_MAX_VARIANCE_RATIO, (
            "Backward pass for "
            f"{case_name} has high variance: {backward_std_ms:.3f} ms "
            f"(limit {DEFAULT_MAX_VARIANCE_RATIO:.3f} of {backward_ms:.3f} ms). "
            f"The full timings are {list(backward_timings)}."
        )
    assert max_allocated_bytes <= mem_limit, (
        "Max allocated memory for "
        f"{case_name} regressed: {max_allocated_bytes / (1024 ** 2):.3f} MiB "
        f"(limit {mem_limit / (1024 ** 2):.3f} MiB)."
    )


def _benchmark_moe_layer(layer: MoELayer, case: MoEPerformanceCase):
    torch.cuda.synchronize()
    set_experimental_flag(True)

    forward_timings = []
    backward_timings = []
    max_allocated_bytes = []

    generator = torch.Generator(device="cuda").manual_seed(1234)
    model = case.model

    if case.manual_gc:
        torch.cuda.empty_cache()
        gc.disable()
        gc.collect()

    # NOTE: Using the same input tensor for all iterations to prevent different routing results,
    # which may lead to different kernels and library load/compile overhead.
    input_tensor = torch.randn(
        model.seq_length,
        model.micro_batch_size,
        model.hidden_size,
        device="cuda",
        dtype=case.input_dtype,
        generator=generator,
    )
    input_tensor.requires_grad_(True)
    for iteration in range(WARMUP_ITERS + MEASURE_ITERS):
        if RandomSTE.generator is not None:
            RandomSTE.generator.manual_seed(RandomSTE.generator.initial_seed())
        if torch.distributed.is_available() and torch.distributed.is_initialized():
            torch.distributed.barrier()
        torch.cuda.nvtx.range_push(f"({case.name}) iteration {iteration}")
        # Use a long CUDA kernel to hide the router launch overhead
        with torch.cuda.nvtx.range("(dummy GEMM)"):
            dummy_tensor = torch.randn(8192, 8192, device="cuda")
            torch.matmul(dummy_tensor, dummy_tensor)
            del dummy_tensor
        input_tensor.grad = None
        layer.zero_grad(set_to_none=True)
        torch.cuda.reset_peak_memory_stats()

        fwd_start = torch.cuda.Event(enable_timing=True)
        fwd_end = torch.cuda.Event(enable_timing=True)
        bwd_start = torch.cuda.Event(enable_timing=True)
        bwd_end = torch.cuda.Event(enable_timing=True)

        context = get_fp8_context(layer.config) if case.fp8 else nullcontext()
        with context:
            fwd_start.record()
            output, _ = layer(input_tensor)
            fwd_end.record()

            backward_grad = torch.randn_like(output)
            bwd_start.record()
            output.backward(backward_grad)
            bwd_end.record()

        torch.cuda.nvtx.range_pop()
        torch.cuda.synchronize()

        if iteration >= WARMUP_ITERS:
            forward_timings.append(fwd_start.elapsed_time(fwd_end))
            backward_timings.append(bwd_start.elapsed_time(bwd_end))
            max_allocated_bytes.append(torch.cuda.max_memory_allocated())

    # Exclude the top 3 values from timings lists to avoid outliers
    forward_timings_sorted = sorted(forward_timings)[:-3]
    backward_timings_sorted = sorted(backward_timings)[:-3]
    forward_ms = statistics.mean(forward_timings)
    backward_ms = statistics.mean(backward_timings)
    max_allocated_bytes = statistics.mean(max_allocated_bytes)

    if case.manual_gc:
        gc.collect()
        gc.enable()

    if Utils.rank == 0:
        print(f"({case.name}) forward times {forward_timings}")
    return {
        "forward_ms": forward_ms,
        "backward_ms": backward_ms,
        "forward_std_ms": statistics.pstdev(forward_timings) if len(forward_timings) > 1 else 0.0,
        "backward_std_ms": (
            statistics.pstdev(backward_timings) if len(backward_timings) > 1 else 0.0
        ),
        "max_allocated_bytes": max_allocated_bytes,
        "forward_timings": forward_timings,
        "backward_timings": backward_timings,
    }


def _maybe_update_baseline(
    case: MoEPerformanceCase, metrics: Dict[str, float], baselines: Dict[str, Dict[str, float]]
):
    forward_ms = metrics["forward_ms"]
    backward_ms = metrics["backward_ms"]
    forward_std_ms = metrics["forward_std_ms"]
    backward_std_ms = metrics["backward_std_ms"]
    assert forward_std_ms / forward_ms <= DEFAULT_MAX_VARIANCE_RATIO, (
        "Forward pass for "
        f"{case.name} has high variance: {forward_std_ms:.3f} ms "
        f"(limit {DEFAULT_MAX_VARIANCE_RATIO:.3f} of {forward_ms:.3f} ms)."
    )
    assert backward_std_ms / backward_ms <= DEFAULT_MAX_VARIANCE_RATIO, (
        "Backward pass for "
        f"{case.name} has high variance: {backward_std_ms:.3f} ms "
        f"(limit {DEFAULT_MAX_VARIANCE_RATIO:.3f} of {backward_ms:.3f} ms)."
    )
    baselines[case.name] = _serialize_metrics(metrics)
    _persist_baselines(baselines)


def _prepare_moe_layer(case: MoEPerformanceCase) -> MoELayer:
    config = _build_transformer_config(case)
    submodules = _resolve_moe_submodules(case)
    layer = MoELayer(config=config, submodules=submodules).cuda().to(dtype=torch.bfloat16)

    layer.train()
    return layer


def _check_env():
    NCCL_MAX_NCHANNELS = os.environ.get("NCCL_MAX_NCHANNELS")
    if NCCL_MAX_NCHANNELS is not None:
        pytest.fail(
            f"NCCL_MAX_NCHANNELS is set to {NCCL_MAX_NCHANNELS}, this may lead to performance regression"
        )


def _check_dependencies(case: MoEPerformanceCase):
    if case.token_dispatcher == "flex":
        if case.moe_flex_dispatcher_backend == "deepep":
            if not HAVE_DEEP_EP:
                pytest.skip("DeepEP is not available")
        elif case.moe_flex_dispatcher_backend == "hybridep":
            if not HAVE_HYBRIDEP:
                pytest.skip("HybridEP is not available")


@pytest.mark.flaky(reruns=10)
@pytest.mark.internal
@pytest.mark.skipif(
    not torch.cuda.is_available(), reason="CUDA is required for MoE performance benchmarking"
)
@pytest.mark.parametrize("perf_case", PERFORMANCE_CASES, ids=lambda c: c.name)
def test_moe_layer_performance(perf_case: MoEPerformanceCase, debug_mode: bool = False):
    _check_env()
    _check_dependencies(perf_case)
    if not perf_case.is_current_platform():
        pytest.skip(
            "GPU platform mismatch: "
            f"expected '{perf_case.gpu_platform}', "
            f"found '{torch.cuda.get_device_name(torch.cuda.current_device())}'."
        )

    Utils.initialize_model_parallel(
        tensor_model_parallel_size=perf_case.tensor_model_parallel_size,
        pipeline_model_parallel_size=perf_case.pipeline_model_parallel_size,
        expert_model_parallel_size=perf_case.expert_model_parallel_size,
        context_parallel_size=perf_case.context_parallel_size,
        expert_tensor_parallel_size=perf_case.expert_tensor_parallel_size,
    )

    try:
        _set_random_seed(seed_=123, data_parallel_random_init=False)
        torch.cuda.reset_peak_memory_stats()
        layer = _prepare_moe_layer(perf_case)
        with torch.cuda.nvtx.range(f"({perf_case.name})"):
            metrics = _benchmark_moe_layer(layer, perf_case)

        summary = (
            f"MoE layer performance ({perf_case.name}): forward {metrics['forward_ms']:.3f} ms "
            f"(σ={metrics['forward_std_ms']:.3f}), backward {metrics['backward_ms']:.3f} ms "
            f"(σ={metrics['backward_std_ms']:.3f}), max mem {metrics['max_allocated_bytes'] / (1024 ** 2):.3f} MiB"
        )
        if Utils.rank == 0:
            print(summary)

        if torch.distributed.is_initialized():
            torch.distributed.barrier()

        # Don't check performance if profiling is enabled
        baseline_failed = False
        baseline_failure_message = ""

        # Only rank 0 checks the baseline
        if Utils.rank == 0 and not debug_mode:
            baselines = _load_baselines()
            try:
                if os.getenv(UPDATE_BASELINES_ENV) == "1":
                    _maybe_update_baseline(perf_case, metrics, baselines)
                else:
                    _assert_within_baseline(perf_case.name, metrics, baselines)
            except AssertionError as exc:
                baseline_failed = True
                baseline_failure_message = str(exc)

        failure_tensor = torch.tensor(
            [1 if baseline_failed else 0],
            device=torch.device("cuda", torch.cuda.current_device()),
            dtype=torch.int32,
        )
        torch.distributed.all_reduce(failure_tensor, op=torch.distributed.ReduceOp.MAX)
        baseline_failed = bool(failure_tensor.item())

        if baseline_failed:
            if Utils.rank != 0:
                baseline_failure_message = "Baseline regression detected on rank 0."
                pytest.fail(baseline_failure_message, pytrace=False)
            else:
                pytest.fail(baseline_failure_message, pytrace=True)

    finally:
        Utils.destroy_model_parallel()
        torch.cuda.empty_cache()


# Main entry for local performance testing
# Commands to run with nsys profiling:
# nsys profile --sample=none --cpuctxsw=none -t cuda,nvtx \
#         -f true -x true \
#         --cuda-graph-trace=node \
#         --capture-range=cudaProfilerApi \
#         --capture-range-end=stop \
#         -o output \
#         uv run --no-sync python -m torch.distributed.run --nproc_per_node=8 --nnodes=1 -m tests.functional_tests.test_cases.common.moe_perf
# Commands to run with pytest:
# export MEGATRON_UPDATE_PERF_BASELINES=0 # set to 1 to update baseline perf numbers
# uv run --no-sync python -m torch.distributed.run --nproc_per_node=8 --nnodes=1 -m tests.functional_tests.test_cases.common.moe_perf
if __name__ == "__main__":
    pytest.main(["-x", "-v", "-s", __file__])  # -xvs
    # torch.cuda.cudart().cudaProfilerStart()
    # torch.autograd.profiler.emit_nvtx(record_shapes=True).__enter__()
    # for case in PERFORMANCE_CASES:
    #     if case.name == "mixtral_a2a_tp1ep8_fp8":
    #         test_moe_layer_performance(case, debug_mode=True)
    # torch.cuda.cudart().cudaProfilerStop()
    # torch.distributed.destroy_process_group()


================================================
FILE: tests/functional_tests/test_cases/common/moe_perf/baseline.json
================================================
{
  "deepseek_a2a_tp1ep8_bf16": {
    "backward_ms": 10.482670497894286,
    "forward_ms": 7.119169592857361,
    "max_allocated_bytes": 3442750464,
    "max_regression_ratio": 1.02
  },
  "deepseek_a2a_tp1ep8_fp8": {
    "backward_ms": 10.126460886001587,
    "forward_ms": 7.609272027015686,
    "max_allocated_bytes": 4412930560,
    "max_regression_ratio": 1.02
  },
  "deepseek_deepep_tp1ep8_bf16": {
    "backward_ms": 9.055137538909912,
    "forward_ms": 5.681718397140503,
    "max_allocated_bytes": 2445288448,
    "max_regression_ratio": 1.02
  },
  "deepseek_deepep_tp1ep8_fp8": {
    "backward_ms": 8.60211353302002,
    "forward_ms": 6.034772801399231,
    "max_allocated_bytes": 3707410944,
    "max_regression_ratio": 1.02
  },
  "deepseek_hybridep_tp1ep8_bf16": {
    "backward_ms": 8.795001602172851,
    "forward_ms": 5.390828824043274,
    "max_allocated_bytes": 2424369664,
    "max_regression_ratio": 1.02
  },
  "deepseek_hybridep_tp1ep8_fp8": {
    "backward_ms": 7.6874864339828495,
    "forward_ms": 5.142886424064637,
    "max_allocated_bytes": 2652078464,
    "max_regression_ratio": 1.02
  },
  "mixtral_a2a_tp1ep8_bf16": {
    "backward_ms": 7.684332823753357,
    "forward_ms": 4.32688798904419,
    "max_allocated_bytes": 2890180198.4,
    "max_regression_ratio": 1.02
  },
  "mixtral_a2a_tp1ep8_fp8": {
    "backward_ms": 6.618246412277221,
    "forward_ms": 3.7876319885253906,
    "max_allocated_bytes": 3095914086.4,
    "max_regression_ratio": 1.02
  },
  "mixtral_deepep_tp1ep8_bf16": {
    "backward_ms": 7.854356813430786,
    "forward_ms": 4.635550403594971,
    "max_allocated_bytes": 2890295808,
    "max_regression_ratio": 1.02
  },
  "mixtral_deepep_tp1ep8_fp8": {
    "backward_ms": 6.82720000743866,
    "forward_ms": 4.101150441169739,
    "max_allocated_bytes": 3095448064,
    "max_regression_ratio": 1.02
  },
  "mixtral_hybridep_tp1ep8_bf16": {
    "backward_ms": 7.740782427787781,
    "forward_ms": 4.342604804039001,
    "max_allocated_bytes": 2889559552,
    "max_regression_ratio": 1.02
  },
  "mixtral_hybridep_tp1ep8_fp8": {
    "backward_ms": 6.57167682647705,
    "forward_ms": 3.7007392168045046,
    "max_allocated_bytes": 3095269376,
    "max_regression_ratio": 1.02
  }
}


================================================
FILE: tests/functional_tests/test_cases/common/moe_perf/test_cases.py
================================================
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
from dataclasses import dataclass
from typing import Iterable, Optional

import torch


@dataclass(frozen=True)
class MoEModelConfig:
    seq_length: int
    micro_batch_size: int
    hidden_size: int
    moe_ffn_hidden_size: int
    num_experts: int
    router_topk: int
    num_attention_heads: int = 8
    moe_shared_expert_intermediate_size: Optional[int] = None

    # Router related
    moe_router_load_balancing_type: str = "aux_loss"
    moe_router_num_groups: Optional[int] = None
    moe_router_group_topk: Optional[int] = None
    moe_router_score_function: str = "softmax"
    moe_router_dtype: str = "fp32"
    moe_router_enable_expert_bias: bool = False


@dataclass(frozen=True)
class MoEPerformanceCase:
    """Describes a single MoE performance configuration to exercise."""

    name: str
    model: MoEModelConfig

    # Token dispatcher related
    token_dispatcher: str
    moe_flex_dispatcher_backend: str = "deepep"

    # FP8 related
    fp8: Optional[str] = None
    fp8_recipe: Optional[str] = None

    # Tested GPU platform
    gpu_platform: str = "H100"

    # Parallelism related
    tensor_model_parallel_size: int = 1
    pipeline_model_parallel_size: int = 1
    expert_model_parallel_size: int = 1
    context_parallel_size: int = 1
    expert_tensor_parallel_size: int = 1

    # kernel fusion related
    moe_permute_fusion: bool = True
    moe_router_fusion: bool = True

    # Performance stability related
    moe_router_force_load_balancing: bool = True
    manual_gc: bool = True

    @property
    def input_dtype(self) -> torch.dtype:
        return torch.bfloat16

    def is_current_platform(self) -> bool:
        if self.gpu_platform is None:
            return True
        device_name = torch.cuda.get_device_name(torch.cuda.current_device())
        return self.gpu_platform.lower() in device_name.lower()


MIXTRAL_PROXY = MoEModelConfig(
    seq_length=4096,
    micro_batch_size=1,
    hidden_size=4096,
    moe_ffn_hidden_size=14336,
    num_experts=8,
    router_topk=2,
    moe_router_load_balancing_type="aux_loss",
)

DEEPSEEK_PROXY = MoEModelConfig(
    seq_length=4096,
    micro_batch_size=1,
    hidden_size=7168,
    moe_ffn_hidden_size=2048,
    num_experts=32,
    router_topk=8,
    moe_router_load_balancing_type="seq_aux_loss",
    moe_router_num_groups=8,
    moe_router_group_topk=4,
    moe_router_score_function="sigmoid",
    moe_router_dtype="fp32",
    moe_router_enable_expert_bias=True,
    moe_shared_expert_intermediate_size=2048,
)


PERFORMANCE_CASES: Iterable[MoEPerformanceCase] = (
    MoEPerformanceCase(
        name="mixtral_a2a_tp1ep8_fp8",
        token_dispatcher="alltoall",
        model=MIXTRAL_PROXY,
        tensor_model_parallel_size=1,
        expert_model_parallel_size=8,
        fp8="e4m3",
        fp8_recipe="blockwise",
    ),
    MoEPerformanceCase(
        name="mixtral_deepep_tp1ep8_fp8",
        token_dispatcher="flex",
        moe_flex_dispatcher_backend="deepep",
        model=MIXTRAL_PROXY,
        tensor_model_parallel_size=1,
        expert_model_parallel_size=8,
        fp8="e4m3",
        fp8_recipe="blockwise",
    ),
    MoEPerformanceCase(
        name="mixtral_hybridep_tp1ep8_fp8",
        token_dispatcher="flex",
        moe_flex_dispatcher_backend="hybridep",
        model=MIXTRAL_PROXY,
        tensor_model_parallel_size=1,
        expert_model_parallel_size=8,
        fp8="e4m3",
        fp8_recipe="blockwise",
    ),
    MoEPerformanceCase(
        name="deepseek_a2a_tp1ep8_fp8",
        token_dispatcher="alltoall",
        model=DEEPSEEK_PROXY,
        tensor_model_parallel_size=1,
        expert_model_parallel_size=8,
        fp8="e4m3",
        fp8_recipe="blockwise",
    ),
    MoEPerformanceCase(
        name="deepseek_hybridep_tp1ep8_fp8",
        token_dispatcher="flex",
        moe_flex_dispatcher_backend="hybridep",
        model=DEEPSEEK_PROXY,
        tensor_model_parallel_size=1,
        expert_model_parallel_size=8,
        fp8="e4m3",
        fp8_recipe="blockwise",
    ),
    MoEPerformanceCase(
        name="deepseek_deepep_tp1ep8_fp8",
        token_dispatcher="flex",
        moe_flex_dispatcher_backend="deepep",
        model=DEEPSEEK_PROXY,
        tensor_model_parallel_size=1,
        expert_model_parallel_size=8,
        fp8="e4m3",
        fp8_recipe="blockwise",
    ),
    MoEPerformanceCase(
        name="mixtral_a2a_tp1ep8_bf16",
        token_dispatcher="alltoall",
        model=MIXTRAL_PROXY,
        tensor_model_parallel_size=1,
        expert_model_parallel_size=8,
    ),
    MoEPerformanceCase(
        name="mixtral_deepep_tp1ep8_bf16",
        token_dispatcher="flex",
        moe_flex_dispatcher_backend="deepep",
        model=MIXTRAL_PROXY,
        tensor_model_parallel_size=1,
        expert_model_parallel_size=8,
    ),
    MoEPerformanceCase(
        name="mixtral_hybridep_tp1ep8_bf16",
        token_dispatcher="flex",
        moe_flex_dispatcher_backend="hybridep",
        model=MIXTRAL_PROXY,
        tensor_model_parallel_size=1,
        expert_model_parallel_size=8,
    ),
    MoEPerformanceCase(
        name="deepseek_a2a_tp1ep8_bf16",
        token_dispatcher="alltoall",
        model=DEEPSEEK_PROXY,
        tensor_model_parallel_size=1,
        expert_model_parallel_size=8,
    ),
    MoEPerformanceCase(
        name="deepseek_deepep_tp1ep8_bf16",
        token_dispatcher="flex",
        moe_flex_dispatcher_backend="deepep",
        model=DEEPSEEK_PROXY,
        tensor_model_parallel_size=1,
        expert_model_parallel_size=8,
    ),
    MoEPerformanceCase(
        name="deepseek_hybridep_tp1ep8_bf16",
        token_dispatcher="flex",
        moe_flex_dispatcher_backend="hybridep",
        model=DEEPSEEK_PROXY,
        tensor_model_parallel_size=1,
        expert_model_parallel_size=8,
    ),
)


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release/golden_values_dev_dgx_h100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 51000, "step_interval": 5, "values": {"1": 12.95636, "5": 12.91492, "10": 12.06071, "15": 11.39909, "20": 10.42783, "25": 9.97711, "30": 9.6286, "35": 9.36624, "40": 9.17254, "45": 9.00255, "50": 8.8422, "55": 8.63834, "60": 8.63846, "65": 8.45294, "70": 8.43263, "75": 8.26644, "80": 8.09687, "85": 8.11139, "90": 7.96479, "95": 7.93325, "100": 7.79677, "105": 7.67345, "110": 7.56066, "115": 7.50422, "120": 7.46202, "125": 7.44329, "130": 7.26251, "135": 7.2425, "140": 7.2044, "145": 7.00436, "150": 7.15832, "155": 7.0238, "160": 6.90308, "165": 6.88146, "170": 6.79477, "175": 6.82432, "180": 6.79919, "185": 6.73194, "190": 6.67383, "195": 6.63462, "200": 6.65786, "205": 6.62088, "210": 6.52161, "215": 6.48652, "220": 6.5137, "225": 6.49893, "230": 6.47324, "235": 6.41688, "240": 6.36555, "245": 6.34188, "250": 6.29649, "255": 6.41633, "260": 6.34333, "265": 6.25441, "270": 6.20479, "275": 6.22791, "280": 6.19939, "285": 6.20459, "290": 6.15582, "295": 6.13042, "300": 6.11567, "305": 6.03889, "310": 6.09181, "315": 6.08567, "320": 5.96246, "325": 5.94091, "330": 6.0021, "335": 6.00696, "340": 5.923, "345": 5.91037, "350": 5.90166, "355": 5.83938, "360": 5.86421, "365": 5.83851, "370": 5.79736, "375": 5.88069, "380": 5.84632, "385": 5.79522, "390": 5.78997, "395": 5.68485, "400": 5.65935, "405": 5.67324, "410": 5.67915, "415": 5.70348, "420": 5.62598, "425": 5.69901, "430": 5.63413, "435": 5.57726, "440": 5.62055, "445": 5.56107, "450": 5.59801, "455": 5.52289, "460": 5.52019, "465": 5.59069, "470": 5.55559, "475": 5.51408, "480": 5.46367, "485": 5.4963, "490": 5.47827, "495": 5.47537, "500": 5.40917, "505": 5.39348, "510": 5.44131, "515": 5.4227, "520": 5.42475, "525": 5.30792, "530": 5.32991, "535": 5.32045, "540": 5.33396, "545": 5.37814, "550": 5.33432, "555": 5.22468, "560": 5.34448, "565": 5.26025, "570": 5.25496, "575": 5.26508, "580": 5.2261, "585": 5.18523, "590": 5.23531, "595": 5.21597, "600": 5.25481, "605": 5.22827, "610": 5.24152, "615": 5.16897, "620": 5.20393, "625": 5.21192, "630": 5.12559, "635": 5.0983, "640": 5.06269, "645": 5.11363, "650": 5.11504, "655": 5.10817, "660": 5.05975, "665": 5.07345, "670": 5.01161, "675": 5.00973, "680": 5.00039, "685": 4.98384, "690": 4.9873, "695": 4.96491, "700": 4.94539, "705": 4.93223, "710": 4.94081, "715": 4.83795, "720": 4.83274, "725": 4.78491, "730": 4.8244, "735": 4.81083, "740": 4.79976, "745": 4.68904, "750": 4.72059, "755": 4.74714, "760": 4.74038, "765": 4.70597, "770": 4.65318, "775": 4.64132, "780": 4.66637, "785": 4.73973, "790": 4.64876, "795": 4.61121, "800": 4.57544, "805": 4.59394, "810": 4.62435, "815": 4.56173, "820": 4.59615, "825": 4.57955, "830": 4.57467, "835": 4.53309, "840": 4.43742, "845": 4.45689, "850": 4.4204, "855": 4.49913, "860": 4.41134, "865": 4.48287, "870": 4.45121, "875": 4.35614, "880": 4.40109, "885": 4.36448, "890": 4.41124, "895": 4.39546, "900": 4.38397, "905": 4.31785, "910": 4.34052, "915": 4.31549, "920": 4.36392, "925": 4.36028, "930": 4.28386, "935": 4.27082, "940": 4.3445, "945": 4.29313, "950": 4.32563, "955": 4.24296, "960": 4.17223, "965": 4.25597, "970": 4.28174, "975": 4.23195, "980": 4.21503, "985": 4.17046, "990": 4.12753, "995": 4.17941, "1000": 4.21, "1005": 4.18, "1010": 4.15769, "1015": 4.13164, "1020": 4.15168, "1025": 4.20911, "1030": 4.1252, "1035": 4.07002, "1040": 4.10269, "1045": 4.09399, "1050": 4.15348, "1055": 4.08902, "1060": 4.10468, "1065": 4.07269, "1070": 4.04306, "1075": 4.05269, "1080": 4.05887, "1085": 4.05794, "1090": 4.01935, "1095": 4.09802, "1100": 4.05819, "1105": 4.07337, "1110": 4.02964, "1115": 4.01863, "1120": 3.99359, "1125": 4.00426, "1130": 4.04876, "1135": 4.01693, "1140": 3.99429, "1145": 3.93168, "1150": 4.02653, "1155": 3.99541, "1160": 3.9714, "1165": 3.87632, "1170": 3.92636, "1175": 3.94936, "1180": 3.95967, "1185": 3.98907, "1190": 3.93252, "1195": 3.92857, "1200": 3.89894, "1205": 3.87873, "1210": 3.97403, "1215": 3.83292, "1220": 3.85378, "1225": 3.81043, "1230": 3.90187, "1235": 3.88706, "1240": 3.86921, "1245": 3.79765, "1250": 3.82361, "1255": 3.8381, "1260": 3.89029, "1265": 3.80359, "1270": 3.87241, "1275": 3.84526, "1280": 3.80629, "1285": 3.82994, "1290": 3.86508, "1295": 3.84607, "1300": 3.81967, "1305": 3.82011, "1310": 3.81461, "1315": 3.81049, "1320": 3.81606, "1325": 3.73795, "1330": 3.79026, "1335": 3.75693, "1340": 3.74375, "1345": 3.74487, "1350": 3.71701, "1355": 3.76787, "1360": 3.7186, "1365": 3.72539, "1370": 3.73828, "1375": 3.74462, "1380": 3.75674, "1385": 3.73386, "1390": 3.65962, "1395": 3.73066, "1400": 3.72491, "1405": 3.69202, "1410": 3.66598, "1415": 3.65193, "1420": 3.69456, "1425": 3.69844, "1430": 3.66393, "1435": 3.65627, "1440": 3.62498, "1445": 3.6696, "1450": 3.69663, "1455": 3.63137, "1460": 3.64731, "1465": 3.67112, "1470": 3.63013, "1475": 3.69433, "1480": 3.66255, "1485": 3.66509, "1490": 3.62624, "1495": 3.60631, "1500": 3.63843, "1505": 3.68734, "1510": 3.55474, "1515": 3.60925, "1520": 3.63424, "1525": 3.60142, "1530": 3.58944, "1535": 3.58501, "1540": 3.60659, "1545": 3.60649, "1550": 3.54653, "1555": 3.56825, "1560": 3.6147, "1565": 3.60815, "1570": 3.5778, "1575": 3.54167, "1580": 3.58396, "1585": 3.57407, "1590": 3.46676, "1595": 3.51414, "1600": 3.49796, "1605": 3.5515, "1610": 3.55668, "1615": 3.49679, "1620": 3.52064, "1625": 3.47672, "1630": 3.48704, "1635": 3.52932, "1640": 3.52367, "1645": 3.54971, "1650": 3.48988, "1655": 3.46075, "1660": 3.51173, "1665": 3.44215, "1670": 3.50351, "1675": 3.4802, "1680": 3.46089, "1685": 3.48893, "1690": 3.46943, "1695": 3.48425, "1700": 3.44953, "1705": 3.39196, "1710": 3.48652, "1715": 3.48793, "1720": 3.43118, "1725": 3.41242, "1730": 3.41777, "1735": 3.47852, "1740": 3.44983, "1745": 3.43522, "1750": 3.39895, "1755": 3.40903, "1760": 3.38927, "1765": 3.40755, "1770": 3.42923, "1775": 3.38591, "1780": 3.42658, "1785": 3.40444, "1790": 3.38439, "1795": 3.40535, "1800": 3.33166, "1805": 3.37603, "1810": 3.33282, "1815": 3.42134, "1820": 3.41546, "1825": 3.36766, "1830": 3.32066, "1835": 3.42536, "1840": 3.39121, "1845": 3.42489, "1850": 3.37927, "1855": 3.3713, "1860": 3.33898, "1865": 3.38851, "1870": 3.29235, "1875": 3.4528, "1880": 3.34099, "1885": 3.33274, "1890": 3.32822, "1895": 3.38492, "1900": 3.37448, "1905": 3.30425, "1910": 3.32231, "1915": 3.31181, "1920": 3.3444, "1925": 3.33399, "1930": 3.31583, "1935": 3.31016, "1940": 3.3515, "1945": 3.25588, "1950": 3.39773, "1955": 3.28689, "1960": 3.32443, "1965": 3.28105, "1970": 3.29757, "1975": 3.34318, "1980": 3.31963, "1985": 3.22542, "1990": 3.28739, "1995": 3.26682, "2000": 3.27416, "2005": 3.26052, "2010": 3.28094, "2015": 3.22314, "2020": 3.25467, "2025": 3.26542, "2030": 3.27138, "2035": 3.28935, "2040": 3.24079, "2045": 3.25173, "2050": 3.27733, "2055": 3.32689, "2060": 3.27543, "2065": 3.22857, "2070": 3.29279, "2075": 3.21899, "2080": 3.21147, "2085": 3.26803, "2090": 3.14261, "2095": 3.26058, "2100": 3.22125, "2105": 3.18724, "2110": 3.19276, "2115": 3.22171, "2120": 3.19625, "2125": 3.23152, "2130": 3.21674, "2135": 3.28177, "2140": 3.19948, "2145": 3.19664, "2150": 3.20859, "2155": 3.22243, "2160": 3.19393, "2165": 3.26074, "2170": 3.21218, "2175": 3.17959, "2180": 3.22001, "2185": 3.23917, "2190": 3.23779, "2195": 3.14891, "2200": 3.18829, "2205": 3.16479, "2210": 3.12373, "2215": 3.19545, "2220": 3.20558, "2225": 3.174, "2230": 3.10842, "2235": 3.15865, "2240": 3.1913, "2245": 3.18121, "2250": 3.20457, "2255": 3.12359, "2260": 3.1331, "2265": 3.20917, "2270": 3.17105, "2275": 3.13201, "2280": 3.17833, "2285": 3.15621, "2290": 3.16254, "2295": 3.19267, "2300": 3.13278, "2305": 3.15786, "2310": 3.12532, "2315": 3.06071, "2320": 3.13371, "2325": 3.19407, "2330": 3.12802, "2335": 3.10681, "2340": 3.14944, "2345": 3.10274, "2350": 3.12237, "2355": 3.10907, "2360": 3.14996, "2365": 3.08277, "2370": 3.12842, "2375": 3.11528, "2380": 3.1055, "2385": 3.06986, "2390": 3.09395, "2395": 3.09025, "2400": 3.08194, "2405": 3.09558, "2410": 3.09058, "2415": 3.0889, "2420": 3.07101, "2425": 3.06266, "2430": 3.07155, "2435": 3.06133, "2440": 3.07173, "2445": 3.04845, "2450": 3.11453, "2455": 3.14895, "2460": 3.08519, "2465": 3.07738, "2470": 3.03239, "2475": 3.06413, "2480": 3.07943, "2485": 3.041, "2490": 3.04845, "2495": 3.07411, "2500": 3.07396, "2505": 3.09823, "2510": 3.14261, "2515": 3.06704, "2520": 3.08062, "2525": 3.01262, "2530": 3.03518, "2535": 3.08259, "2540": 3.06948, "2545": 3.04228, "2550": 2.99398, "2555": 3.10216, "2560": 3.0536, "2565": 3.10722, "2570": 3.01387, "2575": 3.04236, "2580": 3.07128, "2585": 3.00621, "2590": 3.05694, "2595": 2.98766, "2600": 3.06568, "2605": 3.03782, "2610": 3.04081, "2615": 3.05548, "2620": 2.97523, "2625": 3.00532, "2630": 3.02671, "2635": 3.05504, "2640": 3.00792, "2645": 3.05042, "2650": 3.01078, "2655": 2.99214, "2660": 3.00983, "2665": 3.03174, "2670": 2.97743, "2675": 2.95504, "2680": 2.99183, "2685": 2.99544, "2690": 2.98959, "2695": 2.98946, "2700": 3.02346, "2705": 2.98127, "2710": 2.96511, "2715": 2.95329, "2720": 3.03239, "2725": 2.98164, "2730": 3.03003, "2735": 2.99527, "2740": 2.99451, "2745": 3.0112, "2750": 3.00078, "2755": 2.96115, "2760": 2.99208, "2765": 3.00285, "2770": 2.9749, "2775": 2.98917, "2780": 3.0085, "2785": 2.94734, "2790": 2.95569, "2795": 2.94377, "2800": 2.94977, "2805": 2.92865, "2810": 2.97389, "2815": 2.95135, "2820": 3.02213, "2825": 3.01629, "2830": 2.98344, "2835": 2.93202, "2840": 2.95989, "2845": 3.03518, "2850": 2.98907, "2855": 2.96129, "2860": 2.93919, "2865": 2.90158, "2870": 2.9697, "2875": 2.90772, "2880": 2.94687, "2885": 2.92211, "2890": 2.98697, "2895": 2.9264, "2900": 2.94262, "2905": 3.02309, "2910": 2.89985, "2915": 2.93111, "2920": 2.9441, "2925": 2.93478, "2930": 2.94507, "2935": 2.93986, "2940": 2.98902, "2945": 2.95604, "2950": 2.98702, "2955": 2.90701, "2960": 2.9702, "2965": 2.86016, "2970": 2.95014, "2975": 2.98567, "2980": 2.93197, "2985": 3.0277, "2990": 2.93226, "2995": 2.86816, "3000": 2.93115, "3005": 2.88275, "3010": 2.92824, "3015": 2.9268, "3020": 2.93421, "3025": 2.93985, "3030": 2.96317, "3035": 2.9625, "3040": 2.91622, "3045": 2.84124, "3050": 2.88836, "3055": 2.8776, "3060": 2.91221, "3065": 2.90966, "3070": 2.87285, "3075": 2.86981, "3080": 2.96502, "3085": 2.91555, "3090": 2.91847, "3095": 2.93047, "3100": 2.86017, "3105": 2.91739, "3110": 2.89514, "3115": 2.93439, "3120": 2.94998, "3125": 2.84834, "3130": 2.92406, "3135": 2.9177, "3140": 2.86842, "3145": 2.92748, "3150": 2.84987, "3155": 2.84495, "3160": 2.84633, "3165": 2.84225, "3170": 2.89341, "3175": 2.9016, "3180": 2.87099, "3185": 2.88931, "3190": 2.90373, "3195": 2.93434, "3200": 2.9408, "3205": 2.85572, "3210": 2.87321, "3215": 2.92774, "3220": 2.87037, "3225": 2.86564, "3230": 2.81225, "3235": 2.87699, "3240": 2.86764, "3245": 2.89613, "3250": 2.85176, "3255": 2.84676, "3260": 2.86032, "3265": 2.86922, "3270": 2.84306, "3275": 2.8643, "3280": 2.79994, "3285": 2.80951, "3290": 2.86935, "3295": 2.89678, "3300": 2.87657, "3305": 2.86319, "3310": 2.8566, "3315": 2.82889, "3320": 2.83388, "3325": 2.82673, "3330": 2.82526, "3335": 2.84912, "3340": 2.82419, "3345": 2.86214, "3350": 2.83997, "3355": 2.86622, "3360": 2.79892, "3365": 2.85095, "3370": 2.84587, "3375": 2.83655, "3380": 2.84755, "3385": 2.87496, "3390": 2.8658, "3395": 2.8076, "3400": 2.78399, "3405": 2.84067, "3410": 2.8544, "3415": 2.86126, "3420": 2.82116, "3425": 2.80498, "3430": 2.83624, "3435": 2.91559, "3440": 2.8246, "3445": 2.86585, "3450": 2.81294, "3455": 2.7892, "3460": 2.8114, "3465": 2.84512, "3470": 2.83477, "3475": 2.76958, "3480": 2.843, "3485": 2.82096, "3490": 2.8897, "3495": 2.85252, "3500": 2.83235, "3505": 2.82263, "3510": 2.81576, "3515": 2.83489, "3520": 2.77798, "3525": 2.80371, "3530": 2.85287, "3535": 2.78258, "3540": 2.83807, "3545": 2.80902, "3550": 2.79498, "3555": 2.81904, "3560": 2.8227, "3565": 2.82949, "3570": 2.80416, "3575": 2.80909, "3580": 2.82128, "3585": 2.83589, "3590": 2.82998, "3595": 2.78314, "3600": 2.75247, "3605": 2.79098, "3610": 2.85161, "3615": 2.75399, "3620": 2.80041, "3625": 2.8819, "3630": 2.77751, "3635": 2.78765, "3640": 2.78673, "3645": 2.7706, "3650": 2.80418, "3655": 2.81865, "3660": 2.76784, "3665": 2.78719, "3670": 2.77233, "3675": 2.775, "3680": 2.81514, "3685": 2.80323, "3690": 2.80698, "3695": 2.8113, "3700": 2.79386, "3705": 2.78495, "3710": 2.75189, "3715": 2.80584, "3720": 2.79164, "3725": 2.78971, "3730": 2.84277, "3735": 2.80084, "3740": 2.75167, "3745": 2.78977, "3750": 2.8098, "3755": 2.79727, "3760": 2.75924, "3765": 2.75857, "3770": 2.76223, "3775": 2.7724, "3780": 2.75873, "3785": 2.78248, "3790": 2.74027, "3795": 2.79302, "3800": 2.80519, "3805": 2.74912, "3810": 2.80038, "3815": 2.76417, "3820": 2.78637, "3825": 2.73345, "3830": 2.74434, "3835": 2.81528, "3840": 2.72535, "3845": 2.71517, "3850": 2.7759, "3855": 2.71819, "3860": 2.80223, "3865": 2.75145, "3870": 2.77884, "3875": 2.7548, "3880": 2.78858, "3885": 2.78561, "3890": 2.7427, "3895": 2.80078, "3900": 2.763, "3905": 2.72369, "3910": 2.74471, "3915": 2.74792, "3920": 2.7957, "3925": 2.78136, "3930": 2.70664, "3935": 2.73875, "3940": 2.7512, "3945": 2.74087, "3950": 2.72616, "3955": 2.78258, "3960": 2.76074, "3965": 2.73604, "3970": 2.76053, "3975": 2.7228, "3980": 2.73681, "3985": 2.74351, "3990": 2.69308, "3995": 2.78087, "4000": 2.73439, "4005": 2.7645, "4010": 2.71156, "4015": 2.7225, "4020": 2.74979, "4025": 2.73791, "4030": 2.66145, "4035": 2.69476, "4040": 2.74909, "4045": 2.75195, "4050": 2.793, "4055": 2.72064, "4060": 2.71579, "4065": 2.65039, "4070": 2.80685, "4075": 2.75766, "4080": 2.7176, "4085": 2.75184, "4090": 2.67829, "4095": 2.68941, "4100": 2.71116, "4105": 2.7381, "4110": 2.73433, "4115": 2.70383, "4120": 2.72644, "4125": 2.70326, "4130": 2.69567, "4135": 2.68847, "4140": 2.68244, "4145": 2.78166, "4150": 2.7109, "4155": 2.74325, "4160": 2.75924, "4165": 2.72284, "4170": 2.67671, "4175": 2.7184, "4180": 2.72668, "4185": 2.73222, "4190": 2.7433, "4195": 2.69591, "4200": 2.70483, "4205": 2.7474, "4210": 2.67596, "4215": 2.66392, "4220": 2.65965, "4225": 2.70274, "4230": 2.72377, "4235": 2.74411, "4240": 2.70703, "4245": 2.69466, "4250": 2.71128, "4255": 2.65417, "4260": 2.72613, "4265": 2.73162, "4270": 2.71959, "4275": 2.68688, "4280": 2.70044, "4285": 2.72954, "4290": 2.68891, "4295": 2.69172, "4300": 2.70243, "4305": 2.69874, "4310": 2.73143, "4315": 2.70797, "4320": 2.69666, "4325": 2.70512, "4330": 2.70808, "4335": 2.69036, "4340": 2.69965, "4345": 2.72447, "4350": 2.678, "4355": 2.69645, "4360": 2.71029, "4365": 2.78504, "4370": 2.73503, "4375": 2.74263, "4380": 2.70273, "4385": 2.70048, "4390": 2.70114, "4395": 2.75447, "4400": 2.66541, "4405": 2.66733, "4410": 2.68151, "4415": 2.70285, "4420": 2.70485, "4425": 2.72478, "4430": 2.69205, "4435": 2.68088, "4440": 2.69275, "4445": 2.67713, "4450": 2.65775, "4455": 2.66498, "4460": 2.6845, "4465": 2.70038, "4470": 2.66579, "4475": 2.68877, "4480": 2.65661, "4485": 2.70035, "4490": 2.65268, "4495": 2.70983, "4500": 2.70222, "4505": 2.69419, "4510": 2.65219, "4515": 2.70118, "4520": 2.67034, "4525": 2.66874, "4530": 2.6731, "4535": 2.6728, "4540": 2.7079, "4545": 2.65464, "4550": 2.69785, "4555": 2.6815, "4560": 2.65856, "4565": 2.63679, "4570": 2.63684, "4575": 2.66782, "4580": 2.68611, "4585": 2.68414, "4590": 2.61609, "4595": 2.66262, "4600": 2.67606, "4605": 2.67468, "4610": 2.654, "4615": 2.6622, "4620": 2.65616, "4625": 2.68499, "4630": 2.67253, "4635": 2.6466, "4640": 2.69285, "4645": 2.64556, "4650": 2.69984, "4655": 2.70726, "4660": 2.67453, "4665": 2.68291, "4670": 2.67662, "4675": 2.68532, "4680": 2.66375, "4685": 2.65801, "4690": 2.70245, "4695": 2.65383, "4700": 2.67085, "4705": 2.64735, "4710": 2.67431, "4715": 2.64693, "4720": 2.72245, "4725": 2.62735, "4730": 2.65203, "4735": 2.68854, "4740": 2.64278, "4745": 2.65231, "4750": 2.64294, "4755": 2.6543, "4760": 2.66362, "4765": 2.64482, "4770": 2.62263, "4775": 2.6566, "4780": 2.65634, "4785": 2.68806, "4790": 2.64951, "4795": 2.67216, "4800": 2.62823, "4805": 2.64022, "4810": 2.66167, "4815": 2.64636, "4820": 2.67328, "4825": 2.64853, "4830": 2.61783, "4835": 2.64848, "4840": 2.65599, "4845": 2.63544, "4850": 2.6234, "4855": 2.60014, "4860": 2.64988, "4865": 2.62358, "4870": 2.6371, "4875": 2.61926, "4880": 2.62723, "4885": 2.62608, "4890": 2.68019, "4895": 2.66044, "4900": 2.61984, "4905": 2.62136, "4910": 2.63889, "4915": 2.61291, "4920": 2.65162, "4925": 2.65078, "4930": 2.56796, "4935": 2.65004, "4940": 2.63024, "4945": 2.63576, "4950": 2.62436, "4955": 2.61774, "4960": 2.61629, "4965": 2.65631, "4970": 2.59705, "4975": 2.65252, "4980": 2.61904, "4985": 2.63678, "4990": 2.66152, "4995": 2.58063, "5000": 2.66045, "5005": 2.66356, "5010": 2.68499, "5015": 2.63424, "5020": 2.64137, "5025": 2.68407, "5030": 2.64422, "5035": 2.61707, "5040": 2.62087, "5045": 2.60544, "5050": 2.62396, "5055": 2.64733, "5060": 2.64318, "5065": 2.68772, "5070": 2.60466, "5075": 2.61227, "5080": 2.61173, "5085": 2.60499, "5090": 2.58989, "5095": 2.64843, "5100": 2.6477, "5105": 2.61034, "5110": 2.66373, "5115": 2.61769, "5120": 2.66964, "5125": 2.62941, "5130": 2.62064, "5135": 2.61463, "5140": 2.57312, "5145": 2.62795, "5150": 2.63616, "5155": 2.6153, "5160": 2.65997, "5165": 2.58469, "5170": 2.59083, "5175": 2.61858, "5180": 2.60231, "5185": 2.62039, "5190": 2.62266, "5195": 2.66986, "5200": 2.59923, "5205": 2.60567, "5210": 2.60375, "5215": 2.64444, "5220": 2.58584, "5225": 2.55198, "5230": 2.63426, "5235": 2.6137, "5240": 2.63897, "5245": 2.62503, "5250": 2.59342, "5255": 2.61945, "5260": 2.55675, "5265": 2.5966, "5270": 2.58634, "5275": 2.61598, "5280": 2.6093, "5285": 2.60352, "5290": 2.63147, "5295": 2.62046, "5300": 2.57796, "5305": 2.59879, "5310": 2.60951, "5315": 2.58631, "5320": 2.61356, "5325": 2.64475, "5330": 2.60019, "5335": 2.58018, "5340": 2.56441, "5345": 2.65636, "5350": 2.61842, "5355": 2.5783, "5360": 2.59377, "5365": 2.61801, "5370": 2.61254, "5375": 2.62746, "5380": 2.58006, "5385": 2.56497, "5390": 2.58471, "5395": 2.61543, "5400": 2.60535, "5405": 2.54471, "5410": 2.61116, "5415": 2.59493, "5420": 2.61087, "5425": 2.62359, "5430": 2.62944, "5435": 2.57511, "5440": 2.58461, "5445": 2.62941, "5450": 2.64785, "5455": 2.60933, "5460": 2.59031, "5465": 2.60448, "5470": 2.59656, "5475": 2.62571, "5480": 2.58862, "5485": 2.58774, "5490": 2.57674, "5495": 2.56937, "5500": 2.57162, "5505": 2.61732, "5510": 2.62327, "5515": 2.58103, "5520": 2.55613, "5525": 2.58505, "5530": 2.6623, "5535": 2.61983, "5540": 2.57971, "5545": 2.5963, "5550": 2.55065, "5555": 2.57137, "5560": 2.56341, "5565": 2.60658, "5570": 2.6531, "5575": 2.62897, "5580": 2.57097, "5585": 2.59387, "5590": 2.55903, "5595": 2.5812, "5600": 2.55625, "5605": 2.59949, "5610": 2.6032, "5615": 2.58541, "5620": 2.58891, "5625": 2.55113, "5630": 2.56709, "5635": 2.62959, "5640": 2.59006, "5645": 2.57007, "5650": 2.57618, "5655": 2.54883, "5660": 2.55672, "5665": 2.58378, "5670": 2.56521, "5675": 2.60465, "5680": 2.52584, "5685": 2.56696, "5690": 2.60045, "5695": 2.56182, "5700": 2.59568, "5705": 2.59394, "5710": 2.57669, "5715": 2.5826, "5720": 2.53423, "5725": 2.5999, "5730": 2.57165, "5735": 2.6098, "5740": 2.59181, "5745": 2.55744, "5750": 2.53622, "5755": 2.55744, "5760": 2.61016, "5765": 2.55712, "5770": 2.53921, "5775": 2.58317, "5780": 2.57555, "5785": 2.54017, "5790": 2.56288, "5795": 2.60105, "5800": 2.5443, "5805": 2.53292, "5810": 2.55578, "5815": 2.52579, "5820": 2.59564, "5825": 2.50445, "5830": 2.49591, "5835": 2.59555, "5840": 2.53664, "5845": 2.55102, "5850": 2.61182, "5855": 2.50591, "5860": 2.5595, "5865": 2.5168, "5870": 2.5709, "5875": 2.60648, "5880": 2.58466, "5885": 2.56491, "5890": 2.58066, "5895": 2.55433, "5900": 2.61282, "5905": 2.55708, "5910": 2.59606, "5915": 2.61004, "5920": 2.58628, "5925": 2.54892, "5930": 2.57556, "5935": 2.55218, "5940": 2.56846, "5945": 2.51922, "5950": 2.55501, "5955": 2.58444, "5960": 2.56546, "5965": 2.61692, "5970": 2.54907, "5975": 2.5782, "5980": 2.55619, "5985": 2.55991, "5990": 2.55528, "5995": 2.5556, "6000": 2.55374, "6005": 2.5191, "6010": 2.55957, "6015": 2.52279, "6020": 2.53663, "6025": 2.5575, "6030": 2.60349, "6035": 2.53977, "6040": 2.54732, "6045": 2.48956, "6050": 2.59373, "6055": 2.51759, "6060": 2.54364, "6065": 2.52506, "6070": 2.52795, "6075": 2.53438, "6080": 2.53245, "6085": 2.59511, "6090": 2.56684, "6095": 2.53299, "6100": 2.53951, "6105": 2.51884, "6110": 2.55311, "6115": 2.58258, "6120": 2.55371, "6125": 2.53728, "6130": 2.47259, "6135": 2.55215, "6140": 2.55304, "6145": 2.55523, "6150": 2.52413, "6155": 2.50692, "6160": 2.5399, "6165": 2.57075, "6170": 2.54324, "6175": 2.60272, "6180": 2.5117, "6185": 2.55096, "6190": 2.49087, "6195": 2.58142, "6200": 2.55511, "6205": 2.53743, "6210": 2.51886, "6215": 2.51354, "6220": 2.56404, "6225": 2.51038, "6230": 2.50949, "6235": 2.55782, "6240": 2.5474, "6245": 2.51953, "6250": 2.53154, "6255": 2.57376, "6260": 2.52132, "6265": 2.57082, "6270": 2.52458, "6275": 2.5647, "6280": 2.52114, "6285": 2.51936, "6290": 2.51772, "6295": 2.50495, "6300": 2.55371, "6305": 2.52341, "6310": 2.51156, "6315": 2.53799, "6320": 2.48865, "6325": 2.59662, "6330": 2.55743, "6335": 2.51069, "6340": 2.51007, "6345": 2.55246, "6350": 2.55366, "6355": 2.52156, "6360": 2.51885, "6365": 2.48184, "6370": 2.53372, "6375": 2.49002, "6380": 2.55558, "6385": 2.5767, "6390": 2.50208, "6395": 2.54964, "6400": 2.50648, "6405": 2.5284, "6410": 2.51476, "6415": 2.52527, "6420": 2.53987, "6425": 2.53375, "6430": 2.57553, "6435": 2.54291, "6440": 2.53606, "6445": 2.5261, "6450": 2.53105, "6455": 2.52052, "6460": 2.51435, "6465": 2.56395, "6470": 2.51723, "6475": 2.52453, "6480": 2.48625, "6485": 2.52837, "6490": 2.50622, "6495": 2.49818, "6500": 2.52195, "6505": 2.4931, "6510": 2.54046, "6515": 2.50814, "6520": 2.50918, "6525": 2.49368, "6530": 2.54205, "6535": 2.53099, "6540": 2.52728, "6545": 2.55812, "6550": 2.49925, "6555": 2.55498, "6560": 2.50772, "6565": 2.51955, "6570": 2.58197, "6575": 2.51851, "6580": 2.49803, "6585": 2.50479, "6590": 2.5071, "6595": 2.49587, "6600": 2.49494, "6605": 2.53677, "6610": 2.47396, "6615": 2.5644, "6620": 2.53086, "6625": 2.50746, "6630": 2.50876, "6635": 2.47027, "6640": 2.53637, "6645": 2.59496, "6650": 2.50951, "6655": 2.49786, "6660": 2.5728, "6665": 2.51935, "6670": 2.56777, "6675": 2.46606, "6680": 2.54467, "6685": 2.53215, "6690": 2.51119, "6695": 2.48473, "6700": 2.52243, "6705": 2.51555, "6710": 2.48867, "6715": 2.51712, "6720": 2.50708, "6725": 2.51742, "6730": 2.51663, "6735": 2.48198, "6740": 2.51211, "6745": 2.4922, "6750": 2.55648, "6755": 2.4723, "6760": 2.53712, "6765": 2.48397, "6770": 2.51507, "6775": 2.51025, "6780": 2.53511, "6785": 2.46839, "6790": 2.54093, "6795": 2.49591, "6800": 2.52369, "6805": 2.51004, "6810": 2.50199, "6815": 2.51897, "6820": 2.48774, "6825": 2.50198, "6830": 2.54013, "6835": 2.50512, "6840": 2.50711, "6845": 2.52125, "6850": 2.47229, "6855": 2.50969, "6860": 2.50054, "6865": 2.48572, "6870": 2.54956, "6875": 2.4712, "6880": 2.54881, "6885": 2.47487, "6890": 2.5436, "6895": 2.49828, "6900": 2.48596, "6905": 2.49507, "6910": 2.51699, "6915": 2.51703, "6920": 2.53146, "6925": 2.54357, "6930": 2.48963, "6935": 2.53168, "6940": 2.49926, "6945": 2.45917, "6950": 2.47996, "6955": 2.52555, "6960": 2.51973, "6965": 2.48892, "6970": 2.46869, "6975": 2.5198, "6980": 2.45127, "6985": 2.51378, "6990": 2.52745, "6995": 2.46386, "7000": 2.49292, "7005": 2.47076, "7010": 2.47193, "7015": 2.51923, "7020": 2.4656, "7025": 2.44932, "7030": 2.48167, "7035": 2.47601, "7040": 2.50296, "7045": 2.51709, "7050": 2.52386, "7055": 2.4407, "7060": 2.47677, "7065": 2.48307, "7070": 2.49149, "7075": 2.49187, "7080": 2.53617, "7085": 2.48543, "7090": 2.47936, "7095": 2.49991, "7100": 2.5145, "7105": 2.48528, "7110": 2.48447, "7115": 2.50545, "7120": 2.47118, "7125": 2.46427, "7130": 2.48687, "7135": 2.51791, "7140": 2.49857, "7145": 2.49905, "7150": 2.50975, "7155": 2.50276, "7160": 2.47314, "7165": 2.45551, "7170": 2.50452, "7175": 2.50876, "7180": 2.50782, "7185": 2.48119, "7190": 2.46308, "7195": 2.46511, "7200": 2.51021, "7205": 2.48975, "7210": 2.44283, "7215": 2.47686, "7220": 2.44115, "7225": 2.51312, "7230": 2.50722, "7235": 2.48205, "7240": 2.47643, "7245": 2.49755, "7250": 2.50762, "7255": 2.49721, "7260": 2.45929, "7265": 2.4525, "7270": 2.46942, "7275": 2.50166, "7280": 2.49282, "7285": 2.42281, "7290": 2.47949, "7295": 2.48657, "7300": 2.41668, "7305": 2.44463, "7310": 2.44614, "7315": 2.49102, "7320": 2.48398, "7325": 2.4587, "7330": 2.48805, "7335": 2.50988, "7340": 2.48182, "7345": 2.50357, "7350": 2.51481, "7355": 2.4955, "7360": 2.47833, "7365": 2.46532, "7370": 2.46748, "7375": 2.44794, "7380": 2.491, "7385": 2.48227, "7390": 2.47045, "7395": 2.47121, "7400": 2.47683, "7405": 2.43696, "7410": 2.47825, "7415": 2.46926, "7420": 2.49284, "7425": 2.45478, "7430": 2.52081, "7435": 2.48941, "7440": 2.51887, "7445": 2.50846, "7450": 2.47068, "7455": 2.45514, "7460": 2.46382, "7465": 2.47283, "7470": 2.44824, "7475": 2.45552, "7480": 2.51126, "7485": 2.44795, "7490": 2.47288, "7495": 2.47872, "7500": 2.4954, "7505": 2.43931, "7510": 2.43679, "7515": 2.42602, "7520": 2.49488, "7525": 2.49779, "7530": 2.4764, "7535": 2.4608, "7540": 2.47375, "7545": 2.47324, "7550": 2.49111, "7555": 2.45457, "7560": 2.42605, "7565": 2.50739, "7570": 2.48406, "7575": 2.43649, "7580": 2.45684, "7585": 2.48096, "7590": 2.48062, "7595": 2.46295, "7600": 2.46282, "7605": 2.44641, "7610": 2.44873, "7615": 2.42447, "7620": 2.54299, "7625": 2.47896, "7630": 2.42334, "7635": 2.42729, "7640": 2.45175, "7645": 2.47452, "7650": 2.46063, "7655": 2.48138, "7660": 2.45116, "7665": 2.43199, "7670": 2.43787, "7675": 2.45582, "7680": 2.48499, "7685": 2.43139, "7690": 2.48082, "7695": 2.45409, "7700": 2.48065, "7705": 2.49703, "7710": 2.49215, "7715": 2.44079, "7720": 2.46645, "7725": 2.4781, "7730": 2.45576, "7735": 2.4706, "7740": 2.43587, "7745": 2.44755, "7750": 2.43627, "7755": 2.46488, "7760": 2.45025, "7765": 2.45357, "7770": 2.46966, "7775": 2.45184, "7780": 2.41565, "7785": 2.44429, "7790": 2.48161, "7795": 2.4387, "7800": 2.46107, "7805": 2.47974, "7810": 2.50216, "7815": 2.48469, "7820": 2.44517, "7825": 2.51302, "7830": 2.45284, "7835": 2.46636, "7840": 2.47831, "7845": 2.45814, "7850": 2.41481, "7855": 2.47178, "7860": 2.49851, "7865": 2.42189, "7870": 2.46676, "7875": 2.44559, "7880": 2.45197, "7885": 2.46105, "7890": 2.46839, "7895": 2.44416, "7900": 2.44327, "7905": 2.4399, "7910": 2.42503, "7915": 2.48124, "7920": 2.47492, "7925": 2.42086, "7930": 2.47094, "7935": 2.45121, "7940": 2.41955, "7945": 2.46935, "7950": 2.443, "7955": 2.41741, "7960": 2.48735, "7965": 2.51531, "7970": 2.52005, "7975": 2.44889, "7980": 2.43925, "7985": 2.46589, "7990": 2.43286, "7995": 2.46965, "8000": 2.43555, "8005": 2.41767, "8010": 2.45778, "8015": 2.46851, "8020": 2.48289, "8025": 2.47366, "8030": 2.45191, "8035": 2.47038, "8040": 2.41739, "8045": 2.45121, "8050": 2.44577, "8055": 2.42416, "8060": 2.4439, "8065": 2.46086, "8070": 2.45679, "8075": 2.45789, "8080": 2.44963, "8085": 2.44356, "8090": 2.42934, "8095": 2.42538, "8100": 2.43745, "8105": 2.49294, "8110": 2.43619, "8115": 2.44708, "8120": 2.46809, "8125": 2.46565, "8130": 2.45026, "8135": 2.45222, "8140": 2.43783, "8145": 2.42444, "8150": 2.41897, "8155": 2.48401, "8160": 2.45031, "8165": 2.44066, "8170": 2.43331, "8175": 2.42291, "8180": 2.49505, "8185": 2.42413, "8190": 2.46727, "8195": 2.45702, "8200": 2.44543, "8205": 2.44239, "8210": 2.42957, "8215": 2.43863, "8220": 2.43571, "8225": 2.40744, "8230": 2.4383, "8235": 2.46481, "8240": 2.42743, "8245": 2.44499, "8250": 2.44268, "8255": 2.43811, "8260": 2.43043, "8265": 2.42635, "8270": 2.4314, "8275": 2.439, "8280": 2.39471, "8285": 2.4371, "8290": 2.4787, "8295": 2.4468, "8300": 2.45767, "8305": 2.40684, "8310": 2.43334, "8315": 2.45502, "8320": 2.39805, "8325": 2.39197, "8330": 2.43308, "8335": 2.44308, "8340": 2.48888, "8345": 2.44628, "8350": 2.44639, "8355": 2.40546, "8360": 2.39929, "8365": 2.45417, "8370": 2.44948, "8375": 2.42207, "8380": 2.41704, "8385": 2.42193, "8390": 2.43392, "8395": 2.43972, "8400": 2.43656, "8405": 2.48737, "8410": 2.43642, "8415": 2.43178, "8420": 2.4154, "8425": 2.43742, "8430": 2.45796, "8435": 2.4016, "8440": 2.4493, "8445": 2.45931, "8450": 2.40687, "8455": 2.4583, "8460": 2.45299, "8465": 2.43611, "8470": 2.40664, "8475": 2.47491, "8480": 2.39991, "8485": 2.41159, "8490": 2.4636, "8495": 2.4338, "8500": 2.44284, "8505": 2.40127, "8510": 2.40109, "8515": 2.42708, "8520": 2.42327, "8525": 2.4893, "8530": 2.37072, "8535": 2.3984, "8540": 2.48234, "8545": 2.37887, "8550": 2.43613, "8555": 2.44987, "8560": 2.46804, "8565": 2.41811, "8570": 2.42855, "8575": 2.44716, "8580": 2.43817, "8585": 2.41778, "8590": 2.40295, "8595": 2.42576, "8600": 2.40987, "8605": 2.48948, "8610": 2.41782, "8615": 2.3859, "8620": 2.44601, "8625": 2.42188, "8630": 2.45373, "8635": 2.44792, "8640": 2.43343, "8645": 2.47174, "8650": 2.41846, "8655": 2.44973, "8660": 2.45273, "8665": 2.38524, "8670": 2.40906, "8675": 2.42542, "8680": 2.44664, "8685": 2.42834, "8690": 2.40798, "8695": 2.44069, "8700": 2.43236, "8705": 2.41824, "8710": 2.42475, "8715": 2.44588, "8720": 2.47311, "8725": 2.40868, "8730": 2.38864, "8735": 2.43207, "8740": 2.42712, "8745": 2.39549, "8750": 2.43473, "8755": 2.42122, "8760": 2.39833, "8765": 2.43262, "8770": 2.4022, "8775": 2.43497, "8780": 2.41731, "8785": 2.46882, "8790": 2.41927, "8795": 2.41496, "8800": 2.41258, "8805": 2.4022, "8810": 2.4086, "8815": 2.47171, "8820": 2.45088, "8825": 2.42205, "8830": 2.38509, "8835": 2.42014, "8840": 2.39166, "8845": 2.42406, "8850": 2.43217, "8855": 2.40153, "8860": 2.42654, "8865": 2.42394, "8870": 2.43195, "8875": 2.43696, "8880": 2.41009, "8885": 2.39164, "8890": 2.44477, "8895": 2.42648, "8900": 2.40963, "8905": 2.40108, "8910": 2.39946, "8915": 2.41554, "8920": 2.4314, "8925": 2.46336, "8930": 2.41363, "8935": 2.41074, "8940": 2.3913, "8945": 2.39406, "8950": 2.41727, "8955": 2.39375, "8960": 2.4301, "8965": 2.41455, "8970": 2.40198, "8975": 2.47351, "8980": 2.43912, "8985": 2.3725, "8990": 2.4072, "8995": 2.41532, "9000": 2.45263, "9005": 2.4103, "9010": 2.3721, "9015": 2.40665, "9020": 2.39666, "9025": 2.36727, "9030": 2.3982, "9035": 2.42357, "9040": 2.41928, "9045": 2.41653, "9050": 2.39419, "9055": 2.41359, "9060": 2.42243, "9065": 2.40648, "9070": 2.44561, "9075": 2.40006, "9080": 2.4353, "9085": 2.41274, "9090": 2.40888, "9095": 2.39364, "9100": 2.39726, "9105": 2.35553, "9110": 2.46347, "9115": 2.41422, "9120": 2.40286, "9125": 2.45663, "9130": 2.39303, "9135": 2.44715, "9140": 2.43347, "9145": 2.42418, "9150": 2.42245, "9155": 2.37233, "9160": 2.41548, "9165": 2.42188, "9170": 2.37128, "9175": 2.41634, "9180": 2.37539, "9185": 2.43602, "9190": 2.41006, "9195": 2.39528, "9200": 2.38905, "9205": 2.44683, "9210": 2.35959, "9215": 2.46103, "9220": 2.44517, "9225": 2.38308, "9230": 2.44429, "9235": 2.39669, "9240": 2.40026, "9245": 2.43419, "9250": 2.42714, "9255": 2.4276, "9260": 2.38373, "9265": 2.43608, "9270": 2.43409, "9275": 2.3931, "9280": 2.38867, "9285": 2.4203, "9290": 2.4023, "9295": 2.38238, "9300": 2.42128, "9305": 2.4009, "9310": 2.41329, "9315": 2.40763, "9320": 2.44088, "9325": 2.36703, "9330": 2.40015, "9335": 2.35797, "9340": 2.40398, "9345": 2.41196, "9350": 2.43628, "9355": 2.47465, "9360": 2.43472, "9365": 2.38496, "9370": 2.43401, "9375": 2.43106, "9380": 2.35009, "9385": 2.39823, "9390": 2.38017, "9395": 2.38556, "9400": 2.44206, "9405": 2.41147, "9410": 2.39576, "9415": 2.43559, "9420": 2.44166, "9425": 2.42901, "9430": 2.4458, "9435": 2.41221, "9440": 2.49235, "9445": 2.37336, "9450": 2.39239, "9455": 2.39943, "9460": 2.38476, "9465": 2.3775, "9470": 2.37809, "9475": 2.36216, "9480": 2.43335, "9485": 2.38637, "9490": 2.41899, "9495": 2.37932, "9500": 2.36225, "9505": 2.42686, "9510": 2.39739, "9515": 2.42913, "9520": 2.41611, "9525": 2.38695, "9530": 2.45362, "9535": 2.39932, "9540": 2.41586, "9545": 2.37484, "9550": 2.41798, "9555": 2.38604, "9560": 2.41979, "9565": 2.40246, "9570": 2.36939, "9575": 2.40875, "9580": 2.39212, "9585": 2.41991, "9590": 2.42589, "9595": 2.44516, "9600": 2.38872, "9605": 2.38204, "9610": 2.41846, "9615": 2.41173, "9620": 2.41141, "9625": 2.44446, "9630": 2.3934, "9635": 2.39835, "9640": 2.44333, "9645": 2.41006, "9650": 2.397, "9655": 2.37165, "9660": 2.42221, "9665": 2.38505, "9670": 2.3791, "9675": 2.35566, "9680": 2.39367, "9685": 2.39384, "9690": 2.46146, "9695": 2.37783, "9700": 2.37469, "9705": 2.3801, "9710": 2.36226, "9715": 2.38566, "9720": 2.43419, "9725": 2.43853, "9730": 2.42613, "9735": 2.38543, "9740": 2.37875, "9745": 2.42516, "9750": 2.39583, "9755": 2.40544, "9760": 2.40898, "9765": 2.36552, "9770": 2.44988, "9775": 2.39711, "9780": 2.35806, "9785": 2.39847, "9790": 2.40274, "9795": 2.35572, "9800": 2.39394, "9805": 2.40272, "9810": 2.40625, "9815": 2.37647, "9820": 2.37365, "9825": 2.40207, "9830": 2.41904, "9835": 2.38138, "9840": 2.41256, "9845": 2.36186, "9850": 2.39561, "9855": 2.39362, "9860": 2.3883, "9865": 2.37949, "9870": 2.38395, "9875": 2.37812, "9880": 2.4481, "9885": 2.39026, "9890": 2.35045, "9895": 2.31743, "9900": 2.39277, "9905": 2.42068, "9910": 2.35297, "9915": 2.36096, "9920": 2.40786, "9925": 2.39424, "9930": 2.37696, "9935": 2.34707, "9940": 2.38124, "9945": 2.37663, "9950": 2.40083, "9955": 2.44659, "9960": 2.42126, "9965": 2.35351, "9970": 2.40527, "9975": 2.38036, "9980": 2.32759, "9985": 2.40307, "9990": 2.39363, "9995": 2.39332, "10000": 2.36327, "10005": 2.37074, "10010": 2.3806, "10015": 2.44338, "10020": 2.36135, "10025": 2.38541, "10030": 2.38361, "10035": 2.40681, "10040": 2.40097, "10045": 2.3805, "10050": 2.34561, "10055": 2.36626, "10060": 2.41567, "10065": 2.37141, "10070": 2.41944, "10075": 2.3688, "10080": 2.35987, "10085": 2.36723, "10090": 2.34419, "10095": 2.3992, "10100": 2.31087, "10105": 2.38011, "10110": 2.40947, "10115": 2.38354, "10120": 2.35477, "10125": 2.36669, "10130": 2.356, "10135": 2.37949, "10140": 2.40945, "10145": 2.40422, "10150": 2.37295, "10155": 2.39229, "10160": 2.35925, "10165": 2.37936, "10170": 2.42042, "10175": 2.32198, "10180": 2.39163, "10185": 2.38075, "10190": 2.43982, "10195": 2.39938, "10200": 2.38772, "10205": 2.38511, "10210": 2.36479, "10215": 2.33972, "10220": 2.4154, "10225": 2.42649, "10230": 2.35414, "10235": 2.38366, "10240": 2.36909, "10245": 2.38841, "10250": 2.38635, "10255": 2.40743, "10260": 2.33221, "10265": 2.34569, "10270": 2.34683, "10275": 2.36907, "10280": 2.44605, "10285": 2.35463, "10290": 2.38162, "10295": 2.37197, "10300": 2.3663, "10305": 2.41127, "10310": 2.38546, "10315": 2.3572, "10320": 2.3632, "10325": 2.35759, "10330": 2.40967, "10335": 2.35857, "10340": 2.41803, "10345": 2.36647, "10350": 2.35282, "10355": 2.39491, "10360": 2.37006, "10365": 2.35612, "10370": 2.33807, "10375": 2.35472, "10380": 2.41541, "10385": 2.40294, "10390": 2.37794, "10395": 2.35575, "10400": 2.37529, "10405": 2.34605, "10410": 2.33698, "10415": 2.41294, "10420": 2.37655, "10425": 2.32626, "10430": 2.35825, "10435": 2.36985, "10440": 2.36924, "10445": 2.35764, "10450": 2.35758, "10455": 2.37741, "10460": 2.37814, "10465": 2.30072, "10470": 2.35475, "10475": 2.37652, "10480": 2.35981, "10485": 2.35755, "10490": 2.41106, "10495": 2.36508, "10500": 2.36082, "10505": 2.36862, "10510": 2.38026, "10515": 2.37107, "10520": 2.39974, "10525": 2.38773, "10530": 2.3903, "10535": 2.35314, "10540": 2.40139, "10545": 2.35508, "10550": 2.37537, "10555": 2.35766, "10560": 2.33867, "10565": 2.36928, "10570": 2.37241, "10575": 2.35337, "10580": 2.37591, "10585": 2.36718, "10590": 2.37595, "10595": 2.37386, "10600": 2.32788, "10605": 2.36934, "10610": 2.36309, "10615": 2.36046, "10620": 2.34468, "10625": 2.41612, "10630": 2.36684, "10635": 2.32028, "10640": 2.36019, "10645": 2.41858, "10650": 2.35833, "10655": 2.30656, "10660": 2.34518, "10665": 2.3959, "10670": 2.31378, "10675": 2.41297, "10680": 2.35287, "10685": 2.28643, "10690": 2.38254, "10695": 2.32835, "10700": 2.38122, "10705": 2.38282, "10710": 2.34041, "10715": 2.38011, "10720": 2.32215, "10725": 2.35031, "10730": 2.34591, "10735": 2.35167, "10740": 2.31662, "10745": 2.33673, "10750": 2.33158, "10755": 2.40211, "10760": 2.36227, "10765": 2.33285, "10770": 2.3645, "10775": 2.38419, "10780": 2.36626, "10785": 2.38858, "10790": 2.34271, "10795": 2.38424, "10800": 2.31983, "10805": 2.393, "10810": 2.37973, "10815": 2.35306, "10820": 2.34071, "10825": 2.37053, "10830": 2.3355, "10835": 2.34671, "10840": 2.32871, "10845": 2.38434, "10850": 2.3306, "10855": 2.36019, "10860": 2.32813, "10865": 2.31818, "10870": 2.3208, "10875": 2.30074, "10880": 2.38978, "10885": 2.39941, "10890": 2.35879, "10895": 2.36801, "10900": 2.32918, "10905": 2.30954, "10910": 2.40367, "10915": 2.36886, "10920": 2.37185, "10925": 2.35928, "10930": 2.31545, "10935": 2.35817, "10940": 2.35304, "10945": 2.34499, "10950": 2.3586, "10955": 2.36083, "10960": 2.31013, "10965": 2.36276, "10970": 2.3544, "10975": 2.40621, "10980": 2.37281, "10985": 2.34116, "10990": 2.39677, "10995": 2.3618, "11000": 2.33513, "11005": 2.35938, "11010": 2.3399, "11015": 2.32623, "11020": 2.3316, "11025": 2.3632, "11030": 2.33667, "11035": 2.31108, "11040": 2.31576, "11045": 2.31425, "11050": 2.31472, "11055": 2.28685, "11060": 2.33683, "11065": 2.3084, "11070": 2.39127, "11075": 2.31786, "11080": 2.35222, "11085": 2.33351, "11090": 2.34392, "11095": 2.36797, "11100": 2.3265, "11105": 2.31367, "11110": 2.35986, "11115": 2.37088, "11120": 2.37916, "11125": 2.31188, "11130": 2.347, "11135": 2.33062, "11140": 2.36971, "11145": 2.34718, "11150": 2.39317, "11155": 2.34385, "11160": 2.3649, "11165": 2.36082, "11170": 2.33945, "11175": 2.33319, "11180": 2.37023, "11185": 2.309, "11190": 2.27541, "11195": 2.32471, "11200": 2.34445, "11205": 2.35933, "11210": 2.32942, "11215": 2.31568, "11220": 2.34093, "11225": 2.36886, "11230": 2.36356, "11235": 2.31702, "11240": 2.33955, "11245": 2.35489, "11250": 2.32935, "11255": 2.33291, "11260": 2.35257, "11265": 2.38669, "11270": 2.28545, "11275": 2.3118, "11280": 2.36637, "11285": 2.2869, "11290": 2.34473, "11295": 2.36142, "11300": 2.37738, "11305": 2.332, "11310": 2.32754, "11315": 2.2952, "11320": 2.30184, "11325": 2.31182, "11330": 2.351, "11335": 2.33633, "11340": 2.30613, "11345": 2.31075, "11350": 2.29315, "11355": 2.31908, "11360": 2.34945, "11365": 2.29079, "11370": 2.35052, "11375": 2.32524, "11380": 2.33757, "11385": 2.34424, "11390": 2.33157, "11395": 2.28458, "11400": 2.3055, "11405": 2.35199, "11410": 2.3536, "11415": 2.3827, "11420": 2.34858, "11425": 2.30526, "11430": 2.36661, "11435": 2.35913, "11440": 2.34643, "11445": 2.36097, "11450": 2.31898, "11455": 2.30167, "11460": 2.34902, "11465": 2.34154, "11470": 2.37093, "11475": 2.31071, "11480": 2.32253, "11485": 2.30803, "11490": 2.34285, "11495": 2.40367, "11500": 2.33764, "11505": 2.34714, "11510": 2.3602, "11515": 2.3205, "11520": 2.3015, "11525": 2.35776, "11530": 2.31267, "11535": 2.31909, "11540": 2.34372, "11545": 2.3401, "11550": 2.36264, "11555": 2.32453, "11560": 2.34618, "11565": 2.33767, "11570": 2.34761, "11575": 2.29347, "11580": 2.32709, "11585": 2.34922, "11590": 2.36, "11595": 2.33327, "11600": 2.35455, "11605": 2.32074, "11610": 2.35918, "11615": 2.35646, "11620": 2.29387, "11625": 2.27425, "11630": 2.32342, "11635": 2.33955, "11640": 2.30268, "11645": 2.30512, "11650": 2.32491, "11655": 2.3481, "11660": 2.33381, "11665": 2.32741, "11670": 2.29796, "11675": 2.29473, "11680": 2.32184, "11685": 2.33352, "11690": 2.34134, "11695": 2.31568, "11700": 2.32253, "11705": 2.29936, "11710": 2.3417, "11715": 2.31194, "11720": 2.29665, "11725": 2.33711, "11730": 2.30157, "11735": 2.32655, "11740": 2.26884, "11745": 2.3155, "11750": 2.32488, "11755": 2.34917, "11760": 2.3101, "11765": 2.33577, "11770": 2.28093, "11775": 2.32264, "11780": 2.25232, "11785": 2.29425, "11790": 2.31246, "11795": 2.31787, "11800": 2.33247, "11805": 2.30216, "11810": 2.30241, "11815": 2.32778, "11820": 2.31616, "11825": 2.35743, "11830": 2.31432, "11835": 2.33536, "11840": 2.33846, "11845": 2.31424, "11850": 2.30054, "11855": 2.31139, "11860": 2.34035, "11865": 2.35585, "11870": 2.37693, "11875": 2.27806, "11880": 2.28912, "11885": 2.33326, "11890": 2.29073, "11895": 2.28718, "11900": 2.33152, "11905": 2.31761, "11910": 2.27514, "11915": 2.31044, "11920": 2.33178, "11925": 2.30105, "11930": 2.3035, "11935": 2.31451, "11940": 2.31634, "11945": 2.33969, "11950": 2.29695, "11955": 2.311, "11960": 2.33584, "11965": 2.29229, "11970": 2.27852, "11975": 2.33299, "11980": 2.30437, "11985": 2.27578, "11990": 2.30051, "11995": 2.3254, "12000": 2.32044, "12005": 2.32254, "12010": 2.287, "12015": 2.30787, "12020": 2.32644, "12025": 2.33234, "12030": 2.30964, "12035": 2.33396, "12040": 2.31428, "12045": 2.30916, "12050": 2.30676, "12055": 2.33194, "12060": 2.29554, "12065": 2.32836, "12070": 2.30021, "12075": 2.27383, "12080": 2.34819, "12085": 2.3365, "12090": 2.3352, "12095": 2.27931, "12100": 2.31493, "12105": 2.30672, "12110": 2.32804, "12115": 2.30368, "12120": 2.3055, "12125": 2.29313, "12130": 2.3033, "12135": 2.32683, "12140": 2.29358, "12145": 2.25451, "12150": 2.25825, "12155": 2.34057, "12160": 2.35496, "12165": 2.31764, "12170": 2.3298, "12175": 2.33954, "12180": 2.32799, "12185": 2.33928, "12190": 2.33266, "12195": 2.29593, "12200": 2.2978, "12205": 2.32039, "12210": 2.35327, "12215": 2.30138, "12220": 2.29662, "12225": 2.24091, "12230": 2.33199, "12235": 2.33672, "12240": 2.32196, "12245": 2.28554, "12250": 2.27163, "12255": 2.3345, "12260": 2.31205, "12265": 2.33977, "12270": 2.31018, "12275": 2.31145, "12280": 2.31694, "12285": 2.28488, "12290": 2.30964, "12295": 2.26475, "12300": 2.32695, "12305": 2.26556, "12310": 2.28586, "12315": 2.35322, "12320": 2.29379, "12325": 2.31819, "12330": 2.29723, "12335": 2.31787, "12340": 2.33832, "12345": 2.36577, "12350": 2.34163, "12355": 2.3043, "12360": 2.31129, "12365": 2.32798, "12370": 2.28923, "12375": 2.29848, "12380": 2.29133, "12385": 2.28854, "12390": 2.24724, "12395": 2.30213, "12400": 2.2967, "12405": 2.30923, "12410": 2.30152, "12415": 2.27992, "12420": 2.31384, "12425": 2.29772, "12430": 2.31344, "12435": 2.29676, "12440": 2.32871, "12445": 2.31804, "12450": 2.30467, "12455": 2.2384, "12460": 2.33454, "12465": 2.36151, "12470": 2.27396, "12475": 2.2715, "12480": 2.28953, "12485": 2.30371, "12490": 2.3285, "12495": 2.26783, "12500": 2.3181, "12505": 2.33334, "12510": 2.35496, "12515": 2.26669, "12520": 2.31731, "12525": 2.28543, "12530": 2.31834, "12535": 2.26993, "12540": 2.28318, "12545": 2.28908, "12550": 2.31502, "12555": 2.32139, "12560": 2.2992, "12565": 2.33342, "12570": 2.27563, "12575": 2.29847, "12580": 2.30925, "12585": 2.28922, "12590": 2.33257, "12595": 2.32225, "12600": 2.27889, "12605": 2.31806, "12610": 2.36075, "12615": 2.30398, "12620": 2.3309, "12625": 2.32719, "12630": 2.29525, "12635": 2.33274, "12640": 2.291, "12645": 2.27656, "12650": 2.32322, "12655": 2.26383, "12660": 2.33939, "12665": 2.31714, "12670": 2.30946, "12675": 2.31663, "12680": 2.27207, "12685": 2.36333, "12690": 2.29991, "12695": 2.32975, "12700": 2.29122, "12705": 2.30394, "12710": 2.30455, "12715": 2.28428, "12720": 2.31067, "12725": 2.27276, "12730": 2.33673, "12735": 2.29462, "12740": 2.33558, "12745": 2.28584, "12750": 2.2699, "12755": 2.27891, "12760": 2.2643, "12765": 2.33516, "12770": 2.32517, "12775": 2.26194, "12780": 2.31758, "12785": 2.29858, "12790": 2.305, "12795": 2.31784, "12800": 2.295, "12805": 2.31423, "12810": 2.2707, "12815": 2.29649, "12820": 2.32214, "12825": 2.32192, "12830": 2.28913, "12835": 2.26628, "12840": 2.26923, "12845": 2.31102, "12850": 2.27498, "12855": 2.27205, "12860": 2.26959, "12865": 2.31517, "12870": 2.26394, "12875": 2.33856, "12880": 2.31598, "12885": 2.27957, "12890": 2.3054, "12895": 2.24332, "12900": 2.32356, "12905": 2.31445, "12910": 2.28719, "12915": 2.28526, "12920": 2.29812, "12925": 2.29729, "12930": 2.2709, "12935": 2.23939, "12940": 2.25626, "12945": 2.30725, "12950": 2.28201, "12955": 2.32451, "12960": 2.31483, "12965": 2.28687, "12970": 2.26921, "12975": 2.26485, "12980": 2.33074, "12985": 2.27621, "12990": 2.27994, "12995": 2.27207, "13000": 2.24859, "13005": 2.33964, "13010": 2.31719, "13015": 2.27104, "13020": 2.27682, "13025": 2.29836, "13030": 2.27504, "13035": 2.2833, "13040": 2.31854, "13045": 2.27937, "13050": 2.2816, "13055": 2.28997, "13060": 2.29601, "13065": 2.29914, "13070": 2.32275, "13075": 2.29548, "13080": 2.27474, "13085": 2.27554, "13090": 2.29324, "13095": 2.30441, "13100": 2.30096, "13105": 2.30963, "13110": 2.3384, "13115": 2.22536, "13120": 2.30436, "13125": 2.23793, "13130": 2.26885, "13135": 2.31132, "13140": 2.30251, "13145": 2.2621, "13150": 2.28132, "13155": 2.25766, "13160": 2.24345, "13165": 2.25608, "13170": 2.31002, "13175": 2.21209, "13180": 2.28248, "13185": 2.26244, "13190": 2.26902, "13195": 2.25976, "13200": 2.31592, "13205": 2.30729, "13210": 2.27532, "13215": 2.30175, "13220": 2.28281, "13225": 2.29494, "13230": 2.2523, "13235": 2.26645, "13240": 2.26921, "13245": 2.30828, "13250": 2.26873, "13255": 2.29193, "13260": 2.29028, "13265": 2.27689, "13270": 2.27613, "13275": 2.27018, "13280": 2.26618, "13285": 2.27978, "13290": 2.30548, "13295": 2.30792, "13300": 2.24039, "13305": 2.26709, "13310": 2.25901, "13315": 2.22115, "13320": 2.25159, "13325": 2.3082, "13330": 2.27151, "13335": 2.3034, "13340": 2.2802, "13345": 2.29134, "13350": 2.25221, "13355": 2.27666, "13360": 2.22747, "13365": 2.25646, "13370": 2.31861, "13375": 2.67549, "13380": 2.30447, "13385": 2.29677, "13390": 2.32032, "13395": 2.27181, "13400": 2.27274, "13405": 2.32028, "13410": 2.26657, "13415": 2.2623, "13420": 2.29077, "13425": 2.24928, "13430": 2.28355, "13435": 2.29974, "13440": 2.32077, "13445": 2.26871, "13450": 2.29282, "13455": 2.29875, "13460": 2.29413, "13465": 2.28947, "13470": 2.28103, "13475": 2.28205, "13480": 2.27026, "13485": 2.284, "13490": 2.25712, "13495": 2.24561, "13500": 2.23139, "13505": 2.28571, "13510": 2.26375, "13515": 2.24785, "13520": 2.31312, "13525": 2.26168, "13530": 2.29887, "13535": 2.3103, "13540": 2.26916, "13545": 2.25116, "13550": 2.23608, "13555": 2.29608, "13560": 2.29278, "13565": 2.30201, "13570": 2.23629, "13575": 2.23636, "13580": 2.28136, "13585": 2.30566, "13590": 2.32676, "13595": 2.27285, "13600": 2.19804, "13605": 2.28798, "13610": 2.21919, "13615": 2.31505, "13620": 2.29095, "13625": 2.2586, "13630": 2.28563, "13635": 2.23362, "13640": 2.24566, "13645": 2.27209, "13650": 2.29878, "13655": 2.26944, "13660": 2.24171, "13665": 2.24805, "13670": 2.24718, "13675": 2.27663, "13680": 2.22934, "13685": 2.29934, "13690": 2.25804, "13695": 2.2748, "13700": 2.27039, "13705": 2.2602, "13710": 2.24626, "13715": 2.26633, "13720": 2.27607, "13725": 2.29478, "13730": 2.32023, "13735": 2.34274, "13740": 2.27245, "13745": 2.2707, "13750": 2.32478, "13755": 2.264, "13760": 2.27121, "13765": 2.27772, "13770": 2.29252, "13775": 2.34474, "13780": 2.20989, "13785": 2.21869, "13790": 2.28704, "13795": 2.26525, "13800": 2.29987, "13805": 2.27087, "13810": 2.2503, "13815": 2.24542, "13820": 2.26672, "13825": 2.24809, "13830": 2.28522, "13835": 2.28568, "13840": 2.20291, "13845": 2.26479, "13850": 2.27273, "13855": 2.30596, "13860": 2.2819, "13865": 2.25773, "13870": 2.21582, "13875": 2.25498, "13880": 2.24713, "13885": 2.25853, "13890": 2.22765, "13895": 2.29668, "13900": 2.22549, "13905": 2.27014, "13910": 2.22592, "13915": 2.27527, "13920": 2.25788, "13925": 2.29607, "13930": 2.2323, "13935": 2.28396, "13940": 2.31431, "13945": 2.27286, "13950": 2.23414, "13955": 2.29762, "13960": 2.23765, "13965": 2.24071, "13970": 2.25051, "13975": 2.27124, "13980": 2.27826, "13985": 2.28723, "13990": 2.29432, "13995": 2.26315, "14000": 2.23096, "14005": 2.25527, "14010": 2.28205, "14015": 2.30156, "14020": 2.22202, "14025": 2.23593, "14030": 2.27639, "14035": 2.25629, "14040": 2.26723, "14045": 2.23975, "14050": 2.2456, "14055": 2.25938, "14060": 2.21519, "14065": 2.31383, "14070": 2.24963, "14075": 2.24562, "14080": 2.3167, "14085": 2.27831, "14090": 2.25461, "14095": 2.31221, "14100": 2.26571, "14105": 2.27158, "14110": 2.24213, "14115": 2.25089, "14120": 2.30099, "14125": 2.20538, "14130": 2.22216, "14135": 2.22475, "14140": 2.25539, "14145": 2.25639, "14150": 2.3028, "14155": 2.24072, "14160": 2.25989, "14165": 2.20636, "14170": 2.34546, "14175": 2.29435, "14180": 2.22867, "14185": 2.24225, "14190": 2.2526, "14195": 2.25723, "14200": 2.30263, "14205": 2.26032, "14210": 2.24737, "14215": 2.22093, "14220": 2.2358, "14225": 2.28027, "14230": 2.21415, "14235": 2.23654, "14240": 2.28217, "14245": 2.27873, "14250": 2.2544, "14255": 2.29226, "14260": 2.25042, "14265": 2.22744, "14270": 2.19684, "14275": 2.24507, "14280": 2.23814, "14285": 2.28375, "14290": 2.22789, "14295": 2.26328, "14300": 2.28204, "14305": 2.28106, "14310": 2.21961, "14315": 2.26319, "14320": 2.26122, "14325": 2.29105, "14330": 2.29036, "14335": 2.30922, "14340": 2.29946, "14345": 2.25439, "14350": 2.26925, "14355": 2.28703, "14360": 2.2755, "14365": 2.27839, "14370": 2.26993, "14375": 2.26952, "14380": 2.27949, "14385": 2.21965, "14390": 2.26369, "14395": 2.29704, "14400": 2.25742, "14405": 2.23511, "14410": 2.20857, "14415": 2.21911, "14420": 2.27115, "14425": 2.26607, "14430": 2.25844, "14435": 2.26502, "14440": 2.21947, "14445": 2.2406, "14450": 2.25234, "14455": 2.29085, "14460": 2.26448, "14465": 2.28642, "14470": 2.24203, "14475": 2.24232, "14480": 2.23252, "14485": 2.25668, "14490": 2.28724, "14495": 2.26431, "14500": 2.25031, "14505": 2.28065, "14510": 2.19539, "14515": 2.27147, "14520": 2.2404, "14525": 2.24362, "14530": 2.25759, "14535": 2.29865, "14540": 2.23202, "14545": 2.29288, "14550": 2.25593, "14555": 2.2563, "14560": 2.26103, "14565": 2.27078, "14570": 2.26556, "14575": 2.29872, "14580": 2.27592, "14585": 2.22211, "14590": 2.23005, "14595": 2.26723, "14600": 2.26048, "14605": 2.23305, "14610": 2.30703, "14615": 2.26311, "14620": 2.29589, "14625": 2.24507, "14630": 2.26364, "14635": 2.25434, "14640": 2.25908, "14645": 2.2796, "14650": 2.30888, "14655": 2.26689, "14660": 2.22617, "14665": 2.22842, "14670": 2.25918, "14675": 2.22982, "14680": 2.24986, "14685": 2.25418, "14690": 2.26708, "14695": 2.27194, "14700": 2.25691, "14705": 2.24288, "14710": 2.22454, "14715": 2.26024, "14720": 2.23879, "14725": 2.21893, "14730": 2.26742, "14735": 2.24052, "14740": 2.24686, "14745": 2.20477, "14750": 2.23755, "14755": 2.31791, "14760": 2.2235, "14765": 2.24898, "14770": 2.24013, "14775": 2.21311, "14780": 2.21498, "14785": 2.26365, "14790": 2.27118, "14795": 2.23662, "14800": 2.27754, "14805": 2.26882, "14810": 2.27465, "14815": 2.25778, "14820": 2.25852, "14825": 2.26395, "14830": 2.21317, "14835": 2.25181, "14840": 2.24159, "14845": 2.22237, "14850": 2.30587, "14855": 2.27892, "14860": 2.23632, "14865": 2.23998, "14870": 2.26544, "14875": 2.24137, "14880": 2.23102, "14885": 2.27724, "14890": 2.26823, "14895": 2.25412, "14900": 2.27399, "14905": 2.21638, "14910": 2.28931, "14915": 2.26774, "14920": 2.27017, "14925": 2.26007, "14930": 2.25543, "14935": 2.20837, "14940": 2.288, "14945": 2.26302, "14950": 2.22639, "14955": 2.22787, "14960": 2.25711, "14965": 2.25178, "14970": 2.25213, "14975": 2.24942, "14980": 2.20962, "14985": 2.19299, "14990": 2.2501, "14995": 2.22744, "15000": 2.17841, "15005": 2.28841, "15010": 2.22388, "15015": 2.23758, "15020": 2.25373, "15025": 2.20232, "15030": 2.24651, "15035": 2.22803, "15040": 2.25343, "15045": 2.26413, "15050": 2.23433, "15055": 2.20072, "15060": 2.24183, "15065": 2.22981, "15070": 2.24582, "15075": 2.22713, "15080": 2.21657, "15085": 2.21282, "15090": 2.2633, "15095": 2.27077, "15100": 2.25554, "15105": 2.23999, "15110": 2.22815, "15115": 2.27037, "15120": 2.27289, "15125": 2.20472, "15130": 2.25022, "15135": 2.25053, "15140": 2.29504, "15145": 2.2392, "15150": 2.24351, "15155": 2.24027, "15160": 2.24569, "15165": 2.23127, "15170": 2.23132, "15175": 2.25332, "15180": 2.27956, "15185": 2.24399, "15190": 2.24367, "15195": 2.23788, "15200": 2.22904, "15205": 2.26991, "15210": 2.23298, "15215": 2.27303, "15220": 2.23938, "15225": 2.28239, "15230": 2.25615, "15235": 2.24329, "15240": 2.22013, "15245": 2.23169, "15250": 2.24382, "15255": 2.23921, "15260": 2.26245, "15265": 2.24258, "15270": 2.25216, "15275": 2.25942, "15280": 2.22443, "15285": 2.25531, "15290": 2.24563, "15295": 2.2551, "15300": 2.2692, "15305": 2.27038, "15310": 2.30856, "15315": 2.22152, "15320": 2.24452, "15325": 2.24875, "15330": 2.27487, "15335": 2.21429, "15340": 2.22023, "15345": 2.23742, "15350": 2.22137, "15355": 2.23831, "15360": 2.24045, "15365": 2.22109, "15370": 2.2655, "15375": 2.21274, "15380": 2.20374, "15385": 2.20257, "15390": 2.27981, "15395": 2.24389, "15400": 2.27964, "15405": 2.23015, "15410": 2.22796, "15415": 2.28061, "15420": 2.2456, "15425": 2.23264, "15430": 2.23291, "15435": 2.26326, "15440": 2.25901, "15445": 2.24594, "15450": 2.25696, "15455": 2.26849, "15460": 2.21372, "15465": 2.2413, "15470": 2.24748, "15475": 2.21566, "15480": 2.22179, "15485": 2.20216, "15490": 2.27888, "15495": 2.24048, "15500": 2.20276, "15505": 2.20746, "15510": 2.20445, "15515": 2.23645, "15520": 2.20155, "15525": 2.26622, "15530": 2.24659, "15535": 2.23466, "15540": 2.25055, "15545": 2.23473, "15550": 2.23086, "15555": 2.23934, "15560": 2.2725, "15565": 2.21449, "15570": 2.20534, "15575": 2.26327, "15580": 2.26289, "15585": 2.22697, "15590": 2.26445, "15595": 2.18803, "15600": 2.30055, "15605": 2.24718, "15610": 2.20713, "15615": 2.22859, "15620": 2.23152, "15625": 2.19335, "15630": 2.19166, "15635": 2.20153, "15640": 2.23001, "15645": 2.25301, "15650": 2.20504, "15655": 2.21267, "15660": 2.2159, "15665": 2.23948, "15670": 2.21119, "15675": 2.22844, "15680": 2.21146, "15685": 2.2607, "15690": 2.20174, "15695": 2.19282, "15700": 2.24166, "15705": 2.19337, "15710": 2.19499, "15715": 2.24766, "15720": 2.24121, "15725": 2.24134, "15730": 2.22405, "15735": 2.28297, "15740": 2.18739, "15745": 2.23143, "15750": 2.22209, "15755": 2.25743, "15760": 2.26264, "15765": 2.25316, "15770": 2.25484, "15775": 2.21583, "15780": 2.27991, "15785": 2.19543, "15790": 2.24229, "15795": 2.23871, "15800": 2.2193, "15805": 2.23458, "15810": 2.16006, "15815": 2.27117, "15820": 2.22432, "15825": 2.20183, "15830": 2.20289, "15835": 2.20585, "15840": 2.25178, "15845": 2.22118, "15850": 2.21787, "15855": 2.23614, "15860": 2.22874, "15865": 2.24204, "15870": 2.23297, "15875": 2.26225, "15880": 2.24219, "15885": 2.20294, "15890": 2.21335, "15895": 2.27614, "15900": 2.19471, "15905": 2.28888, "15910": 2.24952, "15915": 2.19368, "15920": 2.26318, "15925": 2.221, "15930": 2.23144, "15935": 2.23014, "15940": 2.18966, "15945": 2.23888, "15950": 2.22462, "15955": 2.21796, "15960": 2.23053, "15965": 2.21942, "15970": 2.2255, "15975": 2.19762, "15980": 2.24995, "15985": 2.2152, "15990": 2.23175, "15995": 2.23703, "16000": 2.20323, "16005": 2.20816, "16010": 2.18129, "16015": 2.20958, "16020": 2.21748, "16025": 2.23983, "16030": 2.24631, "16035": 2.17241, "16040": 2.27401, "16045": 2.20846, "16050": 2.24313, "16055": 2.22135, "16060": 2.1931, "16065": 2.23231, "16070": 2.24203, "16075": 2.18392, "16080": 2.27328, "16085": 2.22708, "16090": 2.21339, "16095": 2.22862, "16100": 2.2186, "16105": 2.27057, "16110": 2.26263, "16115": 2.19658, "16120": 2.19472, "16125": 2.2324, "16130": 2.20965, "16135": 2.24812, "16140": 2.18573, "16145": 2.23458, "16150": 2.20802, "16155": 2.24396, "16160": 2.27821, "16165": 2.20566, "16170": 2.19873, "16175": 2.17336, "16180": 2.24011, "16185": 2.23042, "16190": 2.23, "16195": 2.25053, "16200": 2.18606, "16205": 2.23786, "16210": 2.24149, "16215": 2.22541, "16220": 2.24466, "16225": 2.24578, "16230": 2.22553, "16235": 2.20941, "16240": 2.26387, "16245": 2.21549, "16250": 2.20582, "16255": 2.213, "16260": 2.22336, "16265": 2.24305, "16270": 2.24138, "16275": 2.23147, "16280": 2.1671, "16285": 2.23584, "16290": 2.23891, "16295": 2.22566, "16300": 2.26537, "16305": 2.20756, "16310": 2.19721, "16315": 2.20469, "16320": 2.2934, "16325": 2.24937, "16330": 2.18748, "16335": 2.18879, "16340": 2.18134, "16345": 2.21519, "16350": 2.25984, "16355": 2.22182, "16360": 2.2214, "16365": 2.21975, "16370": 2.23885, "16375": 2.23722, "16380": 2.21852, "16385": 2.20647, "16390": 2.20649, "16395": 2.22521, "16400": 2.19974, "16405": 2.20739, "16410": 2.24006, "16415": 2.21043, "16420": 2.22285, "16425": 2.20507, "16430": 2.27378, "16435": 2.24626, "16440": 2.24031, "16445": 2.20686, "16450": 2.20625, "16455": 2.25724, "16460": 2.24371, "16465": 2.23932, "16470": 2.18138, "16475": 2.22028, "16480": 2.15682, "16485": 2.25415, "16490": 2.20275, "16495": 2.22067, "16500": 2.23006, "16505": 2.20495, "16510": 2.23998, "16515": 2.26127, "16520": 2.22697, "16525": 2.16878, "16530": 2.25921, "16535": 2.18149, "16540": 2.2566, "16545": 2.2034, "16550": 2.19717, "16555": 2.22214, "16560": 2.2471, "16565": 2.23918, "16570": 2.23002, "16575": 2.19381, "16580": 2.18495, "16585": 2.20838, "16590": 2.22739, "16595": 2.18051, "16600": 2.1916, "16605": 2.21347, "16610": 2.2281, "16615": 2.20746, "16620": 2.21109, "16625": 2.20781, "16630": 2.2368, "16635": 2.20199, "16640": 2.23267, "16645": 2.21117, "16650": 2.251, "16655": 2.25564, "16660": 2.2192, "16665": 2.18646, "16670": 2.20918, "16675": 2.22515, "16680": 2.21832, "16685": 2.22185, "16690": 2.21779, "16695": 2.26681, "16700": 2.187, "16705": 2.22263, "16710": 2.23623, "16715": 2.27088, "16720": 2.24198, "16725": 2.23872, "16730": 2.1789, "16735": 2.20659, "16740": 2.19178, "16745": 2.21822, "16750": 2.22416, "16755": 2.21039, "16760": 2.1954, "16765": 2.21269, "16770": 2.22263, "16775": 2.20322, "16780": 2.23732, "16785": 2.22241, "16790": 2.21471, "16795": 2.25742, "16800": 2.22189, "16805": 2.24469, "16810": 2.17045, "16815": 2.22682, "16820": 2.22845, "16825": 2.22035, "16830": 2.22482, "16835": 2.19513, "16840": 2.20321, "16845": 2.14415, "16850": 2.2459, "16855": 2.22574, "16860": 2.23154, "16865": 2.25333, "16870": 2.18951, "16875": 2.17603, "16880": 2.22035, "16885": 2.20742, "16890": 2.24445, "16895": 2.1792, "16900": 2.24264, "16905": 2.24448, "16910": 2.2222, "16915": 2.238, "16920": 2.20899, "16925": 2.19223, "16930": 2.21232, "16935": 2.19944, "16940": 2.24047, "16945": 2.19779, "16950": 2.21666, "16955": 2.23437, "16960": 2.2261, "16965": 2.18003, "16970": 2.18812, "16975": 2.21365, "16980": 2.21513, "16985": 2.20904, "16990": 2.2215, "16995": 2.17088, "17000": 2.26248, "17005": 2.24598, "17010": 2.21879, "17015": 2.18581, "17020": 2.22696, "17025": 2.16923, "17030": 2.19462, "17035": 2.19944, "17040": 2.23116, "17045": 2.20295, "17050": 2.16144, "17055": 2.2155, "17060": 2.24355, "17065": 2.18879, "17070": 2.18069, "17075": 2.20926, "17080": 2.21019, "17085": 2.22572, "17090": 2.23849, "17095": 2.21602, "17100": 2.25296, "17105": 2.24892, "17110": 2.2237, "17115": 2.19275, "17120": 2.16861, "17125": 2.2121, "17130": 2.24673, "17135": 2.19694, "17140": 2.23285, "17145": 2.22174, "17150": 2.18268, "17155": 2.18859, "17160": 2.26695, "17165": 2.21319, "17170": 2.22604, "17175": 2.21278, "17180": 2.2019, "17185": 2.21264, "17190": 2.16421, "17195": 2.21919, "17200": 2.21458, "17205": 2.19591, "17210": 2.23221, "17215": 2.19982, "17220": 2.22652, "17225": 2.20636, "17230": 2.22552, "17235": 2.2111, "17240": 2.20458, "17245": 2.20776, "17250": 2.20047, "17255": 2.1881, "17260": 2.21441, "17265": 2.20814, "17270": 2.21366, "17275": 2.25477, "17280": 2.21467, "17285": 2.22643, "17290": 2.21091, "17295": 2.22458, "17300": 2.2054, "17305": 2.22536, "17310": 2.23319, "17315": 2.22404, "17320": 2.20416, "17325": 2.25634, "17330": 2.24128, "17335": 2.23232, "17340": 2.15535, "17345": 2.18967, "17350": 2.18498, "17355": 2.2207, "17360": 2.18916, "17365": 2.1919, "17370": 2.20915, "17375": 2.23698, "17380": 2.21405, "17385": 2.19408, "17390": 2.20155, "17395": 2.18312, "17400": 2.18607, "17405": 2.23443, "17410": 2.19364, "17415": 2.17577, "17420": 2.21513, "17425": 2.19978, "17430": 2.19278, "17435": 2.19193, "17440": 2.22457, "17445": 2.16491, "17450": 2.22043, "17455": 2.19173, "17460": 2.20653, "17465": 2.16763, "17470": 2.21009, "17475": 2.23193, "17480": 2.17827, "17485": 2.22498, "17490": 2.19291, "17495": 2.16505, "17500": 2.20865, "17505": 2.20376, "17510": 2.16831, "17515": 2.21178, "17520": 2.19446, "17525": 2.20549, "17530": 2.19761, "17535": 2.1885, "17540": 2.20928, "17545": 2.16592, "17550": 2.2084, "17555": 2.1575, "17560": 2.18324, "17565": 2.21247, "17570": 2.192, "17575": 2.19138, "17580": 2.18219, "17585": 2.19729, "17590": 2.25592, "17595": 2.22193, "17600": 2.2434, "17605": 2.20456, "17610": 2.199, "17615": 2.21, "17620": 2.21426, "17625": 2.14382, "17630": 2.20477, "17635": 2.18605, "17640": 2.20937, "17645": 2.27838, "17650": 2.18071, "17655": 2.20535, "17660": 2.24249, "17665": 2.21805, "17670": 2.22477, "17675": 2.22506, "17680": 2.19941, "17685": 2.20564, "17690": 2.16086, "17695": 2.24103, "17700": 2.21417, "17705": 2.24536, "17710": 2.1626, "17715": 2.15282, "17720": 2.21302, "17725": 2.21408, "17730": 2.19552, "17735": 2.18573, "17740": 2.1791, "17745": 2.21214, "17750": 2.26268, "17755": 2.20345, "17760": 2.19985, "17765": 2.18432, "17770": 2.19701, "17775": 2.21574, "17780": 2.23662, "17785": 2.2007, "17790": 2.1835, "17795": 2.20344, "17800": 2.18325, "17805": 2.19352, "17810": 2.19677, "17815": 2.20186, "17820": 2.16292, "17825": 2.25807, "17830": 2.23872, "17835": 2.21717, "17840": 2.18331, "17845": 2.18043, "17850": 2.19654, "17855": 2.20502, "17860": 2.17723, "17865": 2.17966, "17870": 2.23899, "17875": 2.20702, "17880": 2.1813, "17885": 2.23008, "17890": 2.22193, "17895": 2.21418, "17900": 2.15602, "17905": 2.20624, "17910": 2.20258, "17915": 2.19477, "17920": 2.23894, "17925": 2.19341, "17930": 2.16552, "17935": 2.20447, "17940": 2.21869, "17945": 2.1913, "17950": 2.22776, "17955": 2.22401, "17960": 2.17214, "17965": 2.19741, "17970": 2.2278, "17975": 2.13354, "17980": 2.19824, "17985": 2.20942, "17990": 2.21941, "17995": 2.20673, "18000": 2.18908, "18005": 2.194, "18010": 2.21457, "18015": 2.18892, "18020": 2.22215, "18025": 2.21186, "18030": 2.20958, "18035": 2.21852, "18040": 2.21202, "18045": 2.21456, "18050": 2.2135, "18055": 2.20441, "18060": 2.2294, "18065": 2.17525, "18070": 2.2367, "18075": 2.21128, "18080": 2.1555, "18085": 2.21967, "18090": 2.17466, "18095": 2.18737, "18100": 2.15814, "18105": 2.1816, "18110": 2.20873, "18115": 2.24147, "18120": 2.235, "18125": 2.18886, "18130": 2.19921, "18135": 2.15617, "18140": 2.20144, "18145": 2.1858, "18150": 2.20636, "18155": 2.20101, "18160": 2.23405, "18165": 2.22917, "18170": 2.20974, "18175": 2.19023, "18180": 2.20822, "18185": 2.1906, "18190": 2.18414, "18195": 2.15897, "18200": 2.22997, "18205": 2.20012, "18210": 2.20573, "18215": 2.16694, "18220": 2.17652, "18225": 2.22625, "18230": 2.22664, "18235": 2.18905, "18240": 2.20127, "18245": 2.20482, "18250": 2.18067, "18255": 2.27997, "18260": 2.20843, "18265": 2.20528, "18270": 2.18178, "18275": 2.18349, "18280": 2.20224, "18285": 2.21915, "18290": 2.17628, "18295": 2.21721, "18300": 2.22419, "18305": 2.24744, "18310": 2.2384, "18315": 2.21859, "18320": 2.17984, "18325": 2.21367, "18330": 2.20229, "18335": 2.214, "18340": 2.19999, "18345": 2.17847, "18350": 2.24073, "18355": 2.17161, "18360": 2.22479, "18365": 2.18354, "18370": 2.15847, "18375": 2.21106, "18380": 2.21006, "18385": 2.20639, "18390": 2.18538, "18395": 2.23866, "18400": 2.19382, "18405": 2.23731, "18410": 2.20397, "18415": 2.17243, "18420": 2.17821, "18425": 2.18131, "18430": 2.15256, "18435": 2.19331, "18440": 2.19878, "18445": 2.16531, "18450": 2.21548, "18455": 2.15228, "18460": 2.19338, "18465": 2.20821, "18470": 2.19454, "18475": 2.15127, "18480": 2.19713, "18485": 2.15845, "18490": 2.19803, "18495": 2.24256, "18500": 2.19669, "18505": 2.16756, "18510": 2.21794, "18515": 2.17446, "18520": 2.1896, "18525": 2.17121, "18530": 2.17854, "18535": 2.17116, "18540": 2.21196, "18545": 2.17391, "18550": 2.21635, "18555": 2.16823, "18560": 2.18895, "18565": 2.22211, "18570": 2.21232, "18575": 2.14528, "18580": 2.22074, "18585": 2.20959, "18590": 2.21077, "18595": 2.17953, "18600": 2.15849, "18605": 2.20897, "18610": 2.20741, "18615": 2.19598, "18620": 2.21288, "18625": 2.1957, "18630": 2.16987, "18635": 2.19742, "18640": 2.16692, "18645": 2.17433, "18650": 2.25549, "18655": 2.16529, "18660": 2.2348, "18665": 2.22198, "18670": 2.20676, "18675": 2.18958, "18680": 2.20788, "18685": 2.17722, "18690": 2.21144, "18695": 2.15491, "18700": 2.16055, "18705": 2.17957, "18710": 2.24321, "18715": 2.17684, "18720": 2.21664, "18725": 2.17984, "18730": 2.18038, "18735": 2.13805, "18740": 2.19144, "18745": 2.21571, "18750": 2.21471, "18755": 2.19549, "18760": 2.22351, "18765": 2.1569, "18770": 2.14848, "18775": 2.21279, "18780": 2.16878, "18785": 2.20895, "18790": 2.20064, "18795": 2.2063, "18800": 2.18521, "18805": 2.18152, "18810": 2.20565, "18815": 2.16544, "18820": 2.18715, "18825": 2.20943, "18830": 2.19734, "18835": 2.21929, "18840": 2.14207, "18845": 2.19305, "18850": 2.18505, "18855": 2.17466, "18860": 2.15545, "18865": 2.18123, "18870": 2.16188, "18875": 2.16298, "18880": 2.20875, "18885": 2.1728, "18890": 2.17728, "18895": 2.19922, "18900": 2.17016, "18905": 2.18277, "18910": 2.17896, "18915": 2.17372, "18920": 2.19035, "18925": 2.27109, "18930": 2.19777, "18935": 2.20515, "18940": 2.23368, "18945": 2.26174, "18950": 2.19111, "18955": 2.18864, "18960": 2.20657, "18965": 2.14039, "18970": 2.20652, "18975": 2.22027, "18980": 2.20506, "18985": 2.1914, "18990": 2.23116, "18995": 2.19709, "19000": 2.16467, "19005": 2.16284, "19010": 2.19608, "19015": 2.19535, "19020": 2.20234, "19025": 2.16757, "19030": 2.21738, "19035": 2.16388, "19040": 2.14789, "19045": 2.21781, "19050": 2.16234, "19055": 2.18545, "19060": 2.1843, "19065": 2.18877, "19070": 2.17354, "19075": 2.21719, "19080": 2.18441, "19085": 2.17725, "19090": 2.17638, "19095": 2.16527, "19100": 2.19879, "19105": 2.19376, "19110": 2.17346, "19115": 2.20124, "19120": 2.19251, "19125": 2.21254, "19130": 2.14587, "19135": 2.22623, "19140": 2.21152, "19145": 2.19251, "19150": 2.18358, "19155": 2.21164, "19160": 2.18294, "19165": 2.13001, "19170": 2.17401, "19175": 2.18283, "19180": 2.12735, "19185": 2.19169, "19190": 2.18343, "19195": 2.20757, "19200": 2.20292, "19205": 2.18395, "19210": 2.18617, "19215": 2.13265, "19220": 2.17726, "19225": 2.16038, "19230": 2.21822, "19235": 2.21072, "19240": 2.17119, "19245": 2.17214, "19250": 2.18363, "19255": 2.14904, "19260": 2.21665, "19265": 2.1543, "19270": 2.1715, "19275": 2.18659, "19280": 2.17531, "19285": 2.13883, "19290": 2.16728, "19295": 2.18765, "19300": 2.16879, "19305": 2.22271, "19310": 2.22234, "19315": 2.20759, "19320": 2.16079, "19325": 2.18844, "19330": 2.21827, "19335": 2.21543, "19340": 2.21572, "19345": 2.19343, "19350": 2.16594, "19355": 2.13175, "19360": 2.15417, "19365": 2.18009, "19370": 2.18297, "19375": 2.19207, "19380": 2.16213, "19385": 2.15741, "19390": 2.17726, "19395": 2.18101, "19400": 2.18439, "19405": 2.17651, "19410": 2.16293, "19415": 2.18293, "19420": 2.19201, "19425": 2.17944, "19430": 2.23979, "19435": 2.17063, "19440": 2.16763, "19445": 2.18193, "19450": 2.17444, "19455": 2.16913, "19460": 2.20081, "19465": 2.16934, "19470": 2.17962, "19475": 2.22594, "19480": 2.18638, "19485": 2.168, "19490": 2.18657, "19495": 2.17619, "19500": 2.20207, "19505": 2.14938, "19510": 2.21186, "19515": 2.17242, "19520": 2.19071, "19525": 2.1705, "19530": 2.16235, "19535": 2.13268, "19540": 2.21756, "19545": 2.18391, "19550": 2.14731, "19555": 2.21857, "19560": 2.18446, "19565": 2.15065, "19570": 2.17587, "19575": 2.17222, "19580": 2.18874, "19585": 2.21673, "19590": 2.2032, "19595": 2.19411, "19600": 2.19815, "19605": 2.21359, "19610": 2.19184, "19615": 2.17482, "19620": 2.21313, "19625": 2.18926, "19630": 2.19065, "19635": 2.19266, "19640": 2.17254, "19645": 2.14687, "19650": 2.17657, "19655": 2.15661, "19660": 2.17284, "19665": 2.17308, "19670": 2.18234, "19675": 2.2111, "19680": 2.19424, "19685": 2.17819, "19690": 2.18381, "19695": 2.15486, "19700": 2.16514, "19705": 2.1863, "19710": 2.2425, "19715": 2.15608, "19720": 2.15124, "19725": 2.22953, "19730": 2.17277, "19735": 2.1783, "19740": 2.2164, "19745": 2.20163, "19750": 2.18259, "19755": 2.17045, "19760": 2.15499, "19765": 2.20808, "19770": 2.21054, "19775": 2.16839, "19780": 2.15292, "19785": 2.17028, "19790": 2.22428, "19795": 2.16615, "19800": 2.16368, "19805": 2.16425, "19810": 2.21926, "19815": 2.18836, "19820": 2.20092, "19825": 2.21218, "19830": 2.16912, "19835": 2.18166, "19840": 2.20508, "19845": 2.16367, "19850": 2.1697, "19855": 2.17532, "19860": 2.18492, "19865": 2.16799, "19870": 2.20752, "19875": 2.13504, "19880": 2.16478, "19885": 2.15357, "19890": 2.1624, "19895": 2.20489, "19900": 2.18451, "19905": 2.15391, "19910": 2.17316, "19915": 2.19457, "19920": 2.10683, "19925": 2.16496, "19930": 2.1464, "19935": 2.17154, "19940": 2.17487, "19945": 2.18172, "19950": 2.16548, "19955": 2.1694, "19960": 2.16374, "19965": 2.16678, "19970": 2.12812, "19975": 2.17622, "19980": 2.214, "19985": 2.17072, "19990": 2.1695, "19995": 2.19604, "20000": 2.22075, "20005": 2.19026, "20010": 2.17432, "20015": 2.18868, "20020": 2.16316, "20025": 2.11594, "20030": 2.14564, "20035": 2.18273, "20040": 2.18157, "20045": 2.19425, "20050": 2.19499, "20055": 2.19589, "20060": 2.20349, "20065": 2.16481, "20070": 2.18551, "20075": 2.14677, "20080": 2.16243, "20085": 2.19205, "20090": 2.17723, "20095": 2.16147, "20100": 2.18701, "20105": 2.13963, "20110": 2.17193, "20115": 2.15592, "20120": 2.18633, "20125": 2.16344, "20130": 2.1723, "20135": 2.17981, "20140": 2.16213, "20145": 2.17536, "20150": 2.1423, "20155": 2.22988, "20160": 2.1786, "20165": 2.17617, "20170": 2.16363, "20175": 2.14716, "20180": 2.18381, "20185": 2.18283, "20190": 2.19464, "20195": 2.19731, "20200": 2.15757, "20205": 2.17988, "20210": 2.17516, "20215": 2.18773, "20220": 2.17729, "20225": 2.21239, "20230": 2.16861, "20235": 2.20765, "20240": 2.18178, "20245": 2.19036, "20250": 2.14181, "20255": 2.14549, "20260": 2.17254, "20265": 2.17891, "20270": 2.17857, "20275": 2.18058, "20280": 2.14852, "20285": 2.16783, "20290": 2.19009, "20295": 2.19597, "20300": 2.15169, "20305": 2.21528, "20310": 2.11218, "20315": 2.16866, "20320": 2.18354, "20325": 2.18485, "20330": 2.15204, "20335": 2.20622, "20340": 2.17329, "20345": 2.16624, "20350": 2.14435, "20355": 2.1902, "20360": 2.19479, "20365": 2.16734, "20370": 2.19803, "20375": 2.18365, "20380": 2.182, "20385": 2.17779, "20390": 2.15288, "20395": 2.1533, "20400": 2.2626, "20405": 2.15076, "20410": 2.14157, "20415": 2.19447, "20420": 2.15613, "20425": 2.17822, "20430": 2.16499, "20435": 2.14733, "20440": 2.15845, "20445": 2.1797, "20450": 2.17104, "20455": 2.20815, "20460": 2.17602, "20465": 2.1656, "20470": 2.17796, "20475": 2.15645, "20480": 2.19406, "20485": 2.19661, "20490": 2.19315, "20495": 2.16839, "20500": 2.17114, "20505": 2.1554, "20510": 2.18759, "20515": 2.18634, "20520": 2.15608, "20525": 2.15627, "20530": 2.17267, "20535": 2.18875, "20540": 2.15224, "20545": 2.15455, "20550": 2.15008, "20555": 2.12631, "20560": 2.17027, "20565": 2.1837, "20570": 2.17524, "20575": 2.15467, "20580": 2.19775, "20585": 2.15912, "20590": 2.18902, "20595": 2.12972, "20600": 2.14402, "20605": 2.17958, "20610": 2.19461, "20615": 2.15412, "20620": 2.14238, "20625": 2.1464, "20630": 2.12237, "20635": 2.13977, "20640": 2.19477, "20645": 2.19024, "20650": 2.17824, "20655": 2.20718, "20660": 2.18466, "20665": 2.18385, "20670": 2.16724, "20675": 2.11517, "20680": 2.17356, "20685": 2.18795, "20690": 2.18808, "20695": 2.19698, "20700": 2.1355, "20705": 2.18985, "20710": 2.15358, "20715": 2.17392, "20720": 2.22396, "20725": 2.17712, "20730": 2.1521, "20735": 2.14285, "20740": 2.15863, "20745": 2.18747, "20750": 2.17673, "20755": 2.18198, "20760": 2.15882, "20765": 2.15131, "20770": 2.18172, "20775": 2.23055, "20780": 2.1862, "20785": 2.17905, "20790": 2.16056, "20795": 2.16763, "20800": 2.20432, "20805": 2.1712, "20810": 2.20169, "20815": 2.16374, "20820": 2.19563, "20825": 2.18047, "20830": 2.14258, "20835": 2.13983, "20840": 2.1898, "20845": 2.21514, "20850": 2.18011, "20855": 2.2076, "20860": 2.18461, "20865": 2.18989, "20870": 2.20669, "20875": 2.20558, "20880": 2.18518, "20885": 2.18465, "20890": 2.17424, "20895": 2.16234, "20900": 2.17698, "20905": 2.17792, "20910": 2.17917, "20915": 2.16842, "20920": 2.16897, "20925": 2.17955, "20930": 2.19755, "20935": 2.20301, "20940": 2.15778, "20945": 2.14573, "20950": 2.17353, "20955": 2.20218, "20960": 2.17383, "20965": 2.14342, "20970": 2.16337, "20975": 2.16701, "20980": 2.14893, "20985": 2.18241, "20990": 2.16698, "20995": 2.11832, "21000": 2.1463, "21005": 2.19512, "21010": 2.20357, "21015": 2.11942, "21020": 2.15385, "21025": 2.15432, "21030": 2.14419, "21035": 2.14454, "21040": 2.13972, "21045": 2.1689, "21050": 2.12876, "21055": 2.16909, "21060": 2.09796, "21065": 2.16947, "21070": 2.13587, "21075": 2.17006, "21080": 2.17321, "21085": 2.1891, "21090": 2.17761, "21095": 2.19931, "21100": 2.16296, "21105": 2.19628, "21110": 2.17201, "21115": 2.19598, "21120": 2.20846, "21125": 2.16257, "21130": 2.18958, "21135": 2.17616, "21140": 2.14954, "21145": 2.18661, "21150": 2.1602, "21155": 2.16818, "21160": 2.15687, "21165": 2.16022, "21170": 2.1456, "21175": 2.15896, "21180": 2.16792, "21185": 2.19578, "21190": 2.14127, "21195": 2.15661, "21200": 2.17135, "21205": 2.18732, "21210": 2.15714, "21215": 2.14553, "21220": 2.17665, "21225": 2.15298, "21230": 2.17788, "21235": 2.15354, "21240": 2.18545, "21245": 2.15476, "21250": 2.14917, "21255": 2.14093, "21260": 2.18782, "21265": 2.21591, "21270": 2.2226, "21275": 2.21415, "21280": 2.19199, "21285": 2.17847, "21290": 2.14527, "21295": 2.14337, "21300": 2.21355, "21305": 2.21442, "21310": 2.16878, "21315": 2.16919, "21320": 2.16103, "21325": 2.13175, "21330": 2.15609, "21335": 2.12501, "21340": 2.17889, "21345": 2.16029, "21350": 2.13938, "21355": 2.18343, "21360": 2.16896, "21365": 2.18891, "21370": 2.14272, "21375": 2.14239, "21380": 2.15404, "21385": 2.17756, "21390": 2.18145, "21395": 2.13103, "21400": 2.18127, "21405": 2.19904, "21410": 2.15419, "21415": 2.18781, "21420": 2.20849, "21425": 2.16836, "21430": 2.20282, "21435": 2.15701, "21440": 2.20422, "21445": 2.1709, "21450": 2.16484, "21455": 2.18775, "21460": 2.16029, "21465": 2.16363, "21470": 2.19535, "21475": 2.14339, "21480": 2.19395, "21485": 2.21493, "21490": 2.21044, "21495": 2.17906, "21500": 2.13876, "21505": 2.18145, "21510": 2.16833, "21515": 2.17617, "21520": 2.13239, "21525": 2.15725, "21530": 2.12206, "21535": 2.15652, "21540": 2.18398, "21545": 2.16942, "21550": 2.18359, "21555": 2.15394, "21560": 2.15083, "21565": 2.13931, "21570": 2.12491, "21575": 2.1916, "21580": 2.15674, "21585": 2.17931, "21590": 2.19606, "21595": 2.18815, "21600": 2.18142, "21605": 2.17964, "21610": 2.16381, "21615": 2.16046, "21620": 2.18238, "21625": 2.17743, "21630": 2.19807, "21635": 2.13512, "21640": 2.18277, "21645": 2.17014, "21650": 2.1642, "21655": 2.17403, "21660": 2.15687, "21665": 2.10499, "21670": 2.17552, "21675": 2.13539, "21680": 2.16892, "21685": 2.1394, "21690": 2.12313, "21695": 2.17451, "21700": 2.15838, "21705": 2.18426, "21710": 2.09269, "21715": 2.13759, "21720": 2.18344, "21725": 2.15345, "21730": 2.15634, "21735": 2.12928, "21740": 2.16364, "21745": 2.18119, "21750": 2.16476, "21755": 2.17863, "21760": 2.13762, "21765": 2.1259, "21770": 2.14509, "21775": 2.16125, "21780": 2.1939, "21785": 2.17386, "21790": 2.16106, "21795": 2.16076, "21800": 2.13159, "21805": 2.15942, "21810": 2.1645, "21815": 2.18492, "21820": 2.14934, "21825": 2.16647, "21830": 2.1251, "21835": 2.14319, "21840": 2.16676, "21845": 2.14797, "21850": 2.18131, "21855": 2.13965, "21860": 2.16422, "21865": 2.16712, "21870": 2.14063, "21875": 2.17952, "21880": 2.14676, "21885": 2.18011, "21890": 2.17622, "21895": 2.15932, "21900": 2.18937, "21905": 2.17627, "21910": 2.17185, "21915": 2.1888, "21920": 2.15833, "21925": 2.18888, "21930": 2.13153, "21935": 2.19991, "21940": 2.1492, "21945": 2.17189, "21950": 2.16543, "21955": 2.12209, "21960": 2.11173, "21965": 2.17688, "21970": 2.15413, "21975": 2.13926, "21980": 2.17934, "21985": 2.13345, "21990": 2.16373, "21995": 2.17062, "22000": 2.20631, "22005": 2.14572, "22010": 2.17815, "22015": 2.17284, "22020": 2.19203, "22025": 2.15177, "22030": 2.16541, "22035": 2.11867, "22040": 2.18478, "22045": 2.20437, "22050": 2.12299, "22055": 2.13732, "22060": 2.17351, "22065": 2.14773, "22070": 2.13963, "22075": 2.16893, "22080": 2.19663, "22085": 2.13107, "22090": 2.16395, "22095": 2.15082, "22100": 2.15592, "22105": 2.1837, "22110": 2.15717, "22115": 2.1376, "22120": 2.1684, "22125": 2.14185, "22130": 2.18317, "22135": 2.15736, "22140": 2.18772, "22145": 2.14533, "22150": 2.18555, "22155": 2.13397, "22160": 2.13243, "22165": 2.1753, "22170": 2.13893, "22175": 2.19233, "22180": 2.11922, "22185": 2.15178, "22190": 2.16396, "22195": 2.17466, "22200": 2.17745, "22205": 2.17669, "22210": 2.10028, "22215": 2.13978, "22220": 2.188, "22225": 2.11972, "22230": 2.17197, "22235": 2.178, "22240": 2.13669, "22245": 2.18173, "22250": 2.17308, "22255": 2.12896, "22260": 2.18829, "22265": 2.15226, "22270": 2.14797, "22275": 2.14443, "22280": 2.18837, "22285": 2.19297, "22290": 2.17198, "22295": 2.17623, "22300": 2.14139, "22305": 2.12633, "22310": 2.15943, "22315": 2.15518, "22320": 2.14985, "22325": 2.15761, "22330": 2.14496, "22335": 2.19017, "22340": 2.1688, "22345": 2.16143, "22350": 2.19109, "22355": 2.16117, "22360": 2.1793, "22365": 2.17102, "22370": 2.20309, "22375": 2.1517, "22380": 2.15442, "22385": 2.15443, "22390": 2.16933, "22395": 2.17172, "22400": 2.15688, "22405": 2.10087, "22410": 2.18216, "22415": 2.21278, "22420": 2.17285, "22425": 2.17971, "22430": 2.1289, "22435": 2.16549, "22440": 2.19092, "22445": 2.20219, "22450": 2.14094, "22455": 2.14828, "22460": 2.15464, "22465": 2.15497, "22470": 2.19774, "22475": 2.19207, "22480": 2.11596, "22485": 2.14075, "22490": 2.17018, "22495": 2.12247, "22500": 2.09431, "22505": 2.1708, "22510": 2.12038, "22515": 2.14831, "22520": 2.18582, "22525": 2.1469, "22530": 2.13806, "22535": 2.16554, "22540": 2.16697, "22545": 2.20357, "22550": 2.21491, "22555": 2.13511, "22560": 2.15055, "22565": 2.16082, "22570": 2.14834, "22575": 2.15253, "22580": 2.09556, "22585": 2.17311, "22590": 2.13332, "22595": 2.14618, "22600": 2.17418, "22605": 2.13186, "22610": 2.15037, "22615": 2.14645, "22620": 2.1168, "22625": 2.20421, "22630": 2.17714, "22635": 2.19574, "22640": 2.13671, "22645": 2.11607, "22650": 2.16945, "22655": 2.13495, "22660": 2.16215, "22665": 2.12964, "22670": 2.15528, "22675": 2.11196, "22680": 2.16374, "22685": 2.19879, "22690": 2.16279, "22695": 2.15911, "22700": 2.13915, "22705": 2.13123, "22710": 2.1459, "22715": 2.1463, "22720": 2.14117, "22725": 2.13383, "22730": 2.16056, "22735": 2.1893, "22740": 2.15122, "22745": 2.15025, "22750": 2.08768, "22755": 2.16564, "22760": 2.19658, "22765": 2.19018, "22770": 2.14769, "22775": 2.18923, "22780": 2.15541, "22785": 2.15434, "22790": 2.18269, "22795": 2.12412, "22800": 2.12132, "22805": 2.1098, "22810": 2.14845, "22815": 2.0857, "22820": 2.17026, "22825": 2.14619, "22830": 2.15514, "22835": 2.13097, "22840": 2.19266, "22845": 2.17986, "22850": 2.15722, "22855": 2.13361, "22860": 2.13572, "22865": 2.15277, "22870": 2.14394, "22875": 2.13449, "22880": 2.17583, "22885": 2.15651, "22890": 2.13796, "22895": 2.1346, "22900": 2.16184, "22905": 2.14342, "22910": 2.16614, "22915": 2.17379, "22920": 2.13748, "22925": 2.13074, "22930": 2.17097, "22935": 2.14416, "22940": 2.16173, "22945": 2.17643, "22950": 2.11386, "22955": 2.182, "22960": 2.13611, "22965": 2.09324, "22970": 2.17385, "22975": 2.16956, "22980": 2.16313, "22985": 2.15398, "22990": 2.11202, "22995": 2.19048, "23000": 2.1642, "23005": 2.11346, "23010": 2.18352, "23015": 2.13484, "23020": 2.14254, "23025": 2.14475, "23030": 2.10113, "23035": 2.18052, "23040": 2.15708, "23045": 2.16647, "23050": 2.15382, "23055": 2.1616, "23060": 2.14907, "23065": 2.14867, "23070": 2.14213, "23075": 2.17355, "23080": 2.15081, "23085": 2.14139, "23090": 2.11132, "23095": 2.15335, "23100": 2.11109, "23105": 2.13216, "23110": 2.18415, "23115": 2.17301, "23120": 2.14897, "23125": 2.12465, "23130": 2.1641, "23135": 2.19054, "23140": 2.13389, "23145": 2.12138, "23150": 2.14471, "23155": 2.15813, "23160": 2.17386, "23165": 2.1809, "23170": 2.11903, "23175": 2.13963, "23180": 2.18896, "23185": 2.14106, "23190": 2.1569, "23195": 2.1357, "23200": 2.13015, "23205": 2.13079, "23210": 2.17669, "23215": 2.17848, "23220": 2.15124, "23225": 2.15566, "23230": 2.18104, "23235": 2.13496, "23240": 2.16652, "23245": 2.17937, "23250": 2.1869, "23255": 2.18509, "23260": 2.20408, "23265": 2.14508, "23270": 2.1678, "23275": 2.1691, "23280": 2.13859, "23285": 2.0992, "23290": 2.20333, "23295": 2.15388, "23300": 2.19304, "23305": 2.20193, "23310": 2.10957, "23315": 2.15448, "23320": 2.17723, "23325": 2.1777, "23330": 2.11798, "23335": 2.1844, "23340": 2.1798, "23345": 2.15909, "23350": 2.14239, "23355": 2.1415, "23360": 2.16609, "23365": 2.10429, "23370": 2.17926, "23375": 2.11275, "23380": 2.10327, "23385": 2.13812, "23390": 2.13068, "23395": 2.16972, "23400": 2.1308, "23405": 2.11308, "23410": 2.12888, "23415": 2.12116, "23420": 2.17721, "23425": 2.20217, "23430": 2.17107, "23435": 2.16666, "23440": 2.13176, "23445": 2.10649, "23450": 2.13927, "23455": 2.16494, "23460": 2.18606, "23465": 2.18038, "23470": 2.09778, "23475": 2.11698, "23480": 2.16419, "23485": 2.14281, "23490": 2.16208, "23495": 2.11033, "23500": 2.1699, "23505": 2.11591, "23510": 2.1457, "23515": 2.13156, "23520": 2.15983, "23525": 2.11607, "23530": 2.13753, "23535": 2.13709, "23540": 2.14397, "23545": 2.09982, "23550": 2.13307, "23555": 2.19188, "23560": 2.14795, "23565": 2.15014, "23570": 2.1485, "23575": 2.144, "23580": 2.14199, "23585": 2.17121, "23590": 2.13911, "23595": 2.12389, "23600": 2.11159, "23605": 2.18595, "23610": 2.16505, "23615": 2.14271, "23620": 2.13589, "23625": 2.16836, "23630": 2.18628, "23635": 2.17302, "23640": 2.17929, "23645": 2.16068, "23650": 2.14874, "23655": 2.14913, "23660": 2.1261, "23665": 2.15879, "23670": 2.16912, "23675": 2.16426, "23680": 2.13191, "23685": 2.15806, "23690": 2.20865, "23695": 2.15842, "23700": 2.10964, "23705": 2.13806, "23710": 2.17705, "23715": 2.10448, "23720": 2.08835, "23725": 2.15545, "23730": 2.15071, "23735": 2.16139, "23740": 2.13073, "23745": 2.18872, "23750": 2.14384, "23755": 2.16369, "23760": 2.16399, "23765": 2.19729, "23770": 2.12159, "23775": 2.12832, "23780": 2.14357, "23785": 2.12522, "23790": 2.16181, "23795": 2.16442, "23800": 2.13927, "23805": 2.16972, "23810": 2.12418, "23815": 2.16921, "23820": 2.11553, "23825": 2.17152, "23830": 2.1907, "23835": 2.16638, "23840": 2.09294, "23845": 2.15773, "23850": 2.1913, "23855": 2.10304, "23860": 2.14354, "23865": 2.11409, "23870": 2.15219, "23875": 2.17278, "23880": 2.14046, "23885": 2.12944, "23890": 2.14875, "23895": 2.12226, "23900": 2.15599, "23905": 2.14009, "23910": 2.14677, "23915": 2.09339, "23920": 2.11209, "23925": 2.13107, "23930": 2.16503, "23935": 2.15843, "23940": 2.1652, "23945": 2.13468, "23950": 2.12686, "23955": 2.14404, "23960": 2.12701, "23965": 2.16459, "23970": 2.15137, "23975": 2.16568, "23980": 2.18918, "23985": 2.11706, "23990": 2.12908, "23995": 2.10252, "24000": 2.14907, "24005": 2.16291, "24010": 2.16505, "24015": 2.12295, "24020": 2.13087, "24025": 2.18228, "24030": 2.12562, "24035": 2.18619, "24040": 2.12741, "24045": 2.13829, "24050": 2.16618, "24055": 2.18911, "24060": 2.14077, "24065": 2.11396, "24070": 2.10502, "24075": 2.13817, "24080": 2.11699, "24085": 2.19157, "24090": 2.14335, "24095": 2.1348, "24100": 2.14749, "24105": 2.10486, "24110": 2.09553, "24115": 2.1269, "24120": 2.17182, "24125": 2.10688, "24130": 2.17577, "24135": 2.13814, "24140": 2.14413, "24145": 2.173, "24150": 2.13961, "24155": 2.14864, "24160": 2.10689, "24165": 2.16535, "24170": 2.16911, "24175": 2.16554, "24180": 2.13652, "24185": 2.15873, "24190": 2.1393, "24195": 2.16819, "24200": 2.19934, "24205": 2.10774, "24210": 2.13674, "24215": 2.18904, "24220": 2.16152, "24225": 2.15289, "24230": 2.16201, "24235": 2.17315, "24240": 2.16667, "24245": 2.1417, "24250": 2.15586, "24255": 2.13536, "24260": 2.18032, "24265": 2.17274, "24270": 2.15566, "24275": 2.13475, "24280": 2.15299, "24285": 2.1052, "24290": 2.15335, "24295": 2.1157, "24300": 2.14479, "24305": 2.13333, "24310": 2.14648, "24315": 2.15943, "24320": 2.15521, "24325": 2.17458, "24330": 2.12245, "24335": 2.12601, "24340": 2.12999, "24345": 2.10608, "24350": 2.20353, "24355": 2.12673, "24360": 2.12413, "24365": 2.13357, "24370": 2.15761, "24375": 2.12646, "24380": 2.16501, "24385": 2.14686, "24390": 2.10274, "24395": 2.17434, "24400": 2.18466, "24405": 2.2052, "24410": 2.11719, "24415": 2.15004, "24420": 2.16549, "24425": 2.11839, "24430": 2.1315, "24435": 2.15857, "24440": 2.11409, "24445": 2.11466, "24450": 2.16754, "24455": 2.12426, "24460": 2.15253, "24465": 2.11466, "24470": 2.0949, "24475": 2.13658, "24480": 2.13968, "24485": 2.159, "24490": 2.14889, "24495": 2.10632, "24500": 2.17686, "24505": 2.16403, "24510": 2.15912, "24515": 2.10146, "24520": 2.14116, "24525": 2.11151, "24530": 2.19259, "24535": 2.11559, "24540": 2.17495, "24545": 2.13965, "24550": 2.13916, "24555": 2.15023, "24560": 2.18417, "24565": 2.18561, "24570": 2.13756, "24575": 2.12254, "24580": 2.17109, "24585": 2.15691, "24590": 2.17687, "24595": 2.12945, "24600": 2.15883, "24605": 2.16144, "24610": 2.17792, "24615": 2.10026, "24620": 2.12148, "24625": 2.12431, "24630": 2.17914, "24635": 2.10327, "24640": 2.16793, "24645": 2.17389, "24650": 2.17037, "24655": 2.10502, "24660": 2.12417, "24665": 2.164, "24670": 2.15136, "24675": 2.14671, "24680": 2.13848, "24685": 2.13983, "24690": 2.13235, "24695": 2.1768, "24700": 2.11982, "24705": 2.15053, "24710": 2.12523, "24715": 2.16255, "24720": 2.16059, "24725": 2.15237, "24730": 2.16673, "24735": 2.11679, "24740": 2.12864, "24745": 2.133, "24750": 2.14015, "24755": 2.16682, "24760": 2.1355, "24765": 2.12933, "24770": 2.14683, "24775": 2.13171, "24780": 2.09804, "24785": 2.1401, "24790": 2.16139, "24795": 2.14241, "24800": 2.12911, "24805": 2.16852, "24810": 2.14436, "24815": 2.13589, "24820": 2.12215, "24825": 2.1622, "24830": 2.12071, "24835": 2.14963, "24840": 2.15976, "24845": 2.16855, "24850": 2.14333, "24855": 2.15334, "24860": 2.12395, "24865": 2.08721, "24870": 2.14768, "24875": 2.20531, "24880": 2.16764, "24885": 2.14206, "24890": 2.13343, "24895": 2.16182, "24900": 2.13714, "24905": 2.15267, "24910": 2.14255, "24915": 2.14796, "24920": 2.09448, "24925": 2.14051, "24930": 2.14302, "24935": 2.18777, "24940": 2.09199, "24945": 2.13617, "24950": 2.12422, "24955": 2.1303, "24960": 2.12201, "24965": 2.14971, "24970": 2.09097, "24975": 2.14568, "24980": 2.13304, "24985": 2.17034, "24990": 2.14779, "24995": 2.12782, "25000": 2.10278, "25005": 2.09147, "25010": 2.1437, "25015": 2.13642, "25020": 2.10296, "25025": 2.16672, "25030": 2.12206, "25035": 2.18257, "25040": 2.11462, "25045": 2.12117, "25050": 2.13567, "25055": 2.14675, "25060": 2.11262, "25065": 2.15888, "25070": 2.13426, "25075": 2.16425, "25080": 2.11767, "25085": 2.17336, "25090": 2.15568, "25095": 2.11153, "25100": 2.10337, "25105": 2.10051, "25110": 2.1349, "25115": 2.12346, "25120": 2.14156, "25125": 2.11666, "25130": 2.11238, "25135": 2.13104, "25140": 2.10367, "25145": 2.19649, "25150": 2.14074, "25155": 2.15611, "25160": 2.11933, "25165": 2.1063, "25170": 2.17075, "25175": 2.11301, "25180": 2.12291, "25185": 2.14535, "25190": 2.14697, "25195": 2.1564, "25200": 2.13092, "25205": 2.17574, "25210": 2.18495, "25215": 2.11516, "25220": 2.15083, "25225": 2.12953, "25230": 2.14196, "25235": 2.14132, "25240": 2.14799, "25245": 2.12092, "25250": 2.12899, "25255": 2.15111, "25260": 2.17565, "25265": 2.11564, "25270": 2.13624, "25275": 2.09565, "25280": 2.15307, "25285": 2.13458, "25290": 2.16461, "25295": 2.1326, "25300": 2.12231, "25305": 2.11447, "25310": 2.14117, "25315": 2.09099, "25320": 2.14212, "25325": 2.13789, "25330": 2.18865, "25335": 2.12267, "25340": 2.12823, "25345": 2.1269, "25350": 2.13112, "25355": 2.11715, "25360": 2.10278, "25365": 2.09432, "25370": 2.11113, "25375": 2.14467, "25380": 2.10473, "25385": 2.17041, "25390": 2.1095, "25395": 2.13663, "25400": 2.10712, "25405": 2.13904, "25410": 2.19604, "25415": 2.17106, "25420": 2.12302, "25425": 2.15152, "25430": 2.08894, "25435": 2.15549, "25440": 2.10474, "25445": 2.15861, "25450": 2.12564, "25455": 2.14994, "25460": 2.16594, "25465": 2.17199, "25470": 2.17646, "25475": 2.12318, "25480": 2.13862, "25485": 2.14251, "25490": 2.15176, "25495": 2.14973, "25500": 2.15656, "25505": 2.16424, "25510": 2.11122, "25515": 2.15367, "25520": 2.1262, "25525": 2.0981, "25530": 2.15575, "25535": 2.12835, "25540": 2.13038, "25545": 2.15844, "25550": 2.14764, "25555": 2.16118, "25560": 2.10607, "25565": 2.13096, "25570": 2.14296, "25575": 2.16848, "25580": 2.11852, "25585": 2.17538, "25590": 2.15368, "25595": 2.16016, "25600": 2.18205, "25605": 2.14799, "25610": 2.10199, "25615": 2.13713, "25620": 2.11722, "25625": 2.16753, "25630": 2.09365, "25635": 2.14745, "25640": 2.12327, "25645": 2.13787, "25650": 2.15491, "25655": 2.18547, "25660": 2.07122, "25665": 2.13423, "25670": 2.11036, "25675": 2.11443, "25680": 2.16233, "25685": 2.14923, "25690": 2.1407, "25695": 2.13393, "25700": 2.13092, "25705": 2.12842, "25710": 2.13003, "25715": 2.06947, "25720": 2.11469, "25725": 2.19184, "25730": 2.14477, "25735": 2.15282, "25740": 2.10409, "25745": 2.12491, "25750": 2.12897, "25755": 2.15464, "25760": 2.16588, "25765": 2.15757, "25770": 2.11659, "25775": 2.1042, "25780": 2.10752, "25785": 2.12756, "25790": 2.17143, "25795": 2.1426, "25800": 2.12715, "25805": 2.08287, "25810": 2.1266, "25815": 2.08947, "25820": 2.16622, "25825": 2.13821, "25830": 2.10228, "25835": 2.15152, "25840": 2.15718, "25845": 2.14809, "25850": 2.13039, "25855": 2.12974, "25860": 2.15727, "25865": 2.15944, "25870": 2.08888, "25875": 2.08041, "25880": 2.10266, "25885": 2.13944, "25890": 2.11371, "25895": 2.12252, "25900": 2.11036, "25905": 2.12024, "25910": 2.16353, "25915": 2.12364, "25920": 2.09258, "25925": 2.15033, "25930": 2.16176, "25935": 2.11039, "25940": 2.14527, "25945": 2.08586, "25950": 2.13446, "25955": 2.09393, "25960": 2.1325, "25965": 2.19469, "25970": 2.107, "25975": 2.13263, "25980": 2.13826, "25985": 2.17572, "25990": 2.10936, "25995": 2.16553, "26000": 2.15138, "26005": 2.16982, "26010": 2.12102, "26015": 2.135, "26020": 2.1486, "26025": 2.13747, "26030": 2.16213, "26035": 2.14014, "26040": 2.13481, "26045": 2.17438, "26050": 2.15429, "26055": 2.16882, "26060": 2.15873, "26065": 2.13289, "26070": 2.15995, "26075": 2.15965, "26080": 2.19717, "26085": 2.11752, "26090": 2.10219, "26095": 2.08448, "26100": 2.15968, "26105": 2.08655, "26110": 2.0981, "26115": 2.14262, "26120": 2.10102, "26125": 2.10697, "26130": 2.12797, "26135": 2.14886, "26140": 2.1361, "26145": 2.1451, "26150": 2.17734, "26155": 2.16567, "26160": 2.13347, "26165": 2.11727, "26170": 2.1019, "26175": 2.13334, "26180": 2.15871, "26185": 2.15831, "26190": 2.13481, "26195": 2.13079, "26200": 2.0914, "26205": 2.15994, "26210": 2.08929, "26215": 2.12423, "26220": 2.1145, "26225": 2.15683, "26230": 2.15769, "26235": 2.15416, "26240": 2.13496, "26245": 2.14865, "26250": 2.19124, "26255": 2.10558, "26260": 2.14488, "26265": 2.15432, "26270": 2.12705, "26275": 2.11315, "26280": 2.11094, "26285": 2.1283, "26290": 2.12891, "26295": 2.14523, "26300": 2.11659, "26305": 2.13577, "26310": 2.15492, "26315": 2.18831, "26320": 2.11459, "26325": 2.13982, "26330": 2.17069, "26335": 2.14379, "26340": 2.16126, "26345": 2.12537, "26350": 2.13529, "26355": 2.10284, "26360": 2.10268, "26365": 2.16247, "26370": 2.09278, "26375": 2.11874, "26380": 2.10091, "26385": 2.117, "26390": 2.15437, "26395": 2.1221, "26400": 2.12429, "26405": 2.09729, "26410": 2.11044, "26415": 2.12686, "26420": 2.11466, "26425": 2.1279, "26430": 2.10941, "26435": 2.13882, "26440": 2.16479, "26445": 2.13449, "26450": 2.14245, "26455": 2.0944, "26460": 2.11297, "26465": 2.10533, "26470": 2.14085, "26475": 2.14312, "26480": 2.11548, "26485": 2.13335, "26490": 2.13369, "26495": 2.13015, "26500": 2.15367, "26505": 2.10513, "26510": 2.13104, "26515": 2.15892, "26520": 2.12327, "26525": 2.16075, "26530": 2.14398, "26535": 2.11443, "26540": 2.12308, "26545": 2.15186, "26550": 2.10707, "26555": 2.11525, "26560": 2.10144, "26565": 2.06877, "26570": 2.12608, "26575": 2.16417, "26580": 2.06922, "26585": 2.11826, "26590": 2.10361, "26595": 2.11659, "26600": 2.12827, "26605": 2.16328, "26610": 2.11349, "26615": 2.13429, "26620": 2.13775, "26625": 2.15625, "26630": 2.1345, "26635": 2.15793, "26640": 2.07672, "26645": 2.14661, "26650": 2.13923, "26655": 2.16018, "26660": 2.12628, "26665": 2.14199, "26670": 2.12383, "26675": 2.15169, "26680": 2.13683, "26685": 2.10376, "26690": 2.15562, "26695": 2.13812, "26700": 2.09929, "26705": 2.15505, "26710": 2.14512, "26715": 2.11516, "26720": 2.08075, "26725": 2.08894, "26730": 2.1211, "26735": 2.10485, "26740": 2.11272, "26745": 2.13866, "26750": 2.17985, "26755": 2.1196, "26760": 2.10553, "26765": 2.14337, "26770": 2.08688, "26775": 2.13497, "26780": 2.16071, "26785": 2.0941, "26790": 2.11726, "26795": 2.16537, "26800": 2.10821, "26805": 2.12625, "26810": 2.09135, "26815": 2.10933, "26820": 2.11579, "26825": 2.13162, "26830": 2.10579, "26835": 2.13006, "26840": 2.13333, "26845": 2.14399, "26850": 2.09453, "26855": 2.09199, "26860": 2.0714, "26865": 2.11149, "26870": 2.15037, "26875": 2.14866, "26880": 2.12412, "26885": 2.12032, "26890": 2.13928, "26895": 2.15392, "26900": 2.12186, "26905": 2.13337, "26910": 2.13973, "26915": 2.09881, "26920": 2.14591, "26925": 2.16648, "26930": 2.13604, "26935": 2.10213, "26940": 2.17016, "26945": 2.12817, "26950": 2.13085, "26955": 2.11932, "26960": 2.19144, "26965": 2.11077, "26970": 2.11296, "26975": 2.1231, "26980": 2.11853, "26985": 2.0954, "26990": 2.11189, "26995": 2.11384, "27000": 2.14377, "27005": 2.11637, "27010": 2.11273, "27015": 2.09753, "27020": 2.12986, "27025": 2.11598, "27030": 2.16817, "27035": 2.12702, "27040": 2.13464, "27045": 2.1352, "27050": 2.15884, "27055": 2.11061, "27060": 2.09056, "27065": 2.14914, "27070": 2.12844, "27075": 2.15408, "27080": 2.13587, "27085": 2.11289, "27090": 2.1097, "27095": 2.12834, "27100": 2.13265, "27105": 2.15366, "27110": 2.12358, "27115": 2.12186, "27120": 2.13766, "27125": 2.1268, "27130": 2.11335, "27135": 2.12697, "27140": 2.14147, "27145": 2.13388, "27150": 2.13967, "27155": 2.11592, "27160": 2.10502, "27165": 2.12815, "27170": 2.12563, "27175": 2.15334, "27180": 2.11289, "27185": 2.12101, "27190": 2.13528, "27195": 2.13389, "27200": 2.11879, "27205": 2.09949, "27210": 2.17613, "27215": 2.13392, "27220": 2.1245, "27225": 2.11984, "27230": 2.1362, "27235": 2.08771, "27240": 2.13282, "27245": 2.13377, "27250": 2.09918, "27255": 2.11501, "27260": 2.09958, "27265": 2.1416, "27270": 2.16711, "27275": 2.03163, "27280": 2.11304, "27285": 2.14228, "27290": 2.13739, "27295": 2.1493, "27300": 2.11857, "27305": 2.13495, "27310": 2.09602, "27315": 2.14927, "27320": 2.19539, "27325": 2.1245, "27330": 2.1002, "27335": 2.0965, "27340": 2.09404, "27345": 2.13324, "27350": 2.1224, "27355": 2.15314, "27360": 2.09526, "27365": 2.11799, "27370": 2.12755, "27375": 2.15602, "27380": 2.10553, "27385": 2.12877, "27390": 2.15535, "27395": 2.09967, "27400": 2.13096, "27405": 2.17162, "27410": 2.13355, "27415": 2.11986, "27420": 2.12207, "27425": 2.12016, "27430": 2.10627, "27435": 2.11843, "27440": 2.15637, "27445": 2.10577, "27450": 2.12533, "27455": 2.10242, "27460": 2.10888, "27465": 2.12331, "27470": 2.09287, "27475": 2.11923, "27480": 2.12353, "27485": 2.12245, "27490": 2.11894, "27495": 2.10826, "27500": 2.11691, "27505": 2.13772, "27510": 2.10365, "27515": 2.11436, "27520": 2.10237, "27525": 2.14869, "27530": 2.1952, "27535": 2.13763, "27540": 2.07873, "27545": 2.13669, "27550": 2.16639, "27555": 2.09052, "27560": 2.12369, "27565": 2.14108, "27570": 2.08973, "27575": 2.13388, "27580": 2.12435, "27585": 2.14417, "27590": 2.12007, "27595": 2.17421, "27600": 2.13408, "27605": 2.12064, "27610": 2.15175, "27615": 2.12358, "27620": 2.10808, "27625": 2.16984, "27630": 2.08501, "27635": 2.16231, "27640": 2.12874, "27645": 2.11972, "27650": 2.13142, "27655": 2.04129, "27660": 2.08099, "27665": 2.15827, "27670": 2.10583, "27675": 2.12356, "27680": 2.14558, "27685": 2.11678, "27690": 2.13045, "27695": 2.11033, "27700": 2.1241, "27705": 2.12811, "27710": 2.1443, "27715": 2.09272, "27720": 2.14159, "27725": 2.09865, "27730": 2.11727, "27735": 2.11712, "27740": 2.15177, "27745": 2.12934, "27750": 2.12535, "27755": 2.11685, "27760": 2.05388, "27765": 2.13643, "27770": 2.10994, "27775": 2.09834, "27780": 2.10643, "27785": 2.08807, "27790": 2.10262, "27795": 2.08442, "27800": 2.10109, "27805": 2.09853, "27810": 2.12653, "27815": 2.16932, "27820": 2.11534, "27825": 2.17524, "27830": 2.11726, "27835": 2.11558, "27840": 2.14015, "27845": 2.08758, "27850": 2.15076, "27855": 2.1595, "27860": 2.08703, "27865": 2.12674, "27870": 2.0906, "27875": 2.15162, "27880": 2.08592, "27885": 2.12884, "27890": 2.10853, "27895": 2.13881, "27900": 2.14576, "27905": 2.10738, "27910": 2.09992, "27915": 2.1088, "27920": 2.1753, "27925": 2.13615, "27930": 2.13483, "27935": 2.11932, "27940": 2.13072, "27945": 2.13905, "27950": 2.10385, "27955": 2.12913, "27960": 2.10186, "27965": 2.09479, "27970": 2.11532, "27975": 2.10523, "27980": 2.11784, "27985": 2.14384, "27990": 2.13864, "27995": 2.17574, "28000": 2.12751, "28005": 2.07553, "28010": 2.12456, "28015": 2.13094, "28020": 2.11301, "28025": 2.12774, "28030": 2.09132, "28035": 2.11211, "28040": 2.13294, "28045": 2.12097, "28050": 2.08793, "28055": 2.12687, "28060": 2.1238, "28065": 2.117, "28070": 2.10612, "28075": 2.1118, "28080": 2.13955, "28085": 2.11495, "28090": 2.16269, "28095": 2.08731, "28100": 2.14238, "28105": 2.12101, "28110": 2.09521, "28115": 2.11077, "28120": 2.08737, "28125": 2.11673, "28130": 2.14366, "28135": 2.0925, "28140": 2.15522, "28145": 2.118, "28150": 2.17812, "28155": 2.12602, "28160": 2.17389, "28165": 2.11359, "28170": 2.17575, "28175": 2.10806, "28180": 2.0989, "28185": 2.11015, "28190": 2.16829, "28195": 2.10105, "28200": 2.14818, "28205": 2.1397, "28210": 2.13487, "28215": 2.12679, "28220": 2.11859, "28225": 2.13983, "28230": 2.15316, "28235": 2.12276, "28240": 2.13855, "28245": 2.16177, "28250": 2.10624, "28255": 2.13756, "28260": 2.10068, "28265": 2.13138, "28270": 2.09345, "28275": 2.14647, "28280": 2.119, "28285": 2.11984, "28290": 2.13565, "28295": 2.13949, "28300": 2.1491, "28305": 2.09642, "28310": 2.13345, "28315": 2.11274, "28320": 2.11725, "28325": 2.14661, "28330": 2.11493, "28335": 2.08836, "28340": 2.11148, "28345": 2.12203, "28350": 2.13403, "28355": 2.15811, "28360": 2.13541, "28365": 2.11658, "28370": 2.12381, "28375": 2.09284, "28380": 2.11621, "28385": 2.09566, "28390": 2.12405, "28395": 2.12289, "28400": 2.09379, "28405": 2.13793, "28410": 2.08311, "28415": 2.1015, "28420": 2.10275, "28425": 2.13747, "28430": 2.12876, "28435": 2.13273, "28440": 2.09981, "28445": 2.08284, "28450": 2.11749, "28455": 2.10253, "28460": 2.12171, "28465": 2.19036, "28470": 2.0863, "28475": 2.12463, "28480": 2.08986, "28485": 2.09575, "28490": 2.13704, "28495": 2.0973, "28500": 2.08465, "28505": 2.11313, "28510": 2.15107, "28515": 2.15935, "28520": 2.10894, "28525": 2.12469, "28530": 2.10884, "28535": 2.10495, "28540": 2.12579, "28545": 2.11393, "28550": 2.08526, "28555": 2.13091, "28560": 2.11936, "28565": 2.15534, "28570": 2.13168, "28575": 2.0955, "28580": 2.11196, "28585": 2.09673, "28590": 2.1104, "28595": 2.15025, "28600": 2.08586, "28605": 2.09822, "28610": 2.09667, "28615": 2.13926, "28620": 2.05722, "28625": 2.10811, "28630": 2.10099, "28635": 2.09917, "28640": 2.10116, "28645": 2.13559, "28650": 2.13664, "28655": 2.1042, "28660": 2.07102, "28665": 2.12202, "28670": 2.16769, "28675": 2.09694, "28680": 2.0834, "28685": 2.16288, "28690": 2.06641, "28695": 2.10701, "28700": 2.15986, "28705": 2.11154, "28710": 2.11051, "28715": 2.08822, "28720": 2.11024, "28725": 2.11216, "28730": 2.13078, "28735": 2.11591, "28740": 2.09988, "28745": 2.10929, "28750": 2.07311, "28755": 2.12975, "28760": 2.11288, "28765": 2.1038, "28770": 2.16117, "28775": 2.09949, "28780": 2.14524, "28785": 2.1028, "28790": 2.10446, "28795": 2.12259, "28800": 2.09659, "28805": 2.17736, "28810": 2.13716, "28815": 2.17102, "28820": 2.08795, "28825": 2.11412, "28830": 2.13429, "28835": 2.1408, "28840": 2.10988, "28845": 2.11316, "28850": 2.11936, "28855": 2.16295, "28860": 2.15368, "28865": 2.1558, "28870": 2.14859, "28875": 2.13604, "28880": 2.14601, "28885": 2.14049, "28890": 2.15794, "28895": 2.10713, "28900": 2.11184, "28905": 2.09553, "28910": 2.11138, "28915": 2.10338, "28920": 2.14542, "28925": 2.13112, "28930": 2.08534, "28935": 2.08373, "28940": 2.13681, "28945": 2.09928, "28950": 2.10816, "28955": 2.14998, "28960": 2.12776, "28965": 2.12826, "28970": 2.16775, "28975": 2.10365, "28980": 2.15153, "28985": 2.12358, "28990": 2.13288, "28995": 2.09842, "29000": 2.10218, "29005": 2.13522, "29010": 2.08759, "29015": 2.11427, "29020": 2.08242, "29025": 2.09265, "29030": 2.13591, "29035": 2.07074, "29040": 2.12334, "29045": 2.10911, "29050": 2.07446, "29055": 2.1142, "29060": 2.09789, "29065": 2.11835, "29070": 2.08018, "29075": 2.10831, "29080": 2.0993, "29085": 2.09532, "29090": 2.11884, "29095": 2.09978, "29100": 2.13171, "29105": 2.13139, "29110": 2.11827, "29115": 2.09856, "29120": 2.12072, "29125": 2.1717, "29130": 2.13769, "29135": 2.07345, "29140": 2.08865, "29145": 2.11882, "29150": 2.18065, "29155": 2.07937, "29160": 2.09982, "29165": 2.1057, "29170": 2.13032, "29175": 2.08988, "29180": 2.13301, "29185": 2.09662, "29190": 2.13395, "29195": 2.02647, "29200": 2.15157, "29205": 2.13109, "29210": 2.09913, "29215": 2.15197, "29220": 2.14983, "29225": 2.14256, "29230": 2.08467, "29235": 2.11862, "29240": 2.07417, "29245": 2.10282, "29250": 2.08609, "29255": 2.12685, "29260": 2.10818, "29265": 2.15161, "29270": 2.13431, "29275": 2.1144, "29280": 2.12741, "29285": 2.10412, "29290": 2.10857, "29295": 2.12327, "29300": 2.06814, "29305": 2.08443, "29310": 2.13758, "29315": 2.08618, "29320": 2.08888, "29325": 2.06871, "29330": 2.13684, "29335": 2.14244, "29340": 2.11572, "29345": 2.14574, "29350": 2.16247, "29355": 2.14739, "29360": 2.15747, "29365": 2.11646, "29370": 2.13497, "29375": 2.08914, "29380": 2.09982, "29385": 2.11834, "29390": 2.13045, "29395": 2.14342, "29400": 2.0914, "29405": 2.11012, "29410": 2.10593, "29415": 2.1202, "29420": 2.1007, "29425": 2.14763, "29430": 2.11357, "29435": 2.11446, "29440": 2.11722, "29445": 2.11302, "29450": 2.08193, "29455": 2.13262, "29460": 2.06225, "29465": 2.16333, "29470": 2.09904, "29475": 2.14546, "29480": 2.13448, "29485": 2.14141, "29490": 2.10961, "29495": 2.14257, "29500": 2.08816, "29505": 2.15601, "29510": 2.12125, "29515": 2.1187, "29520": 2.12479, "29525": 2.13134, "29530": 2.11642, "29535": 2.11652, "29540": 2.10395, "29545": 2.11568, "29550": 2.12937, "29555": 2.10468, "29560": 2.11423, "29565": 2.08357, "29570": 2.11166, "29575": 2.11617, "29580": 2.07899, "29585": 2.10443, "29590": 2.12648, "29595": 2.12029, "29600": 2.12414, "29605": 2.13437, "29610": 2.13214, "29615": 2.1308, "29620": 2.07685, "29625": 2.08776, "29630": 2.10567, "29635": 2.1127, "29640": 2.09273, "29645": 2.11637, "29650": 2.09671, "29655": 2.11783, "29660": 2.12906, "29665": 2.10229, "29670": 2.10696, "29675": 2.12606, "29680": 2.11228, "29685": 2.08691, "29690": 2.14854, "29695": 2.13625, "29700": 2.09477, "29705": 2.16295, "29710": 2.11437, "29715": 2.10824, "29720": 2.11828, "29725": 2.102, "29730": 2.09518, "29735": 2.14579, "29740": 2.07548, "29745": 2.14201, "29750": 2.0907, "29755": 2.09707, "29760": 2.13752, "29765": 2.09477, "29770": 2.05808, "29775": 2.11404, "29780": 2.10357, "29785": 2.14872, "29790": 2.12516, "29795": 2.12401, "29800": 2.12111, "29805": 2.11106, "29810": 2.15412, "29815": 2.18641, "29820": 2.11249, "29825": 2.09947, "29830": 2.11797, "29835": 2.12281, "29840": 2.12909, "29845": 2.09745, "29850": 2.10862, "29855": 2.09212, "29860": 2.09938, "29865": 2.14716, "29870": 2.09013, "29875": 2.11914, "29880": 2.09239, "29885": 2.09743, "29890": 2.13923, "29895": 2.06775, "29900": 2.1249, "29905": 2.1004, "29910": 2.10095, "29915": 2.15858, "29920": 2.13625, "29925": 2.06784, "29930": 2.12527, "29935": 2.11235, "29940": 2.10195, "29945": 2.10157, "29950": 2.08766, "29955": 2.09602, "29960": 2.11103, "29965": 2.10124, "29970": 2.14779, "29975": 2.15871, "29980": 2.08831, "29985": 2.10066, "29990": 2.13839, "29995": 2.09043, "30000": 2.15338, "30005": 2.08573, "30010": 2.13347, "30015": 2.12365, "30020": 2.11055, "30025": 2.09658, "30030": 2.06574, "30035": 2.10385, "30040": 2.10003, "30045": 2.10441, "30050": 2.15714, "30055": 2.09857, "30060": 2.0992, "30065": 2.10633, "30070": 2.12768, "30075": 2.09781, "30080": 2.11557, "30085": 2.09703, "30090": 2.13294, "30095": 2.11362, "30100": 2.07101, "30105": 2.1278, "30110": 2.12618, "30115": 2.12397, "30120": 2.08951, "30125": 2.08064, "30130": 2.12425, "30135": 2.11751, "30140": 2.14429, "30145": 2.05189, "30150": 2.09254, "30155": 2.11988, "30160": 2.12931, "30165": 2.13569, "30170": 2.1328, "30175": 2.12507, "30180": 2.14303, "30185": 2.09947, "30190": 2.13094, "30195": 2.09649, "30200": 2.14166, "30205": 2.10655, "30210": 2.12661, "30215": 2.09686, "30220": 2.13556, "30225": 2.1345, "30230": 2.12421, "30235": 2.09703, "30240": 2.11079, "30245": 2.09444, "30250": 2.10541, "30255": 2.09149, "30260": 2.05404, "30265": 2.10927, "30270": 2.09696, "30275": 2.12871, "30280": 2.06903, "30285": 2.11067, "30290": 2.10454, "30295": 2.09905, "30300": 2.15735, "30305": 2.12674, "30310": 2.12271, "30315": 2.10175, "30320": 2.09469, "30325": 2.14137, "30330": 2.07226, "30335": 2.11948, "30340": 2.11528, "30345": 2.13815, "30350": 2.1364, "30355": 2.15699, "30360": 2.09941, "30365": 2.1548, "30370": 2.12445, "30375": 2.10571, "30380": 2.15143, "30385": 2.14802, "30390": 2.10291, "30395": 2.12635, "30400": 2.06563, "30405": 2.10903, "30410": 2.09138, "30415": 2.1394, "30420": 2.11225, "30425": 2.10014, "30430": 2.13217, "30435": 2.1084, "30440": 2.06038, "30445": 2.13976, "30450": 2.10747, "30455": 2.14106, "30460": 2.09995, "30465": 2.10573, "30470": 2.09177, "30475": 2.12591, "30480": 2.10239, "30485": 2.13829, "30490": 2.09982, "30495": 2.10102, "30500": 2.10832, "30505": 2.12997, "30510": 2.13824, "30515": 2.14509, "30520": 2.13231, "30525": 2.11464, "30530": 2.14782, "30535": 2.0751, "30540": 2.09636, "30545": 2.10211, "30550": 2.1086, "30555": 2.13565, "30560": 2.1241, "30565": 2.1041, "30570": 2.12652, "30575": 2.07746, "30580": 2.11169, "30585": 2.09926, "30590": 2.11183, "30595": 2.10469, "30600": 2.12481, "30605": 2.0879, "30610": 2.13232, "30615": 2.15328, "30620": 2.11309, "30625": 2.08842, "30630": 2.13318, "30635": 2.12142, "30640": 2.07314, "30645": 2.10411, "30650": 2.09678, "30655": 2.12449, "30660": 2.10322, "30665": 2.09255, "30670": 2.11412, "30675": 2.09889, "30680": 2.07652, "30685": 2.16479, "30690": 2.079, "30695": 2.10826, "30700": 2.06995, "30705": 2.11006, "30710": 2.14807, "30715": 2.12168, "30720": 2.13252, "30725": 2.13585, "30730": 2.13142, "30735": 2.10291, "30740": 2.14631, "30745": 2.12805, "30750": 2.10958, "30755": 2.10062, "30760": 2.10827, "30765": 2.16, "30770": 2.12959, "30775": 2.11793, "30780": 2.11676, "30785": 2.14323, "30790": 2.11633, "30795": 2.11526, "30800": 2.11717, "30805": 2.12077, "30810": 2.1278, "30815": 2.1468, "30820": 2.12268, "30825": 2.0738, "30830": 2.09547, "30835": 2.11287, "30840": 2.09914, "30845": 2.15114, "30850": 2.10761, "30855": 2.09532, "30860": 2.11005, "30865": 2.12491, "30870": 2.11426, "30875": 2.10221, "30880": 2.09774, "30885": 2.07309, "30890": 2.0854, "30895": 2.11637, "30900": 2.15608, "30905": 2.14277, "30910": 2.11738, "30915": 2.09237, "30920": 2.12502, "30925": 2.12719, "30930": 2.09773, "30935": 2.08893, "30940": 2.13781, "30945": 2.11941, "30950": 2.11939, "30955": 2.08309, "30960": 2.11525, "30965": 2.13052, "30970": 2.10184, "30975": 2.10045, "30980": 2.16583, "30985": 2.12662, "30990": 2.14827, "30995": 2.09041, "31000": 2.0907, "31005": 2.08677, "31010": 2.07818, "31015": 2.11242, "31020": 2.12101, "31025": 2.13846, "31030": 2.14358, "31035": 2.08416, "31040": 2.1242, "31045": 2.16675, "31050": 2.12571, "31055": 2.07687, "31060": 2.0775, "31065": 2.09908, "31070": 2.06806, "31075": 2.1247, "31080": 2.07251, "31085": 2.15419, "31090": 2.04249, "31095": 2.15112, "31100": 2.10333, "31105": 2.16038, "31110": 2.16466, "31115": 2.11032, "31120": 2.11133, "31125": 2.13794, "31130": 2.08121, "31135": 2.14683, "31140": 2.10567, "31145": 2.06145, "31150": 2.12064, "31155": 2.06666, "31160": 2.10796, "31165": 2.09979, "31170": 2.13763, "31175": 2.11396, "31180": 2.08135, "31185": 2.09124, "31190": 2.08278, "31195": 2.15668, "31200": 2.12095, "31205": 2.11338, "31210": 2.06652, "31215": 2.10291, "31220": 2.1139, "31225": 2.09392, "31230": 2.15977, "31235": 2.10589, "31240": 2.12508, "31245": 2.14435, "31250": 2.12852, "31255": 2.06214, "31260": 2.15145, "31265": 2.11482, "31270": 2.09251, "31275": 2.09659, "31280": 2.12034, "31285": 2.11991, "31290": 2.09759, "31295": 2.14121, "31300": 2.13507, "31305": 2.07448, "31310": 2.06934, "31315": 2.11918, "31320": 2.15195, "31325": 2.14106, "31330": 2.12016, "31335": 2.10769, "31340": 2.10248, "31345": 2.07385, "31350": 2.14515, "31355": 2.13505, "31360": 2.13191, "31365": 2.05118, "31370": 2.11505, "31375": 2.09494, "31380": 2.14431, "31385": 2.1162, "31390": 2.11362, "31395": 2.14667, "31400": 2.10691, "31405": 2.09671, "31410": 2.07817, "31415": 2.13227, "31420": 2.11837, "31425": 2.14681, "31430": 2.11646, "31435": 2.09747, "31440": 2.10689, "31445": 2.10886, "31450": 2.07555, "31455": 2.13249, "31460": 2.14332, "31465": 2.13422, "31470": 2.12379, "31475": 2.07185, "31480": 2.07044, "31485": 2.11442, "31490": 2.09614, "31495": 2.09646, "31500": 2.09878, "31505": 2.14591, "31510": 2.11131, "31515": 2.13343, "31520": 2.12828, "31525": 2.10928, "31530": 2.11873, "31535": 2.09854, "31540": 2.15206, "31545": 2.10185, "31550": 2.13168, "31555": 2.10702, "31560": 2.12513, "31565": 2.09547, "31570": 2.10212, "31575": 2.08839, "31580": 2.11466, "31585": 2.12431, "31590": 2.13331, "31595": 2.13279, "31600": 2.11507, "31605": 2.13298, "31610": 2.10224, "31615": 2.11356, "31620": 2.10797, "31625": 2.1072, "31630": 2.11861, "31635": 2.08113, "31640": 2.10093, "31645": 2.08239, "31650": 2.1134, "31655": 2.08557, "31660": 2.07901, "31665": 2.10617, "31670": 2.06756, "31675": 2.10019, "31680": 2.18821, "31685": 2.065, "31690": 2.13141, "31695": 2.15502, "31700": 2.13016, "31705": 2.12724, "31710": 2.12244, "31715": 2.13479, "31720": 2.09835, "31725": 2.0781, "31730": 2.13837, "31735": 2.10693, "31740": 2.12152, "31745": 2.12674, "31750": 2.0917, "31755": 2.11164, "31760": 2.12337, "31765": 2.1476, "31770": 2.13296, "31775": 2.09514, "31780": 2.11554, "31785": 2.10641, "31790": 2.08816, "31795": 2.07049, "31800": 2.11006, "31805": 2.0458, "31810": 2.10642, "31815": 2.07713, "31820": 2.06965, "31825": 2.12287, "31830": 2.12468, "31835": 2.08016, "31840": 2.12045, "31845": 2.03116, "31850": 2.08688, "31855": 2.13626, "31860": 2.13234, "31865": 2.12499, "31870": 2.07057, "31875": 2.07811, "31880": 2.10239, "31885": 2.11643, "31890": 2.11542, "31895": 2.10901, "31900": 2.10486, "31905": 2.08863, "31910": 2.1262, "31915": 2.14169, "31920": 2.11409, "31925": 2.10815, "31930": 2.12263, "31935": 2.1193, "31940": 2.12075, "31945": 2.0592, "31950": 2.11297, "31955": 2.11363, "31960": 2.11181, "31965": 2.11934, "31970": 2.0555, "31975": 2.04729, "31980": 2.10916, "31985": 2.0838, "31990": 2.13992, "31995": 2.14432, "32000": 2.08653, "32005": 2.08325, "32010": 2.11268, "32015": 2.12432, "32020": 2.12377, "32025": 2.04789, "32030": 2.06283, "32035": 2.07672, "32040": 2.09365, "32045": 2.10146, "32050": 2.09967, "32055": 2.08628, "32060": 2.05923, "32065": 2.15782, "32070": 2.10302, "32075": 2.12324, "32080": 2.09069, "32085": 2.10757, "32090": 2.16141, "32095": 2.15027, "32100": 2.13975, "32105": 2.09415, "32110": 2.11689, "32115": 2.08475, "32120": 2.12494, "32125": 2.09859, "32130": 2.13269, "32135": 2.06801, "32140": 2.04969, "32145": 2.13866, "32150": 2.09092, "32155": 2.08716, "32160": 2.0814, "32165": 2.09621, "32170": 2.10633, "32175": 2.10372, "32180": 2.09987, "32185": 2.09655, "32190": 2.08306, "32195": 2.10624, "32200": 2.0931, "32205": 2.05627, "32210": 2.11059, "32215": 2.12266, "32220": 2.10447, "32225": 2.13997, "32230": 2.08739, "32235": 2.08954, "32240": 2.10909, "32245": 2.10009, "32250": 2.09989, "32255": 2.15338, "32260": 2.0585, "32265": 2.09265, "32270": 2.03623, "32275": 2.14276, "32280": 2.12726, "32285": 2.14442, "32290": 2.08077, "32295": 2.12309, "32300": 2.10636, "32305": 2.09336, "32310": 2.12394, "32315": 2.16904, "32320": 2.08961, "32325": 2.10778, "32330": 2.10251, "32335": 2.12204, "32340": 2.10561, "32345": 2.08539, "32350": 2.09416, "32355": 2.09264, "32360": 2.10232, "32365": 2.08623, "32370": 2.08882, "32375": 2.08587, "32380": 2.12664, "32385": 2.08556, "32390": 2.08158, "32395": 2.10443, "32400": 2.07796, "32405": 2.088, "32410": 2.12608, "32415": 2.09928, "32420": 2.08919, "32425": 2.08853, "32430": 2.10852, "32435": 2.15258, "32440": 2.12288, "32445": 2.09488, "32450": 2.09787, "32455": 2.06534, "32460": 2.10092, "32465": 2.10456, "32470": 2.08111, "32475": 2.0829, "32480": 2.09385, "32485": 2.12316, "32490": 2.09586, "32495": 2.09051, "32500": 2.11283, "32505": 2.08984, "32510": 2.06108, "32515": 2.13495, "32520": 2.08284, "32525": 2.08625, "32530": 2.12452, "32535": 2.09773, "32540": 2.11535, "32545": 2.11469, "32550": 2.10574, "32555": 2.05709, "32560": 2.11051, "32565": 2.11686, "32570": 2.10749, "32575": 2.11677, "32580": 2.1249, "32585": 2.10275, "32590": 2.11185, "32595": 2.11842, "32600": 2.12772, "32605": 2.08812, "32610": 2.10183, "32615": 2.12052, "32620": 2.12349, "32625": 2.07869, "32630": 2.09656, "32635": 2.0968, "32640": 2.08124, "32645": 2.11275, "32650": 2.1122, "32655": 2.09306, "32660": 2.09869, "32665": 2.07985, "32670": 2.14371, "32675": 2.07973, "32680": 2.07973, "32685": 2.06657, "32690": 2.11651, "32695": 2.06871, "32700": 2.13081, "32705": 2.1029, "32710": 2.08058, "32715": 2.13387, "32720": 2.06679, "32725": 2.0799, "32730": 2.05843, "32735": 2.09946, "32740": 2.0935, "32745": 2.10921, "32750": 2.06143, "32755": 2.09572, "32760": 2.11196, "32765": 2.11501, "32770": 2.0865, "32775": 2.10404, "32780": 2.11296, "32785": 2.10206, "32790": 2.11725, "32795": 2.09814, "32800": 2.11844, "32805": 2.10477, "32810": 2.09538, "32815": 2.08949, "32820": 2.12872, "32825": 2.08993, "32830": 2.11065, "32835": 2.1274, "32840": 2.09183, "32845": 2.10004, "32850": 2.12547, "32855": 2.1102, "32860": 2.0978, "32865": 2.10806, "32870": 2.10646, "32875": 2.09665, "32880": 2.08527, "32885": 2.05092, "32890": 2.08623, "32895": 2.11972, "32900": 2.11226, "32905": 2.12589, "32910": 2.09545, "32915": 2.10152, "32920": 2.0836, "32925": 2.0805, "32930": 2.10131, "32935": 2.11916, "32940": 2.09477, "32945": 2.08708, "32950": 2.0462, "32955": 2.10685, "32960": 2.08112, "32965": 2.16704, "32970": 2.14035, "32975": 2.07109, "32980": 2.0832, "32985": 2.07365, "32990": 2.16049, "32995": 2.1447, "33000": 2.13382, "33005": 2.09869, "33010": 2.09257, "33015": 2.10434, "33020": 2.1177, "33025": 2.07471, "33030": 2.10447, "33035": 2.07824, "33040": 2.08611, "33045": 2.11637, "33050": 2.09903, "33055": 2.1266, "33060": 2.07993, "33065": 2.10462, "33070": 2.09333, "33075": 2.11817, "33080": 2.09254, "33085": 2.10962, "33090": 2.11442, "33095": 2.08955, "33100": 2.13323, "33105": 2.07533, "33110": 2.09937, "33115": 2.10032, "33120": 2.0761, "33125": 2.105, "33130": 2.08017, "33135": 2.11573, "33140": 2.09972, "33145": 2.09518, "33150": 2.16382, "33155": 2.17169, "33160": 2.11272, "33165": 2.05284, "33170": 2.11704, "33175": 2.08351, "33180": 2.09416, "33185": 2.10119, "33190": 2.08496, "33195": 2.11885, "33200": 2.05117, "33205": 2.0955, "33210": 2.10035, "33215": 2.07506, "33220": 2.09254, "33225": 2.10789, "33230": 2.07181, "33235": 2.08828, "33240": 2.12232, "33245": 2.07877, "33250": 2.12565, "33255": 2.09654, "33260": 2.10367, "33265": 2.07531, "33270": 2.09611, "33275": 2.10905, "33280": 2.09854, "33285": 2.1028, "33290": 2.10029, "33295": 2.11854, "33300": 2.06759, "33305": 2.11084, "33310": 2.06624, "33315": 2.09468, "33320": 2.07644, "33325": 2.11896, "33330": 2.11924, "33335": 2.11198, "33340": 2.0932, "33345": 2.11921, "33350": 2.11062, "33355": 2.09364, "33360": 2.08212, "33365": 2.16972, "33370": 2.06322, "33375": 2.06122, "33380": 2.12054, "33385": 2.08219, "33390": 2.09153, "33395": 2.11328, "33400": 2.08766, "33405": 2.13042, "33410": 2.09011, "33415": 2.09075, "33420": 2.11634, "33425": 2.12112, "33430": 2.12496, "33435": 2.11673, "33440": 2.12803, "33445": 2.13816, "33450": 2.10824, "33455": 2.09156, "33460": 2.08039, "33465": 2.10551, "33470": 2.13403, "33475": 2.14552, "33480": 2.12296, "33485": 2.14209, "33490": 2.09706, "33495": 2.09836, "33500": 2.06515, "33505": 2.12766, "33510": 2.10011, "33515": 2.10299, "33520": 2.13072, "33525": 2.10836, "33530": 2.13882, "33535": 2.11891, "33540": 2.12571, "33545": 2.08957, "33550": 2.11473, "33555": 2.09598, "33560": 2.07798, "33565": 2.08873, "33570": 2.08424, "33575": 2.12298, "33580": 2.10215, "33585": 2.11981, "33590": 2.09891, "33595": 2.12934, "33600": 2.09105, "33605": 2.07731, "33610": 2.08162, "33615": 2.11096, "33620": 2.13606, "33625": 2.08885, "33630": 2.13396, "33635": 2.1035, "33640": 2.1219, "33645": 2.06012, "33650": 2.07161, "33655": 2.082, "33660": 2.05129, "33665": 2.09856, "33670": 2.14052, "33675": 2.09139, "33680": 2.09754, "33685": 2.08412, "33690": 2.07014, "33695": 2.13123, "33700": 2.10627, "33705": 2.08919, "33710": 2.12776, "33715": 2.08403, "33720": 2.11333, "33725": 2.10608, "33730": 2.09364, "33735": 2.16425, "33740": 2.09258, "33745": 2.09705, "33750": 2.12934, "33755": 2.12092, "33760": 2.11552, "33765": 2.09823, "33770": 2.10774, "33775": 2.10429, "33780": 2.05676, "33785": 2.07267, "33790": 2.08609, "33795": 2.08349, "33800": 2.11948, "33805": 2.11094, "33810": 2.10453, "33815": 2.10637, "33820": 2.12937, "33825": 2.07227, "33830": 2.06445, "33835": 2.0924, "33840": 2.08321, "33845": 2.07306, "33850": 2.08361, "33855": 2.06691, "33860": 2.08663, "33865": 2.11232, "33870": 2.07375, "33875": 2.11067, "33880": 2.09684, "33885": 2.08472, "33890": 2.09458, "33895": 2.12665, "33900": 2.1008, "33905": 2.12152, "33910": 2.09048, "33915": 2.09243, "33920": 2.13162, "33925": 2.09662, "33930": 2.06198, "33935": 2.08455, "33940": 2.10911, "33945": 2.10213, "33950": 2.10952, "33955": 2.09147, "33960": 2.05745, "33965": 2.08294, "33970": 2.11006, "33975": 2.09339, "33980": 2.11048, "33985": 2.07614, "33990": 2.11118, "33995": 2.04142, "34000": 2.1075, "34005": 2.07328, "34010": 2.08076, "34015": 2.09018, "34020": 2.11771, "34025": 2.11522, "34030": 2.1044, "34035": 2.07895, "34040": 2.07628, "34045": 2.08377, "34050": 2.1153, "34055": 2.09047, "34060": 2.10435, "34065": 2.09799, "34070": 2.0544, "34075": 2.07115, "34080": 2.13227, "34085": 2.11804, "34090": 2.06831, "34095": 2.07508, "34100": 2.09727, "34105": 2.11642, "34110": 2.11541, "34115": 2.12099, "34120": 2.1209, "34125": 2.11971, "34130": 2.12218, "34135": 2.11412, "34140": 2.06394, "34145": 2.08856, "34150": 2.11598, "34155": 2.11819, "34160": 2.1049, "34165": 2.12137, "34170": 2.0683, "34175": 2.1144, "34180": 2.06581, "34185": 2.12862, "34190": 2.09636, "34195": 2.10832, "34200": 2.09276, "34205": 2.08545, "34210": 2.13098, "34215": 2.10096, "34220": 2.1177, "34225": 2.11733, "34230": 2.09036, "34235": 2.07029, "34240": 2.12702, "34245": 2.08085, "34250": 2.07399, "34255": 2.09048, "34260": 2.05018, "34265": 2.10381, "34270": 2.06979, "34275": 2.10994, "34280": 2.10361, "34285": 2.10632, "34290": 2.10877, "34295": 2.12297, "34300": 2.09646, "34305": 2.08557, "34310": 2.03506, "34315": 2.11185, "34320": 2.11603, "34325": 2.1447, "34330": 2.1266, "34335": 2.09252, "34340": 2.12082, "34345": 2.11038, "34350": 2.08713, "34355": 2.13845, "34360": 2.07871, "34365": 2.06685, "34370": 2.08309, "34375": 2.13117, "34380": 2.0895, "34385": 2.12325, "34390": 2.09598, "34395": 2.12127, "34400": 2.07513, "34405": 2.07452, "34410": 2.09194, "34415": 2.055, "34420": 2.12273, "34425": 2.12844, "34430": 2.07925, "34435": 2.06337, "34440": 2.09935, "34445": 2.08054, "34450": 2.14921, "34455": 2.09686, "34460": 2.12533, "34465": 2.15236, "34470": 2.06456, "34475": 2.1112, "34480": 2.0945, "34485": 2.11179, "34490": 2.09182, "34495": 2.11998, "34500": 2.0899, "34505": 2.14114, "34510": 2.13413, "34515": 2.10328, "34520": 2.07779, "34525": 2.10945, "34530": 2.08114, "34535": 2.12129, "34540": 2.07075, "34545": 2.11365, "34550": 2.09476, "34555": 2.15314, "34560": 2.11129, "34565": 2.06443, "34570": 2.12634, "34575": 2.09525, "34580": 2.09506, "34585": 2.09563, "34590": 2.07645, "34595": 2.10289, "34600": 2.04029, "34605": 2.08475, "34610": 2.09271, "34615": 2.07952, "34620": 2.10034, "34625": 2.07817, "34630": 2.07377, "34635": 2.07546, "34640": 2.07128, "34645": 2.12112, "34650": 2.08214, "34655": 2.09358, "34660": 2.09974, "34665": 2.15691, "34670": 2.10081, "34675": 2.09698, "34680": 2.10345, "34685": 2.09731, "34690": 2.13997, "34695": 2.13624, "34700": 2.09836, "34705": 2.13196, "34710": 2.06654, "34715": 2.06892, "34720": 2.06974, "34725": 2.0427, "34730": 2.1183, "34735": 2.07302, "34740": 2.09958, "34745": 2.10141, "34750": 2.09514, "34755": 2.06361, "34760": 2.11525, "34765": 2.08062, "34770": 2.12097, "34775": 2.07713, "34780": 2.04708, "34785": 2.11212, "34790": 2.06282, "34795": 2.12298, "34800": 2.08941, "34805": 2.04831, "34810": 2.0696, "34815": 2.11509, "34820": 2.12149, "34825": 2.11286, "34830": 2.13423, "34835": 2.12706, "34840": 2.09447, "34845": 2.07766, "34850": 2.09665, "34855": 2.07453, "34860": 2.12362, "34865": 2.06559, "34870": 2.09816, "34875": 2.10537, "34880": 2.0856, "34885": 2.02216, "34890": 2.07477, "34895": 2.10187, "34900": 2.0922, "34905": 2.07977, "34910": 2.07656, "34915": 2.10078, "34920": 2.07558, "34925": 2.09071, "34930": 2.13899, "34935": 2.0897, "34940": 2.12327, "34945": 2.07146, "34950": 2.07244, "34955": 2.13253, "34960": 2.08677, "34965": 2.09863, "34970": 2.07335, "34975": 2.06916, "34980": 2.11836, "34985": 2.06659, "34990": 2.11902, "34995": 2.07173, "35000": 2.07938, "35005": 2.12344, "35010": 2.08608, "35015": 2.06652, "35020": 2.07723, "35025": 2.10163, "35030": 2.11696, "35035": 2.08725, "35040": 2.08172, "35045": 2.126, "35050": 2.07547, "35055": 2.08661, "35060": 2.11624, "35065": 2.11115, "35070": 2.08294, "35075": 2.09141, "35080": 2.09431, "35085": 2.10277, "35090": 2.10018, "35095": 2.06004, "35100": 2.09826, "35105": 2.09593, "35110": 2.09949, "35115": 2.07523, "35120": 2.05531, "35125": 2.06056, "35130": 2.07485, "35135": 2.09896, "35140": 2.12492, "35145": 2.0796, "35150": 2.07755, "35155": 2.12612, "35160": 2.05771, "35165": 2.11058, "35170": 2.07782, "35175": 2.09901, "35180": 2.09356, "35185": 2.07789, "35190": 2.05923, "35195": 2.13183, "35200": 2.06695, "35205": 2.11309, "35210": 2.08399, "35215": 2.06004, "35220": 2.12404, "35225": 2.11007, "35230": 2.09165, "35235": 2.08318, "35240": 2.07367, "35245": 2.1031, "35250": 2.1071, "35255": 2.11067, "35260": 2.10657, "35265": 2.07258, "35270": 2.09729, "35275": 2.07948, "35280": 2.07352, "35285": 2.09845, "35290": 2.07387, "35295": 2.06775, "35300": 2.06227, "35305": 2.09672, "35310": 2.10279, "35315": 2.067, "35320": 2.04684, "35325": 2.11572, "35330": 2.10337, "35335": 2.09592, "35340": 2.07653, "35345": 2.07058, "35350": 2.10693, "35355": 2.11342, "35360": 2.06521, "35365": 2.07118, "35370": 2.11484, "35375": 2.10834, "35380": 2.10589, "35385": 2.11859, "35390": 2.11309, "35395": 2.06142, "35400": 2.12235, "35405": 2.09708, "35410": 2.10517, "35415": 2.10409, "35420": 2.08597, "35425": 2.07071, "35430": 2.12124, "35435": 2.11437, "35440": 2.12334, "35445": 2.11909, "35450": 2.05795, "35455": 2.08593, "35460": 2.11459, "35465": 2.12629, "35470": 2.08515, "35475": 2.12226, "35480": 2.07401, "35485": 2.07612, "35490": 2.05606, "35495": 2.09623, "35500": 2.06953, "35505": 2.09459, "35510": 2.08383, "35515": 2.04873, "35520": 2.08979, "35525": 2.08895, "35530": 2.07121, "35535": 2.07231, "35540": 2.10745, "35545": 2.09744, "35550": 2.07855, "35555": 2.07145, "35560": 2.08705, "35565": 2.11635, "35570": 2.09464, "35575": 2.13034, "35580": 2.13733, "35585": 2.11067, "35590": 2.12608, "35595": 2.06663, "35600": 2.09234, "35605": 2.11876, "35610": 2.14402, "35615": 2.11486, "35620": 2.09073, "35625": 2.08236, "35630": 2.07994, "35635": 2.09257, "35640": 2.0986, "35645": 2.08607, "35650": 2.06601, "35655": 2.10384, "35660": 2.10636, "35665": 2.11147, "35670": 2.12382, "35675": 2.06465, "35680": 2.09007, "35685": 2.07828, "35690": 2.10681, "35695": 2.12268, "35700": 2.07048, "35705": 2.13604, "35710": 2.05243, "35715": 2.08286, "35720": 2.104, "35725": 2.11317, "35730": 2.06334, "35735": 2.09333, "35740": 2.10829, "35745": 2.10705, "35750": 2.09121, "35755": 2.07809, "35760": 2.09321, "35765": 2.08979, "35770": 2.10819, "35775": 2.0775, "35780": 2.05345, "35785": 2.06848, "35790": 2.03481, "35795": 2.07284, "35800": 2.07872, "35805": 2.08564, "35810": 2.10843, "35815": 2.0576, "35820": 2.08262, "35825": 2.05807, "35830": 2.05976, "35835": 2.09344, "35840": 2.08516, "35845": 2.10389, "35850": 2.0737, "35855": 2.09943, "35860": 2.08645, "35865": 2.10105, "35870": 2.07712, "35875": 2.11192, "35880": 2.0896, "35885": 2.07026, "35890": 2.12338, "35895": 2.12555, "35900": 2.08637, "35905": 2.06414, "35910": 2.09335, "35915": 2.10096, "35920": 2.07418, "35925": 2.07809, "35930": 2.10843, "35935": 2.12719, "35940": 2.08917, "35945": 2.09539, "35950": 2.10057, "35955": 2.08452, "35960": 2.07969, "35965": 2.08488, "35970": 2.10835, "35975": 2.12373, "35980": 2.09248, "35985": 2.06055, "35990": 2.09419, "35995": 2.11344, "36000": 2.10073, "36005": 2.06828, "36010": 2.05442, "36015": 2.08707, "36020": 2.0706, "36025": 2.07977, "36030": 2.12563, "36035": 2.08709, "36040": 2.11145, "36045": 2.05904, "36050": 2.09426, "36055": 2.12661, "36060": 2.15219, "36065": 2.0905, "36070": 2.12324, "36075": 2.07838, "36080": 2.12332, "36085": 2.08403, "36090": 2.08365, "36095": 2.10543, "36100": 2.09696, "36105": 2.09914, "36110": 2.07888, "36115": 2.06383, "36120": 2.09019, "36125": 2.07944, "36130": 2.07411, "36135": 2.08272, "36140": 2.07751, "36145": 2.11403, "36150": 2.11321, "36155": 2.12162, "36160": 2.11535, "36165": 2.09072, "36170": 2.0966, "36175": 2.09451, "36180": 2.05754, "36185": 2.03252, "36190": 2.08423, "36195": 2.10832, "36200": 2.12671, "36205": 2.05221, "36210": 2.07793, "36215": 2.08576, "36220": 2.10673, "36225": 2.07887, "36230": 2.09707, "36235": 2.08292, "36240": 2.0927, "36245": 2.10184, "36250": 2.10734, "36255": 2.08186, "36260": 2.0799, "36265": 2.0832, "36270": 2.06474, "36275": 2.10618, "36280": 2.07786, "36285": 2.09413, "36290": 2.11833, "36295": 2.07236, "36300": 2.10806, "36305": 2.10604, "36310": 2.06872, "36315": 2.13194, "36320": 2.15341, "36325": 2.13107, "36330": 2.09881, "36335": 2.11305, "36340": 2.09278, "36345": 2.10756, "36350": 2.06228, "36355": 2.05823, "36360": 2.06965, "36365": 2.12845, "36370": 2.12884, "36375": 2.07911, "36380": 2.08388, "36385": 2.16533, "36390": 2.0982, "36395": 2.09914, "36400": 2.0904, "36405": 2.06819, "36410": 2.11489, "36415": 2.12479, "36420": 2.11876, "36425": 2.08238, "36430": 2.04837, "36435": 2.08198, "36440": 2.114, "36445": 2.09852, "36450": 2.1267, "36455": 2.05777, "36460": 2.09416, "36465": 2.10289, "36470": 2.07522, "36475": 2.0579, "36480": 2.09808, "36485": 2.09727, "36490": 2.09654, "36495": 2.09477, "36500": 2.07095, "36505": 2.09999, "36510": 2.11306, "36515": 2.08464, "36520": 2.13681, "36525": 2.05416, "36530": 2.09071, "36535": 2.08529, "36540": 2.07207, "36545": 2.11998, "36550": 2.08936, "36555": 2.09645, "36560": 2.05149, "36565": 2.07469, "36570": 2.11715, "36575": 2.12198, "36580": 2.12327, "36585": 2.07338, "36590": 2.06841, "36595": 2.11245, "36600": 2.04922, "36605": 2.11825, "36610": 2.09865, "36615": 2.089, "36620": 2.14509, "36625": 2.11014, "36630": 2.08872, "36635": 2.10537, "36640": 2.11161, "36645": 2.0828, "36650": 2.09338, "36655": 2.08406, "36660": 2.13958, "36665": 2.07987, "36670": 2.12204, "36675": 2.08612, "36680": 2.1236, "36685": 2.09102, "36690": 2.07409, "36695": 2.07372, "36700": 2.1287, "36705": 2.10349, "36710": 2.06371, "36715": 2.05097, "36720": 2.08664, "36725": 2.09382, "36730": 2.11354, "36735": 2.08164, "36740": 2.10957, "36745": 2.09617, "36750": 2.09736, "36755": 2.11694, "36760": 2.09708, "36765": 2.05945, "36770": 2.1069, "36775": 2.04877, "36780": 2.1201, "36785": 2.08374, "36790": 2.08496, "36795": 2.11805, "36800": 2.09743, "36805": 2.10726, "36810": 2.1031, "36815": 2.04266, "36820": 2.07953, "36825": 2.06775, "36830": 2.09626, "36835": 2.08079, "36840": 2.07665, "36845": 2.07291, "36850": 2.11909, "36855": 2.04861, "36860": 2.04335, "36865": 2.07619, "36870": 2.12056, "36875": 2.05392, "36880": 2.0738, "36885": 2.0989, "36890": 2.10862, "36895": 2.07061, "36900": 2.1007, "36905": 2.06791, "36910": 2.09834, "36915": 2.0934, "36920": 2.06981, "36925": 2.06923, "36930": 2.09539, "36935": 2.08179, "36940": 2.09136, "36945": 2.09013, "36950": 2.08564, "36955": 2.08359, "36960": 2.05782, "36965": 2.11101, "36970": 2.09612, "36975": 2.1037, "36980": 2.09594, "36985": 2.07019, "36990": 2.07039, "36995": 2.10852, "37000": 2.0329, "37005": 2.08764, "37010": 2.06858, "37015": 2.10086, "37020": 2.11415, "37025": 2.09255, "37030": 2.05853, "37035": 2.06252, "37040": 2.08433, "37045": 2.12745, "37050": 2.09809, "37055": 2.08484, "37060": 2.08536, "37065": 2.09927, "37070": 2.11805, "37075": 2.11902, "37080": 2.11407, "37085": 2.08759, "37090": 2.11905, "37095": 2.06784, "37100": 2.1284, "37105": 2.05312, "37110": 2.05376, "37115": 2.04806, "37120": 2.08175, "37125": 2.11834, "37130": 2.08196, "37135": 2.11798, "37140": 2.07421, "37145": 2.10672, "37150": 2.10047, "37155": 2.09666, "37160": 2.12162, "37165": 2.06313, "37170": 2.09093, "37175": 2.08197, "37180": 2.09668, "37185": 2.09546, "37190": 2.05553, "37195": 2.08889, "37200": 2.09433, "37205": 2.09743, "37210": 2.09899, "37215": 2.06046, "37220": 2.0515, "37225": 2.11017, "37230": 2.07173, "37235": 2.08977, "37240": 2.06812, "37245": 2.05989, "37250": 2.07032, "37255": 2.10835, "37260": 2.08107, "37265": 2.06739, "37270": 2.0617, "37275": 2.0512, "37280": 2.09058, "37285": 2.08453, "37290": 2.10073, "37295": 2.08214, "37300": 2.08893, "37305": 2.10688, "37310": 2.07646, "37315": 2.1191, "37320": 2.11261, "37325": 2.11608, "37330": 2.0897, "37335": 2.05058, "37340": 2.04733, "37345": 2.04723, "37350": 2.11586, "37355": 2.05422, "37360": 2.12296, "37365": 2.08658, "37370": 2.12385, "37375": 2.07954, "37380": 2.06884, "37385": 2.06487, "37390": 2.06533, "37395": 2.0912, "37400": 2.12045, "37405": 2.07963, "37410": 2.087, "37415": 2.08273, "37420": 2.10391, "37425": 2.10008, "37430": 2.10259, "37435": 2.05876, "37440": 2.07091, "37445": 2.09059, "37450": 2.08773, "37455": 2.11232, "37460": 2.12824, "37465": 2.08557, "37470": 2.12199, "37475": 2.08168, "37480": 2.05029, "37485": 2.03836, "37490": 2.11876, "37495": 2.09932, "37500": 2.11112, "37505": 2.10131, "37510": 2.08041, "37515": 2.08862, "37520": 2.09649, "37525": 2.09053, "37530": 2.09686, "37535": 2.02559, "37540": 2.09375, "37545": 2.06907, "37550": 2.0574, "37555": 2.11038, "37560": 2.10442, "37565": 2.09788, "37570": 2.06194, "37575": 2.08694, "37580": 2.11765, "37585": 2.09498, "37590": 2.04895, "37595": 2.09326, "37600": 2.08514, "37605": 2.10338, "37610": 2.07146, "37615": 2.09569, "37620": 2.09603, "37625": 2.10257, "37630": 2.13711, "37635": 2.09571, "37640": 2.06238, "37645": 2.05843, "37650": 2.08, "37655": 2.1569, "37660": 2.099, "37665": 2.06853, "37670": 2.10093, "37675": 2.07892, "37680": 2.0789, "37685": 2.1339, "37690": 2.1099, "37695": 2.06481, "37700": 2.0864, "37705": 2.07254, "37710": 2.11856, "37715": 2.09142, "37720": 2.07627, "37725": 2.0553, "37730": 2.10622, "37735": 2.1075, "37740": 2.12812, "37745": 2.10491, "37750": 2.09272, "37755": 2.1024, "37760": 2.08556, "37765": 2.11281, "37770": 2.06082, "37775": 2.10693, "37780": 2.08941, "37785": 2.09357, "37790": 2.11553, "37795": 2.09382, "37800": 2.10078, "37805": 2.08922, "37810": 2.08697, "37815": 2.07254, "37820": 2.07075, "37825": 2.11985, "37830": 2.12402, "37835": 2.09633, "37840": 2.05997, "37845": 2.15104, "37850": 2.12915, "37855": 2.1026, "37860": 2.12764, "37865": 2.11993, "37870": 2.03477, "37875": 2.07764, "37880": 2.10852, "37885": 2.05812, "37890": 2.08209, "37895": 2.1218, "37900": 2.08707, "37905": 2.07603, "37910": 2.05606, "37915": 2.10287, "37920": 2.13146, "37925": 2.09541, "37930": 2.08209, "37935": 2.09374, "37940": 2.07862, "37945": 2.0742, "37950": 2.11203, "37955": 2.09501, "37960": 2.11701, "37965": 2.10201, "37970": 2.01848, "37975": 2.06887, "37980": 2.0937, "37985": 2.0885, "37990": 2.07339, "37995": 2.06705, "38000": 2.11281, "38005": 2.09794, "38010": 2.09572, "38015": 2.06181, "38020": 2.07728, "38025": 2.08923, "38030": 2.07557, "38035": 2.08685, "38040": 2.09307, "38045": 2.07576, "38050": 2.06847, "38055": 2.11694, "38060": 2.09995, "38065": 2.11062, "38070": 2.10241, "38075": 2.07593, "38080": 2.07929, "38085": 2.096, "38090": 2.06605, "38095": 2.10665, "38100": 2.06053, "38105": 2.10291, "38110": 2.08651, "38115": 2.09548, "38120": 2.07451, "38125": 2.11724, "38130": 2.06035, "38135": 2.11699, "38140": 2.0509, "38145": 2.06729, "38150": 2.09799, "38155": 2.09034, "38160": 2.10451, "38165": 2.05558, "38170": 2.03879, "38175": 2.06806, "38180": 2.08744, "38185": 2.11254, "38190": 2.09325, "38195": 2.08078, "38200": 2.07654, "38205": 2.09812, "38210": 2.0818, "38215": 2.11477, "38220": 2.10804, "38225": 2.12157, "38230": 2.13795, "38235": 2.07172, "38240": 2.10158, "38245": 2.09696, "38250": 2.0671, "38255": 2.06681, "38260": 2.10119, "38265": 2.12847, "38270": 2.10511, "38275": 2.09866, "38280": 2.0548, "38285": 2.0812, "38290": 2.08799, "38295": 2.06146, "38300": 2.04745, "38305": 2.0377, "38310": 2.06217, "38315": 2.09641, "38320": 2.06788, "38325": 2.05172, "38330": 2.09972, "38335": 2.09701, "38340": 2.05547, "38345": 2.0868, "38350": 2.08974, "38355": 2.08142, "38360": 2.04539, "38365": 2.06925, "38370": 2.08496, "38375": 2.1224, "38380": 2.09218, "38385": 2.05884, "38390": 2.10878, "38395": 2.08968, "38400": 2.09349, "38405": 2.07948, "38410": 2.0898, "38415": 2.08567, "38420": 2.09113, "38425": 2.0866, "38430": 2.05261, "38435": 2.11523, "38440": 2.09354, "38445": 2.05315, "38450": 2.13358, "38455": 2.09884, "38460": 2.05591, "38465": 2.05941, "38470": 2.08045, "38475": 2.09381, "38480": 2.08872, "38485": 2.09672, "38490": 2.09492, "38495": 2.10822, "38500": 2.09104, "38505": 2.11219, "38510": 2.09945, "38515": 2.10085, "38520": 2.09362, "38525": 2.0912, "38530": 2.08783, "38535": 2.0907, "38540": 2.09151, "38545": 2.07638, "38550": 2.06372, "38555": 2.03855, "38560": 2.07875, "38565": 2.07187, "38570": 2.07062, "38575": 2.09227, "38580": 2.11472, "38585": 2.10308, "38590": 2.08726, "38595": 2.06962, "38600": 2.10393, "38605": 2.07108, "38610": 2.09216, "38615": 2.0896, "38620": 2.04921, "38625": 2.05438, "38630": 2.04774, "38635": 2.08927, "38640": 2.13178, "38645": 2.0735, "38650": 2.03127, "38655": 2.10625, "38660": 2.0709, "38665": 2.11872, "38670": 2.12776, "38675": 2.0674, "38680": 2.09449, "38685": 2.06242, "38690": 2.11025, "38695": 2.09581, "38700": 2.07727, "38705": 2.0851, "38710": 2.05874, "38715": 2.06778, "38720": 2.097, "38725": 2.05685, "38730": 2.07058, "38735": 2.05105, "38740": 2.0676, "38745": 2.10868, "38750": 2.0725, "38755": 2.06166, "38760": 2.0979, "38765": 2.06645, "38770": 2.07484, "38775": 2.07733, "38780": 2.02181, "38785": 2.09875, "38790": 2.04153, "38795": 2.1053, "38800": 2.09476, "38805": 2.08491, "38810": 2.08333, "38815": 2.07746, "38820": 2.06893, "38825": 2.09876, "38830": 2.11198, "38835": 2.08016, "38840": 2.11316, "38845": 2.09344, "38850": 2.07082, "38855": 2.05897, "38860": 2.07288, "38865": 2.10799, "38870": 2.09011, "38875": 2.07276, "38880": 2.12698, "38885": 2.07622, "38890": 2.10848, "38895": 2.08172, "38900": 2.07472, "38905": 2.10117, "38910": 2.08083, "38915": 2.08773, "38920": 2.05359, "38925": 2.06783, "38930": 2.1028, "38935": 2.08111, "38940": 2.06323, "38945": 2.06674, "38950": 2.0897, "38955": 2.09263, "38960": 2.05116, "38965": 2.08202, "38970": 2.09395, "38975": 2.08477, "38980": 2.06478, "38985": 2.07469, "38990": 2.08737, "38995": 2.06896, "39000": 2.11387, "39005": 2.11714, "39010": 2.11743, "39015": 2.07672, "39020": 2.10779, "39025": 2.1125, "39030": 2.068, "39035": 2.0836, "39040": 2.07847, "39045": 2.05892, "39050": 2.09245, "39055": 2.09012, "39060": 2.08525, "39065": 2.08961, "39070": 2.09749, "39075": 2.0877, "39080": 2.06621, "39085": 2.12025, "39090": 2.08728, "39095": 2.09745, "39100": 2.09934, "39105": 2.0656, "39110": 2.09421, "39115": 2.06636, "39120": 2.11263, "39125": 2.07893, "39130": 2.05584, "39135": 2.07095, "39140": 2.09587, "39145": 2.15938, "39150": 2.08116, "39155": 2.07165, "39160": 2.08095, "39165": 2.12037, "39170": 2.12416, "39175": 2.06829, "39180": 2.10451, "39185": 2.09205, "39190": 2.10611, "39195": 2.12085, "39200": 2.07126, "39205": 2.12502, "39210": 2.11334, "39215": 2.08461, "39220": 2.12484, "39225": 2.09614, "39230": 2.0715, "39235": 2.07915, "39240": 2.11056, "39245": 2.07854, "39250": 2.1204, "39255": 2.08475, "39260": 2.09531, "39265": 2.12887, "39270": 2.10845, "39275": 2.0486, "39280": 2.08286, "39285": 2.05937, "39290": 2.08695, "39295": 2.12007, "39300": 2.05169, "39305": 2.08547, "39310": 2.11371, "39315": 2.08057, "39320": 2.08118, "39325": 2.02911, "39330": 2.09076, "39335": 2.10663, "39340": 2.09185, "39345": 2.09055, "39350": 2.05805, "39355": 2.08532, "39360": 2.02908, "39365": 2.0702, "39370": 2.0754, "39375": 2.09284, "39380": 2.10268, "39385": 2.08848, "39390": 2.05846, "39395": 2.05918, "39400": 2.07805, "39405": 2.05412, "39410": 2.09034, "39415": 2.07053, "39420": 2.05445, "39425": 2.06084, "39430": 2.04048, "39435": 2.09205, "39440": 2.0705, "39445": 2.06351, "39450": 2.11113, "39455": 2.05035, "39460": 2.07891, "39465": 2.07852, "39470": 2.05121, "39475": 2.08743, "39480": 2.11056, "39485": 2.13227, "39490": 2.06767, "39495": 2.10299, "39500": 2.09538, "39505": 2.10587, "39510": 2.09488, "39515": 2.1154, "39520": 2.07393, "39525": 2.05536, "39530": 2.11599, "39535": 2.1312, "39540": 2.07455, "39545": 2.10348, "39550": 2.07348, "39555": 2.08225, "39560": 2.05938, "39565": 2.08528, "39570": 2.05263, "39575": 2.05412, "39580": 2.06365, "39585": 2.06932, "39590": 2.09971, "39595": 2.08976, "39600": 2.11109, "39605": 2.08415, "39610": 2.08662, "39615": 2.04056, "39620": 2.12599, "39625": 2.09969, "39630": 2.10186, "39635": 2.10695, "39640": 2.05416, "39645": 2.0785, "39650": 2.05971, "39655": 2.07608, "39660": 2.06764, "39665": 2.07382, "39670": 2.08418, "39675": 2.06891, "39680": 2.05643, "39685": 2.06488, "39690": 2.10561, "39695": 2.10029, "39700": 2.10304, "39705": 2.07715, "39710": 2.09282, "39715": 2.02594, "39720": 2.12744, "39725": 2.07843, "39730": 2.08487, "39735": 2.09483, "39740": 2.10644, "39745": 2.07035, "39750": 2.07698, "39755": 2.09308, "39760": 2.07513, "39765": 2.12608, "39770": 2.09568, "39775": 2.07023, "39780": 2.09705, "39785": 2.06258, "39790": 2.0703, "39795": 2.09403, "39800": 2.0803, "39805": 2.1296, "39810": 2.06912, "39815": 2.08517, "39820": 2.03529, "39825": 2.0774, "39830": 2.09635, "39835": 2.08483, "39840": 2.11583, "39845": 2.08912, "39850": 2.05956, "39855": 2.07887, "39860": 2.08148, "39865": 2.07311, "39870": 2.07233, "39875": 2.08888, "39880": 2.04558, "39885": 2.09395, "39890": 2.07499, "39895": 2.06258, "39900": 2.05975, "39905": 2.09652, "39910": 2.08276, "39915": 2.04883, "39920": 2.06341, "39925": 2.1038, "39930": 2.09296, "39935": 2.06986, "39940": 2.11162, "39945": 2.09735, "39950": 2.03505, "39955": 2.10168, "39960": 2.10669, "39965": 2.11196, "39970": 2.04714, "39975": 2.07715, "39980": 2.09324, "39985": 2.06229, "39990": 2.08255, "39995": 2.09518, "40000": 2.07423, "40005": 2.06511, "40010": 2.06668, "40015": 2.10979, "40020": 2.07613, "40025": 2.08849, "40030": 2.0924, "40035": 2.08912, "40040": 2.07605, "40045": 2.06855, "40050": 2.09862, "40055": 2.1155, "40060": 2.11529, "40065": 2.11195, "40070": 2.07395, "40075": 2.06336, "40080": 2.09402, "40085": 2.08089, "40090": 2.09611, "40095": 2.0888, "40100": 2.07298, "40105": 2.02694, "40110": 2.08001, "40115": 2.07319, "40120": 2.0579, "40125": 2.06984, "40130": 2.05931, "40135": 2.0874, "40140": 2.09326, "40145": 2.06445, "40150": 2.11889, "40155": 2.06141, "40160": 2.06731, "40165": 2.08565, "40170": 2.06331, "40175": 2.12131, "40180": 2.10553, "40185": 2.0594, "40190": 2.08577, "40195": 2.04786, "40200": 2.05211, "40205": 2.08038, "40210": 2.0752, "40215": 2.08789, "40220": 2.09214, "40225": 2.05382, "40230": 2.12094, "40235": 2.06809, "40240": 2.06583, "40245": 2.08302, "40250": 2.11374, "40255": 2.1181, "40260": 2.07883, "40265": 2.04777, "40270": 2.0568, "40275": 2.09355, "40280": 2.07709, "40285": 2.08079, "40290": 2.04802, "40295": 2.06831, "40300": 2.05792, "40305": 2.10277, "40310": 2.1197, "40315": 2.08263, "40320": 2.11435, "40325": 2.09092, "40330": 2.09738, "40335": 2.10708, "40340": 2.09547, "40345": 2.08429, "40350": 2.06844, "40355": 2.09557, "40360": 2.08942, "40365": 2.0672, "40370": 2.11207, "40375": 2.08951, "40380": 2.0991, "40385": 2.0664, "40390": 2.09872, "40395": 2.08173, "40400": 2.04071, "40405": 2.05733, "40410": 2.07743, "40415": 2.06207, "40420": 2.05504, "40425": 2.05734, "40430": 2.0985, "40435": 2.04615, "40440": 2.0505, "40445": 2.109, "40450": 2.05844, "40455": 2.02543, "40460": 2.05308, "40465": 2.08298, "40470": 2.07305, "40475": 2.0824, "40480": 2.07025, "40485": 2.0627, "40490": 2.13214, "40495": 2.06913, "40500": 2.05339, "40505": 2.07901, "40510": 2.02093, "40515": 2.0808, "40520": 2.09915, "40525": 2.11446, "40530": 2.08513, "40535": 2.04222, "40540": 2.08159, "40545": 2.10135, "40550": 2.05287, "40555": 2.08557, "40560": 2.11253, "40565": 2.08123, "40570": 2.04822, "40575": 2.0531, "40580": 2.09237, "40585": 2.05862, "40590": 2.0588, "40595": 2.08424, "40600": 2.0546, "40605": 2.04853, "40610": 2.06884, "40615": 2.09022, "40620": 2.0468, "40625": 2.06713, "40630": 2.11143, "40635": 2.09791, "40640": 2.07189, "40645": 2.06255, "40650": 2.09327, "40655": 2.1162, "40660": 2.05706, "40665": 2.06682, "40670": 2.08825, "40675": 2.06794, "40680": 2.08669, "40685": 2.08471, "40690": 2.08277, "40695": 2.07555, "40700": 2.0808, "40705": 2.06346, "40710": 2.06014, "40715": 2.12917, "40720": 2.05981, "40725": 2.02343, "40730": 2.06056, "40735": 2.07862, "40740": 2.11028, "40745": 2.07632, "40750": 2.07207, "40755": 2.0746, "40760": 2.10497, "40765": 2.11443, "40770": 2.03471, "40775": 2.06379, "40780": 2.04664, "40785": 2.08607, "40790": 2.09151, "40795": 2.11088, "40800": 2.05047, "40805": 2.04712, "40810": 2.04521, "40815": 2.07849, "40820": 2.10373, "40825": 2.07786, "40830": 2.0711, "40835": 2.07691, "40840": 2.09709, "40845": 2.10066, "40850": 2.0651, "40855": 2.08425, "40860": 2.06627, "40865": 2.09428, "40870": 2.07111, "40875": 2.0497, "40880": 2.06621, "40885": 2.04424, "40890": 2.10821, "40895": 2.09083, "40900": 2.09011, "40905": 2.05698, "40910": 2.09812, "40915": 2.07387, "40920": 2.08933, "40925": 2.09706, "40930": 2.09335, "40935": 2.07659, "40940": 2.05729, "40945": 2.06666, "40950": 2.07861, "40955": 2.11324, "40960": 2.10006, "40965": 2.06479, "40970": 2.07444, "40975": 2.07887, "40980": 2.07841, "40985": 2.08518, "40990": 2.08796, "40995": 2.10178, "41000": 2.0767, "41005": 2.05895, "41010": 2.07418, "41015": 2.11522, "41020": 2.05984, "41025": 2.122, "41030": 2.03832, "41035": 2.07513, "41040": 2.07448, "41045": 2.06816, "41050": 2.07568, "41055": 2.11037, "41060": 2.08932, "41065": 2.09183, "41070": 2.13355, "41075": 2.10386, "41080": 2.11174, "41085": 2.05856, "41090": 2.08675, "41095": 2.07067, "41100": 2.06726, "41105": 2.09689, "41110": 2.07754, "41115": 2.08348, "41120": 2.09345, "41125": 2.08684, "41130": 2.04584, "41135": 2.0648, "41140": 2.07722, "41145": 2.04459, "41150": 2.0483, "41155": 2.07171, "41160": 2.10307, "41165": 2.06609, "41170": 2.09385, "41175": 2.07948, "41180": 2.07138, "41185": 2.05315, "41190": 2.03484, "41195": 2.04446, "41200": 2.01454, "41205": 2.02225, "41210": 2.09158, "41215": 2.12475, "41220": 2.07461, "41225": 2.10966, "41230": 2.01784, "41235": 2.08458, "41240": 2.08557, "41245": 2.06194, "41250": 2.07814, "41255": 2.05845, "41260": 2.07525, "41265": 2.05029, "41270": 2.0943, "41275": 2.08223, "41280": 2.08298, "41285": 2.11752, "41290": 2.12376, "41295": 2.04346, "41300": 2.16521, "41305": 2.03343, "41310": 2.07837, "41315": 2.11334, "41320": 2.10063, "41325": 2.05674, "41330": 2.07478, "41335": 2.09251, "41340": 2.12365, "41345": 2.10347, "41350": 2.08033, "41355": 2.07882, "41360": 2.06111, "41365": 2.11377, "41370": 2.08395, "41375": 2.07822, "41380": 2.03787, "41385": 2.05773, "41390": 2.02909, "41395": 2.12722, "41400": 2.116, "41405": 2.12941, "41410": 2.06425, "41415": 2.0678, "41420": 2.05707, "41425": 2.08571, "41430": 2.08664, "41435": 2.10082, "41440": 2.07893, "41445": 2.0838, "41450": 2.07243, "41455": 2.05434, "41460": 2.08537, "41465": 2.07911, "41470": 2.10937, "41475": 2.06205, "41480": 2.06688, "41485": 2.07735, "41490": 2.08783, "41495": 2.08196, "41500": 2.09047, "41505": 2.0454, "41510": 2.10528, "41515": 2.08207, "41520": 2.09555, "41525": 2.09028, "41530": 2.03326, "41535": 2.0673, "41540": 2.0836, "41545": 2.08264, "41550": 2.05129, "41555": 2.0625, "41560": 2.10098, "41565": 2.04389, "41570": 2.0583, "41575": 2.09251, "41580": 2.09157, "41585": 2.1103, "41590": 2.06779, "41595": 2.07435, "41600": 2.05673, "41605": 2.09348, "41610": 2.04149, "41615": 2.11103, "41620": 2.08378, "41625": 2.07137, "41630": 2.08226, "41635": 2.10895, "41640": 2.08359, "41645": 2.10859, "41650": 2.04875, "41655": 2.08935, "41660": 2.08874, "41665": 2.06928, "41670": 2.05182, "41675": 2.06886, "41680": 2.04768, "41685": 2.10694, "41690": 2.08236, "41695": 2.04464, "41700": 2.08345, "41705": 2.0889, "41710": 2.11796, "41715": 2.04866, "41720": 2.06064, "41725": 2.05348, "41730": 2.09199, "41735": 2.09867, "41740": 2.09189, "41745": 2.08653, "41750": 2.11154, "41755": 2.06861, "41760": 2.06461, "41765": 2.08184, "41770": 2.09216, "41775": 2.08664, "41780": 2.08354, "41785": 2.06544, "41790": 2.0713, "41795": 2.12378, "41800": 2.08981, "41805": 2.08774, "41810": 2.09156, "41815": 2.12524, "41820": 2.11219, "41825": 2.03512, "41830": 2.10795, "41835": 2.0592, "41840": 2.04883, "41845": 2.1018, "41850": 2.03544, "41855": 2.10964, "41860": 2.05692, "41865": 2.08051, "41870": 2.09204, "41875": 2.10908, "41880": 2.08301, "41885": 2.08741, "41890": 2.10168, "41895": 2.0685, "41900": 2.12787, "41905": 2.06966, "41910": 2.04849, "41915": 2.10455, "41920": 2.09399, "41925": 2.05248, "41930": 2.09956, "41935": 2.06585, "41940": 2.10853, "41945": 2.06547, "41950": 2.07495, "41955": 2.06417, "41960": 2.12818, "41965": 2.04319, "41970": 2.12052, "41975": 2.05181, "41980": 2.09068, "41985": 2.07184, "41990": 2.05654, "41995": 2.08225, "42000": 2.07513, "42005": 2.11512, "42010": 2.08214, "42015": 2.08641, "42020": 2.08292, "42025": 2.07048, "42030": 2.06631, "42035": 2.10543, "42040": 2.10948, "42045": 2.06191, "42050": 2.04638, "42055": 2.0878, "42060": 2.08236, "42065": 2.04928, "42070": 2.09144, "42075": 2.09526, "42080": 2.08457, "42085": 2.04194, "42090": 2.08138, "42095": 2.02928, "42100": 2.0423, "42105": 2.0723, "42110": 2.09499, "42115": 2.07763, "42120": 2.06312, "42125": 2.05136, "42130": 2.09118, "42135": 2.06496, "42140": 2.06116, "42145": 2.05868, "42150": 2.07282, "42155": 2.06926, "42160": 2.09628, "42165": 2.03565, "42170": 2.08957, "42175": 2.0624, "42180": 2.10171, "42185": 2.08737, "42190": 2.0968, "42195": 2.06122, "42200": 2.06368, "42205": 2.08078, "42210": 2.07907, "42215": 2.07125, "42220": 2.02899, "42225": 2.08024, "42230": 2.08089, "42235": 2.09026, "42240": 2.07657, "42245": 2.10448, "42250": 2.11996, "42255": 2.07325, "42260": 2.09636, "42265": 2.0841, "42270": 2.10772, "42275": 2.07334, "42280": 2.08232, "42285": 2.0838, "42290": 2.05245, "42295": 2.04622, "42300": 2.0363, "42305": 2.0565, "42310": 2.08595, "42315": 2.0673, "42320": 2.09509, "42325": 2.0772, "42330": 2.07879, "42335": 2.07043, "42340": 2.11083, "42345": 2.07244, "42350": 2.08355, "42355": 2.02736, "42360": 2.0482, "42365": 2.08499, "42370": 2.04556, "42375": 2.08809, "42380": 2.08068, "42385": 2.0549, "42390": 2.04897, "42395": 2.11601, "42400": 2.1003, "42405": 2.10778, "42410": 2.07847, "42415": 2.04966, "42420": 2.09866, "42425": 2.11442, "42430": 2.03384, "42435": 2.07536, "42440": 2.08428, "42445": 2.09515, "42450": 2.09219, "42455": 2.11308, "42460": 2.10119, "42465": 2.10229, "42470": 2.05668, "42475": 2.05469, "42480": 2.05005, "42485": 2.06977, "42490": 2.09092, "42495": 2.05359, "42500": 2.0935, "42505": 2.09216, "42510": 2.13142, "42515": 2.0733, "42520": 2.02748, "42525": 2.07738, "42530": 2.09012, "42535": 2.104, "42540": 2.06633, "42545": 2.10231, "42550": 2.0823, "42555": 2.06168, "42560": 2.03475, "42565": 2.0961, "42570": 2.0522, "42575": 2.03053, "42580": 2.06218, "42585": 2.09259, "42590": 2.0647, "42595": 2.06055, "42600": 2.06056, "42605": 2.01538, "42610": 2.06836, "42615": 2.09273, "42620": 2.09954, "42625": 2.08517, "42630": 2.0625, "42635": 2.07727, "42640": 2.08621, "42645": 2.05713, "42650": 2.06601, "42655": 2.09113, "42660": 2.05906, "42665": 2.0446, "42670": 2.1078, "42675": 2.02854, "42680": 2.06294, "42685": 2.07149, "42690": 2.05458, "42695": 2.0839, "42700": 2.0815, "42705": 2.03957, "42710": 2.11119, "42715": 2.0444, "42720": 2.08064, "42725": 2.05266, "42730": 2.03503, "42735": 2.06178, "42740": 2.08476, "42745": 2.03774, "42750": 2.08158, "42755": 2.02353, "42760": 2.07248, "42765": 2.07529, "42770": 2.09502, "42775": 2.10077, "42780": 2.05616, "42785": 2.1084, "42790": 2.03896, "42795": 2.05902, "42800": 2.10963, "42805": 2.08544, "42810": 2.07903, "42815": 2.12139, "42820": 2.08165, "42825": 2.07717, "42830": 2.07892, "42835": 2.11222, "42840": 2.03365, "42845": 2.09206, "42850": 2.10093, "42855": 2.10423, "42860": 2.08314, "42865": 2.10397, "42870": 2.05725, "42875": 2.06764, "42880": 2.06961, "42885": 2.05725, "42890": 2.0973, "42895": 2.07779, "42900": 2.06947, "42905": 2.09148, "42910": 2.09962, "42915": 2.03995, "42920": 2.05344, "42925": 2.10416, "42930": 2.07758, "42935": 2.09621, "42940": 2.05547, "42945": 2.05045, "42950": 2.06926, "42955": 2.04572, "42960": 2.09741, "42965": 2.11589, "42970": 2.03658, "42975": 2.05119, "42980": 2.05176, "42985": 2.08361, "42990": 2.02177, "42995": 2.06717, "43000": 2.04306, "43005": 2.09162, "43010": 2.09384, "43015": 2.05201, "43020": 2.05159, "43025": 2.07182, "43030": 2.04818, "43035": 2.07555, "43040": 2.09442, "43045": 2.03288, "43050": 2.05555, "43055": 2.06702, "43060": 2.08461, "43065": 2.03689, "43070": 2.0955, "43075": 2.07471, "43080": 2.03019, "43085": 2.09004, "43090": 2.11913, "43095": 2.03116, "43100": 2.07926, "43105": 2.06862, "43110": 2.1023, "43115": 2.07665, "43120": 2.09448, "43125": 2.08793, "43130": 2.14207, "43135": 2.07575, "43140": 2.08883, "43145": 2.10815, "43150": 2.10007, "43155": 2.06539, "43160": 2.05003, "43165": 2.06508, "43170": 2.11875, "43175": 2.07729, "43180": 2.08957, "43185": 2.07935, "43190": 2.10667, "43195": 2.11489, "43200": 2.09235, "43205": 2.0008, "43210": 2.08422, "43215": 2.09208, "43220": 2.07418, "43225": 2.1338, "43230": 2.0387, "43235": 2.11472, "43240": 2.07987, "43245": 2.06842, "43250": 2.12135, "43255": 2.05191, "43260": 2.11693, "43265": 2.04773, "43270": 2.09463, "43275": 2.10897, "43280": 2.06733, "43285": 2.07204, "43290": 2.09203, "43295": 2.07909, "43300": 2.09801, "43305": 2.14743, "43310": 2.09629, "43315": 2.08191, "43320": 2.08652, "43325": 2.06802, "43330": 2.07745, "43335": 2.06601, "43340": 2.06986, "43345": 2.03398, "43350": 2.10648, "43355": 2.09641, "43360": 2.07351, "43365": 2.04949, "43370": 2.08538, "43375": 2.09957, "43380": 2.07285, "43385": 2.07447, "43390": 2.09646, "43395": 2.09343, "43400": 2.06732, "43405": 2.04516, "43410": 2.07857, "43415": 2.047, "43420": 2.07193, "43425": 2.06356, "43430": 2.06897, "43435": 2.08351, "43440": 2.07519, "43445": 2.0888, "43450": 2.03429, "43455": 2.10607, "43460": 2.11304, "43465": 2.09489, "43470": 2.08375, "43475": 2.10035, "43480": 2.06778, "43485": 2.08066, "43490": 2.09572, "43495": 2.08466, "43500": 2.06023, "43505": 2.06444, "43510": 2.0603, "43515": 2.05515, "43520": 2.08946, "43525": 2.02199, "43530": 2.08006, "43535": 2.0822, "43540": 2.08428, "43545": 2.05509, "43550": 2.07408, "43555": 2.03571, "43560": 2.06103, "43565": 2.09633, "43570": 2.05694, "43575": 2.07375, "43580": 2.08294, "43585": 2.01889, "43590": 2.08947, "43595": 2.05915, "43600": 2.06503, "43605": 2.10003, "43610": 2.0901, "43615": 2.04958, "43620": 2.09243, "43625": 2.10239, "43630": 2.06048, "43635": 2.04659, "43640": 2.03183, "43645": 2.06046, "43650": 2.09965, "43655": 2.09332, "43660": 2.07469, "43665": 2.05748, "43670": 2.04556, "43675": 2.05319, "43680": 2.08559, "43685": 2.03085, "43690": 2.05421, "43695": 2.09624, "43700": 2.07372, "43705": 2.08353, "43710": 2.06558, "43715": 2.05329, "43720": 2.12115, "43725": 2.10011, "43730": 2.05192, "43735": 2.07791, "43740": 2.04145, "43745": 2.04325, "43750": 2.08061, "43755": 2.05136, "43760": 2.08879, "43765": 2.08737, "43770": 2.06882, "43775": 2.04713, "43780": 2.10811, "43785": 2.05913, "43790": 2.09654, "43795": 2.0761, "43800": 2.11572, "43805": 2.09709, "43810": 2.08332, "43815": 2.07388, "43820": 2.04734, "43825": 2.14198, "43830": 2.05032, "43835": 2.08765, "43840": 2.06187, "43845": 2.08884, "43850": 2.07438, "43855": 2.06475, "43860": 2.05985, "43865": 2.11992, "43870": 2.06839, "43875": 2.0365, "43880": 2.06781, "43885": 2.09657, "43890": 2.02466, "43895": 2.06351, "43900": 2.06108, "43905": 2.04822, "43910": 2.13155, "43915": 2.12346, "43920": 2.03737, "43925": 2.04717, "43930": 2.05956, "43935": 2.04218, "43940": 2.07343, "43945": 2.02458, "43950": 2.10603, "43955": 2.03639, "43960": 2.09794, "43965": 2.06394, "43970": 2.09931, "43975": 2.05436, "43980": 2.03758, "43985": 2.07417, "43990": 2.04059, "43995": 2.08325, "44000": 2.05083, "44005": 2.04069, "44010": 2.1, "44015": 2.04478, "44020": 2.08181, "44025": 2.10194, "44030": 2.07517, "44035": 2.05349, "44040": 2.00233, "44045": 2.11308, "44050": 2.08724, "44055": 2.10106, "44060": 2.1098, "44065": 2.05859, "44070": 2.08198, "44075": 2.09282, "44080": 2.01431, "44085": 2.0755, "44090": 2.04394, "44095": 2.06503, "44100": 2.09792, "44105": 2.09529, "44110": 2.06285, "44115": 2.07085, "44120": 2.05106, "44125": 2.07711, "44130": 2.0896, "44135": 2.06238, "44140": 2.05121, "44145": 2.05642, "44150": 2.10515, "44155": 2.09161, "44160": 2.09485, "44165": 2.08764, "44170": 2.05483, "44175": 2.06931, "44180": 2.07595, "44185": 2.05181, "44190": 2.0927, "44195": 2.03635, "44200": 2.05171, "44205": 2.07914, "44210": 2.0539, "44215": 2.06433, "44220": 2.07594, "44225": 2.06876, "44230": 2.08284, "44235": 2.05748, "44240": 2.11157, "44245": 2.06867, "44250": 2.06986, "44255": 2.08101, "44260": 2.04105, "44265": 2.04843, "44270": 2.08449, "44275": 2.04229, "44280": 2.07827, "44285": 2.14006, "44290": 2.09569, "44295": 2.11423, "44300": 2.06647, "44305": 2.05478, "44310": 2.05338, "44315": 2.04805, "44320": 2.04626, "44325": 2.07416, "44330": 2.0973, "44335": 2.08394, "44340": 2.08287, "44345": 2.07486, "44350": 2.06219, "44355": 2.09358, "44360": 2.10338, "44365": 2.08279, "44370": 2.08104, "44375": 2.07704, "44380": 2.08471, "44385": 2.06506, "44390": 2.08104, "44395": 2.10675, "44400": 2.06882, "44405": 2.04986, "44410": 2.0559, "44415": 2.08284, "44420": 2.07196, "44425": 2.09685, "44430": 2.05623, "44435": 2.07801, "44440": 2.02748, "44445": 2.11459, "44450": 2.05565, "44455": 2.07951, "44460": 2.04412, "44465": 2.06022, "44470": 2.086, "44475": 2.09117, "44480": 2.06065, "44485": 2.06062, "44490": 2.06618, "44495": 2.09329, "44500": 2.10228, "44505": 2.06463, "44510": 2.06722, "44515": 2.05682, "44520": 2.04088, "44525": 2.03297, "44530": 2.06855, "44535": 2.04371, "44540": 2.10056, "44545": 2.10452, "44550": 2.06015, "44555": 2.10775, "44560": 2.03393, "44565": 2.07725, "44570": 2.05542, "44575": 2.10881, "44580": 2.07919, "44585": 2.03041, "44590": 2.0813, "44595": 2.06407, "44600": 2.08103, "44605": 2.05985, "44610": 2.10419, "44615": 2.05564, "44620": 2.03585, "44625": 2.0746, "44630": 2.09028, "44635": 2.05796, "44640": 2.02683, "44645": 2.05162, "44650": 2.08798, "44655": 2.06345, "44660": 2.08736, "44665": 2.09004, "44670": 2.08785, "44675": 2.0957, "44680": 2.1098, "44685": 2.09203, "44690": 2.05861, "44695": 2.07068, "44700": 2.06504, "44705": 2.04873, "44710": 2.09704, "44715": 2.03781, "44720": 2.0601, "44725": 2.07218, "44730": 2.10104, "44735": 2.04234, "44740": 2.03693, "44745": 2.0431, "44750": 2.10322, "44755": 2.10183, "44760": 2.11621, "44765": 2.07358, "44770": 2.07254, "44775": 2.05581, "44780": 2.04454, "44785": 2.08112, "44790": 2.09669, "44795": 2.07797, "44800": 2.05667, "44805": 2.08126, "44810": 2.08845, "44815": 2.08832, "44820": 2.08289, "44825": 2.12158, "44830": 2.04808, "44835": 2.1061, "44840": 2.08943, "44845": 2.10235, "44850": 2.09351, "44855": 2.08059, "44860": 2.0589, "44865": 2.11994, "44870": 2.07798, "44875": 2.09537, "44880": 2.07951, "44885": 2.10296, "44890": 2.0517, "44895": 2.09907, "44900": 2.04524, "44905": 2.04471, "44910": 2.06054, "44915": 2.07391, "44920": 2.08053, "44925": 2.04236, "44930": 2.07767, "44935": 2.01301, "44940": 2.099, "44945": 2.07217, "44950": 2.08666, "44955": 2.01364, "44960": 2.13703, "44965": 2.07165, "44970": 2.06256, "44975": 2.09533, "44980": 2.06059, "44985": 2.06828, "44990": 2.10291, "44995": 2.02582, "45000": 2.08661, "45005": 2.092, "45010": 2.06208, "45015": 2.06865, "45020": 2.06112, "45025": 2.0737, "45030": 2.07814, "45035": 2.07086, "45040": 2.05558, "45045": 2.08815, "45050": 2.10255, "45055": 2.07395, "45060": 2.07826, "45065": 2.08179, "45070": 2.05673, "45075": 2.04645, "45080": 2.05061, "45085": 2.04657, "45090": 2.06115, "45095": 2.08441, "45100": 2.07194, "45105": 2.12497, "45110": 2.0764, "45115": 2.06327, "45120": 2.12628, "45125": 2.08423, "45130": 2.05235, "45135": 2.05106, "45140": 2.04955, "45145": 2.04621, "45150": 2.05486, "45155": 2.06525, "45160": 2.06823, "45165": 2.1003, "45170": 2.07848, "45175": 2.05128, "45180": 2.08513, "45185": 2.07316, "45190": 2.07675, "45195": 2.09053, "45200": 2.0785, "45205": 2.05489, "45210": 2.09959, "45215": 2.07099, "45220": 2.03113, "45225": 2.08213, "45230": 2.09341, "45235": 2.09092, "45240": 2.08358, "45245": 2.09999, "45250": 2.01934, "45255": 2.04624, "45260": 2.08656, "45265": 2.08026, "45270": 2.05554, "45275": 2.06222, "45280": 2.02015, "45285": 2.0674, "45290": 2.06442, "45295": 2.10711, "45300": 2.04213, "45305": 2.06357, "45310": 2.07024, "45315": 2.09541, "45320": 2.10993, "45325": 2.04668, "45330": 2.06318, "45335": 2.08867, "45340": 2.10571, "45345": 2.07293, "45350": 2.07724, "45355": 2.06703, "45360": 2.03543, "45365": 2.08363, "45370": 2.09131, "45375": 2.08236, "45380": 2.09009, "45385": 2.12009, "45390": 2.11422, "45395": 2.08668, "45400": 2.07073, "45405": 2.05979, "45410": 2.07236, "45415": 2.12861, "45420": 2.08167, "45425": 2.06616, "45430": 2.04251, "45435": 2.04825, "45440": 2.06656, "45445": 2.08193, "45450": 2.09456, "45455": 2.05757, "45460": 2.07132, "45465": 2.04904, "45470": 2.04075, "45475": 2.08682, "45480": 2.08922, "45485": 2.07306, "45490": 2.06964, "45495": 2.03862, "45500": 2.03698, "45505": 2.08783, "45510": 2.09998, "45515": 2.0634, "45520": 2.04652, "45525": 2.0567, "45530": 2.11812, "45535": 2.04591, "45540": 2.0438, "45545": 2.04093, "45550": 2.09339, "45555": 2.05127, "45560": 2.0596, "45565": 2.10353, "45570": 2.04244, "45575": 2.0674, "45580": 2.06381, "45585": 2.11948, "45590": 2.00055, "45595": 2.07955, "45600": 2.06168, "45605": 2.10809, "45610": 2.0974, "45615": 2.03887, "45620": 2.07277, "45625": 2.06586, "45630": 2.06493, "45635": 2.09444, "45640": 2.07735, "45645": 2.09775, "45650": 2.04411, "45655": 2.03688, "45660": 2.07298, "45665": 2.10081, "45670": 2.07351, "45675": 2.03638, "45680": 2.06227, "45685": 2.07458, "45690": 2.04039, "45695": 2.11336, "45700": 2.06345, "45705": 2.00797, "45710": 2.09728, "45715": 2.0965, "45720": 2.03696, "45725": 2.09781, "45730": 2.05652, "45735": 2.03839, "45740": 2.12157, "45745": 2.04759, "45750": 2.03777, "45755": 2.09905, "45760": 2.05753, "45765": 2.04561, "45770": 2.08379, "45775": 2.07825, "45780": 2.03534, "45785": 2.04599, "45790": 2.08417, "45795": 2.07385, "45800": 2.09757, "45805": 2.07854, "45810": 2.04932, "45815": 2.08105, "45820": 2.09787, "45825": 2.05382, "45830": 2.0252, "45835": 2.04721, "45840": 2.0645, "45845": 2.03467, "45850": 2.07713, "45855": 2.07118, "45860": 2.11606, "45865": 2.08875, "45870": 2.02727, "45875": 2.12792, "45880": 2.09336, "45885": 2.05513, "45890": 2.04859, "45895": 2.06387, "45900": 2.05261, "45905": 2.09783, "45910": 2.06537, "45915": 2.09759, "45920": 2.10227, "45925": 2.09173, "45930": 2.0377, "45935": 2.1007, "45940": 2.08322, "45945": 2.09697, "45950": 2.05472, "45955": 2.04674, "45960": 2.07265, "45965": 2.09474, "45970": 2.06209, "45975": 2.07634, "45980": 2.11539, "45985": 2.08114, "45990": 2.07029, "45995": 2.06545, "46000": 2.08835, "46005": 2.10212, "46010": 2.12707, "46015": 2.08968, "46020": 2.02295, "46025": 2.03342, "46030": 2.04547, "46035": 2.0758, "46040": 2.09118, "46045": 2.07616, "46050": 2.09677, "46055": 2.08879, "46060": 2.04403, "46065": 2.05921, "46070": 2.05552, "46075": 2.08168, "46080": 2.07228, "46085": 2.08273, "46090": 2.03268, "46095": 2.0587, "46100": 2.04301, "46105": 2.08828, "46110": 2.06404, "46115": 2.04297, "46120": 2.0346, "46125": 2.0817, "46130": 2.05674, "46135": 2.05139, "46140": 2.08065, "46145": 2.05708, "46150": 2.06777, "46155": 2.06538, "46160": 2.0699, "46165": 2.09022, "46170": 2.10352, "46175": 2.07434, "46180": 2.0556, "46185": 2.02399, "46190": 2.081, "46195": 2.06918, "46200": 2.06104, "46205": 2.09021, "46210": 2.02172, "46215": 2.03147, "46220": 2.06854, "46225": 2.0795, "46230": 2.02102, "46235": 2.07348, "46240": 2.04681, "46245": 2.06378, "46250": 2.05936, "46255": 2.07176, "46260": 2.08339, "46265": 2.08049, "46270": 2.06525, "46275": 2.06906, "46280": 2.08766, "46285": 2.05395, "46290": 2.06726, "46295": 2.05224, "46300": 2.07607, "46305": 2.09359, "46310": 2.04845, "46315": 2.09625, "46320": 2.05948, "46325": 2.07596, "46330": 2.02107, "46335": 2.05939, "46340": 2.09293, "46345": 2.08505, "46350": 2.07212, "46355": 2.05036, "46360": 2.07329, "46365": 2.10417, "46370": 2.05127, "46375": 2.06674, "46380": 2.07415, "46385": 2.04332, "46390": 2.0636, "46395": 2.08024, "46400": 2.03925, "46405": 2.05582, "46410": 2.08831, "46415": 2.04931, "46420": 2.0509, "46425": 2.06122, "46430": 2.081, "46435": 2.03905, "46440": 2.11057, "46445": 2.05456, "46450": 2.03935, "46455": 2.07361, "46460": 2.07346, "46465": 2.04657, "46470": 2.00548, "46475": 2.09947, "46480": 2.09781, "46485": 2.08374, "46490": 2.11113, "46495": 2.10681, "46500": 2.08073, "46505": 2.06179, "46510": 2.07983, "46515": 2.0742, "46520": 2.01315, "46525": 2.08122, "46530": 2.05192, "46535": 2.08144, "46540": 2.0836, "46545": 2.05057, "46550": 2.06822, "46555": 2.08865, "46560": 2.05758, "46565": 2.09305, "46570": 2.06846, "46575": 2.08182, "46580": 2.0686, "46585": 2.06143, "46590": 2.05525, "46595": 2.06589, "46600": 2.09097, "46605": 2.08104, "46610": 2.10015, "46615": 2.06426, "46620": 2.06218, "46625": 2.07348, "46630": 2.08708, "46635": 2.06289, "46640": 2.10711, "46645": 2.09421, "46650": 2.03055, "46655": 2.03176, "46660": 2.0548, "46665": 2.09709, "46670": 2.07816, "46675": 2.05073, "46680": 2.03824, "46685": 2.03203, "46690": 2.09415, "46695": 2.06167, "46700": 2.03975, "46705": 2.06212, "46710": 2.04602, "46715": 2.10012, "46720": 2.08631, "46725": 2.07979, "46730": 2.06635, "46735": 2.04778, "46740": 2.08374, "46745": 2.06621, "46750": 2.09346, "46755": 2.05357, "46760": 2.07269, "46765": 2.06714, "46770": 2.05264, "46775": 2.08997, "46780": 2.11257, "46785": 2.07023, "46790": 2.09003, "46795": 2.08601, "46800": 2.07127, "46805": 2.06729, "46810": 2.07723, "46815": 2.03121, "46820": 2.05688, "46825": 2.07547, "46830": 2.0983, "46835": 2.06899, "46840": 2.06898, "46845": 2.04931, "46850": 2.11269, "46855": 2.08181, "46860": 2.05215, "46865": 2.1028, "46870": 2.07398, "46875": 2.01718, "46880": 2.12429, "46885": 2.09545, "46890": 2.08384, "46895": 2.05321, "46900": 2.01412, "46905": 2.0657, "46910": 2.03995, "46915": 2.05352, "46920": 2.04331, "46925": 2.0725, "46930": 2.08022, "46935": 2.0631, "46940": 2.08993, "46945": 2.07126, "46950": 2.04452, "46955": 2.05541, "46960": 2.09192, "46965": 2.07285, "46970": 2.01991, "46975": 2.04064, "46980": 2.10022, "46985": 2.0948, "46990": 2.07432, "46995": 2.03097, "47000": 2.07614, "47005": 2.08427, "47010": 2.05974, "47015": 2.07658, "47020": 2.06204, "47025": 2.07045, "47030": 2.04863, "47035": 2.08723, "47040": 2.05247, "47045": 2.05732, "47050": 2.04499, "47055": 2.02933, "47060": 2.05211, "47065": 2.05522, "47070": 2.05812, "47075": 2.05018, "47080": 2.04719, "47085": 2.10214, "47090": 2.0323, "47095": 2.04772, "47100": 2.12098, "47105": 2.05474, "47110": 2.06299, "47115": 2.09688, "47120": 2.05577, "47125": 2.0271, "47130": 2.0591, "47135": 2.08382, "47140": 2.07596, "47145": 2.08876, "47150": 2.05658, "47155": 2.04063, "47160": 2.08689, "47165": 2.05554, "47170": 2.0719, "47175": 2.08461, "47180": 2.0662, "47185": 2.04895, "47190": 2.06953, "47195": 2.08961, "47200": 2.08644, "47205": 2.0487, "47210": 2.08672, "47215": 2.06681, "47220": 2.04221, "47225": 2.10199, "47230": 2.08423, "47235": 2.03899, "47240": 2.10377, "47245": 2.05671, "47250": 2.11052, "47255": 2.1, "47260": 2.07501, "47265": 2.08274, "47270": 2.07246, "47275": 2.08916, "47280": 2.07315, "47285": 2.08451, "47290": 2.04055, "47295": 2.05583, "47300": 2.03426, "47305": 2.06891, "47310": 2.06322, "47315": 2.03321, "47320": 2.07238, "47325": 2.05413, "47330": 2.10016, "47335": 2.0907, "47340": 2.07742, "47345": 2.09204, "47350": 2.04676, "47355": 2.06631, "47360": 2.07459, "47365": 2.08056, "47370": 2.09168, "47375": 2.05682, "47380": 2.03082, "47385": 2.05614, "47390": 2.10651, "47395": 2.05233, "47400": 2.08258, "47405": 2.05821, "47410": 2.07972, "47415": 2.06722, "47420": 2.02665, "47425": 2.03181, "47430": 2.08844, "47435": 2.06461, "47440": 2.07354, "47445": 2.10666, "47450": 2.07907, "47455": 2.03902, "47460": 2.08965, "47465": 2.05575, "47470": 2.02931, "47475": 2.08936, "47480": 2.0592, "47485": 2.0387, "47490": 2.05829, "47495": 2.09416, "47500": 2.04429, "47505": 2.06962, "47510": 2.05668, "47515": 2.10642, "47520": 2.10449, "47525": 2.02029, "47530": 2.07815, "47535": 2.04105, "47540": 2.08936, "47545": 2.06295, "47550": 2.08735, "47555": 2.08396, "47560": 2.06889, "47565": 2.0643, "47570": 2.02513, "47575": 2.07809, "47580": 2.06345, "47585": 2.06668, "47590": 2.05672, "47595": 2.08521, "47600": 2.09907, "47605": 2.08983, "47610": 2.00536, "47615": 2.08366, "47620": 2.03443, "47625": 2.12372, "47630": 2.06373, "47635": 2.06389, "47640": 2.07546, "47645": 2.04347, "47650": 2.01946, "47655": 2.06645, "47660": 2.07214, "47665": 2.07532, "47670": 2.02711, "47675": 2.07136, "47680": 2.02713, "47685": 1.98378, "47690": 2.06353, "47695": 2.08811, "47700": 2.05867, "47705": 2.1045, "47710": 2.06829, "47715": 2.06952, "47720": 2.02746, "47725": 2.097, "47730": 2.0909, "47735": 2.04897, "47740": 2.04993, "47745": 2.0455, "47750": 2.04644, "47755": 2.08941, "47760": 2.07345, "47765": 2.0483, "47770": 2.04007, "47775": 2.05886, "47780": 2.06576, "47785": 2.06139, "47790": 2.04058, "47795": 2.04365, "47800": 2.08451, "47805": 2.04926, "47810": 2.08398, "47815": 2.11708, "47820": 2.05963, "47825": 2.05216, "47830": 2.07449, "47835": 2.05693, "47840": 2.06932, "47845": 2.04791, "47850": 2.09001, "47855": 2.05111, "47860": 2.06482, "47865": 2.06435, "47870": 2.05209, "47875": 2.07908, "47880": 2.00654, "47885": 2.06019, "47890": 2.11284, "47895": 2.05508, "47900": 2.03598, "47905": 2.0539, "47910": 2.08342, "47915": 2.05162, "47920": 2.09016, "47925": 2.07326, "47930": 2.09856, "47935": 2.07167, "47940": 2.06225, "47945": 2.10247, "47950": 2.05422, "47955": 2.02168, "47960": 2.07465, "47965": 2.12788, "47970": 2.08883, "47975": 2.08813, "47980": 2.07511, "47985": 2.0245, "47990": 2.08124, "47995": 2.03696, "48000": 2.08852, "48005": 2.06213, "48010": 2.08117, "48015": 2.0607, "48020": 2.15186, "48025": 2.03063, "48030": 2.0485, "48035": 2.04808, "48040": 2.05637, "48045": 2.03577, "48050": 2.09255, "48055": 1.99616, "48060": 2.01789, "48065": 2.04629, "48070": 2.08863, "48075": 2.07627, "48080": 2.06463, "48085": 2.03709, "48090": 2.08269, "48095": 2.04902, "48100": 2.08314, "48105": 2.05243, "48110": 2.05776, "48115": 2.07501, "48120": 2.0704, "48125": 2.13076, "48130": 2.04738, "48135": 2.07984, "48140": 2.05605, "48145": 2.02159, "48150": 2.09716, "48155": 2.03463, "48160": 2.066, "48165": 2.06162, "48170": 2.04345, "48175": 2.06427, "48180": 2.06876, "48185": 2.0275, "48190": 2.02737, "48195": 2.03316, "48200": 2.05885, "48205": 2.06444, "48210": 2.0241, "48215": 2.08402, "48220": 2.05307, "48225": 2.03832, "48230": 2.06873, "48235": 2.10719, "48240": 2.0547, "48245": 2.06394, "48250": 2.07554, "48255": 2.0525, "48260": 2.07473, "48265": 2.07137, "48270": 2.07655, "48275": 2.08649, "48280": 2.02114, "48285": 2.09176, "48290": 2.05783, "48295": 2.06405, "48300": 2.05578, "48305": 2.08641, "48310": 2.0672, "48315": 2.07537, "48320": 2.05697, "48325": 2.08724, "48330": 2.06354, "48335": 2.10154, "48340": 2.0874, "48345": 2.10798, "48350": 2.0647, "48355": 2.08621, "48360": 2.05446, "48365": 2.07192, "48370": 2.05453, "48375": 2.05697, "48380": 2.07898, "48385": 2.06768, "48390": 2.0984, "48395": 2.00819, "48400": 2.04876, "48405": 2.07254, "48410": 2.08807, "48415": 2.06475, "48420": 2.08719, "48425": 2.05886, "48430": 2.06237, "48435": 2.02595, "48440": 2.10573, "48445": 2.05005, "48450": 2.10285, "48455": 2.03654, "48460": 2.0604, "48465": 2.08307, "48470": 2.02306, "48475": 2.05953, "48480": 2.1173, "48485": 2.05156, "48490": 2.08259, "48495": 2.10726, "48500": 2.05936, "48505": 2.08515, "48510": 2.07967, "48515": 2.08984, "48520": 2.02881, "48525": 2.05952, "48530": 2.04907, "48535": 2.07592, "48540": 2.02071, "48545": 2.10201, "48550": 2.0524, "48555": 2.09026, "48560": 2.03159, "48565": 2.04821, "48570": 2.01107, "48575": 2.06885, "48580": 2.10005, "48585": 2.01545, "48590": 2.05877, "48595": 2.0273, "48600": 2.0859, "48605": 2.11203, "48610": 2.04201, "48615": 2.05411, "48620": 2.05926, "48625": 2.04683, "48630": 2.12132, "48635": 2.08097, "48640": 2.07446, "48645": 2.0615, "48650": 2.07558, "48655": 2.05592, "48660": 2.06969, "48665": 2.05806, "48670": 2.02936, "48675": 2.04236, "48680": 2.05257, "48685": 2.06109, "48690": 2.08625, "48695": 2.0875, "48700": 2.08826, "48705": 2.05491, "48710": 2.06961, "48715": 2.09904, "48720": 2.01252, "48725": 2.06013, "48730": 2.05641, "48735": 2.04136, "48740": 2.07059, "48745": 2.01152, "48750": 2.05878, "48755": 2.06201, "48760": 2.03577, "48765": 2.09056, "48770": 2.08521, "48775": 2.03528, "48780": 2.01968, "48785": 2.07535, "48790": 2.0617, "48795": 2.03439, "48800": 2.05241, "48805": 2.05485, "48810": 2.05684, "48815": 2.04823, "48820": 2.11395, "48825": 2.03115, "48830": 2.04244, "48835": 2.03547, "48840": 2.05547, "48845": 2.08272, "48850": 2.04393, "48855": 2.05198, "48860": 2.06469, "48865": 2.06919, "48870": 2.08402, "48875": 2.0555, "48880": 2.03071, "48885": 2.07188, "48890": 2.03858, "48895": 2.05188, "48900": 2.08597, "48905": 2.07703, "48910": 2.05442, "48915": 2.09628, "48920": 2.09497, "48925": 2.06899, "48930": 2.09179, "48935": 2.0709, "48940": 2.03034, "48945": 2.04353, "48950": 2.07151, "48955": 2.09632, "48960": 2.05367, "48965": 2.03328, "48970": 2.03769, "48975": 2.07163, "48980": 2.12468, "48985": 2.0814, "48990": 2.06565, "48995": 2.09218, "49000": 2.08334, "49005": 2.09507, "49010": 2.09845, "49015": 2.04723, "49020": 2.06679, "49025": 2.0686, "49030": 2.09796, "49035": 2.05052, "49040": 2.09971, "49045": 2.0735, "49050": 2.07134, "49055": 2.06966, "49060": 2.03006, "49065": 2.06373, "49070": 2.04984, "49075": 2.05293, "49080": 2.08878, "49085": 2.11119, "49090": 2.05042, "49095": 2.02683, "49100": 2.05988, "49105": 2.08421, "49110": 2.09257, "49115": 2.13127, "49120": 2.10168, "49125": 2.03682, "49130": 2.0907, "49135": 2.08175, "49140": 2.07159, "49145": 2.10117, "49150": 2.05854, "49155": 2.04085, "49160": 2.01553, "49165": 2.07948, "49170": 2.06173, "49175": 2.04007, "49180": 2.01776, "49185": 2.04512, "49190": 2.04643, "49195": 2.05677, "49200": 2.02821, "49205": 2.05431, "49210": 2.10462, "49215": 2.01837, "49220": 2.04157, "49225": 2.06177, "49230": 2.03607, "49235": 2.07011, "49240": 2.08076, "49245": 2.04886, "49250": 2.01345, "49255": 2.01569, "49260": 2.0514, "49265": 2.06617, "49270": 2.04841, "49275": 2.07598, "49280": 2.06067, "49285": 2.04611, "49290": 2.06436, "49295": 2.07787, "49300": 2.03162, "49305": 2.06881, "49310": 2.05098, "49315": 2.04133, "49320": 2.08774, "49325": 2.08409, "49330": 2.0717, "49335": 2.02262, "49340": 2.03432, "49345": 2.0689, "49350": 2.04546, "49355": 2.07805, "49360": 2.0507, "49365": 2.05258, "49370": 2.04266, "49375": 2.1131, "49380": 2.08117, "49385": 2.08404, "49390": 2.07224, "49395": 2.0655, "49400": 2.078, "49405": 2.05229, "49410": 2.08799, "49415": 2.0827, "49420": 2.04761, "49425": 2.05115, "49430": 2.09093, "49435": 2.11967, "49440": 2.11465, "49445": 2.05771, "49450": 2.02308, "49455": 2.05643, "49460": 2.08298, "49465": 1.99248, "49470": 2.10209, "49475": 2.06612, "49480": 2.07451, "49485": 2.04772, "49490": 2.04358, "49495": 2.06753, "49500": 2.03139, "49505": 2.08642, "49510": 2.0477, "49515": 2.07093, "49520": 2.1014, "49525": 2.06727, "49530": 2.13282, "49535": 2.06868, "49540": 2.03923, "49545": 2.03126, "49550": 2.09211, "49555": 2.03991, "49560": 2.09033, "49565": 2.10625, "49570": 2.06155, "49575": 2.09889, "49580": 2.11354, "49585": 2.05849, "49590": 2.0693, "49595": 2.07617, "49600": 2.08896, "49605": 2.07354, "49610": 2.08402, "49615": 2.05228, "49620": 2.09092, "49625": 2.08561, "49630": 2.09151, "49635": 2.06177, "49640": 2.06995, "49645": 2.01183, "49650": 2.0711, "49655": 2.11683, "49660": 2.10639, "49665": 2.06377, "49670": 2.06892, "49675": 2.05464, "49680": 2.0394, "49685": 2.04456, "49690": 2.05665, "49695": 2.10353, "49700": 2.01499, "49705": 2.08919, "49710": 2.08266, "49715": 2.08773, "49720": 2.05732, "49725": 2.08483, "49730": 2.08599, "49735": 2.00502, "49740": 2.06809, "49745": 2.06069, "49750": 2.07778, "49755": 2.09322, "49760": 1.98501, "49765": 2.04526, "49770": 2.0323, "49775": 2.05507, "49780": 2.05248, "49785": 2.02912, "49790": 2.02905, "49795": 2.09417, "49800": 2.05537, "49805": 2.00348, "49810": 2.07426, "49815": 2.0194, "49820": 2.04185, "49825": 2.02979, "49830": 2.05723, "49835": 2.04782, "49840": 2.06193, "49845": 2.11683, "49850": 2.11732, "49855": 2.06506, "49860": 2.04311, "49865": 2.09011, "49870": 2.05903, "49875": 2.04072, "49880": 2.06351, "49885": 2.08152, "49890": 2.08516, "49895": 2.03313, "49900": 2.07629, "49905": 2.07153, "49910": 2.03444, "49915": 2.05865, "49920": 2.05491, "49925": 2.02415, "49930": 2.1043, "49935": 2.06767, "49940": 2.04284, "49945": 2.08668, "49950": 2.04572, "49955": 2.09398, "49960": 2.05617, "49965": 2.06926, "49970": 2.09438, "49975": 2.07691, "49980": 2.11978, "49985": 2.06197, "49990": 2.08421, "49995": 2.06759, "50000": 2.0586, "50005": 2.06124, "50010": 2.0883, "50015": 2.0498, "50020": 2.02636, "50025": 2.12283, "50030": 2.02483, "50035": 2.04083, "50040": 2.08199, "50045": 2.07305, "50050": 2.09551, "50055": 2.07267, "50060": 2.05476, "50065": 2.06646, "50070": 2.02871, "50075": 2.05271, "50080": 2.09138, "50085": 2.07857, "50090": 2.04572, "50095": 2.07469, "50100": 2.02986, "50105": 2.0867, "50110": 2.05141, "50115": 2.07172, "50120": 2.06475, "50125": 2.05788, "50130": 2.01615, "50135": 2.04165, "50140": 2.10888, "50145": 2.04666, "50150": 2.07186, "50155": 2.06616, "50160": 2.02997, "50165": 2.05795, "50170": 2.06066, "50175": 2.08509, "50180": 2.06467, "50185": 2.06369, "50190": 2.02529, "50195": 2.05626, "50200": 2.03893, "50205": 2.07528, "50210": 2.07022, "50215": 2.08579, "50220": 2.0682, "50225": 2.05247, "50230": 2.0825, "50235": 2.08934, "50240": 2.07791, "50245": 2.05994, "50250": 2.0499, "50255": 2.06738, "50260": 2.02207, "50265": 2.02874, "50270": 1.98082, "50275": 2.04672, "50280": 2.06284, "50285": 2.07979, "50290": 2.06299, "50295": 2.09692, "50300": 2.05777, "50305": 2.08107, "50310": 2.04275, "50315": 2.06779, "50320": 2.07541, "50325": 2.05674, "50330": 2.07395, "50335": 2.08145, "50340": 2.06892, "50345": 2.05563, "50350": 2.04468, "50355": 2.07153, "50360": 2.06343, "50365": 2.0539, "50370": 2.05327, "50375": 2.06016, "50380": 2.07124, "50385": 2.06464, "50390": 2.05016, "50395": 2.07873, "50400": 2.04219, "50405": 2.07342, "50410": 2.05347, "50415": 2.01551, "50420": 2.06647, "50425": 2.05716, "50430": 2.07991, "50435": 2.0657, "50440": 2.0163, "50445": 2.03422, "50450": 2.09721, "50455": 2.06057, "50460": 2.06374, "50465": 2.0469, "50470": 2.07141, "50475": 2.04765, "50480": 2.0989, "50485": 2.0977, "50490": 2.0663, "50495": 2.09451, "50500": 2.06797, "50505": 2.09299, "50510": 2.02699, "50515": 2.06447, "50520": 2.05988, "50525": 2.08939, "50530": 2.08274, "50535": 2.06061, "50540": 2.03401, "50545": 2.02626, "50550": 2.07543, "50555": 2.05574, "50560": 2.04247, "50565": 2.08856, "50570": 2.01533, "50575": 2.03709, "50580": 2.03097, "50585": 2.07323, "50590": 2.0599, "50595": 2.08732, "50600": 2.0585, "50605": 2.0523, "50610": 2.08442, "50615": 2.09533, "50620": 2.09299, "50625": 2.05218, "50630": 2.03679, "50635": 2.09866, "50640": 2.08655, "50645": 2.09674, "50650": 2.05026, "50655": 2.05633, "50660": 2.03295, "50665": 2.02829, "50670": 2.04643, "50675": 2.08462, "50680": 2.04611, "50685": 2.03395, "50690": 2.04425, "50695": 2.04696, "50700": 2.10403, "50705": 2.05345, "50710": 2.11825, "50715": 2.00846, "50720": 2.04271, "50725": 2.08483, "50730": 2.07133, "50735": 2.08636, "50740": 2.0249, "50745": 2.09675, "50750": 2.07423, "50755": 2.09445, "50760": 2.10942, "50765": 2.06223, "50770": 2.06677, "50775": 2.06933, "50780": 2.06743, "50785": 2.03893, "50790": 2.05921, "50795": 2.0669, "50800": 2.08925, "50805": 2.06195, "50810": 2.06155, "50815": 2.06002, "50820": 2.03455, "50825": 2.0623, "50830": 2.02559, "50835": 2.03955, "50840": 2.08234, "50845": 2.03775, "50850": 2.06885, "50855": 2.06065, "50860": 2.06794, "50865": "nan", "50870": "nan", "50875": "nan", "50880": "nan", "50885": "nan", "50890": "nan", "50895": "nan", "50900": "nan", "50905": "nan", "50910": "nan", "50915": "nan", "50920": "nan", "50925": "nan", "50930": "nan", "50935": "nan", "50940": "nan", "50945": "nan", "50950": "nan", "50955": "nan", "50960": "nan", "50965": "nan", "50970": "nan", "50975": "nan", "50980": "nan", "50985": "nan", "50990": "nan", "50995": "nan", "51000": "nan"}}, "num-zeros": {"start_step": 1, "end_step": 51000, "step_interval": 5, "values": {"1": 956237696.0, "5": 967339392.0, "10": 971388800.0, "15": 946505216.0, "20": 957341568.0, "25": 1063700672.0, "30": 1177983744.0, "35": 1231755520.0, "40": 1192466560.0, "45": 1126067456.0, "50": 1114216448.0, "55": 1079671040.0, "60": 1029223552.0, "65": 1011117888.0, "70": 988166720.0, "75": 985815488.0, "80": 1005812224.0, "85": 1002490752.0, "90": 981786240.0, "95": 957714432.0, "100": 971167232.0, "105": 981191808.0, "110": 977441664.0, "115": 977276224.0, "120": 961933504.0, "125": 942809280.0, "130": 975741120.0, "135": 965415616.0, "140": 962876864.0, "145": 976340160.0, "150": 921349568.0, "155": 968107008.0, "160": 956345536.0, "165": 959770368.0, "170": 974327872.0, "175": 948980096.0, "180": 946698240.0, "185": 972002688.0, "190": 969051904.0, "195": 985124224.0, "200": 945771008.0, "205": 958353792.0, "210": 979434816.0, "215": 967486208.0, "220": 956437440.0, "225": 962386048.0, "230": 948181632.0, "235": 965230208.0, "240": 966069696.0, "245": 969164928.0, "250": 974439680.0, "255": 925065984.0, "260": 965636416.0, "265": 970679936.0, "270": 959137664.0, "275": 953990528.0, "280": 963424768.0, "285": 945780544.0, "290": 974124608.0, "295": 966702464.0, "300": 967153728.0, "305": 964508672.0, "310": 940351296.0, "315": 967398848.0, "320": 969005760.0, "325": 980552512.0, "330": 972096384.0, "335": 946870656.0, "340": 966597568.0, "345": 973021824.0, "350": 973921536.0, "355": 963261568.0, "360": 948355840.0, "365": 964819200.0, "370": 962954240.0, "375": 958444992.0, "380": 947155328.0, "385": 955989312.0, "390": 945403008.0, "395": 970427136.0, "400": 979779968.0, "405": 968345920.0, "410": 970073088.0, "415": 953159296.0, "420": 943574912.0, "425": 954775744.0, "430": 962665472.0, "435": 977077248.0, "440": 954810304.0, "445": 971892352.0, "450": 963505280.0, "455": 973129984.0, "460": 983713280.0, "465": 945280896.0, "470": 942058368.0, "475": 967008768.0, "480": 966105344.0, "485": 976417088.0, "490": 962540096.0, "495": 945465088.0, "500": 964456704.0, "505": 986012544.0, "510": 965683200.0, "515": 943410944.0, "520": 945023168.0, "525": 971260672.0, "530": 971886336.0, "535": 979137280.0, "540": 969534464.0, "545": 954127616.0, "550": 951273984.0, "555": 987221248.0, "560": 960431872.0, "565": 966618752.0, "570": 975729536.0, "575": 927228672.0, "580": 970699520.0, "585": 961181952.0, "590": 972964672.0, "595": 963680256.0, "600": 937084736.0, "605": 951475072.0, "610": 963356928.0, "615": 970009856.0, "620": 976472320.0, "625": 949581760.0, "630": 954446592.0, "635": 986048000.0, "640": 980979648.0, "645": 955010944.0, "650": 958552064.0, "655": 951652416.0, "660": 961038976.0, "665": 967554048.0, "670": 962519296.0, "675": 968338048.0, "680": 965621376.0, "685": 962866816.0, "690": 961916928.0, "695": 954764416.0, "700": 970337664.0, "705": 945514816.0, "710": 943885696.0, "715": 973360768.0, "720": 968364672.0, "725": 978492288.0, "730": 952195008.0, "735": 948806080.0, "740": 955634048.0, "745": 975868928.0, "750": 981236288.0, "755": 962160384.0, "760": 951966848.0, "765": 967343488.0, "770": 976152000.0, "775": 970549312.0, "780": 977535872.0, "785": 931529856.0, "790": 960436096.0, "795": 964583168.0, "800": 967024000.0, "805": 962319616.0, "810": 940975488.0, "815": 949038592.0, "820": 953187136.0, "825": 954501504.0, "830": 976441984.0, "835": 956076416.0, "840": 948408256.0, "845": 965154944.0, "850": 966027968.0, "855": 960902912.0, "860": 976027648.0, "865": 938159360.0, "870": 966416640.0, "875": 972314112.0, "880": 963122432.0, "885": 967748096.0, "890": 949971200.0, "895": 960022272.0, "900": 974229248.0, "905": 963968128.0, "910": 958434048.0, "915": 956355264.0, "920": 943974528.0, "925": 960832064.0, "930": 978849856.0, "935": 971073664.0, "940": 960906624.0, "945": 945065728.0, "950": 957427328.0, "955": 979038400.0, "960": 983588864.0, "965": 966166656.0, "970": 951225984.0, "975": 961577728.0, "980": 968069248.0, "985": 968991488.0, "990": 984393408.0, "995": 953290752.0, "1000": 934784576.0, "1005": 960148224.0, "1010": 971539968.0, "1015": 985184576.0, "1020": 962781952.0, "1025": 935010176.0, "1030": 974678016.0, "1035": 964993280.0, "1040": 980465984.0, "1045": 960828544.0, "1050": 955202048.0, "1055": 957781248.0, "1060": 967749952.0, "1065": 967116608.0, "1070": 966602496.0, "1075": 950064128.0, "1080": 954510080.0, "1085": 967253056.0, "1090": 977134336.0, "1095": 961236480.0, "1100": 979611520.0, "1105": 953366400.0, "1110": 965954816.0, "1115": 966986240.0, "1120": 970350912.0, "1125": 965707776.0, "1130": 954941568.0, "1135": 965840384.0, "1140": 965175296.0, "1145": 970988736.0, "1150": 955559040.0, "1155": 930575360.0, "1160": 957774080.0, "1165": 978121472.0, "1170": 974302400.0, "1175": 973055616.0, "1180": 973083648.0, "1185": 947337152.0, "1190": 964790912.0, "1195": 953140224.0, "1200": 972841920.0, "1205": 988480064.0, "1210": 931124032.0, "1215": 968645312.0, "1220": 969160960.0, "1225": 975949952.0, "1230": 967335040.0, "1235": 943443840.0, "1240": 955856832.0, "1245": 981502976.0, "1250": 966111360.0, "1255": 973675520.0, "1260": 946496256.0, "1265": 963996800.0, "1270": 960487872.0, "1275": 973615488.0, "1280": 961114944.0, "1285": 957584768.0, "1290": 952526080.0, "1295": 971612928.0, "1300": 968863104.0, "1305": 963740032.0, "1310": 963336512.0, "1315": 943554560.0, "1320": 966307712.0, "1325": 989784064.0, "1330": 969508096.0, "1335": 972301184.0, "1340": 972269824.0, "1345": 960656512.0, "1350": 968638720.0, "1355": 955854080.0, "1360": 971824960.0, "1365": 960387584.0, "1370": 948790144.0, "1375": 973532160.0, "1380": 953468928.0, "1385": 969148800.0, "1390": 975720192.0, "1395": 931676928.0, "1400": 945856000.0, "1405": 976749952.0, "1410": 974511744.0, "1415": 967570688.0, "1420": 966746944.0, "1425": 937380672.0, "1430": 973915392.0, "1435": 978333568.0, "1440": 964180352.0, "1445": 958058496.0, "1450": 946145152.0, "1455": 983924544.0, "1460": 968651776.0, "1465": 948745344.0, "1470": 984244608.0, "1475": 943901440.0, "1480": 963976064.0, "1485": 957344960.0, "1490": 961259520.0, "1495": 980540032.0, "1500": 958333504.0, "1505": 942865344.0, "1510": 984179200.0, "1515": 959092736.0, "1520": 959105408.0, "1525": 952784256.0, "1530": 957742848.0, "1535": 949430528.0, "1540": 971086784.0, "1545": 963132160.0, "1550": 978666752.0, "1555": 952318464.0, "1560": 980088192.0, "1565": 967314816.0, "1570": 973843840.0, "1575": 975494016.0, "1580": 941859776.0, "1585": 970028928.0, "1590": 983821568.0, "1595": 948630720.0, "1600": 967445120.0, "1605": 952448256.0, "1610": 969618752.0, "1615": 983145472.0, "1620": 968019328.0, "1625": 970714880.0, "1630": 962886784.0, "1635": 942310784.0, "1640": 981611648.0, "1645": 973974848.0, "1650": 974185216.0, "1655": 967264640.0, "1660": 940688448.0, "1665": 961705344.0, "1670": 962903424.0, "1675": 971278976.0, "1680": 980878144.0, "1685": 944413568.0, "1690": 964690304.0, "1695": 965643200.0, "1700": 966343488.0, "1705": 985201024.0, "1710": 978352768.0, "1715": 943211648.0, "1720": 977087424.0, "1725": 965872512.0, "1730": 968970624.0, "1735": 965085568.0, "1740": 949710336.0, "1745": 970012288.0, "1750": 959681408.0, "1755": 960088704.0, "1760": 966377216.0, "1765": 951816576.0, "1770": 954664832.0, "1775": 973750208.0, "1780": 970534656.0, "1785": 968824640.0, "1790": 950232832.0, "1795": 945129792.0, "1800": 984665088.0, "1805": 987163008.0, "1810": 977768448.0, "1815": 948003840.0, "1820": 949208832.0, "1825": 978852864.0, "1830": 966363136.0, "1835": 964131200.0, "1840": 972319296.0, "1845": 935413504.0, "1850": 952500096.0, "1855": 980050944.0, "1860": 975869184.0, "1865": 958965632.0, "1870": 958948864.0, "1875": 932592896.0, "1880": 973574336.0, "1885": 978847232.0, "1890": 971359104.0, "1895": 959211136.0, "1900": 947392384.0, "1905": 981829248.0, "1910": 969124864.0, "1915": 970039360.0, "1920": 975598400.0, "1925": 960493568.0, "1930": 977921792.0, "1935": 963248000.0, "1940": 952460288.0, "1945": 981336320.0, "1950": 939170048.0, "1955": 960605632.0, "1960": 970030464.0, "1965": 981174976.0, "1970": 962045184.0, "1975": 952821056.0, "1980": 936850624.0, "1985": 975939584.0, "1990": 965966208.0, "1995": 962612352.0, "2000": 960553984.0, "2005": 954498240.0, "2010": 975579776.0, "2015": 991802368.0, "2020": 975435200.0, "2025": 974304000.0, "2030": 952083392.0, "2035": 967847168.0, "2040": 987458560.0, "2045": 976478208.0, "2050": 984701760.0, "2055": 942835520.0, "2060": 942594048.0, "2065": 966208896.0, "2070": 969622144.0, "2075": 980554496.0, "2080": 977599744.0, "2085": 939639744.0, "2090": 969873792.0, "2095": 961277184.0, "2100": 976720000.0, "2105": 972538176.0, "2110": 959904640.0, "2115": 956878720.0, "2120": 977480448.0, "2125": 962563200.0, "2130": 979618944.0, "2135": 950537088.0, "2140": 946995136.0, "2145": 962275840.0, "2150": 973404288.0, "2155": 972689536.0, "2160": 970314752.0, "2165": 948640896.0, "2170": 961541504.0, "2175": 969376512.0, "2180": 969330368.0, "2185": 947446464.0, "2190": 940481664.0, "2195": 986087168.0, "2200": 961863296.0, "2205": 978922624.0, "2210": 964102016.0, "2215": 963502016.0, "2220": 951310464.0, "2225": 969316416.0, "2230": 976329984.0, "2235": 974026496.0, "2240": 975494272.0, "2245": 960233472.0, "2250": 967639936.0, "2255": 969132864.0, "2260": 975064384.0, "2265": 968259584.0, "2270": 951744320.0, "2275": 962768896.0, "2280": 969639680.0, "2285": 971692160.0, "2290": 962890496.0, "2295": 931408512.0, "2300": 959907072.0, "2305": 970426432.0, "2310": 967447424.0, "2315": 970905280.0, "2320": 975588288.0, "2325": 938587584.0, "2330": 988436864.0, "2335": 977489408.0, "2340": 964596864.0, "2345": 964166400.0, "2350": 947554304.0, "2355": 977028992.0, "2360": 966898944.0, "2365": 977296384.0, "2370": 965072512.0, "2375": 953967872.0, "2380": 962917376.0, "2385": 967195392.0, "2390": 963077056.0, "2395": 974465024.0, "2400": 958409600.0, "2405": 968120320.0, "2410": 951586752.0, "2415": 965904256.0, "2420": 966516608.0, "2425": 959045120.0, "2430": 956685568.0, "2435": 961387520.0, "2440": 959756160.0, "2445": 970889984.0, "2450": 961997312.0, "2455": 922720512.0, "2460": 951952640.0, "2465": 955729920.0, "2470": 972571520.0, "2475": 973810944.0, "2480": 943894016.0, "2485": 944185664.0, "2490": 972411392.0, "2495": 974450048.0, "2500": 973908224.0, "2505": 958492736.0, "2510": 939509504.0, "2515": 979551488.0, "2520": 970470528.0, "2525": 964388736.0, "2530": 955797440.0, "2535": 936598656.0, "2540": 969027712.0, "2545": 970385472.0, "2550": 969460608.0, "2555": 969438912.0, "2560": 964977280.0, "2565": 959762560.0, "2570": 985176000.0, "2575": 957424512.0, "2580": 967424768.0, "2585": 966022912.0, "2590": 956355136.0, "2595": 981829120.0, "2600": 959531072.0, "2605": 963000704.0, "2610": 965972480.0, "2615": 951925504.0, "2620": 971242816.0, "2625": 976456192.0, "2630": 974411648.0, "2635": 948071296.0, "2640": 948136960.0, "2645": 963038464.0, "2650": 953982592.0, "2655": 977113152.0, "2660": 949621248.0, "2665": 953929664.0, "2670": 959063424.0, "2675": 979276416.0, "2680": 961396032.0, "2685": 970701632.0, "2690": 965222976.0, "2695": 943553472.0, "2700": 969425408.0, "2705": 978962752.0, "2710": 971811456.0, "2715": 990814592.0, "2720": 942647936.0, "2725": 967955776.0, "2730": 955466880.0, "2735": 970672704.0, "2740": 977921600.0, "2745": 932279872.0, "2750": 947857536.0, "2755": 956318784.0, "2760": 981697664.0, "2765": 966112384.0, "2770": 948915456.0, "2775": 935831424.0, "2780": 964779136.0, "2785": 969570176.0, "2790": 974275072.0, "2795": 966887168.0, "2800": 944390784.0, "2805": 964354048.0, "2810": 969610880.0, "2815": 975845632.0, "2820": 963086080.0, "2825": 937629632.0, "2830": 956741632.0, "2835": 986321792.0, "2840": 961756992.0, "2845": 967504256.0, "2850": 951714432.0, "2855": 962093312.0, "2860": 954243456.0, "2865": 955882624.0, "2870": 944664192.0, "2875": 974664256.0, "2880": 968201664.0, "2885": 981081792.0, "2890": 953454016.0, "2895": 957178304.0, "2900": 964989184.0, "2905": 931706112.0, "2910": 955731584.0, "2915": 979477568.0, "2920": 970491648.0, "2925": 964976768.0, "2930": 964047872.0, "2935": 940140928.0, "2940": 964910080.0, "2945": 989149056.0, "2950": 965209280.0, "2955": 965103104.0, "2960": 933159616.0, "2965": 968794176.0, "2970": 973035008.0, "2975": 958092288.0, "2980": 964496448.0, "2985": 937268160.0, "2990": 951265664.0, "2995": 978314880.0, "3000": 969288192.0, "3005": 974687104.0, "3010": 950234880.0, "3015": 943841152.0, "3020": 958439808.0, "3025": 975185152.0, "3030": 965016448.0, "3035": 963453248.0, "3040": 952134016.0, "3045": 989792128.0, "3050": 965545600.0, "3055": 982534208.0, "3060": 971239680.0, "3065": 943928960.0, "3070": 978433152.0, "3075": 975218240.0, "3080": 961004672.0, "3085": 962362880.0, "3090": 945965312.0, "3095": 938126336.0, "3100": 972940416.0, "3105": 962001280.0, "3110": 970668928.0, "3115": 963401856.0, "3120": 947127744.0, "3125": 972732736.0, "3130": 952985216.0, "3135": 966054272.0, "3140": 968500672.0, "3145": 937863232.0, "3150": 975022208.0, "3155": 976827712.0, "3160": 969640448.0, "3165": 982207104.0, "3170": 937972992.0, "3175": 953838208.0, "3180": 983808640.0, "3185": 965181312.0, "3190": 968494848.0, "3195": 950944384.0, "3200": 945114112.0, "3205": 959875072.0, "3210": 957499008.0, "3215": 958031616.0, "3220": 968142912.0, "3225": 935627264.0, "3230": 962601728.0, "3235": 975787712.0, "3240": 962634496.0, "3245": 981286656.0, "3250": 943273728.0, "3255": 954611712.0, "3260": 980375424.0, "3265": 963632192.0, "3270": 965176320.0, "3275": 959744256.0, "3280": 967058368.0, "3285": 982490560.0, "3290": 947700736.0, "3295": 966434432.0, "3300": 959177984.0, "3305": 949142528.0, "3310": 979522560.0, "3315": 964293760.0, "3320": 969218688.0, "3325": 956205312.0, "3330": 941178240.0, "3335": 964985216.0, "3340": 956914112.0, "3345": 972512128.0, "3350": 964588288.0, "3355": 943357824.0, "3360": 970049472.0, "3365": 969467968.0, "3370": 954775616.0, "3375": 958689216.0, "3380": 971477056.0, "3385": 947985152.0, "3390": 965792064.0, "3395": 978403392.0, "3400": 978138944.0, "3405": 976736128.0, "3410": 924204288.0, "3415": 955436544.0, "3420": 971832960.0, "3425": 977168704.0, "3430": 973837824.0, "3435": 936084480.0, "3440": 970515968.0, "3445": 957315840.0, "3450": 959854080.0, "3455": 963864960.0, "3460": 967893952.0, "3465": 931331712.0, "3470": 952348416.0, "3475": 973717120.0, "3480": 959751552.0, "3485": 979960256.0, "3490": 944683200.0, "3495": 953918464.0, "3500": 969329536.0, "3505": 964360640.0, "3510": 971234432.0, "3515": 955958976.0, "3520": 958746368.0, "3525": 971927040.0, "3530": 964137408.0, "3535": 983203968.0, "3540": 937502400.0, "3545": 944742912.0, "3550": 984475328.0, "3555": 978070592.0, "3560": 974386432.0, "3565": 968812608.0, "3570": 946706816.0, "3575": 976117120.0, "3580": 977506112.0, "3585": 954581440.0, "3590": 956438144.0, "3595": 951456192.0, "3600": 989008896.0, "3605": 962019392.0, "3610": 965066688.0, "3615": 974645952.0, "3620": 954899904.0, "3625": 939527360.0, "3630": 990160768.0, "3635": 971447552.0, "3640": 976037504.0, "3645": 961500416.0, "3650": 945815168.0, "3655": 965794688.0, "3660": 976206464.0, "3665": 964033344.0, "3670": 977443456.0, "3675": 943487616.0, "3680": 958196736.0, "3685": 964298624.0, "3690": 982104448.0, "3695": 963138048.0, "3700": 950583040.0, "3705": 947358464.0, "3710": 982366848.0, "3715": 972694336.0, "3720": 976149504.0, "3725": 964049920.0, "3730": 948862208.0, "3735": 967090688.0, "3740": 960972160.0, "3745": 969333760.0, "3750": 963948416.0, "3755": 953433216.0, "3760": 976653952.0, "3765": 979841216.0, "3770": 972384000.0, "3775": 972385280.0, "3780": 952585472.0, "3785": 960248960.0, "3790": 985610880.0, "3795": 969194880.0, "3800": 957877504.0, "3805": 972408448.0, "3810": 954518272.0, "3815": 974566272.0, "3820": 963031552.0, "3825": 962071424.0, "3830": 969396096.0, "3835": 934745216.0, "3840": 971268864.0, "3845": 986849920.0, "3850": 968860992.0, "3855": 965248768.0, "3860": 948057088.0, "3865": 975049600.0, "3870": 985098496.0, "3875": 983042048.0, "3880": 963627648.0, "3885": 953012224.0, "3890": 960284416.0, "3895": 960590016.0, "3900": 984925184.0, "3905": 976210752.0, "3910": 987335040.0, "3915": 946004800.0, "3920": 974854784.0, "3925": 961222144.0, "3930": 976777920.0, "3935": 978900992.0, "3940": 950279616.0, "3945": 960260992.0, "3950": 974173440.0, "3955": 972954688.0, "3960": 974065728.0, "3965": 950861568.0, "3970": 980655744.0, "3975": 960713920.0, "3980": 977517760.0, "3985": 962937856.0, "3990": 972730368.0, "3995": 953677056.0, "4000": 974976256.0, "4005": 971633408.0, "4010": 978396928.0, "4015": 971467904.0, "4020": 950298240.0, "4025": 968421120.0, "4030": 997923008.0, "4035": 978547840.0, "4040": 959800128.0, "4045": 939652096.0, "4050": 944706176.0, "4055": 980972800.0, "4060": 977684224.0, "4065": 975706752.0, "4070": 942152384.0, "4075": 945751936.0, "4080": 988740864.0, "4085": 962090880.0, "4090": 983356928.0, "4095": 986933504.0, "4100": 957188032.0, "4105": 954060864.0, "4110": 966474624.0, "4115": 976029056.0, "4120": 983520064.0, "4125": 960028608.0, "4130": 967265536.0, "4135": 971430848.0, "4140": 963158400.0, "4145": 956174592.0, "4150": 960281984.0, "4155": 946192000.0, "4160": 968427008.0, "4165": 970313664.0, "4170": 971946112.0, "4175": 955862016.0, "4180": 940960256.0, "4185": 968293376.0, "4190": 967979008.0, "4195": 989198208.0, "4200": 962665472.0, "4205": 960582912.0, "4210": 971850496.0, "4215": 974144512.0, "4220": 981120128.0, "4225": 975220544.0, "4230": 952676480.0, "4235": 958456768.0, "4240": 966762496.0, "4245": 961720320.0, "4250": 965821184.0, "4255": 958241472.0, "4260": 949544704.0, "4265": 964125824.0, "4270": 978384192.0, "4275": 975396608.0, "4280": 962647168.0, "4285": 951414656.0, "4290": 980006464.0, "4295": 968817920.0, "4300": 958261632.0, "4305": 966765952.0, "4310": 939589632.0, "4315": 949379840.0, "4320": 984498048.0, "4325": 982535424.0, "4330": 974722240.0, "4335": 949411200.0, "4340": 959466368.0, "4345": 956582656.0, "4350": 979887168.0, "4355": 968833216.0, "4360": 966302592.0, "4365": 941183744.0, "4370": 969446400.0, "4375": 972946560.0, "4380": 966135104.0, "4385": 972011392.0, "4390": 954129728.0, "4395": 951844480.0, "4400": 973640448.0, "4405": 972441536.0, "4410": 967883456.0, "4415": 958935296.0, "4420": 960754176.0, "4425": 976378496.0, "4430": 965894272.0, "4435": 975864832.0, "4440": 962318848.0, "4445": 954800512.0, "4450": 978306560.0, "4455": 960217472.0, "4460": 968556544.0, "4465": 968818176.0, "4470": 944090368.0, "4475": 951985344.0, "4480": 978813312.0, "4485": 968291776.0, "4490": 957012928.0, "4495": 938938240.0, "4500": 953228544.0, "4505": 977166976.0, "4510": 978663168.0, "4515": 962467968.0, "4520": 958812416.0, "4525": 958089472.0, "4530": 964404288.0, "4535": 976579840.0, "4540": 976810752.0, "4545": 970120192.0, "4550": 953162496.0, "4555": 959540352.0, "4560": 972472576.0, "4565": 973406208.0, "4570": 978884160.0, "4575": 957749504.0, "4580": 963142016.0, "4585": 957332736.0, "4590": 986493888.0, "4595": 960140544.0, "4600": 952217600.0, "4605": 959237120.0, "4610": 963698432.0, "4615": 957914816.0, "4620": 960206336.0, "4625": 973786624.0, "4630": 944507584.0, "4635": 977000960.0, "4640": 960237184.0, "4645": 981955840.0, "4650": 962374912.0, "4655": 939421568.0, "4660": 963976064.0, "4665": 962541184.0, "4670": 976647744.0, "4675": 963453440.0, "4680": 957476224.0, "4685": 949612992.0, "4690": 956925952.0, "4695": 969914496.0, "4700": 961201344.0, "4705": 970769664.0, "4710": 934456064.0, "4715": 970426560.0, "4720": 966299904.0, "4725": 980266176.0, "4730": 965863168.0, "4735": 937858560.0, "4740": 960087936.0, "4745": 975985280.0, "4750": 967947392.0, "4755": 984923520.0, "4760": 959099520.0, "4765": 955175424.0, "4770": 958548096.0, "4775": 991105024.0, "4780": 976782016.0, "4785": 967521664.0, "4790": 943682624.0, "4795": 955762176.0, "4800": 967672448.0, "4805": 976431360.0, "4810": 965111936.0, "4815": 957921792.0, "4820": 973931968.0, "4825": 961468672.0, "4830": 962552576.0, "4835": 972519808.0, "4840": 948889600.0, "4845": 965740160.0, "4850": 960280640.0, "4855": 964155520.0, "4860": 963038848.0, "4865": 967556096.0, "4870": 957206784.0, "4875": 983599488.0, "4880": 957109248.0, "4885": 977052224.0, "4890": 959789440.0, "4895": 942188544.0, "4900": 973688256.0, "4905": 975152128.0, "4910": 969183616.0, "4915": 970176512.0, "4920": 941193856.0, "4925": 954809152.0, "4930": 977082688.0, "4935": 963810752.0, "4940": 972659840.0, "4945": 960080576.0, "4950": 940855552.0, "4955": 968110784.0, "4960": 976874688.0, "4965": 961033088.0, "4970": 958689280.0, "4975": 933824448.0, "4980": 960873984.0, "4985": 962999808.0, "4990": 963540096.0, "4995": 986307712.0, "5000": 940715200.0, "5005": 968960896.0, "5010": 970321408.0, "5015": 965233920.0, "5020": 966764160.0, "5025": 949496512.0, "5030": 953564096.0, "5035": 967340800.0, "5040": 955739264.0, "5045": 969203008.0, "5050": 953393024.0, "5055": 954910720.0, "5060": 963091840.0, "5065": 952219520.0, "5070": 973683456.0, "5075": 978643712.0, "5080": 942915712.0, "5085": 965887808.0, "5090": 972943104.0, "5095": 964484352.0, "5100": 958355840.0, "5105": 965342592.0, "5110": 950524928.0, "5115": 972396800.0, "5120": 960499584.0, "5125": 969767808.0, "5130": 938808256.0, "5135": 943709952.0, "5140": 969926400.0, "5145": 968603136.0, "5150": 970577024.0, "5155": 972696320.0, "5160": 926576640.0, "5165": 961591936.0, "5170": 966824832.0, "5175": 966086400.0, "5180": 963704640.0, "5185": 930839552.0, "5190": 949792512.0, "5195": 972422464.0, "5200": 973807872.0, "5205": 968262784.0, "5210": 960529216.0, "5215": 928730816.0, "5220": 979248128.0, "5225": 984779584.0, "5230": 975037312.0, "5235": 975117248.0, "5240": 944363200.0, "5245": 970823936.0, "5250": 972439424.0, "5255": 966945152.0, "5260": 976728448.0, "5265": 942271616.0, "5270": 969238400.0, "5275": 970110208.0, "5280": 962826048.0, "5285": 964110208.0, "5290": 932563840.0, "5295": 951751296.0, "5300": 975649664.0, "5305": 951891648.0, "5310": 968053376.0, "5315": 955830400.0, "5320": 950897856.0, "5325": 973052672.0, "5330": 967782400.0, "5335": 967572736.0, "5340": 966528896.0, "5345": 962937344.0, "5350": 978875904.0, "5355": 972191360.0, "5360": 963806848.0, "5365": 965259392.0, "5370": 947850240.0, "5375": 948784192.0, "5380": 967162688.0, "5385": 980479296.0, "5390": 965300608.0, "5395": 955098240.0, "5400": 948261888.0, "5405": 974342144.0, "5410": 967819776.0, "5415": 976149248.0, "5420": 967383168.0, "5425": 937375808.0, "5430": 963834112.0, "5435": 971825536.0, "5440": 969029888.0, "5445": 957427648.0, "5450": 919394688.0, "5455": 952053056.0, "5460": 962425216.0, "5465": 978843520.0, "5470": 980920448.0, "5475": 941623936.0, "5480": 955756032.0, "5485": 964890240.0, "5490": 975949696.0, "5495": 962805760.0, "5500": 971168256.0, "5505": 956940416.0, "5510": 968550272.0, "5515": 945418496.0, "5520": 963072192.0, "5525": 975784960.0, "5530": 936611136.0, "5535": 970585344.0, "5540": 960295232.0, "5545": 971999488.0, "5550": 967873344.0, "5555": 955846720.0, "5560": 954460096.0, "5565": 968703872.0, "5570": 945188480.0, "5575": 960527744.0, "5580": 960624704.0, "5585": 959532672.0, "5590": 977681792.0, "5595": 975232640.0, "5600": 962960576.0, "5605": 964164352.0, "5610": 943157504.0, "5615": 966504512.0, "5620": 963223936.0, "5625": 982325056.0, "5630": 976025856.0, "5635": 957261504.0, "5640": 951443648.0, "5645": 967781888.0, "5650": 979046656.0, "5655": 983486336.0, "5660": 956445184.0, "5665": 953464128.0, "5670": 966061120.0, "5675": 967702144.0, "5680": 978586624.0, "5685": 962008576.0, "5690": 935823488.0, "5695": 963689344.0, "5700": 952525248.0, "5705": 974463616.0, "5710": 971231168.0, "5715": 946086016.0, "5720": 974980032.0, "5725": 967463168.0, "5730": 978476288.0, "5735": 964844736.0, "5740": 943575232.0, "5745": 971143168.0, "5750": 981914624.0, "5755": 956730112.0, "5760": 963635840.0, "5765": 957732224.0, "5770": 955711296.0, "5775": 970774784.0, "5780": 962812032.0, "5785": 970539776.0, "5790": 974654208.0, "5795": 949741312.0, "5800": 965950976.0, "5805": 968725888.0, "5810": 976109824.0, "5815": 970066176.0, "5820": 936352384.0, "5825": 969328192.0, "5830": 977687808.0, "5835": 974980928.0, "5840": 963062144.0, "5845": 968703104.0, "5850": 942971520.0, "5855": 975999424.0, "5860": 979586624.0, "5865": 978395520.0, "5870": 968775680.0, "5875": 942340608.0, "5880": 964669824.0, "5885": 974771072.0, "5890": 972536704.0, "5895": 965705856.0, "5900": 941287168.0, "5905": 961841600.0, "5910": 958532352.0, "5915": 968100160.0, "5920": 977603456.0, "5925": 959444608.0, "5930": 946798592.0, "5935": 952395776.0, "5940": 977588288.0, "5945": 984744768.0, "5950": 980615232.0, "5955": 935081472.0, "5960": 961687424.0, "5965": 965939328.0, "5970": 970661952.0, "5975": 961920512.0, "5980": 958124032.0, "5985": 964514624.0, "5990": 973467136.0, "5995": 955772416.0, "6000": 955669888.0, "6005": 961478720.0, "6010": 952744256.0, "6015": 974379648.0, "6020": 978154624.0, "6025": 972141696.0, "6030": 955398592.0, "6035": 946880896.0, "6040": 962580608.0, "6045": 983767936.0, "6050": 956662912.0, "6055": 963544704.0, "6060": 945865920.0, "6065": 958456064.0, "6070": 978428800.0, "6075": 978161088.0, "6080": 957456384.0, "6085": 947654272.0, "6090": 953659200.0, "6095": 964709376.0, "6100": 980095936.0, "6105": 971074752.0, "6110": 961828224.0, "6115": 943667968.0, "6120": 968723520.0, "6125": 960777728.0, "6130": 983979328.0, "6135": 961105408.0, "6140": 958684480.0, "6145": 971213056.0, "6150": 968517632.0, "6155": 975113984.0, "6160": 977265536.0, "6165": 952767872.0, "6170": 951326592.0, "6175": 963530560.0, "6180": 969536640.0, "6185": 966516736.0, "6190": 963682112.0, "6195": 947105856.0, "6200": 969230848.0, "6205": 967100096.0, "6210": 959352384.0, "6215": 973061120.0, "6220": 936620928.0, "6225": 978629632.0, "6230": 975851008.0, "6235": 971778176.0, "6240": 965956864.0, "6245": 956139776.0, "6250": 956707072.0, "6255": 973315008.0, "6260": 978914816.0, "6265": 974979072.0, "6270": 958878848.0, "6275": 963824064.0, "6280": 973312128.0, "6285": 966184576.0, "6290": 971298816.0, "6295": 987699904.0, "6300": 947701056.0, "6305": 964870080.0, "6310": 979226624.0, "6315": 978307584.0, "6320": 972080768.0, "6325": 923137536.0, "6330": 959022784.0, "6335": 974903552.0, "6340": 984825152.0, "6345": 967069760.0, "6350": 944819712.0, "6355": 958161920.0, "6360": 972798080.0, "6365": 972392192.0, "6370": 959115008.0, "6375": 967055360.0, "6380": 951425152.0, "6385": 973597952.0, "6390": 965502592.0, "6395": 975388928.0, "6400": 984070144.0, "6405": 944016128.0, "6410": 977448256.0, "6415": 971746240.0, "6420": 956628224.0, "6425": 960888768.0, "6430": 957427456.0, "6435": 960346304.0, "6440": 968984128.0, "6445": 973406336.0, "6450": 974268800.0, "6455": 962375168.0, "6460": 941364096.0, "6465": 974263040.0, "6470": 980219712.0, "6475": 960861952.0, "6480": 967717888.0, "6485": 948647616.0, "6490": 970933056.0, "6495": 988578304.0, "6500": 980555840.0, "6505": 972183808.0, "6510": 951796928.0, "6515": 957996160.0, "6520": 978926080.0, "6525": 979186176.0, "6530": 973382272.0, "6535": 968037696.0, "6540": 950455808.0, "6545": 966356224.0, "6550": 979446080.0, "6555": 967071104.0, "6560": 975526784.0, "6565": 949561536.0, "6570": 951958208.0, "6575": 962492928.0, "6580": 975792192.0, "6585": 979410496.0, "6590": 949175936.0, "6595": 961422848.0, "6600": 961133056.0, "6605": 961670400.0, "6610": 985151488.0, "6615": 959678848.0, "6620": 944466688.0, "6625": 971029504.0, "6630": 971405760.0, "6635": 963920128.0, "6640": 959698176.0, "6645": 951054464.0, "6650": 978800128.0, "6655": 965999232.0, "6660": 968643392.0, "6665": 968929280.0, "6670": 932690048.0, "6675": 970736768.0, "6680": 969200640.0, "6685": 958747520.0, "6690": 956166912.0, "6695": 955598592.0, "6700": 962320384.0, "6705": 979218176.0, "6710": 970841984.0, "6715": 966999616.0, "6720": 974273536.0, "6725": 941645568.0, "6730": 979351360.0, "6735": 994439936.0, "6740": 976650880.0, "6745": 974640640.0, "6750": 939272064.0, "6755": 977883328.0, "6760": 969930432.0, "6765": 978504320.0, "6770": 975415232.0, "6775": 943524160.0, "6780": 947062208.0, "6785": 975199232.0, "6790": 960729984.0, "6795": 975887168.0, "6800": 973443200.0, "6805": 946941120.0, "6810": 958561536.0, "6815": 970560832.0, "6820": 977681024.0, "6825": 969132160.0, "6830": 950460288.0, "6835": 981514432.0, "6840": 983366080.0, "6845": 948775552.0, "6850": 965699712.0, "6855": 954334848.0, "6860": 978927424.0, "6865": 983816064.0, "6870": 964977920.0, "6875": 978664064.0, "6880": 950285312.0, "6885": 958336256.0, "6890": 960325248.0, "6895": 965973568.0, "6900": 985150080.0, "6905": 968441984.0, "6910": 950195392.0, "6915": 970950656.0, "6920": 967263616.0, "6925": 965365120.0, "6930": 964372544.0, "6935": 951826880.0, "6940": 962836096.0, "6945": 986543104.0, "6950": 973485952.0, "6955": 964715520.0, "6960": 940065152.0, "6965": 974552192.0, "6970": 978652800.0, "6975": 985017472.0, "6980": 982778112.0, "6985": 959752320.0, "6990": 945422464.0, "6995": 987310208.0, "7000": 963009664.0, "7005": 963413120.0, "7010": 984971008.0, "7015": 945336704.0, "7020": 982957568.0, "7025": 968743040.0, "7030": 953315840.0, "7035": 982994688.0, "7040": 950446528.0, "7045": 955967040.0, "7050": 960083328.0, "7055": 963732864.0, "7060": 976911488.0, "7065": 967935552.0, "7070": 953732224.0, "7075": 956965120.0, "7080": 968976896.0, "7085": 966213248.0, "7090": 969459648.0, "7095": 960016256.0, "7100": 973810688.0, "7105": 973110016.0, "7110": 970278528.0, "7115": 958338560.0, "7120": 948768640.0, "7125": 963201536.0, "7130": 971102080.0, "7135": 964430592.0, "7140": 961484096.0, "7145": 930612736.0, "7150": 946205568.0, "7155": 990982144.0, "7160": 968749312.0, "7165": 957108864.0, "7170": 968313024.0, "7175": 955766656.0, "7180": 958354944.0, "7185": 984977152.0, "7190": 978660352.0, "7195": 973747456.0, "7200": 936297920.0, "7205": 957499456.0, "7210": 967015296.0, "7215": 969965440.0, "7220": 982170816.0, "7225": 929009408.0, "7230": 949732608.0, "7235": 966944256.0, "7240": 967163712.0, "7245": 967663168.0, "7250": 949798400.0, "7255": 957633664.0, "7260": 970605760.0, "7265": 974911488.0, "7270": 960167680.0, "7275": 959348864.0, "7280": 957318848.0, "7285": 977699712.0, "7290": 977372160.0, "7295": 963379264.0, "7300": 975708800.0, "7305": 964464832.0, "7310": 977847936.0, "7315": 967448576.0, "7320": 975081600.0, "7325": 967078912.0, "7330": 959888768.0, "7335": 964315776.0, "7340": 978180544.0, "7345": 968171008.0, "7350": 985013248.0, "7355": 960018176.0, "7360": 949260032.0, "7365": 973312512.0, "7370": 982323136.0, "7375": 963800192.0, "7380": 964468480.0, "7385": 949008896.0, "7390": 964179584.0, "7395": 958676480.0, "7400": 970365056.0, "7405": 987661952.0, "7410": 952435456.0, "7415": 951094912.0, "7420": 967128576.0, "7425": 982951424.0, "7430": 965749888.0, "7435": 973344000.0, "7440": 937633024.0, "7445": 968964416.0, "7450": 980498752.0, "7455": 971944192.0, "7460": 972784960.0, "7465": 940039936.0, "7470": 972626432.0, "7475": 958032960.0, "7480": 969480320.0, "7485": 962116672.0, "7490": 934059264.0, "7495": 957444608.0, "7500": 969511424.0, "7505": 970357632.0, "7510": 972798336.0, "7515": 980198016.0, "7520": 952079232.0, "7525": 970714560.0, "7530": 954433600.0, "7535": 972156608.0, "7540": 980149312.0, "7545": 959762304.0, "7550": 960636096.0, "7555": 960525824.0, "7560": 970859840.0, "7565": 955045632.0, "7570": 943035968.0, "7575": 965961472.0, "7580": 983059200.0, "7585": 979324672.0, "7590": 970082112.0, "7595": 950410432.0, "7600": 945883840.0, "7605": 982304768.0, "7610": 969670784.0, "7615": 988946304.0, "7620": 957921408.0, "7625": 941958464.0, "7630": 972042304.0, "7635": 985015552.0, "7640": 983913216.0, "7645": 968896896.0, "7650": 959393792.0, "7655": 963051008.0, "7660": 969727488.0, "7665": 978232896.0, "7670": 976323392.0, "7675": 976062592.0, "7680": 943277696.0, "7685": 960327552.0, "7690": 976064384.0, "7695": 982073088.0, "7700": 979640640.0, "7705": 940866560.0, "7710": 974836736.0, "7715": 980008320.0, "7720": 968217344.0, "7725": 961117504.0, "7730": 943583360.0, "7735": 968991488.0, "7740": 980401920.0, "7745": 964777472.0, "7750": 964296448.0, "7755": 960489024.0, "7760": 971146112.0, "7765": 971109504.0, "7770": 963099520.0, "7775": 982731776.0, "7780": 964777536.0, "7785": 960010752.0, "7790": 969020672.0, "7795": 969093248.0, "7800": 972249344.0, "7805": 969105920.0, "7810": 946365504.0, "7815": 963135808.0, "7820": 974454784.0, "7825": 964041600.0, "7830": 957617792.0, "7835": 949820352.0, "7840": 957932736.0, "7845": 953599104.0, "7850": 979624448.0, "7855": 987223552.0, "7860": 947657984.0, "7865": 949892608.0, "7870": 965543808.0, "7875": 976304128.0, "7880": 968983104.0, "7885": 969715584.0, "7890": 952691584.0, "7895": 975051840.0, "7900": 964665088.0, "7905": 964590912.0, "7910": 965388928.0, "7915": 943426944.0, "7920": 951386624.0, "7925": 970163200.0, "7930": 965423744.0, "7935": 985062784.0, "7940": 965396352.0, "7945": 951263872.0, "7950": 962160832.0, "7955": 980512192.0, "7960": 964193408.0, "7965": 952750848.0, "7970": 952280960.0, "7975": 970003904.0, "7980": 965943808.0, "7985": 960074816.0, "7990": 968522432.0, "7995": 947320384.0, "8000": 963013248.0, "8005": 981047040.0, "8010": 966316736.0, "8015": 983533760.0, "8020": 961360768.0, "8025": 965629696.0, "8030": 958851904.0, "8035": 975494400.0, "8040": 961355776.0, "8045": 949108096.0, "8050": 959722304.0, "8055": 980170560.0, "8060": 970010240.0, "8065": 958664576.0, "8070": 964964480.0, "8075": 942806912.0, "8080": 967017600.0, "8085": 967515264.0, "8090": 984019072.0, "8095": 989546368.0, "8100": 967157632.0, "8105": 944375296.0, "8110": 969276672.0, "8115": 986359552.0, "8120": 975144000.0, "8125": 964411392.0, "8130": 966665984.0, "8135": 968621376.0, "8140": 964940032.0, "8145": 996095616.0, "8150": 973534656.0, "8155": 939410560.0, "8160": 964990656.0, "8165": 973062144.0, "8170": 968963584.0, "8175": 962471680.0, "8180": 936188800.0, "8185": 962871040.0, "8190": 968241408.0, "8195": 978060928.0, "8200": 956798208.0, "8205": 960788608.0, "8210": 946912640.0, "8215": 982705856.0, "8220": 988418624.0, "8225": 966611776.0, "8230": 963107264.0, "8235": 934844160.0, "8240": 980808832.0, "8245": 977013248.0, "8250": 964524800.0, "8255": 977911744.0, "8260": 957658112.0, "8265": 983584256.0, "8270": 953573184.0, "8275": 974800640.0, "8280": 975041792.0, "8285": 953655168.0, "8290": 940576064.0, "8295": 981949440.0, "8300": 973544320.0, "8305": 978676224.0, "8310": 951809792.0, "8315": 938193216.0, "8320": 977903744.0, "8325": 968795264.0, "8330": 989983232.0, "8335": 975895936.0, "8340": 948017984.0, "8345": 971613312.0, "8350": 970832768.0, "8355": 975569024.0, "8360": 979946880.0, "8365": 933543680.0, "8370": 966227456.0, "8375": 980274304.0, "8380": 965729024.0, "8385": 973246464.0, "8390": 963535296.0, "8395": 951937536.0, "8400": 972865408.0, "8405": 952362624.0, "8410": 961355264.0, "8415": 966175616.0, "8420": 942404928.0, "8425": 968671296.0, "8430": 961774848.0, "8435": 966619520.0, "8440": 969991168.0, "8445": 953524352.0, "8450": 984989952.0, "8455": 991070720.0, "8460": 969161856.0, "8465": 967884544.0, "8470": 963893760.0, "8475": 943008128.0, "8480": 987667200.0, "8485": 980218496.0, "8490": 992391808.0, "8495": 972319872.0, "8500": 951896576.0, "8505": 983952704.0, "8510": 974457216.0, "8515": 969563264.0, "8520": 961668672.0, "8525": 945328896.0, "8530": 984857664.0, "8535": 978657856.0, "8540": 968429696.0, "8545": 968413184.0, "8550": 942507840.0, "8555": 972446656.0, "8560": 958553216.0, "8565": 976230016.0, "8570": 975626048.0, "8575": 972369920.0, "8580": 932718592.0, "8585": 965402368.0, "8590": 979388160.0, "8595": 979351424.0, "8600": 983978240.0, "8605": 958345216.0, "8610": 983554688.0, "8615": 977405696.0, "8620": 963756544.0, "8625": 979885312.0, "8630": 943580800.0, "8635": 962524096.0, "8640": 973586112.0, "8645": 970513344.0, "8650": 969978304.0, "8655": 971464448.0, "8660": 944005312.0, "8665": 987088256.0, "8670": 960526336.0, "8675": 974746496.0, "8680": 962300288.0, "8685": 956283136.0, "8690": 979005696.0, "8695": 969298816.0, "8700": 972840704.0, "8705": 974221056.0, "8710": 947608192.0, "8715": 973552128.0, "8720": 957996288.0, "8725": 979542912.0, "8730": 986164992.0, "8735": 952769088.0, "8740": 941357312.0, "8745": 988254976.0, "8750": 972406016.0, "8755": 971958976.0, "8760": 965189056.0, "8765": 934950784.0, "8770": 986804544.0, "8775": 970046592.0, "8780": 967688576.0, "8785": 962590848.0, "8790": 948154368.0, "8795": 969972224.0, "8800": 971467456.0, "8805": 973842240.0, "8810": 983676992.0, "8815": 951496192.0, "8820": 940080128.0, "8825": 964986880.0, "8830": 981446528.0, "8835": 971656832.0, "8840": 979737344.0, "8845": 951272832.0, "8850": 987252096.0, "8855": 971422528.0, "8860": 962038208.0, "8865": 957224064.0, "8870": 945592192.0, "8875": 968985344.0, "8880": 984260480.0, "8885": 971123072.0, "8890": 970361472.0, "8895": 952923136.0, "8900": 962393728.0, "8905": 977356160.0, "8910": 982197184.0, "8915": 980744640.0, "8920": 967766912.0, "8925": 940582912.0, "8930": 970554048.0, "8935": 964544256.0, "8940": 978258944.0, "8945": 981998976.0, "8950": 946421888.0, "8955": 972681664.0, "8960": 973921792.0, "8965": 974120064.0, "8970": 966655936.0, "8975": 937370880.0, "8980": 952939904.0, "8985": 978074944.0, "8990": 967547072.0, "8995": 980675840.0, "9000": 952691328.0, "9005": 951140160.0, "9010": 975464576.0, "9015": 982987904.0, "9020": 959574464.0, "9025": 979055488.0, "9030": 953850880.0, "9035": 969057280.0, "9040": 978233088.0, "9045": 968536320.0, "9050": 983257472.0, "9055": 948082624.0, "9060": 956809472.0, "9065": 970196672.0, "9070": 967610176.0, "9075": 980402304.0, "9080": 952774144.0, "9085": 971884032.0, "9090": 963801984.0, "9095": 968484736.0, "9100": 974940288.0, "9105": 960280064.0, "9110": 948169984.0, "9115": 956527232.0, "9120": 985297728.0, "9125": 963394944.0, "9130": 958899840.0, "9135": 951915264.0, "9140": 967283136.0, "9145": 977446016.0, "9150": 987097024.0, "9155": 977066304.0, "9160": 958063104.0, "9165": 950416064.0, "9170": 988689024.0, "9175": 971729792.0, "9180": 967682304.0, "9185": 955225600.0, "9190": 956969088.0, "9195": 966090368.0, "9200": 969077632.0, "9205": 967564544.0, "9210": 984929024.0, "9215": 931745088.0, "9220": 949613056.0, "9225": 971292416.0, "9230": 971097984.0, "9235": 971771264.0, "9240": 959648576.0, "9245": 963473728.0, "9250": 961491072.0, "9255": 983400832.0, "9260": 979554048.0, "9265": 952867264.0, "9270": 949155648.0, "9275": 978710592.0, "9280": 978075072.0, "9285": 962841088.0, "9290": 979562624.0, "9295": 958820032.0, "9300": 965839488.0, "9305": 968469824.0, "9310": 973406912.0, "9315": 976365952.0, "9320": 948440640.0, "9325": 979542656.0, "9330": 977705216.0, "9335": 974960512.0, "9340": 960507648.0, "9345": 943243648.0, "9350": 952835456.0, "9355": 963267200.0, "9360": 960381184.0, "9365": 983744512.0, "9370": 982901632.0, "9375": 942093376.0, "9380": 983045376.0, "9385": 985615616.0, "9390": 972658176.0, "9395": 978847744.0, "9400": 938124800.0, "9405": 968672896.0, "9410": 981583104.0, "9415": 991871360.0, "9420": 960331392.0, "9425": 956964224.0, "9430": 938990080.0, "9435": 974695680.0, "9440": 959359616.0, "9445": 974077376.0, "9450": 961386048.0, "9455": 946206208.0, "9460": 978430528.0, "9465": 988213760.0, "9470": 963271552.0, "9475": 983884288.0, "9480": 931087104.0, "9485": 987454528.0, "9490": 963582528.0, "9495": 971974400.0, "9500": 982317824.0, "9505": 970212864.0, "9510": 964407040.0, "9515": 956606272.0, "9520": 948667712.0, "9525": 965385664.0, "9530": 958457408.0, "9535": 951344128.0, "9540": 954108160.0, "9545": 979198784.0, "9550": 955975808.0, "9555": 953296128.0, "9560": 958396160.0, "9565": 970409088.0, "9570": 977579392.0, "9575": 958776960.0, "9580": 963159680.0, "9585": 946394176.0, "9590": 948504832.0, "9595": 967056896.0, "9600": 984908800.0, "9605": 985093952.0, "9610": 943751936.0, "9615": 952777408.0, "9620": 981166720.0, "9625": 978353408.0, "9630": 969893632.0, "9635": 974832448.0, "9640": 939607040.0, "9645": 962511104.0, "9650": 971435392.0, "9655": 987213632.0, "9660": 963517440.0, "9665": 950305152.0, "9670": 966027264.0, "9675": 963370880.0, "9680": 965102272.0, "9685": 986587392.0, "9690": 939995264.0, "9695": 950398848.0, "9700": 975309568.0, "9705": 972761984.0, "9710": 967645568.0, "9715": 971613120.0, "9720": 940540416.0, "9725": 966086464.0, "9730": 973278016.0, "9735": 974260416.0, "9740": 971652864.0, "9745": 951258496.0, "9750": 979875328.0, "9755": 970305728.0, "9760": 968052864.0, "9765": 963860800.0, "9770": 952553472.0, "9775": 957051456.0, "9780": 970428160.0, "9785": 958725504.0, "9790": 961387584.0, "9795": 958509568.0, "9800": 949746944.0, "9805": 962062336.0, "9810": 978824576.0, "9815": 977226688.0, "9820": 982521664.0, "9825": 939388672.0, "9830": 969615168.0, "9835": 972929536.0, "9840": 971663616.0, "9845": 967123904.0, "9850": 947011200.0, "9855": 957548032.0, "9860": 987729536.0, "9865": 970151360.0, "9870": 990237120.0, "9875": 956841216.0, "9880": 931559296.0, "9885": 963396672.0, "9890": 972209216.0, "9895": 983391360.0, "9900": 956558720.0, "9905": 939071744.0, "9910": 978935296.0, "9915": 973456896.0, "9920": 944131328.0, "9925": 963003840.0, "9930": 947627264.0, "9935": 960708736.0, "9940": 965943808.0, "9945": 958838464.0, "9950": 964449280.0, "9955": 943721728.0, "9960": 966575488.0, "9965": 983584512.0, "9970": 966238592.0, "9975": 963673472.0, "9980": 980845504.0, "9985": 942624128.0, "9990": 976737536.0, "9995": 982545920.0, "10000": 971778048.0, "10005": 969985024.0, "10010": 944172480.0, "10015": 983376640.0, "10020": 978186112.0, "10025": 979783296.0, "10030": 971282176.0, "10035": 946816128.0, "10040": 950633472.0, "10045": 977916160.0, "10050": 985600256.0, "10055": 990333952.0, "10060": 958886144.0, "10065": 947505728.0, "10070": 966822336.0, "10075": 979426432.0, "10080": 971596992.0, "10085": 974217920.0, "10090": 943947072.0, "10095": 962670976.0, "10100": 972363200.0, "10105": 976305536.0, "10110": 972240768.0, "10115": 948723840.0, "10120": 962374272.0, "10125": 974257536.0, "10130": 980380288.0, "10135": 972433024.0, "10140": 957769344.0, "10145": 934071680.0, "10150": 973609856.0, "10155": 969790464.0, "10160": 961962816.0, "10165": 974641088.0, "10170": 944539072.0, "10175": 978869440.0, "10180": 983775808.0, "10185": 978846464.0, "10190": 955485248.0, "10195": 936806720.0, "10200": 988020992.0, "10205": 972775424.0, "10210": 966743296.0, "10215": 975507904.0, "10220": 948752768.0, "10225": 950524032.0, "10230": 975814720.0, "10235": 954213760.0, "10240": 969580352.0, "10245": 961860224.0, "10250": 936561344.0, "10255": 979455232.0, "10260": 964780480.0, "10265": 967134528.0, "10270": 968352384.0, "10275": 935771392.0, "10280": 969993088.0, "10285": 996121664.0, "10290": 979468672.0, "10295": 981344128.0, "10300": 952149824.0, "10305": 971914816.0, "10310": 959905472.0, "10315": 971362176.0, "10320": 985085056.0, "10325": 983314944.0, "10330": 934841600.0, "10335": 976405568.0, "10340": 956756096.0, "10345": 973044928.0, "10350": 984782336.0, "10355": 942468544.0, "10360": 961947520.0, "10365": 974306560.0, "10370": 980581120.0, "10375": 970159360.0, "10380": 961712896.0, "10385": 955283584.0, "10390": 990513024.0, "10395": 964710656.0, "10400": 960839488.0, "10405": 950091840.0, "10410": 955368384.0, "10415": 976147200.0, "10420": 967716096.0, "10425": 969617408.0, "10430": 964979456.0, "10435": 962841088.0, "10440": 972232960.0, "10445": 972258560.0, "10450": 975035328.0, "10455": 966492928.0, "10460": 948677888.0, "10465": 971977088.0, "10470": 972020032.0, "10475": 978974336.0, "10480": 997502144.0, "10485": 949509248.0, "10490": 935179520.0, "10495": 968924352.0, "10500": 978344960.0, "10505": 959172352.0, "10510": 951412864.0, "10515": 953965568.0, "10520": 971768832.0, "10525": 969708800.0, "10530": 970265344.0, "10535": 986403136.0, "10540": 946658560.0, "10545": 970177152.0, "10550": 969483072.0, "10555": 959779328.0, "10560": 975758848.0, "10565": 961376448.0, "10570": 969094592.0, "10575": 972903680.0, "10580": 960673408.0, "10585": 973677504.0, "10590": 951254016.0, "10595": 956503808.0, "10600": 967554176.0, "10605": 986761280.0, "10610": 966344576.0, "10615": 976109696.0, "10620": 941302656.0, "10625": 964987648.0, "10630": 967776896.0, "10635": 973236224.0, "10640": 974345728.0, "10645": 948583168.0, "10650": 965725696.0, "10655": 985625216.0, "10660": 976510208.0, "10665": 967187712.0, "10670": 954993216.0, "10675": 933952000.0, "10680": 986374784.0, "10685": 990980480.0, "10690": 963702528.0, "10695": 971799424.0, "10700": 949858816.0, "10705": 978167808.0, "10710": 968170048.0, "10715": 967180992.0, "10720": 966100288.0, "10725": 944195456.0, "10730": 980097472.0, "10735": 960897472.0, "10740": 971062144.0, "10745": 984408448.0, "10750": 981431296.0, "10755": 944270720.0, "10760": 969686400.0, "10765": 972260800.0, "10770": 973753920.0, "10775": 958490880.0, "10780": 949348736.0, "10785": 953276928.0, "10790": 970196480.0, "10795": 960534016.0, "10800": 971560128.0, "10805": 951313280.0, "10810": 974272512.0, "10815": 959362944.0, "10820": 971118400.0, "10825": 967174656.0, "10830": 956881920.0, "10835": 963175936.0, "10840": 970863232.0, "10845": 964107584.0, "10850": 958114176.0, "10855": 967547392.0, "10860": 950629120.0, "10865": 964270784.0, "10870": 983258752.0, "10875": 982410240.0, "10880": 958501824.0, "10885": 954811456.0, "10890": 972669440.0, "10895": 973446720.0, "10900": 970239424.0, "10905": 964950400.0, "10910": 938804096.0, "10915": 960431872.0, "10920": 983408000.0, "10925": 970055552.0, "10930": 967519104.0, "10935": 962824000.0, "10940": 953832064.0, "10945": 964926208.0, "10950": 971742592.0, "10955": 966363328.0, "10960": 971830976.0, "10965": 966640192.0, "10970": 983560512.0, "10975": 965134720.0, "10980": 974656640.0, "10985": 986584960.0, "10990": 950737344.0, "10995": 963452160.0, "11000": 985385216.0, "11005": 978512320.0, "11010": 971320192.0, "11015": 969543168.0, "11020": 947877120.0, "11025": 960056576.0, "11030": 978046592.0, "11035": 976005440.0, "11040": 986192128.0, "11045": 956783552.0, "11050": 973469440.0, "11055": 974383104.0, "11060": 962312128.0, "11065": 985456704.0, "11070": 949975360.0, "11075": 976382336.0, "11080": 972082304.0, "11085": 966899584.0, "11090": 976448896.0, "11095": 946696384.0, "11100": 965695104.0, "11105": 973979840.0, "11110": 980818816.0, "11115": 967850880.0, "11120": 956745344.0, "11125": 956648960.0, "11130": 975479936.0, "11135": 979181888.0, "11140": 964637440.0, "11145": 966267392.0, "11150": 936047360.0, "11155": 976259584.0, "11160": 983803264.0, "11165": 982264832.0, "11170": 977572736.0, "11175": 957875456.0, "11180": 962049216.0, "11185": 971572864.0, "11190": 979667328.0, "11195": 985257088.0, "11200": 982891392.0, "11205": 942081920.0, "11210": 984642624.0, "11215": 967409856.0, "11220": 983243648.0, "11225": 961495872.0, "11230": 953190528.0, "11235": 981572352.0, "11240": 977461760.0, "11245": 965768768.0, "11250": 969196288.0, "11255": 959700992.0, "11260": 979838656.0, "11265": 963705856.0, "11270": 981070528.0, "11275": 967679616.0, "11280": 955758592.0, "11285": 953271296.0, "11290": 956046720.0, "11295": 968309120.0, "11300": 962673920.0, "11305": 958748416.0, "11310": 946218944.0, "11315": 983061568.0, "11320": 964829952.0, "11325": 980858048.0, "11330": 975367168.0, "11335": 952199936.0, "11340": 970535360.0, "11345": 969853184.0, "11350": 981433280.0, "11355": 982250304.0, "11360": 941189888.0, "11365": 970665600.0, "11370": 978499456.0, "11375": 974872576.0, "11380": 968134784.0, "11385": 958587392.0, "11390": 938157568.0, "11395": 977236992.0, "11400": 973515136.0, "11405": 961225216.0, "11410": 966096192.0, "11415": 929165312.0, "11420": 964412800.0, "11425": 981073408.0, "11430": 978465216.0, "11435": 969859328.0, "11440": 945061120.0, "11445": 975107200.0, "11450": 984245248.0, "11455": 971279232.0, "11460": 964864192.0, "11465": 959856640.0, "11470": 954970496.0, "11475": 972494912.0, "11480": 956248576.0, "11485": 977237248.0, "11490": 986332160.0, "11495": 958962112.0, "11500": 969461376.0, "11505": 963878400.0, "11510": 976432896.0, "11515": 977388352.0, "11520": 953804416.0, "11525": 975831936.0, "11530": 976613248.0, "11535": 979406080.0, "11540": 974288256.0, "11545": 953484544.0, "11550": 953061184.0, "11555": 981856000.0, "11560": 984606528.0, "11565": 965181696.0, "11570": 966290816.0, "11575": 951658816.0, "11580": 975914752.0, "11585": 977316736.0, "11590": 968897408.0, "11595": 976658368.0, "11600": 946304896.0, "11605": 973201152.0, "11610": 982273600.0, "11615": 971911936.0, "11620": 968823168.0, "11625": 949566144.0, "11630": 937309312.0, "11635": 973593600.0, "11640": 981026240.0, "11645": 979959424.0, "11650": 971634816.0, "11655": 955835904.0, "11660": 980527872.0, "11665": 958142272.0, "11670": 982520832.0, "11675": 972255616.0, "11680": 956398080.0, "11685": 982870464.0, "11690": 968762624.0, "11695": 967596288.0, "11700": 973819968.0, "11705": 955953920.0, "11710": 964548352.0, "11715": 983282880.0, "11720": 983845120.0, "11725": 965156032.0, "11730": 955363392.0, "11735": 942769536.0, "11740": 973890560.0, "11745": 971150656.0, "11750": 961698688.0, "11755": 962872064.0, "11760": 950034688.0, "11765": 983656576.0, "11770": 984529088.0, "11775": 975365504.0, "11780": 985393792.0, "11785": 947039616.0, "11790": 972346304.0, "11795": 969823488.0, "11800": 973104384.0, "11805": 986497920.0, "11810": 966928000.0, "11815": 955346560.0, "11820": 973459456.0, "11825": 970671424.0, "11830": 974810432.0, "11835": 961655424.0, "11840": 943915648.0, "11845": 980657472.0, "11850": 974320064.0, "11855": 977637056.0, "11860": 971502592.0, "11865": 938509952.0, "11870": 940151488.0, "11875": 989951232.0, "11880": 972416768.0, "11885": 962889280.0, "11890": 970098176.0, "11895": 965160960.0, "11900": 979258432.0, "11905": 961678720.0, "11910": 983543936.0, "11915": 989710464.0, "11920": 944599488.0, "11925": 993353536.0, "11930": 964681728.0, "11935": 963454272.0, "11940": 976904640.0, "11945": 944602688.0, "11950": 977625984.0, "11955": 979215040.0, "11960": 972337408.0, "11965": 976176576.0, "11970": 963209344.0, "11975": 963277504.0, "11980": 977745600.0, "11985": 953135616.0, "11990": 968791616.0, "11995": 965169408.0, "12000": 958565248.0, "12005": 974608704.0, "12010": 979281792.0, "12015": 972163520.0, "12020": 973478656.0, "12025": 934679104.0, "12030": 968964480.0, "12035": 984459328.0, "12040": 977642880.0, "12045": 981830656.0, "12050": 931288064.0, "12055": 938631872.0, "12060": 974558464.0, "12065": 965858176.0, "12070": 968068032.0, "12075": 950014976.0, "12080": 953539456.0, "12085": 972453056.0, "12090": 963919936.0, "12095": 963443008.0, "12100": 976787328.0, "12105": 950458304.0, "12110": 972070912.0, "12115": 968115264.0, "12120": 986617152.0, "12125": 980687040.0, "12130": 941848000.0, "12135": 954827520.0, "12140": 976076096.0, "12145": 979183424.0, "12150": 979743232.0, "12155": 961879616.0, "12160": 946605632.0, "12165": 968601600.0, "12170": 964511552.0, "12175": 967827136.0, "12180": 975152192.0, "12185": 953058240.0, "12190": 988557888.0, "12195": 970825920.0, "12200": 965056000.0, "12205": 969050624.0, "12210": 939428096.0, "12215": 997057856.0, "12220": 970394432.0, "12225": 979864576.0, "12230": 980493632.0, "12235": 950568512.0, "12240": 963527616.0, "12245": 965876608.0, "12250": 976794944.0, "12255": 968276992.0, "12260": 983937728.0, "12265": 931992576.0, "12270": 966666816.0, "12275": 979886848.0, "12280": 977237888.0, "12285": 970714816.0, "12290": 929624064.0, "12295": 976836672.0, "12300": 985817984.0, "12305": 970138496.0, "12310": 986106624.0, "12315": 936140544.0, "12320": 958025408.0, "12325": 966648512.0, "12330": 968443392.0, "12335": 963752320.0, "12340": 957778368.0, "12345": 944506816.0, "12350": 967021504.0, "12355": 975942976.0, "12360": 979030592.0, "12365": 964647360.0, "12370": 948744768.0, "12375": 963974336.0, "12380": 964800512.0, "12385": 973078400.0, "12390": 961563520.0, "12395": 961740864.0, "12400": 975370688.0, "12405": 976192000.0, "12410": 953891840.0, "12415": 962747776.0, "12420": 944053312.0, "12425": 949260608.0, "12430": 972526528.0, "12435": 969239936.0, "12440": 962018304.0, "12445": 951987392.0, "12450": 947452992.0, "12455": 981519744.0, "12460": 974198784.0, "12465": 954266112.0, "12470": 981008320.0, "12475": 958668352.0, "12480": 966925632.0, "12485": 978329792.0, "12490": 974317376.0, "12495": 969742656.0, "12500": 961279872.0, "12505": 943290304.0, "12510": 961002304.0, "12515": 969652928.0, "12520": 973507008.0, "12525": 972145600.0, "12530": 944744512.0, "12535": 976635584.0, "12540": 965806848.0, "12545": 971935296.0, "12550": 969648832.0, "12555": 941153728.0, "12560": 964396224.0, "12565": 947301248.0, "12570": 974334208.0, "12575": 963006336.0, "12580": 958128448.0, "12585": 964338304.0, "12590": 965973504.0, "12595": 978928128.0, "12600": 981886272.0, "12605": 948892992.0, "12610": 937684992.0, "12615": 961993024.0, "12620": 961313088.0, "12625": 966212736.0, "12630": 970591488.0, "12635": 962027840.0, "12640": 978369856.0, "12645": 969272640.0, "12650": 969990976.0, "12655": 963738112.0, "12660": 932330944.0, "12665": 956866752.0, "12670": 985898496.0, "12675": 965530688.0, "12680": 960980032.0, "12685": 950824896.0, "12690": 945331584.0, "12695": 978400320.0, "12700": 985119168.0, "12705": 958834112.0, "12710": 968639936.0, "12715": 956316992.0, "12720": 976710912.0, "12725": 965181440.0, "12730": 969345664.0, "12735": 986755648.0, "12740": 936964416.0, "12745": 970716096.0, "12750": 973947008.0, "12755": 979906368.0, "12760": 969880896.0, "12765": 941915328.0, "12770": 951706560.0, "12775": 951386880.0, "12780": 969166464.0, "12785": 955975488.0, "12790": 962833600.0, "12795": 952687040.0, "12800": 962692288.0, "12805": 972811648.0, "12810": 973658688.0, "12815": 952614912.0, "12820": 941215040.0, "12825": 966757696.0, "12830": 998611904.0, "12835": 976905792.0, "12840": 961832512.0, "12845": 941553024.0, "12850": 959002304.0, "12855": 961191168.0, "12860": 972028992.0, "12865": 977907200.0, "12870": 974585024.0, "12875": 955913856.0, "12880": 967675584.0, "12885": 981254464.0, "12890": 959949376.0, "12895": 969690816.0, "12900": 937965312.0, "12905": 968297472.0, "12910": 981888320.0, "12915": 975493696.0, "12920": 955796928.0, "12925": 949376896.0, "12930": 960241664.0, "12935": 990807104.0, "12940": 968153728.0, "12945": 975368704.0, "12950": 971630144.0, "12955": 955943104.0, "12960": 976926144.0, "12965": 961093312.0, "12970": 961969600.0, "12975": 959972672.0, "12980": 938003200.0, "12985": 963413056.0, "12990": 967858304.0, "12995": 975684160.0, "13000": 980060416.0, "13005": 954304128.0, "13010": 951638144.0, "13015": 964639616.0, "13020": 960579712.0, "13025": 979604032.0, "13030": 971955520.0, "13035": 955032960.0, "13040": 954238400.0, "13045": 972488384.0, "13050": 967829632.0, "13055": 967515648.0, "13060": 951367488.0, "13065": 970043584.0, "13070": 975204032.0, "13075": 961484544.0, "13080": 961150976.0, "13085": 944415488.0, "13090": 973751104.0, "13095": 980587136.0, "13100": 965335104.0, "13105": 974070208.0, "13110": 924704704.0, "13115": 973706688.0, "13120": 981022400.0, "13125": 975243584.0, "13130": 951363200.0, "13135": 944759936.0, "13140": 925842368.0, "13145": 990046144.0, "13150": 983399872.0, "13155": 976137984.0, "13160": 969677888.0, "13165": 952714368.0, "13170": 977235264.0, "13175": 982386112.0, "13180": 976815616.0, "13185": 974405504.0, "13190": 949417344.0, "13195": 964529408.0, "13200": 969661376.0, "13205": 983195904.0, "13210": 971546688.0, "13215": 947497536.0, "13220": 962338496.0, "13225": 964033216.0, "13230": 976047424.0, "13235": 977122368.0, "13240": 965944576.0, "13245": 936376960.0, "13250": 977705088.0, "13255": 960459328.0, "13260": 966587712.0, "13265": 973129152.0, "13270": 954513600.0, "13275": 959328832.0, "13280": 976962816.0, "13285": 974970688.0, "13290": 966694720.0, "13295": 942271424.0, "13300": 969229440.0, "13305": 982244864.0, "13310": 971951040.0, "13315": 968039296.0, "13320": 976581120.0, "13325": 943462976.0, "13330": 972600960.0, "13335": 980398464.0, "13340": 960269248.0, "13345": 978395968.0, "13350": 954946752.0, "13355": 968853696.0, "13360": 982665280.0, "13365": 957284608.0, "13370": 966744256.0, "13375": 937628288.0, "13380": 953521536.0, "13385": 980559744.0, "13390": 979147712.0, "13395": 982612288.0, "13400": 951785728.0, "13405": 934339264.0, "13410": 975949632.0, "13415": 973288896.0, "13420": 985150592.0, "13425": 966116160.0, "13430": 954388992.0, "13435": 980849984.0, "13440": 966707200.0, "13445": 966654592.0, "13450": 969838784.0, "13455": 951994752.0, "13460": 964976960.0, "13465": 974494016.0, "13470": 972140672.0, "13475": 969584704.0, "13480": 966415680.0, "13485": 966859072.0, "13490": 977761984.0, "13495": 960557632.0, "13500": 971015232.0, "13505": 964991680.0, "13510": 963358848.0, "13515": 989823168.0, "13520": 958611136.0, "13525": 976805952.0, "13530": 983633600.0, "13535": 931406784.0, "13540": 946745216.0, "13545": 972277440.0, "13550": 971011968.0, "13555": 981193536.0, "13560": 959590848.0, "13565": 964305600.0, "13570": 972566784.0, "13575": 977412672.0, "13580": 975746368.0, "13585": 963088064.0, "13590": 939766336.0, "13595": 983151936.0, "13600": 989848000.0, "13605": 968935488.0, "13610": 973448896.0, "13615": 945219136.0, "13620": 972624640.0, "13625": 971620864.0, "13630": 958887360.0, "13635": 985389760.0, "13640": 980786304.0, "13645": 961124928.0, "13650": 953463680.0, "13655": 965794304.0, "13660": 978308928.0, "13665": 970195776.0, "13670": 956588928.0, "13675": 977970112.0, "13680": 976622912.0, "13685": 968255168.0, "13690": 971582656.0, "13695": 944342848.0, "13700": 974531520.0, "13705": 973578816.0, "13710": 976070784.0, "13715": 950920320.0, "13720": 951790400.0, "13725": 972891328.0, "13730": 969675520.0, "13735": 962111552.0, "13740": 973339520.0, "13745": 974172608.0, "13750": 958279168.0, "13755": 977676608.0, "13760": 965060480.0, "13765": 968186176.0, "13770": 979094784.0, "13775": 928936640.0, "13780": 974094144.0, "13785": 981952000.0, "13790": 963654848.0, "13795": 979298368.0, "13800": 956024320.0, "13805": 967824000.0, "13810": 964034624.0, "13815": 981442432.0, "13820": 976323648.0, "13825": 958418624.0, "13830": 945817792.0, "13835": 974637824.0, "13840": 996537024.0, "13845": 967646272.0, "13850": 975120704.0, "13855": 933668160.0, "13860": 971723776.0, "13865": 976904512.0, "13870": 987168512.0, "13875": 978978368.0, "13880": 951348864.0, "13885": 975606720.0, "13890": 968536512.0, "13895": 945013952.0, "13900": 979471808.0, "13905": 966741824.0, "13910": 965687360.0, "13915": 977762176.0, "13920": 975809664.0, "13925": 980972544.0, "13930": 963049920.0, "13935": 956760064.0, "13940": 965576128.0, "13945": 965457792.0, "13950": 977971648.0, "13955": 977333440.0, "13960": 974525632.0, "13965": 974808896.0, "13970": 972724416.0, "13975": 971455168.0, "13980": 983308544.0, "13985": 968884608.0, "13990": 962529856.0, "13995": 980717568.0, "14000": 972247168.0, "14005": 973160960.0, "14010": 965989184.0, "14015": 942211904.0, "14020": 980680704.0, "14025": 977229120.0, "14030": 972227264.0, "14035": 971613760.0, "14040": 936449984.0, "14045": 968629696.0, "14050": 975017536.0, "14055": 978886848.0, "14060": 976674688.0, "14065": 928763392.0, "14070": 978253184.0, "14075": 979776320.0, "14080": 964984384.0, "14085": 969160896.0, "14090": 966112064.0, "14095": 950865088.0, "14100": 972209536.0, "14105": 971073792.0, "14110": 987498240.0, "14115": 967806720.0, "14120": 944600384.0, "14125": 973015424.0, "14130": 971102912.0, "14135": 978812672.0, "14140": 972571968.0, "14145": 947377792.0, "14150": 974346560.0, "14155": 979377536.0, "14160": 972831296.0, "14165": 972681024.0, "14170": 946903744.0, "14175": 949611968.0, "14180": 976773568.0, "14185": 972050496.0, "14190": 979868416.0, "14195": 973823424.0, "14200": 945287232.0, "14205": 980726592.0, "14210": 981399168.0, "14215": 970218368.0, "14220": 965550592.0, "14225": 956572608.0, "14230": 984026112.0, "14235": 984244224.0, "14240": 980549952.0, "14245": 969715904.0, "14250": 954038080.0, "14255": 941107264.0, "14260": 963469248.0, "14265": 988750912.0, "14270": 986631296.0, "14275": 968543616.0, "14280": 954382528.0, "14285": 962062336.0, "14290": 965773184.0, "14295": 965975936.0, "14300": 982712000.0, "14305": 936529344.0, "14310": 969217600.0, "14315": 976933824.0, "14320": 975459072.0, "14325": 980012800.0, "14330": 956847808.0, "14335": 962695360.0, "14340": 955736256.0, "14345": 967726848.0, "14350": 963249280.0, "14355": 946253120.0, "14360": 949435136.0, "14365": 972105408.0, "14370": 953574080.0, "14375": 956149376.0, "14380": 957414976.0, "14385": 951106752.0, "14390": 982025216.0, "14395": 970929408.0, "14400": 963463040.0, "14405": 969451200.0, "14410": 965232320.0, "14415": 963674432.0, "14420": 960306240.0, "14425": 976545600.0, "14430": 978892224.0, "14435": 970688512.0, "14440": 973328960.0, "14445": 980361344.0, "14450": 973283200.0, "14455": 960866880.0, "14460": 955855552.0, "14465": 934258944.0, "14470": 960492736.0, "14475": 966149184.0, "14480": 967106624.0, "14485": 975712064.0, "14490": 947118400.0, "14495": 959674048.0, "14500": 971364800.0, "14505": 966338432.0, "14510": 977503808.0, "14515": 942594112.0, "14520": 953515456.0, "14525": 971643136.0, "14530": 957168192.0, "14535": 979046016.0, "14540": 954386496.0, "14545": 940196096.0, "14550": 988357376.0, "14555": 971921344.0, "14560": 973568832.0, "14565": 969347264.0, "14570": 948500032.0, "14575": 966111360.0, "14580": 966160320.0, "14585": 976657280.0, "14590": 975167232.0, "14595": 945736000.0, "14600": 958379968.0, "14605": 968413696.0, "14610": 953982144.0, "14615": 967615168.0, "14620": 952971072.0, "14625": 963427968.0, "14630": 971352128.0, "14635": 976461248.0, "14640": 978118848.0, "14645": 974367232.0, "14650": 946909952.0, "14655": 977294784.0, "14660": 975102784.0, "14665": 953914944.0, "14670": 970402624.0, "14675": 952297472.0, "14680": 970076672.0, "14685": 971632960.0, "14690": 967223104.0, "14695": 949121920.0, "14700": 934701312.0, "14705": 958877120.0, "14710": 978267584.0, "14715": 981875520.0, "14720": 967315264.0, "14725": 962249856.0, "14730": 937900928.0, "14735": 954838848.0, "14740": 967118912.0, "14745": 987303232.0, "14750": 973917888.0, "14755": 944213632.0, "14760": 974132288.0, "14765": 970042048.0, "14770": 989082880.0, "14775": 982314688.0, "14780": 945437760.0, "14785": 958015360.0, "14790": 971123392.0, "14795": 974771776.0, "14800": 969298624.0, "14805": 949938304.0, "14810": 951464448.0, "14815": 966450048.0, "14820": 968305472.0, "14825": 964218752.0, "14830": 956750912.0, "14835": 946355904.0, "14840": 967377408.0, "14845": 980655680.0, "14850": 953438400.0, "14855": 960567488.0, "14860": 967008448.0, "14865": 976763392.0, "14870": 967021120.0, "14875": 979814720.0, "14880": 957290048.0, "14885": 962229376.0, "14890": 950189248.0, "14895": 976166720.0, "14900": 971652160.0, "14905": 980249792.0, "14910": 971976064.0, "14915": 945792256.0, "14920": 952685120.0, "14925": 977241344.0, "14930": 976140224.0, "14935": 973680576.0, "14940": 939816704.0, "14945": 975466432.0, "14950": 977146624.0, "14955": 983379328.0, "14960": 955411584.0, "14965": 956142912.0, "14970": 943448512.0, "14975": 960750784.0, "14980": 977487040.0, "14985": 984206272.0, "14990": 966934592.0, "14995": 952593152.0, "15000": 976591680.0, "15005": 965374144.0, "15010": 967020480.0, "15015": 971395264.0, "15020": 944558528.0, "15025": 974029760.0, "15030": 970423104.0, "15035": 970091328.0, "15040": 971396544.0, "15045": 949953216.0, "15050": 948354368.0, "15055": 968193728.0, "15060": 970391872.0, "15065": 972696000.0, "15070": 953606336.0, "15075": 948999040.0, "15080": 970486720.0, "15085": 973647680.0, "15090": 971761600.0, "15095": 950338112.0, "15100": 958171904.0, "15105": 987425856.0, "15110": 980727552.0, "15115": 957037888.0, "15120": 959749952.0, "15125": 962356672.0, "15130": 968518912.0, "15135": 982609472.0, "15140": 956470464.0, "15145": 976497344.0, "15150": 938380608.0, "15155": 938606528.0, "15160": 968071936.0, "15165": 975085888.0, "15170": 959624384.0, "15175": 974371776.0, "15180": 945343168.0, "15185": 964351744.0, "15190": 963067328.0, "15195": 971119296.0, "15200": 985519872.0, "15205": 941408192.0, "15210": 963598272.0, "15215": 965366720.0, "15220": 983389632.0, "15225": 966414528.0, "15230": 949523072.0, "15235": 933473600.0, "15240": 979904704.0, "15245": 958179008.0, "15250": 964738112.0, "15255": 972057024.0, "15260": 955110848.0, "15265": 981713344.0, "15270": 958492736.0, "15275": 973592960.0, "15280": 967867072.0, "15285": 942845568.0, "15290": 980505856.0, "15295": 967674432.0, "15300": 956800192.0, "15305": 960885632.0, "15310": 932074624.0, "15315": 939401472.0, "15320": 969233920.0, "15325": 970276032.0, "15330": 955326656.0, "15335": 967296000.0, "15340": 951620544.0, "15345": 987830144.0, "15350": 973398208.0, "15355": 972395328.0, "15360": 968439488.0, "15365": 940874304.0, "15370": 955745344.0, "15375": 972376384.0, "15380": 972518592.0, "15385": 977431104.0, "15390": 946487168.0, "15395": 950450688.0, "15400": 961817984.0, "15405": 973391808.0, "15410": 964326528.0, "15415": 964179328.0, "15420": 961646528.0, "15425": 969668224.0, "15430": 973392320.0, "15435": 959190656.0, "15440": 956438272.0, "15445": 951850048.0, "15450": 979378368.0, "15455": 955254592.0, "15460": 971277056.0, "15465": 980072768.0, "15470": 966336896.0, "15475": 958639232.0, "15480": 967960960.0, "15485": 956319296.0, "15490": 956100544.0, "15495": 954554688.0, "15500": 968508416.0, "15505": 967880320.0, "15510": 966672384.0, "15515": 972130944.0, "15520": 963768512.0, "15525": 928841856.0, "15530": 977210816.0, "15535": 976577408.0, "15540": 974676288.0, "15545": 967685120.0, "15550": 950234880.0, "15555": 963396032.0, "15560": 983174144.0, "15565": 962390848.0, "15570": 969144192.0, "15575": 954410304.0, "15580": 959533184.0, "15585": 977611136.0, "15590": 986874304.0, "15595": 974501056.0, "15600": 951899264.0, "15605": 943950592.0, "15610": 973446912.0, "15615": 970559744.0, "15620": 953556288.0, "15625": 979771328.0, "15630": 950251520.0, "15635": 976511808.0, "15640": 963121344.0, "15645": 961854400.0, "15650": 984034816.0, "15655": 939635776.0, "15660": 979599552.0, "15665": 965694144.0, "15670": 971450752.0, "15675": 978767168.0, "15680": 952474816.0, "15685": 945667136.0, "15690": 967519296.0, "15695": 965502400.0, "15700": 975300224.0, "15705": 967108608.0, "15710": 949718592.0, "15715": 979176384.0, "15720": 960088448.0, "15725": 971187520.0, "15730": 959780608.0, "15735": 943153280.0, "15740": 985987904.0, "15745": 978668928.0, "15750": 982186048.0, "15755": 941256576.0, "15760": 958319360.0, "15765": 955351872.0, "15770": 979854464.0, "15775": 964280768.0, "15780": 952299648.0, "15785": 962452608.0, "15790": 934844544.0, "15795": 968499328.0, "15800": 974326144.0, "15805": 971420288.0, "15810": 974817472.0, "15815": 939242176.0, "15820": 973930048.0, "15825": 987836288.0, "15830": 968153024.0, "15835": 981094848.0, "15840": 932538048.0, "15845": 969748608.0, "15850": 964410112.0, "15855": 985217408.0, "15860": 957801280.0, "15865": 963624064.0, "15870": 941040384.0, "15875": 972175808.0, "15880": 965057472.0, "15885": 982073600.0, "15890": 968902400.0, "15895": 955749632.0, "15900": 992159360.0, "15905": 966077184.0, "15910": 980096960.0, "15915": 972014400.0, "15920": 948662144.0, "15925": 968077632.0, "15930": 983336064.0, "15935": 968342848.0, "15940": 971028160.0, "15945": 980933888.0, "15950": 964190848.0, "15955": 975166848.0, "15960": 974016640.0, "15965": 971359296.0, "15970": 968846336.0, "15975": 954414464.0, "15980": 975854208.0, "15985": 975825024.0, "15990": 985283584.0, "15995": 974119296.0, "16000": 969040640.0, "16005": 961206976.0, "16010": 973232960.0, "16015": 980823488.0, "16020": 964724096.0, "16025": 960423360.0, "16030": 951443520.0, "16035": 984190144.0, "16040": 960717184.0, "16045": 971084352.0, "16050": 955584448.0, "16055": 965946176.0, "16060": 964988672.0, "16065": 972284224.0, "16070": 968120384.0, "16075": 978491840.0, "16080": 950549888.0, "16085": 970541184.0, "16090": 973335744.0, "16095": 967243584.0, "16100": 981083136.0, "16105": 930045952.0, "16110": 961992384.0, "16115": 967942912.0, "16120": 970878336.0, "16125": 975837312.0, "16130": 960847424.0, "16135": 950750784.0, "16140": 977372160.0, "16145": 977334528.0, "16150": 982344768.0, "16155": 977310080.0, "16160": 942417600.0, "16165": 970480128.0, "16170": 963290816.0, "16175": 978512960.0, "16180": 976714240.0, "16185": 953227200.0, "16190": 951136256.0, "16195": 977969152.0, "16200": 975577920.0, "16205": 946649728.0, "16210": 962238720.0, "16215": 950769408.0, "16220": 978672000.0, "16225": 967705920.0, "16230": 972345280.0, "16235": 973349312.0, "16240": 940937856.0, "16245": 974909568.0, "16250": 979472512.0, "16255": 989964096.0, "16260": 969736896.0, "16265": 955014784.0, "16270": 963655680.0, "16275": 968986688.0, "16280": 991812160.0, "16285": 976337920.0, "16290": 940568512.0, "16295": 944058112.0, "16300": 957723968.0, "16305": 980701696.0, "16310": 975368384.0, "16315": 972531904.0, "16320": 942312448.0, "16325": 975084224.0, "16330": 978479040.0, "16335": 975352640.0, "16340": 988054336.0, "16345": 947281216.0, "16350": 962162880.0, "16355": 967681216.0, "16360": 957697088.0, "16365": 972235264.0, "16370": 934248320.0, "16375": 962336832.0, "16380": 984551680.0, "16385": 974644992.0, "16390": 976973184.0, "16395": 962976512.0, "16400": 951378048.0, "16405": 962078208.0, "16410": 963954048.0, "16415": 979884416.0, "16420": 967807552.0, "16425": 956077760.0, "16430": 959672256.0, "16435": 971814976.0, "16440": 970529600.0, "16445": 969300928.0, "16450": 946201984.0, "16455": 934052864.0, "16460": 981408704.0, "16465": 964657344.0, "16470": 979311104.0, "16475": 950238016.0, "16480": 962505984.0, "16485": 971450688.0, "16490": 965989568.0, "16495": 984134464.0, "16500": 990588608.0, "16505": 956996416.0, "16510": 967146368.0, "16515": 954302464.0, "16520": 971851200.0, "16525": 977432640.0, "16530": 931562304.0, "16535": 974210880.0, "16540": 958177472.0, "16545": 971191616.0, "16550": 974157760.0, "16555": 951232768.0, "16560": 959455104.0, "16565": 964970944.0, "16570": 970977280.0, "16575": 975099136.0, "16580": 971691584.0, "16585": 946506560.0, "16590": 964437312.0, "16595": 975018304.0, "16600": 980841920.0, "16605": 972555904.0, "16610": 946667072.0, "16615": 985657472.0, "16620": 959627328.0, "16625": 978544896.0, "16630": 973432192.0, "16635": 956677888.0, "16640": 967636992.0, "16645": 969854336.0, "16650": 967709632.0, "16655": 964946432.0, "16660": 975610240.0, "16665": 951671232.0, "16670": 972286080.0, "16675": 966241664.0, "16680": 960710080.0, "16685": 968944256.0, "16690": 945824832.0, "16695": 968735616.0, "16700": 970501248.0, "16705": 963141376.0, "16710": 972204224.0, "16715": 947077888.0, "16720": 963907392.0, "16725": 966642432.0, "16730": 974724416.0, "16735": 988913792.0, "16740": 952151168.0, "16745": 950031296.0, "16750": 949754432.0, "16755": 965206976.0, "16760": 968736448.0, "16765": 967450752.0, "16770": 925728640.0, "16775": 972659008.0, "16780": 972465984.0, "16785": 957074816.0, "16790": 960783808.0, "16795": 942238464.0, "16800": 950647232.0, "16805": 965461760.0, "16810": 960408640.0, "16815": 964222080.0, "16820": 952733760.0, "16825": 965585856.0, "16830": 972323456.0, "16835": 976195968.0, "16840": 965431296.0, "16845": 972732608.0, "16850": 962841728.0, "16855": 984073536.0, "16860": 975811328.0, "16865": 960597376.0, "16870": 953081408.0, "16875": 960695360.0, "16880": 976187840.0, "16885": 975185664.0, "16890": 965494400.0, "16895": 966077824.0, "16900": 951864768.0, "16905": 960978432.0, "16910": 965537472.0, "16915": 973649152.0, "16920": 957501248.0, "16925": 977342080.0, "16930": 940353792.0, "16935": 977382080.0, "16940": 969239872.0, "16945": 978051520.0, "16950": 961992832.0, "16955": 945311424.0, "16960": 970351680.0, "16965": 971732672.0, "16970": 987789824.0, "16975": 975307456.0, "16980": 947353600.0, "16985": 960118400.0, "16990": 972868352.0, "16995": 960377536.0, "17000": 958489792.0, "17005": 966254720.0, "17010": 938374464.0, "17015": 979242688.0, "17020": 964781312.0, "17025": 968010112.0, "17030": 971379136.0, "17035": 951261184.0, "17040": 965137536.0, "17045": 973158208.0, "17050": 974466752.0, "17055": 975962560.0, "17060": 943160064.0, "17065": 978030144.0, "17070": 978979904.0, "17075": 971242560.0, "17080": 971137024.0, "17085": 956106368.0, "17090": 963354880.0, "17095": 982625024.0, "17100": 960187712.0, "17105": 960568064.0, "17110": 962066752.0, "17115": 940310272.0, "17120": 970265792.0, "17125": 968144960.0, "17130": 962371264.0, "17135": 968930944.0, "17140": 951860160.0, "17145": 969830144.0, "17150": 962214848.0, "17155": 966697984.0, "17160": 976200512.0, "17165": 963607552.0, "17170": 963355520.0, "17175": 977630400.0, "17180": 958676352.0, "17185": 983477696.0, "17190": 974079168.0, "17195": 937096960.0, "17200": 962464448.0, "17205": 970780032.0, "17210": 982835392.0, "17215": 979404416.0, "17220": 937180480.0, "17225": 961188928.0, "17230": 949749568.0, "17235": 969804800.0, "17240": 981937088.0, "17245": 951925184.0, "17250": 967162432.0, "17255": 977212224.0, "17260": 962373888.0, "17265": 965028224.0, "17270": 956157952.0, "17275": 945101888.0, "17280": 975326528.0, "17285": 968847040.0, "17290": 954056000.0, "17295": 957349760.0, "17300": 935575296.0, "17305": 978993984.0, "17310": 964906112.0, "17315": 971438464.0, "17320": 956916800.0, "17325": 944864768.0, "17330": 954899392.0, "17335": 966046464.0, "17340": 966174400.0, "17345": 971078208.0, "17350": 952877184.0, "17355": 949192512.0, "17360": 981008256.0, "17365": 963467648.0, "17370": 967914816.0, "17375": 961641088.0, "17380": 944753152.0, "17385": 968006592.0, "17390": 966630656.0, "17395": 968834176.0, "17400": 977226816.0, "17405": 946346496.0, "17410": 962779136.0, "17415": 979275840.0, "17420": 963479872.0, "17425": 967058688.0, "17430": 952646080.0, "17435": 951889216.0, "17440": 963948416.0, "17445": 967661376.0, "17450": 963006784.0, "17455": 984289344.0, "17460": 943885248.0, "17465": 968975616.0, "17470": 975419904.0, "17475": 958466112.0, "17480": 974196608.0, "17485": 949910720.0, "17490": 970906560.0, "17495": 969819776.0, "17500": 963049920.0, "17505": 963461760.0, "17510": 954001920.0, "17515": 960545216.0, "17520": 982659520.0, "17525": 969968576.0, "17530": 966070976.0, "17535": 961268032.0, "17540": 953066880.0, "17545": 972288640.0, "17550": 987134912.0, "17555": 974504384.0, "17560": 956336896.0, "17565": 945768256.0, "17570": 966123904.0, "17575": 988054464.0, "17580": 977933312.0, "17585": 959972480.0, "17590": 941384064.0, "17595": 943718912.0, "17600": 969918912.0, "17605": 968775104.0, "17610": 977525824.0, "17615": 954499904.0, "17620": 956347328.0, "17625": 983575936.0, "17630": 968078016.0, "17635": 981289856.0, "17640": 973830656.0, "17645": 946217984.0, "17650": 966544832.0, "17655": 975541632.0, "17660": 966385344.0, "17665": 971502144.0, "17670": 959125376.0, "17675": 950870336.0, "17680": 957323200.0, "17685": 964267392.0, "17690": 981433728.0, "17695": 953457472.0, "17700": 948850880.0, "17705": 969590912.0, "17710": 977930112.0, "17715": 974669824.0, "17720": 960497920.0, "17725": 934049280.0, "17730": 960710528.0, "17735": 975021440.0, "17740": 972477696.0, "17745": 964628608.0, "17750": 955924864.0, "17755": 973483520.0, "17760": 978165312.0, "17765": 979154304.0, "17770": 959492672.0, "17775": 959356672.0, "17780": 954272320.0, "17785": 975256512.0, "17790": 975596544.0, "17795": 958798592.0, "17800": 950705728.0, "17805": 954696192.0, "17810": 979303360.0, "17815": 963873408.0, "17820": 973116352.0, "17825": 967497408.0, "17830": 935262080.0, "17835": 961570304.0, "17840": 974462144.0, "17845": 988072832.0, "17850": 984955840.0, "17855": 940870784.0, "17860": 963454656.0, "17865": 971266624.0, "17870": 959378688.0, "17875": 973712448.0, "17880": 958471680.0, "17885": 943399936.0, "17890": 960015744.0, "17895": 975586688.0, "17900": 952555072.0, "17905": 983188160.0, "17910": 948381312.0, "17915": 962730240.0, "17920": 957202432.0, "17925": 959578048.0, "17930": 983258240.0, "17935": 962848320.0, "17940": 959768448.0, "17945": 963872512.0, "17950": 950128832.0, "17955": 959587072.0, "17960": 957347200.0, "17965": 963112000.0, "17970": 969069440.0, "17975": 966158272.0, "17980": 986812800.0, "17985": 966139904.0, "17990": 941639872.0, "17995": 974241344.0, "18000": 978146944.0, "18005": 976728384.0, "18010": 961512960.0, "18015": 959185024.0, "18020": 967907776.0, "18025": 982396416.0, "18030": 974489088.0, "18035": 959129600.0, "18040": 942412352.0, "18045": 959404032.0, "18050": 966574336.0, "18055": 966170368.0, "18060": 972171008.0, "18065": 961408832.0, "18070": 944494272.0, "18075": 971557888.0, "18080": 963089728.0, "18085": 966994176.0, "18090": 987406976.0, "18095": 946981824.0, "18100": 984714944.0, "18105": 971167168.0, "18110": 970334464.0, "18115": 981096832.0, "18120": 948747328.0, "18125": 968361920.0, "18130": 951011072.0, "18135": 972263744.0, "18140": 956224960.0, "18145": 957051328.0, "18150": 940433472.0, "18155": 976709632.0, "18160": 961521664.0, "18165": 973977408.0, "18170": 969216128.0, "18175": 958312640.0, "18180": 967823616.0, "18185": 971735232.0, "18190": 991070976.0, "18195": 973906304.0, "18200": 934520768.0, "18205": 962136704.0, "18210": 968196352.0, "18215": 972400768.0, "18220": 975838336.0, "18225": 958770560.0, "18230": 949412480.0, "18235": 979818048.0, "18240": 958290944.0, "18245": 960914816.0, "18250": 961863936.0, "18255": 936394496.0, "18260": 973689216.0, "18265": 958732032.0, "18270": 978516480.0, "18275": 979461376.0, "18280": 929115008.0, "18285": 953253824.0, "18290": 969291136.0, "18295": 979153920.0, "18300": 956326848.0, "18305": 941762624.0, "18310": 943489088.0, "18315": 959712832.0, "18320": 968325376.0, "18325": 970224576.0, "18330": 958932544.0, "18335": 945668224.0, "18340": 970479168.0, "18345": 969093440.0, "18350": 951502656.0, "18355": 973497984.0, "18360": 952391936.0, "18365": 980419520.0, "18370": 968473472.0, "18375": 972647040.0, "18380": 958430144.0, "18385": 954414976.0, "18390": 962720960.0, "18395": 967685504.0, "18400": 967687744.0, "18405": 955398336.0, "18410": 960563328.0, "18415": 965312832.0, "18420": 966403072.0, "18425": 966489984.0, "18430": 997388224.0, "18435": 975178880.0, "18440": 958123264.0, "18445": 978577600.0, "18450": 965986880.0, "18455": 987256768.0, "18460": 982485056.0, "18465": 952003904.0, "18470": 957418048.0, "18475": 973270400.0, "18480": 995083072.0, "18485": 974077248.0, "18490": 955937600.0, "18495": 935399040.0, "18500": 979344704.0, "18505": 972221952.0, "18510": 967780160.0, "18515": 977073280.0, "18520": 933980160.0, "18525": 991876160.0, "18530": 979358208.0, "18535": 986143104.0, "18540": 969697600.0, "18545": 965010752.0, "18550": 971571520.0, "18555": 963001344.0, "18560": 967877888.0, "18565": 975838784.0, "18570": 966077632.0, "18575": 976466688.0, "18580": 974603136.0, "18585": 969224064.0, "18590": 964527808.0, "18595": 967780224.0, "18600": 954137472.0, "18605": 978620160.0, "18610": 977226944.0, "18615": 967141120.0, "18620": 963548864.0, "18625": 951436352.0, "18630": 974511488.0, "18635": 972256512.0, "18640": 963616960.0, "18645": 967840512.0, "18650": 942602240.0, "18655": 980808000.0, "18660": 973065152.0, "18665": 968928256.0, "18670": 972232384.0, "18675": 950729984.0, "18680": 944083008.0, "18685": 980439424.0, "18690": 963210752.0, "18695": 981707904.0, "18700": 978893056.0, "18705": 950218816.0, "18710": 961421440.0, "18715": 984381312.0, "18720": 971669120.0, "18725": 978818368.0, "18730": 944611136.0, "18735": 967245376.0, "18740": 963536128.0, "18745": 973892352.0, "18750": 970200448.0, "18755": 957671744.0, "18760": 940065792.0, "18765": 977416960.0, "18770": 967288960.0, "18775": 974129856.0, "18780": 978350528.0, "18785": 957964736.0, "18790": 970323456.0, "18795": 982497088.0, "18800": 968240256.0, "18805": 977037120.0, "18810": 947896960.0, "18815": 988612992.0, "18820": 966438848.0, "18825": 969181888.0, "18830": 964904192.0, "18835": 953488576.0, "18840": 962720320.0, "18845": 958844224.0, "18850": 969585280.0, "18855": 980622592.0, "18860": 971568576.0, "18865": 944712256.0, "18870": 966393920.0, "18875": 972270400.0, "18880": 962263808.0, "18885": 959513152.0, "18890": 958003648.0, "18895": 991875712.0, "18900": 976023168.0, "18905": 969869184.0, "18910": 979628032.0, "18915": 954494400.0, "18920": 945975680.0, "18925": 978493504.0, "18930": 973547712.0, "18935": 982084992.0, "18940": 962666496.0, "18945": 933789504.0, "18950": 964863296.0, "18955": 974765248.0, "18960": 986348224.0, "18965": 972799744.0, "18970": 942518144.0, "18975": 960595072.0, "18980": 971032896.0, "18985": 964107648.0, "18990": 962558144.0, "18995": 952178752.0, "19000": 960378368.0, "19005": 975548736.0, "19010": 972461056.0, "19015": 986357824.0, "19020": 954409984.0, "19025": 953351232.0, "19030": 970317056.0, "19035": 970503040.0, "19040": 962318592.0, "19045": 961522240.0, "19050": 946725952.0, "19055": 970092544.0, "19060": 964757632.0, "19065": 981002624.0, "19070": 962843328.0, "19075": 933059200.0, "19080": 965113856.0, "19085": 981777664.0, "19090": 956732096.0, "19095": 974185792.0, "19100": 934428160.0, "19105": 955579712.0, "19110": 968241728.0, "19115": 972768704.0, "19120": 959068800.0, "19125": 953944128.0, "19130": 950169664.0, "19135": 964317824.0, "19140": 955434688.0, "19145": 975593472.0, "19150": 977306432.0, "19155": 933968064.0, "19160": 952927616.0, "19165": 977576192.0, "19170": 963703168.0, "19175": 969360256.0, "19180": 950848768.0, "19185": 964874880.0, "19190": 971920384.0, "19195": 956265088.0, "19200": 969452480.0, "19205": 976896960.0, "19210": 947430720.0, "19215": 972575680.0, "19220": 959425856.0, "19225": 978007936.0, "19230": 976556096.0, "19235": 952356992.0, "19240": 978093632.0, "19245": 981337344.0, "19250": 984504064.0, "19255": 966825536.0, "19260": 935682752.0, "19265": 964811904.0, "19270": 972864256.0, "19275": 966718464.0, "19280": 962031424.0, "19285": 972032192.0, "19290": 946509696.0, "19295": 973596096.0, "19300": 983707712.0, "19305": 958353664.0, "19310": 955094720.0, "19315": 950435520.0, "19320": 979758528.0, "19325": 968400896.0, "19330": 963727168.0, "19335": 974025536.0, "19340": 940062336.0, "19345": 965761664.0, "19350": 973180736.0, "19355": 981237632.0, "19360": 976490368.0, "19365": 957170112.0, "19370": 947958336.0, "19375": 962794112.0, "19380": 981762240.0, "19385": 988626752.0, "19390": 979350016.0, "19395": 928064384.0, "19400": 971699392.0, "19405": 978916288.0, "19410": 969003584.0, "19415": 975820864.0, "19420": 938906304.0, "19425": 962147712.0, "19430": 949581632.0, "19435": 964517696.0, "19440": 984189568.0, "19445": 948716480.0, "19450": 945895296.0, "19455": 965665600.0, "19460": 972664640.0, "19465": 974486464.0, "19470": 965448064.0, "19475": 947035392.0, "19480": 975316608.0, "19485": 965475520.0, "19490": 963094592.0, "19495": 969861312.0, "19500": 948180992.0, "19505": 968665408.0, "19510": 951246528.0, "19515": 961891008.0, "19520": 965803264.0, "19525": 946485632.0, "19530": 977646464.0, "19535": 985763712.0, "19540": 957583232.0, "19545": 965066816.0, "19550": 946982400.0, "19555": 951455296.0, "19560": 989812160.0, "19565": 979161280.0, "19570": 968241216.0, "19575": 959511488.0, "19580": 945433728.0, "19585": 964210688.0, "19590": 967591744.0, "19595": 972116288.0, "19600": 969457920.0, "19605": 946853696.0, "19610": 974119296.0, "19615": 971710144.0, "19620": 952649856.0, "19625": 971742464.0, "19630": 976360448.0, "19635": 940490240.0, "19640": 971919680.0, "19645": 967990272.0, "19650": 971213248.0, "19655": 981193280.0, "19660": 946422400.0, "19665": 957109184.0, "19670": 965308800.0, "19675": 965065984.0, "19680": 971908416.0, "19685": 942203840.0, "19690": 971226496.0, "19695": 965035072.0, "19700": 967885632.0, "19705": 977449408.0, "19710": 946078336.0, "19715": 966968064.0, "19720": 970598720.0, "19725": 981716224.0, "19730": 966108096.0, "19735": 970990784.0, "19740": 939894080.0, "19745": 966153792.0, "19750": 966740736.0, "19755": 978529984.0, "19760": 943198656.0, "19765": 935466368.0, "19770": 968555136.0, "19775": 963577728.0, "19780": 965650240.0, "19785": 964617152.0, "19790": 940121792.0, "19795": 968945408.0, "19800": 981668672.0, "19805": 971497152.0, "19810": 966967296.0, "19815": 946180736.0, "19820": 937468864.0, "19825": 970665408.0, "19830": 974045376.0, "19835": 981499776.0, "19840": 976925568.0, "19845": 938494336.0, "19850": 990916096.0, "19855": 970166848.0, "19860": 955321344.0, "19865": 972615872.0, "19870": 949571904.0, "19875": 979530112.0, "19880": 975358848.0, "19885": 967723712.0, "19890": 976310464.0, "19895": 946521344.0, "19900": 955207616.0, "19905": 975622528.0, "19910": 959765632.0, "19915": 976882496.0, "19920": 984648960.0, "19925": 964599488.0, "19930": 979387008.0, "19935": 978736000.0, "19940": 969559552.0, "19945": 959619264.0, "19950": 956288704.0, "19955": 975697408.0, "19960": 960256704.0, "19965": 973325952.0, "19970": 978001792.0, "19975": 944598592.0, "19980": 959525440.0, "19985": 964113728.0, "19990": 968710464.0, "19995": 957491584.0, "20000": 960736384.0, "20005": 939120448.0, "20010": 974802624.0, "20015": 967192320.0, "20020": 994363648.0, "20025": 961134016.0, "20030": 942177408.0, "20035": 966772096.0, "20040": 972717568.0, "20045": 979758976.0, "20050": 968191232.0, "20055": 946115712.0, "20060": 958141824.0, "20065": 984434688.0, "20070": 960224576.0, "20075": 972521472.0, "20080": 969505728.0, "20085": 948136000.0, "20090": 961134464.0, "20095": 962823616.0, "20100": 958848192.0, "20105": 971642624.0, "20110": 956923712.0, "20115": 967641152.0, "20120": 961169728.0, "20125": 969420352.0, "20130": 966034176.0, "20135": 946597760.0, "20140": 959385216.0, "20145": 969015296.0, "20150": 965629312.0, "20155": 977286912.0, "20160": 962835968.0, "20165": 966016512.0, "20170": 971378752.0, "20175": 974395328.0, "20180": 965206592.0, "20185": 957043072.0, "20190": 942196608.0, "20195": 961627776.0, "20200": 969077504.0, "20205": 976896000.0, "20210": 958614336.0, "20215": 937972608.0, "20220": 957770560.0, "20225": 971083840.0, "20230": 967356992.0, "20235": 970569664.0, "20240": 943944000.0, "20245": 943593664.0, "20250": 979660224.0, "20255": 976582720.0, "20260": 971213504.0, "20265": 953161984.0, "20270": 943156736.0, "20275": 971821056.0, "20280": 963107712.0, "20285": 970823424.0, "20290": 969095488.0, "20295": 939662336.0, "20300": 968869120.0, "20305": 961956352.0, "20310": 973162816.0, "20315": 991843712.0, "20320": 953245312.0, "20325": 948754560.0, "20330": 970202176.0, "20335": 962295168.0, "20340": 959756416.0, "20345": 961763648.0, "20350": 952750848.0, "20355": 975100544.0, "20360": 964095680.0, "20365": 971780992.0, "20370": 975489920.0, "20375": 943121472.0, "20380": 968961600.0, "20385": 965496704.0, "20390": 969480640.0, "20395": 985897600.0, "20400": 957137728.0, "20405": 964788992.0, "20410": 980857088.0, "20415": 966502144.0, "20420": 964739840.0, "20425": 947787520.0, "20430": 932691968.0, "20435": 981343488.0, "20440": 968918528.0, "20445": 980380736.0, "20450": 959509120.0, "20455": 939348416.0, "20460": 955433984.0, "20465": 980312384.0, "20470": 977842816.0, "20475": 968879616.0, "20480": 945996416.0, "20485": 957640512.0, "20490": 979249920.0, "20495": 963446784.0, "20500": 974222272.0, "20505": 961565312.0, "20510": 944464512.0, "20515": 981506432.0, "20520": 989535232.0, "20525": 979577216.0, "20530": 962187520.0, "20535": 945669248.0, "20540": 970226368.0, "20545": 957542720.0, "20550": 966079616.0, "20555": 985519360.0, "20560": 944072192.0, "20565": 965152320.0, "20570": 968740096.0, "20575": 961965312.0, "20580": 953526592.0, "20585": 954135168.0, "20590": 986085120.0, "20595": 964475072.0, "20600": 960823232.0, "20605": 964729152.0, "20610": 944749376.0, "20615": 935682688.0, "20620": 975280256.0, "20625": 974425792.0, "20630": 970287872.0, "20635": 984370176.0, "20640": 953802240.0, "20645": 970097600.0, "20650": 972646656.0, "20655": 956608832.0, "20660": 965998656.0, "20665": 943113536.0, "20670": 958763200.0, "20675": 981738752.0, "20680": 970755200.0, "20685": 978234304.0, "20690": 965061248.0, "20695": 935216832.0, "20700": 971460352.0, "20705": 964299712.0, "20710": 976012480.0, "20715": 975968384.0, "20720": 954084096.0, "20725": 975895296.0, "20730": 977073408.0, "20735": 961377472.0, "20740": 991416256.0, "20745": 956105856.0, "20750": 956792448.0, "20755": 975380672.0, "20760": 985261760.0, "20765": 984414400.0, "20770": 950290560.0, "20775": 928637888.0, "20780": 967329664.0, "20785": 976582400.0, "20790": 961099136.0, "20795": 974132224.0, "20800": 949701632.0, "20805": 987259328.0, "20810": 979723456.0, "20815": 970048000.0, "20820": 965679552.0, "20825": 948706432.0, "20830": 976443520.0, "20835": 962781568.0, "20840": 974899840.0, "20845": 971314304.0, "20850": 939644800.0, "20855": 963998400.0, "20860": 974605120.0, "20865": 968020224.0, "20870": 963462400.0, "20875": 963852224.0, "20880": 954201408.0, "20885": 972070912.0, "20890": 976584000.0, "20895": 964157952.0, "20900": 982536448.0, "20905": 935112512.0, "20910": 963326464.0, "20915": 965956608.0, "20920": 983105088.0, "20925": 969679936.0, "20930": 950874176.0, "20935": 943406080.0, "20940": 987682048.0, "20945": 965220608.0, "20950": 951597312.0, "20955": 957062784.0, "20960": 938291520.0, "20965": 966407168.0, "20970": 968888128.0, "20975": 965060672.0, "20980": 973075584.0, "20985": 940291456.0, "20990": 969119872.0, "20995": 986679616.0, "21000": 979131072.0, "21005": 957054336.0, "21010": 951001408.0, "21015": 978524800.0, "21020": 974970496.0, "21025": 974617984.0, "21030": 967764608.0, "21035": 968216448.0, "21040": 977776320.0, "21045": 980010432.0, "21050": 963422592.0, "21055": 971888768.0, "21060": 970133696.0, "21065": 944540096.0, "21070": 984857792.0, "21075": 977210496.0, "21080": 967810304.0, "21085": 966433408.0, "21090": 948360448.0, "21095": 986958400.0, "21100": 980753664.0, "21105": 960489856.0, "21110": 963164032.0, "21115": 962963584.0, "21120": 964024192.0, "21125": 976133760.0, "21130": 972193536.0, "21135": 970945344.0, "21140": 960367872.0, "21145": 935262976.0, "21150": 965185152.0, "21155": 999656192.0, "21160": 972701120.0, "21165": 988076672.0, "21170": 955338752.0, "21175": 969692800.0, "21180": 967208000.0, "21185": 967800448.0, "21190": 977552448.0, "21195": 966050752.0, "21200": 954523200.0, "21205": 980323648.0, "21210": 963707328.0, "21215": 985165568.0, "21220": 967261056.0, "21225": 958666432.0, "21230": 961314624.0, "21235": 970724800.0, "21240": 980644224.0, "21245": 974099200.0, "21250": 945989952.0, "21255": 982531648.0, "21260": 971577920.0, "21265": 968163456.0, "21270": 952561024.0, "21275": 945695872.0, "21280": 965104960.0, "21285": 980846720.0, "21290": 974392576.0, "21295": 970255744.0, "21300": 925512512.0, "21305": 955644416.0, "21310": 987344768.0, "21315": 988374912.0, "21320": 966034112.0, "21325": 970366784.0, "21330": 957177728.0, "21335": 978709312.0, "21340": 976417280.0, "21345": 978789568.0, "21350": 979623744.0, "21355": 946377216.0, "21360": 964263168.0, "21365": 967202688.0, "21370": 991961472.0, "21375": 971613888.0, "21380": 957458304.0, "21385": 942371520.0, "21390": 958140096.0, "21395": 983325952.0, "21400": 970253440.0, "21405": 961116800.0, "21410": 962848896.0, "21415": 963800448.0, "21420": 963114560.0, "21425": 980576384.0, "21430": 962118976.0, "21435": 949292800.0, "21440": 963747328.0, "21445": 969010368.0, "21450": 977216832.0, "21455": 972394752.0, "21460": 961584320.0, "21465": 968649856.0, "21470": 966831232.0, "21475": 964024000.0, "21480": 964913920.0, "21485": 938069184.0, "21490": 942225792.0, "21495": 968883584.0, "21500": 968856576.0, "21505": 976946944.0, "21510": 985276864.0, "21515": 942973568.0, "21520": 976178688.0, "21525": 981461760.0, "21530": 977972736.0, "21535": 970822656.0, "21540": 939753856.0, "21545": 958281344.0, "21550": 975050880.0, "21555": 977941440.0, "21560": 955472960.0, "21565": 970913536.0, "21570": 934508736.0, "21575": 960511296.0, "21580": 972204352.0, "21585": 963608576.0, "21590": 966525376.0, "21595": 949652160.0, "21600": 974066752.0, "21605": 966504704.0, "21610": 952867840.0, "21615": 969208256.0, "21620": 944466688.0, "21625": 956620224.0, "21630": 965983232.0, "21635": 984408448.0, "21640": 963871104.0, "21645": 944327680.0, "21650": 948383744.0, "21655": 963022400.0, "21660": 965689216.0, "21665": 992282176.0, "21670": 955751424.0, "21675": 950062720.0, "21680": 975184128.0, "21685": 978708928.0, "21690": 974871552.0, "21695": 952475264.0, "21700": 952842496.0, "21705": 967273856.0, "21710": 976867584.0, "21715": 973135552.0, "21720": 954626880.0, "21725": 936850048.0, "21730": 960727040.0, "21735": 978850944.0, "21740": 970068672.0, "21745": 955812544.0, "21750": 944214912.0, "21755": 960897664.0, "21760": 973619392.0, "21765": 966955136.0, "21770": 978764544.0, "21775": 980920256.0, "21780": 946388608.0, "21785": 979735808.0, "21790": 976564096.0, "21795": 974930432.0, "21800": 971024768.0, "21805": 949085184.0, "21810": 969324416.0, "21815": 953104384.0, "21820": 977010176.0, "21825": 963895808.0, "21830": 962880512.0, "21835": 958003456.0, "21840": 968637376.0, "21845": 964147264.0, "21850": 965111680.0, "21855": 981442432.0, "21860": 950321920.0, "21865": 954845632.0, "21870": 968256000.0, "21875": 971332288.0, "21880": 964264064.0, "21885": 960565824.0, "21890": 968280128.0, "21895": 970833728.0, "21900": 971388544.0, "21905": 959073280.0, "21910": 982142976.0, "21915": 949971264.0, "21920": 969836224.0, "21925": 969943936.0, "21930": 962800896.0, "21935": 943550784.0, "21940": 940228928.0, "21945": 976455936.0, "21950": 967910016.0, "21955": 967486720.0, "21960": 978247232.0, "21965": 937968576.0, "21970": 975550656.0, "21975": 964717952.0, "21980": 958127744.0, "21985": 974239744.0, "21990": 952844160.0, "21995": 939578176.0, "22000": 965552128.0, "22005": 966566144.0, "22010": 969406080.0, "22015": 966183744.0, "22020": 950852736.0, "22025": 975288320.0, "22030": 978604544.0, "22035": 985870016.0, "22040": 964251200.0, "22045": 938904256.0, "22050": 976534528.0, "22055": 972142976.0, "22060": 969975424.0, "22065": 969917184.0, "22070": 955033152.0, "22075": 944861184.0, "22080": 966849344.0, "22085": 966369088.0, "22090": 979145152.0, "22095": 971818112.0, "22100": 948924800.0, "22105": 963500096.0, "22110": 975521152.0, "22115": 973752128.0, "22120": 984555008.0, "22125": 950891264.0, "22130": 969574912.0, "22135": 955729792.0, "22140": 974555776.0, "22145": 968858240.0, "22150": 944026304.0, "22155": 960610880.0, "22160": 981716288.0, "22165": 968039936.0, "22170": 956510592.0, "22175": 928427712.0, "22180": 966147328.0, "22185": 972285696.0, "22190": 957354432.0, "22195": 956591936.0, "22200": 949783168.0, "22205": 941651520.0, "22210": 985907072.0, "22215": 978838976.0, "22220": 964248384.0, "22225": 975791680.0, "22230": 937205568.0, "22235": 957220352.0, "22240": 974411200.0, "22245": 974065152.0, "22250": 962007296.0, "22255": 965828416.0, "22260": 942931904.0, "22265": 974323968.0, "22270": 983488448.0, "22275": 964652672.0, "22280": 959558272.0, "22285": 952906496.0, "22290": 954115584.0, "22295": 960092928.0, "22300": 982795520.0, "22305": 972326144.0, "22310": 936499456.0, "22315": 968342528.0, "22320": 956228992.0, "22325": 957704320.0, "22330": 973198848.0, "22335": 946689088.0, "22340": 951660160.0, "22345": 969778368.0, "22350": 970076800.0, "22355": 966494080.0, "22360": 954836992.0, "22365": 968771200.0, "22370": 964937216.0, "22375": 975409920.0, "22380": 975765120.0, "22385": 963150208.0, "22390": 951715456.0, "22395": 971223744.0, "22400": 966947520.0, "22405": 961586880.0, "22410": 969681664.0, "22415": 936935168.0, "22420": 972481984.0, "22425": 965149632.0, "22430": 978339328.0, "22435": 973599744.0, "22440": 931833408.0, "22445": 951578176.0, "22450": 975256384.0, "22455": 960061504.0, "22460": 967477568.0, "22465": 987958592.0, "22470": 943332224.0, "22475": 975371712.0, "22480": 978506112.0, "22485": 977026688.0, "22490": 966395584.0, "22495": 942064128.0, "22500": 980783872.0, "22505": 957299008.0, "22510": 984672832.0, "22515": 966363456.0, "22520": 951734400.0, "22525": 965640000.0, "22530": 985626944.0, "22535": 973832704.0, "22540": 983125504.0, "22545": 952189056.0, "22550": 946777088.0, "22555": 972088064.0, "22560": 972537920.0, "22565": 966590016.0, "22570": 980134080.0, "22575": 947857344.0, "22580": 963925760.0, "22585": 947635776.0, "22590": 976062656.0, "22595": 982929728.0, "22600": 950264064.0, "22605": 978107008.0, "22610": 969092096.0, "22615": 950911232.0, "22620": 985022976.0, "22625": 959099136.0, "22630": 943706752.0, "22635": 954819072.0, "22640": 971169792.0, "22645": 988791168.0, "22650": 956740992.0, "22655": 945617920.0, "22660": 962659648.0, "22665": 971512384.0, "22670": 969287232.0, "22675": 978913920.0, "22680": 940127808.0, "22685": 958822144.0, "22690": 978084864.0, "22695": 958887680.0, "22700": 978417920.0, "22705": 954619776.0, "22710": 958467136.0, "22715": 972642688.0, "22720": 983204736.0, "22725": 961985408.0, "22730": 959234432.0, "22735": 948914752.0, "22740": 968654592.0, "22745": 965848320.0, "22750": 998210368.0, "22755": 974238400.0, "22760": 940236096.0, "22765": 967194176.0, "22770": 969031296.0, "22775": 949947840.0, "22780": 977922560.0, "22785": 955835072.0, "22790": 955314176.0, "22795": 976348160.0, "22800": 970434944.0, "22805": 962764032.0, "22810": 958915648.0, "22815": 945518528.0, "22820": 978604992.0, "22825": 966189184.0, "22830": 966580288.0, "22835": 969349888.0, "22840": 940560192.0, "22845": 973252480.0, "22850": 975093440.0, "22855": 951949440.0, "22860": 963869376.0, "22865": 949077376.0, "22870": 977529920.0, "22875": 974651136.0, "22880": 969767936.0, "22885": 977360000.0, "22890": 946943232.0, "22895": 944158336.0, "22900": 961893248.0, "22905": 971695232.0, "22910": 979847360.0, "22915": 971449344.0, "22920": 946243392.0, "22925": 970860352.0, "22930": 975883264.0, "22935": 969311168.0, "22940": 972723008.0, "22945": 940769024.0, "22950": 975565312.0, "22955": 972440256.0, "22960": 976089728.0, "22965": 965032896.0, "22970": 956951296.0, "22975": 931257600.0, "22980": 979264256.0, "22985": 959890816.0, "22990": 965137792.0, "22995": 958638400.0, "23000": 965940928.0, "23005": 972947392.0, "23010": 964742848.0, "23015": 968680064.0, "23020": 981688064.0, "23025": 955788864.0, "23030": 990828928.0, "23035": 968641536.0, "23040": 973906112.0, "23045": 959290496.0, "23050": 947753088.0, "23055": 949380160.0, "23060": 966646592.0, "23065": 980618240.0, "23070": 960584704.0, "23075": 957495744.0, "23080": 941365440.0, "23085": 975619712.0, "23090": 979867328.0, "23095": 965804544.0, "23100": 986516992.0, "23105": 952084096.0, "23110": 970819712.0, "23115": 970185408.0, "23120": 974876160.0, "23125": 971695552.0, "23130": 941274624.0, "23135": 948277632.0, "23140": 980447104.0, "23145": 979943552.0, "23150": 982390144.0, "23155": 969999296.0, "23160": 936489024.0, "23165": 969741696.0, "23170": 981215936.0, "23175": 991932544.0, "23180": 958330432.0, "23185": 950333312.0, "23190": 956434304.0, "23195": 982119168.0, "23200": 977819392.0, "23205": 967948736.0, "23210": 951754368.0, "23215": 931253056.0, "23220": 966504768.0, "23225": 958629312.0, "23230": 977475648.0, "23235": 964592384.0, "23240": 958203264.0, "23245": 973857344.0, "23250": 950713408.0, "23255": 973957376.0, "23260": 963662336.0, "23265": 954380224.0, "23270": 981346368.0, "23275": 974594752.0, "23280": 966934976.0, "23285": 971595264.0, "23290": 940360064.0, "23295": 965070592.0, "23300": 960141888.0, "23305": 952521984.0, "23310": 966737856.0, "23315": 958344512.0, "23320": 959743296.0, "23325": 972195072.0, "23330": 985902912.0, "23335": 958487040.0, "23340": 936160064.0, "23345": 945433472.0, "23350": 977039488.0, "23355": 988704192.0, "23360": 966783040.0, "23365": 985332352.0, "23370": 938684352.0, "23375": 962907712.0, "23380": 961648448.0, "23385": 989367168.0, "23390": 986816960.0, "23395": 959540672.0, "23400": 961949120.0, "23405": 958955200.0, "23410": 965964736.0, "23415": 981645952.0, "23420": 951967552.0, "23425": 937969856.0, "23430": 971168384.0, "23435": 969543104.0, "23440": 976321280.0, "23445": 971868672.0, "23450": 955568448.0, "23455": 972562496.0, "23460": 964866304.0, "23465": 950838976.0, "23470": 978519488.0, "23475": 957678784.0, "23480": 961190016.0, "23485": 985448832.0, "23490": 961720000.0, "23495": 974364480.0, "23500": 956531136.0, "23505": 948718912.0, "23510": 975608960.0, "23515": 968097536.0, "23520": 961741696.0, "23525": 977671872.0, "23530": 945938048.0, "23535": 975953600.0, "23540": 979066496.0, "23545": 979502720.0, "23550": 975492096.0, "23555": 940316544.0, "23560": 963717632.0, "23565": 964986624.0, "23570": 978306304.0, "23575": 945494144.0, "23580": 941551936.0, "23585": 958849152.0, "23590": 972289408.0, "23595": 970261312.0, "23600": 966201600.0, "23605": 961806272.0, "23610": 937656704.0, "23615": 973480256.0, "23620": 973145024.0, "23625": 981388480.0, "23630": 969953792.0, "23635": 949431424.0, "23640": 961721600.0, "23645": 982384192.0, "23650": 967860352.0, "23655": 969251904.0, "23660": 953668352.0, "23665": 951757440.0, "23670": 985154560.0, "23675": 972642432.0, "23680": 976162432.0, "23685": 959406080.0, "23690": 932345920.0, "23695": 963781376.0, "23700": 966944256.0, "23705": 977710400.0, "23710": 961550016.0, "23715": 954783680.0, "23720": 988370496.0, "23725": 968508352.0, "23730": 970200448.0, "23735": 958983616.0, "23740": 952009600.0, "23745": 948442816.0, "23750": 979139968.0, "23755": 967348864.0, "23760": 972155520.0, "23765": 947805632.0, "23770": 951372288.0, "23775": 967979904.0, "23780": 967169664.0, "23785": 962500288.0, "23790": 979788736.0, "23795": 943855168.0, "23800": 974725824.0, "23805": 974349504.0, "23810": 973364992.0, "23815": 971353472.0, "23820": 958570176.0, "23825": 959877952.0, "23830": 965336512.0, "23835": 970057664.0, "23840": 980429184.0, "23845": 944087936.0, "23850": 945186688.0, "23855": 970395968.0, "23860": 970504960.0, "23865": 970788544.0, "23870": 986389696.0, "23875": 947722432.0, "23880": 955235648.0, "23885": 972186560.0, "23890": 972156352.0, "23895": 984792960.0, "23900": 951336256.0, "23905": 954510144.0, "23910": 971654784.0, "23915": 962336192.0, "23920": 960480448.0, "23925": 943657728.0, "23930": 948584192.0, "23935": 981474944.0, "23940": 971026624.0, "23945": 970178624.0, "23950": 972119168.0, "23955": 943982144.0, "23960": 973914752.0, "23965": 976822080.0, "23970": 963563776.0, "23975": 960572416.0, "23980": 944965376.0, "23985": 969454272.0, "23990": 997633984.0, "23995": 975584512.0, "24000": 962982592.0, "24005": 936955712.0, "24010": 949694912.0, "24015": 967951168.0, "24020": 985816704.0, "24025": 967111040.0, "24030": 966560704.0, "24035": 937547392.0, "24040": 963392384.0, "24045": 984638208.0, "24050": 962514880.0, "24055": 968398592.0, "24060": 946953792.0, "24065": 975174272.0, "24070": 974961408.0, "24075": 975411072.0, "24080": 979913984.0, "24085": 956839104.0, "24090": 970314368.0, "24095": 963496576.0, "24100": 977312192.0, "24105": 981236288.0, "24110": 968341888.0, "24115": 953159424.0, "24120": 963011712.0, "24125": 964068032.0, "24130": 965229696.0, "24135": 959334720.0, "24140": 965762880.0, "24145": 966381120.0, "24150": 968699392.0, "24155": 955074304.0, "24160": 971818432.0, "24165": 939498560.0, "24170": 959885120.0, "24175": 981098688.0, "24180": 968262784.0, "24185": 957217920.0, "24190": 951073536.0, "24195": 952150592.0, "24200": 969406464.0, "24205": 972009344.0, "24210": 962535936.0, "24215": 931958080.0, "24220": 956790784.0, "24225": 981089984.0, "24230": 959009600.0, "24235": 948137664.0, "24240": 969110080.0, "24245": 949633408.0, "24250": 955876352.0, "24255": 973133184.0, "24260": 968724096.0, "24265": 968294784.0, "24270": 953505536.0, "24275": 961500544.0, "24280": 958418752.0, "24285": 986984384.0, "24290": 969797632.0, "24295": 964597504.0, "24300": 946052544.0, "24305": 980405824.0, "24310": 971641472.0, "24315": 974382272.0, "24320": 964884608.0, "24325": 939089216.0, "24330": 955542400.0, "24335": 966914240.0, "24340": 991522112.0, "24345": 971016256.0, "24350": 924297600.0, "24355": 969017984.0, "24360": 967196416.0, "24365": 963896128.0, "24370": 968227264.0, "24375": 974197952.0, "24380": 953492928.0, "24385": 977932288.0, "24390": 974656256.0, "24395": 956441216.0, "24400": 968623552.0, "24405": 943043520.0, "24410": 979717760.0, "24415": 982052224.0, "24420": 958701440.0, "24425": 972899904.0, "24430": 944677184.0, "24435": 958615360.0, "24440": 978723904.0, "24445": 972605504.0, "24450": 951085248.0, "24455": 941423168.0, "24460": 947295296.0, "24465": 975075392.0, "24470": 988046272.0, "24475": 973915328.0, "24480": 962592960.0, "24485": 921007680.0, "24490": 970006912.0, "24495": 982438656.0, "24500": 976206528.0, "24505": 966765696.0, "24510": 958246016.0, "24515": 1000089792.0, "24520": 954560384.0, "24525": 965744512.0, "24530": 958743872.0, "24535": 957887744.0, "24540": 956576960.0, "24545": 968619264.0, "24550": 962638336.0, "24555": 958835008.0, "24560": 954368640.0, "24565": 947466304.0, "24570": 966778368.0, "24575": 973607360.0, "24580": 965555968.0, "24585": 958008896.0, "24590": 941951488.0, "24595": 973534080.0, "24600": 970890816.0, "24605": 971446208.0, "24610": 953368896.0, "24615": 942877824.0, "24620": 958756544.0, "24625": 979941568.0, "24630": 979409728.0, "24635": 976323840.0, "24640": 941716416.0, "24645": 956421568.0, "24650": 954228864.0, "24655": 981157952.0, "24660": 956254784.0, "24665": 957842944.0, "24670": 952600704.0, "24675": 960817472.0, "24680": 970431872.0, "24685": 964530304.0, "24690": 958971840.0, "24695": 938121792.0, "24700": 980350656.0, "24705": 970555968.0, "24710": 959291456.0, "24715": 972261696.0, "24720": 947429248.0, "24725": 962311936.0, "24730": 961372480.0, "24735": 951552192.0, "24740": 981715008.0, "24745": 964159808.0, "24750": 949033536.0, "24755": 970943424.0, "24760": 963132096.0, "24765": 987640512.0, "24770": 969057024.0, "24775": 942017344.0, "24780": 968023808.0, "24785": 970567104.0, "24790": 960729792.0, "24795": 961555584.0, "24800": 945512128.0, "24805": 933549056.0, "24810": 965066688.0, "24815": 963344128.0, "24820": 972270016.0, "24825": 960449280.0, "24830": 962933952.0, "24835": 979100800.0, "24840": 956538688.0, "24845": 963919680.0, "24850": 973706496.0, "24855": 948978368.0, "24860": 989245504.0, "24865": 994243392.0, "24870": 970913088.0, "24875": 963958208.0, "24880": 948093952.0, "24885": 964516288.0, "24890": 970247552.0, "24895": 973639552.0, "24900": 966249984.0, "24905": 950187136.0, "24910": 965856832.0, "24915": 957541056.0, "24920": 985676800.0, "24925": 976050816.0, "24930": 965156096.0, "24935": 914747776.0, "24940": 962938560.0, "24945": 982624768.0, "24950": 982679616.0, "24955": 961931648.0, "24960": 948530240.0, "24965": 972782272.0, "24970": 964822464.0, "24975": 967883968.0, "24980": 965040320.0, "24985": 956415744.0, "24990": 971310528.0, "24995": 967391680.0, "25000": 954523712.0, "25005": 969450624.0, "25010": 973183168.0, "25015": 956299008.0, "25020": 980885952.0, "25025": 959555008.0, "25030": 957681344.0, "25035": 965316352.0, "25040": 949790912.0, "25045": 977887104.0, "25050": 967968896.0, "25055": 962325184.0, "25060": 969474944.0, "25065": 950428544.0, "25070": 951902656.0, "25075": 970045504.0, "25080": 962995712.0, "25085": 966771136.0, "25090": 959491392.0, "25095": 935635968.0, "25100": 990226944.0, "25105": 977402816.0, "25110": 977492544.0, "25115": 963095104.0, "25120": 950524416.0, "25125": 960735232.0, "25130": 972981184.0, "25135": 976465728.0, "25140": 972141184.0, "25145": 938235904.0, "25150": 961543040.0, "25155": 965008960.0, "25160": 964322624.0, "25165": 977871808.0, "25170": 937861568.0, "25175": 971790144.0, "25180": 976352512.0, "25185": 984233984.0, "25190": 975155584.0, "25195": 958700800.0, "25200": 951606144.0, "25205": 951301824.0, "25210": 963626560.0, "25215": 975555008.0, "25220": 973046464.0, "25225": 943706496.0, "25230": 966926656.0, "25235": 968694720.0, "25240": 963853632.0, "25245": 963927168.0, "25250": 955407616.0, "25255": 962068608.0, "25260": 972810048.0, "25265": 965193344.0, "25270": 975826240.0, "25275": 965463040.0, "25280": 941116288.0, "25285": 970703296.0, "25290": 957631616.0, "25295": 964512256.0, "25300": 970113728.0, "25305": 943867520.0, "25310": 964433088.0, "25315": 975132416.0, "25320": 963093952.0, "25325": 956488768.0, "25330": 949876608.0, "25335": 977791424.0, "25340": 965992512.0, "25345": 975550208.0, "25350": 974380160.0, "25355": 955371136.0, "25360": 954584640.0, "25365": 975419200.0, "25370": 987247104.0, "25375": 968019648.0, "25380": 980690240.0, "25385": 935582848.0, "25390": 968086656.0, "25395": 969021056.0, "25400": 975137856.0, "25405": 984657664.0, "25410": 940563200.0, "25415": 973939776.0, "25420": 963957568.0, "25425": 980911680.0, "25430": 970929920.0, "25435": 943341056.0, "25440": 946577088.0, "25445": 972253184.0, "25450": 979742656.0, "25455": 976952640.0, "25460": 951541632.0, "25465": 954784512.0, "25470": 980735168.0, "25475": 961728448.0, "25480": 957579200.0, "25485": 968416832.0, "25490": 956737728.0, "25495": 985948864.0, "25500": 974168384.0, "25505": 993200448.0, "25510": 963468224.0, "25515": 961360640.0, "25520": 960735424.0, "25525": 984694016.0, "25530": 970777024.0, "25535": 970863232.0, "25540": 955703744.0, "25545": 951174912.0, "25550": 972753152.0, "25555": 973331136.0, "25560": 977309888.0, "25565": 964284480.0, "25570": 930680192.0, "25575": 973809472.0, "25580": 976876160.0, "25585": 978208704.0, "25590": 981382912.0, "25595": 940618048.0, "25600": 948513216.0, "25605": 969402560.0, "25610": 974690496.0, "25615": 967838848.0, "25620": 970664192.0, "25625": 948249600.0, "25630": 980388224.0, "25635": 966140928.0, "25640": 976574336.0, "25645": 975101504.0, "25650": 954119232.0, "25655": 975552896.0, "25660": 968766848.0, "25665": 964008576.0, "25670": 976390144.0, "25675": 957833920.0, "25680": 943259776.0, "25685": 973540480.0, "25690": 970358016.0, "25695": 981173760.0, "25700": 957805504.0, "25705": 948879232.0, "25710": 978719808.0, "25715": 978657920.0, "25720": 967005440.0, "25725": 952203904.0, "25730": 957843840.0, "25735": 976772096.0, "25740": 977448704.0, "25745": 972614144.0, "25750": 958340672.0, "25755": 958216704.0, "25760": 960596096.0, "25765": 987843456.0, "25770": 974430592.0, "25775": 969406656.0, "25780": 967210240.0, "25785": 954205056.0, "25790": 964993920.0, "25795": 970184832.0, "25800": 971391488.0, "25805": 980763520.0, "25810": 938471040.0, "25815": 973379136.0, "25820": 964492992.0, "25825": 973123904.0, "25830": 986039936.0, "25835": 939640832.0, "25840": 974726592.0, "25845": 967750080.0, "25850": 967408000.0, "25855": 987968832.0, "25860": 940698112.0, "25865": 942847616.0, "25870": 969566976.0, "25875": 968110976.0, "25880": 965027200.0, "25885": 946489856.0, "25890": 951712384.0, "25895": 975813248.0, "25900": 968146432.0, "25905": 974957568.0, "25910": 963381440.0, "25915": 944330880.0, "25920": 963957888.0, "25925": 971017728.0, "25930": 962362496.0, "25935": 973917056.0, "25940": 952914624.0, "25945": 981770688.0, "25950": 981881728.0, "25955": 965237184.0, "25960": 961090880.0, "25965": 944432384.0, "25970": 974824960.0, "25975": 991837376.0, "25980": 957119104.0, "25985": 981354112.0, "25990": 967496320.0, "25995": 930921408.0, "26000": 972951104.0, "26005": 968459392.0, "26010": 979103808.0, "26015": 973459072.0, "26020": 953657792.0, "26025": 969713408.0, "26030": 968165632.0, "26035": 984756544.0, "26040": 967086848.0, "26045": 957686016.0, "26050": 961238784.0, "26055": 959758080.0, "26060": 967067776.0, "26065": 981278976.0, "26070": 976569472.0, "26075": 937296192.0, "26080": 966663040.0, "26085": 964855808.0, "26090": 979332672.0, "26095": 980998016.0, "26100": 959069056.0, "26105": 973020224.0, "26110": 967484224.0, "26115": 970335552.0, "26120": 968963200.0, "26125": 949882432.0, "26130": 967323328.0, "26135": 978920704.0, "26140": 971789760.0, "26145": 964668928.0, "26150": 951049984.0, "26155": 954456832.0, "26160": 967932864.0, "26165": 977206528.0, "26170": 976266368.0, "26175": 967171840.0, "26180": 950088704.0, "26185": 970305344.0, "26190": 991630464.0, "26195": 964254592.0, "26200": 987324160.0, "26205": 948162560.0, "26210": 971834048.0, "26215": 971421440.0, "26220": 969991360.0, "26225": 973867136.0, "26230": 955809920.0, "26235": 943592064.0, "26240": 974057408.0, "26245": 960823168.0, "26250": 969751296.0, "26255": 968293696.0, "26260": 952439552.0, "26265": 984714112.0, "26270": 964052416.0, "26275": 976784128.0, "26280": 982287744.0, "26285": 958898432.0, "26290": 982748672.0, "26295": 966268352.0, "26300": 957871360.0, "26305": 980775040.0, "26310": 955578048.0, "26315": 941875968.0, "26320": 964094912.0, "26325": 965383424.0, "26330": 960993792.0, "26335": 955184128.0, "26340": 939315968.0, "26345": 961648768.0, "26350": 967264704.0, "26355": 965978752.0, "26360": 972027776.0, "26365": 950163712.0, "26370": 969627136.0, "26375": 974682176.0, "26380": 967933696.0, "26385": 975695872.0, "26390": 951889280.0, "26395": 966907776.0, "26400": 975594048.0, "26405": 964399616.0, "26410": 957351808.0, "26415": 968523200.0, "26420": 947540608.0, "26425": 972755840.0, "26430": 966999232.0, "26435": 962746752.0, "26440": 964269696.0, "26445": 943174272.0, "26450": 966274432.0, "26455": 979025408.0, "26460": 964844352.0, "26465": 968440640.0, "26470": 945827904.0, "26475": 972010944.0, "26480": 978744000.0, "26485": 979511104.0, "26490": 975273280.0, "26495": 965201152.0, "26500": 963615488.0, "26505": 970391232.0, "26510": 959538496.0, "26515": 966010112.0, "26520": 972854528.0, "26525": 935441216.0, "26530": 970741312.0, "26535": 958532480.0, "26540": 961994240.0, "26545": 968191744.0, "26550": 956092736.0, "26555": 966120896.0, "26560": 964556480.0, "26565": 965458432.0, "26570": 962700160.0, "26575": 949065408.0, "26580": 963657984.0, "26585": 963338432.0, "26590": 964829184.0, "26595": 964935424.0, "26600": 970217856.0, "26605": 937948544.0, "26610": 971052928.0, "26615": 969858176.0, "26620": 979239680.0, "26625": 960039680.0, "26630": 944242624.0, "26635": 959488896.0, "26640": 968954240.0, "26645": 965588672.0, "26650": 967122560.0, "26655": 940525824.0, "26660": 945964032.0, "26665": 972682304.0, "26670": 969383424.0, "26675": 967439232.0, "26680": 951971072.0, "26685": 945945728.0, "26690": 967529600.0, "26695": 971274752.0, "26700": 975782848.0, "26705": 982842048.0, "26710": 952883776.0, "26715": 963957888.0, "26720": 986652352.0, "26725": 951987200.0, "26730": 970067456.0, "26735": 960229824.0, "26740": 961107904.0, "26745": 964974720.0, "26750": 967146624.0, "26755": 977368000.0, "26760": 975249088.0, "26765": 943045568.0, "26770": 972509760.0, "26775": 974542144.0, "26780": 962779264.0, "26785": 969658432.0, "26790": 943009984.0, "26795": 948489216.0, "26800": 965788800.0, "26805": 963927552.0, "26810": 967422336.0, "26815": 951742336.0, "26820": 969848832.0, "26825": 963277376.0, "26830": 979997568.0, "26835": 971413568.0, "26840": 957810496.0, "26845": 946348672.0, "26850": 983130496.0, "26855": 970692288.0, "26860": 973172224.0, "26865": 963191360.0, "26870": 954429248.0, "26875": 972791680.0, "26880": 972779200.0, "26885": 968639872.0, "26890": 961239168.0, "26895": 938212800.0, "26900": 967420544.0, "26905": 967154048.0, "26910": 957283584.0, "26915": 974536128.0, "26920": 943696384.0, "26925": 951184000.0, "26930": 961651584.0, "26935": 976181568.0, "26940": 966975616.0, "26945": 973702400.0, "26950": 949592640.0, "26955": 971797952.0, "26960": 972835648.0, "26965": 973067456.0, "26970": 971092416.0, "26975": 949102528.0, "26980": 981715008.0, "26985": 975937408.0, "26990": 975136000.0, "26995": 961705984.0, "27000": 952517952.0, "27005": 956632448.0, "27010": 968572096.0, "27015": 962651968.0, "27020": 956301824.0, "27025": 990515712.0, "27030": 935915392.0, "27035": 954243456.0, "27040": 959378176.0, "27045": 978381248.0, "27050": 951651072.0, "27055": 951991040.0, "27060": 991322176.0, "27065": 964321024.0, "27070": 975271168.0, "27075": 965574144.0, "27080": 943340544.0, "27085": 962889856.0, "27090": 958757824.0, "27095": 980390336.0, "27100": 958523264.0, "27105": 939094464.0, "27110": 948833664.0, "27115": 973977216.0, "27120": 974072448.0, "27125": 971613056.0, "27130": 960144704.0, "27135": 948501248.0, "27140": 968356352.0, "27145": 973620800.0, "27150": 967693056.0, "27155": 975396096.0, "27160": 943922560.0, "27165": 974137856.0, "27170": 960957120.0, "27175": 967888960.0, "27180": 968746496.0, "27185": 975401472.0, "27190": 944771520.0, "27195": 961481856.0, "27200": 969522816.0, "27205": 975402240.0, "27210": 955803968.0, "27215": 950148160.0, "27220": 974843520.0, "27225": 962991744.0, "27230": 962889664.0, "27235": 953014016.0, "27240": 956534144.0, "27245": 963199232.0, "27250": 965852672.0, "27255": 969620032.0, "27260": 952861824.0, "27265": 962007552.0, "27270": 963401408.0, "27275": 992487680.0, "27280": 957800192.0, "27285": 964426624.0, "27290": 969267904.0, "27295": 932789440.0, "27300": 976000256.0, "27305": 962503424.0, "27310": 975193408.0, "27315": 968555008.0, "27320": 952019072.0, "27325": 965392704.0, "27330": 960525952.0, "27335": 984055680.0, "27340": 980095040.0, "27345": 941484096.0, "27350": 958113856.0, "27355": 957603840.0, "27360": 975555904.0, "27365": 968411968.0, "27370": 959146368.0, "27375": 939232192.0, "27380": 966079744.0, "27385": 958887040.0, "27390": 957134080.0, "27395": 984942592.0, "27400": 952719232.0, "27405": 977464768.0, "27410": 983030336.0, "27415": 960643840.0, "27420": 972484672.0, "27425": 938826944.0, "27430": 960440384.0, "27435": 963292032.0, "27440": 963377216.0, "27445": 970779776.0, "27450": 935271360.0, "27455": 976238016.0, "27460": 961324672.0, "27465": 977227264.0, "27470": 971758592.0, "27475": 964919552.0, "27480": 949139136.0, "27485": 964181632.0, "27490": 969076928.0, "27495": 971255488.0, "27500": 967360896.0, "27505": 960690432.0, "27510": 966353984.0, "27515": 967971136.0, "27520": 979922368.0, "27525": 966798784.0, "27530": 952570304.0, "27535": 951756096.0, "27540": 975593856.0, "27545": 970630208.0, "27550": 965915200.0, "27555": 957290176.0, "27560": 941227712.0, "27565": 966693120.0, "27570": 963942272.0, "27575": 978962560.0, "27580": 952199488.0, "27585": 935643456.0, "27590": 963153024.0, "27595": 969302720.0, "27600": 976384128.0, "27605": 963922816.0, "27610": 945595776.0, "27615": 962442880.0, "27620": 972792832.0, "27625": 973333184.0, "27630": 963142528.0, "27635": 957910848.0, "27640": 949710912.0, "27645": 981422464.0, "27650": 972438144.0, "27655": 976819584.0, "27660": 973536576.0, "27665": 953891904.0, "27670": 956254464.0, "27675": 957933568.0, "27680": 958748160.0, "27685": 978045248.0, "27690": 932733312.0, "27695": 986551040.0, "27700": 967460736.0, "27705": 963206912.0, "27710": 953497536.0, "27715": 956069376.0, "27720": 949940736.0, "27725": 987472128.0, "27730": 969099904.0, "27735": 973044544.0, "27740": 962564224.0, "27745": 945694848.0, "27750": 972952192.0, "27755": 974997504.0, "27760": 971726720.0, "27765": 972085632.0, "27770": 946442240.0, "27775": 970667776.0, "27780": 979308672.0, "27785": 968801728.0, "27790": 975520896.0, "27795": 941442560.0, "27800": 959777280.0, "27805": 975802944.0, "27810": 955318272.0, "27815": 973854528.0, "27820": 985089600.0, "27825": 940233600.0, "27830": 968142848.0, "27835": 979577472.0, "27840": 959808704.0, "27845": 974235776.0, "27850": 949439552.0, "27855": 979707712.0, "27860": 985274816.0, "27865": 978656448.0, "27870": 983757440.0, "27875": 939789696.0, "27880": 971043584.0, "27885": 980649472.0, "27890": 970323456.0, "27895": 970170048.0, "27900": 937867648.0, "27905": 952531072.0, "27910": 979617344.0, "27915": 979714112.0, "27920": 953482752.0, "27925": 970775232.0, "27930": 948190848.0, "27935": 978766400.0, "27940": 991194624.0, "27945": 964256640.0, "27950": 969107328.0, "27955": 939478592.0, "27960": 976210176.0, "27965": 969857088.0, "27970": 961203328.0, "27975": 966485952.0, "27980": 942831360.0, "27985": 963246528.0, "27990": 976047808.0, "27995": 975505088.0, "28000": 976717760.0, "28005": 960293440.0, "28010": 940184960.0, "28015": 977438144.0, "28020": 964682176.0, "28025": 973764736.0, "28030": 966844864.0, "28035": 942501504.0, "28040": 960975296.0, "28045": 971441792.0, "28050": 974763200.0, "28055": 970877696.0, "28060": 949711872.0, "28065": 953179392.0, "28070": 972013824.0, "28075": 964512576.0, "28080": 973132800.0, "28085": 987600064.0, "28090": 941578368.0, "28095": 972836096.0, "28100": 971343232.0, "28105": 973174208.0, "28110": 985194048.0, "28115": 949747648.0, "28120": 992342528.0, "28125": 961859648.0, "28130": 968253952.0, "28135": 951607552.0, "28140": 936266368.0, "28145": 951093632.0, "28150": 962634880.0, "28155": 969317120.0, "28160": 965100480.0, "28165": 944770624.0, "28170": 944024512.0, "28175": 976244352.0, "28180": 972676160.0, "28185": 975973312.0, "28190": 952322496.0, "28195": 950144960.0, "28200": 973837760.0, "28205": 975876416.0, "28210": 978516352.0, "28215": 979193408.0, "28220": 936898368.0, "28225": 968557888.0, "28230": 973639360.0, "28235": 959059968.0, "28240": 967800768.0, "28245": 953045184.0, "28250": 959420352.0, "28255": 956183040.0, "28260": 959474048.0, "28265": 973346048.0, "28270": 973861248.0, "28275": 950809920.0, "28280": 964353344.0, "28285": 964736064.0, "28290": 969923840.0, "28295": 965245952.0, "28300": 954335680.0, "28305": 964673984.0, "28310": 963667200.0, "28315": 964568704.0, "28320": 962165952.0, "28325": 948792384.0, "28330": 970160320.0, "28335": 966349568.0, "28340": 977382848.0, "28345": 965165056.0, "28350": 959844160.0, "28355": 952046144.0, "28360": 966130112.0, "28365": 988517312.0, "28370": 971977728.0, "28375": 961742592.0, "28380": 949004864.0, "28385": 965207872.0, "28390": 972357632.0, "28395": 964944960.0, "28400": 983763008.0, "28405": 955728256.0, "28410": 980567872.0, "28415": 976035072.0, "28420": 970010752.0, "28425": 974348160.0, "28430": 940005248.0, "28435": 955790464.0, "28440": 976726272.0, "28445": 974038464.0, "28450": 981460416.0, "28455": 975242752.0, "28460": 955373184.0, "28465": 966633216.0, "28470": 956499392.0, "28475": 970140288.0, "28480": 976755200.0, "28485": 947695488.0, "28490": 949981824.0, "28495": 962992000.0, "28500": 990149952.0, "28505": 968831872.0, "28510": 949104384.0, "28515": 946708672.0, "28520": 970533120.0, "28525": 973332864.0, "28530": 972079296.0, "28535": 966815936.0, "28540": 958371584.0, "28545": 967295680.0, "28550": 966356352.0, "28555": 974004096.0, "28560": 969604416.0, "28565": 956190848.0, "28570": 980292480.0, "28575": 958666112.0, "28580": 972633792.0, "28585": 970837696.0, "28590": 948107264.0, "28595": 950111040.0, "28600": 970711232.0, "28605": 972291008.0, "28610": 977484480.0, "28615": 949630656.0, "28620": 948336448.0, "28625": 967830784.0, "28630": 970250368.0, "28635": 966583296.0, "28640": 969005376.0, "28645": 954634176.0, "28650": 974573696.0, "28655": 982959168.0, "28660": 983718528.0, "28665": 967165632.0, "28670": 944847168.0, "28675": 964863744.0, "28680": 985702912.0, "28685": 969981824.0, "28690": 989419008.0, "28695": 946522624.0, "28700": 931255872.0, "28705": 965949632.0, "28710": 978240512.0, "28715": 978905088.0, "28720": 985300096.0, "28725": 955584128.0, "28730": 966351040.0, "28735": 960882304.0, "28740": 976498880.0, "28745": 966008512.0, "28750": 948279680.0, "28755": 951486336.0, "28760": 969064512.0, "28765": 975634368.0, "28770": 955957184.0, "28775": 971514304.0, "28780": 946099008.0, "28785": 964696256.0, "28790": 970132608.0, "28795": 975689472.0, "28800": 957436672.0, "28805": 954137984.0, "28810": 971441344.0, "28815": 964936256.0, "28820": 973117696.0, "28825": 948440320.0, "28830": 933123776.0, "28835": 966454528.0, "28840": 978216192.0, "28845": 967180160.0, "28850": 975818816.0, "28855": 956725632.0, "28860": 939742144.0, "28865": 973638016.0, "28870": 963151680.0, "28875": 966363136.0, "28880": 962296256.0, "28885": 961598976.0, "28890": 966774400.0, "28895": 966023360.0, "28900": 971171584.0, "28905": 985002496.0, "28910": 930799296.0, "28915": 963032256.0, "28920": 967830592.0, "28925": 971075264.0, "28930": 990005056.0, "28935": 950092224.0, "28940": 951204672.0, "28945": 985635712.0, "28950": 962278848.0, "28955": 959840896.0, "28960": 961622080.0, "28965": 974140032.0, "28970": 958558208.0, "28975": 968756096.0, "28980": 958113856.0, "28985": 966351040.0, "28990": 937292608.0, "28995": 960079488.0, "29000": 980366080.0, "29005": 970911744.0, "29010": 977056128.0, "29015": 946481088.0, "29020": 963916480.0, "29025": 948753984.0, "29030": 976342976.0, "29035": 983381696.0, "29040": 943261888.0, "29045": 964355776.0, "29050": 987073152.0, "29055": 968292032.0, "29060": 952881664.0, "29065": 959827072.0, "29070": 945063680.0, "29075": 977131776.0, "29080": 968572800.0, "29085": 973991552.0, "29090": 973884160.0, "29095": 921521088.0, "29100": 960533568.0, "29105": 983929088.0, "29110": 974598848.0, "29115": 967647168.0, "29120": 947057536.0, "29125": 945229056.0, "29130": 979859456.0, "29135": 968825344.0, "29140": 970369280.0, "29145": 961086784.0, "29150": 947199872.0, "29155": 987081088.0, "29160": 970737984.0, "29165": 984069888.0, "29170": 970030016.0, "29175": 952875584.0, "29180": 969439936.0, "29185": 970914368.0, "29190": 948262528.0, "29195": 978811456.0, "29200": 958959872.0, "29205": 966087104.0, "29210": 974268160.0, "29215": 966080000.0, "29220": 953564608.0, "29225": 962401280.0, "29230": 948292160.0, "29235": 973602752.0, "29240": 975987328.0, "29245": 970547776.0, "29250": 977189376.0, "29255": 957229888.0, "29260": 973163904.0, "29265": 982767936.0, "29270": 966301376.0, "29275": 952223424.0, "29280": 937321600.0, "29285": 975489600.0, "29290": 980360064.0, "29295": 958868288.0, "29300": 965556544.0, "29305": 961234240.0, "29310": 951294464.0, "29315": 982316096.0, "29320": 965585664.0, "29325": 985101440.0, "29330": 962259712.0, "29335": 937764352.0, "29340": 967979456.0, "29345": 963427072.0, "29350": 969335424.0, "29355": 985267008.0, "29360": 941555584.0, "29365": 961356736.0, "29370": 971086208.0, "29375": 970274368.0, "29380": 976583232.0, "29385": 954725248.0, "29390": 954316608.0, "29395": 961480384.0, "29400": 979843328.0, "29405": 963441280.0, "29410": 961272320.0, "29415": 947113152.0, "29420": 983234368.0, "29425": 965539072.0, "29430": 972055936.0, "29435": 969395008.0, "29440": 934011520.0, "29445": 985728448.0, "29450": 961695552.0, "29455": 976532096.0, "29460": 978024704.0, "29465": 941592192.0, "29470": 956094784.0, "29475": 974468288.0, "29480": 957763328.0, "29485": 972142144.0, "29490": 956751232.0, "29495": 951530752.0, "29500": 973528512.0, "29505": 962009920.0, "29510": 969576064.0, "29515": 973758784.0, "29520": 950651072.0, "29525": 961854784.0, "29530": 969197440.0, "29535": 970779712.0, "29540": 965133440.0, "29545": 928337664.0, "29550": 934406720.0, "29555": 978132544.0, "29560": 963407488.0, "29565": 972499136.0, "29570": 963026880.0, "29575": 948053888.0, "29580": 968911296.0, "29585": 974860480.0, "29590": 971638272.0, "29595": 958747392.0, "29600": 949864960.0, "29605": 959016384.0, "29610": 963974528.0, "29615": 960174208.0, "29620": 973001024.0, "29625": 966087104.0, "29630": 964405952.0, "29635": 965910208.0, "29640": 973625408.0, "29645": 971063040.0, "29650": 970858496.0, "29655": 936751872.0, "29660": 957099776.0, "29665": 964662208.0, "29670": 956936256.0, "29675": 972842688.0, "29680": 946352576.0, "29685": 978881728.0, "29690": 963936640.0, "29695": 964839104.0, "29700": 975894144.0, "29705": 960988224.0, "29710": 965571392.0, "29715": 971632384.0, "29720": 970575360.0, "29725": 967947072.0, "29730": 960298368.0, "29735": 954035584.0, "29740": 986865472.0, "29745": 958993216.0, "29750": 971994432.0, "29755": 953974720.0, "29760": 941998144.0, "29765": 964957632.0, "29770": 974298368.0, "29775": 971280832.0, "29780": 966209024.0, "29785": 939563712.0, "29790": 969224256.0, "29795": 974764736.0, "29800": 973807872.0, "29805": 981627072.0, "29810": 935844800.0, "29815": 949974592.0, "29820": 969408320.0, "29825": 969827520.0, "29830": 969545152.0, "29835": 957534656.0, "29840": 935203328.0, "29845": 954067968.0, "29850": 972683264.0, "29855": 961848832.0, "29860": 980645568.0, "29865": 938260864.0, "29870": 958728896.0, "29875": 979230272.0, "29880": 965938816.0, "29885": 962290880.0, "29890": 920047040.0, "29895": 994033408.0, "29900": 955345408.0, "29905": 959510080.0, "29910": 954933568.0, "29915": 965615168.0, "29920": 962218880.0, "29925": 985996480.0, "29930": 957096704.0, "29935": 970301120.0, "29940": 970113856.0, "29945": 969259840.0, "29950": 964799232.0, "29955": 977245504.0, "29960": 974895232.0, "29965": 972693952.0, "29970": 923972480.0, "29975": 959022848.0, "29980": 972720512.0, "29985": 963929664.0, "29990": 967740224.0, "29995": 967892864.0, "30000": 942633920.0, "30005": 967746304.0, "30010": 956645952.0, "30015": 966179584.0, "30020": 957892992.0, "30025": 944669056.0, "30030": 978139200.0, "30035": 978413056.0, "30040": 983354880.0, "30045": 966079296.0, "30050": 937787200.0, "30055": 972872640.0, "30060": 959918464.0, "30065": 976486912.0, "30070": 962763456.0, "30075": 956923520.0, "30080": 954122048.0, "30085": 965398464.0, "30090": 972929216.0, "30095": 950136960.0, "30100": 962530240.0, "30105": 952985024.0, "30110": 963626688.0, "30115": 963089408.0, "30120": 972050880.0, "30125": 965466496.0, "30130": 934063424.0, "30135": 965468864.0, "30140": 953670912.0, "30145": 980745216.0, "30150": 975323904.0, "30155": 931081856.0, "30160": 967401984.0, "30165": 966457024.0, "30170": 970459072.0, "30175": 956027072.0, "30180": 931998208.0, "30185": 944373120.0, "30190": 970343552.0, "30195": 972916480.0, "30200": 958245184.0, "30205": 950627776.0, "30210": 940393664.0, "30215": 969717632.0, "30220": 963057664.0, "30225": 954753536.0, "30230": 965140544.0, "30235": 953275456.0, "30240": 959005184.0, "30245": 973992128.0, "30250": 968899264.0, "30255": 985686912.0, "30260": 978155520.0, "30265": 969023296.0, "30270": 966714240.0, "30275": 974994432.0, "30280": 984309376.0, "30285": 967028288.0, "30290": 959459840.0, "30295": 960473472.0, "30300": 953483200.0, "30305": 980447360.0, "30310": 967082816.0, "30315": 962002880.0, "30320": 966634496.0, "30325": 962171584.0, "30330": 960867520.0, "30335": 960694272.0, "30340": 959687232.0, "30345": 957702528.0, "30350": 972232704.0, "30355": 974410048.0, "30360": 966659648.0, "30365": 968551168.0, "30370": 948299328.0, "30375": 976024832.0, "30380": 963794880.0, "30385": 962998144.0, "30390": 986281088.0, "30395": 930748672.0, "30400": 976198080.0, "30405": 990363136.0, "30410": 977602048.0, "30415": 962611584.0, "30420": 945919552.0, "30425": 969538688.0, "30430": 977155840.0, "30435": 972229632.0, "30440": 986706880.0, "30445": 936843776.0, "30450": 945070208.0, "30455": 955780160.0, "30460": 973524416.0, "30465": 981099072.0, "30470": 981835328.0, "30475": 937120576.0, "30480": 964699008.0, "30485": 959958080.0, "30490": 986738752.0, "30495": 975629248.0, "30500": 958488512.0, "30505": 975407104.0, "30510": 963589312.0, "30515": 970558784.0, "30520": 974838336.0, "30525": 944130944.0, "30530": 965080704.0, "30535": 967193792.0, "30540": 975692864.0, "30545": 967105600.0, "30550": 982108544.0, "30555": 955290816.0, "30560": 974765952.0, "30565": 969966784.0, "30570": 965301312.0, "30575": 966031232.0, "30580": 940476608.0, "30585": 973530112.0, "30590": 960622400.0, "30595": 972363840.0, "30600": 978924352.0, "30605": 940024384.0, "30610": 968690944.0, "30615": 964393280.0, "30620": 968182144.0, "30625": 969624768.0, "30630": 965838656.0, "30635": 946173504.0, "30640": 975645440.0, "30645": 973049728.0, "30650": 967352960.0, "30655": 959626688.0, "30660": 943478784.0, "30665": 983381504.0, "30670": 967624448.0, "30675": 971393408.0, "30680": 975384960.0, "30685": 934283264.0, "30690": 961457280.0, "30695": 967187008.0, "30700": 972513088.0, "30705": 961536960.0, "30710": 964075840.0, "30715": 946704320.0, "30720": 969496960.0, "30725": 972899200.0, "30730": 976632576.0, "30735": 967320512.0, "30740": 947637312.0, "30745": 966896000.0, "30750": 964606848.0, "30755": 994910080.0, "30760": 988401216.0, "30765": 942245376.0, "30770": 961795840.0, "30775": 970958912.0, "30780": 977599232.0, "30785": 970572480.0, "30790": 952597312.0, "30795": 961976064.0, "30800": 961991232.0, "30805": 959196032.0, "30810": 951540928.0, "30815": 956328064.0, "30820": 949092352.0, "30825": 989150208.0, "30830": 974463808.0, "30835": 966576768.0, "30840": 975416448.0, "30845": 937090496.0, "30850": 968346688.0, "30855": 987474112.0, "30860": 970638464.0, "30865": 968568256.0, "30870": 956738560.0, "30875": 948522240.0, "30880": 985876160.0, "30885": 981161920.0, "30890": 974793600.0, "30895": 946573760.0, "30900": 942026304.0, "30905": 952540288.0, "30910": 977420480.0, "30915": 980569792.0, "30920": 969029824.0, "30925": 949439616.0, "30930": 974832192.0, "30935": 967384384.0, "30940": 958744832.0, "30945": 974606848.0, "30950": 950793984.0, "30955": 973293888.0, "30960": 971688448.0, "30965": 965349952.0, "30970": 973983552.0, "30975": 975306432.0, "30980": 947335296.0, "30985": 971499456.0, "30990": 966306752.0, "30995": 968070080.0, "31000": 968464256.0, "31005": 952778368.0, "31010": 983734592.0, "31015": 957534656.0, "31020": 961469376.0, "31025": 964626112.0, "31030": 943789952.0, "31035": 977819008.0, "31040": 981951296.0, "31045": 963741440.0, "31050": 967472384.0, "31055": 953233152.0, "31060": 979021888.0, "31065": 981148736.0, "31070": 972774400.0, "31075": 970281792.0, "31080": 972788736.0, "31085": 940273088.0, "31090": 987577856.0, "31095": 970085824.0, "31100": 975968256.0, "31105": 961801536.0, "31110": 953356544.0, "31115": 968440768.0, "31120": 978458688.0, "31125": 971479488.0, "31130": 970036736.0, "31135": 936902784.0, "31140": 952704256.0, "31145": 986903808.0, "31150": 967168064.0, "31155": 967511232.0, "31160": 954785344.0, "31165": 958428096.0, "31170": 969884096.0, "31175": 968721344.0, "31180": 968513152.0, "31185": 981036352.0, "31190": 956366656.0, "31195": 963146880.0, "31200": 957269824.0, "31205": 958574016.0, "31210": 985482304.0, "31215": 941568768.0, "31220": 960966336.0, "31225": 967556032.0, "31230": 962419648.0, "31235": 968347968.0, "31240": 959220352.0, "31245": 943048896.0, "31250": 956640384.0, "31255": 960012928.0, "31260": 966260672.0, "31265": 949778944.0, "31270": 948709504.0, "31275": 973716864.0, "31280": 977721344.0, "31285": 963410560.0, "31290": 973090560.0, "31295": 935438400.0, "31300": 977310144.0, "31305": 988522432.0, "31310": 970017984.0, "31315": 958029184.0, "31320": 946108736.0, "31325": 940571456.0, "31330": 969883712.0, "31335": 980763264.0, "31340": 956088896.0, "31345": 960530752.0, "31350": 936457728.0, "31355": 963484224.0, "31360": 961995328.0, "31365": 974236672.0, "31370": 968931712.0, "31375": 936554944.0, "31380": 957269056.0, "31385": 956880896.0, "31390": 959786368.0, "31395": 981827904.0, "31400": 965841344.0, "31405": 955706816.0, "31410": 960079680.0, "31415": 972568064.0, "31420": 969230912.0, "31425": 943348928.0, "31430": 960621440.0, "31435": 974479104.0, "31440": 972743424.0, "31445": 970642624.0, "31450": 980399296.0, "31455": 936859136.0, "31460": 988764480.0, "31465": 963137152.0, "31470": 952709824.0, "31475": 962708608.0, "31480": 952899776.0, "31485": 960388544.0, "31490": 974676288.0, "31495": 957241472.0, "31500": 974900736.0, "31505": 944704832.0, "31510": 958728128.0, "31515": 959367040.0, "31520": 975108544.0, "31525": 963943616.0, "31530": 965521600.0, "31535": 933644288.0, "31540": 961015552.0, "31545": 976258368.0, "31550": 973397888.0, "31555": 967130432.0, "31560": 946733760.0, "31565": 940690624.0, "31570": 971210688.0, "31575": 979298560.0, "31580": 961529664.0, "31585": 943931840.0, "31590": 929278784.0, "31595": 967489280.0, "31600": 964082432.0, "31605": 959774464.0, "31610": 966436928.0, "31615": 946334464.0, "31620": 968654080.0, "31625": 943466624.0, "31630": 960764032.0, "31635": 971627264.0, "31640": 947673984.0, "31645": 958414912.0, "31650": 963100736.0, "31655": 984255936.0, "31660": 960991360.0, "31665": 968082816.0, "31670": 974951360.0, "31675": 960559424.0, "31680": 957164160.0, "31685": 981669824.0, "31690": 957329408.0, "31695": 933457088.0, "31700": 957083264.0, "31705": 951685632.0, "31710": 963824000.0, "31715": 959811328.0, "31720": 941881344.0, "31725": 965141568.0, "31730": 965406400.0, "31735": 966055552.0, "31740": 959546496.0, "31745": 943556864.0, "31750": 970804672.0, "31755": 966364928.0, "31760": 970111744.0, "31765": 971516992.0, "31770": 944265856.0, "31775": 953801728.0, "31780": 968979264.0, "31785": 985194496.0, "31790": 986589440.0, "31795": 966457344.0, "31800": 943510400.0, "31805": 967788544.0, "31810": 973914880.0, "31815": 967801216.0, "31820": 956217728.0, "31825": 938425920.0, "31830": 962843328.0, "31835": 976691968.0, "31840": 964110400.0, "31845": 991815488.0, "31850": 966849216.0, "31855": 936307840.0, "31860": 968385664.0, "31865": 961029632.0, "31870": 989111616.0, "31875": 984433856.0, "31880": 951984832.0, "31885": 965769728.0, "31890": 942735360.0, "31895": 963667456.0, "31900": 966902144.0, "31905": 946603776.0, "31910": 975597760.0, "31915": 964668416.0, "31920": 950325376.0, "31925": 961887232.0, "31930": 947218304.0, "31935": 956033088.0, "31940": 975210112.0, "31945": 975731136.0, "31950": 962078208.0, "31955": 955973504.0, "31960": 942126976.0, "31965": 953948992.0, "31970": 984446336.0, "31975": 971089536.0, "31980": 971781376.0, "31985": 948926848.0, "31990": 964582080.0, "31995": 976271488.0, "32000": 974644224.0, "32005": 989579392.0, "32010": 948276416.0, "32015": 952396288.0, "32020": 964013440.0, "32025": 983363776.0, "32030": 966721472.0, "32035": 973451648.0, "32040": 944792576.0, "32045": 972207360.0, "32050": 951426944.0, "32055": 980891136.0, "32060": 965897216.0, "32065": 958183296.0, "32070": 961746560.0, "32075": 968084352.0, "32080": 976035968.0, "32085": 969590080.0, "32090": 946206144.0, "32095": 984198912.0, "32100": 971621312.0, "32105": 976798720.0, "32110": 971450752.0, "32115": 963478336.0, "32120": 970832128.0, "32125": 966478976.0, "32130": 958121600.0, "32135": 964678144.0, "32140": 971787520.0, "32145": 924002688.0, "32150": 975307328.0, "32155": 978956800.0, "32160": 957570048.0, "32165": 962722368.0, "32170": 938582592.0, "32175": 950623040.0, "32180": 971841664.0, "32185": 984913024.0, "32190": 958663360.0, "32195": 954049280.0, "32200": 959356416.0, "32205": 975471296.0, "32210": 979887936.0, "32215": 961955584.0, "32220": 957318080.0, "32225": 946389760.0, "32230": 979897088.0, "32235": 973243648.0, "32240": 965357760.0, "32245": 968365760.0, "32250": 965122048.0, "32255": 961131712.0, "32260": 975331264.0, "32265": 971271360.0, "32270": 968704128.0, "32275": 952090560.0, "32280": 949265664.0, "32285": 961473728.0, "32290": 976620096.0, "32295": 969866496.0, "32300": 948392960.0, "32305": 953782400.0, "32310": 966344256.0, "32315": 956353408.0, "32320": 970571008.0, "32325": 963555840.0, "32330": 950009984.0, "32335": 982313472.0, "32340": 960462912.0, "32345": 964802048.0, "32350": 966707648.0, "32355": 952280000.0, "32360": 964252800.0, "32365": 976662720.0, "32370": 965134464.0, "32375": 967446016.0, "32380": 931079680.0, "32385": 964619904.0, "32390": 979875840.0, "32395": 962132416.0, "32400": 961127040.0, "32405": 969092288.0, "32410": 952334144.0, "32415": 941693376.0, "32420": 973605504.0, "32425": 975756672.0, "32430": 983844736.0, "32435": 946569344.0, "32440": 961953472.0, "32445": 979853760.0, "32450": 975030528.0, "32455": 971760384.0, "32460": 968695808.0, "32465": 955285504.0, "32470": 962763904.0, "32475": 968103552.0, "32480": 975753600.0, "32485": 949701120.0, "32490": 949810496.0, "32495": 957336512.0, "32500": 971613824.0, "32505": 974183808.0, "32510": 989383808.0, "32515": 951586240.0, "32520": 947181952.0, "32525": 967420416.0, "32530": 961143744.0, "32535": 982240960.0, "32540": 957796608.0, "32545": 958558400.0, "32550": 979538944.0, "32555": 972848000.0, "32560": 961136768.0, "32565": 958925760.0, "32570": 946204992.0, "32575": 964845504.0, "32580": 962726976.0, "32585": 973138560.0, "32590": 973869952.0, "32595": 952715072.0, "32600": 981595328.0, "32605": 975675136.0, "32610": 954457408.0, "32615": 953446656.0, "32620": 952556288.0, "32625": 978917504.0, "32630": 969080512.0, "32635": 980612416.0, "32640": 960832128.0, "32645": 940899456.0, "32650": 966541888.0, "32655": 962328704.0, "32660": 974455808.0, "32665": 988501120.0, "32670": 967721408.0, "32675": 952329152.0, "32680": 970896896.0, "32685": 972378240.0, "32690": 973635712.0, "32695": 976197568.0, "32700": 937074240.0, "32705": 965043136.0, "32710": 978066240.0, "32715": 960187904.0, "32720": 979219840.0, "32725": 956986368.0, "32730": 969312768.0, "32735": 958903168.0, "32740": 954219136.0, "32745": 980683264.0, "32750": 964166336.0, "32755": 958824064.0, "32760": 970832128.0, "32765": 962647360.0, "32770": 969080256.0, "32775": 973942720.0, "32780": 949386752.0, "32785": 964402944.0, "32790": 984225024.0, "32795": 969203584.0, "32800": 972973696.0, "32805": 950331392.0, "32810": 952354432.0, "32815": 975907008.0, "32820": 978017984.0, "32825": 979291904.0, "32830": 951930944.0, "32835": 949348736.0, "32840": 975154112.0, "32845": 977837440.0, "32850": 962873216.0, "32855": 975116864.0, "32860": 953091520.0, "32865": 962935552.0, "32870": 962611712.0, "32875": 975534400.0, "32880": 965602496.0, "32885": 970606080.0, "32890": 970386688.0, "32895": 957568384.0, "32900": 974015744.0, "32905": 975437440.0, "32910": 959847808.0, "32915": 957065664.0, "32920": 957606784.0, "32925": 970640640.0, "32930": 959412352.0, "32935": 977707904.0, "32940": 957654656.0, "32945": 968907776.0, "32950": 987831168.0, "32955": 980465280.0, "32960": 967107264.0, "32965": 924777920.0, "32970": 945659136.0, "32975": 970103232.0, "32980": 975128832.0, "32985": 978863680.0, "32990": 935022400.0, "32995": 967955712.0, "33000": 963558400.0, "33005": 960614912.0, "33010": 976189696.0, "33015": 961714944.0, "33020": 956843136.0, "33025": 984390848.0, "33030": 976616768.0, "33035": 976883968.0, "33040": 982812928.0, "33045": 941299008.0, "33050": 971160320.0, "33055": 965639744.0, "33060": 973616704.0, "33065": 970549376.0, "33070": 947122816.0, "33075": 969946624.0, "33080": 975953152.0, "33085": 986161152.0, "33090": 958781504.0, "33095": 961411840.0, "33100": 946060928.0, "33105": 971961600.0, "33110": 988334656.0, "33115": 968637760.0, "33120": 966184960.0, "33125": 940897856.0, "33130": 963932480.0, "33135": 966362880.0, "33140": 982940544.0, "33145": 973194880.0, "33150": 949213888.0, "33155": 955381568.0, "33160": 959774336.0, "33165": 966206464.0, "33170": 968681344.0, "33175": 967523008.0, "33180": 938150080.0, "33185": 964434816.0, "33190": 961187456.0, "33195": 975014528.0, "33200": 964822400.0, "33205": 942999744.0, "33210": 976627072.0, "33215": 965908992.0, "33220": 983616576.0, "33225": 982063744.0, "33230": 961070784.0, "33235": 967765184.0, "33240": 973076992.0, "33245": 972426496.0, "33250": 964687104.0, "33255": 947177536.0, "33260": 944831744.0, "33265": 979765760.0, "33270": 978717440.0, "33275": 971800448.0, "33280": 968049152.0, "33285": 960848512.0, "33290": 971615552.0, "33295": 961206784.0, "33300": 975337344.0, "33305": 982086656.0, "33310": 971024640.0, "33315": 974351744.0, "33320": 966574080.0, "33325": 973133056.0, "33330": 957841728.0, "33335": 951003776.0, "33340": 958489984.0, "33345": 974273024.0, "33350": 975366400.0, "33355": 988002624.0, "33360": 966249152.0, "33365": 946410880.0, "33370": 986983040.0, "33375": 961382848.0, "33380": 972495168.0, "33385": 993250048.0, "33390": 956814848.0, "33395": 964252160.0, "33400": 972419264.0, "33405": 975000064.0, "33410": 981457984.0, "33415": 947803200.0, "33420": 969240704.0, "33425": 991153728.0, "33430": 960249024.0, "33435": 972045696.0, "33440": 952178112.0, "33445": 924865024.0, "33450": 967993216.0, "33455": 967586624.0, "33460": 974901824.0, "33465": 982249472.0, "33470": 958986688.0, "33475": 982592640.0, "33480": 971407936.0, "33485": 961266048.0, "33490": 975061824.0, "33495": 953901312.0, "33500": 985054720.0, "33505": 983659968.0, "33510": 975594752.0, "33515": 964731584.0, "33520": 955807872.0, "33525": 956181568.0, "33530": 965229824.0, "33535": 971824128.0, "33540": 982076544.0, "33545": 960186496.0, "33550": 934193472.0, "33555": 967802112.0, "33560": 986893632.0, "33565": 975240576.0, "33570": 973316096.0, "33575": 938689408.0, "33580": 965173312.0, "33585": 973692928.0, "33590": 986437888.0, "33595": 979988928.0, "33600": 958060224.0, "33605": 951611648.0, "33610": 969560192.0, "33615": 957838400.0, "33620": 972114560.0, "33625": 975357184.0, "33630": 953160064.0, "33635": 978798464.0, "33640": 951824768.0, "33645": 968049792.0, "33650": 981113664.0, "33655": 957745664.0, "33660": 981584704.0, "33665": 966743680.0, "33670": 961690624.0, "33675": 984117056.0, "33680": 938018432.0, "33685": 950820544.0, "33690": 971814464.0, "33695": 978409536.0, "33700": 963921152.0, "33705": 966719808.0, "33710": 966177344.0, "33715": 977833344.0, "33720": 965096064.0, "33725": 981558656.0, "33730": 966646976.0, "33735": 932830208.0, "33740": 967026112.0, "33745": 972008448.0, "33750": 969967424.0, "33755": 957331200.0, "33760": 935654848.0, "33765": 961261824.0, "33770": 971305216.0, "33775": 975561728.0, "33780": 977143168.0, "33785": 951336320.0, "33790": 950343808.0, "33795": 960399552.0, "33800": 971864128.0, "33805": 975852416.0, "33810": 959719296.0, "33815": 946870208.0, "33820": 970060800.0, "33825": 976710784.0, "33830": 973304576.0, "33835": 965585152.0, "33840": 953276864.0, "33845": 961077888.0, "33850": 973752384.0, "33855": 956175744.0, "33860": 972114048.0, "33865": 949542528.0, "33870": 959108736.0, "33875": 970548352.0, "33880": 960267648.0, "33885": 969698752.0, "33890": 949848640.0, "33895": 943568448.0, "33900": 963117056.0, "33905": 965407168.0, "33910": 972530432.0, "33915": 954561152.0, "33920": 937300992.0, "33925": 972511488.0, "33930": 975011776.0, "33935": 954240768.0, "33940": 979749376.0, "33945": 953349440.0, "33950": 968828288.0, "33955": 967812288.0, "33960": 972220288.0, "33965": 973513216.0, "33970": 953877568.0, "33975": 959767808.0, "33980": 958754560.0, "33985": 975535744.0, "33990": 983445760.0, "33995": 953576768.0, "34000": 932588864.0, "34005": 965253056.0, "34010": 977942208.0, "34015": 962712320.0, "34020": 977216768.0, "34025": 930549760.0, "34030": 959737024.0, "34035": 973896128.0, "34040": 968699008.0, "34045": 968441792.0, "34050": 943277184.0, "34055": 968452160.0, "34060": 973374720.0, "34065": 975929536.0, "34070": 959251456.0, "34075": 968815104.0, "34080": 944542592.0, "34085": 965303680.0, "34090": 971224064.0, "34095": 964428544.0, "34100": 960463744.0, "34105": 956279744.0, "34110": 976429952.0, "34115": 956875008.0, "34120": 962426048.0, "34125": 956759552.0, "34130": 947257600.0, "34135": 956360768.0, "34140": 972401600.0, "34145": 969176000.0, "34150": 953762304.0, "34155": 951561792.0, "34160": 950750784.0, "34165": 963375424.0, "34170": 987425984.0, "34175": 972027328.0, "34180": 975395712.0, "34185": 932221120.0, "34190": 956501248.0, "34195": 973390720.0, "34200": 971309056.0, "34205": 950636672.0, "34210": 917554048.0, "34215": 948750336.0, "34220": 967851904.0, "34225": 979110720.0, "34230": 967797184.0, "34235": 950560384.0, "34240": 947841280.0, "34245": 971475136.0, "34250": 993622848.0, "34255": 961641856.0, "34260": 976217088.0, "34265": 941329280.0, "34270": 968090624.0, "34275": 964880000.0, "34280": 968482496.0, "34285": 972286144.0, "34290": 951057792.0, "34295": 950771200.0, "34300": 978447552.0, "34305": 972224064.0, "34310": 972217984.0, "34315": 953563136.0, "34320": 947761600.0, "34325": 966291520.0, "34330": 975933248.0, "34335": 971640704.0, "34340": 961741568.0, "34345": 950446528.0, "34350": 954620608.0, "34355": 965452160.0, "34360": 954836864.0, "34365": 957036160.0, "34370": 941053248.0, "34375": 972587072.0, "34380": 979096960.0, "34385": 958909696.0, "34390": 966836608.0, "34395": 958441664.0, "34400": 953475392.0, "34405": 968023424.0, "34410": 974169472.0, "34415": 977253440.0, "34420": 966206592.0, "34425": 953748928.0, "34430": 972075904.0, "34435": 968571584.0, "34440": 971942592.0, "34445": 979796736.0, "34450": 934335360.0, "34455": 959405760.0, "34460": 968968064.0, "34465": 972944064.0, "34470": 971200128.0, "34475": 939006080.0, "34480": 971990336.0, "34485": 972992896.0, "34490": 965046976.0, "34495": 971378752.0, "34500": 958257344.0, "34505": 954423424.0, "34510": 974287296.0, "34515": 967279808.0, "34520": 975245632.0, "34525": 967105344.0, "34530": 946886720.0, "34535": 966132736.0, "34540": 975223296.0, "34545": 957728640.0, "34550": 960405056.0, "34555": 932129344.0, "34560": 961483328.0, "34565": 977809152.0, "34570": 956317568.0, "34575": 969050176.0, "34580": 953256960.0, "34585": 966691904.0, "34590": 965966208.0, "34595": 958274176.0, "34600": 967948352.0, "34605": 977721152.0, "34610": 949935808.0, "34615": 967926272.0, "34620": 984803968.0, "34625": 971743616.0, "34630": 970791488.0, "34635": 942839552.0, "34640": 961167360.0, "34645": 964712768.0, "34650": 978140608.0, "34655": 971532864.0, "34660": 948025920.0, "34665": 943830592.0, "34670": 972777472.0, "34675": 969004864.0, "34680": 974701248.0, "34685": 968329152.0, "34690": 928280512.0, "34695": 968749248.0, "34700": 970260224.0, "34705": 961508864.0, "34710": 976409280.0, "34715": 953779392.0, "34720": 967802816.0, "34725": 969350016.0, "34730": 963821248.0, "34735": 987746752.0, "34740": 961937152.0, "34745": 966025600.0, "34750": 984576064.0, "34755": 964252224.0, "34760": 972127616.0, "34765": 973697728.0, "34770": 946975808.0, "34775": 991221632.0, "34780": 972299008.0, "34785": 975424192.0, "34790": 957164032.0, "34795": 947319552.0, "34800": 954793856.0, "34805": 983768384.0, "34810": 968779840.0, "34815": 958815424.0, "34820": 935296448.0, "34825": 974591488.0, "34830": 975027520.0, "34835": 965675200.0, "34840": 974592576.0, "34845": 954822784.0, "34850": 941717696.0, "34855": 980345600.0, "34860": 964258944.0, "34865": 966733888.0, "34870": 976362240.0, "34875": 941470976.0, "34880": 959383424.0, "34885": 969360512.0, "34890": 975080832.0, "34895": 980193856.0, "34900": 948669952.0, "34905": 960844352.0, "34910": 956249600.0, "34915": 956455616.0, "34920": 975141056.0, "34925": 949468416.0, "34930": 939750272.0, "34935": 966371776.0, "34940": 971380544.0, "34945": 972733632.0, "34950": 978711360.0, "34955": 940640768.0, "34960": 959003328.0, "34965": 985031680.0, "34970": 974797440.0, "34975": 975381248.0, "34980": 946897664.0, "34985": 972967936.0, "34990": 975475264.0, "34995": 969822528.0, "35000": 972793536.0, "35005": 938366208.0, "35010": 952079552.0, "35015": 982992832.0, "35020": 973608192.0, "35025": 971422272.0, "35030": 954431296.0, "35035": 961297664.0, "35040": 968677440.0, "35045": 969528192.0, "35050": 982186816.0, "35055": 969442432.0, "35060": 930681088.0, "35065": 975052032.0, "35070": 971900992.0, "35075": 970438848.0, "35080": 973464960.0, "35085": 944401920.0, "35090": 966224128.0, "35095": 998007936.0, "35100": 970842432.0, "35105": 978301376.0, "35110": 941349824.0, "35115": 959689216.0, "35120": 976130944.0, "35125": 959718464.0, "35130": 979773376.0, "35135": 972598720.0, "35140": 936312960.0, "35145": 967140224.0, "35150": 970193152.0, "35155": 974688384.0, "35160": 977641536.0, "35165": 959154752.0, "35170": 963872512.0, "35175": 974467648.0, "35180": 970405568.0, "35185": 982530368.0, "35190": 961365184.0, "35195": 962500736.0, "35200": 957841600.0, "35205": 975259008.0, "35210": 980536448.0, "35215": 979559296.0, "35220": 945077312.0, "35225": 967866112.0, "35230": 979016384.0, "35235": 974514688.0, "35240": 962279488.0, "35245": 951225984.0, "35250": 953876928.0, "35255": 977521856.0, "35260": 975075968.0, "35265": 963141184.0, "35270": 942669248.0, "35275": 956127552.0, "35280": 983564096.0, "35285": 973097664.0, "35290": 971764800.0, "35295": 961452672.0, "35300": 957339200.0, "35305": 976064768.0, "35310": 975347072.0, "35315": 987408320.0, "35320": 974035008.0, "35325": 958099648.0, "35330": 974321920.0, "35335": 969375872.0, "35340": 976320320.0, "35345": 973693248.0, "35350": 945893632.0, "35355": 954598848.0, "35360": 967568832.0, "35365": 956765120.0, "35370": 973330112.0, "35375": 951744640.0, "35380": 965274496.0, "35385": 977242752.0, "35390": 967783872.0, "35395": 966459584.0, "35400": 977066432.0, "35405": 930930368.0, "35410": 964408128.0, "35415": 966296960.0, "35420": 963064640.0, "35425": 970795008.0, "35430": 950653312.0, "35435": 953433024.0, "35440": 958036352.0, "35445": 978145728.0, "35450": 975503360.0, "35455": 940618176.0, "35460": 949445440.0, "35465": 973116672.0, "35470": 975754752.0, "35475": 958440192.0, "35480": 953877568.0, "35485": 959840704.0, "35490": 961137792.0, "35495": 980510656.0, "35500": 967928128.0, "35505": 964468416.0, "35510": 954323008.0, "35515": 971430464.0, "35520": 975892352.0, "35525": 964418240.0, "35530": 971586240.0, "35535": 959831232.0, "35540": 946329856.0, "35545": 967890304.0, "35550": 974736576.0, "35555": 981370240.0, "35560": 959149568.0, "35565": 946299264.0, "35570": 968005952.0, "35575": 977711104.0, "35580": 982679872.0, "35585": 961085504.0, "35590": 927050432.0, "35595": 984927872.0, "35600": 971107712.0, "35605": 976948352.0, "35610": 956508928.0, "35615": 959864064.0, "35620": 964994112.0, "35625": 968075328.0, "35630": 971529728.0, "35635": 972445440.0, "35640": 965475712.0, "35645": 938789504.0, "35650": 956446016.0, "35655": 973428928.0, "35660": 966709888.0, "35665": 984044160.0, "35670": 938130048.0, "35675": 981468928.0, "35680": 967859136.0, "35685": 981161536.0, "35690": 970015488.0, "35695": 941563712.0, "35700": 958355392.0, "35705": 969443328.0, "35710": 977255488.0, "35715": 972459136.0, "35720": 967223616.0, "35725": 939044800.0, "35730": 964726912.0, "35735": 988944640.0, "35740": 967866560.0, "35745": 962507840.0, "35750": 947651008.0, "35755": 966832512.0, "35760": 965304512.0, "35765": 969893696.0, "35770": 956970816.0, "35775": 947202752.0, "35780": 970714432.0, "35785": 970327936.0, "35790": 969042496.0, "35795": 971372160.0, "35800": 948330176.0, "35805": 945238720.0, "35810": 968335360.0, "35815": 961586304.0, "35820": 985713408.0, "35825": 978982400.0, "35830": 957785344.0, "35835": 969151680.0, "35840": 966549696.0, "35845": 967169920.0, "35850": 986420800.0, "35855": 956058880.0, "35860": 958192064.0, "35865": 968031232.0, "35870": 974436352.0, "35875": 968540224.0, "35880": 950488384.0, "35885": 968424064.0, "35890": 986068352.0, "35895": 961233536.0, "35900": 970289152.0, "35905": 954412672.0, "35910": 954886400.0, "35915": 978927424.0, "35920": 972842176.0, "35925": 981443264.0, "35930": 957418432.0, "35935": 958872832.0, "35940": 969329472.0, "35945": 969633472.0, "35950": 978712832.0, "35955": 967297216.0, "35960": 939696256.0, "35965": 944776768.0, "35970": 957886464.0, "35975": 978111232.0, "35980": 973912512.0, "35985": 961695232.0, "35990": 943922368.0, "35995": 958529984.0, "36000": 970289216.0, "36005": 969294528.0, "36010": 981284928.0, "36015": 953697920.0, "36020": 983281152.0, "36025": 981516224.0, "36030": 970979008.0, "36035": 965241280.0, "36040": 944468352.0, "36045": 973983040.0, "36050": 969978240.0, "36055": 966729344.0, "36060": 973650880.0, "36065": 946781184.0, "36070": 956581376.0, "36075": 976124224.0, "36080": 962337280.0, "36085": 971948096.0, "36090": 956809408.0, "36095": 951180544.0, "36100": 960704256.0, "36105": 963817536.0, "36110": 974143488.0, "36115": 974970048.0, "36120": 962195968.0, "36125": 964989376.0, "36130": 969754240.0, "36135": 975424192.0, "36140": 970608576.0, "36145": 934675456.0, "36150": 968809280.0, "36155": 960420544.0, "36160": 963023424.0, "36165": 957185984.0, "36170": 940826496.0, "36175": 953299968.0, "36180": 975911040.0, "36185": 966641856.0, "36190": 973122752.0, "36195": 941097600.0, "36200": 923484928.0, "36205": 974842432.0, "36210": 972827072.0, "36215": 971820352.0, "36220": 966439552.0, "36225": 951705344.0, "36230": 960195072.0, "36235": 977292736.0, "36240": 973809216.0, "36245": 966907968.0, "36250": 966833408.0, "36255": 963103616.0, "36260": 966960384.0, "36265": 968909376.0, "36270": 973487168.0, "36275": 972777472.0, "36280": 947737472.0, "36285": 952872000.0, "36290": 960929792.0, "36295": 974676864.0, "36300": 975629888.0, "36305": 944692608.0, "36310": 967625152.0, "36315": 953389824.0, "36320": 953382528.0, "36325": 960781696.0, "36330": 943457792.0, "36335": 963621248.0, "36340": 975879680.0, "36345": 966290624.0, "36350": 978613952.0, "36355": 962816192.0, "36360": 955694400.0, "36365": 973477184.0, "36370": 960778880.0, "36375": 981916672.0, "36380": 948331648.0, "36385": 941367680.0, "36390": 961894400.0, "36395": 967797888.0, "36400": 970856768.0, "36405": 960199104.0, "36410": 938700992.0, "36415": 947702080.0, "36420": 965660928.0, "36425": 963029824.0, "36430": 973630016.0, "36435": 966189056.0, "36440": 945789312.0, "36445": 952793280.0, "36450": 965295552.0, "36455": 985897216.0, "36460": 979297792.0, "36465": 939161408.0, "36470": 980073728.0, "36475": 974792832.0, "36480": 967263296.0, "36485": 985578816.0, "36490": 945102912.0, "36495": 964137664.0, "36500": 968225344.0, "36505": 967424576.0, "36510": 961451776.0, "36515": 960662208.0, "36520": 935567168.0, "36525": 978165440.0, "36530": 963817792.0, "36535": 966944000.0, "36540": 975513216.0, "36545": 945554944.0, "36550": 967519936.0, "36555": 973404544.0, "36560": 962147840.0, "36565": 972727104.0, "36570": 963552064.0, "36575": 976812992.0, "36580": 954937344.0, "36585": 972121536.0, "36590": 984701120.0, "36595": 968342144.0, "36600": 965850176.0, "36605": 971434048.0, "36610": 972789504.0, "36615": 981482496.0, "36620": 952306112.0, "36625": 940206592.0, "36630": 970746176.0, "36635": 972237696.0, "36640": 964445312.0, "36645": 976901056.0, "36650": 935435648.0, "36655": 980051136.0, "36660": 971581696.0, "36665": 962153088.0, "36670": 966426944.0, "36675": 951661632.0, "36680": 946219712.0, "36685": 978424320.0, "36690": 984362432.0, "36695": 991112128.0, "36700": 969597376.0, "36705": 926689920.0, "36710": 966684032.0, "36715": 969889536.0, "36720": 974891712.0, "36725": 969750272.0, "36730": 948848704.0, "36735": 967761216.0, "36740": 968448320.0, "36745": 951178432.0, "36750": 962258880.0, "36755": 935667904.0, "36760": 964118080.0, "36765": 978416064.0, "36770": 963289344.0, "36775": 965314624.0, "36780": 949425600.0, "36785": 946264384.0, "36790": 978146880.0, "36795": 964836032.0, "36800": 960673216.0, "36805": 953818752.0, "36810": 936658496.0, "36815": 986554112.0, "36820": 972146560.0, "36825": 978077632.0, "36830": 968823168.0, "36835": 931209600.0, "36840": 971481792.0, "36845": 979681280.0, "36850": 962112320.0, "36855": 949518912.0, "36860": 961924416.0, "36865": 944986112.0, "36870": 972460416.0, "36875": 973393344.0, "36880": 971605824.0, "36885": 977981632.0, "36890": 933826240.0, "36895": 979388928.0, "36900": 957577728.0, "36905": 967239552.0, "36910": 980476032.0, "36915": 937815680.0, "36920": 960940800.0, "36925": 979644096.0, "36930": 973440448.0, "36935": 978143424.0, "36940": 960224192.0, "36945": 955212480.0, "36950": 970652096.0, "36955": 974708288.0, "36960": 969183296.0, "36965": 946412224.0, "36970": 944538048.0, "36975": 972719168.0, "36980": 974652480.0, "36985": 953453696.0, "36990": 956334848.0, "36995": 943131456.0, "37000": 975101888.0, "37005": 974928768.0, "37010": 963105024.0, "37015": 966943104.0, "37020": 946490624.0, "37025": 958022848.0, "37030": 964719232.0, "37035": 956158784.0, "37040": 978787264.0, "37045": 943001664.0, "37050": 955355136.0, "37055": 980904512.0, "37060": 966795456.0, "37065": 967604480.0, "37070": 967593664.0, "37075": 952735360.0, "37080": 959770880.0, "37085": 973102400.0, "37090": 961044480.0, "37095": 975511296.0, "37100": 942504000.0, "37105": 955779264.0, "37110": 956811136.0, "37115": 971662464.0, "37120": 962834624.0, "37125": 951178176.0, "37130": 939444032.0, "37135": 981943040.0, "37140": 976160128.0, "37145": 971738368.0, "37150": 971003840.0, "37155": 938534336.0, "37160": 961324352.0, "37165": 964406144.0, "37170": 958558080.0, "37175": 986075520.0, "37180": 942205056.0, "37185": 957717696.0, "37190": 970152896.0, "37195": 964031488.0, "37200": 976603648.0, "37205": 944637696.0, "37210": 966869888.0, "37215": 975308352.0, "37220": 957226688.0, "37225": 975701824.0, "37230": 971006720.0, "37235": 953319296.0, "37240": 976911552.0, "37245": 985486144.0, "37250": 955882816.0, "37255": 954936640.0, "37260": 949713344.0, "37265": 990098240.0, "37270": 963007168.0, "37275": 973845632.0, "37280": 959063680.0, "37285": 946265024.0, "37290": 948444608.0, "37295": 980516800.0, "37300": 947238720.0, "37305": 959282944.0, "37310": 953540864.0, "37315": 942131328.0, "37320": 962264832.0, "37325": 978004864.0, "37330": 976520384.0, "37335": 967693504.0, "37340": 941894400.0, "37345": 950570432.0, "37350": 958302720.0, "37355": 971660736.0, "37360": 974702336.0, "37365": 961342400.0, "37370": 951541056.0, "37375": 967507520.0, "37380": 976783040.0, "37385": 965924288.0, "37390": 964861312.0, "37395": 959569472.0, "37400": 963306816.0, "37405": 983672640.0, "37410": 966623808.0, "37415": 964912576.0, "37420": 939648192.0, "37425": 964467520.0, "37430": 963140928.0, "37435": 955994048.0, "37440": 963755456.0, "37445": 947588928.0, "37450": 993312128.0, "37455": 965324288.0, "37460": 961366912.0, "37465": 962310784.0, "37470": 921234880.0, "37475": 951773312.0, "37480": 981713792.0, "37485": 979271104.0, "37490": 968921024.0, "37495": 950340864.0, "37500": 944319936.0, "37505": 959163200.0, "37510": 973449152.0, "37515": 983816768.0, "37520": 962529344.0, "37525": 951728064.0, "37530": 963630016.0, "37535": 980401472.0, "37540": 952745792.0, "37545": 994384064.0, "37550": 951996800.0, "37555": 966660544.0, "37560": 978344704.0, "37565": 972374784.0, "37570": 967970880.0, "37575": 973075968.0, "37580": 943695744.0, "37585": 960294336.0, "37590": 979313088.0, "37595": 977391680.0, "37600": 980949760.0, "37605": 957074176.0, "37610": 975733056.0, "37615": 974040320.0, "37620": 968688512.0, "37625": 982488000.0, "37630": 948691328.0, "37635": 977710912.0, "37640": 959770368.0, "37645": 977888192.0, "37650": 963713920.0, "37655": 925868288.0, "37660": 947696448.0, "37665": 967103424.0, "37670": 966322240.0, "37675": 975231488.0, "37680": 976968640.0, "37685": 949378240.0, "37690": 971134400.0, "37695": 968515072.0, "37700": 967546496.0, "37705": 978504000.0, "37710": 934237568.0, "37715": 977465344.0, "37720": 972792832.0, "37725": 990782144.0, "37730": 985100864.0, "37735": 934683328.0, "37740": 941853632.0, "37745": 967295616.0, "37750": 962152320.0, "37755": 970447104.0, "37760": 958411136.0, "37765": 932229696.0, "37770": 969874432.0, "37775": 963665024.0, "37780": 962350272.0, "37785": 964519040.0, "37790": 940257152.0, "37795": 980025792.0, "37800": 963223360.0, "37805": 973374016.0, "37810": 983149440.0, "37815": 960066048.0, "37820": 938243328.0, "37825": 955229952.0, "37830": 960209152.0, "37835": 974689216.0, "37840": 984925952.0, "37845": 930983872.0, "37850": 952604032.0, "37855": 979340224.0, "37860": 960611008.0, "37865": 964909568.0, "37870": 960207232.0, "37875": 986849472.0, "37880": 951821632.0, "37885": 977205248.0, "37890": 966195648.0, "37895": 934328384.0, "37900": 976764544.0, "37905": 969596096.0, "37910": 979949440.0, "37915": 963085248.0, "37920": 947330624.0, "37925": 945790272.0, "37930": 985910144.0, "37935": 965593088.0, "37940": 960548480.0, "37945": 972592512.0, "37950": 941614272.0, "37955": 966011712.0, "37960": 981400768.0, "37965": 969360448.0, "37970": 992397824.0, "37975": 945170112.0, "37980": 949376192.0, "37985": 976812352.0, "37990": 973725696.0, "37995": 970616192.0, "38000": 941005760.0, "38005": 945598272.0, "38010": 980775232.0, "38015": 969415360.0, "38020": 977280320.0, "38025": 964194432.0, "38030": 950314048.0, "38035": 979651136.0, "38040": 960582528.0, "38045": 968799296.0, "38050": 982792064.0, "38055": 951498112.0, "38060": 971121856.0, "38065": 985853632.0, "38070": 967316224.0, "38075": 969977984.0, "38080": 947524672.0, "38085": 973006080.0, "38090": 978115456.0, "38095": 965308992.0, "38100": 956022592.0, "38105": 963291072.0, "38110": 946859264.0, "38115": 959769920.0, "38120": 972537472.0, "38125": 971404544.0, "38130": 974423232.0, "38135": 950458368.0, "38140": 966564672.0, "38145": 972114816.0, "38150": 974388544.0, "38155": 966775744.0, "38160": 949193920.0, "38165": 965286016.0, "38170": 980339520.0, "38175": 968025024.0, "38180": 965458368.0, "38185": 950114944.0, "38190": 933664832.0, "38195": 983403712.0, "38200": 996543424.0, "38205": 975667584.0, "38210": 964248384.0, "38215": 946584064.0, "38220": 973331840.0, "38225": 964213184.0, "38230": 955814592.0, "38235": 969750016.0, "38240": 934704192.0, "38245": 974418304.0, "38250": 993017984.0, "38255": 967865280.0, "38260": 968345600.0, "38265": 953445568.0, "38270": 949415936.0, "38275": 967178624.0, "38280": 980253312.0, "38285": 967726272.0, "38290": 962478912.0, "38295": 958149568.0, "38300": 972256640.0, "38305": 981310528.0, "38310": 959938240.0, "38315": 961880832.0, "38320": 954317888.0, "38325": 982275328.0, "38330": 974743552.0, "38335": 968625344.0, "38340": 979828864.0, "38345": 941982592.0, "38350": 956551872.0, "38355": 974596160.0, "38360": 963123008.0, "38365": 979162944.0, "38370": 942197184.0, "38375": 937892672.0, "38380": 978258240.0, "38385": 966668864.0, "38390": 953273280.0, "38395": 959605824.0, "38400": 942091904.0, "38405": 969633088.0, "38410": 975622336.0, "38415": 976250048.0, "38420": 964445120.0, "38425": 945188224.0, "38430": 953597120.0, "38435": 960567488.0, "38440": 981729920.0, "38445": 968948928.0, "38450": 946352064.0, "38455": 941478976.0, "38460": 961748288.0, "38465": 962196992.0, "38470": 960672960.0, "38475": 994033728.0, "38480": 953011072.0, "38485": 971912512.0, "38490": 972390848.0, "38495": 952106624.0, "38500": 969091136.0, "38505": 945378752.0, "38510": 971435072.0, "38515": 967914176.0, "38520": 959365312.0, "38525": 967582656.0, "38530": 945213184.0, "38535": 954169920.0, "38540": 966385664.0, "38545": 959352640.0, "38550": 962011136.0, "38555": 971274048.0, "38560": 969536384.0, "38565": 975194560.0, "38570": 964506304.0, "38575": 960478400.0, "38580": 969898496.0, "38585": 947135104.0, "38590": 962681984.0, "38595": 974788608.0, "38600": 968444288.0, "38605": 983081664.0, "38610": 932180224.0, "38615": 947191616.0, "38620": 971533376.0, "38625": 969831488.0, "38630": 981794432.0, "38635": 966624192.0, "38640": 948811776.0, "38645": 966496128.0, "38650": 980346496.0, "38655": 965419840.0, "38660": 956054016.0, "38665": 936473792.0, "38670": 945489536.0, "38675": 954918208.0, "38680": 974696128.0, "38685": 988315776.0, "38690": 938759360.0, "38695": 961225536.0, "38700": 971545088.0, "38705": 982550848.0, "38710": 961675008.0, "38715": 993017856.0, "38720": 955054016.0, "38725": 967953984.0, "38730": 962189952.0, "38735": 957349824.0, "38740": 967664960.0, "38745": 938742592.0, "38750": 989258624.0, "38755": 967226304.0, "38760": 967373568.0, "38765": 976471552.0, "38770": 950718080.0, "38775": 962786048.0, "38780": 961876800.0, "38785": 978780544.0, "38790": 984560704.0, "38795": 942117504.0, "38800": 970837568.0, "38805": 980260928.0, "38810": 950125888.0, "38815": 973848896.0, "38820": 954505152.0, "38825": 950206784.0, "38830": 975923456.0, "38835": 967344832.0, "38840": 970064000.0, "38845": 963719872.0, "38850": 945178240.0, "38855": 963260352.0, "38860": 978947712.0, "38865": 966865152.0, "38870": 961221504.0, "38875": 956851712.0, "38880": 963976448.0, "38885": 965936960.0, "38890": 962745344.0, "38895": 977546944.0, "38900": 951077824.0, "38905": 947219072.0, "38910": 958765248.0, "38915": 978232000.0, "38920": 980603904.0, "38925": 975698240.0, "38930": 933888768.0, "38935": 961005248.0, "38940": 968632832.0, "38945": 965827456.0, "38950": 967621312.0, "38955": 958535616.0, "38960": 955876928.0, "38965": 954464064.0, "38970": 967506368.0, "38975": 958174720.0, "38980": 948119296.0, "38985": 948143808.0, "38990": 964539136.0, "38995": 962191936.0, "39000": 966715456.0, "39005": 961968640.0, "39010": 941392704.0, "39015": 984318912.0, "39020": 977164800.0, "39025": 956384768.0, "39030": 958998784.0, "39035": 945313472.0, "39040": 984910848.0, "39045": 968595136.0, "39050": 955152448.0, "39055": 973484608.0, "39060": 950428096.0, "39065": 949891776.0, "39070": 969089728.0, "39075": 979402752.0, "39080": 976042688.0, "39085": 963381696.0, "39090": 947122624.0, "39095": 943569984.0, "39100": 973497408.0, "39105": 980491776.0, "39110": 969627584.0, "39115": 950148864.0, "39120": 962145856.0, "39125": 975328704.0, "39130": 989465920.0, "39135": 965111680.0, "39140": 945036672.0, "39145": 950574592.0, "39150": 963180032.0, "39155": 958928704.0, "39160": 966516608.0, "39165": 939472768.0, "39170": 937740160.0, "39175": 984960320.0, "39180": 962221376.0, "39185": 964966144.0, "39190": 974517568.0, "39195": 940984384.0, "39200": 964151872.0, "39205": 953685120.0, "39210": 977433792.0, "39215": 969363776.0, "39220": 944889216.0, "39225": 962929408.0, "39230": 961933248.0, "39235": 968226624.0, "39240": 971773376.0, "39245": 952228800.0, "39250": 940750656.0, "39255": 975955648.0, "39260": 958443776.0, "39265": 972275904.0, "39270": 965235264.0, "39275": 954935488.0, "39280": 969137664.0, "39285": 975374592.0, "39290": 973404736.0, "39295": 961689984.0, "39300": 936798848.0, "39305": 959315904.0, "39310": 975194688.0, "39315": 971810048.0, "39320": 963850560.0, "39325": 975414400.0, "39330": 943132608.0, "39335": 976359296.0, "39340": 963363328.0, "39345": 968832192.0, "39350": 983688768.0, "39355": 957918080.0, "39360": 972968448.0, "39365": 972699328.0, "39370": 982531456.0, "39375": 963836928.0, "39380": 933211776.0, "39385": 959163712.0, "39390": 968068352.0, "39395": 965533440.0, "39400": 963660288.0, "39405": 957009728.0, "39410": 937297984.0, "39415": 954415680.0, "39420": 978714624.0, "39425": 961760832.0, "39430": 961081280.0, "39435": 952026560.0, "39440": 971704448.0, "39445": 966926336.0, "39450": 956123520.0, "39455": 964641792.0, "39460": 937537664.0, "39465": 995100992.0, "39470": 973794112.0, "39475": 960556736.0, "39480": 957020352.0, "39485": 964282944.0, "39490": 963800448.0, "39495": 968518016.0, "39500": 961681088.0, "39505": 973561344.0, "39510": 941536512.0, "39515": 934793728.0, "39520": 971115776.0, "39525": 966360704.0, "39530": 981483904.0, "39535": 955491008.0, "39540": 955127872.0, "39545": 951591104.0, "39550": 970630016.0, "39555": 973819072.0, "39560": 990418816.0, "39565": 953603072.0, "39570": 957662336.0, "39575": 971686464.0, "39580": 960128576.0, "39585": 971636864.0, "39590": 976982144.0, "39595": 948741248.0, "39600": 954415104.0, "39605": 957941504.0, "39610": 968289216.0, "39615": 979157568.0, "39620": 934251840.0, "39625": 978343104.0, "39630": 948289088.0, "39635": 961437952.0, "39640": 972449280.0, "39645": 951423104.0, "39650": 975582208.0, "39655": 969819840.0, "39660": 973069376.0, "39665": 959906432.0, "39670": 963942592.0, "39675": 959199168.0, "39680": 968285632.0, "39685": 971563200.0, "39690": 967583360.0, "39695": 958297792.0, "39700": 933114176.0, "39705": 974542592.0, "39710": 984713408.0, "39715": 990486656.0, "39720": 964862208.0, "39725": 943754560.0, "39730": 976482304.0, "39735": 970686144.0, "39740": 965354496.0, "39745": 973653312.0, "39750": 929903360.0, "39755": 961325440.0, "39760": 970422464.0, "39765": 962466048.0, "39770": 967330688.0, "39775": 965175040.0, "39780": 948130560.0, "39785": 970341440.0, "39790": 972816896.0, "39795": 973908928.0, "39800": 968444352.0, "39805": 951073408.0, "39810": 978404928.0, "39815": 971975104.0, "39820": 973589568.0, "39825": 975519488.0, "39830": 951527936.0, "39835": 950301568.0, "39840": 972914240.0, "39845": 972374912.0, "39850": 976589632.0, "39855": 956127936.0, "39860": 941639168.0, "39865": 963819392.0, "39870": 968285056.0, "39875": 975444736.0, "39880": 975273536.0, "39885": 936911744.0, "39890": 964017600.0, "39895": 974996928.0, "39900": 969661056.0, "39905": 967651200.0, "39910": 938726912.0, "39915": 982177280.0, "39920": 978235264.0, "39925": 960310336.0, "39930": 966023808.0, "39935": 954593728.0, "39940": 948787584.0, "39945": 977705728.0, "39950": 973941952.0, "39955": 966299904.0, "39960": 967246336.0, "39965": 940301696.0, "39970": 972890880.0, "39975": 977882752.0, "39980": 980832960.0, "39985": 970423616.0, "39990": 947603904.0, "39995": 962967808.0, "40000": 968948928.0, "40005": 970181312.0, "40010": 967437824.0, "40015": 959373312.0, "40020": 951940608.0, "40025": 967891264.0, "40030": 970022336.0, "40035": 988701248.0, "40040": 958267776.0, "40045": 959795456.0, "40050": 966719424.0, "40055": 965453888.0, "40060": 966185024.0, "40065": 966196736.0, "40070": 955661760.0, "40075": 981852160.0, "40080": 975475648.0, "40085": 954550720.0, "40090": 970433856.0, "40095": 943219968.0, "40100": 951322048.0, "40105": 963922112.0, "40110": 956677952.0, "40115": 978856000.0, "40120": 973339712.0, "40125": 946173504.0, "40130": 986427328.0, "40135": 975478080.0, "40140": 971400640.0, "40145": 978719744.0, "40150": 929625920.0, "40155": 989448704.0, "40160": 983972608.0, "40165": 971532672.0, "40170": 983171904.0, "40175": 930842688.0, "40180": 964892800.0, "40185": 981435776.0, "40190": 975194112.0, "40195": 963992960.0, "40200": 941346560.0, "40205": 960944640.0, "40210": 984598720.0, "40215": 962994752.0, "40220": 965918912.0, "40225": 961497088.0, "40230": 944752832.0, "40235": 972055168.0, "40240": 968267840.0, "40245": 975534592.0, "40250": 976865216.0, "40255": 957357120.0, "40260": 974435456.0, "40265": 963609664.0, "40270": 961761280.0, "40275": 978438208.0, "40280": 962943104.0, "40285": 973150336.0, "40290": 971490240.0, "40295": 968952256.0, "40300": 970952448.0, "40305": 960061120.0, "40310": 941051968.0, "40315": 973389056.0, "40320": 964599488.0, "40325": 961068352.0, "40330": 975575488.0, "40335": 944158336.0, "40340": 972786176.0, "40345": 963667392.0, "40350": 967140928.0, "40355": 971594048.0, "40360": 945771520.0, "40365": 964863872.0, "40370": 969022848.0, "40375": 979320640.0, "40380": 982485696.0, "40385": 970206336.0, "40390": 926233792.0, "40395": 962254528.0, "40400": 979125312.0, "40405": 973433152.0, "40410": 963905408.0, "40415": 943447104.0, "40420": 965502208.0, "40425": 968578368.0, "40430": 962296640.0, "40435": 971225856.0, "40440": 942272640.0, "40445": 962571456.0, "40450": 973673728.0, "40455": 985869312.0, "40460": 971962560.0, "40465": 940689216.0, "40470": 947175680.0, "40475": 981595200.0, "40480": 972580672.0, "40485": 987932288.0, "40490": 940456640.0, "40495": 949637568.0, "40500": 969045632.0, "40505": 966459456.0, "40510": 973989824.0, "40515": 984456896.0, "40520": 940897536.0, "40525": 961901760.0, "40530": 973487104.0, "40535": 959513536.0, "40540": 955761280.0, "40545": 959506944.0, "40550": 965190400.0, "40555": 975483008.0, "40560": 973002240.0, "40565": 968782528.0, "40570": 976093568.0, "40575": 943076864.0, "40580": 975740864.0, "40585": 977573440.0, "40590": 974302528.0, "40595": 975758912.0, "40600": 960120576.0, "40605": 972172928.0, "40610": 984779328.0, "40615": 967899584.0, "40620": 984069120.0, "40625": 946325760.0, "40630": 948432768.0, "40635": 974630720.0, "40640": 982964800.0, "40645": 977373568.0, "40650": 952535744.0, "40655": 951584768.0, "40660": 964488704.0, "40665": 977502720.0, "40670": 991685568.0, "40675": 971047680.0, "40680": 947310848.0, "40685": 959287104.0, "40690": 966245760.0, "40695": 972799040.0, "40700": 966741696.0, "40705": 968994944.0, "40710": 987028480.0, "40715": 967203712.0, "40720": 961136576.0, "40725": 986930240.0, "40730": 963818752.0, "40735": 955373888.0, "40740": 966436544.0, "40745": 959771264.0, "40750": 965201984.0, "40755": 982259136.0, "40760": 945338304.0, "40765": 972480704.0, "40770": 972643648.0, "40775": 974511808.0, "40780": 975293440.0, "40785": 949666752.0, "40790": 972724480.0, "40795": 977224064.0, "40800": 964566720.0, "40805": 960503616.0, "40810": 949009536.0, "40815": 962172736.0, "40820": 971025152.0, "40825": 968316416.0, "40830": 967351104.0, "40835": 971071872.0, "40840": 943651200.0, "40845": 961951552.0, "40850": 980168128.0, "40855": 968376128.0, "40860": 969107968.0, "40865": 944213120.0, "40870": 978761152.0, "40875": 968431680.0, "40880": 977570880.0, "40885": 965841472.0, "40890": 947151296.0, "40895": 952494400.0, "40900": 953347008.0, "40905": 982061952.0, "40910": 971215424.0, "40915": 959678720.0, "40920": 945110592.0, "40925": 966908672.0, "40930": 971496128.0, "40935": 981014720.0, "40940": 974697728.0, "40945": 946578560.0, "40950": 969721088.0, "40955": 968260480.0, "40960": 969617856.0, "40965": 974885760.0, "40970": 945926720.0, "40975": 951750720.0, "40980": 970974464.0, "40985": 971958912.0, "40990": 962860672.0, "40995": 942471104.0, "41000": 973135168.0, "41005": 963867200.0, "41010": 966404736.0, "41015": 954084288.0, "41020": 960884800.0, "41025": 935204288.0, "41030": 974715520.0, "41035": 973602240.0, "41040": 954779840.0, "41045": 973462656.0, "41050": 946860352.0, "41055": 963660800.0, "41060": 985940288.0, "41065": 974204416.0, "41070": 969616256.0, "41075": 943151296.0, "41080": 959456448.0, "41085": 968929024.0, "41090": 961059520.0, "41095": 975852352.0, "41100": 943746432.0, "41105": 947658688.0, "41110": 962517376.0, "41115": 967628224.0, "41120": 981946368.0, "41125": 956602944.0, "41130": 947300672.0, "41135": 960429888.0, "41140": 966870208.0, "41145": 964187776.0, "41150": 981475840.0, "41155": 952757824.0, "41160": 940435648.0, "41165": 972423872.0, "41170": 969516800.0, "41175": 957033984.0, "41180": 958665984.0, "41185": 958449152.0, "41190": 972866752.0, "41195": 966250752.0, "41200": 978370176.0, "41205": 965739264.0, "41210": 940032896.0, "41215": 983259520.0, "41220": 975452416.0, "41225": 962204032.0, "41230": 994214208.0, "41235": 944876416.0, "41240": 970655744.0, "41245": 955375296.0, "41250": 980622912.0, "41255": 961323776.0, "41260": 965693760.0, "41265": 951890048.0, "41270": 965282112.0, "41275": 975508544.0, "41280": 976578112.0, "41285": 958070208.0, "41290": 933606720.0, "41295": 968823616.0, "41300": 972585152.0, "41305": 963358784.0, "41310": 984474944.0, "41315": 936701696.0, "41320": 944687616.0, "41325": 971011584.0, "41330": 969769152.0, "41335": 974137920.0, "41340": 946991424.0, "41345": 936562496.0, "41350": 969593344.0, "41355": 974093632.0, "41360": 982735168.0, "41365": 944704000.0, "41370": 952008640.0, "41375": 964852480.0, "41380": 958819008.0, "41385": 965921024.0, "41390": 978306176.0, "41395": 939588864.0, "41400": 947677184.0, "41405": 964797056.0, "41410": 977651392.0, "41415": 963231168.0, "41420": 947117440.0, "41425": 951454208.0, "41430": 982514816.0, "41435": 975863232.0, "41440": 966534592.0, "41445": 958494912.0, "41450": 942604032.0, "41455": 988799232.0, "41460": 956425856.0, "41465": 955890176.0, "41470": 973450816.0, "41475": 955956288.0, "41480": 980371136.0, "41485": 967227008.0, "41490": 965580288.0, "41495": 964828672.0, "41500": 939845120.0, "41505": 967100096.0, "41510": 958164544.0, "41515": 983476608.0, "41520": 965828480.0, "41525": 954395840.0, "41530": 970821312.0, "41535": 960157312.0, "41540": 972547840.0, "41545": 988239168.0, "41550": 969244992.0, "41555": 949723456.0, "41560": 952138176.0, "41565": 977737600.0, "41570": 975490688.0, "41575": 958225536.0, "41580": 946154048.0, "41585": 967104704.0, "41590": 976195520.0, "41595": 960220096.0, "41600": 980973440.0, "41605": 946123456.0, "41610": 949434880.0, "41615": 965854720.0, "41620": 971563520.0, "41625": 978830720.0, "41630": 961014400.0, "41635": 950909376.0, "41640": 977953728.0, "41645": 958770688.0, "41650": 973567424.0, "41655": 968514432.0, "41660": 943047040.0, "41665": 976184320.0, "41670": 984904512.0, "41675": 958605504.0, "41680": 972738752.0, "41685": 940902784.0, "41690": 965496960.0, "41695": 961309632.0, "41700": 976822912.0, "41705": 955963200.0, "41710": 957173184.0, "41715": 952345216.0, "41720": 973628672.0, "41725": 972406016.0, "41730": 952806336.0, "41735": 960218688.0, "41740": 947023680.0, "41745": 964914624.0, "41750": 973482432.0, "41755": 975169856.0, "41760": 974293696.0, "41765": 949476480.0, "41770": 965329472.0, "41775": 964667072.0, "41780": 973836416.0, "41785": 977613248.0, "41790": 953822208.0, "41795": 955245440.0, "41800": 961044352.0, "41805": 958854912.0, "41810": 971757952.0, "41815": 961233024.0, "41820": 955203840.0, "41825": 962319680.0, "41830": 957823360.0, "41835": 970759744.0, "41840": 974693184.0, "41845": 934449216.0, "41850": 973497152.0, "41855": 970490560.0, "41860": 976521664.0, "41865": 980042112.0, "41870": 967392000.0, "41875": 963371776.0, "41880": 966399296.0, "41885": 961187264.0, "41890": 966290048.0, "41895": 952738432.0, "41900": 931181568.0, "41905": 983136448.0, "41910": 975461312.0, "41915": 967735744.0, "41920": 965206272.0, "41925": 936807232.0, "41930": 968493376.0, "41935": 978155200.0, "41940": 987701568.0, "41945": 957372992.0, "41950": 966094720.0, "41955": 980047232.0, "41960": 960225600.0, "41965": 965005056.0, "41970": 968483072.0, "41975": 955146368.0, "41980": 955632768.0, "41985": 969862976.0, "41990": 979014848.0, "41995": 970461184.0, "42000": 977828864.0, "42005": 937471296.0, "42010": 967112512.0, "42015": 976366464.0, "42020": 966915776.0, "42025": 963451200.0, "42030": 955202304.0, "42035": 948521280.0, "42040": 958734336.0, "42045": 969506944.0, "42050": 979513088.0, "42055": 927858240.0, "42060": 935676864.0, "42065": 977626688.0, "42070": 964669568.0, "42075": 979118912.0, "42080": 952204288.0, "42085": 947811648.0, "42090": 967108096.0, "42095": 967025408.0, "42100": 959684480.0, "42105": 968138112.0, "42110": 966169216.0, "42115": 977983680.0, "42120": 968044480.0, "42125": 971934208.0, "42130": 964215616.0, "42135": 947072384.0, "42140": 969635648.0, "42145": 974428992.0, "42150": 971357184.0, "42155": 963721664.0, "42160": 949094144.0, "42165": 942843328.0, "42170": 964226432.0, "42175": 988088640.0, "42180": 956935936.0, "42185": 952052288.0, "42190": 942471296.0, "42195": 971687744.0, "42200": 976745792.0, "42205": 959628864.0, "42210": 966896576.0, "42215": 945922432.0, "42220": 956349248.0, "42225": 985175360.0, "42230": 959464832.0, "42235": 975262592.0, "42240": 955048192.0, "42245": 944105408.0, "42250": 962708672.0, "42255": 962998016.0, "42260": 967678592.0, "42265": 965294528.0, "42270": 955671552.0, "42275": 975115008.0, "42280": 970441856.0, "42285": 963711616.0, "42290": 978297280.0, "42295": 951733888.0, "42300": 966621184.0, "42305": 979176704.0, "42310": 953229952.0, "42315": 971884160.0, "42320": 961140480.0, "42325": 957468864.0, "42330": 974662656.0, "42335": 972115968.0, "42340": 960805632.0, "42345": 969176192.0, "42350": 947598848.0, "42355": 985856896.0, "42360": 973266176.0, "42365": 969778496.0, "42370": 975520960.0, "42375": 946524160.0, "42380": 961241472.0, "42385": 976132160.0, "42390": 981166528.0, "42395": 948520384.0, "42400": 935898176.0, "42405": 932594880.0, "42410": 969776064.0, "42415": 976196288.0, "42420": 974767232.0, "42425": 953764992.0, "42430": 939159744.0, "42435": 975101504.0, "42440": 968136640.0, "42445": 984377792.0, "42450": 970476480.0, "42455": 950417088.0, "42460": 974063616.0, "42465": 967977856.0, "42470": 972818048.0, "42475": 971652032.0, "42480": 945696832.0, "42485": 941859712.0, "42490": 960485568.0, "42495": 972640128.0, "42500": 958169856.0, "42505": 962376960.0, "42510": 945334464.0, "42515": 966455552.0, "42520": 970228032.0, "42525": 976982784.0, "42530": 972181120.0, "42535": 959396352.0, "42540": 971129472.0, "42545": 971085568.0, "42550": 964016320.0, "42555": 961916224.0, "42560": 947807552.0, "42565": 951106048.0, "42570": 970485056.0, "42575": 972111424.0, "42580": 969739584.0, "42585": 953813632.0, "42590": 967799744.0, "42595": 957494464.0, "42600": 982271872.0, "42605": 971553920.0, "42610": 979101184.0, "42615": 953047168.0, "42620": 970142848.0, "42625": 980117312.0, "42630": 983649280.0, "42635": 967077056.0, "42640": 935639040.0, "42645": 962149696.0, "42650": 969350016.0, "42655": 985986752.0, "42660": 977503808.0, "42665": 948560896.0, "42670": 956381568.0, "42675": 982633920.0, "42680": 967092224.0, "42685": 971701568.0, "42690": 977762304.0, "42695": 944024960.0, "42700": 972580608.0, "42705": 972670784.0, "42710": 968992896.0, "42715": 974599552.0, "42720": 959453056.0, "42725": 973321792.0, "42730": 975170944.0, "42735": 960209728.0, "42740": 983690432.0, "42745": 970434752.0, "42750": 956456448.0, "42755": 968021312.0, "42760": 968535168.0, "42765": 973150144.0, "42770": 963694464.0, "42775": 944781440.0, "42780": 985470208.0, "42785": 968480640.0, "42790": 975853184.0, "42795": 983914176.0, "42800": 941617792.0, "42805": 982393600.0, "42810": 970771840.0, "42815": 962769152.0, "42820": 969110208.0, "42825": 937321728.0, "42830": 962838656.0, "42835": 962279616.0, "42840": 988629760.0, "42845": 973003776.0, "42850": 950453056.0, "42855": 948921920.0, "42860": 978591872.0, "42865": 978154944.0, "42870": 977320256.0, "42875": 972425408.0, "42880": 949318528.0, "42885": 981526208.0, "42890": 964422528.0, "42895": 972681856.0, "42900": 971505792.0, "42905": 946512320.0, "42910": 947274304.0, "42915": 969317248.0, "42920": 966597248.0, "42925": 981796928.0, "42930": 958445120.0, "42935": 953170496.0, "42940": 978023872.0, "42945": 957157504.0, "42950": 969317888.0, "42955": 967198208.0, "42960": 949749184.0, "42965": 980675648.0, "42970": 976528960.0, "42975": 985384960.0, "42980": 958040512.0, "42985": 938719872.0, "42990": 977371520.0, "42995": 969206400.0, "43000": 970828864.0, "43005": 976707264.0, "43010": 955153152.0, "43015": 974875776.0, "43020": 982102016.0, "43025": 973807040.0, "43030": 967172928.0, "43035": 979458688.0, "43040": 953688896.0, "43045": 961184960.0, "43050": 973840128.0, "43055": 975971584.0, "43060": 970621568.0, "43065": 948047552.0, "43070": 961550272.0, "43075": 969619520.0, "43080": 986434752.0, "43085": 981406080.0, "43090": 945947264.0, "43095": 962568448.0, "43100": 973198848.0, "43105": 981951936.0, "43110": 969097664.0, "43115": 961355840.0, "43120": 950644672.0, "43125": 971435136.0, "43130": 956419456.0, "43135": 970558720.0, "43140": 972356096.0, "43145": 962024192.0, "43150": 967229312.0, "43155": 970896256.0, "43160": 968675392.0, "43165": 973954112.0, "43170": 955554496.0, "43175": 967353408.0, "43180": 968574912.0, "43185": 971233792.0, "43190": 962304128.0, "43195": 948424512.0, "43200": 944792704.0, "43205": 972468096.0, "43210": 953299520.0, "43215": 972161920.0, "43220": 965375168.0, "43225": 950596096.0, "43230": 979550592.0, "43235": 966272256.0, "43240": 962464320.0, "43245": 978106944.0, "43250": 952615040.0, "43255": 973844416.0, "43260": 965989632.0, "43265": 970890304.0, "43270": 957070336.0, "43275": 945222272.0, "43280": 957119104.0, "43285": 957782592.0, "43290": 965890432.0, "43295": 985587712.0, "43300": 961549056.0, "43305": 932249984.0, "43310": 969526912.0, "43315": 971927296.0, "43320": 973753152.0, "43325": 974679616.0, "43330": 951751168.0, "43335": 975919040.0, "43340": 973698944.0, "43345": 976767488.0, "43350": 969145856.0, "43355": 946703296.0, "43360": 962762816.0, "43365": 971065856.0, "43370": 961609216.0, "43375": 992122112.0, "43380": 973857344.0, "43385": 935011776.0, "43390": 974427712.0, "43395": 967000320.0, "43400": 954472128.0, "43405": 967137728.0, "43410": 943287936.0, "43415": 984854336.0, "43420": 971064064.0, "43425": 960050176.0, "43430": 966860608.0, "43435": 948398144.0, "43440": 946605760.0, "43445": 969232128.0, "43450": 968682880.0, "43455": 959600896.0, "43460": 963457408.0, "43465": 947600640.0, "43470": 979254016.0, "43475": 960295552.0, "43480": 963405888.0, "43485": 960307904.0, "43490": 939681536.0, "43495": 967407168.0, "43500": 990272640.0, "43505": 973871040.0, "43510": 969838848.0, "43515": 948892672.0, "43520": 951628352.0, "43525": 968298752.0, "43530": 976792064.0, "43535": 985706496.0, "43540": 939822144.0, "43545": 942881088.0, "43550": 951923392.0, "43555": 968001792.0, "43560": 986513024.0, "43565": 954970688.0, "43570": 939022976.0, "43575": 970231680.0, "43580": 976582208.0, "43585": 975000832.0, "43590": 969739520.0, "43595": 942844160.0, "43600": 957475200.0, "43605": 977386240.0, "43610": 966976832.0, "43615": 985966144.0, "43620": 943374592.0, "43625": 932263936.0, "43630": 975421760.0, "43635": 944274816.0, "43640": 975647296.0, "43645": 950633280.0, "43650": 945139840.0, "43655": 968132608.0, "43660": 971521664.0, "43665": 964982144.0, "43670": 965092928.0, "43675": 950578304.0, "43680": 964274240.0, "43685": 970785664.0, "43690": 969655808.0, "43695": 963720320.0, "43700": 930811520.0, "43705": 965048512.0, "43710": 967902976.0, "43715": 968828352.0, "43720": 960545408.0, "43725": 953470464.0, "43730": 952657280.0, "43735": 972707520.0, "43740": 994069440.0, "43745": 963057984.0, "43750": 965363712.0, "43755": 940547968.0, "43760": 968310272.0, "43765": 965692992.0, "43770": 953086144.0, "43775": 969626176.0, "43780": 937024064.0, "43785": 965134144.0, "43790": 956956160.0, "43795": 949172736.0, "43800": 970879296.0, "43805": 932489408.0, "43810": 957007296.0, "43815": 961283840.0, "43820": 970090752.0, "43825": 961407424.0, "43830": 974689600.0, "43835": 934897088.0, "43840": 962849984.0, "43845": 972793152.0, "43850": 961633024.0, "43855": 962603840.0, "43860": 953294080.0, "43865": 959883136.0, "43870": 975194752.0, "43875": 972915264.0, "43880": 961165248.0, "43885": 941303296.0, "43890": 970790528.0, "43895": 982339456.0, "43900": 974076288.0, "43905": 969974976.0, "43910": 951228032.0, "43915": 939044416.0, "43920": 968087936.0, "43925": 969999488.0, "43930": 958794112.0, "43935": 978251648.0, "43940": 957338112.0, "43945": 996316992.0, "43950": 968584960.0, "43955": 983234688.0, "43960": 970098432.0, "43965": 947924736.0, "43970": 964513216.0, "43975": 968685504.0, "43980": 973322624.0, "43985": 960334528.0, "43990": 955779904.0, "43995": 944006208.0, "44000": 961339520.0, "44005": 951284416.0, "44010": 970477376.0, "44015": 984716480.0, "44020": 940270848.0, "44025": 955824704.0, "44030": 976283328.0, "44035": 977613056.0, "44040": 978851392.0, "44045": 933277376.0, "44050": 955301120.0, "44055": 967358656.0, "44060": 981546624.0, "44065": 973535872.0, "44070": 939892032.0, "44075": 932986496.0, "44080": 975616256.0, "44085": 963576640.0, "44090": 969212928.0, "44095": 962290240.0, "44100": 951694336.0, "44105": 964145088.0, "44110": 966247296.0, "44115": 964813568.0, "44120": 961664704.0, "44125": 951726720.0, "44130": 965545984.0, "44135": 970796864.0, "44140": 964820672.0, "44145": 973911616.0, "44150": 945709440.0, "44155": 950750336.0, "44160": 965871744.0, "44165": 987711936.0, "44170": 982241216.0, "44175": 958219008.0, "44180": 938932992.0, "44185": 975263360.0, "44190": 966425856.0, "44195": 966511872.0, "44200": 963623936.0, "44205": 937402112.0, "44210": 970994112.0, "44215": 968788736.0, "44220": 974359808.0, "44225": 973353472.0, "44230": 956119040.0, "44235": 962585344.0, "44240": 956998848.0, "44245": 966539968.0, "44250": 972669888.0, "44255": 961438720.0, "44260": 962664512.0, "44265": 962800128.0, "44270": 972159424.0, "44275": 960564288.0, "44280": 975369152.0, "44285": 938447232.0, "44290": 956344000.0, "44295": 971957056.0, "44300": 979131392.0, "44305": 968580416.0, "44310": 954028928.0, "44315": 955590720.0, "44320": 986658816.0, "44325": 972229824.0, "44330": 950136832.0, "44335": 950271104.0, "44340": 943890240.0, "44345": 983792640.0, "44350": 965195456.0, "44355": 959165248.0, "44360": 950362944.0, "44365": 934645120.0, "44370": 965915328.0, "44375": 974040704.0, "44380": 973133632.0, "44385": 960060352.0, "44390": 948138880.0, "44395": 964085184.0, "44400": 978573056.0, "44405": 981219264.0, "44410": 965479680.0, "44415": 961712384.0, "44420": 956100736.0, "44425": 967719872.0, "44430": 969808448.0, "44435": 971621248.0, "44440": 953748096.0, "44445": 936864448.0, "44450": 966886528.0, "44455": 956862464.0, "44460": 971782784.0, "44465": 989200640.0, "44470": 958149504.0, "44475": 944299904.0, "44480": 956277504.0, "44485": 963300224.0, "44490": 969802496.0, "44495": 954302592.0, "44500": 966172864.0, "44505": 976484160.0, "44510": 969385472.0, "44515": 968203520.0, "44520": 965838976.0, "44525": 957353472.0, "44530": 967075968.0, "44535": 986183040.0, "44540": 970982592.0, "44545": 971055360.0, "44550": 960515136.0, "44555": 951303680.0, "44560": 976025536.0, "44565": 971431104.0, "44570": 959060224.0, "44575": 945798912.0, "44580": 968039936.0, "44585": 960589504.0, "44590": 973274624.0, "44595": 976006016.0, "44600": 942810176.0, "44605": 956819328.0, "44610": 962892032.0, "44615": 982252544.0, "44620": 986537984.0, "44625": 970465792.0, "44630": 951113152.0, "44635": 968614912.0, "44640": 982430656.0, "44645": 968291072.0, "44650": 966204224.0, "44655": 953217920.0, "44660": 960210176.0, "44665": 960933248.0, "44670": 960748928.0, "44675": 963800256.0, "44680": 945787328.0, "44685": 943865792.0, "44690": 960860992.0, "44695": 964490368.0, "44700": 980281536.0, "44705": 976322368.0, "44710": 962477696.0, "44715": 964387136.0, "44720": 971031168.0, "44725": 952052480.0, "44730": 959171072.0, "44735": 965893696.0, "44740": 971018944.0, "44745": 968992640.0, "44750": 964761024.0, "44755": 962285824.0, "44760": 942054656.0, "44765": 955957568.0, "44770": 976291008.0, "44775": 970172416.0, "44780": 976522688.0, "44785": 953827328.0, "44790": 953107968.0, "44795": 954572928.0, "44800": 982041920.0, "44805": 988917696.0, "44810": 966057216.0, "44815": 948259456.0, "44820": 974907584.0, "44825": 954859776.0, "44830": 978956096.0, "44835": 968993088.0, "44840": 926662464.0, "44845": 971683456.0, "44850": 974581504.0, "44855": 980656896.0, "44860": 962420928.0, "44865": 947256128.0, "44870": 959950464.0, "44875": 964387584.0, "44880": 976203520.0, "44885": 959135232.0, "44890": 979285376.0, "44895": 940181120.0, "44900": 973764736.0, "44905": 959054208.0, "44910": 971409024.0, "44915": 964360960.0, "44920": 947797184.0, "44925": 959914496.0, "44930": 964669184.0, "44935": 977331968.0, "44940": 966809152.0, "44945": 946800256.0, "44950": 961811968.0, "44955": 979360256.0, "44960": 961863808.0, "44965": 961155776.0, "44970": 971773056.0, "44975": 955275648.0, "44980": 980470976.0, "44985": 961726720.0, "44990": 967971200.0, "44995": 973718144.0, "45000": 940709888.0, "45005": 957532608.0, "45010": 970957312.0, "45015": 978633088.0, "45020": 968397440.0, "45025": 948523520.0, "45030": 971533568.0, "45035": 964356352.0, "45040": 973028608.0, "45045": 973723648.0, "45050": 957770496.0, "45055": 942005632.0, "45060": 962916736.0, "45065": 958492928.0, "45070": 994507264.0, "45075": 982423552.0, "45080": 948224384.0, "45085": 970266496.0, "45090": 980607552.0, "45095": 994064256.0, "45100": 970332480.0, "45105": 938393280.0, "45110": 951676544.0, "45115": 985705344.0, "45120": 958988544.0, "45125": 977542656.0, "45130": 959660288.0, "45135": 965498880.0, "45140": 986178560.0, "45145": 970674688.0, "45150": 969059840.0, "45155": 968231680.0, "45160": 958115072.0, "45165": 968287424.0, "45170": 978875072.0, "45175": 968160832.0, "45180": 956450944.0, "45185": 958237632.0, "45190": 974317952.0, "45195": 962545280.0, "45200": 976020480.0, "45205": 983037888.0, "45210": 938860544.0, "45215": 955821312.0, "45220": 991993024.0, "45225": 972504448.0, "45230": 967396352.0, "45235": 970096064.0, "45240": 946111296.0, "45245": 975264064.0, "45250": 978316672.0, "45255": 979624704.0, "45260": 967948608.0, "45265": 939092352.0, "45270": 983704128.0, "45275": 966993408.0, "45280": 979110016.0, "45285": 979337344.0, "45290": 945743168.0, "45295": 955007232.0, "45300": 975410368.0, "45305": 980214464.0, "45310": 969116288.0, "45315": 966124096.0, "45320": 944341312.0, "45325": 979458560.0, "45330": 983557824.0, "45335": 977521536.0, "45340": 960594816.0, "45345": 947208704.0, "45350": 967891456.0, "45355": 967523904.0, "45360": 969764096.0, "45365": 977813632.0, "45370": 946977600.0, "45375": 948680448.0, "45380": 966850112.0, "45385": 958530560.0, "45390": 963649536.0, "45395": 961191744.0, "45400": 945617728.0, "45405": 976846272.0, "45410": 963070912.0, "45415": 965548416.0, "45420": 963243264.0, "45425": 953146624.0, "45430": 972809344.0, "45435": 977118720.0, "45440": 973306112.0, "45445": 959465088.0, "45450": 947649408.0, "45455": 971001216.0, "45460": 976756608.0, "45465": 958715904.0, "45470": 977429120.0, "45475": 954385280.0, "45480": 939575168.0, "45485": 977266240.0, "45490": 967493952.0, "45495": 982989888.0, "45500": 981472256.0, "45505": 927955776.0, "45510": 973407296.0, "45515": 962088960.0, "45520": 978719616.0, "45525": 989636800.0, "45530": 942145728.0, "45535": 971724096.0, "45540": 961247232.0, "45545": 965903040.0, "45550": 976503808.0, "45555": 961867520.0, "45560": 964538880.0, "45565": 953196544.0, "45570": 981451200.0, "45575": 971958976.0, "45580": 952346432.0, "45585": 944713280.0, "45590": 989684288.0, "45595": 975669248.0, "45600": 969548928.0, "45605": 975370432.0, "45610": 960208448.0, "45615": 968624768.0, "45620": 966922176.0, "45625": 978501696.0, "45630": 956597120.0, "45635": 948832320.0, "45640": 950480448.0, "45645": 977422208.0, "45650": 963951296.0, "45655": 971491008.0, "45660": 963305344.0, "45665": 957448128.0, "45670": 974427584.0, "45675": 962780160.0, "45680": 972731968.0, "45685": 969588544.0, "45690": 952064704.0, "45695": 973299968.0, "45700": 969111936.0, "45705": 972340160.0, "45710": 961905664.0, "45715": 936221760.0, "45720": 950208704.0, "45725": 967218176.0, "45730": 966587456.0, "45735": 988867136.0, "45740": 946384640.0, "45745": 945608256.0, "45750": 970360704.0, "45755": 959050112.0, "45760": 965685504.0, "45765": 979839680.0, "45770": 941360064.0, "45775": 966659968.0, "45780": 969308352.0, "45785": 958635776.0, "45790": 969325440.0, "45795": 953925312.0, "45800": 941566528.0, "45805": 971976768.0, "45810": 977641024.0, "45815": 964613824.0, "45820": 942287552.0, "45825": 964126976.0, "45830": 960248512.0, "45835": 965591744.0, "45840": 972869248.0, "45845": 975448448.0, "45850": 941631488.0, "45855": 963807744.0, "45860": 973481856.0, "45865": 953287296.0, "45870": 969008960.0, "45875": 949198080.0, "45880": 974662848.0, "45885": 986318848.0, "45890": 972854272.0, "45895": 964348864.0, "45900": 944448896.0, "45905": 965608384.0, "45910": 976428864.0, "45915": 953204032.0, "45920": 957556672.0, "45925": 950005504.0, "45930": 938197376.0, "45935": 986628096.0, "45940": 959404544.0, "45945": 973139136.0, "45950": 977001344.0, "45955": 940616320.0, "45960": 976370176.0, "45965": 962241792.0, "45970": 979174912.0, "45975": 975111104.0, "45980": 918157504.0, "45985": 963212928.0, "45990": 959238528.0, "45995": 968887680.0, "46000": 972027136.0, "46005": 966689920.0, "46010": 951119616.0, "46015": 965136512.0, "46020": 978936704.0, "46025": 968913408.0, "46030": 969816640.0, "46035": 951431552.0, "46040": 953275840.0, "46045": 973622656.0, "46050": 962425792.0, "46055": 972542400.0, "46060": 954091328.0, "46065": 968330560.0, "46070": 952433792.0, "46075": 966848256.0, "46080": 961076544.0, "46085": 939837184.0, "46090": 970455744.0, "46095": 985182400.0, "46100": 970818176.0, "46105": 964047104.0, "46110": 942456704.0, "46115": 953283008.0, "46120": 978321728.0, "46125": 969169408.0, "46130": 980073728.0, "46135": 967284928.0, "46140": 952709568.0, "46145": 956918656.0, "46150": 963957760.0, "46155": 968721024.0, "46160": 963790976.0, "46165": 940458304.0, "46170": 971813376.0, "46175": 977996928.0, "46180": 967341888.0, "46185": 968009344.0, "46190": 950691328.0, "46195": 951764224.0, "46200": 954424832.0, "46205": 975251456.0, "46210": 965314688.0, "46215": 976561728.0, "46220": 953248960.0, "46225": 970972416.0, "46230": 962267776.0, "46235": 969614272.0, "46240": 976117184.0, "46245": 962740352.0, "46250": 984859840.0, "46255": 972497728.0, "46260": 974403456.0, "46265": 957291392.0, "46270": 946010240.0, "46275": 960371200.0, "46280": 960503808.0, "46285": 980225024.0, "46290": 974458880.0, "46295": 972476352.0, "46300": 933067712.0, "46305": 961168448.0, "46310": 973045888.0, "46315": 963349312.0, "46320": 947193984.0, "46325": 951591232.0, "46330": 975572736.0, "46335": 979085696.0, "46340": 972139264.0, "46345": 978393024.0, "46350": 939675840.0, "46355": 955141696.0, "46360": 976635392.0, "46365": 967847104.0, "46370": 969343040.0, "46375": 952500352.0, "46380": 939651008.0, "46385": 987122880.0, "46390": 971159936.0, "46395": 966100352.0, "46400": 957062976.0, "46405": 937421760.0, "46410": 979327744.0, "46415": 975972864.0, "46420": 972512000.0, "46425": 958496576.0, "46430": 944623168.0, "46435": 953325888.0, "46440": 950597952.0, "46445": 981405632.0, "46450": 970214912.0, "46455": 973261632.0, "46460": 947066624.0, "46465": 968673920.0, "46470": 988813120.0, "46475": 961266688.0, "46480": 975262464.0, "46485": 947727360.0, "46490": 964350208.0, "46495": 959700224.0, "46500": 967507072.0, "46505": 950364608.0, "46510": 955142080.0, "46515": 978580480.0, "46520": 964067648.0, "46525": 959893760.0, "46530": 974185984.0, "46535": 949754816.0, "46540": 952361792.0, "46545": 975624960.0, "46550": 965775360.0, "46555": 950664640.0, "46560": 961396288.0, "46565": 943879232.0, "46570": 979931264.0, "46575": 968089024.0, "46580": 976430784.0, "46585": 956496960.0, "46590": 945830656.0, "46595": 957802048.0, "46600": 973749056.0, "46605": 967004224.0, "46610": 971902720.0, "46615": 958587776.0, "46620": 948901312.0, "46625": 969301440.0, "46630": 965118208.0, "46635": 967309760.0, "46640": 956707328.0, "46645": 941623680.0, "46650": 963232576.0, "46655": 963244800.0, "46660": 957924352.0, "46665": 959793728.0, "46670": 943215808.0, "46675": 971661952.0, "46680": 965141952.0, "46685": 971193536.0, "46690": 974638080.0, "46695": 962875072.0, "46700": 971706816.0, "46705": 976828736.0, "46710": 965860608.0, "46715": 962697344.0, "46720": 967408512.0, "46725": 961555072.0, "46730": 955769216.0, "46735": 971142272.0, "46740": 967698176.0, "46745": 966407424.0, "46750": 949051136.0, "46755": 972758208.0, "46760": 968212544.0, "46765": 969267008.0, "46770": 984900544.0, "46775": 941407424.0, "46780": 938859776.0, "46785": 962045184.0, "46790": 955854848.0, "46795": 971995520.0, "46800": 942857856.0, "46805": 949233728.0, "46810": 973141504.0, "46815": 980436224.0, "46820": 966770112.0, "46825": 963803136.0, "46830": 945155456.0, "46835": 973964480.0, "46840": 979261824.0, "46845": 970858880.0, "46850": 956005056.0, "46855": 940619008.0, "46860": 963883712.0, "46865": 966501184.0, "46870": 977242048.0, "46875": 961922176.0, "46880": 942719488.0, "46885": 952065280.0, "46890": 982621056.0, "46895": 958434112.0, "46900": 968606592.0, "46905": 953208064.0, "46910": 949884224.0, "46915": 975233152.0, "46920": 960068288.0, "46925": 979229120.0, "46930": 983089152.0, "46935": 951585728.0, "46940": 958398464.0, "46945": 949548864.0, "46950": 964496192.0, "46955": 973655232.0, "46960": 948976640.0, "46965": 965493248.0, "46970": 981273600.0, "46975": 969951040.0, "46980": 957943424.0, "46985": 929991296.0, "46990": 932412416.0, "46995": 977973184.0, "47000": 973539712.0, "47005": 969273024.0, "47010": 972083264.0, "47015": 942133312.0, "47020": 979483904.0, "47025": 975136832.0, "47030": 972996928.0, "47035": 960556992.0, "47040": 946027968.0, "47045": 962233088.0, "47050": 972507072.0, "47055": 976554944.0, "47060": 976953984.0, "47065": 970779584.0, "47070": 948291200.0, "47075": 974767552.0, "47080": 982836736.0, "47085": 962335488.0, "47090": 987365184.0, "47095": 935047552.0, "47100": 956919360.0, "47105": 969019328.0, "47110": 983131520.0, "47115": 977621760.0, "47120": 939266816.0, "47125": 982905536.0, "47130": 983996864.0, "47135": 969623360.0, "47140": 973619072.0, "47145": 964405568.0, "47150": 956953600.0, "47155": 983234752.0, "47160": 968491520.0, "47165": 975816832.0, "47170": 970401152.0, "47175": 964594944.0, "47180": 979411584.0, "47185": 962180672.0, "47190": 973944512.0, "47195": 977922816.0, "47200": 963325248.0, "47205": 971082368.0, "47210": 971223680.0, "47215": 976302592.0, "47220": 976650944.0, "47225": 948351040.0, "47230": 949942336.0, "47235": 975590336.0, "47240": 977672768.0, "47245": 978371776.0, "47250": 953692928.0, "47255": 935033664.0, "47260": 985841216.0, "47265": 982823168.0, "47270": 965897920.0, "47275": 963637120.0, "47280": 935062656.0, "47285": 964401024.0, "47290": 981855104.0, "47295": 971995520.0, "47300": 987082496.0, "47305": 949739904.0, "47310": 965566528.0, "47315": 987744768.0, "47320": 974559808.0, "47325": 972480704.0, "47330": 963257536.0, "47335": 940701696.0, "47340": 967242432.0, "47345": 982685248.0, "47350": 976924096.0, "47355": 973693888.0, "47360": 951636224.0, "47365": 974988736.0, "47370": 957423744.0, "47375": 953572928.0, "47380": 983517440.0, "47385": 960341056.0, "47390": 957146176.0, "47395": 966354560.0, "47400": 968040704.0, "47405": 975731072.0, "47410": 932504832.0, "47415": 950064512.0, "47420": 979871488.0, "47425": 969704832.0, "47430": 965235264.0, "47435": 970424512.0, "47440": 948910784.0, "47445": 972317760.0, "47450": 967299392.0, "47455": 968281088.0, "47460": 975964544.0, "47465": 950461120.0, "47470": 981283264.0, "47475": 971667712.0, "47480": 972439552.0, "47485": 976469632.0, "47490": 958373824.0, "47495": 951083136.0, "47500": 974866048.0, "47505": 980006144.0, "47510": 985915520.0, "47515": 961382528.0, "47520": 942418816.0, "47525": 975547264.0, "47530": 976654976.0, "47535": 974510848.0, "47540": 968787136.0, "47545": 943159744.0, "47550": 966814464.0, "47555": 970826304.0, "47560": 980099072.0, "47565": 975763520.0, "47570": 946119616.0, "47575": 961273600.0, "47580": 970101888.0, "47585": 974804224.0, "47590": 961407104.0, "47595": 966776000.0, "47600": 959536768.0, "47605": 968704832.0, "47610": 979453248.0, "47615": 963931840.0, "47620": 973658240.0, "47625": 937055616.0, "47630": 964473728.0, "47635": 968569216.0, "47640": 964696000.0, "47645": 958755840.0, "47650": 951838080.0, "47655": 976264960.0, "47660": 981216576.0, "47665": 970652736.0, "47670": 974299712.0, "47675": 955936576.0, "47680": 968631424.0, "47685": 985318208.0, "47690": 962527104.0, "47695": 971574464.0, "47700": 979408256.0, "47705": 958386816.0, "47710": 973353408.0, "47715": 984049344.0, "47720": 983392000.0, "47725": 973472192.0, "47730": 939917824.0, "47735": 967241152.0, "47740": 969729792.0, "47745": 983211712.0, "47750": 984048000.0, "47755": 937872128.0, "47760": 954278208.0, "47765": 963516736.0, "47770": 965407808.0, "47775": 957145536.0, "47780": 975405952.0, "47785": 959925632.0, "47790": 967371520.0, "47795": 974319936.0, "47800": 954135744.0, "47805": 981911680.0, "47810": 943100992.0, "47815": 969706752.0, "47820": 963144512.0, "47825": 974282368.0, "47830": 976520128.0, "47835": 945462976.0, "47840": 962103040.0, "47845": 975559424.0, "47850": 969646784.0, "47855": 979557632.0, "47860": 962702464.0, "47865": 956119872.0, "47870": 961931776.0, "47875": 961583296.0, "47880": 974827968.0, "47885": 964535296.0, "47890": 940200704.0, "47895": 970505728.0, "47900": 987559744.0, "47905": 963556160.0, "47910": 962126080.0, "47915": 943006592.0, "47920": 963938432.0, "47925": 980136320.0, "47930": 971376576.0, "47935": 955335552.0, "47940": 963852544.0, "47945": 936114432.0, "47950": 972100096.0, "47955": 976014272.0, "47960": 981100992.0, "47965": 958888704.0, "47970": 945659840.0, "47975": 967053952.0, "47980": 957029760.0, "47985": 992466688.0, "47990": 978109184.0, "47995": 952373440.0, "48000": 961524800.0, "48005": 966948480.0, "48010": 974618752.0, "48015": 963153920.0, "48020": 939898688.0, "48025": 961911168.0, "48030": 959888064.0, "48035": 978914048.0, "48040": 978943296.0, "48045": 961984576.0, "48050": 954712256.0, "48055": 981855232.0, "48060": 970486784.0, "48065": 980113792.0, "48070": 958054080.0, "48075": 944645632.0, "48080": 982813696.0, "48085": 974220224.0, "48090": 966282880.0, "48095": 979735936.0, "48100": 944448256.0, "48105": 975811200.0, "48110": 966474048.0, "48115": 962409216.0, "48120": 969604224.0, "48125": 940704320.0, "48130": 949285376.0, "48135": 961050240.0, "48140": 972626112.0, "48145": 970582592.0, "48150": 956494336.0, "48155": 938841536.0, "48160": 961683968.0, "48165": 966587520.0, "48170": 981129664.0, "48175": 975657344.0, "48180": 936525824.0, "48185": 949769024.0, "48190": 982314560.0, "48195": 968988160.0, "48200": 970306624.0, "48205": 966049280.0, "48210": 957219264.0, "48215": 952968384.0, "48220": 971763136.0, "48225": 980860608.0, "48230": 980282304.0, "48235": 935771840.0, "48240": 963967232.0, "48245": 981068224.0, "48250": 957897536.0, "48255": 976811136.0, "48260": 935815680.0, "48265": 976082560.0, "48270": 962541312.0, "48275": 972030080.0, "48280": 959528000.0, "48285": 950158848.0, "48290": 957231936.0, "48295": 974086656.0, "48300": 975687040.0, "48305": 968979328.0, "48310": 948915072.0, "48315": 952384384.0, "48320": 972754112.0, "48325": 967886080.0, "48330": 973010688.0, "48335": 958913664.0, "48340": 934359808.0, "48345": 953258048.0, "48350": 970240128.0, "48355": 972912640.0, "48360": 959099008.0, "48365": 930200448.0, "48370": 958033856.0, "48375": 973048448.0, "48380": 974956992.0, "48385": 960125568.0, "48390": 936502976.0, "48395": 976827712.0, "48400": 972201024.0, "48405": 973161920.0, "48410": 966394688.0, "48415": 968391104.0, "48420": 939973120.0, "48425": 978838784.0, "48430": 965013184.0, "48435": 974533696.0, "48440": 970143360.0, "48445": 958049856.0, "48450": 961390272.0, "48455": 960044416.0, "48460": 967949760.0, "48465": 971002944.0, "48470": 951783808.0, "48475": 936392128.0, "48480": 958913088.0, "48485": 974183616.0, "48490": 958954816.0, "48495": 952681664.0, "48500": 935954496.0, "48505": 969730560.0, "48510": 957307200.0, "48515": 974737344.0, "48520": 960521920.0, "48525": 933811392.0, "48530": 961247552.0, "48535": 975971776.0, "48540": 976001280.0, "48545": 969673856.0, "48550": 949604416.0, "48555": 951630336.0, "48560": 967766976.0, "48565": 972710336.0, "48570": 975117760.0, "48575": 961605632.0, "48580": 932609664.0, "48585": 978937088.0, "48590": 983038336.0, "48595": 966683392.0, "48600": 957026240.0, "48605": 938468288.0, "48610": 957875712.0, "48615": 971057664.0, "48620": 975032384.0, "48625": 980970112.0, "48630": 940132800.0, "48635": 957016256.0, "48640": 978345856.0, "48645": 967471360.0, "48650": 970627008.0, "48655": 963445760.0, "48660": 945451584.0, "48665": 968681152.0, "48670": 972583744.0, "48675": 984477248.0, "48680": 960281472.0, "48685": 950089728.0, "48690": 965185792.0, "48695": 971867968.0, "48700": 966923136.0, "48705": 967164544.0, "48710": 947290560.0, "48715": 960649024.0, "48720": 965627968.0, "48725": 956865472.0, "48730": 972394496.0, "48735": 962576000.0, "48740": 959208000.0, "48745": 972066880.0, "48750": 966475072.0, "48755": 981394496.0, "48760": 968372736.0, "48765": 948915968.0, "48770": 957158976.0, "48775": 988592384.0, "48780": 965741056.0, "48785": 965548352.0, "48790": 942196480.0, "48795": 952157952.0, "48800": 978441600.0, "48805": 982329280.0, "48810": 957310272.0, "48815": 955259904.0, "48820": 928579264.0, "48825": 977852416.0, "48830": 971477440.0, "48835": 969780032.0, "48840": 969902528.0, "48845": 954572096.0, "48850": 964203968.0, "48855": 971300480.0, "48860": 974656128.0, "48865": 971562368.0, "48870": 949823104.0, "48875": 972064832.0, "48880": 974441792.0, "48885": 961685312.0, "48890": 979795200.0, "48895": 956788800.0, "48900": 947607488.0, "48905": 962532160.0, "48910": 964164544.0, "48915": 960384064.0, "48920": 961473920.0, "48925": 939002880.0, "48930": 965296896.0, "48935": 965898624.0, "48940": 951003200.0, "48945": 986817728.0, "48950": 940028032.0, "48955": 973126976.0, "48960": 969962752.0, "48965": 962869824.0, "48970": 969144640.0, "48975": 929832384.0, "48980": 966835968.0, "48985": 968802240.0, "48990": 975686336.0, "48995": 974050816.0, "49000": 961650816.0, "49005": 942319744.0, "49010": 973519232.0, "49015": 971660800.0, "49020": 959903552.0, "49025": 945137984.0, "49030": 935213312.0, "49035": 978858368.0, "49040": 973072192.0, "49045": 963021504.0, "49050": 961084288.0, "49055": 941741184.0, "49060": 955689856.0, "49065": 965948544.0, "49070": 978514432.0, "49075": 975192768.0, "49080": 939222272.0, "49085": 949174080.0, "49090": 962807296.0, "49095": 983244352.0, "49100": 967743424.0, "49105": 969950848.0, "49110": 944439296.0, "49115": 978776320.0, "49120": 977715968.0, "49125": 981670656.0, "49130": 946876608.0, "49135": 954025472.0, "49140": 952596160.0, "49145": 970296192.0, "49150": 951187968.0, "49155": 967625792.0, "49160": 947668672.0, "49165": 973680640.0, "49170": 978582464.0, "49175": 971485696.0, "49180": 979798208.0, "49185": 974896576.0, "49190": 961936576.0, "49195": 988715072.0, "49200": 969801024.0, "49205": 963115776.0, "49210": 978180672.0, "49215": 940548416.0, "49220": 979221632.0, "49225": 964078336.0, "49230": 976388480.0, "49235": 975331584.0, "49240": 945099072.0, "49245": 961660928.0, "49250": 974694784.0, "49255": 993684416.0, "49260": 972322304.0, "49265": 950476480.0, "49270": 940801344.0, "49275": 962156544.0, "49280": 988047360.0, "49285": 983956032.0, "49290": 961673856.0, "49295": 939562304.0, "49300": 975910208.0, "49305": 980154816.0, "49310": 961176576.0, "49315": 968066368.0, "49320": 942934720.0, "49325": 965861888.0, "49330": 962939072.0, "49335": 959080128.0, "49340": 977462208.0, "49345": 964850176.0, "49350": 955898880.0, "49355": 971279168.0, "49360": 966261760.0, "49365": 958705600.0, "49370": 951281728.0, "49375": 929924800.0, "49380": 968931712.0, "49385": 958908992.0, "49390": 949236416.0, "49395": 975235392.0, "49400": 930305472.0, "49405": 959137216.0, "49410": 966835840.0, "49415": 968853760.0, "49420": 969351936.0, "49425": 947060032.0, "49430": 961556032.0, "49435": 970174976.0, "49440": 964515520.0, "49445": 967615552.0, "49450": 963221952.0, "49455": 937830016.0, "49460": 975228736.0, "49465": 969356032.0, "49470": 960243136.0, "49475": 973415808.0, "49480": 961407744.0, "49485": 960715264.0, "49490": 976333696.0, "49495": 978441600.0, "49500": 953724608.0, "49505": 952981440.0, "49510": 972210816.0, "49515": 960456192.0, "49520": 968158464.0, "49525": 973955584.0, "49530": 943433280.0, "49535": 952745984.0, "49540": 962349056.0, "49545": 982296704.0, "49550": 982876736.0, "49555": 974600704.0, "49560": 938399808.0, "49565": 965819008.0, "49570": 965376832.0, "49575": 973994944.0, "49580": 979393856.0, "49585": 962078336.0, "49590": 977139072.0, "49595": 973366592.0, "49600": 978551872.0, "49605": 960769472.0, "49610": 945855232.0, "49615": 955935616.0, "49620": 958190272.0, "49625": 947380224.0, "49630": 969153280.0, "49635": 963498368.0, "49640": 958060544.0, "49645": 989479616.0, "49650": 970450688.0, "49655": 952102144.0, "49660": 959635136.0, "49665": 948925440.0, "49670": 970308096.0, "49675": 979366848.0, "49680": 968123200.0, "49685": 968329856.0, "49690": 958790208.0, "49695": 941135616.0, "49700": 967881728.0, "49705": 980865024.0, "49710": 970546624.0, "49715": 968606016.0, "49720": 939044736.0, "49725": 955543744.0, "49730": 978317312.0, "49735": 983569024.0, "49740": 959283072.0, "49745": 926928448.0, "49750": 977772096.0, "49755": 960586944.0, "49760": 987106624.0, "49765": 963774528.0, "49770": 964165376.0, "49775": 951096576.0, "49780": 979896384.0, "49785": 967852992.0, "49790": 969896256.0, "49795": 948509888.0, "49800": 961860544.0, "49805": 978133504.0, "49810": 975412928.0, "49815": 972126592.0, "49820": 958990208.0, "49825": 966851904.0, "49830": 974014080.0, "49835": 966532672.0, "49840": 965316032.0, "49845": 967072896.0, "49850": 931881088.0, "49855": 972274304.0, "49860": 962993536.0, "49865": 958623680.0, "49870": 980375232.0, "49875": 960065600.0, "49880": 968182720.0, "49885": 968290560.0, "49890": 968080384.0, "49895": 975869760.0, "49900": 965939648.0, "49905": 953094720.0, "49910": 994940032.0, "49915": 977610816.0, "49920": 963431680.0, "49925": 979384512.0, "49930": 959505408.0, "49935": 954952512.0, "49940": 981967616.0, "49945": 977883072.0, "49950": 970022464.0, "49955": 946072576.0, "49960": 955687808.0, "49965": 963895936.0, "49970": 973698112.0, "49975": 965262464.0, "49980": 976114624.0, "49985": 961186176.0, "49990": 973719168.0, "49995": 967943872.0, "50000": 978347904.0, "50005": 979190592.0, "50010": 943499328.0, "50015": 958932160.0, "50020": 971149504.0, "50025": 981602688.0, "50030": 978569664.0, "50035": 956400896.0, "50040": 953167680.0, "50045": 967900352.0, "50050": 966549184.0, "50055": 943887808.0, "50060": 970824448.0, "50065": 957263424.0, "50070": 981350336.0, "50075": 960053440.0, "50080": 964713024.0, "50085": 966674176.0, "50090": 950354624.0, "50095": 984344896.0, "50100": 969726336.0, "50105": 958012224.0, "50110": 974449920.0, "50115": 954851328.0, "50120": 968474496.0, "50125": 960076480.0, "50130": 979611712.0, "50135": 975063040.0, "50140": 943069248.0, "50145": 975608896.0, "50150": 989107520.0, "50155": 983827776.0, "50160": 983611264.0, "50165": 970469312.0, "50170": 940005504.0, "50175": 948259200.0, "50180": 996879232.0, "50185": 970247680.0, "50190": 976660224.0, "50195": 952938688.0, "50200": 971919168.0, "50205": 958860032.0, "50210": 974562624.0, "50215": 972510848.0, "50220": 954243136.0, "50225": 950064512.0, "50230": 974049792.0, "50235": 964566528.0, "50240": 973263040.0, "50245": 964615808.0, "50250": 959533120.0, "50255": 970826112.0, "50260": 986281728.0, "50265": 976610880.0, "50270": 981633536.0, "50275": 950602880.0, "50280": 971512960.0, "50285": 968151872.0, "50290": 957759040.0, "50295": 965351232.0, "50300": 943438272.0, "50305": 964332416.0, "50310": 978294080.0, "50315": 966606144.0, "50320": 962566336.0, "50325": 955643200.0, "50330": 954950400.0, "50335": 974528704.0, "50340": 972392448.0, "50345": 961943872.0, "50350": 970767552.0, "50355": 946348864.0, "50360": 979963008.0, "50365": 966384000.0, "50370": 974940864.0, "50375": 974724032.0, "50380": 948142208.0, "50385": 971894464.0, "50390": 979349312.0, "50395": 971600256.0, "50400": 978060544.0, "50405": 953706688.0, "50410": 941624960.0, "50415": 968230336.0, "50420": 972969152.0, "50425": 987336000.0, "50430": 964462272.0, "50435": 952257664.0, "50440": 971175936.0, "50445": 971474944.0, "50450": 963608576.0, "50455": 975430656.0, "50460": 949340096.0, "50465": 977647040.0, "50470": 980395392.0, "50475": 968509888.0, "50480": 969026240.0, "50485": 955628544.0, "50490": 955305600.0, "50495": 962631232.0, "50500": 963508288.0, "50505": 967256896.0, "50510": 967797568.0, "50515": 942237632.0, "50520": 972084544.0, "50525": 955563840.0, "50530": 954793536.0, "50535": 977912768.0, "50540": 938491712.0, "50545": 972917952.0, "50550": 988214528.0, "50555": 971678592.0, "50560": 973791872.0, "50565": 946258496.0, "50570": 961360832.0, "50575": 970691328.0, "50580": 966243776.0, "50585": 969350912.0, "50590": 958380800.0, "50595": 949739392.0, "50600": 972563904.0, "50605": 965481984.0, "50610": 977818880.0, "50615": 963430080.0, "50620": 934474176.0, "50625": 977451392.0, "50630": 983340416.0, "50635": 974326336.0, "50640": 959529984.0, "50645": 943762048.0, "50650": 958210112.0, "50655": 963804224.0, "50660": 963515136.0, "50665": 977219648.0, "50670": 948871040.0, "50675": 949746688.0, "50680": 968006656.0, "50685": 969228416.0, "50690": 972244416.0, "50695": 971659136.0, "50700": 954135360.0, "50705": 974163648.0, "50710": 965486144.0, "50715": 978720576.0, "50720": 961956416.0, "50725": 945101056.0, "50730": 965848768.0, "50735": 963077952.0, "50740": 958461440.0, "50745": 966624704.0, "50750": 962479296.0, "50755": 953336448.0, "50760": 969117760.0, "50765": 956840256.0, "50770": 960861888.0, "50775": 955274880.0, "50780": 935469824.0, "50785": 986928960.0, "50790": 973863424.0, "50795": 972959744.0, "50800": 957108480.0, "50805": 939782208.0, "50810": 956630720.0, "50815": 985629376.0, "50820": 974645824.0, "50825": 959307840.0, "50830": 956334464.0, "50835": 951673216.0, "50840": 971297664.0, "50845": 977495744.0, "50850": 975148928.0, "50855": 950077376.0, "50860": 948351808.0, "50865": "nan", "50870": "nan", "50875": "nan", "50880": "nan", "50885": "nan", "50890": "nan", "50895": "nan", "50900": "nan", "50905": "nan", "50910": "nan", "50915": "nan", "50920": "nan", "50925": "nan", "50930": "nan", "50935": "nan", "50940": "nan", "50945": "nan", "50950": "nan", "50955": "nan", "50960": "nan", "50965": "nan", "50970": "nan", "50975": "nan", "50980": "nan", "50985": "nan", "50990": "nan", "50995": "nan", "51000": "nan"}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 51000, "step_interval": 5, "values": {"1": 12779284480.0, "5": 12779284480.0, "10": 12779284480.0, "15": 12779284480.0, "20": 12779284480.0, "25": 12779284480.0, "30": 12779284480.0, "35": 12779284480.0, "40": 12779284480.0, "45": 12779284480.0, "50": 12779284480.0, "55": 12779284480.0, "60": 12779284480.0, "65": 12779284480.0, "70": 12779284480.0, "75": 12779284480.0, "80": 12779284480.0, "85": 12779284480.0, "90": 12779284480.0, "95": 12779284480.0, "100": 12779284480.0, "105": 12779284480.0, "110": 12779284480.0, "115": 12779284480.0, "120": 12779284480.0, "125": 12779284480.0, "130": 12779284480.0, "135": 12779284480.0, "140": 12779284480.0, "145": 12779284480.0, "150": 12779284480.0, "155": 12779284480.0, "160": 12779284480.0, "165": 12779284480.0, "170": 12779284480.0, "175": 12779284480.0, "180": 12779284480.0, "185": 12779284480.0, "190": 12779284480.0, "195": 12779284480.0, "200": 12779284480.0, "205": 12779284480.0, "210": 12779284480.0, "215": 12779284480.0, "220": 12779284480.0, "225": 12779284480.0, "230": 12779284480.0, "235": 12779284480.0, "240": 12779284480.0, "245": 12779284480.0, "250": 12779284480.0, "255": 12779284480.0, "260": 12779284480.0, "265": 12779284480.0, "270": 12779284480.0, "275": 12779284480.0, "280": 12779284480.0, "285": 12779284480.0, "290": 12779284480.0, "295": 12779284480.0, "300": 12779284480.0, "305": 12779284480.0, "310": 12779284480.0, "315": 12779284480.0, "320": 12779284480.0, "325": 12779284480.0, "330": 12779284480.0, "335": 12779284480.0, "340": 12779284480.0, "345": 12779284480.0, "350": 12779284480.0, "355": 12779284480.0, "360": 12779284480.0, "365": 12779284480.0, "370": 12779284480.0, "375": 12779284480.0, "380": 12779284480.0, "385": 12779284480.0, "390": 12779284480.0, "395": 12779284480.0, "400": 12779284480.0, "405": 12779284480.0, "410": 12779284480.0, "415": 12779284480.0, "420": 12779284480.0, "425": 12779284480.0, "430": 12779284480.0, "435": 12779284480.0, "440": 12779284480.0, "445": 12779284480.0, "450": 12779284480.0, "455": 12779284480.0, "460": 12779284480.0, "465": 12779284480.0, "470": 12779284480.0, "475": 12779284480.0, "480": 12779284480.0, "485": 12779284480.0, "490": 12779284480.0, "495": 12779284480.0, "500": 12779284480.0, "505": 12779284480.0, "510": 12779284480.0, "515": 12779284480.0, "520": 12779284480.0, "525": 12779284480.0, "530": 12779284480.0, "535": 12779284480.0, "540": 12779284480.0, "545": 12779284480.0, "550": 12779284480.0, "555": 12779284480.0, "560": 12779284480.0, "565": 12779284480.0, "570": 12779284480.0, "575": 12779284480.0, "580": 12779284480.0, "585": 12779284480.0, "590": 12779284480.0, "595": 12779284480.0, "600": 12779284480.0, "605": 12779284480.0, "610": 12779284480.0, "615": 12779284480.0, "620": 12779284480.0, "625": 12779284480.0, "630": 12779284480.0, "635": 12779284480.0, "640": 12779284480.0, "645": 12779284480.0, "650": 12779284480.0, "655": 12779284480.0, "660": 12779284480.0, "665": 12779284480.0, "670": 12779284480.0, "675": 12779284480.0, "680": 12779284480.0, "685": 12779284480.0, "690": 12779284480.0, "695": 12779284480.0, "700": 12779284480.0, "705": 12779284480.0, "710": 12779284480.0, "715": 12779284480.0, "720": 12779284480.0, "725": 12779284480.0, "730": 12779284480.0, "735": 12779284480.0, "740": 12779284480.0, "745": 12779284480.0, "750": 12779284480.0, "755": 12779284480.0, "760": 12779284480.0, "765": 12779284480.0, "770": 12779284480.0, "775": 12779284480.0, "780": 12779284480.0, "785": 12779284480.0, "790": 12779284480.0, "795": 12779284480.0, "800": 12779284480.0, "805": 12779284480.0, "810": 12779284480.0, "815": 12779284480.0, "820": 12779284480.0, "825": 12779284480.0, "830": 12779284480.0, "835": 12779284480.0, "840": 12779284480.0, "845": 12779284480.0, "850": 12779284480.0, "855": 12779284480.0, "860": 12779284480.0, "865": 12779284480.0, "870": 12779284480.0, "875": 12779284480.0, "880": 12779284480.0, "885": 12779284480.0, "890": 12779284480.0, "895": 12779284480.0, "900": 12779284480.0, "905": 12779284480.0, "910": 12779284480.0, "915": 12779284480.0, "920": 12779284480.0, "925": 12779284480.0, "930": 12779284480.0, "935": 12779284480.0, "940": 12779284480.0, "945": 12779284480.0, "950": 12779284480.0, "955": 12779284480.0, "960": 12779284480.0, "965": 12779284480.0, "970": 12779284480.0, "975": 12779284480.0, "980": 12779284480.0, "985": 12779284480.0, "990": 12779284480.0, "995": 12779284480.0, "1000": 12779284480.0, "1005": 12779284480.0, "1010": 12779284480.0, "1015": 12779284480.0, "1020": 12779284480.0, "1025": 12779284480.0, "1030": 12779284480.0, "1035": 12779284480.0, "1040": 12779284480.0, "1045": 12779284480.0, "1050": 12779284480.0, "1055": 12779284480.0, "1060": 12779284480.0, "1065": 12779284480.0, "1070": 12779284480.0, "1075": 12779284480.0, "1080": 12779284480.0, "1085": 12779284480.0, "1090": 12779284480.0, "1095": 12779284480.0, "1100": 12779284480.0, "1105": 12779284480.0, "1110": 12779284480.0, "1115": 12779284480.0, "1120": 12779284480.0, "1125": 12779284480.0, "1130": 12779284480.0, "1135": 12779284480.0, "1140": 12779284480.0, "1145": 12779284480.0, "1150": 12779284480.0, "1155": 12779284480.0, "1160": 12779284480.0, "1165": 12779284480.0, "1170": 12779284480.0, "1175": 12779284480.0, "1180": 12779284480.0, "1185": 12779284480.0, "1190": 12779284480.0, "1195": 12779284480.0, "1200": 12779284480.0, "1205": 12779284480.0, "1210": 12779284480.0, "1215": 12779284480.0, "1220": 12779284480.0, "1225": 12779284480.0, "1230": 12779284480.0, "1235": 12779284480.0, "1240": 12779284480.0, "1245": 12779284480.0, "1250": 12779284480.0, "1255": 12779284480.0, "1260": 12779284480.0, "1265": 12779284480.0, "1270": 12779284480.0, "1275": 12779284480.0, "1280": 12779284480.0, "1285": 12779284480.0, "1290": 12779284480.0, "1295": 12779284480.0, "1300": 12779284480.0, "1305": 12779284480.0, "1310": 12779284480.0, "1315": 12779284480.0, "1320": 12779284480.0, "1325": 12779284480.0, "1330": 12779284480.0, "1335": 12779284480.0, "1340": 12779284480.0, "1345": 12779284480.0, "1350": 12779284480.0, "1355": 12779284480.0, "1360": 12779284480.0, "1365": 12779284480.0, "1370": 12779284480.0, "1375": 12779284480.0, "1380": 12779284480.0, "1385": 12779284480.0, "1390": 12779284480.0, "1395": 12779284480.0, "1400": 12779284480.0, "1405": 12779284480.0, "1410": 12779284480.0, "1415": 12779284480.0, "1420": 12779284480.0, "1425": 12779284480.0, "1430": 12779284480.0, "1435": 12779284480.0, "1440": 12779284480.0, "1445": 12779284480.0, "1450": 12779284480.0, "1455": 12779284480.0, "1460": 12779284480.0, "1465": 12779284480.0, "1470": 12779284480.0, "1475": 12779284480.0, "1480": 12779284480.0, "1485": 12779284480.0, "1490": 12779284480.0, "1495": 12779284480.0, "1500": 12779284480.0, "1505": 12779284480.0, "1510": 12779284480.0, "1515": 12779284480.0, "1520": 12779284480.0, "1525": 12779284480.0, "1530": 12779284480.0, "1535": 12779284480.0, "1540": 12779284480.0, "1545": 12779284480.0, "1550": 12779284480.0, "1555": 12779284480.0, "1560": 12779284480.0, "1565": 12779284480.0, "1570": 12779284480.0, "1575": 12779284480.0, "1580": 12779284480.0, "1585": 12779284480.0, "1590": 12779284480.0, "1595": 12779284480.0, "1600": 12779284480.0, "1605": 12779284480.0, "1610": 12779284480.0, "1615": 12779284480.0, "1620": 12779284480.0, "1625": 12779284480.0, "1630": 12779284480.0, "1635": 12779284480.0, "1640": 12779284480.0, "1645": 12779284480.0, "1650": 12779284480.0, "1655": 12779284480.0, "1660": 12779284480.0, "1665": 12779284480.0, "1670": 12779284480.0, "1675": 12779284480.0, "1680": 12779284480.0, "1685": 12779284480.0, "1690": 12779284480.0, "1695": 12779284480.0, "1700": 12779284480.0, "1705": 12779284480.0, "1710": 12779284480.0, "1715": 12779284480.0, "1720": 12779284480.0, "1725": 12779284480.0, "1730": 12779284480.0, "1735": 12779284480.0, "1740": 12779284480.0, "1745": 12779284480.0, "1750": 12779284480.0, "1755": 12779284480.0, "1760": 12779284480.0, "1765": 12779284480.0, "1770": 12779284480.0, "1775": 12779284480.0, "1780": 12779284480.0, "1785": 12779284480.0, "1790": 12779284480.0, "1795": 12779284480.0, "1800": 12779284480.0, "1805": 12779284480.0, "1810": 12779284480.0, "1815": 12779284480.0, "1820": 12779284480.0, "1825": 12779284480.0, "1830": 12779284480.0, "1835": 12779284480.0, "1840": 12779284480.0, "1845": 12779284480.0, "1850": 12779284480.0, "1855": 12779284480.0, "1860": 12779284480.0, "1865": 12779284480.0, "1870": 12779284480.0, "1875": 12779284480.0, "1880": 12779284480.0, "1885": 12779284480.0, "1890": 12779284480.0, "1895": 12779284480.0, "1900": 12779284480.0, "1905": 12779284480.0, "1910": 12779284480.0, "1915": 12779284480.0, "1920": 12779284480.0, "1925": 12779284480.0, "1930": 12779284480.0, "1935": 12779284480.0, "1940": 12779284480.0, "1945": 12779284480.0, "1950": 12779284480.0, "1955": 12779284480.0, "1960": 12779284480.0, "1965": 12779284480.0, "1970": 12779284480.0, "1975": 12779284480.0, "1980": 12779284480.0, "1985": 12779284480.0, "1990": 12779284480.0, "1995": 12779284480.0, "2000": 12779284480.0, "2005": 12779284480.0, "2010": 12779284480.0, "2015": 12779284480.0, "2020": 12779284480.0, "2025": 12779284480.0, "2030": 12779284480.0, "2035": 12779284480.0, "2040": 12779284480.0, "2045": 12779284480.0, "2050": 12779284480.0, "2055": 12779284480.0, "2060": 12779284480.0, "2065": 12779284480.0, "2070": 12779284480.0, "2075": 12779284480.0, "2080": 12779284480.0, "2085": 12779284480.0, "2090": 12779284480.0, "2095": 12779284480.0, "2100": 12779284480.0, "2105": 12779284480.0, "2110": 12779284480.0, "2115": 12779284480.0, "2120": 12779284480.0, "2125": 12779284480.0, "2130": 12779284480.0, "2135": 12779284480.0, "2140": 12779284480.0, "2145": 12779284480.0, "2150": 12779284480.0, "2155": 12779284480.0, "2160": 12779284480.0, "2165": 12779284480.0, "2170": 12779284480.0, "2175": 12779284480.0, "2180": 12779284480.0, "2185": 12779284480.0, "2190": 12779284480.0, "2195": 12779284480.0, "2200": 12779284480.0, "2205": 12779284480.0, "2210": 12779284480.0, "2215": 12779284480.0, "2220": 12779284480.0, "2225": 12779284480.0, "2230": 12779284480.0, "2235": 12779284480.0, "2240": 12779284480.0, "2245": 12779284480.0, "2250": 12779284480.0, "2255": 12779284480.0, "2260": 12779284480.0, "2265": 12779284480.0, "2270": 12779284480.0, "2275": 12779284480.0, "2280": 12779284480.0, "2285": 12779284480.0, "2290": 12779284480.0, "2295": 12779284480.0, "2300": 12779284480.0, "2305": 12779284480.0, "2310": 12779284480.0, "2315": 12779284480.0, "2320": 12779284480.0, "2325": 12779284480.0, "2330": 12779284480.0, "2335": 12779284480.0, "2340": 12779284480.0, "2345": 12779284480.0, "2350": 12779284480.0, "2355": 12779284480.0, "2360": 12779284480.0, "2365": 12779284480.0, "2370": 12779284480.0, "2375": 12779284480.0, "2380": 12779284480.0, "2385": 12779284480.0, "2390": 12779284480.0, "2395": 12779284480.0, "2400": 12779284480.0, "2405": 12779284480.0, "2410": 12779284480.0, "2415": 12779284480.0, "2420": 12779284480.0, "2425": 12779284480.0, "2430": 12779284480.0, "2435": 12779284480.0, "2440": 12779284480.0, "2445": 12779284480.0, "2450": 12779284480.0, "2455": 12779284480.0, "2460": 12779284480.0, "2465": 12779284480.0, "2470": 12779284480.0, "2475": 12779284480.0, "2480": 12779284480.0, "2485": 12779284480.0, "2490": 12779284480.0, "2495": 12779284480.0, "2500": 12779284480.0, "2505": 12779284480.0, "2510": 12779284480.0, "2515": 12779284480.0, "2520": 12779284480.0, "2525": 12779284480.0, "2530": 12779284480.0, "2535": 12779284480.0, "2540": 12779284480.0, "2545": 12779284480.0, "2550": 12779284480.0, "2555": 12779284480.0, "2560": 12779284480.0, "2565": 12779284480.0, "2570": 12779284480.0, "2575": 12779284480.0, "2580": 12779284480.0, "2585": 12779284480.0, "2590": 12779284480.0, "2595": 12779284480.0, "2600": 12779284480.0, "2605": 12779284480.0, "2610": 12779284480.0, "2615": 12779284480.0, "2620": 12779284480.0, "2625": 12779284480.0, "2630": 12779284480.0, "2635": 12779284480.0, "2640": 12779284480.0, "2645": 12779284480.0, "2650": 12779284480.0, "2655": 12779284480.0, "2660": 12779284480.0, "2665": 12779284480.0, "2670": 12779284480.0, "2675": 12779284480.0, "2680": 12779284480.0, "2685": 12779284480.0, "2690": 12779284480.0, "2695": 12779284480.0, "2700": 12779284480.0, "2705": 12779284480.0, "2710": 12779284480.0, "2715": 12779284480.0, "2720": 12779284480.0, "2725": 12779284480.0, "2730": 12779284480.0, "2735": 12779284480.0, "2740": 12779284480.0, "2745": 12779284480.0, "2750": 12779284480.0, "2755": 12779284480.0, "2760": 12779284480.0, "2765": 12779284480.0, "2770": 12779284480.0, "2775": 12779284480.0, "2780": 12779284480.0, "2785": 12779284480.0, "2790": 12779284480.0, "2795": 12779284480.0, "2800": 12779284480.0, "2805": 12779284480.0, "2810": 12779284480.0, "2815": 12779284480.0, "2820": 12779284480.0, "2825": 12779284480.0, "2830": 12779284480.0, "2835": 12779284480.0, "2840": 12779284480.0, "2845": 12779284480.0, "2850": 12779284480.0, "2855": 12779284480.0, "2860": 12779284480.0, "2865": 12779284480.0, "2870": 12779284480.0, "2875": 12779284480.0, "2880": 12779284480.0, "2885": 12779284480.0, "2890": 12779284480.0, "2895": 12779284480.0, "2900": 12779284480.0, "2905": 12779284480.0, "2910": 12779284480.0, "2915": 12779284480.0, "2920": 12779284480.0, "2925": 12779284480.0, "2930": 12779284480.0, "2935": 12779284480.0, "2940": 12779284480.0, "2945": 12779284480.0, "2950": 12779284480.0, "2955": 12779284480.0, "2960": 12779284480.0, "2965": 12779284480.0, "2970": 12779284480.0, "2975": 12779284480.0, "2980": 12779284480.0, "2985": 12779284480.0, "2990": 12779284480.0, "2995": 12779284480.0, "3000": 12779284480.0, "3005": 12779284480.0, "3010": 12779284480.0, "3015": 12779284480.0, "3020": 12779284480.0, "3025": 12779284480.0, "3030": 12779284480.0, "3035": 12779284480.0, "3040": 12779284480.0, "3045": 12779284480.0, "3050": 12779284480.0, "3055": 12779284480.0, "3060": 12779284480.0, "3065": 12779284480.0, "3070": 12779284480.0, "3075": 12779284480.0, "3080": 12779284480.0, "3085": 12779284480.0, "3090": 12779284480.0, "3095": 12779284480.0, "3100": 12779284480.0, "3105": 12779284480.0, "3110": 12779284480.0, "3115": 12779284480.0, "3120": 12779284480.0, "3125": 12779284480.0, "3130": 12779284480.0, "3135": 12779284480.0, "3140": 12779284480.0, "3145": 12779284480.0, "3150": 12779284480.0, "3155": 12779284480.0, "3160": 12779284480.0, "3165": 12779284480.0, "3170": 12779284480.0, "3175": 12779284480.0, "3180": 12779284480.0, "3185": 12779284480.0, "3190": 12779284480.0, "3195": 12779284480.0, "3200": 12779284480.0, "3205": 12779284480.0, "3210": 12779284480.0, "3215": 12779284480.0, "3220": 12779284480.0, "3225": 12779284480.0, "3230": 12779284480.0, "3235": 12779284480.0, "3240": 12779284480.0, "3245": 12779284480.0, "3250": 12779284480.0, "3255": 12779284480.0, "3260": 12779284480.0, "3265": 12779284480.0, "3270": 12779284480.0, "3275": 12779284480.0, "3280": 12779284480.0, "3285": 12779284480.0, "3290": 12779284480.0, "3295": 12779284480.0, "3300": 12779284480.0, "3305": 12779284480.0, "3310": 12779284480.0, "3315": 12779284480.0, "3320": 12779284480.0, "3325": 12779284480.0, "3330": 12779284480.0, "3335": 12779284480.0, "3340": 12779284480.0, "3345": 12779284480.0, "3350": 12779284480.0, "3355": 12779284480.0, "3360": 12779284480.0, "3365": 12779284480.0, "3370": 12779284480.0, "3375": 12779284480.0, "3380": 12779284480.0, "3385": 12779284480.0, "3390": 12779284480.0, "3395": 12779284480.0, "3400": 12779284480.0, "3405": 12779284480.0, "3410": 12779284480.0, "3415": 12779284480.0, "3420": 12779284480.0, "3425": 12779284480.0, "3430": 12779284480.0, "3435": 12779284480.0, "3440": 12779284480.0, "3445": 12779284480.0, "3450": 12779284480.0, "3455": 12779284480.0, "3460": 12779284480.0, "3465": 12779284480.0, "3470": 12779284480.0, "3475": 12779284480.0, "3480": 12779284480.0, "3485": 12779284480.0, "3490": 12779284480.0, "3495": 12779284480.0, "3500": 12779284480.0, "3505": 12779284480.0, "3510": 12779284480.0, "3515": 12779284480.0, "3520": 12779284480.0, "3525": 12779284480.0, "3530": 12779284480.0, "3535": 12779284480.0, "3540": 12779284480.0, "3545": 12779284480.0, "3550": 12779284480.0, "3555": 12779284480.0, "3560": 12779284480.0, "3565": 12779284480.0, "3570": 12779284480.0, "3575": 12779284480.0, "3580": 12779284480.0, "3585": 12779284480.0, "3590": 12779284480.0, "3595": 12779284480.0, "3600": 12779284480.0, "3605": 12779284480.0, "3610": 12779284480.0, "3615": 12779284480.0, "3620": 12779284480.0, "3625": 12779284480.0, "3630": 12779284480.0, "3635": 12779284480.0, "3640": 12779284480.0, "3645": 12779284480.0, "3650": 12779284480.0, "3655": 12779284480.0, "3660": 12779284480.0, "3665": 12779284480.0, "3670": 12779284480.0, "3675": 12779284480.0, "3680": 12779284480.0, "3685": 12779284480.0, "3690": 12779284480.0, "3695": 12779284480.0, "3700": 12779284480.0, "3705": 12779284480.0, "3710": 12779284480.0, "3715": 12779284480.0, "3720": 12779284480.0, "3725": 12779284480.0, "3730": 12779284480.0, "3735": 12779284480.0, "3740": 12779284480.0, "3745": 12779284480.0, "3750": 12779284480.0, "3755": 12779284480.0, "3760": 12779284480.0, "3765": 12779284480.0, "3770": 12779284480.0, "3775": 12779284480.0, "3780": 12779284480.0, "3785": 12779284480.0, "3790": 12779284480.0, "3795": 12779284480.0, "3800": 12779284480.0, "3805": 12779284480.0, "3810": 12779284480.0, "3815": 12779284480.0, "3820": 12779284480.0, "3825": 12779284480.0, "3830": 12779284480.0, "3835": 12779284480.0, "3840": 12779284480.0, "3845": 12779284480.0, "3850": 12779284480.0, "3855": 12779284480.0, "3860": 12779284480.0, "3865": 12779284480.0, "3870": 12779284480.0, "3875": 12779284480.0, "3880": 12779284480.0, "3885": 12779284480.0, "3890": 12779284480.0, "3895": 12779284480.0, "3900": 12779284480.0, "3905": 12779284480.0, "3910": 12779284480.0, "3915": 12779284480.0, "3920": 12779284480.0, "3925": 12779284480.0, "3930": 12779284480.0, "3935": 12779284480.0, "3940": 12779284480.0, "3945": 12779284480.0, "3950": 12779284480.0, "3955": 12779284480.0, "3960": 12779284480.0, "3965": 12779284480.0, "3970": 12779284480.0, "3975": 12779284480.0, "3980": 12779284480.0, "3985": 12779284480.0, "3990": 12779284480.0, "3995": 12779284480.0, "4000": 12779284480.0, "4005": 12779284480.0, "4010": 12779284480.0, "4015": 12779284480.0, "4020": 12779284480.0, "4025": 12779284480.0, "4030": 12779284480.0, "4035": 12779284480.0, "4040": 12779284480.0, "4045": 12779284480.0, "4050": 12779284480.0, "4055": 12779284480.0, "4060": 12779284480.0, "4065": 12779284480.0, "4070": 12779284480.0, "4075": 12779284480.0, "4080": 12779284480.0, "4085": 12779284480.0, "4090": 12779284480.0, "4095": 12779284480.0, "4100": 12779284480.0, "4105": 12779284480.0, "4110": 12779284480.0, "4115": 12779284480.0, "4120": 12779284480.0, "4125": 12779284480.0, "4130": 12779284480.0, "4135": 12779284480.0, "4140": 12779284480.0, "4145": 12779284480.0, "4150": 12779284480.0, "4155": 12779284480.0, "4160": 12779284480.0, "4165": 12779284480.0, "4170": 12779284480.0, "4175": 12779284480.0, "4180": 12779284480.0, "4185": 12779284480.0, "4190": 12779284480.0, "4195": 12779284480.0, "4200": 12779284480.0, "4205": 12779284480.0, "4210": 12779284480.0, "4215": 12779284480.0, "4220": 12779284480.0, "4225": 12779284480.0, "4230": 12779284480.0, "4235": 12779284480.0, "4240": 12779284480.0, "4245": 12779284480.0, "4250": 12779284480.0, "4255": 12779284480.0, "4260": 12779284480.0, "4265": 12779284480.0, "4270": 12779284480.0, "4275": 12779284480.0, "4280": 12779284480.0, "4285": 12779284480.0, "4290": 12779284480.0, "4295": 12779284480.0, "4300": 12779284480.0, "4305": 12779284480.0, "4310": 12779284480.0, "4315": 12779284480.0, "4320": 12779284480.0, "4325": 12779284480.0, "4330": 12779284480.0, "4335": 12779284480.0, "4340": 12779284480.0, "4345": 12779284480.0, "4350": 12779284480.0, "4355": 12779284480.0, "4360": 12779284480.0, "4365": 12779284480.0, "4370": 12779284480.0, "4375": 12779284480.0, "4380": 12779284480.0, "4385": 12779284480.0, "4390": 12779284480.0, "4395": 12779284480.0, "4400": 12779284480.0, "4405": 12779284480.0, "4410": 12779284480.0, "4415": 12779284480.0, "4420": 12779284480.0, "4425": 12779284480.0, "4430": 12779284480.0, "4435": 12779284480.0, "4440": 12779284480.0, "4445": 12779284480.0, "4450": 12779284480.0, "4455": 12779284480.0, "4460": 12779284480.0, "4465": 12779284480.0, "4470": 12779284480.0, "4475": 12779284480.0, "4480": 12779284480.0, "4485": 12779284480.0, "4490": 12779284480.0, "4495": 12779284480.0, "4500": 12779284480.0, "4505": 12779284480.0, "4510": 12779284480.0, "4515": 12779284480.0, "4520": 12779284480.0, "4525": 12779284480.0, "4530": 12779284480.0, "4535": 12779284480.0, "4540": 12779284480.0, "4545": 12779284480.0, "4550": 12779284480.0, "4555": 12779284480.0, "4560": 12779284480.0, "4565": 12779284480.0, "4570": 12779284480.0, "4575": 12779284480.0, "4580": 12779284480.0, "4585": 12779284480.0, "4590": 12779284480.0, "4595": 12779284480.0, "4600": 12779284480.0, "4605": 12779284480.0, "4610": 12779284480.0, "4615": 12779284480.0, "4620": 12779284480.0, "4625": 12779284480.0, "4630": 12779284480.0, "4635": 12779284480.0, "4640": 12779284480.0, "4645": 12779284480.0, "4650": 12779284480.0, "4655": 12779284480.0, "4660": 12779284480.0, "4665": 12779284480.0, "4670": 12779284480.0, "4675": 12779284480.0, "4680": 12779284480.0, "4685": 12779284480.0, "4690": 12779284480.0, "4695": 12779284480.0, "4700": 12779284480.0, "4705": 12779284480.0, "4710": 12779284480.0, "4715": 12779284480.0, "4720": 12779284480.0, "4725": 12779284480.0, "4730": 12779284480.0, "4735": 12779284480.0, "4740": 12779284480.0, "4745": 12779284480.0, "4750": 12779284480.0, "4755": 12779284480.0, "4760": 12779284480.0, "4765": 12779284480.0, "4770": 12779284480.0, "4775": 12779284480.0, "4780": 12779284480.0, "4785": 12779284480.0, "4790": 12779284480.0, "4795": 12779284480.0, "4800": 12779284480.0, "4805": 12779284480.0, "4810": 12779284480.0, "4815": 12779284480.0, "4820": 12779284480.0, "4825": 12779284480.0, "4830": 12779284480.0, "4835": 12779284480.0, "4840": 12779284480.0, "4845": 12779284480.0, "4850": 12779284480.0, "4855": 12779284480.0, "4860": 12779284480.0, "4865": 12779284480.0, "4870": 12779284480.0, "4875": 12779284480.0, "4880": 12779284480.0, "4885": 12779284480.0, "4890": 12779284480.0, "4895": 12779284480.0, "4900": 12779284480.0, "4905": 12779284480.0, "4910": 12779284480.0, "4915": 12779284480.0, "4920": 12779284480.0, "4925": 12779284480.0, "4930": 12779284480.0, "4935": 12779284480.0, "4940": 12779284480.0, "4945": 12779284480.0, "4950": 12779284480.0, "4955": 12779284480.0, "4960": 12779284480.0, "4965": 12779284480.0, "4970": 12779284480.0, "4975": 12779284480.0, "4980": 12779284480.0, "4985": 12779284480.0, "4990": 12779284480.0, "4995": 12779284480.0, "5000": 12779284480.0, "5005": 12779284480.0, "5010": 12779284480.0, "5015": 12779284480.0, "5020": 12779284480.0, "5025": 12779284480.0, "5030": 12779284480.0, "5035": 12779284480.0, "5040": 12779284480.0, "5045": 12779284480.0, "5050": 12779284480.0, "5055": 12779284480.0, "5060": 12779284480.0, "5065": 12779284480.0, "5070": 12779284480.0, "5075": 12779284480.0, "5080": 12779284480.0, "5085": 12779284480.0, "5090": 12779284480.0, "5095": 12779284480.0, "5100": 12779284480.0, "5105": 12779284480.0, "5110": 12779284480.0, "5115": 12779284480.0, "5120": 12779284480.0, "5125": 12779284480.0, "5130": 12779284480.0, "5135": 12779284480.0, "5140": 12779284480.0, "5145": 12779284480.0, "5150": 12779284480.0, "5155": 12779284480.0, "5160": 12779284480.0, "5165": 12779284480.0, "5170": 12779284480.0, "5175": 12779284480.0, "5180": 12779284480.0, "5185": 12779284480.0, "5190": 12779284480.0, "5195": 12779284480.0, "5200": 12779284480.0, "5205": 12779284480.0, "5210": 12779284480.0, "5215": 12779284480.0, "5220": 12779284480.0, "5225": 12779284480.0, "5230": 12779284480.0, "5235": 12779284480.0, "5240": 12779284480.0, "5245": 12779284480.0, "5250": 12779284480.0, "5255": 12779284480.0, "5260": 12779284480.0, "5265": 12779284480.0, "5270": 12779284480.0, "5275": 12779284480.0, "5280": 12779284480.0, "5285": 12779284480.0, "5290": 12779284480.0, "5295": 12779284480.0, "5300": 12779284480.0, "5305": 12779284480.0, "5310": 12779284480.0, "5315": 12779284480.0, "5320": 12779284480.0, "5325": 12779284480.0, "5330": 12779284480.0, "5335": 12779284480.0, "5340": 12779284480.0, "5345": 12779284480.0, "5350": 12779284480.0, "5355": 12779284480.0, "5360": 12779284480.0, "5365": 12779284480.0, "5370": 12779284480.0, "5375": 12779284480.0, "5380": 12779284480.0, "5385": 12779284480.0, "5390": 12779284480.0, "5395": 12779284480.0, "5400": 12779284480.0, "5405": 12779284480.0, "5410": 12779284480.0, "5415": 12779284480.0, "5420": 12779284480.0, "5425": 12779284480.0, "5430": 12779284480.0, "5435": 12779284480.0, "5440": 12779284480.0, "5445": 12779284480.0, "5450": 12779284480.0, "5455": 12779284480.0, "5460": 12779284480.0, "5465": 12779284480.0, "5470": 12779284480.0, "5475": 12779284480.0, "5480": 12779284480.0, "5485": 12779284480.0, "5490": 12779284480.0, "5495": 12779284480.0, "5500": 12779284480.0, "5505": 12779284480.0, "5510": 12779284480.0, "5515": 12779284480.0, "5520": 12779284480.0, "5525": 12779284480.0, "5530": 12779284480.0, "5535": 12779284480.0, "5540": 12779284480.0, "5545": 12779284480.0, "5550": 12779284480.0, "5555": 12779284480.0, "5560": 12779284480.0, "5565": 12779284480.0, "5570": 12779284480.0, "5575": 12779284480.0, "5580": 12779284480.0, "5585": 12779284480.0, "5590": 12779284480.0, "5595": 12779284480.0, "5600": 12779284480.0, "5605": 12779284480.0, "5610": 12779284480.0, "5615": 12779284480.0, "5620": 12779284480.0, "5625": 12779284480.0, "5630": 12779284480.0, "5635": 12779284480.0, "5640": 12779284480.0, "5645": 12779284480.0, "5650": 12779284480.0, "5655": 12779284480.0, "5660": 12779284480.0, "5665": 12779284480.0, "5670": 12779284480.0, "5675": 12779284480.0, "5680": 12779284480.0, "5685": 12779284480.0, "5690": 12779284480.0, "5695": 12779284480.0, "5700": 12779284480.0, "5705": 12779284480.0, "5710": 12779284480.0, "5715": 12779284480.0, "5720": 12779284480.0, "5725": 12779284480.0, "5730": 12779284480.0, "5735": 12779284480.0, "5740": 12779284480.0, "5745": 12779284480.0, "5750": 12779284480.0, "5755": 12779284480.0, "5760": 12779284480.0, "5765": 12779284480.0, "5770": 12779284480.0, "5775": 12779284480.0, "5780": 12779284480.0, "5785": 12779284480.0, "5790": 12779284480.0, "5795": 12779284480.0, "5800": 12779284480.0, "5805": 12779284480.0, "5810": 12779284480.0, "5815": 12779284480.0, "5820": 12779284480.0, "5825": 12779284480.0, "5830": 12779284480.0, "5835": 12779284480.0, "5840": 12779284480.0, "5845": 12779284480.0, "5850": 12779284480.0, "5855": 12779284480.0, "5860": 12779284480.0, "5865": 12779284480.0, "5870": 12779284480.0, "5875": 12779284480.0, "5880": 12779284480.0, "5885": 12779284480.0, "5890": 12779284480.0, "5895": 12779284480.0, "5900": 12779284480.0, "5905": 12779284480.0, "5910": 12779284480.0, "5915": 12779284480.0, "5920": 12779284480.0, "5925": 12779284480.0, "5930": 12779284480.0, "5935": 12779284480.0, "5940": 12779284480.0, "5945": 12779284480.0, "5950": 12779284480.0, "5955": 12779284480.0, "5960": 12779284480.0, "5965": 12779284480.0, "5970": 12779284480.0, "5975": 12779284480.0, "5980": 12779284480.0, "5985": 12779284480.0, "5990": 12779284480.0, "5995": 12779284480.0, "6000": 12779284480.0, "6005": 12779284480.0, "6010": 12779284480.0, "6015": 12779284480.0, "6020": 12779284480.0, "6025": 12779284480.0, "6030": 12779284480.0, "6035": 12779284480.0, "6040": 12779284480.0, "6045": 12779284480.0, "6050": 12779284480.0, "6055": 12779284480.0, "6060": 12779284480.0, "6065": 12779284480.0, "6070": 12779284480.0, "6075": 12779284480.0, "6080": 12779284480.0, "6085": 12779284480.0, "6090": 12779284480.0, "6095": 12779284480.0, "6100": 12779284480.0, "6105": 12779284480.0, "6110": 12779284480.0, "6115": 12779284480.0, "6120": 12779284480.0, "6125": 12779284480.0, "6130": 12779284480.0, "6135": 12779284480.0, "6140": 12779284480.0, "6145": 12779284480.0, "6150": 12779284480.0, "6155": 12779284480.0, "6160": 12779284480.0, "6165": 12779284480.0, "6170": 12779284480.0, "6175": 12779284480.0, "6180": 12779284480.0, "6185": 12779284480.0, "6190": 12779284480.0, "6195": 12779284480.0, "6200": 12779284480.0, "6205": 12779284480.0, "6210": 12779284480.0, "6215": 12779284480.0, "6220": 12779284480.0, "6225": 12779284480.0, "6230": 12779284480.0, "6235": 12779284480.0, "6240": 12779284480.0, "6245": 12779284480.0, "6250": 12779284480.0, "6255": 12779284480.0, "6260": 12779284480.0, "6265": 12779284480.0, "6270": 12779284480.0, "6275": 12779284480.0, "6280": 12779284480.0, "6285": 12779284480.0, "6290": 12779284480.0, "6295": 12779284480.0, "6300": 12779284480.0, "6305": 12779284480.0, "6310": 12779284480.0, "6315": 12779284480.0, "6320": 12779284480.0, "6325": 12779284480.0, "6330": 12779284480.0, "6335": 12779284480.0, "6340": 12779284480.0, "6345": 12779284480.0, "6350": 12779284480.0, "6355": 12779284480.0, "6360": 12779284480.0, "6365": 12779284480.0, "6370": 12779284480.0, "6375": 12779284480.0, "6380": 12779284480.0, "6385": 12779284480.0, "6390": 12779284480.0, "6395": 12779284480.0, "6400": 12779284480.0, "6405": 12779284480.0, "6410": 12779284480.0, "6415": 12779284480.0, "6420": 12779284480.0, "6425": 12779284480.0, "6430": 12779284480.0, "6435": 12779284480.0, "6440": 12779284480.0, "6445": 12779284480.0, "6450": 12779284480.0, "6455": 12779284480.0, "6460": 12779284480.0, "6465": 12779284480.0, "6470": 12779284480.0, "6475": 12779284480.0, "6480": 12779284480.0, "6485": 12779284480.0, "6490": 12779284480.0, "6495": 12779284480.0, "6500": 12779284480.0, "6505": 12779284480.0, "6510": 12779284480.0, "6515": 12779284480.0, "6520": 12779284480.0, "6525": 12779284480.0, "6530": 12779284480.0, "6535": 12779284480.0, "6540": 12779284480.0, "6545": 12779284480.0, "6550": 12779284480.0, "6555": 12779284480.0, "6560": 12779284480.0, "6565": 12779284480.0, "6570": 12779284480.0, "6575": 12779284480.0, "6580": 12779284480.0, "6585": 12779284480.0, "6590": 12779284480.0, "6595": 12779284480.0, "6600": 12779284480.0, "6605": 12779284480.0, "6610": 12779284480.0, "6615": 12779284480.0, "6620": 12779284480.0, "6625": 12779284480.0, "6630": 12779284480.0, "6635": 12779284480.0, "6640": 12779284480.0, "6645": 12779284480.0, "6650": 12779284480.0, "6655": 12779284480.0, "6660": 12779284480.0, "6665": 12779284480.0, "6670": 12779284480.0, "6675": 12779284480.0, "6680": 12779284480.0, "6685": 12779284480.0, "6690": 12779284480.0, "6695": 12779284480.0, "6700": 12779284480.0, "6705": 12779284480.0, "6710": 12779284480.0, "6715": 12779284480.0, "6720": 12779284480.0, "6725": 12779284480.0, "6730": 12779284480.0, "6735": 12779284480.0, "6740": 12779284480.0, "6745": 12779284480.0, "6750": 12779284480.0, "6755": 12779284480.0, "6760": 12779284480.0, "6765": 12779284480.0, "6770": 12779284480.0, "6775": 12779284480.0, "6780": 12779284480.0, "6785": 12779284480.0, "6790": 12779284480.0, "6795": 12779284480.0, "6800": 12779284480.0, "6805": 12779284480.0, "6810": 12779284480.0, "6815": 12779284480.0, "6820": 12779284480.0, "6825": 12779284480.0, "6830": 12779284480.0, "6835": 12779284480.0, "6840": 12779284480.0, "6845": 12779284480.0, "6850": 12779284480.0, "6855": 12779284480.0, "6860": 12779284480.0, "6865": 12779284480.0, "6870": 12779284480.0, "6875": 12779284480.0, "6880": 12779284480.0, "6885": 12779284480.0, "6890": 12779284480.0, "6895": 12779284480.0, "6900": 12779284480.0, "6905": 12779284480.0, "6910": 12779284480.0, "6915": 12779284480.0, "6920": 12779284480.0, "6925": 12779284480.0, "6930": 12779284480.0, "6935": 12779284480.0, "6940": 12779284480.0, "6945": 12779284480.0, "6950": 12779284480.0, "6955": 12779284480.0, "6960": 12779284480.0, "6965": 12779284480.0, "6970": 12779284480.0, "6975": 12779284480.0, "6980": 12779284480.0, "6985": 12779284480.0, "6990": 12779284480.0, "6995": 12779284480.0, "7000": 12779284480.0, "7005": 12779284480.0, "7010": 12779284480.0, "7015": 12779284480.0, "7020": 12779284480.0, "7025": 12779284480.0, "7030": 12779284480.0, "7035": 12779284480.0, "7040": 12779284480.0, "7045": 12779284480.0, "7050": 12779284480.0, "7055": 12779284480.0, "7060": 12779284480.0, "7065": 12779284480.0, "7070": 12779284480.0, "7075": 12779284480.0, "7080": 12779284480.0, "7085": 12779284480.0, "7090": 12779284480.0, "7095": 12779284480.0, "7100": 12779284480.0, "7105": 12779284480.0, "7110": 12779284480.0, "7115": 12779284480.0, "7120": 12779284480.0, "7125": 12779284480.0, "7130": 12779284480.0, "7135": 12779284480.0, "7140": 12779284480.0, "7145": 12779284480.0, "7150": 12779284480.0, "7155": 12779284480.0, "7160": 12779284480.0, "7165": 12779284480.0, "7170": 12779284480.0, "7175": 12779284480.0, "7180": 12779284480.0, "7185": 12779284480.0, "7190": 12779284480.0, "7195": 12779284480.0, "7200": 12779284480.0, "7205": 12779284480.0, "7210": 12779284480.0, "7215": 12779284480.0, "7220": 12779284480.0, "7225": 12779284480.0, "7230": 12779284480.0, "7235": 12779284480.0, "7240": 12779284480.0, "7245": 12779284480.0, "7250": 12779284480.0, "7255": 12779284480.0, "7260": 12779284480.0, "7265": 12779284480.0, "7270": 12779284480.0, "7275": 12779284480.0, "7280": 12779284480.0, "7285": 12779284480.0, "7290": 12779284480.0, "7295": 12779284480.0, "7300": 12779284480.0, "7305": 12779284480.0, "7310": 12779284480.0, "7315": 12779284480.0, "7320": 12779284480.0, "7325": 12779284480.0, "7330": 12779284480.0, "7335": 12779284480.0, "7340": 12779284480.0, "7345": 12779284480.0, "7350": 12779284480.0, "7355": 12779284480.0, "7360": 12779284480.0, "7365": 12779284480.0, "7370": 12779284480.0, "7375": 12779284480.0, "7380": 12779284480.0, "7385": 12779284480.0, "7390": 12779284480.0, "7395": 12779284480.0, "7400": 12779284480.0, "7405": 12779284480.0, "7410": 12779284480.0, "7415": 12779284480.0, "7420": 12779284480.0, "7425": 12779284480.0, "7430": 12779284480.0, "7435": 12779284480.0, "7440": 12779284480.0, "7445": 12779284480.0, "7450": 12779284480.0, "7455": 12779284480.0, "7460": 12779284480.0, "7465": 12779284480.0, "7470": 12779284480.0, "7475": 12779284480.0, "7480": 12779284480.0, "7485": 12779284480.0, "7490": 12779284480.0, "7495": 12779284480.0, "7500": 12779284480.0, "7505": 12779284480.0, "7510": 12779284480.0, "7515": 12779284480.0, "7520": 12779284480.0, "7525": 12779284480.0, "7530": 12779284480.0, "7535": 12779284480.0, "7540": 12779284480.0, "7545": 12779284480.0, "7550": 12779284480.0, "7555": 12779284480.0, "7560": 12779284480.0, "7565": 12779284480.0, "7570": 12779284480.0, "7575": 12779284480.0, "7580": 12779284480.0, "7585": 12779284480.0, "7590": 12779284480.0, "7595": 12779284480.0, "7600": 12779284480.0, "7605": 12779284480.0, "7610": 12779284480.0, "7615": 12779284480.0, "7620": 12779284480.0, "7625": 12779284480.0, "7630": 12779284480.0, "7635": 12779284480.0, "7640": 12779284480.0, "7645": 12779284480.0, "7650": 12779284480.0, "7655": 12779284480.0, "7660": 12779284480.0, "7665": 12779284480.0, "7670": 12779284480.0, "7675": 12779284480.0, "7680": 12779284480.0, "7685": 12779284480.0, "7690": 12779284480.0, "7695": 12779284480.0, "7700": 12779284480.0, "7705": 12779284480.0, "7710": 12779284480.0, "7715": 12779284480.0, "7720": 12779284480.0, "7725": 12779284480.0, "7730": 12779284480.0, "7735": 12779284480.0, "7740": 12779284480.0, "7745": 12779284480.0, "7750": 12779284480.0, "7755": 12779284480.0, "7760": 12779284480.0, "7765": 12779284480.0, "7770": 12779284480.0, "7775": 12779284480.0, "7780": 12779284480.0, "7785": 12779284480.0, "7790": 12779284480.0, "7795": 12779284480.0, "7800": 12779284480.0, "7805": 12779284480.0, "7810": 12779284480.0, "7815": 12779284480.0, "7820": 12779284480.0, "7825": 12779284480.0, "7830": 12779284480.0, "7835": 12779284480.0, "7840": 12779284480.0, "7845": 12779284480.0, "7850": 12779284480.0, "7855": 12779284480.0, "7860": 12779284480.0, "7865": 12779284480.0, "7870": 12779284480.0, "7875": 12779284480.0, "7880": 12779284480.0, "7885": 12779284480.0, "7890": 12779284480.0, "7895": 12779284480.0, "7900": 12779284480.0, "7905": 12779284480.0, "7910": 12779284480.0, "7915": 12779284480.0, "7920": 12779284480.0, "7925": 12779284480.0, "7930": 12779284480.0, "7935": 12779284480.0, "7940": 12779284480.0, "7945": 12779284480.0, "7950": 12779284480.0, "7955": 12779284480.0, "7960": 12779284480.0, "7965": 12779284480.0, "7970": 12779284480.0, "7975": 12779284480.0, "7980": 12779284480.0, "7985": 12779284480.0, "7990": 12779284480.0, "7995": 12779284480.0, "8000": 12779284480.0, "8005": 12779284480.0, "8010": 12779284480.0, "8015": 12779284480.0, "8020": 12779284480.0, "8025": 12779284480.0, "8030": 12779284480.0, "8035": 12779284480.0, "8040": 12779284480.0, "8045": 12779284480.0, "8050": 12779284480.0, "8055": 12779284480.0, "8060": 12779284480.0, "8065": 12779284480.0, "8070": 12779284480.0, "8075": 12779284480.0, "8080": 12779284480.0, "8085": 12779284480.0, "8090": 12779284480.0, "8095": 12779284480.0, "8100": 12779284480.0, "8105": 12779284480.0, "8110": 12779284480.0, "8115": 12779284480.0, "8120": 12779284480.0, "8125": 12779284480.0, "8130": 12779284480.0, "8135": 12779284480.0, "8140": 12779284480.0, "8145": 12779284480.0, "8150": 12779284480.0, "8155": 12779284480.0, "8160": 12779284480.0, "8165": 12779284480.0, "8170": 12779284480.0, "8175": 12779284480.0, "8180": 12779284480.0, "8185": 12779284480.0, "8190": 12779284480.0, "8195": 12779284480.0, "8200": 12779284480.0, "8205": 12779284480.0, "8210": 12779284480.0, "8215": 12779284480.0, "8220": 12779284480.0, "8225": 12779284480.0, "8230": 12779284480.0, "8235": 12779284480.0, "8240": 12779284480.0, "8245": 12779284480.0, "8250": 12779284480.0, "8255": 12779284480.0, "8260": 12779284480.0, "8265": 12779284480.0, "8270": 12779284480.0, "8275": 12779284480.0, "8280": 12779284480.0, "8285": 12779284480.0, "8290": 12779284480.0, "8295": 12779284480.0, "8300": 12779284480.0, "8305": 12779284480.0, "8310": 12779284480.0, "8315": 12779284480.0, "8320": 12779284480.0, "8325": 12779284480.0, "8330": 12779284480.0, "8335": 12779284480.0, "8340": 12779284480.0, "8345": 12779284480.0, "8350": 12779284480.0, "8355": 12779284480.0, "8360": 12779284480.0, "8365": 12779284480.0, "8370": 12779284480.0, "8375": 12779284480.0, "8380": 12779284480.0, "8385": 12779284480.0, "8390": 12779284480.0, "8395": 12779284480.0, "8400": 12779284480.0, "8405": 12779284480.0, "8410": 12779284480.0, "8415": 12779284480.0, "8420": 12779284480.0, "8425": 12779284480.0, "8430": 12779284480.0, "8435": 12779284480.0, "8440": 12779284480.0, "8445": 12779284480.0, "8450": 12779284480.0, "8455": 12779284480.0, "8460": 12779284480.0, "8465": 12779284480.0, "8470": 12779284480.0, "8475": 12779284480.0, "8480": 12779284480.0, "8485": 12779284480.0, "8490": 12779284480.0, "8495": 12779284480.0, "8500": 12779284480.0, "8505": 12779284480.0, "8510": 12779284480.0, "8515": 12779284480.0, "8520": 12779284480.0, "8525": 12779284480.0, "8530": 12779284480.0, "8535": 12779284480.0, "8540": 12779284480.0, "8545": 12779284480.0, "8550": 12779284480.0, "8555": 12779284480.0, "8560": 12779284480.0, "8565": 12779284480.0, "8570": 12779284480.0, "8575": 12779284480.0, "8580": 12779284480.0, "8585": 12779284480.0, "8590": 12779284480.0, "8595": 12779284480.0, "8600": 12779284480.0, "8605": 12779284480.0, "8610": 12777701376.0, "8615": 12777701376.0, "8620": 12777701376.0, "8625": 12777701376.0, "8630": 12777701376.0, "8635": 12777701376.0, "8640": 12777701376.0, "8645": 12777701376.0, "8650": 12777701376.0, "8655": 12777701376.0, "8660": 12777701376.0, "8665": 12777701376.0, "8670": 12777701376.0, "8675": 12777701376.0, "8680": 12777701376.0, "8685": 12777701376.0, "8690": 12777701376.0, "8695": 12777701376.0, "8700": 12777701376.0, "8705": 12777701376.0, "8710": 12777701376.0, "8715": 12777701376.0, "8720": 12777701376.0, "8725": 12777701376.0, "8730": 12777701376.0, "8735": 12777701376.0, "8740": 12777701376.0, "8745": 12777701376.0, "8750": 12777701376.0, "8755": 12777701376.0, "8760": 12777701376.0, "8765": 12777701376.0, "8770": 12777701376.0, "8775": 12777701376.0, "8780": 12777701376.0, "8785": 12777701376.0, "8790": 12777701376.0, "8795": 12777701376.0, "8800": 12777701376.0, "8805": 12777701376.0, "8810": 12777701376.0, "8815": 12777701376.0, "8820": 12777701376.0, "8825": 12777701376.0, "8830": 12777701376.0, "8835": 12777701376.0, "8840": 12777701376.0, "8845": 12777701376.0, "8850": 12777701376.0, "8855": 12777701376.0, "8860": 12777701376.0, "8865": 12777701376.0, "8870": 12777701376.0, "8875": 12777701376.0, "8880": 12777701376.0, "8885": 12777701376.0, "8890": 12777701376.0, "8895": 12777701376.0, "8900": 12777701376.0, "8905": 12777701376.0, "8910": 12777701376.0, "8915": 12777701376.0, "8920": 12777701376.0, "8925": 12777701376.0, "8930": 12777701376.0, "8935": 12777701376.0, "8940": 12777701376.0, "8945": 12777701376.0, "8950": 12777701376.0, "8955": 12777701376.0, "8960": 12777701376.0, "8965": 12777701376.0, "8970": 12777701376.0, "8975": 12777701376.0, "8980": 12777701376.0, "8985": 12777701376.0, "8990": 12777701376.0, "8995": 12777701376.0, "9000": 12777701376.0, "9005": 12777701376.0, "9010": 12777701376.0, "9015": 12777701376.0, "9020": 12777701376.0, "9025": 12777701376.0, "9030": 12777701376.0, "9035": 12777701376.0, "9040": 12777701376.0, "9045": 12777701376.0, "9050": 12777701376.0, "9055": 12777701376.0, "9060": 12777701376.0, "9065": 12777701376.0, "9070": 12777701376.0, "9075": 12777701376.0, "9080": 12777701376.0, "9085": 12777701376.0, "9090": 12777701376.0, "9095": 12777701376.0, "9100": 12777701376.0, "9105": 12777701376.0, "9110": 12777701376.0, "9115": 12777701376.0, "9120": 12777701376.0, "9125": 12777701376.0, "9130": 12777701376.0, "9135": 12777701376.0, "9140": 12777701376.0, "9145": 12777701376.0, "9150": 12777701376.0, "9155": 12777701376.0, "9160": 12777701376.0, "9165": 12777701376.0, "9170": 12777701376.0, "9175": 12777701376.0, "9180": 12777701376.0, "9185": 12777701376.0, "9190": 12777701376.0, "9195": 12777701376.0, "9200": 12777701376.0, "9205": 12777701376.0, "9210": 12777701376.0, "9215": 12777701376.0, "9220": 12777701376.0, "9225": 12777701376.0, "9230": 12777701376.0, "9235": 12777701376.0, "9240": 12777701376.0, "9245": 12777701376.0, "9250": 12777701376.0, "9255": 12777701376.0, "9260": 12777701376.0, "9265": 12777701376.0, "9270": 12777701376.0, "9275": 12777701376.0, "9280": 12777701376.0, "9285": 12777701376.0, "9290": 12777701376.0, "9295": 12777701376.0, "9300": 12777701376.0, "9305": 12777701376.0, "9310": 12777701376.0, "9315": 12777701376.0, "9320": 12777701376.0, "9325": 12777701376.0, "9330": 12777701376.0, "9335": 12777701376.0, "9340": 12777701376.0, "9345": 12777701376.0, "9350": 12777701376.0, "9355": 12777701376.0, "9360": 12777701376.0, "9365": 12777701376.0, "9370": 12777701376.0, "9375": 12777701376.0, "9380": 12777701376.0, "9385": 12777701376.0, "9390": 12777701376.0, "9395": 12777701376.0, "9400": 12777701376.0, "9405": 12777701376.0, "9410": 12777701376.0, "9415": 12777701376.0, "9420": 12777701376.0, "9425": 12777701376.0, "9430": 12777701376.0, "9435": 12777701376.0, "9440": 12777701376.0, "9445": 12777701376.0, "9450": 12777701376.0, "9455": 12777701376.0, "9460": 12777701376.0, "9465": 12777701376.0, "9470": 12777701376.0, "9475": 12777701376.0, "9480": 12777701376.0, "9485": 12777701376.0, "9490": 12777701376.0, "9495": 12777701376.0, "9500": 12777701376.0, "9505": 12777701376.0, "9510": 12777701376.0, "9515": 12777701376.0, "9520": 12777701376.0, "9525": 12777701376.0, "9530": 12777701376.0, "9535": 12777701376.0, "9540": 12777701376.0, "9545": 12777701376.0, "9550": 12777701376.0, "9555": 12777701376.0, "9560": 12777701376.0, "9565": 12777701376.0, "9570": 12777701376.0, "9575": 12777701376.0, "9580": 12777701376.0, "9585": 12777701376.0, "9590": 12777701376.0, "9595": 12777701376.0, "9600": 12777701376.0, "9605": 12777701376.0, "9610": 12777701376.0, "9615": 12777701376.0, "9620": 12777701376.0, "9625": 12777701376.0, "9630": 12777701376.0, "9635": 12777701376.0, "9640": 12777701376.0, "9645": 12777701376.0, "9650": 12777701376.0, "9655": 12777701376.0, "9660": 12777701376.0, "9665": 12777701376.0, "9670": 12777701376.0, "9675": 12777701376.0, "9680": 12777701376.0, "9685": 12777701376.0, "9690": 12777701376.0, "9695": 12777701376.0, "9700": 12777701376.0, "9705": 12777701376.0, "9710": 12777701376.0, "9715": 12777701376.0, "9720": 12777701376.0, "9725": 12777701376.0, "9730": 12777701376.0, "9735": 12777701376.0, "9740": 12777701376.0, "9745": 12777701376.0, "9750": 12777701376.0, "9755": 12777701376.0, "9760": 12777701376.0, "9765": 12777701376.0, "9770": 12777701376.0, "9775": 12777701376.0, "9780": 12777701376.0, "9785": 12777701376.0, "9790": 12777701376.0, "9795": 12777701376.0, "9800": 12777701376.0, "9805": 12777701376.0, "9810": 12777701376.0, "9815": 12777701376.0, "9820": 12777701376.0, "9825": 12777701376.0, "9830": 12777701376.0, "9835": 12777701376.0, "9840": 12777701376.0, "9845": 12777701376.0, "9850": 12777701376.0, "9855": 12777701376.0, "9860": 12777701376.0, "9865": 12777701376.0, "9870": 12777701376.0, "9875": 12777701376.0, "9880": 12777701376.0, "9885": 12777701376.0, "9890": 12777701376.0, "9895": 12777701376.0, "9900": 12777701376.0, "9905": 12777701376.0, "9910": 12777701376.0, "9915": 12777701376.0, "9920": 12777701376.0, "9925": 12777701376.0, "9930": 12777701376.0, "9935": 12777701376.0, "9940": 12777701376.0, "9945": 12777701376.0, "9950": 12777701376.0, "9955": 12777701376.0, "9960": 12777701376.0, "9965": 12777701376.0, "9970": 12777701376.0, "9975": 12777701376.0, "9980": 12777701376.0, "9985": 12777701376.0, "9990": 12777701376.0, "9995": 12777701376.0, "10000": 12777701376.0, "10005": 12777701376.0, "10010": 12777701376.0, "10015": 12777701376.0, "10020": 12777701376.0, "10025": 12777701376.0, "10030": 12777701376.0, "10035": 12777701376.0, "10040": 12777701376.0, "10045": 12777701376.0, "10050": 12777701376.0, "10055": 12777701376.0, "10060": 12777701376.0, "10065": 12777701376.0, "10070": 12777701376.0, "10075": 12777701376.0, "10080": 12777701376.0, "10085": 12777701376.0, "10090": 12777701376.0, "10095": 12777701376.0, "10100": 12777701376.0, "10105": 12777701376.0, "10110": 12777701376.0, "10115": 12777701376.0, "10120": 12777701376.0, "10125": 12777701376.0, "10130": 12777701376.0, "10135": 12777701376.0, "10140": 12777701376.0, "10145": 12777701376.0, "10150": 12777701376.0, "10155": 12777701376.0, "10160": 12777701376.0, "10165": 12777701376.0, "10170": 12777701376.0, "10175": 12777701376.0, "10180": 12777701376.0, "10185": 12777701376.0, "10190": 12777701376.0, "10195": 12777701376.0, "10200": 12777701376.0, "10205": 12777701376.0, "10210": 12777701376.0, "10215": 12777701376.0, "10220": 12777701376.0, "10225": 12777701376.0, "10230": 12777701376.0, "10235": 12777701376.0, "10240": 12777701376.0, "10245": 12777701376.0, "10250": 12777701376.0, "10255": 12777701376.0, "10260": 12777701376.0, "10265": 12777701376.0, "10270": 12777701376.0, "10275": 12777701376.0, "10280": 12777701376.0, "10285": 12777701376.0, "10290": 12777701376.0, "10295": 12777701376.0, "10300": 12777701376.0, "10305": 12777701376.0, "10310": 12777701376.0, "10315": 12777701376.0, "10320": 12777701376.0, "10325": 12777701376.0, "10330": 12777701376.0, "10335": 12777701376.0, "10340": 12777701376.0, "10345": 12777701376.0, "10350": 12777701376.0, "10355": 12777701376.0, "10360": 12777701376.0, "10365": 12777701376.0, "10370": 12777701376.0, "10375": 12777701376.0, "10380": 12777701376.0, "10385": 12777701376.0, "10390": 12777701376.0, "10395": 12777701376.0, "10400": 12777701376.0, "10405": 12777701376.0, "10410": 12777701376.0, "10415": 12777701376.0, "10420": 12777701376.0, "10425": 12777701376.0, "10430": 12777701376.0, "10435": 12777701376.0, "10440": 12777701376.0, "10445": 12777701376.0, "10450": 12777701376.0, "10455": 12777701376.0, "10460": 12777701376.0, "10465": 12777701376.0, "10470": 12777701376.0, "10475": 12777701376.0, "10480": 12777701376.0, "10485": 12777701376.0, "10490": 12777701376.0, "10495": 12777701376.0, "10500": 12777701376.0, "10505": 12777701376.0, "10510": 12777701376.0, "10515": 12777701376.0, "10520": 12777701376.0, "10525": 12777701376.0, "10530": 12777701376.0, "10535": 12777701376.0, "10540": 12777701376.0, "10545": 12777701376.0, "10550": 12777701376.0, "10555": 12777701376.0, "10560": 12777701376.0, "10565": 12777701376.0, "10570": 12777701376.0, "10575": 12777701376.0, "10580": 12777701376.0, "10585": 12777701376.0, "10590": 12777701376.0, "10595": 12777701376.0, "10600": 12777701376.0, "10605": 12777701376.0, "10610": 12777701376.0, "10615": 12777701376.0, "10620": 12777701376.0, "10625": 12777701376.0, "10630": 12777701376.0, "10635": 12777701376.0, "10640": 12777701376.0, "10645": 12777701376.0, "10650": 12777701376.0, "10655": 12777701376.0, "10660": 12777701376.0, "10665": 12777701376.0, "10670": 12777701376.0, "10675": 12777701376.0, "10680": 12777701376.0, "10685": 12777701376.0, "10690": 12777701376.0, "10695": 12777701376.0, "10700": 12777701376.0, "10705": 12777701376.0, "10710": 12777701376.0, "10715": 12777701376.0, "10720": 12777701376.0, "10725": 12777701376.0, "10730": 12777701376.0, "10735": 12777701376.0, "10740": 12777701376.0, "10745": 12777701376.0, "10750": 12777701376.0, "10755": 12777701376.0, "10760": 12777701376.0, "10765": 12777701376.0, "10770": 12777701376.0, "10775": 12777701376.0, "10780": 12777701376.0, "10785": 12777701376.0, "10790": 12777701376.0, "10795": 12777701376.0, "10800": 12777701376.0, "10805": 12777701376.0, "10810": 12777701376.0, "10815": 12777701376.0, "10820": 12777701376.0, "10825": 12777701376.0, "10830": 12777701376.0, "10835": 12777701376.0, "10840": 12777701376.0, "10845": 12777701376.0, "10850": 12777701376.0, "10855": 12777701376.0, "10860": 12777701376.0, "10865": 12777701376.0, "10870": 12777701376.0, "10875": 12777701376.0, "10880": 12777701376.0, "10885": 12777701376.0, "10890": 12777701376.0, "10895": 12777701376.0, "10900": 12777701376.0, "10905": 12777701376.0, "10910": 12777701376.0, "10915": 12777701376.0, "10920": 12777701376.0, "10925": 12777701376.0, "10930": 12777701376.0, "10935": 12777701376.0, "10940": 12777701376.0, "10945": 12777701376.0, "10950": 12777701376.0, "10955": 12777701376.0, "10960": 12777701376.0, "10965": 12777701376.0, "10970": 12777701376.0, "10975": 12777701376.0, "10980": 12777701376.0, "10985": 12777701376.0, "10990": 12777701376.0, "10995": 12777701376.0, "11000": 12777701376.0, "11005": 12777701376.0, "11010": 12777701376.0, "11015": 12777701376.0, "11020": 12777701376.0, "11025": 12777701376.0, "11030": 12777701376.0, "11035": 12777701376.0, "11040": 12777701376.0, "11045": 12777701376.0, "11050": 12777701376.0, "11055": 12777701376.0, "11060": 12777701376.0, "11065": 12777701376.0, "11070": 12777701376.0, "11075": 12777701376.0, "11080": 12777701376.0, "11085": 12777701376.0, "11090": 12777701376.0, "11095": 12777701376.0, "11100": 12777701376.0, "11105": 12777701376.0, "11110": 12777701376.0, "11115": 12777701376.0, "11120": 12777701376.0, "11125": 12777701376.0, "11130": 12777701376.0, "11135": 12777701376.0, "11140": 12777701376.0, "11145": 12777701376.0, "11150": 12777701376.0, "11155": 12777701376.0, "11160": 12777701376.0, "11165": 12777701376.0, "11170": 12777701376.0, "11175": 12777701376.0, "11180": 12777701376.0, "11185": 12777701376.0, "11190": 12777701376.0, "11195": 12777701376.0, "11200": 12777701376.0, "11205": 12777701376.0, "11210": 12777701376.0, "11215": 12777701376.0, "11220": 12777701376.0, "11225": 12777701376.0, "11230": 12777701376.0, "11235": 12777701376.0, "11240": 12777701376.0, "11245": 12777701376.0, "11250": 12777701376.0, "11255": 12777701376.0, "11260": 12777701376.0, "11265": 12777701376.0, "11270": 12777701376.0, "11275": 12777701376.0, "11280": 12777701376.0, "11285": 12777701376.0, "11290": 12777701376.0, "11295": 12777701376.0, "11300": 12777701376.0, "11305": 12777701376.0, "11310": 12777701376.0, "11315": 12777701376.0, "11320": 12777701376.0, "11325": 12777701376.0, "11330": 12777701376.0, "11335": 12777701376.0, "11340": 12777701376.0, "11345": 12777701376.0, "11350": 12777701376.0, "11355": 12777701376.0, "11360": 12777701376.0, "11365": 12777701376.0, "11370": 12777701376.0, "11375": 12777701376.0, "11380": 12777701376.0, "11385": 12777701376.0, "11390": 12777701376.0, "11395": 12777701376.0, "11400": 12777701376.0, "11405": 12777701376.0, "11410": 12777701376.0, "11415": 12777701376.0, "11420": 12777701376.0, "11425": 12777701376.0, "11430": 12777701376.0, "11435": 12777701376.0, "11440": 12777701376.0, "11445": 12777701376.0, "11450": 12777701376.0, "11455": 12777701376.0, "11460": 12777701376.0, "11465": 12777701376.0, "11470": 12777701376.0, "11475": 12777701376.0, "11480": 12777701376.0, "11485": 12777701376.0, "11490": 12777701376.0, "11495": 12777701376.0, "11500": 12777701376.0, "11505": 12777701376.0, "11510": 12777701376.0, "11515": 12777701376.0, "11520": 12777701376.0, "11525": 12777701376.0, "11530": 12777701376.0, "11535": 12777701376.0, "11540": 12777701376.0, "11545": 12777701376.0, "11550": 12777701376.0, "11555": 12777701376.0, "11560": 12777701376.0, "11565": 12777701376.0, "11570": 12777701376.0, "11575": 12777701376.0, "11580": 12777701376.0, "11585": 12777701376.0, "11590": 12777701376.0, "11595": 12777701376.0, "11600": 12777701376.0, "11605": 12777701376.0, "11610": 12777701376.0, "11615": 12777701376.0, "11620": 12777701376.0, "11625": 12777701376.0, "11630": 12777701376.0, "11635": 12777701376.0, "11640": 12777701376.0, "11645": 12777701376.0, "11650": 12777701376.0, "11655": 12777701376.0, "11660": 12777701376.0, "11665": 12777701376.0, "11670": 12777701376.0, "11675": 12777701376.0, "11680": 12777701376.0, "11685": 12777701376.0, "11690": 12777701376.0, "11695": 12777701376.0, "11700": 12777701376.0, "11705": 12777701376.0, "11710": 12777701376.0, "11715": 12777701376.0, "11720": 12777701376.0, "11725": 12777701376.0, "11730": 12777701376.0, "11735": 12777701376.0, "11740": 12777701376.0, "11745": 12777701376.0, "11750": 12777701376.0, "11755": 12777701376.0, "11760": 12777701376.0, "11765": 12777701376.0, "11770": 12777701376.0, "11775": 12777701376.0, "11780": 12777701376.0, "11785": 12777701376.0, "11790": 12777701376.0, "11795": 12777701376.0, "11800": 12777701376.0, "11805": 12777701376.0, "11810": 12777701376.0, "11815": 12777701376.0, "11820": 12777701376.0, "11825": 12777701376.0, "11830": 12777701376.0, "11835": 12777701376.0, "11840": 12777701376.0, "11845": 12777701376.0, "11850": 12777701376.0, "11855": 12777701376.0, "11860": 12777701376.0, "11865": 12777701376.0, "11870": 12777701376.0, "11875": 12777701376.0, "11880": 12777701376.0, "11885": 12777701376.0, "11890": 12777701376.0, "11895": 12777701376.0, "11900": 12777701376.0, "11905": 12777701376.0, "11910": 12777701376.0, "11915": 12777701376.0, "11920": 12777701376.0, "11925": 12777701376.0, "11930": 12777701376.0, "11935": 12777701376.0, "11940": 12777701376.0, "11945": 12777701376.0, "11950": 12777701376.0, "11955": 12777701376.0, "11960": 12777701376.0, "11965": 12777701376.0, "11970": 12777701376.0, "11975": 12777701376.0, "11980": 12777701376.0, "11985": 12777701376.0, "11990": 12777701376.0, "11995": 12777701376.0, "12000": 12777701376.0, "12005": 12777701376.0, "12010": 12777701376.0, "12015": 12777701376.0, "12020": 12777701376.0, "12025": 12777701376.0, "12030": 12777701376.0, "12035": 12777701376.0, "12040": 12777701376.0, "12045": 12777701376.0, "12050": 12777701376.0, "12055": 12777701376.0, "12060": 12777701376.0, "12065": 12777701376.0, "12070": 12777701376.0, "12075": 12777701376.0, "12080": 12777701376.0, "12085": 12777701376.0, "12090": 12777701376.0, "12095": 12777701376.0, "12100": 12777701376.0, "12105": 12777701376.0, "12110": 12777701376.0, "12115": 12777701376.0, "12120": 12777701376.0, "12125": 12777701376.0, "12130": 12777701376.0, "12135": 12777701376.0, "12140": 12777701376.0, "12145": 12777701376.0, "12150": 12777701376.0, "12155": 12777701376.0, "12160": 12777701376.0, "12165": 12777701376.0, "12170": 12777701376.0, "12175": 12777701376.0, "12180": 12777701376.0, "12185": 12777701376.0, "12190": 12777701376.0, "12195": 12777701376.0, "12200": 12777701376.0, "12205": 12777701376.0, "12210": 12777701376.0, "12215": 12777701376.0, "12220": 12777701376.0, "12225": 12777701376.0, "12230": 12777701376.0, "12235": 12777701376.0, "12240": 12777701376.0, "12245": 12777701376.0, "12250": 12777701376.0, "12255": 12777701376.0, "12260": 12777701376.0, "12265": 12777701376.0, "12270": 12777701376.0, "12275": 12777701376.0, "12280": 12777701376.0, "12285": 12777701376.0, "12290": 12777701376.0, "12295": 12777701376.0, "12300": 12777701376.0, "12305": 12777701376.0, "12310": 12777701376.0, "12315": 12777701376.0, "12320": 12777701376.0, "12325": 12777701376.0, "12330": 12777701376.0, "12335": 12777701376.0, "12340": 12777701376.0, "12345": 12777701376.0, "12350": 12777701376.0, "12355": 12777701376.0, "12360": 12777701376.0, "12365": 12777701376.0, "12370": 12777701376.0, "12375": 12777701376.0, "12380": 12777701376.0, "12385": 12777701376.0, "12390": 12777701376.0, "12395": 12777701376.0, "12400": 12777701376.0, "12405": 12777701376.0, "12410": 12777701376.0, "12415": 12777701376.0, "12420": 12777701376.0, "12425": 12777701376.0, "12430": 12777701376.0, "12435": 12777701376.0, "12440": 12777701376.0, "12445": 12777701376.0, "12450": 12777701376.0, "12455": 12777701376.0, "12460": 12777701376.0, "12465": 12777701376.0, "12470": 12777701376.0, "12475": 12777701376.0, "12480": 12777701376.0, "12485": 12777701376.0, "12490": 12777701376.0, "12495": 12777701376.0, "12500": 12777701376.0, "12505": 12777701376.0, "12510": 12777701376.0, "12515": 12777701376.0, "12520": 12777701376.0, "12525": 12777701376.0, "12530": 12777701376.0, "12535": 12777701376.0, "12540": 12777701376.0, "12545": 12777701376.0, "12550": 12777701376.0, "12555": 12777701376.0, "12560": 12777701376.0, "12565": 12777701376.0, "12570": 12777701376.0, "12575": 12777701376.0, "12580": 12777701376.0, "12585": 12777701376.0, "12590": 12777701376.0, "12595": 12777701376.0, "12600": 12777701376.0, "12605": 12777701376.0, "12610": 12777701376.0, "12615": 12777701376.0, "12620": 12777701376.0, "12625": 12777701376.0, "12630": 12777701376.0, "12635": 12777701376.0, "12640": 12777701376.0, "12645": 12777701376.0, "12650": 12777701376.0, "12655": 12777701376.0, "12660": 12777701376.0, "12665": 12777701376.0, "12670": 12777701376.0, "12675": 12777701376.0, "12680": 12777701376.0, "12685": 12777701376.0, "12690": 12777701376.0, "12695": 12777701376.0, "12700": 12777701376.0, "12705": 12777701376.0, "12710": 12777701376.0, "12715": 12777701376.0, "12720": 12777701376.0, "12725": 12777701376.0, "12730": 12777701376.0, "12735": 12777701376.0, "12740": 12777701376.0, "12745": 12777701376.0, "12750": 12777701376.0, "12755": 12777701376.0, "12760": 12777701376.0, "12765": 12777701376.0, "12770": 12777701376.0, "12775": 12777701376.0, "12780": 12777701376.0, "12785": 12777701376.0, "12790": 12777701376.0, "12795": 12777701376.0, "12800": 12777701376.0, "12805": 12777701376.0, "12810": 12777701376.0, "12815": 12777701376.0, "12820": 12777701376.0, "12825": 12777701376.0, "12830": 12777701376.0, "12835": 12777701376.0, "12840": 12777701376.0, "12845": 12777701376.0, "12850": 12777701376.0, "12855": 12777701376.0, "12860": 12777701376.0, "12865": 12777701376.0, "12870": 12777701376.0, "12875": 12777701376.0, "12880": 12777701376.0, "12885": 12777701376.0, "12890": 12777701376.0, "12895": 12777701376.0, "12900": 12777701376.0, "12905": 12777701376.0, "12910": 12777701376.0, "12915": 12777701376.0, "12920": 12777701376.0, "12925": 12777701376.0, "12930": 12777701376.0, "12935": 12777701376.0, "12940": 12777701376.0, "12945": 12777701376.0, "12950": 12777701376.0, "12955": 12777701376.0, "12960": 12777701376.0, "12965": 12777701376.0, "12970": 12777701376.0, "12975": 12777701376.0, "12980": 12777701376.0, "12985": 12777701376.0, "12990": 12777701376.0, "12995": 12777701376.0, "13000": 12777701376.0, "13005": 12777701376.0, "13010": 12777701376.0, "13015": 12777701376.0, "13020": 12777701376.0, "13025": 12777701376.0, "13030": 12777701376.0, "13035": 12777701376.0, "13040": 12777701376.0, "13045": 12777701376.0, "13050": 12777701376.0, "13055": 12777701376.0, "13060": 12777701376.0, "13065": 12777701376.0, "13070": 12777701376.0, "13075": 12777701376.0, "13080": 12777701376.0, "13085": 12777701376.0, "13090": 12777701376.0, "13095": 12777701376.0, "13100": 12777701376.0, "13105": 12777701376.0, "13110": 12777701376.0, "13115": 12777701376.0, "13120": 12777701376.0, "13125": 12777701376.0, "13130": 12777701376.0, "13135": 12777701376.0, "13140": 12777701376.0, "13145": 12777701376.0, "13150": 12777701376.0, "13155": 12777701376.0, "13160": 12777701376.0, "13165": 12777701376.0, "13170": 12777701376.0, "13175": 12777701376.0, "13180": 12777701376.0, "13185": 12777701376.0, "13190": 12777701376.0, "13195": 12777701376.0, "13200": 12777701376.0, "13205": 12777701376.0, "13210": 12777701376.0, "13215": 12777701376.0, "13220": 12777701376.0, "13225": 12777701376.0, "13230": 12777701376.0, "13235": 12777701376.0, "13240": 12777701376.0, "13245": 12777701376.0, "13250": 12777701376.0, "13255": 12777701376.0, "13260": 12777701376.0, "13265": 12777701376.0, "13270": 12777701376.0, "13275": 12777701376.0, "13280": 12777701376.0, "13285": 12777701376.0, "13290": 12777701376.0, "13295": 12777701376.0, "13300": 12777701376.0, "13305": 12777701376.0, "13310": 12777701376.0, "13315": 12777701376.0, "13320": 12777701376.0, "13325": 12777701376.0, "13330": 12777701376.0, "13335": 12777701376.0, "13340": 12777701376.0, "13345": 12777701376.0, "13350": 12777701376.0, "13355": 12777701376.0, "13360": 12777701376.0, "13365": 12777701376.0, "13370": 12777701376.0, "13375": 12777701376.0, "13380": 12777701376.0, "13385": 12777701376.0, "13390": 12777701376.0, "13395": 12777701376.0, "13400": 12777701376.0, "13405": 12777701376.0, "13410": 12777701376.0, "13415": 12777701376.0, "13420": 12777701376.0, "13425": 12777701376.0, "13430": 12777701376.0, "13435": 12777701376.0, "13440": 12777701376.0, "13445": 12777701376.0, "13450": 12777701376.0, "13455": 12777701376.0, "13460": 12777701376.0, "13465": 12777701376.0, "13470": 12777701376.0, "13475": 12777701376.0, "13480": 12777701376.0, "13485": 12777701376.0, "13490": 12777701376.0, "13495": 12777701376.0, "13500": 12777701376.0, "13505": 12777701376.0, "13510": 12777701376.0, "13515": 12777701376.0, "13520": 12777701376.0, "13525": 12777701376.0, "13530": 12777701376.0, "13535": 12777701376.0, "13540": 12777701376.0, "13545": 12777701376.0, "13550": 12777701376.0, "13555": 12777701376.0, "13560": 12777701376.0, "13565": 12777701376.0, "13570": 12777701376.0, "13575": 12777701376.0, "13580": 12777701376.0, "13585": 12777701376.0, "13590": 12777701376.0, "13595": 12777701376.0, "13600": 12777701376.0, "13605": 12777701376.0, "13610": 12777701376.0, "13615": 12777701376.0, "13620": 12777701376.0, "13625": 12777701376.0, "13630": 12777701376.0, "13635": 12777701376.0, "13640": 12777701376.0, "13645": 12777701376.0, "13650": 12777701376.0, "13655": 12777701376.0, "13660": 12777701376.0, "13665": 12777701376.0, "13670": 12777701376.0, "13675": 12777701376.0, "13680": 12777701376.0, "13685": 12777701376.0, "13690": 12777701376.0, "13695": 12777701376.0, "13700": 12777701376.0, "13705": 12777701376.0, "13710": 12777701376.0, "13715": 12777701376.0, "13720": 12777701376.0, "13725": 12777701376.0, "13730": 12777701376.0, "13735": 12777701376.0, "13740": 12777701376.0, "13745": 12777701376.0, "13750": 12777701376.0, "13755": 12777701376.0, "13760": 12777701376.0, "13765": 12777701376.0, "13770": 12777701376.0, "13775": 12777701376.0, "13780": 12777701376.0, "13785": 12777701376.0, "13790": 12777701376.0, "13795": 12777701376.0, "13800": 12777701376.0, "13805": 12777701376.0, "13810": 12777701376.0, "13815": 12777701376.0, "13820": 12777701376.0, "13825": 12777701376.0, "13830": 12777701376.0, "13835": 12777701376.0, "13840": 12777701376.0, "13845": 12777701376.0, "13850": 12777701376.0, "13855": 12777701376.0, "13860": 12777701376.0, "13865": 12777701376.0, "13870": 12777701376.0, "13875": 12777701376.0, "13880": 12777701376.0, "13885": 12777701376.0, "13890": 12777701376.0, "13895": 12777701376.0, "13900": 12777701376.0, "13905": 12777701376.0, "13910": 12777701376.0, "13915": 12777701376.0, "13920": 12777701376.0, "13925": 12777701376.0, "13930": 12777701376.0, "13935": 12777701376.0, "13940": 12777701376.0, "13945": 12777701376.0, "13950": 12777701376.0, "13955": 12777701376.0, "13960": 12777701376.0, "13965": 12777701376.0, "13970": 12777701376.0, "13975": 12777701376.0, "13980": 12777701376.0, "13985": 12777701376.0, "13990": 12777701376.0, "13995": 12777701376.0, "14000": 12777701376.0, "14005": 12777701376.0, "14010": 12777701376.0, "14015": 12777701376.0, "14020": 12777701376.0, "14025": 12777701376.0, "14030": 12777701376.0, "14035": 12777701376.0, "14040": 12777701376.0, "14045": 12777701376.0, "14050": 12777701376.0, "14055": 12777701376.0, "14060": 12777701376.0, "14065": 12777701376.0, "14070": 12777701376.0, "14075": 12777701376.0, "14080": 12777701376.0, "14085": 12777701376.0, "14090": 12777701376.0, "14095": 12777701376.0, "14100": 12777701376.0, "14105": 12777701376.0, "14110": 12777701376.0, "14115": 12777701376.0, "14120": 12777701376.0, "14125": 12777701376.0, "14130": 12777701376.0, "14135": 12777701376.0, "14140": 12777701376.0, "14145": 12777701376.0, "14150": 12777701376.0, "14155": 12777701376.0, "14160": 12777701376.0, "14165": 12777701376.0, "14170": 12777701376.0, "14175": 12777701376.0, "14180": 12777701376.0, "14185": 12777701376.0, "14190": 12777701376.0, "14195": 12777701376.0, "14200": 12777701376.0, "14205": 12777701376.0, "14210": 12777701376.0, "14215": 12777701376.0, "14220": 12777701376.0, "14225": 12777701376.0, "14230": 12777701376.0, "14235": 12777701376.0, "14240": 12777701376.0, "14245": 12777701376.0, "14250": 12777701376.0, "14255": 12777701376.0, "14260": 12777701376.0, "14265": 12777701376.0, "14270": 12777701376.0, "14275": 12777701376.0, "14280": 12777701376.0, "14285": 12777701376.0, "14290": 12777701376.0, "14295": 12777701376.0, "14300": 12777701376.0, "14305": 12777701376.0, "14310": 12777701376.0, "14315": 12777701376.0, "14320": 12777701376.0, "14325": 12777701376.0, "14330": 12777701376.0, "14335": 12777701376.0, "14340": 12777701376.0, "14345": 12777701376.0, "14350": 12777701376.0, "14355": 12777701376.0, "14360": 12777701376.0, "14365": 12777701376.0, "14370": 12777701376.0, "14375": 12777701376.0, "14380": 12777701376.0, "14385": 12777701376.0, "14390": 12777701376.0, "14395": 12777701376.0, "14400": 12777701376.0, "14405": 12777701376.0, "14410": 12777701376.0, "14415": 12777701376.0, "14420": 12777701376.0, "14425": 12777701376.0, "14430": 12777701376.0, "14435": 12777701376.0, "14440": 12777701376.0, "14445": 12777701376.0, "14450": 12777701376.0, "14455": 12777701376.0, "14460": 12777701376.0, "14465": 12777701376.0, "14470": 12777701376.0, "14475": 12777701376.0, "14480": 12777701376.0, "14485": 12777701376.0, "14490": 12777701376.0, "14495": 12777701376.0, "14500": 12777701376.0, "14505": 12777701376.0, "14510": 12777701376.0, "14515": 12777701376.0, "14520": 12777701376.0, "14525": 12777701376.0, "14530": 12777701376.0, "14535": 12777701376.0, "14540": 12777701376.0, "14545": 12777701376.0, "14550": 12777701376.0, "14555": 12777701376.0, "14560": 12777701376.0, "14565": 12777701376.0, "14570": 12777701376.0, "14575": 12777701376.0, "14580": 12777701376.0, "14585": 12777701376.0, "14590": 12777701376.0, "14595": 12777701376.0, "14600": 12777701376.0, "14605": 12777701376.0, "14610": 12777701376.0, "14615": 12777701376.0, "14620": 12777701376.0, "14625": 12777701376.0, "14630": 12777701376.0, "14635": 12777701376.0, "14640": 12777701376.0, "14645": 12777701376.0, "14650": 12777701376.0, "14655": 12777701376.0, "14660": 12777701376.0, "14665": 12777701376.0, "14670": 12777701376.0, "14675": 12777701376.0, "14680": 12777701376.0, "14685": 12777701376.0, "14690": 12777701376.0, "14695": 12777701376.0, "14700": 12777701376.0, "14705": 12777701376.0, "14710": 12777701376.0, "14715": 12777701376.0, "14720": 12777701376.0, "14725": 12777701376.0, "14730": 12777701376.0, "14735": 12777701376.0, "14740": 12777701376.0, "14745": 12777701376.0, "14750": 12777701376.0, "14755": 12777701376.0, "14760": 12777701376.0, "14765": 12777701376.0, "14770": 12777701376.0, "14775": 12777701376.0, "14780": 12777701376.0, "14785": 12777701376.0, "14790": 12777701376.0, "14795": 12777701376.0, "14800": 12777701376.0, "14805": 12777701376.0, "14810": 12777701376.0, "14815": 12777701376.0, "14820": 12777701376.0, "14825": 12777701376.0, "14830": 12777701376.0, "14835": 12777701376.0, "14840": 12777701376.0, "14845": 12777701376.0, "14850": 12777701376.0, "14855": 12777701376.0, "14860": 12777701376.0, "14865": 12777701376.0, "14870": 12777701376.0, "14875": 12777701376.0, "14880": 12777701376.0, "14885": 12777701376.0, "14890": 12777701376.0, "14895": 12777701376.0, "14900": 12777701376.0, "14905": 12777701376.0, "14910": 12777701376.0, "14915": 12777701376.0, "14920": 12777701376.0, "14925": 12777701376.0, "14930": 12777701376.0, "14935": 12777701376.0, "14940": 12777701376.0, "14945": 12777701376.0, "14950": 12777701376.0, "14955": 12777701376.0, "14960": 12777701376.0, "14965": 12777701376.0, "14970": 12777701376.0, "14975": 12777701376.0, "14980": 12777701376.0, "14985": 12777701376.0, "14990": 12777701376.0, "14995": 12777701376.0, "15000": 12777701376.0, "15005": 12777701376.0, "15010": 12777701376.0, "15015": 12777701376.0, "15020": 12777701376.0, "15025": 12777701376.0, "15030": 12777701376.0, "15035": 12777701376.0, "15040": 12777701376.0, "15045": 12777701376.0, "15050": 12777701376.0, "15055": 12777701376.0, "15060": 12777701376.0, "15065": 12777701376.0, "15070": 12777701376.0, "15075": 12777701376.0, "15080": 12777701376.0, "15085": 12777701376.0, "15090": 12777701376.0, "15095": 12777701376.0, "15100": 12777701376.0, "15105": 12777701376.0, "15110": 12777701376.0, "15115": 12777701376.0, "15120": 12777701376.0, "15125": 12777701376.0, "15130": 12777701376.0, "15135": 12777701376.0, "15140": 12777701376.0, "15145": 12777701376.0, "15150": 12777701376.0, "15155": 12777701376.0, "15160": 12777701376.0, "15165": 12777701376.0, "15170": 12777701376.0, "15175": 12777701376.0, "15180": 12777701376.0, "15185": 12777701376.0, "15190": 12777701376.0, "15195": 12777701376.0, "15200": 12777701376.0, "15205": 12777701376.0, "15210": 12777701376.0, "15215": 12777701376.0, "15220": 12777701376.0, "15225": 12777701376.0, "15230": 12777701376.0, "15235": 12777701376.0, "15240": 12777701376.0, "15245": 12777701376.0, "15250": 12777701376.0, "15255": 12777701376.0, "15260": 12777701376.0, "15265": 12777701376.0, "15270": 12777701376.0, "15275": 12777701376.0, "15280": 12777701376.0, "15285": 12777701376.0, "15290": 12777701376.0, "15295": 12777701376.0, "15300": 12777701376.0, "15305": 12777701376.0, "15310": 12777701376.0, "15315": 12777701376.0, "15320": 12777701376.0, "15325": 12777701376.0, "15330": 12777701376.0, "15335": 12777701376.0, "15340": 12777701376.0, "15345": 12777701376.0, "15350": 12777701376.0, "15355": 12777701376.0, "15360": 12777701376.0, "15365": 12777701376.0, "15370": 12777701376.0, "15375": 12777701376.0, "15380": 12777701376.0, "15385": 12777701376.0, "15390": 12777701376.0, "15395": 12777701376.0, "15400": 12777701376.0, "15405": 12777701376.0, "15410": 12777701376.0, "15415": 12777701376.0, "15420": 12777701376.0, "15425": 12777701376.0, "15430": 12777701376.0, "15435": 12777701376.0, "15440": 12777701376.0, "15445": 12777701376.0, "15450": 12777701376.0, "15455": 12777701376.0, "15460": 12777701376.0, "15465": 12777701376.0, "15470": 12777701376.0, "15475": 12777701376.0, "15480": 12777701376.0, "15485": 12777701376.0, "15490": 12777701376.0, "15495": 12777701376.0, "15500": 12777701376.0, "15505": 12777701376.0, "15510": 12777701376.0, "15515": 12777701376.0, "15520": 12777701376.0, "15525": 12777701376.0, "15530": 12777701376.0, "15535": 12777701376.0, "15540": 12777701376.0, "15545": 12777701376.0, "15550": 12777701376.0, "15555": 12777701376.0, "15560": 12777701376.0, "15565": 12777701376.0, "15570": 12777701376.0, "15575": 12777701376.0, "15580": 12777701376.0, "15585": 12777701376.0, "15590": 12777701376.0, "15595": 12777701376.0, "15600": 12777701376.0, "15605": 12777701376.0, "15610": 12777701376.0, "15615": 12777701376.0, "15620": 12777701376.0, "15625": 12777701376.0, "15630": 12777701376.0, "15635": 12777701376.0, "15640": 12777701376.0, "15645": 12777701376.0, "15650": 12777701376.0, "15655": 12777701376.0, "15660": 12777701376.0, "15665": 12777701376.0, "15670": 12777701376.0, "15675": 12777701376.0, "15680": 12777701376.0, "15685": 12777701376.0, "15690": 12777701376.0, "15695": 12777701376.0, "15700": 12777701376.0, "15705": 12777701376.0, "15710": 12777701376.0, "15715": 12777701376.0, "15720": 12777701376.0, "15725": 12777701376.0, "15730": 12777701376.0, "15735": 12777701376.0, "15740": 12777701376.0, "15745": 12777701376.0, "15750": 12777701376.0, "15755": 12777701376.0, "15760": 12777701376.0, "15765": 12777701376.0, "15770": 12777701376.0, "15775": 12777701376.0, "15780": 12777701376.0, "15785": 12777701376.0, "15790": 12777701376.0, "15795": 12777701376.0, "15800": 12777701376.0, "15805": 12777701376.0, "15810": 12777701376.0, "15815": 12777701376.0, "15820": 12777701376.0, "15825": 12777701376.0, "15830": 12777701376.0, "15835": 12777701376.0, "15840": 12777701376.0, "15845": 12777701376.0, "15850": 12777701376.0, "15855": 12777701376.0, "15860": 12777701376.0, "15865": 12777701376.0, "15870": 12777701376.0, "15875": 12777701376.0, "15880": 12777701376.0, "15885": 12777701376.0, "15890": 12777701376.0, "15895": 12777701376.0, "15900": 12777701376.0, "15905": 12777701376.0, "15910": 12777701376.0, "15915": 12777701376.0, "15920": 12777701376.0, "15925": 12777701376.0, "15930": 12777701376.0, "15935": 12777701376.0, "15940": 12777701376.0, "15945": 12777701376.0, "15950": 12777701376.0, "15955": 12777701376.0, "15960": 12777701376.0, "15965": 12777701376.0, "15970": 12777701376.0, "15975": 12777701376.0, "15980": 12777701376.0, "15985": 12777701376.0, "15990": 12777701376.0, "15995": 12777701376.0, "16000": 12777701376.0, "16005": 12777701376.0, "16010": 12777701376.0, "16015": 12777701376.0, "16020": 12777701376.0, "16025": 12777701376.0, "16030": 12777701376.0, "16035": 12777701376.0, "16040": 12777701376.0, "16045": 12777701376.0, "16050": 12777701376.0, "16055": 12777701376.0, "16060": 12777701376.0, "16065": 12777701376.0, "16070": 12777701376.0, "16075": 12777701376.0, "16080": 12777701376.0, "16085": 12777701376.0, "16090": 12777701376.0, "16095": 12777701376.0, "16100": 12777701376.0, "16105": 12777701376.0, "16110": 12777701376.0, "16115": 12777701376.0, "16120": 12777701376.0, "16125": 12777701376.0, "16130": 12777701376.0, "16135": 12777701376.0, "16140": 12777701376.0, "16145": 12777701376.0, "16150": 12777701376.0, "16155": 12777701376.0, "16160": 12777701376.0, "16165": 12777701376.0, "16170": 12777701376.0, "16175": 12777701376.0, "16180": 12777701376.0, "16185": 12777701376.0, "16190": 12777701376.0, "16195": 12777701376.0, "16200": 12777701376.0, "16205": 12777701376.0, "16210": 12777701376.0, "16215": 12777701376.0, "16220": 12777701376.0, "16225": 12777701376.0, "16230": 12777701376.0, "16235": 12777701376.0, "16240": 12777701376.0, "16245": 12777701376.0, "16250": 12777701376.0, "16255": 12777701376.0, "16260": 12777701376.0, "16265": 12777701376.0, "16270": 12777701376.0, "16275": 12777701376.0, "16280": 12777701376.0, "16285": 12777701376.0, "16290": 12777701376.0, "16295": 12777701376.0, "16300": 12777701376.0, "16305": 12777701376.0, "16310": 12777701376.0, "16315": 12777701376.0, "16320": 12777701376.0, "16325": 12777701376.0, "16330": 12777701376.0, "16335": 12777701376.0, "16340": 12777701376.0, "16345": 12777701376.0, "16350": 12777701376.0, "16355": 12777701376.0, "16360": 12777701376.0, "16365": 12777701376.0, "16370": 12777701376.0, "16375": 12777701376.0, "16380": 12777701376.0, "16385": 12777701376.0, "16390": 12777701376.0, "16395": 12777701376.0, "16400": 12777701376.0, "16405": 12777701376.0, "16410": 12777701376.0, "16415": 12777701376.0, "16420": 12777701376.0, "16425": 12777701376.0, "16430": 12777701376.0, "16435": 12777701376.0, "16440": 12777701376.0, "16445": 12777701376.0, "16450": 12777701376.0, "16455": 12777701376.0, "16460": 12777701376.0, "16465": 12777701376.0, "16470": 12777701376.0, "16475": 12777701376.0, "16480": 12777701376.0, "16485": 12777701376.0, "16490": 12777701376.0, "16495": 12777701376.0, "16500": 12777701376.0, "16505": 12777701376.0, "16510": 12777701376.0, "16515": 12777701376.0, "16520": 12777701376.0, "16525": 12777701376.0, "16530": 12777701376.0, "16535": 12777701376.0, "16540": 12777701376.0, "16545": 12777701376.0, "16550": 12777701376.0, "16555": 12777701376.0, "16560": 12777701376.0, "16565": 12777701376.0, "16570": 12777701376.0, "16575": 12777701376.0, "16580": 12777701376.0, "16585": 12777701376.0, "16590": 12777701376.0, "16595": 12777701376.0, "16600": 12777701376.0, "16605": 12777701376.0, "16610": 12777701376.0, "16615": 12777701376.0, "16620": 12777701376.0, "16625": 12777701376.0, "16630": 12777701376.0, "16635": 12777701376.0, "16640": 12777701376.0, "16645": 12777701376.0, "16650": 12777701376.0, "16655": 12777701376.0, "16660": 12777701376.0, "16665": 12777701376.0, "16670": 12777701376.0, "16675": 12777701376.0, "16680": 12777701376.0, "16685": 12777701376.0, "16690": 12777701376.0, "16695": 12777701376.0, "16700": 12777701376.0, "16705": 12777701376.0, "16710": 12777701376.0, "16715": 12777701376.0, "16720": 12777701376.0, "16725": 12777701376.0, "16730": 12777701376.0, "16735": 12777701376.0, "16740": 12777701376.0, "16745": 12777701376.0, "16750": 12777701376.0, "16755": 12777701376.0, "16760": 12777701376.0, "16765": 12777701376.0, "16770": 12777701376.0, "16775": 12777701376.0, "16780": 12777701376.0, "16785": 12777701376.0, "16790": 12777701376.0, "16795": 12777701376.0, "16800": 12777701376.0, "16805": 12777701376.0, "16810": 12777701376.0, "16815": 12777701376.0, "16820": 12777701376.0, "16825": 12777701376.0, "16830": 12777701376.0, "16835": 12777701376.0, "16840": 12777701376.0, "16845": 12777701376.0, "16850": 12777701376.0, "16855": 12777701376.0, "16860": 12777701376.0, "16865": 12777701376.0, "16870": 12777701376.0, "16875": 12777701376.0, "16880": 12777701376.0, "16885": 12777701376.0, "16890": 12777701376.0, "16895": 12777701376.0, "16900": 12777701376.0, "16905": 12777701376.0, "16910": 12777701376.0, "16915": 12777701376.0, "16920": 12777701376.0, "16925": 12777701376.0, "16930": 12777701376.0, "16935": 12777701376.0, "16940": 12777701376.0, "16945": 12777701376.0, "16950": 12777701376.0, "16955": 12777701376.0, "16960": 12777701376.0, "16965": 12777701376.0, "16970": 12777701376.0, "16975": 12777701376.0, "16980": 12777701376.0, "16985": 12777701376.0, "16990": 12777701376.0, "16995": 12777701376.0, "17000": 12777701376.0, "17005": 12777701376.0, "17010": 12777701376.0, "17015": 12777701376.0, "17020": 12777701376.0, "17025": 12777701376.0, "17030": 12777701376.0, "17035": 12777701376.0, "17040": 12777701376.0, "17045": 12777701376.0, "17050": 12777701376.0, "17055": 12777701376.0, "17060": 12777701376.0, "17065": 12777701376.0, "17070": 12777701376.0, "17075": 12777701376.0, "17080": 12777701376.0, "17085": 12777701376.0, "17090": 12777701376.0, "17095": 12777701376.0, "17100": 12777701376.0, "17105": 12777701376.0, "17110": 12777701376.0, "17115": 12777701376.0, "17120": 12777701376.0, "17125": 12777701376.0, "17130": 12777701376.0, "17135": 12777701376.0, "17140": 12777701376.0, "17145": 12777701376.0, "17150": 12777701376.0, "17155": 12777701376.0, "17160": 12777701376.0, "17165": 12777701376.0, "17170": 12777701376.0, "17175": 12777701376.0, "17180": 12777701376.0, "17185": 12777701376.0, "17190": 12777701376.0, "17195": 12777701376.0, "17200": 12777701376.0, "17205": 12777701376.0, "17210": 12777701376.0, "17215": 12777701376.0, "17220": 12777701376.0, "17225": 12777701376.0, "17230": 12777701376.0, "17235": 12777701376.0, "17240": 12777701376.0, "17245": 12777701376.0, "17250": 12777701376.0, "17255": 12777701376.0, "17260": 12777701376.0, "17265": 12777701376.0, "17270": 12777701376.0, "17275": 12777701376.0, "17280": 12777701376.0, "17285": 12777701376.0, "17290": 12777701376.0, "17295": 12777701376.0, "17300": 12777701376.0, "17305": 12777701376.0, "17310": 12777701376.0, "17315": 12777701376.0, "17320": 12777701376.0, "17325": 12777701376.0, "17330": 12777701376.0, "17335": 12777701376.0, "17340": 12777701376.0, "17345": 12777701376.0, "17350": 12777701376.0, "17355": 12777701376.0, "17360": 12777701376.0, "17365": 12777701376.0, "17370": 12777701376.0, "17375": 12777701376.0, "17380": 12777701376.0, "17385": 12777701376.0, "17390": 12777701376.0, "17395": 12777701376.0, "17400": 12777701376.0, "17405": 12777701376.0, "17410": 12777701376.0, "17415": 12777701376.0, "17420": 12777701376.0, "17425": 12777701376.0, "17430": 12777701376.0, "17435": 12777701376.0, "17440": 12777701376.0, "17445": 12777701376.0, "17450": 12777701376.0, "17455": 12777701376.0, "17460": 12777701376.0, "17465": 12777701376.0, "17470": 12777701376.0, "17475": 12777701376.0, "17480": 12777701376.0, "17485": 12777701376.0, "17490": 12777701376.0, "17495": 12777701376.0, "17500": 12777701376.0, "17505": 12777701376.0, "17510": 12777701376.0, "17515": 12777701376.0, "17520": 12777701376.0, "17525": 12777701376.0, "17530": 12777701376.0, "17535": 12777701376.0, "17540": 12777701376.0, "17545": 12777701376.0, "17550": 12777701376.0, "17555": 12777701376.0, "17560": 12777701376.0, "17565": 12777701376.0, "17570": 12777701376.0, "17575": 12777701376.0, "17580": 12777701376.0, "17585": 12777701376.0, "17590": 12777701376.0, "17595": 12777701376.0, "17600": 12777701376.0, "17605": 12777701376.0, "17610": 12777701376.0, "17615": 12777701376.0, "17620": 12777701376.0, "17625": 12777701376.0, "17630": 12777701376.0, "17635": 12777701376.0, "17640": 12777701376.0, "17645": 12777701376.0, "17650": 12777701376.0, "17655": 12777701376.0, "17660": 12777701376.0, "17665": 12777701376.0, "17670": 12777701376.0, "17675": 12777701376.0, "17680": 12777701376.0, "17685": 12777701376.0, "17690": 12777701376.0, "17695": 12777701376.0, "17700": 12777701376.0, "17705": 12777701376.0, "17710": 12777701376.0, "17715": 12777701376.0, "17720": 12777701376.0, "17725": 12777701376.0, "17730": 12777701376.0, "17735": 12777701376.0, "17740": 12777701376.0, "17745": 12777701376.0, "17750": 12777701376.0, "17755": 12777701376.0, "17760": 12777701376.0, "17765": 12777701376.0, "17770": 12777701376.0, "17775": 12777701376.0, "17780": 12777701376.0, "17785": 12777701376.0, "17790": 12777701376.0, "17795": 12777701376.0, "17800": 12777701376.0, "17805": 12777701376.0, "17810": 12777701376.0, "17815": 12777701376.0, "17820": 12777701376.0, "17825": 12777701376.0, "17830": 12777701376.0, "17835": 12777701376.0, "17840": 12777701376.0, "17845": 12777701376.0, "17850": 12777701376.0, "17855": 12777701376.0, "17860": 12777701376.0, "17865": 12777701376.0, "17870": 12777701376.0, "17875": 12777701376.0, "17880": 12777701376.0, "17885": 12777701376.0, "17890": 12777701376.0, "17895": 12777701376.0, "17900": 12777701376.0, "17905": 12777701376.0, "17910": 12777701376.0, "17915": 12777701376.0, "17920": 12777701376.0, "17925": 12777701376.0, "17930": 12777701376.0, "17935": 12777701376.0, "17940": 12777701376.0, "17945": 12777701376.0, "17950": 12777701376.0, "17955": 12777701376.0, "17960": 12777701376.0, "17965": 12777701376.0, "17970": 12777701376.0, "17975": 12777701376.0, "17980": 12777701376.0, "17985": 12777701376.0, "17990": 12777701376.0, "17995": 12777701376.0, "18000": 12777701376.0, "18005": 12777701376.0, "18010": 12777701376.0, "18015": 12777701376.0, "18020": 12777701376.0, "18025": 12777701376.0, "18030": 12777701376.0, "18035": 12777701376.0, "18040": 12777701376.0, "18045": 12777701376.0, "18050": 12777701376.0, "18055": 12777701376.0, "18060": 12777701376.0, "18065": 12777701376.0, "18070": 12777701376.0, "18075": 12777701376.0, "18080": 12777701376.0, "18085": 12777701376.0, "18090": 12777701376.0, "18095": 12777701376.0, "18100": 12777701376.0, "18105": 12777701376.0, "18110": 12777701376.0, "18115": 12777701376.0, "18120": 12777701376.0, "18125": 12777701376.0, "18130": 12777701376.0, "18135": 12777701376.0, "18140": 12777701376.0, "18145": 12777701376.0, "18150": 12777701376.0, "18155": 12777701376.0, "18160": 12777701376.0, "18165": 12777701376.0, "18170": 12777701376.0, "18175": 12777701376.0, "18180": 12777701376.0, "18185": 12777701376.0, "18190": 12777701376.0, "18195": 12777701376.0, "18200": 12777701376.0, "18205": 12777701376.0, "18210": 12777701376.0, "18215": 12777701376.0, "18220": 12777701376.0, "18225": 12777701376.0, "18230": 12777701376.0, "18235": 12777701376.0, "18240": 12777701376.0, "18245": 12777701376.0, "18250": 12777701376.0, "18255": 12777701376.0, "18260": 12777701376.0, "18265": 12777701376.0, "18270": 12777701376.0, "18275": 12777701376.0, "18280": 12777701376.0, "18285": 12777701376.0, "18290": 12777701376.0, "18295": 12777701376.0, "18300": 12777701376.0, "18305": 12777701376.0, "18310": 12777701376.0, "18315": 12777701376.0, "18320": 12777701376.0, "18325": 12777701376.0, "18330": 12777701376.0, "18335": 12777701376.0, "18340": 12777701376.0, "18345": 12777701376.0, "18350": 12777701376.0, "18355": 12777701376.0, "18360": 12777701376.0, "18365": 12777701376.0, "18370": 12777701376.0, "18375": 12777701376.0, "18380": 12777701376.0, "18385": 12777701376.0, "18390": 12777701376.0, "18395": 12777701376.0, "18400": 12777701376.0, "18405": 12777701376.0, "18410": 12777701376.0, "18415": 12777701376.0, "18420": 12777701376.0, "18425": 12777701376.0, "18430": 12777701376.0, "18435": 12777701376.0, "18440": 12777701376.0, "18445": 12777701376.0, "18450": 12777701376.0, "18455": 12777701376.0, "18460": 12777701376.0, "18465": 12777701376.0, "18470": 12777701376.0, "18475": 12777701376.0, "18480": 12777701376.0, "18485": 12777701376.0, "18490": 12777701376.0, "18495": 12777701376.0, "18500": 12777701376.0, "18505": 12777701376.0, "18510": 12777701376.0, "18515": 12777701376.0, "18520": 12777701376.0, "18525": 12777701376.0, "18530": 12777701376.0, "18535": 12777701376.0, "18540": 12777701376.0, "18545": 12777701376.0, "18550": 12777701376.0, "18555": 12777701376.0, "18560": 12777701376.0, "18565": 12777701376.0, "18570": 12777701376.0, "18575": 12777701376.0, "18580": 12777701376.0, "18585": 12777701376.0, "18590": 12777701376.0, "18595": 12777701376.0, "18600": 12777701376.0, "18605": 12777701376.0, "18610": 12777701376.0, "18615": 12777701376.0, "18620": 12777701376.0, "18625": 12777701376.0, "18630": 12777701376.0, "18635": 12777701376.0, "18640": 12777701376.0, "18645": 12777701376.0, "18650": 12777701376.0, "18655": 12777701376.0, "18660": 12777701376.0, "18665": 12777701376.0, "18670": 12777701376.0, "18675": 12777701376.0, "18680": 12777701376.0, "18685": 12777701376.0, "18690": 12777701376.0, "18695": 12777701376.0, "18700": 12777701376.0, "18705": 12777701376.0, "18710": 12777701376.0, "18715": 12777701376.0, "18720": 12777701376.0, "18725": 12777701376.0, "18730": 12777701376.0, "18735": 12777701376.0, "18740": 12777701376.0, "18745": 12777701376.0, "18750": 12777701376.0, "18755": 12777701376.0, "18760": 12777701376.0, "18765": 12777701376.0, "18770": 12777701376.0, "18775": 12777701376.0, "18780": 12777701376.0, "18785": 12777701376.0, "18790": 12777701376.0, "18795": 12777701376.0, "18800": 12777701376.0, "18805": 12777701376.0, "18810": 12777701376.0, "18815": 12777701376.0, "18820": 12777701376.0, "18825": 12777701376.0, "18830": 12777701376.0, "18835": 12777701376.0, "18840": 12777701376.0, "18845": 12777701376.0, "18850": 12777701376.0, "18855": 12777701376.0, "18860": 12777701376.0, "18865": 12777701376.0, "18870": 12777701376.0, "18875": 12777701376.0, "18880": 12777701376.0, "18885": 12777701376.0, "18890": 12777701376.0, "18895": 12777701376.0, "18900": 12777701376.0, "18905": 12777701376.0, "18910": 12777701376.0, "18915": 12777701376.0, "18920": 12777701376.0, "18925": 12777701376.0, "18930": 12777701376.0, "18935": 12777701376.0, "18940": 12777701376.0, "18945": 12777701376.0, "18950": 12777701376.0, "18955": 12777701376.0, "18960": 12777701376.0, "18965": 12777701376.0, "18970": 12777701376.0, "18975": 12777701376.0, "18980": 12777701376.0, "18985": 12777701376.0, "18990": 12777701376.0, "18995": 12777701376.0, "19000": 12777701376.0, "19005": 12777701376.0, "19010": 12777701376.0, "19015": 12777701376.0, "19020": 12777701376.0, "19025": 12777701376.0, "19030": 12777701376.0, "19035": 12777701376.0, "19040": 12777701376.0, "19045": 12777701376.0, "19050": 12777701376.0, "19055": 12777701376.0, "19060": 12777701376.0, "19065": 12777701376.0, "19070": 12777701376.0, "19075": 12777701376.0, "19080": 12777701376.0, "19085": 12777701376.0, "19090": 12777701376.0, "19095": 12777701376.0, "19100": 12777701376.0, "19105": 12777701376.0, "19110": 12777701376.0, "19115": 12777701376.0, "19120": 12777701376.0, "19125": 12777701376.0, "19130": 12777701376.0, "19135": 12777701376.0, "19140": 12777701376.0, "19145": 12777701376.0, "19150": 12777701376.0, "19155": 12777701376.0, "19160": 12777701376.0, "19165": 12777701376.0, "19170": 12777701376.0, "19175": 12777701376.0, "19180": 12777701376.0, "19185": 12777701376.0, "19190": 12777701376.0, "19195": 12777701376.0, "19200": 12777701376.0, "19205": 12777701376.0, "19210": 12777701376.0, "19215": 12777701376.0, "19220": 12777701376.0, "19225": 12777701376.0, "19230": 12777701376.0, "19235": 12777701376.0, "19240": 12777701376.0, "19245": 12777701376.0, "19250": 12777701376.0, "19255": 12777701376.0, "19260": 12777701376.0, "19265": 12777701376.0, "19270": 12777701376.0, "19275": 12777701376.0, "19280": 12777701376.0, "19285": 12777701376.0, "19290": 12777701376.0, "19295": 12777701376.0, "19300": 12777701376.0, "19305": 12777701376.0, "19310": 12777701376.0, "19315": 12777701376.0, "19320": 12777701376.0, "19325": 12777701376.0, "19330": 12777701376.0, "19335": 12777701376.0, "19340": 12777701376.0, "19345": 12777701376.0, "19350": 12777701376.0, "19355": 12777701376.0, "19360": 12777701376.0, "19365": 12777701376.0, "19370": 12777701376.0, "19375": 12777701376.0, "19380": 12777701376.0, "19385": 12777701376.0, "19390": 12777701376.0, "19395": 12777701376.0, "19400": 12777701376.0, "19405": 12777701376.0, "19410": 12777701376.0, "19415": 12777701376.0, "19420": 12777701376.0, "19425": 12777701376.0, "19430": 12777701376.0, "19435": 12777701376.0, "19440": 12777701376.0, "19445": 12777701376.0, "19450": 12777701376.0, "19455": 12777701376.0, "19460": 12777701376.0, "19465": 12777701376.0, "19470": 12777701376.0, "19475": 12777701376.0, "19480": 12777701376.0, "19485": 12777701376.0, "19490": 12777701376.0, "19495": 12777701376.0, "19500": 12777701376.0, "19505": 12777701376.0, "19510": 12777701376.0, "19515": 12777701376.0, "19520": 12777701376.0, "19525": 12777701376.0, "19530": 12777701376.0, "19535": 12777701376.0, "19540": 12777701376.0, "19545": 12777701376.0, "19550": 12777701376.0, "19555": 12777701376.0, "19560": 12777701376.0, "19565": 12777701376.0, "19570": 12777701376.0, "19575": 12777701376.0, "19580": 12777701376.0, "19585": 12777701376.0, "19590": 12777701376.0, "19595": 12777701376.0, "19600": 12777701376.0, "19605": 12777701376.0, "19610": 12777701376.0, "19615": 12777701376.0, "19620": 12777701376.0, "19625": 12777701376.0, "19630": 12777701376.0, "19635": 12777701376.0, "19640": 12777701376.0, "19645": 12777701376.0, "19650": 12777701376.0, "19655": 12777701376.0, "19660": 12777701376.0, "19665": 12777701376.0, "19670": 12777701376.0, "19675": 12777701376.0, "19680": 12777701376.0, "19685": 12777701376.0, "19690": 12777701376.0, "19695": 12777701376.0, "19700": 12777701376.0, "19705": 12777701376.0, "19710": 12777701376.0, "19715": 12777701376.0, "19720": 12777701376.0, "19725": 12777701376.0, "19730": 12777701376.0, "19735": 12777701376.0, "19740": 12777701376.0, "19745": 12777701376.0, "19750": 12777701376.0, "19755": 12777701376.0, "19760": 12777701376.0, "19765": 12777701376.0, "19770": 12777701376.0, "19775": 12777701376.0, "19780": 12777701376.0, "19785": 12777701376.0, "19790": 12777701376.0, "19795": 12777701376.0, "19800": 12777701376.0, "19805": 12777701376.0, "19810": 12777701376.0, "19815": 12777701376.0, "19820": 12777701376.0, "19825": 12777701376.0, "19830": 12777701376.0, "19835": 12777701376.0, "19840": 12777701376.0, "19845": 12777701376.0, "19850": 12777701376.0, "19855": 12777701376.0, "19860": 12777701376.0, "19865": 12777701376.0, "19870": 12777701376.0, "19875": 12777701376.0, "19880": 12777701376.0, "19885": 12777701376.0, "19890": 12777701376.0, "19895": 12777701376.0, "19900": 12777701376.0, "19905": 12777701376.0, "19910": 12777701376.0, "19915": 12777701376.0, "19920": 12777701376.0, "19925": 12777701376.0, "19930": 12777701376.0, "19935": 12777701376.0, "19940": 12777701376.0, "19945": 12777701376.0, "19950": 12777701376.0, "19955": 12777701376.0, "19960": 12777701376.0, "19965": 12777701376.0, "19970": 12777701376.0, "19975": 12777701376.0, "19980": 12777701376.0, "19985": 12777701376.0, "19990": 12777701376.0, "19995": 12777701376.0, "20000": 12777701376.0, "20005": 12777701376.0, "20010": 12777701376.0, "20015": 12777701376.0, "20020": 12777701376.0, "20025": 12777701376.0, "20030": 12777701376.0, "20035": 12777701376.0, "20040": 12777701376.0, "20045": 12777701376.0, "20050": 12777701376.0, "20055": 12777701376.0, "20060": 12777701376.0, "20065": 12777701376.0, "20070": 12777701376.0, "20075": 12777701376.0, "20080": 12777701376.0, "20085": 12777701376.0, "20090": 12777701376.0, "20095": 12777701376.0, "20100": 12777701376.0, "20105": 12777701376.0, "20110": 12777701376.0, "20115": 12777701376.0, "20120": 12777701376.0, "20125": 12777701376.0, "20130": 12777701376.0, "20135": 12777701376.0, "20140": 12777701376.0, "20145": 12777701376.0, "20150": 12777701376.0, "20155": 12777701376.0, "20160": 12777701376.0, "20165": 12777701376.0, "20170": 12777701376.0, "20175": 12777701376.0, "20180": 12777701376.0, "20185": 12777701376.0, "20190": 12777701376.0, "20195": 12777701376.0, "20200": 12777701376.0, "20205": 12777701376.0, "20210": 12777701376.0, "20215": 12777701376.0, "20220": 12777701376.0, "20225": 12777701376.0, "20230": 12777701376.0, "20235": 12777701376.0, "20240": 12777701376.0, "20245": 12777701376.0, "20250": 12777701376.0, "20255": 12777701376.0, "20260": 12777701376.0, "20265": 12777701376.0, "20270": 12777701376.0, "20275": 12777701376.0, "20280": 12777701376.0, "20285": 12777701376.0, "20290": 12777701376.0, "20295": 12777701376.0, "20300": 12777701376.0, "20305": 12777701376.0, "20310": 12777701376.0, "20315": 12777701376.0, "20320": 12777701376.0, "20325": 12777701376.0, "20330": 12777701376.0, "20335": 12777701376.0, "20340": 12777701376.0, "20345": 12777701376.0, "20350": 12777701376.0, "20355": 12777701376.0, "20360": 12777701376.0, "20365": 12777701376.0, "20370": 12777701376.0, "20375": 12777701376.0, "20380": 12777701376.0, "20385": 12777701376.0, "20390": 12777701376.0, "20395": 12777701376.0, "20400": 12777701376.0, "20405": 12777701376.0, "20410": 12777701376.0, "20415": 12777701376.0, "20420": 12777701376.0, "20425": 12777701376.0, "20430": 12777701376.0, "20435": 12777701376.0, "20440": 12777701376.0, "20445": 12777701376.0, "20450": 12777701376.0, "20455": 12777701376.0, "20460": 12777701376.0, "20465": 12777701376.0, "20470": 12777701376.0, "20475": 12777701376.0, "20480": 12777701376.0, "20485": 12777701376.0, "20490": 12777701376.0, "20495": 12777701376.0, "20500": 12777701376.0, "20505": 12777701376.0, "20510": 12777701376.0, "20515": 12777701376.0, "20520": 12777701376.0, "20525": 12777701376.0, "20530": 12777701376.0, "20535": 12777701376.0, "20540": 12777701376.0, "20545": 12777701376.0, "20550": 12777701376.0, "20555": 12777701376.0, "20560": 12777701376.0, "20565": 12777701376.0, "20570": 12777701376.0, "20575": 12777701376.0, "20580": 12777701376.0, "20585": 12777701376.0, "20590": 12777701376.0, "20595": 12777701376.0, "20600": 12777701376.0, "20605": 12777701376.0, "20610": 12777701376.0, "20615": 12777701376.0, "20620": 12777701376.0, "20625": 12777701376.0, "20630": 12777701376.0, "20635": 12777701376.0, "20640": 12777701376.0, "20645": 12777701376.0, "20650": 12777701376.0, "20655": 12777701376.0, "20660": 12777701376.0, "20665": 12777701376.0, "20670": 12777701376.0, "20675": 12777701376.0, "20680": 12777701376.0, "20685": 12777701376.0, "20690": 12777701376.0, "20695": 12777701376.0, "20700": 12777701376.0, "20705": 12777701376.0, "20710": 12777701376.0, "20715": 12777701376.0, "20720": 12777701376.0, "20725": 12777701376.0, "20730": 12777701376.0, "20735": 12777701376.0, "20740": 12777701376.0, "20745": 12777701376.0, "20750": 12777701376.0, "20755": 12777701376.0, "20760": 12777701376.0, "20765": 12777701376.0, "20770": 12777701376.0, "20775": 12777701376.0, "20780": 12777701376.0, "20785": 12777701376.0, "20790": 12777701376.0, "20795": 12777701376.0, "20800": 12777701376.0, "20805": 12777701376.0, "20810": 12777701376.0, "20815": 12777701376.0, "20820": 12777701376.0, "20825": 12777701376.0, "20830": 12777701376.0, "20835": 12777701376.0, "20840": 12777701376.0, "20845": 12777701376.0, "20850": 12777701376.0, "20855": 12777701376.0, "20860": 12777701376.0, "20865": 12777701376.0, "20870": 12777701376.0, "20875": 12777701376.0, "20880": 12777701376.0, "20885": 12777701376.0, "20890": 12777701376.0, "20895": 12777701376.0, "20900": 12777701376.0, "20905": 12777701376.0, "20910": 12777701376.0, "20915": 12777701376.0, "20920": 12777701376.0, "20925": 12777701376.0, "20930": 12777701376.0, "20935": 12777701376.0, "20940": 12777701376.0, "20945": 12777701376.0, "20950": 12777701376.0, "20955": 12777701376.0, "20960": 12777701376.0, "20965": 12777701376.0, "20970": 12777701376.0, "20975": 12777701376.0, "20980": 12777701376.0, "20985": 12777701376.0, "20990": 12777701376.0, "20995": 12777701376.0, "21000": 12777701376.0, "21005": 12777701376.0, "21010": 12777701376.0, "21015": 12777701376.0, "21020": 12777701376.0, "21025": 12777701376.0, "21030": 12777701376.0, "21035": 12777701376.0, "21040": 12777701376.0, "21045": 12777701376.0, "21050": 12777701376.0, "21055": 12777701376.0, "21060": 12777701376.0, "21065": 12777701376.0, "21070": 12777701376.0, "21075": 12777701376.0, "21080": 12777701376.0, "21085": 12777701376.0, "21090": 12777701376.0, "21095": 12777701376.0, "21100": 12777701376.0, "21105": 12777701376.0, "21110": 12777701376.0, "21115": 12777701376.0, "21120": 12777701376.0, "21125": 12777701376.0, "21130": 12777701376.0, "21135": 12777701376.0, "21140": 12777701376.0, "21145": 12777701376.0, "21150": 12777701376.0, "21155": 12777701376.0, "21160": 12777701376.0, "21165": 12777701376.0, "21170": 12777701376.0, "21175": 12777701376.0, "21180": 12777701376.0, "21185": 12777701376.0, "21190": 12777701376.0, "21195": 12777701376.0, "21200": 12777701376.0, "21205": 12777701376.0, "21210": 12777701376.0, "21215": 12777701376.0, "21220": 12777701376.0, "21225": 12777701376.0, "21230": 12777701376.0, "21235": 12777701376.0, "21240": 12777701376.0, "21245": 12777701376.0, "21250": 12777701376.0, "21255": 12777701376.0, "21260": 12777701376.0, "21265": 12777701376.0, "21270": 12777701376.0, "21275": 12777701376.0, "21280": 12777701376.0, "21285": 12777701376.0, "21290": 12777701376.0, "21295": 12777701376.0, "21300": 12777701376.0, "21305": 12777701376.0, "21310": 12777701376.0, "21315": 12777701376.0, "21320": 12777701376.0, "21325": 12777701376.0, "21330": 12777701376.0, "21335": 12777701376.0, "21340": 12777701376.0, "21345": 12777701376.0, "21350": 12777701376.0, "21355": 12777701376.0, "21360": 12777701376.0, "21365": 12777701376.0, "21370": 12777701376.0, "21375": 12777701376.0, "21380": 12777701376.0, "21385": 12777701376.0, "21390": 12777701376.0, "21395": 12777701376.0, "21400": 12777701376.0, "21405": 12777701376.0, "21410": 12777701376.0, "21415": 12777701376.0, "21420": 12777701376.0, "21425": 12777701376.0, "21430": 12777701376.0, "21435": 12777701376.0, "21440": 12777701376.0, "21445": 12777701376.0, "21450": 12777701376.0, "21455": 12777701376.0, "21460": 12777701376.0, "21465": 12777701376.0, "21470": 12777701376.0, "21475": 12777701376.0, "21480": 12777701376.0, "21485": 12777701376.0, "21490": 12777701376.0, "21495": 12777701376.0, "21500": 12777701376.0, "21505": 12777701376.0, "21510": 12777701376.0, "21515": 12777701376.0, "21520": 12777701376.0, "21525": 12777701376.0, "21530": 12777701376.0, "21535": 12777701376.0, "21540": 12777701376.0, "21545": 12777701376.0, "21550": 12777701376.0, "21555": 12777701376.0, "21560": 12777701376.0, "21565": 12777701376.0, "21570": 12777701376.0, "21575": 12777701376.0, "21580": 12777701376.0, "21585": 12777701376.0, "21590": 12777701376.0, "21595": 12777701376.0, "21600": 12777701376.0, "21605": 12777701376.0, "21610": 12777701376.0, "21615": 12777701376.0, "21620": 12777701376.0, "21625": 12777701376.0, "21630": 12777701376.0, "21635": 12777701376.0, "21640": 12777701376.0, "21645": 12777701376.0, "21650": 12777701376.0, "21655": 12777701376.0, "21660": 12777701376.0, "21665": 12777701376.0, "21670": 12777701376.0, "21675": 12777701376.0, "21680": 12777701376.0, "21685": 12777701376.0, "21690": 12777701376.0, "21695": 12777701376.0, "21700": 12777701376.0, "21705": 12777701376.0, "21710": 12777701376.0, "21715": 12777701376.0, "21720": 12777701376.0, "21725": 12777701376.0, "21730": 12777701376.0, "21735": 12777701376.0, "21740": 12777701376.0, "21745": 12777701376.0, "21750": 12777701376.0, "21755": 12777701376.0, "21760": 12777701376.0, "21765": 12777701376.0, "21770": 12777701376.0, "21775": 12777701376.0, "21780": 12777701376.0, "21785": 12777701376.0, "21790": 12777701376.0, "21795": 12777701376.0, "21800": 12777701376.0, "21805": 12777701376.0, "21810": 12777701376.0, "21815": 12777701376.0, "21820": 12777701376.0, "21825": 12777701376.0, "21830": 12777701376.0, "21835": 12777701376.0, "21840": 12777701376.0, "21845": 12777701376.0, "21850": 12777701376.0, "21855": 12777701376.0, "21860": 12777701376.0, "21865": 12777701376.0, "21870": 12777701376.0, "21875": 12777701376.0, "21880": 12777701376.0, "21885": 12777701376.0, "21890": 12777701376.0, "21895": 12777701376.0, "21900": 12777701376.0, "21905": 12777701376.0, "21910": 12777701376.0, "21915": 12777701376.0, "21920": 12777701376.0, "21925": 12777701376.0, "21930": 12777701376.0, "21935": 12777701376.0, "21940": 12777701376.0, "21945": 12777701376.0, "21950": 12777701376.0, "21955": 12777701376.0, "21960": 12777701376.0, "21965": 12777701376.0, "21970": 12777701376.0, "21975": 12777701376.0, "21980": 12777701376.0, "21985": 12777701376.0, "21990": 12777701376.0, "21995": 12777701376.0, "22000": 12777701376.0, "22005": 12777701376.0, "22010": 12777701376.0, "22015": 12777701376.0, "22020": 12777701376.0, "22025": 12777701376.0, "22030": 12777701376.0, "22035": 12777701376.0, "22040": 12777701376.0, "22045": 12777701376.0, "22050": 12777701376.0, "22055": 12777701376.0, "22060": 12777701376.0, "22065": 12777701376.0, "22070": 12777701376.0, "22075": 12777701376.0, "22080": 12777701376.0, "22085": 12777701376.0, "22090": 12777701376.0, "22095": 12777701376.0, "22100": 12777701376.0, "22105": 12777701376.0, "22110": 12777701376.0, "22115": 12777701376.0, "22120": 12777701376.0, "22125": 12777701376.0, "22130": 12777701376.0, "22135": 12777701376.0, "22140": 12777701376.0, "22145": 12777701376.0, "22150": 12777701376.0, "22155": 12777701376.0, "22160": 12777701376.0, "22165": 12777701376.0, "22170": 12777701376.0, "22175": 12777701376.0, "22180": 12777701376.0, "22185": 12777701376.0, "22190": 12777701376.0, "22195": 12777701376.0, "22200": 12777701376.0, "22205": 12777701376.0, "22210": 12777701376.0, "22215": 12777701376.0, "22220": 12777701376.0, "22225": 12777701376.0, "22230": 12777701376.0, "22235": 12777701376.0, "22240": 12777701376.0, "22245": 12777701376.0, "22250": 12777701376.0, "22255": 12777701376.0, "22260": 12777701376.0, "22265": 12777701376.0, "22270": 12777701376.0, "22275": 12777701376.0, "22280": 12777701376.0, "22285": 12777701376.0, "22290": 12777701376.0, "22295": 12777701376.0, "22300": 12777701376.0, "22305": 12777701376.0, "22310": 12777701376.0, "22315": 12777701376.0, "22320": 12777701376.0, "22325": 12777701376.0, "22330": 12777701376.0, "22335": 12777701376.0, "22340": 12777701376.0, "22345": 12777701376.0, "22350": 12777701376.0, "22355": 12777701376.0, "22360": 12777701376.0, "22365": 12777701376.0, "22370": 12777701376.0, "22375": 12777701376.0, "22380": 12777701376.0, "22385": 12777701376.0, "22390": 12777701376.0, "22395": 12777701376.0, "22400": 12777701376.0, "22405": 12777701376.0, "22410": 12777701376.0, "22415": 12777701376.0, "22420": 12777701376.0, "22425": 12777701376.0, "22430": 12777701376.0, "22435": 12777701376.0, "22440": 12777701376.0, "22445": 12777701376.0, "22450": 12777701376.0, "22455": 12777701376.0, "22460": 12777701376.0, "22465": 12777701376.0, "22470": 12777701376.0, "22475": 12777701376.0, "22480": 12777701376.0, "22485": 12777701376.0, "22490": 12777701376.0, "22495": 12777701376.0, "22500": 12777701376.0, "22505": 12777701376.0, "22510": 12777701376.0, "22515": 12777701376.0, "22520": 12777701376.0, "22525": 12777701376.0, "22530": 12777701376.0, "22535": 12777701376.0, "22540": 12777701376.0, "22545": 12777701376.0, "22550": 12777701376.0, "22555": 12777701376.0, "22560": 12777701376.0, "22565": 12777701376.0, "22570": 12777701376.0, "22575": 12777701376.0, "22580": 12777701376.0, "22585": 12777701376.0, "22590": 12777701376.0, "22595": 12777701376.0, "22600": 12777701376.0, "22605": 12777701376.0, "22610": 12777701376.0, "22615": 12777701376.0, "22620": 12777701376.0, "22625": 12777701376.0, "22630": 12777701376.0, "22635": 12777701376.0, "22640": 12777701376.0, "22645": 12777701376.0, "22650": 12777701376.0, "22655": 12777701376.0, "22660": 12777701376.0, "22665": 12777701376.0, "22670": 12777701376.0, "22675": 12777701376.0, "22680": 12777701376.0, "22685": 12777701376.0, "22690": 12777701376.0, "22695": 12777701376.0, "22700": 12777701376.0, "22705": 12777701376.0, "22710": 12777701376.0, "22715": 12777701376.0, "22720": 12777701376.0, "22725": 12777701376.0, "22730": 12777701376.0, "22735": 12777701376.0, "22740": 12777701376.0, "22745": 12777701376.0, "22750": 12777701376.0, "22755": 12777701376.0, "22760": 12777701376.0, "22765": 12777701376.0, "22770": 12777701376.0, "22775": 12777701376.0, "22780": 12777701376.0, "22785": 12777701376.0, "22790": 12777701376.0, "22795": 12777701376.0, "22800": 12777701376.0, "22805": 12777701376.0, "22810": 12777701376.0, "22815": 12777701376.0, "22820": 12777701376.0, "22825": 12777701376.0, "22830": 12777701376.0, "22835": 12777701376.0, "22840": 12777701376.0, "22845": 12777701376.0, "22850": 12777701376.0, "22855": 12777701376.0, "22860": 12777701376.0, "22865": 12777701376.0, "22870": 12777701376.0, "22875": 12777701376.0, "22880": 12777701376.0, "22885": 12777701376.0, "22890": 12777701376.0, "22895": 12777701376.0, "22900": 12777701376.0, "22905": 12777701376.0, "22910": 12777701376.0, "22915": 12777701376.0, "22920": 12777701376.0, "22925": 12777701376.0, "22930": 12777701376.0, "22935": 12777701376.0, "22940": 12777701376.0, "22945": 12777701376.0, "22950": 12777701376.0, "22955": 12777701376.0, "22960": 12777701376.0, "22965": 12777701376.0, "22970": 12777701376.0, "22975": 12777701376.0, "22980": 12777701376.0, "22985": 12777701376.0, "22990": 12777701376.0, "22995": 12777701376.0, "23000": 12777701376.0, "23005": 12777701376.0, "23010": 12777701376.0, "23015": 12777701376.0, "23020": 12777701376.0, "23025": 12777701376.0, "23030": 12777701376.0, "23035": 12777701376.0, "23040": 12777701376.0, "23045": 12777701376.0, "23050": 12777701376.0, "23055": 12777701376.0, "23060": 12777701376.0, "23065": 12777701376.0, "23070": 12777701376.0, "23075": 12777701376.0, "23080": 12777701376.0, "23085": 12777701376.0, "23090": 12777701376.0, "23095": 12777701376.0, "23100": 12777701376.0, "23105": 12777701376.0, "23110": 12777701376.0, "23115": 12777701376.0, "23120": 12777701376.0, "23125": 12777701376.0, "23130": 12777701376.0, "23135": 12777701376.0, "23140": 12777701376.0, "23145": 12777701376.0, "23150": 12777701376.0, "23155": 12777701376.0, "23160": 12777701376.0, "23165": 12777701376.0, "23170": 12777701376.0, "23175": 12777701376.0, "23180": 12777701376.0, "23185": 12777701376.0, "23190": 12777701376.0, "23195": 12777701376.0, "23200": 12777701376.0, "23205": 12777701376.0, "23210": 12777701376.0, "23215": 12777701376.0, "23220": 12777701376.0, "23225": 12777701376.0, "23230": 12777701376.0, "23235": 12777701376.0, "23240": 12777701376.0, "23245": 12777701376.0, "23250": 12777701376.0, "23255": 12777701376.0, "23260": 12777701376.0, "23265": 12777701376.0, "23270": 12777701376.0, "23275": 12777701376.0, "23280": 12777701376.0, "23285": 12777701376.0, "23290": 12777701376.0, "23295": 12777701376.0, "23300": 12777701376.0, "23305": 12777701376.0, "23310": 12777701376.0, "23315": 12777701376.0, "23320": 12777701376.0, "23325": 12777701376.0, "23330": 12777701376.0, "23335": 12777701376.0, "23340": 12777701376.0, "23345": 12777701376.0, "23350": 12777701376.0, "23355": 12777701376.0, "23360": 12777701376.0, "23365": 12777701376.0, "23370": 12777701376.0, "23375": 12777701376.0, "23380": 12777701376.0, "23385": 12777701376.0, "23390": 12777701376.0, "23395": 12777701376.0, "23400": 12777701376.0, "23405": 12777701376.0, "23410": 12777701376.0, "23415": 12777701376.0, "23420": 12777701376.0, "23425": 12777701376.0, "23430": 12777701376.0, "23435": 12777701376.0, "23440": 12777701376.0, "23445": 12777701376.0, "23450": 12777701376.0, "23455": 12777701376.0, "23460": 12777701376.0, "23465": 12777701376.0, "23470": 12777701376.0, "23475": 12777701376.0, "23480": 12777701376.0, "23485": 12777701376.0, "23490": 12777701376.0, "23495": 12777701376.0, "23500": 12777701376.0, "23505": 12777701376.0, "23510": 12777701376.0, "23515": 12777701376.0, "23520": 12777701376.0, "23525": 12777701376.0, "23530": 12777701376.0, "23535": 12777701376.0, "23540": 12777701376.0, "23545": 12777701376.0, "23550": 12777701376.0, "23555": 12777701376.0, "23560": 12777701376.0, "23565": 12777701376.0, "23570": 12777701376.0, "23575": 12777701376.0, "23580": 12777701376.0, "23585": 12777701376.0, "23590": 12777701376.0, "23595": 12777701376.0, "23600": 12777701376.0, "23605": 12777701376.0, "23610": 12777701376.0, "23615": 12777701376.0, "23620": 12777701376.0, "23625": 12777701376.0, "23630": 12777701376.0, "23635": 12777701376.0, "23640": 12777701376.0, "23645": 12777701376.0, "23650": 12777701376.0, "23655": 12777701376.0, "23660": 12777701376.0, "23665": 12777701376.0, "23670": 12777701376.0, "23675": 12777701376.0, "23680": 12777701376.0, "23685": 12777701376.0, "23690": 12777701376.0, "23695": 12777701376.0, "23700": 12777701376.0, "23705": 12777701376.0, "23710": 12777701376.0, "23715": 12777701376.0, "23720": 12777701376.0, "23725": 12777701376.0, "23730": 12777701376.0, "23735": 12777701376.0, "23740": 12777701376.0, "23745": 12777701376.0, "23750": 12777701376.0, "23755": 12777701376.0, "23760": 12777701376.0, "23765": 12777701376.0, "23770": 12777701376.0, "23775": 12777701376.0, "23780": 12777701376.0, "23785": 12777701376.0, "23790": 12777701376.0, "23795": 12777701376.0, "23800": 12777701376.0, "23805": 12777701376.0, "23810": 12777701376.0, "23815": 12777701376.0, "23820": 12777701376.0, "23825": 12777701376.0, "23830": 12777701376.0, "23835": 12777701376.0, "23840": 12777701376.0, "23845": 12777701376.0, "23850": 12777701376.0, "23855": 12777701376.0, "23860": 12777701376.0, "23865": 12777701376.0, "23870": 12777701376.0, "23875": 12777701376.0, "23880": 12777701376.0, "23885": 12777701376.0, "23890": 12777701376.0, "23895": 12777701376.0, "23900": 12777701376.0, "23905": 12777701376.0, "23910": 12777701376.0, "23915": 12777701376.0, "23920": 12777701376.0, "23925": 12777701376.0, "23930": 12777701376.0, "23935": 12777701376.0, "23940": 12777701376.0, "23945": 12777701376.0, "23950": 12777701376.0, "23955": 12777701376.0, "23960": 12777701376.0, "23965": 12777701376.0, "23970": 12777701376.0, "23975": 12777701376.0, "23980": 12777701376.0, "23985": 12777701376.0, "23990": 12777701376.0, "23995": 12777701376.0, "24000": 12777701376.0, "24005": 12777701376.0, "24010": 12777701376.0, "24015": 12777701376.0, "24020": 12777701376.0, "24025": 12777701376.0, "24030": 12777701376.0, "24035": 12777701376.0, "24040": 12777701376.0, "24045": 12777701376.0, "24050": 12777701376.0, "24055": 12777701376.0, "24060": 12777701376.0, "24065": 12777701376.0, "24070": 12777701376.0, "24075": 12777701376.0, "24080": 12777701376.0, "24085": 12777701376.0, "24090": 12777701376.0, "24095": 12777701376.0, "24100": 12777701376.0, "24105": 12777701376.0, "24110": 12777701376.0, "24115": 12777701376.0, "24120": 12777701376.0, "24125": 12777701376.0, "24130": 12777701376.0, "24135": 12777701376.0, "24140": 12777701376.0, "24145": 12777701376.0, "24150": 12777701376.0, "24155": 12777701376.0, "24160": 12777701376.0, "24165": 12777701376.0, "24170": 12777701376.0, "24175": 12777701376.0, "24180": 12777701376.0, "24185": 12777701376.0, "24190": 12777701376.0, "24195": 12777701376.0, "24200": 12777701376.0, "24205": 12777701376.0, "24210": 12777701376.0, "24215": 12777701376.0, "24220": 12777701376.0, "24225": 12777701376.0, "24230": 12777701376.0, "24235": 12777701376.0, "24240": 12777701376.0, "24245": 12777701376.0, "24250": 12777701376.0, "24255": 12777701376.0, "24260": 12777701376.0, "24265": 12777701376.0, "24270": 12777701376.0, "24275": 12777701376.0, "24280": 12777701376.0, "24285": 12777701376.0, "24290": 12777701376.0, "24295": 12777701376.0, "24300": 12777701376.0, "24305": 12777701376.0, "24310": 12777701376.0, "24315": 12777701376.0, "24320": 12777701376.0, "24325": 12777701376.0, "24330": 12777701376.0, "24335": 12777701376.0, "24340": 12777701376.0, "24345": 12777701376.0, "24350": 12777701376.0, "24355": 12777701376.0, "24360": 12777701376.0, "24365": 12777701376.0, "24370": 12777701376.0, "24375": 12777701376.0, "24380": 12777701376.0, "24385": 12777701376.0, "24390": 12777701376.0, "24395": 12777701376.0, "24400": 12777701376.0, "24405": 12777701376.0, "24410": 12777701376.0, "24415": 12777701376.0, "24420": 12777701376.0, "24425": 12777701376.0, "24430": 12777701376.0, "24435": 12777701376.0, "24440": 12777701376.0, "24445": 12777701376.0, "24450": 12777701376.0, "24455": 12777701376.0, "24460": 12777701376.0, "24465": 12777701376.0, "24470": 12777701376.0, "24475": 12777701376.0, "24480": 12777701376.0, "24485": 12777701376.0, "24490": 12777701376.0, "24495": 12777701376.0, "24500": 12777701376.0, "24505": 12777701376.0, "24510": 12777701376.0, "24515": 12777701376.0, "24520": 12777701376.0, "24525": 12777701376.0, "24530": 12777701376.0, "24535": 12777701376.0, "24540": 12777701376.0, "24545": 12777701376.0, "24550": 12777701376.0, "24555": 12777701376.0, "24560": 12777701376.0, "24565": 12777701376.0, "24570": 12777701376.0, "24575": 12777701376.0, "24580": 12777701376.0, "24585": 12777701376.0, "24590": 12777701376.0, "24595": 12777701376.0, "24600": 12777701376.0, "24605": 12777701376.0, "24610": 12777701376.0, "24615": 12777701376.0, "24620": 12777701376.0, "24625": 12777701376.0, "24630": 12777701376.0, "24635": 12777701376.0, "24640": 12777701376.0, "24645": 12777701376.0, "24650": 12777701376.0, "24655": 12777701376.0, "24660": 12777701376.0, "24665": 12777701376.0, "24670": 12777701376.0, "24675": 12777701376.0, "24680": 12777701376.0, "24685": 12777701376.0, "24690": 12777701376.0, "24695": 12777701376.0, "24700": 12777701376.0, "24705": 12777701376.0, "24710": 12777701376.0, "24715": 12777701376.0, "24720": 12777701376.0, "24725": 12777701376.0, "24730": 12777701376.0, "24735": 12777701376.0, "24740": 12777701376.0, "24745": 12777701376.0, "24750": 12777701376.0, "24755": 12777701376.0, "24760": 12777701376.0, "24765": 12777701376.0, "24770": 12777701376.0, "24775": 12777701376.0, "24780": 12777701376.0, "24785": 12777701376.0, "24790": 12777701376.0, "24795": 12777701376.0, "24800": 12777701376.0, "24805": 12777701376.0, "24810": 12777701376.0, "24815": 12777701376.0, "24820": 12777701376.0, "24825": 12777701376.0, "24830": 12777701376.0, "24835": 12777701376.0, "24840": 12777701376.0, "24845": 12777701376.0, "24850": 12777701376.0, "24855": 12777701376.0, "24860": 12777701376.0, "24865": 12777701376.0, "24870": 12777701376.0, "24875": 12777701376.0, "24880": 12777701376.0, "24885": 12777701376.0, "24890": 12777701376.0, "24895": 12777701376.0, "24900": 12777701376.0, "24905": 12777701376.0, "24910": 12777701376.0, "24915": 12777701376.0, "24920": 12777701376.0, "24925": 12777701376.0, "24930": 12777701376.0, "24935": 12777701376.0, "24940": 12777701376.0, "24945": 12777701376.0, "24950": 12777701376.0, "24955": 12777701376.0, "24960": 12777701376.0, "24965": 12777701376.0, "24970": 12777701376.0, "24975": 12777701376.0, "24980": 12777701376.0, "24985": 12777701376.0, "24990": 12777701376.0, "24995": 12777701376.0, "25000": 12777701376.0, "25005": 12777701376.0, "25010": 12777701376.0, "25015": 12777701376.0, "25020": 12777701376.0, "25025": 12777701376.0, "25030": 12777701376.0, "25035": 12777701376.0, "25040": 12777701376.0, "25045": 12777701376.0, "25050": 12777701376.0, "25055": 12777701376.0, "25060": 12777701376.0, "25065": 12777701376.0, "25070": 12777701376.0, "25075": 12777701376.0, "25080": 12777701376.0, "25085": 12777701376.0, "25090": 12777701376.0, "25095": 12777701376.0, "25100": 12777701376.0, "25105": 12777701376.0, "25110": 12777701376.0, "25115": 12777701376.0, "25120": 12777701376.0, "25125": 12777701376.0, "25130": 12777701376.0, "25135": 12777701376.0, "25140": 12777701376.0, "25145": 12777701376.0, "25150": 12777701376.0, "25155": 12777701376.0, "25160": 12777701376.0, "25165": 12777701376.0, "25170": 12777701376.0, "25175": 12777701376.0, "25180": 12777701376.0, "25185": 12777701376.0, "25190": 12777701376.0, "25195": 12777701376.0, "25200": 12777701376.0, "25205": 12777701376.0, "25210": 12777701376.0, "25215": 12777701376.0, "25220": 12777701376.0, "25225": 12777701376.0, "25230": 12777701376.0, "25235": 12777701376.0, "25240": 12777701376.0, "25245": 12777701376.0, "25250": 12777701376.0, "25255": 12777701376.0, "25260": 12777701376.0, "25265": 12777701376.0, "25270": 12777701376.0, "25275": 12777701376.0, "25280": 12777701376.0, "25285": 12777701376.0, "25290": 12777701376.0, "25295": 12777701376.0, "25300": 12777701376.0, "25305": 12777701376.0, "25310": 12777701376.0, "25315": 12777701376.0, "25320": 12777701376.0, "25325": 12777701376.0, "25330": 12777701376.0, "25335": 12777701376.0, "25340": 12777701376.0, "25345": 12777701376.0, "25350": 12777701376.0, "25355": 12777701376.0, "25360": 12777701376.0, "25365": 12777701376.0, "25370": 12777701376.0, "25375": 12777701376.0, "25380": 12777701376.0, "25385": 12777701376.0, "25390": 12777701376.0, "25395": 12777701376.0, "25400": 12777701376.0, "25405": 12777701376.0, "25410": 12777701376.0, "25415": 12777701376.0, "25420": 12777701376.0, "25425": 12777701376.0, "25430": 12777701376.0, "25435": 12777701376.0, "25440": 12777701376.0, "25445": 12777701376.0, "25450": 12777701376.0, "25455": 12777701376.0, "25460": 12777701376.0, "25465": 12777701376.0, "25470": 12777701376.0, "25475": 12777701376.0, "25480": 12777701376.0, "25485": 12777701376.0, "25490": 12777701376.0, "25495": 12777701376.0, "25500": 12777701376.0, "25505": 12777701376.0, "25510": 12777701376.0, "25515": 12777701376.0, "25520": 12777701376.0, "25525": 12777701376.0, "25530": 12777701376.0, "25535": 12777701376.0, "25540": 12777701376.0, "25545": 12777701376.0, "25550": 12777701376.0, "25555": 12777701376.0, "25560": 12777701376.0, "25565": 12777701376.0, "25570": 12777701376.0, "25575": 12777701376.0, "25580": 12777701376.0, "25585": 12777701376.0, "25590": 12777701376.0, "25595": 12777701376.0, "25600": 12777701376.0, "25605": 12777701376.0, "25610": 12777701376.0, "25615": 12777701376.0, "25620": 12777701376.0, "25625": 12777701376.0, "25630": 12777701376.0, "25635": 12777701376.0, "25640": 12777701376.0, "25645": 12777701376.0, "25650": 12777701376.0, "25655": 12777701376.0, "25660": 12777701376.0, "25665": 12777701376.0, "25670": 12777701376.0, "25675": 12777701376.0, "25680": 12777701376.0, "25685": 12777701376.0, "25690": 12777701376.0, "25695": 12777701376.0, "25700": 12777701376.0, "25705": 12777701376.0, "25710": 12777701376.0, "25715": 12777701376.0, "25720": 12777701376.0, "25725": 12777701376.0, "25730": 12777701376.0, "25735": 12777701376.0, "25740": 12777701376.0, "25745": 12777701376.0, "25750": 12777701376.0, "25755": 12777701376.0, "25760": 12777701376.0, "25765": 12777701376.0, "25770": 12777701376.0, "25775": 12777701376.0, "25780": 12777701376.0, "25785": 12777701376.0, "25790": 12777701376.0, "25795": 12777701376.0, "25800": 12777701376.0, "25805": 12777701376.0, "25810": 12777701376.0, "25815": 12777701376.0, "25820": 12777701376.0, "25825": 12777701376.0, "25830": 12777701376.0, "25835": 12777701376.0, "25840": 12777701376.0, "25845": 12777701376.0, "25850": 12777701376.0, "25855": 12777701376.0, "25860": 12777701376.0, "25865": 12777701376.0, "25870": 12777701376.0, "25875": 12777701376.0, "25880": 12777701376.0, "25885": 12777701376.0, "25890": 12777701376.0, "25895": 12777701376.0, "25900": 12777701376.0, "25905": 12777701376.0, "25910": 12777701376.0, "25915": 12777701376.0, "25920": 12777701376.0, "25925": 12777701376.0, "25930": 12777701376.0, "25935": 12777701376.0, "25940": 12777701376.0, "25945": 12777701376.0, "25950": 12777701376.0, "25955": 12777701376.0, "25960": 12777701376.0, "25965": 12777701376.0, "25970": 12777701376.0, "25975": 12777701376.0, "25980": 12777701376.0, "25985": 12777701376.0, "25990": 12777701376.0, "25995": 12777701376.0, "26000": 12777701376.0, "26005": 12777701376.0, "26010": 12777701376.0, "26015": 12777701376.0, "26020": 12777701376.0, "26025": 12777701376.0, "26030": 12777701376.0, "26035": 12777701376.0, "26040": 12777701376.0, "26045": 12777701376.0, "26050": 12777701376.0, "26055": 12777701376.0, "26060": 12777701376.0, "26065": 12777701376.0, "26070": 12777701376.0, "26075": 12777701376.0, "26080": 12777701376.0, "26085": 12777701376.0, "26090": 12777701376.0, "26095": 12777701376.0, "26100": 12777701376.0, "26105": 12777701376.0, "26110": 12777701376.0, "26115": 12777701376.0, "26120": 12777701376.0, "26125": 12777701376.0, "26130": 12777701376.0, "26135": 12777701376.0, "26140": 12777701376.0, "26145": 12777701376.0, "26150": 12777701376.0, "26155": 12777701376.0, "26160": 12777701376.0, "26165": 12777701376.0, "26170": 12777701376.0, "26175": 12777701376.0, "26180": 12777701376.0, "26185": 12777701376.0, "26190": 12777701376.0, "26195": 12777701376.0, "26200": 12777701376.0, "26205": 12777701376.0, "26210": 12777701376.0, "26215": 12777701376.0, "26220": 12777701376.0, "26225": 12777701376.0, "26230": 12777701376.0, "26235": 12777701376.0, "26240": 12777701376.0, "26245": 12777701376.0, "26250": 12777701376.0, "26255": 12777701376.0, "26260": 12777701376.0, "26265": 12777701376.0, "26270": 12777701376.0, "26275": 12777701376.0, "26280": 12777701376.0, "26285": 12777701376.0, "26290": 12777701376.0, "26295": 12777701376.0, "26300": 12777701376.0, "26305": 12777701376.0, "26310": 12777701376.0, "26315": 12777701376.0, "26320": 12777701376.0, "26325": 12777701376.0, "26330": 12777701376.0, "26335": 12777701376.0, "26340": 12777701376.0, "26345": 12777701376.0, "26350": 12777701376.0, "26355": 12777701376.0, "26360": 12777701376.0, "26365": 12777701376.0, "26370": 12777701376.0, "26375": 12777701376.0, "26380": 12777701376.0, "26385": 12777701376.0, "26390": 12777701376.0, "26395": 12777701376.0, "26400": 12777701376.0, "26405": 12777701376.0, "26410": 12777701376.0, "26415": 12777701376.0, "26420": 12777701376.0, "26425": 12777701376.0, "26430": 12777701376.0, "26435": 12777701376.0, "26440": 12777701376.0, "26445": 12777701376.0, "26450": 12777701376.0, "26455": 12777701376.0, "26460": 12777701376.0, "26465": 12777701376.0, "26470": 12777701376.0, "26475": 12777701376.0, "26480": 12777701376.0, "26485": 12777701376.0, "26490": 12777701376.0, "26495": 12777701376.0, "26500": 12777701376.0, "26505": 12777701376.0, "26510": 12777701376.0, "26515": 12777701376.0, "26520": 12777701376.0, "26525": 12777701376.0, "26530": 12777701376.0, "26535": 12777701376.0, "26540": 12777701376.0, "26545": 12777701376.0, "26550": 12777701376.0, "26555": 12777701376.0, "26560": 12777701376.0, "26565": 12777701376.0, "26570": 12777701376.0, "26575": 12777701376.0, "26580": 12777701376.0, "26585": 12777701376.0, "26590": 12777701376.0, "26595": 12777701376.0, "26600": 12777701376.0, "26605": 12777701376.0, "26610": 12777701376.0, "26615": 12777701376.0, "26620": 12777701376.0, "26625": 12777701376.0, "26630": 12777701376.0, "26635": 12777701376.0, "26640": 12777701376.0, "26645": 12777701376.0, "26650": 12777701376.0, "26655": 12777701376.0, "26660": 12777701376.0, "26665": 12777701376.0, "26670": 12777701376.0, "26675": 12777701376.0, "26680": 12777701376.0, "26685": 12777701376.0, "26690": 12777701376.0, "26695": 12777701376.0, "26700": 12777701376.0, "26705": 12777701376.0, "26710": 12777701376.0, "26715": 12777701376.0, "26720": 12777701376.0, "26725": 12777701376.0, "26730": 12777701376.0, "26735": 12777701376.0, "26740": 12777701376.0, "26745": 12777701376.0, "26750": 12777701376.0, "26755": 12777701376.0, "26760": 12777701376.0, "26765": 12777701376.0, "26770": 12777701376.0, "26775": 12777701376.0, "26780": 12777701376.0, "26785": 12777701376.0, "26790": 12777701376.0, "26795": 12777701376.0, "26800": 12777701376.0, "26805": 12777701376.0, "26810": 12777701376.0, "26815": 12777701376.0, "26820": 12777701376.0, "26825": 12777701376.0, "26830": 12777701376.0, "26835": 12777701376.0, "26840": 12777701376.0, "26845": 12777701376.0, "26850": 12777701376.0, "26855": 12777701376.0, "26860": 12777701376.0, "26865": 12777701376.0, "26870": 12777701376.0, "26875": 12777701376.0, "26880": 12777701376.0, "26885": 12777701376.0, "26890": 12777701376.0, "26895": 12777701376.0, "26900": 12777701376.0, "26905": 12777701376.0, "26910": 12777701376.0, "26915": 12777701376.0, "26920": 12777701376.0, "26925": 12777701376.0, "26930": 12777701376.0, "26935": 12777701376.0, "26940": 12777701376.0, "26945": 12777701376.0, "26950": 12777701376.0, "26955": 12777701376.0, "26960": 12777701376.0, "26965": 12777701376.0, "26970": 12777701376.0, "26975": 12777701376.0, "26980": 12777701376.0, "26985": 12777701376.0, "26990": 12777701376.0, "26995": 12777701376.0, "27000": 12777701376.0, "27005": 12777701376.0, "27010": 12777701376.0, "27015": 12777701376.0, "27020": 12777701376.0, "27025": 12777701376.0, "27030": 12777701376.0, "27035": 12777701376.0, "27040": 12777701376.0, "27045": 12777701376.0, "27050": 12777701376.0, "27055": 12777701376.0, "27060": 12777701376.0, "27065": 12777701376.0, "27070": 12777701376.0, "27075": 12777701376.0, "27080": 12777701376.0, "27085": 12777701376.0, "27090": 12777701376.0, "27095": 12777701376.0, "27100": 12777701376.0, "27105": 12777701376.0, "27110": 12777701376.0, "27115": 12777701376.0, "27120": 12777701376.0, "27125": 12777701376.0, "27130": 12777701376.0, "27135": 12777701376.0, "27140": 12777701376.0, "27145": 12777701376.0, "27150": 12777701376.0, "27155": 12777701376.0, "27160": 12777701376.0, "27165": 12777701376.0, "27170": 12777701376.0, "27175": 12777701376.0, "27180": 12777701376.0, "27185": 12777701376.0, "27190": 12777701376.0, "27195": 12777701376.0, "27200": 12777701376.0, "27205": 12777701376.0, "27210": 12777701376.0, "27215": 12777701376.0, "27220": 12777701376.0, "27225": 12777701376.0, "27230": 12777701376.0, "27235": 12777701376.0, "27240": 12777701376.0, "27245": 12777701376.0, "27250": 12777701376.0, "27255": 12777701376.0, "27260": 12777701376.0, "27265": 12777701376.0, "27270": 12777701376.0, "27275": 12777701376.0, "27280": 12777701376.0, "27285": 12777701376.0, "27290": 12777701376.0, "27295": 12777701376.0, "27300": 12777701376.0, "27305": 12777701376.0, "27310": 12777701376.0, "27315": 12777701376.0, "27320": 12777701376.0, "27325": 12777701376.0, "27330": 12777701376.0, "27335": 12777701376.0, "27340": 12777701376.0, "27345": 12777701376.0, "27350": 12777701376.0, "27355": 12777701376.0, "27360": 12777701376.0, "27365": 12777701376.0, "27370": 12777701376.0, "27375": 12777701376.0, "27380": 12777701376.0, "27385": 12777701376.0, "27390": 12777701376.0, "27395": 12777701376.0, "27400": 12777701376.0, "27405": 12777701376.0, "27410": 12777701376.0, "27415": 12777701376.0, "27420": 12777701376.0, "27425": 12777701376.0, "27430": 12777701376.0, "27435": 12777701376.0, "27440": 12777701376.0, "27445": 12777701376.0, "27450": 12777701376.0, "27455": 12777701376.0, "27460": 12777701376.0, "27465": 12777701376.0, "27470": 12777701376.0, "27475": 12777701376.0, "27480": 12777701376.0, "27485": 12777701376.0, "27490": 12777701376.0, "27495": 12777701376.0, "27500": 12777701376.0, "27505": 12777701376.0, "27510": 12777701376.0, "27515": 12777701376.0, "27520": 12777701376.0, "27525": 12777701376.0, "27530": 12777701376.0, "27535": 12777701376.0, "27540": 12777701376.0, "27545": 12777701376.0, "27550": 12777701376.0, "27555": 12777701376.0, "27560": 12777701376.0, "27565": 12777701376.0, "27570": 12777701376.0, "27575": 12777701376.0, "27580": 12777701376.0, "27585": 12777701376.0, "27590": 12777701376.0, "27595": 12777701376.0, "27600": 12777701376.0, "27605": 12777701376.0, "27610": 12777701376.0, "27615": 12777701376.0, "27620": 12777701376.0, "27625": 12777701376.0, "27630": 12777701376.0, "27635": 12777701376.0, "27640": 12777701376.0, "27645": 12777701376.0, "27650": 12777701376.0, "27655": 12777701376.0, "27660": 12777701376.0, "27665": 12777701376.0, "27670": 12777701376.0, "27675": 12777701376.0, "27680": 12777701376.0, "27685": 12777701376.0, "27690": 12777701376.0, "27695": 12777701376.0, "27700": 12777701376.0, "27705": 12777701376.0, "27710": 12777701376.0, "27715": 12777701376.0, "27720": 12777701376.0, "27725": 12777701376.0, "27730": 12777701376.0, "27735": 12777701376.0, "27740": 12777701376.0, "27745": 12777701376.0, "27750": 12777701376.0, "27755": 12777701376.0, "27760": 12777701376.0, "27765": 12777701376.0, "27770": 12777701376.0, "27775": 12777701376.0, "27780": 12777701376.0, "27785": 12777701376.0, "27790": 12777701376.0, "27795": 12777701376.0, "27800": 12777701376.0, "27805": 12777701376.0, "27810": 12777701376.0, "27815": 12777701376.0, "27820": 12777701376.0, "27825": 12777701376.0, "27830": 12777701376.0, "27835": 12777701376.0, "27840": 12777701376.0, "27845": 12777701376.0, "27850": 12777701376.0, "27855": 12777701376.0, "27860": 12777701376.0, "27865": 12777701376.0, "27870": 12777701376.0, "27875": 12777701376.0, "27880": 12777701376.0, "27885": 12777701376.0, "27890": 12777701376.0, "27895": 12777701376.0, "27900": 12777701376.0, "27905": 12777701376.0, "27910": 12777701376.0, "27915": 12777701376.0, "27920": 12777701376.0, "27925": 12777701376.0, "27930": 12777701376.0, "27935": 12777701376.0, "27940": 12777701376.0, "27945": 12777701376.0, "27950": 12777701376.0, "27955": 12777701376.0, "27960": 12777701376.0, "27965": 12777701376.0, "27970": 12777701376.0, "27975": 12777701376.0, "27980": 12777701376.0, "27985": 12777701376.0, "27990": 12777701376.0, "27995": 12777701376.0, "28000": 12777701376.0, "28005": 12777701376.0, "28010": 12777701376.0, "28015": 12777701376.0, "28020": 12777701376.0, "28025": 12777701376.0, "28030": 12777701376.0, "28035": 12777701376.0, "28040": 12777701376.0, "28045": 12777701376.0, "28050": 12777701376.0, "28055": 12777701376.0, "28060": 12777701376.0, "28065": 12777701376.0, "28070": 12777701376.0, "28075": 12777701376.0, "28080": 12777701376.0, "28085": 12777701376.0, "28090": 12777701376.0, "28095": 12777701376.0, "28100": 12777701376.0, "28105": 12777701376.0, "28110": 12777701376.0, "28115": 12777701376.0, "28120": 12777701376.0, "28125": 12777701376.0, "28130": 12777701376.0, "28135": 12777701376.0, "28140": 12777701376.0, "28145": 12777701376.0, "28150": 12777701376.0, "28155": 12777701376.0, "28160": 12777701376.0, "28165": 12777701376.0, "28170": 12777701376.0, "28175": 12777701376.0, "28180": 12777701376.0, "28185": 12777701376.0, "28190": 12777701376.0, "28195": 12777701376.0, "28200": 12777701376.0, "28205": 12777701376.0, "28210": 12777701376.0, "28215": 12777701376.0, "28220": 12777701376.0, "28225": 12777701376.0, "28230": 12777701376.0, "28235": 12777701376.0, "28240": 12777701376.0, "28245": 12777701376.0, "28250": 12777701376.0, "28255": 12777701376.0, "28260": 12777701376.0, "28265": 12777701376.0, "28270": 12777701376.0, "28275": 12777701376.0, "28280": 12777701376.0, "28285": 12777701376.0, "28290": 12777701376.0, "28295": 12777701376.0, "28300": 12777701376.0, "28305": 12777701376.0, "28310": 12777701376.0, "28315": 12777701376.0, "28320": 12777701376.0, "28325": 12777701376.0, "28330": 12777701376.0, "28335": 12777701376.0, "28340": 12777701376.0, "28345": 12777701376.0, "28350": 12777701376.0, "28355": 12777701376.0, "28360": 12777701376.0, "28365": 12777701376.0, "28370": 12777701376.0, "28375": 12777701376.0, "28380": 12777701376.0, "28385": 12777701376.0, "28390": 12777701376.0, "28395": 12777701376.0, "28400": 12777701376.0, "28405": 12777701376.0, "28410": 12777701376.0, "28415": 12777701376.0, "28420": 12777701376.0, "28425": 12777701376.0, "28430": 12777701376.0, "28435": 12777701376.0, "28440": 12777701376.0, "28445": 12777701376.0, "28450": 12777701376.0, "28455": 12777701376.0, "28460": 12777701376.0, "28465": 12777701376.0, "28470": 12777701376.0, "28475": 12777701376.0, "28480": 12777701376.0, "28485": 12777701376.0, "28490": 12777701376.0, "28495": 12777701376.0, "28500": 12777701376.0, "28505": 12777701376.0, "28510": 12777701376.0, "28515": 12777701376.0, "28520": 12777701376.0, "28525": 12777701376.0, "28530": 12777701376.0, "28535": 12777701376.0, "28540": 12777701376.0, "28545": 12777701376.0, "28550": 12777701376.0, "28555": 12777701376.0, "28560": 12777701376.0, "28565": 12777701376.0, "28570": 12777701376.0, "28575": 12777701376.0, "28580": 12777701376.0, "28585": 12777701376.0, "28590": 12777701376.0, "28595": 12777701376.0, "28600": 12777701376.0, "28605": 12777701376.0, "28610": 12777701376.0, "28615": 12777701376.0, "28620": 12777701376.0, "28625": 12777701376.0, "28630": 12777701376.0, "28635": 12777701376.0, "28640": 12777701376.0, "28645": 12777701376.0, "28650": 12777701376.0, "28655": 12777701376.0, "28660": 12777701376.0, "28665": 12777701376.0, "28670": 12777701376.0, "28675": 12777701376.0, "28680": 12777701376.0, "28685": 12777701376.0, "28690": 12777701376.0, "28695": 12777701376.0, "28700": 12777701376.0, "28705": 12777701376.0, "28710": 12777701376.0, "28715": 12777701376.0, "28720": 12777701376.0, "28725": 12777701376.0, "28730": 12777701376.0, "28735": 12777701376.0, "28740": 12777701376.0, "28745": 12777701376.0, "28750": 12777701376.0, "28755": 12777701376.0, "28760": 12777701376.0, "28765": 12777701376.0, "28770": 12777701376.0, "28775": 12777701376.0, "28780": 12777701376.0, "28785": 12777701376.0, "28790": 12777701376.0, "28795": 12777701376.0, "28800": 12777701376.0, "28805": 12777701376.0, "28810": 12777701376.0, "28815": 12777701376.0, "28820": 12777701376.0, "28825": 12777701376.0, "28830": 12777701376.0, "28835": 12777701376.0, "28840": 12777701376.0, "28845": 12777701376.0, "28850": 12777701376.0, "28855": 12777701376.0, "28860": 12777701376.0, "28865": 12777701376.0, "28870": 12777701376.0, "28875": 12777701376.0, "28880": 12777701376.0, "28885": 12777701376.0, "28890": 12777701376.0, "28895": 12777701376.0, "28900": 12777701376.0, "28905": 12777701376.0, "28910": 12777701376.0, "28915": 12777701376.0, "28920": 12777701376.0, "28925": 12777701376.0, "28930": 12777701376.0, "28935": 12777701376.0, "28940": 12777701376.0, "28945": 12777701376.0, "28950": 12777701376.0, "28955": 12777701376.0, "28960": 12777701376.0, "28965": 12777701376.0, "28970": 12777701376.0, "28975": 12777701376.0, "28980": 12777701376.0, "28985": 12777701376.0, "28990": 12777701376.0, "28995": 12777701376.0, "29000": 12777701376.0, "29005": 12777701376.0, "29010": 12777701376.0, "29015": 12777701376.0, "29020": 12777701376.0, "29025": 12777701376.0, "29030": 12777701376.0, "29035": 12777701376.0, "29040": 12777701376.0, "29045": 12777701376.0, "29050": 12777701376.0, "29055": 12777701376.0, "29060": 12777701376.0, "29065": 12777701376.0, "29070": 12777701376.0, "29075": 12777701376.0, "29080": 12777701376.0, "29085": 12777701376.0, "29090": 12777701376.0, "29095": 12777701376.0, "29100": 12777701376.0, "29105": 12777701376.0, "29110": 12777701376.0, "29115": 12777701376.0, "29120": 12777701376.0, "29125": 12777701376.0, "29130": 12777701376.0, "29135": 12777701376.0, "29140": 12777701376.0, "29145": 12777701376.0, "29150": 12777701376.0, "29155": 12777701376.0, "29160": 12777701376.0, "29165": 12777701376.0, "29170": 12777701376.0, "29175": 12777701376.0, "29180": 12777701376.0, "29185": 12777701376.0, "29190": 12777701376.0, "29195": 12777701376.0, "29200": 12777701376.0, "29205": 12777701376.0, "29210": 12777701376.0, "29215": 12777701376.0, "29220": 12777701376.0, "29225": 12777701376.0, "29230": 12777701376.0, "29235": 12777701376.0, "29240": 12777701376.0, "29245": 12777701376.0, "29250": 12777701376.0, "29255": 12777701376.0, "29260": 12777701376.0, "29265": 12777701376.0, "29270": 12777701376.0, "29275": 12777701376.0, "29280": 12777701376.0, "29285": 12777701376.0, "29290": 12777701376.0, "29295": 12777701376.0, "29300": 12777701376.0, "29305": 12777701376.0, "29310": 12777701376.0, "29315": 12777701376.0, "29320": 12777701376.0, "29325": 12777701376.0, "29330": 12777701376.0, "29335": 12777701376.0, "29340": 12777701376.0, "29345": 12777701376.0, "29350": 12777701376.0, "29355": 12777701376.0, "29360": 12777701376.0, "29365": 12777701376.0, "29370": 12777701376.0, "29375": 12777701376.0, "29380": 12777701376.0, "29385": 12777701376.0, "29390": 12777701376.0, "29395": 12777701376.0, "29400": 12777701376.0, "29405": 12777701376.0, "29410": 12777701376.0, "29415": 12777701376.0, "29420": 12777701376.0, "29425": 12777701376.0, "29430": 12777701376.0, "29435": 12777701376.0, "29440": 12777701376.0, "29445": 12777701376.0, "29450": 12777701376.0, "29455": 12777701376.0, "29460": 12777701376.0, "29465": 12777701376.0, "29470": 12777701376.0, "29475": 12777701376.0, "29480": 12777701376.0, "29485": 12777701376.0, "29490": 12777701376.0, "29495": 12777701376.0, "29500": 12777701376.0, "29505": 12777701376.0, "29510": 12777701376.0, "29515": 12777701376.0, "29520": 12777701376.0, "29525": 12777701376.0, "29530": 12777701376.0, "29535": 12777701376.0, "29540": 12777701376.0, "29545": 12777701376.0, "29550": 12777701376.0, "29555": 12777701376.0, "29560": 12777701376.0, "29565": 12777701376.0, "29570": 12777701376.0, "29575": 12777701376.0, "29580": 12777701376.0, "29585": 12777701376.0, "29590": 12777701376.0, "29595": 12777701376.0, "29600": 12777701376.0, "29605": 12777701376.0, "29610": 12777701376.0, "29615": 12777701376.0, "29620": 12777701376.0, "29625": 12777701376.0, "29630": 12777701376.0, "29635": 12777701376.0, "29640": 12777701376.0, "29645": 12777701376.0, "29650": 12777701376.0, "29655": 12777701376.0, "29660": 12777701376.0, "29665": 12777701376.0, "29670": 12777701376.0, "29675": 12777701376.0, "29680": 12777701376.0, "29685": 12777701376.0, "29690": 12777701376.0, "29695": 12777701376.0, "29700": 12777701376.0, "29705": 12777701376.0, "29710": 12777701376.0, "29715": 12777701376.0, "29720": 12777701376.0, "29725": 12777701376.0, "29730": 12777701376.0, "29735": 12777701376.0, "29740": 12777701376.0, "29745": 12777701376.0, "29750": 12777701376.0, "29755": 12777701376.0, "29760": 12777701376.0, "29765": 12777701376.0, "29770": 12777701376.0, "29775": 12777701376.0, "29780": 12777701376.0, "29785": 12777701376.0, "29790": 12777701376.0, "29795": 12777701376.0, "29800": 12777701376.0, "29805": 12777701376.0, "29810": 12777701376.0, "29815": 12777701376.0, "29820": 12777701376.0, "29825": 12777701376.0, "29830": 12777701376.0, "29835": 12777701376.0, "29840": 12777701376.0, "29845": 12777701376.0, "29850": 12777701376.0, "29855": 12777701376.0, "29860": 12777701376.0, "29865": 12777701376.0, "29870": 12777701376.0, "29875": 12777701376.0, "29880": 12777701376.0, "29885": 12777701376.0, "29890": 12777701376.0, "29895": 12777701376.0, "29900": 12777701376.0, "29905": 12777701376.0, "29910": 12777701376.0, "29915": 12777701376.0, "29920": 12777701376.0, "29925": 12777701376.0, "29930": 12777701376.0, "29935": 12777701376.0, "29940": 12777701376.0, "29945": 12777701376.0, "29950": 12777701376.0, "29955": 12777701376.0, "29960": 12777701376.0, "29965": 12777701376.0, "29970": 12777701376.0, "29975": 12777701376.0, "29980": 12777701376.0, "29985": 12777701376.0, "29990": 12777701376.0, "29995": 12777701376.0, "30000": 12777701376.0, "30005": 12777701376.0, "30010": 12777701376.0, "30015": 12777701376.0, "30020": 12777701376.0, "30025": 12777701376.0, "30030": 12777701376.0, "30035": 12777701376.0, "30040": 12777701376.0, "30045": 12777701376.0, "30050": 12777701376.0, "30055": 12777701376.0, "30060": 12777701376.0, "30065": 12777701376.0, "30070": 12777701376.0, "30075": 12777701376.0, "30080": 12777701376.0, "30085": 12777701376.0, "30090": 12777701376.0, "30095": 12777701376.0, "30100": 12777701376.0, "30105": 12777701376.0, "30110": 12777701376.0, "30115": 12777701376.0, "30120": 12777701376.0, "30125": 12777701376.0, "30130": 12777701376.0, "30135": 12777701376.0, "30140": 12777701376.0, "30145": 12777701376.0, "30150": 12777701376.0, "30155": 12777701376.0, "30160": 12777701376.0, "30165": 12777701376.0, "30170": 12777701376.0, "30175": 12777701376.0, "30180": 12777701376.0, "30185": 12777701376.0, "30190": 12777701376.0, "30195": 12777701376.0, "30200": 12777701376.0, "30205": 12777701376.0, "30210": 12777701376.0, "30215": 12777701376.0, "30220": 12777701376.0, "30225": 12777701376.0, "30230": 12777701376.0, "30235": 12777701376.0, "30240": 12777701376.0, "30245": 12777701376.0, "30250": 12777701376.0, "30255": 12777701376.0, "30260": 12777701376.0, "30265": 12777701376.0, "30270": 12777701376.0, "30275": 12777701376.0, "30280": 12777701376.0, "30285": 12777701376.0, "30290": 12777701376.0, "30295": 12777701376.0, "30300": 12777701376.0, "30305": 12777701376.0, "30310": 12777701376.0, "30315": 12777701376.0, "30320": 12777701376.0, "30325": 12777701376.0, "30330": 12777701376.0, "30335": 12777701376.0, "30340": 12777701376.0, "30345": 12777701376.0, "30350": 12777701376.0, "30355": 12777701376.0, "30360": 12777701376.0, "30365": 12777701376.0, "30370": 12777701376.0, "30375": 12777701376.0, "30380": 12777701376.0, "30385": 12777701376.0, "30390": 12777701376.0, "30395": 12777701376.0, "30400": 12777701376.0, "30405": 12777701376.0, "30410": 12777701376.0, "30415": 12777701376.0, "30420": 12777701376.0, "30425": 12777701376.0, "30430": 12777701376.0, "30435": 12777701376.0, "30440": 12777701376.0, "30445": 12777701376.0, "30450": 12777701376.0, "30455": 12777701376.0, "30460": 12777701376.0, "30465": 12777701376.0, "30470": 12777701376.0, "30475": 12777701376.0, "30480": 12777701376.0, "30485": 12777701376.0, "30490": 12777701376.0, "30495": 12777701376.0, "30500": 12777701376.0, "30505": 12777701376.0, "30510": 12777701376.0, "30515": 12777701376.0, "30520": 12777701376.0, "30525": 12777701376.0, "30530": 12777701376.0, "30535": 12777701376.0, "30540": 12777701376.0, "30545": 12777701376.0, "30550": 12777701376.0, "30555": 12777701376.0, "30560": 12777701376.0, "30565": 12777701376.0, "30570": 12777701376.0, "30575": 12777701376.0, "30580": 12777701376.0, "30585": 12777701376.0, "30590": 12777701376.0, "30595": 12777701376.0, "30600": 12777701376.0, "30605": 12777701376.0, "30610": 12777701376.0, "30615": 12777701376.0, "30620": 12777701376.0, "30625": 12777701376.0, "30630": 12777701376.0, "30635": 12777701376.0, "30640": 12777701376.0, "30645": 12777701376.0, "30650": 12777701376.0, "30655": 12777701376.0, "30660": 12777701376.0, "30665": 12777701376.0, "30670": 12777701376.0, "30675": 12777701376.0, "30680": 12777701376.0, "30685": 12777701376.0, "30690": 12777701376.0, "30695": 12777701376.0, "30700": 12777701376.0, "30705": 12777701376.0, "30710": 12777701376.0, "30715": 12777701376.0, "30720": 12777701376.0, "30725": 12777701376.0, "30730": 12777701376.0, "30735": 12777701376.0, "30740": 12777701376.0, "30745": 12777701376.0, "30750": 12777701376.0, "30755": 12777701376.0, "30760": 12777701376.0, "30765": 12777701376.0, "30770": 12777701376.0, "30775": 12777701376.0, "30780": 12777701376.0, "30785": 12777701376.0, "30790": 12777701376.0, "30795": 12777701376.0, "30800": 12777701376.0, "30805": 12777701376.0, "30810": 12777701376.0, "30815": 12777701376.0, "30820": 12777701376.0, "30825": 12777701376.0, "30830": 12777701376.0, "30835": 12777701376.0, "30840": 12777701376.0, "30845": 12777701376.0, "30850": 12777701376.0, "30855": 12777701376.0, "30860": 12777701376.0, "30865": 12777701376.0, "30870": 12777701376.0, "30875": 12777701376.0, "30880": 12777701376.0, "30885": 12777701376.0, "30890": 12777701376.0, "30895": 12777701376.0, "30900": 12777701376.0, "30905": 12777701376.0, "30910": 12777701376.0, "30915": 12777701376.0, "30920": 12777701376.0, "30925": 12777701376.0, "30930": 12777701376.0, "30935": 12777701376.0, "30940": 12777701376.0, "30945": 12777701376.0, "30950": 12777701376.0, "30955": 12777701376.0, "30960": 12777701376.0, "30965": 12777701376.0, "30970": 12777701376.0, "30975": 12777701376.0, "30980": 12777701376.0, "30985": 12777701376.0, "30990": 12777701376.0, "30995": 12777701376.0, "31000": 12777701376.0, "31005": 12777701376.0, "31010": 12777701376.0, "31015": 12777701376.0, "31020": 12777701376.0, "31025": 12777701376.0, "31030": 12777701376.0, "31035": 12777701376.0, "31040": 12777701376.0, "31045": 12777701376.0, "31050": 12777701376.0, "31055": 12777701376.0, "31060": 12777701376.0, "31065": 12777701376.0, "31070": 12777701376.0, "31075": 12777701376.0, "31080": 12777701376.0, "31085": 12777701376.0, "31090": 12777701376.0, "31095": 12777701376.0, "31100": 12777701376.0, "31105": 12777701376.0, "31110": 12777701376.0, "31115": 12777701376.0, "31120": 12777701376.0, "31125": 12777701376.0, "31130": 12777701376.0, "31135": 12777701376.0, "31140": 12777701376.0, "31145": 12777701376.0, "31150": 12777701376.0, "31155": 12777701376.0, "31160": 12777701376.0, "31165": 12777701376.0, "31170": 12777701376.0, "31175": 12777701376.0, "31180": 12777701376.0, "31185": 12777701376.0, "31190": 12777701376.0, "31195": 12777701376.0, "31200": 12777701376.0, "31205": 12777701376.0, "31210": 12777701376.0, "31215": 12777701376.0, "31220": 12777701376.0, "31225": 12777701376.0, "31230": 12777701376.0, "31235": 12777701376.0, "31240": 12777701376.0, "31245": 12777701376.0, "31250": 12777701376.0, "31255": 12777701376.0, "31260": 12777701376.0, "31265": 12777701376.0, "31270": 12777701376.0, "31275": 12777701376.0, "31280": 12777701376.0, "31285": 12777701376.0, "31290": 12777701376.0, "31295": 12777701376.0, "31300": 12777701376.0, "31305": 12777701376.0, "31310": 12777701376.0, "31315": 12777701376.0, "31320": 12777701376.0, "31325": 12777701376.0, "31330": 12777701376.0, "31335": 12777701376.0, "31340": 12777701376.0, "31345": 12777701376.0, "31350": 12777701376.0, "31355": 12777701376.0, "31360": 12777701376.0, "31365": 12777701376.0, "31370": 12777701376.0, "31375": 12777701376.0, "31380": 12777701376.0, "31385": 12777701376.0, "31390": 12777701376.0, "31395": 12777701376.0, "31400": 12777701376.0, "31405": 12777701376.0, "31410": 12777701376.0, "31415": 12777701376.0, "31420": 12777701376.0, "31425": 12777701376.0, "31430": 12777701376.0, "31435": 12777701376.0, "31440": 12777701376.0, "31445": 12777701376.0, "31450": 12777701376.0, "31455": 12777701376.0, "31460": 12777701376.0, "31465": 12777701376.0, "31470": 12777701376.0, "31475": 12777701376.0, "31480": 12777701376.0, "31485": 12777701376.0, "31490": 12777701376.0, "31495": 12777701376.0, "31500": 12777701376.0, "31505": 12777701376.0, "31510": 12777701376.0, "31515": 12777701376.0, "31520": 12777701376.0, "31525": 12777701376.0, "31530": 12777701376.0, "31535": 12777701376.0, "31540": 12777701376.0, "31545": 12777701376.0, "31550": 12777701376.0, "31555": 12777701376.0, "31560": 12777701376.0, "31565": 12777701376.0, "31570": 12777701376.0, "31575": 12777701376.0, "31580": 12777701376.0, "31585": 12777701376.0, "31590": 12777701376.0, "31595": 12777701376.0, "31600": 12777701376.0, "31605": 12777701376.0, "31610": 12777701376.0, "31615": 12777701376.0, "31620": 12777701376.0, "31625": 12777701376.0, "31630": 12777701376.0, "31635": 12777701376.0, "31640": 12777701376.0, "31645": 12777701376.0, "31650": 12777701376.0, "31655": 12777701376.0, "31660": 12777701376.0, "31665": 12777701376.0, "31670": 12777701376.0, "31675": 12777701376.0, "31680": 12777701376.0, "31685": 12777701376.0, "31690": 12777701376.0, "31695": 12777701376.0, "31700": 12777701376.0, "31705": 12777701376.0, "31710": 12777701376.0, "31715": 12777701376.0, "31720": 12777701376.0, "31725": 12777701376.0, "31730": 12777701376.0, "31735": 12777701376.0, "31740": 12777701376.0, "31745": 12777701376.0, "31750": 12777701376.0, "31755": 12777701376.0, "31760": 12777701376.0, "31765": 12777701376.0, "31770": 12777701376.0, "31775": 12777701376.0, "31780": 12777701376.0, "31785": 12777701376.0, "31790": 12777701376.0, "31795": 12777701376.0, "31800": 12777701376.0, "31805": 12777701376.0, "31810": 12777701376.0, "31815": 12777701376.0, "31820": 12777701376.0, "31825": 12777701376.0, "31830": 12777701376.0, "31835": 12777701376.0, "31840": 12777701376.0, "31845": 12777701376.0, "31850": 12777701376.0, "31855": 12777701376.0, "31860": 12777701376.0, "31865": 12777701376.0, "31870": 12777701376.0, "31875": 12777701376.0, "31880": 12777701376.0, "31885": 12777701376.0, "31890": 12777701376.0, "31895": 12777701376.0, "31900": 12777701376.0, "31905": 12777701376.0, "31910": 12777701376.0, "31915": 12777701376.0, "31920": 12777701376.0, "31925": 12777701376.0, "31930": 12777701376.0, "31935": 12777701376.0, "31940": 12777701376.0, "31945": 12777701376.0, "31950": 12777701376.0, "31955": 12777701376.0, "31960": 12777701376.0, "31965": 12777701376.0, "31970": 12777701376.0, "31975": 12777701376.0, "31980": 12777701376.0, "31985": 12777701376.0, "31990": 12777701376.0, "31995": 12777701376.0, "32000": 12777701376.0, "32005": 12777701376.0, "32010": 12777701376.0, "32015": 12777701376.0, "32020": 12777701376.0, "32025": 12777701376.0, "32030": 12777701376.0, "32035": 12777701376.0, "32040": 12777701376.0, "32045": 12777701376.0, "32050": 12777701376.0, "32055": 12777701376.0, "32060": 12777701376.0, "32065": 12777701376.0, "32070": 12777701376.0, "32075": 12777701376.0, "32080": 12777701376.0, "32085": 12777701376.0, "32090": 12777701376.0, "32095": 12777701376.0, "32100": 12777701376.0, "32105": 12777701376.0, "32110": 12777701376.0, "32115": 12777701376.0, "32120": 12777701376.0, "32125": 12777701376.0, "32130": 12777701376.0, "32135": 12777701376.0, "32140": 12777701376.0, "32145": 12777701376.0, "32150": 12777701376.0, "32155": 12777701376.0, "32160": 12777701376.0, "32165": 12777701376.0, "32170": 12777701376.0, "32175": 12777701376.0, "32180": 12777701376.0, "32185": 12777701376.0, "32190": 12777701376.0, "32195": 12777701376.0, "32200": 12777701376.0, "32205": 12777701376.0, "32210": 12777701376.0, "32215": 12777701376.0, "32220": 12777701376.0, "32225": 12777701376.0, "32230": 12777701376.0, "32235": 12777701376.0, "32240": 12777701376.0, "32245": 12777701376.0, "32250": 12777701376.0, "32255": 12777701376.0, "32260": 12777701376.0, "32265": 12777701376.0, "32270": 12777701376.0, "32275": 12777701376.0, "32280": 12777701376.0, "32285": 12777701376.0, "32290": 12777701376.0, "32295": 12777701376.0, "32300": 12777701376.0, "32305": 12777701376.0, "32310": 12777701376.0, "32315": 12777701376.0, "32320": 12777701376.0, "32325": 12777701376.0, "32330": 12777701376.0, "32335": 12777701376.0, "32340": 12777701376.0, "32345": 12777701376.0, "32350": 12777701376.0, "32355": 12777701376.0, "32360": 12777701376.0, "32365": 12777701376.0, "32370": 12777701376.0, "32375": 12777701376.0, "32380": 12777701376.0, "32385": 12777701376.0, "32390": 12777701376.0, "32395": 12777701376.0, "32400": 12777701376.0, "32405": 12777701376.0, "32410": 12777701376.0, "32415": 12777701376.0, "32420": 12777701376.0, "32425": 12777701376.0, "32430": 12777701376.0, "32435": 12777701376.0, "32440": 12777701376.0, "32445": 12777701376.0, "32450": 12777701376.0, "32455": 12777701376.0, "32460": 12777701376.0, "32465": 12777701376.0, "32470": 12777701376.0, "32475": 12777701376.0, "32480": 12777701376.0, "32485": 12777701376.0, "32490": 12777701376.0, "32495": 12777701376.0, "32500": 12777701376.0, "32505": 12777701376.0, "32510": 12777701376.0, "32515": 12777701376.0, "32520": 12777701376.0, "32525": 12777701376.0, "32530": 12777701376.0, "32535": 12777701376.0, "32540": 12777701376.0, "32545": 12777701376.0, "32550": 12777701376.0, "32555": 12777701376.0, "32560": 12777701376.0, "32565": 12777701376.0, "32570": 12777701376.0, "32575": 12777701376.0, "32580": 12777701376.0, "32585": 12777701376.0, "32590": 12777701376.0, "32595": 12777701376.0, "32600": 12777701376.0, "32605": 12777701376.0, "32610": 12777701376.0, "32615": 12777701376.0, "32620": 12777701376.0, "32625": 12777701376.0, "32630": 12777701376.0, "32635": 12777701376.0, "32640": 12777701376.0, "32645": 12777701376.0, "32650": 12777701376.0, "32655": 12777701376.0, "32660": 12777701376.0, "32665": 12777701376.0, "32670": 12777701376.0, "32675": 12777701376.0, "32680": 12777701376.0, "32685": 12777701376.0, "32690": 12777701376.0, "32695": 12777701376.0, "32700": 12777701376.0, "32705": 12777701376.0, "32710": 12777701376.0, "32715": 12777701376.0, "32720": 12777701376.0, "32725": 12777701376.0, "32730": 12777701376.0, "32735": 12777701376.0, "32740": 12777701376.0, "32745": 12777701376.0, "32750": 12777701376.0, "32755": 12777701376.0, "32760": 12777701376.0, "32765": 12777701376.0, "32770": 12777701376.0, "32775": 12777701376.0, "32780": 12777701376.0, "32785": 12777701376.0, "32790": 12777701376.0, "32795": 12777701376.0, "32800": 12777701376.0, "32805": 12777701376.0, "32810": 12777701376.0, "32815": 12777701376.0, "32820": 12777701376.0, "32825": 12777701376.0, "32830": 12777701376.0, "32835": 12777701376.0, "32840": 12777701376.0, "32845": 12777701376.0, "32850": 12777701376.0, "32855": 12777701376.0, "32860": 12777701376.0, "32865": 12777701376.0, "32870": 12777701376.0, "32875": 12777701376.0, "32880": 12777701376.0, "32885": 12777701376.0, "32890": 12777701376.0, "32895": 12777701376.0, "32900": 12777701376.0, "32905": 12777701376.0, "32910": 12777701376.0, "32915": 12777701376.0, "32920": 12777701376.0, "32925": 12777701376.0, "32930": 12777701376.0, "32935": 12777701376.0, "32940": 12777701376.0, "32945": 12777701376.0, "32950": 12777701376.0, "32955": 12777701376.0, "32960": 12777701376.0, "32965": 12777701376.0, "32970": 12777701376.0, "32975": 12777701376.0, "32980": 12777701376.0, "32985": 12777701376.0, "32990": 12777701376.0, "32995": 12777701376.0, "33000": 12777701376.0, "33005": 12777701376.0, "33010": 12777701376.0, "33015": 12777701376.0, "33020": 12777701376.0, "33025": 12777701376.0, "33030": 12777701376.0, "33035": 12777701376.0, "33040": 12777701376.0, "33045": 12777701376.0, "33050": 12777701376.0, "33055": 12777701376.0, "33060": 12777701376.0, "33065": 12777701376.0, "33070": 12777701376.0, "33075": 12777701376.0, "33080": 12777701376.0, "33085": 12777701376.0, "33090": 12777701376.0, "33095": 12777701376.0, "33100": 12777701376.0, "33105": 12777701376.0, "33110": 12777701376.0, "33115": 12777701376.0, "33120": 12777701376.0, "33125": 12777701376.0, "33130": 12777701376.0, "33135": 12777701376.0, "33140": 12777701376.0, "33145": 12777701376.0, "33150": 12777701376.0, "33155": 12777701376.0, "33160": 12777701376.0, "33165": 12777701376.0, "33170": 12777701376.0, "33175": 12777701376.0, "33180": 12777701376.0, "33185": 12777701376.0, "33190": 12777701376.0, "33195": 12777701376.0, "33200": 12777701376.0, "33205": 12777701376.0, "33210": 12777701376.0, "33215": 12777701376.0, "33220": 12777701376.0, "33225": 12777701376.0, "33230": 12777701376.0, "33235": 12777701376.0, "33240": 12777701376.0, "33245": 12777701376.0, "33250": 12777701376.0, "33255": 12777701376.0, "33260": 12777701376.0, "33265": 12777701376.0, "33270": 12777701376.0, "33275": 12777701376.0, "33280": 12777701376.0, "33285": 12777701376.0, "33290": 12777701376.0, "33295": 12777701376.0, "33300": 12777701376.0, "33305": 12777701376.0, "33310": 12777701376.0, "33315": 12777701376.0, "33320": 12777701376.0, "33325": 12777701376.0, "33330": 12777701376.0, "33335": 12777701376.0, "33340": 12777701376.0, "33345": 12777701376.0, "33350": 12777701376.0, "33355": 12777701376.0, "33360": 12777701376.0, "33365": 12777701376.0, "33370": 12777701376.0, "33375": 12777701376.0, "33380": 12777701376.0, "33385": 12777701376.0, "33390": 12777701376.0, "33395": 12777701376.0, "33400": 12777701376.0, "33405": 12777701376.0, "33410": 12777701376.0, "33415": 12777701376.0, "33420": 12777701376.0, "33425": 12777701376.0, "33430": 12777701376.0, "33435": 12777701376.0, "33440": 12777701376.0, "33445": 12777701376.0, "33450": 12777701376.0, "33455": 12777701376.0, "33460": 12777701376.0, "33465": 12777701376.0, "33470": 12777701376.0, "33475": 12777701376.0, "33480": 12777701376.0, "33485": 12777701376.0, "33490": 12777701376.0, "33495": 12777701376.0, "33500": 12777701376.0, "33505": 12777701376.0, "33510": 12777701376.0, "33515": 12777701376.0, "33520": 12777701376.0, "33525": 12777701376.0, "33530": 12777701376.0, "33535": 12777701376.0, "33540": 12777701376.0, "33545": 12777701376.0, "33550": 12777701376.0, "33555": 12777701376.0, "33560": 12777701376.0, "33565": 12777701376.0, "33570": 12777701376.0, "33575": 12777701376.0, "33580": 12777701376.0, "33585": 12777701376.0, "33590": 12777701376.0, "33595": 12777701376.0, "33600": 12777701376.0, "33605": 12777701376.0, "33610": 12777701376.0, "33615": 12777701376.0, "33620": 12777701376.0, "33625": 12777701376.0, "33630": 12777701376.0, "33635": 12777701376.0, "33640": 12777701376.0, "33645": 12777701376.0, "33650": 12777701376.0, "33655": 12777701376.0, "33660": 12777701376.0, "33665": 12777701376.0, "33670": 12777701376.0, "33675": 12777701376.0, "33680": 12777701376.0, "33685": 12777701376.0, "33690": 12777701376.0, "33695": 12777701376.0, "33700": 12777701376.0, "33705": 12777701376.0, "33710": 12777701376.0, "33715": 12777701376.0, "33720": 12777701376.0, "33725": 12777701376.0, "33730": 12777701376.0, "33735": 12777701376.0, "33740": 12777701376.0, "33745": 12777701376.0, "33750": 12777701376.0, "33755": 12777701376.0, "33760": 12777701376.0, "33765": 12777701376.0, "33770": 12777701376.0, "33775": 12777701376.0, "33780": 12777701376.0, "33785": 12777701376.0, "33790": 12777701376.0, "33795": 12777701376.0, "33800": 12777701376.0, "33805": 12777701376.0, "33810": 12777701376.0, "33815": 12777701376.0, "33820": 12777701376.0, "33825": 12777701376.0, "33830": 12777701376.0, "33835": 12777701376.0, "33840": 12777701376.0, "33845": 12777701376.0, "33850": 12777701376.0, "33855": 12777701376.0, "33860": 12777701376.0, "33865": 12777701376.0, "33870": 12777701376.0, "33875": 12777701376.0, "33880": 12777701376.0, "33885": 12777701376.0, "33890": 12777701376.0, "33895": 12777701376.0, "33900": 12777701376.0, "33905": 12777701376.0, "33910": 12777701376.0, "33915": 12777701376.0, "33920": 12777701376.0, "33925": 12777701376.0, "33930": 12777701376.0, "33935": 12777701376.0, "33940": 12777701376.0, "33945": 12777701376.0, "33950": 12777701376.0, "33955": 12777701376.0, "33960": 12777701376.0, "33965": 12777701376.0, "33970": 12777701376.0, "33975": 12777701376.0, "33980": 12777701376.0, "33985": 12777701376.0, "33990": 12777701376.0, "33995": 12777701376.0, "34000": 12777701376.0, "34005": 12777701376.0, "34010": 12777701376.0, "34015": 12777701376.0, "34020": 12777701376.0, "34025": 12777701376.0, "34030": 12777701376.0, "34035": 12777701376.0, "34040": 12777701376.0, "34045": 12777701376.0, "34050": 12777701376.0, "34055": 12777701376.0, "34060": 12777701376.0, "34065": 12777701376.0, "34070": 12777701376.0, "34075": 12777701376.0, "34080": 12777701376.0, "34085": 12777701376.0, "34090": 12777701376.0, "34095": 12777701376.0, "34100": 12777701376.0, "34105": 12777701376.0, "34110": 12777701376.0, "34115": 12777701376.0, "34120": 12777701376.0, "34125": 12777701376.0, "34130": 12777701376.0, "34135": 12777701376.0, "34140": 12777701376.0, "34145": 12777701376.0, "34150": 12777701376.0, "34155": 12777701376.0, "34160": 12777701376.0, "34165": 12777701376.0, "34170": 12777701376.0, "34175": 12777701376.0, "34180": 12777701376.0, "34185": 12777701376.0, "34190": 12777701376.0, "34195": 12777701376.0, "34200": 12777701376.0, "34205": 12777701376.0, "34210": 12777701376.0, "34215": 12777701376.0, "34220": 12777701376.0, "34225": 12777701376.0, "34230": 12777701376.0, "34235": 12777701376.0, "34240": 12777701376.0, "34245": 12777701376.0, "34250": 12777701376.0, "34255": 12777701376.0, "34260": 12777701376.0, "34265": 12777701376.0, "34270": 12777701376.0, "34275": 12777701376.0, "34280": 12777701376.0, "34285": 12777701376.0, "34290": 12777701376.0, "34295": 12777701376.0, "34300": 12777701376.0, "34305": 12777701376.0, "34310": 12777701376.0, "34315": 12777701376.0, "34320": 12777701376.0, "34325": 12777701376.0, "34330": 12777701376.0, "34335": 12777701376.0, "34340": 12777701376.0, "34345": 12777701376.0, "34350": 12777701376.0, "34355": 12777701376.0, "34360": 12777701376.0, "34365": 12777701376.0, "34370": 12777701376.0, "34375": 12777701376.0, "34380": 12777701376.0, "34385": 12777701376.0, "34390": 12777701376.0, "34395": 12777701376.0, "34400": 12777701376.0, "34405": 12777701376.0, "34410": 12777701376.0, "34415": 12777701376.0, "34420": 12777701376.0, "34425": 12777701376.0, "34430": 12777701376.0, "34435": 12777701376.0, "34440": 12777701376.0, "34445": 12777701376.0, "34450": 12777701376.0, "34455": 12777701376.0, "34460": 12777701376.0, "34465": 12777701376.0, "34470": 12777701376.0, "34475": 12777701376.0, "34480": 12777701376.0, "34485": 12777701376.0, "34490": 12777701376.0, "34495": 12777701376.0, "34500": 12777701376.0, "34505": 12777701376.0, "34510": 12777701376.0, "34515": 12777701376.0, "34520": 12777701376.0, "34525": 12777701376.0, "34530": 12777701376.0, "34535": 12777701376.0, "34540": 12777701376.0, "34545": 12777701376.0, "34550": 12777701376.0, "34555": 12777701376.0, "34560": 12777701376.0, "34565": 12777701376.0, "34570": 12777701376.0, "34575": 12777701376.0, "34580": 12777701376.0, "34585": 12777701376.0, "34590": 12777701376.0, "34595": 12777701376.0, "34600": 12777701376.0, "34605": 12777701376.0, "34610": 12777701376.0, "34615": 12777701376.0, "34620": 12777701376.0, "34625": 12777701376.0, "34630": 12777701376.0, "34635": 12777701376.0, "34640": 12777701376.0, "34645": 12777701376.0, "34650": 12777701376.0, "34655": 12777701376.0, "34660": 12777701376.0, "34665": 12777701376.0, "34670": 12777701376.0, "34675": 12777701376.0, "34680": 12777701376.0, "34685": 12777701376.0, "34690": 12777701376.0, "34695": 12777701376.0, "34700": 12777701376.0, "34705": 12777701376.0, "34710": 12777701376.0, "34715": 12777701376.0, "34720": 12777701376.0, "34725": 12777701376.0, "34730": 12777701376.0, "34735": 12777701376.0, "34740": 12777701376.0, "34745": 12777701376.0, "34750": 12777701376.0, "34755": 12777701376.0, "34760": 12777701376.0, "34765": 12777701376.0, "34770": 12777701376.0, "34775": 12777701376.0, "34780": 12777701376.0, "34785": 12777701376.0, "34790": 12777701376.0, "34795": 12777701376.0, "34800": 12777701376.0, "34805": 12777701376.0, "34810": 12777701376.0, "34815": 12777701376.0, "34820": 12777701376.0, "34825": 12777701376.0, "34830": 12777701376.0, "34835": 12777701376.0, "34840": 12777701376.0, "34845": 12777701376.0, "34850": 12777701376.0, "34855": 12777701376.0, "34860": 12777701376.0, "34865": 12777701376.0, "34870": 12777701376.0, "34875": 12777701376.0, "34880": 12777701376.0, "34885": 12777701376.0, "34890": 12777701376.0, "34895": 12777701376.0, "34900": 12777701376.0, "34905": 12777701376.0, "34910": 12777701376.0, "34915": 12777701376.0, "34920": 12777701376.0, "34925": 12777701376.0, "34930": 12777701376.0, "34935": 12777701376.0, "34940": 12777701376.0, "34945": 12777701376.0, "34950": 12777701376.0, "34955": 12777701376.0, "34960": 12777701376.0, "34965": 12777701376.0, "34970": 12777701376.0, "34975": 12777701376.0, "34980": 12777701376.0, "34985": 12777701376.0, "34990": 12777701376.0, "34995": 12777701376.0, "35000": 12777701376.0, "35005": 12777701376.0, "35010": 12777701376.0, "35015": 12777701376.0, "35020": 12777701376.0, "35025": 12777701376.0, "35030": 12777701376.0, "35035": 12777701376.0, "35040": 12777701376.0, "35045": 12777701376.0, "35050": 12777701376.0, "35055": 12777701376.0, "35060": 12777701376.0, "35065": 12777701376.0, "35070": 12777701376.0, "35075": 12777701376.0, "35080": 12777701376.0, "35085": 12777701376.0, "35090": 12777701376.0, "35095": 12777701376.0, "35100": 12777701376.0, "35105": 12777701376.0, "35110": 12777701376.0, "35115": 12777701376.0, "35120": 12777701376.0, "35125": 12777701376.0, "35130": 12777701376.0, "35135": 12777701376.0, "35140": 12777701376.0, "35145": 12777701376.0, "35150": 12777701376.0, "35155": 12777701376.0, "35160": 12777701376.0, "35165": 12777701376.0, "35170": 12777701376.0, "35175": 12777701376.0, "35180": 12777701376.0, "35185": 12777701376.0, "35190": 12777701376.0, "35195": 12777701376.0, "35200": 12777701376.0, "35205": 12777701376.0, "35210": 12777701376.0, "35215": 12777701376.0, "35220": 12777701376.0, "35225": 12777701376.0, "35230": 12777701376.0, "35235": 12777701376.0, "35240": 12777701376.0, "35245": 12777701376.0, "35250": 12777701376.0, "35255": 12777701376.0, "35260": 12777701376.0, "35265": 12777701376.0, "35270": 12777701376.0, "35275": 12777701376.0, "35280": 12777701376.0, "35285": 12777701376.0, "35290": 12777701376.0, "35295": 12777701376.0, "35300": 12777701376.0, "35305": 12777701376.0, "35310": 12777701376.0, "35315": 12777701376.0, "35320": 12777701376.0, "35325": 12777701376.0, "35330": 12777701376.0, "35335": 12777701376.0, "35340": 12777701376.0, "35345": 12777701376.0, "35350": 12777701376.0, "35355": 12777701376.0, "35360": 12777701376.0, "35365": 12777701376.0, "35370": 12777701376.0, "35375": 12777701376.0, "35380": 12777701376.0, "35385": 12777701376.0, "35390": 12777701376.0, "35395": 12777701376.0, "35400": 12777701376.0, "35405": 12777701376.0, "35410": 12777701376.0, "35415": 12777701376.0, "35420": 12777701376.0, "35425": 12777701376.0, "35430": 12777701376.0, "35435": 12777701376.0, "35440": 12777701376.0, "35445": 12777701376.0, "35450": 12777701376.0, "35455": 12777701376.0, "35460": 12777701376.0, "35465": 12777701376.0, "35470": 12777701376.0, "35475": 12777701376.0, "35480": 12777701376.0, "35485": 12777701376.0, "35490": 12777701376.0, "35495": 12777701376.0, "35500": 12777701376.0, "35505": 12777701376.0, "35510": 12777701376.0, "35515": 12777701376.0, "35520": 12777701376.0, "35525": 12777701376.0, "35530": 12777701376.0, "35535": 12777701376.0, "35540": 12777701376.0, "35545": 12777701376.0, "35550": 12777701376.0, "35555": 12777701376.0, "35560": 12777701376.0, "35565": 12777701376.0, "35570": 12777701376.0, "35575": 12777701376.0, "35580": 12777701376.0, "35585": 12777701376.0, "35590": 12777701376.0, "35595": 12777701376.0, "35600": 12777701376.0, "35605": 12777701376.0, "35610": 12777701376.0, "35615": 12777701376.0, "35620": 12777701376.0, "35625": 12777701376.0, "35630": 12777701376.0, "35635": 12777701376.0, "35640": 12777701376.0, "35645": 12777701376.0, "35650": 12777701376.0, "35655": 12777701376.0, "35660": 12777701376.0, "35665": 12777701376.0, "35670": 12777701376.0, "35675": 12777701376.0, "35680": 12777701376.0, "35685": 12777701376.0, "35690": 12777701376.0, "35695": 12777701376.0, "35700": 12777701376.0, "35705": 12777701376.0, "35710": 12777701376.0, "35715": 12777701376.0, "35720": 12777701376.0, "35725": 12777701376.0, "35730": 12777701376.0, "35735": 12777701376.0, "35740": 12777701376.0, "35745": 12777701376.0, "35750": 12777701376.0, "35755": 12777701376.0, "35760": 12777701376.0, "35765": 12777701376.0, "35770": 12777701376.0, "35775": 12777701376.0, "35780": 12777701376.0, "35785": 12777701376.0, "35790": 12777701376.0, "35795": 12777701376.0, "35800": 12777701376.0, "35805": 12777701376.0, "35810": 12777701376.0, "35815": 12777701376.0, "35820": 12777701376.0, "35825": 12777701376.0, "35830": 12777701376.0, "35835": 12777701376.0, "35840": 12777701376.0, "35845": 12777701376.0, "35850": 12777701376.0, "35855": 12777701376.0, "35860": 12777701376.0, "35865": 12777701376.0, "35870": 12777701376.0, "35875": 12777701376.0, "35880": 12777701376.0, "35885": 12777701376.0, "35890": 12777701376.0, "35895": 12777701376.0, "35900": 12777701376.0, "35905": 12777701376.0, "35910": 12777701376.0, "35915": 12777701376.0, "35920": 12777701376.0, "35925": 12777701376.0, "35930": 12777701376.0, "35935": 12777701376.0, "35940": 12777701376.0, "35945": 12777701376.0, "35950": 12777701376.0, "35955": 12777701376.0, "35960": 12777701376.0, "35965": 12777701376.0, "35970": 12777701376.0, "35975": 12777701376.0, "35980": 12777701376.0, "35985": 12777701376.0, "35990": 12777701376.0, "35995": 12777701376.0, "36000": 12777701376.0, "36005": 12777701376.0, "36010": 12777701376.0, "36015": 12777701376.0, "36020": 12777701376.0, "36025": 12777701376.0, "36030": 12777701376.0, "36035": 12777701376.0, "36040": 12777701376.0, "36045": 12777701376.0, "36050": 12777701376.0, "36055": 12777701376.0, "36060": 12777701376.0, "36065": 12777701376.0, "36070": 12777701376.0, "36075": 12777701376.0, "36080": 12777701376.0, "36085": 12777701376.0, "36090": 12777701376.0, "36095": 12777701376.0, "36100": 12777701376.0, "36105": 12777701376.0, "36110": 12777701376.0, "36115": 12777701376.0, "36120": 12777701376.0, "36125": 12777701376.0, "36130": 12777701376.0, "36135": 12777701376.0, "36140": 12777701376.0, "36145": 12777701376.0, "36150": 12777701376.0, "36155": 12777701376.0, "36160": 12777701376.0, "36165": 12777701376.0, "36170": 12777701376.0, "36175": 12777701376.0, "36180": 12777701376.0, "36185": 12777701376.0, "36190": 12777701376.0, "36195": 12777701376.0, "36200": 12777701376.0, "36205": 12777701376.0, "36210": 12777701376.0, "36215": 12777701376.0, "36220": 12777701376.0, "36225": 12777701376.0, "36230": 12777701376.0, "36235": 12777701376.0, "36240": 12777701376.0, "36245": 12777701376.0, "36250": 12777701376.0, "36255": 12777701376.0, "36260": 12777701376.0, "36265": 12777701376.0, "36270": 12777701376.0, "36275": 12777701376.0, "36280": 12777701376.0, "36285": 12777701376.0, "36290": 12777701376.0, "36295": 12777701376.0, "36300": 12777701376.0, "36305": 12777701376.0, "36310": 12777701376.0, "36315": 12777701376.0, "36320": 12777701376.0, "36325": 12777701376.0, "36330": 12777701376.0, "36335": 12777701376.0, "36340": 12777701376.0, "36345": 12777701376.0, "36350": 12777701376.0, "36355": 12777701376.0, "36360": 12777701376.0, "36365": 12777701376.0, "36370": 12777701376.0, "36375": 12777701376.0, "36380": 12777701376.0, "36385": 12777701376.0, "36390": 12777701376.0, "36395": 12777701376.0, "36400": 12777701376.0, "36405": 12777701376.0, "36410": 12777701376.0, "36415": 12777701376.0, "36420": 12777701376.0, "36425": 12777701376.0, "36430": 12777701376.0, "36435": 12777701376.0, "36440": 12777701376.0, "36445": 12777701376.0, "36450": 12777701376.0, "36455": 12777701376.0, "36460": 12777701376.0, "36465": 12777701376.0, "36470": 12777701376.0, "36475": 12777701376.0, "36480": 12777701376.0, "36485": 12777701376.0, "36490": 12777701376.0, "36495": 12777701376.0, "36500": 12777701376.0, "36505": 12777701376.0, "36510": 12777701376.0, "36515": 12777701376.0, "36520": 12777701376.0, "36525": 12777701376.0, "36530": 12777701376.0, "36535": 12777701376.0, "36540": 12777701376.0, "36545": 12777701376.0, "36550": 12777701376.0, "36555": 12777701376.0, "36560": 12777701376.0, "36565": 12777701376.0, "36570": 12777701376.0, "36575": 12777701376.0, "36580": 12777701376.0, "36585": 12777701376.0, "36590": 12777701376.0, "36595": 12777701376.0, "36600": 12777701376.0, "36605": 12777701376.0, "36610": 12777701376.0, "36615": 12777701376.0, "36620": 12777701376.0, "36625": 12777701376.0, "36630": 12777701376.0, "36635": 12777701376.0, "36640": 12777701376.0, "36645": 12777701376.0, "36650": 12777701376.0, "36655": 12777701376.0, "36660": 12777701376.0, "36665": 12777701376.0, "36670": 12777701376.0, "36675": 12777701376.0, "36680": 12777701376.0, "36685": 12777701376.0, "36690": 12777701376.0, "36695": 12777701376.0, "36700": 12777701376.0, "36705": 12777701376.0, "36710": 12777701376.0, "36715": 12777701376.0, "36720": 12777701376.0, "36725": 12777701376.0, "36730": 12777701376.0, "36735": 12777701376.0, "36740": 12777701376.0, "36745": 12777701376.0, "36750": 12777701376.0, "36755": 12777701376.0, "36760": 12777701376.0, "36765": 12777701376.0, "36770": 12777701376.0, "36775": 12777701376.0, "36780": 12777701376.0, "36785": 12777701376.0, "36790": 12777701376.0, "36795": 12777701376.0, "36800": 12777701376.0, "36805": 12777701376.0, "36810": 12777701376.0, "36815": 12777701376.0, "36820": 12777701376.0, "36825": 12777701376.0, "36830": 12777701376.0, "36835": 12777701376.0, "36840": 12777701376.0, "36845": 12777701376.0, "36850": 12777701376.0, "36855": 12777701376.0, "36860": 12777701376.0, "36865": 12777701376.0, "36870": 12777701376.0, "36875": 12777701376.0, "36880": 12777701376.0, "36885": 12777701376.0, "36890": 12777701376.0, "36895": 12777701376.0, "36900": 12777701376.0, "36905": 12777701376.0, "36910": 12777701376.0, "36915": 12777701376.0, "36920": 12777701376.0, "36925": 12777701376.0, "36930": 12777701376.0, "36935": 12777701376.0, "36940": 12777701376.0, "36945": 12777701376.0, "36950": 12777701376.0, "36955": 12777701376.0, "36960": 12777701376.0, "36965": 12777701376.0, "36970": 12777701376.0, "36975": 12777701376.0, "36980": 12777701376.0, "36985": 12777701376.0, "36990": 12777701376.0, "36995": 12777701376.0, "37000": 12777701376.0, "37005": 12777701376.0, "37010": 12777701376.0, "37015": 12777701376.0, "37020": 12777701376.0, "37025": 12777701376.0, "37030": 12777701376.0, "37035": 12777701376.0, "37040": 12777701376.0, "37045": 12777701376.0, "37050": 12777701376.0, "37055": 12777701376.0, "37060": 12777701376.0, "37065": 12777701376.0, "37070": 12777701376.0, "37075": 12777701376.0, "37080": 12777701376.0, "37085": 12777701376.0, "37090": 12777701376.0, "37095": 12777701376.0, "37100": 12777701376.0, "37105": 12777701376.0, "37110": 12777701376.0, "37115": 12777701376.0, "37120": 12777701376.0, "37125": 12777701376.0, "37130": 12777701376.0, "37135": 12777701376.0, "37140": 12777701376.0, "37145": 12777701376.0, "37150": 12777701376.0, "37155": 12777701376.0, "37160": 12777701376.0, "37165": 12777701376.0, "37170": 12777701376.0, "37175": 12777701376.0, "37180": 12777701376.0, "37185": 12777701376.0, "37190": 12777701376.0, "37195": 12777701376.0, "37200": 12777701376.0, "37205": 12777701376.0, "37210": 12777701376.0, "37215": 12777701376.0, "37220": 12777701376.0, "37225": 12777701376.0, "37230": 12777701376.0, "37235": 12777701376.0, "37240": 12777701376.0, "37245": 12777701376.0, "37250": 12777701376.0, "37255": 12777701376.0, "37260": 12777701376.0, "37265": 12777701376.0, "37270": 12777701376.0, "37275": 12777701376.0, "37280": 12777701376.0, "37285": 12777701376.0, "37290": 12777701376.0, "37295": 12777701376.0, "37300": 12777701376.0, "37305": 12777701376.0, "37310": 12777701376.0, "37315": 12777701376.0, "37320": 12777701376.0, "37325": 12777701376.0, "37330": 12777701376.0, "37335": 12777701376.0, "37340": 12777701376.0, "37345": 12777701376.0, "37350": 12777701376.0, "37355": 12777701376.0, "37360": 12777701376.0, "37365": 12777701376.0, "37370": 12777701376.0, "37375": 12777701376.0, "37380": 12777701376.0, "37385": 12777701376.0, "37390": 12777701376.0, "37395": 12777701376.0, "37400": 12777701376.0, "37405": 12777701376.0, "37410": 12777701376.0, "37415": 12777701376.0, "37420": 12777701376.0, "37425": 12777701376.0, "37430": 12777701376.0, "37435": 12777701376.0, "37440": 12777701376.0, "37445": 12777701376.0, "37450": 12777701376.0, "37455": 12777701376.0, "37460": 12777701376.0, "37465": 12777701376.0, "37470": 12777701376.0, "37475": 12777701376.0, "37480": 12777701376.0, "37485": 12777701376.0, "37490": 12777701376.0, "37495": 12777701376.0, "37500": 12777701376.0, "37505": 12777701376.0, "37510": 12777701376.0, "37515": 12777701376.0, "37520": 12777701376.0, "37525": 12777701376.0, "37530": 12777701376.0, "37535": 12777701376.0, "37540": 12777701376.0, "37545": 12777701376.0, "37550": 12777701376.0, "37555": 12777701376.0, "37560": 12777701376.0, "37565": 12777701376.0, "37570": 12777701376.0, "37575": 12777701376.0, "37580": 12777701376.0, "37585": 12777701376.0, "37590": 12777701376.0, "37595": 12777701376.0, "37600": 12777701376.0, "37605": 12777701376.0, "37610": 12777701376.0, "37615": 12777701376.0, "37620": 12777701376.0, "37625": 12777701376.0, "37630": 12777701376.0, "37635": 12777701376.0, "37640": 12777701376.0, "37645": 12777701376.0, "37650": 12777701376.0, "37655": 12777701376.0, "37660": 12777701376.0, "37665": 12777701376.0, "37670": 12777701376.0, "37675": 12777701376.0, "37680": 12777701376.0, "37685": 12777701376.0, "37690": 12777701376.0, "37695": 12777701376.0, "37700": 12777701376.0, "37705": 12777701376.0, "37710": 12777701376.0, "37715": 12777701376.0, "37720": 12777701376.0, "37725": 12777701376.0, "37730": 12777701376.0, "37735": 12777701376.0, "37740": 12777701376.0, "37745": 12777701376.0, "37750": 12777701376.0, "37755": 12777701376.0, "37760": 12777701376.0, "37765": 12777701376.0, "37770": 12777701376.0, "37775": 12777701376.0, "37780": 12777701376.0, "37785": 12777701376.0, "37790": 12777701376.0, "37795": 12777701376.0, "37800": 12777701376.0, "37805": 12777701376.0, "37810": 12777701376.0, "37815": 12777701376.0, "37820": 12777701376.0, "37825": 12777701376.0, "37830": 12777701376.0, "37835": 12777701376.0, "37840": 12777701376.0, "37845": 12777701376.0, "37850": 12777701376.0, "37855": 12777701376.0, "37860": 12777701376.0, "37865": 12777701376.0, "37870": 12777701376.0, "37875": 12777701376.0, "37880": 12777701376.0, "37885": 12777701376.0, "37890": 12777701376.0, "37895": 12777701376.0, "37900": 12777701376.0, "37905": 12777701376.0, "37910": 12777701376.0, "37915": 12777701376.0, "37920": 12777701376.0, "37925": 12777701376.0, "37930": 12777701376.0, "37935": 12777701376.0, "37940": 12777701376.0, "37945": 12777701376.0, "37950": 12777701376.0, "37955": 12777701376.0, "37960": 12777701376.0, "37965": 12777701376.0, "37970": 12777701376.0, "37975": 12777701376.0, "37980": 12777701376.0, "37985": 12777701376.0, "37990": 12777701376.0, "37995": 12777701376.0, "38000": 12777701376.0, "38005": 12777701376.0, "38010": 12777701376.0, "38015": 12777701376.0, "38020": 12777701376.0, "38025": 12777701376.0, "38030": 12777701376.0, "38035": 12777701376.0, "38040": 12777701376.0, "38045": 12777701376.0, "38050": 12777701376.0, "38055": 12777701376.0, "38060": 12777701376.0, "38065": 12777701376.0, "38070": 12777701376.0, "38075": 12777701376.0, "38080": 12777701376.0, "38085": 12777701376.0, "38090": 12777701376.0, "38095": 12777701376.0, "38100": 12777701376.0, "38105": 12777701376.0, "38110": 12777701376.0, "38115": 12777701376.0, "38120": 12777701376.0, "38125": 12777701376.0, "38130": 12777701376.0, "38135": 12777701376.0, "38140": 12777701376.0, "38145": 12777701376.0, "38150": 12777701376.0, "38155": 12777701376.0, "38160": 12777701376.0, "38165": 12777701376.0, "38170": 12777701376.0, "38175": 12777701376.0, "38180": 12777701376.0, "38185": 12777701376.0, "38190": 12777701376.0, "38195": 12777701376.0, "38200": 12777701376.0, "38205": 12777701376.0, "38210": 12777701376.0, "38215": 12777701376.0, "38220": 12777701376.0, "38225": 12777701376.0, "38230": 12777701376.0, "38235": 12777701376.0, "38240": 12777701376.0, "38245": 12777701376.0, "38250": 12777701376.0, "38255": 12777701376.0, "38260": 12777701376.0, "38265": 12777701376.0, "38270": 12777701376.0, "38275": 12777701376.0, "38280": 12777701376.0, "38285": 12777701376.0, "38290": 12777701376.0, "38295": 12777701376.0, "38300": 12777701376.0, "38305": 12777701376.0, "38310": 12777701376.0, "38315": 12777701376.0, "38320": 12777701376.0, "38325": 12777701376.0, "38330": 12777701376.0, "38335": 12777701376.0, "38340": 12777701376.0, "38345": 12777701376.0, "38350": 12777701376.0, "38355": 12777701376.0, "38360": 12777701376.0, "38365": 12777701376.0, "38370": 12777701376.0, "38375": 12777701376.0, "38380": 12777701376.0, "38385": 12777701376.0, "38390": 12777701376.0, "38395": 12777701376.0, "38400": 12777701376.0, "38405": 12777701376.0, "38410": 12777701376.0, "38415": 12777701376.0, "38420": 12777701376.0, "38425": 12777701376.0, "38430": 12777701376.0, "38435": 12777701376.0, "38440": 12777701376.0, "38445": 12777701376.0, "38450": 12777701376.0, "38455": 12777701376.0, "38460": 12777701376.0, "38465": 12777701376.0, "38470": 12777701376.0, "38475": 12777701376.0, "38480": 12777701376.0, "38485": 12777701376.0, "38490": 12777701376.0, "38495": 12777701376.0, "38500": 12777701376.0, "38505": 12777701376.0, "38510": 12777701376.0, "38515": 12777701376.0, "38520": 12777701376.0, "38525": 12777701376.0, "38530": 12777701376.0, "38535": 12777701376.0, "38540": 12777701376.0, "38545": 12777701376.0, "38550": 12777701376.0, "38555": 12777701376.0, "38560": 12777701376.0, "38565": 12777701376.0, "38570": 12777701376.0, "38575": 12777701376.0, "38580": 12777701376.0, "38585": 12777701376.0, "38590": 12777701376.0, "38595": 12777701376.0, "38600": 12777701376.0, "38605": 12777701376.0, "38610": 12777701376.0, "38615": 12777701376.0, "38620": 12777701376.0, "38625": 12777701376.0, "38630": 12777701376.0, "38635": 12777701376.0, "38640": 12777701376.0, "38645": 12777701376.0, "38650": 12777701376.0, "38655": 12777701376.0, "38660": 12777701376.0, "38665": 12777701376.0, "38670": 12777701376.0, "38675": 12777701376.0, "38680": 12777701376.0, "38685": 12777701376.0, "38690": 12777701376.0, "38695": 12777701376.0, "38700": 12777701376.0, "38705": 12777701376.0, "38710": 12777701376.0, "38715": 12777701376.0, "38720": 12777701376.0, "38725": 12777701376.0, "38730": 12777701376.0, "38735": 12777701376.0, "38740": 12777701376.0, "38745": 12777701376.0, "38750": 12777701376.0, "38755": 12777701376.0, "38760": 12777701376.0, "38765": 12777701376.0, "38770": 12777701376.0, "38775": 12777701376.0, "38780": 12777701376.0, "38785": 12777701376.0, "38790": 12777701376.0, "38795": 12777701376.0, "38800": 12777701376.0, "38805": 12777701376.0, "38810": 12777701376.0, "38815": 12777701376.0, "38820": 12777701376.0, "38825": 12777701376.0, "38830": 12777701376.0, "38835": 12777701376.0, "38840": 12777701376.0, "38845": 12777701376.0, "38850": 12777701376.0, "38855": 12777701376.0, "38860": 12777701376.0, "38865": 12777701376.0, "38870": 12777701376.0, "38875": 12777701376.0, "38880": 12777701376.0, "38885": 12777701376.0, "38890": 12777701376.0, "38895": 12777701376.0, "38900": 12777701376.0, "38905": 12777701376.0, "38910": 12777701376.0, "38915": 12777701376.0, "38920": 12777701376.0, "38925": 12777701376.0, "38930": 12777701376.0, "38935": 12777701376.0, "38940": 12777701376.0, "38945": 12777701376.0, "38950": 12777701376.0, "38955": 12777701376.0, "38960": 12777701376.0, "38965": 12777701376.0, "38970": 12777701376.0, "38975": 12777701376.0, "38980": 12777701376.0, "38985": 12777701376.0, "38990": 12777701376.0, "38995": 12777701376.0, "39000": 12777701376.0, "39005": 12777701376.0, "39010": 12777701376.0, "39015": 12777701376.0, "39020": 12777701376.0, "39025": 12777701376.0, "39030": 12777701376.0, "39035": 12777701376.0, "39040": 12777701376.0, "39045": 12777701376.0, "39050": 12777701376.0, "39055": 12777701376.0, "39060": 12777701376.0, "39065": 12777701376.0, "39070": 12777701376.0, "39075": 12777701376.0, "39080": 12777701376.0, "39085": 12777701376.0, "39090": 12777701376.0, "39095": 12777701376.0, "39100": 12777701376.0, "39105": 12777701376.0, "39110": 12777701376.0, "39115": 12777701376.0, "39120": 12777701376.0, "39125": 12777701376.0, "39130": 12777701376.0, "39135": 12777701376.0, "39140": 12777701376.0, "39145": 12777701376.0, "39150": 12777701376.0, "39155": 12777701376.0, "39160": 12777701376.0, "39165": 12777701376.0, "39170": 12777701376.0, "39175": 12777701376.0, "39180": 12777701376.0, "39185": 12777701376.0, "39190": 12777701376.0, "39195": 12777701376.0, "39200": 12777701376.0, "39205": 12777701376.0, "39210": 12777701376.0, "39215": 12777701376.0, "39220": 12777701376.0, "39225": 12777701376.0, "39230": 12777701376.0, "39235": 12777701376.0, "39240": 12777701376.0, "39245": 12777701376.0, "39250": 12777701376.0, "39255": 12777701376.0, "39260": 12777701376.0, "39265": 12777701376.0, "39270": 12777701376.0, "39275": 12777701376.0, "39280": 12777701376.0, "39285": 12777701376.0, "39290": 12777701376.0, "39295": 12777701376.0, "39300": 12777701376.0, "39305": 12777701376.0, "39310": 12777701376.0, "39315": 12777701376.0, "39320": 12777701376.0, "39325": 12777701376.0, "39330": 12777701376.0, "39335": 12777701376.0, "39340": 12777701376.0, "39345": 12777701376.0, "39350": 12777701376.0, "39355": 12777701376.0, "39360": 12777701376.0, "39365": 12777701376.0, "39370": 12777701376.0, "39375": 12777701376.0, "39380": 12777701376.0, "39385": 12777701376.0, "39390": 12777701376.0, "39395": 12777701376.0, "39400": 12777701376.0, "39405": 12777701376.0, "39410": 12777701376.0, "39415": 12777701376.0, "39420": 12777701376.0, "39425": 12777701376.0, "39430": 12777701376.0, "39435": 12777701376.0, "39440": 12777701376.0, "39445": 12777701376.0, "39450": 12777701376.0, "39455": 12777701376.0, "39460": 12777701376.0, "39465": 12777701376.0, "39470": 12777701376.0, "39475": 12777701376.0, "39480": 12777701376.0, "39485": 12777701376.0, "39490": 12777701376.0, "39495": 12777701376.0, "39500": 12777701376.0, "39505": 12777701376.0, "39510": 12777701376.0, "39515": 12777701376.0, "39520": 12777701376.0, "39525": 12777701376.0, "39530": 12777701376.0, "39535": 12777701376.0, "39540": 12777701376.0, "39545": 12777701376.0, "39550": 12777701376.0, "39555": 12777701376.0, "39560": 12777701376.0, "39565": 12777701376.0, "39570": 12777701376.0, "39575": 12777701376.0, "39580": 12777701376.0, "39585": 12777701376.0, "39590": 12777701376.0, "39595": 12777701376.0, "39600": 12777701376.0, "39605": 12777701376.0, "39610": 12777701376.0, "39615": 12777701376.0, "39620": 12777701376.0, "39625": 12777701376.0, "39630": 12777701376.0, "39635": 12777701376.0, "39640": 12777701376.0, "39645": 12777701376.0, "39650": 12777701376.0, "39655": 12777701376.0, "39660": 12777701376.0, "39665": 12777701376.0, "39670": 12777701376.0, "39675": 12777701376.0, "39680": 12777701376.0, "39685": 12777701376.0, "39690": 12777701376.0, "39695": 12777701376.0, "39700": 12777701376.0, "39705": 12777701376.0, "39710": 12777701376.0, "39715": 12777701376.0, "39720": 12777701376.0, "39725": 12777701376.0, "39730": 12777701376.0, "39735": 12777701376.0, "39740": 12777701376.0, "39745": 12777701376.0, "39750": 12777701376.0, "39755": 12777701376.0, "39760": 12777701376.0, "39765": 12777701376.0, "39770": 12777701376.0, "39775": 12777701376.0, "39780": 12777701376.0, "39785": 12777701376.0, "39790": 12777701376.0, "39795": 12777701376.0, "39800": 12777701376.0, "39805": 12777701376.0, "39810": 12777701376.0, "39815": 12777701376.0, "39820": 12777701376.0, "39825": 12777701376.0, "39830": 12777701376.0, "39835": 12777701376.0, "39840": 12777701376.0, "39845": 12777701376.0, "39850": 12777701376.0, "39855": 12777701376.0, "39860": 12777701376.0, "39865": 12777701376.0, "39870": 12777701376.0, "39875": 12777701376.0, "39880": 12777701376.0, "39885": 12777701376.0, "39890": 12777701376.0, "39895": 12777701376.0, "39900": 12777701376.0, "39905": 12777701376.0, "39910": 12777701376.0, "39915": 12777701376.0, "39920": 12777701376.0, "39925": 12777701376.0, "39930": 12777701376.0, "39935": 12777701376.0, "39940": 12777701376.0, "39945": 12777701376.0, "39950": 12777701376.0, "39955": 12777701376.0, "39960": 12777701376.0, "39965": 12777701376.0, "39970": 12777701376.0, "39975": 12777701376.0, "39980": 12777701376.0, "39985": 12777701376.0, "39990": 12777701376.0, "39995": 12777701376.0, "40000": 12777701376.0, "40005": 12777701376.0, "40010": 12777701376.0, "40015": 12777701376.0, "40020": 12777701376.0, "40025": 12777701376.0, "40030": 12777701376.0, "40035": 12777701376.0, "40040": 12777701376.0, "40045": 12777701376.0, "40050": 12777701376.0, "40055": 12777701376.0, "40060": 12777701376.0, "40065": 12777701376.0, "40070": 12777701376.0, "40075": 12777701376.0, "40080": 12777701376.0, "40085": 12777701376.0, "40090": 12777701376.0, "40095": 12777701376.0, "40100": 12777701376.0, "40105": 12777701376.0, "40110": 12777701376.0, "40115": 12777701376.0, "40120": 12777701376.0, "40125": 12777701376.0, "40130": 12777701376.0, "40135": 12777701376.0, "40140": 12777701376.0, "40145": 12777701376.0, "40150": 12777701376.0, "40155": 12777701376.0, "40160": 12777701376.0, "40165": 12777701376.0, "40170": 12777701376.0, "40175": 12777701376.0, "40180": 12777701376.0, "40185": 12777701376.0, "40190": 12777701376.0, "40195": 12777701376.0, "40200": 12777701376.0, "40205": 12777701376.0, "40210": 12777701376.0, "40215": 12777701376.0, "40220": 12777701376.0, "40225": 12777701376.0, "40230": 12777701376.0, "40235": 12777701376.0, "40240": 12777701376.0, "40245": 12777701376.0, "40250": 12777701376.0, "40255": 12777701376.0, "40260": 12777701376.0, "40265": 12777701376.0, "40270": 12777701376.0, "40275": 12777701376.0, "40280": 12777701376.0, "40285": 12777701376.0, "40290": 12777701376.0, "40295": 12777701376.0, "40300": 12777701376.0, "40305": 12777701376.0, "40310": 12777701376.0, "40315": 12777701376.0, "40320": 12777701376.0, "40325": 12777701376.0, "40330": 12777701376.0, "40335": 12777701376.0, "40340": 12777701376.0, "40345": 12777701376.0, "40350": 12777701376.0, "40355": 12777701376.0, "40360": 12777701376.0, "40365": 12777701376.0, "40370": 12777701376.0, "40375": 12777701376.0, "40380": 12777701376.0, "40385": 12777701376.0, "40390": 12777701376.0, "40395": 12777701376.0, "40400": 12777701376.0, "40405": 12777701376.0, "40410": 12777701376.0, "40415": 12777701376.0, "40420": 12777701376.0, "40425": 12777701376.0, "40430": 12777701376.0, "40435": 12777701376.0, "40440": 12777701376.0, "40445": 12777701376.0, "40450": 12777701376.0, "40455": 12777701376.0, "40460": 12777701376.0, "40465": 12777701376.0, "40470": 12777701376.0, "40475": 12777701376.0, "40480": 12777701376.0, "40485": 12777701376.0, "40490": 12777701376.0, "40495": 12777701376.0, "40500": 12777701376.0, "40505": 12777701376.0, "40510": 12777701376.0, "40515": 12777701376.0, "40520": 12777701376.0, "40525": 12777701376.0, "40530": 12777701376.0, "40535": 12777701376.0, "40540": 12777701376.0, "40545": 12777701376.0, "40550": 12777701376.0, "40555": 12777701376.0, "40560": 12777701376.0, "40565": 12777701376.0, "40570": 12777701376.0, "40575": 12777701376.0, "40580": 12777701376.0, "40585": 12777701376.0, "40590": 12777701376.0, "40595": 12777701376.0, "40600": 12777701376.0, "40605": 12777701376.0, "40610": 12777701376.0, "40615": 12777701376.0, "40620": 12777701376.0, "40625": 12777701376.0, "40630": 12777701376.0, "40635": 12777701376.0, "40640": 12777701376.0, "40645": 12777701376.0, "40650": 12777701376.0, "40655": 12777701376.0, "40660": 12777701376.0, "40665": 12777701376.0, "40670": 12777701376.0, "40675": 12777701376.0, "40680": 12777701376.0, "40685": 12777701376.0, "40690": 12777701376.0, "40695": 12777701376.0, "40700": 12777701376.0, "40705": 12777701376.0, "40710": 12777701376.0, "40715": 12777701376.0, "40720": 12777701376.0, "40725": 12777701376.0, "40730": 12777701376.0, "40735": 12777701376.0, "40740": 12777701376.0, "40745": 12777701376.0, "40750": 12777701376.0, "40755": 12777701376.0, "40760": 12777701376.0, "40765": 12777701376.0, "40770": 12777701376.0, "40775": 12777701376.0, "40780": 12777701376.0, "40785": 12777701376.0, "40790": 12777701376.0, "40795": 12777701376.0, "40800": 12777701376.0, "40805": 12777701376.0, "40810": 12777701376.0, "40815": 12777701376.0, "40820": 12777701376.0, "40825": 12777701376.0, "40830": 12777701376.0, "40835": 12777701376.0, "40840": 12777701376.0, "40845": 12777701376.0, "40850": 12777701376.0, "40855": 12777701376.0, "40860": 12777701376.0, "40865": 12777701376.0, "40870": 12777701376.0, "40875": 12777701376.0, "40880": 12777701376.0, "40885": 12777701376.0, "40890": 12777701376.0, "40895": 12777701376.0, "40900": 12777701376.0, "40905": 12777701376.0, "40910": 12777701376.0, "40915": 12777701376.0, "40920": 12777701376.0, "40925": 12777701376.0, "40930": 12777701376.0, "40935": 12777701376.0, "40940": 12777701376.0, "40945": 12777701376.0, "40950": 12777701376.0, "40955": 12777701376.0, "40960": 12777701376.0, "40965": 12777701376.0, "40970": 12777701376.0, "40975": 12777701376.0, "40980": 12777701376.0, "40985": 12777701376.0, "40990": 12777701376.0, "40995": 12777701376.0, "41000": 12777701376.0, "41005": 12777701376.0, "41010": 12777701376.0, "41015": 12777701376.0, "41020": 12777701376.0, "41025": 12777701376.0, "41030": 12777701376.0, "41035": 12777701376.0, "41040": 12777701376.0, "41045": 12777701376.0, "41050": 12777701376.0, "41055": 12777701376.0, "41060": 12777701376.0, "41065": 12777701376.0, "41070": 12777701376.0, "41075": 12777701376.0, "41080": 12777701376.0, "41085": 12777701376.0, "41090": 12777701376.0, "41095": 12777701376.0, "41100": 12777701376.0, "41105": 12777701376.0, "41110": 12777701376.0, "41115": 12777701376.0, "41120": 12777701376.0, "41125": 12777701376.0, "41130": 12777701376.0, "41135": 12777701376.0, "41140": 12777701376.0, "41145": 12777701376.0, "41150": 12777701376.0, "41155": 12777701376.0, "41160": 12777701376.0, "41165": 12777701376.0, "41170": 12777701376.0, "41175": 12777701376.0, "41180": 12777701376.0, "41185": 12777701376.0, "41190": 12777701376.0, "41195": 12777701376.0, "41200": 12777701376.0, "41205": 12777701376.0, "41210": 12777701376.0, "41215": 12777701376.0, "41220": 12777701376.0, "41225": 12777701376.0, "41230": 12777701376.0, "41235": 12777701376.0, "41240": 12777701376.0, "41245": 12777701376.0, "41250": 12777701376.0, "41255": 12777701376.0, "41260": 12777701376.0, "41265": 12777701376.0, "41270": 12777701376.0, "41275": 12777701376.0, "41280": 12777701376.0, "41285": 12777701376.0, "41290": 12777701376.0, "41295": 12777701376.0, "41300": 12777701376.0, "41305": 12777701376.0, "41310": 12777701376.0, "41315": 12777701376.0, "41320": 12777701376.0, "41325": 12777701376.0, "41330": 12777701376.0, "41335": 12777701376.0, "41340": 12777701376.0, "41345": 12777701376.0, "41350": 12777701376.0, "41355": 12777701376.0, "41360": 12777701376.0, "41365": 12777701376.0, "41370": 12777701376.0, "41375": 12777701376.0, "41380": 12777701376.0, "41385": 12777701376.0, "41390": 12777701376.0, "41395": 12777701376.0, "41400": 12777701376.0, "41405": 12777701376.0, "41410": 12777701376.0, "41415": 12777701376.0, "41420": 12777701376.0, "41425": 12777701376.0, "41430": 12777701376.0, "41435": 12777701376.0, "41440": 12777701376.0, "41445": 12777701376.0, "41450": 12777701376.0, "41455": 12777701376.0, "41460": 12777701376.0, "41465": 12777701376.0, "41470": 12777701376.0, "41475": 12777701376.0, "41480": 12777701376.0, "41485": 12777701376.0, "41490": 12777701376.0, "41495": 12777701376.0, "41500": 12777701376.0, "41505": 12777701376.0, "41510": 12777701376.0, "41515": 12777701376.0, "41520": 12777701376.0, "41525": 12777701376.0, "41530": 12777701376.0, "41535": 12777701376.0, "41540": 12777701376.0, "41545": 12777701376.0, "41550": 12777701376.0, "41555": 12777701376.0, "41560": 12777701376.0, "41565": 12777701376.0, "41570": 12777701376.0, "41575": 12777701376.0, "41580": 12777701376.0, "41585": 12777701376.0, "41590": 12777701376.0, "41595": 12777701376.0, "41600": 12777701376.0, "41605": 12777701376.0, "41610": 12777701376.0, "41615": 12777701376.0, "41620": 12777701376.0, "41625": 12777701376.0, "41630": 12777701376.0, "41635": 12777701376.0, "41640": 12777701376.0, "41645": 12777701376.0, "41650": 12777701376.0, "41655": 12777701376.0, "41660": 12777701376.0, "41665": 12777701376.0, "41670": 12777701376.0, "41675": 12777701376.0, "41680": 12777701376.0, "41685": 12777701376.0, "41690": 12777701376.0, "41695": 12777701376.0, "41700": 12777701376.0, "41705": 12777701376.0, "41710": 12777701376.0, "41715": 12777701376.0, "41720": 12777701376.0, "41725": 12777701376.0, "41730": 12777701376.0, "41735": 12777701376.0, "41740": 12777701376.0, "41745": 12777701376.0, "41750": 12777701376.0, "41755": 12777701376.0, "41760": 12777701376.0, "41765": 12777701376.0, "41770": 12777701376.0, "41775": 12777701376.0, "41780": 12777701376.0, "41785": 12777701376.0, "41790": 12777701376.0, "41795": 12777701376.0, "41800": 12777701376.0, "41805": 12777701376.0, "41810": 12777701376.0, "41815": 12777701376.0, "41820": 12777701376.0, "41825": 12777701376.0, "41830": 12777701376.0, "41835": 12777701376.0, "41840": 12777701376.0, "41845": 12777701376.0, "41850": 12777701376.0, "41855": 12777701376.0, "41860": 12777701376.0, "41865": 12777701376.0, "41870": 12777701376.0, "41875": 12777701376.0, "41880": 12777701376.0, "41885": 12777701376.0, "41890": 12777701376.0, "41895": 12777701376.0, "41900": 12777701376.0, "41905": 12777701376.0, "41910": 12777701376.0, "41915": 12777701376.0, "41920": 12777701376.0, "41925": 12777701376.0, "41930": 12777701376.0, "41935": 12777701376.0, "41940": 12777701376.0, "41945": 12777701376.0, "41950": 12777701376.0, "41955": 12777701376.0, "41960": 12777701376.0, "41965": 12777701376.0, "41970": 12777701376.0, "41975": 12777701376.0, "41980": 12777701376.0, "41985": 12777701376.0, "41990": 12777701376.0, "41995": 12777701376.0, "42000": 12777701376.0, "42005": 12777701376.0, "42010": 12777701376.0, "42015": 12777701376.0, "42020": 12777701376.0, "42025": 12777701376.0, "42030": 12777701376.0, "42035": 12777701376.0, "42040": 12777701376.0, "42045": 12777701376.0, "42050": 12777701376.0, "42055": 12777701376.0, "42060": 12777701376.0, "42065": 12777701376.0, "42070": 12777701376.0, "42075": 12777701376.0, "42080": 12777701376.0, "42085": 12777701376.0, "42090": 12777701376.0, "42095": 12777701376.0, "42100": 12777701376.0, "42105": 12777701376.0, "42110": 12777701376.0, "42115": 12777701376.0, "42120": 12777701376.0, "42125": 12777701376.0, "42130": 12777701376.0, "42135": 12777701376.0, "42140": 12777701376.0, "42145": 12777701376.0, "42150": 12777701376.0, "42155": 12777701376.0, "42160": 12777701376.0, "42165": 12777701376.0, "42170": 12777701376.0, "42175": 12777701376.0, "42180": 12777701376.0, "42185": 12777701376.0, "42190": 12777701376.0, "42195": 12777701376.0, "42200": 12777701376.0, "42205": 12777701376.0, "42210": 12777701376.0, "42215": 12777701376.0, "42220": 12777701376.0, "42225": 12777701376.0, "42230": 12777701376.0, "42235": 12777701376.0, "42240": 12777701376.0, "42245": 12777701376.0, "42250": 12777701376.0, "42255": 12777701376.0, "42260": 12777701376.0, "42265": 12777701376.0, "42270": 12777701376.0, "42275": 12777701376.0, "42280": 12777701376.0, "42285": 12777701376.0, "42290": 12777701376.0, "42295": 12777701376.0, "42300": 12777701376.0, "42305": 12777701376.0, "42310": 12777701376.0, "42315": 12777701376.0, "42320": 12777701376.0, "42325": 12777701376.0, "42330": 12777701376.0, "42335": 12777701376.0, "42340": 12777701376.0, "42345": 12777701376.0, "42350": 12777701376.0, "42355": 12777701376.0, "42360": 12777701376.0, "42365": 12777701376.0, "42370": 12777701376.0, "42375": 12777701376.0, "42380": 12777701376.0, "42385": 12777701376.0, "42390": 12777701376.0, "42395": 12777701376.0, "42400": 12777701376.0, "42405": 12777701376.0, "42410": 12777701376.0, "42415": 12777701376.0, "42420": 12777701376.0, "42425": 12777701376.0, "42430": 12777701376.0, "42435": 12777701376.0, "42440": 12777701376.0, "42445": 12777701376.0, "42450": 12777701376.0, "42455": 12777701376.0, "42460": 12777701376.0, "42465": 12777701376.0, "42470": 12777701376.0, "42475": 12777701376.0, "42480": 12777701376.0, "42485": 12777701376.0, "42490": 12777701376.0, "42495": 12777701376.0, "42500": 12777701376.0, "42505": 12777701376.0, "42510": 12777701376.0, "42515": 12777701376.0, "42520": 12777701376.0, "42525": 12777701376.0, "42530": 12777701376.0, "42535": 12777701376.0, "42540": 12777701376.0, "42545": 12777701376.0, "42550": 12777701376.0, "42555": 12777701376.0, "42560": 12777701376.0, "42565": 12777701376.0, "42570": 12777701376.0, "42575": 12777701376.0, "42580": 12777701376.0, "42585": 12777701376.0, "42590": 12777701376.0, "42595": 12777701376.0, "42600": 12777701376.0, "42605": 12777701376.0, "42610": 12777701376.0, "42615": 12777701376.0, "42620": 12777701376.0, "42625": 12777701376.0, "42630": 12777701376.0, "42635": 12777701376.0, "42640": 12777701376.0, "42645": 12777701376.0, "42650": 12777701376.0, "42655": 12777701376.0, "42660": 12777701376.0, "42665": 12777701376.0, "42670": 12777701376.0, "42675": 12777701376.0, "42680": 12777701376.0, "42685": 12777701376.0, "42690": 12777701376.0, "42695": 12777701376.0, "42700": 12777701376.0, "42705": 12777701376.0, "42710": 12777701376.0, "42715": 12777701376.0, "42720": 12777701376.0, "42725": 12777701376.0, "42730": 12777701376.0, "42735": 12777701376.0, "42740": 12777701376.0, "42745": 12777701376.0, "42750": 12777701376.0, "42755": 12777701376.0, "42760": 12777701376.0, "42765": 12777701376.0, "42770": 12777701376.0, "42775": 12777701376.0, "42780": 12777701376.0, "42785": 12777701376.0, "42790": 12777701376.0, "42795": 12777701376.0, "42800": 12777701376.0, "42805": 12777701376.0, "42810": 12777701376.0, "42815": 12777701376.0, "42820": 12777701376.0, "42825": 12777701376.0, "42830": 12777701376.0, "42835": 12777701376.0, "42840": 12777701376.0, "42845": 12777701376.0, "42850": 12777701376.0, "42855": 12777701376.0, "42860": 12777701376.0, "42865": 12777701376.0, "42870": 12777701376.0, "42875": 12777701376.0, "42880": 12777701376.0, "42885": 12777701376.0, "42890": 12777701376.0, "42895": 12777701376.0, "42900": 12777701376.0, "42905": 12777701376.0, "42910": 12777701376.0, "42915": 12777701376.0, "42920": 12777701376.0, "42925": 12777701376.0, "42930": 12777701376.0, "42935": 12777701376.0, "42940": 12777701376.0, "42945": 12777701376.0, "42950": 12777701376.0, "42955": 12777701376.0, "42960": 12777701376.0, "42965": 12777701376.0, "42970": 12777701376.0, "42975": 12777701376.0, "42980": 12777701376.0, "42985": 12777701376.0, "42990": 12777701376.0, "42995": 12777701376.0, "43000": 12777701376.0, "43005": 12777701376.0, "43010": 12777701376.0, "43015": 12777701376.0, "43020": 12777701376.0, "43025": 12777701376.0, "43030": 12777701376.0, "43035": 12777701376.0, "43040": 12777701376.0, "43045": 12777701376.0, "43050": 12777701376.0, "43055": 12777701376.0, "43060": 12777701376.0, "43065": 12777701376.0, "43070": 12777701376.0, "43075": 12777701376.0, "43080": 12777701376.0, "43085": 12777701376.0, "43090": 12777701376.0, "43095": 12777701376.0, "43100": 12777701376.0, "43105": 12777701376.0, "43110": 12777701376.0, "43115": 12777701376.0, "43120": 12777701376.0, "43125": 12777701376.0, "43130": 12777701376.0, "43135": 12777701376.0, "43140": 12777701376.0, "43145": 12777701376.0, "43150": 12777701376.0, "43155": 12777701376.0, "43160": 12777701376.0, "43165": 12777701376.0, "43170": 12777701376.0, "43175": 12777701376.0, "43180": 12777701376.0, "43185": 12777701376.0, "43190": 12777701376.0, "43195": 12777701376.0, "43200": 12777701376.0, "43205": 12777701376.0, "43210": 12777701376.0, "43215": 12777701376.0, "43220": 12777701376.0, "43225": 12777701376.0, "43230": 12777701376.0, "43235": 12777701376.0, "43240": 12777701376.0, "43245": 12777701376.0, "43250": 12777701376.0, "43255": 12777701376.0, "43260": 12777701376.0, "43265": 12777701376.0, "43270": 12777701376.0, "43275": 12777701376.0, "43280": 12777701376.0, "43285": 12777701376.0, "43290": 12777701376.0, "43295": 12777701376.0, "43300": 12777701376.0, "43305": 12777701376.0, "43310": 12777701376.0, "43315": 12777701376.0, "43320": 12777701376.0, "43325": 12777701376.0, "43330": 12777701376.0, "43335": 12777701376.0, "43340": 12777701376.0, "43345": 12777701376.0, "43350": 12777701376.0, "43355": 12777701376.0, "43360": 12777701376.0, "43365": 12777701376.0, "43370": 12777701376.0, "43375": 12777701376.0, "43380": 12777701376.0, "43385": 12777701376.0, "43390": 12777701376.0, "43395": 12777701376.0, "43400": 12777701376.0, "43405": 12777701376.0, "43410": 12777701376.0, "43415": 12777701376.0, "43420": 12777701376.0, "43425": 12777701376.0, "43430": 12777701376.0, "43435": 12777701376.0, "43440": 12777701376.0, "43445": 12777701376.0, "43450": 12777701376.0, "43455": 12777701376.0, "43460": 12777701376.0, "43465": 12777701376.0, "43470": 12777701376.0, "43475": 12777701376.0, "43480": 12777701376.0, "43485": 12777701376.0, "43490": 12777701376.0, "43495": 12777701376.0, "43500": 12777701376.0, "43505": 12777701376.0, "43510": 12777701376.0, "43515": 12777701376.0, "43520": 12777701376.0, "43525": 12777701376.0, "43530": 12777701376.0, "43535": 12777701376.0, "43540": 12777701376.0, "43545": 12777701376.0, "43550": 12777701376.0, "43555": 12777701376.0, "43560": 12777701376.0, "43565": 12777701376.0, "43570": 12777701376.0, "43575": 12777701376.0, "43580": 12777701376.0, "43585": 12777701376.0, "43590": 12777701376.0, "43595": 12777701376.0, "43600": 12777701376.0, "43605": 12777701376.0, "43610": 12777701376.0, "43615": 12777701376.0, "43620": 12777701376.0, "43625": 12777701376.0, "43630": 12777701376.0, "43635": 12777701376.0, "43640": 12777701376.0, "43645": 12777701376.0, "43650": 12777701376.0, "43655": 12777701376.0, "43660": 12777701376.0, "43665": 12777701376.0, "43670": 12777701376.0, "43675": 12777701376.0, "43680": 12777701376.0, "43685": 12777701376.0, "43690": 12777701376.0, "43695": 12777701376.0, "43700": 12777701376.0, "43705": 12777701376.0, "43710": 12777701376.0, "43715": 12777701376.0, "43720": 12777701376.0, "43725": 12777701376.0, "43730": 12777701376.0, "43735": 12777701376.0, "43740": 12777701376.0, "43745": 12777701376.0, "43750": 12777701376.0, "43755": 12777701376.0, "43760": 12777701376.0, "43765": 12777701376.0, "43770": 12777701376.0, "43775": 12777701376.0, "43780": 12777701376.0, "43785": 12777701376.0, "43790": 12777701376.0, "43795": 12777701376.0, "43800": 12777701376.0, "43805": 12777701376.0, "43810": 12777701376.0, "43815": 12777701376.0, "43820": 12777701376.0, "43825": 12777701376.0, "43830": 12777701376.0, "43835": 12777701376.0, "43840": 12777701376.0, "43845": 12777701376.0, "43850": 12777701376.0, "43855": 12777701376.0, "43860": 12777701376.0, "43865": 12777701376.0, "43870": 12777701376.0, "43875": 12777701376.0, "43880": 12777701376.0, "43885": 12777701376.0, "43890": 12777701376.0, "43895": 12777701376.0, "43900": 12777701376.0, "43905": 12777701376.0, "43910": 12777701376.0, "43915": 12777701376.0, "43920": 12777701376.0, "43925": 12777701376.0, "43930": 12777701376.0, "43935": 12777701376.0, "43940": 12777701376.0, "43945": 12777701376.0, "43950": 12777701376.0, "43955": 12777701376.0, "43960": 12777701376.0, "43965": 12777701376.0, "43970": 12777701376.0, "43975": 12777701376.0, "43980": 12777701376.0, "43985": 12777701376.0, "43990": 12777701376.0, "43995": 12777701376.0, "44000": 12777701376.0, "44005": 12777701376.0, "44010": 12777701376.0, "44015": 12777701376.0, "44020": 12777701376.0, "44025": 12777701376.0, "44030": 12777701376.0, "44035": 12777701376.0, "44040": 12777701376.0, "44045": 12777701376.0, "44050": 12777701376.0, "44055": 12777701376.0, "44060": 12777701376.0, "44065": 12777701376.0, "44070": 12777701376.0, "44075": 12777701376.0, "44080": 12777701376.0, "44085": 12777701376.0, "44090": 12777701376.0, "44095": 12777701376.0, "44100": 12777701376.0, "44105": 12777701376.0, "44110": 12777701376.0, "44115": 12777701376.0, "44120": 12777701376.0, "44125": 12777701376.0, "44130": 12777701376.0, "44135": 12777701376.0, "44140": 12777701376.0, "44145": 12777701376.0, "44150": 12777701376.0, "44155": 12777701376.0, "44160": 12777701376.0, "44165": 12777701376.0, "44170": 12777701376.0, "44175": 12777701376.0, "44180": 12777701376.0, "44185": 12777701376.0, "44190": 12777701376.0, "44195": 12777701376.0, "44200": 12777701376.0, "44205": 12777701376.0, "44210": 12777701376.0, "44215": 12777701376.0, "44220": 12777701376.0, "44225": 12777701376.0, "44230": 12777701376.0, "44235": 12777701376.0, "44240": 12777701376.0, "44245": 12777701376.0, "44250": 12777701376.0, "44255": 12777701376.0, "44260": 12777701376.0, "44265": 12777701376.0, "44270": 12777701376.0, "44275": 12777701376.0, "44280": 12777701376.0, "44285": 12777701376.0, "44290": 12777701376.0, "44295": 12777701376.0, "44300": 12777701376.0, "44305": 12777701376.0, "44310": 12777701376.0, "44315": 12777701376.0, "44320": 12777701376.0, "44325": 12777701376.0, "44330": 12777701376.0, "44335": 12777701376.0, "44340": 12777701376.0, "44345": 12777701376.0, "44350": 12777701376.0, "44355": 12777701376.0, "44360": 12777701376.0, "44365": 12777701376.0, "44370": 12777701376.0, "44375": 12777701376.0, "44380": 12777701376.0, "44385": 12777701376.0, "44390": 12777701376.0, "44395": 12777701376.0, "44400": 12777701376.0, "44405": 12777701376.0, "44410": 12777701376.0, "44415": 12777701376.0, "44420": 12777701376.0, "44425": 12777701376.0, "44430": 12777701376.0, "44435": 12777701376.0, "44440": 12777701376.0, "44445": 12777701376.0, "44450": 12777701376.0, "44455": 12777701376.0, "44460": 12777701376.0, "44465": 12777701376.0, "44470": 12777701376.0, "44475": 12777701376.0, "44480": 12777701376.0, "44485": 12777701376.0, "44490": 12777701376.0, "44495": 12777701376.0, "44500": 12777701376.0, "44505": 12777701376.0, "44510": 12777701376.0, "44515": 12777701376.0, "44520": 12777701376.0, "44525": 12777701376.0, "44530": 12777701376.0, "44535": 12777701376.0, "44540": 12777701376.0, "44545": 12777701376.0, "44550": 12777701376.0, "44555": 12777701376.0, "44560": 12777701376.0, "44565": 12777701376.0, "44570": 12777701376.0, "44575": 12777701376.0, "44580": 12777701376.0, "44585": 12777701376.0, "44590": 12777701376.0, "44595": 12777701376.0, "44600": 12777701376.0, "44605": 12777701376.0, "44610": 12777701376.0, "44615": 12777701376.0, "44620": 12777701376.0, "44625": 12777701376.0, "44630": 12777701376.0, "44635": 12777701376.0, "44640": 12777701376.0, "44645": 12777701376.0, "44650": 12777701376.0, "44655": 12777701376.0, "44660": 12777701376.0, "44665": 12777701376.0, "44670": 12777701376.0, "44675": 12777701376.0, "44680": 12777701376.0, "44685": 12777701376.0, "44690": 12777701376.0, "44695": 12777701376.0, "44700": 12777701376.0, "44705": 12777701376.0, "44710": 12777701376.0, "44715": 12777701376.0, "44720": 12777701376.0, "44725": 12777701376.0, "44730": 12777701376.0, "44735": 12777701376.0, "44740": 12777701376.0, "44745": 12777701376.0, "44750": 12777701376.0, "44755": 12777701376.0, "44760": 12777701376.0, "44765": 12777701376.0, "44770": 12777701376.0, "44775": 12777701376.0, "44780": 12777701376.0, "44785": 12777701376.0, "44790": 12777701376.0, "44795": 12777701376.0, "44800": 12777701376.0, "44805": 12777701376.0, "44810": 12777701376.0, "44815": 12777701376.0, "44820": 12777701376.0, "44825": 12777701376.0, "44830": 12777701376.0, "44835": 12777701376.0, "44840": 12777701376.0, "44845": 12777701376.0, "44850": 12777701376.0, "44855": 12777701376.0, "44860": 12777701376.0, "44865": 12777701376.0, "44870": 12777701376.0, "44875": 12777701376.0, "44880": 12777701376.0, "44885": 12777701376.0, "44890": 12777701376.0, "44895": 12777701376.0, "44900": 12777701376.0, "44905": 12777701376.0, "44910": 12777701376.0, "44915": 12777701376.0, "44920": 12777701376.0, "44925": 12777701376.0, "44930": 12777701376.0, "44935": 12777701376.0, "44940": 12777701376.0, "44945": 12777701376.0, "44950": 12777701376.0, "44955": 12777701376.0, "44960": 12777701376.0, "44965": 12777701376.0, "44970": 12777701376.0, "44975": 12777701376.0, "44980": 12777701376.0, "44985": 12777701376.0, "44990": 12777701376.0, "44995": 12777701376.0, "45000": 12777701376.0, "45005": 12777701376.0, "45010": 12777701376.0, "45015": 12777701376.0, "45020": 12777701376.0, "45025": 12777701376.0, "45030": 12777701376.0, "45035": 12777701376.0, "45040": 12777701376.0, "45045": 12777701376.0, "45050": 12777701376.0, "45055": 12777701376.0, "45060": 12777701376.0, "45065": 12777701376.0, "45070": 12777701376.0, "45075": 12777701376.0, "45080": 12777701376.0, "45085": 12777701376.0, "45090": 12777701376.0, "45095": 12777701376.0, "45100": 12777701376.0, "45105": 12777701376.0, "45110": 12777701376.0, "45115": 12777701376.0, "45120": 12777701376.0, "45125": 12777701376.0, "45130": 12777701376.0, "45135": 12777701376.0, "45140": 12777701376.0, "45145": 12777701376.0, "45150": 12777701376.0, "45155": 12777701376.0, "45160": 12777701376.0, "45165": 12777701376.0, "45170": 12777701376.0, "45175": 12777701376.0, "45180": 12777701376.0, "45185": 12777701376.0, "45190": 12777701376.0, "45195": 12777701376.0, "45200": 12777701376.0, "45205": 12777701376.0, "45210": 12777701376.0, "45215": 12777701376.0, "45220": 12777701376.0, "45225": 12777701376.0, "45230": 12777701376.0, "45235": 12777701376.0, "45240": 12777701376.0, "45245": 12777701376.0, "45250": 12777701376.0, "45255": 12777701376.0, "45260": 12777701376.0, "45265": 12777701376.0, "45270": 12777701376.0, "45275": 12777701376.0, "45280": 12777701376.0, "45285": 12777701376.0, "45290": 12777701376.0, "45295": 12777701376.0, "45300": 12777701376.0, "45305": 12777701376.0, "45310": 12777701376.0, "45315": 12777701376.0, "45320": 12777701376.0, "45325": 12777701376.0, "45330": 12777701376.0, "45335": 12777701376.0, "45340": 12777701376.0, "45345": 12777701376.0, "45350": 12777701376.0, "45355": 12777701376.0, "45360": 12777701376.0, "45365": 12777701376.0, "45370": 12777701376.0, "45375": 12777701376.0, "45380": 12777701376.0, "45385": 12777701376.0, "45390": 12777701376.0, "45395": 12777701376.0, "45400": 12777701376.0, "45405": 12777701376.0, "45410": 12777701376.0, "45415": 12777701376.0, "45420": 12777701376.0, "45425": 12777701376.0, "45430": 12777701376.0, "45435": 12777701376.0, "45440": 12777701376.0, "45445": 12777701376.0, "45450": 12777701376.0, "45455": 12777701376.0, "45460": 12777701376.0, "45465": 12777701376.0, "45470": 12777701376.0, "45475": 12777701376.0, "45480": 12777701376.0, "45485": 12777701376.0, "45490": 12777701376.0, "45495": 12777701376.0, "45500": 12777701376.0, "45505": 12777701376.0, "45510": 12777701376.0, "45515": 12777701376.0, "45520": 12777701376.0, "45525": 12777701376.0, "45530": 12777701376.0, "45535": 12777701376.0, "45540": 12777701376.0, "45545": 12777701376.0, "45550": 12777701376.0, "45555": 12777701376.0, "45560": 12777701376.0, "45565": 12777701376.0, "45570": 12777701376.0, "45575": 12777701376.0, "45580": 12777701376.0, "45585": 12777701376.0, "45590": 12777701376.0, "45595": 12777701376.0, "45600": 12777701376.0, "45605": 12777701376.0, "45610": 12777701376.0, "45615": 12777701376.0, "45620": 12777701376.0, "45625": 12777701376.0, "45630": 12777701376.0, "45635": 12777701376.0, "45640": 12777701376.0, "45645": 12777701376.0, "45650": 12777701376.0, "45655": 12777701376.0, "45660": 12777701376.0, "45665": 12777701376.0, "45670": 12777701376.0, "45675": 12777701376.0, "45680": 12777701376.0, "45685": 12777701376.0, "45690": 12777701376.0, "45695": 12777701376.0, "45700": 12777701376.0, "45705": 12777701376.0, "45710": 12777701376.0, "45715": 12777701376.0, "45720": 12777701376.0, "45725": 12777701376.0, "45730": 12777701376.0, "45735": 12777701376.0, "45740": 12777701376.0, "45745": 12777701376.0, "45750": 12777701376.0, "45755": 12777701376.0, "45760": 12777701376.0, "45765": 12777701376.0, "45770": 12777701376.0, "45775": 12777701376.0, "45780": 12777701376.0, "45785": 12777701376.0, "45790": 12777701376.0, "45795": 12777701376.0, "45800": 12777701376.0, "45805": 12777701376.0, "45810": 12777701376.0, "45815": 12777701376.0, "45820": 12777701376.0, "45825": 12777701376.0, "45830": 12777701376.0, "45835": 12777701376.0, "45840": 12777701376.0, "45845": 12777701376.0, "45850": 12777701376.0, "45855": 12777701376.0, "45860": 12777701376.0, "45865": 12777701376.0, "45870": 12777701376.0, "45875": 12777701376.0, "45880": 12777701376.0, "45885": 12777701376.0, "45890": 12777701376.0, "45895": 12777701376.0, "45900": 12777701376.0, "45905": 12777701376.0, "45910": 12777701376.0, "45915": 12777701376.0, "45920": 12777701376.0, "45925": 12777701376.0, "45930": 12777701376.0, "45935": 12777701376.0, "45940": 12777701376.0, "45945": 12777701376.0, "45950": 12777701376.0, "45955": 12777701376.0, "45960": 12777701376.0, "45965": 12777701376.0, "45970": 12777701376.0, "45975": 12777701376.0, "45980": 12777701376.0, "45985": 12777701376.0, "45990": 12777701376.0, "45995": 12777701376.0, "46000": 12777701376.0, "46005": 12777701376.0, "46010": 12777701376.0, "46015": 12777701376.0, "46020": 12777701376.0, "46025": 12777701376.0, "46030": 12777701376.0, "46035": 12777701376.0, "46040": 12777701376.0, "46045": 12777701376.0, "46050": 12777701376.0, "46055": 12777701376.0, "46060": 12777701376.0, "46065": 12777701376.0, "46070": 12777701376.0, "46075": 12777701376.0, "46080": 12777701376.0, "46085": 12777701376.0, "46090": 12777701376.0, "46095": 12777701376.0, "46100": 12777701376.0, "46105": 12777701376.0, "46110": 12777701376.0, "46115": 12777701376.0, "46120": 12777701376.0, "46125": 12777701376.0, "46130": 12777701376.0, "46135": 12777701376.0, "46140": 12777701376.0, "46145": 12777701376.0, "46150": 12777701376.0, "46155": 12777701376.0, "46160": 12777701376.0, "46165": 12777701376.0, "46170": 12777701376.0, "46175": 12777701376.0, "46180": 12777701376.0, "46185": 12777701376.0, "46190": 12777701376.0, "46195": 12777701376.0, "46200": 12777701376.0, "46205": 12777701376.0, "46210": 12777701376.0, "46215": 12777701376.0, "46220": 12777701376.0, "46225": 12777701376.0, "46230": 12777701376.0, "46235": 12777701376.0, "46240": 12777701376.0, "46245": 12777701376.0, "46250": 12777701376.0, "46255": 12777701376.0, "46260": 12777701376.0, "46265": 12777701376.0, "46270": 12777701376.0, "46275": 12777701376.0, "46280": 12777701376.0, "46285": 12777701376.0, "46290": 12777701376.0, "46295": 12777701376.0, "46300": 12777701376.0, "46305": 12777701376.0, "46310": 12777701376.0, "46315": 12777701376.0, "46320": 12777701376.0, "46325": 12777701376.0, "46330": 12777701376.0, "46335": 12777701376.0, "46340": 12777701376.0, "46345": 12777701376.0, "46350": 12777701376.0, "46355": 12777701376.0, "46360": 12777701376.0, "46365": 12777701376.0, "46370": 12777701376.0, "46375": 12777701376.0, "46380": 12777701376.0, "46385": 12777701376.0, "46390": 12777701376.0, "46395": 12777701376.0, "46400": 12777701376.0, "46405": 12777701376.0, "46410": 12777701376.0, "46415": 12777701376.0, "46420": 12777701376.0, "46425": 12777701376.0, "46430": 12777701376.0, "46435": 12777701376.0, "46440": 12777701376.0, "46445": 12777701376.0, "46450": 12777701376.0, "46455": 12777701376.0, "46460": 12777701376.0, "46465": 12777701376.0, "46470": 12777701376.0, "46475": 12777701376.0, "46480": 12777701376.0, "46485": 12777701376.0, "46490": 12777701376.0, "46495": 12777701376.0, "46500": 12777701376.0, "46505": 12777701376.0, "46510": 12777701376.0, "46515": 12777701376.0, "46520": 12777701376.0, "46525": 12777701376.0, "46530": 12777701376.0, "46535": 12777701376.0, "46540": 12777701376.0, "46545": 12777701376.0, "46550": 12777701376.0, "46555": 12777701376.0, "46560": 12777701376.0, "46565": 12777701376.0, "46570": 12777701376.0, "46575": 12777701376.0, "46580": 12777701376.0, "46585": 12777701376.0, "46590": 12777701376.0, "46595": 12777701376.0, "46600": 12777701376.0, "46605": 12777701376.0, "46610": 12777701376.0, "46615": 12777701376.0, "46620": 12777701376.0, "46625": 12777701376.0, "46630": 12777701376.0, "46635": 12777701376.0, "46640": 12777701376.0, "46645": 12777701376.0, "46650": 12777701376.0, "46655": 12777701376.0, "46660": 12777701376.0, "46665": 12777701376.0, "46670": 12777701376.0, "46675": 12777701376.0, "46680": 12777701376.0, "46685": 12777701376.0, "46690": 12777701376.0, "46695": 12777701376.0, "46700": 12777701376.0, "46705": 12777701376.0, "46710": 12777701376.0, "46715": 12777701376.0, "46720": 12777701376.0, "46725": 12777701376.0, "46730": 12777701376.0, "46735": 12777701376.0, "46740": 12777701376.0, "46745": 12777701376.0, "46750": 12777701376.0, "46755": 12777701376.0, "46760": 12777701376.0, "46765": 12777701376.0, "46770": 12777701376.0, "46775": 12777701376.0, "46780": 12777701376.0, "46785": 12777701376.0, "46790": 12777701376.0, "46795": 12777701376.0, "46800": 12777701376.0, "46805": 12777701376.0, "46810": 12777701376.0, "46815": 12777701376.0, "46820": 12777701376.0, "46825": 12777701376.0, "46830": 12777701376.0, "46835": 12777701376.0, "46840": 12777701376.0, "46845": 12777701376.0, "46850": 12777701376.0, "46855": 12777701376.0, "46860": 12777701376.0, "46865": 12777701376.0, "46870": 12777701376.0, "46875": 12777701376.0, "46880": 12777701376.0, "46885": 12777701376.0, "46890": 12777701376.0, "46895": 12777701376.0, "46900": 12777701376.0, "46905": 12777701376.0, "46910": 12777701376.0, "46915": 12777701376.0, "46920": 12777701376.0, "46925": 12777701376.0, "46930": 12777701376.0, "46935": 12777701376.0, "46940": 12777701376.0, "46945": 12777701376.0, "46950": 12777701376.0, "46955": 12777701376.0, "46960": 12777701376.0, "46965": 12777701376.0, "46970": 12777701376.0, "46975": 12777701376.0, "46980": 12777701376.0, "46985": 12777701376.0, "46990": 12777701376.0, "46995": 12777701376.0, "47000": 12777701376.0, "47005": 12777701376.0, "47010": 12777701376.0, "47015": 12777701376.0, "47020": 12777701376.0, "47025": 12777701376.0, "47030": 12777701376.0, "47035": 12777701376.0, "47040": 12777701376.0, "47045": 12777701376.0, "47050": 12777701376.0, "47055": 12777701376.0, "47060": 12777701376.0, "47065": 12777701376.0, "47070": 12777701376.0, "47075": 12777701376.0, "47080": 12777701376.0, "47085": 12777701376.0, "47090": 12777701376.0, "47095": 12777701376.0, "47100": 12777701376.0, "47105": 12777701376.0, "47110": 12777701376.0, "47115": 12777701376.0, "47120": 12777701376.0, "47125": 12777701376.0, "47130": 12777701376.0, "47135": 12777701376.0, "47140": 12777701376.0, "47145": 12777701376.0, "47150": 12777701376.0, "47155": 12777701376.0, "47160": 12777701376.0, "47165": 12777701376.0, "47170": 12777701376.0, "47175": 12777701376.0, "47180": 12777701376.0, "47185": 12777701376.0, "47190": 12777701376.0, "47195": 12777701376.0, "47200": 12777701376.0, "47205": 12777701376.0, "47210": 12777701376.0, "47215": 12777701376.0, "47220": 12777701376.0, "47225": 12777701376.0, "47230": 12777701376.0, "47235": 12777701376.0, "47240": 12777701376.0, "47245": 12777701376.0, "47250": 12777701376.0, "47255": 12777701376.0, "47260": 12777701376.0, "47265": 12777701376.0, "47270": 12777701376.0, "47275": 12777701376.0, "47280": 12777701376.0, "47285": 12777701376.0, "47290": 12777701376.0, "47295": 12777701376.0, "47300": 12777701376.0, "47305": 12777701376.0, "47310": 12777701376.0, "47315": 12777701376.0, "47320": 12777701376.0, "47325": 12777701376.0, "47330": 12777701376.0, "47335": 12777701376.0, "47340": 12777701376.0, "47345": 12777701376.0, "47350": 12777701376.0, "47355": 12777701376.0, "47360": 12777701376.0, "47365": 12777701376.0, "47370": 12777701376.0, "47375": 12777701376.0, "47380": 12777701376.0, "47385": 12777701376.0, "47390": 12777701376.0, "47395": 12777701376.0, "47400": 12777701376.0, "47405": 12777701376.0, "47410": 12777701376.0, "47415": 12777701376.0, "47420": 12777701376.0, "47425": 12777701376.0, "47430": 12777701376.0, "47435": 12777701376.0, "47440": 12777701376.0, "47445": 12777701376.0, "47450": 12777701376.0, "47455": 12777701376.0, "47460": 12777701376.0, "47465": 12777701376.0, "47470": 12777701376.0, "47475": 12777701376.0, "47480": 12777701376.0, "47485": 12777701376.0, "47490": 12777701376.0, "47495": 12777701376.0, "47500": 12777701376.0, "47505": 12777701376.0, "47510": 12777701376.0, "47515": 12777701376.0, "47520": 12777701376.0, "47525": 12777701376.0, "47530": 12777701376.0, "47535": 12777701376.0, "47540": 12777701376.0, "47545": 12777701376.0, "47550": 12777701376.0, "47555": 12777701376.0, "47560": 12777701376.0, "47565": 12777701376.0, "47570": 12777701376.0, "47575": 12777701376.0, "47580": 12777701376.0, "47585": 12777701376.0, "47590": 12777701376.0, "47595": 12777701376.0, "47600": 12777701376.0, "47605": 12777701376.0, "47610": 12777701376.0, "47615": 12777701376.0, "47620": 12777701376.0, "47625": 12777701376.0, "47630": 12777701376.0, "47635": 12777701376.0, "47640": 12777701376.0, "47645": 12777701376.0, "47650": 12777701376.0, "47655": 12777701376.0, "47660": 12777701376.0, "47665": 12777701376.0, "47670": 12777701376.0, "47675": 12777701376.0, "47680": 12777701376.0, "47685": 12777701376.0, "47690": 12777701376.0, "47695": 12777701376.0, "47700": 12777701376.0, "47705": 12777701376.0, "47710": 12777701376.0, "47715": 12777701376.0, "47720": 12777701376.0, "47725": 12777701376.0, "47730": 12777701376.0, "47735": 12777701376.0, "47740": 12777701376.0, "47745": 12777701376.0, "47750": 12777701376.0, "47755": 12777701376.0, "47760": 12777701376.0, "47765": 12777701376.0, "47770": 12777701376.0, "47775": 12777701376.0, "47780": 12777701376.0, "47785": 12777701376.0, "47790": 12777701376.0, "47795": 12777701376.0, "47800": 12777701376.0, "47805": 12777701376.0, "47810": 12777701376.0, "47815": 12777701376.0, "47820": 12777701376.0, "47825": 12777701376.0, "47830": 12777701376.0, "47835": 12777701376.0, "47840": 12777701376.0, "47845": 12777701376.0, "47850": 12777701376.0, "47855": 12777701376.0, "47860": 12777701376.0, "47865": 12777701376.0, "47870": 12777701376.0, "47875": 12777701376.0, "47880": 12777701376.0, "47885": 12777701376.0, "47890": 12777701376.0, "47895": 12777701376.0, "47900": 12777701376.0, "47905": 12777701376.0, "47910": 12777701376.0, "47915": 12777701376.0, "47920": 12777701376.0, "47925": 12777701376.0, "47930": 12777701376.0, "47935": 12777701376.0, "47940": 12777701376.0, "47945": 12777701376.0, "47950": 12777701376.0, "47955": 12777701376.0, "47960": 12777701376.0, "47965": 12777701376.0, "47970": 12777701376.0, "47975": 12777701376.0, "47980": 12777701376.0, "47985": 12777701376.0, "47990": 12777701376.0, "47995": 12777701376.0, "48000": 12777701376.0, "48005": 12777701376.0, "48010": 12777701376.0, "48015": 12777701376.0, "48020": 12777701376.0, "48025": 12777701376.0, "48030": 12777701376.0, "48035": 12777701376.0, "48040": 12777701376.0, "48045": 12777701376.0, "48050": 12777701376.0, "48055": 12777701376.0, "48060": 12777701376.0, "48065": 12777701376.0, "48070": 12777701376.0, "48075": 12777701376.0, "48080": 12777701376.0, "48085": 12777701376.0, "48090": 12777701376.0, "48095": 12777701376.0, "48100": 12777701376.0, "48105": 12777701376.0, "48110": 12777701376.0, "48115": 12777701376.0, "48120": 12777701376.0, "48125": 12777701376.0, "48130": 12777701376.0, "48135": 12777701376.0, "48140": 12777701376.0, "48145": 12777701376.0, "48150": 12777701376.0, "48155": 12777701376.0, "48160": 12777701376.0, "48165": 12777701376.0, "48170": 12777701376.0, "48175": 12777701376.0, "48180": 12777701376.0, "48185": 12777701376.0, "48190": 12777701376.0, "48195": 12777701376.0, "48200": 12777701376.0, "48205": 12777701376.0, "48210": 12777701376.0, "48215": 12777701376.0, "48220": 12777701376.0, "48225": 12777701376.0, "48230": 12777701376.0, "48235": 12777701376.0, "48240": 12777701376.0, "48245": 12777701376.0, "48250": 12777701376.0, "48255": 12777701376.0, "48260": 12777701376.0, "48265": 12777701376.0, "48270": 12777701376.0, "48275": 12777701376.0, "48280": 12777701376.0, "48285": 12777701376.0, "48290": 12777701376.0, "48295": 12777701376.0, "48300": 12777701376.0, "48305": 12777701376.0, "48310": 12777701376.0, "48315": 12777701376.0, "48320": 12777701376.0, "48325": 12777701376.0, "48330": 12777701376.0, "48335": 12777701376.0, "48340": 12777701376.0, "48345": 12777701376.0, "48350": 12777701376.0, "48355": 12777701376.0, "48360": 12777701376.0, "48365": 12777701376.0, "48370": 12777701376.0, "48375": 12777701376.0, "48380": 12777701376.0, "48385": 12777701376.0, "48390": 12777701376.0, "48395": 12777701376.0, "48400": 12777701376.0, "48405": 12777701376.0, "48410": 12777701376.0, "48415": 12777701376.0, "48420": 12777701376.0, "48425": 12777701376.0, "48430": 12777701376.0, "48435": 12777701376.0, "48440": 12777701376.0, "48445": 12777701376.0, "48450": 12777701376.0, "48455": 12777701376.0, "48460": 12777701376.0, "48465": 12777701376.0, "48470": 12777701376.0, "48475": 12777701376.0, "48480": 12777701376.0, "48485": 12777701376.0, "48490": 12777701376.0, "48495": 12777701376.0, "48500": 12777701376.0, "48505": 12777701376.0, "48510": 12777701376.0, "48515": 12777701376.0, "48520": 12777701376.0, "48525": 12777701376.0, "48530": 12777701376.0, "48535": 12777701376.0, "48540": 12777701376.0, "48545": 12777701376.0, "48550": 12777701376.0, "48555": 12777701376.0, "48560": 12777701376.0, "48565": 12777701376.0, "48570": 12777701376.0, "48575": 12777701376.0, "48580": 12777701376.0, "48585": 12777701376.0, "48590": 12777701376.0, "48595": 12777701376.0, "48600": 12777701376.0, "48605": 12777701376.0, "48610": 12777701376.0, "48615": 12777701376.0, "48620": 12777701376.0, "48625": 12777701376.0, "48630": 12777701376.0, "48635": 12777701376.0, "48640": 12777701376.0, "48645": 12777701376.0, "48650": 12777701376.0, "48655": 12777701376.0, "48660": 12777701376.0, "48665": 12777701376.0, "48670": 12777701376.0, "48675": 12777701376.0, "48680": 12777701376.0, "48685": 12777701376.0, "48690": 12777701376.0, "48695": 12777701376.0, "48700": 12777701376.0, "48705": 12777701376.0, "48710": 12777701376.0, "48715": 12777701376.0, "48720": 12777701376.0, "48725": 12777701376.0, "48730": 12777701376.0, "48735": 12777701376.0, "48740": 12777701376.0, "48745": 12777701376.0, "48750": 12777701376.0, "48755": 12777701376.0, "48760": 12777701376.0, "48765": 12777701376.0, "48770": 12777701376.0, "48775": 12777701376.0, "48780": 12777701376.0, "48785": 12777701376.0, "48790": 12777701376.0, "48795": 12777701376.0, "48800": 12777701376.0, "48805": 12777701376.0, "48810": 12777701376.0, "48815": 12777701376.0, "48820": 12777701376.0, "48825": 12777701376.0, "48830": 12777701376.0, "48835": 12777701376.0, "48840": 12777701376.0, "48845": 12777701376.0, "48850": 12777701376.0, "48855": 12777701376.0, "48860": 12777701376.0, "48865": 12777701376.0, "48870": 12777701376.0, "48875": 12777701376.0, "48880": 12777701376.0, "48885": 12777701376.0, "48890": 12777701376.0, "48895": 12777701376.0, "48900": 12777701376.0, "48905": 12777701376.0, "48910": 12777701376.0, "48915": 12777701376.0, "48920": 12777701376.0, "48925": 12777701376.0, "48930": 12777701376.0, "48935": 12777701376.0, "48940": 12777701376.0, "48945": 12777701376.0, "48950": 12777701376.0, "48955": 12777701376.0, "48960": 12777701376.0, "48965": 12777701376.0, "48970": 12777701376.0, "48975": 12777701376.0, "48980": 12777701376.0, "48985": 12777701376.0, "48990": 12777701376.0, "48995": 12777701376.0, "49000": 12777701376.0, "49005": 12777701376.0, "49010": 12777701376.0, "49015": 12777701376.0, "49020": 12777701376.0, "49025": 12777701376.0, "49030": 12777701376.0, "49035": 12777701376.0, "49040": 12777701376.0, "49045": 12777701376.0, "49050": 12777701376.0, "49055": 12777701376.0, "49060": 12777701376.0, "49065": 12777701376.0, "49070": 12777701376.0, "49075": 12777701376.0, "49080": 12777701376.0, "49085": 12777701376.0, "49090": 12777701376.0, "49095": 12777701376.0, "49100": 12777701376.0, "49105": 12777701376.0, "49110": 12777701376.0, "49115": 12777701376.0, "49120": 12777701376.0, "49125": 12777701376.0, "49130": 12777701376.0, "49135": 12777701376.0, "49140": 12777701376.0, "49145": 12777701376.0, "49150": 12777701376.0, "49155": 12777701376.0, "49160": 12777701376.0, "49165": 12777701376.0, "49170": 12777701376.0, "49175": 12777701376.0, "49180": 12777701376.0, "49185": 12777701376.0, "49190": 12777701376.0, "49195": 12777701376.0, "49200": 12777701376.0, "49205": 12777701376.0, "49210": 12777701376.0, "49215": 12777701376.0, "49220": 12777701376.0, "49225": 12777701376.0, "49230": 12777701376.0, "49235": 12777701376.0, "49240": 12777701376.0, "49245": 12777701376.0, "49250": 12777701376.0, "49255": 12777701376.0, "49260": 12777701376.0, "49265": 12777701376.0, "49270": 12777701376.0, "49275": 12777701376.0, "49280": 12777701376.0, "49285": 12777701376.0, "49290": 12777701376.0, "49295": 12777701376.0, "49300": 12777701376.0, "49305": 12777701376.0, "49310": 12777701376.0, "49315": 12777701376.0, "49320": 12777701376.0, "49325": 12777701376.0, "49330": 12777701376.0, "49335": 12777701376.0, "49340": 12777701376.0, "49345": 12777701376.0, "49350": 12777701376.0, "49355": 12777701376.0, "49360": 12777701376.0, "49365": 12777701376.0, "49370": 12777701376.0, "49375": 12777701376.0, "49380": 12777701376.0, "49385": 12777701376.0, "49390": 12777701376.0, "49395": 12777701376.0, "49400": 12777701376.0, "49405": 12777701376.0, "49410": 12777701376.0, "49415": 12777701376.0, "49420": 12777701376.0, "49425": 12777701376.0, "49430": 12777701376.0, "49435": 12777701376.0, "49440": 12777701376.0, "49445": 12777701376.0, "49450": 12777701376.0, "49455": 12777701376.0, "49460": 12777701376.0, "49465": 12777701376.0, "49470": 12777701376.0, "49475": 12777701376.0, "49480": 12777701376.0, "49485": 12777701376.0, "49490": 12777701376.0, "49495": 12777701376.0, "49500": 12777701376.0, "49505": 12777701376.0, "49510": 12777701376.0, "49515": 12777701376.0, "49520": 12777701376.0, "49525": 12777701376.0, "49530": 12777701376.0, "49535": 12777701376.0, "49540": 12777701376.0, "49545": 12777701376.0, "49550": 12777701376.0, "49555": 12777701376.0, "49560": 12777701376.0, "49565": 12777701376.0, "49570": 12777701376.0, "49575": 12777701376.0, "49580": 12777701376.0, "49585": 12777701376.0, "49590": 12777701376.0, "49595": 12777701376.0, "49600": 12777701376.0, "49605": 12777701376.0, "49610": 12777701376.0, "49615": 12777701376.0, "49620": 12777701376.0, "49625": 12777701376.0, "49630": 12777701376.0, "49635": 12777701376.0, "49640": 12777701376.0, "49645": 12777701376.0, "49650": 12777701376.0, "49655": 12777701376.0, "49660": 12777701376.0, "49665": 12777701376.0, "49670": 12777701376.0, "49675": 12777701376.0, "49680": 12777701376.0, "49685": 12777701376.0, "49690": 12777701376.0, "49695": 12777701376.0, "49700": 12777701376.0, "49705": 12777701376.0, "49710": 12777701376.0, "49715": 12777701376.0, "49720": 12777701376.0, "49725": 12777701376.0, "49730": 12777701376.0, "49735": 12777701376.0, "49740": 12777701376.0, "49745": 12777701376.0, "49750": 12777701376.0, "49755": 12777701376.0, "49760": 12777701376.0, "49765": 12777701376.0, "49770": 12777701376.0, "49775": 12777701376.0, "49780": 12777701376.0, "49785": 12777701376.0, "49790": 12777701376.0, "49795": 12777701376.0, "49800": 12777701376.0, "49805": 12777701376.0, "49810": 12777701376.0, "49815": 12777701376.0, "49820": 12777701376.0, "49825": 12777701376.0, "49830": 12777701376.0, "49835": 12777701376.0, "49840": 12777701376.0, "49845": 12777701376.0, "49850": 12777701376.0, "49855": 12777701376.0, "49860": 12777701376.0, "49865": 12777701376.0, "49870": 12777701376.0, "49875": 12777701376.0, "49880": 12777701376.0, "49885": 12777701376.0, "49890": 12777701376.0, "49895": 12777701376.0, "49900": 12777701376.0, "49905": 12777701376.0, "49910": 12777701376.0, "49915": 12777701376.0, "49920": 12777701376.0, "49925": 12777701376.0, "49930": 12777701376.0, "49935": 12777701376.0, "49940": 12777701376.0, "49945": 12777701376.0, "49950": 12777701376.0, "49955": 12777701376.0, "49960": 12777701376.0, "49965": 12777701376.0, "49970": 12777701376.0, "49975": 12777701376.0, "49980": 12777701376.0, "49985": 12777701376.0, "49990": 12777701376.0, "49995": 12777701376.0, "50000": 12777701376.0, "50005": 12777701376.0, "50010": 12777701376.0, "50015": 12777701376.0, "50020": 12777701376.0, "50025": 12777701376.0, "50030": 12777701376.0, "50035": 12777701376.0, "50040": 12777701376.0, "50045": 12777701376.0, "50050": 12777701376.0, "50055": 12777701376.0, "50060": 12777701376.0, "50065": 12777701376.0, "50070": 12777701376.0, "50075": 12777701376.0, "50080": 12777701376.0, "50085": 12777701376.0, "50090": 12777701376.0, "50095": 12777701376.0, "50100": 12777701376.0, "50105": 12777701376.0, "50110": 12777701376.0, "50115": 12777701376.0, "50120": 12777701376.0, "50125": 12777701376.0, "50130": 12777701376.0, "50135": 12777701376.0, "50140": 12777701376.0, "50145": 12777701376.0, "50150": 12777701376.0, "50155": 12777701376.0, "50160": 12777701376.0, "50165": 12777701376.0, "50170": 12777701376.0, "50175": 12777701376.0, "50180": 12777701376.0, "50185": 12777701376.0, "50190": 12777701376.0, "50195": 12777701376.0, "50200": 12777701376.0, "50205": 12777701376.0, "50210": 12777701376.0, "50215": 12777701376.0, "50220": 12777701376.0, "50225": 12777701376.0, "50230": 12777701376.0, "50235": 12777701376.0, "50240": 12777701376.0, "50245": 12777701376.0, "50250": 12777701376.0, "50255": 12777701376.0, "50260": 12777701376.0, "50265": 12777701376.0, "50270": 12777701376.0, "50275": 12777701376.0, "50280": 12777701376.0, "50285": 12777701376.0, "50290": 12777701376.0, "50295": 12777701376.0, "50300": 12777701376.0, "50305": 12777701376.0, "50310": 12777701376.0, "50315": 12777701376.0, "50320": 12777701376.0, "50325": 12777701376.0, "50330": 12777701376.0, "50335": 12777701376.0, "50340": 12777701376.0, "50345": 12777701376.0, "50350": 12777701376.0, "50355": 12777701376.0, "50360": 12777701376.0, "50365": 12777701376.0, "50370": 12777701376.0, "50375": 12777701376.0, "50380": 12777701376.0, "50385": 12777701376.0, "50390": 12777701376.0, "50395": 12777701376.0, "50400": 12777701376.0, "50405": 12777701376.0, "50410": 12777701376.0, "50415": 12777701376.0, "50420": 12777701376.0, "50425": 12777701376.0, "50430": 12777701376.0, "50435": 12777701376.0, "50440": 12777701376.0, "50445": 12777701376.0, "50450": 12777701376.0, "50455": 12777701376.0, "50460": 12777701376.0, "50465": 12777701376.0, "50470": 12777701376.0, "50475": 12777701376.0, "50480": 12777701376.0, "50485": 12777701376.0, "50490": 12777701376.0, "50495": 12777701376.0, "50500": 12777701376.0, "50505": 12777701376.0, "50510": 12777701376.0, "50515": 12777701376.0, "50520": 12777701376.0, "50525": 12777701376.0, "50530": 12777701376.0, "50535": 12777701376.0, "50540": 12777701376.0, "50545": 12777701376.0, "50550": 12777701376.0, "50555": 12777701376.0, "50560": 12777701376.0, "50565": 12777701376.0, "50570": 12777701376.0, "50575": 12777701376.0, "50580": 12777701376.0, "50585": 12777701376.0, "50590": 12777701376.0, "50595": 12777701376.0, "50600": 12777701376.0, "50605": 12777701376.0, "50610": 12777701376.0, "50615": 12777701376.0, "50620": 12777701376.0, "50625": 12777701376.0, "50630": 12777701376.0, "50635": 12777701376.0, "50640": 12777701376.0, "50645": 12777701376.0, "50650": 12777701376.0, "50655": 12777701376.0, "50660": 12777701376.0, "50665": 12777701376.0, "50670": 12777701376.0, "50675": 12777701376.0, "50680": 12777701376.0, "50685": 12777701376.0, "50690": 12777701376.0, "50695": 12777701376.0, "50700": 12777701376.0, "50705": 12777701376.0, "50710": 12777701376.0, "50715": 12777701376.0, "50720": 12777701376.0, "50725": 12777701376.0, "50730": 12777701376.0, "50735": 12777701376.0, "50740": 12777701376.0, "50745": 12777701376.0, "50750": 12777701376.0, "50755": 12777701376.0, "50760": 12777701376.0, "50765": 12777701376.0, "50770": 12777701376.0, "50775": 12777701376.0, "50780": 12777701376.0, "50785": 12777701376.0, "50790": 12777701376.0, "50795": 12777701376.0, "50800": 12777701376.0, "50805": 12777701376.0, "50810": 12777701376.0, "50815": 12777701376.0, "50820": 12777701376.0, "50825": 12777701376.0, "50830": 12777701376.0, "50835": 12777701376.0, "50840": 12777701376.0, "50845": 12777701376.0, "50850": 12777701376.0, "50855": 12777701376.0, "50860": 12777701376.0, "50865": "nan", "50870": "nan", "50875": "nan", "50880": "nan", "50885": "nan", "50890": "nan", "50895": "nan", "50900": "nan", "50905": "nan", "50910": "nan", "50915": "nan", "50920": "nan", "50925": "nan", "50930": "nan", "50935": "nan", "50940": "nan", "50945": "nan", "50950": "nan", "50955": "nan", "50960": "nan", "50965": "nan", "50970": "nan", "50975": "nan", "50980": "nan", "50985": "nan", "50990": "nan", "50995": "nan", "51000": "nan"}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 51000, "step_interval": 5, "values": {"1": 27167903744.0, "5": 27667937280.0, "10": 27667939328.0, "15": 27667939328.0, "20": 27667939328.0, "25": 27667939328.0, "30": 27667939328.0, "35": 27667939328.0, "40": 27667939328.0, "45": 27667939328.0, "50": 27667939328.0, "55": 27667939328.0, "60": 27667939328.0, "65": 27667939328.0, "70": 27667939328.0, "75": 27667939328.0, "80": 27667939328.0, "85": 27667939328.0, "90": 27667939328.0, "95": 27667939328.0, "100": 27667939328.0, "105": 27667939328.0, "110": 27667939328.0, "115": 27667939328.0, "120": 27667939328.0, "125": 27667939328.0, "130": 27667939328.0, "135": 27667939328.0, "140": 27667939328.0, "145": 27667939328.0, "150": 27667939328.0, "155": 27667939328.0, "160": 27667939328.0, "165": 27667939328.0, "170": 27667939328.0, "175": 27667939328.0, "180": 27667939328.0, "185": 27667939328.0, "190": 27667939328.0, "195": 27667939328.0, "200": 27667939328.0, "205": 27667939328.0, "210": 27667939328.0, "215": 27667939328.0, "220": 27667939328.0, "225": 27667939328.0, "230": 27667939328.0, "235": 27667939328.0, "240": 27667939328.0, "245": 27667939328.0, "250": 27667939328.0, "255": 27667939328.0, "260": 27667939328.0, "265": 27667939328.0, "270": 27667939328.0, "275": 27667939328.0, "280": 27667939328.0, "285": 27667939328.0, "290": 27667939328.0, "295": 27667939328.0, "300": 27667939328.0, "305": 27667939328.0, "310": 27667939328.0, "315": 27667939328.0, "320": 27667939328.0, "325": 27667939328.0, "330": 27667939328.0, "335": 27667939328.0, "340": 27667939328.0, "345": 27667939328.0, "350": 27667939328.0, "355": 27667939328.0, "360": 27667939328.0, "365": 27667939328.0, "370": 27667939328.0, "375": 27667939328.0, "380": 27667939328.0, "385": 27667939328.0, "390": 27667939328.0, "395": 27667939328.0, "400": 27667939328.0, "405": 27667939328.0, "410": 27667939328.0, "415": 27667939328.0, "420": 27667939328.0, "425": 27667939328.0, "430": 27667939328.0, "435": 27667939328.0, "440": 27667939328.0, "445": 27667939328.0, "450": 27667939328.0, "455": 27667939328.0, "460": 27667939328.0, "465": 27667939328.0, "470": 27667939328.0, "475": 27667939328.0, "480": 27667939328.0, "485": 27667939328.0, "490": 27667939328.0, "495": 27667939328.0, "500": 27667939328.0, "505": 27667939328.0, "510": 27667939328.0, "515": 27667939328.0, "520": 27667939328.0, "525": 27667939328.0, "530": 27667939328.0, "535": 27667939328.0, "540": 27667939328.0, "545": 27667939328.0, "550": 27667939328.0, "555": 27667939328.0, "560": 27667939328.0, "565": 27667939328.0, "570": 27667939328.0, "575": 27667939328.0, "580": 27667939328.0, "585": 27667939328.0, "590": 27667939328.0, "595": 27667939328.0, "600": 27667939328.0, "605": 27667939328.0, "610": 27667939328.0, "615": 27667939328.0, "620": 27667939328.0, "625": 27667939328.0, "630": 27667939328.0, "635": 27667939328.0, "640": 27667939328.0, "645": 27667939328.0, "650": 27667939328.0, "655": 27667939328.0, "660": 27667939328.0, "665": 27667939328.0, "670": 27667939328.0, "675": 27667939328.0, "680": 27667939328.0, "685": 27667939328.0, "690": 27667939328.0, "695": 27667939328.0, "700": 27667939328.0, "705": 27667939328.0, "710": 27667939328.0, "715": 27667939328.0, "720": 27667939328.0, "725": 27667939328.0, "730": 27667939328.0, "735": 27667939328.0, "740": 27667939328.0, "745": 27667939328.0, "750": 27667939328.0, "755": 27667939328.0, "760": 27667939328.0, "765": 27667939328.0, "770": 27667939328.0, "775": 27667939328.0, "780": 27667939328.0, "785": 27667939328.0, "790": 27667939328.0, "795": 27667939328.0, "800": 27667939328.0, "805": 27667939328.0, "810": 27667939328.0, "815": 27667939328.0, "820": 27667939328.0, "825": 27667939328.0, "830": 27667939328.0, "835": 27667939328.0, "840": 27667939328.0, "845": 27667939328.0, "850": 27667939328.0, "855": 27667939328.0, "860": 27667939328.0, "865": 27667939328.0, "870": 27667939328.0, "875": 27667939328.0, "880": 27667939328.0, "885": 27667939328.0, "890": 27667939328.0, "895": 27667939328.0, "900": 27667939328.0, "905": 27667939328.0, "910": 27667939328.0, "915": 27667939328.0, "920": 27667939328.0, "925": 27667939328.0, "930": 27667939328.0, "935": 27667939328.0, "940": 27667939328.0, "945": 27667939328.0, "950": 27667939328.0, "955": 27667939328.0, "960": 27667939328.0, "965": 27667939328.0, "970": 27667939328.0, "975": 27667939328.0, "980": 27667939328.0, "985": 27667939328.0, "990": 27667939328.0, "995": 27667939328.0, "1000": 27667939328.0, "1005": 27667939328.0, "1010": 27667939328.0, "1015": 27667939328.0, "1020": 27667939328.0, "1025": 27667939328.0, "1030": 27667939328.0, "1035": 27667939328.0, "1040": 27667939328.0, "1045": 27667939328.0, "1050": 27667939328.0, "1055": 27667939328.0, "1060": 27667939328.0, "1065": 27667939328.0, "1070": 27667939328.0, "1075": 27667939328.0, "1080": 27667939328.0, "1085": 27667939328.0, "1090": 27667939328.0, "1095": 27667939328.0, "1100": 27667939328.0, "1105": 27667939328.0, "1110": 27667939328.0, "1115": 27667939328.0, "1120": 27667939328.0, "1125": 27667939328.0, "1130": 27667939328.0, "1135": 27667939328.0, "1140": 27667939328.0, "1145": 27667939328.0, "1150": 27667939328.0, "1155": 27667939328.0, "1160": 27667939328.0, "1165": 27667939328.0, "1170": 27667939328.0, "1175": 27667939328.0, "1180": 27667939328.0, "1185": 27667939328.0, "1190": 27667939328.0, "1195": 27667939328.0, "1200": 27667939328.0, "1205": 27667939328.0, "1210": 27667939328.0, "1215": 27667939328.0, "1220": 27667939328.0, "1225": 27667939328.0, "1230": 27667939328.0, "1235": 27667939328.0, "1240": 27667939328.0, "1245": 27667939328.0, "1250": 27667939328.0, "1255": 27667939328.0, "1260": 27667939328.0, "1265": 27667939328.0, "1270": 27667939328.0, "1275": 27667939328.0, "1280": 27667939328.0, "1285": 27667939328.0, "1290": 27667939328.0, "1295": 27667939328.0, "1300": 27667939328.0, "1305": 27667939328.0, "1310": 27667939328.0, "1315": 27667939328.0, "1320": 27667939328.0, "1325": 27667939328.0, "1330": 27667939328.0, "1335": 27667939328.0, "1340": 27667939328.0, "1345": 27667939328.0, "1350": 27667939328.0, "1355": 27667939328.0, "1360": 27667939328.0, "1365": 27667939328.0, "1370": 27667939328.0, "1375": 27667939328.0, "1380": 27667939328.0, "1385": 27667939328.0, "1390": 27667939328.0, "1395": 27667939328.0, "1400": 27667939328.0, "1405": 27667939328.0, "1410": 27667939328.0, "1415": 27667939328.0, "1420": 27667939328.0, "1425": 27667939328.0, "1430": 27667939328.0, "1435": 27667939328.0, "1440": 27667939328.0, "1445": 27667939328.0, "1450": 27667939328.0, "1455": 27667939328.0, "1460": 27667939328.0, "1465": 27667939328.0, "1470": 27667939328.0, "1475": 27667939328.0, "1480": 27667939328.0, "1485": 27667939328.0, "1490": 27667939328.0, "1495": 27667939328.0, "1500": 27667939328.0, "1505": 27667939328.0, "1510": 27667939328.0, "1515": 27667939328.0, "1520": 27667939328.0, "1525": 27667939328.0, "1530": 27667939328.0, "1535": 27667939328.0, "1540": 27667939328.0, "1545": 27667939328.0, "1550": 27667939328.0, "1555": 27667939328.0, "1560": 27667939328.0, "1565": 27667939328.0, "1570": 27667939328.0, "1575": 27667939328.0, "1580": 27667939328.0, "1585": 27667939328.0, "1590": 27667939328.0, "1595": 27667939328.0, "1600": 27667939328.0, "1605": 27667939328.0, "1610": 27667939328.0, "1615": 27667939328.0, "1620": 27667939328.0, "1625": 27667939328.0, "1630": 27667939328.0, "1635": 27667939328.0, "1640": 27667939328.0, "1645": 27667939328.0, "1650": 27667939328.0, "1655": 27667939328.0, "1660": 27667939328.0, "1665": 27667939328.0, "1670": 27667939328.0, "1675": 27667939328.0, "1680": 27667939328.0, "1685": 27667939328.0, "1690": 27667939328.0, "1695": 27667939328.0, "1700": 27667939328.0, "1705": 27667939328.0, "1710": 27667939328.0, "1715": 27667939328.0, "1720": 27667939328.0, "1725": 27667939328.0, "1730": 27667939328.0, "1735": 27667939328.0, "1740": 27667939328.0, "1745": 27667939328.0, "1750": 27667939328.0, "1755": 27667939328.0, "1760": 27667939328.0, "1765": 27667939328.0, "1770": 27667939328.0, "1775": 27667939328.0, "1780": 27667939328.0, "1785": 27667939328.0, "1790": 27667939328.0, "1795": 27667939328.0, "1800": 27667939328.0, "1805": 27667939328.0, "1810": 27667939328.0, "1815": 27667939328.0, "1820": 27667939328.0, "1825": 27667939328.0, "1830": 27667939328.0, "1835": 27667939328.0, "1840": 27667939328.0, "1845": 27667939328.0, "1850": 27667939328.0, "1855": 27667939328.0, "1860": 27667939328.0, "1865": 27667939328.0, "1870": 27667939328.0, "1875": 27667939328.0, "1880": 27667939328.0, "1885": 27667939328.0, "1890": 27667939328.0, "1895": 27667939328.0, "1900": 27667939328.0, "1905": 27667939328.0, "1910": 27667939328.0, "1915": 27667939328.0, "1920": 27667939328.0, "1925": 27667939328.0, "1930": 27667939328.0, "1935": 27667939328.0, "1940": 27667939328.0, "1945": 27667939328.0, "1950": 27667939328.0, "1955": 27667939328.0, "1960": 27667939328.0, "1965": 27667939328.0, "1970": 27667939328.0, "1975": 27667939328.0, "1980": 27667939328.0, "1985": 27667939328.0, "1990": 27667939328.0, "1995": 27667939328.0, "2000": 27667939328.0, "2005": 27667939328.0, "2010": 27667939328.0, "2015": 27667939328.0, "2020": 27667939328.0, "2025": 27667939328.0, "2030": 27667939328.0, "2035": 27667939328.0, "2040": 27667939328.0, "2045": 27667939328.0, "2050": 27667939328.0, "2055": 27667939328.0, "2060": 27667939328.0, "2065": 27667939328.0, "2070": 27667939328.0, "2075": 27667939328.0, "2080": 27667939328.0, "2085": 27667939328.0, "2090": 27667939328.0, "2095": 27667939328.0, "2100": 27667939328.0, "2105": 27667939328.0, "2110": 27667939328.0, "2115": 27667939328.0, "2120": 27667939328.0, "2125": 27667939328.0, "2130": 27667939328.0, "2135": 27667939328.0, "2140": 27667939328.0, "2145": 27667939328.0, "2150": 27667939328.0, "2155": 27667939328.0, "2160": 27667939328.0, "2165": 27667939328.0, "2170": 27667939328.0, "2175": 27667939328.0, "2180": 27667939328.0, "2185": 27667939328.0, "2190": 27667939328.0, "2195": 27667939328.0, "2200": 27667939328.0, "2205": 27667939328.0, "2210": 27667939328.0, "2215": 27667939328.0, "2220": 27667939328.0, "2225": 27667939328.0, "2230": 27667939328.0, "2235": 27667939328.0, "2240": 27667939328.0, "2245": 27667939328.0, "2250": 27667939328.0, "2255": 27667939328.0, "2260": 27667939328.0, "2265": 27667939328.0, "2270": 27667939328.0, "2275": 27667939328.0, "2280": 27667939328.0, "2285": 27667939328.0, "2290": 27667939328.0, "2295": 27667939328.0, "2300": 27667939328.0, "2305": 27667939328.0, "2310": 27667939328.0, "2315": 27667939328.0, "2320": 27667939328.0, "2325": 27667939328.0, "2330": 27667939328.0, "2335": 27667939328.0, "2340": 27667939328.0, "2345": 27667939328.0, "2350": 27667939328.0, "2355": 27667939328.0, "2360": 27667939328.0, "2365": 27667939328.0, "2370": 27667939328.0, "2375": 27667939328.0, "2380": 27667939328.0, "2385": 27667939328.0, "2390": 27667939328.0, "2395": 27667939328.0, "2400": 27667939328.0, "2405": 27667939328.0, "2410": 27667939328.0, "2415": 27667939328.0, "2420": 27667939328.0, "2425": 27667939328.0, "2430": 27667939328.0, "2435": 27667939328.0, "2440": 27667939328.0, "2445": 27667939328.0, "2450": 27667939328.0, "2455": 27667939328.0, "2460": 27667939328.0, "2465": 27667939328.0, "2470": 27667939328.0, "2475": 27667939328.0, "2480": 27667939328.0, "2485": 27667939328.0, "2490": 27667939328.0, "2495": 27667939328.0, "2500": 27667939328.0, "2505": 27667939328.0, "2510": 27667939328.0, "2515": 27667939328.0, "2520": 27667939328.0, "2525": 27667939328.0, "2530": 27667939328.0, "2535": 27667939328.0, "2540": 27667939328.0, "2545": 27667939328.0, "2550": 27667939328.0, "2555": 27667939328.0, "2560": 27667939328.0, "2565": 27667939328.0, "2570": 27667939328.0, "2575": 27667939328.0, "2580": 27667939328.0, "2585": 27667939328.0, "2590": 27667939328.0, "2595": 27667939328.0, "2600": 27667939328.0, "2605": 27667939328.0, "2610": 27667939328.0, "2615": 27667939328.0, "2620": 27667939328.0, "2625": 27667939328.0, "2630": 27667939328.0, "2635": 27667939328.0, "2640": 27667939328.0, "2645": 27667939328.0, "2650": 27667939328.0, "2655": 27667939328.0, "2660": 27667939328.0, "2665": 27667939328.0, "2670": 27667939328.0, "2675": 27667939328.0, "2680": 27667939328.0, "2685": 27667939328.0, "2690": 27667939328.0, "2695": 27667939328.0, "2700": 27667939328.0, "2705": 27667939328.0, "2710": 27667939328.0, "2715": 27667939328.0, "2720": 27667939328.0, "2725": 27667939328.0, "2730": 27667939328.0, "2735": 27667939328.0, "2740": 27667939328.0, "2745": 27667939328.0, "2750": 27667939328.0, "2755": 27667939328.0, "2760": 27667939328.0, "2765": 27667939328.0, "2770": 27667939328.0, "2775": 27667939328.0, "2780": 27667939328.0, "2785": 27667939328.0, "2790": 27667939328.0, "2795": 27667939328.0, "2800": 27667939328.0, "2805": 27667939328.0, "2810": 27667939328.0, "2815": 27667939328.0, "2820": 27667939328.0, "2825": 27667939328.0, "2830": 27667939328.0, "2835": 27667939328.0, "2840": 27667939328.0, "2845": 27667939328.0, "2850": 27667939328.0, "2855": 27667939328.0, "2860": 27667939328.0, "2865": 27667939328.0, "2870": 27667939328.0, "2875": 27667939328.0, "2880": 27667939328.0, "2885": 27667939328.0, "2890": 27667939328.0, "2895": 27667939328.0, "2900": 27667939328.0, "2905": 27667939328.0, "2910": 27667939328.0, "2915": 27667939328.0, "2920": 27667939328.0, "2925": 27667939328.0, "2930": 27667939328.0, "2935": 27667939328.0, "2940": 27667939328.0, "2945": 27667939328.0, "2950": 27667939328.0, "2955": 27667939328.0, "2960": 27667939328.0, "2965": 27667939328.0, "2970": 27667939328.0, "2975": 27667939328.0, "2980": 27667939328.0, "2985": 27667939328.0, "2990": 27667939328.0, "2995": 27667939328.0, "3000": 27667939328.0, "3005": 27667939328.0, "3010": 27667939328.0, "3015": 27667939328.0, "3020": 27667939328.0, "3025": 27667939328.0, "3030": 27667939328.0, "3035": 27667939328.0, "3040": 27667939328.0, "3045": 27667939328.0, "3050": 27667939328.0, "3055": 27667939328.0, "3060": 27667939328.0, "3065": 27667939328.0, "3070": 27667939328.0, "3075": 27667939328.0, "3080": 27667939328.0, "3085": 27667939328.0, "3090": 27667939328.0, "3095": 27667939328.0, "3100": 27667939328.0, "3105": 27667939328.0, "3110": 27667939328.0, "3115": 27667939328.0, "3120": 27667939328.0, "3125": 27667939328.0, "3130": 27667939328.0, "3135": 27667939328.0, "3140": 27667939328.0, "3145": 27667939328.0, "3150": 27667939328.0, "3155": 27667939328.0, "3160": 27667939328.0, "3165": 27667939328.0, "3170": 27667939328.0, "3175": 27667939328.0, "3180": 27667939328.0, "3185": 27667939328.0, "3190": 27667939328.0, "3195": 27667939328.0, "3200": 27667939328.0, "3205": 27667939328.0, "3210": 27667939328.0, "3215": 27667939328.0, "3220": 27667939328.0, "3225": 27667939328.0, "3230": 27667939328.0, "3235": 27667939328.0, "3240": 27667939328.0, "3245": 27667939328.0, "3250": 27667939328.0, "3255": 27667939328.0, "3260": 27667939328.0, "3265": 27667939328.0, "3270": 27667939328.0, "3275": 27667939328.0, "3280": 27667939328.0, "3285": 27667939328.0, "3290": 27667939328.0, "3295": 27667939328.0, "3300": 27667939328.0, "3305": 27667939328.0, "3310": 27667939328.0, "3315": 27667939328.0, "3320": 27667939328.0, "3325": 27667939328.0, "3330": 27667939328.0, "3335": 27667939328.0, "3340": 27667939328.0, "3345": 27667939328.0, "3350": 27667939328.0, "3355": 27667939328.0, "3360": 27667939328.0, "3365": 27667939328.0, "3370": 27667939328.0, "3375": 27667939328.0, "3380": 27667939328.0, "3385": 27667939328.0, "3390": 27667939328.0, "3395": 27667939328.0, "3400": 27667939328.0, "3405": 27667939328.0, "3410": 27667939328.0, "3415": 27667939328.0, "3420": 27667939328.0, "3425": 27667939328.0, "3430": 27667939328.0, "3435": 27667939328.0, "3440": 27667939328.0, "3445": 27667939328.0, "3450": 27667939328.0, "3455": 27667939328.0, "3460": 27667939328.0, "3465": 27667939328.0, "3470": 27667939328.0, "3475": 27667939328.0, "3480": 27667939328.0, "3485": 27667939328.0, "3490": 27667939328.0, "3495": 27667939328.0, "3500": 27667939328.0, "3505": 27667939328.0, "3510": 27667939328.0, "3515": 27667939328.0, "3520": 27667939328.0, "3525": 27667939328.0, "3530": 27667939328.0, "3535": 27667939328.0, "3540": 27667939328.0, "3545": 27667939328.0, "3550": 27667939328.0, "3555": 27667939328.0, "3560": 27667939328.0, "3565": 27667939328.0, "3570": 27667939328.0, "3575": 27667939328.0, "3580": 27667939328.0, "3585": 27667939328.0, "3590": 27667939328.0, "3595": 27667939328.0, "3600": 27667939328.0, "3605": 27667939328.0, "3610": 27667939328.0, "3615": 27667939328.0, "3620": 27667939328.0, "3625": 27667939328.0, "3630": 27667939328.0, "3635": 27667939328.0, "3640": 27667939328.0, "3645": 27667939328.0, "3650": 27667939328.0, "3655": 27667939328.0, "3660": 27667939328.0, "3665": 27667939328.0, "3670": 27667939328.0, "3675": 27667939328.0, "3680": 27667939328.0, "3685": 27667939328.0, "3690": 27667939328.0, "3695": 27667939328.0, "3700": 27667939328.0, "3705": 27667939328.0, "3710": 27667939328.0, "3715": 27667939328.0, "3720": 27667939328.0, "3725": 27667939328.0, "3730": 27667939328.0, "3735": 27667939328.0, "3740": 27667939328.0, "3745": 27667939328.0, "3750": 27667939328.0, "3755": 27667939328.0, "3760": 27667939328.0, "3765": 27667939328.0, "3770": 27667939328.0, "3775": 27667939328.0, "3780": 27667939328.0, "3785": 27667939328.0, "3790": 27667939328.0, "3795": 27667939328.0, "3800": 27667939328.0, "3805": 27667939328.0, "3810": 27667939328.0, "3815": 27667939328.0, "3820": 27667939328.0, "3825": 27667939328.0, "3830": 27667939328.0, "3835": 27667939328.0, "3840": 27667939328.0, "3845": 27667939328.0, "3850": 27667939328.0, "3855": 27667939328.0, "3860": 27667939328.0, "3865": 27667939328.0, "3870": 27667939328.0, "3875": 27667939328.0, "3880": 27667939328.0, "3885": 27667939328.0, "3890": 27667939328.0, "3895": 27667939328.0, "3900": 27667939328.0, "3905": 27667939328.0, "3910": 27667939328.0, "3915": 27667939328.0, "3920": 27667939328.0, "3925": 27667939328.0, "3930": 27667939328.0, "3935": 27667939328.0, "3940": 27667939328.0, "3945": 27667939328.0, "3950": 27667939328.0, "3955": 27667939328.0, "3960": 27667939328.0, "3965": 27667939328.0, "3970": 27667939328.0, "3975": 27667939328.0, "3980": 27667939328.0, "3985": 27667939328.0, "3990": 27667939328.0, "3995": 27667939328.0, "4000": 27667939328.0, "4005": 27667939328.0, "4010": 27667939328.0, "4015": 27667939328.0, "4020": 27667939328.0, "4025": 27667939328.0, "4030": 27667939328.0, "4035": 27667939328.0, "4040": 27667939328.0, "4045": 27667939328.0, "4050": 27667939328.0, "4055": 27667939328.0, "4060": 27667939328.0, "4065": 27667939328.0, "4070": 27667939328.0, "4075": 27667939328.0, "4080": 27667939328.0, "4085": 27667939328.0, "4090": 27667939328.0, "4095": 27667939328.0, "4100": 27667939328.0, "4105": 27667939328.0, "4110": 27667939328.0, "4115": 27667939328.0, "4120": 27667939328.0, "4125": 27667939328.0, "4130": 27667939328.0, "4135": 27667939328.0, "4140": 27667939328.0, "4145": 27667939328.0, "4150": 27667939328.0, "4155": 27667939328.0, "4160": 27667939328.0, "4165": 27667939328.0, "4170": 27667939328.0, "4175": 27667939328.0, "4180": 27667939328.0, "4185": 27667939328.0, "4190": 27667939328.0, "4195": 27667939328.0, "4200": 27667939328.0, "4205": 27667939328.0, "4210": 27667939328.0, "4215": 27667939328.0, "4220": 27667939328.0, "4225": 27667939328.0, "4230": 27667939328.0, "4235": 27667939328.0, "4240": 27667939328.0, "4245": 27667939328.0, "4250": 27667939328.0, "4255": 27667939328.0, "4260": 27667939328.0, "4265": 27667939328.0, "4270": 27667939328.0, "4275": 27667939328.0, "4280": 27667939328.0, "4285": 27667939328.0, "4290": 27667939328.0, "4295": 27667939328.0, "4300": 27667939328.0, "4305": 27667939328.0, "4310": 27667939328.0, "4315": 27667939328.0, "4320": 27667939328.0, "4325": 27667939328.0, "4330": 27667939328.0, "4335": 27667939328.0, "4340": 27667939328.0, "4345": 27667939328.0, "4350": 27667939328.0, "4355": 27667939328.0, "4360": 27667939328.0, "4365": 27667939328.0, "4370": 27667939328.0, "4375": 27667939328.0, "4380": 27667939328.0, "4385": 27667939328.0, "4390": 27667939328.0, "4395": 27667939328.0, "4400": 27667939328.0, "4405": 27667939328.0, "4410": 27667939328.0, "4415": 27667939328.0, "4420": 27667939328.0, "4425": 27667939328.0, "4430": 27667939328.0, "4435": 27667939328.0, "4440": 27667939328.0, "4445": 27667939328.0, "4450": 27667939328.0, "4455": 27667939328.0, "4460": 27667939328.0, "4465": 27667939328.0, "4470": 27667939328.0, "4475": 27667939328.0, "4480": 27667939328.0, "4485": 27667939328.0, "4490": 27667939328.0, "4495": 27667939328.0, "4500": 27667939328.0, "4505": 27667939328.0, "4510": 27667939328.0, "4515": 27667939328.0, "4520": 27667939328.0, "4525": 27667939328.0, "4530": 27667939328.0, "4535": 27667939328.0, "4540": 27667939328.0, "4545": 27667939328.0, "4550": 27667939328.0, "4555": 27667939328.0, "4560": 27667939328.0, "4565": 27667939328.0, "4570": 27667939328.0, "4575": 27667939328.0, "4580": 27667939328.0, "4585": 27667939328.0, "4590": 27667939328.0, "4595": 27667939328.0, "4600": 27667939328.0, "4605": 27667939328.0, "4610": 27667939328.0, "4615": 27667939328.0, "4620": 27667939328.0, "4625": 27667939328.0, "4630": 27667939328.0, "4635": 27667939328.0, "4640": 27667939328.0, "4645": 27667939328.0, "4650": 27667939328.0, "4655": 27667939328.0, "4660": 27667939328.0, "4665": 27667939328.0, "4670": 27667939328.0, "4675": 27667939328.0, "4680": 27667939328.0, "4685": 27667939328.0, "4690": 27667939328.0, "4695": 27667939328.0, "4700": 27667939328.0, "4705": 27667939328.0, "4710": 27667939328.0, "4715": 27667939328.0, "4720": 27667939328.0, "4725": 27667939328.0, "4730": 27667939328.0, "4735": 27667939328.0, "4740": 27667939328.0, "4745": 27667939328.0, "4750": 27667939328.0, "4755": 27667939328.0, "4760": 27667939328.0, "4765": 27667939328.0, "4770": 27667939328.0, "4775": 27667939328.0, "4780": 27667939328.0, "4785": 27667939328.0, "4790": 27667939328.0, "4795": 27667939328.0, "4800": 27667939328.0, "4805": 27667939328.0, "4810": 27667939328.0, "4815": 27667939328.0, "4820": 27667939328.0, "4825": 27667939328.0, "4830": 27667939328.0, "4835": 27667939328.0, "4840": 27667939328.0, "4845": 27667939328.0, "4850": 27667939328.0, "4855": 27667939328.0, "4860": 27667939328.0, "4865": 27667939328.0, "4870": 27667939328.0, "4875": 27667939328.0, "4880": 27667939328.0, "4885": 27667939328.0, "4890": 27667939328.0, "4895": 27667939328.0, "4900": 27667939328.0, "4905": 27667939328.0, "4910": 27667939328.0, "4915": 27667939328.0, "4920": 27667939328.0, "4925": 27667939328.0, "4930": 27667939328.0, "4935": 27667939328.0, "4940": 27667939328.0, "4945": 27667939328.0, "4950": 27667939328.0, "4955": 27667939328.0, "4960": 27667939328.0, "4965": 27667939328.0, "4970": 27667939328.0, "4975": 27667939328.0, "4980": 27667939328.0, "4985": 27667939328.0, "4990": 27667939328.0, "4995": 27667939328.0, "5000": 27667939328.0, "5005": 27667939328.0, "5010": 27667939328.0, "5015": 27667939328.0, "5020": 27667939328.0, "5025": 27667939328.0, "5030": 27667939328.0, "5035": 27667939328.0, "5040": 27667939328.0, "5045": 27667939328.0, "5050": 27667939328.0, "5055": 27667939328.0, "5060": 27667939328.0, "5065": 27667939328.0, "5070": 27667939328.0, "5075": 27667939328.0, "5080": 27667939328.0, "5085": 27667939328.0, "5090": 27667939328.0, "5095": 27667939328.0, "5100": 27667939328.0, "5105": 27667939328.0, "5110": 27667939328.0, "5115": 27667939328.0, "5120": 27667939328.0, "5125": 27667939328.0, "5130": 27667939328.0, "5135": 27667939328.0, "5140": 27667939328.0, "5145": 27667939328.0, "5150": 27667939328.0, "5155": 27667939328.0, "5160": 27667939328.0, "5165": 27667939328.0, "5170": 27667939328.0, "5175": 27667939328.0, "5180": 27667939328.0, "5185": 27667939328.0, "5190": 27667939328.0, "5195": 27667939328.0, "5200": 27667939328.0, "5205": 27667939328.0, "5210": 27667939328.0, "5215": 27667939328.0, "5220": 27667939328.0, "5225": 27667939328.0, "5230": 27667939328.0, "5235": 27667939328.0, "5240": 27667939328.0, "5245": 27667939328.0, "5250": 27667939328.0, "5255": 27667939328.0, "5260": 27667939328.0, "5265": 27667939328.0, "5270": 27667939328.0, "5275": 27667939328.0, "5280": 27667939328.0, "5285": 27667939328.0, "5290": 27667939328.0, "5295": 27667939328.0, "5300": 27667939328.0, "5305": 27667939328.0, "5310": 27667939328.0, "5315": 27667939328.0, "5320": 27667939328.0, "5325": 27667939328.0, "5330": 27667939328.0, "5335": 27667939328.0, "5340": 27667939328.0, "5345": 27667939328.0, "5350": 27667939328.0, "5355": 27667939328.0, "5360": 27667939328.0, "5365": 27667939328.0, "5370": 27667939328.0, "5375": 27667939328.0, "5380": 27667939328.0, "5385": 27667939328.0, "5390": 27667939328.0, "5395": 27667939328.0, "5400": 27667939328.0, "5405": 27667939328.0, "5410": 27667939328.0, "5415": 27667939328.0, "5420": 27667939328.0, "5425": 27667939328.0, "5430": 27667939328.0, "5435": 27667939328.0, "5440": 27667939328.0, "5445": 27667939328.0, "5450": 27667939328.0, "5455": 27667939328.0, "5460": 27667939328.0, "5465": 27667939328.0, "5470": 27667939328.0, "5475": 27667939328.0, "5480": 27667939328.0, "5485": 27667939328.0, "5490": 27667939328.0, "5495": 27667939328.0, "5500": 27667939328.0, "5505": 27667939328.0, "5510": 27667939328.0, "5515": 27667939328.0, "5520": 27667939328.0, "5525": 27667939328.0, "5530": 27667939328.0, "5535": 27667939328.0, "5540": 27667939328.0, "5545": 27667939328.0, "5550": 27667939328.0, "5555": 27667939328.0, "5560": 27667939328.0, "5565": 27667939328.0, "5570": 27667939328.0, "5575": 27667939328.0, "5580": 27667939328.0, "5585": 27667939328.0, "5590": 27667939328.0, "5595": 27667939328.0, "5600": 27667939328.0, "5605": 27667939328.0, "5610": 27667939328.0, "5615": 27667939328.0, "5620": 27667939328.0, "5625": 27667939328.0, "5630": 27667939328.0, "5635": 27667939328.0, "5640": 27667939328.0, "5645": 27667939328.0, "5650": 27667939328.0, "5655": 27667939328.0, "5660": 27667939328.0, "5665": 27667939328.0, "5670": 27667939328.0, "5675": 27667939328.0, "5680": 27667939328.0, "5685": 27667939328.0, "5690": 27667939328.0, "5695": 27667939328.0, "5700": 27667939328.0, "5705": 27667939328.0, "5710": 27667939328.0, "5715": 27667939328.0, "5720": 27667939328.0, "5725": 27667939328.0, "5730": 27667939328.0, "5735": 27667939328.0, "5740": 27667939328.0, "5745": 27667939328.0, "5750": 27667939328.0, "5755": 27667939328.0, "5760": 27667939328.0, "5765": 27667939328.0, "5770": 27667939328.0, "5775": 27667939328.0, "5780": 27667939328.0, "5785": 27667939328.0, "5790": 27667939328.0, "5795": 27667939328.0, "5800": 27667939328.0, "5805": 27667939328.0, "5810": 27667939328.0, "5815": 27667939328.0, "5820": 27667939328.0, "5825": 27667939328.0, "5830": 27667939328.0, "5835": 27667939328.0, "5840": 27667939328.0, "5845": 27667939328.0, "5850": 27667939328.0, "5855": 27667939328.0, "5860": 27667939328.0, "5865": 27667939328.0, "5870": 27667939328.0, "5875": 27667939328.0, "5880": 27667939328.0, "5885": 27667939328.0, "5890": 27667939328.0, "5895": 27667939328.0, "5900": 27667939328.0, "5905": 27667939328.0, "5910": 27667939328.0, "5915": 27667939328.0, "5920": 27667939328.0, "5925": 27667939328.0, "5930": 27667939328.0, "5935": 27667939328.0, "5940": 27667939328.0, "5945": 27667939328.0, "5950": 27667939328.0, "5955": 27667939328.0, "5960": 27667939328.0, "5965": 27667939328.0, "5970": 27667939328.0, "5975": 27667939328.0, "5980": 27667939328.0, "5985": 27667939328.0, "5990": 27667939328.0, "5995": 27667939328.0, "6000": 27667939328.0, "6005": 27667939328.0, "6010": 27667939328.0, "6015": 27667939328.0, "6020": 27667939328.0, "6025": 27667939328.0, "6030": 27667939328.0, "6035": 27667939328.0, "6040": 27667939328.0, "6045": 27667939328.0, "6050": 27667939328.0, "6055": 27667939328.0, "6060": 27667939328.0, "6065": 27667939328.0, "6070": 27667939328.0, "6075": 27667939328.0, "6080": 27667939328.0, "6085": 27667939328.0, "6090": 27667939328.0, "6095": 27667939328.0, "6100": 27667939328.0, "6105": 27667939328.0, "6110": 27667939328.0, "6115": 27667939328.0, "6120": 27667939328.0, "6125": 27667939328.0, "6130": 27667939328.0, "6135": 27667939328.0, "6140": 27667939328.0, "6145": 27667939328.0, "6150": 27667939328.0, "6155": 27667939328.0, "6160": 27667939328.0, "6165": 27667939328.0, "6170": 27667939328.0, "6175": 27667939328.0, "6180": 27667939328.0, "6185": 27667939328.0, "6190": 27667939328.0, "6195": 27667939328.0, "6200": 27667939328.0, "6205": 27667939328.0, "6210": 27667939328.0, "6215": 27667939328.0, "6220": 27667939328.0, "6225": 27667939328.0, "6230": 27667939328.0, "6235": 27667939328.0, "6240": 27667939328.0, "6245": 27667939328.0, "6250": 27667939328.0, "6255": 27667939328.0, "6260": 27667939328.0, "6265": 27667939328.0, "6270": 27667939328.0, "6275": 27667939328.0, "6280": 27667939328.0, "6285": 27667939328.0, "6290": 27667939328.0, "6295": 27667939328.0, "6300": 27667939328.0, "6305": 27667939328.0, "6310": 27667939328.0, "6315": 27667939328.0, "6320": 27667939328.0, "6325": 27667939328.0, "6330": 27667939328.0, "6335": 27667939328.0, "6340": 27667939328.0, "6345": 27667939328.0, "6350": 27667939328.0, "6355": 27667939328.0, "6360": 27667939328.0, "6365": 27667939328.0, "6370": 27667939328.0, "6375": 27667939328.0, "6380": 27667939328.0, "6385": 27667939328.0, "6390": 27667939328.0, "6395": 27667939328.0, "6400": 27667939328.0, "6405": 27667939328.0, "6410": 27667939328.0, "6415": 27667939328.0, "6420": 27667939328.0, "6425": 27667939328.0, "6430": 27667939328.0, "6435": 27667939328.0, "6440": 27667939328.0, "6445": 27667939328.0, "6450": 27667939328.0, "6455": 27667939328.0, "6460": 27667939328.0, "6465": 27667939328.0, "6470": 27667939328.0, "6475": 27667939328.0, "6480": 27667939328.0, "6485": 27667939328.0, "6490": 27667939328.0, "6495": 27667939328.0, "6500": 27667939328.0, "6505": 27667939328.0, "6510": 27667939328.0, "6515": 27667939328.0, "6520": 27667939328.0, "6525": 27667939328.0, "6530": 27667939328.0, "6535": 27667939328.0, "6540": 27667939328.0, "6545": 27667939328.0, "6550": 27667939328.0, "6555": 27667939328.0, "6560": 27667939328.0, "6565": 27667939328.0, "6570": 27667939328.0, "6575": 27667939328.0, "6580": 27667939328.0, "6585": 27667939328.0, "6590": 27667939328.0, "6595": 27667939328.0, "6600": 27667939328.0, "6605": 27667939328.0, "6610": 27667939328.0, "6615": 27667939328.0, "6620": 27667939328.0, "6625": 27667939328.0, "6630": 27667939328.0, "6635": 27667939328.0, "6640": 27667939328.0, "6645": 27667939328.0, "6650": 27667939328.0, "6655": 27667939328.0, "6660": 27667939328.0, "6665": 27667939328.0, "6670": 27667939328.0, "6675": 27667939328.0, "6680": 27667939328.0, "6685": 27667939328.0, "6690": 27667939328.0, "6695": 27667939328.0, "6700": 27667939328.0, "6705": 27667939328.0, "6710": 27667939328.0, "6715": 27667939328.0, "6720": 27667939328.0, "6725": 27667939328.0, "6730": 27667939328.0, "6735": 27667939328.0, "6740": 27667939328.0, "6745": 27667939328.0, "6750": 27667939328.0, "6755": 27667939328.0, "6760": 27667939328.0, "6765": 27667939328.0, "6770": 27667939328.0, "6775": 27667939328.0, "6780": 27667939328.0, "6785": 27667939328.0, "6790": 27667939328.0, "6795": 27667939328.0, "6800": 27667939328.0, "6805": 27667939328.0, "6810": 27667939328.0, "6815": 27667939328.0, "6820": 27667939328.0, "6825": 27667939328.0, "6830": 27667939328.0, "6835": 27667939328.0, "6840": 27667939328.0, "6845": 27667939328.0, "6850": 27667939328.0, "6855": 27667939328.0, "6860": 27667939328.0, "6865": 27667939328.0, "6870": 27667939328.0, "6875": 27667939328.0, "6880": 27667939328.0, "6885": 27667939328.0, "6890": 27667939328.0, "6895": 27667939328.0, "6900": 27667939328.0, "6905": 27667939328.0, "6910": 27667939328.0, "6915": 27667939328.0, "6920": 27667939328.0, "6925": 27667939328.0, "6930": 27667939328.0, "6935": 27667939328.0, "6940": 27667939328.0, "6945": 27667939328.0, "6950": 27667939328.0, "6955": 27667939328.0, "6960": 27667939328.0, "6965": 27667939328.0, "6970": 27667939328.0, "6975": 27667939328.0, "6980": 27667939328.0, "6985": 27667939328.0, "6990": 27667939328.0, "6995": 27667939328.0, "7000": 27667939328.0, "7005": 27667939328.0, "7010": 27667939328.0, "7015": 27667939328.0, "7020": 27667939328.0, "7025": 27667939328.0, "7030": 27667939328.0, "7035": 27667939328.0, "7040": 27667939328.0, "7045": 27667939328.0, "7050": 27667939328.0, "7055": 27667939328.0, "7060": 27667939328.0, "7065": 27667939328.0, "7070": 27667939328.0, "7075": 27667939328.0, "7080": 27667939328.0, "7085": 27667939328.0, "7090": 27667939328.0, "7095": 27667939328.0, "7100": 27667939328.0, "7105": 27667939328.0, "7110": 27667939328.0, "7115": 27667939328.0, "7120": 27667939328.0, "7125": 27667939328.0, "7130": 27667939328.0, "7135": 27667939328.0, "7140": 27667939328.0, "7145": 27667939328.0, "7150": 27667939328.0, "7155": 27667939328.0, "7160": 27667939328.0, "7165": 27667939328.0, "7170": 27667939328.0, "7175": 27667939328.0, "7180": 27667939328.0, "7185": 27667939328.0, "7190": 27667939328.0, "7195": 27667939328.0, "7200": 27667939328.0, "7205": 27667939328.0, "7210": 27667939328.0, "7215": 27667939328.0, "7220": 27667939328.0, "7225": 27667939328.0, "7230": 27667939328.0, "7235": 27667939328.0, "7240": 27667939328.0, "7245": 27667939328.0, "7250": 27667939328.0, "7255": 27667939328.0, "7260": 27667939328.0, "7265": 27667939328.0, "7270": 27667939328.0, "7275": 27667939328.0, "7280": 27667939328.0, "7285": 27667939328.0, "7290": 27667939328.0, "7295": 27667939328.0, "7300": 27667939328.0, "7305": 27667939328.0, "7310": 27667939328.0, "7315": 27667939328.0, "7320": 27667939328.0, "7325": 27667939328.0, "7330": 27667939328.0, "7335": 27667939328.0, "7340": 27667939328.0, "7345": 27667939328.0, "7350": 27667939328.0, "7355": 27667939328.0, "7360": 27667939328.0, "7365": 27667939328.0, "7370": 27667939328.0, "7375": 27667939328.0, "7380": 27667939328.0, "7385": 27667939328.0, "7390": 27667939328.0, "7395": 27667939328.0, "7400": 27667939328.0, "7405": 27667939328.0, "7410": 27667939328.0, "7415": 27667939328.0, "7420": 27667939328.0, "7425": 27667939328.0, "7430": 27667939328.0, "7435": 27667939328.0, "7440": 27667939328.0, "7445": 27667939328.0, "7450": 27667939328.0, "7455": 27667939328.0, "7460": 27667939328.0, "7465": 27667939328.0, "7470": 27667939328.0, "7475": 27667939328.0, "7480": 27667939328.0, "7485": 27667939328.0, "7490": 27667939328.0, "7495": 27667939328.0, "7500": 27667939328.0, "7505": 27667939328.0, "7510": 27667939328.0, "7515": 27667939328.0, "7520": 27667939328.0, "7525": 27667939328.0, "7530": 27667939328.0, "7535": 27667939328.0, "7540": 27667939328.0, "7545": 27667939328.0, "7550": 27667939328.0, "7555": 27667939328.0, "7560": 27667939328.0, "7565": 27667939328.0, "7570": 27667939328.0, "7575": 27667939328.0, "7580": 27667939328.0, "7585": 27667939328.0, "7590": 27667939328.0, "7595": 27667939328.0, "7600": 27667939328.0, "7605": 27667939328.0, "7610": 27667939328.0, "7615": 27667939328.0, "7620": 27667939328.0, "7625": 27667939328.0, "7630": 27667939328.0, "7635": 27667939328.0, "7640": 27667939328.0, "7645": 27667939328.0, "7650": 27667939328.0, "7655": 27667939328.0, "7660": 27667939328.0, "7665": 27667939328.0, "7670": 27667939328.0, "7675": 27667939328.0, "7680": 27667939328.0, "7685": 27667939328.0, "7690": 27667939328.0, "7695": 27667939328.0, "7700": 27667939328.0, "7705": 27667939328.0, "7710": 27667939328.0, "7715": 27667939328.0, "7720": 27667939328.0, "7725": 27667939328.0, "7730": 27667939328.0, "7735": 27667939328.0, "7740": 27667939328.0, "7745": 27667939328.0, "7750": 27667939328.0, "7755": 27667939328.0, "7760": 27667939328.0, "7765": 27667939328.0, "7770": 27667939328.0, "7775": 27667939328.0, "7780": 27667939328.0, "7785": 27667939328.0, "7790": 27667939328.0, "7795": 27667939328.0, "7800": 27667939328.0, "7805": 27667939328.0, "7810": 27667939328.0, "7815": 27667939328.0, "7820": 27667939328.0, "7825": 27667939328.0, "7830": 27667939328.0, "7835": 27667939328.0, "7840": 27667939328.0, "7845": 27667939328.0, "7850": 27667939328.0, "7855": 27667939328.0, "7860": 27667939328.0, "7865": 27667939328.0, "7870": 27667939328.0, "7875": 27667939328.0, "7880": 27667939328.0, "7885": 27667939328.0, "7890": 27667939328.0, "7895": 27667939328.0, "7900": 27667939328.0, "7905": 27667939328.0, "7910": 27667939328.0, "7915": 27667939328.0, "7920": 27667939328.0, "7925": 27667939328.0, "7930": 27667939328.0, "7935": 27667939328.0, "7940": 27667939328.0, "7945": 27667939328.0, "7950": 27667939328.0, "7955": 27667939328.0, "7960": 27667939328.0, "7965": 27667939328.0, "7970": 27667939328.0, "7975": 27667939328.0, "7980": 27667939328.0, "7985": 27667939328.0, "7990": 27667939328.0, "7995": 27667939328.0, "8000": 27667939328.0, "8005": 27667939328.0, "8010": 27667939328.0, "8015": 27667939328.0, "8020": 27667939328.0, "8025": 27667939328.0, "8030": 27667939328.0, "8035": 27667939328.0, "8040": 27667939328.0, "8045": 27667939328.0, "8050": 27667939328.0, "8055": 27667939328.0, "8060": 27667939328.0, "8065": 27667939328.0, "8070": 27667939328.0, "8075": 27667939328.0, "8080": 27667939328.0, "8085": 27667939328.0, "8090": 27667939328.0, "8095": 27667939328.0, "8100": 27667939328.0, "8105": 27667939328.0, "8110": 27667939328.0, "8115": 27667939328.0, "8120": 27667939328.0, "8125": 27667939328.0, "8130": 27667939328.0, "8135": 27667939328.0, "8140": 27667939328.0, "8145": 27667939328.0, "8150": 27667939328.0, "8155": 27667939328.0, "8160": 27667939328.0, "8165": 27667939328.0, "8170": 27667939328.0, "8175": 27667939328.0, "8180": 27667939328.0, "8185": 27667939328.0, "8190": 27667939328.0, "8195": 27667939328.0, "8200": 27667939328.0, "8205": 27667939328.0, "8210": 27667939328.0, "8215": 27667939328.0, "8220": 27667939328.0, "8225": 27667939328.0, "8230": 27667939328.0, "8235": 27667939328.0, "8240": 27667939328.0, "8245": 27667939328.0, "8250": 27667939328.0, "8255": 27667939328.0, "8260": 27667939328.0, "8265": 27667939328.0, "8270": 27667939328.0, "8275": 27667939328.0, "8280": 27667939328.0, "8285": 27667939328.0, "8290": 27667939328.0, "8295": 27667939328.0, "8300": 27667939328.0, "8305": 27667939328.0, "8310": 27667939328.0, "8315": 27667939328.0, "8320": 27667939328.0, "8325": 27667939328.0, "8330": 27667939328.0, "8335": 27667939328.0, "8340": 27667939328.0, "8345": 27667939328.0, "8350": 27667939328.0, "8355": 27667939328.0, "8360": 27667939328.0, "8365": 27667939328.0, "8370": 27667939328.0, "8375": 27667939328.0, "8380": 27667939328.0, "8385": 27667939328.0, "8390": 27667939328.0, "8395": 27667939328.0, "8400": 27667939328.0, "8405": 27667939328.0, "8410": 27667939328.0, "8415": 27667939328.0, "8420": 27667939328.0, "8425": 27667939328.0, "8430": 27667939328.0, "8435": 27667939328.0, "8440": 27667939328.0, "8445": 27667939328.0, "8450": 27667939328.0, "8455": 27667939328.0, "8460": 27667939328.0, "8465": 27667939328.0, "8470": 27667939328.0, "8475": 27667939328.0, "8480": 27667939328.0, "8485": 27667939328.0, "8490": 27667939328.0, "8495": 27667939328.0, "8500": 27667939328.0, "8505": 27667939328.0, "8510": 27667939328.0, "8515": 27667939328.0, "8520": 27667939328.0, "8525": 27667939328.0, "8530": 27667939328.0, "8535": 27667939328.0, "8540": 27667939328.0, "8545": 27667939328.0, "8550": 27667939328.0, "8555": 27667939328.0, "8560": 27667939328.0, "8565": 27667939328.0, "8570": 27667939328.0, "8575": 27667939328.0, "8580": 27667939328.0, "8585": 27667939328.0, "8590": 27667939328.0, "8595": 27667939328.0, "8600": 27667939328.0, "8605": 27667939328.0, "8610": 27666171904.0, "8615": 27666171904.0, "8620": 27666173952.0, "8625": 27666173952.0, "8630": 27666763776.0, "8635": 27666763776.0, "8640": 27666763776.0, "8645": 27666763776.0, "8650": 27666763776.0, "8655": 27666763776.0, "8660": 27666763776.0, "8665": 27666763776.0, "8670": 27666763776.0, "8675": 27666763776.0, "8680": 27666763776.0, "8685": 27666763776.0, "8690": 27666763776.0, "8695": 27666763776.0, "8700": 27666763776.0, "8705": 27666763776.0, "8710": 27666763776.0, "8715": 27666763776.0, "8720": 27666763776.0, "8725": 27666763776.0, "8730": 27666763776.0, "8735": 27666763776.0, "8740": 27666763776.0, "8745": 27666763776.0, "8750": 27666763776.0, "8755": 27666763776.0, "8760": 27666763776.0, "8765": 27666763776.0, "8770": 27666763776.0, "8775": 27666763776.0, "8780": 27666763776.0, "8785": 27666763776.0, "8790": 27666763776.0, "8795": 27666763776.0, "8800": 27666763776.0, "8805": 27666763776.0, "8810": 27666763776.0, "8815": 27666763776.0, "8820": 27666763776.0, "8825": 27666763776.0, "8830": 27666763776.0, "8835": 27666763776.0, "8840": 27666763776.0, "8845": 27666763776.0, "8850": 27666763776.0, "8855": 27666763776.0, "8860": 27666763776.0, "8865": 27666763776.0, "8870": 27666763776.0, "8875": 27666763776.0, "8880": 27666763776.0, "8885": 27666763776.0, "8890": 27666763776.0, "8895": 27666763776.0, "8900": 27666763776.0, "8905": 27666763776.0, "8910": 27666763776.0, "8915": 27666763776.0, "8920": 27666763776.0, "8925": 27666763776.0, "8930": 27666763776.0, "8935": 27666763776.0, "8940": 27666763776.0, "8945": 27666763776.0, "8950": 27666763776.0, "8955": 27666763776.0, "8960": 27666763776.0, "8965": 27666763776.0, "8970": 27666763776.0, "8975": 27666763776.0, "8980": 27666763776.0, "8985": 27666763776.0, "8990": 27666763776.0, "8995": 27666763776.0, "9000": 27666763776.0, "9005": 27666763776.0, "9010": 27666763776.0, "9015": 27666763776.0, "9020": 27666763776.0, "9025": 27666763776.0, "9030": 27666763776.0, "9035": 27666763776.0, "9040": 27666763776.0, "9045": 27666763776.0, "9050": 27666763776.0, "9055": 27666763776.0, "9060": 27666763776.0, "9065": 27666763776.0, "9070": 27666763776.0, "9075": 27666763776.0, "9080": 27666763776.0, "9085": 27666763776.0, "9090": 27666763776.0, "9095": 27666763776.0, "9100": 27666763776.0, "9105": 27666763776.0, "9110": 27666763776.0, "9115": 27666763776.0, "9120": 27666763776.0, "9125": 27666763776.0, "9130": 27666763776.0, "9135": 27666763776.0, "9140": 27666763776.0, "9145": 27666763776.0, "9150": 27666763776.0, "9155": 27666763776.0, "9160": 27666763776.0, "9165": 27666763776.0, "9170": 27666763776.0, "9175": 27666763776.0, "9180": 27666763776.0, "9185": 27666763776.0, "9190": 27666763776.0, "9195": 27666763776.0, "9200": 27666763776.0, "9205": 27666763776.0, "9210": 27666763776.0, "9215": 27666763776.0, "9220": 27666763776.0, "9225": 27666763776.0, "9230": 27666763776.0, "9235": 27666763776.0, "9240": 27666763776.0, "9245": 27666763776.0, "9250": 27666763776.0, "9255": 27666763776.0, "9260": 27666763776.0, "9265": 27666763776.0, "9270": 27666763776.0, "9275": 27666763776.0, "9280": 27666763776.0, "9285": 27666763776.0, "9290": 27666763776.0, "9295": 27666763776.0, "9300": 27666763776.0, "9305": 27666763776.0, "9310": 27666763776.0, "9315": 27666763776.0, "9320": 27666763776.0, "9325": 27666763776.0, "9330": 27666763776.0, "9335": 27666763776.0, "9340": 27666763776.0, "9345": 27666763776.0, "9350": 27666763776.0, "9355": 27666763776.0, "9360": 27666763776.0, "9365": 27666763776.0, "9370": 27666763776.0, "9375": 27666763776.0, "9380": 27666763776.0, "9385": 27666763776.0, "9390": 27666763776.0, "9395": 27666763776.0, "9400": 27666763776.0, "9405": 27666763776.0, "9410": 27666763776.0, "9415": 27666763776.0, "9420": 27666763776.0, "9425": 27666763776.0, "9430": 27666763776.0, "9435": 27666763776.0, "9440": 27666763776.0, "9445": 27666763776.0, "9450": 27666763776.0, "9455": 27666763776.0, "9460": 27666763776.0, "9465": 27666763776.0, "9470": 27666763776.0, "9475": 27666763776.0, "9480": 27666763776.0, "9485": 27666763776.0, "9490": 27666763776.0, "9495": 27666763776.0, "9500": 27666763776.0, "9505": 27666763776.0, "9510": 27666763776.0, "9515": 27666763776.0, "9520": 27666763776.0, "9525": 27666763776.0, "9530": 27666763776.0, "9535": 27666763776.0, "9540": 27666763776.0, "9545": 27666763776.0, "9550": 27666763776.0, "9555": 27666763776.0, "9560": 27666763776.0, "9565": 27666763776.0, "9570": 27666763776.0, "9575": 27666763776.0, "9580": 27666763776.0, "9585": 27666763776.0, "9590": 27666763776.0, "9595": 27666763776.0, "9600": 27666763776.0, "9605": 27666763776.0, "9610": 27666763776.0, "9615": 27666763776.0, "9620": 27666763776.0, "9625": 27666763776.0, "9630": 27666763776.0, "9635": 27666763776.0, "9640": 27666763776.0, "9645": 27666763776.0, "9650": 27666763776.0, "9655": 27666763776.0, "9660": 27666763776.0, "9665": 27666763776.0, "9670": 27666763776.0, "9675": 27666763776.0, "9680": 27666763776.0, "9685": 27666763776.0, "9690": 27666763776.0, "9695": 27666763776.0, "9700": 27666763776.0, "9705": 27666763776.0, "9710": 27666763776.0, "9715": 27666763776.0, "9720": 27666763776.0, "9725": 27666763776.0, "9730": 27666763776.0, "9735": 27666763776.0, "9740": 27666763776.0, "9745": 27666763776.0, "9750": 27666763776.0, "9755": 27666763776.0, "9760": 27666763776.0, "9765": 27666763776.0, "9770": 27666763776.0, "9775": 27666763776.0, "9780": 27666763776.0, "9785": 27666763776.0, "9790": 27666763776.0, "9795": 27666763776.0, "9800": 27666763776.0, "9805": 27666763776.0, "9810": 27666763776.0, "9815": 27666763776.0, "9820": 27666763776.0, "9825": 27666763776.0, "9830": 27666763776.0, "9835": 27666763776.0, "9840": 27666763776.0, "9845": 27666763776.0, "9850": 27666763776.0, "9855": 27666763776.0, "9860": 27666763776.0, "9865": 27666763776.0, "9870": 27666763776.0, "9875": 27666763776.0, "9880": 27666763776.0, "9885": 27666763776.0, "9890": 27666763776.0, "9895": 27666763776.0, "9900": 27666763776.0, "9905": 27666763776.0, "9910": 27666763776.0, "9915": 27666763776.0, "9920": 27666763776.0, "9925": 27666763776.0, "9930": 27666763776.0, "9935": 27666763776.0, "9940": 27666763776.0, "9945": 27666763776.0, "9950": 27666763776.0, "9955": 27666763776.0, "9960": 27666763776.0, "9965": 27666763776.0, "9970": 27666763776.0, "9975": 27666763776.0, "9980": 27666763776.0, "9985": 27666763776.0, "9990": 27666763776.0, "9995": 27666763776.0, "10000": 27666763776.0, "10005": 27666763776.0, "10010": 27666763776.0, "10015": 27666763776.0, "10020": 27666763776.0, "10025": 27666763776.0, "10030": 27666763776.0, "10035": 27666763776.0, "10040": 27666763776.0, "10045": 27666763776.0, "10050": 27666763776.0, "10055": 27666763776.0, "10060": 27666763776.0, "10065": 27666763776.0, "10070": 27666763776.0, "10075": 27666763776.0, "10080": 27666763776.0, "10085": 27666763776.0, "10090": 27666763776.0, "10095": 27666763776.0, "10100": 27666763776.0, "10105": 27666763776.0, "10110": 27666763776.0, "10115": 27666763776.0, "10120": 27666763776.0, "10125": 27666763776.0, "10130": 27666763776.0, "10135": 27666763776.0, "10140": 27666763776.0, "10145": 27666763776.0, "10150": 27666763776.0, "10155": 27666763776.0, "10160": 27666763776.0, "10165": 27666763776.0, "10170": 27666763776.0, "10175": 27666763776.0, "10180": 27666763776.0, "10185": 27666763776.0, "10190": 27666763776.0, "10195": 27666763776.0, "10200": 27666763776.0, "10205": 27666763776.0, "10210": 27666763776.0, "10215": 27666763776.0, "10220": 27666763776.0, "10225": 27666763776.0, "10230": 27666763776.0, "10235": 27666763776.0, "10240": 27666763776.0, "10245": 27666763776.0, "10250": 27666763776.0, "10255": 27666763776.0, "10260": 27666763776.0, "10265": 27666763776.0, "10270": 27666763776.0, "10275": 27666763776.0, "10280": 27666763776.0, "10285": 27666763776.0, "10290": 27666763776.0, "10295": 27666763776.0, "10300": 27666763776.0, "10305": 27666763776.0, "10310": 27666763776.0, "10315": 27666763776.0, "10320": 27666763776.0, "10325": 27666763776.0, "10330": 27666763776.0, "10335": 27666763776.0, "10340": 27666763776.0, "10345": 27666763776.0, "10350": 27666763776.0, "10355": 27666763776.0, "10360": 27666763776.0, "10365": 27666763776.0, "10370": 27666763776.0, "10375": 27666763776.0, "10380": 27666763776.0, "10385": 27666763776.0, "10390": 27666763776.0, "10395": 27666763776.0, "10400": 27666763776.0, "10405": 27666763776.0, "10410": 27666763776.0, "10415": 27666763776.0, "10420": 27666763776.0, "10425": 27666763776.0, "10430": 27666763776.0, "10435": 27666763776.0, "10440": 27666763776.0, "10445": 27666763776.0, "10450": 27666763776.0, "10455": 27666763776.0, "10460": 27666763776.0, "10465": 27666763776.0, "10470": 27666763776.0, "10475": 27666763776.0, "10480": 27666763776.0, "10485": 27666763776.0, "10490": 27666763776.0, "10495": 27666763776.0, "10500": 27666763776.0, "10505": 27666763776.0, "10510": 27666763776.0, "10515": 27666763776.0, "10520": 27666763776.0, "10525": 27666763776.0, "10530": 27666763776.0, "10535": 27666763776.0, "10540": 27666763776.0, "10545": 27666763776.0, "10550": 27666763776.0, "10555": 27666763776.0, "10560": 27666763776.0, "10565": 27666763776.0, "10570": 27666763776.0, "10575": 27666763776.0, "10580": 27666763776.0, "10585": 27666763776.0, "10590": 27666763776.0, "10595": 27666763776.0, "10600": 27666763776.0, "10605": 27666763776.0, "10610": 27666763776.0, "10615": 27666763776.0, "10620": 27666763776.0, "10625": 27666763776.0, "10630": 27666763776.0, "10635": 27666763776.0, "10640": 27666763776.0, "10645": 27666763776.0, "10650": 27666763776.0, "10655": 27666763776.0, "10660": 27666763776.0, "10665": 27666763776.0, "10670": 27666763776.0, "10675": 27666763776.0, "10680": 27666763776.0, "10685": 27666763776.0, "10690": 27666763776.0, "10695": 27666763776.0, "10700": 27666763776.0, "10705": 27666763776.0, "10710": 27666763776.0, "10715": 27666763776.0, "10720": 27666763776.0, "10725": 27666763776.0, "10730": 27666763776.0, "10735": 27666763776.0, "10740": 27666763776.0, "10745": 27666763776.0, "10750": 27666763776.0, "10755": 27666763776.0, "10760": 27666763776.0, "10765": 27666763776.0, "10770": 27666763776.0, "10775": 27666763776.0, "10780": 27666763776.0, "10785": 27666763776.0, "10790": 27666763776.0, "10795": 27666763776.0, "10800": 27666763776.0, "10805": 27666763776.0, "10810": 27666763776.0, "10815": 27666763776.0, "10820": 27666763776.0, "10825": 27666763776.0, "10830": 27666763776.0, "10835": 27666763776.0, "10840": 27666763776.0, "10845": 27666763776.0, "10850": 27666763776.0, "10855": 27666763776.0, "10860": 27666763776.0, "10865": 27666763776.0, "10870": 27666763776.0, "10875": 27666763776.0, "10880": 27666763776.0, "10885": 27666763776.0, "10890": 27666763776.0, "10895": 27666763776.0, "10900": 27666763776.0, "10905": 27666763776.0, "10910": 27666763776.0, "10915": 27666763776.0, "10920": 27666763776.0, "10925": 27666763776.0, "10930": 27666763776.0, "10935": 27666763776.0, "10940": 27666763776.0, "10945": 27666763776.0, "10950": 27666763776.0, "10955": 27666763776.0, "10960": 27666763776.0, "10965": 27666763776.0, "10970": 27666763776.0, "10975": 27666763776.0, "10980": 27666763776.0, "10985": 27666763776.0, "10990": 27666763776.0, "10995": 27666763776.0, "11000": 27666763776.0, "11005": 27666763776.0, "11010": 27666763776.0, "11015": 27666763776.0, "11020": 27666763776.0, "11025": 27666763776.0, "11030": 27666763776.0, "11035": 27666763776.0, "11040": 27666763776.0, "11045": 27666763776.0, "11050": 27666763776.0, "11055": 27666763776.0, "11060": 27666763776.0, "11065": 27666763776.0, "11070": 27666763776.0, "11075": 27666763776.0, "11080": 27666763776.0, "11085": 27666763776.0, "11090": 27666763776.0, "11095": 27666763776.0, "11100": 27666763776.0, "11105": 27666763776.0, "11110": 27666763776.0, "11115": 27666763776.0, "11120": 27666763776.0, "11125": 27666763776.0, "11130": 27666763776.0, "11135": 27666763776.0, "11140": 27666763776.0, "11145": 27666763776.0, "11150": 27666763776.0, "11155": 27666763776.0, "11160": 27666763776.0, "11165": 27666763776.0, "11170": 27666763776.0, "11175": 27666763776.0, "11180": 27666763776.0, "11185": 27666763776.0, "11190": 27666763776.0, "11195": 27666763776.0, "11200": 27666763776.0, "11205": 27666763776.0, "11210": 27666763776.0, "11215": 27666763776.0, "11220": 27666763776.0, "11225": 27666763776.0, "11230": 27666763776.0, "11235": 27666763776.0, "11240": 27666763776.0, "11245": 27666763776.0, "11250": 27666763776.0, "11255": 27666763776.0, "11260": 27666763776.0, "11265": 27666763776.0, "11270": 27666763776.0, "11275": 27666763776.0, "11280": 27666763776.0, "11285": 27666763776.0, "11290": 27666763776.0, "11295": 27666763776.0, "11300": 27666763776.0, "11305": 27666763776.0, "11310": 27666763776.0, "11315": 27666763776.0, "11320": 27666763776.0, "11325": 27666763776.0, "11330": 27666763776.0, "11335": 27666763776.0, "11340": 27666763776.0, "11345": 27666763776.0, "11350": 27666763776.0, "11355": 27666763776.0, "11360": 27666763776.0, "11365": 27666763776.0, "11370": 27666763776.0, "11375": 27666763776.0, "11380": 27666763776.0, "11385": 27666763776.0, "11390": 27666763776.0, "11395": 27666763776.0, "11400": 27666763776.0, "11405": 27666763776.0, "11410": 27666763776.0, "11415": 27666763776.0, "11420": 27666763776.0, "11425": 27666763776.0, "11430": 27666763776.0, "11435": 27666763776.0, "11440": 27666763776.0, "11445": 27666763776.0, "11450": 27666763776.0, "11455": 27666763776.0, "11460": 27666763776.0, "11465": 27666763776.0, "11470": 27666763776.0, "11475": 27666763776.0, "11480": 27666763776.0, "11485": 27666763776.0, "11490": 27666763776.0, "11495": 27666763776.0, "11500": 27666763776.0, "11505": 27666763776.0, "11510": 27666763776.0, "11515": 27666763776.0, "11520": 27666763776.0, "11525": 27666763776.0, "11530": 27666763776.0, "11535": 27666763776.0, "11540": 27666763776.0, "11545": 27666763776.0, "11550": 27666763776.0, "11555": 27666763776.0, "11560": 27666763776.0, "11565": 27666763776.0, "11570": 27666763776.0, "11575": 27666763776.0, "11580": 27666763776.0, "11585": 27666763776.0, "11590": 27666763776.0, "11595": 27666763776.0, "11600": 27666763776.0, "11605": 27666763776.0, "11610": 27666763776.0, "11615": 27666763776.0, "11620": 27666763776.0, "11625": 27666763776.0, "11630": 27666763776.0, "11635": 27666763776.0, "11640": 27666763776.0, "11645": 27666763776.0, "11650": 27666763776.0, "11655": 27666763776.0, "11660": 27666763776.0, "11665": 27666763776.0, "11670": 27666763776.0, "11675": 27666763776.0, "11680": 27666763776.0, "11685": 27666763776.0, "11690": 27666763776.0, "11695": 27666763776.0, "11700": 27666763776.0, "11705": 27666763776.0, "11710": 27666763776.0, "11715": 27666763776.0, "11720": 27666763776.0, "11725": 27666763776.0, "11730": 27666763776.0, "11735": 27666763776.0, "11740": 27666763776.0, "11745": 27666763776.0, "11750": 27666763776.0, "11755": 27666763776.0, "11760": 27666763776.0, "11765": 27666763776.0, "11770": 27666763776.0, "11775": 27666763776.0, "11780": 27666763776.0, "11785": 27666763776.0, "11790": 27666763776.0, "11795": 27666763776.0, "11800": 27666763776.0, "11805": 27666763776.0, "11810": 27666763776.0, "11815": 27666763776.0, "11820": 27666763776.0, "11825": 27666763776.0, "11830": 27666763776.0, "11835": 27666763776.0, "11840": 27666763776.0, "11845": 27666763776.0, "11850": 27666763776.0, "11855": 27666763776.0, "11860": 27666763776.0, "11865": 27666763776.0, "11870": 27666763776.0, "11875": 27666763776.0, "11880": 27666763776.0, "11885": 27666763776.0, "11890": 27666763776.0, "11895": 27666763776.0, "11900": 27666763776.0, "11905": 27666763776.0, "11910": 27666763776.0, "11915": 27666763776.0, "11920": 27666763776.0, "11925": 27666763776.0, "11930": 27666763776.0, "11935": 27666763776.0, "11940": 27666763776.0, "11945": 27666763776.0, "11950": 27666763776.0, "11955": 27666763776.0, "11960": 27666763776.0, "11965": 27666763776.0, "11970": 27666763776.0, "11975": 27666763776.0, "11980": 27666763776.0, "11985": 27666763776.0, "11990": 27666763776.0, "11995": 27666763776.0, "12000": 27666763776.0, "12005": 27666763776.0, "12010": 27666763776.0, "12015": 27666763776.0, "12020": 27666763776.0, "12025": 27666763776.0, "12030": 27666763776.0, "12035": 27666763776.0, "12040": 27666763776.0, "12045": 27666763776.0, "12050": 27666763776.0, "12055": 27666763776.0, "12060": 27666763776.0, "12065": 27666763776.0, "12070": 27666763776.0, "12075": 27666763776.0, "12080": 27666763776.0, "12085": 27666763776.0, "12090": 27666763776.0, "12095": 27666763776.0, "12100": 27666763776.0, "12105": 27666763776.0, "12110": 27666763776.0, "12115": 27666763776.0, "12120": 27666763776.0, "12125": 27666763776.0, "12130": 27666763776.0, "12135": 27666763776.0, "12140": 27666763776.0, "12145": 27666763776.0, "12150": 27666763776.0, "12155": 27666763776.0, "12160": 27666763776.0, "12165": 27666763776.0, "12170": 27666763776.0, "12175": 27666763776.0, "12180": 27666763776.0, "12185": 27666763776.0, "12190": 27666763776.0, "12195": 27666763776.0, "12200": 27666763776.0, "12205": 27666763776.0, "12210": 27666763776.0, "12215": 27666763776.0, "12220": 27666763776.0, "12225": 27666763776.0, "12230": 27666763776.0, "12235": 27666763776.0, "12240": 27666763776.0, "12245": 27666763776.0, "12250": 27666763776.0, "12255": 27666763776.0, "12260": 27666763776.0, "12265": 27666763776.0, "12270": 27666763776.0, "12275": 27666763776.0, "12280": 27666763776.0, "12285": 27666763776.0, "12290": 27666763776.0, "12295": 27666763776.0, "12300": 27666763776.0, "12305": 27666763776.0, "12310": 27666763776.0, "12315": 27666763776.0, "12320": 27666763776.0, "12325": 27666763776.0, "12330": 27666763776.0, "12335": 27666763776.0, "12340": 27666763776.0, "12345": 27666763776.0, "12350": 27666763776.0, "12355": 27666763776.0, "12360": 27666763776.0, "12365": 27666763776.0, "12370": 27666763776.0, "12375": 27666763776.0, "12380": 27666763776.0, "12385": 27666763776.0, "12390": 27666763776.0, "12395": 27666763776.0, "12400": 27666763776.0, "12405": 27666763776.0, "12410": 27666763776.0, "12415": 27666763776.0, "12420": 27666763776.0, "12425": 27666763776.0, "12430": 27666763776.0, "12435": 27666763776.0, "12440": 27666763776.0, "12445": 27666763776.0, "12450": 27666763776.0, "12455": 27666763776.0, "12460": 27666763776.0, "12465": 27666763776.0, "12470": 27666763776.0, "12475": 27666763776.0, "12480": 27666763776.0, "12485": 27666763776.0, "12490": 27666763776.0, "12495": 27666763776.0, "12500": 27666763776.0, "12505": 27666763776.0, "12510": 27666763776.0, "12515": 27666763776.0, "12520": 27666763776.0, "12525": 27666763776.0, "12530": 27666763776.0, "12535": 27666763776.0, "12540": 27666763776.0, "12545": 27666763776.0, "12550": 27666763776.0, "12555": 27666763776.0, "12560": 27666763776.0, "12565": 27666763776.0, "12570": 27666763776.0, "12575": 27666763776.0, "12580": 27666763776.0, "12585": 27666763776.0, "12590": 27666763776.0, "12595": 27666763776.0, "12600": 27666763776.0, "12605": 27666763776.0, "12610": 27666763776.0, "12615": 27666763776.0, "12620": 27666763776.0, "12625": 27666763776.0, "12630": 27666763776.0, "12635": 27666763776.0, "12640": 27666763776.0, "12645": 27666763776.0, "12650": 27666763776.0, "12655": 27666763776.0, "12660": 27666763776.0, "12665": 27666763776.0, "12670": 27666763776.0, "12675": 27666763776.0, "12680": 27666763776.0, "12685": 27666763776.0, "12690": 27666763776.0, "12695": 27666763776.0, "12700": 27666763776.0, "12705": 27666763776.0, "12710": 27666763776.0, "12715": 27666763776.0, "12720": 27666763776.0, "12725": 27666763776.0, "12730": 27666763776.0, "12735": 27666763776.0, "12740": 27666763776.0, "12745": 27666763776.0, "12750": 27666763776.0, "12755": 27666763776.0, "12760": 27666763776.0, "12765": 27666763776.0, "12770": 27666763776.0, "12775": 27666763776.0, "12780": 27666763776.0, "12785": 27666763776.0, "12790": 27666763776.0, "12795": 27666763776.0, "12800": 27666763776.0, "12805": 27666763776.0, "12810": 27666763776.0, "12815": 27666763776.0, "12820": 27666763776.0, "12825": 27666763776.0, "12830": 27666763776.0, "12835": 27666763776.0, "12840": 27666763776.0, "12845": 27666763776.0, "12850": 27666763776.0, "12855": 27666763776.0, "12860": 27666763776.0, "12865": 27666763776.0, "12870": 27666763776.0, "12875": 27666763776.0, "12880": 27666763776.0, "12885": 27666763776.0, "12890": 27666763776.0, "12895": 27666763776.0, "12900": 27666763776.0, "12905": 27666763776.0, "12910": 27666763776.0, "12915": 27666763776.0, "12920": 27666763776.0, "12925": 27666763776.0, "12930": 27666763776.0, "12935": 27666763776.0, "12940": 27666763776.0, "12945": 27666763776.0, "12950": 27666763776.0, "12955": 27666763776.0, "12960": 27666763776.0, "12965": 27666763776.0, "12970": 27666763776.0, "12975": 27666763776.0, "12980": 27666763776.0, "12985": 27666763776.0, "12990": 27666763776.0, "12995": 27666763776.0, "13000": 27666763776.0, "13005": 27666763776.0, "13010": 27666763776.0, "13015": 27666763776.0, "13020": 27666763776.0, "13025": 27666763776.0, "13030": 27666763776.0, "13035": 27666763776.0, "13040": 27666763776.0, "13045": 27666763776.0, "13050": 27666763776.0, "13055": 27666763776.0, "13060": 27666763776.0, "13065": 27666763776.0, "13070": 27666763776.0, "13075": 27666763776.0, "13080": 27666763776.0, "13085": 27666763776.0, "13090": 27666763776.0, "13095": 27666763776.0, "13100": 27666763776.0, "13105": 27666763776.0, "13110": 27666763776.0, "13115": 27666763776.0, "13120": 27666763776.0, "13125": 27666763776.0, "13130": 27666763776.0, "13135": 27666763776.0, "13140": 27666763776.0, "13145": 27666763776.0, "13150": 27666763776.0, "13155": 27666763776.0, "13160": 27666763776.0, "13165": 27666763776.0, "13170": 27666763776.0, "13175": 27666763776.0, "13180": 27666763776.0, "13185": 27666763776.0, "13190": 27666763776.0, "13195": 27666763776.0, "13200": 27666763776.0, "13205": 27666763776.0, "13210": 27666763776.0, "13215": 27666763776.0, "13220": 27666763776.0, "13225": 27666763776.0, "13230": 27666763776.0, "13235": 27666763776.0, "13240": 27666763776.0, "13245": 27666763776.0, "13250": 27666763776.0, "13255": 27666763776.0, "13260": 27666763776.0, "13265": 27666763776.0, "13270": 27666763776.0, "13275": 27666763776.0, "13280": 27666763776.0, "13285": 27666763776.0, "13290": 27666763776.0, "13295": 27666763776.0, "13300": 27666763776.0, "13305": 27666763776.0, "13310": 27666763776.0, "13315": 27666763776.0, "13320": 27666763776.0, "13325": 27666763776.0, "13330": 27666763776.0, "13335": 27666763776.0, "13340": 27666763776.0, "13345": 27666763776.0, "13350": 27666763776.0, "13355": 27666763776.0, "13360": 27666763776.0, "13365": 27666763776.0, "13370": 27666763776.0, "13375": 27666763776.0, "13380": 27666763776.0, "13385": 27666763776.0, "13390": 27666763776.0, "13395": 27666763776.0, "13400": 27666763776.0, "13405": 27666763776.0, "13410": 27666763776.0, "13415": 27666763776.0, "13420": 27666763776.0, "13425": 27666763776.0, "13430": 27666763776.0, "13435": 27666763776.0, "13440": 27666763776.0, "13445": 27666763776.0, "13450": 27666763776.0, "13455": 27666763776.0, "13460": 27666763776.0, "13465": 27666763776.0, "13470": 27666763776.0, "13475": 27666763776.0, "13480": 27666763776.0, "13485": 27666763776.0, "13490": 27666763776.0, "13495": 27666763776.0, "13500": 27666763776.0, "13505": 27666763776.0, "13510": 27666763776.0, "13515": 27666763776.0, "13520": 27666763776.0, "13525": 27666763776.0, "13530": 27666763776.0, "13535": 27666763776.0, "13540": 27666763776.0, "13545": 27666763776.0, "13550": 27666763776.0, "13555": 27666763776.0, "13560": 27666763776.0, "13565": 27666763776.0, "13570": 27666763776.0, "13575": 27666763776.0, "13580": 27666763776.0, "13585": 27666763776.0, "13590": 27666763776.0, "13595": 27666763776.0, "13600": 27666763776.0, "13605": 27666763776.0, "13610": 27666763776.0, "13615": 27666763776.0, "13620": 27666763776.0, "13625": 27666763776.0, "13630": 27666763776.0, "13635": 27666763776.0, "13640": 27666763776.0, "13645": 27666763776.0, "13650": 27666763776.0, "13655": 27666763776.0, "13660": 27666763776.0, "13665": 27666763776.0, "13670": 27666763776.0, "13675": 27666763776.0, "13680": 27666763776.0, "13685": 27666763776.0, "13690": 27666763776.0, "13695": 27666763776.0, "13700": 27666763776.0, "13705": 27666763776.0, "13710": 27666763776.0, "13715": 27666763776.0, "13720": 27666763776.0, "13725": 27666763776.0, "13730": 27666763776.0, "13735": 27666763776.0, "13740": 27666763776.0, "13745": 27666763776.0, "13750": 27666763776.0, "13755": 27666763776.0, "13760": 27666763776.0, "13765": 27666763776.0, "13770": 27666763776.0, "13775": 27666763776.0, "13780": 27666763776.0, "13785": 27666763776.0, "13790": 27666763776.0, "13795": 27666763776.0, "13800": 27666763776.0, "13805": 27666763776.0, "13810": 27666763776.0, "13815": 27666763776.0, "13820": 27666763776.0, "13825": 27666763776.0, "13830": 27666763776.0, "13835": 27666763776.0, "13840": 27666763776.0, "13845": 27666763776.0, "13850": 27666763776.0, "13855": 27666763776.0, "13860": 27666763776.0, "13865": 27666763776.0, "13870": 27666763776.0, "13875": 27666763776.0, "13880": 27666763776.0, "13885": 27666763776.0, "13890": 27666763776.0, "13895": 27666763776.0, "13900": 27666763776.0, "13905": 27666763776.0, "13910": 27666763776.0, "13915": 27666763776.0, "13920": 27666763776.0, "13925": 27666763776.0, "13930": 27666763776.0, "13935": 27666763776.0, "13940": 27666763776.0, "13945": 27666763776.0, "13950": 27666763776.0, "13955": 27666763776.0, "13960": 27666763776.0, "13965": 27666763776.0, "13970": 27666763776.0, "13975": 27666763776.0, "13980": 27666763776.0, "13985": 27666763776.0, "13990": 27666763776.0, "13995": 27666763776.0, "14000": 27666763776.0, "14005": 27666763776.0, "14010": 27666763776.0, "14015": 27666763776.0, "14020": 27666763776.0, "14025": 27666763776.0, "14030": 27666763776.0, "14035": 27666763776.0, "14040": 27666763776.0, "14045": 27666763776.0, "14050": 27666763776.0, "14055": 27666763776.0, "14060": 27666763776.0, "14065": 27666763776.0, "14070": 27666763776.0, "14075": 27666763776.0, "14080": 27666763776.0, "14085": 27666763776.0, "14090": 27666763776.0, "14095": 27666763776.0, "14100": 27666763776.0, "14105": 27666763776.0, "14110": 27666763776.0, "14115": 27666763776.0, "14120": 27666763776.0, "14125": 27666763776.0, "14130": 27666763776.0, "14135": 27666763776.0, "14140": 27666763776.0, "14145": 27666763776.0, "14150": 27666763776.0, "14155": 27666763776.0, "14160": 27666763776.0, "14165": 27666763776.0, "14170": 27666763776.0, "14175": 27666763776.0, "14180": 27666763776.0, "14185": 27666763776.0, "14190": 27666763776.0, "14195": 27666763776.0, "14200": 27666763776.0, "14205": 27666763776.0, "14210": 27666763776.0, "14215": 27666763776.0, "14220": 27666763776.0, "14225": 27666763776.0, "14230": 27666763776.0, "14235": 27666763776.0, "14240": 27666763776.0, "14245": 27666763776.0, "14250": 27666763776.0, "14255": 27666763776.0, "14260": 27666763776.0, "14265": 27666763776.0, "14270": 27666763776.0, "14275": 27666763776.0, "14280": 27666763776.0, "14285": 27666763776.0, "14290": 27666763776.0, "14295": 27666763776.0, "14300": 27666763776.0, "14305": 27666763776.0, "14310": 27666763776.0, "14315": 27666763776.0, "14320": 27666763776.0, "14325": 27666763776.0, "14330": 27666763776.0, "14335": 27666763776.0, "14340": 27666763776.0, "14345": 27666763776.0, "14350": 27666763776.0, "14355": 27666763776.0, "14360": 27666763776.0, "14365": 27666763776.0, "14370": 27666763776.0, "14375": 27666763776.0, "14380": 27666763776.0, "14385": 27666763776.0, "14390": 27666763776.0, "14395": 27666763776.0, "14400": 27666763776.0, "14405": 27666763776.0, "14410": 27666763776.0, "14415": 27666763776.0, "14420": 27666763776.0, "14425": 27666763776.0, "14430": 27666763776.0, "14435": 27666763776.0, "14440": 27666763776.0, "14445": 27666763776.0, "14450": 27666763776.0, "14455": 27666763776.0, "14460": 27666763776.0, "14465": 27666763776.0, "14470": 27666763776.0, "14475": 27666763776.0, "14480": 27666763776.0, "14485": 27666763776.0, "14490": 27666763776.0, "14495": 27666763776.0, "14500": 27666763776.0, "14505": 27666763776.0, "14510": 27666763776.0, "14515": 27666763776.0, "14520": 27666763776.0, "14525": 27666763776.0, "14530": 27666763776.0, "14535": 27666763776.0, "14540": 27666763776.0, "14545": 27666763776.0, "14550": 27666763776.0, "14555": 27666763776.0, "14560": 27666763776.0, "14565": 27666763776.0, "14570": 27666763776.0, "14575": 27666763776.0, "14580": 27666763776.0, "14585": 27666763776.0, "14590": 27666763776.0, "14595": 27666763776.0, "14600": 27666763776.0, "14605": 27666763776.0, "14610": 27666763776.0, "14615": 27666763776.0, "14620": 27666763776.0, "14625": 27666763776.0, "14630": 27666763776.0, "14635": 27666763776.0, "14640": 27666763776.0, "14645": 27666763776.0, "14650": 27666763776.0, "14655": 27666763776.0, "14660": 27666763776.0, "14665": 27666763776.0, "14670": 27666763776.0, "14675": 27666763776.0, "14680": 27666763776.0, "14685": 27666763776.0, "14690": 27666763776.0, "14695": 27666763776.0, "14700": 27666763776.0, "14705": 27666763776.0, "14710": 27666763776.0, "14715": 27666763776.0, "14720": 27666763776.0, "14725": 27666763776.0, "14730": 27666763776.0, "14735": 27666763776.0, "14740": 27666763776.0, "14745": 27666763776.0, "14750": 27666763776.0, "14755": 27666763776.0, "14760": 27666763776.0, "14765": 27666763776.0, "14770": 27666763776.0, "14775": 27666763776.0, "14780": 27666763776.0, "14785": 27666763776.0, "14790": 27666763776.0, "14795": 27666763776.0, "14800": 27666763776.0, "14805": 27666763776.0, "14810": 27666763776.0, "14815": 27666763776.0, "14820": 27666763776.0, "14825": 27666763776.0, "14830": 27666763776.0, "14835": 27666763776.0, "14840": 27666763776.0, "14845": 27666763776.0, "14850": 27666763776.0, "14855": 27666763776.0, "14860": 27666763776.0, "14865": 27666763776.0, "14870": 27666763776.0, "14875": 27666763776.0, "14880": 27666763776.0, "14885": 27666763776.0, "14890": 27666763776.0, "14895": 27666763776.0, "14900": 27666763776.0, "14905": 27666763776.0, "14910": 27666763776.0, "14915": 27666763776.0, "14920": 27666763776.0, "14925": 27666763776.0, "14930": 27666763776.0, "14935": 27666763776.0, "14940": 27666763776.0, "14945": 27666763776.0, "14950": 27666763776.0, "14955": 27666763776.0, "14960": 27666763776.0, "14965": 27666763776.0, "14970": 27666763776.0, "14975": 27666763776.0, "14980": 27666763776.0, "14985": 27666763776.0, "14990": 27666763776.0, "14995": 27666763776.0, "15000": 27666763776.0, "15005": 27666763776.0, "15010": 27666763776.0, "15015": 27666763776.0, "15020": 27666763776.0, "15025": 27666763776.0, "15030": 27666763776.0, "15035": 27666763776.0, "15040": 27666763776.0, "15045": 27666763776.0, "15050": 27666763776.0, "15055": 27666763776.0, "15060": 27666763776.0, "15065": 27666763776.0, "15070": 27666763776.0, "15075": 27666763776.0, "15080": 27666763776.0, "15085": 27666763776.0, "15090": 27666763776.0, "15095": 27666763776.0, "15100": 27666763776.0, "15105": 27666763776.0, "15110": 27666763776.0, "15115": 27666763776.0, "15120": 27666763776.0, "15125": 27666763776.0, "15130": 27666763776.0, "15135": 27666763776.0, "15140": 27666763776.0, "15145": 27666763776.0, "15150": 27666763776.0, "15155": 27666763776.0, "15160": 27666763776.0, "15165": 27666763776.0, "15170": 27666763776.0, "15175": 27666763776.0, "15180": 27666763776.0, "15185": 27666763776.0, "15190": 27666763776.0, "15195": 27666763776.0, "15200": 27666763776.0, "15205": 27666763776.0, "15210": 27666763776.0, "15215": 27666763776.0, "15220": 27666763776.0, "15225": 27666763776.0, "15230": 27666763776.0, "15235": 27666763776.0, "15240": 27666763776.0, "15245": 27666763776.0, "15250": 27666763776.0, "15255": 27666763776.0, "15260": 27666763776.0, "15265": 27666763776.0, "15270": 27666763776.0, "15275": 27666763776.0, "15280": 27666763776.0, "15285": 27666763776.0, "15290": 27666763776.0, "15295": 27666763776.0, "15300": 27666763776.0, "15305": 27666763776.0, "15310": 27666763776.0, "15315": 27666763776.0, "15320": 27666763776.0, "15325": 27666763776.0, "15330": 27666763776.0, "15335": 27666763776.0, "15340": 27666763776.0, "15345": 27666763776.0, "15350": 27666763776.0, "15355": 27666763776.0, "15360": 27666763776.0, "15365": 27666763776.0, "15370": 27666763776.0, "15375": 27666763776.0, "15380": 27666763776.0, "15385": 27666763776.0, "15390": 27666763776.0, "15395": 27666763776.0, "15400": 27666763776.0, "15405": 27666763776.0, "15410": 27666763776.0, "15415": 27666763776.0, "15420": 27666763776.0, "15425": 27666763776.0, "15430": 27666763776.0, "15435": 27666763776.0, "15440": 27666763776.0, "15445": 27666763776.0, "15450": 27666763776.0, "15455": 27666763776.0, "15460": 27666763776.0, "15465": 27666763776.0, "15470": 27666763776.0, "15475": 27666763776.0, "15480": 27666763776.0, "15485": 27666763776.0, "15490": 27666763776.0, "15495": 27666763776.0, "15500": 27666763776.0, "15505": 27666763776.0, "15510": 27666763776.0, "15515": 27666763776.0, "15520": 27666763776.0, "15525": 27666763776.0, "15530": 27666763776.0, "15535": 27666763776.0, "15540": 27666763776.0, "15545": 27666763776.0, "15550": 27666763776.0, "15555": 27666763776.0, "15560": 27666763776.0, "15565": 27666763776.0, "15570": 27666763776.0, "15575": 27666763776.0, "15580": 27666763776.0, "15585": 27666763776.0, "15590": 27666763776.0, "15595": 27666763776.0, "15600": 27666763776.0, "15605": 27666763776.0, "15610": 27666763776.0, "15615": 27666763776.0, "15620": 27666763776.0, "15625": 27666763776.0, "15630": 27666763776.0, "15635": 27666763776.0, "15640": 27666763776.0, "15645": 27666763776.0, "15650": 27666763776.0, "15655": 27666763776.0, "15660": 27666763776.0, "15665": 27666763776.0, "15670": 27666763776.0, "15675": 27666763776.0, "15680": 27666763776.0, "15685": 27666763776.0, "15690": 27666763776.0, "15695": 27666763776.0, "15700": 27666763776.0, "15705": 27666763776.0, "15710": 27666763776.0, "15715": 27666763776.0, "15720": 27666763776.0, "15725": 27666763776.0, "15730": 27666763776.0, "15735": 27666763776.0, "15740": 27666763776.0, "15745": 27666763776.0, "15750": 27666763776.0, "15755": 27666763776.0, "15760": 27666763776.0, "15765": 27666763776.0, "15770": 27666763776.0, "15775": 27666763776.0, "15780": 27666763776.0, "15785": 27666763776.0, "15790": 27666763776.0, "15795": 27666763776.0, "15800": 27666763776.0, "15805": 27666763776.0, "15810": 27666763776.0, "15815": 27666763776.0, "15820": 27666763776.0, "15825": 27666763776.0, "15830": 27666763776.0, "15835": 27666763776.0, "15840": 27666763776.0, "15845": 27666763776.0, "15850": 27666763776.0, "15855": 27666763776.0, "15860": 27666763776.0, "15865": 27666763776.0, "15870": 27666763776.0, "15875": 27666763776.0, "15880": 27666763776.0, "15885": 27666763776.0, "15890": 27666763776.0, "15895": 27666763776.0, "15900": 27666763776.0, "15905": 27666763776.0, "15910": 27666763776.0, "15915": 27666763776.0, "15920": 27666763776.0, "15925": 27666763776.0, "15930": 27666763776.0, "15935": 27666763776.0, "15940": 27666763776.0, "15945": 27666763776.0, "15950": 27666763776.0, "15955": 27666763776.0, "15960": 27666763776.0, "15965": 27666763776.0, "15970": 27666763776.0, "15975": 27666763776.0, "15980": 27666763776.0, "15985": 27666763776.0, "15990": 27666763776.0, "15995": 27666763776.0, "16000": 27666763776.0, "16005": 27666763776.0, "16010": 27666763776.0, "16015": 27666763776.0, "16020": 27666763776.0, "16025": 27666763776.0, "16030": 27666763776.0, "16035": 27666763776.0, "16040": 27666763776.0, "16045": 27666763776.0, "16050": 27666763776.0, "16055": 27666763776.0, "16060": 27666763776.0, "16065": 27666763776.0, "16070": 27666763776.0, "16075": 27666763776.0, "16080": 27666763776.0, "16085": 27666763776.0, "16090": 27666763776.0, "16095": 27666763776.0, "16100": 27666763776.0, "16105": 27666763776.0, "16110": 27666763776.0, "16115": 27666763776.0, "16120": 27666763776.0, "16125": 27666763776.0, "16130": 27666763776.0, "16135": 27666763776.0, "16140": 27666763776.0, "16145": 27666763776.0, "16150": 27666763776.0, "16155": 27666763776.0, "16160": 27666763776.0, "16165": 27666763776.0, "16170": 27666763776.0, "16175": 27666763776.0, "16180": 27666763776.0, "16185": 27666763776.0, "16190": 27666763776.0, "16195": 27666763776.0, "16200": 27666763776.0, "16205": 27666763776.0, "16210": 27666763776.0, "16215": 27666763776.0, "16220": 27666763776.0, "16225": 27666763776.0, "16230": 27666763776.0, "16235": 27666763776.0, "16240": 27666763776.0, "16245": 27666763776.0, "16250": 27666763776.0, "16255": 27666763776.0, "16260": 27666763776.0, "16265": 27666763776.0, "16270": 27666763776.0, "16275": 27666763776.0, "16280": 27666763776.0, "16285": 27666763776.0, "16290": 27666763776.0, "16295": 27666763776.0, "16300": 27666763776.0, "16305": 27666763776.0, "16310": 27666763776.0, "16315": 27666763776.0, "16320": 27666763776.0, "16325": 27666763776.0, "16330": 27666763776.0, "16335": 27666763776.0, "16340": 27666763776.0, "16345": 27666763776.0, "16350": 27666763776.0, "16355": 27666763776.0, "16360": 27666763776.0, "16365": 27666763776.0, "16370": 27666763776.0, "16375": 27666763776.0, "16380": 27666763776.0, "16385": 27666763776.0, "16390": 27666763776.0, "16395": 27666763776.0, "16400": 27666763776.0, "16405": 27666763776.0, "16410": 27666763776.0, "16415": 27666763776.0, "16420": 27666763776.0, "16425": 27666763776.0, "16430": 27666763776.0, "16435": 27666763776.0, "16440": 27666763776.0, "16445": 27666763776.0, "16450": 27666763776.0, "16455": 27666763776.0, "16460": 27666763776.0, "16465": 27666763776.0, "16470": 27666763776.0, "16475": 27666763776.0, "16480": 27666763776.0, "16485": 27666763776.0, "16490": 27666763776.0, "16495": 27666763776.0, "16500": 27666763776.0, "16505": 27666763776.0, "16510": 27666763776.0, "16515": 27666763776.0, "16520": 27666763776.0, "16525": 27666763776.0, "16530": 27666763776.0, "16535": 27666763776.0, "16540": 27666763776.0, "16545": 27666763776.0, "16550": 27666763776.0, "16555": 27666763776.0, "16560": 27666763776.0, "16565": 27666763776.0, "16570": 27666763776.0, "16575": 27666763776.0, "16580": 27666763776.0, "16585": 27666763776.0, "16590": 27666763776.0, "16595": 27666763776.0, "16600": 27666763776.0, "16605": 27666763776.0, "16610": 27666763776.0, "16615": 27666763776.0, "16620": 27666763776.0, "16625": 27666763776.0, "16630": 27666763776.0, "16635": 27666763776.0, "16640": 27666763776.0, "16645": 27666763776.0, "16650": 27666763776.0, "16655": 27666763776.0, "16660": 27666763776.0, "16665": 27666763776.0, "16670": 27666763776.0, "16675": 27666763776.0, "16680": 27666763776.0, "16685": 27666763776.0, "16690": 27666763776.0, "16695": 27666763776.0, "16700": 27666763776.0, "16705": 27666763776.0, "16710": 27666763776.0, "16715": 27666763776.0, "16720": 27666763776.0, "16725": 27666763776.0, "16730": 27666763776.0, "16735": 27666763776.0, "16740": 27666763776.0, "16745": 27666763776.0, "16750": 27666763776.0, "16755": 27666763776.0, "16760": 27666763776.0, "16765": 27666763776.0, "16770": 27666763776.0, "16775": 27666763776.0, "16780": 27666763776.0, "16785": 27666763776.0, "16790": 27666763776.0, "16795": 27666763776.0, "16800": 27666763776.0, "16805": 27666763776.0, "16810": 27666763776.0, "16815": 27666763776.0, "16820": 27666763776.0, "16825": 27666763776.0, "16830": 27666763776.0, "16835": 27666763776.0, "16840": 27666763776.0, "16845": 27666763776.0, "16850": 27666763776.0, "16855": 27666763776.0, "16860": 27666763776.0, "16865": 27666763776.0, "16870": 27666763776.0, "16875": 27666763776.0, "16880": 27666763776.0, "16885": 27666763776.0, "16890": 27666763776.0, "16895": 27666763776.0, "16900": 27666763776.0, "16905": 27666763776.0, "16910": 27666763776.0, "16915": 27666763776.0, "16920": 27666763776.0, "16925": 27666763776.0, "16930": 27666763776.0, "16935": 27666763776.0, "16940": 27666763776.0, "16945": 27666763776.0, "16950": 27666763776.0, "16955": 27666763776.0, "16960": 27666763776.0, "16965": 27666763776.0, "16970": 27666763776.0, "16975": 27666763776.0, "16980": 27666763776.0, "16985": 27666763776.0, "16990": 27666763776.0, "16995": 27666763776.0, "17000": 27666763776.0, "17005": 27666763776.0, "17010": 27666763776.0, "17015": 27666763776.0, "17020": 27666763776.0, "17025": 27666763776.0, "17030": 27666763776.0, "17035": 27666763776.0, "17040": 27666763776.0, "17045": 27666763776.0, "17050": 27666763776.0, "17055": 27666763776.0, "17060": 27666763776.0, "17065": 27666763776.0, "17070": 27666763776.0, "17075": 27666763776.0, "17080": 27666763776.0, "17085": 27666763776.0, "17090": 27666763776.0, "17095": 27666763776.0, "17100": 27666763776.0, "17105": 27666763776.0, "17110": 27666763776.0, "17115": 27666763776.0, "17120": 27666763776.0, "17125": 27666763776.0, "17130": 27666763776.0, "17135": 27666763776.0, "17140": 27666763776.0, "17145": 27666763776.0, "17150": 27666763776.0, "17155": 27666763776.0, "17160": 27666763776.0, "17165": 27666763776.0, "17170": 27666763776.0, "17175": 27666763776.0, "17180": 27666763776.0, "17185": 27666763776.0, "17190": 27666763776.0, "17195": 27666763776.0, "17200": 27666763776.0, "17205": 27666763776.0, "17210": 27666763776.0, "17215": 27666763776.0, "17220": 27666763776.0, "17225": 27666763776.0, "17230": 27666763776.0, "17235": 27666763776.0, "17240": 27666763776.0, "17245": 27666763776.0, "17250": 27666763776.0, "17255": 27666763776.0, "17260": 27666763776.0, "17265": 27666763776.0, "17270": 27666763776.0, "17275": 27666763776.0, "17280": 27666763776.0, "17285": 27666763776.0, "17290": 27666763776.0, "17295": 27666763776.0, "17300": 27666763776.0, "17305": 27666763776.0, "17310": 27666763776.0, "17315": 27666763776.0, "17320": 27666763776.0, "17325": 27666763776.0, "17330": 27666763776.0, "17335": 27666763776.0, "17340": 27666763776.0, "17345": 27666763776.0, "17350": 27666763776.0, "17355": 27666763776.0, "17360": 27666763776.0, "17365": 27666763776.0, "17370": 27666763776.0, "17375": 27666763776.0, "17380": 27666763776.0, "17385": 27666763776.0, "17390": 27666763776.0, "17395": 27666763776.0, "17400": 27666763776.0, "17405": 27666763776.0, "17410": 27666763776.0, "17415": 27666763776.0, "17420": 27666763776.0, "17425": 27666763776.0, "17430": 27666763776.0, "17435": 27666763776.0, "17440": 27666763776.0, "17445": 27666763776.0, "17450": 27666763776.0, "17455": 27666763776.0, "17460": 27666763776.0, "17465": 27666763776.0, "17470": 27666763776.0, "17475": 27666763776.0, "17480": 27666763776.0, "17485": 27666763776.0, "17490": 27666763776.0, "17495": 27666763776.0, "17500": 27666763776.0, "17505": 27666763776.0, "17510": 27666763776.0, "17515": 27666763776.0, "17520": 27666763776.0, "17525": 27666763776.0, "17530": 27666763776.0, "17535": 27666763776.0, "17540": 27666763776.0, "17545": 27666763776.0, "17550": 27666763776.0, "17555": 27666763776.0, "17560": 27666763776.0, "17565": 27666763776.0, "17570": 27666763776.0, "17575": 27666763776.0, "17580": 27666763776.0, "17585": 27666763776.0, "17590": 27666763776.0, "17595": 27666763776.0, "17600": 27666763776.0, "17605": 27666763776.0, "17610": 27666763776.0, "17615": 27666763776.0, "17620": 27666763776.0, "17625": 27666763776.0, "17630": 27666763776.0, "17635": 27666763776.0, "17640": 27666763776.0, "17645": 27666763776.0, "17650": 27666763776.0, "17655": 27666763776.0, "17660": 27666763776.0, "17665": 27666763776.0, "17670": 27666763776.0, "17675": 27666763776.0, "17680": 27666763776.0, "17685": 27666763776.0, "17690": 27666763776.0, "17695": 27666763776.0, "17700": 27666763776.0, "17705": 27666763776.0, "17710": 27666763776.0, "17715": 27666763776.0, "17720": 27666763776.0, "17725": 27666763776.0, "17730": 27666763776.0, "17735": 27666763776.0, "17740": 27666763776.0, "17745": 27666763776.0, "17750": 27666763776.0, "17755": 27666763776.0, "17760": 27666763776.0, "17765": 27666763776.0, "17770": 27666763776.0, "17775": 27666763776.0, "17780": 27666763776.0, "17785": 27666763776.0, "17790": 27666763776.0, "17795": 27666763776.0, "17800": 27666763776.0, "17805": 27666763776.0, "17810": 27666763776.0, "17815": 27666763776.0, "17820": 27666171904.0, "17825": 27666171904.0, "17830": 27666763776.0, "17835": 27666763776.0, "17840": 27666763776.0, "17845": 27666763776.0, "17850": 27666763776.0, "17855": 27666763776.0, "17860": 27666763776.0, "17865": 27666763776.0, "17870": 27666763776.0, "17875": 27666763776.0, "17880": 27666763776.0, "17885": 27666763776.0, "17890": 27666763776.0, "17895": 27666763776.0, "17900": 27666763776.0, "17905": 27666763776.0, "17910": 27666763776.0, "17915": 27666763776.0, "17920": 27666763776.0, "17925": 27666763776.0, "17930": 27666763776.0, "17935": 27666763776.0, "17940": 27666763776.0, "17945": 27666763776.0, "17950": 27666763776.0, "17955": 27666763776.0, "17960": 27666763776.0, "17965": 27666763776.0, "17970": 27666763776.0, "17975": 27666763776.0, "17980": 27666763776.0, "17985": 27666763776.0, "17990": 27666763776.0, "17995": 27666763776.0, "18000": 27666763776.0, "18005": 27666763776.0, "18010": 27666763776.0, "18015": 27666763776.0, "18020": 27666763776.0, "18025": 27666763776.0, "18030": 27666763776.0, "18035": 27666763776.0, "18040": 27666763776.0, "18045": 27666763776.0, "18050": 27666763776.0, "18055": 27666763776.0, "18060": 27666763776.0, "18065": 27666763776.0, "18070": 27666763776.0, "18075": 27666763776.0, "18080": 27666763776.0, "18085": 27666763776.0, "18090": 27666763776.0, "18095": 27666763776.0, "18100": 27666763776.0, "18105": 27666763776.0, "18110": 27666763776.0, "18115": 27666763776.0, "18120": 27666763776.0, "18125": 27666763776.0, "18130": 27666763776.0, "18135": 27666763776.0, "18140": 27666763776.0, "18145": 27666763776.0, "18150": 27666763776.0, "18155": 27666763776.0, "18160": 27666763776.0, "18165": 27666763776.0, "18170": 27666763776.0, "18175": 27666763776.0, "18180": 27666763776.0, "18185": 27666763776.0, "18190": 27666763776.0, "18195": 27666763776.0, "18200": 27666763776.0, "18205": 27666763776.0, "18210": 27666763776.0, "18215": 27666763776.0, "18220": 27666763776.0, "18225": 27666763776.0, "18230": 27666763776.0, "18235": 27666763776.0, "18240": 27666763776.0, "18245": 27666763776.0, "18250": 27666763776.0, "18255": 27666763776.0, "18260": 27666763776.0, "18265": 27666763776.0, "18270": 27666763776.0, "18275": 27666763776.0, "18280": 27666763776.0, "18285": 27666763776.0, "18290": 27666763776.0, "18295": 27666763776.0, "18300": 27666763776.0, "18305": 27666763776.0, "18310": 27666763776.0, "18315": 27666763776.0, "18320": 27666763776.0, "18325": 27666763776.0, "18330": 27666763776.0, "18335": 27666763776.0, "18340": 27666763776.0, "18345": 27666763776.0, "18350": 27666763776.0, "18355": 27666763776.0, "18360": 27666763776.0, "18365": 27666763776.0, "18370": 27666763776.0, "18375": 27666763776.0, "18380": 27666763776.0, "18385": 27666763776.0, "18390": 27666763776.0, "18395": 27666763776.0, "18400": 27666763776.0, "18405": 27666763776.0, "18410": 27666763776.0, "18415": 27666763776.0, "18420": 27666763776.0, "18425": 27666763776.0, "18430": 27666763776.0, "18435": 27666763776.0, "18440": 27666763776.0, "18445": 27666763776.0, "18450": 27666763776.0, "18455": 27666763776.0, "18460": 27666763776.0, "18465": 27666763776.0, "18470": 27666763776.0, "18475": 27666763776.0, "18480": 27666763776.0, "18485": 27666763776.0, "18490": 27666763776.0, "18495": 27666763776.0, "18500": 27666763776.0, "18505": 27666763776.0, "18510": 27666763776.0, "18515": 27666763776.0, "18520": 27666763776.0, "18525": 27666763776.0, "18530": 27666763776.0, "18535": 27666763776.0, "18540": 27666763776.0, "18545": 27666763776.0, "18550": 27666763776.0, "18555": 27666763776.0, "18560": 27666763776.0, "18565": 27666763776.0, "18570": 27666763776.0, "18575": 27666763776.0, "18580": 27666763776.0, "18585": 27666763776.0, "18590": 27666763776.0, "18595": 27666763776.0, "18600": 27666763776.0, "18605": 27666763776.0, "18610": 27666763776.0, "18615": 27666763776.0, "18620": 27666763776.0, "18625": 27666763776.0, "18630": 27666763776.0, "18635": 27666763776.0, "18640": 27666763776.0, "18645": 27666763776.0, "18650": 27666763776.0, "18655": 27666763776.0, "18660": 27666763776.0, "18665": 27666763776.0, "18670": 27666763776.0, "18675": 27666763776.0, "18680": 27666763776.0, "18685": 27666763776.0, "18690": 27666763776.0, "18695": 27666763776.0, "18700": 27666763776.0, "18705": 27666763776.0, "18710": 27666763776.0, "18715": 27666763776.0, "18720": 27666763776.0, "18725": 27666763776.0, "18730": 27666763776.0, "18735": 27666763776.0, "18740": 27666763776.0, "18745": 27666763776.0, "18750": 27666763776.0, "18755": 27666763776.0, "18760": 27666763776.0, "18765": 27666763776.0, "18770": 27666763776.0, "18775": 27666763776.0, "18780": 27666763776.0, "18785": 27666763776.0, "18790": 27666763776.0, "18795": 27666763776.0, "18800": 27666763776.0, "18805": 27666763776.0, "18810": 27666763776.0, "18815": 27666763776.0, "18820": 27666763776.0, "18825": 27666763776.0, "18830": 27666763776.0, "18835": 27666763776.0, "18840": 27666763776.0, "18845": 27666763776.0, "18850": 27666763776.0, "18855": 27666763776.0, "18860": 27666763776.0, "18865": 27666763776.0, "18870": 27666763776.0, "18875": 27666763776.0, "18880": 27666763776.0, "18885": 27666763776.0, "18890": 27666763776.0, "18895": 27666763776.0, "18900": 27666763776.0, "18905": 27666763776.0, "18910": 27666763776.0, "18915": 27666763776.0, "18920": 27666763776.0, "18925": 27666763776.0, "18930": 27666763776.0, "18935": 27666763776.0, "18940": 27666763776.0, "18945": 27666763776.0, "18950": 27666763776.0, "18955": 27666763776.0, "18960": 27666763776.0, "18965": 27666763776.0, "18970": 27666763776.0, "18975": 27666763776.0, "18980": 27666763776.0, "18985": 27666763776.0, "18990": 27666763776.0, "18995": 27666763776.0, "19000": 27666763776.0, "19005": 27666763776.0, "19010": 27666763776.0, "19015": 27666763776.0, "19020": 27666763776.0, "19025": 27666763776.0, "19030": 27666763776.0, "19035": 27666763776.0, "19040": 27666763776.0, "19045": 27666763776.0, "19050": 27666763776.0, "19055": 27666763776.0, "19060": 27666763776.0, "19065": 27666763776.0, "19070": 27666763776.0, "19075": 27666763776.0, "19080": 27666763776.0, "19085": 27666763776.0, "19090": 27666763776.0, "19095": 27666763776.0, "19100": 27666763776.0, "19105": 27666763776.0, "19110": 27666763776.0, "19115": 27666763776.0, "19120": 27666763776.0, "19125": 27666763776.0, "19130": 27666763776.0, "19135": 27666763776.0, "19140": 27666763776.0, "19145": 27666763776.0, "19150": 27666763776.0, "19155": 27666763776.0, "19160": 27666763776.0, "19165": 27666763776.0, "19170": 27666763776.0, "19175": 27666763776.0, "19180": 27666763776.0, "19185": 27666763776.0, "19190": 27666763776.0, "19195": 27666763776.0, "19200": 27666763776.0, "19205": 27666763776.0, "19210": 27666763776.0, "19215": 27666763776.0, "19220": 27666763776.0, "19225": 27666763776.0, "19230": 27666763776.0, "19235": 27666763776.0, "19240": 27666763776.0, "19245": 27666763776.0, "19250": 27666763776.0, "19255": 27666763776.0, "19260": 27666763776.0, "19265": 27666763776.0, "19270": 27666763776.0, "19275": 27666763776.0, "19280": 27666763776.0, "19285": 27666763776.0, "19290": 27666763776.0, "19295": 27666763776.0, "19300": 27666763776.0, "19305": 27666763776.0, "19310": 27666763776.0, "19315": 27666763776.0, "19320": 27666763776.0, "19325": 27666763776.0, "19330": 27666763776.0, "19335": 27666763776.0, "19340": 27666763776.0, "19345": 27666763776.0, "19350": 27666763776.0, "19355": 27666763776.0, "19360": 27666763776.0, "19365": 27666763776.0, "19370": 27666763776.0, "19375": 27666763776.0, "19380": 27666763776.0, "19385": 27666763776.0, "19390": 27666763776.0, "19395": 27666763776.0, "19400": 27666763776.0, "19405": 27666763776.0, "19410": 27666763776.0, "19415": 27666763776.0, "19420": 27666763776.0, "19425": 27666763776.0, "19430": 27666763776.0, "19435": 27666763776.0, "19440": 27666763776.0, "19445": 27666763776.0, "19450": 27666763776.0, "19455": 27666763776.0, "19460": 27666763776.0, "19465": 27666763776.0, "19470": 27666763776.0, "19475": 27666763776.0, "19480": 27666763776.0, "19485": 27666763776.0, "19490": 27666763776.0, "19495": 27666763776.0, "19500": 27666763776.0, "19505": 27666763776.0, "19510": 27666763776.0, "19515": 27666763776.0, "19520": 27666763776.0, "19525": 27666763776.0, "19530": 27666763776.0, "19535": 27666763776.0, "19540": 27666763776.0, "19545": 27666763776.0, "19550": 27666763776.0, "19555": 27666763776.0, "19560": 27666763776.0, "19565": 27666763776.0, "19570": 27666763776.0, "19575": 27666763776.0, "19580": 27666763776.0, "19585": 27666763776.0, "19590": 27666763776.0, "19595": 27666763776.0, "19600": 27666763776.0, "19605": 27666763776.0, "19610": 27666763776.0, "19615": 27666763776.0, "19620": 27666763776.0, "19625": 27666763776.0, "19630": 27666763776.0, "19635": 27666763776.0, "19640": 27666763776.0, "19645": 27666763776.0, "19650": 27666763776.0, "19655": 27666763776.0, "19660": 27666763776.0, "19665": 27666763776.0, "19670": 27666763776.0, "19675": 27666763776.0, "19680": 27666763776.0, "19685": 27666763776.0, "19690": 27666763776.0, "19695": 27666763776.0, "19700": 27666763776.0, "19705": 27666763776.0, "19710": 27666763776.0, "19715": 27666763776.0, "19720": 27666763776.0, "19725": 27666763776.0, "19730": 27666763776.0, "19735": 27666763776.0, "19740": 27666763776.0, "19745": 27666763776.0, "19750": 27666763776.0, "19755": 27666763776.0, "19760": 27666763776.0, "19765": 27666763776.0, "19770": 27666763776.0, "19775": 27666763776.0, "19780": 27666763776.0, "19785": 27666763776.0, "19790": 27666763776.0, "19795": 27666763776.0, "19800": 27666763776.0, "19805": 27666763776.0, "19810": 27666763776.0, "19815": 27666763776.0, "19820": 27666763776.0, "19825": 27666763776.0, "19830": 27666763776.0, "19835": 27666763776.0, "19840": 27666763776.0, "19845": 27666763776.0, "19850": 27666763776.0, "19855": 27666763776.0, "19860": 27666763776.0, "19865": 27666763776.0, "19870": 27666763776.0, "19875": 27666763776.0, "19880": 27666763776.0, "19885": 27666763776.0, "19890": 27666763776.0, "19895": 27666763776.0, "19900": 27666763776.0, "19905": 27666763776.0, "19910": 27666763776.0, "19915": 27666763776.0, "19920": 27666763776.0, "19925": 27666763776.0, "19930": 27666763776.0, "19935": 27666763776.0, "19940": 27666763776.0, "19945": 27666763776.0, "19950": 27666763776.0, "19955": 27666763776.0, "19960": 27666763776.0, "19965": 27666763776.0, "19970": 27666763776.0, "19975": 27666763776.0, "19980": 27666763776.0, "19985": 27666763776.0, "19990": 27666763776.0, "19995": 27666763776.0, "20000": 27666763776.0, "20005": 27666763776.0, "20010": 27666763776.0, "20015": 27666763776.0, "20020": 27666763776.0, "20025": 27666763776.0, "20030": 27666763776.0, "20035": 27666763776.0, "20040": 27666763776.0, "20045": 27666763776.0, "20050": 27666763776.0, "20055": 27666763776.0, "20060": 27666763776.0, "20065": 27666763776.0, "20070": 27666763776.0, "20075": 27666763776.0, "20080": 27666763776.0, "20085": 27666763776.0, "20090": 27666763776.0, "20095": 27666763776.0, "20100": 27666763776.0, "20105": 27666763776.0, "20110": 27666763776.0, "20115": 27666763776.0, "20120": 27666763776.0, "20125": 27666763776.0, "20130": 27666763776.0, "20135": 27666763776.0, "20140": 27666763776.0, "20145": 27666763776.0, "20150": 27666763776.0, "20155": 27666763776.0, "20160": 27666763776.0, "20165": 27666763776.0, "20170": 27666763776.0, "20175": 27666763776.0, "20180": 27666763776.0, "20185": 27666763776.0, "20190": 27666763776.0, "20195": 27666763776.0, "20200": 27666763776.0, "20205": 27666763776.0, "20210": 27666763776.0, "20215": 27666763776.0, "20220": 27666763776.0, "20225": 27666763776.0, "20230": 27666763776.0, "20235": 27666763776.0, "20240": 27666763776.0, "20245": 27666763776.0, "20250": 27666763776.0, "20255": 27666763776.0, "20260": 27666763776.0, "20265": 27666763776.0, "20270": 27666763776.0, "20275": 27666763776.0, "20280": 27666763776.0, "20285": 27666763776.0, "20290": 27666763776.0, "20295": 27666763776.0, "20300": 27666763776.0, "20305": 27666763776.0, "20310": 27666763776.0, "20315": 27666763776.0, "20320": 27666763776.0, "20325": 27666763776.0, "20330": 27666763776.0, "20335": 27666763776.0, "20340": 27666763776.0, "20345": 27666763776.0, "20350": 27666763776.0, "20355": 27666763776.0, "20360": 27666763776.0, "20365": 27666763776.0, "20370": 27666763776.0, "20375": 27666763776.0, "20380": 27666763776.0, "20385": 27666763776.0, "20390": 27666763776.0, "20395": 27666763776.0, "20400": 27666763776.0, "20405": 27666763776.0, "20410": 27666763776.0, "20415": 27666763776.0, "20420": 27666763776.0, "20425": 27666763776.0, "20430": 27666763776.0, "20435": 27666763776.0, "20440": 27666763776.0, "20445": 27666763776.0, "20450": 27666763776.0, "20455": 27666763776.0, "20460": 27666763776.0, "20465": 27666763776.0, "20470": 27666763776.0, "20475": 27666763776.0, "20480": 27666763776.0, "20485": 27666763776.0, "20490": 27666763776.0, "20495": 27666763776.0, "20500": 27666763776.0, "20505": 27666763776.0, "20510": 27666763776.0, "20515": 27666763776.0, "20520": 27666763776.0, "20525": 27666763776.0, "20530": 27666763776.0, "20535": 27666763776.0, "20540": 27666763776.0, "20545": 27666763776.0, "20550": 27666763776.0, "20555": 27666763776.0, "20560": 27666763776.0, "20565": 27666763776.0, "20570": 27666763776.0, "20575": 27666763776.0, "20580": 27666763776.0, "20585": 27666763776.0, "20590": 27666763776.0, "20595": 27666763776.0, "20600": 27666763776.0, "20605": 27666763776.0, "20610": 27666763776.0, "20615": 27666763776.0, "20620": 27666763776.0, "20625": 27666763776.0, "20630": 27666763776.0, "20635": 27666763776.0, "20640": 27666763776.0, "20645": 27666763776.0, "20650": 27666763776.0, "20655": 27666763776.0, "20660": 27666763776.0, "20665": 27666763776.0, "20670": 27666763776.0, "20675": 27666763776.0, "20680": 27666763776.0, "20685": 27666763776.0, "20690": 27666763776.0, "20695": 27666763776.0, "20700": 27666763776.0, "20705": 27666763776.0, "20710": 27666763776.0, "20715": 27666763776.0, "20720": 27666763776.0, "20725": 27666763776.0, "20730": 27666763776.0, "20735": 27666763776.0, "20740": 27666763776.0, "20745": 27666763776.0, "20750": 27666763776.0, "20755": 27666763776.0, "20760": 27666763776.0, "20765": 27666763776.0, "20770": 27666763776.0, "20775": 27666763776.0, "20780": 27666763776.0, "20785": 27666763776.0, "20790": 27666763776.0, "20795": 27666763776.0, "20800": 27666763776.0, "20805": 27666763776.0, "20810": 27666763776.0, "20815": 27666763776.0, "20820": 27666763776.0, "20825": 27666763776.0, "20830": 27666763776.0, "20835": 27666763776.0, "20840": 27666763776.0, "20845": 27666763776.0, "20850": 27666763776.0, "20855": 27666763776.0, "20860": 27666763776.0, "20865": 27666763776.0, "20870": 27666763776.0, "20875": 27666763776.0, "20880": 27666763776.0, "20885": 27666763776.0, "20890": 27666763776.0, "20895": 27666763776.0, "20900": 27666763776.0, "20905": 27666763776.0, "20910": 27666763776.0, "20915": 27666763776.0, "20920": 27666763776.0, "20925": 27666763776.0, "20930": 27666763776.0, "20935": 27666763776.0, "20940": 27666763776.0, "20945": 27666763776.0, "20950": 27666763776.0, "20955": 27666763776.0, "20960": 27666763776.0, "20965": 27666763776.0, "20970": 27666763776.0, "20975": 27666763776.0, "20980": 27666763776.0, "20985": 27666763776.0, "20990": 27666763776.0, "20995": 27666763776.0, "21000": 27666763776.0, "21005": 27666763776.0, "21010": 27666763776.0, "21015": 27666763776.0, "21020": 27666763776.0, "21025": 27666763776.0, "21030": 27666763776.0, "21035": 27666763776.0, "21040": 27666763776.0, "21045": 27666763776.0, "21050": 27666763776.0, "21055": 27666763776.0, "21060": 27666763776.0, "21065": 27666763776.0, "21070": 27666763776.0, "21075": 27666763776.0, "21080": 27666763776.0, "21085": 27666763776.0, "21090": 27666763776.0, "21095": 27666763776.0, "21100": 27666763776.0, "21105": 27666763776.0, "21110": 27666763776.0, "21115": 27666763776.0, "21120": 27666763776.0, "21125": 27666763776.0, "21130": 27666763776.0, "21135": 27666763776.0, "21140": 27666763776.0, "21145": 27666763776.0, "21150": 27666763776.0, "21155": 27666763776.0, "21160": 27666763776.0, "21165": 27666763776.0, "21170": 27666763776.0, "21175": 27666763776.0, "21180": 27666763776.0, "21185": 27666763776.0, "21190": 27666763776.0, "21195": 27666763776.0, "21200": 27666763776.0, "21205": 27666763776.0, "21210": 27666763776.0, "21215": 27666763776.0, "21220": 27666763776.0, "21225": 27666763776.0, "21230": 27666763776.0, "21235": 27666763776.0, "21240": 27666763776.0, "21245": 27666763776.0, "21250": 27666763776.0, "21255": 27666763776.0, "21260": 27666763776.0, "21265": 27666763776.0, "21270": 27666763776.0, "21275": 27666763776.0, "21280": 27666763776.0, "21285": 27666763776.0, "21290": 27666763776.0, "21295": 27666763776.0, "21300": 27666763776.0, "21305": 27666763776.0, "21310": 27666763776.0, "21315": 27666763776.0, "21320": 27666763776.0, "21325": 27666763776.0, "21330": 27666763776.0, "21335": 27666763776.0, "21340": 27666763776.0, "21345": 27666763776.0, "21350": 27666763776.0, "21355": 27666763776.0, "21360": 27666763776.0, "21365": 27666763776.0, "21370": 27666763776.0, "21375": 27666763776.0, "21380": 27666763776.0, "21385": 27666763776.0, "21390": 27666763776.0, "21395": 27666763776.0, "21400": 27666763776.0, "21405": 27666763776.0, "21410": 27666763776.0, "21415": 27666763776.0, "21420": 27666763776.0, "21425": 27666763776.0, "21430": 27666763776.0, "21435": 27666763776.0, "21440": 27666763776.0, "21445": 27666763776.0, "21450": 27666763776.0, "21455": 27666763776.0, "21460": 27666763776.0, "21465": 27666763776.0, "21470": 27666763776.0, "21475": 27666763776.0, "21480": 27666763776.0, "21485": 27666763776.0, "21490": 27666763776.0, "21495": 27666763776.0, "21500": 27666763776.0, "21505": 27666763776.0, "21510": 27666763776.0, "21515": 27666763776.0, "21520": 27666763776.0, "21525": 27666763776.0, "21530": 27666763776.0, "21535": 27666763776.0, "21540": 27666763776.0, "21545": 27666763776.0, "21550": 27666763776.0, "21555": 27666763776.0, "21560": 27666763776.0, "21565": 27666763776.0, "21570": 27666763776.0, "21575": 27666763776.0, "21580": 27666763776.0, "21585": 27666763776.0, "21590": 27666763776.0, "21595": 27666763776.0, "21600": 27666763776.0, "21605": 27666763776.0, "21610": 27666763776.0, "21615": 27666763776.0, "21620": 27666763776.0, "21625": 27666763776.0, "21630": 27666763776.0, "21635": 27666763776.0, "21640": 27666763776.0, "21645": 27666763776.0, "21650": 27666763776.0, "21655": 27666763776.0, "21660": 27666763776.0, "21665": 27666763776.0, "21670": 27666763776.0, "21675": 27666763776.0, "21680": 27666763776.0, "21685": 27666763776.0, "21690": 27666763776.0, "21695": 27666763776.0, "21700": 27666763776.0, "21705": 27666763776.0, "21710": 27666763776.0, "21715": 27666763776.0, "21720": 27666763776.0, "21725": 27666763776.0, "21730": 27666763776.0, "21735": 27666763776.0, "21740": 27666763776.0, "21745": 27666763776.0, "21750": 27666763776.0, "21755": 27666763776.0, "21760": 27666763776.0, "21765": 27666763776.0, "21770": 27666763776.0, "21775": 27666763776.0, "21780": 27666763776.0, "21785": 27666763776.0, "21790": 27666763776.0, "21795": 27666763776.0, "21800": 27666763776.0, "21805": 27666763776.0, "21810": 27666763776.0, "21815": 27666763776.0, "21820": 27666763776.0, "21825": 27666763776.0, "21830": 27666763776.0, "21835": 27666763776.0, "21840": 27666763776.0, "21845": 27666763776.0, "21850": 27666763776.0, "21855": 27666763776.0, "21860": 27666763776.0, "21865": 27666763776.0, "21870": 27666763776.0, "21875": 27666763776.0, "21880": 27666763776.0, "21885": 27666763776.0, "21890": 27666763776.0, "21895": 27666763776.0, "21900": 27666763776.0, "21905": 27666763776.0, "21910": 27666763776.0, "21915": 27666763776.0, "21920": 27666763776.0, "21925": 27666763776.0, "21930": 27666763776.0, "21935": 27666763776.0, "21940": 27666763776.0, "21945": 27666763776.0, "21950": 27666763776.0, "21955": 27666763776.0, "21960": 27666763776.0, "21965": 27666763776.0, "21970": 27666763776.0, "21975": 27666763776.0, "21980": 27666763776.0, "21985": 27666763776.0, "21990": 27666763776.0, "21995": 27666763776.0, "22000": 27666763776.0, "22005": 27666763776.0, "22010": 27666763776.0, "22015": 27666763776.0, "22020": 27666763776.0, "22025": 27666763776.0, "22030": 27666763776.0, "22035": 27666763776.0, "22040": 27666763776.0, "22045": 27666763776.0, "22050": 27666763776.0, "22055": 27666763776.0, "22060": 27666763776.0, "22065": 27666763776.0, "22070": 27666763776.0, "22075": 27666763776.0, "22080": 27666763776.0, "22085": 27666763776.0, "22090": 27666763776.0, "22095": 27666763776.0, "22100": 27666763776.0, "22105": 27666763776.0, "22110": 27666763776.0, "22115": 27666763776.0, "22120": 27666763776.0, "22125": 27666763776.0, "22130": 27666763776.0, "22135": 27666763776.0, "22140": 27666763776.0, "22145": 27666763776.0, "22150": 27666763776.0, "22155": 27666763776.0, "22160": 27666763776.0, "22165": 27666763776.0, "22170": 27666763776.0, "22175": 27666763776.0, "22180": 27666763776.0, "22185": 27666763776.0, "22190": 27666763776.0, "22195": 27666763776.0, "22200": 27666763776.0, "22205": 27666763776.0, "22210": 27666763776.0, "22215": 27666763776.0, "22220": 27666763776.0, "22225": 27666763776.0, "22230": 27666763776.0, "22235": 27666763776.0, "22240": 27666763776.0, "22245": 27666763776.0, "22250": 27666763776.0, "22255": 27666763776.0, "22260": 27666763776.0, "22265": 27666763776.0, "22270": 27666763776.0, "22275": 27666763776.0, "22280": 27666763776.0, "22285": 27666763776.0, "22290": 27666763776.0, "22295": 27666763776.0, "22300": 27666763776.0, "22305": 27666763776.0, "22310": 27666763776.0, "22315": 27666763776.0, "22320": 27666763776.0, "22325": 27666763776.0, "22330": 27666763776.0, "22335": 27666763776.0, "22340": 27666763776.0, "22345": 27666763776.0, "22350": 27666763776.0, "22355": 27666763776.0, "22360": 27666763776.0, "22365": 27666763776.0, "22370": 27666763776.0, "22375": 27666763776.0, "22380": 27666763776.0, "22385": 27666763776.0, "22390": 27666763776.0, "22395": 27666763776.0, "22400": 27666763776.0, "22405": 27666763776.0, "22410": 27666763776.0, "22415": 27666763776.0, "22420": 27666763776.0, "22425": 27666763776.0, "22430": 27666763776.0, "22435": 27666763776.0, "22440": 27666763776.0, "22445": 27666763776.0, "22450": 27666763776.0, "22455": 27666763776.0, "22460": 27666763776.0, "22465": 27666763776.0, "22470": 27666763776.0, "22475": 27666763776.0, "22480": 27666763776.0, "22485": 27666763776.0, "22490": 27666763776.0, "22495": 27666763776.0, "22500": 27666763776.0, "22505": 27666763776.0, "22510": 27666763776.0, "22515": 27666763776.0, "22520": 27666763776.0, "22525": 27666763776.0, "22530": 27666763776.0, "22535": 27666763776.0, "22540": 27666763776.0, "22545": 27666763776.0, "22550": 27666763776.0, "22555": 27666763776.0, "22560": 27666763776.0, "22565": 27666763776.0, "22570": 27666763776.0, "22575": 27666763776.0, "22580": 27666763776.0, "22585": 27666763776.0, "22590": 27666763776.0, "22595": 27666763776.0, "22600": 27666763776.0, "22605": 27666763776.0, "22610": 27666763776.0, "22615": 27666763776.0, "22620": 27666763776.0, "22625": 27666763776.0, "22630": 27666763776.0, "22635": 27666763776.0, "22640": 27666763776.0, "22645": 27666763776.0, "22650": 27666763776.0, "22655": 27666763776.0, "22660": 27666763776.0, "22665": 27666763776.0, "22670": 27666763776.0, "22675": 27666763776.0, "22680": 27666763776.0, "22685": 27666763776.0, "22690": 27666763776.0, "22695": 27666763776.0, "22700": 27666763776.0, "22705": 27666763776.0, "22710": 27666763776.0, "22715": 27666763776.0, "22720": 27666763776.0, "22725": 27666763776.0, "22730": 27666763776.0, "22735": 27666763776.0, "22740": 27666763776.0, "22745": 27666763776.0, "22750": 27666763776.0, "22755": 27666763776.0, "22760": 27666763776.0, "22765": 27666763776.0, "22770": 27666763776.0, "22775": 27666763776.0, "22780": 27666763776.0, "22785": 27666763776.0, "22790": 27666763776.0, "22795": 27666763776.0, "22800": 27666763776.0, "22805": 27666763776.0, "22810": 27666763776.0, "22815": 27666763776.0, "22820": 27666763776.0, "22825": 27666763776.0, "22830": 27666763776.0, "22835": 27666763776.0, "22840": 27666763776.0, "22845": 27666763776.0, "22850": 27666763776.0, "22855": 27666763776.0, "22860": 27666763776.0, "22865": 27666763776.0, "22870": 27666763776.0, "22875": 27666763776.0, "22880": 27666763776.0, "22885": 27666763776.0, "22890": 27666763776.0, "22895": 27666763776.0, "22900": 27666763776.0, "22905": 27666763776.0, "22910": 27666763776.0, "22915": 27666763776.0, "22920": 27666763776.0, "22925": 27666763776.0, "22930": 27666763776.0, "22935": 27666763776.0, "22940": 27666763776.0, "22945": 27666763776.0, "22950": 27666763776.0, "22955": 27666763776.0, "22960": 27666763776.0, "22965": 27666763776.0, "22970": 27666763776.0, "22975": 27666763776.0, "22980": 27666763776.0, "22985": 27666763776.0, "22990": 27666763776.0, "22995": 27666763776.0, "23000": 27666763776.0, "23005": 27666763776.0, "23010": 27666763776.0, "23015": 27666763776.0, "23020": 27666763776.0, "23025": 27666763776.0, "23030": 27666763776.0, "23035": 27666763776.0, "23040": 27666763776.0, "23045": 27666763776.0, "23050": 27666763776.0, "23055": 27666763776.0, "23060": 27666763776.0, "23065": 27666763776.0, "23070": 27666763776.0, "23075": 27666763776.0, "23080": 27666763776.0, "23085": 27666763776.0, "23090": 27666763776.0, "23095": 27666763776.0, "23100": 27666763776.0, "23105": 27666763776.0, "23110": 27666763776.0, "23115": 27666763776.0, "23120": 27666763776.0, "23125": 27666763776.0, "23130": 27666763776.0, "23135": 27666763776.0, "23140": 27666763776.0, "23145": 27666763776.0, "23150": 27666763776.0, "23155": 27666763776.0, "23160": 27666763776.0, "23165": 27666763776.0, "23170": 27666763776.0, "23175": 27666763776.0, "23180": 27666763776.0, "23185": 27666763776.0, "23190": 27666763776.0, "23195": 27666763776.0, "23200": 27666763776.0, "23205": 27666763776.0, "23210": 27666763776.0, "23215": 27666763776.0, "23220": 27666763776.0, "23225": 27666763776.0, "23230": 27666763776.0, "23235": 27666763776.0, "23240": 27666763776.0, "23245": 27666763776.0, "23250": 27666763776.0, "23255": 27666763776.0, "23260": 27666763776.0, "23265": 27666763776.0, "23270": 27666763776.0, "23275": 27666763776.0, "23280": 27666763776.0, "23285": 27666763776.0, "23290": 27666763776.0, "23295": 27666763776.0, "23300": 27666763776.0, "23305": 27666763776.0, "23310": 27666763776.0, "23315": 27666763776.0, "23320": 27666763776.0, "23325": 27666763776.0, "23330": 27666763776.0, "23335": 27666763776.0, "23340": 27666763776.0, "23345": 27666763776.0, "23350": 27666763776.0, "23355": 27666763776.0, "23360": 27666763776.0, "23365": 27666763776.0, "23370": 27666763776.0, "23375": 27666763776.0, "23380": 27666763776.0, "23385": 27666763776.0, "23390": 27666763776.0, "23395": 27666763776.0, "23400": 27666763776.0, "23405": 27666763776.0, "23410": 27666763776.0, "23415": 27666763776.0, "23420": 27666763776.0, "23425": 27666763776.0, "23430": 27666763776.0, "23435": 27666763776.0, "23440": 27666763776.0, "23445": 27666763776.0, "23450": 27666763776.0, "23455": 27666763776.0, "23460": 27666763776.0, "23465": 27666763776.0, "23470": 27666763776.0, "23475": 27666763776.0, "23480": 27666763776.0, "23485": 27666763776.0, "23490": 27666763776.0, "23495": 27666763776.0, "23500": 27666763776.0, "23505": 27666763776.0, "23510": 27666763776.0, "23515": 27666763776.0, "23520": 27666763776.0, "23525": 27666763776.0, "23530": 27666763776.0, "23535": 27666763776.0, "23540": 27666763776.0, "23545": 27666763776.0, "23550": 27666763776.0, "23555": 27666763776.0, "23560": 27666763776.0, "23565": 27666763776.0, "23570": 27666763776.0, "23575": 27666763776.0, "23580": 27666763776.0, "23585": 27666763776.0, "23590": 27666763776.0, "23595": 27666763776.0, "23600": 27666763776.0, "23605": 27666763776.0, "23610": 27666763776.0, "23615": 27666763776.0, "23620": 27666763776.0, "23625": 27666763776.0, "23630": 27666763776.0, "23635": 27666763776.0, "23640": 27666763776.0, "23645": 27666763776.0, "23650": 27666763776.0, "23655": 27666763776.0, "23660": 27666763776.0, "23665": 27666763776.0, "23670": 27666763776.0, "23675": 27666763776.0, "23680": 27666763776.0, "23685": 27666763776.0, "23690": 27666763776.0, "23695": 27666763776.0, "23700": 27666763776.0, "23705": 27666763776.0, "23710": 27666763776.0, "23715": 27666763776.0, "23720": 27666763776.0, "23725": 27666763776.0, "23730": 27666763776.0, "23735": 27666763776.0, "23740": 27666763776.0, "23745": 27666763776.0, "23750": 27666763776.0, "23755": 27666763776.0, "23760": 27666763776.0, "23765": 27666763776.0, "23770": 27666763776.0, "23775": 27666763776.0, "23780": 27666763776.0, "23785": 27666763776.0, "23790": 27666763776.0, "23795": 27666763776.0, "23800": 27666763776.0, "23805": 27666763776.0, "23810": 27666763776.0, "23815": 27666763776.0, "23820": 27666763776.0, "23825": 27666763776.0, "23830": 27666763776.0, "23835": 27666763776.0, "23840": 27666763776.0, "23845": 27666763776.0, "23850": 27666763776.0, "23855": 27666763776.0, "23860": 27666763776.0, "23865": 27666763776.0, "23870": 27666763776.0, "23875": 27666763776.0, "23880": 27666763776.0, "23885": 27666763776.0, "23890": 27666763776.0, "23895": 27666763776.0, "23900": 27666763776.0, "23905": 27666763776.0, "23910": 27666763776.0, "23915": 27666763776.0, "23920": 27666763776.0, "23925": 27666763776.0, "23930": 27666763776.0, "23935": 27666763776.0, "23940": 27666763776.0, "23945": 27666763776.0, "23950": 27666763776.0, "23955": 27666763776.0, "23960": 27666763776.0, "23965": 27666763776.0, "23970": 27666763776.0, "23975": 27666763776.0, "23980": 27666763776.0, "23985": 27666763776.0, "23990": 27666763776.0, "23995": 27666763776.0, "24000": 27666763776.0, "24005": 27666763776.0, "24010": 27666763776.0, "24015": 27666763776.0, "24020": 27666763776.0, "24025": 27666763776.0, "24030": 27666763776.0, "24035": 27666763776.0, "24040": 27666763776.0, "24045": 27666763776.0, "24050": 27666763776.0, "24055": 27666763776.0, "24060": 27666763776.0, "24065": 27666763776.0, "24070": 27666763776.0, "24075": 27666763776.0, "24080": 27666763776.0, "24085": 27666763776.0, "24090": 27666763776.0, "24095": 27666763776.0, "24100": 27666763776.0, "24105": 27666763776.0, "24110": 27666763776.0, "24115": 27666763776.0, "24120": 27666763776.0, "24125": 27666763776.0, "24130": 27666763776.0, "24135": 27666763776.0, "24140": 27666763776.0, "24145": 27666763776.0, "24150": 27666763776.0, "24155": 27666763776.0, "24160": 27666763776.0, "24165": 27666763776.0, "24170": 27666763776.0, "24175": 27666763776.0, "24180": 27666763776.0, "24185": 27666763776.0, "24190": 27666763776.0, "24195": 27666763776.0, "24200": 27666763776.0, "24205": 27666763776.0, "24210": 27666763776.0, "24215": 27666763776.0, "24220": 27666763776.0, "24225": 27666763776.0, "24230": 27666763776.0, "24235": 27666763776.0, "24240": 27666763776.0, "24245": 27666763776.0, "24250": 27666763776.0, "24255": 27666763776.0, "24260": 27666763776.0, "24265": 27666763776.0, "24270": 27666763776.0, "24275": 27666763776.0, "24280": 27666763776.0, "24285": 27666763776.0, "24290": 27666763776.0, "24295": 27666763776.0, "24300": 27666763776.0, "24305": 27666763776.0, "24310": 27666763776.0, "24315": 27666763776.0, "24320": 27666763776.0, "24325": 27666763776.0, "24330": 27666763776.0, "24335": 27666763776.0, "24340": 27666763776.0, "24345": 27666763776.0, "24350": 27666763776.0, "24355": 27666763776.0, "24360": 27666763776.0, "24365": 27666763776.0, "24370": 27666763776.0, "24375": 27666763776.0, "24380": 27666763776.0, "24385": 27666763776.0, "24390": 27666763776.0, "24395": 27666763776.0, "24400": 27666763776.0, "24405": 27666763776.0, "24410": 27666763776.0, "24415": 27666763776.0, "24420": 27666763776.0, "24425": 27666763776.0, "24430": 27666763776.0, "24435": 27666763776.0, "24440": 27666763776.0, "24445": 27666763776.0, "24450": 27666763776.0, "24455": 27666763776.0, "24460": 27666763776.0, "24465": 27666763776.0, "24470": 27666763776.0, "24475": 27666763776.0, "24480": 27666763776.0, "24485": 27666763776.0, "24490": 27666763776.0, "24495": 27666763776.0, "24500": 27666763776.0, "24505": 27666763776.0, "24510": 27666763776.0, "24515": 27666763776.0, "24520": 27666763776.0, "24525": 27666763776.0, "24530": 27666763776.0, "24535": 27666763776.0, "24540": 27666763776.0, "24545": 27666763776.0, "24550": 27666763776.0, "24555": 27666763776.0, "24560": 27666763776.0, "24565": 27666763776.0, "24570": 27666763776.0, "24575": 27666763776.0, "24580": 27666763776.0, "24585": 27666763776.0, "24590": 27666763776.0, "24595": 27666763776.0, "24600": 27666763776.0, "24605": 27666763776.0, "24610": 27666763776.0, "24615": 27666763776.0, "24620": 27666763776.0, "24625": 27666763776.0, "24630": 27666763776.0, "24635": 27666763776.0, "24640": 27666763776.0, "24645": 27666763776.0, "24650": 27666763776.0, "24655": 27666763776.0, "24660": 27666763776.0, "24665": 27666763776.0, "24670": 27666763776.0, "24675": 27666763776.0, "24680": 27666763776.0, "24685": 27666763776.0, "24690": 27666763776.0, "24695": 27666763776.0, "24700": 27666763776.0, "24705": 27666763776.0, "24710": 27666763776.0, "24715": 27666763776.0, "24720": 27666763776.0, "24725": 27666763776.0, "24730": 27666763776.0, "24735": 27666763776.0, "24740": 27666763776.0, "24745": 27666763776.0, "24750": 27666763776.0, "24755": 27666763776.0, "24760": 27666763776.0, "24765": 27666763776.0, "24770": 27666763776.0, "24775": 27666763776.0, "24780": 27666763776.0, "24785": 27666763776.0, "24790": 27666763776.0, "24795": 27666763776.0, "24800": 27666763776.0, "24805": 27666763776.0, "24810": 27666763776.0, "24815": 27666763776.0, "24820": 27666763776.0, "24825": 27666763776.0, "24830": 27666763776.0, "24835": 27666763776.0, "24840": 27666763776.0, "24845": 27666763776.0, "24850": 27666763776.0, "24855": 27666763776.0, "24860": 27666763776.0, "24865": 27666763776.0, "24870": 27666763776.0, "24875": 27666763776.0, "24880": 27666763776.0, "24885": 27666763776.0, "24890": 27666763776.0, "24895": 27666763776.0, "24900": 27666763776.0, "24905": 27666763776.0, "24910": 27666763776.0, "24915": 27666763776.0, "24920": 27666763776.0, "24925": 27666763776.0, "24930": 27666763776.0, "24935": 27666763776.0, "24940": 27666763776.0, "24945": 27666763776.0, "24950": 27666763776.0, "24955": 27666763776.0, "24960": 27666763776.0, "24965": 27666763776.0, "24970": 27666763776.0, "24975": 27666763776.0, "24980": 27666763776.0, "24985": 27666763776.0, "24990": 27666763776.0, "24995": 27666763776.0, "25000": 27666763776.0, "25005": 27666763776.0, "25010": 27666763776.0, "25015": 27666763776.0, "25020": 27666763776.0, "25025": 27666763776.0, "25030": 27666763776.0, "25035": 27666763776.0, "25040": 27666763776.0, "25045": 27666763776.0, "25050": 27666763776.0, "25055": 27666763776.0, "25060": 27666763776.0, "25065": 27666763776.0, "25070": 27666763776.0, "25075": 27666763776.0, "25080": 27666763776.0, "25085": 27666763776.0, "25090": 27666763776.0, "25095": 27666763776.0, "25100": 27666763776.0, "25105": 27666763776.0, "25110": 27666763776.0, "25115": 27666763776.0, "25120": 27666763776.0, "25125": 27666763776.0, "25130": 27666763776.0, "25135": 27666763776.0, "25140": 27666763776.0, "25145": 27666763776.0, "25150": 27666763776.0, "25155": 27666763776.0, "25160": 27666763776.0, "25165": 27666763776.0, "25170": 27666763776.0, "25175": 27666763776.0, "25180": 27666763776.0, "25185": 27666763776.0, "25190": 27666763776.0, "25195": 27666763776.0, "25200": 27666763776.0, "25205": 27666763776.0, "25210": 27666763776.0, "25215": 27666763776.0, "25220": 27666763776.0, "25225": 27666763776.0, "25230": 27666763776.0, "25235": 27666763776.0, "25240": 27666763776.0, "25245": 27666763776.0, "25250": 27666763776.0, "25255": 27666763776.0, "25260": 27666763776.0, "25265": 27666763776.0, "25270": 27666763776.0, "25275": 27666763776.0, "25280": 27666763776.0, "25285": 27666763776.0, "25290": 27666763776.0, "25295": 27666763776.0, "25300": 27666763776.0, "25305": 27666763776.0, "25310": 27666763776.0, "25315": 27666763776.0, "25320": 27666763776.0, "25325": 27666763776.0, "25330": 27666763776.0, "25335": 27666763776.0, "25340": 27666763776.0, "25345": 27666763776.0, "25350": 27666763776.0, "25355": 27666763776.0, "25360": 27666763776.0, "25365": 27666763776.0, "25370": 27666763776.0, "25375": 27666763776.0, "25380": 27666763776.0, "25385": 27666763776.0, "25390": 27666763776.0, "25395": 27666763776.0, "25400": 27666763776.0, "25405": 27666763776.0, "25410": 27666763776.0, "25415": 27666763776.0, "25420": 27666763776.0, "25425": 27666763776.0, "25430": 27666763776.0, "25435": 27666763776.0, "25440": 27666763776.0, "25445": 27666763776.0, "25450": 27666763776.0, "25455": 27666763776.0, "25460": 27666763776.0, "25465": 27666763776.0, "25470": 27666763776.0, "25475": 27666763776.0, "25480": 27666763776.0, "25485": 27666763776.0, "25490": 27666763776.0, "25495": 27666763776.0, "25500": 27666763776.0, "25505": 27666763776.0, "25510": 27666763776.0, "25515": 27666763776.0, "25520": 27666763776.0, "25525": 27666763776.0, "25530": 27666763776.0, "25535": 27666763776.0, "25540": 27666763776.0, "25545": 27666763776.0, "25550": 27666763776.0, "25555": 27666763776.0, "25560": 27666763776.0, "25565": 27666763776.0, "25570": 27666763776.0, "25575": 27666763776.0, "25580": 27666763776.0, "25585": 27666763776.0, "25590": 27666763776.0, "25595": 27666763776.0, "25600": 27666763776.0, "25605": 27666763776.0, "25610": 27666763776.0, "25615": 27666763776.0, "25620": 27666763776.0, "25625": 27666763776.0, "25630": 27666763776.0, "25635": 27666763776.0, "25640": 27666763776.0, "25645": 27666763776.0, "25650": 27666763776.0, "25655": 27666763776.0, "25660": 27666763776.0, "25665": 27666763776.0, "25670": 27666763776.0, "25675": 27666763776.0, "25680": 27666763776.0, "25685": 27666763776.0, "25690": 27666763776.0, "25695": 27666763776.0, "25700": 27666763776.0, "25705": 27666763776.0, "25710": 27666763776.0, "25715": 27666763776.0, "25720": 27666763776.0, "25725": 27666763776.0, "25730": 27666763776.0, "25735": 27666763776.0, "25740": 27666763776.0, "25745": 27666763776.0, "25750": 27666763776.0, "25755": 27666763776.0, "25760": 27666763776.0, "25765": 27666763776.0, "25770": 27666763776.0, "25775": 27666763776.0, "25780": 27666763776.0, "25785": 27666763776.0, "25790": 27666763776.0, "25795": 27666763776.0, "25800": 27666763776.0, "25805": 27666763776.0, "25810": 27666763776.0, "25815": 27666763776.0, "25820": 27666763776.0, "25825": 27666763776.0, "25830": 27666763776.0, "25835": 27666763776.0, "25840": 27666763776.0, "25845": 27666763776.0, "25850": 27666763776.0, "25855": 27666763776.0, "25860": 27666763776.0, "25865": 27666763776.0, "25870": 27666763776.0, "25875": 27666763776.0, "25880": 27666763776.0, "25885": 27666763776.0, "25890": 27666763776.0, "25895": 27666763776.0, "25900": 27666763776.0, "25905": 27666763776.0, "25910": 27666763776.0, "25915": 27666763776.0, "25920": 27666763776.0, "25925": 27666763776.0, "25930": 27666763776.0, "25935": 27666763776.0, "25940": 27666763776.0, "25945": 27666763776.0, "25950": 27666763776.0, "25955": 27666763776.0, "25960": 27666763776.0, "25965": 27666763776.0, "25970": 27666763776.0, "25975": 27666763776.0, "25980": 27666763776.0, "25985": 27666763776.0, "25990": 27666763776.0, "25995": 27666763776.0, "26000": 27666763776.0, "26005": 27666763776.0, "26010": 27666763776.0, "26015": 27666763776.0, "26020": 27666763776.0, "26025": 27666763776.0, "26030": 27666763776.0, "26035": 27666763776.0, "26040": 27666763776.0, "26045": 27666763776.0, "26050": 27666763776.0, "26055": 27666763776.0, "26060": 27666763776.0, "26065": 27666763776.0, "26070": 27666763776.0, "26075": 27666763776.0, "26080": 27666763776.0, "26085": 27666763776.0, "26090": 27666763776.0, "26095": 27666763776.0, "26100": 27666763776.0, "26105": 27666763776.0, "26110": 27666763776.0, "26115": 27666763776.0, "26120": 27666763776.0, "26125": 27666763776.0, "26130": 27666763776.0, "26135": 27666763776.0, "26140": 27666763776.0, "26145": 27666763776.0, "26150": 27666763776.0, "26155": 27666763776.0, "26160": 27666763776.0, "26165": 27666763776.0, "26170": 27666763776.0, "26175": 27666763776.0, "26180": 27666763776.0, "26185": 27666763776.0, "26190": 27666763776.0, "26195": 27666763776.0, "26200": 27666763776.0, "26205": 27666763776.0, "26210": 27666763776.0, "26215": 27666763776.0, "26220": 27666763776.0, "26225": 27666763776.0, "26230": 27666763776.0, "26235": 27666763776.0, "26240": 27666763776.0, "26245": 27666763776.0, "26250": 27666763776.0, "26255": 27666763776.0, "26260": 27666763776.0, "26265": 27666763776.0, "26270": 27666763776.0, "26275": 27666763776.0, "26280": 27666763776.0, "26285": 27666763776.0, "26290": 27666763776.0, "26295": 27666763776.0, "26300": 27666763776.0, "26305": 27666763776.0, "26310": 27666763776.0, "26315": 27666763776.0, "26320": 27666763776.0, "26325": 27666763776.0, "26330": 27666763776.0, "26335": 27666763776.0, "26340": 27666763776.0, "26345": 27666763776.0, "26350": 27666763776.0, "26355": 27666763776.0, "26360": 27666763776.0, "26365": 27666763776.0, "26370": 27666763776.0, "26375": 27666763776.0, "26380": 27666763776.0, "26385": 27666763776.0, "26390": 27666763776.0, "26395": 27666763776.0, "26400": 27666763776.0, "26405": 27666763776.0, "26410": 27666763776.0, "26415": 27666763776.0, "26420": 27666763776.0, "26425": 27666763776.0, "26430": 27666763776.0, "26435": 27666763776.0, "26440": 27666763776.0, "26445": 27666763776.0, "26450": 27666763776.0, "26455": 27666763776.0, "26460": 27666763776.0, "26465": 27666763776.0, "26470": 27666763776.0, "26475": 27666763776.0, "26480": 27666763776.0, "26485": 27666763776.0, "26490": 27666763776.0, "26495": 27666763776.0, "26500": 27666763776.0, "26505": 27666763776.0, "26510": 27666763776.0, "26515": 27666763776.0, "26520": 27666763776.0, "26525": 27666763776.0, "26530": 27666763776.0, "26535": 27666763776.0, "26540": 27666763776.0, "26545": 27666763776.0, "26550": 27666763776.0, "26555": 27666763776.0, "26560": 27666763776.0, "26565": 27666763776.0, "26570": 27666763776.0, "26575": 27666763776.0, "26580": 27666763776.0, "26585": 27666763776.0, "26590": 27666763776.0, "26595": 27666763776.0, "26600": 27666763776.0, "26605": 27666763776.0, "26610": 27666763776.0, "26615": 27666763776.0, "26620": 27666763776.0, "26625": 27666763776.0, "26630": 27666763776.0, "26635": 27666763776.0, "26640": 27666763776.0, "26645": 27666763776.0, "26650": 27666763776.0, "26655": 27666763776.0, "26660": 27666763776.0, "26665": 27666763776.0, "26670": 27666763776.0, "26675": 27666763776.0, "26680": 27666763776.0, "26685": 27666763776.0, "26690": 27666763776.0, "26695": 27666763776.0, "26700": 27666763776.0, "26705": 27666763776.0, "26710": 27666763776.0, "26715": 27666763776.0, "26720": 27666763776.0, "26725": 27666763776.0, "26730": 27666763776.0, "26735": 27666763776.0, "26740": 27666763776.0, "26745": 27666763776.0, "26750": 27666763776.0, "26755": 27666763776.0, "26760": 27666763776.0, "26765": 27666763776.0, "26770": 27666763776.0, "26775": 27666763776.0, "26780": 27666763776.0, "26785": 27666763776.0, "26790": 27666763776.0, "26795": 27666763776.0, "26800": 27666763776.0, "26805": 27666763776.0, "26810": 27666763776.0, "26815": 27666763776.0, "26820": 27666763776.0, "26825": 27666763776.0, "26830": 27666763776.0, "26835": 27666763776.0, "26840": 27666763776.0, "26845": 27666763776.0, "26850": 27666763776.0, "26855": 27666763776.0, "26860": 27666763776.0, "26865": 27666763776.0, "26870": 27666763776.0, "26875": 27666763776.0, "26880": 27666763776.0, "26885": 27666763776.0, "26890": 27666763776.0, "26895": 27666763776.0, "26900": 27666763776.0, "26905": 27666763776.0, "26910": 27666763776.0, "26915": 27666763776.0, "26920": 27666763776.0, "26925": 27666763776.0, "26930": 27666763776.0, "26935": 27666763776.0, "26940": 27666171904.0, "26945": 27666171904.0, "26950": 27666763776.0, "26955": 27666763776.0, "26960": 27666763776.0, "26965": 27666763776.0, "26970": 27666763776.0, "26975": 27666763776.0, "26980": 27666763776.0, "26985": 27666763776.0, "26990": 27666763776.0, "26995": 27666763776.0, "27000": 27666763776.0, "27005": 27666763776.0, "27010": 27666763776.0, "27015": 27666763776.0, "27020": 27666763776.0, "27025": 27666763776.0, "27030": 27666763776.0, "27035": 27666763776.0, "27040": 27666763776.0, "27045": 27666763776.0, "27050": 27666763776.0, "27055": 27666763776.0, "27060": 27666763776.0, "27065": 27666763776.0, "27070": 27666763776.0, "27075": 27666763776.0, "27080": 27666763776.0, "27085": 27666763776.0, "27090": 27666763776.0, "27095": 27666763776.0, "27100": 27666763776.0, "27105": 27666763776.0, "27110": 27666763776.0, "27115": 27666763776.0, "27120": 27666763776.0, "27125": 27666763776.0, "27130": 27666763776.0, "27135": 27666763776.0, "27140": 27666763776.0, "27145": 27666763776.0, "27150": 27666763776.0, "27155": 27666763776.0, "27160": 27666763776.0, "27165": 27666763776.0, "27170": 27666763776.0, "27175": 27666763776.0, "27180": 27666763776.0, "27185": 27666763776.0, "27190": 27666763776.0, "27195": 27666763776.0, "27200": 27666763776.0, "27205": 27666763776.0, "27210": 27666763776.0, "27215": 27666763776.0, "27220": 27666763776.0, "27225": 27666763776.0, "27230": 27666763776.0, "27235": 27666763776.0, "27240": 27666763776.0, "27245": 27666763776.0, "27250": 27666763776.0, "27255": 27666763776.0, "27260": 27666763776.0, "27265": 27666763776.0, "27270": 27666763776.0, "27275": 27666763776.0, "27280": 27666763776.0, "27285": 27666763776.0, "27290": 27666763776.0, "27295": 27666763776.0, "27300": 27666763776.0, "27305": 27666763776.0, "27310": 27666763776.0, "27315": 27666763776.0, "27320": 27666763776.0, "27325": 27666763776.0, "27330": 27666763776.0, "27335": 27666763776.0, "27340": 27666763776.0, "27345": 27666763776.0, "27350": 27666763776.0, "27355": 27666763776.0, "27360": 27666763776.0, "27365": 27666763776.0, "27370": 27666763776.0, "27375": 27666763776.0, "27380": 27666763776.0, "27385": 27666763776.0, "27390": 27666763776.0, "27395": 27666763776.0, "27400": 27666763776.0, "27405": 27666763776.0, "27410": 27666763776.0, "27415": 27666763776.0, "27420": 27666763776.0, "27425": 27666763776.0, "27430": 27666763776.0, "27435": 27666763776.0, "27440": 27666763776.0, "27445": 27666763776.0, "27450": 27666763776.0, "27455": 27666763776.0, "27460": 27666763776.0, "27465": 27666763776.0, "27470": 27666763776.0, "27475": 27666763776.0, "27480": 27666763776.0, "27485": 27666763776.0, "27490": 27666763776.0, "27495": 27666763776.0, "27500": 27666763776.0, "27505": 27666763776.0, "27510": 27666763776.0, "27515": 27666763776.0, "27520": 27666763776.0, "27525": 27666763776.0, "27530": 27666763776.0, "27535": 27666763776.0, "27540": 27666763776.0, "27545": 27666763776.0, "27550": 27666763776.0, "27555": 27666763776.0, "27560": 27666763776.0, "27565": 27666763776.0, "27570": 27666763776.0, "27575": 27666763776.0, "27580": 27666763776.0, "27585": 27666763776.0, "27590": 27666763776.0, "27595": 27666763776.0, "27600": 27666763776.0, "27605": 27666763776.0, "27610": 27666763776.0, "27615": 27666763776.0, "27620": 27666763776.0, "27625": 27666763776.0, "27630": 27666763776.0, "27635": 27666763776.0, "27640": 27666763776.0, "27645": 27666763776.0, "27650": 27666763776.0, "27655": 27666763776.0, "27660": 27666763776.0, "27665": 27666763776.0, "27670": 27666763776.0, "27675": 27666763776.0, "27680": 27666763776.0, "27685": 27666763776.0, "27690": 27666763776.0, "27695": 27666763776.0, "27700": 27666763776.0, "27705": 27666763776.0, "27710": 27666763776.0, "27715": 27666763776.0, "27720": 27666763776.0, "27725": 27666763776.0, "27730": 27666763776.0, "27735": 27666763776.0, "27740": 27666763776.0, "27745": 27666763776.0, "27750": 27666763776.0, "27755": 27666763776.0, "27760": 27666763776.0, "27765": 27666763776.0, "27770": 27666763776.0, "27775": 27666763776.0, "27780": 27666763776.0, "27785": 27666763776.0, "27790": 27666763776.0, "27795": 27666763776.0, "27800": 27666763776.0, "27805": 27666763776.0, "27810": 27666763776.0, "27815": 27666763776.0, "27820": 27666763776.0, "27825": 27666763776.0, "27830": 27666763776.0, "27835": 27666763776.0, "27840": 27666763776.0, "27845": 27666763776.0, "27850": 27666763776.0, "27855": 27666763776.0, "27860": 27666763776.0, "27865": 27666763776.0, "27870": 27666763776.0, "27875": 27666763776.0, "27880": 27666763776.0, "27885": 27666763776.0, "27890": 27666763776.0, "27895": 27666763776.0, "27900": 27666763776.0, "27905": 27666763776.0, "27910": 27666763776.0, "27915": 27666763776.0, "27920": 27666763776.0, "27925": 27666763776.0, "27930": 27666763776.0, "27935": 27666763776.0, "27940": 27666763776.0, "27945": 27666763776.0, "27950": 27666763776.0, "27955": 27666763776.0, "27960": 27666763776.0, "27965": 27666763776.0, "27970": 27666763776.0, "27975": 27666763776.0, "27980": 27666763776.0, "27985": 27666763776.0, "27990": 27666763776.0, "27995": 27666763776.0, "28000": 27666763776.0, "28005": 27666763776.0, "28010": 27666763776.0, "28015": 27666763776.0, "28020": 27666763776.0, "28025": 27666763776.0, "28030": 27666763776.0, "28035": 27666763776.0, "28040": 27666763776.0, "28045": 27666763776.0, "28050": 27666763776.0, "28055": 27666763776.0, "28060": 27666763776.0, "28065": 27666763776.0, "28070": 27666763776.0, "28075": 27666763776.0, "28080": 27666763776.0, "28085": 27666763776.0, "28090": 27666763776.0, "28095": 27666763776.0, "28100": 27666763776.0, "28105": 27666763776.0, "28110": 27666763776.0, "28115": 27666763776.0, "28120": 27666763776.0, "28125": 27666763776.0, "28130": 27666763776.0, "28135": 27666763776.0, "28140": 27666763776.0, "28145": 27666763776.0, "28150": 27666763776.0, "28155": 27666763776.0, "28160": 27666763776.0, "28165": 27666763776.0, "28170": 27666763776.0, "28175": 27666763776.0, "28180": 27666763776.0, "28185": 27666763776.0, "28190": 27666763776.0, "28195": 27666763776.0, "28200": 27666763776.0, "28205": 27666763776.0, "28210": 27666763776.0, "28215": 27666763776.0, "28220": 27666763776.0, "28225": 27666763776.0, "28230": 27666763776.0, "28235": 27666763776.0, "28240": 27666763776.0, "28245": 27666763776.0, "28250": 27666763776.0, "28255": 27666763776.0, "28260": 27666763776.0, "28265": 27666763776.0, "28270": 27666763776.0, "28275": 27666763776.0, "28280": 27666763776.0, "28285": 27666763776.0, "28290": 27666763776.0, "28295": 27666763776.0, "28300": 27666763776.0, "28305": 27666763776.0, "28310": 27666763776.0, "28315": 27666763776.0, "28320": 27666763776.0, "28325": 27666763776.0, "28330": 27666763776.0, "28335": 27666763776.0, "28340": 27666763776.0, "28345": 27666763776.0, "28350": 27666763776.0, "28355": 27666763776.0, "28360": 27666763776.0, "28365": 27666763776.0, "28370": 27666763776.0, "28375": 27666763776.0, "28380": 27666763776.0, "28385": 27666763776.0, "28390": 27666763776.0, "28395": 27666763776.0, "28400": 27666763776.0, "28405": 27666763776.0, "28410": 27666763776.0, "28415": 27666763776.0, "28420": 27666763776.0, "28425": 27666763776.0, "28430": 27666763776.0, "28435": 27666763776.0, "28440": 27666763776.0, "28445": 27666763776.0, "28450": 27666763776.0, "28455": 27666763776.0, "28460": 27666763776.0, "28465": 27666763776.0, "28470": 27666763776.0, "28475": 27666763776.0, "28480": 27666763776.0, "28485": 27666763776.0, "28490": 27666763776.0, "28495": 27666763776.0, "28500": 27666763776.0, "28505": 27666763776.0, "28510": 27666763776.0, "28515": 27666763776.0, "28520": 27666763776.0, "28525": 27666763776.0, "28530": 27666763776.0, "28535": 27666763776.0, "28540": 27666763776.0, "28545": 27666763776.0, "28550": 27666763776.0, "28555": 27666763776.0, "28560": 27666763776.0, "28565": 27666763776.0, "28570": 27666763776.0, "28575": 27666763776.0, "28580": 27666763776.0, "28585": 27666763776.0, "28590": 27666763776.0, "28595": 27666763776.0, "28600": 27666763776.0, "28605": 27666763776.0, "28610": 27666763776.0, "28615": 27666763776.0, "28620": 27666763776.0, "28625": 27666763776.0, "28630": 27666763776.0, "28635": 27666763776.0, "28640": 27666763776.0, "28645": 27666763776.0, "28650": 27666763776.0, "28655": 27666763776.0, "28660": 27666763776.0, "28665": 27666763776.0, "28670": 27666763776.0, "28675": 27666763776.0, "28680": 27666763776.0, "28685": 27666763776.0, "28690": 27666763776.0, "28695": 27666763776.0, "28700": 27666763776.0, "28705": 27666763776.0, "28710": 27666763776.0, "28715": 27666763776.0, "28720": 27666763776.0, "28725": 27666763776.0, "28730": 27666763776.0, "28735": 27666763776.0, "28740": 27666763776.0, "28745": 27666763776.0, "28750": 27666763776.0, "28755": 27666763776.0, "28760": 27666763776.0, "28765": 27666763776.0, "28770": 27666763776.0, "28775": 27666763776.0, "28780": 27666763776.0, "28785": 27666763776.0, "28790": 27666763776.0, "28795": 27666763776.0, "28800": 27666763776.0, "28805": 27666763776.0, "28810": 27666763776.0, "28815": 27666763776.0, "28820": 27666763776.0, "28825": 27666763776.0, "28830": 27666763776.0, "28835": 27666763776.0, "28840": 27666763776.0, "28845": 27666763776.0, "28850": 27666763776.0, "28855": 27666763776.0, "28860": 27666763776.0, "28865": 27666763776.0, "28870": 27666763776.0, "28875": 27666763776.0, "28880": 27666763776.0, "28885": 27666763776.0, "28890": 27666763776.0, "28895": 27666763776.0, "28900": 27666763776.0, "28905": 27666763776.0, "28910": 27666763776.0, "28915": 27666763776.0, "28920": 27666763776.0, "28925": 27666763776.0, "28930": 27666763776.0, "28935": 27666763776.0, "28940": 27666763776.0, "28945": 27666763776.0, "28950": 27666763776.0, "28955": 27666763776.0, "28960": 27666763776.0, "28965": 27666763776.0, "28970": 27666763776.0, "28975": 27666763776.0, "28980": 27666763776.0, "28985": 27666763776.0, "28990": 27666763776.0, "28995": 27666763776.0, "29000": 27666763776.0, "29005": 27666763776.0, "29010": 27666763776.0, "29015": 27666763776.0, "29020": 27666763776.0, "29025": 27666763776.0, "29030": 27666763776.0, "29035": 27666763776.0, "29040": 27666763776.0, "29045": 27666763776.0, "29050": 27666763776.0, "29055": 27666763776.0, "29060": 27666763776.0, "29065": 27666763776.0, "29070": 27666763776.0, "29075": 27666763776.0, "29080": 27666763776.0, "29085": 27666763776.0, "29090": 27666763776.0, "29095": 27666763776.0, "29100": 27666763776.0, "29105": 27666763776.0, "29110": 27666763776.0, "29115": 27666763776.0, "29120": 27666763776.0, "29125": 27666763776.0, "29130": 27666763776.0, "29135": 27666763776.0, "29140": 27666763776.0, "29145": 27666763776.0, "29150": 27666763776.0, "29155": 27666763776.0, "29160": 27666763776.0, "29165": 27666763776.0, "29170": 27666763776.0, "29175": 27666763776.0, "29180": 27666763776.0, "29185": 27666763776.0, "29190": 27666763776.0, "29195": 27666763776.0, "29200": 27666763776.0, "29205": 27666763776.0, "29210": 27666763776.0, "29215": 27666763776.0, "29220": 27666763776.0, "29225": 27666763776.0, "29230": 27666763776.0, "29235": 27666763776.0, "29240": 27666763776.0, "29245": 27666763776.0, "29250": 27666763776.0, "29255": 27666763776.0, "29260": 27666763776.0, "29265": 27666763776.0, "29270": 27666763776.0, "29275": 27666763776.0, "29280": 27666763776.0, "29285": 27666763776.0, "29290": 27666763776.0, "29295": 27666763776.0, "29300": 27666763776.0, "29305": 27666763776.0, "29310": 27666763776.0, "29315": 27666763776.0, "29320": 27666763776.0, "29325": 27666763776.0, "29330": 27666763776.0, "29335": 27666763776.0, "29340": 27666763776.0, "29345": 27666763776.0, "29350": 27666763776.0, "29355": 27666763776.0, "29360": 27666763776.0, "29365": 27666763776.0, "29370": 27666763776.0, "29375": 27666763776.0, "29380": 27666763776.0, "29385": 27666763776.0, "29390": 27666763776.0, "29395": 27666763776.0, "29400": 27666763776.0, "29405": 27666763776.0, "29410": 27666763776.0, "29415": 27666763776.0, "29420": 27666763776.0, "29425": 27666763776.0, "29430": 27666763776.0, "29435": 27666763776.0, "29440": 27666763776.0, "29445": 27666763776.0, "29450": 27666763776.0, "29455": 27666763776.0, "29460": 27666763776.0, "29465": 27666763776.0, "29470": 27666763776.0, "29475": 27666763776.0, "29480": 27666763776.0, "29485": 27666763776.0, "29490": 27666763776.0, "29495": 27666763776.0, "29500": 27666763776.0, "29505": 27666763776.0, "29510": 27666763776.0, "29515": 27666763776.0, "29520": 27666763776.0, "29525": 27666763776.0, "29530": 27666763776.0, "29535": 27666763776.0, "29540": 27666763776.0, "29545": 27666763776.0, "29550": 27666763776.0, "29555": 27666763776.0, "29560": 27666763776.0, "29565": 27666763776.0, "29570": 27666763776.0, "29575": 27666763776.0, "29580": 27666763776.0, "29585": 27666763776.0, "29590": 27666763776.0, "29595": 27666763776.0, "29600": 27666763776.0, "29605": 27666763776.0, "29610": 27666763776.0, "29615": 27666763776.0, "29620": 27666763776.0, "29625": 27666763776.0, "29630": 27666763776.0, "29635": 27666763776.0, "29640": 27666763776.0, "29645": 27666763776.0, "29650": 27666763776.0, "29655": 27666763776.0, "29660": 27666763776.0, "29665": 27666763776.0, "29670": 27666763776.0, "29675": 27666763776.0, "29680": 27666763776.0, "29685": 27666763776.0, "29690": 27666763776.0, "29695": 27666763776.0, "29700": 27666763776.0, "29705": 27666763776.0, "29710": 27666763776.0, "29715": 27666763776.0, "29720": 27666763776.0, "29725": 27666763776.0, "29730": 27666763776.0, "29735": 27666763776.0, "29740": 27666763776.0, "29745": 27666763776.0, "29750": 27666763776.0, "29755": 27666763776.0, "29760": 27666763776.0, "29765": 27666763776.0, "29770": 27666763776.0, "29775": 27666763776.0, "29780": 27666763776.0, "29785": 27666763776.0, "29790": 27666763776.0, "29795": 27666763776.0, "29800": 27666763776.0, "29805": 27666763776.0, "29810": 27666763776.0, "29815": 27666763776.0, "29820": 27666763776.0, "29825": 27666763776.0, "29830": 27666763776.0, "29835": 27666763776.0, "29840": 27666763776.0, "29845": 27666763776.0, "29850": 27666763776.0, "29855": 27666763776.0, "29860": 27666763776.0, "29865": 27666763776.0, "29870": 27666763776.0, "29875": 27666763776.0, "29880": 27666763776.0, "29885": 27666763776.0, "29890": 27666763776.0, "29895": 27666763776.0, "29900": 27666763776.0, "29905": 27666763776.0, "29910": 27666763776.0, "29915": 27666763776.0, "29920": 27666763776.0, "29925": 27666763776.0, "29930": 27666763776.0, "29935": 27666763776.0, "29940": 27666763776.0, "29945": 27666763776.0, "29950": 27666763776.0, "29955": 27666763776.0, "29960": 27666763776.0, "29965": 27666763776.0, "29970": 27666763776.0, "29975": 27666763776.0, "29980": 27666763776.0, "29985": 27666763776.0, "29990": 27666763776.0, "29995": 27666763776.0, "30000": 27666763776.0, "30005": 27666763776.0, "30010": 27666763776.0, "30015": 27666763776.0, "30020": 27666763776.0, "30025": 27666763776.0, "30030": 27666763776.0, "30035": 27666763776.0, "30040": 27666763776.0, "30045": 27666763776.0, "30050": 27666763776.0, "30055": 27666763776.0, "30060": 27666763776.0, "30065": 27666763776.0, "30070": 27666763776.0, "30075": 27666763776.0, "30080": 27666763776.0, "30085": 27666763776.0, "30090": 27666763776.0, "30095": 27666763776.0, "30100": 27666763776.0, "30105": 27666763776.0, "30110": 27666763776.0, "30115": 27666763776.0, "30120": 27666763776.0, "30125": 27666763776.0, "30130": 27666763776.0, "30135": 27666763776.0, "30140": 27666763776.0, "30145": 27666763776.0, "30150": 27666763776.0, "30155": 27666763776.0, "30160": 27666763776.0, "30165": 27666763776.0, "30170": 27666763776.0, "30175": 27666763776.0, "30180": 27666763776.0, "30185": 27666763776.0, "30190": 27666763776.0, "30195": 27666763776.0, "30200": 27666763776.0, "30205": 27666763776.0, "30210": 27666763776.0, "30215": 27666763776.0, "30220": 27666763776.0, "30225": 27666763776.0, "30230": 27666763776.0, "30235": 27666763776.0, "30240": 27666763776.0, "30245": 27666763776.0, "30250": 27666763776.0, "30255": 27666763776.0, "30260": 27666763776.0, "30265": 27666763776.0, "30270": 27666763776.0, "30275": 27666763776.0, "30280": 27666763776.0, "30285": 27666763776.0, "30290": 27666763776.0, "30295": 27666763776.0, "30300": 27666763776.0, "30305": 27666763776.0, "30310": 27666763776.0, "30315": 27666763776.0, "30320": 27666763776.0, "30325": 27666763776.0, "30330": 27666763776.0, "30335": 27666763776.0, "30340": 27666763776.0, "30345": 27666763776.0, "30350": 27666763776.0, "30355": 27666763776.0, "30360": 27666763776.0, "30365": 27666763776.0, "30370": 27666763776.0, "30375": 27666763776.0, "30380": 27666763776.0, "30385": 27666763776.0, "30390": 27666763776.0, "30395": 27666763776.0, "30400": 27666763776.0, "30405": 27666763776.0, "30410": 27666763776.0, "30415": 27666763776.0, "30420": 27666763776.0, "30425": 27666763776.0, "30430": 27666763776.0, "30435": 27666763776.0, "30440": 27666763776.0, "30445": 27666763776.0, "30450": 27666763776.0, "30455": 27666763776.0, "30460": 27666763776.0, "30465": 27666763776.0, "30470": 27666763776.0, "30475": 27666763776.0, "30480": 27666763776.0, "30485": 27666763776.0, "30490": 27666763776.0, "30495": 27666763776.0, "30500": 27666763776.0, "30505": 27666763776.0, "30510": 27666763776.0, "30515": 27666763776.0, "30520": 27666763776.0, "30525": 27666763776.0, "30530": 27666763776.0, "30535": 27666763776.0, "30540": 27666763776.0, "30545": 27666763776.0, "30550": 27666763776.0, "30555": 27666763776.0, "30560": 27666763776.0, "30565": 27666763776.0, "30570": 27666763776.0, "30575": 27666763776.0, "30580": 27666763776.0, "30585": 27666763776.0, "30590": 27666763776.0, "30595": 27666763776.0, "30600": 27666763776.0, "30605": 27666763776.0, "30610": 27666763776.0, "30615": 27666763776.0, "30620": 27666763776.0, "30625": 27666763776.0, "30630": 27666763776.0, "30635": 27666763776.0, "30640": 27666763776.0, "30645": 27666763776.0, "30650": 27666763776.0, "30655": 27666763776.0, "30660": 27666763776.0, "30665": 27666763776.0, "30670": 27666763776.0, "30675": 27666763776.0, "30680": 27666763776.0, "30685": 27666763776.0, "30690": 27666763776.0, "30695": 27666763776.0, "30700": 27666763776.0, "30705": 27666763776.0, "30710": 27666763776.0, "30715": 27666763776.0, "30720": 27666763776.0, "30725": 27666763776.0, "30730": 27666763776.0, "30735": 27666763776.0, "30740": 27666763776.0, "30745": 27666763776.0, "30750": 27666763776.0, "30755": 27666763776.0, "30760": 27666763776.0, "30765": 27666763776.0, "30770": 27666763776.0, "30775": 27666763776.0, "30780": 27666763776.0, "30785": 27666763776.0, "30790": 27666763776.0, "30795": 27666763776.0, "30800": 27666763776.0, "30805": 27666763776.0, "30810": 27666763776.0, "30815": 27666763776.0, "30820": 27666763776.0, "30825": 27666763776.0, "30830": 27666763776.0, "30835": 27666763776.0, "30840": 27666763776.0, "30845": 27666763776.0, "30850": 27666763776.0, "30855": 27666763776.0, "30860": 27666763776.0, "30865": 27666763776.0, "30870": 27666763776.0, "30875": 27666763776.0, "30880": 27666763776.0, "30885": 27666763776.0, "30890": 27666763776.0, "30895": 27666763776.0, "30900": 27666763776.0, "30905": 27666763776.0, "30910": 27666763776.0, "30915": 27666763776.0, "30920": 27666763776.0, "30925": 27666763776.0, "30930": 27666763776.0, "30935": 27666763776.0, "30940": 27666763776.0, "30945": 27666763776.0, "30950": 27666763776.0, "30955": 27666763776.0, "30960": 27666763776.0, "30965": 27666763776.0, "30970": 27666763776.0, "30975": 27666763776.0, "30980": 27666763776.0, "30985": 27666763776.0, "30990": 27666763776.0, "30995": 27666763776.0, "31000": 27666763776.0, "31005": 27666763776.0, "31010": 27666763776.0, "31015": 27666763776.0, "31020": 27666763776.0, "31025": 27666763776.0, "31030": 27666763776.0, "31035": 27666763776.0, "31040": 27666763776.0, "31045": 27666763776.0, "31050": 27666763776.0, "31055": 27666763776.0, "31060": 27666763776.0, "31065": 27666763776.0, "31070": 27666763776.0, "31075": 27666763776.0, "31080": 27666763776.0, "31085": 27666763776.0, "31090": 27666763776.0, "31095": 27666763776.0, "31100": 27666763776.0, "31105": 27666763776.0, "31110": 27666763776.0, "31115": 27666763776.0, "31120": 27666763776.0, "31125": 27666763776.0, "31130": 27666763776.0, "31135": 27666763776.0, "31140": 27666763776.0, "31145": 27666763776.0, "31150": 27666763776.0, "31155": 27666763776.0, "31160": 27666763776.0, "31165": 27666763776.0, "31170": 27666763776.0, "31175": 27666763776.0, "31180": 27666763776.0, "31185": 27666763776.0, "31190": 27666763776.0, "31195": 27666763776.0, "31200": 27666763776.0, "31205": 27666763776.0, "31210": 27666763776.0, "31215": 27666763776.0, "31220": 27666763776.0, "31225": 27666763776.0, "31230": 27666763776.0, "31235": 27666763776.0, "31240": 27666763776.0, "31245": 27666763776.0, "31250": 27666763776.0, "31255": 27666763776.0, "31260": 27666763776.0, "31265": 27666763776.0, "31270": 27666763776.0, "31275": 27666763776.0, "31280": 27666763776.0, "31285": 27666763776.0, "31290": 27666763776.0, "31295": 27666763776.0, "31300": 27666763776.0, "31305": 27666763776.0, "31310": 27666763776.0, "31315": 27666763776.0, "31320": 27666763776.0, "31325": 27666763776.0, "31330": 27666763776.0, "31335": 27666763776.0, "31340": 27666763776.0, "31345": 27666763776.0, "31350": 27666763776.0, "31355": 27666763776.0, "31360": 27666763776.0, "31365": 27666763776.0, "31370": 27666763776.0, "31375": 27666763776.0, "31380": 27666763776.0, "31385": 27666763776.0, "31390": 27666763776.0, "31395": 27666763776.0, "31400": 27666763776.0, "31405": 27666763776.0, "31410": 27666763776.0, "31415": 27666763776.0, "31420": 27666763776.0, "31425": 27666763776.0, "31430": 27666763776.0, "31435": 27666763776.0, "31440": 27666763776.0, "31445": 27666763776.0, "31450": 27666763776.0, "31455": 27666763776.0, "31460": 27666763776.0, "31465": 27666763776.0, "31470": 27666763776.0, "31475": 27666763776.0, "31480": 27666763776.0, "31485": 27666763776.0, "31490": 27666763776.0, "31495": 27666763776.0, "31500": 27666763776.0, "31505": 27666763776.0, "31510": 27666763776.0, "31515": 27666763776.0, "31520": 27666763776.0, "31525": 27666763776.0, "31530": 27666763776.0, "31535": 27666763776.0, "31540": 27666763776.0, "31545": 27666763776.0, "31550": 27666763776.0, "31555": 27666763776.0, "31560": 27666763776.0, "31565": 27666763776.0, "31570": 27666763776.0, "31575": 27666763776.0, "31580": 27666763776.0, "31585": 27666763776.0, "31590": 27666763776.0, "31595": 27666763776.0, "31600": 27666763776.0, "31605": 27666763776.0, "31610": 27666763776.0, "31615": 27666763776.0, "31620": 27666763776.0, "31625": 27666763776.0, "31630": 27666763776.0, "31635": 27666763776.0, "31640": 27666763776.0, "31645": 27666763776.0, "31650": 27666763776.0, "31655": 27666763776.0, "31660": 27666763776.0, "31665": 27666763776.0, "31670": 27666763776.0, "31675": 27666763776.0, "31680": 27666763776.0, "31685": 27666763776.0, "31690": 27666763776.0, "31695": 27666763776.0, "31700": 27666763776.0, "31705": 27666763776.0, "31710": 27666763776.0, "31715": 27666763776.0, "31720": 27666763776.0, "31725": 27666763776.0, "31730": 27666763776.0, "31735": 27666763776.0, "31740": 27666763776.0, "31745": 27666763776.0, "31750": 27666763776.0, "31755": 27666763776.0, "31760": 27666763776.0, "31765": 27666763776.0, "31770": 27666763776.0, "31775": 27666763776.0, "31780": 27666763776.0, "31785": 27666763776.0, "31790": 27666763776.0, "31795": 27666763776.0, "31800": 27666763776.0, "31805": 27666763776.0, "31810": 27666763776.0, "31815": 27666763776.0, "31820": 27666763776.0, "31825": 27666763776.0, "31830": 27666763776.0, "31835": 27666763776.0, "31840": 27666763776.0, "31845": 27666763776.0, "31850": 27666763776.0, "31855": 27666763776.0, "31860": 27666763776.0, "31865": 27666763776.0, "31870": 27666763776.0, "31875": 27666763776.0, "31880": 27666763776.0, "31885": 27666763776.0, "31890": 27666763776.0, "31895": 27666763776.0, "31900": 27666763776.0, "31905": 27666763776.0, "31910": 27666763776.0, "31915": 27666763776.0, "31920": 27666763776.0, "31925": 27666763776.0, "31930": 27666763776.0, "31935": 27666763776.0, "31940": 27666763776.0, "31945": 27666763776.0, "31950": 27666763776.0, "31955": 27666763776.0, "31960": 27666763776.0, "31965": 27666763776.0, "31970": 27666763776.0, "31975": 27666763776.0, "31980": 27666763776.0, "31985": 27666763776.0, "31990": 27666763776.0, "31995": 27666763776.0, "32000": 27666763776.0, "32005": 27666763776.0, "32010": 27666763776.0, "32015": 27666763776.0, "32020": 27666763776.0, "32025": 27666763776.0, "32030": 27666763776.0, "32035": 27666763776.0, "32040": 27666763776.0, "32045": 27666763776.0, "32050": 27666763776.0, "32055": 27666763776.0, "32060": 27666763776.0, "32065": 27666763776.0, "32070": 27666763776.0, "32075": 27666763776.0, "32080": 27666763776.0, "32085": 27666763776.0, "32090": 27666763776.0, "32095": 27666763776.0, "32100": 27666763776.0, "32105": 27666763776.0, "32110": 27666763776.0, "32115": 27666763776.0, "32120": 27666763776.0, "32125": 27666763776.0, "32130": 27666763776.0, "32135": 27666763776.0, "32140": 27666763776.0, "32145": 27666763776.0, "32150": 27666763776.0, "32155": 27666763776.0, "32160": 27666763776.0, "32165": 27666763776.0, "32170": 27666763776.0, "32175": 27666763776.0, "32180": 27666763776.0, "32185": 27666763776.0, "32190": 27666763776.0, "32195": 27666763776.0, "32200": 27666763776.0, "32205": 27666763776.0, "32210": 27666763776.0, "32215": 27666763776.0, "32220": 27666763776.0, "32225": 27666763776.0, "32230": 27666763776.0, "32235": 27666763776.0, "32240": 27666763776.0, "32245": 27666763776.0, "32250": 27666763776.0, "32255": 27666763776.0, "32260": 27666763776.0, "32265": 27666763776.0, "32270": 27666763776.0, "32275": 27666763776.0, "32280": 27666763776.0, "32285": 27666763776.0, "32290": 27666763776.0, "32295": 27666763776.0, "32300": 27666763776.0, "32305": 27666763776.0, "32310": 27666763776.0, "32315": 27666763776.0, "32320": 27666763776.0, "32325": 27666763776.0, "32330": 27666763776.0, "32335": 27666763776.0, "32340": 27666763776.0, "32345": 27666763776.0, "32350": 27666763776.0, "32355": 27666763776.0, "32360": 27666763776.0, "32365": 27666763776.0, "32370": 27666763776.0, "32375": 27666763776.0, "32380": 27666763776.0, "32385": 27666763776.0, "32390": 27666763776.0, "32395": 27666763776.0, "32400": 27666763776.0, "32405": 27666763776.0, "32410": 27666763776.0, "32415": 27666763776.0, "32420": 27666763776.0, "32425": 27666763776.0, "32430": 27666763776.0, "32435": 27666763776.0, "32440": 27666763776.0, "32445": 27666763776.0, "32450": 27666763776.0, "32455": 27666763776.0, "32460": 27666763776.0, "32465": 27666763776.0, "32470": 27666763776.0, "32475": 27666763776.0, "32480": 27666763776.0, "32485": 27666763776.0, "32490": 27666763776.0, "32495": 27666763776.0, "32500": 27666763776.0, "32505": 27666763776.0, "32510": 27666763776.0, "32515": 27666763776.0, "32520": 27666763776.0, "32525": 27666763776.0, "32530": 27666763776.0, "32535": 27666763776.0, "32540": 27666763776.0, "32545": 27666763776.0, "32550": 27666763776.0, "32555": 27666763776.0, "32560": 27666763776.0, "32565": 27666763776.0, "32570": 27666763776.0, "32575": 27666763776.0, "32580": 27666763776.0, "32585": 27666763776.0, "32590": 27666763776.0, "32595": 27666763776.0, "32600": 27666763776.0, "32605": 27666763776.0, "32610": 27666763776.0, "32615": 27666763776.0, "32620": 27666763776.0, "32625": 27666763776.0, "32630": 27666763776.0, "32635": 27666763776.0, "32640": 27666763776.0, "32645": 27666763776.0, "32650": 27666763776.0, "32655": 27666763776.0, "32660": 27666763776.0, "32665": 27666763776.0, "32670": 27666763776.0, "32675": 27666763776.0, "32680": 27666763776.0, "32685": 27666763776.0, "32690": 27666763776.0, "32695": 27666763776.0, "32700": 27666763776.0, "32705": 27666763776.0, "32710": 27666763776.0, "32715": 27666763776.0, "32720": 27666763776.0, "32725": 27666763776.0, "32730": 27666763776.0, "32735": 27666763776.0, "32740": 27666763776.0, "32745": 27666763776.0, "32750": 27666763776.0, "32755": 27666763776.0, "32760": 27666763776.0, "32765": 27666763776.0, "32770": 27666763776.0, "32775": 27666763776.0, "32780": 27666763776.0, "32785": 27666763776.0, "32790": 27666763776.0, "32795": 27666763776.0, "32800": 27666763776.0, "32805": 27666763776.0, "32810": 27666763776.0, "32815": 27666763776.0, "32820": 27666763776.0, "32825": 27666763776.0, "32830": 27666763776.0, "32835": 27666763776.0, "32840": 27666763776.0, "32845": 27666763776.0, "32850": 27666763776.0, "32855": 27666763776.0, "32860": 27666763776.0, "32865": 27666763776.0, "32870": 27666763776.0, "32875": 27666763776.0, "32880": 27666763776.0, "32885": 27666763776.0, "32890": 27666763776.0, "32895": 27666763776.0, "32900": 27666763776.0, "32905": 27666763776.0, "32910": 27666763776.0, "32915": 27666763776.0, "32920": 27666763776.0, "32925": 27666763776.0, "32930": 27666763776.0, "32935": 27666763776.0, "32940": 27666763776.0, "32945": 27666763776.0, "32950": 27666763776.0, "32955": 27666763776.0, "32960": 27666763776.0, "32965": 27666763776.0, "32970": 27666763776.0, "32975": 27666763776.0, "32980": 27666763776.0, "32985": 27666763776.0, "32990": 27666763776.0, "32995": 27666763776.0, "33000": 27666763776.0, "33005": 27666763776.0, "33010": 27666763776.0, "33015": 27666763776.0, "33020": 27666763776.0, "33025": 27666763776.0, "33030": 27666763776.0, "33035": 27666763776.0, "33040": 27666763776.0, "33045": 27666763776.0, "33050": 27666763776.0, "33055": 27666763776.0, "33060": 27666763776.0, "33065": 27666763776.0, "33070": 27666763776.0, "33075": 27666763776.0, "33080": 27666763776.0, "33085": 27666763776.0, "33090": 27666763776.0, "33095": 27666763776.0, "33100": 27666763776.0, "33105": 27666763776.0, "33110": 27666763776.0, "33115": 27666763776.0, "33120": 27666763776.0, "33125": 27666763776.0, "33130": 27666763776.0, "33135": 27666763776.0, "33140": 27666763776.0, "33145": 27666763776.0, "33150": 27666763776.0, "33155": 27666763776.0, "33160": 27666763776.0, "33165": 27666763776.0, "33170": 27666763776.0, "33175": 27666763776.0, "33180": 27666763776.0, "33185": 27666763776.0, "33190": 27666763776.0, "33195": 27666763776.0, "33200": 27666763776.0, "33205": 27666763776.0, "33210": 27666763776.0, "33215": 27666763776.0, "33220": 27666763776.0, "33225": 27666763776.0, "33230": 27666763776.0, "33235": 27666763776.0, "33240": 27666763776.0, "33245": 27666763776.0, "33250": 27666763776.0, "33255": 27666763776.0, "33260": 27666763776.0, "33265": 27666763776.0, "33270": 27666763776.0, "33275": 27666763776.0, "33280": 27666763776.0, "33285": 27666763776.0, "33290": 27666763776.0, "33295": 27666763776.0, "33300": 27666763776.0, "33305": 27666763776.0, "33310": 27666763776.0, "33315": 27666763776.0, "33320": 27666763776.0, "33325": 27666763776.0, "33330": 27666763776.0, "33335": 27666763776.0, "33340": 27666763776.0, "33345": 27666763776.0, "33350": 27666763776.0, "33355": 27666763776.0, "33360": 27666763776.0, "33365": 27666763776.0, "33370": 27666763776.0, "33375": 27666763776.0, "33380": 27666763776.0, "33385": 27666763776.0, "33390": 27666763776.0, "33395": 27666763776.0, "33400": 27666763776.0, "33405": 27666763776.0, "33410": 27666763776.0, "33415": 27666763776.0, "33420": 27666763776.0, "33425": 27666763776.0, "33430": 27666763776.0, "33435": 27666763776.0, "33440": 27666763776.0, "33445": 27666763776.0, "33450": 27666763776.0, "33455": 27666763776.0, "33460": 27666763776.0, "33465": 27666763776.0, "33470": 27666763776.0, "33475": 27666763776.0, "33480": 27666763776.0, "33485": 27666763776.0, "33490": 27666763776.0, "33495": 27666763776.0, "33500": 27666763776.0, "33505": 27666763776.0, "33510": 27666763776.0, "33515": 27666763776.0, "33520": 27666763776.0, "33525": 27666763776.0, "33530": 27666763776.0, "33535": 27666763776.0, "33540": 27666763776.0, "33545": 27666763776.0, "33550": 27666763776.0, "33555": 27666763776.0, "33560": 27666763776.0, "33565": 27666763776.0, "33570": 27666763776.0, "33575": 27666763776.0, "33580": 27666763776.0, "33585": 27666763776.0, "33590": 27666763776.0, "33595": 27666763776.0, "33600": 27666763776.0, "33605": 27666763776.0, "33610": 27666763776.0, "33615": 27666763776.0, "33620": 27666763776.0, "33625": 27666763776.0, "33630": 27666763776.0, "33635": 27666763776.0, "33640": 27666763776.0, "33645": 27666763776.0, "33650": 27666763776.0, "33655": 27666763776.0, "33660": 27666763776.0, "33665": 27666763776.0, "33670": 27666763776.0, "33675": 27666763776.0, "33680": 27666763776.0, "33685": 27666763776.0, "33690": 27666763776.0, "33695": 27666763776.0, "33700": 27666763776.0, "33705": 27666763776.0, "33710": 27666763776.0, "33715": 27666763776.0, "33720": 27666763776.0, "33725": 27666763776.0, "33730": 27666763776.0, "33735": 27666763776.0, "33740": 27666763776.0, "33745": 27666763776.0, "33750": 27666763776.0, "33755": 27666763776.0, "33760": 27666763776.0, "33765": 27666763776.0, "33770": 27666763776.0, "33775": 27666763776.0, "33780": 27666763776.0, "33785": 27666763776.0, "33790": 27666763776.0, "33795": 27666763776.0, "33800": 27666763776.0, "33805": 27666763776.0, "33810": 27666763776.0, "33815": 27666763776.0, "33820": 27666763776.0, "33825": 27666763776.0, "33830": 27666763776.0, "33835": 27666763776.0, "33840": 27666763776.0, "33845": 27666763776.0, "33850": 27666763776.0, "33855": 27666763776.0, "33860": 27666763776.0, "33865": 27666763776.0, "33870": 27666763776.0, "33875": 27666763776.0, "33880": 27666763776.0, "33885": 27666763776.0, "33890": 27666763776.0, "33895": 27666763776.0, "33900": 27666763776.0, "33905": 27666763776.0, "33910": 27666763776.0, "33915": 27666763776.0, "33920": 27666763776.0, "33925": 27666763776.0, "33930": 27666763776.0, "33935": 27666763776.0, "33940": 27666763776.0, "33945": 27666763776.0, "33950": 27666763776.0, "33955": 27666763776.0, "33960": 27666763776.0, "33965": 27666763776.0, "33970": 27666763776.0, "33975": 27666763776.0, "33980": 27666763776.0, "33985": 27666763776.0, "33990": 27666763776.0, "33995": 27666763776.0, "34000": 27666763776.0, "34005": 27666763776.0, "34010": 27666763776.0, "34015": 27666763776.0, "34020": 27666763776.0, "34025": 27666763776.0, "34030": 27666763776.0, "34035": 27666763776.0, "34040": 27666763776.0, "34045": 27666763776.0, "34050": 27666763776.0, "34055": 27666763776.0, "34060": 27666763776.0, "34065": 27666763776.0, "34070": 27666763776.0, "34075": 27666763776.0, "34080": 27666763776.0, "34085": 27666763776.0, "34090": 27666763776.0, "34095": 27666763776.0, "34100": 27666763776.0, "34105": 27666763776.0, "34110": 27666763776.0, "34115": 27666763776.0, "34120": 27666763776.0, "34125": 27666763776.0, "34130": 27666763776.0, "34135": 27666763776.0, "34140": 27666763776.0, "34145": 27666763776.0, "34150": 27666763776.0, "34155": 27666763776.0, "34160": 27666763776.0, "34165": 27666763776.0, "34170": 27666763776.0, "34175": 27666763776.0, "34180": 27666763776.0, "34185": 27666763776.0, "34190": 27666763776.0, "34195": 27666763776.0, "34200": 27666763776.0, "34205": 27666763776.0, "34210": 27666763776.0, "34215": 27666763776.0, "34220": 27666763776.0, "34225": 27666763776.0, "34230": 27666763776.0, "34235": 27666763776.0, "34240": 27666763776.0, "34245": 27666763776.0, "34250": 27666763776.0, "34255": 27666763776.0, "34260": 27666763776.0, "34265": 27666763776.0, "34270": 27666763776.0, "34275": 27666763776.0, "34280": 27666763776.0, "34285": 27666763776.0, "34290": 27666763776.0, "34295": 27666763776.0, "34300": 27666763776.0, "34305": 27666763776.0, "34310": 27666763776.0, "34315": 27666763776.0, "34320": 27666763776.0, "34325": 27666763776.0, "34330": 27666763776.0, "34335": 27666763776.0, "34340": 27666763776.0, "34345": 27666763776.0, "34350": 27666763776.0, "34355": 27666763776.0, "34360": 27666763776.0, "34365": 27666763776.0, "34370": 27666763776.0, "34375": 27666763776.0, "34380": 27666763776.0, "34385": 27666763776.0, "34390": 27666763776.0, "34395": 27666763776.0, "34400": 27666763776.0, "34405": 27666763776.0, "34410": 27666763776.0, "34415": 27666763776.0, "34420": 27666763776.0, "34425": 27666763776.0, "34430": 27666763776.0, "34435": 27666763776.0, "34440": 27666763776.0, "34445": 27666763776.0, "34450": 27666763776.0, "34455": 27666763776.0, "34460": 27666763776.0, "34465": 27666763776.0, "34470": 27666763776.0, "34475": 27666763776.0, "34480": 27666763776.0, "34485": 27666763776.0, "34490": 27666763776.0, "34495": 27666763776.0, "34500": 27666763776.0, "34505": 27666763776.0, "34510": 27666763776.0, "34515": 27666763776.0, "34520": 27666763776.0, "34525": 27666763776.0, "34530": 27666763776.0, "34535": 27666763776.0, "34540": 27666763776.0, "34545": 27666763776.0, "34550": 27666763776.0, "34555": 27666763776.0, "34560": 27666763776.0, "34565": 27666763776.0, "34570": 27666763776.0, "34575": 27666763776.0, "34580": 27666763776.0, "34585": 27666763776.0, "34590": 27666763776.0, "34595": 27666763776.0, "34600": 27666763776.0, "34605": 27666763776.0, "34610": 27666763776.0, "34615": 27666763776.0, "34620": 27666763776.0, "34625": 27666763776.0, "34630": 27666763776.0, "34635": 27666763776.0, "34640": 27666763776.0, "34645": 27666763776.0, "34650": 27666763776.0, "34655": 27666763776.0, "34660": 27666763776.0, "34665": 27666763776.0, "34670": 27666763776.0, "34675": 27666763776.0, "34680": 27666763776.0, "34685": 27666763776.0, "34690": 27666763776.0, "34695": 27666763776.0, "34700": 27666763776.0, "34705": 27666763776.0, "34710": 27666763776.0, "34715": 27666763776.0, "34720": 27666763776.0, "34725": 27666763776.0, "34730": 27666763776.0, "34735": 27666763776.0, "34740": 27666763776.0, "34745": 27666763776.0, "34750": 27666763776.0, "34755": 27666763776.0, "34760": 27666763776.0, "34765": 27666763776.0, "34770": 27666763776.0, "34775": 27666763776.0, "34780": 27666763776.0, "34785": 27666763776.0, "34790": 27666763776.0, "34795": 27666763776.0, "34800": 27666763776.0, "34805": 27666763776.0, "34810": 27666763776.0, "34815": 27666763776.0, "34820": 27666763776.0, "34825": 27666763776.0, "34830": 27666763776.0, "34835": 27666763776.0, "34840": 27666763776.0, "34845": 27666763776.0, "34850": 27666763776.0, "34855": 27666763776.0, "34860": 27666763776.0, "34865": 27666763776.0, "34870": 27666763776.0, "34875": 27666763776.0, "34880": 27666763776.0, "34885": 27666763776.0, "34890": 27666763776.0, "34895": 27666763776.0, "34900": 27666763776.0, "34905": 27666763776.0, "34910": 27666763776.0, "34915": 27666763776.0, "34920": 27666763776.0, "34925": 27666763776.0, "34930": 27666763776.0, "34935": 27666763776.0, "34940": 27666763776.0, "34945": 27666763776.0, "34950": 27666763776.0, "34955": 27666763776.0, "34960": 27666763776.0, "34965": 27666763776.0, "34970": 27666763776.0, "34975": 27666763776.0, "34980": 27666763776.0, "34985": 27666763776.0, "34990": 27666763776.0, "34995": 27666763776.0, "35000": 27666763776.0, "35005": 27666763776.0, "35010": 27666763776.0, "35015": 27666763776.0, "35020": 27666763776.0, "35025": 27666763776.0, "35030": 27666763776.0, "35035": 27666763776.0, "35040": 27666763776.0, "35045": 27666763776.0, "35050": 27666763776.0, "35055": 27666763776.0, "35060": 27666763776.0, "35065": 27666763776.0, "35070": 27666763776.0, "35075": 27666763776.0, "35080": 27666763776.0, "35085": 27666763776.0, "35090": 27666763776.0, "35095": 27666763776.0, "35100": 27666763776.0, "35105": 27666763776.0, "35110": 27666763776.0, "35115": 27666763776.0, "35120": 27666763776.0, "35125": 27666763776.0, "35130": 27666763776.0, "35135": 27666763776.0, "35140": 27666763776.0, "35145": 27666763776.0, "35150": 27666763776.0, "35155": 27666763776.0, "35160": 27666763776.0, "35165": 27666763776.0, "35170": 27666763776.0, "35175": 27666763776.0, "35180": 27666763776.0, "35185": 27666763776.0, "35190": 27666763776.0, "35195": 27666763776.0, "35200": 27666763776.0, "35205": 27666763776.0, "35210": 27666763776.0, "35215": 27666763776.0, "35220": 27666763776.0, "35225": 27666763776.0, "35230": 27666763776.0, "35235": 27666763776.0, "35240": 27666763776.0, "35245": 27666763776.0, "35250": 27666763776.0, "35255": 27666763776.0, "35260": 27666763776.0, "35265": 27666763776.0, "35270": 27666763776.0, "35275": 27666763776.0, "35280": 27666763776.0, "35285": 27666763776.0, "35290": 27666763776.0, "35295": 27666763776.0, "35300": 27666763776.0, "35305": 27666763776.0, "35310": 27666763776.0, "35315": 27666763776.0, "35320": 27666763776.0, "35325": 27666763776.0, "35330": 27666763776.0, "35335": 27666763776.0, "35340": 27666763776.0, "35345": 27666763776.0, "35350": 27666763776.0, "35355": 27666763776.0, "35360": 27666763776.0, "35365": 27666763776.0, "35370": 27666763776.0, "35375": 27666763776.0, "35380": 27666763776.0, "35385": 27666763776.0, "35390": 27666763776.0, "35395": 27666763776.0, "35400": 27666763776.0, "35405": 27666763776.0, "35410": 27666763776.0, "35415": 27666763776.0, "35420": 27666763776.0, "35425": 27666763776.0, "35430": 27666763776.0, "35435": 27666763776.0, "35440": 27666763776.0, "35445": 27666763776.0, "35450": 27666763776.0, "35455": 27666763776.0, "35460": 27666763776.0, "35465": 27666763776.0, "35470": 27666763776.0, "35475": 27666763776.0, "35480": 27666763776.0, "35485": 27666763776.0, "35490": 27666763776.0, "35495": 27666763776.0, "35500": 27666763776.0, "35505": 27666763776.0, "35510": 27666763776.0, "35515": 27666763776.0, "35520": 27666763776.0, "35525": 27666763776.0, "35530": 27666763776.0, "35535": 27666763776.0, "35540": 27666763776.0, "35545": 27666763776.0, "35550": 27666763776.0, "35555": 27666763776.0, "35560": 27666763776.0, "35565": 27666763776.0, "35570": 27666763776.0, "35575": 27666763776.0, "35580": 27666763776.0, "35585": 27666763776.0, "35590": 27666763776.0, "35595": 27666763776.0, "35600": 27666763776.0, "35605": 27666763776.0, "35610": 27666763776.0, "35615": 27666763776.0, "35620": 27666763776.0, "35625": 27666763776.0, "35630": 27666763776.0, "35635": 27666763776.0, "35640": 27666763776.0, "35645": 27666763776.0, "35650": 27666763776.0, "35655": 27666763776.0, "35660": 27666763776.0, "35665": 27666763776.0, "35670": 27666763776.0, "35675": 27666763776.0, "35680": 27666763776.0, "35685": 27666763776.0, "35690": 27666763776.0, "35695": 27666763776.0, "35700": 27666763776.0, "35705": 27666763776.0, "35710": 27666763776.0, "35715": 27666763776.0, "35720": 27666763776.0, "35725": 27666763776.0, "35730": 27666763776.0, "35735": 27666763776.0, "35740": 27666763776.0, "35745": 27666763776.0, "35750": 27666763776.0, "35755": 27666763776.0, "35760": 27666763776.0, "35765": 27666763776.0, "35770": 27666763776.0, "35775": 27666763776.0, "35780": 27666763776.0, "35785": 27666763776.0, "35790": 27666763776.0, "35795": 27666763776.0, "35800": 27666763776.0, "35805": 27666763776.0, "35810": 27666763776.0, "35815": 27666763776.0, "35820": 27666763776.0, "35825": 27666763776.0, "35830": 27666763776.0, "35835": 27666763776.0, "35840": 27666763776.0, "35845": 27666763776.0, "35850": 27666763776.0, "35855": 27666763776.0, "35860": 27666763776.0, "35865": 27666763776.0, "35870": 27666763776.0, "35875": 27666763776.0, "35880": 27666763776.0, "35885": 27666763776.0, "35890": 27666763776.0, "35895": 27666763776.0, "35900": 27666763776.0, "35905": 27666763776.0, "35910": 27666763776.0, "35915": 27666763776.0, "35920": 27666763776.0, "35925": 27666763776.0, "35930": 27666763776.0, "35935": 27666763776.0, "35940": 27666763776.0, "35945": 27666763776.0, "35950": 27666763776.0, "35955": 27666763776.0, "35960": 27666763776.0, "35965": 27666763776.0, "35970": 27666763776.0, "35975": 27666763776.0, "35980": 27666763776.0, "35985": 27666763776.0, "35990": 27666763776.0, "35995": 27666763776.0, "36000": 27666763776.0, "36005": 27666763776.0, "36010": 27666763776.0, "36015": 27666763776.0, "36020": 27666763776.0, "36025": 27666763776.0, "36030": 27666763776.0, "36035": 27666763776.0, "36040": 27666763776.0, "36045": 27666763776.0, "36050": 27666763776.0, "36055": 27666763776.0, "36060": 27666763776.0, "36065": 27666763776.0, "36070": 27666763776.0, "36075": 27666763776.0, "36080": 27666763776.0, "36085": 27666763776.0, "36090": 27666763776.0, "36095": 27666763776.0, "36100": 27666763776.0, "36105": 27666763776.0, "36110": 27666763776.0, "36115": 27666763776.0, "36120": 27666763776.0, "36125": 27666763776.0, "36130": 27666763776.0, "36135": 27666763776.0, "36140": 27666763776.0, "36145": 27666763776.0, "36150": 27666763776.0, "36155": 27666763776.0, "36160": 27666763776.0, "36165": 27666763776.0, "36170": 27666763776.0, "36175": 27666763776.0, "36180": 27666763776.0, "36185": 27666763776.0, "36190": 27666763776.0, "36195": 27666763776.0, "36200": 27666763776.0, "36205": 27666763776.0, "36210": 27666763776.0, "36215": 27666763776.0, "36220": 27666763776.0, "36225": 27666763776.0, "36230": 27666763776.0, "36235": 27666763776.0, "36240": 27666763776.0, "36245": 27666763776.0, "36250": 27666763776.0, "36255": 27666763776.0, "36260": 27666763776.0, "36265": 27666763776.0, "36270": 27666763776.0, "36275": 27666763776.0, "36280": 27666763776.0, "36285": 27666763776.0, "36290": 27666763776.0, "36295": 27666763776.0, "36300": 27666763776.0, "36305": 27666763776.0, "36310": 27666763776.0, "36315": 27666763776.0, "36320": 27666763776.0, "36325": 27666763776.0, "36330": 27666763776.0, "36335": 27666763776.0, "36340": 27666763776.0, "36345": 27666763776.0, "36350": 27666763776.0, "36355": 27666763776.0, "36360": 27666763776.0, "36365": 27666763776.0, "36370": 27666763776.0, "36375": 27666763776.0, "36380": 27666763776.0, "36385": 27666763776.0, "36390": 27666763776.0, "36395": 27666763776.0, "36400": 27666763776.0, "36405": 27666763776.0, "36410": 27666763776.0, "36415": 27666763776.0, "36420": 27666763776.0, "36425": 27666763776.0, "36430": 27666763776.0, "36435": 27666763776.0, "36440": 27666763776.0, "36445": 27666763776.0, "36450": 27666763776.0, "36455": 27666763776.0, "36460": 27666763776.0, "36465": 27666763776.0, "36470": 27666763776.0, "36475": 27666763776.0, "36480": 27666763776.0, "36485": 27666763776.0, "36490": 27666763776.0, "36495": 27666763776.0, "36500": 27666763776.0, "36505": 27666763776.0, "36510": 27666763776.0, "36515": 27666763776.0, "36520": 27666763776.0, "36525": 27666763776.0, "36530": 27666763776.0, "36535": 27666763776.0, "36540": 27666763776.0, "36545": 27666763776.0, "36550": 27666763776.0, "36555": 27666763776.0, "36560": 27666763776.0, "36565": 27666763776.0, "36570": 27666763776.0, "36575": 27666763776.0, "36580": 27666763776.0, "36585": 27666763776.0, "36590": 27666763776.0, "36595": 27666763776.0, "36600": 27666763776.0, "36605": 27666763776.0, "36610": 27666763776.0, "36615": 27666763776.0, "36620": 27666763776.0, "36625": 27666763776.0, "36630": 27666763776.0, "36635": 27666763776.0, "36640": 27666763776.0, "36645": 27666763776.0, "36650": 27666763776.0, "36655": 27666763776.0, "36660": 27666763776.0, "36665": 27666763776.0, "36670": 27666763776.0, "36675": 27666763776.0, "36680": 27666763776.0, "36685": 27666763776.0, "36690": 27666763776.0, "36695": 27666763776.0, "36700": 27666763776.0, "36705": 27666763776.0, "36710": 27666763776.0, "36715": 27666763776.0, "36720": 27666763776.0, "36725": 27666763776.0, "36730": 27666763776.0, "36735": 27666763776.0, "36740": 27666763776.0, "36745": 27666763776.0, "36750": 27666763776.0, "36755": 27666763776.0, "36760": 27666763776.0, "36765": 27666763776.0, "36770": 27666763776.0, "36775": 27666763776.0, "36780": 27666763776.0, "36785": 27666763776.0, "36790": 27666763776.0, "36795": 27666763776.0, "36800": 27666763776.0, "36805": 27666763776.0, "36810": 27666763776.0, "36815": 27666763776.0, "36820": 27666763776.0, "36825": 27666763776.0, "36830": 27666763776.0, "36835": 27666763776.0, "36840": 27666763776.0, "36845": 27666763776.0, "36850": 27666763776.0, "36855": 27666763776.0, "36860": 27666763776.0, "36865": 27666763776.0, "36870": 27666763776.0, "36875": 27666763776.0, "36880": 27666763776.0, "36885": 27666763776.0, "36890": 27666763776.0, "36895": 27666763776.0, "36900": 27666763776.0, "36905": 27666763776.0, "36910": 27666763776.0, "36915": 27666763776.0, "36920": 27666763776.0, "36925": 27666763776.0, "36930": 27666763776.0, "36935": 27666763776.0, "36940": 27666763776.0, "36945": 27666763776.0, "36950": 27666763776.0, "36955": 27666763776.0, "36960": 27666763776.0, "36965": 27666763776.0, "36970": 27666763776.0, "36975": 27666763776.0, "36980": 27666763776.0, "36985": 27666763776.0, "36990": 27666763776.0, "36995": 27666763776.0, "37000": 27666763776.0, "37005": 27666763776.0, "37010": 27666763776.0, "37015": 27666763776.0, "37020": 27666763776.0, "37025": 27666763776.0, "37030": 27666763776.0, "37035": 27666763776.0, "37040": 27666763776.0, "37045": 27666763776.0, "37050": 27666763776.0, "37055": 27666763776.0, "37060": 27666763776.0, "37065": 27666763776.0, "37070": 27666763776.0, "37075": 27666763776.0, "37080": 27666763776.0, "37085": 27666763776.0, "37090": 27666763776.0, "37095": 27666763776.0, "37100": 27666763776.0, "37105": 27666763776.0, "37110": 27666763776.0, "37115": 27666763776.0, "37120": 27666763776.0, "37125": 27666763776.0, "37130": 27666763776.0, "37135": 27666763776.0, "37140": 27666763776.0, "37145": 27666763776.0, "37150": 27666763776.0, "37155": 27666763776.0, "37160": 27666763776.0, "37165": 27666763776.0, "37170": 27666763776.0, "37175": 27666763776.0, "37180": 27666763776.0, "37185": 27666763776.0, "37190": 27666763776.0, "37195": 27666763776.0, "37200": 27666763776.0, "37205": 27666763776.0, "37210": 27666763776.0, "37215": 27666763776.0, "37220": 27666763776.0, "37225": 27666763776.0, "37230": 27666763776.0, "37235": 27666763776.0, "37240": 27666763776.0, "37245": 27666763776.0, "37250": 27666763776.0, "37255": 27666763776.0, "37260": 27666763776.0, "37265": 27666763776.0, "37270": 27666763776.0, "37275": 27666763776.0, "37280": 27666763776.0, "37285": 27666763776.0, "37290": 27666763776.0, "37295": 27666763776.0, "37300": 27666763776.0, "37305": 27666763776.0, "37310": 27666763776.0, "37315": 27666763776.0, "37320": 27666763776.0, "37325": 27666763776.0, "37330": 27666763776.0, "37335": 27666763776.0, "37340": 27666763776.0, "37345": 27666763776.0, "37350": 27666763776.0, "37355": 27666763776.0, "37360": 27666763776.0, "37365": 27666763776.0, "37370": 27666763776.0, "37375": 27666763776.0, "37380": 27666763776.0, "37385": 27666763776.0, "37390": 27666763776.0, "37395": 27666763776.0, "37400": 27666763776.0, "37405": 27666763776.0, "37410": 27666763776.0, "37415": 27666763776.0, "37420": 27666763776.0, "37425": 27666763776.0, "37430": 27666763776.0, "37435": 27666763776.0, "37440": 27666763776.0, "37445": 27666763776.0, "37450": 27666763776.0, "37455": 27666763776.0, "37460": 27666763776.0, "37465": 27666763776.0, "37470": 27666763776.0, "37475": 27666763776.0, "37480": 27666763776.0, "37485": 27666763776.0, "37490": 27666763776.0, "37495": 27666763776.0, "37500": 27666763776.0, "37505": 27666763776.0, "37510": 27666763776.0, "37515": 27666763776.0, "37520": 27666763776.0, "37525": 27666763776.0, "37530": 27666763776.0, "37535": 27666763776.0, "37540": 27666763776.0, "37545": 27666763776.0, "37550": 27666763776.0, "37555": 27666763776.0, "37560": 27666763776.0, "37565": 27666763776.0, "37570": 27666763776.0, "37575": 27666763776.0, "37580": 27666763776.0, "37585": 27666763776.0, "37590": 27666763776.0, "37595": 27666763776.0, "37600": 27666763776.0, "37605": 27666763776.0, "37610": 27666763776.0, "37615": 27666763776.0, "37620": 27666763776.0, "37625": 27666763776.0, "37630": 27666763776.0, "37635": 27666763776.0, "37640": 27666763776.0, "37645": 27666763776.0, "37650": 27666763776.0, "37655": 27666763776.0, "37660": 27666763776.0, "37665": 27666763776.0, "37670": 27666763776.0, "37675": 27666763776.0, "37680": 27666763776.0, "37685": 27666763776.0, "37690": 27666763776.0, "37695": 27666763776.0, "37700": 27666763776.0, "37705": 27666763776.0, "37710": 27666763776.0, "37715": 27666763776.0, "37720": 27666763776.0, "37725": 27666763776.0, "37730": 27666763776.0, "37735": 27666763776.0, "37740": 27666763776.0, "37745": 27666763776.0, "37750": 27666763776.0, "37755": 27666763776.0, "37760": 27666763776.0, "37765": 27666763776.0, "37770": 27666763776.0, "37775": 27666763776.0, "37780": 27666763776.0, "37785": 27666763776.0, "37790": 27666763776.0, "37795": 27666763776.0, "37800": 27666763776.0, "37805": 27666763776.0, "37810": 27666763776.0, "37815": 27666763776.0, "37820": 27666763776.0, "37825": 27666763776.0, "37830": 27666763776.0, "37835": 27666763776.0, "37840": 27666763776.0, "37845": 27666763776.0, "37850": 27666763776.0, "37855": 27666763776.0, "37860": 27666763776.0, "37865": 27666763776.0, "37870": 27666763776.0, "37875": 27666763776.0, "37880": 27666763776.0, "37885": 27666763776.0, "37890": 27666763776.0, "37895": 27666763776.0, "37900": 27666763776.0, "37905": 27666763776.0, "37910": 27666763776.0, "37915": 27666763776.0, "37920": 27666763776.0, "37925": 27666763776.0, "37930": 27666763776.0, "37935": 27666763776.0, "37940": 27666763776.0, "37945": 27666763776.0, "37950": 27666763776.0, "37955": 27666763776.0, "37960": 27666763776.0, "37965": 27666763776.0, "37970": 27666763776.0, "37975": 27666763776.0, "37980": 27666763776.0, "37985": 27666763776.0, "37990": 27666763776.0, "37995": 27666763776.0, "38000": 27666763776.0, "38005": 27666763776.0, "38010": 27666763776.0, "38015": 27666763776.0, "38020": 27666763776.0, "38025": 27666763776.0, "38030": 27666763776.0, "38035": 27666763776.0, "38040": 27666763776.0, "38045": 27666763776.0, "38050": 27666763776.0, "38055": 27666763776.0, "38060": 27666763776.0, "38065": 27666763776.0, "38070": 27666763776.0, "38075": 27666763776.0, "38080": 27666763776.0, "38085": 27666763776.0, "38090": 27666763776.0, "38095": 27666763776.0, "38100": 27666763776.0, "38105": 27666763776.0, "38110": 27666763776.0, "38115": 27666763776.0, "38120": 27666763776.0, "38125": 27666763776.0, "38130": 27666763776.0, "38135": 27666763776.0, "38140": 27666763776.0, "38145": 27666763776.0, "38150": 27666763776.0, "38155": 27666763776.0, "38160": 27666763776.0, "38165": 27666763776.0, "38170": 27666763776.0, "38175": 27666763776.0, "38180": 27666763776.0, "38185": 27666763776.0, "38190": 27666763776.0, "38195": 27666763776.0, "38200": 27666763776.0, "38205": 27666763776.0, "38210": 27666763776.0, "38215": 27666763776.0, "38220": 27666763776.0, "38225": 27666763776.0, "38230": 27666763776.0, "38235": 27666763776.0, "38240": 27666763776.0, "38245": 27666763776.0, "38250": 27666763776.0, "38255": 27666763776.0, "38260": 27666763776.0, "38265": 27666763776.0, "38270": 27666763776.0, "38275": 27666763776.0, "38280": 27666763776.0, "38285": 27666763776.0, "38290": 27666763776.0, "38295": 27666763776.0, "38300": 27666763776.0, "38305": 27666763776.0, "38310": 27666763776.0, "38315": 27666763776.0, "38320": 27666763776.0, "38325": 27666763776.0, "38330": 27666763776.0, "38335": 27666763776.0, "38340": 27666763776.0, "38345": 27666763776.0, "38350": 27666763776.0, "38355": 27666763776.0, "38360": 27666763776.0, "38365": 27666763776.0, "38370": 27666763776.0, "38375": 27666763776.0, "38380": 27666763776.0, "38385": 27666763776.0, "38390": 27666763776.0, "38395": 27666763776.0, "38400": 27666763776.0, "38405": 27666763776.0, "38410": 27666763776.0, "38415": 27666763776.0, "38420": 27666763776.0, "38425": 27666763776.0, "38430": 27666763776.0, "38435": 27666763776.0, "38440": 27666763776.0, "38445": 27666763776.0, "38450": 27666763776.0, "38455": 27666763776.0, "38460": 27666763776.0, "38465": 27666763776.0, "38470": 27666763776.0, "38475": 27666763776.0, "38480": 27666763776.0, "38485": 27666763776.0, "38490": 27666763776.0, "38495": 27666763776.0, "38500": 27666763776.0, "38505": 27666763776.0, "38510": 27666763776.0, "38515": 27666763776.0, "38520": 27666763776.0, "38525": 27666763776.0, "38530": 27666763776.0, "38535": 27666763776.0, "38540": 27666763776.0, "38545": 27666763776.0, "38550": 27666763776.0, "38555": 27666763776.0, "38560": 27666763776.0, "38565": 27666763776.0, "38570": 27666763776.0, "38575": 27666763776.0, "38580": 27666763776.0, "38585": 27666763776.0, "38590": 27666763776.0, "38595": 27666763776.0, "38600": 27666763776.0, "38605": 27666763776.0, "38610": 27666763776.0, "38615": 27666763776.0, "38620": 27666763776.0, "38625": 27666763776.0, "38630": 27666763776.0, "38635": 27666763776.0, "38640": 27666763776.0, "38645": 27666763776.0, "38650": 27666763776.0, "38655": 27666763776.0, "38660": 27666763776.0, "38665": 27666763776.0, "38670": 27666763776.0, "38675": 27666763776.0, "38680": 27666763776.0, "38685": 27666763776.0, "38690": 27666763776.0, "38695": 27666763776.0, "38700": 27666763776.0, "38705": 27666763776.0, "38710": 27666763776.0, "38715": 27666763776.0, "38720": 27666763776.0, "38725": 27666763776.0, "38730": 27666763776.0, "38735": 27666763776.0, "38740": 27666763776.0, "38745": 27666763776.0, "38750": 27666763776.0, "38755": 27666763776.0, "38760": 27666763776.0, "38765": 27666763776.0, "38770": 27666763776.0, "38775": 27666763776.0, "38780": 27666763776.0, "38785": 27666763776.0, "38790": 27666763776.0, "38795": 27666763776.0, "38800": 27666763776.0, "38805": 27666763776.0, "38810": 27666763776.0, "38815": 27666763776.0, "38820": 27666763776.0, "38825": 27666763776.0, "38830": 27666763776.0, "38835": 27666763776.0, "38840": 27666763776.0, "38845": 27666763776.0, "38850": 27666763776.0, "38855": 27666763776.0, "38860": 27666763776.0, "38865": 27666763776.0, "38870": 27666763776.0, "38875": 27666763776.0, "38880": 27666763776.0, "38885": 27666763776.0, "38890": 27666763776.0, "38895": 27666763776.0, "38900": 27666763776.0, "38905": 27666763776.0, "38910": 27666763776.0, "38915": 27666763776.0, "38920": 27666763776.0, "38925": 27666763776.0, "38930": 27666763776.0, "38935": 27666763776.0, "38940": 27666763776.0, "38945": 27666763776.0, "38950": 27666763776.0, "38955": 27666763776.0, "38960": 27666763776.0, "38965": 27666763776.0, "38970": 27666763776.0, "38975": 27666763776.0, "38980": 27666763776.0, "38985": 27666763776.0, "38990": 27666763776.0, "38995": 27666763776.0, "39000": 27666763776.0, "39005": 27666763776.0, "39010": 27666763776.0, "39015": 27666763776.0, "39020": 27666763776.0, "39025": 27666763776.0, "39030": 27666763776.0, "39035": 27666763776.0, "39040": 27666763776.0, "39045": 27666763776.0, "39050": 27666763776.0, "39055": 27666763776.0, "39060": 27666763776.0, "39065": 27666763776.0, "39070": 27666763776.0, "39075": 27666763776.0, "39080": 27666763776.0, "39085": 27666763776.0, "39090": 27666763776.0, "39095": 27666763776.0, "39100": 27666763776.0, "39105": 27666763776.0, "39110": 27666763776.0, "39115": 27666763776.0, "39120": 27666763776.0, "39125": 27666763776.0, "39130": 27666763776.0, "39135": 27666763776.0, "39140": 27666763776.0, "39145": 27666763776.0, "39150": 27666763776.0, "39155": 27666763776.0, "39160": 27666763776.0, "39165": 27666763776.0, "39170": 27666763776.0, "39175": 27666763776.0, "39180": 27666763776.0, "39185": 27666763776.0, "39190": 27666763776.0, "39195": 27666763776.0, "39200": 27666763776.0, "39205": 27666763776.0, "39210": 27666763776.0, "39215": 27666763776.0, "39220": 27666763776.0, "39225": 27666763776.0, "39230": 27666763776.0, "39235": 27666763776.0, "39240": 27666763776.0, "39245": 27666763776.0, "39250": 27666763776.0, "39255": 27666763776.0, "39260": 27666763776.0, "39265": 27666763776.0, "39270": 27666763776.0, "39275": 27666763776.0, "39280": 27666763776.0, "39285": 27666763776.0, "39290": 27666763776.0, "39295": 27666763776.0, "39300": 27666763776.0, "39305": 27666763776.0, "39310": 27666763776.0, "39315": 27666763776.0, "39320": 27666763776.0, "39325": 27666763776.0, "39330": 27666763776.0, "39335": 27666763776.0, "39340": 27666763776.0, "39345": 27666763776.0, "39350": 27666763776.0, "39355": 27666763776.0, "39360": 27666763776.0, "39365": 27666763776.0, "39370": 27666763776.0, "39375": 27666763776.0, "39380": 27666763776.0, "39385": 27666763776.0, "39390": 27666763776.0, "39395": 27666763776.0, "39400": 27666763776.0, "39405": 27666763776.0, "39410": 27666763776.0, "39415": 27666763776.0, "39420": 27666763776.0, "39425": 27666763776.0, "39430": 27666763776.0, "39435": 27666763776.0, "39440": 27666763776.0, "39445": 27666763776.0, "39450": 27666763776.0, "39455": 27666763776.0, "39460": 27666763776.0, "39465": 27666763776.0, "39470": 27666763776.0, "39475": 27666763776.0, "39480": 27666763776.0, "39485": 27666763776.0, "39490": 27666763776.0, "39495": 27666763776.0, "39500": 27666763776.0, "39505": 27666763776.0, "39510": 27666763776.0, "39515": 27666763776.0, "39520": 27666763776.0, "39525": 27666763776.0, "39530": 27666763776.0, "39535": 27666763776.0, "39540": 27666763776.0, "39545": 27666763776.0, "39550": 27666763776.0, "39555": 27666763776.0, "39560": 27666763776.0, "39565": 27666763776.0, "39570": 27666763776.0, "39575": 27666763776.0, "39580": 27666763776.0, "39585": 27666763776.0, "39590": 27666763776.0, "39595": 27666763776.0, "39600": 27666763776.0, "39605": 27666763776.0, "39610": 27666763776.0, "39615": 27666763776.0, "39620": 27666763776.0, "39625": 27666763776.0, "39630": 27666763776.0, "39635": 27666763776.0, "39640": 27666763776.0, "39645": 27666763776.0, "39650": 27666763776.0, "39655": 27666763776.0, "39660": 27666763776.0, "39665": 27666763776.0, "39670": 27666763776.0, "39675": 27666763776.0, "39680": 27666763776.0, "39685": 27666763776.0, "39690": 27666763776.0, "39695": 27666763776.0, "39700": 27666763776.0, "39705": 27666763776.0, "39710": 27666763776.0, "39715": 27666763776.0, "39720": 27666763776.0, "39725": 27666763776.0, "39730": 27666763776.0, "39735": 27666763776.0, "39740": 27666763776.0, "39745": 27666763776.0, "39750": 27666763776.0, "39755": 27666763776.0, "39760": 27666763776.0, "39765": 27666763776.0, "39770": 27666763776.0, "39775": 27666763776.0, "39780": 27666763776.0, "39785": 27666763776.0, "39790": 27666763776.0, "39795": 27666763776.0, "39800": 27666763776.0, "39805": 27666763776.0, "39810": 27666763776.0, "39815": 27666763776.0, "39820": 27666763776.0, "39825": 27666763776.0, "39830": 27666763776.0, "39835": 27666763776.0, "39840": 27666763776.0, "39845": 27666763776.0, "39850": 27666763776.0, "39855": 27666763776.0, "39860": 27666763776.0, "39865": 27666763776.0, "39870": 27666763776.0, "39875": 27666763776.0, "39880": 27666763776.0, "39885": 27666763776.0, "39890": 27666763776.0, "39895": 27666763776.0, "39900": 27666763776.0, "39905": 27666763776.0, "39910": 27666763776.0, "39915": 27666763776.0, "39920": 27666763776.0, "39925": 27666763776.0, "39930": 27666763776.0, "39935": 27666763776.0, "39940": 27666763776.0, "39945": 27666763776.0, "39950": 27666763776.0, "39955": 27666763776.0, "39960": 27666763776.0, "39965": 27666763776.0, "39970": 27666763776.0, "39975": 27666763776.0, "39980": 27666763776.0, "39985": 27666763776.0, "39990": 27666763776.0, "39995": 27666763776.0, "40000": 27666763776.0, "40005": 27666763776.0, "40010": 27666763776.0, "40015": 27666763776.0, "40020": 27666763776.0, "40025": 27666763776.0, "40030": 27666763776.0, "40035": 27666763776.0, "40040": 27666763776.0, "40045": 27666763776.0, "40050": 27666763776.0, "40055": 27666763776.0, "40060": 27666763776.0, "40065": 27666763776.0, "40070": 27666763776.0, "40075": 27666763776.0, "40080": 27666763776.0, "40085": 27666763776.0, "40090": 27666763776.0, "40095": 27666763776.0, "40100": 27666763776.0, "40105": 27666763776.0, "40110": 27666763776.0, "40115": 27666763776.0, "40120": 27666763776.0, "40125": 27666763776.0, "40130": 27666763776.0, "40135": 27666763776.0, "40140": 27666763776.0, "40145": 27666763776.0, "40150": 27666763776.0, "40155": 27666763776.0, "40160": 27666763776.0, "40165": 27666763776.0, "40170": 27666763776.0, "40175": 27666763776.0, "40180": 27666763776.0, "40185": 27666763776.0, "40190": 27666763776.0, "40195": 27666763776.0, "40200": 27666763776.0, "40205": 27666763776.0, "40210": 27666763776.0, "40215": 27666763776.0, "40220": 27666763776.0, "40225": 27666763776.0, "40230": 27666763776.0, "40235": 27666763776.0, "40240": 27666763776.0, "40245": 27666763776.0, "40250": 27666763776.0, "40255": 27666763776.0, "40260": 27666763776.0, "40265": 27666763776.0, "40270": 27666763776.0, "40275": 27666763776.0, "40280": 27666763776.0, "40285": 27666763776.0, "40290": 27666763776.0, "40295": 27666763776.0, "40300": 27666763776.0, "40305": 27666763776.0, "40310": 27666763776.0, "40315": 27666763776.0, "40320": 27666763776.0, "40325": 27666763776.0, "40330": 27666763776.0, "40335": 27666763776.0, "40340": 27666763776.0, "40345": 27666763776.0, "40350": 27666763776.0, "40355": 27666763776.0, "40360": 27666763776.0, "40365": 27666763776.0, "40370": 27666763776.0, "40375": 27666763776.0, "40380": 27666763776.0, "40385": 27666763776.0, "40390": 27666763776.0, "40395": 27666763776.0, "40400": 27666763776.0, "40405": 27666763776.0, "40410": 27666763776.0, "40415": 27666763776.0, "40420": 27666763776.0, "40425": 27666763776.0, "40430": 27666763776.0, "40435": 27666763776.0, "40440": 27666763776.0, "40445": 27666763776.0, "40450": 27666763776.0, "40455": 27666763776.0, "40460": 27666763776.0, "40465": 27666763776.0, "40470": 27666763776.0, "40475": 27666763776.0, "40480": 27666763776.0, "40485": 27666763776.0, "40490": 27666763776.0, "40495": 27666763776.0, "40500": 27666763776.0, "40505": 27666763776.0, "40510": 27666763776.0, "40515": 27666763776.0, "40520": 27666763776.0, "40525": 27666763776.0, "40530": 27666763776.0, "40535": 27666763776.0, "40540": 27666763776.0, "40545": 27666763776.0, "40550": 27666763776.0, "40555": 27666763776.0, "40560": 27666763776.0, "40565": 27666763776.0, "40570": 27666763776.0, "40575": 27666763776.0, "40580": 27666763776.0, "40585": 27666763776.0, "40590": 27666763776.0, "40595": 27666763776.0, "40600": 27666763776.0, "40605": 27666763776.0, "40610": 27666763776.0, "40615": 27666763776.0, "40620": 27666763776.0, "40625": 27666763776.0, "40630": 27666763776.0, "40635": 27666763776.0, "40640": 27666763776.0, "40645": 27666763776.0, "40650": 27666763776.0, "40655": 27666763776.0, "40660": 27666763776.0, "40665": 27666763776.0, "40670": 27666763776.0, "40675": 27666763776.0, "40680": 27666763776.0, "40685": 27666763776.0, "40690": 27666763776.0, "40695": 27666763776.0, "40700": 27666763776.0, "40705": 27666763776.0, "40710": 27666763776.0, "40715": 27666763776.0, "40720": 27666763776.0, "40725": 27666763776.0, "40730": 27666763776.0, "40735": 27666763776.0, "40740": 27666763776.0, "40745": 27666763776.0, "40750": 27666763776.0, "40755": 27666763776.0, "40760": 27666763776.0, "40765": 27666763776.0, "40770": 27666763776.0, "40775": 27666763776.0, "40780": 27666763776.0, "40785": 27666763776.0, "40790": 27666763776.0, "40795": 27666763776.0, "40800": 27666763776.0, "40805": 27666763776.0, "40810": 27666763776.0, "40815": 27666763776.0, "40820": 27666763776.0, "40825": 27666763776.0, "40830": 27666763776.0, "40835": 27666763776.0, "40840": 27666763776.0, "40845": 27666763776.0, "40850": 27666763776.0, "40855": 27666763776.0, "40860": 27666763776.0, "40865": 27666763776.0, "40870": 27666763776.0, "40875": 27666763776.0, "40880": 27666763776.0, "40885": 27666763776.0, "40890": 27666763776.0, "40895": 27666763776.0, "40900": 27666763776.0, "40905": 27666763776.0, "40910": 27666763776.0, "40915": 27666763776.0, "40920": 27666763776.0, "40925": 27666763776.0, "40930": 27666763776.0, "40935": 27666763776.0, "40940": 27666763776.0, "40945": 27666763776.0, "40950": 27666763776.0, "40955": 27666763776.0, "40960": 27666763776.0, "40965": 27666763776.0, "40970": 27666763776.0, "40975": 27666763776.0, "40980": 27666763776.0, "40985": 27666763776.0, "40990": 27666763776.0, "40995": 27666763776.0, "41000": 27666763776.0, "41005": 27666763776.0, "41010": 27666763776.0, "41015": 27666763776.0, "41020": 27666763776.0, "41025": 27666763776.0, "41030": 27666763776.0, "41035": 27666763776.0, "41040": 27666763776.0, "41045": 27666763776.0, "41050": 27666763776.0, "41055": 27666763776.0, "41060": 27666763776.0, "41065": 27666763776.0, "41070": 27666763776.0, "41075": 27666763776.0, "41080": 27666763776.0, "41085": 27666763776.0, "41090": 27666763776.0, "41095": 27666763776.0, "41100": 27666763776.0, "41105": 27666763776.0, "41110": 27666763776.0, "41115": 27666763776.0, "41120": 27666763776.0, "41125": 27666763776.0, "41130": 27666763776.0, "41135": 27666763776.0, "41140": 27666763776.0, "41145": 27666763776.0, "41150": 27666763776.0, "41155": 27666763776.0, "41160": 27666763776.0, "41165": 27666763776.0, "41170": 27666763776.0, "41175": 27666763776.0, "41180": 27666763776.0, "41185": 27666763776.0, "41190": 27666763776.0, "41195": 27666763776.0, "41200": 27666763776.0, "41205": 27666763776.0, "41210": 27666763776.0, "41215": 27666763776.0, "41220": 27666763776.0, "41225": 27666763776.0, "41230": 27666763776.0, "41235": 27666763776.0, "41240": 27666763776.0, "41245": 27666763776.0, "41250": 27666763776.0, "41255": 27666763776.0, "41260": 27666763776.0, "41265": 27666763776.0, "41270": 27666763776.0, "41275": 27666763776.0, "41280": 27666763776.0, "41285": 27666763776.0, "41290": 27666763776.0, "41295": 27666763776.0, "41300": 27666763776.0, "41305": 27666763776.0, "41310": 27666763776.0, "41315": 27666763776.0, "41320": 27666763776.0, "41325": 27666763776.0, "41330": 27666763776.0, "41335": 27666763776.0, "41340": 27666763776.0, "41345": 27666763776.0, "41350": 27666763776.0, "41355": 27666763776.0, "41360": 27666763776.0, "41365": 27666763776.0, "41370": 27666763776.0, "41375": 27666763776.0, "41380": 27666763776.0, "41385": 27666763776.0, "41390": 27666763776.0, "41395": 27666763776.0, "41400": 27666763776.0, "41405": 27666763776.0, "41410": 27666763776.0, "41415": 27666763776.0, "41420": 27666763776.0, "41425": 27666763776.0, "41430": 27666763776.0, "41435": 27666763776.0, "41440": 27666763776.0, "41445": 27666763776.0, "41450": 27666763776.0, "41455": 27666763776.0, "41460": 27666763776.0, "41465": 27666763776.0, "41470": 27666763776.0, "41475": 27666763776.0, "41480": 27666763776.0, "41485": 27666763776.0, "41490": 27666763776.0, "41495": 27666763776.0, "41500": 27666763776.0, "41505": 27666763776.0, "41510": 27666763776.0, "41515": 27666763776.0, "41520": 27666763776.0, "41525": 27666763776.0, "41530": 27666763776.0, "41535": 27666763776.0, "41540": 27666763776.0, "41545": 27666763776.0, "41550": 27666763776.0, "41555": 27666763776.0, "41560": 27666763776.0, "41565": 27666763776.0, "41570": 27666763776.0, "41575": 27666763776.0, "41580": 27666763776.0, "41585": 27666763776.0, "41590": 27666763776.0, "41595": 27666763776.0, "41600": 27666763776.0, "41605": 27666763776.0, "41610": 27666763776.0, "41615": 27666763776.0, "41620": 27666763776.0, "41625": 27666763776.0, "41630": 27666763776.0, "41635": 27666763776.0, "41640": 27666763776.0, "41645": 27666763776.0, "41650": 27666763776.0, "41655": 27666763776.0, "41660": 27666763776.0, "41665": 27666763776.0, "41670": 27666763776.0, "41675": 27666763776.0, "41680": 27666763776.0, "41685": 27666763776.0, "41690": 27666763776.0, "41695": 27666763776.0, "41700": 27666763776.0, "41705": 27666763776.0, "41710": 27666763776.0, "41715": 27666763776.0, "41720": 27666763776.0, "41725": 27666763776.0, "41730": 27666763776.0, "41735": 27666763776.0, "41740": 27666763776.0, "41745": 27666763776.0, "41750": 27666763776.0, "41755": 27666763776.0, "41760": 27666763776.0, "41765": 27666763776.0, "41770": 27666763776.0, "41775": 27666763776.0, "41780": 27666763776.0, "41785": 27666763776.0, "41790": 27666763776.0, "41795": 27666763776.0, "41800": 27666763776.0, "41805": 27666763776.0, "41810": 27666763776.0, "41815": 27666763776.0, "41820": 27666763776.0, "41825": 27666763776.0, "41830": 27666763776.0, "41835": 27666763776.0, "41840": 27666763776.0, "41845": 27666763776.0, "41850": 27666763776.0, "41855": 27666763776.0, "41860": 27666763776.0, "41865": 27666763776.0, "41870": 27666763776.0, "41875": 27666763776.0, "41880": 27666763776.0, "41885": 27666763776.0, "41890": 27666763776.0, "41895": 27666763776.0, "41900": 27666763776.0, "41905": 27666763776.0, "41910": 27666763776.0, "41915": 27666763776.0, "41920": 27666763776.0, "41925": 27666763776.0, "41930": 27666763776.0, "41935": 27666763776.0, "41940": 27666763776.0, "41945": 27666763776.0, "41950": 27666763776.0, "41955": 27666763776.0, "41960": 27666763776.0, "41965": 27666763776.0, "41970": 27666763776.0, "41975": 27666763776.0, "41980": 27666763776.0, "41985": 27666763776.0, "41990": 27666763776.0, "41995": 27666763776.0, "42000": 27666763776.0, "42005": 27666763776.0, "42010": 27666763776.0, "42015": 27666763776.0, "42020": 27666763776.0, "42025": 27666763776.0, "42030": 27666763776.0, "42035": 27666763776.0, "42040": 27666763776.0, "42045": 27666763776.0, "42050": 27666763776.0, "42055": 27666763776.0, "42060": 27666763776.0, "42065": 27666763776.0, "42070": 27666763776.0, "42075": 27666763776.0, "42080": 27666763776.0, "42085": 27666763776.0, "42090": 27666763776.0, "42095": 27666763776.0, "42100": 27666763776.0, "42105": 27666763776.0, "42110": 27666763776.0, "42115": 27666763776.0, "42120": 27666763776.0, "42125": 27666763776.0, "42130": 27666763776.0, "42135": 27666763776.0, "42140": 27666763776.0, "42145": 27666763776.0, "42150": 27666763776.0, "42155": 27666763776.0, "42160": 27666763776.0, "42165": 27666763776.0, "42170": 27666763776.0, "42175": 27666763776.0, "42180": 27666763776.0, "42185": 27666763776.0, "42190": 27666763776.0, "42195": 27666763776.0, "42200": 27666763776.0, "42205": 27666763776.0, "42210": 27666763776.0, "42215": 27666763776.0, "42220": 27666763776.0, "42225": 27666763776.0, "42230": 27666763776.0, "42235": 27666763776.0, "42240": 27666763776.0, "42245": 27666763776.0, "42250": 27666763776.0, "42255": 27666763776.0, "42260": 27666763776.0, "42265": 27666763776.0, "42270": 27666763776.0, "42275": 27666763776.0, "42280": 27666763776.0, "42285": 27666763776.0, "42290": 27666763776.0, "42295": 27666763776.0, "42300": 27666763776.0, "42305": 27666763776.0, "42310": 27666763776.0, "42315": 27666763776.0, "42320": 27666763776.0, "42325": 27666763776.0, "42330": 27666763776.0, "42335": 27666763776.0, "42340": 27666763776.0, "42345": 27666763776.0, "42350": 27666763776.0, "42355": 27666763776.0, "42360": 27666763776.0, "42365": 27666763776.0, "42370": 27666763776.0, "42375": 27666763776.0, "42380": 27666763776.0, "42385": 27666763776.0, "42390": 27666763776.0, "42395": 27666763776.0, "42400": 27666763776.0, "42405": 27666763776.0, "42410": 27666763776.0, "42415": 27666763776.0, "42420": 27666763776.0, "42425": 27666763776.0, "42430": 27666763776.0, "42435": 27666763776.0, "42440": 27666763776.0, "42445": 27666763776.0, "42450": 27666763776.0, "42455": 27666763776.0, "42460": 27666763776.0, "42465": 27666763776.0, "42470": 27666763776.0, "42475": 27666763776.0, "42480": 27666763776.0, "42485": 27666763776.0, "42490": 27666763776.0, "42495": 27666763776.0, "42500": 27666763776.0, "42505": 27666763776.0, "42510": 27666763776.0, "42515": 27666763776.0, "42520": 27666763776.0, "42525": 27666763776.0, "42530": 27666763776.0, "42535": 27666763776.0, "42540": 27666763776.0, "42545": 27666763776.0, "42550": 27666763776.0, "42555": 27666763776.0, "42560": 27666763776.0, "42565": 27666763776.0, "42570": 27666763776.0, "42575": 27666763776.0, "42580": 27666763776.0, "42585": 27666763776.0, "42590": 27666763776.0, "42595": 27666763776.0, "42600": 27666763776.0, "42605": 27666763776.0, "42610": 27666763776.0, "42615": 27666763776.0, "42620": 27666763776.0, "42625": 27666763776.0, "42630": 27666763776.0, "42635": 27666763776.0, "42640": 27666763776.0, "42645": 27666763776.0, "42650": 27666763776.0, "42655": 27666763776.0, "42660": 27666763776.0, "42665": 27666763776.0, "42670": 27666763776.0, "42675": 27666763776.0, "42680": 27666763776.0, "42685": 27666763776.0, "42690": 27666763776.0, "42695": 27666763776.0, "42700": 27666763776.0, "42705": 27666763776.0, "42710": 27666763776.0, "42715": 27666763776.0, "42720": 27666763776.0, "42725": 27666763776.0, "42730": 27666763776.0, "42735": 27666763776.0, "42740": 27666763776.0, "42745": 27666763776.0, "42750": 27666763776.0, "42755": 27666763776.0, "42760": 27666763776.0, "42765": 27666763776.0, "42770": 27666763776.0, "42775": 27666763776.0, "42780": 27666763776.0, "42785": 27666763776.0, "42790": 27666763776.0, "42795": 27666763776.0, "42800": 27666763776.0, "42805": 27666763776.0, "42810": 27666763776.0, "42815": 27666763776.0, "42820": 27666763776.0, "42825": 27666763776.0, "42830": 27666763776.0, "42835": 27666763776.0, "42840": 27666763776.0, "42845": 27666763776.0, "42850": 27666763776.0, "42855": 27666763776.0, "42860": 27666763776.0, "42865": 27666763776.0, "42870": 27666763776.0, "42875": 27666763776.0, "42880": 27666763776.0, "42885": 27666763776.0, "42890": 27666763776.0, "42895": 27666763776.0, "42900": 27666763776.0, "42905": 27666763776.0, "42910": 27666763776.0, "42915": 27666763776.0, "42920": 27666763776.0, "42925": 27666763776.0, "42930": 27666763776.0, "42935": 27666763776.0, "42940": 27666763776.0, "42945": 27666763776.0, "42950": 27666763776.0, "42955": 27666763776.0, "42960": 27666763776.0, "42965": 27666763776.0, "42970": 27666763776.0, "42975": 27666763776.0, "42980": 27666763776.0, "42985": 27666763776.0, "42990": 27666763776.0, "42995": 27666763776.0, "43000": 27666763776.0, "43005": 27666763776.0, "43010": 27666763776.0, "43015": 27666763776.0, "43020": 27666763776.0, "43025": 27666763776.0, "43030": 27666763776.0, "43035": 27666763776.0, "43040": 27666763776.0, "43045": 27666763776.0, "43050": 27666763776.0, "43055": 27666763776.0, "43060": 27666763776.0, "43065": 27666763776.0, "43070": 27666763776.0, "43075": 27666763776.0, "43080": 27666763776.0, "43085": 27666763776.0, "43090": 27666763776.0, "43095": 27666763776.0, "43100": 27666763776.0, "43105": 27666763776.0, "43110": 27666763776.0, "43115": 27666763776.0, "43120": 27666763776.0, "43125": 27666763776.0, "43130": 27666763776.0, "43135": 27666763776.0, "43140": 27666763776.0, "43145": 27666763776.0, "43150": 27666763776.0, "43155": 27666763776.0, "43160": 27666763776.0, "43165": 27666763776.0, "43170": 27666763776.0, "43175": 27666763776.0, "43180": 27666763776.0, "43185": 27666763776.0, "43190": 27666763776.0, "43195": 27666763776.0, "43200": 27666763776.0, "43205": 27666763776.0, "43210": 27666763776.0, "43215": 27666763776.0, "43220": 27666763776.0, "43225": 27666763776.0, "43230": 27666763776.0, "43235": 27666763776.0, "43240": 27666763776.0, "43245": 27666763776.0, "43250": 27666763776.0, "43255": 27666763776.0, "43260": 27666763776.0, "43265": 27666763776.0, "43270": 27666763776.0, "43275": 27666763776.0, "43280": 27666763776.0, "43285": 27666763776.0, "43290": 27666763776.0, "43295": 27666763776.0, "43300": 27666763776.0, "43305": 27666763776.0, "43310": 27666763776.0, "43315": 27666763776.0, "43320": 27666763776.0, "43325": 27666763776.0, "43330": 27666763776.0, "43335": 27666763776.0, "43340": 27666763776.0, "43345": 27666763776.0, "43350": 27666763776.0, "43355": 27666763776.0, "43360": 27666763776.0, "43365": 27666763776.0, "43370": 27666763776.0, "43375": 27666763776.0, "43380": 27666763776.0, "43385": 27666763776.0, "43390": 27666763776.0, "43395": 27666763776.0, "43400": 27666763776.0, "43405": 27666763776.0, "43410": 27666763776.0, "43415": 27666763776.0, "43420": 27666763776.0, "43425": 27666763776.0, "43430": 27666763776.0, "43435": 27666763776.0, "43440": 27666763776.0, "43445": 27666763776.0, "43450": 27666763776.0, "43455": 27666763776.0, "43460": 27666763776.0, "43465": 27666763776.0, "43470": 27666763776.0, "43475": 27666763776.0, "43480": 27666763776.0, "43485": 27666763776.0, "43490": 27666763776.0, "43495": 27666763776.0, "43500": 27666763776.0, "43505": 27666763776.0, "43510": 27666763776.0, "43515": 27666763776.0, "43520": 27666763776.0, "43525": 27666763776.0, "43530": 27666763776.0, "43535": 27666763776.0, "43540": 27666763776.0, "43545": 27666763776.0, "43550": 27666763776.0, "43555": 27666763776.0, "43560": 27666763776.0, "43565": 27666763776.0, "43570": 27666763776.0, "43575": 27666763776.0, "43580": 27666763776.0, "43585": 27666763776.0, "43590": 27666763776.0, "43595": 27666763776.0, "43600": 27666763776.0, "43605": 27666763776.0, "43610": 27666763776.0, "43615": 27666763776.0, "43620": 27666763776.0, "43625": 27666763776.0, "43630": 27666763776.0, "43635": 27666763776.0, "43640": 27666763776.0, "43645": 27666763776.0, "43650": 27666763776.0, "43655": 27666763776.0, "43660": 27666763776.0, "43665": 27666763776.0, "43670": 27666763776.0, "43675": 27666763776.0, "43680": 27666763776.0, "43685": 27666763776.0, "43690": 27666763776.0, "43695": 27666763776.0, "43700": 27666763776.0, "43705": 27666763776.0, "43710": 27666763776.0, "43715": 27666763776.0, "43720": 27666763776.0, "43725": 27666763776.0, "43730": 27666763776.0, "43735": 27666763776.0, "43740": 27666763776.0, "43745": 27666763776.0, "43750": 27666763776.0, "43755": 27666763776.0, "43760": 27666763776.0, "43765": 27666763776.0, "43770": 27666763776.0, "43775": 27666763776.0, "43780": 27666763776.0, "43785": 27666763776.0, "43790": 27666763776.0, "43795": 27666763776.0, "43800": 27666763776.0, "43805": 27666763776.0, "43810": 27666763776.0, "43815": 27666763776.0, "43820": 27666763776.0, "43825": 27666763776.0, "43830": 27666763776.0, "43835": 27666763776.0, "43840": 27666763776.0, "43845": 27666763776.0, "43850": 27666763776.0, "43855": 27666763776.0, "43860": 27666763776.0, "43865": 27666763776.0, "43870": 27666763776.0, "43875": 27666763776.0, "43880": 27666763776.0, "43885": 27666763776.0, "43890": 27666763776.0, "43895": 27666763776.0, "43900": 27666763776.0, "43905": 27666763776.0, "43910": 27666763776.0, "43915": 27666763776.0, "43920": 27666763776.0, "43925": 27666763776.0, "43930": 27666763776.0, "43935": 27666763776.0, "43940": 27666763776.0, "43945": 27666763776.0, "43950": 27666763776.0, "43955": 27666763776.0, "43960": 27666763776.0, "43965": 27666763776.0, "43970": 27666763776.0, "43975": 27666763776.0, "43980": 27666763776.0, "43985": 27666763776.0, "43990": 27666763776.0, "43995": 27666763776.0, "44000": 27666763776.0, "44005": 27666763776.0, "44010": 27666763776.0, "44015": 27666763776.0, "44020": 27666763776.0, "44025": 27666763776.0, "44030": 27666763776.0, "44035": 27666763776.0, "44040": 27666763776.0, "44045": 27666763776.0, "44050": 27666763776.0, "44055": 27666763776.0, "44060": 27666763776.0, "44065": 27666763776.0, "44070": 27666763776.0, "44075": 27666763776.0, "44080": 27666763776.0, "44085": 27666763776.0, "44090": 27666763776.0, "44095": 27666763776.0, "44100": 27666763776.0, "44105": 27666763776.0, "44110": 27666763776.0, "44115": 27666763776.0, "44120": 27666763776.0, "44125": 27666763776.0, "44130": 27666763776.0, "44135": 27666763776.0, "44140": 27666763776.0, "44145": 27666763776.0, "44150": 27666763776.0, "44155": 27666763776.0, "44160": 27666763776.0, "44165": 27666763776.0, "44170": 27666763776.0, "44175": 27666763776.0, "44180": 27666763776.0, "44185": 27666763776.0, "44190": 27666763776.0, "44195": 27666763776.0, "44200": 27666763776.0, "44205": 27666763776.0, "44210": 27666763776.0, "44215": 27666763776.0, "44220": 27666763776.0, "44225": 27666763776.0, "44230": 27666763776.0, "44235": 27666763776.0, "44240": 27666763776.0, "44245": 27666763776.0, "44250": 27666763776.0, "44255": 27666763776.0, "44260": 27666763776.0, "44265": 27666763776.0, "44270": 27666763776.0, "44275": 27666763776.0, "44280": 27666763776.0, "44285": 27666763776.0, "44290": 27666763776.0, "44295": 27666763776.0, "44300": 27666763776.0, "44305": 27666763776.0, "44310": 27666763776.0, "44315": 27666763776.0, "44320": 27666763776.0, "44325": 27666763776.0, "44330": 27666763776.0, "44335": 27666763776.0, "44340": 27666763776.0, "44345": 27666763776.0, "44350": 27666763776.0, "44355": 27666763776.0, "44360": 27666763776.0, "44365": 27666763776.0, "44370": 27666763776.0, "44375": 27666763776.0, "44380": 27666763776.0, "44385": 27666763776.0, "44390": 27666763776.0, "44395": 27666763776.0, "44400": 27666763776.0, "44405": 27666763776.0, "44410": 27666763776.0, "44415": 27666763776.0, "44420": 27666763776.0, "44425": 27666763776.0, "44430": 27666763776.0, "44435": 27666763776.0, "44440": 27666763776.0, "44445": 27666763776.0, "44450": 27666763776.0, "44455": 27666763776.0, "44460": 27666763776.0, "44465": 27666763776.0, "44470": 27666763776.0, "44475": 27666763776.0, "44480": 27666763776.0, "44485": 27666763776.0, "44490": 27666763776.0, "44495": 27666763776.0, "44500": 27666763776.0, "44505": 27666763776.0, "44510": 27666763776.0, "44515": 27666763776.0, "44520": 27666763776.0, "44525": 27666763776.0, "44530": 27666763776.0, "44535": 27666763776.0, "44540": 27666763776.0, "44545": 27666763776.0, "44550": 27666763776.0, "44555": 27666763776.0, "44560": 27666763776.0, "44565": 27666763776.0, "44570": 27666763776.0, "44575": 27666763776.0, "44580": 27666763776.0, "44585": 27666763776.0, "44590": 27666763776.0, "44595": 27666763776.0, "44600": 27666763776.0, "44605": 27666763776.0, "44610": 27666763776.0, "44615": 27666763776.0, "44620": 27666763776.0, "44625": 27666763776.0, "44630": 27666763776.0, "44635": 27666763776.0, "44640": 27666763776.0, "44645": 27666763776.0, "44650": 27666763776.0, "44655": 27666763776.0, "44660": 27666763776.0, "44665": 27666763776.0, "44670": 27666763776.0, "44675": 27666763776.0, "44680": 27666763776.0, "44685": 27666763776.0, "44690": 27666763776.0, "44695": 27666763776.0, "44700": 27666763776.0, "44705": 27666763776.0, "44710": 27666763776.0, "44715": 27666763776.0, "44720": 27666763776.0, "44725": 27666763776.0, "44730": 27666763776.0, "44735": 27666763776.0, "44740": 27666763776.0, "44745": 27666763776.0, "44750": 27666763776.0, "44755": 27666763776.0, "44760": 27666763776.0, "44765": 27666763776.0, "44770": 27666763776.0, "44775": 27666763776.0, "44780": 27666763776.0, "44785": 27666763776.0, "44790": 27666763776.0, "44795": 27666763776.0, "44800": 27666763776.0, "44805": 27666763776.0, "44810": 27666763776.0, "44815": 27666763776.0, "44820": 27666763776.0, "44825": 27666763776.0, "44830": 27666763776.0, "44835": 27666763776.0, "44840": 27666763776.0, "44845": 27666763776.0, "44850": 27666763776.0, "44855": 27666763776.0, "44860": 27666763776.0, "44865": 27666763776.0, "44870": 27666763776.0, "44875": 27666763776.0, "44880": 27666763776.0, "44885": 27666763776.0, "44890": 27666763776.0, "44895": 27666763776.0, "44900": 27666763776.0, "44905": 27666763776.0, "44910": 27666763776.0, "44915": 27666763776.0, "44920": 27666763776.0, "44925": 27666763776.0, "44930": 27666763776.0, "44935": 27666763776.0, "44940": 27666763776.0, "44945": 27666763776.0, "44950": 27666763776.0, "44955": 27666763776.0, "44960": 27666763776.0, "44965": 27666763776.0, "44970": 27666763776.0, "44975": 27666763776.0, "44980": 27666763776.0, "44985": 27666763776.0, "44990": 27666763776.0, "44995": 27666763776.0, "45000": 27666763776.0, "45005": 27666763776.0, "45010": 27666763776.0, "45015": 27666763776.0, "45020": 27666763776.0, "45025": 27666763776.0, "45030": 27666763776.0, "45035": 27666763776.0, "45040": 27666763776.0, "45045": 27666763776.0, "45050": 27666763776.0, "45055": 27666763776.0, "45060": 27666763776.0, "45065": 27666763776.0, "45070": 27666763776.0, "45075": 27666763776.0, "45080": 27666763776.0, "45085": 27666763776.0, "45090": 27666763776.0, "45095": 27666763776.0, "45100": 27666763776.0, "45105": 27666763776.0, "45110": 27666763776.0, "45115": 27666763776.0, "45120": 27666763776.0, "45125": 27666763776.0, "45130": 27666763776.0, "45135": 27666763776.0, "45140": 27666763776.0, "45145": 27666763776.0, "45150": 27666763776.0, "45155": 27666763776.0, "45160": 27666763776.0, "45165": 27666763776.0, "45170": 27666763776.0, "45175": 27666763776.0, "45180": 27666763776.0, "45185": 27666763776.0, "45190": 27666763776.0, "45195": 27666763776.0, "45200": 27666763776.0, "45205": 27666763776.0, "45210": 27666763776.0, "45215": 27666763776.0, "45220": 27666763776.0, "45225": 27666763776.0, "45230": 27666763776.0, "45235": 27666763776.0, "45240": 27666763776.0, "45245": 27666763776.0, "45250": 27666763776.0, "45255": 27666763776.0, "45260": 27666763776.0, "45265": 27666763776.0, "45270": 27666763776.0, "45275": 27666763776.0, "45280": 27666763776.0, "45285": 27666763776.0, "45290": 27666763776.0, "45295": 27666763776.0, "45300": 27666763776.0, "45305": 27666171904.0, "45310": 27666171904.0, "45315": 27666171904.0, "45320": 27666171904.0, "45325": 27666171904.0, "45330": 27666171904.0, "45335": 27666171904.0, "45340": 27666171904.0, "45345": 27666171904.0, "45350": 27666171904.0, "45355": 27666171904.0, "45360": 27666171904.0, "45365": 27666171904.0, "45370": 27666171904.0, "45375": 27666171904.0, "45380": 27666173952.0, "45385": 27666763776.0, "45390": 27666763776.0, "45395": 27666763776.0, "45400": 27666763776.0, "45405": 27666763776.0, "45410": 27666763776.0, "45415": 27666763776.0, "45420": 27666763776.0, "45425": 27666763776.0, "45430": 27666763776.0, "45435": 27666763776.0, "45440": 27666763776.0, "45445": 27666763776.0, "45450": 27666763776.0, "45455": 27666763776.0, "45460": 27666763776.0, "45465": 27666763776.0, "45470": 27666763776.0, "45475": 27666763776.0, "45480": 27666763776.0, "45485": 27666763776.0, "45490": 27666763776.0, "45495": 27666763776.0, "45500": 27666763776.0, "45505": 27666763776.0, "45510": 27666763776.0, "45515": 27666763776.0, "45520": 27666763776.0, "45525": 27666763776.0, "45530": 27666763776.0, "45535": 27666763776.0, "45540": 27666763776.0, "45545": 27666763776.0, "45550": 27666763776.0, "45555": 27666763776.0, "45560": 27666763776.0, "45565": 27666763776.0, "45570": 27666763776.0, "45575": 27666763776.0, "45580": 27666763776.0, "45585": 27666763776.0, "45590": 27666763776.0, "45595": 27666763776.0, "45600": 27666763776.0, "45605": 27666763776.0, "45610": 27666763776.0, "45615": 27666763776.0, "45620": 27666763776.0, "45625": 27666763776.0, "45630": 27666763776.0, "45635": 27666763776.0, "45640": 27666763776.0, "45645": 27666763776.0, "45650": 27666763776.0, "45655": 27666763776.0, "45660": 27666763776.0, "45665": 27666763776.0, "45670": 27666763776.0, "45675": 27666763776.0, "45680": 27666763776.0, "45685": 27666763776.0, "45690": 27666763776.0, "45695": 27666763776.0, "45700": 27666763776.0, "45705": 27666763776.0, "45710": 27666763776.0, "45715": 27666763776.0, "45720": 27666763776.0, "45725": 27666763776.0, "45730": 27666763776.0, "45735": 27666763776.0, "45740": 27666763776.0, "45745": 27666763776.0, "45750": 27666763776.0, "45755": 27666763776.0, "45760": 27666763776.0, "45765": 27666763776.0, "45770": 27666763776.0, "45775": 27666763776.0, "45780": 27666763776.0, "45785": 27666763776.0, "45790": 27666763776.0, "45795": 27666763776.0, "45800": 27666763776.0, "45805": 27666763776.0, "45810": 27666763776.0, "45815": 27666763776.0, "45820": 27666763776.0, "45825": 27666763776.0, "45830": 27666763776.0, "45835": 27666763776.0, "45840": 27666763776.0, "45845": 27666763776.0, "45850": 27666763776.0, "45855": 27666763776.0, "45860": 27666763776.0, "45865": 27666763776.0, "45870": 27666763776.0, "45875": 27666763776.0, "45880": 27666763776.0, "45885": 27666763776.0, "45890": 27666763776.0, "45895": 27666763776.0, "45900": 27666763776.0, "45905": 27666763776.0, "45910": 27666763776.0, "45915": 27666763776.0, "45920": 27666763776.0, "45925": 27666763776.0, "45930": 27666763776.0, "45935": 27666763776.0, "45940": 27666763776.0, "45945": 27666763776.0, "45950": 27666763776.0, "45955": 27666763776.0, "45960": 27666763776.0, "45965": 27666763776.0, "45970": 27666763776.0, "45975": 27666763776.0, "45980": 27666763776.0, "45985": 27666763776.0, "45990": 27666763776.0, "45995": 27666763776.0, "46000": 27666763776.0, "46005": 27666763776.0, "46010": 27666763776.0, "46015": 27666763776.0, "46020": 27666763776.0, "46025": 27666763776.0, "46030": 27666763776.0, "46035": 27666763776.0, "46040": 27666763776.0, "46045": 27666763776.0, "46050": 27666763776.0, "46055": 27666763776.0, "46060": 27666763776.0, "46065": 27666763776.0, "46070": 27666763776.0, "46075": 27666763776.0, "46080": 27666763776.0, "46085": 27666763776.0, "46090": 27666763776.0, "46095": 27666763776.0, "46100": 27666763776.0, "46105": 27666763776.0, "46110": 27666763776.0, "46115": 27666763776.0, "46120": 27666763776.0, "46125": 27666763776.0, "46130": 27666763776.0, "46135": 27666763776.0, "46140": 27666763776.0, "46145": 27666763776.0, "46150": 27666763776.0, "46155": 27666763776.0, "46160": 27666763776.0, "46165": 27666763776.0, "46170": 27666763776.0, "46175": 27666763776.0, "46180": 27666763776.0, "46185": 27666763776.0, "46190": 27666763776.0, "46195": 27666763776.0, "46200": 27666763776.0, "46205": 27666763776.0, "46210": 27666763776.0, "46215": 27666763776.0, "46220": 27666763776.0, "46225": 27666763776.0, "46230": 27666763776.0, "46235": 27666763776.0, "46240": 27666763776.0, "46245": 27666763776.0, "46250": 27666763776.0, "46255": 27666763776.0, "46260": 27666763776.0, "46265": 27666763776.0, "46270": 27666763776.0, "46275": 27666763776.0, "46280": 27666763776.0, "46285": 27666763776.0, "46290": 27666763776.0, "46295": 27666763776.0, "46300": 27666763776.0, "46305": 27666763776.0, "46310": 27666763776.0, "46315": 27666763776.0, "46320": 27666763776.0, "46325": 27666763776.0, "46330": 27666763776.0, "46335": 27666763776.0, "46340": 27666763776.0, "46345": 27666763776.0, "46350": 27666763776.0, "46355": 27666763776.0, "46360": 27666763776.0, "46365": 27666763776.0, "46370": 27666763776.0, "46375": 27666763776.0, "46380": 27666763776.0, "46385": 27666763776.0, "46390": 27666763776.0, "46395": 27666763776.0, "46400": 27666763776.0, "46405": 27666763776.0, "46410": 27666763776.0, "46415": 27666763776.0, "46420": 27666763776.0, "46425": 27666763776.0, "46430": 27666763776.0, "46435": 27666763776.0, "46440": 27666763776.0, "46445": 27666763776.0, "46450": 27666763776.0, "46455": 27666763776.0, "46460": 27666763776.0, "46465": 27666763776.0, "46470": 27666763776.0, "46475": 27666763776.0, "46480": 27666763776.0, "46485": 27666763776.0, "46490": 27666763776.0, "46495": 27666763776.0, "46500": 27666763776.0, "46505": 27666763776.0, "46510": 27666763776.0, "46515": 27666763776.0, "46520": 27666763776.0, "46525": 27666763776.0, "46530": 27666763776.0, "46535": 27666763776.0, "46540": 27666763776.0, "46545": 27666763776.0, "46550": 27666763776.0, "46555": 27666763776.0, "46560": 27666763776.0, "46565": 27666763776.0, "46570": 27666763776.0, "46575": 27666763776.0, "46580": 27666763776.0, "46585": 27666763776.0, "46590": 27666763776.0, "46595": 27666763776.0, "46600": 27666763776.0, "46605": 27666763776.0, "46610": 27666763776.0, "46615": 27666763776.0, "46620": 27666763776.0, "46625": 27666763776.0, "46630": 27666763776.0, "46635": 27666763776.0, "46640": 27666763776.0, "46645": 27666763776.0, "46650": 27666763776.0, "46655": 27666763776.0, "46660": 27666763776.0, "46665": 27666763776.0, "46670": 27666763776.0, "46675": 27666763776.0, "46680": 27666763776.0, "46685": 27666763776.0, "46690": 27666763776.0, "46695": 27666763776.0, "46700": 27666763776.0, "46705": 27666763776.0, "46710": 27666763776.0, "46715": 27666763776.0, "46720": 27666763776.0, "46725": 27666763776.0, "46730": 27666763776.0, "46735": 27666763776.0, "46740": 27666763776.0, "46745": 27666763776.0, "46750": 27666763776.0, "46755": 27666763776.0, "46760": 27666763776.0, "46765": 27666763776.0, "46770": 27666763776.0, "46775": 27666763776.0, "46780": 27666763776.0, "46785": 27666763776.0, "46790": 27666763776.0, "46795": 27666763776.0, "46800": 27666763776.0, "46805": 27666763776.0, "46810": 27666763776.0, "46815": 27666763776.0, "46820": 27666763776.0, "46825": 27666763776.0, "46830": 27666763776.0, "46835": 27666763776.0, "46840": 27666763776.0, "46845": 27666763776.0, "46850": 27666763776.0, "46855": 27666763776.0, "46860": 27666763776.0, "46865": 27666763776.0, "46870": 27666763776.0, "46875": 27666763776.0, "46880": 27666763776.0, "46885": 27666763776.0, "46890": 27666763776.0, "46895": 27666763776.0, "46900": 27666763776.0, "46905": 27666763776.0, "46910": 27666763776.0, "46915": 27666763776.0, "46920": 27666763776.0, "46925": 27666763776.0, "46930": 27666763776.0, "46935": 27666763776.0, "46940": 27666763776.0, "46945": 27666763776.0, "46950": 27666763776.0, "46955": 27666763776.0, "46960": 27666763776.0, "46965": 27666763776.0, "46970": 27666763776.0, "46975": 27666763776.0, "46980": 27666763776.0, "46985": 27666763776.0, "46990": 27666763776.0, "46995": 27666763776.0, "47000": 27666763776.0, "47005": 27666763776.0, "47010": 27666763776.0, "47015": 27666763776.0, "47020": 27666763776.0, "47025": 27666763776.0, "47030": 27666763776.0, "47035": 27666763776.0, "47040": 27666763776.0, "47045": 27666763776.0, "47050": 27666763776.0, "47055": 27666763776.0, "47060": 27666763776.0, "47065": 27666763776.0, "47070": 27666763776.0, "47075": 27666763776.0, "47080": 27666763776.0, "47085": 27666763776.0, "47090": 27666763776.0, "47095": 27666763776.0, "47100": 27666763776.0, "47105": 27666763776.0, "47110": 27666763776.0, "47115": 27666763776.0, "47120": 27666763776.0, "47125": 27666763776.0, "47130": 27666763776.0, "47135": 27666763776.0, "47140": 27666763776.0, "47145": 27666763776.0, "47150": 27666763776.0, "47155": 27666763776.0, "47160": 27666763776.0, "47165": 27666763776.0, "47170": 27666763776.0, "47175": 27666763776.0, "47180": 27666763776.0, "47185": 27666763776.0, "47190": 27666763776.0, "47195": 27666763776.0, "47200": 27666763776.0, "47205": 27666763776.0, "47210": 27666763776.0, "47215": 27666763776.0, "47220": 27666763776.0, "47225": 27666763776.0, "47230": 27666763776.0, "47235": 27666763776.0, "47240": 27666763776.0, "47245": 27666763776.0, "47250": 27666763776.0, "47255": 27666763776.0, "47260": 27666763776.0, "47265": 27666763776.0, "47270": 27666763776.0, "47275": 27666763776.0, "47280": 27666763776.0, "47285": 27666763776.0, "47290": 27666763776.0, "47295": 27666763776.0, "47300": 27666763776.0, "47305": 27666763776.0, "47310": 27666763776.0, "47315": 27666763776.0, "47320": 27666763776.0, "47325": 27666763776.0, "47330": 27666763776.0, "47335": 27666763776.0, "47340": 27666763776.0, "47345": 27666763776.0, "47350": 27666763776.0, "47355": 27666763776.0, "47360": 27666763776.0, "47365": 27666763776.0, "47370": 27666763776.0, "47375": 27666763776.0, "47380": 27666763776.0, "47385": 27666763776.0, "47390": 27666763776.0, "47395": 27666763776.0, "47400": 27666763776.0, "47405": 27666763776.0, "47410": 27666763776.0, "47415": 27666763776.0, "47420": 27666763776.0, "47425": 27666763776.0, "47430": 27666763776.0, "47435": 27666763776.0, "47440": 27666763776.0, "47445": 27666763776.0, "47450": 27666763776.0, "47455": 27666763776.0, "47460": 27666763776.0, "47465": 27666763776.0, "47470": 27666763776.0, "47475": 27666763776.0, "47480": 27666763776.0, "47485": 27666763776.0, "47490": 27666763776.0, "47495": 27666763776.0, "47500": 27666763776.0, "47505": 27666763776.0, "47510": 27666763776.0, "47515": 27666763776.0, "47520": 27666763776.0, "47525": 27666763776.0, "47530": 27666763776.0, "47535": 27666763776.0, "47540": 27666763776.0, "47545": 27666763776.0, "47550": 27666763776.0, "47555": 27666763776.0, "47560": 27666763776.0, "47565": 27666763776.0, "47570": 27666763776.0, "47575": 27666763776.0, "47580": 27666763776.0, "47585": 27666763776.0, "47590": 27666763776.0, "47595": 27666763776.0, "47600": 27666763776.0, "47605": 27666763776.0, "47610": 27666763776.0, "47615": 27666763776.0, "47620": 27666763776.0, "47625": 27666763776.0, "47630": 27666763776.0, "47635": 27666763776.0, "47640": 27666763776.0, "47645": 27666763776.0, "47650": 27666763776.0, "47655": 27666763776.0, "47660": 27666763776.0, "47665": 27666763776.0, "47670": 27666763776.0, "47675": 27666763776.0, "47680": 27666763776.0, "47685": 27666763776.0, "47690": 27666763776.0, "47695": 27666763776.0, "47700": 27666763776.0, "47705": 27666763776.0, "47710": 27666763776.0, "47715": 27666763776.0, "47720": 27666763776.0, "47725": 27666763776.0, "47730": 27666763776.0, "47735": 27666763776.0, "47740": 27666763776.0, "47745": 27666763776.0, "47750": 27666763776.0, "47755": 27666763776.0, "47760": 27666763776.0, "47765": 27666763776.0, "47770": 27666763776.0, "47775": 27666763776.0, "47780": 27666763776.0, "47785": 27666763776.0, "47790": 27666763776.0, "47795": 27666763776.0, "47800": 27666763776.0, "47805": 27666763776.0, "47810": 27666763776.0, "47815": 27666763776.0, "47820": 27666763776.0, "47825": 27666763776.0, "47830": 27666763776.0, "47835": 27666763776.0, "47840": 27666763776.0, "47845": 27666763776.0, "47850": 27666763776.0, "47855": 27666763776.0, "47860": 27666763776.0, "47865": 27666763776.0, "47870": 27666763776.0, "47875": 27666763776.0, "47880": 27666763776.0, "47885": 27666763776.0, "47890": 27666763776.0, "47895": 27666763776.0, "47900": 27666763776.0, "47905": 27666763776.0, "47910": 27666763776.0, "47915": 27666763776.0, "47920": 27666763776.0, "47925": 27666763776.0, "47930": 27666763776.0, "47935": 27666763776.0, "47940": 27666763776.0, "47945": 27666763776.0, "47950": 27666763776.0, "47955": 27666763776.0, "47960": 27666763776.0, "47965": 27666763776.0, "47970": 27666763776.0, "47975": 27666763776.0, "47980": 27666763776.0, "47985": 27666763776.0, "47990": 27666763776.0, "47995": 27666763776.0, "48000": 27666763776.0, "48005": 27666763776.0, "48010": 27666763776.0, "48015": 27666763776.0, "48020": 27666763776.0, "48025": 27666763776.0, "48030": 27666763776.0, "48035": 27666763776.0, "48040": 27666763776.0, "48045": 27666763776.0, "48050": 27666763776.0, "48055": 27666763776.0, "48060": 27666763776.0, "48065": 27666763776.0, "48070": 27666763776.0, "48075": 27666763776.0, "48080": 27666763776.0, "48085": 27666763776.0, "48090": 27666763776.0, "48095": 27666763776.0, "48100": 27666763776.0, "48105": 27666763776.0, "48110": 27666763776.0, "48115": 27666763776.0, "48120": 27666763776.0, "48125": 27666763776.0, "48130": 27666763776.0, "48135": 27666763776.0, "48140": 27666763776.0, "48145": 27666763776.0, "48150": 27666763776.0, "48155": 27666763776.0, "48160": 27666763776.0, "48165": 27666763776.0, "48170": 27666763776.0, "48175": 27666763776.0, "48180": 27666763776.0, "48185": 27666763776.0, "48190": 27666763776.0, "48195": 27666763776.0, "48200": 27666763776.0, "48205": 27666763776.0, "48210": 27666763776.0, "48215": 27666763776.0, "48220": 27666763776.0, "48225": 27666763776.0, "48230": 27666763776.0, "48235": 27666763776.0, "48240": 27666763776.0, "48245": 27666763776.0, "48250": 27666763776.0, "48255": 27666763776.0, "48260": 27666763776.0, "48265": 27666763776.0, "48270": 27666763776.0, "48275": 27666763776.0, "48280": 27666763776.0, "48285": 27666763776.0, "48290": 27666763776.0, "48295": 27666763776.0, "48300": 27666763776.0, "48305": 27666763776.0, "48310": 27666763776.0, "48315": 27666763776.0, "48320": 27666763776.0, "48325": 27666763776.0, "48330": 27666763776.0, "48335": 27666763776.0, "48340": 27666763776.0, "48345": 27666763776.0, "48350": 27666763776.0, "48355": 27666763776.0, "48360": 27666763776.0, "48365": 27666763776.0, "48370": 27666763776.0, "48375": 27666763776.0, "48380": 27666763776.0, "48385": 27666763776.0, "48390": 27666763776.0, "48395": 27666763776.0, "48400": 27666763776.0, "48405": 27666763776.0, "48410": 27666763776.0, "48415": 27666763776.0, "48420": 27666763776.0, "48425": 27666763776.0, "48430": 27666763776.0, "48435": 27666763776.0, "48440": 27666763776.0, "48445": 27666763776.0, "48450": 27666763776.0, "48455": 27666763776.0, "48460": 27666763776.0, "48465": 27666763776.0, "48470": 27666763776.0, "48475": 27666763776.0, "48480": 27666763776.0, "48485": 27666763776.0, "48490": 27666763776.0, "48495": 27666763776.0, "48500": 27666763776.0, "48505": 27666763776.0, "48510": 27666763776.0, "48515": 27666763776.0, "48520": 27666763776.0, "48525": 27666763776.0, "48530": 27666763776.0, "48535": 27666763776.0, "48540": 27666763776.0, "48545": 27666763776.0, "48550": 27666763776.0, "48555": 27666763776.0, "48560": 27666763776.0, "48565": 27666763776.0, "48570": 27666763776.0, "48575": 27666763776.0, "48580": 27666763776.0, "48585": 27666763776.0, "48590": 27666763776.0, "48595": 27666763776.0, "48600": 27666763776.0, "48605": 27666763776.0, "48610": 27666763776.0, "48615": 27666763776.0, "48620": 27666763776.0, "48625": 27666763776.0, "48630": 27666763776.0, "48635": 27666763776.0, "48640": 27666763776.0, "48645": 27666763776.0, "48650": 27666763776.0, "48655": 27666763776.0, "48660": 27666763776.0, "48665": 27666763776.0, "48670": 27666763776.0, "48675": 27666763776.0, "48680": 27666763776.0, "48685": 27666763776.0, "48690": 27666763776.0, "48695": 27666763776.0, "48700": 27666763776.0, "48705": 27666763776.0, "48710": 27666763776.0, "48715": 27666763776.0, "48720": 27666763776.0, "48725": 27666763776.0, "48730": 27666763776.0, "48735": 27666763776.0, "48740": 27666763776.0, "48745": 27666763776.0, "48750": 27666763776.0, "48755": 27666763776.0, "48760": 27666763776.0, "48765": 27666763776.0, "48770": 27666763776.0, "48775": 27666763776.0, "48780": 27666763776.0, "48785": 27666763776.0, "48790": 27666763776.0, "48795": 27666763776.0, "48800": 27666763776.0, "48805": 27666763776.0, "48810": 27666763776.0, "48815": 27666763776.0, "48820": 27666763776.0, "48825": 27666763776.0, "48830": 27666763776.0, "48835": 27666763776.0, "48840": 27666763776.0, "48845": 27666763776.0, "48850": 27666763776.0, "48855": 27666763776.0, "48860": 27666763776.0, "48865": 27666763776.0, "48870": 27666763776.0, "48875": 27666763776.0, "48880": 27666763776.0, "48885": 27666763776.0, "48890": 27666763776.0, "48895": 27666763776.0, "48900": 27666763776.0, "48905": 27666763776.0, "48910": 27666763776.0, "48915": 27666763776.0, "48920": 27666763776.0, "48925": 27666763776.0, "48930": 27666763776.0, "48935": 27666763776.0, "48940": 27666763776.0, "48945": 27666763776.0, "48950": 27666763776.0, "48955": 27666763776.0, "48960": 27666763776.0, "48965": 27666763776.0, "48970": 27666763776.0, "48975": 27666763776.0, "48980": 27666763776.0, "48985": 27666763776.0, "48990": 27666763776.0, "48995": 27666763776.0, "49000": 27666763776.0, "49005": 27666763776.0, "49010": 27666763776.0, "49015": 27666763776.0, "49020": 27666763776.0, "49025": 27666763776.0, "49030": 27666763776.0, "49035": 27666763776.0, "49040": 27666763776.0, "49045": 27666763776.0, "49050": 27666763776.0, "49055": 27666763776.0, "49060": 27666763776.0, "49065": 27666763776.0, "49070": 27666763776.0, "49075": 27666763776.0, "49080": 27666763776.0, "49085": 27666763776.0, "49090": 27666763776.0, "49095": 27666763776.0, "49100": 27666763776.0, "49105": 27666763776.0, "49110": 27666763776.0, "49115": 27666763776.0, "49120": 27666763776.0, "49125": 27666763776.0, "49130": 27666763776.0, "49135": 27666763776.0, "49140": 27666763776.0, "49145": 27666763776.0, "49150": 27666763776.0, "49155": 27666763776.0, "49160": 27666763776.0, "49165": 27666763776.0, "49170": 27666763776.0, "49175": 27666763776.0, "49180": 27666763776.0, "49185": 27666763776.0, "49190": 27666763776.0, "49195": 27666763776.0, "49200": 27666763776.0, "49205": 27666763776.0, "49210": 27666763776.0, "49215": 27666763776.0, "49220": 27666763776.0, "49225": 27666763776.0, "49230": 27666763776.0, "49235": 27666763776.0, "49240": 27666763776.0, "49245": 27666763776.0, "49250": 27666763776.0, "49255": 27666763776.0, "49260": 27666763776.0, "49265": 27666763776.0, "49270": 27666763776.0, "49275": 27666763776.0, "49280": 27666763776.0, "49285": 27666763776.0, "49290": 27666763776.0, "49295": 27666763776.0, "49300": 27666763776.0, "49305": 27666763776.0, "49310": 27666763776.0, "49315": 27666763776.0, "49320": 27666763776.0, "49325": 27666763776.0, "49330": 27666763776.0, "49335": 27666763776.0, "49340": 27666763776.0, "49345": 27666763776.0, "49350": 27666763776.0, "49355": 27666763776.0, "49360": 27666763776.0, "49365": 27666763776.0, "49370": 27666763776.0, "49375": 27666763776.0, "49380": 27666763776.0, "49385": 27666763776.0, "49390": 27666763776.0, "49395": 27666763776.0, "49400": 27666763776.0, "49405": 27666763776.0, "49410": 27666763776.0, "49415": 27666763776.0, "49420": 27666763776.0, "49425": 27666763776.0, "49430": 27666763776.0, "49435": 27666763776.0, "49440": 27666763776.0, "49445": 27666763776.0, "49450": 27666763776.0, "49455": 27666763776.0, "49460": 27666763776.0, "49465": 27666763776.0, "49470": 27666763776.0, "49475": 27666763776.0, "49480": 27666763776.0, "49485": 27666763776.0, "49490": 27666763776.0, "49495": 27666763776.0, "49500": 27666763776.0, "49505": 27666763776.0, "49510": 27666763776.0, "49515": 27666763776.0, "49520": 27666763776.0, "49525": 27666763776.0, "49530": 27666763776.0, "49535": 27666763776.0, "49540": 27666763776.0, "49545": 27666763776.0, "49550": 27666763776.0, "49555": 27666763776.0, "49560": 27666763776.0, "49565": 27666763776.0, "49570": 27666763776.0, "49575": 27666763776.0, "49580": 27666763776.0, "49585": 27666763776.0, "49590": 27666763776.0, "49595": 27666763776.0, "49600": 27666763776.0, "49605": 27666763776.0, "49610": 27666763776.0, "49615": 27666763776.0, "49620": 27666763776.0, "49625": 27666763776.0, "49630": 27666763776.0, "49635": 27666763776.0, "49640": 27666763776.0, "49645": 27666763776.0, "49650": 27666763776.0, "49655": 27666763776.0, "49660": 27666763776.0, "49665": 27666763776.0, "49670": 27666763776.0, "49675": 27666763776.0, "49680": 27666763776.0, "49685": 27666763776.0, "49690": 27666763776.0, "49695": 27666763776.0, "49700": 27666763776.0, "49705": 27666763776.0, "49710": 27666763776.0, "49715": 27666763776.0, "49720": 27666763776.0, "49725": 27666763776.0, "49730": 27666763776.0, "49735": 27666763776.0, "49740": 27666763776.0, "49745": 27666763776.0, "49750": 27666763776.0, "49755": 27666763776.0, "49760": 27666763776.0, "49765": 27666763776.0, "49770": 27666763776.0, "49775": 27666763776.0, "49780": 27666763776.0, "49785": 27666763776.0, "49790": 27666763776.0, "49795": 27666763776.0, "49800": 27666763776.0, "49805": 27666763776.0, "49810": 27666763776.0, "49815": 27666763776.0, "49820": 27666763776.0, "49825": 27666763776.0, "49830": 27666763776.0, "49835": 27666763776.0, "49840": 27666763776.0, "49845": 27666763776.0, "49850": 27666763776.0, "49855": 27666763776.0, "49860": 27666763776.0, "49865": 27666763776.0, "49870": 27666763776.0, "49875": 27666763776.0, "49880": 27666763776.0, "49885": 27666763776.0, "49890": 27666763776.0, "49895": 27666763776.0, "49900": 27666763776.0, "49905": 27666763776.0, "49910": 27666763776.0, "49915": 27666763776.0, "49920": 27666763776.0, "49925": 27666763776.0, "49930": 27666763776.0, "49935": 27666763776.0, "49940": 27666763776.0, "49945": 27666763776.0, "49950": 27666763776.0, "49955": 27666763776.0, "49960": 27666763776.0, "49965": 27666763776.0, "49970": 27666763776.0, "49975": 27666763776.0, "49980": 27666763776.0, "49985": 27666763776.0, "49990": 27666763776.0, "49995": 27666763776.0, "50000": 27666763776.0, "50005": 27666763776.0, "50010": 27666763776.0, "50015": 27666763776.0, "50020": 27666763776.0, "50025": 27666763776.0, "50030": 27666763776.0, "50035": 27666763776.0, "50040": 27666763776.0, "50045": 27666763776.0, "50050": 27666763776.0, "50055": 27666763776.0, "50060": 27666763776.0, "50065": 27666763776.0, "50070": 27666763776.0, "50075": 27666763776.0, "50080": 27666763776.0, "50085": 27666763776.0, "50090": 27666763776.0, "50095": 27666763776.0, "50100": 27666763776.0, "50105": 27666763776.0, "50110": 27666763776.0, "50115": 27666763776.0, "50120": 27666763776.0, "50125": 27666763776.0, "50130": 27666763776.0, "50135": 27666763776.0, "50140": 27666763776.0, "50145": 27666763776.0, "50150": 27666763776.0, "50155": 27666763776.0, "50160": 27666763776.0, "50165": 27666763776.0, "50170": 27666763776.0, "50175": 27666763776.0, "50180": 27666763776.0, "50185": 27666763776.0, "50190": 27666763776.0, "50195": 27666763776.0, "50200": 27666763776.0, "50205": 27666763776.0, "50210": 27666763776.0, "50215": 27666763776.0, "50220": 27666763776.0, "50225": 27666763776.0, "50230": 27666763776.0, "50235": 27666763776.0, "50240": 27666763776.0, "50245": 27666763776.0, "50250": 27666763776.0, "50255": 27666763776.0, "50260": 27666763776.0, "50265": 27666763776.0, "50270": 27666763776.0, "50275": 27666763776.0, "50280": 27666763776.0, "50285": 27666763776.0, "50290": 27666763776.0, "50295": 27666763776.0, "50300": 27666763776.0, "50305": 27666763776.0, "50310": 27666763776.0, "50315": 27666763776.0, "50320": 27666763776.0, "50325": 27666763776.0, "50330": 27666763776.0, "50335": 27666763776.0, "50340": 27666763776.0, "50345": 27666763776.0, "50350": 27666763776.0, "50355": 27666763776.0, "50360": 27666763776.0, "50365": 27666763776.0, "50370": 27666763776.0, "50375": 27666763776.0, "50380": 27666763776.0, "50385": 27666763776.0, "50390": 27666763776.0, "50395": 27666763776.0, "50400": 27666763776.0, "50405": 27666763776.0, "50410": 27666763776.0, "50415": 27666763776.0, "50420": 27666763776.0, "50425": 27666763776.0, "50430": 27666763776.0, "50435": 27666763776.0, "50440": 27666763776.0, "50445": 27666763776.0, "50450": 27666763776.0, "50455": 27666763776.0, "50460": 27666763776.0, "50465": 27666763776.0, "50470": 27666763776.0, "50475": 27666763776.0, "50480": 27666763776.0, "50485": 27666763776.0, "50490": 27666763776.0, "50495": 27666763776.0, "50500": 27666763776.0, "50505": 27666763776.0, "50510": 27666763776.0, "50515": 27666763776.0, "50520": 27666763776.0, "50525": 27666763776.0, "50530": 27666763776.0, "50535": 27666763776.0, "50540": 27666763776.0, "50545": 27666763776.0, "50550": 27666763776.0, "50555": 27666763776.0, "50560": 27666763776.0, "50565": 27666763776.0, "50570": 27666763776.0, "50575": 27666763776.0, "50580": 27666763776.0, "50585": 27666763776.0, "50590": 27666763776.0, "50595": 27666763776.0, "50600": 27666763776.0, "50605": 27666763776.0, "50610": 27666763776.0, "50615": 27666763776.0, "50620": 27666763776.0, "50625": 27666763776.0, "50630": 27666763776.0, "50635": 27666763776.0, "50640": 27666763776.0, "50645": 27666763776.0, "50650": 27666763776.0, "50655": 27666763776.0, "50660": 27666763776.0, "50665": 27666763776.0, "50670": 27666763776.0, "50675": 27666763776.0, "50680": 27666763776.0, "50685": 27666763776.0, "50690": 27666763776.0, "50695": 27666763776.0, "50700": 27666763776.0, "50705": 27666763776.0, "50710": 27666763776.0, "50715": 27666763776.0, "50720": 27666763776.0, "50725": 27666763776.0, "50730": 27666763776.0, "50735": 27666763776.0, "50740": 27666763776.0, "50745": 27666763776.0, "50750": 27666763776.0, "50755": 27666763776.0, "50760": 27666763776.0, "50765": 27666763776.0, "50770": 27666763776.0, "50775": 27666763776.0, "50780": 27666763776.0, "50785": 27666763776.0, "50790": 27666763776.0, "50795": 27666763776.0, "50800": 27666763776.0, "50805": 27666763776.0, "50810": 27666763776.0, "50815": 27666763776.0, "50820": 27666763776.0, "50825": 27666763776.0, "50830": 27666763776.0, "50835": 27666763776.0, "50840": 27666763776.0, "50845": 27666763776.0, "50850": 27666763776.0, "50855": 27666763776.0, "50860": 27666763776.0, "50865": "nan", "50870": "nan", "50875": "nan", "50880": "nan", "50885": "nan", "50890": "nan", "50895": "nan", "50900": "nan", "50905": "nan", "50910": "nan", "50915": "nan", "50920": "nan", "50925": "nan", "50930": "nan", "50935": "nan", "50940": "nan", "50945": "nan", "50950": "nan", "50955": "nan", "50960": "nan", "50965": "nan", "50970": "nan", "50975": "nan", "50980": "nan", "50985": "nan", "50990": "nan", "50995": "nan", "51000": "nan"}}, "iteration-time": {"start_step": 1, "end_step": 51000, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": "nan", "25": "nan", "30": "nan", "35": "nan", "40": "nan", "45": "nan", "50": "nan", "55": "nan", "60": "nan", "65": "nan", "70": "nan", "75": "nan", "80": "nan", "85": "nan", "90": "nan", "95": "nan", "100": 1.57192, "105": "nan", "110": "nan", "115": "nan", "120": "nan", "125": "nan", "130": "nan", "135": "nan", "140": "nan", "145": "nan", "150": "nan", "155": "nan", "160": "nan", "165": "nan", "170": "nan", "175": "nan", "180": "nan", "185": "nan", "190": "nan", "195": "nan", "200": 1.45674, "205": "nan", "210": "nan", "215": "nan", "220": "nan", "225": "nan", "230": "nan", "235": "nan", "240": "nan", "245": "nan", "250": "nan", "255": "nan", "260": "nan", "265": "nan", "270": "nan", "275": "nan", "280": "nan", "285": "nan", "290": "nan", "295": "nan", "300": 1.45507, "305": "nan", "310": "nan", "315": "nan", "320": "nan", "325": "nan", "330": "nan", "335": "nan", "340": "nan", "345": "nan", "350": "nan", "355": "nan", "360": "nan", "365": "nan", "370": "nan", "375": "nan", "380": "nan", "385": "nan", "390": "nan", "395": "nan", "400": 1.45317, "405": "nan", "410": "nan", "415": "nan", "420": "nan", "425": "nan", "430": "nan", "435": "nan", "440": "nan", "445": "nan", "450": "nan", "455": "nan", "460": "nan", "465": "nan", "470": "nan", "475": "nan", "480": "nan", "485": "nan", "490": "nan", "495": "nan", "500": 1.45301, "505": "nan", "510": "nan", "515": "nan", "520": "nan", "525": "nan", "530": "nan", "535": "nan", "540": "nan", "545": "nan", "550": "nan", "555": "nan", "560": "nan", "565": "nan", "570": "nan", "575": "nan", "580": "nan", "585": "nan", "590": "nan", "595": "nan", "600": 1.45218, "605": "nan", "610": "nan", "615": "nan", "620": "nan", "625": "nan", "630": "nan", "635": "nan", "640": "nan", "645": "nan", "650": "nan", "655": "nan", "660": "nan", "665": "nan", "670": "nan", "675": "nan", "680": "nan", "685": "nan", "690": "nan", "695": "nan", "700": 1.45019, "705": "nan", "710": "nan", "715": "nan", "720": "nan", "725": "nan", "730": "nan", "735": "nan", "740": "nan", "745": "nan", "750": "nan", "755": "nan", "760": "nan", "765": "nan", "770": "nan", "775": "nan", "780": "nan", "785": "nan", "790": "nan", "795": "nan", "800": 1.45333, "805": "nan", "810": "nan", "815": "nan", "820": "nan", "825": "nan", "830": "nan", "835": "nan", "840": "nan", "845": "nan", "850": "nan", "855": "nan", "860": "nan", "865": "nan", "870": "nan", "875": "nan", "880": "nan", "885": "nan", "890": "nan", "895": "nan", "900": 1.44929, "905": "nan", "910": "nan", "915": "nan", "920": "nan", "925": "nan", "930": "nan", "935": "nan", "940": "nan", "945": "nan", "950": "nan", "955": "nan", "960": "nan", "965": "nan", "970": "nan", "975": "nan", "980": "nan", "985": "nan", "990": "nan", "995": "nan", "1000": 1.44839, "1005": "nan", "1010": "nan", "1015": "nan", "1020": "nan", "1025": "nan", "1030": "nan", "1035": "nan", "1040": "nan", "1045": "nan", "1050": "nan", "1055": "nan", "1060": "nan", "1065": "nan", "1070": "nan", "1075": "nan", "1080": "nan", "1085": "nan", "1090": "nan", "1095": "nan", "1100": 1.44708, "1105": "nan", "1110": "nan", "1115": "nan", "1120": "nan", "1125": "nan", "1130": "nan", "1135": "nan", "1140": "nan", "1145": "nan", "1150": "nan", "1155": "nan", "1160": "nan", "1165": "nan", "1170": "nan", "1175": "nan", "1180": "nan", "1185": "nan", "1190": "nan", "1195": "nan", "1200": 1.44217, "1205": "nan", "1210": "nan", "1215": "nan", "1220": "nan", "1225": "nan", "1230": "nan", "1235": "nan", "1240": "nan", "1245": "nan", "1250": "nan", "1255": "nan", "1260": "nan", "1265": "nan", "1270": "nan", "1275": "nan", "1280": "nan", "1285": "nan", "1290": "nan", "1295": "nan", "1300": 1.43567, "1305": "nan", "1310": "nan", "1315": "nan", "1320": "nan", "1325": "nan", "1330": "nan", "1335": "nan", "1340": "nan", "1345": "nan", "1350": "nan", "1355": "nan", "1360": "nan", "1365": "nan", "1370": "nan", "1375": "nan", "1380": "nan", "1385": "nan", "1390": "nan", "1395": "nan", "1400": 1.43866, "1405": "nan", "1410": "nan", "1415": "nan", "1420": "nan", "1425": "nan", "1430": "nan", "1435": "nan", "1440": "nan", "1445": "nan", "1450": "nan", "1455": "nan", "1460": "nan", "1465": "nan", "1470": "nan", "1475": "nan", "1480": "nan", "1485": "nan", "1490": "nan", "1495": "nan", "1500": 1.43988, "1505": "nan", "1510": "nan", "1515": "nan", "1520": "nan", "1525": "nan", "1530": "nan", "1535": "nan", "1540": "nan", "1545": "nan", "1550": "nan", "1555": "nan", "1560": "nan", "1565": "nan", "1570": "nan", "1575": "nan", "1580": "nan", "1585": "nan", "1590": "nan", "1595": "nan", "1600": 1.44315, "1605": "nan", "1610": "nan", "1615": "nan", "1620": "nan", "1625": "nan", "1630": "nan", "1635": "nan", "1640": "nan", "1645": "nan", "1650": "nan", "1655": "nan", "1660": "nan", "1665": "nan", "1670": "nan", "1675": "nan", "1680": "nan", "1685": "nan", "1690": "nan", "1695": "nan", "1700": 1.43619, "1705": "nan", "1710": "nan", "1715": "nan", "1720": "nan", "1725": "nan", "1730": "nan", "1735": "nan", "1740": "nan", "1745": "nan", "1750": "nan", "1755": "nan", "1760": "nan", "1765": "nan", "1770": "nan", "1775": "nan", "1780": "nan", "1785": "nan", "1790": "nan", "1795": "nan", "1800": 1.43304, "1805": "nan", "1810": "nan", "1815": "nan", "1820": "nan", "1825": "nan", "1830": "nan", "1835": "nan", "1840": "nan", "1845": "nan", "1850": "nan", "1855": "nan", "1860": "nan", "1865": "nan", "1870": "nan", "1875": "nan", "1880": "nan", "1885": "nan", "1890": "nan", "1895": "nan", "1900": 1.43344, "1905": "nan", "1910": "nan", "1915": "nan", "1920": "nan", "1925": "nan", "1930": "nan", "1935": "nan", "1940": "nan", "1945": "nan", "1950": "nan", "1955": "nan", "1960": "nan", "1965": "nan", "1970": "nan", "1975": "nan", "1980": "nan", "1985": "nan", "1990": "nan", "1995": "nan", "2000": 1.43358, "2005": "nan", "2010": "nan", "2015": "nan", "2020": "nan", "2025": "nan", "2030": "nan", "2035": "nan", "2040": "nan", "2045": "nan", "2050": "nan", "2055": "nan", "2060": "nan", "2065": "nan", "2070": "nan", "2075": "nan", "2080": "nan", "2085": "nan", "2090": "nan", "2095": "nan", "2100": 1.43271, "2105": "nan", "2110": "nan", "2115": "nan", "2120": "nan", "2125": "nan", "2130": "nan", "2135": "nan", "2140": "nan", "2145": "nan", "2150": "nan", "2155": "nan", "2160": "nan", "2165": "nan", "2170": "nan", "2175": "nan", "2180": "nan", "2185": "nan", "2190": "nan", "2195": "nan", "2200": 1.43782, "2205": "nan", "2210": "nan", "2215": "nan", "2220": "nan", "2225": "nan", "2230": "nan", "2235": "nan", "2240": "nan", "2245": "nan", "2250": "nan", "2255": "nan", "2260": "nan", "2265": "nan", "2270": "nan", "2275": "nan", "2280": "nan", "2285": "nan", "2290": "nan", "2295": "nan", "2300": 1.43678, "2305": "nan", "2310": "nan", "2315": "nan", "2320": "nan", "2325": "nan", "2330": "nan", "2335": "nan", "2340": "nan", "2345": "nan", "2350": "nan", "2355": "nan", "2360": "nan", "2365": "nan", "2370": "nan", "2375": "nan", "2380": "nan", "2385": "nan", "2390": "nan", "2395": "nan", "2400": 1.43119, "2405": "nan", "2410": "nan", "2415": "nan", "2420": "nan", "2425": "nan", "2430": "nan", "2435": "nan", "2440": "nan", "2445": "nan", "2450": "nan", "2455": "nan", "2460": "nan", "2465": "nan", "2470": "nan", "2475": "nan", "2480": "nan", "2485": "nan", "2490": "nan", "2495": "nan", "2500": 1.43602, "2505": "nan", "2510": "nan", "2515": "nan", "2520": "nan", "2525": "nan", "2530": "nan", "2535": "nan", "2540": "nan", "2545": "nan", "2550": "nan", "2555": "nan", "2560": "nan", "2565": "nan", "2570": "nan", "2575": "nan", "2580": "nan", "2585": "nan", "2590": "nan", "2595": "nan", "2600": 1.43407, "2605": "nan", "2610": "nan", "2615": "nan", "2620": "nan", "2625": "nan", "2630": "nan", "2635": "nan", "2640": "nan", "2645": "nan", "2650": "nan", "2655": "nan", "2660": "nan", "2665": "nan", "2670": "nan", "2675": "nan", "2680": "nan", "2685": "nan", "2690": "nan", "2695": "nan", "2700": 1.4325, "2705": "nan", "2710": "nan", "2715": "nan", "2720": "nan", "2725": "nan", "2730": "nan", "2735": "nan", "2740": "nan", "2745": "nan", "2750": "nan", "2755": "nan", "2760": "nan", "2765": "nan", "2770": "nan", "2775": "nan", "2780": "nan", "2785": "nan", "2790": "nan", "2795": "nan", "2800": 1.43246, "2805": "nan", "2810": "nan", "2815": "nan", "2820": "nan", "2825": "nan", "2830": "nan", "2835": "nan", "2840": "nan", "2845": "nan", "2850": "nan", "2855": "nan", "2860": "nan", "2865": "nan", "2870": "nan", "2875": "nan", "2880": "nan", "2885": "nan", "2890": "nan", "2895": "nan", "2900": 1.43175, "2905": "nan", "2910": "nan", "2915": "nan", "2920": "nan", "2925": "nan", "2930": "nan", "2935": "nan", "2940": "nan", "2945": "nan", "2950": "nan", "2955": "nan", "2960": "nan", "2965": "nan", "2970": "nan", "2975": "nan", "2980": "nan", "2985": "nan", "2990": "nan", "2995": "nan", "3000": 1.43463, "3005": "nan", "3010": "nan", "3015": "nan", "3020": "nan", "3025": "nan", "3030": "nan", "3035": "nan", "3040": "nan", "3045": "nan", "3050": "nan", "3055": "nan", "3060": "nan", "3065": "nan", "3070": "nan", "3075": "nan", "3080": "nan", "3085": "nan", "3090": "nan", "3095": "nan", "3100": 1.43283, "3105": "nan", "3110": "nan", "3115": "nan", "3120": "nan", "3125": "nan", "3130": "nan", "3135": "nan", "3140": "nan", "3145": "nan", "3150": "nan", "3155": "nan", "3160": "nan", "3165": "nan", "3170": "nan", "3175": "nan", "3180": "nan", "3185": "nan", "3190": "nan", "3195": "nan", "3200": 1.42883, "3205": "nan", "3210": "nan", "3215": "nan", "3220": "nan", "3225": "nan", "3230": "nan", "3235": "nan", "3240": "nan", "3245": "nan", "3250": "nan", "3255": "nan", "3260": "nan", "3265": "nan", "3270": "nan", "3275": "nan", "3280": "nan", "3285": "nan", "3290": "nan", "3295": "nan", "3300": 1.42998, "3305": "nan", "3310": "nan", "3315": "nan", "3320": "nan", "3325": "nan", "3330": "nan", "3335": "nan", "3340": "nan", "3345": "nan", "3350": "nan", "3355": "nan", "3360": "nan", "3365": "nan", "3370": "nan", "3375": "nan", "3380": "nan", "3385": "nan", "3390": "nan", "3395": "nan", "3400": 1.43261, "3405": "nan", "3410": "nan", "3415": "nan", "3420": "nan", "3425": "nan", "3430": "nan", "3435": "nan", "3440": "nan", "3445": "nan", "3450": "nan", "3455": "nan", "3460": "nan", "3465": "nan", "3470": "nan", "3475": "nan", "3480": "nan", "3485": "nan", "3490": "nan", "3495": "nan", "3500": 1.43437, "3505": "nan", "3510": "nan", "3515": "nan", "3520": "nan", "3525": "nan", "3530": "nan", "3535": "nan", "3540": "nan", "3545": "nan", "3550": "nan", "3555": "nan", "3560": "nan", "3565": "nan", "3570": "nan", "3575": "nan", "3580": "nan", "3585": "nan", "3590": "nan", "3595": "nan", "3600": 1.43134, "3605": "nan", "3610": "nan", "3615": "nan", "3620": "nan", "3625": "nan", "3630": "nan", "3635": "nan", "3640": "nan", "3645": "nan", "3650": "nan", "3655": "nan", "3660": "nan", "3665": "nan", "3670": "nan", "3675": "nan", "3680": "nan", "3685": "nan", "3690": "nan", "3695": "nan", "3700": 1.43056, "3705": "nan", "3710": "nan", "3715": "nan", "3720": "nan", "3725": "nan", "3730": "nan", "3735": "nan", "3740": "nan", "3745": "nan", "3750": "nan", "3755": "nan", "3760": "nan", "3765": "nan", "3770": "nan", "3775": "nan", "3780": "nan", "3785": "nan", "3790": "nan", "3795": "nan", "3800": 1.43644, "3805": "nan", "3810": "nan", "3815": "nan", "3820": "nan", "3825": "nan", "3830": "nan", "3835": "nan", "3840": "nan", "3845": "nan", "3850": "nan", "3855": "nan", "3860": "nan", "3865": "nan", "3870": "nan", "3875": "nan", "3880": "nan", "3885": "nan", "3890": "nan", "3895": "nan", "3900": 1.4317, "3905": "nan", "3910": "nan", "3915": "nan", "3920": "nan", "3925": "nan", "3930": "nan", "3935": "nan", "3940": "nan", "3945": "nan", "3950": "nan", "3955": "nan", "3960": "nan", "3965": "nan", "3970": "nan", "3975": "nan", "3980": "nan", "3985": "nan", "3990": "nan", "3995": "nan", "4000": 1.43009, "4005": "nan", "4010": "nan", "4015": "nan", "4020": "nan", "4025": "nan", "4030": "nan", "4035": "nan", "4040": "nan", "4045": "nan", "4050": "nan", "4055": "nan", "4060": "nan", "4065": "nan", "4070": "nan", "4075": "nan", "4080": "nan", "4085": "nan", "4090": "nan", "4095": "nan", "4100": 1.42741, "4105": "nan", "4110": "nan", "4115": "nan", "4120": "nan", "4125": "nan", "4130": "nan", "4135": "nan", "4140": "nan", "4145": "nan", "4150": "nan", "4155": "nan", "4160": "nan", "4165": "nan", "4170": "nan", "4175": "nan", "4180": "nan", "4185": "nan", "4190": "nan", "4195": "nan", "4200": 1.43121, "4205": "nan", "4210": "nan", "4215": "nan", "4220": "nan", "4225": "nan", "4230": "nan", "4235": "nan", "4240": "nan", "4245": "nan", "4250": "nan", "4255": "nan", "4260": "nan", "4265": "nan", "4270": "nan", "4275": "nan", "4280": "nan", "4285": "nan", "4290": "nan", "4295": "nan", "4300": 1.42721, "4305": "nan", "4310": "nan", "4315": "nan", "4320": "nan", "4325": "nan", "4330": "nan", "4335": "nan", "4340": "nan", "4345": "nan", "4350": "nan", "4355": "nan", "4360": "nan", "4365": "nan", "4370": "nan", "4375": "nan", "4380": "nan", "4385": "nan", "4390": "nan", "4395": "nan", "4400": 1.43191, "4405": "nan", "4410": "nan", "4415": "nan", "4420": "nan", "4425": "nan", "4430": "nan", "4435": "nan", "4440": "nan", "4445": "nan", "4450": "nan", "4455": "nan", "4460": "nan", "4465": "nan", "4470": "nan", "4475": "nan", "4480": "nan", "4485": "nan", "4490": "nan", "4495": "nan", "4500": 1.43349, "4505": "nan", "4510": "nan", "4515": "nan", "4520": "nan", "4525": "nan", "4530": "nan", "4535": "nan", "4540": "nan", "4545": "nan", "4550": "nan", "4555": "nan", "4560": "nan", "4565": "nan", "4570": "nan", "4575": "nan", "4580": "nan", "4585": "nan", "4590": "nan", "4595": "nan", "4600": 1.43286, "4605": "nan", "4610": "nan", "4615": "nan", "4620": "nan", "4625": "nan", "4630": "nan", "4635": "nan", "4640": "nan", "4645": "nan", "4650": "nan", "4655": "nan", "4660": "nan", "4665": "nan", "4670": "nan", "4675": "nan", "4680": "nan", "4685": "nan", "4690": "nan", "4695": "nan", "4700": 3.71165, "4705": "nan", "4710": "nan", "4715": "nan", "4720": "nan", "4725": "nan", "4730": "nan", "4735": "nan", "4740": "nan", "4745": "nan", "4750": "nan", "4755": "nan", "4760": "nan", "4765": "nan", "4770": "nan", "4775": "nan", "4780": "nan", "4785": "nan", "4790": "nan", "4795": "nan", "4800": 1.42924, "4805": "nan", "4810": "nan", "4815": "nan", "4820": "nan", "4825": "nan", "4830": "nan", "4835": "nan", "4840": "nan", "4845": "nan", "4850": "nan", "4855": "nan", "4860": "nan", "4865": "nan", "4870": "nan", "4875": "nan", "4880": "nan", "4885": "nan", "4890": "nan", "4895": "nan", "4900": 1.43045, "4905": "nan", "4910": "nan", "4915": "nan", "4920": "nan", "4925": "nan", "4930": "nan", "4935": "nan", "4940": "nan", "4945": "nan", "4950": "nan", "4955": "nan", "4960": "nan", "4965": "nan", "4970": "nan", "4975": "nan", "4980": "nan", "4985": "nan", "4990": "nan", "4995": "nan", "5000": 1.43017, "5005": "nan", "5010": "nan", "5015": "nan", "5020": "nan", "5025": "nan", "5030": "nan", "5035": "nan", "5040": "nan", "5045": "nan", "5050": "nan", "5055": "nan", "5060": "nan", "5065": "nan", "5070": "nan", "5075": "nan", "5080": "nan", "5085": "nan", "5090": "nan", "5095": "nan", "5100": 1.42822, "5105": "nan", "5110": "nan", "5115": "nan", "5120": "nan", "5125": "nan", "5130": "nan", "5135": "nan", "5140": "nan", "5145": "nan", "5150": "nan", "5155": "nan", "5160": "nan", "5165": "nan", "5170": "nan", "5175": "nan", "5180": "nan", "5185": "nan", "5190": "nan", "5195": "nan", "5200": 1.43114, "5205": "nan", "5210": "nan", "5215": "nan", "5220": "nan", "5225": "nan", "5230": "nan", "5235": "nan", "5240": "nan", "5245": "nan", "5250": "nan", "5255": "nan", "5260": "nan", "5265": "nan", "5270": "nan", "5275": "nan", "5280": "nan", "5285": "nan", "5290": "nan", "5295": "nan", "5300": 1.4292, "5305": "nan", "5310": "nan", "5315": "nan", "5320": "nan", "5325": "nan", "5330": "nan", "5335": "nan", "5340": "nan", "5345": "nan", "5350": "nan", "5355": "nan", "5360": "nan", "5365": "nan", "5370": "nan", "5375": "nan", "5380": "nan", "5385": "nan", "5390": "nan", "5395": "nan", "5400": 1.4317, "5405": "nan", "5410": "nan", "5415": "nan", "5420": "nan", "5425": "nan", "5430": "nan", "5435": "nan", "5440": "nan", "5445": "nan", "5450": "nan", "5455": "nan", "5460": "nan", "5465": "nan", "5470": "nan", "5475": "nan", "5480": "nan", "5485": "nan", "5490": "nan", "5495": "nan", "5500": 1.42816, "5505": "nan", "5510": "nan", "5515": "nan", "5520": "nan", "5525": "nan", "5530": "nan", "5535": "nan", "5540": "nan", "5545": "nan", "5550": "nan", "5555": "nan", "5560": "nan", "5565": "nan", "5570": "nan", "5575": "nan", "5580": "nan", "5585": "nan", "5590": "nan", "5595": "nan", "5600": 1.43226, "5605": "nan", "5610": "nan", "5615": "nan", "5620": "nan", "5625": "nan", "5630": "nan", "5635": "nan", "5640": "nan", "5645": "nan", "5650": "nan", "5655": "nan", "5660": "nan", "5665": "nan", "5670": "nan", "5675": "nan", "5680": "nan", "5685": "nan", "5690": "nan", "5695": "nan", "5700": 1.43384, "5705": "nan", "5710": "nan", "5715": "nan", "5720": "nan", "5725": "nan", "5730": "nan", "5735": "nan", "5740": "nan", "5745": "nan", "5750": "nan", "5755": "nan", "5760": "nan", "5765": "nan", "5770": "nan", "5775": "nan", "5780": "nan", "5785": "nan", "5790": "nan", "5795": "nan", "5800": 1.42987, "5805": "nan", "5810": "nan", "5815": "nan", "5820": "nan", "5825": "nan", "5830": "nan", "5835": "nan", "5840": "nan", "5845": "nan", "5850": "nan", "5855": "nan", "5860": "nan", "5865": "nan", "5870": "nan", "5875": "nan", "5880": "nan", "5885": "nan", "5890": "nan", "5895": "nan", "5900": 1.43081, "5905": "nan", "5910": "nan", "5915": "nan", "5920": "nan", "5925": "nan", "5930": "nan", "5935": "nan", "5940": "nan", "5945": "nan", "5950": "nan", "5955": "nan", "5960": "nan", "5965": "nan", "5970": "nan", "5975": "nan", "5980": "nan", "5985": "nan", "5990": "nan", "5995": "nan", "6000": 1.43544, "6005": "nan", "6010": "nan", "6015": "nan", "6020": "nan", "6025": "nan", "6030": "nan", "6035": "nan", "6040": "nan", "6045": "nan", "6050": "nan", "6055": "nan", "6060": "nan", "6065": "nan", "6070": "nan", "6075": "nan", "6080": "nan", "6085": "nan", "6090": "nan", "6095": "nan", "6100": 1.4309, "6105": "nan", "6110": "nan", "6115": "nan", "6120": "nan", "6125": "nan", "6130": "nan", "6135": "nan", "6140": "nan", "6145": "nan", "6150": "nan", "6155": "nan", "6160": "nan", "6165": "nan", "6170": "nan", "6175": "nan", "6180": "nan", "6185": "nan", "6190": "nan", "6195": "nan", "6200": 1.4316, "6205": "nan", "6210": "nan", "6215": "nan", "6220": "nan", "6225": "nan", "6230": "nan", "6235": "nan", "6240": "nan", "6245": "nan", "6250": "nan", "6255": "nan", "6260": "nan", "6265": "nan", "6270": "nan", "6275": "nan", "6280": "nan", "6285": "nan", "6290": "nan", "6295": "nan", "6300": 1.43711, "6305": "nan", "6310": "nan", "6315": "nan", "6320": "nan", "6325": "nan", "6330": "nan", "6335": "nan", "6340": "nan", "6345": "nan", "6350": "nan", "6355": "nan", "6360": "nan", "6365": "nan", "6370": "nan", "6375": "nan", "6380": "nan", "6385": "nan", "6390": "nan", "6395": "nan", "6400": 1.42957, "6405": "nan", "6410": "nan", "6415": "nan", "6420": "nan", "6425": "nan", "6430": "nan", "6435": "nan", "6440": "nan", "6445": "nan", "6450": "nan", "6455": "nan", "6460": "nan", "6465": "nan", "6470": "nan", "6475": "nan", "6480": "nan", "6485": "nan", "6490": "nan", "6495": "nan", "6500": 1.43224, "6505": "nan", "6510": "nan", "6515": "nan", "6520": "nan", "6525": "nan", "6530": "nan", "6535": "nan", "6540": "nan", "6545": "nan", "6550": "nan", "6555": "nan", "6560": "nan", "6565": "nan", "6570": "nan", "6575": "nan", "6580": "nan", "6585": "nan", "6590": "nan", "6595": "nan", "6600": 1.43401, "6605": "nan", "6610": "nan", "6615": "nan", "6620": "nan", "6625": "nan", "6630": "nan", "6635": "nan", "6640": "nan", "6645": "nan", "6650": "nan", "6655": "nan", "6660": "nan", "6665": "nan", "6670": "nan", "6675": "nan", "6680": "nan", "6685": "nan", "6690": "nan", "6695": "nan", "6700": 1.43088, "6705": "nan", "6710": "nan", "6715": "nan", "6720": "nan", "6725": "nan", "6730": "nan", "6735": "nan", "6740": "nan", "6745": "nan", "6750": "nan", "6755": "nan", "6760": "nan", "6765": "nan", "6770": "nan", "6775": "nan", "6780": "nan", "6785": "nan", "6790": "nan", "6795": "nan", "6800": 1.4311, "6805": "nan", "6810": "nan", "6815": "nan", "6820": "nan", "6825": "nan", "6830": "nan", "6835": "nan", "6840": "nan", "6845": "nan", "6850": "nan", "6855": "nan", "6860": "nan", "6865": "nan", "6870": "nan", "6875": "nan", "6880": "nan", "6885": "nan", "6890": "nan", "6895": "nan", "6900": 1.43471, "6905": "nan", "6910": "nan", "6915": "nan", "6920": "nan", "6925": "nan", "6930": "nan", "6935": "nan", "6940": "nan", "6945": "nan", "6950": "nan", "6955": "nan", "6960": "nan", "6965": "nan", "6970": "nan", "6975": "nan", "6980": "nan", "6985": "nan", "6990": "nan", "6995": "nan", "7000": 1.43036, "7005": "nan", "7010": "nan", "7015": "nan", "7020": "nan", "7025": "nan", "7030": "nan", "7035": "nan", "7040": "nan", "7045": "nan", "7050": "nan", "7055": "nan", "7060": "nan", "7065": "nan", "7070": "nan", "7075": "nan", "7080": "nan", "7085": "nan", "7090": "nan", "7095": "nan", "7100": 1.4376, "7105": "nan", "7110": "nan", "7115": "nan", "7120": "nan", "7125": "nan", "7130": "nan", "7135": "nan", "7140": "nan", "7145": "nan", "7150": "nan", "7155": "nan", "7160": "nan", "7165": "nan", "7170": "nan", "7175": "nan", "7180": "nan", "7185": "nan", "7190": "nan", "7195": "nan", "7200": 1.42982, "7205": "nan", "7210": "nan", "7215": "nan", "7220": "nan", "7225": "nan", "7230": "nan", "7235": "nan", "7240": "nan", "7245": "nan", "7250": "nan", "7255": "nan", "7260": "nan", "7265": "nan", "7270": "nan", "7275": "nan", "7280": "nan", "7285": "nan", "7290": "nan", "7295": "nan", "7300": 1.43392, "7305": "nan", "7310": "nan", "7315": "nan", "7320": "nan", "7325": "nan", "7330": "nan", "7335": "nan", "7340": "nan", "7345": "nan", "7350": "nan", "7355": "nan", "7360": "nan", "7365": "nan", "7370": "nan", "7375": "nan", "7380": "nan", "7385": "nan", "7390": "nan", "7395": "nan", "7400": 1.42904, "7405": "nan", "7410": "nan", "7415": "nan", "7420": "nan", "7425": "nan", "7430": "nan", "7435": "nan", "7440": "nan", "7445": "nan", "7450": "nan", "7455": "nan", "7460": "nan", "7465": "nan", "7470": "nan", "7475": "nan", "7480": "nan", "7485": "nan", "7490": "nan", "7495": "nan", "7500": 1.43035, "7505": "nan", "7510": "nan", "7515": "nan", "7520": "nan", "7525": "nan", "7530": "nan", "7535": "nan", "7540": "nan", "7545": "nan", "7550": "nan", "7555": "nan", "7560": "nan", "7565": "nan", "7570": "nan", "7575": "nan", "7580": "nan", "7585": "nan", "7590": "nan", "7595": "nan", "7600": 1.4362, "7605": "nan", "7610": "nan", "7615": "nan", "7620": "nan", "7625": "nan", "7630": "nan", "7635": "nan", "7640": "nan", "7645": "nan", "7650": "nan", "7655": "nan", "7660": "nan", "7665": "nan", "7670": "nan", "7675": "nan", "7680": "nan", "7685": "nan", "7690": "nan", "7695": "nan", "7700": 1.42859, "7705": "nan", "7710": "nan", "7715": "nan", "7720": "nan", "7725": "nan", "7730": "nan", "7735": "nan", "7740": "nan", "7745": "nan", "7750": "nan", "7755": "nan", "7760": "nan", "7765": "nan", "7770": "nan", "7775": "nan", "7780": "nan", "7785": "nan", "7790": "nan", "7795": "nan", "7800": 1.43678, "7805": "nan", "7810": "nan", "7815": "nan", "7820": "nan", "7825": "nan", "7830": "nan", "7835": "nan", "7840": "nan", "7845": "nan", "7850": "nan", "7855": "nan", "7860": "nan", "7865": "nan", "7870": "nan", "7875": "nan", "7880": "nan", "7885": "nan", "7890": "nan", "7895": "nan", "7900": 1.42859, "7905": "nan", "7910": "nan", "7915": "nan", "7920": "nan", "7925": "nan", "7930": "nan", "7935": "nan", "7940": "nan", "7945": "nan", "7950": "nan", "7955": "nan", "7960": "nan", "7965": "nan", "7970": "nan", "7975": "nan", "7980": "nan", "7985": "nan", "7990": "nan", "7995": "nan", "8000": 1.43129, "8005": "nan", "8010": "nan", "8015": "nan", "8020": "nan", "8025": "nan", "8030": "nan", "8035": "nan", "8040": "nan", "8045": "nan", "8050": "nan", "8055": "nan", "8060": "nan", "8065": "nan", "8070": "nan", "8075": "nan", "8080": "nan", "8085": "nan", "8090": "nan", "8095": "nan", "8100": 1.4292, "8105": "nan", "8110": "nan", "8115": "nan", "8120": "nan", "8125": "nan", "8130": "nan", "8135": "nan", "8140": "nan", "8145": "nan", "8150": "nan", "8155": "nan", "8160": "nan", "8165": "nan", "8170": "nan", "8175": "nan", "8180": "nan", "8185": "nan", "8190": "nan", "8195": "nan", "8200": 1.43499, "8205": "nan", "8210": "nan", "8215": "nan", "8220": "nan", "8225": "nan", "8230": "nan", "8235": "nan", "8240": "nan", "8245": "nan", "8250": "nan", "8255": "nan", "8260": "nan", "8265": "nan", "8270": "nan", "8275": "nan", "8280": "nan", "8285": "nan", "8290": "nan", "8295": "nan", "8300": 1.43237, "8305": "nan", "8310": "nan", "8315": "nan", "8320": "nan", "8325": "nan", "8330": "nan", "8335": "nan", "8340": "nan", "8345": "nan", "8350": "nan", "8355": "nan", "8360": "nan", "8365": "nan", "8370": "nan", "8375": "nan", "8380": "nan", "8385": "nan", "8390": "nan", "8395": "nan", "8400": 1.43531, "8405": "nan", "8410": "nan", "8415": "nan", "8420": "nan", "8425": "nan", "8430": "nan", "8435": "nan", "8440": "nan", "8445": "nan", "8450": "nan", "8455": "nan", "8460": "nan", "8465": "nan", "8470": "nan", "8475": "nan", "8480": "nan", "8485": "nan", "8490": "nan", "8495": "nan", "8500": 1.42929, "8505": "nan", "8510": "nan", "8515": "nan", "8520": "nan", "8525": "nan", "8530": "nan", "8535": "nan", "8540": "nan", "8545": "nan", "8550": "nan", "8555": "nan", "8560": "nan", "8565": "nan", "8570": "nan", "8575": "nan", "8580": "nan", "8585": "nan", "8590": "nan", "8595": "nan", "8600": 1.4331, "8605": "nan", "8610": "nan", "8615": "nan", "8620": "nan", "8625": "nan", "8630": "nan", "8635": "nan", "8640": "nan", "8645": "nan", "8650": "nan", "8655": "nan", "8660": "nan", "8665": "nan", "8670": "nan", "8675": "nan", "8680": "nan", "8685": "nan", "8690": "nan", "8695": "nan", "8700": 1.5651, "8705": "nan", "8710": "nan", "8715": "nan", "8720": "nan", "8725": "nan", "8730": "nan", "8735": "nan", "8740": "nan", "8745": "nan", "8750": "nan", "8755": "nan", "8760": "nan", "8765": "nan", "8770": "nan", "8775": "nan", "8780": "nan", "8785": "nan", "8790": "nan", "8795": "nan", "8800": 1.43225, "8805": "nan", "8810": "nan", "8815": "nan", "8820": "nan", "8825": "nan", "8830": "nan", "8835": "nan", "8840": "nan", "8845": "nan", "8850": "nan", "8855": "nan", "8860": "nan", "8865": "nan", "8870": "nan", "8875": "nan", "8880": "nan", "8885": "nan", "8890": "nan", "8895": "nan", "8900": 1.4356, "8905": "nan", "8910": "nan", "8915": "nan", "8920": "nan", "8925": "nan", "8930": "nan", "8935": "nan", "8940": "nan", "8945": "nan", "8950": "nan", "8955": "nan", "8960": "nan", "8965": "nan", "8970": "nan", "8975": "nan", "8980": "nan", "8985": "nan", "8990": "nan", "8995": "nan", "9000": 1.43205, "9005": "nan", "9010": "nan", "9015": "nan", "9020": "nan", "9025": "nan", "9030": "nan", "9035": "nan", "9040": "nan", "9045": "nan", "9050": "nan", "9055": "nan", "9060": "nan", "9065": "nan", "9070": "nan", "9075": "nan", "9080": "nan", "9085": "nan", "9090": "nan", "9095": "nan", "9100": 1.43368, "9105": "nan", "9110": "nan", "9115": "nan", "9120": "nan", "9125": "nan", "9130": "nan", "9135": "nan", "9140": "nan", "9145": "nan", "9150": "nan", "9155": "nan", "9160": "nan", "9165": "nan", "9170": "nan", "9175": "nan", "9180": "nan", "9185": "nan", "9190": "nan", "9195": "nan", "9200": 1.43276, "9205": "nan", "9210": "nan", "9215": "nan", "9220": "nan", "9225": "nan", "9230": "nan", "9235": "nan", "9240": "nan", "9245": "nan", "9250": "nan", "9255": "nan", "9260": "nan", "9265": "nan", "9270": "nan", "9275": "nan", "9280": "nan", "9285": "nan", "9290": "nan", "9295": "nan", "9300": 1.42947, "9305": "nan", "9310": "nan", "9315": "nan", "9320": "nan", "9325": "nan", "9330": "nan", "9335": "nan", "9340": "nan", "9345": "nan", "9350": "nan", "9355": "nan", "9360": "nan", "9365": "nan", "9370": "nan", "9375": "nan", "9380": "nan", "9385": "nan", "9390": "nan", "9395": "nan", "9400": 1.43015, "9405": "nan", "9410": "nan", "9415": "nan", "9420": "nan", "9425": "nan", "9430": "nan", "9435": "nan", "9440": "nan", "9445": "nan", "9450": "nan", "9455": "nan", "9460": "nan", "9465": "nan", "9470": "nan", "9475": "nan", "9480": "nan", "9485": "nan", "9490": "nan", "9495": "nan", "9500": 1.42971, "9505": "nan", "9510": "nan", "9515": "nan", "9520": "nan", "9525": "nan", "9530": "nan", "9535": "nan", "9540": "nan", "9545": "nan", "9550": "nan", "9555": "nan", "9560": "nan", "9565": "nan", "9570": "nan", "9575": "nan", "9580": "nan", "9585": "nan", "9590": "nan", "9595": "nan", "9600": 1.43075, "9605": "nan", "9610": "nan", "9615": "nan", "9620": "nan", "9625": "nan", "9630": "nan", "9635": "nan", "9640": "nan", "9645": "nan", "9650": "nan", "9655": "nan", "9660": "nan", "9665": "nan", "9670": "nan", "9675": "nan", "9680": "nan", "9685": "nan", "9690": "nan", "9695": "nan", "9700": 1.43177, "9705": "nan", "9710": "nan", "9715": "nan", "9720": "nan", "9725": "nan", "9730": "nan", "9735": "nan", "9740": "nan", "9745": "nan", "9750": "nan", "9755": "nan", "9760": "nan", "9765": "nan", "9770": "nan", "9775": "nan", "9780": "nan", "9785": "nan", "9790": "nan", "9795": "nan", "9800": 1.43003, "9805": "nan", "9810": "nan", "9815": "nan", "9820": "nan", "9825": "nan", "9830": "nan", "9835": "nan", "9840": "nan", "9845": "nan", "9850": "nan", "9855": "nan", "9860": "nan", "9865": "nan", "9870": "nan", "9875": "nan", "9880": "nan", "9885": "nan", "9890": "nan", "9895": "nan", "9900": 1.4321, "9905": "nan", "9910": "nan", "9915": "nan", "9920": "nan", "9925": "nan", "9930": "nan", "9935": "nan", "9940": "nan", "9945": "nan", "9950": "nan", "9955": "nan", "9960": "nan", "9965": "nan", "9970": "nan", "9975": "nan", "9980": "nan", "9985": "nan", "9990": "nan", "9995": "nan", "10000": 1.43257, "10005": "nan", "10010": "nan", "10015": "nan", "10020": "nan", "10025": "nan", "10030": "nan", "10035": "nan", "10040": "nan", "10045": "nan", "10050": "nan", "10055": "nan", "10060": "nan", "10065": "nan", "10070": "nan", "10075": "nan", "10080": "nan", "10085": "nan", "10090": "nan", "10095": "nan", "10100": 1.43058, "10105": "nan", "10110": "nan", "10115": "nan", "10120": "nan", "10125": "nan", "10130": "nan", "10135": "nan", "10140": "nan", "10145": "nan", "10150": "nan", "10155": "nan", "10160": "nan", "10165": "nan", "10170": "nan", "10175": "nan", "10180": "nan", "10185": "nan", "10190": "nan", "10195": "nan", "10200": 1.44031, "10205": "nan", "10210": "nan", "10215": "nan", "10220": "nan", "10225": "nan", "10230": "nan", "10235": "nan", "10240": "nan", "10245": "nan", "10250": "nan", "10255": "nan", "10260": "nan", "10265": "nan", "10270": "nan", "10275": "nan", "10280": "nan", "10285": "nan", "10290": "nan", "10295": "nan", "10300": 1.42855, "10305": "nan", "10310": "nan", "10315": "nan", "10320": "nan", "10325": "nan", "10330": "nan", "10335": "nan", "10340": "nan", "10345": "nan", "10350": "nan", "10355": "nan", "10360": "nan", "10365": "nan", "10370": "nan", "10375": "nan", "10380": "nan", "10385": "nan", "10390": "nan", "10395": "nan", "10400": 1.43328, "10405": "nan", "10410": "nan", "10415": "nan", "10420": "nan", "10425": "nan", "10430": "nan", "10435": "nan", "10440": "nan", "10445": "nan", "10450": "nan", "10455": "nan", "10460": "nan", "10465": "nan", "10470": "nan", "10475": "nan", "10480": "nan", "10485": "nan", "10490": "nan", "10495": "nan", "10500": 1.42867, "10505": "nan", "10510": "nan", "10515": "nan", "10520": "nan", "10525": "nan", "10530": "nan", "10535": "nan", "10540": "nan", "10545": "nan", "10550": "nan", "10555": "nan", "10560": "nan", "10565": "nan", "10570": "nan", "10575": "nan", "10580": "nan", "10585": "nan", "10590": "nan", "10595": "nan", "10600": 1.43072, "10605": "nan", "10610": "nan", "10615": "nan", "10620": "nan", "10625": "nan", "10630": "nan", "10635": "nan", "10640": "nan", "10645": "nan", "10650": "nan", "10655": "nan", "10660": "nan", "10665": "nan", "10670": "nan", "10675": "nan", "10680": "nan", "10685": "nan", "10690": "nan", "10695": "nan", "10700": 1.42775, "10705": "nan", "10710": "nan", "10715": "nan", "10720": "nan", "10725": "nan", "10730": "nan", "10735": "nan", "10740": "nan", "10745": "nan", "10750": "nan", "10755": "nan", "10760": "nan", "10765": "nan", "10770": "nan", "10775": "nan", "10780": "nan", "10785": "nan", "10790": "nan", "10795": "nan", "10800": 1.43172, "10805": "nan", "10810": "nan", "10815": "nan", "10820": "nan", "10825": "nan", "10830": "nan", "10835": "nan", "10840": "nan", "10845": "nan", "10850": "nan", "10855": "nan", "10860": "nan", "10865": "nan", "10870": "nan", "10875": "nan", "10880": "nan", "10885": "nan", "10890": "nan", "10895": "nan", "10900": 1.43036, "10905": "nan", "10910": "nan", "10915": "nan", "10920": "nan", "10925": "nan", "10930": "nan", "10935": "nan", "10940": "nan", "10945": "nan", "10950": "nan", "10955": "nan", "10960": "nan", "10965": "nan", "10970": "nan", "10975": "nan", "10980": "nan", "10985": "nan", "10990": "nan", "10995": "nan", "11000": 1.43284, "11005": "nan", "11010": "nan", "11015": "nan", "11020": "nan", "11025": "nan", "11030": "nan", "11035": "nan", "11040": "nan", "11045": "nan", "11050": "nan", "11055": "nan", "11060": "nan", "11065": "nan", "11070": "nan", "11075": "nan", "11080": "nan", "11085": "nan", "11090": "nan", "11095": "nan", "11100": 1.42847, "11105": "nan", "11110": "nan", "11115": "nan", "11120": "nan", "11125": "nan", "11130": "nan", "11135": "nan", "11140": "nan", "11145": "nan", "11150": "nan", "11155": "nan", "11160": "nan", "11165": "nan", "11170": "nan", "11175": "nan", "11180": "nan", "11185": "nan", "11190": "nan", "11195": "nan", "11200": 1.43014, "11205": "nan", "11210": "nan", "11215": "nan", "11220": "nan", "11225": "nan", "11230": "nan", "11235": "nan", "11240": "nan", "11245": "nan", "11250": "nan", "11255": "nan", "11260": "nan", "11265": "nan", "11270": "nan", "11275": "nan", "11280": "nan", "11285": "nan", "11290": "nan", "11295": "nan", "11300": 1.43285, "11305": "nan", "11310": "nan", "11315": "nan", "11320": "nan", "11325": "nan", "11330": "nan", "11335": "nan", "11340": "nan", "11345": "nan", "11350": "nan", "11355": "nan", "11360": "nan", "11365": "nan", "11370": "nan", "11375": "nan", "11380": "nan", "11385": "nan", "11390": "nan", "11395": "nan", "11400": 1.43092, "11405": "nan", "11410": "nan", "11415": "nan", "11420": "nan", "11425": "nan", "11430": "nan", "11435": "nan", "11440": "nan", "11445": "nan", "11450": "nan", "11455": "nan", "11460": "nan", "11465": "nan", "11470": "nan", "11475": "nan", "11480": "nan", "11485": "nan", "11490": "nan", "11495": "nan", "11500": 1.43357, "11505": "nan", "11510": "nan", "11515": "nan", "11520": "nan", "11525": "nan", "11530": "nan", "11535": "nan", "11540": "nan", "11545": "nan", "11550": "nan", "11555": "nan", "11560": "nan", "11565": "nan", "11570": "nan", "11575": "nan", "11580": "nan", "11585": "nan", "11590": "nan", "11595": "nan", "11600": 1.43311, "11605": "nan", "11610": "nan", "11615": "nan", "11620": "nan", "11625": "nan", "11630": "nan", "11635": "nan", "11640": "nan", "11645": "nan", "11650": "nan", "11655": "nan", "11660": "nan", "11665": "nan", "11670": "nan", "11675": "nan", "11680": "nan", "11685": "nan", "11690": "nan", "11695": "nan", "11700": 1.42727, "11705": "nan", "11710": "nan", "11715": "nan", "11720": "nan", "11725": "nan", "11730": "nan", "11735": "nan", "11740": "nan", "11745": "nan", "11750": "nan", "11755": "nan", "11760": "nan", "11765": "nan", "11770": "nan", "11775": "nan", "11780": "nan", "11785": "nan", "11790": "nan", "11795": "nan", "11800": 1.43666, "11805": "nan", "11810": "nan", "11815": "nan", "11820": "nan", "11825": "nan", "11830": "nan", "11835": "nan", "11840": "nan", "11845": "nan", "11850": "nan", "11855": "nan", "11860": "nan", "11865": "nan", "11870": "nan", "11875": "nan", "11880": "nan", "11885": "nan", "11890": "nan", "11895": "nan", "11900": 1.4307, "11905": "nan", "11910": "nan", "11915": "nan", "11920": "nan", "11925": "nan", "11930": "nan", "11935": "nan", "11940": "nan", "11945": "nan", "11950": "nan", "11955": "nan", "11960": "nan", "11965": "nan", "11970": "nan", "11975": "nan", "11980": "nan", "11985": "nan", "11990": "nan", "11995": "nan", "12000": 1.43074, "12005": "nan", "12010": "nan", "12015": "nan", "12020": "nan", "12025": "nan", "12030": "nan", "12035": "nan", "12040": "nan", "12045": "nan", "12050": "nan", "12055": "nan", "12060": "nan", "12065": "nan", "12070": "nan", "12075": "nan", "12080": "nan", "12085": "nan", "12090": "nan", "12095": "nan", "12100": 1.43213, "12105": "nan", "12110": "nan", "12115": "nan", "12120": "nan", "12125": "nan", "12130": "nan", "12135": "nan", "12140": "nan", "12145": "nan", "12150": "nan", "12155": "nan", "12160": "nan", "12165": "nan", "12170": "nan", "12175": "nan", "12180": "nan", "12185": "nan", "12190": "nan", "12195": "nan", "12200": 1.42984, "12205": "nan", "12210": "nan", "12215": "nan", "12220": "nan", "12225": "nan", "12230": "nan", "12235": "nan", "12240": "nan", "12245": "nan", "12250": "nan", "12255": "nan", "12260": "nan", "12265": "nan", "12270": "nan", "12275": "nan", "12280": "nan", "12285": "nan", "12290": "nan", "12295": "nan", "12300": 1.43861, "12305": "nan", "12310": "nan", "12315": "nan", "12320": "nan", "12325": "nan", "12330": "nan", "12335": "nan", "12340": "nan", "12345": "nan", "12350": "nan", "12355": "nan", "12360": "nan", "12365": "nan", "12370": "nan", "12375": "nan", "12380": "nan", "12385": "nan", "12390": "nan", "12395": "nan", "12400": 1.43459, "12405": "nan", "12410": "nan", "12415": "nan", "12420": "nan", "12425": "nan", "12430": "nan", "12435": "nan", "12440": "nan", "12445": "nan", "12450": "nan", "12455": "nan", "12460": "nan", "12465": "nan", "12470": "nan", "12475": "nan", "12480": "nan", "12485": "nan", "12490": "nan", "12495": "nan", "12500": 1.43331, "12505": "nan", "12510": "nan", "12515": "nan", "12520": "nan", "12525": "nan", "12530": "nan", "12535": "nan", "12540": "nan", "12545": "nan", "12550": "nan", "12555": "nan", "12560": "nan", "12565": "nan", "12570": "nan", "12575": "nan", "12580": "nan", "12585": "nan", "12590": "nan", "12595": "nan", "12600": 1.43213, "12605": "nan", "12610": "nan", "12615": "nan", "12620": "nan", "12625": "nan", "12630": "nan", "12635": "nan", "12640": "nan", "12645": "nan", "12650": "nan", "12655": "nan", "12660": "nan", "12665": "nan", "12670": "nan", "12675": "nan", "12680": "nan", "12685": "nan", "12690": "nan", "12695": "nan", "12700": 1.43625, "12705": "nan", "12710": "nan", "12715": "nan", "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": 1.42902, "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": 1.43167, "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": 1.43131, "13005": "nan", "13010": "nan", "13015": "nan", "13020": "nan", "13025": "nan", "13030": "nan", "13035": "nan", "13040": "nan", "13045": "nan", "13050": "nan", "13055": "nan", "13060": "nan", "13065": "nan", "13070": "nan", "13075": "nan", "13080": "nan", "13085": "nan", "13090": "nan", "13095": "nan", "13100": 1.42918, "13105": "nan", "13110": "nan", "13115": "nan", "13120": "nan", "13125": "nan", "13130": "nan", "13135": "nan", "13140": "nan", "13145": "nan", "13150": "nan", "13155": "nan", "13160": "nan", "13165": "nan", "13170": "nan", "13175": "nan", "13180": "nan", "13185": "nan", "13190": "nan", "13195": "nan", "13200": 1.43094, "13205": "nan", "13210": "nan", "13215": "nan", "13220": "nan", "13225": "nan", "13230": "nan", "13235": "nan", "13240": "nan", "13245": "nan", "13250": "nan", "13255": "nan", "13260": "nan", "13265": "nan", "13270": "nan", "13275": "nan", "13280": "nan", "13285": "nan", "13290": "nan", "13295": "nan", "13300": 1.42995, "13305": "nan", "13310": "nan", "13315": "nan", "13320": "nan", "13325": "nan", "13330": "nan", "13335": "nan", "13340": "nan", "13345": "nan", "13350": "nan", "13355": "nan", "13360": "nan", "13365": "nan", "13370": "nan", "13375": "nan", "13380": "nan", "13385": "nan", "13390": "nan", "13395": "nan", "13400": 1.43243, "13405": "nan", "13410": "nan", "13415": "nan", "13420": "nan", "13425": "nan", "13430": "nan", "13435": "nan", "13440": "nan", "13445": "nan", "13450": "nan", "13455": "nan", "13460": "nan", "13465": "nan", "13470": "nan", "13475": "nan", "13480": "nan", "13485": "nan", "13490": "nan", "13495": "nan", "13500": 1.43579, "13505": "nan", "13510": "nan", "13515": "nan", "13520": "nan", "13525": "nan", "13530": "nan", "13535": "nan", "13540": "nan", "13545": "nan", "13550": "nan", "13555": "nan", "13560": "nan", "13565": "nan", "13570": "nan", "13575": "nan", "13580": "nan", "13585": "nan", "13590": "nan", "13595": "nan", "13600": 1.42694, "13605": "nan", "13610": "nan", "13615": "nan", "13620": "nan", "13625": "nan", "13630": "nan", "13635": "nan", "13640": "nan", "13645": "nan", "13650": "nan", "13655": "nan", "13660": "nan", "13665": "nan", "13670": "nan", "13675": "nan", "13680": "nan", "13685": "nan", "13690": "nan", "13695": "nan", "13700": 1.42901, "13705": "nan", "13710": "nan", "13715": "nan", "13720": "nan", "13725": "nan", "13730": "nan", "13735": "nan", "13740": "nan", "13745": "nan", "13750": "nan", "13755": "nan", "13760": "nan", "13765": "nan", "13770": "nan", "13775": "nan", "13780": "nan", "13785": "nan", "13790": "nan", "13795": "nan", "13800": 1.43364, "13805": "nan", "13810": "nan", "13815": "nan", "13820": "nan", "13825": "nan", "13830": "nan", "13835": "nan", "13840": "nan", "13845": "nan", "13850": "nan", "13855": "nan", "13860": "nan", "13865": "nan", "13870": "nan", "13875": "nan", "13880": "nan", "13885": "nan", "13890": "nan", "13895": "nan", "13900": 1.42931, "13905": "nan", "13910": "nan", "13915": "nan", "13920": "nan", "13925": "nan", "13930": "nan", "13935": "nan", "13940": "nan", "13945": "nan", "13950": "nan", "13955": "nan", "13960": "nan", "13965": "nan", "13970": "nan", "13975": "nan", "13980": "nan", "13985": "nan", "13990": "nan", "13995": "nan", "14000": 1.43174, "14005": "nan", "14010": "nan", "14015": "nan", "14020": "nan", "14025": "nan", "14030": "nan", "14035": "nan", "14040": "nan", "14045": "nan", "14050": "nan", "14055": "nan", "14060": "nan", "14065": "nan", "14070": "nan", "14075": "nan", "14080": "nan", "14085": "nan", "14090": "nan", "14095": "nan", "14100": 1.43215, "14105": "nan", "14110": "nan", "14115": "nan", "14120": "nan", "14125": "nan", "14130": "nan", "14135": "nan", "14140": "nan", "14145": "nan", "14150": "nan", "14155": "nan", "14160": "nan", "14165": "nan", "14170": "nan", "14175": "nan", "14180": "nan", "14185": "nan", "14190": "nan", "14195": "nan", "14200": 1.43238, "14205": "nan", "14210": "nan", "14215": "nan", "14220": "nan", "14225": "nan", "14230": "nan", "14235": "nan", "14240": "nan", "14245": "nan", "14250": "nan", "14255": "nan", "14260": "nan", "14265": "nan", "14270": "nan", "14275": "nan", "14280": "nan", "14285": "nan", "14290": "nan", "14295": "nan", "14300": 1.43206, "14305": "nan", "14310": "nan", "14315": "nan", "14320": "nan", "14325": "nan", "14330": "nan", "14335": "nan", "14340": "nan", "14345": "nan", "14350": "nan", "14355": "nan", "14360": "nan", "14365": "nan", "14370": "nan", "14375": "nan", "14380": "nan", "14385": "nan", "14390": "nan", "14395": "nan", "14400": 1.42871, "14405": "nan", "14410": "nan", "14415": "nan", "14420": "nan", "14425": "nan", "14430": "nan", "14435": "nan", "14440": "nan", "14445": "nan", "14450": "nan", "14455": "nan", "14460": "nan", "14465": "nan", "14470": "nan", "14475": "nan", "14480": "nan", "14485": "nan", "14490": "nan", "14495": "nan", "14500": 1.43326, "14505": "nan", "14510": "nan", "14515": "nan", "14520": "nan", "14525": "nan", "14530": "nan", "14535": "nan", "14540": "nan", "14545": "nan", "14550": "nan", "14555": "nan", "14560": "nan", "14565": "nan", "14570": "nan", "14575": "nan", "14580": "nan", "14585": "nan", "14590": "nan", "14595": "nan", "14600": 1.42896, "14605": "nan", "14610": "nan", "14615": "nan", "14620": "nan", "14625": "nan", "14630": "nan", "14635": "nan", "14640": "nan", "14645": "nan", "14650": "nan", "14655": "nan", "14660": "nan", "14665": "nan", "14670": "nan", "14675": "nan", "14680": "nan", "14685": "nan", "14690": "nan", "14695": "nan", "14700": 1.42934, "14705": "nan", "14710": "nan", "14715": "nan", "14720": "nan", "14725": "nan", "14730": "nan", "14735": "nan", "14740": "nan", "14745": "nan", "14750": "nan", "14755": "nan", "14760": "nan", "14765": "nan", "14770": "nan", "14775": "nan", "14780": "nan", "14785": "nan", "14790": "nan", "14795": "nan", "14800": 1.42909, "14805": "nan", "14810": "nan", "14815": "nan", "14820": "nan", "14825": "nan", "14830": "nan", "14835": "nan", "14840": "nan", "14845": "nan", "14850": "nan", "14855": "nan", "14860": "nan", "14865": "nan", "14870": "nan", "14875": "nan", "14880": "nan", "14885": "nan", "14890": "nan", "14895": "nan", "14900": 1.43203, "14905": "nan", "14910": "nan", "14915": "nan", "14920": "nan", "14925": "nan", "14930": "nan", "14935": "nan", "14940": "nan", "14945": "nan", "14950": "nan", "14955": "nan", "14960": "nan", "14965": "nan", "14970": "nan", "14975": "nan", "14980": "nan", "14985": "nan", "14990": "nan", "14995": "nan", "15000": 1.42856, "15005": "nan", "15010": "nan", "15015": "nan", "15020": "nan", "15025": "nan", "15030": "nan", "15035": "nan", "15040": "nan", "15045": "nan", "15050": "nan", "15055": "nan", "15060": "nan", "15065": "nan", "15070": "nan", "15075": "nan", "15080": "nan", "15085": "nan", "15090": "nan", "15095": "nan", "15100": 1.43255, "15105": "nan", "15110": "nan", "15115": "nan", "15120": "nan", "15125": "nan", "15130": "nan", "15135": "nan", "15140": "nan", "15145": "nan", "15150": "nan", "15155": "nan", "15160": "nan", "15165": "nan", "15170": "nan", "15175": "nan", "15180": "nan", "15185": "nan", "15190": "nan", "15195": "nan", "15200": 1.43016, "15205": "nan", "15210": "nan", "15215": "nan", "15220": "nan", "15225": "nan", "15230": "nan", "15235": "nan", "15240": "nan", "15245": "nan", "15250": "nan", "15255": "nan", "15260": "nan", "15265": "nan", "15270": "nan", "15275": "nan", "15280": "nan", "15285": "nan", "15290": "nan", "15295": "nan", "15300": 1.43183, "15305": "nan", "15310": "nan", "15315": "nan", "15320": "nan", "15325": "nan", "15330": "nan", "15335": "nan", "15340": "nan", "15345": "nan", "15350": "nan", "15355": "nan", "15360": "nan", "15365": "nan", "15370": "nan", "15375": "nan", "15380": "nan", "15385": "nan", "15390": "nan", "15395": "nan", "15400": 1.42954, "15405": "nan", "15410": "nan", "15415": "nan", "15420": "nan", "15425": "nan", "15430": "nan", "15435": "nan", "15440": "nan", "15445": "nan", "15450": "nan", "15455": "nan", "15460": "nan", "15465": "nan", "15470": "nan", "15475": "nan", "15480": "nan", "15485": "nan", "15490": "nan", "15495": "nan", "15500": 1.42822, "15505": "nan", "15510": "nan", "15515": "nan", "15520": "nan", "15525": "nan", "15530": "nan", "15535": "nan", "15540": "nan", "15545": "nan", "15550": "nan", "15555": "nan", "15560": "nan", "15565": "nan", "15570": "nan", "15575": "nan", "15580": "nan", "15585": "nan", "15590": "nan", "15595": "nan", "15600": 1.43605, "15605": "nan", "15610": "nan", "15615": "nan", "15620": "nan", "15625": "nan", "15630": "nan", "15635": "nan", "15640": "nan", "15645": "nan", "15650": "nan", "15655": "nan", "15660": "nan", "15665": "nan", "15670": "nan", "15675": "nan", "15680": "nan", "15685": "nan", "15690": "nan", "15695": "nan", "15700": 1.43507, "15705": "nan", "15710": "nan", "15715": "nan", "15720": "nan", "15725": "nan", "15730": "nan", "15735": "nan", "15740": "nan", "15745": "nan", "15750": "nan", "15755": "nan", "15760": "nan", "15765": "nan", "15770": "nan", "15775": "nan", "15780": "nan", "15785": "nan", "15790": "nan", "15795": "nan", "15800": 1.42806, "15805": "nan", "15810": "nan", "15815": "nan", "15820": "nan", "15825": "nan", "15830": "nan", "15835": "nan", "15840": "nan", "15845": "nan", "15850": "nan", "15855": "nan", "15860": "nan", "15865": "nan", "15870": "nan", "15875": "nan", "15880": "nan", "15885": "nan", "15890": "nan", "15895": "nan", "15900": 1.42825, "15905": "nan", "15910": "nan", "15915": "nan", "15920": "nan", "15925": "nan", "15930": "nan", "15935": "nan", "15940": "nan", "15945": "nan", "15950": "nan", "15955": "nan", "15960": "nan", "15965": "nan", "15970": "nan", "15975": "nan", "15980": "nan", "15985": "nan", "15990": "nan", "15995": "nan", "16000": 1.43226, "16005": "nan", "16010": "nan", "16015": "nan", "16020": "nan", "16025": "nan", "16030": "nan", "16035": "nan", "16040": "nan", "16045": "nan", "16050": "nan", "16055": "nan", "16060": "nan", "16065": "nan", "16070": "nan", "16075": "nan", "16080": "nan", "16085": "nan", "16090": "nan", "16095": "nan", "16100": 1.43136, "16105": "nan", "16110": "nan", "16115": "nan", "16120": "nan", "16125": "nan", "16130": "nan", "16135": "nan", "16140": "nan", "16145": "nan", "16150": "nan", "16155": "nan", "16160": "nan", "16165": "nan", "16170": "nan", "16175": "nan", "16180": "nan", "16185": "nan", "16190": "nan", "16195": "nan", "16200": 1.43106, "16205": "nan", "16210": "nan", "16215": "nan", "16220": "nan", "16225": "nan", "16230": "nan", "16235": "nan", "16240": "nan", "16245": "nan", "16250": "nan", "16255": "nan", "16260": "nan", "16265": "nan", "16270": "nan", "16275": "nan", "16280": "nan", "16285": "nan", "16290": "nan", "16295": "nan", "16300": 1.43475, "16305": "nan", "16310": "nan", "16315": "nan", "16320": "nan", "16325": "nan", "16330": "nan", "16335": "nan", "16340": "nan", "16345": "nan", "16350": "nan", "16355": "nan", "16360": "nan", "16365": "nan", "16370": "nan", "16375": "nan", "16380": "nan", "16385": "nan", "16390": "nan", "16395": "nan", "16400": 1.42649, "16405": "nan", "16410": "nan", "16415": "nan", "16420": "nan", "16425": "nan", "16430": "nan", "16435": "nan", "16440": "nan", "16445": "nan", "16450": "nan", "16455": "nan", "16460": "nan", "16465": "nan", "16470": "nan", "16475": "nan", "16480": "nan", "16485": "nan", "16490": "nan", "16495": "nan", "16500": 1.43013, "16505": "nan", "16510": "nan", "16515": "nan", "16520": "nan", "16525": "nan", "16530": "nan", "16535": "nan", "16540": "nan", "16545": "nan", "16550": "nan", "16555": "nan", "16560": "nan", "16565": "nan", "16570": "nan", "16575": "nan", "16580": "nan", "16585": "nan", "16590": "nan", "16595": "nan", "16600": 1.43233, "16605": "nan", "16610": "nan", "16615": "nan", "16620": "nan", "16625": "nan", "16630": "nan", "16635": "nan", "16640": "nan", "16645": "nan", "16650": "nan", "16655": "nan", "16660": "nan", "16665": "nan", "16670": "nan", "16675": "nan", "16680": "nan", "16685": "nan", "16690": "nan", "16695": "nan", "16700": 1.42949, "16705": "nan", "16710": "nan", "16715": "nan", "16720": "nan", "16725": "nan", "16730": "nan", "16735": "nan", "16740": "nan", "16745": "nan", "16750": "nan", "16755": "nan", "16760": "nan", "16765": "nan", "16770": "nan", "16775": "nan", "16780": "nan", "16785": "nan", "16790": "nan", "16795": "nan", "16800": 1.42861, "16805": "nan", "16810": "nan", "16815": "nan", "16820": "nan", "16825": "nan", "16830": "nan", "16835": "nan", "16840": "nan", "16845": "nan", "16850": "nan", "16855": "nan", "16860": "nan", "16865": "nan", "16870": "nan", "16875": "nan", "16880": "nan", "16885": "nan", "16890": "nan", "16895": "nan", "16900": 1.43279, "16905": "nan", "16910": "nan", "16915": "nan", "16920": "nan", "16925": "nan", "16930": "nan", "16935": "nan", "16940": "nan", "16945": "nan", "16950": "nan", "16955": "nan", "16960": "nan", "16965": "nan", "16970": "nan", "16975": "nan", "16980": "nan", "16985": "nan", "16990": "nan", "16995": "nan", "17000": 1.42745, "17005": "nan", "17010": "nan", "17015": "nan", "17020": "nan", "17025": "nan", "17030": "nan", "17035": "nan", "17040": "nan", "17045": "nan", "17050": "nan", "17055": "nan", "17060": "nan", "17065": "nan", "17070": "nan", "17075": "nan", "17080": "nan", "17085": "nan", "17090": "nan", "17095": "nan", "17100": 1.43416, "17105": "nan", "17110": "nan", "17115": "nan", "17120": "nan", "17125": "nan", "17130": "nan", "17135": "nan", "17140": "nan", "17145": "nan", "17150": "nan", "17155": "nan", "17160": "nan", "17165": "nan", "17170": "nan", "17175": "nan", "17180": "nan", "17185": "nan", "17190": "nan", "17195": "nan", "17200": 1.42831, "17205": "nan", "17210": "nan", "17215": "nan", "17220": "nan", "17225": "nan", "17230": "nan", "17235": "nan", "17240": "nan", "17245": "nan", "17250": "nan", "17255": "nan", "17260": "nan", "17265": "nan", "17270": "nan", "17275": "nan", "17280": "nan", "17285": "nan", "17290": "nan", "17295": "nan", "17300": 1.43259, "17305": "nan", "17310": "nan", "17315": "nan", "17320": "nan", "17325": "nan", "17330": "nan", "17335": "nan", "17340": "nan", "17345": "nan", "17350": "nan", "17355": "nan", "17360": "nan", "17365": "nan", "17370": "nan", "17375": "nan", "17380": "nan", "17385": "nan", "17390": "nan", "17395": "nan", "17400": 1.4343, "17405": "nan", "17410": "nan", "17415": "nan", "17420": "nan", "17425": "nan", "17430": "nan", "17435": "nan", "17440": "nan", "17445": "nan", "17450": "nan", "17455": "nan", "17460": "nan", "17465": "nan", "17470": "nan", "17475": "nan", "17480": "nan", "17485": "nan", "17490": "nan", "17495": "nan", "17500": 1.43511, "17505": "nan", "17510": "nan", "17515": "nan", "17520": "nan", "17525": "nan", "17530": "nan", "17535": "nan", "17540": "nan", "17545": "nan", "17550": "nan", "17555": "nan", "17560": "nan", "17565": "nan", "17570": "nan", "17575": "nan", "17580": "nan", "17585": "nan", "17590": "nan", "17595": "nan", "17600": 1.43068, "17605": "nan", "17610": "nan", "17615": "nan", "17620": "nan", "17625": "nan", "17630": "nan", "17635": "nan", "17640": "nan", "17645": "nan", "17650": "nan", "17655": "nan", "17660": "nan", "17665": "nan", "17670": "nan", "17675": "nan", "17680": "nan", "17685": "nan", "17690": "nan", "17695": "nan", "17700": 1.43407, "17705": "nan", "17710": "nan", "17715": "nan", "17720": "nan", "17725": "nan", "17730": "nan", "17735": "nan", "17740": "nan", "17745": "nan", "17750": "nan", "17755": "nan", "17760": "nan", "17765": "nan", "17770": "nan", "17775": "nan", "17780": "nan", "17785": "nan", "17790": "nan", "17795": "nan", "17800": 1.42856, "17805": "nan", "17810": "nan", "17815": "nan", "17820": "nan", "17825": "nan", "17830": "nan", "17835": "nan", "17840": "nan", "17845": "nan", "17850": "nan", "17855": "nan", "17860": "nan", "17865": "nan", "17870": "nan", "17875": "nan", "17880": "nan", "17885": "nan", "17890": "nan", "17895": "nan", "17900": 1.58623, "17905": "nan", "17910": "nan", "17915": "nan", "17920": "nan", "17925": "nan", "17930": "nan", "17935": "nan", "17940": "nan", "17945": "nan", "17950": "nan", "17955": "nan", "17960": "nan", "17965": "nan", "17970": "nan", "17975": "nan", "17980": "nan", "17985": "nan", "17990": "nan", "17995": "nan", "18000": 1.44103, "18005": "nan", "18010": "nan", "18015": "nan", "18020": "nan", "18025": "nan", "18030": "nan", "18035": "nan", "18040": "nan", "18045": "nan", "18050": "nan", "18055": "nan", "18060": "nan", "18065": "nan", "18070": "nan", "18075": "nan", "18080": "nan", "18085": "nan", "18090": "nan", "18095": "nan", "18100": 1.43733, "18105": "nan", "18110": "nan", "18115": "nan", "18120": "nan", "18125": "nan", "18130": "nan", "18135": "nan", "18140": "nan", "18145": "nan", "18150": "nan", "18155": "nan", "18160": "nan", "18165": "nan", "18170": "nan", "18175": "nan", "18180": "nan", "18185": "nan", "18190": "nan", "18195": "nan", "18200": 1.43905, "18205": "nan", "18210": "nan", "18215": "nan", "18220": "nan", "18225": "nan", "18230": "nan", "18235": "nan", "18240": "nan", "18245": "nan", "18250": "nan", "18255": "nan", "18260": "nan", "18265": "nan", "18270": "nan", "18275": "nan", "18280": "nan", "18285": "nan", "18290": "nan", "18295": "nan", "18300": 1.43723, "18305": "nan", "18310": "nan", "18315": "nan", "18320": "nan", "18325": "nan", "18330": "nan", "18335": "nan", "18340": "nan", "18345": "nan", "18350": "nan", "18355": "nan", "18360": "nan", "18365": "nan", "18370": "nan", "18375": "nan", "18380": "nan", "18385": "nan", "18390": "nan", "18395": "nan", "18400": 1.44, "18405": "nan", "18410": "nan", "18415": "nan", "18420": "nan", "18425": "nan", "18430": "nan", "18435": "nan", "18440": "nan", "18445": "nan", "18450": "nan", "18455": "nan", "18460": "nan", "18465": "nan", "18470": "nan", "18475": "nan", "18480": "nan", "18485": "nan", "18490": "nan", "18495": "nan", "18500": 1.43594, "18505": "nan", "18510": "nan", "18515": "nan", "18520": "nan", "18525": "nan", "18530": "nan", "18535": "nan", "18540": "nan", "18545": "nan", "18550": "nan", "18555": "nan", "18560": "nan", "18565": "nan", "18570": "nan", "18575": "nan", "18580": "nan", "18585": "nan", "18590": "nan", "18595": "nan", "18600": 1.47861, "18605": "nan", "18610": "nan", "18615": "nan", "18620": "nan", "18625": "nan", "18630": "nan", "18635": "nan", "18640": "nan", "18645": "nan", "18650": "nan", "18655": "nan", "18660": "nan", "18665": "nan", "18670": "nan", "18675": "nan", "18680": "nan", "18685": "nan", "18690": "nan", "18695": "nan", "18700": 1.44262, "18705": "nan", "18710": "nan", "18715": "nan", "18720": "nan", "18725": "nan", "18730": "nan", "18735": "nan", "18740": "nan", "18745": "nan", "18750": "nan", "18755": "nan", "18760": "nan", "18765": "nan", "18770": "nan", "18775": "nan", "18780": "nan", "18785": "nan", "18790": "nan", "18795": "nan", "18800": 1.43137, "18805": "nan", "18810": "nan", "18815": "nan", "18820": "nan", "18825": "nan", "18830": "nan", "18835": "nan", "18840": "nan", "18845": "nan", "18850": "nan", "18855": "nan", "18860": "nan", "18865": "nan", "18870": "nan", "18875": "nan", "18880": "nan", "18885": "nan", "18890": "nan", "18895": "nan", "18900": 1.43375, "18905": "nan", "18910": "nan", "18915": "nan", "18920": "nan", "18925": "nan", "18930": "nan", "18935": "nan", "18940": "nan", "18945": "nan", "18950": "nan", "18955": "nan", "18960": "nan", "18965": "nan", "18970": "nan", "18975": "nan", "18980": "nan", "18985": "nan", "18990": "nan", "18995": "nan", "19000": 1.44213, "19005": "nan", "19010": "nan", "19015": "nan", "19020": "nan", "19025": "nan", "19030": "nan", "19035": "nan", "19040": "nan", "19045": "nan", "19050": "nan", "19055": "nan", "19060": "nan", "19065": "nan", "19070": "nan", "19075": "nan", "19080": "nan", "19085": "nan", "19090": "nan", "19095": "nan", "19100": 1.43993, "19105": "nan", "19110": "nan", "19115": "nan", "19120": "nan", "19125": "nan", "19130": "nan", "19135": "nan", "19140": "nan", "19145": "nan", "19150": "nan", "19155": "nan", "19160": "nan", "19165": "nan", "19170": "nan", "19175": "nan", "19180": "nan", "19185": "nan", "19190": "nan", "19195": "nan", "19200": 1.43883, "19205": "nan", "19210": "nan", "19215": "nan", "19220": "nan", "19225": "nan", "19230": "nan", "19235": "nan", "19240": "nan", "19245": "nan", "19250": "nan", "19255": "nan", "19260": "nan", "19265": "nan", "19270": "nan", "19275": "nan", "19280": "nan", "19285": "nan", "19290": "nan", "19295": "nan", "19300": 1.44455, "19305": "nan", "19310": "nan", "19315": "nan", "19320": "nan", "19325": "nan", "19330": "nan", "19335": "nan", "19340": "nan", "19345": "nan", "19350": "nan", "19355": "nan", "19360": "nan", "19365": "nan", "19370": "nan", "19375": "nan", "19380": "nan", "19385": "nan", "19390": "nan", "19395": "nan", "19400": 1.44433, "19405": "nan", "19410": "nan", "19415": "nan", "19420": "nan", "19425": "nan", "19430": "nan", "19435": "nan", "19440": "nan", "19445": "nan", "19450": "nan", "19455": "nan", "19460": "nan", "19465": "nan", "19470": "nan", "19475": "nan", "19480": "nan", "19485": "nan", "19490": "nan", "19495": "nan", "19500": 1.43798, "19505": "nan", "19510": "nan", "19515": "nan", "19520": "nan", "19525": "nan", "19530": "nan", "19535": "nan", "19540": "nan", "19545": "nan", "19550": "nan", "19555": "nan", "19560": "nan", "19565": "nan", "19570": "nan", "19575": "nan", "19580": "nan", "19585": "nan", "19590": "nan", "19595": "nan", "19600": 1.43784, "19605": "nan", "19610": "nan", "19615": "nan", "19620": "nan", "19625": "nan", "19630": "nan", "19635": "nan", "19640": "nan", "19645": "nan", "19650": "nan", "19655": "nan", "19660": "nan", "19665": "nan", "19670": "nan", "19675": "nan", "19680": "nan", "19685": "nan", "19690": "nan", "19695": "nan", "19700": 1.43625, "19705": "nan", "19710": "nan", "19715": "nan", "19720": "nan", "19725": "nan", "19730": "nan", "19735": "nan", "19740": "nan", "19745": "nan", "19750": "nan", "19755": "nan", "19760": "nan", "19765": "nan", "19770": "nan", "19775": "nan", "19780": "nan", "19785": "nan", "19790": "nan", "19795": "nan", "19800": 1.44034, "19805": "nan", "19810": "nan", "19815": "nan", "19820": "nan", "19825": "nan", "19830": "nan", "19835": "nan", "19840": "nan", "19845": "nan", "19850": "nan", "19855": "nan", "19860": "nan", "19865": "nan", "19870": "nan", "19875": "nan", "19880": "nan", "19885": "nan", "19890": "nan", "19895": "nan", "19900": 1.44013, "19905": "nan", "19910": "nan", "19915": "nan", "19920": "nan", "19925": "nan", "19930": "nan", "19935": "nan", "19940": "nan", "19945": "nan", "19950": "nan", "19955": "nan", "19960": "nan", "19965": "nan", "19970": "nan", "19975": "nan", "19980": "nan", "19985": "nan", "19990": "nan", "19995": "nan", "20000": 1.44868, "20005": "nan", "20010": "nan", "20015": "nan", "20020": "nan", "20025": "nan", "20030": "nan", "20035": "nan", "20040": "nan", "20045": "nan", "20050": "nan", "20055": "nan", "20060": "nan", "20065": "nan", "20070": "nan", "20075": "nan", "20080": "nan", "20085": "nan", "20090": "nan", "20095": "nan", "20100": 1.43922, "20105": "nan", "20110": "nan", "20115": "nan", "20120": "nan", "20125": "nan", "20130": "nan", "20135": "nan", "20140": "nan", "20145": "nan", "20150": "nan", "20155": "nan", "20160": "nan", "20165": "nan", "20170": "nan", "20175": "nan", "20180": "nan", "20185": "nan", "20190": "nan", "20195": "nan", "20200": 1.43538, "20205": "nan", "20210": "nan", "20215": "nan", "20220": "nan", "20225": "nan", "20230": "nan", "20235": "nan", "20240": "nan", "20245": "nan", "20250": "nan", "20255": "nan", "20260": "nan", "20265": "nan", "20270": "nan", "20275": "nan", "20280": "nan", "20285": "nan", "20290": "nan", "20295": "nan", "20300": 1.43757, "20305": "nan", "20310": "nan", "20315": "nan", "20320": "nan", "20325": "nan", "20330": "nan", "20335": "nan", "20340": "nan", "20345": "nan", "20350": "nan", "20355": "nan", "20360": "nan", "20365": "nan", "20370": "nan", "20375": "nan", "20380": "nan", "20385": "nan", "20390": "nan", "20395": "nan", "20400": 1.4357, "20405": "nan", "20410": "nan", "20415": "nan", "20420": "nan", "20425": "nan", "20430": "nan", "20435": "nan", "20440": "nan", "20445": "nan", "20450": "nan", "20455": "nan", "20460": "nan", "20465": "nan", "20470": "nan", "20475": "nan", "20480": "nan", "20485": "nan", "20490": "nan", "20495": "nan", "20500": 1.43142, "20505": "nan", "20510": "nan", "20515": "nan", "20520": "nan", "20525": "nan", "20530": "nan", "20535": "nan", "20540": "nan", "20545": "nan", "20550": "nan", "20555": "nan", "20560": "nan", "20565": "nan", "20570": "nan", "20575": "nan", "20580": "nan", "20585": "nan", "20590": "nan", "20595": "nan", "20600": 1.43666, "20605": "nan", "20610": "nan", "20615": "nan", "20620": "nan", "20625": "nan", "20630": "nan", "20635": "nan", "20640": "nan", "20645": "nan", "20650": "nan", "20655": "nan", "20660": "nan", "20665": "nan", "20670": "nan", "20675": "nan", "20680": "nan", "20685": "nan", "20690": "nan", "20695": "nan", "20700": 1.43886, "20705": "nan", "20710": "nan", "20715": "nan", "20720": "nan", "20725": "nan", "20730": "nan", "20735": "nan", "20740": "nan", "20745": "nan", "20750": "nan", "20755": "nan", "20760": "nan", "20765": "nan", "20770": "nan", "20775": "nan", "20780": "nan", "20785": "nan", "20790": "nan", "20795": "nan", "20800": 1.43876, "20805": "nan", "20810": "nan", "20815": "nan", "20820": "nan", "20825": "nan", "20830": "nan", "20835": "nan", "20840": "nan", "20845": "nan", "20850": "nan", "20855": "nan", "20860": "nan", "20865": "nan", "20870": "nan", "20875": "nan", "20880": "nan", "20885": "nan", "20890": "nan", "20895": "nan", "20900": 1.44019, "20905": "nan", "20910": "nan", "20915": "nan", "20920": "nan", "20925": "nan", "20930": "nan", "20935": "nan", "20940": "nan", "20945": "nan", "20950": "nan", "20955": "nan", "20960": "nan", "20965": "nan", "20970": "nan", "20975": "nan", "20980": "nan", "20985": "nan", "20990": "nan", "20995": "nan", "21000": 1.43829, "21005": "nan", "21010": "nan", "21015": "nan", "21020": "nan", "21025": "nan", "21030": "nan", "21035": "nan", "21040": "nan", "21045": "nan", "21050": "nan", "21055": "nan", "21060": "nan", "21065": "nan", "21070": "nan", "21075": "nan", "21080": "nan", "21085": "nan", "21090": "nan", "21095": "nan", "21100": 1.43916, "21105": "nan", "21110": "nan", "21115": "nan", "21120": "nan", "21125": "nan", "21130": "nan", "21135": "nan", "21140": "nan", "21145": "nan", "21150": "nan", "21155": "nan", "21160": "nan", "21165": "nan", "21170": "nan", "21175": "nan", "21180": "nan", "21185": "nan", "21190": "nan", "21195": "nan", "21200": 1.43727, "21205": "nan", "21210": "nan", "21215": "nan", "21220": "nan", "21225": "nan", "21230": "nan", "21235": "nan", "21240": "nan", "21245": "nan", "21250": "nan", "21255": "nan", "21260": "nan", "21265": "nan", "21270": "nan", "21275": "nan", "21280": "nan", "21285": "nan", "21290": "nan", "21295": "nan", "21300": 1.43665, "21305": "nan", "21310": "nan", "21315": "nan", "21320": "nan", "21325": "nan", "21330": "nan", "21335": "nan", "21340": "nan", "21345": "nan", "21350": "nan", "21355": "nan", "21360": "nan", "21365": "nan", "21370": "nan", "21375": "nan", "21380": "nan", "21385": "nan", "21390": "nan", "21395": "nan", "21400": 1.44158, "21405": "nan", "21410": "nan", "21415": "nan", "21420": "nan", "21425": "nan", "21430": "nan", "21435": "nan", "21440": "nan", "21445": "nan", "21450": "nan", "21455": "nan", "21460": "nan", "21465": "nan", "21470": "nan", "21475": "nan", "21480": "nan", "21485": "nan", "21490": "nan", "21495": "nan", "21500": 1.43758, "21505": "nan", "21510": "nan", "21515": "nan", "21520": "nan", "21525": "nan", "21530": "nan", "21535": "nan", "21540": "nan", "21545": "nan", "21550": "nan", "21555": "nan", "21560": "nan", "21565": "nan", "21570": "nan", "21575": "nan", "21580": "nan", "21585": "nan", "21590": "nan", "21595": "nan", "21600": 1.43506, "21605": "nan", "21610": "nan", "21615": "nan", "21620": "nan", "21625": "nan", "21630": "nan", "21635": "nan", "21640": "nan", "21645": "nan", "21650": "nan", "21655": "nan", "21660": "nan", "21665": "nan", "21670": "nan", "21675": "nan", "21680": "nan", "21685": "nan", "21690": "nan", "21695": "nan", "21700": 1.43238, "21705": "nan", "21710": "nan", "21715": "nan", "21720": "nan", "21725": "nan", "21730": "nan", "21735": "nan", "21740": "nan", "21745": "nan", "21750": "nan", "21755": "nan", "21760": "nan", "21765": "nan", "21770": "nan", "21775": "nan", "21780": "nan", "21785": "nan", "21790": "nan", "21795": "nan", "21800": 1.43709, "21805": "nan", "21810": "nan", "21815": "nan", "21820": "nan", "21825": "nan", "21830": "nan", "21835": "nan", "21840": "nan", "21845": "nan", "21850": "nan", "21855": "nan", "21860": "nan", "21865": "nan", "21870": "nan", "21875": "nan", "21880": "nan", "21885": "nan", "21890": "nan", "21895": "nan", "21900": 1.4365, "21905": "nan", "21910": "nan", "21915": "nan", "21920": "nan", "21925": "nan", "21930": "nan", "21935": "nan", "21940": "nan", "21945": "nan", "21950": "nan", "21955": "nan", "21960": "nan", "21965": "nan", "21970": "nan", "21975": "nan", "21980": "nan", "21985": "nan", "21990": "nan", "21995": "nan", "22000": 1.4378, "22005": "nan", "22010": "nan", "22015": "nan", "22020": "nan", "22025": "nan", "22030": "nan", "22035": "nan", "22040": "nan", "22045": "nan", "22050": "nan", "22055": "nan", "22060": "nan", "22065": "nan", "22070": "nan", "22075": "nan", "22080": "nan", "22085": "nan", "22090": "nan", "22095": "nan", "22100": 1.43426, "22105": "nan", "22110": "nan", "22115": "nan", "22120": "nan", "22125": "nan", "22130": "nan", "22135": "nan", "22140": "nan", "22145": "nan", "22150": "nan", "22155": "nan", "22160": "nan", "22165": "nan", "22170": "nan", "22175": "nan", "22180": "nan", "22185": "nan", "22190": "nan", "22195": "nan", "22200": 1.4369, "22205": "nan", "22210": "nan", "22215": "nan", "22220": "nan", "22225": "nan", "22230": "nan", "22235": "nan", "22240": "nan", "22245": "nan", "22250": "nan", "22255": "nan", "22260": "nan", "22265": "nan", "22270": "nan", "22275": "nan", "22280": "nan", "22285": "nan", "22290": "nan", "22295": "nan", "22300": 1.44303, "22305": "nan", "22310": "nan", "22315": "nan", "22320": "nan", "22325": "nan", "22330": "nan", "22335": "nan", "22340": "nan", "22345": "nan", "22350": "nan", "22355": "nan", "22360": "nan", "22365": "nan", "22370": "nan", "22375": "nan", "22380": "nan", "22385": "nan", "22390": "nan", "22395": "nan", "22400": 1.43455, "22405": "nan", "22410": "nan", "22415": "nan", "22420": "nan", "22425": "nan", "22430": "nan", "22435": "nan", "22440": "nan", "22445": "nan", "22450": "nan", "22455": "nan", "22460": "nan", "22465": "nan", "22470": "nan", "22475": "nan", "22480": "nan", "22485": "nan", "22490": "nan", "22495": "nan", "22500": 1.44081, "22505": "nan", "22510": "nan", "22515": "nan", "22520": "nan", "22525": "nan", "22530": "nan", "22535": "nan", "22540": "nan", "22545": "nan", "22550": "nan", "22555": "nan", "22560": "nan", "22565": "nan", "22570": "nan", "22575": "nan", "22580": "nan", "22585": "nan", "22590": "nan", "22595": "nan", "22600": 1.43518, "22605": "nan", "22610": "nan", "22615": "nan", "22620": "nan", "22625": "nan", "22630": "nan", "22635": "nan", "22640": "nan", "22645": "nan", "22650": "nan", "22655": "nan", "22660": "nan", "22665": "nan", "22670": "nan", "22675": "nan", "22680": "nan", "22685": "nan", "22690": "nan", "22695": "nan", "22700": 1.43453, "22705": "nan", "22710": "nan", "22715": "nan", "22720": "nan", "22725": "nan", "22730": "nan", "22735": "nan", "22740": "nan", "22745": "nan", "22750": "nan", "22755": "nan", "22760": "nan", "22765": "nan", "22770": "nan", "22775": "nan", "22780": "nan", "22785": "nan", "22790": "nan", "22795": "nan", "22800": 1.43519, "22805": "nan", "22810": "nan", "22815": "nan", "22820": "nan", "22825": "nan", "22830": "nan", "22835": "nan", "22840": "nan", "22845": "nan", "22850": "nan", "22855": "nan", "22860": "nan", "22865": "nan", "22870": "nan", "22875": "nan", "22880": "nan", "22885": "nan", "22890": "nan", "22895": "nan", "22900": 1.43339, "22905": "nan", "22910": "nan", "22915": "nan", "22920": "nan", "22925": "nan", "22930": "nan", "22935": "nan", "22940": "nan", "22945": "nan", "22950": "nan", "22955": "nan", "22960": "nan", "22965": "nan", "22970": "nan", "22975": "nan", "22980": "nan", "22985": "nan", "22990": "nan", "22995": "nan", "23000": 1.43724, "23005": "nan", "23010": "nan", "23015": "nan", "23020": "nan", "23025": "nan", "23030": "nan", "23035": "nan", "23040": "nan", "23045": "nan", "23050": "nan", "23055": "nan", "23060": "nan", "23065": "nan", "23070": "nan", "23075": "nan", "23080": "nan", "23085": "nan", "23090": "nan", "23095": "nan", "23100": 1.43855, "23105": "nan", "23110": "nan", "23115": "nan", "23120": "nan", "23125": "nan", "23130": "nan", "23135": "nan", "23140": "nan", "23145": "nan", "23150": "nan", "23155": "nan", "23160": "nan", "23165": "nan", "23170": "nan", "23175": "nan", "23180": "nan", "23185": "nan", "23190": "nan", "23195": "nan", "23200": 1.43436, "23205": "nan", "23210": "nan", "23215": "nan", "23220": "nan", "23225": "nan", "23230": "nan", "23235": "nan", "23240": "nan", "23245": "nan", "23250": "nan", "23255": "nan", "23260": "nan", "23265": "nan", "23270": "nan", "23275": "nan", "23280": "nan", "23285": "nan", "23290": "nan", "23295": "nan", "23300": 1.43246, "23305": "nan", "23310": "nan", "23315": "nan", "23320": "nan", "23325": "nan", "23330": "nan", "23335": "nan", "23340": "nan", "23345": "nan", "23350": "nan", "23355": "nan", "23360": "nan", "23365": "nan", "23370": "nan", "23375": "nan", "23380": "nan", "23385": "nan", "23390": "nan", "23395": "nan", "23400": 1.43638, "23405": "nan", "23410": "nan", "23415": "nan", "23420": "nan", "23425": "nan", "23430": "nan", "23435": "nan", "23440": "nan", "23445": "nan", "23450": "nan", "23455": "nan", "23460": "nan", "23465": "nan", "23470": "nan", "23475": "nan", "23480": "nan", "23485": "nan", "23490": "nan", "23495": "nan", "23500": 1.43583, "23505": "nan", "23510": "nan", "23515": "nan", "23520": "nan", "23525": "nan", "23530": "nan", "23535": "nan", "23540": "nan", "23545": "nan", "23550": "nan", "23555": "nan", "23560": "nan", "23565": "nan", "23570": "nan", "23575": "nan", "23580": "nan", "23585": "nan", "23590": "nan", "23595": "nan", "23600": 1.43679, "23605": "nan", "23610": "nan", "23615": "nan", "23620": "nan", "23625": "nan", "23630": "nan", "23635": "nan", "23640": "nan", "23645": "nan", "23650": "nan", "23655": "nan", "23660": "nan", "23665": "nan", "23670": "nan", "23675": "nan", "23680": "nan", "23685": "nan", "23690": "nan", "23695": "nan", "23700": 1.44325, "23705": "nan", "23710": "nan", "23715": "nan", "23720": "nan", "23725": "nan", "23730": "nan", "23735": "nan", "23740": "nan", "23745": "nan", "23750": "nan", "23755": "nan", "23760": "nan", "23765": "nan", "23770": "nan", "23775": "nan", "23780": "nan", "23785": "nan", "23790": "nan", "23795": "nan", "23800": 1.4359, "23805": "nan", "23810": "nan", "23815": "nan", "23820": "nan", "23825": "nan", "23830": "nan", "23835": "nan", "23840": "nan", "23845": "nan", "23850": "nan", "23855": "nan", "23860": "nan", "23865": "nan", "23870": "nan", "23875": "nan", "23880": "nan", "23885": "nan", "23890": "nan", "23895": "nan", "23900": 1.4373, "23905": "nan", "23910": "nan", "23915": "nan", "23920": "nan", "23925": "nan", "23930": "nan", "23935": "nan", "23940": "nan", "23945": "nan", "23950": "nan", "23955": "nan", "23960": "nan", "23965": "nan", "23970": "nan", "23975": "nan", "23980": "nan", "23985": "nan", "23990": "nan", "23995": "nan", "24000": 1.44236, "24005": "nan", "24010": "nan", "24015": "nan", "24020": "nan", "24025": "nan", "24030": "nan", "24035": "nan", "24040": "nan", "24045": "nan", "24050": "nan", "24055": "nan", "24060": "nan", "24065": "nan", "24070": "nan", "24075": "nan", "24080": "nan", "24085": "nan", "24090": "nan", "24095": "nan", "24100": 1.43322, "24105": "nan", "24110": "nan", "24115": "nan", "24120": "nan", "24125": "nan", "24130": "nan", "24135": "nan", "24140": "nan", "24145": "nan", "24150": "nan", "24155": "nan", "24160": "nan", "24165": "nan", "24170": "nan", "24175": "nan", "24180": "nan", "24185": "nan", "24190": "nan", "24195": "nan", "24200": 1.43834, "24205": "nan", "24210": "nan", "24215": "nan", "24220": "nan", "24225": "nan", "24230": "nan", "24235": "nan", "24240": "nan", "24245": "nan", "24250": "nan", "24255": "nan", "24260": "nan", "24265": "nan", "24270": "nan", "24275": "nan", "24280": "nan", "24285": "nan", "24290": "nan", "24295": "nan", "24300": 1.43634, "24305": "nan", "24310": "nan", "24315": "nan", "24320": "nan", "24325": "nan", "24330": "nan", "24335": "nan", "24340": "nan", "24345": "nan", "24350": "nan", "24355": "nan", "24360": "nan", "24365": "nan", "24370": "nan", "24375": "nan", "24380": "nan", "24385": "nan", "24390": "nan", "24395": "nan", "24400": 1.43662, "24405": "nan", "24410": "nan", "24415": "nan", "24420": "nan", "24425": "nan", "24430": "nan", "24435": "nan", "24440": "nan", "24445": "nan", "24450": "nan", "24455": "nan", "24460": "nan", "24465": "nan", "24470": "nan", "24475": "nan", "24480": "nan", "24485": "nan", "24490": "nan", "24495": "nan", "24500": 1.43203, "24505": "nan", "24510": "nan", "24515": "nan", "24520": "nan", "24525": "nan", "24530": "nan", "24535": "nan", "24540": "nan", "24545": "nan", "24550": "nan", "24555": "nan", "24560": "nan", "24565": "nan", "24570": "nan", "24575": "nan", "24580": "nan", "24585": "nan", "24590": "nan", "24595": "nan", "24600": 1.43604, "24605": "nan", "24610": "nan", "24615": "nan", "24620": "nan", "24625": "nan", "24630": "nan", "24635": "nan", "24640": "nan", "24645": "nan", "24650": "nan", "24655": "nan", "24660": "nan", "24665": "nan", "24670": "nan", "24675": "nan", "24680": "nan", "24685": "nan", "24690": "nan", "24695": "nan", "24700": 1.43931, "24705": "nan", "24710": "nan", "24715": "nan", "24720": "nan", "24725": "nan", "24730": "nan", "24735": "nan", "24740": "nan", "24745": "nan", "24750": "nan", "24755": "nan", "24760": "nan", "24765": "nan", "24770": "nan", "24775": "nan", "24780": "nan", "24785": "nan", "24790": "nan", "24795": "nan", "24800": 1.43302, "24805": "nan", "24810": "nan", "24815": "nan", "24820": "nan", "24825": "nan", "24830": "nan", "24835": "nan", "24840": "nan", "24845": "nan", "24850": "nan", "24855": "nan", "24860": "nan", "24865": "nan", "24870": "nan", "24875": "nan", "24880": "nan", "24885": "nan", "24890": "nan", "24895": "nan", "24900": 1.43366, "24905": "nan", "24910": "nan", "24915": "nan", "24920": "nan", "24925": "nan", "24930": "nan", "24935": "nan", "24940": "nan", "24945": "nan", "24950": "nan", "24955": "nan", "24960": "nan", "24965": "nan", "24970": "nan", "24975": "nan", "24980": "nan", "24985": "nan", "24990": "nan", "24995": "nan", "25000": 1.43399, "25005": "nan", "25010": "nan", "25015": "nan", "25020": "nan", "25025": "nan", "25030": "nan", "25035": "nan", "25040": "nan", "25045": "nan", "25050": "nan", "25055": "nan", "25060": "nan", "25065": "nan", "25070": "nan", "25075": "nan", "25080": "nan", "25085": "nan", "25090": "nan", "25095": "nan", "25100": 1.43367, "25105": "nan", "25110": "nan", "25115": "nan", "25120": "nan", "25125": "nan", "25130": "nan", "25135": "nan", "25140": "nan", "25145": "nan", "25150": "nan", "25155": "nan", "25160": "nan", "25165": "nan", "25170": "nan", "25175": "nan", "25180": "nan", "25185": "nan", "25190": "nan", "25195": "nan", "25200": 1.43267, "25205": "nan", "25210": "nan", "25215": "nan", "25220": "nan", "25225": "nan", "25230": "nan", "25235": "nan", "25240": "nan", "25245": "nan", "25250": "nan", "25255": "nan", "25260": "nan", "25265": "nan", "25270": "nan", "25275": "nan", "25280": "nan", "25285": "nan", "25290": "nan", "25295": "nan", "25300": 1.43747, "25305": "nan", "25310": "nan", "25315": "nan", "25320": "nan", "25325": "nan", "25330": "nan", "25335": "nan", "25340": "nan", "25345": "nan", "25350": "nan", "25355": "nan", "25360": "nan", "25365": "nan", "25370": "nan", "25375": "nan", "25380": "nan", "25385": "nan", "25390": "nan", "25395": "nan", "25400": 1.43645, "25405": "nan", "25410": "nan", "25415": "nan", "25420": "nan", "25425": "nan", "25430": "nan", "25435": "nan", "25440": "nan", "25445": "nan", "25450": "nan", "25455": "nan", "25460": "nan", "25465": "nan", "25470": "nan", "25475": "nan", "25480": "nan", "25485": "nan", "25490": "nan", "25495": "nan", "25500": 1.43728, "25505": "nan", "25510": "nan", "25515": "nan", "25520": "nan", "25525": "nan", "25530": "nan", "25535": "nan", "25540": "nan", "25545": "nan", "25550": "nan", "25555": "nan", "25560": "nan", "25565": "nan", "25570": "nan", "25575": "nan", "25580": "nan", "25585": "nan", "25590": "nan", "25595": "nan", "25600": 1.43702, "25605": "nan", "25610": "nan", "25615": "nan", "25620": "nan", "25625": "nan", "25630": "nan", "25635": "nan", "25640": "nan", "25645": "nan", "25650": "nan", "25655": "nan", "25660": "nan", "25665": "nan", "25670": "nan", "25675": "nan", "25680": "nan", "25685": "nan", "25690": "nan", "25695": "nan", "25700": 1.43856, "25705": "nan", "25710": "nan", "25715": "nan", "25720": "nan", "25725": "nan", "25730": "nan", "25735": "nan", "25740": "nan", "25745": "nan", "25750": "nan", "25755": "nan", "25760": "nan", "25765": "nan", "25770": "nan", "25775": "nan", "25780": "nan", "25785": "nan", "25790": "nan", "25795": "nan", "25800": 1.43196, "25805": "nan", "25810": "nan", "25815": "nan", "25820": "nan", "25825": "nan", "25830": "nan", "25835": "nan", "25840": "nan", "25845": "nan", "25850": "nan", "25855": "nan", "25860": "nan", "25865": "nan", "25870": "nan", "25875": "nan", "25880": "nan", "25885": "nan", "25890": "nan", "25895": "nan", "25900": 1.43211, "25905": "nan", "25910": "nan", "25915": "nan", "25920": "nan", "25925": "nan", "25930": "nan", "25935": "nan", "25940": "nan", "25945": "nan", "25950": "nan", "25955": "nan", "25960": "nan", "25965": "nan", "25970": "nan", "25975": "nan", "25980": "nan", "25985": "nan", "25990": "nan", "25995": "nan", "26000": 1.44455, "26005": "nan", "26010": "nan", "26015": "nan", "26020": "nan", "26025": "nan", "26030": "nan", "26035": "nan", "26040": "nan", "26045": "nan", "26050": "nan", "26055": "nan", "26060": "nan", "26065": "nan", "26070": "nan", "26075": "nan", "26080": "nan", "26085": "nan", "26090": "nan", "26095": "nan", "26100": 1.43688, "26105": "nan", "26110": "nan", "26115": "nan", "26120": "nan", "26125": "nan", "26130": "nan", "26135": "nan", "26140": "nan", "26145": "nan", "26150": "nan", "26155": "nan", "26160": "nan", "26165": "nan", "26170": "nan", "26175": "nan", "26180": "nan", "26185": "nan", "26190": "nan", "26195": "nan", "26200": 1.43908, "26205": "nan", "26210": "nan", "26215": "nan", "26220": "nan", "26225": "nan", "26230": "nan", "26235": "nan", "26240": "nan", "26245": "nan", "26250": "nan", "26255": "nan", "26260": "nan", "26265": "nan", "26270": "nan", "26275": "nan", "26280": "nan", "26285": "nan", "26290": "nan", "26295": "nan", "26300": 1.43524, "26305": "nan", "26310": "nan", "26315": "nan", "26320": "nan", "26325": "nan", "26330": "nan", "26335": "nan", "26340": "nan", "26345": "nan", "26350": "nan", "26355": "nan", "26360": "nan", "26365": "nan", "26370": "nan", "26375": "nan", "26380": "nan", "26385": "nan", "26390": "nan", "26395": "nan", "26400": 1.4328, "26405": "nan", "26410": "nan", "26415": "nan", "26420": "nan", "26425": "nan", "26430": "nan", "26435": "nan", "26440": "nan", "26445": "nan", "26450": "nan", "26455": "nan", "26460": "nan", "26465": "nan", "26470": "nan", "26475": "nan", "26480": "nan", "26485": "nan", "26490": "nan", "26495": "nan", "26500": 1.43281, "26505": "nan", "26510": "nan", "26515": "nan", "26520": "nan", "26525": "nan", "26530": "nan", "26535": "nan", "26540": "nan", "26545": "nan", "26550": "nan", "26555": "nan", "26560": "nan", "26565": "nan", "26570": "nan", "26575": "nan", "26580": "nan", "26585": "nan", "26590": "nan", "26595": "nan", "26600": 1.43171, "26605": "nan", "26610": "nan", "26615": "nan", "26620": "nan", "26625": "nan", "26630": "nan", "26635": "nan", "26640": "nan", "26645": "nan", "26650": "nan", "26655": "nan", "26660": "nan", "26665": "nan", "26670": "nan", "26675": "nan", "26680": "nan", "26685": "nan", "26690": "nan", "26695": "nan", "26700": 1.59155, "26705": "nan", "26710": "nan", "26715": "nan", "26720": "nan", "26725": "nan", "26730": "nan", "26735": "nan", "26740": "nan", "26745": "nan", "26750": "nan", "26755": "nan", "26760": "nan", "26765": "nan", "26770": "nan", "26775": "nan", "26780": "nan", "26785": "nan", "26790": "nan", "26795": "nan", "26800": 1.4433, "26805": "nan", "26810": "nan", "26815": "nan", "26820": "nan", "26825": "nan", "26830": "nan", "26835": "nan", "26840": "nan", "26845": "nan", "26850": "nan", "26855": "nan", "26860": "nan", "26865": "nan", "26870": "nan", "26875": "nan", "26880": "nan", "26885": "nan", "26890": "nan", "26895": "nan", "26900": 1.43346, "26905": "nan", "26910": "nan", "26915": "nan", "26920": "nan", "26925": "nan", "26930": "nan", "26935": "nan", "26940": "nan", "26945": "nan", "26950": "nan", "26955": "nan", "26960": "nan", "26965": "nan", "26970": "nan", "26975": "nan", "26980": "nan", "26985": "nan", "26990": "nan", "26995": "nan", "27000": 1.63506, "27005": "nan", "27010": "nan", "27015": "nan", "27020": "nan", "27025": "nan", "27030": "nan", "27035": "nan", "27040": "nan", "27045": "nan", "27050": "nan", "27055": "nan", "27060": "nan", "27065": "nan", "27070": "nan", "27075": "nan", "27080": "nan", "27085": "nan", "27090": "nan", "27095": "nan", "27100": 1.43356, "27105": "nan", "27110": "nan", "27115": "nan", "27120": "nan", "27125": "nan", "27130": "nan", "27135": "nan", "27140": "nan", "27145": "nan", "27150": "nan", "27155": "nan", "27160": "nan", "27165": "nan", "27170": "nan", "27175": "nan", "27180": "nan", "27185": "nan", "27190": "nan", "27195": "nan", "27200": 1.42807, "27205": "nan", "27210": "nan", "27215": "nan", "27220": "nan", "27225": "nan", "27230": "nan", "27235": "nan", "27240": "nan", "27245": "nan", "27250": "nan", "27255": "nan", "27260": "nan", "27265": "nan", "27270": "nan", "27275": "nan", "27280": "nan", "27285": "nan", "27290": "nan", "27295": "nan", "27300": 1.43455, "27305": "nan", "27310": "nan", "27315": "nan", "27320": "nan", "27325": "nan", "27330": "nan", "27335": "nan", "27340": "nan", "27345": "nan", "27350": "nan", "27355": "nan", "27360": "nan", "27365": "nan", "27370": "nan", "27375": "nan", "27380": "nan", "27385": "nan", "27390": "nan", "27395": "nan", "27400": 1.4318, "27405": "nan", "27410": "nan", "27415": "nan", "27420": "nan", "27425": "nan", "27430": "nan", "27435": "nan", "27440": "nan", "27445": "nan", "27450": "nan", "27455": "nan", "27460": "nan", "27465": "nan", "27470": "nan", "27475": "nan", "27480": "nan", "27485": "nan", "27490": "nan", "27495": "nan", "27500": 1.43257, "27505": "nan", "27510": "nan", "27515": "nan", "27520": "nan", "27525": "nan", "27530": "nan", "27535": "nan", "27540": "nan", "27545": "nan", "27550": "nan", "27555": "nan", "27560": "nan", "27565": "nan", "27570": "nan", "27575": "nan", "27580": "nan", "27585": "nan", "27590": "nan", "27595": "nan", "27600": 1.43877, "27605": "nan", "27610": "nan", "27615": "nan", "27620": "nan", "27625": "nan", "27630": "nan", "27635": "nan", "27640": "nan", "27645": "nan", "27650": "nan", "27655": "nan", "27660": "nan", "27665": "nan", "27670": "nan", "27675": "nan", "27680": "nan", "27685": "nan", "27690": "nan", "27695": "nan", "27700": 1.42487, "27705": "nan", "27710": "nan", "27715": "nan", "27720": "nan", "27725": "nan", "27730": "nan", "27735": "nan", "27740": "nan", "27745": "nan", "27750": "nan", "27755": "nan", "27760": "nan", "27765": "nan", "27770": "nan", "27775": "nan", "27780": "nan", "27785": "nan", "27790": "nan", "27795": "nan", "27800": 1.42402, "27805": "nan", "27810": "nan", "27815": "nan", "27820": "nan", "27825": "nan", "27830": "nan", "27835": "nan", "27840": "nan", "27845": "nan", "27850": "nan", "27855": "nan", "27860": "nan", "27865": "nan", "27870": "nan", "27875": "nan", "27880": "nan", "27885": "nan", "27890": "nan", "27895": "nan", "27900": 1.43195, "27905": "nan", "27910": "nan", "27915": "nan", "27920": "nan", "27925": "nan", "27930": "nan", "27935": "nan", "27940": "nan", "27945": "nan", "27950": "nan", "27955": "nan", "27960": "nan", "27965": "nan", "27970": "nan", "27975": "nan", "27980": "nan", "27985": "nan", "27990": "nan", "27995": "nan", "28000": 1.42734, "28005": "nan", "28010": "nan", "28015": "nan", "28020": "nan", "28025": "nan", "28030": "nan", "28035": "nan", "28040": "nan", "28045": "nan", "28050": "nan", "28055": "nan", "28060": "nan", "28065": "nan", "28070": "nan", "28075": "nan", "28080": "nan", "28085": "nan", "28090": "nan", "28095": "nan", "28100": 1.45989, "28105": "nan", "28110": "nan", "28115": "nan", "28120": "nan", "28125": "nan", "28130": "nan", "28135": "nan", "28140": "nan", "28145": "nan", "28150": "nan", "28155": "nan", "28160": "nan", "28165": "nan", "28170": "nan", "28175": "nan", "28180": "nan", "28185": "nan", "28190": "nan", "28195": "nan", "28200": 1.42994, "28205": "nan", "28210": "nan", "28215": "nan", "28220": "nan", "28225": "nan", "28230": "nan", "28235": "nan", "28240": "nan", "28245": "nan", "28250": "nan", "28255": "nan", "28260": "nan", "28265": "nan", "28270": "nan", "28275": "nan", "28280": "nan", "28285": "nan", "28290": "nan", "28295": "nan", "28300": 1.434, "28305": "nan", "28310": "nan", "28315": "nan", "28320": "nan", "28325": "nan", "28330": "nan", "28335": "nan", "28340": "nan", "28345": "nan", "28350": "nan", "28355": "nan", "28360": "nan", "28365": "nan", "28370": "nan", "28375": "nan", "28380": "nan", "28385": "nan", "28390": "nan", "28395": "nan", "28400": 1.44814, "28405": "nan", "28410": "nan", "28415": "nan", "28420": "nan", "28425": "nan", "28430": "nan", "28435": "nan", "28440": "nan", "28445": "nan", "28450": "nan", "28455": "nan", "28460": "nan", "28465": "nan", "28470": "nan", "28475": "nan", "28480": "nan", "28485": "nan", "28490": "nan", "28495": "nan", "28500": 1.42627, "28505": "nan", "28510": "nan", "28515": "nan", "28520": "nan", "28525": "nan", "28530": "nan", "28535": "nan", "28540": "nan", "28545": "nan", "28550": "nan", "28555": "nan", "28560": "nan", "28565": "nan", "28570": "nan", "28575": "nan", "28580": "nan", "28585": "nan", "28590": "nan", "28595": "nan", "28600": 1.42691, "28605": "nan", "28610": "nan", "28615": "nan", "28620": "nan", "28625": "nan", "28630": "nan", "28635": "nan", "28640": "nan", "28645": "nan", "28650": "nan", "28655": "nan", "28660": "nan", "28665": "nan", "28670": "nan", "28675": "nan", "28680": "nan", "28685": "nan", "28690": "nan", "28695": "nan", "28700": 1.42987, "28705": "nan", "28710": "nan", "28715": "nan", "28720": "nan", "28725": "nan", "28730": "nan", "28735": "nan", "28740": "nan", "28745": "nan", "28750": "nan", "28755": "nan", "28760": "nan", "28765": "nan", "28770": "nan", "28775": "nan", "28780": "nan", "28785": "nan", "28790": "nan", "28795": "nan", "28800": 1.42896, "28805": "nan", "28810": "nan", "28815": "nan", "28820": "nan", "28825": "nan", "28830": "nan", "28835": "nan", "28840": "nan", "28845": "nan", "28850": "nan", "28855": "nan", "28860": "nan", "28865": "nan", "28870": "nan", "28875": "nan", "28880": "nan", "28885": "nan", "28890": "nan", "28895": "nan", "28900": 1.42754, "28905": "nan", "28910": "nan", "28915": "nan", "28920": "nan", "28925": "nan", "28930": "nan", "28935": "nan", "28940": "nan", "28945": "nan", "28950": "nan", "28955": "nan", "28960": "nan", "28965": "nan", "28970": "nan", "28975": "nan", "28980": "nan", "28985": "nan", "28990": "nan", "28995": "nan", "29000": 1.43436, "29005": "nan", "29010": "nan", "29015": "nan", "29020": "nan", "29025": "nan", "29030": "nan", "29035": "nan", "29040": "nan", "29045": "nan", "29050": "nan", "29055": "nan", "29060": "nan", "29065": "nan", "29070": "nan", "29075": "nan", "29080": "nan", "29085": "nan", "29090": "nan", "29095": "nan", "29100": 1.42717, "29105": "nan", "29110": "nan", "29115": "nan", "29120": "nan", "29125": "nan", "29130": "nan", "29135": "nan", "29140": "nan", "29145": "nan", "29150": "nan", "29155": "nan", "29160": "nan", "29165": "nan", "29170": "nan", "29175": "nan", "29180": "nan", "29185": "nan", "29190": "nan", "29195": "nan", "29200": 1.42538, "29205": "nan", "29210": "nan", "29215": "nan", "29220": "nan", "29225": "nan", "29230": "nan", "29235": "nan", "29240": "nan", "29245": "nan", "29250": "nan", "29255": "nan", "29260": "nan", "29265": "nan", "29270": "nan", "29275": "nan", "29280": "nan", "29285": "nan", "29290": "nan", "29295": "nan", "29300": 1.42591, "29305": "nan", "29310": "nan", "29315": "nan", "29320": "nan", "29325": "nan", "29330": "nan", "29335": "nan", "29340": "nan", "29345": "nan", "29350": "nan", "29355": "nan", "29360": "nan", "29365": "nan", "29370": "nan", "29375": "nan", "29380": "nan", "29385": "nan", "29390": "nan", "29395": "nan", "29400": 1.4282, "29405": "nan", "29410": "nan", "29415": "nan", "29420": "nan", "29425": "nan", "29430": "nan", "29435": "nan", "29440": "nan", "29445": "nan", "29450": "nan", "29455": "nan", "29460": "nan", "29465": "nan", "29470": "nan", "29475": "nan", "29480": "nan", "29485": "nan", "29490": "nan", "29495": "nan", "29500": 1.42552, "29505": "nan", "29510": "nan", "29515": "nan", "29520": "nan", "29525": "nan", "29530": "nan", "29535": "nan", "29540": "nan", "29545": "nan", "29550": "nan", "29555": "nan", "29560": "nan", "29565": "nan", "29570": "nan", "29575": "nan", "29580": "nan", "29585": "nan", "29590": "nan", "29595": "nan", "29600": 1.42836, "29605": "nan", "29610": "nan", "29615": "nan", "29620": "nan", "29625": "nan", "29630": "nan", "29635": "nan", "29640": "nan", "29645": "nan", "29650": "nan", "29655": "nan", "29660": "nan", "29665": "nan", "29670": "nan", "29675": "nan", "29680": "nan", "29685": "nan", "29690": "nan", "29695": "nan", "29700": 1.42797, "29705": "nan", "29710": "nan", "29715": "nan", "29720": "nan", "29725": "nan", "29730": "nan", "29735": "nan", "29740": "nan", "29745": "nan", "29750": "nan", "29755": "nan", "29760": "nan", "29765": "nan", "29770": "nan", "29775": "nan", "29780": "nan", "29785": "nan", "29790": "nan", "29795": "nan", "29800": 1.42395, "29805": "nan", "29810": "nan", "29815": "nan", "29820": "nan", "29825": "nan", "29830": "nan", "29835": "nan", "29840": "nan", "29845": "nan", "29850": "nan", "29855": "nan", "29860": "nan", "29865": "nan", "29870": "nan", "29875": "nan", "29880": "nan", "29885": "nan", "29890": "nan", "29895": "nan", "29900": 1.4291, "29905": "nan", "29910": "nan", "29915": "nan", "29920": "nan", "29925": "nan", "29930": "nan", "29935": "nan", "29940": "nan", "29945": "nan", "29950": "nan", "29955": "nan", "29960": "nan", "29965": "nan", "29970": "nan", "29975": "nan", "29980": "nan", "29985": "nan", "29990": "nan", "29995": "nan", "30000": 1.4284, "30005": "nan", "30010": "nan", "30015": "nan", "30020": "nan", "30025": "nan", "30030": "nan", "30035": "nan", "30040": "nan", "30045": "nan", "30050": "nan", "30055": "nan", "30060": "nan", "30065": "nan", "30070": "nan", "30075": "nan", "30080": "nan", "30085": "nan", "30090": "nan", "30095": "nan", "30100": 1.42857, "30105": "nan", "30110": "nan", "30115": "nan", "30120": "nan", "30125": "nan", "30130": "nan", "30135": "nan", "30140": "nan", "30145": "nan", "30150": "nan", "30155": "nan", "30160": "nan", "30165": "nan", "30170": "nan", "30175": "nan", "30180": "nan", "30185": "nan", "30190": "nan", "30195": "nan", "30200": 1.43096, "30205": "nan", "30210": "nan", "30215": "nan", "30220": "nan", "30225": "nan", "30230": "nan", "30235": "nan", "30240": "nan", "30245": "nan", "30250": "nan", "30255": "nan", "30260": "nan", "30265": "nan", "30270": "nan", "30275": "nan", "30280": "nan", "30285": "nan", "30290": "nan", "30295": "nan", "30300": 1.43459, "30305": "nan", "30310": "nan", "30315": "nan", "30320": "nan", "30325": "nan", "30330": "nan", "30335": "nan", "30340": "nan", "30345": "nan", "30350": "nan", "30355": "nan", "30360": "nan", "30365": "nan", "30370": "nan", "30375": "nan", "30380": "nan", "30385": "nan", "30390": "nan", "30395": "nan", "30400": 1.42596, "30405": "nan", "30410": "nan", "30415": "nan", "30420": "nan", "30425": "nan", "30430": "nan", "30435": "nan", "30440": "nan", "30445": "nan", "30450": "nan", "30455": "nan", "30460": "nan", "30465": "nan", "30470": "nan", "30475": "nan", "30480": "nan", "30485": "nan", "30490": "nan", "30495": "nan", "30500": 1.42753, "30505": "nan", "30510": "nan", "30515": "nan", "30520": "nan", "30525": "nan", "30530": "nan", "30535": "nan", "30540": "nan", "30545": "nan", "30550": "nan", "30555": "nan", "30560": "nan", "30565": "nan", "30570": "nan", "30575": "nan", "30580": "nan", "30585": "nan", "30590": "nan", "30595": "nan", "30600": 1.426, "30605": "nan", "30610": "nan", "30615": "nan", "30620": "nan", "30625": "nan", "30630": "nan", "30635": "nan", "30640": "nan", "30645": "nan", "30650": "nan", "30655": "nan", "30660": "nan", "30665": "nan", "30670": "nan", "30675": "nan", "30680": "nan", "30685": "nan", "30690": "nan", "30695": "nan", "30700": 1.4273, "30705": "nan", "30710": "nan", "30715": "nan", "30720": "nan", "30725": "nan", "30730": "nan", "30735": "nan", "30740": "nan", "30745": "nan", "30750": "nan", "30755": "nan", "30760": "nan", "30765": "nan", "30770": "nan", "30775": "nan", "30780": "nan", "30785": "nan", "30790": "nan", "30795": "nan", "30800": 1.42609, "30805": "nan", "30810": "nan", "30815": "nan", "30820": "nan", "30825": "nan", "30830": "nan", "30835": "nan", "30840": "nan", "30845": "nan", "30850": "nan", "30855": "nan", "30860": "nan", "30865": "nan", "30870": "nan", "30875": "nan", "30880": "nan", "30885": "nan", "30890": "nan", "30895": "nan", "30900": 1.42364, "30905": "nan", "30910": "nan", "30915": "nan", "30920": "nan", "30925": "nan", "30930": "nan", "30935": "nan", "30940": "nan", "30945": "nan", "30950": "nan", "30955": "nan", "30960": "nan", "30965": "nan", "30970": "nan", "30975": "nan", "30980": "nan", "30985": "nan", "30990": "nan", "30995": "nan", "31000": 1.42505, "31005": "nan", "31010": "nan", "31015": "nan", "31020": "nan", "31025": "nan", "31030": "nan", "31035": "nan", "31040": "nan", "31045": "nan", "31050": "nan", "31055": "nan", "31060": "nan", "31065": "nan", "31070": "nan", "31075": "nan", "31080": "nan", "31085": "nan", "31090": "nan", "31095": "nan", "31100": 1.42503, "31105": "nan", "31110": "nan", "31115": "nan", "31120": "nan", "31125": "nan", "31130": "nan", "31135": "nan", "31140": "nan", "31145": "nan", "31150": "nan", "31155": "nan", "31160": "nan", "31165": "nan", "31170": "nan", "31175": "nan", "31180": "nan", "31185": "nan", "31190": "nan", "31195": "nan", "31200": 1.42689, "31205": "nan", "31210": "nan", "31215": "nan", "31220": "nan", "31225": "nan", "31230": "nan", "31235": "nan", "31240": "nan", "31245": "nan", "31250": "nan", "31255": "nan", "31260": "nan", "31265": "nan", "31270": "nan", "31275": "nan", "31280": "nan", "31285": "nan", "31290": "nan", "31295": "nan", "31300": 1.42808, "31305": "nan", "31310": "nan", "31315": "nan", "31320": "nan", "31325": "nan", "31330": "nan", "31335": "nan", "31340": "nan", "31345": "nan", "31350": "nan", "31355": "nan", "31360": "nan", "31365": "nan", "31370": "nan", "31375": "nan", "31380": "nan", "31385": "nan", "31390": "nan", "31395": "nan", "31400": 1.42709, "31405": "nan", "31410": "nan", "31415": "nan", "31420": "nan", "31425": "nan", "31430": "nan", "31435": "nan", "31440": "nan", "31445": "nan", "31450": "nan", "31455": "nan", "31460": "nan", "31465": "nan", "31470": "nan", "31475": "nan", "31480": "nan", "31485": "nan", "31490": "nan", "31495": "nan", "31500": 1.42525, "31505": "nan", "31510": "nan", "31515": "nan", "31520": "nan", "31525": "nan", "31530": "nan", "31535": "nan", "31540": "nan", "31545": "nan", "31550": "nan", "31555": "nan", "31560": "nan", "31565": "nan", "31570": "nan", "31575": "nan", "31580": "nan", "31585": "nan", "31590": "nan", "31595": "nan", "31600": 1.43175, "31605": "nan", "31610": "nan", "31615": "nan", "31620": "nan", "31625": "nan", "31630": "nan", "31635": "nan", "31640": "nan", "31645": "nan", "31650": "nan", "31655": "nan", "31660": "nan", "31665": "nan", "31670": "nan", "31675": "nan", "31680": "nan", "31685": "nan", "31690": "nan", "31695": "nan", "31700": 1.42302, "31705": "nan", "31710": "nan", "31715": "nan", "31720": "nan", "31725": "nan", "31730": "nan", "31735": "nan", "31740": "nan", "31745": "nan", "31750": "nan", "31755": "nan", "31760": "nan", "31765": "nan", "31770": "nan", "31775": "nan", "31780": "nan", "31785": "nan", "31790": "nan", "31795": "nan", "31800": 1.42502, "31805": "nan", "31810": "nan", "31815": "nan", "31820": "nan", "31825": "nan", "31830": "nan", "31835": "nan", "31840": "nan", "31845": "nan", "31850": "nan", "31855": "nan", "31860": "nan", "31865": "nan", "31870": "nan", "31875": "nan", "31880": "nan", "31885": "nan", "31890": "nan", "31895": "nan", "31900": 1.4318, "31905": "nan", "31910": "nan", "31915": "nan", "31920": "nan", "31925": "nan", "31930": "nan", "31935": "nan", "31940": "nan", "31945": "nan", "31950": "nan", "31955": "nan", "31960": "nan", "31965": "nan", "31970": "nan", "31975": "nan", "31980": "nan", "31985": "nan", "31990": "nan", "31995": "nan", "32000": 1.42533, "32005": "nan", "32010": "nan", "32015": "nan", "32020": "nan", "32025": "nan", "32030": "nan", "32035": "nan", "32040": "nan", "32045": "nan", "32050": "nan", "32055": "nan", "32060": "nan", "32065": "nan", "32070": "nan", "32075": "nan", "32080": "nan", "32085": "nan", "32090": "nan", "32095": "nan", "32100": 1.42439, "32105": "nan", "32110": "nan", "32115": "nan", "32120": "nan", "32125": "nan", "32130": "nan", "32135": "nan", "32140": "nan", "32145": "nan", "32150": "nan", "32155": "nan", "32160": "nan", "32165": "nan", "32170": "nan", "32175": "nan", "32180": "nan", "32185": "nan", "32190": "nan", "32195": "nan", "32200": 1.43174, "32205": "nan", "32210": "nan", "32215": "nan", "32220": "nan", "32225": "nan", "32230": "nan", "32235": "nan", "32240": "nan", "32245": "nan", "32250": "nan", "32255": "nan", "32260": "nan", "32265": "nan", "32270": "nan", "32275": "nan", "32280": "nan", "32285": "nan", "32290": "nan", "32295": "nan", "32300": 1.42804, "32305": "nan", "32310": "nan", "32315": "nan", "32320": "nan", "32325": "nan", "32330": "nan", "32335": "nan", "32340": "nan", "32345": "nan", "32350": "nan", "32355": "nan", "32360": "nan", "32365": "nan", "32370": "nan", "32375": "nan", "32380": "nan", "32385": "nan", "32390": "nan", "32395": "nan", "32400": 1.42831, "32405": "nan", "32410": "nan", "32415": "nan", "32420": "nan", "32425": "nan", "32430": "nan", "32435": "nan", "32440": "nan", "32445": "nan", "32450": "nan", "32455": "nan", "32460": "nan", "32465": "nan", "32470": "nan", "32475": "nan", "32480": "nan", "32485": "nan", "32490": "nan", "32495": "nan", "32500": 1.42772, "32505": "nan", "32510": "nan", "32515": "nan", "32520": "nan", "32525": "nan", "32530": "nan", "32535": "nan", "32540": "nan", "32545": "nan", "32550": "nan", "32555": "nan", "32560": "nan", "32565": "nan", "32570": "nan", "32575": "nan", "32580": "nan", "32585": "nan", "32590": "nan", "32595": "nan", "32600": 1.43262, "32605": "nan", "32610": "nan", "32615": "nan", "32620": "nan", "32625": "nan", "32630": "nan", "32635": "nan", "32640": "nan", "32645": "nan", "32650": "nan", "32655": "nan", "32660": "nan", "32665": "nan", "32670": "nan", "32675": "nan", "32680": "nan", "32685": "nan", "32690": "nan", "32695": "nan", "32700": 1.42688, "32705": "nan", "32710": "nan", "32715": "nan", "32720": "nan", "32725": "nan", "32730": "nan", "32735": "nan", "32740": "nan", "32745": "nan", "32750": "nan", "32755": "nan", "32760": "nan", "32765": "nan", "32770": "nan", "32775": "nan", "32780": "nan", "32785": "nan", "32790": "nan", "32795": "nan", "32800": 1.43019, "32805": "nan", "32810": "nan", "32815": "nan", "32820": "nan", "32825": "nan", "32830": "nan", "32835": "nan", "32840": "nan", "32845": "nan", "32850": "nan", "32855": "nan", "32860": "nan", "32865": "nan", "32870": "nan", "32875": "nan", "32880": "nan", "32885": "nan", "32890": "nan", "32895": "nan", "32900": 1.43475, "32905": "nan", "32910": "nan", "32915": "nan", "32920": "nan", "32925": "nan", "32930": "nan", "32935": "nan", "32940": "nan", "32945": "nan", "32950": "nan", "32955": "nan", "32960": "nan", "32965": "nan", "32970": "nan", "32975": "nan", "32980": "nan", "32985": "nan", "32990": "nan", "32995": "nan", "33000": 1.42615, "33005": "nan", "33010": "nan", "33015": "nan", "33020": "nan", "33025": "nan", "33030": "nan", "33035": "nan", "33040": "nan", "33045": "nan", "33050": "nan", "33055": "nan", "33060": "nan", "33065": "nan", "33070": "nan", "33075": "nan", "33080": "nan", "33085": "nan", "33090": "nan", "33095": "nan", "33100": 1.42989, "33105": "nan", "33110": "nan", "33115": "nan", "33120": "nan", "33125": "nan", "33130": "nan", "33135": "nan", "33140": "nan", "33145": "nan", "33150": "nan", "33155": "nan", "33160": "nan", "33165": "nan", "33170": "nan", "33175": "nan", "33180": "nan", "33185": "nan", "33190": "nan", "33195": "nan", "33200": 1.42812, "33205": "nan", "33210": "nan", "33215": "nan", "33220": "nan", "33225": "nan", "33230": "nan", "33235": "nan", "33240": "nan", "33245": "nan", "33250": "nan", "33255": "nan", "33260": "nan", "33265": "nan", "33270": "nan", "33275": "nan", "33280": "nan", "33285": "nan", "33290": "nan", "33295": "nan", "33300": 1.42494, "33305": "nan", "33310": "nan", "33315": "nan", "33320": "nan", "33325": "nan", "33330": "nan", "33335": "nan", "33340": "nan", "33345": "nan", "33350": "nan", "33355": "nan", "33360": "nan", "33365": "nan", "33370": "nan", "33375": "nan", "33380": "nan", "33385": "nan", "33390": "nan", "33395": "nan", "33400": 1.42788, "33405": "nan", "33410": "nan", "33415": "nan", "33420": "nan", "33425": "nan", "33430": "nan", "33435": "nan", "33440": "nan", "33445": "nan", "33450": "nan", "33455": "nan", "33460": "nan", "33465": "nan", "33470": "nan", "33475": "nan", "33480": "nan", "33485": "nan", "33490": "nan", "33495": "nan", "33500": 1.42627, "33505": "nan", "33510": "nan", "33515": "nan", "33520": "nan", "33525": "nan", "33530": "nan", "33535": "nan", "33540": "nan", "33545": "nan", "33550": "nan", "33555": "nan", "33560": "nan", "33565": "nan", "33570": "nan", "33575": "nan", "33580": "nan", "33585": "nan", "33590": "nan", "33595": "nan", "33600": 1.42678, "33605": "nan", "33610": "nan", "33615": "nan", "33620": "nan", "33625": "nan", "33630": "nan", "33635": "nan", "33640": "nan", "33645": "nan", "33650": "nan", "33655": "nan", "33660": "nan", "33665": "nan", "33670": "nan", "33675": "nan", "33680": "nan", "33685": "nan", "33690": "nan", "33695": "nan", "33700": 1.42959, "33705": "nan", "33710": "nan", "33715": "nan", "33720": "nan", "33725": "nan", "33730": "nan", "33735": "nan", "33740": "nan", "33745": "nan", "33750": "nan", "33755": "nan", "33760": "nan", "33765": "nan", "33770": "nan", "33775": "nan", "33780": "nan", "33785": "nan", "33790": "nan", "33795": "nan", "33800": 1.4283, "33805": "nan", "33810": "nan", "33815": "nan", "33820": "nan", "33825": "nan", "33830": "nan", "33835": "nan", "33840": "nan", "33845": "nan", "33850": "nan", "33855": "nan", "33860": "nan", "33865": "nan", "33870": "nan", "33875": "nan", "33880": "nan", "33885": "nan", "33890": "nan", "33895": "nan", "33900": 1.42494, "33905": "nan", "33910": "nan", "33915": "nan", "33920": "nan", "33925": "nan", "33930": "nan", "33935": "nan", "33940": "nan", "33945": "nan", "33950": "nan", "33955": "nan", "33960": "nan", "33965": "nan", "33970": "nan", "33975": "nan", "33980": "nan", "33985": "nan", "33990": "nan", "33995": "nan", "34000": 1.42542, "34005": "nan", "34010": "nan", "34015": "nan", "34020": "nan", "34025": "nan", "34030": "nan", "34035": "nan", "34040": "nan", "34045": "nan", "34050": "nan", "34055": "nan", "34060": "nan", "34065": "nan", "34070": "nan", "34075": "nan", "34080": "nan", "34085": "nan", "34090": "nan", "34095": "nan", "34100": 1.42583, "34105": "nan", "34110": "nan", "34115": "nan", "34120": "nan", "34125": "nan", "34130": "nan", "34135": "nan", "34140": "nan", "34145": "nan", "34150": "nan", "34155": "nan", "34160": "nan", "34165": "nan", "34170": "nan", "34175": "nan", "34180": "nan", "34185": "nan", "34190": "nan", "34195": "nan", "34200": 1.42802, "34205": "nan", "34210": "nan", "34215": "nan", "34220": "nan", "34225": "nan", "34230": "nan", "34235": "nan", "34240": "nan", "34245": "nan", "34250": "nan", "34255": "nan", "34260": "nan", "34265": "nan", "34270": "nan", "34275": "nan", "34280": "nan", "34285": "nan", "34290": "nan", "34295": "nan", "34300": 1.42967, "34305": "nan", "34310": "nan", "34315": "nan", "34320": "nan", "34325": "nan", "34330": "nan", "34335": "nan", "34340": "nan", "34345": "nan", "34350": "nan", "34355": "nan", "34360": "nan", "34365": "nan", "34370": "nan", "34375": "nan", "34380": "nan", "34385": "nan", "34390": "nan", "34395": "nan", "34400": 1.42975, "34405": "nan", "34410": "nan", "34415": "nan", "34420": "nan", "34425": "nan", "34430": "nan", "34435": "nan", "34440": "nan", "34445": "nan", "34450": "nan", "34455": "nan", "34460": "nan", "34465": "nan", "34470": "nan", "34475": "nan", "34480": "nan", "34485": "nan", "34490": "nan", "34495": "nan", "34500": 1.42401, "34505": "nan", "34510": "nan", "34515": "nan", "34520": "nan", "34525": "nan", "34530": "nan", "34535": "nan", "34540": "nan", "34545": "nan", "34550": "nan", "34555": "nan", "34560": "nan", "34565": "nan", "34570": "nan", "34575": "nan", "34580": "nan", "34585": "nan", "34590": "nan", "34595": "nan", "34600": 1.43113, "34605": "nan", "34610": "nan", "34615": "nan", "34620": "nan", "34625": "nan", "34630": "nan", "34635": "nan", "34640": "nan", "34645": "nan", "34650": "nan", "34655": "nan", "34660": "nan", "34665": "nan", "34670": "nan", "34675": "nan", "34680": "nan", "34685": "nan", "34690": "nan", "34695": "nan", "34700": 1.42837, "34705": "nan", "34710": "nan", "34715": "nan", "34720": "nan", "34725": "nan", "34730": "nan", "34735": "nan", "34740": "nan", "34745": "nan", "34750": "nan", "34755": "nan", "34760": "nan", "34765": "nan", "34770": "nan", "34775": "nan", "34780": "nan", "34785": "nan", "34790": "nan", "34795": "nan", "34800": 1.42587, "34805": "nan", "34810": "nan", "34815": "nan", "34820": "nan", "34825": "nan", "34830": "nan", "34835": "nan", "34840": "nan", "34845": "nan", "34850": "nan", "34855": "nan", "34860": "nan", "34865": "nan", "34870": "nan", "34875": "nan", "34880": "nan", "34885": "nan", "34890": "nan", "34895": "nan", "34900": 1.43242, "34905": "nan", "34910": "nan", "34915": "nan", "34920": "nan", "34925": "nan", "34930": "nan", "34935": "nan", "34940": "nan", "34945": "nan", "34950": "nan", "34955": "nan", "34960": "nan", "34965": "nan", "34970": "nan", "34975": "nan", "34980": "nan", "34985": "nan", "34990": "nan", "34995": "nan", "35000": 1.43108, "35005": "nan", "35010": "nan", "35015": "nan", "35020": "nan", "35025": "nan", "35030": "nan", "35035": "nan", "35040": "nan", "35045": "nan", "35050": "nan", "35055": "nan", "35060": "nan", "35065": "nan", "35070": "nan", "35075": "nan", "35080": "nan", "35085": "nan", "35090": "nan", "35095": "nan", "35100": 1.42714, "35105": "nan", "35110": "nan", "35115": "nan", "35120": "nan", "35125": "nan", "35130": "nan", "35135": "nan", "35140": "nan", "35145": "nan", "35150": "nan", "35155": "nan", "35160": "nan", "35165": "nan", "35170": "nan", "35175": "nan", "35180": "nan", "35185": "nan", "35190": "nan", "35195": "nan", "35200": 1.43064, "35205": "nan", "35210": "nan", "35215": "nan", "35220": "nan", "35225": "nan", "35230": "nan", "35235": "nan", "35240": "nan", "35245": "nan", "35250": "nan", "35255": "nan", "35260": "nan", "35265": "nan", "35270": "nan", "35275": "nan", "35280": "nan", "35285": "nan", "35290": "nan", "35295": "nan", "35300": 1.42643, "35305": "nan", "35310": "nan", "35315": "nan", "35320": "nan", "35325": "nan", "35330": "nan", "35335": "nan", "35340": "nan", "35345": "nan", "35350": "nan", "35355": "nan", "35360": "nan", "35365": "nan", "35370": "nan", "35375": "nan", "35380": "nan", "35385": "nan", "35390": "nan", "35395": "nan", "35400": 1.43065, "35405": "nan", "35410": "nan", "35415": "nan", "35420": "nan", "35425": "nan", "35430": "nan", "35435": "nan", "35440": "nan", "35445": "nan", "35450": "nan", "35455": "nan", "35460": "nan", "35465": "nan", "35470": "nan", "35475": "nan", "35480": "nan", "35485": "nan", "35490": "nan", "35495": "nan", "35500": 1.42833, "35505": "nan", "35510": "nan", "35515": "nan", "35520": "nan", "35525": "nan", "35530": "nan", "35535": "nan", "35540": "nan", "35545": "nan", "35550": "nan", "35555": "nan", "35560": "nan", "35565": "nan", "35570": "nan", "35575": "nan", "35580": "nan", "35585": "nan", "35590": "nan", "35595": "nan", "35600": 1.42934, "35605": "nan", "35610": "nan", "35615": "nan", "35620": "nan", "35625": "nan", "35630": "nan", "35635": "nan", "35640": "nan", "35645": "nan", "35650": "nan", "35655": "nan", "35660": "nan", "35665": "nan", "35670": "nan", "35675": "nan", "35680": "nan", "35685": "nan", "35690": "nan", "35695": "nan", "35700": 1.42435, "35705": "nan", "35710": "nan", "35715": "nan", "35720": "nan", "35725": "nan", "35730": "nan", "35735": "nan", "35740": "nan", "35745": "nan", "35750": "nan", "35755": "nan", "35760": "nan", "35765": "nan", "35770": "nan", "35775": "nan", "35780": "nan", "35785": "nan", "35790": "nan", "35795": "nan", "35800": 1.42751, "35805": "nan", "35810": "nan", "35815": "nan", "35820": "nan", "35825": "nan", "35830": "nan", "35835": "nan", "35840": "nan", "35845": "nan", "35850": "nan", "35855": "nan", "35860": "nan", "35865": "nan", "35870": "nan", "35875": "nan", "35880": "nan", "35885": "nan", "35890": "nan", "35895": "nan", "35900": 1.42511, "35905": "nan", "35910": "nan", "35915": "nan", "35920": "nan", "35925": "nan", "35930": "nan", "35935": "nan", "35940": "nan", "35945": "nan", "35950": "nan", "35955": "nan", "35960": "nan", "35965": "nan", "35970": "nan", "35975": "nan", "35980": "nan", "35985": "nan", "35990": "nan", "35995": "nan", "36000": 1.43068, "36005": "nan", "36010": "nan", "36015": "nan", "36020": "nan", "36025": "nan", "36030": "nan", "36035": "nan", "36040": "nan", "36045": "nan", "36050": "nan", "36055": "nan", "36060": "nan", "36065": "nan", "36070": "nan", "36075": "nan", "36080": "nan", "36085": "nan", "36090": "nan", "36095": "nan", "36100": 1.4294, "36105": "nan", "36110": "nan", "36115": "nan", "36120": "nan", "36125": "nan", "36130": "nan", "36135": "nan", "36140": "nan", "36145": "nan", "36150": "nan", "36155": "nan", "36160": "nan", "36165": "nan", "36170": "nan", "36175": "nan", "36180": "nan", "36185": "nan", "36190": "nan", "36195": "nan", "36200": 1.59945, "36205": "nan", "36210": "nan", "36215": "nan", "36220": "nan", "36225": "nan", "36230": "nan", "36235": "nan", "36240": "nan", "36245": "nan", "36250": "nan", "36255": "nan", "36260": "nan", "36265": "nan", "36270": "nan", "36275": "nan", "36280": "nan", "36285": "nan", "36290": "nan", "36295": "nan", "36300": 1.43517, "36305": "nan", "36310": "nan", "36315": "nan", "36320": "nan", "36325": "nan", "36330": "nan", "36335": "nan", "36340": "nan", "36345": "nan", "36350": "nan", "36355": "nan", "36360": "nan", "36365": "nan", "36370": "nan", "36375": "nan", "36380": "nan", "36385": "nan", "36390": "nan", "36395": "nan", "36400": 1.43462, "36405": "nan", "36410": "nan", "36415": "nan", "36420": "nan", "36425": "nan", "36430": "nan", "36435": "nan", "36440": "nan", "36445": "nan", "36450": "nan", "36455": "nan", "36460": "nan", "36465": "nan", "36470": "nan", "36475": "nan", "36480": "nan", "36485": "nan", "36490": "nan", "36495": "nan", "36500": 1.43193, "36505": "nan", "36510": "nan", "36515": "nan", "36520": "nan", "36525": "nan", "36530": "nan", "36535": "nan", "36540": "nan", "36545": "nan", "36550": "nan", "36555": "nan", "36560": "nan", "36565": "nan", "36570": "nan", "36575": "nan", "36580": "nan", "36585": "nan", "36590": "nan", "36595": "nan", "36600": 1.43731, "36605": "nan", "36610": "nan", "36615": "nan", "36620": "nan", "36625": "nan", "36630": "nan", "36635": "nan", "36640": "nan", "36645": "nan", "36650": "nan", "36655": "nan", "36660": "nan", "36665": "nan", "36670": "nan", "36675": "nan", "36680": "nan", "36685": "nan", "36690": "nan", "36695": "nan", "36700": 1.43883, "36705": "nan", "36710": "nan", "36715": "nan", "36720": "nan", "36725": "nan", "36730": "nan", "36735": "nan", "36740": "nan", "36745": "nan", "36750": "nan", "36755": "nan", "36760": "nan", "36765": "nan", "36770": "nan", "36775": "nan", "36780": "nan", "36785": "nan", "36790": "nan", "36795": "nan", "36800": 1.43204, "36805": "nan", "36810": "nan", "36815": "nan", "36820": "nan", "36825": "nan", "36830": "nan", "36835": "nan", "36840": "nan", "36845": "nan", "36850": "nan", "36855": "nan", "36860": "nan", "36865": "nan", "36870": "nan", "36875": "nan", "36880": "nan", "36885": "nan", "36890": "nan", "36895": "nan", "36900": 1.43974, "36905": "nan", "36910": "nan", "36915": "nan", "36920": "nan", "36925": "nan", "36930": "nan", "36935": "nan", "36940": "nan", "36945": "nan", "36950": "nan", "36955": "nan", "36960": "nan", "36965": "nan", "36970": "nan", "36975": "nan", "36980": "nan", "36985": "nan", "36990": "nan", "36995": "nan", "37000": 1.43599, "37005": "nan", "37010": "nan", "37015": "nan", "37020": "nan", "37025": "nan", "37030": "nan", "37035": "nan", "37040": "nan", "37045": "nan", "37050": "nan", "37055": "nan", "37060": "nan", "37065": "nan", "37070": "nan", "37075": "nan", "37080": "nan", "37085": "nan", "37090": "nan", "37095": "nan", "37100": 1.43335, "37105": "nan", "37110": "nan", "37115": "nan", "37120": "nan", "37125": "nan", "37130": "nan", "37135": "nan", "37140": "nan", "37145": "nan", "37150": "nan", "37155": "nan", "37160": "nan", "37165": "nan", "37170": "nan", "37175": "nan", "37180": "nan", "37185": "nan", "37190": "nan", "37195": "nan", "37200": 1.43417, "37205": "nan", "37210": "nan", "37215": "nan", "37220": "nan", "37225": "nan", "37230": "nan", "37235": "nan", "37240": "nan", "37245": "nan", "37250": "nan", "37255": "nan", "37260": "nan", "37265": "nan", "37270": "nan", "37275": "nan", "37280": "nan", "37285": "nan", "37290": "nan", "37295": "nan", "37300": 1.43483, "37305": "nan", "37310": "nan", "37315": "nan", "37320": "nan", "37325": "nan", "37330": "nan", "37335": "nan", "37340": "nan", "37345": "nan", "37350": "nan", "37355": "nan", "37360": "nan", "37365": "nan", "37370": "nan", "37375": "nan", "37380": "nan", "37385": "nan", "37390": "nan", "37395": "nan", "37400": 1.4312, "37405": "nan", "37410": "nan", "37415": "nan", "37420": "nan", "37425": "nan", "37430": "nan", "37435": "nan", "37440": "nan", "37445": "nan", "37450": "nan", "37455": "nan", "37460": "nan", "37465": "nan", "37470": "nan", "37475": "nan", "37480": "nan", "37485": "nan", "37490": "nan", "37495": "nan", "37500": 1.43276, "37505": "nan", "37510": "nan", "37515": "nan", "37520": "nan", "37525": "nan", "37530": "nan", "37535": "nan", "37540": "nan", "37545": "nan", "37550": "nan", "37555": "nan", "37560": "nan", "37565": "nan", "37570": "nan", "37575": "nan", "37580": "nan", "37585": "nan", "37590": "nan", "37595": "nan", "37600": 1.43754, "37605": "nan", "37610": "nan", "37615": "nan", "37620": "nan", "37625": "nan", "37630": "nan", "37635": "nan", "37640": "nan", "37645": "nan", "37650": "nan", "37655": "nan", "37660": "nan", "37665": "nan", "37670": "nan", "37675": "nan", "37680": "nan", "37685": "nan", "37690": "nan", "37695": "nan", "37700": 1.44004, "37705": "nan", "37710": "nan", "37715": "nan", "37720": "nan", "37725": "nan", "37730": "nan", "37735": "nan", "37740": "nan", "37745": "nan", "37750": "nan", "37755": "nan", "37760": "nan", "37765": "nan", "37770": "nan", "37775": "nan", "37780": "nan", "37785": "nan", "37790": "nan", "37795": "nan", "37800": 1.43413, "37805": "nan", "37810": "nan", "37815": "nan", "37820": "nan", "37825": "nan", "37830": "nan", "37835": "nan", "37840": "nan", "37845": "nan", "37850": "nan", "37855": "nan", "37860": "nan", "37865": "nan", "37870": "nan", "37875": "nan", "37880": "nan", "37885": "nan", "37890": "nan", "37895": "nan", "37900": 1.43847, "37905": "nan", "37910": "nan", "37915": "nan", "37920": "nan", "37925": "nan", "37930": "nan", "37935": "nan", "37940": "nan", "37945": "nan", "37950": "nan", "37955": "nan", "37960": "nan", "37965": "nan", "37970": "nan", "37975": "nan", "37980": "nan", "37985": "nan", "37990": "nan", "37995": "nan", "38000": 1.43753, "38005": "nan", "38010": "nan", "38015": "nan", "38020": "nan", "38025": "nan", "38030": "nan", "38035": "nan", "38040": "nan", "38045": "nan", "38050": "nan", "38055": "nan", "38060": "nan", "38065": "nan", "38070": "nan", "38075": "nan", "38080": "nan", "38085": "nan", "38090": "nan", "38095": "nan", "38100": 1.43592, "38105": "nan", "38110": "nan", "38115": "nan", "38120": "nan", "38125": "nan", "38130": "nan", "38135": "nan", "38140": "nan", "38145": "nan", "38150": "nan", "38155": "nan", "38160": "nan", "38165": "nan", "38170": "nan", "38175": "nan", "38180": "nan", "38185": "nan", "38190": "nan", "38195": "nan", "38200": 1.46581, "38205": "nan", "38210": "nan", "38215": "nan", "38220": "nan", "38225": "nan", "38230": "nan", "38235": "nan", "38240": "nan", "38245": "nan", "38250": "nan", "38255": "nan", "38260": "nan", "38265": "nan", "38270": "nan", "38275": "nan", "38280": "nan", "38285": "nan", "38290": "nan", "38295": "nan", "38300": 1.43756, "38305": "nan", "38310": "nan", "38315": "nan", "38320": "nan", "38325": "nan", "38330": "nan", "38335": "nan", "38340": "nan", "38345": "nan", "38350": "nan", "38355": "nan", "38360": "nan", "38365": "nan", "38370": "nan", "38375": "nan", "38380": "nan", "38385": "nan", "38390": "nan", "38395": "nan", "38400": 1.43785, "38405": "nan", "38410": "nan", "38415": "nan", "38420": "nan", "38425": "nan", "38430": "nan", "38435": "nan", "38440": "nan", "38445": "nan", "38450": "nan", "38455": "nan", "38460": "nan", "38465": "nan", "38470": "nan", "38475": "nan", "38480": "nan", "38485": "nan", "38490": "nan", "38495": "nan", "38500": 1.43637, "38505": "nan", "38510": "nan", "38515": "nan", "38520": "nan", "38525": "nan", "38530": "nan", "38535": "nan", "38540": "nan", "38545": "nan", "38550": "nan", "38555": "nan", "38560": "nan", "38565": "nan", "38570": "nan", "38575": "nan", "38580": "nan", "38585": "nan", "38590": "nan", "38595": "nan", "38600": 1.43508, "38605": "nan", "38610": "nan", "38615": "nan", "38620": "nan", "38625": "nan", "38630": "nan", "38635": "nan", "38640": "nan", "38645": "nan", "38650": "nan", "38655": "nan", "38660": "nan", "38665": "nan", "38670": "nan", "38675": "nan", "38680": "nan", "38685": "nan", "38690": "nan", "38695": "nan", "38700": 1.43214, "38705": "nan", "38710": "nan", "38715": "nan", "38720": "nan", "38725": "nan", "38730": "nan", "38735": "nan", "38740": "nan", "38745": "nan", "38750": "nan", "38755": "nan", "38760": "nan", "38765": "nan", "38770": "nan", "38775": "nan", "38780": "nan", "38785": "nan", "38790": "nan", "38795": "nan", "38800": 1.43817, "38805": "nan", "38810": "nan", "38815": "nan", "38820": "nan", "38825": "nan", "38830": "nan", "38835": "nan", "38840": "nan", "38845": "nan", "38850": "nan", "38855": "nan", "38860": "nan", "38865": "nan", "38870": "nan", "38875": "nan", "38880": "nan", "38885": "nan", "38890": "nan", "38895": "nan", "38900": 1.43792, "38905": "nan", "38910": "nan", "38915": "nan", "38920": "nan", "38925": "nan", "38930": "nan", "38935": "nan", "38940": "nan", "38945": "nan", "38950": "nan", "38955": "nan", "38960": "nan", "38965": "nan", "38970": "nan", "38975": "nan", "38980": "nan", "38985": "nan", "38990": "nan", "38995": "nan", "39000": 1.43538, "39005": "nan", "39010": "nan", "39015": "nan", "39020": "nan", "39025": "nan", "39030": "nan", "39035": "nan", "39040": "nan", "39045": "nan", "39050": "nan", "39055": "nan", "39060": "nan", "39065": "nan", "39070": "nan", "39075": "nan", "39080": "nan", "39085": "nan", "39090": "nan", "39095": "nan", "39100": 1.44878, "39105": "nan", "39110": "nan", "39115": "nan", "39120": "nan", "39125": "nan", "39130": "nan", "39135": "nan", "39140": "nan", "39145": "nan", "39150": "nan", "39155": "nan", "39160": "nan", "39165": "nan", "39170": "nan", "39175": "nan", "39180": "nan", "39185": "nan", "39190": "nan", "39195": "nan", "39200": 1.43447, "39205": "nan", "39210": "nan", "39215": "nan", "39220": "nan", "39225": "nan", "39230": "nan", "39235": "nan", "39240": "nan", "39245": "nan", "39250": "nan", "39255": "nan", "39260": "nan", "39265": "nan", "39270": "nan", "39275": "nan", "39280": "nan", "39285": "nan", "39290": "nan", "39295": "nan", "39300": 1.43818, "39305": "nan", "39310": "nan", "39315": "nan", "39320": "nan", "39325": "nan", "39330": "nan", "39335": "nan", "39340": "nan", "39345": "nan", "39350": "nan", "39355": "nan", "39360": "nan", "39365": "nan", "39370": "nan", "39375": "nan", "39380": "nan", "39385": "nan", "39390": "nan", "39395": "nan", "39400": 1.43973, "39405": "nan", "39410": "nan", "39415": "nan", "39420": "nan", "39425": "nan", "39430": "nan", "39435": "nan", "39440": "nan", "39445": "nan", "39450": "nan", "39455": "nan", "39460": "nan", "39465": "nan", "39470": "nan", "39475": "nan", "39480": "nan", "39485": "nan", "39490": "nan", "39495": "nan", "39500": 1.43779, "39505": "nan", "39510": "nan", "39515": "nan", "39520": "nan", "39525": "nan", "39530": "nan", "39535": "nan", "39540": "nan", "39545": "nan", "39550": "nan", "39555": "nan", "39560": "nan", "39565": "nan", "39570": "nan", "39575": "nan", "39580": "nan", "39585": "nan", "39590": "nan", "39595": "nan", "39600": 1.43659, "39605": "nan", "39610": "nan", "39615": "nan", "39620": "nan", "39625": "nan", "39630": "nan", "39635": "nan", "39640": "nan", "39645": "nan", "39650": "nan", "39655": "nan", "39660": "nan", "39665": "nan", "39670": "nan", "39675": "nan", "39680": "nan", "39685": "nan", "39690": "nan", "39695": "nan", "39700": 1.43861, "39705": "nan", "39710": "nan", "39715": "nan", "39720": "nan", "39725": "nan", "39730": "nan", "39735": "nan", "39740": "nan", "39745": "nan", "39750": "nan", "39755": "nan", "39760": "nan", "39765": "nan", "39770": "nan", "39775": "nan", "39780": "nan", "39785": "nan", "39790": "nan", "39795": "nan", "39800": 1.43852, "39805": "nan", "39810": "nan", "39815": "nan", "39820": "nan", "39825": "nan", "39830": "nan", "39835": "nan", "39840": "nan", "39845": "nan", "39850": "nan", "39855": "nan", "39860": "nan", "39865": "nan", "39870": "nan", "39875": "nan", "39880": "nan", "39885": "nan", "39890": "nan", "39895": "nan", "39900": 1.43399, "39905": "nan", "39910": "nan", "39915": "nan", "39920": "nan", "39925": "nan", "39930": "nan", "39935": "nan", "39940": "nan", "39945": "nan", "39950": "nan", "39955": "nan", "39960": "nan", "39965": "nan", "39970": "nan", "39975": "nan", "39980": "nan", "39985": "nan", "39990": "nan", "39995": "nan", "40000": 1.43501, "40005": "nan", "40010": "nan", "40015": "nan", "40020": "nan", "40025": "nan", "40030": "nan", "40035": "nan", "40040": "nan", "40045": "nan", "40050": "nan", "40055": "nan", "40060": "nan", "40065": "nan", "40070": "nan", "40075": "nan", "40080": "nan", "40085": "nan", "40090": "nan", "40095": "nan", "40100": 1.43787, "40105": "nan", "40110": "nan", "40115": "nan", "40120": "nan", "40125": "nan", "40130": "nan", "40135": "nan", "40140": "nan", "40145": "nan", "40150": "nan", "40155": "nan", "40160": "nan", "40165": "nan", "40170": "nan", "40175": "nan", "40180": "nan", "40185": "nan", "40190": "nan", "40195": "nan", "40200": 1.43559, "40205": "nan", "40210": "nan", "40215": "nan", "40220": "nan", "40225": "nan", "40230": "nan", "40235": "nan", "40240": "nan", "40245": "nan", "40250": "nan", "40255": "nan", "40260": "nan", "40265": "nan", "40270": "nan", "40275": "nan", "40280": "nan", "40285": "nan", "40290": "nan", "40295": "nan", "40300": 1.43588, "40305": "nan", "40310": "nan", "40315": "nan", "40320": "nan", "40325": "nan", "40330": "nan", "40335": "nan", "40340": "nan", "40345": "nan", "40350": "nan", "40355": "nan", "40360": "nan", "40365": "nan", "40370": "nan", "40375": "nan", "40380": "nan", "40385": "nan", "40390": "nan", "40395": "nan", "40400": 1.43665, "40405": "nan", "40410": "nan", "40415": "nan", "40420": "nan", "40425": "nan", "40430": "nan", "40435": "nan", "40440": "nan", "40445": "nan", "40450": "nan", "40455": "nan", "40460": "nan", "40465": "nan", "40470": "nan", "40475": "nan", "40480": "nan", "40485": "nan", "40490": "nan", "40495": "nan", "40500": 1.43985, "40505": "nan", "40510": "nan", "40515": "nan", "40520": "nan", "40525": "nan", "40530": "nan", "40535": "nan", "40540": "nan", "40545": "nan", "40550": "nan", "40555": "nan", "40560": "nan", "40565": "nan", "40570": "nan", "40575": "nan", "40580": "nan", "40585": "nan", "40590": "nan", "40595": "nan", "40600": 1.43498, "40605": "nan", "40610": "nan", "40615": "nan", "40620": "nan", "40625": "nan", "40630": "nan", "40635": "nan", "40640": "nan", "40645": "nan", "40650": "nan", "40655": "nan", "40660": "nan", "40665": "nan", "40670": "nan", "40675": "nan", "40680": "nan", "40685": "nan", "40690": "nan", "40695": "nan", "40700": 1.4432, "40705": "nan", "40710": "nan", "40715": "nan", "40720": "nan", "40725": "nan", "40730": "nan", "40735": "nan", "40740": "nan", "40745": "nan", "40750": "nan", "40755": "nan", "40760": "nan", "40765": "nan", "40770": "nan", "40775": "nan", "40780": "nan", "40785": "nan", "40790": "nan", "40795": "nan", "40800": 1.43546, "40805": "nan", "40810": "nan", "40815": "nan", "40820": "nan", "40825": "nan", "40830": "nan", "40835": "nan", "40840": "nan", "40845": "nan", "40850": "nan", "40855": "nan", "40860": "nan", "40865": "nan", "40870": "nan", "40875": "nan", "40880": "nan", "40885": "nan", "40890": "nan", "40895": "nan", "40900": 1.43518, "40905": "nan", "40910": "nan", "40915": "nan", "40920": "nan", "40925": "nan", "40930": "nan", "40935": "nan", "40940": "nan", "40945": "nan", "40950": "nan", "40955": "nan", "40960": "nan", "40965": "nan", "40970": "nan", "40975": "nan", "40980": "nan", "40985": "nan", "40990": "nan", "40995": "nan", "41000": 1.43536, "41005": "nan", "41010": "nan", "41015": "nan", "41020": "nan", "41025": "nan", "41030": "nan", "41035": "nan", "41040": "nan", "41045": "nan", "41050": "nan", "41055": "nan", "41060": "nan", "41065": "nan", "41070": "nan", "41075": "nan", "41080": "nan", "41085": "nan", "41090": "nan", "41095": "nan", "41100": 1.43744, "41105": "nan", "41110": "nan", "41115": "nan", "41120": "nan", "41125": "nan", "41130": "nan", "41135": "nan", "41140": "nan", "41145": "nan", "41150": "nan", "41155": "nan", "41160": "nan", "41165": "nan", "41170": "nan", "41175": "nan", "41180": "nan", "41185": "nan", "41190": "nan", "41195": "nan", "41200": 1.43538, "41205": "nan", "41210": "nan", "41215": "nan", "41220": "nan", "41225": "nan", "41230": "nan", "41235": "nan", "41240": "nan", "41245": "nan", "41250": "nan", "41255": "nan", "41260": "nan", "41265": "nan", "41270": "nan", "41275": "nan", "41280": "nan", "41285": "nan", "41290": "nan", "41295": "nan", "41300": 1.43432, "41305": "nan", "41310": "nan", "41315": "nan", "41320": "nan", "41325": "nan", "41330": "nan", "41335": "nan", "41340": "nan", "41345": "nan", "41350": "nan", "41355": "nan", "41360": "nan", "41365": "nan", "41370": "nan", "41375": "nan", "41380": "nan", "41385": "nan", "41390": "nan", "41395": "nan", "41400": 1.43398, "41405": "nan", "41410": "nan", "41415": "nan", "41420": "nan", "41425": "nan", "41430": "nan", "41435": "nan", "41440": "nan", "41445": "nan", "41450": "nan", "41455": "nan", "41460": "nan", "41465": "nan", "41470": "nan", "41475": "nan", "41480": "nan", "41485": "nan", "41490": "nan", "41495": "nan", "41500": 1.43301, "41505": "nan", "41510": "nan", "41515": "nan", "41520": "nan", "41525": "nan", "41530": "nan", "41535": "nan", "41540": "nan", "41545": "nan", "41550": "nan", "41555": "nan", "41560": "nan", "41565": "nan", "41570": "nan", "41575": "nan", "41580": "nan", "41585": "nan", "41590": "nan", "41595": "nan", "41600": 1.43494, "41605": "nan", "41610": "nan", "41615": "nan", "41620": "nan", "41625": "nan", "41630": "nan", "41635": "nan", "41640": "nan", "41645": "nan", "41650": "nan", "41655": "nan", "41660": "nan", "41665": "nan", "41670": "nan", "41675": "nan", "41680": "nan", "41685": "nan", "41690": "nan", "41695": "nan", "41700": 1.43464, "41705": "nan", "41710": "nan", "41715": "nan", "41720": "nan", "41725": "nan", "41730": "nan", "41735": "nan", "41740": "nan", "41745": "nan", "41750": "nan", "41755": "nan", "41760": "nan", "41765": "nan", "41770": "nan", "41775": "nan", "41780": "nan", "41785": "nan", "41790": "nan", "41795": "nan", "41800": 1.43589, "41805": "nan", "41810": "nan", "41815": "nan", "41820": "nan", "41825": "nan", "41830": "nan", "41835": "nan", "41840": "nan", "41845": "nan", "41850": "nan", "41855": "nan", "41860": "nan", "41865": "nan", "41870": "nan", "41875": "nan", "41880": "nan", "41885": "nan", "41890": "nan", "41895": "nan", "41900": 1.43977, "41905": "nan", "41910": "nan", "41915": "nan", "41920": "nan", "41925": "nan", "41930": "nan", "41935": "nan", "41940": "nan", "41945": "nan", "41950": "nan", "41955": "nan", "41960": "nan", "41965": "nan", "41970": "nan", "41975": "nan", "41980": "nan", "41985": "nan", "41990": "nan", "41995": "nan", "42000": 1.43581, "42005": "nan", "42010": "nan", "42015": "nan", "42020": "nan", "42025": "nan", "42030": "nan", "42035": "nan", "42040": "nan", "42045": "nan", "42050": "nan", "42055": "nan", "42060": "nan", "42065": "nan", "42070": "nan", "42075": "nan", "42080": "nan", "42085": "nan", "42090": "nan", "42095": "nan", "42100": 1.43224, "42105": "nan", "42110": "nan", "42115": "nan", "42120": "nan", "42125": "nan", "42130": "nan", "42135": "nan", "42140": "nan", "42145": "nan", "42150": "nan", "42155": "nan", "42160": "nan", "42165": "nan", "42170": "nan", "42175": "nan", "42180": "nan", "42185": "nan", "42190": "nan", "42195": "nan", "42200": 1.43225, "42205": "nan", "42210": "nan", "42215": "nan", "42220": "nan", "42225": "nan", "42230": "nan", "42235": "nan", "42240": "nan", "42245": "nan", "42250": "nan", "42255": "nan", "42260": "nan", "42265": "nan", "42270": "nan", "42275": "nan", "42280": "nan", "42285": "nan", "42290": "nan", "42295": "nan", "42300": 1.43614, "42305": "nan", "42310": "nan", "42315": "nan", "42320": "nan", "42325": "nan", "42330": "nan", "42335": "nan", "42340": "nan", "42345": "nan", "42350": "nan", "42355": "nan", "42360": "nan", "42365": "nan", "42370": "nan", "42375": "nan", "42380": "nan", "42385": "nan", "42390": "nan", "42395": "nan", "42400": 1.43493, "42405": "nan", "42410": "nan", "42415": "nan", "42420": "nan", "42425": "nan", "42430": "nan", "42435": "nan", "42440": "nan", "42445": "nan", "42450": "nan", "42455": "nan", "42460": "nan", "42465": "nan", "42470": "nan", "42475": "nan", "42480": "nan", "42485": "nan", "42490": "nan", "42495": "nan", "42500": 1.4402, "42505": "nan", "42510": "nan", "42515": "nan", "42520": "nan", "42525": "nan", "42530": "nan", "42535": "nan", "42540": "nan", "42545": "nan", "42550": "nan", "42555": "nan", "42560": "nan", "42565": "nan", "42570": "nan", "42575": "nan", "42580": "nan", "42585": "nan", "42590": "nan", "42595": "nan", "42600": 1.43784, "42605": "nan", "42610": "nan", "42615": "nan", "42620": "nan", "42625": "nan", "42630": "nan", "42635": "nan", "42640": "nan", "42645": "nan", "42650": "nan", "42655": "nan", "42660": "nan", "42665": "nan", "42670": "nan", "42675": "nan", "42680": "nan", "42685": "nan", "42690": "nan", "42695": "nan", "42700": 1.43552, "42705": "nan", "42710": "nan", "42715": "nan", "42720": "nan", "42725": "nan", "42730": "nan", "42735": "nan", "42740": "nan", "42745": "nan", "42750": "nan", "42755": "nan", "42760": "nan", "42765": "nan", "42770": "nan", "42775": "nan", "42780": "nan", "42785": "nan", "42790": "nan", "42795": "nan", "42800": 1.43894, "42805": "nan", "42810": "nan", "42815": "nan", "42820": "nan", "42825": "nan", "42830": "nan", "42835": "nan", "42840": "nan", "42845": "nan", "42850": "nan", "42855": "nan", "42860": "nan", "42865": "nan", "42870": "nan", "42875": "nan", "42880": "nan", "42885": "nan", "42890": "nan", "42895": "nan", "42900": 1.43404, "42905": "nan", "42910": "nan", "42915": "nan", "42920": "nan", "42925": "nan", "42930": "nan", "42935": "nan", "42940": "nan", "42945": "nan", "42950": "nan", "42955": "nan", "42960": "nan", "42965": "nan", "42970": "nan", "42975": "nan", "42980": "nan", "42985": "nan", "42990": "nan", "42995": "nan", "43000": 1.4377, "43005": "nan", "43010": "nan", "43015": "nan", "43020": "nan", "43025": "nan", "43030": "nan", "43035": "nan", "43040": "nan", "43045": "nan", "43050": "nan", "43055": "nan", "43060": "nan", "43065": "nan", "43070": "nan", "43075": "nan", "43080": "nan", "43085": "nan", "43090": "nan", "43095": "nan", "43100": 1.43788, "43105": "nan", "43110": "nan", "43115": "nan", "43120": "nan", "43125": "nan", "43130": "nan", "43135": "nan", "43140": "nan", "43145": "nan", "43150": "nan", "43155": "nan", "43160": "nan", "43165": "nan", "43170": "nan", "43175": "nan", "43180": "nan", "43185": "nan", "43190": "nan", "43195": "nan", "43200": 1.44608, "43205": "nan", "43210": "nan", "43215": "nan", "43220": "nan", "43225": "nan", "43230": "nan", "43235": "nan", "43240": "nan", "43245": "nan", "43250": "nan", "43255": "nan", "43260": "nan", "43265": "nan", "43270": "nan", "43275": "nan", "43280": "nan", "43285": "nan", "43290": "nan", "43295": "nan", "43300": 1.43217, "43305": "nan", "43310": "nan", "43315": "nan", "43320": "nan", "43325": "nan", "43330": "nan", "43335": "nan", "43340": "nan", "43345": "nan", "43350": "nan", "43355": "nan", "43360": "nan", "43365": "nan", "43370": "nan", "43375": "nan", "43380": "nan", "43385": "nan", "43390": "nan", "43395": "nan", "43400": 1.43155, "43405": "nan", "43410": "nan", "43415": "nan", "43420": "nan", "43425": "nan", "43430": "nan", "43435": "nan", "43440": "nan", "43445": "nan", "43450": "nan", "43455": "nan", "43460": "nan", "43465": "nan", "43470": "nan", "43475": "nan", "43480": "nan", "43485": "nan", "43490": "nan", "43495": "nan", "43500": 1.4446, "43505": "nan", "43510": "nan", "43515": "nan", "43520": "nan", "43525": "nan", "43530": "nan", "43535": "nan", "43540": "nan", "43545": "nan", "43550": "nan", "43555": "nan", "43560": "nan", "43565": "nan", "43570": "nan", "43575": "nan", "43580": "nan", "43585": "nan", "43590": "nan", "43595": "nan", "43600": 1.4346, "43605": "nan", "43610": "nan", "43615": "nan", "43620": "nan", "43625": "nan", "43630": "nan", "43635": "nan", "43640": "nan", "43645": "nan", "43650": "nan", "43655": "nan", "43660": "nan", "43665": "nan", "43670": "nan", "43675": "nan", "43680": "nan", "43685": "nan", "43690": "nan", "43695": "nan", "43700": 1.44016, "43705": "nan", "43710": "nan", "43715": "nan", "43720": "nan", "43725": "nan", "43730": "nan", "43735": "nan", "43740": "nan", "43745": "nan", "43750": "nan", "43755": "nan", "43760": "nan", "43765": "nan", "43770": "nan", "43775": "nan", "43780": "nan", "43785": "nan", "43790": "nan", "43795": "nan", "43800": 1.435, "43805": "nan", "43810": "nan", "43815": "nan", "43820": "nan", "43825": "nan", "43830": "nan", "43835": "nan", "43840": "nan", "43845": "nan", "43850": "nan", "43855": "nan", "43860": "nan", "43865": "nan", "43870": "nan", "43875": "nan", "43880": "nan", "43885": "nan", "43890": "nan", "43895": "nan", "43900": 1.43649, "43905": "nan", "43910": "nan", "43915": "nan", "43920": "nan", "43925": "nan", "43930": "nan", "43935": "nan", "43940": "nan", "43945": "nan", "43950": "nan", "43955": "nan", "43960": "nan", "43965": "nan", "43970": "nan", "43975": "nan", "43980": "nan", "43985": "nan", "43990": "nan", "43995": "nan", "44000": 1.43481, "44005": "nan", "44010": "nan", "44015": "nan", "44020": "nan", "44025": "nan", "44030": "nan", "44035": "nan", "44040": "nan", "44045": "nan", "44050": "nan", "44055": "nan", "44060": "nan", "44065": "nan", "44070": "nan", "44075": "nan", "44080": "nan", "44085": "nan", "44090": "nan", "44095": "nan", "44100": 1.43472, "44105": "nan", "44110": "nan", "44115": "nan", "44120": "nan", "44125": "nan", "44130": "nan", "44135": "nan", "44140": "nan", "44145": "nan", "44150": "nan", "44155": "nan", "44160": "nan", "44165": "nan", "44170": "nan", "44175": "nan", "44180": "nan", "44185": "nan", "44190": "nan", "44195": "nan", "44200": 1.43636, "44205": "nan", "44210": "nan", "44215": "nan", "44220": "nan", "44225": "nan", "44230": "nan", "44235": "nan", "44240": "nan", "44245": "nan", "44250": "nan", "44255": "nan", "44260": "nan", "44265": "nan", "44270": "nan", "44275": "nan", "44280": "nan", "44285": "nan", "44290": "nan", "44295": "nan", "44300": 1.43549, "44305": "nan", "44310": "nan", "44315": "nan", "44320": "nan", "44325": "nan", "44330": "nan", "44335": "nan", "44340": "nan", "44345": "nan", "44350": "nan", "44355": "nan", "44360": "nan", "44365": "nan", "44370": "nan", "44375": "nan", "44380": "nan", "44385": "nan", "44390": "nan", "44395": "nan", "44400": 1.43673, "44405": "nan", "44410": "nan", "44415": "nan", "44420": "nan", "44425": "nan", "44430": "nan", "44435": "nan", "44440": "nan", "44445": "nan", "44450": "nan", "44455": "nan", "44460": "nan", "44465": "nan", "44470": "nan", "44475": "nan", "44480": "nan", "44485": "nan", "44490": "nan", "44495": "nan", "44500": 1.43573, "44505": "nan", "44510": "nan", "44515": "nan", "44520": "nan", "44525": "nan", "44530": "nan", "44535": "nan", "44540": "nan", "44545": "nan", "44550": "nan", "44555": "nan", "44560": "nan", "44565": "nan", "44570": "nan", "44575": "nan", "44580": "nan", "44585": "nan", "44590": "nan", "44595": "nan", "44600": 1.43042, "44605": "nan", "44610": "nan", "44615": "nan", "44620": "nan", "44625": "nan", "44630": "nan", "44635": "nan", "44640": "nan", "44645": "nan", "44650": "nan", "44655": "nan", "44660": "nan", "44665": "nan", "44670": "nan", "44675": "nan", "44680": "nan", "44685": "nan", "44690": "nan", "44695": "nan", "44700": 1.43648, "44705": "nan", "44710": "nan", "44715": "nan", "44720": "nan", "44725": "nan", "44730": "nan", "44735": "nan", "44740": "nan", "44745": "nan", "44750": "nan", "44755": "nan", "44760": "nan", "44765": "nan", "44770": "nan", "44775": "nan", "44780": "nan", "44785": "nan", "44790": "nan", "44795": "nan", "44800": 1.43628, "44805": "nan", "44810": "nan", "44815": "nan", "44820": "nan", "44825": "nan", "44830": "nan", "44835": "nan", "44840": "nan", "44845": "nan", "44850": "nan", "44855": "nan", "44860": "nan", "44865": "nan", "44870": "nan", "44875": "nan", "44880": "nan", "44885": "nan", "44890": "nan", "44895": "nan", "44900": 1.43217, "44905": "nan", "44910": "nan", "44915": "nan", "44920": "nan", "44925": "nan", "44930": "nan", "44935": "nan", "44940": "nan", "44945": "nan", "44950": "nan", "44955": "nan", "44960": "nan", "44965": "nan", "44970": "nan", "44975": "nan", "44980": "nan", "44985": "nan", "44990": "nan", "44995": "nan", "45000": 1.43298, "45005": "nan", "45010": "nan", "45015": "nan", "45020": "nan", "45025": "nan", "45030": "nan", "45035": "nan", "45040": "nan", "45045": "nan", "45050": "nan", "45055": "nan", "45060": "nan", "45065": "nan", "45070": "nan", "45075": "nan", "45080": "nan", "45085": "nan", "45090": "nan", "45095": "nan", "45100": 1.43609, "45105": "nan", "45110": "nan", "45115": "nan", "45120": "nan", "45125": "nan", "45130": "nan", "45135": "nan", "45140": "nan", "45145": "nan", "45150": "nan", "45155": "nan", "45160": "nan", "45165": "nan", "45170": "nan", "45175": "nan", "45180": "nan", "45185": "nan", "45190": "nan", "45195": "nan", "45200": 1.43579, "45205": "nan", "45210": "nan", "45215": "nan", "45220": "nan", "45225": "nan", "45230": "nan", "45235": "nan", "45240": "nan", "45245": "nan", "45250": "nan", "45255": "nan", "45260": "nan", "45265": "nan", "45270": "nan", "45275": "nan", "45280": "nan", "45285": "nan", "45290": "nan", "45295": "nan", "45300": 1.43945, "45305": "nan", "45310": "nan", "45315": "nan", "45320": "nan", "45325": "nan", "45330": "nan", "45335": "nan", "45340": "nan", "45345": "nan", "45350": "nan", "45355": "nan", "45360": "nan", "45365": "nan", "45370": "nan", "45375": "nan", "45380": "nan", "45385": "nan", "45390": "nan", "45395": "nan", "45400": 1.56882, "45405": "nan", "45410": "nan", "45415": "nan", "45420": "nan", "45425": "nan", "45430": "nan", "45435": "nan", "45440": "nan", "45445": "nan", "45450": "nan", "45455": "nan", "45460": "nan", "45465": "nan", "45470": "nan", "45475": "nan", "45480": "nan", "45485": "nan", "45490": "nan", "45495": "nan", "45500": 1.43298, "45505": "nan", "45510": "nan", "45515": "nan", "45520": "nan", "45525": "nan", "45530": "nan", "45535": "nan", "45540": "nan", "45545": "nan", "45550": "nan", "45555": "nan", "45560": "nan", "45565": "nan", "45570": "nan", "45575": "nan", "45580": "nan", "45585": "nan", "45590": "nan", "45595": "nan", "45600": 1.4319, "45605": "nan", "45610": "nan", "45615": "nan", "45620": "nan", "45625": "nan", "45630": "nan", "45635": "nan", "45640": "nan", "45645": "nan", "45650": "nan", "45655": "nan", "45660": "nan", "45665": "nan", "45670": "nan", "45675": "nan", "45680": "nan", "45685": "nan", "45690": "nan", "45695": "nan", "45700": 1.43982, "45705": "nan", "45710": "nan", "45715": "nan", "45720": "nan", "45725": "nan", "45730": "nan", "45735": "nan", "45740": "nan", "45745": "nan", "45750": "nan", "45755": "nan", "45760": "nan", "45765": "nan", "45770": "nan", "45775": "nan", "45780": "nan", "45785": "nan", "45790": "nan", "45795": "nan", "45800": 1.43138, "45805": "nan", "45810": "nan", "45815": "nan", "45820": "nan", "45825": "nan", "45830": "nan", "45835": "nan", "45840": "nan", "45845": "nan", "45850": "nan", "45855": "nan", "45860": "nan", "45865": "nan", "45870": "nan", "45875": "nan", "45880": "nan", "45885": "nan", "45890": "nan", "45895": "nan", "45900": 1.43516, "45905": "nan", "45910": "nan", "45915": "nan", "45920": "nan", "45925": "nan", "45930": "nan", "45935": "nan", "45940": "nan", "45945": "nan", "45950": "nan", "45955": "nan", "45960": "nan", "45965": "nan", "45970": "nan", "45975": "nan", "45980": "nan", "45985": "nan", "45990": "nan", "45995": "nan", "46000": 1.43215, "46005": "nan", "46010": "nan", "46015": "nan", "46020": "nan", "46025": "nan", "46030": "nan", "46035": "nan", "46040": "nan", "46045": "nan", "46050": "nan", "46055": "nan", "46060": "nan", "46065": "nan", "46070": "nan", "46075": "nan", "46080": "nan", "46085": "nan", "46090": "nan", "46095": "nan", "46100": 1.43521, "46105": "nan", "46110": "nan", "46115": "nan", "46120": "nan", "46125": "nan", "46130": "nan", "46135": "nan", "46140": "nan", "46145": "nan", "46150": "nan", "46155": "nan", "46160": "nan", "46165": "nan", "46170": "nan", "46175": "nan", "46180": "nan", "46185": "nan", "46190": "nan", "46195": "nan", "46200": 1.43394, "46205": "nan", "46210": "nan", "46215": "nan", "46220": "nan", "46225": "nan", "46230": "nan", "46235": "nan", "46240": "nan", "46245": "nan", "46250": "nan", "46255": "nan", "46260": "nan", "46265": "nan", "46270": "nan", "46275": "nan", "46280": "nan", "46285": "nan", "46290": "nan", "46295": "nan", "46300": 1.43268, "46305": "nan", "46310": "nan", "46315": "nan", "46320": "nan", "46325": "nan", "46330": "nan", "46335": "nan", "46340": "nan", "46345": "nan", "46350": "nan", "46355": "nan", "46360": "nan", "46365": "nan", "46370": "nan", "46375": "nan", "46380": "nan", "46385": "nan", "46390": "nan", "46395": "nan", "46400": 1.42917, "46405": "nan", "46410": "nan", "46415": "nan", "46420": "nan", "46425": "nan", "46430": "nan", "46435": "nan", "46440": "nan", "46445": "nan", "46450": "nan", "46455": "nan", "46460": "nan", "46465": "nan", "46470": "nan", "46475": "nan", "46480": "nan", "46485": "nan", "46490": "nan", "46495": "nan", "46500": 1.4325, "46505": "nan", "46510": "nan", "46515": "nan", "46520": "nan", "46525": "nan", "46530": "nan", "46535": "nan", "46540": "nan", "46545": "nan", "46550": "nan", "46555": "nan", "46560": "nan", "46565": "nan", "46570": "nan", "46575": "nan", "46580": "nan", "46585": "nan", "46590": "nan", "46595": "nan", "46600": 1.43815, "46605": "nan", "46610": "nan", "46615": "nan", "46620": "nan", "46625": "nan", "46630": "nan", "46635": "nan", "46640": "nan", "46645": "nan", "46650": "nan", "46655": "nan", "46660": "nan", "46665": "nan", "46670": "nan", "46675": "nan", "46680": "nan", "46685": "nan", "46690": "nan", "46695": "nan", "46700": 1.43515, "46705": "nan", "46710": "nan", "46715": "nan", "46720": "nan", "46725": "nan", "46730": "nan", "46735": "nan", "46740": "nan", "46745": "nan", "46750": "nan", "46755": "nan", "46760": "nan", "46765": "nan", "46770": "nan", "46775": "nan", "46780": "nan", "46785": "nan", "46790": "nan", "46795": "nan", "46800": 1.42874, "46805": "nan", "46810": "nan", "46815": "nan", "46820": "nan", "46825": "nan", "46830": "nan", "46835": "nan", "46840": "nan", "46845": "nan", "46850": "nan", "46855": "nan", "46860": "nan", "46865": "nan", "46870": "nan", "46875": "nan", "46880": "nan", "46885": "nan", "46890": "nan", "46895": "nan", "46900": 1.43153, "46905": "nan", "46910": "nan", "46915": "nan", "46920": "nan", "46925": "nan", "46930": "nan", "46935": "nan", "46940": "nan", "46945": "nan", "46950": "nan", "46955": "nan", "46960": "nan", "46965": "nan", "46970": "nan", "46975": "nan", "46980": "nan", "46985": "nan", "46990": "nan", "46995": "nan", "47000": 1.434, "47005": "nan", "47010": "nan", "47015": "nan", "47020": "nan", "47025": "nan", "47030": "nan", "47035": "nan", "47040": "nan", "47045": "nan", "47050": "nan", "47055": "nan", "47060": "nan", "47065": "nan", "47070": "nan", "47075": "nan", "47080": "nan", "47085": "nan", "47090": "nan", "47095": "nan", "47100": 1.43747, "47105": "nan", "47110": "nan", "47115": "nan", "47120": "nan", "47125": "nan", "47130": "nan", "47135": "nan", "47140": "nan", "47145": "nan", "47150": "nan", "47155": "nan", "47160": "nan", "47165": "nan", "47170": "nan", "47175": "nan", "47180": "nan", "47185": "nan", "47190": "nan", "47195": "nan", "47200": 1.43317, "47205": "nan", "47210": "nan", "47215": "nan", "47220": "nan", "47225": "nan", "47230": "nan", "47235": "nan", "47240": "nan", "47245": "nan", "47250": "nan", "47255": "nan", "47260": "nan", "47265": "nan", "47270": "nan", "47275": "nan", "47280": "nan", "47285": "nan", "47290": "nan", "47295": "nan", "47300": 1.43479, "47305": "nan", "47310": "nan", "47315": "nan", "47320": "nan", "47325": "nan", "47330": "nan", "47335": "nan", "47340": "nan", "47345": "nan", "47350": "nan", "47355": "nan", "47360": "nan", "47365": "nan", "47370": "nan", "47375": "nan", "47380": "nan", "47385": "nan", "47390": "nan", "47395": "nan", "47400": 1.43226, "47405": "nan", "47410": "nan", "47415": "nan", "47420": "nan", "47425": "nan", "47430": "nan", "47435": "nan", "47440": "nan", "47445": "nan", "47450": "nan", "47455": "nan", "47460": "nan", "47465": "nan", "47470": "nan", "47475": "nan", "47480": "nan", "47485": "nan", "47490": "nan", "47495": "nan", "47500": 1.43625, "47505": "nan", "47510": "nan", "47515": "nan", "47520": "nan", "47525": "nan", "47530": "nan", "47535": "nan", "47540": "nan", "47545": "nan", "47550": "nan", "47555": "nan", "47560": "nan", "47565": "nan", "47570": "nan", "47575": "nan", "47580": "nan", "47585": "nan", "47590": "nan", "47595": "nan", "47600": 1.43052, "47605": "nan", "47610": "nan", "47615": "nan", "47620": "nan", "47625": "nan", "47630": "nan", "47635": "nan", "47640": "nan", "47645": "nan", "47650": "nan", "47655": "nan", "47660": "nan", "47665": "nan", "47670": "nan", "47675": "nan", "47680": "nan", "47685": "nan", "47690": "nan", "47695": "nan", "47700": 1.43291, "47705": "nan", "47710": "nan", "47715": "nan", "47720": "nan", "47725": "nan", "47730": "nan", "47735": "nan", "47740": "nan", "47745": "nan", "47750": "nan", "47755": "nan", "47760": "nan", "47765": "nan", "47770": "nan", "47775": "nan", "47780": "nan", "47785": "nan", "47790": "nan", "47795": "nan", "47800": 1.43488, "47805": "nan", "47810": "nan", "47815": "nan", "47820": "nan", "47825": "nan", "47830": "nan", "47835": "nan", "47840": "nan", "47845": "nan", "47850": "nan", "47855": "nan", "47860": "nan", "47865": "nan", "47870": "nan", "47875": "nan", "47880": "nan", "47885": "nan", "47890": "nan", "47895": "nan", "47900": 1.43399, "47905": "nan", "47910": "nan", "47915": "nan", "47920": "nan", "47925": "nan", "47930": "nan", "47935": "nan", "47940": "nan", "47945": "nan", "47950": "nan", "47955": "nan", "47960": "nan", "47965": "nan", "47970": "nan", "47975": "nan", "47980": "nan", "47985": "nan", "47990": "nan", "47995": "nan", "48000": 1.43118, "48005": "nan", "48010": "nan", "48015": "nan", "48020": "nan", "48025": "nan", "48030": "nan", "48035": "nan", "48040": "nan", "48045": "nan", "48050": "nan", "48055": "nan", "48060": "nan", "48065": "nan", "48070": "nan", "48075": "nan", "48080": "nan", "48085": "nan", "48090": "nan", "48095": "nan", "48100": 1.43054, "48105": "nan", "48110": "nan", "48115": "nan", "48120": "nan", "48125": "nan", "48130": "nan", "48135": "nan", "48140": "nan", "48145": "nan", "48150": "nan", "48155": "nan", "48160": "nan", "48165": "nan", "48170": "nan", "48175": "nan", "48180": "nan", "48185": "nan", "48190": "nan", "48195": "nan", "48200": 1.42888, "48205": "nan", "48210": "nan", "48215": "nan", "48220": "nan", "48225": "nan", "48230": "nan", "48235": "nan", "48240": "nan", "48245": "nan", "48250": "nan", "48255": "nan", "48260": "nan", "48265": "nan", "48270": "nan", "48275": "nan", "48280": "nan", "48285": "nan", "48290": "nan", "48295": "nan", "48300": 1.42959, "48305": "nan", "48310": "nan", "48315": "nan", "48320": "nan", "48325": "nan", "48330": "nan", "48335": "nan", "48340": "nan", "48345": "nan", "48350": "nan", "48355": "nan", "48360": "nan", "48365": "nan", "48370": "nan", "48375": "nan", "48380": "nan", "48385": "nan", "48390": "nan", "48395": "nan", "48400": 1.43281, "48405": "nan", "48410": "nan", "48415": "nan", "48420": "nan", "48425": "nan", "48430": "nan", "48435": "nan", "48440": "nan", "48445": "nan", "48450": "nan", "48455": "nan", "48460": "nan", "48465": "nan", "48470": "nan", "48475": "nan", "48480": "nan", "48485": "nan", "48490": "nan", "48495": "nan", "48500": 1.43825, "48505": "nan", "48510": "nan", "48515": "nan", "48520": "nan", "48525": "nan", "48530": "nan", "48535": "nan", "48540": "nan", "48545": "nan", "48550": "nan", "48555": "nan", "48560": "nan", "48565": "nan", "48570": "nan", "48575": "nan", "48580": "nan", "48585": "nan", "48590": "nan", "48595": "nan", "48600": 1.43317, "48605": "nan", "48610": "nan", "48615": "nan", "48620": "nan", "48625": "nan", "48630": "nan", "48635": "nan", "48640": "nan", "48645": "nan", "48650": "nan", "48655": "nan", "48660": "nan", "48665": "nan", "48670": "nan", "48675": "nan", "48680": "nan", "48685": "nan", "48690": "nan", "48695": "nan", "48700": 1.43366, "48705": "nan", "48710": "nan", "48715": "nan", "48720": "nan", "48725": "nan", "48730": "nan", "48735": "nan", "48740": "nan", "48745": "nan", "48750": "nan", "48755": "nan", "48760": "nan", "48765": "nan", "48770": "nan", "48775": "nan", "48780": "nan", "48785": "nan", "48790": "nan", "48795": "nan", "48800": 1.43376, "48805": "nan", "48810": "nan", "48815": "nan", "48820": "nan", "48825": "nan", "48830": "nan", "48835": "nan", "48840": "nan", "48845": "nan", "48850": "nan", "48855": "nan", "48860": "nan", "48865": "nan", "48870": "nan", "48875": "nan", "48880": "nan", "48885": "nan", "48890": "nan", "48895": "nan", "48900": 1.43026, "48905": "nan", "48910": "nan", "48915": "nan", "48920": "nan", "48925": "nan", "48930": "nan", "48935": "nan", "48940": "nan", "48945": "nan", "48950": "nan", "48955": "nan", "48960": "nan", "48965": "nan", "48970": "nan", "48975": "nan", "48980": "nan", "48985": "nan", "48990": "nan", "48995": "nan", "49000": 1.43553, "49005": "nan", "49010": "nan", "49015": "nan", "49020": "nan", "49025": "nan", "49030": "nan", "49035": "nan", "49040": "nan", "49045": "nan", "49050": "nan", "49055": "nan", "49060": "nan", "49065": "nan", "49070": "nan", "49075": "nan", "49080": "nan", "49085": "nan", "49090": "nan", "49095": "nan", "49100": 1.43451, "49105": "nan", "49110": "nan", "49115": "nan", "49120": "nan", "49125": "nan", "49130": "nan", "49135": "nan", "49140": "nan", "49145": "nan", "49150": "nan", "49155": "nan", "49160": "nan", "49165": "nan", "49170": "nan", "49175": "nan", "49180": "nan", "49185": "nan", "49190": "nan", "49195": "nan", "49200": 1.43261, "49205": "nan", "49210": "nan", "49215": "nan", "49220": "nan", "49225": "nan", "49230": "nan", "49235": "nan", "49240": "nan", "49245": "nan", "49250": "nan", "49255": "nan", "49260": "nan", "49265": "nan", "49270": "nan", "49275": "nan", "49280": "nan", "49285": "nan", "49290": "nan", "49295": "nan", "49300": 1.43284, "49305": "nan", "49310": "nan", "49315": "nan", "49320": "nan", "49325": "nan", "49330": "nan", "49335": "nan", "49340": "nan", "49345": "nan", "49350": "nan", "49355": "nan", "49360": "nan", "49365": "nan", "49370": "nan", "49375": "nan", "49380": "nan", "49385": "nan", "49390": "nan", "49395": "nan", "49400": 1.43444, "49405": "nan", "49410": "nan", "49415": "nan", "49420": "nan", "49425": "nan", "49430": "nan", "49435": "nan", "49440": "nan", "49445": "nan", "49450": "nan", "49455": "nan", "49460": "nan", "49465": "nan", "49470": "nan", "49475": "nan", "49480": "nan", "49485": "nan", "49490": "nan", "49495": "nan", "49500": 1.43466, "49505": "nan", "49510": "nan", "49515": "nan", "49520": "nan", "49525": "nan", "49530": "nan", "49535": "nan", "49540": "nan", "49545": "nan", "49550": "nan", "49555": "nan", "49560": "nan", "49565": "nan", "49570": "nan", "49575": "nan", "49580": "nan", "49585": "nan", "49590": "nan", "49595": "nan", "49600": 1.43045, "49605": "nan", "49610": "nan", "49615": "nan", "49620": "nan", "49625": "nan", "49630": "nan", "49635": "nan", "49640": "nan", "49645": "nan", "49650": "nan", "49655": "nan", "49660": "nan", "49665": "nan", "49670": "nan", "49675": "nan", "49680": "nan", "49685": "nan", "49690": "nan", "49695": "nan", "49700": 1.42935, "49705": "nan", "49710": "nan", "49715": "nan", "49720": "nan", "49725": "nan", "49730": "nan", "49735": "nan", "49740": "nan", "49745": "nan", "49750": "nan", "49755": "nan", "49760": "nan", "49765": "nan", "49770": "nan", "49775": "nan", "49780": "nan", "49785": "nan", "49790": "nan", "49795": "nan", "49800": 1.43239, "49805": "nan", "49810": "nan", "49815": "nan", "49820": "nan", "49825": "nan", "49830": "nan", "49835": "nan", "49840": "nan", "49845": "nan", "49850": "nan", "49855": "nan", "49860": "nan", "49865": "nan", "49870": "nan", "49875": "nan", "49880": "nan", "49885": "nan", "49890": "nan", "49895": "nan", "49900": 1.42731, "49905": "nan", "49910": "nan", "49915": "nan", "49920": "nan", "49925": "nan", "49930": "nan", "49935": "nan", "49940": "nan", "49945": "nan", "49950": "nan", "49955": "nan", "49960": "nan", "49965": "nan", "49970": "nan", "49975": "nan", "49980": "nan", "49985": "nan", "49990": "nan", "49995": "nan", "50000": 1.43586, "50005": "nan", "50010": "nan", "50015": "nan", "50020": "nan", "50025": "nan", "50030": "nan", "50035": "nan", "50040": "nan", "50045": "nan", "50050": "nan", "50055": "nan", "50060": "nan", "50065": "nan", "50070": "nan", "50075": "nan", "50080": "nan", "50085": "nan", "50090": "nan", "50095": "nan", "50100": 1.43114, "50105": "nan", "50110": "nan", "50115": "nan", "50120": "nan", "50125": "nan", "50130": "nan", "50135": "nan", "50140": "nan", "50145": "nan", "50150": "nan", "50155": "nan", "50160": "nan", "50165": "nan", "50170": "nan", "50175": "nan", "50180": "nan", "50185": "nan", "50190": "nan", "50195": "nan", "50200": 1.43067, "50205": "nan", "50210": "nan", "50215": "nan", "50220": "nan", "50225": "nan", "50230": "nan", "50235": "nan", "50240": "nan", "50245": "nan", "50250": "nan", "50255": "nan", "50260": "nan", "50265": "nan", "50270": "nan", "50275": "nan", "50280": "nan", "50285": "nan", "50290": "nan", "50295": "nan", "50300": 1.42825, "50305": "nan", "50310": "nan", "50315": "nan", "50320": "nan", "50325": "nan", "50330": "nan", "50335": "nan", "50340": "nan", "50345": "nan", "50350": "nan", "50355": "nan", "50360": "nan", "50365": "nan", "50370": "nan", "50375": "nan", "50380": "nan", "50385": "nan", "50390": "nan", "50395": "nan", "50400": 1.43126, "50405": "nan", "50410": "nan", "50415": "nan", "50420": "nan", "50425": "nan", "50430": "nan", "50435": "nan", "50440": "nan", "50445": "nan", "50450": "nan", "50455": "nan", "50460": "nan", "50465": "nan", "50470": "nan", "50475": "nan", "50480": "nan", "50485": "nan", "50490": "nan", "50495": "nan", "50500": 1.43811, "50505": "nan", "50510": "nan", "50515": "nan", "50520": "nan", "50525": "nan", "50530": "nan", "50535": "nan", "50540": "nan", "50545": "nan", "50550": "nan", "50555": "nan", "50560": "nan", "50565": "nan", "50570": "nan", "50575": "nan", "50580": "nan", "50585": "nan", "50590": "nan", "50595": "nan", "50600": 1.43539, "50605": "nan", "50610": "nan", "50615": "nan", "50620": "nan", "50625": "nan", "50630": "nan", "50635": "nan", "50640": "nan", "50645": "nan", "50650": "nan", "50655": "nan", "50660": "nan", "50665": "nan", "50670": "nan", "50675": "nan", "50680": "nan", "50685": "nan", "50690": "nan", "50695": "nan", "50700": 1.42946, "50705": "nan", "50710": "nan", "50715": "nan", "50720": "nan", "50725": "nan", "50730": "nan", "50735": "nan", "50740": "nan", "50745": "nan", "50750": "nan", "50755": "nan", "50760": "nan", "50765": "nan", "50770": "nan", "50775": "nan", "50780": "nan", "50785": "nan", "50790": "nan", "50795": "nan", "50800": 1.43158, "50805": "nan", "50810": "nan", "50815": "nan", "50820": "nan", "50825": "nan", "50830": "nan", "50835": "nan", "50840": "nan", "50845": "nan", "50850": "nan", "50855": "nan", "50860": "nan", "50865": "nan", "50870": "nan", "50875": "nan", "50880": "nan", "50885": "nan", "50890": "nan", "50895": "nan", "50900": "nan", "50905": "nan", "50910": "nan", "50915": "nan", "50920": "nan", "50925": "nan", "50930": "nan", "50935": "nan", "50940": "nan", "50945": "nan", "50950": "nan", "50955": "nan", "50960": "nan", "50965": "nan", "50970": "nan", "50975": "nan", "50980": "nan", "50985": "nan", "50990": "nan", "50995": "nan", "51000": "nan"}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 55000, "step_interval": 5, "values": {"1": 12.98419, "5": 12.93858, "10": 12.06407, "15": 11.97884, "20": 10.53585, "25": 10.11955, "30": 9.72859, "35": 9.44174, "40": 9.23731, "45": 9.03754, "50": 8.85161, "55": 8.64089, "60": 8.62422, "65": 8.50999, "70": 8.4418, "75": 8.29574, "80": 8.14976, "85": 8.13593, "90": 8.00273, "95": 7.96902, "100": 7.81667, "105": 7.66273, "110": 7.58481, "115": 7.42485, "120": 7.44126, "125": 7.3989, "130": 7.27474, "135": 7.20875, "140": 7.20106, "145": 7.01308, "150": 7.17638, "155": 7.00442, "160": 6.91984, "165": 6.93802, "170": 6.8774, "175": 6.87944, "180": 6.80918, "185": 6.72638, "190": 6.68851, "195": 6.63505, "200": 6.65129, "205": 6.64355, "210": 6.54681, "215": 6.50711, "220": 6.50946, "225": 6.4951, "230": 6.50868, "235": 6.46836, "240": 6.37687, "245": 6.37564, "250": 6.31205, "255": 6.4411, "260": 6.32499, "265": 6.2545, "270": 6.22176, "275": 6.22323, "280": 6.17675, "285": 6.18928, "290": 6.13821, "295": 6.14995, "300": 6.13281, "305": 6.03727, "310": 6.07025, "315": 6.07371, "320": 5.95538, "325": 5.90339, "330": 5.95591, "335": 6.00893, "340": 5.92363, "345": 5.94625, "350": 5.8931, "355": 5.85673, "360": 5.85383, "365": 5.83245, "370": 5.78974, "375": 5.81562, "380": 5.86636, "385": 5.8012, "390": 5.79624, "395": 5.69635, "400": 5.63903, "405": 5.66708, "410": 5.66194, "415": 5.72265, "420": 5.65143, "425": 5.65576, "430": 5.6242, "435": 5.58511, "440": 5.60981, "445": 5.51531, "450": 5.58108, "455": 5.54555, "460": 5.49145, "465": 5.57158, "470": 5.55468, "475": 5.47562, "480": 5.4736, "485": 5.51128, "490": 5.47458, "495": 5.46603, "500": 5.42756, "505": 5.37763, "510": 5.47649, "515": 5.40611, "520": 5.43944, "525": 5.29932, "530": 5.34437, "535": 5.32791, "540": 5.34311, "545": 5.39504, "550": 5.38438, "555": 5.20891, "560": 5.34278, "565": 5.29181, "570": 5.26874, "575": 5.28636, "580": 5.22433, "585": 5.22245, "590": 5.23216, "595": 5.23127, "600": 5.29846, "605": 5.22827, "610": 5.22475, "615": 5.18555, "620": 5.18716, "625": 5.1959, "630": 5.16315, "635": 5.14271, "640": 5.08829, "645": 5.14725, "650": 5.15759, "655": 5.12605, "660": 5.0694, "665": 5.12441, "670": 5.06357, "675": 5.06852, "680": 5.05386, "685": 4.99695, "690": 5.0311, "695": 4.97523, "700": 4.9974, "705": 4.94217, "710": 4.98576, "715": 4.9, "720": 4.85747, "725": 4.79903, "730": 4.87283, "735": 4.84954, "740": 4.87024, "745": 4.7352, "750": 4.74275, "755": 4.79321, "760": 4.80364, "765": 4.71614, "770": 4.7147, "775": 4.65336, "780": 4.68425, "785": 4.74427, "790": 4.67452, "795": 4.65093, "800": 4.60245, "805": 4.62442, "810": 4.64417, "815": 4.6198, "820": 4.6214, "825": 4.60404, "830": 4.58841, "835": 4.56298, "840": 4.45957, "845": 4.47507, "850": 4.44717, "855": 4.51055, "860": 4.43034, "865": 4.52505, "870": 4.47754, "875": 4.38926, "880": 4.42097, "885": 4.38191, "890": 4.44113, "895": 4.41851, "900": 4.39769, "905": 4.32608, "910": 4.34904, "915": 4.32655, "920": 4.36397, "925": 4.37779, "930": 4.31557, "935": 4.3102, "940": 4.36209, "945": 4.29849, "950": 4.35346, "955": 4.24688, "960": 4.20416, "965": 4.28046, "970": 4.27281, "975": 4.23248, "980": 4.24327, "985": 4.17166, "990": 4.14359, "995": 4.18029, "1000": 4.22378, "1005": 4.17564, "1010": 4.18312, "1015": 4.12789, "1020": 4.17244, "1025": 4.21284, "1030": 4.11666, "1035": 4.10004, "1040": 4.12213, "1045": 4.11692, "1050": 4.1408, "1055": 4.10275, "1060": 4.10603, "1065": 4.07455, "1070": 4.05142, "1075": 4.05972, "1080": 4.08374, "1085": 4.06121, "1090": 4.02164, "1095": 4.09567, "1100": 4.05382, "1105": 4.07552, "1110": 4.03976, "1115": 4.01326, "1120": 4.00303, "1125": 4.00744, "1130": 4.04373, "1135": 3.99397, "1140": 4.00571, "1145": 3.93554, "1150": 4.04192, "1155": 3.98881, "1160": 3.96915, "1165": 3.86551, "1170": 3.9321, "1175": 3.94632, "1180": 3.97713, "1185": 3.98965, "1190": 3.94207, "1195": 3.95147, "1200": 3.89842, "1205": 3.86474, "1210": 3.98989, "1215": 3.83689, "1220": 3.89352, "1225": 3.82645, "1230": 3.90601, "1235": 3.898, "1240": 3.87861, "1245": 3.79685, "1250": 3.83838, "1255": 3.85958, "1260": 3.87792, "1265": 3.78786, "1270": 3.87793, "1275": 3.84734, "1280": 3.81941, "1285": 3.85299, "1290": 3.86738, "1295": 3.85224, "1300": 3.82753, "1305": 3.81932, "1310": 3.81931, "1315": 3.81484, "1320": 3.82172, "1325": 3.72909, "1330": 3.7948, "1335": 3.77281, "1340": 3.75747, "1345": 3.7587, "1350": 3.74378, "1355": 3.80012, "1360": 3.75372, "1365": 3.73189, "1370": 3.74649, "1375": 3.7362, "1380": 3.75138, "1385": 3.74391, "1390": 3.69448, "1395": 3.75208, "1400": 3.72501, "1405": 3.67898, "1410": 3.67717, "1415": 3.68163, "1420": 3.70505, "1425": 3.70951, "1430": 3.6804, "1435": 3.66368, "1440": 3.63441, "1445": 3.69601, "1450": 3.67167, "1455": 3.63269, "1460": 3.65693, "1465": 3.68324, "1470": 3.62886, "1475": 3.6965, "1480": 3.68269, "1485": 3.66399, "1490": 3.62744, "1495": 3.60305, "1500": 3.64871, "1505": 3.71778, "1510": 3.55762, "1515": 3.62166, "1520": 3.62712, "1525": 3.61897, "1530": 3.59347, "1535": 3.59437, "1540": 3.61176, "1545": 3.60665, "1550": 3.57615, "1555": 3.5821, "1560": 3.62284, "1565": 3.63849, "1570": 3.60821, "1575": 3.56029, "1580": 3.59282, "1585": 3.57531, "1590": 3.46929, "1595": 3.51663, "1600": 3.50711, "1605": 3.57192, "1610": 3.57622, "1615": 3.51645, "1620": 3.52744, "1625": 3.48121, "1630": 3.49899, "1635": 3.54752, "1640": 3.52188, "1645": 3.54341, "1650": 3.49288, "1655": 3.47966, "1660": 3.52139, "1665": 3.47847, "1670": 3.51594, "1675": 3.49377, "1680": 3.46992, "1685": 3.47741, "1690": 3.48724, "1695": 3.50446, "1700": 3.46703, "1705": 3.40852, "1710": 3.49306, "1715": 3.48903, "1720": 3.43328, "1725": 3.42895, "1730": 3.42096, "1735": 3.47912, "1740": 3.46644, "1745": 3.44476, "1750": 3.42008, "1755": 3.42968, "1760": 3.39579, "1765": 3.42317, "1770": 3.43451, "1775": 3.39639, "1780": 3.43733, "1785": 3.42439, "1790": 3.38518, "1795": 3.41436, "1800": 3.34474, "1805": 3.41076, "1810": 3.33239, "1815": 3.43441, "1820": 3.4094, "1825": 3.37886, "1830": 3.33304, "1835": 3.4221, "1840": 3.39791, "1845": 3.45375, "1850": 3.39518, "1855": 3.38323, "1860": 3.35364, "1865": 3.39135, "1870": 3.30156, "1875": 3.45027, "1880": 3.34842, "1885": 3.34936, "1890": 3.34384, "1895": 3.39469, "1900": 3.37197, "1905": 3.32889, "1910": 3.33764, "1915": 3.32254, "1920": 3.36886, "1925": 3.3361, "1930": 3.31881, "1935": 3.31019, "1940": 3.36329, "1945": 3.2749, "1950": 3.41129, "1955": 3.30765, "1960": 3.3092, "1965": 3.26866, "1970": 3.30398, "1975": 3.33543, "1980": 3.33457, "1985": 3.24918, "1990": 3.31267, "1995": 3.29228, "2000": 3.28027, "2005": 3.25984, "2010": 3.26226, "2015": 3.22499, "2020": 3.27422, "2025": 3.2932, "2030": 3.28385, "2035": 3.2917, "2040": 3.24064, "2045": 3.23721, "2050": 3.29462, "2055": 3.32542, "2060": 3.29203, "2065": 3.23534, "2070": 3.29653, "2075": 3.25315, "2080": 3.23987, "2085": 3.28576, "2090": 3.14754, "2095": 3.28685, "2100": 3.23197, "2105": 3.20069, "2110": 3.21711, "2115": 3.24001, "2120": 3.17869, "2125": 3.22332, "2130": 3.21619, "2135": 3.28262, "2140": 3.1965, "2145": 3.2106, "2150": 3.21726, "2155": 3.24018, "2160": 3.20112, "2165": 3.22278, "2170": 3.20285, "2175": 3.21809, "2180": 3.25586, "2185": 3.25851, "2190": 3.23631, "2195": 3.16969, "2200": 3.2017, "2205": 3.16847, "2210": 3.12143, "2215": 3.19027, "2220": 3.19538, "2225": 3.2071, "2230": 3.18003, "2235": 3.20905, "2240": 3.21697, "2245": 3.17997, "2250": 3.19354, "2255": 3.12735, "2260": 3.1459, "2265": 3.21447, "2270": 3.17604, "2275": 3.1531, "2280": 3.17242, "2285": 3.16162, "2290": 3.17124, "2295": 3.21134, "2300": 3.15096, "2305": 3.1736, "2310": 3.1267, "2315": 3.05928, "2320": 3.11652, "2325": 3.17439, "2330": 3.12666, "2335": 3.10341, "2340": 3.1715, "2345": 3.127, "2350": 3.13997, "2355": 3.15496, "2360": 3.1737, "2365": 3.14036, "2370": 3.1741, "2375": 3.13256, "2380": 3.11415, "2385": 3.07912, "2390": 3.11349, "2395": 3.09553, "2400": 3.08695, "2405": 3.09731, "2410": 3.08255, "2415": 3.09623, "2420": 3.07727, "2425": 3.07494, "2430": 3.08005, "2435": 3.07556, "2440": 3.08857, "2445": 3.05936, "2450": 3.13258, "2455": 3.16151, "2460": 3.08718, "2465": 3.15081, "2470": 3.06282, "2475": 3.09385, "2480": 3.09199, "2485": 3.06428, "2490": 3.06821, "2495": 3.07642, "2500": 3.07971, "2505": 3.15981, "2510": 3.13739, "2515": 3.05211, "2520": 3.08072, "2525": 3.05036, "2530": 3.05513, "2535": 3.09385, "2540": 3.07617, "2545": 3.05737, "2550": 3.00939, "2555": 3.10223, "2560": 3.05369, "2565": 3.11411, "2570": 3.01031, "2575": 3.05088, "2580": 3.08611, "2585": 3.01916, "2590": 3.0772, "2595": 3.00794, "2600": 3.07038, "2605": 3.04767, "2610": 3.05854, "2615": 3.05941, "2620": 2.99431, "2625": 3.00798, "2630": 3.03934, "2635": 3.06996, "2640": 3.01981, "2645": 3.05534, "2650": 3.02745, "2655": 3.00241, "2660": 3.01577, "2665": 3.04583, "2670": 2.99086, "2675": 2.97292, "2680": 3.00164, "2685": 3.00821, "2690": 3.00456, "2695": 2.99215, "2700": 3.03084, "2705": 2.99007, "2710": 2.98168, "2715": 2.97811, "2720": 3.03893, "2725": 3.00297, "2730": 3.02662, "2735": 2.98068, "2740": 2.98071, "2745": 3.02258, "2750": 3.01704, "2755": 2.97908, "2760": 3.01484, "2765": 3.01622, "2770": 2.9863, "2775": 3.00036, "2780": 3.02793, "2785": 2.96079, "2790": 2.96513, "2795": 2.95697, "2800": 2.96372, "2805": 2.9375, "2810": 2.98722, "2815": 2.96584, "2820": 3.08399, "2825": 3.06283, "2830": 3.00913, "2835": 2.92423, "2840": 2.9361, "2845": 2.95589, "2850": 2.96628, "2855": 2.9635, "2860": 2.96101, "2865": 2.91677, "2870": 2.99107, "2875": 2.92252, "2880": 2.96169, "2885": 2.92576, "2890": 2.98139, "2895": 2.93832, "2900": 2.96345, "2905": 3.00726, "2910": 2.91655, "2915": 2.94018, "2920": 2.953, "2925": 2.94276, "2930": 2.96159, "2935": 2.94911, "2940": 2.94428, "2945": 2.90791, "2950": 2.98357, "2955": 2.91352, "2960": 2.97522, "2965": 2.87466, "2970": 2.96538, "2975": 3.00087, "2980": 2.94534, "2985": 3.03787, "2990": 2.94073, "2995": 2.8743, "3000": 2.93212, "3005": 2.89936, "3010": 2.93659, "3015": 2.92077, "3020": 2.91719, "3025": 2.92663, "3030": 2.92307, "3035": 2.9616, "3040": 2.93203, "3045": 2.83427, "3050": 2.90284, "3055": 2.89765, "3060": 2.93054, "3065": 2.92169, "3070": 2.88648, "3075": 2.87388, "3080": 2.92415, "3085": 2.90403, "3090": 2.9213, "3095": 2.92644, "3100": 2.87162, "3105": 2.92814, "3110": 2.9059, "3115": 2.94577, "3120": 2.97115, "3125": 2.86544, "3130": 2.9363, "3135": 2.92939, "3140": 2.87369, "3145": 2.92025, "3150": 2.85752, "3155": 2.84977, "3160": 2.84157, "3165": 2.84565, "3170": 2.89323, "3175": 2.90533, "3180": 2.8551, "3185": 2.89148, "3190": 2.90645, "3195": 2.92939, "3200": 2.92462, "3205": 2.86378, "3210": 2.87152, "3215": 2.91825, "3220": 2.87631, "3225": 2.8731, "3230": 2.81542, "3235": 2.87916, "3240": 2.87316, "3245": 2.90373, "3250": 2.85679, "3255": 2.85479, "3260": 2.86294, "3265": 2.87011, "3270": 2.85644, "3275": 2.86753, "3280": 2.80617, "3285": 2.81412, "3290": 2.8685, "3295": 2.89718, "3300": 2.87683, "3305": 2.86231, "3310": 2.85952, "3315": 2.82037, "3320": 2.87839, "3325": 2.84404, "3330": 2.85988, "3335": 2.85513, "3340": 2.8241, "3345": 2.83286, "3350": 2.84054, "3355": 2.86925, "3360": 2.80101, "3365": 2.85143, "3370": 2.84251, "3375": 2.83977, "3380": 2.85272, "3385": 2.88104, "3390": 2.87068, "3395": 2.81433, "3400": 2.79738, "3405": 2.83699, "3410": 2.84678, "3415": 2.86066, "3420": 2.82454, "3425": 2.8157, "3430": 2.83768, "3435": 2.8964, "3440": 2.81788, "3445": 2.86729, "3450": 2.82404, "3455": 2.79119, "3460": 2.81878, "3465": 2.85649, "3470": 2.84076, "3475": 2.77793, "3480": 2.84401, "3485": 2.82663, "3490": 2.89801, "3495": 2.85114, "3500": 2.83179, "3505": 2.82651, "3510": 2.82016, "3515": 2.83494, "3520": 2.77632, "3525": 2.81133, "3530": 2.86039, "3535": 2.78447, "3540": 2.83894, "3545": 2.81492, "3550": 2.79891, "3555": 2.81918, "3560": 2.83242, "3565": 2.82672, "3570": 2.80479, "3575": 2.80405, "3580": 2.81907, "3585": 2.83647, "3590": 2.83171, "3595": 2.78325, "3600": 2.75461, "3605": 2.78977, "3610": 2.85232, "3615": 2.75121, "3620": 2.80277, "3625": 2.89845, "3630": 2.78491, "3635": 2.79023, "3640": 2.78772, "3645": 2.77321, "3650": 2.81718, "3655": 2.82295, "3660": 2.76966, "3665": 2.78734, "3670": 2.76944, "3675": 2.78073, "3680": 2.81274, "3685": 2.80567, "3690": 2.80789, "3695": 2.81047, "3700": 2.79008, "3705": 2.79091, "3710": 2.75141, "3715": 2.80384, "3720": 2.79716, "3725": 2.79349, "3730": 2.83918, "3735": 2.79952, "3740": 2.75436, "3745": 2.79491, "3750": 2.80757, "3755": 2.79742, "3760": 2.76088, "3765": 2.75438, "3770": 2.76209, "3775": 2.77189, "3780": 2.76162, "3785": 2.78011, "3790": 2.74787, "3795": 2.79028, "3800": 2.80085, "3805": 2.75432, "3810": 2.80606, "3815": 2.77067, "3820": 2.78867, "3825": 2.73887, "3830": 2.75026, "3835": 2.81516, "3840": 2.7302, "3845": 2.71806, "3850": 2.77465, "3855": 2.72019, "3860": 2.80503, "3865": 2.75686, "3870": 2.77712, "3875": 2.75755, "3880": 2.78933, "3885": 2.7868, "3890": 2.74707, "3895": 2.79985, "3900": 2.76703, "3905": 2.72383, "3910": 2.74699, "3915": 2.75045, "3920": 2.80191, "3925": 2.78052, "3930": 2.71313, "3935": 2.74359, "3940": 2.75227, "3945": 2.74406, "3950": 2.72928, "3955": 2.78059, "3960": 2.76403, "3965": 2.74159, "3970": 2.75662, "3975": 2.72506, "3980": 2.74254, "3985": 2.74757, "3990": 2.69086, "3995": 2.78703, "4000": 2.73982, "4005": 2.76955, "4010": 2.70923, "4015": 2.7254, "4020": 2.74981, "4025": 2.73552, "4030": 2.66314, "4035": 2.69898, "4040": 2.75225, "4045": 2.75239, "4050": 2.78981, "4055": 2.72411, "4060": 2.71629, "4065": 2.65004, "4070": 2.80903, "4075": 2.75957, "4080": 2.7176, "4085": 2.75392, "4090": 2.68029, "4095": 2.69048, "4100": 2.71245, "4105": 2.74074, "4110": 2.72713, "4115": 2.70365, "4120": 2.73057, "4125": 2.70979, "4130": 2.70198, "4135": 2.69741, "4140": 2.68445, "4145": 2.78418, "4150": 2.71174, "4155": 2.74179, "4160": 2.76366, "4165": 2.72055, "4170": 2.67555, "4175": 2.72194, "4180": 2.72955, "4185": 2.72429, "4190": 2.74242, "4195": 2.69693, "4200": 2.71063, "4205": 2.74869, "4210": 2.67843, "4215": 2.66753, "4220": 2.66038, "4225": 2.71178, "4230": 2.72333, "4235": 2.74338, "4240": 2.71244, "4245": 2.69764, "4250": 2.71507, "4255": 2.64984, "4260": 2.72714, "4265": 2.73595, "4270": 2.72303, "4275": 2.68834, "4280": 2.70282, "4285": 2.73183, "4290": 2.68559, "4295": 2.69672, "4300": 2.70229, "4305": 2.69989, "4310": 2.73499, "4315": 2.71369, "4320": 2.70547, "4325": 2.70836, "4330": 2.71414, "4335": 2.69307, "4340": 2.70518, "4345": 2.72599, "4350": 2.67681, "4355": 2.69595, "4360": 2.7149, "4365": 2.78459, "4370": 2.73234, "4375": 2.74888, "4380": 2.70715, "4385": 2.70069, "4390": 2.70417, "4395": 2.75491, "4400": 2.66707, "4405": 2.67508, "4410": 2.68379, "4415": 2.70408, "4420": 2.70779, "4425": 2.71984, "4430": 2.69625, "4435": 2.68224, "4440": 2.69528, "4445": 2.68349, "4450": 2.65403, "4455": 2.66667, "4460": 2.69057, "4465": 2.69891, "4470": 2.66975, "4475": 2.6866, "4480": 2.65652, "4485": 2.70066, "4490": 2.65265, "4495": 2.71528, "4500": 2.70739, "4505": 2.69552, "4510": 2.65063, "4515": 2.70152, "4520": 2.67082, "4525": 2.66852, "4530": 2.67827, "4535": 2.67428, "4540": 2.70913, "4545": 2.66032, "4550": 2.70372, "4555": 2.68167, "4560": 2.657, "4565": 2.64021, "4570": 2.63917, "4575": 2.66903, "4580": 2.68757, "4585": 2.68271, "4590": 2.62237, "4595": 2.66522, "4600": 2.67849, "4605": 2.68027, "4610": 2.67121, "4615": 2.66679, "4620": 2.66215, "4625": 2.68794, "4630": 2.67344, "4635": 2.64816, "4640": 2.69958, "4645": 2.64633, "4650": 2.7029, "4655": 2.7093, "4660": 2.67742, "4665": 2.68721, "4670": 2.67771, "4675": 2.68762, "4680": 2.66534, "4685": 2.65744, "4690": 2.70805, "4695": 2.6566, "4700": 2.67417, "4705": 2.65401, "4710": 2.67914, "4715": 2.64681, "4720": 2.72282, "4725": 2.63344, "4730": 2.65369, "4735": 2.68855, "4740": 2.64546, "4745": 2.65283, "4750": 2.64711, "4755": 2.6554, "4760": 2.66479, "4765": 2.64505, "4770": 2.62395, "4775": 2.65868, "4780": 2.65863, "4785": 2.69068, "4790": 2.64994, "4795": 2.67407, "4800": 2.63006, "4805": 2.64157, "4810": 2.66269, "4815": 2.64726, "4820": 2.67503, "4825": 2.65232, "4830": 2.61648, "4835": 2.65142, "4840": 2.65648, "4845": 2.63717, "4850": 2.62987, "4855": 2.60358, "4860": 2.65309, "4865": 2.62639, "4870": 2.64201, "4875": 2.62168, "4880": 2.6283, "4885": 2.62692, "4890": 2.6832, "4895": 2.66059, "4900": 2.62393, "4905": 2.62377, "4910": 2.64015, "4915": 2.61782, "4920": 2.65618, "4925": 2.65141, "4930": 2.57162, "4935": 2.65454, "4940": 2.63741, "4945": 2.64228, "4950": 2.62741, "4955": 2.62215, "4960": 2.62285, "4965": 2.65873, "4970": 2.59789, "4975": 2.65583, "4980": 2.62079, "4985": 2.63206, "4990": 2.65813, "4995": 2.58175, "5000": 2.66396, "5005": 2.66712, "5010": 2.68345, "5015": 2.63375, "5020": 2.64132, "5025": 2.68755, "5030": 2.64654, "5035": 2.61812, "5040": 2.6232, "5045": 2.60853, "5050": 2.62811, "5055": 2.64982, "5060": 2.6488, "5065": 2.68964, "5070": 2.61589, "5075": 2.61628, "5080": 2.61295, "5085": 2.60633, "5090": 2.59345, "5095": 2.6514, "5100": 2.64765, "5105": 2.60989, "5110": 2.66303, "5115": 2.61733, "5120": 2.67365, "5125": 2.63679, "5130": 2.61926, "5135": 2.61473, "5140": 2.57791, "5145": 2.6305, "5150": 2.63979, "5155": 2.61812, "5160": 2.66157, "5165": 2.58528, "5170": 2.59214, "5175": 2.61874, "5180": 2.60768, "5185": 2.62236, "5190": 2.62687, "5195": 2.67536, "5200": 2.59835, "5205": 2.60613, "5210": 2.60847, "5215": 2.64838, "5220": 2.58936, "5225": 2.56126, "5230": 2.63955, "5235": 2.61901, "5240": 2.61771, "5245": 2.63662, "5250": 2.59714, "5255": 2.62476, "5260": 2.56122, "5265": 2.59883, "5270": 2.59852, "5275": 2.62151, "5280": 2.61298, "5285": 2.60578, "5290": 2.63643, "5295": 2.623, "5300": 2.57741, "5305": 2.5989, "5310": 2.61263, "5315": 2.58786, "5320": 2.6196, "5325": 2.64702, "5330": 2.60458, "5335": 2.58556, "5340": 2.56527, "5345": 2.65891, "5350": 2.62457, "5355": 2.58024, "5360": 2.59613, "5365": 2.62539, "5370": 2.61436, "5375": 2.62912, "5380": 2.57933, "5385": 2.56305, "5390": 2.58666, "5395": 2.62213, "5400": 2.60994, "5405": 2.54769, "5410": 2.61357, "5415": 2.59505, "5420": 2.61517, "5425": 2.62654, "5430": 2.62821, "5435": 2.58129, "5440": 2.58714, "5445": 2.63376, "5450": 2.64964, "5455": 2.61559, "5460": 2.59232, "5465": 2.60556, "5470": 2.60117, "5475": 2.62743, "5480": 2.59131, "5485": 2.59302, "5490": 2.57824, "5495": 2.57336, "5500": 2.57112, "5505": 2.6195, "5510": 2.62857, "5515": 2.5862, "5520": 2.55747, "5525": 2.5861, "5530": 2.66605, "5535": 2.62214, "5540": 2.57243, "5545": 2.59908, "5550": 2.55129, "5555": 2.57549, "5560": 2.56567, "5565": 2.6152, "5570": 2.6569, "5575": 2.63597, "5580": 2.57578, "5585": 2.59657, "5590": 2.56504, "5595": 2.58497, "5600": 2.55605, "5605": 2.60039, "5610": 2.58507, "5615": 2.58194, "5620": 2.58536, "5625": 2.55433, "5630": 2.57398, "5635": 2.63496, "5640": 2.59626, "5645": 2.5738, "5650": 2.5792, "5655": 2.5508, "5660": 2.561, "5665": 2.58755, "5670": 2.57014, "5675": 2.60801, "5680": 2.53099, "5685": 2.57004, "5690": 2.60438, "5695": 2.56059, "5700": 2.59906, "5705": 2.59954, "5710": 2.58603, "5715": 2.58537, "5720": 2.53859, "5725": 2.60581, "5730": 2.57691, "5735": 2.61284, "5740": 2.59693, "5745": 2.56181, "5750": 2.54469, "5755": 2.56931, "5760": 2.62006, "5765": 2.56211, "5770": 2.54337, "5775": 2.5863, "5780": 2.5802, "5785": 2.54219, "5790": 2.56458, "5795": 2.60267, "5800": 2.54581, "5805": 2.53911, "5810": 2.55793, "5815": 2.52573, "5820": 2.59799, "5825": 2.50727, "5830": 2.49954, "5835": 2.59981, "5840": 2.54168, "5845": 2.55463, "5850": 2.61394, "5855": 2.50988, "5860": 2.5659, "5865": 2.51855, "5870": 2.57631, "5875": 2.60899, "5880": 2.58767, "5885": 2.56787, "5890": 2.58485, "5895": 2.55509, "5900": 2.61499, "5905": 2.55842, "5910": 2.59998, "5915": 2.61074, "5920": 2.5879, "5925": 2.53656, "5930": 2.58052, "5935": 2.55536, "5940": 2.57457, "5945": 2.52086, "5950": 2.55692, "5955": 2.58802, "5960": 2.57272, "5965": 2.62142, "5970": 2.55248, "5975": 2.58216, "5980": 2.56041, "5985": 2.56023, "5990": 2.55693, "5995": 2.55875, "6000": 2.55621, "6005": 2.52173, "6010": 2.56421, "6015": 2.52542, "6020": 2.53609, "6025": 2.55967, "6030": 2.60714, "6035": 2.54354, "6040": 2.55297, "6045": 2.49257, "6050": 2.59754, "6055": 2.51907, "6060": 2.54668, "6065": 2.52596, "6070": 2.53048, "6075": 2.53891, "6080": 2.53531, "6085": 2.59709, "6090": 2.56903, "6095": 2.53852, "6100": 2.54728, "6105": 2.52455, "6110": 2.55806, "6115": 2.58759, "6120": 2.56097, "6125": 2.54072, "6130": 2.47535, "6135": 2.55859, "6140": 2.55763, "6145": 2.55775, "6150": 2.52553, "6155": 2.51243, "6160": 2.54146, "6165": 2.57285, "6170": 2.54532, "6175": 2.60155, "6180": 2.51252, "6185": 2.55432, "6190": 2.4927, "6195": 2.57909, "6200": 2.55361, "6205": 2.53801, "6210": 2.52133, "6215": 2.51599, "6220": 2.56663, "6225": 2.51776, "6230": 2.51427, "6235": 2.56357, "6240": 2.55265, "6245": 2.52578, "6250": 2.53799, "6255": 2.57562, "6260": 2.52377, "6265": 2.57576, "6270": 2.52753, "6275": 2.56594, "6280": 2.52372, "6285": 2.52115, "6290": 2.52038, "6295": 2.51065, "6300": 2.55843, "6305": 2.52583, "6310": 2.51451, "6315": 2.53777, "6320": 2.49034, "6325": 2.59934, "6330": 2.55604, "6335": 2.513, "6340": 2.51543, "6345": 2.55864, "6350": 2.55968, "6355": 2.52601, "6360": 2.52331, "6365": 2.48776, "6370": 2.53724, "6375": 2.4974, "6380": 2.56221, "6385": 2.5789, "6390": 2.50677, "6395": 2.55368, "6400": 2.51009, "6405": 2.53092, "6410": 2.51825, "6415": 2.52588, "6420": 2.54318, "6425": 2.53753, "6430": 2.57807, "6435": 2.54458, "6440": 2.53912, "6445": 2.53027, "6450": 2.53202, "6455": 2.52469, "6460": 2.51828, "6465": 2.56245, "6470": 2.51861, "6475": 2.52526, "6480": 2.49016, "6485": 2.53079, "6490": 2.50913, "6495": 2.49961, "6500": 2.52573, "6505": 2.49774, "6510": 2.54674, "6515": 2.51159, "6520": 2.51264, "6525": 2.4959, "6530": 2.54459, "6535": 2.53407, "6540": 2.53454, "6545": 2.56309, "6550": 2.50202, "6555": 2.55849, "6560": 2.51261, "6565": 2.52455, "6570": 2.58887, "6575": 2.52608, "6580": 2.5007, "6585": 2.50756, "6590": 2.51109, "6595": 2.50567, "6600": 2.48982, "6605": 2.54092, "6610": 2.47917, "6615": 2.56734, "6620": 2.53378, "6625": 2.51199, "6630": 2.51326, "6635": 2.47477, "6640": 2.54127, "6645": 2.597, "6650": 2.51198, "6655": 2.5013, "6660": 2.57657, "6665": 2.52293, "6670": 2.57104, "6675": 2.46955, "6680": 2.54992, "6685": 2.53662, "6690": 2.51483, "6695": 2.48975, "6700": 2.52588, "6705": 2.51985, "6710": 2.49406, "6715": 2.51664, "6720": 2.51367, "6725": 2.52151, "6730": 2.52133, "6735": 2.48551, "6740": 2.51513, "6745": 2.49672, "6750": 2.55798, "6755": 2.47753, "6760": 2.54437, "6765": 2.49045, "6770": 2.51992, "6775": 2.51138, "6780": 2.54025, "6785": 2.47355, "6790": 2.54577, "6795": 2.49881, "6800": 2.52604, "6805": 2.51297, "6810": 2.50728, "6815": 2.52366, "6820": 2.48623, "6825": 2.50624, "6830": 2.54235, "6835": 2.50779, "6840": 2.51167, "6845": 2.52457, "6850": 2.47359, "6855": 2.51574, "6860": 2.50359, "6865": 2.48991, "6870": 2.55402, "6875": 2.47618, "6880": 2.55239, "6885": 2.47805, "6890": 2.54658, "6895": 2.50237, "6900": 2.49038, "6905": 2.49821, "6910": 2.52075, "6915": 2.51828, "6920": 2.53564, "6925": 2.54308, "6930": 2.4922, "6935": 2.52267, "6940": 2.50655, "6945": 2.46434, "6950": 2.48714, "6955": 2.53097, "6960": 2.52343, "6965": 2.49573, "6970": 2.47328, "6975": 2.5243, "6980": 2.4557, "6985": 2.5166, "6990": 2.5307, "6995": 2.46246, "7000": 2.48865, "7005": 2.47048, "7010": 2.47417, "7015": 2.52128, "7020": 2.46777, "7025": 2.45498, "7030": 2.4868, "7035": 2.48643, "7040": 2.5075, "7045": 2.52212, "7050": 2.52685, "7055": 2.4436, "7060": 2.47364, "7065": 2.48475, "7070": 2.491, "7075": 2.49759, "7080": 2.5372, "7085": 2.48609, "7090": 2.47864, "7095": 2.50268, "7100": 2.51609, "7105": 2.49024, "7110": 2.48723, "7115": 2.50414, "7120": 2.47222, "7125": 2.46338, "7130": 2.48748, "7135": 2.51624, "7140": 2.50087, "7145": 2.49897, "7150": 2.51234, "7155": 2.5054, "7160": 2.47499, "7165": 2.4576, "7170": 2.50799, "7175": 2.50359, "7180": 2.50483, "7185": 2.48328, "7190": 2.46103, "7195": 2.46681, "7200": 2.51066, "7205": 2.48989, "7210": 2.44416, "7215": 2.48256, "7220": 2.44385, "7225": 2.51438, "7230": 2.51009, "7235": 2.48459, "7240": 2.48094, "7245": 2.50102, "7250": 2.50961, "7255": 2.49442, "7260": 2.45996, "7265": 2.4513, "7270": 2.47389, "7275": 2.50151, "7280": 2.49535, "7285": 2.42532, "7290": 2.4816, "7295": 2.48816, "7300": 2.41927, "7305": 2.4454, "7310": 2.44926, "7315": 2.49017, "7320": 2.48559, "7325": 2.45991, "7330": 2.49144, "7335": 2.47554, "7340": 2.46492, "7345": 2.49451, "7350": 2.51104, "7355": 2.49613, "7360": 2.48111, "7365": 2.47052, "7370": 2.47034, "7375": 2.45108, "7380": 2.49378, "7385": 2.48432, "7390": 2.47252, "7395": 2.47391, "7400": 2.48, "7405": 2.43779, "7410": 2.48111, "7415": 2.4723, "7420": 2.49386, "7425": 2.4572, "7430": 2.52353, "7435": 2.49126, "7440": 2.52078, "7445": 2.50965, "7450": 2.47575, "7455": 2.45756, "7460": 2.46525, "7465": 2.47818, "7470": 2.45137, "7475": 2.45748, "7480": 2.51445, "7485": 2.45105, "7490": 2.47439, "7495": 2.4828, "7500": 2.49635, "7505": 2.44035, "7510": 2.43615, "7515": 2.42039, "7520": 2.49459, "7525": 2.49856, "7530": 2.47616, "7535": 2.46067, "7540": 2.47269, "7545": 2.47439, "7550": 2.4896, "7555": 2.45594, "7560": 2.42902, "7565": 2.50918, "7570": 2.48451, "7575": 2.43881, "7580": 2.45733, "7585": 2.4823, "7590": 2.48072, "7595": 2.46547, "7600": 2.46329, "7605": 2.44663, "7610": 2.45006, "7615": 2.42705, "7620": 2.54677, "7625": 2.48038, "7630": 2.4252, "7635": 2.42633, "7640": 2.45525, "7645": 2.47318, "7650": 2.46195, "7655": 2.48509, "7660": 2.45297, "7665": 2.43317, "7670": 2.4421, "7675": 2.45567, "7680": 2.48734, "7685": 2.4314, "7690": 2.4795, "7695": 2.4543, "7700": 2.48159, "7705": 2.50522, "7710": 2.49534, "7715": 2.44331, "7720": 2.47034, "7725": 2.48293, "7730": 2.45916, "7735": 2.47226, "7740": 2.43855, "7745": 2.44756, "7750": 2.43801, "7755": 2.46634, "7760": 2.45302, "7765": 2.45449, "7770": 2.4699, "7775": 2.45381, "7780": 2.41751, "7785": 2.44689, "7790": 2.48057, "7795": 2.44068, "7800": 2.46241, "7805": 2.48105, "7810": 2.50292, "7815": 2.48754, "7820": 2.44667, "7825": 2.5154, "7830": 2.45336, "7835": 2.46991, "7840": 2.47831, "7845": 2.46035, "7850": 2.41785, "7855": 2.47228, "7860": 2.50125, "7865": 2.42679, "7870": 2.46779, "7875": 2.44766, "7880": 2.45412, "7885": 2.46148, "7890": 2.47141, "7895": 2.44777, "7900": 2.44021, "7905": 2.43767, "7910": 2.42476, "7915": 2.48306, "7920": 2.47768, "7925": 2.42266, "7930": 2.47317, "7935": 2.45138, "7940": 2.42102, "7945": 2.47004, "7950": 2.44405, "7955": 2.41923, "7960": 2.48981, "7965": 2.51926, "7970": 2.52229, "7975": 2.45176, "7980": 2.44085, "7985": 2.46889, "7990": 2.43243, "7995": 2.47132, "8000": 2.43813, "8005": 2.4209, "8010": 2.45777, "8015": 2.47127, "8020": 2.48262, "8025": 2.4748, "8030": 2.45258, "8035": 2.47428, "8040": 2.42241, "8045": 2.45383, "8050": 2.44908, "8055": 2.42664, "8060": 2.44399, "8065": 2.46327, "8070": 2.45817, "8075": 2.46196, "8080": 2.44473, "8085": 2.44238, "8090": 2.42658, "8095": 2.42329, "8100": 2.4396, "8105": 2.49529, "8110": 2.44056, "8115": 2.44665, "8120": 2.46802, "8125": 2.46755, "8130": 2.45406, "8135": 2.45366, "8140": 2.44199, "8145": 2.42755, "8150": 2.42357, "8155": 2.48556, "8160": 2.45471, "8165": 2.44338, "8170": 2.43648, "8175": 2.42332, "8180": 2.49727, "8185": 2.4271, "8190": 2.46741, "8195": 2.4579, "8200": 2.44861, "8205": 2.44535, "8210": 2.43219, "8215": 2.44073, "8220": 2.43591, "8225": 2.41034, "8230": 2.44074, "8235": 2.46491, "8240": 2.42763, "8245": 2.4535, "8250": 2.44694, "8255": 2.44097, "8260": 2.43244, "8265": 2.42715, "8270": 2.43217, "8275": 2.44288, "8280": 2.39801, "8285": 2.4408, "8290": 2.48088, "8295": 2.44783, "8300": 2.45859, "8305": 2.40863, "8310": 2.43565, "8315": 2.45681, "8320": 2.39968, "8325": 2.39394, "8330": 2.43512, "8335": 2.44473, "8340": 2.48965, "8345": 2.44983, "8350": 2.45153, "8355": 2.40982, "8360": 2.40328, "8365": 2.45494, "8370": 2.45104, "8375": 2.42385, "8380": 2.41814, "8385": 2.42376, "8390": 2.43704, "8395": 2.44272, "8400": 2.44074, "8405": 2.49078, "8410": 2.43868, "8415": 2.43312, "8420": 2.4159, "8425": 2.43983, "8430": 2.46202, "8435": 2.40609, "8440": 2.45195, "8445": 2.45872, "8450": 2.40661, "8455": 2.45911, "8460": 2.45463, "8465": 2.43634, "8470": 2.40933, "8475": 2.47645, "8480": 2.40234, "8485": 2.41569, "8490": 2.46417, "8495": 2.43687, "8500": 2.44547, "8505": 2.40586, "8510": 2.40266, "8515": 2.42869, "8520": 2.42531, "8525": 2.49167, "8530": 2.37317, "8535": 2.4009, "8540": 2.48407, "8545": 2.38087, "8550": 2.43851, "8555": 2.4532, "8560": 2.47057, "8565": 2.41968, "8570": 2.43304, "8575": 2.44872, "8580": 2.441, "8585": 2.42202, "8590": 2.40363, "8595": 2.42806, "8600": 2.41265, "8605": 2.49263, "8610": 2.42292, "8615": 2.38779, "8620": 2.44791, "8625": 2.42668, "8630": 2.45697, "8635": 2.4493, "8640": 2.43571, "8645": 2.47378, "8650": 2.42177, "8655": 2.45426, "8660": 2.45575, "8665": 2.38622, "8670": 2.41062, "8675": 2.42941, "8680": 2.44966, "8685": 2.43158, "8690": 2.41157, "8695": 2.44384, "8700": 2.43391, "8705": 2.4188, "8710": 2.42909, "8715": 2.44963, "8720": 2.47627, "8725": 2.41367, "8730": 2.39149, "8735": 2.43323, "8740": 2.43095, "8745": 2.3965, "8750": 2.43723, "8755": 2.42465, "8760": 2.40102, "8765": 2.43539, "8770": 2.40497, "8775": 2.43756, "8780": 2.42148, "8785": 2.47061, "8790": 2.42112, "8795": 2.41959, "8800": 2.41679, "8805": 2.40559, "8810": 2.41091, "8815": 2.47604, "8820": 2.45311, "8825": 2.42709, "8830": 2.38746, "8835": 2.42192, "8840": 2.39222, "8845": 2.42545, "8850": 2.43382, "8855": 2.4041, "8860": 2.42736, "8865": 2.42613, "8870": 2.43495, "8875": 2.4397, "8880": 2.41143, "8885": 2.3942, "8890": 2.44707, "8895": 2.42828, "8900": 2.41199, "8905": 2.40349, "8910": 2.40159, "8915": 2.41867, "8920": 2.43269, "8925": 2.46507, "8930": 2.41485, "8935": 2.40892, "8940": 2.39057, "8945": 2.39342, "8950": 2.4188, "8955": 2.39531, "8960": 2.43412, "8965": 2.4145, "8970": 2.40297, "8975": 2.4763, "8980": 2.44071, "8985": 2.37362, "8990": 2.41006, "8995": 2.41538, "9000": 2.45666, "9005": 2.41268, "9010": 2.37496, "9015": 2.40806, "9020": 2.39856, "9025": 2.36953, "9030": 2.39957, "9035": 2.42478, "9040": 2.42103, "9045": 2.41893, "9050": 2.39574, "9055": 2.41923, "9060": 2.4219, "9065": 2.40682, "9070": 2.44633, "9075": 2.39548, "9080": 2.43748, "9085": 2.41296, "9090": 2.41213, "9095": 2.39655, "9100": 2.40105, "9105": 2.35885, "9110": 2.46805, "9115": 2.41855, "9120": 2.40554, "9125": 2.45999, "9130": 2.39656, "9135": 2.44831, "9140": 2.43637, "9145": 2.43125, "9150": 2.42663, "9155": 2.37737, "9160": 2.41991, "9165": 2.42483, "9170": 2.37559, "9175": 2.41897, "9180": 2.38273, "9185": 2.4444, "9190": 2.4158, "9195": 2.4015, "9200": 2.39022, "9205": 2.44813, "9210": 2.35821, "9215": 2.46294, "9220": 2.44575, "9225": 2.38132, "9230": 2.44433, "9235": 2.39605, "9240": 2.40141, "9245": 2.43609, "9250": 2.43191, "9255": 2.42992, "9260": 2.38607, "9265": 2.43814, "9270": 2.43532, "9275": 2.39476, "9280": 2.39029, "9285": 2.4227, "9290": 2.40444, "9295": 2.38527, "9300": 2.42555, "9305": 2.40565, "9310": 2.41467, "9315": 2.40888, "9320": 2.44594, "9325": 2.37018, "9330": 2.4025, "9335": 2.36136, "9340": 2.40856, "9345": 2.41593, "9350": 2.43991, "9355": 2.47711, "9360": 2.43894, "9365": 2.38891, "9370": 2.43676, "9375": 2.43381, "9380": 2.35345, "9385": 2.40228, "9390": 2.38462, "9395": 2.39157, "9400": 2.44756, "9405": 2.41751, "9410": 2.39915, "9415": 2.43984, "9420": 2.44477, "9425": 2.43203, "9430": 2.44767, "9435": 2.415, "9440": 2.48099, "9445": 2.37747, "9450": 2.39451, "9455": 2.40211, "9460": 2.38651, "9465": 2.37928, "9470": 2.3821, "9475": 2.36515, "9480": 2.43529, "9485": 2.38676, "9490": 2.42092, "9495": 2.38346, "9500": 2.36398, "9505": 2.43119, "9510": 2.40064, "9515": 2.43184, "9520": 2.41918, "9525": 2.39026, "9530": 2.45399, "9535": 2.40281, "9540": 2.42013, "9545": 2.37936, "9550": 2.42119, "9555": 2.39168, "9560": 2.42365, "9565": 2.40503, "9570": 2.37085, "9575": 2.41184, "9580": 2.39863, "9585": 2.42528, "9590": 2.42952, "9595": 2.44875, "9600": 2.39205, "9605": 2.38524, "9610": 2.42196, "9615": 2.41639, "9620": 2.41532, "9625": 2.44692, "9630": 2.39667, "9635": 2.40245, "9640": 2.44671, "9645": 2.41032, "9650": 2.39926, "9655": 2.37417, "9660": 2.42433, "9665": 2.39002, "9670": 2.38266, "9675": 2.35612, "9680": 2.39798, "9685": 2.3994, "9690": 2.46313, "9695": 2.38258, "9700": 2.3784, "9705": 2.3847, "9710": 2.36659, "9715": 2.38876, "9720": 2.43553, "9725": 2.44244, "9730": 2.42858, "9735": 2.38615, "9740": 2.38015, "9745": 2.42681, "9750": 2.3992, "9755": 2.40867, "9760": 2.411, "9765": 2.3701, "9770": 2.45578, "9775": 2.40096, "9780": 2.36199, "9785": 2.40122, "9790": 2.40937, "9795": 2.35959, "9800": 2.39812, "9805": 2.40601, "9810": 2.40911, "9815": 2.37797, "9820": 2.37792, "9825": 2.40713, "9830": 2.42215, "9835": 2.38632, "9840": 2.41523, "9845": 2.36332, "9850": 2.39833, "9855": 2.39492, "9860": 2.3901, "9865": 2.38054, "9870": 2.38693, "9875": 2.38259, "9880": 2.452, "9885": 2.39237, "9890": 2.35253, "9895": 2.32236, "9900": 2.39637, "9905": 2.42587, "9910": 2.3558, "9915": 2.36434, "9920": 2.41221, "9925": 2.39752, "9930": 2.38061, "9935": 2.35025, "9940": 2.38558, "9945": 2.37958, "9950": 2.40293, "9955": 2.44825, "9960": 2.42656, "9965": 2.35652, "9970": 2.40857, "9975": 2.38548, "9980": 2.33271, "9985": 2.40551, "9990": 2.39663, "9995": 2.39663, "10000": 2.36712, "10005": 2.37293, "10010": 2.38301, "10015": 2.44482, "10020": 2.3651, "10025": 2.3902, "10030": 2.38972, "10035": 2.4106, "10040": 2.40357, "10045": 2.38245, "10050": 2.35307, "10055": 2.36905, "10060": 2.41943, "10065": 2.37469, "10070": 2.42364, "10075": 2.37285, "10080": 2.36155, "10085": 2.36988, "10090": 2.34864, "10095": 2.40234, "10100": 2.31433, "10105": 2.38203, "10110": 2.41029, "10115": 2.38782, "10120": 2.35732, "10125": 2.37008, "10130": 2.35996, "10135": 2.38442, "10140": 2.41256, "10145": 2.4073, "10150": 2.37822, "10155": 2.39638, "10160": 2.36203, "10165": 2.38403, "10170": 2.42425, "10175": 2.32585, "10180": 2.3949, "10185": 2.38343, "10190": 2.44642, "10195": 2.40463, "10200": 2.39145, "10205": 2.38825, "10210": 2.36837, "10215": 2.34249, "10220": 2.42032, "10225": 2.43063, "10230": 2.35631, "10235": 2.38817, "10240": 2.37339, "10245": 2.38948, "10250": 2.38836, "10255": 2.41399, "10260": 2.3365, "10265": 2.35089, "10270": 2.35153, "10275": 2.37138, "10280": 2.45108, "10285": 2.35881, "10290": 2.38563, "10295": 2.37404, "10300": 2.36903, "10305": 2.41604, "10310": 2.38937, "10315": 2.36155, "10320": 2.36818, "10325": 2.36087, "10330": 2.41262, "10335": 2.36134, "10340": 2.41912, "10345": 2.37129, "10350": 2.35887, "10355": 2.39713, "10360": 2.37284, "10365": 2.35961, "10370": 2.34095, "10375": 2.35823, "10380": 2.42018, "10385": 2.40667, "10390": 2.38217, "10395": 2.35927, "10400": 2.37981, "10405": 2.3514, "10410": 2.34036, "10415": 2.41721, "10420": 2.38056, "10425": 2.3279, "10430": 2.35961, "10435": 2.37189, "10440": 2.37164, "10445": 2.36072, "10450": 2.36035, "10455": 2.38141, "10460": 2.38156, "10465": 2.30307, "10470": 2.35838, "10475": 2.37993, "10480": 2.36349, "10485": 2.3616, "10490": 2.41347, "10495": 2.3663, "10500": 2.36345, "10505": 2.3704, "10510": 2.38164, "10515": 2.37404, "10520": 2.40177, "10525": 2.39082, "10530": 2.39269, "10535": 2.35511, "10540": 2.40525, "10545": 2.35799, "10550": 2.37769, "10555": 2.35937, "10560": 2.34047, "10565": 2.37358, "10570": 2.37583, "10575": 2.35391, "10580": 2.37861, "10585": 2.36778, "10590": 2.37825, "10595": 2.3768, "10600": 2.33294, "10605": 2.3744, "10610": 2.36684, "10615": 2.36418, "10620": 2.3481, "10625": 2.41869, "10630": 2.37036, "10635": 2.32612, "10640": 2.3686, "10645": 2.42519, "10650": 2.36272, "10655": 2.30887, "10660": 2.34729, "10665": 2.3999, "10670": 2.31555, "10675": 2.41524, "10680": 2.35522, "10685": 2.28877, "10690": 2.38421, "10695": 2.33125, "10700": 2.38493, "10705": 2.38421, "10710": 2.34429, "10715": 2.38217, "10720": 2.32577, "10725": 2.3526, "10730": 2.34927, "10735": 2.35372, "10740": 2.31873, "10745": 2.33924, "10750": 2.33441, "10755": 2.40608, "10760": 2.36475, "10765": 2.33642, "10770": 2.3682, "10775": 2.38648, "10780": 2.37665, "10785": 2.3935, "10790": 2.34817, "10795": 2.38655, "10800": 2.3224, "10805": 2.39526, "10810": 2.37478, "10815": 2.35241, "10820": 2.34227, "10825": 2.37052, "10830": 2.33632, "10835": 2.3473, "10840": 2.32969, "10845": 2.38523, "10850": 2.33171, "10855": 2.36349, "10860": 2.33347, "10865": 2.32055, "10870": 2.32391, "10875": 2.30336, "10880": 2.3961, "10885": 2.40395, "10890": 2.36318, "10895": 2.37233, "10900": 2.33301, "10905": 2.31392, "10910": 2.40641, "10915": 2.37091, "10920": 2.37548, "10925": 2.36326, "10930": 2.31925, "10935": 2.36279, "10940": 2.35505, "10945": 2.34736, "10950": 2.36169, "10955": 2.36448, "10960": 2.3103, "10965": 2.36627, "10970": 2.3575, "10975": 2.40912, "10980": 2.37974, "10985": 2.34619, "10990": 2.39953, "10995": 2.36446, "11000": 2.33882, "11005": 2.36197, "11010": 2.34312, "11015": 2.32815, "11020": 2.33509, "11025": 2.36622, "11030": 2.34039, "11035": 2.31535, "11040": 2.31892, "11045": 2.31979, "11050": 2.31837, "11055": 2.29034, "11060": 2.34014, "11065": 2.31064, "11070": 2.39397, "11075": 2.31875, "11080": 2.35555, "11085": 2.33651, "11090": 2.34699, "11095": 2.37202, "11100": 2.33105, "11105": 2.31705, "11110": 2.36363, "11115": 2.37339, "11120": 2.38228, "11125": 2.31554, "11130": 2.3509, "11135": 2.33363, "11140": 2.37213, "11145": 2.34949, "11150": 2.39594, "11155": 2.34144, "11160": 2.36451, "11165": 2.36448, "11170": 2.34169, "11175": 2.33521, "11180": 2.374, "11185": 2.31357, "11190": 2.27903, "11195": 2.32844, "11200": 2.34711, "11205": 2.36188, "11210": 2.33322, "11215": 2.32109, "11220": 2.34347, "11225": 2.37149, "11230": 2.36724, "11235": 2.32024, "11240": 2.34215, "11245": 2.35727, "11250": 2.3338, "11255": 2.33621, "11260": 2.35675, "11265": 2.38911, "11270": 2.28815, "11275": 2.31508, "11280": 2.36994, "11285": 2.29241, "11290": 2.34689, "11295": 2.36516, "11300": 2.38206, "11305": 2.33607, "11310": 2.33068, "11315": 2.29847, "11320": 2.3046, "11325": 2.31535, "11330": 2.35402, "11335": 2.33875, "11340": 2.3078, "11345": 2.31465, "11350": 2.29669, "11355": 2.3213, "11360": 2.35247, "11365": 2.29522, "11370": 2.3528, "11375": 2.32856, "11380": 2.34063, "11385": 2.34871, "11390": 2.33492, "11395": 2.28864, "11400": 2.3102, "11405": 2.35527, "11410": 2.35614, "11415": 2.38615, "11420": 2.35171, "11425": 2.30946, "11430": 2.36841, "11435": 2.36187, "11440": 2.3483, "11445": 2.36353, "11450": 2.32332, "11455": 2.30629, "11460": 2.35014, "11465": 2.3444, "11470": 2.37439, "11475": 2.31267, "11480": 2.3259, "11485": 2.31121, "11490": 2.34671, "11495": 2.407, "11500": 2.33989, "11505": 2.35047, "11510": 2.36392, "11515": 2.32205, "11520": 2.30585, "11525": 2.36185, "11530": 2.31522, "11535": 2.32365, "11540": 2.34735, "11545": 2.34432, "11550": 2.36631, "11555": 2.32694, "11560": 2.34966, "11565": 2.34173, "11570": 2.3502, "11575": 2.29757, "11580": 2.3294, "11585": 2.35373, "11590": 2.36296, "11595": 2.33611, "11600": 2.35751, "11605": 2.32338, "11610": 2.36183, "11615": 2.36051, "11620": 2.29702, "11625": 2.27757, "11630": 2.3262, "11635": 2.34311, "11640": 2.30542, "11645": 2.30869, "11650": 2.32745, "11655": 2.35171, "11660": 2.33608, "11665": 2.3304, "11670": 2.30005, "11675": 2.29899, "11680": 2.32637, "11685": 2.33631, "11690": 2.34485, "11695": 2.31804, "11700": 2.32598, "11705": 2.30211, "11710": 2.34494, "11715": 2.31589, "11720": 2.30005, "11725": 2.33992, "11730": 2.30547, "11735": 2.32892, "11740": 2.27321, "11745": 2.31819, "11750": 2.32956, "11755": 2.35314, "11760": 2.3133, "11765": 2.33574, "11770": 2.27545, "11775": 2.32591, "11780": 2.25501, "11785": 2.29713, "11790": 2.31266, "11795": 2.32051, "11800": 2.33547, "11805": 2.30455, "11810": 2.30487, "11815": 2.33091, "11820": 2.31988, "11825": 2.36013, "11830": 2.31774, "11835": 2.33804, "11840": 2.34135, "11845": 2.31733, "11850": 2.3054, "11855": 2.31454, "11860": 2.34447, "11865": 2.35919, "11870": 2.38007, "11875": 2.28185, "11880": 2.29237, "11885": 2.3367, "11890": 2.2924, "11895": 2.29094, "11900": 2.33312, "11905": 2.31933, "11910": 2.27797, "11915": 2.30957, "11920": 2.33374, "11925": 2.30219, "11930": 2.30636, "11935": 2.31643, "11940": 2.31891, "11945": 2.34349, "11950": 2.29982, "11955": 2.31381, "11960": 2.3383, "11965": 2.29651, "11970": 2.28199, "11975": 2.33623, "11980": 2.30647, "11985": 2.27844, "11990": 2.30583, "11995": 2.33073, "12000": 2.32542, "12005": 2.3262, "12010": 2.28942, "12015": 2.31138, "12020": 2.33047, "12025": 2.33749, "12030": 2.31363, "12035": 2.33774, "12040": 2.3169, "12045": 2.31277, "12050": 2.30986, "12055": 2.33934, "12060": 2.305, "12065": 2.33371, "12070": 2.30513, "12075": 2.27789, "12080": 2.35097, "12085": 2.3392, "12090": 2.33229, "12095": 2.28203, "12100": 2.31574, "12105": 2.30887, "12110": 2.32964, "12115": 2.30612, "12120": 2.30664, "12125": 2.29507, "12130": 2.3042, "12135": 2.32923, "12140": 2.29515, "12145": 2.25709, "12150": 2.26046, "12155": 2.34281, "12160": 2.35878, "12165": 2.31984, "12170": 2.33291, "12175": 2.34154, "12180": 2.32985, "12185": 2.34137, "12190": 2.33546, "12195": 2.29796, "12200": 2.30043, "12205": 2.32324, "12210": 2.35794, "12215": 2.30579, "12220": 2.3035, "12225": 2.24616, "12230": 2.33515, "12235": 2.33794, "12240": 2.32568, "12245": 2.28814, "12250": 2.27379, "12255": 2.33621, "12260": 2.31317, "12265": 2.34169, "12270": 2.31253, "12275": 2.31435, "12280": 2.32016, "12285": 2.28631, "12290": 2.31142, "12295": 2.26644, "12300": 2.33017, "12305": 2.26932, "12310": 2.28998, "12315": 2.35448, "12320": 2.29669, "12325": 2.32168, "12330": 2.30123, "12335": 2.31992, "12340": 2.34073, "12345": 2.36903, "12350": 2.34349, "12355": 2.30698, "12360": 2.31406, "12365": 2.32989, "12370": 2.29421, "12375": 2.30085, "12380": 2.29304, "12385": 2.29101, "12390": 2.24955, "12395": 2.30545, "12400": 2.29941, "12405": 2.31184, "12410": 2.30405, "12415": 2.28283, "12420": 2.31662, "12425": 2.30137, "12430": 2.31604, "12435": 2.30015, "12440": 2.33187, "12445": 2.3202, "12450": 2.30747, "12455": 2.24213, "12460": 2.33606, "12465": 2.36534, "12470": 2.27775, "12475": 2.27393, "12480": 2.29381, "12485": 2.30843, "12490": 2.33217, "12495": 2.27121, "12500": 2.32285, "12505": 2.33741, "12510": 2.35764, "12515": 2.27091, "12520": 2.32023, "12525": 2.28798, "12530": 2.3219, "12535": 2.27256, "12540": 2.28698, "12545": 2.29168, "12550": 2.31654, "12555": 2.32445, "12560": 2.30087, "12565": 2.33624, "12570": 2.27857, "12575": 2.30063, "12580": 2.31146, "12585": 2.29288, "12590": 2.33487, "12595": 2.32535, "12600": 2.28391, "12605": 2.32044, "12610": 2.36395, "12615": 2.30644, "12620": 2.33479, "12625": 2.33047, "12630": 2.3071, "12635": 2.34868, "12640": 2.30142, "12645": 2.28328, "12650": 2.329, "12655": 2.2673, "12660": 2.34214, "12665": 2.31792, "12670": 2.31058, "12675": 2.31876, "12680": 2.27478, "12685": 2.36523, "12690": 2.30351, "12695": 2.33151, "12700": 2.29281, "12705": 2.30608, "12710": 2.30727, "12715": 2.28776, "12720": 2.31371, "12725": 2.27582, "12730": 2.33944, "12735": 2.29875, "12740": 2.3397, "12745": 2.28992, "12750": 2.27568, "12755": 2.2832, "12760": 2.26789, "12765": 2.33799, "12770": 2.32691, "12775": 2.26482, "12780": 2.32042, "12785": 2.30192, "12790": 2.30792, "12795": 2.32186, "12800": 2.29824, "12805": 2.3178, "12810": 2.27634, "12815": 2.29991, "12820": 2.32379, "12825": 2.32501, "12830": 2.2916, "12835": 2.26968, "12840": 2.27163, "12845": 2.31382, "12850": 2.27883, "12855": 2.2737, "12860": 2.27273, "12865": 2.31789, "12870": 2.26753, "12875": 2.34213, "12880": 2.32009, "12885": 2.28139, "12890": 2.30861, "12895": 2.2465, "12900": 2.32698, "12905": 2.31778, "12910": 2.2877, "12915": 2.28648, "12920": 2.30121, "12925": 2.30097, "12930": 2.27435, "12935": 2.24293, "12940": 2.25996, "12945": 2.31122, "12950": 2.28561, "12955": 2.32949, "12960": 2.31865, "12965": 2.29094, "12970": 2.27405, "12975": 2.26846, "12980": 2.33507, "12985": 2.27991, "12990": 2.28159, "12995": 2.27524, "13000": 2.25238, "13005": 2.34256, "13010": 2.3203, "13015": 2.27436, "13020": 2.2797, "13025": 2.30079, "13030": 2.27793, "13035": 2.28547, "13040": 2.31999, "13045": 2.27721, "13050": 2.28207, "13055": 2.29207, "13060": 2.29978, "13065": 2.30145, "13070": 2.32557, "13075": 2.2975, "13080": 2.27996, "13085": 2.2805, "13090": 2.2968, "13095": 2.30813, "13100": 2.3045, "13105": 2.31265, "13110": 2.34177, "13115": 2.22927, "13120": 2.30632, "13125": 2.24282, "13130": 2.27263, "13135": 2.31511, "13140": 2.30575, "13145": 2.26505, "13150": 2.28492, "13155": 2.26049, "13160": 2.24764, "13165": 2.25782, "13170": 2.31213, "13175": 2.21522, "13180": 2.2845, "13185": 2.26615, "13190": 2.27202, "13195": 2.26331, "13200": 2.31869, "13205": 2.31106, "13210": 2.27891, "13215": 2.30658, "13220": 2.28435, "13225": 2.29703, "13230": 2.25521, "13235": 2.26859, "13240": 2.27257, "13245": 2.31222, "13250": 2.2715, "13255": 2.29454, "13260": 2.29307, "13265": 2.27922, "13270": 2.27703, "13275": 2.2735, "13280": 2.26799, "13285": 2.28237, "13290": 2.30742, "13295": 2.30869, "13300": 2.24235, "13305": 2.27109, "13310": 2.26214, "13315": 2.22494, "13320": 2.25505, "13325": 2.31156, "13330": 2.27345, "13335": 2.30641, "13340": 2.28514, "13345": 2.29317, "13350": 2.25548, "13355": 2.28245, "13360": 2.23077, "13365": 2.25949, "13370": 2.32322, "13375": 2.31524, "13380": 2.29222, "13385": 2.29235, "13390": 2.31908, "13395": 2.27289, "13400": 2.27484, "13405": 2.32213, "13410": 2.26909, "13415": 2.26576, "13420": 2.29348, "13425": 2.25198, "13430": 2.28604, "13435": 2.30239, "13440": 2.32415, "13445": 2.27332, "13450": 2.29641, "13455": 2.30269, "13460": 2.29633, "13465": 2.29117, "13470": 2.28336, "13475": 2.28459, "13480": 2.27549, "13485": 2.2873, "13490": 2.26065, "13495": 2.24973, "13500": 2.23548, "13505": 2.28991, "13510": 2.26755, "13515": 2.25174, "13520": 2.31629, "13525": 2.26538, "13530": 2.30197, "13535": 2.31444, "13540": 2.27226, "13545": 2.25643, "13550": 2.23973, "13555": 2.29996, "13560": 2.29498, "13565": 2.30435, "13570": 2.23824, "13575": 2.23376, "13580": 2.28369, "13585": 2.3089, "13590": 2.33038, "13595": 2.27531, "13600": 2.20095, "13605": 2.29033, "13610": 2.22312, "13615": 2.31811, "13620": 2.29488, "13625": 2.26287, "13630": 2.27328, "13635": 2.22955, "13640": 2.24573, "13645": 2.27214, "13650": 2.30308, "13655": 2.27347, "13660": 2.24598, "13665": 2.25271, "13670": 2.24999, "13675": 2.28037, "13680": 2.23174, "13685": 2.30174, "13690": 2.26251, "13695": 2.27837, "13700": 2.27327, "13705": 2.2636, "13710": 2.24937, "13715": 2.2699, "13720": 2.27936, "13725": 2.29684, "13730": 2.3257, "13735": 2.34752, "13740": 2.27554, "13745": 2.286, "13750": 2.32906, "13755": 2.26681, "13760": 2.27417, "13765": 2.28038, "13770": 2.29379, "13775": 2.34727, "13780": 2.21314, "13785": 2.22244, "13790": 2.28974, "13795": 2.26814, "13800": 2.30161, "13805": 2.27409, "13810": 2.25177, "13815": 2.24765, "13820": 2.26798, "13825": 2.25037, "13830": 2.28637, "13835": 2.28874, "13840": 2.20488, "13845": 2.26658, "13850": 2.27474, "13855": 2.30786, "13860": 2.28371, "13865": 2.26135, "13870": 2.21934, "13875": 2.25616, "13880": 2.24966, "13885": 2.26172, "13890": 2.23106, "13895": 2.3004, "13900": 2.22935, "13905": 2.2718, "13910": 2.23019, "13915": 2.27703, "13920": 2.26037, "13925": 2.29872, "13930": 2.23499, "13935": 2.28539, "13940": 2.31774, "13945": 2.27601, "13950": 2.23563, "13955": 2.29834, "13960": 2.24094, "13965": 2.24283, "13970": 2.25266, "13975": 2.27434, "13980": 2.28112, "13985": 2.28596, "13990": 2.29658, "13995": 2.26627, "14000": 2.23278, "14005": 2.25805, "14010": 2.28419, "14015": 2.30525, "14020": 2.22466, "14025": 2.23803, "14030": 2.27975, "14035": 2.26007, "14040": 2.26904, "14045": 2.24314, "14050": 2.24702, "14055": 2.26204, "14060": 2.21953, "14065": 2.31897, "14070": 2.25298, "14075": 2.24962, "14080": 2.32098, "14085": 2.28267, "14090": 2.25788, "14095": 2.3156, "14100": 2.24311, "14105": 2.27291, "14110": 2.24522, "14115": 2.25461, "14120": 2.30416, "14125": 2.20776, "14130": 2.22624, "14135": 2.2288, "14140": 2.25832, "14145": 2.25901, "14150": 2.30758, "14155": 2.24557, "14160": 2.26294, "14165": 2.20916, "14170": 2.34849, "14175": 2.29822, "14180": 2.22957, "14185": 2.24466, "14190": 2.25475, "14195": 2.25893, "14200": 2.30571, "14205": 2.26388, "14210": 2.25044, "14215": 2.22247, "14220": 2.23795, "14225": 2.28296, "14230": 2.21569, "14235": 2.23865, "14240": 2.28582, "14245": 2.28129, "14250": 2.25781, "14255": 2.29588, "14260": 2.25205, "14265": 2.23016, "14270": 2.19988, "14275": 2.246, "14280": 2.24024, "14285": 2.2859, "14290": 2.22882, "14295": 2.2652, "14300": 2.28257, "14305": 2.28394, "14310": 2.22208, "14315": 2.26501, "14320": 2.26482, "14325": 2.29223, "14330": 2.29316, "14335": 2.31081, "14340": 2.3021, "14345": 2.25641, "14350": 2.27295, "14355": 2.29116, "14360": 2.27791, "14365": 2.28183, "14370": 2.27232, "14375": 2.27134, "14380": 2.28179, "14385": 2.22215, "14390": 2.26776, "14395": 2.29949, "14400": 2.25983, "14405": 2.23767, "14410": 2.21065, "14415": 2.22307, "14420": 2.27351, "14425": 2.2691, "14430": 2.26049, "14435": 2.26765, "14440": 2.23323, "14445": 2.24421, "14450": 2.2553, "14455": 2.29375, "14460": 2.26741, "14465": 2.28758, "14470": 2.24673, "14475": 2.24553, "14480": 2.23528, "14485": 2.26383, "14490": 2.29156, "14495": 2.26755, "14500": 2.25343, "14505": 2.284, "14510": 2.19756, "14515": 2.2749, "14520": 2.24301, "14525": 2.24757, "14530": 2.25925, "14535": 2.30164, "14540": 2.23419, "14545": 2.28163, "14550": 2.25756, "14555": 2.25836, "14560": 2.26385, "14565": 2.27264, "14570": 2.26875, "14575": 2.30261, "14580": 2.27999, "14585": 2.22573, "14590": 2.23428, "14595": 2.27236, "14600": 2.26409, "14605": 2.23756, "14610": 2.31103, "14615": 2.26627, "14620": 2.30058, "14625": 2.24857, "14630": 2.26611, "14635": 2.25761, "14640": 2.26232, "14645": 2.28114, "14650": 2.31129, "14655": 2.26887, "14660": 2.2287, "14665": 2.22975, "14670": 2.26143, "14675": 2.23296, "14680": 2.25374, "14685": 2.25724, "14690": 2.27049, "14695": 2.27368, "14700": 2.25974, "14705": 2.24493, "14710": 2.22798, "14715": 2.2609, "14720": 2.24065, "14725": 2.22238, "14730": 2.27464, "14735": 2.24585, "14740": 2.24942, "14745": 2.20783, "14750": 2.23982, "14755": 2.32007, "14760": 2.22619, "14765": 2.25222, "14770": 2.24199, "14775": 2.21576, "14780": 2.21833, "14785": 2.26584, "14790": 2.2743, "14795": 2.24006, "14800": 2.28097, "14805": 2.27045, "14810": 2.27706, "14815": 2.2604, "14820": 2.26072, "14825": 2.26676, "14830": 2.21469, "14835": 2.25339, "14840": 2.24344, "14845": 2.22676, "14850": 2.30905, "14855": 2.28094, "14860": 2.23794, "14865": 2.24147, "14870": 2.26855, "14875": 2.24381, "14880": 2.23362, "14885": 2.27995, "14890": 2.27134, "14895": 2.25476, "14900": 2.27642, "14905": 2.21876, "14910": 2.29201, "14915": 2.27223, "14920": 2.27175, "14925": 2.26138, "14930": 2.25865, "14935": 2.21346, "14940": 2.29079, "14945": 2.26589, "14950": 2.23023, "14955": 2.23049, "14960": 2.26076, "14965": 2.2546, "14970": 2.25435, "14975": 2.25107, "14980": 2.21347, "14985": 2.19574, "14990": 2.25195, "14995": 2.22934, "15000": 2.18102, "15005": 2.29201, "15010": 2.22767, "15015": 2.23885, "15020": 2.25786, "15025": 2.20374, "15030": 2.24769, "15035": 2.23004, "15040": 2.25543, "15045": 2.25413, "15050": 2.23512, "15055": 2.20271, "15060": 2.2459, "15065": 2.23336, "15070": 2.25002, "15075": 2.22942, "15080": 2.21986, "15085": 2.21334, "15090": 2.26492, "15095": 2.27277, "15100": 2.25636, "15105": 2.24283, "15110": 2.23081, "15115": 2.27415, "15120": 2.27694, "15125": 2.20886, "15130": 2.2544, "15135": 2.25331, "15140": 2.30151, "15145": 2.24206, "15150": 2.24791, "15155": 2.24312, "15160": 2.24859, "15165": 2.24057, "15170": 2.2357, "15175": 2.25644, "15180": 2.28308, "15185": 2.24634, "15190": 2.24554, "15195": 2.24052, "15200": 2.23154, "15205": 2.27277, "15210": 2.23549, "15215": 2.27589, "15220": 2.24161, "15225": 2.28412, "15230": 2.25894, "15235": 2.24581, "15240": 2.2226, "15245": 2.23378, "15250": 2.2464, "15255": 2.24198, "15260": 2.26567, "15265": 2.24494, "15270": 2.25605, "15275": 2.26142, "15280": 2.22653, "15285": 2.25746, "15290": 2.24765, "15295": 2.25795, "15300": 2.27344, "15305": 2.2739, "15310": 2.31233, "15315": 2.22422, "15320": 2.24646, "15325": 2.25144, "15330": 2.27898, "15335": 2.21775, "15340": 2.22148, "15345": 2.24018, "15350": 2.22352, "15355": 2.2403, "15360": 2.24186, "15365": 2.22286, "15370": 2.26822, "15375": 2.2187, "15380": 2.20968, "15385": 2.206, "15390": 2.28269, "15395": 2.24631, "15400": 2.28192, "15405": 2.23114, "15410": 2.22897, "15415": 2.28247, "15420": 2.24736, "15425": 2.23497, "15430": 2.23507, "15435": 2.26387, "15440": 2.26123, "15445": 2.24638, "15450": 2.25835, "15455": 2.27164, "15460": 2.2153, "15465": 2.24422, "15470": 2.2509, "15475": 2.21883, "15480": 2.2246, "15485": 2.20418, "15490": 2.28227, "15495": 2.24244, "15500": 2.20499, "15505": 2.20694, "15510": 2.20774, "15515": 2.23862, "15520": 2.20404, "15525": 2.26918, "15530": 2.2501, "15535": 2.23661, "15540": 2.25279, "15545": 2.23733, "15550": 2.23386, "15555": 2.24194, "15560": 2.27387, "15565": 2.21847, "15570": 2.20874, "15575": 2.26593, "15580": 2.26517, "15585": 2.22792, "15590": 2.26669, "15595": 2.18924, "15600": 2.30333, "15605": 2.24947, "15610": 2.20892, "15615": 2.23217, "15620": 2.23442, "15625": 2.19575, "15630": 2.19456, "15635": 2.20423, "15640": 2.2318, "15645": 2.25547, "15650": 2.20723, "15655": 2.21332, "15660": 2.21751, "15665": 2.24123, "15670": 2.21341, "15675": 2.23133, "15680": 2.21394, "15685": 2.26373, "15690": 2.20579, "15695": 2.19759, "15700": 2.24602, "15705": 2.19469, "15710": 2.1955, "15715": 2.25006, "15720": 2.24335, "15725": 2.24469, "15730": 2.22745, "15735": 2.28564, "15740": 2.18951, "15745": 2.23511, "15750": 2.22563, "15755": 2.25945, "15760": 2.26441, "15765": 2.25588, "15770": 2.25762, "15775": 2.21843, "15780": 2.28163, "15785": 2.19774, "15790": 2.2441, "15795": 2.24189, "15800": 2.2215, "15805": 2.2375, "15810": 2.16201, "15815": 2.27278, "15820": 2.22655, "15825": 2.20315, "15830": 2.20658, "15835": 2.20818, "15840": 2.25442, "15845": 2.22462, "15850": 2.21948, "15855": 2.23775, "15860": 2.22948, "15865": 2.24435, "15870": 2.23493, "15875": 2.26534, "15880": 2.24439, "15885": 2.20473, "15890": 2.21689, "15895": 2.27875, "15900": 2.19724, "15905": 2.29266, "15910": 2.25189, "15915": 2.19644, "15920": 2.26653, "15925": 2.22225, "15930": 2.23386, "15935": 2.23225, "15940": 2.19207, "15945": 2.24081, "15950": 2.22776, "15955": 2.2209, "15960": 2.23376, "15965": 2.22378, "15970": 2.22971, "15975": 2.19953, "15980": 2.25246, "15985": 2.21775, "15990": 2.23412, "15995": 2.23895, "16000": 2.20518, "16005": 2.2111, "16010": 2.18302, "16015": 2.21214, "16020": 2.2153, "16025": 2.24097, "16030": 2.2423, "16035": 2.1731, "16040": 2.27529, "16045": 2.21031, "16050": 2.24371, "16055": 2.22449, "16060": 2.19598, "16065": 2.23498, "16070": 2.24376, "16075": 2.18519, "16080": 2.27621, "16085": 2.22932, "16090": 2.216, "16095": 2.23198, "16100": 2.22129, "16105": 2.27051, "16110": 2.26488, "16115": 2.19876, "16120": 2.19589, "16125": 2.23517, "16130": 2.21311, "16135": 2.25178, "16140": 2.18863, "16145": 2.23705, "16150": 2.21096, "16155": 2.24587, "16160": 2.28051, "16165": 2.20818, "16170": 2.2004, "16175": 2.17407, "16180": 2.24129, "16185": 2.23168, "16190": 2.23283, "16195": 2.25477, "16200": 2.18919, "16205": 2.24027, "16210": 2.24364, "16215": 2.22816, "16220": 2.24602, "16225": 2.2476, "16230": 2.22891, "16235": 2.21221, "16240": 2.26516, "16245": 2.21804, "16250": 2.20843, "16255": 2.21565, "16260": 2.22581, "16265": 2.245, "16270": 2.24294, "16275": 2.23369, "16280": 2.16959, "16285": 2.23995, "16290": 2.24182, "16295": 2.22851, "16300": 2.26864, "16305": 2.21109, "16310": 2.20066, "16315": 2.20656, "16320": 2.29683, "16325": 2.25162, "16330": 2.19042, "16335": 2.19069, "16340": 2.18229, "16345": 2.21756, "16350": 2.26111, "16355": 2.22445, "16360": 2.22366, "16365": 2.22369, "16370": 2.24164, "16375": 2.24115, "16380": 2.22115, "16385": 2.20884, "16390": 2.21006, "16395": 2.22618, "16400": 2.20182, "16405": 2.20945, "16410": 2.24184, "16415": 2.21288, "16420": 2.22487, "16425": 2.207, "16430": 2.27598, "16435": 2.24884, "16440": 2.24177, "16445": 2.20989, "16450": 2.20863, "16455": 2.26067, "16460": 2.24585, "16465": 2.24235, "16470": 2.18466, "16475": 2.2224, "16480": 2.16023, "16485": 2.25654, "16490": 2.20606, "16495": 2.22275, "16500": 2.23193, "16505": 2.2078, "16510": 2.2421, "16515": 2.26451, "16520": 2.23046, "16525": 2.17186, "16530": 2.26191, "16535": 2.18416, "16540": 2.25392, "16545": 2.20273, "16550": 2.19775, "16555": 2.22412, "16560": 2.24968, "16565": 2.24148, "16570": 2.2327, "16575": 2.19744, "16580": 2.18874, "16585": 2.21076, "16590": 2.23106, "16595": 2.18267, "16600": 2.19342, "16605": 2.21556, "16610": 2.23104, "16615": 2.20891, "16620": 2.21331, "16625": 2.21033, "16630": 2.2395, "16635": 2.20513, "16640": 2.23571, "16645": 2.21343, "16650": 2.25277, "16655": 2.25743, "16660": 2.21988, "16665": 2.1892, "16670": 2.21124, "16675": 2.22756, "16680": 2.21969, "16685": 2.22515, "16690": 2.21847, "16695": 2.26723, "16700": 2.18795, "16705": 2.22397, "16710": 2.23979, "16715": 2.27371, "16720": 2.23726, "16725": 2.2392, "16730": 2.18083, "16735": 2.20819, "16740": 2.19304, "16745": 2.21715, "16750": 2.22283, "16755": 2.21131, "16760": 2.19668, "16765": 2.21457, "16770": 2.22361, "16775": 2.20599, "16780": 2.23754, "16785": 2.22449, "16790": 2.21629, "16795": 2.25935, "16800": 2.22194, "16805": 2.24692, "16810": 2.17305, "16815": 2.22873, "16820": 2.23015, "16825": 2.22334, "16830": 2.22737, "16835": 2.19732, "16840": 2.20456, "16845": 2.14605, "16850": 2.248, "16855": 2.22672, "16860": 2.23239, "16865": 2.25582, "16870": 2.19231, "16875": 2.17797, "16880": 2.22193, "16885": 2.21109, "16890": 2.24767, "16895": 2.18351, "16900": 2.24499, "16905": 2.24806, "16910": 2.22424, "16915": 2.2389, "16920": 2.21164, "16925": 2.19457, "16930": 2.21442, "16935": 2.20072, "16940": 2.24179, "16945": 2.19963, "16950": 2.21769, "16955": 2.23561, "16960": 2.22731, "16965": 2.18261, "16970": 2.18979, "16975": 2.21545, "16980": 2.21544, "16985": 2.20991, "16990": 2.22295, "16995": 2.17423, "17000": 2.26386, "17005": 2.2489, "17010": 2.22091, "17015": 2.18828, "17020": 2.22938, "17025": 2.17164, "17030": 2.19665, "17035": 2.20107, "17040": 2.23352, "17045": 2.20529, "17050": 2.16414, "17055": 2.21872, "17060": 2.24598, "17065": 2.19012, "17070": 2.18358, "17075": 2.20976, "17080": 2.21146, "17085": 2.22798, "17090": 2.24133, "17095": 2.21878, "17100": 2.25464, "17105": 2.25044, "17110": 2.22576, "17115": 2.19325, "17120": 2.17125, "17125": 2.21385, "17130": 2.24797, "17135": 2.19864, "17140": 2.2345, "17145": 2.22484, "17150": 2.18482, "17155": 2.19138, "17160": 2.26039, "17165": 2.22395, "17170": 2.22996, "17175": 2.21399, "17180": 2.20353, "17185": 2.21406, "17190": 2.16517, "17195": 2.21956, "17200": 2.21447, "17205": 2.19647, "17210": 2.23405, "17215": 2.20228, "17220": 2.22729, "17225": 2.2088, "17230": 2.22692, "17235": 2.2126, "17240": 2.20728, "17245": 2.21154, "17250": 2.20211, "17255": 2.19029, "17260": 2.21572, "17265": 2.20899, "17270": 2.21605, "17275": 2.25623, "17280": 2.21562, "17285": 2.22853, "17290": 2.21425, "17295": 2.22708, "17300": 2.20618, "17305": 2.2268, "17310": 2.23418, "17315": 2.22672, "17320": 2.20735, "17325": 2.25941, "17330": 2.24333, "17335": 2.23401, "17340": 2.15813, "17345": 2.1914, "17350": 2.18744, "17355": 2.22327, "17360": 2.19213, "17365": 2.1947, "17370": 2.21114, "17375": 2.23927, "17380": 2.21596, "17385": 2.19583, "17390": 2.20397, "17395": 2.18544, "17400": 2.18861, "17405": 2.2381, "17410": 2.19631, "17415": 2.17844, "17420": 2.21699, "17425": 2.20265, "17430": 2.1952, "17435": 2.19379, "17440": 2.2261, "17445": 2.16782, "17450": 2.22171, "17455": 2.19508, "17460": 2.20902, "17465": 2.17073, "17470": 2.21185, "17475": 2.23384, "17480": 2.18091, "17485": 2.22744, "17490": 2.19616, "17495": 2.16747, "17500": 2.20968, "17505": 2.20787, "17510": 2.1702, "17515": 2.2164, "17520": 2.1962, "17525": 2.20717, "17530": 2.19934, "17535": 2.19016, "17540": 2.21256, "17545": 2.16798, "17550": 2.21259, "17555": 2.16177, "17560": 2.18508, "17565": 2.21325, "17570": 2.19418, "17575": 2.19277, "17580": 2.18276, "17585": 2.19914, "17590": 2.25858, "17595": 2.22496, "17600": 2.24561, "17605": 2.20631, "17610": 2.20043, "17615": 2.21276, "17620": 2.21656, "17625": 2.1452, "17630": 2.20712, "17635": 2.18808, "17640": 2.21022, "17645": 2.28058, "17650": 2.18273, "17655": 2.20661, "17660": 2.24429, "17665": 2.22023, "17670": 2.22795, "17675": 2.22707, "17680": 2.20102, "17685": 2.20622, "17690": 2.16215, "17695": 2.24212, "17700": 2.21517, "17705": 2.24654, "17710": 2.16436, "17715": 2.15425, "17720": 2.21566, "17725": 2.21631, "17730": 2.19739, "17735": 2.18836, "17740": 2.18099, "17745": 2.21512, "17750": 2.26346, "17755": 2.20586, "17760": 2.2022, "17765": 2.18548, "17770": 2.19755, "17775": 2.21816, "17780": 2.23907, "17785": 2.20365, "17790": 2.18576, "17795": 2.20364, "17800": 2.1852, "17805": 2.1948, "17810": 2.19668, "17815": 2.20382, "17820": 2.16673, "17825": 2.25894, "17830": 2.24072, "17835": 2.22137, "17840": 2.18578, "17845": 2.18235, "17850": 2.1984, "17855": 2.20723, "17860": 2.17901, "17865": 2.18329, "17870": 2.24088, "17875": 2.20917, "17880": 2.18392, "17885": 2.23181, "17890": 2.22431, "17895": 2.21595, "17900": 2.15792, "17905": 2.20915, "17910": 2.2046, "17915": 2.19617, "17920": 2.2408, "17925": 2.19661, "17930": 2.16724, "17935": 2.20705, "17940": 2.22018, "17945": 2.19285, "17950": 2.23147, "17955": 2.22721, "17960": 2.17367, "17965": 2.19914, "17970": 2.22845, "17975": 2.13621, "17980": 2.19781, "17985": 2.21081, "17990": 2.22122, "17995": 2.208, "18000": 2.19006, "18005": 2.19211, "18010": 2.21418, "18015": 2.19079, "18020": 2.22363, "18025": 2.2136, "18030": 2.21108, "18035": 2.22054, "18040": 2.21366, "18045": 2.21668, "18050": 2.21639, "18055": 2.20708, "18060": 2.23301, "18065": 2.17738, "18070": 2.23954, "18075": 2.21254, "18080": 2.15751, "18085": 2.22205, "18090": 2.17742, "18095": 2.1901, "18100": 2.15906, "18105": 2.18499, "18110": 2.20892, "18115": 2.24298, "18120": 2.23688, "18125": 2.19153, "18130": 2.2018, "18135": 2.1583, "18140": 2.20415, "18145": 2.18591, "18150": 2.20933, "18155": 2.2048, "18160": 2.23619, "18165": 2.22831, "18170": 2.21176, "18175": 2.19234, "18180": 2.21063, "18185": 2.19211, "18190": 2.18625, "18195": 2.16073, "18200": 2.23225, "18205": 2.20192, "18210": 2.20742, "18215": 2.16989, "18220": 2.17792, "18225": 2.22789, "18230": 2.22804, "18235": 2.19105, "18240": 2.20255, "18245": 2.20708, "18250": 2.18367, "18255": 2.28029, "18260": 2.21051, "18265": 2.20723, "18270": 2.18562, "18275": 2.1848, "18280": 2.19974, "18285": 2.21972, "18290": 2.17871, "18295": 2.21785, "18300": 2.22508, "18305": 2.24905, "18310": 2.242, "18315": 2.22102, "18320": 2.18142, "18325": 2.21571, "18330": 2.20363, "18335": 2.21518, "18340": 2.20167, "18345": 2.18143, "18350": 2.24136, "18355": 2.17189, "18360": 2.22831, "18365": 2.18543, "18370": 2.16015, "18375": 2.21197, "18380": 2.21289, "18385": 2.20792, "18390": 2.18659, "18395": 2.24122, "18400": 2.19544, "18405": 2.23914, "18410": 2.20678, "18415": 2.17412, "18420": 2.1801, "18425": 2.18314, "18430": 2.15369, "18435": 2.19427, "18440": 2.19951, "18445": 2.16714, "18450": 2.21747, "18455": 2.15439, "18460": 2.19625, "18465": 2.21027, "18470": 2.19678, "18475": 2.15233, "18480": 2.19962, "18485": 2.16068, "18490": 2.20032, "18495": 2.24416, "18500": 2.19799, "18505": 2.16896, "18510": 2.22074, "18515": 2.17615, "18520": 2.19193, "18525": 2.17382, "18530": 2.1801, "18535": 2.17328, "18540": 2.21339, "18545": 2.17659, "18550": 2.21846, "18555": 2.16978, "18560": 2.19191, "18565": 2.22678, "18570": 2.21439, "18575": 2.14694, "18580": 2.22222, "18585": 2.21105, "18590": 2.2138, "18595": 2.18145, "18600": 2.15982, "18605": 2.21036, "18610": 2.20904, "18615": 2.19777, "18620": 2.21564, "18625": 2.19862, "18630": 2.17281, "18635": 2.19972, "18640": 2.16827, "18645": 2.17661, "18650": 2.25794, "18655": 2.16626, "18660": 2.23681, "18665": 2.22478, "18670": 2.2096, "18675": 2.19176, "18680": 2.20897, "18685": 2.18021, "18690": 2.21481, "18695": 2.15783, "18700": 2.16267, "18705": 2.18203, "18710": 2.24503, "18715": 2.17789, "18720": 2.21896, "18725": 2.18284, "18730": 2.18275, "18735": 2.14045, "18740": 2.19264, "18745": 2.21859, "18750": 2.21632, "18755": 2.19781, "18760": 2.22523, "18765": 2.15976, "18770": 2.15109, "18775": 2.21398, "18780": 2.16885, "18785": 2.21246, "18790": 2.20192, "18795": 2.2064, "18800": 2.18676, "18805": 2.18183, "18810": 2.20856, "18815": 2.16842, "18820": 2.18871, "18825": 2.21143, "18830": 2.20084, "18835": 2.22075, "18840": 2.14496, "18845": 2.19647, "18850": 2.18675, "18855": 2.17706, "18860": 2.15865, "18865": 2.1829, "18870": 2.1645, "18875": 2.16719, "18880": 2.20835, "18885": 2.17481, "18890": 2.18098, "18895": 2.20117, "18900": 2.17253, "18905": 2.18549, "18910": 2.18038, "18915": 2.17686, "18920": 2.19365, "18925": 2.27352, "18930": 2.19976, "18935": 2.20761, "18940": 2.23602, "18945": 2.26392, "18950": 2.19196, "18955": 2.19037, "18960": 2.20853, "18965": 2.1418, "18970": 2.20931, "18975": 2.22049, "18980": 2.20713, "18985": 2.1927, "18990": 2.23286, "18995": 2.19774, "19000": 2.16675, "19005": 2.16908, "19010": 2.19917, "19015": 2.19768, "19020": 2.20399, "19025": 2.17085, "19030": 2.22006, "19035": 2.16702, "19040": 2.15094, "19045": 2.22022, "19050": 2.16524, "19055": 2.18857, "19060": 2.18706, "19065": 2.19026, "19070": 2.17545, "19075": 2.21913, "19080": 2.18591, "19085": 2.18092, "19090": 2.17846, "19095": 2.16789, "19100": 2.2017, "19105": 2.19519, "19110": 2.17554, "19115": 2.20286, "19120": 2.19386, "19125": 2.21476, "19130": 2.14877, "19135": 2.22807, "19140": 2.21298, "19145": 2.19465, "19150": 2.18569, "19155": 2.21337, "19160": 2.18549, "19165": 2.13224, "19170": 2.17574, "19175": 2.18558, "19180": 2.12712, "19185": 2.19449, "19190": 2.18645, "19195": 2.20922, "19200": 2.20369, "19205": 2.18718, "19210": 2.18739, "19215": 2.13401, "19220": 2.17887, "19225": 2.16314, "19230": 2.22031, "19235": 2.21264, "19240": 2.17428, "19245": 2.17401, "19250": 2.18787, "19255": 2.15238, "19260": 2.21944, "19265": 2.15636, "19270": 2.17304, "19275": 2.18772, "19280": 2.17768, "19285": 2.1407, "19290": 2.16895, "19295": 2.19056, "19300": 2.17178, "19305": 2.22544, "19310": 2.22494, "19315": 2.20898, "19320": 2.16144, "19325": 2.19044, "19330": 2.22019, "19335": 2.21804, "19340": 2.21707, "19345": 2.19541, "19350": 2.16753, "19355": 2.13406, "19360": 2.15545, "19365": 2.18209, "19370": 2.18471, "19375": 2.19432, "19380": 2.16424, "19385": 2.15941, "19390": 2.18014, "19395": 2.1827, "19400": 2.18629, "19405": 2.17899, "19410": 2.16489, "19415": 2.18471, "19420": 2.19342, "19425": 2.18047, "19430": 2.24191, "19435": 2.17193, "19440": 2.16912, "19445": 2.18288, "19450": 2.17635, "19455": 2.17209, "19460": 2.20238, "19465": 2.17286, "19470": 2.18163, "19475": 2.2265, "19480": 2.18682, "19485": 2.16975, "19490": 2.18848, "19495": 2.1774, "19500": 2.20415, "19505": 2.15082, "19510": 2.21293, "19515": 2.1738, "19520": 2.19101, "19525": 2.17156, "19530": 2.16361, "19535": 2.13436, "19540": 2.21996, "19545": 2.18569, "19550": 2.14869, "19555": 2.22011, "19560": 2.18591, "19565": 2.15149, "19570": 2.17734, "19575": 2.17717, "19580": 2.19545, "19585": 2.22114, "19590": 2.2079, "19595": 2.19627, "19600": 2.20058, "19605": 2.21583, "19610": 2.19158, "19615": 2.17767, "19620": 2.2149, "19625": 2.19097, "19630": 2.19186, "19635": 2.19073, "19640": 2.17183, "19645": 2.14836, "19650": 2.17429, "19655": 2.1535, "19660": 2.17173, "19665": 2.17316, "19670": 2.18016, "19675": 2.20924, "19680": 2.19378, "19685": 2.17824, "19690": 2.1858, "19695": 2.15738, "19700": 2.16726, "19705": 2.18919, "19710": 2.24444, "19715": 2.15866, "19720": 2.15222, "19725": 2.23192, "19730": 2.174, "19735": 2.18083, "19740": 2.21844, "19745": 2.20412, "19750": 2.18516, "19755": 2.173, "19760": 2.15649, "19765": 2.21005, "19770": 2.2105, "19775": 2.17122, "19780": 2.15611, "19785": 2.17215, "19790": 2.22573, "19795": 2.16688, "19800": 2.16487, "19805": 2.16612, "19810": 2.21605, "19815": 2.1884, "19820": 2.20118, "19825": 2.21351, "19830": 2.16893, "19835": 2.18271, "19840": 2.20494, "19845": 2.16529, "19850": 2.17147, "19855": 2.17608, "19860": 2.18443, "19865": 2.16929, "19870": 2.20889, "19875": 2.13464, "19880": 2.16717, "19885": 2.15449, "19890": 2.16389, "19895": 2.20691, "19900": 2.18606, "19905": 2.15456, "19910": 2.1739, "19915": 2.19627, "19920": 2.10782, "19925": 2.16573, "19930": 2.14811, "19935": 2.17314, "19940": 2.17712, "19945": 2.18391, "19950": 2.16738, "19955": 2.1714, "19960": 2.16541, "19965": 2.16885, "19970": 2.12903, "19975": 2.17835, "19980": 2.21476, "19985": 2.17239, "19990": 2.17203, "19995": 2.19604, "20000": 2.22131, "20005": 2.19083, "20010": 2.17537, "20015": 2.19017, "20020": 2.16348, "20025": 2.11724, "20030": 2.14627, "20035": 2.18041, "20040": 2.17961, "20045": 2.19671, "20050": 2.19453, "20055": 2.19098, "20060": 2.20275, "20065": 2.16637, "20070": 2.18645, "20075": 2.14962, "20080": 2.16406, "20085": 2.19318, "20090": 2.17878, "20095": 2.1622, "20100": 2.18877, "20105": 2.14125, "20110": 2.17459, "20115": 2.15778, "20120": 2.1887, "20125": 2.16234, "20130": 2.1741, "20135": 2.18221, "20140": 2.16327, "20145": 2.17725, "20150": 2.14355, "20155": 2.23125, "20160": 2.1801, "20165": 2.177, "20170": 2.16475, "20175": 2.14874, "20180": 2.18468, "20185": 2.18502, "20190": 2.19566, "20195": 2.19856, "20200": 2.1592, "20205": 2.18155, "20210": 2.1757, "20215": 2.18783, "20220": 2.17907, "20225": 2.21229, "20230": 2.17038, "20235": 2.20856, "20240": 2.18316, "20245": 2.19177, "20250": 2.14269, "20255": 2.14595, "20260": 2.17267, "20265": 2.17964, "20270": 2.18046, "20275": 2.18293, "20280": 2.14952, "20285": 2.16887, "20290": 2.1916, "20295": 2.19759, "20300": 2.15349, "20305": 2.21744, "20310": 2.11411, "20315": 2.17017, "20320": 2.18523, "20325": 2.1864, "20330": 2.15277, "20335": 2.20784, "20340": 2.17478, "20345": 2.16963, "20350": 2.14576, "20355": 2.19215, "20360": 2.19772, "20365": 2.16909, "20370": 2.19977, "20375": 2.18564, "20380": 2.18346, "20385": 2.17909, "20390": 2.15385, "20395": 2.15428, "20400": 2.26361, "20405": 2.15136, "20410": 2.14332, "20415": 2.19687, "20420": 2.15731, "20425": 2.17802, "20430": 2.16526, "20435": 2.14761, "20440": 2.16149, "20445": 2.18241, "20450": 2.17372, "20455": 2.20974, "20460": 2.17768, "20465": 2.16657, "20470": 2.17942, "20475": 2.15834, "20480": 2.19502, "20485": 2.1983, "20490": 2.19449, "20495": 2.17032, "20500": 2.17163, "20505": 2.15611, "20510": 2.18855, "20515": 2.18695, "20520": 2.15776, "20525": 2.15829, "20530": 2.17403, "20535": 2.18954, "20540": 2.15406, "20545": 2.15498, "20550": 2.15082, "20555": 2.12889, "20560": 2.17263, "20565": 2.18535, "20570": 2.17691, "20575": 2.15675, "20580": 2.19958, "20585": 2.16069, "20590": 2.19027, "20595": 2.13095, "20600": 2.14705, "20605": 2.1813, "20610": 2.19605, "20615": 2.15539, "20620": 2.14315, "20625": 2.14735, "20630": 2.12405, "20635": 2.14056, "20640": 2.19475, "20645": 2.19322, "20650": 2.1801, "20655": 2.21054, "20660": 2.18908, "20665": 2.18503, "20670": 2.17056, "20675": 2.11771, "20680": 2.1749, "20685": 2.18897, "20690": 2.18899, "20695": 2.19724, "20700": 2.13588, "20705": 2.18976, "20710": 2.15405, "20715": 2.17477, "20720": 2.22587, "20725": 2.17884, "20730": 2.15311, "20735": 2.14391, "20740": 2.1614, "20745": 2.18853, "20750": 2.17755, "20755": 2.1846, "20760": 2.16076, "20765": 2.15318, "20770": 2.18263, "20775": 2.23266, "20780": 2.18829, "20785": 2.17951, "20790": 2.1625, "20795": 2.16997, "20800": 2.20641, "20805": 2.17295, "20810": 2.20215, "20815": 2.16524, "20820": 2.19836, "20825": 2.18195, "20830": 2.1445, "20835": 2.14208, "20840": 2.19173, "20845": 2.21581, "20850": 2.18452, "20855": 2.20965, "20860": 2.18899, "20865": 2.19189, "20870": 2.20919, "20875": 2.20665, "20880": 2.18733, "20885": 2.18641, "20890": 2.17586, "20895": 2.16367, "20900": 2.17825, "20905": 2.17912, "20910": 2.1802, "20915": 2.16983, "20920": 2.17046, "20925": 2.18206, "20930": 2.19853, "20935": 2.20571, "20940": 2.15987, "20945": 2.1461, "20950": 2.17538, "20955": 2.20226, "20960": 2.17481, "20965": 2.14448, "20970": 2.16487, "20975": 2.17008, "20980": 2.15063, "20985": 2.18419, "20990": 2.16877, "20995": 2.12084, "21000": 2.14792, "21005": 2.19581, "21010": 2.20566, "21015": 2.12056, "21020": 2.15588, "21025": 2.15635, "21030": 2.14495, "21035": 2.14453, "21040": 2.14174, "21045": 2.17067, "21050": 2.12946, "21055": 2.17038, "21060": 2.10025, "21065": 2.17182, "21070": 2.13742, "21075": 2.17197, "21080": 2.17558, "21085": 2.19061, "21090": 2.17886, "21095": 2.20083, "21100": 2.16482, "21105": 2.19834, "21110": 2.17375, "21115": 2.19273, "21120": 2.20868, "21125": 2.16414, "21130": 2.19125, "21135": 2.17894, "21140": 2.15092, "21145": 2.18896, "21150": 2.16319, "21155": 2.17041, "21160": 2.15752, "21165": 2.16243, "21170": 2.14851, "21175": 2.16159, "21180": 2.17079, "21185": 2.19781, "21190": 2.14444, "21195": 2.15892, "21200": 2.17406, "21205": 2.1881, "21210": 2.15927, "21215": 2.14792, "21220": 2.179, "21225": 2.15428, "21230": 2.18033, "21235": 2.15648, "21240": 2.18825, "21245": 2.15733, "21250": 2.15176, "21255": 2.14328, "21260": 2.18984, "21265": 2.21896, "21270": 2.22414, "21275": 2.21677, "21280": 2.19361, "21285": 2.18139, "21290": 2.1502, "21295": 2.14735, "21300": 2.21585, "21305": 2.21782, "21310": 2.17015, "21315": 2.17055, "21320": 2.16147, "21325": 2.13197, "21330": 2.15633, "21335": 2.12568, "21340": 2.17968, "21345": 2.1608, "21350": 2.1401, "21355": 2.18388, "21360": 2.16905, "21365": 2.18996, "21370": 2.14428, "21375": 2.14461, "21380": 2.1561, "21385": 2.17961, "21390": 2.18409, "21395": 2.13333, "21400": 2.18446, "21405": 2.20093, "21410": 2.15611, "21415": 2.18992, "21420": 2.21002, "21425": 2.17016, "21430": 2.2045, "21435": 2.1578, "21440": 2.20658, "21445": 2.17212, "21450": 2.16678, "21455": 2.19074, "21460": 2.16151, "21465": 2.16586, "21470": 2.20066, "21475": 2.14683, "21480": 2.19553, "21485": 2.2172, "21490": 2.21234, "21495": 2.18171, "21500": 2.14003, "21505": 2.18203, "21510": 2.17027, "21515": 2.17764, "21520": 2.13374, "21525": 2.15921, "21530": 2.12379, "21535": 2.15772, "21540": 2.18508, "21545": 2.17105, "21550": 2.18588, "21555": 2.15623, "21560": 2.15269, "21565": 2.14206, "21570": 2.12533, "21575": 2.1936, "21580": 2.15899, "21585": 2.17987, "21590": 2.19899, "21595": 2.18951, "21600": 2.18418, "21605": 2.18294, "21610": 2.16608, "21615": 2.1597, "21620": 2.18359, "21625": 2.17951, "21630": 2.20119, "21635": 2.13688, "21640": 2.18426, "21645": 2.17131, "21650": 2.16589, "21655": 2.17557, "21660": 2.15847, "21665": 2.10668, "21670": 2.17755, "21675": 2.13688, "21680": 2.17001, "21685": 2.14007, "21690": 2.12476, "21695": 2.17591, "21700": 2.16016, "21705": 2.18562, "21710": 2.09489, "21715": 2.14053, "21720": 2.18449, "21725": 2.15536, "21730": 2.15909, "21735": 2.13019, "21740": 2.16568, "21745": 2.18326, "21750": 2.16628, "21755": 2.18001, "21760": 2.14106, "21765": 2.12821, "21770": 2.14733, "21775": 2.16296, "21780": 2.19585, "21785": 2.17527, "21790": 2.16271, "21795": 2.16229, "21800": 2.13298, "21805": 2.16114, "21810": 2.16607, "21815": 2.18739, "21820": 2.15178, "21825": 2.16823, "21830": 2.12592, "21835": 2.1435, "21840": 2.16991, "21845": 2.15055, "21850": 2.18225, "21855": 2.14138, "21860": 2.16614, "21865": 2.1697, "21870": 2.14272, "21875": 2.18032, "21880": 2.14894, "21885": 2.1824, "21890": 2.17816, "21895": 2.16174, "21900": 2.19065, "21905": 2.17696, "21910": 2.17443, "21915": 2.19027, "21920": 2.16001, "21925": 2.19086, "21930": 2.13361, "21935": 2.20245, "21940": 2.15082, "21945": 2.17434, "21950": 2.16735, "21955": 2.12439, "21960": 2.11394, "21965": 2.17865, "21970": 2.15638, "21975": 2.1403, "21980": 2.18127, "21985": 2.13395, "21990": 2.16604, "21995": 2.17238, "22000": 2.2084, "22005": 2.14778, "22010": 2.18027, "22015": 2.17368, "22020": 2.19406, "22025": 2.15296, "22030": 2.16695, "22035": 2.11919, "22040": 2.18568, "22045": 2.20389, "22050": 2.12425, "22055": 2.13761, "22060": 2.1751, "22065": 2.14791, "22070": 2.14205, "22075": 2.17051, "22080": 2.19837, "22085": 2.13328, "22090": 2.16605, "22095": 2.15249, "22100": 2.15239, "22105": 2.18509, "22110": 2.16025, "22115": 2.13916, "22120": 2.16931, "22125": 2.14251, "22130": 2.1833, "22135": 2.15895, "22140": 2.18819, "22145": 2.1476, "22150": 2.18664, "22155": 2.13479, "22160": 2.13427, "22165": 2.17594, "22170": 2.14088, "22175": 2.1942, "22180": 2.12003, "22185": 2.15308, "22190": 2.1675, "22195": 2.17513, "22200": 2.18057, "22205": 2.17852, "22210": 2.10397, "22215": 2.14153, "22220": 2.18993, "22225": 2.12258, "22230": 2.17344, "22235": 2.17934, "22240": 2.13812, "22245": 2.18301, "22250": 2.17433, "22255": 2.13008, "22260": 2.18954, "22265": 2.12604, "22270": 2.15035, "22275": 2.14584, "22280": 2.18941, "22285": 2.21118, "22290": 2.17175, "22295": 2.17702, "22300": 2.14105, "22305": 2.12627, "22310": 2.15981, "22315": 2.15456, "22320": 2.15089, "22325": 2.15759, "22330": 2.14471, "22335": 2.19083, "22340": 2.16885, "22345": 2.16159, "22350": 2.19114, "22355": 2.16103, "22360": 2.18048, "22365": 2.17256, "22370": 2.20354, "22375": 2.15322, "22380": 2.1568, "22385": 2.15503, "22390": 2.17056, "22395": 2.17487, "22400": 2.15679, "22405": 2.10276, "22410": 2.1837, "22415": 2.21391, "22420": 2.17375, "22425": 2.18023, "22430": 2.12969, "22435": 2.16637, "22440": 2.18996, "22445": 2.20168, "22450": 2.14031, "22455": 2.14858, "22460": 2.15651, "22465": 2.15679, "22470": 2.19903, "22475": 2.19328, "22480": 2.11609, "22485": 2.14255, "22490": 2.17166, "22495": 2.12352, "22500": 2.09676, "22505": 2.17327, "22510": 2.12161, "22515": 2.14878, "22520": 2.18696, "22525": 2.14794, "22530": 2.13874, "22535": 2.16658, "22540": 2.16712, "22545": 2.20467, "22550": 2.21754, "22555": 2.13618, "22560": 2.14783, "22565": 2.16138, "22570": 2.14901, "22575": 2.15339, "22580": 2.09792, "22585": 2.17526, "22590": 2.13528, "22595": 2.14836, "22600": 2.1768, "22605": 2.13339, "22610": 2.15175, "22615": 2.14764, "22620": 2.12008, "22625": 2.20916, "22630": 2.18123, "22635": 2.19825, "22640": 2.13907, "22645": 2.11753, "22650": 2.17086, "22655": 2.13648, "22660": 2.16345, "22665": 2.13028, "22670": 2.15613, "22675": 2.11184, "22680": 2.16577, "22685": 2.20131, "22690": 2.16467, "22695": 2.15979, "22700": 2.14123, "22705": 2.13199, "22710": 2.14721, "22715": 2.1478, "22720": 2.14107, "22725": 2.1361, "22730": 2.16224, "22735": 2.19068, "22740": 2.15346, "22745": 2.15185, "22750": 2.08885, "22755": 2.16729, "22760": 2.19994, "22765": 2.19186, "22770": 2.148, "22775": 2.19099, "22780": 2.15725, "22785": 2.15645, "22790": 2.1847, "22795": 2.12636, "22800": 2.12274, "22805": 2.11111, "22810": 2.14941, "22815": 2.08651, "22820": 2.17158, "22825": 2.14815, "22830": 2.15628, "22835": 2.13229, "22840": 2.19481, "22845": 2.18147, "22850": 2.15738, "22855": 2.13536, "22860": 2.13835, "22865": 2.15617, "22870": 2.14684, "22875": 2.13578, "22880": 2.17712, "22885": 2.15902, "22890": 2.13896, "22895": 2.13573, "22900": 2.16315, "22905": 2.14429, "22910": 2.16822, "22915": 2.17655, "22920": 2.14056, "22925": 2.13198, "22930": 2.17228, "22935": 2.14538, "22940": 2.16101, "22945": 2.17781, "22950": 2.11444, "22955": 2.18338, "22960": 2.13663, "22965": 2.09435, "22970": 2.17426, "22975": 2.16983, "22980": 2.16432, "22985": 2.15531, "22990": 2.11261, "22995": 2.19197, "23000": 2.1656, "23005": 2.11671, "23010": 2.18455, "23015": 2.13707, "23020": 2.14467, "23025": 2.14695, "23030": 2.10276, "23035": 2.18211, "23040": 2.15906, "23045": 2.1675, "23050": 2.15374, "23055": 2.16203, "23060": 2.15042, "23065": 2.15106, "23070": 2.14293, "23075": 2.17585, "23080": 2.15325, "23085": 2.14387, "23090": 2.11307, "23095": 2.15394, "23100": 2.11205, "23105": 2.13362, "23110": 2.1851, "23115": 2.17488, "23120": 2.1507, "23125": 2.12622, "23130": 2.16601, "23135": 2.19298, "23140": 2.13467, "23145": 2.12615, "23150": 2.147, "23155": 2.1603, "23160": 2.175, "23165": 2.18371, "23170": 2.12045, "23175": 2.1413, "23180": 2.1897, "23185": 2.14244, "23190": 2.15864, "23195": 2.13817, "23200": 2.13262, "23205": 2.13212, "23210": 2.17764, "23215": 2.17991, "23220": 2.15199, "23225": 2.15749, "23230": 2.1839, "23235": 2.13668, "23240": 2.16726, "23245": 2.18092, "23250": 2.1872, "23255": 2.18645, "23260": 2.20302, "23265": 2.14637, "23270": 2.17133, "23275": 2.17171, "23280": 2.14166, "23285": 2.10133, "23290": 2.20418, "23295": 2.15618, "23300": 2.19437, "23305": 2.20351, "23310": 2.11162, "23315": 2.15624, "23320": 2.17876, "23325": 2.17909, "23330": 2.11993, "23335": 2.18626, "23340": 2.18324, "23345": 2.15821, "23350": 2.14535, "23355": 2.14415, "23360": 2.16796, "23365": 2.10551, "23370": 2.18139, "23375": 2.11476, "23380": 2.10469, "23385": 2.13941, "23390": 2.13204, "23395": 2.17153, "23400": 2.13178, "23405": 2.11388, "23410": 2.13032, "23415": 2.12223, "23420": 2.17893, "23425": 2.20395, "23430": 2.17313, "23435": 2.16916, "23440": 2.13236, "23445": 2.1084, "23450": 2.14132, "23455": 2.16635, "23460": 2.18818, "23465": 2.18215, "23470": 2.09951, "23475": 2.11816, "23480": 2.1673, "23485": 2.1455, "23490": 2.16389, "23495": 2.11274, "23500": 2.17143, "23505": 2.11759, "23510": 2.14759, "23515": 2.1325, "23520": 2.16363, "23525": 2.12156, "23530": 2.14107, "23535": 2.13869, "23540": 2.14466, "23545": 2.10135, "23550": 2.13253, "23555": 2.19156, "23560": 2.14776, "23565": 2.1492, "23570": 2.14845, "23575": 2.14356, "23580": 2.14115, "23585": 2.17055, "23590": 2.13825, "23595": 2.12279, "23600": 2.1123, "23605": 2.18643, "23610": 2.16523, "23615": 2.14192, "23620": 2.13847, "23625": 2.16856, "23630": 2.1867, "23635": 2.17455, "23640": 2.18147, "23645": 2.16132, "23650": 2.151, "23655": 2.15085, "23660": 2.12679, "23665": 2.16068, "23670": 2.1703, "23675": 2.1662, "23680": 2.13372, "23685": 2.16011, "23690": 2.21052, "23695": 2.15979, "23700": 2.11113, "23705": 2.13206, "23710": 2.17878, "23715": 2.10653, "23720": 2.09081, "23725": 2.15689, "23730": 2.1534, "23735": 2.16317, "23740": 2.13226, "23745": 2.19021, "23750": 2.14451, "23755": 2.1636, "23760": 2.1669, "23765": 2.19807, "23770": 2.12352, "23775": 2.129, "23780": 2.14504, "23785": 2.12629, "23790": 2.16184, "23795": 2.16563, "23800": 2.13886, "23805": 2.17101, "23810": 2.12665, "23815": 2.17155, "23820": 2.11727, "23825": 2.17367, "23830": 2.1928, "23835": 2.16775, "23840": 2.09421, "23845": 2.16104, "23850": 2.19337, "23855": 2.10499, "23860": 2.14445, "23865": 2.11546, "23870": 2.15424, "23875": 2.17484, "23880": 2.14166, "23885": 2.13111, "23890": 2.15024, "23895": 2.1249, "23900": 2.15825, "23905": 2.14215, "23910": 2.14719, "23915": 2.09716, "23920": 2.11399, "23925": 2.13337, "23930": 2.167, "23935": 2.15836, "23940": 2.16848, "23945": 2.13573, "23950": 2.12821, "23955": 2.14567, "23960": 2.12871, "23965": 2.16547, "23970": 2.15265, "23975": 2.16606, "23980": 2.19022, "23985": 2.11895, "23990": 2.13059, "23995": 2.1049, "24000": 2.15211, "24005": 2.16507, "24010": 2.16742, "24015": 2.12479, "24020": 2.13269, "24025": 2.18471, "24030": 2.12745, "24035": 2.18848, "24040": 2.13059, "24045": 2.13954, "24050": 2.16811, "24055": 2.1912, "24060": 2.14344, "24065": 2.11655, "24070": 2.10791, "24075": 2.14108, "24080": 2.1195, "24085": 2.19306, "24090": 2.1463, "24095": 2.13713, "24100": 2.14937, "24105": 2.10634, "24110": 2.09804, "24115": 2.12855, "24120": 2.17368, "24125": 2.10948, "24130": 2.17774, "24135": 2.14071, "24140": 2.14629, "24145": 2.17556, "24150": 2.14195, "24155": 2.14927, "24160": 2.10913, "24165": 2.16743, "24170": 2.17048, "24175": 2.16604, "24180": 2.13804, "24185": 2.16096, "24190": 2.14033, "24195": 2.16965, "24200": 2.20094, "24205": 2.1093, "24210": 2.13882, "24215": 2.19206, "24220": 2.16315, "24225": 2.15502, "24230": 2.16376, "24235": 2.17676, "24240": 2.16894, "24245": 2.14375, "24250": 2.15886, "24255": 2.1364, "24260": 2.18243, "24265": 2.17437, "24270": 2.16028, "24275": 2.13635, "24280": 2.15377, "24285": 2.10797, "24290": 2.15507, "24295": 2.1179, "24300": 2.14622, "24305": 2.1338, "24310": 2.14803, "24315": 2.16149, "24320": 2.15779, "24325": 2.17472, "24330": 2.12432, "24335": 2.12771, "24340": 2.13167, "24345": 2.10716, "24350": 2.2054, "24355": 2.12895, "24360": 2.12431, "24365": 2.1351, "24370": 2.15807, "24375": 2.12893, "24380": 2.16708, "24385": 2.14894, "24390": 2.10452, "24395": 2.17483, "24400": 2.18619, "24405": 2.20739, "24410": 2.11931, "24415": 2.15104, "24420": 2.16808, "24425": 2.12126, "24430": 2.13358, "24435": 2.16129, "24440": 2.11615, "24445": 2.11655, "24450": 2.16914, "24455": 2.12629, "24460": 2.15548, "24465": 2.11766, "24470": 2.09776, "24475": 2.13857, "24480": 2.14188, "24485": 2.16137, "24490": 2.15078, "24495": 2.09587, "24500": 2.17669, "24505": 2.1586, "24510": 2.15878, "24515": 2.10099, "24520": 2.14186, "24525": 2.11368, "24530": 2.19434, "24535": 2.11689, "24540": 2.17781, "24545": 2.14287, "24550": 2.14161, "24555": 2.15228, "24560": 2.18504, "24565": 2.18644, "24570": 2.13837, "24575": 2.12423, "24580": 2.17188, "24585": 2.15737, "24590": 2.17732, "24595": 2.1313, "24600": 2.16052, "24605": 2.16233, "24610": 2.17924, "24615": 2.10288, "24620": 2.12314, "24625": 2.12607, "24630": 2.18109, "24635": 2.10519, "24640": 2.16879, "24645": 2.17453, "24650": 2.17176, "24655": 2.10672, "24660": 2.12608, "24665": 2.16671, "24670": 2.15333, "24675": 2.14934, "24680": 2.14082, "24685": 2.14316, "24690": 2.13464, "24695": 2.17939, "24700": 2.12176, "24705": 2.15275, "24710": 2.12674, "24715": 2.16325, "24720": 2.16145, "24725": 2.154, "24730": 2.16942, "24735": 2.11819, "24740": 2.13085, "24745": 2.13431, "24750": 2.14134, "24755": 2.1686, "24760": 2.13653, "24765": 2.13068, "24770": 2.14829, "24775": 2.13263, "24780": 2.10023, "24785": 2.1409, "24790": 2.16342, "24795": 2.14489, "24800": 2.13134, "24805": 2.17088, "24810": 2.14805, "24815": 2.13829, "24820": 2.12339, "24825": 2.16367, "24830": 2.12169, "24835": 2.15139, "24840": 2.16018, "24845": 2.17206, "24850": 2.14524, "24855": 2.15481, "24860": 2.12555, "24865": 2.0893, "24870": 2.14954, "24875": 2.20696, "24880": 2.17038, "24885": 2.14356, "24890": 2.13485, "24895": 2.1621, "24900": 2.13968, "24905": 2.15389, "24910": 2.14381, "24915": 2.15018, "24920": 2.09766, "24925": 2.14105, "24930": 2.14525, "24935": 2.19, "24940": 2.09386, "24945": 2.13885, "24950": 2.12605, "24955": 2.13125, "24960": 2.12436, "24965": 2.15175, "24970": 2.09356, "24975": 2.14714, "24980": 2.13412, "24985": 2.17147, "24990": 2.14981, "24995": 2.13031, "25000": 2.10508, "25005": 2.0935, "25010": 2.14541, "25015": 2.13928, "25020": 2.10461, "25025": 2.16961, "25030": 2.12439, "25035": 2.1854, "25040": 2.11791, "25045": 2.12259, "25050": 2.13672, "25055": 2.14954, "25060": 2.1144, "25065": 2.16029, "25070": 2.13524, "25075": 2.16681, "25080": 2.11959, "25085": 2.17527, "25090": 2.15788, "25095": 2.11312, "25100": 2.10378, "25105": 2.10119, "25110": 2.137, "25115": 2.12523, "25120": 2.14202, "25125": 2.11803, "25130": 2.11358, "25135": 2.13311, "25140": 2.10548, "25145": 2.19747, "25150": 2.14262, "25155": 2.1574, "25160": 2.12167, "25165": 2.10718, "25170": 2.17368, "25175": 2.1167, "25180": 2.12467, "25185": 2.14724, "25190": 2.14826, "25195": 2.15785, "25200": 2.13149, "25205": 2.17763, "25210": 2.18532, "25215": 2.11588, "25220": 2.15083, "25225": 2.13202, "25230": 2.14423, "25235": 2.14406, "25240": 2.14894, "25245": 2.12925, "25250": 2.13454, "25255": 2.15475, "25260": 2.18087, "25265": 2.11775, "25270": 2.13873, "25275": 2.09635, "25280": 2.15431, "25285": 2.13514, "25290": 2.16731, "25295": 2.13462, "25300": 2.12345, "25305": 2.11474, "25310": 2.14275, "25315": 2.09285, "25320": 2.14461, "25325": 2.13956, "25330": 2.19148, "25335": 2.12378, "25340": 2.12965, "25345": 2.12887, "25350": 2.13213, "25355": 2.11888, "25360": 2.1059, "25365": 2.09638, "25370": 2.11306, "25375": 2.1467, "25380": 2.10709, "25385": 2.17261, "25390": 2.11278, "25395": 2.13816, "25400": 2.10913, "25405": 2.14093, "25410": 2.19808, "25415": 2.17237, "25420": 2.12491, "25425": 2.1537, "25430": 2.09154, "25435": 2.15871, "25440": 2.10666, "25445": 2.16015, "25450": 2.12759, "25455": 2.15266, "25460": 2.16664, "25465": 2.17351, "25470": 2.17732, "25475": 2.12327, "25480": 2.13996, "25485": 2.14449, "25490": 2.1532, "25495": 2.15216, "25500": 2.15909, "25505": 2.16602, "25510": 2.11383, "25515": 2.15533, "25520": 2.12854, "25525": 2.09998, "25530": 2.15804, "25535": 2.12931, "25540": 2.13135, "25545": 2.16068, "25550": 2.14763, "25555": 2.16244, "25560": 2.10742, "25565": 2.13274, "25570": 2.14507, "25575": 2.16498, "25580": 2.1212, "25585": 2.17717, "25590": 2.15514, "25595": 2.16179, "25600": 2.18421, "25605": 2.14857, "25610": 2.10402, "25615": 2.14025, "25620": 2.12012, "25625": 2.16977, "25630": 2.09608, "25635": 2.14927, "25640": 2.12592, "25645": 2.13979, "25650": 2.15811, "25655": 2.18701, "25660": 2.07347, "25665": 2.13593, "25670": 2.11104, "25675": 2.11771, "25680": 2.16462, "25685": 2.15135, "25690": 2.14239, "25695": 2.13519, "25700": 2.13352, "25705": 2.12982, "25710": 2.13145, "25715": 2.07115, "25720": 2.11601, "25725": 2.19226, "25730": 2.14628, "25735": 2.15506, "25740": 2.10501, "25745": 2.127, "25750": 2.13128, "25755": 2.15606, "25760": 2.16697, "25765": 2.16053, "25770": 2.11853, "25775": 2.10562, "25780": 2.10902, "25785": 2.12863, "25790": 2.17368, "25795": 2.14496, "25800": 2.12905, "25805": 2.08524, "25810": 2.12983, "25815": 2.09324, "25820": 2.16798, "25825": 2.14057, "25830": 2.1047, "25835": 2.15117, "25840": 2.15849, "25845": 2.15019, "25850": 2.13295, "25855": 2.13178, "25860": 2.15972, "25865": 2.16099, "25870": 2.09391, "25875": 2.08312, "25880": 2.10635, "25885": 2.14348, "25890": 2.11644, "25895": 2.12544, "25900": 2.11317, "25905": 2.12273, "25910": 2.16627, "25915": 2.12478, "25920": 2.09338, "25925": 2.1517, "25930": 2.16353, "25935": 2.11329, "25940": 2.14761, "25945": 2.08793, "25950": 2.13604, "25955": 2.09517, "25960": 2.13487, "25965": 2.19629, "25970": 2.11031, "25975": 2.13503, "25980": 2.13907, "25985": 2.17879, "25990": 2.11107, "25995": 2.16905, "26000": 2.15261, "26005": 2.1734, "26010": 2.12349, "26015": 2.13689, "26020": 2.15064, "26025": 2.13945, "26030": 2.16465, "26035": 2.14256, "26040": 2.13641, "26045": 2.17501, "26050": 2.15683, "26055": 2.16953, "26060": 2.16006, "26065": 2.13466, "26070": 2.16211, "26075": 2.16186, "26080": 2.19845, "26085": 2.11805, "26090": 2.1047, "26095": 2.08672, "26100": 2.15863, "26105": 2.08701, "26110": 2.09791, "26115": 2.14255, "26120": 2.10179, "26125": 2.10894, "26130": 2.12967, "26135": 2.15007, "26140": 2.13705, "26145": 2.14678, "26150": 2.17911, "26155": 2.16757, "26160": 2.13593, "26165": 2.1187, "26170": 2.10402, "26175": 2.13641, "26180": 2.15957, "26185": 2.15923, "26190": 2.13669, "26195": 2.13264, "26200": 2.09331, "26205": 2.16192, "26210": 2.09053, "26215": 2.12503, "26220": 2.11618, "26225": 2.15954, "26230": 2.15774, "26235": 2.15518, "26240": 2.13655, "26245": 2.15068, "26250": 2.19018, "26255": 2.10736, "26260": 2.1473, "26265": 2.15666, "26270": 2.12822, "26275": 2.11554, "26280": 2.11156, "26285": 2.13055, "26290": 2.131, "26295": 2.1466, "26300": 2.11712, "26305": 2.13717, "26310": 2.15652, "26315": 2.19052, "26320": 2.11558, "26325": 2.1421, "26330": 2.17277, "26335": 2.1456, "26340": 2.16321, "26345": 2.12829, "26350": 2.13659, "26355": 2.10411, "26360": 2.10349, "26365": 2.16405, "26370": 2.09295, "26375": 2.12093, "26380": 2.1016, "26385": 2.11961, "26390": 2.15677, "26395": 2.12418, "26400": 2.1251, "26405": 2.10008, "26410": 2.11132, "26415": 2.12956, "26420": 2.11677, "26425": 2.13056, "26430": 2.10983, "26435": 2.14101, "26440": 2.167, "26445": 2.13568, "26450": 2.14434, "26455": 2.09664, "26460": 2.11529, "26465": 2.10708, "26470": 2.14197, "26475": 2.14506, "26480": 2.118, "26485": 2.13559, "26490": 2.13522, "26495": 2.13296, "26500": 2.15624, "26505": 2.10824, "26510": 2.1341, "26515": 2.16058, "26520": 2.12576, "26525": 2.16288, "26530": 2.14719, "26535": 2.11573, "26540": 2.12491, "26545": 2.15252, "26550": 2.10679, "26555": 2.1177, "26560": 2.10607, "26565": 2.07083, "26570": 2.12709, "26575": 2.16492, "26580": 2.07127, "26585": 2.11963, "26590": 2.10486, "26595": 2.11722, "26600": 2.13102, "26605": 2.16489, "26610": 2.11539, "26615": 2.13664, "26620": 2.13997, "26625": 2.15934, "26630": 2.13705, "26635": 2.15977, "26640": 2.07916, "26645": 2.14851, "26650": 2.13992, "26655": 2.16195, "26660": 2.12942, "26665": 2.14369, "26670": 2.12532, "26675": 2.15372, "26680": 2.13919, "26685": 2.10444, "26690": 2.15741, "26695": 2.14051, "26700": 2.10177, "26705": 2.15609, "26710": 2.14665, "26715": 2.11618, "26720": 2.08097, "26725": 2.09109, "26730": 2.12342, "26735": 2.10658, "26740": 2.11511, "26745": 2.13942, "26750": 2.18214, "26755": 2.12165, "26760": 2.10585, "26765": 2.14431, "26770": 2.08803, "26775": 2.13503, "26780": 2.16249, "26785": 2.09653, "26790": 2.11903, "26795": 2.1679, "26800": 2.10971, "26805": 2.12706, "26810": 2.09286, "26815": 2.11053, "26820": 2.11731, "26825": 2.13305, "26830": 2.10576, "26835": 2.13245, "26840": 2.13546, "26845": 2.14679, "26850": 2.09758, "26855": 2.09396, "26860": 2.07326, "26865": 2.11334, "26870": 2.15206, "26875": 2.15061, "26880": 2.12596, "26885": 2.12179, "26890": 2.1413, "26895": 2.15603, "26900": 2.12318, "26905": 2.13427, "26910": 2.14, "26915": 2.1002, "26920": 2.14651, "26925": 2.16853, "26930": 2.13749, "26935": 2.10332, "26940": 2.16996, "26945": 2.12778, "26950": 2.13249, "26955": 2.1213, "26960": 2.19338, "26965": 2.11285, "26970": 2.11467, "26975": 2.12567, "26980": 2.12003, "26985": 2.09841, "26990": 2.11242, "26995": 2.11699, "27000": 2.1456, "27005": 2.11935, "27010": 2.11448, "27015": 2.09775, "27020": 2.13179, "27025": 2.11531, "27030": 2.17045, "27035": 2.13021, "27040": 2.13601, "27045": 2.13678, "27050": 2.16213, "27055": 2.11266, "27060": 2.09306, "27065": 2.15157, "27070": 2.13163, "27075": 2.15575, "27080": 2.13787, "27085": 2.11558, "27090": 2.11222, "27095": 2.12948, "27100": 2.13381, "27105": 2.15558, "27110": 2.12478, "27115": 2.12334, "27120": 2.1387, "27125": 2.12838, "27130": 2.11076, "27135": 2.12766, "27140": 2.14245, "27145": 2.13466, "27150": 2.14312, "27155": 2.11785, "27160": 2.10672, "27165": 2.12953, "27170": 2.12709, "27175": 2.15467, "27180": 2.11407, "27185": 2.1218, "27190": 2.13846, "27195": 2.13595, "27200": 2.12047, "27205": 2.10145, "27210": 2.17751, "27215": 2.13589, "27220": 2.12684, "27225": 2.12139, "27230": 2.13727, "27235": 2.08882, "27240": 2.13439, "27245": 2.13592, "27250": 2.09986, "27255": 2.1165, "27260": 2.10251, "27265": 2.14336, "27270": 2.16872, "27275": 2.03253, "27280": 2.11467, "27285": 2.14375, "27290": 2.13791, "27295": 2.15113, "27300": 2.12012, "27305": 2.13685, "27310": 2.09748, "27315": 2.15065, "27320": 2.19661, "27325": 2.12644, "27330": 2.10122, "27335": 2.09806, "27340": 2.09528, "27345": 2.13464, "27350": 2.1242, "27355": 2.15511, "27360": 2.09658, "27365": 2.12021, "27370": 2.12828, "27375": 2.15764, "27380": 2.10649, "27385": 2.13042, "27390": 2.15712, "27395": 2.10074, "27400": 2.13264, "27405": 2.17343, "27410": 2.13645, "27415": 2.1211, "27420": 2.12379, "27425": 2.12136, "27430": 2.10905, "27435": 2.12047, "27440": 2.15806, "27445": 2.10733, "27450": 2.12683, "27455": 2.10368, "27460": 2.10946, "27465": 2.12476, "27470": 2.09539, "27475": 2.12202, "27480": 2.12609, "27485": 2.12448, "27490": 2.12081, "27495": 2.11107, "27500": 2.11801, "27505": 2.13954, "27510": 2.10385, "27515": 2.11638, "27520": 2.10441, "27525": 2.1506, "27530": 2.19648, "27535": 2.13903, "27540": 2.08019, "27545": 2.13783, "27550": 2.16807, "27555": 2.09273, "27560": 2.12452, "27565": 2.14255, "27570": 2.09247, "27575": 2.13557, "27580": 2.12597, "27585": 2.1456, "27590": 2.12179, "27595": 2.17639, "27600": 2.13701, "27605": 2.12321, "27610": 2.15363, "27615": 2.12638, "27620": 2.10931, "27625": 2.15549, "27630": 2.08812, "27635": 2.16297, "27640": 2.12966, "27645": 2.12164, "27650": 2.13385, "27655": 2.0427, "27660": 2.08318, "27665": 2.15997, "27670": 2.10639, "27675": 2.12543, "27680": 2.14573, "27685": 2.11779, "27690": 2.13012, "27695": 2.11075, "27700": 2.12519, "27705": 2.13024, "27710": 2.14462, "27715": 2.0945, "27720": 2.14315, "27725": 2.09895, "27730": 2.1184, "27735": 2.11826, "27740": 2.15323, "27745": 2.13269, "27750": 2.12817, "27755": 2.11759, "27760": 2.05518, "27765": 2.13794, "27770": 2.11003, "27775": 2.09858, "27780": 2.10921, "27785": 2.08914, "27790": 2.10355, "27795": 2.08682, "27800": 2.10193, "27805": 2.10008, "27810": 2.12749, "27815": 2.17029, "27820": 2.11627, "27825": 2.1765, "27830": 2.11913, "27835": 2.11821, "27840": 2.14224, "27845": 2.0895, "27850": 2.15397, "27855": 2.16157, "27860": 2.08868, "27865": 2.12912, "27870": 2.09815, "27875": 2.15321, "27880": 2.08756, "27885": 2.13095, "27890": 2.10963, "27895": 2.13875, "27900": 2.14608, "27905": 2.10894, "27910": 2.10164, "27915": 2.11077, "27920": 2.1765, "27925": 2.13737, "27930": 2.13753, "27935": 2.12115, "27940": 2.13255, "27945": 2.14051, "27950": 2.10558, "27955": 2.13128, "27960": 2.10258, "27965": 2.09674, "27970": 2.11739, "27975": 2.10698, "27980": 2.11827, "27985": 2.14618, "27990": 2.13991, "27995": 2.17869, "28000": 2.13034, "28005": 2.07726, "28010": 2.12576, "28015": 2.13353, "28020": 2.11445, "28025": 2.12856, "28030": 2.09208, "28035": 2.11297, "28040": 2.13622, "28045": 2.12174, "28050": 2.09021, "28055": 2.12921, "28060": 2.12694, "28065": 2.11803, "28070": 2.10849, "28075": 2.11356, "28080": 2.1395, "28085": 2.1169, "28090": 2.16359, "28095": 2.08832, "28100": 2.14455, "28105": 2.12296, "28110": 2.09626, "28115": 2.11287, "28120": 2.08865, "28125": 2.11769, "28130": 2.14615, "28135": 2.09358, "28140": 2.15613, "28145": 2.12031, "28150": 2.17892, "28155": 2.1263, "28160": 2.17468, "28165": 2.11566, "28170": 2.17781, "28175": 2.10978, "28180": 2.09971, "28185": 2.1111, "28190": 2.16918, "28195": 2.10283, "28200": 2.1503, "28205": 2.14137, "28210": 2.13573, "28215": 2.12759, "28220": 2.11938, "28225": 2.14126, "28230": 2.1533, "28235": 2.12446, "28240": 2.13953, "28245": 2.16355, "28250": 2.10771, "28255": 2.1386, "28260": 2.10154, "28265": 2.1341, "28270": 2.09543, "28275": 2.14876, "28280": 2.12174, "28285": 2.12087, "28290": 2.13637, "28295": 2.14221, "28300": 2.15112, "28305": 2.0978, "28310": 2.13656, "28315": 2.11376, "28320": 2.11909, "28325": 2.14892, "28330": 2.11575, "28335": 2.08885, "28340": 2.11305, "28345": 2.12449, "28350": 2.13674, "28355": 2.16036, "28360": 2.13672, "28365": 2.11803, "28370": 2.12402, "28375": 2.09362, "28380": 2.11723, "28385": 2.09741, "28390": 2.12442, "28395": 2.12493, "28400": 2.09558, "28405": 2.13926, "28410": 2.08415, "28415": 2.10162, "28420": 2.10447, "28425": 2.1395, "28430": 2.12979, "28435": 2.13385, "28440": 2.10254, "28445": 2.08482, "28450": 2.11967, "28455": 2.10275, "28460": 2.12367, "28465": 2.19266, "28470": 2.08725, "28475": 2.12554, "28480": 2.09132, "28485": 2.09698, "28490": 2.1385, "28495": 2.09847, "28500": 2.08578, "28505": 2.11349, "28510": 2.15226, "28515": 2.15997, "28520": 2.11156, "28525": 2.12661, "28530": 2.11162, "28535": 2.10864, "28540": 2.12758, "28545": 2.11624, "28550": 2.08671, "28555": 2.13243, "28560": 2.12105, "28565": 2.15732, "28570": 2.13332, "28575": 2.09642, "28580": 2.11313, "28585": 2.09779, "28590": 2.11117, "28595": 2.15244, "28600": 2.08849, "28605": 2.10025, "28610": 2.10096, "28615": 2.14112, "28620": 2.06035, "28625": 2.11085, "28630": 2.10192, "28635": 2.10147, "28640": 2.10192, "28645": 2.13733, "28650": 2.13812, "28655": 2.10495, "28660": 2.0721, "28665": 2.12407, "28670": 2.1679, "28675": 2.09711, "28680": 2.08447, "28685": 2.16448, "28690": 2.06737, "28695": 2.10936, "28700": 2.16135, "28705": 2.11318, "28710": 2.11122, "28715": 2.09184, "28720": 2.11324, "28725": 2.11451, "28730": 2.13259, "28735": 2.11719, "28740": 2.10089, "28745": 2.11125, "28750": 2.07575, "28755": 2.13171, "28760": 2.1147, "28765": 2.10619, "28770": 2.16285, "28775": 2.10196, "28780": 2.1474, "28785": 2.10407, "28790": 2.10736, "28795": 2.12461, "28800": 2.09803, "28805": 2.17757, "28810": 2.13788, "28815": 2.17338, "28820": 2.08876, "28825": 2.11437, "28830": 2.13469, "28835": 2.14187, "28840": 2.11139, "28845": 2.11505, "28850": 2.12134, "28855": 2.16567, "28860": 2.1559, "28865": 2.15734, "28870": 2.15067, "28875": 2.13869, "28880": 2.14743, "28885": 2.14261, "28890": 2.16036, "28895": 2.10875, "28900": 2.11347, "28905": 2.09731, "28910": 2.11268, "28915": 2.10576, "28920": 2.14804, "28925": 2.13392, "28930": 2.08625, "28935": 2.08552, "28940": 2.13915, "28945": 2.10145, "28950": 2.11092, "28955": 2.15034, "28960": 2.12879, "28965": 2.13123, "28970": 2.17048, "28975": 2.10496, "28980": 2.15301, "28985": 2.12898, "28990": 2.1362, "28995": 2.10164, "29000": 2.10457, "29005": 2.13737, "29010": 2.08933, "29015": 2.11669, "29020": 2.08409, "29025": 2.09505, "29030": 2.13801, "29035": 2.07191, "29040": 2.12542, "29045": 2.11001, "29050": 2.07563, "29055": 2.11642, "29060": 2.09977, "29065": 2.12052, "29070": 2.08283, "29075": 2.11012, "29080": 2.10172, "29085": 2.09728, "29090": 2.12166, "29095": 2.10102, "29100": 2.13415, "29105": 2.13262, "29110": 2.11927, "29115": 2.09883, "29120": 2.12175, "29125": 2.17281, "29130": 2.14003, "29135": 2.07496, "29140": 2.08926, "29145": 2.11984, "29150": 2.18145, "29155": 2.0807, "29160": 2.10169, "29165": 2.10824, "29170": 2.1311, "29175": 2.09121, "29180": 2.13592, "29185": 2.09855, "29190": 2.13502, "29195": 2.02814, "29200": 2.15236, "29205": 2.1329, "29210": 2.10126, "29215": 2.15306, "29220": 2.15295, "29225": 2.1448, "29230": 2.08576, "29235": 2.12018, "29240": 2.07678, "29245": 2.10306, "29250": 2.08707, "29255": 2.1286, "29260": 2.11005, "29265": 2.15261, "29270": 2.13661, "29275": 2.11779, "29280": 2.12933, "29285": 2.10558, "29290": 2.11106, "29295": 2.12579, "29300": 2.07064, "29305": 2.08581, "29310": 2.13955, "29315": 2.09118, "29320": 2.09309, "29325": 2.07181, "29330": 2.14112, "29335": 2.14371, "29340": 2.11648, "29345": 2.14709, "29350": 2.16488, "29355": 2.15, "29360": 2.15877, "29365": 2.11872, "29370": 2.13601, "29375": 2.09098, "29380": 2.10035, "29385": 2.12004, "29390": 2.13257, "29395": 2.14584, "29400": 2.09349, "29405": 2.11238, "29410": 2.10765, "29415": 2.12255, "29420": 2.10417, "29425": 2.14938, "29430": 2.11673, "29435": 2.11583, "29440": 2.1192, "29445": 2.11385, "29450": 2.08292, "29455": 2.13564, "29460": 2.06399, "29465": 2.16491, "29470": 2.1023, "29475": 2.14764, "29480": 2.13775, "29485": 2.14307, "29490": 2.11287, "29495": 2.14365, "29500": 2.08926, "29505": 2.1582, "29510": 2.12389, "29515": 2.12043, "29520": 2.12645, "29525": 2.13317, "29530": 2.11825, "29535": 2.11792, "29540": 2.10674, "29545": 2.11827, "29550": 2.13118, "29555": 2.10553, "29560": 2.11604, "29565": 2.08512, "29570": 2.11291, "29575": 2.11739, "29580": 2.08041, "29585": 2.10756, "29590": 2.12861, "29595": 2.12318, "29600": 2.12673, "29605": 2.13656, "29610": 2.13409, "29615": 2.13231, "29620": 2.07872, "29625": 2.09025, "29630": 2.10805, "29635": 2.11449, "29640": 2.09561, "29645": 2.11913, "29650": 2.09826, "29655": 2.11916, "29660": 2.13145, "29665": 2.10475, "29670": 2.10884, "29675": 2.12755, "29680": 2.11228, "29685": 2.08842, "29690": 2.151, "29695": 2.13819, "29700": 2.09625, "29705": 2.16442, "29710": 2.11794, "29715": 2.10998, "29720": 2.12015, "29725": 2.10175, "29730": 2.09634, "29735": 2.14858, "29740": 2.07632, "29745": 2.14453, "29750": 2.09168, "29755": 2.09896, "29760": 2.13998, "29765": 2.0971, "29770": 2.06025, "29775": 2.11593, "29780": 2.1058, "29785": 2.15011, "29790": 2.12693, "29795": 2.12567, "29800": 2.12319, "29805": 2.11393, "29810": 2.15624, "29815": 2.18807, "29820": 2.11404, "29825": 2.10132, "29830": 2.11826, "29835": 2.12395, "29840": 2.13137, "29845": 2.09972, "29850": 2.11021, "29855": 2.09261, "29860": 2.10222, "29865": 2.1494, "29870": 2.09211, "29875": 2.12008, "29880": 2.09283, "29885": 2.10091, "29890": 2.1408, "29895": 2.0696, "29900": 2.12719, "29905": 2.10369, "29910": 2.10313, "29915": 2.16088, "29920": 2.13849, "29925": 2.06949, "29930": 2.12668, "29935": 2.11384, "29940": 2.10399, "29945": 2.1029, "29950": 2.08946, "29955": 2.09805, "29960": 2.11103, "29965": 2.10159, "29970": 2.15112, "29975": 2.1592, "29980": 2.09091, "29985": 2.10124, "29990": 2.14112, "29995": 2.09254, "30000": 2.15578, "30005": 2.08705, "30010": 2.13546, "30015": 2.12601, "30020": 2.11175, "30025": 2.09909, "30030": 2.06756, "30035": 2.10504, "30040": 2.10169, "30045": 2.10522, "30050": 2.16004, "30055": 2.1008, "30060": 2.10082, "30065": 2.10717, "30070": 2.12888, "30075": 2.09967, "30080": 2.11701, "30085": 2.0995, "30090": 2.1354, "30095": 2.115, "30100": 2.07281, "30105": 2.12917, "30110": 2.12661, "30115": 2.12583, "30120": 2.09374, "30125": 2.08312, "30130": 2.12673, "30135": 2.11853, "30140": 2.14506, "30145": 2.05374, "30150": 2.09362, "30155": 2.12047, "30160": 2.13116, "30165": 2.13916, "30170": 2.13634, "30175": 2.12757, "30180": 2.1453, "30185": 2.10122, "30190": 2.13375, "30195": 2.09908, "30200": 2.14365, "30205": 2.10955, "30210": 2.12863, "30215": 2.09824, "30220": 2.13802, "30225": 2.13746, "30230": 2.125, "30235": 2.09863, "30240": 2.11197, "30245": 2.09601, "30250": 2.10767, "30255": 2.0919, "30260": 2.05523, "30265": 2.11206, "30270": 2.09712, "30275": 2.12972, "30280": 2.07104, "30285": 2.1132, "30290": 2.10708, "30295": 2.10055, "30300": 2.15964, "30305": 2.12803, "30310": 2.12367, "30315": 2.10354, "30320": 2.09587, "30325": 2.1426, "30330": 2.07302, "30335": 2.12045, "30340": 2.11581, "30345": 2.13888, "30350": 2.13798, "30355": 2.15921, "30360": 2.10183, "30365": 2.15751, "30370": 2.1266, "30375": 2.10781, "30380": 2.15301, "30385": 2.15031, "30390": 2.10437, "30395": 2.1283, "30400": 2.0672, "30405": 2.10959, "30410": 2.09267, "30415": 2.14089, "30420": 2.11378, "30425": 2.10269, "30430": 2.13473, "30435": 2.11102, "30440": 2.0595, "30445": 2.13975, "30450": 2.10843, "30455": 2.1413, "30460": 2.10073, "30465": 2.10513, "30470": 2.09321, "30475": 2.1269, "30480": 2.10335, "30485": 2.13975, "30490": 2.09987, "30495": 2.10074, "30500": 2.11014, "30505": 2.13022, "30510": 2.14101, "30515": 2.13176, "30520": 2.13251, "30525": 2.11546, "30530": 2.14905, "30535": 2.07705, "30540": 2.09822, "30545": 2.10216, "30550": 2.10992, "30555": 2.13815, "30560": 2.12671, "30565": 2.10651, "30570": 2.12808, "30575": 2.07851, "30580": 2.11301, "30585": 2.10072, "30590": 2.11352, "30595": 2.10658, "30600": 2.12632, "30605": 2.08911, "30610": 2.13466, "30615": 2.15455, "30620": 2.1145, "30625": 2.09074, "30630": 2.13648, "30635": 2.12247, "30640": 2.07584, "30645": 2.10475, "30650": 2.09952, "30655": 2.12634, "30660": 2.1048, "30665": 2.09424, "30670": 2.11644, "30675": 2.10123, "30680": 2.07819, "30685": 2.16688, "30690": 2.08059, "30695": 2.11093, "30700": 2.07214, "30705": 2.11212, "30710": 2.14923, "30715": 2.1233, "30720": 2.1342, "30725": 2.13773, "30730": 2.13375, "30735": 2.1046, "30740": 2.1475, "30745": 2.13062, "30750": 2.1118, "30755": 2.10272, "30760": 2.11093, "30765": 2.16219, "30770": 2.13094, "30775": 2.12059, "30780": 2.1187, "30785": 2.14502, "30790": 2.11688, "30795": 2.11536, "30800": 2.11716, "30805": 2.12237, "30810": 2.12851, "30815": 2.14768, "30820": 2.12371, "30825": 2.07571, "30830": 2.09694, "30835": 2.11459, "30840": 2.10131, "30845": 2.15385, "30850": 2.10893, "30855": 2.09693, "30860": 2.11181, "30865": 2.12645, "30870": 2.11544, "30875": 2.10382, "30880": 2.09914, "30885": 2.07328, "30890": 2.08756, "30895": 2.11809, "30900": 2.15826, "30905": 2.14466, "30910": 2.11793, "30915": 2.09343, "30920": 2.12574, "30925": 2.1299, "30930": 2.0998, "30935": 2.08926, "30940": 2.13874, "30945": 2.12131, "30950": 2.11955, "30955": 2.08518, "30960": 2.11683, "30965": 2.13121, "30970": 2.10444, "30975": 2.10234, "30980": 2.16632, "30985": 2.12846, "30990": 2.14932, "30995": 2.09202, "31000": 2.09359, "31005": 2.08811, "31010": 2.07924, "31015": 2.11397, "31020": 2.12247, "31025": 2.13898, "31030": 2.14396, "31035": 2.08532, "31040": 2.12508, "31045": 2.16868, "31050": 2.12621, "31055": 2.07882, "31060": 2.07717, "31065": 2.10004, "31070": 2.07041, "31075": 2.12519, "31080": 2.07558, "31085": 2.15528, "31090": 2.04424, "31095": 2.15374, "31100": 2.104, "31105": 2.16213, "31110": 2.1661, "31115": 2.11254, "31120": 2.11246, "31125": 2.14026, "31130": 2.08258, "31135": 2.14971, "31140": 2.10724, "31145": 2.06228, "31150": 2.12252, "31155": 2.06811, "31160": 2.11086, "31165": 2.10105, "31170": 2.13962, "31175": 2.1149, "31180": 2.08378, "31185": 2.09234, "31190": 2.08478, "31195": 2.16712, "31200": 2.12217, "31205": 2.11483, "31210": 2.06797, "31215": 2.10435, "31220": 2.11445, "31225": 2.09565, "31230": 2.16183, "31235": 2.10973, "31240": 2.12795, "31245": 2.14794, "31250": 2.13071, "31255": 2.06347, "31260": 2.15359, "31265": 2.11636, "31270": 2.0949, "31275": 2.09769, "31280": 2.12307, "31285": 2.12182, "31290": 2.10042, "31295": 2.14375, "31300": 2.13782, "31305": 2.07667, "31310": 2.0712, "31315": 2.12125, "31320": 2.15356, "31325": 2.14199, "31330": 2.12319, "31335": 2.1095, "31340": 2.10355, "31345": 2.07688, "31350": 2.14669, "31355": 2.13774, "31360": 2.13329, "31365": 2.05304, "31370": 2.1172, "31375": 2.09793, "31380": 2.1453, "31385": 2.11802, "31390": 2.11565, "31395": 2.14928, "31400": 2.10921, "31405": 2.09739, "31410": 2.08113, "31415": 2.13502, "31420": 2.11985, "31425": 2.15022, "31430": 2.11747, "31435": 2.10002, "31440": 2.10981, "31445": 2.10891, "31450": 2.07909, "31455": 2.13448, "31460": 2.14466, "31465": 2.13655, "31470": 2.12503, "31475": 2.07257, "31480": 2.07348, "31485": 2.11649, "31490": 2.09649, "31495": 2.09974, "31500": 2.10059, "31505": 2.14789, "31510": 2.11324, "31515": 2.13569, "31520": 2.1293, "31525": 2.1108, "31530": 2.12119, "31535": 2.10015, "31540": 2.15295, "31545": 2.10269, "31550": 2.13278, "31555": 2.10865, "31560": 2.12573, "31565": 2.09665, "31570": 2.10285, "31575": 2.08838, "31580": 2.11634, "31585": 2.12535, "31590": 2.13194, "31595": 2.13328, "31600": 2.11527, "31605": 2.1355, "31610": 2.10214, "31615": 2.11495, "31620": 2.10822, "31625": 2.1084, "31630": 2.11904, "31635": 2.08137, "31640": 2.101, "31645": 2.08355, "31650": 2.11313, "31655": 2.08821, "31660": 2.08095, "31665": 2.10768, "31670": 2.0691, "31675": 2.10231, "31680": 2.19077, "31685": 2.06787, "31690": 2.13232, "31695": 2.15562, "31700": 2.13074, "31705": 2.12903, "31710": 2.12471, "31715": 2.13586, "31720": 2.09943, "31725": 2.07972, "31730": 2.13858, "31735": 2.10842, "31740": 2.12341, "31745": 2.12822, "31750": 2.09316, "31755": 2.11346, "31760": 2.12399, "31765": 2.14957, "31770": 2.13515, "31775": 2.09549, "31780": 2.11635, "31785": 2.10876, "31790": 2.09032, "31795": 2.0714, "31800": 2.11123, "31805": 2.04699, "31810": 2.10759, "31815": 2.07836, "31820": 2.07116, "31825": 2.12533, "31830": 2.12592, "31835": 2.0817, "31840": 2.12271, "31845": 2.03197, "31850": 2.08693, "31855": 2.13747, "31860": 2.13374, "31865": 2.12651, "31870": 2.07248, "31875": 2.08044, "31880": 2.10309, "31885": 2.11729, "31890": 2.1164, "31895": 2.11101, "31900": 2.10651, "31905": 2.09037, "31910": 2.12844, "31915": 2.14359, "31920": 2.11534, "31925": 2.10943, "31930": 2.12494, "31935": 2.1192, "31940": 2.12268, "31945": 2.06138, "31950": 2.11439, "31955": 2.11489, "31960": 2.11372, "31965": 2.11981, "31970": 2.05718, "31975": 2.04893, "31980": 2.11119, "31985": 2.08512, "31990": 2.14176, "31995": 2.14513, "32000": 2.08732, "32005": 2.08503, "32010": 2.11487, "32015": 2.12571, "32020": 2.12597, "32025": 2.04955, "32030": 2.06471, "32035": 2.07842, "32040": 2.09573, "32045": 2.10256, "32050": 2.10137, "32055": 2.08765, "32060": 2.06084, "32065": 2.15961, "32070": 2.10441, "32075": 2.12514, "32080": 2.09175, "32085": 2.10862, "32090": 2.16327, "32095": 2.15165, "32100": 2.14085, "32105": 2.09651, "32110": 2.119, "32115": 2.08774, "32120": 2.1271, "32125": 2.10171, "32130": 2.13466, "32135": 2.06991, "32140": 2.05117, "32145": 2.14026, "32150": 2.09275, "32155": 2.0894, "32160": 2.08267, "32165": 2.09769, "32170": 2.10945, "32175": 2.105, "32180": 2.10072, "32185": 2.10049, "32190": 2.08486, "32195": 2.10777, "32200": 2.09372, "32205": 2.05763, "32210": 2.11196, "32215": 2.12377, "32220": 2.10652, "32225": 2.14048, "32230": 2.08846, "32235": 2.09105, "32240": 2.11162, "32245": 2.10158, "32250": 2.1021, "32255": 2.15642, "32260": 2.0618, "32265": 2.09351, "32270": 2.03907, "32275": 2.14469, "32280": 2.1295, "32285": 2.14619, "32290": 2.0823, "32295": 2.1233, "32300": 2.10745, "32305": 2.09567, "32310": 2.12562, "32315": 2.16689, "32320": 2.09024, "32325": 2.10885, "32330": 2.10349, "32335": 2.12482, "32340": 2.10857, "32345": 2.08855, "32350": 2.09591, "32355": 2.09503, "32360": 2.10435, "32365": 2.08801, "32370": 2.09032, "32375": 2.08759, "32380": 2.12991, "32385": 2.08796, "32390": 2.08393, "32395": 2.10539, "32400": 2.08086, "32405": 2.08963, "32410": 2.12809, "32415": 2.10038, "32420": 2.09051, "32425": 2.08939, "32430": 2.10868, "32435": 2.15487, "32440": 2.12469, "32445": 2.09715, "32450": 2.09849, "32455": 2.06698, "32460": 2.09975, "32465": 2.10669, "32470": 2.08192, "32475": 2.08443, "32480": 2.09572, "32485": 2.12582, "32490": 2.09633, "32495": 2.09082, "32500": 2.11417, "32505": 2.09153, "32510": 2.06165, "32515": 2.13799, "32520": 2.08426, "32525": 2.08775, "32530": 2.12605, "32535": 2.09779, "32540": 2.11718, "32545": 2.1165, "32550": 2.10728, "32555": 2.05832, "32560": 2.11126, "32565": 2.11801, "32570": 2.10904, "32575": 2.11801, "32580": 2.12669, "32585": 2.10403, "32590": 2.11432, "32595": 2.12099, "32600": 2.12953, "32605": 2.09175, "32610": 2.10397, "32615": 2.1209, "32620": 2.12445, "32625": 2.08041, "32630": 2.0986, "32635": 2.09822, "32640": 2.08343, "32645": 2.11405, "32650": 2.11359, "32655": 2.09495, "32660": 2.10169, "32665": 2.08101, "32670": 2.14615, "32675": 2.0809, "32680": 2.08086, "32685": 2.06818, "32690": 2.11823, "32695": 2.07072, "32700": 2.13245, "32705": 2.10505, "32710": 2.08247, "32715": 2.13885, "32720": 2.06808, "32725": 2.08133, "32730": 2.05989, "32735": 2.1018, "32740": 2.09611, "32745": 2.11253, "32750": 2.06469, "32755": 2.09928, "32760": 2.11432, "32765": 2.11754, "32770": 2.09061, "32775": 2.10537, "32780": 2.1141, "32785": 2.1038, "32790": 2.1183, "32795": 2.10144, "32800": 2.12033, "32805": 2.10679, "32810": 2.09784, "32815": 2.09214, "32820": 2.13041, "32825": 2.09182, "32830": 2.11275, "32835": 2.12894, "32840": 2.09281, "32845": 2.10107, "32850": 2.11602, "32855": 2.11042, "32860": 2.09571, "32865": 2.10677, "32870": 2.10783, "32875": 2.0979, "32880": 2.0877, "32885": 2.05317, "32890": 2.08974, "32895": 2.1229, "32900": 2.11577, "32905": 2.12848, "32910": 2.09985, "32915": 2.1046, "32920": 2.0879, "32925": 2.08415, "32930": 2.10449, "32935": 2.12281, "32940": 2.09736, "32945": 2.08992, "32950": 2.049, "32955": 2.10929, "32960": 2.08493, "32965": 2.16997, "32970": 2.14251, "32975": 2.07278, "32980": 2.08652, "32985": 2.07485, "32990": 2.16282, "32995": 2.14653, "33000": 2.13527, "33005": 2.10005, "33010": 2.09487, "33015": 2.10636, "33020": 2.12045, "33025": 2.07753, "33030": 2.10554, "33035": 2.07985, "33040": 2.0864, "33045": 2.11877, "33050": 2.10117, "33055": 2.12934, "33060": 2.08133, "33065": 2.10665, "33070": 2.09296, "33075": 2.1192, "33080": 2.09549, "33085": 2.1102, "33090": 2.11555, "33095": 2.09127, "33100": 2.13519, "33105": 2.07658, "33110": 2.10072, "33115": 2.09848, "33120": 2.07752, "33125": 2.10514, "33130": 2.08122, "33135": 2.12051, "33140": 2.10234, "33145": 2.09741, "33150": 2.16437, "33155": 2.17374, "33160": 2.11447, "33165": 2.0536, "33170": 2.11898, "33175": 2.08458, "33180": 2.09668, "33185": 2.1024, "33190": 2.08619, "33195": 2.11886, "33200": 2.05276, "33205": 2.09585, "33210": 2.1019, "33215": 2.07471, "33220": 2.09441, "33225": 2.10953, "33230": 2.07322, "33235": 2.08992, "33240": 2.12433, "33245": 2.07936, "33250": 2.12773, "33255": 2.09711, "33260": 2.10567, "33265": 2.07786, "33270": 2.09641, "33275": 2.11114, "33280": 2.09889, "33285": 2.10472, "33290": 2.10124, "33295": 2.12163, "33300": 2.06876, "33305": 2.11228, "33310": 2.06741, "33315": 2.09546, "33320": 2.07868, "33325": 2.12043, "33330": 2.11921, "33335": 2.11395, "33340": 2.09365, "33345": 2.12073, "33350": 2.11188, "33355": 2.09457, "33360": 2.08349, "33365": 2.17106, "33370": 2.06716, "33375": 2.06463, "33380": 2.12217, "33385": 2.0846, "33390": 2.09367, "33395": 2.11515, "33400": 2.08983, "33405": 2.13223, "33410": 2.09147, "33415": 2.09306, "33420": 2.11803, "33425": 2.12304, "33430": 2.12666, "33435": 2.11895, "33440": 2.12912, "33445": 2.13943, "33450": 2.11034, "33455": 2.09361, "33460": 2.0826, "33465": 2.10707, "33470": 2.13499, "33475": 2.14659, "33480": 2.12565, "33485": 2.14326, "33490": 2.09931, "33495": 2.10123, "33500": 2.06665, "33505": 2.12837, "33510": 2.10061, "33515": 2.1039, "33520": 2.13257, "33525": 2.11006, "33530": 2.13997, "33535": 2.12107, "33540": 2.12782, "33545": 2.09013, "33550": 2.11543, "33555": 2.09718, "33560": 2.07929, "33565": 2.08931, "33570": 2.0852, "33575": 2.12355, "33580": 2.10344, "33585": 2.12087, "33590": 2.1007, "33595": 2.13006, "33600": 2.0921, "33605": 2.07791, "33610": 2.08268, "33615": 2.11187, "33620": 2.1376, "33625": 2.09102, "33630": 2.13547, "33635": 2.10542, "33640": 2.13109, "33645": 2.06296, "33650": 2.07369, "33655": 2.08304, "33660": 2.05398, "33665": 2.10067, "33670": 2.1424, "33675": 2.09294, "33680": 2.09842, "33685": 2.08587, "33690": 2.07187, "33695": 2.13365, "33700": 2.10874, "33705": 2.08971, "33710": 2.13071, "33715": 2.08628, "33720": 2.115, "33725": 2.10707, "33730": 2.09476, "33735": 2.1672, "33740": 2.09361, "33745": 2.09973, "33750": 2.13189, "33755": 2.12211, "33760": 2.11656, "33765": 2.10089, "33770": 2.10799, "33775": 2.10631, "33780": 2.05775, "33785": 2.07428, "33790": 2.08801, "33795": 2.0855, "33800": 2.12119, "33805": 2.1123, "33810": 2.10761, "33815": 2.10751, "33820": 2.13022, "33825": 2.07417, "33830": 2.06606, "33835": 2.09449, "33840": 2.08396, "33845": 2.07481, "33850": 2.08473, "33855": 2.06961, "33860": 2.0882, "33865": 2.11279, "33870": 2.07362, "33875": 2.11163, "33880": 2.09925, "33885": 2.08639, "33890": 2.09602, "33895": 2.12831, "33900": 2.10278, "33905": 2.12323, "33910": 2.09304, "33915": 2.09475, "33920": 2.13377, "33925": 2.09825, "33930": 2.06307, "33935": 2.08535, "33940": 2.11183, "33945": 2.10251, "33950": 2.11016, "33955": 2.09259, "33960": 2.05744, "33965": 2.08277, "33970": 2.11149, "33975": 2.09438, "33980": 2.11124, "33985": 2.07744, "33990": 2.113, "33995": 2.04436, "34000": 2.10934, "34005": 2.07551, "34010": 2.08313, "34015": 2.0902, "34020": 2.119, "34025": 2.11765, "34030": 2.10542, "34035": 2.08028, "34040": 2.07779, "34045": 2.08405, "34050": 2.11657, "34055": 2.09155, "34060": 2.10536, "34065": 2.10002, "34070": 2.05601, "34075": 2.07461, "34080": 2.13238, "34085": 2.12002, "34090": 2.06914, "34095": 2.07748, "34100": 2.09973, "34105": 2.11742, "34110": 2.11702, "34115": 2.12208, "34120": 2.12291, "34125": 2.12193, "34130": 2.12325, "34135": 2.11647, "34140": 2.06518, "34145": 2.09069, "34150": 2.1183, "34155": 2.12042, "34160": 2.1049, "34165": 2.12234, "34170": 2.06993, "34175": 2.1163, "34180": 2.0667, "34185": 2.12925, "34190": 2.09669, "34195": 2.10932, "34200": 2.09421, "34205": 2.08524, "34210": 2.13324, "34215": 2.1024, "34220": 2.1198, "34225": 2.11856, "34230": 2.09187, "34235": 2.06993, "34240": 2.12933, "34245": 2.08187, "34250": 2.07523, "34255": 2.09243, "34260": 2.05205, "34265": 2.10564, "34270": 2.07126, "34275": 2.1119, "34280": 2.10515, "34285": 2.10847, "34290": 2.11033, "34295": 2.12369, "34300": 2.09763, "34305": 2.08567, "34310": 2.03788, "34315": 2.11474, "34320": 2.11834, "34325": 2.14741, "34330": 2.12732, "34335": 2.09535, "34340": 2.12184, "34345": 2.11204, "34350": 2.08929, "34355": 2.14025, "34360": 2.07981, "34365": 2.06727, "34370": 2.08412, "34375": 2.13167, "34380": 2.08943, "34385": 2.1238, "34390": 2.09795, "34395": 2.12315, "34400": 2.07587, "34405": 2.07643, "34410": 2.09282, "34415": 2.05591, "34420": 2.12403, "34425": 2.12822, "34430": 2.07916, "34435": 2.06427, "34440": 2.10043, "34445": 2.08191, "34450": 2.15013, "34455": 2.09879, "34460": 2.12776, "34465": 2.15336, "34470": 2.06617, "34475": 2.11273, "34480": 2.09534, "34485": 2.11281, "34490": 2.0931, "34495": 2.12077, "34500": 2.09082, "34505": 2.14155, "34510": 2.13562, "34515": 2.10616, "34520": 2.07922, "34525": 2.11062, "34530": 2.08171, "34535": 2.12193, "34540": 2.07076, "34545": 2.11425, "34550": 2.09634, "34555": 2.15375, "34560": 2.11148, "34565": 2.0647, "34570": 2.12689, "34575": 2.09685, "34580": 2.0967, "34585": 2.09645, "34590": 2.07749, "34595": 2.10388, "34600": 2.04047, "34605": 2.08557, "34610": 2.09418, "34615": 2.08064, "34620": 2.10201, "34625": 2.07932, "34630": 2.07462, "34635": 2.07645, "34640": 2.07253, "34645": 2.12249, "34650": 2.08402, "34655": 2.09466, "34660": 2.10157, "34665": 2.15935, "34670": 2.10128, "34675": 2.09895, "34680": 2.1038, "34685": 2.09819, "34690": 2.14205, "34695": 2.13781, "34700": 2.09977, "34705": 2.13321, "34710": 2.06802, "34715": 2.07109, "34720": 2.0712, "34725": 2.04403, "34730": 2.11912, "34735": 2.07372, "34740": 2.10181, "34745": 2.1035, "34750": 2.09776, "34755": 2.06416, "34760": 2.1166, "34765": 2.0823, "34770": 2.12272, "34775": 2.07912, "34780": 2.04733, "34785": 2.114, "34790": 2.06359, "34795": 2.12392, "34800": 2.09023, "34805": 2.05027, "34810": 2.07046, "34815": 2.11658, "34820": 2.12278, "34825": 2.11558, "34830": 2.13794, "34835": 2.1281, "34840": 2.09658, "34845": 2.07967, "34850": 2.09858, "34855": 2.077, "34860": 2.12471, "34865": 2.06581, "34870": 2.10019, "34875": 2.10627, "34880": 2.08699, "34885": 2.02285, "34890": 2.07501, "34895": 2.10332, "34900": 2.09423, "34905": 2.08253, "34910": 2.07809, "34915": 2.10301, "34920": 2.07524, "34925": 2.09209, "34930": 2.13833, "34935": 2.09151, "34940": 2.12509, "34945": 2.07202, "34950": 2.0725, "34955": 2.13399, "34960": 2.08734, "34965": 2.10051, "34970": 2.07558, "34975": 2.07329, "34980": 2.1193, "34985": 2.06913, "34990": 2.12164, "34995": 2.07286, "35000": 2.08159, "35005": 2.12592, "35010": 2.08627, "35015": 2.0674, "35020": 2.07816, "35025": 2.1037, "35030": 2.11813, "35035": 2.08885, "35040": 2.08364, "35045": 2.12929, "35050": 2.077, "35055": 2.08866, "35060": 2.11803, "35065": 2.11233, "35070": 2.08442, "35075": 2.09258, "35080": 2.09642, "35085": 2.10486, "35090": 2.10234, "35095": 2.06421, "35100": 2.10063, "35105": 2.09787, "35110": 2.10051, "35115": 2.07674, "35120": 2.05755, "35125": 2.06258, "35130": 2.07737, "35135": 2.09982, "35140": 2.12691, "35145": 2.08272, "35150": 2.07804, "35155": 2.12652, "35160": 2.05888, "35165": 2.11206, "35170": 2.07934, "35175": 2.10002, "35180": 2.09455, "35185": 2.07873, "35190": 2.06112, "35195": 2.13437, "35200": 2.06956, "35205": 2.11399, "35210": 2.08519, "35215": 2.06101, "35220": 2.12567, "35225": 2.11113, "35230": 2.09361, "35235": 2.08563, "35240": 2.0752, "35245": 2.10498, "35250": 2.10902, "35255": 2.11307, "35260": 2.10787, "35265": 2.07475, "35270": 2.09971, "35275": 2.08245, "35280": 2.07421, "35285": 2.09951, "35290": 2.07493, "35295": 2.06967, "35300": 2.06635, "35305": 2.09794, "35310": 2.10397, "35315": 2.06968, "35320": 2.04946, "35325": 2.1171, "35330": 2.10622, "35335": 2.0988, "35340": 2.07763, "35345": 2.07228, "35350": 2.1086, "35355": 2.11394, "35360": 2.06832, "35365": 2.07176, "35370": 2.11691, "35375": 2.10978, "35380": 2.10702, "35385": 2.11725, "35390": 2.11493, "35395": 2.06316, "35400": 2.12396, "35405": 2.09865, "35410": 2.10726, "35415": 2.10505, "35420": 2.08712, "35425": 2.07058, "35430": 2.12129, "35435": 2.1155, "35440": 2.12412, "35445": 2.12109, "35450": 2.05961, "35455": 2.08684, "35460": 2.11595, "35465": 2.12714, "35470": 2.08577, "35475": 2.12402, "35480": 2.07261, "35485": 2.07787, "35490": 2.058, "35495": 2.09701, "35500": 2.07056, "35505": 2.09828, "35510": 2.08533, "35515": 2.05012, "35520": 2.09105, "35525": 2.08986, "35530": 2.07194, "35535": 2.07526, "35540": 2.1072, "35545": 2.10088, "35550": 2.07925, "35555": 2.07235, "35560": 2.08881, "35565": 2.11772, "35570": 2.09606, "35575": 2.13358, "35580": 2.13913, "35585": 2.11157, "35590": 2.12655, "35595": 2.06864, "35600": 2.0948, "35605": 2.12215, "35610": 2.14698, "35615": 2.1172, "35620": 2.09248, "35625": 2.08458, "35630": 2.08219, "35635": 2.09264, "35640": 2.10049, "35645": 2.08739, "35650": 2.06828, "35655": 2.10565, "35660": 2.10811, "35665": 2.11238, "35670": 2.12465, "35675": 2.06693, "35680": 2.09116, "35685": 2.07861, "35690": 2.10741, "35695": 2.12462, "35700": 2.07101, "35705": 2.13873, "35710": 2.05273, "35715": 2.08432, "35720": 2.10454, "35725": 2.11366, "35730": 2.06283, "35735": 2.09544, "35740": 2.10947, "35745": 2.10945, "35750": 2.0923, "35755": 2.09204, "35760": 2.09543, "35765": 2.0914, "35770": 2.11161, "35775": 2.07999, "35780": 2.05547, "35785": 2.06976, "35790": 2.03707, "35795": 2.0741, "35800": 2.08033, "35805": 2.08753, "35810": 2.11108, "35815": 2.05889, "35820": 2.08277, "35825": 2.05875, "35830": 2.05971, "35835": 2.09395, "35840": 2.08588, "35845": 2.10453, "35850": 2.0756, "35855": 2.10088, "35860": 2.08693, "35865": 2.10252, "35870": 2.07877, "35875": 2.11244, "35880": 2.09192, "35885": 2.07219, "35890": 2.12602, "35895": 2.12657, "35900": 2.08912, "35905": 2.06503, "35910": 2.09653, "35915": 2.10196, "35920": 2.07577, "35925": 2.08003, "35930": 2.11027, "35935": 2.12897, "35940": 2.09094, "35945": 2.0967, "35950": 2.10101, "35955": 2.07936, "35960": 2.07862, "35965": 2.08452, "35970": 2.10977, "35975": 2.12473, "35980": 2.09398, "35985": 2.06126, "35990": 2.09436, "35995": 2.11542, "36000": 2.102, "36005": 2.07052, "36010": 2.05533, "36015": 2.08842, "36020": 2.07246, "36025": 2.08185, "36030": 2.12816, "36035": 2.08894, "36040": 2.11374, "36045": 2.06445, "36050": 2.10056, "36055": 2.13195, "36060": 2.15587, "36065": 2.09307, "36070": 2.12567, "36075": 2.07958, "36080": 2.12547, "36085": 2.08536, "36090": 2.08361, "36095": 2.10607, "36100": 2.09744, "36105": 2.10112, "36110": 2.08051, "36115": 2.06533, "36120": 2.09146, "36125": 2.08192, "36130": 2.07482, "36135": 2.08528, "36140": 2.07963, "36145": 2.11411, "36150": 2.11546, "36155": 2.124, "36160": 2.11495, "36165": 2.0924, "36170": 2.09858, "36175": 2.09507, "36180": 2.05922, "36185": 2.0338, "36190": 2.0868, "36195": 2.11049, "36200": 2.12883, "36205": 2.05412, "36210": 2.07974, "36215": 2.08662, "36220": 2.10764, "36225": 2.08042, "36230": 2.0997, "36235": 2.08638, "36240": 2.09386, "36245": 2.10456, "36250": 2.10916, "36255": 2.08417, "36260": 2.08097, "36265": 2.08466, "36270": 2.06672, "36275": 2.10806, "36280": 2.07923, "36285": 2.09507, "36290": 2.12032, "36295": 2.07451, "36300": 2.10906, "36305": 2.10881, "36310": 2.0702, "36315": 2.13417, "36320": 2.15467, "36325": 2.13189, "36330": 2.10005, "36335": 2.11413, "36340": 2.09377, "36345": 2.11033, "36350": 2.06448, "36355": 2.05354, "36360": 2.07032, "36365": 2.12933, "36370": 2.13009, "36375": 2.08118, "36380": 2.08644, "36385": 2.16759, "36390": 2.09983, "36395": 2.10119, "36400": 2.09423, "36405": 2.0692, "36410": 2.11604, "36415": 2.12818, "36420": 2.11986, "36425": 2.08431, "36430": 2.04972, "36435": 2.0824, "36440": 2.11589, "36445": 2.10209, "36450": 2.12913, "36455": 2.06009, "36460": 2.095, "36465": 2.10567, "36470": 2.07621, "36475": 2.06017, "36480": 2.09969, "36485": 2.09891, "36490": 2.09785, "36495": 2.09599, "36500": 2.0728, "36505": 2.10113, "36510": 2.11515, "36515": 2.0871, "36520": 2.13985, "36525": 2.0567, "36530": 2.09188, "36535": 2.08838, "36540": 2.07431, "36545": 2.12188, "36550": 2.09101, "36555": 2.09812, "36560": 2.0543, "36565": 2.07505, "36570": 2.11947, "36575": 2.12386, "36580": 2.12471, "36585": 2.07705, "36590": 2.07162, "36595": 2.11311, "36600": 2.05097, "36605": 2.12163, "36610": 2.10135, "36615": 2.09001, "36620": 2.14899, "36625": 2.11206, "36630": 2.09047, "36635": 2.1079, "36640": 2.11363, "36645": 2.08508, "36650": 2.09756, "36655": 2.08664, "36660": 2.14177, "36665": 2.0824, "36670": 2.11954, "36675": 2.09015, "36680": 2.12565, "36685": 2.09415, "36690": 2.07539, "36695": 2.0752, "36700": 2.13097, "36705": 2.10522, "36710": 2.06609, "36715": 2.05289, "36720": 2.08866, "36725": 2.09443, "36730": 2.11613, "36735": 2.0832, "36740": 2.11027, "36745": 2.09641, "36750": 2.09957, "36755": 2.11912, "36760": 2.09938, "36765": 2.06044, "36770": 2.10918, "36775": 2.04965, "36780": 2.12208, "36785": 2.08617, "36790": 2.08767, "36795": 2.12076, "36800": 2.09889, "36805": 2.10805, "36810": 2.10481, "36815": 2.04431, "36820": 2.08119, "36825": 2.0693, "36830": 2.09732, "36835": 2.08395, "36840": 2.07861, "36845": 2.07389, "36850": 2.1211, "36855": 2.05078, "36860": 2.04631, "36865": 2.07741, "36870": 2.12208, "36875": 2.05507, "36880": 2.07599, "36885": 2.09953, "36890": 2.1045, "36895": 2.07185, "36900": 2.10147, "36905": 2.06942, "36910": 2.09963, "36915": 2.09582, "36920": 2.07112, "36925": 2.07131, "36930": 2.10051, "36935": 2.08596, "36940": 2.09418, "36945": 2.09334, "36950": 2.08834, "36955": 2.08517, "36960": 2.05973, "36965": 2.11202, "36970": 2.09733, "36975": 2.10514, "36980": 2.09629, "36985": 2.071, "36990": 2.07118, "36995": 2.11015, "37000": 2.03395, "37005": 2.09048, "37010": 2.06983, "37015": 2.10283, "37020": 2.11512, "37025": 2.09295, "37030": 2.0614, "37035": 2.06391, "37040": 2.08553, "37045": 2.12969, "37050": 2.09822, "37055": 2.08712, "37060": 2.08554, "37065": 2.10148, "37070": 2.11795, "37075": 2.12042, "37080": 2.11589, "37085": 2.08889, "37090": 2.12195, "37095": 2.06881, "37100": 2.12819, "37105": 2.05448, "37110": 2.05643, "37115": 2.04907, "37120": 2.08441, "37125": 2.11994, "37130": 2.08495, "37135": 2.11715, "37140": 2.0619, "37145": 2.10497, "37150": 2.10348, "37155": 2.09886, "37160": 2.12798, "37165": 2.06747, "37170": 2.09628, "37175": 2.08459, "37180": 2.10079, "37185": 2.09803, "37190": 2.05804, "37195": 2.0913, "37200": 2.09778, "37205": 2.09985, "37210": 2.10107, "37215": 2.06295, "37220": 2.05352, "37225": 2.11138, "37230": 2.0751, "37235": 2.09015, "37240": 2.06877, "37245": 2.06297, "37250": 2.07283, "37255": 2.10961, "37260": 2.0833, "37265": 2.06918, "37270": 2.06311, "37275": 2.05248, "37280": 2.09261, "37285": 2.08568, "37290": 2.10331, "37295": 2.08369, "37300": 2.09031, "37305": 2.10817, "37310": 2.07758, "37315": 2.12163, "37320": 2.11667, "37325": 2.11678, "37330": 2.09061, "37335": 2.05228, "37340": 2.0496, "37345": 2.04912, "37350": 2.11878, "37355": 2.05633, "37360": 2.12443, "37365": 2.08675, "37370": 2.1255, "37375": 2.08017, "37380": 2.07092, "37385": 2.06677, "37390": 2.06812, "37395": 2.09342, "37400": 2.12142, "37405": 2.08126, "37410": 2.08877, "37415": 2.08358, "37420": 2.10624, "37425": 2.10262, "37430": 2.10426, "37435": 2.06065, "37440": 2.07371, "37445": 2.09338, "37450": 2.09672, "37455": 2.11363, "37460": 2.12924, "37465": 2.08672, "37470": 2.12388, "37475": 2.0842, "37480": 2.05164, "37485": 2.04041, "37490": 2.12041, "37495": 2.10137, "37500": 2.11181, "37505": 2.10278, "37510": 2.08279, "37515": 2.09086, "37520": 2.09827, "37525": 2.09136, "37530": 2.09984, "37535": 2.02713, "37540": 2.09676, "37545": 2.0717, "37550": 2.05952, "37555": 2.11213, "37560": 2.10546, "37565": 2.09945, "37570": 2.06375, "37575": 2.08901, "37580": 2.12071, "37585": 2.09673, "37590": 2.05072, "37595": 2.0952, "37600": 2.08843, "37605": 2.10789, "37610": 2.07118, "37615": 2.09867, "37620": 2.09725, "37625": 2.10319, "37630": 2.13936, "37635": 2.09831, "37640": 2.06513, "37645": 2.06036, "37650": 2.08233, "37655": 2.159, "37660": 2.10217, "37665": 2.06974, "37670": 2.1032, "37675": 2.07991, "37680": 2.08002, "37685": 2.13654, "37690": 2.11206, "37695": 2.06561, "37700": 2.08739, "37705": 2.07349, "37710": 2.11962, "37715": 2.09359, "37720": 2.07827, "37725": 2.05744, "37730": 2.10843, "37735": 2.10884, "37740": 2.13064, "37745": 2.10624, "37750": 2.09441, "37755": 2.10372, "37760": 2.08719, "37765": 2.11537, "37770": 2.06275, "37775": 2.10824, "37780": 2.0915, "37785": 2.0954, "37790": 2.11921, "37795": 2.095, "37800": 2.10358, "37805": 2.09113, "37810": 2.08739, "37815": 2.0741, "37820": 2.0723, "37825": 2.12048, "37830": 2.12509, "37835": 2.09918, "37840": 2.06302, "37845": 2.15357, "37850": 2.13026, "37855": 2.1045, "37860": 2.1289, "37865": 2.12293, "37870": 2.03676, "37875": 2.07938, "37880": 2.1096, "37885": 2.05984, "37890": 2.08209, "37895": 2.09736, "37900": 2.07597, "37905": 2.06911, "37910": 2.05438, "37915": 2.102, "37920": 2.13212, "37925": 2.09664, "37930": 2.08372, "37935": 2.09543, "37940": 2.08084, "37945": 2.0768, "37950": 2.11467, "37955": 2.09864, "37960": 2.12164, "37965": 2.1058, "37970": 2.0202, "37975": 2.07126, "37980": 2.09505, "37985": 2.09147, "37990": 2.07558, "37995": 2.0693, "38000": 2.11514, "38005": 2.10096, "38010": 2.09746, "38015": 2.0647, "38020": 2.07868, "38025": 2.09108, "38030": 2.07823, "38035": 2.08881, "38040": 2.09444, "38045": 2.07901, "38050": 2.07054, "38055": 2.12112, "38060": 2.10219, "38065": 2.11204, "38070": 2.10501, "38075": 2.07662, "38080": 2.08269, "38085": 2.09708, "38090": 2.06792, "38095": 2.10759, "38100": 2.06204, "38105": 2.10429, "38110": 2.0867, "38115": 2.09807, "38120": 2.07858, "38125": 2.11968, "38130": 2.06147, "38135": 2.1184, "38140": 2.05282, "38145": 2.07041, "38150": 2.10034, "38155": 2.09244, "38160": 2.10652, "38165": 2.05571, "38170": 2.04143, "38175": 2.06936, "38180": 2.08872, "38185": 2.11554, "38190": 2.09487, "38195": 2.08269, "38200": 2.07855, "38205": 2.10052, "38210": 2.08363, "38215": 2.11573, "38220": 2.10872, "38225": 2.12247, "38230": 2.1389, "38235": 2.07276, "38240": 2.10318, "38245": 2.09882, "38250": 2.06845, "38255": 2.06858, "38260": 2.10249, "38265": 2.131, "38270": 2.10777, "38275": 2.10059, "38280": 2.05695, "38285": 2.08349, "38290": 2.08974, "38295": 2.06335, "38300": 2.04887, "38305": 2.04098, "38310": 2.06398, "38315": 2.09889, "38320": 2.06917, "38325": 2.05298, "38330": 2.10103, "38335": 2.09816, "38340": 2.05681, "38345": 2.08549, "38350": 2.09091, "38355": 2.08255, "38360": 2.04888, "38365": 2.07158, "38370": 2.08803, "38375": 2.12568, "38380": 2.09552, "38385": 2.06105, "38390": 2.11064, "38395": 2.09161, "38400": 2.09523, "38405": 2.08153, "38410": 2.09081, "38415": 2.08842, "38420": 2.09292, "38425": 2.08922, "38430": 2.05419, "38435": 2.116, "38440": 2.09524, "38445": 2.05519, "38450": 2.13498, "38455": 2.09963, "38460": 2.05761, "38465": 2.06018, "38470": 2.08332, "38475": 2.09617, "38480": 2.09119, "38485": 2.09743, "38490": 2.09592, "38495": 2.11005, "38500": 2.09297, "38505": 2.11431, "38510": 2.10242, "38515": 2.10362, "38520": 2.09415, "38525": 2.09361, "38530": 2.09023, "38535": 2.09323, "38540": 2.09296, "38545": 2.07905, "38550": 2.06576, "38555": 2.04123, "38560": 2.08034, "38565": 2.07321, "38570": 2.07297, "38575": 2.09381, "38580": 2.11659, "38585": 2.10611, "38590": 2.0877, "38595": 2.07125, "38600": 2.10542, "38605": 2.07209, "38610": 2.09398, "38615": 2.09017, "38620": 2.05116, "38625": 2.05793, "38630": 2.05061, "38635": 2.09055, "38640": 2.13453, "38645": 2.07489, "38650": 2.03256, "38655": 2.10752, "38660": 2.07211, "38665": 2.1191, "38670": 2.13078, "38675": 2.06917, "38680": 2.09575, "38685": 2.06521, "38690": 2.1115, "38695": 2.09825, "38700": 2.0785, "38705": 2.08743, "38710": 2.06012, "38715": 2.07059, "38720": 2.0985, "38725": 2.05988, "38730": 2.07216, "38735": 2.05308, "38740": 2.07095, "38745": 2.11061, "38750": 2.07355, "38755": 2.06261, "38760": 2.10056, "38765": 2.0692, "38770": 2.07549, "38775": 2.07956, "38780": 2.02416, "38785": 2.10143, "38790": 2.04397, "38795": 2.10825, "38800": 2.09392, "38805": 2.08697, "38810": 2.08494, "38815": 2.07825, "38820": 2.07022, "38825": 2.10024, "38830": 2.11305, "38835": 2.08013, "38840": 2.11422, "38845": 2.09511, "38850": 2.07135, "38855": 2.06044, "38860": 2.07539, "38865": 2.10991, "38870": 2.09275, "38875": 2.07534, "38880": 2.13043, "38885": 2.07868, "38890": 2.11021, "38895": 2.08278, "38900": 2.07573, "38905": 2.102, "38910": 2.08286, "38915": 2.08879, "38920": 2.05566, "38925": 2.06984, "38930": 2.10415, "38935": 2.08289, "38940": 2.06409, "38945": 2.06681, "38950": 2.09146, "38955": 2.09394, "38960": 2.05285, "38965": 2.08354, "38970": 2.09612, "38975": 2.08736, "38980": 2.0678, "38985": 2.07459, "38990": 2.09032, "38995": 2.07099, "39000": 2.11591, "39005": 2.11856, "39010": 2.11866, "39015": 2.07779, "39020": 2.1088, "39025": 2.11468, "39030": 2.07042, "39035": 2.08428, "39040": 2.0799, "39045": 2.06057, "39050": 2.09393, "39055": 2.09019, "39060": 2.0864, "39065": 2.09025, "39070": 2.09947, "39075": 2.08945, "39080": 2.06875, "39085": 2.12219, "39090": 2.09082, "39095": 2.09789, "39100": 2.1004, "39105": 2.06891, "39110": 2.09547, "39115": 2.06829, "39120": 2.11404, "39125": 2.07915, "39130": 2.05507, "39135": 2.07137, "39140": 2.09747, "39145": 2.16132, "39150": 2.0814, "39155": 2.07108, "39160": 2.0818, "39165": 2.12183, "39170": 2.1254, "39175": 2.07019, "39180": 2.10666, "39185": 2.09454, "39190": 2.10776, "39195": 2.12271, "39200": 2.07303, "39205": 2.12653, "39210": 2.11503, "39215": 2.08544, "39220": 2.12768, "39225": 2.09855, "39230": 2.07157, "39235": 2.0803, "39240": 2.11129, "39245": 2.07938, "39250": 2.12113, "39255": 2.08644, "39260": 2.09734, "39265": 2.13087, "39270": 2.10951, "39275": 2.04959, "39280": 2.08553, "39285": 2.06036, "39290": 2.08901, "39295": 2.12104, "39300": 2.05376, "39305": 2.08676, "39310": 2.11474, "39315": 2.08206, "39320": 2.08239, "39325": 2.0304, "39330": 2.09093, "39335": 2.10853, "39340": 2.09305, "39345": 2.09222, "39350": 2.05919, "39355": 2.08728, "39360": 2.03006, "39365": 2.0719, "39370": 2.07749, "39375": 2.09389, "39380": 2.10488, "39385": 2.08912, "39390": 2.05984, "39395": 2.05988, "39400": 2.0792, "39405": 2.0546, "39410": 2.09258, "39415": 2.07306, "39420": 2.05503, "39425": 2.0626, "39430": 2.04211, "39435": 2.09343, "39440": 2.07182, "39445": 2.06518, "39450": 2.11337, "39455": 2.05176, "39460": 2.08183, "39465": 2.08067, "39470": 2.05332, "39475": 2.08893, "39480": 2.11265, "39485": 2.1345, "39490": 2.06928, "39495": 2.10826, "39500": 2.09765, "39505": 2.10927, "39510": 2.09712, "39515": 2.11634, "39520": 2.0764, "39525": 2.05669, "39530": 2.11885, "39535": 2.1335, "39540": 2.07634, "39545": 2.10468, "39550": 2.075, "39555": 2.0844, "39560": 2.0612, "39565": 2.08645, "39570": 2.05478, "39575": 2.05618, "39580": 2.06468, "39585": 2.07092, "39590": 2.10268, "39595": 2.09202, "39600": 2.11246, "39605": 2.08821, "39610": 2.08863, "39615": 2.04158, "39620": 2.12754, "39625": 2.10047, "39630": 2.1034, "39635": 2.10658, "39640": 2.05464, "39645": 2.0796, "39650": 2.06202, "39655": 2.07663, "39660": 2.06821, "39665": 2.07446, "39670": 2.08563, "39675": 2.07029, "39680": 2.05594, "39685": 2.06547, "39690": 2.10558, "39695": 2.10097, "39700": 2.10419, "39705": 2.07744, "39710": 2.09285, "39715": 2.02733, "39720": 2.1281, "39725": 2.07969, "39730": 2.08602, "39735": 2.09633, "39740": 2.10742, "39745": 2.07064, "39750": 2.07869, "39755": 2.09509, "39760": 2.07742, "39765": 2.12828, "39770": 2.09787, "39775": 2.07142, "39780": 2.0989, "39785": 2.06298, "39790": 2.07209, "39795": 2.09344, "39800": 2.08277, "39805": 2.13139, "39810": 2.07258, "39815": 2.08714, "39820": 2.03704, "39825": 2.08009, "39830": 2.0974, "39835": 2.08669, "39840": 2.1166, "39845": 2.08995, "39850": 2.06129, "39855": 2.07918, "39860": 2.08267, "39865": 2.07595, "39870": 2.0747, "39875": 2.09346, "39880": 2.04746, "39885": 2.09641, "39890": 2.07759, "39895": 2.06508, "39900": 2.06263, "39905": 2.09613, "39910": 2.08549, "39915": 2.05221, "39920": 2.06911, "39925": 2.12812, "39930": 2.10191, "39935": 2.07537, "39940": 2.11994, "39945": 2.10064, "39950": 2.03709, "39955": 2.10375, "39960": 2.10853, "39965": 2.11321, "39970": 2.04792, "39975": 2.07755, "39980": 2.09464, "39985": 2.06344, "39990": 2.08519, "39995": 2.0968, "40000": 2.07598, "40005": 2.06669, "40010": 2.06762, "40015": 2.11133, "40020": 2.07859, "40025": 2.09003, "40030": 2.09313, "40035": 2.09027, "40040": 2.07738, "40045": 2.07067, "40050": 2.10132, "40055": 2.11723, "40060": 2.11654, "40065": 2.11411, "40070": 2.07581, "40075": 2.06518, "40080": 2.09595, "40085": 2.08309, "40090": 2.09684, "40095": 2.09085, "40100": 2.07392, "40105": 2.02869, "40110": 2.08184, "40115": 2.07504, "40120": 2.05945, "40125": 2.07037, "40130": 2.06035, "40135": 2.08935, "40140": 2.0963, "40145": 2.06552, "40150": 2.12054, "40155": 2.06317, "40160": 2.06967, "40165": 2.08994, "40170": 2.06556, "40175": 2.12342, "40180": 2.10745, "40185": 2.06069, "40190": 2.08742, "40195": 2.04933, "40200": 2.05519, "40205": 2.08235, "40210": 2.07776, "40215": 2.09106, "40220": 2.09466, "40225": 2.05362, "40230": 2.12348, "40235": 2.06994, "40240": 2.06768, "40245": 2.08561, "40250": 2.1166, "40255": 2.12005, "40260": 2.08076, "40265": 2.04945, "40270": 2.0595, "40275": 2.09582, "40280": 2.07988, "40285": 2.0822, "40290": 2.04995, "40295": 2.0703, "40300": 2.0593, "40305": 2.10447, "40310": 2.12243, "40315": 2.08382, "40320": 2.11613, "40325": 2.09056, "40330": 2.1005, "40335": 2.10787, "40340": 2.09631, "40345": 2.08738, "40350": 2.06872, "40355": 2.09768, "40360": 2.09242, "40365": 2.06826, "40370": 2.113, "40375": 2.09149, "40380": 2.1007, "40385": 2.06867, "40390": 2.10046, "40395": 2.08482, "40400": 2.04352, "40405": 2.05932, "40410": 2.07915, "40415": 2.06274, "40420": 2.0556, "40425": 2.05932, "40430": 2.0997, "40435": 2.04834, "40440": 2.0518, "40445": 2.11098, "40450": 2.0606, "40455": 2.02826, "40460": 2.055, "40465": 2.08392, "40470": 2.07458, "40475": 2.08511, "40480": 2.07123, "40485": 2.06448, "40490": 2.13174, "40495": 2.07076, "40500": 2.0554, "40505": 2.08226, "40510": 2.02297, "40515": 2.08148, "40520": 2.10112, "40525": 2.11637, "40530": 2.08654, "40535": 2.04429, "40540": 2.08334, "40545": 2.10288, "40550": 2.0537, "40555": 2.08765, "40560": 2.11387, "40565": 2.08366, "40570": 2.05111, "40575": 2.05516, "40580": 2.09251, "40585": 2.05966, "40590": 2.05954, "40595": 2.08587, "40600": 2.05714, "40605": 2.05171, "40610": 2.07098, "40615": 2.09277, "40620": 2.04694, "40625": 2.06863, "40630": 2.11282, "40635": 2.09915, "40640": 2.07473, "40645": 2.06399, "40650": 2.09459, "40655": 2.11717, "40660": 2.05898, "40665": 2.06842, "40670": 2.0897, "40675": 2.06835, "40680": 2.08819, "40685": 2.08624, "40690": 2.08434, "40695": 2.07836, "40700": 2.08157, "40705": 2.06465, "40710": 2.06167, "40715": 2.12903, "40720": 2.05956, "40725": 2.02447, "40730": 2.06067, "40735": 2.08203, "40740": 2.11228, "40745": 2.07815, "40750": 2.07557, "40755": 2.07739, "40760": 2.10721, "40765": 2.11608, "40770": 2.03608, "40775": 2.06583, "40780": 2.0477, "40785": 2.08796, "40790": 2.09524, "40795": 2.11195, "40800": 2.05131, "40805": 2.048, "40810": 2.04678, "40815": 2.07926, "40820": 2.1066, "40825": 2.0794, "40830": 2.07432, "40835": 2.07901, "40840": 2.09895, "40845": 2.10201, "40850": 2.06695, "40855": 2.08564, "40860": 2.06682, "40865": 2.09571, "40870": 2.07212, "40875": 2.05167, "40880": 2.06757, "40885": 2.04684, "40890": 2.11095, "40895": 2.09308, "40900": 2.09153, "40905": 2.05962, "40910": 2.09984, "40915": 2.07594, "40920": 2.09059, "40925": 2.09957, "40930": 2.09445, "40935": 2.07788, "40940": 2.05885, "40945": 2.06913, "40950": 2.0788, "40955": 2.11232, "40960": 2.10233, "40965": 2.06574, "40970": 2.07719, "40975": 2.08027, "40980": 2.08134, "40985": 2.0878, "40990": 2.08924, "40995": 2.10265, "41000": 2.07823, "41005": 2.0596, "41010": 2.07412, "41015": 2.11677, "41020": 2.06082, "41025": 2.12341, "41030": 2.03974, "41035": 2.07785, "41040": 2.07629, "41045": 2.06904, "41050": 2.07617, "41055": 2.11225, "41060": 2.09071, "41065": 2.09393, "41070": 2.1344, "41075": 2.10499, "41080": 2.11207, "41085": 2.06027, "41090": 2.08722, "41095": 2.07169, "41100": 2.06923, "41105": 2.09798, "41110": 2.07929, "41115": 2.08515, "41120": 2.0973, "41125": 2.08921, "41130": 2.04854, "41135": 2.06737, "41140": 2.07849, "41145": 2.04955, "41150": 2.05215, "41155": 2.07424, "41160": 2.10558, "41165": 2.06935, "41170": 2.09618, "41175": 2.08045, "41180": 2.07325, "41185": 2.05514, "41190": 2.03687, "41195": 2.045, "41200": 2.01604, "41205": 2.02492, "41210": 2.09299, "41215": 2.12645, "41220": 2.07618, "41225": 2.11142, "41230": 2.02044, "41235": 2.08597, "41240": 2.07908, "41245": 2.06375, "41250": 2.07991, "41255": 2.0603, "41260": 2.07751, "41265": 2.05258, "41270": 2.09646, "41275": 2.08358, "41280": 2.08534, "41285": 2.11899, "41290": 2.12651, "41295": 2.04605, "41300": 2.1679, "41305": 2.03775, "41310": 2.08049, "41315": 2.11612, "41320": 2.10153, "41325": 2.05742, "41330": 2.07668, "41335": 2.09575, "41340": 2.12537, "41345": 2.10591, "41350": 2.08164, "41355": 2.08095, "41360": 2.06346, "41365": 2.11564, "41370": 2.08656, "41375": 2.07988, "41380": 2.041, "41385": 2.06005, "41390": 2.03177, "41395": 2.12907, "41400": 2.12015, "41405": 2.13508, "41410": 2.06895, "41415": 2.07188, "41420": 2.05893, "41425": 2.08625, "41430": 2.08782, "41435": 2.10045, "41440": 2.08006, "41445": 2.08405, "41450": 2.07287, "41455": 2.05463, "41460": 2.08699, "41465": 2.0808, "41470": 2.11066, "41475": 2.06345, "41480": 2.06885, "41485": 2.07889, "41490": 2.08852, "41495": 2.084, "41500": 2.09197, "41505": 2.04732, "41510": 2.1074, "41515": 2.08277, "41520": 2.09698, "41525": 2.09184, "41530": 2.03521, "41535": 2.06913, "41540": 2.08471, "41545": 2.08445, "41550": 2.0526, "41555": 2.06368, "41560": 2.10278, "41565": 2.0463, "41570": 2.06114, "41575": 2.09501, "41580": 2.09391, "41585": 2.11227, "41590": 2.06933, "41595": 2.07611, "41600": 2.05859, "41605": 2.0958, "41610": 2.04332, "41615": 2.11264, "41620": 2.08617, "41625": 2.07282, "41630": 2.08465, "41635": 2.11108, "41640": 2.08424, "41645": 2.1124, "41650": 2.05112, "41655": 2.09133, "41660": 2.09114, "41665": 2.07188, "41670": 2.05394, "41675": 2.07047, "41680": 2.04894, "41685": 2.10993, "41690": 2.08376, "41695": 2.04596, "41700": 2.08593, "41705": 2.09004, "41710": 2.12083, "41715": 2.05163, "41720": 2.06176, "41725": 2.05564, "41730": 2.0946, "41735": 2.09974, "41740": 2.09473, "41745": 2.08923, "41750": 2.11378, "41755": 2.07026, "41760": 2.06715, "41765": 2.08375, "41770": 2.09416, "41775": 2.08797, "41780": 2.08579, "41785": 2.06633, "41790": 2.07255, "41795": 2.1261, "41800": 2.09147, "41805": 2.08966, "41810": 2.09285, "41815": 2.12528, "41820": 2.11524, "41825": 2.03648, "41830": 2.11093, "41835": 2.06161, "41840": 2.05191, "41845": 2.10427, "41850": 2.03755, "41855": 2.11233, "41860": 2.06087, "41865": 2.08375, "41870": 2.0941, "41875": 2.11168, "41880": 2.08503, "41885": 2.09036, "41890": 2.10566, "41895": 2.07177, "41900": 2.13085, "41905": 2.07138, "41910": 2.05043, "41915": 2.10604, "41920": 2.09618, "41925": 2.05532, "41930": 2.10267, "41935": 2.06714, "41940": 2.11021, "41945": 2.06813, "41950": 2.07693, "41955": 2.06533, "41960": 2.13004, "41965": 2.04598, "41970": 2.12203, "41975": 2.05434, "41980": 2.09149, "41985": 2.07473, "41990": 2.05851, "41995": 2.0853, "42000": 2.07745, "42005": 2.11645, "42010": 2.08482, "42015": 2.08773, "42020": 2.08481, "42025": 2.07162, "42030": 2.06911, "42035": 2.10803, "42040": 2.11135, "42045": 2.06451, "42050": 2.04774, "42055": 2.0898, "42060": 2.08379, "42065": 2.05151, "42070": 2.09357, "42075": 2.09756, "42080": 2.08601, "42085": 2.04513, "42090": 2.08244, "42095": 2.03133, "42100": 2.04421, "42105": 2.07383, "42110": 2.09649, "42115": 2.08112, "42120": 2.06562, "42125": 2.05347, "42130": 2.09322, "42135": 2.06601, "42140": 2.06334, "42145": 2.06046, "42150": 2.07405, "42155": 2.07181, "42160": 2.0973, "42165": 2.03611, "42170": 2.09096, "42175": 2.065, "42180": 2.10502, "42185": 2.08921, "42190": 2.09916, "42195": 2.06385, "42200": 2.06467, "42205": 2.08248, "42210": 2.08109, "42215": 2.07228, "42220": 2.03059, "42225": 2.08053, "42230": 2.08268, "42235": 2.09371, "42240": 2.07769, "42245": 2.10692, "42250": 2.12252, "42255": 2.07559, "42260": 2.09902, "42265": 2.08653, "42270": 2.11002, "42275": 2.0754, "42280": 2.0852, "42285": 2.08585, "42290": 2.0553, "42295": 2.04859, "42300": 2.03792, "42305": 2.05811, "42310": 2.08899, "42315": 2.06921, "42320": 2.0967, "42325": 2.08009, "42330": 2.0802, "42335": 2.07187, "42340": 2.11383, "42345": 2.0748, "42350": 2.08652, "42355": 2.02999, "42360": 2.04997, "42365": 2.08628, "42370": 2.04729, "42375": 2.09131, "42380": 2.08305, "42385": 2.05726, "42390": 2.05214, "42395": 2.11801, "42400": 2.10421, "42405": 2.10956, "42410": 2.08154, "42415": 2.05111, "42420": 2.10066, "42425": 2.11545, "42430": 2.03661, "42435": 2.07581, "42440": 2.0855, "42445": 2.09954, "42450": 2.09421, "42455": 2.11466, "42460": 2.10356, "42465": 2.10244, "42470": 2.05914, "42475": 2.05707, "42480": 2.05233, "42485": 2.07164, "42490": 2.0936, "42495": 2.05485, "42500": 2.09639, "42505": 2.09257, "42510": 2.13385, "42515": 2.07623, "42520": 2.0302, "42525": 2.07511, "42530": 2.09226, "42535": 2.10523, "42540": 2.06865, "42545": 2.10463, "42550": 2.08405, "42555": 2.06337, "42560": 2.03561, "42565": 2.09725, "42570": 2.05483, "42575": 2.03215, "42580": 2.06504, "42585": 2.09349, "42590": 2.06688, "42595": 2.06151, "42600": 2.06135, "42605": 2.01726, "42610": 2.06907, "42615": 2.09402, "42620": 2.10266, "42625": 2.08622, "42630": 2.06404, "42635": 2.07812, "42640": 2.08736, "42645": 2.05926, "42650": 2.06807, "42655": 2.09289, "42660": 2.0605, "42665": 2.04579, "42670": 2.10882, "42675": 2.03012, "42680": 2.06553, "42685": 2.07435, "42690": 2.05562, "42695": 2.08501, "42700": 2.08268, "42705": 2.04085, "42710": 2.11298, "42715": 2.04576, "42720": 2.08247, "42725": 2.05381, "42730": 2.03585, "42735": 2.06311, "42740": 2.08544, "42745": 2.03915, "42750": 2.0826, "42755": 2.02494, "42760": 2.07545, "42765": 2.07654, "42770": 2.09769, "42775": 2.10325, "42780": 2.0579, "42785": 2.10987, "42790": 2.04142, "42795": 2.06121, "42800": 2.11059, "42805": 2.08755, "42810": 2.08042, "42815": 2.12347, "42820": 2.08305, "42825": 2.07942, "42830": 2.08019, "42835": 2.11443, "42840": 2.03602, "42845": 2.09416, "42850": 2.10299, "42855": 2.1051, "42860": 2.08549, "42865": 2.10539, "42870": 2.05888, "42875": 2.0691, "42880": 2.07253, "42885": 2.0581, "42890": 2.09897, "42895": 2.08018, "42900": 2.07128, "42905": 2.09341, "42910": 2.10052, "42915": 2.04175, "42920": 2.05517, "42925": 2.10639, "42930": 2.07996, "42935": 2.09872, "42940": 2.05651, "42945": 2.05328, "42950": 2.07173, "42955": 2.04902, "42960": 2.09954, "42965": 2.11906, "42970": 2.0378, "42975": 2.05323, "42980": 2.05428, "42985": 2.08477, "42990": 2.02262, "42995": 2.06857, "43000": 2.04551, "43005": 2.09504, "43010": 2.09529, "43015": 2.05414, "43020": 2.05282, "43025": 2.07468, "43030": 2.04959, "43035": 2.07698, "43040": 2.09689, "43045": 2.03439, "43050": 2.0569, "43055": 2.07045, "43060": 2.08785, "43065": 2.03946, "43070": 2.09746, "43075": 2.07742, "43080": 2.03207, "43085": 2.09306, "43090": 2.12147, "43095": 2.03324, "43100": 2.082, "43105": 2.07021, "43110": 2.10208, "43115": 2.07827, "43120": 2.09668, "43125": 2.09106, "43130": 2.1439, "43135": 2.07858, "43140": 2.09133, "43145": 2.11079, "43150": 2.1029, "43155": 2.06816, "43160": 2.05251, "43165": 2.06718, "43170": 2.12075, "43175": 2.07839, "43180": 2.09095, "43185": 2.08232, "43190": 2.10971, "43195": 2.11642, "43200": 2.09344, "43205": 2.00265, "43210": 2.08436, "43215": 2.09327, "43220": 2.07584, "43225": 2.13532, "43230": 2.03969, "43235": 2.11587, "43240": 2.08258, "43245": 2.06929, "43250": 2.12348, "43255": 2.05308, "43260": 2.11874, "43265": 2.04932, "43270": 2.09636, "43275": 2.11107, "43280": 2.06974, "43285": 2.0739, "43290": 2.09387, "43295": 2.08249, "43300": 2.0999, "43305": 2.14941, "43310": 2.09866, "43315": 2.08387, "43320": 2.08879, "43325": 2.07017, "43330": 2.08032, "43335": 2.06821, "43340": 2.07191, "43345": 2.03543, "43350": 2.10933, "43355": 2.09981, "43360": 2.07564, "43365": 2.05151, "43370": 2.08762, "43375": 2.10163, "43380": 2.07536, "43385": 2.07659, "43390": 2.09805, "43395": 2.09546, "43400": 2.07032, "43405": 2.04674, "43410": 2.08056, "43415": 2.04925, "43420": 2.07408, "43425": 2.06632, "43430": 2.07127, "43435": 2.08548, "43440": 2.07775, "43445": 2.09133, "43450": 2.03665, "43455": 2.10806, "43460": 2.1155, "43465": 2.09662, "43470": 2.08632, "43475": 2.10184, "43480": 2.06991, "43485": 2.0822, "43490": 2.09798, "43495": 2.08573, "43500": 2.06147, "43505": 2.06674, "43510": 2.06341, "43515": 2.05646, "43520": 2.09068, "43525": 2.02279, "43530": 2.08126, "43535": 2.08359, "43540": 2.08697, "43545": 2.05681, "43550": 2.07592, "43555": 2.0384, "43560": 2.06266, "43565": 2.09767, "43570": 2.05846, "43575": 2.07565, "43580": 2.08431, "43585": 2.02045, "43590": 2.09229, "43595": 2.05927, "43600": 2.0681, "43605": 2.10216, "43610": 2.09119, "43615": 2.05268, "43620": 2.09569, "43625": 2.10423, "43630": 2.06173, "43635": 2.04722, "43640": 2.03293, "43645": 2.0626, "43650": 2.10186, "43655": 2.09463, "43660": 2.07694, "43665": 2.0592, "43670": 2.04587, "43675": 2.05575, "43680": 2.12638, "43685": 2.03188, "43690": 2.05388, "43695": 2.09752, "43700": 2.07385, "43705": 2.08387, "43710": 2.06777, "43715": 2.05487, "43720": 2.12352, "43725": 2.10096, "43730": 2.05386, "43735": 2.07947, "43740": 2.0426, "43745": 2.04423, "43750": 2.08193, "43755": 2.05315, "43760": 2.08993, "43765": 2.0892, "43770": 2.07081, "43775": 2.0477, "43780": 2.10895, "43785": 2.06024, "43790": 2.09781, "43795": 2.0768, "43800": 2.11698, "43805": 2.09816, "43810": 2.0842, "43815": 2.07611, "43820": 2.04916, "43825": 2.14386, "43830": 2.05366, "43835": 2.09039, "43840": 2.06274, "43845": 2.0901, "43850": 2.07884, "43855": 2.06692, "43860": 2.062, "43865": 2.12182, "43870": 2.07037, "43875": 2.037, "43880": 2.06871, "43885": 2.09811, "43890": 2.02594, "43895": 2.06423, "43900": 2.06317, "43905": 2.04983, "43910": 2.13438, "43915": 2.12489, "43920": 2.03949, "43925": 2.04962, "43930": 2.06144, "43935": 2.0446, "43940": 2.0737, "43945": 2.02602, "43950": 2.10677, "43955": 2.03717, "43960": 2.09965, "43965": 2.06475, "43970": 2.10024, "43975": 2.05584, "43980": 2.03934, "43985": 2.0748, "43990": 2.04113, "43995": 2.08406, "44000": 2.05167, "44005": 2.04129, "44010": 2.10131, "44015": 2.04651, "44020": 2.08422, "44025": 2.10582, "44030": 2.07784, "44035": 2.05603, "44040": 2.00557, "44045": 2.1144, "44050": 2.08797, "44055": 2.10247, "44060": 2.11095, "44065": 2.06043, "44070": 2.08527, "44075": 2.09267, "44080": 2.01558, "44085": 2.07606, "44090": 2.04562, "44095": 2.06689, "44100": 2.1002, "44105": 2.09608, "44110": 2.06272, "44115": 2.07283, "44120": 2.05312, "44125": 2.07869, "44130": 2.09156, "44135": 2.06388, "44140": 2.05258, "44145": 2.05863, "44150": 2.10639, "44155": 2.09636, "44160": 2.09907, "44165": 2.08854, "44170": 2.05598, "44175": 2.07068, "44180": 2.07802, "44185": 2.05264, "44190": 2.09356, "44195": 2.03787, "44200": 2.05239, "44205": 2.08025, "44210": 2.0552, "44215": 2.0666, "44220": 2.07758, "44225": 2.07062, "44230": 2.08358, "44235": 2.05928, "44240": 2.11212, "44245": 2.07133, "44250": 2.07215, "44255": 2.08204, "44260": 2.04095, "44265": 2.05023, "44270": 2.08605, "44275": 2.04372, "44280": 2.07918, "44285": 2.14209, "44290": 2.09671, "44295": 2.11625, "44300": 2.06897, "44305": 2.05673, "44310": 2.05467, "44315": 2.04916, "44320": 2.04699, "44325": 2.07596, "44330": 2.09896, "44335": 2.08601, "44340": 2.08491, "44345": 2.07727, "44350": 2.06428, "44355": 2.09584, "44360": 2.10441, "44365": 2.086, "44370": 2.08505, "44375": 2.07972, "44380": 2.08757, "44385": 2.06737, "44390": 2.08481, "44395": 2.10842, "44400": 2.07047, "44405": 2.05159, "44410": 2.05777, "44415": 2.08471, "44420": 2.07385, "44425": 2.09766, "44430": 2.05886, "44435": 2.0789, "44440": 2.02874, "44445": 2.11742, "44450": 2.05751, "44455": 2.08165, "44460": 2.04634, "44465": 2.06376, "44470": 2.08767, "44475": 2.09187, "44480": 2.06105, "44485": 2.06212, "44490": 2.06674, "44495": 2.09475, "44500": 2.10483, "44505": 2.06623, "44510": 2.06919, "44515": 2.05809, "44520": 2.04244, "44525": 2.03533, "44530": 2.07097, "44535": 2.04529, "44540": 2.10223, "44545": 2.10681, "44550": 2.06005, "44555": 2.10804, "44560": 2.03583, "44565": 2.07956, "44570": 2.0562, "44575": 2.10978, "44580": 2.08123, "44585": 2.03259, "44590": 2.08305, "44595": 2.06671, "44600": 2.08378, "44605": 2.06174, "44610": 2.10677, "44615": 2.05726, "44620": 2.03815, "44625": 2.07695, "44630": 2.0931, "44635": 2.05993, "44640": 2.02784, "44645": 2.05391, "44650": 2.08836, "44655": 2.06502, "44660": 2.09016, "44665": 2.09129, "44670": 2.08925, "44675": 2.09637, "44680": 2.11092, "44685": 2.09405, "44690": 2.05821, "44695": 2.07209, "44700": 2.0659, "44705": 2.05063, "44710": 2.09969, "44715": 2.04001, "44720": 2.06402, "44725": 2.07408, "44730": 2.10239, "44735": 2.04415, "44740": 2.03869, "44745": 2.04436, "44750": 2.1053, "44755": 2.10422, "44760": 2.11747, "44765": 2.07515, "44770": 2.0749, "44775": 2.05686, "44780": 2.04638, "44785": 2.08471, "44790": 2.09941, "44795": 2.07953, "44800": 2.05906, "44805": 2.08358, "44810": 2.0881, "44815": 2.09032, "44820": 2.08504, "44825": 2.12377, "44830": 2.0515, "44835": 2.1082, "44840": 2.09082, "44845": 2.10408, "44850": 2.09631, "44855": 2.08278, "44860": 2.06089, "44865": 2.12209, "44870": 2.07851, "44875": 2.09758, "44880": 2.08143, "44885": 2.10337, "44890": 2.0525, "44895": 2.10096, "44900": 2.04688, "44905": 2.04652, "44910": 2.06184, "44915": 2.07532, "44920": 2.08274, "44925": 2.04524, "44930": 2.07721, "44935": 2.01544, "44940": 2.10058, "44945": 2.07442, "44950": 2.08785, "44955": 2.01365, "44960": 2.13924, "44965": 2.07356, "44970": 2.06457, "44975": 2.09709, "44980": 2.06155, "44985": 2.07095, "44990": 2.10415, "44995": 2.02685, "45000": 2.08803, "45005": 2.09414, "45010": 2.06374, "45015": 2.07078, "45020": 2.06358, "45025": 2.07644, "45030": 2.08074, "45035": 2.07275, "45040": 2.05833, "45045": 2.09141, "45050": 2.10392, "45055": 2.07419, "45060": 2.08088, "45065": 2.08353, "45070": 2.05931, "45075": 2.04871, "45080": 2.0536, "45085": 2.04965, "45090": 2.06306, "45095": 2.08424, "45100": 2.0751, "45105": 2.12644, "45110": 2.07772, "45115": 2.06457, "45120": 2.12742, "45125": 2.08494, "45130": 2.05375, "45135": 2.05258, "45140": 2.05152, "45145": 2.04773, "45150": 2.05496, "45155": 2.06703, "45160": 2.06981, "45165": 2.10175, "45170": 2.08052, "45175": 2.05323, "45180": 2.08656, "45185": 2.07367, "45190": 2.07788, "45195": 2.09178, "45200": 2.08142, "45205": 2.05722, "45210": 2.10248, "45215": 2.07328, "45220": 2.03453, "45225": 2.08357, "45230": 2.09567, "45235": 2.09337, "45240": 2.08557, "45245": 2.10135, "45250": 2.02064, "45255": 2.04831, "45260": 2.08702, "45265": 2.08209, "45270": 2.05831, "45275": 2.06304, "45280": 2.02105, "45285": 2.06832, "45290": 2.06617, "45295": 2.10947, "45300": 2.04516, "45305": 2.06581, "45310": 2.07064, "45315": 2.09769, "45320": 2.10982, "45325": 2.04765, "45330": 2.04765, "45335": 2.08614, "45340": 2.10524, "45345": 2.07211, "45350": 2.07796, "45355": 2.06814, "45360": 2.03861, "45365": 2.08546, "45370": 2.09524, "45375": 2.08518, "45380": 2.09176, "45385": 2.12135, "45390": 2.11663, "45395": 2.08915, "45400": 2.07418, "45405": 2.06121, "45410": 2.07537, "45415": 2.13103, "45420": 2.08333, "45425": 2.06878, "45430": 2.04349, "45435": 2.05125, "45440": 2.06863, "45445": 2.08328, "45450": 2.09695, "45455": 2.0604, "45460": 2.07263, "45465": 2.05153, "45470": 2.0437, "45475": 2.08882, "45480": 2.09101, "45485": 2.07265, "45490": 2.0698, "45495": 2.03928, "45500": 2.03958, "45505": 2.09096, "45510": 2.1035, "45515": 2.06769, "45520": 2.05011, "45525": 2.06022, "45530": 2.12153, "45535": 2.04952, "45540": 2.04509, "45545": 2.04316, "45550": 2.0952, "45555": 2.0524, "45560": 2.06073, "45565": 2.10593, "45570": 2.04517, "45575": 2.06923, "45580": 2.06602, "45585": 2.12093, "45590": 2.00228, "45595": 2.08219, "45600": 2.06278, "45605": 2.11004, "45610": 2.0989, "45615": 2.04103, "45620": 2.07393, "45625": 2.06744, "45630": 2.0679, "45635": 2.09602, "45640": 2.07872, "45645": 2.09982, "45650": 2.04506, "45655": 2.03779, "45660": 2.07455, "45665": 2.10169, "45670": 2.07389, "45675": 2.03746, "45680": 2.06379, "45685": 2.07689, "45690": 2.04161, "45695": 2.11605, "45700": 2.06561, "45705": 2.01052, "45710": 2.0988, "45715": 2.09821, "45720": 2.03789, "45725": 2.09936, "45730": 2.05815, "45735": 2.04061, "45740": 2.12282, "45745": 2.04869, "45750": 2.03887, "45755": 2.1002, "45760": 2.05966, "45765": 2.04696, "45770": 2.08484, "45775": 2.0799, "45780": 2.03788, "45785": 2.04782, "45790": 2.08547, "45795": 2.07498, "45800": 2.09878, "45805": 2.07951, "45810": 2.05135, "45815": 2.08133, "45820": 2.10219, "45825": 2.05538, "45830": 2.0284, "45835": 2.04929, "45840": 2.06706, "45845": 2.03757, "45850": 2.07901, "45855": 2.07585, "45860": 2.11788, "45865": 2.09072, "45870": 2.02812, "45875": 2.1292, "45880": 2.09497, "45885": 2.05583, "45890": 2.05033, "45895": 2.06544, "45900": 2.05461, "45905": 2.09753, "45910": 2.06635, "45915": 2.09974, "45920": 2.10515, "45925": 2.09242, "45930": 2.03945, "45935": 2.10277, "45940": 2.08606, "45945": 2.09952, "45950": 2.05635, "45955": 2.04839, "45960": 2.07443, "45965": 2.09457, "45970": 2.06368, "45975": 2.078, "45980": 2.11744, "45985": 2.08327, "45990": 2.07157, "45995": 2.06764, "46000": 2.0893, "46005": 2.10441, "46010": 2.12918, "46015": 2.0913, "46020": 2.02622, "46025": 2.03523, "46030": 2.04736, "46035": 2.07746, "46040": 2.09308, "46045": 2.07684, "46050": 2.09838, "46055": 2.09021, "46060": 2.0462, "46065": 2.06147, "46070": 2.05797, "46075": 2.08273, "46080": 2.07444, "46085": 2.08477, "46090": 2.03478, "46095": 2.06191, "46100": 2.04434, "46105": 2.08883, "46110": 2.06558, "46115": 2.04422, "46120": 2.03528, "46125": 2.08366, "46130": 2.05785, "46135": 2.05206, "46140": 2.08219, "46145": 2.05982, "46150": 2.06975, "46155": 2.06684, "46160": 2.07112, "46165": 2.09218, "46170": 2.10561, "46175": 2.0756, "46180": 2.05856, "46185": 2.02585, "46190": 2.08283, "46195": 2.07098, "46200": 2.06265, "46205": 2.09123, "46210": 2.02361, "46215": 2.03287, "46220": 2.06955, "46225": 2.0816, "46230": 2.02226, "46235": 2.0755, "46240": 2.0479, "46245": 2.0665, "46250": 2.06373, "46255": 2.0734, "46260": 2.08578, "46265": 2.08358, "46270": 2.06781, "46275": 2.0711, "46280": 2.09065, "46285": 2.05536, "46290": 2.06824, "46295": 2.05301, "46300": 2.07672, "46305": 2.0956, "46310": 2.05015, "46315": 2.09761, "46320": 2.05978, "46325": 2.07762, "46330": 2.02205, "46335": 2.06025, "46340": 2.09387, "46345": 2.08637, "46350": 2.07414, "46355": 2.05309, "46360": 2.07578, "46365": 2.1064, "46370": 2.05233, "46375": 2.06805, "46380": 2.07698, "46385": 2.0447, "46390": 2.06456, "46395": 2.08174, "46400": 2.04124, "46405": 2.05701, "46410": 2.09117, "46415": 2.05146, "46420": 2.05243, "46425": 2.06301, "46430": 2.08295, "46435": 2.04112, "46440": 2.11167, "46445": 2.05632, "46450": 2.04049, "46455": 2.07511, "46460": 2.07491, "46465": 2.04728, "46470": 2.00727, "46475": 2.10111, "46480": 2.09906, "46485": 2.08477, "46490": 2.11297, "46495": 2.10834, "46500": 2.08161, "46505": 2.06363, "46510": 2.08132, "46515": 2.0763, "46520": 2.01359, "46525": 2.08167, "46530": 2.05435, "46535": 2.08342, "46540": 2.08469, "46545": 2.05172, "46550": 2.06886, "46555": 2.09007, "46560": 2.05869, "46565": 2.09486, "46570": 2.07038, "46575": 2.08319, "46580": 2.07119, "46585": 2.06388, "46590": 2.05766, "46595": 2.0681, "46600": 2.09244, "46605": 2.08314, "46610": 2.10127, "46615": 2.06665, "46620": 2.06591, "46625": 2.07539, "46630": 2.0899, "46635": 2.06532, "46640": 2.10956, "46645": 2.0961, "46650": 2.03274, "46655": 2.03462, "46660": 2.0619, "46665": 2.10125, "46670": 2.08192, "46675": 2.05492, "46680": 2.03947, "46685": 2.03432, "46690": 2.09603, "46695": 2.0627, "46700": 2.04126, "46705": 2.0635, "46710": 2.04649, "46715": 2.10191, "46720": 2.08689, "46725": 2.0807, "46730": 2.06941, "46735": 2.05196, "46740": 2.08495, "46745": 2.06803, "46750": 2.0949, "46755": 2.05527, "46760": 2.07369, "46765": 2.06887, "46770": 2.05339, "46775": 2.09148, "46780": 2.1153, "46785": 2.07174, "46790": 2.09186, "46795": 2.08717, "46800": 2.07184, "46805": 2.06742, "46810": 2.07868, "46815": 2.03362, "46820": 2.06056, "46825": 2.077, "46830": 2.10097, "46835": 2.06921, "46840": 2.06869, "46845": 2.05137, "46850": 2.11625, "46855": 2.08482, "46860": 2.05241, "46865": 2.10445, "46870": 2.07573, "46875": 2.0192, "46880": 2.12529, "46885": 2.09652, "46890": 2.0848, "46895": 2.05568, "46900": 2.01618, "46905": 2.06636, "46910": 2.04221, "46915": 2.05456, "46920": 2.04472, "46925": 2.07512, "46930": 2.08156, "46935": 2.06487, "46940": 2.09308, "46945": 2.07394, "46950": 2.04613, "46955": 2.05777, "46960": 2.0937, "46965": 2.07491, "46970": 2.02252, "46975": 2.04232, "46980": 2.10403, "46985": 2.09678, "46990": 2.07587, "46995": 2.03308, "47000": 2.07771, "47005": 2.08546, "47010": 2.06249, "47015": 2.07881, "47020": 2.06376, "47025": 2.07256, "47030": 2.04994, "47035": 2.08946, "47040": 2.05463, "47045": 2.05988, "47050": 2.04673, "47055": 2.03227, "47060": 2.05441, "47065": 2.05665, "47070": 2.06068, "47075": 2.05246, "47080": 2.04882, "47085": 2.10393, "47090": 2.03478, "47095": 2.05022, "47100": 2.12235, "47105": 2.05732, "47110": 2.06418, "47115": 2.09752, "47120": 2.05815, "47125": 2.02826, "47130": 2.06097, "47135": 2.08688, "47140": 2.07674, "47145": 2.09083, "47150": 2.05919, "47155": 2.04249, "47160": 2.08786, "47165": 2.05678, "47170": 2.07311, "47175": 2.08592, "47180": 2.06787, "47185": 2.05174, "47190": 2.07174, "47195": 2.09108, "47200": 2.08855, "47205": 2.05139, "47210": 2.08848, "47215": 2.069, "47220": 2.04314, "47225": 2.10357, "47230": 2.08488, "47235": 2.04199, "47240": 2.10501, "47245": 2.05836, "47250": 2.11203, "47255": 2.10106, "47260": 2.07673, "47265": 2.08587, "47270": 2.07277, "47275": 2.09039, "47280": 2.07404, "47285": 2.08603, "47290": 2.043, "47295": 2.05682, "47300": 2.03606, "47305": 2.07069, "47310": 2.06467, "47315": 2.03526, "47320": 2.07429, "47325": 2.05577, "47330": 2.10077, "47335": 2.09096, "47340": 2.07884, "47345": 2.09351, "47350": 2.04838, "47355": 2.06718, "47360": 2.07536, "47365": 2.08095, "47370": 2.09291, "47375": 2.05914, "47380": 2.03223, "47385": 2.05746, "47390": 2.10753, "47395": 2.05483, "47400": 2.08474, "47405": 2.06081, "47410": 2.08183, "47415": 2.06745, "47420": 2.02892, "47425": 2.03348, "47430": 2.09027, "47435": 2.06674, "47440": 2.07511, "47445": 2.10715, "47450": 2.08005, "47455": 2.04112, "47460": 2.09102, "47465": 2.05849, "47470": 2.03122, "47475": 2.091, "47480": 2.06222, "47485": 2.0406, "47490": 2.06111, "47495": 2.09507, "47500": 2.04634, "47505": 2.07249, "47510": 2.05869, "47515": 2.10926, "47520": 2.1057, "47525": 2.02208, "47530": 2.07994, "47535": 2.04232, "47540": 2.09085, "47545": 2.06418, "47550": 2.08861, "47555": 2.08493, "47560": 2.07053, "47565": 2.0655, "47570": 2.02637, "47575": 2.07972, "47580": 2.06474, "47585": 2.07018, "47590": 2.05824, "47595": 2.08747, "47600": 2.10115, "47605": 2.09142, "47610": 2.00843, "47615": 2.0856, "47620": 2.0356, "47625": 2.12488, "47630": 2.06474, "47635": 2.06478, "47640": 2.07701, "47645": 2.04547, "47650": 2.02193, "47655": 2.06858, "47660": 2.07472, "47665": 2.07856, "47670": 2.02916, "47675": 2.07338, "47680": 2.02911, "47685": 1.98486, "47690": 2.06546, "47695": 2.09014, "47700": 2.06099, "47705": 2.10531, "47710": 2.07109, "47715": 2.07199, "47720": 2.02884, "47725": 2.09861, "47730": 2.09142, "47735": 2.04928, "47740": 2.05188, "47745": 2.04587, "47750": 2.04818, "47755": 2.07902, "47760": 2.07516, "47765": 2.05028, "47770": 2.04279, "47775": 2.05952, "47780": 2.06792, "47785": 2.06413, "47790": 2.04086, "47795": 2.04483, "47800": 2.08714, "47805": 2.05159, "47810": 2.08597, "47815": 2.11899, "47820": 2.06034, "47825": 2.05494, "47830": 2.07722, "47835": 2.05925, "47840": 2.07078, "47845": 2.05075, "47850": 2.09284, "47855": 2.05238, "47860": 2.06642, "47865": 2.06575, "47870": 2.05397, "47875": 2.0805, "47880": 2.00936, "47885": 2.06231, "47890": 2.11555, "47895": 2.05633, "47900": 2.03841, "47905": 2.05506, "47910": 2.08637, "47915": 2.05386, "47920": 2.09121, "47925": 2.07385, "47930": 2.1001, "47935": 2.07326, "47940": 2.0639, "47945": 2.10333, "47950": 2.05673, "47955": 2.02292, "47960": 2.07571, "47965": 2.12877, "47970": 2.09049, "47975": 2.09018, "47980": 2.07661, "47985": 2.02605, "47990": 2.08167, "47995": 2.03772, "48000": 2.08945, "48005": 2.06362, "48010": 2.08342, "48015": 2.0631, "48020": 2.15002, "48025": 2.03143, "48030": 2.05025, "48035": 2.05007, "48040": 2.05598, "48045": 2.0364, "48050": 2.09427, "48055": 1.99733, "48060": 2.01958, "48065": 2.04927, "48070": 2.09097, "48075": 2.07856, "48080": 2.06623, "48085": 2.03981, "48090": 2.08292, "48095": 2.05082, "48100": 2.08487, "48105": 2.05525, "48110": 2.05875, "48115": 2.07714, "48120": 2.07054, "48125": 2.13198, "48130": 2.04882, "48135": 2.08142, "48140": 2.05677, "48145": 2.02349, "48150": 2.09833, "48155": 2.03678, "48160": 2.06706, "48165": 2.06193, "48170": 2.04604, "48175": 2.06699, "48180": 2.07045, "48185": 2.03039, "48190": 2.02762, "48195": 2.03445, "48200": 2.0602, "48205": 2.06649, "48210": 2.026, "48215": 2.08574, "48220": 2.05366, "48225": 2.04015, "48230": 2.07063, "48235": 2.11017, "48240": 2.05602, "48245": 2.06558, "48250": 2.07689, "48255": 2.05458, "48260": 2.0765, "48265": 2.07415, "48270": 2.07833, "48275": 2.08957, "48280": 2.02251, "48285": 2.09437, "48290": 2.05908, "48295": 2.06671, "48300": 2.05758, "48305": 2.08766, "48310": 2.06895, "48315": 2.07713, "48320": 2.05856, "48325": 2.08835, "48330": 2.06516, "48335": 2.10301, "48340": 2.09074, "48345": 2.10828, "48350": 2.06662, "48355": 2.08926, "48360": 2.05588, "48365": 2.07416, "48370": 2.05825, "48375": 2.05941, "48380": 2.08001, "48385": 2.07045, "48390": 2.09994, "48395": 2.01081, "48400": 2.0501, "48405": 2.07422, "48410": 2.08828, "48415": 2.0662, "48420": 2.08976, "48425": 2.05946, "48430": 2.06385, "48435": 2.02747, "48440": 2.10635, "48445": 2.05208, "48450": 2.10346, "48455": 2.03792, "48460": 2.06222, "48465": 2.08637, "48470": 2.02497, "48475": 2.06141, "48480": 2.11906, "48485": 2.05415, "48490": 2.08335, "48495": 2.10886, "48500": 2.06164, "48505": 2.08717, "48510": 2.08097, "48515": 2.09232, "48520": 2.03046, "48525": 2.0612, "48530": 2.05016, "48535": 2.07767, "48540": 2.02274, "48545": 2.10234, "48550": 2.05369, "48555": 2.09199, "48560": 2.03368, "48565": 2.04962, "48570": 2.01233, "48575": 2.0697, "48580": 2.10137, "48585": 2.01616, "48590": 2.06096, "48595": 2.02826, "48600": 2.08716, "48605": 2.11348, "48610": 2.04442, "48615": 2.05531, "48620": 2.06148, "48625": 2.04957, "48630": 2.12317, "48635": 2.08214, "48640": 2.07554, "48645": 2.06368, "48650": 2.07682, "48655": 2.0585, "48660": 2.07143, "48665": 2.05968, "48670": 2.03087, "48675": 2.04441, "48680": 2.05437, "48685": 2.06172, "48690": 2.08591, "48695": 2.08931, "48700": 2.09072, "48705": 2.05582, "48710": 2.07195, "48715": 2.10096, "48720": 2.01425, "48725": 2.06197, "48730": 2.05729, "48735": 2.04277, "48740": 2.07258, "48745": 2.01342, "48750": 2.06066, "48755": 2.06252, "48760": 2.03921, "48765": 2.09258, "48770": 2.0875, "48775": 2.03699, "48780": 2.02198, "48785": 2.0769, "48790": 2.06396, "48795": 2.0359, "48800": 2.05347, "48805": 2.05641, "48810": 2.05942, "48815": 2.05002, "48820": 2.11586, "48825": 2.03268, "48830": 2.04465, "48835": 2.03724, "48840": 2.05712, "48845": 2.08227, "48850": 2.04557, "48855": 2.0536, "48860": 2.06713, "48865": 2.07039, "48870": 2.08661, "48875": 2.05799, "48880": 2.03196, "48885": 2.07261, "48890": 2.03966, "48895": 2.05337, "48900": 2.08753, "48905": 2.07819, "48910": 2.05691, "48915": 2.09761, "48920": 2.09666, "48925": 2.07062, "48930": 2.09229, "48935": 2.0737, "48940": 2.03305, "48945": 2.04629, "48950": 2.07421, "48955": 2.09763, "48960": 2.05614, "48965": 2.03481, "48970": 2.03868, "48975": 2.07302, "48980": 2.12581, "48985": 2.08382, "48990": 2.06741, "48995": 2.09422, "49000": 2.08653, "49005": 2.09656, "49010": 2.09946, "49015": 2.04984, "49020": 2.06924, "49025": 2.06952, "49030": 2.09952, "49035": 2.05294, "49040": 2.10157, "49045": 2.07699, "49050": 2.07233, "49055": 2.07026, "49060": 2.03284, "49065": 2.06433, "49070": 2.0507, "49075": 2.05488, "49080": 2.09151, "49085": 2.1143, "49090": 2.05257, "49095": 2.02922, "49100": 2.06292, "49105": 2.08641, "49110": 2.09429, "49115": 2.13254, "49120": 2.10293, "49125": 2.04094, "49130": 2.09209, "49135": 2.08364, "49140": 2.07404, "49145": 2.10275, "49150": 2.06187, "49155": 2.04146, "49160": 2.01811, "49165": 2.08061, "49170": 2.06394, "49175": 2.04287, "49180": 2.0189, "49185": 2.04668, "49190": 2.04843, "49195": 2.05751, "49200": 2.02922, "49205": 2.05543, "49210": 2.10471, "49215": 2.02007, "49220": 2.04251, "49225": 2.06355, "49230": 2.03734, "49235": 2.07038, "49240": 2.0824, "49245": 2.04952, "49250": 2.0171, "49255": 2.01838, "49260": 2.0535, "49265": 2.069, "49270": 2.05022, "49275": 2.07639, "49280": 2.06144, "49285": 2.04904, "49290": 2.06548, "49295": 2.07878, "49300": 2.03272, "49305": 2.07031, "49310": 2.05275, "49315": 2.0424, "49320": 2.08841, "49325": 2.08581, "49330": 2.0727, "49335": 2.02358, "49340": 2.03659, "49345": 2.06985, "49350": 2.0464, "49355": 2.08023, "49360": 2.05357, "49365": 2.05336, "49370": 2.04512, "49375": 2.11431, "49380": 2.08276, "49385": 2.08608, "49390": 2.07401, "49395": 2.06677, "49400": 2.08031, "49405": 2.05351, "49410": 2.09059, "49415": 2.08519, "49420": 2.04926, "49425": 2.05185, "49430": 2.09218, "49435": 2.11991, "49440": 2.11536, "49445": 2.05871, "49450": 2.02537, "49455": 2.05776, "49460": 2.08436, "49465": 1.99489, "49470": 2.10459, "49475": 2.06827, "49480": 2.07686, "49485": 2.04896, "49490": 2.04637, "49495": 2.06906, "49500": 2.03274, "49505": 2.08807, "49510": 2.0484, "49515": 2.07182, "49520": 2.1027, "49525": 2.06968, "49530": 2.13431, "49535": 2.07141, "49540": 2.0412, "49545": 2.03306, "49550": 2.09392, "49555": 2.04171, "49560": 2.09338, "49565": 2.10983, "49570": 2.06432, "49575": 2.10061, "49580": 2.11544, "49585": 2.06107, "49590": 2.07152, "49595": 2.07681, "49600": 2.09056, "49605": 2.07457, "49610": 2.08521, "49615": 2.05486, "49620": 2.09187, "49625": 2.0863, "49630": 2.09315, "49635": 2.06319, "49640": 2.06975, "49645": 2.01421, "49650": 2.0718, "49655": 2.11785, "49660": 2.1081, "49665": 2.0645, "49670": 2.07108, "49675": 2.05647, "49680": 2.03999, "49685": 2.04789, "49690": 2.0589, "49695": 2.10577, "49700": 2.01514, "49705": 2.09154, "49710": 2.08533, "49715": 2.0888, "49720": 2.05795, "49725": 2.08516, "49730": 2.08685, "49735": 2.00532, "49740": 2.06849, "49745": 2.05896, "49750": 2.07918, "49755": 2.09501, "49760": 1.98692, "49765": 2.04589, "49770": 2.03384, "49775": 2.05592, "49780": 2.05226, "49785": 2.03021, "49790": 2.02986, "49795": 2.09572, "49800": 2.05646, "49805": 2.00495, "49810": 2.07611, "49815": 2.02068, "49820": 2.04386, "49825": 2.03205, "49830": 2.05913, "49835": 2.04797, "49840": 2.06319, "49845": 2.11941, "49850": 2.11953, "49855": 2.06734, "49860": 2.04555, "49865": 2.0916, "49870": 2.0616, "49875": 2.04138, "49880": 2.06596, "49885": 2.08274, "49890": 2.08737, "49895": 2.0357, "49900": 2.07716, "49905": 2.07389, "49910": 2.0363, "49915": 2.06033, "49920": 2.05753, "49925": 2.02536, "49930": 2.10623, "49935": 2.07033, "49940": 2.04463, "49945": 2.08902, "49950": 2.04797, "49955": 2.09562, "49960": 2.05791, "49965": 2.0708, "49970": 2.09701, "49975": 2.07721, "49980": 2.12126, "49985": 2.06276, "49990": 2.08639, "49995": 2.06656, "50000": 2.05994, "50005": 2.06174, "50010": 2.09073, "50015": 2.05117, "50020": 2.02893, "50025": 2.12422, "50030": 2.02401, "50035": 2.04274, "50040": 2.08248, "50045": 2.07558, "50050": 2.09735, "50055": 2.07339, "50060": 2.05636, "50065": 2.06775, "50070": 2.03011, "50075": 2.05484, "50080": 2.0908, "50085": 2.08026, "50090": 2.0478, "50095": 2.07583, "50100": 2.03287, "50105": 2.08842, "50110": 2.05323, "50115": 2.07361, "50120": 2.06556, "50125": 2.0587, "50130": 2.01792, "50135": 2.04408, "50140": 2.1107, "50145": 2.04823, "50150": 2.07326, "50155": 2.06796, "50160": 2.03197, "50165": 2.05969, "50170": 2.06223, "50175": 2.08769, "50180": 2.06685, "50185": 2.06573, "50190": 2.02627, "50195": 2.05797, "50200": 2.04014, "50205": 2.07694, "50210": 2.0719, "50215": 2.08708, "50220": 2.06943, "50225": 2.05342, "50230": 2.08485, "50235": 2.09256, "50240": 2.07927, "50245": 2.06046, "50250": 2.05046, "50255": 2.06819, "50260": 2.02419, "50265": 2.03201, "50270": 1.98273, "50275": 2.04794, "50280": 2.06523, "50285": 2.07975, "50290": 2.06492, "50295": 2.09827, "50300": 2.05853, "50305": 2.08138, "50310": 2.04414, "50315": 2.06943, "50320": 2.07621, "50325": 2.05715, "50330": 2.07467, "50335": 2.08286, "50340": 2.07026, "50345": 2.05772, "50350": 2.0428, "50355": 2.0741, "50360": 2.06522, "50365": 2.05507, "50370": 2.05597, "50375": 2.06113, "50380": 2.07358, "50385": 2.06688, "50390": 2.05196, "50395": 2.07484, "50400": 2.04539, "50405": 2.07527, "50410": 2.05664, "50415": 2.01815, "50420": 2.06866, "50425": 2.05822, "50430": 2.08258, "50435": 2.06837, "50440": 2.01913, "50445": 2.03672, "50450": 2.09854, "50455": 2.06294, "50460": 2.06654, "50465": 2.04816, "50470": 2.07233, "50475": 2.0488, "50480": 2.10157, "50485": 2.10088, "50490": 2.06926, "50495": 2.09619, "50500": 2.06886, "50505": 2.09452, "50510": 2.02854, "50515": 2.06648, "50520": 2.06105, "50525": 2.09047, "50530": 2.08393, "50535": 2.06308, "50540": 2.03741, "50545": 2.02859, "50550": 2.07781, "50555": 2.05733, "50560": 2.04363, "50565": 2.09061, "50570": 2.01766, "50575": 2.03831, "50580": 2.03193, "50585": 2.07508, "50590": 2.06091, "50595": 2.08871, "50600": 2.06102, "50605": 2.05398, "50610": 2.08596, "50615": 2.09795, "50620": 2.09429, "50625": 2.05317, "50630": 2.03835, "50635": 2.10037, "50640": 2.08742, "50645": 2.09893, "50650": 2.05161, "50655": 2.0521, "50660": 2.03348, "50665": 2.02949, "50670": 2.04889, "50675": 2.08406, "50680": 2.04721, "50685": 2.03373, "50690": 2.04526, "50695": 2.04877, "50700": 2.10562, "50705": 2.05511, "50710": 2.12045, "50715": 2.01083, "50720": 2.04333, "50725": 2.08719, "50730": 2.07468, "50735": 2.08797, "50740": 2.02606, "50745": 2.10467, "50750": 2.07706, "50755": 2.09646, "50760": 2.11095, "50765": 2.06401, "50770": 2.06875, "50775": 2.07091, "50780": 2.06883, "50785": 2.04037, "50790": 2.06015, "50795": 2.0678, "50800": 2.09007, "50805": 2.06263, "50810": 2.06275, "50815": 2.06213, "50820": 2.03512, "50825": 2.06464, "50830": 2.02717, "50835": 2.04195, "50840": 2.08448, "50845": 2.03896, "50850": 2.07083, "50855": 2.06308, "50860": 2.06903, "50865": "nan", "50870": "nan", "50875": "nan", "50880": "nan", "50885": "nan", "50890": "nan", "50895": "nan", "50900": "nan", "50905": "nan", "50910": "nan", "50915": "nan", "50920": "nan", "50925": "nan", "50930": "nan", "50935": "nan", "50940": "nan", "50945": "nan", "50950": "nan", "50955": "nan", "50960": "nan", "50965": "nan", "50970": "nan", "50975": "nan", "50980": "nan", "50985": "nan", "50990": "nan", "50995": "nan", "51000": "nan", "51005": "nan", "51010": "nan", "51015": "nan", "51020": "nan", "51025": "nan", "51030": "nan", "51035": "nan", "51040": "nan", "51045": "nan", "51050": "nan", "51055": "nan", "51060": "nan", "51065": "nan", "51070": "nan", "51075": "nan", "51080": "nan", "51085": "nan", "51090": "nan", "51095": "nan", "51100": "nan", "51105": "nan", "51110": "nan", "51115": "nan", "51120": "nan", "51125": "nan", "51130": "nan", "51135": "nan", "51140": "nan", "51145": "nan", "51150": "nan", "51155": "nan", "51160": "nan", "51165": "nan", "51170": "nan", "51175": "nan", "51180": "nan", "51185": "nan", "51190": "nan", "51195": "nan", "51200": "nan", "51205": "nan", "51210": "nan", "51215": "nan", "51220": "nan", "51225": "nan", "51230": "nan", "51235": "nan", "51240": "nan", "51245": "nan", "51250": "nan", "51255": "nan", "51260": "nan", "51265": "nan", "51270": "nan", "51275": "nan", "51280": "nan", "51285": "nan", "51290": "nan", "51295": "nan", "51300": "nan", "51305": "nan", "51310": "nan", "51315": "nan", "51320": "nan", "51325": "nan", "51330": "nan", "51335": "nan", "51340": "nan", "51345": "nan", "51350": "nan", "51355": "nan", "51360": "nan", "51365": "nan", "51370": "nan", "51375": "nan", "51380": "nan", "51385": "nan", "51390": "nan", "51395": "nan", "51400": "nan", "51405": "nan", "51410": "nan", "51415": "nan", "51420": "nan", "51425": "nan", "51430": "nan", "51435": "nan", "51440": "nan", "51445": "nan", "51450": "nan", "51455": "nan", "51460": "nan", "51465": "nan", "51470": "nan", "51475": "nan", "51480": "nan", "51485": "nan", "51490": "nan", "51495": "nan", "51500": "nan", "51505": "nan", "51510": "nan", "51515": "nan", "51520": "nan", "51525": "nan", "51530": "nan", "51535": "nan", "51540": "nan", "51545": "nan", "51550": "nan", "51555": "nan", "51560": "nan", "51565": "nan", "51570": "nan", "51575": "nan", "51580": "nan", "51585": "nan", "51590": "nan", "51595": "nan", "51600": "nan", "51605": "nan", "51610": "nan", "51615": "nan", "51620": "nan", "51625": "nan", "51630": "nan", "51635": "nan", "51640": "nan", "51645": "nan", "51650": "nan", "51655": "nan", "51660": "nan", "51665": "nan", "51670": "nan", "51675": "nan", "51680": "nan", "51685": "nan", "51690": "nan", "51695": "nan", "51700": "nan", "51705": "nan", "51710": "nan", "51715": "nan", "51720": "nan", "51725": "nan", "51730": "nan", "51735": "nan", "51740": "nan", "51745": "nan", "51750": "nan", "51755": "nan", "51760": "nan", "51765": "nan", "51770": "nan", "51775": "nan", "51780": "nan", "51785": "nan", "51790": "nan", "51795": "nan", "51800": "nan", "51805": "nan", "51810": "nan", "51815": "nan", "51820": "nan", "51825": "nan", "51830": "nan", "51835": "nan", "51840": "nan", "51845": "nan", "51850": "nan", "51855": "nan", "51860": "nan", "51865": "nan", "51870": "nan", "51875": "nan", "51880": "nan", "51885": "nan", "51890": "nan", "51895": "nan", "51900": "nan", "51905": "nan", "51910": "nan", "51915": "nan", "51920": "nan", "51925": "nan", "51930": "nan", "51935": "nan", "51940": "nan", "51945": "nan", "51950": "nan", "51955": "nan", "51960": "nan", "51965": "nan", "51970": "nan", "51975": "nan", "51980": "nan", "51985": "nan", "51990": "nan", "51995": "nan", "52000": "nan", "52005": "nan", "52010": "nan", "52015": "nan", "52020": "nan", "52025": "nan", "52030": "nan", "52035": "nan", "52040": "nan", "52045": "nan", "52050": "nan", "52055": "nan", "52060": "nan", "52065": "nan", "52070": "nan", "52075": "nan", "52080": "nan", "52085": "nan", "52090": "nan", "52095": "nan", "52100": "nan", "52105": "nan", "52110": "nan", "52115": "nan", "52120": "nan", "52125": "nan", "52130": "nan", "52135": "nan", "52140": "nan", "52145": "nan", "52150": "nan", "52155": "nan", "52160": "nan", "52165": "nan", "52170": "nan", "52175": "nan", "52180": "nan", "52185": "nan", "52190": "nan", "52195": "nan", "52200": "nan", "52205": "nan", "52210": "nan", "52215": "nan", "52220": "nan", "52225": "nan", "52230": "nan", "52235": "nan", "52240": "nan", "52245": "nan", "52250": "nan", "52255": "nan", "52260": "nan", "52265": "nan", "52270": "nan", "52275": "nan", "52280": "nan", "52285": "nan", "52290": "nan", "52295": "nan", "52300": "nan", "52305": "nan", "52310": "nan", "52315": "nan", "52320": "nan", "52325": "nan", "52330": "nan", "52335": "nan", "52340": "nan", "52345": "nan", "52350": "nan", "52355": "nan", "52360": "nan", "52365": "nan", "52370": "nan", "52375": "nan", "52380": "nan", "52385": "nan", "52390": "nan", "52395": "nan", "52400": "nan", "52405": "nan", "52410": "nan", "52415": "nan", "52420": "nan", "52425": "nan", "52430": "nan", "52435": "nan", "52440": "nan", "52445": "nan", "52450": "nan", "52455": "nan", "52460": "nan", "52465": "nan", "52470": "nan", "52475": "nan", "52480": "nan", "52485": "nan", "52490": "nan", "52495": "nan", "52500": "nan", "52505": "nan", "52510": "nan", "52515": "nan", "52520": "nan", "52525": "nan", "52530": "nan", "52535": "nan", "52540": "nan", "52545": "nan", "52550": "nan", "52555": "nan", "52560": "nan", "52565": "nan", "52570": "nan", "52575": "nan", "52580": "nan", "52585": "nan", "52590": "nan", "52595": "nan", "52600": "nan", "52605": "nan", "52610": "nan", "52615": "nan", "52620": "nan", "52625": "nan", "52630": "nan", "52635": "nan", "52640": "nan", "52645": "nan", "52650": "nan", "52655": "nan", "52660": "nan", "52665": "nan", "52670": "nan", "52675": "nan", "52680": "nan", "52685": "nan", "52690": "nan", "52695": "nan", "52700": "nan", "52705": "nan", "52710": "nan", "52715": "nan", "52720": "nan", "52725": "nan", "52730": "nan", "52735": "nan", "52740": "nan", "52745": "nan", "52750": "nan", "52755": "nan", "52760": "nan", "52765": "nan", "52770": "nan", "52775": "nan", "52780": "nan", "52785": "nan", "52790": "nan", "52795": "nan", "52800": "nan", "52805": "nan", "52810": "nan", "52815": "nan", "52820": "nan", "52825": "nan", "52830": "nan", "52835": "nan", "52840": "nan", "52845": "nan", "52850": "nan", "52855": "nan", "52860": "nan", "52865": "nan", "52870": "nan", "52875": "nan", "52880": "nan", "52885": "nan", "52890": "nan", "52895": "nan", "52900": "nan", "52905": "nan", "52910": "nan", "52915": "nan", "52920": "nan", "52925": "nan", "52930": "nan", "52935": "nan", "52940": "nan", "52945": "nan", "52950": "nan", "52955": "nan", "52960": "nan", "52965": "nan", "52970": "nan", "52975": "nan", "52980": "nan", "52985": "nan", "52990": "nan", "52995": "nan", "53000": "nan", "53005": "nan", "53010": "nan", "53015": "nan", "53020": "nan", "53025": "nan", "53030": "nan", "53035": "nan", "53040": "nan", "53045": "nan", "53050": "nan", "53055": "nan", "53060": "nan", "53065": "nan", "53070": "nan", "53075": "nan", "53080": "nan", "53085": "nan", "53090": "nan", "53095": "nan", "53100": "nan", "53105": "nan", "53110": "nan", "53115": "nan", "53120": "nan", "53125": "nan", "53130": "nan", "53135": "nan", "53140": "nan", "53145": "nan", "53150": "nan", "53155": "nan", "53160": "nan", "53165": "nan", "53170": "nan", "53175": "nan", "53180": "nan", "53185": "nan", "53190": "nan", "53195": "nan", "53200": "nan", "53205": "nan", "53210": "nan", "53215": "nan", "53220": "nan", "53225": "nan", "53230": "nan", "53235": "nan", "53240": "nan", "53245": "nan", "53250": "nan", "53255": "nan", "53260": "nan", "53265": "nan", "53270": "nan", "53275": "nan", "53280": "nan", "53285": "nan", "53290": "nan", "53295": "nan", "53300": "nan", "53305": "nan", "53310": "nan", "53315": "nan", "53320": "nan", "53325": "nan", "53330": "nan", "53335": "nan", "53340": "nan", "53345": "nan", "53350": "nan", "53355": "nan", "53360": "nan", "53365": "nan", "53370": "nan", "53375": "nan", "53380": "nan", "53385": "nan", "53390": "nan", "53395": "nan", "53400": "nan", "53405": "nan", "53410": "nan", "53415": "nan", "53420": "nan", "53425": "nan", "53430": "nan", "53435": "nan", "53440": "nan", "53445": "nan", "53450": "nan", "53455": "nan", "53460": "nan", "53465": "nan", "53470": "nan", "53475": "nan", "53480": "nan", "53485": "nan", "53490": "nan", "53495": "nan", "53500": "nan", "53505": "nan", "53510": "nan", "53515": "nan", "53520": "nan", "53525": "nan", "53530": "nan", "53535": "nan", "53540": "nan", "53545": "nan", "53550": "nan", "53555": "nan", "53560": "nan", "53565": "nan", "53570": "nan", "53575": "nan", "53580": "nan", "53585": "nan", "53590": "nan", "53595": "nan", "53600": "nan", "53605": "nan", "53610": "nan", "53615": "nan", "53620": "nan", "53625": "nan", "53630": "nan", "53635": "nan", "53640": "nan", "53645": "nan", "53650": "nan", "53655": "nan", "53660": "nan", "53665": "nan", "53670": "nan", "53675": "nan", "53680": "nan", "53685": "nan", "53690": "nan", "53695": "nan", "53700": "nan", "53705": "nan", "53710": "nan", "53715": "nan", "53720": "nan", "53725": "nan", "53730": "nan", "53735": "nan", "53740": "nan", "53745": "nan", "53750": "nan", "53755": "nan", "53760": "nan", "53765": "nan", "53770": "nan", "53775": "nan", "53780": "nan", "53785": "nan", "53790": "nan", "53795": "nan", "53800": "nan", "53805": "nan", "53810": "nan", "53815": "nan", "53820": "nan", "53825": "nan", "53830": "nan", "53835": "nan", "53840": "nan", "53845": "nan", "53850": "nan", "53855": "nan", "53860": "nan", "53865": "nan", "53870": "nan", "53875": "nan", "53880": "nan", "53885": "nan", "53890": "nan", "53895": "nan", "53900": "nan", "53905": "nan", "53910": "nan", "53915": "nan", "53920": "nan", "53925": "nan", "53930": "nan", "53935": "nan", "53940": "nan", "53945": "nan", "53950": "nan", "53955": "nan", "53960": "nan", "53965": "nan", "53970": "nan", "53975": "nan", "53980": "nan", "53985": "nan", "53990": "nan", "53995": "nan", "54000": "nan", "54005": "nan", "54010": "nan", "54015": "nan", "54020": "nan", "54025": "nan", "54030": "nan", "54035": "nan", "54040": "nan", "54045": "nan", "54050": "nan", "54055": "nan", "54060": "nan", "54065": "nan", "54070": "nan", "54075": "nan", "54080": "nan", "54085": "nan", "54090": "nan", "54095": "nan", "54100": "nan", "54105": "nan", "54110": "nan", "54115": "nan", "54120": "nan", "54125": "nan", "54130": "nan", "54135": "nan", "54140": "nan", "54145": "nan", "54150": "nan", "54155": "nan", "54160": "nan", "54165": "nan", "54170": "nan", "54175": "nan", "54180": "nan", "54185": "nan", "54190": "nan", "54195": "nan", "54200": "nan", "54205": "nan", "54210": "nan", "54215": "nan", "54220": "nan", "54225": "nan", "54230": "nan", "54235": "nan", "54240": "nan", "54245": "nan", "54250": "nan", "54255": "nan", "54260": "nan", "54265": "nan", "54270": "nan", "54275": "nan", "54280": "nan", "54285": "nan", "54290": "nan", "54295": "nan", "54300": "nan", "54305": "nan", "54310": "nan", "54315": "nan", "54320": "nan", "54325": "nan", "54330": "nan", "54335": "nan", "54340": "nan", "54345": "nan", "54350": "nan", "54355": "nan", "54360": "nan", "54365": "nan", "54370": "nan", "54375": "nan", "54380": "nan", "54385": "nan", "54390": "nan", "54395": "nan", "54400": "nan", "54405": "nan", "54410": "nan", "54415": "nan", "54420": "nan", "54425": "nan", "54430": "nan", "54435": "nan", "54440": "nan", "54445": "nan", "54450": "nan", "54455": "nan", "54460": "nan", "54465": "nan", "54470": "nan", "54475": "nan", "54480": "nan", "54485": "nan", "54490": "nan", "54495": "nan", "54500": "nan", "54505": "nan", "54510": "nan", "54515": "nan", "54520": "nan", "54525": "nan", "54530": "nan", "54535": "nan", "54540": "nan", "54545": "nan", "54550": "nan", "54555": "nan", "54560": "nan", "54565": "nan", "54570": "nan", "54575": "nan", "54580": "nan", "54585": "nan", "54590": "nan", "54595": "nan", "54600": "nan", "54605": "nan", "54610": "nan", "54615": "nan", "54620": "nan", "54625": "nan", "54630": "nan", "54635": "nan", "54640": "nan", "54645": "nan", "54650": "nan", "54655": "nan", "54660": "nan", "54665": "nan", "54670": "nan", "54675": "nan", "54680": "nan", "54685": "nan", "54690": "nan", "54695": "nan", "54700": "nan", "54705": "nan", "54710": "nan", "54715": "nan", "54720": "nan", "54725": "nan", "54730": "nan", "54735": "nan", "54740": "nan", "54745": "nan", "54750": "nan", "54755": "nan", "54760": "nan", "54765": "nan", "54770": "nan", "54775": "nan", "54780": "nan", "54785": "nan", "54790": "nan", "54795": "nan", "54800": "nan", "54805": "nan", "54810": "nan", "54815": "nan", "54820": "nan", "54825": "nan", "54830": "nan", "54835": "nan", "54840": "nan", "54845": "nan", "54850": "nan", "54855": "nan", "54860": "nan", "54865": "nan", "54870": "nan", "54875": "nan", "54880": "nan", "54885": "nan", "54890": "nan", "54895": "nan", "54900": "nan", "54905": "nan", "54910": "nan", "54915": "nan", "54920": "nan", "54925": "nan", "54930": "nan", "54935": "nan", "54940": "nan", "54945": "nan", "54950": "nan", "54955": "nan", "54960": "nan", "54965": "nan", "54970": "nan", "54975": "nan", "54980": "nan", "54985": "nan", "54990": "nan", "54995": "nan", "55000": "nan"}}, "num-zeros": {"start_step": 1, "end_step": 55000, "step_interval": 5, "values": {"1": 956236928.0, "5": 967338240.0, "10": 971387328.0, "15": 946439808.0, "20": 961331840.0, "25": 1083777408.0, "30": 1211181952.0, "35": 1297902976.0, "40": 1271625984.0, "45": 1175158400.0, "50": 1126817536.0, "55": 1083226112.0, "60": 1045323520.0, "65": 1031515136.0, "70": 1003266944.0, "75": 999293504.0, "80": 1016238784.0, "85": 1006028160.0, "90": 986973824.0, "95": 960754816.0, "100": 974773952.0, "105": 983495424.0, "110": 978458112.0, "115": 977383232.0, "120": 960790976.0, "125": 941421056.0, "130": 974815616.0, "135": 964788672.0, "140": 962624192.0, "145": 976238912.0, "150": 921297152.0, "155": 968158144.0, "160": 956309888.0, "165": 959817792.0, "170": 974334528.0, "175": 949058368.0, "180": 946695616.0, "185": 972038656.0, "190": 969043840.0, "195": 985143296.0, "200": 945758080.0, "205": 958348928.0, "210": 979433984.0, "215": 967488000.0, "220": 956428992.0, "225": 962401088.0, "230": 948172928.0, "235": 965223808.0, "240": 966069888.0, "245": 969162496.0, "250": 974435200.0, "255": 925060928.0, "260": 965639872.0, "265": 970676288.0, "270": 959135744.0, "275": 954000128.0, "280": 963434432.0, "285": 945787840.0, "290": 974123392.0, "295": 966701824.0, "300": 967149376.0, "305": 964507392.0, "310": 940355328.0, "315": 967395904.0, "320": 969004928.0, "325": 980559616.0, "330": 972097408.0, "335": 946864960.0, "340": 966599424.0, "345": 973019264.0, "350": 973912704.0, "355": 963256576.0, "360": 948352960.0, "365": 964820928.0, "370": 962946944.0, "375": 958451584.0, "380": 947150080.0, "385": 955995072.0, "390": 945398976.0, "395": 970419328.0, "400": 979777088.0, "405": 968349824.0, "410": 970067584.0, "415": 953156608.0, "420": 943564864.0, "425": 954775296.0, "430": 962664704.0, "435": 977082496.0, "440": 954810112.0, "445": 971894912.0, "450": 963501120.0, "455": 973129856.0, "460": 983711552.0, "465": 945280512.0, "470": 942044288.0, "475": 967008000.0, "480": 966105472.0, "485": 976410496.0, "490": 962541696.0, "495": 945462144.0, "500": 964455040.0, "505": 986011712.0, "510": 965676608.0, "515": 943412608.0, "520": 945022592.0, "525": 971260416.0, "530": 971888384.0, "535": 979142144.0, "540": 969531968.0, "545": 954126976.0, "550": 951262080.0, "555": 987222784.0, "560": 960428288.0, "565": 966616960.0, "570": 975727488.0, "575": 927227392.0, "580": 970696576.0, "585": 961176256.0, "590": 972965504.0, "595": 963679232.0, "600": 937075328.0, "605": 951475712.0, "610": 963362944.0, "615": 970010496.0, "620": 976474240.0, "625": 949580416.0, "630": 954440832.0, "635": 986041984.0, "640": 980978304.0, "645": 955008512.0, "650": 958550656.0, "655": 951655552.0, "660": 961041792.0, "665": 967549440.0, "670": 962517632.0, "675": 968324288.0, "680": 965619584.0, "685": 962867712.0, "690": 961923328.0, "695": 954771328.0, "700": 970337600.0, "705": 945516288.0, "710": 943885568.0, "715": 973357632.0, "720": 968368448.0, "725": 978493568.0, "730": 952192256.0, "735": 948808192.0, "740": 955632704.0, "745": 975870272.0, "750": 981239104.0, "755": 962158208.0, "760": 951961856.0, "765": 967348480.0, "770": 976147200.0, "775": 970548800.0, "780": 977538176.0, "785": 931526208.0, "790": 960440128.0, "795": 964581120.0, "800": 967017280.0, "805": 962319360.0, "810": 940973056.0, "815": 949037056.0, "820": 953185408.0, "825": 954502400.0, "830": 976442304.0, "835": 956072320.0, "840": 948405120.0, "845": 965156736.0, "850": 966028672.0, "855": 960902144.0, "860": 976026880.0, "865": 938155008.0, "870": 966413312.0, "875": 972309632.0, "880": 963121024.0, "885": 967744896.0, "890": 949968576.0, "895": 960018816.0, "900": 974229888.0, "905": 963966720.0, "910": 958434944.0, "915": 956353792.0, "920": 943974400.0, "925": 960832320.0, "930": 978849600.0, "935": 971070464.0, "940": 960905856.0, "945": 945065344.0, "950": 957425024.0, "955": 979037312.0, "960": 983587968.0, "965": 966164224.0, "970": 951228992.0, "975": 961577408.0, "980": 968068800.0, "985": 968991744.0, "990": 984389632.0, "995": 953289152.0, "1000": 934782400.0, "1005": 960145856.0, "1010": 971536256.0, "1015": 985186752.0, "1020": 962778624.0, "1025": 935008768.0, "1030": 974680768.0, "1035": 964991744.0, "1040": 980465408.0, "1045": 960827200.0, "1050": 955202688.0, "1055": 957780416.0, "1060": 967748864.0, "1065": 967116864.0, "1070": 966602112.0, "1075": 950060800.0, "1080": 954507008.0, "1085": 967250816.0, "1090": 977132352.0, "1095": 961237376.0, "1100": 979613568.0, "1105": 953366144.0, "1110": 965952320.0, "1115": 966984192.0, "1120": 970352512.0, "1125": 965707648.0, "1130": 954943744.0, "1135": 965842944.0, "1140": 965176512.0, "1145": 970987776.0, "1150": 955558912.0, "1155": 930576704.0, "1160": 957775296.0, "1165": 978127552.0, "1170": 974302528.0, "1175": 973059392.0, "1180": 973081984.0, "1185": 947340416.0, "1190": 964793152.0, "1195": 953136768.0, "1200": 972842944.0, "1205": 988478912.0, "1210": 931125888.0, "1215": 968646528.0, "1220": 969161664.0, "1225": 975951232.0, "1230": 967334016.0, "1235": 943445568.0, "1240": 955855360.0, "1245": 981504768.0, "1250": 966111104.0, "1255": 973673600.0, "1260": 946497088.0, "1265": 963997952.0, "1270": 960485184.0, "1275": 973617472.0, "1280": 961114624.0, "1285": 957582144.0, "1290": 952531136.0, "1295": 971611136.0, "1300": 968864320.0, "1305": 963740160.0, "1310": 963336256.0, "1315": 943555456.0, "1320": 966309696.0, "1325": 989783936.0, "1330": 969506688.0, "1335": 972302592.0, "1340": 972269056.0, "1345": 960658816.0, "1350": 968636480.0, "1355": 955851264.0, "1360": 971820352.0, "1365": 960386560.0, "1370": 948790912.0, "1375": 973532480.0, "1380": 953470976.0, "1385": 969148928.0, "1390": 975713664.0, "1395": 931675072.0, "1400": 945856320.0, "1405": 976753664.0, "1410": 974512320.0, "1415": 967571200.0, "1420": 966747008.0, "1425": 937378688.0, "1430": 973915904.0, "1435": 978332928.0, "1440": 964179584.0, "1445": 958056960.0, "1450": 946147392.0, "1455": 983923008.0, "1460": 968652160.0, "1465": 948745088.0, "1470": 984244032.0, "1475": 943904704.0, "1480": 963975104.0, "1485": 957349696.0, "1490": 961260160.0, "1495": 980539136.0, "1500": 958333184.0, "1505": 942865152.0, "1510": 984180864.0, "1515": 959090176.0, "1520": 959104128.0, "1525": 952786176.0, "1530": 957740928.0, "1535": 949430400.0, "1540": 971085952.0, "1545": 963133248.0, "1550": 978666560.0, "1555": 952319744.0, "1560": 980087168.0, "1565": 967316480.0, "1570": 973842432.0, "1575": 975492992.0, "1580": 941862528.0, "1585": 970029952.0, "1590": 983822848.0, "1595": 948633344.0, "1600": 967445376.0, "1605": 952448064.0, "1610": 969618624.0, "1615": 983146624.0, "1620": 968021184.0, "1625": 970714752.0, "1630": 962888000.0, "1635": 942311552.0, "1640": 981612032.0, "1645": 973977472.0, "1650": 974186240.0, "1655": 967264000.0, "1660": 940688832.0, "1665": 961700992.0, "1670": 962902016.0, "1675": 971280384.0, "1680": 980879488.0, "1685": 944417152.0, "1690": 964689024.0, "1695": 965639808.0, "1700": 966342016.0, "1705": 985201408.0, "1710": 978354048.0, "1715": 943213504.0, "1720": 977089536.0, "1725": 965873152.0, "1730": 968970944.0, "1735": 965086464.0, "1740": 949714048.0, "1745": 970011008.0, "1750": 959681024.0, "1755": 960086336.0, "1760": 966379904.0, "1765": 951817600.0, "1770": 954665600.0, "1775": 973750912.0, "1780": 970534912.0, "1785": 968826176.0, "1790": 950236416.0, "1795": 945131392.0, "1800": 984665088.0, "1805": 987162368.0, "1810": 977769472.0, "1815": 948005120.0, "1820": 949208320.0, "1825": 978854272.0, "1830": 966362624.0, "1835": 964132992.0, "1840": 972319744.0, "1845": 935413888.0, "1850": 952499584.0, "1855": 980051904.0, "1860": 975867904.0, "1865": 958966336.0, "1870": 958948672.0, "1875": 932594176.0, "1880": 973575040.0, "1885": 978846976.0, "1890": 971360256.0, "1895": 959213184.0, "1900": 947393792.0, "1905": 981826944.0, "1910": 969126272.0, "1915": 970039808.0, "1920": 975598592.0, "1925": 960497024.0, "1930": 977924224.0, "1935": 963252096.0, "1940": 952460544.0, "1945": 981338368.0, "1950": 939172160.0, "1955": 960604416.0, "1960": 970030592.0, "1965": 981177280.0, "1970": 962044288.0, "1975": 952824448.0, "1980": 936849792.0, "1985": 975940480.0, "1990": 965967232.0, "1995": 962611456.0, "2000": 960555008.0, "2005": 954497408.0, "2010": 975581184.0, "2015": 991803008.0, "2020": 975434624.0, "2025": 974303232.0, "2030": 952084608.0, "2035": 967850112.0, "2040": 987461632.0, "2045": 976480768.0, "2050": 984700544.0, "2055": 942839680.0, "2060": 942594816.0, "2065": 966210816.0, "2070": 969622656.0, "2075": 980553216.0, "2080": 977598720.0, "2085": 939641344.0, "2090": 969873920.0, "2095": 961274624.0, "2100": 976718720.0, "2105": 972538880.0, "2110": 959902400.0, "2115": 956879488.0, "2120": 977481088.0, "2125": 962567296.0, "2130": 979619904.0, "2135": 950537408.0, "2140": 946996800.0, "2145": 962276224.0, "2150": 973404416.0, "2155": 972690944.0, "2160": 970313216.0, "2165": 948644224.0, "2170": 961545600.0, "2175": 969376896.0, "2180": 969329856.0, "2185": 947448640.0, "2190": 940481984.0, "2195": 986088384.0, "2200": 961861952.0, "2205": 978924800.0, "2210": 964103360.0, "2215": 963502592.0, "2220": 951311360.0, "2225": 969314304.0, "2230": 976329216.0, "2235": 974022912.0, "2240": 975493760.0, "2245": 960231680.0, "2250": 967641344.0, "2255": 969131776.0, "2260": 975064576.0, "2265": 968259456.0, "2270": 951745536.0, "2275": 962768640.0, "2280": 969640576.0, "2285": 971693184.0, "2290": 962890816.0, "2295": 931409664.0, "2300": 959905920.0, "2305": 970427008.0, "2310": 967446208.0, "2315": 970905280.0, "2320": 975591936.0, "2325": 938587840.0, "2330": 988438528.0, "2335": 977491648.0, "2340": 964595840.0, "2345": 964166080.0, "2350": 947554816.0, "2355": 977028096.0, "2360": 966898560.0, "2365": 977293696.0, "2370": 965072000.0, "2375": 953965632.0, "2380": 962918208.0, "2385": 967195136.0, "2390": 963075968.0, "2395": 974465664.0, "2400": 958411264.0, "2405": 968120384.0, "2410": 951587456.0, "2415": 965904384.0, "2420": 966518784.0, "2425": 959044352.0, "2430": 956685760.0, "2435": 961388160.0, "2440": 959755904.0, "2445": 970891008.0, "2450": 961996544.0, "2455": 922720896.0, "2460": 951953472.0, "2465": 955726848.0, "2470": 972570368.0, "2475": 973809920.0, "2480": 943895936.0, "2485": 944185600.0, "2490": 972409984.0, "2495": 974451456.0, "2500": 973908224.0, "2505": 958488320.0, "2510": 939509120.0, "2515": 979553408.0, "2520": 970473216.0, "2525": 964385920.0, "2530": 955797696.0, "2535": 936597504.0, "2540": 969027328.0, "2545": 970384512.0, "2550": 969460352.0, "2555": 969438464.0, "2560": 964978176.0, "2565": 959763776.0, "2570": 985176576.0, "2575": 957425728.0, "2580": 967425152.0, "2585": 966024320.0, "2590": 956354240.0, "2595": 981829952.0, "2600": 959531648.0, "2605": 963001216.0, "2610": 965972096.0, "2615": 951925504.0, "2620": 971242752.0, "2625": 976457600.0, "2630": 974410560.0, "2635": 948070592.0, "2640": 948136320.0, "2645": 963037760.0, "2650": 953982848.0, "2655": 977112192.0, "2660": 949622720.0, "2665": 953929088.0, "2670": 959064384.0, "2675": 979277760.0, "2680": 961396608.0, "2685": 970700160.0, "2690": 965221824.0, "2695": 943555328.0, "2700": 969423616.0, "2705": 978961536.0, "2710": 971811008.0, "2715": 990814848.0, "2720": 942648832.0, "2725": 967955840.0, "2730": 955467008.0, "2735": 970674560.0, "2740": 977922688.0, "2745": 932281472.0, "2750": 947858176.0, "2755": 956318208.0, "2760": 981695872.0, "2765": 966113024.0, "2770": 948915776.0, "2775": 935830784.0, "2780": 964778176.0, "2785": 969569664.0, "2790": 974273984.0, "2795": 966886784.0, "2800": 944390528.0, "2805": 964354368.0, "2810": 969610944.0, "2815": 975843968.0, "2820": 963083392.0, "2825": 937628160.0, "2830": 956740224.0, "2835": 986321920.0, "2840": 961758720.0, "2845": 967507072.0, "2850": 951716864.0, "2855": 962092800.0, "2860": 954242688.0, "2865": 955882496.0, "2870": 944664128.0, "2875": 974664384.0, "2880": 968202112.0, "2885": 981082368.0, "2890": 953455360.0, "2895": 957178304.0, "2900": 964989568.0, "2905": 931709312.0, "2910": 955731968.0, "2915": 979477312.0, "2920": 970492928.0, "2925": 964977024.0, "2930": 964049792.0, "2935": 940141376.0, "2940": 964913024.0, "2945": 989149952.0, "2950": 965209344.0, "2955": 965105280.0, "2960": 933160384.0, "2965": 968793600.0, "2970": 973035072.0, "2975": 958091840.0, "2980": 964496704.0, "2985": 937268864.0, "2990": 951253632.0, "2995": 978315648.0, "3000": 969276928.0, "3005": 974685952.0, "3010": 950235008.0, "3015": 943841536.0, "3020": 958441088.0, "3025": 975185280.0, "3030": 965018624.0, "3035": 963454336.0, "3040": 952133504.0, "3045": 989793664.0, "3050": 965545920.0, "3055": 982522240.0, "3060": 971227392.0, "3065": 943916928.0, "3070": 978408192.0, "3075": 975205760.0, "3080": 960992640.0, "3085": 962353152.0, "3090": 945953792.0, "3095": 938116928.0, "3100": 972929152.0, "3105": 961989760.0, "3110": 970657216.0, "3115": 963390912.0, "3120": 947115520.0, "3125": 972720320.0, "3130": 952972992.0, "3135": 966042048.0, "3140": 968489088.0, "3145": 937852288.0, "3150": 975010176.0, "3155": 976814720.0, "3160": 969629632.0, "3165": 982195328.0, "3170": 937961088.0, "3175": 953825408.0, "3180": 983809280.0, "3185": 965170048.0, "3190": 968482688.0, "3195": 950933568.0, "3200": 945102656.0, "3205": 959863424.0, "3210": 957487360.0, "3215": 958021440.0, "3220": 968129792.0, "3225": 935614656.0, "3230": 962590336.0, "3235": 975776128.0, "3240": 962621888.0, "3245": 981275008.0, "3250": 943260928.0, "3255": 954598016.0, "3260": 980364608.0, "3265": 963620736.0, "3270": 965163520.0, "3275": 959732608.0, "3280": 967045888.0, "3285": 982476544.0, "3290": 947690816.0, "3295": 966423424.0, "3300": 959166720.0, "3305": 949131328.0, "3310": 979510784.0, "3315": 964283008.0, "3320": 969218432.0, "3325": 956192256.0, "3330": 941165056.0, "3335": 964972736.0, "3340": 956901440.0, "3345": 972513088.0, "3350": 964588928.0, "3355": 943358592.0, "3360": 970050368.0, "3365": 969467136.0, "3370": 954775680.0, "3375": 958690432.0, "3380": 971475584.0, "3385": 947973632.0, "3390": 965793344.0, "3395": 978390720.0, "3400": 978138368.0, "3405": 976724480.0, "3410": 924217216.0, "3415": 955436544.0, "3420": 971844864.0, "3425": 977167232.0, "3430": 973851136.0, "3435": 936082496.0, "3440": 970527616.0, "3445": 957329792.0, "3450": 959852160.0, "3455": 963865728.0, "3460": 967907200.0, "3465": 931342720.0, "3470": 952348416.0, "3475": 973728576.0, "3480": 959750528.0, "3485": 979985152.0, "3490": 944695104.0, "3495": 953941696.0, "3500": 969354048.0, "3505": 964372608.0, "3510": 971234944.0, "3515": 955970816.0, "3520": 958758912.0, "3525": 971938688.0, "3530": 964149056.0, "3535": 983228800.0, "3540": 937527232.0, "3545": 944780416.0, "3550": 984511872.0, "3555": 978082048.0, "3560": 974410880.0, "3565": 968826752.0, "3570": 946732672.0, "3575": 976129920.0, "3580": 977505728.0, "3585": 954593024.0, "3590": 956438528.0, "3595": 951456320.0, "3600": 989021888.0, "3605": 962044992.0, "3610": 965078272.0, "3615": 974672000.0, "3620": 954912448.0, "3625": 939540864.0, "3630": 990186048.0, "3635": 971459200.0, "3640": 976049792.0, "3645": 961513472.0, "3650": 945840000.0, "3655": 965794816.0, "3660": 976220224.0, "3665": 964032896.0, "3670": 977469824.0, "3675": 943487872.0, "3680": 958209984.0, "3685": 964311104.0, "3690": 982128640.0, "3695": 963150464.0, "3700": 950582848.0, "3705": 947370112.0, "3710": 982403584.0, "3715": 972693760.0, "3720": 976175360.0, "3725": 964076032.0, "3730": 948875008.0, "3735": 967116288.0, "3740": 960997632.0, "3745": 969346560.0, "3750": 963972352.0, "3755": 953445760.0, "3760": 976691328.0, "3765": 979853952.0, "3770": 972408448.0, "3775": 972410240.0, "3780": 952609792.0, "3785": 960262272.0, "3790": 985610176.0, "3795": 969220480.0, "3800": 957914496.0, "3805": 972445376.0, "3810": 954542976.0, "3815": 974604544.0, "3820": 963044288.0, "3825": 962107776.0, "3830": 969444224.0, "3835": 934770176.0, "3840": 971294080.0, "3845": 986874368.0, "3850": 968897920.0, "3855": 965297024.0, "3860": 948082176.0, "3865": 975097216.0, "3870": 985136256.0, "3875": 983078784.0, "3880": 963664512.0, "3885": 953048960.0, "3890": 960307840.0, "3895": 960639488.0, "3900": 984950080.0, "3905": 976260608.0, "3910": 987360064.0, "3915": 946042496.0, "3920": 974892032.0, "3925": 961246720.0, "3930": 976801408.0, "3935": 978925696.0, "3940": 950316928.0, "3945": 960310272.0, "3950": 974222016.0, "3955": 972991488.0, "3960": 974090816.0, "3965": 950898688.0, "3970": 980705152.0, "3975": 960762624.0, "3980": 977556224.0, "3985": 962986240.0, "3990": 972767744.0, "3995": 953702080.0, "4000": 974988928.0, "4005": 971683200.0, "4010": 978432384.0, "4015": 971516928.0, "4020": 950334144.0, "4025": 968445888.0, "4030": 997972480.0, "4035": 978572736.0, "4040": 959825152.0, "4045": 939676160.0, "4050": 944743936.0, "4055": 980984448.0, "4060": 977708672.0, "4065": 975730496.0, "4070": 942177600.0, "4075": 945777984.0, "4080": 988765184.0, "4085": 962093184.0, "4090": 983368320.0, "4095": 986970560.0, "4100": 957211520.0, "4105": 954111424.0, "4110": 966511488.0, "4115": 976029568.0, "4120": 983544512.0, "4125": 960066432.0, "4130": 967277440.0, "4135": 971443136.0, "4140": 963207744.0, "4145": 956211136.0, "4150": 960331648.0, "4155": 946241600.0, "4160": 968451520.0, "4165": 970375936.0, "4170": 971983872.0, "4175": 955886208.0, "4180": 940996864.0, "4185": 968306752.0, "4190": 968015360.0, "4195": 989235968.0, "4200": 962678400.0, "4205": 960594816.0, "4210": 971851008.0, "4215": 974157568.0, "4220": 981156608.0, "4225": 975256192.0, "4230": 952688704.0, "4235": 958478784.0, "4240": 966798464.0, "4245": 961744512.0, "4250": 965856896.0, "4255": 958280192.0, "4260": 949568832.0, "4265": 964137856.0, "4270": 978409024.0, "4275": 975433856.0, "4280": 962670848.0, "4285": 951463680.0, "4290": 980041472.0, "4295": 968843136.0, "4300": 958285312.0, "4305": 966803328.0, "4310": 939625088.0, "4315": 949415936.0, "4320": 984547456.0, "4325": 982583424.0, "4330": 974770304.0, "4335": 949458624.0, "4340": 959490624.0, "4345": 956632576.0, "4350": 979935872.0, "4355": 968856960.0, "4360": 966351424.0, "4365": 941246080.0, "4370": 969507200.0, "4375": 972995328.0, "4380": 966159552.0, "4385": 972036288.0, "4390": 954165312.0, "4395": 951880576.0, "4400": 973640960.0, "4405": 972466240.0, "4410": 967907712.0, "4415": 958972672.0, "4420": 960790144.0, "4425": 976428928.0, "4430": 965967104.0, "4435": 975925632.0, "4440": 962330112.0, "4445": 954848512.0, "4450": 978344256.0, "4455": 960242176.0, "4460": 968569216.0, "4465": 968842880.0, "4470": 944126592.0, "4475": 951997952.0, "4480": 978837248.0, "4485": 968316928.0, "4490": 957012288.0, "4495": 938974272.0, "4500": 953252416.0, "4505": 977178368.0, "4510": 978738304.0, "4515": 962492288.0, "4520": 958812224.0, "4525": 958125952.0, "4530": 964391296.0, "4535": 976629376.0, "4540": 976860288.0, "4545": 970157568.0, "4550": 953211456.0, "4555": 959565632.0, "4560": 972473792.0, "4565": 973454848.0, "4570": 978908992.0, "4575": 957797440.0, "4580": 963167744.0, "4585": 957357184.0, "4590": 986555520.0, "4595": 960164864.0, "4600": 952241728.0, "4605": 959297984.0, "4610": 963723264.0, "4615": 957951168.0, "4620": 960230912.0, "4625": 973810240.0, "4630": 944483712.0, "4635": 977037248.0, "4640": 960284800.0, "4645": 981980800.0, "4650": 962374848.0, "4655": 939420544.0, "4660": 963988864.0, "4665": 962541056.0, "4670": 976623872.0, "4675": 963453824.0, "4680": 957438720.0, "4685": 949637888.0, "4690": 956936768.0, "4695": 969901696.0, "4700": 961177536.0, "4705": 970756800.0, "4710": 934432448.0, "4715": 970413824.0, "4720": 966300416.0, "4725": 980290176.0, "4730": 965825536.0, "4735": 937858176.0, "4740": 960074880.0, "4745": 975984896.0, "4750": 967959424.0, "4755": 984923136.0, "4760": 959111680.0, "4765": 955212096.0, "4770": 958584832.0, "4775": 991080640.0, "4780": 976818688.0, "4785": 967545088.0, "4790": 943707648.0, "4795": 955763328.0, "4800": 967709696.0, "4805": 976505216.0, "4810": 965123584.0, "4815": 957970176.0, "4820": 973992448.0, "4825": 961457472.0, "4830": 962576832.0, "4835": 972495104.0, "4840": 948839744.0, "4845": 965667200.0, "4850": 960267136.0, "4855": 964069376.0, "4860": 962953024.0, "4865": 967433472.0, "4870": 957145920.0, "4875": 983537280.0, "4880": 957047360.0, "4885": 977003904.0, "4890": 959703488.0, "4895": 942151680.0, "4900": 973638400.0, "4905": 975163712.0, "4910": 969207552.0, "4915": 970088448.0, "4920": 941181696.0, "4925": 954772416.0, "4930": 976996416.0, "4935": 963761216.0, "4940": 972585728.0, "4945": 960018944.0, "4950": 940782080.0, "4955": 968048256.0, "4960": 976825664.0, "4965": 960982912.0, "4970": 958604352.0, "4975": 933713216.0, "4980": 960811776.0, "4985": 962999552.0, "4990": 963527936.0, "4995": 986247040.0, "5000": 940714944.0, "5005": 968924352.0, "5010": 970258944.0, "5015": 965221632.0, "5020": 966701888.0, "5025": 949398016.0, "5030": 953514752.0, "5035": 967340288.0, "5040": 955652480.0, "5045": 969141120.0, "5050": 953404800.0, "5055": 954798720.0, "5060": 963040896.0, "5065": 952169856.0, "5070": 973607616.0, "5075": 978580288.0, "5080": 942865024.0, "5085": 965814016.0, "5090": 972844160.0, "5095": 964386112.0, "5100": 958342336.0, "5105": 965354112.0, "5110": 950414208.0, "5115": 972297088.0, "5120": 960388928.0, "5125": 969730304.0, "5130": 938733568.0, "5135": 943683904.0, "5140": 969962176.0, "5145": 968627136.0, "5150": 970637696.0, "5155": 972646912.0, "5160": 926588032.0, "5165": 961653568.0, "5170": 966862336.0, "5175": 966085632.0, "5180": 963617792.0, "5185": 930827136.0, "5190": 949853504.0, "5195": 972470912.0, "5200": 973771648.0, "5205": 968212608.0, "5210": 960515200.0, "5215": 928840320.0, "5220": 979186304.0, "5225": 984766976.0, "5230": 975047424.0, "5235": 975104256.0, "5240": 944325120.0, "5245": 970871104.0, "5250": 972475904.0, "5255": 966981376.0, "5260": 976765760.0, "5265": 942308032.0, "5270": 969336064.0, "5275": 970121984.0, "5280": 962825728.0, "5285": 964206976.0, "5290": 932550272.0, "5295": 951786880.0, "5300": 975661568.0, "5305": 951964672.0, "5310": 968040640.0, "5315": 955780032.0, "5320": 950957696.0, "5325": 973114368.0, "5330": 967843328.0, "5335": 967585024.0, "5340": 966491520.0, "5345": 963010304.0, "5350": 978888192.0, "5355": 972313600.0, "5360": 963857024.0, "5365": 965220928.0, "5370": 947898880.0, "5375": 948931136.0, "5380": 967272320.0, "5385": 980504000.0, "5390": 965336832.0, "5395": 955123392.0, "5400": 948311360.0, "5405": 974304768.0, "5410": 967856000.0, "5415": 976076032.0, "5420": 967407488.0, "5425": 937388224.0, "5430": 963882816.0, "5435": 971849280.0, "5440": 969163776.0, "5445": 957476480.0, "5450": 919419072.0, "5455": 952027008.0, "5460": 962375040.0, "5465": 978855424.0, "5470": 980968448.0, "5475": 941672192.0, "5480": 955792000.0, "5485": 964987968.0, "5490": 976035584.0, "5495": 962780864.0, "5500": 971241984.0, "5505": 956903296.0, "5510": 968524544.0, "5515": 945466560.0, "5520": 963207488.0, "5525": 975944960.0, "5530": 936671872.0, "5535": 970682240.0, "5540": 960320128.0, "5545": 972024064.0, "5550": 967908992.0, "5555": 955919232.0, "5560": 954446976.0, "5565": 968850368.0, "5570": 945198336.0, "5575": 960514176.0, "5580": 960587456.0, "5585": 959445824.0, "5590": 977692928.0, "5595": 975170944.0, "5600": 962984576.0, "5605": 964115584.0, "5610": 943194496.0, "5615": 966565504.0, "5620": 963285888.0, "5625": 982337024.0, "5630": 976099392.0, "5635": 957322560.0, "5640": 951529472.0, "5645": 967755968.0, "5650": 979070464.0, "5655": 983571456.0, "5660": 956566272.0, "5665": 953463296.0, "5670": 966110464.0, "5675": 967689088.0, "5680": 978610752.0, "5685": 962142592.0, "5690": 935932416.0, "5695": 963727296.0, "5700": 952561728.0, "5705": 974524608.0, "5710": 971206400.0, "5715": 946109952.0, "5720": 975028480.0, "5725": 967436672.0, "5730": 978450304.0, "5735": 964807360.0, "5740": 943611008.0, "5745": 971105024.0, "5750": 981888960.0, "5755": 956728064.0, "5760": 963584512.0, "5765": 957669376.0, "5770": 955648896.0, "5775": 970773312.0, "5780": 962799360.0, "5785": 970564352.0, "5790": 974579328.0, "5795": 949900544.0, "5800": 965987072.0, "5805": 968750464.0, "5810": 976024064.0, "5815": 970005696.0, "5820": 936364480.0, "5825": 969315328.0, "5830": 977724928.0, "5835": 974979136.0, "5840": 963209280.0, "5845": 968678400.0, "5850": 943021312.0, "5855": 976035840.0, "5860": 979463680.0, "5865": 978418944.0, "5870": 968886656.0, "5875": 942291328.0, "5880": 964409984.0, "5885": 974708352.0, "5890": 972781824.0, "5895": 965571456.0, "5900": 941310144.0, "5905": 961779200.0, "5910": 958470912.0, "5915": 968160768.0, "5920": 977492288.0, "5925": 959494272.0, "5930": 946798720.0, "5935": 952284544.0, "5940": 977587968.0, "5945": 984831104.0, "5950": 980578944.0, "5955": 934946816.0, "5960": 961760256.0, "5965": 965852672.0, "5970": 970612608.0, "5975": 961785600.0, "5980": 958160384.0, "5985": 964575872.0, "5990": 973541120.0, "5995": 955821440.0, "6000": 955485568.0, "6005": 961342720.0, "6010": 952682880.0, "6015": 974317184.0, "6020": 978216448.0, "6025": 972189376.0, "6030": 955288128.0, "6035": 946880512.0, "6040": 962776960.0, "6045": 983706304.0, "6050": 956355200.0, "6055": 963458560.0, "6060": 945741952.0, "6065": 958394880.0, "6070": 978267072.0, "6075": 978026112.0, "6080": 957443456.0, "6085": 947604736.0, "6090": 953622080.0, "6095": 964659328.0, "6100": 979826432.0, "6105": 971098368.0, "6110": 961828032.0, "6115": 943679552.0, "6120": 968549376.0, "6125": 960604160.0, "6130": 984126144.0, "6135": 960980992.0, "6140": 958646400.0, "6145": 971176000.0, "6150": 968357504.0, "6155": 974880896.0, "6160": 977191040.0, "6165": 952705792.0, "6170": 950969152.0, "6175": 963273920.0, "6180": 969708800.0, "6185": 966320128.0, "6190": 963596800.0, "6195": 947069440.0, "6200": 969132352.0, "6205": 967002240.0, "6210": 959254016.0, "6215": 972828416.0, "6220": 936351360.0, "6225": 978679104.0, "6230": 976121472.0, "6235": 971654784.0, "6240": 965933696.0, "6245": 955931072.0, "6250": 956485888.0, "6255": 973227264.0, "6260": 978829184.0, "6265": 974893120.0, "6270": 958694912.0, "6275": 963725312.0, "6280": 973091840.0, "6285": 965974912.0, "6290": 970880320.0, "6295": 987268608.0, "6300": 947369216.0, "6305": 964783360.0, "6310": 979004032.0, "6315": 978417792.0, "6320": 971626560.0, "6325": 923014400.0, "6330": 959219584.0, "6335": 974817664.0, "6340": 984700800.0, "6345": 966798848.0, "6350": 944611840.0, "6355": 958100992.0, "6360": 972563712.0, "6365": 972293888.0, "6370": 958991680.0, "6375": 966944384.0, "6380": 951202816.0, "6385": 973499008.0, "6390": 965318400.0, "6395": 975006912.0, "6400": 984129152.0, "6405": 943990976.0, "6410": 977583104.0, "6415": 971573632.0, "6420": 956320640.0, "6425": 961023360.0, "6430": 957267072.0, "6435": 960063232.0, "6440": 968848832.0, "6445": 973282304.0, "6450": 974022144.0, "6455": 962043520.0, "6460": 941032000.0, "6465": 974249088.0, "6470": 979850880.0, "6475": 960713600.0, "6480": 967655360.0, "6485": 948745280.0, "6490": 970785664.0, "6495": 988369472.0, "6500": 980186752.0, "6505": 971888256.0, "6510": 951427328.0, "6515": 957614080.0, "6520": 978606592.0, "6525": 978695168.0, "6530": 973222464.0, "6535": 967718784.0, "6540": 950270976.0, "6545": 966207872.0, "6550": 979370880.0, "6555": 967204864.0, "6560": 975181952.0, "6565": 949338112.0, "6570": 951969536.0, "6575": 962405568.0, "6580": 975741056.0, "6585": 979138944.0, "6590": 948979584.0, "6595": 961312128.0, "6600": 961120640.0, "6605": 961620224.0, "6610": 985126144.0, "6615": 959507072.0, "6620": 944354944.0, "6625": 970843712.0, "6630": 971195520.0, "6635": 964018432.0, "6640": 959660800.0, "6645": 950943360.0, "6650": 978750208.0, "6655": 965764672.0, "6660": 968420992.0, "6665": 968632320.0, "6670": 932726656.0, "6675": 970662080.0, "6680": 968904704.0, "6685": 958660288.0, "6690": 956115968.0, "6695": 955389440.0, "6700": 962061056.0, "6705": 978946944.0, "6710": 970803456.0, "6715": 966703040.0, "6720": 973941120.0, "6725": 941769984.0, "6730": 979177984.0, "6735": 994463808.0, "6740": 976454592.0, "6745": 974430976.0, "6750": 939088000.0, "6755": 977686272.0, "6760": 969756672.0, "6765": 978417792.0, "6770": 975243136.0, "6775": 943412224.0, "6780": 947073920.0, "6785": 975137536.0, "6790": 960532224.0, "6795": 975996736.0, "6800": 973000128.0, "6805": 946719744.0, "6810": 958291328.0, "6815": 970598144.0, "6820": 977595136.0, "6825": 969094912.0, "6830": 950410816.0, "6835": 981538048.0, "6840": 983083712.0, "6845": 948799936.0, "6850": 965379776.0, "6855": 953891904.0, "6860": 979162112.0, "6865": 983841472.0, "6870": 964854016.0, "6875": 978750144.0, "6880": 950162816.0, "6885": 958361216.0, "6890": 960264576.0, "6895": 965900160.0, "6900": 985333888.0, "6905": 968675136.0, "6910": 949826816.0, "6915": 971221248.0, "6920": 967154176.0, "6925": 965218176.0, "6930": 964827328.0, "6935": 951901184.0, "6940": 962897280.0, "6945": 986408832.0, "6950": 973498240.0, "6955": 964899648.0, "6960": 940223872.0, "6965": 974601280.0, "6970": 978443776.0, "6975": 985127168.0, "6980": 982543488.0, "6985": 959765632.0, "6990": 945299072.0, "6995": 987481984.0, "7000": 963280192.0, "7005": 962713472.0, "7010": 985046400.0, "7015": 945386112.0, "7020": 982823424.0, "7025": 968730752.0, "7030": 953341824.0, "7035": 982773504.0, "7040": 950581632.0, "7045": 955758080.0, "7050": 960010240.0, "7055": 963966080.0, "7060": 976753088.0, "7065": 968280192.0, "7070": 953400320.0, "7075": 956558976.0, "7080": 968794048.0, "7085": 965708416.0, "7090": 969042048.0, "7095": 959832896.0, "7100": 973245952.0, "7105": 973283200.0, "7110": 969972608.0, "7115": 958363392.0, "7120": 948867520.0, "7125": 963349824.0, "7130": 971607680.0, "7135": 964726656.0, "7140": 961275648.0, "7145": 930649856.0, "7150": 946305088.0, "7155": 991006976.0, "7160": 968614784.0, "7165": 956875328.0, "7170": 968422912.0, "7175": 955901632.0, "7180": 958281984.0, "7185": 984990464.0, "7190": 978589824.0, "7195": 973712576.0, "7200": 935967488.0, "7205": 957513792.0, "7210": 967226048.0, "7215": 969351296.0, "7220": 982454144.0, "7225": 928641920.0, "7230": 949769728.0, "7235": 966932800.0, "7240": 967041856.0, "7245": 967762048.0, "7250": 949565376.0, "7255": 957351296.0, "7260": 969857280.0, "7265": 975096960.0, "7270": 960057856.0, "7275": 959274880.0, "7280": 957208448.0, "7285": 977515264.0, "7290": 977360128.0, "7295": 963293952.0, "7300": 975425920.0, "7305": 964146176.0, "7310": 977369216.0, "7315": 966995008.0, "7320": 974626944.0, "7325": 967005952.0, "7330": 959789824.0, "7335": 963853184.0, "7340": 977766656.0, "7345": 967866112.0, "7350": 984941440.0, "7355": 959652224.0, "7360": 948880832.0, "7365": 972736512.0, "7370": 982668288.0, "7375": 963395968.0, "7380": 964186752.0, "7385": 948542976.0, "7390": 964009216.0, "7395": 958504448.0, "7400": 969985664.0, "7405": 987712256.0, "7410": 952128640.0, "7415": 950653056.0, "7420": 966785152.0, "7425": 983124352.0, "7430": 965774336.0, "7435": 972987648.0, "7440": 937093504.0, "7445": 969099520.0, "7450": 980130752.0, "7455": 971625216.0, "7460": 972011904.0, "7465": 939610240.0, "7470": 971876992.0, "7475": 957910656.0, "7480": 969111552.0, "7485": 961649920.0, "7490": 934195584.0, "7495": 957038976.0, "7500": 969204224.0, "7505": 970185600.0, "7510": 972381568.0, "7515": 979644544.0, "7520": 951920512.0, "7525": 970236416.0, "7530": 954386048.0, "7535": 971174016.0, "7540": 979535296.0, "7545": 959308928.0, "7550": 960342528.0, "7555": 960060288.0, "7560": 970357312.0, "7565": 955146112.0, "7570": 942534912.0, "7575": 965483392.0, "7580": 982372288.0, "7585": 978797248.0, "7590": 970353152.0, "7595": 950017536.0, "7600": 946042880.0, "7605": 982403968.0, "7610": 969081664.0, "7615": 988479104.0, "7620": 957073920.0, "7625": 941467712.0, "7630": 971465216.0, "7635": 984510720.0, "7640": 983434368.0, "7645": 968332288.0, "7650": 959172608.0, "7655": 962498048.0, "7660": 969014912.0, "7665": 978269952.0, "7670": 975339712.0, "7675": 975608704.0, "7680": 942958208.0, "7685": 959898112.0, "7690": 975341184.0, "7695": 982332032.0, "7700": 979531264.0, "7705": 941062400.0, "7710": 974640384.0, "7715": 979357376.0, "7720": 967848448.0, "7725": 960256704.0, "7730": 943374528.0, "7735": 968376704.0, "7740": 979934720.0, "7745": 964236416.0, "7750": 963842176.0, "7755": 959838016.0, "7760": 970692480.0, "7765": 970913088.0, "7770": 962584064.0, "7775": 981736832.0, "7780": 965048064.0, "7785": 959641216.0, "7790": 968295936.0, "7795": 968748672.0, "7800": 971660032.0, "7805": 968442240.0, "7810": 945554816.0, "7815": 963381248.0, "7820": 974272384.0, "7825": 963600512.0, "7830": 957470336.0, "7835": 949993088.0, "7840": 956876672.0, "7845": 954004608.0, "7850": 979796224.0, "7855": 986867648.0, "7860": 947485696.0, "7865": 949192064.0, "7870": 965064832.0, "7875": 975690112.0, "7880": 968676096.0, "7885": 969298560.0, "7890": 951869184.0, "7895": 974683520.0, "7900": 963584960.0, "7905": 963817664.0, "7910": 965721216.0, "7915": 943329024.0, "7920": 950845312.0, "7925": 969377152.0, "7930": 964895040.0, "7935": 984167168.0, "7940": 964855296.0, "7945": 950477888.0, "7950": 962049344.0, "7955": 980107072.0, "7960": 963665024.0, "7965": 952922752.0, "7970": 951568896.0, "7975": 969586688.0, "7980": 965489216.0, "7985": 959361920.0, "7990": 967723904.0, "7995": 946595840.0, "8000": 962583296.0, "8005": 980285696.0, "8010": 965777536.0, "8015": 982931264.0, "8020": 960893952.0, "8025": 965003648.0, "8030": 958139392.0, "8035": 975788928.0, "8040": 960643840.0, "8045": 948186496.0, "8050": 959476608.0, "8055": 979199232.0, "8060": 969518080.0, "8065": 958173568.0, "8070": 964066432.0, "8075": 942119424.0, "8080": 966231680.0, "8085": 967123328.0, "8090": 983810624.0, "8095": 988589312.0, "8100": 966618624.0, "8105": 944351936.0, "8110": 969080960.0, "8115": 985832064.0, "8120": 975143936.0, "8125": 964386944.0, "8130": 966519936.0, "8135": 967785472.0, "8140": 964105600.0, "8145": 995444608.0, "8150": 973583744.0, "8155": 938598656.0, "8160": 964389760.0, "8165": 973000128.0, "8170": 968399616.0, "8175": 961771968.0, "8180": 936386560.0, "8185": 962859648.0, "8190": 968107648.0, "8195": 977299648.0, "8200": 956551680.0, "8205": 960689728.0, "8210": 946557504.0, "8215": 982399168.0, "8220": 988222336.0, "8225": 966145024.0, "8230": 962627200.0, "8235": 933984704.0, "8240": 980219328.0, "8245": 976447168.0, "8250": 964168960.0, "8255": 977296640.0, "8260": 956760320.0, "8265": 982798080.0, "8270": 952725888.0, "8275": 974345280.0, "8280": 974266944.0, "8285": 953310016.0, "8290": 939739392.0, "8295": 981224192.0, "8300": 973285056.0, "8305": 978110464.0, "8310": 951379712.0, "8315": 937898112.0, "8320": 977203200.0, "8325": 967860608.0, "8330": 990289920.0, "8335": 975931840.0, "8340": 947415936.0, "8345": 970604800.0, "8350": 970242560.0, "8355": 974916352.0, "8360": 979823232.0, "8365": 932732416.0, "8370": 965341568.0, "8375": 979768512.0, "8380": 965163008.0, "8385": 972866304.0, "8390": 962884608.0, "8395": 951090176.0, "8400": 972495296.0, "8405": 951600128.0, "8410": 960753536.0, "8415": 965672320.0, "8420": 941336192.0, "8425": 967933184.0, "8430": 960999552.0, "8435": 966164288.0, "8440": 969584704.0, "8445": 952885376.0, "8450": 984278080.0, "8455": 990309504.0, "8460": 968657472.0, "8465": 967061248.0, "8470": 962886016.0, "8475": 943167040.0, "8480": 987175808.0, "8485": 979849216.0, "8490": 992145920.0, "8495": 971360768.0, "8500": 951416832.0, "8505": 982946240.0, "8510": 973928768.0, "8515": 968727424.0, "8520": 961237760.0, "8525": 944984448.0, "8530": 984022144.0, "8535": 978191360.0, "8540": 967986944.0, "8545": 968830720.0, "8550": 942016000.0, "8555": 971328000.0, "8560": 958455424.0, "8565": 975368896.0, "8570": 974863680.0, "8575": 971251328.0, "8580": 931772672.0, "8585": 965746176.0, "8590": 978872512.0, "8595": 978823808.0, "8600": 983768448.0, "8605": 957877696.0, "8610": 983824064.0, "8615": 977404160.0, "8620": 963240832.0, "8625": 979195712.0, "8630": 943359360.0, "8635": 961699968.0, "8640": 973168128.0, "8645": 970280576.0, "8650": 969437056.0, "8655": 970777024.0, "8660": 944447296.0, "8665": 986252928.0, "8670": 960562944.0, "8675": 974156544.0, "8680": 962545600.0, "8685": 955619584.0, "8690": 978649344.0, "8695": 968671360.0, "8700": 972545664.0, "8705": 973533184.0, "8710": 947103872.0, "8715": 973601152.0, "8720": 958241472.0, "8725": 978965504.0, "8730": 985525888.0, "8735": 952264832.0, "8740": 940743616.0, "8745": 987691072.0, "8750": 971851840.0, "8755": 971516864.0, "8760": 965213632.0, "8765": 934667136.0, "8770": 986116224.0, "8775": 969482688.0, "8780": 967209344.0, "8785": 961878272.0, "8790": 947539968.0, "8795": 969136896.0, "8800": 970631104.0, "8805": 973104192.0, "8810": 983295488.0, "8815": 951557056.0, "8820": 939391232.0, "8825": 964419968.0, "8830": 981041280.0, "8835": 971360768.0, "8840": 979209472.0, "8845": 950879232.0, "8850": 986147072.0, "8855": 970943104.0, "8860": 961275840.0, "8865": 956904064.0, "8870": 945825152.0, "8875": 968124672.0, "8880": 983718464.0, "8885": 971306176.0, "8890": 969710400.0, "8895": 952431232.0, "8900": 961792512.0, "8905": 976569216.0, "8910": 981655872.0, "8915": 980585088.0, "8920": 967791104.0, "8925": 939956224.0, "8930": 970308160.0, "8935": 964089664.0, "8940": 977498496.0, "8945": 981753280.0, "8950": 945894016.0, "8955": 972228160.0, "8960": 973184512.0, "8965": 973432320.0, "8970": 966054272.0, "8975": 936486272.0, "8980": 952668928.0, "8985": 977594560.0, "8990": 967118080.0, "8995": 980344704.0, "9000": 952151552.0, "9005": 950465216.0, "9010": 974875904.0, "9015": 982693952.0, "9020": 958984576.0, "9025": 978882880.0, "9030": 953347072.0, "9035": 968492416.0, "9040": 978109824.0, "9045": 968253056.0, "9050": 982816192.0, "9055": 947639424.0, "9060": 956281344.0, "9065": 969546368.0, "9070": 967695936.0, "9075": 980611392.0, "9080": 952467584.0, "9085": 971135168.0, "9090": 963409216.0, "9095": 968092672.0, "9100": 974166976.0, "9105": 959837248.0, "9110": 947530496.0, "9115": 956208000.0, "9120": 985150336.0, "9125": 962891072.0, "9130": 958432320.0, "9135": 951398848.0, "9140": 967024576.0, "9145": 977211776.0, "9150": 986827072.0, "9155": 976526208.0, "9160": 957535104.0, "9165": 950255488.0, "9170": 988185280.0, "9175": 971250304.0, "9180": 967313344.0, "9185": 955040640.0, "9190": 956463936.0, "9195": 965782336.0, "9200": 968548672.0, "9205": 967095936.0, "9210": 984474048.0, "9215": 931338752.0, "9220": 949489600.0, "9225": 970800896.0, "9230": 970693120.0, "9235": 971328960.0, "9240": 959487616.0, "9245": 963670720.0, "9250": 961232576.0, "9255": 982689408.0, "9260": 978928128.0, "9265": 952523392.0, "9270": 948933440.0, "9275": 978121472.0, "9280": 977484992.0, "9285": 961968256.0, "9290": 979059520.0, "9295": 958266944.0, "9300": 965323072.0, "9305": 968641600.0, "9310": 972522432.0, "9315": 975739584.0, "9320": 947739712.0, "9325": 979369664.0, "9330": 977595584.0, "9335": 975341248.0, "9340": 959953856.0, "9345": 942886528.0, "9350": 952369152.0, "9355": 963106176.0, "9360": 959864576.0, "9365": 983288960.0, "9370": 982299584.0, "9375": 941577280.0, "9380": 982577728.0, "9385": 984914560.0, "9390": 972902784.0, "9395": 978331072.0, "9400": 937841216.0, "9405": 968278976.0, "9410": 981422080.0, "9415": 991440256.0, "9420": 960023168.0, "9425": 956607104.0, "9430": 938412992.0, "9435": 974240832.0, "9440": 959112640.0, "9445": 973684992.0, "9450": 961275584.0, "9455": 945837440.0, "9460": 978074368.0, "9465": 988064768.0, "9470": 962891008.0, "9475": 983403904.0, "9480": 930682496.0, "9485": 987380224.0, "9490": 963581312.0, "9495": 972219648.0, "9500": 982010560.0, "9505": 969793600.0, "9510": 964137152.0, "9515": 956654464.0, "9520": 947856832.0, "9525": 964672320.0, "9530": 958002496.0, "9535": 951122368.0, "9540": 953887040.0, "9545": 978878976.0, "9550": 955607232.0, "9555": 952779264.0, "9560": 958038784.0, "9565": 969906688.0, "9570": 977382208.0, "9575": 958640832.0, "9580": 962532800.0, "9585": 946074944.0, "9590": 948038272.0, "9595": 966578240.0, "9600": 983999744.0, "9605": 984897152.0, "9610": 943271552.0, "9615": 952518272.0, "9620": 980526592.0, "9625": 977971264.0, "9630": 969622144.0, "9635": 974327744.0, "9640": 939839744.0, "9645": 961873344.0, "9650": 970304960.0, "9655": 987041792.0, "9660": 963000512.0, "9665": 949370112.0, "9670": 966014656.0, "9675": 962892160.0, "9680": 964376576.0, "9685": 986230144.0, "9690": 940105920.0, "9695": 950214912.0, "9700": 975383360.0, "9705": 972294656.0, "9710": 966858240.0, "9715": 971231232.0, "9720": 940085376.0, "9725": 965705216.0, "9730": 973584384.0, "9735": 973793344.0, "9740": 970939520.0, "9745": 950754432.0, "9750": 979162240.0, "9755": 969999296.0, "9760": 967597760.0, "9765": 963382592.0, "9770": 952393984.0, "9775": 956338688.0, "9780": 969874176.0, "9785": 957877056.0, "9790": 960476928.0, "9795": 957698432.0, "9800": 949169216.0, "9805": 961902208.0, "9810": 978356480.0, "9815": 977139200.0, "9820": 982114496.0, "9825": 938686592.0, "9830": 968840192.0, "9835": 972559616.0, "9840": 971219392.0, "9845": 966595072.0, "9850": 946261504.0, "9855": 957007040.0, "9860": 987089088.0, "9865": 969659520.0, "9870": 989806144.0, "9875": 956434944.0, "9880": 930822016.0, "9885": 962916544.0, "9890": 971889408.0, "9895": 983366016.0, "9900": 956250624.0, "9905": 938542912.0, "9910": 978160192.0, "9915": 973259712.0, "9920": 943713344.0, "9925": 962487040.0, "9930": 947295040.0, "9935": 960536896.0, "9940": 965587008.0, "9945": 958371328.0, "9950": 963564544.0, "9955": 942922624.0, "9960": 966402432.0, "9965": 983081024.0, "9970": 966496320.0, "9975": 963193152.0, "9980": 980255104.0, "9985": 942169024.0, "9990": 976220544.0, "9995": 982398912.0, "10000": 971531648.0, "10005": 969492928.0, "10010": 943791424.0, "10015": 982674240.0, "10020": 977669504.0, "10025": 979205760.0, "10030": 970900416.0, "10035": 946114688.0, "10040": 949810432.0, "10045": 977387264.0, "10050": 985182208.0, "10055": 989743168.0, "10060": 958221312.0, "10065": 946718912.0, "10070": 966760256.0, "10075": 978687872.0, "10080": 971265408.0, "10085": 974315904.0, "10090": 943627520.0, "10095": 962400704.0, "10100": 971718016.0, "10105": 975678912.0, "10110": 971575744.0, "10115": 948170944.0, "10120": 962054528.0, "10125": 973397376.0, "10130": 979950592.0, "10135": 971671360.0, "10140": 957388224.0, "10145": 933799744.0, "10150": 973008320.0, "10155": 969396992.0, "10160": 961593344.0, "10165": 974493440.0, "10170": 943961728.0, "10175": 978597760.0, "10180": 983307584.0, "10185": 978366912.0, "10190": 954844288.0, "10195": 936670272.0, "10200": 987159936.0, "10205": 972259328.0, "10210": 966114368.0, "10215": 975225536.0, "10220": 947989376.0, "10225": 949748160.0, "10230": 975372288.0, "10235": 953414464.0, "10240": 969235520.0, "10245": 961465856.0, "10250": 935823168.0, "10255": 978915072.0, "10260": 964238976.0, "10265": 967060480.0, "10270": 968363968.0, "10275": 935611392.0, "10280": 969193728.0, "10285": 995604480.0, "10290": 979061824.0, "10295": 980998592.0, "10300": 951447680.0, "10305": 971312128.0, "10310": 959547520.0, "10315": 970966464.0, "10320": 984654272.0, "10325": 982749888.0, "10330": 934349504.0, "10335": 975888128.0, "10340": 957223552.0, "10345": 972810496.0, "10350": 984142400.0, "10355": 941619200.0, "10360": 961565568.0, "10365": 973863168.0, "10370": 980199040.0, "10375": 969594112.0, "10380": 961182976.0, "10385": 954925568.0, "10390": 989898624.0, "10395": 964267008.0, "10400": 960273792.0, "10405": 949623808.0, "10410": 954532352.0, "10415": 975162752.0, "10420": 966792320.0, "10425": 969199040.0, "10430": 964069888.0, "10435": 962545600.0, "10440": 971631232.0, "10445": 971803264.0, "10450": 974728320.0, "10455": 965557568.0, "10460": 947965184.0, "10465": 971264256.0, "10470": 972031744.0, "10475": 978801920.0, "10480": 996814080.0, "10485": 949004864.0, "10490": 934195712.0, "10495": 968703104.0, "10500": 977977024.0, "10505": 958765632.0, "10510": 950726272.0, "10515": 952908672.0, "10520": 971411712.0, "10525": 969315392.0, "10530": 969847616.0, "10535": 986083712.0, "10540": 946276672.0, "10545": 970004992.0, "10550": 968855808.0, "10555": 958968832.0, "10560": 975462784.0, "10565": 960871296.0, "10570": 968492800.0, "10575": 972301824.0, "10580": 960305024.0, "10585": 973136512.0, "10590": 951290880.0, "10595": 955962880.0, "10600": 967012608.0, "10605": 986195584.0, "10610": 966024704.0, "10615": 976146752.0, "10620": 940478208.0, "10625": 964642688.0, "10630": 967259968.0, "10635": 972449344.0, "10640": 974099520.0, "10645": 948127616.0, "10650": 965343744.0, "10655": 985206080.0, "10660": 976079808.0, "10665": 966646272.0, "10670": 954464384.0, "10675": 933841408.0, "10680": 985773312.0, "10685": 990660352.0, "10690": 963321088.0, "10695": 971319552.0, "10700": 949404032.0, "10705": 977602048.0, "10710": 967788736.0, "10715": 966799360.0, "10720": 965854720.0, "10725": 943986688.0, "10730": 979519616.0, "10735": 960466432.0, "10740": 970951296.0, "10745": 983647296.0, "10750": 981209600.0, "10755": 944527488.0, "10760": 969304832.0, "10765": 972296704.0, "10770": 973187648.0, "10775": 958036224.0, "10780": 948867584.0, "10785": 953068288.0, "10790": 969457600.0, "10795": 959855808.0, "10800": 971608832.0, "10805": 950772608.0, "10810": 973437376.0, "10815": 959226496.0, "10820": 970724992.0, "10825": 966829952.0, "10830": 956401600.0, "10835": 962487232.0, "10840": 970224704.0, "10845": 963579072.0, "10850": 957315072.0, "10855": 967337984.0, "10860": 950124608.0, "10865": 963582144.0, "10870": 982582592.0, "10875": 981586560.0, "10880": 957947264.0, "10885": 954281984.0, "10890": 972287616.0, "10895": 972880128.0, "10900": 970042432.0, "10905": 964422336.0, "10910": 938176704.0, "10915": 960308288.0, "10920": 982695680.0, "10925": 969317312.0, "10930": 967665664.0, "10935": 962479040.0, "10940": 953537408.0, "10945": 964545088.0, "10950": 971975296.0, "10955": 966140864.0, "10960": 971424896.0, "10965": 966086784.0, "10970": 983081472.0, "10975": 965366976.0, "10980": 974114944.0, "10985": 986277440.0, "10990": 950453952.0, "10995": 962922752.0, "11000": 984953856.0, "11005": 977749888.0, "11010": 970815488.0, "11015": 969346112.0, "11020": 947445632.0, "11025": 959428864.0, "11030": 977394304.0, "11035": 975477120.0, "11040": 985859200.0, "11045": 956130880.0, "11050": 972905280.0, "11055": 974138432.0, "11060": 961819520.0, "11065": 984976640.0, "11070": 949372416.0, "11075": 975928256.0, "11080": 971540160.0, "11085": 966603648.0, "11090": 976152896.0, "11095": 946217024.0, "11100": 965448576.0, "11105": 973316288.0, "11110": 980276864.0, "11115": 967505920.0, "11120": 956731648.0, "11125": 956339456.0, "11130": 974975616.0, "11135": 978936448.0, "11140": 964281216.0, "11145": 965823872.0, "11150": 935543488.0, "11155": 975312768.0, "11160": 983311040.0, "11165": 981528192.0, "11170": 977155584.0, "11175": 957494208.0, "11180": 961889728.0, "11185": 971511744.0, "11190": 979483072.0, "11195": 984803200.0, "11200": 982398720.0, "11205": 941640448.0, "11210": 984274944.0, "11215": 967065088.0, "11220": 982764544.0, "11225": 961334976.0, "11230": 952660352.0, "11235": 981251520.0, "11240": 977069056.0, "11245": 965755008.0, "11250": 968827520.0, "11255": 960019776.0, "11260": 979200000.0, "11265": 963361728.0, "11270": 980995968.0, "11275": 967899456.0, "11280": 955253376.0, "11285": 953011328.0, "11290": 955751744.0, "11295": 967620992.0, "11300": 961972736.0, "11305": 958059520.0, "11310": 945726016.0, "11315": 982324352.0, "11320": 963968704.0, "11325": 980413696.0, "11330": 974899968.0, "11335": 951559360.0, "11340": 969847168.0, "11345": 969238400.0, "11350": 981063552.0, "11355": 981524608.0, "11360": 940624448.0, "11365": 970136192.0, "11370": 978117760.0, "11375": 974649728.0, "11380": 967778432.0, "11385": 958096256.0, "11390": 937715072.0, "11395": 976315008.0, "11400": 973084416.0, "11405": 960831296.0, "11410": 965811968.0, "11415": 928796608.0, "11420": 963971136.0, "11425": 980680128.0, "11430": 978157504.0, "11435": 969637824.0, "11440": 944715776.0, "11445": 974552640.0, "11450": 983961600.0, "11455": 970947136.0, "11460": 964506752.0, "11465": 959389312.0, "11470": 954502528.0, "11475": 972359360.0, "11480": 956161408.0, "11485": 976658560.0, "11490": 986111168.0, "11495": 958519488.0, "11500": 969473792.0, "11505": 963606976.0, "11510": 976137792.0, "11515": 977707648.0, "11520": 953128192.0, "11525": 975596800.0, "11530": 976293120.0, "11535": 979613760.0, "11540": 974030464.0, "11545": 953114560.0, "11550": 952619584.0, "11555": 981548672.0, "11560": 984335232.0, "11565": 964873088.0, "11570": 965847488.0, "11575": 951204096.0, "11580": 975743488.0, "11585": 976616768.0, "11590": 968823168.0, "11595": 976264256.0, "11600": 945875200.0, "11605": 972757056.0, "11610": 981991616.0, "11615": 971641088.0, "11620": 968909184.0, "11625": 949000448.0, "11630": 937161152.0, "11635": 973286784.0, "11640": 980719104.0, "11645": 979713280.0, "11650": 970849024.0, "11655": 955638784.0, "11660": 980392960.0, "11665": 958056896.0, "11670": 982212864.0, "11675": 972230912.0, "11680": 955929344.0, "11685": 982193536.0, "11690": 968270464.0, "11695": 967632064.0, "11700": 973535680.0, "11705": 956015232.0, "11710": 964252992.0, "11715": 983036928.0, "11720": 983451072.0, "11725": 965093248.0, "11730": 955153984.0, "11735": 942437696.0, "11740": 973423936.0, "11745": 970891520.0, "11750": 961268224.0, "11755": 962944768.0, "11760": 949726912.0, "11765": 983187968.0, "11770": 983938496.0, "11775": 975204928.0, "11780": 984938624.0, "11785": 946781312.0, "11790": 972112384.0, "11795": 970007168.0, "11800": 972686784.0, "11805": 986386560.0, "11810": 967320640.0, "11815": 955383424.0, "11820": 973113728.0, "11825": 970450048.0, "11830": 974525632.0, "11835": 961396480.0, "11840": 944110464.0, "11845": 980448128.0, "11850": 974183232.0, "11855": 977463936.0, "11860": 971427904.0, "11865": 938336000.0, "11870": 939904768.0, "11875": 989052864.0, "11880": 971838400.0, "11885": 962717504.0, "11890": 969937280.0, "11895": 964864512.0, "11900": 978962688.0, "11905": 961394816.0, "11910": 983113472.0, "11915": 989501760.0, "11920": 944339840.0, "11925": 993843712.0, "11930": 964116928.0, "11935": 963072256.0, "11940": 976523008.0, "11945": 944381376.0, "11950": 977258368.0, "11955": 978576384.0, "11960": 971882752.0, "11965": 975708800.0, "11970": 962765568.0, "11975": 962834752.0, "11980": 977351808.0, "11985": 952718208.0, "11990": 968262720.0, "11995": 964812928.0, "12000": 958072704.0, "12005": 974252160.0, "12010": 979084352.0, "12015": 971941952.0, "12020": 972665920.0, "12025": 934382464.0, "12030": 968780288.0, "12035": 983867712.0, "12040": 977260608.0, "12045": 981523328.0, "12050": 931053376.0, "12055": 938395136.0, "12060": 974223808.0, "12065": 965353216.0, "12070": 967882688.0, "12075": 949509760.0, "12080": 952985408.0, "12085": 972292800.0, "12090": 963722816.0, "12095": 963491200.0, "12100": 976566208.0, "12105": 950003584.0, "12110": 971456192.0, "12115": 967868608.0, "12120": 986162304.0, "12125": 980404288.0, "12130": 941380480.0, "12135": 955380864.0, "12140": 975707968.0, "12145": 979354624.0, "12150": 979350592.0, "12155": 961571456.0, "12160": 946311168.0, "12165": 968243904.0, "12170": 964228416.0, "12175": 967336128.0, "12180": 974684480.0, "12185": 952824256.0, "12190": 988261120.0, "12195": 970531328.0, "12200": 964612992.0, "12205": 968632384.0, "12210": 939168256.0, "12215": 996811136.0, "12220": 969718208.0, "12225": 979311040.0, "12230": 980050752.0, "12235": 949942528.0, "12240": 963404800.0, "12245": 965396608.0, "12250": 976155328.0, "12255": 967979456.0, "12260": 983519040.0, "12265": 931414400.0, "12270": 966382592.0, "12275": 979565824.0, "12280": 977335296.0, "12285": 970173376.0, "12290": 929206400.0, "12295": 976676416.0, "12300": 985731648.0, "12305": 969487616.0, "12310": 985834624.0, "12315": 936090944.0, "12320": 957471872.0, "12325": 966228864.0, "12330": 967951104.0, "12335": 963468544.0, "12340": 957162816.0, "12345": 943953024.0, "12350": 966456128.0, "12355": 975290944.0, "12360": 978525888.0, "12365": 964167680.0, "12370": 948720000.0, "12375": 963458368.0, "12380": 964456768.0, "12385": 972525056.0, "12390": 961268480.0, "12395": 961224384.0, "12400": 974902976.0, "12405": 976068288.0, "12410": 953497664.0, "12415": 962808256.0, "12420": 944027648.0, "12425": 949199232.0, "12430": 972341760.0, "12435": 969139520.0, "12440": 961869440.0, "12445": 951986688.0, "12450": 947354304.0, "12455": 981089408.0, "12460": 973706432.0, "12465": 954352128.0, "12470": 980946944.0, "12475": 957722048.0, "12480": 966555776.0, "12485": 978058624.0, "12490": 973910528.0, "12495": 969262528.0, "12500": 961241216.0, "12505": 943067520.0, "12510": 960570944.0, "12515": 969099456.0, "12520": 973592256.0, "12525": 971836928.0, "12530": 944239168.0, "12535": 976315712.0, "12540": 965499648.0, "12545": 971751488.0, "12550": 969426624.0, "12555": 940636736.0, "12560": 964014336.0, "12565": 947128640.0, "12570": 973903104.0, "12575": 962710400.0, "12580": 957710784.0, "12585": 963968320.0, "12590": 965309632.0, "12595": 978474048.0, "12600": 981750336.0, "12605": 949065280.0, "12610": 937499136.0, "12615": 962324416.0, "12620": 960734528.0, "12625": 966077376.0, "12630": 970487296.0, "12635": 961963520.0, "12640": 978159744.0, "12645": 968667712.0, "12650": 969398464.0, "12655": 963797376.0, "12660": 932021504.0, "12665": 956654784.0, "12670": 985811200.0, "12675": 965185152.0, "12680": 960867520.0, "12685": 950922304.0, "12690": 945182976.0, "12695": 977858560.0, "12700": 984982912.0, "12705": 958107712.0, "12710": 968195968.0, "12715": 955984256.0, "12720": 976169280.0, "12725": 964923264.0, "12730": 968927744.0, "12735": 986408896.0, "12740": 937023552.0, "12745": 970419008.0, "12750": 973958016.0, "12755": 979621888.0, "12760": 969694656.0, "12765": 941532928.0, "12770": 951642816.0, "12775": 950722112.0, "12780": 968833728.0, "12785": 955702976.0, "12790": 962587008.0, "12795": 952279936.0, "12800": 962101568.0, "12805": 972404928.0, "12810": 973263872.0, "12815": 952244800.0, "12820": 940870080.0, "12825": 966388864.0, "12830": 998463168.0, "12835": 976438208.0, "12840": 962187520.0, "12845": 941331264.0, "12850": 958583552.0, "12855": 960955968.0, "12860": 972040640.0, "12865": 977648448.0, "12870": 974128960.0, "12875": 955544192.0, "12880": 967355072.0, "12885": 980885376.0, "12890": 959517504.0, "12895": 969369728.0, "12900": 937557056.0, "12905": 968000128.0, "12910": 981310912.0, "12915": 974928576.0, "12920": 955526464.0, "12925": 948798272.0, "12930": 960179136.0, "12935": 990793984.0, "12940": 967159232.0, "12945": 975369216.0, "12950": 971162944.0, "12955": 955488000.0, "12960": 977097600.0, "12965": 960551936.0, "12970": 961759488.0, "12975": 959663424.0, "12980": 937497728.0, "12985": 962920448.0, "12990": 967698880.0, "12995": 975229184.0, "13000": 979703232.0, "13005": 953898432.0, "13010": 950777088.0, "13015": 964294080.0, "13020": 960197248.0, "13025": 979467520.0, "13030": 971598528.0, "13035": 954749824.0, "13040": 953719872.0, "13045": 972230400.0, "13050": 967373696.0, "13055": 967416640.0, "13060": 950481600.0, "13065": 970005376.0, "13070": 974908032.0, "13075": 961214208.0, "13080": 960842880.0, "13085": 944157056.0, "13090": 973418304.0, "13095": 980451328.0, "13100": 964953344.0, "13105": 973640128.0, "13110": 924433024.0, "13115": 973448000.0, "13120": 980824768.0, "13125": 974996416.0, "13130": 951176320.0, "13135": 944599040.0, "13140": 925485760.0, "13145": 989725760.0, "13150": 983139840.0, "13155": 975940736.0, "13160": 969554944.0, "13165": 952590592.0, "13170": 976828288.0, "13175": 982089600.0, "13180": 976543744.0, "13185": 974724672.0, "13190": 949058880.0, "13195": 964049344.0, "13200": 969241856.0, "13205": 982678400.0, "13210": 971447424.0, "13215": 947571072.0, "13220": 962201856.0, "13225": 963504192.0, "13230": 975714752.0, "13235": 977022976.0, "13240": 965512320.0, "13245": 936032064.0, "13250": 977655104.0, "13255": 960188416.0, "13260": 966243264.0, "13265": 972833344.0, "13270": 954500544.0, "13275": 959167424.0, "13280": 976592960.0, "13285": 974515200.0, "13290": 966372352.0, "13295": 941926912.0, "13300": 968674752.0, "13305": 981813248.0, "13310": 971385088.0, "13315": 967313600.0, "13320": 976014720.0, "13325": 943276608.0, "13330": 972109440.0, "13335": 979721216.0, "13340": 959887168.0, "13345": 978087808.0, "13350": 954515968.0, "13355": 968495744.0, "13360": 982135168.0, "13365": 956963136.0, "13370": 966373824.0, "13375": 937254208.0, "13380": 952834880.0, "13385": 980008448.0, "13390": 978545728.0, "13395": 982182464.0, "13400": 951233664.0, "13405": 933923264.0, "13410": 975654080.0, "13415": 972747456.0, "13420": 984694528.0, "13425": 965735744.0, "13430": 953787072.0, "13435": 980714880.0, "13440": 966228224.0, "13445": 966187776.0, "13450": 969482240.0, "13455": 951441408.0, "13460": 964325696.0, "13465": 974087424.0, "13470": 971453056.0, "13475": 969315200.0, "13480": 966304960.0, "13485": 966501952.0, "13490": 977760896.0, "13495": 959966720.0, "13500": 971014848.0, "13505": 965163712.0, "13510": 962915328.0, "13515": 990128960.0, "13520": 958450816.0, "13525": 976584832.0, "13530": 983325568.0, "13535": 931111616.0, "13540": 946251712.0, "13545": 972117312.0, "13550": 970237824.0, "13555": 980911104.0, "13560": 959124480.0, "13565": 963886656.0, "13570": 972271680.0, "13575": 976835264.0, "13580": 975807168.0, "13585": 963320832.0, "13590": 939925248.0, "13595": 983274496.0, "13600": 989970688.0, "13605": 968984384.0, "13610": 973681344.0, "13615": 945095040.0, "13620": 972402688.0, "13625": 971239488.0, "13630": 958592448.0, "13635": 985120704.0, "13640": 980443136.0, "13645": 960721536.0, "13650": 953083008.0, "13655": 965253504.0, "13660": 977916736.0, "13665": 969815936.0, "13670": 956231104.0, "13675": 977515072.0, "13680": 976291328.0, "13685": 967763264.0, "13690": 971396160.0, "13695": 944083776.0, "13700": 974076352.0, "13705": 973049920.0, "13710": 975860864.0, "13715": 950428928.0, "13720": 951397376.0, "13725": 972326080.0, "13730": 968925632.0, "13735": 961582208.0, "13740": 972785152.0, "13745": 973802752.0, "13750": 958008704.0, "13755": 977221440.0, "13760": 964591808.0, "13765": 967657152.0, "13770": 978897600.0, "13775": 928788224.0, "13780": 973602176.0, "13785": 981607104.0, "13790": 963469120.0, "13795": 979592704.0, "13800": 955802496.0, "13805": 967515648.0, "13810": 963702336.0, "13815": 981084544.0, "13820": 975819520.0, "13825": 957889472.0, "13830": 945460288.0, "13835": 974441600.0, "13840": 996340224.0, "13845": 967583424.0, "13850": 974764672.0, "13855": 933372864.0, "13860": 971353856.0, "13865": 976535040.0, "13870": 986824128.0, "13875": 978719680.0, "13880": 951090112.0, "13885": 975569792.0, "13890": 967982720.0, "13895": 944447424.0, "13900": 978880640.0, "13905": 965856000.0, "13910": 964752320.0, "13915": 976913152.0, "13920": 975158912.0, "13925": 980442688.0, "13930": 962718528.0, "13935": 956230336.0, "13940": 965168960.0, "13945": 964977088.0, "13950": 977233216.0, "13955": 976779136.0, "13960": 974351744.0, "13965": 974745856.0, "13970": 972133824.0, "13975": 971368384.0, "13980": 983074496.0, "13985": 968687040.0, "13990": 962172800.0, "13995": 980224512.0, "14000": 971927104.0, "14005": 972939584.0, "14010": 965680256.0, "14015": 941755520.0, "14020": 980275008.0, "14025": 976786176.0, "14030": 971783680.0, "14035": 971182016.0, "14040": 936043904.0, "14045": 968322112.0, "14050": 974783488.0, "14055": 978909888.0, "14060": 976415872.0, "14065": 928651328.0, "14070": 978142016.0, "14075": 979456064.0, "14080": 964615168.0, "14085": 968926336.0, "14090": 965791168.0, "14095": 950508416.0, "14100": 972049280.0, "14105": 970851584.0, "14110": 987498560.0, "14115": 967732224.0, "14120": 944391936.0, "14125": 973124864.0, "14130": 970955136.0, "14135": 978615744.0, "14140": 972436160.0, "14145": 947254016.0, "14150": 974211648.0, "14155": 979536768.0, "14160": 972891456.0, "14165": 972754240.0, "14170": 947001344.0, "14175": 949488704.0, "14180": 976564096.0, "14185": 971828544.0, "14190": 979707840.0, "14195": 973491648.0, "14200": 945004928.0, "14205": 980393280.0, "14210": 981165440.0, "14215": 970032704.0, "14220": 965303936.0, "14225": 956374720.0, "14230": 983989184.0, "14235": 984008192.0, "14240": 980500288.0, "14245": 969505728.0, "14250": 953962944.0, "14255": 941277696.0, "14260": 963554880.0, "14265": 988972160.0, "14270": 987060736.0, "14275": 969009856.0, "14280": 954787392.0, "14285": 962406080.0, "14290": 966104448.0, "14295": 966147456.0, "14300": 983130496.0, "14305": 936872768.0, "14310": 969647488.0, "14315": 977202688.0, "14320": 975875328.0, "14325": 980343744.0, "14330": 957007296.0, "14335": 962928128.0, "14340": 956029568.0, "14345": 968216512.0, "14350": 963384256.0, "14355": 946375552.0, "14360": 949630848.0, "14365": 972326912.0, "14370": 953610112.0, "14375": 956098880.0, "14380": 957511168.0, "14385": 951425792.0, "14390": 982245696.0, "14395": 970915968.0, "14400": 963241472.0, "14405": 969487744.0, "14410": 965317696.0, "14415": 963697536.0, "14420": 960403712.0, "14425": 976605888.0, "14430": 979001792.0, "14435": 970516224.0, "14440": 973549056.0, "14445": 980175296.0, "14450": 973135104.0, "14455": 960706560.0, "14460": 955437824.0, "14465": 934319040.0, "14470": 959987072.0, "14475": 965804160.0, "14480": 966944896.0, "14485": 975378752.0, "14490": 946612608.0, "14495": 959352512.0, "14500": 970735424.0, "14505": 966115584.0, "14510": 977269696.0, "14515": 942285824.0, "14520": 953120448.0, "14525": 971309952.0, "14530": 956982016.0, "14535": 978651648.0, "14540": 954128064.0, "14545": 940059904.0, "14550": 988161088.0, "14555": 971624704.0, "14560": 973371136.0, "14565": 969026368.0, "14570": 948155264.0, "14575": 966134400.0, "14580": 965988288.0, "14585": 976619968.0, "14590": 974982272.0, "14595": 945465280.0, "14600": 958231104.0, "14605": 968240384.0, "14610": 953574976.0, "14615": 967515648.0, "14620": 952799872.0, "14625": 963045632.0, "14630": 971055936.0, "14635": 976214656.0, "14640": 977982848.0, "14645": 974157568.0, "14650": 946738304.0, "14655": 976962048.0, "14660": 975089472.0, "14665": 953730304.0, "14670": 970328320.0, "14675": 952209984.0, "14680": 970198336.0, "14685": 971545920.0, "14690": 967100352.0, "14695": 949219008.0, "14700": 934553280.0, "14705": 958961600.0, "14710": 978193536.0, "14715": 981824000.0, "14720": 967142144.0, "14725": 962358656.0, "14730": 937923840.0, "14735": 954617344.0, "14740": 967426176.0, "14745": 987179776.0, "14750": 973732864.0, "14755": 944016320.0, "14760": 973690176.0, "14765": 970028736.0, "14770": 988910080.0, "14775": 982104128.0, "14780": 945153856.0, "14785": 957903168.0, "14790": 971183488.0, "14795": 974832128.0, "14800": 969137856.0, "14805": 949764608.0, "14810": 951574848.0, "14815": 966449216.0, "14820": 968120192.0, "14825": 963948544.0, "14830": 956540992.0, "14835": 945972992.0, "14840": 967388864.0, "14845": 980384448.0, "14850": 952957376.0, "14855": 960725696.0, "14860": 967093248.0, "14865": 977143424.0, "14870": 966909184.0, "14875": 979765312.0, "14880": 957056768.0, "14885": 962081408.0, "14890": 950016512.0, "14895": 976252160.0, "14900": 971564928.0, "14905": 980114368.0, "14910": 972196224.0, "14915": 945667456.0, "14920": 952525376.0, "14925": 977190912.0, "14930": 975869632.0, "14935": 973605632.0, "14940": 939766208.0, "14945": 975415936.0, "14950": 977170560.0, "14955": 983219008.0, "14960": 955386688.0, "14965": 956326272.0, "14970": 943115328.0, "14975": 960380800.0, "14980": 977240640.0, "14985": 984438720.0, "14990": 966773376.0, "14995": 952456960.0, "15000": 976750464.0, "15005": 965213504.0, "15010": 966983488.0, "15015": 971528960.0, "15020": 944374016.0, "15025": 974053056.0, "15030": 970421952.0, "15035": 969943488.0, "15040": 971530048.0, "15045": 949742976.0, "15050": 948426816.0, "15055": 968058176.0, "15060": 970698304.0, "15065": 972522304.0, "15070": 953825856.0, "15075": 948972608.0, "15080": 970426048.0, "15085": 973400768.0, "15090": 971676736.0, "15095": 949957504.0, "15100": 958023168.0, "15105": 987363264.0, "15110": 980801216.0, "15115": 957061696.0, "15120": 959650176.0, "15125": 962171328.0, "15130": 968198464.0, "15135": 982411456.0, "15140": 956186624.0, "15145": 976349376.0, "15150": 937987264.0, "15155": 938433728.0, "15160": 967984640.0, "15165": 975071744.0, "15170": 959477184.0, "15175": 974211072.0, "15180": 945354560.0, "15185": 964731392.0, "15190": 963139328.0, "15195": 971020608.0, "15200": 985433408.0, "15205": 941664192.0, "15210": 963585216.0, "15215": 965254976.0, "15220": 983227648.0, "15225": 966241600.0, "15230": 949497408.0, "15235": 933472832.0, "15240": 979892288.0, "15245": 958078464.0, "15250": 964588992.0, "15255": 972068864.0, "15260": 955060992.0, "15265": 981602368.0, "15270": 958442176.0, "15275": 973591616.0, "15280": 967866304.0, "15285": 942660288.0, "15290": 980382784.0, "15295": 967821504.0, "15300": 956589760.0, "15305": 960945664.0, "15310": 932035840.0, "15315": 939510592.0, "15320": 969085184.0, "15325": 970151744.0, "15330": 955275904.0, "15335": 967147456.0, "15340": 951607360.0, "15345": 987656768.0, "15350": 973188416.0, "15355": 972321216.0, "15360": 968389632.0, "15365": 940923392.0, "15370": 955695744.0, "15375": 972178688.0, "15380": 972283712.0, "15385": 977233600.0, "15390": 946424640.0, "15395": 950289472.0, "15400": 961718016.0, "15405": 973353920.0, "15410": 964252608.0, "15415": 963907904.0, "15420": 961375808.0, "15425": 969715456.0, "15430": 973366016.0, "15435": 959127808.0, "15440": 956364096.0, "15445": 951590528.0, "15450": 979352128.0, "15455": 955560384.0, "15460": 971190464.0, "15465": 980107136.0, "15470": 966569664.0, "15475": 958760384.0, "15480": 968058496.0, "15485": 956516160.0, "15490": 956025024.0, "15495": 954504256.0, "15500": 968445888.0, "15505": 967793408.0, "15510": 966647040.0, "15515": 972105792.0, "15520": 963939520.0, "15525": 928937792.0, "15530": 977222528.0, "15535": 976528000.0, "15540": 974663040.0, "15545": 967684928.0, "15550": 950454464.0, "15555": 963493440.0, "15560": 983137024.0, "15565": 962315648.0, "15570": 968996480.0, "15575": 954298816.0, "15580": 959434560.0, "15585": 977216768.0, "15590": 986885312.0, "15595": 974339712.0, "15600": 951578624.0, "15605": 943864512.0, "15610": 973420608.0, "15615": 970459968.0, "15620": 953418816.0, "15625": 979734400.0, "15630": 950104128.0, "15635": 976829248.0, "15640": 962961216.0, "15645": 961889856.0, "15650": 983897152.0, "15655": 939254592.0, "15660": 979574656.0, "15665": 965632896.0, "15670": 971474688.0, "15675": 978754048.0, "15680": 952217024.0, "15685": 945702144.0, "15690": 967530368.0, "15695": 965292928.0, "15700": 975089920.0, "15705": 967425792.0, "15710": 949533184.0, "15715": 979249280.0, "15720": 960184832.0, "15725": 971136576.0, "15730": 959791680.0, "15735": 943103296.0, "15740": 985987840.0, "15745": 978262784.0, "15750": 981680192.0, "15755": 940996608.0, "15760": 958134528.0, "15765": 955399616.0, "15770": 979668480.0, "15775": 964120832.0, "15780": 952299584.0, "15785": 962315456.0, "15790": 934807552.0, "15795": 968351040.0, "15800": 974225600.0, "15805": 971443456.0, "15810": 974878784.0, "15815": 939401792.0, "15820": 973708544.0, "15825": 987662464.0, "15830": 968102528.0, "15835": 981008000.0, "15840": 932340160.0, "15845": 969856320.0, "15850": 964212992.0, "15855": 984859968.0, "15860": 957642112.0, "15865": 963525440.0, "15870": 941039616.0, "15875": 972174656.0, "15880": 964860736.0, "15885": 981999872.0, "15890": 968593664.0, "15895": 955724992.0, "15900": 992000320.0, "15905": 966076800.0, "15910": 980157632.0, "15915": 972223872.0, "15920": 948734592.0, "15925": 968248960.0, "15930": 983507008.0, "15935": 968378688.0, "15940": 971272448.0, "15945": 981030208.0, "15950": 964200960.0, "15955": 975079104.0, "15960": 974101120.0, "15965": 971186496.0, "15970": 968698560.0, "15975": 954449344.0, "15980": 975681472.0, "15985": 975737088.0, "15990": 985281728.0, "15995": 974069056.0, "16000": 969151168.0, "16005": 961120512.0, "16010": 973158976.0, "16015": 980798464.0, "16020": 964859328.0, "16025": 960951104.0, "16030": 951468032.0, "16035": 984349632.0, "16040": 960691136.0, "16045": 971010944.0, "16050": 955523328.0, "16055": 965959040.0, "16060": 965124288.0, "16065": 972210176.0, "16070": 968157504.0, "16075": 978405120.0, "16080": 950573824.0, "16085": 970577728.0, "16090": 973297024.0, "16095": 967390400.0, "16100": 981402624.0, "16105": 929935680.0, "16110": 962029056.0, "16115": 967831872.0, "16120": 970988992.0, "16125": 975849472.0, "16130": 960872000.0, "16135": 950590144.0, "16140": 977334656.0, "16145": 977272320.0, "16150": 982258304.0, "16155": 977334336.0, "16160": 942257664.0, "16165": 970393984.0, "16170": 963181568.0, "16175": 978524864.0, "16180": 976689856.0, "16185": 953276032.0, "16190": 951627328.0, "16195": 978041920.0, "16200": 975478592.0, "16205": 946734144.0, "16210": 962176512.0, "16215": 950853888.0, "16220": 978611072.0, "16225": 968074624.0, "16230": 972135296.0, "16235": 973324288.0, "16240": 940704512.0, "16245": 974834816.0, "16250": 979471744.0, "16255": 989950016.0, "16260": 969760320.0, "16265": 955025344.0, "16270": 963900800.0, "16275": 969059712.0, "16280": 991922688.0, "16285": 976522112.0, "16290": 940554624.0, "16295": 944204608.0, "16300": 957758784.0, "16305": 980640384.0, "16310": 975478272.0, "16315": 972431488.0, "16320": 942165376.0, "16325": 975046592.0, "16330": 978453440.0, "16335": 975302912.0, "16340": 988102656.0, "16345": 947267456.0, "16350": 962271808.0, "16355": 967581312.0, "16360": 957758720.0, "16365": 971989248.0, "16370": 934050880.0, "16375": 962619136.0, "16380": 984477632.0, "16385": 974755584.0, "16390": 976849280.0, "16395": 962863680.0, "16400": 951474624.0, "16405": 962016704.0, "16410": 963916800.0, "16415": 979870464.0, "16420": 967719808.0, "16425": 955966528.0, "16430": 959425600.0, "16435": 971813696.0, "16440": 970480896.0, "16445": 969177472.0, "16450": 946274368.0, "16455": 933991040.0, "16460": 981236608.0, "16465": 964644416.0, "16470": 979236416.0, "16475": 950114432.0, "16480": 962455616.0, "16485": 971302272.0, "16490": 966124672.0, "16495": 983875904.0, "16500": 990465920.0, "16505": 957192576.0, "16510": 967501568.0, "16515": 954374912.0, "16520": 971800832.0, "16525": 977345344.0, "16530": 931414528.0, "16535": 974123584.0, "16540": 958128192.0, "16545": 971239232.0, "16550": 974439616.0, "16555": 951281792.0, "16560": 959295040.0, "16565": 965031936.0, "16570": 970879936.0, "16575": 975087616.0, "16580": 971678976.0, "16585": 946555392.0, "16590": 964324352.0, "16595": 974931648.0, "16600": 980644096.0, "16605": 972567616.0, "16610": 946421056.0, "16615": 985669056.0, "16620": 959552512.0, "16625": 978448256.0, "16630": 973505024.0, "16635": 956700928.0, "16640": 967513664.0, "16645": 969828800.0, "16650": 967624064.0, "16655": 965204608.0, "16660": 975425280.0, "16665": 951585152.0, "16670": 972444928.0, "16675": 966143168.0, "16680": 960843776.0, "16685": 969005312.0, "16690": 945775424.0, "16695": 968810048.0, "16700": 970574656.0, "16705": 963091200.0, "16710": 972351552.0, "16715": 946819456.0, "16720": 964067008.0, "16725": 966715648.0, "16730": 974711232.0, "16735": 988851968.0, "16740": 952151104.0, "16745": 950031936.0, "16750": 949730496.0, "16755": 965023744.0, "16760": 968773760.0, "16765": 967524224.0, "16770": 925814208.0, "16775": 972535360.0, "16780": 972477824.0, "16785": 957136128.0, "16790": 960587008.0, "16795": 942249792.0, "16800": 950610304.0, "16805": 965645952.0, "16810": 960175744.0, "16815": 964480000.0, "16820": 952758848.0, "16825": 965794176.0, "16830": 972518272.0, "16835": 976318080.0, "16840": 965332544.0, "16845": 972782144.0, "16850": 962620416.0, "16855": 984319872.0, "16860": 975848128.0, "16865": 960584640.0, "16870": 953006848.0, "16875": 960585408.0, "16880": 976187712.0, "16885": 975394816.0, "16890": 965457216.0, "16895": 966029184.0, "16900": 951789376.0, "16905": 960952896.0, "16910": 965377280.0, "16915": 973685760.0, "16920": 957463616.0, "16925": 977268032.0, "16930": 940132608.0, "16935": 977186176.0, "16940": 968969536.0, "16945": 977965760.0, "16950": 961944128.0, "16955": 945200320.0, "16960": 970449472.0, "16965": 971535296.0, "16970": 987469824.0, "16975": 975159744.0, "16980": 947304576.0, "16985": 960216320.0, "16990": 973285312.0, "16995": 960721024.0, "17000": 958674944.0, "17005": 966290880.0, "17010": 938631232.0, "17015": 979266176.0, "17020": 964890752.0, "17025": 968083008.0, "17030": 971477696.0, "17035": 951384256.0, "17040": 965247168.0, "17045": 973416128.0, "17050": 974835648.0, "17055": 976478080.0, "17060": 943675648.0, "17065": 978238720.0, "17070": 979175040.0, "17075": 971536640.0, "17080": 971479936.0, "17085": 956413504.0, "17090": 963477184.0, "17095": 982981440.0, "17100": 960566912.0, "17105": 961045952.0, "17110": 962275328.0, "17115": 940567232.0, "17120": 970499328.0, "17125": 968586880.0, "17130": 962728896.0, "17135": 969383232.0, "17140": 951969920.0, "17145": 970137088.0, "17150": 962521856.0, "17155": 967029248.0, "17160": 976224576.0, "17165": 963888960.0, "17170": 963599424.0, "17175": 977825920.0, "17180": 958700288.0, "17185": 983574784.0, "17190": 974214336.0, "17195": 937181952.0, "17200": 962733504.0, "17205": 971098176.0, "17210": 983018112.0, "17215": 979575488.0, "17220": 937573184.0, "17225": 961336256.0, "17230": 949872960.0, "17235": 969889600.0, "17240": 982156608.0, "17245": 952048768.0, "17250": 967297344.0, "17255": 977384640.0, "17260": 962520768.0, "17265": 965126528.0, "17270": 956305152.0, "17275": 945002368.0, "17280": 975595904.0, "17285": 969055488.0, "17290": 954190208.0, "17295": 957509056.0, "17300": 935955712.0, "17305": 979091456.0, "17310": 964940928.0, "17315": 971487616.0, "17320": 956830656.0, "17325": 944863680.0, "17330": 954960576.0, "17335": 965849792.0, "17340": 966014080.0, "17345": 971262016.0, "17350": 952803200.0, "17355": 949154816.0, "17360": 981033216.0, "17365": 963234240.0, "17370": 967729664.0, "17375": 961825664.0, "17380": 944740096.0, "17385": 967906496.0, "17390": 966752256.0, "17395": 968894592.0, "17400": 977347712.0, "17405": 946321408.0, "17410": 962778688.0, "17415": 979902400.0, "17420": 963552064.0, "17425": 967058176.0, "17430": 952743296.0, "17435": 951803008.0, "17440": 963911232.0, "17445": 967624448.0, "17450": 962980672.0, "17455": 984400512.0, "17460": 943970496.0, "17465": 968863744.0, "17470": 975356736.0, "17475": 958503488.0, "17480": 974195776.0, "17485": 949897408.0, "17490": 970808640.0, "17495": 969658304.0, "17500": 962926144.0, "17505": 963398720.0, "17510": 953963840.0, "17515": 960359872.0, "17520": 982473152.0, "17525": 969919232.0, "17530": 965983488.0, "17535": 961008960.0, "17540": 952894336.0, "17545": 971945280.0, "17550": 987134144.0, "17555": 974245888.0, "17560": 956139520.0, "17565": 945424576.0, "17570": 965952064.0, "17575": 987831488.0, "17580": 977748352.0, "17585": 959787328.0, "17590": 941739712.0, "17595": 943312768.0, "17600": 969806720.0, "17605": 968430016.0, "17610": 977378560.0, "17615": 954462848.0, "17620": 956321984.0, "17625": 983697920.0, "17630": 967992128.0, "17635": 981031168.0, "17640": 973707776.0, "17645": 946032896.0, "17650": 966421376.0, "17655": 975455680.0, "17660": 966102528.0, "17665": 971269184.0, "17670": 958952192.0, "17675": 950746880.0, "17680": 957506560.0, "17685": 964218880.0, "17690": 981149824.0, "17695": 953432000.0, "17700": 948616128.0, "17705": 969380224.0, "17710": 977671424.0, "17715": 974387328.0, "17720": 960337984.0, "17725": 933913344.0, "17730": 960759168.0, "17735": 974910336.0, "17740": 972217856.0, "17745": 964443264.0, "17750": 955689920.0, "17755": 973385408.0, "17760": 977832448.0, "17765": 978992832.0, "17770": 959528576.0, "17775": 959307776.0, "17780": 954210624.0, "17785": 975046656.0, "17790": 975375744.0, "17795": 958541312.0, "17800": 950507904.0, "17805": 954511552.0, "17810": 979104576.0, "17815": 963822336.0, "17820": 973152192.0, "17825": 967202048.0, "17830": 935175616.0, "17835": 961408896.0, "17840": 974129088.0, "17845": 987851072.0, "17850": 984649280.0, "17855": 940721472.0, "17860": 963281408.0, "17865": 971044992.0, "17870": 959156032.0, "17875": 973564864.0, "17880": 958237440.0, "17885": 943227328.0, "17890": 959929088.0, "17895": 975523968.0, "17900": 952393536.0, "17905": 983063680.0, "17910": 948454336.0, "17915": 962533312.0, "17920": 956820800.0, "17925": 959455040.0, "17930": 983220928.0, "17935": 962711360.0, "17940": 959719552.0, "17945": 963970624.0, "17950": 949894528.0, "17955": 959314496.0, "17960": 957162048.0, "17965": 963098368.0, "17970": 968922048.0, "17975": 966207488.0, "17980": 987009280.0, "17985": 966113920.0, "17990": 941577408.0, "17995": 974032256.0, "18000": 978073536.0, "18005": 976642176.0, "18010": 961255424.0, "18015": 959185344.0, "18020": 967601216.0, "18025": 982273280.0, "18030": 974095808.0, "18035": 958944448.0, "18040": 942498944.0, "18045": 959244352.0, "18050": 966636352.0, "18055": 966108160.0, "18060": 972072064.0, "18065": 961187200.0, "18070": 944150848.0, "18075": 971447616.0, "18080": 962930176.0, "18085": 966833024.0, "18090": 987185792.0, "18095": 947030784.0, "18100": 984370752.0, "18105": 970859840.0, "18110": 970113472.0, "18115": 981095680.0, "18120": 948414592.0, "18125": 968287104.0, "18130": 950900096.0, "18135": 971993280.0, "18140": 956186880.0, "18145": 956976640.0, "18150": 940347008.0, "18155": 976402048.0, "18160": 961373952.0, "18165": 973815936.0, "18170": 969656896.0, "18175": 958225984.0, "18180": 967638592.0, "18185": 971723136.0, "18190": 990922432.0, "18195": 973611776.0, "18200": 934103488.0, "18205": 961940416.0, "18210": 968111232.0, "18215": 972252352.0, "18220": 975653568.0, "18225": 958659904.0, "18230": 949362752.0, "18235": 979695168.0, "18240": 958032064.0, "18245": 960717056.0, "18250": 961691008.0, "18255": 936172480.0, "18260": 973614720.0, "18265": 958608896.0, "18270": 978307328.0, "18275": 979424000.0, "18280": 929348736.0, "18285": 953633792.0, "18290": 969671040.0, "18295": 979693376.0, "18300": 956805632.0, "18305": 942032192.0, "18310": 943722048.0, "18315": 959811520.0, "18320": 968570752.0, "18325": 970311104.0, "18330": 958847168.0, "18335": 945815104.0, "18340": 970687680.0, "18345": 969166528.0, "18350": 951464768.0, "18355": 974150208.0, "18360": 952318592.0, "18365": 980235264.0, "18370": 968288640.0, "18375": 972450688.0, "18380": 958135168.0, "18385": 954229696.0, "18390": 962400512.0, "18395": 967611264.0, "18400": 967552128.0, "18405": 955152256.0, "18410": 960513856.0, "18415": 965115648.0, "18420": 966193792.0, "18425": 966206848.0, "18430": 997129536.0, "18435": 974982208.0, "18440": 957974848.0, "18445": 978282752.0, "18450": 965728960.0, "18455": 987034368.0, "18460": 982312192.0, "18465": 951759232.0, "18470": 957306752.0, "18475": 973122688.0, "18480": 994848704.0, "18485": 974003264.0, "18490": 955702656.0, "18495": 935201920.0, "18500": 979210048.0, "18505": 972012608.0, "18510": 967557760.0, "18515": 976764736.0, "18520": 933893312.0, "18525": 991765824.0, "18530": 979184768.0, "18535": 985799104.0, "18540": 969439040.0, "18545": 964728128.0, "18550": 971350272.0, "18555": 962730560.0, "18560": 967630464.0, "18565": 975630592.0, "18570": 965905216.0, "18575": 976244800.0, "18580": 974430912.0, "18585": 968928704.0, "18590": 964330944.0, "18595": 967472000.0, "18600": 953964928.0, "18605": 978473792.0, "18610": 976931776.0, "18615": 966907648.0, "18620": 963290496.0, "18625": 951203840.0, "18630": 974278080.0, "18635": 972108288.0, "18640": 963383296.0, "18645": 967631808.0, "18650": 942282816.0, "18655": 980562112.0, "18660": 972794304.0, "18665": 968938880.0, "18670": 971950080.0, "18675": 950484160.0, "18680": 943860352.0, "18685": 980180608.0, "18690": 962988672.0, "18695": 981461888.0, "18700": 978720320.0, "18705": 950193856.0, "18710": 961138688.0, "18715": 984086592.0, "18720": 971447616.0, "18725": 978534336.0, "18730": 944302208.0, "18735": 967023872.0, "18740": 963266304.0, "18745": 973745408.0, "18750": 969880192.0, "18755": 957229440.0, "18760": 939733184.0, "18765": 977109568.0, "18770": 966797312.0, "18775": 973748352.0, "18780": 978032128.0, "18785": 957718016.0, "18790": 970075968.0, "18795": 982337088.0, "18800": 968007424.0, "18805": 976619136.0, "18810": 947676160.0, "18815": 988317248.0, "18820": 966438080.0, "18825": 968934208.0, "18830": 964558528.0, "18835": 953340032.0, "18840": 962646592.0, "18845": 958597888.0, "18850": 969474432.0, "18855": 980117888.0, "18860": 971372800.0, "18865": 944428992.0, "18870": 966098432.0, "18875": 972048704.0, "18880": 962066560.0, "18885": 959354240.0, "18890": 957684096.0, "18895": 991788864.0, "18900": 975850816.0, "18905": 969905792.0, "18910": 979431104.0, "18915": 954372160.0, "18920": 945889408.0, "18925": 978271616.0, "18930": 973264320.0, "18935": 981850048.0, "18940": 962553344.0, "18945": 933394944.0, "18950": 964518976.0, "18955": 974469184.0, "18960": 986175232.0, "18965": 972774208.0, "18970": 942382528.0, "18975": 960496768.0, "18980": 970811200.0, "18985": 963985152.0, "18990": 962374272.0, "18995": 952057024.0, "19000": 959848960.0, "19005": 975523840.0, "19010": 972252800.0, "19015": 986210624.0, "19020": 954113536.0, "19025": 953226688.0, "19030": 970229120.0, "19035": 970476672.0, "19040": 962183360.0, "19045": 961251456.0, "19050": 946516352.0, "19055": 969883712.0, "19060": 964756800.0, "19065": 980818816.0, "19070": 962719872.0, "19075": 932801856.0, "19080": 965224576.0, "19085": 981544064.0, "19090": 956558464.0, "19095": 974049344.0, "19100": 934047552.0, "19105": 955382464.0, "19110": 968081728.0, "19115": 972657856.0, "19120": 958934144.0, "19125": 953697024.0, "19130": 950070528.0, "19135": 964328960.0, "19140": 955114432.0, "19145": 975630208.0, "19150": 977244480.0, "19155": 933990336.0, "19160": 952743360.0, "19165": 977600704.0, "19170": 963617088.0, "19175": 969470528.0, "19180": 950713920.0, "19185": 964960000.0, "19190": 971772480.0, "19195": 956229120.0, "19200": 969539264.0, "19205": 976958336.0, "19210": 947528320.0, "19215": 972759872.0, "19220": 959425600.0, "19225": 977723840.0, "19230": 976371904.0, "19235": 951793792.0, "19240": 977674944.0, "19245": 980894592.0, "19250": 983951872.0, "19255": 966518400.0, "19260": 935301248.0, "19265": 964296704.0, "19270": 972446208.0, "19275": 966583424.0, "19280": 961575488.0, "19285": 971588608.0, "19290": 946189120.0, "19295": 973240320.0, "19300": 983289472.0, "19305": 957774912.0, "19310": 954738944.0, "19315": 949883136.0, "19320": 979279168.0, "19325": 967983168.0, "19330": 963198272.0, "19335": 973484736.0, "19340": 939495744.0, "19345": 965405568.0, "19350": 972957952.0, "19355": 981065600.0, "19360": 976158144.0, "19365": 956876032.0, "19370": 947638848.0, "19375": 962559168.0, "19380": 981615744.0, "19385": 988406720.0, "19390": 978932288.0, "19395": 927572288.0, "19400": 971243776.0, "19405": 978448000.0, "19410": 968707840.0, "19415": 975574144.0, "19420": 938855744.0, "19425": 961999744.0, "19430": 949310656.0, "19435": 964430848.0, "19440": 983992896.0, "19445": 948653888.0, "19450": 945894592.0, "19455": 965529664.0, "19460": 972579072.0, "19465": 974276928.0, "19470": 965274688.0, "19475": 946678912.0, "19480": 975243456.0, "19485": 965340224.0, "19490": 962946688.0, "19495": 969701376.0, "19500": 947947392.0, "19505": 968371392.0, "19510": 951100096.0, "19515": 961779456.0, "19520": 965704128.0, "19525": 946424384.0, "19530": 977424512.0, "19535": 985763456.0, "19540": 957178496.0, "19545": 964856384.0, "19550": 946883648.0, "19555": 951209536.0, "19560": 989799680.0, "19565": 979063104.0, "19570": 968019520.0, "19575": 959498240.0, "19580": 945222656.0, "19585": 964197504.0, "19590": 967664576.0, "19595": 971832128.0, "19600": 969579520.0, "19605": 946925056.0, "19610": 974044672.0, "19615": 971586176.0, "19620": 952648512.0, "19625": 971864704.0, "19630": 976091072.0, "19635": 940464832.0, "19640": 971515392.0, "19645": 967609664.0, "19650": 971483648.0, "19655": 981242816.0, "19660": 946447488.0, "19665": 957220672.0, "19670": 965444032.0, "19675": 965041152.0, "19680": 971809792.0, "19685": 942154816.0, "19690": 971079680.0, "19695": 965280320.0, "19700": 967836416.0, "19705": 977513216.0, "19710": 946387456.0, "19715": 966993024.0, "19720": 970537408.0, "19725": 981864512.0, "19730": 966182272.0, "19735": 970622144.0, "19740": 940090368.0, "19745": 966128896.0, "19750": 966837312.0, "19755": 978712896.0, "19760": 943186688.0, "19765": 935441344.0, "19770": 968517504.0, "19775": 963553536.0, "19780": 965637632.0, "19785": 964580096.0, "19790": 939925312.0, "19795": 969227456.0, "19800": 981692416.0, "19805": 971568512.0, "19810": 967103104.0, "19815": 946229568.0, "19820": 937838080.0, "19825": 970947840.0, "19830": 974403072.0, "19835": 981673344.0, "19840": 977024768.0, "19845": 938754112.0, "19850": 990878848.0, "19855": 970413504.0, "19860": 955506880.0, "19865": 972763456.0, "19870": 949720640.0, "19875": 979751360.0, "19880": 975629184.0, "19885": 968031488.0, "19890": 976593920.0, "19895": 946644672.0, "19900": 955367872.0, "19905": 975880512.0, "19910": 959815360.0, "19915": 976930432.0, "19920": 984794688.0, "19925": 964760960.0, "19930": 979498048.0, "19935": 978760960.0, "19940": 969829568.0, "19945": 959545472.0, "19950": 956301312.0, "19955": 975771328.0, "19960": 960355200.0, "19965": 973424576.0, "19970": 978149120.0, "19975": 944537088.0, "19980": 959710336.0, "19985": 964174784.0, "19990": 968784000.0, "19995": 957529344.0, "20000": 960761408.0, "20005": 939244224.0, "20010": 974838912.0, "20015": 967254272.0, "20020": 994634752.0, "20025": 961110336.0, "20030": 942239488.0, "20035": 966834304.0, "20040": 972767360.0, "20045": 979648128.0, "20050": 968105984.0, "20055": 946350144.0, "20060": 958204288.0, "20065": 984596032.0, "20070": 960187904.0, "20075": 972546944.0, "20080": 969739456.0, "20085": 948259136.0, "20090": 961073600.0, "20095": 962898048.0, "20100": 959045120.0, "20105": 971680192.0, "20110": 956960000.0, "20115": 967691520.0, "20120": 961256064.0, "20125": 969261184.0, "20130": 966059136.0, "20135": 946769728.0, "20140": 959299072.0, "20145": 969127040.0, "20150": 965531520.0, "20155": 977348928.0, "20160": 962884672.0, "20165": 965845120.0, "20170": 971230912.0, "20175": 974493696.0, "20180": 965194368.0, "20185": 957202624.0, "20190": 941938368.0, "20195": 961567360.0, "20200": 969077888.0, "20205": 976858816.0, "20210": 958405952.0, "20215": 937984512.0, "20220": 958004608.0, "20225": 971231360.0, "20230": 967417536.0, "20235": 970555264.0, "20240": 943857856.0, "20245": 943495168.0, "20250": 979624576.0, "20255": 976423488.0, "20260": 971165440.0, "20265": 953014912.0, "20270": 943021440.0, "20275": 971414336.0, "20280": 962776064.0, "20285": 970822912.0, "20290": 969130304.0, "20295": 939675008.0, "20300": 968769152.0, "20305": 961882560.0, "20310": 973014912.0, "20315": 991881344.0, "20320": 953160256.0, "20325": 948643840.0, "20330": 970103424.0, "20335": 962220928.0, "20340": 959731584.0, "20345": 961517696.0, "20350": 952713408.0, "20355": 974976704.0, "20360": 963960512.0, "20365": 971781248.0, "20370": 975464512.0, "20375": 942936576.0, "20380": 968961344.0, "20385": 965324992.0, "20390": 969346688.0, "20395": 985700928.0, "20400": 957050880.0, "20405": 964874752.0, "20410": 980708416.0, "20415": 966440448.0, "20420": 964874048.0, "20425": 947849984.0, "20430": 932618112.0, "20435": 981170752.0, "20440": 968744640.0, "20445": 980331264.0, "20450": 959409984.0, "20455": 939248896.0, "20460": 955286208.0, "20465": 980238592.0, "20470": 977879040.0, "20475": 968829376.0, "20480": 946081152.0, "20485": 957541184.0, "20490": 979334784.0, "20495": 963324352.0, "20500": 974246400.0, "20505": 961355392.0, "20510": 944377088.0, "20515": 981495040.0, "20520": 989706816.0, "20525": 979368320.0, "20530": 962051072.0, "20535": 945632320.0, "20540": 970201792.0, "20545": 957419712.0, "20550": 965992512.0, "20555": 985346048.0, "20560": 943875200.0, "20565": 965090496.0, "20570": 968714560.0, "20575": 961816448.0, "20580": 953427584.0, "20585": 954095872.0, "20590": 986120000.0, "20595": 964450176.0, "20600": 960848192.0, "20605": 964912064.0, "20610": 944649472.0, "20615": 935769408.0, "20620": 975118080.0, "20625": 974461760.0, "20630": 970237760.0, "20635": 984296320.0, "20640": 953862912.0, "20645": 970233088.0, "20650": 972646528.0, "20655": 956632320.0, "20660": 965923776.0, "20665": 943075712.0, "20670": 958602880.0, "20675": 981725632.0, "20680": 970790528.0, "20685": 978098048.0, "20690": 964900160.0, "20695": 935266304.0, "20700": 971447744.0, "20705": 964322816.0, "20710": 976121472.0, "20715": 975893824.0, "20720": 954328000.0, "20725": 975808704.0, "20730": 977074368.0, "20735": 961400960.0, "20740": 991488384.0, "20745": 956240832.0, "20750": 956607168.0, "20755": 975490624.0, "20760": 985334336.0, "20765": 984598016.0, "20770": 950314240.0, "20775": 928722624.0, "20780": 967550656.0, "20785": 976827136.0, "20790": 961184384.0, "20795": 974119872.0, "20800": 949786752.0, "20805": 987332480.0, "20810": 979759424.0, "20815": 969948032.0, "20820": 965752576.0, "20825": 948618560.0, "20830": 976403904.0, "20835": 962830336.0, "20840": 975146560.0, "20845": 971533952.0, "20850": 939545984.0, "20855": 964169344.0, "20860": 974715776.0, "20865": 967983296.0, "20870": 963338752.0, "20875": 963691200.0, "20880": 954262784.0, "20885": 972021760.0, "20890": 976926848.0, "20895": 964562624.0, "20900": 982916992.0, "20905": 935542848.0, "20910": 963708224.0, "20915": 966288704.0, "20920": 983461056.0, "20925": 970134016.0, "20930": 951253504.0, "20935": 943859328.0, "20940": 988038272.0, "20945": 965723392.0, "20950": 951953024.0, "20955": 957382080.0, "20960": 938623744.0, "20965": 966836352.0, "20970": 969207232.0, "20975": 965452864.0, "20980": 973530432.0, "20985": 940450944.0, "20990": 969414016.0, "20995": 986899648.0, "21000": 979485632.0, "21005": 957336192.0, "21010": 951294272.0, "21015": 978758528.0, "21020": 975289472.0, "21025": 974876224.0, "21030": 968144256.0, "21035": 968560576.0, "21040": 978119040.0, "21045": 980267776.0, "21050": 963655808.0, "21055": 972170496.0, "21060": 970525632.0, "21065": 944772864.0, "21070": 985078464.0, "21075": 977626496.0, "21080": 968275712.0, "21085": 966900224.0, "21090": 948778496.0, "21095": 987387328.0, "21100": 981072832.0, "21105": 960907072.0, "21110": 963507200.0, "21115": 963543296.0, "21120": 964538752.0, "21125": 976587456.0, "21130": 972743936.0, "21135": 971582336.0, "21140": 960919040.0, "21145": 935888960.0, "21150": 965811008.0, "21155": 1000256576.0, "21160": 973190912.0, "21165": 988616384.0, "21170": 955780672.0, "21175": 970195264.0, "21180": 967796160.0, "21185": 968107328.0, "21190": 978080448.0, "21195": 966444096.0, "21200": 954938560.0, "21205": 980815296.0, "21210": 964136768.0, "21215": 985496000.0, "21220": 967936704.0, "21225": 959119936.0, "21230": 961559040.0, "21235": 971301824.0, "21240": 981086208.0, "21245": 974429248.0, "21250": 946222720.0, "21255": 982764416.0, "21260": 972031488.0, "21265": 968420928.0, "21270": 953001920.0, "21275": 945718272.0, "21280": 965411712.0, "21285": 981263808.0, "21290": 974526528.0, "21295": 970316928.0, "21300": 925831296.0, "21305": 955742848.0, "21310": 987614848.0, "21315": 988178752.0, "21320": 966303744.0, "21325": 970501888.0, "21330": 957262016.0, "21335": 978953792.0, "21340": 976430080.0, "21345": 978825600.0, "21350": 979609792.0, "21355": 946512128.0, "21360": 964459328.0, "21365": 967373824.0, "21370": 992231872.0, "21375": 972042880.0, "21380": 957556736.0, "21385": 942617536.0, "21390": 958471424.0, "21395": 983829696.0, "21400": 970498880.0, "21405": 961412032.0, "21410": 963156416.0, "21415": 964032960.0, "21420": 963420160.0, "21425": 980884160.0, "21430": 962523584.0, "21435": 949637184.0, "21440": 963881792.0, "21445": 969464512.0, "21450": 977327680.0, "21455": 972700928.0, "21460": 961743168.0, "21465": 968881024.0, "21470": 966865728.0, "21475": 964365440.0, "21480": 964850752.0, "21485": 938202560.0, "21490": 942518272.0, "21495": 968968384.0, "21500": 968952384.0, "21505": 976994304.0, "21510": 985397632.0, "21515": 943083840.0, "21520": 976459456.0, "21525": 981682112.0, "21530": 978045632.0, "21535": 971054912.0, "21540": 939924800.0, "21545": 958452480.0, "21550": 975147200.0, "21555": 977952896.0, "21560": 955717056.0, "21565": 971059648.0, "21570": 934507584.0, "21575": 960792192.0, "21580": 972401216.0, "21585": 963891200.0, "21590": 966473152.0, "21595": 949675264.0, "21600": 974213504.0, "21605": 966725056.0, "21610": 952953600.0, "21615": 969611840.0, "21620": 944600128.0, "21625": 956792192.0, "21630": 966043904.0, "21635": 984543168.0, "21640": 964164096.0, "21645": 944534592.0, "21650": 948566528.0, "21655": 963131520.0, "21660": 965785536.0, "21665": 992661952.0, "21670": 955712832.0, "21675": 950465600.0, "21680": 975305472.0, "21685": 978904896.0, "21690": 974981952.0, "21695": 952559488.0, "21700": 953259520.0, "21705": 967308800.0, "21710": 977037824.0, "21715": 973219968.0, "21720": 954674944.0, "21725": 937009600.0, "21730": 960724736.0, "21735": 978923328.0, "21740": 970288448.0, "21745": 956105280.0, "21750": 944336960.0, "21755": 961252928.0, "21760": 973740864.0, "21765": 967162112.0, "21770": 978849152.0, "21775": 981091200.0, "21780": 946536320.0, "21785": 979691456.0, "21790": 976748480.0, "21795": 974978112.0, "21800": 971159104.0, "21805": 949293312.0, "21810": 969495552.0, "21815": 953435200.0, "21820": 977143360.0, "21825": 963979456.0, "21830": 963308224.0, "21835": 958629696.0, "21840": 969249152.0, "21845": 964772736.0, "21850": 965798464.0, "21855": 981981952.0, "21860": 951007872.0, "21865": 955177984.0, "21870": 968929408.0, "21875": 971737088.0, "21880": 964545920.0, "21885": 960921152.0, "21890": 968610048.0, "21895": 971102336.0, "21900": 971683136.0, "21905": 959478400.0, "21910": 982485888.0, "21915": 950215936.0, "21920": 970190592.0, "21925": 970127680.0, "21930": 962997312.0, "21935": 943957312.0, "21940": 940449280.0, "21945": 976639040.0, "21950": 968153856.0, "21955": 967608832.0, "21960": 978430464.0, "21965": 938091008.0, "21970": 975586816.0, "21975": 964973760.0, "21980": 958298944.0, "21985": 974434368.0, "21990": 953101504.0, "21995": 939761024.0, "22000": 965846272.0, "22005": 966921216.0, "22010": 969600768.0, "22015": 966416896.0, "22020": 951109312.0, "22025": 975508608.0, "22030": 978812992.0, "22035": 986018368.0, "22040": 964274304.0, "22045": 939001344.0, "22050": 976533824.0, "22055": 972068032.0, "22060": 970060352.0, "22065": 970064064.0, "22070": 955253056.0, "22075": 944981888.0, "22080": 966885632.0, "22085": 966589824.0, "22090": 979330112.0, "22095": 971768192.0, "22100": 949232256.0, "22105": 963597120.0, "22110": 975789888.0, "22115": 973726656.0, "22120": 984455424.0, "22125": 950756032.0, "22130": 969722688.0, "22135": 955741888.0, "22140": 974740608.0, "22145": 968833216.0, "22150": 944209088.0, "22155": 960574336.0, "22160": 981877184.0, "22165": 968199936.0, "22170": 956448256.0, "22175": 928403648.0, "22180": 966208064.0, "22185": 972284864.0, "22190": 957367808.0, "22195": 956481920.0, "22200": 949598080.0, "22205": 941651776.0, "22210": 985857216.0, "22215": 978801792.0, "22220": 964260672.0, "22225": 975681536.0, "22230": 937044736.0, "22235": 957218880.0, "22240": 974225536.0, "22245": 974113856.0, "22250": 961956736.0, "22255": 965569792.0, "22260": 942967872.0, "22265": 974262656.0, "22270": 983190208.0, "22275": 964491776.0, "22280": 958981952.0, "22285": 952586176.0, "22290": 953674432.0, "22295": 959809920.0, "22300": 982574208.0, "22305": 971908864.0, "22310": 936118656.0, "22315": 967973568.0, "22320": 955872832.0, "22325": 957224512.0, "22330": 973052032.0, "22335": 946503616.0, "22340": 951562560.0, "22345": 969594432.0, "22350": 969917696.0, "22355": 966408448.0, "22360": 954688256.0, "22365": 968585856.0, "22370": 964813632.0, "22375": 975151552.0, "22380": 975655104.0, "22385": 963002496.0, "22390": 951532224.0, "22395": 971074368.0, "22400": 966848768.0, "22405": 961611008.0, "22410": 969632064.0, "22415": 936812096.0, "22420": 972297728.0, "22425": 964817152.0, "22430": 978191424.0, "22435": 973575040.0, "22440": 931710592.0, "22445": 951393600.0, "22450": 975194944.0, "22455": 960062080.0, "22460": 967402624.0, "22465": 987713024.0, "22470": 943367808.0, "22475": 975359680.0, "22480": 978295808.0, "22485": 976927424.0, "22490": 966396608.0, "22495": 941990912.0, "22500": 980697536.0, "22505": 957299392.0, "22510": 984856960.0, "22515": 966424128.0, "22520": 951757632.0, "22525": 965552448.0, "22530": 985626688.0, "22535": 973979776.0, "22540": 983100608.0, "22545": 952422720.0, "22550": 946776832.0, "22555": 972026432.0, "22560": 972611968.0, "22565": 966589056.0, "22570": 980084544.0, "22575": 947745600.0, "22580": 964083712.0, "22585": 947500032.0, "22590": 976038016.0, "22595": 982878720.0, "22600": 950387328.0, "22605": 978155712.0, "22610": 969017344.0, "22615": 950922304.0, "22620": 984984640.0, "22625": 959035136.0, "22630": 943656192.0, "22635": 954767808.0, "22640": 970922048.0, "22645": 988666752.0, "22650": 956776192.0, "22655": 945408000.0, "22660": 962721216.0, "22665": 971511488.0, "22670": 969385344.0, "22675": 978913728.0, "22680": 940139648.0, "22685": 958856896.0, "22690": 978097024.0, "22695": 959047296.0, "22700": 978527488.0, "22705": 954644992.0, "22710": 958663552.0, "22715": 972739520.0, "22720": 983277376.0, "22725": 961947392.0, "22730": 959393792.0, "22735": 949098752.0, "22740": 968652672.0, "22745": 965688704.0, "22750": 998296256.0, "22755": 974237952.0, "22760": 940260096.0, "22765": 967218176.0, "22770": 969215232.0, "22775": 949946624.0, "22780": 978007936.0, "22785": 955872640.0, "22790": 955338624.0, "22795": 976459264.0, "22800": 970471168.0, "22805": 962837696.0, "22810": 958902912.0, "22815": 945604032.0, "22820": 978751744.0, "22825": 966287424.0, "22830": 966616128.0, "22835": 969619072.0, "22840": 940448640.0, "22845": 973350656.0, "22850": 975214336.0, "22855": 952181312.0, "22860": 964127424.0, "22865": 949198208.0, "22870": 977715456.0, "22875": 974761728.0, "22880": 969840832.0, "22885": 977408320.0, "22890": 947041152.0, "22895": 944157312.0, "22900": 962028544.0, "22905": 971878912.0, "22910": 979983424.0, "22915": 971374528.0, "22920": 946427968.0, "22925": 970894912.0, "22930": 975907328.0, "22935": 969322560.0, "22940": 972992448.0, "22945": 940929984.0, "22950": 975748800.0, "22955": 972562304.0, "22960": 975990208.0, "22965": 965069440.0, "22970": 957110528.0, "22975": 931330752.0, "22980": 979422976.0, "22985": 960012672.0, "22990": 965259328.0, "22995": 958502208.0, "23000": 966136960.0, "23005": 973192512.0, "23010": 964865152.0, "23015": 968764544.0, "23020": 981834880.0, "23025": 955886336.0, "23030": 990913920.0, "23035": 968665408.0, "23040": 973929664.0, "23045": 959424192.0, "23050": 947837376.0, "23055": 949624576.0, "23060": 966793472.0, "23065": 981120512.0, "23070": 960705920.0, "23075": 958170368.0, "23080": 941548416.0, "23085": 975656192.0, "23090": 979977216.0, "23095": 966086528.0, "23100": 986737600.0, "23105": 952649216.0, "23110": 970954880.0, "23115": 970356416.0, "23120": 974926336.0, "23125": 971880000.0, "23130": 941432320.0, "23135": 948436928.0, "23140": 980643392.0, "23145": 980115584.0, "23150": 982769664.0, "23155": 969937472.0, "23160": 936917824.0, "23165": 969949568.0, "23170": 981423616.0, "23175": 992251968.0, "23180": 958721920.0, "23185": 950713600.0, "23190": 956887808.0, "23195": 982302464.0, "23200": 977745152.0, "23205": 968279552.0, "23210": 952233792.0, "23215": 931486208.0, "23220": 966737792.0, "23225": 959095616.0, "23230": 977745088.0, "23235": 964849344.0, "23240": 958583232.0, "23245": 974103040.0, "23250": 950921472.0, "23255": 974276416.0, "23260": 963673024.0, "23265": 954550912.0, "23270": 981664448.0, "23275": 974778112.0, "23280": 967142720.0, "23285": 971889344.0, "23290": 940593408.0, "23295": 965364032.0, "23300": 960437312.0, "23305": 952778752.0, "23310": 966749504.0, "23315": 958649984.0, "23320": 960110656.0, "23325": 972452352.0, "23330": 986234432.0, "23335": 959038912.0, "23340": 936368000.0, "23345": 945720960.0, "23350": 977173440.0, "23355": 988962048.0, "23360": 966917056.0, "23365": 985724736.0, "23370": 938989760.0, "23375": 963140864.0, "23380": 961967616.0, "23385": 989721664.0, "23390": 987037760.0, "23395": 959711872.0, "23400": 962218880.0, "23405": 959323648.0, "23410": 966197888.0, "23415": 981977152.0, "23420": 952372544.0, "23425": 938410496.0, "23430": 971425408.0, "23435": 969885696.0, "23440": 976579008.0, "23445": 972014720.0, "23450": 955750784.0, "23455": 972671552.0, "23460": 965222848.0, "23465": 950886976.0, "23470": 978739392.0, "23475": 958084224.0, "23480": 961409344.0, "23485": 985705280.0, "23490": 962039104.0, "23495": 974670528.0, "23500": 956849088.0, "23505": 948964096.0, "23510": 975866176.0, "23515": 968232704.0, "23520": 961986432.0, "23525": 977855040.0, "23530": 946207104.0, "23535": 976075008.0, "23540": 979201792.0, "23545": 979527552.0, "23550": 975466688.0, "23555": 940413952.0, "23560": 963716928.0, "23565": 965194304.0, "23570": 978661184.0, "23575": 945751744.0, "23580": 941686208.0, "23585": 959056896.0, "23590": 972534912.0, "23595": 970444544.0, "23600": 966397952.0, "23605": 962039232.0, "23610": 937852992.0, "23615": 973515200.0, "23620": 973340544.0, "23625": 981658880.0, "23630": 970001280.0, "23635": 949835712.0, "23640": 961892160.0, "23645": 982591936.0, "23650": 968007040.0, "23655": 969423808.0, "23660": 953913856.0, "23665": 951953472.0, "23670": 985103872.0, "23675": 972913344.0, "23680": 976444864.0, "23685": 959626240.0, "23690": 932552576.0, "23695": 963952192.0, "23700": 966831744.0, "23705": 977537536.0, "23710": 961806528.0, "23715": 954586432.0, "23720": 988443456.0, "23725": 968506624.0, "23730": 970222464.0, "23735": 959190144.0, "23740": 952059328.0, "23745": 948516224.0, "23750": 979507776.0, "23755": 967581120.0, "23760": 972265280.0, "23765": 947925824.0, "23770": 951445696.0, "23775": 968458880.0, "23780": 967304832.0, "23785": 962868672.0, "23790": 980206784.0, "23795": 944297024.0, "23800": 975031680.0, "23805": 974348928.0, "23810": 973229184.0, "23815": 971254528.0, "23820": 958557184.0, "23825": 959914496.0, "23830": 965642752.0, "23835": 970204480.0, "23840": 980453120.0, "23845": 943927360.0, "23850": 945112192.0, "23855": 970137728.0, "23860": 970135424.0, "23865": 970578176.0, "23870": 986475840.0, "23875": 947733888.0, "23880": 955271808.0, "23885": 972295744.0, "23890": 972217216.0, "23895": 984951744.0, "23900": 951531968.0, "23905": 954644416.0, "23910": 971764480.0, "23915": 962249152.0, "23920": 960775744.0, "23925": 943926016.0, "23930": 948829312.0, "23935": 981720384.0, "23940": 971320448.0, "23945": 970201920.0, "23950": 972326272.0, "23955": 944141376.0, "23960": 974059776.0, "23965": 976956288.0, "23970": 963673088.0, "23975": 960767872.0, "23980": 945296960.0, "23985": 969700096.0, "23990": 997904256.0, "23995": 976027584.0, "24000": 963129792.0, "24005": 937127296.0, "24010": 949867200.0, "24015": 967765760.0, "24020": 986123456.0, "24025": 967380096.0, "24030": 966793472.0, "24035": 937300736.0, "24040": 963280832.0, "24045": 984921408.0, "24050": 962612160.0, "24055": 968337472.0, "24060": 947137728.0, "24065": 975369472.0, "24070": 975280128.0, "24075": 975594816.0, "24080": 980072384.0, "24085": 956924544.0, "24090": 970522944.0, "24095": 963728960.0, "24100": 977619904.0, "24105": 981344896.0, "24110": 968414400.0, "24115": 953392064.0, "24120": 963195200.0, "24125": 964263360.0, "24130": 965376512.0, "24135": 959652864.0, "24140": 965749248.0, "24145": 966550464.0, "24150": 968783616.0, "24155": 955380224.0, "24160": 972001664.0, "24165": 939535360.0, "24170": 960191360.0, "24175": 981305920.0, "24180": 968567808.0, "24185": 957438528.0, "24190": 951329600.0, "24195": 952223040.0, "24200": 969579520.0, "24205": 972241088.0, "24210": 962632896.0, "24215": 932104768.0, "24220": 957047168.0, "24225": 981407488.0, "24230": 959229440.0, "24235": 948283456.0, "24240": 969343296.0, "24245": 949779776.0, "24250": 956169984.0, "24255": 973511104.0, "24260": 968968960.0, "24265": 968514688.0, "24270": 953650304.0, "24275": 961683648.0, "24280": 958540608.0, "24285": 987351360.0, "24290": 970030144.0, "24295": 964755776.0, "24300": 946211392.0, "24305": 980639424.0, "24310": 971898048.0, "24315": 974627456.0, "24320": 964971584.0, "24325": 939296704.0, "24330": 955664192.0, "24335": 967122240.0, "24340": 991718976.0, "24345": 971187200.0, "24350": 924541952.0, "24355": 969373632.0, "24360": 967391680.0, "24365": 964215616.0, "24370": 968605888.0, "24375": 974406144.0, "24380": 953736960.0, "24385": 978055296.0, "24390": 974840576.0, "24395": 956502144.0, "24400": 968795200.0, "24405": 943252224.0, "24410": 979839680.0, "24415": 982185536.0, "24420": 958872640.0, "24425": 973169664.0, "24430": 944909184.0, "24435": 958834752.0, "24440": 979005952.0, "24445": 972923840.0, "24450": 951257088.0, "24455": 941631744.0, "24460": 947343616.0, "24465": 974988288.0, "24470": 988081664.0, "24475": 973988480.0, "24480": 962481792.0, "24485": 920969280.0, "24490": 970055360.0, "24495": 982501376.0, "24500": 976415424.0, "24505": 967111488.0, "24510": 958430208.0, "24515": 1000126208.0, "24520": 954757184.0, "24525": 965977472.0, "24530": 958706944.0, "24535": 958009728.0, "24540": 956723456.0, "24545": 968803776.0, "24550": 962772736.0, "24555": 958907968.0, "24560": 954454336.0, "24565": 947626624.0, "24570": 966973440.0, "24575": 973644544.0, "24580": 965629888.0, "24585": 958205760.0, "24590": 942096384.0, "24595": 973766336.0, "24600": 971111744.0, "24605": 971544832.0, "24610": 953664384.0, "24615": 943245952.0, "24620": 958989504.0, "24625": 979977088.0, "24630": 979630592.0, "24635": 976545088.0, "24640": 941985408.0, "24645": 956641728.0, "24650": 954522112.0, "24655": 981426752.0, "24660": 956426176.0, "24665": 957927104.0, "24670": 952796288.0, "24675": 961040064.0, "24680": 970628544.0, "24685": 964702016.0, "24690": 959229184.0, "24695": 938330432.0, "24700": 980620160.0, "24705": 970654144.0, "24710": 959499712.0, "24715": 972506944.0, "24720": 947637888.0, "24725": 962655488.0, "24730": 961556096.0, "24735": 951662400.0, "24740": 981947520.0, "24745": 964380672.0, "24750": 949461248.0, "24755": 971311040.0, "24760": 963499904.0, "24765": 987773952.0, "24770": 969337856.0, "24775": 942176320.0, "24780": 968268224.0, "24785": 970737984.0, "24790": 960814912.0, "24795": 961861760.0, "24800": 945817856.0, "24805": 933880128.0, "24810": 965334400.0, "24815": 963502400.0, "24820": 972379200.0, "24825": 960669568.0, "24830": 963203392.0, "24835": 979161088.0, "24840": 956758976.0, "24845": 964164480.0, "24850": 973816512.0, "24855": 949113344.0, "24860": 989489024.0, "24865": 994513984.0, "24870": 971144448.0, "24875": 964130752.0, "24880": 948164864.0, "24885": 964773824.0, "24890": 970393664.0, "24895": 973909824.0, "24900": 966495360.0, "24905": 950494016.0, "24910": 966102592.0, "24915": 957724480.0, "24920": 985822400.0, "24925": 976332288.0, "24930": 965216704.0, "24935": 914931776.0, "24940": 963256320.0, "24945": 982869248.0, "24950": 982862592.0, "24955": 962029568.0, "24960": 948749696.0, "24965": 972979264.0, "24970": 964969344.0, "24975": 968066368.0, "24980": 965185920.0, "24985": 956709568.0, "24990": 971469824.0, "24995": 967378560.0, "25000": 954620992.0, "25005": 969768768.0, "25010": 973415232.0, "25015": 956665920.0, "25020": 981167424.0, "25025": 959812992.0, "25030": 957839360.0, "25035": 965695808.0, "25040": 949924416.0, "25045": 977898432.0, "25050": 968041792.0, "25055": 962643968.0, "25060": 969694528.0, "25065": 950598976.0, "25070": 952183808.0, "25075": 970229568.0, "25080": 963203200.0, "25085": 966930560.0, "25090": 959601536.0, "25095": 935832192.0, "25100": 990324672.0, "25105": 977600320.0, "25110": 977737984.0, "25115": 963438400.0, "25120": 950868224.0, "25125": 960929536.0, "25130": 973214848.0, "25135": 976661120.0, "25140": 972398272.0, "25145": 938216512.0, "25150": 961800896.0, "25155": 965119104.0, "25160": 964592448.0, "25165": 978141120.0, "25170": 938204160.0, "25175": 971962240.0, "25180": 976535936.0, "25185": 984380672.0, "25190": 975301440.0, "25195": 958920384.0, "25200": 951886528.0, "25205": 951423424.0, "25210": 963822912.0, "25215": 975726272.0, "25220": 973205504.0, "25225": 944013504.0, "25230": 967061376.0, "25235": 968975296.0, "25240": 964072896.0, "25245": 964059200.0, "25250": 955627264.0, "25255": 962445568.0, "25260": 973101696.0, "25265": 965398784.0, "25270": 976106496.0, "25275": 965719360.0, "25280": 941298944.0, "25285": 970995776.0, "25290": 957888960.0, "25295": 964707136.0, "25300": 970333632.0, "25305": 944138112.0, "25310": 964677248.0, "25315": 975265984.0, "25320": 963300544.0, "25325": 956708288.0, "25330": 950122240.0, "25335": 978011968.0, "25340": 966113920.0, "25345": 975745792.0, "25350": 974588096.0, "25355": 955639872.0, "25360": 954793408.0, "25365": 975835968.0, "25370": 987627968.0, "25375": 968191232.0, "25380": 980848384.0, "25385": 935802112.0, "25390": 968134848.0, "25395": 969167488.0, "25400": 975295936.0, "25405": 984828992.0, "25410": 940846464.0, "25415": 974245120.0, "25420": 964165824.0, "25425": 981242688.0, "25430": 971224832.0, "25435": 943513152.0, "25440": 946748672.0, "25445": 972411456.0, "25450": 980024448.0, "25455": 977147200.0, "25460": 951649856.0, "25465": 955127104.0, "25470": 980932480.0, "25475": 961996928.0, "25480": 957836480.0, "25485": 968428096.0, "25490": 956882304.0, "25495": 986193536.0, "25500": 974216960.0, "25505": 993371520.0, "25510": 963651008.0, "25515": 961777152.0, "25520": 960992000.0, "25525": 984779584.0, "25530": 970984000.0, "25535": 971156928.0, "25540": 955801408.0, "25545": 951333376.0, "25550": 972887424.0, "25555": 973439360.0, "25560": 977566912.0, "25565": 964369472.0, "25570": 930912320.0, "25575": 974102592.0, "25580": 977024128.0, "25585": 978122944.0, "25590": 981604288.0, "25595": 940716928.0, "25600": 948525504.0, "25605": 969672128.0, "25610": 974861568.0, "25615": 967827520.0, "25620": 970883904.0, "25625": 948431168.0, "25630": 980546752.0, "25635": 966213760.0, "25640": 976623808.0, "25645": 975358464.0, "25650": 954228672.0, "25655": 975698880.0, "25660": 968876608.0, "25665": 964117568.0, "25670": 976450816.0, "25675": 957820928.0, "25680": 943479168.0, "25685": 973748480.0, "25690": 970553536.0, "25695": 981222464.0, "25700": 958036608.0, "25705": 949246848.0, "25710": 978889408.0, "25715": 978803712.0, "25720": 967164544.0, "25725": 952375744.0, "25730": 957879104.0, "25735": 977016832.0, "25740": 977225536.0, "25745": 972920320.0, "25750": 958548224.0, "25755": 958449920.0, "25760": 960926144.0, "25765": 988248320.0, "25770": 974897664.0, "25775": 969724992.0, "25780": 967453248.0, "25785": 954290432.0, "25790": 965199872.0, "25795": 970589120.0, "25800": 971697408.0, "25805": 980970624.0, "25810": 938727488.0, "25815": 973489280.0, "25820": 964748864.0, "25825": 973356288.0, "25830": 986358080.0, "25835": 939945920.0, "25840": 974836480.0, "25845": 967934144.0, "25850": 967590464.0, "25855": 988152832.0, "25860": 941201600.0, "25865": 943251648.0, "25870": 969923520.0, "25875": 968368384.0, "25880": 965407552.0, "25885": 946858752.0, "25890": 952042816.0, "25895": 976081984.0, "25900": 968390208.0, "25905": 975177472.0, "25910": 963600896.0, "25915": 944636864.0, "25920": 964289408.0, "25925": 971041408.0, "25930": 962803520.0, "25935": 974025984.0, "25940": 953036608.0, "25945": 982053440.0, "25950": 982199872.0, "25955": 965727168.0, "25960": 961104256.0, "25965": 944492928.0, "25970": 974922624.0, "25975": 991995904.0, "25980": 957166016.0, "25985": 981390720.0, "25990": 967531904.0, "25995": 930821824.0, "26000": 972790720.0, "26005": 968520768.0, "26010": 979117312.0, "26015": 973347072.0, "26020": 953670336.0, "26025": 969588160.0, "26030": 968102656.0, "26035": 984668864.0, "26040": 967022912.0, "26045": 957734016.0, "26050": 961212608.0, "26055": 959646784.0, "26060": 967102528.0, "26065": 981118208.0, "26070": 976360704.0, "26075": 937159872.0, "26080": 966404736.0, "26085": 964609472.0, "26090": 979183040.0, "26095": 980848896.0, "26100": 958870016.0, "26105": 972822080.0, "26110": 967459072.0, "26115": 970163072.0, "26120": 968888128.0, "26125": 949758080.0, "26130": 967150272.0, "26135": 978919424.0, "26140": 971888576.0, "26145": 964704640.0, "26150": 950986560.0, "26155": 954418560.0, "26160": 967920192.0, "26165": 977083200.0, "26170": 976191488.0, "26175": 967158016.0, "26180": 949964608.0, "26185": 970144832.0, "26190": 991506176.0, "26195": 963982656.0, "26200": 987143872.0, "26205": 947792640.0, "26210": 971708352.0, "26215": 971396224.0, "26220": 969769280.0, "26225": 973619904.0, "26230": 955833536.0, "26235": 943664640.0, "26240": 974155072.0, "26245": 960858944.0, "26250": 969898048.0, "26255": 968280896.0, "26260": 952610432.0, "26265": 984909312.0, "26270": 964123776.0, "26275": 976918976.0, "26280": 982423296.0, "26285": 959019840.0, "26290": 982931328.0, "26295": 966451072.0, "26300": 958006464.0, "26305": 980921856.0, "26310": 955737216.0, "26315": 941998912.0, "26320": 964313472.0, "26325": 965688576.0, "26330": 961139904.0, "26335": 955292224.0, "26340": 939254080.0, "26345": 961734848.0, "26350": 967275456.0, "26355": 966050624.0, "26360": 972038784.0, "26365": 950298880.0, "26370": 969699648.0, "26375": 974742976.0, "26380": 968056896.0, "26385": 975791168.0, "26390": 951984960.0, "26395": 967041984.0, "26400": 975778304.0, "26405": 964558848.0, "26410": 957657792.0, "26415": 968756032.0, "26420": 947748224.0, "26425": 972889664.0, "26430": 967255232.0, "26435": 963027584.0, "26440": 964490368.0, "26445": 943345856.0, "26450": 966519232.0, "26455": 979332224.0, "26460": 964954240.0, "26465": 968636288.0, "26470": 946095872.0, "26475": 972071296.0, "26480": 979074432.0, "26485": 979743360.0, "26490": 975407104.0, "26495": 965433408.0, "26500": 963835584.0, "26505": 970721216.0, "26510": 959696128.0, "26515": 966255936.0, "26520": 973049856.0, "26525": 935685760.0, "26530": 970900288.0, "26535": 958825664.0, "26540": 962286720.0, "26545": 968300544.0, "26550": 956276416.0, "26555": 966229888.0, "26560": 964641344.0, "26565": 965604224.0, "26570": 962883008.0, "26575": 949260736.0, "26580": 963841152.0, "26585": 963718336.0, "26590": 965209792.0, "26595": 965031232.0, "26600": 970388544.0, "26605": 938057088.0, "26610": 971296896.0, "26615": 969992192.0, "26620": 979374400.0, "26625": 960185472.0, "26630": 944266752.0, "26635": 959636224.0, "26640": 969051136.0, "26645": 965673408.0, "26650": 967305280.0, "26655": 940635648.0, "26660": 946048320.0, "26665": 972742848.0, "26670": 969480896.0, "26675": 967598784.0, "26680": 952116736.0, "26685": 946066816.0, "26690": 967810624.0, "26695": 971458240.0, "26700": 975905280.0, "26705": 983037632.0, "26710": 953129088.0, "26715": 964178496.0, "26720": 986798912.0, "26725": 952109568.0, "26730": 970238656.0, "26735": 960401984.0, "26740": 961242240.0, "26745": 965120448.0, "26750": 967355072.0, "26755": 977576448.0, "26760": 975420032.0, "26765": 943265472.0, "26770": 972693248.0, "26775": 974651776.0, "26780": 962926272.0, "26785": 969939328.0, "26790": 943205440.0, "26795": 948696192.0, "26800": 965922816.0, "26805": 964061376.0, "26810": 967568512.0, "26815": 951974912.0, "26820": 970045120.0, "26825": 963474112.0, "26830": 980168896.0, "26835": 971645568.0, "26840": 958006016.0, "26845": 946494656.0, "26850": 983215488.0, "26855": 970813248.0, "26860": 973466432.0, "26865": 963288512.0, "26870": 954588032.0, "26875": 972877248.0, "26880": 972962816.0, "26885": 968750656.0, "26890": 961495296.0, "26895": 938395840.0, "26900": 967492352.0, "26905": 967300352.0, "26910": 957505472.0, "26915": 974634816.0, "26920": 943708160.0, "26925": 951415872.0, "26930": 961871488.0, "26935": 976340608.0, "26940": 967246016.0, "26945": 973739712.0, "26950": 949813248.0, "26955": 971760192.0, "26960": 972993920.0, "26965": 973152768.0, "26970": 971298944.0, "26975": 949311168.0, "26980": 982131520.0, "26985": 976095040.0, "26990": 975109888.0, "26995": 961802624.0, "27000": 952553728.0, "27005": 956790272.0, "27010": 968829504.0, "27015": 962737536.0, "27020": 956274944.0, "27025": 990773312.0, "27030": 936085952.0, "27035": 954253376.0, "27040": 959524288.0, "27045": 978416448.0, "27050": 951858816.0, "27055": 952062976.0, "27060": 991493824.0, "27065": 964431296.0, "27070": 975405120.0, "27075": 965560448.0, "27080": 943338048.0, "27085": 963109248.0, "27090": 958769792.0, "27095": 980437184.0, "27100": 958534272.0, "27105": 939168000.0, "27110": 948830912.0, "27115": 974184320.0, "27120": 974169024.0, "27125": 971736768.0, "27130": 960254016.0, "27135": 948634496.0, "27140": 968369024.0, "27145": 973706944.0, "27150": 967889600.0, "27155": 975567872.0, "27160": 943859776.0, "27165": 974063168.0, "27170": 961251008.0, "27175": 967948352.0, "27180": 968892416.0, "27185": 975363840.0, "27190": 944818752.0, "27195": 961592000.0, "27200": 969558592.0, "27205": 975450752.0, "27210": 955813312.0, "27215": 950282560.0, "27220": 975002880.0, "27225": 962963712.0, "27230": 962999168.0, "27235": 953087424.0, "27240": 956679488.0, "27245": 963223552.0, "27250": 965899840.0, "27255": 969570944.0, "27260": 952884032.0, "27265": 962239680.0, "27270": 963510208.0, "27275": 992658368.0, "27280": 957700608.0, "27285": 964462912.0, "27290": 969267520.0, "27295": 932652928.0, "27300": 976110400.0, "27305": 962624704.0, "27310": 975144896.0, "27315": 968554816.0, "27320": 952054912.0, "27325": 965538688.0, "27330": 960597824.0, "27335": 984153728.0, "27340": 980229888.0, "27345": 941434688.0, "27350": 958309184.0, "27355": 957701056.0, "27360": 975603456.0, "27365": 968385664.0, "27370": 959255488.0, "27375": 939415168.0, "27380": 966225856.0, "27385": 958921408.0, "27390": 957170048.0, "27395": 985040320.0, "27400": 952779584.0, "27405": 977550272.0, "27410": 983017088.0, "27415": 960618176.0, "27420": 972544832.0, "27425": 938837696.0, "27430": 960489152.0, "27435": 963536768.0, "27440": 963498432.0, "27445": 970826752.0, "27450": 935245504.0, "27455": 976248832.0, "27460": 961399104.0, "27465": 977262912.0, "27470": 971831296.0, "27475": 964954240.0, "27480": 949051776.0, "27485": 964315904.0, "27490": 969038208.0, "27495": 971281088.0, "27500": 967312000.0, "27505": 960714880.0, "27510": 966342976.0, "27515": 968068224.0, "27520": 979971072.0, "27525": 966871488.0, "27530": 952507840.0, "27535": 951718272.0, "27540": 975740736.0, "27545": 970653120.0, "27550": 966036672.0, "27555": 957461056.0, "27560": 941301184.0, "27565": 966876928.0, "27570": 963977280.0, "27575": 979181312.0, "27580": 952480960.0, "27585": 935765312.0, "27590": 963337088.0, "27595": 969387776.0, "27600": 976358336.0, "27605": 963946496.0, "27610": 945569216.0, "27615": 962626176.0, "27620": 972889280.0, "27625": 973516672.0, "27630": 962980608.0, "27635": 957540032.0, "27640": 949573504.0, "27645": 981236480.0, "27650": 972226688.0, "27655": 976768960.0, "27660": 973288640.0, "27665": 953755840.0, "27670": 956092672.0, "27675": 957612736.0, "27680": 958328960.0, "27685": 978032192.0, "27690": 932535296.0, "27695": 986463488.0, "27700": 967374400.0, "27705": 963070848.0, "27710": 953300672.0, "27715": 955822208.0, "27720": 949854336.0, "27725": 987336960.0, "27730": 968890048.0, "27735": 972896000.0, "27740": 962392064.0, "27745": 945583296.0, "27750": 972963264.0, "27755": 974800640.0, "27760": 971739136.0, "27765": 972036928.0, "27770": 946318464.0, "27775": 970433280.0, "27780": 979256960.0, "27785": 968555328.0, "27790": 975433408.0, "27795": 941196160.0, "27800": 959702080.0, "27805": 975790208.0, "27810": 955268288.0, "27815": 973632448.0, "27820": 985076544.0, "27825": 940158784.0, "27830": 968005888.0, "27835": 979552896.0, "27840": 959808576.0, "27845": 974173760.0, "27850": 949401984.0, "27855": 979582784.0, "27860": 985273024.0, "27865": 978729344.0, "27870": 983523136.0, "27875": 939505216.0, "27880": 970759680.0, "27885": 980463616.0, "27890": 970200064.0, "27895": 969971584.0, "27900": 937695104.0, "27905": 952172480.0, "27910": 979617024.0, "27915": 979539392.0, "27920": 953321152.0, "27925": 970590144.0, "27930": 948140992.0, "27935": 978629696.0, "27940": 991255872.0, "27945": 964365760.0, "27950": 969179392.0, "27955": 939526912.0, "27960": 976258432.0, "27965": 969794432.0, "27970": 961103488.0, "27975": 966571136.0, "27980": 942891648.0, "27985": 963270464.0, "27990": 976084224.0, "27995": 975504064.0, "28000": 976900032.0, "28005": 960377536.0, "28010": 940208064.0, "28015": 977547648.0, "28020": 964666688.0, "28025": 973861184.0, "28030": 966770240.0, "28035": 942524352.0, "28040": 960998144.0, "28045": 971329856.0, "28050": 974921536.0, "28055": 970765056.0, "28060": 949808512.0, "28065": 953399488.0, "28070": 972134976.0, "28075": 964633600.0, "28080": 973156480.0, "28085": 987684032.0, "28090": 941601280.0, "28095": 972809792.0, "28100": 971415552.0, "28105": 973197888.0, "28110": 985315520.0, "28115": 949882752.0, "28120": 992550208.0, "28125": 962030720.0, "28130": 968338496.0, "28135": 951681152.0, "28140": 936425088.0, "28145": 951264256.0, "28150": 962682240.0, "28155": 969597376.0, "28160": 965270656.0, "28165": 944830784.0, "28170": 944207424.0, "28175": 976131968.0, "28180": 972638848.0, "28185": 976046336.0, "28190": 952297856.0, "28195": 950229504.0, "28200": 973849024.0, "28205": 975900224.0, "28210": 978563712.0, "28215": 979229184.0, "28220": 936873088.0, "28225": 968703552.0, "28230": 973835776.0, "28235": 959231040.0, "28240": 967787392.0, "28245": 953094272.0, "28250": 959382144.0, "28255": 956181952.0, "28260": 959446656.0, "28265": 973356544.0, "28270": 973786624.0, "28275": 950955200.0, "28280": 964278592.0, "28285": 964992768.0, "28290": 969946624.0, "28295": 965219904.0, "28300": 954309504.0, "28305": 964771520.0, "28310": 963887616.0, "28315": 964802176.0, "28320": 962103552.0, "28325": 948939200.0, "28330": 970306048.0, "28335": 966397376.0, "28340": 977345024.0, "28345": 965077760.0, "28350": 959660096.0, "28355": 951994624.0, "28360": 966228096.0, "28365": 988529152.0, "28370": 972051456.0, "28375": 961802560.0, "28380": 949063936.0, "28385": 965305984.0, "28390": 972282560.0, "28395": 964955200.0, "28400": 983749120.0, "28405": 955788224.0, "28410": 980419648.0, "28415": 976059520.0, "28420": 969936128.0, "28425": 974433408.0, "28430": 940030400.0, "28435": 955913856.0, "28440": 976897536.0, "28445": 974062528.0, "28450": 981546304.0, "28455": 975241856.0, "28460": 955483072.0, "28465": 966656896.0, "28470": 956536320.0, "28475": 970004736.0, "28480": 976851968.0, "28485": 947609216.0, "28490": 949979776.0, "28495": 962978624.0, "28500": 990160448.0, "28505": 968843840.0, "28510": 949041216.0, "28515": 946658560.0, "28520": 970654016.0, "28525": 973479104.0, "28530": 972225600.0, "28535": 966888576.0, "28540": 958420224.0, "28545": 967306880.0, "28550": 966490624.0, "28555": 974076544.0, "28560": 969775616.0, "28565": 956140992.0, "28570": 980353280.0, "28575": 958690240.0, "28580": 972682496.0, "28585": 970726400.0, "28590": 948229056.0, "28595": 950232960.0, "28600": 970673408.0, "28605": 972227584.0, "28610": 977261184.0, "28615": 949407808.0, "28620": 948015552.0, "28625": 967473280.0, "28630": 969941824.0, "28635": 966213632.0, "28640": 968807168.0, "28645": 954301888.0, "28650": 974338432.0, "28655": 982835776.0, "28660": 983483776.0, "28665": 966980736.0, "28670": 944588288.0, "28675": 964470400.0, "28680": 985493248.0, "28685": 969649088.0, "28690": 989136064.0, "28695": 946276288.0, "28700": 930997824.0, "28705": 965714816.0, "28710": 978079168.0, "28715": 978533888.0, "28720": 985151360.0, "28725": 955435008.0, "28730": 966276160.0, "28735": 960673856.0, "28740": 976361600.0, "28745": 965834624.0, "28750": 948143872.0, "28755": 951201664.0, "28760": 968829952.0, "28765": 975484288.0, "28770": 955698752.0, "28775": 971354560.0, "28780": 945876160.0, "28785": 964522880.0, "28790": 970033216.0, "28795": 975417472.0, "28800": 957128192.0, "28805": 953878400.0, "28810": 971293376.0, "28815": 964738880.0, "28820": 973117760.0, "28825": 948329216.0, "28830": 933024320.0, "28835": 966354752.0, "28840": 977993984.0, "28845": 967154560.0, "28850": 975818368.0, "28855": 956711744.0, "28860": 939507264.0, "28865": 973501376.0, "28870": 962978624.0, "28875": 966237504.0, "28880": 962134976.0, "28885": 961389056.0, "28890": 966564800.0, "28895": 965848960.0, "28900": 971169664.0, "28905": 985038656.0, "28910": 930688896.0, "28915": 962933056.0, "28920": 967584704.0, "28925": 970753664.0, "28930": 989818560.0, "28935": 949808064.0, "28940": 951104448.0, "28945": 985423936.0, "28950": 962339648.0, "28955": 959752448.0, "28960": 961559104.0, "28965": 974125312.0, "28970": 958654784.0, "28975": 968743104.0, "28980": 957952640.0, "28985": 966199296.0, "28990": 937215808.0, "28995": 960053376.0, "29000": 980349888.0, "29005": 970958080.0, "29010": 977104384.0, "29015": 946392896.0, "29020": 963841856.0, "29025": 948813632.0, "29030": 976279296.0, "29035": 983331136.0, "29040": 943198016.0, "29045": 964303744.0, "29050": 987035200.0, "29055": 968326080.0, "29060": 952769984.0, "29065": 959813312.0, "29070": 945061888.0, "29075": 976958464.0, "29080": 968620544.0, "29085": 973867200.0, "29090": 973870592.0, "29095": 921556992.0, "29100": 960458560.0, "29105": 984026304.0, "29110": 974572992.0, "29115": 967621632.0, "29120": 946969600.0, "29125": 945140800.0, "29130": 979734336.0, "29135": 968590208.0, "29140": 970207616.0, "29145": 960789888.0, "29150": 947014272.0, "29155": 986858432.0, "29160": 970614016.0, "29165": 983698304.0, "29170": 969968064.0, "29175": 952714944.0, "29180": 969254272.0, "29185": 970691136.0, "29190": 948076288.0, "29195": 978379584.0, "29200": 958775872.0, "29205": 966257600.0, "29210": 974082048.0, "29215": 965930944.0, "29220": 953466368.0, "29225": 962363008.0, "29230": 948215744.0, "29235": 973565376.0, "29240": 975604736.0, "29245": 970397824.0, "29250": 977225472.0, "29255": 956994880.0, "29260": 972916800.0, "29265": 982447552.0, "29270": 966091328.0, "29275": 951878016.0, "29280": 937172032.0, "29285": 975376960.0, "29290": 980284672.0, "29295": 958582976.0, "29300": 965531072.0, "29305": 960986368.0, "29310": 951133440.0, "29315": 982227776.0, "29320": 965461440.0, "29325": 984913024.0, "29330": 961889344.0, "29335": 937503040.0, "29340": 967645120.0, "29345": 963154560.0, "29350": 968977472.0, "29355": 985154880.0, "29360": 941430336.0, "29365": 961257216.0, "29370": 970998912.0, "29375": 970100864.0, "29380": 976643904.0, "29385": 954599616.0, "29390": 954326848.0, "29395": 961477888.0, "29400": 979828928.0, "29405": 963070144.0, "29410": 961233792.0, "29415": 947123392.0, "29420": 983084736.0, "29425": 965636672.0, "29430": 971994432.0, "29435": 969123776.0, "29440": 933727296.0, "29445": 985529600.0, "29450": 961484416.0, "29455": 976493568.0, "29460": 977950016.0, "29465": 941528896.0, "29470": 956155200.0, "29475": 974171392.0, "29480": 957564736.0, "29485": 972140672.0, "29490": 956404352.0, "29495": 951565248.0, "29500": 973711296.0, "29505": 962007872.0, "29510": 969489536.0, "29515": 973917312.0, "29520": 950600256.0, "29525": 961742080.0, "29530": 968960768.0, "29535": 970741760.0, "29540": 964972928.0, "29545": 928176640.0, "29550": 934516032.0, "29555": 977994944.0, "29560": 963492672.0, "29565": 972374400.0, "29570": 962988928.0, "29575": 948137408.0, "29580": 968848832.0, "29585": 974674816.0, "29590": 971636800.0, "29595": 958831488.0, "29600": 949912448.0, "29605": 958817152.0, "29610": 963985216.0, "29615": 960234624.0, "29620": 973012544.0, "29625": 965828672.0, "29630": 964146624.0, "29635": 965821504.0, "29640": 973512128.0, "29645": 970999808.0, "29650": 970954624.0, "29655": 936541376.0, "29660": 957135616.0, "29665": 964562112.0, "29670": 956752128.0, "29675": 972902912.0, "29680": 946143232.0, "29685": 978793472.0, "29690": 963847616.0, "29695": 964739584.0, "29700": 975783168.0, "29705": 960875968.0, "29710": 965520640.0, "29715": 971372608.0, "29720": 970549824.0, "29725": 967946560.0, "29730": 960370496.0, "29735": 954133184.0, "29740": 986827328.0, "29745": 959077312.0, "29750": 972091648.0, "29755": 954010048.0, "29760": 941798720.0, "29765": 964968384.0, "29770": 974297344.0, "29775": 971341824.0, "29780": 966269632.0, "29785": 939513088.0, "29790": 969123392.0, "29795": 974751552.0, "29800": 973805696.0, "29805": 981527552.0, "29810": 935757184.0, "29815": 949973312.0, "29820": 969456256.0, "29825": 969876224.0, "29830": 969591936.0, "29835": 957606528.0, "29840": 935300672.0, "29845": 954066560.0, "29850": 972622336.0, "29855": 961749440.0, "29860": 980741696.0, "29865": 938247872.0, "29870": 958728384.0, "29875": 979143872.0, "29880": 965839808.0, "29885": 962006848.0, "29890": 920021568.0, "29895": 993957632.0, "29900": 955195840.0, "29905": 959545088.0, "29910": 954980288.0, "29915": 965539520.0, "29920": 962130880.0, "29925": 985896384.0, "29930": 957131520.0, "29935": 970384640.0, "29940": 970211712.0, "29945": 969271040.0, "29950": 964760832.0, "29955": 977256256.0, "29960": 975016128.0, "29965": 972753408.0, "29970": 923971712.0, "29975": 959095936.0, "29980": 972596480.0, "29985": 963966720.0, "29990": 967800448.0, "29995": 968013248.0, "30000": 942632704.0, "30005": 967548032.0, "30010": 956620032.0, "30015": 966251648.0, "30020": 957927808.0, "30025": 944790272.0, "30030": 978346496.0, "30035": 978398720.0, "30040": 983451264.0, "30045": 965880896.0, "30050": 937907712.0, "30055": 972907392.0, "30060": 959978432.0, "30065": 976522880.0, "30070": 962675456.0, "30075": 957117568.0, "30080": 954011072.0, "30085": 965520128.0, "30090": 973099712.0, "30095": 950060864.0, "30100": 962601600.0, "30105": 953044160.0, "30110": 963454144.0, "30115": 963175168.0, "30120": 972012544.0, "30125": 965514176.0, "30130": 934074496.0, "30135": 965492096.0, "30140": 953620672.0, "30145": 980868032.0, "30150": 975273536.0, "30155": 931105280.0, "30160": 967387200.0, "30165": 966567040.0, "30170": 970347136.0, "30175": 956073984.0, "30180": 931934784.0, "30185": 944482240.0, "30190": 970316864.0, "30195": 972891328.0, "30200": 958268928.0, "30205": 950737408.0, "30210": 940393920.0, "30215": 969704256.0, "30220": 963202944.0, "30225": 954800768.0, "30230": 965152192.0, "30235": 953250240.0, "30240": 959016192.0, "30245": 974040448.0, "30250": 968812864.0, "30255": 985513088.0, "30260": 978203840.0, "30265": 968960832.0, "30270": 966712128.0, "30275": 974869568.0, "30280": 984136576.0, "30285": 967028032.0, "30290": 959471552.0, "30295": 960387008.0, "30300": 953433536.0, "30305": 980409536.0, "30310": 967081216.0, "30315": 961914816.0, "30320": 966559616.0, "30325": 962218176.0, "30330": 960940800.0, "30335": 960839808.0, "30340": 959685248.0, "30345": 957689728.0, "30350": 972157888.0, "30355": 974347456.0, "30360": 966743872.0, "30365": 968636224.0, "30370": 948445632.0, "30375": 975986560.0, "30380": 963781504.0, "30385": 963169536.0, "30390": 986231104.0, "30395": 930748224.0, "30400": 976112128.0, "30405": 990201024.0, "30410": 977514240.0, "30415": 962572800.0, "30420": 945880192.0, "30425": 969598400.0, "30430": 977215232.0, "30435": 972350592.0, "30440": 986705536.0, "30445": 936757120.0, "30450": 945166848.0, "30455": 955593344.0, "30460": 973462784.0, "30465": 980950336.0, "30470": 981699648.0, "30475": 936972224.0, "30480": 964586240.0, "30485": 959857344.0, "30490": 986701056.0, "30495": 975590208.0, "30500": 958427136.0, "30505": 975343552.0, "30510": 963526656.0, "30515": 970484480.0, "30520": 974776192.0, "30525": 944092672.0, "30530": 965153344.0, "30535": 967253888.0, "30540": 975667072.0, "30545": 967350400.0, "30550": 982194816.0, "30555": 955301248.0, "30560": 974763392.0, "30565": 969953152.0, "30570": 965275264.0, "30575": 966042176.0, "30580": 940400832.0, "30585": 973517440.0, "30590": 960583488.0, "30595": 972412288.0, "30600": 978799360.0, "30605": 939963136.0, "30610": 968603776.0, "30615": 964416768.0, "30620": 968168896.0, "30625": 969488448.0, "30630": 965764544.0, "30635": 946186368.0, "30640": 975668928.0, "30645": 973009856.0, "30650": 967264384.0, "30655": 959612416.0, "30660": 943476480.0, "30665": 983331456.0, "30670": 967757824.0, "30675": 971392896.0, "30680": 975543360.0, "30685": 934257984.0, "30690": 961492032.0, "30695": 967382400.0, "30700": 972609472.0, "30705": 961585408.0, "30710": 964296512.0, "30715": 946727552.0, "30720": 969519296.0, "30725": 973019328.0, "30730": 976766144.0, "30735": 967467008.0, "30740": 947881152.0, "30745": 967030336.0, "30750": 964815680.0, "30755": 994933888.0, "30760": 988424448.0, "30765": 942205312.0, "30770": 961966976.0, "30775": 970970112.0, "30780": 977684032.0, "30785": 970669632.0, "30790": 952634048.0, "30795": 962086784.0, "30800": 961953088.0, "30805": 959268352.0, "30810": 951675520.0, "30815": 956463168.0, "30820": 949201408.0, "30825": 989149568.0, "30830": 974611200.0, "30835": 966672768.0, "30840": 975599680.0, "30845": 937174976.0, "30850": 968566208.0, "30855": 987522624.0, "30860": 970600064.0, "30865": 968469888.0, "30870": 956822976.0, "30875": 948607360.0, "30880": 985960064.0, "30885": 981319040.0, "30890": 974928448.0, "30895": 946584000.0, "30900": 942000896.0, "30905": 952575808.0, "30910": 977554112.0, "30915": 980593088.0, "30920": 969164352.0, "30925": 949610304.0, "30930": 974989888.0, "30935": 967457088.0, "30940": 958929408.0, "30945": 974580928.0, "30950": 950842240.0, "30955": 973378368.0, "30960": 971785728.0, "30965": 965372288.0, "30970": 974031424.0, "30975": 975256768.0, "30980": 947273152.0, "30985": 971732928.0, "30990": 966356480.0, "30995": 967995136.0, "31000": 968488064.0, "31005": 952802560.0, "31010": 983720768.0, "31015": 957435200.0, "31020": 961565440.0, "31025": 964538688.0, "31030": 943789824.0, "31035": 977682432.0, "31040": 981852416.0, "31045": 963691136.0, "31050": 967507264.0, "31055": 953219648.0, "31060": 978983616.0, "31065": 981123968.0, "31070": 972773952.0, "31075": 970318080.0, "31080": 972897600.0, "31085": 940382336.0, "31090": 987589632.0, "31095": 970047232.0, "31100": 976016448.0, "31105": 961824960.0, "31110": 953428032.0, "31115": 968402176.0, "31120": 978432640.0, "31125": 971614208.0, "31130": 969936768.0, "31135": 936925696.0, "31140": 952886400.0, "31145": 986876800.0, "31150": 967104704.0, "31155": 967584128.0, "31160": 954759168.0, "31165": 958474944.0, "31170": 969722176.0, "31175": 968756416.0, "31180": 968622464.0, "31185": 981072512.0, "31190": 956463680.0, "31195": 963095744.0, "31200": 957305536.0, "31205": 958597120.0, "31210": 985419840.0, "31215": 941615296.0, "31220": 960951680.0, "31225": 967481344.0, "31230": 962369216.0, "31235": 968323136.0, "31240": 959268736.0, "31245": 943072000.0, "31250": 956725632.0, "31255": 959938432.0, "31260": 966063168.0, "31265": 949717248.0, "31270": 948794880.0, "31275": 973690112.0, "31280": 977621184.0, "31285": 963322432.0, "31290": 973162816.0, "31295": 935387456.0, "31300": 977271488.0, "31305": 988484608.0, "31310": 969893376.0, "31315": 958052544.0, "31320": 946192704.0, "31325": 940558272.0, "31330": 969955968.0, "31335": 980884736.0, "31340": 956173888.0, "31345": 960467456.0, "31350": 936432320.0, "31355": 963472256.0, "31360": 961982592.0, "31365": 974213504.0, "31370": 969052224.0, "31375": 936529216.0, "31380": 957181696.0, "31385": 956855744.0, "31390": 959907776.0, "31395": 981889088.0, "31400": 966086464.0, "31405": 955890176.0, "31410": 960360832.0, "31415": 972738432.0, "31420": 969440064.0, "31425": 943594240.0, "31430": 960730688.0, "31435": 974575104.0, "31440": 973048832.0, "31445": 970837632.0, "31450": 980422848.0, "31455": 937017984.0, "31460": 988921664.0, "31465": 963245632.0, "31470": 952831360.0, "31475": 962780736.0, "31480": 953032704.0, "31485": 960423808.0, "31490": 974699456.0, "31495": 957448832.0, "31500": 974985920.0, "31505": 944790400.0, "31510": 958837056.0, "31515": 959414144.0, "31520": 975156288.0, "31525": 964041344.0, "31530": 965582720.0, "31535": 933482048.0, "31540": 960781184.0, "31545": 976049792.0, "31550": 973200960.0, "31555": 966920384.0, "31560": 946463552.0, "31565": 940542272.0, "31570": 971149248.0, "31575": 979483200.0, "31580": 961442752.0, "31585": 943906432.0, "31590": 929302848.0, "31595": 967450432.0, "31600": 963994752.0, "31605": 959760256.0, "31610": 966545600.0, "31615": 946345536.0, "31620": 968713856.0, "31625": 943502528.0, "31630": 960774656.0, "31635": 971786496.0, "31640": 947782912.0, "31645": 958413696.0, "31650": 963014208.0, "31655": 984341056.0, "31660": 961113088.0, "31665": 968155456.0, "31670": 975011712.0, "31675": 960668480.0, "31680": 957274048.0, "31685": 981729344.0, "31690": 957599552.0, "31695": 933578368.0, "31700": 957131648.0, "31705": 951757696.0, "31710": 963935360.0, "31715": 959847936.0, "31720": 942139264.0, "31725": 965313216.0, "31730": 965527040.0, "31735": 966214336.0, "31740": 959521920.0, "31745": 943849664.0, "31750": 971061120.0, "31755": 966646976.0, "31760": 970233472.0, "31765": 971649728.0, "31770": 944449280.0, "31775": 954009728.0, "31780": 968954752.0, "31785": 985180992.0, "31790": 986723776.0, "31795": 966675904.0, "31800": 943741504.0, "31805": 967971328.0, "31810": 974123008.0, "31815": 967899328.0, "31820": 956400896.0, "31825": 938596672.0, "31830": 963063232.0, "31835": 976887936.0, "31840": 964305216.0, "31845": 991876736.0, "31850": 966971648.0, "31855": 936403840.0, "31860": 968323328.0, "31865": 961224576.0, "31870": 989208192.0, "31875": 984531008.0, "31880": 952132160.0, "31885": 966064192.0, "31890": 942856704.0, "31895": 963825472.0, "31900": 967061824.0, "31905": 946651008.0, "31910": 975817408.0, "31915": 964765888.0, "31920": 950410240.0, "31925": 962070400.0, "31930": 947463168.0, "31935": 956092992.0, "31940": 975306624.0, "31945": 975865408.0, "31950": 962285440.0, "31955": 956045504.0, "31960": 942212800.0, "31965": 954217408.0, "31970": 984615808.0, "31975": 971309824.0, "31980": 971816320.0, "31985": 949084288.0, "31990": 964837760.0, "31995": 976356288.0, "32000": 974728192.0, "32005": 989773952.0, "32010": 948472576.0, "32015": 952627200.0, "32020": 964184896.0, "32025": 983510144.0, "32030": 966830464.0, "32035": 973511360.0, "32040": 944889216.0, "32045": 972365568.0, "32050": 951622464.0, "32055": 981001728.0, "32060": 965945088.0, "32065": 958292608.0, "32070": 961904384.0, "32075": 968219264.0, "32080": 976194624.0, "32085": 969821952.0, "32090": 946413696.0, "32095": 984418944.0, "32100": 971643392.0, "32105": 977006144.0, "32110": 971401152.0, "32115": 963501824.0, "32120": 970941504.0, "32125": 966550720.0, "32130": 958206592.0, "32135": 964799424.0, "32140": 971847040.0, "32145": 923964736.0, "32150": 975207936.0, "32155": 979238912.0, "32160": 957654784.0, "32165": 962917696.0, "32170": 938543744.0, "32175": 950633216.0, "32180": 971988224.0, "32185": 985120832.0, "32190": 958673472.0, "32195": 953974208.0, "32200": 959539584.0, "32205": 975654144.0, "32210": 980119296.0, "32215": 962163584.0, "32220": 957476544.0, "32225": 946560704.0, "32230": 980018816.0, "32235": 973426752.0, "32240": 965479040.0, "32245": 968537024.0, "32250": 965342272.0, "32255": 960983296.0, "32260": 975452864.0, "32265": 971257088.0, "32270": 968714112.0, "32275": 952040448.0, "32280": 949252352.0, "32285": 961570816.0, "32290": 976767104.0, "32295": 970038016.0, "32300": 948625152.0, "32305": 953978880.0, "32310": 966663168.0, "32315": 956622912.0, "32320": 970693312.0, "32325": 963825408.0, "32330": 950231680.0, "32335": 982535104.0, "32340": 960732224.0, "32345": 964961024.0, "32350": 966927616.0, "32355": 952427456.0, "32360": 964509120.0, "32365": 976908032.0, "32370": 965342208.0, "32375": 967581056.0, "32380": 931347584.0, "32385": 964704384.0, "32390": 980145664.0, "32395": 962341184.0, "32400": 961309248.0, "32405": 969349504.0, "32410": 952541568.0, "32415": 941914240.0, "32420": 973678016.0, "32425": 976012736.0, "32430": 984052224.0, "32435": 946825792.0, "32440": 962211328.0, "32445": 980085888.0, "32450": 975324416.0, "32455": 971943616.0, "32460": 968967040.0, "32465": 955481024.0, "32470": 962959424.0, "32475": 968262400.0, "32480": 976097408.0, "32485": 949921216.0, "32490": 950018560.0, "32495": 957519488.0, "32500": 971944448.0, "32505": 974552960.0, "32510": 989789184.0, "32515": 951843776.0, "32520": 947279552.0, "32525": 967627712.0, "32530": 961413312.0, "32535": 982670336.0, "32540": 958078144.0, "32545": 958877632.0, "32550": 979882048.0, "32555": 973018624.0, "32560": 961209216.0, "32565": 958961728.0, "32570": 946253696.0, "32575": 964930816.0, "32580": 962628352.0, "32585": 973259840.0, "32590": 973966528.0, "32595": 952983040.0, "32600": 981643072.0, "32605": 975735104.0, "32610": 954688256.0, "32615": 953445568.0, "32620": 952689792.0, "32625": 979150016.0, "32630": 969263808.0, "32635": 980770944.0, "32640": 960964544.0, "32645": 941083200.0, "32650": 966638080.0, "32655": 962463040.0, "32660": 974688064.0, "32665": 988684416.0, "32670": 967867072.0, "32675": 952561728.0, "32680": 970945344.0, "32685": 972584960.0, "32690": 973758400.0, "32695": 976221824.0, "32700": 937194944.0, "32705": 965165440.0, "32710": 978322560.0, "32715": 960281600.0, "32720": 979181440.0, "32725": 957105792.0, "32730": 969519808.0, "32735": 959097728.0, "32740": 954314944.0, "32745": 980926144.0, "32750": 964359808.0, "32755": 958968768.0, "32760": 971002048.0, "32765": 962829120.0, "32770": 969224640.0, "32775": 974149760.0, "32780": 949606720.0, "32785": 964598336.0, "32790": 984542656.0, "32795": 969546432.0, "32800": 973192832.0, "32805": 950489472.0, "32810": 952621952.0, "32815": 976016192.0, "32820": 978163520.0, "32825": 979486976.0, "32830": 952210880.0, "32835": 949532032.0, "32840": 975300160.0, "32845": 978107072.0, "32850": 963056896.0, "32855": 975373824.0, "32860": 953091072.0, "32865": 963205696.0, "32870": 962784192.0, "32875": 975717312.0, "32880": 965737472.0, "32885": 970727872.0, "32890": 970473472.0, "32895": 957764352.0, "32900": 974235520.0, "32905": 975570816.0, "32910": 960092608.0, "32915": 957309504.0, "32920": 957752960.0, "32925": 970835200.0, "32930": 959620480.0, "32935": 977989056.0, "32940": 957886720.0, "32945": 969016832.0, "32950": 987989696.0, "32955": 980661120.0, "32960": 967228800.0, "32965": 924789120.0, "32970": 945816896.0, "32975": 970335936.0, "32980": 975103488.0, "32985": 978997632.0, "32990": 935107584.0, "32995": 967892864.0, "33000": 963495168.0, "33005": 960662656.0, "33010": 976275840.0, "33015": 961812480.0, "33020": 956878528.0, "33025": 984623040.0, "33030": 976664640.0, "33035": 976932096.0, "33040": 982983168.0, "33045": 941383168.0, "33050": 971368320.0, "33055": 965625600.0, "33060": 973714688.0, "33065": 970573056.0, "33070": 947170560.0, "33075": 970018496.0, "33080": 976123264.0, "33085": 986233472.0, "33090": 958891136.0, "33095": 961446912.0, "33100": 946157056.0, "33105": 971935168.0, "33110": 988457408.0, "33115": 968648640.0, "33120": 966208192.0, "33125": 941007104.0, "33130": 963882880.0, "33135": 966545472.0, "33140": 983025600.0, "33145": 973315840.0, "33150": 949247680.0, "33155": 955281472.0, "33160": 959834880.0, "33165": 966069184.0, "33170": 968606528.0, "33175": 967398912.0, "33180": 938012864.0, "33185": 964435072.0, "33190": 961099840.0, "33195": 975074496.0, "33200": 964686592.0, "33205": 942901248.0, "33210": 976527552.0, "33215": 965785024.0, "33220": 983554560.0, "33225": 982062144.0, "33230": 961033152.0, "33235": 967774976.0, "33240": 973014272.0, "33245": 972327104.0, "33250": 964575488.0, "33255": 947004480.0, "33260": 944806784.0, "33265": 979703040.0, "33270": 978752384.0, "33275": 971652736.0, "33280": 967986624.0, "33285": 960749504.0, "33290": 971627008.0, "33295": 961231232.0, "33300": 975250496.0, "33305": 982036608.0, "33310": 971097472.0, "33315": 974240064.0, "33320": 966438208.0, "33325": 973008896.0, "33330": 957644288.0, "33335": 950915456.0, "33340": 958378944.0, "33345": 974123456.0, "33350": 975267136.0, "33355": 987805056.0, "33360": 966236480.0, "33365": 946323712.0, "33370": 987057664.0, "33375": 961393152.0, "33380": 972432640.0, "33385": 993223616.0, "33390": 956776000.0, "33395": 964164032.0, "33400": 972331904.0, "33405": 974875840.0, "33410": 981457280.0, "33415": 947889344.0, "33420": 969190528.0, "33425": 991188544.0, "33430": 960235520.0, "33435": 971971840.0, "33440": 952225792.0, "33445": 924814784.0, "33450": 967980032.0, "33455": 967535808.0, "33460": 974863296.0, "33465": 982186880.0, "33470": 959045952.0, "33475": 982714752.0, "33480": 971418368.0, "33485": 961289984.0, "33490": 975010560.0, "33495": 953937856.0, "33500": 985115072.0, "33505": 983610176.0, "33510": 975703040.0, "33515": 964781312.0, "33520": 955793920.0, "33525": 956230208.0, "33530": 965349824.0, "33535": 971872896.0, "33540": 982185728.0, "33545": 960148608.0, "33550": 934155776.0, "33555": 967811520.0, "33560": 986744256.0, "33565": 975165760.0, "33570": 973340544.0, "33575": 938540224.0, "33580": 965050304.0, "33585": 973666944.0, "33590": 986497600.0, "33595": 979987264.0, "33600": 958046336.0, "33605": 951695872.0, "33610": 969483968.0, "33615": 957936832.0, "33620": 972125568.0, "33625": 975478464.0, "33630": 953208704.0, "33635": 978747904.0, "33640": 951981568.0, "33645": 968121984.0, "33650": 981297280.0, "33655": 957794368.0, "33660": 981657536.0, "33665": 966742016.0, "33670": 961641024.0, "33675": 984176128.0, "33680": 938214400.0, "33685": 950744704.0, "33690": 971874304.0, "33695": 978272320.0, "33700": 964018688.0, "33705": 966780672.0, "33710": 966359232.0, "33715": 977832448.0, "33720": 964995264.0, "33725": 981533952.0, "33730": 966657984.0, "33735": 932864704.0, "33740": 967172416.0, "33745": 971958784.0, "33750": 969879936.0, "33755": 957539392.0, "33760": 935800192.0, "33765": 961468736.0, "33770": 971438912.0, "33775": 975816768.0, "33780": 977043712.0, "33785": 951408320.0, "33790": 950563904.0, "33795": 960497536.0, "33800": 972008768.0, "33805": 976010816.0, "33810": 959926912.0, "33815": 947027584.0, "33820": 970183424.0, "33825": 976880832.0, "33830": 973339904.0, "33835": 965633856.0, "33840": 953435136.0, "33845": 961359168.0, "33850": 973948224.0, "33855": 956235456.0, "33860": 972087104.0, "33865": 949552832.0, "33870": 959155840.0, "33875": 970607488.0, "33880": 960326720.0, "33885": 969771456.0, "33890": 950080000.0, "33895": 943837312.0, "33900": 963226560.0, "33905": 965564544.0, "33910": 972639744.0, "33915": 954596096.0, "33920": 937373504.0, "33925": 972596160.0, "33930": 975084352.0, "33935": 954301440.0, "33940": 979943744.0, "33945": 953581184.0, "33950": 968963136.0, "33955": 968031936.0, "33960": 972366080.0, "33965": 973499776.0, "33970": 954085952.0, "33975": 960024704.0, "33980": 958606656.0, "33985": 975817152.0, "33990": 983701568.0, "33995": 953722240.0, "34000": 932710784.0, "34005": 965436224.0, "34010": 978027072.0, "34015": 962733696.0, "34020": 977412416.0, "34025": 930598592.0, "34030": 959969472.0, "34035": 974152320.0, "34040": 968895040.0, "34045": 968600384.0, "34050": 943348096.0, "34055": 968609984.0, "34060": 973558144.0, "34065": 976136064.0, "34070": 959422144.0, "34075": 968997312.0, "34080": 944651456.0, "34085": 965475520.0, "34090": 971271360.0, "34095": 964501632.0, "34100": 960608640.0, "34105": 956339328.0, "34110": 976477248.0, "34115": 957021376.0, "34120": 962547712.0, "34125": 956819968.0, "34130": 947316800.0, "34135": 956556992.0, "34140": 972509760.0, "34145": 969272960.0, "34150": 953871936.0, "34155": 951684032.0, "34160": 950823872.0, "34165": 963680384.0, "34170": 987608576.0, "34175": 972272576.0, "34180": 975566784.0, "34185": 932441920.0, "34190": 956622400.0, "34195": 973476544.0, "34200": 971455872.0, "34205": 950744832.0, "34210": 917601216.0, "34215": 948969984.0, "34220": 967973824.0, "34225": 979281088.0, "34230": 967942528.0, "34235": 951013376.0, "34240": 948354752.0, "34245": 972025152.0, "34250": 994211840.0, "34255": 962316544.0, "34260": 976831424.0, "34265": 941893120.0, "34270": 968703296.0, "34275": 965554816.0, "34280": 969046080.0, "34285": 972812608.0, "34290": 951595392.0, "34295": 951187584.0, "34300": 978998848.0, "34305": 972788352.0, "34310": 972731968.0, "34315": 954039232.0, "34320": 948361472.0, "34325": 966767552.0, "34330": 976373888.0, "34335": 971957504.0, "34340": 961997696.0, "34345": 950912192.0, "34350": 954803264.0, "34355": 965868928.0, "34360": 955290624.0, "34365": 957475584.0, "34370": 941237888.0, "34375": 972966976.0, "34380": 979476480.0, "34385": 959338688.0, "34390": 967339264.0, "34395": 958857280.0, "34400": 953830400.0, "34405": 968341696.0, "34410": 974524928.0, "34415": 977351424.0, "34420": 965971840.0, "34425": 954091328.0, "34430": 972307840.0, "34435": 968963776.0, "34440": 972370368.0, "34445": 979992128.0, "34450": 934850240.0, "34455": 959392640.0, "34460": 968967616.0, "34465": 973262336.0, "34470": 971665728.0, "34475": 939362496.0, "34480": 972383424.0, "34485": 973433344.0, "34490": 965574400.0, "34495": 971844032.0, "34500": 958501504.0, "34505": 954840576.0, "34510": 974704256.0, "34515": 967254080.0, "34520": 975232128.0, "34525": 967152256.0, "34530": 946824448.0, "34535": 965995968.0, "34540": 975122304.0, "34545": 957432768.0, "34550": 960466048.0, "34555": 932250496.0, "34560": 961457024.0, "34565": 977636544.0, "34570": 956047232.0, "34575": 969048000.0, "34580": 953218112.0, "34585": 966566656.0, "34590": 965928064.0, "34595": 958272960.0, "34600": 967861248.0, "34605": 977917120.0, "34610": 950168192.0, "34615": 968108864.0, "34620": 984999808.0, "34625": 971901632.0, "34630": 970960512.0, "34635": 942962112.0, "34640": 961018880.0, "34645": 964883904.0, "34650": 978249536.0, "34655": 971543680.0, "34660": 948218816.0, "34665": 943853440.0, "34670": 973010112.0, "34675": 969187648.0, "34680": 974821568.0, "34685": 968487808.0, "34690": 928156928.0, "34695": 968551296.0, "34700": 970444160.0, "34705": 961752192.0, "34710": 976358592.0, "34715": 954049024.0, "34720": 967972288.0, "34725": 969459520.0, "34730": 963414976.0, "34735": 987597632.0, "34740": 962095360.0, "34745": 966184896.0, "34750": 984659584.0, "34755": 964508672.0, "34760": 972384512.0, "34765": 973684096.0, "34770": 947097920.0, "34775": 991478976.0, "34780": 972531584.0, "34785": 975582976.0, "34790": 957126464.0, "34795": 947159104.0, "34800": 954730432.0, "34805": 984024704.0, "34810": 968999360.0, "34815": 959109696.0, "34820": 935234240.0, "34825": 974821824.0, "34830": 975000960.0, "34835": 965994304.0, "34840": 974703104.0, "34845": 954723840.0, "34850": 941702976.0, "34855": 980184448.0, "34860": 964478464.0, "34865": 966585152.0, "34870": 976779136.0, "34875": 941690048.0, "34880": 959589696.0, "34885": 969506048.0, "34890": 975263680.0, "34895": 980413888.0, "34900": 948939072.0, "34905": 961149632.0, "34910": 956482112.0, "34915": 956624704.0, "34920": 975250240.0, "34925": 949577408.0, "34930": 939505024.0, "34935": 966467904.0, "34940": 971514432.0, "34945": 972965888.0, "34950": 979091264.0, "34955": 940860096.0, "34960": 958978176.0, "34965": 985385152.0, "34970": 975140224.0, "34975": 975699264.0, "34980": 947080832.0, "34985": 973213184.0, "34990": 975683264.0, "34995": 969980160.0, "35000": 972963328.0, "35005": 938536192.0, "35010": 952042048.0, "35015": 983146112.0, "35020": 973755456.0, "35025": 971555648.0, "35030": 954565504.0, "35035": 961455680.0, "35040": 968810432.0, "35045": 969491392.0, "35050": 982455296.0, "35055": 969686848.0, "35060": 930728576.0, "35065": 975272896.0, "35070": 972157568.0, "35075": 970610496.0, "35080": 973647296.0, "35085": 944659072.0, "35090": 966518400.0, "35095": 998018880.0, "35100": 971049408.0, "35105": 978718144.0, "35110": 941681536.0, "35115": 959834688.0, "35120": 976338624.0, "35125": 960024512.0, "35130": 980030656.0, "35135": 972818752.0, "35140": 936594432.0, "35145": 967285568.0, "35150": 970425088.0, "35155": 974614080.0, "35160": 977922496.0, "35165": 959277376.0, "35170": 964067712.0, "35175": 974491584.0, "35180": 970655744.0, "35185": 982627200.0, "35190": 961093952.0, "35195": 962659584.0, "35200": 957740992.0, "35205": 975268864.0, "35210": 980718336.0, "35215": 979667904.0, "35220": 945173504.0, "35225": 967925120.0, "35230": 979223552.0, "35235": 974647936.0, "35240": 962412800.0, "35245": 951506624.0, "35250": 954082240.0, "35255": 977740928.0, "35260": 975332224.0, "35265": 963286528.0, "35270": 942752896.0, "35275": 956199488.0, "35280": 983451200.0, "35285": 973305472.0, "35290": 971922944.0, "35295": 961525120.0, "35300": 957363264.0, "35305": 976137984.0, "35310": 975518464.0, "35315": 987504384.0, "35320": 973995776.0, "35325": 958170432.0, "35330": 974270336.0, "35335": 969509248.0, "35340": 976257152.0, "35345": 973690880.0, "35350": 945805440.0, "35355": 954597376.0, "35360": 967455488.0, "35365": 956492800.0, "35370": 973342144.0, "35375": 951620480.0, "35380": 965125504.0, "35385": 977093440.0, "35390": 967684736.0, "35395": 966383936.0, "35400": 976953984.0, "35405": 930881024.0, "35410": 964246080.0, "35415": 966100096.0, "35420": 963050752.0, "35425": 970818624.0, "35430": 950714752.0, "35435": 953271104.0, "35440": 957939008.0, "35445": 977971264.0, "35450": 975550016.0, "35455": 940604544.0, "35460": 949542016.0, "35465": 973090432.0, "35470": 975766976.0, "35475": 958303168.0, "35480": 953950784.0, "35485": 959888448.0, "35490": 961296704.0, "35495": 980510720.0, "35500": 967987392.0, "35505": 964713792.0, "35510": 954198848.0, "35515": 971331584.0, "35520": 975646080.0, "35525": 964343680.0, "35530": 971561216.0, "35535": 959682176.0, "35540": 946401984.0, "35545": 967718528.0, "35550": 974774144.0, "35555": 981259712.0, "35560": 959013056.0, "35565": 946175168.0, "35570": 967930752.0, "35575": 977709376.0, "35580": 982863232.0, "35585": 961071232.0, "35590": 926888448.0, "35595": 984913728.0, "35600": 971155328.0, "35605": 976883840.0, "35610": 956408832.0, "35615": 959751744.0, "35620": 964833216.0, "35625": 967926080.0, "35630": 971393536.0, "35635": 972480448.0, "35640": 965449856.0, "35645": 938788736.0, "35650": 956382848.0, "35655": 973315968.0, "35660": 966512512.0, "35665": 983919680.0, "35670": 937944320.0, "35675": 981370048.0, "35680": 967475008.0, "35685": 980987648.0, "35690": 969877952.0, "35695": 941445632.0, "35700": 958181888.0, "35705": 969283264.0, "35710": 977501376.0, "35715": 972309184.0, "35720": 967456320.0, "35725": 939436672.0, "35730": 965156608.0, "35735": 989237632.0, "35740": 968147008.0, "35745": 962654656.0, "35750": 947968384.0, "35755": 967101504.0, "35760": 965682688.0, "35765": 970333056.0, "35770": 957386688.0, "35775": 947446336.0, "35780": 971131328.0, "35785": 970572032.0, "35790": 969422784.0, "35795": 971629632.0, "35800": 948586816.0, "35805": 945422144.0, "35810": 968665856.0, "35815": 962051904.0, "35820": 986252224.0, "35825": 979201728.0, "35830": 958189696.0, "35835": 969446528.0, "35840": 967039936.0, "35845": 967647872.0, "35850": 986860032.0, "35855": 956424320.0, "35860": 958497664.0, "35865": 968313280.0, "35870": 974889600.0, "35875": 968907584.0, "35880": 950905152.0, "35885": 968717184.0, "35890": 986557888.0, "35895": 961686016.0, "35900": 970569536.0, "35905": 954828928.0, "35910": 955167552.0, "35915": 979330240.0, "35920": 973183936.0, "35925": 981527936.0, "35930": 957479040.0, "35935": 959227328.0, "35940": 969758656.0, "35945": 970062400.0, "35950": 979017152.0, "35955": 967668544.0, "35960": 939990400.0, "35965": 945047040.0, "35970": 958059264.0, "35975": 978466112.0, "35980": 974195392.0, "35985": 962027456.0, "35990": 944264000.0, "35995": 958836032.0, "36000": 970570816.0, "36005": 969342208.0, "36010": 981516480.0, "36015": 953905728.0, "36020": 983587584.0, "36025": 981735424.0, "36030": 971150080.0, "36035": 965621184.0, "36040": 944785344.0, "36045": 974263616.0, "36050": 970319872.0, "36055": 966801472.0, "36060": 973857728.0, "36065": 947197120.0, "36070": 956910336.0, "36075": 976417920.0, "36080": 962644224.0, "36085": 972267584.0, "36090": 957090368.0, "36095": 951498752.0, "36100": 960972160.0, "36105": 964024640.0, "36110": 974548032.0, "36115": 975226752.0, "36120": 962218496.0, "36125": 965345024.0, "36130": 970133824.0, "36135": 975669504.0, "36140": 970717504.0, "36145": 934944448.0, "36150": 969103040.0, "36155": 960652672.0, "36160": 963243584.0, "36165": 957282752.0, "36170": 941009920.0, "36175": 953434240.0, "36180": 976057088.0, "36185": 966775552.0, "36190": 973231808.0, "36195": 941317824.0, "36200": 923630272.0, "36205": 974927424.0, "36210": 972911616.0, "36215": 971892928.0, "36220": 966610688.0, "36225": 951728000.0, "36230": 960230336.0, "36235": 977388480.0, "36240": 973917888.0, "36245": 966993152.0, "36250": 966955200.0, "36255": 963163200.0, "36260": 967105920.0, "36265": 969129344.0, "36270": 973596928.0, "36275": 972959552.0, "36280": 947809088.0, "36285": 953041728.0, "36290": 960966080.0, "36295": 974895296.0, "36300": 975812672.0, "36305": 944776512.0, "36310": 967797440.0, "36315": 953511296.0, "36320": 953455104.0, "36325": 960989952.0, "36330": 943579392.0, "36335": 963816064.0, "36340": 976062400.0, "36345": 966325952.0, "36350": 978526656.0, "36355": 962753792.0, "36360": 956009984.0, "36365": 973795264.0, "36370": 961218816.0, "36375": 982272256.0, "36380": 948797696.0, "36385": 941772032.0, "36390": 962201408.0, "36395": 968250880.0, "36400": 971236032.0, "36405": 960616064.0, "36410": 939141440.0, "36415": 948116992.0, "36420": 965943168.0, "36425": 963397376.0, "36430": 973924608.0, "36435": 966482240.0, "36440": 946095936.0, "36445": 953024640.0, "36450": 965651264.0, "36455": 985933248.0, "36460": 979504128.0, "36465": 939467648.0, "36470": 980355008.0, "36475": 975049920.0, "36480": 967644096.0, "36485": 985798976.0, "36490": 945260608.0, "36495": 964344448.0, "36500": 968543232.0, "36505": 967742272.0, "36510": 961721024.0, "36515": 960907008.0, "36520": 935750528.0, "36525": 978483072.0, "36530": 964012608.0, "36535": 967273344.0, "36540": 975671104.0, "36545": 945786816.0, "36550": 967628992.0, "36555": 973869696.0, "36560": 962380096.0, "36565": 973021184.0, "36570": 963967744.0, "36575": 976922880.0, "36580": 955205952.0, "36585": 972292544.0, "36590": 984884096.0, "36595": 968599232.0, "36600": 966106240.0, "36605": 971703872.0, "36610": 972986048.0, "36615": 981750272.0, "36620": 952341824.0, "36625": 940451520.0, "36630": 971112768.0, "36635": 972469312.0, "36640": 964529728.0, "36645": 977108672.0, "36650": 935568192.0, "36655": 980588800.0, "36660": 971849600.0, "36665": 962236864.0, "36670": 966499584.0, "36675": 951833088.0, "36680": 946353472.0, "36685": 978238400.0, "36690": 984618944.0, "36695": 991391552.0, "36700": 969779584.0, "36705": 927007232.0, "36710": 966879296.0, "36715": 970208128.0, "36720": 975136256.0, "36725": 970093120.0, "36730": 949252224.0, "36735": 968152768.0, "36740": 968778752.0, "36745": 951300096.0, "36750": 962601152.0, "36755": 936157952.0, "36760": 964608832.0, "36765": 978806848.0, "36770": 963337664.0, "36775": 965546560.0, "36780": 949718400.0, "36785": 946532992.0, "36790": 978562176.0, "36795": 965216000.0, "36800": 960756928.0, "36805": 954220032.0, "36810": 936963904.0, "36815": 986662976.0, "36820": 972625088.0, "36825": 978394944.0, "36830": 969189952.0, "36835": 931244992.0, "36840": 971884480.0, "36845": 979729152.0, "36850": 962196864.0, "36855": 949504128.0, "36860": 962157056.0, "36865": 945156736.0, "36870": 972814272.0, "36875": 973712000.0, "36880": 971861696.0, "36885": 978263232.0, "36890": 934008512.0, "36895": 979742592.0, "36900": 957748544.0, "36905": 967409600.0, "36910": 980609344.0, "36915": 937986304.0, "36920": 961074560.0, "36925": 979715456.0, "36930": 973411840.0, "36935": 978142016.0, "36940": 960000000.0, "36945": 955259328.0, "36950": 970896128.0, "36955": 975013184.0, "36960": 969499968.0, "36965": 946606912.0, "36970": 944745280.0, "36975": 972679552.0, "36980": 974786432.0, "36985": 953452736.0, "36990": 956517568.0, "36995": 943239488.0, "37000": 975357248.0, "37005": 974852032.0, "37010": 963090176.0, "37015": 967052096.0, "37020": 946525376.0, "37025": 958230592.0, "37030": 964952000.0, "37035": 956402496.0, "37040": 978858816.0, "37045": 943380608.0, "37050": 955427776.0, "37055": 981160704.0, "37060": 966965632.0, "37065": 967775040.0, "37070": 967665152.0, "37075": 952759040.0, "37080": 959867392.0, "37085": 973039104.0, "37090": 960883520.0, "37095": 975509504.0, "37100": 942772736.0, "37105": 955888896.0, "37110": 957007616.0, "37115": 971783936.0, "37120": 962906880.0, "37125": 951213824.0, "37130": 939417664.0, "37135": 981905792.0, "37140": 976430784.0, "37145": 971908800.0, "37150": 970880576.0, "37155": 938730688.0, "37160": 961691520.0, "37165": 964121536.0, "37170": 958581952.0, "37175": 986148224.0, "37180": 942560192.0, "37185": 957493632.0, "37190": 970113984.0, "37195": 964066624.0, "37200": 976761216.0, "37205": 944720960.0, "37210": 966817280.0, "37215": 975380032.0, "37220": 957334016.0, "37225": 975650432.0, "37230": 971103872.0, "37235": 953563328.0, "37240": 976651712.0, "37245": 985557888.0, "37250": 956200768.0, "37255": 955302592.0, "37260": 949343040.0, "37265": 990059904.0, "37270": 963152768.0, "37275": 974065536.0, "37280": 958914176.0, "37285": 946324672.0, "37290": 948430464.0, "37295": 980637632.0, "37300": 947569280.0, "37305": 959208256.0, "37310": 953502208.0, "37315": 942177344.0, "37320": 962336832.0, "37325": 978089088.0, "37330": 976543808.0, "37335": 967765440.0, "37340": 942199360.0, "37345": 950753664.0, "37350": 958423872.0, "37355": 971549504.0, "37360": 974847488.0, "37365": 961647872.0, "37370": 951626048.0, "37375": 967630080.0, "37380": 976708992.0, "37385": 965774912.0, "37390": 964798464.0, "37395": 959764032.0, "37400": 963304000.0, "37405": 983657600.0, "37410": 966413184.0, "37415": 965021504.0, "37420": 939671744.0, "37425": 964502784.0, "37430": 963213824.0, "37435": 956176832.0, "37440": 963852224.0, "37445": 947661184.0, "37450": 993358272.0, "37455": 965481728.0, "37460": 961414976.0, "37465": 962457088.0, "37470": 921441664.0, "37475": 951796800.0, "37480": 981969984.0, "37485": 979207488.0, "37490": 968858432.0, "37495": 950155840.0, "37500": 944379648.0, "37505": 958965056.0, "37510": 973533568.0, "37515": 983937856.0, "37520": 962464832.0, "37525": 951862592.0, "37530": 963701760.0, "37535": 980559232.0, "37540": 952889536.0, "37545": 994345280.0, "37550": 951920832.0, "37555": 966708736.0, "37560": 978502656.0, "37565": 972582208.0, "37570": 967943936.0, "37575": 973011328.0, "37580": 943865600.0, "37585": 960378944.0, "37590": 979507456.0, "37595": 977217344.0, "37600": 980898176.0, "37605": 957109504.0, "37610": 975731584.0, "37615": 973916224.0, "37620": 968773760.0, "37625": 982314048.0, "37630": 948664000.0, "37635": 977894272.0, "37640": 959879040.0, "37645": 978034304.0, "37650": 963761024.0, "37655": 925952256.0, "37660": 947607936.0, "37665": 967090240.0, "37670": 966320832.0, "37675": 975351616.0, "37680": 976819840.0, "37685": 949487616.0, "37690": 971156864.0, "37695": 968513024.0, "37700": 967741568.0, "37705": 978686400.0, "37710": 934444224.0, "37715": 977549696.0, "37720": 972852928.0, "37725": 990864832.0, "37730": 985418560.0, "37735": 934915584.0, "37740": 941901184.0, "37745": 967700096.0, "37750": 962236032.0, "37755": 970629696.0, "37760": 958654656.0, "37765": 932277120.0, "37770": 969947008.0, "37775": 963761536.0, "37780": 962543744.0, "37785": 964369664.0, "37790": 940489280.0, "37795": 980184640.0, "37800": 963332480.0, "37805": 973457984.0, "37810": 983184000.0, "37815": 960138560.0, "37820": 938291520.0, "37825": 955375296.0, "37830": 960268864.0, "37835": 974810752.0, "37840": 985132992.0, "37845": 931228160.0, "37850": 952836224.0, "37855": 979400512.0, "37860": 960683840.0, "37865": 965118528.0, "37870": 960377920.0, "37875": 987155968.0, "37880": 951819264.0, "37885": 977228288.0, "37890": 966255744.0, "37895": 934702208.0, "37900": 976863296.0, "37905": 969560256.0, "37910": 980109696.0, "37915": 963183936.0, "37920": 947307840.0, "37925": 945889408.0, "37930": 986022016.0, "37935": 965643712.0, "37940": 960658240.0, "37945": 972726272.0, "37950": 941810752.0, "37955": 966269184.0, "37960": 981534592.0, "37965": 969529664.0, "37970": 992570176.0, "37975": 945045248.0, "37980": 949277120.0, "37985": 976602112.0, "37990": 973379584.0, "37995": 970357760.0, "38000": 940720960.0, "38005": 945277568.0, "38010": 980503936.0, "38015": 969291904.0, "38020": 977009664.0, "38025": 963971904.0, "38030": 950079488.0, "38035": 979428928.0, "38040": 960285248.0, "38045": 968429184.0, "38050": 982594944.0, "38055": 951201344.0, "38060": 970813888.0, "38065": 985643328.0, "38070": 967045120.0, "38075": 969940608.0, "38080": 947351104.0, "38085": 972784064.0, "38090": 977881472.0, "38095": 964939968.0, "38100": 955959616.0, "38105": 962934528.0, "38110": 946648512.0, "38115": 959633216.0, "38120": 972389184.0, "38125": 971452608.0, "38130": 974128640.0, "38135": 950347456.0, "38140": 966304128.0, "38145": 972051776.0, "38150": 974068096.0, "38155": 966554048.0, "38160": 949045632.0, "38165": 965322176.0, "38170": 980448832.0, "38175": 967925696.0, "38180": 965286592.0, "38185": 949794112.0, "38190": 933479040.0, "38195": 983180864.0, "38200": 996134464.0, "38205": 975395456.0, "38210": 963902848.0, "38215": 946521024.0, "38220": 973060352.0, "38225": 963952192.0, "38230": 955579584.0, "38235": 969662720.0, "38240": 934260736.0, "38245": 974109888.0, "38250": 992919040.0, "38255": 967654784.0, "38260": 968209472.0, "38265": 953234176.0, "38270": 949046336.0, "38275": 967227136.0, "38280": 980179520.0, "38285": 967404480.0, "38290": 962366080.0, "38295": 958148480.0, "38300": 971948608.0, "38305": 981050496.0, "38310": 959568192.0, "38315": 961771200.0, "38320": 954119488.0, "38325": 981769024.0, "38330": 974286720.0, "38335": 968316096.0, "38340": 979581504.0, "38345": 941758464.0, "38350": 956452032.0, "38355": 974251456.0, "38360": 963096768.0, "38365": 979025408.0, "38370": 942060800.0, "38375": 937767552.0, "38380": 978084480.0, "38385": 966469760.0, "38390": 953049536.0, "38395": 959284608.0, "38400": 941918912.0, "38405": 969447104.0, "38410": 975399616.0, "38415": 975904128.0, "38420": 964258304.0, "38425": 944928704.0, "38430": 953424896.0, "38435": 960529664.0, "38440": 981408576.0, "38445": 968811520.0, "38450": 945981120.0, "38455": 941268800.0, "38460": 961722560.0, "38465": 962121984.0, "38470": 960450688.0, "38475": 993712320.0, "38480": 952751872.0, "38485": 971652864.0, "38490": 972119744.0, "38495": 951872448.0, "38500": 968917568.0, "38505": 945057920.0, "38510": 971125696.0, "38515": 967567616.0, "38520": 958919680.0, "38525": 967396160.0, "38530": 944781312.0, "38535": 953788416.0, "38540": 966199744.0, "38545": 958957120.0, "38550": 961812160.0, "38555": 971074176.0, "38560": 969300800.0, "38565": 975094336.0, "38570": 964247168.0, "38575": 960329408.0, "38580": 969652224.0, "38585": 946987392.0, "38590": 962398208.0, "38595": 974442880.0, "38600": 968122432.0, "38605": 982736512.0, "38610": 931908992.0, "38615": 946954944.0, "38620": 971249600.0, "38625": 969632640.0, "38630": 981387520.0, "38635": 966291456.0, "38640": 948626240.0, "38645": 966077888.0, "38650": 980259392.0, "38655": 965370496.0, "38660": 955781312.0, "38665": 936237952.0, "38670": 945217344.0, "38675": 954731200.0, "38680": 974302336.0, "38685": 988130880.0, "38690": 938511680.0, "38695": 961002816.0, "38700": 971371328.0, "38705": 982155712.0, "38710": 961389632.0, "38715": 992843776.0, "38720": 954769536.0, "38725": 967657088.0, "38730": 962016768.0, "38735": 957164224.0, "38740": 967526976.0, "38745": 938531072.0, "38750": 989097856.0, "38755": 967039232.0, "38760": 967065280.0, "38765": 976260672.0, "38770": 950617280.0, "38775": 962563904.0, "38780": 961592896.0, "38785": 978471168.0, "38790": 984421504.0, "38795": 941758208.0, "38800": 970466560.0, "38805": 980221696.0, "38810": 949965376.0, "38815": 973588800.0, "38820": 954232512.0, "38825": 949971584.0, "38830": 975676352.0, "38835": 967110144.0, "38840": 969988416.0, "38845": 963558144.0, "38850": 945004288.0, "38855": 962878464.0, "38860": 978528960.0, "38865": 966494016.0, "38870": 960887680.0, "38875": 957009472.0, "38880": 963752576.0, "38885": 965762560.0, "38890": 962497280.0, "38895": 977312384.0, "38900": 950978432.0, "38905": 946934208.0, "38910": 958454784.0, "38915": 977972288.0, "38920": 980503424.0, "38925": 975561600.0, "38930": 933826560.0, "38935": 960560512.0, "38940": 968286656.0, "38945": 965616768.0, "38950": 967372992.0, "38955": 958166080.0, "38960": 955690368.0, "38965": 954131072.0, "38970": 967196608.0, "38975": 957841408.0, "38980": 947907840.0, "38985": 948043456.0, "38990": 964267200.0, "38995": 961882880.0, "39000": 966358464.0, "39005": 961821120.0, "39010": 941205312.0, "39015": 984059072.0, "39020": 976941440.0, "39025": 955966336.0, "39030": 958775616.0, "39035": 945115776.0, "39040": 984750528.0, "39045": 968470592.0, "39050": 955027776.0, "39055": 973385728.0, "39060": 950296640.0, "39065": 949902336.0, "39070": 968916416.0, "39075": 979486720.0, "39080": 975931072.0, "39085": 963206784.0, "39090": 947022400.0, "39095": 943322240.0, "39100": 973311936.0, "39105": 980417024.0, "39110": 969491392.0, "39115": 950048768.0, "39120": 962107008.0, "39125": 974971392.0, "39130": 989205888.0, "39135": 965306944.0, "39140": 945084352.0, "39145": 950450688.0, "39150": 962907712.0, "39155": 958718592.0, "39160": 966244864.0, "39165": 939287296.0, "39170": 937407680.0, "39175": 984736640.0, "39180": 961839552.0, "39185": 964644864.0, "39190": 974393792.0, "39195": 940712000.0, "39200": 963991872.0, "39205": 953438272.0, "39210": 977199616.0, "39215": 968932992.0, "39220": 944543360.0, "39225": 962472640.0, "39230": 961587392.0, "39235": 967905216.0, "39240": 971550400.0, "39245": 951957056.0, "39250": 940416704.0, "39255": 975609408.0, "39260": 958147136.0, "39265": 972102528.0, "39270": 964767424.0, "39275": 954577920.0, "39280": 969148480.0, "39285": 975065856.0, "39290": 973391424.0, "39295": 961602176.0, "39300": 936686848.0, "39305": 959461376.0, "39310": 975166848.0, "39315": 971463808.0, "39320": 963602944.0, "39325": 975203712.0, "39330": 942970496.0, "39335": 976271232.0, "39340": 963263488.0, "39345": 968941952.0, "39350": 983354240.0, "39355": 957719168.0, "39360": 972770432.0, "39365": 972575104.0, "39370": 982370624.0, "39375": 963258624.0, "39380": 932828544.0, "39385": 959051904.0, "39390": 967832640.0, "39395": 965261824.0, "39400": 963461952.0, "39405": 956848768.0, "39410": 937076416.0, "39415": 954203776.0, "39420": 978503744.0, "39425": 961514304.0, "39430": 960797504.0, "39435": 951900288.0, "39440": 971495040.0, "39445": 966900352.0, "39450": 955923776.0, "39455": 964307008.0, "39460": 937474048.0, "39465": 994939392.0, "39470": 973497280.0, "39475": 960445824.0, "39480": 956858944.0, "39485": 964207936.0, "39490": 963614848.0, "39495": 968418560.0, "39500": 961397632.0, "39505": 973216000.0, "39510": 941252096.0, "39515": 934619712.0, "39520": 970979072.0, "39525": 966064320.0, "39530": 981175680.0, "39535": 955243648.0, "39540": 955052672.0, "39545": 951526912.0, "39550": 970789120.0, "39555": 973977216.0, "39560": 990565568.0, "39565": 953922944.0, "39570": 957944320.0, "39575": 971943040.0, "39580": 960224384.0, "39585": 971770624.0, "39590": 977262656.0, "39595": 948911040.0, "39600": 954756544.0, "39605": 958358144.0, "39610": 968618496.0, "39615": 979401728.0, "39620": 934507520.0, "39625": 978672768.0, "39630": 948631488.0, "39635": 961681216.0, "39640": 972803584.0, "39645": 951605312.0, "39650": 975751360.0, "39655": 969904384.0, "39660": 973117696.0, "39665": 959817408.0, "39670": 963756416.0, "39675": 959025536.0, "39680": 968196992.0, "39685": 971548032.0, "39690": 967520512.0, "39695": 958306944.0, "39700": 933087360.0, "39705": 974467264.0, "39710": 984723840.0, "39715": 990324672.0, "39720": 964701312.0, "39725": 943754880.0, "39730": 976556416.0, "39735": 970634624.0, "39740": 965279296.0, "39745": 973652032.0, "39750": 929791424.0, "39755": 961421632.0, "39760": 970358912.0, "39765": 962416320.0, "39770": 967526208.0, "39775": 965160768.0, "39780": 948079232.0, "39785": 970449920.0, "39790": 973160064.0, "39795": 973970112.0, "39800": 968516480.0, "39805": 951147456.0, "39810": 978538048.0, "39815": 971937344.0, "39820": 973501248.0, "39825": 975382080.0, "39830": 951586816.0, "39835": 950349248.0, "39840": 972937664.0, "39845": 972457344.0, "39850": 976636928.0, "39855": 956102528.0, "39860": 941723008.0, "39865": 963854528.0, "39870": 968332352.0, "39875": 975454400.0, "39880": 975406720.0, "39885": 936995840.0, "39890": 964002560.0, "39895": 975019200.0, "39900": 969782208.0, "39905": 967735040.0, "39910": 938725056.0, "39915": 982359744.0, "39920": 978269312.0, "39925": 960326912.0, "39930": 966055680.0, "39935": 954737088.0, "39940": 948733696.0, "39945": 977739200.0, "39950": 973938048.0, "39955": 966419648.0, "39960": 967354304.0, "39965": 940336192.0, "39970": 972962816.0, "39975": 977830528.0, "39980": 981014912.0, "39985": 970604288.0, "39990": 947809856.0, "39995": 963063232.0, "40000": 969131392.0, "40005": 970228480.0, "40010": 967545792.0, "40015": 959443328.0, "40020": 952061376.0, "40025": 967937920.0, "40030": 970094336.0, "40035": 988712512.0, "40040": 958155648.0, "40045": 959966208.0, "40050": 966938944.0, "40055": 965585984.0, "40060": 966257408.0, "40065": 966441216.0, "40070": 955830976.0, "40075": 982009344.0, "40080": 975572480.0, "40085": 954745536.0, "40090": 970651904.0, "40095": 943401280.0, "40100": 951477760.0, "40105": 964152768.0, "40110": 956761792.0, "40115": 978976896.0, "40120": 973582592.0, "40125": 946270016.0, "40130": 986634112.0, "40135": 975599296.0, "40140": 971446592.0, "40145": 978840576.0, "40150": 929732736.0, "40155": 989606528.0, "40160": 984118976.0, "40165": 971592064.0, "40170": 983355136.0, "40175": 930963328.0, "40180": 964988672.0, "40185": 981470848.0, "40190": 975328320.0, "40195": 963929984.0, "40200": 941394560.0, "40205": 960930112.0, "40210": 984755648.0, "40215": 963128704.0, "40220": 966016128.0, "40225": 961656064.0, "40230": 944949248.0, "40235": 972200832.0, "40240": 968400192.0, "40245": 975754624.0, "40250": 977046016.0, "40255": 957515008.0, "40260": 974556288.0, "40265": 963791744.0, "40270": 961844672.0, "40275": 978620032.0, "40280": 962928256.0, "40285": 973306944.0, "40290": 971585856.0, "40295": 969135296.0, "40300": 971146624.0, "40305": 960170624.0, "40310": 941135936.0, "40315": 973521792.0, "40320": 964683584.0, "40325": 961189568.0, "40330": 975721984.0, "40335": 944402880.0, "40340": 973104320.0, "40345": 963873984.0, "40350": 967385856.0, "40355": 971826176.0, "40360": 945867456.0, "40365": 965132224.0, "40370": 969218624.0, "40375": 979564288.0, "40380": 982582592.0, "40385": 970290240.0, "40390": 926466112.0, "40395": 962424512.0, "40400": 979431232.0, "40405": 973726272.0, "40410": 964149632.0, "40415": 943703296.0, "40420": 965819776.0, "40425": 968848384.0, "40430": 962675584.0, "40435": 971457728.0, "40440": 942517056.0, "40445": 962804224.0, "40450": 973955136.0, "40455": 986028032.0, "40460": 972156224.0, "40465": 940895232.0, "40470": 947432000.0, "40475": 981814400.0, "40480": 972776896.0, "40485": 988200832.0, "40490": 940614080.0, "40495": 949916928.0, "40500": 969400320.0, "40505": 966579840.0, "40510": 974012096.0, "40515": 984640640.0, "40520": 940846336.0, "40525": 961901248.0, "40530": 973535104.0, "40535": 959732672.0, "40540": 955920256.0, "40545": 959678080.0, "40550": 965373248.0, "40555": 975751104.0, "40560": 973170560.0, "40565": 968890624.0, "40570": 976189696.0, "40575": 943283584.0, "40580": 975554176.0, "40585": 977669952.0, "40590": 974546688.0, "40595": 975904128.0, "40600": 960240576.0, "40605": 972293760.0, "40610": 984974016.0, "40615": 968031168.0, "40620": 984128064.0, "40625": 946433920.0, "40630": 948542272.0, "40635": 974691648.0, "40640": 983012288.0, "40645": 977286656.0, "40650": 952778816.0, "40655": 951643904.0, "40660": 964683712.0, "40665": 977452096.0, "40670": 991930560.0, "40675": 971241472.0, "40680": 947320832.0, "40685": 959579712.0, "40690": 966478464.0, "40695": 973078080.0, "40700": 966998848.0, "40705": 969215104.0, "40710": 987346304.0, "40715": 967461312.0, "40720": 961319168.0, "40725": 987015552.0, "40730": 964050368.0, "40735": 955641792.0, "40740": 966580864.0, "40745": 959940672.0, "40750": 965444544.0, "40755": 982452416.0, "40760": 945520320.0, "40765": 972601856.0, "40770": 972900608.0, "40775": 974596480.0, "40780": 975550208.0, "40785": 949528320.0, "40790": 972942848.0, "40795": 977367488.0, "40800": 964871424.0, "40805": 960661184.0, "40810": 949131072.0, "40815": 962404096.0, "40820": 970986816.0, "40825": 968659072.0, "40830": 967287296.0, "40835": 971364096.0, "40840": 943783040.0, "40845": 962085504.0, "40850": 980042880.0, "40855": 968559424.0, "40860": 968909504.0, "40865": 944358528.0, "40870": 978746176.0, "40875": 968282048.0, "40880": 977655168.0, "40885": 965802688.0, "40890": 947197568.0, "40895": 952454784.0, "40900": 953271488.0, "40905": 982097728.0, "40910": 971447808.0, "40915": 959577984.0, "40920": 945268800.0, "40925": 966796480.0, "40930": 971321600.0, "40935": 980962880.0, "40940": 974363072.0, "40945": 946329984.0, "40950": 969461184.0, "40955": 967803200.0, "40960": 969271168.0, "40965": 974564608.0, "40970": 945408640.0, "40975": 951404480.0, "40980": 970320704.0, "40985": 971490432.0, "40990": 962256960.0, "40995": 941817920.0, "41000": 972569280.0, "41005": 963313152.0, "41010": 965973248.0, "41015": 953701952.0, "41020": 960943616.0, "41025": 934747904.0, "41030": 974027200.0, "41035": 973233216.0, "41040": 954397376.0, "41045": 973042944.0, "41050": 946243968.0, "41055": 963117504.0, "41060": 985485248.0, "41065": 973772864.0, "41070": 969062080.0, "41075": 942682944.0, "41080": 958940992.0, "41085": 968522304.0, "41090": 960663232.0, "41095": 975346752.0, "41100": 943167808.0, "41105": 947583424.0, "41110": 962099712.0, "41115": 967245568.0, "41120": 981612928.0, "41125": 956206848.0, "41130": 946856960.0, "41135": 959873920.0, "41140": 966292288.0, "41145": 963839936.0, "41150": 981031936.0, "41155": 952435584.0, "41160": 940064768.0, "41165": 972077184.0, "41170": 969121664.0, "41175": 956492096.0, "41180": 958296448.0, "41185": 958065664.0, "41190": 972409600.0, "41195": 965953600.0, "41200": 977987712.0, "41205": 965380096.0, "41210": 939649344.0, "41215": 982863936.0, "41220": 975033216.0, "41225": 961662336.0, "41230": 993770560.0, "41235": 944580352.0, "41240": 970372480.0, "41245": 954907008.0, "41250": 980117888.0, "41255": 960942208.0, "41260": 965421760.0, "41265": 951642624.0, "41270": 964813056.0, "41275": 975053184.0, "41280": 975961280.0, "41285": 957686912.0, "41290": 933199680.0, "41295": 968342848.0, "41300": 972140480.0, "41305": 962975104.0, "41310": 984459328.0, "41315": 936196544.0, "41320": 944267008.0, "41325": 970678784.0, "41330": 969398464.0, "41335": 973780160.0, "41340": 946755328.0, "41345": 936093248.0, "41350": 969308416.0, "41355": 973526656.0, "41360": 982206400.0, "41365": 944444224.0, "41370": 951392320.0, "41375": 964321280.0, "41380": 958288384.0, "41385": 965477760.0, "41390": 977862272.0, "41395": 939169856.0, "41400": 947292928.0, "41405": 964409792.0, "41410": 977389504.0, "41415": 962906880.0, "41420": 946917120.0, "41425": 951069824.0, "41430": 982144384.0, "41435": 975455296.0, "41440": 966053312.0, "41445": 958001856.0, "41450": 942269888.0, "41455": 988390400.0, "41460": 955968512.0, "41465": 955507008.0, "41470": 973030656.0, "41475": 955537920.0, "41480": 979951488.0, "41485": 966917376.0, "41490": 965198528.0, "41495": 964371712.0, "41500": 939377472.0, "41505": 966655040.0, "41510": 957818496.0, "41515": 983044032.0, "41520": 965471168.0, "41525": 954039296.0, "41530": 970500672.0, "41535": 959737280.0, "41540": 972190592.0, "41545": 987942848.0, "41550": 968740352.0, "41555": 949328704.0, "41560": 951841600.0, "41565": 977255872.0, "41570": 975021248.0, "41575": 957891840.0, "41580": 945808832.0, "41585": 966658752.0, "41590": 975751360.0, "41595": 959812416.0, "41600": 980577024.0, "41605": 945826688.0, "41610": 949075968.0, "41615": 965410176.0, "41620": 971179904.0, "41625": 978350464.0, "41630": 960459456.0, "41635": 950453184.0, "41640": 977435264.0, "41645": 958362688.0, "41650": 973234240.0, "41655": 968093504.0, "41660": 942653248.0, "41665": 975875840.0, "41670": 984472064.0, "41675": 958246528.0, "41680": 972380864.0, "41685": 940446656.0, "41690": 965064192.0, "41695": 960940224.0, "41700": 976501504.0, "41705": 955604928.0, "41710": 957000192.0, "41715": 952147392.0, "41720": 973406400.0, "41725": 971998656.0, "41730": 952435392.0, "41735": 959774656.0, "41740": 946492224.0, "41745": 964494208.0, "41750": 973124096.0, "41755": 974763392.0, "41760": 973848448.0, "41765": 949057152.0, "41770": 964921728.0, "41775": 964136896.0, "41780": 973467072.0, "41785": 977181632.0, "41790": 953364608.0, "41795": 954690624.0, "41800": 960599488.0, "41805": 958374144.0, "41810": 971374720.0, "41815": 960899200.0, "41820": 954772992.0, "41825": 962355456.0, "41830": 957415552.0, "41835": 970671360.0, "41840": 974493248.0, "41845": 934336512.0, "41850": 973274624.0, "41855": 970217088.0, "41860": 976310528.0, "41865": 979806528.0, "41870": 967353088.0, "41875": 963063552.0, "41880": 966165248.0, "41885": 960963648.0, "41890": 966164736.0, "41895": 952478144.0, "41900": 931142848.0, "41905": 982925312.0, "41910": 975385088.0, "41915": 967562048.0, "41920": 965365120.0, "41925": 936830592.0, "41930": 968282624.0, "41935": 978264960.0, "41940": 987699392.0, "41945": 957223232.0, "41950": 966031104.0, "41955": 980169024.0, "41960": 959978816.0, "41965": 964746240.0, "41970": 968088960.0, "41975": 955192768.0, "41980": 955642944.0, "41985": 969529152.0, "41990": 979048896.0, "41995": 970421632.0, "42000": 977519872.0, "42005": 937505792.0, "42010": 966975104.0, "42015": 976388544.0, "42020": 966605504.0, "42025": 963203456.0, "42030": 955212800.0, "42035": 948346304.0, "42040": 958768640.0, "42045": 969345152.0, "42050": 979313536.0, "42055": 927708544.0, "42060": 935504064.0, "42065": 977379648.0, "42070": 964483008.0, "42075": 978699456.0, "42080": 951993536.0, "42085": 947575872.0, "42090": 966910464.0, "42095": 966691840.0, "42100": 959584320.0, "42105": 968184704.0, "42110": 965885120.0, "42115": 977907840.0, "42120": 967882240.0, "42125": 971821056.0, "42130": 963980288.0, "42135": 946874496.0, "42140": 969277696.0, "42145": 974144448.0, "42150": 971110080.0, "42155": 963548096.0, "42160": 949029888.0, "42165": 942533184.0, "42170": 964137152.0, "42175": 987593728.0, "42180": 956748672.0, "42185": 951964992.0, "42190": 942420160.0, "42195": 971514752.0, "42200": 976425024.0, "42205": 959147968.0, "42210": 966930944.0, "42215": 945785856.0, "42220": 956434496.0, "42225": 984928064.0, "42230": 959279360.0, "42235": 975186880.0, "42240": 954873600.0, "42245": 943807872.0, "42250": 962633280.0, "42255": 962725952.0, "42260": 967380096.0, "42265": 964935744.0, "42270": 955411392.0, "42275": 975309056.0, "42280": 970107456.0, "42285": 963499328.0, "42290": 978085760.0, "42295": 951792128.0, "42300": 966225280.0, "42305": 978965376.0, "42310": 952907776.0, "42315": 971476736.0, "42320": 960844800.0, "42325": 957232640.0, "42330": 974414208.0, "42335": 971867712.0, "42340": 960483840.0, "42345": 968977408.0, "42350": 947288192.0, "42355": 985523328.0, "42360": 973068160.0, "42365": 969739968.0, "42370": 975050432.0, "42375": 946436032.0, "42380": 961078208.0, "42385": 975601664.0, "42390": 980782400.0, "42395": 948173504.0, "42400": 935514880.0, "42405": 932185792.0, "42410": 969700736.0, "42415": 975911360.0, "42420": 974592576.0, "42425": 953419584.0, "42430": 938936320.0, "42435": 974780544.0, "42440": 967876928.0, "42445": 984266304.0, "42450": 970277440.0, "42455": 950021760.0, "42460": 974246016.0, "42465": 967731584.0, "42470": 972766528.0, "42475": 971281600.0, "42480": 945595840.0, "42485": 941894720.0, "42490": 960606784.0, "42495": 972958080.0, "42500": 958511744.0, "42505": 962251520.0, "42510": 945122048.0, "42515": 966084864.0, "42520": 970101952.0, "42525": 976833856.0, "42530": 972277376.0, "42535": 959185280.0, "42540": 970880896.0, "42545": 971182144.0, "42550": 964015680.0, "42555": 961901568.0, "42560": 947681152.0, "42565": 951018816.0, "42570": 970336256.0, "42575": 972012032.0, "42580": 969701632.0, "42585": 953480704.0, "42590": 967515712.0, "42595": 957160704.0, "42600": 981678848.0, "42605": 971035200.0, "42610": 978595392.0, "42615": 952664384.0, "42620": 969747136.0, "42625": 979586240.0, "42630": 983180032.0, "42635": 966632000.0, "42640": 935170048.0, "42645": 961718592.0, "42650": 968929664.0, "42655": 985480768.0, "42660": 977256064.0, "42665": 948252224.0, "42670": 955961664.0, "42675": 982153600.0, "42680": 966820416.0, "42685": 971145344.0, "42690": 977331648.0, "42695": 943604480.0, "42700": 972247424.0, "42705": 972226688.0, "42710": 968572288.0, "42715": 974301568.0, "42720": 959045888.0, "42725": 972864960.0, "42730": 974925120.0, "42735": 959875328.0, "42740": 983209536.0, "42745": 970113408.0, "42750": 955864768.0, "42755": 967761600.0, "42760": 968103744.0, "42765": 972668416.0, "42770": 963385344.0, "42775": 944508352.0, "42780": 985136384.0, "42785": 968305984.0, "42790": 975667520.0, "42795": 983936896.0, "42800": 941196928.0, "42805": 981997312.0, "42810": 970549760.0, "42815": 962397440.0, "42820": 968886464.0, "42825": 936975424.0, "42830": 962652800.0, "42835": 962055872.0, "42840": 988319360.0, "42845": 972681728.0, "42850": 950401600.0, "42855": 948231360.0, "42860": 977987584.0, "42865": 977858368.0, "42870": 976900928.0, "42875": 972165696.0, "42880": 948924224.0, "42885": 981069632.0, "42890": 964088896.0, "42895": 972434688.0, "42900": 971220480.0, "42905": 946140928.0, "42910": 947038848.0, "42915": 968984064.0, "42920": 966189696.0, "42925": 981524672.0, "42930": 958259072.0, "42935": 952835648.0, "42940": 977727360.0, "42945": 956687168.0, "42950": 968836032.0, "42955": 966816192.0, "42960": 949327744.0, "42965": 980304384.0, "42970": 976379584.0, "42975": 984976064.0, "42980": 957781184.0, "42985": 938349760.0, "42990": 977196736.0, "42995": 968983360.0, "43000": 970494336.0, "43005": 976359360.0, "43010": 954831424.0, "43015": 974738048.0, "43020": 981732096.0, "43025": 973471808.0, "43030": 966840000.0, "43035": 979198016.0, "43040": 953304768.0, "43045": 960752832.0, "43050": 973224320.0, "43055": 975821248.0, "43060": 970507968.0, "43065": 947701120.0, "43070": 961007232.0, "43075": 969248384.0, "43080": 986027008.0, "43085": 981182976.0, "43090": 945773376.0, "43095": 962320256.0, "43100": 973160064.0, "43105": 981667264.0, "43110": 969058880.0, "43115": 961071936.0, "43120": 950409728.0, "43125": 971286272.0, "43130": 956295040.0, "43135": 970336640.0, "43140": 972072256.0, "43145": 961899328.0, "43150": 966810304.0, "43155": 970672448.0, "43160": 968453120.0, "43165": 973582336.0, "43170": 955195776.0, "43175": 967141504.0, "43180": 968205056.0, "43185": 970973632.0, "43190": 962006336.0, "43195": 948115328.0, "43200": 944582208.0, "43205": 972134912.0, "43210": 952867712.0, "43215": 971924992.0, "43220": 965065088.0, "43225": 950250752.0, "43230": 979254016.0, "43235": 966010816.0, "43240": 962425152.0, "43245": 977858560.0, "43250": 952441728.0, "43255": 973535040.0, "43260": 965802880.0, "43265": 970753152.0, "43270": 957130176.0, "43275": 945023808.0, "43280": 956981376.0, "43285": 957497472.0, "43290": 965703680.0, "43295": 985326400.0, "43300": 961349952.0, "43305": 931840896.0, "43310": 969167360.0, "43315": 971703680.0, "43320": 973394112.0, "43325": 974443712.0, "43330": 951538944.0, "43335": 975744576.0, "43340": 973401600.0, "43345": 976593152.0, "43350": 968874304.0, "43355": 946479808.0, "43360": 962367104.0, "43365": 970978432.0, "43370": 961212608.0, "43375": 991960384.0, "43380": 973695168.0, "43385": 934837568.0, "43390": 974253632.0, "43395": 966826880.0, "43400": 954040320.0, "43405": 966814976.0, "43410": 942904640.0, "43415": 984447296.0, "43420": 970766336.0, "43425": 959605440.0, "43430": 966833728.0, "43435": 948149760.0, "43440": 946247488.0, "43445": 968947136.0, "43450": 968411264.0, "43455": 959278080.0, "43460": 963184896.0, "43465": 947352384.0, "43470": 978833984.0, "43475": 959863360.0, "43480": 963084864.0, "43485": 960060224.0, "43490": 939311296.0, "43495": 967146816.0, "43500": 989826816.0, "43505": 973426688.0, "43510": 969455360.0, "43515": 948461184.0, "43520": 951270080.0, "43525": 967865792.0, "43530": 976495936.0, "43535": 985482240.0, "43540": 939709184.0, "43545": 942670080.0, "43550": 951539904.0, "43555": 967778432.0, "43560": 986314688.0, "43565": 954574272.0, "43570": 938934080.0, "43575": 970265344.0, "43580": 976789888.0, "43585": 975072640.0, "43590": 969799104.0, "43595": 943076416.0, "43600": 957596928.0, "43605": 977618112.0, "43610": 967072256.0, "43615": 986234560.0, "43620": 943434304.0, "43625": 932223680.0, "43630": 975603776.0, "43635": 944506880.0, "43640": 975707264.0, "43645": 950852096.0, "43650": 945039872.0, "43655": 968277184.0, "43660": 971419904.0, "43665": 965066048.0, "43670": 965203072.0, "43675": 950589504.0, "43680": 964346496.0, "43685": 970783744.0, "43690": 969606464.0, "43695": 963843648.0, "43700": 930735360.0, "43705": 965096256.0, "43710": 967827008.0, "43715": 968863616.0, "43720": 960506624.0, "43725": 953591232.0, "43730": 952605824.0, "43735": 972582336.0, "43740": 993955072.0, "43745": 962883200.0, "43750": 965154112.0, "43755": 940459648.0, "43760": 968174528.0, "43765": 965665920.0, "43770": 952691648.0, "43775": 969636800.0, "43780": 937046336.0, "43785": 965194368.0, "43790": 956990976.0, "43795": 948690816.0, "43800": 970827712.0, "43805": 932093248.0, "43810": 956980864.0, "43815": 961307008.0, "43820": 970026688.0, "43825": 961418496.0, "43830": 974233344.0, "43835": 934552064.0, "43840": 962688000.0, "43845": 972470656.0, "43850": 961201152.0, "43855": 962490560.0, "43860": 953377792.0, "43865": 959918272.0, "43870": 975204736.0, "43875": 972827136.0, "43880": 960770752.0, "43885": 941202432.0, "43890": 970713984.0, "43895": 982373632.0, "43900": 974000640.0, "43905": 969738560.0, "43910": 950894912.0, "43915": 938684608.0, "43920": 967679936.0, "43925": 969677760.0, "43930": 958828352.0, "43935": 977893056.0, "43940": 957410112.0, "43945": 996080832.0, "43950": 968398720.0, "43955": 983060672.0, "43960": 969776832.0, "43965": 947812544.0, "43970": 964572480.0, "43975": 968708160.0, "43980": 973075392.0, "43985": 960184896.0, "43990": 955532608.0, "43995": 943734720.0, "44000": 960958080.0, "44005": 950963968.0, "44010": 970303552.0, "44015": 984556096.0, "44020": 940072896.0, "44025": 955525568.0, "44030": 975936384.0, "44035": 977218112.0, "44040": 978505024.0, "44045": 933325440.0, "44050": 955176960.0, "44055": 967430528.0, "44060": 981631616.0, "44065": 973313728.0, "44070": 939940672.0, "44075": 932763328.0, "44080": 975616064.0, "44085": 963365696.0, "44090": 969224768.0, "44095": 962166080.0, "44100": 951681152.0, "44105": 963935552.0, "44110": 966024832.0, "44115": 964529216.0, "44120": 961148224.0, "44125": 951405568.0, "44130": 965260544.0, "44135": 970438976.0, "44140": 964622080.0, "44145": 973909184.0, "44150": 945153088.0, "44155": 950513728.0, "44160": 965475264.0, "44165": 987427200.0, "44170": 981979904.0, "44175": 957921856.0, "44180": 938525824.0, "44185": 974868736.0, "44190": 966559552.0, "44195": 966657088.0, "44200": 963769856.0, "44205": 937572736.0, "44210": 971090624.0, "44215": 968997248.0, "44220": 974590720.0, "44225": 973438592.0, "44230": 956277184.0, "44235": 962742656.0, "44240": 957056512.0, "44245": 966611008.0, "44250": 972938752.0, "44255": 961596864.0, "44260": 962809984.0, "44265": 962945280.0, "44270": 972256896.0, "44275": 960710080.0, "44280": 975354752.0, "44285": 938606784.0, "44290": 956488192.0, "44295": 972127296.0, "44300": 979215168.0, "44305": 968614400.0, "44310": 953989376.0, "44315": 955772480.0, "44320": 986558400.0, "44325": 972288128.0, "44330": 950208192.0, "44335": 950182976.0, "44340": 943901696.0, "44345": 983900928.0, "44350": 965279744.0, "44355": 959334848.0, "44360": 950495616.0, "44365": 934420672.0, "44370": 966023232.0, "44375": 973927936.0, "44380": 973253504.0, "44385": 959701888.0, "44390": 947891072.0, "44395": 963749504.0, "44400": 978754432.0, "44405": 980848448.0, "44410": 965514048.0, "44415": 961403072.0, "44420": 955766336.0, "44425": 967741696.0, "44430": 969867648.0, "44435": 971101504.0, "44440": 953781888.0, "44445": 936714496.0, "44450": 966871104.0, "44455": 956810560.0, "44460": 971596096.0, "44465": 989297856.0, "44470": 957925376.0, "44475": 944077312.0, "44480": 955819968.0, "44485": 963014784.0, "44490": 969702912.0, "44495": 953844608.0, "44500": 965863040.0, "44505": 975928896.0, "44510": 968819648.0, "44515": 967746880.0, "44520": 965383744.0, "44525": 956946240.0, "44530": 966594240.0, "44535": 985825408.0, "44540": 970389696.0, "44545": 970661568.0, "44550": 959898304.0, "44555": 950821824.0, "44560": 975606208.0, "44565": 970864960.0, "44570": 958493824.0, "44575": 945478144.0, "44580": 967411456.0, "44585": 960171648.0, "44590": 972781760.0, "44595": 975683904.0, "44600": 942142144.0, "44605": 956264960.0, "44610": 962251648.0, "44615": 981611264.0, "44620": 985983040.0, "44625": 969909888.0, "44630": 950508864.0, "44635": 968688128.0, "44640": 981912832.0, "44645": 967639360.0, "44650": 965647744.0, "44655": 952637760.0, "44660": 959483648.0, "44665": 960930688.0, "44670": 960120960.0, "44675": 963123200.0, "44680": 945110464.0, "44685": 943544128.0, "44690": 960331904.0, "44695": 963776832.0, "44700": 979788544.0, "44705": 975643904.0, "44710": 961971136.0, "44715": 963905536.0, "44720": 970464192.0, "44725": 951275200.0, "44730": 958688960.0, "44735": 965584128.0, "44740": 970475072.0, "44745": 968989568.0, "44750": 964169216.0, "44755": 961681536.0, "44760": 941585984.0, "44765": 955242560.0, "44770": 975612288.0, "44775": 969556032.0, "44780": 976004672.0, "44785": 953333504.0, "44790": 952467008.0, "44795": 953993792.0, "44800": 981475584.0, "44805": 988277248.0, "44810": 965415168.0, "44815": 947641280.0, "44820": 974227904.0, "44825": 954318016.0, "44830": 978339520.0, "44835": 968276992.0, "44840": 926094400.0, "44845": 971017536.0, "44850": 973902976.0, "44855": 980015872.0, "44860": 961879104.0, "44865": 946626176.0, "44870": 959347392.0, "44875": 963684928.0, "44880": 975611584.0, "44885": 958396800.0, "44890": 978669440.0, "44895": 939602112.0, "44900": 973123712.0, "44905": 958498880.0, "44910": 971012032.0, "44915": 963707200.0, "44920": 947181120.0, "44925": 959370688.0, "44930": 964028416.0, "44935": 976863744.0, "44940": 966302336.0, "44945": 946171456.0, "44950": 961293632.0, "44955": 978644480.0, "44960": 961160896.0, "44965": 960465408.0, "44970": 971144576.0, "44975": 954524800.0, "44980": 979817920.0, "44985": 961109440.0, "44990": 967207424.0, "44995": 973199936.0, "45000": 939969856.0, "45005": 956904768.0, "45010": 970280320.0, "45015": 977905792.0, "45020": 967658048.0, "45025": 948006016.0, "45030": 970818752.0, "45035": 963579584.0, "45040": 972264576.0, "45045": 973033024.0, "45050": 957153408.0, "45055": 941326976.0, "45060": 962237952.0, "45065": 957863744.0, "45070": 993877760.0, "45075": 981646784.0, "45080": 947977152.0, "45085": 969661888.0, "45090": 979831360.0, "45095": 993299840.0, "45100": 969666944.0, "45105": 937690048.0, "45110": 951048640.0, "45115": 984978240.0, "45120": 958248128.0, "45125": 977258176.0, "45130": 958956672.0, "45135": 964794752.0, "45140": 985661440.0, "45145": 969875264.0, "45150": 968321536.0, "45155": 967603072.0, "45160": 957400640.0, "45165": 967645184.0, "45170": 978159424.0, "45175": 967508928.0, "45180": 955933504.0, "45185": 957878656.0, "45190": 973689024.0, "45195": 961878208.0, "45200": 975294080.0, "45205": 982348352.0, "45210": 938551296.0, "45215": 955451840.0, "45220": 992040128.0, "45225": 972613120.0, "45230": 967590976.0, "45235": 969910272.0, "45240": 945826560.0, "45245": 974980480.0, "45250": 977885120.0, "45255": 979132160.0, "45260": 967590208.0, "45265": 939004032.0, "45270": 983258368.0, "45275": 966624256.0, "45280": 978431168.0, "45285": 978819008.0, "45290": 945164544.0, "45295": 954354816.0, "45300": 974941248.0, "45305": 979584704.0, "45310": 968475712.0, "45315": 965654720.0, "45320": 943824320.0, "45325": 979419712.0, "45330": 983129664.0, "45335": 976931200.0, "45340": 959881664.0, "45345": 946878016.0, "45350": 967326144.0, "45355": 967136768.0, "45360": 969014464.0, "45365": 977234624.0, "45370": 946572672.0, "45375": 948274752.0, "45380": 966221888.0, "45385": 958086784.0, "45390": 963193216.0, "45395": 960735296.0, "45400": 945172736.0, "45405": 976587584.0, "45410": 962602432.0, "45415": 965140032.0, "45420": 962713536.0, "45425": 952825472.0, "45430": 972169024.0, "45435": 976526976.0, "45440": 972628096.0, "45445": 959194240.0, "45450": 947120064.0, "45455": 970471616.0, "45460": 976325888.0, "45465": 958297408.0, "45470": 977083776.0, "45475": 953830336.0, "45480": 938996672.0, "45485": 976686784.0, "45490": 966976512.0, "45495": 982486144.0, "45500": 980930816.0, "45505": 927561856.0, "45510": 972717312.0, "45515": 961385792.0, "45520": 978190208.0, "45525": 988860224.0, "45530": 941468544.0, "45535": 971009152.0, "45540": 960778624.0, "45545": 965471232.0, "45550": 975937152.0, "45555": 961128064.0, "45560": 963725952.0, "45565": 952580864.0, "45570": 980772928.0, "45575": 971195200.0, "45580": 951606784.0, "45585": 943851264.0, "45590": 989044032.0, "45595": 975028352.0, "45600": 968846784.0, "45605": 974557184.0, "45610": 959481088.0, "45615": 967825152.0, "45620": 966047872.0, "45625": 977849472.0, "45630": 955833600.0, "45635": 948105920.0, "45640": 949802752.0, "45645": 976608896.0, "45650": 963187264.0, "45655": 970775936.0, "45660": 962505152.0, "45665": 956782912.0, "45670": 973651840.0, "45675": 962152000.0, "45680": 971956224.0, "45685": 968751552.0, "45690": 951361024.0, "45695": 972584448.0, "45700": 968554752.0, "45705": 971870528.0, "45710": 961105152.0, "45715": 935544384.0, "45720": 949505984.0, "45725": 966503232.0, "45730": 965884608.0, "45735": 987993792.0, "45740": 945535488.0, "45745": 944919552.0, "45750": 969548672.0, "45755": 958175808.0, "45760": 964810048.0, "45765": 979002112.0, "45770": 940608896.0, "45775": 965895808.0, "45780": 968570752.0, "45785": 957810752.0, "45790": 968501120.0, "45795": 953223872.0, "45800": 940742272.0, "45805": 971287872.0, "45810": 976790144.0, "45815": 963740544.0, "45820": 941560000.0, "45825": 963473600.0, "45830": 959619264.0, "45835": 964791680.0, "45840": 972042624.0, "45845": 974587136.0, "45850": 940805632.0, "45855": 963130304.0, "45860": 972643584.0, "45865": 952486528.0, "45870": 968319872.0, "45875": 948396288.0, "45880": 973898560.0, "45885": 985530304.0, "45890": 972064896.0, "45895": 963585792.0, "45900": 943698624.0, "45905": 964758976.0, "45910": 975665920.0, "45915": 952428800.0, "45920": 956817408.0, "45925": 949155392.0, "45930": 937446912.0, "45935": 985766400.0, "45940": 958554432.0, "45945": 972325504.0, "45950": 976237376.0, "45955": 939765568.0, "45960": 975569152.0, "45965": 961491264.0, "45970": 978411264.0, "45975": 974421376.0, "45980": 917552576.0, "45985": 962521664.0, "45990": 958511744.0, "45995": 968319872.0, "46000": 971312384.0, "46005": 965987776.0, "46010": 950378816.0, "46015": 964532032.0, "46020": 978283840.0, "46025": 968137920.0, "46030": 969323840.0, "46035": 950949632.0, "46040": 952856320.0, "46045": 973069184.0, "46050": 961957184.0, "46055": 971975104.0, "46060": 953610176.0, "46065": 967701568.0, "46070": 951915328.0, "46075": 966244288.0, "46080": 960508800.0, "46085": 939121728.0, "46090": 969764416.0, "46095": 984431040.0, "46100": 970151744.0, "46105": 963492928.0, "46110": 941753792.0, "46115": 952776576.0, "46120": 977828864.0, "46125": 968602752.0, "46130": 979518720.0, "46135": 966533248.0, "46140": 951969472.0, "46145": 956178624.0, "46150": 963353216.0, "46155": 968191168.0, "46160": 963112960.0, "46165": 939816000.0, "46170": 971208896.0, "46175": 977380416.0, "46180": 966650560.0, "46185": 967244544.0, "46190": 949976128.0, "46195": 951061056.0, "46200": 953674048.0, "46205": 974425920.0, "46210": 964808768.0, "46215": 975759744.0, "46220": 952670016.0, "46225": 970341376.0, "46230": 961810752.0, "46235": 969231168.0, "46240": 975746496.0, "46245": 962418688.0, "46250": 984524736.0, "46255": 972152512.0, "46260": 973995008.0, "46265": 956723968.0, "46270": 945540608.0, "46275": 959889984.0, "46280": 960108800.0, "46285": 979804800.0, "46290": 974284416.0, "46295": 972512064.0, "46300": 933028800.0, "46305": 961066880.0, "46310": 972896384.0, "46315": 963175296.0, "46320": 946995456.0, "46325": 951330816.0, "46330": 975385472.0, "46335": 978739968.0, "46340": 971718016.0, "46345": 978232704.0, "46350": 939344064.0, "46355": 954781632.0, "46360": 976437312.0, "46365": 967489408.0, "46370": 969303936.0, "46375": 952215744.0, "46380": 939218432.0, "46385": 986753408.0, "46390": 970653760.0, "46395": 965546944.0, "46400": 956854080.0, "46405": 937322496.0, "46410": 978846784.0, "46415": 975380544.0, "46420": 972042752.0, "46425": 957917120.0, "46430": 944068416.0, "46435": 952647168.0, "46440": 949993792.0, "46445": 980850496.0, "46450": 969513408.0, "46455": 972681664.0, "46460": 946511040.0, "46465": 968266368.0, "46470": 988146304.0, "46475": 961031040.0, "46480": 974781760.0, "46485": 947271104.0, "46490": 963867584.0, "46495": 959058688.0, "46500": 966792704.0, "46505": 949822208.0, "46510": 954523968.0, "46515": 978101376.0, "46520": 963403648.0, "46525": 959364800.0, "46530": 973482816.0, "46535": 949187264.0, "46540": 951696192.0, "46545": 975045248.0, "46550": 965121920.0, "46555": 950096320.0, "46560": 960742080.0, "46565": 943398144.0, "46570": 979401408.0, "46575": 967423872.0, "46580": 975826176.0, "46585": 955856256.0, "46590": 945312000.0, "46595": 957074688.0, "46600": 973279936.0, "46605": 966386688.0, "46610": 971360448.0, "46615": 958008384.0, "46620": 948249152.0, "46625": 968561792.0, "46630": 964502144.0, "46635": 966753024.0, "46640": 956189696.0, "46645": 941154112.0, "46650": 962627264.0, "46655": 962701824.0, "46660": 956948288.0, "46665": 959028992.0, "46670": 942770752.0, "46675": 971009344.0, "46680": 964474816.0, "46685": 970685632.0, "46690": 974168832.0, "46695": 962576896.0, "46700": 971089024.0, "46705": 976162112.0, "46710": 965329088.0, "46715": 962043904.0, "46720": 966729856.0, "46725": 960852544.0, "46730": 955188800.0, "46735": 970636800.0, "46740": 967117952.0, "46745": 965986112.0, "46750": 948556928.0, "46755": 972128960.0, "46760": 967607168.0, "46765": 968590144.0, "46770": 984258688.0, "46775": 940643328.0, "46780": 938341376.0, "46785": 961465984.0, "46790": 955164032.0, "46795": 971195456.0, "46800": 942290240.0, "46805": 948702656.0, "46810": 972684864.0, "46815": 979917056.0, "46820": 966213888.0, "46825": 963209280.0, "46830": 944499776.0, "46835": 973408128.0, "46840": 978571008.0, "46845": 970266240.0, "46850": 955484800.0, "46855": 939989248.0, "46860": 963598016.0, "46865": 965883840.0, "46870": 976771648.0, "46875": 961624512.0, "46880": 942285312.0, "46885": 951865792.0, "46890": 982617600.0, "46895": 958112000.0, "46900": 968160512.0, "46905": 953046080.0, "46910": 949759552.0, "46915": 974862848.0, "46920": 959622144.0, "46925": 978882432.0, "46930": 982901696.0, "46935": 951312640.0, "46940": 957988800.0, "46945": 949103104.0, "46950": 964174016.0, "46955": 973333632.0, "46960": 948716800.0, "46965": 965208064.0, "46970": 980988160.0, "46975": 969664448.0, "46980": 957657920.0, "46985": 929644416.0, "46990": 932102656.0, "46995": 977638720.0, "47000": 973168576.0, "47005": 968961408.0, "47010": 971798912.0, "47015": 941860608.0, "47020": 979101312.0, "47025": 974887936.0, "47030": 972490240.0, "47035": 960012416.0, "47040": 945705216.0, "47045": 962070336.0, "47050": 972196736.0, "47055": 976158656.0, "47060": 976816384.0, "47065": 970334848.0, "47070": 947943872.0, "47075": 974361024.0, "47080": 982441408.0, "47085": 962001792.0, "47090": 986895616.0, "47095": 934701824.0, "47100": 956609408.0, "47105": 968622848.0, "47110": 982648192.0, "47115": 977323456.0, "47120": 938993920.0, "47125": 982632896.0, "47130": 983614272.0, "47135": 969276672.0, "47140": 973295936.0, "47145": 963985152.0, "47150": 956496768.0, "47155": 982801728.0, "47160": 968083648.0, "47165": 975359872.0, "47170": 970128192.0, "47175": 964211712.0, "47180": 979028800.0, "47185": 961883968.0, "47190": 973573760.0, "47195": 977515520.0, "47200": 962916352.0, "47205": 970697536.0, "47210": 970754496.0, "47215": 975980224.0, "47220": 976194560.0, "47225": 947906048.0, "47230": 949594624.0, "47235": 975132736.0, "47240": 977302080.0, "47245": 978062464.0, "47250": 953210496.0, "47255": 934562496.0, "47260": 985371008.0, "47265": 982392448.0, "47270": 965415424.0, "47275": 963204800.0, "47280": 934618240.0, "47285": 963918592.0, "47290": 981433984.0, "47295": 971637632.0, "47300": 986810048.0, "47305": 949221184.0, "47310": 965170048.0, "47315": 987359680.0, "47320": 974127104.0, "47325": 972011520.0, "47330": 962837184.0, "47335": 940280704.0, "47340": 966747584.0, "47345": 982412160.0, "47350": 976428800.0, "47355": 973507008.0, "47360": 951228160.0, "47365": 974678272.0, "47370": 957016256.0, "47375": 953250368.0, "47380": 983170304.0, "47385": 959945408.0, "47390": 956615040.0, "47395": 965908480.0, "47400": 967534272.0, "47405": 975273664.0, "47410": 932059968.0, "47415": 949484160.0, "47420": 979538048.0, "47425": 969492864.0, "47430": 964839424.0, "47435": 970041152.0, "47440": 948366848.0, "47445": 971909120.0, "47450": 966917056.0, "47455": 967922752.0, "47460": 975484032.0, "47465": 949918144.0, "47470": 980763968.0, "47475": 971235520.0, "47480": 972007680.0, "47485": 976073792.0, "47490": 958137792.0, "47495": 950700672.0, "47500": 974507008.0, "47505": 979610368.0, "47510": 985703808.0, "47515": 961060160.0, "47520": 941801792.0, "47525": 975126528.0, "47530": 976210624.0, "47535": 974115264.0, "47540": 968390528.0, "47545": 942543552.0, "47550": 966515904.0, "47555": 970418304.0, "47560": 979569536.0, "47565": 975453056.0, "47570": 945871488.0, "47575": 960793088.0, "47580": 969483008.0, "47585": 974273408.0, "47590": 961122496.0, "47595": 966380160.0, "47600": 959105024.0, "47605": 968358976.0, "47610": 979180288.0, "47615": 963621632.0, "47620": 973336256.0, "47625": 936735168.0, "47630": 964114048.0, "47635": 968210816.0, "47640": 964263616.0, "47645": 958311424.0, "47650": 951614848.0, "47655": 975783488.0, "47660": 980819904.0, "47665": 970393152.0, "47670": 974039360.0, "47675": 955553344.0, "47680": 968298368.0, "47685": 985230272.0, "47690": 962142784.0, "47695": 971104384.0, "47700": 978988992.0, "47705": 957892672.0, "47710": 972847296.0, "47715": 983628032.0, "47720": 983020032.0, "47725": 973003648.0, "47730": 939412608.0, "47735": 966734208.0, "47740": 969223680.0, "47745": 982779712.0, "47750": 983652096.0, "47755": 937525312.0, "47760": 953956992.0, "47765": 963096960.0, "47770": 965024832.0, "47775": 956871680.0, "47780": 975083648.0, "47785": 959568192.0, "47790": 966853888.0, "47795": 973861376.0, "47800": 953827328.0, "47805": 981492864.0, "47810": 942816832.0, "47815": 969434432.0, "47820": 962700416.0, "47825": 973861760.0, "47830": 976174080.0, "47835": 945055744.0, "47840": 961781376.0, "47845": 974856832.0, "47850": 969164224.0, "47855": 979235328.0, "47860": 962283456.0, "47865": 955711808.0, "47870": 961474432.0, "47875": 961164160.0, "47880": 974407296.0, "47885": 964125248.0, "47890": 939879936.0, "47895": 970074240.0, "47900": 987163520.0, "47905": 963148416.0, "47910": 961706432.0, "47915": 942574336.0, "47920": 963309952.0, "47925": 979568768.0, "47930": 971127680.0, "47935": 954854464.0, "47940": 963494208.0, "47945": 935742464.0, "47950": 971655360.0, "47955": 975545344.0, "47960": 980508736.0, "47965": 958407360.0, "47970": 945202432.0, "47975": 966646464.0, "47980": 956756352.0, "47985": 992181440.0, "47990": 977723520.0, "47995": 952138880.0, "48000": 961374848.0, "48005": 966675328.0, "48010": 974321792.0, "48015": 962805504.0, "48020": 939663872.0, "48025": 961627264.0, "48030": 959712576.0, "48035": 978727232.0, "48040": 978732672.0, "48045": 961712576.0, "48050": 954330176.0, "48055": 981605888.0, "48060": 970263872.0, "48065": 979938944.0, "48070": 957917056.0, "48075": 944387136.0, "48080": 982271168.0, "48085": 973850432.0, "48090": 965703808.0, "48095": 979413696.0, "48100": 943844096.0, "48105": 975232064.0, "48110": 965882112.0, "48115": 961928192.0, "48120": 969281600.0, "48125": 940099456.0, "48130": 948694016.0, "48135": 960668544.0, "48140": 972205888.0, "48145": 970149376.0, "48150": 955940032.0, "48155": 938236864.0, "48160": 961104512.0, "48165": 965934016.0, "48170": 980377856.0, "48175": 974955392.0, "48180": 935883840.0, "48185": 949275264.0, "48190": 981795392.0, "48195": 968420224.0, "48200": 969788608.0, "48205": 965495040.0, "48210": 956700928.0, "48215": 952536896.0, "48220": 971220736.0, "48225": 980588096.0, "48230": 979874688.0, "48235": 935265536.0, "48240": 963757952.0, "48245": 980992128.0, "48250": 957821504.0, "48255": 976662528.0, "48260": 935566912.0, "48265": 975994560.0, "48270": 962402816.0, "48275": 971708096.0, "48280": 959428032.0, "48285": 949921920.0, "48290": 957180544.0, "48295": 974134720.0, "48300": 975389376.0, "48305": 968745152.0, "48310": 948654912.0, "48315": 952137600.0, "48320": 972506496.0, "48325": 967822976.0, "48330": 972700928.0, "48335": 958691840.0, "48340": 934233792.0, "48345": 953010432.0, "48350": 970128192.0, "48355": 972776000.0, "48360": 958975232.0, "48365": 929890688.0, "48370": 957810752.0, "48375": 973034752.0, "48380": 974807168.0, "48385": 959851136.0, "48390": 936302848.0, "48395": 976936384.0, "48400": 972161024.0, "48405": 972939904.0, "48410": 966331840.0, "48415": 968241984.0, "48420": 939663232.0, "48425": 978591744.0, "48430": 964790784.0, "48435": 974422272.0, "48440": 970116224.0, "48445": 957962112.0, "48450": 961253120.0, "48455": 959919872.0, "48460": 967701504.0, "48465": 970621120.0, "48470": 951818048.0, "48475": 936279616.0, "48480": 958751808.0, "48485": 973885824.0, "48490": 958708096.0, "48495": 952334976.0, "48500": 935583296.0, "48505": 969360000.0, "48510": 957193792.0, "48515": 974637312.0, "48520": 960727040.0, "48525": 933932608.0, "48530": 961441088.0, "48535": 975969920.0, "48540": 976292928.0, "48545": 969843584.0, "48550": 949809600.0, "48555": 951788160.0, "48560": 967997184.0, "48565": 972878976.0, "48570": 975360512.0, "48575": 961481088.0, "48580": 932717376.0, "48585": 979069440.0, "48590": 983159680.0, "48595": 966631488.0, "48600": 957011584.0, "48605": 938636928.0, "48610": 957947136.0, "48615": 971141632.0, "48620": 975300480.0, "48625": 981005760.0, "48630": 940338368.0, "48635": 957272512.0, "48640": 978455104.0, "48645": 967419648.0, "48650": 970808832.0, "48655": 963604416.0, "48660": 945645696.0, "48665": 968530560.0, "48670": 972766912.0, "48675": 984586624.0, "48680": 960400960.0, "48685": 950296640.0, "48690": 965318208.0, "48695": 971755456.0, "48700": 966945280.0, "48705": 967297088.0, "48710": 947361408.0, "48715": 960683008.0, "48720": 965786176.0, "48725": 956998400.0, "48730": 972502336.0, "48735": 962574208.0, "48740": 959243200.0, "48745": 972187072.0, "48750": 966558656.0, "48755": 981270976.0, "48760": 968331776.0, "48765": 948951744.0, "48770": 957206592.0, "48775": 988491648.0, "48780": 965875072.0, "48785": 965484928.0, "48790": 942353728.0, "48795": 952166848.0, "48800": 978513088.0, "48805": 982352064.0, "48810": 957344064.0, "48815": 955331968.0, "48820": 928516096.0, "48825": 977972928.0, "48830": 971462912.0, "48835": 969888384.0, "48840": 969949312.0, "48845": 954618112.0, "48850": 964311680.0, "48855": 971285952.0, "48860": 974715200.0, "48865": 971597952.0, "48870": 949858624.0, "48875": 972074624.0, "48880": 974303552.0, "48885": 961842496.0, "48890": 980002496.0, "48895": 956688128.0, "48900": 947544768.0, "48905": 962592960.0, "48910": 964139008.0, "48915": 960479872.0, "48920": 961570304.0, "48925": 939098688.0, "48930": 965196992.0, "48935": 965932288.0, "48940": 951124800.0, "48945": 987012288.0, "48950": 940162048.0, "48955": 973149056.0, "48960": 970108288.0, "48965": 962928896.0, "48970": 969340288.0, "48975": 929877952.0, "48980": 967044480.0, "48985": 968703360.0, "48990": 975721024.0, "48995": 974084352.0, "49000": 961673344.0, "49005": 942340864.0, "49010": 973528576.0, "49015": 971719040.0, "49020": 959925312.0, "49025": 944914752.0, "49030": 935002240.0, "49035": 979028096.0, "49040": 972749888.0, "49045": 963203008.0, "49050": 961033664.0, "49055": 941775488.0, "49060": 955736256.0, "49065": 965589632.0, "49070": 978535296.0, "49075": 975202368.0, "49080": 939171712.0, "49085": 949184704.0, "49090": 962891200.0, "49095": 983227904.0, "49100": 967839552.0, "49105": 969913024.0, "49110": 944534400.0, "49115": 979008704.0, "49120": 977775680.0, "49125": 981644224.0, "49130": 946665856.0, "49135": 954183040.0, "49140": 952630208.0, "49145": 970072448.0, "49150": 951259584.0, "49155": 967537920.0, "49160": 947445376.0, "49165": 973813120.0, "49170": 978690688.0, "49175": 971606784.0, "49180": 979586688.0, "49185": 974649472.0, "49190": 961651136.0, "49195": 988515584.0, "49200": 969516160.0, "49205": 962721152.0, "49210": 978044096.0, "49215": 940325056.0, "49220": 978875712.0, "49225": 963892160.0, "49230": 976190208.0, "49235": 975146048.0, "49240": 944815168.0, "49245": 961437248.0, "49250": 974496128.0, "49255": 993570432.0, "49260": 972087040.0, "49265": 950265344.0, "49270": 940441856.0, "49275": 961797440.0, "49280": 987786816.0, "49285": 983768384.0, "49290": 961400768.0, "49295": 939339392.0, "49300": 975687808.0, "49305": 979919424.0, "49310": 960952768.0, "49315": 967867520.0, "49320": 942685888.0, "49325": 965491008.0, "49330": 962788480.0, "49335": 958856256.0, "49340": 977176448.0, "49345": 964417280.0, "49350": 955331584.0, "49355": 971080448.0, "49360": 965939520.0, "49365": 958446336.0, "49370": 950972224.0, "49375": 929687936.0, "49380": 968499648.0, "49385": 958501312.0, "49390": 948570560.0, "49395": 975047488.0, "49400": 929946112.0, "49405": 958704896.0, "49410": 966244416.0, "49415": 968520064.0, "49420": 968760704.0, "49425": 946787328.0, "49430": 961048960.0, "49435": 969889792.0, "49440": 964291264.0, "49445": 967564288.0, "49450": 963035264.0, "49455": 937606528.0, "49460": 975029824.0, "49465": 969131840.0, "49470": 960080384.0, "49475": 973216896.0, "49480": 961257600.0, "49485": 960601728.0, "49490": 976172288.0, "49495": 978230080.0, "49500": 953513856.0, "49505": 952673344.0, "49510": 972061440.0, "49515": 960171776.0, "49520": 967887424.0, "49525": 973794048.0, "49530": 943210112.0, "49535": 952681280.0, "49540": 962272832.0, "49545": 981936384.0, "49550": 982553408.0, "49555": 974314752.0, "49560": 938287424.0, "49565": 965545728.0, "49570": 965166464.0, "49575": 973807680.0, "49580": 979132288.0, "49585": 961867328.0, "49590": 976890752.0, "49595": 973020608.0, "49600": 978280000.0, "49605": 960657152.0, "49610": 945534080.0, "49615": 955749184.0, "49620": 958079104.0, "49625": 947180928.0, "49630": 968769600.0, "49635": 962979008.0, "49640": 957736448.0, "49645": 989182720.0, "49650": 970251840.0, "49655": 952025472.0, "49660": 959397632.0, "49665": 948604736.0, "49670": 970184128.0, "49675": 979155776.0, "49680": 967997696.0, "49685": 968203136.0, "49690": 958837632.0, "49695": 941083648.0, "49700": 967719232.0, "49705": 980838336.0, "49710": 970433920.0, "49715": 968492800.0, "49720": 938820928.0, "49725": 955406592.0, "49730": 978192192.0, "49735": 983455680.0, "49740": 959096512.0, "49745": 926962752.0, "49750": 977412608.0, "49755": 960253056.0, "49760": 986723968.0, "49765": 963526464.0, "49770": 963941248.0, "49775": 950958528.0, "49780": 979673216.0, "49785": 967283968.0, "49790": 969623680.0, "49795": 948298432.0, "49800": 961709952.0, "49805": 977959744.0, "49810": 975188672.0, "49815": 971730176.0, "49820": 958309504.0, "49825": 966517824.0, "49830": 973692608.0, "49835": 966050752.0, "49840": 964969664.0, "49845": 966444032.0, "49850": 931215168.0, "49855": 971890944.0, "49860": 962376192.0, "49865": 958264640.0, "49870": 979867520.0, "49875": 959668352.0, "49880": 967701312.0, "49885": 968041472.0, "49890": 967523968.0, "49895": 975450304.0, "49900": 965384064.0, "49905": 952439040.0, "49910": 994371904.0, "49915": 977375040.0, "49920": 963010240.0, "49925": 978840768.0, "49930": 958974080.0, "49935": 954692032.0, "49940": 981376256.0, "49945": 977179584.0, "49950": 969366400.0, "49955": 945886464.0, "49960": 955341824.0, "49965": 963513472.0, "49970": 973033280.0, "49975": 964695296.0, "49980": 975926912.0, "49985": 960642944.0, "49990": 973433088.0, "49995": 967461888.0, "50000": 977988864.0, "50005": 978707584.0, "50010": 943128256.0, "50015": 958635776.0, "50020": 970631040.0, "50025": 980912768.0, "50030": 977941376.0, "50035": 955674112.0, "50040": 952453120.0, "50045": 967235328.0, "50050": 965870656.0, "50055": 943270720.0, "50060": 970109696.0, "50065": 956599104.0, "50070": 980699264.0, "50075": 959559360.0, "50080": 964170880.0, "50085": 966020544.0, "50090": 949651456.0, "50095": 983652608.0, "50100": 969060160.0, "50105": 957382784.0, "50110": 973894144.0, "50115": 954185152.0, "50120": 967833792.0, "50125": 959607680.0, "50130": 978849280.0, "50135": 974261568.0, "50140": 942341952.0, "50145": 974918592.0, "50150": 988467008.0, "50155": 983174016.0, "50160": 982993152.0, "50165": 969766080.0, "50170": 939363712.0, "50175": 948060416.0, "50180": 996274944.0, "50185": 969532544.0, "50190": 976092480.0, "50195": 952370624.0, "50200": 971166912.0, "50205": 958292992.0, "50210": 973873408.0, "50215": 971856576.0, "50220": 953614528.0, "50225": 949496640.0, "50230": 973542208.0, "50235": 963875968.0, "50240": 972705728.0, "50245": 963999744.0, "50250": 958952448.0, "50255": 970222592.0, "50260": 985713600.0, "50265": 976178752.0, "50270": 981017472.0, "50275": 949813056.0, "50280": 970834560.0, "50285": 967388736.0, "50290": 956883712.0, "50295": 964501056.0, "50300": 942673920.0, "50305": 963495360.0, "50310": 977516288.0, "50315": 965855040.0, "50320": 961789376.0, "50325": 954916608.0, "50330": 954124736.0, "50335": 973887424.0, "50340": 971788672.0, "50345": 961769344.0, "50350": 970913344.0, "50355": 946076480.0, "50360": 980035776.0, "50365": 966297024.0, "50370": 974792064.0, "50375": 974402688.0, "50380": 947696704.0, "50385": 971818688.0, "50390": 979384512.0, "50395": 971473984.0, "50400": 977861632.0, "50405": 953741184.0, "50410": 941486912.0, "50415": 967652416.0, "50420": 972794688.0, "50425": 987395584.0, "50430": 964386112.0, "50435": 951663936.0, "50440": 970940416.0, "50445": 971190656.0, "50450": 963114880.0, "50455": 975281792.0, "50460": 948946688.0, "50465": 977681856.0, "50470": 979766976.0, "50475": 968618688.0, "50480": 968656384.0, "50485": 955576512.0, "50490": 954725568.0, "50495": 962334144.0, "50500": 963187648.0, "50505": 966726400.0, "50510": 967672640.0, "50515": 941865216.0, "50520": 971491968.0, "50525": 955229056.0, "50530": 954631488.0, "50535": 977346368.0, "50540": 938354304.0, "50545": 972313984.0, "50550": 987756992.0, "50555": 971134848.0, "50560": 973261568.0, "50565": 946378752.0, "50570": 960928512.0, "50575": 970467136.0, "50580": 966068288.0, "50585": 969029184.0, "50590": 957985664.0, "50595": 949467712.0, "50600": 972007488.0, "50605": 965159232.0, "50610": 977385920.0, "50615": 963022592.0, "50620": 933882432.0, "50625": 976969536.0, "50630": 983141696.0, "50635": 974128576.0, "50640": 959060672.0, "50645": 943379776.0, "50650": 957875520.0, "50655": 963285504.0, "50660": 963009152.0, "50665": 977020160.0, "50670": 948500736.0, "50675": 949241792.0, "50680": 967623104.0, "50685": 968968640.0, "50690": 971738880.0, "50695": 971153088.0, "50700": 953826432.0, "50705": 973769664.0, "50710": 965472640.0, "50715": 978436288.0, "50720": 961671744.0, "50725": 944545792.0, "50730": 965257024.0, "50735": 962461440.0, "50740": 958386688.0, "50745": 966339072.0, "50750": 962502208.0, "50755": 952769600.0, "50760": 968611456.0, "50765": 956629120.0, "50770": 960662592.0, "50775": 954940672.0, "50780": 934927424.0, "50785": 986435968.0, "50790": 973554944.0, "50795": 972652224.0, "50800": 956787904.0, "50805": 939191232.0, "50810": 955942656.0, "50815": 985013056.0, "50820": 974337024.0, "50825": 958862400.0, "50830": 955866432.0, "50835": 951525696.0, "50840": 970741440.0, "50845": 976782080.0, "50850": 974678144.0, "50855": 949718976.0, "50860": 947636672.0, "50865": "nan", "50870": "nan", "50875": "nan", "50880": "nan", "50885": "nan", "50890": "nan", "50895": "nan", "50900": "nan", "50905": "nan", "50910": "nan", "50915": "nan", "50920": "nan", "50925": "nan", "50930": "nan", "50935": "nan", "50940": "nan", "50945": "nan", "50950": "nan", "50955": "nan", "50960": "nan", "50965": "nan", "50970": "nan", "50975": "nan", "50980": "nan", "50985": "nan", "50990": "nan", "50995": "nan", "51000": "nan", "51005": "nan", "51010": "nan", "51015": "nan", "51020": "nan", "51025": "nan", "51030": "nan", "51035": "nan", "51040": "nan", "51045": "nan", "51050": "nan", "51055": "nan", "51060": "nan", "51065": "nan", "51070": "nan", "51075": "nan", "51080": "nan", "51085": "nan", "51090": "nan", "51095": "nan", "51100": "nan", "51105": "nan", "51110": "nan", "51115": "nan", "51120": "nan", "51125": "nan", "51130": "nan", "51135": "nan", "51140": "nan", "51145": "nan", "51150": "nan", "51155": "nan", "51160": "nan", "51165": "nan", "51170": "nan", "51175": "nan", "51180": "nan", "51185": "nan", "51190": "nan", "51195": "nan", "51200": "nan", "51205": "nan", "51210": "nan", "51215": "nan", "51220": "nan", "51225": "nan", "51230": "nan", "51235": "nan", "51240": "nan", "51245": "nan", "51250": "nan", "51255": "nan", "51260": "nan", "51265": "nan", "51270": "nan", "51275": "nan", "51280": "nan", "51285": "nan", "51290": "nan", "51295": "nan", "51300": "nan", "51305": "nan", "51310": "nan", "51315": "nan", "51320": "nan", "51325": "nan", "51330": "nan", "51335": "nan", "51340": "nan", "51345": "nan", "51350": "nan", "51355": "nan", "51360": "nan", "51365": "nan", "51370": "nan", "51375": "nan", "51380": "nan", "51385": "nan", "51390": "nan", "51395": "nan", "51400": "nan", "51405": "nan", "51410": "nan", "51415": "nan", "51420": "nan", "51425": "nan", "51430": "nan", "51435": "nan", "51440": "nan", "51445": "nan", "51450": "nan", "51455": "nan", "51460": "nan", "51465": "nan", "51470": "nan", "51475": "nan", "51480": "nan", "51485": "nan", "51490": "nan", "51495": "nan", "51500": "nan", "51505": "nan", "51510": "nan", "51515": "nan", "51520": "nan", "51525": "nan", "51530": "nan", "51535": "nan", "51540": "nan", "51545": "nan", "51550": "nan", "51555": "nan", "51560": "nan", "51565": "nan", "51570": "nan", "51575": "nan", "51580": "nan", "51585": "nan", "51590": "nan", "51595": "nan", "51600": "nan", "51605": "nan", "51610": "nan", "51615": "nan", "51620": "nan", "51625": "nan", "51630": "nan", "51635": "nan", "51640": "nan", "51645": "nan", "51650": "nan", "51655": "nan", "51660": "nan", "51665": "nan", "51670": "nan", "51675": "nan", "51680": "nan", "51685": "nan", "51690": "nan", "51695": "nan", "51700": "nan", "51705": "nan", "51710": "nan", "51715": "nan", "51720": "nan", "51725": "nan", "51730": "nan", "51735": "nan", "51740": "nan", "51745": "nan", "51750": "nan", "51755": "nan", "51760": "nan", "51765": "nan", "51770": "nan", "51775": "nan", "51780": "nan", "51785": "nan", "51790": "nan", "51795": "nan", "51800": "nan", "51805": "nan", "51810": "nan", "51815": "nan", "51820": "nan", "51825": "nan", "51830": "nan", "51835": "nan", "51840": "nan", "51845": "nan", "51850": "nan", "51855": "nan", "51860": "nan", "51865": "nan", "51870": "nan", "51875": "nan", "51880": "nan", "51885": "nan", "51890": "nan", "51895": "nan", "51900": "nan", "51905": "nan", "51910": "nan", "51915": "nan", "51920": "nan", "51925": "nan", "51930": "nan", "51935": "nan", "51940": "nan", "51945": "nan", "51950": "nan", "51955": "nan", "51960": "nan", "51965": "nan", "51970": "nan", "51975": "nan", "51980": "nan", "51985": "nan", "51990": "nan", "51995": "nan", "52000": "nan", "52005": "nan", "52010": "nan", "52015": "nan", "52020": "nan", "52025": "nan", "52030": "nan", "52035": "nan", "52040": "nan", "52045": "nan", "52050": "nan", "52055": "nan", "52060": "nan", "52065": "nan", "52070": "nan", "52075": "nan", "52080": "nan", "52085": "nan", "52090": "nan", "52095": "nan", "52100": "nan", "52105": "nan", "52110": "nan", "52115": "nan", "52120": "nan", "52125": "nan", "52130": "nan", "52135": "nan", "52140": "nan", "52145": "nan", "52150": "nan", "52155": "nan", "52160": "nan", "52165": "nan", "52170": "nan", "52175": "nan", "52180": "nan", "52185": "nan", "52190": "nan", "52195": "nan", "52200": "nan", "52205": "nan", "52210": "nan", "52215": "nan", "52220": "nan", "52225": "nan", "52230": "nan", "52235": "nan", "52240": "nan", "52245": "nan", "52250": "nan", "52255": "nan", "52260": "nan", "52265": "nan", "52270": "nan", "52275": "nan", "52280": "nan", "52285": "nan", "52290": "nan", "52295": "nan", "52300": "nan", "52305": "nan", "52310": "nan", "52315": "nan", "52320": "nan", "52325": "nan", "52330": "nan", "52335": "nan", "52340": "nan", "52345": "nan", "52350": "nan", "52355": "nan", "52360": "nan", "52365": "nan", "52370": "nan", "52375": "nan", "52380": "nan", "52385": "nan", "52390": "nan", "52395": "nan", "52400": "nan", "52405": "nan", "52410": "nan", "52415": "nan", "52420": "nan", "52425": "nan", "52430": "nan", "52435": "nan", "52440": "nan", "52445": "nan", "52450": "nan", "52455": "nan", "52460": "nan", "52465": "nan", "52470": "nan", "52475": "nan", "52480": "nan", "52485": "nan", "52490": "nan", "52495": "nan", "52500": "nan", "52505": "nan", "52510": "nan", "52515": "nan", "52520": "nan", "52525": "nan", "52530": "nan", "52535": "nan", "52540": "nan", "52545": "nan", "52550": "nan", "52555": "nan", "52560": "nan", "52565": "nan", "52570": "nan", "52575": "nan", "52580": "nan", "52585": "nan", "52590": "nan", "52595": "nan", "52600": "nan", "52605": "nan", "52610": "nan", "52615": "nan", "52620": "nan", "52625": "nan", "52630": "nan", "52635": "nan", "52640": "nan", "52645": "nan", "52650": "nan", "52655": "nan", "52660": "nan", "52665": "nan", "52670": "nan", "52675": "nan", "52680": "nan", "52685": "nan", "52690": "nan", "52695": "nan", "52700": "nan", "52705": "nan", "52710": "nan", "52715": "nan", "52720": "nan", "52725": "nan", "52730": "nan", "52735": "nan", "52740": "nan", "52745": "nan", "52750": "nan", "52755": "nan", "52760": "nan", "52765": "nan", "52770": "nan", "52775": "nan", "52780": "nan", "52785": "nan", "52790": "nan", "52795": "nan", "52800": "nan", "52805": "nan", "52810": "nan", "52815": "nan", "52820": "nan", "52825": "nan", "52830": "nan", "52835": "nan", "52840": "nan", "52845": "nan", "52850": "nan", "52855": "nan", "52860": "nan", "52865": "nan", "52870": "nan", "52875": "nan", "52880": "nan", "52885": "nan", "52890": "nan", "52895": "nan", "52900": "nan", "52905": "nan", "52910": "nan", "52915": "nan", "52920": "nan", "52925": "nan", "52930": "nan", "52935": "nan", "52940": "nan", "52945": "nan", "52950": "nan", "52955": "nan", "52960": "nan", "52965": "nan", "52970": "nan", "52975": "nan", "52980": "nan", "52985": "nan", "52990": "nan", "52995": "nan", "53000": "nan", "53005": "nan", "53010": "nan", "53015": "nan", "53020": "nan", "53025": "nan", "53030": "nan", "53035": "nan", "53040": "nan", "53045": "nan", "53050": "nan", "53055": "nan", "53060": "nan", "53065": "nan", "53070": "nan", "53075": "nan", "53080": "nan", "53085": "nan", "53090": "nan", "53095": "nan", "53100": "nan", "53105": "nan", "53110": "nan", "53115": "nan", "53120": "nan", "53125": "nan", "53130": "nan", "53135": "nan", "53140": "nan", "53145": "nan", "53150": "nan", "53155": "nan", "53160": "nan", "53165": "nan", "53170": "nan", "53175": "nan", "53180": "nan", "53185": "nan", "53190": "nan", "53195": "nan", "53200": "nan", "53205": "nan", "53210": "nan", "53215": "nan", "53220": "nan", "53225": "nan", "53230": "nan", "53235": "nan", "53240": "nan", "53245": "nan", "53250": "nan", "53255": "nan", "53260": "nan", "53265": "nan", "53270": "nan", "53275": "nan", "53280": "nan", "53285": "nan", "53290": "nan", "53295": "nan", "53300": "nan", "53305": "nan", "53310": "nan", "53315": "nan", "53320": "nan", "53325": "nan", "53330": "nan", "53335": "nan", "53340": "nan", "53345": "nan", "53350": "nan", "53355": "nan", "53360": "nan", "53365": "nan", "53370": "nan", "53375": "nan", "53380": "nan", "53385": "nan", "53390": "nan", "53395": "nan", "53400": "nan", "53405": "nan", "53410": "nan", "53415": "nan", "53420": "nan", "53425": "nan", "53430": "nan", "53435": "nan", "53440": "nan", "53445": "nan", "53450": "nan", "53455": "nan", "53460": "nan", "53465": "nan", "53470": "nan", "53475": "nan", "53480": "nan", "53485": "nan", "53490": "nan", "53495": "nan", "53500": "nan", "53505": "nan", "53510": "nan", "53515": "nan", "53520": "nan", "53525": "nan", "53530": "nan", "53535": "nan", "53540": "nan", "53545": "nan", "53550": "nan", "53555": "nan", "53560": "nan", "53565": "nan", "53570": "nan", "53575": "nan", "53580": "nan", "53585": "nan", "53590": "nan", "53595": "nan", "53600": "nan", "53605": "nan", "53610": "nan", "53615": "nan", "53620": "nan", "53625": "nan", "53630": "nan", "53635": "nan", "53640": "nan", "53645": "nan", "53650": "nan", "53655": "nan", "53660": "nan", "53665": "nan", "53670": "nan", "53675": "nan", "53680": "nan", "53685": "nan", "53690": "nan", "53695": "nan", "53700": "nan", "53705": "nan", "53710": "nan", "53715": "nan", "53720": "nan", "53725": "nan", "53730": "nan", "53735": "nan", "53740": "nan", "53745": "nan", "53750": "nan", "53755": "nan", "53760": "nan", "53765": "nan", "53770": "nan", "53775": "nan", "53780": "nan", "53785": "nan", "53790": "nan", "53795": "nan", "53800": "nan", "53805": "nan", "53810": "nan", "53815": "nan", "53820": "nan", "53825": "nan", "53830": "nan", "53835": "nan", "53840": "nan", "53845": "nan", "53850": "nan", "53855": "nan", "53860": "nan", "53865": "nan", "53870": "nan", "53875": "nan", "53880": "nan", "53885": "nan", "53890": "nan", "53895": "nan", "53900": "nan", "53905": "nan", "53910": "nan", "53915": "nan", "53920": "nan", "53925": "nan", "53930": "nan", "53935": "nan", "53940": "nan", "53945": "nan", "53950": "nan", "53955": "nan", "53960": "nan", "53965": "nan", "53970": "nan", "53975": "nan", "53980": "nan", "53985": "nan", "53990": "nan", "53995": "nan", "54000": "nan", "54005": "nan", "54010": "nan", "54015": "nan", "54020": "nan", "54025": "nan", "54030": "nan", "54035": "nan", "54040": "nan", "54045": "nan", "54050": "nan", "54055": "nan", "54060": "nan", "54065": "nan", "54070": "nan", "54075": "nan", "54080": "nan", "54085": "nan", "54090": "nan", "54095": "nan", "54100": "nan", "54105": "nan", "54110": "nan", "54115": "nan", "54120": "nan", "54125": "nan", "54130": "nan", "54135": "nan", "54140": "nan", "54145": "nan", "54150": "nan", "54155": "nan", "54160": "nan", "54165": "nan", "54170": "nan", "54175": "nan", "54180": "nan", "54185": "nan", "54190": "nan", "54195": "nan", "54200": "nan", "54205": "nan", "54210": "nan", "54215": "nan", "54220": "nan", "54225": "nan", "54230": "nan", "54235": "nan", "54240": "nan", "54245": "nan", "54250": "nan", "54255": "nan", "54260": "nan", "54265": "nan", "54270": "nan", "54275": "nan", "54280": "nan", "54285": "nan", "54290": "nan", "54295": "nan", "54300": "nan", "54305": "nan", "54310": "nan", "54315": "nan", "54320": "nan", "54325": "nan", "54330": "nan", "54335": "nan", "54340": "nan", "54345": "nan", "54350": "nan", "54355": "nan", "54360": "nan", "54365": "nan", "54370": "nan", "54375": "nan", "54380": "nan", "54385": "nan", "54390": "nan", "54395": "nan", "54400": "nan", "54405": "nan", "54410": "nan", "54415": "nan", "54420": "nan", "54425": "nan", "54430": "nan", "54435": "nan", "54440": "nan", "54445": "nan", "54450": "nan", "54455": "nan", "54460": "nan", "54465": "nan", "54470": "nan", "54475": "nan", "54480": "nan", "54485": "nan", "54490": "nan", "54495": "nan", "54500": "nan", "54505": "nan", "54510": "nan", "54515": "nan", "54520": "nan", "54525": "nan", "54530": "nan", "54535": "nan", "54540": "nan", "54545": "nan", "54550": "nan", "54555": "nan", "54560": "nan", "54565": "nan", "54570": "nan", "54575": "nan", "54580": "nan", "54585": "nan", "54590": "nan", "54595": "nan", "54600": "nan", "54605": "nan", "54610": "nan", "54615": "nan", "54620": "nan", "54625": "nan", "54630": "nan", "54635": "nan", "54640": "nan", "54645": "nan", "54650": "nan", "54655": "nan", "54660": "nan", "54665": "nan", "54670": "nan", "54675": "nan", "54680": "nan", "54685": "nan", "54690": "nan", "54695": "nan", "54700": "nan", "54705": "nan", "54710": "nan", "54715": "nan", "54720": "nan", "54725": "nan", "54730": "nan", "54735": "nan", "54740": "nan", "54745": "nan", "54750": "nan", "54755": "nan", "54760": "nan", "54765": "nan", "54770": "nan", "54775": "nan", "54780": "nan", "54785": "nan", "54790": "nan", "54795": "nan", "54800": "nan", "54805": "nan", "54810": "nan", "54815": "nan", "54820": "nan", "54825": "nan", "54830": "nan", "54835": "nan", "54840": "nan", "54845": "nan", "54850": "nan", "54855": "nan", "54860": "nan", "54865": "nan", "54870": "nan", "54875": "nan", "54880": "nan", "54885": "nan", "54890": "nan", "54895": "nan", "54900": "nan", "54905": "nan", "54910": "nan", "54915": "nan", "54920": "nan", "54925": "nan", "54930": "nan", "54935": "nan", "54940": "nan", "54945": "nan", "54950": "nan", "54955": "nan", "54960": "nan", "54965": "nan", "54970": "nan", "54975": "nan", "54980": "nan", "54985": "nan", "54990": "nan", "54995": "nan", "55000": "nan"}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 55000, "step_interval": 5, "values": {"1": 12698293248.0, "5": 12698293248.0, "10": 12698293248.0, "15": 12698293248.0, "20": 12698293248.0, "25": 12698293248.0, "30": 12698293248.0, "35": 12698293248.0, "40": 12698293248.0, "45": 12698293248.0, "50": 12698293248.0, "55": 12698293248.0, "60": 12698293248.0, "65": 12698293248.0, "70": 12698293248.0, "75": 12698293248.0, "80": 12698293248.0, "85": 12698293248.0, "90": 12698293248.0, "95": 12698293248.0, "100": 12698293248.0, "105": 12698293248.0, "110": 12698293248.0, "115": 12698293248.0, "120": 12698293248.0, "125": 12698293248.0, "130": 12698293248.0, "135": 12698293248.0, "140": 12698293248.0, "145": 12698293248.0, "150": 12698293248.0, "155": 12698293248.0, "160": 12698293248.0, "165": 12698293248.0, "170": 12698293248.0, "175": 12698293248.0, "180": 12698293248.0, "185": 12698293248.0, "190": 12698293248.0, "195": 12698293248.0, "200": 12698293248.0, "205": 12698293248.0, "210": 12698293248.0, "215": 12698293248.0, "220": 12698293248.0, "225": 12698293248.0, "230": 12698293248.0, "235": 12698293248.0, "240": 12698293248.0, "245": 12698293248.0, "250": 12698293248.0, "255": 12698293248.0, "260": 12698293248.0, "265": 12698293248.0, "270": 12698293248.0, "275": 12698293248.0, "280": 12698293248.0, "285": 12698293248.0, "290": 12698293248.0, "295": 12698293248.0, "300": 12698293248.0, "305": 12698293248.0, "310": 12698293248.0, "315": 12698293248.0, "320": 12698293248.0, "325": 12698293248.0, "330": 12698293248.0, "335": 12698293248.0, "340": 12698293248.0, "345": 12698293248.0, "350": 12698293248.0, "355": 12698293248.0, "360": 12698293248.0, "365": 12698293248.0, "370": 12698293248.0, "375": 12698293248.0, "380": 12698293248.0, "385": 12698293248.0, "390": 12698293248.0, "395": 12698293248.0, "400": 12698293248.0, "405": 12698293248.0, "410": 12698293248.0, "415": 12698293248.0, "420": 12698293248.0, "425": 12698293248.0, "430": 12698293248.0, "435": 12698293248.0, "440": 12698293248.0, "445": 12698293248.0, "450": 12698293248.0, "455": 12698293248.0, "460": 12698293248.0, "465": 12698293248.0, "470": 12698293248.0, "475": 12698293248.0, "480": 12698293248.0, "485": 12698293248.0, "490": 12698293248.0, "495": 12698293248.0, "500": 12698293248.0, "505": 12698293248.0, "510": 12698293248.0, "515": 12698293248.0, "520": 12698293248.0, "525": 12698293248.0, "530": 12698293248.0, "535": 12698293248.0, "540": 12698293248.0, "545": 12698293248.0, "550": 12698293248.0, "555": 12698293248.0, "560": 12698293248.0, "565": 12698293248.0, "570": 12698293248.0, "575": 12698293248.0, "580": 12698293248.0, "585": 12698293248.0, "590": 12698293248.0, "595": 12698293248.0, "600": 12698293248.0, "605": 12698293248.0, "610": 12698293248.0, "615": 12698293248.0, "620": 12698293248.0, "625": 12698293248.0, "630": 12698293248.0, "635": 12698293248.0, "640": 12698293248.0, "645": 12698293248.0, "650": 12698293248.0, "655": 12698293248.0, "660": 12698293248.0, "665": 12698293248.0, "670": 12698293248.0, "675": 12698293248.0, "680": 12698293248.0, "685": 12698293248.0, "690": 12698293248.0, "695": 12698293248.0, "700": 12698293248.0, "705": 12698293248.0, "710": 12698293248.0, "715": 12698293248.0, "720": 12698293248.0, "725": 12698293248.0, "730": 12698293248.0, "735": 12698293248.0, "740": 12698293248.0, "745": 12698293248.0, "750": 12698293248.0, "755": 12698293248.0, "760": 12698293248.0, "765": 12698293248.0, "770": 12698293248.0, "775": 12698293248.0, "780": 12698293248.0, "785": 12698293248.0, "790": 12698293248.0, "795": 12698293248.0, "800": 12698293248.0, "805": 12698293248.0, "810": 12698293248.0, "815": 12698293248.0, "820": 12698293248.0, "825": 12698293248.0, "830": 12698293248.0, "835": 12698293248.0, "840": 12698293248.0, "845": 12698293248.0, "850": 12698293248.0, "855": 12698293248.0, "860": 12698293248.0, "865": 12698293248.0, "870": 12698293248.0, "875": 12698293248.0, "880": 12698293248.0, "885": 12698293248.0, "890": 12698293248.0, "895": 12698293248.0, "900": 12698293248.0, "905": 12698293248.0, "910": 12698293248.0, "915": 12698293248.0, "920": 12698293248.0, "925": 12698293248.0, "930": 12698293248.0, "935": 12698293248.0, "940": 12698293248.0, "945": 12698293248.0, "950": 12698293248.0, "955": 12698293248.0, "960": 12698293248.0, "965": 12698293248.0, "970": 12698293248.0, "975": 12698293248.0, "980": 12698293248.0, "985": 12698293248.0, "990": 12698293248.0, "995": 12698293248.0, "1000": 12698293248.0, "1005": 12698293248.0, "1010": 12698293248.0, "1015": 12698293248.0, "1020": 12698293248.0, "1025": 12698293248.0, "1030": 12698293248.0, "1035": 12698293248.0, "1040": 12698293248.0, "1045": 12698293248.0, "1050": 12698293248.0, "1055": 12698293248.0, "1060": 12698293248.0, "1065": 12698293248.0, "1070": 12698293248.0, "1075": 12698293248.0, "1080": 12698293248.0, "1085": 12698293248.0, "1090": 12698293248.0, "1095": 12698293248.0, "1100": 12698293248.0, "1105": 12698293248.0, "1110": 12698293248.0, "1115": 12698293248.0, "1120": 12698293248.0, "1125": 12698293248.0, "1130": 12698293248.0, "1135": 12698293248.0, "1140": 12698293248.0, "1145": 12698293248.0, "1150": 12698293248.0, "1155": 12698293248.0, "1160": 12698293248.0, "1165": 12698293248.0, "1170": 12698293248.0, "1175": 12698293248.0, "1180": 12698293248.0, "1185": 12698293248.0, "1190": 12698293248.0, "1195": 12698293248.0, "1200": 12698293248.0, "1205": 12698293248.0, "1210": 12698293248.0, "1215": 12698293248.0, "1220": 12698293248.0, "1225": 12698293248.0, "1230": 12698293248.0, "1235": 12698293248.0, "1240": 12698293248.0, "1245": 12698293248.0, "1250": 12698293248.0, "1255": 12698293248.0, "1260": 12698293248.0, "1265": 12698293248.0, "1270": 12698293248.0, "1275": 12698293248.0, "1280": 12698293248.0, "1285": 12698293248.0, "1290": 12698293248.0, "1295": 12698293248.0, "1300": 12698293248.0, "1305": 12698293248.0, "1310": 12698293248.0, "1315": 12698293248.0, "1320": 12698293248.0, "1325": 12698293248.0, "1330": 12698293248.0, "1335": 12698293248.0, "1340": 12698293248.0, "1345": 12698293248.0, "1350": 12698293248.0, "1355": 12698293248.0, "1360": 12698293248.0, "1365": 12698293248.0, "1370": 12698293248.0, "1375": 12698293248.0, "1380": 12698293248.0, "1385": 12698293248.0, "1390": 12698293248.0, "1395": 12698293248.0, "1400": 12698293248.0, "1405": 12698293248.0, "1410": 12698293248.0, "1415": 12698293248.0, "1420": 12698293248.0, "1425": 12698293248.0, "1430": 12698293248.0, "1435": 12698293248.0, "1440": 12698293248.0, "1445": 12698293248.0, "1450": 12698293248.0, "1455": 12698293248.0, "1460": 12698293248.0, "1465": 12698293248.0, "1470": 12698293248.0, "1475": 12698293248.0, "1480": 12698293248.0, "1485": 12698293248.0, "1490": 12698293248.0, "1495": 12698293248.0, "1500": 12698293248.0, "1505": 12698293248.0, "1510": 12698293248.0, "1515": 12698293248.0, "1520": 12698293248.0, "1525": 12698293248.0, "1530": 12698293248.0, "1535": 12698293248.0, "1540": 12698293248.0, "1545": 12698293248.0, "1550": 12698293248.0, "1555": 12698293248.0, "1560": 12698293248.0, "1565": 12698293248.0, "1570": 12698293248.0, "1575": 12698293248.0, "1580": 12698293248.0, "1585": 12698293248.0, "1590": 12698293248.0, "1595": 12698293248.0, "1600": 12698293248.0, "1605": 12698293248.0, "1610": 12698293248.0, "1615": 12698293248.0, "1620": 12698293248.0, "1625": 12698293248.0, "1630": 12698293248.0, "1635": 12698293248.0, "1640": 12698293248.0, "1645": 12698293248.0, "1650": 12698293248.0, "1655": 12698293248.0, "1660": 12698293248.0, "1665": 12698293248.0, "1670": 12698293248.0, "1675": 12698293248.0, "1680": 12698293248.0, "1685": 12698293248.0, "1690": 12698293248.0, "1695": 12698293248.0, "1700": 12698293248.0, "1705": 12698293248.0, "1710": 12698293248.0, "1715": 12698293248.0, "1720": 12698293248.0, "1725": 12698293248.0, "1730": 12698293248.0, "1735": 12698293248.0, "1740": 12698293248.0, "1745": 12698293248.0, "1750": 12698293248.0, "1755": 12698293248.0, "1760": 12698293248.0, "1765": 12698293248.0, "1770": 12698293248.0, "1775": 12698293248.0, "1780": 12698293248.0, "1785": 12698293248.0, "1790": 12698293248.0, "1795": 12698293248.0, "1800": 12698293248.0, "1805": 12698293248.0, "1810": 12698293248.0, "1815": 12698293248.0, "1820": 12698293248.0, "1825": 12698293248.0, "1830": 12698293248.0, "1835": 12698293248.0, "1840": 12698293248.0, "1845": 12698293248.0, "1850": 12698293248.0, "1855": 12698293248.0, "1860": 12698293248.0, "1865": 12698293248.0, "1870": 12698293248.0, "1875": 12698293248.0, "1880": 12698293248.0, "1885": 12698293248.0, "1890": 12698293248.0, "1895": 12698293248.0, "1900": 12698293248.0, "1905": 12698293248.0, "1910": 12698293248.0, "1915": 12698293248.0, "1920": 12698293248.0, "1925": 12698293248.0, "1930": 12698293248.0, "1935": 12698293248.0, "1940": 12698293248.0, "1945": 12698293248.0, "1950": 12698293248.0, "1955": 12698293248.0, "1960": 12698293248.0, "1965": 12698293248.0, "1970": 12698293248.0, "1975": 12698293248.0, "1980": 12698293248.0, "1985": 12698293248.0, "1990": 12698293248.0, "1995": 12698293248.0, "2000": 12698293248.0, "2005": 12698293248.0, "2010": 12698293248.0, "2015": 12698293248.0, "2020": 12698293248.0, "2025": 12698293248.0, "2030": 12698293248.0, "2035": 12698293248.0, "2040": 12698293248.0, "2045": 12698293248.0, "2050": 12698293248.0, "2055": 12698293248.0, "2060": 12698293248.0, "2065": 12698293248.0, "2070": 12698293248.0, "2075": 12698293248.0, "2080": 12698293248.0, "2085": 12698293248.0, "2090": 12698293248.0, "2095": 12698293248.0, "2100": 12698293248.0, "2105": 12698293248.0, "2110": 12698293248.0, "2115": 12698293248.0, "2120": 12698293248.0, "2125": 12698293248.0, "2130": 12698293248.0, "2135": 12698293248.0, "2140": 12698293248.0, "2145": 12698293248.0, "2150": 12698293248.0, "2155": 12698293248.0, "2160": 12698293248.0, "2165": 12698293248.0, "2170": 12698293248.0, "2175": 12698293248.0, "2180": 12698293248.0, "2185": 12698293248.0, "2190": 12698293248.0, "2195": 12698293248.0, "2200": 12698293248.0, "2205": 12698293248.0, "2210": 12698293248.0, "2215": 12698293248.0, "2220": 12698293248.0, "2225": 12698293248.0, "2230": 12698293248.0, "2235": 12698293248.0, "2240": 12698293248.0, "2245": 12698293248.0, "2250": 12698293248.0, "2255": 12698293248.0, "2260": 12698293248.0, "2265": 12698293248.0, "2270": 12698293248.0, "2275": 12698293248.0, "2280": 12698293248.0, "2285": 12698293248.0, "2290": 12698293248.0, "2295": 12698293248.0, "2300": 12698293248.0, "2305": 12698293248.0, "2310": 12698293248.0, "2315": 12698293248.0, "2320": 12698293248.0, "2325": 12698293248.0, "2330": 12698293248.0, "2335": 12698293248.0, "2340": 12698293248.0, "2345": 12698293248.0, "2350": 12698293248.0, "2355": 12698293248.0, "2360": 12698293248.0, "2365": 12698293248.0, "2370": 12698293248.0, "2375": 12698293248.0, "2380": 12698293248.0, "2385": 12698293248.0, "2390": 12698293248.0, "2395": 12698293248.0, "2400": 12698293248.0, "2405": 12698293248.0, "2410": 12698293248.0, "2415": 12698293248.0, "2420": 12698293248.0, "2425": 12698293248.0, "2430": 12698293248.0, "2435": 12698293248.0, "2440": 12698293248.0, "2445": 12698293248.0, "2450": 12698293248.0, "2455": 12698293248.0, "2460": 12698293248.0, "2465": 12698293248.0, "2470": 12698293248.0, "2475": 12698293248.0, "2480": 12698293248.0, "2485": 12698293248.0, "2490": 12698293248.0, "2495": 12698293248.0, "2500": 12698293248.0, "2505": 12698293248.0, "2510": 12698293248.0, "2515": 12698293248.0, "2520": 12698293248.0, "2525": 12698293248.0, "2530": 12698293248.0, "2535": 12698293248.0, "2540": 12698293248.0, "2545": 12698293248.0, "2550": 12698293248.0, "2555": 12698293248.0, "2560": 12698293248.0, "2565": 12698293248.0, "2570": 12698293248.0, "2575": 12698293248.0, "2580": 12698293248.0, "2585": 12698293248.0, "2590": 12698293248.0, "2595": 12698293248.0, "2600": 12698293248.0, "2605": 12698293248.0, "2610": 12698293248.0, "2615": 12698293248.0, "2620": 12698293248.0, "2625": 12698293248.0, "2630": 12698293248.0, "2635": 12698293248.0, "2640": 12698293248.0, "2645": 12698293248.0, "2650": 12698293248.0, "2655": 12698293248.0, "2660": 12698293248.0, "2665": 12698293248.0, "2670": 12698293248.0, "2675": 12698293248.0, "2680": 12698293248.0, "2685": 12698293248.0, "2690": 12698293248.0, "2695": 12698293248.0, "2700": 12698293248.0, "2705": 12698293248.0, "2710": 12698293248.0, "2715": 12698293248.0, "2720": 12698293248.0, "2725": 12698293248.0, "2730": 12698293248.0, "2735": 12698293248.0, "2740": 12698293248.0, "2745": 12698293248.0, "2750": 12698293248.0, "2755": 12698293248.0, "2760": 12698293248.0, "2765": 12698293248.0, "2770": 12698293248.0, "2775": 12698293248.0, "2780": 12698293248.0, "2785": 12698293248.0, "2790": 12698293248.0, "2795": 12698293248.0, "2800": 12698293248.0, "2805": 12698293248.0, "2810": 12698293248.0, "2815": 12698293248.0, "2820": 12698293248.0, "2825": 12698293248.0, "2830": 12698293248.0, "2835": 12698293248.0, "2840": 12698293248.0, "2845": 12698293248.0, "2850": 12698293248.0, "2855": 12698293248.0, "2860": 12698293248.0, "2865": 12698293248.0, "2870": 12698293248.0, "2875": 12698293248.0, "2880": 12698293248.0, "2885": 12698293248.0, "2890": 12698293248.0, "2895": 12698293248.0, "2900": 12698293248.0, "2905": 12698293248.0, "2910": 12698293248.0, "2915": 12698293248.0, "2920": 12698293248.0, "2925": 12698293248.0, "2930": 12698293248.0, "2935": 12698293248.0, "2940": 12698293248.0, "2945": 12698293248.0, "2950": 12698293248.0, "2955": 12698293248.0, "2960": 12698293248.0, "2965": 12698293248.0, "2970": 12698293248.0, "2975": 12698293248.0, "2980": 12698293248.0, "2985": 12698293248.0, "2990": 12698293248.0, "2995": 12698293248.0, "3000": 12698293248.0, "3005": 12698293248.0, "3010": 12698293248.0, "3015": 12698293248.0, "3020": 12698293248.0, "3025": 12698293248.0, "3030": 12698293248.0, "3035": 12698293248.0, "3040": 12698293248.0, "3045": 12698293248.0, "3050": 12698293248.0, "3055": 12698293248.0, "3060": 12698293248.0, "3065": 12698293248.0, "3070": 12698293248.0, "3075": 12698293248.0, "3080": 12698293248.0, "3085": 12698293248.0, "3090": 12698293248.0, "3095": 12698293248.0, "3100": 12698293248.0, "3105": 12698293248.0, "3110": 12698293248.0, "3115": 12698293248.0, "3120": 12698293248.0, "3125": 12698293248.0, "3130": 12698293248.0, "3135": 12698293248.0, "3140": 12698293248.0, "3145": 12698293248.0, "3150": 12698293248.0, "3155": 12698293248.0, "3160": 12698293248.0, "3165": 12698293248.0, "3170": 12698293248.0, "3175": 12698293248.0, "3180": 12698293248.0, "3185": 12698293248.0, "3190": 12698293248.0, "3195": 12698293248.0, "3200": 12698293248.0, "3205": 12698293248.0, "3210": 12698293248.0, "3215": 12698293248.0, "3220": 12698293248.0, "3225": 12698293248.0, "3230": 12698293248.0, "3235": 12698293248.0, "3240": 12698293248.0, "3245": 12698293248.0, "3250": 12698293248.0, "3255": 12698293248.0, "3260": 12698293248.0, "3265": 12698293248.0, "3270": 12698293248.0, "3275": 12698293248.0, "3280": 12698293248.0, "3285": 12698293248.0, "3290": 12698293248.0, "3295": 12698293248.0, "3300": 12698293248.0, "3305": 12698293248.0, "3310": 12698293248.0, "3315": 12698293248.0, "3320": 12698293248.0, "3325": 12698293248.0, "3330": 12698293248.0, "3335": 12698293248.0, "3340": 12698293248.0, "3345": 12698293248.0, "3350": 12698293248.0, "3355": 12698293248.0, "3360": 12698293248.0, "3365": 12698293248.0, "3370": 12698293248.0, "3375": 12698293248.0, "3380": 12698293248.0, "3385": 12698293248.0, "3390": 12698293248.0, "3395": 12698293248.0, "3400": 12698293248.0, "3405": 12698293248.0, "3410": 12698293248.0, "3415": 12698293248.0, "3420": 12698293248.0, "3425": 12698293248.0, "3430": 12698293248.0, "3435": 12698293248.0, "3440": 12698293248.0, "3445": 12698293248.0, "3450": 12698293248.0, "3455": 12698293248.0, "3460": 12698293248.0, "3465": 12698293248.0, "3470": 12698293248.0, "3475": 12698293248.0, "3480": 12698293248.0, "3485": 12698293248.0, "3490": 12698293248.0, "3495": 12698293248.0, "3500": 12698293248.0, "3505": 12698293248.0, "3510": 12698293248.0, "3515": 12698293248.0, "3520": 12698293248.0, "3525": 12698293248.0, "3530": 12698293248.0, "3535": 12698293248.0, "3540": 12698293248.0, "3545": 12698293248.0, "3550": 12698293248.0, "3555": 12698293248.0, "3560": 12698293248.0, "3565": 12698293248.0, "3570": 12698293248.0, "3575": 12698293248.0, "3580": 12698293248.0, "3585": 12698293248.0, "3590": 12698293248.0, "3595": 12698293248.0, "3600": 12698293248.0, "3605": 12698293248.0, "3610": 12698293248.0, "3615": 12698293248.0, "3620": 12698293248.0, "3625": 12698293248.0, "3630": 12698293248.0, "3635": 12698293248.0, "3640": 12698293248.0, "3645": 12698293248.0, "3650": 12698293248.0, "3655": 12698293248.0, "3660": 12698293248.0, "3665": 12698293248.0, "3670": 12698293248.0, "3675": 12698293248.0, "3680": 12698492928.0, "3685": 12698492928.0, "3690": 12698492928.0, "3695": 12698492928.0, "3700": 12698492928.0, "3705": 12698492928.0, "3710": 12698492928.0, "3715": 12698492928.0, "3720": 12698492928.0, "3725": 12698492928.0, "3730": 12698492928.0, "3735": 12698492928.0, "3740": 12698492928.0, "3745": 12698492928.0, "3750": 12698492928.0, "3755": 12698492928.0, "3760": 12698492928.0, "3765": 12698492928.0, "3770": 12698492928.0, "3775": 12698492928.0, "3780": 12698492928.0, "3785": 12698492928.0, "3790": 12698492928.0, "3795": 12698492928.0, "3800": 12698492928.0, "3805": 12698492928.0, "3810": 12698492928.0, "3815": 12698492928.0, "3820": 12698492928.0, "3825": 12698492928.0, "3830": 12698492928.0, "3835": 12698492928.0, "3840": 12698492928.0, "3845": 12698492928.0, "3850": 12698492928.0, "3855": 12698492928.0, "3860": 12698492928.0, "3865": 12698492928.0, "3870": 12698492928.0, "3875": 12698492928.0, "3880": 12698492928.0, "3885": 12698492928.0, "3890": 12698492928.0, "3895": 12698492928.0, "3900": 12698492928.0, "3905": 12698492928.0, "3910": 12698492928.0, "3915": 12698492928.0, "3920": 12698492928.0, "3925": 12698492928.0, "3930": 12698492928.0, "3935": 12698492928.0, "3940": 12698492928.0, "3945": 12698492928.0, "3950": 12698492928.0, "3955": 12698492928.0, "3960": 12698492928.0, "3965": 12698492928.0, "3970": 12698492928.0, "3975": 12698492928.0, "3980": 12698492928.0, "3985": 12698492928.0, "3990": 12698492928.0, "3995": 12698492928.0, "4000": 12698492928.0, "4005": 12698492928.0, "4010": 12698492928.0, "4015": 12698492928.0, "4020": 12698492928.0, "4025": 12698492928.0, "4030": 12698492928.0, "4035": 12698492928.0, "4040": 12698492928.0, "4045": 12698492928.0, "4050": 12698492928.0, "4055": 12698492928.0, "4060": 12698492928.0, "4065": 12698492928.0, "4070": 12698492928.0, "4075": 12698492928.0, "4080": 12698492928.0, "4085": 12698492928.0, "4090": 12698492928.0, "4095": 12698492928.0, "4100": 12698492928.0, "4105": 12698492928.0, "4110": 12698492928.0, "4115": 12698492928.0, "4120": 12698492928.0, "4125": 12698492928.0, "4130": 12698492928.0, "4135": 12698492928.0, "4140": 12698492928.0, "4145": 12698492928.0, "4150": 12698492928.0, "4155": 12698492928.0, "4160": 12698492928.0, "4165": 12698492928.0, "4170": 12698492928.0, "4175": 12698492928.0, "4180": 12698492928.0, "4185": 12698492928.0, "4190": 12698492928.0, "4195": 12698492928.0, "4200": 12698492928.0, "4205": 12698492928.0, "4210": 12698492928.0, "4215": 12698492928.0, "4220": 12698492928.0, "4225": 12698492928.0, "4230": 12698492928.0, "4235": 12698492928.0, "4240": 12698492928.0, "4245": 12698492928.0, "4250": 12698492928.0, "4255": 12698492928.0, "4260": 12698492928.0, "4265": 12698492928.0, "4270": 12698492928.0, "4275": 12698492928.0, "4280": 12698492928.0, "4285": 12698492928.0, "4290": 12698492928.0, "4295": 12698492928.0, "4300": 12698492928.0, "4305": 12698492928.0, "4310": 12698492928.0, "4315": 12698492928.0, "4320": 12698492928.0, "4325": 12698492928.0, "4330": 12698492928.0, "4335": 12698492928.0, "4340": 12698492928.0, "4345": 12698492928.0, "4350": 12698492928.0, "4355": 12698492928.0, "4360": 12698492928.0, "4365": 12698492928.0, "4370": 12698492928.0, "4375": 12698492928.0, "4380": 12698492928.0, "4385": 12698492928.0, "4390": 12698492928.0, "4395": 12698492928.0, "4400": 12698492928.0, "4405": 12698492928.0, "4410": 12698492928.0, "4415": 12698492928.0, "4420": 12698492928.0, "4425": 12698492928.0, "4430": 12698492928.0, "4435": 12698492928.0, "4440": 12698492928.0, "4445": 12698492928.0, "4450": 12698492928.0, "4455": 12698492928.0, "4460": 12698492928.0, "4465": 12698492928.0, "4470": 12698492928.0, "4475": 12698492928.0, "4480": 12698492928.0, "4485": 12698492928.0, "4490": 12698492928.0, "4495": 12698492928.0, "4500": 12698492928.0, "4505": 12698492928.0, "4510": 12698492928.0, "4515": 12698492928.0, "4520": 12698492928.0, "4525": 12698492928.0, "4530": 12698492928.0, "4535": 12698492928.0, "4540": 12698492928.0, "4545": 12698492928.0, "4550": 12698492928.0, "4555": 12698492928.0, "4560": 12698492928.0, "4565": 12698492928.0, "4570": 12698492928.0, "4575": 12698492928.0, "4580": 12698492928.0, "4585": 12698492928.0, "4590": 12698492928.0, "4595": 12698492928.0, "4600": 12698492928.0, "4605": 12698492928.0, "4610": 12698492928.0, "4615": 12698492928.0, "4620": 12698492928.0, "4625": 12698492928.0, "4630": 12698492928.0, "4635": 12698492928.0, "4640": 12698492928.0, "4645": 12698492928.0, "4650": 12698492928.0, "4655": 12698492928.0, "4660": 12698492928.0, "4665": 12698492928.0, "4670": 12698492928.0, "4675": 12698492928.0, "4680": 12698492928.0, "4685": 12698492928.0, "4690": 12698492928.0, "4695": 12698492928.0, "4700": 12698492928.0, "4705": 12698492928.0, "4710": 12698492928.0, "4715": 12698492928.0, "4720": 12698492928.0, "4725": 12698492928.0, "4730": 12698492928.0, "4735": 12698492928.0, "4740": 12698492928.0, "4745": 12698492928.0, "4750": 12698492928.0, "4755": 12698492928.0, "4760": 12698492928.0, "4765": 12698492928.0, "4770": 12698492928.0, "4775": 12698492928.0, "4780": 12698492928.0, "4785": 12698492928.0, "4790": 12698492928.0, "4795": 12698492928.0, "4800": 12698492928.0, "4805": 12698492928.0, "4810": 12698492928.0, "4815": 12698492928.0, "4820": 12698492928.0, "4825": 12698492928.0, "4830": 12698492928.0, "4835": 12698492928.0, "4840": 12698492928.0, "4845": 12698492928.0, "4850": 12698492928.0, "4855": 12698492928.0, "4860": 12698492928.0, "4865": 12698492928.0, "4870": 12698492928.0, "4875": 12698492928.0, "4880": 12698492928.0, "4885": 12698492928.0, "4890": 12698492928.0, "4895": 12698492928.0, "4900": 12698492928.0, "4905": 12698492928.0, "4910": 12698492928.0, "4915": 12698492928.0, "4920": 12698492928.0, "4925": 12698492928.0, "4930": 12698492928.0, "4935": 12698492928.0, "4940": 12698492928.0, "4945": 12698492928.0, "4950": 12698492928.0, "4955": 12698492928.0, "4960": 12698492928.0, "4965": 12698492928.0, "4970": 12698492928.0, "4975": 12698492928.0, "4980": 12698492928.0, "4985": 12698492928.0, "4990": 12698492928.0, "4995": 12698492928.0, "5000": 12698492928.0, "5005": 12698492928.0, "5010": 12698492928.0, "5015": 12698492928.0, "5020": 12698492928.0, "5025": 12698492928.0, "5030": 12698492928.0, "5035": 12698492928.0, "5040": 12698492928.0, "5045": 12698492928.0, "5050": 12698492928.0, "5055": 12698492928.0, "5060": 12698492928.0, "5065": 12698492928.0, "5070": 12698492928.0, "5075": 12698492928.0, "5080": 12698492928.0, "5085": 12698492928.0, "5090": 12698492928.0, "5095": 12698492928.0, "5100": 12698492928.0, "5105": 12698492928.0, "5110": 12698492928.0, "5115": 12698492928.0, "5120": 12698492928.0, "5125": 12698492928.0, "5130": 12698492928.0, "5135": 12698492928.0, "5140": 12698492928.0, "5145": 12698492928.0, "5150": 12698492928.0, "5155": 12698492928.0, "5160": 12698492928.0, "5165": 12698492928.0, "5170": 12698492928.0, "5175": 12698492928.0, "5180": 12698492928.0, "5185": 12698492928.0, "5190": 12698492928.0, "5195": 12698492928.0, "5200": 12698492928.0, "5205": 12698492928.0, "5210": 12698492928.0, "5215": 12698492928.0, "5220": 12698492928.0, "5225": 12698492928.0, "5230": 12698492928.0, "5235": 12698492928.0, "5240": 12698492928.0, "5245": 12698492928.0, "5250": 12698492928.0, "5255": 12698492928.0, "5260": 12698492928.0, "5265": 12698492928.0, "5270": 12698492928.0, "5275": 12698492928.0, "5280": 12698492928.0, "5285": 12698492928.0, "5290": 12698492928.0, "5295": 12698492928.0, "5300": 12698492928.0, "5305": 12698492928.0, "5310": 12698492928.0, "5315": 12698492928.0, "5320": 12698492928.0, "5325": 12698492928.0, "5330": 12698492928.0, "5335": 12698492928.0, "5340": 12698492928.0, "5345": 12698492928.0, "5350": 12698492928.0, "5355": 12698492928.0, "5360": 12698492928.0, "5365": 12698492928.0, "5370": 12698492928.0, "5375": 12698492928.0, "5380": 12698492928.0, "5385": 12698492928.0, "5390": 12698492928.0, "5395": 12698492928.0, "5400": 12698492928.0, "5405": 12698492928.0, "5410": 12698492928.0, "5415": 12698492928.0, "5420": 12698492928.0, "5425": 12698492928.0, "5430": 12698492928.0, "5435": 12698492928.0, "5440": 12698492928.0, "5445": 12698492928.0, "5450": 12698492928.0, "5455": 12698492928.0, "5460": 12698492928.0, "5465": 12698492928.0, "5470": 12698492928.0, "5475": 12698492928.0, "5480": 12698492928.0, "5485": 12698492928.0, "5490": 12698492928.0, "5495": 12698492928.0, "5500": 12698492928.0, "5505": 12698492928.0, "5510": 12698492928.0, "5515": 12698492928.0, "5520": 12698492928.0, "5525": 12698492928.0, "5530": 12698492928.0, "5535": 12698492928.0, "5540": 12698492928.0, "5545": 12698492928.0, "5550": 12698492928.0, "5555": 12698492928.0, "5560": 12698492928.0, "5565": 12698492928.0, "5570": 12698492928.0, "5575": 12698492928.0, "5580": 12698492928.0, "5585": 12698492928.0, "5590": 12698492928.0, "5595": 12698492928.0, "5600": 12698492928.0, "5605": 12698492928.0, "5610": 12698492928.0, "5615": 12698492928.0, "5620": 12698492928.0, "5625": 12698492928.0, "5630": 12698492928.0, "5635": 12698492928.0, "5640": 12698492928.0, "5645": 12698492928.0, "5650": 12698492928.0, "5655": 12698492928.0, "5660": 12698492928.0, "5665": 12698492928.0, "5670": 12698492928.0, "5675": 12698492928.0, "5680": 12698492928.0, "5685": 12698492928.0, "5690": 12698492928.0, "5695": 12698492928.0, "5700": 12698492928.0, "5705": 12698492928.0, "5710": 12698492928.0, "5715": 12698492928.0, "5720": 12698492928.0, "5725": 12698492928.0, "5730": 12698492928.0, "5735": 12698492928.0, "5740": 12698492928.0, "5745": 12698492928.0, "5750": 12698492928.0, "5755": 12698492928.0, "5760": 12698492928.0, "5765": 12698492928.0, "5770": 12698492928.0, "5775": 12698492928.0, "5780": 12698492928.0, "5785": 12698492928.0, "5790": 12698492928.0, "5795": 12698492928.0, "5800": 12698492928.0, "5805": 12698492928.0, "5810": 12698492928.0, "5815": 12698492928.0, "5820": 12698492928.0, "5825": 12698492928.0, "5830": 12698492928.0, "5835": 12698492928.0, "5840": 12698492928.0, "5845": 12698492928.0, "5850": 12698492928.0, "5855": 12698492928.0, "5860": 12698492928.0, "5865": 12698492928.0, "5870": 12698492928.0, "5875": 12698492928.0, "5880": 12698492928.0, "5885": 12698492928.0, "5890": 12698492928.0, "5895": 12698492928.0, "5900": 12698492928.0, "5905": 12698492928.0, "5910": 12698492928.0, "5915": 12698492928.0, "5920": 12698492928.0, "5925": 12698492928.0, "5930": 12698492928.0, "5935": 12698492928.0, "5940": 12698492928.0, "5945": 12698492928.0, "5950": 12698492928.0, "5955": 12698492928.0, "5960": 12698492928.0, "5965": 12698492928.0, "5970": 12698492928.0, "5975": 12698492928.0, "5980": 12698492928.0, "5985": 12698492928.0, "5990": 12698492928.0, "5995": 12698492928.0, "6000": 12698492928.0, "6005": 12698492928.0, "6010": 12698492928.0, "6015": 12698492928.0, "6020": 12698492928.0, "6025": 12698492928.0, "6030": 12698492928.0, "6035": 12698492928.0, "6040": 12698492928.0, "6045": 12698492928.0, "6050": 12698492928.0, "6055": 12698492928.0, "6060": 12698492928.0, "6065": 12698492928.0, "6070": 12698492928.0, "6075": 12698492928.0, "6080": 12698492928.0, "6085": 12698492928.0, "6090": 12698492928.0, "6095": 12698492928.0, "6100": 12698492928.0, "6105": 12698492928.0, "6110": 12698492928.0, "6115": 12698492928.0, "6120": 12698492928.0, "6125": 12698492928.0, "6130": 12698492928.0, "6135": 12698492928.0, "6140": 12698492928.0, "6145": 12698492928.0, "6150": 12698492928.0, "6155": 12698492928.0, "6160": 12698492928.0, "6165": 12698492928.0, "6170": 12698492928.0, "6175": 12698492928.0, "6180": 12698492928.0, "6185": 12698492928.0, "6190": 12698492928.0, "6195": 12698492928.0, "6200": 12698492928.0, "6205": 12698492928.0, "6210": 12698492928.0, "6215": 12698492928.0, "6220": 12698492928.0, "6225": 12698492928.0, "6230": 12698492928.0, "6235": 12698492928.0, "6240": 12698492928.0, "6245": 12698492928.0, "6250": 12698492928.0, "6255": 12698492928.0, "6260": 12698492928.0, "6265": 12698492928.0, "6270": 12698492928.0, "6275": 12698492928.0, "6280": 12698492928.0, "6285": 12698492928.0, "6290": 12698492928.0, "6295": 12698492928.0, "6300": 12698492928.0, "6305": 12698492928.0, "6310": 12698492928.0, "6315": 12698492928.0, "6320": 12698492928.0, "6325": 12698492928.0, "6330": 12698492928.0, "6335": 12698492928.0, "6340": 12698492928.0, "6345": 12698492928.0, "6350": 12698492928.0, "6355": 12698492928.0, "6360": 12698492928.0, "6365": 12698492928.0, "6370": 12698492928.0, "6375": 12698492928.0, "6380": 12698492928.0, "6385": 12698492928.0, "6390": 12698492928.0, "6395": 12698492928.0, "6400": 12698492928.0, "6405": 12698492928.0, "6410": 12698492928.0, "6415": 12698492928.0, "6420": 12698492928.0, "6425": 12698492928.0, "6430": 12698492928.0, "6435": 12698492928.0, "6440": 12698492928.0, "6445": 12698492928.0, "6450": 12698492928.0, "6455": 12698492928.0, "6460": 12698492928.0, "6465": 12698492928.0, "6470": 12698492928.0, "6475": 12698492928.0, "6480": 12698492928.0, "6485": 12698492928.0, "6490": 12698492928.0, "6495": 12698492928.0, "6500": 12698492928.0, "6505": 12698492928.0, "6510": 12698492928.0, "6515": 12698492928.0, "6520": 12698492928.0, "6525": 12698492928.0, "6530": 12698492928.0, "6535": 12698492928.0, "6540": 12698492928.0, "6545": 12698492928.0, "6550": 12698492928.0, "6555": 12698492928.0, "6560": 12698492928.0, "6565": 12698492928.0, "6570": 12698492928.0, "6575": 12698492928.0, "6580": 12698492928.0, "6585": 12698492928.0, "6590": 12698492928.0, "6595": 12698492928.0, "6600": 12698492928.0, "6605": 12698492928.0, "6610": 12698492928.0, "6615": 12698492928.0, "6620": 12698492928.0, "6625": 12698492928.0, "6630": 12698492928.0, "6635": 12698492928.0, "6640": 12698492928.0, "6645": 12698492928.0, "6650": 12698492928.0, "6655": 12698492928.0, "6660": 12698492928.0, "6665": 12698492928.0, "6670": 12698492928.0, "6675": 12698492928.0, "6680": 12698492928.0, "6685": 12698492928.0, "6690": 12698492928.0, "6695": 12698492928.0, "6700": 12698492928.0, "6705": 12698492928.0, "6710": 12698492928.0, "6715": 12698492928.0, "6720": 12698492928.0, "6725": 12698492928.0, "6730": 12698492928.0, "6735": 12698492928.0, "6740": 12698492928.0, "6745": 12698492928.0, "6750": 12698492928.0, "6755": 12698492928.0, "6760": 12698492928.0, "6765": 12698492928.0, "6770": 12698492928.0, "6775": 12698492928.0, "6780": 12698492928.0, "6785": 12698492928.0, "6790": 12698492928.0, "6795": 12698492928.0, "6800": 12698492928.0, "6805": 12698492928.0, "6810": 12698492928.0, "6815": 12698492928.0, "6820": 12698492928.0, "6825": 12698492928.0, "6830": 12698492928.0, "6835": 12698492928.0, "6840": 12698492928.0, "6845": 12698492928.0, "6850": 12698492928.0, "6855": 12698492928.0, "6860": 12698492928.0, "6865": 12698492928.0, "6870": 12698492928.0, "6875": 12698492928.0, "6880": 12698492928.0, "6885": 12698492928.0, "6890": 12698492928.0, "6895": 12698492928.0, "6900": 12698492928.0, "6905": 12698492928.0, "6910": 12698492928.0, "6915": 12698492928.0, "6920": 12698492928.0, "6925": 12698492928.0, "6930": 12698492928.0, "6935": 12698492928.0, "6940": 12698492928.0, "6945": 12698492928.0, "6950": 12698492928.0, "6955": 12698492928.0, "6960": 12698492928.0, "6965": 12698492928.0, "6970": 12698492928.0, "6975": 12698492928.0, "6980": 12698492928.0, "6985": 12698492928.0, "6990": 12698492928.0, "6995": 12698492928.0, "7000": 12698492928.0, "7005": 12698492928.0, "7010": 12698492928.0, "7015": 12698492928.0, "7020": 12698492928.0, "7025": 12698492928.0, "7030": 12698492928.0, "7035": 12698492928.0, "7040": 12698492928.0, "7045": 12698492928.0, "7050": 12698492928.0, "7055": 12698492928.0, "7060": 12698492928.0, "7065": 12698492928.0, "7070": 12698492928.0, "7075": 12698492928.0, "7080": 12698492928.0, "7085": 12698492928.0, "7090": 12698492928.0, "7095": 12698492928.0, "7100": 12698492928.0, "7105": 12698492928.0, "7110": 12698492928.0, "7115": 12698492928.0, "7120": 12698492928.0, "7125": 12698492928.0, "7130": 12698492928.0, "7135": 12698492928.0, "7140": 12698492928.0, "7145": 12698492928.0, "7150": 12698492928.0, "7155": 12698492928.0, "7160": 12698492928.0, "7165": 12698492928.0, "7170": 12698492928.0, "7175": 12698492928.0, "7180": 12698492928.0, "7185": 12698492928.0, "7190": 12698492928.0, "7195": 12698492928.0, "7200": 12698492928.0, "7205": 12698492928.0, "7210": 12698492928.0, "7215": 12698492928.0, "7220": 12698492928.0, "7225": 12698492928.0, "7230": 12698492928.0, "7235": 12698492928.0, "7240": 12698492928.0, "7245": 12698492928.0, "7250": 12698492928.0, "7255": 12698492928.0, "7260": 12698492928.0, "7265": 12698492928.0, "7270": 12698492928.0, "7275": 12698492928.0, "7280": 12698492928.0, "7285": 12698492928.0, "7290": 12698492928.0, "7295": 12698492928.0, "7300": 12698492928.0, "7305": 12698492928.0, "7310": 12698492928.0, "7315": 12698492928.0, "7320": 12698492928.0, "7325": 12698492928.0, "7330": 12698492928.0, "7335": 12698492928.0, "7340": 12698492928.0, "7345": 12698492928.0, "7350": 12698492928.0, "7355": 12698492928.0, "7360": 12698492928.0, "7365": 12698492928.0, "7370": 12698492928.0, "7375": 12698492928.0, "7380": 12698492928.0, "7385": 12698492928.0, "7390": 12698492928.0, "7395": 12698492928.0, "7400": 12698492928.0, "7405": 12698492928.0, "7410": 12698492928.0, "7415": 12698492928.0, "7420": 12698492928.0, "7425": 12698492928.0, "7430": 12698492928.0, "7435": 12698492928.0, "7440": 12698492928.0, "7445": 12698492928.0, "7450": 12698492928.0, "7455": 12698492928.0, "7460": 12698492928.0, "7465": 12698492928.0, "7470": 12698492928.0, "7475": 12698492928.0, "7480": 12698492928.0, "7485": 12698492928.0, "7490": 12698492928.0, "7495": 12698492928.0, "7500": 12698492928.0, "7505": 12698492928.0, "7510": 12698492928.0, "7515": 12698492928.0, "7520": 12698492928.0, "7525": 12698492928.0, "7530": 12698492928.0, "7535": 12698492928.0, "7540": 12698492928.0, "7545": 12698492928.0, "7550": 12698492928.0, "7555": 12698492928.0, "7560": 12698492928.0, "7565": 12698492928.0, "7570": 12698492928.0, "7575": 12698492928.0, "7580": 12698492928.0, "7585": 12698492928.0, "7590": 12698492928.0, "7595": 12698492928.0, "7600": 12698492928.0, "7605": 12698492928.0, "7610": 12698492928.0, "7615": 12698492928.0, "7620": 12698492928.0, "7625": 12698492928.0, "7630": 12698492928.0, "7635": 12698492928.0, "7640": 12698492928.0, "7645": 12698492928.0, "7650": 12698492928.0, "7655": 12698492928.0, "7660": 12698492928.0, "7665": 12698492928.0, "7670": 12698492928.0, "7675": 12698492928.0, "7680": 12698492928.0, "7685": 12698492928.0, "7690": 12698492928.0, "7695": 12698492928.0, "7700": 12698492928.0, "7705": 12698492928.0, "7710": 12698492928.0, "7715": 12698492928.0, "7720": 12698492928.0, "7725": 12698492928.0, "7730": 12698492928.0, "7735": 12698492928.0, "7740": 12698492928.0, "7745": 12698492928.0, "7750": 12698492928.0, "7755": 12698492928.0, "7760": 12698492928.0, "7765": 12698492928.0, "7770": 12698492928.0, "7775": 12698492928.0, "7780": 12698492928.0, "7785": 12698492928.0, "7790": 12698492928.0, "7795": 12698492928.0, "7800": 12698492928.0, "7805": 12698492928.0, "7810": 12698492928.0, "7815": 12698492928.0, "7820": 12698492928.0, "7825": 12698492928.0, "7830": 12698492928.0, "7835": 12698492928.0, "7840": 12698492928.0, "7845": 12698492928.0, "7850": 12698492928.0, "7855": 12698492928.0, "7860": 12698492928.0, "7865": 12698492928.0, "7870": 12698492928.0, "7875": 12698492928.0, "7880": 12698492928.0, "7885": 12698492928.0, "7890": 12698492928.0, "7895": 12698492928.0, "7900": 12698492928.0, "7905": 12698492928.0, "7910": 12698492928.0, "7915": 12698492928.0, "7920": 12698492928.0, "7925": 12698492928.0, "7930": 12698492928.0, "7935": 12698492928.0, "7940": 12698492928.0, "7945": 12698492928.0, "7950": 12698492928.0, "7955": 12698492928.0, "7960": 12698492928.0, "7965": 12698492928.0, "7970": 12698492928.0, "7975": 12698492928.0, "7980": 12698492928.0, "7985": 12698492928.0, "7990": 12698492928.0, "7995": 12698492928.0, "8000": 12698492928.0, "8005": 12698492928.0, "8010": 12698492928.0, "8015": 12698492928.0, "8020": 12698492928.0, "8025": 12698492928.0, "8030": 12698492928.0, "8035": 12698492928.0, "8040": 12698492928.0, "8045": 12698492928.0, "8050": 12698492928.0, "8055": 12698492928.0, "8060": 12698492928.0, "8065": 12698492928.0, "8070": 12698492928.0, "8075": 12698492928.0, "8080": 12698492928.0, "8085": 12698492928.0, "8090": 12698492928.0, "8095": 12698492928.0, "8100": 12698492928.0, "8105": 12698492928.0, "8110": 12698492928.0, "8115": 12698492928.0, "8120": 12698492928.0, "8125": 12698492928.0, "8130": 12698492928.0, "8135": 12698492928.0, "8140": 12698492928.0, "8145": 12698492928.0, "8150": 12698492928.0, "8155": 12698492928.0, "8160": 12698492928.0, "8165": 12698492928.0, "8170": 12698492928.0, "8175": 12698492928.0, "8180": 12698492928.0, "8185": 12698492928.0, "8190": 12698492928.0, "8195": 12698492928.0, "8200": 12698492928.0, "8205": 12698492928.0, "8210": 12698492928.0, "8215": 12698492928.0, "8220": 12698492928.0, "8225": 12698492928.0, "8230": 12698492928.0, "8235": 12698492928.0, "8240": 12698492928.0, "8245": 12698492928.0, "8250": 12698492928.0, "8255": 12698492928.0, "8260": 12698492928.0, "8265": 12698492928.0, "8270": 12698492928.0, "8275": 12698492928.0, "8280": 12698492928.0, "8285": 12698492928.0, "8290": 12698492928.0, "8295": 12698492928.0, "8300": 12698492928.0, "8305": 12698492928.0, "8310": 12698492928.0, "8315": 12698492928.0, "8320": 12698492928.0, "8325": 12698492928.0, "8330": 12698492928.0, "8335": 12698492928.0, "8340": 12698492928.0, "8345": 12698492928.0, "8350": 12698492928.0, "8355": 12698492928.0, "8360": 12698492928.0, "8365": 12698492928.0, "8370": 12698492928.0, "8375": 12698492928.0, "8380": 12698492928.0, "8385": 12698492928.0, "8390": 12698492928.0, "8395": 12698492928.0, "8400": 12698492928.0, "8405": 12698492928.0, "8410": 12698492928.0, "8415": 12698492928.0, "8420": 12698492928.0, "8425": 12698492928.0, "8430": 12698492928.0, "8435": 12698492928.0, "8440": 12698492928.0, "8445": 12698492928.0, "8450": 12698492928.0, "8455": 12698492928.0, "8460": 12698492928.0, "8465": 12698492928.0, "8470": 12698492928.0, "8475": 12698492928.0, "8480": 12698492928.0, "8485": 12698492928.0, "8490": 12698492928.0, "8495": 12698492928.0, "8500": 12698492928.0, "8505": 12698492928.0, "8510": 12698492928.0, "8515": 12698492928.0, "8520": 12698492928.0, "8525": 12698492928.0, "8530": 12698492928.0, "8535": 12698492928.0, "8540": 12698492928.0, "8545": 12698492928.0, "8550": 12698492928.0, "8555": 12698492928.0, "8560": 12698492928.0, "8565": 12698492928.0, "8570": 12698492928.0, "8575": 12698492928.0, "8580": 12698492928.0, "8585": 12698492928.0, "8590": 12698492928.0, "8595": 12698492928.0, "8600": 12698492928.0, "8605": 12698492928.0, "8610": 12698492928.0, "8615": 12698492928.0, "8620": 12698492928.0, "8625": 12698492928.0, "8630": 12698492928.0, "8635": 12698492928.0, "8640": 12698492928.0, "8645": 12698492928.0, "8650": 12698492928.0, "8655": 12698492928.0, "8660": 12698492928.0, "8665": 12698492928.0, "8670": 12698492928.0, "8675": 12698492928.0, "8680": 12698492928.0, "8685": 12698492928.0, "8690": 12698492928.0, "8695": 12698492928.0, "8700": 12698492928.0, "8705": 12698492928.0, "8710": 12698492928.0, "8715": 12698492928.0, "8720": 12698492928.0, "8725": 12698492928.0, "8730": 12698492928.0, "8735": 12698492928.0, "8740": 12698492928.0, "8745": 12698492928.0, "8750": 12698492928.0, "8755": 12698492928.0, "8760": 12698492928.0, "8765": 12698492928.0, "8770": 12698492928.0, "8775": 12698492928.0, "8780": 12698492928.0, "8785": 12698492928.0, "8790": 12698492928.0, "8795": 12698492928.0, "8800": 12698492928.0, "8805": 12698492928.0, "8810": 12698492928.0, "8815": 12698492928.0, "8820": 12698492928.0, "8825": 12698492928.0, "8830": 12698492928.0, "8835": 12698492928.0, "8840": 12698492928.0, "8845": 12698492928.0, "8850": 12698492928.0, "8855": 12698492928.0, "8860": 12698492928.0, "8865": 12698492928.0, "8870": 12698492928.0, "8875": 12698492928.0, "8880": 12698492928.0, "8885": 12698492928.0, "8890": 12698492928.0, "8895": 12698492928.0, "8900": 12698492928.0, "8905": 12698492928.0, "8910": 12698492928.0, "8915": 12698492928.0, "8920": 12698492928.0, "8925": 12698492928.0, "8930": 12698492928.0, "8935": 12698492928.0, "8940": 12698492928.0, "8945": 12698492928.0, "8950": 12698492928.0, "8955": 12698492928.0, "8960": 12698492928.0, "8965": 12698492928.0, "8970": 12698492928.0, "8975": 12698492928.0, "8980": 12698492928.0, "8985": 12698492928.0, "8990": 12698492928.0, "8995": 12698492928.0, "9000": 12698492928.0, "9005": 12698492928.0, "9010": 12698492928.0, "9015": 12698492928.0, "9020": 12698492928.0, "9025": 12698492928.0, "9030": 12698492928.0, "9035": 12698492928.0, "9040": 12698492928.0, "9045": 12698492928.0, "9050": 12698492928.0, "9055": 12698492928.0, "9060": 12698492928.0, "9065": 12698492928.0, "9070": 12698492928.0, "9075": 12698492928.0, "9080": 12698492928.0, "9085": 12698492928.0, "9090": 12698492928.0, "9095": 12698492928.0, "9100": 12698492928.0, "9105": 12698492928.0, "9110": 12698492928.0, "9115": 12698492928.0, "9120": 12698492928.0, "9125": 12698492928.0, "9130": 12698492928.0, "9135": 12698492928.0, "9140": 12698492928.0, "9145": 12698492928.0, "9150": 12698492928.0, "9155": 12698492928.0, "9160": 12698492928.0, "9165": 12698492928.0, "9170": 12698492928.0, "9175": 12698492928.0, "9180": 12698492928.0, "9185": 12698492928.0, "9190": 12698492928.0, "9195": 12698492928.0, "9200": 12698492928.0, "9205": 12698492928.0, "9210": 12698492928.0, "9215": 12698492928.0, "9220": 12698492928.0, "9225": 12698492928.0, "9230": 12698492928.0, "9235": 12698492928.0, "9240": 12698492928.0, "9245": 12698492928.0, "9250": 12698492928.0, "9255": 12698492928.0, "9260": 12698492928.0, "9265": 12698492928.0, "9270": 12698492928.0, "9275": 12698492928.0, "9280": 12698492928.0, "9285": 12698492928.0, "9290": 12698492928.0, "9295": 12698492928.0, "9300": 12698492928.0, "9305": 12698492928.0, "9310": 12698492928.0, "9315": 12698492928.0, "9320": 12698492928.0, "9325": 12698492928.0, "9330": 12698492928.0, "9335": 12698492928.0, "9340": 12698492928.0, "9345": 12698492928.0, "9350": 12698492928.0, "9355": 12698492928.0, "9360": 12698492928.0, "9365": 12698492928.0, "9370": 12698492928.0, "9375": 12698492928.0, "9380": 12698492928.0, "9385": 12698492928.0, "9390": 12698492928.0, "9395": 12698492928.0, "9400": 12698492928.0, "9405": 12698492928.0, "9410": 12698492928.0, "9415": 12698492928.0, "9420": 12698492928.0, "9425": 12698492928.0, "9430": 12698492928.0, "9435": 12698492928.0, "9440": 12698492928.0, "9445": 12698492928.0, "9450": 12698492928.0, "9455": 12698492928.0, "9460": 12698492928.0, "9465": 12698492928.0, "9470": 12698492928.0, "9475": 12698492928.0, "9480": 12698492928.0, "9485": 12698492928.0, "9490": 12698492928.0, "9495": 12698492928.0, "9500": 12698492928.0, "9505": 12698492928.0, "9510": 12698492928.0, "9515": 12698492928.0, "9520": 12698492928.0, "9525": 12698492928.0, "9530": 12698492928.0, "9535": 12698492928.0, "9540": 12698492928.0, "9545": 12698492928.0, "9550": 12698492928.0, "9555": 12698492928.0, "9560": 12698492928.0, "9565": 12698492928.0, "9570": 12698492928.0, "9575": 12698492928.0, "9580": 12698492928.0, "9585": 12698492928.0, "9590": 12698492928.0, "9595": 12698492928.0, "9600": 12698492928.0, "9605": 12698492928.0, "9610": 12698492928.0, "9615": 12698492928.0, "9620": 12698492928.0, "9625": 12698492928.0, "9630": 12698492928.0, "9635": 12698492928.0, "9640": 12698492928.0, "9645": 12698492928.0, "9650": 12698492928.0, "9655": 12698492928.0, "9660": 12698492928.0, "9665": 12698492928.0, "9670": 12698492928.0, "9675": 12698492928.0, "9680": 12698492928.0, "9685": 12698492928.0, "9690": 12698492928.0, "9695": 12698492928.0, "9700": 12698492928.0, "9705": 12698492928.0, "9710": 12698492928.0, "9715": 12698492928.0, "9720": 12698492928.0, "9725": 12698492928.0, "9730": 12698492928.0, "9735": 12698492928.0, "9740": 12698492928.0, "9745": 12698492928.0, "9750": 12698492928.0, "9755": 12698492928.0, "9760": 12698492928.0, "9765": 12698492928.0, "9770": 12698492928.0, "9775": 12698492928.0, "9780": 12698492928.0, "9785": 12698492928.0, "9790": 12698492928.0, "9795": 12698492928.0, "9800": 12698492928.0, "9805": 12698492928.0, "9810": 12698492928.0, "9815": 12698492928.0, "9820": 12698492928.0, "9825": 12698492928.0, "9830": 12698492928.0, "9835": 12698492928.0, "9840": 12698492928.0, "9845": 12698492928.0, "9850": 12698492928.0, "9855": 12698492928.0, "9860": 12698492928.0, "9865": 12698492928.0, "9870": 12698492928.0, "9875": 12698492928.0, "9880": 12698492928.0, "9885": 12698492928.0, "9890": 12698492928.0, "9895": 12698492928.0, "9900": 12698492928.0, "9905": 12698492928.0, "9910": 12698492928.0, "9915": 12698492928.0, "9920": 12698492928.0, "9925": 12698492928.0, "9930": 12698492928.0, "9935": 12698492928.0, "9940": 12698492928.0, "9945": 12698492928.0, "9950": 12698492928.0, "9955": 12698492928.0, "9960": 12698492928.0, "9965": 12698492928.0, "9970": 12698492928.0, "9975": 12698492928.0, "9980": 12698492928.0, "9985": 12698492928.0, "9990": 12698492928.0, "9995": 12698492928.0, "10000": 12698492928.0, "10005": 12698492928.0, "10010": 12698492928.0, "10015": 12698492928.0, "10020": 12698492928.0, "10025": 12698492928.0, "10030": 12698492928.0, "10035": 12698492928.0, "10040": 12698492928.0, "10045": 12698492928.0, "10050": 12698492928.0, "10055": 12698492928.0, "10060": 12698492928.0, "10065": 12698492928.0, "10070": 12698492928.0, "10075": 12698492928.0, "10080": 12698492928.0, "10085": 12698492928.0, "10090": 12698492928.0, "10095": 12698492928.0, "10100": 12698492928.0, "10105": 12698492928.0, "10110": 12698492928.0, "10115": 12698492928.0, "10120": 12698492928.0, "10125": 12698492928.0, "10130": 12698492928.0, "10135": 12698492928.0, "10140": 12698492928.0, "10145": 12698492928.0, "10150": 12698492928.0, "10155": 12698492928.0, "10160": 12698492928.0, "10165": 12698492928.0, "10170": 12698492928.0, "10175": 12698492928.0, "10180": 12698492928.0, "10185": 12698492928.0, "10190": 12698492928.0, "10195": 12698492928.0, "10200": 12698492928.0, "10205": 12698492928.0, "10210": 12698492928.0, "10215": 12698492928.0, "10220": 12698492928.0, "10225": 12698492928.0, "10230": 12698492928.0, "10235": 12698492928.0, "10240": 12698492928.0, "10245": 12698492928.0, "10250": 12698492928.0, "10255": 12698492928.0, "10260": 12698492928.0, "10265": 12698492928.0, "10270": 12698492928.0, "10275": 12698492928.0, "10280": 12698492928.0, "10285": 12698492928.0, "10290": 12698492928.0, "10295": 12698492928.0, "10300": 12698492928.0, "10305": 12698492928.0, "10310": 12698492928.0, "10315": 12698492928.0, "10320": 12698492928.0, "10325": 12698492928.0, "10330": 12698492928.0, "10335": 12698492928.0, "10340": 12698492928.0, "10345": 12698492928.0, "10350": 12698492928.0, "10355": 12698492928.0, "10360": 12698492928.0, "10365": 12698492928.0, "10370": 12698492928.0, "10375": 12698492928.0, "10380": 12698492928.0, "10385": 12698492928.0, "10390": 12698492928.0, "10395": 12698492928.0, "10400": 12698492928.0, "10405": 12698492928.0, "10410": 12698492928.0, "10415": 12698492928.0, "10420": 12698492928.0, "10425": 12698492928.0, "10430": 12698492928.0, "10435": 12698492928.0, "10440": 12698492928.0, "10445": 12698492928.0, "10450": 12698492928.0, "10455": 12698492928.0, "10460": 12698492928.0, "10465": 12698492928.0, "10470": 12698492928.0, "10475": 12698492928.0, "10480": 12698492928.0, "10485": 12698492928.0, "10490": 12698492928.0, "10495": 12698492928.0, "10500": 12698492928.0, "10505": 12698492928.0, "10510": 12698492928.0, "10515": 12698492928.0, "10520": 12698492928.0, "10525": 12698492928.0, "10530": 12698492928.0, "10535": 12698492928.0, "10540": 12698492928.0, "10545": 12698492928.0, "10550": 12698492928.0, "10555": 12698492928.0, "10560": 12698492928.0, "10565": 12698492928.0, "10570": 12698492928.0, "10575": 12698492928.0, "10580": 12698492928.0, "10585": 12698492928.0, "10590": 12698492928.0, "10595": 12698492928.0, "10600": 12698492928.0, "10605": 12698492928.0, "10610": 12698492928.0, "10615": 12698492928.0, "10620": 12698492928.0, "10625": 12698492928.0, "10630": 12698492928.0, "10635": 12698492928.0, "10640": 12698492928.0, "10645": 12698492928.0, "10650": 12698492928.0, "10655": 12698492928.0, "10660": 12698492928.0, "10665": 12698492928.0, "10670": 12698492928.0, "10675": 12698492928.0, "10680": 12698492928.0, "10685": 12698492928.0, "10690": 12698492928.0, "10695": 12698492928.0, "10700": 12698492928.0, "10705": 12698492928.0, "10710": 12698492928.0, "10715": 12698492928.0, "10720": 12698492928.0, "10725": 12698492928.0, "10730": 12698492928.0, "10735": 12698492928.0, "10740": 12698492928.0, "10745": 12698492928.0, "10750": 12698492928.0, "10755": 12698492928.0, "10760": 12698492928.0, "10765": 12698492928.0, "10770": 12698492928.0, "10775": 12698492928.0, "10780": 12698492928.0, "10785": 12698492928.0, "10790": 12698492928.0, "10795": 12698492928.0, "10800": 12698492928.0, "10805": 12698492928.0, "10810": 12698492928.0, "10815": 12698492928.0, "10820": 12698492928.0, "10825": 12698492928.0, "10830": 12698492928.0, "10835": 12698492928.0, "10840": 12698492928.0, "10845": 12698492928.0, "10850": 12698492928.0, "10855": 12698492928.0, "10860": 12698492928.0, "10865": 12698492928.0, "10870": 12698492928.0, "10875": 12698492928.0, "10880": 12698492928.0, "10885": 12698492928.0, "10890": 12698492928.0, "10895": 12698492928.0, "10900": 12698492928.0, "10905": 12698492928.0, "10910": 12698492928.0, "10915": 12698492928.0, "10920": 12698492928.0, "10925": 12698492928.0, "10930": 12698492928.0, "10935": 12698492928.0, "10940": 12698492928.0, "10945": 12698492928.0, "10950": 12698492928.0, "10955": 12698492928.0, "10960": 12698492928.0, "10965": 12698492928.0, "10970": 12698492928.0, "10975": 12698492928.0, "10980": 12698492928.0, "10985": 12698492928.0, "10990": 12698492928.0, "10995": 12698492928.0, "11000": 12698492928.0, "11005": 12698492928.0, "11010": 12698492928.0, "11015": 12698492928.0, "11020": 12698492928.0, "11025": 12698492928.0, "11030": 12698492928.0, "11035": 12698492928.0, "11040": 12698492928.0, "11045": 12698492928.0, "11050": 12698492928.0, "11055": 12698492928.0, "11060": 12698492928.0, "11065": 12698492928.0, "11070": 12698492928.0, "11075": 12698492928.0, "11080": 12698492928.0, "11085": 12698492928.0, "11090": 12698492928.0, "11095": 12698492928.0, "11100": 12698492928.0, "11105": 12698492928.0, "11110": 12698492928.0, "11115": 12698492928.0, "11120": 12698492928.0, "11125": 12698492928.0, "11130": 12698492928.0, "11135": 12698492928.0, "11140": 12698492928.0, "11145": 12698492928.0, "11150": 12698492928.0, "11155": 12698492928.0, "11160": 12698492928.0, "11165": 12698492928.0, "11170": 12698492928.0, "11175": 12698492928.0, "11180": 12698492928.0, "11185": 12698492928.0, "11190": 12698492928.0, "11195": 12698492928.0, "11200": 12698492928.0, "11205": 12698492928.0, "11210": 12698492928.0, "11215": 12698492928.0, "11220": 12698492928.0, "11225": 12698492928.0, "11230": 12698492928.0, "11235": 12698492928.0, "11240": 12698492928.0, "11245": 12698492928.0, "11250": 12698492928.0, "11255": 12698492928.0, "11260": 12698492928.0, "11265": 12698492928.0, "11270": 12698492928.0, "11275": 12698492928.0, "11280": 12698492928.0, "11285": 12698492928.0, "11290": 12698492928.0, "11295": 12698492928.0, "11300": 12698492928.0, "11305": 12698492928.0, "11310": 12698492928.0, "11315": 12698492928.0, "11320": 12698492928.0, "11325": 12698492928.0, "11330": 12698492928.0, "11335": 12698492928.0, "11340": 12698492928.0, "11345": 12698492928.0, "11350": 12698492928.0, "11355": 12698492928.0, "11360": 12698492928.0, "11365": 12698492928.0, "11370": 12698492928.0, "11375": 12698492928.0, "11380": 12698492928.0, "11385": 12698492928.0, "11390": 12698492928.0, "11395": 12698492928.0, "11400": 12698492928.0, "11405": 12698492928.0, "11410": 12698492928.0, "11415": 12698492928.0, "11420": 12698492928.0, "11425": 12698492928.0, "11430": 12698492928.0, "11435": 12698492928.0, "11440": 12698492928.0, "11445": 12698492928.0, "11450": 12698492928.0, "11455": 12698492928.0, "11460": 12698492928.0, "11465": 12698492928.0, "11470": 12698492928.0, "11475": 12698492928.0, "11480": 12698492928.0, "11485": 12698492928.0, "11490": 12698492928.0, "11495": 12698492928.0, "11500": 12698492928.0, "11505": 12698492928.0, "11510": 12698492928.0, "11515": 12698492928.0, "11520": 12698492928.0, "11525": 12698492928.0, "11530": 12698492928.0, "11535": 12698492928.0, "11540": 12698492928.0, "11545": 12698492928.0, "11550": 12698492928.0, "11555": 12698492928.0, "11560": 12698492928.0, "11565": 12698492928.0, "11570": 12698492928.0, "11575": 12698492928.0, "11580": 12698492928.0, "11585": 12698492928.0, "11590": 12698492928.0, "11595": 12698492928.0, "11600": 12698492928.0, "11605": 12698492928.0, "11610": 12698492928.0, "11615": 12698492928.0, "11620": 12698492928.0, "11625": 12698492928.0, "11630": 12698492928.0, "11635": 12698492928.0, "11640": 12698492928.0, "11645": 12698492928.0, "11650": 12698492928.0, "11655": 12698492928.0, "11660": 12698492928.0, "11665": 12698492928.0, "11670": 12698492928.0, "11675": 12698492928.0, "11680": 12698492928.0, "11685": 12698492928.0, "11690": 12698492928.0, "11695": 12698492928.0, "11700": 12698492928.0, "11705": 12698492928.0, "11710": 12698492928.0, "11715": 12698492928.0, "11720": 12698492928.0, "11725": 12698492928.0, "11730": 12698492928.0, "11735": 12698492928.0, "11740": 12698492928.0, "11745": 12698492928.0, "11750": 12698492928.0, "11755": 12698492928.0, "11760": 12698492928.0, "11765": 12698492928.0, "11770": 12698492928.0, "11775": 12698492928.0, "11780": 12698492928.0, "11785": 12698492928.0, "11790": 12698492928.0, "11795": 12698492928.0, "11800": 12698492928.0, "11805": 12698492928.0, "11810": 12698492928.0, "11815": 12698492928.0, "11820": 12698492928.0, "11825": 12698492928.0, "11830": 12698492928.0, "11835": 12698492928.0, "11840": 12698492928.0, "11845": 12698492928.0, "11850": 12698492928.0, "11855": 12698492928.0, "11860": 12698492928.0, "11865": 12698492928.0, "11870": 12698492928.0, "11875": 12698492928.0, "11880": 12698492928.0, "11885": 12698492928.0, "11890": 12698492928.0, "11895": 12698492928.0, "11900": 12698492928.0, "11905": 12698492928.0, "11910": 12698492928.0, "11915": 12698492928.0, "11920": 12698492928.0, "11925": 12698492928.0, "11930": 12698492928.0, "11935": 12698492928.0, "11940": 12698492928.0, "11945": 12698492928.0, "11950": 12698492928.0, "11955": 12698492928.0, "11960": 12698492928.0, "11965": 12698492928.0, "11970": 12698492928.0, "11975": 12698492928.0, "11980": 12698492928.0, "11985": 12698492928.0, "11990": 12698492928.0, "11995": 12698492928.0, "12000": 12698492928.0, "12005": 12698492928.0, "12010": 12698492928.0, "12015": 12698492928.0, "12020": 12698492928.0, "12025": 12698492928.0, "12030": 12698492928.0, "12035": 12698492928.0, "12040": 12698492928.0, "12045": 12698492928.0, "12050": 12698492928.0, "12055": 12698492928.0, "12060": 12698492928.0, "12065": 12698492928.0, "12070": 12698492928.0, "12075": 12698492928.0, "12080": 12698492928.0, "12085": 12698492928.0, "12090": 12698492928.0, "12095": 12698492928.0, "12100": 12698492928.0, "12105": 12698492928.0, "12110": 12698492928.0, "12115": 12698492928.0, "12120": 12698492928.0, "12125": 12698492928.0, "12130": 12698492928.0, "12135": 12698492928.0, "12140": 12698492928.0, "12145": 12698492928.0, "12150": 12698492928.0, "12155": 12698492928.0, "12160": 12698492928.0, "12165": 12698492928.0, "12170": 12698492928.0, "12175": 12698492928.0, "12180": 12698492928.0, "12185": 12698492928.0, "12190": 12698492928.0, "12195": 12698492928.0, "12200": 12698492928.0, "12205": 12698492928.0, "12210": 12698492928.0, "12215": 12698492928.0, "12220": 12698492928.0, "12225": 12698492928.0, "12230": 12698492928.0, "12235": 12698492928.0, "12240": 12698492928.0, "12245": 12698492928.0, "12250": 12698492928.0, "12255": 12698492928.0, "12260": 12698492928.0, "12265": 12698492928.0, "12270": 12698492928.0, "12275": 12698492928.0, "12280": 12698492928.0, "12285": 12698492928.0, "12290": 12698492928.0, "12295": 12698492928.0, "12300": 12698492928.0, "12305": 12698492928.0, "12310": 12698492928.0, "12315": 12698492928.0, "12320": 12698492928.0, "12325": 12698492928.0, "12330": 12698492928.0, "12335": 12698492928.0, "12340": 12698492928.0, "12345": 12698492928.0, "12350": 12698492928.0, "12355": 12698492928.0, "12360": 12698492928.0, "12365": 12698492928.0, "12370": 12698492928.0, "12375": 12698492928.0, "12380": 12698492928.0, "12385": 12698492928.0, "12390": 12698492928.0, "12395": 12698492928.0, "12400": 12698492928.0, "12405": 12698492928.0, "12410": 12698492928.0, "12415": 12698492928.0, "12420": 12698492928.0, "12425": 12698492928.0, "12430": 12698492928.0, "12435": 12698492928.0, "12440": 12698492928.0, "12445": 12698492928.0, "12450": 12698492928.0, "12455": 12698492928.0, "12460": 12698492928.0, "12465": 12698492928.0, "12470": 12698492928.0, "12475": 12698492928.0, "12480": 12698492928.0, "12485": 12698492928.0, "12490": 12698492928.0, "12495": 12698492928.0, "12500": 12698492928.0, "12505": 12698492928.0, "12510": 12698492928.0, "12515": 12698492928.0, "12520": 12698492928.0, "12525": 12698492928.0, "12530": 12698492928.0, "12535": 12698492928.0, "12540": 12698492928.0, "12545": 12698492928.0, "12550": 12698492928.0, "12555": 12698492928.0, "12560": 12698492928.0, "12565": 12698492928.0, "12570": 12698492928.0, "12575": 12698492928.0, "12580": 12698492928.0, "12585": 12698492928.0, "12590": 12698492928.0, "12595": 12698492928.0, "12600": 12698492928.0, "12605": 12698492928.0, "12610": 12698492928.0, "12615": 12698492928.0, "12620": 12698492928.0, "12625": 12698492928.0, "12630": 12698492928.0, "12635": 12698492928.0, "12640": 12698492928.0, "12645": 12698492928.0, "12650": 12698492928.0, "12655": 12698492928.0, "12660": 12698492928.0, "12665": 12698492928.0, "12670": 12698492928.0, "12675": 12698492928.0, "12680": 12698492928.0, "12685": 12698492928.0, "12690": 12698492928.0, "12695": 12698492928.0, "12700": 12698492928.0, "12705": 12698492928.0, "12710": 12698492928.0, "12715": 12698492928.0, "12720": 12698492928.0, "12725": 12698492928.0, "12730": 12698492928.0, "12735": 12698492928.0, "12740": 12698492928.0, "12745": 12698492928.0, "12750": 12698492928.0, "12755": 12698492928.0, "12760": 12698492928.0, "12765": 12698492928.0, "12770": 12698492928.0, "12775": 12698492928.0, "12780": 12698492928.0, "12785": 12698492928.0, "12790": 12698492928.0, "12795": 12698492928.0, "12800": 12698492928.0, "12805": 12698492928.0, "12810": 12698492928.0, "12815": 12698492928.0, "12820": 12698492928.0, "12825": 12698492928.0, "12830": 12698492928.0, "12835": 12698492928.0, "12840": 12698492928.0, "12845": 12698492928.0, "12850": 12698492928.0, "12855": 12698492928.0, "12860": 12698492928.0, "12865": 12698492928.0, "12870": 12698492928.0, "12875": 12698492928.0, "12880": 12698492928.0, "12885": 12698492928.0, "12890": 12698492928.0, "12895": 12698492928.0, "12900": 12698492928.0, "12905": 12698492928.0, "12910": 12698492928.0, "12915": 12698492928.0, "12920": 12698492928.0, "12925": 12698492928.0, "12930": 12698492928.0, "12935": 12698492928.0, "12940": 12698492928.0, "12945": 12698492928.0, "12950": 12698492928.0, "12955": 12698492928.0, "12960": 12698492928.0, "12965": 12698492928.0, "12970": 12698492928.0, "12975": 12698492928.0, "12980": 12698492928.0, "12985": 12698492928.0, "12990": 12698492928.0, "12995": 12698492928.0, "13000": 12698492928.0, "13005": 12698492928.0, "13010": 12698492928.0, "13015": 12698492928.0, "13020": 12698492928.0, "13025": 12698492928.0, "13030": 12698492928.0, "13035": 12698492928.0, "13040": 12698492928.0, "13045": 12698492928.0, "13050": 12698492928.0, "13055": 12698492928.0, "13060": 12698492928.0, "13065": 12698492928.0, "13070": 12698492928.0, "13075": 12698492928.0, "13080": 12698492928.0, "13085": 12698492928.0, "13090": 12698492928.0, "13095": 12698492928.0, "13100": 12698492928.0, "13105": 12698492928.0, "13110": 12698492928.0, "13115": 12698492928.0, "13120": 12698492928.0, "13125": 12698492928.0, "13130": 12698492928.0, "13135": 12698492928.0, "13140": 12698492928.0, "13145": 12698492928.0, "13150": 12698492928.0, "13155": 12698492928.0, "13160": 12698492928.0, "13165": 12698492928.0, "13170": 12698492928.0, "13175": 12698492928.0, "13180": 12698492928.0, "13185": 12698492928.0, "13190": 12698492928.0, "13195": 12698492928.0, "13200": 12698492928.0, "13205": 12698492928.0, "13210": 12698492928.0, "13215": 12698492928.0, "13220": 12698492928.0, "13225": 12698492928.0, "13230": 12698492928.0, "13235": 12698492928.0, "13240": 12698492928.0, "13245": 12698492928.0, "13250": 12698492928.0, "13255": 12698492928.0, "13260": 12698492928.0, "13265": 12698492928.0, "13270": 12698492928.0, "13275": 12698492928.0, "13280": 12698492928.0, "13285": 12698492928.0, "13290": 12698492928.0, "13295": 12698492928.0, "13300": 12698492928.0, "13305": 12698492928.0, "13310": 12698492928.0, "13315": 12698492928.0, "13320": 12698492928.0, "13325": 12698492928.0, "13330": 12698492928.0, "13335": 12698492928.0, "13340": 12698492928.0, "13345": 12698492928.0, "13350": 12698492928.0, "13355": 12698492928.0, "13360": 12698492928.0, "13365": 12698492928.0, "13370": 12698492928.0, "13375": 12698492928.0, "13380": 12698492928.0, "13385": 12698492928.0, "13390": 12698492928.0, "13395": 12698492928.0, "13400": 12698492928.0, "13405": 12698492928.0, "13410": 12698492928.0, "13415": 12698492928.0, "13420": 12698492928.0, "13425": 12698492928.0, "13430": 12698492928.0, "13435": 12698492928.0, "13440": 12698492928.0, "13445": 12698492928.0, "13450": 12698492928.0, "13455": 12698492928.0, "13460": 12698492928.0, "13465": 12698492928.0, "13470": 12698492928.0, "13475": 12698492928.0, "13480": 12698492928.0, "13485": 12698492928.0, "13490": 12698492928.0, "13495": 12698492928.0, "13500": 12698492928.0, "13505": 12698492928.0, "13510": 12698492928.0, "13515": 12698492928.0, "13520": 12698492928.0, "13525": 12698492928.0, "13530": 12698492928.0, "13535": 12698492928.0, "13540": 12698492928.0, "13545": 12698492928.0, "13550": 12698492928.0, "13555": 12698492928.0, "13560": 12698492928.0, "13565": 12698492928.0, "13570": 12698492928.0, "13575": 12698492928.0, "13580": 12698492928.0, "13585": 12698492928.0, "13590": 12698492928.0, "13595": 12698492928.0, "13600": 12698492928.0, "13605": 12698492928.0, "13610": 12698492928.0, "13615": 12698492928.0, "13620": 12698492928.0, "13625": 12698492928.0, "13630": 12698492928.0, "13635": 12698492928.0, "13640": 12698492928.0, "13645": 12698492928.0, "13650": 12698492928.0, "13655": 12698492928.0, "13660": 12698492928.0, "13665": 12698492928.0, "13670": 12698492928.0, "13675": 12698492928.0, "13680": 12698492928.0, "13685": 12698492928.0, "13690": 12698492928.0, "13695": 12698492928.0, "13700": 12698492928.0, "13705": 12698492928.0, "13710": 12698492928.0, "13715": 12698492928.0, "13720": 12698492928.0, "13725": 12698492928.0, "13730": 12698492928.0, "13735": 12698492928.0, "13740": 12698492928.0, "13745": 12698492928.0, "13750": 12698492928.0, "13755": 12698492928.0, "13760": 12698492928.0, "13765": 12698492928.0, "13770": 12698492928.0, "13775": 12698492928.0, "13780": 12698492928.0, "13785": 12698492928.0, "13790": 12698492928.0, "13795": 12698492928.0, "13800": 12698492928.0, "13805": 12698492928.0, "13810": 12698492928.0, "13815": 12698492928.0, "13820": 12698492928.0, "13825": 12698492928.0, "13830": 12698492928.0, "13835": 12698492928.0, "13840": 12698492928.0, "13845": 12698492928.0, "13850": 12698492928.0, "13855": 12698492928.0, "13860": 12698492928.0, "13865": 12698492928.0, "13870": 12698492928.0, "13875": 12698492928.0, "13880": 12698492928.0, "13885": 12698492928.0, "13890": 12698492928.0, "13895": 12698492928.0, "13900": 12698492928.0, "13905": 12698492928.0, "13910": 12698492928.0, "13915": 12698492928.0, "13920": 12698492928.0, "13925": 12698492928.0, "13930": 12698492928.0, "13935": 12698492928.0, "13940": 12698492928.0, "13945": 12698492928.0, "13950": 12698492928.0, "13955": 12698492928.0, "13960": 12698492928.0, "13965": 12698492928.0, "13970": 12698492928.0, "13975": 12698492928.0, "13980": 12698492928.0, "13985": 12698492928.0, "13990": 12698492928.0, "13995": 12698492928.0, "14000": 12698492928.0, "14005": 12698492928.0, "14010": 12698492928.0, "14015": 12698492928.0, "14020": 12698492928.0, "14025": 12698492928.0, "14030": 12698492928.0, "14035": 12698492928.0, "14040": 12698492928.0, "14045": 12698492928.0, "14050": 12698492928.0, "14055": 12698492928.0, "14060": 12698492928.0, "14065": 12698492928.0, "14070": 12698492928.0, "14075": 12698492928.0, "14080": 12698492928.0, "14085": 12698492928.0, "14090": 12698492928.0, "14095": 12698492928.0, "14100": 12698492928.0, "14105": 12698492928.0, "14110": 12698492928.0, "14115": 12698492928.0, "14120": 12698492928.0, "14125": 12698492928.0, "14130": 12698492928.0, "14135": 12698492928.0, "14140": 12698492928.0, "14145": 12698492928.0, "14150": 12698492928.0, "14155": 12698492928.0, "14160": 12698492928.0, "14165": 12698492928.0, "14170": 12698492928.0, "14175": 12698492928.0, "14180": 12698492928.0, "14185": 12698492928.0, "14190": 12698492928.0, "14195": 12698492928.0, "14200": 12698492928.0, "14205": 12698492928.0, "14210": 12698492928.0, "14215": 12698492928.0, "14220": 12698492928.0, "14225": 12698492928.0, "14230": 12698492928.0, "14235": 12698492928.0, "14240": 12698492928.0, "14245": 12698492928.0, "14250": 12698492928.0, "14255": 12698492928.0, "14260": 12698492928.0, "14265": 12698492928.0, "14270": 12698492928.0, "14275": 12698492928.0, "14280": 12698492928.0, "14285": 12698492928.0, "14290": 12698492928.0, "14295": 12698492928.0, "14300": 12698492928.0, "14305": 12698492928.0, "14310": 12698492928.0, "14315": 12698492928.0, "14320": 12698492928.0, "14325": 12698492928.0, "14330": 12698492928.0, "14335": 12698492928.0, "14340": 12698492928.0, "14345": 12698492928.0, "14350": 12698492928.0, "14355": 12698492928.0, "14360": 12698492928.0, "14365": 12698492928.0, "14370": 12698492928.0, "14375": 12698492928.0, "14380": 12698492928.0, "14385": 12698492928.0, "14390": 12698492928.0, "14395": 12698492928.0, "14400": 12698492928.0, "14405": 12698492928.0, "14410": 12698492928.0, "14415": 12698492928.0, "14420": 12698492928.0, "14425": 12698492928.0, "14430": 12698492928.0, "14435": 12698492928.0, "14440": 12698492928.0, "14445": 12698492928.0, "14450": 12698492928.0, "14455": 12698492928.0, "14460": 12698492928.0, "14465": 12698492928.0, "14470": 12698492928.0, "14475": 12698492928.0, "14480": 12698492928.0, "14485": 12698492928.0, "14490": 12698492928.0, "14495": 12698492928.0, "14500": 12698492928.0, "14505": 12698492928.0, "14510": 12698492928.0, "14515": 12698492928.0, "14520": 12698492928.0, "14525": 12698492928.0, "14530": 12698492928.0, "14535": 12698492928.0, "14540": 12698492928.0, "14545": 12698492928.0, "14550": 12698492928.0, "14555": 12698492928.0, "14560": 12698492928.0, "14565": 12698492928.0, "14570": 12698492928.0, "14575": 12698492928.0, "14580": 12698492928.0, "14585": 12698492928.0, "14590": 12698492928.0, "14595": 12698492928.0, "14600": 12698492928.0, "14605": 12698492928.0, "14610": 12698492928.0, "14615": 12698492928.0, "14620": 12698492928.0, "14625": 12698492928.0, "14630": 12698492928.0, "14635": 12698492928.0, "14640": 12698492928.0, "14645": 12698492928.0, "14650": 12698492928.0, "14655": 12698492928.0, "14660": 12698492928.0, "14665": 12698492928.0, "14670": 12698492928.0, "14675": 12698492928.0, "14680": 12698492928.0, "14685": 12698492928.0, "14690": 12698492928.0, "14695": 12698492928.0, "14700": 12698492928.0, "14705": 12698492928.0, "14710": 12698492928.0, "14715": 12698492928.0, "14720": 12698492928.0, "14725": 12698492928.0, "14730": 12698492928.0, "14735": 12698492928.0, "14740": 12698492928.0, "14745": 12698492928.0, "14750": 12698492928.0, "14755": 12698492928.0, "14760": 12698492928.0, "14765": 12698492928.0, "14770": 12698492928.0, "14775": 12698492928.0, "14780": 12698492928.0, "14785": 12698492928.0, "14790": 12698492928.0, "14795": 12698492928.0, "14800": 12698492928.0, "14805": 12698492928.0, "14810": 12698492928.0, "14815": 12698492928.0, "14820": 12698492928.0, "14825": 12698492928.0, "14830": 12698492928.0, "14835": 12698492928.0, "14840": 12698492928.0, "14845": 12698492928.0, "14850": 12698492928.0, "14855": 12698492928.0, "14860": 12698492928.0, "14865": 12698492928.0, "14870": 12698492928.0, "14875": 12698492928.0, "14880": 12698492928.0, "14885": 12698492928.0, "14890": 12698492928.0, "14895": 12698492928.0, "14900": 12698492928.0, "14905": 12698492928.0, "14910": 12698492928.0, "14915": 12698492928.0, "14920": 12698492928.0, "14925": 12698492928.0, "14930": 12698492928.0, "14935": 12698492928.0, "14940": 12698492928.0, "14945": 12698492928.0, "14950": 12698492928.0, "14955": 12698492928.0, "14960": 12698492928.0, "14965": 12698492928.0, "14970": 12698492928.0, "14975": 12698492928.0, "14980": 12698492928.0, "14985": 12698492928.0, "14990": 12698492928.0, "14995": 12698492928.0, "15000": 12698492928.0, "15005": 12698492928.0, "15010": 12698492928.0, "15015": 12698492928.0, "15020": 12698492928.0, "15025": 12698492928.0, "15030": 12698492928.0, "15035": 12698492928.0, "15040": 12698492928.0, "15045": 12698492928.0, "15050": 12698492928.0, "15055": 12698492928.0, "15060": 12698492928.0, "15065": 12698492928.0, "15070": 12698492928.0, "15075": 12698492928.0, "15080": 12698492928.0, "15085": 12698492928.0, "15090": 12698492928.0, "15095": 12698492928.0, "15100": 12698492928.0, "15105": 12698492928.0, "15110": 12698492928.0, "15115": 12698492928.0, "15120": 12698492928.0, "15125": 12698492928.0, "15130": 12698492928.0, "15135": 12698492928.0, "15140": 12698492928.0, "15145": 12698492928.0, "15150": 12698492928.0, "15155": 12698492928.0, "15160": 12698492928.0, "15165": 12698492928.0, "15170": 12698492928.0, "15175": 12698492928.0, "15180": 12698492928.0, "15185": 12698492928.0, "15190": 12698492928.0, "15195": 12698492928.0, "15200": 12698492928.0, "15205": 12698492928.0, "15210": 12698492928.0, "15215": 12698492928.0, "15220": 12698492928.0, "15225": 12698492928.0, "15230": 12698492928.0, "15235": 12698492928.0, "15240": 12698492928.0, "15245": 12698492928.0, "15250": 12698492928.0, "15255": 12698492928.0, "15260": 12698492928.0, "15265": 12698492928.0, "15270": 12698492928.0, "15275": 12698492928.0, "15280": 12698492928.0, "15285": 12698492928.0, "15290": 12698492928.0, "15295": 12698492928.0, "15300": 12698492928.0, "15305": 12698492928.0, "15310": 12698492928.0, "15315": 12698492928.0, "15320": 12698492928.0, "15325": 12698492928.0, "15330": 12698492928.0, "15335": 12698492928.0, "15340": 12698492928.0, "15345": 12698492928.0, "15350": 12698492928.0, "15355": 12698492928.0, "15360": 12698492928.0, "15365": 12698492928.0, "15370": 12698492928.0, "15375": 12698492928.0, "15380": 12698492928.0, "15385": 12698492928.0, "15390": 12698492928.0, "15395": 12698492928.0, "15400": 12698492928.0, "15405": 12698492928.0, "15410": 12698492928.0, "15415": 12698492928.0, "15420": 12698492928.0, "15425": 12698492928.0, "15430": 12698492928.0, "15435": 12698492928.0, "15440": 12698492928.0, "15445": 12698492928.0, "15450": 12698492928.0, "15455": 12698492928.0, "15460": 12698492928.0, "15465": 12698492928.0, "15470": 12698492928.0, "15475": 12698492928.0, "15480": 12698492928.0, "15485": 12698492928.0, "15490": 12698492928.0, "15495": 12698492928.0, "15500": 12698492928.0, "15505": 12698492928.0, "15510": 12698492928.0, "15515": 12698492928.0, "15520": 12698492928.0, "15525": 12698492928.0, "15530": 12698492928.0, "15535": 12698492928.0, "15540": 12698492928.0, "15545": 12698492928.0, "15550": 12698492928.0, "15555": 12698492928.0, "15560": 12698492928.0, "15565": 12698492928.0, "15570": 12698492928.0, "15575": 12698492928.0, "15580": 12698492928.0, "15585": 12698492928.0, "15590": 12698492928.0, "15595": 12698492928.0, "15600": 12698492928.0, "15605": 12698492928.0, "15610": 12698492928.0, "15615": 12698492928.0, "15620": 12698492928.0, "15625": 12698492928.0, "15630": 12698492928.0, "15635": 12698492928.0, "15640": 12698492928.0, "15645": 12698492928.0, "15650": 12698492928.0, "15655": 12698492928.0, "15660": 12698492928.0, "15665": 12698492928.0, "15670": 12698492928.0, "15675": 12698492928.0, "15680": 12698492928.0, "15685": 12698492928.0, "15690": 12698492928.0, "15695": 12698492928.0, "15700": 12698492928.0, "15705": 12698492928.0, "15710": 12698492928.0, "15715": 12698492928.0, "15720": 12698492928.0, "15725": 12698492928.0, "15730": 12698492928.0, "15735": 12698492928.0, "15740": 12698492928.0, "15745": 12698492928.0, "15750": 12698492928.0, "15755": 12698492928.0, "15760": 12698492928.0, "15765": 12698492928.0, "15770": 12698492928.0, "15775": 12698492928.0, "15780": 12698492928.0, "15785": 12698492928.0, "15790": 12698492928.0, "15795": 12698492928.0, "15800": 12698492928.0, "15805": 12698492928.0, "15810": 12698492928.0, "15815": 12698492928.0, "15820": 12698492928.0, "15825": 12698492928.0, "15830": 12698492928.0, "15835": 12698492928.0, "15840": 12698492928.0, "15845": 12698492928.0, "15850": 12698492928.0, "15855": 12698492928.0, "15860": 12698492928.0, "15865": 12698492928.0, "15870": 12698492928.0, "15875": 12698492928.0, "15880": 12698492928.0, "15885": 12698492928.0, "15890": 12698492928.0, "15895": 12698492928.0, "15900": 12698492928.0, "15905": 12698492928.0, "15910": 12698492928.0, "15915": 12698492928.0, "15920": 12698492928.0, "15925": 12698492928.0, "15930": 12698492928.0, "15935": 12698492928.0, "15940": 12698492928.0, "15945": 12698492928.0, "15950": 12698492928.0, "15955": 12698492928.0, "15960": 12698492928.0, "15965": 12698492928.0, "15970": 12698492928.0, "15975": 12698492928.0, "15980": 12698492928.0, "15985": 12698492928.0, "15990": 12698492928.0, "15995": 12698492928.0, "16000": 12698492928.0, "16005": 12698492928.0, "16010": 12698492928.0, "16015": 12698492928.0, "16020": 12698492928.0, "16025": 12698492928.0, "16030": 12698492928.0, "16035": 12698492928.0, "16040": 12698492928.0, "16045": 12698492928.0, "16050": 12698492928.0, "16055": 12698492928.0, "16060": 12698492928.0, "16065": 12698492928.0, "16070": 12698492928.0, "16075": 12698492928.0, "16080": 12698492928.0, "16085": 12698492928.0, "16090": 12698492928.0, "16095": 12698492928.0, "16100": 12698492928.0, "16105": 12698492928.0, "16110": 12698492928.0, "16115": 12698492928.0, "16120": 12698492928.0, "16125": 12698492928.0, "16130": 12698492928.0, "16135": 12698492928.0, "16140": 12698492928.0, "16145": 12698492928.0, "16150": 12698492928.0, "16155": 12698492928.0, "16160": 12698492928.0, "16165": 12698492928.0, "16170": 12698492928.0, "16175": 12698492928.0, "16180": 12698492928.0, "16185": 12698492928.0, "16190": 12698492928.0, "16195": 12698492928.0, "16200": 12698492928.0, "16205": 12698492928.0, "16210": 12698492928.0, "16215": 12698492928.0, "16220": 12698492928.0, "16225": 12698492928.0, "16230": 12698492928.0, "16235": 12698492928.0, "16240": 12698492928.0, "16245": 12698492928.0, "16250": 12698492928.0, "16255": 12698492928.0, "16260": 12698492928.0, "16265": 12698492928.0, "16270": 12698492928.0, "16275": 12698492928.0, "16280": 12698492928.0, "16285": 12698492928.0, "16290": 12698492928.0, "16295": 12698492928.0, "16300": 12698492928.0, "16305": 12698492928.0, "16310": 12698492928.0, "16315": 12698492928.0, "16320": 12698492928.0, "16325": 12698492928.0, "16330": 12698492928.0, "16335": 12698492928.0, "16340": 12698492928.0, "16345": 12698492928.0, "16350": 12698492928.0, "16355": 12698492928.0, "16360": 12698492928.0, "16365": 12698492928.0, "16370": 12698492928.0, "16375": 12698492928.0, "16380": 12698492928.0, "16385": 12698492928.0, "16390": 12698492928.0, "16395": 12698492928.0, "16400": 12698492928.0, "16405": 12698492928.0, "16410": 12698492928.0, "16415": 12698492928.0, "16420": 12698492928.0, "16425": 12698492928.0, "16430": 12698492928.0, "16435": 12698492928.0, "16440": 12698492928.0, "16445": 12698492928.0, "16450": 12698492928.0, "16455": 12698492928.0, "16460": 12698492928.0, "16465": 12698492928.0, "16470": 12698492928.0, "16475": 12698492928.0, "16480": 12698492928.0, "16485": 12698492928.0, "16490": 12698492928.0, "16495": 12698492928.0, "16500": 12698492928.0, "16505": 12698492928.0, "16510": 12698492928.0, "16515": 12698492928.0, "16520": 12698492928.0, "16525": 12698492928.0, "16530": 12698492928.0, "16535": 12698492928.0, "16540": 12698492928.0, "16545": 12698492928.0, "16550": 12698492928.0, "16555": 12698492928.0, "16560": 12698492928.0, "16565": 12698492928.0, "16570": 12698492928.0, "16575": 12698492928.0, "16580": 12698492928.0, "16585": 12698492928.0, "16590": 12698492928.0, "16595": 12698492928.0, "16600": 12698492928.0, "16605": 12698492928.0, "16610": 12698492928.0, "16615": 12698492928.0, "16620": 12698492928.0, "16625": 12698492928.0, "16630": 12698492928.0, "16635": 12698492928.0, "16640": 12698492928.0, "16645": 12698492928.0, "16650": 12698492928.0, "16655": 12698492928.0, "16660": 12698492928.0, "16665": 12698492928.0, "16670": 12698492928.0, "16675": 12698492928.0, "16680": 12698492928.0, "16685": 12698492928.0, "16690": 12698492928.0, "16695": 12698492928.0, "16700": 12698492928.0, "16705": 12698492928.0, "16710": 12698492928.0, "16715": 12698492928.0, "16720": 12698492928.0, "16725": 12698492928.0, "16730": 12698492928.0, "16735": 12698492928.0, "16740": 12698492928.0, "16745": 12698492928.0, "16750": 12698492928.0, "16755": 12698492928.0, "16760": 12698492928.0, "16765": 12698492928.0, "16770": 12698492928.0, "16775": 12698492928.0, "16780": 12698492928.0, "16785": 12698492928.0, "16790": 12698492928.0, "16795": 12698492928.0, "16800": 12698492928.0, "16805": 12698492928.0, "16810": 12698492928.0, "16815": 12698492928.0, "16820": 12698492928.0, "16825": 12698492928.0, "16830": 12698492928.0, "16835": 12698492928.0, "16840": 12698492928.0, "16845": 12698492928.0, "16850": 12698492928.0, "16855": 12698492928.0, "16860": 12698492928.0, "16865": 12698492928.0, "16870": 12698492928.0, "16875": 12698492928.0, "16880": 12698492928.0, "16885": 12698492928.0, "16890": 12698492928.0, "16895": 12698492928.0, "16900": 12698492928.0, "16905": 12698492928.0, "16910": 12698492928.0, "16915": 12698492928.0, "16920": 12698492928.0, "16925": 12698492928.0, "16930": 12698492928.0, "16935": 12698492928.0, "16940": 12698492928.0, "16945": 12698492928.0, "16950": 12698492928.0, "16955": 12698492928.0, "16960": 12698492928.0, "16965": 12698492928.0, "16970": 12698492928.0, "16975": 12698492928.0, "16980": 12698492928.0, "16985": 12698492928.0, "16990": 12698492928.0, "16995": 12698492928.0, "17000": 12698492928.0, "17005": 12698492928.0, "17010": 12698492928.0, "17015": 12698492928.0, "17020": 12698492928.0, "17025": 12698492928.0, "17030": 12698492928.0, "17035": 12698492928.0, "17040": 12698492928.0, "17045": 12698492928.0, "17050": 12698492928.0, "17055": 12698492928.0, "17060": 12698492928.0, "17065": 12698492928.0, "17070": 12698492928.0, "17075": 12698492928.0, "17080": 12698492928.0, "17085": 12698492928.0, "17090": 12698492928.0, "17095": 12698492928.0, "17100": 12698492928.0, "17105": 12698492928.0, "17110": 12698492928.0, "17115": 12698492928.0, "17120": 12698492928.0, "17125": 12698492928.0, "17130": 12698492928.0, "17135": 12698492928.0, "17140": 12698492928.0, "17145": 12698492928.0, "17150": 12698492928.0, "17155": 12698492928.0, "17160": 12698492928.0, "17165": 12698492928.0, "17170": 12698492928.0, "17175": 12698492928.0, "17180": 12698492928.0, "17185": 12698492928.0, "17190": 12698492928.0, "17195": 12698492928.0, "17200": 12698492928.0, "17205": 12698492928.0, "17210": 12698492928.0, "17215": 12698492928.0, "17220": 12698492928.0, "17225": 12698492928.0, "17230": 12698492928.0, "17235": 12698492928.0, "17240": 12698492928.0, "17245": 12698492928.0, "17250": 12698492928.0, "17255": 12698492928.0, "17260": 12698492928.0, "17265": 12698492928.0, "17270": 12698492928.0, "17275": 12698492928.0, "17280": 12698492928.0, "17285": 12698492928.0, "17290": 12698492928.0, "17295": 12698492928.0, "17300": 12698492928.0, "17305": 12698492928.0, "17310": 12698492928.0, "17315": 12698492928.0, "17320": 12698492928.0, "17325": 12698492928.0, "17330": 12698492928.0, "17335": 12698492928.0, "17340": 12698492928.0, "17345": 12698492928.0, "17350": 12698492928.0, "17355": 12698492928.0, "17360": 12698492928.0, "17365": 12698492928.0, "17370": 12698492928.0, "17375": 12698492928.0, "17380": 12698492928.0, "17385": 12698492928.0, "17390": 12698492928.0, "17395": 12698492928.0, "17400": 12698492928.0, "17405": 12698492928.0, "17410": 12698492928.0, "17415": 12698492928.0, "17420": 12698492928.0, "17425": 12698492928.0, "17430": 12698492928.0, "17435": 12698492928.0, "17440": 12698492928.0, "17445": 12698492928.0, "17450": 12698492928.0, "17455": 12698492928.0, "17460": 12698492928.0, "17465": 12698492928.0, "17470": 12698492928.0, "17475": 12698492928.0, "17480": 12698492928.0, "17485": 12698492928.0, "17490": 12698492928.0, "17495": 12698492928.0, "17500": 12698492928.0, "17505": 12698492928.0, "17510": 12698492928.0, "17515": 12698492928.0, "17520": 12698492928.0, "17525": 12698492928.0, "17530": 12698492928.0, "17535": 12698492928.0, "17540": 12698492928.0, "17545": 12698492928.0, "17550": 12698492928.0, "17555": 12698492928.0, "17560": 12698492928.0, "17565": 12698492928.0, "17570": 12698492928.0, "17575": 12698492928.0, "17580": 12698492928.0, "17585": 12698492928.0, "17590": 12698492928.0, "17595": 12698492928.0, "17600": 12698492928.0, "17605": 12698492928.0, "17610": 12698492928.0, "17615": 12698492928.0, "17620": 12698492928.0, "17625": 12698492928.0, "17630": 12698492928.0, "17635": 12698492928.0, "17640": 12698492928.0, "17645": 12698492928.0, "17650": 12698492928.0, "17655": 12698492928.0, "17660": 12698492928.0, "17665": 12698492928.0, "17670": 12698492928.0, "17675": 12698492928.0, "17680": 12698492928.0, "17685": 12698492928.0, "17690": 12698492928.0, "17695": 12698492928.0, "17700": 12698492928.0, "17705": 12698492928.0, "17710": 12698492928.0, "17715": 12698492928.0, "17720": 12698492928.0, "17725": 12698492928.0, "17730": 12698492928.0, "17735": 12698492928.0, "17740": 12698492928.0, "17745": 12698492928.0, "17750": 12698492928.0, "17755": 12698492928.0, "17760": 12698492928.0, "17765": 12698492928.0, "17770": 12698492928.0, "17775": 12698492928.0, "17780": 12698492928.0, "17785": 12698492928.0, "17790": 12698492928.0, "17795": 12698492928.0, "17800": 12698492928.0, "17805": 12698492928.0, "17810": 12698492928.0, "17815": 12698492928.0, "17820": 12698492928.0, "17825": 12698492928.0, "17830": 12698492928.0, "17835": 12698492928.0, "17840": 12698492928.0, "17845": 12698492928.0, "17850": 12698492928.0, "17855": 12698492928.0, "17860": 12698492928.0, "17865": 12698492928.0, "17870": 12698492928.0, "17875": 12698492928.0, "17880": 12698492928.0, "17885": 12698492928.0, "17890": 12698492928.0, "17895": 12698492928.0, "17900": 12698492928.0, "17905": 12698492928.0, "17910": 12698492928.0, "17915": 12698492928.0, "17920": 12698492928.0, "17925": 12698492928.0, "17930": 12698492928.0, "17935": 12698492928.0, "17940": 12698492928.0, "17945": 12698492928.0, "17950": 12698492928.0, "17955": 12698492928.0, "17960": 12698492928.0, "17965": 12698492928.0, "17970": 12698492928.0, "17975": 12698492928.0, "17980": 12698492928.0, "17985": 12698492928.0, "17990": 12698492928.0, "17995": 12698492928.0, "18000": 12698492928.0, "18005": 12698492928.0, "18010": 12698492928.0, "18015": 12698492928.0, "18020": 12698492928.0, "18025": 12698492928.0, "18030": 12698492928.0, "18035": 12698492928.0, "18040": 12698492928.0, "18045": 12698492928.0, "18050": 12698492928.0, "18055": 12698492928.0, "18060": 12698492928.0, "18065": 12698492928.0, "18070": 12698492928.0, "18075": 12698492928.0, "18080": 12698492928.0, "18085": 12698492928.0, "18090": 12698492928.0, "18095": 12698492928.0, "18100": 12698492928.0, "18105": 12698492928.0, "18110": 12698492928.0, "18115": 12698492928.0, "18120": 12698492928.0, "18125": 12698492928.0, "18130": 12698492928.0, "18135": 12698492928.0, "18140": 12698492928.0, "18145": 12698492928.0, "18150": 12698492928.0, "18155": 12698492928.0, "18160": 12698492928.0, "18165": 12698492928.0, "18170": 12698492928.0, "18175": 12698492928.0, "18180": 12698492928.0, "18185": 12698492928.0, "18190": 12698492928.0, "18195": 12698492928.0, "18200": 12698492928.0, "18205": 12698492928.0, "18210": 12698492928.0, "18215": 12698492928.0, "18220": 12698492928.0, "18225": 12698492928.0, "18230": 12698492928.0, "18235": 12698492928.0, "18240": 12698492928.0, "18245": 12698492928.0, "18250": 12698492928.0, "18255": 12698492928.0, "18260": 12698492928.0, "18265": 12698492928.0, "18270": 12698492928.0, "18275": 12698492928.0, "18280": 12698492928.0, "18285": 12698492928.0, "18290": 12698492928.0, "18295": 12698492928.0, "18300": 12698492928.0, "18305": 12698492928.0, "18310": 12698492928.0, "18315": 12698492928.0, "18320": 12698492928.0, "18325": 12698492928.0, "18330": 12698492928.0, "18335": 12698492928.0, "18340": 12698492928.0, "18345": 12698492928.0, "18350": 12698492928.0, "18355": 12698492928.0, "18360": 12698492928.0, "18365": 12698492928.0, "18370": 12698492928.0, "18375": 12698492928.0, "18380": 12698492928.0, "18385": 12698492928.0, "18390": 12698492928.0, "18395": 12698492928.0, "18400": 12698492928.0, "18405": 12698492928.0, "18410": 12698492928.0, "18415": 12698492928.0, "18420": 12698492928.0, "18425": 12698492928.0, "18430": 12698492928.0, "18435": 12698492928.0, "18440": 12698492928.0, "18445": 12698492928.0, "18450": 12698492928.0, "18455": 12698492928.0, "18460": 12698492928.0, "18465": 12698492928.0, "18470": 12698492928.0, "18475": 12698492928.0, "18480": 12698492928.0, "18485": 12698492928.0, "18490": 12698492928.0, "18495": 12698492928.0, "18500": 12698492928.0, "18505": 12698492928.0, "18510": 12698492928.0, "18515": 12698492928.0, "18520": 12698492928.0, "18525": 12698492928.0, "18530": 12698492928.0, "18535": 12698492928.0, "18540": 12698492928.0, "18545": 12698492928.0, "18550": 12698492928.0, "18555": 12698492928.0, "18560": 12698492928.0, "18565": 12698492928.0, "18570": 12698492928.0, "18575": 12698492928.0, "18580": 12698492928.0, "18585": 12698492928.0, "18590": 12698492928.0, "18595": 12698492928.0, "18600": 12698492928.0, "18605": 12698492928.0, "18610": 12698492928.0, "18615": 12698492928.0, "18620": 12698492928.0, "18625": 12698492928.0, "18630": 12698492928.0, "18635": 12698492928.0, "18640": 12698492928.0, "18645": 12698492928.0, "18650": 12698492928.0, "18655": 12698492928.0, "18660": 12698492928.0, "18665": 12698492928.0, "18670": 12698492928.0, "18675": 12698492928.0, "18680": 12698492928.0, "18685": 12698492928.0, "18690": 12698492928.0, "18695": 12698492928.0, "18700": 12698492928.0, "18705": 12698492928.0, "18710": 12698492928.0, "18715": 12698492928.0, "18720": 12698492928.0, "18725": 12698492928.0, "18730": 12698492928.0, "18735": 12698492928.0, "18740": 12698492928.0, "18745": 12698492928.0, "18750": 12698492928.0, "18755": 12698492928.0, "18760": 12698492928.0, "18765": 12698492928.0, "18770": 12698492928.0, "18775": 12698492928.0, "18780": 12698492928.0, "18785": 12698492928.0, "18790": 12698492928.0, "18795": 12698492928.0, "18800": 12698492928.0, "18805": 12698492928.0, "18810": 12698492928.0, "18815": 12698492928.0, "18820": 12698492928.0, "18825": 12698492928.0, "18830": 12698492928.0, "18835": 12698492928.0, "18840": 12698492928.0, "18845": 12698492928.0, "18850": 12698492928.0, "18855": 12698492928.0, "18860": 12698492928.0, "18865": 12698492928.0, "18870": 12698492928.0, "18875": 12698492928.0, "18880": 12698492928.0, "18885": 12698492928.0, "18890": 12698492928.0, "18895": 12698492928.0, "18900": 12698492928.0, "18905": 12698492928.0, "18910": 12698492928.0, "18915": 12698492928.0, "18920": 12698492928.0, "18925": 12698492928.0, "18930": 12698492928.0, "18935": 12698492928.0, "18940": 12698492928.0, "18945": 12698492928.0, "18950": 12698492928.0, "18955": 12698492928.0, "18960": 12698492928.0, "18965": 12698492928.0, "18970": 12698492928.0, "18975": 12698492928.0, "18980": 12698492928.0, "18985": 12698492928.0, "18990": 12698492928.0, "18995": 12698492928.0, "19000": 12698492928.0, "19005": 12698492928.0, "19010": 12698492928.0, "19015": 12698492928.0, "19020": 12698492928.0, "19025": 12698492928.0, "19030": 12698492928.0, "19035": 12698492928.0, "19040": 12698492928.0, "19045": 12698492928.0, "19050": 12698492928.0, "19055": 12698492928.0, "19060": 12698492928.0, "19065": 12698492928.0, "19070": 12698492928.0, "19075": 12698492928.0, "19080": 12698492928.0, "19085": 12698492928.0, "19090": 12698492928.0, "19095": 12698492928.0, "19100": 12698492928.0, "19105": 12698492928.0, "19110": 12698492928.0, "19115": 12698492928.0, "19120": 12698492928.0, "19125": 12698492928.0, "19130": 12698492928.0, "19135": 12698492928.0, "19140": 12698492928.0, "19145": 12698492928.0, "19150": 12698492928.0, "19155": 12698492928.0, "19160": 12698492928.0, "19165": 12698492928.0, "19170": 12698492928.0, "19175": 12698492928.0, "19180": 12698492928.0, "19185": 12698492928.0, "19190": 12698492928.0, "19195": 12698492928.0, "19200": 12698492928.0, "19205": 12698492928.0, "19210": 12698492928.0, "19215": 12698492928.0, "19220": 12698492928.0, "19225": 12698492928.0, "19230": 12698492928.0, "19235": 12698492928.0, "19240": 12698492928.0, "19245": 12698492928.0, "19250": 12698492928.0, "19255": 12698492928.0, "19260": 12698492928.0, "19265": 12698492928.0, "19270": 12698492928.0, "19275": 12698492928.0, "19280": 12698492928.0, "19285": 12698492928.0, "19290": 12698492928.0, "19295": 12698492928.0, "19300": 12698492928.0, "19305": 12698492928.0, "19310": 12698492928.0, "19315": 12698492928.0, "19320": 12698492928.0, "19325": 12698492928.0, "19330": 12698492928.0, "19335": 12698492928.0, "19340": 12698492928.0, "19345": 12698492928.0, "19350": 12698492928.0, "19355": 12698492928.0, "19360": 12698492928.0, "19365": 12698492928.0, "19370": 12698492928.0, "19375": 12698492928.0, "19380": 12698492928.0, "19385": 12698492928.0, "19390": 12698492928.0, "19395": 12698492928.0, "19400": 12698492928.0, "19405": 12698492928.0, "19410": 12698492928.0, "19415": 12698492928.0, "19420": 12698492928.0, "19425": 12698492928.0, "19430": 12698492928.0, "19435": 12698492928.0, "19440": 12698492928.0, "19445": 12698492928.0, "19450": 12698492928.0, "19455": 12698492928.0, "19460": 12698492928.0, "19465": 12698492928.0, "19470": 12698492928.0, "19475": 12698492928.0, "19480": 12698492928.0, "19485": 12698492928.0, "19490": 12698492928.0, "19495": 12698492928.0, "19500": 12698492928.0, "19505": 12698492928.0, "19510": 12698492928.0, "19515": 12698492928.0, "19520": 12698492928.0, "19525": 12698492928.0, "19530": 12698492928.0, "19535": 12698492928.0, "19540": 12698492928.0, "19545": 12698492928.0, "19550": 12698492928.0, "19555": 12698492928.0, "19560": 12698492928.0, "19565": 12698492928.0, "19570": 12698492928.0, "19575": 12698492928.0, "19580": 12698492928.0, "19585": 12698492928.0, "19590": 12698492928.0, "19595": 12698492928.0, "19600": 12698492928.0, "19605": 12698492928.0, "19610": 12698492928.0, "19615": 12698492928.0, "19620": 12698492928.0, "19625": 12698492928.0, "19630": 12698492928.0, "19635": 12698492928.0, "19640": 12698492928.0, "19645": 12698492928.0, "19650": 12698492928.0, "19655": 12698492928.0, "19660": 12698492928.0, "19665": 12698492928.0, "19670": 12698492928.0, "19675": 12698492928.0, "19680": 12698492928.0, "19685": 12698492928.0, "19690": 12698492928.0, "19695": 12698492928.0, "19700": 12698492928.0, "19705": 12698492928.0, "19710": 12698492928.0, "19715": 12698492928.0, "19720": 12698492928.0, "19725": 12698492928.0, "19730": 12698492928.0, "19735": 12698492928.0, "19740": 12698492928.0, "19745": 12698492928.0, "19750": 12698492928.0, "19755": 12698492928.0, "19760": 12698492928.0, "19765": 12698492928.0, "19770": 12698492928.0, "19775": 12698492928.0, "19780": 12698492928.0, "19785": 12698492928.0, "19790": 12698492928.0, "19795": 12698492928.0, "19800": 12698492928.0, "19805": 12698492928.0, "19810": 12698492928.0, "19815": 12698492928.0, "19820": 12698492928.0, "19825": 12698492928.0, "19830": 12698492928.0, "19835": 12698492928.0, "19840": 12698492928.0, "19845": 12698492928.0, "19850": 12698492928.0, "19855": 12698492928.0, "19860": 12698492928.0, "19865": 12698492928.0, "19870": 12698492928.0, "19875": 12698492928.0, "19880": 12698492928.0, "19885": 12698492928.0, "19890": 12698492928.0, "19895": 12698492928.0, "19900": 12698492928.0, "19905": 12698492928.0, "19910": 12698492928.0, "19915": 12698492928.0, "19920": 12698492928.0, "19925": 12698492928.0, "19930": 12698492928.0, "19935": 12698492928.0, "19940": 12698492928.0, "19945": 12698492928.0, "19950": 12698492928.0, "19955": 12698492928.0, "19960": 12698492928.0, "19965": 12698492928.0, "19970": 12698492928.0, "19975": 12698492928.0, "19980": 12698492928.0, "19985": 12698492928.0, "19990": 12698492928.0, "19995": 12698492928.0, "20000": 12698492928.0, "20005": 12698492928.0, "20010": 12698492928.0, "20015": 12698492928.0, "20020": 12698492928.0, "20025": 12698492928.0, "20030": 12698492928.0, "20035": 12698492928.0, "20040": 12698492928.0, "20045": 12698492928.0, "20050": 12698492928.0, "20055": 12698492928.0, "20060": 12698492928.0, "20065": 12698492928.0, "20070": 12698492928.0, "20075": 12698492928.0, "20080": 12698492928.0, "20085": 12698492928.0, "20090": 12698492928.0, "20095": 12698492928.0, "20100": 12698492928.0, "20105": 12698492928.0, "20110": 12698492928.0, "20115": 12698492928.0, "20120": 12698492928.0, "20125": 12698492928.0, "20130": 12698492928.0, "20135": 12698492928.0, "20140": 12698492928.0, "20145": 12698492928.0, "20150": 12698492928.0, "20155": 12698492928.0, "20160": 12698492928.0, "20165": 12698492928.0, "20170": 12698492928.0, "20175": 12698492928.0, "20180": 12698492928.0, "20185": 12698492928.0, "20190": 12698492928.0, "20195": 12698492928.0, "20200": 12698492928.0, "20205": 12698492928.0, "20210": 12698492928.0, "20215": 12698492928.0, "20220": 12698492928.0, "20225": 12698492928.0, "20230": 12698492928.0, "20235": 12698492928.0, "20240": 12698492928.0, "20245": 12698492928.0, "20250": 12698492928.0, "20255": 12698492928.0, "20260": 12698492928.0, "20265": 12698492928.0, "20270": 12698492928.0, "20275": 12698492928.0, "20280": 12698492928.0, "20285": 12698492928.0, "20290": 12698492928.0, "20295": 12698492928.0, "20300": 12698492928.0, "20305": 12698492928.0, "20310": 12698492928.0, "20315": 12698492928.0, "20320": 12698492928.0, "20325": 12698492928.0, "20330": 12698492928.0, "20335": 12698492928.0, "20340": 12698492928.0, "20345": 12698492928.0, "20350": 12698492928.0, "20355": 12698492928.0, "20360": 12698492928.0, "20365": 12698492928.0, "20370": 12698492928.0, "20375": 12698492928.0, "20380": 12698492928.0, "20385": 12698492928.0, "20390": 12698492928.0, "20395": 12698492928.0, "20400": 12698492928.0, "20405": 12698492928.0, "20410": 12698492928.0, "20415": 12698492928.0, "20420": 12698492928.0, "20425": 12698492928.0, "20430": 12698492928.0, "20435": 12698492928.0, "20440": 12698492928.0, "20445": 12698492928.0, "20450": 12698492928.0, "20455": 12698492928.0, "20460": 12698492928.0, "20465": 12698492928.0, "20470": 12698492928.0, "20475": 12698492928.0, "20480": 12698492928.0, "20485": 12698492928.0, "20490": 12698492928.0, "20495": 12698492928.0, "20500": 12698492928.0, "20505": 12698492928.0, "20510": 12698492928.0, "20515": 12698492928.0, "20520": 12698492928.0, "20525": 12698492928.0, "20530": 12698492928.0, "20535": 12698492928.0, "20540": 12698492928.0, "20545": 12698492928.0, "20550": 12698492928.0, "20555": 12698492928.0, "20560": 12698492928.0, "20565": 12698492928.0, "20570": 12698492928.0, "20575": 12698492928.0, "20580": 12698492928.0, "20585": 12698492928.0, "20590": 12698492928.0, "20595": 12698492928.0, "20600": 12698492928.0, "20605": 12698492928.0, "20610": 12698492928.0, "20615": 12698492928.0, "20620": 12698492928.0, "20625": 12698492928.0, "20630": 12698492928.0, "20635": 12698492928.0, "20640": 12698492928.0, "20645": 12698492928.0, "20650": 12698492928.0, "20655": 12698492928.0, "20660": 12698492928.0, "20665": 12698492928.0, "20670": 12698492928.0, "20675": 12698492928.0, "20680": 12698492928.0, "20685": 12698492928.0, "20690": 12698492928.0, "20695": 12698492928.0, "20700": 12698492928.0, "20705": 12698492928.0, "20710": 12698492928.0, "20715": 12698492928.0, "20720": 12698492928.0, "20725": 12698492928.0, "20730": 12698492928.0, "20735": 12698492928.0, "20740": 12698492928.0, "20745": 12698492928.0, "20750": 12698492928.0, "20755": 12698492928.0, "20760": 12698492928.0, "20765": 12698492928.0, "20770": 12698492928.0, "20775": 12698492928.0, "20780": 12698492928.0, "20785": 12698492928.0, "20790": 12698492928.0, "20795": 12698492928.0, "20800": 12698492928.0, "20805": 12698492928.0, "20810": 12698492928.0, "20815": 12698492928.0, "20820": 12698492928.0, "20825": 12698492928.0, "20830": 12698492928.0, "20835": 12698492928.0, "20840": 12698492928.0, "20845": 12698492928.0, "20850": 12698492928.0, "20855": 12698492928.0, "20860": 12698492928.0, "20865": 12698492928.0, "20870": 12698492928.0, "20875": 12698492928.0, "20880": 12698492928.0, "20885": 12698492928.0, "20890": 12698492928.0, "20895": 12698492928.0, "20900": 12698492928.0, "20905": 12698492928.0, "20910": 12698492928.0, "20915": 12698492928.0, "20920": 12698492928.0, "20925": 12698492928.0, "20930": 12698492928.0, "20935": 12698492928.0, "20940": 12698492928.0, "20945": 12698492928.0, "20950": 12698492928.0, "20955": 12698492928.0, "20960": 12698492928.0, "20965": 12698492928.0, "20970": 12698492928.0, "20975": 12698492928.0, "20980": 12698492928.0, "20985": 12698492928.0, "20990": 12698492928.0, "20995": 12698492928.0, "21000": 12698492928.0, "21005": 12698492928.0, "21010": 12698492928.0, "21015": 12698492928.0, "21020": 12698492928.0, "21025": 12698492928.0, "21030": 12698492928.0, "21035": 12698492928.0, "21040": 12698492928.0, "21045": 12698492928.0, "21050": 12698492928.0, "21055": 12698492928.0, "21060": 12698492928.0, "21065": 12698492928.0, "21070": 12698492928.0, "21075": 12698492928.0, "21080": 12698492928.0, "21085": 12698492928.0, "21090": 12698492928.0, "21095": 12698492928.0, "21100": 12698492928.0, "21105": 12698492928.0, "21110": 12698492928.0, "21115": 12698492928.0, "21120": 12698492928.0, "21125": 12698492928.0, "21130": 12698492928.0, "21135": 12698492928.0, "21140": 12698492928.0, "21145": 12698492928.0, "21150": 12698492928.0, "21155": 12698492928.0, "21160": 12698492928.0, "21165": 12698492928.0, "21170": 12698492928.0, "21175": 12698492928.0, "21180": 12698492928.0, "21185": 12698492928.0, "21190": 12698492928.0, "21195": 12698492928.0, "21200": 12698492928.0, "21205": 12698492928.0, "21210": 12698492928.0, "21215": 12698492928.0, "21220": 12698492928.0, "21225": 12698492928.0, "21230": 12698492928.0, "21235": 12698492928.0, "21240": 12698492928.0, "21245": 12698492928.0, "21250": 12698492928.0, "21255": 12698492928.0, "21260": 12698492928.0, "21265": 12698492928.0, "21270": 12698492928.0, "21275": 12698492928.0, "21280": 12698492928.0, "21285": 12698492928.0, "21290": 12698492928.0, "21295": 12698492928.0, "21300": 12698492928.0, "21305": 12698492928.0, "21310": 12698492928.0, "21315": 12698492928.0, "21320": 12698492928.0, "21325": 12698492928.0, "21330": 12698492928.0, "21335": 12698492928.0, "21340": 12698492928.0, "21345": 12698492928.0, "21350": 12698492928.0, "21355": 12698492928.0, "21360": 12698492928.0, "21365": 12698492928.0, "21370": 12698492928.0, "21375": 12698492928.0, "21380": 12698492928.0, "21385": 12698492928.0, "21390": 12698492928.0, "21395": 12698492928.0, "21400": 12698492928.0, "21405": 12698492928.0, "21410": 12698492928.0, "21415": 12698492928.0, "21420": 12698492928.0, "21425": 12698492928.0, "21430": 12698492928.0, "21435": 12698492928.0, "21440": 12698492928.0, "21445": 12698492928.0, "21450": 12698492928.0, "21455": 12698492928.0, "21460": 12698492928.0, "21465": 12698492928.0, "21470": 12698492928.0, "21475": 12698492928.0, "21480": 12698492928.0, "21485": 12698492928.0, "21490": 12698492928.0, "21495": 12698492928.0, "21500": 12698492928.0, "21505": 12698492928.0, "21510": 12698492928.0, "21515": 12698492928.0, "21520": 12698492928.0, "21525": 12698492928.0, "21530": 12698492928.0, "21535": 12698492928.0, "21540": 12698492928.0, "21545": 12698492928.0, "21550": 12698492928.0, "21555": 12698492928.0, "21560": 12698492928.0, "21565": 12698492928.0, "21570": 12698492928.0, "21575": 12698492928.0, "21580": 12698492928.0, "21585": 12698492928.0, "21590": 12698492928.0, "21595": 12698492928.0, "21600": 12698492928.0, "21605": 12698492928.0, "21610": 12698492928.0, "21615": 12698492928.0, "21620": 12698492928.0, "21625": 12698492928.0, "21630": 12698492928.0, "21635": 12698492928.0, "21640": 12698492928.0, "21645": 12698492928.0, "21650": 12698492928.0, "21655": 12698492928.0, "21660": 12698492928.0, "21665": 12698492928.0, "21670": 12698492928.0, "21675": 12698492928.0, "21680": 12698492928.0, "21685": 12698492928.0, "21690": 12698492928.0, "21695": 12698492928.0, "21700": 12698492928.0, "21705": 12698492928.0, "21710": 12698492928.0, "21715": 12698492928.0, "21720": 12698492928.0, "21725": 12698492928.0, "21730": 12698492928.0, "21735": 12698492928.0, "21740": 12698492928.0, "21745": 12698492928.0, "21750": 12698492928.0, "21755": 12698492928.0, "21760": 12698492928.0, "21765": 12698492928.0, "21770": 12698492928.0, "21775": 12698492928.0, "21780": 12698492928.0, "21785": 12698492928.0, "21790": 12698492928.0, "21795": 12698492928.0, "21800": 12698492928.0, "21805": 12698492928.0, "21810": 12698492928.0, "21815": 12698492928.0, "21820": 12698492928.0, "21825": 12698492928.0, "21830": 12698492928.0, "21835": 12698492928.0, "21840": 12698492928.0, "21845": 12698492928.0, "21850": 12698492928.0, "21855": 12698492928.0, "21860": 12698492928.0, "21865": 12698492928.0, "21870": 12698492928.0, "21875": 12698492928.0, "21880": 12698492928.0, "21885": 12698492928.0, "21890": 12698492928.0, "21895": 12698492928.0, "21900": 12698492928.0, "21905": 12698492928.0, "21910": 12698492928.0, "21915": 12698492928.0, "21920": 12698492928.0, "21925": 12698492928.0, "21930": 12698492928.0, "21935": 12698492928.0, "21940": 12698492928.0, "21945": 12698492928.0, "21950": 12698492928.0, "21955": 12698492928.0, "21960": 12698492928.0, "21965": 12698492928.0, "21970": 12698492928.0, "21975": 12698492928.0, "21980": 12698492928.0, "21985": 12698492928.0, "21990": 12698492928.0, "21995": 12698492928.0, "22000": 12698492928.0, "22005": 12698492928.0, "22010": 12698492928.0, "22015": 12698492928.0, "22020": 12698492928.0, "22025": 12698492928.0, "22030": 12698492928.0, "22035": 12698492928.0, "22040": 12698492928.0, "22045": 12698492928.0, "22050": 12698492928.0, "22055": 12698492928.0, "22060": 12698492928.0, "22065": 12698492928.0, "22070": 12698492928.0, "22075": 12698492928.0, "22080": 12698492928.0, "22085": 12698492928.0, "22090": 12698492928.0, "22095": 12698492928.0, "22100": 12698492928.0, "22105": 12698492928.0, "22110": 12698492928.0, "22115": 12698492928.0, "22120": 12698492928.0, "22125": 12698492928.0, "22130": 12698492928.0, "22135": 12698492928.0, "22140": 12698492928.0, "22145": 12698492928.0, "22150": 12698492928.0, "22155": 12698492928.0, "22160": 12698492928.0, "22165": 12698492928.0, "22170": 12698492928.0, "22175": 12698492928.0, "22180": 12698492928.0, "22185": 12698492928.0, "22190": 12698492928.0, "22195": 12698492928.0, "22200": 12698492928.0, "22205": 12698492928.0, "22210": 12698492928.0, "22215": 12698492928.0, "22220": 12698492928.0, "22225": 12698492928.0, "22230": 12698492928.0, "22235": 12698492928.0, "22240": 12698492928.0, "22245": 12698492928.0, "22250": 12698492928.0, "22255": 12698492928.0, "22260": 12698492928.0, "22265": 12698492928.0, "22270": 12698492928.0, "22275": 12698492928.0, "22280": 12698492928.0, "22285": 12698492928.0, "22290": 12698492928.0, "22295": 12698492928.0, "22300": 12698492928.0, "22305": 12698492928.0, "22310": 12698492928.0, "22315": 12698492928.0, "22320": 12698492928.0, "22325": 12698492928.0, "22330": 12698492928.0, "22335": 12698492928.0, "22340": 12698492928.0, "22345": 12698492928.0, "22350": 12698492928.0, "22355": 12698492928.0, "22360": 12698492928.0, "22365": 12698492928.0, "22370": 12698492928.0, "22375": 12698492928.0, "22380": 12698492928.0, "22385": 12698492928.0, "22390": 12698492928.0, "22395": 12698492928.0, "22400": 12698492928.0, "22405": 12698492928.0, "22410": 12698492928.0, "22415": 12698492928.0, "22420": 12698492928.0, "22425": 12698492928.0, "22430": 12698492928.0, "22435": 12698492928.0, "22440": 12698492928.0, "22445": 12698492928.0, "22450": 12698492928.0, "22455": 12698492928.0, "22460": 12698492928.0, "22465": 12698492928.0, "22470": 12698492928.0, "22475": 12698492928.0, "22480": 12698492928.0, "22485": 12698492928.0, "22490": 12698492928.0, "22495": 12698492928.0, "22500": 12698492928.0, "22505": 12698492928.0, "22510": 12698492928.0, "22515": 12698492928.0, "22520": 12698492928.0, "22525": 12698492928.0, "22530": 12698492928.0, "22535": 12698492928.0, "22540": 12698492928.0, "22545": 12698492928.0, "22550": 12698492928.0, "22555": 12698492928.0, "22560": 12698492928.0, "22565": 12698492928.0, "22570": 12698492928.0, "22575": 12698492928.0, "22580": 12698492928.0, "22585": 12698492928.0, "22590": 12698492928.0, "22595": 12698492928.0, "22600": 12698492928.0, "22605": 12698492928.0, "22610": 12698492928.0, "22615": 12698492928.0, "22620": 12698492928.0, "22625": 12698492928.0, "22630": 12698492928.0, "22635": 12698492928.0, "22640": 12698492928.0, "22645": 12698492928.0, "22650": 12698492928.0, "22655": 12698492928.0, "22660": 12698492928.0, "22665": 12698492928.0, "22670": 12698492928.0, "22675": 12698492928.0, "22680": 12698492928.0, "22685": 12698492928.0, "22690": 12698492928.0, "22695": 12698492928.0, "22700": 12698492928.0, "22705": 12698492928.0, "22710": 12698492928.0, "22715": 12698492928.0, "22720": 12698492928.0, "22725": 12698492928.0, "22730": 12698492928.0, "22735": 12698492928.0, "22740": 12698492928.0, "22745": 12698492928.0, "22750": 12698492928.0, "22755": 12698492928.0, "22760": 12698492928.0, "22765": 12698492928.0, "22770": 12698492928.0, "22775": 12698492928.0, "22780": 12698492928.0, "22785": 12698492928.0, "22790": 12698492928.0, "22795": 12698492928.0, "22800": 12698492928.0, "22805": 12698492928.0, "22810": 12698492928.0, "22815": 12698492928.0, "22820": 12698492928.0, "22825": 12698492928.0, "22830": 12698492928.0, "22835": 12698492928.0, "22840": 12698492928.0, "22845": 12698492928.0, "22850": 12698492928.0, "22855": 12698492928.0, "22860": 12698492928.0, "22865": 12698492928.0, "22870": 12698492928.0, "22875": 12698492928.0, "22880": 12698492928.0, "22885": 12698492928.0, "22890": 12698492928.0, "22895": 12698492928.0, "22900": 12698492928.0, "22905": 12698492928.0, "22910": 12698492928.0, "22915": 12698492928.0, "22920": 12698492928.0, "22925": 12698492928.0, "22930": 12698492928.0, "22935": 12698492928.0, "22940": 12698492928.0, "22945": 12698492928.0, "22950": 12698492928.0, "22955": 12698492928.0, "22960": 12698492928.0, "22965": 12698492928.0, "22970": 12698492928.0, "22975": 12698492928.0, "22980": 12698492928.0, "22985": 12698492928.0, "22990": 12698492928.0, "22995": 12698492928.0, "23000": 12698492928.0, "23005": 12698492928.0, "23010": 12698492928.0, "23015": 12698492928.0, "23020": 12698492928.0, "23025": 12698492928.0, "23030": 12698492928.0, "23035": 12698492928.0, "23040": 12698492928.0, "23045": 12698492928.0, "23050": 12698492928.0, "23055": 12698492928.0, "23060": 12698492928.0, "23065": 12698492928.0, "23070": 12698492928.0, "23075": 12698492928.0, "23080": 12698492928.0, "23085": 12698492928.0, "23090": 12698492928.0, "23095": 12698492928.0, "23100": 12698492928.0, "23105": 12698492928.0, "23110": 12698492928.0, "23115": 12698492928.0, "23120": 12698492928.0, "23125": 12698492928.0, "23130": 12698492928.0, "23135": 12698492928.0, "23140": 12698492928.0, "23145": 12698492928.0, "23150": 12698492928.0, "23155": 12698492928.0, "23160": 12698492928.0, "23165": 12698492928.0, "23170": 12698492928.0, "23175": 12698492928.0, "23180": 12698492928.0, "23185": 12698492928.0, "23190": 12698492928.0, "23195": 12698492928.0, "23200": 12698492928.0, "23205": 12698492928.0, "23210": 12698492928.0, "23215": 12698492928.0, "23220": 12698492928.0, "23225": 12698492928.0, "23230": 12698492928.0, "23235": 12698492928.0, "23240": 12698492928.0, "23245": 12698492928.0, "23250": 12698492928.0, "23255": 12698492928.0, "23260": 12698492928.0, "23265": 12698492928.0, "23270": 12698492928.0, "23275": 12698492928.0, "23280": 12698492928.0, "23285": 12698492928.0, "23290": 12698492928.0, "23295": 12698492928.0, "23300": 12698492928.0, "23305": 12698492928.0, "23310": 12698492928.0, "23315": 12698492928.0, "23320": 12698492928.0, "23325": 12698492928.0, "23330": 12698492928.0, "23335": 12698492928.0, "23340": 12698492928.0, "23345": 12698492928.0, "23350": 12698492928.0, "23355": 12698492928.0, "23360": 12698492928.0, "23365": 12698492928.0, "23370": 12698492928.0, "23375": 12698492928.0, "23380": 12698492928.0, "23385": 12698492928.0, "23390": 12698492928.0, "23395": 12698492928.0, "23400": 12698492928.0, "23405": 12698492928.0, "23410": 12698492928.0, "23415": 12698492928.0, "23420": 12698492928.0, "23425": 12698492928.0, "23430": 12698492928.0, "23435": 12698492928.0, "23440": 12698492928.0, "23445": 12698492928.0, "23450": 12698492928.0, "23455": 12698492928.0, "23460": 12698492928.0, "23465": 12698492928.0, "23470": 12698492928.0, "23475": 12698492928.0, "23480": 12698492928.0, "23485": 12698492928.0, "23490": 12698492928.0, "23495": 12698492928.0, "23500": 12698492928.0, "23505": 12698492928.0, "23510": 12698492928.0, "23515": 12698492928.0, "23520": 12698492928.0, "23525": 12698492928.0, "23530": 12698492928.0, "23535": 12698492928.0, "23540": 12698492928.0, "23545": 12698492928.0, "23550": 12698492928.0, "23555": 12698492928.0, "23560": 12698492928.0, "23565": 12698492928.0, "23570": 12698492928.0, "23575": 12698492928.0, "23580": 12698492928.0, "23585": 12698492928.0, "23590": 12698492928.0, "23595": 12698492928.0, "23600": 12698492928.0, "23605": 12698492928.0, "23610": 12698492928.0, "23615": 12698492928.0, "23620": 12698492928.0, "23625": 12698492928.0, "23630": 12698492928.0, "23635": 12698492928.0, "23640": 12698492928.0, "23645": 12698492928.0, "23650": 12698492928.0, "23655": 12698492928.0, "23660": 12698492928.0, "23665": 12698492928.0, "23670": 12698492928.0, "23675": 12698492928.0, "23680": 12698492928.0, "23685": 12698492928.0, "23690": 12698492928.0, "23695": 12698492928.0, "23700": 12698492928.0, "23705": 12698492928.0, "23710": 12698492928.0, "23715": 12698492928.0, "23720": 12698492928.0, "23725": 12698492928.0, "23730": 12698492928.0, "23735": 12698492928.0, "23740": 12698492928.0, "23745": 12698492928.0, "23750": 12698492928.0, "23755": 12698492928.0, "23760": 12698492928.0, "23765": 12698492928.0, "23770": 12698492928.0, "23775": 12698492928.0, "23780": 12698492928.0, "23785": 12698492928.0, "23790": 12698492928.0, "23795": 12698492928.0, "23800": 12698492928.0, "23805": 12698492928.0, "23810": 12698492928.0, "23815": 12698492928.0, "23820": 12698492928.0, "23825": 12698492928.0, "23830": 12698492928.0, "23835": 12698492928.0, "23840": 12698492928.0, "23845": 12698492928.0, "23850": 12698492928.0, "23855": 12698492928.0, "23860": 12698492928.0, "23865": 12698492928.0, "23870": 12698492928.0, "23875": 12698492928.0, "23880": 12698492928.0, "23885": 12698492928.0, "23890": 12698492928.0, "23895": 12698492928.0, "23900": 12698492928.0, "23905": 12698492928.0, "23910": 12698492928.0, "23915": 12698492928.0, "23920": 12698492928.0, "23925": 12698492928.0, "23930": 12698492928.0, "23935": 12698492928.0, "23940": 12698492928.0, "23945": 12698492928.0, "23950": 12698492928.0, "23955": 12698492928.0, "23960": 12698492928.0, "23965": 12698492928.0, "23970": 12698492928.0, "23975": 12698492928.0, "23980": 12698492928.0, "23985": 12698492928.0, "23990": 12698492928.0, "23995": 12698492928.0, "24000": 12698492928.0, "24005": 12698492928.0, "24010": 12698492928.0, "24015": 12698492928.0, "24020": 12698492928.0, "24025": 12698492928.0, "24030": 12698492928.0, "24035": 12698492928.0, "24040": 12698492928.0, "24045": 12698492928.0, "24050": 12698492928.0, "24055": 12698492928.0, "24060": 12698492928.0, "24065": 12698492928.0, "24070": 12698492928.0, "24075": 12698492928.0, "24080": 12698492928.0, "24085": 12698492928.0, "24090": 12698492928.0, "24095": 12698492928.0, "24100": 12698492928.0, "24105": 12698492928.0, "24110": 12698492928.0, "24115": 12698492928.0, "24120": 12698492928.0, "24125": 12698492928.0, "24130": 12698492928.0, "24135": 12698492928.0, "24140": 12698492928.0, "24145": 12698492928.0, "24150": 12698492928.0, "24155": 12698492928.0, "24160": 12698492928.0, "24165": 12698492928.0, "24170": 12698492928.0, "24175": 12698492928.0, "24180": 12698492928.0, "24185": 12698492928.0, "24190": 12698492928.0, "24195": 12698492928.0, "24200": 12698492928.0, "24205": 12698492928.0, "24210": 12698492928.0, "24215": 12698492928.0, "24220": 12698492928.0, "24225": 12698492928.0, "24230": 12698492928.0, "24235": 12698492928.0, "24240": 12698492928.0, "24245": 12698492928.0, "24250": 12698492928.0, "24255": 12698492928.0, "24260": 12698492928.0, "24265": 12698492928.0, "24270": 12698492928.0, "24275": 12698492928.0, "24280": 12698492928.0, "24285": 12698492928.0, "24290": 12698492928.0, "24295": 12698492928.0, "24300": 12698492928.0, "24305": 12698492928.0, "24310": 12698492928.0, "24315": 12698492928.0, "24320": 12698492928.0, "24325": 12698492928.0, "24330": 12698492928.0, "24335": 12698492928.0, "24340": 12698492928.0, "24345": 12698492928.0, "24350": 12698492928.0, "24355": 12698492928.0, "24360": 12698492928.0, "24365": 12698492928.0, "24370": 12698492928.0, "24375": 12698492928.0, "24380": 12698492928.0, "24385": 12698492928.0, "24390": 12698492928.0, "24395": 12698492928.0, "24400": 12698492928.0, "24405": 12698492928.0, "24410": 12698492928.0, "24415": 12698492928.0, "24420": 12698492928.0, "24425": 12698492928.0, "24430": 12698492928.0, "24435": 12698492928.0, "24440": 12698492928.0, "24445": 12698492928.0, "24450": 12698492928.0, "24455": 12698492928.0, "24460": 12698492928.0, "24465": 12698492928.0, "24470": 12698492928.0, "24475": 12698492928.0, "24480": 12698492928.0, "24485": 12698492928.0, "24490": 12698492928.0, "24495": 12698492928.0, "24500": 12698492928.0, "24505": 12698492928.0, "24510": 12698492928.0, "24515": 12698492928.0, "24520": 12698492928.0, "24525": 12698492928.0, "24530": 12698492928.0, "24535": 12698492928.0, "24540": 12698492928.0, "24545": 12698492928.0, "24550": 12698492928.0, "24555": 12698492928.0, "24560": 12698492928.0, "24565": 12698492928.0, "24570": 12698492928.0, "24575": 12698492928.0, "24580": 12698492928.0, "24585": 12698492928.0, "24590": 12698492928.0, "24595": 12698492928.0, "24600": 12698492928.0, "24605": 12698492928.0, "24610": 12698492928.0, "24615": 12698492928.0, "24620": 12698492928.0, "24625": 12698492928.0, "24630": 12698492928.0, "24635": 12698492928.0, "24640": 12698492928.0, "24645": 12698492928.0, "24650": 12698492928.0, "24655": 12698492928.0, "24660": 12698492928.0, "24665": 12698492928.0, "24670": 12698492928.0, "24675": 12698492928.0, "24680": 12698492928.0, "24685": 12698492928.0, "24690": 12698492928.0, "24695": 12698492928.0, "24700": 12698492928.0, "24705": 12698492928.0, "24710": 12698492928.0, "24715": 12698492928.0, "24720": 12698492928.0, "24725": 12698492928.0, "24730": 12698492928.0, "24735": 12698492928.0, "24740": 12698492928.0, "24745": 12698492928.0, "24750": 12698492928.0, "24755": 12698492928.0, "24760": 12698492928.0, "24765": 12698492928.0, "24770": 12698492928.0, "24775": 12698492928.0, "24780": 12698492928.0, "24785": 12698492928.0, "24790": 12698492928.0, "24795": 12698492928.0, "24800": 12698492928.0, "24805": 12698492928.0, "24810": 12698492928.0, "24815": 12698492928.0, "24820": 12698492928.0, "24825": 12698492928.0, "24830": 12698492928.0, "24835": 12698492928.0, "24840": 12698492928.0, "24845": 12698492928.0, "24850": 12698492928.0, "24855": 12698492928.0, "24860": 12698492928.0, "24865": 12698492928.0, "24870": 12698492928.0, "24875": 12698492928.0, "24880": 12698492928.0, "24885": 12698492928.0, "24890": 12698492928.0, "24895": 12698492928.0, "24900": 12698492928.0, "24905": 12698492928.0, "24910": 12698492928.0, "24915": 12698492928.0, "24920": 12698492928.0, "24925": 12698492928.0, "24930": 12698492928.0, "24935": 12698492928.0, "24940": 12698492928.0, "24945": 12698492928.0, "24950": 12698492928.0, "24955": 12698492928.0, "24960": 12698492928.0, "24965": 12698492928.0, "24970": 12698492928.0, "24975": 12698492928.0, "24980": 12698492928.0, "24985": 12698492928.0, "24990": 12698492928.0, "24995": 12698492928.0, "25000": 12698492928.0, "25005": 12698492928.0, "25010": 12698492928.0, "25015": 12698492928.0, "25020": 12698492928.0, "25025": 12698492928.0, "25030": 12698492928.0, "25035": 12698492928.0, "25040": 12698492928.0, "25045": 12698492928.0, "25050": 12698492928.0, "25055": 12698492928.0, "25060": 12698492928.0, "25065": 12698492928.0, "25070": 12698492928.0, "25075": 12698492928.0, "25080": 12698492928.0, "25085": 12698492928.0, "25090": 12698492928.0, "25095": 12698492928.0, "25100": 12698492928.0, "25105": 12698492928.0, "25110": 12698492928.0, "25115": 12698492928.0, "25120": 12698492928.0, "25125": 12698492928.0, "25130": 12698492928.0, "25135": 12698492928.0, "25140": 12698492928.0, "25145": 12698492928.0, "25150": 12698492928.0, "25155": 12698492928.0, "25160": 12698492928.0, "25165": 12698492928.0, "25170": 12698492928.0, "25175": 12698492928.0, "25180": 12698492928.0, "25185": 12698492928.0, "25190": 12698492928.0, "25195": 12698492928.0, "25200": 12698492928.0, "25205": 12698492928.0, "25210": 12698492928.0, "25215": 12698492928.0, "25220": 12698492928.0, "25225": 12698492928.0, "25230": 12698492928.0, "25235": 12698492928.0, "25240": 12698492928.0, "25245": 12698492928.0, "25250": 12698492928.0, "25255": 12698492928.0, "25260": 12698492928.0, "25265": 12698492928.0, "25270": 12698492928.0, "25275": 12698492928.0, "25280": 12698492928.0, "25285": 12698492928.0, "25290": 12698492928.0, "25295": 12698492928.0, "25300": 12698492928.0, "25305": 12698492928.0, "25310": 12698492928.0, "25315": 12698492928.0, "25320": 12698492928.0, "25325": 12698492928.0, "25330": 12698492928.0, "25335": 12698492928.0, "25340": 12698492928.0, "25345": 12698492928.0, "25350": 12698492928.0, "25355": 12698492928.0, "25360": 12698492928.0, "25365": 12698492928.0, "25370": 12698492928.0, "25375": 12698492928.0, "25380": 12698492928.0, "25385": 12698492928.0, "25390": 12698492928.0, "25395": 12698492928.0, "25400": 12698492928.0, "25405": 12698492928.0, "25410": 12698492928.0, "25415": 12698492928.0, "25420": 12698492928.0, "25425": 12698492928.0, "25430": 12698492928.0, "25435": 12698492928.0, "25440": 12698492928.0, "25445": 12698492928.0, "25450": 12698492928.0, "25455": 12698492928.0, "25460": 12698492928.0, "25465": 12698492928.0, "25470": 12698492928.0, "25475": 12698492928.0, "25480": 12698492928.0, "25485": 12698492928.0, "25490": 12698492928.0, "25495": 12698492928.0, "25500": 12698492928.0, "25505": 12698492928.0, "25510": 12698492928.0, "25515": 12698492928.0, "25520": 12698492928.0, "25525": 12698492928.0, "25530": 12698492928.0, "25535": 12698492928.0, "25540": 12698492928.0, "25545": 12698492928.0, "25550": 12698492928.0, "25555": 12698492928.0, "25560": 12698492928.0, "25565": 12698492928.0, "25570": 12698492928.0, "25575": 12698492928.0, "25580": 12698492928.0, "25585": 12698492928.0, "25590": 12698492928.0, "25595": 12698492928.0, "25600": 12698492928.0, "25605": 12698492928.0, "25610": 12698492928.0, "25615": 12698492928.0, "25620": 12698492928.0, "25625": 12698492928.0, "25630": 12698492928.0, "25635": 12698492928.0, "25640": 12698492928.0, "25645": 12698492928.0, "25650": 12698492928.0, "25655": 12698492928.0, "25660": 12698492928.0, "25665": 12698492928.0, "25670": 12698492928.0, "25675": 12698492928.0, "25680": 12698492928.0, "25685": 12698492928.0, "25690": 12698492928.0, "25695": 12698492928.0, "25700": 12698492928.0, "25705": 12698492928.0, "25710": 12698492928.0, "25715": 12698492928.0, "25720": 12698492928.0, "25725": 12698492928.0, "25730": 12698492928.0, "25735": 12698492928.0, "25740": 12698492928.0, "25745": 12698492928.0, "25750": 12698492928.0, "25755": 12698492928.0, "25760": 12698492928.0, "25765": 12698492928.0, "25770": 12698492928.0, "25775": 12698492928.0, "25780": 12698492928.0, "25785": 12698492928.0, "25790": 12698492928.0, "25795": 12698492928.0, "25800": 12698492928.0, "25805": 12698492928.0, "25810": 12698492928.0, "25815": 12698492928.0, "25820": 12698492928.0, "25825": 12698492928.0, "25830": 12698492928.0, "25835": 12698492928.0, "25840": 12698492928.0, "25845": 12698492928.0, "25850": 12698492928.0, "25855": 12698492928.0, "25860": 12698492928.0, "25865": 12698492928.0, "25870": 12698492928.0, "25875": 12698492928.0, "25880": 12698492928.0, "25885": 12698492928.0, "25890": 12698492928.0, "25895": 12698492928.0, "25900": 12698492928.0, "25905": 12698492928.0, "25910": 12698492928.0, "25915": 12698492928.0, "25920": 12698492928.0, "25925": 12698492928.0, "25930": 12698492928.0, "25935": 12698492928.0, "25940": 12698492928.0, "25945": 12698492928.0, "25950": 12698492928.0, "25955": 12698492928.0, "25960": 12698492928.0, "25965": 12698492928.0, "25970": 12698492928.0, "25975": 12698492928.0, "25980": 12698492928.0, "25985": 12698492928.0, "25990": 12698492928.0, "25995": 12698492928.0, "26000": 12698492928.0, "26005": 12698492928.0, "26010": 12698492928.0, "26015": 12698492928.0, "26020": 12698492928.0, "26025": 12698492928.0, "26030": 12698492928.0, "26035": 12698492928.0, "26040": 12698492928.0, "26045": 12698492928.0, "26050": 12698492928.0, "26055": 12698492928.0, "26060": 12698492928.0, "26065": 12698492928.0, "26070": 12698492928.0, "26075": 12698492928.0, "26080": 12698492928.0, "26085": 12698492928.0, "26090": 12698492928.0, "26095": 12698492928.0, "26100": 12698492928.0, "26105": 12698492928.0, "26110": 12698492928.0, "26115": 12698492928.0, "26120": 12698492928.0, "26125": 12698492928.0, "26130": 12698492928.0, "26135": 12698492928.0, "26140": 12698492928.0, "26145": 12698492928.0, "26150": 12698492928.0, "26155": 12698492928.0, "26160": 12698492928.0, "26165": 12698492928.0, "26170": 12698492928.0, "26175": 12698492928.0, "26180": 12698492928.0, "26185": 12698492928.0, "26190": 12698492928.0, "26195": 12698492928.0, "26200": 12698492928.0, "26205": 12698492928.0, "26210": 12698492928.0, "26215": 12698492928.0, "26220": 12698492928.0, "26225": 12698492928.0, "26230": 12698492928.0, "26235": 12698492928.0, "26240": 12698492928.0, "26245": 12698492928.0, "26250": 12698492928.0, "26255": 12698492928.0, "26260": 12698492928.0, "26265": 12698492928.0, "26270": 12698492928.0, "26275": 12698492928.0, "26280": 12698492928.0, "26285": 12698492928.0, "26290": 12698492928.0, "26295": 12698492928.0, "26300": 12698492928.0, "26305": 12698492928.0, "26310": 12698492928.0, "26315": 12698492928.0, "26320": 12698492928.0, "26325": 12698492928.0, "26330": 12698492928.0, "26335": 12698492928.0, "26340": 12698492928.0, "26345": 12698492928.0, "26350": 12698492928.0, "26355": 12698492928.0, "26360": 12698492928.0, "26365": 12698492928.0, "26370": 12698492928.0, "26375": 12698492928.0, "26380": 12698492928.0, "26385": 12698492928.0, "26390": 12698492928.0, "26395": 12698492928.0, "26400": 12698492928.0, "26405": 12698492928.0, "26410": 12698492928.0, "26415": 12698492928.0, "26420": 12698492928.0, "26425": 12698492928.0, "26430": 12698492928.0, "26435": 12698492928.0, "26440": 12698492928.0, "26445": 12698492928.0, "26450": 12698492928.0, "26455": 12698492928.0, "26460": 12698492928.0, "26465": 12698492928.0, "26470": 12698492928.0, "26475": 12698492928.0, "26480": 12698492928.0, "26485": 12698492928.0, "26490": 12698492928.0, "26495": 12698492928.0, "26500": 12698492928.0, "26505": 12698492928.0, "26510": 12698492928.0, "26515": 12698492928.0, "26520": 12698492928.0, "26525": 12698492928.0, "26530": 12698492928.0, "26535": 12698492928.0, "26540": 12698492928.0, "26545": 12698492928.0, "26550": 12698492928.0, "26555": 12698492928.0, "26560": 12698492928.0, "26565": 12698492928.0, "26570": 12698492928.0, "26575": 12698492928.0, "26580": 12698492928.0, "26585": 12698492928.0, "26590": 12698492928.0, "26595": 12698492928.0, "26600": 12698492928.0, "26605": 12698492928.0, "26610": 12698492928.0, "26615": 12698492928.0, "26620": 12698492928.0, "26625": 12698492928.0, "26630": 12698492928.0, "26635": 12698492928.0, "26640": 12698492928.0, "26645": 12698492928.0, "26650": 12698492928.0, "26655": 12698492928.0, "26660": 12698492928.0, "26665": 12698492928.0, "26670": 12698492928.0, "26675": 12698492928.0, "26680": 12698492928.0, "26685": 12698492928.0, "26690": 12698492928.0, "26695": 12698492928.0, "26700": 12698492928.0, "26705": 12698492928.0, "26710": 12698492928.0, "26715": 12698492928.0, "26720": 12698492928.0, "26725": 12698492928.0, "26730": 12698492928.0, "26735": 12698492928.0, "26740": 12698492928.0, "26745": 12698492928.0, "26750": 12698492928.0, "26755": 12698492928.0, "26760": 12698492928.0, "26765": 12698492928.0, "26770": 12698492928.0, "26775": 12698492928.0, "26780": 12698492928.0, "26785": 12698492928.0, "26790": 12698492928.0, "26795": 12698492928.0, "26800": 12698492928.0, "26805": 12698492928.0, "26810": 12698492928.0, "26815": 12698492928.0, "26820": 12698492928.0, "26825": 12698492928.0, "26830": 12698492928.0, "26835": 12698492928.0, "26840": 12698492928.0, "26845": 12698492928.0, "26850": 12698492928.0, "26855": 12698492928.0, "26860": 12698492928.0, "26865": 12698492928.0, "26870": 12698492928.0, "26875": 12698492928.0, "26880": 12698492928.0, "26885": 12698492928.0, "26890": 12698492928.0, "26895": 12698492928.0, "26900": 12698492928.0, "26905": 12698492928.0, "26910": 12698492928.0, "26915": 12698492928.0, "26920": 12698492928.0, "26925": 12698492928.0, "26930": 12698492928.0, "26935": 12698492928.0, "26940": 12698492928.0, "26945": 12698492928.0, "26950": 12698492928.0, "26955": 12698492928.0, "26960": 12698492928.0, "26965": 12698492928.0, "26970": 12698492928.0, "26975": 12698492928.0, "26980": 12698492928.0, "26985": 12698492928.0, "26990": 12698492928.0, "26995": 12698492928.0, "27000": 12698492928.0, "27005": 12698492928.0, "27010": 12698492928.0, "27015": 12698492928.0, "27020": 12698492928.0, "27025": 12698492928.0, "27030": 12698492928.0, "27035": 12698492928.0, "27040": 12698492928.0, "27045": 12698492928.0, "27050": 12698492928.0, "27055": 12698492928.0, "27060": 12698492928.0, "27065": 12698492928.0, "27070": 12698492928.0, "27075": 12698492928.0, "27080": 12698492928.0, "27085": 12698492928.0, "27090": 12698492928.0, "27095": 12698492928.0, "27100": 12698492928.0, "27105": 12698492928.0, "27110": 12698492928.0, "27115": 12698492928.0, "27120": 12698492928.0, "27125": 12698492928.0, "27130": 12698492928.0, "27135": 12698492928.0, "27140": 12698492928.0, "27145": 12698492928.0, "27150": 12698492928.0, "27155": 12698492928.0, "27160": 12698492928.0, "27165": 12698492928.0, "27170": 12698492928.0, "27175": 12698492928.0, "27180": 12698492928.0, "27185": 12698492928.0, "27190": 12698492928.0, "27195": 12698492928.0, "27200": 12698492928.0, "27205": 12698492928.0, "27210": 12698492928.0, "27215": 12698492928.0, "27220": 12698492928.0, "27225": 12698492928.0, "27230": 12698492928.0, "27235": 12698492928.0, "27240": 12698492928.0, "27245": 12698492928.0, "27250": 12698492928.0, "27255": 12698492928.0, "27260": 12698492928.0, "27265": 12698492928.0, "27270": 12698492928.0, "27275": 12698492928.0, "27280": 12698492928.0, "27285": 12698492928.0, "27290": 12698492928.0, "27295": 12698492928.0, "27300": 12698492928.0, "27305": 12698492928.0, "27310": 12698492928.0, "27315": 12698492928.0, "27320": 12698492928.0, "27325": 12698492928.0, "27330": 12698492928.0, "27335": 12698492928.0, "27340": 12698492928.0, "27345": 12698492928.0, "27350": 12698492928.0, "27355": 12698492928.0, "27360": 12698492928.0, "27365": 12698492928.0, "27370": 12698492928.0, "27375": 12698492928.0, "27380": 12698492928.0, "27385": 12698492928.0, "27390": 12698492928.0, "27395": 12698492928.0, "27400": 12698492928.0, "27405": 12698492928.0, "27410": 12698492928.0, "27415": 12698492928.0, "27420": 12698492928.0, "27425": 12698492928.0, "27430": 12698492928.0, "27435": 12698492928.0, "27440": 12698492928.0, "27445": 12698492928.0, "27450": 12698492928.0, "27455": 12698492928.0, "27460": 12698492928.0, "27465": 12698492928.0, "27470": 12698492928.0, "27475": 12698492928.0, "27480": 12698492928.0, "27485": 12698492928.0, "27490": 12698492928.0, "27495": 12698492928.0, "27500": 12698492928.0, "27505": 12698492928.0, "27510": 12698492928.0, "27515": 12698492928.0, "27520": 12698492928.0, "27525": 12698492928.0, "27530": 12698492928.0, "27535": 12698492928.0, "27540": 12698492928.0, "27545": 12698492928.0, "27550": 12698492928.0, "27555": 12698492928.0, "27560": 12698492928.0, "27565": 12698492928.0, "27570": 12698492928.0, "27575": 12698492928.0, "27580": 12698492928.0, "27585": 12698492928.0, "27590": 12698492928.0, "27595": 12698492928.0, "27600": 12698492928.0, "27605": 12698492928.0, "27610": 12698492928.0, "27615": 12698492928.0, "27620": 12698492928.0, "27625": 12698492928.0, "27630": 12698492928.0, "27635": 12698492928.0, "27640": 12698492928.0, "27645": 12698492928.0, "27650": 12698492928.0, "27655": 12698492928.0, "27660": 12698492928.0, "27665": 12698492928.0, "27670": 12698492928.0, "27675": 12698492928.0, "27680": 12698492928.0, "27685": 12698492928.0, "27690": 12698492928.0, "27695": 12698492928.0, "27700": 12698492928.0, "27705": 12698492928.0, "27710": 12698492928.0, "27715": 12698492928.0, "27720": 12698492928.0, "27725": 12698492928.0, "27730": 12698492928.0, "27735": 12698492928.0, "27740": 12698492928.0, "27745": 12698492928.0, "27750": 12698492928.0, "27755": 12698492928.0, "27760": 12698492928.0, "27765": 12698492928.0, "27770": 12698492928.0, "27775": 12698492928.0, "27780": 12698492928.0, "27785": 12698492928.0, "27790": 12698492928.0, "27795": 12698492928.0, "27800": 12698492928.0, "27805": 12698492928.0, "27810": 12698492928.0, "27815": 12698492928.0, "27820": 12698492928.0, "27825": 12698492928.0, "27830": 12698492928.0, "27835": 12698492928.0, "27840": 12698492928.0, "27845": 12698492928.0, "27850": 12698492928.0, "27855": 12698492928.0, "27860": 12698492928.0, "27865": 12698492928.0, "27870": 12698492928.0, "27875": 12698492928.0, "27880": 12698492928.0, "27885": 12698492928.0, "27890": 12698492928.0, "27895": 12698492928.0, "27900": 12698492928.0, "27905": 12698492928.0, "27910": 12698492928.0, "27915": 12698492928.0, "27920": 12698492928.0, "27925": 12698492928.0, "27930": 12698492928.0, "27935": 12698492928.0, "27940": 12698492928.0, "27945": 12698492928.0, "27950": 12698492928.0, "27955": 12698492928.0, "27960": 12698492928.0, "27965": 12698492928.0, "27970": 12698492928.0, "27975": 12698492928.0, "27980": 12698492928.0, "27985": 12698492928.0, "27990": 12698492928.0, "27995": 12698492928.0, "28000": 12698492928.0, "28005": 12698492928.0, "28010": 12698492928.0, "28015": 12698492928.0, "28020": 12698492928.0, "28025": 12698492928.0, "28030": 12698492928.0, "28035": 12698492928.0, "28040": 12698492928.0, "28045": 12698492928.0, "28050": 12698492928.0, "28055": 12698492928.0, "28060": 12698492928.0, "28065": 12698492928.0, "28070": 12698492928.0, "28075": 12698492928.0, "28080": 12698492928.0, "28085": 12698492928.0, "28090": 12698492928.0, "28095": 12698492928.0, "28100": 12698492928.0, "28105": 12698492928.0, "28110": 12698492928.0, "28115": 12698492928.0, "28120": 12698492928.0, "28125": 12698492928.0, "28130": 12698492928.0, "28135": 12698492928.0, "28140": 12698492928.0, "28145": 12698492928.0, "28150": 12698492928.0, "28155": 12698492928.0, "28160": 12698492928.0, "28165": 12698492928.0, "28170": 12698492928.0, "28175": 12698492928.0, "28180": 12698492928.0, "28185": 12698492928.0, "28190": 12698492928.0, "28195": 12698492928.0, "28200": 12698492928.0, "28205": 12698492928.0, "28210": 12698492928.0, "28215": 12698492928.0, "28220": 12698492928.0, "28225": 12698492928.0, "28230": 12698492928.0, "28235": 12698492928.0, "28240": 12698492928.0, "28245": 12698492928.0, "28250": 12698492928.0, "28255": 12698492928.0, "28260": 12698492928.0, "28265": 12698492928.0, "28270": 12698492928.0, "28275": 12698492928.0, "28280": 12698492928.0, "28285": 12698492928.0, "28290": 12698492928.0, "28295": 12698492928.0, "28300": 12698492928.0, "28305": 12698492928.0, "28310": 12698492928.0, "28315": 12698492928.0, "28320": 12698492928.0, "28325": 12698492928.0, "28330": 12698492928.0, "28335": 12698492928.0, "28340": 12698492928.0, "28345": 12698492928.0, "28350": 12698492928.0, "28355": 12698492928.0, "28360": 12698492928.0, "28365": 12698492928.0, "28370": 12698492928.0, "28375": 12698492928.0, "28380": 12698492928.0, "28385": 12698492928.0, "28390": 12698492928.0, "28395": 12698492928.0, "28400": 12698492928.0, "28405": 12698492928.0, "28410": 12698492928.0, "28415": 12698492928.0, "28420": 12698492928.0, "28425": 12698492928.0, "28430": 12698492928.0, "28435": 12698492928.0, "28440": 12698492928.0, "28445": 12698492928.0, "28450": 12698492928.0, "28455": 12698492928.0, "28460": 12698492928.0, "28465": 12698492928.0, "28470": 12698492928.0, "28475": 12698492928.0, "28480": 12698492928.0, "28485": 12698492928.0, "28490": 12698492928.0, "28495": 12698492928.0, "28500": 12698492928.0, "28505": 12698492928.0, "28510": 12698492928.0, "28515": 12698492928.0, "28520": 12698492928.0, "28525": 12698492928.0, "28530": 12698492928.0, "28535": 12698492928.0, "28540": 12698492928.0, "28545": 12698492928.0, "28550": 12698492928.0, "28555": 12698492928.0, "28560": 12698492928.0, "28565": 12698492928.0, "28570": 12698492928.0, "28575": 12698492928.0, "28580": 12698492928.0, "28585": 12698492928.0, "28590": 12698492928.0, "28595": 12698492928.0, "28600": 12698492928.0, "28605": 12698492928.0, "28610": 12698492928.0, "28615": 12698492928.0, "28620": 12698492928.0, "28625": 12698492928.0, "28630": 12698492928.0, "28635": 12698492928.0, "28640": 12698492928.0, "28645": 12698492928.0, "28650": 12698492928.0, "28655": 12698492928.0, "28660": 12698492928.0, "28665": 12698492928.0, "28670": 12698492928.0, "28675": 12698492928.0, "28680": 12698492928.0, "28685": 12698492928.0, "28690": 12698492928.0, "28695": 12698492928.0, "28700": 12698492928.0, "28705": 12698492928.0, "28710": 12698492928.0, "28715": 12698492928.0, "28720": 12698492928.0, "28725": 12698492928.0, "28730": 12698492928.0, "28735": 12698492928.0, "28740": 12698492928.0, "28745": 12698492928.0, "28750": 12698492928.0, "28755": 12698492928.0, "28760": 12698492928.0, "28765": 12698492928.0, "28770": 12698492928.0, "28775": 12698492928.0, "28780": 12698492928.0, "28785": 12698492928.0, "28790": 12698492928.0, "28795": 12698492928.0, "28800": 12698492928.0, "28805": 12698492928.0, "28810": 12698492928.0, "28815": 12698492928.0, "28820": 12698492928.0, "28825": 12698492928.0, "28830": 12698492928.0, "28835": 12698492928.0, "28840": 12698492928.0, "28845": 12698492928.0, "28850": 12698492928.0, "28855": 12698492928.0, "28860": 12698492928.0, "28865": 12698492928.0, "28870": 12698492928.0, "28875": 12698492928.0, "28880": 12698492928.0, "28885": 12698492928.0, "28890": 12698492928.0, "28895": 12698492928.0, "28900": 12698492928.0, "28905": 12698492928.0, "28910": 12698492928.0, "28915": 12698492928.0, "28920": 12698492928.0, "28925": 12698492928.0, "28930": 12698492928.0, "28935": 12698492928.0, "28940": 12698492928.0, "28945": 12698492928.0, "28950": 12698492928.0, "28955": 12698492928.0, "28960": 12698492928.0, "28965": 12698492928.0, "28970": 12698492928.0, "28975": 12698492928.0, "28980": 12698492928.0, "28985": 12698492928.0, "28990": 12698492928.0, "28995": 12698492928.0, "29000": 12698492928.0, "29005": 12698492928.0, "29010": 12698492928.0, "29015": 12698492928.0, "29020": 12698492928.0, "29025": 12698492928.0, "29030": 12698492928.0, "29035": 12698492928.0, "29040": 12698492928.0, "29045": 12698492928.0, "29050": 12698492928.0, "29055": 12698492928.0, "29060": 12698492928.0, "29065": 12698492928.0, "29070": 12698492928.0, "29075": 12698492928.0, "29080": 12698492928.0, "29085": 12698492928.0, "29090": 12698492928.0, "29095": 12698492928.0, "29100": 12698492928.0, "29105": 12698492928.0, "29110": 12698492928.0, "29115": 12698492928.0, "29120": 12698492928.0, "29125": 12698492928.0, "29130": 12698492928.0, "29135": 12698492928.0, "29140": 12698492928.0, "29145": 12698492928.0, "29150": 12698492928.0, "29155": 12698492928.0, "29160": 12698492928.0, "29165": 12698492928.0, "29170": 12698492928.0, "29175": 12698492928.0, "29180": 12698492928.0, "29185": 12698492928.0, "29190": 12698492928.0, "29195": 12698492928.0, "29200": 12698492928.0, "29205": 12698492928.0, "29210": 12698492928.0, "29215": 12698492928.0, "29220": 12698492928.0, "29225": 12698492928.0, "29230": 12698492928.0, "29235": 12698492928.0, "29240": 12698492928.0, "29245": 12698492928.0, "29250": 12698492928.0, "29255": 12698492928.0, "29260": 12698492928.0, "29265": 12698492928.0, "29270": 12698492928.0, "29275": 12698492928.0, "29280": 12698492928.0, "29285": 12698492928.0, "29290": 12698492928.0, "29295": 12698492928.0, "29300": 12698492928.0, "29305": 12698492928.0, "29310": 12698492928.0, "29315": 12698492928.0, "29320": 12698492928.0, "29325": 12698492928.0, "29330": 12698492928.0, "29335": 12698492928.0, "29340": 12698492928.0, "29345": 12698492928.0, "29350": 12698492928.0, "29355": 12698492928.0, "29360": 12698492928.0, "29365": 12698492928.0, "29370": 12698492928.0, "29375": 12698492928.0, "29380": 12698492928.0, "29385": 12698492928.0, "29390": 12698492928.0, "29395": 12698492928.0, "29400": 12698492928.0, "29405": 12698492928.0, "29410": 12698492928.0, "29415": 12698492928.0, "29420": 12698492928.0, "29425": 12698492928.0, "29430": 12698492928.0, "29435": 12698492928.0, "29440": 12698492928.0, "29445": 12698492928.0, "29450": 12698492928.0, "29455": 12698492928.0, "29460": 12698492928.0, "29465": 12698492928.0, "29470": 12698492928.0, "29475": 12698492928.0, "29480": 12698492928.0, "29485": 12698492928.0, "29490": 12698492928.0, "29495": 12698492928.0, "29500": 12698492928.0, "29505": 12698492928.0, "29510": 12698492928.0, "29515": 12698492928.0, "29520": 12698492928.0, "29525": 12698492928.0, "29530": 12698492928.0, "29535": 12698492928.0, "29540": 12698492928.0, "29545": 12698492928.0, "29550": 12698492928.0, "29555": 12698492928.0, "29560": 12698492928.0, "29565": 12698492928.0, "29570": 12698492928.0, "29575": 12698492928.0, "29580": 12698492928.0, "29585": 12698492928.0, "29590": 12698492928.0, "29595": 12698492928.0, "29600": 12698492928.0, "29605": 12698492928.0, "29610": 12698492928.0, "29615": 12698492928.0, "29620": 12698492928.0, "29625": 12698492928.0, "29630": 12698492928.0, "29635": 12698492928.0, "29640": 12698492928.0, "29645": 12698492928.0, "29650": 12698492928.0, "29655": 12698492928.0, "29660": 12698492928.0, "29665": 12698492928.0, "29670": 12698492928.0, "29675": 12698492928.0, "29680": 12698492928.0, "29685": 12698492928.0, "29690": 12698492928.0, "29695": 12698492928.0, "29700": 12698492928.0, "29705": 12698492928.0, "29710": 12698492928.0, "29715": 12698492928.0, "29720": 12698492928.0, "29725": 12698492928.0, "29730": 12698492928.0, "29735": 12698492928.0, "29740": 12698492928.0, "29745": 12698492928.0, "29750": 12698492928.0, "29755": 12698492928.0, "29760": 12698492928.0, "29765": 12698492928.0, "29770": 12698492928.0, "29775": 12698492928.0, "29780": 12698492928.0, "29785": 12698492928.0, "29790": 12698492928.0, "29795": 12698492928.0, "29800": 12698492928.0, "29805": 12698492928.0, "29810": 12698492928.0, "29815": 12698492928.0, "29820": 12698492928.0, "29825": 12698492928.0, "29830": 12698492928.0, "29835": 12698492928.0, "29840": 12698492928.0, "29845": 12698492928.0, "29850": 12698492928.0, "29855": 12698492928.0, "29860": 12698492928.0, "29865": 12698492928.0, "29870": 12698492928.0, "29875": 12698492928.0, "29880": 12698492928.0, "29885": 12698492928.0, "29890": 12698492928.0, "29895": 12698492928.0, "29900": 12698492928.0, "29905": 12698492928.0, "29910": 12698492928.0, "29915": 12698492928.0, "29920": 12698492928.0, "29925": 12698492928.0, "29930": 12698492928.0, "29935": 12698492928.0, "29940": 12698492928.0, "29945": 12698492928.0, "29950": 12698492928.0, "29955": 12698492928.0, "29960": 12698492928.0, "29965": 12698492928.0, "29970": 12698492928.0, "29975": 12698492928.0, "29980": 12698492928.0, "29985": 12698492928.0, "29990": 12698492928.0, "29995": 12698492928.0, "30000": 12698492928.0, "30005": 12698492928.0, "30010": 12698492928.0, "30015": 12698492928.0, "30020": 12698492928.0, "30025": 12698492928.0, "30030": 12698492928.0, "30035": 12698492928.0, "30040": 12698492928.0, "30045": 12698492928.0, "30050": 12698492928.0, "30055": 12698492928.0, "30060": 12698492928.0, "30065": 12698492928.0, "30070": 12698492928.0, "30075": 12698492928.0, "30080": 12698492928.0, "30085": 12698492928.0, "30090": 12698492928.0, "30095": 12698492928.0, "30100": 12698492928.0, "30105": 12698492928.0, "30110": 12698492928.0, "30115": 12698492928.0, "30120": 12698492928.0, "30125": 12698492928.0, "30130": 12698492928.0, "30135": 12698492928.0, "30140": 12698492928.0, "30145": 12698492928.0, "30150": 12698492928.0, "30155": 12698492928.0, "30160": 12698492928.0, "30165": 12698492928.0, "30170": 12698492928.0, "30175": 12698492928.0, "30180": 12698492928.0, "30185": 12698492928.0, "30190": 12698492928.0, "30195": 12698492928.0, "30200": 12698492928.0, "30205": 12698492928.0, "30210": 12698492928.0, "30215": 12698492928.0, "30220": 12698492928.0, "30225": 12698492928.0, "30230": 12698492928.0, "30235": 12698492928.0, "30240": 12698492928.0, "30245": 12698492928.0, "30250": 12698492928.0, "30255": 12698492928.0, "30260": 12698492928.0, "30265": 12698492928.0, "30270": 12698492928.0, "30275": 12698492928.0, "30280": 12698492928.0, "30285": 12698492928.0, "30290": 12698492928.0, "30295": 12698492928.0, "30300": 12698492928.0, "30305": 12698492928.0, "30310": 12698492928.0, "30315": 12698492928.0, "30320": 12698492928.0, "30325": 12698492928.0, "30330": 12698492928.0, "30335": 12698492928.0, "30340": 12698492928.0, "30345": 12698492928.0, "30350": 12698492928.0, "30355": 12698492928.0, "30360": 12698492928.0, "30365": 12698492928.0, "30370": 12698492928.0, "30375": 12698492928.0, "30380": 12698492928.0, "30385": 12698492928.0, "30390": 12698492928.0, "30395": 12698492928.0, "30400": 12698492928.0, "30405": 12698492928.0, "30410": 12698492928.0, "30415": 12698492928.0, "30420": 12698492928.0, "30425": 12698492928.0, "30430": 12698492928.0, "30435": 12698492928.0, "30440": 12698492928.0, "30445": 12698492928.0, "30450": 12698492928.0, "30455": 12698492928.0, "30460": 12698492928.0, "30465": 12698492928.0, "30470": 12698492928.0, "30475": 12698492928.0, "30480": 12698492928.0, "30485": 12698492928.0, "30490": 12698492928.0, "30495": 12698492928.0, "30500": 12698492928.0, "30505": 12698492928.0, "30510": 12698492928.0, "30515": 12698492928.0, "30520": 12698492928.0, "30525": 12698492928.0, "30530": 12698492928.0, "30535": 12698492928.0, "30540": 12698492928.0, "30545": 12698492928.0, "30550": 12698492928.0, "30555": 12698492928.0, "30560": 12698492928.0, "30565": 12698492928.0, "30570": 12698492928.0, "30575": 12698492928.0, "30580": 12698492928.0, "30585": 12698492928.0, "30590": 12698492928.0, "30595": 12698492928.0, "30600": 12698492928.0, "30605": 12698492928.0, "30610": 12698492928.0, "30615": 12698492928.0, "30620": 12698492928.0, "30625": 12698492928.0, "30630": 12698492928.0, "30635": 12698492928.0, "30640": 12698492928.0, "30645": 12698492928.0, "30650": 12698492928.0, "30655": 12698492928.0, "30660": 12698492928.0, "30665": 12698492928.0, "30670": 12698492928.0, "30675": 12698492928.0, "30680": 12698492928.0, "30685": 12698492928.0, "30690": 12698492928.0, "30695": 12698492928.0, "30700": 12698492928.0, "30705": 12698492928.0, "30710": 12698492928.0, "30715": 12698492928.0, "30720": 12698492928.0, "30725": 12698492928.0, "30730": 12698492928.0, "30735": 12698492928.0, "30740": 12698492928.0, "30745": 12698492928.0, "30750": 12698492928.0, "30755": 12698492928.0, "30760": 12698492928.0, "30765": 12698492928.0, "30770": 12698492928.0, "30775": 12698492928.0, "30780": 12698492928.0, "30785": 12698492928.0, "30790": 12698492928.0, "30795": 12698492928.0, "30800": 12698492928.0, "30805": 12698492928.0, "30810": 12698492928.0, "30815": 12698492928.0, "30820": 12698492928.0, "30825": 12698492928.0, "30830": 12698492928.0, "30835": 12698492928.0, "30840": 12698492928.0, "30845": 12698492928.0, "30850": 12698492928.0, "30855": 12698492928.0, "30860": 12698492928.0, "30865": 12698492928.0, "30870": 12698492928.0, "30875": 12698492928.0, "30880": 12698492928.0, "30885": 12698492928.0, "30890": 12698492928.0, "30895": 12698492928.0, "30900": 12698492928.0, "30905": 12698492928.0, "30910": 12698492928.0, "30915": 12698492928.0, "30920": 12698492928.0, "30925": 12698492928.0, "30930": 12698492928.0, "30935": 12698492928.0, "30940": 12698492928.0, "30945": 12698492928.0, "30950": 12698492928.0, "30955": 12698492928.0, "30960": 12698492928.0, "30965": 12698492928.0, "30970": 12698492928.0, "30975": 12698492928.0, "30980": 12698492928.0, "30985": 12698492928.0, "30990": 12698492928.0, "30995": 12698492928.0, "31000": 12698492928.0, "31005": 12698492928.0, "31010": 12698492928.0, "31015": 12698492928.0, "31020": 12698492928.0, "31025": 12698492928.0, "31030": 12698492928.0, "31035": 12698492928.0, "31040": 12698492928.0, "31045": 12698492928.0, "31050": 12698492928.0, "31055": 12698492928.0, "31060": 12698492928.0, "31065": 12698492928.0, "31070": 12698492928.0, "31075": 12698492928.0, "31080": 12698492928.0, "31085": 12698492928.0, "31090": 12698492928.0, "31095": 12698492928.0, "31100": 12698492928.0, "31105": 12698492928.0, "31110": 12698492928.0, "31115": 12698492928.0, "31120": 12698492928.0, "31125": 12698492928.0, "31130": 12698492928.0, "31135": 12698492928.0, "31140": 12698492928.0, "31145": 12698492928.0, "31150": 12698492928.0, "31155": 12698492928.0, "31160": 12698492928.0, "31165": 12698492928.0, "31170": 12698492928.0, "31175": 12698492928.0, "31180": 12698492928.0, "31185": 12698492928.0, "31190": 12698492928.0, "31195": 12698492928.0, "31200": 12698492928.0, "31205": 12698492928.0, "31210": 12698492928.0, "31215": 12698492928.0, "31220": 12698492928.0, "31225": 12698492928.0, "31230": 12698492928.0, "31235": 12698492928.0, "31240": 12698492928.0, "31245": 12698492928.0, "31250": 12698492928.0, "31255": 12698492928.0, "31260": 12698492928.0, "31265": 12698492928.0, "31270": 12698492928.0, "31275": 12698492928.0, "31280": 12698492928.0, "31285": 12698492928.0, "31290": 12698492928.0, "31295": 12698492928.0, "31300": 12698492928.0, "31305": 12698492928.0, "31310": 12698492928.0, "31315": 12698492928.0, "31320": 12698492928.0, "31325": 12698492928.0, "31330": 12698492928.0, "31335": 12698492928.0, "31340": 12698492928.0, "31345": 12698492928.0, "31350": 12698492928.0, "31355": 12698492928.0, "31360": 12698492928.0, "31365": 12698492928.0, "31370": 12698492928.0, "31375": 12698492928.0, "31380": 12698492928.0, "31385": 12698492928.0, "31390": 12698492928.0, "31395": 12698492928.0, "31400": 12698492928.0, "31405": 12698492928.0, "31410": 12698492928.0, "31415": 12698492928.0, "31420": 12698492928.0, "31425": 12698492928.0, "31430": 12698492928.0, "31435": 12698492928.0, "31440": 12698492928.0, "31445": 12698492928.0, "31450": 12698492928.0, "31455": 12698492928.0, "31460": 12698492928.0, "31465": 12698492928.0, "31470": 12698492928.0, "31475": 12698492928.0, "31480": 12698492928.0, "31485": 12698492928.0, "31490": 12698492928.0, "31495": 12698492928.0, "31500": 12698492928.0, "31505": 12698492928.0, "31510": 12698492928.0, "31515": 12698492928.0, "31520": 12698492928.0, "31525": 12698492928.0, "31530": 12698492928.0, "31535": 12698492928.0, "31540": 12698492928.0, "31545": 12698492928.0, "31550": 12698492928.0, "31555": 12698492928.0, "31560": 12698492928.0, "31565": 12698492928.0, "31570": 12698492928.0, "31575": 12698492928.0, "31580": 12698492928.0, "31585": 12698492928.0, "31590": 12698492928.0, "31595": 12698492928.0, "31600": 12698492928.0, "31605": 12698492928.0, "31610": 12698492928.0, "31615": 12698492928.0, "31620": 12698492928.0, "31625": 12698492928.0, "31630": 12698492928.0, "31635": 12698492928.0, "31640": 12698492928.0, "31645": 12698492928.0, "31650": 12698492928.0, "31655": 12698492928.0, "31660": 12698492928.0, "31665": 12698492928.0, "31670": 12698492928.0, "31675": 12698492928.0, "31680": 12698492928.0, "31685": 12698492928.0, "31690": 12698492928.0, "31695": 12698492928.0, "31700": 12698492928.0, "31705": 12698492928.0, "31710": 12698492928.0, "31715": 12698492928.0, "31720": 12698492928.0, "31725": 12698492928.0, "31730": 12698492928.0, "31735": 12698492928.0, "31740": 12698492928.0, "31745": 12698492928.0, "31750": 12698492928.0, "31755": 12698492928.0, "31760": 12698492928.0, "31765": 12698492928.0, "31770": 12698492928.0, "31775": 12698492928.0, "31780": 12698492928.0, "31785": 12698492928.0, "31790": 12698492928.0, "31795": 12698492928.0, "31800": 12698492928.0, "31805": 12698492928.0, "31810": 12698492928.0, "31815": 12698492928.0, "31820": 12698492928.0, "31825": 12698492928.0, "31830": 12698492928.0, "31835": 12698492928.0, "31840": 12698492928.0, "31845": 12698492928.0, "31850": 12698492928.0, "31855": 12698492928.0, "31860": 12698492928.0, "31865": 12698492928.0, "31870": 12698492928.0, "31875": 12698492928.0, "31880": 12698492928.0, "31885": 12698492928.0, "31890": 12698492928.0, "31895": 12698492928.0, "31900": 12698492928.0, "31905": 12698492928.0, "31910": 12698492928.0, "31915": 12698492928.0, "31920": 12698492928.0, "31925": 12698492928.0, "31930": 12698492928.0, "31935": 12698492928.0, "31940": 12698492928.0, "31945": 12698492928.0, "31950": 12698492928.0, "31955": 12698492928.0, "31960": 12698492928.0, "31965": 12698492928.0, "31970": 12698492928.0, "31975": 12698492928.0, "31980": 12698492928.0, "31985": 12698492928.0, "31990": 12698492928.0, "31995": 12698492928.0, "32000": 12698492928.0, "32005": 12698492928.0, "32010": 12698492928.0, "32015": 12698492928.0, "32020": 12698492928.0, "32025": 12698492928.0, "32030": 12698492928.0, "32035": 12698492928.0, "32040": 12698492928.0, "32045": 12698492928.0, "32050": 12698492928.0, "32055": 12698492928.0, "32060": 12698492928.0, "32065": 12698492928.0, "32070": 12698492928.0, "32075": 12698492928.0, "32080": 12698492928.0, "32085": 12698492928.0, "32090": 12698492928.0, "32095": 12698492928.0, "32100": 12698492928.0, "32105": 12698492928.0, "32110": 12698492928.0, "32115": 12698492928.0, "32120": 12698492928.0, "32125": 12698492928.0, "32130": 12698492928.0, "32135": 12698492928.0, "32140": 12698492928.0, "32145": 12698492928.0, "32150": 12698492928.0, "32155": 12698492928.0, "32160": 12698492928.0, "32165": 12698492928.0, "32170": 12698492928.0, "32175": 12698492928.0, "32180": 12698492928.0, "32185": 12698492928.0, "32190": 12698492928.0, "32195": 12698492928.0, "32200": 12698492928.0, "32205": 12698492928.0, "32210": 12698492928.0, "32215": 12698492928.0, "32220": 12698492928.0, "32225": 12698492928.0, "32230": 12698492928.0, "32235": 12698492928.0, "32240": 12698492928.0, "32245": 12698492928.0, "32250": 12698492928.0, "32255": 12698492928.0, "32260": 12698492928.0, "32265": 12698492928.0, "32270": 12698492928.0, "32275": 12698492928.0, "32280": 12698492928.0, "32285": 12698492928.0, "32290": 12698492928.0, "32295": 12698492928.0, "32300": 12698492928.0, "32305": 12698492928.0, "32310": 12698492928.0, "32315": 12698492928.0, "32320": 12698492928.0, "32325": 12698492928.0, "32330": 12698492928.0, "32335": 12698492928.0, "32340": 12698492928.0, "32345": 12698492928.0, "32350": 12698492928.0, "32355": 12698492928.0, "32360": 12698492928.0, "32365": 12698492928.0, "32370": 12698492928.0, "32375": 12698492928.0, "32380": 12698492928.0, "32385": 12698492928.0, "32390": 12698492928.0, "32395": 12698492928.0, "32400": 12698492928.0, "32405": 12698492928.0, "32410": 12698492928.0, "32415": 12698492928.0, "32420": 12698492928.0, "32425": 12698492928.0, "32430": 12698492928.0, "32435": 12698492928.0, "32440": 12698492928.0, "32445": 12698492928.0, "32450": 12698492928.0, "32455": 12698492928.0, "32460": 12698492928.0, "32465": 12698492928.0, "32470": 12698492928.0, "32475": 12698492928.0, "32480": 12698492928.0, "32485": 12698492928.0, "32490": 12698492928.0, "32495": 12698492928.0, "32500": 12698492928.0, "32505": 12698492928.0, "32510": 12698492928.0, "32515": 12698492928.0, "32520": 12698492928.0, "32525": 12698492928.0, "32530": 12698492928.0, "32535": 12698492928.0, "32540": 12698492928.0, "32545": 12698492928.0, "32550": 12698492928.0, "32555": 12698492928.0, "32560": 12698492928.0, "32565": 12698492928.0, "32570": 12698492928.0, "32575": 12698492928.0, "32580": 12698492928.0, "32585": 12698492928.0, "32590": 12698492928.0, "32595": 12698492928.0, "32600": 12698492928.0, "32605": 12698492928.0, "32610": 12698492928.0, "32615": 12698492928.0, "32620": 12698492928.0, "32625": 12698492928.0, "32630": 12698492928.0, "32635": 12698492928.0, "32640": 12698492928.0, "32645": 12698492928.0, "32650": 12698492928.0, "32655": 12698492928.0, "32660": 12698492928.0, "32665": 12698492928.0, "32670": 12698492928.0, "32675": 12698492928.0, "32680": 12698492928.0, "32685": 12698492928.0, "32690": 12698492928.0, "32695": 12698492928.0, "32700": 12698492928.0, "32705": 12698492928.0, "32710": 12698492928.0, "32715": 12698492928.0, "32720": 12698492928.0, "32725": 12698492928.0, "32730": 12698492928.0, "32735": 12698492928.0, "32740": 12698492928.0, "32745": 12698492928.0, "32750": 12698492928.0, "32755": 12698492928.0, "32760": 12698492928.0, "32765": 12698492928.0, "32770": 12698492928.0, "32775": 12698492928.0, "32780": 12698492928.0, "32785": 12698492928.0, "32790": 12698492928.0, "32795": 12698492928.0, "32800": 12698492928.0, "32805": 12698492928.0, "32810": 12698492928.0, "32815": 12698492928.0, "32820": 12698492928.0, "32825": 12698492928.0, "32830": 12698492928.0, "32835": 12698492928.0, "32840": 12698492928.0, "32845": 12698492928.0, "32850": 12698492928.0, "32855": 12698492928.0, "32860": 12698492928.0, "32865": 12698492928.0, "32870": 12698492928.0, "32875": 12698492928.0, "32880": 12698492928.0, "32885": 12698492928.0, "32890": 12698492928.0, "32895": 12698492928.0, "32900": 12698492928.0, "32905": 12698492928.0, "32910": 12698492928.0, "32915": 12698492928.0, "32920": 12698492928.0, "32925": 12698492928.0, "32930": 12698492928.0, "32935": 12698492928.0, "32940": 12698492928.0, "32945": 12698492928.0, "32950": 12698492928.0, "32955": 12698492928.0, "32960": 12698492928.0, "32965": 12698492928.0, "32970": 12698492928.0, "32975": 12698492928.0, "32980": 12698492928.0, "32985": 12698492928.0, "32990": 12698492928.0, "32995": 12698492928.0, "33000": 12698492928.0, "33005": 12698492928.0, "33010": 12698492928.0, "33015": 12698492928.0, "33020": 12698492928.0, "33025": 12698492928.0, "33030": 12698492928.0, "33035": 12698492928.0, "33040": 12698492928.0, "33045": 12698492928.0, "33050": 12698492928.0, "33055": 12698492928.0, "33060": 12698492928.0, "33065": 12698492928.0, "33070": 12698492928.0, "33075": 12698492928.0, "33080": 12698492928.0, "33085": 12698492928.0, "33090": 12698492928.0, "33095": 12698492928.0, "33100": 12698492928.0, "33105": 12698492928.0, "33110": 12698492928.0, "33115": 12698492928.0, "33120": 12698492928.0, "33125": 12698492928.0, "33130": 12698492928.0, "33135": 12698492928.0, "33140": 12698492928.0, "33145": 12698492928.0, "33150": 12698492928.0, "33155": 12698492928.0, "33160": 12698492928.0, "33165": 12698492928.0, "33170": 12698492928.0, "33175": 12698492928.0, "33180": 12698492928.0, "33185": 12698492928.0, "33190": 12698492928.0, "33195": 12698492928.0, "33200": 12698492928.0, "33205": 12698492928.0, "33210": 12698492928.0, "33215": 12698492928.0, "33220": 12698492928.0, "33225": 12698492928.0, "33230": 12698492928.0, "33235": 12698492928.0, "33240": 12698492928.0, "33245": 12698492928.0, "33250": 12698492928.0, "33255": 12698492928.0, "33260": 12698492928.0, "33265": 12698492928.0, "33270": 12698492928.0, "33275": 12698492928.0, "33280": 12698492928.0, "33285": 12698492928.0, "33290": 12698492928.0, "33295": 12698492928.0, "33300": 12698492928.0, "33305": 12698492928.0, "33310": 12698492928.0, "33315": 12698492928.0, "33320": 12698492928.0, "33325": 12698492928.0, "33330": 12698492928.0, "33335": 12698492928.0, "33340": 12698492928.0, "33345": 12698492928.0, "33350": 12698492928.0, "33355": 12698492928.0, "33360": 12698492928.0, "33365": 12698492928.0, "33370": 12698492928.0, "33375": 12698492928.0, "33380": 12698492928.0, "33385": 12698492928.0, "33390": 12698492928.0, "33395": 12698492928.0, "33400": 12698492928.0, "33405": 12698492928.0, "33410": 12698492928.0, "33415": 12698492928.0, "33420": 12698492928.0, "33425": 12698492928.0, "33430": 12698492928.0, "33435": 12698492928.0, "33440": 12698492928.0, "33445": 12698492928.0, "33450": 12698492928.0, "33455": 12698492928.0, "33460": 12698492928.0, "33465": 12698492928.0, "33470": 12698492928.0, "33475": 12698492928.0, "33480": 12698492928.0, "33485": 12698492928.0, "33490": 12698492928.0, "33495": 12698492928.0, "33500": 12698492928.0, "33505": 12698492928.0, "33510": 12698492928.0, "33515": 12698492928.0, "33520": 12698492928.0, "33525": 12698492928.0, "33530": 12698492928.0, "33535": 12698492928.0, "33540": 12698492928.0, "33545": 12698492928.0, "33550": 12698492928.0, "33555": 12698492928.0, "33560": 12698492928.0, "33565": 12698492928.0, "33570": 12698492928.0, "33575": 12698492928.0, "33580": 12698492928.0, "33585": 12698492928.0, "33590": 12698492928.0, "33595": 12698492928.0, "33600": 12698492928.0, "33605": 12698492928.0, "33610": 12698492928.0, "33615": 12698492928.0, "33620": 12698492928.0, "33625": 12698492928.0, "33630": 12698492928.0, "33635": 12698492928.0, "33640": 12698492928.0, "33645": 12698492928.0, "33650": 12698492928.0, "33655": 12698492928.0, "33660": 12698492928.0, "33665": 12698492928.0, "33670": 12698492928.0, "33675": 12698492928.0, "33680": 12698492928.0, "33685": 12698492928.0, "33690": 12698492928.0, "33695": 12698492928.0, "33700": 12698492928.0, "33705": 12698492928.0, "33710": 12698492928.0, "33715": 12698492928.0, "33720": 12698492928.0, "33725": 12698492928.0, "33730": 12698492928.0, "33735": 12698492928.0, "33740": 12698492928.0, "33745": 12698492928.0, "33750": 12698492928.0, "33755": 12698492928.0, "33760": 12698492928.0, "33765": 12698492928.0, "33770": 12698492928.0, "33775": 12698492928.0, "33780": 12698492928.0, "33785": 12698492928.0, "33790": 12698492928.0, "33795": 12698492928.0, "33800": 12698492928.0, "33805": 12698492928.0, "33810": 12698492928.0, "33815": 12698492928.0, "33820": 12698492928.0, "33825": 12698492928.0, "33830": 12698492928.0, "33835": 12698492928.0, "33840": 12698492928.0, "33845": 12698492928.0, "33850": 12698492928.0, "33855": 12698492928.0, "33860": 12698492928.0, "33865": 12698492928.0, "33870": 12698492928.0, "33875": 12698492928.0, "33880": 12698492928.0, "33885": 12698492928.0, "33890": 12698492928.0, "33895": 12698492928.0, "33900": 12698492928.0, "33905": 12698492928.0, "33910": 12698492928.0, "33915": 12698492928.0, "33920": 12698492928.0, "33925": 12698492928.0, "33930": 12698492928.0, "33935": 12698492928.0, "33940": 12698492928.0, "33945": 12698492928.0, "33950": 12698492928.0, "33955": 12698492928.0, "33960": 12698492928.0, "33965": 12698492928.0, "33970": 12698492928.0, "33975": 12698492928.0, "33980": 12698492928.0, "33985": 12698492928.0, "33990": 12698492928.0, "33995": 12698492928.0, "34000": 12698492928.0, "34005": 12698492928.0, "34010": 12698492928.0, "34015": 12698492928.0, "34020": 12698492928.0, "34025": 12698492928.0, "34030": 12698492928.0, "34035": 12698492928.0, "34040": 12698492928.0, "34045": 12698492928.0, "34050": 12698492928.0, "34055": 12698492928.0, "34060": 12698492928.0, "34065": 12698492928.0, "34070": 12698492928.0, "34075": 12698492928.0, "34080": 12698492928.0, "34085": 12698492928.0, "34090": 12698492928.0, "34095": 12698492928.0, "34100": 12698492928.0, "34105": 12698492928.0, "34110": 12698492928.0, "34115": 12698492928.0, "34120": 12698492928.0, "34125": 12698492928.0, "34130": 12698492928.0, "34135": 12698492928.0, "34140": 12698492928.0, "34145": 12698492928.0, "34150": 12698492928.0, "34155": 12698492928.0, "34160": 12698492928.0, "34165": 12698492928.0, "34170": 12698492928.0, "34175": 12698492928.0, "34180": 12698492928.0, "34185": 12698492928.0, "34190": 12698492928.0, "34195": 12698492928.0, "34200": 12698492928.0, "34205": 12698492928.0, "34210": 12698492928.0, "34215": 12698492928.0, "34220": 12698492928.0, "34225": 12698492928.0, "34230": 12698492928.0, "34235": 12698492928.0, "34240": 12698492928.0, "34245": 12698492928.0, "34250": 12698492928.0, "34255": 12698492928.0, "34260": 12698492928.0, "34265": 12698492928.0, "34270": 12698492928.0, "34275": 12698492928.0, "34280": 12698492928.0, "34285": 12698492928.0, "34290": 12698492928.0, "34295": 12698492928.0, "34300": 12698492928.0, "34305": 12698492928.0, "34310": 12698492928.0, "34315": 12698492928.0, "34320": 12698492928.0, "34325": 12698492928.0, "34330": 12698492928.0, "34335": 12698492928.0, "34340": 12698492928.0, "34345": 12698492928.0, "34350": 12698492928.0, "34355": 12698492928.0, "34360": 12698492928.0, "34365": 12698492928.0, "34370": 12698492928.0, "34375": 12698492928.0, "34380": 12698492928.0, "34385": 12698492928.0, "34390": 12698492928.0, "34395": 12698492928.0, "34400": 12698492928.0, "34405": 12698492928.0, "34410": 12698492928.0, "34415": 12698492928.0, "34420": 12698492928.0, "34425": 12698492928.0, "34430": 12698492928.0, "34435": 12698492928.0, "34440": 12698492928.0, "34445": 12698492928.0, "34450": 12698492928.0, "34455": 12698492928.0, "34460": 12698492928.0, "34465": 12698492928.0, "34470": 12698492928.0, "34475": 12698492928.0, "34480": 12698492928.0, "34485": 12698492928.0, "34490": 12698492928.0, "34495": 12698492928.0, "34500": 12698492928.0, "34505": 12698492928.0, "34510": 12698492928.0, "34515": 12698492928.0, "34520": 12698492928.0, "34525": 12698492928.0, "34530": 12698492928.0, "34535": 12698492928.0, "34540": 12698492928.0, "34545": 12698492928.0, "34550": 12698492928.0, "34555": 12698492928.0, "34560": 12698492928.0, "34565": 12698492928.0, "34570": 12698492928.0, "34575": 12698492928.0, "34580": 12698492928.0, "34585": 12698492928.0, "34590": 12698492928.0, "34595": 12698492928.0, "34600": 12698492928.0, "34605": 12698492928.0, "34610": 12698492928.0, "34615": 12698492928.0, "34620": 12698492928.0, "34625": 12698492928.0, "34630": 12698492928.0, "34635": 12698492928.0, "34640": 12698492928.0, "34645": 12698492928.0, "34650": 12698492928.0, "34655": 12698492928.0, "34660": 12698492928.0, "34665": 12698492928.0, "34670": 12698492928.0, "34675": 12698492928.0, "34680": 12698492928.0, "34685": 12698492928.0, "34690": 12698492928.0, "34695": 12698492928.0, "34700": 12698492928.0, "34705": 12698492928.0, "34710": 12698492928.0, "34715": 12698492928.0, "34720": 12698492928.0, "34725": 12698492928.0, "34730": 12698492928.0, "34735": 12698492928.0, "34740": 12698492928.0, "34745": 12698492928.0, "34750": 12698492928.0, "34755": 12698492928.0, "34760": 12698492928.0, "34765": 12698492928.0, "34770": 12698492928.0, "34775": 12698492928.0, "34780": 12698492928.0, "34785": 12698492928.0, "34790": 12698492928.0, "34795": 12698492928.0, "34800": 12698492928.0, "34805": 12698492928.0, "34810": 12698492928.0, "34815": 12698492928.0, "34820": 12698492928.0, "34825": 12698492928.0, "34830": 12698492928.0, "34835": 12698492928.0, "34840": 12698492928.0, "34845": 12698492928.0, "34850": 12698492928.0, "34855": 12698492928.0, "34860": 12698492928.0, "34865": 12698492928.0, "34870": 12698492928.0, "34875": 12698492928.0, "34880": 12698492928.0, "34885": 12698492928.0, "34890": 12698492928.0, "34895": 12698492928.0, "34900": 12698492928.0, "34905": 12698492928.0, "34910": 12698492928.0, "34915": 12698492928.0, "34920": 12698492928.0, "34925": 12698492928.0, "34930": 12698492928.0, "34935": 12698492928.0, "34940": 12698492928.0, "34945": 12698492928.0, "34950": 12698492928.0, "34955": 12698492928.0, "34960": 12698492928.0, "34965": 12698492928.0, "34970": 12698492928.0, "34975": 12698492928.0, "34980": 12698492928.0, "34985": 12698492928.0, "34990": 12698492928.0, "34995": 12698492928.0, "35000": 12698492928.0, "35005": 12698492928.0, "35010": 12698492928.0, "35015": 12698492928.0, "35020": 12698492928.0, "35025": 12698492928.0, "35030": 12698492928.0, "35035": 12698492928.0, "35040": 12698492928.0, "35045": 12698492928.0, "35050": 12698492928.0, "35055": 12698492928.0, "35060": 12698492928.0, "35065": 12698492928.0, "35070": 12698492928.0, "35075": 12698492928.0, "35080": 12698492928.0, "35085": 12698492928.0, "35090": 12698492928.0, "35095": 12698492928.0, "35100": 12698492928.0, "35105": 12698492928.0, "35110": 12698492928.0, "35115": 12698492928.0, "35120": 12698492928.0, "35125": 12698492928.0, "35130": 12698492928.0, "35135": 12698492928.0, "35140": 12698492928.0, "35145": 12698492928.0, "35150": 12698492928.0, "35155": 12698492928.0, "35160": 12698492928.0, "35165": 12698492928.0, "35170": 12698492928.0, "35175": 12698492928.0, "35180": 12698492928.0, "35185": 12698492928.0, "35190": 12698492928.0, "35195": 12698492928.0, "35200": 12698492928.0, "35205": 12698492928.0, "35210": 12698492928.0, "35215": 12698492928.0, "35220": 12698492928.0, "35225": 12698492928.0, "35230": 12698492928.0, "35235": 12698492928.0, "35240": 12698492928.0, "35245": 12698492928.0, "35250": 12698492928.0, "35255": 12698492928.0, "35260": 12698492928.0, "35265": 12698492928.0, "35270": 12698492928.0, "35275": 12698492928.0, "35280": 12698492928.0, "35285": 12698492928.0, "35290": 12698492928.0, "35295": 12698492928.0, "35300": 12698492928.0, "35305": 12698492928.0, "35310": 12698492928.0, "35315": 12698492928.0, "35320": 12698492928.0, "35325": 12698492928.0, "35330": 12698492928.0, "35335": 12698492928.0, "35340": 12698492928.0, "35345": 12698492928.0, "35350": 12698492928.0, "35355": 12698492928.0, "35360": 12698492928.0, "35365": 12698492928.0, "35370": 12698492928.0, "35375": 12698492928.0, "35380": 12698492928.0, "35385": 12698492928.0, "35390": 12698492928.0, "35395": 12698492928.0, "35400": 12698492928.0, "35405": 12698492928.0, "35410": 12698492928.0, "35415": 12698492928.0, "35420": 12698492928.0, "35425": 12698492928.0, "35430": 12698492928.0, "35435": 12698492928.0, "35440": 12698492928.0, "35445": 12698492928.0, "35450": 12698492928.0, "35455": 12698492928.0, "35460": 12698492928.0, "35465": 12698492928.0, "35470": 12698492928.0, "35475": 12698492928.0, "35480": 12698492928.0, "35485": 12698492928.0, "35490": 12698492928.0, "35495": 12698492928.0, "35500": 12698492928.0, "35505": 12698492928.0, "35510": 12698492928.0, "35515": 12698492928.0, "35520": 12698492928.0, "35525": 12698492928.0, "35530": 12698492928.0, "35535": 12698492928.0, "35540": 12698492928.0, "35545": 12698492928.0, "35550": 12698492928.0, "35555": 12698492928.0, "35560": 12698492928.0, "35565": 12698492928.0, "35570": 12698492928.0, "35575": 12698492928.0, "35580": 12698492928.0, "35585": 12698492928.0, "35590": 12698492928.0, "35595": 12698492928.0, "35600": 12698492928.0, "35605": 12698492928.0, "35610": 12698492928.0, "35615": 12698492928.0, "35620": 12698492928.0, "35625": 12698492928.0, "35630": 12698492928.0, "35635": 12698492928.0, "35640": 12698492928.0, "35645": 12698492928.0, "35650": 12698492928.0, "35655": 12698492928.0, "35660": 12698492928.0, "35665": 12698492928.0, "35670": 12698492928.0, "35675": 12698492928.0, "35680": 12698492928.0, "35685": 12698492928.0, "35690": 12698492928.0, "35695": 12698492928.0, "35700": 12698492928.0, "35705": 12698492928.0, "35710": 12698492928.0, "35715": 12698492928.0, "35720": 12698492928.0, "35725": 12698492928.0, "35730": 12698492928.0, "35735": 12698492928.0, "35740": 12698492928.0, "35745": 12698492928.0, "35750": 12698492928.0, "35755": 12698492928.0, "35760": 12698492928.0, "35765": 12698492928.0, "35770": 12698492928.0, "35775": 12698492928.0, "35780": 12698492928.0, "35785": 12698492928.0, "35790": 12698492928.0, "35795": 12698492928.0, "35800": 12698492928.0, "35805": 12698492928.0, "35810": 12698492928.0, "35815": 12698492928.0, "35820": 12698492928.0, "35825": 12698492928.0, "35830": 12698492928.0, "35835": 12698492928.0, "35840": 12698492928.0, "35845": 12698492928.0, "35850": 12698492928.0, "35855": 12698492928.0, "35860": 12698492928.0, "35865": 12698492928.0, "35870": 12698492928.0, "35875": 12698492928.0, "35880": 12698492928.0, "35885": 12698492928.0, "35890": 12698492928.0, "35895": 12698492928.0, "35900": 12698492928.0, "35905": 12698492928.0, "35910": 12698492928.0, "35915": 12698492928.0, "35920": 12698492928.0, "35925": 12698492928.0, "35930": 12698492928.0, "35935": 12698492928.0, "35940": 12698492928.0, "35945": 12698492928.0, "35950": 12698492928.0, "35955": 12698492928.0, "35960": 12698492928.0, "35965": 12698492928.0, "35970": 12698492928.0, "35975": 12698492928.0, "35980": 12698492928.0, "35985": 12698492928.0, "35990": 12698492928.0, "35995": 12698492928.0, "36000": 12698492928.0, "36005": 12698492928.0, "36010": 12698492928.0, "36015": 12698492928.0, "36020": 12698492928.0, "36025": 12698492928.0, "36030": 12698492928.0, "36035": 12698492928.0, "36040": 12698492928.0, "36045": 12698492928.0, "36050": 12698492928.0, "36055": 12698492928.0, "36060": 12698492928.0, "36065": 12698492928.0, "36070": 12698492928.0, "36075": 12698492928.0, "36080": 12698492928.0, "36085": 12698492928.0, "36090": 12698492928.0, "36095": 12698492928.0, "36100": 12698492928.0, "36105": 12698492928.0, "36110": 12698492928.0, "36115": 12698492928.0, "36120": 12698492928.0, "36125": 12698492928.0, "36130": 12698492928.0, "36135": 12698492928.0, "36140": 12698492928.0, "36145": 12698492928.0, "36150": 12698492928.0, "36155": 12698492928.0, "36160": 12698492928.0, "36165": 12698492928.0, "36170": 12698492928.0, "36175": 12698492928.0, "36180": 12698492928.0, "36185": 12698492928.0, "36190": 12698492928.0, "36195": 12698492928.0, "36200": 12698492928.0, "36205": 12698492928.0, "36210": 12698492928.0, "36215": 12698492928.0, "36220": 12698492928.0, "36225": 12698492928.0, "36230": 12698492928.0, "36235": 12698492928.0, "36240": 12698492928.0, "36245": 12698492928.0, "36250": 12698492928.0, "36255": 12698492928.0, "36260": 12698492928.0, "36265": 12698492928.0, "36270": 12698492928.0, "36275": 12698492928.0, "36280": 12698492928.0, "36285": 12698492928.0, "36290": 12698492928.0, "36295": 12698492928.0, "36300": 12698492928.0, "36305": 12698492928.0, "36310": 12698492928.0, "36315": 12698492928.0, "36320": 12698492928.0, "36325": 12698492928.0, "36330": 12698492928.0, "36335": 12698492928.0, "36340": 12698492928.0, "36345": 12698492928.0, "36350": 12698492928.0, "36355": 12698492928.0, "36360": 12698492928.0, "36365": 12698492928.0, "36370": 12698492928.0, "36375": 12698492928.0, "36380": 12698492928.0, "36385": 12698492928.0, "36390": 12698492928.0, "36395": 12698492928.0, "36400": 12698492928.0, "36405": 12698492928.0, "36410": 12698492928.0, "36415": 12698492928.0, "36420": 12698492928.0, "36425": 12698492928.0, "36430": 12698492928.0, "36435": 12698492928.0, "36440": 12698492928.0, "36445": 12698492928.0, "36450": 12698492928.0, "36455": 12698492928.0, "36460": 12698492928.0, "36465": 12698492928.0, "36470": 12698492928.0, "36475": 12698492928.0, "36480": 12698492928.0, "36485": 12698492928.0, "36490": 12698492928.0, "36495": 12698492928.0, "36500": 12698492928.0, "36505": 12698492928.0, "36510": 12698492928.0, "36515": 12698492928.0, "36520": 12698492928.0, "36525": 12698492928.0, "36530": 12698492928.0, "36535": 12698492928.0, "36540": 12698492928.0, "36545": 12698492928.0, "36550": 12698492928.0, "36555": 12698492928.0, "36560": 12698492928.0, "36565": 12698492928.0, "36570": 12698492928.0, "36575": 12698492928.0, "36580": 12698492928.0, "36585": 12698492928.0, "36590": 12698492928.0, "36595": 12698492928.0, "36600": 12698492928.0, "36605": 12698492928.0, "36610": 12698492928.0, "36615": 12698492928.0, "36620": 12698492928.0, "36625": 12698492928.0, "36630": 12698492928.0, "36635": 12698492928.0, "36640": 12698492928.0, "36645": 12698492928.0, "36650": 12698492928.0, "36655": 12698492928.0, "36660": 12698492928.0, "36665": 12698492928.0, "36670": 12698492928.0, "36675": 12698492928.0, "36680": 12698492928.0, "36685": 12698492928.0, "36690": 12698492928.0, "36695": 12698492928.0, "36700": 12698492928.0, "36705": 12698492928.0, "36710": 12698492928.0, "36715": 12698492928.0, "36720": 12698492928.0, "36725": 12698492928.0, "36730": 12698492928.0, "36735": 12698492928.0, "36740": 12698492928.0, "36745": 12698492928.0, "36750": 12698492928.0, "36755": 12698492928.0, "36760": 12698492928.0, "36765": 12698492928.0, "36770": 12698492928.0, "36775": 12698492928.0, "36780": 12698492928.0, "36785": 12698492928.0, "36790": 12698492928.0, "36795": 12698492928.0, "36800": 12698492928.0, "36805": 12698492928.0, "36810": 12698492928.0, "36815": 12698492928.0, "36820": 12698492928.0, "36825": 12698492928.0, "36830": 12698492928.0, "36835": 12698492928.0, "36840": 12698492928.0, "36845": 12698492928.0, "36850": 12698492928.0, "36855": 12698492928.0, "36860": 12698492928.0, "36865": 12698492928.0, "36870": 12698492928.0, "36875": 12698492928.0, "36880": 12698492928.0, "36885": 12698492928.0, "36890": 12698492928.0, "36895": 12698492928.0, "36900": 12698492928.0, "36905": 12698492928.0, "36910": 12698492928.0, "36915": 12698492928.0, "36920": 12698492928.0, "36925": 12698492928.0, "36930": 12698492928.0, "36935": 12698492928.0, "36940": 12698492928.0, "36945": 12698492928.0, "36950": 12698492928.0, "36955": 12698492928.0, "36960": 12698492928.0, "36965": 12698492928.0, "36970": 12698492928.0, "36975": 12698492928.0, "36980": 12698492928.0, "36985": 12698492928.0, "36990": 12698492928.0, "36995": 12698492928.0, "37000": 12698492928.0, "37005": 12698492928.0, "37010": 12698492928.0, "37015": 12698492928.0, "37020": 12698492928.0, "37025": 12698492928.0, "37030": 12698492928.0, "37035": 12698492928.0, "37040": 12698492928.0, "37045": 12698492928.0, "37050": 12698492928.0, "37055": 12698492928.0, "37060": 12698492928.0, "37065": 12698492928.0, "37070": 12698492928.0, "37075": 12698492928.0, "37080": 12698492928.0, "37085": 12698492928.0, "37090": 12698492928.0, "37095": 12698492928.0, "37100": 12698492928.0, "37105": 12698492928.0, "37110": 12698492928.0, "37115": 12698492928.0, "37120": 12698492928.0, "37125": 12698492928.0, "37130": 12698492928.0, "37135": 12698492928.0, "37140": 12698492928.0, "37145": 12698492928.0, "37150": 12698492928.0, "37155": 12698492928.0, "37160": 12698492928.0, "37165": 12698492928.0, "37170": 12698492928.0, "37175": 12698492928.0, "37180": 12698492928.0, "37185": 12698492928.0, "37190": 12698492928.0, "37195": 12698492928.0, "37200": 12698492928.0, "37205": 12698492928.0, "37210": 12698492928.0, "37215": 12698492928.0, "37220": 12698492928.0, "37225": 12698492928.0, "37230": 12698492928.0, "37235": 12698492928.0, "37240": 12698492928.0, "37245": 12698492928.0, "37250": 12698492928.0, "37255": 12698492928.0, "37260": 12698492928.0, "37265": 12698492928.0, "37270": 12698492928.0, "37275": 12698492928.0, "37280": 12698492928.0, "37285": 12698492928.0, "37290": 12698492928.0, "37295": 12698492928.0, "37300": 12698492928.0, "37305": 12698492928.0, "37310": 12698492928.0, "37315": 12698492928.0, "37320": 12698492928.0, "37325": 12698492928.0, "37330": 12698492928.0, "37335": 12698492928.0, "37340": 12698492928.0, "37345": 12698492928.0, "37350": 12698492928.0, "37355": 12698492928.0, "37360": 12698492928.0, "37365": 12698492928.0, "37370": 12698492928.0, "37375": 12698492928.0, "37380": 12698492928.0, "37385": 12698492928.0, "37390": 12698492928.0, "37395": 12698492928.0, "37400": 12698492928.0, "37405": 12698492928.0, "37410": 12698492928.0, "37415": 12698492928.0, "37420": 12698492928.0, "37425": 12698492928.0, "37430": 12698492928.0, "37435": 12698492928.0, "37440": 12698492928.0, "37445": 12698492928.0, "37450": 12698492928.0, "37455": 12698492928.0, "37460": 12698492928.0, "37465": 12698492928.0, "37470": 12698492928.0, "37475": 12698492928.0, "37480": 12698492928.0, "37485": 12698492928.0, "37490": 12698492928.0, "37495": 12698492928.0, "37500": 12698492928.0, "37505": 12698492928.0, "37510": 12698492928.0, "37515": 12698492928.0, "37520": 12698492928.0, "37525": 12698492928.0, "37530": 12698492928.0, "37535": 12698492928.0, "37540": 12698492928.0, "37545": 12698492928.0, "37550": 12698492928.0, "37555": 12698492928.0, "37560": 12698492928.0, "37565": 12698492928.0, "37570": 12698492928.0, "37575": 12698492928.0, "37580": 12698492928.0, "37585": 12698492928.0, "37590": 12698492928.0, "37595": 12698492928.0, "37600": 12698492928.0, "37605": 12698492928.0, "37610": 12698492928.0, "37615": 12698492928.0, "37620": 12698492928.0, "37625": 12698492928.0, "37630": 12698492928.0, "37635": 12698492928.0, "37640": 12698492928.0, "37645": 12698492928.0, "37650": 12698492928.0, "37655": 12698492928.0, "37660": 12698492928.0, "37665": 12698492928.0, "37670": 12698492928.0, "37675": 12698492928.0, "37680": 12698492928.0, "37685": 12698492928.0, "37690": 12698492928.0, "37695": 12698492928.0, "37700": 12698492928.0, "37705": 12698492928.0, "37710": 12698492928.0, "37715": 12698492928.0, "37720": 12698492928.0, "37725": 12698492928.0, "37730": 12698492928.0, "37735": 12698492928.0, "37740": 12698492928.0, "37745": 12698492928.0, "37750": 12698492928.0, "37755": 12698492928.0, "37760": 12698492928.0, "37765": 12698492928.0, "37770": 12698492928.0, "37775": 12698492928.0, "37780": 12698492928.0, "37785": 12698492928.0, "37790": 12698492928.0, "37795": 12698492928.0, "37800": 12698492928.0, "37805": 12698492928.0, "37810": 12698492928.0, "37815": 12698492928.0, "37820": 12698492928.0, "37825": 12698492928.0, "37830": 12698492928.0, "37835": 12698492928.0, "37840": 12698492928.0, "37845": 12698492928.0, "37850": 12698492928.0, "37855": 12698492928.0, "37860": 12698492928.0, "37865": 12698492928.0, "37870": 12698492928.0, "37875": 12698492928.0, "37880": 12698492928.0, "37885": 12698492928.0, "37890": 12698492928.0, "37895": 12698492928.0, "37900": 12698492928.0, "37905": 12698492928.0, "37910": 12698492928.0, "37915": 12698492928.0, "37920": 12698492928.0, "37925": 12698492928.0, "37930": 12698492928.0, "37935": 12698492928.0, "37940": 12698492928.0, "37945": 12698492928.0, "37950": 12698492928.0, "37955": 12698492928.0, "37960": 12698492928.0, "37965": 12698492928.0, "37970": 12698492928.0, "37975": 12698492928.0, "37980": 12698492928.0, "37985": 12698492928.0, "37990": 12698492928.0, "37995": 12698492928.0, "38000": 12698492928.0, "38005": 12698492928.0, "38010": 12698492928.0, "38015": 12698492928.0, "38020": 12698492928.0, "38025": 12698492928.0, "38030": 12698492928.0, "38035": 12698492928.0, "38040": 12698492928.0, "38045": 12698492928.0, "38050": 12698492928.0, "38055": 12698492928.0, "38060": 12698492928.0, "38065": 12698492928.0, "38070": 12698492928.0, "38075": 12698492928.0, "38080": 12698492928.0, "38085": 12698492928.0, "38090": 12698492928.0, "38095": 12698492928.0, "38100": 12698492928.0, "38105": 12698492928.0, "38110": 12698492928.0, "38115": 12698492928.0, "38120": 12698492928.0, "38125": 12698492928.0, "38130": 12698492928.0, "38135": 12698492928.0, "38140": 12698492928.0, "38145": 12698492928.0, "38150": 12698492928.0, "38155": 12698492928.0, "38160": 12698492928.0, "38165": 12698492928.0, "38170": 12698492928.0, "38175": 12698492928.0, "38180": 12698492928.0, "38185": 12698492928.0, "38190": 12698492928.0, "38195": 12698492928.0, "38200": 12698492928.0, "38205": 12698492928.0, "38210": 12698492928.0, "38215": 12698492928.0, "38220": 12698492928.0, "38225": 12698492928.0, "38230": 12698492928.0, "38235": 12698492928.0, "38240": 12698492928.0, "38245": 12698492928.0, "38250": 12698492928.0, "38255": 12698492928.0, "38260": 12698492928.0, "38265": 12698492928.0, "38270": 12698492928.0, "38275": 12698492928.0, "38280": 12698492928.0, "38285": 12698492928.0, "38290": 12698492928.0, "38295": 12698492928.0, "38300": 12698492928.0, "38305": 12698492928.0, "38310": 12698492928.0, "38315": 12698492928.0, "38320": 12698492928.0, "38325": 12698492928.0, "38330": 12698492928.0, "38335": 12698492928.0, "38340": 12698492928.0, "38345": 12698492928.0, "38350": 12698492928.0, "38355": 12698492928.0, "38360": 12698492928.0, "38365": 12698492928.0, "38370": 12698492928.0, "38375": 12698492928.0, "38380": 12698492928.0, "38385": 12698492928.0, "38390": 12698492928.0, "38395": 12698492928.0, "38400": 12698492928.0, "38405": 12698492928.0, "38410": 12698492928.0, "38415": 12698492928.0, "38420": 12698492928.0, "38425": 12698492928.0, "38430": 12698492928.0, "38435": 12698492928.0, "38440": 12698492928.0, "38445": 12698492928.0, "38450": 12698492928.0, "38455": 12698492928.0, "38460": 12698492928.0, "38465": 12698492928.0, "38470": 12698492928.0, "38475": 12698492928.0, "38480": 12698492928.0, "38485": 12698492928.0, "38490": 12698492928.0, "38495": 12698492928.0, "38500": 12698492928.0, "38505": 12698492928.0, "38510": 12698492928.0, "38515": 12698492928.0, "38520": 12698492928.0, "38525": 12698492928.0, "38530": 12698492928.0, "38535": 12698492928.0, "38540": 12698492928.0, "38545": 12698492928.0, "38550": 12698492928.0, "38555": 12698492928.0, "38560": 12698492928.0, "38565": 12698492928.0, "38570": 12698492928.0, "38575": 12698492928.0, "38580": 12698492928.0, "38585": 12698492928.0, "38590": 12698492928.0, "38595": 12698492928.0, "38600": 12698492928.0, "38605": 12698492928.0, "38610": 12698492928.0, "38615": 12698492928.0, "38620": 12698492928.0, "38625": 12698492928.0, "38630": 12698492928.0, "38635": 12698492928.0, "38640": 12698492928.0, "38645": 12698492928.0, "38650": 12698492928.0, "38655": 12698492928.0, "38660": 12698492928.0, "38665": 12698492928.0, "38670": 12698492928.0, "38675": 12698492928.0, "38680": 12698492928.0, "38685": 12698492928.0, "38690": 12698492928.0, "38695": 12698492928.0, "38700": 12698492928.0, "38705": 12698492928.0, "38710": 12698492928.0, "38715": 12698492928.0, "38720": 12698492928.0, "38725": 12698492928.0, "38730": 12698492928.0, "38735": 12698492928.0, "38740": 12698492928.0, "38745": 12698492928.0, "38750": 12698492928.0, "38755": 12698492928.0, "38760": 12698492928.0, "38765": 12698492928.0, "38770": 12698492928.0, "38775": 12698492928.0, "38780": 12698492928.0, "38785": 12698492928.0, "38790": 12698492928.0, "38795": 12698492928.0, "38800": 12698492928.0, "38805": 12698492928.0, "38810": 12698492928.0, "38815": 12698492928.0, "38820": 12698492928.0, "38825": 12698492928.0, "38830": 12698492928.0, "38835": 12698492928.0, "38840": 12698492928.0, "38845": 12698492928.0, "38850": 12698492928.0, "38855": 12698492928.0, "38860": 12698492928.0, "38865": 12698492928.0, "38870": 12698492928.0, "38875": 12698492928.0, "38880": 12698492928.0, "38885": 12698492928.0, "38890": 12698492928.0, "38895": 12698492928.0, "38900": 12698492928.0, "38905": 12698492928.0, "38910": 12698492928.0, "38915": 12698492928.0, "38920": 12698492928.0, "38925": 12698492928.0, "38930": 12698492928.0, "38935": 12698492928.0, "38940": 12698492928.0, "38945": 12698492928.0, "38950": 12698492928.0, "38955": 12698492928.0, "38960": 12698492928.0, "38965": 12698492928.0, "38970": 12698492928.0, "38975": 12698492928.0, "38980": 12698492928.0, "38985": 12698492928.0, "38990": 12698492928.0, "38995": 12698492928.0, "39000": 12698492928.0, "39005": 12698492928.0, "39010": 12698492928.0, "39015": 12698492928.0, "39020": 12698492928.0, "39025": 12698492928.0, "39030": 12698492928.0, "39035": 12698492928.0, "39040": 12698492928.0, "39045": 12698492928.0, "39050": 12698492928.0, "39055": 12698492928.0, "39060": 12698492928.0, "39065": 12698492928.0, "39070": 12698492928.0, "39075": 12698492928.0, "39080": 12698492928.0, "39085": 12698492928.0, "39090": 12698492928.0, "39095": 12698492928.0, "39100": 12698492928.0, "39105": 12698492928.0, "39110": 12698492928.0, "39115": 12698492928.0, "39120": 12698492928.0, "39125": 12698492928.0, "39130": 12698492928.0, "39135": 12698492928.0, "39140": 12698492928.0, "39145": 12698492928.0, "39150": 12698492928.0, "39155": 12698492928.0, "39160": 12698492928.0, "39165": 12698492928.0, "39170": 12698492928.0, "39175": 12698492928.0, "39180": 12698492928.0, "39185": 12698492928.0, "39190": 12698492928.0, "39195": 12698492928.0, "39200": 12698492928.0, "39205": 12698492928.0, "39210": 12698492928.0, "39215": 12698492928.0, "39220": 12698492928.0, "39225": 12698492928.0, "39230": 12698492928.0, "39235": 12698492928.0, "39240": 12698492928.0, "39245": 12698492928.0, "39250": 12698492928.0, "39255": 12698492928.0, "39260": 12698492928.0, "39265": 12698492928.0, "39270": 12698492928.0, "39275": 12698492928.0, "39280": 12698492928.0, "39285": 12698492928.0, "39290": 12698492928.0, "39295": 12698492928.0, "39300": 12698492928.0, "39305": 12698492928.0, "39310": 12698492928.0, "39315": 12698492928.0, "39320": 12698492928.0, "39325": 12698492928.0, "39330": 12698492928.0, "39335": 12698492928.0, "39340": 12698492928.0, "39345": 12698492928.0, "39350": 12698492928.0, "39355": 12698492928.0, "39360": 12698492928.0, "39365": 12698492928.0, "39370": 12698492928.0, "39375": 12698492928.0, "39380": 12698492928.0, "39385": 12698492928.0, "39390": 12698492928.0, "39395": 12698492928.0, "39400": 12698492928.0, "39405": 12698492928.0, "39410": 12698492928.0, "39415": 12698492928.0, "39420": 12698492928.0, "39425": 12698492928.0, "39430": 12698492928.0, "39435": 12698492928.0, "39440": 12698492928.0, "39445": 12698492928.0, "39450": 12698492928.0, "39455": 12698492928.0, "39460": 12698492928.0, "39465": 12698492928.0, "39470": 12698492928.0, "39475": 12698492928.0, "39480": 12698492928.0, "39485": 12698492928.0, "39490": 12698492928.0, "39495": 12698492928.0, "39500": 12698492928.0, "39505": 12698492928.0, "39510": 12698492928.0, "39515": 12698492928.0, "39520": 12698492928.0, "39525": 12698492928.0, "39530": 12698492928.0, "39535": 12698492928.0, "39540": 12698492928.0, "39545": 12698492928.0, "39550": 12698492928.0, "39555": 12698492928.0, "39560": 12698492928.0, "39565": 12698492928.0, "39570": 12698492928.0, "39575": 12698492928.0, "39580": 12698492928.0, "39585": 12698492928.0, "39590": 12698492928.0, "39595": 12698492928.0, "39600": 12698492928.0, "39605": 12698492928.0, "39610": 12698492928.0, "39615": 12698492928.0, "39620": 12698492928.0, "39625": 12698492928.0, "39630": 12698492928.0, "39635": 12698492928.0, "39640": 12698492928.0, "39645": 12698492928.0, "39650": 12698492928.0, "39655": 12698492928.0, "39660": 12698492928.0, "39665": 12698492928.0, "39670": 12698492928.0, "39675": 12698492928.0, "39680": 12698492928.0, "39685": 12698492928.0, "39690": 12698492928.0, "39695": 12698492928.0, "39700": 12698492928.0, "39705": 12698492928.0, "39710": 12698492928.0, "39715": 12698492928.0, "39720": 12698492928.0, "39725": 12698492928.0, "39730": 12698492928.0, "39735": 12698492928.0, "39740": 12698492928.0, "39745": 12698492928.0, "39750": 12698492928.0, "39755": 12698492928.0, "39760": 12698492928.0, "39765": 12698492928.0, "39770": 12698492928.0, "39775": 12698492928.0, "39780": 12698492928.0, "39785": 12698492928.0, "39790": 12698492928.0, "39795": 12698492928.0, "39800": 12698492928.0, "39805": 12698492928.0, "39810": 12698492928.0, "39815": 12698492928.0, "39820": 12698492928.0, "39825": 12698492928.0, "39830": 12698492928.0, "39835": 12698492928.0, "39840": 12698492928.0, "39845": 12698492928.0, "39850": 12698492928.0, "39855": 12698492928.0, "39860": 12698492928.0, "39865": 12698492928.0, "39870": 12698492928.0, "39875": 12698492928.0, "39880": 12698492928.0, "39885": 12698492928.0, "39890": 12698492928.0, "39895": 12698492928.0, "39900": 12698492928.0, "39905": 12698492928.0, "39910": 12698492928.0, "39915": 12698492928.0, "39920": 12698492928.0, "39925": 12698492928.0, "39930": 12698492928.0, "39935": 12698492928.0, "39940": 12698492928.0, "39945": 12698492928.0, "39950": 12698492928.0, "39955": 12698492928.0, "39960": 12698492928.0, "39965": 12698492928.0, "39970": 12698492928.0, "39975": 12698492928.0, "39980": 12698492928.0, "39985": 12698492928.0, "39990": 12698492928.0, "39995": 12698492928.0, "40000": 12698492928.0, "40005": 12698492928.0, "40010": 12698492928.0, "40015": 12698492928.0, "40020": 12698492928.0, "40025": 12698492928.0, "40030": 12698492928.0, "40035": 12698492928.0, "40040": 12698492928.0, "40045": 12698492928.0, "40050": 12698492928.0, "40055": 12698492928.0, "40060": 12698492928.0, "40065": 12698492928.0, "40070": 12698492928.0, "40075": 12698492928.0, "40080": 12698492928.0, "40085": 12698492928.0, "40090": 12698492928.0, "40095": 12698492928.0, "40100": 12698492928.0, "40105": 12698492928.0, "40110": 12698492928.0, "40115": 12698492928.0, "40120": 12698492928.0, "40125": 12698492928.0, "40130": 12698492928.0, "40135": 12698492928.0, "40140": 12698492928.0, "40145": 12698492928.0, "40150": 12698492928.0, "40155": 12698492928.0, "40160": 12698492928.0, "40165": 12698492928.0, "40170": 12698492928.0, "40175": 12698492928.0, "40180": 12698492928.0, "40185": 12698492928.0, "40190": 12698492928.0, "40195": 12698492928.0, "40200": 12698492928.0, "40205": 12698492928.0, "40210": 12698492928.0, "40215": 12698492928.0, "40220": 12698492928.0, "40225": 12698492928.0, "40230": 12698492928.0, "40235": 12698492928.0, "40240": 12698492928.0, "40245": 12698492928.0, "40250": 12698492928.0, "40255": 12698492928.0, "40260": 12698492928.0, "40265": 12698492928.0, "40270": 12698492928.0, "40275": 12698492928.0, "40280": 12698492928.0, "40285": 12698492928.0, "40290": 12698492928.0, "40295": 12698492928.0, "40300": 12698492928.0, "40305": 12698492928.0, "40310": 12698492928.0, "40315": 12698492928.0, "40320": 12698492928.0, "40325": 12698492928.0, "40330": 12698492928.0, "40335": 12698492928.0, "40340": 12698492928.0, "40345": 12698492928.0, "40350": 12698492928.0, "40355": 12698492928.0, "40360": 12698492928.0, "40365": 12698492928.0, "40370": 12698492928.0, "40375": 12698492928.0, "40380": 12698492928.0, "40385": 12698492928.0, "40390": 12698492928.0, "40395": 12698492928.0, "40400": 12698492928.0, "40405": 12698492928.0, "40410": 12698492928.0, "40415": 12698492928.0, "40420": 12698492928.0, "40425": 12698492928.0, "40430": 12698492928.0, "40435": 12698492928.0, "40440": 12698492928.0, "40445": 12698492928.0, "40450": 12698492928.0, "40455": 12698492928.0, "40460": 12698492928.0, "40465": 12698492928.0, "40470": 12698492928.0, "40475": 12698492928.0, "40480": 12698492928.0, "40485": 12698492928.0, "40490": 12698492928.0, "40495": 12698492928.0, "40500": 12698492928.0, "40505": 12698492928.0, "40510": 12698492928.0, "40515": 12698492928.0, "40520": 12698492928.0, "40525": 12698492928.0, "40530": 12698492928.0, "40535": 12698492928.0, "40540": 12698492928.0, "40545": 12698492928.0, "40550": 12698492928.0, "40555": 12698492928.0, "40560": 12698492928.0, "40565": 12698492928.0, "40570": 12698492928.0, "40575": 12698492928.0, "40580": 12698492928.0, "40585": 12698492928.0, "40590": 12698492928.0, "40595": 12698492928.0, "40600": 12698492928.0, "40605": 12698492928.0, "40610": 12698492928.0, "40615": 12698492928.0, "40620": 12698492928.0, "40625": 12698492928.0, "40630": 12698492928.0, "40635": 12698492928.0, "40640": 12698492928.0, "40645": 12698492928.0, "40650": 12698492928.0, "40655": 12698492928.0, "40660": 12698492928.0, "40665": 12698492928.0, "40670": 12698492928.0, "40675": 12698492928.0, "40680": 12698492928.0, "40685": 12698492928.0, "40690": 12698492928.0, "40695": 12698492928.0, "40700": 12698492928.0, "40705": 12698492928.0, "40710": 12698492928.0, "40715": 12698492928.0, "40720": 12698492928.0, "40725": 12698492928.0, "40730": 12698492928.0, "40735": 12698492928.0, "40740": 12698492928.0, "40745": 12698492928.0, "40750": 12698492928.0, "40755": 12698492928.0, "40760": 12698492928.0, "40765": 12698492928.0, "40770": 12698492928.0, "40775": 12698492928.0, "40780": 12698492928.0, "40785": 12698492928.0, "40790": 12698492928.0, "40795": 12698492928.0, "40800": 12698492928.0, "40805": 12698492928.0, "40810": 12698492928.0, "40815": 12698492928.0, "40820": 12698492928.0, "40825": 12698492928.0, "40830": 12698492928.0, "40835": 12698492928.0, "40840": 12698492928.0, "40845": 12698492928.0, "40850": 12698492928.0, "40855": 12698492928.0, "40860": 12698492928.0, "40865": 12698492928.0, "40870": 12698492928.0, "40875": 12698492928.0, "40880": 12698492928.0, "40885": 12698492928.0, "40890": 12698492928.0, "40895": 12698492928.0, "40900": 12698492928.0, "40905": 12698492928.0, "40910": 12698492928.0, "40915": 12698492928.0, "40920": 12698492928.0, "40925": 12698492928.0, "40930": 12698492928.0, "40935": 12698492928.0, "40940": 12698492928.0, "40945": 12698492928.0, "40950": 12698492928.0, "40955": 12698492928.0, "40960": 12698492928.0, "40965": 12698492928.0, "40970": 12698492928.0, "40975": 12698492928.0, "40980": 12698492928.0, "40985": 12698492928.0, "40990": 12698492928.0, "40995": 12698492928.0, "41000": 12698492928.0, "41005": 12698492928.0, "41010": 12698492928.0, "41015": 12698492928.0, "41020": 12698492928.0, "41025": 12698492928.0, "41030": 12698492928.0, "41035": 12698492928.0, "41040": 12698492928.0, "41045": 12698492928.0, "41050": 12698492928.0, "41055": 12698492928.0, "41060": 12698492928.0, "41065": 12698492928.0, "41070": 12698492928.0, "41075": 12698492928.0, "41080": 12698492928.0, "41085": 12698492928.0, "41090": 12698492928.0, "41095": 12698492928.0, "41100": 12698492928.0, "41105": 12698492928.0, "41110": 12698492928.0, "41115": 12698492928.0, "41120": 12698492928.0, "41125": 12698492928.0, "41130": 12698492928.0, "41135": 12698492928.0, "41140": 12698492928.0, "41145": 12698492928.0, "41150": 12698492928.0, "41155": 12698492928.0, "41160": 12698492928.0, "41165": 12698492928.0, "41170": 12698492928.0, "41175": 12698492928.0, "41180": 12698492928.0, "41185": 12698492928.0, "41190": 12698492928.0, "41195": 12698492928.0, "41200": 12698492928.0, "41205": 12698492928.0, "41210": 12698492928.0, "41215": 12698492928.0, "41220": 12698492928.0, "41225": 12698492928.0, "41230": 12698492928.0, "41235": 12698492928.0, "41240": 12698492928.0, "41245": 12698492928.0, "41250": 12698492928.0, "41255": 12698492928.0, "41260": 12698492928.0, "41265": 12698492928.0, "41270": 12698492928.0, "41275": 12698492928.0, "41280": 12698492928.0, "41285": 12698492928.0, "41290": 12698492928.0, "41295": 12698492928.0, "41300": 12698492928.0, "41305": 12698492928.0, "41310": 12698492928.0, "41315": 12698492928.0, "41320": 12698492928.0, "41325": 12698492928.0, "41330": 12698492928.0, "41335": 12698492928.0, "41340": 12698492928.0, "41345": 12698492928.0, "41350": 12698492928.0, "41355": 12698492928.0, "41360": 12698492928.0, "41365": 12698492928.0, "41370": 12698492928.0, "41375": 12698492928.0, "41380": 12698492928.0, "41385": 12698492928.0, "41390": 12698492928.0, "41395": 12698492928.0, "41400": 12698492928.0, "41405": 12698492928.0, "41410": 12698492928.0, "41415": 12698492928.0, "41420": 12698492928.0, "41425": 12698492928.0, "41430": 12698492928.0, "41435": 12698492928.0, "41440": 12698492928.0, "41445": 12698492928.0, "41450": 12698492928.0, "41455": 12698492928.0, "41460": 12698492928.0, "41465": 12698492928.0, "41470": 12698492928.0, "41475": 12698492928.0, "41480": 12698492928.0, "41485": 12698492928.0, "41490": 12698492928.0, "41495": 12698492928.0, "41500": 12698492928.0, "41505": 12698492928.0, "41510": 12698492928.0, "41515": 12698492928.0, "41520": 12698492928.0, "41525": 12698492928.0, "41530": 12698492928.0, "41535": 12698492928.0, "41540": 12698492928.0, "41545": 12698492928.0, "41550": 12698492928.0, "41555": 12698492928.0, "41560": 12698492928.0, "41565": 12698492928.0, "41570": 12698492928.0, "41575": 12698492928.0, "41580": 12698492928.0, "41585": 12698492928.0, "41590": 12698492928.0, "41595": 12698492928.0, "41600": 12698492928.0, "41605": 12698492928.0, "41610": 12698492928.0, "41615": 12698492928.0, "41620": 12698492928.0, "41625": 12698492928.0, "41630": 12698492928.0, "41635": 12698492928.0, "41640": 12698492928.0, "41645": 12698492928.0, "41650": 12698492928.0, "41655": 12698492928.0, "41660": 12698492928.0, "41665": 12698492928.0, "41670": 12698492928.0, "41675": 12698492928.0, "41680": 12698492928.0, "41685": 12698492928.0, "41690": 12698492928.0, "41695": 12698492928.0, "41700": 12698492928.0, "41705": 12698492928.0, "41710": 12698492928.0, "41715": 12698492928.0, "41720": 12698492928.0, "41725": 12698492928.0, "41730": 12698492928.0, "41735": 12698492928.0, "41740": 12698492928.0, "41745": 12698492928.0, "41750": 12698492928.0, "41755": 12698492928.0, "41760": 12698492928.0, "41765": 12698492928.0, "41770": 12698492928.0, "41775": 12698492928.0, "41780": 12698492928.0, "41785": 12698492928.0, "41790": 12698492928.0, "41795": 12698492928.0, "41800": 12698492928.0, "41805": 12698492928.0, "41810": 12698492928.0, "41815": 12698492928.0, "41820": 12698492928.0, "41825": 12698492928.0, "41830": 12698492928.0, "41835": 12698492928.0, "41840": 12698492928.0, "41845": 12698492928.0, "41850": 12698492928.0, "41855": 12698492928.0, "41860": 12698492928.0, "41865": 12698492928.0, "41870": 12698492928.0, "41875": 12698492928.0, "41880": 12698492928.0, "41885": 12698492928.0, "41890": 12698492928.0, "41895": 12698492928.0, "41900": 12698492928.0, "41905": 12698492928.0, "41910": 12698492928.0, "41915": 12698492928.0, "41920": 12698492928.0, "41925": 12698492928.0, "41930": 12698492928.0, "41935": 12698492928.0, "41940": 12698492928.0, "41945": 12698492928.0, "41950": 12698492928.0, "41955": 12698492928.0, "41960": 12698492928.0, "41965": 12698492928.0, "41970": 12698492928.0, "41975": 12698492928.0, "41980": 12698492928.0, "41985": 12698492928.0, "41990": 12698492928.0, "41995": 12698492928.0, "42000": 12698492928.0, "42005": 12698492928.0, "42010": 12698492928.0, "42015": 12698492928.0, "42020": 12698492928.0, "42025": 12698492928.0, "42030": 12698492928.0, "42035": 12698492928.0, "42040": 12698492928.0, "42045": 12698492928.0, "42050": 12698492928.0, "42055": 12698492928.0, "42060": 12698492928.0, "42065": 12698492928.0, "42070": 12698492928.0, "42075": 12698492928.0, "42080": 12698492928.0, "42085": 12698492928.0, "42090": 12698492928.0, "42095": 12698492928.0, "42100": 12698492928.0, "42105": 12698492928.0, "42110": 12698492928.0, "42115": 12698492928.0, "42120": 12698492928.0, "42125": 12698492928.0, "42130": 12698492928.0, "42135": 12698492928.0, "42140": 12698492928.0, "42145": 12698492928.0, "42150": 12698492928.0, "42155": 12698492928.0, "42160": 12698492928.0, "42165": 12698492928.0, "42170": 12698492928.0, "42175": 12698492928.0, "42180": 12698492928.0, "42185": 12698492928.0, "42190": 12698492928.0, "42195": 12698492928.0, "42200": 12698492928.0, "42205": 12698492928.0, "42210": 12698492928.0, "42215": 12698492928.0, "42220": 12698492928.0, "42225": 12698492928.0, "42230": 12698492928.0, "42235": 12698492928.0, "42240": 12698492928.0, "42245": 12698492928.0, "42250": 12698492928.0, "42255": 12698492928.0, "42260": 12698492928.0, "42265": 12698492928.0, "42270": 12698492928.0, "42275": 12698492928.0, "42280": 12698492928.0, "42285": 12698492928.0, "42290": 12698492928.0, "42295": 12698492928.0, "42300": 12698492928.0, "42305": 12698492928.0, "42310": 12698492928.0, "42315": 12698492928.0, "42320": 12698492928.0, "42325": 12698492928.0, "42330": 12698492928.0, "42335": 12698492928.0, "42340": 12698492928.0, "42345": 12698492928.0, "42350": 12698492928.0, "42355": 12698492928.0, "42360": 12698492928.0, "42365": 12698492928.0, "42370": 12698492928.0, "42375": 12698492928.0, "42380": 12698492928.0, "42385": 12698492928.0, "42390": 12698492928.0, "42395": 12698492928.0, "42400": 12698492928.0, "42405": 12698492928.0, "42410": 12698492928.0, "42415": 12698492928.0, "42420": 12698492928.0, "42425": 12698492928.0, "42430": 12698492928.0, "42435": 12698492928.0, "42440": 12698492928.0, "42445": 12698492928.0, "42450": 12698492928.0, "42455": 12698492928.0, "42460": 12698492928.0, "42465": 12698492928.0, "42470": 12698492928.0, "42475": 12698492928.0, "42480": 12698492928.0, "42485": 12698492928.0, "42490": 12698492928.0, "42495": 12698492928.0, "42500": 12698492928.0, "42505": 12698492928.0, "42510": 12698492928.0, "42515": 12698492928.0, "42520": 12698492928.0, "42525": 12698492928.0, "42530": 12698492928.0, "42535": 12698492928.0, "42540": 12698492928.0, "42545": 12698492928.0, "42550": 12698492928.0, "42555": 12698492928.0, "42560": 12698492928.0, "42565": 12698492928.0, "42570": 12698492928.0, "42575": 12698492928.0, "42580": 12698492928.0, "42585": 12698492928.0, "42590": 12698492928.0, "42595": 12698492928.0, "42600": 12698492928.0, "42605": 12698492928.0, "42610": 12698492928.0, "42615": 12698492928.0, "42620": 12698492928.0, "42625": 12698492928.0, "42630": 12698492928.0, "42635": 12698492928.0, "42640": 12698492928.0, "42645": 12698492928.0, "42650": 12698492928.0, "42655": 12698492928.0, "42660": 12698492928.0, "42665": 12698492928.0, "42670": 12698492928.0, "42675": 12698492928.0, "42680": 12698492928.0, "42685": 12698492928.0, "42690": 12698492928.0, "42695": 12698492928.0, "42700": 12698492928.0, "42705": 12698492928.0, "42710": 12698492928.0, "42715": 12698492928.0, "42720": 12698492928.0, "42725": 12698492928.0, "42730": 12698492928.0, "42735": 12698492928.0, "42740": 12698492928.0, "42745": 12698492928.0, "42750": 12698492928.0, "42755": 12698492928.0, "42760": 12698492928.0, "42765": 12698492928.0, "42770": 12698492928.0, "42775": 12698492928.0, "42780": 12698492928.0, "42785": 12698492928.0, "42790": 12698492928.0, "42795": 12698492928.0, "42800": 12698492928.0, "42805": 12698492928.0, "42810": 12698492928.0, "42815": 12698492928.0, "42820": 12698492928.0, "42825": 12698492928.0, "42830": 12698492928.0, "42835": 12698492928.0, "42840": 12698492928.0, "42845": 12698492928.0, "42850": 12698492928.0, "42855": 12698492928.0, "42860": 12698492928.0, "42865": 12698492928.0, "42870": 12698492928.0, "42875": 12698492928.0, "42880": 12698492928.0, "42885": 12698492928.0, "42890": 12698492928.0, "42895": 12698492928.0, "42900": 12698492928.0, "42905": 12698492928.0, "42910": 12698492928.0, "42915": 12698492928.0, "42920": 12698492928.0, "42925": 12698492928.0, "42930": 12698492928.0, "42935": 12698492928.0, "42940": 12698492928.0, "42945": 12698492928.0, "42950": 12698492928.0, "42955": 12698492928.0, "42960": 12698492928.0, "42965": 12698492928.0, "42970": 12698492928.0, "42975": 12698492928.0, "42980": 12698492928.0, "42985": 12698492928.0, "42990": 12698492928.0, "42995": 12698492928.0, "43000": 12698492928.0, "43005": 12698492928.0, "43010": 12698492928.0, "43015": 12698492928.0, "43020": 12698492928.0, "43025": 12698492928.0, "43030": 12698492928.0, "43035": 12698492928.0, "43040": 12698492928.0, "43045": 12698492928.0, "43050": 12698492928.0, "43055": 12698492928.0, "43060": 12698492928.0, "43065": 12698492928.0, "43070": 12698492928.0, "43075": 12698492928.0, "43080": 12698492928.0, "43085": 12698492928.0, "43090": 12698492928.0, "43095": 12698492928.0, "43100": 12698492928.0, "43105": 12698492928.0, "43110": 12698492928.0, "43115": 12698492928.0, "43120": 12698492928.0, "43125": 12698492928.0, "43130": 12698492928.0, "43135": 12698492928.0, "43140": 12698492928.0, "43145": 12698492928.0, "43150": 12698492928.0, "43155": 12698492928.0, "43160": 12698492928.0, "43165": 12698492928.0, "43170": 12698492928.0, "43175": 12698492928.0, "43180": 12698492928.0, "43185": 12698492928.0, "43190": 12698492928.0, "43195": 12698492928.0, "43200": 12698492928.0, "43205": 12698492928.0, "43210": 12698492928.0, "43215": 12698492928.0, "43220": 12698492928.0, "43225": 12698492928.0, "43230": 12698492928.0, "43235": 12698492928.0, "43240": 12698492928.0, "43245": 12698492928.0, "43250": 12698492928.0, "43255": 12698492928.0, "43260": 12698492928.0, "43265": 12698492928.0, "43270": 12698492928.0, "43275": 12698492928.0, "43280": 12698492928.0, "43285": 12698492928.0, "43290": 12698492928.0, "43295": 12698492928.0, "43300": 12698492928.0, "43305": 12698492928.0, "43310": 12698492928.0, "43315": 12698492928.0, "43320": 12698492928.0, "43325": 12698492928.0, "43330": 12698492928.0, "43335": 12698492928.0, "43340": 12698492928.0, "43345": 12698492928.0, "43350": 12698492928.0, "43355": 12698492928.0, "43360": 12698492928.0, "43365": 12698492928.0, "43370": 12698492928.0, "43375": 12698492928.0, "43380": 12698492928.0, "43385": 12698492928.0, "43390": 12698492928.0, "43395": 12698492928.0, "43400": 12698492928.0, "43405": 12698492928.0, "43410": 12698492928.0, "43415": 12698492928.0, "43420": 12698492928.0, "43425": 12698492928.0, "43430": 12698492928.0, "43435": 12698492928.0, "43440": 12698492928.0, "43445": 12698492928.0, "43450": 12698492928.0, "43455": 12698492928.0, "43460": 12698492928.0, "43465": 12698492928.0, "43470": 12698492928.0, "43475": 12698492928.0, "43480": 12698492928.0, "43485": 12698492928.0, "43490": 12698492928.0, "43495": 12698492928.0, "43500": 12698492928.0, "43505": 12698492928.0, "43510": 12698492928.0, "43515": 12698492928.0, "43520": 12698492928.0, "43525": 12698492928.0, "43530": 12698492928.0, "43535": 12698492928.0, "43540": 12698492928.0, "43545": 12698492928.0, "43550": 12698492928.0, "43555": 12698492928.0, "43560": 12698492928.0, "43565": 12698492928.0, "43570": 12698492928.0, "43575": 12698492928.0, "43580": 12698492928.0, "43585": 12698492928.0, "43590": 12698492928.0, "43595": 12698492928.0, "43600": 12698492928.0, "43605": 12698492928.0, "43610": 12698492928.0, "43615": 12698492928.0, "43620": 12698492928.0, "43625": 12698492928.0, "43630": 12698492928.0, "43635": 12698492928.0, "43640": 12698492928.0, "43645": 12698492928.0, "43650": 12698492928.0, "43655": 12698492928.0, "43660": 12698492928.0, "43665": 12698492928.0, "43670": 12698492928.0, "43675": 12698492928.0, "43680": 12698492928.0, "43685": 12698492928.0, "43690": 12698492928.0, "43695": 12698492928.0, "43700": 12698492928.0, "43705": 12698492928.0, "43710": 12698492928.0, "43715": 12698492928.0, "43720": 12698492928.0, "43725": 12698492928.0, "43730": 12698492928.0, "43735": 12698492928.0, "43740": 12698492928.0, "43745": 12698492928.0, "43750": 12698492928.0, "43755": 12698492928.0, "43760": 12698492928.0, "43765": 12698492928.0, "43770": 12698492928.0, "43775": 12698492928.0, "43780": 12698492928.0, "43785": 12698492928.0, "43790": 12698492928.0, "43795": 12698492928.0, "43800": 12698492928.0, "43805": 12698492928.0, "43810": 12698492928.0, "43815": 12698492928.0, "43820": 12698492928.0, "43825": 12698492928.0, "43830": 12698492928.0, "43835": 12698492928.0, "43840": 12698492928.0, "43845": 12698492928.0, "43850": 12698492928.0, "43855": 12698492928.0, "43860": 12698492928.0, "43865": 12698492928.0, "43870": 12698492928.0, "43875": 12698492928.0, "43880": 12698492928.0, "43885": 12698492928.0, "43890": 12698492928.0, "43895": 12698492928.0, "43900": 12698492928.0, "43905": 12698492928.0, "43910": 12698492928.0, "43915": 12698492928.0, "43920": 12698492928.0, "43925": 12698492928.0, "43930": 12698492928.0, "43935": 12698492928.0, "43940": 12698492928.0, "43945": 12698492928.0, "43950": 12698492928.0, "43955": 12698492928.0, "43960": 12698492928.0, "43965": 12698492928.0, "43970": 12698492928.0, "43975": 12698492928.0, "43980": 12698492928.0, "43985": 12698492928.0, "43990": 12698492928.0, "43995": 12698492928.0, "44000": 12698492928.0, "44005": 12698492928.0, "44010": 12698492928.0, "44015": 12698492928.0, "44020": 12698492928.0, "44025": 12698492928.0, "44030": 12698492928.0, "44035": 12698492928.0, "44040": 12698492928.0, "44045": 12698492928.0, "44050": 12698492928.0, "44055": 12698492928.0, "44060": 12698492928.0, "44065": 12698492928.0, "44070": 12698492928.0, "44075": 12698492928.0, "44080": 12698492928.0, "44085": 12698492928.0, "44090": 12698492928.0, "44095": 12698492928.0, "44100": 12698492928.0, "44105": 12698492928.0, "44110": 12698492928.0, "44115": 12698492928.0, "44120": 12698492928.0, "44125": 12698492928.0, "44130": 12698492928.0, "44135": 12698492928.0, "44140": 12698492928.0, "44145": 12698492928.0, "44150": 12698492928.0, "44155": 12698492928.0, "44160": 12698492928.0, "44165": 12698492928.0, "44170": 12698492928.0, "44175": 12698492928.0, "44180": 12698492928.0, "44185": 12698492928.0, "44190": 12698492928.0, "44195": 12698492928.0, "44200": 12698492928.0, "44205": 12698492928.0, "44210": 12698492928.0, "44215": 12698492928.0, "44220": 12698492928.0, "44225": 12698492928.0, "44230": 12698492928.0, "44235": 12698492928.0, "44240": 12698492928.0, "44245": 12698492928.0, "44250": 12698492928.0, "44255": 12698492928.0, "44260": 12698492928.0, "44265": 12698492928.0, "44270": 12698492928.0, "44275": 12698492928.0, "44280": 12698492928.0, "44285": 12698492928.0, "44290": 12698492928.0, "44295": 12698492928.0, "44300": 12698492928.0, "44305": 12698492928.0, "44310": 12698492928.0, "44315": 12698492928.0, "44320": 12698492928.0, "44325": 12698492928.0, "44330": 12698492928.0, "44335": 12698492928.0, "44340": 12698492928.0, "44345": 12698492928.0, "44350": 12698492928.0, "44355": 12698492928.0, "44360": 12698492928.0, "44365": 12698492928.0, "44370": 12698492928.0, "44375": 12698492928.0, "44380": 12698492928.0, "44385": 12698492928.0, "44390": 12698492928.0, "44395": 12698492928.0, "44400": 12698492928.0, "44405": 12698492928.0, "44410": 12698492928.0, "44415": 12698492928.0, "44420": 12698492928.0, "44425": 12698492928.0, "44430": 12698492928.0, "44435": 12698492928.0, "44440": 12698492928.0, "44445": 12698492928.0, "44450": 12698492928.0, "44455": 12698492928.0, "44460": 12698492928.0, "44465": 12698492928.0, "44470": 12698492928.0, "44475": 12698492928.0, "44480": 12698492928.0, "44485": 12698492928.0, "44490": 12698492928.0, "44495": 12698492928.0, "44500": 12698492928.0, "44505": 12698492928.0, "44510": 12698492928.0, "44515": 12698492928.0, "44520": 12698492928.0, "44525": 12698492928.0, "44530": 12698492928.0, "44535": 12698492928.0, "44540": 12698492928.0, "44545": 12698492928.0, "44550": 12698492928.0, "44555": 12698492928.0, "44560": 12698492928.0, "44565": 12698492928.0, "44570": 12698492928.0, "44575": 12698492928.0, "44580": 12698492928.0, "44585": 12698492928.0, "44590": 12698492928.0, "44595": 12698492928.0, "44600": 12698492928.0, "44605": 12698492928.0, "44610": 12698492928.0, "44615": 12698492928.0, "44620": 12698492928.0, "44625": 12698492928.0, "44630": 12698492928.0, "44635": 12698492928.0, "44640": 12698492928.0, "44645": 12698492928.0, "44650": 12698492928.0, "44655": 12698492928.0, "44660": 12698492928.0, "44665": 12698492928.0, "44670": 12698492928.0, "44675": 12698492928.0, "44680": 12698492928.0, "44685": 12698492928.0, "44690": 12698492928.0, "44695": 12698492928.0, "44700": 12698492928.0, "44705": 12698492928.0, "44710": 12698492928.0, "44715": 12698492928.0, "44720": 12698492928.0, "44725": 12698492928.0, "44730": 12698492928.0, "44735": 12698492928.0, "44740": 12698492928.0, "44745": 12698492928.0, "44750": 12698492928.0, "44755": 12698492928.0, "44760": 12698492928.0, "44765": 12698492928.0, "44770": 12698492928.0, "44775": 12698492928.0, "44780": 12698492928.0, "44785": 12698492928.0, "44790": 12698492928.0, "44795": 12698492928.0, "44800": 12698492928.0, "44805": 12698492928.0, "44810": 12698492928.0, "44815": 12698492928.0, "44820": 12698492928.0, "44825": 12698492928.0, "44830": 12698492928.0, "44835": 12698492928.0, "44840": 12698492928.0, "44845": 12698492928.0, "44850": 12698492928.0, "44855": 12698492928.0, "44860": 12698492928.0, "44865": 12698492928.0, "44870": 12698492928.0, "44875": 12698492928.0, "44880": 12698492928.0, "44885": 12698492928.0, "44890": 12698492928.0, "44895": 12698492928.0, "44900": 12698492928.0, "44905": 12698492928.0, "44910": 12698492928.0, "44915": 12698492928.0, "44920": 12698492928.0, "44925": 12698492928.0, "44930": 12698492928.0, "44935": 12698492928.0, "44940": 12698492928.0, "44945": 12698492928.0, "44950": 12698492928.0, "44955": 12698492928.0, "44960": 12698492928.0, "44965": 12698492928.0, "44970": 12698492928.0, "44975": 12698492928.0, "44980": 12698492928.0, "44985": 12698492928.0, "44990": 12698492928.0, "44995": 12698492928.0, "45000": 12698492928.0, "45005": 12698492928.0, "45010": 12698492928.0, "45015": 12698492928.0, "45020": 12698492928.0, "45025": 12698492928.0, "45030": 12698492928.0, "45035": 12698492928.0, "45040": 12698492928.0, "45045": 12698492928.0, "45050": 12698492928.0, "45055": 12698492928.0, "45060": 12698492928.0, "45065": 12698492928.0, "45070": 12698492928.0, "45075": 12698492928.0, "45080": 12698492928.0, "45085": 12698492928.0, "45090": 12698492928.0, "45095": 12698492928.0, "45100": 12698492928.0, "45105": 12698492928.0, "45110": 12698492928.0, "45115": 12698492928.0, "45120": 12698492928.0, "45125": 12698492928.0, "45130": 12698492928.0, "45135": 12698492928.0, "45140": 12698492928.0, "45145": 12698492928.0, "45150": 12698492928.0, "45155": 12698492928.0, "45160": 12698492928.0, "45165": 12698492928.0, "45170": 12698492928.0, "45175": 12698492928.0, "45180": 12698492928.0, "45185": 12698492928.0, "45190": 12698492928.0, "45195": 12698492928.0, "45200": 12698492928.0, "45205": 12698492928.0, "45210": 12698492928.0, "45215": 12698492928.0, "45220": 12698492928.0, "45225": 12698492928.0, "45230": 12698492928.0, "45235": 12698492928.0, "45240": 12698492928.0, "45245": 12698492928.0, "45250": 12698492928.0, "45255": 12698492928.0, "45260": 12698492928.0, "45265": 12698492928.0, "45270": 12698492928.0, "45275": 12698492928.0, "45280": 12698492928.0, "45285": 12698492928.0, "45290": 12698492928.0, "45295": 12698492928.0, "45300": 12698492928.0, "45305": 12698492928.0, "45310": 12698492928.0, "45315": 12698492928.0, "45320": 12698492928.0, "45325": 12698492928.0, "45330": 12698492928.0, "45335": 12698492928.0, "45340": 12698492928.0, "45345": 12698492928.0, "45350": 12698492928.0, "45355": 12698492928.0, "45360": 12698492928.0, "45365": 12698492928.0, "45370": 12698492928.0, "45375": 12698492928.0, "45380": 12698492928.0, "45385": 12698492928.0, "45390": 12698492928.0, "45395": 12698492928.0, "45400": 12698492928.0, "45405": 12698492928.0, "45410": 12698492928.0, "45415": 12698492928.0, "45420": 12698492928.0, "45425": 12698492928.0, "45430": 12698492928.0, "45435": 12698492928.0, "45440": 12698492928.0, "45445": 12698492928.0, "45450": 12698492928.0, "45455": 12698492928.0, "45460": 12698492928.0, "45465": 12698492928.0, "45470": 12698492928.0, "45475": 12698492928.0, "45480": 12698492928.0, "45485": 12698492928.0, "45490": 12698492928.0, "45495": 12698492928.0, "45500": 12698492928.0, "45505": 12698492928.0, "45510": 12698492928.0, "45515": 12698492928.0, "45520": 12698492928.0, "45525": 12698492928.0, "45530": 12698492928.0, "45535": 12698492928.0, "45540": 12698492928.0, "45545": 12698492928.0, "45550": 12698492928.0, "45555": 12698492928.0, "45560": 12698492928.0, "45565": 12698492928.0, "45570": 12698492928.0, "45575": 12698492928.0, "45580": 12698492928.0, "45585": 12698492928.0, "45590": 12698492928.0, "45595": 12698492928.0, "45600": 12698492928.0, "45605": 12698492928.0, "45610": 12698492928.0, "45615": 12698492928.0, "45620": 12698492928.0, "45625": 12698492928.0, "45630": 12698492928.0, "45635": 12698492928.0, "45640": 12698492928.0, "45645": 12698492928.0, "45650": 12698492928.0, "45655": 12698492928.0, "45660": 12698492928.0, "45665": 12698492928.0, "45670": 12698492928.0, "45675": 12698492928.0, "45680": 12698492928.0, "45685": 12698492928.0, "45690": 12698492928.0, "45695": 12698492928.0, "45700": 12698492928.0, "45705": 12698492928.0, "45710": 12698492928.0, "45715": 12698492928.0, "45720": 12698492928.0, "45725": 12698492928.0, "45730": 12698492928.0, "45735": 12698492928.0, "45740": 12698492928.0, "45745": 12698492928.0, "45750": 12698492928.0, "45755": 12698492928.0, "45760": 12698492928.0, "45765": 12698492928.0, "45770": 12698492928.0, "45775": 12698492928.0, "45780": 12698492928.0, "45785": 12698492928.0, "45790": 12698492928.0, "45795": 12698492928.0, "45800": 12698492928.0, "45805": 12698492928.0, "45810": 12698492928.0, "45815": 12698492928.0, "45820": 12698492928.0, "45825": 12698492928.0, "45830": 12698492928.0, "45835": 12698492928.0, "45840": 12698492928.0, "45845": 12698492928.0, "45850": 12698492928.0, "45855": 12698492928.0, "45860": 12698492928.0, "45865": 12698492928.0, "45870": 12698492928.0, "45875": 12698492928.0, "45880": 12698492928.0, "45885": 12698492928.0, "45890": 12698492928.0, "45895": 12698492928.0, "45900": 12698492928.0, "45905": 12698492928.0, "45910": 12698492928.0, "45915": 12698492928.0, "45920": 12698492928.0, "45925": 12698492928.0, "45930": 12698492928.0, "45935": 12698492928.0, "45940": 12698492928.0, "45945": 12698492928.0, "45950": 12698492928.0, "45955": 12698492928.0, "45960": 12698492928.0, "45965": 12698492928.0, "45970": 12698492928.0, "45975": 12698492928.0, "45980": 12698492928.0, "45985": 12698492928.0, "45990": 12698492928.0, "45995": 12698492928.0, "46000": 12698492928.0, "46005": 12698492928.0, "46010": 12698492928.0, "46015": 12698492928.0, "46020": 12698492928.0, "46025": 12698492928.0, "46030": 12698492928.0, "46035": 12698492928.0, "46040": 12698492928.0, "46045": 12698492928.0, "46050": 12698492928.0, "46055": 12698492928.0, "46060": 12698492928.0, "46065": 12698492928.0, "46070": 12698492928.0, "46075": 12698492928.0, "46080": 12698492928.0, "46085": 12698492928.0, "46090": 12698492928.0, "46095": 12698492928.0, "46100": 12698492928.0, "46105": 12698492928.0, "46110": 12698492928.0, "46115": 12698492928.0, "46120": 12698492928.0, "46125": 12698492928.0, "46130": 12698492928.0, "46135": 12698492928.0, "46140": 12698492928.0, "46145": 12698492928.0, "46150": 12698492928.0, "46155": 12698492928.0, "46160": 12698492928.0, "46165": 12698492928.0, "46170": 12698492928.0, "46175": 12698492928.0, "46180": 12698492928.0, "46185": 12698492928.0, "46190": 12698492928.0, "46195": 12698492928.0, "46200": 12698492928.0, "46205": 12698492928.0, "46210": 12698492928.0, "46215": 12698492928.0, "46220": 12698492928.0, "46225": 12698492928.0, "46230": 12698492928.0, "46235": 12698492928.0, "46240": 12698492928.0, "46245": 12698492928.0, "46250": 12698492928.0, "46255": 12698492928.0, "46260": 12698492928.0, "46265": 12698492928.0, "46270": 12698492928.0, "46275": 12698492928.0, "46280": 12698492928.0, "46285": 12698492928.0, "46290": 12698492928.0, "46295": 12698492928.0, "46300": 12698492928.0, "46305": 12698492928.0, "46310": 12698492928.0, "46315": 12698492928.0, "46320": 12698492928.0, "46325": 12698492928.0, "46330": 12698492928.0, "46335": 12698492928.0, "46340": 12698492928.0, "46345": 12698492928.0, "46350": 12698492928.0, "46355": 12698492928.0, "46360": 12698492928.0, "46365": 12698492928.0, "46370": 12698492928.0, "46375": 12698492928.0, "46380": 12698492928.0, "46385": 12698492928.0, "46390": 12698492928.0, "46395": 12698492928.0, "46400": 12698492928.0, "46405": 12698492928.0, "46410": 12698492928.0, "46415": 12698492928.0, "46420": 12698492928.0, "46425": 12698492928.0, "46430": 12698492928.0, "46435": 12698492928.0, "46440": 12698492928.0, "46445": 12698492928.0, "46450": 12698492928.0, "46455": 12698492928.0, "46460": 12698492928.0, "46465": 12698492928.0, "46470": 12698492928.0, "46475": 12698492928.0, "46480": 12698492928.0, "46485": 12698492928.0, "46490": 12698492928.0, "46495": 12698492928.0, "46500": 12698492928.0, "46505": 12698492928.0, "46510": 12698492928.0, "46515": 12698492928.0, "46520": 12698492928.0, "46525": 12698492928.0, "46530": 12698492928.0, "46535": 12698492928.0, "46540": 12698492928.0, "46545": 12698492928.0, "46550": 12698492928.0, "46555": 12698492928.0, "46560": 12698492928.0, "46565": 12698492928.0, "46570": 12698492928.0, "46575": 12698492928.0, "46580": 12698492928.0, "46585": 12698492928.0, "46590": 12698492928.0, "46595": 12698492928.0, "46600": 12698492928.0, "46605": 12698492928.0, "46610": 12698492928.0, "46615": 12698492928.0, "46620": 12698492928.0, "46625": 12698492928.0, "46630": 12698492928.0, "46635": 12698492928.0, "46640": 12698492928.0, "46645": 12698492928.0, "46650": 12698492928.0, "46655": 12698492928.0, "46660": 12698492928.0, "46665": 12698492928.0, "46670": 12698492928.0, "46675": 12698492928.0, "46680": 12698492928.0, "46685": 12698492928.0, "46690": 12698492928.0, "46695": 12698492928.0, "46700": 12698492928.0, "46705": 12698492928.0, "46710": 12698492928.0, "46715": 12698492928.0, "46720": 12698492928.0, "46725": 12698492928.0, "46730": 12698492928.0, "46735": 12698492928.0, "46740": 12698492928.0, "46745": 12698492928.0, "46750": 12698492928.0, "46755": 12698492928.0, "46760": 12698492928.0, "46765": 12698492928.0, "46770": 12698492928.0, "46775": 12698492928.0, "46780": 12698492928.0, "46785": 12698492928.0, "46790": 12698492928.0, "46795": 12698492928.0, "46800": 12698492928.0, "46805": 12698492928.0, "46810": 12698492928.0, "46815": 12698492928.0, "46820": 12698492928.0, "46825": 12698492928.0, "46830": 12698492928.0, "46835": 12698492928.0, "46840": 12698492928.0, "46845": 12698492928.0, "46850": 12698492928.0, "46855": 12698492928.0, "46860": 12698492928.0, "46865": 12698492928.0, "46870": 12698492928.0, "46875": 12698492928.0, "46880": 12698492928.0, "46885": 12698492928.0, "46890": 12698492928.0, "46895": 12698492928.0, "46900": 12698492928.0, "46905": 12698492928.0, "46910": 12698492928.0, "46915": 12698492928.0, "46920": 12698492928.0, "46925": 12698492928.0, "46930": 12698492928.0, "46935": 12698492928.0, "46940": 12698492928.0, "46945": 12698492928.0, "46950": 12698492928.0, "46955": 12698492928.0, "46960": 12698492928.0, "46965": 12698492928.0, "46970": 12698492928.0, "46975": 12698492928.0, "46980": 12698492928.0, "46985": 12698492928.0, "46990": 12698492928.0, "46995": 12698492928.0, "47000": 12698492928.0, "47005": 12698492928.0, "47010": 12698492928.0, "47015": 12698492928.0, "47020": 12698492928.0, "47025": 12698492928.0, "47030": 12698492928.0, "47035": 12698492928.0, "47040": 12698492928.0, "47045": 12698492928.0, "47050": 12698492928.0, "47055": 12698492928.0, "47060": 12698492928.0, "47065": 12698492928.0, "47070": 12698492928.0, "47075": 12698492928.0, "47080": 12698492928.0, "47085": 12698492928.0, "47090": 12698492928.0, "47095": 12698492928.0, "47100": 12698492928.0, "47105": 12698492928.0, "47110": 12698492928.0, "47115": 12698492928.0, "47120": 12698492928.0, "47125": 12698492928.0, "47130": 12698492928.0, "47135": 12698492928.0, "47140": 12698492928.0, "47145": 12698492928.0, "47150": 12698492928.0, "47155": 12698492928.0, "47160": 12698492928.0, "47165": 12698492928.0, "47170": 12698492928.0, "47175": 12698492928.0, "47180": 12698492928.0, "47185": 12698492928.0, "47190": 12698492928.0, "47195": 12698492928.0, "47200": 12698492928.0, "47205": 12698492928.0, "47210": 12698492928.0, "47215": 12698492928.0, "47220": 12698492928.0, "47225": 12698492928.0, "47230": 12698492928.0, "47235": 12698492928.0, "47240": 12698492928.0, "47245": 12698492928.0, "47250": 12698492928.0, "47255": 12698492928.0, "47260": 12698492928.0, "47265": 12698492928.0, "47270": 12698492928.0, "47275": 12698492928.0, "47280": 12698492928.0, "47285": 12698492928.0, "47290": 12698492928.0, "47295": 12698492928.0, "47300": 12698492928.0, "47305": 12698492928.0, "47310": 12698492928.0, "47315": 12698492928.0, "47320": 12698492928.0, "47325": 12698492928.0, "47330": 12698492928.0, "47335": 12698492928.0, "47340": 12698492928.0, "47345": 12698492928.0, "47350": 12698492928.0, "47355": 12698492928.0, "47360": 12698492928.0, "47365": 12698492928.0, "47370": 12698492928.0, "47375": 12698492928.0, "47380": 12698492928.0, "47385": 12698492928.0, "47390": 12698492928.0, "47395": 12698492928.0, "47400": 12698492928.0, "47405": 12698492928.0, "47410": 12698492928.0, "47415": 12698492928.0, "47420": 12698492928.0, "47425": 12698492928.0, "47430": 12698492928.0, "47435": 12698492928.0, "47440": 12698492928.0, "47445": 12698492928.0, "47450": 12698492928.0, "47455": 12698492928.0, "47460": 12698492928.0, "47465": 12698492928.0, "47470": 12698492928.0, "47475": 12698492928.0, "47480": 12698492928.0, "47485": 12698492928.0, "47490": 12698492928.0, "47495": 12698492928.0, "47500": 12698492928.0, "47505": 12698492928.0, "47510": 12698492928.0, "47515": 12698492928.0, "47520": 12698492928.0, "47525": 12698492928.0, "47530": 12698492928.0, "47535": 12698492928.0, "47540": 12698492928.0, "47545": 12698492928.0, "47550": 12698492928.0, "47555": 12698492928.0, "47560": 12698492928.0, "47565": 12698492928.0, "47570": 12698492928.0, "47575": 12698492928.0, "47580": 12698492928.0, "47585": 12698492928.0, "47590": 12698492928.0, "47595": 12698492928.0, "47600": 12698492928.0, "47605": 12698492928.0, "47610": 12698492928.0, "47615": 12698492928.0, "47620": 12698492928.0, "47625": 12698492928.0, "47630": 12698492928.0, "47635": 12698492928.0, "47640": 12698492928.0, "47645": 12698492928.0, "47650": 12698492928.0, "47655": 12698492928.0, "47660": 12698492928.0, "47665": 12698492928.0, "47670": 12698492928.0, "47675": 12698492928.0, "47680": 12698492928.0, "47685": 12698492928.0, "47690": 12698492928.0, "47695": 12698492928.0, "47700": 12698492928.0, "47705": 12698492928.0, "47710": 12698492928.0, "47715": 12698492928.0, "47720": 12698492928.0, "47725": 12698492928.0, "47730": 12698492928.0, "47735": 12698492928.0, "47740": 12698492928.0, "47745": 12698492928.0, "47750": 12698492928.0, "47755": 12698492928.0, "47760": 12698492928.0, "47765": 12698492928.0, "47770": 12698492928.0, "47775": 12698492928.0, "47780": 12698492928.0, "47785": 12698492928.0, "47790": 12698492928.0, "47795": 12698492928.0, "47800": 12698492928.0, "47805": 12698492928.0, "47810": 12698492928.0, "47815": 12698492928.0, "47820": 12698492928.0, "47825": 12698492928.0, "47830": 12698492928.0, "47835": 12698492928.0, "47840": 12698492928.0, "47845": 12698492928.0, "47850": 12698492928.0, "47855": 12698492928.0, "47860": 12698492928.0, "47865": 12698492928.0, "47870": 12698492928.0, "47875": 12698492928.0, "47880": 12698492928.0, "47885": 12698492928.0, "47890": 12698492928.0, "47895": 12698492928.0, "47900": 12698492928.0, "47905": 12698492928.0, "47910": 12698492928.0, "47915": 12698492928.0, "47920": 12698492928.0, "47925": 12698492928.0, "47930": 12698492928.0, "47935": 12698492928.0, "47940": 12698492928.0, "47945": 12698492928.0, "47950": 12698492928.0, "47955": 12698492928.0, "47960": 12698492928.0, "47965": 12698492928.0, "47970": 12698492928.0, "47975": 12698492928.0, "47980": 12698492928.0, "47985": 12698492928.0, "47990": 12698492928.0, "47995": 12698492928.0, "48000": 12698492928.0, "48005": 12698492928.0, "48010": 12698492928.0, "48015": 12698492928.0, "48020": 12698492928.0, "48025": 12698492928.0, "48030": 12698492928.0, "48035": 12698492928.0, "48040": 12698492928.0, "48045": 12698492928.0, "48050": 12698492928.0, "48055": 12698492928.0, "48060": 12698492928.0, "48065": 12698492928.0, "48070": 12698492928.0, "48075": 12698492928.0, "48080": 12698492928.0, "48085": 12698492928.0, "48090": 12698492928.0, "48095": 12698492928.0, "48100": 12698492928.0, "48105": 12698492928.0, "48110": 12698492928.0, "48115": 12698492928.0, "48120": 12698492928.0, "48125": 12698492928.0, "48130": 12698492928.0, "48135": 12698492928.0, "48140": 12698492928.0, "48145": 12698492928.0, "48150": 12698492928.0, "48155": 12698492928.0, "48160": 12698492928.0, "48165": 12698492928.0, "48170": 12698492928.0, "48175": 12698492928.0, "48180": 12698492928.0, "48185": 12698492928.0, "48190": 12698492928.0, "48195": 12698492928.0, "48200": 12698492928.0, "48205": 12698492928.0, "48210": 12698492928.0, "48215": 12698492928.0, "48220": 12698492928.0, "48225": 12698492928.0, "48230": 12698492928.0, "48235": 12698492928.0, "48240": 12698492928.0, "48245": 12698492928.0, "48250": 12698492928.0, "48255": 12698492928.0, "48260": 12698492928.0, "48265": 12698492928.0, "48270": 12698492928.0, "48275": 12698492928.0, "48280": 12698492928.0, "48285": 12698492928.0, "48290": 12698492928.0, "48295": 12698492928.0, "48300": 12698492928.0, "48305": 12698492928.0, "48310": 12698492928.0, "48315": 12698492928.0, "48320": 12698492928.0, "48325": 12698492928.0, "48330": 12698492928.0, "48335": 12698492928.0, "48340": 12698492928.0, "48345": 12698492928.0, "48350": 12698492928.0, "48355": 12698492928.0, "48360": 12698492928.0, "48365": 12698492928.0, "48370": 12698492928.0, "48375": 12698492928.0, "48380": 12698492928.0, "48385": 12698492928.0, "48390": 12698492928.0, "48395": 12698492928.0, "48400": 12698492928.0, "48405": 12698492928.0, "48410": 12698492928.0, "48415": 12698492928.0, "48420": 12698492928.0, "48425": 12698492928.0, "48430": 12698492928.0, "48435": 12698492928.0, "48440": 12698492928.0, "48445": 12698492928.0, "48450": 12698492928.0, "48455": 12698492928.0, "48460": 12698492928.0, "48465": 12698492928.0, "48470": 12698492928.0, "48475": 12698492928.0, "48480": 12698492928.0, "48485": 12698492928.0, "48490": 12698492928.0, "48495": 12698492928.0, "48500": 12698492928.0, "48505": 12698492928.0, "48510": 12698492928.0, "48515": 12698492928.0, "48520": 12698492928.0, "48525": 12698492928.0, "48530": 12698492928.0, "48535": 12698492928.0, "48540": 12698492928.0, "48545": 12698492928.0, "48550": 12698492928.0, "48555": 12698492928.0, "48560": 12698492928.0, "48565": 12698492928.0, "48570": 12698492928.0, "48575": 12698492928.0, "48580": 12698492928.0, "48585": 12698492928.0, "48590": 12698492928.0, "48595": 12698492928.0, "48600": 12698492928.0, "48605": 12698492928.0, "48610": 12698492928.0, "48615": 12698492928.0, "48620": 12698492928.0, "48625": 12698492928.0, "48630": 12698492928.0, "48635": 12698492928.0, "48640": 12698492928.0, "48645": 12698492928.0, "48650": 12698492928.0, "48655": 12698492928.0, "48660": 12698492928.0, "48665": 12698492928.0, "48670": 12698492928.0, "48675": 12698492928.0, "48680": 12698492928.0, "48685": 12698492928.0, "48690": 12698492928.0, "48695": 12698492928.0, "48700": 12698492928.0, "48705": 12698492928.0, "48710": 12698492928.0, "48715": 12698492928.0, "48720": 12698492928.0, "48725": 12698492928.0, "48730": 12698492928.0, "48735": 12698492928.0, "48740": 12698492928.0, "48745": 12698492928.0, "48750": 12698492928.0, "48755": 12698492928.0, "48760": 12698492928.0, "48765": 12698492928.0, "48770": 12698492928.0, "48775": 12698492928.0, "48780": 12698492928.0, "48785": 12698492928.0, "48790": 12698492928.0, "48795": 12698492928.0, "48800": 12698492928.0, "48805": 12698492928.0, "48810": 12698492928.0, "48815": 12698492928.0, "48820": 12698492928.0, "48825": 12698492928.0, "48830": 12698492928.0, "48835": 12698492928.0, "48840": 12698492928.0, "48845": 12698492928.0, "48850": 12698492928.0, "48855": 12698492928.0, "48860": 12698492928.0, "48865": 12698492928.0, "48870": 12698492928.0, "48875": 12698492928.0, "48880": 12698492928.0, "48885": 12698492928.0, "48890": 12698492928.0, "48895": 12698492928.0, "48900": 12698492928.0, "48905": 12698492928.0, "48910": 12698492928.0, "48915": 12698492928.0, "48920": 12698492928.0, "48925": 12698492928.0, "48930": 12698492928.0, "48935": 12698492928.0, "48940": 12698492928.0, "48945": 12698492928.0, "48950": 12698492928.0, "48955": 12698492928.0, "48960": 12698492928.0, "48965": 12698492928.0, "48970": 12698492928.0, "48975": 12698492928.0, "48980": 12698492928.0, "48985": 12698492928.0, "48990": 12698492928.0, "48995": 12698492928.0, "49000": 12698492928.0, "49005": 12698492928.0, "49010": 12698492928.0, "49015": 12698492928.0, "49020": 12698492928.0, "49025": 12698492928.0, "49030": 12698492928.0, "49035": 12698492928.0, "49040": 12698492928.0, "49045": 12698492928.0, "49050": 12698492928.0, "49055": 12698492928.0, "49060": 12698492928.0, "49065": 12698492928.0, "49070": 12698492928.0, "49075": 12698492928.0, "49080": 12698492928.0, "49085": 12698492928.0, "49090": 12698492928.0, "49095": 12698492928.0, "49100": 12698492928.0, "49105": 12698492928.0, "49110": 12698492928.0, "49115": 12698492928.0, "49120": 12698492928.0, "49125": 12698492928.0, "49130": 12698492928.0, "49135": 12698492928.0, "49140": 12698492928.0, "49145": 12698492928.0, "49150": 12698492928.0, "49155": 12698492928.0, "49160": 12698492928.0, "49165": 12698492928.0, "49170": 12698492928.0, "49175": 12698492928.0, "49180": 12698492928.0, "49185": 12698492928.0, "49190": 12698492928.0, "49195": 12698492928.0, "49200": 12698492928.0, "49205": 12698492928.0, "49210": 12698492928.0, "49215": 12698492928.0, "49220": 12698492928.0, "49225": 12698492928.0, "49230": 12698492928.0, "49235": 12698492928.0, "49240": 12698492928.0, "49245": 12698492928.0, "49250": 12698492928.0, "49255": 12698492928.0, "49260": 12698492928.0, "49265": 12698492928.0, "49270": 12698492928.0, "49275": 12698492928.0, "49280": 12698492928.0, "49285": 12698492928.0, "49290": 12698492928.0, "49295": 12698492928.0, "49300": 12698492928.0, "49305": 12698492928.0, "49310": 12698492928.0, "49315": 12698492928.0, "49320": 12698492928.0, "49325": 12698492928.0, "49330": 12698492928.0, "49335": 12698492928.0, "49340": 12698492928.0, "49345": 12698492928.0, "49350": 12698492928.0, "49355": 12698492928.0, "49360": 12698492928.0, "49365": 12698492928.0, "49370": 12698492928.0, "49375": 12698492928.0, "49380": 12698492928.0, "49385": 12698492928.0, "49390": 12698492928.0, "49395": 12698492928.0, "49400": 12698492928.0, "49405": 12698492928.0, "49410": 12698492928.0, "49415": 12698492928.0, "49420": 12698492928.0, "49425": 12698492928.0, "49430": 12698492928.0, "49435": 12698492928.0, "49440": 12698492928.0, "49445": 12698492928.0, "49450": 12698492928.0, "49455": 12698492928.0, "49460": 12698492928.0, "49465": 12698492928.0, "49470": 12698492928.0, "49475": 12698492928.0, "49480": 12698492928.0, "49485": 12698492928.0, "49490": 12698492928.0, "49495": 12698492928.0, "49500": 12698492928.0, "49505": 12698492928.0, "49510": 12698492928.0, "49515": 12698492928.0, "49520": 12698492928.0, "49525": 12698492928.0, "49530": 12698492928.0, "49535": 12698492928.0, "49540": 12698492928.0, "49545": 12698492928.0, "49550": 12698492928.0, "49555": 12698492928.0, "49560": 12698492928.0, "49565": 12698492928.0, "49570": 12698492928.0, "49575": 12698492928.0, "49580": 12698492928.0, "49585": 12698492928.0, "49590": 12698492928.0, "49595": 12698492928.0, "49600": 12698492928.0, "49605": 12698492928.0, "49610": 12698492928.0, "49615": 12698492928.0, "49620": 12698492928.0, "49625": 12698492928.0, "49630": 12698492928.0, "49635": 12698492928.0, "49640": 12698492928.0, "49645": 12698492928.0, "49650": 12698492928.0, "49655": 12698492928.0, "49660": 12698492928.0, "49665": 12698492928.0, "49670": 12698492928.0, "49675": 12698492928.0, "49680": 12698492928.0, "49685": 12698492928.0, "49690": 12698492928.0, "49695": 12698492928.0, "49700": 12698492928.0, "49705": 12698492928.0, "49710": 12698492928.0, "49715": 12698492928.0, "49720": 12698492928.0, "49725": 12698492928.0, "49730": 12698492928.0, "49735": 12698492928.0, "49740": 12698492928.0, "49745": 12698492928.0, "49750": 12698492928.0, "49755": 12698492928.0, "49760": 12698492928.0, "49765": 12698492928.0, "49770": 12698492928.0, "49775": 12698492928.0, "49780": 12698492928.0, "49785": 12698492928.0, "49790": 12698492928.0, "49795": 12698492928.0, "49800": 12698492928.0, "49805": 12698492928.0, "49810": 12698492928.0, "49815": 12698492928.0, "49820": 12698492928.0, "49825": 12698492928.0, "49830": 12698492928.0, "49835": 12698492928.0, "49840": 12698492928.0, "49845": 12698492928.0, "49850": 12698492928.0, "49855": 12698492928.0, "49860": 12698492928.0, "49865": 12698492928.0, "49870": 12698492928.0, "49875": 12698492928.0, "49880": 12698492928.0, "49885": 12698492928.0, "49890": 12698492928.0, "49895": 12698492928.0, "49900": 12698492928.0, "49905": 12698492928.0, "49910": 12698492928.0, "49915": 12698492928.0, "49920": 12698492928.0, "49925": 12698492928.0, "49930": 12698492928.0, "49935": 12698492928.0, "49940": 12698492928.0, "49945": 12698492928.0, "49950": 12698492928.0, "49955": 12698492928.0, "49960": 12698492928.0, "49965": 12698492928.0, "49970": 12698492928.0, "49975": 12698492928.0, "49980": 12698492928.0, "49985": 12698492928.0, "49990": 12698492928.0, "49995": 12698492928.0, "50000": 12698492928.0, "50005": 12698492928.0, "50010": 12698492928.0, "50015": 12698492928.0, "50020": 12698492928.0, "50025": 12698492928.0, "50030": 12698492928.0, "50035": 12698492928.0, "50040": 12698492928.0, "50045": 12698492928.0, "50050": 12698492928.0, "50055": 12698492928.0, "50060": 12698492928.0, "50065": 12698492928.0, "50070": 12698492928.0, "50075": 12698492928.0, "50080": 12698492928.0, "50085": 12698492928.0, "50090": 12698492928.0, "50095": 12698492928.0, "50100": 12698492928.0, "50105": 12698492928.0, "50110": 12698492928.0, "50115": 12698492928.0, "50120": 12698492928.0, "50125": 12698492928.0, "50130": 12698492928.0, "50135": 12698492928.0, "50140": 12698492928.0, "50145": 12698492928.0, "50150": 12698492928.0, "50155": 12698492928.0, "50160": 12698492928.0, "50165": 12698492928.0, "50170": 12698492928.0, "50175": 12698492928.0, "50180": 12698492928.0, "50185": 12698492928.0, "50190": 12698492928.0, "50195": 12698492928.0, "50200": 12698492928.0, "50205": 12698492928.0, "50210": 12698492928.0, "50215": 12698492928.0, "50220": 12698492928.0, "50225": 12698492928.0, "50230": 12698492928.0, "50235": 12698492928.0, "50240": 12698492928.0, "50245": 12698492928.0, "50250": 12698492928.0, "50255": 12698492928.0, "50260": 12698492928.0, "50265": 12698492928.0, "50270": 12698492928.0, "50275": 12698492928.0, "50280": 12698492928.0, "50285": 12698492928.0, "50290": 12698492928.0, "50295": 12698492928.0, "50300": 12698492928.0, "50305": 12698492928.0, "50310": 12698492928.0, "50315": 12698492928.0, "50320": 12698492928.0, "50325": 12698492928.0, "50330": 12698492928.0, "50335": 12698492928.0, "50340": 12698492928.0, "50345": 12698492928.0, "50350": 12698492928.0, "50355": 12698492928.0, "50360": 12698492928.0, "50365": 12698492928.0, "50370": 12698492928.0, "50375": 12698492928.0, "50380": 12698492928.0, "50385": 12698492928.0, "50390": 12698492928.0, "50395": 12698492928.0, "50400": 12698492928.0, "50405": 12698492928.0, "50410": 12698492928.0, "50415": 12698492928.0, "50420": 12698492928.0, "50425": 12698492928.0, "50430": 12698492928.0, "50435": 12698492928.0, "50440": 12698492928.0, "50445": 12698492928.0, "50450": 12698492928.0, "50455": 12698492928.0, "50460": 12698492928.0, "50465": 12698492928.0, "50470": 12698492928.0, "50475": 12698492928.0, "50480": 12698492928.0, "50485": 12698492928.0, "50490": 12698492928.0, "50495": 12698492928.0, "50500": 12698492928.0, "50505": 12698492928.0, "50510": 12698492928.0, "50515": 12698492928.0, "50520": 12698492928.0, "50525": 12698492928.0, "50530": 12698492928.0, "50535": 12698492928.0, "50540": 12698492928.0, "50545": 12698492928.0, "50550": 12698492928.0, "50555": 12698492928.0, "50560": 12698492928.0, "50565": 12698492928.0, "50570": 12698492928.0, "50575": 12698492928.0, "50580": 12698492928.0, "50585": 12698492928.0, "50590": 12698492928.0, "50595": 12698492928.0, "50600": 12698492928.0, "50605": 12698492928.0, "50610": 12698492928.0, "50615": 12698492928.0, "50620": 12698492928.0, "50625": 12698492928.0, "50630": 12698492928.0, "50635": 12698492928.0, "50640": 12698492928.0, "50645": 12698492928.0, "50650": 12698492928.0, "50655": 12698492928.0, "50660": 12698492928.0, "50665": 12698492928.0, "50670": 12698492928.0, "50675": 12698492928.0, "50680": 12698492928.0, "50685": 12698492928.0, "50690": 12698492928.0, "50695": 12698492928.0, "50700": 12698492928.0, "50705": 12698492928.0, "50710": 12698492928.0, "50715": 12698492928.0, "50720": 12698492928.0, "50725": 12698492928.0, "50730": 12698492928.0, "50735": 12698492928.0, "50740": 12698492928.0, "50745": 12698492928.0, "50750": 12698492928.0, "50755": 12698492928.0, "50760": 12698492928.0, "50765": 12698492928.0, "50770": 12698492928.0, "50775": 12698492928.0, "50780": 12698492928.0, "50785": 12698492928.0, "50790": 12698492928.0, "50795": 12698492928.0, "50800": 12698492928.0, "50805": 12698492928.0, "50810": 12698492928.0, "50815": 12698492928.0, "50820": 12698492928.0, "50825": 12698492928.0, "50830": 12698492928.0, "50835": 12698492928.0, "50840": 12698492928.0, "50845": 12698492928.0, "50850": 12698492928.0, "50855": 12698492928.0, "50860": 12698492928.0, "50865": "nan", "50870": "nan", "50875": "nan", "50880": "nan", "50885": "nan", "50890": "nan", "50895": "nan", "50900": "nan", "50905": "nan", "50910": "nan", "50915": "nan", "50920": "nan", "50925": "nan", "50930": "nan", "50935": "nan", "50940": "nan", "50945": "nan", "50950": "nan", "50955": "nan", "50960": "nan", "50965": "nan", "50970": "nan", "50975": "nan", "50980": "nan", "50985": "nan", "50990": "nan", "50995": "nan", "51000": "nan", "51005": "nan", "51010": "nan", "51015": "nan", "51020": "nan", "51025": "nan", "51030": "nan", "51035": "nan", "51040": "nan", "51045": "nan", "51050": "nan", "51055": "nan", "51060": "nan", "51065": "nan", "51070": "nan", "51075": "nan", "51080": "nan", "51085": "nan", "51090": "nan", "51095": "nan", "51100": "nan", "51105": "nan", "51110": "nan", "51115": "nan", "51120": "nan", "51125": "nan", "51130": "nan", "51135": "nan", "51140": "nan", "51145": "nan", "51150": "nan", "51155": "nan", "51160": "nan", "51165": "nan", "51170": "nan", "51175": "nan", "51180": "nan", "51185": "nan", "51190": "nan", "51195": "nan", "51200": "nan", "51205": "nan", "51210": "nan", "51215": "nan", "51220": "nan", "51225": "nan", "51230": "nan", "51235": "nan", "51240": "nan", "51245": "nan", "51250": "nan", "51255": "nan", "51260": "nan", "51265": "nan", "51270": "nan", "51275": "nan", "51280": "nan", "51285": "nan", "51290": "nan", "51295": "nan", "51300": "nan", "51305": "nan", "51310": "nan", "51315": "nan", "51320": "nan", "51325": "nan", "51330": "nan", "51335": "nan", "51340": "nan", "51345": "nan", "51350": "nan", "51355": "nan", "51360": "nan", "51365": "nan", "51370": "nan", "51375": "nan", "51380": "nan", "51385": "nan", "51390": "nan", "51395": "nan", "51400": "nan", "51405": "nan", "51410": "nan", "51415": "nan", "51420": "nan", "51425": "nan", "51430": "nan", "51435": "nan", "51440": "nan", "51445": "nan", "51450": "nan", "51455": "nan", "51460": "nan", "51465": "nan", "51470": "nan", "51475": "nan", "51480": "nan", "51485": "nan", "51490": "nan", "51495": "nan", "51500": "nan", "51505": "nan", "51510": "nan", "51515": "nan", "51520": "nan", "51525": "nan", "51530": "nan", "51535": "nan", "51540": "nan", "51545": "nan", "51550": "nan", "51555": "nan", "51560": "nan", "51565": "nan", "51570": "nan", "51575": "nan", "51580": "nan", "51585": "nan", "51590": "nan", "51595": "nan", "51600": "nan", "51605": "nan", "51610": "nan", "51615": "nan", "51620": "nan", "51625": "nan", "51630": "nan", "51635": "nan", "51640": "nan", "51645": "nan", "51650": "nan", "51655": "nan", "51660": "nan", "51665": "nan", "51670": "nan", "51675": "nan", "51680": "nan", "51685": "nan", "51690": "nan", "51695": "nan", "51700": "nan", "51705": "nan", "51710": "nan", "51715": "nan", "51720": "nan", "51725": "nan", "51730": "nan", "51735": "nan", "51740": "nan", "51745": "nan", "51750": "nan", "51755": "nan", "51760": "nan", "51765": "nan", "51770": "nan", "51775": "nan", "51780": "nan", "51785": "nan", "51790": "nan", "51795": "nan", "51800": "nan", "51805": "nan", "51810": "nan", "51815": "nan", "51820": "nan", "51825": "nan", "51830": "nan", "51835": "nan", "51840": "nan", "51845": "nan", "51850": "nan", "51855": "nan", "51860": "nan", "51865": "nan", "51870": "nan", "51875": "nan", "51880": "nan", "51885": "nan", "51890": "nan", "51895": "nan", "51900": "nan", "51905": "nan", "51910": "nan", "51915": "nan", "51920": "nan", "51925": "nan", "51930": "nan", "51935": "nan", "51940": "nan", "51945": "nan", "51950": "nan", "51955": "nan", "51960": "nan", "51965": "nan", "51970": "nan", "51975": "nan", "51980": "nan", "51985": "nan", "51990": "nan", "51995": "nan", "52000": "nan", "52005": "nan", "52010": "nan", "52015": "nan", "52020": "nan", "52025": "nan", "52030": "nan", "52035": "nan", "52040": "nan", "52045": "nan", "52050": "nan", "52055": "nan", "52060": "nan", "52065": "nan", "52070": "nan", "52075": "nan", "52080": "nan", "52085": "nan", "52090": "nan", "52095": "nan", "52100": "nan", "52105": "nan", "52110": "nan", "52115": "nan", "52120": "nan", "52125": "nan", "52130": "nan", "52135": "nan", "52140": "nan", "52145": "nan", "52150": "nan", "52155": "nan", "52160": "nan", "52165": "nan", "52170": "nan", "52175": "nan", "52180": "nan", "52185": "nan", "52190": "nan", "52195": "nan", "52200": "nan", "52205": "nan", "52210": "nan", "52215": "nan", "52220": "nan", "52225": "nan", "52230": "nan", "52235": "nan", "52240": "nan", "52245": "nan", "52250": "nan", "52255": "nan", "52260": "nan", "52265": "nan", "52270": "nan", "52275": "nan", "52280": "nan", "52285": "nan", "52290": "nan", "52295": "nan", "52300": "nan", "52305": "nan", "52310": "nan", "52315": "nan", "52320": "nan", "52325": "nan", "52330": "nan", "52335": "nan", "52340": "nan", "52345": "nan", "52350": "nan", "52355": "nan", "52360": "nan", "52365": "nan", "52370": "nan", "52375": "nan", "52380": "nan", "52385": "nan", "52390": "nan", "52395": "nan", "52400": "nan", "52405": "nan", "52410": "nan", "52415": "nan", "52420": "nan", "52425": "nan", "52430": "nan", "52435": "nan", "52440": "nan", "52445": "nan", "52450": "nan", "52455": "nan", "52460": "nan", "52465": "nan", "52470": "nan", "52475": "nan", "52480": "nan", "52485": "nan", "52490": "nan", "52495": "nan", "52500": "nan", "52505": "nan", "52510": "nan", "52515": "nan", "52520": "nan", "52525": "nan", "52530": "nan", "52535": "nan", "52540": "nan", "52545": "nan", "52550": "nan", "52555": "nan", "52560": "nan", "52565": "nan", "52570": "nan", "52575": "nan", "52580": "nan", "52585": "nan", "52590": "nan", "52595": "nan", "52600": "nan", "52605": "nan", "52610": "nan", "52615": "nan", "52620": "nan", "52625": "nan", "52630": "nan", "52635": "nan", "52640": "nan", "52645": "nan", "52650": "nan", "52655": "nan", "52660": "nan", "52665": "nan", "52670": "nan", "52675": "nan", "52680": "nan", "52685": "nan", "52690": "nan", "52695": "nan", "52700": "nan", "52705": "nan", "52710": "nan", "52715": "nan", "52720": "nan", "52725": "nan", "52730": "nan", "52735": "nan", "52740": "nan", "52745": "nan", "52750": "nan", "52755": "nan", "52760": "nan", "52765": "nan", "52770": "nan", "52775": "nan", "52780": "nan", "52785": "nan", "52790": "nan", "52795": "nan", "52800": "nan", "52805": "nan", "52810": "nan", "52815": "nan", "52820": "nan", "52825": "nan", "52830": "nan", "52835": "nan", "52840": "nan", "52845": "nan", "52850": "nan", "52855": "nan", "52860": "nan", "52865": "nan", "52870": "nan", "52875": "nan", "52880": "nan", "52885": "nan", "52890": "nan", "52895": "nan", "52900": "nan", "52905": "nan", "52910": "nan", "52915": "nan", "52920": "nan", "52925": "nan", "52930": "nan", "52935": "nan", "52940": "nan", "52945": "nan", "52950": "nan", "52955": "nan", "52960": "nan", "52965": "nan", "52970": "nan", "52975": "nan", "52980": "nan", "52985": "nan", "52990": "nan", "52995": "nan", "53000": "nan", "53005": "nan", "53010": "nan", "53015": "nan", "53020": "nan", "53025": "nan", "53030": "nan", "53035": "nan", "53040": "nan", "53045": "nan", "53050": "nan", "53055": "nan", "53060": "nan", "53065": "nan", "53070": "nan", "53075": "nan", "53080": "nan", "53085": "nan", "53090": "nan", "53095": "nan", "53100": "nan", "53105": "nan", "53110": "nan", "53115": "nan", "53120": "nan", "53125": "nan", "53130": "nan", "53135": "nan", "53140": "nan", "53145": "nan", "53150": "nan", "53155": "nan", "53160": "nan", "53165": "nan", "53170": "nan", "53175": "nan", "53180": "nan", "53185": "nan", "53190": "nan", "53195": "nan", "53200": "nan", "53205": "nan", "53210": "nan", "53215": "nan", "53220": "nan", "53225": "nan", "53230": "nan", "53235": "nan", "53240": "nan", "53245": "nan", "53250": "nan", "53255": "nan", "53260": "nan", "53265": "nan", "53270": "nan", "53275": "nan", "53280": "nan", "53285": "nan", "53290": "nan", "53295": "nan", "53300": "nan", "53305": "nan", "53310": "nan", "53315": "nan", "53320": "nan", "53325": "nan", "53330": "nan", "53335": "nan", "53340": "nan", "53345": "nan", "53350": "nan", "53355": "nan", "53360": "nan", "53365": "nan", "53370": "nan", "53375": "nan", "53380": "nan", "53385": "nan", "53390": "nan", "53395": "nan", "53400": "nan", "53405": "nan", "53410": "nan", "53415": "nan", "53420": "nan", "53425": "nan", "53430": "nan", "53435": "nan", "53440": "nan", "53445": "nan", "53450": "nan", "53455": "nan", "53460": "nan", "53465": "nan", "53470": "nan", "53475": "nan", "53480": "nan", "53485": "nan", "53490": "nan", "53495": "nan", "53500": "nan", "53505": "nan", "53510": "nan", "53515": "nan", "53520": "nan", "53525": "nan", "53530": "nan", "53535": "nan", "53540": "nan", "53545": "nan", "53550": "nan", "53555": "nan", "53560": "nan", "53565": "nan", "53570": "nan", "53575": "nan", "53580": "nan", "53585": "nan", "53590": "nan", "53595": "nan", "53600": "nan", "53605": "nan", "53610": "nan", "53615": "nan", "53620": "nan", "53625": "nan", "53630": "nan", "53635": "nan", "53640": "nan", "53645": "nan", "53650": "nan", "53655": "nan", "53660": "nan", "53665": "nan", "53670": "nan", "53675": "nan", "53680": "nan", "53685": "nan", "53690": "nan", "53695": "nan", "53700": "nan", "53705": "nan", "53710": "nan", "53715": "nan", "53720": "nan", "53725": "nan", "53730": "nan", "53735": "nan", "53740": "nan", "53745": "nan", "53750": "nan", "53755": "nan", "53760": "nan", "53765": "nan", "53770": "nan", "53775": "nan", "53780": "nan", "53785": "nan", "53790": "nan", "53795": "nan", "53800": "nan", "53805": "nan", "53810": "nan", "53815": "nan", "53820": "nan", "53825": "nan", "53830": "nan", "53835": "nan", "53840": "nan", "53845": "nan", "53850": "nan", "53855": "nan", "53860": "nan", "53865": "nan", "53870": "nan", "53875": "nan", "53880": "nan", "53885": "nan", "53890": "nan", "53895": "nan", "53900": "nan", "53905": "nan", "53910": "nan", "53915": "nan", "53920": "nan", "53925": "nan", "53930": "nan", "53935": "nan", "53940": "nan", "53945": "nan", "53950": "nan", "53955": "nan", "53960": "nan", "53965": "nan", "53970": "nan", "53975": "nan", "53980": "nan", "53985": "nan", "53990": "nan", "53995": "nan", "54000": "nan", "54005": "nan", "54010": "nan", "54015": "nan", "54020": "nan", "54025": "nan", "54030": "nan", "54035": "nan", "54040": "nan", "54045": "nan", "54050": "nan", "54055": "nan", "54060": "nan", "54065": "nan", "54070": "nan", "54075": "nan", "54080": "nan", "54085": "nan", "54090": "nan", "54095": "nan", "54100": "nan", "54105": "nan", "54110": "nan", "54115": "nan", "54120": "nan", "54125": "nan", "54130": "nan", "54135": "nan", "54140": "nan", "54145": "nan", "54150": "nan", "54155": "nan", "54160": "nan", "54165": "nan", "54170": "nan", "54175": "nan", "54180": "nan", "54185": "nan", "54190": "nan", "54195": "nan", "54200": "nan", "54205": "nan", "54210": "nan", "54215": "nan", "54220": "nan", "54225": "nan", "54230": "nan", "54235": "nan", "54240": "nan", "54245": "nan", "54250": "nan", "54255": "nan", "54260": "nan", "54265": "nan", "54270": "nan", "54275": "nan", "54280": "nan", "54285": "nan", "54290": "nan", "54295": "nan", "54300": "nan", "54305": "nan", "54310": "nan", "54315": "nan", "54320": "nan", "54325": "nan", "54330": "nan", "54335": "nan", "54340": "nan", "54345": "nan", "54350": "nan", "54355": "nan", "54360": "nan", "54365": "nan", "54370": "nan", "54375": "nan", "54380": "nan", "54385": "nan", "54390": "nan", "54395": "nan", "54400": "nan", "54405": "nan", "54410": "nan", "54415": "nan", "54420": "nan", "54425": "nan", "54430": "nan", "54435": "nan", "54440": "nan", "54445": "nan", "54450": "nan", "54455": "nan", "54460": "nan", "54465": "nan", "54470": "nan", "54475": "nan", "54480": "nan", "54485": "nan", "54490": "nan", "54495": "nan", "54500": "nan", "54505": "nan", "54510": "nan", "54515": "nan", "54520": "nan", "54525": "nan", "54530": "nan", "54535": "nan", "54540": "nan", "54545": "nan", "54550": "nan", "54555": "nan", "54560": "nan", "54565": "nan", "54570": "nan", "54575": "nan", "54580": "nan", "54585": "nan", "54590": "nan", "54595": "nan", "54600": "nan", "54605": "nan", "54610": "nan", "54615": "nan", "54620": "nan", "54625": "nan", "54630": "nan", "54635": "nan", "54640": "nan", "54645": "nan", "54650": "nan", "54655": "nan", "54660": "nan", "54665": "nan", "54670": "nan", "54675": "nan", "54680": "nan", "54685": "nan", "54690": "nan", "54695": "nan", "54700": "nan", "54705": "nan", "54710": "nan", "54715": "nan", "54720": "nan", "54725": "nan", "54730": "nan", "54735": "nan", "54740": "nan", "54745": "nan", "54750": "nan", "54755": "nan", "54760": "nan", "54765": "nan", "54770": "nan", "54775": "nan", "54780": "nan", "54785": "nan", "54790": "nan", "54795": "nan", "54800": "nan", "54805": "nan", "54810": "nan", "54815": "nan", "54820": "nan", "54825": "nan", "54830": "nan", "54835": "nan", "54840": "nan", "54845": "nan", "54850": "nan", "54855": "nan", "54860": "nan", "54865": "nan", "54870": "nan", "54875": "nan", "54880": "nan", "54885": "nan", "54890": "nan", "54895": "nan", "54900": "nan", "54905": "nan", "54910": "nan", "54915": "nan", "54920": "nan", "54925": "nan", "54930": "nan", "54935": "nan", "54940": "nan", "54945": "nan", "54950": "nan", "54955": "nan", "54960": "nan", "54965": "nan", "54970": "nan", "54975": "nan", "54980": "nan", "54985": "nan", "54990": "nan", "54995": "nan", "55000": "nan"}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 55000, "step_interval": 5, "values": {"1": 27893811200.0, "5": 28391901184.0, "10": 28391901184.0, "15": 28391901184.0, "20": 28391901184.0, "25": 28391901184.0, "30": 28391901184.0, "35": 28391901184.0, "40": 28391901184.0, "45": 28391901184.0, "50": 28391901184.0, "55": 28391901184.0, "60": 28391901184.0, "65": 28391901184.0, "70": 28391901184.0, "75": 28391901184.0, "80": 28391901184.0, "85": 28391901184.0, "90": 28391901184.0, "95": 28391901184.0, "100": 28391901184.0, "105": 28391901184.0, "110": 28391901184.0, "115": 28391901184.0, "120": 28391901184.0, "125": 28391901184.0, "130": 28391901184.0, "135": 28391901184.0, "140": 28391901184.0, "145": 28391901184.0, "150": 28391901184.0, "155": 28391901184.0, "160": 28391901184.0, "165": 28391901184.0, "170": 28391901184.0, "175": 28391901184.0, "180": 28391901184.0, "185": 28391901184.0, "190": 28391901184.0, "195": 28391901184.0, "200": 28391901184.0, "205": 28391901184.0, "210": 28391901184.0, "215": 28391901184.0, "220": 28391901184.0, "225": 28391901184.0, "230": 28391901184.0, "235": 28391901184.0, "240": 28391901184.0, "245": 28391901184.0, "250": 28391901184.0, "255": 28391901184.0, "260": 28391901184.0, "265": 28391901184.0, "270": 28391901184.0, "275": 28391901184.0, "280": 28391901184.0, "285": 28391901184.0, "290": 28391901184.0, "295": 28391901184.0, "300": 28391901184.0, "305": 28391901184.0, "310": 28391901184.0, "315": 28391901184.0, "320": 28391901184.0, "325": 28391901184.0, "330": 28391901184.0, "335": 28391901184.0, "340": 28391901184.0, "345": 28391901184.0, "350": 28391901184.0, "355": 28391901184.0, "360": 28391901184.0, "365": 28391901184.0, "370": 28391901184.0, "375": 28391901184.0, "380": 28391901184.0, "385": 28391901184.0, "390": 28391901184.0, "395": 28391901184.0, "400": 28391901184.0, "405": 28391901184.0, "410": 28391901184.0, "415": 28391901184.0, "420": 28391901184.0, "425": 28391901184.0, "430": 28391901184.0, "435": 28391901184.0, "440": 28391901184.0, "445": 28391901184.0, "450": 28391901184.0, "455": 28391901184.0, "460": 28391901184.0, "465": 28391901184.0, "470": 28391901184.0, "475": 28391901184.0, "480": 28391901184.0, "485": 28391901184.0, "490": 28391901184.0, "495": 28391901184.0, "500": 28391901184.0, "505": 28391901184.0, "510": 28391901184.0, "515": 28391901184.0, "520": 28391901184.0, "525": 28391901184.0, "530": 28391901184.0, "535": 28391901184.0, "540": 28391901184.0, "545": 28391901184.0, "550": 28391901184.0, "555": 28391901184.0, "560": 28391901184.0, "565": 28391901184.0, "570": 28391901184.0, "575": 28391901184.0, "580": 28391901184.0, "585": 28391901184.0, "590": 28391901184.0, "595": 28391901184.0, "600": 28391901184.0, "605": 28391901184.0, "610": 28391901184.0, "615": 28391901184.0, "620": 28391901184.0, "625": 28391901184.0, "630": 28391901184.0, "635": 28391901184.0, "640": 28391901184.0, "645": 28391901184.0, "650": 28391901184.0, "655": 28391901184.0, "660": 28391901184.0, "665": 28391901184.0, "670": 28391901184.0, "675": 28391901184.0, "680": 28391901184.0, "685": 28391901184.0, "690": 28391901184.0, "695": 28391901184.0, "700": 28391901184.0, "705": 28391901184.0, "710": 28391901184.0, "715": 28391901184.0, "720": 28391901184.0, "725": 28391901184.0, "730": 28391901184.0, "735": 28391901184.0, "740": 28391901184.0, "745": 28391901184.0, "750": 28391901184.0, "755": 28391901184.0, "760": 28391901184.0, "765": 28391901184.0, "770": 28391901184.0, "775": 28391901184.0, "780": 28391901184.0, "785": 28391901184.0, "790": 28391901184.0, "795": 28391901184.0, "800": 28391901184.0, "805": 28391901184.0, "810": 28391901184.0, "815": 28391901184.0, "820": 28391901184.0, "825": 28391901184.0, "830": 28391901184.0, "835": 28391901184.0, "840": 28391901184.0, "845": 28391901184.0, "850": 28391901184.0, "855": 28391901184.0, "860": 28391901184.0, "865": 28391901184.0, "870": 28391901184.0, "875": 28391901184.0, "880": 28391901184.0, "885": 28391901184.0, "890": 28391901184.0, "895": 28391901184.0, "900": 28391901184.0, "905": 28391901184.0, "910": 28391901184.0, "915": 28391901184.0, "920": 28391901184.0, "925": 28391901184.0, "930": 28391901184.0, "935": 28391901184.0, "940": 28391901184.0, "945": 28391901184.0, "950": 28391901184.0, "955": 28391901184.0, "960": 28391901184.0, "965": 28391901184.0, "970": 28391901184.0, "975": 28391901184.0, "980": 28391901184.0, "985": 28391901184.0, "990": 28391901184.0, "995": 28391901184.0, "1000": 28391901184.0, "1005": 28391901184.0, "1010": 28391901184.0, "1015": 28391901184.0, "1020": 28391901184.0, "1025": 28391901184.0, "1030": 28391901184.0, "1035": 28391901184.0, "1040": 28391901184.0, "1045": 28391901184.0, "1050": 28391901184.0, "1055": 28391901184.0, "1060": 28391901184.0, "1065": 28391901184.0, "1070": 28391901184.0, "1075": 28391901184.0, "1080": 28391901184.0, "1085": 28391901184.0, "1090": 28391901184.0, "1095": 28391901184.0, "1100": 28391901184.0, "1105": 28391901184.0, "1110": 28391901184.0, "1115": 28391901184.0, "1120": 28391901184.0, "1125": 28391901184.0, "1130": 28391901184.0, "1135": 28391901184.0, "1140": 28391901184.0, "1145": 28391901184.0, "1150": 28391901184.0, "1155": 28391901184.0, "1160": 28391901184.0, "1165": 28391901184.0, "1170": 28391901184.0, "1175": 28391901184.0, "1180": 28391901184.0, "1185": 28391901184.0, "1190": 28391901184.0, "1195": 28391901184.0, "1200": 28391901184.0, "1205": 28391901184.0, "1210": 28391901184.0, "1215": 28391901184.0, "1220": 28391901184.0, "1225": 28391901184.0, "1230": 28391901184.0, "1235": 28391901184.0, "1240": 28391901184.0, "1245": 28391901184.0, "1250": 28391901184.0, "1255": 28391901184.0, "1260": 28391901184.0, "1265": 28391901184.0, "1270": 28391901184.0, "1275": 28391901184.0, "1280": 28391901184.0, "1285": 28391901184.0, "1290": 28391901184.0, "1295": 28391901184.0, "1300": 28391901184.0, "1305": 28391901184.0, "1310": 28391901184.0, "1315": 28391901184.0, "1320": 28391901184.0, "1325": 28391901184.0, "1330": 28391901184.0, "1335": 28391901184.0, "1340": 28391901184.0, "1345": 28391901184.0, "1350": 28391901184.0, "1355": 28391901184.0, "1360": 28391901184.0, "1365": 28391901184.0, "1370": 28391901184.0, "1375": 28391901184.0, "1380": 28391901184.0, "1385": 28391901184.0, "1390": 28391901184.0, "1395": 28391901184.0, "1400": 28391901184.0, "1405": 28391901184.0, "1410": 28391901184.0, "1415": 28391901184.0, "1420": 28391901184.0, "1425": 28391901184.0, "1430": 28391901184.0, "1435": 28391901184.0, "1440": 28391901184.0, "1445": 28391901184.0, "1450": 28391901184.0, "1455": 28391901184.0, "1460": 28391901184.0, "1465": 28391901184.0, "1470": 28391901184.0, "1475": 28391901184.0, "1480": 28391901184.0, "1485": 28391901184.0, "1490": 28391901184.0, "1495": 28391901184.0, "1500": 28391901184.0, "1505": 28391901184.0, "1510": 28391901184.0, "1515": 28391901184.0, "1520": 28391901184.0, "1525": 28391901184.0, "1530": 28391901184.0, "1535": 28391901184.0, "1540": 28391901184.0, "1545": 28391901184.0, "1550": 28391901184.0, "1555": 28391901184.0, "1560": 28391901184.0, "1565": 28391901184.0, "1570": 28391901184.0, "1575": 28391901184.0, "1580": 28391901184.0, "1585": 28391901184.0, "1590": 28391901184.0, "1595": 28391901184.0, "1600": 28391901184.0, "1605": 28391901184.0, "1610": 28391901184.0, "1615": 28391901184.0, "1620": 28391901184.0, "1625": 28391901184.0, "1630": 28391901184.0, "1635": 28391901184.0, "1640": 28391901184.0, "1645": 28391901184.0, "1650": 28391901184.0, "1655": 28391901184.0, "1660": 28391901184.0, "1665": 28391901184.0, "1670": 28391901184.0, "1675": 28391901184.0, "1680": 28391901184.0, "1685": 28391901184.0, "1690": 28391901184.0, "1695": 28391901184.0, "1700": 28391901184.0, "1705": 28391901184.0, "1710": 28391901184.0, "1715": 28391901184.0, "1720": 28391901184.0, "1725": 28391901184.0, "1730": 28391901184.0, "1735": 28391901184.0, "1740": 28391901184.0, "1745": 28391901184.0, "1750": 28391901184.0, "1755": 28391901184.0, "1760": 28391901184.0, "1765": 28391901184.0, "1770": 28391901184.0, "1775": 28391901184.0, "1780": 28391901184.0, "1785": 28391901184.0, "1790": 28391901184.0, "1795": 28391901184.0, "1800": 28391901184.0, "1805": 28391901184.0, "1810": 28391901184.0, "1815": 28391901184.0, "1820": 28391901184.0, "1825": 28391901184.0, "1830": 28391901184.0, "1835": 28391901184.0, "1840": 28391901184.0, "1845": 28391901184.0, "1850": 28391901184.0, "1855": 28391901184.0, "1860": 28391901184.0, "1865": 28391901184.0, "1870": 28391901184.0, "1875": 28391901184.0, "1880": 28391901184.0, "1885": 28391901184.0, "1890": 28391901184.0, "1895": 28391901184.0, "1900": 28391901184.0, "1905": 28391901184.0, "1910": 28391901184.0, "1915": 28391901184.0, "1920": 28391901184.0, "1925": 28391901184.0, "1930": 28391901184.0, "1935": 28391901184.0, "1940": 28391901184.0, "1945": 28391901184.0, "1950": 28391901184.0, "1955": 28391901184.0, "1960": 28391901184.0, "1965": 28391901184.0, "1970": 28391901184.0, "1975": 28391901184.0, "1980": 28391901184.0, "1985": 28391901184.0, "1990": 28391901184.0, "1995": 28391901184.0, "2000": 28391901184.0, "2005": 28391901184.0, "2010": 28391901184.0, "2015": 28391901184.0, "2020": 28391901184.0, "2025": 28391901184.0, "2030": 28391901184.0, "2035": 28391901184.0, "2040": 28391901184.0, "2045": 28391901184.0, "2050": 28391901184.0, "2055": 28391901184.0, "2060": 28391901184.0, "2065": 28391901184.0, "2070": 28391901184.0, "2075": 28391901184.0, "2080": 28391901184.0, "2085": 28391901184.0, "2090": 28391901184.0, "2095": 28391901184.0, "2100": 28391901184.0, "2105": 28391901184.0, "2110": 28391901184.0, "2115": 28391901184.0, "2120": 28391901184.0, "2125": 28391901184.0, "2130": 28391901184.0, "2135": 28391901184.0, "2140": 28391901184.0, "2145": 28391901184.0, "2150": 28391901184.0, "2155": 28391901184.0, "2160": 28391901184.0, "2165": 28391901184.0, "2170": 28391901184.0, "2175": 28391901184.0, "2180": 28391901184.0, "2185": 28391901184.0, "2190": 28391901184.0, "2195": 28391901184.0, "2200": 28391901184.0, "2205": 28391901184.0, "2210": 28391901184.0, "2215": 28391901184.0, "2220": 28391901184.0, "2225": 28391901184.0, "2230": 28391901184.0, "2235": 28391901184.0, "2240": 28391901184.0, "2245": 28391901184.0, "2250": 28391901184.0, "2255": 28391901184.0, "2260": 28391901184.0, "2265": 28391901184.0, "2270": 28391901184.0, "2275": 28391901184.0, "2280": 28391901184.0, "2285": 28391901184.0, "2290": 28391901184.0, "2295": 28391901184.0, "2300": 28391901184.0, "2305": 28391901184.0, "2310": 28391901184.0, "2315": 28391901184.0, "2320": 28391901184.0, "2325": 28391901184.0, "2330": 28391901184.0, "2335": 28391901184.0, "2340": 28391901184.0, "2345": 28391901184.0, "2350": 28391901184.0, "2355": 28391901184.0, "2360": 28391901184.0, "2365": 28391901184.0, "2370": 28391901184.0, "2375": 28391901184.0, "2380": 28391901184.0, "2385": 28391901184.0, "2390": 28391901184.0, "2395": 28391901184.0, "2400": 28391901184.0, "2405": 28391901184.0, "2410": 28391901184.0, "2415": 28391901184.0, "2420": 28391901184.0, "2425": 28391901184.0, "2430": 28391901184.0, "2435": 28391901184.0, "2440": 28391901184.0, "2445": 28391901184.0, "2450": 28391901184.0, "2455": 28391901184.0, "2460": 28391901184.0, "2465": 28391901184.0, "2470": 28391901184.0, "2475": 28391901184.0, "2480": 28391901184.0, "2485": 28391901184.0, "2490": 28391901184.0, "2495": 28391901184.0, "2500": 28391901184.0, "2505": 28391901184.0, "2510": 28391901184.0, "2515": 28391901184.0, "2520": 28391901184.0, "2525": 28391901184.0, "2530": 28391901184.0, "2535": 28391901184.0, "2540": 28391901184.0, "2545": 28391901184.0, "2550": 28391901184.0, "2555": 28391901184.0, "2560": 28391901184.0, "2565": 28391901184.0, "2570": 28391901184.0, "2575": 28391901184.0, "2580": 28391901184.0, "2585": 28391901184.0, "2590": 28391901184.0, "2595": 28391901184.0, "2600": 28391901184.0, "2605": 28391901184.0, "2610": 28391901184.0, "2615": 28391901184.0, "2620": 28391901184.0, "2625": 28391901184.0, "2630": 28391901184.0, "2635": 28391901184.0, "2640": 28391901184.0, "2645": 28391901184.0, "2650": 28391901184.0, "2655": 28391901184.0, "2660": 28391901184.0, "2665": 28391901184.0, "2670": 28391901184.0, "2675": 28391901184.0, "2680": 28391901184.0, "2685": 28391901184.0, "2690": 28391901184.0, "2695": 28391901184.0, "2700": 28391901184.0, "2705": 28391901184.0, "2710": 28391901184.0, "2715": 28391901184.0, "2720": 28391901184.0, "2725": 28391901184.0, "2730": 28391901184.0, "2735": 28391901184.0, "2740": 28391901184.0, "2745": 28391901184.0, "2750": 28391901184.0, "2755": 28391901184.0, "2760": 28391901184.0, "2765": 28391901184.0, "2770": 28391901184.0, "2775": 28391901184.0, "2780": 28391901184.0, "2785": 28391901184.0, "2790": 28391901184.0, "2795": 28391901184.0, "2800": 28391901184.0, "2805": 28391901184.0, "2810": 28391901184.0, "2815": 28391901184.0, "2820": 28391901184.0, "2825": 28391901184.0, "2830": 28391901184.0, "2835": 28391901184.0, "2840": 28391901184.0, "2845": 28391901184.0, "2850": 28391901184.0, "2855": 28391901184.0, "2860": 28391901184.0, "2865": 28391901184.0, "2870": 28391901184.0, "2875": 28391901184.0, "2880": 28391901184.0, "2885": 28391901184.0, "2890": 28391901184.0, "2895": 28391901184.0, "2900": 28391901184.0, "2905": 28391901184.0, "2910": 28391901184.0, "2915": 28391901184.0, "2920": 28391901184.0, "2925": 28391901184.0, "2930": 28391901184.0, "2935": 28391901184.0, "2940": 28391901184.0, "2945": 28391901184.0, "2950": 28391901184.0, "2955": 28391901184.0, "2960": 28391901184.0, "2965": 28391901184.0, "2970": 28391901184.0, "2975": 28391901184.0, "2980": 28391901184.0, "2985": 28391901184.0, "2990": 28391901184.0, "2995": 28391901184.0, "3000": 28391901184.0, "3005": 28391901184.0, "3010": 28391901184.0, "3015": 28391901184.0, "3020": 28391901184.0, "3025": 28391901184.0, "3030": 28391901184.0, "3035": 28391901184.0, "3040": 28391901184.0, "3045": 28391901184.0, "3050": 28391901184.0, "3055": 28391901184.0, "3060": 28391901184.0, "3065": 28391901184.0, "3070": 28391901184.0, "3075": 28391901184.0, "3080": 28391901184.0, "3085": 28391901184.0, "3090": 28391901184.0, "3095": 28391901184.0, "3100": 28391901184.0, "3105": 28391901184.0, "3110": 28391901184.0, "3115": 28391901184.0, "3120": 28391901184.0, "3125": 28391901184.0, "3130": 28391901184.0, "3135": 28391901184.0, "3140": 28391901184.0, "3145": 28391901184.0, "3150": 28391901184.0, "3155": 28391901184.0, "3160": 28391901184.0, "3165": 28391901184.0, "3170": 28391901184.0, "3175": 28391901184.0, "3180": 28391901184.0, "3185": 28391901184.0, "3190": 28391901184.0, "3195": 28391901184.0, "3200": 28391901184.0, "3205": 28391901184.0, "3210": 28391901184.0, "3215": 28391901184.0, "3220": 28391901184.0, "3225": 28391901184.0, "3230": 28391901184.0, "3235": 28391901184.0, "3240": 28391901184.0, "3245": 28391901184.0, "3250": 28391901184.0, "3255": 28391901184.0, "3260": 28391901184.0, "3265": 28391901184.0, "3270": 28391901184.0, "3275": 28391901184.0, "3280": 28391901184.0, "3285": 28391901184.0, "3290": 28391901184.0, "3295": 28391901184.0, "3300": 28391901184.0, "3305": 28391901184.0, "3310": 28391901184.0, "3315": 28391901184.0, "3320": 28391901184.0, "3325": 28391901184.0, "3330": 28391901184.0, "3335": 28391901184.0, "3340": 28391901184.0, "3345": 28391901184.0, "3350": 28391901184.0, "3355": 28391901184.0, "3360": 28391901184.0, "3365": 28391901184.0, "3370": 28391901184.0, "3375": 28391901184.0, "3380": 28391901184.0, "3385": 28391901184.0, "3390": 28391901184.0, "3395": 28391901184.0, "3400": 28391901184.0, "3405": 28391901184.0, "3410": 28391901184.0, "3415": 28391901184.0, "3420": 28391901184.0, "3425": 28391901184.0, "3430": 28391901184.0, "3435": 28391901184.0, "3440": 28391901184.0, "3445": 28391901184.0, "3450": 28391901184.0, "3455": 28391901184.0, "3460": 28391901184.0, "3465": 28391901184.0, "3470": 28391901184.0, "3475": 28391901184.0, "3480": 28391901184.0, "3485": 28391901184.0, "3490": 28391901184.0, "3495": 28391901184.0, "3500": 28391901184.0, "3505": 28391901184.0, "3510": 28391901184.0, "3515": 28391901184.0, "3520": 28391901184.0, "3525": 28391901184.0, "3530": 28391901184.0, "3535": 28391901184.0, "3540": 28391901184.0, "3545": 28391901184.0, "3550": 28391901184.0, "3555": 28391901184.0, "3560": 28391901184.0, "3565": 28391901184.0, "3570": 28391901184.0, "3575": 28391901184.0, "3580": 28391901184.0, "3585": 28391901184.0, "3590": 28391901184.0, "3595": 28391901184.0, "3600": 28391901184.0, "3605": 28391901184.0, "3610": 28391901184.0, "3615": 28391901184.0, "3620": 28391901184.0, "3625": 28391901184.0, "3630": 28391901184.0, "3635": 28391901184.0, "3640": 28391901184.0, "3645": 28391901184.0, "3650": 28391901184.0, "3655": 28391901184.0, "3660": 28391901184.0, "3665": 28391901184.0, "3670": 28391901184.0, "3675": 28391901184.0, "3680": 28391712768.0, "3685": 28391712768.0, "3690": 28391712768.0, "3695": 28391712768.0, "3700": 28391712768.0, "3705": 28391712768.0, "3710": 28391712768.0, "3715": 28391712768.0, "3720": 28391712768.0, "3725": 28391712768.0, "3730": 28391712768.0, "3735": 28391712768.0, "3740": 28391712768.0, "3745": 28391712768.0, "3750": 28391712768.0, "3755": 28391712768.0, "3760": 28391712768.0, "3765": 28391712768.0, "3770": 28391712768.0, "3775": 28391712768.0, "3780": 28391712768.0, "3785": 28391712768.0, "3790": 28391712768.0, "3795": 28391712768.0, "3800": 28391712768.0, "3805": 28391712768.0, "3810": 28391712768.0, "3815": 28391712768.0, "3820": 28391712768.0, "3825": 28391712768.0, "3830": 28391712768.0, "3835": 28391712768.0, "3840": 28391712768.0, "3845": 28391712768.0, "3850": 28391712768.0, "3855": 28391712768.0, "3860": 28391712768.0, "3865": 28391712768.0, "3870": 28391712768.0, "3875": 28391712768.0, "3880": 28391712768.0, "3885": 28391712768.0, "3890": 28391712768.0, "3895": 28391712768.0, "3900": 28391712768.0, "3905": 28391712768.0, "3910": 28391712768.0, "3915": 28391712768.0, "3920": 28391712768.0, "3925": 28391712768.0, "3930": 28391712768.0, "3935": 28391712768.0, "3940": 28391712768.0, "3945": 28391712768.0, "3950": 28391712768.0, "3955": 28391712768.0, "3960": 28391712768.0, "3965": 28391712768.0, "3970": 28391712768.0, "3975": 28391712768.0, "3980": 28391712768.0, "3985": 28391712768.0, "3990": 28391712768.0, "3995": 28391712768.0, "4000": 28391712768.0, "4005": 28391712768.0, "4010": 28391712768.0, "4015": 28391712768.0, "4020": 28391712768.0, "4025": 28391712768.0, "4030": 28391712768.0, "4035": 28391712768.0, "4040": 28391712768.0, "4045": 28391712768.0, "4050": 28391712768.0, "4055": 28391712768.0, "4060": 28391712768.0, "4065": 28391712768.0, "4070": 28391712768.0, "4075": 28391712768.0, "4080": 28391712768.0, "4085": 28391712768.0, "4090": 28391712768.0, "4095": 28391712768.0, "4100": 28391712768.0, "4105": 28391712768.0, "4110": 28391712768.0, "4115": 28391712768.0, "4120": 28391712768.0, "4125": 28391712768.0, "4130": 28391712768.0, "4135": 28391712768.0, "4140": 28391712768.0, "4145": 28391712768.0, "4150": 28391712768.0, "4155": 28391712768.0, "4160": 28391712768.0, "4165": 28391712768.0, "4170": 28391712768.0, "4175": 28391712768.0, "4180": 28391712768.0, "4185": 28391712768.0, "4190": 28391712768.0, "4195": 28391712768.0, "4200": 28391712768.0, "4205": 28391712768.0, "4210": 28391712768.0, "4215": 28391712768.0, "4220": 28391712768.0, "4225": 28391712768.0, "4230": 28391712768.0, "4235": 28391712768.0, "4240": 28391712768.0, "4245": 28391712768.0, "4250": 28391712768.0, "4255": 28391712768.0, "4260": 28391712768.0, "4265": 28391712768.0, "4270": 28391712768.0, "4275": 28391712768.0, "4280": 28391712768.0, "4285": 28391712768.0, "4290": 28391712768.0, "4295": 28391712768.0, "4300": 28391712768.0, "4305": 28391712768.0, "4310": 28391712768.0, "4315": 28391712768.0, "4320": 28391712768.0, "4325": 28391712768.0, "4330": 28391712768.0, "4335": 28391712768.0, "4340": 28391712768.0, "4345": 28391712768.0, "4350": 28391712768.0, "4355": 28391712768.0, "4360": 28391712768.0, "4365": 28391712768.0, "4370": 28391712768.0, "4375": 28391712768.0, "4380": 28391712768.0, "4385": 28391712768.0, "4390": 28391712768.0, "4395": 28391712768.0, "4400": 28391712768.0, "4405": 28391712768.0, "4410": 28391712768.0, "4415": 28391712768.0, "4420": 28391712768.0, "4425": 28391712768.0, "4430": 28391712768.0, "4435": 28391712768.0, "4440": 28391712768.0, "4445": 28391712768.0, "4450": 28391712768.0, "4455": 28391712768.0, "4460": 28391712768.0, "4465": 28391712768.0, "4470": 28391712768.0, "4475": 28391712768.0, "4480": 28391712768.0, "4485": 28391712768.0, "4490": 28391712768.0, "4495": 28391712768.0, "4500": 28391712768.0, "4505": 28391712768.0, "4510": 28391712768.0, "4515": 28391712768.0, "4520": 28391712768.0, "4525": 28391712768.0, "4530": 28391712768.0, "4535": 28391712768.0, "4540": 28391712768.0, "4545": 28391712768.0, "4550": 28391712768.0, "4555": 28391712768.0, "4560": 28391712768.0, "4565": 28391712768.0, "4570": 28391712768.0, "4575": 28391712768.0, "4580": 28391712768.0, "4585": 28391712768.0, "4590": 28391712768.0, "4595": 28391712768.0, "4600": 28391712768.0, "4605": 28391712768.0, "4610": 28391712768.0, "4615": 28391712768.0, "4620": 28391712768.0, "4625": 28391712768.0, "4630": 28391712768.0, "4635": 28391712768.0, "4640": 28391712768.0, "4645": 28391712768.0, "4650": 28391712768.0, "4655": 28391712768.0, "4660": 28391712768.0, "4665": 28391712768.0, "4670": 28391712768.0, "4675": 28391712768.0, "4680": 28391712768.0, "4685": 28391712768.0, "4690": 28391712768.0, "4695": 28391712768.0, "4700": 28391712768.0, "4705": 28391712768.0, "4710": 28391712768.0, "4715": 28391712768.0, "4720": 28391712768.0, "4725": 28391712768.0, "4730": 28391712768.0, "4735": 28391712768.0, "4740": 28391712768.0, "4745": 28391712768.0, "4750": 28391712768.0, "4755": 28391712768.0, "4760": 28391712768.0, "4765": 28391712768.0, "4770": 28391712768.0, "4775": 28391712768.0, "4780": 28391712768.0, "4785": 28391712768.0, "4790": 28391712768.0, "4795": 28391712768.0, "4800": 28391712768.0, "4805": 28391712768.0, "4810": 28391712768.0, "4815": 28391712768.0, "4820": 28391712768.0, "4825": 28391712768.0, "4830": 28391712768.0, "4835": 28391712768.0, "4840": 28391712768.0, "4845": 28391712768.0, "4850": 28391712768.0, "4855": 28391712768.0, "4860": 28391712768.0, "4865": 28391712768.0, "4870": 28391712768.0, "4875": 28391712768.0, "4880": 28391712768.0, "4885": 28391712768.0, "4890": 28391712768.0, "4895": 28391712768.0, "4900": 28391712768.0, "4905": 28391712768.0, "4910": 28391712768.0, "4915": 28391712768.0, "4920": 28391712768.0, "4925": 28391712768.0, "4930": 28391712768.0, "4935": 28391712768.0, "4940": 28391712768.0, "4945": 28391712768.0, "4950": 28391712768.0, "4955": 28391712768.0, "4960": 28391712768.0, "4965": 28391712768.0, "4970": 28391712768.0, "4975": 28391712768.0, "4980": 28391712768.0, "4985": 28391712768.0, "4990": 28391712768.0, "4995": 28391712768.0, "5000": 28391712768.0, "5005": 28391712768.0, "5010": 28391712768.0, "5015": 28391712768.0, "5020": 28391712768.0, "5025": 28391712768.0, "5030": 28391712768.0, "5035": 28391712768.0, "5040": 28391712768.0, "5045": 28391712768.0, "5050": 28391712768.0, "5055": 28391712768.0, "5060": 28391712768.0, "5065": 28391712768.0, "5070": 28391712768.0, "5075": 28391712768.0, "5080": 28391712768.0, "5085": 28391712768.0, "5090": 28391712768.0, "5095": 28391712768.0, "5100": 28391712768.0, "5105": 28391712768.0, "5110": 28391712768.0, "5115": 28391712768.0, "5120": 28391712768.0, "5125": 28391712768.0, "5130": 28391712768.0, "5135": 28391712768.0, "5140": 28391712768.0, "5145": 28391712768.0, "5150": 28391712768.0, "5155": 28391712768.0, "5160": 28391712768.0, "5165": 28391712768.0, "5170": 28391712768.0, "5175": 28391712768.0, "5180": 28391712768.0, "5185": 28391712768.0, "5190": 28391712768.0, "5195": 28391712768.0, "5200": 28391712768.0, "5205": 28391712768.0, "5210": 28391712768.0, "5215": 28391712768.0, "5220": 28391712768.0, "5225": 28391712768.0, "5230": 28391712768.0, "5235": 28391712768.0, "5240": 28391712768.0, "5245": 28391712768.0, "5250": 28391712768.0, "5255": 28391712768.0, "5260": 28391712768.0, "5265": 28391712768.0, "5270": 28391712768.0, "5275": 28391712768.0, "5280": 28391712768.0, "5285": 28391712768.0, "5290": 28391712768.0, "5295": 28391712768.0, "5300": 28391712768.0, "5305": 28391712768.0, "5310": 28391712768.0, "5315": 28391712768.0, "5320": 28391712768.0, "5325": 28391712768.0, "5330": 28391712768.0, "5335": 28391712768.0, "5340": 28391712768.0, "5345": 28391712768.0, "5350": 28391712768.0, "5355": 28391712768.0, "5360": 28391712768.0, "5365": 28391712768.0, "5370": 28391712768.0, "5375": 28391712768.0, "5380": 28391712768.0, "5385": 28391712768.0, "5390": 28391712768.0, "5395": 28391712768.0, "5400": 28391712768.0, "5405": 28391712768.0, "5410": 28391712768.0, "5415": 28391712768.0, "5420": 28391712768.0, "5425": 28391712768.0, "5430": 28391712768.0, "5435": 28391712768.0, "5440": 28391712768.0, "5445": 28391712768.0, "5450": 28391712768.0, "5455": 28391712768.0, "5460": 28391712768.0, "5465": 28391712768.0, "5470": 28391712768.0, "5475": 28391712768.0, "5480": 28391712768.0, "5485": 28391712768.0, "5490": 28391712768.0, "5495": 28391712768.0, "5500": 28391712768.0, "5505": 28391712768.0, "5510": 28391712768.0, "5515": 28391712768.0, "5520": 28391712768.0, "5525": 28391712768.0, "5530": 28391712768.0, "5535": 28391712768.0, "5540": 28391712768.0, "5545": 28391712768.0, "5550": 28391712768.0, "5555": 28391712768.0, "5560": 28391712768.0, "5565": 28391712768.0, "5570": 28391712768.0, "5575": 28391712768.0, "5580": 28391712768.0, "5585": 28391712768.0, "5590": 28391712768.0, "5595": 28391712768.0, "5600": 28391712768.0, "5605": 28391712768.0, "5610": 28391712768.0, "5615": 28391712768.0, "5620": 28391712768.0, "5625": 28391712768.0, "5630": 28391712768.0, "5635": 28391712768.0, "5640": 28391712768.0, "5645": 28391712768.0, "5650": 28391712768.0, "5655": 28391712768.0, "5660": 28391712768.0, "5665": 28391712768.0, "5670": 28391712768.0, "5675": 28391712768.0, "5680": 28391712768.0, "5685": 28391712768.0, "5690": 28391712768.0, "5695": 28391712768.0, "5700": 28391712768.0, "5705": 28391712768.0, "5710": 28391712768.0, "5715": 28391712768.0, "5720": 28391712768.0, "5725": 28391712768.0, "5730": 28391712768.0, "5735": 28391712768.0, "5740": 28391712768.0, "5745": 28391712768.0, "5750": 28391712768.0, "5755": 28391712768.0, "5760": 28391712768.0, "5765": 28391712768.0, "5770": 28391712768.0, "5775": 28391712768.0, "5780": 28391712768.0, "5785": 28391712768.0, "5790": 28391712768.0, "5795": 28391712768.0, "5800": 28391712768.0, "5805": 28391712768.0, "5810": 28391712768.0, "5815": 28391712768.0, "5820": 28391712768.0, "5825": 28391712768.0, "5830": 28391712768.0, "5835": 28391712768.0, "5840": 28391712768.0, "5845": 28391712768.0, "5850": 28391712768.0, "5855": 28391712768.0, "5860": 28391712768.0, "5865": 28391712768.0, "5870": 28391712768.0, "5875": 28391712768.0, "5880": 28391712768.0, "5885": 28391712768.0, "5890": 28391712768.0, "5895": 28391712768.0, "5900": 28391712768.0, "5905": 28391712768.0, "5910": 28391712768.0, "5915": 28391712768.0, "5920": 28391712768.0, "5925": 28391712768.0, "5930": 28391712768.0, "5935": 28391712768.0, "5940": 28391712768.0, "5945": 28391712768.0, "5950": 28391712768.0, "5955": 28391712768.0, "5960": 28391712768.0, "5965": 28391712768.0, "5970": 28391712768.0, "5975": 28391712768.0, "5980": 28391712768.0, "5985": 28391712768.0, "5990": 28391712768.0, "5995": 28391712768.0, "6000": 28391712768.0, "6005": 28391712768.0, "6010": 28391712768.0, "6015": 28391712768.0, "6020": 28391712768.0, "6025": 28391712768.0, "6030": 28391712768.0, "6035": 28391712768.0, "6040": 28391712768.0, "6045": 28391712768.0, "6050": 28391712768.0, "6055": 28391712768.0, "6060": 28391712768.0, "6065": 28391712768.0, "6070": 28391712768.0, "6075": 28391712768.0, "6080": 28391712768.0, "6085": 28391712768.0, "6090": 28391712768.0, "6095": 28391712768.0, "6100": 28391712768.0, "6105": 28391712768.0, "6110": 28391712768.0, "6115": 28391712768.0, "6120": 28391712768.0, "6125": 28391712768.0, "6130": 28391712768.0, "6135": 28391712768.0, "6140": 28391712768.0, "6145": 28391712768.0, "6150": 28391712768.0, "6155": 28391712768.0, "6160": 28391712768.0, "6165": 28391712768.0, "6170": 28391712768.0, "6175": 28391712768.0, "6180": 28391712768.0, "6185": 28391712768.0, "6190": 28391712768.0, "6195": 28391712768.0, "6200": 28391712768.0, "6205": 28391712768.0, "6210": 28391712768.0, "6215": 28391712768.0, "6220": 28391712768.0, "6225": 28391712768.0, "6230": 28391712768.0, "6235": 28391712768.0, "6240": 28391712768.0, "6245": 28391712768.0, "6250": 28391712768.0, "6255": 28391712768.0, "6260": 28391712768.0, "6265": 28391712768.0, "6270": 28391712768.0, "6275": 28391712768.0, "6280": 28391712768.0, "6285": 28391712768.0, "6290": 28391712768.0, "6295": 28391712768.0, "6300": 28391712768.0, "6305": 28391712768.0, "6310": 28391712768.0, "6315": 28391712768.0, "6320": 28391712768.0, "6325": 28391712768.0, "6330": 28391712768.0, "6335": 28391712768.0, "6340": 28391712768.0, "6345": 28391712768.0, "6350": 28391712768.0, "6355": 28391712768.0, "6360": 28391712768.0, "6365": 28391712768.0, "6370": 28391712768.0, "6375": 28391712768.0, "6380": 28391712768.0, "6385": 28391712768.0, "6390": 28391712768.0, "6395": 28391712768.0, "6400": 28391712768.0, "6405": 28391712768.0, "6410": 28391712768.0, "6415": 28391712768.0, "6420": 28391712768.0, "6425": 28391712768.0, "6430": 28391712768.0, "6435": 28391712768.0, "6440": 28391712768.0, "6445": 28391712768.0, "6450": 28391712768.0, "6455": 28391712768.0, "6460": 28391712768.0, "6465": 28391712768.0, "6470": 28391712768.0, "6475": 28391712768.0, "6480": 28391712768.0, "6485": 28391712768.0, "6490": 28391712768.0, "6495": 28391712768.0, "6500": 28391712768.0, "6505": 28391712768.0, "6510": 28391712768.0, "6515": 28391712768.0, "6520": 28391712768.0, "6525": 28391712768.0, "6530": 28391712768.0, "6535": 28391712768.0, "6540": 28391712768.0, "6545": 28391712768.0, "6550": 28391712768.0, "6555": 28391712768.0, "6560": 28391712768.0, "6565": 28391712768.0, "6570": 28391712768.0, "6575": 28391712768.0, "6580": 28391712768.0, "6585": 28391712768.0, "6590": 28391712768.0, "6595": 28391712768.0, "6600": 28391712768.0, "6605": 28391712768.0, "6610": 28391712768.0, "6615": 28391712768.0, "6620": 28391712768.0, "6625": 28391712768.0, "6630": 28391712768.0, "6635": 28391712768.0, "6640": 28391712768.0, "6645": 28391712768.0, "6650": 28391712768.0, "6655": 28391712768.0, "6660": 28391712768.0, "6665": 28391712768.0, "6670": 28391712768.0, "6675": 28391712768.0, "6680": 28391712768.0, "6685": 28391712768.0, "6690": 28391712768.0, "6695": 28391712768.0, "6700": 28391712768.0, "6705": 28391712768.0, "6710": 28391712768.0, "6715": 28391712768.0, "6720": 28391712768.0, "6725": 28391712768.0, "6730": 28391712768.0, "6735": 28391712768.0, "6740": 28391712768.0, "6745": 28391712768.0, "6750": 28391712768.0, "6755": 28391712768.0, "6760": 28391712768.0, "6765": 28391712768.0, "6770": 28391712768.0, "6775": 28391712768.0, "6780": 28391712768.0, "6785": 28391712768.0, "6790": 28391712768.0, "6795": 28391712768.0, "6800": 28391712768.0, "6805": 28391712768.0, "6810": 28391712768.0, "6815": 28391712768.0, "6820": 28391712768.0, "6825": 28391712768.0, "6830": 28391712768.0, "6835": 28391712768.0, "6840": 28391712768.0, "6845": 28391712768.0, "6850": 28391712768.0, "6855": 28391712768.0, "6860": 28391712768.0, "6865": 28391712768.0, "6870": 28391712768.0, "6875": 28391712768.0, "6880": 28391712768.0, "6885": 28391712768.0, "6890": 28391712768.0, "6895": 28391712768.0, "6900": 28391712768.0, "6905": 28391712768.0, "6910": 28391712768.0, "6915": 28391712768.0, "6920": 28391712768.0, "6925": 28391712768.0, "6930": 28391712768.0, "6935": 28391712768.0, "6940": 28391712768.0, "6945": 28391712768.0, "6950": 28391712768.0, "6955": 28391712768.0, "6960": 28391712768.0, "6965": 28391712768.0, "6970": 28391712768.0, "6975": 28391712768.0, "6980": 28391712768.0, "6985": 28391712768.0, "6990": 28391712768.0, "6995": 28391712768.0, "7000": 28391712768.0, "7005": 28391712768.0, "7010": 28391712768.0, "7015": 28391712768.0, "7020": 28391712768.0, "7025": 28391712768.0, "7030": 28391712768.0, "7035": 28391712768.0, "7040": 28391712768.0, "7045": 28391712768.0, "7050": 28391712768.0, "7055": 28391712768.0, "7060": 28391712768.0, "7065": 28391712768.0, "7070": 28391712768.0, "7075": 28391712768.0, "7080": 28391712768.0, "7085": 28391712768.0, "7090": 28391712768.0, "7095": 28391712768.0, "7100": 28391712768.0, "7105": 28391712768.0, "7110": 28391712768.0, "7115": 28391712768.0, "7120": 28391712768.0, "7125": 28391712768.0, "7130": 28391712768.0, "7135": 28391712768.0, "7140": 28391712768.0, "7145": 28391712768.0, "7150": 28391712768.0, "7155": 28391712768.0, "7160": 28391712768.0, "7165": 28391712768.0, "7170": 28391712768.0, "7175": 28391712768.0, "7180": 28391712768.0, "7185": 28391712768.0, "7190": 28391712768.0, "7195": 28391712768.0, "7200": 28391712768.0, "7205": 28391712768.0, "7210": 28391712768.0, "7215": 28391712768.0, "7220": 28391712768.0, "7225": 28391712768.0, "7230": 28391712768.0, "7235": 28391712768.0, "7240": 28391712768.0, "7245": 28391712768.0, "7250": 28391712768.0, "7255": 28391712768.0, "7260": 28391712768.0, "7265": 28391712768.0, "7270": 28391712768.0, "7275": 28391712768.0, "7280": 28391712768.0, "7285": 28391712768.0, "7290": 28391712768.0, "7295": 28391712768.0, "7300": 28391712768.0, "7305": 28391712768.0, "7310": 28391712768.0, "7315": 28391712768.0, "7320": 28391712768.0, "7325": 28391712768.0, "7330": 28391712768.0, "7335": 28391712768.0, "7340": 28391712768.0, "7345": 28391712768.0, "7350": 28391712768.0, "7355": 28391712768.0, "7360": 28391712768.0, "7365": 28391712768.0, "7370": 28391712768.0, "7375": 28391712768.0, "7380": 28391712768.0, "7385": 28391712768.0, "7390": 28391712768.0, "7395": 28391712768.0, "7400": 28391712768.0, "7405": 28391712768.0, "7410": 28391712768.0, "7415": 28391712768.0, "7420": 28391712768.0, "7425": 28391712768.0, "7430": 28391712768.0, "7435": 28391712768.0, "7440": 28391712768.0, "7445": 28391712768.0, "7450": 28391712768.0, "7455": 28391712768.0, "7460": 28391712768.0, "7465": 28391712768.0, "7470": 28391712768.0, "7475": 28391712768.0, "7480": 28391712768.0, "7485": 28391712768.0, "7490": 28391712768.0, "7495": 28391712768.0, "7500": 28391712768.0, "7505": 28391712768.0, "7510": 28391712768.0, "7515": 28391712768.0, "7520": 28391712768.0, "7525": 28391712768.0, "7530": 28391712768.0, "7535": 28391712768.0, "7540": 28391712768.0, "7545": 28391712768.0, "7550": 28391712768.0, "7555": 28391712768.0, "7560": 28391712768.0, "7565": 28391712768.0, "7570": 28391712768.0, "7575": 28391712768.0, "7580": 28391712768.0, "7585": 28391712768.0, "7590": 28391712768.0, "7595": 28391712768.0, "7600": 28391712768.0, "7605": 28391712768.0, "7610": 28391712768.0, "7615": 28391712768.0, "7620": 28391712768.0, "7625": 28391712768.0, "7630": 28391712768.0, "7635": 28391712768.0, "7640": 28391712768.0, "7645": 28391712768.0, "7650": 28391712768.0, "7655": 28391712768.0, "7660": 28391712768.0, "7665": 28391712768.0, "7670": 28391712768.0, "7675": 28391712768.0, "7680": 28391712768.0, "7685": 28391712768.0, "7690": 28391712768.0, "7695": 28391712768.0, "7700": 28391712768.0, "7705": 28391712768.0, "7710": 28391712768.0, "7715": 28391712768.0, "7720": 28391712768.0, "7725": 28391712768.0, "7730": 28391712768.0, "7735": 28391712768.0, "7740": 28391712768.0, "7745": 28391712768.0, "7750": 28391712768.0, "7755": 28391712768.0, "7760": 28391712768.0, "7765": 28391712768.0, "7770": 28391712768.0, "7775": 28391712768.0, "7780": 28391712768.0, "7785": 28391712768.0, "7790": 28391712768.0, "7795": 28391712768.0, "7800": 28391712768.0, "7805": 28391712768.0, "7810": 28391712768.0, "7815": 28391712768.0, "7820": 28391712768.0, "7825": 28391712768.0, "7830": 28391712768.0, "7835": 28391712768.0, "7840": 28391712768.0, "7845": 28391712768.0, "7850": 28391712768.0, "7855": 28391712768.0, "7860": 28391712768.0, "7865": 28391712768.0, "7870": 28391712768.0, "7875": 28391712768.0, "7880": 28391712768.0, "7885": 28391712768.0, "7890": 28391712768.0, "7895": 28391712768.0, "7900": 28391712768.0, "7905": 28391712768.0, "7910": 28391712768.0, "7915": 28391712768.0, "7920": 28391712768.0, "7925": 28391712768.0, "7930": 28391712768.0, "7935": 28391712768.0, "7940": 28391712768.0, "7945": 28391712768.0, "7950": 28391712768.0, "7955": 28391712768.0, "7960": 28391712768.0, "7965": 28391712768.0, "7970": 28391712768.0, "7975": 28391712768.0, "7980": 28391712768.0, "7985": 28391712768.0, "7990": 28391712768.0, "7995": 28391712768.0, "8000": 28391712768.0, "8005": 28391712768.0, "8010": 28391712768.0, "8015": 28391712768.0, "8020": 28391712768.0, "8025": 28391712768.0, "8030": 28391712768.0, "8035": 28391712768.0, "8040": 28391712768.0, "8045": 28391712768.0, "8050": 28391712768.0, "8055": 28391712768.0, "8060": 28391712768.0, "8065": 28391712768.0, "8070": 28391712768.0, "8075": 28391712768.0, "8080": 28391712768.0, "8085": 28391712768.0, "8090": 28391712768.0, "8095": 28391712768.0, "8100": 28391712768.0, "8105": 28391712768.0, "8110": 28391712768.0, "8115": 28391712768.0, "8120": 28391712768.0, "8125": 28391712768.0, "8130": 28391712768.0, "8135": 28391712768.0, "8140": 28391712768.0, "8145": 28391712768.0, "8150": 28391712768.0, "8155": 28391712768.0, "8160": 28391712768.0, "8165": 28391712768.0, "8170": 28391712768.0, "8175": 28391712768.0, "8180": 28391712768.0, "8185": 28391712768.0, "8190": 28391712768.0, "8195": 28391712768.0, "8200": 28391712768.0, "8205": 28391712768.0, "8210": 28391712768.0, "8215": 28391712768.0, "8220": 28391712768.0, "8225": 28391712768.0, "8230": 28391712768.0, "8235": 28391712768.0, "8240": 28391712768.0, "8245": 28391712768.0, "8250": 28391712768.0, "8255": 28391712768.0, "8260": 28391712768.0, "8265": 28391712768.0, "8270": 28391712768.0, "8275": 28391712768.0, "8280": 28391712768.0, "8285": 28391712768.0, "8290": 28391712768.0, "8295": 28391712768.0, "8300": 28391712768.0, "8305": 28391712768.0, "8310": 28391712768.0, "8315": 28391712768.0, "8320": 28391712768.0, "8325": 28391712768.0, "8330": 28391712768.0, "8335": 28391712768.0, "8340": 28391712768.0, "8345": 28391712768.0, "8350": 28391712768.0, "8355": 28391712768.0, "8360": 28391712768.0, "8365": 28391712768.0, "8370": 28391712768.0, "8375": 28391712768.0, "8380": 28391712768.0, "8385": 28391712768.0, "8390": 28391712768.0, "8395": 28391712768.0, "8400": 28391712768.0, "8405": 28391712768.0, "8410": 28391712768.0, "8415": 28391712768.0, "8420": 28391712768.0, "8425": 28391712768.0, "8430": 28391712768.0, "8435": 28391712768.0, "8440": 28391712768.0, "8445": 28391712768.0, "8450": 28391712768.0, "8455": 28391712768.0, "8460": 28391712768.0, "8465": 28391712768.0, "8470": 28391712768.0, "8475": 28391712768.0, "8480": 28391712768.0, "8485": 28391712768.0, "8490": 28391712768.0, "8495": 28391712768.0, "8500": 28391712768.0, "8505": 28391712768.0, "8510": 28391712768.0, "8515": 28391712768.0, "8520": 28391712768.0, "8525": 28391712768.0, "8530": 28391712768.0, "8535": 28391712768.0, "8540": 28391712768.0, "8545": 28391712768.0, "8550": 28391712768.0, "8555": 28391712768.0, "8560": 28391712768.0, "8565": 28391712768.0, "8570": 28391712768.0, "8575": 28391712768.0, "8580": 28391712768.0, "8585": 28391712768.0, "8590": 28391712768.0, "8595": 28391712768.0, "8600": 28391712768.0, "8605": 28391712768.0, "8610": 28391712768.0, "8615": 28391712768.0, "8620": 28391712768.0, "8625": 28391712768.0, "8630": 28391712768.0, "8635": 28391712768.0, "8640": 28391712768.0, "8645": 28391712768.0, "8650": 28391712768.0, "8655": 28391712768.0, "8660": 28391712768.0, "8665": 28391712768.0, "8670": 28391712768.0, "8675": 28391712768.0, "8680": 28391712768.0, "8685": 28391712768.0, "8690": 28391712768.0, "8695": 28391712768.0, "8700": 28391712768.0, "8705": 28391712768.0, "8710": 28391712768.0, "8715": 28391712768.0, "8720": 28391712768.0, "8725": 28391712768.0, "8730": 28391712768.0, "8735": 28391712768.0, "8740": 28391712768.0, "8745": 28391712768.0, "8750": 28391712768.0, "8755": 28391712768.0, "8760": 28391712768.0, "8765": 28391712768.0, "8770": 28391712768.0, "8775": 28391712768.0, "8780": 28391712768.0, "8785": 28391712768.0, "8790": 28391712768.0, "8795": 28391712768.0, "8800": 28391712768.0, "8805": 28391712768.0, "8810": 28391712768.0, "8815": 28391712768.0, "8820": 28391712768.0, "8825": 28391712768.0, "8830": 28391712768.0, "8835": 28391712768.0, "8840": 28391712768.0, "8845": 28391712768.0, "8850": 28391712768.0, "8855": 28391712768.0, "8860": 28391712768.0, "8865": 28391712768.0, "8870": 28391712768.0, "8875": 28391712768.0, "8880": 28391712768.0, "8885": 28391712768.0, "8890": 28391712768.0, "8895": 28391712768.0, "8900": 28391712768.0, "8905": 28391712768.0, "8910": 28391712768.0, "8915": 28391712768.0, "8920": 28391712768.0, "8925": 28391712768.0, "8930": 28391712768.0, "8935": 28391712768.0, "8940": 28391712768.0, "8945": 28391712768.0, "8950": 28391712768.0, "8955": 28391712768.0, "8960": 28391712768.0, "8965": 28391712768.0, "8970": 28391712768.0, "8975": 28391712768.0, "8980": 28391712768.0, "8985": 28391712768.0, "8990": 28391712768.0, "8995": 28391712768.0, "9000": 28391712768.0, "9005": 28391712768.0, "9010": 28391712768.0, "9015": 28391712768.0, "9020": 28391712768.0, "9025": 28391712768.0, "9030": 28391712768.0, "9035": 28391712768.0, "9040": 28391712768.0, "9045": 28391712768.0, "9050": 28391712768.0, "9055": 28391712768.0, "9060": 28391712768.0, "9065": 28391712768.0, "9070": 28391712768.0, "9075": 28391712768.0, "9080": 28391712768.0, "9085": 28391712768.0, "9090": 28391712768.0, "9095": 28391712768.0, "9100": 28391712768.0, "9105": 28391712768.0, "9110": 28391712768.0, "9115": 28391712768.0, "9120": 28391712768.0, "9125": 28391712768.0, "9130": 28391712768.0, "9135": 28391712768.0, "9140": 28391712768.0, "9145": 28391712768.0, "9150": 28391712768.0, "9155": 28391712768.0, "9160": 28391712768.0, "9165": 28391712768.0, "9170": 28391712768.0, "9175": 28391712768.0, "9180": 28391712768.0, "9185": 28391712768.0, "9190": 28391712768.0, "9195": 28391712768.0, "9200": 28391712768.0, "9205": 28391712768.0, "9210": 28391712768.0, "9215": 28391712768.0, "9220": 28391712768.0, "9225": 28391712768.0, "9230": 28391712768.0, "9235": 28391712768.0, "9240": 28391712768.0, "9245": 28391712768.0, "9250": 28391712768.0, "9255": 28391712768.0, "9260": 28391712768.0, "9265": 28391712768.0, "9270": 28391712768.0, "9275": 28391712768.0, "9280": 28391712768.0, "9285": 28391712768.0, "9290": 28391712768.0, "9295": 28391712768.0, "9300": 28391712768.0, "9305": 28391712768.0, "9310": 28391712768.0, "9315": 28391712768.0, "9320": 28391712768.0, "9325": 28391712768.0, "9330": 28391712768.0, "9335": 28391712768.0, "9340": 28391712768.0, "9345": 28391712768.0, "9350": 28391712768.0, "9355": 28391712768.0, "9360": 28391712768.0, "9365": 28391712768.0, "9370": 28391712768.0, "9375": 28391712768.0, "9380": 28391712768.0, "9385": 28391712768.0, "9390": 28391712768.0, "9395": 28391712768.0, "9400": 28391712768.0, "9405": 28391712768.0, "9410": 28391712768.0, "9415": 28391712768.0, "9420": 28391712768.0, "9425": 28391712768.0, "9430": 28391712768.0, "9435": 28391712768.0, "9440": 28391712768.0, "9445": 28391712768.0, "9450": 28391712768.0, "9455": 28391712768.0, "9460": 28391712768.0, "9465": 28391712768.0, "9470": 28391712768.0, "9475": 28391712768.0, "9480": 28391712768.0, "9485": 28391712768.0, "9490": 28391712768.0, "9495": 28391712768.0, "9500": 28391712768.0, "9505": 28391712768.0, "9510": 28391712768.0, "9515": 28391712768.0, "9520": 28391712768.0, "9525": 28391712768.0, "9530": 28391712768.0, "9535": 28391712768.0, "9540": 28391712768.0, "9545": 28391712768.0, "9550": 28391712768.0, "9555": 28391712768.0, "9560": 28391712768.0, "9565": 28391712768.0, "9570": 28391712768.0, "9575": 28391712768.0, "9580": 28391712768.0, "9585": 28391712768.0, "9590": 28391712768.0, "9595": 28391712768.0, "9600": 28391712768.0, "9605": 28391712768.0, "9610": 28391712768.0, "9615": 28391712768.0, "9620": 28391712768.0, "9625": 28391712768.0, "9630": 28391712768.0, "9635": 28391712768.0, "9640": 28391712768.0, "9645": 28391712768.0, "9650": 28391712768.0, "9655": 28391712768.0, "9660": 28391712768.0, "9665": 28391712768.0, "9670": 28391712768.0, "9675": 28391712768.0, "9680": 28391712768.0, "9685": 28391712768.0, "9690": 28391712768.0, "9695": 28391712768.0, "9700": 28391712768.0, "9705": 28391712768.0, "9710": 28391712768.0, "9715": 28391712768.0, "9720": 28391712768.0, "9725": 28391712768.0, "9730": 28391712768.0, "9735": 28391712768.0, "9740": 28391712768.0, "9745": 28391712768.0, "9750": 28391712768.0, "9755": 28391712768.0, "9760": 28391712768.0, "9765": 28391712768.0, "9770": 28391712768.0, "9775": 28391712768.0, "9780": 28391712768.0, "9785": 28391712768.0, "9790": 28391712768.0, "9795": 28391712768.0, "9800": 28391712768.0, "9805": 28391712768.0, "9810": 28391712768.0, "9815": 28391712768.0, "9820": 28391712768.0, "9825": 28391712768.0, "9830": 28391712768.0, "9835": 28391712768.0, "9840": 28391712768.0, "9845": 28391712768.0, "9850": 28391712768.0, "9855": 28391712768.0, "9860": 28391712768.0, "9865": 28391712768.0, "9870": 28391712768.0, "9875": 28391712768.0, "9880": 28391712768.0, "9885": 28391712768.0, "9890": 28391712768.0, "9895": 28391712768.0, "9900": 28391712768.0, "9905": 28391712768.0, "9910": 28391712768.0, "9915": 28391712768.0, "9920": 28391712768.0, "9925": 28391712768.0, "9930": 28391712768.0, "9935": 28391712768.0, "9940": 28391712768.0, "9945": 28391712768.0, "9950": 28391712768.0, "9955": 28391712768.0, "9960": 28391712768.0, "9965": 28391712768.0, "9970": 28391712768.0, "9975": 28391712768.0, "9980": 28391712768.0, "9985": 28391712768.0, "9990": 28391712768.0, "9995": 28391712768.0, "10000": 28391712768.0, "10005": 28391712768.0, "10010": 28391712768.0, "10015": 28391712768.0, "10020": 28391712768.0, "10025": 28391712768.0, "10030": 28391712768.0, "10035": 28391712768.0, "10040": 28391712768.0, "10045": 28391712768.0, "10050": 28391712768.0, "10055": 28391712768.0, "10060": 28391712768.0, "10065": 28391712768.0, "10070": 28391712768.0, "10075": 28391712768.0, "10080": 28391712768.0, "10085": 28391712768.0, "10090": 28391712768.0, "10095": 28391712768.0, "10100": 28391712768.0, "10105": 28391712768.0, "10110": 28391712768.0, "10115": 28391712768.0, "10120": 28391712768.0, "10125": 28391712768.0, "10130": 28391712768.0, "10135": 28391712768.0, "10140": 28391712768.0, "10145": 28391712768.0, "10150": 28391712768.0, "10155": 28391712768.0, "10160": 28391712768.0, "10165": 28391712768.0, "10170": 28391712768.0, "10175": 28391712768.0, "10180": 28391712768.0, "10185": 28391712768.0, "10190": 28391712768.0, "10195": 28391712768.0, "10200": 28391712768.0, "10205": 28391712768.0, "10210": 28391712768.0, "10215": 28391712768.0, "10220": 28391712768.0, "10225": 28391712768.0, "10230": 28391712768.0, "10235": 28391712768.0, "10240": 28391712768.0, "10245": 28391712768.0, "10250": 28391712768.0, "10255": 28391712768.0, "10260": 28391712768.0, "10265": 28391712768.0, "10270": 28391712768.0, "10275": 28391712768.0, "10280": 28391712768.0, "10285": 28391712768.0, "10290": 28391712768.0, "10295": 28391712768.0, "10300": 28391712768.0, "10305": 28391712768.0, "10310": 28391712768.0, "10315": 28391712768.0, "10320": 28391712768.0, "10325": 28391712768.0, "10330": 28391712768.0, "10335": 28391712768.0, "10340": 28391712768.0, "10345": 28391712768.0, "10350": 28391712768.0, "10355": 28391712768.0, "10360": 28391712768.0, "10365": 28391712768.0, "10370": 28391712768.0, "10375": 28391712768.0, "10380": 28391712768.0, "10385": 28391712768.0, "10390": 28391712768.0, "10395": 28391712768.0, "10400": 28391712768.0, "10405": 28391712768.0, "10410": 28391712768.0, "10415": 28391712768.0, "10420": 28391712768.0, "10425": 28391712768.0, "10430": 28391712768.0, "10435": 28391712768.0, "10440": 28391712768.0, "10445": 28391712768.0, "10450": 28391712768.0, "10455": 28391712768.0, "10460": 28391712768.0, "10465": 28391712768.0, "10470": 28391712768.0, "10475": 28391712768.0, "10480": 28391712768.0, "10485": 28391712768.0, "10490": 28391712768.0, "10495": 28391712768.0, "10500": 28391712768.0, "10505": 28391712768.0, "10510": 28391712768.0, "10515": 28391712768.0, "10520": 28391712768.0, "10525": 28391712768.0, "10530": 28391712768.0, "10535": 28391712768.0, "10540": 28391712768.0, "10545": 28391712768.0, "10550": 28391712768.0, "10555": 28391712768.0, "10560": 28391712768.0, "10565": 28391712768.0, "10570": 28391712768.0, "10575": 28391712768.0, "10580": 28391712768.0, "10585": 28391712768.0, "10590": 28391712768.0, "10595": 28391712768.0, "10600": 28391712768.0, "10605": 28391712768.0, "10610": 28391712768.0, "10615": 28391712768.0, "10620": 28391712768.0, "10625": 28391712768.0, "10630": 28391712768.0, "10635": 28391712768.0, "10640": 28391712768.0, "10645": 28391712768.0, "10650": 28391712768.0, "10655": 28391712768.0, "10660": 28391712768.0, "10665": 28391712768.0, "10670": 28391712768.0, "10675": 28391712768.0, "10680": 28391712768.0, "10685": 28391712768.0, "10690": 28391712768.0, "10695": 28391712768.0, "10700": 28391712768.0, "10705": 28391712768.0, "10710": 28391712768.0, "10715": 28391712768.0, "10720": 28391712768.0, "10725": 28391712768.0, "10730": 28391712768.0, "10735": 28391712768.0, "10740": 28391712768.0, "10745": 28391712768.0, "10750": 28391712768.0, "10755": 28391712768.0, "10760": 28391712768.0, "10765": 28391712768.0, "10770": 28391712768.0, "10775": 28391712768.0, "10780": 28391712768.0, "10785": 28391712768.0, "10790": 28391712768.0, "10795": 28391712768.0, "10800": 28391712768.0, "10805": 28391712768.0, "10810": 28391712768.0, "10815": 28391712768.0, "10820": 28391712768.0, "10825": 28391712768.0, "10830": 28391712768.0, "10835": 28391712768.0, "10840": 28391712768.0, "10845": 28391712768.0, "10850": 28391712768.0, "10855": 28391712768.0, "10860": 28391712768.0, "10865": 28391712768.0, "10870": 28391712768.0, "10875": 28391712768.0, "10880": 28391712768.0, "10885": 28391712768.0, "10890": 28391712768.0, "10895": 28391712768.0, "10900": 28391712768.0, "10905": 28391712768.0, "10910": 28391712768.0, "10915": 28391712768.0, "10920": 28391712768.0, "10925": 28391712768.0, "10930": 28391712768.0, "10935": 28391712768.0, "10940": 28391712768.0, "10945": 28391712768.0, "10950": 28391712768.0, "10955": 28391712768.0, "10960": 28391712768.0, "10965": 28391712768.0, "10970": 28391712768.0, "10975": 28391712768.0, "10980": 28391712768.0, "10985": 28391712768.0, "10990": 28391712768.0, "10995": 28391712768.0, "11000": 28391712768.0, "11005": 28391712768.0, "11010": 28391712768.0, "11015": 28391712768.0, "11020": 28391712768.0, "11025": 28391712768.0, "11030": 28391712768.0, "11035": 28391712768.0, "11040": 28391712768.0, "11045": 28391712768.0, "11050": 28391712768.0, "11055": 28391712768.0, "11060": 28391712768.0, "11065": 28391712768.0, "11070": 28391712768.0, "11075": 28391712768.0, "11080": 28391712768.0, "11085": 28391712768.0, "11090": 28391712768.0, "11095": 28391712768.0, "11100": 28391712768.0, "11105": 28391712768.0, "11110": 28391712768.0, "11115": 28391712768.0, "11120": 28391712768.0, "11125": 28391712768.0, "11130": 28391712768.0, "11135": 28391712768.0, "11140": 28391712768.0, "11145": 28391712768.0, "11150": 28391712768.0, "11155": 28391712768.0, "11160": 28391712768.0, "11165": 28391712768.0, "11170": 28391712768.0, "11175": 28391712768.0, "11180": 28391712768.0, "11185": 28391712768.0, "11190": 28391712768.0, "11195": 28391712768.0, "11200": 28391712768.0, "11205": 28391712768.0, "11210": 28391712768.0, "11215": 28391712768.0, "11220": 28391712768.0, "11225": 28391712768.0, "11230": 28391712768.0, "11235": 28391712768.0, "11240": 28391712768.0, "11245": 28391712768.0, "11250": 28391712768.0, "11255": 28391712768.0, "11260": 28391712768.0, "11265": 28391712768.0, "11270": 28391712768.0, "11275": 28391712768.0, "11280": 28391712768.0, "11285": 28391712768.0, "11290": 28391712768.0, "11295": 28391712768.0, "11300": 28391712768.0, "11305": 28391712768.0, "11310": 28391712768.0, "11315": 28391712768.0, "11320": 28391712768.0, "11325": 28391712768.0, "11330": 28391712768.0, "11335": 28391712768.0, "11340": 28391712768.0, "11345": 28391712768.0, "11350": 28391712768.0, "11355": 28391712768.0, "11360": 28391712768.0, "11365": 28391712768.0, "11370": 28391712768.0, "11375": 28391712768.0, "11380": 28391712768.0, "11385": 28391712768.0, "11390": 28391712768.0, "11395": 28391712768.0, "11400": 28391712768.0, "11405": 28391712768.0, "11410": 28391712768.0, "11415": 28391712768.0, "11420": 28391712768.0, "11425": 28391712768.0, "11430": 28391712768.0, "11435": 28391712768.0, "11440": 28391712768.0, "11445": 28391712768.0, "11450": 28391712768.0, "11455": 28391712768.0, "11460": 28391712768.0, "11465": 28391712768.0, "11470": 28391712768.0, "11475": 28391712768.0, "11480": 28391712768.0, "11485": 28391712768.0, "11490": 28391712768.0, "11495": 28391712768.0, "11500": 28391712768.0, "11505": 28391712768.0, "11510": 28391712768.0, "11515": 28391712768.0, "11520": 28391712768.0, "11525": 28391712768.0, "11530": 28391712768.0, "11535": 28391712768.0, "11540": 28391712768.0, "11545": 28391712768.0, "11550": 28391712768.0, "11555": 28391712768.0, "11560": 28391712768.0, "11565": 28391712768.0, "11570": 28391712768.0, "11575": 28391712768.0, "11580": 28391712768.0, "11585": 28391712768.0, "11590": 28391712768.0, "11595": 28391712768.0, "11600": 28391712768.0, "11605": 28391712768.0, "11610": 28391712768.0, "11615": 28391712768.0, "11620": 28391712768.0, "11625": 28391712768.0, "11630": 28391712768.0, "11635": 28391712768.0, "11640": 28391712768.0, "11645": 28391712768.0, "11650": 28391712768.0, "11655": 28391712768.0, "11660": 28391712768.0, "11665": 28391712768.0, "11670": 28391712768.0, "11675": 28391712768.0, "11680": 28391712768.0, "11685": 28391712768.0, "11690": 28391712768.0, "11695": 28391712768.0, "11700": 28391712768.0, "11705": 28391712768.0, "11710": 28391712768.0, "11715": 28391712768.0, "11720": 28391712768.0, "11725": 28391712768.0, "11730": 28391712768.0, "11735": 28391712768.0, "11740": 28391712768.0, "11745": 28391712768.0, "11750": 28391712768.0, "11755": 28391712768.0, "11760": 28391712768.0, "11765": 28391712768.0, "11770": 28391712768.0, "11775": 28391712768.0, "11780": 28391712768.0, "11785": 28391712768.0, "11790": 28391712768.0, "11795": 28391712768.0, "11800": 28391712768.0, "11805": 28391712768.0, "11810": 28391712768.0, "11815": 28391712768.0, "11820": 28391712768.0, "11825": 28391712768.0, "11830": 28391712768.0, "11835": 28391712768.0, "11840": 28391712768.0, "11845": 28391712768.0, "11850": 28391712768.0, "11855": 28391712768.0, "11860": 28391712768.0, "11865": 28391712768.0, "11870": 28391712768.0, "11875": 28391712768.0, "11880": 28391712768.0, "11885": 28391712768.0, "11890": 28391712768.0, "11895": 28391712768.0, "11900": 28391712768.0, "11905": 28391712768.0, "11910": 28391712768.0, "11915": 28391712768.0, "11920": 28391712768.0, "11925": 28391712768.0, "11930": 28391712768.0, "11935": 28391712768.0, "11940": 28391712768.0, "11945": 28391712768.0, "11950": 28391712768.0, "11955": 28391712768.0, "11960": 28391712768.0, "11965": 28391712768.0, "11970": 28391712768.0, "11975": 28391712768.0, "11980": 28391712768.0, "11985": 28391712768.0, "11990": 28391712768.0, "11995": 28391712768.0, "12000": 28391712768.0, "12005": 28391712768.0, "12010": 28391712768.0, "12015": 28391712768.0, "12020": 28391712768.0, "12025": 28391712768.0, "12030": 28391712768.0, "12035": 28391712768.0, "12040": 28391712768.0, "12045": 28391712768.0, "12050": 28391712768.0, "12055": 28391712768.0, "12060": 28391712768.0, "12065": 28391712768.0, "12070": 28391712768.0, "12075": 28391712768.0, "12080": 28391712768.0, "12085": 28391712768.0, "12090": 28391712768.0, "12095": 28391712768.0, "12100": 28391712768.0, "12105": 28391712768.0, "12110": 28391712768.0, "12115": 28391712768.0, "12120": 28391712768.0, "12125": 28391712768.0, "12130": 28391712768.0, "12135": 28391712768.0, "12140": 28391712768.0, "12145": 28391712768.0, "12150": 28391712768.0, "12155": 28391712768.0, "12160": 28391712768.0, "12165": 28391712768.0, "12170": 28391712768.0, "12175": 28391712768.0, "12180": 28391712768.0, "12185": 28391712768.0, "12190": 28391712768.0, "12195": 28391712768.0, "12200": 28391712768.0, "12205": 28391712768.0, "12210": 28391712768.0, "12215": 28391712768.0, "12220": 28391712768.0, "12225": 28391712768.0, "12230": 28391712768.0, "12235": 28391712768.0, "12240": 28391712768.0, "12245": 28391712768.0, "12250": 28391712768.0, "12255": 28391712768.0, "12260": 28391712768.0, "12265": 28391712768.0, "12270": 28391712768.0, "12275": 28391712768.0, "12280": 28391712768.0, "12285": 28391712768.0, "12290": 28391712768.0, "12295": 28391712768.0, "12300": 28391712768.0, "12305": 28391712768.0, "12310": 28391712768.0, "12315": 28391712768.0, "12320": 28391712768.0, "12325": 28391712768.0, "12330": 28391712768.0, "12335": 28391712768.0, "12340": 28391712768.0, "12345": 28391712768.0, "12350": 28391712768.0, "12355": 28391712768.0, "12360": 28391712768.0, "12365": 28391712768.0, "12370": 28391712768.0, "12375": 28391712768.0, "12380": 28391712768.0, "12385": 28391712768.0, "12390": 28391712768.0, "12395": 28391712768.0, "12400": 28391712768.0, "12405": 28391712768.0, "12410": 28391712768.0, "12415": 28391712768.0, "12420": 28391712768.0, "12425": 28391712768.0, "12430": 28391712768.0, "12435": 28391712768.0, "12440": 28391712768.0, "12445": 28391712768.0, "12450": 28391712768.0, "12455": 28391712768.0, "12460": 28391712768.0, "12465": 28391712768.0, "12470": 28391712768.0, "12475": 28391712768.0, "12480": 28391712768.0, "12485": 28391712768.0, "12490": 28391712768.0, "12495": 28391712768.0, "12500": 28391712768.0, "12505": 28391712768.0, "12510": 28391712768.0, "12515": 28391712768.0, "12520": 28391712768.0, "12525": 28391712768.0, "12530": 28391712768.0, "12535": 28391712768.0, "12540": 28391712768.0, "12545": 28391712768.0, "12550": 28391712768.0, "12555": 28391712768.0, "12560": 28391712768.0, "12565": 28391712768.0, "12570": 28391712768.0, "12575": 28391712768.0, "12580": 28391712768.0, "12585": 28391712768.0, "12590": 28391712768.0, "12595": 28391712768.0, "12600": 28391712768.0, "12605": 28391712768.0, "12610": 28391712768.0, "12615": 28391712768.0, "12620": 28391712768.0, "12625": 28391712768.0, "12630": 28391712768.0, "12635": 28391712768.0, "12640": 28391712768.0, "12645": 28391712768.0, "12650": 28391712768.0, "12655": 28391712768.0, "12660": 28391712768.0, "12665": 28391712768.0, "12670": 28391712768.0, "12675": 28391712768.0, "12680": 28391712768.0, "12685": 28391712768.0, "12690": 28391712768.0, "12695": 28391712768.0, "12700": 28391712768.0, "12705": 28391712768.0, "12710": 28391712768.0, "12715": 28391712768.0, "12720": 28391712768.0, "12725": 28391712768.0, "12730": 28391712768.0, "12735": 28391712768.0, "12740": 28391712768.0, "12745": 28391712768.0, "12750": 28391712768.0, "12755": 28391712768.0, "12760": 28391712768.0, "12765": 28391712768.0, "12770": 28391712768.0, "12775": 28391712768.0, "12780": 28391712768.0, "12785": 28391712768.0, "12790": 28391712768.0, "12795": 28391712768.0, "12800": 28391712768.0, "12805": 28391712768.0, "12810": 28391712768.0, "12815": 28391712768.0, "12820": 28391712768.0, "12825": 28391712768.0, "12830": 28391712768.0, "12835": 28391712768.0, "12840": 28391712768.0, "12845": 28391712768.0, "12850": 28391712768.0, "12855": 28391712768.0, "12860": 28391712768.0, "12865": 28391712768.0, "12870": 28391712768.0, "12875": 28391712768.0, "12880": 28391712768.0, "12885": 28391712768.0, "12890": 28391712768.0, "12895": 28391712768.0, "12900": 28391712768.0, "12905": 28391712768.0, "12910": 28391712768.0, "12915": 28391712768.0, "12920": 28391712768.0, "12925": 28391712768.0, "12930": 28391712768.0, "12935": 28391712768.0, "12940": 28391712768.0, "12945": 28391712768.0, "12950": 28391712768.0, "12955": 28391712768.0, "12960": 28391712768.0, "12965": 28391712768.0, "12970": 28391712768.0, "12975": 28391712768.0, "12980": 28391712768.0, "12985": 28391712768.0, "12990": 28391712768.0, "12995": 28391712768.0, "13000": 28391712768.0, "13005": 28391712768.0, "13010": 28391712768.0, "13015": 28391712768.0, "13020": 28391712768.0, "13025": 28391712768.0, "13030": 28391712768.0, "13035": 28391712768.0, "13040": 28391712768.0, "13045": 28391712768.0, "13050": 28391712768.0, "13055": 28391712768.0, "13060": 28391712768.0, "13065": 28391712768.0, "13070": 28391712768.0, "13075": 28391712768.0, "13080": 28391712768.0, "13085": 28391712768.0, "13090": 28391712768.0, "13095": 28391712768.0, "13100": 28391712768.0, "13105": 28391712768.0, "13110": 28391712768.0, "13115": 28391712768.0, "13120": 28391712768.0, "13125": 28391712768.0, "13130": 28391712768.0, "13135": 28391712768.0, "13140": 28391712768.0, "13145": 28391712768.0, "13150": 28391712768.0, "13155": 28391712768.0, "13160": 28391712768.0, "13165": 28391712768.0, "13170": 28391712768.0, "13175": 28391712768.0, "13180": 28391712768.0, "13185": 28391712768.0, "13190": 28391712768.0, "13195": 28391712768.0, "13200": 28391712768.0, "13205": 28391712768.0, "13210": 28391712768.0, "13215": 28391712768.0, "13220": 28391712768.0, "13225": 28391712768.0, "13230": 28391712768.0, "13235": 28391712768.0, "13240": 28391712768.0, "13245": 28391712768.0, "13250": 28391712768.0, "13255": 28391712768.0, "13260": 28391712768.0, "13265": 28391712768.0, "13270": 28391712768.0, "13275": 28391712768.0, "13280": 28391712768.0, "13285": 28391712768.0, "13290": 28391712768.0, "13295": 28391712768.0, "13300": 28391712768.0, "13305": 28391712768.0, "13310": 28391712768.0, "13315": 28391712768.0, "13320": 28391712768.0, "13325": 28391712768.0, "13330": 28391712768.0, "13335": 28391712768.0, "13340": 28391712768.0, "13345": 28391712768.0, "13350": 28391712768.0, "13355": 28391712768.0, "13360": 28391712768.0, "13365": 28391712768.0, "13370": 28391712768.0, "13375": 28391712768.0, "13380": 28391712768.0, "13385": 28391712768.0, "13390": 28391712768.0, "13395": 28391712768.0, "13400": 28391712768.0, "13405": 28391712768.0, "13410": 28391712768.0, "13415": 28391712768.0, "13420": 28391712768.0, "13425": 28391712768.0, "13430": 28391712768.0, "13435": 28391712768.0, "13440": 28391712768.0, "13445": 28391712768.0, "13450": 28391712768.0, "13455": 28391712768.0, "13460": 28391712768.0, "13465": 28391712768.0, "13470": 28391712768.0, "13475": 28391712768.0, "13480": 28391712768.0, "13485": 28391712768.0, "13490": 28391712768.0, "13495": 28391712768.0, "13500": 28391712768.0, "13505": 28391712768.0, "13510": 28391712768.0, "13515": 28391712768.0, "13520": 28391712768.0, "13525": 28391712768.0, "13530": 28391712768.0, "13535": 28391712768.0, "13540": 28391712768.0, "13545": 28391712768.0, "13550": 28391712768.0, "13555": 28391712768.0, "13560": 28391712768.0, "13565": 28391712768.0, "13570": 28391712768.0, "13575": 28391712768.0, "13580": 28391712768.0, "13585": 28391712768.0, "13590": 28391712768.0, "13595": 28391712768.0, "13600": 28391712768.0, "13605": 28391712768.0, "13610": 28391712768.0, "13615": 28391712768.0, "13620": 28391712768.0, "13625": 28391712768.0, "13630": 28391712768.0, "13635": 28391712768.0, "13640": 28391712768.0, "13645": 28391712768.0, "13650": 28391712768.0, "13655": 28391712768.0, "13660": 28391712768.0, "13665": 28391712768.0, "13670": 28391712768.0, "13675": 28391712768.0, "13680": 28391712768.0, "13685": 28391712768.0, "13690": 28391712768.0, "13695": 28391712768.0, "13700": 28391712768.0, "13705": 28391712768.0, "13710": 28391712768.0, "13715": 28391712768.0, "13720": 28391712768.0, "13725": 28391712768.0, "13730": 28391712768.0, "13735": 28391712768.0, "13740": 28391712768.0, "13745": 28391712768.0, "13750": 28391712768.0, "13755": 28391712768.0, "13760": 28391712768.0, "13765": 28391712768.0, "13770": 28391712768.0, "13775": 28391712768.0, "13780": 28391712768.0, "13785": 28391712768.0, "13790": 28391712768.0, "13795": 28391712768.0, "13800": 28391712768.0, "13805": 28391712768.0, "13810": 28391712768.0, "13815": 28391712768.0, "13820": 28391712768.0, "13825": 28391712768.0, "13830": 28391712768.0, "13835": 28391712768.0, "13840": 28391712768.0, "13845": 28391712768.0, "13850": 28391712768.0, "13855": 28391712768.0, "13860": 28391712768.0, "13865": 28391712768.0, "13870": 28391712768.0, "13875": 28391712768.0, "13880": 28391712768.0, "13885": 28391712768.0, "13890": 28391712768.0, "13895": 28391712768.0, "13900": 28391712768.0, "13905": 28391712768.0, "13910": 28391712768.0, "13915": 28391712768.0, "13920": 28391712768.0, "13925": 28391712768.0, "13930": 28391712768.0, "13935": 28391712768.0, "13940": 28391712768.0, "13945": 28391712768.0, "13950": 28391712768.0, "13955": 28391712768.0, "13960": 28391712768.0, "13965": 28391712768.0, "13970": 28391712768.0, "13975": 28391712768.0, "13980": 28391712768.0, "13985": 28391712768.0, "13990": 28391712768.0, "13995": 28391712768.0, "14000": 28391712768.0, "14005": 28391712768.0, "14010": 28391712768.0, "14015": 28391712768.0, "14020": 28391712768.0, "14025": 28391712768.0, "14030": 28391712768.0, "14035": 28391712768.0, "14040": 28391712768.0, "14045": 28391712768.0, "14050": 28391712768.0, "14055": 28391712768.0, "14060": 28391712768.0, "14065": 28391712768.0, "14070": 28391712768.0, "14075": 28391712768.0, "14080": 28391712768.0, "14085": 28391712768.0, "14090": 28391712768.0, "14095": 28391712768.0, "14100": 28391712768.0, "14105": 28391712768.0, "14110": 28391712768.0, "14115": 28391712768.0, "14120": 28391712768.0, "14125": 28391712768.0, "14130": 28391712768.0, "14135": 28391712768.0, "14140": 28391712768.0, "14145": 28391712768.0, "14150": 28391712768.0, "14155": 28391712768.0, "14160": 28391712768.0, "14165": 28391712768.0, "14170": 28391712768.0, "14175": 28391712768.0, "14180": 28391712768.0, "14185": 28391712768.0, "14190": 28391712768.0, "14195": 28391712768.0, "14200": 28391712768.0, "14205": 28391712768.0, "14210": 28391712768.0, "14215": 28391712768.0, "14220": 28391712768.0, "14225": 28391712768.0, "14230": 28391712768.0, "14235": 28391712768.0, "14240": 28391712768.0, "14245": 28391712768.0, "14250": 28391712768.0, "14255": 28391712768.0, "14260": 28391712768.0, "14265": 28391712768.0, "14270": 28391712768.0, "14275": 28391712768.0, "14280": 28391712768.0, "14285": 28391712768.0, "14290": 28391712768.0, "14295": 28391712768.0, "14300": 28391712768.0, "14305": 28391712768.0, "14310": 28391712768.0, "14315": 28391712768.0, "14320": 28391712768.0, "14325": 28391712768.0, "14330": 28391712768.0, "14335": 28391712768.0, "14340": 28391712768.0, "14345": 28391712768.0, "14350": 28391712768.0, "14355": 28391712768.0, "14360": 28391712768.0, "14365": 28391712768.0, "14370": 28391712768.0, "14375": 28391712768.0, "14380": 28391712768.0, "14385": 28391712768.0, "14390": 28391712768.0, "14395": 28391712768.0, "14400": 28391712768.0, "14405": 28391712768.0, "14410": 28391712768.0, "14415": 28391712768.0, "14420": 28391712768.0, "14425": 28391712768.0, "14430": 28391712768.0, "14435": 28391712768.0, "14440": 28391712768.0, "14445": 28391712768.0, "14450": 28391712768.0, "14455": 28391712768.0, "14460": 28391712768.0, "14465": 28391712768.0, "14470": 28391712768.0, "14475": 28391712768.0, "14480": 28391712768.0, "14485": 28391712768.0, "14490": 28391712768.0, "14495": 28391712768.0, "14500": 28391712768.0, "14505": 28391712768.0, "14510": 28391712768.0, "14515": 28391712768.0, "14520": 28391712768.0, "14525": 28391712768.0, "14530": 28391712768.0, "14535": 28391712768.0, "14540": 28391712768.0, "14545": 28391712768.0, "14550": 28391712768.0, "14555": 28391712768.0, "14560": 28391712768.0, "14565": 28391712768.0, "14570": 28391712768.0, "14575": 28391712768.0, "14580": 28391712768.0, "14585": 28391712768.0, "14590": 28391712768.0, "14595": 28391712768.0, "14600": 28391712768.0, "14605": 28391712768.0, "14610": 28391712768.0, "14615": 28391712768.0, "14620": 28391712768.0, "14625": 28391712768.0, "14630": 28391712768.0, "14635": 28391712768.0, "14640": 28391712768.0, "14645": 28391712768.0, "14650": 28391712768.0, "14655": 28391712768.0, "14660": 28391712768.0, "14665": 28391712768.0, "14670": 28391712768.0, "14675": 28391712768.0, "14680": 28391712768.0, "14685": 28391712768.0, "14690": 28391712768.0, "14695": 28391712768.0, "14700": 28391712768.0, "14705": 28391712768.0, "14710": 28391712768.0, "14715": 28391712768.0, "14720": 28391712768.0, "14725": 28391712768.0, "14730": 28391712768.0, "14735": 28391712768.0, "14740": 28391712768.0, "14745": 28391712768.0, "14750": 28391712768.0, "14755": 28391712768.0, "14760": 28391712768.0, "14765": 28391712768.0, "14770": 28391712768.0, "14775": 28391712768.0, "14780": 28391712768.0, "14785": 28391712768.0, "14790": 28391712768.0, "14795": 28391712768.0, "14800": 28391712768.0, "14805": 28391712768.0, "14810": 28391712768.0, "14815": 28391712768.0, "14820": 28391712768.0, "14825": 28391712768.0, "14830": 28391712768.0, "14835": 28391712768.0, "14840": 28391712768.0, "14845": 28391712768.0, "14850": 28391712768.0, "14855": 28391712768.0, "14860": 28391712768.0, "14865": 28391712768.0, "14870": 28391712768.0, "14875": 28391712768.0, "14880": 28391712768.0, "14885": 28391712768.0, "14890": 28391712768.0, "14895": 28391712768.0, "14900": 28391712768.0, "14905": 28391712768.0, "14910": 28391712768.0, "14915": 28391712768.0, "14920": 28391712768.0, "14925": 28391712768.0, "14930": 28391712768.0, "14935": 28391712768.0, "14940": 28391712768.0, "14945": 28391712768.0, "14950": 28391712768.0, "14955": 28391712768.0, "14960": 28391712768.0, "14965": 28391712768.0, "14970": 28391712768.0, "14975": 28391712768.0, "14980": 28391712768.0, "14985": 28391712768.0, "14990": 28391712768.0, "14995": 28391712768.0, "15000": 28391712768.0, "15005": 28391712768.0, "15010": 28391712768.0, "15015": 28391712768.0, "15020": 28391712768.0, "15025": 28391712768.0, "15030": 28391712768.0, "15035": 28391712768.0, "15040": 28391712768.0, "15045": 28391712768.0, "15050": 28391712768.0, "15055": 28391712768.0, "15060": 28391712768.0, "15065": 28391712768.0, "15070": 28391712768.0, "15075": 28391712768.0, "15080": 28391712768.0, "15085": 28391712768.0, "15090": 28391712768.0, "15095": 28391712768.0, "15100": 28391712768.0, "15105": 28391712768.0, "15110": 28391712768.0, "15115": 28391712768.0, "15120": 28391712768.0, "15125": 28391712768.0, "15130": 28391712768.0, "15135": 28391712768.0, "15140": 28391712768.0, "15145": 28391712768.0, "15150": 28391712768.0, "15155": 28391712768.0, "15160": 28391712768.0, "15165": 28391712768.0, "15170": 28391712768.0, "15175": 28391712768.0, "15180": 28391712768.0, "15185": 28391712768.0, "15190": 28391712768.0, "15195": 28391712768.0, "15200": 28391712768.0, "15205": 28391712768.0, "15210": 28391712768.0, "15215": 28391712768.0, "15220": 28391712768.0, "15225": 28391712768.0, "15230": 28391712768.0, "15235": 28391712768.0, "15240": 28391712768.0, "15245": 28391712768.0, "15250": 28391712768.0, "15255": 28391712768.0, "15260": 28391712768.0, "15265": 28391712768.0, "15270": 28391712768.0, "15275": 28391712768.0, "15280": 28391712768.0, "15285": 28391712768.0, "15290": 28391712768.0, "15295": 28391712768.0, "15300": 28391712768.0, "15305": 28391712768.0, "15310": 28391712768.0, "15315": 28391712768.0, "15320": 28391712768.0, "15325": 28391712768.0, "15330": 28391712768.0, "15335": 28391712768.0, "15340": 28391712768.0, "15345": 28391712768.0, "15350": 28391712768.0, "15355": 28391712768.0, "15360": 28391712768.0, "15365": 28391712768.0, "15370": 28391712768.0, "15375": 28391712768.0, "15380": 28391712768.0, "15385": 28391712768.0, "15390": 28391712768.0, "15395": 28391712768.0, "15400": 28391712768.0, "15405": 28391712768.0, "15410": 28391712768.0, "15415": 28391712768.0, "15420": 28391712768.0, "15425": 28391712768.0, "15430": 28391712768.0, "15435": 28391712768.0, "15440": 28391712768.0, "15445": 28391712768.0, "15450": 28391712768.0, "15455": 28391712768.0, "15460": 28391712768.0, "15465": 28391712768.0, "15470": 28391712768.0, "15475": 28391712768.0, "15480": 28391712768.0, "15485": 28391712768.0, "15490": 28391712768.0, "15495": 28391712768.0, "15500": 28391712768.0, "15505": 28391712768.0, "15510": 28391712768.0, "15515": 28391712768.0, "15520": 28391712768.0, "15525": 28391712768.0, "15530": 28391712768.0, "15535": 28391712768.0, "15540": 28391712768.0, "15545": 28391712768.0, "15550": 28391712768.0, "15555": 28391712768.0, "15560": 28391712768.0, "15565": 28391712768.0, "15570": 28391712768.0, "15575": 28391712768.0, "15580": 28391712768.0, "15585": 28391712768.0, "15590": 28391712768.0, "15595": 28391712768.0, "15600": 28391712768.0, "15605": 28391712768.0, "15610": 28391712768.0, "15615": 28391712768.0, "15620": 28391712768.0, "15625": 28391712768.0, "15630": 28391712768.0, "15635": 28391712768.0, "15640": 28391712768.0, "15645": 28391712768.0, "15650": 28391712768.0, "15655": 28391712768.0, "15660": 28391712768.0, "15665": 28391712768.0, "15670": 28391712768.0, "15675": 28391712768.0, "15680": 28391712768.0, "15685": 28391712768.0, "15690": 28391712768.0, "15695": 28391712768.0, "15700": 28391712768.0, "15705": 28391712768.0, "15710": 28391712768.0, "15715": 28391712768.0, "15720": 28391712768.0, "15725": 28391712768.0, "15730": 28391712768.0, "15735": 28391712768.0, "15740": 28391712768.0, "15745": 28391712768.0, "15750": 28391712768.0, "15755": 28391712768.0, "15760": 28391712768.0, "15765": 28391712768.0, "15770": 28391712768.0, "15775": 28391712768.0, "15780": 28391712768.0, "15785": 28391712768.0, "15790": 28391712768.0, "15795": 28391712768.0, "15800": 28391712768.0, "15805": 28391712768.0, "15810": 28391712768.0, "15815": 28391712768.0, "15820": 28391712768.0, "15825": 28391712768.0, "15830": 28391712768.0, "15835": 28391712768.0, "15840": 28391712768.0, "15845": 28391712768.0, "15850": 28391712768.0, "15855": 28391712768.0, "15860": 28391712768.0, "15865": 28391712768.0, "15870": 28391712768.0, "15875": 28391712768.0, "15880": 28391712768.0, "15885": 28391712768.0, "15890": 28391712768.0, "15895": 28391712768.0, "15900": 28391712768.0, "15905": 28391712768.0, "15910": 28391712768.0, "15915": 28391712768.0, "15920": 28391712768.0, "15925": 28391712768.0, "15930": 28391712768.0, "15935": 28391712768.0, "15940": 28391712768.0, "15945": 28391712768.0, "15950": 28391712768.0, "15955": 28391712768.0, "15960": 28391712768.0, "15965": 28391712768.0, "15970": 28391712768.0, "15975": 28391712768.0, "15980": 28391712768.0, "15985": 28391712768.0, "15990": 28391712768.0, "15995": 28391712768.0, "16000": 28391712768.0, "16005": 28391712768.0, "16010": 28391712768.0, "16015": 28391712768.0, "16020": 28391712768.0, "16025": 28391712768.0, "16030": 28391712768.0, "16035": 28391712768.0, "16040": 28391712768.0, "16045": 28391712768.0, "16050": 28391712768.0, "16055": 28391712768.0, "16060": 28391712768.0, "16065": 28391712768.0, "16070": 28391712768.0, "16075": 28391712768.0, "16080": 28391712768.0, "16085": 28391712768.0, "16090": 28391712768.0, "16095": 28391712768.0, "16100": 28391712768.0, "16105": 28391712768.0, "16110": 28391712768.0, "16115": 28391712768.0, "16120": 28391712768.0, "16125": 28391712768.0, "16130": 28391712768.0, "16135": 28391712768.0, "16140": 28391712768.0, "16145": 28391712768.0, "16150": 28391712768.0, "16155": 28391712768.0, "16160": 28391712768.0, "16165": 28391712768.0, "16170": 28391712768.0, "16175": 28391712768.0, "16180": 28391712768.0, "16185": 28391712768.0, "16190": 28391712768.0, "16195": 28391712768.0, "16200": 28391712768.0, "16205": 28391712768.0, "16210": 28391712768.0, "16215": 28391712768.0, "16220": 28391712768.0, "16225": 28391712768.0, "16230": 28391712768.0, "16235": 28391712768.0, "16240": 28391712768.0, "16245": 28391712768.0, "16250": 28391712768.0, "16255": 28391712768.0, "16260": 28391712768.0, "16265": 28391712768.0, "16270": 28391712768.0, "16275": 28391712768.0, "16280": 28391712768.0, "16285": 28391712768.0, "16290": 28391712768.0, "16295": 28391712768.0, "16300": 28391712768.0, "16305": 28391712768.0, "16310": 28391712768.0, "16315": 28391712768.0, "16320": 28391712768.0, "16325": 28391712768.0, "16330": 28391712768.0, "16335": 28391712768.0, "16340": 28391712768.0, "16345": 28391712768.0, "16350": 28391712768.0, "16355": 28391712768.0, "16360": 28391712768.0, "16365": 28391712768.0, "16370": 28391712768.0, "16375": 28391712768.0, "16380": 28391712768.0, "16385": 28391712768.0, "16390": 28391712768.0, "16395": 28391712768.0, "16400": 28391712768.0, "16405": 28391712768.0, "16410": 28391712768.0, "16415": 28391712768.0, "16420": 28391712768.0, "16425": 28391712768.0, "16430": 28391712768.0, "16435": 28391712768.0, "16440": 28391712768.0, "16445": 28391712768.0, "16450": 28391712768.0, "16455": 28391712768.0, "16460": 28391712768.0, "16465": 28391712768.0, "16470": 28391712768.0, "16475": 28391712768.0, "16480": 28391712768.0, "16485": 28391712768.0, "16490": 28391712768.0, "16495": 28391712768.0, "16500": 28391712768.0, "16505": 28391712768.0, "16510": 28391712768.0, "16515": 28391712768.0, "16520": 28391712768.0, "16525": 28391712768.0, "16530": 28391712768.0, "16535": 28391712768.0, "16540": 28391712768.0, "16545": 28391712768.0, "16550": 28391712768.0, "16555": 28391712768.0, "16560": 28391712768.0, "16565": 28391712768.0, "16570": 28391712768.0, "16575": 28391712768.0, "16580": 28391712768.0, "16585": 28391712768.0, "16590": 28391712768.0, "16595": 28391712768.0, "16600": 28391712768.0, "16605": 28391712768.0, "16610": 28391712768.0, "16615": 28391712768.0, "16620": 28391712768.0, "16625": 28391712768.0, "16630": 28391712768.0, "16635": 28391712768.0, "16640": 28391712768.0, "16645": 28391712768.0, "16650": 28391712768.0, "16655": 28391712768.0, "16660": 28391712768.0, "16665": 28391712768.0, "16670": 28391712768.0, "16675": 28391712768.0, "16680": 28391712768.0, "16685": 28391712768.0, "16690": 28391712768.0, "16695": 28391712768.0, "16700": 28391712768.0, "16705": 28391712768.0, "16710": 28391712768.0, "16715": 28391712768.0, "16720": 28391712768.0, "16725": 28391712768.0, "16730": 28391712768.0, "16735": 28391712768.0, "16740": 28391712768.0, "16745": 28391712768.0, "16750": 28391712768.0, "16755": 28391712768.0, "16760": 28391712768.0, "16765": 28391712768.0, "16770": 28391712768.0, "16775": 28391712768.0, "16780": 28391712768.0, "16785": 28391712768.0, "16790": 28391712768.0, "16795": 28391712768.0, "16800": 28391712768.0, "16805": 28391712768.0, "16810": 28391712768.0, "16815": 28391712768.0, "16820": 28391712768.0, "16825": 28391712768.0, "16830": 28391712768.0, "16835": 28391712768.0, "16840": 28391712768.0, "16845": 28391712768.0, "16850": 28391712768.0, "16855": 28391712768.0, "16860": 28391712768.0, "16865": 28391712768.0, "16870": 28391712768.0, "16875": 28391712768.0, "16880": 28391712768.0, "16885": 28391712768.0, "16890": 28391712768.0, "16895": 28391712768.0, "16900": 28391712768.0, "16905": 28391712768.0, "16910": 28391712768.0, "16915": 28391712768.0, "16920": 28391712768.0, "16925": 28391712768.0, "16930": 28391712768.0, "16935": 28391712768.0, "16940": 28391712768.0, "16945": 28391712768.0, "16950": 28391712768.0, "16955": 28391712768.0, "16960": 28391712768.0, "16965": 28391712768.0, "16970": 28391712768.0, "16975": 28391712768.0, "16980": 28391712768.0, "16985": 28391712768.0, "16990": 28391712768.0, "16995": 28391712768.0, "17000": 28391712768.0, "17005": 28391712768.0, "17010": 28391712768.0, "17015": 28391712768.0, "17020": 28391712768.0, "17025": 28391712768.0, "17030": 28391712768.0, "17035": 28391712768.0, "17040": 28391712768.0, "17045": 28391712768.0, "17050": 28391712768.0, "17055": 28391712768.0, "17060": 28391712768.0, "17065": 28391712768.0, "17070": 28391712768.0, "17075": 28391712768.0, "17080": 28391712768.0, "17085": 28391712768.0, "17090": 28391712768.0, "17095": 28391712768.0, "17100": 28391712768.0, "17105": 28391712768.0, "17110": 28391712768.0, "17115": 28391712768.0, "17120": 28391712768.0, "17125": 28391712768.0, "17130": 28391712768.0, "17135": 28391712768.0, "17140": 28391712768.0, "17145": 28391712768.0, "17150": 28391712768.0, "17155": 28391712768.0, "17160": 28391712768.0, "17165": 28391712768.0, "17170": 28391712768.0, "17175": 28391712768.0, "17180": 28391712768.0, "17185": 28391712768.0, "17190": 28391712768.0, "17195": 28391712768.0, "17200": 28391712768.0, "17205": 28391712768.0, "17210": 28391712768.0, "17215": 28391712768.0, "17220": 28391712768.0, "17225": 28391712768.0, "17230": 28391712768.0, "17235": 28391712768.0, "17240": 28391712768.0, "17245": 28391712768.0, "17250": 28391712768.0, "17255": 28391712768.0, "17260": 28391712768.0, "17265": 28391712768.0, "17270": 28391712768.0, "17275": 28391712768.0, "17280": 28391712768.0, "17285": 28391712768.0, "17290": 28391712768.0, "17295": 28391712768.0, "17300": 28391712768.0, "17305": 28391712768.0, "17310": 28391712768.0, "17315": 28391712768.0, "17320": 28391712768.0, "17325": 28391712768.0, "17330": 28391712768.0, "17335": 28391712768.0, "17340": 28391712768.0, "17345": 28391712768.0, "17350": 28391712768.0, "17355": 28391712768.0, "17360": 28391712768.0, "17365": 28391712768.0, "17370": 28391712768.0, "17375": 28391712768.0, "17380": 28391712768.0, "17385": 28391712768.0, "17390": 28391712768.0, "17395": 28391712768.0, "17400": 28391712768.0, "17405": 28391712768.0, "17410": 28391712768.0, "17415": 28391712768.0, "17420": 28391712768.0, "17425": 28391712768.0, "17430": 28391712768.0, "17435": 28391712768.0, "17440": 28391712768.0, "17445": 28391712768.0, "17450": 28391712768.0, "17455": 28391712768.0, "17460": 28391712768.0, "17465": 28391712768.0, "17470": 28391712768.0, "17475": 28391712768.0, "17480": 28391712768.0, "17485": 28391712768.0, "17490": 28391712768.0, "17495": 28391712768.0, "17500": 28391712768.0, "17505": 28391712768.0, "17510": 28391712768.0, "17515": 28391712768.0, "17520": 28391712768.0, "17525": 28391712768.0, "17530": 28391712768.0, "17535": 28391712768.0, "17540": 28391712768.0, "17545": 28391712768.0, "17550": 28391712768.0, "17555": 28391712768.0, "17560": 28391712768.0, "17565": 28391712768.0, "17570": 28391712768.0, "17575": 28391712768.0, "17580": 28391712768.0, "17585": 28391712768.0, "17590": 28391712768.0, "17595": 28391712768.0, "17600": 28391712768.0, "17605": 28391712768.0, "17610": 28391712768.0, "17615": 28391712768.0, "17620": 28391712768.0, "17625": 28391712768.0, "17630": 28391712768.0, "17635": 28391712768.0, "17640": 28391712768.0, "17645": 28391712768.0, "17650": 28391712768.0, "17655": 28391712768.0, "17660": 28391712768.0, "17665": 28391712768.0, "17670": 28391712768.0, "17675": 28391712768.0, "17680": 28391712768.0, "17685": 28391712768.0, "17690": 28391712768.0, "17695": 28391712768.0, "17700": 28391712768.0, "17705": 28391712768.0, "17710": 28391712768.0, "17715": 28391712768.0, "17720": 28391712768.0, "17725": 28391712768.0, "17730": 28391712768.0, "17735": 28391712768.0, "17740": 28391712768.0, "17745": 28391712768.0, "17750": 28391712768.0, "17755": 28391712768.0, "17760": 28391712768.0, "17765": 28391712768.0, "17770": 28391712768.0, "17775": 28391712768.0, "17780": 28391712768.0, "17785": 28391712768.0, "17790": 28391712768.0, "17795": 28391712768.0, "17800": 28391712768.0, "17805": 28391712768.0, "17810": 28391712768.0, "17815": 28391712768.0, "17820": 28391712768.0, "17825": 28391712768.0, "17830": 28391712768.0, "17835": 28391712768.0, "17840": 28391712768.0, "17845": 28391712768.0, "17850": 28391712768.0, "17855": 28391712768.0, "17860": 28391712768.0, "17865": 28391712768.0, "17870": 28391712768.0, "17875": 28391712768.0, "17880": 28391712768.0, "17885": 28391712768.0, "17890": 28391712768.0, "17895": 28391712768.0, "17900": 28391712768.0, "17905": 28391712768.0, "17910": 28391712768.0, "17915": 28391712768.0, "17920": 28391712768.0, "17925": 28391712768.0, "17930": 28391712768.0, "17935": 28391712768.0, "17940": 28391712768.0, "17945": 28391712768.0, "17950": 28391712768.0, "17955": 28391712768.0, "17960": 28391712768.0, "17965": 28391712768.0, "17970": 28391712768.0, "17975": 28391712768.0, "17980": 28391712768.0, "17985": 28391712768.0, "17990": 28391712768.0, "17995": 28391712768.0, "18000": 28391712768.0, "18005": 28391712768.0, "18010": 28391712768.0, "18015": 28391712768.0, "18020": 28391712768.0, "18025": 28391712768.0, "18030": 28391712768.0, "18035": 28391712768.0, "18040": 28391712768.0, "18045": 28391712768.0, "18050": 28391712768.0, "18055": 28391712768.0, "18060": 28391712768.0, "18065": 28391712768.0, "18070": 28391712768.0, "18075": 28391712768.0, "18080": 28391712768.0, "18085": 28391712768.0, "18090": 28391712768.0, "18095": 28391712768.0, "18100": 28391712768.0, "18105": 28391712768.0, "18110": 28391712768.0, "18115": 28391712768.0, "18120": 28391712768.0, "18125": 28391712768.0, "18130": 28391712768.0, "18135": 28391712768.0, "18140": 28391712768.0, "18145": 28391712768.0, "18150": 28391712768.0, "18155": 28391712768.0, "18160": 28391712768.0, "18165": 28391712768.0, "18170": 28391712768.0, "18175": 28391712768.0, "18180": 28391712768.0, "18185": 28391712768.0, "18190": 28391712768.0, "18195": 28391712768.0, "18200": 28391712768.0, "18205": 28391712768.0, "18210": 28391712768.0, "18215": 28391712768.0, "18220": 28391712768.0, "18225": 28391712768.0, "18230": 28391712768.0, "18235": 28391712768.0, "18240": 28391712768.0, "18245": 28391712768.0, "18250": 28391712768.0, "18255": 28391712768.0, "18260": 28391712768.0, "18265": 28391712768.0, "18270": 28391712768.0, "18275": 28391712768.0, "18280": 28391712768.0, "18285": 28391712768.0, "18290": 28391712768.0, "18295": 28391712768.0, "18300": 28391712768.0, "18305": 28391712768.0, "18310": 28391712768.0, "18315": 28391712768.0, "18320": 28391712768.0, "18325": 28391712768.0, "18330": 28391712768.0, "18335": 28391712768.0, "18340": 28391712768.0, "18345": 28391712768.0, "18350": 28391712768.0, "18355": 28391712768.0, "18360": 28391712768.0, "18365": 28391712768.0, "18370": 28391712768.0, "18375": 28391712768.0, "18380": 28391712768.0, "18385": 28391712768.0, "18390": 28391712768.0, "18395": 28391712768.0, "18400": 28391712768.0, "18405": 28391712768.0, "18410": 28391712768.0, "18415": 28391712768.0, "18420": 28391712768.0, "18425": 28391712768.0, "18430": 28391712768.0, "18435": 28391712768.0, "18440": 28391712768.0, "18445": 28391712768.0, "18450": 28391712768.0, "18455": 28391712768.0, "18460": 28391712768.0, "18465": 28391712768.0, "18470": 28391712768.0, "18475": 28391712768.0, "18480": 28391712768.0, "18485": 28391712768.0, "18490": 28391712768.0, "18495": 28391712768.0, "18500": 28391712768.0, "18505": 28391712768.0, "18510": 28391712768.0, "18515": 28391712768.0, "18520": 28391712768.0, "18525": 28391712768.0, "18530": 28391712768.0, "18535": 28391712768.0, "18540": 28391712768.0, "18545": 28391712768.0, "18550": 28391712768.0, "18555": 28391712768.0, "18560": 28391712768.0, "18565": 28391712768.0, "18570": 28391712768.0, "18575": 28391712768.0, "18580": 28391712768.0, "18585": 28391712768.0, "18590": 28391712768.0, "18595": 28391712768.0, "18600": 28391712768.0, "18605": 28391712768.0, "18610": 28391712768.0, "18615": 28391712768.0, "18620": 28391712768.0, "18625": 28391712768.0, "18630": 28391712768.0, "18635": 28391712768.0, "18640": 28391712768.0, "18645": 28391712768.0, "18650": 28391712768.0, "18655": 28391712768.0, "18660": 28391712768.0, "18665": 28391712768.0, "18670": 28391712768.0, "18675": 28391712768.0, "18680": 28391712768.0, "18685": 28391712768.0, "18690": 28391712768.0, "18695": 28391712768.0, "18700": 28391712768.0, "18705": 28391712768.0, "18710": 28391712768.0, "18715": 28391712768.0, "18720": 28391712768.0, "18725": 28391712768.0, "18730": 28391712768.0, "18735": 28391712768.0, "18740": 28391712768.0, "18745": 28391712768.0, "18750": 28391712768.0, "18755": 28391712768.0, "18760": 28391712768.0, "18765": 28391712768.0, "18770": 28391712768.0, "18775": 28391712768.0, "18780": 28391712768.0, "18785": 28391712768.0, "18790": 28391712768.0, "18795": 28391712768.0, "18800": 28391712768.0, "18805": 28391712768.0, "18810": 28391712768.0, "18815": 28391712768.0, "18820": 28391712768.0, "18825": 28391712768.0, "18830": 28391712768.0, "18835": 28391712768.0, "18840": 28391712768.0, "18845": 28391712768.0, "18850": 28391712768.0, "18855": 28391712768.0, "18860": 28391712768.0, "18865": 28391712768.0, "18870": 28391712768.0, "18875": 28391712768.0, "18880": 28391712768.0, "18885": 28391712768.0, "18890": 28391712768.0, "18895": 28391712768.0, "18900": 28391712768.0, "18905": 28391712768.0, "18910": 28391712768.0, "18915": 28391712768.0, "18920": 28391712768.0, "18925": 28391712768.0, "18930": 28391712768.0, "18935": 28391712768.0, "18940": 28391712768.0, "18945": 28391712768.0, "18950": 28391712768.0, "18955": 28391712768.0, "18960": 28391712768.0, "18965": 28391712768.0, "18970": 28391712768.0, "18975": 28391712768.0, "18980": 28391712768.0, "18985": 28391712768.0, "18990": 28391712768.0, "18995": 28391712768.0, "19000": 28391712768.0, "19005": 28391712768.0, "19010": 28391712768.0, "19015": 28391712768.0, "19020": 28391712768.0, "19025": 28391712768.0, "19030": 28391712768.0, "19035": 28391712768.0, "19040": 28391712768.0, "19045": 28391712768.0, "19050": 28391712768.0, "19055": 28391712768.0, "19060": 28391712768.0, "19065": 28391712768.0, "19070": 28391712768.0, "19075": 28391712768.0, "19080": 28391712768.0, "19085": 28391712768.0, "19090": 28391712768.0, "19095": 28391712768.0, "19100": 28391712768.0, "19105": 28391712768.0, "19110": 28391712768.0, "19115": 28391712768.0, "19120": 28391712768.0, "19125": 28391712768.0, "19130": 28391712768.0, "19135": 28391712768.0, "19140": 28391712768.0, "19145": 28391712768.0, "19150": 28391712768.0, "19155": 28391712768.0, "19160": 28391712768.0, "19165": 28391712768.0, "19170": 28391712768.0, "19175": 28391712768.0, "19180": 28391712768.0, "19185": 28391712768.0, "19190": 28391712768.0, "19195": 28391712768.0, "19200": 28391712768.0, "19205": 28391712768.0, "19210": 28391712768.0, "19215": 28391712768.0, "19220": 28391712768.0, "19225": 28391712768.0, "19230": 28391712768.0, "19235": 28391712768.0, "19240": 28391712768.0, "19245": 28391712768.0, "19250": 28391712768.0, "19255": 28391712768.0, "19260": 28391712768.0, "19265": 28391712768.0, "19270": 28391712768.0, "19275": 28391712768.0, "19280": 28391712768.0, "19285": 28391712768.0, "19290": 28391712768.0, "19295": 28391712768.0, "19300": 28391712768.0, "19305": 28391712768.0, "19310": 28391712768.0, "19315": 28391712768.0, "19320": 28391712768.0, "19325": 28391712768.0, "19330": 28391712768.0, "19335": 28391712768.0, "19340": 28391712768.0, "19345": 28391712768.0, "19350": 28391712768.0, "19355": 28391712768.0, "19360": 28391712768.0, "19365": 28391712768.0, "19370": 28391712768.0, "19375": 28391712768.0, "19380": 28391712768.0, "19385": 28391712768.0, "19390": 28391712768.0, "19395": 28391712768.0, "19400": 28391712768.0, "19405": 28391712768.0, "19410": 28391712768.0, "19415": 28391712768.0, "19420": 28391712768.0, "19425": 28391712768.0, "19430": 28391712768.0, "19435": 28391712768.0, "19440": 28391712768.0, "19445": 28391712768.0, "19450": 28391712768.0, "19455": 28391712768.0, "19460": 28391712768.0, "19465": 28391712768.0, "19470": 28391712768.0, "19475": 28391712768.0, "19480": 28391712768.0, "19485": 28391712768.0, "19490": 28391712768.0, "19495": 28391712768.0, "19500": 28391712768.0, "19505": 28391712768.0, "19510": 28391712768.0, "19515": 28391712768.0, "19520": 28391712768.0, "19525": 28391712768.0, "19530": 28391712768.0, "19535": 28391712768.0, "19540": 28391712768.0, "19545": 28391712768.0, "19550": 28391712768.0, "19555": 28391712768.0, "19560": 28391712768.0, "19565": 28391712768.0, "19570": 28391712768.0, "19575": 28391712768.0, "19580": 28391712768.0, "19585": 28391712768.0, "19590": 28391712768.0, "19595": 28391712768.0, "19600": 28391712768.0, "19605": 28391712768.0, "19610": 28391712768.0, "19615": 28391712768.0, "19620": 28391712768.0, "19625": 28391712768.0, "19630": 28391712768.0, "19635": 28391712768.0, "19640": 28391712768.0, "19645": 28391712768.0, "19650": 28391712768.0, "19655": 28391712768.0, "19660": 28391712768.0, "19665": 28391712768.0, "19670": 28391712768.0, "19675": 28391712768.0, "19680": 28391712768.0, "19685": 28391712768.0, "19690": 28391712768.0, "19695": 28391712768.0, "19700": 28391712768.0, "19705": 28391712768.0, "19710": 28391712768.0, "19715": 28391712768.0, "19720": 28391712768.0, "19725": 28391712768.0, "19730": 28391712768.0, "19735": 28391712768.0, "19740": 28391712768.0, "19745": 28391712768.0, "19750": 28391712768.0, "19755": 28391712768.0, "19760": 28391712768.0, "19765": 28391712768.0, "19770": 28391712768.0, "19775": 28391712768.0, "19780": 28391712768.0, "19785": 28391712768.0, "19790": 28391712768.0, "19795": 28391712768.0, "19800": 28391712768.0, "19805": 28391712768.0, "19810": 28391712768.0, "19815": 28391712768.0, "19820": 28391712768.0, "19825": 28391712768.0, "19830": 28391712768.0, "19835": 28391712768.0, "19840": 28391712768.0, "19845": 28391712768.0, "19850": 28391712768.0, "19855": 28391712768.0, "19860": 28391712768.0, "19865": 28391712768.0, "19870": 28391712768.0, "19875": 28391712768.0, "19880": 28391712768.0, "19885": 28391712768.0, "19890": 28391712768.0, "19895": 28391712768.0, "19900": 28391712768.0, "19905": 28391712768.0, "19910": 28391712768.0, "19915": 28391712768.0, "19920": 28391712768.0, "19925": 28391712768.0, "19930": 28391712768.0, "19935": 28391712768.0, "19940": 28391712768.0, "19945": 28391712768.0, "19950": 28391712768.0, "19955": 28391712768.0, "19960": 28391712768.0, "19965": 28391712768.0, "19970": 28391712768.0, "19975": 28391712768.0, "19980": 28391712768.0, "19985": 28391712768.0, "19990": 28391712768.0, "19995": 28391712768.0, "20000": 28391712768.0, "20005": 28391712768.0, "20010": 28391712768.0, "20015": 28391712768.0, "20020": 28391712768.0, "20025": 28391712768.0, "20030": 28391712768.0, "20035": 28391712768.0, "20040": 28391712768.0, "20045": 28391712768.0, "20050": 28391712768.0, "20055": 28391712768.0, "20060": 28391712768.0, "20065": 28391712768.0, "20070": 28391712768.0, "20075": 28391712768.0, "20080": 28391712768.0, "20085": 28391712768.0, "20090": 28391712768.0, "20095": 28391712768.0, "20100": 28391712768.0, "20105": 28391712768.0, "20110": 28391712768.0, "20115": 28391712768.0, "20120": 28391712768.0, "20125": 28391712768.0, "20130": 28391712768.0, "20135": 28391712768.0, "20140": 28391712768.0, "20145": 28391712768.0, "20150": 28391712768.0, "20155": 28391712768.0, "20160": 28391712768.0, "20165": 28391712768.0, "20170": 28391712768.0, "20175": 28391712768.0, "20180": 28391712768.0, "20185": 28391712768.0, "20190": 28391712768.0, "20195": 28391712768.0, "20200": 28391712768.0, "20205": 28391712768.0, "20210": 28391712768.0, "20215": 28391712768.0, "20220": 28391712768.0, "20225": 28391712768.0, "20230": 28391712768.0, "20235": 28391712768.0, "20240": 28391712768.0, "20245": 28391712768.0, "20250": 28391712768.0, "20255": 28391712768.0, "20260": 28391712768.0, "20265": 28391712768.0, "20270": 28391712768.0, "20275": 28391712768.0, "20280": 28391712768.0, "20285": 28391712768.0, "20290": 28391712768.0, "20295": 28391712768.0, "20300": 28391712768.0, "20305": 28391712768.0, "20310": 28391712768.0, "20315": 28391712768.0, "20320": 28391712768.0, "20325": 28391712768.0, "20330": 28391712768.0, "20335": 28391712768.0, "20340": 28391712768.0, "20345": 28391712768.0, "20350": 28391712768.0, "20355": 28391712768.0, "20360": 28391712768.0, "20365": 28391712768.0, "20370": 28391712768.0, "20375": 28391712768.0, "20380": 28391712768.0, "20385": 28391712768.0, "20390": 28391712768.0, "20395": 28391712768.0, "20400": 28391712768.0, "20405": 28391712768.0, "20410": 28391712768.0, "20415": 28391712768.0, "20420": 28391712768.0, "20425": 28391712768.0, "20430": 28391712768.0, "20435": 28391712768.0, "20440": 28391712768.0, "20445": 28391712768.0, "20450": 28391712768.0, "20455": 28391712768.0, "20460": 28391712768.0, "20465": 28391712768.0, "20470": 28391712768.0, "20475": 28391712768.0, "20480": 28391712768.0, "20485": 28391712768.0, "20490": 28391712768.0, "20495": 28391712768.0, "20500": 28391712768.0, "20505": 28391712768.0, "20510": 28391712768.0, "20515": 28391712768.0, "20520": 28391712768.0, "20525": 28391712768.0, "20530": 28391712768.0, "20535": 28391712768.0, "20540": 28391712768.0, "20545": 28391712768.0, "20550": 28391712768.0, "20555": 28391712768.0, "20560": 28391712768.0, "20565": 28391712768.0, "20570": 28391712768.0, "20575": 28391712768.0, "20580": 28391712768.0, "20585": 28391712768.0, "20590": 28391712768.0, "20595": 28391712768.0, "20600": 28391712768.0, "20605": 28391712768.0, "20610": 28391712768.0, "20615": 28391712768.0, "20620": 28391712768.0, "20625": 28391712768.0, "20630": 28391712768.0, "20635": 28391712768.0, "20640": 28391712768.0, "20645": 28391712768.0, "20650": 28391712768.0, "20655": 28391712768.0, "20660": 28391712768.0, "20665": 28391712768.0, "20670": 28391712768.0, "20675": 28391712768.0, "20680": 28391712768.0, "20685": 28391712768.0, "20690": 28391712768.0, "20695": 28391712768.0, "20700": 28391712768.0, "20705": 28391712768.0, "20710": 28391712768.0, "20715": 28391712768.0, "20720": 28391712768.0, "20725": 28391712768.0, "20730": 28391712768.0, "20735": 28391712768.0, "20740": 28391712768.0, "20745": 28391712768.0, "20750": 28391712768.0, "20755": 28391712768.0, "20760": 28391712768.0, "20765": 28391712768.0, "20770": 28391712768.0, "20775": 28391712768.0, "20780": 28391712768.0, "20785": 28391712768.0, "20790": 28391712768.0, "20795": 28391712768.0, "20800": 28391712768.0, "20805": 28391712768.0, "20810": 28391712768.0, "20815": 28391712768.0, "20820": 28391712768.0, "20825": 28391712768.0, "20830": 28391712768.0, "20835": 28391712768.0, "20840": 28391712768.0, "20845": 28391712768.0, "20850": 28391712768.0, "20855": 28391712768.0, "20860": 28391712768.0, "20865": 28391712768.0, "20870": 28391712768.0, "20875": 28391712768.0, "20880": 28391712768.0, "20885": 28391712768.0, "20890": 28391712768.0, "20895": 28391712768.0, "20900": 28391712768.0, "20905": 28391712768.0, "20910": 28391712768.0, "20915": 28391712768.0, "20920": 28391712768.0, "20925": 28391712768.0, "20930": 28391712768.0, "20935": 28391712768.0, "20940": 28391712768.0, "20945": 28391712768.0, "20950": 28391712768.0, "20955": 28391712768.0, "20960": 28391712768.0, "20965": 28391712768.0, "20970": 28391712768.0, "20975": 28391712768.0, "20980": 28391712768.0, "20985": 28391712768.0, "20990": 28391712768.0, "20995": 28391712768.0, "21000": 28391712768.0, "21005": 28391712768.0, "21010": 28391712768.0, "21015": 28391712768.0, "21020": 28391712768.0, "21025": 28391712768.0, "21030": 28391712768.0, "21035": 28391712768.0, "21040": 28391712768.0, "21045": 28391712768.0, "21050": 28391712768.0, "21055": 28391712768.0, "21060": 28391712768.0, "21065": 28391712768.0, "21070": 28391712768.0, "21075": 28391712768.0, "21080": 28391712768.0, "21085": 28391712768.0, "21090": 28391712768.0, "21095": 28391712768.0, "21100": 28391712768.0, "21105": 28391712768.0, "21110": 28391712768.0, "21115": 28391712768.0, "21120": 28391712768.0, "21125": 28391712768.0, "21130": 28391712768.0, "21135": 28391712768.0, "21140": 28391712768.0, "21145": 28391712768.0, "21150": 28391712768.0, "21155": 28391712768.0, "21160": 28391712768.0, "21165": 28391712768.0, "21170": 28391712768.0, "21175": 28391712768.0, "21180": 28391712768.0, "21185": 28391712768.0, "21190": 28391712768.0, "21195": 28391712768.0, "21200": 28391712768.0, "21205": 28391712768.0, "21210": 28391712768.0, "21215": 28391712768.0, "21220": 28391712768.0, "21225": 28391712768.0, "21230": 28391712768.0, "21235": 28391712768.0, "21240": 28391712768.0, "21245": 28391712768.0, "21250": 28391712768.0, "21255": 28391712768.0, "21260": 28391712768.0, "21265": 28391712768.0, "21270": 28391712768.0, "21275": 28391712768.0, "21280": 28391712768.0, "21285": 28391712768.0, "21290": 28391712768.0, "21295": 28391712768.0, "21300": 28391712768.0, "21305": 28391712768.0, "21310": 28391712768.0, "21315": 28391712768.0, "21320": 28391712768.0, "21325": 28391712768.0, "21330": 28391712768.0, "21335": 28391712768.0, "21340": 28391712768.0, "21345": 28391712768.0, "21350": 28391712768.0, "21355": 28391712768.0, "21360": 28391712768.0, "21365": 28391712768.0, "21370": 28391712768.0, "21375": 28391712768.0, "21380": 28391712768.0, "21385": 28391712768.0, "21390": 28391712768.0, "21395": 28391712768.0, "21400": 28391712768.0, "21405": 28391712768.0, "21410": 28391712768.0, "21415": 28391712768.0, "21420": 28391712768.0, "21425": 28391712768.0, "21430": 28391712768.0, "21435": 28391712768.0, "21440": 28391712768.0, "21445": 28391712768.0, "21450": 28391712768.0, "21455": 28391712768.0, "21460": 28391712768.0, "21465": 28391712768.0, "21470": 28391712768.0, "21475": 28391712768.0, "21480": 28391712768.0, "21485": 28391712768.0, "21490": 28391712768.0, "21495": 28391712768.0, "21500": 28391712768.0, "21505": 28391712768.0, "21510": 28391712768.0, "21515": 28391712768.0, "21520": 28391712768.0, "21525": 28391712768.0, "21530": 28391712768.0, "21535": 28391712768.0, "21540": 28391712768.0, "21545": 28391712768.0, "21550": 28391712768.0, "21555": 28391712768.0, "21560": 28391712768.0, "21565": 28391712768.0, "21570": 28391712768.0, "21575": 28391712768.0, "21580": 28391712768.0, "21585": 28391712768.0, "21590": 28391712768.0, "21595": 28391712768.0, "21600": 28391712768.0, "21605": 28391712768.0, "21610": 28391712768.0, "21615": 28391712768.0, "21620": 28391712768.0, "21625": 28391712768.0, "21630": 28391712768.0, "21635": 28391712768.0, "21640": 28391712768.0, "21645": 28391712768.0, "21650": 28391712768.0, "21655": 28391712768.0, "21660": 28391712768.0, "21665": 28391712768.0, "21670": 28391712768.0, "21675": 28391712768.0, "21680": 28391712768.0, "21685": 28391712768.0, "21690": 28391712768.0, "21695": 28391712768.0, "21700": 28391712768.0, "21705": 28391712768.0, "21710": 28391712768.0, "21715": 28391712768.0, "21720": 28391712768.0, "21725": 28391712768.0, "21730": 28391712768.0, "21735": 28391712768.0, "21740": 28391712768.0, "21745": 28391712768.0, "21750": 28391712768.0, "21755": 28391712768.0, "21760": 28391712768.0, "21765": 28391712768.0, "21770": 28391712768.0, "21775": 28391712768.0, "21780": 28391712768.0, "21785": 28391712768.0, "21790": 28391712768.0, "21795": 28391712768.0, "21800": 28391712768.0, "21805": 28391712768.0, "21810": 28391712768.0, "21815": 28391712768.0, "21820": 28391712768.0, "21825": 28391712768.0, "21830": 28391712768.0, "21835": 28391712768.0, "21840": 28391712768.0, "21845": 28391712768.0, "21850": 28391712768.0, "21855": 28391712768.0, "21860": 28391712768.0, "21865": 28391712768.0, "21870": 28391712768.0, "21875": 28391712768.0, "21880": 28391712768.0, "21885": 28391712768.0, "21890": 28391712768.0, "21895": 28391712768.0, "21900": 28391712768.0, "21905": 28391712768.0, "21910": 28391712768.0, "21915": 28391712768.0, "21920": 28391712768.0, "21925": 28391712768.0, "21930": 28391712768.0, "21935": 28391712768.0, "21940": 28391712768.0, "21945": 28391712768.0, "21950": 28391712768.0, "21955": 28391712768.0, "21960": 28391712768.0, "21965": 28391712768.0, "21970": 28391712768.0, "21975": 28391712768.0, "21980": 28391712768.0, "21985": 28391712768.0, "21990": 28391712768.0, "21995": 28391712768.0, "22000": 28391712768.0, "22005": 28391712768.0, "22010": 28391712768.0, "22015": 28391712768.0, "22020": 28391712768.0, "22025": 28391712768.0, "22030": 28391712768.0, "22035": 28391712768.0, "22040": 28391712768.0, "22045": 28391712768.0, "22050": 28391712768.0, "22055": 28391712768.0, "22060": 28391712768.0, "22065": 28391712768.0, "22070": 28391712768.0, "22075": 28391712768.0, "22080": 28391712768.0, "22085": 28391712768.0, "22090": 28391712768.0, "22095": 28391712768.0, "22100": 28391712768.0, "22105": 28391712768.0, "22110": 28391712768.0, "22115": 28391712768.0, "22120": 28391712768.0, "22125": 28391712768.0, "22130": 28391712768.0, "22135": 28391712768.0, "22140": 28391712768.0, "22145": 28391712768.0, "22150": 28391712768.0, "22155": 28391712768.0, "22160": 28391712768.0, "22165": 28391712768.0, "22170": 28391712768.0, "22175": 28391712768.0, "22180": 28391712768.0, "22185": 28391712768.0, "22190": 28391712768.0, "22195": 28391712768.0, "22200": 28391712768.0, "22205": 28391712768.0, "22210": 28391712768.0, "22215": 28391712768.0, "22220": 28391712768.0, "22225": 28391712768.0, "22230": 28391712768.0, "22235": 28391712768.0, "22240": 28391712768.0, "22245": 28391712768.0, "22250": 28391712768.0, "22255": 28391712768.0, "22260": 28391712768.0, "22265": 28391712768.0, "22270": 28391712768.0, "22275": 28391712768.0, "22280": 28391712768.0, "22285": 28391712768.0, "22290": 28391712768.0, "22295": 28391712768.0, "22300": 28391712768.0, "22305": 28391712768.0, "22310": 28391712768.0, "22315": 28391712768.0, "22320": 28391712768.0, "22325": 28391712768.0, "22330": 28391712768.0, "22335": 28391712768.0, "22340": 28391712768.0, "22345": 28391712768.0, "22350": 28391712768.0, "22355": 28391712768.0, "22360": 28391712768.0, "22365": 28391712768.0, "22370": 28391712768.0, "22375": 28391712768.0, "22380": 28391712768.0, "22385": 28391712768.0, "22390": 28391712768.0, "22395": 28391712768.0, "22400": 28391712768.0, "22405": 28391712768.0, "22410": 28391712768.0, "22415": 28391712768.0, "22420": 28391712768.0, "22425": 28391712768.0, "22430": 28391712768.0, "22435": 28391712768.0, "22440": 28391712768.0, "22445": 28391712768.0, "22450": 28391712768.0, "22455": 28391712768.0, "22460": 28391712768.0, "22465": 28391712768.0, "22470": 28391712768.0, "22475": 28391712768.0, "22480": 28391712768.0, "22485": 28391712768.0, "22490": 28391712768.0, "22495": 28391712768.0, "22500": 28391712768.0, "22505": 28391712768.0, "22510": 28391712768.0, "22515": 28391712768.0, "22520": 28391712768.0, "22525": 28391712768.0, "22530": 28391712768.0, "22535": 28391712768.0, "22540": 28391712768.0, "22545": 28391712768.0, "22550": 28391712768.0, "22555": 28391712768.0, "22560": 28391712768.0, "22565": 28391712768.0, "22570": 28391712768.0, "22575": 28391712768.0, "22580": 28391712768.0, "22585": 28391712768.0, "22590": 28391712768.0, "22595": 28391712768.0, "22600": 28391712768.0, "22605": 28391712768.0, "22610": 28391712768.0, "22615": 28391712768.0, "22620": 28391712768.0, "22625": 28391712768.0, "22630": 28391712768.0, "22635": 28391712768.0, "22640": 28391712768.0, "22645": 28391712768.0, "22650": 28391712768.0, "22655": 28391712768.0, "22660": 28391712768.0, "22665": 28391712768.0, "22670": 28391712768.0, "22675": 28391712768.0, "22680": 28391712768.0, "22685": 28391712768.0, "22690": 28391712768.0, "22695": 28391712768.0, "22700": 28391712768.0, "22705": 28391712768.0, "22710": 28391712768.0, "22715": 28391712768.0, "22720": 28391712768.0, "22725": 28391712768.0, "22730": 28391712768.0, "22735": 28391712768.0, "22740": 28391712768.0, "22745": 28391712768.0, "22750": 28391712768.0, "22755": 28391712768.0, "22760": 28391712768.0, "22765": 28391712768.0, "22770": 28391712768.0, "22775": 28391712768.0, "22780": 28391712768.0, "22785": 28391712768.0, "22790": 28391712768.0, "22795": 28391712768.0, "22800": 28391712768.0, "22805": 28391712768.0, "22810": 28391712768.0, "22815": 28391712768.0, "22820": 28391712768.0, "22825": 28391712768.0, "22830": 28391712768.0, "22835": 28391712768.0, "22840": 28391712768.0, "22845": 28391712768.0, "22850": 28391712768.0, "22855": 28391712768.0, "22860": 28391712768.0, "22865": 28391712768.0, "22870": 28391712768.0, "22875": 28391712768.0, "22880": 28391712768.0, "22885": 28391712768.0, "22890": 28391712768.0, "22895": 28391712768.0, "22900": 28391712768.0, "22905": 28391712768.0, "22910": 28391712768.0, "22915": 28391712768.0, "22920": 28391712768.0, "22925": 28391712768.0, "22930": 28391712768.0, "22935": 28391712768.0, "22940": 28391712768.0, "22945": 28391712768.0, "22950": 28391712768.0, "22955": 28391712768.0, "22960": 28391712768.0, "22965": 28391712768.0, "22970": 28391712768.0, "22975": 28391712768.0, "22980": 28391712768.0, "22985": 28391712768.0, "22990": 28391712768.0, "22995": 28391712768.0, "23000": 28391712768.0, "23005": 28391712768.0, "23010": 28391712768.0, "23015": 28391712768.0, "23020": 28391712768.0, "23025": 28391712768.0, "23030": 28391712768.0, "23035": 28391712768.0, "23040": 28391712768.0, "23045": 28391712768.0, "23050": 28391712768.0, "23055": 28391712768.0, "23060": 28391712768.0, "23065": 28391712768.0, "23070": 28391712768.0, "23075": 28391712768.0, "23080": 28391712768.0, "23085": 28391712768.0, "23090": 28391712768.0, "23095": 28391712768.0, "23100": 28391712768.0, "23105": 28391712768.0, "23110": 28391712768.0, "23115": 28391712768.0, "23120": 28391712768.0, "23125": 28391712768.0, "23130": 28391712768.0, "23135": 28391712768.0, "23140": 28391712768.0, "23145": 28391712768.0, "23150": 28391712768.0, "23155": 28391712768.0, "23160": 28391712768.0, "23165": 28391712768.0, "23170": 28391712768.0, "23175": 28391712768.0, "23180": 28391712768.0, "23185": 28391712768.0, "23190": 28391712768.0, "23195": 28391712768.0, "23200": 28391712768.0, "23205": 28391712768.0, "23210": 28391712768.0, "23215": 28391712768.0, "23220": 28391712768.0, "23225": 28391712768.0, "23230": 28391712768.0, "23235": 28391712768.0, "23240": 28391712768.0, "23245": 28391712768.0, "23250": 28391712768.0, "23255": 28391712768.0, "23260": 28391712768.0, "23265": 28391712768.0, "23270": 28391712768.0, "23275": 28391712768.0, "23280": 28391712768.0, "23285": 28391712768.0, "23290": 28391712768.0, "23295": 28391712768.0, "23300": 28391712768.0, "23305": 28391712768.0, "23310": 28391712768.0, "23315": 28391712768.0, "23320": 28391712768.0, "23325": 28391712768.0, "23330": 28391712768.0, "23335": 28391712768.0, "23340": 28391712768.0, "23345": 28391712768.0, "23350": 28391712768.0, "23355": 28391712768.0, "23360": 28391712768.0, "23365": 28391712768.0, "23370": 28391712768.0, "23375": 28391712768.0, "23380": 28391712768.0, "23385": 28391712768.0, "23390": 28391712768.0, "23395": 28391712768.0, "23400": 28391712768.0, "23405": 28391712768.0, "23410": 28391712768.0, "23415": 28391712768.0, "23420": 28391712768.0, "23425": 28391712768.0, "23430": 28391712768.0, "23435": 28391712768.0, "23440": 28391712768.0, "23445": 28391712768.0, "23450": 28391712768.0, "23455": 28391712768.0, "23460": 28391712768.0, "23465": 28391712768.0, "23470": 28391712768.0, "23475": 28391712768.0, "23480": 28391712768.0, "23485": 28391712768.0, "23490": 28391712768.0, "23495": 28391712768.0, "23500": 28391712768.0, "23505": 28391712768.0, "23510": 28391712768.0, "23515": 28391712768.0, "23520": 28391712768.0, "23525": 28391712768.0, "23530": 28391712768.0, "23535": 28391712768.0, "23540": 28391712768.0, "23545": 28391712768.0, "23550": 28391712768.0, "23555": 28391712768.0, "23560": 28391712768.0, "23565": 28391712768.0, "23570": 28391712768.0, "23575": 28391712768.0, "23580": 28391712768.0, "23585": 28391712768.0, "23590": 28391712768.0, "23595": 28391712768.0, "23600": 28391712768.0, "23605": 28391712768.0, "23610": 28391712768.0, "23615": 28391712768.0, "23620": 28391712768.0, "23625": 28391712768.0, "23630": 28391712768.0, "23635": 28391712768.0, "23640": 28391712768.0, "23645": 28391712768.0, "23650": 28391712768.0, "23655": 28391712768.0, "23660": 28391712768.0, "23665": 28391712768.0, "23670": 28391712768.0, "23675": 28391712768.0, "23680": 28391712768.0, "23685": 28391712768.0, "23690": 28391712768.0, "23695": 28391712768.0, "23700": 28391712768.0, "23705": 28391712768.0, "23710": 28391712768.0, "23715": 28391712768.0, "23720": 28391712768.0, "23725": 28391712768.0, "23730": 28391712768.0, "23735": 28391712768.0, "23740": 28391712768.0, "23745": 28391712768.0, "23750": 28391712768.0, "23755": 28391712768.0, "23760": 28391712768.0, "23765": 28391712768.0, "23770": 28391712768.0, "23775": 28391712768.0, "23780": 28391712768.0, "23785": 28391712768.0, "23790": 28391712768.0, "23795": 28391712768.0, "23800": 28391712768.0, "23805": 28391712768.0, "23810": 28391712768.0, "23815": 28391712768.0, "23820": 28391712768.0, "23825": 28391712768.0, "23830": 28391712768.0, "23835": 28391712768.0, "23840": 28391712768.0, "23845": 28391712768.0, "23850": 28391712768.0, "23855": 28391712768.0, "23860": 28391712768.0, "23865": 28391712768.0, "23870": 28391712768.0, "23875": 28391712768.0, "23880": 28391712768.0, "23885": 28391712768.0, "23890": 28391712768.0, "23895": 28391712768.0, "23900": 28391712768.0, "23905": 28391712768.0, "23910": 28391712768.0, "23915": 28391712768.0, "23920": 28391712768.0, "23925": 28391712768.0, "23930": 28391712768.0, "23935": 28391712768.0, "23940": 28391712768.0, "23945": 28391712768.0, "23950": 28391712768.0, "23955": 28391712768.0, "23960": 28391712768.0, "23965": 28391712768.0, "23970": 28391712768.0, "23975": 28391712768.0, "23980": 28391712768.0, "23985": 28391712768.0, "23990": 28391712768.0, "23995": 28391712768.0, "24000": 28391712768.0, "24005": 28391712768.0, "24010": 28391712768.0, "24015": 28391712768.0, "24020": 28391712768.0, "24025": 28391712768.0, "24030": 28391712768.0, "24035": 28391712768.0, "24040": 28391712768.0, "24045": 28391712768.0, "24050": 28391712768.0, "24055": 28391712768.0, "24060": 28391712768.0, "24065": 28391712768.0, "24070": 28391712768.0, "24075": 28391712768.0, "24080": 28391712768.0, "24085": 28391712768.0, "24090": 28391712768.0, "24095": 28391712768.0, "24100": 28391712768.0, "24105": 28391712768.0, "24110": 28391712768.0, "24115": 28391712768.0, "24120": 28391712768.0, "24125": 28391712768.0, "24130": 28391712768.0, "24135": 28391712768.0, "24140": 28391712768.0, "24145": 28391712768.0, "24150": 28391712768.0, "24155": 28391712768.0, "24160": 28391712768.0, "24165": 28391712768.0, "24170": 28391712768.0, "24175": 28391712768.0, "24180": 28391712768.0, "24185": 28391712768.0, "24190": 28391712768.0, "24195": 28391712768.0, "24200": 28391712768.0, "24205": 28391712768.0, "24210": 28391712768.0, "24215": 28391712768.0, "24220": 28391712768.0, "24225": 28391712768.0, "24230": 28391712768.0, "24235": 28391712768.0, "24240": 28391712768.0, "24245": 28391712768.0, "24250": 28391712768.0, "24255": 28391712768.0, "24260": 28391712768.0, "24265": 28391712768.0, "24270": 28391712768.0, "24275": 28391712768.0, "24280": 28391712768.0, "24285": 28391712768.0, "24290": 28391712768.0, "24295": 28391712768.0, "24300": 28391712768.0, "24305": 28391712768.0, "24310": 28391712768.0, "24315": 28391712768.0, "24320": 28391712768.0, "24325": 28391712768.0, "24330": 28391712768.0, "24335": 28391712768.0, "24340": 28391712768.0, "24345": 28391712768.0, "24350": 28391712768.0, "24355": 28391712768.0, "24360": 28391712768.0, "24365": 28391712768.0, "24370": 28391712768.0, "24375": 28391712768.0, "24380": 28391712768.0, "24385": 28391712768.0, "24390": 28391712768.0, "24395": 28391712768.0, "24400": 28391712768.0, "24405": 28391712768.0, "24410": 28391712768.0, "24415": 28391712768.0, "24420": 28391712768.0, "24425": 28391712768.0, "24430": 28391712768.0, "24435": 28391712768.0, "24440": 28391712768.0, "24445": 28391712768.0, "24450": 28391712768.0, "24455": 28391712768.0, "24460": 28391712768.0, "24465": 28391712768.0, "24470": 28391712768.0, "24475": 28391712768.0, "24480": 28391712768.0, "24485": 28391712768.0, "24490": 28391712768.0, "24495": 28391712768.0, "24500": 28391712768.0, "24505": 28391712768.0, "24510": 28391712768.0, "24515": 28391712768.0, "24520": 28391712768.0, "24525": 28391712768.0, "24530": 28391712768.0, "24535": 28391712768.0, "24540": 28391712768.0, "24545": 28391712768.0, "24550": 28391712768.0, "24555": 28391712768.0, "24560": 28391712768.0, "24565": 28391712768.0, "24570": 28391712768.0, "24575": 28391712768.0, "24580": 28391712768.0, "24585": 28391712768.0, "24590": 28391712768.0, "24595": 28391712768.0, "24600": 28391712768.0, "24605": 28391712768.0, "24610": 28391712768.0, "24615": 28391712768.0, "24620": 28391712768.0, "24625": 28391712768.0, "24630": 28391712768.0, "24635": 28391712768.0, "24640": 28391712768.0, "24645": 28391712768.0, "24650": 28391712768.0, "24655": 28391712768.0, "24660": 28391712768.0, "24665": 28391712768.0, "24670": 28391712768.0, "24675": 28391712768.0, "24680": 28391712768.0, "24685": 28391712768.0, "24690": 28391712768.0, "24695": 28391712768.0, "24700": 28391712768.0, "24705": 28391712768.0, "24710": 28391712768.0, "24715": 28391712768.0, "24720": 28391712768.0, "24725": 28391712768.0, "24730": 28391712768.0, "24735": 28391712768.0, "24740": 28391712768.0, "24745": 28391712768.0, "24750": 28391712768.0, "24755": 28391712768.0, "24760": 28391712768.0, "24765": 28391712768.0, "24770": 28391712768.0, "24775": 28391712768.0, "24780": 28391712768.0, "24785": 28391712768.0, "24790": 28391712768.0, "24795": 28391712768.0, "24800": 28391712768.0, "24805": 28391712768.0, "24810": 28391712768.0, "24815": 28391712768.0, "24820": 28391712768.0, "24825": 28391712768.0, "24830": 28391712768.0, "24835": 28391712768.0, "24840": 28391712768.0, "24845": 28391712768.0, "24850": 28391712768.0, "24855": 28391712768.0, "24860": 28391712768.0, "24865": 28391712768.0, "24870": 28391712768.0, "24875": 28391712768.0, "24880": 28391712768.0, "24885": 28391712768.0, "24890": 28391712768.0, "24895": 28391712768.0, "24900": 28391712768.0, "24905": 28391712768.0, "24910": 28391712768.0, "24915": 28391712768.0, "24920": 28391712768.0, "24925": 28391712768.0, "24930": 28391712768.0, "24935": 28391712768.0, "24940": 28391712768.0, "24945": 28391712768.0, "24950": 28391712768.0, "24955": 28391712768.0, "24960": 28391712768.0, "24965": 28391712768.0, "24970": 28391712768.0, "24975": 28391712768.0, "24980": 28391712768.0, "24985": 28391712768.0, "24990": 28391712768.0, "24995": 28391712768.0, "25000": 28391712768.0, "25005": 28391712768.0, "25010": 28391712768.0, "25015": 28391712768.0, "25020": 28391712768.0, "25025": 28391712768.0, "25030": 28391712768.0, "25035": 28391712768.0, "25040": 28391712768.0, "25045": 28391712768.0, "25050": 28391712768.0, "25055": 28391712768.0, "25060": 28391712768.0, "25065": 28391712768.0, "25070": 28391712768.0, "25075": 28391712768.0, "25080": 28391712768.0, "25085": 28391712768.0, "25090": 28391712768.0, "25095": 28391712768.0, "25100": 28391712768.0, "25105": 28391712768.0, "25110": 28391712768.0, "25115": 28391712768.0, "25120": 28391712768.0, "25125": 28391712768.0, "25130": 28391712768.0, "25135": 28391712768.0, "25140": 28391712768.0, "25145": 28391712768.0, "25150": 28391712768.0, "25155": 28391712768.0, "25160": 28391712768.0, "25165": 28391712768.0, "25170": 28391712768.0, "25175": 28391712768.0, "25180": 28391712768.0, "25185": 28391712768.0, "25190": 28391712768.0, "25195": 28391712768.0, "25200": 28391712768.0, "25205": 28391712768.0, "25210": 28391712768.0, "25215": 28391712768.0, "25220": 28391712768.0, "25225": 28391712768.0, "25230": 28391712768.0, "25235": 28391712768.0, "25240": 28391712768.0, "25245": 28391712768.0, "25250": 28391712768.0, "25255": 28391712768.0, "25260": 28391712768.0, "25265": 28391712768.0, "25270": 28391712768.0, "25275": 28391712768.0, "25280": 28391712768.0, "25285": 28391712768.0, "25290": 28391712768.0, "25295": 28391712768.0, "25300": 28391712768.0, "25305": 28391712768.0, "25310": 28391712768.0, "25315": 28391712768.0, "25320": 28391712768.0, "25325": 28391712768.0, "25330": 28391712768.0, "25335": 28391712768.0, "25340": 28391712768.0, "25345": 28391712768.0, "25350": 28391712768.0, "25355": 28391712768.0, "25360": 28391712768.0, "25365": 28391712768.0, "25370": 28391712768.0, "25375": 28391712768.0, "25380": 28391712768.0, "25385": 28391712768.0, "25390": 28391712768.0, "25395": 28391712768.0, "25400": 28391712768.0, "25405": 28391712768.0, "25410": 28391712768.0, "25415": 28391712768.0, "25420": 28391712768.0, "25425": 28391712768.0, "25430": 28391712768.0, "25435": 28391712768.0, "25440": 28391712768.0, "25445": 28391712768.0, "25450": 28391712768.0, "25455": 28391712768.0, "25460": 28391712768.0, "25465": 28391712768.0, "25470": 28391712768.0, "25475": 28391712768.0, "25480": 28391712768.0, "25485": 28391712768.0, "25490": 28391712768.0, "25495": 28391712768.0, "25500": 28391712768.0, "25505": 28391712768.0, "25510": 28391712768.0, "25515": 28391712768.0, "25520": 28391712768.0, "25525": 28391712768.0, "25530": 28391712768.0, "25535": 28391712768.0, "25540": 28391712768.0, "25545": 28391712768.0, "25550": 28391712768.0, "25555": 28391712768.0, "25560": 28391712768.0, "25565": 28391712768.0, "25570": 28391712768.0, "25575": 28391712768.0, "25580": 28391712768.0, "25585": 28391712768.0, "25590": 28391712768.0, "25595": 28391712768.0, "25600": 28391712768.0, "25605": 28391712768.0, "25610": 28391712768.0, "25615": 28391712768.0, "25620": 28391712768.0, "25625": 28391712768.0, "25630": 28391712768.0, "25635": 28391712768.0, "25640": 28391712768.0, "25645": 28391712768.0, "25650": 28391712768.0, "25655": 28391712768.0, "25660": 28391712768.0, "25665": 28391712768.0, "25670": 28391712768.0, "25675": 28391712768.0, "25680": 28391712768.0, "25685": 28391712768.0, "25690": 28391712768.0, "25695": 28391712768.0, "25700": 28391712768.0, "25705": 28391712768.0, "25710": 28391712768.0, "25715": 28391712768.0, "25720": 28391712768.0, "25725": 28391712768.0, "25730": 28391712768.0, "25735": 28391712768.0, "25740": 28391712768.0, "25745": 28391712768.0, "25750": 28391712768.0, "25755": 28391712768.0, "25760": 28391712768.0, "25765": 28391712768.0, "25770": 28391712768.0, "25775": 28391712768.0, "25780": 28391712768.0, "25785": 28391712768.0, "25790": 28391712768.0, "25795": 28391712768.0, "25800": 28391712768.0, "25805": 28391712768.0, "25810": 28391712768.0, "25815": 28391712768.0, "25820": 28391712768.0, "25825": 28391712768.0, "25830": 28391712768.0, "25835": 28391712768.0, "25840": 28391712768.0, "25845": 28391712768.0, "25850": 28391712768.0, "25855": 28391712768.0, "25860": 28391712768.0, "25865": 28391712768.0, "25870": 28391712768.0, "25875": 28391712768.0, "25880": 28391712768.0, "25885": 28391712768.0, "25890": 28391712768.0, "25895": 28391712768.0, "25900": 28391712768.0, "25905": 28391712768.0, "25910": 28391712768.0, "25915": 28391712768.0, "25920": 28391712768.0, "25925": 28391712768.0, "25930": 28391712768.0, "25935": 28391712768.0, "25940": 28391712768.0, "25945": 28391712768.0, "25950": 28391712768.0, "25955": 28391712768.0, "25960": 28391712768.0, "25965": 28391712768.0, "25970": 28391712768.0, "25975": 28391712768.0, "25980": 28391712768.0, "25985": 28391712768.0, "25990": 28391712768.0, "25995": 28391712768.0, "26000": 28391712768.0, "26005": 28391712768.0, "26010": 28391712768.0, "26015": 28391712768.0, "26020": 28391712768.0, "26025": 28391712768.0, "26030": 28391712768.0, "26035": 28391712768.0, "26040": 28391712768.0, "26045": 28391712768.0, "26050": 28391712768.0, "26055": 28391712768.0, "26060": 28391712768.0, "26065": 28391712768.0, "26070": 28391712768.0, "26075": 28391712768.0, "26080": 28391712768.0, "26085": 28391712768.0, "26090": 28391712768.0, "26095": 28391712768.0, "26100": 28391712768.0, "26105": 28391712768.0, "26110": 28391712768.0, "26115": 28391712768.0, "26120": 28391712768.0, "26125": 28391712768.0, "26130": 28391712768.0, "26135": 28391712768.0, "26140": 28391712768.0, "26145": 28391712768.0, "26150": 28391712768.0, "26155": 28391712768.0, "26160": 28391712768.0, "26165": 28391712768.0, "26170": 28391712768.0, "26175": 28391712768.0, "26180": 28391712768.0, "26185": 28391712768.0, "26190": 28391712768.0, "26195": 28391712768.0, "26200": 28391712768.0, "26205": 28391712768.0, "26210": 28391712768.0, "26215": 28391712768.0, "26220": 28391712768.0, "26225": 28391712768.0, "26230": 28391712768.0, "26235": 28391712768.0, "26240": 28391712768.0, "26245": 28391712768.0, "26250": 28391712768.0, "26255": 28391712768.0, "26260": 28391712768.0, "26265": 28391712768.0, "26270": 28391712768.0, "26275": 28391712768.0, "26280": 28391712768.0, "26285": 28391712768.0, "26290": 28391712768.0, "26295": 28391712768.0, "26300": 28391712768.0, "26305": 28391712768.0, "26310": 28391712768.0, "26315": 28391712768.0, "26320": 28391712768.0, "26325": 28391712768.0, "26330": 28391712768.0, "26335": 28391712768.0, "26340": 28391712768.0, "26345": 28391712768.0, "26350": 28391712768.0, "26355": 28391712768.0, "26360": 28391712768.0, "26365": 28391712768.0, "26370": 28391712768.0, "26375": 28391712768.0, "26380": 28391712768.0, "26385": 28391712768.0, "26390": 28391712768.0, "26395": 28391712768.0, "26400": 28391712768.0, "26405": 28391712768.0, "26410": 28391712768.0, "26415": 28391712768.0, "26420": 28391712768.0, "26425": 28391712768.0, "26430": 28391712768.0, "26435": 28391712768.0, "26440": 28391712768.0, "26445": 28391712768.0, "26450": 28391712768.0, "26455": 28391712768.0, "26460": 28391712768.0, "26465": 28391712768.0, "26470": 28391712768.0, "26475": 28391712768.0, "26480": 28391712768.0, "26485": 28391712768.0, "26490": 28391712768.0, "26495": 28391712768.0, "26500": 28391712768.0, "26505": 28391712768.0, "26510": 28391712768.0, "26515": 28391712768.0, "26520": 28391712768.0, "26525": 28391712768.0, "26530": 28391712768.0, "26535": 28391712768.0, "26540": 28391712768.0, "26545": 28391712768.0, "26550": 28391712768.0, "26555": 28391712768.0, "26560": 28391712768.0, "26565": 28391712768.0, "26570": 28391712768.0, "26575": 28391712768.0, "26580": 28391712768.0, "26585": 28391712768.0, "26590": 28391712768.0, "26595": 28391712768.0, "26600": 28391712768.0, "26605": 28391712768.0, "26610": 28391712768.0, "26615": 28391712768.0, "26620": 28391712768.0, "26625": 28391712768.0, "26630": 28391712768.0, "26635": 28391712768.0, "26640": 28391712768.0, "26645": 28391712768.0, "26650": 28391712768.0, "26655": 28391712768.0, "26660": 28391712768.0, "26665": 28391712768.0, "26670": 28391712768.0, "26675": 28391712768.0, "26680": 28391712768.0, "26685": 28391712768.0, "26690": 28391712768.0, "26695": 28391712768.0, "26700": 28391712768.0, "26705": 28391712768.0, "26710": 28391712768.0, "26715": 28391712768.0, "26720": 28391712768.0, "26725": 28391712768.0, "26730": 28391712768.0, "26735": 28391712768.0, "26740": 28391712768.0, "26745": 28391712768.0, "26750": 28391712768.0, "26755": 28391712768.0, "26760": 28391712768.0, "26765": 28391712768.0, "26770": 28391712768.0, "26775": 28391712768.0, "26780": 28391712768.0, "26785": 28391712768.0, "26790": 28391712768.0, "26795": 28391712768.0, "26800": 28391712768.0, "26805": 28391712768.0, "26810": 28391712768.0, "26815": 28391712768.0, "26820": 28391712768.0, "26825": 28391712768.0, "26830": 28391712768.0, "26835": 28391712768.0, "26840": 28391712768.0, "26845": 28391712768.0, "26850": 28391712768.0, "26855": 28391712768.0, "26860": 28391712768.0, "26865": 28391712768.0, "26870": 28391712768.0, "26875": 28391712768.0, "26880": 28391712768.0, "26885": 28391712768.0, "26890": 28391712768.0, "26895": 28391712768.0, "26900": 28391712768.0, "26905": 28391712768.0, "26910": 28391712768.0, "26915": 28391712768.0, "26920": 28391712768.0, "26925": 28391712768.0, "26930": 28391712768.0, "26935": 28391712768.0, "26940": 28391712768.0, "26945": 28391712768.0, "26950": 28391712768.0, "26955": 28391712768.0, "26960": 28391712768.0, "26965": 28391712768.0, "26970": 28391712768.0, "26975": 28391712768.0, "26980": 28391712768.0, "26985": 28391712768.0, "26990": 28391712768.0, "26995": 28391712768.0, "27000": 28391712768.0, "27005": 28391712768.0, "27010": 28391712768.0, "27015": 28391712768.0, "27020": 28391712768.0, "27025": 28391712768.0, "27030": 28391712768.0, "27035": 28391712768.0, "27040": 28391712768.0, "27045": 28391712768.0, "27050": 28391712768.0, "27055": 28391712768.0, "27060": 28391712768.0, "27065": 28391712768.0, "27070": 28391712768.0, "27075": 28391712768.0, "27080": 28391712768.0, "27085": 28391712768.0, "27090": 28391712768.0, "27095": 28391712768.0, "27100": 28391712768.0, "27105": 28391712768.0, "27110": 28391712768.0, "27115": 28391712768.0, "27120": 28391712768.0, "27125": 28391712768.0, "27130": 28391712768.0, "27135": 28391712768.0, "27140": 28391712768.0, "27145": 28391712768.0, "27150": 28391712768.0, "27155": 28391712768.0, "27160": 28391712768.0, "27165": 28391712768.0, "27170": 28391712768.0, "27175": 28391712768.0, "27180": 28391712768.0, "27185": 28391712768.0, "27190": 28391712768.0, "27195": 28391712768.0, "27200": 28391712768.0, "27205": 28391712768.0, "27210": 28391712768.0, "27215": 28391712768.0, "27220": 28391712768.0, "27225": 28391712768.0, "27230": 28391712768.0, "27235": 28391712768.0, "27240": 28391712768.0, "27245": 28391712768.0, "27250": 28391712768.0, "27255": 28391712768.0, "27260": 28391712768.0, "27265": 28391712768.0, "27270": 28391712768.0, "27275": 28391712768.0, "27280": 28391712768.0, "27285": 28391712768.0, "27290": 28391712768.0, "27295": 28391712768.0, "27300": 28391712768.0, "27305": 28391712768.0, "27310": 28391712768.0, "27315": 28391712768.0, "27320": 28391712768.0, "27325": 28391712768.0, "27330": 28391712768.0, "27335": 28391712768.0, "27340": 28391712768.0, "27345": 28391712768.0, "27350": 28391712768.0, "27355": 28391712768.0, "27360": 28391712768.0, "27365": 28391712768.0, "27370": 28391712768.0, "27375": 28391712768.0, "27380": 28391712768.0, "27385": 28391712768.0, "27390": 28391712768.0, "27395": 28391712768.0, "27400": 28391712768.0, "27405": 28391712768.0, "27410": 28391712768.0, "27415": 28391712768.0, "27420": 28391712768.0, "27425": 28391712768.0, "27430": 28391712768.0, "27435": 28391712768.0, "27440": 28391712768.0, "27445": 28391712768.0, "27450": 28391712768.0, "27455": 28391712768.0, "27460": 28391712768.0, "27465": 28391712768.0, "27470": 28391712768.0, "27475": 28391712768.0, "27480": 28391712768.0, "27485": 28391712768.0, "27490": 28391712768.0, "27495": 28391712768.0, "27500": 28391712768.0, "27505": 28391712768.0, "27510": 28391712768.0, "27515": 28391712768.0, "27520": 28391712768.0, "27525": 28391712768.0, "27530": 28391712768.0, "27535": 28391712768.0, "27540": 28391712768.0, "27545": 28391712768.0, "27550": 28391712768.0, "27555": 28391712768.0, "27560": 28391712768.0, "27565": 28391712768.0, "27570": 28391712768.0, "27575": 28391712768.0, "27580": 28391712768.0, "27585": 28391712768.0, "27590": 28391712768.0, "27595": 28391712768.0, "27600": 28391712768.0, "27605": 28391712768.0, "27610": 28391712768.0, "27615": 28391712768.0, "27620": 28391712768.0, "27625": 28391712768.0, "27630": 28391712768.0, "27635": 28391712768.0, "27640": 28391712768.0, "27645": 28391712768.0, "27650": 28391712768.0, "27655": 28391712768.0, "27660": 28391712768.0, "27665": 28391712768.0, "27670": 28391712768.0, "27675": 28391712768.0, "27680": 28391712768.0, "27685": 28391712768.0, "27690": 28391712768.0, "27695": 28391712768.0, "27700": 28391712768.0, "27705": 28391712768.0, "27710": 28391712768.0, "27715": 28391712768.0, "27720": 28391712768.0, "27725": 28391712768.0, "27730": 28391712768.0, "27735": 28391712768.0, "27740": 28391712768.0, "27745": 28391712768.0, "27750": 28391712768.0, "27755": 28391712768.0, "27760": 28391712768.0, "27765": 28391712768.0, "27770": 28391712768.0, "27775": 28391712768.0, "27780": 28391712768.0, "27785": 28391712768.0, "27790": 28391712768.0, "27795": 28391712768.0, "27800": 28391712768.0, "27805": 28391712768.0, "27810": 28391712768.0, "27815": 28391712768.0, "27820": 28391712768.0, "27825": 28391712768.0, "27830": 28391712768.0, "27835": 28391712768.0, "27840": 28391712768.0, "27845": 28391712768.0, "27850": 28391712768.0, "27855": 28391712768.0, "27860": 28391712768.0, "27865": 28391712768.0, "27870": 28391712768.0, "27875": 28391712768.0, "27880": 28391712768.0, "27885": 28391712768.0, "27890": 28391712768.0, "27895": 28391712768.0, "27900": 28391712768.0, "27905": 28391712768.0, "27910": 28391712768.0, "27915": 28391712768.0, "27920": 28391712768.0, "27925": 28391712768.0, "27930": 28391712768.0, "27935": 28391712768.0, "27940": 28391712768.0, "27945": 28391712768.0, "27950": 28391712768.0, "27955": 28391712768.0, "27960": 28391712768.0, "27965": 28391712768.0, "27970": 28391712768.0, "27975": 28391712768.0, "27980": 28391712768.0, "27985": 28391712768.0, "27990": 28391712768.0, "27995": 28391712768.0, "28000": 28391712768.0, "28005": 28391712768.0, "28010": 28391712768.0, "28015": 28391712768.0, "28020": 28391712768.0, "28025": 28391712768.0, "28030": 28391712768.0, "28035": 28391712768.0, "28040": 28391712768.0, "28045": 28391712768.0, "28050": 28391712768.0, "28055": 28391712768.0, "28060": 28391712768.0, "28065": 28391712768.0, "28070": 28391712768.0, "28075": 28391712768.0, "28080": 28391712768.0, "28085": 28391712768.0, "28090": 28391712768.0, "28095": 28391712768.0, "28100": 28391712768.0, "28105": 28391712768.0, "28110": 28391712768.0, "28115": 28391712768.0, "28120": 28391712768.0, "28125": 28391712768.0, "28130": 28391712768.0, "28135": 28391712768.0, "28140": 28391712768.0, "28145": 28391712768.0, "28150": 28391712768.0, "28155": 28391712768.0, "28160": 28391712768.0, "28165": 28391712768.0, "28170": 28391712768.0, "28175": 28391712768.0, "28180": 28391712768.0, "28185": 28391712768.0, "28190": 28391712768.0, "28195": 28391712768.0, "28200": 28391712768.0, "28205": 28391712768.0, "28210": 28391712768.0, "28215": 28391712768.0, "28220": 28391712768.0, "28225": 28391712768.0, "28230": 28391712768.0, "28235": 28391712768.0, "28240": 28391712768.0, "28245": 28391712768.0, "28250": 28391712768.0, "28255": 28391712768.0, "28260": 28391712768.0, "28265": 28391712768.0, "28270": 28391712768.0, "28275": 28391712768.0, "28280": 28391712768.0, "28285": 28391712768.0, "28290": 28391712768.0, "28295": 28391712768.0, "28300": 28391712768.0, "28305": 28391712768.0, "28310": 28391712768.0, "28315": 28391712768.0, "28320": 28391712768.0, "28325": 28391712768.0, "28330": 28391712768.0, "28335": 28391712768.0, "28340": 28391712768.0, "28345": 28391712768.0, "28350": 28391712768.0, "28355": 28391712768.0, "28360": 28391712768.0, "28365": 28391712768.0, "28370": 28391712768.0, "28375": 28391712768.0, "28380": 28391712768.0, "28385": 28391712768.0, "28390": 28391712768.0, "28395": 28391712768.0, "28400": 28391712768.0, "28405": 28391712768.0, "28410": 28391712768.0, "28415": 28391712768.0, "28420": 28391712768.0, "28425": 28391712768.0, "28430": 28391712768.0, "28435": 28391712768.0, "28440": 28391712768.0, "28445": 28391712768.0, "28450": 28391712768.0, "28455": 28391712768.0, "28460": 28391712768.0, "28465": 28391712768.0, "28470": 28391712768.0, "28475": 28391712768.0, "28480": 28391712768.0, "28485": 28391712768.0, "28490": 28391712768.0, "28495": 28391712768.0, "28500": 28391712768.0, "28505": 28391712768.0, "28510": 28391712768.0, "28515": 28391712768.0, "28520": 28391712768.0, "28525": 28391712768.0, "28530": 28391712768.0, "28535": 28391712768.0, "28540": 28391712768.0, "28545": 28391712768.0, "28550": 28391712768.0, "28555": 28391712768.0, "28560": 28391712768.0, "28565": 28391712768.0, "28570": 28391712768.0, "28575": 28391712768.0, "28580": 28391712768.0, "28585": 28391712768.0, "28590": 28391712768.0, "28595": 28391712768.0, "28600": 28391712768.0, "28605": 28391712768.0, "28610": 28391712768.0, "28615": 28391712768.0, "28620": 28391712768.0, "28625": 28391712768.0, "28630": 28391712768.0, "28635": 28391712768.0, "28640": 28391712768.0, "28645": 28391712768.0, "28650": 28391712768.0, "28655": 28391712768.0, "28660": 28391712768.0, "28665": 28391712768.0, "28670": 28391712768.0, "28675": 28391712768.0, "28680": 28391712768.0, "28685": 28391712768.0, "28690": 28391712768.0, "28695": 28391712768.0, "28700": 28391712768.0, "28705": 28391712768.0, "28710": 28391712768.0, "28715": 28391712768.0, "28720": 28391712768.0, "28725": 28391712768.0, "28730": 28391712768.0, "28735": 28391712768.0, "28740": 28391712768.0, "28745": 28391712768.0, "28750": 28391712768.0, "28755": 28391712768.0, "28760": 28391712768.0, "28765": 28391712768.0, "28770": 28391712768.0, "28775": 28391712768.0, "28780": 28391712768.0, "28785": 28391712768.0, "28790": 28391712768.0, "28795": 28391712768.0, "28800": 28391712768.0, "28805": 28391712768.0, "28810": 28391712768.0, "28815": 28391712768.0, "28820": 28391712768.0, "28825": 28391712768.0, "28830": 28391712768.0, "28835": 28391712768.0, "28840": 28391712768.0, "28845": 28391712768.0, "28850": 28391712768.0, "28855": 28391712768.0, "28860": 28391712768.0, "28865": 28391712768.0, "28870": 28391712768.0, "28875": 28391712768.0, "28880": 28391712768.0, "28885": 28391712768.0, "28890": 28391712768.0, "28895": 28391712768.0, "28900": 28391712768.0, "28905": 28391712768.0, "28910": 28391712768.0, "28915": 28391712768.0, "28920": 28391712768.0, "28925": 28391712768.0, "28930": 28391712768.0, "28935": 28391712768.0, "28940": 28391712768.0, "28945": 28391712768.0, "28950": 28391712768.0, "28955": 28391712768.0, "28960": 28391712768.0, "28965": 28391712768.0, "28970": 28391712768.0, "28975": 28391712768.0, "28980": 28391712768.0, "28985": 28391712768.0, "28990": 28391712768.0, "28995": 28391712768.0, "29000": 28391712768.0, "29005": 28391712768.0, "29010": 28391712768.0, "29015": 28391712768.0, "29020": 28391712768.0, "29025": 28391712768.0, "29030": 28391712768.0, "29035": 28391712768.0, "29040": 28391712768.0, "29045": 28391712768.0, "29050": 28391712768.0, "29055": 28391712768.0, "29060": 28391712768.0, "29065": 28391712768.0, "29070": 28391712768.0, "29075": 28391712768.0, "29080": 28391712768.0, "29085": 28391712768.0, "29090": 28391712768.0, "29095": 28391712768.0, "29100": 28391712768.0, "29105": 28391712768.0, "29110": 28391712768.0, "29115": 28391712768.0, "29120": 28391712768.0, "29125": 28391712768.0, "29130": 28391712768.0, "29135": 28391712768.0, "29140": 28391712768.0, "29145": 28391712768.0, "29150": 28391712768.0, "29155": 28391712768.0, "29160": 28391712768.0, "29165": 28391712768.0, "29170": 28391712768.0, "29175": 28391712768.0, "29180": 28391712768.0, "29185": 28391712768.0, "29190": 28391712768.0, "29195": 28391712768.0, "29200": 28391712768.0, "29205": 28391712768.0, "29210": 28391712768.0, "29215": 28391712768.0, "29220": 28391712768.0, "29225": 28391712768.0, "29230": 28391712768.0, "29235": 28391712768.0, "29240": 28391712768.0, "29245": 28391712768.0, "29250": 28391712768.0, "29255": 28391712768.0, "29260": 28391712768.0, "29265": 28391712768.0, "29270": 28391712768.0, "29275": 28391712768.0, "29280": 28391712768.0, "29285": 28391712768.0, "29290": 28391712768.0, "29295": 28391712768.0, "29300": 28391712768.0, "29305": 28391712768.0, "29310": 28391712768.0, "29315": 28391712768.0, "29320": 28391712768.0, "29325": 28391712768.0, "29330": 28391712768.0, "29335": 28391712768.0, "29340": 28391712768.0, "29345": 28391712768.0, "29350": 28391712768.0, "29355": 28391712768.0, "29360": 28391712768.0, "29365": 28391712768.0, "29370": 28391712768.0, "29375": 28391712768.0, "29380": 28391712768.0, "29385": 28391712768.0, "29390": 28391712768.0, "29395": 28391712768.0, "29400": 28391712768.0, "29405": 28391712768.0, "29410": 28391712768.0, "29415": 28391712768.0, "29420": 28391712768.0, "29425": 28391712768.0, "29430": 28391712768.0, "29435": 28391712768.0, "29440": 28391712768.0, "29445": 28391712768.0, "29450": 28391712768.0, "29455": 28391712768.0, "29460": 28391712768.0, "29465": 28391712768.0, "29470": 28391712768.0, "29475": 28391712768.0, "29480": 28391712768.0, "29485": 28391712768.0, "29490": 28391712768.0, "29495": 28391712768.0, "29500": 28391712768.0, "29505": 28391712768.0, "29510": 28391712768.0, "29515": 28391712768.0, "29520": 28391712768.0, "29525": 28391712768.0, "29530": 28391712768.0, "29535": 28391712768.0, "29540": 28391712768.0, "29545": 28391712768.0, "29550": 28391712768.0, "29555": 28391712768.0, "29560": 28391712768.0, "29565": 28391712768.0, "29570": 28391712768.0, "29575": 28391712768.0, "29580": 28391712768.0, "29585": 28391712768.0, "29590": 28391712768.0, "29595": 28391712768.0, "29600": 28391712768.0, "29605": 28391712768.0, "29610": 28391712768.0, "29615": 28391712768.0, "29620": 28391712768.0, "29625": 28391712768.0, "29630": 28391712768.0, "29635": 28391712768.0, "29640": 28391712768.0, "29645": 28391712768.0, "29650": 28391712768.0, "29655": 28391712768.0, "29660": 28391712768.0, "29665": 28391712768.0, "29670": 28391712768.0, "29675": 28391712768.0, "29680": 28391712768.0, "29685": 28391712768.0, "29690": 28391712768.0, "29695": 28391712768.0, "29700": 28391712768.0, "29705": 28391712768.0, "29710": 28391712768.0, "29715": 28391712768.0, "29720": 28391712768.0, "29725": 28391712768.0, "29730": 28391712768.0, "29735": 28391712768.0, "29740": 28391712768.0, "29745": 28391712768.0, "29750": 28391712768.0, "29755": 28391712768.0, "29760": 28391712768.0, "29765": 28391712768.0, "29770": 28391712768.0, "29775": 28391712768.0, "29780": 28391712768.0, "29785": 28391712768.0, "29790": 28391712768.0, "29795": 28391712768.0, "29800": 28391712768.0, "29805": 28391712768.0, "29810": 28391712768.0, "29815": 28391712768.0, "29820": 28391712768.0, "29825": 28391712768.0, "29830": 28391712768.0, "29835": 28391712768.0, "29840": 28391712768.0, "29845": 28391712768.0, "29850": 28391712768.0, "29855": 28391712768.0, "29860": 28391712768.0, "29865": 28391712768.0, "29870": 28391712768.0, "29875": 28391712768.0, "29880": 28391712768.0, "29885": 28391712768.0, "29890": 28391712768.0, "29895": 28391712768.0, "29900": 28391712768.0, "29905": 28391712768.0, "29910": 28391712768.0, "29915": 28391712768.0, "29920": 28391712768.0, "29925": 28391712768.0, "29930": 28391712768.0, "29935": 28391712768.0, "29940": 28391712768.0, "29945": 28391712768.0, "29950": 28391712768.0, "29955": 28391712768.0, "29960": 28391712768.0, "29965": 28391712768.0, "29970": 28391712768.0, "29975": 28391712768.0, "29980": 28391712768.0, "29985": 28391712768.0, "29990": 28391712768.0, "29995": 28391712768.0, "30000": 28391712768.0, "30005": 28391712768.0, "30010": 28391712768.0, "30015": 28391712768.0, "30020": 28391712768.0, "30025": 28391712768.0, "30030": 28391712768.0, "30035": 28391712768.0, "30040": 28391712768.0, "30045": 28391712768.0, "30050": 28391712768.0, "30055": 28391712768.0, "30060": 28391712768.0, "30065": 28391712768.0, "30070": 28391712768.0, "30075": 28391712768.0, "30080": 28391712768.0, "30085": 28391712768.0, "30090": 28391712768.0, "30095": 28391712768.0, "30100": 28391712768.0, "30105": 28391712768.0, "30110": 28391712768.0, "30115": 28391712768.0, "30120": 28391712768.0, "30125": 28391712768.0, "30130": 28391712768.0, "30135": 28391712768.0, "30140": 28391712768.0, "30145": 28391712768.0, "30150": 28391712768.0, "30155": 28391712768.0, "30160": 28391712768.0, "30165": 28391712768.0, "30170": 28391712768.0, "30175": 28391712768.0, "30180": 28391712768.0, "30185": 28391712768.0, "30190": 28391712768.0, "30195": 28391712768.0, "30200": 28391712768.0, "30205": 28391712768.0, "30210": 28391712768.0, "30215": 28391712768.0, "30220": 28391712768.0, "30225": 28391712768.0, "30230": 28391712768.0, "30235": 28391712768.0, "30240": 28391712768.0, "30245": 28391712768.0, "30250": 28391712768.0, "30255": 28391712768.0, "30260": 28391712768.0, "30265": 28391712768.0, "30270": 28391712768.0, "30275": 28391712768.0, "30280": 28391712768.0, "30285": 28391712768.0, "30290": 28391712768.0, "30295": 28391712768.0, "30300": 28391712768.0, "30305": 28391712768.0, "30310": 28391712768.0, "30315": 28391712768.0, "30320": 28391712768.0, "30325": 28391712768.0, "30330": 28391712768.0, "30335": 28391712768.0, "30340": 28391712768.0, "30345": 28391712768.0, "30350": 28391712768.0, "30355": 28391712768.0, "30360": 28391712768.0, "30365": 28391712768.0, "30370": 28391712768.0, "30375": 28391712768.0, "30380": 28391712768.0, "30385": 28391712768.0, "30390": 28391712768.0, "30395": 28391712768.0, "30400": 28391712768.0, "30405": 28391712768.0, "30410": 28391712768.0, "30415": 28391712768.0, "30420": 28391712768.0, "30425": 28391712768.0, "30430": 28391712768.0, "30435": 28391712768.0, "30440": 28391712768.0, "30445": 28391712768.0, "30450": 28391712768.0, "30455": 28391712768.0, "30460": 28391712768.0, "30465": 28391712768.0, "30470": 28391712768.0, "30475": 28391712768.0, "30480": 28391712768.0, "30485": 28391712768.0, "30490": 28391712768.0, "30495": 28391712768.0, "30500": 28391712768.0, "30505": 28391712768.0, "30510": 28391712768.0, "30515": 28391712768.0, "30520": 28391712768.0, "30525": 28391712768.0, "30530": 28391712768.0, "30535": 28391712768.0, "30540": 28391712768.0, "30545": 28391712768.0, "30550": 28391712768.0, "30555": 28391712768.0, "30560": 28391712768.0, "30565": 28391712768.0, "30570": 28391712768.0, "30575": 28391712768.0, "30580": 28391712768.0, "30585": 28391712768.0, "30590": 28391712768.0, "30595": 28391712768.0, "30600": 28391712768.0, "30605": 28391712768.0, "30610": 28391712768.0, "30615": 28391712768.0, "30620": 28391712768.0, "30625": 28391712768.0, "30630": 28391712768.0, "30635": 28391712768.0, "30640": 28391712768.0, "30645": 28391712768.0, "30650": 28391712768.0, "30655": 28391712768.0, "30660": 28391712768.0, "30665": 28391712768.0, "30670": 28391712768.0, "30675": 28391712768.0, "30680": 28391712768.0, "30685": 28391712768.0, "30690": 28391712768.0, "30695": 28391712768.0, "30700": 28391712768.0, "30705": 28391712768.0, "30710": 28391712768.0, "30715": 28391712768.0, "30720": 28391712768.0, "30725": 28391712768.0, "30730": 28391712768.0, "30735": 28391712768.0, "30740": 28391712768.0, "30745": 28391712768.0, "30750": 28391712768.0, "30755": 28391712768.0, "30760": 28391712768.0, "30765": 28391712768.0, "30770": 28391712768.0, "30775": 28391712768.0, "30780": 28391712768.0, "30785": 28391712768.0, "30790": 28391712768.0, "30795": 28391712768.0, "30800": 28391712768.0, "30805": 28391712768.0, "30810": 28391712768.0, "30815": 28391712768.0, "30820": 28391712768.0, "30825": 28391712768.0, "30830": 28391712768.0, "30835": 28391712768.0, "30840": 28391712768.0, "30845": 28391712768.0, "30850": 28391712768.0, "30855": 28391712768.0, "30860": 28391712768.0, "30865": 28391712768.0, "30870": 28391712768.0, "30875": 28391712768.0, "30880": 28391712768.0, "30885": 28391712768.0, "30890": 28391712768.0, "30895": 28391712768.0, "30900": 28391712768.0, "30905": 28391712768.0, "30910": 28391712768.0, "30915": 28391712768.0, "30920": 28391712768.0, "30925": 28391712768.0, "30930": 28391712768.0, "30935": 28391712768.0, "30940": 28391712768.0, "30945": 28391712768.0, "30950": 28391712768.0, "30955": 28391712768.0, "30960": 28391712768.0, "30965": 28391712768.0, "30970": 28391712768.0, "30975": 28391712768.0, "30980": 28391712768.0, "30985": 28391712768.0, "30990": 28391712768.0, "30995": 28391712768.0, "31000": 28391712768.0, "31005": 28391712768.0, "31010": 28391712768.0, "31015": 28391712768.0, "31020": 28391712768.0, "31025": 28391712768.0, "31030": 28391712768.0, "31035": 28391712768.0, "31040": 28391712768.0, "31045": 28391712768.0, "31050": 28391712768.0, "31055": 28391712768.0, "31060": 28391712768.0, "31065": 28391712768.0, "31070": 28391712768.0, "31075": 28391712768.0, "31080": 28391712768.0, "31085": 28391712768.0, "31090": 28391712768.0, "31095": 28391712768.0, "31100": 28391712768.0, "31105": 28391712768.0, "31110": 28391712768.0, "31115": 28391712768.0, "31120": 28391712768.0, "31125": 28391712768.0, "31130": 28391712768.0, "31135": 28391712768.0, "31140": 28391712768.0, "31145": 28391712768.0, "31150": 28391712768.0, "31155": 28391712768.0, "31160": 28391712768.0, "31165": 28391712768.0, "31170": 28391712768.0, "31175": 28391712768.0, "31180": 28391712768.0, "31185": 28391712768.0, "31190": 28391712768.0, "31195": 28391712768.0, "31200": 28391712768.0, "31205": 28391712768.0, "31210": 28391712768.0, "31215": 28391712768.0, "31220": 28391712768.0, "31225": 28391712768.0, "31230": 28391712768.0, "31235": 28391712768.0, "31240": 28391712768.0, "31245": 28391712768.0, "31250": 28391712768.0, "31255": 28391712768.0, "31260": 28391712768.0, "31265": 28391712768.0, "31270": 28391712768.0, "31275": 28391712768.0, "31280": 28391712768.0, "31285": 28391712768.0, "31290": 28391712768.0, "31295": 28391712768.0, "31300": 28391712768.0, "31305": 28391712768.0, "31310": 28391712768.0, "31315": 28391712768.0, "31320": 28391712768.0, "31325": 28391712768.0, "31330": 28391712768.0, "31335": 28391712768.0, "31340": 28391712768.0, "31345": 28391712768.0, "31350": 28391712768.0, "31355": 28391712768.0, "31360": 28391712768.0, "31365": 28391712768.0, "31370": 28391712768.0, "31375": 28391712768.0, "31380": 28391712768.0, "31385": 28391712768.0, "31390": 28391712768.0, "31395": 28391712768.0, "31400": 28391712768.0, "31405": 28391712768.0, "31410": 28391712768.0, "31415": 28391712768.0, "31420": 28391712768.0, "31425": 28391712768.0, "31430": 28391712768.0, "31435": 28391712768.0, "31440": 28391712768.0, "31445": 28391712768.0, "31450": 28391712768.0, "31455": 28391712768.0, "31460": 28391712768.0, "31465": 28391712768.0, "31470": 28391712768.0, "31475": 28391712768.0, "31480": 28391712768.0, "31485": 28391712768.0, "31490": 28391712768.0, "31495": 28391712768.0, "31500": 28391712768.0, "31505": 28391712768.0, "31510": 28391712768.0, "31515": 28391712768.0, "31520": 28391712768.0, "31525": 28391712768.0, "31530": 28391712768.0, "31535": 28391712768.0, "31540": 28391712768.0, "31545": 28391712768.0, "31550": 28391712768.0, "31555": 28391712768.0, "31560": 28391712768.0, "31565": 28391712768.0, "31570": 28391712768.0, "31575": 28391712768.0, "31580": 28391712768.0, "31585": 28391712768.0, "31590": 28391712768.0, "31595": 28391712768.0, "31600": 28391712768.0, "31605": 28391712768.0, "31610": 28391712768.0, "31615": 28391712768.0, "31620": 28391712768.0, "31625": 28391712768.0, "31630": 28391712768.0, "31635": 28391712768.0, "31640": 28391712768.0, "31645": 28391712768.0, "31650": 28391712768.0, "31655": 28391712768.0, "31660": 28391712768.0, "31665": 28391712768.0, "31670": 28391712768.0, "31675": 28391712768.0, "31680": 28391712768.0, "31685": 28391712768.0, "31690": 28391712768.0, "31695": 28391712768.0, "31700": 28391712768.0, "31705": 28391712768.0, "31710": 28391712768.0, "31715": 28391712768.0, "31720": 28391712768.0, "31725": 28391712768.0, "31730": 28391712768.0, "31735": 28391712768.0, "31740": 28391712768.0, "31745": 28391712768.0, "31750": 28391712768.0, "31755": 28391712768.0, "31760": 28391712768.0, "31765": 28391712768.0, "31770": 28391712768.0, "31775": 28391712768.0, "31780": 28391712768.0, "31785": 28391712768.0, "31790": 28391712768.0, "31795": 28391712768.0, "31800": 28391712768.0, "31805": 28391712768.0, "31810": 28391712768.0, "31815": 28391712768.0, "31820": 28391712768.0, "31825": 28391712768.0, "31830": 28391712768.0, "31835": 28391712768.0, "31840": 28391712768.0, "31845": 28391712768.0, "31850": 28391712768.0, "31855": 28391712768.0, "31860": 28391712768.0, "31865": 28391712768.0, "31870": 28391712768.0, "31875": 28391712768.0, "31880": 28391712768.0, "31885": 28391712768.0, "31890": 28391712768.0, "31895": 28391712768.0, "31900": 28391712768.0, "31905": 28391712768.0, "31910": 28391712768.0, "31915": 28391712768.0, "31920": 28391712768.0, "31925": 28391712768.0, "31930": 28391712768.0, "31935": 28391712768.0, "31940": 28391712768.0, "31945": 28391712768.0, "31950": 28391712768.0, "31955": 28391712768.0, "31960": 28391712768.0, "31965": 28391712768.0, "31970": 28391712768.0, "31975": 28391712768.0, "31980": 28391712768.0, "31985": 28391712768.0, "31990": 28391712768.0, "31995": 28391712768.0, "32000": 28391712768.0, "32005": 28391712768.0, "32010": 28391712768.0, "32015": 28391712768.0, "32020": 28391712768.0, "32025": 28391712768.0, "32030": 28391712768.0, "32035": 28391712768.0, "32040": 28391712768.0, "32045": 28391712768.0, "32050": 28391712768.0, "32055": 28391712768.0, "32060": 28391712768.0, "32065": 28391712768.0, "32070": 28391712768.0, "32075": 28391712768.0, "32080": 28391712768.0, "32085": 28391712768.0, "32090": 28391712768.0, "32095": 28391712768.0, "32100": 28391712768.0, "32105": 28391712768.0, "32110": 28391712768.0, "32115": 28391712768.0, "32120": 28391712768.0, "32125": 28391712768.0, "32130": 28391712768.0, "32135": 28391712768.0, "32140": 28391712768.0, "32145": 28391712768.0, "32150": 28391712768.0, "32155": 28391712768.0, "32160": 28391712768.0, "32165": 28391712768.0, "32170": 28391712768.0, "32175": 28391712768.0, "32180": 28391712768.0, "32185": 28391712768.0, "32190": 28391712768.0, "32195": 28391712768.0, "32200": 28391712768.0, "32205": 28391712768.0, "32210": 28391712768.0, "32215": 28391712768.0, "32220": 28391712768.0, "32225": 28391712768.0, "32230": 28391712768.0, "32235": 28391712768.0, "32240": 28391712768.0, "32245": 28391712768.0, "32250": 28391712768.0, "32255": 28391712768.0, "32260": 28391712768.0, "32265": 28391712768.0, "32270": 28391712768.0, "32275": 28391712768.0, "32280": 28391712768.0, "32285": 28391712768.0, "32290": 28391712768.0, "32295": 28391712768.0, "32300": 28391712768.0, "32305": 28391712768.0, "32310": 28391712768.0, "32315": 28391712768.0, "32320": 28391712768.0, "32325": 28391712768.0, "32330": 28391712768.0, "32335": 28391712768.0, "32340": 28391712768.0, "32345": 28391712768.0, "32350": 28391712768.0, "32355": 28391712768.0, "32360": 28391712768.0, "32365": 28391712768.0, "32370": 28391712768.0, "32375": 28391712768.0, "32380": 28391712768.0, "32385": 28391712768.0, "32390": 28391712768.0, "32395": 28391712768.0, "32400": 28391712768.0, "32405": 28391712768.0, "32410": 28391712768.0, "32415": 28391712768.0, "32420": 28391712768.0, "32425": 28391712768.0, "32430": 28391712768.0, "32435": 28391712768.0, "32440": 28391712768.0, "32445": 28391712768.0, "32450": 28391712768.0, "32455": 28391712768.0, "32460": 28391712768.0, "32465": 28391712768.0, "32470": 28391712768.0, "32475": 28391712768.0, "32480": 28391712768.0, "32485": 28391712768.0, "32490": 28391712768.0, "32495": 28391712768.0, "32500": 28391712768.0, "32505": 28391712768.0, "32510": 28391712768.0, "32515": 28391712768.0, "32520": 28391712768.0, "32525": 28391712768.0, "32530": 28391712768.0, "32535": 28391712768.0, "32540": 28391712768.0, "32545": 28391712768.0, "32550": 28391712768.0, "32555": 28391712768.0, "32560": 28391712768.0, "32565": 28391712768.0, "32570": 28391712768.0, "32575": 28391712768.0, "32580": 28391712768.0, "32585": 28391712768.0, "32590": 28391712768.0, "32595": 28391712768.0, "32600": 28391712768.0, "32605": 28391712768.0, "32610": 28391712768.0, "32615": 28391712768.0, "32620": 28391712768.0, "32625": 28391712768.0, "32630": 28391712768.0, "32635": 28391712768.0, "32640": 28391712768.0, "32645": 28391712768.0, "32650": 28391712768.0, "32655": 28391712768.0, "32660": 28391712768.0, "32665": 28391712768.0, "32670": 28391712768.0, "32675": 28391712768.0, "32680": 28391712768.0, "32685": 28391712768.0, "32690": 28391712768.0, "32695": 28391712768.0, "32700": 28391712768.0, "32705": 28391712768.0, "32710": 28391712768.0, "32715": 28391712768.0, "32720": 28391712768.0, "32725": 28391712768.0, "32730": 28391712768.0, "32735": 28391712768.0, "32740": 28391712768.0, "32745": 28391712768.0, "32750": 28391712768.0, "32755": 28391712768.0, "32760": 28391712768.0, "32765": 28391712768.0, "32770": 28391712768.0, "32775": 28391712768.0, "32780": 28391712768.0, "32785": 28391712768.0, "32790": 28391712768.0, "32795": 28391712768.0, "32800": 28391712768.0, "32805": 28391712768.0, "32810": 28391712768.0, "32815": 28391712768.0, "32820": 28391712768.0, "32825": 28391712768.0, "32830": 28391712768.0, "32835": 28391712768.0, "32840": 28391712768.0, "32845": 28391712768.0, "32850": 28391712768.0, "32855": 28391712768.0, "32860": 28391712768.0, "32865": 28391712768.0, "32870": 28391712768.0, "32875": 28391712768.0, "32880": 28391712768.0, "32885": 28391712768.0, "32890": 28391712768.0, "32895": 28391712768.0, "32900": 28391712768.0, "32905": 28391712768.0, "32910": 28391712768.0, "32915": 28391712768.0, "32920": 28391712768.0, "32925": 28391712768.0, "32930": 28391712768.0, "32935": 28391712768.0, "32940": 28391712768.0, "32945": 28391712768.0, "32950": 28391712768.0, "32955": 28391712768.0, "32960": 28391712768.0, "32965": 28391712768.0, "32970": 28391712768.0, "32975": 28391712768.0, "32980": 28391712768.0, "32985": 28391712768.0, "32990": 28391712768.0, "32995": 28391712768.0, "33000": 28391712768.0, "33005": 28391712768.0, "33010": 28391712768.0, "33015": 28391712768.0, "33020": 28391712768.0, "33025": 28391712768.0, "33030": 28391712768.0, "33035": 28391712768.0, "33040": 28391712768.0, "33045": 28391712768.0, "33050": 28391712768.0, "33055": 28391712768.0, "33060": 28391712768.0, "33065": 28391712768.0, "33070": 28391712768.0, "33075": 28391712768.0, "33080": 28391712768.0, "33085": 28391712768.0, "33090": 28391712768.0, "33095": 28391712768.0, "33100": 28391712768.0, "33105": 28391712768.0, "33110": 28391712768.0, "33115": 28391712768.0, "33120": 28391712768.0, "33125": 28391712768.0, "33130": 28391712768.0, "33135": 28391712768.0, "33140": 28391712768.0, "33145": 28391712768.0, "33150": 28391712768.0, "33155": 28391712768.0, "33160": 28391712768.0, "33165": 28391712768.0, "33170": 28391712768.0, "33175": 28391712768.0, "33180": 28391712768.0, "33185": 28391712768.0, "33190": 28391712768.0, "33195": 28391712768.0, "33200": 28391712768.0, "33205": 28391712768.0, "33210": 28391712768.0, "33215": 28391712768.0, "33220": 28391712768.0, "33225": 28391712768.0, "33230": 28391712768.0, "33235": 28391712768.0, "33240": 28391712768.0, "33245": 28391712768.0, "33250": 28391712768.0, "33255": 28391712768.0, "33260": 28391712768.0, "33265": 28391712768.0, "33270": 28391712768.0, "33275": 28391712768.0, "33280": 28391712768.0, "33285": 28391712768.0, "33290": 28391712768.0, "33295": 28391712768.0, "33300": 28391712768.0, "33305": 28391712768.0, "33310": 28391712768.0, "33315": 28391712768.0, "33320": 28391712768.0, "33325": 28391712768.0, "33330": 28391712768.0, "33335": 28391712768.0, "33340": 28391712768.0, "33345": 28391712768.0, "33350": 28391712768.0, "33355": 28391712768.0, "33360": 28391712768.0, "33365": 28391712768.0, "33370": 28391712768.0, "33375": 28391712768.0, "33380": 28391712768.0, "33385": 28391712768.0, "33390": 28391712768.0, "33395": 28391712768.0, "33400": 28391712768.0, "33405": 28391712768.0, "33410": 28391712768.0, "33415": 28391712768.0, "33420": 28391712768.0, "33425": 28391712768.0, "33430": 28391712768.0, "33435": 28391712768.0, "33440": 28391712768.0, "33445": 28391712768.0, "33450": 28391712768.0, "33455": 28391712768.0, "33460": 28391712768.0, "33465": 28391712768.0, "33470": 28391712768.0, "33475": 28391712768.0, "33480": 28391712768.0, "33485": 28391712768.0, "33490": 28391712768.0, "33495": 28391712768.0, "33500": 28391712768.0, "33505": 28391712768.0, "33510": 28391712768.0, "33515": 28391712768.0, "33520": 28391712768.0, "33525": 28391712768.0, "33530": 28391712768.0, "33535": 28391712768.0, "33540": 28391712768.0, "33545": 28391712768.0, "33550": 28391712768.0, "33555": 28391712768.0, "33560": 28391712768.0, "33565": 28391712768.0, "33570": 28391712768.0, "33575": 28391712768.0, "33580": 28391712768.0, "33585": 28391712768.0, "33590": 28391712768.0, "33595": 28391712768.0, "33600": 28391712768.0, "33605": 28391712768.0, "33610": 28391712768.0, "33615": 28391712768.0, "33620": 28391712768.0, "33625": 28391712768.0, "33630": 28391712768.0, "33635": 28391712768.0, "33640": 28391712768.0, "33645": 28391712768.0, "33650": 28391712768.0, "33655": 28391712768.0, "33660": 28391712768.0, "33665": 28391712768.0, "33670": 28391712768.0, "33675": 28391712768.0, "33680": 28391712768.0, "33685": 28391712768.0, "33690": 28391712768.0, "33695": 28391712768.0, "33700": 28391712768.0, "33705": 28391712768.0, "33710": 28391712768.0, "33715": 28391712768.0, "33720": 28391712768.0, "33725": 28391712768.0, "33730": 28391712768.0, "33735": 28391712768.0, "33740": 28391712768.0, "33745": 28391712768.0, "33750": 28391712768.0, "33755": 28391712768.0, "33760": 28391712768.0, "33765": 28391712768.0, "33770": 28391712768.0, "33775": 28391712768.0, "33780": 28391712768.0, "33785": 28391712768.0, "33790": 28391712768.0, "33795": 28391712768.0, "33800": 28391712768.0, "33805": 28391712768.0, "33810": 28391712768.0, "33815": 28391712768.0, "33820": 28391712768.0, "33825": 28391712768.0, "33830": 28391712768.0, "33835": 28391712768.0, "33840": 28391712768.0, "33845": 28391712768.0, "33850": 28391712768.0, "33855": 28391712768.0, "33860": 28391712768.0, "33865": 28391712768.0, "33870": 28391712768.0, "33875": 28391712768.0, "33880": 28391712768.0, "33885": 28391712768.0, "33890": 28391712768.0, "33895": 28391712768.0, "33900": 28391712768.0, "33905": 28391712768.0, "33910": 28391712768.0, "33915": 28391712768.0, "33920": 28391712768.0, "33925": 28391712768.0, "33930": 28391712768.0, "33935": 28391712768.0, "33940": 28391712768.0, "33945": 28391712768.0, "33950": 28391712768.0, "33955": 28391712768.0, "33960": 28391712768.0, "33965": 28391712768.0, "33970": 28391712768.0, "33975": 28391712768.0, "33980": 28391712768.0, "33985": 28391712768.0, "33990": 28391712768.0, "33995": 28391712768.0, "34000": 28391712768.0, "34005": 28391712768.0, "34010": 28391712768.0, "34015": 28391712768.0, "34020": 28391712768.0, "34025": 28391712768.0, "34030": 28391712768.0, "34035": 28391712768.0, "34040": 28391712768.0, "34045": 28391712768.0, "34050": 28391712768.0, "34055": 28391712768.0, "34060": 28391712768.0, "34065": 28391712768.0, "34070": 28391712768.0, "34075": 28391712768.0, "34080": 28391712768.0, "34085": 28391712768.0, "34090": 28391712768.0, "34095": 28391712768.0, "34100": 28391712768.0, "34105": 28391712768.0, "34110": 28391712768.0, "34115": 28391712768.0, "34120": 28391712768.0, "34125": 28391712768.0, "34130": 28391712768.0, "34135": 28391712768.0, "34140": 28391712768.0, "34145": 28391712768.0, "34150": 28391712768.0, "34155": 28391712768.0, "34160": 28391712768.0, "34165": 28391712768.0, "34170": 28391712768.0, "34175": 28391712768.0, "34180": 28391712768.0, "34185": 28391712768.0, "34190": 28391712768.0, "34195": 28391712768.0, "34200": 28391712768.0, "34205": 28391712768.0, "34210": 28391712768.0, "34215": 28391712768.0, "34220": 28391712768.0, "34225": 28391712768.0, "34230": 28391712768.0, "34235": 28391712768.0, "34240": 28391712768.0, "34245": 28391712768.0, "34250": 28391712768.0, "34255": 28391712768.0, "34260": 28391712768.0, "34265": 28391712768.0, "34270": 28391712768.0, "34275": 28391712768.0, "34280": 28391712768.0, "34285": 28391712768.0, "34290": 28391712768.0, "34295": 28391712768.0, "34300": 28391712768.0, "34305": 28391712768.0, "34310": 28391712768.0, "34315": 28391712768.0, "34320": 28391712768.0, "34325": 28391712768.0, "34330": 28391712768.0, "34335": 28391712768.0, "34340": 28391712768.0, "34345": 28391712768.0, "34350": 28391712768.0, "34355": 28391712768.0, "34360": 28391712768.0, "34365": 28391712768.0, "34370": 28391712768.0, "34375": 28391712768.0, "34380": 28391712768.0, "34385": 28391712768.0, "34390": 28391712768.0, "34395": 28391712768.0, "34400": 28391712768.0, "34405": 28391712768.0, "34410": 28391712768.0, "34415": 28391712768.0, "34420": 28391712768.0, "34425": 28391712768.0, "34430": 28391712768.0, "34435": 28391712768.0, "34440": 28391712768.0, "34445": 28391712768.0, "34450": 28391712768.0, "34455": 28391712768.0, "34460": 28391712768.0, "34465": 28391712768.0, "34470": 28391712768.0, "34475": 28391712768.0, "34480": 28391712768.0, "34485": 28391712768.0, "34490": 28391712768.0, "34495": 28391712768.0, "34500": 28391712768.0, "34505": 28391712768.0, "34510": 28391712768.0, "34515": 28391712768.0, "34520": 28391712768.0, "34525": 28391712768.0, "34530": 28391712768.0, "34535": 28391712768.0, "34540": 28391712768.0, "34545": 28391712768.0, "34550": 28391712768.0, "34555": 28391712768.0, "34560": 28391712768.0, "34565": 28391712768.0, "34570": 28391712768.0, "34575": 28391712768.0, "34580": 28391712768.0, "34585": 28391712768.0, "34590": 28391712768.0, "34595": 28391712768.0, "34600": 28391712768.0, "34605": 28391712768.0, "34610": 28391712768.0, "34615": 28391712768.0, "34620": 28391712768.0, "34625": 28391712768.0, "34630": 28391712768.0, "34635": 28391712768.0, "34640": 28391712768.0, "34645": 28391712768.0, "34650": 28391712768.0, "34655": 28391712768.0, "34660": 28391712768.0, "34665": 28391712768.0, "34670": 28391712768.0, "34675": 28391712768.0, "34680": 28391712768.0, "34685": 28391712768.0, "34690": 28391712768.0, "34695": 28391712768.0, "34700": 28391712768.0, "34705": 28391712768.0, "34710": 28391712768.0, "34715": 28391712768.0, "34720": 28391712768.0, "34725": 28391712768.0, "34730": 28391712768.0, "34735": 28391712768.0, "34740": 28391712768.0, "34745": 28391712768.0, "34750": 28391712768.0, "34755": 28391712768.0, "34760": 28391712768.0, "34765": 28391712768.0, "34770": 28391712768.0, "34775": 28391712768.0, "34780": 28391712768.0, "34785": 28391712768.0, "34790": 28391712768.0, "34795": 28391712768.0, "34800": 28391712768.0, "34805": 28391712768.0, "34810": 28391712768.0, "34815": 28391712768.0, "34820": 28391712768.0, "34825": 28391712768.0, "34830": 28391712768.0, "34835": 28391712768.0, "34840": 28391712768.0, "34845": 28391712768.0, "34850": 28391712768.0, "34855": 28391712768.0, "34860": 28391712768.0, "34865": 28391712768.0, "34870": 28391712768.0, "34875": 28391712768.0, "34880": 28391712768.0, "34885": 28391712768.0, "34890": 28391712768.0, "34895": 28391712768.0, "34900": 28391712768.0, "34905": 28391712768.0, "34910": 28391712768.0, "34915": 28391712768.0, "34920": 28391712768.0, "34925": 28391712768.0, "34930": 28391712768.0, "34935": 28391712768.0, "34940": 28391712768.0, "34945": 28391712768.0, "34950": 28391712768.0, "34955": 28391712768.0, "34960": 28391712768.0, "34965": 28391712768.0, "34970": 28391712768.0, "34975": 28391712768.0, "34980": 28391712768.0, "34985": 28391712768.0, "34990": 28391712768.0, "34995": 28391712768.0, "35000": 28391712768.0, "35005": 28391712768.0, "35010": 28391712768.0, "35015": 28391712768.0, "35020": 28391712768.0, "35025": 28391712768.0, "35030": 28391712768.0, "35035": 28391712768.0, "35040": 28391712768.0, "35045": 28391712768.0, "35050": 28391712768.0, "35055": 28391712768.0, "35060": 28391712768.0, "35065": 28391712768.0, "35070": 28391712768.0, "35075": 28391712768.0, "35080": 28391712768.0, "35085": 28391712768.0, "35090": 28391712768.0, "35095": 28391712768.0, "35100": 28391712768.0, "35105": 28391712768.0, "35110": 28391712768.0, "35115": 28391712768.0, "35120": 28391712768.0, "35125": 28391712768.0, "35130": 28391712768.0, "35135": 28391712768.0, "35140": 28391712768.0, "35145": 28391712768.0, "35150": 28391712768.0, "35155": 28391712768.0, "35160": 28391712768.0, "35165": 28391712768.0, "35170": 28391712768.0, "35175": 28391712768.0, "35180": 28391712768.0, "35185": 28391712768.0, "35190": 28391712768.0, "35195": 28391712768.0, "35200": 28391712768.0, "35205": 28391712768.0, "35210": 28391712768.0, "35215": 28391712768.0, "35220": 28391712768.0, "35225": 28391712768.0, "35230": 28391712768.0, "35235": 28391712768.0, "35240": 28391712768.0, "35245": 28391712768.0, "35250": 28391712768.0, "35255": 28391712768.0, "35260": 28391712768.0, "35265": 28391712768.0, "35270": 28391712768.0, "35275": 28391712768.0, "35280": 28391712768.0, "35285": 28391712768.0, "35290": 28391712768.0, "35295": 28391712768.0, "35300": 28391712768.0, "35305": 28391712768.0, "35310": 28391712768.0, "35315": 28391712768.0, "35320": 28391712768.0, "35325": 28391712768.0, "35330": 28391712768.0, "35335": 28391712768.0, "35340": 28391712768.0, "35345": 28391712768.0, "35350": 28391712768.0, "35355": 28391712768.0, "35360": 28391712768.0, "35365": 28391712768.0, "35370": 28391712768.0, "35375": 28391712768.0, "35380": 28391712768.0, "35385": 28391712768.0, "35390": 28391712768.0, "35395": 28391712768.0, "35400": 28391712768.0, "35405": 28391712768.0, "35410": 28391712768.0, "35415": 28391712768.0, "35420": 28391712768.0, "35425": 28391712768.0, "35430": 28391712768.0, "35435": 28391712768.0, "35440": 28391712768.0, "35445": 28391712768.0, "35450": 28391712768.0, "35455": 28391712768.0, "35460": 28391712768.0, "35465": 28391712768.0, "35470": 28391712768.0, "35475": 28391712768.0, "35480": 28391712768.0, "35485": 28391712768.0, "35490": 28391712768.0, "35495": 28391712768.0, "35500": 28391712768.0, "35505": 28391712768.0, "35510": 28391712768.0, "35515": 28391712768.0, "35520": 28391712768.0, "35525": 28391712768.0, "35530": 28391712768.0, "35535": 28391712768.0, "35540": 28391712768.0, "35545": 28391712768.0, "35550": 28391712768.0, "35555": 28391712768.0, "35560": 28391712768.0, "35565": 28391712768.0, "35570": 28391712768.0, "35575": 28391712768.0, "35580": 28391712768.0, "35585": 28391712768.0, "35590": 28391712768.0, "35595": 28391712768.0, "35600": 28391712768.0, "35605": 28391712768.0, "35610": 28391712768.0, "35615": 28391712768.0, "35620": 28391712768.0, "35625": 28391712768.0, "35630": 28391712768.0, "35635": 28391712768.0, "35640": 28391712768.0, "35645": 28391712768.0, "35650": 28391712768.0, "35655": 28391712768.0, "35660": 28391712768.0, "35665": 28391712768.0, "35670": 28391712768.0, "35675": 28391712768.0, "35680": 28391712768.0, "35685": 28391712768.0, "35690": 28391712768.0, "35695": 28391712768.0, "35700": 28391712768.0, "35705": 28391712768.0, "35710": 28391712768.0, "35715": 28391712768.0, "35720": 28391712768.0, "35725": 28391712768.0, "35730": 28391712768.0, "35735": 28391712768.0, "35740": 28391712768.0, "35745": 28391712768.0, "35750": 28391712768.0, "35755": 28391712768.0, "35760": 28391712768.0, "35765": 28391712768.0, "35770": 28391712768.0, "35775": 28391712768.0, "35780": 28391712768.0, "35785": 28391712768.0, "35790": 28391712768.0, "35795": 28391712768.0, "35800": 28391712768.0, "35805": 28391712768.0, "35810": 28391712768.0, "35815": 28391712768.0, "35820": 28391712768.0, "35825": 28391712768.0, "35830": 28391712768.0, "35835": 28391712768.0, "35840": 28391712768.0, "35845": 28391712768.0, "35850": 28391712768.0, "35855": 28391712768.0, "35860": 28391712768.0, "35865": 28391712768.0, "35870": 28391712768.0, "35875": 28391712768.0, "35880": 28391712768.0, "35885": 28391712768.0, "35890": 28391712768.0, "35895": 28391712768.0, "35900": 28391712768.0, "35905": 28391712768.0, "35910": 28391712768.0, "35915": 28391712768.0, "35920": 28391712768.0, "35925": 28391712768.0, "35930": 28391712768.0, "35935": 28391712768.0, "35940": 28391712768.0, "35945": 28391712768.0, "35950": 28391712768.0, "35955": 28391712768.0, "35960": 28391712768.0, "35965": 28391712768.0, "35970": 28391712768.0, "35975": 28391712768.0, "35980": 28391712768.0, "35985": 28391712768.0, "35990": 28391712768.0, "35995": 28391712768.0, "36000": 28391712768.0, "36005": 28391712768.0, "36010": 28391712768.0, "36015": 28391712768.0, "36020": 28391712768.0, "36025": 28391712768.0, "36030": 28391712768.0, "36035": 28391712768.0, "36040": 28391712768.0, "36045": 28391712768.0, "36050": 28391712768.0, "36055": 28391712768.0, "36060": 28391712768.0, "36065": 28391712768.0, "36070": 28391712768.0, "36075": 28391712768.0, "36080": 28391712768.0, "36085": 28391712768.0, "36090": 28391712768.0, "36095": 28391712768.0, "36100": 28391712768.0, "36105": 28391712768.0, "36110": 28391712768.0, "36115": 28391712768.0, "36120": 28391712768.0, "36125": 28391712768.0, "36130": 28391712768.0, "36135": 28391712768.0, "36140": 28391712768.0, "36145": 28391712768.0, "36150": 28391712768.0, "36155": 28391712768.0, "36160": 28391712768.0, "36165": 28391712768.0, "36170": 28391712768.0, "36175": 28391712768.0, "36180": 28391712768.0, "36185": 28391712768.0, "36190": 28391712768.0, "36195": 28391712768.0, "36200": 28391712768.0, "36205": 28391712768.0, "36210": 28391712768.0, "36215": 28391712768.0, "36220": 28391712768.0, "36225": 28391712768.0, "36230": 28391712768.0, "36235": 28391712768.0, "36240": 28391712768.0, "36245": 28391712768.0, "36250": 28391712768.0, "36255": 28391712768.0, "36260": 28391712768.0, "36265": 28391712768.0, "36270": 28391712768.0, "36275": 28391712768.0, "36280": 28391712768.0, "36285": 28391712768.0, "36290": 28391712768.0, "36295": 28391712768.0, "36300": 28391712768.0, "36305": 28391712768.0, "36310": 28391712768.0, "36315": 28391712768.0, "36320": 28391712768.0, "36325": 28391712768.0, "36330": 28391712768.0, "36335": 28391712768.0, "36340": 28391712768.0, "36345": 28391712768.0, "36350": 28391712768.0, "36355": 28391712768.0, "36360": 28391712768.0, "36365": 28391712768.0, "36370": 28391712768.0, "36375": 28391712768.0, "36380": 28391712768.0, "36385": 28391712768.0, "36390": 28391712768.0, "36395": 28391712768.0, "36400": 28391712768.0, "36405": 28391712768.0, "36410": 28391712768.0, "36415": 28391712768.0, "36420": 28391712768.0, "36425": 28391712768.0, "36430": 28391712768.0, "36435": 28391712768.0, "36440": 28391712768.0, "36445": 28391712768.0, "36450": 28391712768.0, "36455": 28391712768.0, "36460": 28391712768.0, "36465": 28391712768.0, "36470": 28391712768.0, "36475": 28391712768.0, "36480": 28391712768.0, "36485": 28391712768.0, "36490": 28391712768.0, "36495": 28391712768.0, "36500": 28391712768.0, "36505": 28391712768.0, "36510": 28391712768.0, "36515": 28391712768.0, "36520": 28391712768.0, "36525": 28391712768.0, "36530": 28391712768.0, "36535": 28391712768.0, "36540": 28391712768.0, "36545": 28391712768.0, "36550": 28391712768.0, "36555": 28391712768.0, "36560": 28391712768.0, "36565": 28391712768.0, "36570": 28391712768.0, "36575": 28391712768.0, "36580": 28391712768.0, "36585": 28391712768.0, "36590": 28391712768.0, "36595": 28391712768.0, "36600": 28391712768.0, "36605": 28391712768.0, "36610": 28391712768.0, "36615": 28391712768.0, "36620": 28391712768.0, "36625": 28391712768.0, "36630": 28391712768.0, "36635": 28391712768.0, "36640": 28391712768.0, "36645": 28391712768.0, "36650": 28391712768.0, "36655": 28391712768.0, "36660": 28391712768.0, "36665": 28391712768.0, "36670": 28391712768.0, "36675": 28391712768.0, "36680": 28391712768.0, "36685": 28391712768.0, "36690": 28391712768.0, "36695": 28391712768.0, "36700": 28391712768.0, "36705": 28391712768.0, "36710": 28391712768.0, "36715": 28391712768.0, "36720": 28391712768.0, "36725": 28391712768.0, "36730": 28391712768.0, "36735": 28391712768.0, "36740": 28391712768.0, "36745": 28391712768.0, "36750": 28391712768.0, "36755": 28391712768.0, "36760": 28391712768.0, "36765": 28391712768.0, "36770": 28391712768.0, "36775": 28391712768.0, "36780": 28391712768.0, "36785": 28391712768.0, "36790": 28391712768.0, "36795": 28391712768.0, "36800": 28391712768.0, "36805": 28391712768.0, "36810": 28391712768.0, "36815": 28391712768.0, "36820": 28391712768.0, "36825": 28391712768.0, "36830": 28391712768.0, "36835": 28391712768.0, "36840": 28391712768.0, "36845": 28391712768.0, "36850": 28391712768.0, "36855": 28391712768.0, "36860": 28391712768.0, "36865": 28391712768.0, "36870": 28391712768.0, "36875": 28391712768.0, "36880": 28391712768.0, "36885": 28391712768.0, "36890": 28391712768.0, "36895": 28391712768.0, "36900": 28391712768.0, "36905": 28391712768.0, "36910": 28391712768.0, "36915": 28391712768.0, "36920": 28391712768.0, "36925": 28391712768.0, "36930": 28391712768.0, "36935": 28391712768.0, "36940": 28391712768.0, "36945": 28391712768.0, "36950": 28391712768.0, "36955": 28391712768.0, "36960": 28391712768.0, "36965": 28391712768.0, "36970": 28391712768.0, "36975": 28391712768.0, "36980": 28391712768.0, "36985": 28391712768.0, "36990": 28391712768.0, "36995": 28391712768.0, "37000": 28391712768.0, "37005": 28391712768.0, "37010": 28391712768.0, "37015": 28391712768.0, "37020": 28391712768.0, "37025": 28391712768.0, "37030": 28391712768.0, "37035": 28391712768.0, "37040": 28391712768.0, "37045": 28391712768.0, "37050": 28391712768.0, "37055": 28391712768.0, "37060": 28391712768.0, "37065": 28391712768.0, "37070": 28391712768.0, "37075": 28391712768.0, "37080": 28391712768.0, "37085": 28391712768.0, "37090": 28391712768.0, "37095": 28391712768.0, "37100": 28391712768.0, "37105": 28391712768.0, "37110": 28391712768.0, "37115": 28391712768.0, "37120": 28391712768.0, "37125": 28391712768.0, "37130": 28391712768.0, "37135": 28391712768.0, "37140": 28391712768.0, "37145": 28391712768.0, "37150": 28391712768.0, "37155": 28391712768.0, "37160": 28391712768.0, "37165": 28391712768.0, "37170": 28391712768.0, "37175": 28391712768.0, "37180": 28391712768.0, "37185": 28391712768.0, "37190": 28391712768.0, "37195": 28391712768.0, "37200": 28391712768.0, "37205": 28391712768.0, "37210": 28391712768.0, "37215": 28391712768.0, "37220": 28391712768.0, "37225": 28391712768.0, "37230": 28391712768.0, "37235": 28391712768.0, "37240": 28391712768.0, "37245": 28391712768.0, "37250": 28391712768.0, "37255": 28391712768.0, "37260": 28391712768.0, "37265": 28391712768.0, "37270": 28391712768.0, "37275": 28391712768.0, "37280": 28391712768.0, "37285": 28391712768.0, "37290": 28391712768.0, "37295": 28391712768.0, "37300": 28391712768.0, "37305": 28391712768.0, "37310": 28391712768.0, "37315": 28391712768.0, "37320": 28391712768.0, "37325": 28391712768.0, "37330": 28391712768.0, "37335": 28391712768.0, "37340": 28391712768.0, "37345": 28391712768.0, "37350": 28391712768.0, "37355": 28391712768.0, "37360": 28391712768.0, "37365": 28391712768.0, "37370": 28391712768.0, "37375": 28391712768.0, "37380": 28391712768.0, "37385": 28391712768.0, "37390": 28391712768.0, "37395": 28391712768.0, "37400": 28391712768.0, "37405": 28391712768.0, "37410": 28391712768.0, "37415": 28391712768.0, "37420": 28391712768.0, "37425": 28391712768.0, "37430": 28391712768.0, "37435": 28391712768.0, "37440": 28391712768.0, "37445": 28391712768.0, "37450": 28391712768.0, "37455": 28391712768.0, "37460": 28391712768.0, "37465": 28391712768.0, "37470": 28391712768.0, "37475": 28391712768.0, "37480": 28391712768.0, "37485": 28391712768.0, "37490": 28391712768.0, "37495": 28391712768.0, "37500": 28391712768.0, "37505": 28391712768.0, "37510": 28391712768.0, "37515": 28391712768.0, "37520": 28391712768.0, "37525": 28391712768.0, "37530": 28391712768.0, "37535": 28391712768.0, "37540": 28391712768.0, "37545": 28391712768.0, "37550": 28391712768.0, "37555": 28391712768.0, "37560": 28391712768.0, "37565": 28391712768.0, "37570": 28391712768.0, "37575": 28391712768.0, "37580": 28391712768.0, "37585": 28391712768.0, "37590": 28391712768.0, "37595": 28391712768.0, "37600": 28391712768.0, "37605": 28391712768.0, "37610": 28391712768.0, "37615": 28391712768.0, "37620": 28391712768.0, "37625": 28391712768.0, "37630": 28391712768.0, "37635": 28391712768.0, "37640": 28391712768.0, "37645": 28391712768.0, "37650": 28391712768.0, "37655": 28391712768.0, "37660": 28391712768.0, "37665": 28391712768.0, "37670": 28391712768.0, "37675": 28391712768.0, "37680": 28391712768.0, "37685": 28391712768.0, "37690": 28391712768.0, "37695": 28391712768.0, "37700": 28391712768.0, "37705": 28391712768.0, "37710": 28391712768.0, "37715": 28391712768.0, "37720": 28391712768.0, "37725": 28391712768.0, "37730": 28391712768.0, "37735": 28391712768.0, "37740": 28391712768.0, "37745": 28391712768.0, "37750": 28391712768.0, "37755": 28391712768.0, "37760": 28391712768.0, "37765": 28391712768.0, "37770": 28391712768.0, "37775": 28391712768.0, "37780": 28391712768.0, "37785": 28391712768.0, "37790": 28391712768.0, "37795": 28391712768.0, "37800": 28391712768.0, "37805": 28391712768.0, "37810": 28391712768.0, "37815": 28391712768.0, "37820": 28391712768.0, "37825": 28391712768.0, "37830": 28391712768.0, "37835": 28391712768.0, "37840": 28391712768.0, "37845": 28391712768.0, "37850": 28391712768.0, "37855": 28391712768.0, "37860": 28391712768.0, "37865": 28391712768.0, "37870": 28391712768.0, "37875": 28391712768.0, "37880": 28391712768.0, "37885": 28391712768.0, "37890": 28391712768.0, "37895": 28391712768.0, "37900": 28391712768.0, "37905": 28391712768.0, "37910": 28391712768.0, "37915": 28391712768.0, "37920": 28391712768.0, "37925": 28391712768.0, "37930": 28391712768.0, "37935": 28391712768.0, "37940": 28391712768.0, "37945": 28391712768.0, "37950": 28391712768.0, "37955": 28391712768.0, "37960": 28391712768.0, "37965": 28391712768.0, "37970": 28391712768.0, "37975": 28391712768.0, "37980": 28391712768.0, "37985": 28391712768.0, "37990": 28391712768.0, "37995": 28391712768.0, "38000": 28391712768.0, "38005": 28391712768.0, "38010": 28391712768.0, "38015": 28391712768.0, "38020": 28391712768.0, "38025": 28391712768.0, "38030": 28391712768.0, "38035": 28391712768.0, "38040": 28391712768.0, "38045": 28391712768.0, "38050": 28391712768.0, "38055": 28391712768.0, "38060": 28391712768.0, "38065": 28391712768.0, "38070": 28391712768.0, "38075": 28391712768.0, "38080": 28391712768.0, "38085": 28391712768.0, "38090": 28391712768.0, "38095": 28391712768.0, "38100": 28391712768.0, "38105": 28391712768.0, "38110": 28391712768.0, "38115": 28391712768.0, "38120": 28391712768.0, "38125": 28391712768.0, "38130": 28391712768.0, "38135": 28391712768.0, "38140": 28391712768.0, "38145": 28391712768.0, "38150": 28391712768.0, "38155": 28391712768.0, "38160": 28391712768.0, "38165": 28391712768.0, "38170": 28391712768.0, "38175": 28391712768.0, "38180": 28391712768.0, "38185": 28391712768.0, "38190": 28391712768.0, "38195": 28391712768.0, "38200": 28391712768.0, "38205": 28391712768.0, "38210": 28391712768.0, "38215": 28391712768.0, "38220": 28391712768.0, "38225": 28391712768.0, "38230": 28391712768.0, "38235": 28391712768.0, "38240": 28391712768.0, "38245": 28391712768.0, "38250": 28391712768.0, "38255": 28391712768.0, "38260": 28391712768.0, "38265": 28391712768.0, "38270": 28391712768.0, "38275": 28391712768.0, "38280": 28391712768.0, "38285": 28391712768.0, "38290": 28391712768.0, "38295": 28391712768.0, "38300": 28391712768.0, "38305": 28391712768.0, "38310": 28391712768.0, "38315": 28391712768.0, "38320": 28391712768.0, "38325": 28391712768.0, "38330": 28391712768.0, "38335": 28391712768.0, "38340": 28391712768.0, "38345": 28391712768.0, "38350": 28391712768.0, "38355": 28391712768.0, "38360": 28391712768.0, "38365": 28391712768.0, "38370": 28391712768.0, "38375": 28391712768.0, "38380": 28391712768.0, "38385": 28391712768.0, "38390": 28391712768.0, "38395": 28391712768.0, "38400": 28391712768.0, "38405": 28391712768.0, "38410": 28391712768.0, "38415": 28391712768.0, "38420": 28391712768.0, "38425": 28391712768.0, "38430": 28391712768.0, "38435": 28391712768.0, "38440": 28391712768.0, "38445": 28391712768.0, "38450": 28391712768.0, "38455": 28391712768.0, "38460": 28391712768.0, "38465": 28391712768.0, "38470": 28391712768.0, "38475": 28391712768.0, "38480": 28391712768.0, "38485": 28391712768.0, "38490": 28391712768.0, "38495": 28391712768.0, "38500": 28391712768.0, "38505": 28391712768.0, "38510": 28391712768.0, "38515": 28391712768.0, "38520": 28391712768.0, "38525": 28391712768.0, "38530": 28391712768.0, "38535": 28391712768.0, "38540": 28391712768.0, "38545": 28391712768.0, "38550": 28391712768.0, "38555": 28391712768.0, "38560": 28391712768.0, "38565": 28391712768.0, "38570": 28391712768.0, "38575": 28391712768.0, "38580": 28391712768.0, "38585": 28391712768.0, "38590": 28391712768.0, "38595": 28391712768.0, "38600": 28391712768.0, "38605": 28391712768.0, "38610": 28391712768.0, "38615": 28391712768.0, "38620": 28391712768.0, "38625": 28391712768.0, "38630": 28391712768.0, "38635": 28391712768.0, "38640": 28391712768.0, "38645": 28391712768.0, "38650": 28391712768.0, "38655": 28391712768.0, "38660": 28391712768.0, "38665": 28391712768.0, "38670": 28391712768.0, "38675": 28391712768.0, "38680": 28391712768.0, "38685": 28391712768.0, "38690": 28391712768.0, "38695": 28391712768.0, "38700": 28391712768.0, "38705": 28391712768.0, "38710": 28391712768.0, "38715": 28391712768.0, "38720": 28391712768.0, "38725": 28391712768.0, "38730": 28391712768.0, "38735": 28391712768.0, "38740": 28391712768.0, "38745": 28391712768.0, "38750": 28391712768.0, "38755": 28391712768.0, "38760": 28391712768.0, "38765": 28391712768.0, "38770": 28391712768.0, "38775": 28391712768.0, "38780": 28391712768.0, "38785": 28391712768.0, "38790": 28391712768.0, "38795": 28391712768.0, "38800": 28391712768.0, "38805": 28391712768.0, "38810": 28391712768.0, "38815": 28391712768.0, "38820": 28391712768.0, "38825": 28391712768.0, "38830": 28391712768.0, "38835": 28391712768.0, "38840": 28391712768.0, "38845": 28391712768.0, "38850": 28391712768.0, "38855": 28391712768.0, "38860": 28391712768.0, "38865": 28391712768.0, "38870": 28391712768.0, "38875": 28391712768.0, "38880": 28391712768.0, "38885": 28391712768.0, "38890": 28391712768.0, "38895": 28391712768.0, "38900": 28391712768.0, "38905": 28391712768.0, "38910": 28391712768.0, "38915": 28391712768.0, "38920": 28391712768.0, "38925": 28391712768.0, "38930": 28391712768.0, "38935": 28391712768.0, "38940": 28391712768.0, "38945": 28391712768.0, "38950": 28391712768.0, "38955": 28391712768.0, "38960": 28391712768.0, "38965": 28391712768.0, "38970": 28391712768.0, "38975": 28391712768.0, "38980": 28391712768.0, "38985": 28391712768.0, "38990": 28391712768.0, "38995": 28391712768.0, "39000": 28391712768.0, "39005": 28391712768.0, "39010": 28391712768.0, "39015": 28391712768.0, "39020": 28391712768.0, "39025": 28391712768.0, "39030": 28391712768.0, "39035": 28391712768.0, "39040": 28391712768.0, "39045": 28391712768.0, "39050": 28391712768.0, "39055": 28391712768.0, "39060": 28391712768.0, "39065": 28391712768.0, "39070": 28391712768.0, "39075": 28391712768.0, "39080": 28391712768.0, "39085": 28391712768.0, "39090": 28391712768.0, "39095": 28391712768.0, "39100": 28391712768.0, "39105": 28391712768.0, "39110": 28391712768.0, "39115": 28391712768.0, "39120": 28391712768.0, "39125": 28391712768.0, "39130": 28391712768.0, "39135": 28391712768.0, "39140": 28391712768.0, "39145": 28391712768.0, "39150": 28391712768.0, "39155": 28391712768.0, "39160": 28391712768.0, "39165": 28391712768.0, "39170": 28391712768.0, "39175": 28391712768.0, "39180": 28391712768.0, "39185": 28391712768.0, "39190": 28391712768.0, "39195": 28391712768.0, "39200": 28391712768.0, "39205": 28391712768.0, "39210": 28391712768.0, "39215": 28391712768.0, "39220": 28391712768.0, "39225": 28391712768.0, "39230": 28391712768.0, "39235": 28391712768.0, "39240": 28391712768.0, "39245": 28391712768.0, "39250": 28391712768.0, "39255": 28391712768.0, "39260": 28391712768.0, "39265": 28391712768.0, "39270": 28391712768.0, "39275": 28391712768.0, "39280": 28391712768.0, "39285": 28391712768.0, "39290": 28391712768.0, "39295": 28391712768.0, "39300": 28391712768.0, "39305": 28391712768.0, "39310": 28391712768.0, "39315": 28391712768.0, "39320": 28391712768.0, "39325": 28391712768.0, "39330": 28391712768.0, "39335": 28391712768.0, "39340": 28391712768.0, "39345": 28391712768.0, "39350": 28391712768.0, "39355": 28391712768.0, "39360": 28391712768.0, "39365": 28391712768.0, "39370": 28391712768.0, "39375": 28391712768.0, "39380": 28391712768.0, "39385": 28391712768.0, "39390": 28391712768.0, "39395": 28391712768.0, "39400": 28391712768.0, "39405": 28391712768.0, "39410": 28391712768.0, "39415": 28391712768.0, "39420": 28391712768.0, "39425": 28391712768.0, "39430": 28391712768.0, "39435": 28391712768.0, "39440": 28391712768.0, "39445": 28391712768.0, "39450": 28391712768.0, "39455": 28391712768.0, "39460": 28391712768.0, "39465": 28391712768.0, "39470": 28391712768.0, "39475": 28391712768.0, "39480": 28391712768.0, "39485": 28391712768.0, "39490": 28391712768.0, "39495": 28391712768.0, "39500": 28391712768.0, "39505": 28391712768.0, "39510": 28391712768.0, "39515": 28391712768.0, "39520": 28391712768.0, "39525": 28391712768.0, "39530": 28391712768.0, "39535": 28391712768.0, "39540": 28391712768.0, "39545": 28391712768.0, "39550": 28391712768.0, "39555": 28391712768.0, "39560": 28391712768.0, "39565": 28391712768.0, "39570": 28391712768.0, "39575": 28391712768.0, "39580": 28391712768.0, "39585": 28391712768.0, "39590": 28391712768.0, "39595": 28391712768.0, "39600": 28391712768.0, "39605": 28391712768.0, "39610": 28391712768.0, "39615": 28391712768.0, "39620": 28391712768.0, "39625": 28391712768.0, "39630": 28391712768.0, "39635": 28391712768.0, "39640": 28391712768.0, "39645": 28391712768.0, "39650": 28391712768.0, "39655": 28391712768.0, "39660": 28391712768.0, "39665": 28391712768.0, "39670": 28391712768.0, "39675": 28391712768.0, "39680": 28391712768.0, "39685": 28391712768.0, "39690": 28391712768.0, "39695": 28391712768.0, "39700": 28391712768.0, "39705": 28391712768.0, "39710": 28391712768.0, "39715": 28391712768.0, "39720": 28391712768.0, "39725": 28391712768.0, "39730": 28391712768.0, "39735": 28391712768.0, "39740": 28391712768.0, "39745": 28391712768.0, "39750": 28391712768.0, "39755": 28391712768.0, "39760": 28391712768.0, "39765": 28391712768.0, "39770": 28391712768.0, "39775": 28391712768.0, "39780": 28391712768.0, "39785": 28391712768.0, "39790": 28391712768.0, "39795": 28391712768.0, "39800": 28391712768.0, "39805": 28391712768.0, "39810": 28391712768.0, "39815": 28391712768.0, "39820": 28391712768.0, "39825": 28391712768.0, "39830": 28391712768.0, "39835": 28391712768.0, "39840": 28391712768.0, "39845": 28391712768.0, "39850": 28391712768.0, "39855": 28391712768.0, "39860": 28391712768.0, "39865": 28391712768.0, "39870": 28391712768.0, "39875": 28391712768.0, "39880": 28391712768.0, "39885": 28391712768.0, "39890": 28391712768.0, "39895": 28391712768.0, "39900": 28391712768.0, "39905": 28391712768.0, "39910": 28391712768.0, "39915": 28391712768.0, "39920": 28391712768.0, "39925": 28391712768.0, "39930": 28391712768.0, "39935": 28391712768.0, "39940": 28391712768.0, "39945": 28391712768.0, "39950": 28391712768.0, "39955": 28391712768.0, "39960": 28391712768.0, "39965": 28391712768.0, "39970": 28391712768.0, "39975": 28391712768.0, "39980": 28391712768.0, "39985": 28391712768.0, "39990": 28391712768.0, "39995": 28391712768.0, "40000": 28391712768.0, "40005": 28391712768.0, "40010": 28391712768.0, "40015": 28391712768.0, "40020": 28391712768.0, "40025": 28391712768.0, "40030": 28391712768.0, "40035": 28391712768.0, "40040": 28391712768.0, "40045": 28391712768.0, "40050": 28391712768.0, "40055": 28391712768.0, "40060": 28391712768.0, "40065": 28391712768.0, "40070": 28391712768.0, "40075": 28391712768.0, "40080": 28391712768.0, "40085": 28391712768.0, "40090": 28391712768.0, "40095": 28391712768.0, "40100": 28391712768.0, "40105": 28391712768.0, "40110": 28391712768.0, "40115": 28391712768.0, "40120": 28391712768.0, "40125": 28391712768.0, "40130": 28391712768.0, "40135": 28391712768.0, "40140": 28391712768.0, "40145": 28391712768.0, "40150": 28391712768.0, "40155": 28391712768.0, "40160": 28391712768.0, "40165": 28391712768.0, "40170": 28391712768.0, "40175": 28391712768.0, "40180": 28391712768.0, "40185": 28391712768.0, "40190": 28391712768.0, "40195": 28391712768.0, "40200": 28391712768.0, "40205": 28391712768.0, "40210": 28391712768.0, "40215": 28391712768.0, "40220": 28391712768.0, "40225": 28391712768.0, "40230": 28391712768.0, "40235": 28391712768.0, "40240": 28391712768.0, "40245": 28391712768.0, "40250": 28391712768.0, "40255": 28391712768.0, "40260": 28391712768.0, "40265": 28391712768.0, "40270": 28391712768.0, "40275": 28391712768.0, "40280": 28391712768.0, "40285": 28391712768.0, "40290": 28391712768.0, "40295": 28391712768.0, "40300": 28391712768.0, "40305": 28391712768.0, "40310": 28391712768.0, "40315": 28391712768.0, "40320": 28391712768.0, "40325": 28391712768.0, "40330": 28391712768.0, "40335": 28391712768.0, "40340": 28391712768.0, "40345": 28391712768.0, "40350": 28391712768.0, "40355": 28391712768.0, "40360": 28391712768.0, "40365": 28391712768.0, "40370": 28391712768.0, "40375": 28391712768.0, "40380": 28391712768.0, "40385": 28391712768.0, "40390": 28391712768.0, "40395": 28391712768.0, "40400": 28391712768.0, "40405": 28391712768.0, "40410": 28391712768.0, "40415": 28391712768.0, "40420": 28391712768.0, "40425": 28391712768.0, "40430": 28391712768.0, "40435": 28391712768.0, "40440": 28391712768.0, "40445": 28391712768.0, "40450": 28391712768.0, "40455": 28391712768.0, "40460": 28391712768.0, "40465": 28391712768.0, "40470": 28391712768.0, "40475": 28391712768.0, "40480": 28391712768.0, "40485": 28391712768.0, "40490": 28391712768.0, "40495": 28391712768.0, "40500": 28391712768.0, "40505": 28391712768.0, "40510": 28391712768.0, "40515": 28391712768.0, "40520": 28391712768.0, "40525": 28391712768.0, "40530": 28391712768.0, "40535": 28391712768.0, "40540": 28391712768.0, "40545": 28391712768.0, "40550": 28391712768.0, "40555": 28391712768.0, "40560": 28391712768.0, "40565": 28391712768.0, "40570": 28391712768.0, "40575": 28391712768.0, "40580": 28391712768.0, "40585": 28391712768.0, "40590": 28391712768.0, "40595": 28391712768.0, "40600": 28391712768.0, "40605": 28391712768.0, "40610": 28391712768.0, "40615": 28391712768.0, "40620": 28391712768.0, "40625": 28391712768.0, "40630": 28391712768.0, "40635": 28391712768.0, "40640": 28391712768.0, "40645": 28391712768.0, "40650": 28391712768.0, "40655": 28391712768.0, "40660": 28391712768.0, "40665": 28391712768.0, "40670": 28391712768.0, "40675": 28391712768.0, "40680": 28391712768.0, "40685": 28391712768.0, "40690": 28391712768.0, "40695": 28391712768.0, "40700": 28391712768.0, "40705": 28391712768.0, "40710": 28391712768.0, "40715": 28391712768.0, "40720": 28391712768.0, "40725": 28391712768.0, "40730": 28391712768.0, "40735": 28391712768.0, "40740": 28391712768.0, "40745": 28391712768.0, "40750": 28391712768.0, "40755": 28391712768.0, "40760": 28391712768.0, "40765": 28391712768.0, "40770": 28391712768.0, "40775": 28391712768.0, "40780": 28391712768.0, "40785": 28391712768.0, "40790": 28391712768.0, "40795": 28391712768.0, "40800": 28391712768.0, "40805": 28391712768.0, "40810": 28391712768.0, "40815": 28391712768.0, "40820": 28391712768.0, "40825": 28391712768.0, "40830": 28391712768.0, "40835": 28391712768.0, "40840": 28391712768.0, "40845": 28391712768.0, "40850": 28391712768.0, "40855": 28391712768.0, "40860": 28391712768.0, "40865": 28391712768.0, "40870": 28391712768.0, "40875": 28391712768.0, "40880": 28391712768.0, "40885": 28391712768.0, "40890": 28391712768.0, "40895": 28391712768.0, "40900": 28391712768.0, "40905": 28391712768.0, "40910": 28391712768.0, "40915": 28391712768.0, "40920": 28391712768.0, "40925": 28391712768.0, "40930": 28391712768.0, "40935": 28391712768.0, "40940": 28391712768.0, "40945": 28391712768.0, "40950": 28391712768.0, "40955": 28391712768.0, "40960": 28391712768.0, "40965": 28391712768.0, "40970": 28391712768.0, "40975": 28391712768.0, "40980": 28391712768.0, "40985": 28391712768.0, "40990": 28391712768.0, "40995": 28391712768.0, "41000": 28391712768.0, "41005": 28391712768.0, "41010": 28391712768.0, "41015": 28391712768.0, "41020": 28391712768.0, "41025": 28391712768.0, "41030": 28391712768.0, "41035": 28391712768.0, "41040": 28391712768.0, "41045": 28391712768.0, "41050": 28391712768.0, "41055": 28391712768.0, "41060": 28391712768.0, "41065": 28391712768.0, "41070": 28391712768.0, "41075": 28391712768.0, "41080": 28391712768.0, "41085": 28391712768.0, "41090": 28391712768.0, "41095": 28391712768.0, "41100": 28391712768.0, "41105": 28391712768.0, "41110": 28391712768.0, "41115": 28391712768.0, "41120": 28391712768.0, "41125": 28391712768.0, "41130": 28391712768.0, "41135": 28391712768.0, "41140": 28391712768.0, "41145": 28391712768.0, "41150": 28391712768.0, "41155": 28391712768.0, "41160": 28391712768.0, "41165": 28391712768.0, "41170": 28391712768.0, "41175": 28391712768.0, "41180": 28391712768.0, "41185": 28391712768.0, "41190": 28391712768.0, "41195": 28391712768.0, "41200": 28391712768.0, "41205": 28391712768.0, "41210": 28391712768.0, "41215": 28391712768.0, "41220": 28391712768.0, "41225": 28391712768.0, "41230": 28391712768.0, "41235": 28391712768.0, "41240": 28391712768.0, "41245": 28391712768.0, "41250": 28391712768.0, "41255": 28391712768.0, "41260": 28391712768.0, "41265": 28391712768.0, "41270": 28391712768.0, "41275": 28391712768.0, "41280": 28391712768.0, "41285": 28391712768.0, "41290": 28391712768.0, "41295": 28391712768.0, "41300": 28391712768.0, "41305": 28391712768.0, "41310": 28391712768.0, "41315": 28391712768.0, "41320": 28391712768.0, "41325": 28391712768.0, "41330": 28391712768.0, "41335": 28391712768.0, "41340": 28391712768.0, "41345": 28391712768.0, "41350": 28391712768.0, "41355": 28391712768.0, "41360": 28391712768.0, "41365": 28391712768.0, "41370": 28391712768.0, "41375": 28391712768.0, "41380": 28391712768.0, "41385": 28391712768.0, "41390": 28391712768.0, "41395": 28391712768.0, "41400": 28391712768.0, "41405": 28391712768.0, "41410": 28391712768.0, "41415": 28391712768.0, "41420": 28391712768.0, "41425": 28391712768.0, "41430": 28391712768.0, "41435": 28391712768.0, "41440": 28391712768.0, "41445": 28391712768.0, "41450": 28391712768.0, "41455": 28391712768.0, "41460": 28391712768.0, "41465": 28391712768.0, "41470": 28391712768.0, "41475": 28391712768.0, "41480": 28391712768.0, "41485": 28391712768.0, "41490": 28391712768.0, "41495": 28391712768.0, "41500": 28391712768.0, "41505": 28391712768.0, "41510": 28391712768.0, "41515": 28391712768.0, "41520": 28391712768.0, "41525": 28391712768.0, "41530": 28391712768.0, "41535": 28391712768.0, "41540": 28391712768.0, "41545": 28391712768.0, "41550": 28391712768.0, "41555": 28391712768.0, "41560": 28391712768.0, "41565": 28391712768.0, "41570": 28391712768.0, "41575": 28391712768.0, "41580": 28391712768.0, "41585": 28391712768.0, "41590": 28391712768.0, "41595": 28391712768.0, "41600": 28391712768.0, "41605": 28391712768.0, "41610": 28391712768.0, "41615": 28391712768.0, "41620": 28391712768.0, "41625": 28391712768.0, "41630": 28391712768.0, "41635": 28391712768.0, "41640": 28391712768.0, "41645": 28391712768.0, "41650": 28391712768.0, "41655": 28391712768.0, "41660": 28391712768.0, "41665": 28391712768.0, "41670": 28391712768.0, "41675": 28391712768.0, "41680": 28391712768.0, "41685": 28391712768.0, "41690": 28391712768.0, "41695": 28391712768.0, "41700": 28391712768.0, "41705": 28391712768.0, "41710": 28391712768.0, "41715": 28391712768.0, "41720": 28391712768.0, "41725": 28391712768.0, "41730": 28391712768.0, "41735": 28391712768.0, "41740": 28391712768.0, "41745": 28391712768.0, "41750": 28391712768.0, "41755": 28391712768.0, "41760": 28391712768.0, "41765": 28391712768.0, "41770": 28391712768.0, "41775": 28391712768.0, "41780": 28391712768.0, "41785": 28391712768.0, "41790": 28391712768.0, "41795": 28391712768.0, "41800": 28391712768.0, "41805": 28391712768.0, "41810": 28391712768.0, "41815": 28391712768.0, "41820": 28391712768.0, "41825": 28391712768.0, "41830": 28391712768.0, "41835": 28391712768.0, "41840": 28391712768.0, "41845": 28391712768.0, "41850": 28391712768.0, "41855": 28391712768.0, "41860": 28391712768.0, "41865": 28391712768.0, "41870": 28391712768.0, "41875": 28391712768.0, "41880": 28391712768.0, "41885": 28391712768.0, "41890": 28391712768.0, "41895": 28391712768.0, "41900": 28391712768.0, "41905": 28391712768.0, "41910": 28391712768.0, "41915": 28391712768.0, "41920": 28391712768.0, "41925": 28391712768.0, "41930": 28391712768.0, "41935": 28391712768.0, "41940": 28391712768.0, "41945": 28391712768.0, "41950": 28391712768.0, "41955": 28391712768.0, "41960": 28391712768.0, "41965": 28391712768.0, "41970": 28391712768.0, "41975": 28391712768.0, "41980": 28391712768.0, "41985": 28391712768.0, "41990": 28391712768.0, "41995": 28391712768.0, "42000": 28391712768.0, "42005": 28391712768.0, "42010": 28391712768.0, "42015": 28391712768.0, "42020": 28391712768.0, "42025": 28391712768.0, "42030": 28391712768.0, "42035": 28391712768.0, "42040": 28391712768.0, "42045": 28391712768.0, "42050": 28391712768.0, "42055": 28391712768.0, "42060": 28391712768.0, "42065": 28391712768.0, "42070": 28391712768.0, "42075": 28391712768.0, "42080": 28391712768.0, "42085": 28391712768.0, "42090": 28391712768.0, "42095": 28391712768.0, "42100": 28391712768.0, "42105": 28391712768.0, "42110": 28391712768.0, "42115": 28391712768.0, "42120": 28391712768.0, "42125": 28391712768.0, "42130": 28391712768.0, "42135": 28391712768.0, "42140": 28391712768.0, "42145": 28391712768.0, "42150": 28391712768.0, "42155": 28391712768.0, "42160": 28391712768.0, "42165": 28391712768.0, "42170": 28391712768.0, "42175": 28391712768.0, "42180": 28391712768.0, "42185": 28391712768.0, "42190": 28391712768.0, "42195": 28391712768.0, "42200": 28391712768.0, "42205": 28391712768.0, "42210": 28391712768.0, "42215": 28391712768.0, "42220": 28391712768.0, "42225": 28391712768.0, "42230": 28391712768.0, "42235": 28391712768.0, "42240": 28391712768.0, "42245": 28391712768.0, "42250": 28391712768.0, "42255": 28391712768.0, "42260": 28391712768.0, "42265": 28391712768.0, "42270": 28391712768.0, "42275": 28391712768.0, "42280": 28391712768.0, "42285": 28391712768.0, "42290": 28391712768.0, "42295": 28391712768.0, "42300": 28391712768.0, "42305": 28391712768.0, "42310": 28391712768.0, "42315": 28391712768.0, "42320": 28391712768.0, "42325": 28391712768.0, "42330": 28391712768.0, "42335": 28391712768.0, "42340": 28391712768.0, "42345": 28391712768.0, "42350": 28391712768.0, "42355": 28391712768.0, "42360": 28391712768.0, "42365": 28391712768.0, "42370": 28391712768.0, "42375": 28391712768.0, "42380": 28391712768.0, "42385": 28391712768.0, "42390": 28391712768.0, "42395": 28391712768.0, "42400": 28391712768.0, "42405": 28391712768.0, "42410": 28391712768.0, "42415": 28391712768.0, "42420": 28391712768.0, "42425": 28391712768.0, "42430": 28391712768.0, "42435": 28391712768.0, "42440": 28391712768.0, "42445": 28391712768.0, "42450": 28391712768.0, "42455": 28391712768.0, "42460": 28391712768.0, "42465": 28391712768.0, "42470": 28391712768.0, "42475": 28391712768.0, "42480": 28391712768.0, "42485": 28391712768.0, "42490": 28391712768.0, "42495": 28391712768.0, "42500": 28391712768.0, "42505": 28391712768.0, "42510": 28391712768.0, "42515": 28391712768.0, "42520": 28391712768.0, "42525": 28391712768.0, "42530": 28391712768.0, "42535": 28391712768.0, "42540": 28391712768.0, "42545": 28391712768.0, "42550": 28391712768.0, "42555": 28391712768.0, "42560": 28391712768.0, "42565": 28391712768.0, "42570": 28391712768.0, "42575": 28391712768.0, "42580": 28391712768.0, "42585": 28391712768.0, "42590": 28391712768.0, "42595": 28391712768.0, "42600": 28391712768.0, "42605": 28391712768.0, "42610": 28391712768.0, "42615": 28391712768.0, "42620": 28391712768.0, "42625": 28391712768.0, "42630": 28391712768.0, "42635": 28391712768.0, "42640": 28391712768.0, "42645": 28391712768.0, "42650": 28391712768.0, "42655": 28391712768.0, "42660": 28391712768.0, "42665": 28391712768.0, "42670": 28391712768.0, "42675": 28391712768.0, "42680": 28391712768.0, "42685": 28391712768.0, "42690": 28391712768.0, "42695": 28391712768.0, "42700": 28391712768.0, "42705": 28391712768.0, "42710": 28391712768.0, "42715": 28391712768.0, "42720": 28391712768.0, "42725": 28391712768.0, "42730": 28391712768.0, "42735": 28391712768.0, "42740": 28391712768.0, "42745": 28391712768.0, "42750": 28391712768.0, "42755": 28391712768.0, "42760": 28391712768.0, "42765": 28391712768.0, "42770": 28391712768.0, "42775": 28391712768.0, "42780": 28391712768.0, "42785": 28391712768.0, "42790": 28391712768.0, "42795": 28391712768.0, "42800": 28391712768.0, "42805": 28391712768.0, "42810": 28391712768.0, "42815": 28391712768.0, "42820": 28391712768.0, "42825": 28391712768.0, "42830": 28391712768.0, "42835": 28391712768.0, "42840": 28391712768.0, "42845": 28391712768.0, "42850": 28391712768.0, "42855": 28391712768.0, "42860": 28391712768.0, "42865": 28391712768.0, "42870": 28391712768.0, "42875": 28391712768.0, "42880": 28391712768.0, "42885": 28391712768.0, "42890": 28391712768.0, "42895": 28391712768.0, "42900": 28391712768.0, "42905": 28391712768.0, "42910": 28391712768.0, "42915": 28391712768.0, "42920": 28391712768.0, "42925": 28391712768.0, "42930": 28391712768.0, "42935": 28391712768.0, "42940": 28391712768.0, "42945": 28391712768.0, "42950": 28391712768.0, "42955": 28391712768.0, "42960": 28391712768.0, "42965": 28391712768.0, "42970": 28391712768.0, "42975": 28391712768.0, "42980": 28391712768.0, "42985": 28391712768.0, "42990": 28391712768.0, "42995": 28391712768.0, "43000": 28391712768.0, "43005": 28391712768.0, "43010": 28391712768.0, "43015": 28391712768.0, "43020": 28391712768.0, "43025": 28391712768.0, "43030": 28391712768.0, "43035": 28391712768.0, "43040": 28391712768.0, "43045": 28391712768.0, "43050": 28391712768.0, "43055": 28391712768.0, "43060": 28391712768.0, "43065": 28391712768.0, "43070": 28391712768.0, "43075": 28391712768.0, "43080": 28391712768.0, "43085": 28391712768.0, "43090": 28391712768.0, "43095": 28391712768.0, "43100": 28391712768.0, "43105": 28391712768.0, "43110": 28391712768.0, "43115": 28391712768.0, "43120": 28391712768.0, "43125": 28391712768.0, "43130": 28391712768.0, "43135": 28391712768.0, "43140": 28391712768.0, "43145": 28391712768.0, "43150": 28391712768.0, "43155": 28391712768.0, "43160": 28391712768.0, "43165": 28391712768.0, "43170": 28391712768.0, "43175": 28391712768.0, "43180": 28391712768.0, "43185": 28391712768.0, "43190": 28391712768.0, "43195": 28391712768.0, "43200": 28391712768.0, "43205": 28391712768.0, "43210": 28391712768.0, "43215": 28391712768.0, "43220": 28391712768.0, "43225": 28391712768.0, "43230": 28391712768.0, "43235": 28391712768.0, "43240": 28391712768.0, "43245": 28391712768.0, "43250": 28391712768.0, "43255": 28391712768.0, "43260": 28391712768.0, "43265": 28391712768.0, "43270": 28391712768.0, "43275": 28391712768.0, "43280": 28391712768.0, "43285": 28391712768.0, "43290": 28391712768.0, "43295": 28391712768.0, "43300": 28391712768.0, "43305": 28391712768.0, "43310": 28391712768.0, "43315": 28391712768.0, "43320": 28391712768.0, "43325": 28391712768.0, "43330": 28391712768.0, "43335": 28391712768.0, "43340": 28391712768.0, "43345": 28391712768.0, "43350": 28391712768.0, "43355": 28391712768.0, "43360": 28391712768.0, "43365": 28391712768.0, "43370": 28391712768.0, "43375": 28391712768.0, "43380": 28391712768.0, "43385": 28391712768.0, "43390": 28391712768.0, "43395": 28391712768.0, "43400": 28391712768.0, "43405": 28391712768.0, "43410": 28391712768.0, "43415": 28391712768.0, "43420": 28391712768.0, "43425": 28391712768.0, "43430": 28391712768.0, "43435": 28391712768.0, "43440": 28391712768.0, "43445": 28391712768.0, "43450": 28391712768.0, "43455": 28391712768.0, "43460": 28391712768.0, "43465": 28391712768.0, "43470": 28391712768.0, "43475": 28391712768.0, "43480": 28391712768.0, "43485": 28391712768.0, "43490": 28391712768.0, "43495": 28391712768.0, "43500": 28391712768.0, "43505": 28391712768.0, "43510": 28391712768.0, "43515": 28391712768.0, "43520": 28391712768.0, "43525": 28391712768.0, "43530": 28391712768.0, "43535": 28391712768.0, "43540": 28391712768.0, "43545": 28391712768.0, "43550": 28391712768.0, "43555": 28391712768.0, "43560": 28391712768.0, "43565": 28391712768.0, "43570": 28391712768.0, "43575": 28391712768.0, "43580": 28391712768.0, "43585": 28391712768.0, "43590": 28391712768.0, "43595": 28391712768.0, "43600": 28391712768.0, "43605": 28391712768.0, "43610": 28391712768.0, "43615": 28391712768.0, "43620": 28391712768.0, "43625": 28391712768.0, "43630": 28391712768.0, "43635": 28391712768.0, "43640": 28391712768.0, "43645": 28391712768.0, "43650": 28391712768.0, "43655": 28391712768.0, "43660": 28391712768.0, "43665": 28391712768.0, "43670": 28391712768.0, "43675": 28391712768.0, "43680": 28391712768.0, "43685": 28391712768.0, "43690": 28391712768.0, "43695": 28391712768.0, "43700": 28391712768.0, "43705": 28391712768.0, "43710": 28391712768.0, "43715": 28391712768.0, "43720": 28391712768.0, "43725": 28391712768.0, "43730": 28391712768.0, "43735": 28391712768.0, "43740": 28391712768.0, "43745": 28391712768.0, "43750": 28391712768.0, "43755": 28391712768.0, "43760": 28391712768.0, "43765": 28391712768.0, "43770": 28391712768.0, "43775": 28391712768.0, "43780": 28391712768.0, "43785": 28391712768.0, "43790": 28391712768.0, "43795": 28391712768.0, "43800": 28391712768.0, "43805": 28391712768.0, "43810": 28391712768.0, "43815": 28391712768.0, "43820": 28391712768.0, "43825": 28391712768.0, "43830": 28391712768.0, "43835": 28391712768.0, "43840": 28391712768.0, "43845": 28391712768.0, "43850": 28391712768.0, "43855": 28391712768.0, "43860": 28391712768.0, "43865": 28391712768.0, "43870": 28391712768.0, "43875": 28391712768.0, "43880": 28391712768.0, "43885": 28391712768.0, "43890": 28391712768.0, "43895": 28391712768.0, "43900": 28391712768.0, "43905": 28391712768.0, "43910": 28391712768.0, "43915": 28391712768.0, "43920": 28391712768.0, "43925": 28391712768.0, "43930": 28391712768.0, "43935": 28391712768.0, "43940": 28391712768.0, "43945": 28391712768.0, "43950": 28391712768.0, "43955": 28391712768.0, "43960": 28391712768.0, "43965": 28391712768.0, "43970": 28391712768.0, "43975": 28391712768.0, "43980": 28391712768.0, "43985": 28391712768.0, "43990": 28391712768.0, "43995": 28391712768.0, "44000": 28391712768.0, "44005": 28391712768.0, "44010": 28391712768.0, "44015": 28391712768.0, "44020": 28391712768.0, "44025": 28391712768.0, "44030": 28391712768.0, "44035": 28391712768.0, "44040": 28391712768.0, "44045": 28391712768.0, "44050": 28391712768.0, "44055": 28391712768.0, "44060": 28391712768.0, "44065": 28391712768.0, "44070": 28391712768.0, "44075": 28391712768.0, "44080": 28391712768.0, "44085": 28391712768.0, "44090": 28391712768.0, "44095": 28391712768.0, "44100": 28391712768.0, "44105": 28391712768.0, "44110": 28391712768.0, "44115": 28391712768.0, "44120": 28391712768.0, "44125": 28391712768.0, "44130": 28391712768.0, "44135": 28391712768.0, "44140": 28391712768.0, "44145": 28391712768.0, "44150": 28391712768.0, "44155": 28391712768.0, "44160": 28391712768.0, "44165": 28391712768.0, "44170": 28391712768.0, "44175": 28391712768.0, "44180": 28391712768.0, "44185": 28391712768.0, "44190": 28391712768.0, "44195": 28391712768.0, "44200": 28391712768.0, "44205": 28391712768.0, "44210": 28391712768.0, "44215": 28391712768.0, "44220": 28391712768.0, "44225": 28391712768.0, "44230": 28391712768.0, "44235": 28391712768.0, "44240": 28391712768.0, "44245": 28391712768.0, "44250": 28391712768.0, "44255": 28391712768.0, "44260": 28391712768.0, "44265": 28391712768.0, "44270": 28391712768.0, "44275": 28391712768.0, "44280": 28391712768.0, "44285": 28391712768.0, "44290": 28391712768.0, "44295": 28391712768.0, "44300": 28391712768.0, "44305": 28391712768.0, "44310": 28391712768.0, "44315": 28391712768.0, "44320": 28391712768.0, "44325": 28391712768.0, "44330": 28391712768.0, "44335": 28391712768.0, "44340": 28391712768.0, "44345": 28391712768.0, "44350": 28391712768.0, "44355": 28391712768.0, "44360": 28391712768.0, "44365": 28391712768.0, "44370": 28391712768.0, "44375": 28391712768.0, "44380": 28391712768.0, "44385": 28391712768.0, "44390": 28391712768.0, "44395": 28391712768.0, "44400": 28391712768.0, "44405": 28391712768.0, "44410": 28391712768.0, "44415": 28391712768.0, "44420": 28391712768.0, "44425": 28391712768.0, "44430": 28391712768.0, "44435": 28391712768.0, "44440": 28391712768.0, "44445": 28391712768.0, "44450": 28391712768.0, "44455": 28391712768.0, "44460": 28391712768.0, "44465": 28391712768.0, "44470": 28391712768.0, "44475": 28391712768.0, "44480": 28391712768.0, "44485": 28391712768.0, "44490": 28391712768.0, "44495": 28391712768.0, "44500": 28391712768.0, "44505": 28391712768.0, "44510": 28391712768.0, "44515": 28391712768.0, "44520": 28391712768.0, "44525": 28391712768.0, "44530": 28391712768.0, "44535": 28391712768.0, "44540": 28391712768.0, "44545": 28391712768.0, "44550": 28391712768.0, "44555": 28391712768.0, "44560": 28391712768.0, "44565": 28391712768.0, "44570": 28391712768.0, "44575": 28391712768.0, "44580": 28391712768.0, "44585": 28391712768.0, "44590": 28391712768.0, "44595": 28391712768.0, "44600": 28391712768.0, "44605": 28391712768.0, "44610": 28391712768.0, "44615": 28391712768.0, "44620": 28391712768.0, "44625": 28391712768.0, "44630": 28391712768.0, "44635": 28391712768.0, "44640": 28391712768.0, "44645": 28391712768.0, "44650": 28391712768.0, "44655": 28391712768.0, "44660": 28391712768.0, "44665": 28391712768.0, "44670": 28391712768.0, "44675": 28391712768.0, "44680": 28391712768.0, "44685": 28391712768.0, "44690": 28391712768.0, "44695": 28391712768.0, "44700": 28391712768.0, "44705": 28391712768.0, "44710": 28391712768.0, "44715": 28391712768.0, "44720": 28391712768.0, "44725": 28391712768.0, "44730": 28391712768.0, "44735": 28391712768.0, "44740": 28391712768.0, "44745": 28391712768.0, "44750": 28391712768.0, "44755": 28391712768.0, "44760": 28391712768.0, "44765": 28391712768.0, "44770": 28391712768.0, "44775": 28391712768.0, "44780": 28391712768.0, "44785": 28391712768.0, "44790": 28391712768.0, "44795": 28391712768.0, "44800": 28391712768.0, "44805": 28391712768.0, "44810": 28391712768.0, "44815": 28391712768.0, "44820": 28391712768.0, "44825": 28391712768.0, "44830": 28391712768.0, "44835": 28391712768.0, "44840": 28391712768.0, "44845": 28391712768.0, "44850": 28391712768.0, "44855": 28391712768.0, "44860": 28391712768.0, "44865": 28391712768.0, "44870": 28391712768.0, "44875": 28391712768.0, "44880": 28391712768.0, "44885": 28391712768.0, "44890": 28391712768.0, "44895": 28391712768.0, "44900": 28391712768.0, "44905": 28391712768.0, "44910": 28391712768.0, "44915": 28391712768.0, "44920": 28391712768.0, "44925": 28391712768.0, "44930": 28391712768.0, "44935": 28391712768.0, "44940": 28391712768.0, "44945": 28391712768.0, "44950": 28391712768.0, "44955": 28391712768.0, "44960": 28391712768.0, "44965": 28391712768.0, "44970": 28391712768.0, "44975": 28391712768.0, "44980": 28391712768.0, "44985": 28391712768.0, "44990": 28391712768.0, "44995": 28391712768.0, "45000": 28391712768.0, "45005": 28391712768.0, "45010": 28391712768.0, "45015": 28391712768.0, "45020": 28391712768.0, "45025": 28391712768.0, "45030": 28391712768.0, "45035": 28391712768.0, "45040": 28391712768.0, "45045": 28391712768.0, "45050": 28391712768.0, "45055": 28391712768.0, "45060": 28391712768.0, "45065": 28391712768.0, "45070": 28391712768.0, "45075": 28391712768.0, "45080": 28391712768.0, "45085": 28391712768.0, "45090": 28391712768.0, "45095": 28391712768.0, "45100": 28391712768.0, "45105": 28391712768.0, "45110": 28391712768.0, "45115": 28391712768.0, "45120": 28391712768.0, "45125": 28391712768.0, "45130": 28391712768.0, "45135": 28391712768.0, "45140": 28391712768.0, "45145": 28391712768.0, "45150": 28391712768.0, "45155": 28391712768.0, "45160": 28391712768.0, "45165": 28391712768.0, "45170": 28391712768.0, "45175": 28391712768.0, "45180": 28391712768.0, "45185": 28391712768.0, "45190": 28391712768.0, "45195": 28391712768.0, "45200": 28391712768.0, "45205": 28391712768.0, "45210": 28391712768.0, "45215": 28391712768.0, "45220": 28391712768.0, "45225": 28391712768.0, "45230": 28391712768.0, "45235": 28391712768.0, "45240": 28391712768.0, "45245": 28391712768.0, "45250": 28391712768.0, "45255": 28391712768.0, "45260": 28391712768.0, "45265": 28391712768.0, "45270": 28391712768.0, "45275": 28391712768.0, "45280": 28391712768.0, "45285": 28391712768.0, "45290": 28391712768.0, "45295": 28391712768.0, "45300": 28391712768.0, "45305": 28391712768.0, "45310": 28391712768.0, "45315": 28391712768.0, "45320": 28391712768.0, "45325": 28391712768.0, "45330": 28391712768.0, "45335": 28391712768.0, "45340": 28391712768.0, "45345": 28391712768.0, "45350": 28391712768.0, "45355": 28391712768.0, "45360": 28391712768.0, "45365": 28391712768.0, "45370": 28391712768.0, "45375": 28391712768.0, "45380": 28391712768.0, "45385": 28391712768.0, "45390": 28391712768.0, "45395": 28391712768.0, "45400": 28391712768.0, "45405": 28391712768.0, "45410": 28391712768.0, "45415": 28391712768.0, "45420": 28391712768.0, "45425": 28391712768.0, "45430": 28391712768.0, "45435": 28391712768.0, "45440": 28391712768.0, "45445": 28391712768.0, "45450": 28391712768.0, "45455": 28391712768.0, "45460": 28391712768.0, "45465": 28391712768.0, "45470": 28391712768.0, "45475": 28391712768.0, "45480": 28391712768.0, "45485": 28391712768.0, "45490": 28391712768.0, "45495": 28391712768.0, "45500": 28391712768.0, "45505": 28391712768.0, "45510": 28391712768.0, "45515": 28391712768.0, "45520": 28391712768.0, "45525": 28391712768.0, "45530": 28391712768.0, "45535": 28391712768.0, "45540": 28391712768.0, "45545": 28391712768.0, "45550": 28391712768.0, "45555": 28391712768.0, "45560": 28391712768.0, "45565": 28391712768.0, "45570": 28391712768.0, "45575": 28391712768.0, "45580": 28391712768.0, "45585": 28391712768.0, "45590": 28391712768.0, "45595": 28391712768.0, "45600": 28391712768.0, "45605": 28391712768.0, "45610": 28391712768.0, "45615": 28391712768.0, "45620": 28391712768.0, "45625": 28391712768.0, "45630": 28391712768.0, "45635": 28391712768.0, "45640": 28391712768.0, "45645": 28391712768.0, "45650": 28391712768.0, "45655": 28391712768.0, "45660": 28391712768.0, "45665": 28391712768.0, "45670": 28391712768.0, "45675": 28391712768.0, "45680": 28391712768.0, "45685": 28391712768.0, "45690": 28391712768.0, "45695": 28391712768.0, "45700": 28391712768.0, "45705": 28391712768.0, "45710": 28391712768.0, "45715": 28391712768.0, "45720": 28391712768.0, "45725": 28391712768.0, "45730": 28391712768.0, "45735": 28391712768.0, "45740": 28391712768.0, "45745": 28391712768.0, "45750": 28391712768.0, "45755": 28391712768.0, "45760": 28391712768.0, "45765": 28391712768.0, "45770": 28391712768.0, "45775": 28391712768.0, "45780": 28391712768.0, "45785": 28391712768.0, "45790": 28391712768.0, "45795": 28391712768.0, "45800": 28391712768.0, "45805": 28391712768.0, "45810": 28391712768.0, "45815": 28391712768.0, "45820": 28391712768.0, "45825": 28391712768.0, "45830": 28391712768.0, "45835": 28391712768.0, "45840": 28391712768.0, "45845": 28391712768.0, "45850": 28391712768.0, "45855": 28391712768.0, "45860": 28391712768.0, "45865": 28391712768.0, "45870": 28391712768.0, "45875": 28391712768.0, "45880": 28391712768.0, "45885": 28391712768.0, "45890": 28391712768.0, "45895": 28391712768.0, "45900": 28391712768.0, "45905": 28391712768.0, "45910": 28391712768.0, "45915": 28391712768.0, "45920": 28391712768.0, "45925": 28391712768.0, "45930": 28391712768.0, "45935": 28391712768.0, "45940": 28391712768.0, "45945": 28391712768.0, "45950": 28391712768.0, "45955": 28391712768.0, "45960": 28391712768.0, "45965": 28391712768.0, "45970": 28391712768.0, "45975": 28391712768.0, "45980": 28391712768.0, "45985": 28391712768.0, "45990": 28391712768.0, "45995": 28391712768.0, "46000": 28391712768.0, "46005": 28391712768.0, "46010": 28391712768.0, "46015": 28391712768.0, "46020": 28391712768.0, "46025": 28391712768.0, "46030": 28391712768.0, "46035": 28391712768.0, "46040": 28391712768.0, "46045": 28391712768.0, "46050": 28391712768.0, "46055": 28391712768.0, "46060": 28391712768.0, "46065": 28391712768.0, "46070": 28391712768.0, "46075": 28391712768.0, "46080": 28391712768.0, "46085": 28391712768.0, "46090": 28391712768.0, "46095": 28391712768.0, "46100": 28391712768.0, "46105": 28391712768.0, "46110": 28391712768.0, "46115": 28391712768.0, "46120": 28391712768.0, "46125": 28391712768.0, "46130": 28391712768.0, "46135": 28391712768.0, "46140": 28391712768.0, "46145": 28391712768.0, "46150": 28391712768.0, "46155": 28391712768.0, "46160": 28391712768.0, "46165": 28391712768.0, "46170": 28391712768.0, "46175": 28391712768.0, "46180": 28391712768.0, "46185": 28391712768.0, "46190": 28391712768.0, "46195": 28391712768.0, "46200": 28391712768.0, "46205": 28391712768.0, "46210": 28391712768.0, "46215": 28391712768.0, "46220": 28391712768.0, "46225": 28391712768.0, "46230": 28391712768.0, "46235": 28391712768.0, "46240": 28391712768.0, "46245": 28391712768.0, "46250": 28391712768.0, "46255": 28391712768.0, "46260": 28391712768.0, "46265": 28391712768.0, "46270": 28391712768.0, "46275": 28391712768.0, "46280": 28391712768.0, "46285": 28391712768.0, "46290": 28391712768.0, "46295": 28391712768.0, "46300": 28391712768.0, "46305": 28391712768.0, "46310": 28391712768.0, "46315": 28391712768.0, "46320": 28391712768.0, "46325": 28391712768.0, "46330": 28391712768.0, "46335": 28391712768.0, "46340": 28391712768.0, "46345": 28391712768.0, "46350": 28391712768.0, "46355": 28391712768.0, "46360": 28391712768.0, "46365": 28391712768.0, "46370": 28391712768.0, "46375": 28391712768.0, "46380": 28391712768.0, "46385": 28391712768.0, "46390": 28391712768.0, "46395": 28391712768.0, "46400": 28391712768.0, "46405": 28391712768.0, "46410": 28391712768.0, "46415": 28391712768.0, "46420": 28391712768.0, "46425": 28391712768.0, "46430": 28391712768.0, "46435": 28391712768.0, "46440": 28391712768.0, "46445": 28391712768.0, "46450": 28391712768.0, "46455": 28391712768.0, "46460": 28391712768.0, "46465": 28391712768.0, "46470": 28391712768.0, "46475": 28391712768.0, "46480": 28391712768.0, "46485": 28391712768.0, "46490": 28391712768.0, "46495": 28391712768.0, "46500": 28391712768.0, "46505": 28391712768.0, "46510": 28391712768.0, "46515": 28391712768.0, "46520": 28391712768.0, "46525": 28391712768.0, "46530": 28391712768.0, "46535": 28391712768.0, "46540": 28391712768.0, "46545": 28391712768.0, "46550": 28391712768.0, "46555": 28391712768.0, "46560": 28391712768.0, "46565": 28391712768.0, "46570": 28391712768.0, "46575": 28391712768.0, "46580": 28391712768.0, "46585": 28391712768.0, "46590": 28391712768.0, "46595": 28391712768.0, "46600": 28391712768.0, "46605": 28391712768.0, "46610": 28391712768.0, "46615": 28391712768.0, "46620": 28391712768.0, "46625": 28391712768.0, "46630": 28391712768.0, "46635": 28391712768.0, "46640": 28391712768.0, "46645": 28391712768.0, "46650": 28391712768.0, "46655": 28391712768.0, "46660": 28391712768.0, "46665": 28391712768.0, "46670": 28391712768.0, "46675": 28391712768.0, "46680": 28391712768.0, "46685": 28391712768.0, "46690": 28391712768.0, "46695": 28391712768.0, "46700": 28391712768.0, "46705": 28391712768.0, "46710": 28391712768.0, "46715": 28391712768.0, "46720": 28391712768.0, "46725": 28391712768.0, "46730": 28391712768.0, "46735": 28391712768.0, "46740": 28391712768.0, "46745": 28391712768.0, "46750": 28391712768.0, "46755": 28391712768.0, "46760": 28391712768.0, "46765": 28391712768.0, "46770": 28391712768.0, "46775": 28391712768.0, "46780": 28391712768.0, "46785": 28391712768.0, "46790": 28391712768.0, "46795": 28391712768.0, "46800": 28391712768.0, "46805": 28391712768.0, "46810": 28391712768.0, "46815": 28391712768.0, "46820": 28391712768.0, "46825": 28391712768.0, "46830": 28391712768.0, "46835": 28391712768.0, "46840": 28391712768.0, "46845": 28391712768.0, "46850": 28391712768.0, "46855": 28391712768.0, "46860": 28391712768.0, "46865": 28391712768.0, "46870": 28391712768.0, "46875": 28391712768.0, "46880": 28391712768.0, "46885": 28391712768.0, "46890": 28391712768.0, "46895": 28391712768.0, "46900": 28391712768.0, "46905": 28391712768.0, "46910": 28391712768.0, "46915": 28391712768.0, "46920": 28391712768.0, "46925": 28391712768.0, "46930": 28391712768.0, "46935": 28391712768.0, "46940": 28391712768.0, "46945": 28391712768.0, "46950": 28391712768.0, "46955": 28391712768.0, "46960": 28391712768.0, "46965": 28391712768.0, "46970": 28391712768.0, "46975": 28391712768.0, "46980": 28391712768.0, "46985": 28391712768.0, "46990": 28391712768.0, "46995": 28391712768.0, "47000": 28391712768.0, "47005": 28391712768.0, "47010": 28391712768.0, "47015": 28391712768.0, "47020": 28391712768.0, "47025": 28391712768.0, "47030": 28391712768.0, "47035": 28391712768.0, "47040": 28391712768.0, "47045": 28391712768.0, "47050": 28391712768.0, "47055": 28391712768.0, "47060": 28391712768.0, "47065": 28391712768.0, "47070": 28391712768.0, "47075": 28391712768.0, "47080": 28391712768.0, "47085": 28391712768.0, "47090": 28391712768.0, "47095": 28391712768.0, "47100": 28391712768.0, "47105": 28391712768.0, "47110": 28391712768.0, "47115": 28391712768.0, "47120": 28391712768.0, "47125": 28391712768.0, "47130": 28391712768.0, "47135": 28391712768.0, "47140": 28391712768.0, "47145": 28391712768.0, "47150": 28391712768.0, "47155": 28391712768.0, "47160": 28391712768.0, "47165": 28391712768.0, "47170": 28391712768.0, "47175": 28391712768.0, "47180": 28391712768.0, "47185": 28391712768.0, "47190": 28391712768.0, "47195": 28391712768.0, "47200": 28391712768.0, "47205": 28391712768.0, "47210": 28391712768.0, "47215": 28391712768.0, "47220": 28391712768.0, "47225": 28391712768.0, "47230": 28391712768.0, "47235": 28391712768.0, "47240": 28391712768.0, "47245": 28391712768.0, "47250": 28391712768.0, "47255": 28391712768.0, "47260": 28391712768.0, "47265": 28391712768.0, "47270": 28391712768.0, "47275": 28391712768.0, "47280": 28391712768.0, "47285": 28391712768.0, "47290": 28391712768.0, "47295": 28391712768.0, "47300": 28391712768.0, "47305": 28391712768.0, "47310": 28391712768.0, "47315": 28391712768.0, "47320": 28391712768.0, "47325": 28391712768.0, "47330": 28391712768.0, "47335": 28391712768.0, "47340": 28391712768.0, "47345": 28391712768.0, "47350": 28391712768.0, "47355": 28391712768.0, "47360": 28391712768.0, "47365": 28391712768.0, "47370": 28391712768.0, "47375": 28391712768.0, "47380": 28391712768.0, "47385": 28391712768.0, "47390": 28391712768.0, "47395": 28391712768.0, "47400": 28391712768.0, "47405": 28391712768.0, "47410": 28391712768.0, "47415": 28391712768.0, "47420": 28391712768.0, "47425": 28391712768.0, "47430": 28391712768.0, "47435": 28391712768.0, "47440": 28391712768.0, "47445": 28391712768.0, "47450": 28391712768.0, "47455": 28391712768.0, "47460": 28391712768.0, "47465": 28391712768.0, "47470": 28391712768.0, "47475": 28391712768.0, "47480": 28391712768.0, "47485": 28391712768.0, "47490": 28391712768.0, "47495": 28391712768.0, "47500": 28391712768.0, "47505": 28391712768.0, "47510": 28391712768.0, "47515": 28391712768.0, "47520": 28391712768.0, "47525": 28391712768.0, "47530": 28391712768.0, "47535": 28391712768.0, "47540": 28391712768.0, "47545": 28391712768.0, "47550": 28391712768.0, "47555": 28391712768.0, "47560": 28391712768.0, "47565": 28391712768.0, "47570": 28391712768.0, "47575": 28391712768.0, "47580": 28391712768.0, "47585": 28391712768.0, "47590": 28391712768.0, "47595": 28391712768.0, "47600": 28391712768.0, "47605": 28391712768.0, "47610": 28391712768.0, "47615": 28391712768.0, "47620": 28391712768.0, "47625": 28391712768.0, "47630": 28391712768.0, "47635": 28391712768.0, "47640": 28391712768.0, "47645": 28391712768.0, "47650": 28391712768.0, "47655": 28391712768.0, "47660": 28391712768.0, "47665": 28391712768.0, "47670": 28391712768.0, "47675": 28391712768.0, "47680": 28391712768.0, "47685": 28391712768.0, "47690": 28391712768.0, "47695": 28391712768.0, "47700": 28391712768.0, "47705": 28391712768.0, "47710": 28391712768.0, "47715": 28391712768.0, "47720": 28391712768.0, "47725": 28391712768.0, "47730": 28391712768.0, "47735": 28391712768.0, "47740": 28391712768.0, "47745": 28391712768.0, "47750": 28391712768.0, "47755": 28391712768.0, "47760": 28391712768.0, "47765": 28391712768.0, "47770": 28391712768.0, "47775": 28391712768.0, "47780": 28391712768.0, "47785": 28391712768.0, "47790": 28391712768.0, "47795": 28391712768.0, "47800": 28391712768.0, "47805": 28391712768.0, "47810": 28391712768.0, "47815": 28391712768.0, "47820": 28391712768.0, "47825": 28391712768.0, "47830": 28391712768.0, "47835": 28391712768.0, "47840": 28391712768.0, "47845": 28391712768.0, "47850": 28391712768.0, "47855": 28391712768.0, "47860": 28391712768.0, "47865": 28391712768.0, "47870": 28391712768.0, "47875": 28391712768.0, "47880": 28391712768.0, "47885": 28391712768.0, "47890": 28391712768.0, "47895": 28391712768.0, "47900": 28391712768.0, "47905": 28391712768.0, "47910": 28391712768.0, "47915": 28391712768.0, "47920": 28391712768.0, "47925": 28391712768.0, "47930": 28391712768.0, "47935": 28391712768.0, "47940": 28391712768.0, "47945": 28391712768.0, "47950": 28391712768.0, "47955": 28391712768.0, "47960": 28391712768.0, "47965": 28391712768.0, "47970": 28391712768.0, "47975": 28391712768.0, "47980": 28391712768.0, "47985": 28391712768.0, "47990": 28391712768.0, "47995": 28391712768.0, "48000": 28391712768.0, "48005": 28391712768.0, "48010": 28391712768.0, "48015": 28391712768.0, "48020": 28391712768.0, "48025": 28391712768.0, "48030": 28391712768.0, "48035": 28391712768.0, "48040": 28391712768.0, "48045": 28391712768.0, "48050": 28391712768.0, "48055": 28391712768.0, "48060": 28391712768.0, "48065": 28391712768.0, "48070": 28391712768.0, "48075": 28391712768.0, "48080": 28391712768.0, "48085": 28391712768.0, "48090": 28391712768.0, "48095": 28391712768.0, "48100": 28391712768.0, "48105": 28391712768.0, "48110": 28391712768.0, "48115": 28391712768.0, "48120": 28391712768.0, "48125": 28391712768.0, "48130": 28391712768.0, "48135": 28391712768.0, "48140": 28391712768.0, "48145": 28391712768.0, "48150": 28391712768.0, "48155": 28391712768.0, "48160": 28391712768.0, "48165": 28391712768.0, "48170": 28391712768.0, "48175": 28391712768.0, "48180": 28391712768.0, "48185": 28391712768.0, "48190": 28391712768.0, "48195": 28391712768.0, "48200": 28391712768.0, "48205": 28391712768.0, "48210": 28391712768.0, "48215": 28391712768.0, "48220": 28391712768.0, "48225": 28391712768.0, "48230": 28391712768.0, "48235": 28391712768.0, "48240": 28391712768.0, "48245": 28391712768.0, "48250": 28391712768.0, "48255": 28391712768.0, "48260": 28391712768.0, "48265": 28391712768.0, "48270": 28391712768.0, "48275": 28391712768.0, "48280": 28391712768.0, "48285": 28391712768.0, "48290": 28391712768.0, "48295": 28391712768.0, "48300": 28391712768.0, "48305": 28391712768.0, "48310": 28391712768.0, "48315": 28391712768.0, "48320": 28391712768.0, "48325": 28391712768.0, "48330": 28391712768.0, "48335": 28391712768.0, "48340": 28391712768.0, "48345": 28391712768.0, "48350": 28391712768.0, "48355": 28391712768.0, "48360": 28391712768.0, "48365": 28391712768.0, "48370": 28391712768.0, "48375": 28391712768.0, "48380": 28391712768.0, "48385": 28391712768.0, "48390": 28391712768.0, "48395": 28391712768.0, "48400": 28391712768.0, "48405": 28391712768.0, "48410": 28391712768.0, "48415": 28391712768.0, "48420": 28391712768.0, "48425": 28391712768.0, "48430": 28391712768.0, "48435": 28391712768.0, "48440": 28391712768.0, "48445": 28391712768.0, "48450": 28391712768.0, "48455": 28391712768.0, "48460": 28391712768.0, "48465": 28391712768.0, "48470": 28391712768.0, "48475": 28391712768.0, "48480": 28391712768.0, "48485": 28391712768.0, "48490": 28391712768.0, "48495": 28391712768.0, "48500": 28391712768.0, "48505": 28391712768.0, "48510": 28391712768.0, "48515": 28391712768.0, "48520": 28391712768.0, "48525": 28391712768.0, "48530": 28391712768.0, "48535": 28391712768.0, "48540": 28391712768.0, "48545": 28391712768.0, "48550": 28391712768.0, "48555": 28391712768.0, "48560": 28391712768.0, "48565": 28391712768.0, "48570": 28391712768.0, "48575": 28391712768.0, "48580": 28391712768.0, "48585": 28391712768.0, "48590": 28391712768.0, "48595": 28391712768.0, "48600": 28391712768.0, "48605": 28391712768.0, "48610": 28391712768.0, "48615": 28391712768.0, "48620": 28391712768.0, "48625": 28391712768.0, "48630": 28391712768.0, "48635": 28391712768.0, "48640": 28391712768.0, "48645": 28391712768.0, "48650": 28391712768.0, "48655": 28391712768.0, "48660": 28391712768.0, "48665": 28391712768.0, "48670": 28391712768.0, "48675": 28391712768.0, "48680": 28391712768.0, "48685": 28391712768.0, "48690": 28391712768.0, "48695": 28391712768.0, "48700": 28391712768.0, "48705": 28391712768.0, "48710": 28391712768.0, "48715": 28391712768.0, "48720": 28391712768.0, "48725": 28391712768.0, "48730": 28391712768.0, "48735": 28391712768.0, "48740": 28391712768.0, "48745": 28391712768.0, "48750": 28391712768.0, "48755": 28391712768.0, "48760": 28391712768.0, "48765": 28391712768.0, "48770": 28391712768.0, "48775": 28391712768.0, "48780": 28391712768.0, "48785": 28391712768.0, "48790": 28391712768.0, "48795": 28391712768.0, "48800": 28391712768.0, "48805": 28391712768.0, "48810": 28391712768.0, "48815": 28391712768.0, "48820": 28391712768.0, "48825": 28391712768.0, "48830": 28391712768.0, "48835": 28391712768.0, "48840": 28391712768.0, "48845": 28391712768.0, "48850": 28391712768.0, "48855": 28391712768.0, "48860": 28391712768.0, "48865": 28391712768.0, "48870": 28391712768.0, "48875": 28391712768.0, "48880": 28391712768.0, "48885": 28391712768.0, "48890": 28391712768.0, "48895": 28391712768.0, "48900": 28391712768.0, "48905": 28391712768.0, "48910": 28391712768.0, "48915": 28391712768.0, "48920": 28391712768.0, "48925": 28391712768.0, "48930": 28391712768.0, "48935": 28391712768.0, "48940": 28391712768.0, "48945": 28391712768.0, "48950": 28391712768.0, "48955": 28391712768.0, "48960": 28391712768.0, "48965": 28391712768.0, "48970": 28391712768.0, "48975": 28391712768.0, "48980": 28391712768.0, "48985": 28391712768.0, "48990": 28391712768.0, "48995": 28391712768.0, "49000": 28391712768.0, "49005": 28391712768.0, "49010": 28391712768.0, "49015": 28391712768.0, "49020": 28391712768.0, "49025": 28391712768.0, "49030": 28391712768.0, "49035": 28391712768.0, "49040": 28391712768.0, "49045": 28391712768.0, "49050": 28391712768.0, "49055": 28391712768.0, "49060": 28391712768.0, "49065": 28391712768.0, "49070": 28391712768.0, "49075": 28391712768.0, "49080": 28391712768.0, "49085": 28391712768.0, "49090": 28391712768.0, "49095": 28391712768.0, "49100": 28391712768.0, "49105": 28391712768.0, "49110": 28391712768.0, "49115": 28391712768.0, "49120": 28391712768.0, "49125": 28391712768.0, "49130": 28391712768.0, "49135": 28391712768.0, "49140": 28391712768.0, "49145": 28391712768.0, "49150": 28391712768.0, "49155": 28391712768.0, "49160": 28391712768.0, "49165": 28391712768.0, "49170": 28391712768.0, "49175": 28391712768.0, "49180": 28391712768.0, "49185": 28391712768.0, "49190": 28391712768.0, "49195": 28391712768.0, "49200": 28391712768.0, "49205": 28391712768.0, "49210": 28391712768.0, "49215": 28391712768.0, "49220": 28391712768.0, "49225": 28391712768.0, "49230": 28391712768.0, "49235": 28391712768.0, "49240": 28391712768.0, "49245": 28391712768.0, "49250": 28391712768.0, "49255": 28391712768.0, "49260": 28391712768.0, "49265": 28391712768.0, "49270": 28391712768.0, "49275": 28391712768.0, "49280": 28391712768.0, "49285": 28391712768.0, "49290": 28391712768.0, "49295": 28391712768.0, "49300": 28391712768.0, "49305": 28391712768.0, "49310": 28391712768.0, "49315": 28391712768.0, "49320": 28391712768.0, "49325": 28391712768.0, "49330": 28391712768.0, "49335": 28391712768.0, "49340": 28391712768.0, "49345": 28391712768.0, "49350": 28391712768.0, "49355": 28391712768.0, "49360": 28391712768.0, "49365": 28391712768.0, "49370": 28391712768.0, "49375": 28391712768.0, "49380": 28391712768.0, "49385": 28391712768.0, "49390": 28391712768.0, "49395": 28391712768.0, "49400": 28391712768.0, "49405": 28391712768.0, "49410": 28391712768.0, "49415": 28391712768.0, "49420": 28391712768.0, "49425": 28391712768.0, "49430": 28391712768.0, "49435": 28391712768.0, "49440": 28391712768.0, "49445": 28391712768.0, "49450": 28391712768.0, "49455": 28391712768.0, "49460": 28391712768.0, "49465": 28391712768.0, "49470": 28391712768.0, "49475": 28391712768.0, "49480": 28391712768.0, "49485": 28391712768.0, "49490": 28391712768.0, "49495": 28391712768.0, "49500": 28391712768.0, "49505": 28391712768.0, "49510": 28391712768.0, "49515": 28391712768.0, "49520": 28391712768.0, "49525": 28391712768.0, "49530": 28391712768.0, "49535": 28391712768.0, "49540": 28391712768.0, "49545": 28391712768.0, "49550": 28391712768.0, "49555": 28391712768.0, "49560": 28391712768.0, "49565": 28391712768.0, "49570": 28391712768.0, "49575": 28391712768.0, "49580": 28391712768.0, "49585": 28391712768.0, "49590": 28391712768.0, "49595": 28391712768.0, "49600": 28391712768.0, "49605": 28391712768.0, "49610": 28391712768.0, "49615": 28391712768.0, "49620": 28391712768.0, "49625": 28391712768.0, "49630": 28391712768.0, "49635": 28391712768.0, "49640": 28391712768.0, "49645": 28391712768.0, "49650": 28391712768.0, "49655": 28391712768.0, "49660": 28391712768.0, "49665": 28391712768.0, "49670": 28391712768.0, "49675": 28391712768.0, "49680": 28391712768.0, "49685": 28391712768.0, "49690": 28391712768.0, "49695": 28391712768.0, "49700": 28391712768.0, "49705": 28391712768.0, "49710": 28391712768.0, "49715": 28391712768.0, "49720": 28391712768.0, "49725": 28391712768.0, "49730": 28391712768.0, "49735": 28391712768.0, "49740": 28391712768.0, "49745": 28391712768.0, "49750": 28391712768.0, "49755": 28391712768.0, "49760": 28391712768.0, "49765": 28391712768.0, "49770": 28391712768.0, "49775": 28391712768.0, "49780": 28391712768.0, "49785": 28391712768.0, "49790": 28391712768.0, "49795": 28391712768.0, "49800": 28391712768.0, "49805": 28391712768.0, "49810": 28391712768.0, "49815": 28391712768.0, "49820": 28391712768.0, "49825": 28391712768.0, "49830": 28391712768.0, "49835": 28391712768.0, "49840": 28391712768.0, "49845": 28391712768.0, "49850": 28391712768.0, "49855": 28391712768.0, "49860": 28391712768.0, "49865": 28391712768.0, "49870": 28391712768.0, "49875": 28391712768.0, "49880": 28391712768.0, "49885": 28391712768.0, "49890": 28391712768.0, "49895": 28391712768.0, "49900": 28391712768.0, "49905": 28391712768.0, "49910": 28391712768.0, "49915": 28391712768.0, "49920": 28391712768.0, "49925": 28391712768.0, "49930": 28391712768.0, "49935": 28391712768.0, "49940": 28391712768.0, "49945": 28391712768.0, "49950": 28391712768.0, "49955": 28391712768.0, "49960": 28391712768.0, "49965": 28391712768.0, "49970": 28391712768.0, "49975": 28391712768.0, "49980": 28391712768.0, "49985": 28391712768.0, "49990": 28391712768.0, "49995": 28391712768.0, "50000": 28391712768.0, "50005": 28391712768.0, "50010": 28391712768.0, "50015": 28391712768.0, "50020": 28391712768.0, "50025": 28391712768.0, "50030": 28391712768.0, "50035": 28391712768.0, "50040": 28391712768.0, "50045": 28391712768.0, "50050": 28391712768.0, "50055": 28391712768.0, "50060": 28391712768.0, "50065": 28391712768.0, "50070": 28391712768.0, "50075": 28391712768.0, "50080": 28391712768.0, "50085": 28391712768.0, "50090": 28391712768.0, "50095": 28391712768.0, "50100": 28391712768.0, "50105": 28391712768.0, "50110": 28391712768.0, "50115": 28391712768.0, "50120": 28391712768.0, "50125": 28391712768.0, "50130": 28391712768.0, "50135": 28391712768.0, "50140": 28391712768.0, "50145": 28391712768.0, "50150": 28391712768.0, "50155": 28391712768.0, "50160": 28391712768.0, "50165": 28391712768.0, "50170": 28391712768.0, "50175": 28391712768.0, "50180": 28391712768.0, "50185": 28391712768.0, "50190": 28391712768.0, "50195": 28391712768.0, "50200": 28391712768.0, "50205": 28391712768.0, "50210": 28391712768.0, "50215": 28391712768.0, "50220": 28391712768.0, "50225": 28391712768.0, "50230": 28391712768.0, "50235": 28391712768.0, "50240": 28391712768.0, "50245": 28391712768.0, "50250": 28391712768.0, "50255": 28391712768.0, "50260": 28391712768.0, "50265": 28391712768.0, "50270": 28391712768.0, "50275": 28391712768.0, "50280": 28391712768.0, "50285": 28391712768.0, "50290": 28391712768.0, "50295": 28391712768.0, "50300": 28391712768.0, "50305": 28391712768.0, "50310": 28391712768.0, "50315": 28391712768.0, "50320": 28391712768.0, "50325": 28391712768.0, "50330": 28391712768.0, "50335": 28391712768.0, "50340": 28391712768.0, "50345": 28391712768.0, "50350": 28391712768.0, "50355": 28391712768.0, "50360": 28391712768.0, "50365": 28391712768.0, "50370": 28391712768.0, "50375": 28391712768.0, "50380": 28391712768.0, "50385": 28391712768.0, "50390": 28391712768.0, "50395": 28391712768.0, "50400": 28391712768.0, "50405": 28391712768.0, "50410": 28391712768.0, "50415": 28391712768.0, "50420": 28391712768.0, "50425": 28391712768.0, "50430": 28391712768.0, "50435": 28391712768.0, "50440": 28391712768.0, "50445": 28391712768.0, "50450": 28391712768.0, "50455": 28391712768.0, "50460": 28391712768.0, "50465": 28391712768.0, "50470": 28391712768.0, "50475": 28391712768.0, "50480": 28391712768.0, "50485": 28391712768.0, "50490": 28391712768.0, "50495": 28391712768.0, "50500": 28391712768.0, "50505": 28391712768.0, "50510": 28391712768.0, "50515": 28391712768.0, "50520": 28391712768.0, "50525": 28391712768.0, "50530": 28391712768.0, "50535": 28391712768.0, "50540": 28391712768.0, "50545": 28391712768.0, "50550": 28391712768.0, "50555": 28391712768.0, "50560": 28391712768.0, "50565": 28391712768.0, "50570": 28391712768.0, "50575": 28391712768.0, "50580": 28391712768.0, "50585": 28391712768.0, "50590": 28391712768.0, "50595": 28391712768.0, "50600": 28391712768.0, "50605": 28391712768.0, "50610": 28391712768.0, "50615": 28391712768.0, "50620": 28391712768.0, "50625": 28391712768.0, "50630": 28391712768.0, "50635": 28391712768.0, "50640": 28391712768.0, "50645": 28391712768.0, "50650": 28391712768.0, "50655": 28391712768.0, "50660": 28391712768.0, "50665": 28391712768.0, "50670": 28391712768.0, "50675": 28391712768.0, "50680": 28391712768.0, "50685": 28391712768.0, "50690": 28391712768.0, "50695": 28391712768.0, "50700": 28391712768.0, "50705": 28391712768.0, "50710": 28391712768.0, "50715": 28391712768.0, "50720": 28391712768.0, "50725": 28391712768.0, "50730": 28391712768.0, "50735": 28391712768.0, "50740": 28391712768.0, "50745": 28391712768.0, "50750": 28391712768.0, "50755": 28391712768.0, "50760": 28391712768.0, "50765": 28391712768.0, "50770": 28391712768.0, "50775": 28391712768.0, "50780": 28391712768.0, "50785": 28391712768.0, "50790": 28391712768.0, "50795": 28391712768.0, "50800": 28391712768.0, "50805": 28391712768.0, "50810": 28391712768.0, "50815": 28391712768.0, "50820": 28391712768.0, "50825": 28391712768.0, "50830": 28391712768.0, "50835": 28391712768.0, "50840": 28391712768.0, "50845": 28391712768.0, "50850": 28391712768.0, "50855": 28391712768.0, "50860": 28391712768.0, "50865": "nan", "50870": "nan", "50875": "nan", "50880": "nan", "50885": "nan", "50890": "nan", "50895": "nan", "50900": "nan", "50905": "nan", "50910": "nan", "50915": "nan", "50920": "nan", "50925": "nan", "50930": "nan", "50935": "nan", "50940": "nan", "50945": "nan", "50950": "nan", "50955": "nan", "50960": "nan", "50965": "nan", "50970": "nan", "50975": "nan", "50980": "nan", "50985": "nan", "50990": "nan", "50995": "nan", "51000": "nan", "51005": "nan", "51010": "nan", "51015": "nan", "51020": "nan", "51025": "nan", "51030": "nan", "51035": "nan", "51040": "nan", "51045": "nan", "51050": "nan", "51055": "nan", "51060": "nan", "51065": "nan", "51070": "nan", "51075": "nan", "51080": "nan", "51085": "nan", "51090": "nan", "51095": "nan", "51100": "nan", "51105": "nan", "51110": "nan", "51115": "nan", "51120": "nan", "51125": "nan", "51130": "nan", "51135": "nan", "51140": "nan", "51145": "nan", "51150": "nan", "51155": "nan", "51160": "nan", "51165": "nan", "51170": "nan", "51175": "nan", "51180": "nan", "51185": "nan", "51190": "nan", "51195": "nan", "51200": "nan", "51205": "nan", "51210": "nan", "51215": "nan", "51220": "nan", "51225": "nan", "51230": "nan", "51235": "nan", "51240": "nan", "51245": "nan", "51250": "nan", "51255": "nan", "51260": "nan", "51265": "nan", "51270": "nan", "51275": "nan", "51280": "nan", "51285": "nan", "51290": "nan", "51295": "nan", "51300": "nan", "51305": "nan", "51310": "nan", "51315": "nan", "51320": "nan", "51325": "nan", "51330": "nan", "51335": "nan", "51340": "nan", "51345": "nan", "51350": "nan", "51355": "nan", "51360": "nan", "51365": "nan", "51370": "nan", "51375": "nan", "51380": "nan", "51385": "nan", "51390": "nan", "51395": "nan", "51400": "nan", "51405": "nan", "51410": "nan", "51415": "nan", "51420": "nan", "51425": "nan", "51430": "nan", "51435": "nan", "51440": "nan", "51445": "nan", "51450": "nan", "51455": "nan", "51460": "nan", "51465": "nan", "51470": "nan", "51475": "nan", "51480": "nan", "51485": "nan", "51490": "nan", "51495": "nan", "51500": "nan", "51505": "nan", "51510": "nan", "51515": "nan", "51520": "nan", "51525": "nan", "51530": "nan", "51535": "nan", "51540": "nan", "51545": "nan", "51550": "nan", "51555": "nan", "51560": "nan", "51565": "nan", "51570": "nan", "51575": "nan", "51580": "nan", "51585": "nan", "51590": "nan", "51595": "nan", "51600": "nan", "51605": "nan", "51610": "nan", "51615": "nan", "51620": "nan", "51625": "nan", "51630": "nan", "51635": "nan", "51640": "nan", "51645": "nan", "51650": "nan", "51655": "nan", "51660": "nan", "51665": "nan", "51670": "nan", "51675": "nan", "51680": "nan", "51685": "nan", "51690": "nan", "51695": "nan", "51700": "nan", "51705": "nan", "51710": "nan", "51715": "nan", "51720": "nan", "51725": "nan", "51730": "nan", "51735": "nan", "51740": "nan", "51745": "nan", "51750": "nan", "51755": "nan", "51760": "nan", "51765": "nan", "51770": "nan", "51775": "nan", "51780": "nan", "51785": "nan", "51790": "nan", "51795": "nan", "51800": "nan", "51805": "nan", "51810": "nan", "51815": "nan", "51820": "nan", "51825": "nan", "51830": "nan", "51835": "nan", "51840": "nan", "51845": "nan", "51850": "nan", "51855": "nan", "51860": "nan", "51865": "nan", "51870": "nan", "51875": "nan", "51880": "nan", "51885": "nan", "51890": "nan", "51895": "nan", "51900": "nan", "51905": "nan", "51910": "nan", "51915": "nan", "51920": "nan", "51925": "nan", "51930": "nan", "51935": "nan", "51940": "nan", "51945": "nan", "51950": "nan", "51955": "nan", "51960": "nan", "51965": "nan", "51970": "nan", "51975": "nan", "51980": "nan", "51985": "nan", "51990": "nan", "51995": "nan", "52000": "nan", "52005": "nan", "52010": "nan", "52015": "nan", "52020": "nan", "52025": "nan", "52030": "nan", "52035": "nan", "52040": "nan", "52045": "nan", "52050": "nan", "52055": "nan", "52060": "nan", "52065": "nan", "52070": "nan", "52075": "nan", "52080": "nan", "52085": "nan", "52090": "nan", "52095": "nan", "52100": "nan", "52105": "nan", "52110": "nan", "52115": "nan", "52120": "nan", "52125": "nan", "52130": "nan", "52135": "nan", "52140": "nan", "52145": "nan", "52150": "nan", "52155": "nan", "52160": "nan", "52165": "nan", "52170": "nan", "52175": "nan", "52180": "nan", "52185": "nan", "52190": "nan", "52195": "nan", "52200": "nan", "52205": "nan", "52210": "nan", "52215": "nan", "52220": "nan", "52225": "nan", "52230": "nan", "52235": "nan", "52240": "nan", "52245": "nan", "52250": "nan", "52255": "nan", "52260": "nan", "52265": "nan", "52270": "nan", "52275": "nan", "52280": "nan", "52285": "nan", "52290": "nan", "52295": "nan", "52300": "nan", "52305": "nan", "52310": "nan", "52315": "nan", "52320": "nan", "52325": "nan", "52330": "nan", "52335": "nan", "52340": "nan", "52345": "nan", "52350": "nan", "52355": "nan", "52360": "nan", "52365": "nan", "52370": "nan", "52375": "nan", "52380": "nan", "52385": "nan", "52390": "nan", "52395": "nan", "52400": "nan", "52405": "nan", "52410": "nan", "52415": "nan", "52420": "nan", "52425": "nan", "52430": "nan", "52435": "nan", "52440": "nan", "52445": "nan", "52450": "nan", "52455": "nan", "52460": "nan", "52465": "nan", "52470": "nan", "52475": "nan", "52480": "nan", "52485": "nan", "52490": "nan", "52495": "nan", "52500": "nan", "52505": "nan", "52510": "nan", "52515": "nan", "52520": "nan", "52525": "nan", "52530": "nan", "52535": "nan", "52540": "nan", "52545": "nan", "52550": "nan", "52555": "nan", "52560": "nan", "52565": "nan", "52570": "nan", "52575": "nan", "52580": "nan", "52585": "nan", "52590": "nan", "52595": "nan", "52600": "nan", "52605": "nan", "52610": "nan", "52615": "nan", "52620": "nan", "52625": "nan", "52630": "nan", "52635": "nan", "52640": "nan", "52645": "nan", "52650": "nan", "52655": "nan", "52660": "nan", "52665": "nan", "52670": "nan", "52675": "nan", "52680": "nan", "52685": "nan", "52690": "nan", "52695": "nan", "52700": "nan", "52705": "nan", "52710": "nan", "52715": "nan", "52720": "nan", "52725": "nan", "52730": "nan", "52735": "nan", "52740": "nan", "52745": "nan", "52750": "nan", "52755": "nan", "52760": "nan", "52765": "nan", "52770": "nan", "52775": "nan", "52780": "nan", "52785": "nan", "52790": "nan", "52795": "nan", "52800": "nan", "52805": "nan", "52810": "nan", "52815": "nan", "52820": "nan", "52825": "nan", "52830": "nan", "52835": "nan", "52840": "nan", "52845": "nan", "52850": "nan", "52855": "nan", "52860": "nan", "52865": "nan", "52870": "nan", "52875": "nan", "52880": "nan", "52885": "nan", "52890": "nan", "52895": "nan", "52900": "nan", "52905": "nan", "52910": "nan", "52915": "nan", "52920": "nan", "52925": "nan", "52930": "nan", "52935": "nan", "52940": "nan", "52945": "nan", "52950": "nan", "52955": "nan", "52960": "nan", "52965": "nan", "52970": "nan", "52975": "nan", "52980": "nan", "52985": "nan", "52990": "nan", "52995": "nan", "53000": "nan", "53005": "nan", "53010": "nan", "53015": "nan", "53020": "nan", "53025": "nan", "53030": "nan", "53035": "nan", "53040": "nan", "53045": "nan", "53050": "nan", "53055": "nan", "53060": "nan", "53065": "nan", "53070": "nan", "53075": "nan", "53080": "nan", "53085": "nan", "53090": "nan", "53095": "nan", "53100": "nan", "53105": "nan", "53110": "nan", "53115": "nan", "53120": "nan", "53125": "nan", "53130": "nan", "53135": "nan", "53140": "nan", "53145": "nan", "53150": "nan", "53155": "nan", "53160": "nan", "53165": "nan", "53170": "nan", "53175": "nan", "53180": "nan", "53185": "nan", "53190": "nan", "53195": "nan", "53200": "nan", "53205": "nan", "53210": "nan", "53215": "nan", "53220": "nan", "53225": "nan", "53230": "nan", "53235": "nan", "53240": "nan", "53245": "nan", "53250": "nan", "53255": "nan", "53260": "nan", "53265": "nan", "53270": "nan", "53275": "nan", "53280": "nan", "53285": "nan", "53290": "nan", "53295": "nan", "53300": "nan", "53305": "nan", "53310": "nan", "53315": "nan", "53320": "nan", "53325": "nan", "53330": "nan", "53335": "nan", "53340": "nan", "53345": "nan", "53350": "nan", "53355": "nan", "53360": "nan", "53365": "nan", "53370": "nan", "53375": "nan", "53380": "nan", "53385": "nan", "53390": "nan", "53395": "nan", "53400": "nan", "53405": "nan", "53410": "nan", "53415": "nan", "53420": "nan", "53425": "nan", "53430": "nan", "53435": "nan", "53440": "nan", "53445": "nan", "53450": "nan", "53455": "nan", "53460": "nan", "53465": "nan", "53470": "nan", "53475": "nan", "53480": "nan", "53485": "nan", "53490": "nan", "53495": "nan", "53500": "nan", "53505": "nan", "53510": "nan", "53515": "nan", "53520": "nan", "53525": "nan", "53530": "nan", "53535": "nan", "53540": "nan", "53545": "nan", "53550": "nan", "53555": "nan", "53560": "nan", "53565": "nan", "53570": "nan", "53575": "nan", "53580": "nan", "53585": "nan", "53590": "nan", "53595": "nan", "53600": "nan", "53605": "nan", "53610": "nan", "53615": "nan", "53620": "nan", "53625": "nan", "53630": "nan", "53635": "nan", "53640": "nan", "53645": "nan", "53650": "nan", "53655": "nan", "53660": "nan", "53665": "nan", "53670": "nan", "53675": "nan", "53680": "nan", "53685": "nan", "53690": "nan", "53695": "nan", "53700": "nan", "53705": "nan", "53710": "nan", "53715": "nan", "53720": "nan", "53725": "nan", "53730": "nan", "53735": "nan", "53740": "nan", "53745": "nan", "53750": "nan", "53755": "nan", "53760": "nan", "53765": "nan", "53770": "nan", "53775": "nan", "53780": "nan", "53785": "nan", "53790": "nan", "53795": "nan", "53800": "nan", "53805": "nan", "53810": "nan", "53815": "nan", "53820": "nan", "53825": "nan", "53830": "nan", "53835": "nan", "53840": "nan", "53845": "nan", "53850": "nan", "53855": "nan", "53860": "nan", "53865": "nan", "53870": "nan", "53875": "nan", "53880": "nan", "53885": "nan", "53890": "nan", "53895": "nan", "53900": "nan", "53905": "nan", "53910": "nan", "53915": "nan", "53920": "nan", "53925": "nan", "53930": "nan", "53935": "nan", "53940": "nan", "53945": "nan", "53950": "nan", "53955": "nan", "53960": "nan", "53965": "nan", "53970": "nan", "53975": "nan", "53980": "nan", "53985": "nan", "53990": "nan", "53995": "nan", "54000": "nan", "54005": "nan", "54010": "nan", "54015": "nan", "54020": "nan", "54025": "nan", "54030": "nan", "54035": "nan", "54040": "nan", "54045": "nan", "54050": "nan", "54055": "nan", "54060": "nan", "54065": "nan", "54070": "nan", "54075": "nan", "54080": "nan", "54085": "nan", "54090": "nan", "54095": "nan", "54100": "nan", "54105": "nan", "54110": "nan", "54115": "nan", "54120": "nan", "54125": "nan", "54130": "nan", "54135": "nan", "54140": "nan", "54145": "nan", "54150": "nan", "54155": "nan", "54160": "nan", "54165": "nan", "54170": "nan", "54175": "nan", "54180": "nan", "54185": "nan", "54190": "nan", "54195": "nan", "54200": "nan", "54205": "nan", "54210": "nan", "54215": "nan", "54220": "nan", "54225": "nan", "54230": "nan", "54235": "nan", "54240": "nan", "54245": "nan", "54250": "nan", "54255": "nan", "54260": "nan", "54265": "nan", "54270": "nan", "54275": "nan", "54280": "nan", "54285": "nan", "54290": "nan", "54295": "nan", "54300": "nan", "54305": "nan", "54310": "nan", "54315": "nan", "54320": "nan", "54325": "nan", "54330": "nan", "54335": "nan", "54340": "nan", "54345": "nan", "54350": "nan", "54355": "nan", "54360": "nan", "54365": "nan", "54370": "nan", "54375": "nan", "54380": "nan", "54385": "nan", "54390": "nan", "54395": "nan", "54400": "nan", "54405": "nan", "54410": "nan", "54415": "nan", "54420": "nan", "54425": "nan", "54430": "nan", "54435": "nan", "54440": "nan", "54445": "nan", "54450": "nan", "54455": "nan", "54460": "nan", "54465": "nan", "54470": "nan", "54475": "nan", "54480": "nan", "54485": "nan", "54490": "nan", "54495": "nan", "54500": "nan", "54505": "nan", "54510": "nan", "54515": "nan", "54520": "nan", "54525": "nan", "54530": "nan", "54535": "nan", "54540": "nan", "54545": "nan", "54550": "nan", "54555": "nan", "54560": "nan", "54565": "nan", "54570": "nan", "54575": "nan", "54580": "nan", "54585": "nan", "54590": "nan", "54595": "nan", "54600": "nan", "54605": "nan", "54610": "nan", "54615": "nan", "54620": "nan", "54625": "nan", "54630": "nan", "54635": "nan", "54640": "nan", "54645": "nan", "54650": "nan", "54655": "nan", "54660": "nan", "54665": "nan", "54670": "nan", "54675": "nan", "54680": "nan", "54685": "nan", "54690": "nan", "54695": "nan", "54700": "nan", "54705": "nan", "54710": "nan", "54715": "nan", "54720": "nan", "54725": "nan", "54730": "nan", "54735": "nan", "54740": "nan", "54745": "nan", "54750": "nan", "54755": "nan", "54760": "nan", "54765": "nan", "54770": "nan", "54775": "nan", "54780": "nan", "54785": "nan", "54790": "nan", "54795": "nan", "54800": "nan", "54805": "nan", "54810": "nan", "54815": "nan", "54820": "nan", "54825": "nan", "54830": "nan", "54835": "nan", "54840": "nan", "54845": "nan", "54850": "nan", "54855": "nan", "54860": "nan", "54865": "nan", "54870": "nan", "54875": "nan", "54880": "nan", "54885": "nan", "54890": "nan", "54895": "nan", "54900": "nan", "54905": "nan", "54910": "nan", "54915": "nan", "54920": "nan", "54925": "nan", "54930": "nan", "54935": "nan", "54940": "nan", "54945": "nan", "54950": "nan", "54955": "nan", "54960": "nan", "54965": "nan", "54970": "nan", "54975": "nan", "54980": "nan", "54985": "nan", "54990": "nan", "54995": "nan", "55000": "nan"}}, "iteration-time": {"start_step": 1, "end_step": 55000, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": "nan", "25": "nan", "30": "nan", "35": "nan", "40": "nan", "45": "nan", "50": "nan", "55": "nan", "60": "nan", "65": "nan", "70": "nan", "75": "nan", "80": "nan", "85": "nan", "90": "nan", "95": "nan", "100": 3.58972, "105": "nan", "110": "nan", "115": "nan", "120": "nan", "125": "nan", "130": "nan", "135": "nan", "140": "nan", "145": "nan", "150": "nan", "155": "nan", "160": "nan", "165": "nan", "170": "nan", "175": "nan", "180": "nan", "185": "nan", "190": "nan", "195": "nan", "200": 3.45801, "205": "nan", "210": "nan", "215": "nan", "220": "nan", "225": "nan", "230": "nan", "235": "nan", "240": "nan", "245": "nan", "250": "nan", "255": "nan", "260": "nan", "265": "nan", "270": "nan", "275": "nan", "280": "nan", "285": "nan", "290": "nan", "295": "nan", "300": 3.46233, "305": "nan", "310": "nan", "315": "nan", "320": "nan", "325": "nan", "330": "nan", "335": "nan", "340": "nan", "345": "nan", "350": "nan", "355": "nan", "360": "nan", "365": "nan", "370": "nan", "375": "nan", "380": "nan", "385": "nan", "390": "nan", "395": "nan", "400": 3.46338, "405": "nan", "410": "nan", "415": "nan", "420": "nan", "425": "nan", "430": "nan", "435": "nan", "440": "nan", "445": "nan", "450": "nan", "455": "nan", "460": "nan", "465": "nan", "470": "nan", "475": "nan", "480": "nan", "485": "nan", "490": "nan", "495": "nan", "500": 3.46375, "505": "nan", "510": "nan", "515": "nan", "520": "nan", "525": "nan", "530": "nan", "535": "nan", "540": "nan", "545": "nan", "550": "nan", "555": "nan", "560": "nan", "565": "nan", "570": "nan", "575": "nan", "580": "nan", "585": "nan", "590": "nan", "595": "nan", "600": 3.46329, "605": "nan", "610": "nan", "615": "nan", "620": "nan", "625": "nan", "630": "nan", "635": "nan", "640": "nan", "645": "nan", "650": "nan", "655": "nan", "660": "nan", "665": "nan", "670": "nan", "675": "nan", "680": "nan", "685": "nan", "690": "nan", "695": "nan", "700": 3.46185, "705": "nan", "710": "nan", "715": "nan", "720": "nan", "725": "nan", "730": "nan", "735": "nan", "740": "nan", "745": "nan", "750": "nan", "755": "nan", "760": "nan", "765": "nan", "770": "nan", "775": "nan", "780": "nan", "785": "nan", "790": "nan", "795": "nan", "800": 3.45881, "805": "nan", "810": "nan", "815": "nan", "820": "nan", "825": "nan", "830": "nan", "835": "nan", "840": "nan", "845": "nan", "850": "nan", "855": "nan", "860": "nan", "865": "nan", "870": "nan", "875": "nan", "880": "nan", "885": "nan", "890": "nan", "895": "nan", "900": 3.4533, "905": "nan", "910": "nan", "915": "nan", "920": "nan", "925": "nan", "930": "nan", "935": "nan", "940": "nan", "945": "nan", "950": "nan", "955": "nan", "960": "nan", "965": "nan", "970": "nan", "975": "nan", "980": "nan", "985": "nan", "990": "nan", "995": "nan", "1000": 3.44561, "1005": "nan", "1010": "nan", "1015": "nan", "1020": "nan", "1025": "nan", "1030": "nan", "1035": "nan", "1040": "nan", "1045": "nan", "1050": "nan", "1055": "nan", "1060": "nan", "1065": "nan", "1070": "nan", "1075": "nan", "1080": "nan", "1085": "nan", "1090": "nan", "1095": "nan", "1100": 3.43885, "1105": "nan", "1110": "nan", "1115": "nan", "1120": "nan", "1125": "nan", "1130": "nan", "1135": "nan", "1140": "nan", "1145": "nan", "1150": "nan", "1155": "nan", "1160": "nan", "1165": "nan", "1170": "nan", "1175": "nan", "1180": "nan", "1185": "nan", "1190": "nan", "1195": "nan", "1200": 3.43272, "1205": "nan", "1210": "nan", "1215": "nan", "1220": "nan", "1225": "nan", "1230": "nan", "1235": "nan", "1240": "nan", "1245": "nan", "1250": "nan", "1255": "nan", "1260": "nan", "1265": "nan", "1270": "nan", "1275": "nan", "1280": "nan", "1285": "nan", "1290": "nan", "1295": "nan", "1300": 3.42856, "1305": "nan", "1310": "nan", "1315": "nan", "1320": "nan", "1325": "nan", "1330": "nan", "1335": "nan", "1340": "nan", "1345": "nan", "1350": "nan", "1355": "nan", "1360": "nan", "1365": "nan", "1370": "nan", "1375": "nan", "1380": "nan", "1385": "nan", "1390": "nan", "1395": "nan", "1400": 3.4255, "1405": "nan", "1410": "nan", "1415": "nan", "1420": "nan", "1425": "nan", "1430": "nan", "1435": "nan", "1440": "nan", "1445": "nan", "1450": "nan", "1455": "nan", "1460": "nan", "1465": "nan", "1470": "nan", "1475": "nan", "1480": "nan", "1485": "nan", "1490": "nan", "1495": "nan", "1500": 3.42259, "1505": "nan", "1510": "nan", "1515": "nan", "1520": "nan", "1525": "nan", "1530": "nan", "1535": "nan", "1540": "nan", "1545": "nan", "1550": "nan", "1555": "nan", "1560": "nan", "1565": "nan", "1570": "nan", "1575": "nan", "1580": "nan", "1585": "nan", "1590": "nan", "1595": "nan", "1600": 3.41922, "1605": "nan", "1610": "nan", "1615": "nan", "1620": "nan", "1625": "nan", "1630": "nan", "1635": "nan", "1640": "nan", "1645": "nan", "1650": "nan", "1655": "nan", "1660": "nan", "1665": "nan", "1670": "nan", "1675": "nan", "1680": "nan", "1685": "nan", "1690": "nan", "1695": "nan", "1700": 3.41701, "1705": "nan", "1710": "nan", "1715": "nan", "1720": "nan", "1725": "nan", "1730": "nan", "1735": "nan", "1740": "nan", "1745": "nan", "1750": "nan", "1755": "nan", "1760": "nan", "1765": "nan", "1770": "nan", "1775": "nan", "1780": "nan", "1785": "nan", "1790": "nan", "1795": "nan", "1800": 3.41507, "1805": "nan", "1810": "nan", "1815": "nan", "1820": "nan", "1825": "nan", "1830": "nan", "1835": "nan", "1840": "nan", "1845": "nan", "1850": "nan", "1855": "nan", "1860": "nan", "1865": "nan", "1870": "nan", "1875": "nan", "1880": "nan", "1885": "nan", "1890": "nan", "1895": "nan", "1900": 3.41436, "1905": "nan", "1910": "nan", "1915": "nan", "1920": "nan", "1925": "nan", "1930": "nan", "1935": "nan", "1940": "nan", "1945": "nan", "1950": "nan", "1955": "nan", "1960": "nan", "1965": "nan", "1970": "nan", "1975": "nan", "1980": "nan", "1985": "nan", "1990": "nan", "1995": "nan", "2000": 3.41226, "2005": "nan", "2010": "nan", "2015": "nan", "2020": "nan", "2025": "nan", "2030": "nan", "2035": "nan", "2040": "nan", "2045": "nan", "2050": "nan", "2055": "nan", "2060": "nan", "2065": "nan", "2070": "nan", "2075": "nan", "2080": "nan", "2085": "nan", "2090": "nan", "2095": "nan", "2100": 3.41166, "2105": "nan", "2110": "nan", "2115": "nan", "2120": "nan", "2125": "nan", "2130": "nan", "2135": "nan", "2140": "nan", "2145": "nan", "2150": "nan", "2155": "nan", "2160": "nan", "2165": "nan", "2170": "nan", "2175": "nan", "2180": "nan", "2185": "nan", "2190": "nan", "2195": "nan", "2200": 3.41146, "2205": "nan", "2210": "nan", "2215": "nan", "2220": "nan", "2225": "nan", "2230": "nan", "2235": "nan", "2240": "nan", "2245": "nan", "2250": "nan", "2255": "nan", "2260": "nan", "2265": "nan", "2270": "nan", "2275": "nan", "2280": "nan", "2285": "nan", "2290": "nan", "2295": "nan", "2300": 3.41116, "2305": "nan", "2310": "nan", "2315": "nan", "2320": "nan", "2325": "nan", "2330": "nan", "2335": "nan", "2340": "nan", "2345": "nan", "2350": "nan", "2355": "nan", "2360": "nan", "2365": "nan", "2370": "nan", "2375": "nan", "2380": "nan", "2385": "nan", "2390": "nan", "2395": "nan", "2400": 3.4101, "2405": "nan", "2410": "nan", "2415": "nan", "2420": "nan", "2425": "nan", "2430": "nan", "2435": "nan", "2440": "nan", "2445": "nan", "2450": "nan", "2455": "nan", "2460": "nan", "2465": "nan", "2470": "nan", "2475": "nan", "2480": "nan", "2485": "nan", "2490": "nan", "2495": "nan", "2500": 3.40882, "2505": "nan", "2510": "nan", "2515": "nan", "2520": "nan", "2525": "nan", "2530": "nan", "2535": "nan", "2540": "nan", "2545": "nan", "2550": "nan", "2555": "nan", "2560": "nan", "2565": "nan", "2570": "nan", "2575": "nan", "2580": "nan", "2585": "nan", "2590": "nan", "2595": "nan", "2600": 3.40801, "2605": "nan", "2610": "nan", "2615": "nan", "2620": "nan", "2625": "nan", "2630": "nan", "2635": "nan", "2640": "nan", "2645": "nan", "2650": "nan", "2655": "nan", "2660": "nan", "2665": "nan", "2670": "nan", "2675": "nan", "2680": "nan", "2685": "nan", "2690": "nan", "2695": "nan", "2700": 3.40701, "2705": "nan", "2710": "nan", "2715": "nan", "2720": "nan", "2725": "nan", "2730": "nan", "2735": "nan", "2740": "nan", "2745": "nan", "2750": "nan", "2755": "nan", "2760": "nan", "2765": "nan", "2770": "nan", "2775": "nan", "2780": "nan", "2785": "nan", "2790": "nan", "2795": "nan", "2800": 3.40764, "2805": "nan", "2810": "nan", "2815": "nan", "2820": "nan", "2825": "nan", "2830": "nan", "2835": "nan", "2840": "nan", "2845": "nan", "2850": "nan", "2855": "nan", "2860": "nan", "2865": "nan", "2870": "nan", "2875": "nan", "2880": "nan", "2885": "nan", "2890": "nan", "2895": "nan", "2900": 3.40618, "2905": "nan", "2910": "nan", "2915": "nan", "2920": "nan", "2925": "nan", "2930": "nan", "2935": "nan", "2940": "nan", "2945": "nan", "2950": "nan", "2955": "nan", "2960": "nan", "2965": "nan", "2970": "nan", "2975": "nan", "2980": "nan", "2985": "nan", "2990": "nan", "2995": "nan", "3000": 3.40525, "3005": "nan", "3010": "nan", "3015": "nan", "3020": "nan", "3025": "nan", "3030": "nan", "3035": "nan", "3040": "nan", "3045": "nan", "3050": "nan", "3055": "nan", "3060": "nan", "3065": "nan", "3070": "nan", "3075": "nan", "3080": "nan", "3085": "nan", "3090": "nan", "3095": "nan", "3100": 3.4044, "3105": "nan", "3110": "nan", "3115": "nan", "3120": "nan", "3125": "nan", "3130": "nan", "3135": "nan", "3140": "nan", "3145": "nan", "3150": "nan", "3155": "nan", "3160": "nan", "3165": "nan", "3170": "nan", "3175": "nan", "3180": "nan", "3185": "nan", "3190": "nan", "3195": "nan", "3200": 3.40457, "3205": "nan", "3210": "nan", "3215": "nan", "3220": "nan", "3225": "nan", "3230": "nan", "3235": "nan", "3240": "nan", "3245": "nan", "3250": "nan", "3255": "nan", "3260": "nan", "3265": "nan", "3270": "nan", "3275": "nan", "3280": "nan", "3285": "nan", "3290": "nan", "3295": "nan", "3300": 3.40551, "3305": "nan", "3310": "nan", "3315": "nan", "3320": "nan", "3325": "nan", "3330": "nan", "3335": "nan", "3340": "nan", "3345": "nan", "3350": "nan", "3355": "nan", "3360": "nan", "3365": "nan", "3370": "nan", "3375": "nan", "3380": "nan", "3385": "nan", "3390": "nan", "3395": "nan", "3400": 3.40428, "3405": "nan", "3410": "nan", "3415": "nan", "3420": "nan", "3425": "nan", "3430": "nan", "3435": "nan", "3440": "nan", "3445": "nan", "3450": "nan", "3455": "nan", "3460": "nan", "3465": "nan", "3470": "nan", "3475": "nan", "3480": "nan", "3485": "nan", "3490": "nan", "3495": "nan", "3500": 3.40406, "3505": "nan", "3510": "nan", "3515": "nan", "3520": "nan", "3525": "nan", "3530": "nan", "3535": "nan", "3540": "nan", "3545": "nan", "3550": "nan", "3555": "nan", "3560": "nan", "3565": "nan", "3570": "nan", "3575": "nan", "3580": "nan", "3585": "nan", "3590": "nan", "3595": "nan", "3600": 3.40531, "3605": "nan", "3610": "nan", "3615": "nan", "3620": "nan", "3625": "nan", "3630": "nan", "3635": "nan", "3640": "nan", "3645": "nan", "3650": "nan", "3655": "nan", "3660": "nan", "3665": "nan", "3670": "nan", "3675": "nan", "3680": "nan", "3685": "nan", "3690": "nan", "3695": "nan", "3700": 3.88901, "3705": "nan", "3710": "nan", "3715": "nan", "3720": "nan", "3725": "nan", "3730": "nan", "3735": "nan", "3740": "nan", "3745": "nan", "3750": "nan", "3755": "nan", "3760": "nan", "3765": "nan", "3770": "nan", "3775": "nan", "3780": "nan", "3785": "nan", "3790": "nan", "3795": "nan", "3800": 3.39951, "3805": "nan", "3810": "nan", "3815": "nan", "3820": "nan", "3825": "nan", "3830": "nan", "3835": "nan", "3840": "nan", "3845": "nan", "3850": "nan", "3855": "nan", "3860": "nan", "3865": "nan", "3870": "nan", "3875": "nan", "3880": "nan", "3885": "nan", "3890": "nan", "3895": "nan", "3900": 3.40154, "3905": "nan", "3910": "nan", "3915": "nan", "3920": "nan", "3925": "nan", "3930": "nan", "3935": "nan", "3940": "nan", "3945": "nan", "3950": "nan", "3955": "nan", "3960": "nan", "3965": "nan", "3970": "nan", "3975": "nan", "3980": "nan", "3985": "nan", "3990": "nan", "3995": "nan", "4000": 3.40132, "4005": "nan", "4010": "nan", "4015": "nan", "4020": "nan", "4025": "nan", "4030": "nan", "4035": "nan", "4040": "nan", "4045": "nan", "4050": "nan", "4055": "nan", "4060": "nan", "4065": "nan", "4070": "nan", "4075": "nan", "4080": "nan", "4085": "nan", "4090": "nan", "4095": "nan", "4100": 3.40224, "4105": "nan", "4110": "nan", "4115": "nan", "4120": "nan", "4125": "nan", "4130": "nan", "4135": "nan", "4140": "nan", "4145": "nan", "4150": "nan", "4155": "nan", "4160": "nan", "4165": "nan", "4170": "nan", "4175": "nan", "4180": "nan", "4185": "nan", "4190": "nan", "4195": "nan", "4200": 3.40044, "4205": "nan", "4210": "nan", "4215": "nan", "4220": "nan", "4225": "nan", "4230": "nan", "4235": "nan", "4240": "nan", "4245": "nan", "4250": "nan", "4255": "nan", "4260": "nan", "4265": "nan", "4270": "nan", "4275": "nan", "4280": "nan", "4285": "nan", "4290": "nan", "4295": "nan", "4300": 3.40156, "4305": "nan", "4310": "nan", "4315": "nan", "4320": "nan", "4325": "nan", "4330": "nan", "4335": "nan", "4340": "nan", "4345": "nan", "4350": "nan", "4355": "nan", "4360": "nan", "4365": "nan", "4370": "nan", "4375": "nan", "4380": "nan", "4385": "nan", "4390": "nan", "4395": "nan", "4400": 3.40312, "4405": "nan", "4410": "nan", "4415": "nan", "4420": "nan", "4425": "nan", "4430": "nan", "4435": "nan", "4440": "nan", "4445": "nan", "4450": "nan", "4455": "nan", "4460": "nan", "4465": "nan", "4470": "nan", "4475": "nan", "4480": "nan", "4485": "nan", "4490": "nan", "4495": "nan", "4500": 3.40104, "4505": "nan", "4510": "nan", "4515": "nan", "4520": "nan", "4525": "nan", "4530": "nan", "4535": "nan", "4540": "nan", "4545": "nan", "4550": "nan", "4555": "nan", "4560": "nan", "4565": "nan", "4570": "nan", "4575": "nan", "4580": "nan", "4585": "nan", "4590": "nan", "4595": "nan", "4600": 3.39846, "4605": "nan", "4610": "nan", "4615": "nan", "4620": "nan", "4625": "nan", "4630": "nan", "4635": "nan", "4640": "nan", "4645": "nan", "4650": "nan", "4655": "nan", "4660": "nan", "4665": "nan", "4670": "nan", "4675": "nan", "4680": "nan", "4685": "nan", "4690": "nan", "4695": "nan", "4700": 3.39719, "4705": "nan", "4710": "nan", "4715": "nan", "4720": "nan", "4725": "nan", "4730": "nan", "4735": "nan", "4740": "nan", "4745": "nan", "4750": "nan", "4755": "nan", "4760": "nan", "4765": "nan", "4770": "nan", "4775": "nan", "4780": "nan", "4785": "nan", "4790": "nan", "4795": "nan", "4800": 3.39751, "4805": "nan", "4810": "nan", "4815": "nan", "4820": "nan", "4825": "nan", "4830": "nan", "4835": "nan", "4840": "nan", "4845": "nan", "4850": "nan", "4855": "nan", "4860": "nan", "4865": "nan", "4870": "nan", "4875": "nan", "4880": "nan", "4885": "nan", "4890": "nan", "4895": "nan", "4900": 3.39932, "4905": "nan", "4910": "nan", "4915": "nan", "4920": "nan", "4925": "nan", "4930": "nan", "4935": "nan", "4940": "nan", "4945": "nan", "4950": "nan", "4955": "nan", "4960": "nan", "4965": "nan", "4970": "nan", "4975": "nan", "4980": "nan", "4985": "nan", "4990": "nan", "4995": "nan", "5000": 3.39988, "5005": "nan", "5010": "nan", "5015": "nan", "5020": "nan", "5025": "nan", "5030": "nan", "5035": "nan", "5040": "nan", "5045": "nan", "5050": "nan", "5055": "nan", "5060": "nan", "5065": "nan", "5070": "nan", "5075": "nan", "5080": "nan", "5085": "nan", "5090": "nan", "5095": "nan", "5100": 3.39693, "5105": "nan", "5110": "nan", "5115": "nan", "5120": "nan", "5125": "nan", "5130": "nan", "5135": "nan", "5140": "nan", "5145": "nan", "5150": "nan", "5155": "nan", "5160": "nan", "5165": "nan", "5170": "nan", "5175": "nan", "5180": "nan", "5185": "nan", "5190": "nan", "5195": "nan", "5200": 3.39884, "5205": "nan", "5210": "nan", "5215": "nan", "5220": "nan", "5225": "nan", "5230": "nan", "5235": "nan", "5240": "nan", "5245": "nan", "5250": "nan", "5255": "nan", "5260": "nan", "5265": "nan", "5270": "nan", "5275": "nan", "5280": "nan", "5285": "nan", "5290": "nan", "5295": "nan", "5300": 3.3986, "5305": "nan", "5310": "nan", "5315": "nan", "5320": "nan", "5325": "nan", "5330": "nan", "5335": "nan", "5340": "nan", "5345": "nan", "5350": "nan", "5355": "nan", "5360": "nan", "5365": "nan", "5370": "nan", "5375": "nan", "5380": "nan", "5385": "nan", "5390": "nan", "5395": "nan", "5400": 3.40103, "5405": "nan", "5410": "nan", "5415": "nan", "5420": "nan", "5425": "nan", "5430": "nan", "5435": "nan", "5440": "nan", "5445": "nan", "5450": "nan", "5455": "nan", "5460": "nan", "5465": "nan", "5470": "nan", "5475": "nan", "5480": "nan", "5485": "nan", "5490": "nan", "5495": "nan", "5500": 3.39841, "5505": "nan", "5510": "nan", "5515": "nan", "5520": "nan", "5525": "nan", "5530": "nan", "5535": "nan", "5540": "nan", "5545": "nan", "5550": "nan", "5555": "nan", "5560": "nan", "5565": "nan", "5570": "nan", "5575": "nan", "5580": "nan", "5585": "nan", "5590": "nan", "5595": "nan", "5600": 3.39922, "5605": "nan", "5610": "nan", "5615": "nan", "5620": "nan", "5625": "nan", "5630": "nan", "5635": "nan", "5640": "nan", "5645": "nan", "5650": "nan", "5655": "nan", "5660": "nan", "5665": "nan", "5670": "nan", "5675": "nan", "5680": "nan", "5685": "nan", "5690": "nan", "5695": "nan", "5700": 3.39933, "5705": "nan", "5710": "nan", "5715": "nan", "5720": "nan", "5725": "nan", "5730": "nan", "5735": "nan", "5740": "nan", "5745": "nan", "5750": "nan", "5755": "nan", "5760": "nan", "5765": "nan", "5770": "nan", "5775": "nan", "5780": "nan", "5785": "nan", "5790": "nan", "5795": "nan", "5800": 3.39986, "5805": "nan", "5810": "nan", "5815": "nan", "5820": "nan", "5825": "nan", "5830": "nan", "5835": "nan", "5840": "nan", "5845": "nan", "5850": "nan", "5855": "nan", "5860": "nan", "5865": "nan", "5870": "nan", "5875": "nan", "5880": "nan", "5885": "nan", "5890": "nan", "5895": "nan", "5900": 3.40005, "5905": "nan", "5910": "nan", "5915": "nan", "5920": "nan", "5925": "nan", "5930": "nan", "5935": "nan", "5940": "nan", "5945": "nan", "5950": "nan", "5955": "nan", "5960": "nan", "5965": "nan", "5970": "nan", "5975": "nan", "5980": "nan", "5985": "nan", "5990": "nan", "5995": "nan", "6000": 3.39921, "6005": "nan", "6010": "nan", "6015": "nan", "6020": "nan", "6025": "nan", "6030": "nan", "6035": "nan", "6040": "nan", "6045": "nan", "6050": "nan", "6055": "nan", "6060": "nan", "6065": "nan", "6070": "nan", "6075": "nan", "6080": "nan", "6085": "nan", "6090": "nan", "6095": "nan", "6100": 3.39949, "6105": "nan", "6110": "nan", "6115": "nan", "6120": "nan", "6125": "nan", "6130": "nan", "6135": "nan", "6140": "nan", "6145": "nan", "6150": "nan", "6155": "nan", "6160": "nan", "6165": "nan", "6170": "nan", "6175": "nan", "6180": "nan", "6185": "nan", "6190": "nan", "6195": "nan", "6200": 3.39844, "6205": "nan", "6210": "nan", "6215": "nan", "6220": "nan", "6225": "nan", "6230": "nan", "6235": "nan", "6240": "nan", "6245": "nan", "6250": "nan", "6255": "nan", "6260": "nan", "6265": "nan", "6270": "nan", "6275": "nan", "6280": "nan", "6285": "nan", "6290": "nan", "6295": "nan", "6300": 3.3979, "6305": "nan", "6310": "nan", "6315": "nan", "6320": "nan", "6325": "nan", "6330": "nan", "6335": "nan", "6340": "nan", "6345": "nan", "6350": "nan", "6355": "nan", "6360": "nan", "6365": "nan", "6370": "nan", "6375": "nan", "6380": "nan", "6385": "nan", "6390": "nan", "6395": "nan", "6400": 3.3986, "6405": "nan", "6410": "nan", "6415": "nan", "6420": "nan", "6425": "nan", "6430": "nan", "6435": "nan", "6440": "nan", "6445": "nan", "6450": "nan", "6455": "nan", "6460": "nan", "6465": "nan", "6470": "nan", "6475": "nan", "6480": "nan", "6485": "nan", "6490": "nan", "6495": "nan", "6500": 3.39847, "6505": "nan", "6510": "nan", "6515": "nan", "6520": "nan", "6525": "nan", "6530": "nan", "6535": "nan", "6540": "nan", "6545": "nan", "6550": "nan", "6555": "nan", "6560": "nan", "6565": "nan", "6570": "nan", "6575": "nan", "6580": "nan", "6585": "nan", "6590": "nan", "6595": "nan", "6600": 3.39855, "6605": "nan", "6610": "nan", "6615": "nan", "6620": "nan", "6625": "nan", "6630": "nan", "6635": "nan", "6640": "nan", "6645": "nan", "6650": "nan", "6655": "nan", "6660": "nan", "6665": "nan", "6670": "nan", "6675": "nan", "6680": "nan", "6685": "nan", "6690": "nan", "6695": "nan", "6700": 3.39719, "6705": "nan", "6710": "nan", "6715": "nan", "6720": "nan", "6725": "nan", "6730": "nan", "6735": "nan", "6740": "nan", "6745": "nan", "6750": "nan", "6755": "nan", "6760": "nan", "6765": "nan", "6770": "nan", "6775": "nan", "6780": "nan", "6785": "nan", "6790": "nan", "6795": "nan", "6800": 3.39826, "6805": "nan", "6810": "nan", "6815": "nan", "6820": "nan", "6825": "nan", "6830": "nan", "6835": "nan", "6840": "nan", "6845": "nan", "6850": "nan", "6855": "nan", "6860": "nan", "6865": "nan", "6870": "nan", "6875": "nan", "6880": "nan", "6885": "nan", "6890": "nan", "6895": "nan", "6900": 3.39712, "6905": "nan", "6910": "nan", "6915": "nan", "6920": "nan", "6925": "nan", "6930": "nan", "6935": "nan", "6940": "nan", "6945": "nan", "6950": "nan", "6955": "nan", "6960": "nan", "6965": "nan", "6970": "nan", "6975": "nan", "6980": "nan", "6985": "nan", "6990": "nan", "6995": "nan", "7000": 3.39791, "7005": "nan", "7010": "nan", "7015": "nan", "7020": "nan", "7025": "nan", "7030": "nan", "7035": "nan", "7040": "nan", "7045": "nan", "7050": "nan", "7055": "nan", "7060": "nan", "7065": "nan", "7070": "nan", "7075": "nan", "7080": "nan", "7085": "nan", "7090": "nan", "7095": "nan", "7100": 3.39708, "7105": "nan", "7110": "nan", "7115": "nan", "7120": "nan", "7125": "nan", "7130": "nan", "7135": "nan", "7140": "nan", "7145": "nan", "7150": "nan", "7155": "nan", "7160": "nan", "7165": "nan", "7170": "nan", "7175": "nan", "7180": "nan", "7185": "nan", "7190": "nan", "7195": "nan", "7200": 3.39631, "7205": "nan", "7210": "nan", "7215": "nan", "7220": "nan", "7225": "nan", "7230": "nan", "7235": "nan", "7240": "nan", "7245": "nan", "7250": "nan", "7255": "nan", "7260": "nan", "7265": "nan", "7270": "nan", "7275": "nan", "7280": "nan", "7285": "nan", "7290": "nan", "7295": "nan", "7300": 3.39759, "7305": "nan", "7310": "nan", "7315": "nan", "7320": "nan", "7325": "nan", "7330": "nan", "7335": "nan", "7340": "nan", "7345": "nan", "7350": "nan", "7355": "nan", "7360": "nan", "7365": "nan", "7370": "nan", "7375": "nan", "7380": "nan", "7385": "nan", "7390": "nan", "7395": "nan", "7400": 3.3981, "7405": "nan", "7410": "nan", "7415": "nan", "7420": "nan", "7425": "nan", "7430": "nan", "7435": "nan", "7440": "nan", "7445": "nan", "7450": "nan", "7455": "nan", "7460": "nan", "7465": "nan", "7470": "nan", "7475": "nan", "7480": "nan", "7485": "nan", "7490": "nan", "7495": "nan", "7500": 3.39904, "7505": "nan", "7510": "nan", "7515": "nan", "7520": "nan", "7525": "nan", "7530": "nan", "7535": "nan", "7540": "nan", "7545": "nan", "7550": "nan", "7555": "nan", "7560": "nan", "7565": "nan", "7570": "nan", "7575": "nan", "7580": "nan", "7585": "nan", "7590": "nan", "7595": "nan", "7600": 3.63076, "7605": "nan", "7610": "nan", "7615": "nan", "7620": "nan", "7625": "nan", "7630": "nan", "7635": "nan", "7640": "nan", "7645": "nan", "7650": "nan", "7655": "nan", "7660": "nan", "7665": "nan", "7670": "nan", "7675": "nan", "7680": "nan", "7685": "nan", "7690": "nan", "7695": "nan", "7700": 3.38438, "7705": "nan", "7710": "nan", "7715": "nan", "7720": "nan", "7725": "nan", "7730": "nan", "7735": "nan", "7740": "nan", "7745": "nan", "7750": "nan", "7755": "nan", "7760": "nan", "7765": "nan", "7770": "nan", "7775": "nan", "7780": "nan", "7785": "nan", "7790": "nan", "7795": "nan", "7800": 3.38429, "7805": "nan", "7810": "nan", "7815": "nan", "7820": "nan", "7825": "nan", "7830": "nan", "7835": "nan", "7840": "nan", "7845": "nan", "7850": "nan", "7855": "nan", "7860": "nan", "7865": "nan", "7870": "nan", "7875": "nan", "7880": "nan", "7885": "nan", "7890": "nan", "7895": "nan", "7900": 3.38539, "7905": "nan", "7910": "nan", "7915": "nan", "7920": "nan", "7925": "nan", "7930": "nan", "7935": "nan", "7940": "nan", "7945": "nan", "7950": "nan", "7955": "nan", "7960": "nan", "7965": "nan", "7970": "nan", "7975": "nan", "7980": "nan", "7985": "nan", "7990": "nan", "7995": "nan", "8000": 3.38995, "8005": "nan", "8010": "nan", "8015": "nan", "8020": "nan", "8025": "nan", "8030": "nan", "8035": "nan", "8040": "nan", "8045": "nan", "8050": "nan", "8055": "nan", "8060": "nan", "8065": "nan", "8070": "nan", "8075": "nan", "8080": "nan", "8085": "nan", "8090": "nan", "8095": "nan", "8100": 3.38581, "8105": "nan", "8110": "nan", "8115": "nan", "8120": "nan", "8125": "nan", "8130": "nan", "8135": "nan", "8140": "nan", "8145": "nan", "8150": "nan", "8155": "nan", "8160": "nan", "8165": "nan", "8170": "nan", "8175": "nan", "8180": "nan", "8185": "nan", "8190": "nan", "8195": "nan", "8200": 3.3854, "8205": "nan", "8210": "nan", "8215": "nan", "8220": "nan", "8225": "nan", "8230": "nan", "8235": "nan", "8240": "nan", "8245": "nan", "8250": "nan", "8255": "nan", "8260": "nan", "8265": "nan", "8270": "nan", "8275": "nan", "8280": "nan", "8285": "nan", "8290": "nan", "8295": "nan", "8300": 3.38584, "8305": "nan", "8310": "nan", "8315": "nan", "8320": "nan", "8325": "nan", "8330": "nan", "8335": "nan", "8340": "nan", "8345": "nan", "8350": "nan", "8355": "nan", "8360": "nan", "8365": "nan", "8370": "nan", "8375": "nan", "8380": "nan", "8385": "nan", "8390": "nan", "8395": "nan", "8400": 3.38529, "8405": "nan", "8410": "nan", "8415": "nan", "8420": "nan", "8425": "nan", "8430": "nan", "8435": "nan", "8440": "nan", "8445": "nan", "8450": "nan", "8455": "nan", "8460": "nan", "8465": "nan", "8470": "nan", "8475": "nan", "8480": "nan", "8485": "nan", "8490": "nan", "8495": "nan", "8500": 3.3845, "8505": "nan", "8510": "nan", "8515": "nan", "8520": "nan", "8525": "nan", "8530": "nan", "8535": "nan", "8540": "nan", "8545": "nan", "8550": "nan", "8555": "nan", "8560": "nan", "8565": "nan", "8570": "nan", "8575": "nan", "8580": "nan", "8585": "nan", "8590": "nan", "8595": "nan", "8600": 3.38499, "8605": "nan", "8610": "nan", "8615": "nan", "8620": "nan", "8625": "nan", "8630": "nan", "8635": "nan", "8640": "nan", "8645": "nan", "8650": "nan", "8655": "nan", "8660": "nan", "8665": "nan", "8670": "nan", "8675": "nan", "8680": "nan", "8685": "nan", "8690": "nan", "8695": "nan", "8700": 3.38455, "8705": "nan", "8710": "nan", "8715": "nan", "8720": "nan", "8725": "nan", "8730": "nan", "8735": "nan", "8740": "nan", "8745": "nan", "8750": "nan", "8755": "nan", "8760": "nan", "8765": "nan", "8770": "nan", "8775": "nan", "8780": "nan", "8785": "nan", "8790": "nan", "8795": "nan", "8800": 3.38421, "8805": "nan", "8810": "nan", "8815": "nan", "8820": "nan", "8825": "nan", "8830": "nan", "8835": "nan", "8840": "nan", "8845": "nan", "8850": "nan", "8855": "nan", "8860": "nan", "8865": "nan", "8870": "nan", "8875": "nan", "8880": "nan", "8885": "nan", "8890": "nan", "8895": "nan", "8900": 3.385, "8905": "nan", "8910": "nan", "8915": "nan", "8920": "nan", "8925": "nan", "8930": "nan", "8935": "nan", "8940": "nan", "8945": "nan", "8950": "nan", "8955": "nan", "8960": "nan", "8965": "nan", "8970": "nan", "8975": "nan", "8980": "nan", "8985": "nan", "8990": "nan", "8995": "nan", "9000": 3.3842, "9005": "nan", "9010": "nan", "9015": "nan", "9020": "nan", "9025": "nan", "9030": "nan", "9035": "nan", "9040": "nan", "9045": "nan", "9050": "nan", "9055": "nan", "9060": "nan", "9065": "nan", "9070": "nan", "9075": "nan", "9080": "nan", "9085": "nan", "9090": "nan", "9095": "nan", "9100": 3.38554, "9105": "nan", "9110": "nan", "9115": "nan", "9120": "nan", "9125": "nan", "9130": "nan", "9135": "nan", "9140": "nan", "9145": "nan", "9150": "nan", "9155": "nan", "9160": "nan", "9165": "nan", "9170": "nan", "9175": "nan", "9180": "nan", "9185": "nan", "9190": "nan", "9195": "nan", "9200": 3.38478, "9205": "nan", "9210": "nan", "9215": "nan", "9220": "nan", "9225": "nan", "9230": "nan", "9235": "nan", "9240": "nan", "9245": "nan", "9250": "nan", "9255": "nan", "9260": "nan", "9265": "nan", "9270": "nan", "9275": "nan", "9280": "nan", "9285": "nan", "9290": "nan", "9295": "nan", "9300": 3.38571, "9305": "nan", "9310": "nan", "9315": "nan", "9320": "nan", "9325": "nan", "9330": "nan", "9335": "nan", "9340": "nan", "9345": "nan", "9350": "nan", "9355": "nan", "9360": "nan", "9365": "nan", "9370": "nan", "9375": "nan", "9380": "nan", "9385": "nan", "9390": "nan", "9395": "nan", "9400": 3.38513, "9405": "nan", "9410": "nan", "9415": "nan", "9420": "nan", "9425": "nan", "9430": "nan", "9435": "nan", "9440": "nan", "9445": "nan", "9450": "nan", "9455": "nan", "9460": "nan", "9465": "nan", "9470": "nan", "9475": "nan", "9480": "nan", "9485": "nan", "9490": "nan", "9495": "nan", "9500": 3.38536, "9505": "nan", "9510": "nan", "9515": "nan", "9520": "nan", "9525": "nan", "9530": "nan", "9535": "nan", "9540": "nan", "9545": "nan", "9550": "nan", "9555": "nan", "9560": "nan", "9565": "nan", "9570": "nan", "9575": "nan", "9580": "nan", "9585": "nan", "9590": "nan", "9595": "nan", "9600": 3.38433, "9605": "nan", "9610": "nan", "9615": "nan", "9620": "nan", "9625": "nan", "9630": "nan", "9635": "nan", "9640": "nan", "9645": "nan", "9650": "nan", "9655": "nan", "9660": "nan", "9665": "nan", "9670": "nan", "9675": "nan", "9680": "nan", "9685": "nan", "9690": "nan", "9695": "nan", "9700": 3.38536, "9705": "nan", "9710": "nan", "9715": "nan", "9720": "nan", "9725": "nan", "9730": "nan", "9735": "nan", "9740": "nan", "9745": "nan", "9750": "nan", "9755": "nan", "9760": "nan", "9765": "nan", "9770": "nan", "9775": "nan", "9780": "nan", "9785": "nan", "9790": "nan", "9795": "nan", "9800": 3.38531, "9805": "nan", "9810": "nan", "9815": "nan", "9820": "nan", "9825": "nan", "9830": "nan", "9835": "nan", "9840": "nan", "9845": "nan", "9850": "nan", "9855": "nan", "9860": "nan", "9865": "nan", "9870": "nan", "9875": "nan", "9880": "nan", "9885": "nan", "9890": "nan", "9895": "nan", "9900": 3.38475, "9905": "nan", "9910": "nan", "9915": "nan", "9920": "nan", "9925": "nan", "9930": "nan", "9935": "nan", "9940": "nan", "9945": "nan", "9950": "nan", "9955": "nan", "9960": "nan", "9965": "nan", "9970": "nan", "9975": "nan", "9980": "nan", "9985": "nan", "9990": "nan", "9995": "nan", "10000": 3.38423, "10005": "nan", "10010": "nan", "10015": "nan", "10020": "nan", "10025": "nan", "10030": "nan", "10035": "nan", "10040": "nan", "10045": "nan", "10050": "nan", "10055": "nan", "10060": "nan", "10065": "nan", "10070": "nan", "10075": "nan", "10080": "nan", "10085": "nan", "10090": "nan", "10095": "nan", "10100": 3.3832, "10105": "nan", "10110": "nan", "10115": "nan", "10120": "nan", "10125": "nan", "10130": "nan", "10135": "nan", "10140": "nan", "10145": "nan", "10150": "nan", "10155": "nan", "10160": "nan", "10165": "nan", "10170": "nan", "10175": "nan", "10180": "nan", "10185": "nan", "10190": "nan", "10195": "nan", "10200": 3.38385, "10205": "nan", "10210": "nan", "10215": "nan", "10220": "nan", "10225": "nan", "10230": "nan", "10235": "nan", "10240": "nan", "10245": "nan", "10250": "nan", "10255": "nan", "10260": "nan", "10265": "nan", "10270": "nan", "10275": "nan", "10280": "nan", "10285": "nan", "10290": "nan", "10295": "nan", "10300": 3.38234, "10305": "nan", "10310": "nan", "10315": "nan", "10320": "nan", "10325": "nan", "10330": "nan", "10335": "nan", "10340": "nan", "10345": "nan", "10350": "nan", "10355": "nan", "10360": "nan", "10365": "nan", "10370": "nan", "10375": "nan", "10380": "nan", "10385": "nan", "10390": "nan", "10395": "nan", "10400": 3.38217, "10405": "nan", "10410": "nan", "10415": "nan", "10420": "nan", "10425": "nan", "10430": "nan", "10435": "nan", "10440": "nan", "10445": "nan", "10450": "nan", "10455": "nan", "10460": "nan", "10465": "nan", "10470": "nan", "10475": "nan", "10480": "nan", "10485": "nan", "10490": "nan", "10495": "nan", "10500": 3.38218, "10505": "nan", "10510": "nan", "10515": "nan", "10520": "nan", "10525": "nan", "10530": "nan", "10535": "nan", "10540": "nan", "10545": "nan", "10550": "nan", "10555": "nan", "10560": "nan", "10565": "nan", "10570": "nan", "10575": "nan", "10580": "nan", "10585": "nan", "10590": "nan", "10595": "nan", "10600": 3.38313, "10605": "nan", "10610": "nan", "10615": "nan", "10620": "nan", "10625": "nan", "10630": "nan", "10635": "nan", "10640": "nan", "10645": "nan", "10650": "nan", "10655": "nan", "10660": "nan", "10665": "nan", "10670": "nan", "10675": "nan", "10680": "nan", "10685": "nan", "10690": "nan", "10695": "nan", "10700": 3.3822, "10705": "nan", "10710": "nan", "10715": "nan", "10720": "nan", "10725": "nan", "10730": "nan", "10735": "nan", "10740": "nan", "10745": "nan", "10750": "nan", "10755": "nan", "10760": "nan", "10765": "nan", "10770": "nan", "10775": "nan", "10780": "nan", "10785": "nan", "10790": "nan", "10795": "nan", "10800": 3.38286, "10805": "nan", "10810": "nan", "10815": "nan", "10820": "nan", "10825": "nan", "10830": "nan", "10835": "nan", "10840": "nan", "10845": "nan", "10850": "nan", "10855": "nan", "10860": "nan", "10865": "nan", "10870": "nan", "10875": "nan", "10880": "nan", "10885": "nan", "10890": "nan", "10895": "nan", "10900": 3.38213, "10905": "nan", "10910": "nan", "10915": "nan", "10920": "nan", "10925": "nan", "10930": "nan", "10935": "nan", "10940": "nan", "10945": "nan", "10950": "nan", "10955": "nan", "10960": "nan", "10965": "nan", "10970": "nan", "10975": "nan", "10980": "nan", "10985": "nan", "10990": "nan", "10995": "nan", "11000": 3.38279, "11005": "nan", "11010": "nan", "11015": "nan", "11020": "nan", "11025": "nan", "11030": "nan", "11035": "nan", "11040": "nan", "11045": "nan", "11050": "nan", "11055": "nan", "11060": "nan", "11065": "nan", "11070": "nan", "11075": "nan", "11080": "nan", "11085": "nan", "11090": "nan", "11095": "nan", "11100": 3.38243, "11105": "nan", "11110": "nan", "11115": "nan", "11120": "nan", "11125": "nan", "11130": "nan", "11135": "nan", "11140": "nan", "11145": "nan", "11150": "nan", "11155": "nan", "11160": "nan", "11165": "nan", "11170": "nan", "11175": "nan", "11180": "nan", "11185": "nan", "11190": "nan", "11195": "nan", "11200": 3.38254, "11205": "nan", "11210": "nan", "11215": "nan", "11220": "nan", "11225": "nan", "11230": "nan", "11235": "nan", "11240": "nan", "11245": "nan", "11250": "nan", "11255": "nan", "11260": "nan", "11265": "nan", "11270": "nan", "11275": "nan", "11280": "nan", "11285": "nan", "11290": "nan", "11295": "nan", "11300": 3.3824, "11305": "nan", "11310": "nan", "11315": "nan", "11320": "nan", "11325": "nan", "11330": "nan", "11335": "nan", "11340": "nan", "11345": "nan", "11350": "nan", "11355": "nan", "11360": "nan", "11365": "nan", "11370": "nan", "11375": "nan", "11380": "nan", "11385": "nan", "11390": "nan", "11395": "nan", "11400": 3.38197, "11405": "nan", "11410": "nan", "11415": "nan", "11420": "nan", "11425": "nan", "11430": "nan", "11435": "nan", "11440": "nan", "11445": "nan", "11450": "nan", "11455": "nan", "11460": "nan", "11465": "nan", "11470": "nan", "11475": "nan", "11480": "nan", "11485": "nan", "11490": "nan", "11495": "nan", "11500": 3.54668, "11505": "nan", "11510": "nan", "11515": "nan", "11520": "nan", "11525": "nan", "11530": "nan", "11535": "nan", "11540": "nan", "11545": "nan", "11550": "nan", "11555": "nan", "11560": "nan", "11565": "nan", "11570": "nan", "11575": "nan", "11580": "nan", "11585": "nan", "11590": "nan", "11595": "nan", "11600": 3.39959, "11605": "nan", "11610": "nan", "11615": "nan", "11620": "nan", "11625": "nan", "11630": "nan", "11635": "nan", "11640": "nan", "11645": "nan", "11650": "nan", "11655": "nan", "11660": "nan", "11665": "nan", "11670": "nan", "11675": "nan", "11680": "nan", "11685": "nan", "11690": "nan", "11695": "nan", "11700": 3.40144, "11705": "nan", "11710": "nan", "11715": "nan", "11720": "nan", "11725": "nan", "11730": "nan", "11735": "nan", "11740": "nan", "11745": "nan", "11750": "nan", "11755": "nan", "11760": "nan", "11765": "nan", "11770": "nan", "11775": "nan", "11780": "nan", "11785": "nan", "11790": "nan", "11795": "nan", "11800": 3.39856, "11805": "nan", "11810": "nan", "11815": "nan", "11820": "nan", "11825": "nan", "11830": "nan", "11835": "nan", "11840": "nan", "11845": "nan", "11850": "nan", "11855": "nan", "11860": "nan", "11865": "nan", "11870": "nan", "11875": "nan", "11880": "nan", "11885": "nan", "11890": "nan", "11895": "nan", "11900": 3.39955, "11905": "nan", "11910": "nan", "11915": "nan", "11920": "nan", "11925": "nan", "11930": "nan", "11935": "nan", "11940": "nan", "11945": "nan", "11950": "nan", "11955": "nan", "11960": "nan", "11965": "nan", "11970": "nan", "11975": "nan", "11980": "nan", "11985": "nan", "11990": "nan", "11995": "nan", "12000": 3.39742, "12005": "nan", "12010": "nan", "12015": "nan", "12020": "nan", "12025": "nan", "12030": "nan", "12035": "nan", "12040": "nan", "12045": "nan", "12050": "nan", "12055": "nan", "12060": "nan", "12065": "nan", "12070": "nan", "12075": "nan", "12080": "nan", "12085": "nan", "12090": "nan", "12095": "nan", "12100": 3.39657, "12105": "nan", "12110": "nan", "12115": "nan", "12120": "nan", "12125": "nan", "12130": "nan", "12135": "nan", "12140": "nan", "12145": "nan", "12150": "nan", "12155": "nan", "12160": "nan", "12165": "nan", "12170": "nan", "12175": "nan", "12180": "nan", "12185": "nan", "12190": "nan", "12195": "nan", "12200": 3.39854, "12205": "nan", "12210": "nan", "12215": "nan", "12220": "nan", "12225": "nan", "12230": "nan", "12235": "nan", "12240": "nan", "12245": "nan", "12250": "nan", "12255": "nan", "12260": "nan", "12265": "nan", "12270": "nan", "12275": "nan", "12280": "nan", "12285": "nan", "12290": "nan", "12295": "nan", "12300": 3.3991, "12305": "nan", "12310": "nan", "12315": "nan", "12320": "nan", "12325": "nan", "12330": "nan", "12335": "nan", "12340": "nan", "12345": "nan", "12350": "nan", "12355": "nan", "12360": "nan", "12365": "nan", "12370": "nan", "12375": "nan", "12380": "nan", "12385": "nan", "12390": "nan", "12395": "nan", "12400": 3.39804, "12405": "nan", "12410": "nan", "12415": "nan", "12420": "nan", "12425": "nan", "12430": "nan", "12435": "nan", "12440": "nan", "12445": "nan", "12450": "nan", "12455": "nan", "12460": "nan", "12465": "nan", "12470": "nan", "12475": "nan", "12480": "nan", "12485": "nan", "12490": "nan", "12495": "nan", "12500": 3.39867, "12505": "nan", "12510": "nan", "12515": "nan", "12520": "nan", "12525": "nan", "12530": "nan", "12535": "nan", "12540": "nan", "12545": "nan", "12550": "nan", "12555": "nan", "12560": "nan", "12565": "nan", "12570": "nan", "12575": "nan", "12580": "nan", "12585": "nan", "12590": "nan", "12595": "nan", "12600": 3.39806, "12605": "nan", "12610": "nan", "12615": "nan", "12620": "nan", "12625": "nan", "12630": "nan", "12635": "nan", "12640": "nan", "12645": "nan", "12650": "nan", "12655": "nan", "12660": "nan", "12665": "nan", "12670": "nan", "12675": "nan", "12680": "nan", "12685": "nan", "12690": "nan", "12695": "nan", "12700": 3.39739, "12705": "nan", "12710": "nan", "12715": "nan", "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": 3.39872, "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": 3.39678, "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": 3.39602, "13005": "nan", "13010": "nan", "13015": "nan", "13020": "nan", "13025": "nan", "13030": "nan", "13035": "nan", "13040": "nan", "13045": "nan", "13050": "nan", "13055": "nan", "13060": "nan", "13065": "nan", "13070": "nan", "13075": "nan", "13080": "nan", "13085": "nan", "13090": "nan", "13095": "nan", "13100": 3.39539, "13105": "nan", "13110": "nan", "13115": "nan", "13120": "nan", "13125": "nan", "13130": "nan", "13135": "nan", "13140": "nan", "13145": "nan", "13150": "nan", "13155": "nan", "13160": "nan", "13165": "nan", "13170": "nan", "13175": "nan", "13180": "nan", "13185": "nan", "13190": "nan", "13195": "nan", "13200": 3.39483, "13205": "nan", "13210": "nan", "13215": "nan", "13220": "nan", "13225": "nan", "13230": "nan", "13235": "nan", "13240": "nan", "13245": "nan", "13250": "nan", "13255": "nan", "13260": "nan", "13265": "nan", "13270": "nan", "13275": "nan", "13280": "nan", "13285": "nan", "13290": "nan", "13295": "nan", "13300": 3.39587, "13305": "nan", "13310": "nan", "13315": "nan", "13320": "nan", "13325": "nan", "13330": "nan", "13335": "nan", "13340": "nan", "13345": "nan", "13350": "nan", "13355": "nan", "13360": "nan", "13365": "nan", "13370": "nan", "13375": "nan", "13380": "nan", "13385": "nan", "13390": "nan", "13395": "nan", "13400": 3.3958, "13405": "nan", "13410": "nan", "13415": "nan", "13420": "nan", "13425": "nan", "13430": "nan", "13435": "nan", "13440": "nan", "13445": "nan", "13450": "nan", "13455": "nan", "13460": "nan", "13465": "nan", "13470": "nan", "13475": "nan", "13480": "nan", "13485": "nan", "13490": "nan", "13495": "nan", "13500": 3.39504, "13505": "nan", "13510": "nan", "13515": "nan", "13520": "nan", "13525": "nan", "13530": "nan", "13535": "nan", "13540": "nan", "13545": "nan", "13550": "nan", "13555": "nan", "13560": "nan", "13565": "nan", "13570": "nan", "13575": "nan", "13580": "nan", "13585": "nan", "13590": "nan", "13595": "nan", "13600": 3.39552, "13605": "nan", "13610": "nan", "13615": "nan", "13620": "nan", "13625": "nan", "13630": "nan", "13635": "nan", "13640": "nan", "13645": "nan", "13650": "nan", "13655": "nan", "13660": "nan", "13665": "nan", "13670": "nan", "13675": "nan", "13680": "nan", "13685": "nan", "13690": "nan", "13695": "nan", "13700": 3.39539, "13705": "nan", "13710": "nan", "13715": "nan", "13720": "nan", "13725": "nan", "13730": "nan", "13735": "nan", "13740": "nan", "13745": "nan", "13750": "nan", "13755": "nan", "13760": "nan", "13765": "nan", "13770": "nan", "13775": "nan", "13780": "nan", "13785": "nan", "13790": "nan", "13795": "nan", "13800": 3.39549, "13805": "nan", "13810": "nan", "13815": "nan", "13820": "nan", "13825": "nan", "13830": "nan", "13835": "nan", "13840": "nan", "13845": "nan", "13850": "nan", "13855": "nan", "13860": "nan", "13865": "nan", "13870": "nan", "13875": "nan", "13880": "nan", "13885": "nan", "13890": "nan", "13895": "nan", "13900": 3.39558, "13905": "nan", "13910": "nan", "13915": "nan", "13920": "nan", "13925": "nan", "13930": "nan", "13935": "nan", "13940": "nan", "13945": "nan", "13950": "nan", "13955": "nan", "13960": "nan", "13965": "nan", "13970": "nan", "13975": "nan", "13980": "nan", "13985": "nan", "13990": "nan", "13995": "nan", "14000": 3.3955, "14005": "nan", "14010": "nan", "14015": "nan", "14020": "nan", "14025": "nan", "14030": "nan", "14035": "nan", "14040": "nan", "14045": "nan", "14050": "nan", "14055": "nan", "14060": "nan", "14065": "nan", "14070": "nan", "14075": "nan", "14080": "nan", "14085": "nan", "14090": "nan", "14095": "nan", "14100": 3.39449, "14105": "nan", "14110": "nan", "14115": "nan", "14120": "nan", "14125": "nan", "14130": "nan", "14135": "nan", "14140": "nan", "14145": "nan", "14150": "nan", "14155": "nan", "14160": "nan", "14165": "nan", "14170": "nan", "14175": "nan", "14180": "nan", "14185": "nan", "14190": "nan", "14195": "nan", "14200": 3.39424, "14205": "nan", "14210": "nan", "14215": "nan", "14220": "nan", "14225": "nan", "14230": "nan", "14235": "nan", "14240": "nan", "14245": "nan", "14250": "nan", "14255": "nan", "14260": "nan", "14265": "nan", "14270": "nan", "14275": "nan", "14280": "nan", "14285": "nan", "14290": "nan", "14295": "nan", "14300": 3.39439, "14305": "nan", "14310": "nan", "14315": "nan", "14320": "nan", "14325": "nan", "14330": "nan", "14335": "nan", "14340": "nan", "14345": "nan", "14350": "nan", "14355": "nan", "14360": "nan", "14365": "nan", "14370": "nan", "14375": "nan", "14380": "nan", "14385": "nan", "14390": "nan", "14395": "nan", "14400": 3.39528, "14405": "nan", "14410": "nan", "14415": "nan", "14420": "nan", "14425": "nan", "14430": "nan", "14435": "nan", "14440": "nan", "14445": "nan", "14450": "nan", "14455": "nan", "14460": "nan", "14465": "nan", "14470": "nan", "14475": "nan", "14480": "nan", "14485": "nan", "14490": "nan", "14495": "nan", "14500": 3.39457, "14505": "nan", "14510": "nan", "14515": "nan", "14520": "nan", "14525": "nan", "14530": "nan", "14535": "nan", "14540": "nan", "14545": "nan", "14550": "nan", "14555": "nan", "14560": "nan", "14565": "nan", "14570": "nan", "14575": "nan", "14580": "nan", "14585": "nan", "14590": "nan", "14595": "nan", "14600": 3.39564, "14605": "nan", "14610": "nan", "14615": "nan", "14620": "nan", "14625": "nan", "14630": "nan", "14635": "nan", "14640": "nan", "14645": "nan", "14650": "nan", "14655": "nan", "14660": "nan", "14665": "nan", "14670": "nan", "14675": "nan", "14680": "nan", "14685": "nan", "14690": "nan", "14695": "nan", "14700": 3.39534, "14705": "nan", "14710": "nan", "14715": "nan", "14720": "nan", "14725": "nan", "14730": "nan", "14735": "nan", "14740": "nan", "14745": "nan", "14750": "nan", "14755": "nan", "14760": "nan", "14765": "nan", "14770": "nan", "14775": "nan", "14780": "nan", "14785": "nan", "14790": "nan", "14795": "nan", "14800": 3.39563, "14805": "nan", "14810": "nan", "14815": "nan", "14820": "nan", "14825": "nan", "14830": "nan", "14835": "nan", "14840": "nan", "14845": "nan", "14850": "nan", "14855": "nan", "14860": "nan", "14865": "nan", "14870": "nan", "14875": "nan", "14880": "nan", "14885": "nan", "14890": "nan", "14895": "nan", "14900": 3.39508, "14905": "nan", "14910": "nan", "14915": "nan", "14920": "nan", "14925": "nan", "14930": "nan", "14935": "nan", "14940": "nan", "14945": "nan", "14950": "nan", "14955": "nan", "14960": "nan", "14965": "nan", "14970": "nan", "14975": "nan", "14980": "nan", "14985": "nan", "14990": "nan", "14995": "nan", "15000": 3.39525, "15005": "nan", "15010": "nan", "15015": "nan", "15020": "nan", "15025": "nan", "15030": "nan", "15035": "nan", "15040": "nan", "15045": "nan", "15050": "nan", "15055": "nan", "15060": "nan", "15065": "nan", "15070": "nan", "15075": "nan", "15080": "nan", "15085": "nan", "15090": "nan", "15095": "nan", "15100": 3.39455, "15105": "nan", "15110": "nan", "15115": "nan", "15120": "nan", "15125": "nan", "15130": "nan", "15135": "nan", "15140": "nan", "15145": "nan", "15150": "nan", "15155": "nan", "15160": "nan", "15165": "nan", "15170": "nan", "15175": "nan", "15180": "nan", "15185": "nan", "15190": "nan", "15195": "nan", "15200": 3.39617, "15205": "nan", "15210": "nan", "15215": "nan", "15220": "nan", "15225": "nan", "15230": "nan", "15235": "nan", "15240": "nan", "15245": "nan", "15250": "nan", "15255": "nan", "15260": "nan", "15265": "nan", "15270": "nan", "15275": "nan", "15280": "nan", "15285": "nan", "15290": "nan", "15295": "nan", "15300": 4.28806, "15305": "nan", "15310": "nan", "15315": "nan", "15320": "nan", "15325": "nan", "15330": "nan", "15335": "nan", "15340": "nan", "15345": "nan", "15350": "nan", "15355": "nan", "15360": "nan", "15365": "nan", "15370": "nan", "15375": "nan", "15380": "nan", "15385": "nan", "15390": "nan", "15395": "nan", "15400": 3.38051, "15405": "nan", "15410": "nan", "15415": "nan", "15420": "nan", "15425": "nan", "15430": "nan", "15435": "nan", "15440": "nan", "15445": "nan", "15450": "nan", "15455": "nan", "15460": "nan", "15465": "nan", "15470": "nan", "15475": "nan", "15480": "nan", "15485": "nan", "15490": "nan", "15495": "nan", "15500": 3.38175, "15505": "nan", "15510": "nan", "15515": "nan", "15520": "nan", "15525": "nan", "15530": "nan", "15535": "nan", "15540": "nan", "15545": "nan", "15550": "nan", "15555": "nan", "15560": "nan", "15565": "nan", "15570": "nan", "15575": "nan", "15580": "nan", "15585": "nan", "15590": "nan", "15595": "nan", "15600": 3.38113, "15605": "nan", "15610": "nan", "15615": "nan", "15620": "nan", "15625": "nan", "15630": "nan", "15635": "nan", "15640": "nan", "15645": "nan", "15650": "nan", "15655": "nan", "15660": "nan", "15665": "nan", "15670": "nan", "15675": "nan", "15680": "nan", "15685": "nan", "15690": "nan", "15695": "nan", "15700": 3.38052, "15705": "nan", "15710": "nan", "15715": "nan", "15720": "nan", "15725": "nan", "15730": "nan", "15735": "nan", "15740": "nan", "15745": "nan", "15750": "nan", "15755": "nan", "15760": "nan", "15765": "nan", "15770": "nan", "15775": "nan", "15780": "nan", "15785": "nan", "15790": "nan", "15795": "nan", "15800": 3.38063, "15805": "nan", "15810": "nan", "15815": "nan", "15820": "nan", "15825": "nan", "15830": "nan", "15835": "nan", "15840": "nan", "15845": "nan", "15850": "nan", "15855": "nan", "15860": "nan", "15865": "nan", "15870": "nan", "15875": "nan", "15880": "nan", "15885": "nan", "15890": "nan", "15895": "nan", "15900": 3.38049, "15905": "nan", "15910": "nan", "15915": "nan", "15920": "nan", "15925": "nan", "15930": "nan", "15935": "nan", "15940": "nan", "15945": "nan", "15950": "nan", "15955": "nan", "15960": "nan", "15965": "nan", "15970": "nan", "15975": "nan", "15980": "nan", "15985": "nan", "15990": "nan", "15995": "nan", "16000": 3.38045, "16005": "nan", "16010": "nan", "16015": "nan", "16020": "nan", "16025": "nan", "16030": "nan", "16035": "nan", "16040": "nan", "16045": "nan", "16050": "nan", "16055": "nan", "16060": "nan", "16065": "nan", "16070": "nan", "16075": "nan", "16080": "nan", "16085": "nan", "16090": "nan", "16095": "nan", "16100": 3.38146, "16105": "nan", "16110": "nan", "16115": "nan", "16120": "nan", "16125": "nan", "16130": "nan", "16135": "nan", "16140": "nan", "16145": "nan", "16150": "nan", "16155": "nan", "16160": "nan", "16165": "nan", "16170": "nan", "16175": "nan", "16180": "nan", "16185": "nan", "16190": "nan", "16195": "nan", "16200": 3.38274, "16205": "nan", "16210": "nan", "16215": "nan", "16220": "nan", "16225": "nan", "16230": "nan", "16235": "nan", "16240": "nan", "16245": "nan", "16250": "nan", "16255": "nan", "16260": "nan", "16265": "nan", "16270": "nan", "16275": "nan", "16280": "nan", "16285": "nan", "16290": "nan", "16295": "nan", "16300": 3.38257, "16305": "nan", "16310": "nan", "16315": "nan", "16320": "nan", "16325": "nan", "16330": "nan", "16335": "nan", "16340": "nan", "16345": "nan", "16350": "nan", "16355": "nan", "16360": "nan", "16365": "nan", "16370": "nan", "16375": "nan", "16380": "nan", "16385": "nan", "16390": "nan", "16395": "nan", "16400": 3.38268, "16405": "nan", "16410": "nan", "16415": "nan", "16420": "nan", "16425": "nan", "16430": "nan", "16435": "nan", "16440": "nan", "16445": "nan", "16450": "nan", "16455": "nan", "16460": "nan", "16465": "nan", "16470": "nan", "16475": "nan", "16480": "nan", "16485": "nan", "16490": "nan", "16495": "nan", "16500": 3.38203, "16505": "nan", "16510": "nan", "16515": "nan", "16520": "nan", "16525": "nan", "16530": "nan", "16535": "nan", "16540": "nan", "16545": "nan", "16550": "nan", "16555": "nan", "16560": "nan", "16565": "nan", "16570": "nan", "16575": "nan", "16580": "nan", "16585": "nan", "16590": "nan", "16595": "nan", "16600": 3.38267, "16605": "nan", "16610": "nan", "16615": "nan", "16620": "nan", "16625": "nan", "16630": "nan", "16635": "nan", "16640": "nan", "16645": "nan", "16650": "nan", "16655": "nan", "16660": "nan", "16665": "nan", "16670": "nan", "16675": "nan", "16680": "nan", "16685": "nan", "16690": "nan", "16695": "nan", "16700": 3.38191, "16705": "nan", "16710": "nan", "16715": "nan", "16720": "nan", "16725": "nan", "16730": "nan", "16735": "nan", "16740": "nan", "16745": "nan", "16750": "nan", "16755": "nan", "16760": "nan", "16765": "nan", "16770": "nan", "16775": "nan", "16780": "nan", "16785": "nan", "16790": "nan", "16795": "nan", "16800": 3.38258, "16805": "nan", "16810": "nan", "16815": "nan", "16820": "nan", "16825": "nan", "16830": "nan", "16835": "nan", "16840": "nan", "16845": "nan", "16850": "nan", "16855": "nan", "16860": "nan", "16865": "nan", "16870": "nan", "16875": "nan", "16880": "nan", "16885": "nan", "16890": "nan", "16895": "nan", "16900": 3.3808, "16905": "nan", "16910": "nan", "16915": "nan", "16920": "nan", "16925": "nan", "16930": "nan", "16935": "nan", "16940": "nan", "16945": "nan", "16950": "nan", "16955": "nan", "16960": "nan", "16965": "nan", "16970": "nan", "16975": "nan", "16980": "nan", "16985": "nan", "16990": "nan", "16995": "nan", "17000": 3.38134, "17005": "nan", "17010": "nan", "17015": "nan", "17020": "nan", "17025": "nan", "17030": "nan", "17035": "nan", "17040": "nan", "17045": "nan", "17050": "nan", "17055": "nan", "17060": "nan", "17065": "nan", "17070": "nan", "17075": "nan", "17080": "nan", "17085": "nan", "17090": "nan", "17095": "nan", "17100": 3.38146, "17105": "nan", "17110": "nan", "17115": "nan", "17120": "nan", "17125": "nan", "17130": "nan", "17135": "nan", "17140": "nan", "17145": "nan", "17150": "nan", "17155": "nan", "17160": "nan", "17165": "nan", "17170": "nan", "17175": "nan", "17180": "nan", "17185": "nan", "17190": "nan", "17195": "nan", "17200": 3.38208, "17205": "nan", "17210": "nan", "17215": "nan", "17220": "nan", "17225": "nan", "17230": "nan", "17235": "nan", "17240": "nan", "17245": "nan", "17250": "nan", "17255": "nan", "17260": "nan", "17265": "nan", "17270": "nan", "17275": "nan", "17280": "nan", "17285": "nan", "17290": "nan", "17295": "nan", "17300": 3.38051, "17305": "nan", "17310": "nan", "17315": "nan", "17320": "nan", "17325": "nan", "17330": "nan", "17335": "nan", "17340": "nan", "17345": "nan", "17350": "nan", "17355": "nan", "17360": "nan", "17365": "nan", "17370": "nan", "17375": "nan", "17380": "nan", "17385": "nan", "17390": "nan", "17395": "nan", "17400": 3.38055, "17405": "nan", "17410": "nan", "17415": "nan", "17420": "nan", "17425": "nan", "17430": "nan", "17435": "nan", "17440": "nan", "17445": "nan", "17450": "nan", "17455": "nan", "17460": "nan", "17465": "nan", "17470": "nan", "17475": "nan", "17480": "nan", "17485": "nan", "17490": "nan", "17495": "nan", "17500": 3.38162, "17505": "nan", "17510": "nan", "17515": "nan", "17520": "nan", "17525": "nan", "17530": "nan", "17535": "nan", "17540": "nan", "17545": "nan", "17550": "nan", "17555": "nan", "17560": "nan", "17565": "nan", "17570": "nan", "17575": "nan", "17580": "nan", "17585": "nan", "17590": "nan", "17595": "nan", "17600": 3.38031, "17605": "nan", "17610": "nan", "17615": "nan", "17620": "nan", "17625": "nan", "17630": "nan", "17635": "nan", "17640": "nan", "17645": "nan", "17650": "nan", "17655": "nan", "17660": "nan", "17665": "nan", "17670": "nan", "17675": "nan", "17680": "nan", "17685": "nan", "17690": "nan", "17695": "nan", "17700": 3.37945, "17705": "nan", "17710": "nan", "17715": "nan", "17720": "nan", "17725": "nan", "17730": "nan", "17735": "nan", "17740": "nan", "17745": "nan", "17750": "nan", "17755": "nan", "17760": "nan", "17765": "nan", "17770": "nan", "17775": "nan", "17780": "nan", "17785": "nan", "17790": "nan", "17795": "nan", "17800": 3.37992, "17805": "nan", "17810": "nan", "17815": "nan", "17820": "nan", "17825": "nan", "17830": "nan", "17835": "nan", "17840": "nan", "17845": "nan", "17850": "nan", "17855": "nan", "17860": "nan", "17865": "nan", "17870": "nan", "17875": "nan", "17880": "nan", "17885": "nan", "17890": "nan", "17895": "nan", "17900": 3.38059, "17905": "nan", "17910": "nan", "17915": "nan", "17920": "nan", "17925": "nan", "17930": "nan", "17935": "nan", "17940": "nan", "17945": "nan", "17950": "nan", "17955": "nan", "17960": "nan", "17965": "nan", "17970": "nan", "17975": "nan", "17980": "nan", "17985": "nan", "17990": "nan", "17995": "nan", "18000": 3.37947, "18005": "nan", "18010": "nan", "18015": "nan", "18020": "nan", "18025": "nan", "18030": "nan", "18035": "nan", "18040": "nan", "18045": "nan", "18050": "nan", "18055": "nan", "18060": "nan", "18065": "nan", "18070": "nan", "18075": "nan", "18080": "nan", "18085": "nan", "18090": "nan", "18095": "nan", "18100": 3.37901, "18105": "nan", "18110": "nan", "18115": "nan", "18120": "nan", "18125": "nan", "18130": "nan", "18135": "nan", "18140": "nan", "18145": "nan", "18150": "nan", "18155": "nan", "18160": "nan", "18165": "nan", "18170": "nan", "18175": "nan", "18180": "nan", "18185": "nan", "18190": "nan", "18195": "nan", "18200": 3.38053, "18205": "nan", "18210": "nan", "18215": "nan", "18220": "nan", "18225": "nan", "18230": "nan", "18235": "nan", "18240": "nan", "18245": "nan", "18250": "nan", "18255": "nan", "18260": "nan", "18265": "nan", "18270": "nan", "18275": "nan", "18280": "nan", "18285": "nan", "18290": "nan", "18295": "nan", "18300": 3.37967, "18305": "nan", "18310": "nan", "18315": "nan", "18320": "nan", "18325": "nan", "18330": "nan", "18335": "nan", "18340": "nan", "18345": "nan", "18350": "nan", "18355": "nan", "18360": "nan", "18365": "nan", "18370": "nan", "18375": "nan", "18380": "nan", "18385": "nan", "18390": "nan", "18395": "nan", "18400": 3.37975, "18405": "nan", "18410": "nan", "18415": "nan", "18420": "nan", "18425": "nan", "18430": "nan", "18435": "nan", "18440": "nan", "18445": "nan", "18450": "nan", "18455": "nan", "18460": "nan", "18465": "nan", "18470": "nan", "18475": "nan", "18480": "nan", "18485": "nan", "18490": "nan", "18495": "nan", "18500": 3.37932, "18505": "nan", "18510": "nan", "18515": "nan", "18520": "nan", "18525": "nan", "18530": "nan", "18535": "nan", "18540": "nan", "18545": "nan", "18550": "nan", "18555": "nan", "18560": "nan", "18565": "nan", "18570": "nan", "18575": "nan", "18580": "nan", "18585": "nan", "18590": "nan", "18595": "nan", "18600": 3.38011, "18605": "nan", "18610": "nan", "18615": "nan", "18620": "nan", "18625": "nan", "18630": "nan", "18635": "nan", "18640": "nan", "18645": "nan", "18650": "nan", "18655": "nan", "18660": "nan", "18665": "nan", "18670": "nan", "18675": "nan", "18680": "nan", "18685": "nan", "18690": "nan", "18695": "nan", "18700": 3.37952, "18705": "nan", "18710": "nan", "18715": "nan", "18720": "nan", "18725": "nan", "18730": "nan", "18735": "nan", "18740": "nan", "18745": "nan", "18750": "nan", "18755": "nan", "18760": "nan", "18765": "nan", "18770": "nan", "18775": "nan", "18780": "nan", "18785": "nan", "18790": "nan", "18795": "nan", "18800": 3.37956, "18805": "nan", "18810": "nan", "18815": "nan", "18820": "nan", "18825": "nan", "18830": "nan", "18835": "nan", "18840": "nan", "18845": "nan", "18850": "nan", "18855": "nan", "18860": "nan", "18865": "nan", "18870": "nan", "18875": "nan", "18880": "nan", "18885": "nan", "18890": "nan", "18895": "nan", "18900": 3.38058, "18905": "nan", "18910": "nan", "18915": "nan", "18920": "nan", "18925": "nan", "18930": "nan", "18935": "nan", "18940": "nan", "18945": "nan", "18950": "nan", "18955": "nan", "18960": "nan", "18965": "nan", "18970": "nan", "18975": "nan", "18980": "nan", "18985": "nan", "18990": "nan", "18995": "nan", "19000": 3.38, "19005": "nan", "19010": "nan", "19015": "nan", "19020": "nan", "19025": "nan", "19030": "nan", "19035": "nan", "19040": "nan", "19045": "nan", "19050": "nan", "19055": "nan", "19060": "nan", "19065": "nan", "19070": "nan", "19075": "nan", "19080": "nan", "19085": "nan", "19090": "nan", "19095": "nan", "19100": 3.38016, "19105": "nan", "19110": "nan", "19115": "nan", "19120": "nan", "19125": "nan", "19130": "nan", "19135": "nan", "19140": "nan", "19145": "nan", "19150": "nan", "19155": "nan", "19160": "nan", "19165": "nan", "19170": "nan", "19175": "nan", "19180": "nan", "19185": "nan", "19190": "nan", "19195": "nan", "19200": 3.74687, "19205": "nan", "19210": "nan", "19215": "nan", "19220": "nan", "19225": "nan", "19230": "nan", "19235": "nan", "19240": "nan", "19245": "nan", "19250": "nan", "19255": "nan", "19260": "nan", "19265": "nan", "19270": "nan", "19275": "nan", "19280": "nan", "19285": "nan", "19290": "nan", "19295": "nan", "19300": 3.38815, "19305": "nan", "19310": "nan", "19315": "nan", "19320": "nan", "19325": "nan", "19330": "nan", "19335": "nan", "19340": "nan", "19345": "nan", "19350": "nan", "19355": "nan", "19360": "nan", "19365": "nan", "19370": "nan", "19375": "nan", "19380": "nan", "19385": "nan", "19390": "nan", "19395": "nan", "19400": 3.38874, "19405": "nan", "19410": "nan", "19415": "nan", "19420": "nan", "19425": "nan", "19430": "nan", "19435": "nan", "19440": "nan", "19445": "nan", "19450": "nan", "19455": "nan", "19460": "nan", "19465": "nan", "19470": "nan", "19475": "nan", "19480": "nan", "19485": "nan", "19490": "nan", "19495": "nan", "19500": 3.38784, "19505": "nan", "19510": "nan", "19515": "nan", "19520": "nan", "19525": "nan", "19530": "nan", "19535": "nan", "19540": "nan", "19545": "nan", "19550": "nan", "19555": "nan", "19560": "nan", "19565": "nan", "19570": "nan", "19575": "nan", "19580": "nan", "19585": "nan", "19590": "nan", "19595": "nan", "19600": 3.38837, "19605": "nan", "19610": "nan", "19615": "nan", "19620": "nan", "19625": "nan", "19630": "nan", "19635": "nan", "19640": "nan", "19645": "nan", "19650": "nan", "19655": "nan", "19660": "nan", "19665": "nan", "19670": "nan", "19675": "nan", "19680": "nan", "19685": "nan", "19690": "nan", "19695": "nan", "19700": 3.38825, "19705": "nan", "19710": "nan", "19715": "nan", "19720": "nan", "19725": "nan", "19730": "nan", "19735": "nan", "19740": "nan", "19745": "nan", "19750": "nan", "19755": "nan", "19760": "nan", "19765": "nan", "19770": "nan", "19775": "nan", "19780": "nan", "19785": "nan", "19790": "nan", "19795": "nan", "19800": 3.38798, "19805": "nan", "19810": "nan", "19815": "nan", "19820": "nan", "19825": "nan", "19830": "nan", "19835": "nan", "19840": "nan", "19845": "nan", "19850": "nan", "19855": "nan", "19860": "nan", "19865": "nan", "19870": "nan", "19875": "nan", "19880": "nan", "19885": "nan", "19890": "nan", "19895": "nan", "19900": 3.38727, "19905": "nan", "19910": "nan", "19915": "nan", "19920": "nan", "19925": "nan", "19930": "nan", "19935": "nan", "19940": "nan", "19945": "nan", "19950": "nan", "19955": "nan", "19960": "nan", "19965": "nan", "19970": "nan", "19975": "nan", "19980": "nan", "19985": "nan", "19990": "nan", "19995": "nan", "20000": 3.38683, "20005": "nan", "20010": "nan", "20015": "nan", "20020": "nan", "20025": "nan", "20030": "nan", "20035": "nan", "20040": "nan", "20045": "nan", "20050": "nan", "20055": "nan", "20060": "nan", "20065": "nan", "20070": "nan", "20075": "nan", "20080": "nan", "20085": "nan", "20090": "nan", "20095": "nan", "20100": 3.38756, "20105": "nan", "20110": "nan", "20115": "nan", "20120": "nan", "20125": "nan", "20130": "nan", "20135": "nan", "20140": "nan", "20145": "nan", "20150": "nan", "20155": "nan", "20160": "nan", "20165": "nan", "20170": "nan", "20175": "nan", "20180": "nan", "20185": "nan", "20190": "nan", "20195": "nan", "20200": 3.38787, "20205": "nan", "20210": "nan", "20215": "nan", "20220": "nan", "20225": "nan", "20230": "nan", "20235": "nan", "20240": "nan", "20245": "nan", "20250": "nan", "20255": "nan", "20260": "nan", "20265": "nan", "20270": "nan", "20275": "nan", "20280": "nan", "20285": "nan", "20290": "nan", "20295": "nan", "20300": 3.38701, "20305": "nan", "20310": "nan", "20315": "nan", "20320": "nan", "20325": "nan", "20330": "nan", "20335": "nan", "20340": "nan", "20345": "nan", "20350": "nan", "20355": "nan", "20360": "nan", "20365": "nan", "20370": "nan", "20375": "nan", "20380": "nan", "20385": "nan", "20390": "nan", "20395": "nan", "20400": 3.38593, "20405": "nan", "20410": "nan", "20415": "nan", "20420": "nan", "20425": "nan", "20430": "nan", "20435": "nan", "20440": "nan", "20445": "nan", "20450": "nan", "20455": "nan", "20460": "nan", "20465": "nan", "20470": "nan", "20475": "nan", "20480": "nan", "20485": "nan", "20490": "nan", "20495": "nan", "20500": 3.38741, "20505": "nan", "20510": "nan", "20515": "nan", "20520": "nan", "20525": "nan", "20530": "nan", "20535": "nan", "20540": "nan", "20545": "nan", "20550": "nan", "20555": "nan", "20560": "nan", "20565": "nan", "20570": "nan", "20575": "nan", "20580": "nan", "20585": "nan", "20590": "nan", "20595": "nan", "20600": 3.3881, "20605": "nan", "20610": "nan", "20615": "nan", "20620": "nan", "20625": "nan", "20630": "nan", "20635": "nan", "20640": "nan", "20645": "nan", "20650": "nan", "20655": "nan", "20660": "nan", "20665": "nan", "20670": "nan", "20675": "nan", "20680": "nan", "20685": "nan", "20690": "nan", "20695": "nan", "20700": 3.38707, "20705": "nan", "20710": "nan", "20715": "nan", "20720": "nan", "20725": "nan", "20730": "nan", "20735": "nan", "20740": "nan", "20745": "nan", "20750": "nan", "20755": "nan", "20760": "nan", "20765": "nan", "20770": "nan", "20775": "nan", "20780": "nan", "20785": "nan", "20790": "nan", "20795": "nan", "20800": 3.38698, "20805": "nan", "20810": "nan", "20815": "nan", "20820": "nan", "20825": "nan", "20830": "nan", "20835": "nan", "20840": "nan", "20845": "nan", "20850": "nan", "20855": "nan", "20860": "nan", "20865": "nan", "20870": "nan", "20875": "nan", "20880": "nan", "20885": "nan", "20890": "nan", "20895": "nan", "20900": 3.38723, "20905": "nan", "20910": "nan", "20915": "nan", "20920": "nan", "20925": "nan", "20930": "nan", "20935": "nan", "20940": "nan", "20945": "nan", "20950": "nan", "20955": "nan", "20960": "nan", "20965": "nan", "20970": "nan", "20975": "nan", "20980": "nan", "20985": "nan", "20990": "nan", "20995": "nan", "21000": 3.38811, "21005": "nan", "21010": "nan", "21015": "nan", "21020": "nan", "21025": "nan", "21030": "nan", "21035": "nan", "21040": "nan", "21045": "nan", "21050": "nan", "21055": "nan", "21060": "nan", "21065": "nan", "21070": "nan", "21075": "nan", "21080": "nan", "21085": "nan", "21090": "nan", "21095": "nan", "21100": 3.38689, "21105": "nan", "21110": "nan", "21115": "nan", "21120": "nan", "21125": "nan", "21130": "nan", "21135": "nan", "21140": "nan", "21145": "nan", "21150": "nan", "21155": "nan", "21160": "nan", "21165": "nan", "21170": "nan", "21175": "nan", "21180": "nan", "21185": "nan", "21190": "nan", "21195": "nan", "21200": 3.389, "21205": "nan", "21210": "nan", "21215": "nan", "21220": "nan", "21225": "nan", "21230": "nan", "21235": "nan", "21240": "nan", "21245": "nan", "21250": "nan", "21255": "nan", "21260": "nan", "21265": "nan", "21270": "nan", "21275": "nan", "21280": "nan", "21285": "nan", "21290": "nan", "21295": "nan", "21300": 3.38746, "21305": "nan", "21310": "nan", "21315": "nan", "21320": "nan", "21325": "nan", "21330": "nan", "21335": "nan", "21340": "nan", "21345": "nan", "21350": "nan", "21355": "nan", "21360": "nan", "21365": "nan", "21370": "nan", "21375": "nan", "21380": "nan", "21385": "nan", "21390": "nan", "21395": "nan", "21400": 3.38763, "21405": "nan", "21410": "nan", "21415": "nan", "21420": "nan", "21425": "nan", "21430": "nan", "21435": "nan", "21440": "nan", "21445": "nan", "21450": "nan", "21455": "nan", "21460": "nan", "21465": "nan", "21470": "nan", "21475": "nan", "21480": "nan", "21485": "nan", "21490": "nan", "21495": "nan", "21500": 3.38875, "21505": "nan", "21510": "nan", "21515": "nan", "21520": "nan", "21525": "nan", "21530": "nan", "21535": "nan", "21540": "nan", "21545": "nan", "21550": "nan", "21555": "nan", "21560": "nan", "21565": "nan", "21570": "nan", "21575": "nan", "21580": "nan", "21585": "nan", "21590": "nan", "21595": "nan", "21600": 3.38725, "21605": "nan", "21610": "nan", "21615": "nan", "21620": "nan", "21625": "nan", "21630": "nan", "21635": "nan", "21640": "nan", "21645": "nan", "21650": "nan", "21655": "nan", "21660": "nan", "21665": "nan", "21670": "nan", "21675": "nan", "21680": "nan", "21685": "nan", "21690": "nan", "21695": "nan", "21700": 3.38854, "21705": "nan", "21710": "nan", "21715": "nan", "21720": "nan", "21725": "nan", "21730": "nan", "21735": "nan", "21740": "nan", "21745": "nan", "21750": "nan", "21755": "nan", "21760": "nan", "21765": "nan", "21770": "nan", "21775": "nan", "21780": "nan", "21785": "nan", "21790": "nan", "21795": "nan", "21800": 3.38736, "21805": "nan", "21810": "nan", "21815": "nan", "21820": "nan", "21825": "nan", "21830": "nan", "21835": "nan", "21840": "nan", "21845": "nan", "21850": "nan", "21855": "nan", "21860": "nan", "21865": "nan", "21870": "nan", "21875": "nan", "21880": "nan", "21885": "nan", "21890": "nan", "21895": "nan", "21900": 3.38622, "21905": "nan", "21910": "nan", "21915": "nan", "21920": "nan", "21925": "nan", "21930": "nan", "21935": "nan", "21940": "nan", "21945": "nan", "21950": "nan", "21955": "nan", "21960": "nan", "21965": "nan", "21970": "nan", "21975": "nan", "21980": "nan", "21985": "nan", "21990": "nan", "21995": "nan", "22000": 3.38861, "22005": "nan", "22010": "nan", "22015": "nan", "22020": "nan", "22025": "nan", "22030": "nan", "22035": "nan", "22040": "nan", "22045": "nan", "22050": "nan", "22055": "nan", "22060": "nan", "22065": "nan", "22070": "nan", "22075": "nan", "22080": "nan", "22085": "nan", "22090": "nan", "22095": "nan", "22100": 3.38745, "22105": "nan", "22110": "nan", "22115": "nan", "22120": "nan", "22125": "nan", "22130": "nan", "22135": "nan", "22140": "nan", "22145": "nan", "22150": "nan", "22155": "nan", "22160": "nan", "22165": "nan", "22170": "nan", "22175": "nan", "22180": "nan", "22185": "nan", "22190": "nan", "22195": "nan", "22200": 3.38844, "22205": "nan", "22210": "nan", "22215": "nan", "22220": "nan", "22225": "nan", "22230": "nan", "22235": "nan", "22240": "nan", "22245": "nan", "22250": "nan", "22255": "nan", "22260": "nan", "22265": "nan", "22270": "nan", "22275": "nan", "22280": "nan", "22285": "nan", "22290": "nan", "22295": "nan", "22300": 3.38783, "22305": "nan", "22310": "nan", "22315": "nan", "22320": "nan", "22325": "nan", "22330": "nan", "22335": "nan", "22340": "nan", "22345": "nan", "22350": "nan", "22355": "nan", "22360": "nan", "22365": "nan", "22370": "nan", "22375": "nan", "22380": "nan", "22385": "nan", "22390": "nan", "22395": "nan", "22400": 3.38808, "22405": "nan", "22410": "nan", "22415": "nan", "22420": "nan", "22425": "nan", "22430": "nan", "22435": "nan", "22440": "nan", "22445": "nan", "22450": "nan", "22455": "nan", "22460": "nan", "22465": "nan", "22470": "nan", "22475": "nan", "22480": "nan", "22485": "nan", "22490": "nan", "22495": "nan", "22500": 3.38778, "22505": "nan", "22510": "nan", "22515": "nan", "22520": "nan", "22525": "nan", "22530": "nan", "22535": "nan", "22540": "nan", "22545": "nan", "22550": "nan", "22555": "nan", "22560": "nan", "22565": "nan", "22570": "nan", "22575": "nan", "22580": "nan", "22585": "nan", "22590": "nan", "22595": "nan", "22600": 3.38862, "22605": "nan", "22610": "nan", "22615": "nan", "22620": "nan", "22625": "nan", "22630": "nan", "22635": "nan", "22640": "nan", "22645": "nan", "22650": "nan", "22655": "nan", "22660": "nan", "22665": "nan", "22670": "nan", "22675": "nan", "22680": "nan", "22685": "nan", "22690": "nan", "22695": "nan", "22700": 3.38769, "22705": "nan", "22710": "nan", "22715": "nan", "22720": "nan", "22725": "nan", "22730": "nan", "22735": "nan", "22740": "nan", "22745": "nan", "22750": "nan", "22755": "nan", "22760": "nan", "22765": "nan", "22770": "nan", "22775": "nan", "22780": "nan", "22785": "nan", "22790": "nan", "22795": "nan", "22800": 3.38736, "22805": "nan", "22810": "nan", "22815": "nan", "22820": "nan", "22825": "nan", "22830": "nan", "22835": "nan", "22840": "nan", "22845": "nan", "22850": "nan", "22855": "nan", "22860": "nan", "22865": "nan", "22870": "nan", "22875": "nan", "22880": "nan", "22885": "nan", "22890": "nan", "22895": "nan", "22900": 3.38889, "22905": "nan", "22910": "nan", "22915": "nan", "22920": "nan", "22925": "nan", "22930": "nan", "22935": "nan", "22940": "nan", "22945": "nan", "22950": "nan", "22955": "nan", "22960": "nan", "22965": "nan", "22970": "nan", "22975": "nan", "22980": "nan", "22985": "nan", "22990": "nan", "22995": "nan", "23000": 3.38779, "23005": "nan", "23010": "nan", "23015": "nan", "23020": "nan", "23025": "nan", "23030": "nan", "23035": "nan", "23040": "nan", "23045": "nan", "23050": "nan", "23055": "nan", "23060": "nan", "23065": "nan", "23070": "nan", "23075": "nan", "23080": "nan", "23085": "nan", "23090": "nan", "23095": "nan", "23100": 3.68837, "23105": "nan", "23110": "nan", "23115": "nan", "23120": "nan", "23125": "nan", "23130": "nan", "23135": "nan", "23140": "nan", "23145": "nan", "23150": "nan", "23155": "nan", "23160": "nan", "23165": "nan", "23170": "nan", "23175": "nan", "23180": "nan", "23185": "nan", "23190": "nan", "23195": "nan", "23200": 3.39442, "23205": "nan", "23210": "nan", "23215": "nan", "23220": "nan", "23225": "nan", "23230": "nan", "23235": "nan", "23240": "nan", "23245": "nan", "23250": "nan", "23255": "nan", "23260": "nan", "23265": "nan", "23270": "nan", "23275": "nan", "23280": "nan", "23285": "nan", "23290": "nan", "23295": "nan", "23300": 3.39474, "23305": "nan", "23310": "nan", "23315": "nan", "23320": "nan", "23325": "nan", "23330": "nan", "23335": "nan", "23340": "nan", "23345": "nan", "23350": "nan", "23355": "nan", "23360": "nan", "23365": "nan", "23370": "nan", "23375": "nan", "23380": "nan", "23385": "nan", "23390": "nan", "23395": "nan", "23400": 3.39385, "23405": "nan", "23410": "nan", "23415": "nan", "23420": "nan", "23425": "nan", "23430": "nan", "23435": "nan", "23440": "nan", "23445": "nan", "23450": "nan", "23455": "nan", "23460": "nan", "23465": "nan", "23470": "nan", "23475": "nan", "23480": "nan", "23485": "nan", "23490": "nan", "23495": "nan", "23500": 3.39396, "23505": "nan", "23510": "nan", "23515": "nan", "23520": "nan", "23525": "nan", "23530": "nan", "23535": "nan", "23540": "nan", "23545": "nan", "23550": "nan", "23555": "nan", "23560": "nan", "23565": "nan", "23570": "nan", "23575": "nan", "23580": "nan", "23585": "nan", "23590": "nan", "23595": "nan", "23600": 3.39339, "23605": "nan", "23610": "nan", "23615": "nan", "23620": "nan", "23625": "nan", "23630": "nan", "23635": "nan", "23640": "nan", "23645": "nan", "23650": "nan", "23655": "nan", "23660": "nan", "23665": "nan", "23670": "nan", "23675": "nan", "23680": "nan", "23685": "nan", "23690": "nan", "23695": "nan", "23700": 3.39286, "23705": "nan", "23710": "nan", "23715": "nan", "23720": "nan", "23725": "nan", "23730": "nan", "23735": "nan", "23740": "nan", "23745": "nan", "23750": "nan", "23755": "nan", "23760": "nan", "23765": "nan", "23770": "nan", "23775": "nan", "23780": "nan", "23785": "nan", "23790": "nan", "23795": "nan", "23800": 3.39212, "23805": "nan", "23810": "nan", "23815": "nan", "23820": "nan", "23825": "nan", "23830": "nan", "23835": "nan", "23840": "nan", "23845": "nan", "23850": "nan", "23855": "nan", "23860": "nan", "23865": "nan", "23870": "nan", "23875": "nan", "23880": "nan", "23885": "nan", "23890": "nan", "23895": "nan", "23900": 3.39167, "23905": "nan", "23910": "nan", "23915": "nan", "23920": "nan", "23925": "nan", "23930": "nan", "23935": "nan", "23940": "nan", "23945": "nan", "23950": "nan", "23955": "nan", "23960": "nan", "23965": "nan", "23970": "nan", "23975": "nan", "23980": "nan", "23985": "nan", "23990": "nan", "23995": "nan", "24000": 3.39341, "24005": "nan", "24010": "nan", "24015": "nan", "24020": "nan", "24025": "nan", "24030": "nan", "24035": "nan", "24040": "nan", "24045": "nan", "24050": "nan", "24055": "nan", "24060": "nan", "24065": "nan", "24070": "nan", "24075": "nan", "24080": "nan", "24085": "nan", "24090": "nan", "24095": "nan", "24100": 3.39349, "24105": "nan", "24110": "nan", "24115": "nan", "24120": "nan", "24125": "nan", "24130": "nan", "24135": "nan", "24140": "nan", "24145": "nan", "24150": "nan", "24155": "nan", "24160": "nan", "24165": "nan", "24170": "nan", "24175": "nan", "24180": "nan", "24185": "nan", "24190": "nan", "24195": "nan", "24200": 3.39421, "24205": "nan", "24210": "nan", "24215": "nan", "24220": "nan", "24225": "nan", "24230": "nan", "24235": "nan", "24240": "nan", "24245": "nan", "24250": "nan", "24255": "nan", "24260": "nan", "24265": "nan", "24270": "nan", "24275": "nan", "24280": "nan", "24285": "nan", "24290": "nan", "24295": "nan", "24300": 3.39535, "24305": "nan", "24310": "nan", "24315": "nan", "24320": "nan", "24325": "nan", "24330": "nan", "24335": "nan", "24340": "nan", "24345": "nan", "24350": "nan", "24355": "nan", "24360": "nan", "24365": "nan", "24370": "nan", "24375": "nan", "24380": "nan", "24385": "nan", "24390": "nan", "24395": "nan", "24400": 3.39352, "24405": "nan", "24410": "nan", "24415": "nan", "24420": "nan", "24425": "nan", "24430": "nan", "24435": "nan", "24440": "nan", "24445": "nan", "24450": "nan", "24455": "nan", "24460": "nan", "24465": "nan", "24470": "nan", "24475": "nan", "24480": "nan", "24485": "nan", "24490": "nan", "24495": "nan", "24500": 3.39516, "24505": "nan", "24510": "nan", "24515": "nan", "24520": "nan", "24525": "nan", "24530": "nan", "24535": "nan", "24540": "nan", "24545": "nan", "24550": "nan", "24555": "nan", "24560": "nan", "24565": "nan", "24570": "nan", "24575": "nan", "24580": "nan", "24585": "nan", "24590": "nan", "24595": "nan", "24600": 3.3956, "24605": "nan", "24610": "nan", "24615": "nan", "24620": "nan", "24625": "nan", "24630": "nan", "24635": "nan", "24640": "nan", "24645": "nan", "24650": "nan", "24655": "nan", "24660": "nan", "24665": "nan", "24670": "nan", "24675": "nan", "24680": "nan", "24685": "nan", "24690": "nan", "24695": "nan", "24700": 3.39407, "24705": "nan", "24710": "nan", "24715": "nan", "24720": "nan", "24725": "nan", "24730": "nan", "24735": "nan", "24740": "nan", "24745": "nan", "24750": "nan", "24755": "nan", "24760": "nan", "24765": "nan", "24770": "nan", "24775": "nan", "24780": "nan", "24785": "nan", "24790": "nan", "24795": "nan", "24800": 3.39561, "24805": "nan", "24810": "nan", "24815": "nan", "24820": "nan", "24825": "nan", "24830": "nan", "24835": "nan", "24840": "nan", "24845": "nan", "24850": "nan", "24855": "nan", "24860": "nan", "24865": "nan", "24870": "nan", "24875": "nan", "24880": "nan", "24885": "nan", "24890": "nan", "24895": "nan", "24900": 3.39546, "24905": "nan", "24910": "nan", "24915": "nan", "24920": "nan", "24925": "nan", "24930": "nan", "24935": "nan", "24940": "nan", "24945": "nan", "24950": "nan", "24955": "nan", "24960": "nan", "24965": "nan", "24970": "nan", "24975": "nan", "24980": "nan", "24985": "nan", "24990": "nan", "24995": "nan", "25000": 3.39476, "25005": "nan", "25010": "nan", "25015": "nan", "25020": "nan", "25025": "nan", "25030": "nan", "25035": "nan", "25040": "nan", "25045": "nan", "25050": "nan", "25055": "nan", "25060": "nan", "25065": "nan", "25070": "nan", "25075": "nan", "25080": "nan", "25085": "nan", "25090": "nan", "25095": "nan", "25100": 3.39285, "25105": "nan", "25110": "nan", "25115": "nan", "25120": "nan", "25125": "nan", "25130": "nan", "25135": "nan", "25140": "nan", "25145": "nan", "25150": "nan", "25155": "nan", "25160": "nan", "25165": "nan", "25170": "nan", "25175": "nan", "25180": "nan", "25185": "nan", "25190": "nan", "25195": "nan", "25200": 3.39376, "25205": "nan", "25210": "nan", "25215": "nan", "25220": "nan", "25225": "nan", "25230": "nan", "25235": "nan", "25240": "nan", "25245": "nan", "25250": "nan", "25255": "nan", "25260": "nan", "25265": "nan", "25270": "nan", "25275": "nan", "25280": "nan", "25285": "nan", "25290": "nan", "25295": "nan", "25300": 3.39506, "25305": "nan", "25310": "nan", "25315": "nan", "25320": "nan", "25325": "nan", "25330": "nan", "25335": "nan", "25340": "nan", "25345": "nan", "25350": "nan", "25355": "nan", "25360": "nan", "25365": "nan", "25370": "nan", "25375": "nan", "25380": "nan", "25385": "nan", "25390": "nan", "25395": "nan", "25400": 3.39386, "25405": "nan", "25410": "nan", "25415": "nan", "25420": "nan", "25425": "nan", "25430": "nan", "25435": "nan", "25440": "nan", "25445": "nan", "25450": "nan", "25455": "nan", "25460": "nan", "25465": "nan", "25470": "nan", "25475": "nan", "25480": "nan", "25485": "nan", "25490": "nan", "25495": "nan", "25500": 3.39369, "25505": "nan", "25510": "nan", "25515": "nan", "25520": "nan", "25525": "nan", "25530": "nan", "25535": "nan", "25540": "nan", "25545": "nan", "25550": "nan", "25555": "nan", "25560": "nan", "25565": "nan", "25570": "nan", "25575": "nan", "25580": "nan", "25585": "nan", "25590": "nan", "25595": "nan", "25600": 3.39474, "25605": "nan", "25610": "nan", "25615": "nan", "25620": "nan", "25625": "nan", "25630": "nan", "25635": "nan", "25640": "nan", "25645": "nan", "25650": "nan", "25655": "nan", "25660": "nan", "25665": "nan", "25670": "nan", "25675": "nan", "25680": "nan", "25685": "nan", "25690": "nan", "25695": "nan", "25700": 3.39492, "25705": "nan", "25710": "nan", "25715": "nan", "25720": "nan", "25725": "nan", "25730": "nan", "25735": "nan", "25740": "nan", "25745": "nan", "25750": "nan", "25755": "nan", "25760": "nan", "25765": "nan", "25770": "nan", "25775": "nan", "25780": "nan", "25785": "nan", "25790": "nan", "25795": "nan", "25800": 3.39526, "25805": "nan", "25810": "nan", "25815": "nan", "25820": "nan", "25825": "nan", "25830": "nan", "25835": "nan", "25840": "nan", "25845": "nan", "25850": "nan", "25855": "nan", "25860": "nan", "25865": "nan", "25870": "nan", "25875": "nan", "25880": "nan", "25885": "nan", "25890": "nan", "25895": "nan", "25900": 3.39426, "25905": "nan", "25910": "nan", "25915": "nan", "25920": "nan", "25925": "nan", "25930": "nan", "25935": "nan", "25940": "nan", "25945": "nan", "25950": "nan", "25955": "nan", "25960": "nan", "25965": "nan", "25970": "nan", "25975": "nan", "25980": "nan", "25985": "nan", "25990": "nan", "25995": "nan", "26000": 3.39553, "26005": "nan", "26010": "nan", "26015": "nan", "26020": "nan", "26025": "nan", "26030": "nan", "26035": "nan", "26040": "nan", "26045": "nan", "26050": "nan", "26055": "nan", "26060": "nan", "26065": "nan", "26070": "nan", "26075": "nan", "26080": "nan", "26085": "nan", "26090": "nan", "26095": "nan", "26100": 3.39462, "26105": "nan", "26110": "nan", "26115": "nan", "26120": "nan", "26125": "nan", "26130": "nan", "26135": "nan", "26140": "nan", "26145": "nan", "26150": "nan", "26155": "nan", "26160": "nan", "26165": "nan", "26170": "nan", "26175": "nan", "26180": "nan", "26185": "nan", "26190": "nan", "26195": "nan", "26200": 3.39526, "26205": "nan", "26210": "nan", "26215": "nan", "26220": "nan", "26225": "nan", "26230": "nan", "26235": "nan", "26240": "nan", "26245": "nan", "26250": "nan", "26255": "nan", "26260": "nan", "26265": "nan", "26270": "nan", "26275": "nan", "26280": "nan", "26285": "nan", "26290": "nan", "26295": "nan", "26300": 3.39408, "26305": "nan", "26310": "nan", "26315": "nan", "26320": "nan", "26325": "nan", "26330": "nan", "26335": "nan", "26340": "nan", "26345": "nan", "26350": "nan", "26355": "nan", "26360": "nan", "26365": "nan", "26370": "nan", "26375": "nan", "26380": "nan", "26385": "nan", "26390": "nan", "26395": "nan", "26400": 3.39413, "26405": "nan", "26410": "nan", "26415": "nan", "26420": "nan", "26425": "nan", "26430": "nan", "26435": "nan", "26440": "nan", "26445": "nan", "26450": "nan", "26455": "nan", "26460": "nan", "26465": "nan", "26470": "nan", "26475": "nan", "26480": "nan", "26485": "nan", "26490": "nan", "26495": "nan", "26500": 3.39246, "26505": "nan", "26510": "nan", "26515": "nan", "26520": "nan", "26525": "nan", "26530": "nan", "26535": "nan", "26540": "nan", "26545": "nan", "26550": "nan", "26555": "nan", "26560": "nan", "26565": "nan", "26570": "nan", "26575": "nan", "26580": "nan", "26585": "nan", "26590": "nan", "26595": "nan", "26600": 3.39245, "26605": "nan", "26610": "nan", "26615": "nan", "26620": "nan", "26625": "nan", "26630": "nan", "26635": "nan", "26640": "nan", "26645": "nan", "26650": "nan", "26655": "nan", "26660": "nan", "26665": "nan", "26670": "nan", "26675": "nan", "26680": "nan", "26685": "nan", "26690": "nan", "26695": "nan", "26700": 3.39363, "26705": "nan", "26710": "nan", "26715": "nan", "26720": "nan", "26725": "nan", "26730": "nan", "26735": "nan", "26740": "nan", "26745": "nan", "26750": "nan", "26755": "nan", "26760": "nan", "26765": "nan", "26770": "nan", "26775": "nan", "26780": "nan", "26785": "nan", "26790": "nan", "26795": "nan", "26800": 3.39348, "26805": "nan", "26810": "nan", "26815": "nan", "26820": "nan", "26825": "nan", "26830": "nan", "26835": "nan", "26840": "nan", "26845": "nan", "26850": "nan", "26855": "nan", "26860": "nan", "26865": "nan", "26870": "nan", "26875": "nan", "26880": "nan", "26885": "nan", "26890": "nan", "26895": "nan", "26900": 3.3936, "26905": "nan", "26910": "nan", "26915": "nan", "26920": "nan", "26925": "nan", "26930": "nan", "26935": "nan", "26940": "nan", "26945": "nan", "26950": "nan", "26955": "nan", "26960": "nan", "26965": "nan", "26970": "nan", "26975": "nan", "26980": "nan", "26985": "nan", "26990": "nan", "26995": "nan", "27000": 3.55955, "27005": "nan", "27010": "nan", "27015": "nan", "27020": "nan", "27025": "nan", "27030": "nan", "27035": "nan", "27040": "nan", "27045": "nan", "27050": "nan", "27055": "nan", "27060": "nan", "27065": "nan", "27070": "nan", "27075": "nan", "27080": "nan", "27085": "nan", "27090": "nan", "27095": "nan", "27100": 3.38383, "27105": "nan", "27110": "nan", "27115": "nan", "27120": "nan", "27125": "nan", "27130": "nan", "27135": "nan", "27140": "nan", "27145": "nan", "27150": "nan", "27155": "nan", "27160": "nan", "27165": "nan", "27170": "nan", "27175": "nan", "27180": "nan", "27185": "nan", "27190": "nan", "27195": "nan", "27200": 3.38406, "27205": "nan", "27210": "nan", "27215": "nan", "27220": "nan", "27225": "nan", "27230": "nan", "27235": "nan", "27240": "nan", "27245": "nan", "27250": "nan", "27255": "nan", "27260": "nan", "27265": "nan", "27270": "nan", "27275": "nan", "27280": "nan", "27285": "nan", "27290": "nan", "27295": "nan", "27300": 3.38406, "27305": "nan", "27310": "nan", "27315": "nan", "27320": "nan", "27325": "nan", "27330": "nan", "27335": "nan", "27340": "nan", "27345": "nan", "27350": "nan", "27355": "nan", "27360": "nan", "27365": "nan", "27370": "nan", "27375": "nan", "27380": "nan", "27385": "nan", "27390": "nan", "27395": "nan", "27400": 3.38331, "27405": "nan", "27410": "nan", "27415": "nan", "27420": "nan", "27425": "nan", "27430": "nan", "27435": "nan", "27440": "nan", "27445": "nan", "27450": "nan", "27455": "nan", "27460": "nan", "27465": "nan", "27470": "nan", "27475": "nan", "27480": "nan", "27485": "nan", "27490": "nan", "27495": "nan", "27500": 3.38359, "27505": "nan", "27510": "nan", "27515": "nan", "27520": "nan", "27525": "nan", "27530": "nan", "27535": "nan", "27540": "nan", "27545": "nan", "27550": "nan", "27555": "nan", "27560": "nan", "27565": "nan", "27570": "nan", "27575": "nan", "27580": "nan", "27585": "nan", "27590": "nan", "27595": "nan", "27600": 3.38383, "27605": "nan", "27610": "nan", "27615": "nan", "27620": "nan", "27625": "nan", "27630": "nan", "27635": "nan", "27640": "nan", "27645": "nan", "27650": "nan", "27655": "nan", "27660": "nan", "27665": "nan", "27670": "nan", "27675": "nan", "27680": "nan", "27685": "nan", "27690": "nan", "27695": "nan", "27700": 3.383, "27705": "nan", "27710": "nan", "27715": "nan", "27720": "nan", "27725": "nan", "27730": "nan", "27735": "nan", "27740": "nan", "27745": "nan", "27750": "nan", "27755": "nan", "27760": "nan", "27765": "nan", "27770": "nan", "27775": "nan", "27780": "nan", "27785": "nan", "27790": "nan", "27795": "nan", "27800": 3.38338, "27805": "nan", "27810": "nan", "27815": "nan", "27820": "nan", "27825": "nan", "27830": "nan", "27835": "nan", "27840": "nan", "27845": "nan", "27850": "nan", "27855": "nan", "27860": "nan", "27865": "nan", "27870": "nan", "27875": "nan", "27880": "nan", "27885": "nan", "27890": "nan", "27895": "nan", "27900": 3.38414, "27905": "nan", "27910": "nan", "27915": "nan", "27920": "nan", "27925": "nan", "27930": "nan", "27935": "nan", "27940": "nan", "27945": "nan", "27950": "nan", "27955": "nan", "27960": "nan", "27965": "nan", "27970": "nan", "27975": "nan", "27980": "nan", "27985": "nan", "27990": "nan", "27995": "nan", "28000": 3.38291, "28005": "nan", "28010": "nan", "28015": "nan", "28020": "nan", "28025": "nan", "28030": "nan", "28035": "nan", "28040": "nan", "28045": "nan", "28050": "nan", "28055": "nan", "28060": "nan", "28065": "nan", "28070": "nan", "28075": "nan", "28080": "nan", "28085": "nan", "28090": "nan", "28095": "nan", "28100": 3.38464, "28105": "nan", "28110": "nan", "28115": "nan", "28120": "nan", "28125": "nan", "28130": "nan", "28135": "nan", "28140": "nan", "28145": "nan", "28150": "nan", "28155": "nan", "28160": "nan", "28165": "nan", "28170": "nan", "28175": "nan", "28180": "nan", "28185": "nan", "28190": "nan", "28195": "nan", "28200": 3.38362, "28205": "nan", "28210": "nan", "28215": "nan", "28220": "nan", "28225": "nan", "28230": "nan", "28235": "nan", "28240": "nan", "28245": "nan", "28250": "nan", "28255": "nan", "28260": "nan", "28265": "nan", "28270": "nan", "28275": "nan", "28280": "nan", "28285": "nan", "28290": "nan", "28295": "nan", "28300": 3.38465, "28305": "nan", "28310": "nan", "28315": "nan", "28320": "nan", "28325": "nan", "28330": "nan", "28335": "nan", "28340": "nan", "28345": "nan", "28350": "nan", "28355": "nan", "28360": "nan", "28365": "nan", "28370": "nan", "28375": "nan", "28380": "nan", "28385": "nan", "28390": "nan", "28395": "nan", "28400": 3.38328, "28405": "nan", "28410": "nan", "28415": "nan", "28420": "nan", "28425": "nan", "28430": "nan", "28435": "nan", "28440": "nan", "28445": "nan", "28450": "nan", "28455": "nan", "28460": "nan", "28465": "nan", "28470": "nan", "28475": "nan", "28480": "nan", "28485": "nan", "28490": "nan", "28495": "nan", "28500": 3.38379, "28505": "nan", "28510": "nan", "28515": "nan", "28520": "nan", "28525": "nan", "28530": "nan", "28535": "nan", "28540": "nan", "28545": "nan", "28550": "nan", "28555": "nan", "28560": "nan", "28565": "nan", "28570": "nan", "28575": "nan", "28580": "nan", "28585": "nan", "28590": "nan", "28595": "nan", "28600": 3.38419, "28605": "nan", "28610": "nan", "28615": "nan", "28620": "nan", "28625": "nan", "28630": "nan", "28635": "nan", "28640": "nan", "28645": "nan", "28650": "nan", "28655": "nan", "28660": "nan", "28665": "nan", "28670": "nan", "28675": "nan", "28680": "nan", "28685": "nan", "28690": "nan", "28695": "nan", "28700": 3.38271, "28705": "nan", "28710": "nan", "28715": "nan", "28720": "nan", "28725": "nan", "28730": "nan", "28735": "nan", "28740": "nan", "28745": "nan", "28750": "nan", "28755": "nan", "28760": "nan", "28765": "nan", "28770": "nan", "28775": "nan", "28780": "nan", "28785": "nan", "28790": "nan", "28795": "nan", "28800": 3.38361, "28805": "nan", "28810": "nan", "28815": "nan", "28820": "nan", "28825": "nan", "28830": "nan", "28835": "nan", "28840": "nan", "28845": "nan", "28850": "nan", "28855": "nan", "28860": "nan", "28865": "nan", "28870": "nan", "28875": "nan", "28880": "nan", "28885": "nan", "28890": "nan", "28895": "nan", "28900": 3.38279, "28905": "nan", "28910": "nan", "28915": "nan", "28920": "nan", "28925": "nan", "28930": "nan", "28935": "nan", "28940": "nan", "28945": "nan", "28950": "nan", "28955": "nan", "28960": "nan", "28965": "nan", "28970": "nan", "28975": "nan", "28980": "nan", "28985": "nan", "28990": "nan", "28995": "nan", "29000": 3.38282, "29005": "nan", "29010": "nan", "29015": "nan", "29020": "nan", "29025": "nan", "29030": "nan", "29035": "nan", "29040": "nan", "29045": "nan", "29050": "nan", "29055": "nan", "29060": "nan", "29065": "nan", "29070": "nan", "29075": "nan", "29080": "nan", "29085": "nan", "29090": "nan", "29095": "nan", "29100": 3.38288, "29105": "nan", "29110": "nan", "29115": "nan", "29120": "nan", "29125": "nan", "29130": "nan", "29135": "nan", "29140": "nan", "29145": "nan", "29150": "nan", "29155": "nan", "29160": "nan", "29165": "nan", "29170": "nan", "29175": "nan", "29180": "nan", "29185": "nan", "29190": "nan", "29195": "nan", "29200": 3.3826, "29205": "nan", "29210": "nan", "29215": "nan", "29220": "nan", "29225": "nan", "29230": "nan", "29235": "nan", "29240": "nan", "29245": "nan", "29250": "nan", "29255": "nan", "29260": "nan", "29265": "nan", "29270": "nan", "29275": "nan", "29280": "nan", "29285": "nan", "29290": "nan", "29295": "nan", "29300": 3.38204, "29305": "nan", "29310": "nan", "29315": "nan", "29320": "nan", "29325": "nan", "29330": "nan", "29335": "nan", "29340": "nan", "29345": "nan", "29350": "nan", "29355": "nan", "29360": "nan", "29365": "nan", "29370": "nan", "29375": "nan", "29380": "nan", "29385": "nan", "29390": "nan", "29395": "nan", "29400": 3.38277, "29405": "nan", "29410": "nan", "29415": "nan", "29420": "nan", "29425": "nan", "29430": "nan", "29435": "nan", "29440": "nan", "29445": "nan", "29450": "nan", "29455": "nan", "29460": "nan", "29465": "nan", "29470": "nan", "29475": "nan", "29480": "nan", "29485": "nan", "29490": "nan", "29495": "nan", "29500": 3.38231, "29505": "nan", "29510": "nan", "29515": "nan", "29520": "nan", "29525": "nan", "29530": "nan", "29535": "nan", "29540": "nan", "29545": "nan", "29550": "nan", "29555": "nan", "29560": "nan", "29565": "nan", "29570": "nan", "29575": "nan", "29580": "nan", "29585": "nan", "29590": "nan", "29595": "nan", "29600": 3.38309, "29605": "nan", "29610": "nan", "29615": "nan", "29620": "nan", "29625": "nan", "29630": "nan", "29635": "nan", "29640": "nan", "29645": "nan", "29650": "nan", "29655": "nan", "29660": "nan", "29665": "nan", "29670": "nan", "29675": "nan", "29680": "nan", "29685": "nan", "29690": "nan", "29695": "nan", "29700": 3.38192, "29705": "nan", "29710": "nan", "29715": "nan", "29720": "nan", "29725": "nan", "29730": "nan", "29735": "nan", "29740": "nan", "29745": "nan", "29750": "nan", "29755": "nan", "29760": "nan", "29765": "nan", "29770": "nan", "29775": "nan", "29780": "nan", "29785": "nan", "29790": "nan", "29795": "nan", "29800": 3.38198, "29805": "nan", "29810": "nan", "29815": "nan", "29820": "nan", "29825": "nan", "29830": "nan", "29835": "nan", "29840": "nan", "29845": "nan", "29850": "nan", "29855": "nan", "29860": "nan", "29865": "nan", "29870": "nan", "29875": "nan", "29880": "nan", "29885": "nan", "29890": "nan", "29895": "nan", "29900": 3.38241, "29905": "nan", "29910": "nan", "29915": "nan", "29920": "nan", "29925": "nan", "29930": "nan", "29935": "nan", "29940": "nan", "29945": "nan", "29950": "nan", "29955": "nan", "29960": "nan", "29965": "nan", "29970": "nan", "29975": "nan", "29980": "nan", "29985": "nan", "29990": "nan", "29995": "nan", "30000": 3.3827, "30005": "nan", "30010": "nan", "30015": "nan", "30020": "nan", "30025": "nan", "30030": "nan", "30035": "nan", "30040": "nan", "30045": "nan", "30050": "nan", "30055": "nan", "30060": "nan", "30065": "nan", "30070": "nan", "30075": "nan", "30080": "nan", "30085": "nan", "30090": "nan", "30095": "nan", "30100": 3.38067, "30105": "nan", "30110": "nan", "30115": "nan", "30120": "nan", "30125": "nan", "30130": "nan", "30135": "nan", "30140": "nan", "30145": "nan", "30150": "nan", "30155": "nan", "30160": "nan", "30165": "nan", "30170": "nan", "30175": "nan", "30180": "nan", "30185": "nan", "30190": "nan", "30195": "nan", "30200": 3.38246, "30205": "nan", "30210": "nan", "30215": "nan", "30220": "nan", "30225": "nan", "30230": "nan", "30235": "nan", "30240": "nan", "30245": "nan", "30250": "nan", "30255": "nan", "30260": "nan", "30265": "nan", "30270": "nan", "30275": "nan", "30280": "nan", "30285": "nan", "30290": "nan", "30295": "nan", "30300": 3.38258, "30305": "nan", "30310": "nan", "30315": "nan", "30320": "nan", "30325": "nan", "30330": "nan", "30335": "nan", "30340": "nan", "30345": "nan", "30350": "nan", "30355": "nan", "30360": "nan", "30365": "nan", "30370": "nan", "30375": "nan", "30380": "nan", "30385": "nan", "30390": "nan", "30395": "nan", "30400": 3.38116, "30405": "nan", "30410": "nan", "30415": "nan", "30420": "nan", "30425": "nan", "30430": "nan", "30435": "nan", "30440": "nan", "30445": "nan", "30450": "nan", "30455": "nan", "30460": "nan", "30465": "nan", "30470": "nan", "30475": "nan", "30480": "nan", "30485": "nan", "30490": "nan", "30495": "nan", "30500": 3.38072, "30505": "nan", "30510": "nan", "30515": "nan", "30520": "nan", "30525": "nan", "30530": "nan", "30535": "nan", "30540": "nan", "30545": "nan", "30550": "nan", "30555": "nan", "30560": "nan", "30565": "nan", "30570": "nan", "30575": "nan", "30580": "nan", "30585": "nan", "30590": "nan", "30595": "nan", "30600": 3.38143, "30605": "nan", "30610": "nan", "30615": "nan", "30620": "nan", "30625": "nan", "30630": "nan", "30635": "nan", "30640": "nan", "30645": "nan", "30650": "nan", "30655": "nan", "30660": "nan", "30665": "nan", "30670": "nan", "30675": "nan", "30680": "nan", "30685": "nan", "30690": "nan", "30695": "nan", "30700": 3.3917, "30705": "nan", "30710": "nan", "30715": "nan", "30720": "nan", "30725": "nan", "30730": "nan", "30735": "nan", "30740": "nan", "30745": "nan", "30750": "nan", "30755": "nan", "30760": "nan", "30765": "nan", "30770": "nan", "30775": "nan", "30780": "nan", "30785": "nan", "30790": "nan", "30795": "nan", "30800": 3.39146, "30805": "nan", "30810": "nan", "30815": "nan", "30820": "nan", "30825": "nan", "30830": "nan", "30835": "nan", "30840": "nan", "30845": "nan", "30850": "nan", "30855": "nan", "30860": "nan", "30865": "nan", "30870": "nan", "30875": "nan", "30880": "nan", "30885": "nan", "30890": "nan", "30895": "nan", "30900": 3.39271, "30905": "nan", "30910": "nan", "30915": "nan", "30920": "nan", "30925": "nan", "30930": "nan", "30935": "nan", "30940": "nan", "30945": "nan", "30950": "nan", "30955": "nan", "30960": "nan", "30965": "nan", "30970": "nan", "30975": "nan", "30980": "nan", "30985": "nan", "30990": "nan", "30995": "nan", "31000": 3.39174, "31005": "nan", "31010": "nan", "31015": "nan", "31020": "nan", "31025": "nan", "31030": "nan", "31035": "nan", "31040": "nan", "31045": "nan", "31050": "nan", "31055": "nan", "31060": "nan", "31065": "nan", "31070": "nan", "31075": "nan", "31080": "nan", "31085": "nan", "31090": "nan", "31095": "nan", "31100": 3.39273, "31105": "nan", "31110": "nan", "31115": "nan", "31120": "nan", "31125": "nan", "31130": "nan", "31135": "nan", "31140": "nan", "31145": "nan", "31150": "nan", "31155": "nan", "31160": "nan", "31165": "nan", "31170": "nan", "31175": "nan", "31180": "nan", "31185": "nan", "31190": "nan", "31195": "nan", "31200": 3.39245, "31205": "nan", "31210": "nan", "31215": "nan", "31220": "nan", "31225": "nan", "31230": "nan", "31235": "nan", "31240": "nan", "31245": "nan", "31250": "nan", "31255": "nan", "31260": "nan", "31265": "nan", "31270": "nan", "31275": "nan", "31280": "nan", "31285": "nan", "31290": "nan", "31295": "nan", "31300": 3.39227, "31305": "nan", "31310": "nan", "31315": "nan", "31320": "nan", "31325": "nan", "31330": "nan", "31335": "nan", "31340": "nan", "31345": "nan", "31350": "nan", "31355": "nan", "31360": "nan", "31365": "nan", "31370": "nan", "31375": "nan", "31380": "nan", "31385": "nan", "31390": "nan", "31395": "nan", "31400": 3.39213, "31405": "nan", "31410": "nan", "31415": "nan", "31420": "nan", "31425": "nan", "31430": "nan", "31435": "nan", "31440": "nan", "31445": "nan", "31450": "nan", "31455": "nan", "31460": "nan", "31465": "nan", "31470": "nan", "31475": "nan", "31480": "nan", "31485": "nan", "31490": "nan", "31495": "nan", "31500": 3.39235, "31505": "nan", "31510": "nan", "31515": "nan", "31520": "nan", "31525": "nan", "31530": "nan", "31535": "nan", "31540": "nan", "31545": "nan", "31550": "nan", "31555": "nan", "31560": "nan", "31565": "nan", "31570": "nan", "31575": "nan", "31580": "nan", "31585": "nan", "31590": "nan", "31595": "nan", "31600": 3.39298, "31605": "nan", "31610": "nan", "31615": "nan", "31620": "nan", "31625": "nan", "31630": "nan", "31635": "nan", "31640": "nan", "31645": "nan", "31650": "nan", "31655": "nan", "31660": "nan", "31665": "nan", "31670": "nan", "31675": "nan", "31680": "nan", "31685": "nan", "31690": "nan", "31695": "nan", "31700": 3.39361, "31705": "nan", "31710": "nan", "31715": "nan", "31720": "nan", "31725": "nan", "31730": "nan", "31735": "nan", "31740": "nan", "31745": "nan", "31750": "nan", "31755": "nan", "31760": "nan", "31765": "nan", "31770": "nan", "31775": "nan", "31780": "nan", "31785": "nan", "31790": "nan", "31795": "nan", "31800": 3.39533, "31805": "nan", "31810": "nan", "31815": "nan", "31820": "nan", "31825": "nan", "31830": "nan", "31835": "nan", "31840": "nan", "31845": "nan", "31850": "nan", "31855": "nan", "31860": "nan", "31865": "nan", "31870": "nan", "31875": "nan", "31880": "nan", "31885": "nan", "31890": "nan", "31895": "nan", "31900": 3.39273, "31905": "nan", "31910": "nan", "31915": "nan", "31920": "nan", "31925": "nan", "31930": "nan", "31935": "nan", "31940": "nan", "31945": "nan", "31950": "nan", "31955": "nan", "31960": "nan", "31965": "nan", "31970": "nan", "31975": "nan", "31980": "nan", "31985": "nan", "31990": "nan", "31995": "nan", "32000": 3.39307, "32005": "nan", "32010": "nan", "32015": "nan", "32020": "nan", "32025": "nan", "32030": "nan", "32035": "nan", "32040": "nan", "32045": "nan", "32050": "nan", "32055": "nan", "32060": "nan", "32065": "nan", "32070": "nan", "32075": "nan", "32080": "nan", "32085": "nan", "32090": "nan", "32095": "nan", "32100": 3.39333, "32105": "nan", "32110": "nan", "32115": "nan", "32120": "nan", "32125": "nan", "32130": "nan", "32135": "nan", "32140": "nan", "32145": "nan", "32150": "nan", "32155": "nan", "32160": "nan", "32165": "nan", "32170": "nan", "32175": "nan", "32180": "nan", "32185": "nan", "32190": "nan", "32195": "nan", "32200": 3.3931, "32205": "nan", "32210": "nan", "32215": "nan", "32220": "nan", "32225": "nan", "32230": "nan", "32235": "nan", "32240": "nan", "32245": "nan", "32250": "nan", "32255": "nan", "32260": "nan", "32265": "nan", "32270": "nan", "32275": "nan", "32280": "nan", "32285": "nan", "32290": "nan", "32295": "nan", "32300": 3.39214, "32305": "nan", "32310": "nan", "32315": "nan", "32320": "nan", "32325": "nan", "32330": "nan", "32335": "nan", "32340": "nan", "32345": "nan", "32350": "nan", "32355": "nan", "32360": "nan", "32365": "nan", "32370": "nan", "32375": "nan", "32380": "nan", "32385": "nan", "32390": "nan", "32395": "nan", "32400": 3.39339, "32405": "nan", "32410": "nan", "32415": "nan", "32420": "nan", "32425": "nan", "32430": "nan", "32435": "nan", "32440": "nan", "32445": "nan", "32450": "nan", "32455": "nan", "32460": "nan", "32465": "nan", "32470": "nan", "32475": "nan", "32480": "nan", "32485": "nan", "32490": "nan", "32495": "nan", "32500": 3.39254, "32505": "nan", "32510": "nan", "32515": "nan", "32520": "nan", "32525": "nan", "32530": "nan", "32535": "nan", "32540": "nan", "32545": "nan", "32550": "nan", "32555": "nan", "32560": "nan", "32565": "nan", "32570": "nan", "32575": "nan", "32580": "nan", "32585": "nan", "32590": "nan", "32595": "nan", "32600": 3.39257, "32605": "nan", "32610": "nan", "32615": "nan", "32620": "nan", "32625": "nan", "32630": "nan", "32635": "nan", "32640": "nan", "32645": "nan", "32650": "nan", "32655": "nan", "32660": "nan", "32665": "nan", "32670": "nan", "32675": "nan", "32680": "nan", "32685": "nan", "32690": "nan", "32695": "nan", "32700": 3.39211, "32705": "nan", "32710": "nan", "32715": "nan", "32720": "nan", "32725": "nan", "32730": "nan", "32735": "nan", "32740": "nan", "32745": "nan", "32750": "nan", "32755": "nan", "32760": "nan", "32765": "nan", "32770": "nan", "32775": "nan", "32780": "nan", "32785": "nan", "32790": "nan", "32795": "nan", "32800": 3.39198, "32805": "nan", "32810": "nan", "32815": "nan", "32820": "nan", "32825": "nan", "32830": "nan", "32835": "nan", "32840": "nan", "32845": "nan", "32850": "nan", "32855": "nan", "32860": "nan", "32865": "nan", "32870": "nan", "32875": "nan", "32880": "nan", "32885": "nan", "32890": "nan", "32895": "nan", "32900": 3.3921, "32905": "nan", "32910": "nan", "32915": "nan", "32920": "nan", "32925": "nan", "32930": "nan", "32935": "nan", "32940": "nan", "32945": "nan", "32950": "nan", "32955": "nan", "32960": "nan", "32965": "nan", "32970": "nan", "32975": "nan", "32980": "nan", "32985": "nan", "32990": "nan", "32995": "nan", "33000": 3.39328, "33005": "nan", "33010": "nan", "33015": "nan", "33020": "nan", "33025": "nan", "33030": "nan", "33035": "nan", "33040": "nan", "33045": "nan", "33050": "nan", "33055": "nan", "33060": "nan", "33065": "nan", "33070": "nan", "33075": "nan", "33080": "nan", "33085": "nan", "33090": "nan", "33095": "nan", "33100": 3.39283, "33105": "nan", "33110": "nan", "33115": "nan", "33120": "nan", "33125": "nan", "33130": "nan", "33135": "nan", "33140": "nan", "33145": "nan", "33150": "nan", "33155": "nan", "33160": "nan", "33165": "nan", "33170": "nan", "33175": "nan", "33180": "nan", "33185": "nan", "33190": "nan", "33195": "nan", "33200": 3.39264, "33205": "nan", "33210": "nan", "33215": "nan", "33220": "nan", "33225": "nan", "33230": "nan", "33235": "nan", "33240": "nan", "33245": "nan", "33250": "nan", "33255": "nan", "33260": "nan", "33265": "nan", "33270": "nan", "33275": "nan", "33280": "nan", "33285": "nan", "33290": "nan", "33295": "nan", "33300": 3.39194, "33305": "nan", "33310": "nan", "33315": "nan", "33320": "nan", "33325": "nan", "33330": "nan", "33335": "nan", "33340": "nan", "33345": "nan", "33350": "nan", "33355": "nan", "33360": "nan", "33365": "nan", "33370": "nan", "33375": "nan", "33380": "nan", "33385": "nan", "33390": "nan", "33395": "nan", "33400": 3.39304, "33405": "nan", "33410": "nan", "33415": "nan", "33420": "nan", "33425": "nan", "33430": "nan", "33435": "nan", "33440": "nan", "33445": "nan", "33450": "nan", "33455": "nan", "33460": "nan", "33465": "nan", "33470": "nan", "33475": "nan", "33480": "nan", "33485": "nan", "33490": "nan", "33495": "nan", "33500": 3.39358, "33505": "nan", "33510": "nan", "33515": "nan", "33520": "nan", "33525": "nan", "33530": "nan", "33535": "nan", "33540": "nan", "33545": "nan", "33550": "nan", "33555": "nan", "33560": "nan", "33565": "nan", "33570": "nan", "33575": "nan", "33580": "nan", "33585": "nan", "33590": "nan", "33595": "nan", "33600": 3.39193, "33605": "nan", "33610": "nan", "33615": "nan", "33620": "nan", "33625": "nan", "33630": "nan", "33635": "nan", "33640": "nan", "33645": "nan", "33650": "nan", "33655": "nan", "33660": "nan", "33665": "nan", "33670": "nan", "33675": "nan", "33680": "nan", "33685": "nan", "33690": "nan", "33695": "nan", "33700": 3.39216, "33705": "nan", "33710": "nan", "33715": "nan", "33720": "nan", "33725": "nan", "33730": "nan", "33735": "nan", "33740": "nan", "33745": "nan", "33750": "nan", "33755": "nan", "33760": "nan", "33765": "nan", "33770": "nan", "33775": "nan", "33780": "nan", "33785": "nan", "33790": "nan", "33795": "nan", "33800": 3.38838, "33805": "nan", "33810": "nan", "33815": "nan", "33820": "nan", "33825": "nan", "33830": "nan", "33835": "nan", "33840": "nan", "33845": "nan", "33850": "nan", "33855": "nan", "33860": "nan", "33865": "nan", "33870": "nan", "33875": "nan", "33880": "nan", "33885": "nan", "33890": "nan", "33895": "nan", "33900": 3.38777, "33905": "nan", "33910": "nan", "33915": "nan", "33920": "nan", "33925": "nan", "33930": "nan", "33935": "nan", "33940": "nan", "33945": "nan", "33950": "nan", "33955": "nan", "33960": "nan", "33965": "nan", "33970": "nan", "33975": "nan", "33980": "nan", "33985": "nan", "33990": "nan", "33995": "nan", "34000": 3.5595, "34005": "nan", "34010": "nan", "34015": "nan", "34020": "nan", "34025": "nan", "34030": "nan", "34035": "nan", "34040": "nan", "34045": "nan", "34050": "nan", "34055": "nan", "34060": "nan", "34065": "nan", "34070": "nan", "34075": "nan", "34080": "nan", "34085": "nan", "34090": "nan", "34095": "nan", "34100": 3.3979, "34105": "nan", "34110": "nan", "34115": "nan", "34120": "nan", "34125": "nan", "34130": "nan", "34135": "nan", "34140": "nan", "34145": "nan", "34150": "nan", "34155": "nan", "34160": "nan", "34165": "nan", "34170": "nan", "34175": "nan", "34180": "nan", "34185": "nan", "34190": "nan", "34195": "nan", "34200": 3.39779, "34205": "nan", "34210": "nan", "34215": "nan", "34220": "nan", "34225": "nan", "34230": "nan", "34235": "nan", "34240": "nan", "34245": "nan", "34250": "nan", "34255": "nan", "34260": "nan", "34265": "nan", "34270": "nan", "34275": "nan", "34280": "nan", "34285": "nan", "34290": "nan", "34295": "nan", "34300": 3.39683, "34305": "nan", "34310": "nan", "34315": "nan", "34320": "nan", "34325": "nan", "34330": "nan", "34335": "nan", "34340": "nan", "34345": "nan", "34350": "nan", "34355": "nan", "34360": "nan", "34365": "nan", "34370": "nan", "34375": "nan", "34380": "nan", "34385": "nan", "34390": "nan", "34395": "nan", "34400": 3.39548, "34405": "nan", "34410": "nan", "34415": "nan", "34420": "nan", "34425": "nan", "34430": "nan", "34435": "nan", "34440": "nan", "34445": "nan", "34450": "nan", "34455": "nan", "34460": "nan", "34465": "nan", "34470": "nan", "34475": "nan", "34480": "nan", "34485": "nan", "34490": "nan", "34495": "nan", "34500": 3.39524, "34505": "nan", "34510": "nan", "34515": "nan", "34520": "nan", "34525": "nan", "34530": "nan", "34535": "nan", "34540": "nan", "34545": "nan", "34550": "nan", "34555": "nan", "34560": "nan", "34565": "nan", "34570": "nan", "34575": "nan", "34580": "nan", "34585": "nan", "34590": "nan", "34595": "nan", "34600": 3.39548, "34605": "nan", "34610": "nan", "34615": "nan", "34620": "nan", "34625": "nan", "34630": "nan", "34635": "nan", "34640": "nan", "34645": "nan", "34650": "nan", "34655": "nan", "34660": "nan", "34665": "nan", "34670": "nan", "34675": "nan", "34680": "nan", "34685": "nan", "34690": "nan", "34695": "nan", "34700": 3.39573, "34705": "nan", "34710": "nan", "34715": "nan", "34720": "nan", "34725": "nan", "34730": "nan", "34735": "nan", "34740": "nan", "34745": "nan", "34750": "nan", "34755": "nan", "34760": "nan", "34765": "nan", "34770": "nan", "34775": "nan", "34780": "nan", "34785": "nan", "34790": "nan", "34795": "nan", "34800": 3.39514, "34805": "nan", "34810": "nan", "34815": "nan", "34820": "nan", "34825": "nan", "34830": "nan", "34835": "nan", "34840": "nan", "34845": "nan", "34850": "nan", "34855": "nan", "34860": "nan", "34865": "nan", "34870": "nan", "34875": "nan", "34880": "nan", "34885": "nan", "34890": "nan", "34895": "nan", "34900": 3.39473, "34905": "nan", "34910": "nan", "34915": "nan", "34920": "nan", "34925": "nan", "34930": "nan", "34935": "nan", "34940": "nan", "34945": "nan", "34950": "nan", "34955": "nan", "34960": "nan", "34965": "nan", "34970": "nan", "34975": "nan", "34980": "nan", "34985": "nan", "34990": "nan", "34995": "nan", "35000": 3.39358, "35005": "nan", "35010": "nan", "35015": "nan", "35020": "nan", "35025": "nan", "35030": "nan", "35035": "nan", "35040": "nan", "35045": "nan", "35050": "nan", "35055": "nan", "35060": "nan", "35065": "nan", "35070": "nan", "35075": "nan", "35080": "nan", "35085": "nan", "35090": "nan", "35095": "nan", "35100": 3.3924, "35105": "nan", "35110": "nan", "35115": "nan", "35120": "nan", "35125": "nan", "35130": "nan", "35135": "nan", "35140": "nan", "35145": "nan", "35150": "nan", "35155": "nan", "35160": "nan", "35165": "nan", "35170": "nan", "35175": "nan", "35180": "nan", "35185": "nan", "35190": "nan", "35195": "nan", "35200": 3.39584, "35205": "nan", "35210": "nan", "35215": "nan", "35220": "nan", "35225": "nan", "35230": "nan", "35235": "nan", "35240": "nan", "35245": "nan", "35250": "nan", "35255": "nan", "35260": "nan", "35265": "nan", "35270": "nan", "35275": "nan", "35280": "nan", "35285": "nan", "35290": "nan", "35295": "nan", "35300": 3.39657, "35305": "nan", "35310": "nan", "35315": "nan", "35320": "nan", "35325": "nan", "35330": "nan", "35335": "nan", "35340": "nan", "35345": "nan", "35350": "nan", "35355": "nan", "35360": "nan", "35365": "nan", "35370": "nan", "35375": "nan", "35380": "nan", "35385": "nan", "35390": "nan", "35395": "nan", "35400": 3.39625, "35405": "nan", "35410": "nan", "35415": "nan", "35420": "nan", "35425": "nan", "35430": "nan", "35435": "nan", "35440": "nan", "35445": "nan", "35450": "nan", "35455": "nan", "35460": "nan", "35465": "nan", "35470": "nan", "35475": "nan", "35480": "nan", "35485": "nan", "35490": "nan", "35495": "nan", "35500": 3.39718, "35505": "nan", "35510": "nan", "35515": "nan", "35520": "nan", "35525": "nan", "35530": "nan", "35535": "nan", "35540": "nan", "35545": "nan", "35550": "nan", "35555": "nan", "35560": "nan", "35565": "nan", "35570": "nan", "35575": "nan", "35580": "nan", "35585": "nan", "35590": "nan", "35595": "nan", "35600": 3.39746, "35605": "nan", "35610": "nan", "35615": "nan", "35620": "nan", "35625": "nan", "35630": "nan", "35635": "nan", "35640": "nan", "35645": "nan", "35650": "nan", "35655": "nan", "35660": "nan", "35665": "nan", "35670": "nan", "35675": "nan", "35680": "nan", "35685": "nan", "35690": "nan", "35695": "nan", "35700": 3.3974, "35705": "nan", "35710": "nan", "35715": "nan", "35720": "nan", "35725": "nan", "35730": "nan", "35735": "nan", "35740": "nan", "35745": "nan", "35750": "nan", "35755": "nan", "35760": "nan", "35765": "nan", "35770": "nan", "35775": "nan", "35780": "nan", "35785": "nan", "35790": "nan", "35795": "nan", "35800": 3.39726, "35805": "nan", "35810": "nan", "35815": "nan", "35820": "nan", "35825": "nan", "35830": "nan", "35835": "nan", "35840": "nan", "35845": "nan", "35850": "nan", "35855": "nan", "35860": "nan", "35865": "nan", "35870": "nan", "35875": "nan", "35880": "nan", "35885": "nan", "35890": "nan", "35895": "nan", "35900": 3.39688, "35905": "nan", "35910": "nan", "35915": "nan", "35920": "nan", "35925": "nan", "35930": "nan", "35935": "nan", "35940": "nan", "35945": "nan", "35950": "nan", "35955": "nan", "35960": "nan", "35965": "nan", "35970": "nan", "35975": "nan", "35980": "nan", "35985": "nan", "35990": "nan", "35995": "nan", "36000": 3.39803, "36005": "nan", "36010": "nan", "36015": "nan", "36020": "nan", "36025": "nan", "36030": "nan", "36035": "nan", "36040": "nan", "36045": "nan", "36050": "nan", "36055": "nan", "36060": "nan", "36065": "nan", "36070": "nan", "36075": "nan", "36080": "nan", "36085": "nan", "36090": "nan", "36095": "nan", "36100": 3.39757, "36105": "nan", "36110": "nan", "36115": "nan", "36120": "nan", "36125": "nan", "36130": "nan", "36135": "nan", "36140": "nan", "36145": "nan", "36150": "nan", "36155": "nan", "36160": "nan", "36165": "nan", "36170": "nan", "36175": "nan", "36180": "nan", "36185": "nan", "36190": "nan", "36195": "nan", "36200": 3.39764, "36205": "nan", "36210": "nan", "36215": "nan", "36220": "nan", "36225": "nan", "36230": "nan", "36235": "nan", "36240": "nan", "36245": "nan", "36250": "nan", "36255": "nan", "36260": "nan", "36265": "nan", "36270": "nan", "36275": "nan", "36280": "nan", "36285": "nan", "36290": "nan", "36295": "nan", "36300": 3.39876, "36305": "nan", "36310": "nan", "36315": "nan", "36320": "nan", "36325": "nan", "36330": "nan", "36335": "nan", "36340": "nan", "36345": "nan", "36350": "nan", "36355": "nan", "36360": "nan", "36365": "nan", "36370": "nan", "36375": "nan", "36380": "nan", "36385": "nan", "36390": "nan", "36395": "nan", "36400": 3.39829, "36405": "nan", "36410": "nan", "36415": "nan", "36420": "nan", "36425": "nan", "36430": "nan", "36435": "nan", "36440": "nan", "36445": "nan", "36450": "nan", "36455": "nan", "36460": "nan", "36465": "nan", "36470": "nan", "36475": "nan", "36480": "nan", "36485": "nan", "36490": "nan", "36495": "nan", "36500": 3.39791, "36505": "nan", "36510": "nan", "36515": "nan", "36520": "nan", "36525": "nan", "36530": "nan", "36535": "nan", "36540": "nan", "36545": "nan", "36550": "nan", "36555": "nan", "36560": "nan", "36565": "nan", "36570": "nan", "36575": "nan", "36580": "nan", "36585": "nan", "36590": "nan", "36595": "nan", "36600": 3.39866, "36605": "nan", "36610": "nan", "36615": "nan", "36620": "nan", "36625": "nan", "36630": "nan", "36635": "nan", "36640": "nan", "36645": "nan", "36650": "nan", "36655": "nan", "36660": "nan", "36665": "nan", "36670": "nan", "36675": "nan", "36680": "nan", "36685": "nan", "36690": "nan", "36695": "nan", "36700": 3.39907, "36705": "nan", "36710": "nan", "36715": "nan", "36720": "nan", "36725": "nan", "36730": "nan", "36735": "nan", "36740": "nan", "36745": "nan", "36750": "nan", "36755": "nan", "36760": "nan", "36765": "nan", "36770": "nan", "36775": "nan", "36780": "nan", "36785": "nan", "36790": "nan", "36795": "nan", "36800": 3.39819, "36805": "nan", "36810": "nan", "36815": "nan", "36820": "nan", "36825": "nan", "36830": "nan", "36835": "nan", "36840": "nan", "36845": "nan", "36850": "nan", "36855": "nan", "36860": "nan", "36865": "nan", "36870": "nan", "36875": "nan", "36880": "nan", "36885": "nan", "36890": "nan", "36895": "nan", "36900": 3.39883, "36905": "nan", "36910": "nan", "36915": "nan", "36920": "nan", "36925": "nan", "36930": "nan", "36935": "nan", "36940": "nan", "36945": "nan", "36950": "nan", "36955": "nan", "36960": "nan", "36965": "nan", "36970": "nan", "36975": "nan", "36980": "nan", "36985": "nan", "36990": "nan", "36995": "nan", "37000": 3.39926, "37005": "nan", "37010": "nan", "37015": "nan", "37020": "nan", "37025": "nan", "37030": "nan", "37035": "nan", "37040": "nan", "37045": "nan", "37050": "nan", "37055": "nan", "37060": "nan", "37065": "nan", "37070": "nan", "37075": "nan", "37080": "nan", "37085": "nan", "37090": "nan", "37095": "nan", "37100": 3.3987, "37105": "nan", "37110": "nan", "37115": "nan", "37120": "nan", "37125": "nan", "37130": "nan", "37135": "nan", "37140": "nan", "37145": "nan", "37150": "nan", "37155": "nan", "37160": "nan", "37165": "nan", "37170": "nan", "37175": "nan", "37180": "nan", "37185": "nan", "37190": "nan", "37195": "nan", "37200": 3.3996, "37205": "nan", "37210": "nan", "37215": "nan", "37220": "nan", "37225": "nan", "37230": "nan", "37235": "nan", "37240": "nan", "37245": "nan", "37250": "nan", "37255": "nan", "37260": "nan", "37265": "nan", "37270": "nan", "37275": "nan", "37280": "nan", "37285": "nan", "37290": "nan", "37295": "nan", "37300": 3.40168, "37305": "nan", "37310": "nan", "37315": "nan", "37320": "nan", "37325": "nan", "37330": "nan", "37335": "nan", "37340": "nan", "37345": "nan", "37350": "nan", "37355": "nan", "37360": "nan", "37365": "nan", "37370": "nan", "37375": "nan", "37380": "nan", "37385": "nan", "37390": "nan", "37395": "nan", "37400": 3.40221, "37405": "nan", "37410": "nan", "37415": "nan", "37420": "nan", "37425": "nan", "37430": "nan", "37435": "nan", "37440": "nan", "37445": "nan", "37450": "nan", "37455": "nan", "37460": "nan", "37465": "nan", "37470": "nan", "37475": "nan", "37480": "nan", "37485": "nan", "37490": "nan", "37495": "nan", "37500": 3.40161, "37505": "nan", "37510": "nan", "37515": "nan", "37520": "nan", "37525": "nan", "37530": "nan", "37535": "nan", "37540": "nan", "37545": "nan", "37550": "nan", "37555": "nan", "37560": "nan", "37565": "nan", "37570": "nan", "37575": "nan", "37580": "nan", "37585": "nan", "37590": "nan", "37595": "nan", "37600": 3.40111, "37605": "nan", "37610": "nan", "37615": "nan", "37620": "nan", "37625": "nan", "37630": "nan", "37635": "nan", "37640": "nan", "37645": "nan", "37650": "nan", "37655": "nan", "37660": "nan", "37665": "nan", "37670": "nan", "37675": "nan", "37680": "nan", "37685": "nan", "37690": "nan", "37695": "nan", "37700": 3.65057, "37705": "nan", "37710": "nan", "37715": "nan", "37720": "nan", "37725": "nan", "37730": "nan", "37735": "nan", "37740": "nan", "37745": "nan", "37750": "nan", "37755": "nan", "37760": "nan", "37765": "nan", "37770": "nan", "37775": "nan", "37780": "nan", "37785": "nan", "37790": "nan", "37795": "nan", "37800": 3.39739, "37805": "nan", "37810": "nan", "37815": "nan", "37820": "nan", "37825": "nan", "37830": "nan", "37835": "nan", "37840": "nan", "37845": "nan", "37850": "nan", "37855": "nan", "37860": "nan", "37865": "nan", "37870": "nan", "37875": "nan", "37880": "nan", "37885": "nan", "37890": "nan", "37895": "nan", "37900": 3.40045, "37905": "nan", "37910": "nan", "37915": "nan", "37920": "nan", "37925": "nan", "37930": "nan", "37935": "nan", "37940": "nan", "37945": "nan", "37950": "nan", "37955": "nan", "37960": "nan", "37965": "nan", "37970": "nan", "37975": "nan", "37980": "nan", "37985": "nan", "37990": "nan", "37995": "nan", "38000": 3.40041, "38005": "nan", "38010": "nan", "38015": "nan", "38020": "nan", "38025": "nan", "38030": "nan", "38035": "nan", "38040": "nan", "38045": "nan", "38050": "nan", "38055": "nan", "38060": "nan", "38065": "nan", "38070": "nan", "38075": "nan", "38080": "nan", "38085": "nan", "38090": "nan", "38095": "nan", "38100": 3.39901, "38105": "nan", "38110": "nan", "38115": "nan", "38120": "nan", "38125": "nan", "38130": "nan", "38135": "nan", "38140": "nan", "38145": "nan", "38150": "nan", "38155": "nan", "38160": "nan", "38165": "nan", "38170": "nan", "38175": "nan", "38180": "nan", "38185": "nan", "38190": "nan", "38195": "nan", "38200": 3.43395, "38205": "nan", "38210": "nan", "38215": "nan", "38220": "nan", "38225": "nan", "38230": "nan", "38235": "nan", "38240": "nan", "38245": "nan", "38250": "nan", "38255": "nan", "38260": "nan", "38265": "nan", "38270": "nan", "38275": "nan", "38280": "nan", "38285": "nan", "38290": "nan", "38295": "nan", "38300": 3.40437, "38305": "nan", "38310": "nan", "38315": "nan", "38320": "nan", "38325": "nan", "38330": "nan", "38335": "nan", "38340": "nan", "38345": "nan", "38350": "nan", "38355": "nan", "38360": "nan", "38365": "nan", "38370": "nan", "38375": "nan", "38380": "nan", "38385": "nan", "38390": "nan", "38395": "nan", "38400": 3.40284, "38405": "nan", "38410": "nan", "38415": "nan", "38420": "nan", "38425": "nan", "38430": "nan", "38435": "nan", "38440": "nan", "38445": "nan", "38450": "nan", "38455": "nan", "38460": "nan", "38465": "nan", "38470": "nan", "38475": "nan", "38480": "nan", "38485": "nan", "38490": "nan", "38495": "nan", "38500": 3.407, "38505": "nan", "38510": "nan", "38515": "nan", "38520": "nan", "38525": "nan", "38530": "nan", "38535": "nan", "38540": "nan", "38545": "nan", "38550": "nan", "38555": "nan", "38560": "nan", "38565": "nan", "38570": "nan", "38575": "nan", "38580": "nan", "38585": "nan", "38590": "nan", "38595": "nan", "38600": 3.46144, "38605": "nan", "38610": "nan", "38615": "nan", "38620": "nan", "38625": "nan", "38630": "nan", "38635": "nan", "38640": "nan", "38645": "nan", "38650": "nan", "38655": "nan", "38660": "nan", "38665": "nan", "38670": "nan", "38675": "nan", "38680": "nan", "38685": "nan", "38690": "nan", "38695": "nan", "38700": 3.39871, "38705": "nan", "38710": "nan", "38715": "nan", "38720": "nan", "38725": "nan", "38730": "nan", "38735": "nan", "38740": "nan", "38745": "nan", "38750": "nan", "38755": "nan", "38760": "nan", "38765": "nan", "38770": "nan", "38775": "nan", "38780": "nan", "38785": "nan", "38790": "nan", "38795": "nan", "38800": 3.38852, "38805": "nan", "38810": "nan", "38815": "nan", "38820": "nan", "38825": "nan", "38830": "nan", "38835": "nan", "38840": "nan", "38845": "nan", "38850": "nan", "38855": "nan", "38860": "nan", "38865": "nan", "38870": "nan", "38875": "nan", "38880": "nan", "38885": "nan", "38890": "nan", "38895": "nan", "38900": 3.38915, "38905": "nan", "38910": "nan", "38915": "nan", "38920": "nan", "38925": "nan", "38930": "nan", "38935": "nan", "38940": "nan", "38945": "nan", "38950": "nan", "38955": "nan", "38960": "nan", "38965": "nan", "38970": "nan", "38975": "nan", "38980": "nan", "38985": "nan", "38990": "nan", "38995": "nan", "39000": 3.3893, "39005": "nan", "39010": "nan", "39015": "nan", "39020": "nan", "39025": "nan", "39030": "nan", "39035": "nan", "39040": "nan", "39045": "nan", "39050": "nan", "39055": "nan", "39060": "nan", "39065": "nan", "39070": "nan", "39075": "nan", "39080": "nan", "39085": "nan", "39090": "nan", "39095": "nan", "39100": 3.38952, "39105": "nan", "39110": "nan", "39115": "nan", "39120": "nan", "39125": "nan", "39130": "nan", "39135": "nan", "39140": "nan", "39145": "nan", "39150": "nan", "39155": "nan", "39160": "nan", "39165": "nan", "39170": "nan", "39175": "nan", "39180": "nan", "39185": "nan", "39190": "nan", "39195": "nan", "39200": 3.38939, "39205": "nan", "39210": "nan", "39215": "nan", "39220": "nan", "39225": "nan", "39230": "nan", "39235": "nan", "39240": "nan", "39245": "nan", "39250": "nan", "39255": "nan", "39260": "nan", "39265": "nan", "39270": "nan", "39275": "nan", "39280": "nan", "39285": "nan", "39290": "nan", "39295": "nan", "39300": 3.38958, "39305": "nan", "39310": "nan", "39315": "nan", "39320": "nan", "39325": "nan", "39330": "nan", "39335": "nan", "39340": "nan", "39345": "nan", "39350": "nan", "39355": "nan", "39360": "nan", "39365": "nan", "39370": "nan", "39375": "nan", "39380": "nan", "39385": "nan", "39390": "nan", "39395": "nan", "39400": 3.38984, "39405": "nan", "39410": "nan", "39415": "nan", "39420": "nan", "39425": "nan", "39430": "nan", "39435": "nan", "39440": "nan", "39445": "nan", "39450": "nan", "39455": "nan", "39460": "nan", "39465": "nan", "39470": "nan", "39475": "nan", "39480": "nan", "39485": "nan", "39490": "nan", "39495": "nan", "39500": 3.38965, "39505": "nan", "39510": "nan", "39515": "nan", "39520": "nan", "39525": "nan", "39530": "nan", "39535": "nan", "39540": "nan", "39545": "nan", "39550": "nan", "39555": "nan", "39560": "nan", "39565": "nan", "39570": "nan", "39575": "nan", "39580": "nan", "39585": "nan", "39590": "nan", "39595": "nan", "39600": 3.39007, "39605": "nan", "39610": "nan", "39615": "nan", "39620": "nan", "39625": "nan", "39630": "nan", "39635": "nan", "39640": "nan", "39645": "nan", "39650": "nan", "39655": "nan", "39660": "nan", "39665": "nan", "39670": "nan", "39675": "nan", "39680": "nan", "39685": "nan", "39690": "nan", "39695": "nan", "39700": 3.38861, "39705": "nan", "39710": "nan", "39715": "nan", "39720": "nan", "39725": "nan", "39730": "nan", "39735": "nan", "39740": "nan", "39745": "nan", "39750": "nan", "39755": "nan", "39760": "nan", "39765": "nan", "39770": "nan", "39775": "nan", "39780": "nan", "39785": "nan", "39790": "nan", "39795": "nan", "39800": 3.38876, "39805": "nan", "39810": "nan", "39815": "nan", "39820": "nan", "39825": "nan", "39830": "nan", "39835": "nan", "39840": "nan", "39845": "nan", "39850": "nan", "39855": "nan", "39860": "nan", "39865": "nan", "39870": "nan", "39875": "nan", "39880": "nan", "39885": "nan", "39890": "nan", "39895": "nan", "39900": 3.38989, "39905": "nan", "39910": "nan", "39915": "nan", "39920": "nan", "39925": "nan", "39930": "nan", "39935": "nan", "39940": "nan", "39945": "nan", "39950": "nan", "39955": "nan", "39960": "nan", "39965": "nan", "39970": "nan", "39975": "nan", "39980": "nan", "39985": "nan", "39990": "nan", "39995": "nan", "40000": 3.3899, "40005": "nan", "40010": "nan", "40015": "nan", "40020": "nan", "40025": "nan", "40030": "nan", "40035": "nan", "40040": "nan", "40045": "nan", "40050": "nan", "40055": "nan", "40060": "nan", "40065": "nan", "40070": "nan", "40075": "nan", "40080": "nan", "40085": "nan", "40090": "nan", "40095": "nan", "40100": 3.38677, "40105": "nan", "40110": "nan", "40115": "nan", "40120": "nan", "40125": "nan", "40130": "nan", "40135": "nan", "40140": "nan", "40145": "nan", "40150": "nan", "40155": "nan", "40160": "nan", "40165": "nan", "40170": "nan", "40175": "nan", "40180": "nan", "40185": "nan", "40190": "nan", "40195": "nan", "40200": 3.38822, "40205": "nan", "40210": "nan", "40215": "nan", "40220": "nan", "40225": "nan", "40230": "nan", "40235": "nan", "40240": "nan", "40245": "nan", "40250": "nan", "40255": "nan", "40260": "nan", "40265": "nan", "40270": "nan", "40275": "nan", "40280": "nan", "40285": "nan", "40290": "nan", "40295": "nan", "40300": 3.38872, "40305": "nan", "40310": "nan", "40315": "nan", "40320": "nan", "40325": "nan", "40330": "nan", "40335": "nan", "40340": "nan", "40345": "nan", "40350": "nan", "40355": "nan", "40360": "nan", "40365": "nan", "40370": "nan", "40375": "nan", "40380": "nan", "40385": "nan", "40390": "nan", "40395": "nan", "40400": 3.38893, "40405": "nan", "40410": "nan", "40415": "nan", "40420": "nan", "40425": "nan", "40430": "nan", "40435": "nan", "40440": "nan", "40445": "nan", "40450": "nan", "40455": "nan", "40460": "nan", "40465": "nan", "40470": "nan", "40475": "nan", "40480": "nan", "40485": "nan", "40490": "nan", "40495": "nan", "40500": 3.3876, "40505": "nan", "40510": "nan", "40515": "nan", "40520": "nan", "40525": "nan", "40530": "nan", "40535": "nan", "40540": "nan", "40545": "nan", "40550": "nan", "40555": "nan", "40560": "nan", "40565": "nan", "40570": "nan", "40575": "nan", "40580": "nan", "40585": "nan", "40590": "nan", "40595": "nan", "40600": 3.38905, "40605": "nan", "40610": "nan", "40615": "nan", "40620": "nan", "40625": "nan", "40630": "nan", "40635": "nan", "40640": "nan", "40645": "nan", "40650": "nan", "40655": "nan", "40660": "nan", "40665": "nan", "40670": "nan", "40675": "nan", "40680": "nan", "40685": "nan", "40690": "nan", "40695": "nan", "40700": 3.38865, "40705": "nan", "40710": "nan", "40715": "nan", "40720": "nan", "40725": "nan", "40730": "nan", "40735": "nan", "40740": "nan", "40745": "nan", "40750": "nan", "40755": "nan", "40760": "nan", "40765": "nan", "40770": "nan", "40775": "nan", "40780": "nan", "40785": "nan", "40790": "nan", "40795": "nan", "40800": 3.38879, "40805": "nan", "40810": "nan", "40815": "nan", "40820": "nan", "40825": "nan", "40830": "nan", "40835": "nan", "40840": "nan", "40845": "nan", "40850": "nan", "40855": "nan", "40860": "nan", "40865": "nan", "40870": "nan", "40875": "nan", "40880": "nan", "40885": "nan", "40890": "nan", "40895": "nan", "40900": 3.38769, "40905": "nan", "40910": "nan", "40915": "nan", "40920": "nan", "40925": "nan", "40930": "nan", "40935": "nan", "40940": "nan", "40945": "nan", "40950": "nan", "40955": "nan", "40960": "nan", "40965": "nan", "40970": "nan", "40975": "nan", "40980": "nan", "40985": "nan", "40990": "nan", "40995": "nan", "41000": 3.38786, "41005": "nan", "41010": "nan", "41015": "nan", "41020": "nan", "41025": "nan", "41030": "nan", "41035": "nan", "41040": "nan", "41045": "nan", "41050": "nan", "41055": "nan", "41060": "nan", "41065": "nan", "41070": "nan", "41075": "nan", "41080": "nan", "41085": "nan", "41090": "nan", "41095": "nan", "41100": 3.38845, "41105": "nan", "41110": "nan", "41115": "nan", "41120": "nan", "41125": "nan", "41130": "nan", "41135": "nan", "41140": "nan", "41145": "nan", "41150": "nan", "41155": "nan", "41160": "nan", "41165": "nan", "41170": "nan", "41175": "nan", "41180": "nan", "41185": "nan", "41190": "nan", "41195": "nan", "41200": 3.38742, "41205": "nan", "41210": "nan", "41215": "nan", "41220": "nan", "41225": "nan", "41230": "nan", "41235": "nan", "41240": "nan", "41245": "nan", "41250": "nan", "41255": "nan", "41260": "nan", "41265": "nan", "41270": "nan", "41275": "nan", "41280": "nan", "41285": "nan", "41290": "nan", "41295": "nan", "41300": 3.38853, "41305": "nan", "41310": "nan", "41315": "nan", "41320": "nan", "41325": "nan", "41330": "nan", "41335": "nan", "41340": "nan", "41345": "nan", "41350": "nan", "41355": "nan", "41360": "nan", "41365": "nan", "41370": "nan", "41375": "nan", "41380": "nan", "41385": "nan", "41390": "nan", "41395": "nan", "41400": 3.38801, "41405": "nan", "41410": "nan", "41415": "nan", "41420": "nan", "41425": "nan", "41430": "nan", "41435": "nan", "41440": "nan", "41445": "nan", "41450": "nan", "41455": "nan", "41460": "nan", "41465": "nan", "41470": "nan", "41475": "nan", "41480": "nan", "41485": "nan", "41490": "nan", "41495": "nan", "41500": 3.5883, "41505": "nan", "41510": "nan", "41515": "nan", "41520": "nan", "41525": "nan", "41530": "nan", "41535": "nan", "41540": "nan", "41545": "nan", "41550": "nan", "41555": "nan", "41560": "nan", "41565": "nan", "41570": "nan", "41575": "nan", "41580": "nan", "41585": "nan", "41590": "nan", "41595": "nan", "41600": 3.38844, "41605": "nan", "41610": "nan", "41615": "nan", "41620": "nan", "41625": "nan", "41630": "nan", "41635": "nan", "41640": "nan", "41645": "nan", "41650": "nan", "41655": "nan", "41660": "nan", "41665": "nan", "41670": "nan", "41675": "nan", "41680": "nan", "41685": "nan", "41690": "nan", "41695": "nan", "41700": 3.38802, "41705": "nan", "41710": "nan", "41715": "nan", "41720": "nan", "41725": "nan", "41730": "nan", "41735": "nan", "41740": "nan", "41745": "nan", "41750": "nan", "41755": "nan", "41760": "nan", "41765": "nan", "41770": "nan", "41775": "nan", "41780": "nan", "41785": "nan", "41790": "nan", "41795": "nan", "41800": 3.38805, "41805": "nan", "41810": "nan", "41815": "nan", "41820": "nan", "41825": "nan", "41830": "nan", "41835": "nan", "41840": "nan", "41845": "nan", "41850": "nan", "41855": "nan", "41860": "nan", "41865": "nan", "41870": "nan", "41875": "nan", "41880": "nan", "41885": "nan", "41890": "nan", "41895": "nan", "41900": 3.38811, "41905": "nan", "41910": "nan", "41915": "nan", "41920": "nan", "41925": "nan", "41930": "nan", "41935": "nan", "41940": "nan", "41945": "nan", "41950": "nan", "41955": "nan", "41960": "nan", "41965": "nan", "41970": "nan", "41975": "nan", "41980": "nan", "41985": "nan", "41990": "nan", "41995": "nan", "42000": 3.38795, "42005": "nan", "42010": "nan", "42015": "nan", "42020": "nan", "42025": "nan", "42030": "nan", "42035": "nan", "42040": "nan", "42045": "nan", "42050": "nan", "42055": "nan", "42060": "nan", "42065": "nan", "42070": "nan", "42075": "nan", "42080": "nan", "42085": "nan", "42090": "nan", "42095": "nan", "42100": 3.38793, "42105": "nan", "42110": "nan", "42115": "nan", "42120": "nan", "42125": "nan", "42130": "nan", "42135": "nan", "42140": "nan", "42145": "nan", "42150": "nan", "42155": "nan", "42160": "nan", "42165": "nan", "42170": "nan", "42175": "nan", "42180": "nan", "42185": "nan", "42190": "nan", "42195": "nan", "42200": 3.38786, "42205": "nan", "42210": "nan", "42215": "nan", "42220": "nan", "42225": "nan", "42230": "nan", "42235": "nan", "42240": "nan", "42245": "nan", "42250": "nan", "42255": "nan", "42260": "nan", "42265": "nan", "42270": "nan", "42275": "nan", "42280": "nan", "42285": "nan", "42290": "nan", "42295": "nan", "42300": 3.38768, "42305": "nan", "42310": "nan", "42315": "nan", "42320": "nan", "42325": "nan", "42330": "nan", "42335": "nan", "42340": "nan", "42345": "nan", "42350": "nan", "42355": "nan", "42360": "nan", "42365": "nan", "42370": "nan", "42375": "nan", "42380": "nan", "42385": "nan", "42390": "nan", "42395": "nan", "42400": 3.38759, "42405": "nan", "42410": "nan", "42415": "nan", "42420": "nan", "42425": "nan", "42430": "nan", "42435": "nan", "42440": "nan", "42445": "nan", "42450": "nan", "42455": "nan", "42460": "nan", "42465": "nan", "42470": "nan", "42475": "nan", "42480": "nan", "42485": "nan", "42490": "nan", "42495": "nan", "42500": 3.38657, "42505": "nan", "42510": "nan", "42515": "nan", "42520": "nan", "42525": "nan", "42530": "nan", "42535": "nan", "42540": "nan", "42545": "nan", "42550": "nan", "42555": "nan", "42560": "nan", "42565": "nan", "42570": "nan", "42575": "nan", "42580": "nan", "42585": "nan", "42590": "nan", "42595": "nan", "42600": 3.38701, "42605": "nan", "42610": "nan", "42615": "nan", "42620": "nan", "42625": "nan", "42630": "nan", "42635": "nan", "42640": "nan", "42645": "nan", "42650": "nan", "42655": "nan", "42660": "nan", "42665": "nan", "42670": "nan", "42675": "nan", "42680": "nan", "42685": "nan", "42690": "nan", "42695": "nan", "42700": 3.38775, "42705": "nan", "42710": "nan", "42715": "nan", "42720": "nan", "42725": "nan", "42730": "nan", "42735": "nan", "42740": "nan", "42745": "nan", "42750": "nan", "42755": "nan", "42760": "nan", "42765": "nan", "42770": "nan", "42775": "nan", "42780": "nan", "42785": "nan", "42790": "nan", "42795": "nan", "42800": 3.38722, "42805": "nan", "42810": "nan", "42815": "nan", "42820": "nan", "42825": "nan", "42830": "nan", "42835": "nan", "42840": "nan", "42845": "nan", "42850": "nan", "42855": "nan", "42860": "nan", "42865": "nan", "42870": "nan", "42875": "nan", "42880": "nan", "42885": "nan", "42890": "nan", "42895": "nan", "42900": 3.38739, "42905": "nan", "42910": "nan", "42915": "nan", "42920": "nan", "42925": "nan", "42930": "nan", "42935": "nan", "42940": "nan", "42945": "nan", "42950": "nan", "42955": "nan", "42960": "nan", "42965": "nan", "42970": "nan", "42975": "nan", "42980": "nan", "42985": "nan", "42990": "nan", "42995": "nan", "43000": 3.38699, "43005": "nan", "43010": "nan", "43015": "nan", "43020": "nan", "43025": "nan", "43030": "nan", "43035": "nan", "43040": "nan", "43045": "nan", "43050": "nan", "43055": "nan", "43060": "nan", "43065": "nan", "43070": "nan", "43075": "nan", "43080": "nan", "43085": "nan", "43090": "nan", "43095": "nan", "43100": 3.3869, "43105": "nan", "43110": "nan", "43115": "nan", "43120": "nan", "43125": "nan", "43130": "nan", "43135": "nan", "43140": "nan", "43145": "nan", "43150": "nan", "43155": "nan", "43160": "nan", "43165": "nan", "43170": "nan", "43175": "nan", "43180": "nan", "43185": "nan", "43190": "nan", "43195": "nan", "43200": 3.38691, "43205": "nan", "43210": "nan", "43215": "nan", "43220": "nan", "43225": "nan", "43230": "nan", "43235": "nan", "43240": "nan", "43245": "nan", "43250": "nan", "43255": "nan", "43260": "nan", "43265": "nan", "43270": "nan", "43275": "nan", "43280": "nan", "43285": "nan", "43290": "nan", "43295": "nan", "43300": 3.38744, "43305": "nan", "43310": "nan", "43315": "nan", "43320": "nan", "43325": "nan", "43330": "nan", "43335": "nan", "43340": "nan", "43345": "nan", "43350": "nan", "43355": "nan", "43360": "nan", "43365": "nan", "43370": "nan", "43375": "nan", "43380": "nan", "43385": "nan", "43390": "nan", "43395": "nan", "43400": 3.38689, "43405": "nan", "43410": "nan", "43415": "nan", "43420": "nan", "43425": "nan", "43430": "nan", "43435": "nan", "43440": "nan", "43445": "nan", "43450": "nan", "43455": "nan", "43460": "nan", "43465": "nan", "43470": "nan", "43475": "nan", "43480": "nan", "43485": "nan", "43490": "nan", "43495": "nan", "43500": 3.38638, "43505": "nan", "43510": "nan", "43515": "nan", "43520": "nan", "43525": "nan", "43530": "nan", "43535": "nan", "43540": "nan", "43545": "nan", "43550": "nan", "43555": "nan", "43560": "nan", "43565": "nan", "43570": "nan", "43575": "nan", "43580": "nan", "43585": "nan", "43590": "nan", "43595": "nan", "43600": 3.38601, "43605": "nan", "43610": "nan", "43615": "nan", "43620": "nan", "43625": "nan", "43630": "nan", "43635": "nan", "43640": "nan", "43645": "nan", "43650": "nan", "43655": "nan", "43660": "nan", "43665": "nan", "43670": "nan", "43675": "nan", "43680": "nan", "43685": "nan", "43690": "nan", "43695": "nan", "43700": 3.3862, "43705": "nan", "43710": "nan", "43715": "nan", "43720": "nan", "43725": "nan", "43730": "nan", "43735": "nan", "43740": "nan", "43745": "nan", "43750": "nan", "43755": "nan", "43760": "nan", "43765": "nan", "43770": "nan", "43775": "nan", "43780": "nan", "43785": "nan", "43790": "nan", "43795": "nan", "43800": 3.3854, "43805": "nan", "43810": "nan", "43815": "nan", "43820": "nan", "43825": "nan", "43830": "nan", "43835": "nan", "43840": "nan", "43845": "nan", "43850": "nan", "43855": "nan", "43860": "nan", "43865": "nan", "43870": "nan", "43875": "nan", "43880": "nan", "43885": "nan", "43890": "nan", "43895": "nan", "43900": 3.38702, "43905": "nan", "43910": "nan", "43915": "nan", "43920": "nan", "43925": "nan", "43930": "nan", "43935": "nan", "43940": "nan", "43945": "nan", "43950": "nan", "43955": "nan", "43960": "nan", "43965": "nan", "43970": "nan", "43975": "nan", "43980": "nan", "43985": "nan", "43990": "nan", "43995": "nan", "44000": 3.38781, "44005": "nan", "44010": "nan", "44015": "nan", "44020": "nan", "44025": "nan", "44030": "nan", "44035": "nan", "44040": "nan", "44045": "nan", "44050": "nan", "44055": "nan", "44060": "nan", "44065": "nan", "44070": "nan", "44075": "nan", "44080": "nan", "44085": "nan", "44090": "nan", "44095": "nan", "44100": 3.38606, "44105": "nan", "44110": "nan", "44115": "nan", "44120": "nan", "44125": "nan", "44130": "nan", "44135": "nan", "44140": "nan", "44145": "nan", "44150": "nan", "44155": "nan", "44160": "nan", "44165": "nan", "44170": "nan", "44175": "nan", "44180": "nan", "44185": "nan", "44190": "nan", "44195": "nan", "44200": 3.38724, "44205": "nan", "44210": "nan", "44215": "nan", "44220": "nan", "44225": "nan", "44230": "nan", "44235": "nan", "44240": "nan", "44245": "nan", "44250": "nan", "44255": "nan", "44260": "nan", "44265": "nan", "44270": "nan", "44275": "nan", "44280": "nan", "44285": "nan", "44290": "nan", "44295": "nan", "44300": 3.38663, "44305": "nan", "44310": "nan", "44315": "nan", "44320": "nan", "44325": "nan", "44330": "nan", "44335": "nan", "44340": "nan", "44345": "nan", "44350": "nan", "44355": "nan", "44360": "nan", "44365": "nan", "44370": "nan", "44375": "nan", "44380": "nan", "44385": "nan", "44390": "nan", "44395": "nan", "44400": 3.38808, "44405": "nan", "44410": "nan", "44415": "nan", "44420": "nan", "44425": "nan", "44430": "nan", "44435": "nan", "44440": "nan", "44445": "nan", "44450": "nan", "44455": "nan", "44460": "nan", "44465": "nan", "44470": "nan", "44475": "nan", "44480": "nan", "44485": "nan", "44490": "nan", "44495": "nan", "44500": 3.38722, "44505": "nan", "44510": "nan", "44515": "nan", "44520": "nan", "44525": "nan", "44530": "nan", "44535": "nan", "44540": "nan", "44545": "nan", "44550": "nan", "44555": "nan", "44560": "nan", "44565": "nan", "44570": "nan", "44575": "nan", "44580": "nan", "44585": "nan", "44590": "nan", "44595": "nan", "44600": 3.38553, "44605": "nan", "44610": "nan", "44615": "nan", "44620": "nan", "44625": "nan", "44630": "nan", "44635": "nan", "44640": "nan", "44645": "nan", "44650": "nan", "44655": "nan", "44660": "nan", "44665": "nan", "44670": "nan", "44675": "nan", "44680": "nan", "44685": "nan", "44690": "nan", "44695": "nan", "44700": 3.3869, "44705": "nan", "44710": "nan", "44715": "nan", "44720": "nan", "44725": "nan", "44730": "nan", "44735": "nan", "44740": "nan", "44745": "nan", "44750": "nan", "44755": "nan", "44760": "nan", "44765": "nan", "44770": "nan", "44775": "nan", "44780": "nan", "44785": "nan", "44790": "nan", "44795": "nan", "44800": 3.38645, "44805": "nan", "44810": "nan", "44815": "nan", "44820": "nan", "44825": "nan", "44830": "nan", "44835": "nan", "44840": "nan", "44845": "nan", "44850": "nan", "44855": "nan", "44860": "nan", "44865": "nan", "44870": "nan", "44875": "nan", "44880": "nan", "44885": "nan", "44890": "nan", "44895": "nan", "44900": 3.38721, "44905": "nan", "44910": "nan", "44915": "nan", "44920": "nan", "44925": "nan", "44930": "nan", "44935": "nan", "44940": "nan", "44945": "nan", "44950": "nan", "44955": "nan", "44960": "nan", "44965": "nan", "44970": "nan", "44975": "nan", "44980": "nan", "44985": "nan", "44990": "nan", "44995": "nan", "45000": 3.38713, "45005": "nan", "45010": "nan", "45015": "nan", "45020": "nan", "45025": "nan", "45030": "nan", "45035": "nan", "45040": "nan", "45045": "nan", "45050": "nan", "45055": "nan", "45060": "nan", "45065": "nan", "45070": "nan", "45075": "nan", "45080": "nan", "45085": "nan", "45090": "nan", "45095": "nan", "45100": 3.38622, "45105": "nan", "45110": "nan", "45115": "nan", "45120": "nan", "45125": "nan", "45130": "nan", "45135": "nan", "45140": "nan", "45145": "nan", "45150": "nan", "45155": "nan", "45160": "nan", "45165": "nan", "45170": "nan", "45175": "nan", "45180": "nan", "45185": "nan", "45190": "nan", "45195": "nan", "45200": 3.3866, "45205": "nan", "45210": "nan", "45215": "nan", "45220": "nan", "45225": "nan", "45230": "nan", "45235": "nan", "45240": "nan", "45245": "nan", "45250": "nan", "45255": "nan", "45260": "nan", "45265": "nan", "45270": "nan", "45275": "nan", "45280": "nan", "45285": "nan", "45290": "nan", "45295": "nan", "45300": 3.3867, "45305": "nan", "45310": "nan", "45315": "nan", "45320": "nan", "45325": "nan", "45330": "nan", "45335": "nan", "45340": "nan", "45345": "nan", "45350": "nan", "45355": "nan", "45360": "nan", "45365": "nan", "45370": "nan", "45375": "nan", "45380": "nan", "45385": "nan", "45390": "nan", "45395": "nan", "45400": 3.56953, "45405": "nan", "45410": "nan", "45415": "nan", "45420": "nan", "45425": "nan", "45430": "nan", "45435": "nan", "45440": "nan", "45445": "nan", "45450": "nan", "45455": "nan", "45460": "nan", "45465": "nan", "45470": "nan", "45475": "nan", "45480": "nan", "45485": "nan", "45490": "nan", "45495": "nan", "45500": 3.38798, "45505": "nan", "45510": "nan", "45515": "nan", "45520": "nan", "45525": "nan", "45530": "nan", "45535": "nan", "45540": "nan", "45545": "nan", "45550": "nan", "45555": "nan", "45560": "nan", "45565": "nan", "45570": "nan", "45575": "nan", "45580": "nan", "45585": "nan", "45590": "nan", "45595": "nan", "45600": 3.38783, "45605": "nan", "45610": "nan", "45615": "nan", "45620": "nan", "45625": "nan", "45630": "nan", "45635": "nan", "45640": "nan", "45645": "nan", "45650": "nan", "45655": "nan", "45660": "nan", "45665": "nan", "45670": "nan", "45675": "nan", "45680": "nan", "45685": "nan", "45690": "nan", "45695": "nan", "45700": 3.38711, "45705": "nan", "45710": "nan", "45715": "nan", "45720": "nan", "45725": "nan", "45730": "nan", "45735": "nan", "45740": "nan", "45745": "nan", "45750": "nan", "45755": "nan", "45760": "nan", "45765": "nan", "45770": "nan", "45775": "nan", "45780": "nan", "45785": "nan", "45790": "nan", "45795": "nan", "45800": 3.38768, "45805": "nan", "45810": "nan", "45815": "nan", "45820": "nan", "45825": "nan", "45830": "nan", "45835": "nan", "45840": "nan", "45845": "nan", "45850": "nan", "45855": "nan", "45860": "nan", "45865": "nan", "45870": "nan", "45875": "nan", "45880": "nan", "45885": "nan", "45890": "nan", "45895": "nan", "45900": 3.38686, "45905": "nan", "45910": "nan", "45915": "nan", "45920": "nan", "45925": "nan", "45930": "nan", "45935": "nan", "45940": "nan", "45945": "nan", "45950": "nan", "45955": "nan", "45960": "nan", "45965": "nan", "45970": "nan", "45975": "nan", "45980": "nan", "45985": "nan", "45990": "nan", "45995": "nan", "46000": 3.38648, "46005": "nan", "46010": "nan", "46015": "nan", "46020": "nan", "46025": "nan", "46030": "nan", "46035": "nan", "46040": "nan", "46045": "nan", "46050": "nan", "46055": "nan", "46060": "nan", "46065": "nan", "46070": "nan", "46075": "nan", "46080": "nan", "46085": "nan", "46090": "nan", "46095": "nan", "46100": 3.38742, "46105": "nan", "46110": "nan", "46115": "nan", "46120": "nan", "46125": "nan", "46130": "nan", "46135": "nan", "46140": "nan", "46145": "nan", "46150": "nan", "46155": "nan", "46160": "nan", "46165": "nan", "46170": "nan", "46175": "nan", "46180": "nan", "46185": "nan", "46190": "nan", "46195": "nan", "46200": 3.38799, "46205": "nan", "46210": "nan", "46215": "nan", "46220": "nan", "46225": "nan", "46230": "nan", "46235": "nan", "46240": "nan", "46245": "nan", "46250": "nan", "46255": "nan", "46260": "nan", "46265": "nan", "46270": "nan", "46275": "nan", "46280": "nan", "46285": "nan", "46290": "nan", "46295": "nan", "46300": 3.38747, "46305": "nan", "46310": "nan", "46315": "nan", "46320": "nan", "46325": "nan", "46330": "nan", "46335": "nan", "46340": "nan", "46345": "nan", "46350": "nan", "46355": "nan", "46360": "nan", "46365": "nan", "46370": "nan", "46375": "nan", "46380": "nan", "46385": "nan", "46390": "nan", "46395": "nan", "46400": 3.38757, "46405": "nan", "46410": "nan", "46415": "nan", "46420": "nan", "46425": "nan", "46430": "nan", "46435": "nan", "46440": "nan", "46445": "nan", "46450": "nan", "46455": "nan", "46460": "nan", "46465": "nan", "46470": "nan", "46475": "nan", "46480": "nan", "46485": "nan", "46490": "nan", "46495": "nan", "46500": 3.38673, "46505": "nan", "46510": "nan", "46515": "nan", "46520": "nan", "46525": "nan", "46530": "nan", "46535": "nan", "46540": "nan", "46545": "nan", "46550": "nan", "46555": "nan", "46560": "nan", "46565": "nan", "46570": "nan", "46575": "nan", "46580": "nan", "46585": "nan", "46590": "nan", "46595": "nan", "46600": 3.38748, "46605": "nan", "46610": "nan", "46615": "nan", "46620": "nan", "46625": "nan", "46630": "nan", "46635": "nan", "46640": "nan", "46645": "nan", "46650": "nan", "46655": "nan", "46660": "nan", "46665": "nan", "46670": "nan", "46675": "nan", "46680": "nan", "46685": "nan", "46690": "nan", "46695": "nan", "46700": 3.38716, "46705": "nan", "46710": "nan", "46715": "nan", "46720": "nan", "46725": "nan", "46730": "nan", "46735": "nan", "46740": "nan", "46745": "nan", "46750": "nan", "46755": "nan", "46760": "nan", "46765": "nan", "46770": "nan", "46775": "nan", "46780": "nan", "46785": "nan", "46790": "nan", "46795": "nan", "46800": 3.38753, "46805": "nan", "46810": "nan", "46815": "nan", "46820": "nan", "46825": "nan", "46830": "nan", "46835": "nan", "46840": "nan", "46845": "nan", "46850": "nan", "46855": "nan", "46860": "nan", "46865": "nan", "46870": "nan", "46875": "nan", "46880": "nan", "46885": "nan", "46890": "nan", "46895": "nan", "46900": 3.38699, "46905": "nan", "46910": "nan", "46915": "nan", "46920": "nan", "46925": "nan", "46930": "nan", "46935": "nan", "46940": "nan", "46945": "nan", "46950": "nan", "46955": "nan", "46960": "nan", "46965": "nan", "46970": "nan", "46975": "nan", "46980": "nan", "46985": "nan", "46990": "nan", "46995": "nan", "47000": 3.38808, "47005": "nan", "47010": "nan", "47015": "nan", "47020": "nan", "47025": "nan", "47030": "nan", "47035": "nan", "47040": "nan", "47045": "nan", "47050": "nan", "47055": "nan", "47060": "nan", "47065": "nan", "47070": "nan", "47075": "nan", "47080": "nan", "47085": "nan", "47090": "nan", "47095": "nan", "47100": 3.38707, "47105": "nan", "47110": "nan", "47115": "nan", "47120": "nan", "47125": "nan", "47130": "nan", "47135": "nan", "47140": "nan", "47145": "nan", "47150": "nan", "47155": "nan", "47160": "nan", "47165": "nan", "47170": "nan", "47175": "nan", "47180": "nan", "47185": "nan", "47190": "nan", "47195": "nan", "47200": 3.3868, "47205": "nan", "47210": "nan", "47215": "nan", "47220": "nan", "47225": "nan", "47230": "nan", "47235": "nan", "47240": "nan", "47245": "nan", "47250": "nan", "47255": "nan", "47260": "nan", "47265": "nan", "47270": "nan", "47275": "nan", "47280": "nan", "47285": "nan", "47290": "nan", "47295": "nan", "47300": 3.38668, "47305": "nan", "47310": "nan", "47315": "nan", "47320": "nan", "47325": "nan", "47330": "nan", "47335": "nan", "47340": "nan", "47345": "nan", "47350": "nan", "47355": "nan", "47360": "nan", "47365": "nan", "47370": "nan", "47375": "nan", "47380": "nan", "47385": "nan", "47390": "nan", "47395": "nan", "47400": 3.38703, "47405": "nan", "47410": "nan", "47415": "nan", "47420": "nan", "47425": "nan", "47430": "nan", "47435": "nan", "47440": "nan", "47445": "nan", "47450": "nan", "47455": "nan", "47460": "nan", "47465": "nan", "47470": "nan", "47475": "nan", "47480": "nan", "47485": "nan", "47490": "nan", "47495": "nan", "47500": 3.38647, "47505": "nan", "47510": "nan", "47515": "nan", "47520": "nan", "47525": "nan", "47530": "nan", "47535": "nan", "47540": "nan", "47545": "nan", "47550": "nan", "47555": "nan", "47560": "nan", "47565": "nan", "47570": "nan", "47575": "nan", "47580": "nan", "47585": "nan", "47590": "nan", "47595": "nan", "47600": 3.38769, "47605": "nan", "47610": "nan", "47615": "nan", "47620": "nan", "47625": "nan", "47630": "nan", "47635": "nan", "47640": "nan", "47645": "nan", "47650": "nan", "47655": "nan", "47660": "nan", "47665": "nan", "47670": "nan", "47675": "nan", "47680": "nan", "47685": "nan", "47690": "nan", "47695": "nan", "47700": 3.38602, "47705": "nan", "47710": "nan", "47715": "nan", "47720": "nan", "47725": "nan", "47730": "nan", "47735": "nan", "47740": "nan", "47745": "nan", "47750": "nan", "47755": "nan", "47760": "nan", "47765": "nan", "47770": "nan", "47775": "nan", "47780": "nan", "47785": "nan", "47790": "nan", "47795": "nan", "47800": 3.38604, "47805": "nan", "47810": "nan", "47815": "nan", "47820": "nan", "47825": "nan", "47830": "nan", "47835": "nan", "47840": "nan", "47845": "nan", "47850": "nan", "47855": "nan", "47860": "nan", "47865": "nan", "47870": "nan", "47875": "nan", "47880": "nan", "47885": "nan", "47890": "nan", "47895": "nan", "47900": 3.38579, "47905": "nan", "47910": "nan", "47915": "nan", "47920": "nan", "47925": "nan", "47930": "nan", "47935": "nan", "47940": "nan", "47945": "nan", "47950": "nan", "47955": "nan", "47960": "nan", "47965": "nan", "47970": "nan", "47975": "nan", "47980": "nan", "47985": "nan", "47990": "nan", "47995": "nan", "48000": 3.38613, "48005": "nan", "48010": "nan", "48015": "nan", "48020": "nan", "48025": "nan", "48030": "nan", "48035": "nan", "48040": "nan", "48045": "nan", "48050": "nan", "48055": "nan", "48060": "nan", "48065": "nan", "48070": "nan", "48075": "nan", "48080": "nan", "48085": "nan", "48090": "nan", "48095": "nan", "48100": 3.38598, "48105": "nan", "48110": "nan", "48115": "nan", "48120": "nan", "48125": "nan", "48130": "nan", "48135": "nan", "48140": "nan", "48145": "nan", "48150": "nan", "48155": "nan", "48160": "nan", "48165": "nan", "48170": "nan", "48175": "nan", "48180": "nan", "48185": "nan", "48190": "nan", "48195": "nan", "48200": 3.38537, "48205": "nan", "48210": "nan", "48215": "nan", "48220": "nan", "48225": "nan", "48230": "nan", "48235": "nan", "48240": "nan", "48245": "nan", "48250": "nan", "48255": "nan", "48260": "nan", "48265": "nan", "48270": "nan", "48275": "nan", "48280": "nan", "48285": "nan", "48290": "nan", "48295": "nan", "48300": 3.38621, "48305": "nan", "48310": "nan", "48315": "nan", "48320": "nan", "48325": "nan", "48330": "nan", "48335": "nan", "48340": "nan", "48345": "nan", "48350": "nan", "48355": "nan", "48360": "nan", "48365": "nan", "48370": "nan", "48375": "nan", "48380": "nan", "48385": "nan", "48390": "nan", "48395": "nan", "48400": 3.3861, "48405": "nan", "48410": "nan", "48415": "nan", "48420": "nan", "48425": "nan", "48430": "nan", "48435": "nan", "48440": "nan", "48445": "nan", "48450": "nan", "48455": "nan", "48460": "nan", "48465": "nan", "48470": "nan", "48475": "nan", "48480": "nan", "48485": "nan", "48490": "nan", "48495": "nan", "48500": 3.3845, "48505": "nan", "48510": "nan", "48515": "nan", "48520": "nan", "48525": "nan", "48530": "nan", "48535": "nan", "48540": "nan", "48545": "nan", "48550": "nan", "48555": "nan", "48560": "nan", "48565": "nan", "48570": "nan", "48575": "nan", "48580": "nan", "48585": "nan", "48590": "nan", "48595": "nan", "48600": 3.38534, "48605": "nan", "48610": "nan", "48615": "nan", "48620": "nan", "48625": "nan", "48630": "nan", "48635": "nan", "48640": "nan", "48645": "nan", "48650": "nan", "48655": "nan", "48660": "nan", "48665": "nan", "48670": "nan", "48675": "nan", "48680": "nan", "48685": "nan", "48690": "nan", "48695": "nan", "48700": 3.3857, "48705": "nan", "48710": "nan", "48715": "nan", "48720": "nan", "48725": "nan", "48730": "nan", "48735": "nan", "48740": "nan", "48745": "nan", "48750": "nan", "48755": "nan", "48760": "nan", "48765": "nan", "48770": "nan", "48775": "nan", "48780": "nan", "48785": "nan", "48790": "nan", "48795": "nan", "48800": 3.38585, "48805": "nan", "48810": "nan", "48815": "nan", "48820": "nan", "48825": "nan", "48830": "nan", "48835": "nan", "48840": "nan", "48845": "nan", "48850": "nan", "48855": "nan", "48860": "nan", "48865": "nan", "48870": "nan", "48875": "nan", "48880": "nan", "48885": "nan", "48890": "nan", "48895": "nan", "48900": 3.38606, "48905": "nan", "48910": "nan", "48915": "nan", "48920": "nan", "48925": "nan", "48930": "nan", "48935": "nan", "48940": "nan", "48945": "nan", "48950": "nan", "48955": "nan", "48960": "nan", "48965": "nan", "48970": "nan", "48975": "nan", "48980": "nan", "48985": "nan", "48990": "nan", "48995": "nan", "49000": 3.38562, "49005": "nan", "49010": "nan", "49015": "nan", "49020": "nan", "49025": "nan", "49030": "nan", "49035": "nan", "49040": "nan", "49045": "nan", "49050": "nan", "49055": "nan", "49060": "nan", "49065": "nan", "49070": "nan", "49075": "nan", "49080": "nan", "49085": "nan", "49090": "nan", "49095": "nan", "49100": 3.38456, "49105": "nan", "49110": "nan", "49115": "nan", "49120": "nan", "49125": "nan", "49130": "nan", "49135": "nan", "49140": "nan", "49145": "nan", "49150": "nan", "49155": "nan", "49160": "nan", "49165": "nan", "49170": "nan", "49175": "nan", "49180": "nan", "49185": "nan", "49190": "nan", "49195": "nan", "49200": 3.38482, "49205": "nan", "49210": "nan", "49215": "nan", "49220": "nan", "49225": "nan", "49230": "nan", "49235": "nan", "49240": "nan", "49245": "nan", "49250": "nan", "49255": "nan", "49260": "nan", "49265": "nan", "49270": "nan", "49275": "nan", "49280": "nan", "49285": "nan", "49290": "nan", "49295": "nan", "49300": 3.5375, "49305": "nan", "49310": "nan", "49315": "nan", "49320": "nan", "49325": "nan", "49330": "nan", "49335": "nan", "49340": "nan", "49345": "nan", "49350": "nan", "49355": "nan", "49360": "nan", "49365": "nan", "49370": "nan", "49375": "nan", "49380": "nan", "49385": "nan", "49390": "nan", "49395": "nan", "49400": 3.40286, "49405": "nan", "49410": "nan", "49415": "nan", "49420": "nan", "49425": "nan", "49430": "nan", "49435": "nan", "49440": "nan", "49445": "nan", "49450": "nan", "49455": "nan", "49460": "nan", "49465": "nan", "49470": "nan", "49475": "nan", "49480": "nan", "49485": "nan", "49490": "nan", "49495": "nan", "49500": 3.40056, "49505": "nan", "49510": "nan", "49515": "nan", "49520": "nan", "49525": "nan", "49530": "nan", "49535": "nan", "49540": "nan", "49545": "nan", "49550": "nan", "49555": "nan", "49560": "nan", "49565": "nan", "49570": "nan", "49575": "nan", "49580": "nan", "49585": "nan", "49590": "nan", "49595": "nan", "49600": 3.40193, "49605": "nan", "49610": "nan", "49615": "nan", "49620": "nan", "49625": "nan", "49630": "nan", "49635": "nan", "49640": "nan", "49645": "nan", "49650": "nan", "49655": "nan", "49660": "nan", "49665": "nan", "49670": "nan", "49675": "nan", "49680": "nan", "49685": "nan", "49690": "nan", "49695": "nan", "49700": 3.40267, "49705": "nan", "49710": "nan", "49715": "nan", "49720": "nan", "49725": "nan", "49730": "nan", "49735": "nan", "49740": "nan", "49745": "nan", "49750": "nan", "49755": "nan", "49760": "nan", "49765": "nan", "49770": "nan", "49775": "nan", "49780": "nan", "49785": "nan", "49790": "nan", "49795": "nan", "49800": 3.4023, "49805": "nan", "49810": "nan", "49815": "nan", "49820": "nan", "49825": "nan", "49830": "nan", "49835": "nan", "49840": "nan", "49845": "nan", "49850": "nan", "49855": "nan", "49860": "nan", "49865": "nan", "49870": "nan", "49875": "nan", "49880": "nan", "49885": "nan", "49890": "nan", "49895": "nan", "49900": 3.40094, "49905": "nan", "49910": "nan", "49915": "nan", "49920": "nan", "49925": "nan", "49930": "nan", "49935": "nan", "49940": "nan", "49945": "nan", "49950": "nan", "49955": "nan", "49960": "nan", "49965": "nan", "49970": "nan", "49975": "nan", "49980": "nan", "49985": "nan", "49990": "nan", "49995": "nan", "50000": 3.40122, "50005": "nan", "50010": "nan", "50015": "nan", "50020": "nan", "50025": "nan", "50030": "nan", "50035": "nan", "50040": "nan", "50045": "nan", "50050": "nan", "50055": "nan", "50060": "nan", "50065": "nan", "50070": "nan", "50075": "nan", "50080": "nan", "50085": "nan", "50090": "nan", "50095": "nan", "50100": 3.3987, "50105": "nan", "50110": "nan", "50115": "nan", "50120": "nan", "50125": "nan", "50130": "nan", "50135": "nan", "50140": "nan", "50145": "nan", "50150": "nan", "50155": "nan", "50160": "nan", "50165": "nan", "50170": "nan", "50175": "nan", "50180": "nan", "50185": "nan", "50190": "nan", "50195": "nan", "50200": 3.40202, "50205": "nan", "50210": "nan", "50215": "nan", "50220": "nan", "50225": "nan", "50230": "nan", "50235": "nan", "50240": "nan", "50245": "nan", "50250": "nan", "50255": "nan", "50260": "nan", "50265": "nan", "50270": "nan", "50275": "nan", "50280": "nan", "50285": "nan", "50290": "nan", "50295": "nan", "50300": 3.40014, "50305": "nan", "50310": "nan", "50315": "nan", "50320": "nan", "50325": "nan", "50330": "nan", "50335": "nan", "50340": "nan", "50345": "nan", "50350": "nan", "50355": "nan", "50360": "nan", "50365": "nan", "50370": "nan", "50375": "nan", "50380": "nan", "50385": "nan", "50390": "nan", "50395": "nan", "50400": 3.39925, "50405": "nan", "50410": "nan", "50415": "nan", "50420": "nan", "50425": "nan", "50430": "nan", "50435": "nan", "50440": "nan", "50445": "nan", "50450": "nan", "50455": "nan", "50460": "nan", "50465": "nan", "50470": "nan", "50475": "nan", "50480": "nan", "50485": "nan", "50490": "nan", "50495": "nan", "50500": 3.39803, "50505": "nan", "50510": "nan", "50515": "nan", "50520": "nan", "50525": "nan", "50530": "nan", "50535": "nan", "50540": "nan", "50545": "nan", "50550": "nan", "50555": "nan", "50560": "nan", "50565": "nan", "50570": "nan", "50575": "nan", "50580": "nan", "50585": "nan", "50590": "nan", "50595": "nan", "50600": 3.39803, "50605": "nan", "50610": "nan", "50615": "nan", "50620": "nan", "50625": "nan", "50630": "nan", "50635": "nan", "50640": "nan", "50645": "nan", "50650": "nan", "50655": "nan", "50660": "nan", "50665": "nan", "50670": "nan", "50675": "nan", "50680": "nan", "50685": "nan", "50690": "nan", "50695": "nan", "50700": 3.39955, "50705": "nan", "50710": "nan", "50715": "nan", "50720": "nan", "50725": "nan", "50730": "nan", "50735": "nan", "50740": "nan", "50745": "nan", "50750": "nan", "50755": "nan", "50760": "nan", "50765": "nan", "50770": "nan", "50775": "nan", "50780": "nan", "50785": "nan", "50790": "nan", "50795": "nan", "50800": 3.40136, "50805": "nan", "50810": "nan", "50815": "nan", "50820": "nan", "50825": "nan", "50830": "nan", "50835": "nan", "50840": "nan", "50845": "nan", "50850": "nan", "50855": "nan", "50860": "nan", "50865": "nan", "50870": "nan", "50875": "nan", "50880": "nan", "50885": "nan", "50890": "nan", "50895": "nan", "50900": "nan", "50905": "nan", "50910": "nan", "50915": "nan", "50920": "nan", "50925": "nan", "50930": "nan", "50935": "nan", "50940": "nan", "50945": "nan", "50950": "nan", "50955": "nan", "50960": "nan", "50965": "nan", "50970": "nan", "50975": "nan", "50980": "nan", "50985": "nan", "50990": "nan", "50995": "nan", "51000": "nan", "51005": "nan", "51010": "nan", "51015": "nan", "51020": "nan", "51025": "nan", "51030": "nan", "51035": "nan", "51040": "nan", "51045": "nan", "51050": "nan", "51055": "nan", "51060": "nan", "51065": "nan", "51070": "nan", "51075": "nan", "51080": "nan", "51085": "nan", "51090": "nan", "51095": "nan", "51100": "nan", "51105": "nan", "51110": "nan", "51115": "nan", "51120": "nan", "51125": "nan", "51130": "nan", "51135": "nan", "51140": "nan", "51145": "nan", "51150": "nan", "51155": "nan", "51160": "nan", "51165": "nan", "51170": "nan", "51175": "nan", "51180": "nan", "51185": "nan", "51190": "nan", "51195": "nan", "51200": "nan", "51205": "nan", "51210": "nan", "51215": "nan", "51220": "nan", "51225": "nan", "51230": "nan", "51235": "nan", "51240": "nan", "51245": "nan", "51250": "nan", "51255": "nan", "51260": "nan", "51265": "nan", "51270": "nan", "51275": "nan", "51280": "nan", "51285": "nan", "51290": "nan", "51295": "nan", "51300": "nan", "51305": "nan", "51310": "nan", "51315": "nan", "51320": "nan", "51325": "nan", "51330": "nan", "51335": "nan", "51340": "nan", "51345": "nan", "51350": "nan", "51355": "nan", "51360": "nan", "51365": "nan", "51370": "nan", "51375": "nan", "51380": "nan", "51385": "nan", "51390": "nan", "51395": "nan", "51400": "nan", "51405": "nan", "51410": "nan", "51415": "nan", "51420": "nan", "51425": "nan", "51430": "nan", "51435": "nan", "51440": "nan", "51445": "nan", "51450": "nan", "51455": "nan", "51460": "nan", "51465": "nan", "51470": "nan", "51475": "nan", "51480": "nan", "51485": "nan", "51490": "nan", "51495": "nan", "51500": "nan", "51505": "nan", "51510": "nan", "51515": "nan", "51520": "nan", "51525": "nan", "51530": "nan", "51535": "nan", "51540": "nan", "51545": "nan", "51550": "nan", "51555": "nan", "51560": "nan", "51565": "nan", "51570": "nan", "51575": "nan", "51580": "nan", "51585": "nan", "51590": "nan", "51595": "nan", "51600": "nan", "51605": "nan", "51610": "nan", "51615": "nan", "51620": "nan", "51625": "nan", "51630": "nan", "51635": "nan", "51640": "nan", "51645": "nan", "51650": "nan", "51655": "nan", "51660": "nan", "51665": "nan", "51670": "nan", "51675": "nan", "51680": "nan", "51685": "nan", "51690": "nan", "51695": "nan", "51700": "nan", "51705": "nan", "51710": "nan", "51715": "nan", "51720": "nan", "51725": "nan", "51730": "nan", "51735": "nan", "51740": "nan", "51745": "nan", "51750": "nan", "51755": "nan", "51760": "nan", "51765": "nan", "51770": "nan", "51775": "nan", "51780": "nan", "51785": "nan", "51790": "nan", "51795": "nan", "51800": "nan", "51805": "nan", "51810": "nan", "51815": "nan", "51820": "nan", "51825": "nan", "51830": "nan", "51835": "nan", "51840": "nan", "51845": "nan", "51850": "nan", "51855": "nan", "51860": "nan", "51865": "nan", "51870": "nan", "51875": "nan", "51880": "nan", "51885": "nan", "51890": "nan", "51895": "nan", "51900": "nan", "51905": "nan", "51910": "nan", "51915": "nan", "51920": "nan", "51925": "nan", "51930": "nan", "51935": "nan", "51940": "nan", "51945": "nan", "51950": "nan", "51955": "nan", "51960": "nan", "51965": "nan", "51970": "nan", "51975": "nan", "51980": "nan", "51985": "nan", "51990": "nan", "51995": "nan", "52000": "nan", "52005": "nan", "52010": "nan", "52015": "nan", "52020": "nan", "52025": "nan", "52030": "nan", "52035": "nan", "52040": "nan", "52045": "nan", "52050": "nan", "52055": "nan", "52060": "nan", "52065": "nan", "52070": "nan", "52075": "nan", "52080": "nan", "52085": "nan", "52090": "nan", "52095": "nan", "52100": "nan", "52105": "nan", "52110": "nan", "52115": "nan", "52120": "nan", "52125": "nan", "52130": "nan", "52135": "nan", "52140": "nan", "52145": "nan", "52150": "nan", "52155": "nan", "52160": "nan", "52165": "nan", "52170": "nan", "52175": "nan", "52180": "nan", "52185": "nan", "52190": "nan", "52195": "nan", "52200": "nan", "52205": "nan", "52210": "nan", "52215": "nan", "52220": "nan", "52225": "nan", "52230": "nan", "52235": "nan", "52240": "nan", "52245": "nan", "52250": "nan", "52255": "nan", "52260": "nan", "52265": "nan", "52270": "nan", "52275": "nan", "52280": "nan", "52285": "nan", "52290": "nan", "52295": "nan", "52300": "nan", "52305": "nan", "52310": "nan", "52315": "nan", "52320": "nan", "52325": "nan", "52330": "nan", "52335": "nan", "52340": "nan", "52345": "nan", "52350": "nan", "52355": "nan", "52360": "nan", "52365": "nan", "52370": "nan", "52375": "nan", "52380": "nan", "52385": "nan", "52390": "nan", "52395": "nan", "52400": "nan", "52405": "nan", "52410": "nan", "52415": "nan", "52420": "nan", "52425": "nan", "52430": "nan", "52435": "nan", "52440": "nan", "52445": "nan", "52450": "nan", "52455": "nan", "52460": "nan", "52465": "nan", "52470": "nan", "52475": "nan", "52480": "nan", "52485": "nan", "52490": "nan", "52495": "nan", "52500": "nan", "52505": "nan", "52510": "nan", "52515": "nan", "52520": "nan", "52525": "nan", "52530": "nan", "52535": "nan", "52540": "nan", "52545": "nan", "52550": "nan", "52555": "nan", "52560": "nan", "52565": "nan", "52570": "nan", "52575": "nan", "52580": "nan", "52585": "nan", "52590": "nan", "52595": "nan", "52600": "nan", "52605": "nan", "52610": "nan", "52615": "nan", "52620": "nan", "52625": "nan", "52630": "nan", "52635": "nan", "52640": "nan", "52645": "nan", "52650": "nan", "52655": "nan", "52660": "nan", "52665": "nan", "52670": "nan", "52675": "nan", "52680": "nan", "52685": "nan", "52690": "nan", "52695": "nan", "52700": "nan", "52705": "nan", "52710": "nan", "52715": "nan", "52720": "nan", "52725": "nan", "52730": "nan", "52735": "nan", "52740": "nan", "52745": "nan", "52750": "nan", "52755": "nan", "52760": "nan", "52765": "nan", "52770": "nan", "52775": "nan", "52780": "nan", "52785": "nan", "52790": "nan", "52795": "nan", "52800": "nan", "52805": "nan", "52810": "nan", "52815": "nan", "52820": "nan", "52825": "nan", "52830": "nan", "52835": "nan", "52840": "nan", "52845": "nan", "52850": "nan", "52855": "nan", "52860": "nan", "52865": "nan", "52870": "nan", "52875": "nan", "52880": "nan", "52885": "nan", "52890": "nan", "52895": "nan", "52900": "nan", "52905": "nan", "52910": "nan", "52915": "nan", "52920": "nan", "52925": "nan", "52930": "nan", "52935": "nan", "52940": "nan", "52945": "nan", "52950": "nan", "52955": "nan", "52960": "nan", "52965": "nan", "52970": "nan", "52975": "nan", "52980": "nan", "52985": "nan", "52990": "nan", "52995": "nan", "53000": "nan", "53005": "nan", "53010": "nan", "53015": "nan", "53020": "nan", "53025": "nan", "53030": "nan", "53035": "nan", "53040": "nan", "53045": "nan", "53050": "nan", "53055": "nan", "53060": "nan", "53065": "nan", "53070": "nan", "53075": "nan", "53080": "nan", "53085": "nan", "53090": "nan", "53095": "nan", "53100": "nan", "53105": "nan", "53110": "nan", "53115": "nan", "53120": "nan", "53125": "nan", "53130": "nan", "53135": "nan", "53140": "nan", "53145": "nan", "53150": "nan", "53155": "nan", "53160": "nan", "53165": "nan", "53170": "nan", "53175": "nan", "53180": "nan", "53185": "nan", "53190": "nan", "53195": "nan", "53200": "nan", "53205": "nan", "53210": "nan", "53215": "nan", "53220": "nan", "53225": "nan", "53230": "nan", "53235": "nan", "53240": "nan", "53245": "nan", "53250": "nan", "53255": "nan", "53260": "nan", "53265": "nan", "53270": "nan", "53275": "nan", "53280": "nan", "53285": "nan", "53290": "nan", "53295": "nan", "53300": "nan", "53305": "nan", "53310": "nan", "53315": "nan", "53320": "nan", "53325": "nan", "53330": "nan", "53335": "nan", "53340": "nan", "53345": "nan", "53350": "nan", "53355": "nan", "53360": "nan", "53365": "nan", "53370": "nan", "53375": "nan", "53380": "nan", "53385": "nan", "53390": "nan", "53395": "nan", "53400": "nan", "53405": "nan", "53410": "nan", "53415": "nan", "53420": "nan", "53425": "nan", "53430": "nan", "53435": "nan", "53440": "nan", "53445": "nan", "53450": "nan", "53455": "nan", "53460": "nan", "53465": "nan", "53470": "nan", "53475": "nan", "53480": "nan", "53485": "nan", "53490": "nan", "53495": "nan", "53500": "nan", "53505": "nan", "53510": "nan", "53515": "nan", "53520": "nan", "53525": "nan", "53530": "nan", "53535": "nan", "53540": "nan", "53545": "nan", "53550": "nan", "53555": "nan", "53560": "nan", "53565": "nan", "53570": "nan", "53575": "nan", "53580": "nan", "53585": "nan", "53590": "nan", "53595": "nan", "53600": "nan", "53605": "nan", "53610": "nan", "53615": "nan", "53620": "nan", "53625": "nan", "53630": "nan", "53635": "nan", "53640": "nan", "53645": "nan", "53650": "nan", "53655": "nan", "53660": "nan", "53665": "nan", "53670": "nan", "53675": "nan", "53680": "nan", "53685": "nan", "53690": "nan", "53695": "nan", "53700": "nan", "53705": "nan", "53710": "nan", "53715": "nan", "53720": "nan", "53725": "nan", "53730": "nan", "53735": "nan", "53740": "nan", "53745": "nan", "53750": "nan", "53755": "nan", "53760": "nan", "53765": "nan", "53770": "nan", "53775": "nan", "53780": "nan", "53785": "nan", "53790": "nan", "53795": "nan", "53800": "nan", "53805": "nan", "53810": "nan", "53815": "nan", "53820": "nan", "53825": "nan", "53830": "nan", "53835": "nan", "53840": "nan", "53845": "nan", "53850": "nan", "53855": "nan", "53860": "nan", "53865": "nan", "53870": "nan", "53875": "nan", "53880": "nan", "53885": "nan", "53890": "nan", "53895": "nan", "53900": "nan", "53905": "nan", "53910": "nan", "53915": "nan", "53920": "nan", "53925": "nan", "53930": "nan", "53935": "nan", "53940": "nan", "53945": "nan", "53950": "nan", "53955": "nan", "53960": "nan", "53965": "nan", "53970": "nan", "53975": "nan", "53980": "nan", "53985": "nan", "53990": "nan", "53995": "nan", "54000": "nan", "54005": "nan", "54010": "nan", "54015": "nan", "54020": "nan", "54025": "nan", "54030": "nan", "54035": "nan", "54040": "nan", "54045": "nan", "54050": "nan", "54055": "nan", "54060": "nan", "54065": "nan", "54070": "nan", "54075": "nan", "54080": "nan", "54085": "nan", "54090": "nan", "54095": "nan", "54100": "nan", "54105": "nan", "54110": "nan", "54115": "nan", "54120": "nan", "54125": "nan", "54130": "nan", "54135": "nan", "54140": "nan", "54145": "nan", "54150": "nan", "54155": "nan", "54160": "nan", "54165": "nan", "54170": "nan", "54175": "nan", "54180": "nan", "54185": "nan", "54190": "nan", "54195": "nan", "54200": "nan", "54205": "nan", "54210": "nan", "54215": "nan", "54220": "nan", "54225": "nan", "54230": "nan", "54235": "nan", "54240": "nan", "54245": "nan", "54250": "nan", "54255": "nan", "54260": "nan", "54265": "nan", "54270": "nan", "54275": "nan", "54280": "nan", "54285": "nan", "54290": "nan", "54295": "nan", "54300": "nan", "54305": "nan", "54310": "nan", "54315": "nan", "54320": "nan", "54325": "nan", "54330": "nan", "54335": "nan", "54340": "nan", "54345": "nan", "54350": "nan", "54355": "nan", "54360": "nan", "54365": "nan", "54370": "nan", "54375": "nan", "54380": "nan", "54385": "nan", "54390": "nan", "54395": "nan", "54400": "nan", "54405": "nan", "54410": "nan", "54415": "nan", "54420": "nan", "54425": "nan", "54430": "nan", "54435": "nan", "54440": "nan", "54445": "nan", "54450": "nan", "54455": "nan", "54460": "nan", "54465": "nan", "54470": "nan", "54475": "nan", "54480": "nan", "54485": "nan", "54490": "nan", "54495": "nan", "54500": "nan", "54505": "nan", "54510": "nan", "54515": "nan", "54520": "nan", "54525": "nan", "54530": "nan", "54535": "nan", "54540": "nan", "54545": "nan", "54550": "nan", "54555": "nan", "54560": "nan", "54565": "nan", "54570": "nan", "54575": "nan", "54580": "nan", "54585": "nan", "54590": "nan", "54595": "nan", "54600": "nan", "54605": "nan", "54610": "nan", "54615": "nan", "54620": "nan", "54625": "nan", "54630": "nan", "54635": "nan", "54640": "nan", "54645": "nan", "54650": "nan", "54655": "nan", "54660": "nan", "54665": "nan", "54670": "nan", "54675": "nan", "54680": "nan", "54685": "nan", "54690": "nan", "54695": "nan", "54700": "nan", "54705": "nan", "54710": "nan", "54715": "nan", "54720": "nan", "54725": "nan", "54730": "nan", "54735": "nan", "54740": "nan", "54745": "nan", "54750": "nan", "54755": "nan", "54760": "nan", "54765": "nan", "54770": "nan", "54775": "nan", "54780": "nan", "54785": "nan", "54790": "nan", "54795": "nan", "54800": "nan", "54805": "nan", "54810": "nan", "54815": "nan", "54820": "nan", "54825": "nan", "54830": "nan", "54835": "nan", "54840": "nan", "54845": "nan", "54850": "nan", "54855": "nan", "54860": "nan", "54865": "nan", "54870": "nan", "54875": "nan", "54880": "nan", "54885": "nan", "54890": "nan", "54895": "nan", "54900": "nan", "54905": "nan", "54910": "nan", "54915": "nan", "54920": "nan", "54925": "nan", "54930": "nan", "54935": "nan", "54940": "nan", "54945": "nan", "54950": "nan", "54955": "nan", "54960": "nan", "54965": "nan", "54970": "nan", "54975": "nan", "54980": "nan", "54985": "nan", "54990": "nan", "54995": "nan", "55000": "nan"}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release/model_config.yaml
================================================
ENV_VARS:
  NCCL_IB_SL: 1
  NCCL_IB_TIMEOUT: 19
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_FWD_LAYERNORM_SM_MARGIN: 16
  NVTE_BWD_LAYERNORM_SM_MARGIN: 16
  NCCL_P2P_NET_CHUNKSIZE: 2097152
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
  NON_DETERMINSTIC_RESULTS: 1
TEST_TYPE: "release"
MODEL_ARGS:
  # Distributed args
  --distributed-timeout-minutes: 60
  --tensor-model-parallel-size: 8
  --pipeline-model-parallel-size: 1
  --use-distributed-optimizer: true
  --overlap-grad-reduce: true
  --overlap-param-gather: true
  # Training args
  --use-mcore-models: true
  --sequence-parallel: true
  --disable-bias-linear: true
  --micro-batch-size: 4
  --rampup-batch-size: "[384 384 97656250]"
  --global-batch-size: 1152
  --train-samples: 19531250
  --manual-gc: true
  # Transformer Engine args
  --transformer-impl: transformer_engine
  # Data args
  --data-cache-path: ${DATA_CACHE_PATH}
  --tokenizer-type: GPTSentencePieceTokenizer
  --tokenizer-model: ${DATA_PATH}/utils/nemotron_2_256k.model
  --data-path: $DATA_BLEND
  --split: 99,1,0
  --no-mmap-bin-files: true
  --num-workers: 6
  # Add network size args
  --apply-layernorm-1p: true
  --untie-embeddings-and-output-weights: true
  --position-embedding-type: rope
  --no-rope-fusion: true #TODO: We can remove this once upgrading to the DEV container
  --rotary-percent: 0.5
  --squared-relu: true
  --num-layers: 32
  --hidden-size: 6144
  --num-attention-heads: 48
  --group-query-attention: true
  --num-query-groups: 8
  --seq-length: 4096
  --max-position-embeddings: 4096
  # Add regularization args
  --attention-dropout: 0.0
  --hidden-dropout: 0.0
  --clip-grad: 1.0
  --weight-decay: 0.1
  # Add learning rate args
  --lr-decay-samples: 1949218748
  --lr-warmup-samples: 3906252
  --lr: 4.5e-4
  --min-lr: 4.5e-5
  --decoupled-lr: 5.0e-4
  --decoupled-min-lr: 4.5e-5
  --lr-decay-style: cosine
  --adam-beta1: 0.9
  --adam-beta2: 0.95
  # Add validation args
  --eval-iters: 32
  --eval-interval: 2000
  # Add checkpointing args
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --save-interval: 1000
  --save-retain-interval: 5000
  # Add initialization args
  --init-method-std: 0.0134
  # Add logging args
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  --log-num-zeros-in-grad: true
  --log-params-norm: true
  --log-validation-ppl-to-tensorboard: true
  --log-throughput: true
  --log-interval: 100
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --wandb-project: megatron-core-release-runs
  --wandb-entity: adlr
  --wandb-exp-name: ${WANDB_EXPERIMENT}
  # Add mixed precision args
  --bf16: true
  --exit-interval: 51000
  --wandb-save-dir: ${WANDB_SAVE_PATH}
  --async-save: true
  --use-persistent-ckpt-worker: true
METRICS:
  - "iteration-time"
  - "lm loss"
  - "mem-allocated-bytes"
  - "mem-max-allocated-bytes"


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release_gb200/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
  NON_DETERMINSTIC_RESULTS: 1
  NVTE_NORM_FWD_USE_CUDNN: 1
  NVTE_NORM_BWD_USE_CUDNN: 1
  NVTE_FUSED_ATTN: 1
  PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True
  USE_MNNVL: 1
TEST_TYPE: "release"
MODEL_ARGS:
  # Distributed args
  --distributed-timeout-minutes: 60
  --tensor-model-parallel-size: 4
  --pipeline-model-parallel-size: 2
  --num-layers-per-virtual-pipeline-stage: 8
  --use-distributed-optimizer: true
  --overlap-grad-reduce: true
  --overlap-param-gather: true
  --tp-comm-overlap: true
  # Training args
  --use-mcore-models: true
  --sequence-parallel: true
  --disable-bias-linear: true
  --micro-batch-size: 4
  --rampup-batch-size: "[384 384 97656250]"
  --global-batch-size: 1152
  --train-samples: 19531250
  --manual-gc: true
  --cross-entropy-loss-fusion: true
  --cross-entropy-fusion-impl: te
  # Transformer Engine args
  --transformer-impl: transformer_engine
  # Data args
  --data-cache-path: ${DATA_CACHE_PATH}
  --tokenizer-type: GPTSentencePieceTokenizer
  --tokenizer-model: ${DATA_PATH}/utils/nemotron_2_256k.model
  --data-path: $DATA_BLEND
  --split: 99,1,0
  --no-mmap-bin-files: true
  --num-workers: 6
  # Add network size args
  --apply-layernorm-1p: true
  --untie-embeddings-and-output-weights: true
  --position-embedding-type: rope
  --rotary-percent: 0.5
  --squared-relu: true
  --num-layers: 32
  --hidden-size: 6144
  --num-attention-heads: 48
  --group-query-attention: true
  --num-query-groups: 8
  --seq-length: 4096
  --max-position-embeddings: 4096
  # Add regularization args
  --attention-dropout: 0.0
  --hidden-dropout: 0.0
  --clip-grad: 1.0
  --weight-decay: 0.1
  # Add learning rate args
  --lr-decay-samples: 1949218748
  --lr-warmup-samples: 3906252
  --lr: 4.5e-4
  --min-lr: 4.5e-5
  --decoupled-lr: 5.0e-4
  --decoupled-min-lr: 4.5e-5
  --lr-decay-style: cosine
  --adam-beta1: 0.9
  --adam-beta2: 0.95
  # Add validation args
  --eval-iters: 32
  --eval-interval: 2000
  # Add checkpointing args
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --save-interval: 1000
  --save-retain-interval: 5000
  # Add initialization args
  --init-method-std: 0.0134
  # Add logging args
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  --log-num-zeros-in-grad: true
  --log-params-norm: true
  --log-validation-ppl-to-tensorboard: true
  --log-throughput: true
  --log-interval: 100
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --wandb-project: megatron-core-release-runs
  --wandb-entity: adlr
  --wandb-exp-name: ${WANDB_EXPERIMENT}
  # Add mixed precision args
  --bf16: true
  --exit-interval: 51000
  --wandb-save-dir: ${WANDB_SAVE_PATH}
  --async-save: true
  --use-persistent-ckpt-worker: true
METRICS:
  - "iteration-time"
  - "lm loss"
  - "mem-allocated-bytes"
  - "mem-max-allocated-bytes"


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release_sm/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 13000,
        "step_interval": 5,
        "values": {
            "1": 13.05008,
            "5": 13.01204,
            "10": 12.1655,
            "15": 11.32234,
            "20": 10.41894,
            "25": 10.0578,
            "30": 9.71991,
            "35": 9.42902,
            "40": 9.23153,
            "45": 9.0416,
            "50": 8.84482,
            "55": 8.66178,
            "60": 8.60539,
            "65": 8.47775,
            "70": 8.43752,
            "75": 8.34325,
            "80": 8.18407,
            "85": 8.12181,
            "90": 7.96741,
            "95": 7.95709,
            "100": 7.84521,
            "105": 7.70676,
            "110": 7.56897,
            "115": 7.458,
            "120": 7.42069,
            "125": 7.44964,
            "130": 7.26452,
            "135": 7.24116,
            "140": 7.20033,
            "145": 7.00832,
            "150": 7.14295,
            "155": 6.99244,
            "160": 6.92185,
            "165": 6.89548,
            "170": 6.80023,
            "175": 6.82899,
            "180": 6.83583,
            "185": 6.74551,
            "190": 6.71201,
            "195": 6.64915,
            "200": 6.63261,
            "205": 6.6249,
            "210": 6.52711,
            "215": 6.50221,
            "220": 6.50404,
            "225": 6.50149,
            "230": 6.54783,
            "235": 6.46088,
            "240": 6.37609,
            "245": 6.3747,
            "250": 6.30165,
            "255": 6.43597,
            "260": 6.33348,
            "265": 6.28434,
            "270": 6.21703,
            "275": 6.2455,
            "280": 6.17513,
            "285": 6.20833,
            "290": 6.16109,
            "295": 6.1242,
            "300": 6.12642,
            "305": 6.01347,
            "310": 6.07181,
            "315": 6.07118,
            "320": 5.95351,
            "325": 5.90429,
            "330": 5.98027,
            "335": 6.01029,
            "340": 5.92597,
            "345": 5.90736,
            "350": 5.91072,
            "355": 5.82381,
            "360": 5.85127,
            "365": 5.83524,
            "370": 5.78767,
            "375": 5.82558,
            "380": 5.84099,
            "385": 5.77487,
            "390": 5.75924,
            "395": 5.68715,
            "400": 5.63262,
            "405": 5.64579,
            "410": 5.6491,
            "415": 5.7162,
            "420": 5.6389,
            "425": 5.64386,
            "430": 5.63857,
            "435": 5.56311,
            "440": 5.6115,
            "445": 5.50046,
            "450": 5.5841,
            "455": 5.49511,
            "460": 5.5081,
            "465": 5.58817,
            "470": 5.54722,
            "475": 5.49015,
            "480": 5.46104,
            "485": 5.51932,
            "490": 5.50384,
            "495": 5.46751,
            "500": 5.40583,
            "505": 5.37976,
            "510": 5.41077,
            "515": 5.41349,
            "520": 5.46389,
            "525": 5.30176,
            "530": 5.30865,
            "535": 5.31487,
            "540": 5.30664,
            "545": 5.39279,
            "550": 5.34384,
            "555": 5.23202,
            "560": 5.34449,
            "565": 5.29541,
            "570": 5.23678,
            "575": 5.29742,
            "580": 5.21604,
            "585": 5.20684,
            "590": 5.2311,
            "595": 5.21895,
            "600": 5.27384,
            "605": 5.22693,
            "610": 5.2111,
            "615": 5.20312,
            "620": 5.20466,
            "625": 5.18657,
            "630": 5.15555,
            "635": 5.12189,
            "640": 5.09743,
            "645": 5.12261,
            "650": 5.13982,
            "655": 5.1289,
            "660": 5.04202,
            "665": 5.09654,
            "670": 5.03729,
            "675": 5.0268,
            "680": 5.01185,
            "685": 4.98136,
            "690": 5.01416,
            "695": 4.95502,
            "700": 4.96149,
            "705": 4.9334,
            "710": 4.98004,
            "715": 4.90045,
            "720": 4.84477,
            "725": 4.79493,
            "730": 4.82978,
            "735": 4.80907,
            "740": 4.82743,
            "745": 4.70279,
            "750": 4.70949,
            "755": 4.78826,
            "760": 4.77357,
            "765": 4.72919,
            "770": 4.67543,
            "775": 4.66386,
            "780": 4.68062,
            "785": 4.75851,
            "790": 4.65081,
            "795": 4.59989,
            "800": 4.58234,
            "805": 4.60231,
            "810": 4.64497,
            "815": 4.57763,
            "820": 4.6061,
            "825": 4.57606,
            "830": 4.54268,
            "835": 4.53145,
            "840": 4.46479,
            "845": 4.46978,
            "850": 4.42697,
            "855": 4.48821,
            "860": 4.43561,
            "865": 4.48227,
            "870": 4.45584,
            "875": 4.3794,
            "880": 4.40382,
            "885": 4.35898,
            "890": 4.4257,
            "895": 4.38718,
            "900": 4.37761,
            "905": 4.32099,
            "910": 4.34998,
            "915": 4.33433,
            "920": 4.36562,
            "925": 4.35701,
            "930": 4.30214,
            "935": 4.28572,
            "940": 4.32955,
            "945": 4.28423,
            "950": 4.31902,
            "955": 4.25429,
            "960": 4.18985,
            "965": 4.27248,
            "970": 4.26224,
            "975": 4.22351,
            "980": 4.21989,
            "985": 4.15887,
            "990": 4.13424,
            "995": 4.16609,
            "1000": 4.21284,
            "1005": 4.1657,
            "1010": 4.17265,
            "1015": 4.09445,
            "1020": 4.14749,
            "1025": 4.2074,
            "1030": 4.11261,
            "1035": 4.08674,
            "1040": 4.08595,
            "1045": 4.09037,
            "1050": 4.13206,
            "1055": 4.08609,
            "1060": 4.10286,
            "1065": 4.05057,
            "1070": 4.03731,
            "1075": 4.06262,
            "1080": 4.05661,
            "1085": 4.08637,
            "1090": 4.02242,
            "1095": 4.08239,
            "1100": 4.04944,
            "1105": 4.05282,
            "1110": 4.02139,
            "1115": 4.01265,
            "1120": 3.99,
            "1125": 3.98572,
            "1130": 4.03287,
            "1135": 3.99997,
            "1140": 3.98314,
            "1145": 3.93997,
            "1150": 4.048,
            "1155": 3.99054,
            "1160": 3.9513,
            "1165": 3.86271,
            "1170": 3.92237,
            "1175": 3.93401,
            "1180": 3.95735,
            "1185": 3.97312,
            "1190": 3.92353,
            "1195": 3.93659,
            "1200": 3.88343,
            "1205": 3.87122,
            "1210": 3.98003,
            "1215": 3.81935,
            "1220": 3.86489,
            "1225": 3.80317,
            "1230": 3.90989,
            "1235": 3.88555,
            "1240": 3.87765,
            "1245": 3.79262,
            "1250": 3.8313,
            "1255": 3.86382,
            "1260": 3.88293,
            "1265": 3.78626,
            "1270": 3.87347,
            "1275": 3.8262,
            "1280": 3.81649,
            "1285": 3.8459,
            "1290": 3.87404,
            "1295": 3.84352,
            "1300": 3.80631,
            "1305": 3.82946,
            "1310": 3.80451,
            "1315": 3.81158,
            "1320": 3.81004,
            "1325": 3.7133,
            "1330": 3.78036,
            "1335": 3.75635,
            "1340": 3.74424,
            "1345": 3.75023,
            "1350": 3.74904,
            "1355": 3.7756,
            "1360": 3.73427,
            "1365": 3.73041,
            "1370": 3.73521,
            "1375": 3.73462,
            "1380": 3.73031,
            "1385": 3.73409,
            "1390": 3.65424,
            "1395": 3.74026,
            "1400": 3.72338,
            "1405": 3.66567,
            "1410": 3.66913,
            "1415": 3.64555,
            "1420": 3.69661,
            "1425": 3.70429,
            "1430": 3.66615,
            "1435": 3.66413,
            "1440": 3.6299,
            "1445": 3.6837,
            "1450": 3.67379,
            "1455": 3.62801,
            "1460": 3.64174,
            "1465": 3.66948,
            "1470": 3.62106,
            "1475": 3.68562,
            "1480": 3.65,
            "1485": 3.66033,
            "1490": 3.62097,
            "1495": 3.59333,
            "1500": 3.63003,
            "1505": 3.68557,
            "1510": 3.5576,
            "1515": 3.60595,
            "1520": 3.63257,
            "1525": 3.59376,
            "1530": 3.58343,
            "1535": 3.59111,
            "1540": 3.61262,
            "1545": 3.61139,
            "1550": 3.55891,
            "1555": 3.56505,
            "1560": 3.61415,
            "1565": 3.60344,
            "1570": 3.5834,
            "1575": 3.54504,
            "1580": 3.58566,
            "1585": 3.55795,
            "1590": 3.46436,
            "1595": 3.49688,
            "1600": 3.5123,
            "1605": 3.58015,
            "1610": 3.57647,
            "1615": 3.48342,
            "1620": 3.51893,
            "1625": 3.45637,
            "1630": 3.49954,
            "1635": 3.541,
            "1640": 3.52002,
            "1645": 3.54913,
            "1650": 3.49346,
            "1655": 3.4684,
            "1660": 3.51337,
            "1665": 3.44456,
            "1670": 3.50579,
            "1675": 3.49404,
            "1680": 3.46873,
            "1685": 3.47674,
            "1690": 3.47471,
            "1695": 3.48546,
            "1700": 3.45526,
            "1705": 3.38706,
            "1710": 3.48407,
            "1715": 3.49204,
            "1720": 3.44268,
            "1725": 3.42354,
            "1730": 3.42642,
            "1735": 3.46923,
            "1740": 3.457,
            "1745": 3.43155,
            "1750": 3.40389,
            "1755": 3.40607,
            "1760": 3.37866,
            "1765": 3.41549,
            "1770": 3.4484,
            "1775": 3.38138,
            "1780": 3.42122,
            "1785": 3.40965,
            "1790": 3.38547,
            "1795": 3.40935,
            "1800": 3.34031,
            "1805": 3.3907,
            "1810": 3.33733,
            "1815": 3.41598,
            "1820": 3.4039,
            "1825": 3.37123,
            "1830": 3.32433,
            "1835": 3.4153,
            "1840": 3.40482,
            "1845": 3.43338,
            "1850": 3.38928,
            "1855": 3.39458,
            "1860": 3.36025,
            "1865": 3.38634,
            "1870": 3.30473,
            "1875": 3.45012,
            "1880": 3.34557,
            "1885": 3.34827,
            "1890": 3.33046,
            "1895": 3.38303,
            "1900": 3.37378,
            "1905": 3.2966,
            "1910": 3.32688,
            "1915": 3.3148,
            "1920": 3.3538,
            "1925": 3.33785,
            "1930": 3.30674,
            "1935": 3.31319,
            "1940": 3.36196,
            "1945": 3.25945,
            "1950": 3.40875,
            "1955": 3.30383,
            "1960": 3.28738,
            "1965": 3.27524,
            "1970": 3.28884,
            "1975": 3.339,
            "1980": 3.33871,
            "1985": 3.23068,
            "1990": 3.29676,
            "1995": 3.27317,
            "2000": 3.27611,
            "2005": 3.25628,
            "2010": 3.25614,
            "2015": 3.23395,
            "2020": 3.27251,
            "2025": 3.27212,
            "2030": 3.26956,
            "2035": 3.29321,
            "2040": 3.24341,
            "2045": 3.24512,
            "2050": 3.27219,
            "2055": 3.31028,
            "2060": 3.27911,
            "2065": 3.22105,
            "2070": 3.30262,
            "2075": 3.24308,
            "2080": 3.23722,
            "2085": 3.28743,
            "2090": 3.14147,
            "2095": 3.29285,
            "2100": 3.23507,
            "2105": 3.18698,
            "2110": 3.19488,
            "2115": 3.22507,
            "2120": 3.17281,
            "2125": 3.20994,
            "2130": 3.21504,
            "2135": 3.2795,
            "2140": 3.20055,
            "2145": 3.21514,
            "2150": 3.20567,
            "2155": 3.22994,
            "2160": 3.21817,
            "2165": 3.28593,
            "2170": 3.22314,
            "2175": 3.17419,
            "2180": 3.22019,
            "2185": 3.24188,
            "2190": 3.22977,
            "2195": 3.15677,
            "2200": 3.18661,
            "2205": 3.16602,
            "2210": 3.12392,
            "2215": 3.20063,
            "2220": 3.19672,
            "2225": 3.17772,
            "2230": 3.12195,
            "2235": 3.17896,
            "2240": 3.21655,
            "2245": 3.2019,
            "2250": 3.21383,
            "2255": 3.15541,
            "2260": 3.16941,
            "2265": 3.21191,
            "2270": 3.17741,
            "2275": 3.13051,
            "2280": 3.17617,
            "2285": 3.15025,
            "2290": 3.15609,
            "2295": 3.20156,
            "2300": 3.14421,
            "2305": 3.15813,
            "2310": 3.12811,
            "2315": 3.05602,
            "2320": 3.12288,
            "2325": 3.17284,
            "2330": 3.13107,
            "2335": 3.10638,
            "2340": 3.15454,
            "2345": 3.11338,
            "2350": 3.12349,
            "2355": 3.11221,
            "2360": 3.1587,
            "2365": 3.13477,
            "2370": 3.15842,
            "2375": 3.14138,
            "2380": 3.11083,
            "2385": 3.07931,
            "2390": 3.09995,
            "2395": 3.08487,
            "2400": 3.08246,
            "2405": 3.09178,
            "2410": 3.08889,
            "2415": 3.08364,
            "2420": 3.0794,
            "2425": 3.07602,
            "2430": 3.07131,
            "2435": 3.0765,
            "2440": 3.08722,
            "2445": 3.06039,
            "2450": 3.12452,
            "2455": 3.14884,
            "2460": 3.08647,
            "2465": 3.06823,
            "2470": 3.02841,
            "2475": 3.06105,
            "2480": 3.08627,
            "2485": 3.05418,
            "2490": 3.0659,
            "2495": 3.10432,
            "2500": 3.08628,
            "2505": 3.10056,
            "2510": 3.11958,
            "2515": 3.04463,
            "2520": 3.06461,
            "2525": 3.03564,
            "2530": 3.04561,
            "2535": 3.08874,
            "2540": 3.07717,
            "2545": 3.04594,
            "2550": 2.98846,
            "2555": 3.06822,
            "2560": 3.04282,
            "2565": 3.1096,
            "2570": 2.99987,
            "2575": 3.05028,
            "2580": 3.07924,
            "2585": 3.01705,
            "2590": 3.06415,
            "2595": 2.99331,
            "2600": 3.06187,
            "2605": 3.0334,
            "2610": 3.04104,
            "2615": 3.06401,
            "2620": 2.98076,
            "2625": 3.00513,
            "2630": 3.03888,
            "2635": 3.05046,
            "2640": 3.00752,
            "2645": 3.05044,
            "2650": 3.02104,
            "2655": 2.99029,
            "2660": 3.01279,
            "2665": 3.03456,
            "2670": 2.97516,
            "2675": 2.9664,
            "2680": 2.99127,
            "2685": 3.00057,
            "2690": 3.00136,
            "2695": 2.9829,
            "2700": 3.02461,
            "2705": 2.98686,
            "2710": 2.97101,
            "2715": 2.96259,
            "2720": 3.02765,
            "2725": 2.9843,
            "2730": 3.07085,
            "2735": 3.00313,
            "2740": 2.99675,
            "2745": 3.01003,
            "2750": 3.00601,
            "2755": 2.9679,
            "2760": 2.99515,
            "2765": 3.0052,
            "2770": 2.97319,
            "2775": 2.98775,
            "2780": 3.01171,
            "2785": 2.96171,
            "2790": 2.9614,
            "2795": 2.95103,
            "2800": 2.95137,
            "2805": 2.92932,
            "2810": 2.97584,
            "2815": 2.95426,
            "2820": 3.02824,
            "2825": 3.03446,
            "2830": 2.99241,
            "2835": 2.92405,
            "2840": 2.924,
            "2845": 3.01764,
            "2850": 3.02928,
            "2855": 3.00953,
            "2860": 2.96462,
            "2865": 2.91418,
            "2870": 2.98414,
            "2875": 2.91364,
            "2880": 2.94595,
            "2885": 2.91375,
            "2890": 2.97568,
            "2895": 2.92367,
            "2900": 2.95047,
            "2905": 2.99681,
            "2910": 2.96428,
            "2915": 2.97992,
            "2920": 2.96376,
            "2925": 2.94181,
            "2930": 2.95351,
            "2935": 2.94268,
            "2940": 2.95302,
            "2945": 2.92122,
            "2950": 2.98363,
            "2955": 2.90414,
            "2960": 2.96628,
            "2965": 2.86677,
            "2970": 2.96148,
            "2975": 2.98973,
            "2980": 2.93798,
            "2985": 3.03145,
            "2990": 2.93902,
            "2995": 2.87289,
            "3000": 2.93094,
            "3005": 2.88807,
            "3010": 2.93649,
            "3015": 2.93728,
            "3020": 2.9765,
            "3025": 2.94765,
            "3030": 2.94829,
            "3035": 2.96692,
            "3040": 2.94429,
            "3045": 2.8457,
            "3050": 2.9005,
            "3055": 2.90093,
            "3060": 2.93402,
            "3065": 2.92043,
            "3070": 2.88416,
            "3075": 2.89414,
            "3080": 2.92836,
            "3085": 2.90419,
            "3090": 2.92118,
            "3095": 2.92732,
            "3100": 2.86811,
            "3105": 2.93241,
            "3110": 2.92174,
            "3115": 2.95416,
            "3120": 2.9617,
            "3125": 2.86072,
            "3130": 2.93315,
            "3135": 2.92603,
            "3140": 2.8763,
            "3145": 2.91992,
            "3150": 2.85429,
            "3155": 2.85441,
            "3160": 2.84409,
            "3165": 2.8445,
            "3170": 2.90103,
            "3175": 2.91615,
            "3180": 2.87162,
            "3185": 2.93749,
            "3190": 2.94414,
            "3195": 2.95906,
            "3200": 2.9346,
            "3205": 2.86154,
            "3210": 2.86991,
            "3215": 2.91437,
            "3220": 2.86962,
            "3225": 2.86687,
            "3230": 2.81718,
            "3235": 2.87781,
            "3240": 2.87293,
            "3245": 2.90545,
            "3250": 2.86599,
            "3255": 2.85075,
            "3260": 2.86584,
            "3265": 2.87425,
            "3270": 2.84613,
            "3275": 2.86845,
            "3280": 2.80107,
            "3285": 2.81792,
            "3290": 2.87182,
            "3295": 2.9051,
            "3300": 2.87482,
            "3305": 2.86457,
            "3310": 2.8617,
            "3315": 2.82258,
            "3320": 2.84626,
            "3325": 2.83293,
            "3330": 2.83744,
            "3335": 2.85803,
            "3340": 2.83014,
            "3345": 2.84335,
            "3350": 2.92003,
            "3355": 2.88331,
            "3360": 2.81284,
            "3365": 2.85791,
            "3370": 2.84692,
            "3375": 2.83772,
            "3380": 2.85031,
            "3385": 2.8801,
            "3390": 2.87178,
            "3395": 2.81384,
            "3400": 2.78856,
            "3405": 2.8419,
            "3410": 2.85225,
            "3415": 2.86352,
            "3420": 2.82864,
            "3425": 2.81295,
            "3430": 2.83167,
            "3435": 2.89686,
            "3440": 2.8282,
            "3445": 2.88078,
            "3450": 2.8261,
            "3455": 2.79448,
            "3460": 2.82071,
            "3465": 2.8541,
            "3470": 2.84374,
            "3475": 2.77799,
            "3480": 2.8464,
            "3485": 2.82783,
            "3490": 2.89586,
            "3495": 2.86003,
            "3500": 2.83569,
            "3505": 2.82809,
            "3510": 2.82659,
            "3515": 2.83789,
            "3520": 2.78684,
            "3525": 2.8091,
            "3530": 2.85704,
            "3535": 2.78933,
            "3540": 2.84286,
            "3545": 2.81678,
            "3550": 2.80459,
            "3555": 2.82485,
            "3560": 2.82756,
            "3565": 2.83286,
            "3570": 2.80757,
            "3575": 2.81456,
            "3580": 2.82206,
            "3585": 2.83994,
            "3590": 2.83354,
            "3595": 2.78616,
            "3600": 2.75448,
            "3605": 2.79347,
            "3610": 2.8501,
            "3615": 2.75759,
            "3620": 2.81049,
            "3625": 2.89049,
            "3630": 2.78648,
            "3635": 2.79541,
            "3640": 2.78992,
            "3645": 2.77597,
            "3650": 2.81076,
            "3655": 2.82175,
            "3660": 2.77324,
            "3665": 2.78879,
            "3670": 2.77345,
            "3675": 2.77485,
            "3680": 2.82264,
            "3685": 2.81255,
            "3690": 2.80639,
            "3695": 2.81455,
            "3700": 2.79382,
            "3705": 2.79333,
            "3710": 2.75729,
            "3715": 2.80607,
            "3720": 2.80223,
            "3725": 2.79579,
            "3730": 2.84735,
            "3735": 2.80322,
            "3740": 2.75398,
            "3745": 2.8006,
            "3750": 2.80967,
            "3755": 2.80087,
            "3760": 2.76493,
            "3765": 2.75981,
            "3770": 2.76212,
            "3775": 2.77797,
            "3780": 2.76132,
            "3785": 2.78468,
            "3790": 2.75638,
            "3795": 2.79596,
            "3800": 2.8073,
            "3805": 2.75971,
            "3810": 2.8053,
            "3815": 2.76711,
            "3820": 2.79005,
            "3825": 2.73907,
            "3830": 2.75002,
            "3835": 2.81788,
            "3840": 2.72938,
            "3845": 2.71849,
            "3850": 2.78452,
            "3855": 2.72543,
            "3860": 2.80742,
            "3865": 2.75975,
            "3870": 2.77909,
            "3875": 2.75698,
            "3880": 2.79154,
            "3885": 2.79772,
            "3890": 2.74732,
            "3895": 2.81224,
            "3900": 2.7667,
            "3905": 2.72768,
            "3910": 2.75146,
            "3915": 2.75568,
            "3920": 2.7965,
            "3925": 2.7834,
            "3930": 2.72104,
            "3935": 2.7481,
            "3940": 2.75988,
            "3945": 2.74416,
            "3950": 2.73118,
            "3955": 2.78157,
            "3960": 2.76135,
            "3965": 2.74076,
            "3970": 2.75926,
            "3975": 2.73309,
            "3980": 2.744,
            "3985": 2.75444,
            "3990": 2.69665,
            "3995": 2.78631,
            "4000": 2.73901,
            "4005": 2.77258,
            "4010": 2.71546,
            "4015": 2.73038,
            "4020": 2.75439,
            "4025": 2.74097,
            "4030": 2.66413,
            "4035": 2.70287,
            "4040": 2.75553,
            "4045": 2.75354,
            "4050": 2.79756,
            "4055": 2.72714,
            "4060": 2.72047,
            "4065": 2.65865,
            "4070": 2.81308,
            "4075": 2.76334,
            "4080": 2.72285,
            "4085": 2.75607,
            "4090": 2.68226,
            "4095": 2.69234,
            "4100": 2.71963,
            "4105": 2.74397,
            "4110": 2.73704,
            "4115": 2.70976,
            "4120": 2.73378,
            "4125": 2.70953,
            "4130": 2.70207,
            "4135": 2.69241,
            "4140": 2.68559,
            "4145": 2.78455,
            "4150": 2.71388,
            "4155": 2.7486,
            "4160": 2.76565,
            "4165": 2.72377,
            "4170": 2.67692,
            "4175": 2.72423,
            "4180": 2.73214,
            "4185": 2.73112,
            "4190": 2.74361,
            "4195": 2.70199,
            "4200": 2.71084,
            "4205": 2.74634,
            "4210": 2.68089,
            "4215": 2.6701,
            "4220": 2.66543,
            "4225": 2.71006,
            "4230": 2.72178,
            "4235": 2.73702,
            "4240": 2.70895,
            "4245": 2.69911,
            "4250": 2.72113,
            "4255": 2.65314,
            "4260": 2.72897,
            "4265": 2.74195,
            "4270": 2.72821,
            "4275": 2.69356,
            "4280": 2.71212,
            "4285": 2.74058,
            "4290": 2.69094,
            "4295": 2.6967,
            "4300": 2.70408,
            "4305": 2.70382,
            "4310": 2.73923,
            "4315": 2.71253,
            "4320": 2.7063,
            "4325": 2.70787,
            "4330": 2.7135,
            "4335": 2.69438,
            "4340": 2.7039,
            "4345": 2.72843,
            "4350": 2.68147,
            "4355": 2.70469,
            "4360": 2.7148,
            "4365": 2.7912,
            "4370": 2.74169,
            "4375": 2.74601,
            "4380": 2.70617,
            "4385": 2.7023,
            "4390": 2.70399,
            "4395": 2.75676,
            "4400": 2.66802,
            "4405": 2.67255,
            "4410": 2.68624,
            "4415": 2.70971,
            "4420": 2.71023,
            "4425": 2.72318,
            "4430": 2.696,
            "4435": 2.68512,
            "4440": 2.69942,
            "4445": 2.68608,
            "4450": 2.66467,
            "4455": 2.67,
            "4460": 2.6879,
            "4465": 2.70646,
            "4470": 2.67224,
            "4475": 2.69263,
            "4480": 2.66016,
            "4485": 2.70684,
            "4490": 2.65797,
            "4495": 2.71483,
            "4500": 2.70827,
            "4505": 2.70383,
            "4510": 2.65407,
            "4515": 2.70511,
            "4520": 2.67563,
            "4525": 2.67431,
            "4530": 2.67629,
            "4535": 2.67349,
            "4540": 2.71319,
            "4545": 2.66393,
            "4550": 2.70558,
            "4555": 2.68533,
            "4560": 2.65973,
            "4565": 2.64332,
            "4570": 2.6436,
            "4575": 2.67031,
            "4580": 2.68851,
            "4585": 2.68755,
            "4590": 2.62319,
            "4595": 2.66776,
            "4600": 2.68266,
            "4605": 2.6814,
            "4610": 2.66475,
            "4615": 2.66996,
            "4620": 2.67313,
            "4625": 2.69613,
            "4630": 2.67469,
            "4635": 2.64781,
            "4640": 2.69472,
            "4645": 2.64862,
            "4650": 2.70415,
            "4655": 2.70835,
            "4660": 2.67805,
            "4665": 2.68807,
            "4670": 2.67892,
            "4675": 2.68891,
            "4680": 2.66775,
            "4685": 2.65991,
            "4690": 2.705,
            "4695": 2.65936,
            "4700": 2.67667,
            "4705": 2.65892,
            "4710": 2.67964,
            "4715": 2.65107,
            "4720": 2.72776,
            "4725": 2.63773,
            "4730": 2.6601,
            "4735": 2.69113,
            "4740": 2.64705,
            "4745": 2.65631,
            "4750": 2.64529,
            "4755": 2.65759,
            "4760": 2.66598,
            "4765": 2.64721,
            "4770": 2.62567,
            "4775": 2.66265,
            "4780": 2.66479,
            "4785": 2.69336,
            "4790": 2.65185,
            "4795": 2.67703,
            "4800": 2.63477,
            "4805": 2.64622,
            "4810": 2.66639,
            "4815": 2.64958,
            "4820": 2.67431,
            "4825": 2.65801,
            "4830": 2.62203,
            "4835": 2.65316,
            "4840": 2.66379,
            "4845": 2.66014,
            "4850": 2.63136,
            "4855": 2.60636,
            "4860": 2.65637,
            "4865": 2.63154,
            "4870": 2.64347,
            "4875": 2.62372,
            "4880": 2.63111,
            "4885": 2.63425,
            "4890": 2.6904,
            "4895": 2.66758,
            "4900": 2.62197,
            "4905": 2.62442,
            "4910": 2.64166,
            "4915": 2.61951,
            "4920": 2.65575,
            "4925": 2.65667,
            "4930": 2.57607,
            "4935": 2.65457,
            "4940": 2.63403,
            "4945": 2.64168,
            "4950": 2.63147,
            "4955": 2.62132,
            "4960": 2.6207,
            "4965": 2.66066,
            "4970": 2.60408,
            "4975": 2.65875,
            "4980": 2.62384,
            "4985": 2.63572,
            "4990": 2.66221,
            "4995": 2.58438,
            "5000": 2.66598,
            "5005": 2.66961,
            "5010": 2.68942,
            "5015": 2.63994,
            "5020": 2.64342,
            "5025": 2.68953,
            "5030": 2.64884,
            "5035": 2.62109,
            "5040": 2.62556,
            "5045": 2.61018,
            "5050": 2.6284,
            "5055": 2.65151,
            "5060": 2.64873,
            "5065": 2.69265,
            "5070": 2.61131,
            "5075": 2.61722,
            "5080": 2.61434,
            "5085": 2.61211,
            "5090": 2.59602,
            "5095": 2.65863,
            "5100": 2.65368,
            "5105": 2.61404,
            "5110": 2.66578,
            "5115": 2.62643,
            "5120": 2.67473,
            "5125": 2.63416,
            "5130": 2.62146,
            "5135": 2.64001,
            "5140": 2.59109,
            "5145": 2.636,
            "5150": 2.65103,
            "5155": 2.64678,
            "5160": 2.67741,
            "5165": 2.59135,
            "5170": 2.59663,
            "5175": 2.62169,
            "5180": 2.60891,
            "5185": 2.62655,
            "5190": 2.6275,
            "5195": 2.67374,
            "5200": 2.60143,
            "5205": 2.60779,
            "5210": 2.60797,
            "5215": 2.64883,
            "5220": 2.58969,
            "5225": 2.55564,
            "5230": 2.63704,
            "5235": 2.61808,
            "5240": 2.61892,
            "5245": 2.6334,
            "5250": 2.59759,
            "5255": 2.61999,
            "5260": 2.5638,
            "5265": 2.6022,
            "5270": 2.5957,
            "5275": 2.62453,
            "5280": 2.61679,
            "5285": 2.609,
            "5290": 2.63809,
            "5295": 2.62998,
            "5300": 2.58882,
            "5305": 2.63394,
            "5310": 2.62559,
            "5315": 2.59505,
            "5320": 2.61946,
            "5325": 2.64889,
            "5330": 2.60433,
            "5335": 2.58462,
            "5340": 2.56677,
            "5345": 2.66119,
            "5350": 2.62072,
            "5355": 2.58224,
            "5360": 2.59753,
            "5365": 2.62368,
            "5370": 2.61875,
            "5375": 2.63305,
            "5380": 2.58162,
            "5385": 2.56549,
            "5390": 2.58914,
            "5395": 2.62188,
            "5400": 2.61137,
            "5405": 2.54782,
            "5410": 2.61765,
            "5415": 2.60072,
            "5420": 2.6175,
            "5425": 2.62744,
            "5430": 2.63046,
            "5435": 2.5818,
            "5440": 2.59048,
            "5445": 2.63239,
            "5450": 2.65266,
            "5455": 2.6172,
            "5460": 2.59535,
            "5465": 2.60935,
            "5470": 2.60473,
            "5475": 2.62847,
            "5480": 2.59117,
            "5485": 2.59387,
            "5490": 2.58,
            "5495": 2.57536,
            "5500": 2.57456,
            "5505": 2.62145,
            "5510": 2.62833,
            "5515": 2.58533,
            "5520": 2.56247,
            "5525": 2.59009,
            "5530": 2.66987,
            "5535": 2.62544,
            "5540": 2.57594,
            "5545": 2.60009,
            "5550": 2.55588,
            "5555": 2.57975,
            "5560": 2.56808,
            "5565": 2.61179,
            "5570": 2.66123,
            "5575": 2.63601,
            "5580": 2.57869,
            "5585": 2.59855,
            "5590": 2.56836,
            "5595": 2.59581,
            "5600": 2.55933,
            "5605": 2.60378,
            "5610": 2.58997,
            "5615": 2.58968,
            "5620": 2.58638,
            "5625": 2.55629,
            "5630": 2.5763,
            "5635": 2.63778,
            "5640": 2.59795,
            "5645": 2.57553,
            "5650": 2.58201,
            "5655": 2.55416,
            "5660": 2.56329,
            "5665": 2.59005,
            "5670": 2.56952,
            "5675": 2.61052,
            "5680": 2.53178,
            "5685": 2.57099,
            "5690": 2.60544,
            "5695": 2.56507,
            "5700": 2.60075,
            "5705": 2.60105,
            "5710": 2.58147,
            "5715": 2.58925,
            "5720": 2.53967,
            "5725": 2.60663,
            "5730": 2.57648,
            "5735": 2.61186,
            "5740": 2.5967,
            "5745": 2.56064,
            "5750": 2.54451,
            "5755": 2.56468,
            "5760": 2.62209,
            "5765": 2.56228,
            "5770": 2.54488,
            "5775": 2.58976,
            "5780": 2.58017,
            "5785": 2.54338,
            "5790": 2.56917,
            "5795": 2.60813,
            "5800": 2.54915,
            "5805": 2.53928,
            "5810": 2.56066,
            "5815": 2.52773,
            "5820": 2.60045,
            "5825": 2.50845,
            "5830": 2.50043,
            "5835": 2.6004,
            "5840": 2.54297,
            "5845": 2.55645,
            "5850": 2.6166,
            "5855": 2.51288,
            "5860": 2.56548,
            "5865": 2.52098,
            "5870": 2.57866,
            "5875": 2.61158,
            "5880": 2.58684,
            "5885": 2.5689,
            "5890": 2.58709,
            "5895": 2.55628,
            "5900": 2.61888,
            "5905": 2.56102,
            "5910": 2.60329,
            "5915": 2.6141,
            "5920": 2.59205,
            "5925": 2.53533,
            "5930": 2.57933,
            "5935": 2.55532,
            "5940": 2.57406,
            "5945": 2.52396,
            "5950": 2.56069,
            "5955": 2.59134,
            "5960": 2.56674,
            "5965": 2.62254,
            "5970": 2.55477,
            "5975": 2.58563,
            "5980": 2.56214,
            "5985": 2.56218,
            "5990": 2.55816,
            "5995": 2.55939,
            "6000": 2.55916,
            "6005": 2.52582,
            "6010": 2.56294,
            "6015": 2.5274,
            "6020": 2.53613,
            "6025": 2.55915,
            "6030": 2.60835,
            "6035": 2.54559,
            "6040": 2.551,
            "6045": 2.49287,
            "6050": 2.59671,
            "6055": 2.52113,
            "6060": 2.54903,
            "6065": 2.52668,
            "6070": 2.53063,
            "6075": 2.54125,
            "6080": 2.53828,
            "6085": 2.6014,
            "6090": 2.57326,
            "6095": 2.53938,
            "6100": 2.54651,
            "6105": 2.52417,
            "6110": 2.55763,
            "6115": 2.58953,
            "6120": 2.56015,
            "6125": 2.54649,
            "6130": 2.47548,
            "6135": 2.56256,
            "6140": 2.55995,
            "6145": 2.56011,
            "6150": 2.52806,
            "6155": 2.51155,
            "6160": 2.54318,
            "6165": 2.57878,
            "6170": 2.55001,
            "6175": 2.60347,
            "6180": 2.51199,
            "6185": 2.55297,
            "6190": 2.4939,
            "6195": 2.58162,
            "6200": 2.55703,
            "6205": 2.54407,
            "6210": 2.52438,
            "6215": 2.51685,
            "6220": 2.56896,
            "6225": 2.51605,
            "6230": 2.51271,
            "6235": 2.56294,
            "6240": 2.55265,
            "6245": 2.5273,
            "6250": 2.53499,
            "6255": 2.57521,
            "6260": 2.52745,
            "6265": 2.57783,
            "6270": 2.52798,
            "6275": 2.56887,
            "6280": 2.52652,
            "6285": 2.52461,
            "6290": 2.52333,
            "6295": 2.50807,
            "6300": 2.55739,
            "6305": 2.52703,
            "6310": 2.51458,
            "6315": 2.53888,
            "6320": 2.49247,
            "6325": 2.60019,
            "6330": 2.55901,
            "6335": 2.51425,
            "6340": 2.51261,
            "6345": 2.55869,
            "6350": 2.56028,
            "6355": 2.52664,
            "6360": 2.52402,
            "6365": 2.48552,
            "6370": 2.53722,
            "6375": 2.49389,
            "6380": 2.56229,
            "6385": 2.57823,
            "6390": 2.50771,
            "6395": 2.55326,
            "6400": 2.50997,
            "6405": 2.53036,
            "6410": 2.52054,
            "6415": 2.52596,
            "6420": 2.54097,
            "6425": 2.53566,
            "6430": 2.5808,
            "6435": 2.54817,
            "6440": 2.53987,
            "6445": 2.53241,
            "6450": 2.53514,
            "6455": 2.52675,
            "6460": 2.51878,
            "6465": 2.56707,
            "6470": 2.52212,
            "6475": 2.5281,
            "6480": 2.4893,
            "6485": 2.52991,
            "6490": 2.51048,
            "6495": 2.50057,
            "6500": 2.52728,
            "6505": 2.49759,
            "6510": 2.54453,
            "6515": 2.51139,
            "6520": 2.51311,
            "6525": 2.49504,
            "6530": 2.54469,
            "6535": 2.5335,
            "6540": 2.53179,
            "6545": 2.56322,
            "6550": 2.50205,
            "6555": 2.55879,
            "6560": 2.51272,
            "6565": 2.52344,
            "6570": 2.58812,
            "6575": 2.52412,
            "6580": 2.50017,
            "6585": 2.50818,
            "6590": 2.51163,
            "6595": 2.49887,
            "6600": 2.4898,
            "6605": 2.5406,
            "6610": 2.47882,
            "6615": 2.57,
            "6620": 2.53557,
            "6625": 2.51312,
            "6630": 2.51294,
            "6635": 2.47504,
            "6640": 2.54233,
            "6645": 2.59784,
            "6650": 2.5111,
            "6655": 2.50078,
            "6660": 2.57583,
            "6665": 2.52098,
            "6670": 2.57063,
            "6675": 2.46875,
            "6680": 2.54988,
            "6685": 2.53786,
            "6690": 2.51562,
            "6695": 2.4897,
            "6700": 2.52535,
            "6705": 2.5205,
            "6710": 2.49369,
            "6715": 2.51757,
            "6720": 2.5118,
            "6725": 2.52401,
            "6730": 2.52057,
            "6735": 2.48485,
            "6740": 2.5167,
            "6745": 2.49467,
            "6750": 2.55883,
            "6755": 2.47556,
            "6760": 2.54412,
            "6765": 2.48839,
            "6770": 2.51956,
            "6775": 2.51224,
            "6780": 2.53968,
            "6785": 2.47432,
            "6790": 2.54663,
            "6795": 2.50016,
            "6800": 2.5269,
            "6805": 2.51219,
            "6810": 2.50666,
            "6815": 2.52359,
            "6820": 2.48606,
            "6825": 2.50599,
            "6830": 2.54009,
            "6835": 2.50773,
            "6840": 2.50931,
            "6845": 2.5268,
            "6850": 2.47552,
            "6855": 2.51576,
            "6860": 2.50426,
            "6865": 2.48961,
            "6870": 2.55456,
            "6875": 2.47601,
            "6880": 2.5516,
            "6885": 2.47979,
            "6890": 2.54624,
            "6895": 2.50264,
            "6900": 2.48907,
            "6905": 2.49713,
            "6910": 2.51911,
            "6915": 2.5182,
            "6920": 2.53495,
            "6925": 2.54115,
            "6930": 2.49071,
            "6935": 2.5187,
            "6940": 2.49868,
            "6945": 2.46109,
            "6950": 2.48692,
            "6955": 2.52809,
            "6960": 2.52197,
            "6965": 2.49412,
            "6970": 2.47206,
            "6975": 2.52296,
            "6980": 2.45471,
            "6985": 2.5177,
            "6990": 2.53059,
            "6995": 2.46347,
            "7000": 2.48934,
            "7005": 2.46926,
            "7010": 2.47439,
            "7015": 2.52132,
            "7020": 2.46643,
            "7025": 2.45331,
            "7030": 2.4875,
            "7035": 2.47931,
            "7040": 2.50617,
            "7045": 2.5207,
            "7050": 2.52899,
            "7055": 2.44321,
            "7060": 2.47483,
            "7065": 2.48776,
            "7070": 2.49125,
            "7075": 2.49322,
            "7080": 2.53446,
            "7085": 2.48571,
            "7090": 2.47712,
            "7095": 2.50173,
            "7100": 2.51547,
            "7105": 2.4879,
            "7110": 2.48725,
            "7115": 2.50475,
            "7120": 2.47235,
            "7125": 2.46446,
            "7130": 2.48765,
            "7135": 2.51409,
            "7140": 2.50062,
            "7145": 2.49946,
            "7150": 2.51067,
            "7155": 2.50512,
            "7160": 2.47574,
            "7165": 2.4578,
            "7170": 2.50841,
            "7175": 2.50613,
            "7180": 2.50375,
            "7185": 2.48163,
            "7190": 2.46209,
            "7195": 2.47862,
            "7200": 2.51196,
            "7205": 2.49121,
            "7210": 2.44589,
            "7215": 2.48125,
            "7220": 2.44456,
            "7225": 2.5132,
            "7230": 2.50961,
            "7235": 2.48266,
            "7240": 2.47731,
            "7245": 2.50007,
            "7250": 2.50923,
            "7255": 2.49701,
            "7260": 2.46243,
            "7265": 2.45514,
            "7270": 2.47182,
            "7275": 2.50104,
            "7280": 2.49421,
            "7285": 2.42317,
            "7290": 2.48017,
            "7295": 2.48888,
            "7300": 2.41969,
            "7305": 2.44782,
            "7310": 2.45036,
            "7315": 2.49163,
            "7320": 2.48463,
            "7325": 2.45999,
            "7330": 2.49158,
            "7335": 2.47761,
            "7340": 2.4651,
            "7345": 2.49594,
            "7350": 2.50993,
            "7355": 2.49634,
            "7360": 2.48177,
            "7365": 2.47188,
            "7370": 2.47367,
            "7375": 2.45059,
            "7380": 2.49507,
            "7385": 2.48552,
            "7390": 2.47293,
            "7395": 2.47305,
            "7400": 2.48041,
            "7405": 2.43937,
            "7410": 2.48234,
            "7415": 2.47393,
            "7420": 2.49325,
            "7425": 2.45705,
            "7430": 2.5244,
            "7435": 2.49247,
            "7440": 2.52053,
            "7445": 2.50973,
            "7450": 2.47691,
            "7455": 2.45855,
            "7460": 2.46721,
            "7465": 2.47716,
            "7470": 2.45147,
            "7475": 2.45825,
            "7480": 2.51369,
            "7485": 2.44885,
            "7490": 2.47507,
            "7495": 2.48093,
            "7500": 2.49604,
            "7505": 2.44117,
            "7510": 2.43666,
            "7515": 2.42007,
            "7520": 2.49219,
            "7525": 2.49742,
            "7530": 2.47703,
            "7535": 2.46038,
            "7540": 2.4741,
            "7545": 2.47511,
            "7550": 2.49065,
            "7555": 2.45383,
            "7560": 2.43052,
            "7565": 2.50969,
            "7570": 2.48512,
            "7575": 2.44278,
            "7580": 2.45861,
            "7585": 2.48267,
            "7590": 2.48145,
            "7595": 2.46474,
            "7600": 2.46272,
            "7605": 2.44838,
            "7610": 2.45322,
            "7615": 2.42741,
            "7620": 2.5483,
            "7625": 2.48566,
            "7630": 2.42481,
            "7635": 2.42734,
            "7640": 2.45557,
            "7645": 2.47446,
            "7650": 2.46405,
            "7655": 2.48523,
            "7660": 2.45448,
            "7665": 2.43415,
            "7670": 2.44222,
            "7675": 2.45634,
            "7680": 2.48669,
            "7685": 2.43365,
            "7690": 2.48235,
            "7695": 2.45562,
            "7700": 2.48221,
            "7705": 2.50089,
            "7710": 2.49559,
            "7715": 2.44301,
            "7720": 2.47101,
            "7725": 2.48149,
            "7730": 2.45764,
            "7735": 2.47245,
            "7740": 2.43789,
            "7745": 2.45085,
            "7750": 2.43894,
            "7755": 2.46672,
            "7760": 2.45099,
            "7765": 2.45461,
            "7770": 2.46974,
            "7775": 2.45582,
            "7780": 2.41891,
            "7785": 2.44452,
            "7790": 2.48343,
            "7795": 2.44094,
            "7800": 2.46248,
            "7805": 2.48205,
            "7810": 2.50456,
            "7815": 2.48933,
            "7820": 2.44792,
            "7825": 2.51403,
            "7830": 2.45315,
            "7835": 2.46857,
            "7840": 2.47863,
            "7845": 2.45925,
            "7850": 2.41753,
            "7855": 2.47259,
            "7860": 2.49941,
            "7865": 2.42445,
            "7870": 2.46877,
            "7875": 2.44758,
            "7880": 2.45498,
            "7885": 2.46271,
            "7890": 2.4704,
            "7895": 2.44746,
            "7900": 2.43849,
            "7905": 2.43611,
            "7910": 2.42508,
            "7915": 2.48223,
            "7920": 2.47586,
            "7925": 2.42204,
            "7930": 2.47227,
            "7935": 2.45144,
            "7940": 2.42164,
            "7945": 2.46858,
            "7950": 2.44519,
            "7955": 2.41938,
            "7960": 2.48925,
            "7965": 2.51812,
            "7970": 2.52365,
            "7975": 2.45089,
            "7980": 2.44183,
            "7985": 2.46779,
            "7990": 2.43205,
            "7995": 2.47124,
            "8000": 2.43695,
            "8005": 2.41847,
            "8010": 2.45827,
            "8015": 2.47003,
            "8020": 2.48312,
            "8025": 2.4743,
            "8030": 2.45211,
            "8035": 2.47209,
            "8040": 2.42113,
            "8045": 2.47922,
            "8050": 2.45648,
            "8055": 2.42799,
            "8060": 2.44378,
            "8065": 2.46133,
            "8070": 2.45509,
            "8075": 2.45782,
            "8080": 2.44539,
            "8085": 2.44103,
            "8090": 2.42385,
            "8095": 2.42411,
            "8100": 2.4383,
            "8105": 2.49409,
            "8110": 2.43711,
            "8115": 2.44483,
            "8120": 2.46723,
            "8125": 2.46835,
            "8130": 2.45534,
            "8135": 2.453,
            "8140": 2.43964,
            "8145": 2.42681,
            "8150": 2.42282,
            "8155": 2.48725,
            "8160": 2.45564,
            "8165": 2.44353,
            "8170": 2.43601,
            "8175": 2.42347,
            "8180": 2.49602,
            "8185": 2.4266,
            "8190": 2.4682,
            "8195": 2.45714,
            "8200": 2.44755,
            "8205": 2.44698,
            "8210": 2.43158,
            "8215": 2.44097,
            "8220": 2.43711,
            "8225": 2.41111,
            "8230": 2.43991,
            "8235": 2.46523,
            "8240": 2.42792,
            "8245": 2.44877,
            "8250": 2.4449,
            "8255": 2.43746,
            "8260": 2.4329,
            "8265": 2.42841,
            "8270": 2.43338,
            "8275": 2.44139,
            "8280": 2.39763,
            "8285": 2.43908,
            "8290": 2.48053,
            "8295": 2.44765,
            "8300": 2.45824,
            "8305": 2.40708,
            "8310": 2.43736,
            "8315": 2.45487,
            "8320": 2.39821,
            "8325": 2.39302,
            "8330": 2.43427,
            "8335": 2.44307,
            "8340": 2.48835,
            "8345": 2.44982,
            "8350": 2.4483,
            "8355": 2.40703,
            "8360": 2.39899,
            "8365": 2.45388,
            "8370": 2.4522,
            "8375": 2.42571,
            "8380": 2.41956,
            "8385": 2.42303,
            "8390": 2.43482,
            "8395": 2.43963,
            "8400": 2.4396,
            "8405": 2.48867,
            "8410": 2.43761,
            "8415": 2.43376,
            "8420": 2.41526,
            "8425": 2.43843,
            "8430": 2.46211,
            "8435": 2.40284,
            "8440": 2.45067,
            "8445": 2.45871,
            "8450": 2.40746,
            "8455": 2.45971,
            "8460": 2.4544,
            "8465": 2.4362,
            "8470": 2.40884,
            "8475": 2.47657,
            "8480": 2.40164,
            "8485": 2.41272,
            "8490": 2.46525,
            "8495": 2.43663,
            "8500": 2.44487,
            "8505": 2.40411,
            "8510": 2.40402,
            "8515": 2.42904,
            "8520": 2.42561,
            "8525": 2.49154,
            "8530": 2.37412,
            "8535": 2.4007,
            "8540": 2.48529,
            "8545": 2.38045,
            "8550": 2.43845,
            "8555": 2.45138,
            "8560": 2.46969,
            "8565": 2.42022,
            "8570": 2.43069,
            "8575": 2.44782,
            "8580": 2.44057,
            "8585": 2.4206,
            "8590": 2.40434,
            "8595": 2.42633,
            "8600": 2.41208,
            "8605": 2.49153,
            "8610": 2.4194,
            "8615": 2.38712,
            "8620": 2.44807,
            "8625": 2.42543,
            "8630": 2.45639,
            "8635": 2.44968,
            "8640": 2.43446,
            "8645": 2.47511,
            "8650": 2.42064,
            "8655": 2.45306,
            "8660": 2.45562,
            "8665": 2.38502,
            "8670": 2.40915,
            "8675": 2.42743,
            "8680": 2.44652,
            "8685": 2.42977,
            "8690": 2.40998,
            "8695": 2.44244,
            "8700": 2.43397,
            "8705": 2.41831,
            "8710": 2.42796,
            "8715": 2.44873,
            "8720": 2.4762,
            "8725": 2.41193,
            "8730": 2.3907,
            "8735": 2.43509,
            "8740": 2.43059,
            "8745": 2.3966,
            "8750": 2.43639,
            "8755": 2.42428,
            "8760": 2.4016,
            "8765": 2.43375,
            "8770": 2.40409,
            "8775": 2.43681,
            "8780": 2.41926,
            "8785": 2.47235,
            "8790": 2.42096,
            "8795": 2.41752,
            "8800": 2.41464,
            "8805": 2.4061,
            "8810": 2.4098,
            "8815": 2.47301,
            "8820": 2.45432,
            "8825": 2.42408,
            "8830": 2.38686,
            "8835": 2.42272,
            "8840": 2.39428,
            "8845": 2.42523,
            "8850": 2.43381,
            "8855": 2.40267,
            "8860": 2.42873,
            "8865": 2.42868,
            "8870": 2.43634,
            "8875": 2.43759,
            "8880": 2.41231,
            "8885": 2.3937,
            "8890": 2.4451,
            "8895": 2.42855,
            "8900": 2.41221,
            "8905": 2.40437,
            "8910": 2.40297,
            "8915": 2.42005,
            "8920": 2.43194,
            "8925": 2.46564,
            "8930": 2.41557,
            "8935": 2.40817,
            "8940": 2.38863,
            "8945": 2.39302,
            "8950": 2.41651,
            "8955": 2.39575,
            "8960": 2.43244,
            "8965": 2.41533,
            "8970": 2.4034,
            "8975": 2.4766,
            "8980": 2.43987,
            "8985": 2.37327,
            "8990": 2.4078,
            "8995": 2.41701,
            "9000": 2.45566,
            "9005": 2.41239,
            "9010": 2.37445,
            "9015": 2.40781,
            "9020": 2.39772,
            "9025": 2.36899,
            "9030": 2.40082,
            "9035": 2.42402,
            "9040": 2.4202,
            "9045": 2.41956,
            "9050": 2.39708,
            "9055": 2.41909,
            "9060": 2.41955,
            "9065": 2.40543,
            "9070": 2.44567,
            "9075": 2.39519,
            "9080": 2.43479,
            "9085": 2.42815,
            "9090": 2.41277,
            "9095": 2.39677,
            "9100": 2.40146,
            "9105": 2.35866,
            "9110": 2.466,
            "9115": 2.41729,
            "9120": 2.40494,
            "9125": 2.45833,
            "9130": 2.3942,
            "9135": 2.44764,
            "9140": 2.43538,
            "9145": 2.42864,
            "9150": 2.42436,
            "9155": 2.37413,
            "9160": 2.41723,
            "9165": 2.4237,
            "9170": 2.37298,
            "9175": 2.41773,
            "9180": 2.37794,
            "9185": 2.43884,
            "9190": 2.41245,
            "9195": 2.40368,
            "9200": 2.39457,
            "9205": 2.45154,
            "9210": 2.36217,
            "9215": 2.46422,
            "9220": 2.44676,
            "9225": 2.3842,
            "9230": 2.44585,
            "9235": 2.39756,
            "9240": 2.40256,
            "9245": 2.43616,
            "9250": 2.44676,
            "9255": 2.43332,
            "9260": 2.38684,
            "9265": 2.4388,
            "9270": 2.43616,
            "9275": 2.39245,
            "9280": 2.38882,
            "9285": 2.42149,
            "9290": 2.40403,
            "9295": 2.38344,
            "9300": 2.42258,
            "9305": 2.40398,
            "9310": 2.41555,
            "9315": 2.40892,
            "9320": 2.44315,
            "9325": 2.36939,
            "9330": 2.40243,
            "9335": 2.36125,
            "9340": 2.40817,
            "9345": 2.41574,
            "9350": 2.44011,
            "9355": 2.47744,
            "9360": 2.43769,
            "9365": 2.38878,
            "9370": 2.43788,
            "9375": 2.4353,
            "9380": 2.35334,
            "9385": 2.40158,
            "9390": 2.38263,
            "9395": 2.38979,
            "9400": 2.44357,
            "9405": 2.415,
            "9410": 2.39793,
            "9415": 2.4369,
            "9420": 2.44583,
            "9425": 2.43136,
            "9430": 2.4473,
            "9435": 2.41443,
            "9440": 2.47941,
            "9445": 2.37575,
            "9450": 2.39465,
            "9455": 2.40401,
            "9460": 2.38595,
            "9465": 2.37979,
            "9470": 2.38285,
            "9475": 2.36564,
            "9480": 2.43384,
            "9485": 2.38592,
            "9490": 2.42196,
            "9495": 2.38314,
            "9500": 2.36654,
            "9505": 2.43133,
            "9510": 2.4008,
            "9515": 2.43151,
            "9520": 2.41924,
            "9525": 2.38934,
            "9530": 2.45494,
            "9535": 2.40096,
            "9540": 2.41854,
            "9545": 2.37751,
            "9550": 2.42252,
            "9555": 2.39166,
            "9560": 2.42327,
            "9565": 2.40878,
            "9570": 2.37091,
            "9575": 2.4107,
            "9580": 2.39618,
            "9585": 2.42387,
            "9590": 2.42987,
            "9595": 2.44822,
            "9600": 2.39109,
            "9605": 2.38373,
            "9610": 2.42148,
            "9615": 2.41483,
            "9620": 2.41408,
            "9625": 2.4471,
            "9630": 2.39887,
            "9635": 2.40319,
            "9640": 2.44771,
            "9645": 2.40968,
            "9650": 2.39895,
            "9655": 2.37493,
            "9660": 2.42625,
            "9665": 2.39151,
            "9670": 2.38394,
            "9675": 2.3568,
            "9680": 2.39733,
            "9685": 2.39587,
            "9690": 2.46251,
            "9695": 2.38057,
            "9700": 2.37624,
            "9705": 2.38424,
            "9710": 2.36525,
            "9715": 2.38907,
            "9720": 2.43577,
            "9725": 2.44198,
            "9730": 2.42952,
            "9735": 2.38556,
            "9740": 2.38107,
            "9745": 2.42781,
            "9750": 2.39866,
            "9755": 2.40833,
            "9760": 2.41151,
            "9765": 2.36766,
            "9770": 2.45373,
            "9775": 2.40095,
            "9780": 2.36241,
            "9785": 2.40262,
            "9790": 2.40674,
            "9795": 2.36207,
            "9800": 2.39716,
            "9805": 2.40678,
            "9810": 2.40944,
            "9815": 2.37834,
            "9820": 2.37627,
            "9825": 2.40529,
            "9830": 2.4226,
            "9835": 2.38429,
            "9840": 2.41358,
            "9845": 2.36216,
            "9850": 2.39986,
            "9855": 2.39508,
            "9860": 2.38986,
            "9865": 2.3818,
            "9870": 2.38527,
            "9875": 2.38223,
            "9880": 2.45096,
            "9885": 2.39236,
            "9890": 2.3539,
            "9895": 2.31957,
            "9900": 2.39578,
            "9905": 2.42362,
            "9910": 2.35552,
            "9915": 2.36371,
            "9920": 2.41083,
            "9925": 2.39762,
            "9930": 2.38206,
            "9935": 2.349,
            "9940": 2.3851,
            "9945": 2.37954,
            "9950": 2.4033,
            "9955": 2.4499,
            "9960": 2.43018,
            "9965": 2.35621,
            "9970": 2.4091,
            "9975": 2.38371,
            "9980": 2.33142,
            "9985": 2.405,
            "9990": 2.39633,
            "9995": 2.39584,
            "10000": 2.36742,
            "10005": 2.37207,
            "10010": 2.38237,
            "10015": 2.44528,
            "10020": 2.36262,
            "10025": 2.38754,
            "10030": 2.38683,
            "10035": 2.40902,
            "10040": 2.40526,
            "10045": 2.3827,
            "10050": 2.35168,
            "10055": 2.3679,
            "10060": 2.41766,
            "10065": 2.37238,
            "10070": 2.4226,
            "10075": 2.37237,
            "10080": 2.36326,
            "10085": 2.36945,
            "10090": 2.34646,
            "10095": 2.40112,
            "10100": 2.31438,
            "10105": 2.38271,
            "10110": 2.41097,
            "10115": 2.38644,
            "10120": 2.35762,
            "10125": 2.37234,
            "10130": 2.35976,
            "10135": 2.38285,
            "10140": 2.41246,
            "10145": 2.407,
            "10150": 2.37571,
            "10155": 2.39507,
            "10160": 2.36146,
            "10165": 2.38316,
            "10170": 2.42297,
            "10175": 2.32401,
            "10180": 2.39308,
            "10185": 2.38368,
            "10190": 2.44327,
            "10195": 2.40286,
            "10200": 2.39067,
            "10205": 2.38654,
            "10210": 2.36842,
            "10215": 2.34516,
            "10220": 2.41929,
            "10225": 2.43133,
            "10230": 2.3568,
            "10235": 2.38752,
            "10240": 2.3752,
            "10245": 2.39113,
            "10250": 2.38864,
            "10255": 2.41216,
            "10260": 2.33416,
            "10265": 2.34887,
            "10270": 2.35033,
            "10275": 2.37285,
            "10280": 2.44994,
            "10285": 2.35789,
            "10290": 2.38502,
            "10295": 2.37373,
            "10300": 2.36727,
            "10305": 2.41438,
            "10310": 2.38817,
            "10315": 2.36009,
            "10320": 2.36681,
            "10325": 2.36022,
            "10330": 2.41314,
            "10335": 2.36278,
            "10340": 2.41978,
            "10345": 2.36984,
            "10350": 2.35828,
            "10355": 2.39688,
            "10360": 2.37367,
            "10365": 2.36112,
            "10370": 2.34104,
            "10375": 2.35717,
            "10380": 2.41878,
            "10385": 2.4051,
            "10390": 2.38062,
            "10395": 2.35893,
            "10400": 2.3779,
            "10405": 2.35068,
            "10410": 2.34134,
            "10415": 2.41698,
            "10420": 2.37934,
            "10425": 2.32763,
            "10430": 2.35959,
            "10435": 2.37178,
            "10440": 2.37103,
            "10445": 2.36068,
            "10450": 2.36055,
            "10455": 2.37973,
            "10460": 2.3816,
            "10465": 2.30311,
            "10470": 2.35728,
            "10475": 2.38121,
            "10480": 2.36377,
            "10485": 2.36123,
            "10490": 2.41286,
            "10495": 2.36732,
            "10500": 2.36292,
            "10505": 2.36988,
            "10510": 2.38283,
            "10515": 2.37416,
            "10520": 2.40348,
            "10525": 2.39031,
            "10530": 2.39169,
            "10535": 2.3553,
            "10540": 2.40457,
            "10545": 2.35849,
            "10550": 2.37876,
            "10555": 2.35964,
            "10560": 2.34048,
            "10565": 2.37134,
            "10570": 2.37385,
            "10575": 2.35597,
            "10580": 2.37862,
            "10585": 2.36882,
            "10590": 2.38027,
            "10595": 2.37758,
            "10600": 2.33268,
            "10605": 2.37212,
            "10610": 2.36594,
            "10615": 2.36345,
            "10620": 2.34684,
            "10625": 2.41919,
            "10630": 2.37018,
            "10635": 2.32323,
            "10640": 2.3649,
            "10645": 2.42247,
            "10650": 2.36086,
            "10655": 2.30764,
            "10660": 2.348,
            "10665": 2.39867,
            "10670": 2.31573,
            "10675": 2.41654,
            "10680": 2.35743,
            "10685": 2.28909,
            "10690": 2.38576,
            "10695": 2.33077,
            "10700": 2.38759,
            "10705": 2.3863,
            "10710": 2.34409,
            "10715": 2.38283,
            "10720": 2.32748,
            "10725": 2.35498,
            "10730": 2.35039,
            "10735": 2.35471,
            "10740": 2.31827,
            "10745": 2.34059,
            "10750": 2.33574,
            "10755": 2.40551,
            "10760": 2.36631,
            "10765": 2.33806,
            "10770": 2.37451,
            "10775": 2.38596,
            "10780": 2.37059,
            "10785": 2.39212,
            "10790": 2.35117,
            "10795": 2.3904,
            "10800": 2.32374,
            "10805": 2.39726,
            "10810": 2.37614,
            "10815": 2.3553,
            "10820": 2.34478,
            "10825": 2.37237,
            "10830": 2.33964,
            "10835": 2.3485,
            "10840": 2.33094,
            "10845": 2.38895,
            "10850": 2.33302,
            "10855": 2.36518,
            "10860": 2.33262,
            "10865": 2.32199,
            "10870": 2.32482,
            "10875": 2.30493,
            "10880": 2.39438,
            "10885": 2.40446,
            "10890": 2.36207,
            "10895": 2.37301,
            "10900": 2.33379,
            "10905": 2.31397,
            "10910": 2.40899,
            "10915": 2.37243,
            "10920": 2.3758,
            "10925": 2.36623,
            "10930": 2.32019,
            "10935": 2.36161,
            "10940": 2.35709,
            "10945": 2.34878,
            "10950": 2.36962,
            "10955": 2.36398,
            "10960": 2.31397,
            "10965": 2.36562,
            "10970": 2.3583,
            "10975": 2.40985,
            "10980": 2.3781,
            "10985": 2.34498,
            "10990": 2.40001,
            "10995": 2.3672,
            "11000": 2.33893,
            "11005": 2.36245,
            "11010": 2.34836,
            "11015": 2.32906,
            "11020": 2.33621,
            "11025": 2.36933,
            "11030": 2.34186,
            "11035": 2.31576,
            "11040": 2.31933,
            "11045": 2.31919,
            "11050": 2.31897,
            "11055": 2.29051,
            "11060": 2.33978,
            "11065": 2.3119,
            "11070": 2.39842,
            "11075": 2.32101,
            "11080": 2.35606,
            "11085": 2.34028,
            "11090": 2.34747,
            "11095": 2.37211,
            "11100": 2.32957,
            "11105": 2.31737,
            "11110": 2.36384,
            "11115": 2.37329,
            "11120": 2.38274,
            "11125": 2.31626,
            "11130": 2.35197,
            "11135": 2.33464,
            "11140": 2.37551,
            "11145": 2.35157,
            "11150": 2.39785,
            "11155": 2.34206,
            "11160": 2.36772,
            "11165": 2.36464,
            "11170": 2.34238,
            "11175": 2.33468,
            "11180": 2.37671,
            "11185": 2.31336,
            "11190": 2.28079,
            "11195": 2.33083,
            "11200": 2.34705,
            "11205": 2.36331,
            "11210": 2.33356,
            "11215": 2.31979,
            "11220": 2.3448,
            "11225": 2.37516,
            "11230": 2.36812,
            "11235": 2.32151,
            "11240": 2.34463,
            "11245": 2.35847,
            "11250": 2.33422,
            "11255": 2.33794,
            "11260": 2.35873,
            "11265": 2.39072,
            "11270": 2.2892,
            "11275": 2.3165,
            "11280": 2.3704,
            "11285": 2.32406,
            "11290": 2.34783,
            "11295": 2.36604,
            "11300": 2.38247,
            "11305": 2.33566,
            "11310": 2.33149,
            "11315": 2.30059,
            "11320": 2.30617,
            "11325": 2.31599,
            "11330": 2.35495,
            "11335": 2.34008,
            "11340": 2.30955,
            "11345": 2.31469,
            "11350": 2.29656,
            "11355": 2.32534,
            "11360": 2.35152,
            "11365": 2.29442,
            "11370": 2.35416,
            "11375": 2.32906,
            "11380": 2.34243,
            "11385": 2.34892,
            "11390": 2.33603,
            "11395": 2.28851,
            "11400": 2.3102,
            "11405": 2.35523,
            "11410": 2.35698,
            "11415": 2.38675,
            "11420": 2.35342,
            "11425": 2.31015,
            "11430": 2.37109,
            "11435": 2.36371,
            "11440": 2.35039,
            "11445": 2.36451,
            "11450": 2.32322,
            "11455": 2.30546,
            "11460": 2.35207,
            "11465": 2.3442,
            "11470": 2.37537,
            "11475": 2.31381,
            "11480": 2.32586,
            "11485": 2.31116,
            "11490": 2.34703,
            "11495": 2.40854,
            "11500": 2.34205,
            "11505": 2.3525,
            "11510": 2.3652,
            "11515": 2.32359,
            "11520": 2.30633,
            "11525": 2.36165,
            "11530": 2.31593,
            "11535": 2.32316,
            "11540": 2.34794,
            "11545": 2.34435,
            "11550": 2.36612,
            "11555": 2.32736,
            "11560": 2.3511,
            "11565": 2.34131,
            "11570": 2.35152,
            "11575": 2.29761,
            "11580": 2.3296,
            "11585": 2.35345,
            "11590": 2.36481,
            "11595": 2.33643,
            "11600": 2.35888,
            "11605": 2.3235,
            "11610": 2.3627,
            "11615": 2.3609,
            "11620": 2.29636,
            "11625": 2.2769,
            "11630": 2.32824,
            "11635": 2.34398,
            "11640": 2.30587,
            "11645": 2.30877,
            "11650": 2.32859,
            "11655": 2.35253,
            "11660": 2.33697,
            "11665": 2.33118,
            "11670": 2.30107,
            "11675": 2.29905,
            "11680": 2.32579,
            "11685": 2.3365,
            "11690": 2.34545,
            "11695": 2.3185,
            "11700": 2.32594,
            "11705": 2.30312,
            "11710": 2.34508,
            "11715": 2.3159,
            "11720": 2.30013,
            "11725": 2.34135,
            "11730": 2.30673,
            "11735": 2.32959,
            "11740": 2.27327,
            "11745": 2.31924,
            "11750": 2.32847,
            "11755": 2.35343,
            "11760": 2.31323,
            "11765": 2.33591,
            "11770": 2.27588,
            "11775": 2.32611,
            "11780": 2.2558,
            "11785": 2.29841,
            "11790": 2.31362,
            "11795": 2.32061,
            "11800": 2.33531,
            "11805": 2.30389,
            "11810": 2.30648,
            "11815": 2.33051,
            "11820": 2.32042,
            "11825": 2.36169,
            "11830": 2.31796,
            "11835": 2.33908,
            "11840": 2.34234,
            "11845": 2.31871,
            "11850": 2.30518,
            "11855": 2.31532,
            "11860": 2.34365,
            "11865": 2.35918,
            "11870": 2.37963,
            "11875": 2.28094,
            "11880": 2.29379,
            "11885": 2.33757,
            "11890": 2.29331,
            "11895": 2.29026,
            "11900": 2.33451,
            "11905": 2.31957,
            "11910": 2.27792,
            "11915": 2.31251,
            "11920": 2.33527,
            "11925": 2.30383,
            "11930": 2.30797,
            "11935": 2.31763,
            "11940": 2.31918,
            "11945": 2.34259,
            "11950": 2.3003,
            "11955": 2.31499,
            "11960": 2.339,
            "11965": 2.29617,
            "11970": 2.28244,
            "11975": 2.33742,
            "11980": 2.307,
            "11985": 2.27878,
            "11990": 2.3052,
            "11995": 2.3307,
            "12000": 2.3261,
            "12005": 2.32673,
            "12010": 2.28887,
            "12015": 2.31036,
            "12020": 2.32835,
            "12025": 2.3354,
            "12030": 2.31192,
            "12035": 2.336,
            "12040": 2.31487,
            "12045": 2.31196,
            "12050": 2.30879,
            "12055": 2.33271,
            "12060": 2.29798,
            "12065": 2.32949,
            "12070": 2.30235,
            "12075": 2.27654,
            "12080": 2.3519,
            "12085": 2.33956,
            "12090": 2.33249,
            "12095": 2.28215,
            "12100": 2.31668,
            "12105": 2.31,
            "12110": 2.32992,
            "12115": 2.30644,
            "12120": 2.30659,
            "12125": 2.2963,
            "12130": 2.30302,
            "12135": 2.32825,
            "12140": 2.29602,
            "12145": 2.25652,
            "12150": 2.2603,
            "12155": 2.34303,
            "12160": 2.35823,
            "12165": 2.31986,
            "12170": 2.33446,
            "12175": 2.34322,
            "12180": 2.33054,
            "12185": 2.34225,
            "12190": 2.3349,
            "12195": 2.29838,
            "12200": 2.30196,
            "12205": 2.32374,
            "12210": 2.35625,
            "12215": 2.3039,
            "12220": 2.3005,
            "12225": 2.24425,
            "12230": 2.33456,
            "12235": 2.33882,
            "12240": 2.32397,
            "12245": 2.28614,
            "12250": 2.27273,
            "12255": 2.33512,
            "12260": 2.31251,
            "12265": 2.34116,
            "12270": 2.31202,
            "12275": 2.3128,
            "12280": 2.31892,
            "12285": 2.2861,
            "12290": 2.31089,
            "12295": 2.26519,
            "12300": 2.32882,
            "12305": 2.268,
            "12310": 2.28705,
            "12315": 2.3539,
            "12320": 2.29568,
            "12325": 2.32004,
            "12330": 2.2996,
            "12335": 2.31922,
            "12340": 2.33993,
            "12345": 2.36806,
            "12350": 2.34457,
            "12355": 2.30697,
            "12360": 2.31406,
            "12365": 2.33109,
            "12370": 2.29205,
            "12375": 2.30035,
            "12380": 2.29175,
            "12385": 2.29129,
            "12390": 2.25008,
            "12395": 2.30515,
            "12400": 2.29975,
            "12405": 2.31139,
            "12410": 2.30433,
            "12415": 2.28207,
            "12420": 2.31827,
            "12425": 2.30059,
            "12430": 2.31595,
            "12435": 2.30009,
            "12440": 2.33225,
            "12445": 2.32061,
            "12450": 2.30677,
            "12455": 2.24051,
            "12460": 2.33765,
            "12465": 2.36395,
            "12470": 2.27666,
            "12475": 2.27404,
            "12480": 2.2927,
            "12485": 2.30694,
            "12490": 2.33069,
            "12495": 2.26943,
            "12500": 2.32076,
            "12505": 2.3358,
            "12510": 2.35633,
            "12515": 2.26977,
            "12520": 2.31981,
            "12525": 2.28667,
            "12530": 2.3207,
            "12535": 2.27236,
            "12540": 2.28479,
            "12545": 2.29083,
            "12550": 2.31729,
            "12555": 2.32436,
            "12560": 2.30126,
            "12565": 2.33569,
            "12570": 2.28016,
            "12575": 2.30108,
            "12580": 2.31066,
            "12585": 2.29232,
            "12590": 2.33444,
            "12595": 2.3246,
            "12600": 2.28237,
            "12605": 2.31981,
            "12610": 2.36453,
            "12615": 2.30709,
            "12620": 2.33321,
            "12625": 2.33082,
            "12630": 2.29784,
            "12635": 2.33603,
            "12640": 2.29527,
            "12645": 2.28038,
            "12650": 2.32632,
            "12655": 2.26534,
            "12660": 2.34185,
            "12665": 2.3183,
            "12670": 2.31281,
            "12675": 2.3199,
            "12680": 2.275,
            "12685": 2.3665,
            "12690": 2.30372,
            "12695": 2.33211,
            "12700": 2.29318,
            "12705": 2.30709,
            "12710": 2.30722,
            "12715": 2.28764,
            "12720": "nan",
            "12725": "nan",
            "12730": "nan",
            "12735": "nan",
            "12740": "nan",
            "12745": "nan",
            "12750": "nan",
            "12755": "nan",
            "12760": "nan",
            "12765": "nan",
            "12770": "nan",
            "12775": "nan",
            "12780": "nan",
            "12785": "nan",
            "12790": "nan",
            "12795": "nan",
            "12800": "nan",
            "12805": "nan",
            "12810": "nan",
            "12815": "nan",
            "12820": "nan",
            "12825": "nan",
            "12830": "nan",
            "12835": "nan",
            "12840": "nan",
            "12845": "nan",
            "12850": "nan",
            "12855": "nan",
            "12860": "nan",
            "12865": "nan",
            "12870": "nan",
            "12875": "nan",
            "12880": "nan",
            "12885": "nan",
            "12890": "nan",
            "12895": "nan",
            "12900": "nan",
            "12905": "nan",
            "12910": "nan",
            "12915": "nan",
            "12920": "nan",
            "12925": "nan",
            "12930": "nan",
            "12935": "nan",
            "12940": "nan",
            "12945": "nan",
            "12950": "nan",
            "12955": "nan",
            "12960": "nan",
            "12965": "nan",
            "12970": "nan",
            "12975": "nan",
            "12980": "nan",
            "12985": "nan",
            "12990": "nan",
            "12995": "nan",
            "13000": "nan"
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 13000,
        "step_interval": 5,
        "values": {
            "1": 956237760.0,
            "5": 967339648.0,
            "10": 971387456.0,
            "15": 946482432.0,
            "20": 960006656.0,
            "25": 1076542208.0,
            "30": 1190547584.0,
            "35": 1270033664.0,
            "40": 1234848000.0,
            "45": 1146008832.0,
            "50": 1105336704.0,
            "55": 1058935168.0,
            "60": 1024844352.0,
            "65": 1009768384.0,
            "70": 987690240.0,
            "75": 988810304.0,
            "80": 1012225024.0,
            "85": 1010933120.0,
            "90": 989566016.0,
            "95": 961647488.0,
            "100": 975055936.0,
            "105": 981783040.0,
            "110": 979816448.0,
            "115": 977699648.0,
            "120": 961670144.0,
            "125": 943075072.0,
            "130": 975285440.0,
            "135": 964823040.0,
            "140": 963104128.0,
            "145": 976365952.0,
            "150": 921578432.0,
            "155": 968343744.0,
            "160": 956751936.0,
            "165": 959846848.0,
            "170": 974339456.0,
            "175": 949012608.0,
            "180": 946737088.0,
            "185": 972001408.0,
            "190": 969072000.0,
            "195": 985125248.0,
            "200": 945786816.0,
            "205": 958356736.0,
            "210": 979441280.0,
            "215": 967494208.0,
            "220": 956434816.0,
            "225": 962397632.0,
            "230": 948169600.0,
            "235": 965207168.0,
            "240": 966058176.0,
            "245": 969162752.0,
            "250": 974444672.0,
            "255": 925064192.0,
            "260": 965640832.0,
            "265": 970673600.0,
            "270": 959140224.0,
            "275": 954000256.0,
            "280": 963433792.0,
            "285": 945782912.0,
            "290": 974115392.0,
            "295": 966706496.0,
            "300": 967148032.0,
            "305": 964512000.0,
            "310": 940358208.0,
            "315": 967391168.0,
            "320": 969008512.0,
            "325": 980559872.0,
            "330": 972096384.0,
            "335": 946862720.0,
            "340": 966597184.0,
            "345": 973025408.0,
            "350": 973919488.0,
            "355": 963264512.0,
            "360": 948352576.0,
            "365": 964823104.0,
            "370": 962953728.0,
            "375": 958455040.0,
            "380": 947150976.0,
            "385": 955996928.0,
            "390": 945403264.0,
            "395": 970420608.0,
            "400": 979779712.0,
            "405": 968353664.0,
            "410": 970074112.0,
            "415": 953158144.0,
            "420": 943571904.0,
            "425": 954780736.0,
            "430": 962666816.0,
            "435": 977082496.0,
            "440": 954811328.0,
            "445": 971893696.0,
            "450": 963510208.0,
            "455": 973132288.0,
            "460": 983712448.0,
            "465": 945280448.0,
            "470": 942057216.0,
            "475": 967006592.0,
            "480": 966105088.0,
            "485": 976415616.0,
            "490": 962534848.0,
            "495": 945465216.0,
            "500": 964458880.0,
            "505": 986012672.0,
            "510": 965685888.0,
            "515": 943412224.0,
            "520": 945015360.0,
            "525": 971263040.0,
            "530": 971893056.0,
            "535": 979142848.0,
            "540": 969537664.0,
            "545": 954128640.0,
            "550": 951271424.0,
            "555": 987225408.0,
            "560": 960430848.0,
            "565": 966615360.0,
            "570": 975733120.0,
            "575": 927225472.0,
            "580": 970700032.0,
            "585": 961176896.0,
            "590": 972961408.0,
            "595": 963684608.0,
            "600": 937080000.0,
            "605": 951480064.0,
            "610": 963364224.0,
            "615": 970012416.0,
            "620": 976467264.0,
            "625": 949584192.0,
            "630": 954445952.0,
            "635": 986042496.0,
            "640": 980976832.0,
            "645": 955012608.0,
            "650": 958549760.0,
            "655": 951650048.0,
            "660": 961041984.0,
            "665": 967554368.0,
            "670": 962514432.0,
            "675": 968335808.0,
            "680": 965619904.0,
            "685": 962866496.0,
            "690": 961917952.0,
            "695": 954772480.0,
            "700": 970340928.0,
            "705": 945515968.0,
            "710": 943884416.0,
            "715": 973357632.0,
            "720": 968367232.0,
            "725": 978492544.0,
            "730": 952195904.0,
            "735": 948814720.0,
            "740": 955634432.0,
            "745": 975869376.0,
            "750": 981239232.0,
            "755": 962157824.0,
            "760": 951961152.0,
            "765": 967343296.0,
            "770": 976150784.0,
            "775": 970550080.0,
            "780": 977540480.0,
            "785": 931531136.0,
            "790": 960443264.0,
            "795": 964584768.0,
            "800": 967026688.0,
            "805": 962321216.0,
            "810": 940975872.0,
            "815": 949036416.0,
            "820": 953185792.0,
            "825": 954505664.0,
            "830": 976445888.0,
            "835": 956076032.0,
            "840": 948407360.0,
            "845": 965153408.0,
            "850": 966027904.0,
            "855": 960903808.0,
            "860": 976024384.0,
            "865": 938161280.0,
            "870": 966416896.0,
            "875": 972313728.0,
            "880": 963123712.0,
            "885": 967747712.0,
            "890": 949970752.0,
            "895": 960021504.0,
            "900": 974229184.0,
            "905": 963966528.0,
            "910": 958435776.0,
            "915": 956355328.0,
            "920": 943975104.0,
            "925": 960835136.0,
            "930": 978849408.0,
            "935": 971071936.0,
            "940": 960909504.0,
            "945": 945066880.0,
            "950": 957427712.0,
            "955": 979038848.0,
            "960": 983588864.0,
            "965": 966168320.0,
            "970": 951230336.0,
            "975": 961575680.0,
            "980": 968069120.0,
            "985": 968990784.0,
            "990": 984392960.0,
            "995": 953292288.0,
            "1000": 934782656.0,
            "1005": 960147584.0,
            "1010": 971538304.0,
            "1015": 985188224.0,
            "1020": 962782144.0,
            "1025": 935007616.0,
            "1030": 974681024.0,
            "1035": 964994304.0,
            "1040": 980466944.0,
            "1045": 960830592.0,
            "1050": 955201664.0,
            "1055": 957781376.0,
            "1060": 967747584.0,
            "1065": 967118080.0,
            "1070": 966603520.0,
            "1075": 950061440.0,
            "1080": 954509312.0,
            "1085": 967249408.0,
            "1090": 977134912.0,
            "1095": 961238080.0,
            "1100": 979613184.0,
            "1105": 953366336.0,
            "1110": 965955456.0,
            "1115": 966984256.0,
            "1120": 970352576.0,
            "1125": 965709056.0,
            "1130": 954945792.0,
            "1135": 965843264.0,
            "1140": 965177344.0,
            "1145": 970987328.0,
            "1150": 955554752.0,
            "1155": 930579584.0,
            "1160": 957776000.0,
            "1165": 978125120.0,
            "1170": 974299968.0,
            "1175": 973059200.0,
            "1180": 973082688.0,
            "1185": 947341888.0,
            "1190": 964790912.0,
            "1195": 953137024.0,
            "1200": 972845248.0,
            "1205": 988480128.0,
            "1210": 931125888.0,
            "1215": 968647168.0,
            "1220": 969161216.0,
            "1225": 975952320.0,
            "1230": 967334016.0,
            "1235": 943447296.0,
            "1240": 955856384.0,
            "1245": 981504448.0,
            "1250": 966112256.0,
            "1255": 973670208.0,
            "1260": 946496640.0,
            "1265": 963999616.0,
            "1270": 960487744.0,
            "1275": 973616640.0,
            "1280": 961113664.0,
            "1285": 957581760.0,
            "1290": 952528704.0,
            "1295": 971614912.0,
            "1300": 968864896.0,
            "1305": 963739712.0,
            "1310": 963335552.0,
            "1315": 943555456.0,
            "1320": 966308096.0,
            "1325": 989786880.0,
            "1330": 969508480.0,
            "1335": 972302080.0,
            "1340": 972270272.0,
            "1345": 960656896.0,
            "1350": 968636672.0,
            "1355": 955852544.0,
            "1360": 971822400.0,
            "1365": 960385920.0,
            "1370": 948788736.0,
            "1375": 973532544.0,
            "1380": 953470208.0,
            "1385": 969149184.0,
            "1390": 975719552.0,
            "1395": 931676800.0,
            "1400": 945855296.0,
            "1405": 976752896.0,
            "1410": 974510528.0,
            "1415": 967572608.0,
            "1420": 966746304.0,
            "1425": 937378048.0,
            "1430": 973914368.0,
            "1435": 978334336.0,
            "1440": 964180992.0,
            "1445": 958057600.0,
            "1450": 946145984.0,
            "1455": 983924736.0,
            "1460": 968651840.0,
            "1465": 948746880.0,
            "1470": 984245632.0,
            "1475": 943905664.0,
            "1480": 963976192.0,
            "1485": 957350016.0,
            "1490": 961261824.0,
            "1495": 980541184.0,
            "1500": 958332224.0,
            "1505": 942866816.0,
            "1510": 984179456.0,
            "1515": 959092992.0,
            "1520": 959106304.0,
            "1525": 952786752.0,
            "1530": 957745472.0,
            "1535": 949430784.0,
            "1540": 971086464.0,
            "1545": 963132736.0,
            "1550": 978667392.0,
            "1555": 952319680.0,
            "1560": 980089024.0,
            "1565": 967317120.0,
            "1570": 973840704.0,
            "1575": 975495680.0,
            "1580": 941863424.0,
            "1585": 970031104.0,
            "1590": 983821568.0,
            "1595": 948632576.0,
            "1600": 967443968.0,
            "1605": 952444544.0,
            "1610": 969617536.0,
            "1615": 983148800.0,
            "1620": 968018688.0,
            "1625": 970716288.0,
            "1630": 962888192.0,
            "1635": 942311616.0,
            "1640": 981611968.0,
            "1645": 973976960.0,
            "1650": 974184960.0,
            "1655": 967265728.0,
            "1660": 940687936.0,
            "1665": 961705216.0,
            "1670": 962902528.0,
            "1675": 971279360.0,
            "1680": 980877696.0,
            "1685": 944417856.0,
            "1690": 964687488.0,
            "1695": 965642752.0,
            "1700": 966344896.0,
            "1705": 985201280.0,
            "1710": 978355328.0,
            "1715": 943212288.0,
            "1720": 977087360.0,
            "1725": 965872832.0,
            "1730": 968971328.0,
            "1735": 965087168.0,
            "1740": 949714688.0,
            "1745": 970011392.0,
            "1750": 959684352.0,
            "1755": 960088896.0,
            "1760": 966381184.0,
            "1765": 951817856.0,
            "1770": 954665024.0,
            "1775": 973751808.0,
            "1780": 970535104.0,
            "1785": 968826944.0,
            "1790": 950233984.0,
            "1795": 945131712.0,
            "1800": 984665088.0,
            "1805": 987162880.0,
            "1810": 977768192.0,
            "1815": 948005760.0,
            "1820": 949209088.0,
            "1825": 978853760.0,
            "1830": 966363200.0,
            "1835": 964133760.0,
            "1840": 972319232.0,
            "1845": 935413504.0,
            "1850": 952499072.0,
            "1855": 980048576.0,
            "1860": 975868864.0,
            "1865": 958965376.0,
            "1870": 958948224.0,
            "1875": 932594304.0,
            "1880": 973574208.0,
            "1885": 978846784.0,
            "1890": 971359808.0,
            "1895": 959212224.0,
            "1900": 947392256.0,
            "1905": 981830016.0,
            "1910": 969123136.0,
            "1915": 970039744.0,
            "1920": 975599104.0,
            "1925": 960493312.0,
            "1930": 977924736.0,
            "1935": 963249792.0,
            "1940": 952459776.0,
            "1945": 981338048.0,
            "1950": 939170368.0,
            "1955": 960602880.0,
            "1960": 970031808.0,
            "1965": 981175168.0,
            "1970": 962046208.0,
            "1975": 952821760.0,
            "1980": 936848960.0,
            "1985": 975939456.0,
            "1990": 965967104.0,
            "1995": 962611712.0,
            "2000": 960555584.0,
            "2005": 954499392.0,
            "2010": 975580672.0,
            "2015": 991801600.0,
            "2020": 975435264.0,
            "2025": 974304768.0,
            "2030": 952085696.0,
            "2035": 967849344.0,
            "2040": 987458752.0,
            "2045": 976478912.0,
            "2050": 984702464.0,
            "2055": 942838592.0,
            "2060": 942596352.0,
            "2065": 966210752.0,
            "2070": 969622464.0,
            "2075": 980553600.0,
            "2080": 977598720.0,
            "2085": 939639168.0,
            "2090": 969874176.0,
            "2095": 961276288.0,
            "2100": 976718208.0,
            "2105": 972536576.0,
            "2110": 959902720.0,
            "2115": 956877568.0,
            "2120": 977482432.0,
            "2125": 962568064.0,
            "2130": 979619456.0,
            "2135": 950537152.0,
            "2140": 946995328.0,
            "2145": 962277376.0,
            "2150": 973403072.0,
            "2155": 972689664.0,
            "2160": 970312512.0,
            "2165": 948642560.0,
            "2170": 961545216.0,
            "2175": 969376384.0,
            "2180": 969329728.0,
            "2185": 947448064.0,
            "2190": 940481664.0,
            "2195": 986087808.0,
            "2200": 961862144.0,
            "2205": 978924864.0,
            "2210": 964102144.0,
            "2215": 963500416.0,
            "2220": 951312128.0,
            "2225": 969315520.0,
            "2230": 976330816.0,
            "2235": 974025728.0,
            "2240": 975493376.0,
            "2245": 960229056.0,
            "2250": 967640704.0,
            "2255": 969131584.0,
            "2260": 975063552.0,
            "2265": 968259840.0,
            "2270": 951744448.0,
            "2275": 962767488.0,
            "2280": 969640000.0,
            "2285": 971694016.0,
            "2290": 962890944.0,
            "2295": 931408448.0,
            "2300": 959905152.0,
            "2305": 970426368.0,
            "2310": 967444352.0,
            "2315": 970905152.0,
            "2320": 975589824.0,
            "2325": 938587264.0,
            "2330": 988437184.0,
            "2335": 977489856.0,
            "2340": 964595584.0,
            "2345": 964166528.0,
            "2350": 947556096.0,
            "2355": 977029056.0,
            "2360": 966899136.0,
            "2365": 977294400.0,
            "2370": 965070848.0,
            "2375": 953964736.0,
            "2380": 962918016.0,
            "2385": 967194880.0,
            "2390": 963073664.0,
            "2395": 974466432.0,
            "2400": 958409984.0,
            "2405": 968120704.0,
            "2410": 951585152.0,
            "2415": 965904832.0,
            "2420": 966516864.0,
            "2425": 959046336.0,
            "2430": 956685696.0,
            "2435": 961387264.0,
            "2440": 959755008.0,
            "2445": 970889216.0,
            "2450": 961997248.0,
            "2455": 922721536.0,
            "2460": 951951360.0,
            "2465": 955731008.0,
            "2470": 972571648.0,
            "2475": 973811776.0,
            "2480": 943895936.0,
            "2485": 944186624.0,
            "2490": 972411328.0,
            "2495": 974450496.0,
            "2500": 973908096.0,
            "2505": 958491648.0,
            "2510": 939508544.0,
            "2515": 979552832.0,
            "2520": 970472960.0,
            "2525": 964387776.0,
            "2530": 955798144.0,
            "2535": 936597568.0,
            "2540": 969027328.0,
            "2545": 970384320.0,
            "2550": 969462912.0,
            "2555": 969439936.0,
            "2560": 964979072.0,
            "2565": 959765312.0,
            "2570": 985176576.0,
            "2575": 957425792.0,
            "2580": 967424000.0,
            "2585": 966024064.0,
            "2590": 956356288.0,
            "2595": 981830144.0,
            "2600": 959531712.0,
            "2605": 963001600.0,
            "2610": 965973504.0,
            "2615": 951925696.0,
            "2620": 971242432.0,
            "2625": 976456064.0,
            "2630": 974410688.0,
            "2635": 948071424.0,
            "2640": 948138368.0,
            "2645": 963037376.0,
            "2650": 953982592.0,
            "2655": 977112768.0,
            "2660": 949623296.0,
            "2665": 953930048.0,
            "2670": 959063680.0,
            "2675": 979276032.0,
            "2680": 961397824.0,
            "2685": 970702272.0,
            "2690": 965222464.0,
            "2695": 943554880.0,
            "2700": 969425152.0,
            "2705": 978962240.0,
            "2710": 971811328.0,
            "2715": 990814528.0,
            "2720": 942649344.0,
            "2725": 967956032.0,
            "2730": 955465088.0,
            "2735": 970673152.0,
            "2740": 977920768.0,
            "2745": 932279232.0,
            "2750": 947856640.0,
            "2755": 956317824.0,
            "2760": 981699840.0,
            "2765": 966112448.0,
            "2770": 948914688.0,
            "2775": 935832064.0,
            "2780": 964779712.0,
            "2785": 969568704.0,
            "2790": 974273408.0,
            "2795": 966887168.0,
            "2800": 944389952.0,
            "2805": 964353920.0,
            "2810": 969609792.0,
            "2815": 975845952.0,
            "2820": 963085568.0,
            "2825": 937630144.0,
            "2830": 956741440.0,
            "2835": 986321792.0,
            "2840": 961759296.0,
            "2845": 967500032.0,
            "2850": 951712064.0,
            "2855": 962088704.0,
            "2860": 954242816.0,
            "2865": 955881728.0,
            "2870": 944663168.0,
            "2875": 974664512.0,
            "2880": 968201792.0,
            "2885": 981081344.0,
            "2890": 953455232.0,
            "2895": 957179904.0,
            "2900": 964991552.0,
            "2905": 931708288.0,
            "2910": 955730816.0,
            "2915": 979475584.0,
            "2920": 970492672.0,
            "2925": 964976576.0,
            "2930": 964047872.0,
            "2935": 940141568.0,
            "2940": 964910976.0,
            "2945": 989149696.0,
            "2950": 965208448.0,
            "2955": 965105920.0,
            "2960": 933161408.0,
            "2965": 968794496.0,
            "2970": 973034880.0,
            "2975": 958093184.0,
            "2980": 964497856.0,
            "2985": 937269888.0,
            "2990": 951254144.0,
            "2995": 978316736.0,
            "3000": 969276160.0,
            "3005": 974687680.0,
            "3010": 950236416.0,
            "3015": 943841920.0,
            "3020": 958436160.0,
            "3025": 975185280.0,
            "3030": 965017088.0,
            "3035": 963452544.0,
            "3040": 952132352.0,
            "3045": 989793344.0,
            "3050": 965544896.0,
            "3055": 982520768.0,
            "3060": 971226112.0,
            "3065": 943916032.0,
            "3070": 978408000.0,
            "3075": 975203840.0,
            "3080": 960991552.0,
            "3085": 962352192.0,
            "3090": 945951680.0,
            "3095": 938116416.0,
            "3100": 972927936.0,
            "3105": 962001024.0,
            "3110": 970668800.0,
            "3115": 963402048.0,
            "3120": 947128832.0,
            "3125": 972719488.0,
            "3130": 952985856.0,
            "3135": 966040384.0,
            "3140": 968500800.0,
            "3145": 937851968.0,
            "3150": 975022144.0,
            "3155": 976826368.0,
            "3160": 969641856.0,
            "3165": 982207040.0,
            "3170": 937973440.0,
            "3175": 953838080.0,
            "3180": 983821312.0,
            "3185": 965180288.0,
            "3190": 968481408.0,
            "3195": 950943936.0,
            "3200": 945101696.0,
            "3205": 959862912.0,
            "3210": 957499456.0,
            "3215": 958032768.0,
            "3220": 968142016.0,
            "3225": 935626752.0,
            "3230": 962588608.0,
            "3235": 975788416.0,
            "3240": 962634816.0,
            "3245": 981286080.0,
            "3250": 943272704.0,
            "3255": 954612224.0,
            "3260": 980375424.0,
            "3265": 963632192.0,
            "3270": 965176512.0,
            "3275": 959744000.0,
            "3280": 967058432.0,
            "3285": 982489984.0,
            "3290": 947702528.0,
            "3295": 966434112.0,
            "3300": 959179136.0,
            "3305": 949142784.0,
            "3310": 979523008.0,
            "3315": 964295552.0,
            "3320": 969206848.0,
            "3325": 956206080.0,
            "3330": 941178880.0,
            "3335": 964986816.0,
            "3340": 956914432.0,
            "3345": 972501568.0,
            "3350": 964584448.0,
            "3355": 943359616.0,
            "3360": 970048192.0,
            "3365": 969467008.0,
            "3370": 954775680.0,
            "3375": 958689792.0,
            "3380": 971476160.0,
            "3385": 947985792.0,
            "3390": 965793152.0,
            "3395": 978403584.0,
            "3400": 978139840.0,
            "3405": 976735808.0,
            "3410": 924207040.0,
            "3415": 955437376.0,
            "3420": 971832192.0,
            "3425": 977167040.0,
            "3430": 973838592.0,
            "3435": 936083776.0,
            "3440": 970515200.0,
            "3445": 957315776.0,
            "3450": 959852224.0,
            "3455": 963865664.0,
            "3460": 967881856.0,
            "3465": 931330944.0,
            "3470": 952347136.0,
            "3475": 973717632.0,
            "3480": 959751680.0,
            "3485": 979947840.0,
            "3490": 944683264.0,
            "3495": 953917632.0,
            "3500": 969331264.0,
            "3505": 964360704.0,
            "3510": 971234112.0,
            "3515": 955945984.0,
            "3520": 958734336.0,
            "3525": 971927296.0,
            "3530": 964137664.0,
            "3535": 983204352.0,
            "3540": 937502016.0,
            "3545": 944743040.0,
            "3550": 984474624.0,
            "3555": 978070528.0,
            "3560": 974387072.0,
            "3565": 968812544.0,
            "3570": 946707328.0,
            "3575": 976117248.0,
            "3580": 977505536.0,
            "3585": 954581376.0,
            "3590": 956437952.0,
            "3595": 951456640.0,
            "3600": 989010176.0,
            "3605": 962020480.0,
            "3610": 965066240.0,
            "3615": 974647360.0,
            "3620": 954887488.0,
            "3625": 939528768.0,
            "3630": 990161664.0,
            "3635": 971447744.0,
            "3640": 976038464.0,
            "3645": 961501440.0,
            "3650": 945815680.0,
            "3655": 965794816.0,
            "3660": 976207360.0,
            "3665": 964019648.0,
            "3670": 977444032.0,
            "3675": 943500672.0,
            "3680": 958209408.0,
            "3685": 964297920.0,
            "3690": 982118912.0,
            "3695": 963137856.0,
            "3700": 950582528.0,
            "3705": 947345088.0,
            "3710": 982367872.0,
            "3715": 972706624.0,
            "3720": 976137664.0,
            "3725": 964038080.0,
            "3730": 948861504.0,
            "3735": 967078400.0,
            "3740": 960973440.0,
            "3745": 969320832.0,
            "3750": 963948800.0,
            "3755": 953432640.0,
            "3760": 976653952.0,
            "3765": 979841920.0,
            "3770": 972371840.0,
            "3775": 972384576.0,
            "3780": 952586048.0,
            "3785": 960237888.0,
            "3790": 985598208.0,
            "3795": 969183872.0,
            "3800": 957876992.0,
            "3805": 972407552.0,
            "3810": 954517952.0,
            "3815": 974567360.0,
            "3820": 963019264.0,
            "3825": 962071104.0,
            "3830": 969396224.0,
            "3835": 934733056.0,
            "3840": 971256576.0,
            "3845": 986850048.0,
            "3850": 968848192.0,
            "3855": 965247744.0,
            "3860": 948069952.0,
            "3865": 975049344.0,
            "3870": 985111424.0,
            "3875": 983042496.0,
            "3880": 963615296.0,
            "3885": 953023360.0,
            "3890": 960284608.0,
            "3895": 960602624.0,
            "3900": 984925312.0,
            "3905": 976210432.0,
            "3910": 987334656.0,
            "3915": 946017792.0,
            "3920": 974866176.0,
            "3925": 961235584.0,
            "3930": 976763392.0,
            "3935": 978900480.0,
            "3940": 950267456.0,
            "3945": 960260992.0,
            "3950": 974185344.0,
            "3955": 972955136.0,
            "3960": 974053120.0,
            "3965": 950861824.0,
            "3970": 980668096.0,
            "3975": 960724992.0,
            "3980": 977530496.0,
            "3985": 962936320.0,
            "3990": 972730048.0,
            "3995": 953687936.0,
            "4000": 974987904.0,
            "4005": 971633792.0,
            "4010": 978383488.0,
            "4015": 971478976.0,
            "4020": 950323136.0,
            "4025": 968418944.0,
            "4030": 997923008.0,
            "4035": 978535744.0,
            "4040": 959811840.0,
            "4045": 939638656.0,
            "4050": 944717824.0,
            "4055": 980959616.0,
            "4060": 977683456.0,
            "4065": 975729984.0,
            "4070": 942139968.0,
            "4075": 945740416.0,
            "4080": 988715584.0,
            "4085": 962066688.0,
            "4090": 983344448.0,
            "4095": 986946816.0,
            "4100": 957186624.0,
            "4105": 954085888.0,
            "4110": 966462976.0,
            "4115": 976029056.0,
            "4120": 983518144.0,
            "4125": 960016640.0,
            "4130": 967253312.0,
            "4135": 971431232.0,
            "4140": 963157504.0,
            "4145": 956188416.0,
            "4150": 960281856.0,
            "4155": 946191936.0,
            "4160": 968439616.0,
            "4165": 970303104.0,
            "4170": 971971200.0,
            "4175": 955861120.0,
            "4180": 940972800.0,
            "4185": 968293568.0,
            "4190": 967979392.0,
            "4195": 989198144.0,
            "4200": 962629056.0,
            "4205": 960582656.0,
            "4210": 971851008.0,
            "4215": 974120256.0,
            "4220": 981144192.0,
            "4225": 975207616.0,
            "4230": 952653184.0,
            "4235": 958457472.0,
            "4240": 966762368.0,
            "4245": 961708096.0,
            "4250": 965808640.0,
            "4255": 958217536.0,
            "4260": 949521216.0,
            "4265": 964125888.0,
            "4270": 978346752.0,
            "4275": 975384960.0,
            "4280": 962647936.0,
            "4285": 951425856.0,
            "4290": 980017024.0,
            "4295": 968791680.0,
            "4300": 958273792.0,
            "4305": 966766720.0,
            "4310": 939564096.0,
            "4315": 949379008.0,
            "4320": 984485696.0,
            "4325": 982522048.0,
            "4330": 974697600.0,
            "4335": 949396736.0,
            "4340": 959441600.0,
            "4345": 956595456.0,
            "4350": 979898368.0,
            "4355": 968820928.0,
            "4360": 966300672.0,
            "4365": 941195776.0,
            "4370": 969445376.0,
            "4375": 972957440.0,
            "4380": 966111296.0,
            "4385": 971986624.0,
            "4390": 954114560.0,
            "4395": 951807488.0,
            "4400": 973592000.0,
            "4405": 972416640.0,
            "4410": 967883456.0,
            "4415": 958912768.0,
            "4420": 960729856.0,
            "4425": 976366400.0,
            "4430": 965893824.0,
            "4435": 975864320.0,
            "4440": 962282176.0,
            "4445": 954800192.0,
            "4450": 978281664.0,
            "4455": 960169216.0,
            "4460": 968545280.0,
            "4465": 968756224.0,
            "4470": 944114816.0,
            "4475": 951960640.0,
            "4480": 978776320.0,
            "4485": 968266624.0,
            "4490": 956987776.0,
            "4495": 938865152.0,
            "4500": 953203968.0,
            "4505": 977130368.0,
            "4510": 978652160.0,
            "4515": 962467840.0,
            "4520": 958787584.0,
            "4525": 958077952.0,
            "4530": 964403968.0,
            "4535": 976543488.0,
            "4540": 976798272.0,
            "4545": 970083840.0,
            "4550": 953137344.0,
            "4555": 959504640.0,
            "4560": 972460992.0,
            "4565": 973417600.0,
            "4570": 978848640.0,
            "4575": 957772992.0,
            "4580": 963142528.0,
            "4585": 957345024.0,
            "4590": 986468992.0,
            "4595": 960139520.0,
            "4600": 952193152.0,
            "4605": 959225216.0,
            "4610": 963698432.0,
            "4615": 957937536.0,
            "4620": 960168960.0,
            "4625": 973774208.0,
            "4630": 944470464.0,
            "4635": 976988288.0,
            "4640": 960261824.0,
            "4645": 981943488.0,
            "4650": 962325632.0,
            "4655": 939396864.0,
            "4660": 963974784.0,
            "4665": 962479040.0,
            "4670": 976585664.0,
            "4675": 963416384.0,
            "4680": 957403008.0,
            "4685": 949551936.0,
            "4690": 956901952.0,
            "4695": 969876992.0,
            "4700": 961140800.0,
            "4705": 970707136.0,
            "4710": 934407872.0,
            "4715": 970388608.0,
            "4720": 966252096.0,
            "4725": 980180160.0,
            "4730": 965752192.0,
            "4735": 937796864.0,
            "4740": 960051136.0,
            "4745": 975947392.0,
            "4750": 967910656.0,
            "4755": 984873536.0,
            "4760": 959099008.0,
            "4765": 955138112.0,
            "4770": 958547200.0,
            "4775": 990993920.0,
            "4780": 976744512.0,
            "4785": 967471104.0,
            "4790": 943658112.0,
            "4795": 955775680.0,
            "4800": 967660672.0,
            "4805": 976444544.0,
            "4810": 965075072.0,
            "4815": 957944832.0,
            "4820": 973918976.0,
            "4825": 961444160.0,
            "4830": 962527616.0,
            "4835": 972470272.0,
            "4840": 948840256.0,
            "4845": 965641344.0,
            "4850": 960193984.0,
            "4855": 964069248.0,
            "4860": 962964928.0,
            "4865": 967457920.0,
            "4870": 957132800.0,
            "4875": 983500672.0,
            "4880": 956973824.0,
            "4885": 976940608.0,
            "4890": 959727744.0,
            "4895": 942127296.0,
            "4900": 973552320.0,
            "4905": 975127424.0,
            "4910": 969170944.0,
            "4915": 970053056.0,
            "4920": 941108480.0,
            "4925": 954698816.0,
            "4930": 976972928.0,
            "4935": 963676928.0,
            "4940": 972549504.0,
            "4945": 959994368.0,
            "4950": 940720256.0,
            "4955": 968012160.0,
            "4960": 976789184.0,
            "4965": 960947328.0,
            "4970": 958579328.0,
            "4975": 933677440.0,
            "4980": 960725888.0,
            "4985": 962937792.0,
            "4990": 963466944.0,
            "4995": 986209664.0,
            "5000": 940640704.0,
            "5005": 968874752.0,
            "5010": 970197824.0,
            "5015": 965159680.0,
            "5020": 966679232.0,
            "5025": 949349824.0,
            "5030": 953478208.0,
            "5035": 967279104.0,
            "5040": 955578368.0,
            "5045": 969055232.0,
            "5050": 953342976.0,
            "5055": 954799232.0,
            "5060": 962992704.0,
            "5065": 952133248.0,
            "5070": 973547712.0,
            "5075": 978507392.0,
            "5080": 942804736.0,
            "5085": 965765184.0,
            "5090": 972832256.0,
            "5095": 964348416.0,
            "5100": 958305664.0,
            "5105": 965256064.0,
            "5110": 950316352.0,
            "5115": 972296256.0,
            "5120": 960314560.0,
            "5125": 969694272.0,
            "5130": 938721536.0,
            "5135": 943668992.0,
            "5140": 969825408.0,
            "5145": 968528896.0,
            "5150": 970513024.0,
            "5155": 972533568.0,
            "5160": 926488576.0,
            "5165": 961518208.0,
            "5170": 966811776.0,
            "5175": 966011776.0,
            "5180": 963618304.0,
            "5185": 930814464.0,
            "5190": 949718016.0,
            "5195": 972335808.0,
            "5200": 973709440.0,
            "5205": 968151680.0,
            "5210": 960453248.0,
            "5215": 928729984.0,
            "5220": 979075840.0,
            "5225": 984717760.0,
            "5230": 974973440.0,
            "5235": 975006144.0,
            "5240": 944288384.0,
            "5245": 970786240.0,
            "5250": 972391040.0,
            "5255": 966871168.0,
            "5260": 976692544.0,
            "5265": 942199104.0,
            "5270": 969176896.0,
            "5275": 970048640.0,
            "5280": 962752640.0,
            "5285": 964022784.0,
            "5290": 932477120.0,
            "5295": 951640192.0,
            "5300": 975575680.0,
            "5305": 951864064.0,
            "5310": 967966144.0,
            "5315": 955743360.0,
            "5320": 950921664.0,
            "5325": 972942144.0,
            "5330": 967696128.0,
            "5335": 967535616.0,
            "5340": 966417280.0,
            "5345": 962874624.0,
            "5350": 978862976.0,
            "5355": 972203328.0,
            "5360": 963819072.0,
            "5365": 965210368.0,
            "5370": 947812992.0,
            "5375": 948759872.0,
            "5380": 967162432.0,
            "5385": 980467008.0,
            "5390": 965227392.0,
            "5395": 955172224.0,
            "5400": 948212288.0,
            "5405": 974243520.0,
            "5410": 967795200.0,
            "5415": 975977728.0,
            "5420": 967321728.0,
            "5425": 937376192.0,
            "5430": 963821760.0,
            "5435": 971825536.0,
            "5440": 969029120.0,
            "5445": 957378176.0,
            "5450": 919443456.0,
            "5455": 952003968.0,
            "5460": 962289664.0,
            "5465": 978917504.0,
            "5470": 980871040.0,
            "5475": 941549312.0,
            "5480": 955693632.0,
            "5485": 964840000.0,
            "5490": 975924608.0,
            "5495": 962731904.0,
            "5500": 971132416.0,
            "5505": 957026112.0,
            "5510": 968525056.0,
            "5515": 945503808.0,
            "5520": 963158144.0,
            "5525": 975845632.0,
            "5530": 936597824.0,
            "5535": 970633088.0,
            "5540": 960331776.0,
            "5545": 971949952.0,
            "5550": 967847296.0,
            "5555": 955832704.0,
            "5560": 954630592.0,
            "5565": 968800768.0,
            "5570": 945161664.0,
            "5575": 960575744.0,
            "5580": 960610816.0,
            "5585": 959494656.0,
            "5590": 977618816.0,
            "5595": 975182400.0,
            "5600": 963009472.0,
            "5605": 964090752.0,
            "5610": 943130880.0,
            "5615": 966527680.0,
            "5620": 963273472.0,
            "5625": 982324096.0,
            "5630": 975962688.0,
            "5635": 957432256.0,
            "5640": 951431040.0,
            "5645": 967756224.0,
            "5650": 979095360.0,
            "5655": 983558976.0,
            "5660": 956530112.0,
            "5665": 953585984.0,
            "5670": 966085888.0,
            "5675": 967701504.0,
            "5680": 978635136.0,
            "5685": 961971200.0,
            "5690": 935896576.0,
            "5695": 963738688.0,
            "5700": 952586560.0,
            "5705": 974499776.0,
            "5710": 971280128.0,
            "5715": 946048000.0,
            "5720": 975003520.0,
            "5725": 967399872.0,
            "5730": 978524800.0,
            "5735": 964881152.0,
            "5740": 943709760.0,
            "5745": 971179264.0,
            "5750": 982024640.0,
            "5755": 956767360.0,
            "5760": 963708224.0,
            "5765": 957645376.0,
            "5770": 955673664.0,
            "5775": 970860288.0,
            "5780": 962959104.0,
            "5785": 970821760.0,
            "5790": 974947840.0,
            "5795": 949912192.0,
            "5800": 965951296.0,
            "5805": 968749504.0,
            "5810": 976207616.0,
            "5815": 970065984.0,
            "5820": 936474496.0,
            "5825": 969549184.0,
            "5830": 977626880.0,
            "5835": 975078784.0,
            "5840": 963147584.0,
            "5845": 968825088.0,
            "5850": 943106496.0,
            "5855": 976036352.0,
            "5860": 979622976.0,
            "5865": 978382080.0,
            "5870": 968998016.0,
            "5875": 942376832.0,
            "5880": 964595200.0,
            "5885": 974769664.0,
            "5890": 972745728.0,
            "5895": 965767680.0,
            "5900": 941346816.0,
            "5905": 961840832.0,
            "5910": 958716672.0,
            "5915": 968075840.0,
            "5920": 977555200.0,
            "5925": 959702336.0,
            "5930": 946848384.0,
            "5935": 952396160.0,
            "5940": 977736000.0,
            "5945": 984818176.0,
            "5950": 980776512.0,
            "5955": 935216768.0,
            "5960": 961736256.0,
            "5965": 966035648.0,
            "5970": 970636544.0,
            "5975": 962117248.0,
            "5980": 958208576.0,
            "5985": 964636928.0,
            "5990": 973554176.0,
            "5995": 956128704.0,
            "6000": 955744128.0,
            "6005": 961380352.0,
            "6010": 953014016.0,
            "6015": 974649856.0,
            "6020": 978278656.0,
            "6025": 972129536.0,
            "6030": 955373376.0,
            "6035": 946856064.0,
            "6040": 962581056.0,
            "6045": 983964992.0,
            "6050": 956601664.0,
            "6055": 963618048.0,
            "6060": 945903232.0,
            "6065": 958737792.0,
            "6070": 978427904.0,
            "6075": 978185408.0,
            "6080": 957702656.0,
            "6085": 947826368.0,
            "6090": 953768768.0,
            "6095": 964843776.0,
            "6100": 979825280.0,
            "6105": 971208064.0,
            "6110": 961876992.0,
            "6115": 943727424.0,
            "6120": 968622720.0,
            "6125": 960862528.0,
            "6130": 984113856.0,
            "6135": 961117184.0,
            "6140": 958732672.0,
            "6145": 971396352.0,
            "6150": 968554112.0,
            "6155": 975040384.0,
            "6160": 977190848.0,
            "6165": 953025344.0,
            "6170": 951252608.0,
            "6175": 963543552.0,
            "6180": 969390144.0,
            "6185": 966295168.0,
            "6190": 963964672.0,
            "6195": 947167616.0,
            "6200": 969488064.0,
            "6205": 967247424.0,
            "6210": 959217344.0,
            "6215": 973036224.0,
            "6220": 936472832.0,
            "6225": 978690944.0,
            "6230": 976060672.0,
            "6235": 971581824.0,
            "6240": 965871552.0,
            "6245": 956004864.0,
            "6250": 956719616.0,
            "6255": 973240704.0,
            "6260": 978976192.0,
            "6265": 974930880.0,
            "6270": 958670912.0,
            "6275": 963688832.0,
            "6280": 973129344.0,
            "6285": 966123392.0,
            "6290": 970966464.0,
            "6295": 987367680.0,
            "6300": 947468160.0,
            "6305": 964820928.0,
            "6310": 979103360.0,
            "6315": 978343680.0,
            "6320": 971773376.0,
            "6325": 922855744.0,
            "6330": 959107840.0,
            "6335": 974816768.0,
            "6340": 984714752.0,
            "6345": 967007936.0,
            "6350": 944575168.0,
            "6355": 957977920.0,
            "6360": 972760000.0,
            "6365": 972380416.0,
            "6370": 959361152.0,
            "6375": 967129344.0,
            "6380": 951326848.0,
            "6385": 973561856.0,
            "6390": 965380288.0,
            "6395": 975032448.0,
            "6400": 984241024.0,
            "6405": 944249728.0,
            "6410": 977450112.0,
            "6415": 971759936.0,
            "6420": 956665088.0,
            "6425": 961024896.0,
            "6430": 957475648.0,
            "6435": 960222720.0,
            "6440": 969046080.0,
            "6445": 973418112.0,
            "6450": 974342784.0,
            "6455": 962497664.0,
            "6460": 941166912.0,
            "6465": 974334976.0,
            "6470": 979985920.0,
            "6475": 960811968.0,
            "6480": 967643392.0,
            "6485": 948782528.0,
            "6490": 971032000.0,
            "6495": 988664448.0,
            "6500": 980469504.0,
            "6505": 972048256.0,
            "6510": 951587776.0,
            "6515": 957811520.0,
            "6520": 978840640.0,
            "6525": 979014912.0,
            "6530": 973383168.0,
            "6535": 967829248.0,
            "6540": 950430336.0,
            "6545": 966515392.0,
            "6550": 979372032.0,
            "6555": 967131776.0,
            "6560": 975416256.0,
            "6565": 949474816.0,
            "6570": 952104768.0,
            "6575": 962492352.0,
            "6580": 975657472.0,
            "6585": 979594176.0,
            "6590": 949188672.0,
            "6595": 961521152.0,
            "6600": 961195520.0,
            "6605": 961424320.0,
            "6610": 985028736.0,
            "6615": 959654912.0,
            "6620": 944341824.0,
            "6625": 970881536.0,
            "6630": 971369024.0,
            "6635": 963908032.0,
            "6640": 959870080.0,
            "6645": 950833152.0,
            "6650": 978909248.0,
            "6655": 965790528.0,
            "6660": 968642432.0,
            "6665": 968965312.0,
            "6670": 932863360.0,
            "6675": 970957696.0,
            "6680": 968819392.0,
            "6685": 958796800.0,
            "6690": 956030912.0,
            "6695": 955610944.0,
            "6700": 962074688.0,
            "6705": 979070272.0,
            "6710": 971393472.0,
            "6715": 966740672.0,
            "6720": 974002944.0,
            "6725": 941977984.0,
            "6730": 979547776.0,
            "6735": 994587008.0,
            "6740": 976441984.0,
            "6745": 974481344.0,
            "6750": 939296384.0,
            "6755": 977465216.0,
            "6760": 969855936.0,
            "6765": 978564992.0,
            "6770": 975255488.0,
            "6775": 943450112.0,
            "6780": 947295872.0,
            "6785": 975604800.0,
            "6790": 960471296.0,
            "6795": 976304832.0,
            "6800": 973159872.0,
            "6805": 946486464.0,
            "6810": 958437888.0,
            "6815": 970647104.0,
            "6820": 978037248.0,
            "6825": 969144448.0,
            "6830": 950595392.0,
            "6835": 981587392.0,
            "6840": 983195008.0,
            "6845": 948960960.0,
            "6850": 965662592.0,
            "6855": 953732736.0,
            "6860": 979050304.0,
            "6865": 983914432.0,
            "6870": 965064128.0,
            "6875": 978995328.0,
            "6880": 950605696.0,
            "6885": 958238016.0,
            "6890": 960190016.0,
            "6895": 965887232.0,
            "6900": 985198720.0,
            "6905": 968454016.0,
            "6910": 949851520.0,
            "6915": 970890688.0,
            "6920": 967265216.0,
            "6925": 965108480.0,
            "6930": 964729792.0,
            "6935": 952209152.0,
            "6940": 963044736.0,
            "6945": 986545280.0,
            "6950": 973486592.0,
            "6955": 964912768.0,
            "6960": 940139392.0,
            "6965": 974504448.0,
            "6970": 978284928.0,
            "6975": 985103936.0,
            "6980": 982446912.0,
            "6985": 959605952.0,
            "6990": 945558016.0,
            "6995": 987421568.0,
            "7000": 963206400.0,
            "7005": 962628800.0,
            "7010": 985217408.0,
            "7015": 945595392.0,
            "7020": 983020288.0,
            "7025": 968805184.0,
            "7030": 953673536.0,
            "7035": 982738304.0,
            "7040": 950484416.0,
            "7045": 955734592.0,
            "7050": 959912128.0,
            "7055": 963461824.0,
            "7060": 976974208.0,
            "7065": 967937344.0,
            "7070": 953363520.0,
            "7075": 956584256.0,
            "7080": 969236032.0,
            "7085": 966041472.0,
            "7090": 969411904.0,
            "7095": 959820608.0,
            "7100": 973737984.0,
            "7105": 973259264.0,
            "7110": 970316864.0,
            "7115": 958425088.0,
            "7120": 948966656.0,
            "7125": 963190016.0,
            "7130": 971607488.0,
            "7135": 964616128.0,
            "7140": 961313152.0,
            "7145": 930553024.0,
            "7150": 946378880.0,
            "7155": 990761152.0,
            "7160": 968173568.0,
            "7165": 956889344.0,
            "7170": 968251776.0,
            "7175": 955767296.0,
            "7180": 958036608.0,
            "7185": 984732992.0,
            "7190": 978833856.0,
            "7195": 973712128.0,
            "7200": 935857728.0,
            "7205": 957365120.0,
            "7210": 967041152.0,
            "7215": 969412864.0,
            "7220": 982109888.0,
            "7225": 928727552.0,
            "7230": 949316352.0,
            "7235": 967054720.0,
            "7240": 966895360.0,
            "7245": 967651904.0,
            "7250": 949626496.0,
            "7255": 957388288.0,
            "7260": 970176448.0,
            "7265": 975035776.0,
            "7270": 959863168.0,
            "7275": 959348544.0,
            "7280": 957000192.0,
            "7285": 977982528.0,
            "7290": 977201792.0,
            "7295": 963098624.0,
            "7300": 975327616.0,
            "7305": 964145344.0,
            "7310": 977565952.0,
            "7315": 966761856.0,
            "7320": 974417920.0,
            "7325": 966895104.0,
            "7330": 959593152.0,
            "7335": 963913984.0,
            "7340": 977791360.0,
            "7345": 967878336.0,
            "7350": 984658752.0,
            "7355": 959651584.0,
            "7360": 948746624.0,
            "7365": 972749248.0,
            "7370": 982520512.0,
            "7375": 963272640.0,
            "7380": 964382592.0,
            "7385": 948591936.0,
            "7390": 963947200.0,
            "7395": 958553984.0,
            "7400": 969678464.0,
            "7405": 987651072.0,
            "7410": 952276480.0,
            "7415": 950530880.0,
            "7420": 966746944.0,
            "7425": 982829184.0,
            "7430": 965909824.0,
            "7435": 972889664.0,
            "7440": 937252992.0,
            "7445": 968829376.0,
            "7450": 980216320.0,
            "7455": 971489344.0,
            "7460": 972282432.0,
            "7465": 939402112.0,
            "7470": 971692608.0,
            "7475": 957750656.0,
            "7480": 969468224.0,
            "7485": 961552064.0,
            "7490": 934133440.0,
            "7495": 957481664.0,
            "7500": 969315456.0,
            "7505": 970271936.0,
            "7510": 972148224.0,
            "7515": 979570752.0,
            "7520": 951797248.0,
            "7525": 970358464.0,
            "7530": 954349120.0,
            "7535": 971249024.0,
            "7540": 979388480.0,
            "7545": 959124288.0,
            "7550": 960365824.0,
            "7555": 959826496.0,
            "7560": 970185408.0,
            "7565": 954961536.0,
            "7570": 942754432.0,
            "7575": 965398080.0,
            "7580": 982261824.0,
            "7585": 978833664.0,
            "7590": 970328192.0,
            "7595": 950078208.0,
            "7600": 946067392.0,
            "7605": 982220672.0,
            "7610": 969327872.0,
            "7615": 988638464.0,
            "7620": 957197312.0,
            "7625": 941357056.0,
            "7630": 971415232.0,
            "7635": 984228480.0,
            "7640": 983531520.0,
            "7645": 968049152.0,
            "7650": 959283328.0,
            "7655": 962240256.0,
            "7660": 968743872.0,
            "7665": 978220032.0,
            "7670": 975769792.0,
            "7675": 975312960.0,
            "7680": 942613504.0,
            "7685": 959860928.0,
            "7690": 975230208.0,
            "7695": 982332096.0,
            "7700": 979320960.0,
            "7705": 940520832.0,
            "7710": 974873024.0,
            "7715": 979676800.0,
            "7720": 968007552.0,
            "7725": 960515968.0,
            "7730": 943202432.0,
            "7735": 968229568.0,
            "7740": 980156864.0,
            "7745": 964188416.0,
            "7750": 963657920.0,
            "7755": 959910912.0,
            "7760": 970408448.0,
            "7765": 971294848.0,
            "7770": 962203392.0,
            "7775": 981663808.0,
            "7780": 964605376.0,
            "7785": 959948992.0,
            "7790": 968295808.0,
            "7795": 968590016.0,
            "7800": 971622656.0,
            "7805": 968209216.0,
            "7810": 945873792.0,
            "7815": 963823360.0,
            "7820": 974418688.0,
            "7825": 963931136.0,
            "7830": 956929024.0,
            "7835": 949759104.0,
            "7840": 957601664.0,
            "7845": 954126848.0,
            "7850": 979673472.0,
            "7855": 987113216.0,
            "7860": 947068928.0,
            "7865": 949487168.0,
            "7870": 965175808.0,
            "7875": 975789312.0,
            "7880": 968761472.0,
            "7885": 968941312.0,
            "7890": 951623680.0,
            "7895": 974831232.0,
            "7900": 963584064.0,
            "7905": 964137344.0,
            "7910": 965500672.0,
            "7915": 943576256.0,
            "7920": 950969088.0,
            "7925": 969635456.0,
            "7930": 964956608.0,
            "7935": 984559872.0,
            "7940": 964905344.0,
            "7945": 950760448.0,
            "7950": 962209216.0,
            "7955": 979885312.0,
            "7960": 963566784.0,
            "7965": 953230464.0,
            "7970": 951716608.0,
            "7975": 969856896.0,
            "7980": 965181696.0,
            "7985": 959437312.0,
            "7990": 968092800.0,
            "7995": 946804480.0,
            "8000": 962951168.0,
            "8005": 980789120.0,
            "8010": 965420992.0,
            "8015": 982784064.0,
            "8020": 961005248.0,
            "8025": 965458624.0,
            "8030": 958618560.0,
            "8035": 975777024.0,
            "8040": 960459776.0,
            "8045": 948136064.0,
            "8050": 959229696.0,
            "8055": 978929664.0,
            "8060": 969198848.0,
            "8065": 957854656.0,
            "8070": 963932352.0,
            "8075": 942315968.0,
            "8080": 966157824.0,
            "8085": 966925696.0,
            "8090": 983454528.0,
            "8095": 988527104.0,
            "8100": 966421504.0,
            "8105": 944303744.0,
            "8110": 968909504.0,
            "8115": 985647488.0,
            "8120": 974936064.0,
            "8125": 964178432.0,
            "8130": 966359552.0,
            "8135": 967737408.0,
            "8140": 963995520.0,
            "8145": 995087872.0,
            "8150": 973252352.0,
            "8155": 938501696.0,
            "8160": 964363584.0,
            "8165": 973418048.0,
            "8170": 968656768.0,
            "8175": 961648896.0,
            "8180": 936115904.0,
            "8185": 962846464.0,
            "8190": 968107072.0,
            "8195": 977152448.0,
            "8200": 956773632.0,
            "8205": 961033152.0,
            "8210": 946889344.0,
            "8215": 982632448.0,
            "8220": 988185600.0,
            "8225": 966648832.0,
            "8230": 962897536.0,
            "8235": 933948096.0,
            "8240": 980489664.0,
            "8245": 976301056.0,
            "8250": 963861056.0,
            "8255": 977137152.0,
            "8260": 956588608.0,
            "8265": 982784640.0,
            "8270": 952701696.0,
            "8275": 974062528.0,
            "8280": 974439680.0,
            "8285": 953395264.0,
            "8290": 939875776.0,
            "8295": 981224832.0,
            "8300": 973482496.0,
            "8305": 977951104.0,
            "8310": 951317440.0,
            "8315": 938032768.0,
            "8320": 977546752.0,
            "8325": 967701504.0,
            "8330": 990204544.0,
            "8335": 975809792.0,
            "8340": 947969280.0,
            "8345": 970863680.0,
            "8350": 969947264.0,
            "8355": 975458560.0,
            "8360": 979849472.0,
            "8365": 933249216.0,
            "8370": 965366272.0,
            "8375": 979880128.0,
            "8380": 965581184.0,
            "8385": 972620288.0,
            "8390": 962944768.0,
            "8395": 951483264.0,
            "8400": 972189632.0,
            "8405": 951416576.0,
            "8410": 960852864.0,
            "8415": 965647872.0,
            "8420": 941496960.0,
            "8425": 968092928.0,
            "8430": 961037312.0,
            "8435": 965993344.0,
            "8440": 969732736.0,
            "8445": 952751424.0,
            "8450": 984203904.0,
            "8455": 990432000.0,
            "8460": 968853440.0,
            "8465": 967392640.0,
            "8470": 963083008.0,
            "8475": 942786624.0,
            "8480": 987434176.0,
            "8485": 980034176.0,
            "8490": 992329600.0,
            "8495": 971754112.0,
            "8500": 951823360.0,
            "8505": 983228800.0,
            "8510": 974309568.0,
            "8515": 969046784.0,
            "8520": 961730944.0,
            "8525": 944922304.0,
            "8530": 984660608.0,
            "8535": 978670656.0,
            "8540": 968110080.0,
            "8545": 968524480.0,
            "8550": 942347392.0,
            "8555": 971930752.0,
            "8560": 958590336.0,
            "8565": 975812096.0,
            "8570": 974790272.0,
            "8575": 971485312.0,
            "8580": 932314048.0,
            "8585": 965844224.0,
            "8590": 979204736.0,
            "8595": 979228992.0,
            "8600": 984273152.0,
            "8605": 957989312.0,
            "8610": 984168384.0,
            "8615": 977626048.0,
            "8620": 963301952.0,
            "8625": 979589952.0,
            "8630": 943409024.0,
            "8635": 961711936.0,
            "8640": 973623232.0,
            "8645": 970575104.0,
            "8650": 969548608.0,
            "8655": 971083968.0,
            "8660": 944791360.0,
            "8665": 986806400.0,
            "8670": 960723904.0,
            "8675": 974538176.0,
            "8680": 962363392.0,
            "8685": 955619648.0,
            "8690": 978563840.0,
            "8695": 968917632.0,
            "8700": 973037184.0,
            "8705": 973570880.0,
            "8710": 947093184.0,
            "8715": 973675392.0,
            "8720": 958168000.0,
            "8725": 978732480.0,
            "8730": 986091264.0,
            "8735": 952203264.0,
            "8740": 940976448.0,
            "8745": 988046464.0,
            "8750": 971900736.0,
            "8755": 971639616.0,
            "8760": 965755008.0,
            "8765": 934692160.0,
            "8770": 986510272.0,
            "8775": 969679296.0,
            "8780": 967393920.0,
            "8785": 962431168.0,
            "8790": 947946368.0,
            "8795": 969419392.0,
            "8800": 971061312.0,
            "8805": 973534976.0,
            "8810": 983247360.0,
            "8815": 951791424.0,
            "8820": 940190528.0,
            "8825": 964691840.0,
            "8830": 981237952.0,
            "8835": 971841408.0,
            "8840": 979946560.0,
            "8845": 951308864.0,
            "8850": 987092992.0,
            "8855": 971287168.0,
            "8860": 962172032.0,
            "8865": 956633280.0,
            "8870": 946021632.0,
            "8875": 968481408.0,
            "8880": 983755776.0,
            "8885": 971736384.0,
            "8890": 969784896.0,
            "8895": 952910336.0,
            "8900": 961620672.0,
            "8905": 976778112.0,
            "8910": 981939072.0,
            "8915": 980891648.0,
            "8920": 968024512.0,
            "8925": 940251392.0,
            "8930": 970136064.0,
            "8935": 963917888.0,
            "8940": 977583360.0,
            "8945": 981544192.0,
            "8950": 946004416.0,
            "8955": 972768192.0,
            "8960": 973947008.0,
            "8965": 973383936.0,
            "8970": 966398272.0,
            "8975": 936781568.0,
            "8980": 953136576.0,
            "8985": 977411200.0,
            "8990": 967375936.0,
            "8995": 980774528.0,
            "9000": 952728192.0,
            "9005": 950697216.0,
            "9010": 975551808.0,
            "9015": 982901760.0,
            "9020": 959389056.0,
            "9025": 979940736.0,
            "9030": 953776512.0,
            "9035": 968922560.0,
            "9040": 978552640.0,
            "9045": 968831296.0,
            "9050": 982790464.0,
            "9055": 948045888.0,
            "9060": 956589184.0,
            "9065": 970172672.0,
            "9070": 967684480.0,
            "9075": 981152384.0,
            "9080": 952602048.0,
            "9085": 971467520.0,
            "9090": 963557248.0,
            "9095": 967994880.0,
            "9100": 974781440.0,
            "9105": 960009792.0,
            "9110": 947825408.0,
            "9115": 956785664.0,
            "9120": 985655040.0,
            "9125": 963481472.0,
            "9130": 958727936.0,
            "9135": 951879232.0,
            "9140": 967393984.0,
            "9145": 977101056.0,
            "9150": 987110720.0,
            "9155": 977189632.0,
            "9160": 957952704.0,
            "9165": 950256768.0,
            "9170": 988456064.0,
            "9175": 971693632.0,
            "9180": 967584320.0,
            "9185": 955140416.0,
            "9190": 957140928.0,
            "9195": 966212352.0,
            "9200": 969115840.0,
            "9205": 967428224.0,
            "9210": 984534912.0,
            "9215": 932014528.0,
            "9220": 949587904.0,
            "9225": 971095424.0,
            "9230": 971012288.0,
            "9235": 971575616.0,
            "9240": 959574272.0,
            "9245": 963990912.0,
            "9250": 961552512.0,
            "9255": 983019392.0,
            "9260": 979394944.0,
            "9265": 952805504.0,
            "9270": 949326336.0,
            "9275": 978637248.0,
            "9280": 977361344.0,
            "9285": 962731200.0,
            "9290": 979255680.0,
            "9295": 958574656.0,
            "9300": 965053312.0,
            "9305": 969046400.0,
            "9310": 973026560.0,
            "9315": 976292288.0,
            "9320": 947985280.0,
            "9325": 979776640.0,
            "9330": 977718976.0,
            "9335": 975414848.0,
            "9340": 960187968.0,
            "9345": 943341952.0,
            "9350": 952640128.0,
            "9355": 963598208.0,
            "9360": 960270848.0,
            "9365": 983214848.0,
            "9370": 982509056.0,
            "9375": 942057024.0,
            "9380": 982602944.0,
            "9385": 985542528.0,
            "9390": 973357952.0,
            "9395": 978024960.0,
            "9400": 938125376.0,
            "9405": 968465088.0,
            "9410": 981829696.0,
            "9415": 991810624.0,
            "9420": 960258176.0,
            "9425": 956645376.0,
            "9430": 939027904.0,
            "9435": 974855168.0,
            "9440": 959027456.0,
            "9445": 973696192.0,
            "9450": 961227776.0,
            "9455": 946047232.0,
            "9460": 978455936.0,
            "9465": 988237952.0,
            "9470": 963345856.0,
            "9475": 983982464.0,
            "9480": 931407360.0,
            "9485": 987430464.0,
            "9490": 963495168.0,
            "9495": 972477824.0,
            "9500": 982245312.0,
            "9505": 970089216.0,
            "9510": 964443776.0,
            "9515": 957060544.0,
            "9520": 948360320.0,
            "9525": 965533056.0,
            "9530": 958222848.0,
            "9535": 951184000.0,
            "9540": 954452096.0,
            "9545": 979665792.0,
            "9550": 955705152.0,
            "9555": 953111552.0,
            "9560": 958396416.0,
            "9565": 970089344.0,
            "9570": 977259456.0,
            "9575": 958986112.0,
            "9580": 963197568.0,
            "9585": 946283648.0,
            "9590": 947963904.0,
            "9595": 966430592.0,
            "9600": 984959040.0,
            "9605": 985143936.0,
            "9610": 943481984.0,
            "9615": 952715008.0,
            "9620": 980440576.0,
            "9625": 978303936.0,
            "9630": 969868160.0,
            "9635": 974525504.0,
            "9640": 940122816.0,
            "9645": 962069952.0,
            "9650": 971067328.0,
            "9655": 987521088.0,
            "9660": 962704576.0,
            "9665": 949898624.0,
            "9670": 966470528.0,
            "9675": 963383936.0,
            "9680": 964843136.0,
            "9685": 986562112.0,
            "9690": 939724096.0,
            "9695": 950472256.0,
            "9700": 975001984.0,
            "9705": 972711936.0,
            "9710": 967509760.0,
            "9715": 971305408.0,
            "9720": 940441792.0,
            "9725": 966294528.0,
            "9730": 974027264.0,
            "9735": 973989248.0,
            "9740": 971271680.0,
            "9745": 951061952.0,
            "9750": 979642304.0,
            "9755": 970022464.0,
            "9760": 967782144.0,
            "9765": 963702016.0,
            "9770": 952406272.0,
            "9775": 956486400.0,
            "9780": 970119744.0,
            "9785": 958688960.0,
            "9790": 960858752.0,
            "9795": 958029440.0,
            "9800": 949218624.0,
            "9805": 962086592.0,
            "9810": 978418688.0,
            "9815": 977606464.0,
            "9820": 982410176.0,
            "9825": 939265280.0,
            "9830": 969135040.0,
            "9835": 972511872.0,
            "9840": 971504128.0,
            "9845": 967136512.0,
            "9850": 946839424.0,
            "9855": 956824128.0,
            "9860": 986868992.0,
            "9865": 969709248.0,
            "9870": 990372224.0,
            "9875": 957135744.0,
            "9880": 931092352.0,
            "9885": 963617216.0,
            "9890": 972564736.0,
            "9895": 983526656.0,
            "9900": 956239424.0,
            "9905": 938825600.0,
            "9910": 978737920.0,
            "9915": 973480128.0,
            "9920": 944352576.0,
            "9925": 962596864.0,
            "9930": 947479232.0,
            "9935": 960746688.0,
            "9940": 965980864.0,
            "9945": 959010752.0,
            "9950": 964129216.0,
            "9955": 943475520.0,
            "9960": 966686464.0,
            "9965": 983719680.0,
            "9970": 966804288.0,
            "9975": 963722560.0,
            "9980": 980734592.0,
            "9985": 942341056.0,
            "9990": 976442240.0,
            "9995": 982719104.0,
            "10000": 971864128.0,
            "10005": 969013952.0,
            "10010": 944136192.0,
            "10015": 982651648.0,
            "10020": 978235776.0,
            "10025": 979660352.0,
            "10030": 971208704.0,
            "10035": 946398464.0,
            "10040": 949993920.0,
            "10045": 977929088.0,
            "10050": 985968512.0,
            "10055": 990260032.0,
            "10060": 958923136.0,
            "10065": 947308672.0,
            "10070": 966687808.0,
            "10075": 979044608.0,
            "10080": 971424832.0,
            "10085": 974488128.0,
            "10090": 943751168.0,
            "10095": 962277376.0,
            "10100": 971509952.0,
            "10105": 975740800.0,
            "10110": 971944448.0,
            "10115": 948612864.0,
            "10120": 962398656.0,
            "10125": 973998848.0,
            "10130": 979987200.0,
            "10135": 972261248.0,
            "10140": 957413696.0,
            "10145": 933813248.0,
            "10150": 973622848.0,
            "10155": 969274304.0,
            "10160": 961962624.0,
            "10165": 974813312.0,
            "10170": 944318784.0,
            "10175": 978795520.0,
            "10180": 983504768.0,
            "10185": 978501632.0,
            "10190": 954944512.0,
            "10195": 936623232.0,
            "10200": 987885440.0,
            "10205": 972258816.0,
            "10210": 966509376.0,
            "10215": 975802752.0,
            "10220": 948321088.0,
            "10225": 950461120.0,
            "10230": 975630208.0,
            "10235": 953782016.0,
            "10240": 969370176.0,
            "10245": 961576192.0,
            "10250": 936081152.0,
            "10255": 979319424.0,
            "10260": 964128640.0,
            "10265": 967120192.0,
            "10270": 968498816.0,
            "10275": 935868800.0,
            "10280": 969242688.0,
            "10285": 995937280.0,
            "10290": 979259136.0,
            "10295": 981503616.0,
            "10300": 951791680.0,
            "10305": 971754560.0,
            "10310": 959965824.0,
            "10315": 971077952.0,
            "10320": 984925632.0,
            "10325": 983082624.0,
            "10330": 934657152.0,
            "10335": 976220352.0,
            "10340": 957051712.0,
            "10345": 973646080.0,
            "10350": 984548224.0,
            "10355": 941791872.0,
            "10360": 961714240.0,
            "10365": 974072704.0,
            "10370": 980395520.0,
            "10375": 969789888.0,
            "10380": 961306368.0,
            "10385": 955071936.0,
            "10390": 990365952.0,
            "10395": 964464000.0,
            "10400": 960446144.0,
            "10405": 949563072.0,
            "10410": 954754304.0,
            "10415": 975151232.0,
            "10420": 967101120.0,
            "10425": 968904128.0,
            "10430": 964671872.0,
            "10435": 962459584.0,
            "10440": 971827584.0,
            "10445": 972036800.0,
            "10450": 974739712.0,
            "10455": 966332928.0,
            "10460": 948443328.0,
            "10465": 971633920.0,
            "10470": 972473984.0,
            "10475": 979072128.0,
            "10480": 997084800.0,
            "10485": 949607552.0,
            "10490": 934736512.0,
            "10495": 969033856.0,
            "10500": 977803776.0,
            "10505": 958570240.0,
            "10510": 951081088.0,
            "10515": 953656832.0,
            "10520": 971768384.0,
            "10525": 969009472.0,
            "10530": 970215680.0,
            "10535": 986292992.0,
            "10540": 946977152.0,
            "10545": 970127104.0,
            "10550": 969372992.0,
            "10555": 959300800.0,
            "10560": 975720832.0,
            "10565": 961216448.0,
            "10570": 968959360.0,
            "10575": 973174272.0,
            "10580": 960182720.0,
            "10585": 973309440.0,
            "10590": 951782144.0,
            "10595": 956492288.0,
            "10600": 967455680.0,
            "10605": 986403584.0,
            "10610": 966269568.0,
            "10615": 976502656.0,
            "10620": 940588992.0,
            "10625": 965036864.0,
            "10630": 967383488.0,
            "10635": 973025920.0,
            "10640": 974566400.0,
            "10645": 948472256.0,
            "10650": 966032512.0,
            "10655": 985539008.0,
            "10660": 976460928.0,
            "10665": 967101120.0,
            "10670": 954918464.0,
            "10675": 933718272.0,
            "10680": 985944832.0,
            "10685": 990967680.0,
            "10690": 963776000.0,
            "10695": 971578624.0,
            "10700": 949710976.0,
            "10705": 977786368.0,
            "10710": 968243712.0,
            "10715": 966750336.0,
            "10720": 966038400.0,
            "10725": 943961472.0,
            "10730": 980036160.0,
            "10735": 960563328.0,
            "10740": 971111424.0,
            "10745": 983819328.0,
            "10750": 981627072.0,
            "10755": 944956224.0,
            "10760": 969697536.0,
            "10765": 972653184.0,
            "10770": 973592640.0,
            "10775": 958354432.0,
            "10780": 949396992.0,
            "10785": 953595776.0,
            "10790": 969862784.0,
            "10795": 960249728.0,
            "10800": 971706432.0,
            "10805": 950981632.0,
            "10810": 973867008.0,
            "10815": 959902272.0,
            "10820": 971104384.0,
            "10825": 967088000.0,
            "10830": 956843200.0,
            "10835": 962953856.0,
            "10840": 970556096.0,
            "10845": 963811072.0,
            "10850": 957572096.0,
            "10855": 967361920.0,
            "10860": 950321024.0,
            "10865": 963889536.0,
            "10870": 983368896.0,
            "10875": 982371968.0,
            "10880": 958217856.0,
            "10885": 954330624.0,
            "10890": 973060864.0,
            "10895": 973396480.0,
            "10900": 970089920.0,
            "10905": 964838592.0,
            "10910": 938643072.0,
            "10915": 960478720.0,
            "10920": 983431808.0,
            "10925": 969440640.0,
            "10930": 968268288.0,
            "10935": 962588992.0,
            "10940": 953930048.0,
            "10945": 964470912.0,
            "10950": 971827840.0,
            "10955": 966066944.0,
            "10960": 971449152.0,
            "10965": 966221568.0,
            "10970": 983031360.0,
            "10975": 965673280.0,
            "10980": 974312320.0,
            "10985": 986228352.0,
            "10990": 950760320.0,
            "10995": 963204800.0,
            "11000": 985212672.0,
            "11005": 978412288.0,
            "11010": 971355776.0,
            "11015": 969480960.0,
            "11020": 947788928.0,
            "11025": 959772544.0,
            "11030": 977959936.0,
            "11035": 975451008.0,
            "11040": 986277504.0,
            "11045": 956634432.0,
            "11050": 973260672.0,
            "11055": 974149504.0,
            "11060": 962016128.0,
            "11065": 985504512.0,
            "11070": 949753280.0,
            "11075": 976394624.0,
            "11080": 972031872.0,
            "11085": 967020416.0,
            "11090": 976336384.0,
            "11095": 946732032.0,
            "11100": 966014272.0,
            "11105": 973917504.0,
            "11110": 980682176.0,
            "11115": 967836672.0,
            "11120": 957161280.0,
            "11125": 956646656.0,
            "11130": 975539968.0,
            "11135": 979452032.0,
            "11140": 964636736.0,
            "11145": 966265792.0,
            "11150": 935727232.0,
            "11155": 976467648.0,
            "11160": 983961664.0,
            "11165": 982399360.0,
            "11170": 977720064.0,
            "11175": 958108096.0,
            "11180": 962306240.0,
            "11185": 971891456.0,
            "11190": 979924416.0,
            "11195": 985255872.0,
            "11200": 983025472.0,
            "11205": 941737600.0,
            "11210": 984678912.0,
            "11215": 967432192.0,
            "11220": 983352000.0,
            "11225": 961579904.0,
            "11230": 953286272.0,
            "11235": 981753920.0,
            "11240": 977473344.0,
            "11245": 965594752.0,
            "11250": 969379072.0,
            "11255": 959675136.0,
            "11260": 979628352.0,
            "11265": 963742080.0,
            "11270": 981387392.0,
            "11275": 968132480.0,
            "11280": 955867648.0,
            "11285": 953773632.0,
            "11290": 956513920.0,
            "11295": 968320320.0,
            "11300": 962413952.0,
            "11305": 958477056.0,
            "11310": 946143616.0,
            "11315": 983343232.0,
            "11320": 964742208.0,
            "11325": 980844992.0,
            "11330": 975686656.0,
            "11335": 952358528.0,
            "11340": 970141248.0,
            "11345": 970000320.0,
            "11350": 981713920.0,
            "11355": 982531008.0,
            "11360": 941237888.0,
            "11365": 970640192.0,
            "11370": 978903424.0,
            "11375": 975301568.0,
            "11380": 968416256.0,
            "11385": 958673088.0,
            "11390": 937971968.0,
            "11395": 977420160.0,
            "11400": 973551360.0,
            "11405": 961555584.0,
            "11410": 966291712.0,
            "11415": 929213632.0,
            "11420": 964718848.0,
            "11425": 981182528.0,
            "11430": 978574592.0,
            "11435": 970325888.0,
            "11440": 944973696.0,
            "11445": 975068032.0,
            "11450": 984427392.0,
            "11455": 971364352.0,
            "11460": 965071360.0,
            "11465": 960150656.0,
            "11470": 954908800.0,
            "11475": 972813184.0,
            "11480": 956431552.0,
            "11485": 977088960.0,
            "11490": 986184192.0,
            "11495": 959146176.0,
            "11500": 969768896.0,
            "11505": 964037120.0,
            "11510": 976603392.0,
            "11515": 977965248.0,
            "11520": 954036416.0,
            "11525": 976051392.0,
            "11530": 976575744.0,
            "11535": 979724928.0,
            "11540": 974337408.0,
            "11545": 953335872.0,
            "11550": 952790336.0,
            "11555": 981364224.0,
            "11560": 984814912.0,
            "11565": 965119296.0,
            "11570": 966388096.0,
            "11575": 951670144.0,
            "11580": 976271040.0,
            "11585": 977377152.0,
            "11590": 969153536.0,
            "11595": 976633088.0,
            "11600": 946475968.0,
            "11605": 973212096.0,
            "11610": 982445120.0,
            "11615": 972059136.0,
            "11620": 969179328.0,
            "11625": 949418368.0,
            "11630": 937577728.0,
            "11635": 973618176.0,
            "11640": 981198336.0,
            "11645": 980141696.0,
            "11650": 971794176.0,
            "11655": 956067968.0,
            "11660": 980601088.0,
            "11665": 958364288.0,
            "11670": 982631232.0,
            "11675": 972254400.0,
            "11680": 956150912.0,
            "11685": 982832256.0,
            "11690": 968553088.0,
            "11695": 968086272.0,
            "11700": 973659712.0,
            "11705": 956149376.0,
            "11710": 964658176.0,
            "11715": 983306432.0,
            "11720": 983734848.0,
            "11725": 965216768.0,
            "11730": 955435456.0,
            "11735": 942289920.0,
            "11740": 973779456.0,
            "11745": 971285248.0,
            "11750": 961599424.0,
            "11755": 963397824.0,
            "11760": 950131200.0,
            "11765": 983766400.0,
            "11770": 984208512.0,
            "11775": 975523712.0,
            "11780": 985478464.0,
            "11785": 947161984.0,
            "11790": 972493312.0,
            "11795": 970462400.0,
            "11800": 973178368.0,
            "11805": 986620480.0,
            "11810": 967751488.0,
            "11815": 955799168.0,
            "11820": 973347712.0,
            "11825": 970646336.0,
            "11830": 974624640.0,
            "11835": 961679552.0,
            "11840": 944528640.0,
            "11845": 980866432.0,
            "11850": 974564800.0,
            "11855": 978016640.0,
            "11860": 971869824.0,
            "11865": 938508672.0,
            "11870": 940174400.0,
            "11875": 990073216.0,
            "11880": 972232128.0,
            "11885": 962926592.0,
            "11890": 970329856.0,
            "11895": 965331392.0,
            "11900": 979368768.0,
            "11905": 961579072.0,
            "11910": 983739904.0,
            "11915": 989931712.0,
            "11920": 944844672.0,
            "11925": 993967360.0,
            "11930": 964656640.0,
            "11935": 963318144.0,
            "11940": 976990208.0,
            "11945": 943877888.0,
            "11950": 977614336.0,
            "11955": 979017984.0,
            "11960": 972299648.0,
            "11965": 976420736.0,
            "11970": 963196544.0,
            "11975": 963264192.0,
            "11980": 977523584.0,
            "11985": 953295936.0,
            "11990": 968557184.0,
            "11995": 964949696.0,
            "12000": 958245696.0,
            "12005": 974362816.0,
            "12010": 979133120.0,
            "12015": 971844096.0,
            "12020": 973046400.0,
            "12025": 934406400.0,
            "12030": 969001216.0,
            "12035": 984151808.0,
            "12040": 977262208.0,
            "12045": 981731776.0,
            "12050": 931274816.0,
            "12055": 938311872.0,
            "12060": 974067200.0,
            "12065": 965477696.0,
            "12070": 968006528.0,
            "12075": 949658880.0,
            "12080": 953243712.0,
            "12085": 972513344.0,
            "12090": 963833920.0,
            "12095": 963602496.0,
            "12100": 976480832.0,
            "12105": 950655424.0,
            "12110": 972107904.0,
            "12115": 968212608.0,
            "12120": 986542656.0,
            "12125": 980539648.0,
            "12130": 941737792.0,
            "12135": 955712512.0,
            "12140": 976076864.0,
            "12145": 979760640.0,
            "12150": 979903040.0,
            "12155": 961891328.0,
            "12160": 946421376.0,
            "12165": 968674944.0,
            "12170": 964216192.0,
            "12175": 967224192.0,
            "12180": 974769664.0,
            "12185": 952382400.0,
            "12190": 988016576.0,
            "12195": 970187072.0,
            "12200": 964551104.0,
            "12205": 968434560.0,
            "12210": 938996224.0,
            "12215": 996909184.0,
            "12220": 969778496.0,
            "12225": 979236736.0,
            "12230": 980062912.0,
            "12235": 949854784.0,
            "12240": 963612544.0,
            "12245": 965285248.0,
            "12250": 976217216.0,
            "12255": 968066560.0,
            "12260": 983371456.0,
            "12265": 931721536.0,
            "12270": 966209920.0,
            "12275": 979566720.0,
            "12280": 977469952.0,
            "12285": 970346432.0,
            "12290": 929181248.0,
            "12295": 976800384.0,
            "12300": 985978496.0,
            "12305": 969918592.0,
            "12310": 986289600.0,
            "12315": 936202304.0,
            "12320": 958148352.0,
            "12325": 966634880.0,
            "12330": 968319488.0,
            "12335": 963973120.0,
            "12340": 957507840.0,
            "12345": 944285056.0,
            "12350": 966565632.0,
            "12355": 975757952.0,
            "12360": 978686464.0,
            "12365": 964769792.0,
            "12370": 948300992.0,
            "12375": 963864256.0,
            "12380": 964849664.0,
            "12385": 972869120.0,
            "12390": 961833792.0,
            "12395": 961544256.0,
            "12400": 975505536.0,
            "12405": 976412672.0,
            "12410": 953583168.0,
            "12415": 963226112.0,
            "12420": 944420736.0,
            "12425": 949603456.0,
            "12430": 972833536.0,
            "12435": 969607552.0,
            "12440": 962386112.0,
            "12445": 952637696.0,
            "12450": 947956480.0,
            "12455": 981740352.0,
            "12460": 974357440.0,
            "12465": 954756736.0,
            "12470": 980836608.0,
            "12475": 958470144.0,
            "12480": 967282176.0,
            "12485": 978194176.0,
            "12490": 974255104.0,
            "12495": 969619136.0,
            "12500": 961844416.0,
            "12505": 943830272.0,
            "12510": 961026880.0,
            "12515": 969812928.0,
            "12520": 974083712.0,
            "12525": 972377536.0,
            "12530": 944719296.0,
            "12535": 976303296.0,
            "12540": 965709056.0,
            "12545": 971849664.0,
            "12550": 969844672.0,
            "12555": 941324672.0,
            "12560": 964666432.0,
            "12565": 947400448.0,
            "12570": 974493568.0,
            "12575": 963104192.0,
            "12580": 958128896.0,
            "12585": 963636992.0,
            "12590": 965789568.0,
            "12595": 978658816.0,
            "12600": 981959552.0,
            "12605": 949359232.0,
            "12610": 938076928.0,
            "12615": 962718720.0,
            "12620": 961005440.0,
            "12625": 966630720.0,
            "12630": 970775872.0,
            "12635": 962372032.0,
            "12640": 978419712.0,
            "12645": 968940544.0,
            "12650": 970174592.0,
            "12655": 964155264.0,
            "12660": 932281664.0,
            "12665": 956939712.0,
            "12670": 986266368.0,
            "12675": 965740032.0,
            "12680": 960978944.0,
            "12685": 951439744.0,
            "12690": 944876992.0,
            "12695": 978375488.0,
            "12700": 984995712.0,
            "12705": 959030784.0,
            "12710": 968442880.0,
            "12715": 956452160.0,
            "12720": "nan",
            "12725": "nan",
            "12730": "nan",
            "12735": "nan",
            "12740": "nan",
            "12745": "nan",
            "12750": "nan",
            "12755": "nan",
            "12760": "nan",
            "12765": "nan",
            "12770": "nan",
            "12775": "nan",
            "12780": "nan",
            "12785": "nan",
            "12790": "nan",
            "12795": "nan",
            "12800": "nan",
            "12805": "nan",
            "12810": "nan",
            "12815": "nan",
            "12820": "nan",
            "12825": "nan",
            "12830": "nan",
            "12835": "nan",
            "12840": "nan",
            "12845": "nan",
            "12850": "nan",
            "12855": "nan",
            "12860": "nan",
            "12865": "nan",
            "12870": "nan",
            "12875": "nan",
            "12880": "nan",
            "12885": "nan",
            "12890": "nan",
            "12895": "nan",
            "12900": "nan",
            "12905": "nan",
            "12910": "nan",
            "12915": "nan",
            "12920": "nan",
            "12925": "nan",
            "12930": "nan",
            "12935": "nan",
            "12940": "nan",
            "12945": "nan",
            "12950": "nan",
            "12955": "nan",
            "12960": "nan",
            "12965": "nan",
            "12970": "nan",
            "12975": "nan",
            "12980": "nan",
            "12985": "nan",
            "12990": "nan",
            "12995": "nan",
            "13000": "nan"
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 13000,
        "step_interval": 5,
        "values": {
            "1": 13218789376.0,
            "5": 13218789376.0,
            "10": 13218789376.0,
            "15": 13218789376.0,
            "20": 13218789376.0,
            "25": 13218789376.0,
            "30": 13218789376.0,
            "35": 13218789376.0,
            "40": 13218789376.0,
            "45": 13218789376.0,
            "50": 13218789376.0,
            "55": 13218789376.0,
            "60": 13218789376.0,
            "65": 13218789376.0,
            "70": 13218789376.0,
            "75": 13218789376.0,
            "80": 13218789376.0,
            "85": 13218789376.0,
            "90": 13218789376.0,
            "95": 13218789376.0,
            "100": 13218789376.0,
            "105": 13218789376.0,
            "110": 13218789376.0,
            "115": 13218789376.0,
            "120": 13218789376.0,
            "125": 13218789376.0,
            "130": 13218789376.0,
            "135": 13218789376.0,
            "140": 13218789376.0,
            "145": 13218789376.0,
            "150": 13218789376.0,
            "155": 13218789376.0,
            "160": 13218789376.0,
            "165": 13218789376.0,
            "170": 13218789376.0,
            "175": 13218789376.0,
            "180": 13218789376.0,
            "185": 13218789376.0,
            "190": 13218789376.0,
            "195": 13218789376.0,
            "200": 13218789376.0,
            "205": 13218789376.0,
            "210": 13218789376.0,
            "215": 13218789376.0,
            "220": 13218789376.0,
            "225": 13218789376.0,
            "230": 13218789376.0,
            "235": 13218789376.0,
            "240": 13218789376.0,
            "245": 13218789376.0,
            "250": 13218789376.0,
            "255": 13218789376.0,
            "260": 13218789376.0,
            "265": 13218789376.0,
            "270": 13218789376.0,
            "275": 13218789376.0,
            "280": 13218789376.0,
            "285": 13218789376.0,
            "290": 13218789376.0,
            "295": 13218789376.0,
            "300": 13218789376.0,
            "305": 13218789376.0,
            "310": 13218789376.0,
            "315": 13218789376.0,
            "320": 13218789376.0,
            "325": 13218789376.0,
            "330": 13218789376.0,
            "335": 13218789376.0,
            "340": 13218789376.0,
            "345": 13218789376.0,
            "350": 13218789376.0,
            "355": 13218789376.0,
            "360": 13218789376.0,
            "365": 13218789376.0,
            "370": 13218789376.0,
            "375": 13218789376.0,
            "380": 13218789376.0,
            "385": 13218789376.0,
            "390": 13218789376.0,
            "395": 13218789376.0,
            "400": 13218789376.0,
            "405": 13218789376.0,
            "410": 13218789376.0,
            "415": 13218789376.0,
            "420": 13218789376.0,
            "425": 13218789376.0,
            "430": 13218789376.0,
            "435": 13218789376.0,
            "440": 13218789376.0,
            "445": 13218789376.0,
            "450": 13218789376.0,
            "455": 13218789376.0,
            "460": 13218789376.0,
            "465": 13218789376.0,
            "470": 13218789376.0,
            "475": 13218789376.0,
            "480": 13218789376.0,
            "485": 13218789376.0,
            "490": 13218789376.0,
            "495": 13218789376.0,
            "500": 13218789376.0,
            "505": 13218789376.0,
            "510": 13218789376.0,
            "515": 13218789376.0,
            "520": 13218789376.0,
            "525": 13218789376.0,
            "530": 13218789376.0,
            "535": 13218789376.0,
            "540": 13218789376.0,
            "545": 13218789376.0,
            "550": 13218789376.0,
            "555": 13218789376.0,
            "560": 13218789376.0,
            "565": 13218789376.0,
            "570": 13218789376.0,
            "575": 13218789376.0,
            "580": 13218789376.0,
            "585": 13218789376.0,
            "590": 13218789376.0,
            "595": 13218789376.0,
            "600": 13218789376.0,
            "605": 13218789376.0,
            "610": 13218789376.0,
            "615": 13218789376.0,
            "620": 13218789376.0,
            "625": 13218789376.0,
            "630": 13218789376.0,
            "635": 13218789376.0,
            "640": 13218789376.0,
            "645": 13218789376.0,
            "650": 13218789376.0,
            "655": 13218789376.0,
            "660": 13218789376.0,
            "665": 13218789376.0,
            "670": 13218789376.0,
            "675": 13218789376.0,
            "680": 13218789376.0,
            "685": 13218789376.0,
            "690": 13218789376.0,
            "695": 13218789376.0,
            "700": 13218789376.0,
            "705": 13218789376.0,
            "710": 13218789376.0,
            "715": 13218789376.0,
            "720": 13218789376.0,
            "725": 13218789376.0,
            "730": 13218789376.0,
            "735": 13218789376.0,
            "740": 13218789376.0,
            "745": 13218789376.0,
            "750": 13218789376.0,
            "755": 13218789376.0,
            "760": 13218789376.0,
            "765": 13218789376.0,
            "770": 13218789376.0,
            "775": 13218789376.0,
            "780": 13218789376.0,
            "785": 13218789376.0,
            "790": 13218789376.0,
            "795": 13218789376.0,
            "800": 13218789376.0,
            "805": 13218789376.0,
            "810": 13218789376.0,
            "815": 13218789376.0,
            "820": 13218789376.0,
            "825": 13218789376.0,
            "830": 13218789376.0,
            "835": 13218789376.0,
            "840": 13218789376.0,
            "845": 13218789376.0,
            "850": 13218789376.0,
            "855": 13218789376.0,
            "860": 13218789376.0,
            "865": 13218789376.0,
            "870": 13218789376.0,
            "875": 13218789376.0,
            "880": 13218789376.0,
            "885": 13218789376.0,
            "890": 13218789376.0,
            "895": 13218789376.0,
            "900": 13218789376.0,
            "905": 13218789376.0,
            "910": 13218789376.0,
            "915": 13218789376.0,
            "920": 13218789376.0,
            "925": 13218789376.0,
            "930": 13218789376.0,
            "935": 13218789376.0,
            "940": 13218789376.0,
            "945": 13218789376.0,
            "950": 13218789376.0,
            "955": 13218789376.0,
            "960": 13218789376.0,
            "965": 13218789376.0,
            "970": 13218789376.0,
            "975": 13218789376.0,
            "980": 13218789376.0,
            "985": 13218789376.0,
            "990": 13218789376.0,
            "995": 13218789376.0,
            "1000": 13218789376.0,
            "1005": 13218789376.0,
            "1010": 13218789376.0,
            "1015": 13218789376.0,
            "1020": 13218789376.0,
            "1025": 13218789376.0,
            "1030": 13218789376.0,
            "1035": 13218789376.0,
            "1040": 13218789376.0,
            "1045": 13218789376.0,
            "1050": 13218789376.0,
            "1055": 13218789376.0,
            "1060": 13218789376.0,
            "1065": 13218789376.0,
            "1070": 13218789376.0,
            "1075": 13218789376.0,
            "1080": 13218789376.0,
            "1085": 13218789376.0,
            "1090": 13218789376.0,
            "1095": 13218789376.0,
            "1100": 13218789376.0,
            "1105": 13218789376.0,
            "1110": 13218789376.0,
            "1115": 13218789376.0,
            "1120": 13218789376.0,
            "1125": 13218789376.0,
            "1130": 13218789376.0,
            "1135": 13218789376.0,
            "1140": 13218789376.0,
            "1145": 13218789376.0,
            "1150": 13218789376.0,
            "1155": 13218789376.0,
            "1160": 13218789376.0,
            "1165": 13218789376.0,
            "1170": 13218789376.0,
            "1175": 13218789376.0,
            "1180": 13218789376.0,
            "1185": 13218789376.0,
            "1190": 13218789376.0,
            "1195": 13218789376.0,
            "1200": 13218789376.0,
            "1205": 13218789376.0,
            "1210": 13218789376.0,
            "1215": 13218789376.0,
            "1220": 13218789376.0,
            "1225": 13218789376.0,
            "1230": 13218789376.0,
            "1235": 13218789376.0,
            "1240": 13218789376.0,
            "1245": 13218789376.0,
            "1250": 13218789376.0,
            "1255": 13218789376.0,
            "1260": 13218789376.0,
            "1265": 13218789376.0,
            "1270": 13218789376.0,
            "1275": 13218789376.0,
            "1280": 13218789376.0,
            "1285": 13218789376.0,
            "1290": 13218789376.0,
            "1295": 13218789376.0,
            "1300": 13218789376.0,
            "1305": 13218789376.0,
            "1310": 13218789376.0,
            "1315": 13218789376.0,
            "1320": 13218789376.0,
            "1325": 13218789376.0,
            "1330": 13218789376.0,
            "1335": 13218789376.0,
            "1340": 13218789376.0,
            "1345": 13218789376.0,
            "1350": 13218789376.0,
            "1355": 13218789376.0,
            "1360": 13218789376.0,
            "1365": 13218789376.0,
            "1370": 13218789376.0,
            "1375": 13218789376.0,
            "1380": 13218789376.0,
            "1385": 13218789376.0,
            "1390": 13218789376.0,
            "1395": 13218789376.0,
            "1400": 13218789376.0,
            "1405": 13218789376.0,
            "1410": 13218789376.0,
            "1415": 13218789376.0,
            "1420": 13218789376.0,
            "1425": 13218789376.0,
            "1430": 13218789376.0,
            "1435": 13218789376.0,
            "1440": 13218789376.0,
            "1445": 13218789376.0,
            "1450": 13218789376.0,
            "1455": 13218789376.0,
            "1460": 13218789376.0,
            "1465": 13218789376.0,
            "1470": 13218789376.0,
            "1475": 13218789376.0,
            "1480": 13218789376.0,
            "1485": 13218789376.0,
            "1490": 13218789376.0,
            "1495": 13218789376.0,
            "1500": 13218789376.0,
            "1505": 13218789376.0,
            "1510": 13218789376.0,
            "1515": 13218789376.0,
            "1520": 13218789376.0,
            "1525": 13218789376.0,
            "1530": 13218789376.0,
            "1535": 13218789376.0,
            "1540": 13218789376.0,
            "1545": 13218789376.0,
            "1550": 13218789376.0,
            "1555": 13218789376.0,
            "1560": 13218789376.0,
            "1565": 13218789376.0,
            "1570": 13218789376.0,
            "1575": 13218789376.0,
            "1580": 13218789376.0,
            "1585": 13218789376.0,
            "1590": 13218789376.0,
            "1595": 13218789376.0,
            "1600": 13218789376.0,
            "1605": 13218789376.0,
            "1610": 13218789376.0,
            "1615": 13218789376.0,
            "1620": 13218789376.0,
            "1625": 13218789376.0,
            "1630": 13218789376.0,
            "1635": 13218789376.0,
            "1640": 13218789376.0,
            "1645": 13218789376.0,
            "1650": 13218789376.0,
            "1655": 13218789376.0,
            "1660": 13218789376.0,
            "1665": 13218789376.0,
            "1670": 13218789376.0,
            "1675": 13218789376.0,
            "1680": 13218789376.0,
            "1685": 13218789376.0,
            "1690": 13218789376.0,
            "1695": 13218789376.0,
            "1700": 13218789376.0,
            "1705": 13218789376.0,
            "1710": 13218789376.0,
            "1715": 13218789376.0,
            "1720": 13218789376.0,
            "1725": 13218789376.0,
            "1730": 13218789376.0,
            "1735": 13218789376.0,
            "1740": 13218789376.0,
            "1745": 13218789376.0,
            "1750": 13218789376.0,
            "1755": 13218789376.0,
            "1760": 13218789376.0,
            "1765": 13218789376.0,
            "1770": 13218789376.0,
            "1775": 13218789376.0,
            "1780": 13218789376.0,
            "1785": 13218789376.0,
            "1790": 13218789376.0,
            "1795": 13218789376.0,
            "1800": 13218789376.0,
            "1805": 13218789376.0,
            "1810": 13218789376.0,
            "1815": 13218789376.0,
            "1820": 13218789376.0,
            "1825": 13218789376.0,
            "1830": 13218789376.0,
            "1835": 13218789376.0,
            "1840": 13218789376.0,
            "1845": 13218789376.0,
            "1850": 13218789376.0,
            "1855": 13218789376.0,
            "1860": 13218789376.0,
            "1865": 13218789376.0,
            "1870": 13218789376.0,
            "1875": 13218789376.0,
            "1880": 13218789376.0,
            "1885": 13218789376.0,
            "1890": 13218789376.0,
            "1895": 13218789376.0,
            "1900": 13218789376.0,
            "1905": 13218789376.0,
            "1910": 13218789376.0,
            "1915": 13218789376.0,
            "1920": 13218789376.0,
            "1925": 13218789376.0,
            "1930": 13218789376.0,
            "1935": 13218789376.0,
            "1940": 13218789376.0,
            "1945": 13218789376.0,
            "1950": 13218789376.0,
            "1955": 13218789376.0,
            "1960": 13218789376.0,
            "1965": 13218789376.0,
            "1970": 13218789376.0,
            "1975": 13218789376.0,
            "1980": 13218789376.0,
            "1985": 13218789376.0,
            "1990": 13218789376.0,
            "1995": 13218789376.0,
            "2000": 13218789376.0,
            "2005": 13218789376.0,
            "2010": 13218789376.0,
            "2015": 13218789376.0,
            "2020": 13218789376.0,
            "2025": 13218789376.0,
            "2030": 13218789376.0,
            "2035": 13218789376.0,
            "2040": 13218789376.0,
            "2045": 13218789376.0,
            "2050": 13218789376.0,
            "2055": 13218789376.0,
            "2060": 13218789376.0,
            "2065": 13218789376.0,
            "2070": 13218789376.0,
            "2075": 13218789376.0,
            "2080": 13218789376.0,
            "2085": 13218789376.0,
            "2090": 13218789376.0,
            "2095": 13218789376.0,
            "2100": 13218789376.0,
            "2105": 13218789376.0,
            "2110": 13218789376.0,
            "2115": 13218789376.0,
            "2120": 13218789376.0,
            "2125": 13218789376.0,
            "2130": 13218789376.0,
            "2135": 13218789376.0,
            "2140": 13218789376.0,
            "2145": 13218789376.0,
            "2150": 13218789376.0,
            "2155": 13218789376.0,
            "2160": 13218789376.0,
            "2165": 13218789376.0,
            "2170": 13218789376.0,
            "2175": 13218789376.0,
            "2180": 13218789376.0,
            "2185": 13218789376.0,
            "2190": 13218789376.0,
            "2195": 13218789376.0,
            "2200": 13218789376.0,
            "2205": 13218789376.0,
            "2210": 13218789376.0,
            "2215": 13218789376.0,
            "2220": 13218789376.0,
            "2225": 13218789376.0,
            "2230": 13218789376.0,
            "2235": 13218789376.0,
            "2240": 13218789376.0,
            "2245": 13218789376.0,
            "2250": 13218789376.0,
            "2255": 13218789376.0,
            "2260": 13218789376.0,
            "2265": 13218789376.0,
            "2270": 13218789376.0,
            "2275": 13218789376.0,
            "2280": 13218789376.0,
            "2285": 13218789376.0,
            "2290": 13218789376.0,
            "2295": 13218789376.0,
            "2300": 13218789376.0,
            "2305": 13218789376.0,
            "2310": 13218789376.0,
            "2315": 13218789376.0,
            "2320": 13218789376.0,
            "2325": 13218789376.0,
            "2330": 13218789376.0,
            "2335": 13218789376.0,
            "2340": 13218789376.0,
            "2345": 13218789376.0,
            "2350": 13218789376.0,
            "2355": 13218789376.0,
            "2360": 13218789376.0,
            "2365": 13218789376.0,
            "2370": 13218789376.0,
            "2375": 13218789376.0,
            "2380": 13218789376.0,
            "2385": 13218789376.0,
            "2390": 13218789376.0,
            "2395": 13218789376.0,
            "2400": 13218789376.0,
            "2405": 13218789376.0,
            "2410": 13218789376.0,
            "2415": 13218789376.0,
            "2420": 13218789376.0,
            "2425": 13218789376.0,
            "2430": 13218789376.0,
            "2435": 13218789376.0,
            "2440": 13218789376.0,
            "2445": 13218789376.0,
            "2450": 13218789376.0,
            "2455": 13218789376.0,
            "2460": 13218789376.0,
            "2465": 13218789376.0,
            "2470": 13218789376.0,
            "2475": 13218789376.0,
            "2480": 13218789376.0,
            "2485": 13218789376.0,
            "2490": 13218789376.0,
            "2495": 13218789376.0,
            "2500": 13218789376.0,
            "2505": 13218789376.0,
            "2510": 13218789376.0,
            "2515": 13218789376.0,
            "2520": 13218789376.0,
            "2525": 13218789376.0,
            "2530": 13218789376.0,
            "2535": 13218789376.0,
            "2540": 13218789376.0,
            "2545": 13218789376.0,
            "2550": 13218789376.0,
            "2555": 13218789376.0,
            "2560": 13218789376.0,
            "2565": 13218789376.0,
            "2570": 13218789376.0,
            "2575": 13218789376.0,
            "2580": 13218789376.0,
            "2585": 13218789376.0,
            "2590": 13218789376.0,
            "2595": 13218789376.0,
            "2600": 13218789376.0,
            "2605": 13218789376.0,
            "2610": 13218789376.0,
            "2615": 13218789376.0,
            "2620": 13218789376.0,
            "2625": 13218789376.0,
            "2630": 13218789376.0,
            "2635": 13218789376.0,
            "2640": 13218789376.0,
            "2645": 13218789376.0,
            "2650": 13218789376.0,
            "2655": 13218789376.0,
            "2660": 13218789376.0,
            "2665": 13218789376.0,
            "2670": 13218789376.0,
            "2675": 13218789376.0,
            "2680": 13218789376.0,
            "2685": 13218789376.0,
            "2690": 13218789376.0,
            "2695": 13218789376.0,
            "2700": 13218789376.0,
            "2705": 13218789376.0,
            "2710": 13218789376.0,
            "2715": 13218789376.0,
            "2720": 13218789376.0,
            "2725": 13218789376.0,
            "2730": 13218789376.0,
            "2735": 13218789376.0,
            "2740": 13218789376.0,
            "2745": 13218789376.0,
            "2750": 13218789376.0,
            "2755": 13218789376.0,
            "2760": 13218789376.0,
            "2765": 13218789376.0,
            "2770": 13218789376.0,
            "2775": 13218789376.0,
            "2780": 13218789376.0,
            "2785": 13218789376.0,
            "2790": 13218789376.0,
            "2795": 13218789376.0,
            "2800": 13218789376.0,
            "2805": 13218789376.0,
            "2810": 13218789376.0,
            "2815": 13218789376.0,
            "2820": 13218789376.0,
            "2825": 13218789376.0,
            "2830": 13218789376.0,
            "2835": 13218789376.0,
            "2840": 13218789376.0,
            "2845": 13218789376.0,
            "2850": 13218789376.0,
            "2855": 13218789376.0,
            "2860": 13218789376.0,
            "2865": 13218789376.0,
            "2870": 13218789376.0,
            "2875": 13218789376.0,
            "2880": 13218789376.0,
            "2885": 13218789376.0,
            "2890": 13218789376.0,
            "2895": 13218789376.0,
            "2900": 13218789376.0,
            "2905": 13218789376.0,
            "2910": 13218789376.0,
            "2915": 13218789376.0,
            "2920": 13218789376.0,
            "2925": 13218789376.0,
            "2930": 13218789376.0,
            "2935": 13218789376.0,
            "2940": 13218789376.0,
            "2945": 13218789376.0,
            "2950": 13218789376.0,
            "2955": 13218789376.0,
            "2960": 13218789376.0,
            "2965": 13218789376.0,
            "2970": 13218789376.0,
            "2975": 13218789376.0,
            "2980": 13218789376.0,
            "2985": 13218789376.0,
            "2990": 13218789376.0,
            "2995": 13218789376.0,
            "3000": 13218789376.0,
            "3005": 13218789376.0,
            "3010": 13218789376.0,
            "3015": 13218789376.0,
            "3020": 13218789376.0,
            "3025": 13218789376.0,
            "3030": 13218789376.0,
            "3035": 13218789376.0,
            "3040": 13218789376.0,
            "3045": 13218789376.0,
            "3050": 13218789376.0,
            "3055": 13218789376.0,
            "3060": 13218789376.0,
            "3065": 13218789376.0,
            "3070": 13218789376.0,
            "3075": 13218789376.0,
            "3080": 13218789376.0,
            "3085": 13218789376.0,
            "3090": 13218789376.0,
            "3095": 13218789376.0,
            "3100": 13218789376.0,
            "3105": 13218789376.0,
            "3110": 13218789376.0,
            "3115": 13218789376.0,
            "3120": 13218789376.0,
            "3125": 13218789376.0,
            "3130": 13218789376.0,
            "3135": 13218789376.0,
            "3140": 13218789376.0,
            "3145": 13218789376.0,
            "3150": 13218789376.0,
            "3155": 13218789376.0,
            "3160": 13218789376.0,
            "3165": 13218789376.0,
            "3170": 13218789376.0,
            "3175": 13218789376.0,
            "3180": 13218789376.0,
            "3185": 13218789376.0,
            "3190": 13218789376.0,
            "3195": 13218789376.0,
            "3200": 13218789376.0,
            "3205": 13218789376.0,
            "3210": 13218789376.0,
            "3215": 13218789376.0,
            "3220": 13218789376.0,
            "3225": 13218789376.0,
            "3230": 13218789376.0,
            "3235": 13218789376.0,
            "3240": 13218789376.0,
            "3245": 13218789376.0,
            "3250": 13218789376.0,
            "3255": 13218789376.0,
            "3260": 13218789376.0,
            "3265": 13218789376.0,
            "3270": 13218789376.0,
            "3275": 13218789376.0,
            "3280": 13218789376.0,
            "3285": 13218789376.0,
            "3290": 13218789376.0,
            "3295": 13218789376.0,
            "3300": 13218789376.0,
            "3305": 13218789376.0,
            "3310": 13218789376.0,
            "3315": 13218789376.0,
            "3320": 13218789376.0,
            "3325": 13218789376.0,
            "3330": 13218789376.0,
            "3335": 13218789376.0,
            "3340": 13218789376.0,
            "3345": 13218789376.0,
            "3350": 13218789376.0,
            "3355": 13218789376.0,
            "3360": 13218789376.0,
            "3365": 13218789376.0,
            "3370": 13218789376.0,
            "3375": 13218789376.0,
            "3380": 13218789376.0,
            "3385": 13218789376.0,
            "3390": 13218789376.0,
            "3395": 13218789376.0,
            "3400": 13218789376.0,
            "3405": 13218789376.0,
            "3410": 13218789376.0,
            "3415": 13218789376.0,
            "3420": 13218789376.0,
            "3425": 13218789376.0,
            "3430": 13218789376.0,
            "3435": 13218789376.0,
            "3440": 13218789376.0,
            "3445": 13218789376.0,
            "3450": 13218789376.0,
            "3455": 13218789376.0,
            "3460": 13218789376.0,
            "3465": 13218789376.0,
            "3470": 13218789376.0,
            "3475": 13218789376.0,
            "3480": 13218789376.0,
            "3485": 13218789376.0,
            "3490": 13218789376.0,
            "3495": 13218789376.0,
            "3500": 13218789376.0,
            "3505": 13218789376.0,
            "3510": 13218789376.0,
            "3515": 13218789376.0,
            "3520": 13218789376.0,
            "3525": 13218789376.0,
            "3530": 13218789376.0,
            "3535": 13218789376.0,
            "3540": 13218789376.0,
            "3545": 13218789376.0,
            "3550": 13218789376.0,
            "3555": 13218789376.0,
            "3560": 13218789376.0,
            "3565": 13218789376.0,
            "3570": 13218789376.0,
            "3575": 13218789376.0,
            "3580": 13218789376.0,
            "3585": 13218789376.0,
            "3590": 13218789376.0,
            "3595": 13218789376.0,
            "3600": 13218789376.0,
            "3605": 13218789376.0,
            "3610": 13218789376.0,
            "3615": 13218789376.0,
            "3620": 13218789376.0,
            "3625": 13218789376.0,
            "3630": 13218789376.0,
            "3635": 13218789376.0,
            "3640": 13218789376.0,
            "3645": 13218789376.0,
            "3650": 13218789376.0,
            "3655": 13218789376.0,
            "3660": 13218789376.0,
            "3665": 13218789376.0,
            "3670": 13218789376.0,
            "3675": 13218789376.0,
            "3680": 13218789376.0,
            "3685": 13218789376.0,
            "3690": 13218789376.0,
            "3695": 13218789376.0,
            "3700": 13218789376.0,
            "3705": 13218789376.0,
            "3710": 13218789376.0,
            "3715": 13218789376.0,
            "3720": 13218789376.0,
            "3725": 13218789376.0,
            "3730": 13218789376.0,
            "3735": 13218789376.0,
            "3740": 13218789376.0,
            "3745": 13218789376.0,
            "3750": 13218789376.0,
            "3755": 13218789376.0,
            "3760": 13218789376.0,
            "3765": 13218789376.0,
            "3770": 13218789376.0,
            "3775": 13218789376.0,
            "3780": 13218789376.0,
            "3785": 13218789376.0,
            "3790": 13218789376.0,
            "3795": 13218789376.0,
            "3800": 13218789376.0,
            "3805": 13218789376.0,
            "3810": 13218789376.0,
            "3815": 13218789376.0,
            "3820": 13218789376.0,
            "3825": 13218789376.0,
            "3830": 13218789376.0,
            "3835": 13218789376.0,
            "3840": 13218789376.0,
            "3845": 13218789376.0,
            "3850": 13218789376.0,
            "3855": 13218789376.0,
            "3860": 13218789376.0,
            "3865": 13218789376.0,
            "3870": 13218789376.0,
            "3875": 13218789376.0,
            "3880": 13218789376.0,
            "3885": 13218789376.0,
            "3890": 13218789376.0,
            "3895": 13218789376.0,
            "3900": 13218789376.0,
            "3905": 13218789376.0,
            "3910": 13218789376.0,
            "3915": 13218789376.0,
            "3920": 13218789376.0,
            "3925": 13218789376.0,
            "3930": 13218789376.0,
            "3935": 13218789376.0,
            "3940": 13218789376.0,
            "3945": 13218789376.0,
            "3950": 13218789376.0,
            "3955": 13218789376.0,
            "3960": 13218789376.0,
            "3965": 13218789376.0,
            "3970": 13218789376.0,
            "3975": 13218789376.0,
            "3980": 13218789376.0,
            "3985": 13218789376.0,
            "3990": 13218789376.0,
            "3995": 13218789376.0,
            "4000": 13218789376.0,
            "4005": 13218789376.0,
            "4010": 13218789376.0,
            "4015": 13218789376.0,
            "4020": 13218789376.0,
            "4025": 13218789376.0,
            "4030": 13218789376.0,
            "4035": 13218789376.0,
            "4040": 13218789376.0,
            "4045": 13218789376.0,
            "4050": 13218789376.0,
            "4055": 13218789376.0,
            "4060": 13218789376.0,
            "4065": 13218789376.0,
            "4070": 13218789376.0,
            "4075": 13218789376.0,
            "4080": 13218789376.0,
            "4085": 13218789376.0,
            "4090": 13218789376.0,
            "4095": 13218789376.0,
            "4100": 13218789376.0,
            "4105": 13218789376.0,
            "4110": 13218789376.0,
            "4115": 13218789376.0,
            "4120": 13218789376.0,
            "4125": 13218789376.0,
            "4130": 13218789376.0,
            "4135": 13218789376.0,
            "4140": 13218789376.0,
            "4145": 13218789376.0,
            "4150": 13218789376.0,
            "4155": 13218789376.0,
            "4160": 13218789376.0,
            "4165": 13218789376.0,
            "4170": 13218789376.0,
            "4175": 13218789376.0,
            "4180": 13218789376.0,
            "4185": 13218789376.0,
            "4190": 13218789376.0,
            "4195": 13218789376.0,
            "4200": 13218789376.0,
            "4205": 13218789376.0,
            "4210": 13218789376.0,
            "4215": 13218789376.0,
            "4220": 13218789376.0,
            "4225": 13218789376.0,
            "4230": 13218789376.0,
            "4235": 13218789376.0,
            "4240": 13218789376.0,
            "4245": 13218789376.0,
            "4250": 13218789376.0,
            "4255": 13218789376.0,
            "4260": 13218789376.0,
            "4265": 13218789376.0,
            "4270": 13218789376.0,
            "4275": 13218789376.0,
            "4280": 13218789376.0,
            "4285": 13218789376.0,
            "4290": 13218789376.0,
            "4295": 13218789376.0,
            "4300": 13218789376.0,
            "4305": 13218789376.0,
            "4310": 13218789376.0,
            "4315": 13218789376.0,
            "4320": 13218789376.0,
            "4325": 13218789376.0,
            "4330": 13218789376.0,
            "4335": 13218789376.0,
            "4340": 13218789376.0,
            "4345": 13218789376.0,
            "4350": 13218789376.0,
            "4355": 13218789376.0,
            "4360": 13218789376.0,
            "4365": 13218789376.0,
            "4370": 13218789376.0,
            "4375": 13218789376.0,
            "4380": 13218789376.0,
            "4385": 13218789376.0,
            "4390": 13218789376.0,
            "4395": 13218789376.0,
            "4400": 13218789376.0,
            "4405": 13218789376.0,
            "4410": 13218789376.0,
            "4415": 13218789376.0,
            "4420": 13218789376.0,
            "4425": 13218789376.0,
            "4430": 13218789376.0,
            "4435": 13218789376.0,
            "4440": 13218789376.0,
            "4445": 13218789376.0,
            "4450": 13218789376.0,
            "4455": 13218789376.0,
            "4460": 13218789376.0,
            "4465": 13218789376.0,
            "4470": 13218789376.0,
            "4475": 13218789376.0,
            "4480": 13218789376.0,
            "4485": 13218789376.0,
            "4490": 13218789376.0,
            "4495": 13218789376.0,
            "4500": 13218789376.0,
            "4505": 13218789376.0,
            "4510": 13218789376.0,
            "4515": 13218789376.0,
            "4520": 13218789376.0,
            "4525": 13218789376.0,
            "4530": 13218789376.0,
            "4535": 13218789376.0,
            "4540": 13218789376.0,
            "4545": 13218789376.0,
            "4550": 13218789376.0,
            "4555": 13218789376.0,
            "4560": 13218789376.0,
            "4565": 13218789376.0,
            "4570": 13218789376.0,
            "4575": 13218789376.0,
            "4580": 13218789376.0,
            "4585": 13218789376.0,
            "4590": 13218789376.0,
            "4595": 13218789376.0,
            "4600": 13218789376.0,
            "4605": 13218789376.0,
            "4610": 13218789376.0,
            "4615": 13218789376.0,
            "4620": 13218789376.0,
            "4625": 13218789376.0,
            "4630": 13218789376.0,
            "4635": 13218789376.0,
            "4640": 13218789376.0,
            "4645": 13218789376.0,
            "4650": 13218789376.0,
            "4655": 13218789376.0,
            "4660": 13218789376.0,
            "4665": 13218789376.0,
            "4670": 13218789376.0,
            "4675": 13218789376.0,
            "4680": 13218789376.0,
            "4685": 13218789376.0,
            "4690": 13218789376.0,
            "4695": 13218789376.0,
            "4700": 13218789376.0,
            "4705": 13218789376.0,
            "4710": 13218789376.0,
            "4715": 13218789376.0,
            "4720": 13218789376.0,
            "4725": 13218789376.0,
            "4730": 13218789376.0,
            "4735": 13218789376.0,
            "4740": 13218789376.0,
            "4745": 13218789376.0,
            "4750": 13218789376.0,
            "4755": 13218789376.0,
            "4760": 13218789376.0,
            "4765": 13218789376.0,
            "4770": 13218789376.0,
            "4775": 13218789376.0,
            "4780": 13218789376.0,
            "4785": 13218789376.0,
            "4790": 13218789376.0,
            "4795": 13218789376.0,
            "4800": 13218789376.0,
            "4805": 13218789376.0,
            "4810": 13218789376.0,
            "4815": 13218789376.0,
            "4820": 13218789376.0,
            "4825": 13218789376.0,
            "4830": 13218789376.0,
            "4835": 13218789376.0,
            "4840": 13218789376.0,
            "4845": 13218789376.0,
            "4850": 13218789376.0,
            "4855": 13218789376.0,
            "4860": 13218789376.0,
            "4865": 13218789376.0,
            "4870": 13218789376.0,
            "4875": 13218789376.0,
            "4880": 13218789376.0,
            "4885": 13218789376.0,
            "4890": 13218789376.0,
            "4895": 13218789376.0,
            "4900": 13218789376.0,
            "4905": 13218789376.0,
            "4910": 13218789376.0,
            "4915": 13218789376.0,
            "4920": 13218789376.0,
            "4925": 13218789376.0,
            "4930": 13218789376.0,
            "4935": 13218789376.0,
            "4940": 13218789376.0,
            "4945": 13218789376.0,
            "4950": 13218789376.0,
            "4955": 13218789376.0,
            "4960": 13218789376.0,
            "4965": 13218789376.0,
            "4970": 13218789376.0,
            "4975": 13218789376.0,
            "4980": 13218789376.0,
            "4985": 13218789376.0,
            "4990": 13218789376.0,
            "4995": 13218789376.0,
            "5000": 13218789376.0,
            "5005": 13218789376.0,
            "5010": 13218789376.0,
            "5015": 13218789376.0,
            "5020": 13218789376.0,
            "5025": 13218789376.0,
            "5030": 13218789376.0,
            "5035": 13218789376.0,
            "5040": 13218789376.0,
            "5045": 13218789376.0,
            "5050": 13218789376.0,
            "5055": 13218789376.0,
            "5060": 13218789376.0,
            "5065": 13218789376.0,
            "5070": 13218789376.0,
            "5075": 13218789376.0,
            "5080": 13218789376.0,
            "5085": 13218789376.0,
            "5090": 13218789376.0,
            "5095": 13218789376.0,
            "5100": 13218789376.0,
            "5105": 13218789376.0,
            "5110": 13218789376.0,
            "5115": 13218789376.0,
            "5120": 13218789376.0,
            "5125": 13218789376.0,
            "5130": 13218789376.0,
            "5135": 13218789376.0,
            "5140": 13218789376.0,
            "5145": 13218789376.0,
            "5150": 13218789376.0,
            "5155": 13218789376.0,
            "5160": 13218789376.0,
            "5165": 13218789376.0,
            "5170": 13218789376.0,
            "5175": 13218789376.0,
            "5180": 13218789376.0,
            "5185": 13218789376.0,
            "5190": 13218789376.0,
            "5195": 13218789376.0,
            "5200": 13218789376.0,
            "5205": 13218789376.0,
            "5210": 13218789376.0,
            "5215": 13218789376.0,
            "5220": 13218789376.0,
            "5225": 13218789376.0,
            "5230": 13218789376.0,
            "5235": 13218789376.0,
            "5240": 13218789376.0,
            "5245": 13218789376.0,
            "5250": 13218789376.0,
            "5255": 13218789376.0,
            "5260": 13218789376.0,
            "5265": 13218789376.0,
            "5270": 13218789376.0,
            "5275": 13218789376.0,
            "5280": 13218789376.0,
            "5285": 13218789376.0,
            "5290": 13218789376.0,
            "5295": 13218789376.0,
            "5300": 13218789376.0,
            "5305": 13218789376.0,
            "5310": 13218789376.0,
            "5315": 13218789376.0,
            "5320": 13218789376.0,
            "5325": 13218789376.0,
            "5330": 13218789376.0,
            "5335": 13218789376.0,
            "5340": 13218789376.0,
            "5345": 13218789376.0,
            "5350": 13218789376.0,
            "5355": 13218789376.0,
            "5360": 13218789376.0,
            "5365": 13218789376.0,
            "5370": 13218789376.0,
            "5375": 13218789376.0,
            "5380": 13218789376.0,
            "5385": 13218789376.0,
            "5390": 13218789376.0,
            "5395": 13218789376.0,
            "5400": 13218789376.0,
            "5405": 13218789376.0,
            "5410": 13218789376.0,
            "5415": 13218789376.0,
            "5420": 13218789376.0,
            "5425": 13218789376.0,
            "5430": 13218789376.0,
            "5435": 13218789376.0,
            "5440": 13218789376.0,
            "5445": 13218789376.0,
            "5450": 13218789376.0,
            "5455": 13218789376.0,
            "5460": 13218789376.0,
            "5465": 13218789376.0,
            "5470": 13218789376.0,
            "5475": 13218789376.0,
            "5480": 13218789376.0,
            "5485": 13218789376.0,
            "5490": 13218789376.0,
            "5495": 13218789376.0,
            "5500": 13218789376.0,
            "5505": 13218789376.0,
            "5510": 13218789376.0,
            "5515": 13218789376.0,
            "5520": 13218789376.0,
            "5525": 13218789376.0,
            "5530": 13218789376.0,
            "5535": 13218789376.0,
            "5540": 13218789376.0,
            "5545": 13218789376.0,
            "5550": 13218789376.0,
            "5555": 13218789376.0,
            "5560": 13218789376.0,
            "5565": 13218789376.0,
            "5570": 13218789376.0,
            "5575": 13218789376.0,
            "5580": 13218789376.0,
            "5585": 13218789376.0,
            "5590": 13218789376.0,
            "5595": 13218789376.0,
            "5600": 13218789376.0,
            "5605": 13218789376.0,
            "5610": 13218789376.0,
            "5615": 13218789376.0,
            "5620": 13218789376.0,
            "5625": 13218789376.0,
            "5630": 13218789376.0,
            "5635": 13218789376.0,
            "5640": 13218789376.0,
            "5645": 13218789376.0,
            "5650": 13218789376.0,
            "5655": 13218789376.0,
            "5660": 13218789376.0,
            "5665": 13218789376.0,
            "5670": 13218789376.0,
            "5675": 13218789376.0,
            "5680": 13218789376.0,
            "5685": 13218789376.0,
            "5690": 13218789376.0,
            "5695": 13218789376.0,
            "5700": 13218789376.0,
            "5705": 13218789376.0,
            "5710": 13218789376.0,
            "5715": 13218789376.0,
            "5720": 13218789376.0,
            "5725": 13218789376.0,
            "5730": 13218789376.0,
            "5735": 13218789376.0,
            "5740": 13218789376.0,
            "5745": 13218789376.0,
            "5750": 13218789376.0,
            "5755": 13218789376.0,
            "5760": 13218789376.0,
            "5765": 13218789376.0,
            "5770": 13218789376.0,
            "5775": 13218789376.0,
            "5780": 13218789376.0,
            "5785": 13218789376.0,
            "5790": 13218789376.0,
            "5795": 13218789376.0,
            "5800": 13218789376.0,
            "5805": 13218789376.0,
            "5810": 13218789376.0,
            "5815": 13218789376.0,
            "5820": 13218789376.0,
            "5825": 13218789376.0,
            "5830": 13218789376.0,
            "5835": 13218789376.0,
            "5840": 13218789376.0,
            "5845": 13218789376.0,
            "5850": 13218789376.0,
            "5855": 13218789376.0,
            "5860": 13218789376.0,
            "5865": 13218789376.0,
            "5870": 13218789376.0,
            "5875": 13218789376.0,
            "5880": 13218789376.0,
            "5885": 13218789376.0,
            "5890": 13218789376.0,
            "5895": 13218789376.0,
            "5900": 13218789376.0,
            "5905": 13218789376.0,
            "5910": 13218789376.0,
            "5915": 13218789376.0,
            "5920": 13218789376.0,
            "5925": 13218789376.0,
            "5930": 13218789376.0,
            "5935": 13218789376.0,
            "5940": 13218789376.0,
            "5945": 13218789376.0,
            "5950": 13218789376.0,
            "5955": 13218789376.0,
            "5960": 13218789376.0,
            "5965": 13218789376.0,
            "5970": 13218789376.0,
            "5975": 13218789376.0,
            "5980": 13218789376.0,
            "5985": 13218789376.0,
            "5990": 13218789376.0,
            "5995": 13218789376.0,
            "6000": 13218789376.0,
            "6005": 13218789376.0,
            "6010": 13218789376.0,
            "6015": 13218789376.0,
            "6020": 13218789376.0,
            "6025": 13218789376.0,
            "6030": 13218789376.0,
            "6035": 13218789376.0,
            "6040": 13218789376.0,
            "6045": 13218789376.0,
            "6050": 13218789376.0,
            "6055": 13218789376.0,
            "6060": 13218789376.0,
            "6065": 13218789376.0,
            "6070": 13218789376.0,
            "6075": 13218789376.0,
            "6080": 13218789376.0,
            "6085": 13218789376.0,
            "6090": 13218789376.0,
            "6095": 13218789376.0,
            "6100": 13218789376.0,
            "6105": 13218789376.0,
            "6110": 13218789376.0,
            "6115": 13218789376.0,
            "6120": 13218789376.0,
            "6125": 13218789376.0,
            "6130": 13218789376.0,
            "6135": 13218789376.0,
            "6140": 13218789376.0,
            "6145": 13218789376.0,
            "6150": 13218789376.0,
            "6155": 13218789376.0,
            "6160": 13218789376.0,
            "6165": 13218789376.0,
            "6170": 13218789376.0,
            "6175": 13218789376.0,
            "6180": 13218789376.0,
            "6185": 13218789376.0,
            "6190": 13218789376.0,
            "6195": 13218789376.0,
            "6200": 13218789376.0,
            "6205": 13218789376.0,
            "6210": 13218789376.0,
            "6215": 13218789376.0,
            "6220": 13218789376.0,
            "6225": 13218789376.0,
            "6230": 13218789376.0,
            "6235": 13218789376.0,
            "6240": 13218789376.0,
            "6245": 13218789376.0,
            "6250": 13218789376.0,
            "6255": 13218789376.0,
            "6260": 13218789376.0,
            "6265": 13218789376.0,
            "6270": 13218789376.0,
            "6275": 13218789376.0,
            "6280": 13218789376.0,
            "6285": 13218789376.0,
            "6290": 13218789376.0,
            "6295": 13218789376.0,
            "6300": 13218789376.0,
            "6305": 13218789376.0,
            "6310": 13218789376.0,
            "6315": 13218789376.0,
            "6320": 13218789376.0,
            "6325": 13218789376.0,
            "6330": 13218789376.0,
            "6335": 13218789376.0,
            "6340": 13218789376.0,
            "6345": 13218789376.0,
            "6350": 13218789376.0,
            "6355": 13218789376.0,
            "6360": 13218789376.0,
            "6365": 13218789376.0,
            "6370": 13218789376.0,
            "6375": 13218789376.0,
            "6380": 13218789376.0,
            "6385": 13218789376.0,
            "6390": 13218789376.0,
            "6395": 13218789376.0,
            "6400": 13218789376.0,
            "6405": 13218789376.0,
            "6410": 13218789376.0,
            "6415": 13218789376.0,
            "6420": 13218789376.0,
            "6425": 13218789376.0,
            "6430": 13218789376.0,
            "6435": 13218789376.0,
            "6440": 13218789376.0,
            "6445": 13218789376.0,
            "6450": 13218789376.0,
            "6455": 13218789376.0,
            "6460": 13218789376.0,
            "6465": 13218789376.0,
            "6470": 13218789376.0,
            "6475": 13218789376.0,
            "6480": 13218789376.0,
            "6485": 13218789376.0,
            "6490": 13218789376.0,
            "6495": 13218789376.0,
            "6500": 13218789376.0,
            "6505": 13218789376.0,
            "6510": 13218789376.0,
            "6515": 13218789376.0,
            "6520": 13218789376.0,
            "6525": 13218789376.0,
            "6530": 13218789376.0,
            "6535": 13218789376.0,
            "6540": 13218789376.0,
            "6545": 13218789376.0,
            "6550": 13218789376.0,
            "6555": 13218789376.0,
            "6560": 13218789376.0,
            "6565": 13218789376.0,
            "6570": 13218789376.0,
            "6575": 13218789376.0,
            "6580": 13218789376.0,
            "6585": 13218789376.0,
            "6590": 13218789376.0,
            "6595": 13218789376.0,
            "6600": 13218789376.0,
            "6605": 13218789376.0,
            "6610": 13218789376.0,
            "6615": 13218789376.0,
            "6620": 13218789376.0,
            "6625": 13218789376.0,
            "6630": 13218789376.0,
            "6635": 13218789376.0,
            "6640": 13218789376.0,
            "6645": 13218789376.0,
            "6650": 13218789376.0,
            "6655": 13218789376.0,
            "6660": 13218789376.0,
            "6665": 13218789376.0,
            "6670": 13218789376.0,
            "6675": 13218789376.0,
            "6680": 13218789376.0,
            "6685": 13218789376.0,
            "6690": 13218789376.0,
            "6695": 13218789376.0,
            "6700": 13218789376.0,
            "6705": 13218789376.0,
            "6710": 13218789376.0,
            "6715": 13218789376.0,
            "6720": 13218789376.0,
            "6725": 13218789376.0,
            "6730": 13218789376.0,
            "6735": 13218789376.0,
            "6740": 13218789376.0,
            "6745": 13218789376.0,
            "6750": 13218789376.0,
            "6755": 13218789376.0,
            "6760": 13218789376.0,
            "6765": 13218789376.0,
            "6770": 13218789376.0,
            "6775": 13218789376.0,
            "6780": 13218789376.0,
            "6785": 13218789376.0,
            "6790": 13218789376.0,
            "6795": 13218789376.0,
            "6800": 13218789376.0,
            "6805": 13218789376.0,
            "6810": 13218789376.0,
            "6815": 13218789376.0,
            "6820": 13218789376.0,
            "6825": 13218789376.0,
            "6830": 13218789376.0,
            "6835": 13218789376.0,
            "6840": 13218789376.0,
            "6845": 13218789376.0,
            "6850": 13218789376.0,
            "6855": 13218789376.0,
            "6860": 13218789376.0,
            "6865": 13218789376.0,
            "6870": 13218789376.0,
            "6875": 13218789376.0,
            "6880": 13218789376.0,
            "6885": 13218789376.0,
            "6890": 13218789376.0,
            "6895": 13218789376.0,
            "6900": 13218789376.0,
            "6905": 13218789376.0,
            "6910": 13218789376.0,
            "6915": 13218789376.0,
            "6920": 13218789376.0,
            "6925": 13218789376.0,
            "6930": 13218789376.0,
            "6935": 13218789376.0,
            "6940": 13218789376.0,
            "6945": 13218789376.0,
            "6950": 13218789376.0,
            "6955": 13218789376.0,
            "6960": 13218789376.0,
            "6965": 13218789376.0,
            "6970": 13218789376.0,
            "6975": 13218789376.0,
            "6980": 13218789376.0,
            "6985": 13218789376.0,
            "6990": 13218789376.0,
            "6995": 13218789376.0,
            "7000": 13218789376.0,
            "7005": 13218789376.0,
            "7010": 13218789376.0,
            "7015": 13218789376.0,
            "7020": 13218789376.0,
            "7025": 13218789376.0,
            "7030": 13218789376.0,
            "7035": 13218789376.0,
            "7040": 13218789376.0,
            "7045": 13218789376.0,
            "7050": 13218789376.0,
            "7055": 13218789376.0,
            "7060": 13218789376.0,
            "7065": 13218789376.0,
            "7070": 13218789376.0,
            "7075": 13218789376.0,
            "7080": 13218789376.0,
            "7085": 13218789376.0,
            "7090": 13218789376.0,
            "7095": 13218789376.0,
            "7100": 13218789376.0,
            "7105": 13218789376.0,
            "7110": 13218789376.0,
            "7115": 13218789376.0,
            "7120": 13218789376.0,
            "7125": 13218789376.0,
            "7130": 13218789376.0,
            "7135": 13218789376.0,
            "7140": 13218789376.0,
            "7145": 13218789376.0,
            "7150": 13218789376.0,
            "7155": 13218789376.0,
            "7160": 13218789376.0,
            "7165": 13218789376.0,
            "7170": 13218789376.0,
            "7175": 13218789376.0,
            "7180": 13218789376.0,
            "7185": 13218789376.0,
            "7190": 13218789376.0,
            "7195": 13218789376.0,
            "7200": 13218789376.0,
            "7205": 13218789376.0,
            "7210": 13218789376.0,
            "7215": 13218789376.0,
            "7220": 13218789376.0,
            "7225": 13218789376.0,
            "7230": 13218789376.0,
            "7235": 13218789376.0,
            "7240": 13218789376.0,
            "7245": 13218789376.0,
            "7250": 13218789376.0,
            "7255": 13218789376.0,
            "7260": 13218789376.0,
            "7265": 13218789376.0,
            "7270": 13218789376.0,
            "7275": 13218789376.0,
            "7280": 13218789376.0,
            "7285": 13218789376.0,
            "7290": 13218789376.0,
            "7295": 13218789376.0,
            "7300": 13218789376.0,
            "7305": 13218789376.0,
            "7310": 13218789376.0,
            "7315": 13218789376.0,
            "7320": 13218789376.0,
            "7325": 13218789376.0,
            "7330": 13218789376.0,
            "7335": 13218789376.0,
            "7340": 13218789376.0,
            "7345": 13218789376.0,
            "7350": 13218789376.0,
            "7355": 13218789376.0,
            "7360": 13218789376.0,
            "7365": 13218789376.0,
            "7370": 13218789376.0,
            "7375": 13218789376.0,
            "7380": 13218789376.0,
            "7385": 13218789376.0,
            "7390": 13218789376.0,
            "7395": 13218789376.0,
            "7400": 13218789376.0,
            "7405": 13218789376.0,
            "7410": 13218789376.0,
            "7415": 13218789376.0,
            "7420": 13218789376.0,
            "7425": 13218789376.0,
            "7430": 13218789376.0,
            "7435": 13218789376.0,
            "7440": 13218789376.0,
            "7445": 13218789376.0,
            "7450": 13218789376.0,
            "7455": 13218789376.0,
            "7460": 13218789376.0,
            "7465": 13218789376.0,
            "7470": 13218789376.0,
            "7475": 13218789376.0,
            "7480": 13218789376.0,
            "7485": 13218789376.0,
            "7490": 13218789376.0,
            "7495": 13218789376.0,
            "7500": 13218789376.0,
            "7505": 13218789376.0,
            "7510": 13218789376.0,
            "7515": 13218789376.0,
            "7520": 13218789376.0,
            "7525": 13218789376.0,
            "7530": 13218789376.0,
            "7535": 13218789376.0,
            "7540": 13218789376.0,
            "7545": 13218789376.0,
            "7550": 13218789376.0,
            "7555": 13218789376.0,
            "7560": 13218789376.0,
            "7565": 13218789376.0,
            "7570": 13218789376.0,
            "7575": 13218789376.0,
            "7580": 13218789376.0,
            "7585": 13218789376.0,
            "7590": 13218789376.0,
            "7595": 13218789376.0,
            "7600": 13218789376.0,
            "7605": 13218789376.0,
            "7610": 13218789376.0,
            "7615": 13218789376.0,
            "7620": 13218789376.0,
            "7625": 13218789376.0,
            "7630": 13218789376.0,
            "7635": 13218789376.0,
            "7640": 13218789376.0,
            "7645": 13218789376.0,
            "7650": 13218789376.0,
            "7655": 13218789376.0,
            "7660": 13218789376.0,
            "7665": 13218789376.0,
            "7670": 13218789376.0,
            "7675": 13218789376.0,
            "7680": 13218789376.0,
            "7685": 13218789376.0,
            "7690": 13218789376.0,
            "7695": 13218789376.0,
            "7700": 13218789376.0,
            "7705": 13218789376.0,
            "7710": 13218789376.0,
            "7715": 13218789376.0,
            "7720": 13218789376.0,
            "7725": 13218789376.0,
            "7730": 13218789376.0,
            "7735": 13218789376.0,
            "7740": 13218789376.0,
            "7745": 13218789376.0,
            "7750": 13218789376.0,
            "7755": 13218789376.0,
            "7760": 13218789376.0,
            "7765": 13218789376.0,
            "7770": 13218789376.0,
            "7775": 13218789376.0,
            "7780": 13218789376.0,
            "7785": 13218789376.0,
            "7790": 13218789376.0,
            "7795": 13218789376.0,
            "7800": 13218789376.0,
            "7805": 13218789376.0,
            "7810": 13218789376.0,
            "7815": 13218789376.0,
            "7820": 13218789376.0,
            "7825": 13218789376.0,
            "7830": 13218789376.0,
            "7835": 13218789376.0,
            "7840": 13218789376.0,
            "7845": 13218789376.0,
            "7850": 13218789376.0,
            "7855": 13218789376.0,
            "7860": 13218789376.0,
            "7865": 13218789376.0,
            "7870": 13218789376.0,
            "7875": 13218789376.0,
            "7880": 13218789376.0,
            "7885": 13218789376.0,
            "7890": 13218789376.0,
            "7895": 13218789376.0,
            "7900": 13218789376.0,
            "7905": 13218789376.0,
            "7910": 13218789376.0,
            "7915": 13218789376.0,
            "7920": 13218789376.0,
            "7925": 13218789376.0,
            "7930": 13218789376.0,
            "7935": 13218789376.0,
            "7940": 13218789376.0,
            "7945": 13218789376.0,
            "7950": 13218789376.0,
            "7955": 13218789376.0,
            "7960": 13218789376.0,
            "7965": 13218789376.0,
            "7970": 13218789376.0,
            "7975": 13218789376.0,
            "7980": 13218789376.0,
            "7985": 13218789376.0,
            "7990": 13218789376.0,
            "7995": 13218789376.0,
            "8000": 13218789376.0,
            "8005": 13218789376.0,
            "8010": 13218789376.0,
            "8015": 13218789376.0,
            "8020": 13218789376.0,
            "8025": 13218789376.0,
            "8030": 13218789376.0,
            "8035": 13218789376.0,
            "8040": 13218789376.0,
            "8045": 13218789376.0,
            "8050": 13218789376.0,
            "8055": 13218789376.0,
            "8060": 13218789376.0,
            "8065": 13218789376.0,
            "8070": 13218789376.0,
            "8075": 13218789376.0,
            "8080": 13218789376.0,
            "8085": 13218789376.0,
            "8090": 13218789376.0,
            "8095": 13218789376.0,
            "8100": 13218789376.0,
            "8105": 13218789376.0,
            "8110": 13218789376.0,
            "8115": 13218789376.0,
            "8120": 13218789376.0,
            "8125": 13218789376.0,
            "8130": 13218789376.0,
            "8135": 13218789376.0,
            "8140": 13218789376.0,
            "8145": 13218789376.0,
            "8150": 13218789376.0,
            "8155": 13218789376.0,
            "8160": 13218789376.0,
            "8165": 13218789376.0,
            "8170": 13218789376.0,
            "8175": 13218789376.0,
            "8180": 13218789376.0,
            "8185": 13218789376.0,
            "8190": 13218789376.0,
            "8195": 13218789376.0,
            "8200": 13218789376.0,
            "8205": 13218789376.0,
            "8210": 13218789376.0,
            "8215": 13218789376.0,
            "8220": 13218789376.0,
            "8225": 13218789376.0,
            "8230": 13218789376.0,
            "8235": 13218789376.0,
            "8240": 13218789376.0,
            "8245": 13218789376.0,
            "8250": 13218789376.0,
            "8255": 13218789376.0,
            "8260": 13218789376.0,
            "8265": 13218789376.0,
            "8270": 13218789376.0,
            "8275": 13218789376.0,
            "8280": 13218789376.0,
            "8285": 13218789376.0,
            "8290": 13218789376.0,
            "8295": 13218789376.0,
            "8300": 13218789376.0,
            "8305": 13218789376.0,
            "8310": 13218789376.0,
            "8315": 13218789376.0,
            "8320": 13218789376.0,
            "8325": 13218789376.0,
            "8330": 13218789376.0,
            "8335": 13218789376.0,
            "8340": 13218789376.0,
            "8345": 13218789376.0,
            "8350": 13218789376.0,
            "8355": 13218789376.0,
            "8360": 13218789376.0,
            "8365": 13218789376.0,
            "8370": 13218789376.0,
            "8375": 13218789376.0,
            "8380": 13218789376.0,
            "8385": 13218789376.0,
            "8390": 13218789376.0,
            "8395": 13218789376.0,
            "8400": 13218789376.0,
            "8405": 13218789376.0,
            "8410": 13218789376.0,
            "8415": 13218789376.0,
            "8420": 13218789376.0,
            "8425": 13218789376.0,
            "8430": 13218789376.0,
            "8435": 13218789376.0,
            "8440": 13218789376.0,
            "8445": 13218789376.0,
            "8450": 13218789376.0,
            "8455": 13218789376.0,
            "8460": 13218789376.0,
            "8465": 13218789376.0,
            "8470": 13218789376.0,
            "8475": 13218789376.0,
            "8480": 13218789376.0,
            "8485": 13218789376.0,
            "8490": 13218789376.0,
            "8495": 13218789376.0,
            "8500": 13218789376.0,
            "8505": 13218789376.0,
            "8510": 13218789376.0,
            "8515": 13218789376.0,
            "8520": 13218789376.0,
            "8525": 13218789376.0,
            "8530": 13218789376.0,
            "8535": 13218789376.0,
            "8540": 13218789376.0,
            "8545": 13218789376.0,
            "8550": 13218789376.0,
            "8555": 13218789376.0,
            "8560": 13218789376.0,
            "8565": 13218789376.0,
            "8570": 13218789376.0,
            "8575": 13218789376.0,
            "8580": 13218789376.0,
            "8585": 13218789376.0,
            "8590": 13218789376.0,
            "8595": 13218789376.0,
            "8600": 13218789376.0,
            "8605": 13218789376.0,
            "8610": 13218789376.0,
            "8615": 13218789376.0,
            "8620": 13218789376.0,
            "8625": 13218789376.0,
            "8630": 13218789376.0,
            "8635": 13218789376.0,
            "8640": 13218789376.0,
            "8645": 13218789376.0,
            "8650": 13218789376.0,
            "8655": 13218789376.0,
            "8660": 13218789376.0,
            "8665": 13218789376.0,
            "8670": 13218789376.0,
            "8675": 13218789376.0,
            "8680": 13218789376.0,
            "8685": 13218789376.0,
            "8690": 13218789376.0,
            "8695": 13218789376.0,
            "8700": 13218789376.0,
            "8705": 13218789376.0,
            "8710": 13218789376.0,
            "8715": 13218789376.0,
            "8720": 13218789376.0,
            "8725": 13218789376.0,
            "8730": 13218789376.0,
            "8735": 13218789376.0,
            "8740": 13218789376.0,
            "8745": 13218789376.0,
            "8750": 13218789376.0,
            "8755": 13218789376.0,
            "8760": 13218789376.0,
            "8765": 13218789376.0,
            "8770": 13218789376.0,
            "8775": 13218789376.0,
            "8780": 13218789376.0,
            "8785": 13218789376.0,
            "8790": 13218789376.0,
            "8795": 13218789376.0,
            "8800": 13218789376.0,
            "8805": 13218789376.0,
            "8810": 13218789376.0,
            "8815": 13218789376.0,
            "8820": 13218789376.0,
            "8825": 13218789376.0,
            "8830": 13218789376.0,
            "8835": 13218789376.0,
            "8840": 13218789376.0,
            "8845": 13218789376.0,
            "8850": 13218789376.0,
            "8855": 13218789376.0,
            "8860": 13218789376.0,
            "8865": 13218789376.0,
            "8870": 13218789376.0,
            "8875": 13218789376.0,
            "8880": 13218789376.0,
            "8885": 13218789376.0,
            "8890": 13218789376.0,
            "8895": 13218789376.0,
            "8900": 13218789376.0,
            "8905": 13218789376.0,
            "8910": 13218789376.0,
            "8915": 13218789376.0,
            "8920": 13218789376.0,
            "8925": 13218789376.0,
            "8930": 13218789376.0,
            "8935": 13218789376.0,
            "8940": 13218789376.0,
            "8945": 13218789376.0,
            "8950": 13218789376.0,
            "8955": 13218789376.0,
            "8960": 13218789376.0,
            "8965": 13218789376.0,
            "8970": 13218789376.0,
            "8975": 13218789376.0,
            "8980": 13218789376.0,
            "8985": 13218789376.0,
            "8990": 13218789376.0,
            "8995": 13218789376.0,
            "9000": 13218789376.0,
            "9005": 13218789376.0,
            "9010": 13218789376.0,
            "9015": 13218789376.0,
            "9020": 13218789376.0,
            "9025": 13218789376.0,
            "9030": 13218789376.0,
            "9035": 13218789376.0,
            "9040": 13218789376.0,
            "9045": 13218789376.0,
            "9050": 13218789376.0,
            "9055": 13218789376.0,
            "9060": 13218789376.0,
            "9065": 13218789376.0,
            "9070": 13218789376.0,
            "9075": 13218789376.0,
            "9080": 13218789376.0,
            "9085": 13218789376.0,
            "9090": 13218789376.0,
            "9095": 13218789376.0,
            "9100": 13218789376.0,
            "9105": 13218789376.0,
            "9110": 13218789376.0,
            "9115": 13218789376.0,
            "9120": 13218789376.0,
            "9125": 13218789376.0,
            "9130": 13218789376.0,
            "9135": 13218789376.0,
            "9140": 13218789376.0,
            "9145": 13218789376.0,
            "9150": 13218789376.0,
            "9155": 13218789376.0,
            "9160": 13218789376.0,
            "9165": 13218789376.0,
            "9170": 13218789376.0,
            "9175": 13218789376.0,
            "9180": 13218789376.0,
            "9185": 13218789376.0,
            "9190": 13218789376.0,
            "9195": 13218789376.0,
            "9200": 13218789376.0,
            "9205": 13218789376.0,
            "9210": 13218789376.0,
            "9215": 13218789376.0,
            "9220": 13218789376.0,
            "9225": 13218789376.0,
            "9230": 13218789376.0,
            "9235": 13218789376.0,
            "9240": 13218789376.0,
            "9245": 13218789376.0,
            "9250": 13218789376.0,
            "9255": 13218789376.0,
            "9260": 13218789376.0,
            "9265": 13218789376.0,
            "9270": 13218789376.0,
            "9275": 13218789376.0,
            "9280": 13218789376.0,
            "9285": 13218789376.0,
            "9290": 13218789376.0,
            "9295": 13218789376.0,
            "9300": 13218789376.0,
            "9305": 13218789376.0,
            "9310": 13218789376.0,
            "9315": 13218789376.0,
            "9320": 13218789376.0,
            "9325": 13218789376.0,
            "9330": 13218789376.0,
            "9335": 13218789376.0,
            "9340": 13218789376.0,
            "9345": 13218789376.0,
            "9350": 13218789376.0,
            "9355": 13218789376.0,
            "9360": 13218789376.0,
            "9365": 13218789376.0,
            "9370": 13218789376.0,
            "9375": 13218789376.0,
            "9380": 13218789376.0,
            "9385": 13218789376.0,
            "9390": 13218789376.0,
            "9395": 13218789376.0,
            "9400": 13218789376.0,
            "9405": 13218789376.0,
            "9410": 13218789376.0,
            "9415": 13218789376.0,
            "9420": 13218789376.0,
            "9425": 13218789376.0,
            "9430": 13218789376.0,
            "9435": 13218789376.0,
            "9440": 13218789376.0,
            "9445": 13218789376.0,
            "9450": 13218789376.0,
            "9455": 13218789376.0,
            "9460": 13218789376.0,
            "9465": 13218789376.0,
            "9470": 13218789376.0,
            "9475": 13218789376.0,
            "9480": 13218789376.0,
            "9485": 13218789376.0,
            "9490": 13218789376.0,
            "9495": 13218789376.0,
            "9500": 13218789376.0,
            "9505": 13218789376.0,
            "9510": 13218789376.0,
            "9515": 13218789376.0,
            "9520": 13218789376.0,
            "9525": 13218789376.0,
            "9530": 13218789376.0,
            "9535": 13218789376.0,
            "9540": 13218789376.0,
            "9545": 13218789376.0,
            "9550": 13218789376.0,
            "9555": 13218789376.0,
            "9560": 13218789376.0,
            "9565": 13218789376.0,
            "9570": 13218789376.0,
            "9575": 13218789376.0,
            "9580": 13218789376.0,
            "9585": 13218789376.0,
            "9590": 13218789376.0,
            "9595": 13218789376.0,
            "9600": 13218789376.0,
            "9605": 13218789376.0,
            "9610": 13218789376.0,
            "9615": 13218789376.0,
            "9620": 13218789376.0,
            "9625": 13218789376.0,
            "9630": 13218789376.0,
            "9635": 13218789376.0,
            "9640": 13218789376.0,
            "9645": 13218789376.0,
            "9650": 13218789376.0,
            "9655": 13218789376.0,
            "9660": 13218789376.0,
            "9665": 13218789376.0,
            "9670": 13218789376.0,
            "9675": 13218789376.0,
            "9680": 13218789376.0,
            "9685": 13218789376.0,
            "9690": 13218789376.0,
            "9695": 13218789376.0,
            "9700": 13218789376.0,
            "9705": 13218789376.0,
            "9710": 13218789376.0,
            "9715": 13218789376.0,
            "9720": 13218789376.0,
            "9725": 13218789376.0,
            "9730": 13218789376.0,
            "9735": 13218789376.0,
            "9740": 13218789376.0,
            "9745": 13218789376.0,
            "9750": 13218789376.0,
            "9755": 13218789376.0,
            "9760": 13218789376.0,
            "9765": 13218789376.0,
            "9770": 13218789376.0,
            "9775": 13218789376.0,
            "9780": 13218789376.0,
            "9785": 13218789376.0,
            "9790": 13218789376.0,
            "9795": 13218789376.0,
            "9800": 13218789376.0,
            "9805": 13218789376.0,
            "9810": 13218789376.0,
            "9815": 13218789376.0,
            "9820": 13218789376.0,
            "9825": 13218789376.0,
            "9830": 13218789376.0,
            "9835": 13218789376.0,
            "9840": 13218789376.0,
            "9845": 13218789376.0,
            "9850": 13218789376.0,
            "9855": 13218789376.0,
            "9860": 13218789376.0,
            "9865": 13218789376.0,
            "9870": 13218789376.0,
            "9875": 13218789376.0,
            "9880": 13218789376.0,
            "9885": 13218789376.0,
            "9890": 13218789376.0,
            "9895": 13218789376.0,
            "9900": 13218789376.0,
            "9905": 13218789376.0,
            "9910": 13218789376.0,
            "9915": 13218789376.0,
            "9920": 13218789376.0,
            "9925": 13218789376.0,
            "9930": 13218789376.0,
            "9935": 13218789376.0,
            "9940": 13218789376.0,
            "9945": 13218789376.0,
            "9950": 13218789376.0,
            "9955": 13218789376.0,
            "9960": 13218789376.0,
            "9965": 13218789376.0,
            "9970": 13218789376.0,
            "9975": 13218789376.0,
            "9980": 13218789376.0,
            "9985": 13218789376.0,
            "9990": 13218789376.0,
            "9995": 13218789376.0,
            "10000": 13218789376.0,
            "10005": 13218789376.0,
            "10010": 13218789376.0,
            "10015": 13218789376.0,
            "10020": 13218789376.0,
            "10025": 13218789376.0,
            "10030": 13218789376.0,
            "10035": 13218789376.0,
            "10040": 13218789376.0,
            "10045": 13218789376.0,
            "10050": 13218789376.0,
            "10055": 13218789376.0,
            "10060": 13218789376.0,
            "10065": 13218789376.0,
            "10070": 13218789376.0,
            "10075": 13218789376.0,
            "10080": 13218789376.0,
            "10085": 13218789376.0,
            "10090": 13218789376.0,
            "10095": 13218789376.0,
            "10100": 13218789376.0,
            "10105": 13218789376.0,
            "10110": 13218789376.0,
            "10115": 13218789376.0,
            "10120": 13218789376.0,
            "10125": 13218789376.0,
            "10130": 13218789376.0,
            "10135": 13218789376.0,
            "10140": 13218789376.0,
            "10145": 13218789376.0,
            "10150": 13218789376.0,
            "10155": 13218789376.0,
            "10160": 13218789376.0,
            "10165": 13218789376.0,
            "10170": 13218789376.0,
            "10175": 13218789376.0,
            "10180": 13218789376.0,
            "10185": 13218789376.0,
            "10190": 13218789376.0,
            "10195": 13218789376.0,
            "10200": 13218789376.0,
            "10205": 13218789376.0,
            "10210": 13218789376.0,
            "10215": 13218789376.0,
            "10220": 13218789376.0,
            "10225": 13218789376.0,
            "10230": 13218789376.0,
            "10235": 13218789376.0,
            "10240": 13218789376.0,
            "10245": 13218789376.0,
            "10250": 13218789376.0,
            "10255": 13218789376.0,
            "10260": 13218789376.0,
            "10265": 13218789376.0,
            "10270": 13218789376.0,
            "10275": 13218789376.0,
            "10280": 13218789376.0,
            "10285": 13218789376.0,
            "10290": 13218789376.0,
            "10295": 13218789376.0,
            "10300": 13218789376.0,
            "10305": 13218789376.0,
            "10310": 13218789376.0,
            "10315": 13218789376.0,
            "10320": 13218789376.0,
            "10325": 13218789376.0,
            "10330": 13218789376.0,
            "10335": 13218789376.0,
            "10340": 13218789376.0,
            "10345": 13218789376.0,
            "10350": 13218789376.0,
            "10355": 13218789376.0,
            "10360": 13218789376.0,
            "10365": 13218789376.0,
            "10370": 13218789376.0,
            "10375": 13218789376.0,
            "10380": 13218789376.0,
            "10385": 13218789376.0,
            "10390": 13218789376.0,
            "10395": 13218789376.0,
            "10400": 13218789376.0,
            "10405": 13218789376.0,
            "10410": 13218789376.0,
            "10415": 13218789376.0,
            "10420": 13218789376.0,
            "10425": 13218789376.0,
            "10430": 13218789376.0,
            "10435": 13218789376.0,
            "10440": 13218789376.0,
            "10445": 13218789376.0,
            "10450": 13218789376.0,
            "10455": 13218789376.0,
            "10460": 13218789376.0,
            "10465": 13218789376.0,
            "10470": 13218789376.0,
            "10475": 13218789376.0,
            "10480": 13218789376.0,
            "10485": 13218789376.0,
            "10490": 13218789376.0,
            "10495": 13218789376.0,
            "10500": 13218789376.0,
            "10505": 13218789376.0,
            "10510": 13218789376.0,
            "10515": 13218789376.0,
            "10520": 13218789376.0,
            "10525": 13218789376.0,
            "10530": 13218789376.0,
            "10535": 13218789376.0,
            "10540": 13218789376.0,
            "10545": 13218789376.0,
            "10550": 13218789376.0,
            "10555": 13218789376.0,
            "10560": 13218789376.0,
            "10565": 13218789376.0,
            "10570": 13218789376.0,
            "10575": 13218789376.0,
            "10580": 13218789376.0,
            "10585": 13218789376.0,
            "10590": 13218789376.0,
            "10595": 13218789376.0,
            "10600": 13218789376.0,
            "10605": 13218789376.0,
            "10610": 13218789376.0,
            "10615": 13218789376.0,
            "10620": 13218789376.0,
            "10625": 13218789376.0,
            "10630": 13218789376.0,
            "10635": 13218789376.0,
            "10640": 13218789376.0,
            "10645": 13218789376.0,
            "10650": 13218789376.0,
            "10655": 13218789376.0,
            "10660": 13218789376.0,
            "10665": 13218789376.0,
            "10670": 13218789376.0,
            "10675": 13218789376.0,
            "10680": 13218789376.0,
            "10685": 13218789376.0,
            "10690": 13218789376.0,
            "10695": 13218789376.0,
            "10700": 13218789376.0,
            "10705": 13218789376.0,
            "10710": 13218789376.0,
            "10715": 13218789376.0,
            "10720": 13218789376.0,
            "10725": 13218789376.0,
            "10730": 13218789376.0,
            "10735": 13218789376.0,
            "10740": 13218789376.0,
            "10745": 13218789376.0,
            "10750": 13218789376.0,
            "10755": 13218789376.0,
            "10760": 13218789376.0,
            "10765": 13218789376.0,
            "10770": 13218789376.0,
            "10775": 13218789376.0,
            "10780": 13218789376.0,
            "10785": 13218789376.0,
            "10790": 13218789376.0,
            "10795": 13218789376.0,
            "10800": 13218789376.0,
            "10805": 13218789376.0,
            "10810": 13218789376.0,
            "10815": 13218789376.0,
            "10820": 13218789376.0,
            "10825": 13218789376.0,
            "10830": 13218789376.0,
            "10835": 13218789376.0,
            "10840": 13218789376.0,
            "10845": 13218789376.0,
            "10850": 13218789376.0,
            "10855": 13218789376.0,
            "10860": 13218789376.0,
            "10865": 13218789376.0,
            "10870": 13218789376.0,
            "10875": 13218789376.0,
            "10880": 13218789376.0,
            "10885": 13218789376.0,
            "10890": 13218789376.0,
            "10895": 13218789376.0,
            "10900": 13218789376.0,
            "10905": 13218789376.0,
            "10910": 13218789376.0,
            "10915": 13218789376.0,
            "10920": 13218789376.0,
            "10925": 13218789376.0,
            "10930": 13218789376.0,
            "10935": 13218789376.0,
            "10940": 13218789376.0,
            "10945": 13218789376.0,
            "10950": 13218789376.0,
            "10955": 13218789376.0,
            "10960": 13218789376.0,
            "10965": 13218789376.0,
            "10970": 13218789376.0,
            "10975": 13218789376.0,
            "10980": 13218789376.0,
            "10985": 13218789376.0,
            "10990": 13218789376.0,
            "10995": 13218789376.0,
            "11000": 13218789376.0,
            "11005": 13218789376.0,
            "11010": 13218789376.0,
            "11015": 13218789376.0,
            "11020": 13218789376.0,
            "11025": 13218789376.0,
            "11030": 13218789376.0,
            "11035": 13218789376.0,
            "11040": 13218789376.0,
            "11045": 13218789376.0,
            "11050": 13218789376.0,
            "11055": 13218789376.0,
            "11060": 13218789376.0,
            "11065": 13218789376.0,
            "11070": 13218789376.0,
            "11075": 13218789376.0,
            "11080": 13218789376.0,
            "11085": 13218789376.0,
            "11090": 13218789376.0,
            "11095": 13218789376.0,
            "11100": 13218789376.0,
            "11105": 13218789376.0,
            "11110": 13218789376.0,
            "11115": 13218789376.0,
            "11120": 13218789376.0,
            "11125": 13218789376.0,
            "11130": 13218789376.0,
            "11135": 13218789376.0,
            "11140": 13218789376.0,
            "11145": 13218789376.0,
            "11150": 13218789376.0,
            "11155": 13218789376.0,
            "11160": 13218789376.0,
            "11165": 13218789376.0,
            "11170": 13218789376.0,
            "11175": 13218789376.0,
            "11180": 13218789376.0,
            "11185": 13218789376.0,
            "11190": 13218789376.0,
            "11195": 13218789376.0,
            "11200": 13218789376.0,
            "11205": 13218789376.0,
            "11210": 13218789376.0,
            "11215": 13218789376.0,
            "11220": 13218789376.0,
            "11225": 13218789376.0,
            "11230": 13218789376.0,
            "11235": 13218789376.0,
            "11240": 13218789376.0,
            "11245": 13218789376.0,
            "11250": 13218789376.0,
            "11255": 13218789376.0,
            "11260": 13218789376.0,
            "11265": 13218789376.0,
            "11270": 13218789376.0,
            "11275": 13218789376.0,
            "11280": 13218789376.0,
            "11285": 13218789376.0,
            "11290": 13218789376.0,
            "11295": 13218789376.0,
            "11300": 13218789376.0,
            "11305": 13218789376.0,
            "11310": 13218789376.0,
            "11315": 13218789376.0,
            "11320": 13218789376.0,
            "11325": 13218789376.0,
            "11330": 13218789376.0,
            "11335": 13218789376.0,
            "11340": 13218789376.0,
            "11345": 13218789376.0,
            "11350": 13218789376.0,
            "11355": 13218789376.0,
            "11360": 13218789376.0,
            "11365": 13218789376.0,
            "11370": 13218789376.0,
            "11375": 13218789376.0,
            "11380": 13218789376.0,
            "11385": 13218789376.0,
            "11390": 13218789376.0,
            "11395": 13218789376.0,
            "11400": 13218789376.0,
            "11405": 13218789376.0,
            "11410": 13218789376.0,
            "11415": 13218789376.0,
            "11420": 13218789376.0,
            "11425": 13218789376.0,
            "11430": 13218789376.0,
            "11435": 13218789376.0,
            "11440": 13218789376.0,
            "11445": 13218789376.0,
            "11450": 13218789376.0,
            "11455": 13218789376.0,
            "11460": 13218789376.0,
            "11465": 13218789376.0,
            "11470": 13218789376.0,
            "11475": 13218789376.0,
            "11480": 13218789376.0,
            "11485": 13218789376.0,
            "11490": 13218789376.0,
            "11495": 13218789376.0,
            "11500": 13218789376.0,
            "11505": 13218789376.0,
            "11510": 13218789376.0,
            "11515": 13218789376.0,
            "11520": 13218789376.0,
            "11525": 13218789376.0,
            "11530": 13218789376.0,
            "11535": 13218789376.0,
            "11540": 13218789376.0,
            "11545": 13218789376.0,
            "11550": 13218789376.0,
            "11555": 13218789376.0,
            "11560": 13218789376.0,
            "11565": 13218789376.0,
            "11570": 13218789376.0,
            "11575": 13218789376.0,
            "11580": 13218789376.0,
            "11585": 13218789376.0,
            "11590": 13218789376.0,
            "11595": 13218789376.0,
            "11600": 13218789376.0,
            "11605": 13218789376.0,
            "11610": 13218789376.0,
            "11615": 13218789376.0,
            "11620": 13218789376.0,
            "11625": 13218789376.0,
            "11630": 13218789376.0,
            "11635": 13218789376.0,
            "11640": 13218789376.0,
            "11645": 13218789376.0,
            "11650": 13218789376.0,
            "11655": 13218789376.0,
            "11660": 13218789376.0,
            "11665": 13218789376.0,
            "11670": 13218789376.0,
            "11675": 13218789376.0,
            "11680": 13218789376.0,
            "11685": 13218789376.0,
            "11690": 13218789376.0,
            "11695": 13218789376.0,
            "11700": 13218789376.0,
            "11705": 13218789376.0,
            "11710": 13218789376.0,
            "11715": 13218789376.0,
            "11720": 13218789376.0,
            "11725": 13218789376.0,
            "11730": 13218789376.0,
            "11735": 13218789376.0,
            "11740": 13218789376.0,
            "11745": 13218789376.0,
            "11750": 13218789376.0,
            "11755": 13218789376.0,
            "11760": 13218789376.0,
            "11765": 13218789376.0,
            "11770": 13218789376.0,
            "11775": 13218789376.0,
            "11780": 13218789376.0,
            "11785": 13218789376.0,
            "11790": 13218789376.0,
            "11795": 13218789376.0,
            "11800": 13218789376.0,
            "11805": 13218789376.0,
            "11810": 13218789376.0,
            "11815": 13218789376.0,
            "11820": 13218789376.0,
            "11825": 13218789376.0,
            "11830": 13218789376.0,
            "11835": 13218789376.0,
            "11840": 13218789376.0,
            "11845": 13218789376.0,
            "11850": 13218789376.0,
            "11855": 13218789376.0,
            "11860": 13218789376.0,
            "11865": 13218789376.0,
            "11870": 13218789376.0,
            "11875": 13218789376.0,
            "11880": 13218789376.0,
            "11885": 13218789376.0,
            "11890": 13218789376.0,
            "11895": 13218789376.0,
            "11900": 13218789376.0,
            "11905": 13218789376.0,
            "11910": 13218789376.0,
            "11915": 13218789376.0,
            "11920": 13218789376.0,
            "11925": 13218789376.0,
            "11930": 13218789376.0,
            "11935": 13218789376.0,
            "11940": 13218789376.0,
            "11945": 13218789376.0,
            "11950": 13218789376.0,
            "11955": 13218789376.0,
            "11960": 13218789376.0,
            "11965": 13218789376.0,
            "11970": 13218789376.0,
            "11975": 13218789376.0,
            "11980": 13218789376.0,
            "11985": 13218789376.0,
            "11990": 13218789376.0,
            "11995": 13218789376.0,
            "12000": 13218789376.0,
            "12005": 13218789376.0,
            "12010": 13218789376.0,
            "12015": 13218789376.0,
            "12020": 13218789376.0,
            "12025": 13218789376.0,
            "12030": 13218789376.0,
            "12035": 13218789376.0,
            "12040": 13218789376.0,
            "12045": 13218789376.0,
            "12050": 13218789376.0,
            "12055": 13218789376.0,
            "12060": 13218789376.0,
            "12065": 13218789376.0,
            "12070": 13218789376.0,
            "12075": 13218789376.0,
            "12080": 13218789376.0,
            "12085": 13218789376.0,
            "12090": 13218789376.0,
            "12095": 13218789376.0,
            "12100": 13218789376.0,
            "12105": 13218789376.0,
            "12110": 13218789376.0,
            "12115": 13218789376.0,
            "12120": 13218789376.0,
            "12125": 13218789376.0,
            "12130": 13218789376.0,
            "12135": 13218789376.0,
            "12140": 13218789376.0,
            "12145": 13218789376.0,
            "12150": 13218789376.0,
            "12155": 13218789376.0,
            "12160": 13218789376.0,
            "12165": 13218789376.0,
            "12170": 13218789376.0,
            "12175": 13218789376.0,
            "12180": 13218789376.0,
            "12185": 13218789376.0,
            "12190": 13218789376.0,
            "12195": 13218789376.0,
            "12200": 13218789376.0,
            "12205": 13218789376.0,
            "12210": 13218789376.0,
            "12215": 13218789376.0,
            "12220": 13218789376.0,
            "12225": 13218789376.0,
            "12230": 13218789376.0,
            "12235": 13218789376.0,
            "12240": 13218789376.0,
            "12245": 13218789376.0,
            "12250": 13218789376.0,
            "12255": 13218789376.0,
            "12260": 13218789376.0,
            "12265": 13218789376.0,
            "12270": 13218789376.0,
            "12275": 13218789376.0,
            "12280": 13218789376.0,
            "12285": 13218789376.0,
            "12290": 13218789376.0,
            "12295": 13218789376.0,
            "12300": 13218789376.0,
            "12305": 13218789376.0,
            "12310": 13218789376.0,
            "12315": 13218789376.0,
            "12320": 13218789376.0,
            "12325": 13218789376.0,
            "12330": 13218789376.0,
            "12335": 13218789376.0,
            "12340": 13218789376.0,
            "12345": 13218789376.0,
            "12350": 13218789376.0,
            "12355": 13218789376.0,
            "12360": 13218789376.0,
            "12365": 13218789376.0,
            "12370": 13218789376.0,
            "12375": 13218789376.0,
            "12380": 13218789376.0,
            "12385": 13218789376.0,
            "12390": 13218789376.0,
            "12395": 13218789376.0,
            "12400": 13218789376.0,
            "12405": 13218789376.0,
            "12410": 13218789376.0,
            "12415": 13218789376.0,
            "12420": 13218789376.0,
            "12425": 13218789376.0,
            "12430": 13218789376.0,
            "12435": 13218789376.0,
            "12440": 13218789376.0,
            "12445": 13218789376.0,
            "12450": 13218789376.0,
            "12455": 13218789376.0,
            "12460": 13218789376.0,
            "12465": 13218789376.0,
            "12470": 13218789376.0,
            "12475": 13218789376.0,
            "12480": 13218789376.0,
            "12485": 13218789376.0,
            "12490": 13218789376.0,
            "12495": 13218789376.0,
            "12500": 13218789376.0,
            "12505": 13218789376.0,
            "12510": 13218789376.0,
            "12515": 13218789376.0,
            "12520": 13218789376.0,
            "12525": 13218789376.0,
            "12530": 13218789376.0,
            "12535": 13218789376.0,
            "12540": 13218789376.0,
            "12545": 13218789376.0,
            "12550": 13218789376.0,
            "12555": 13218789376.0,
            "12560": 13218789376.0,
            "12565": 13218789376.0,
            "12570": 13218789376.0,
            "12575": 13218789376.0,
            "12580": 13218789376.0,
            "12585": 13218789376.0,
            "12590": 13218789376.0,
            "12595": 13218789376.0,
            "12600": 13218789376.0,
            "12605": 13218789376.0,
            "12610": 13218789376.0,
            "12615": 13218789376.0,
            "12620": 13218789376.0,
            "12625": 13218789376.0,
            "12630": 13218789376.0,
            "12635": 13218789376.0,
            "12640": 13218789376.0,
            "12645": 13218789376.0,
            "12650": 13218789376.0,
            "12655": 13218789376.0,
            "12660": 13218789376.0,
            "12665": 13218789376.0,
            "12670": 13218789376.0,
            "12675": 13218789376.0,
            "12680": 13218789376.0,
            "12685": 13218789376.0,
            "12690": 13218789376.0,
            "12695": 13218789376.0,
            "12700": 13218789376.0,
            "12705": 13218789376.0,
            "12710": 13218789376.0,
            "12715": 13218789376.0,
            "12720": "nan",
            "12725": "nan",
            "12730": "nan",
            "12735": "nan",
            "12740": "nan",
            "12745": "nan",
            "12750": "nan",
            "12755": "nan",
            "12760": "nan",
            "12765": "nan",
            "12770": "nan",
            "12775": "nan",
            "12780": "nan",
            "12785": "nan",
            "12790": "nan",
            "12795": "nan",
            "12800": "nan",
            "12805": "nan",
            "12810": "nan",
            "12815": "nan",
            "12820": "nan",
            "12825": "nan",
            "12830": "nan",
            "12835": "nan",
            "12840": "nan",
            "12845": "nan",
            "12850": "nan",
            "12855": "nan",
            "12860": "nan",
            "12865": "nan",
            "12870": "nan",
            "12875": "nan",
            "12880": "nan",
            "12885": "nan",
            "12890": "nan",
            "12895": "nan",
            "12900": "nan",
            "12905": "nan",
            "12910": "nan",
            "12915": "nan",
            "12920": "nan",
            "12925": "nan",
            "12930": "nan",
            "12935": "nan",
            "12940": "nan",
            "12945": "nan",
            "12950": "nan",
            "12955": "nan",
            "12960": "nan",
            "12965": "nan",
            "12970": "nan",
            "12975": "nan",
            "12980": "nan",
            "12985": "nan",
            "12990": "nan",
            "12995": "nan",
            "13000": "nan"
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 13000,
        "step_interval": 5,
        "values": {
            "1": 27608694784.0,
            "5": 28107184128.0,
            "10": 28107184128.0,
            "15": 28107184128.0,
            "20": 28107184128.0,
            "25": 28107184128.0,
            "30": 28107184128.0,
            "35": 28107184128.0,
            "40": 28107184128.0,
            "45": 28107184128.0,
            "50": 28107184128.0,
            "55": 28107184128.0,
            "60": 28107184128.0,
            "65": 28107184128.0,
            "70": 28107184128.0,
            "75": 28107184128.0,
            "80": 28107184128.0,
            "85": 28107184128.0,
            "90": 28107184128.0,
            "95": 28107184128.0,
            "100": 28107184128.0,
            "105": 28107184128.0,
            "110": 28107184128.0,
            "115": 28107184128.0,
            "120": 28107184128.0,
            "125": 28107184128.0,
            "130": 28107184128.0,
            "135": 28107184128.0,
            "140": 28107184128.0,
            "145": 28107184128.0,
            "150": 28107184128.0,
            "155": 28107184128.0,
            "160": 28107184128.0,
            "165": 28107184128.0,
            "170": 28107184128.0,
            "175": 28107184128.0,
            "180": 28107184128.0,
            "185": 28107184128.0,
            "190": 28107184128.0,
            "195": 28107184128.0,
            "200": 28107184128.0,
            "205": 28107184128.0,
            "210": 28107184128.0,
            "215": 28107184128.0,
            "220": 28107184128.0,
            "225": 28107184128.0,
            "230": 28107184128.0,
            "235": 28107184128.0,
            "240": 28107184128.0,
            "245": 28107184128.0,
            "250": 28107184128.0,
            "255": 28107184128.0,
            "260": 28107184128.0,
            "265": 28107184128.0,
            "270": 28107184128.0,
            "275": 28107184128.0,
            "280": 28107184128.0,
            "285": 28107184128.0,
            "290": 28107184128.0,
            "295": 28107184128.0,
            "300": 28107184128.0,
            "305": 28107184128.0,
            "310": 28107184128.0,
            "315": 28107184128.0,
            "320": 28107184128.0,
            "325": 28107184128.0,
            "330": 28107184128.0,
            "335": 28107184128.0,
            "340": 28107184128.0,
            "345": 28107184128.0,
            "350": 28107184128.0,
            "355": 28107184128.0,
            "360": 28107184128.0,
            "365": 28107184128.0,
            "370": 28107184128.0,
            "375": 28107184128.0,
            "380": 28107184128.0,
            "385": 28107184128.0,
            "390": 28107184128.0,
            "395": 28107184128.0,
            "400": 28107184128.0,
            "405": 28107184128.0,
            "410": 28107184128.0,
            "415": 28107184128.0,
            "420": 28107184128.0,
            "425": 28107184128.0,
            "430": 28107184128.0,
            "435": 28107184128.0,
            "440": 28107184128.0,
            "445": 28107184128.0,
            "450": 28107184128.0,
            "455": 28107184128.0,
            "460": 28107184128.0,
            "465": 28107184128.0,
            "470": 28107184128.0,
            "475": 28107184128.0,
            "480": 28107184128.0,
            "485": 28107184128.0,
            "490": 28107184128.0,
            "495": 28107184128.0,
            "500": 28107184128.0,
            "505": 28107184128.0,
            "510": 28107184128.0,
            "515": 28107184128.0,
            "520": 28107184128.0,
            "525": 28107184128.0,
            "530": 28107184128.0,
            "535": 28107184128.0,
            "540": 28107184128.0,
            "545": 28107184128.0,
            "550": 28107184128.0,
            "555": 28107184128.0,
            "560": 28107184128.0,
            "565": 28107184128.0,
            "570": 28107184128.0,
            "575": 28107184128.0,
            "580": 28107184128.0,
            "585": 28107184128.0,
            "590": 28107184128.0,
            "595": 28107184128.0,
            "600": 28107184128.0,
            "605": 28107184128.0,
            "610": 28107184128.0,
            "615": 28107184128.0,
            "620": 28107184128.0,
            "625": 28107184128.0,
            "630": 28107184128.0,
            "635": 28107184128.0,
            "640": 28107184128.0,
            "645": 28107184128.0,
            "650": 28107184128.0,
            "655": 28107184128.0,
            "660": 28107184128.0,
            "665": 28107184128.0,
            "670": 28107184128.0,
            "675": 28107184128.0,
            "680": 28107184128.0,
            "685": 28107184128.0,
            "690": 28107184128.0,
            "695": 28107184128.0,
            "700": 28107184128.0,
            "705": 28107184128.0,
            "710": 28107184128.0,
            "715": 28107184128.0,
            "720": 28107184128.0,
            "725": 28107184128.0,
            "730": 28107184128.0,
            "735": 28107184128.0,
            "740": 28107184128.0,
            "745": 28107184128.0,
            "750": 28107184128.0,
            "755": 28107184128.0,
            "760": 28107184128.0,
            "765": 28107184128.0,
            "770": 28107184128.0,
            "775": 28107184128.0,
            "780": 28107184128.0,
            "785": 28107184128.0,
            "790": 28107184128.0,
            "795": 28107184128.0,
            "800": 28107184128.0,
            "805": 28107184128.0,
            "810": 28107184128.0,
            "815": 28107184128.0,
            "820": 28107184128.0,
            "825": 28107184128.0,
            "830": 28107184128.0,
            "835": 28107184128.0,
            "840": 28107184128.0,
            "845": 28107184128.0,
            "850": 28107184128.0,
            "855": 28107184128.0,
            "860": 28107184128.0,
            "865": 28107184128.0,
            "870": 28107184128.0,
            "875": 28107184128.0,
            "880": 28107184128.0,
            "885": 28107184128.0,
            "890": 28107184128.0,
            "895": 28107184128.0,
            "900": 28107184128.0,
            "905": 28107184128.0,
            "910": 28107184128.0,
            "915": 28107184128.0,
            "920": 28107184128.0,
            "925": 28107184128.0,
            "930": 28107184128.0,
            "935": 28107184128.0,
            "940": 28107184128.0,
            "945": 28107184128.0,
            "950": 28107184128.0,
            "955": 28107184128.0,
            "960": 28107184128.0,
            "965": 28107184128.0,
            "970": 28107184128.0,
            "975": 28107184128.0,
            "980": 28107184128.0,
            "985": 28107184128.0,
            "990": 28107184128.0,
            "995": 28107184128.0,
            "1000": 28107184128.0,
            "1005": 28107184128.0,
            "1010": 28107184128.0,
            "1015": 28107184128.0,
            "1020": 28107184128.0,
            "1025": 28107184128.0,
            "1030": 28107184128.0,
            "1035": 28107184128.0,
            "1040": 28107184128.0,
            "1045": 28107184128.0,
            "1050": 28107184128.0,
            "1055": 28107184128.0,
            "1060": 28107184128.0,
            "1065": 28107184128.0,
            "1070": 28107184128.0,
            "1075": 28107184128.0,
            "1080": 28107184128.0,
            "1085": 28107184128.0,
            "1090": 28107184128.0,
            "1095": 28107184128.0,
            "1100": 28107184128.0,
            "1105": 28107184128.0,
            "1110": 28107184128.0,
            "1115": 28107184128.0,
            "1120": 28107184128.0,
            "1125": 28107184128.0,
            "1130": 28107184128.0,
            "1135": 28107184128.0,
            "1140": 28107184128.0,
            "1145": 28107184128.0,
            "1150": 28107184128.0,
            "1155": 28107184128.0,
            "1160": 28107184128.0,
            "1165": 28107184128.0,
            "1170": 28107184128.0,
            "1175": 28107184128.0,
            "1180": 28107184128.0,
            "1185": 28107184128.0,
            "1190": 28107184128.0,
            "1195": 28107184128.0,
            "1200": 28107184128.0,
            "1205": 28107184128.0,
            "1210": 28107184128.0,
            "1215": 28107184128.0,
            "1220": 28107184128.0,
            "1225": 28107184128.0,
            "1230": 28107184128.0,
            "1235": 28107184128.0,
            "1240": 28107184128.0,
            "1245": 28107184128.0,
            "1250": 28107184128.0,
            "1255": 28107184128.0,
            "1260": 28107184128.0,
            "1265": 28107184128.0,
            "1270": 28107184128.0,
            "1275": 28107184128.0,
            "1280": 28107184128.0,
            "1285": 28107184128.0,
            "1290": 28107184128.0,
            "1295": 28107184128.0,
            "1300": 28107184128.0,
            "1305": 28107184128.0,
            "1310": 28107184128.0,
            "1315": 28107184128.0,
            "1320": 28107184128.0,
            "1325": 28107184128.0,
            "1330": 28107184128.0,
            "1335": 28107184128.0,
            "1340": 28107184128.0,
            "1345": 28107184128.0,
            "1350": 28107184128.0,
            "1355": 28107184128.0,
            "1360": 28107184128.0,
            "1365": 28107184128.0,
            "1370": 28107184128.0,
            "1375": 28107184128.0,
            "1380": 28107184128.0,
            "1385": 28107184128.0,
            "1390": 28107184128.0,
            "1395": 28107184128.0,
            "1400": 28107184128.0,
            "1405": 28107184128.0,
            "1410": 28107184128.0,
            "1415": 28107184128.0,
            "1420": 28107184128.0,
            "1425": 28107184128.0,
            "1430": 28107184128.0,
            "1435": 28107184128.0,
            "1440": 28107184128.0,
            "1445": 28107184128.0,
            "1450": 28107184128.0,
            "1455": 28107184128.0,
            "1460": 28107184128.0,
            "1465": 28107184128.0,
            "1470": 28107184128.0,
            "1475": 28107184128.0,
            "1480": 28107184128.0,
            "1485": 28107184128.0,
            "1490": 28107184128.0,
            "1495": 28107184128.0,
            "1500": 28107184128.0,
            "1505": 28107184128.0,
            "1510": 28107184128.0,
            "1515": 28107184128.0,
            "1520": 28107184128.0,
            "1525": 28107184128.0,
            "1530": 28107184128.0,
            "1535": 28107184128.0,
            "1540": 28107184128.0,
            "1545": 28107184128.0,
            "1550": 28107184128.0,
            "1555": 28107184128.0,
            "1560": 28107184128.0,
            "1565": 28107184128.0,
            "1570": 28107184128.0,
            "1575": 28107184128.0,
            "1580": 28107184128.0,
            "1585": 28107184128.0,
            "1590": 28107184128.0,
            "1595": 28107184128.0,
            "1600": 28107184128.0,
            "1605": 28107184128.0,
            "1610": 28107184128.0,
            "1615": 28107184128.0,
            "1620": 28107184128.0,
            "1625": 28107184128.0,
            "1630": 28107184128.0,
            "1635": 28107184128.0,
            "1640": 28107184128.0,
            "1645": 28107184128.0,
            "1650": 28107184128.0,
            "1655": 28107184128.0,
            "1660": 28107184128.0,
            "1665": 28107184128.0,
            "1670": 28107184128.0,
            "1675": 28107184128.0,
            "1680": 28107184128.0,
            "1685": 28107184128.0,
            "1690": 28107184128.0,
            "1695": 28107184128.0,
            "1700": 28107184128.0,
            "1705": 28107184128.0,
            "1710": 28107184128.0,
            "1715": 28107184128.0,
            "1720": 28107184128.0,
            "1725": 28107184128.0,
            "1730": 28107184128.0,
            "1735": 28107184128.0,
            "1740": 28107184128.0,
            "1745": 28107184128.0,
            "1750": 28107184128.0,
            "1755": 28107184128.0,
            "1760": 28107184128.0,
            "1765": 28107184128.0,
            "1770": 28107184128.0,
            "1775": 28107184128.0,
            "1780": 28107184128.0,
            "1785": 28107184128.0,
            "1790": 28107184128.0,
            "1795": 28107184128.0,
            "1800": 28107184128.0,
            "1805": 28107184128.0,
            "1810": 28107184128.0,
            "1815": 28107184128.0,
            "1820": 28107184128.0,
            "1825": 28107184128.0,
            "1830": 28107184128.0,
            "1835": 28107184128.0,
            "1840": 28107184128.0,
            "1845": 28107184128.0,
            "1850": 28107184128.0,
            "1855": 28107184128.0,
            "1860": 28107184128.0,
            "1865": 28107184128.0,
            "1870": 28107184128.0,
            "1875": 28107184128.0,
            "1880": 28107184128.0,
            "1885": 28107184128.0,
            "1890": 28107184128.0,
            "1895": 28107184128.0,
            "1900": 28107184128.0,
            "1905": 28107184128.0,
            "1910": 28107184128.0,
            "1915": 28107184128.0,
            "1920": 28107184128.0,
            "1925": 28107184128.0,
            "1930": 28107184128.0,
            "1935": 28107184128.0,
            "1940": 28107184128.0,
            "1945": 28107184128.0,
            "1950": 28107184128.0,
            "1955": 28107184128.0,
            "1960": 28107184128.0,
            "1965": 28107184128.0,
            "1970": 28107184128.0,
            "1975": 28107184128.0,
            "1980": 28107184128.0,
            "1985": 28107184128.0,
            "1990": 28107184128.0,
            "1995": 28107184128.0,
            "2000": 28107184128.0,
            "2005": 28107184128.0,
            "2010": 28107184128.0,
            "2015": 28107184128.0,
            "2020": 28107184128.0,
            "2025": 28107184128.0,
            "2030": 28107184128.0,
            "2035": 28107184128.0,
            "2040": 28107184128.0,
            "2045": 28107184128.0,
            "2050": 28107184128.0,
            "2055": 28107184128.0,
            "2060": 28107184128.0,
            "2065": 28107184128.0,
            "2070": 28107184128.0,
            "2075": 28107184128.0,
            "2080": 28107184128.0,
            "2085": 28107184128.0,
            "2090": 28107184128.0,
            "2095": 28107184128.0,
            "2100": 28107184128.0,
            "2105": 28107184128.0,
            "2110": 28107184128.0,
            "2115": 28107184128.0,
            "2120": 28107184128.0,
            "2125": 28107184128.0,
            "2130": 28107184128.0,
            "2135": 28107184128.0,
            "2140": 28107184128.0,
            "2145": 28107184128.0,
            "2150": 28107184128.0,
            "2155": 28107184128.0,
            "2160": 28107184128.0,
            "2165": 28107184128.0,
            "2170": 28107184128.0,
            "2175": 28107184128.0,
            "2180": 28107184128.0,
            "2185": 28107184128.0,
            "2190": 28107184128.0,
            "2195": 28107184128.0,
            "2200": 28107184128.0,
            "2205": 28107184128.0,
            "2210": 28107184128.0,
            "2215": 28107184128.0,
            "2220": 28107184128.0,
            "2225": 28107184128.0,
            "2230": 28107184128.0,
            "2235": 28107184128.0,
            "2240": 28107184128.0,
            "2245": 28107184128.0,
            "2250": 28107184128.0,
            "2255": 28107184128.0,
            "2260": 28107184128.0,
            "2265": 28107184128.0,
            "2270": 28107184128.0,
            "2275": 28107184128.0,
            "2280": 28107184128.0,
            "2285": 28107184128.0,
            "2290": 28107184128.0,
            "2295": 28107184128.0,
            "2300": 28107184128.0,
            "2305": 28107184128.0,
            "2310": 28107184128.0,
            "2315": 28107184128.0,
            "2320": 28107184128.0,
            "2325": 28107184128.0,
            "2330": 28107184128.0,
            "2335": 28107184128.0,
            "2340": 28107184128.0,
            "2345": 28107184128.0,
            "2350": 28107184128.0,
            "2355": 28107184128.0,
            "2360": 28107184128.0,
            "2365": 28107184128.0,
            "2370": 28107184128.0,
            "2375": 28107184128.0,
            "2380": 28107184128.0,
            "2385": 28107184128.0,
            "2390": 28107184128.0,
            "2395": 28107184128.0,
            "2400": 28107184128.0,
            "2405": 28107184128.0,
            "2410": 28107184128.0,
            "2415": 28107184128.0,
            "2420": 28107184128.0,
            "2425": 28107184128.0,
            "2430": 28107184128.0,
            "2435": 28107184128.0,
            "2440": 28107184128.0,
            "2445": 28107184128.0,
            "2450": 28107184128.0,
            "2455": 28107184128.0,
            "2460": 28107184128.0,
            "2465": 28107184128.0,
            "2470": 28107184128.0,
            "2475": 28107184128.0,
            "2480": 28107184128.0,
            "2485": 28107184128.0,
            "2490": 28107184128.0,
            "2495": 28107184128.0,
            "2500": 28107184128.0,
            "2505": 28107184128.0,
            "2510": 28107184128.0,
            "2515": 28107184128.0,
            "2520": 28107184128.0,
            "2525": 28107184128.0,
            "2530": 28107184128.0,
            "2535": 28107184128.0,
            "2540": 28107184128.0,
            "2545": 28107184128.0,
            "2550": 28107184128.0,
            "2555": 28107184128.0,
            "2560": 28107184128.0,
            "2565": 28107184128.0,
            "2570": 28107184128.0,
            "2575": 28107184128.0,
            "2580": 28107184128.0,
            "2585": 28107184128.0,
            "2590": 28107184128.0,
            "2595": 28107184128.0,
            "2600": 28107184128.0,
            "2605": 28107184128.0,
            "2610": 28107184128.0,
            "2615": 28107184128.0,
            "2620": 28107184128.0,
            "2625": 28107184128.0,
            "2630": 28107184128.0,
            "2635": 28107184128.0,
            "2640": 28107184128.0,
            "2645": 28107184128.0,
            "2650": 28107184128.0,
            "2655": 28107184128.0,
            "2660": 28107184128.0,
            "2665": 28107184128.0,
            "2670": 28107184128.0,
            "2675": 28107184128.0,
            "2680": 28107184128.0,
            "2685": 28107184128.0,
            "2690": 28107184128.0,
            "2695": 28107184128.0,
            "2700": 28107184128.0,
            "2705": 28107184128.0,
            "2710": 28107184128.0,
            "2715": 28107184128.0,
            "2720": 28107184128.0,
            "2725": 28107184128.0,
            "2730": 28107184128.0,
            "2735": 28107184128.0,
            "2740": 28107184128.0,
            "2745": 28107184128.0,
            "2750": 28107184128.0,
            "2755": 28107184128.0,
            "2760": 28107184128.0,
            "2765": 28107184128.0,
            "2770": 28107184128.0,
            "2775": 28107184128.0,
            "2780": 28107184128.0,
            "2785": 28107184128.0,
            "2790": 28107184128.0,
            "2795": 28107184128.0,
            "2800": 28107184128.0,
            "2805": 28107184128.0,
            "2810": 28107184128.0,
            "2815": 28107184128.0,
            "2820": 28107184128.0,
            "2825": 28107184128.0,
            "2830": 28107184128.0,
            "2835": 28107184128.0,
            "2840": 28107184128.0,
            "2845": 28107184128.0,
            "2850": 28107184128.0,
            "2855": 28107184128.0,
            "2860": 28107184128.0,
            "2865": 28107184128.0,
            "2870": 28107184128.0,
            "2875": 28107184128.0,
            "2880": 28107184128.0,
            "2885": 28107184128.0,
            "2890": 28107184128.0,
            "2895": 28107184128.0,
            "2900": 28107184128.0,
            "2905": 28107184128.0,
            "2910": 28107184128.0,
            "2915": 28107184128.0,
            "2920": 28107184128.0,
            "2925": 28107184128.0,
            "2930": 28107184128.0,
            "2935": 28107184128.0,
            "2940": 28107184128.0,
            "2945": 28107184128.0,
            "2950": 28107184128.0,
            "2955": 28107184128.0,
            "2960": 28107184128.0,
            "2965": 28107184128.0,
            "2970": 28107184128.0,
            "2975": 28107184128.0,
            "2980": 28107184128.0,
            "2985": 28107184128.0,
            "2990": 28107184128.0,
            "2995": 28107184128.0,
            "3000": 28107184128.0,
            "3005": 28107184128.0,
            "3010": 28107184128.0,
            "3015": 28107184128.0,
            "3020": 28107184128.0,
            "3025": 28107184128.0,
            "3030": 28107184128.0,
            "3035": 28107184128.0,
            "3040": 28107184128.0,
            "3045": 28107184128.0,
            "3050": 28107184128.0,
            "3055": 28107184128.0,
            "3060": 28107184128.0,
            "3065": 28107184128.0,
            "3070": 28107184128.0,
            "3075": 28107184128.0,
            "3080": 28107184128.0,
            "3085": 28107184128.0,
            "3090": 28107184128.0,
            "3095": 28107184128.0,
            "3100": 28107184128.0,
            "3105": 28107184128.0,
            "3110": 28107184128.0,
            "3115": 28107184128.0,
            "3120": 28107184128.0,
            "3125": 28107184128.0,
            "3130": 28107184128.0,
            "3135": 28107184128.0,
            "3140": 28107184128.0,
            "3145": 28107184128.0,
            "3150": 28107184128.0,
            "3155": 28107184128.0,
            "3160": 28107184128.0,
            "3165": 28107184128.0,
            "3170": 28107184128.0,
            "3175": 28107184128.0,
            "3180": 28107184128.0,
            "3185": 28107184128.0,
            "3190": 28107184128.0,
            "3195": 28107184128.0,
            "3200": 28107184128.0,
            "3205": 28107184128.0,
            "3210": 28107184128.0,
            "3215": 28107184128.0,
            "3220": 28107184128.0,
            "3225": 28107184128.0,
            "3230": 28107184128.0,
            "3235": 28107184128.0,
            "3240": 28107184128.0,
            "3245": 28107184128.0,
            "3250": 28107184128.0,
            "3255": 28107184128.0,
            "3260": 28107184128.0,
            "3265": 28107184128.0,
            "3270": 28107184128.0,
            "3275": 28107184128.0,
            "3280": 28107184128.0,
            "3285": 28107184128.0,
            "3290": 28107184128.0,
            "3295": 28107184128.0,
            "3300": 28107184128.0,
            "3305": 28107184128.0,
            "3310": 28107184128.0,
            "3315": 28107184128.0,
            "3320": 28107184128.0,
            "3325": 28107184128.0,
            "3330": 28107184128.0,
            "3335": 28107184128.0,
            "3340": 28107184128.0,
            "3345": 28107184128.0,
            "3350": 28107184128.0,
            "3355": 28107184128.0,
            "3360": 28107184128.0,
            "3365": 28107184128.0,
            "3370": 28107184128.0,
            "3375": 28107184128.0,
            "3380": 28107184128.0,
            "3385": 28107184128.0,
            "3390": 28107184128.0,
            "3395": 28107184128.0,
            "3400": 28107184128.0,
            "3405": 28107184128.0,
            "3410": 28107184128.0,
            "3415": 28107184128.0,
            "3420": 28107184128.0,
            "3425": 28107184128.0,
            "3430": 28107184128.0,
            "3435": 28107184128.0,
            "3440": 28107184128.0,
            "3445": 28107184128.0,
            "3450": 28107184128.0,
            "3455": 28107184128.0,
            "3460": 28107184128.0,
            "3465": 28107184128.0,
            "3470": 28107184128.0,
            "3475": 28107184128.0,
            "3480": 28107184128.0,
            "3485": 28107184128.0,
            "3490": 28107184128.0,
            "3495": 28107184128.0,
            "3500": 28107184128.0,
            "3505": 28107184128.0,
            "3510": 28107184128.0,
            "3515": 28107184128.0,
            "3520": 28107184128.0,
            "3525": 28107184128.0,
            "3530": 28107184128.0,
            "3535": 28107184128.0,
            "3540": 28107184128.0,
            "3545": 28107184128.0,
            "3550": 28107184128.0,
            "3555": 28107184128.0,
            "3560": 28107184128.0,
            "3565": 28107184128.0,
            "3570": 28107184128.0,
            "3575": 28107184128.0,
            "3580": 28107184128.0,
            "3585": 28107184128.0,
            "3590": 28107184128.0,
            "3595": 28107184128.0,
            "3600": 28107184128.0,
            "3605": 28107184128.0,
            "3610": 28107184128.0,
            "3615": 28107184128.0,
            "3620": 28107184128.0,
            "3625": 28107184128.0,
            "3630": 28107184128.0,
            "3635": 28107184128.0,
            "3640": 28107184128.0,
            "3645": 28107184128.0,
            "3650": 28107184128.0,
            "3655": 28107184128.0,
            "3660": 28107184128.0,
            "3665": 28107184128.0,
            "3670": 28107184128.0,
            "3675": 28107184128.0,
            "3680": 28107184128.0,
            "3685": 28107184128.0,
            "3690": 28107184128.0,
            "3695": 28107184128.0,
            "3700": 28107184128.0,
            "3705": 28107184128.0,
            "3710": 28107184128.0,
            "3715": 28107184128.0,
            "3720": 28107184128.0,
            "3725": 28107184128.0,
            "3730": 28107184128.0,
            "3735": 28107184128.0,
            "3740": 28107184128.0,
            "3745": 28107184128.0,
            "3750": 28107184128.0,
            "3755": 28107184128.0,
            "3760": 28107184128.0,
            "3765": 28107184128.0,
            "3770": 28107184128.0,
            "3775": 28107184128.0,
            "3780": 28107184128.0,
            "3785": 28107184128.0,
            "3790": 28107184128.0,
            "3795": 28107184128.0,
            "3800": 28107184128.0,
            "3805": 28107184128.0,
            "3810": 28107184128.0,
            "3815": 28107184128.0,
            "3820": 28107184128.0,
            "3825": 28107184128.0,
            "3830": 28107184128.0,
            "3835": 28107184128.0,
            "3840": 28107184128.0,
            "3845": 28107184128.0,
            "3850": 28107184128.0,
            "3855": 28107184128.0,
            "3860": 28107184128.0,
            "3865": 28107184128.0,
            "3870": 28107184128.0,
            "3875": 28107184128.0,
            "3880": 28107184128.0,
            "3885": 28107184128.0,
            "3890": 28107184128.0,
            "3895": 28107184128.0,
            "3900": 28107184128.0,
            "3905": 28107184128.0,
            "3910": 28107184128.0,
            "3915": 28107184128.0,
            "3920": 28107184128.0,
            "3925": 28107184128.0,
            "3930": 28107184128.0,
            "3935": 28107184128.0,
            "3940": 28107184128.0,
            "3945": 28107184128.0,
            "3950": 28107184128.0,
            "3955": 28107184128.0,
            "3960": 28107184128.0,
            "3965": 28107184128.0,
            "3970": 28107184128.0,
            "3975": 28107184128.0,
            "3980": 28107184128.0,
            "3985": 28107184128.0,
            "3990": 28107184128.0,
            "3995": 28107184128.0,
            "4000": 28107184128.0,
            "4005": 28107184128.0,
            "4010": 28107184128.0,
            "4015": 28107184128.0,
            "4020": 28107184128.0,
            "4025": 28107184128.0,
            "4030": 28107184128.0,
            "4035": 28107184128.0,
            "4040": 28107184128.0,
            "4045": 28107184128.0,
            "4050": 28107184128.0,
            "4055": 28107184128.0,
            "4060": 28107184128.0,
            "4065": 28107184128.0,
            "4070": 28107184128.0,
            "4075": 28107184128.0,
            "4080": 28107184128.0,
            "4085": 28107184128.0,
            "4090": 28107184128.0,
            "4095": 28107184128.0,
            "4100": 28107184128.0,
            "4105": 28107184128.0,
            "4110": 28107184128.0,
            "4115": 28107184128.0,
            "4120": 28107184128.0,
            "4125": 28107184128.0,
            "4130": 28107184128.0,
            "4135": 28107184128.0,
            "4140": 28107184128.0,
            "4145": 28107184128.0,
            "4150": 28107184128.0,
            "4155": 28107184128.0,
            "4160": 28107184128.0,
            "4165": 28107184128.0,
            "4170": 28107184128.0,
            "4175": 28107184128.0,
            "4180": 28107184128.0,
            "4185": 28107184128.0,
            "4190": 28107184128.0,
            "4195": 28107184128.0,
            "4200": 28107184128.0,
            "4205": 28107184128.0,
            "4210": 28107184128.0,
            "4215": 28107184128.0,
            "4220": 28107184128.0,
            "4225": 28107184128.0,
            "4230": 28107184128.0,
            "4235": 28107184128.0,
            "4240": 28107184128.0,
            "4245": 28107184128.0,
            "4250": 28107184128.0,
            "4255": 28107184128.0,
            "4260": 28107184128.0,
            "4265": 28107184128.0,
            "4270": 28107184128.0,
            "4275": 28107184128.0,
            "4280": 28107184128.0,
            "4285": 28107184128.0,
            "4290": 28107184128.0,
            "4295": 28107184128.0,
            "4300": 28107184128.0,
            "4305": 28107184128.0,
            "4310": 28107184128.0,
            "4315": 28107184128.0,
            "4320": 28107184128.0,
            "4325": 28107184128.0,
            "4330": 28107184128.0,
            "4335": 28107184128.0,
            "4340": 28107184128.0,
            "4345": 28107184128.0,
            "4350": 28107184128.0,
            "4355": 28107184128.0,
            "4360": 28107184128.0,
            "4365": 28107184128.0,
            "4370": 28107184128.0,
            "4375": 28107184128.0,
            "4380": 28107184128.0,
            "4385": 28107184128.0,
            "4390": 28107184128.0,
            "4395": 28107184128.0,
            "4400": 28107184128.0,
            "4405": 28107184128.0,
            "4410": 28107184128.0,
            "4415": 28107184128.0,
            "4420": 28107184128.0,
            "4425": 28107184128.0,
            "4430": 28107184128.0,
            "4435": 28107184128.0,
            "4440": 28107184128.0,
            "4445": 28107184128.0,
            "4450": 28107184128.0,
            "4455": 28107184128.0,
            "4460": 28107184128.0,
            "4465": 28107184128.0,
            "4470": 28107184128.0,
            "4475": 28107184128.0,
            "4480": 28107184128.0,
            "4485": 28107184128.0,
            "4490": 28107184128.0,
            "4495": 28107184128.0,
            "4500": 28107184128.0,
            "4505": 28107184128.0,
            "4510": 28107184128.0,
            "4515": 28107184128.0,
            "4520": 28107184128.0,
            "4525": 28107184128.0,
            "4530": 28107184128.0,
            "4535": 28107184128.0,
            "4540": 28107184128.0,
            "4545": 28107184128.0,
            "4550": 28107184128.0,
            "4555": 28107184128.0,
            "4560": 28107184128.0,
            "4565": 28107184128.0,
            "4570": 28107184128.0,
            "4575": 28107184128.0,
            "4580": 28107184128.0,
            "4585": 28107184128.0,
            "4590": 28107184128.0,
            "4595": 28107184128.0,
            "4600": 28107184128.0,
            "4605": 28107184128.0,
            "4610": 28107184128.0,
            "4615": 28107184128.0,
            "4620": 28107184128.0,
            "4625": 28107184128.0,
            "4630": 28107184128.0,
            "4635": 28107184128.0,
            "4640": 28107184128.0,
            "4645": 28107184128.0,
            "4650": 28107184128.0,
            "4655": 28107184128.0,
            "4660": 28107184128.0,
            "4665": 28107184128.0,
            "4670": 28107184128.0,
            "4675": 28107184128.0,
            "4680": 28107184128.0,
            "4685": 28107184128.0,
            "4690": 28107184128.0,
            "4695": 28107184128.0,
            "4700": 28107184128.0,
            "4705": 28107184128.0,
            "4710": 28107184128.0,
            "4715": 28107184128.0,
            "4720": 28107184128.0,
            "4725": 28107184128.0,
            "4730": 28107184128.0,
            "4735": 28107184128.0,
            "4740": 28107184128.0,
            "4745": 28107184128.0,
            "4750": 28107184128.0,
            "4755": 28107184128.0,
            "4760": 28107184128.0,
            "4765": 28107184128.0,
            "4770": 28107184128.0,
            "4775": 28107184128.0,
            "4780": 28107184128.0,
            "4785": 28107184128.0,
            "4790": 28107184128.0,
            "4795": 28107184128.0,
            "4800": 28107184128.0,
            "4805": 28107184128.0,
            "4810": 28107184128.0,
            "4815": 28107184128.0,
            "4820": 28107184128.0,
            "4825": 28107184128.0,
            "4830": 28107184128.0,
            "4835": 28107184128.0,
            "4840": 28107184128.0,
            "4845": 28107184128.0,
            "4850": 28107184128.0,
            "4855": 28107184128.0,
            "4860": 28107184128.0,
            "4865": 28107184128.0,
            "4870": 28107184128.0,
            "4875": 28107184128.0,
            "4880": 28107184128.0,
            "4885": 28107184128.0,
            "4890": 28107184128.0,
            "4895": 28107184128.0,
            "4900": 28107184128.0,
            "4905": 28107184128.0,
            "4910": 28107184128.0,
            "4915": 28107184128.0,
            "4920": 28107184128.0,
            "4925": 28107184128.0,
            "4930": 28107184128.0,
            "4935": 28107184128.0,
            "4940": 28107184128.0,
            "4945": 28107184128.0,
            "4950": 28107184128.0,
            "4955": 28107184128.0,
            "4960": 28107184128.0,
            "4965": 28107184128.0,
            "4970": 28107184128.0,
            "4975": 28107184128.0,
            "4980": 28107184128.0,
            "4985": 28107184128.0,
            "4990": 28107184128.0,
            "4995": 28107184128.0,
            "5000": 28107184128.0,
            "5005": 28107184128.0,
            "5010": 28107184128.0,
            "5015": 28107184128.0,
            "5020": 28107184128.0,
            "5025": 28107184128.0,
            "5030": 28107184128.0,
            "5035": 28107184128.0,
            "5040": 28107184128.0,
            "5045": 28107184128.0,
            "5050": 28107184128.0,
            "5055": 28107184128.0,
            "5060": 28107184128.0,
            "5065": 28107184128.0,
            "5070": 28107184128.0,
            "5075": 28107184128.0,
            "5080": 28107184128.0,
            "5085": 28107184128.0,
            "5090": 28107184128.0,
            "5095": 28107184128.0,
            "5100": 28107184128.0,
            "5105": 28107184128.0,
            "5110": 28107184128.0,
            "5115": 28107184128.0,
            "5120": 28107184128.0,
            "5125": 28107184128.0,
            "5130": 28107184128.0,
            "5135": 28107184128.0,
            "5140": 28107184128.0,
            "5145": 28107184128.0,
            "5150": 28107184128.0,
            "5155": 28107184128.0,
            "5160": 28107184128.0,
            "5165": 28107184128.0,
            "5170": 28107184128.0,
            "5175": 28107184128.0,
            "5180": 28107184128.0,
            "5185": 28107184128.0,
            "5190": 28107184128.0,
            "5195": 28107184128.0,
            "5200": 28107184128.0,
            "5205": 28107184128.0,
            "5210": 28107184128.0,
            "5215": 28107184128.0,
            "5220": 28107184128.0,
            "5225": 28107184128.0,
            "5230": 28107184128.0,
            "5235": 28107184128.0,
            "5240": 28107184128.0,
            "5245": 28107184128.0,
            "5250": 28107184128.0,
            "5255": 28107184128.0,
            "5260": 28107184128.0,
            "5265": 28107184128.0,
            "5270": 28107184128.0,
            "5275": 28107184128.0,
            "5280": 28107184128.0,
            "5285": 28107184128.0,
            "5290": 28107184128.0,
            "5295": 28107184128.0,
            "5300": 28107184128.0,
            "5305": 28107184128.0,
            "5310": 28107184128.0,
            "5315": 28107184128.0,
            "5320": 28107184128.0,
            "5325": 28107184128.0,
            "5330": 28107184128.0,
            "5335": 28107184128.0,
            "5340": 28107184128.0,
            "5345": 28107184128.0,
            "5350": 28107184128.0,
            "5355": 28107184128.0,
            "5360": 28107184128.0,
            "5365": 28107184128.0,
            "5370": 28107184128.0,
            "5375": 28107184128.0,
            "5380": 28107184128.0,
            "5385": 28107184128.0,
            "5390": 28107184128.0,
            "5395": 28107184128.0,
            "5400": 28107184128.0,
            "5405": 28107184128.0,
            "5410": 28107184128.0,
            "5415": 28107184128.0,
            "5420": 28107184128.0,
            "5425": 28107184128.0,
            "5430": 28107184128.0,
            "5435": 28107184128.0,
            "5440": 28107184128.0,
            "5445": 28107184128.0,
            "5450": 28107184128.0,
            "5455": 28107184128.0,
            "5460": 28107184128.0,
            "5465": 28107184128.0,
            "5470": 28107184128.0,
            "5475": 28107184128.0,
            "5480": 28107184128.0,
            "5485": 28107184128.0,
            "5490": 28107184128.0,
            "5495": 28107184128.0,
            "5500": 28107184128.0,
            "5505": 28107184128.0,
            "5510": 28107184128.0,
            "5515": 28107184128.0,
            "5520": 28107184128.0,
            "5525": 28107184128.0,
            "5530": 28107184128.0,
            "5535": 28107184128.0,
            "5540": 28107184128.0,
            "5545": 28107184128.0,
            "5550": 28107184128.0,
            "5555": 28107184128.0,
            "5560": 28107184128.0,
            "5565": 28107184128.0,
            "5570": 28107184128.0,
            "5575": 28107184128.0,
            "5580": 28107184128.0,
            "5585": 28107184128.0,
            "5590": 28107184128.0,
            "5595": 28107184128.0,
            "5600": 28107184128.0,
            "5605": 28107184128.0,
            "5610": 28107184128.0,
            "5615": 28107184128.0,
            "5620": 28107184128.0,
            "5625": 28107184128.0,
            "5630": 28107184128.0,
            "5635": 28107184128.0,
            "5640": 28107184128.0,
            "5645": 28107184128.0,
            "5650": 28107184128.0,
            "5655": 28107184128.0,
            "5660": 28107184128.0,
            "5665": 28107184128.0,
            "5670": 28107184128.0,
            "5675": 28107184128.0,
            "5680": 28107184128.0,
            "5685": 28107184128.0,
            "5690": 28107184128.0,
            "5695": 28107184128.0,
            "5700": 28107184128.0,
            "5705": 28107184128.0,
            "5710": 28107184128.0,
            "5715": 28107184128.0,
            "5720": 28107184128.0,
            "5725": 28107184128.0,
            "5730": 28107184128.0,
            "5735": 28107184128.0,
            "5740": 28107184128.0,
            "5745": 28107184128.0,
            "5750": 28107184128.0,
            "5755": 28107184128.0,
            "5760": 28107184128.0,
            "5765": 28107184128.0,
            "5770": 28107184128.0,
            "5775": 28107184128.0,
            "5780": 28107184128.0,
            "5785": 28107184128.0,
            "5790": 28107184128.0,
            "5795": 28107184128.0,
            "5800": 28107184128.0,
            "5805": 28107184128.0,
            "5810": 28107184128.0,
            "5815": 28107184128.0,
            "5820": 28107184128.0,
            "5825": 28107184128.0,
            "5830": 28107184128.0,
            "5835": 28107184128.0,
            "5840": 28107184128.0,
            "5845": 28107184128.0,
            "5850": 28107184128.0,
            "5855": 28107184128.0,
            "5860": 28107184128.0,
            "5865": 28107184128.0,
            "5870": 28107184128.0,
            "5875": 28107184128.0,
            "5880": 28107184128.0,
            "5885": 28107184128.0,
            "5890": 28107184128.0,
            "5895": 28107184128.0,
            "5900": 28107184128.0,
            "5905": 28107184128.0,
            "5910": 28107184128.0,
            "5915": 28107184128.0,
            "5920": 28107184128.0,
            "5925": 28107184128.0,
            "5930": 28107184128.0,
            "5935": 28107184128.0,
            "5940": 28107184128.0,
            "5945": 28107184128.0,
            "5950": 28107184128.0,
            "5955": 28107184128.0,
            "5960": 28107184128.0,
            "5965": 28107184128.0,
            "5970": 28107184128.0,
            "5975": 28107184128.0,
            "5980": 28107184128.0,
            "5985": 28107184128.0,
            "5990": 28107184128.0,
            "5995": 28107184128.0,
            "6000": 28107184128.0,
            "6005": 28107184128.0,
            "6010": 28107184128.0,
            "6015": 28107184128.0,
            "6020": 28107184128.0,
            "6025": 28107184128.0,
            "6030": 28107184128.0,
            "6035": 28107184128.0,
            "6040": 28107184128.0,
            "6045": 28107184128.0,
            "6050": 28107184128.0,
            "6055": 28107184128.0,
            "6060": 28107184128.0,
            "6065": 28107184128.0,
            "6070": 28107184128.0,
            "6075": 28107184128.0,
            "6080": 28107184128.0,
            "6085": 28107184128.0,
            "6090": 28107184128.0,
            "6095": 28107184128.0,
            "6100": 28107184128.0,
            "6105": 28107184128.0,
            "6110": 28107184128.0,
            "6115": 28107184128.0,
            "6120": 28107184128.0,
            "6125": 28107184128.0,
            "6130": 28107184128.0,
            "6135": 28107184128.0,
            "6140": 28107184128.0,
            "6145": 28107184128.0,
            "6150": 28107184128.0,
            "6155": 28107184128.0,
            "6160": 28107184128.0,
            "6165": 28107184128.0,
            "6170": 28107184128.0,
            "6175": 28107184128.0,
            "6180": 28107184128.0,
            "6185": 28107184128.0,
            "6190": 28107184128.0,
            "6195": 28107184128.0,
            "6200": 28107184128.0,
            "6205": 28107184128.0,
            "6210": 28107184128.0,
            "6215": 28107184128.0,
            "6220": 28107184128.0,
            "6225": 28107184128.0,
            "6230": 28107184128.0,
            "6235": 28107184128.0,
            "6240": 28107184128.0,
            "6245": 28107184128.0,
            "6250": 28107184128.0,
            "6255": 28107184128.0,
            "6260": 28107184128.0,
            "6265": 28107184128.0,
            "6270": 28107184128.0,
            "6275": 28107184128.0,
            "6280": 28107184128.0,
            "6285": 28107184128.0,
            "6290": 28107184128.0,
            "6295": 28107184128.0,
            "6300": 28107184128.0,
            "6305": 28107184128.0,
            "6310": 28107184128.0,
            "6315": 28107184128.0,
            "6320": 28107184128.0,
            "6325": 28107184128.0,
            "6330": 28107184128.0,
            "6335": 28107184128.0,
            "6340": 28107184128.0,
            "6345": 28107184128.0,
            "6350": 28107184128.0,
            "6355": 28107184128.0,
            "6360": 28107184128.0,
            "6365": 28107184128.0,
            "6370": 28107184128.0,
            "6375": 28107184128.0,
            "6380": 28107184128.0,
            "6385": 28107184128.0,
            "6390": 28107184128.0,
            "6395": 28107184128.0,
            "6400": 28107184128.0,
            "6405": 28107184128.0,
            "6410": 28107184128.0,
            "6415": 28107184128.0,
            "6420": 28107184128.0,
            "6425": 28107184128.0,
            "6430": 28107184128.0,
            "6435": 28107184128.0,
            "6440": 28107184128.0,
            "6445": 28107184128.0,
            "6450": 28107184128.0,
            "6455": 28107184128.0,
            "6460": 28107184128.0,
            "6465": 28107184128.0,
            "6470": 28107184128.0,
            "6475": 28107184128.0,
            "6480": 28107184128.0,
            "6485": 28107184128.0,
            "6490": 28107184128.0,
            "6495": 28107184128.0,
            "6500": 28107184128.0,
            "6505": 28107184128.0,
            "6510": 28107184128.0,
            "6515": 28107184128.0,
            "6520": 28107184128.0,
            "6525": 28107184128.0,
            "6530": 28107184128.0,
            "6535": 28107184128.0,
            "6540": 28107184128.0,
            "6545": 28107184128.0,
            "6550": 28107184128.0,
            "6555": 28107184128.0,
            "6560": 28107184128.0,
            "6565": 28107184128.0,
            "6570": 28107184128.0,
            "6575": 28107184128.0,
            "6580": 28107184128.0,
            "6585": 28107184128.0,
            "6590": 28107184128.0,
            "6595": 28107184128.0,
            "6600": 28107184128.0,
            "6605": 28107184128.0,
            "6610": 28107184128.0,
            "6615": 28107184128.0,
            "6620": 28107184128.0,
            "6625": 28107184128.0,
            "6630": 28107184128.0,
            "6635": 28107184128.0,
            "6640": 28107184128.0,
            "6645": 28107184128.0,
            "6650": 28107184128.0,
            "6655": 28107184128.0,
            "6660": 28107184128.0,
            "6665": 28107184128.0,
            "6670": 28107184128.0,
            "6675": 28107184128.0,
            "6680": 28107184128.0,
            "6685": 28107184128.0,
            "6690": 28107184128.0,
            "6695": 28107184128.0,
            "6700": 28107184128.0,
            "6705": 28107184128.0,
            "6710": 28107184128.0,
            "6715": 28107184128.0,
            "6720": 28107184128.0,
            "6725": 28107184128.0,
            "6730": 28107184128.0,
            "6735": 28107184128.0,
            "6740": 28107184128.0,
            "6745": 28107184128.0,
            "6750": 28107184128.0,
            "6755": 28107184128.0,
            "6760": 28107184128.0,
            "6765": 28107184128.0,
            "6770": 28107184128.0,
            "6775": 28107184128.0,
            "6780": 28107184128.0,
            "6785": 28107184128.0,
            "6790": 28107184128.0,
            "6795": 28107184128.0,
            "6800": 28107184128.0,
            "6805": 28107184128.0,
            "6810": 28107184128.0,
            "6815": 28107184128.0,
            "6820": 28107184128.0,
            "6825": 28107184128.0,
            "6830": 28107184128.0,
            "6835": 28107184128.0,
            "6840": 28107184128.0,
            "6845": 28107184128.0,
            "6850": 28107184128.0,
            "6855": 28107184128.0,
            "6860": 28107184128.0,
            "6865": 28107184128.0,
            "6870": 28107184128.0,
            "6875": 28107184128.0,
            "6880": 28107184128.0,
            "6885": 28107184128.0,
            "6890": 28107184128.0,
            "6895": 28107184128.0,
            "6900": 28107184128.0,
            "6905": 28107184128.0,
            "6910": 28107184128.0,
            "6915": 28107184128.0,
            "6920": 28107184128.0,
            "6925": 28107184128.0,
            "6930": 28107184128.0,
            "6935": 28107184128.0,
            "6940": 28107184128.0,
            "6945": 28107184128.0,
            "6950": 28107184128.0,
            "6955": 28107184128.0,
            "6960": 28107184128.0,
            "6965": 28107184128.0,
            "6970": 28107184128.0,
            "6975": 28107184128.0,
            "6980": 28107184128.0,
            "6985": 28107184128.0,
            "6990": 28107184128.0,
            "6995": 28107184128.0,
            "7000": 28107184128.0,
            "7005": 28107184128.0,
            "7010": 28107184128.0,
            "7015": 28107184128.0,
            "7020": 28107184128.0,
            "7025": 28107184128.0,
            "7030": 28107184128.0,
            "7035": 28107184128.0,
            "7040": 28107184128.0,
            "7045": 28107184128.0,
            "7050": 28107184128.0,
            "7055": 28107184128.0,
            "7060": 28107184128.0,
            "7065": 28107184128.0,
            "7070": 28107184128.0,
            "7075": 28107184128.0,
            "7080": 28107184128.0,
            "7085": 28107184128.0,
            "7090": 28107184128.0,
            "7095": 28107184128.0,
            "7100": 28107184128.0,
            "7105": 28107184128.0,
            "7110": 28107184128.0,
            "7115": 28107184128.0,
            "7120": 28107184128.0,
            "7125": 28107184128.0,
            "7130": 28107184128.0,
            "7135": 28107184128.0,
            "7140": 28107184128.0,
            "7145": 28107184128.0,
            "7150": 28107184128.0,
            "7155": 28107184128.0,
            "7160": 28107184128.0,
            "7165": 28107184128.0,
            "7170": 28107184128.0,
            "7175": 28107184128.0,
            "7180": 28107184128.0,
            "7185": 28107184128.0,
            "7190": 28107184128.0,
            "7195": 28107184128.0,
            "7200": 28107184128.0,
            "7205": 28107184128.0,
            "7210": 28107184128.0,
            "7215": 28107184128.0,
            "7220": 28107184128.0,
            "7225": 28107184128.0,
            "7230": 28107184128.0,
            "7235": 28107184128.0,
            "7240": 28107184128.0,
            "7245": 28107184128.0,
            "7250": 28107184128.0,
            "7255": 28107184128.0,
            "7260": 28107184128.0,
            "7265": 28107184128.0,
            "7270": 28107184128.0,
            "7275": 28107184128.0,
            "7280": 28107184128.0,
            "7285": 28107184128.0,
            "7290": 28107184128.0,
            "7295": 28107184128.0,
            "7300": 28107184128.0,
            "7305": 28107184128.0,
            "7310": 28107184128.0,
            "7315": 28107184128.0,
            "7320": 28107184128.0,
            "7325": 28107184128.0,
            "7330": 28107184128.0,
            "7335": 28107184128.0,
            "7340": 28107184128.0,
            "7345": 28107184128.0,
            "7350": 28107184128.0,
            "7355": 28107184128.0,
            "7360": 28107184128.0,
            "7365": 28107184128.0,
            "7370": 28107184128.0,
            "7375": 28107184128.0,
            "7380": 28107184128.0,
            "7385": 28107184128.0,
            "7390": 28107184128.0,
            "7395": 28107184128.0,
            "7400": 28107184128.0,
            "7405": 28107184128.0,
            "7410": 28107184128.0,
            "7415": 28107184128.0,
            "7420": 28107184128.0,
            "7425": 28107184128.0,
            "7430": 28107184128.0,
            "7435": 28107184128.0,
            "7440": 28107184128.0,
            "7445": 28107184128.0,
            "7450": 28107184128.0,
            "7455": 28107184128.0,
            "7460": 28107184128.0,
            "7465": 28107184128.0,
            "7470": 28107184128.0,
            "7475": 28107184128.0,
            "7480": 28107184128.0,
            "7485": 28107184128.0,
            "7490": 28107184128.0,
            "7495": 28107184128.0,
            "7500": 28107184128.0,
            "7505": 28107184128.0,
            "7510": 28107184128.0,
            "7515": 28107184128.0,
            "7520": 28107184128.0,
            "7525": 28107184128.0,
            "7530": 28107184128.0,
            "7535": 28107184128.0,
            "7540": 28107184128.0,
            "7545": 28107184128.0,
            "7550": 28107184128.0,
            "7555": 28107184128.0,
            "7560": 28107184128.0,
            "7565": 28107184128.0,
            "7570": 28107184128.0,
            "7575": 28107184128.0,
            "7580": 28107184128.0,
            "7585": 28107184128.0,
            "7590": 28107184128.0,
            "7595": 28107184128.0,
            "7600": 28107184128.0,
            "7605": 28107184128.0,
            "7610": 28107184128.0,
            "7615": 28107184128.0,
            "7620": 28107184128.0,
            "7625": 28107184128.0,
            "7630": 28107184128.0,
            "7635": 28107184128.0,
            "7640": 28107184128.0,
            "7645": 28107184128.0,
            "7650": 28107184128.0,
            "7655": 28107184128.0,
            "7660": 28107184128.0,
            "7665": 28107184128.0,
            "7670": 28107184128.0,
            "7675": 28107184128.0,
            "7680": 28107184128.0,
            "7685": 28107184128.0,
            "7690": 28107184128.0,
            "7695": 28107184128.0,
            "7700": 28107184128.0,
            "7705": 28107184128.0,
            "7710": 28107184128.0,
            "7715": 28107184128.0,
            "7720": 28107184128.0,
            "7725": 28107184128.0,
            "7730": 28107184128.0,
            "7735": 28107184128.0,
            "7740": 28107184128.0,
            "7745": 28107184128.0,
            "7750": 28107184128.0,
            "7755": 28107184128.0,
            "7760": 28107184128.0,
            "7765": 28107184128.0,
            "7770": 28107184128.0,
            "7775": 28107184128.0,
            "7780": 28107184128.0,
            "7785": 28107184128.0,
            "7790": 28107184128.0,
            "7795": 28107184128.0,
            "7800": 28107184128.0,
            "7805": 28107184128.0,
            "7810": 28107184128.0,
            "7815": 28107184128.0,
            "7820": 28107184128.0,
            "7825": 28107184128.0,
            "7830": 28107184128.0,
            "7835": 28107184128.0,
            "7840": 28107184128.0,
            "7845": 28107184128.0,
            "7850": 28107184128.0,
            "7855": 28107184128.0,
            "7860": 28107184128.0,
            "7865": 28107184128.0,
            "7870": 28107184128.0,
            "7875": 28107184128.0,
            "7880": 28107184128.0,
            "7885": 28107184128.0,
            "7890": 28107184128.0,
            "7895": 28107184128.0,
            "7900": 28107184128.0,
            "7905": 28107184128.0,
            "7910": 28107184128.0,
            "7915": 28107184128.0,
            "7920": 28107184128.0,
            "7925": 28107184128.0,
            "7930": 28107184128.0,
            "7935": 28107184128.0,
            "7940": 28107184128.0,
            "7945": 28107184128.0,
            "7950": 28107184128.0,
            "7955": 28107184128.0,
            "7960": 28107184128.0,
            "7965": 28107184128.0,
            "7970": 28107184128.0,
            "7975": 28107184128.0,
            "7980": 28107184128.0,
            "7985": 28107184128.0,
            "7990": 28107184128.0,
            "7995": 28107184128.0,
            "8000": 28107184128.0,
            "8005": 28107184128.0,
            "8010": 28107184128.0,
            "8015": 28107184128.0,
            "8020": 28107184128.0,
            "8025": 28107184128.0,
            "8030": 28107184128.0,
            "8035": 28107184128.0,
            "8040": 28107184128.0,
            "8045": 28107184128.0,
            "8050": 28107184128.0,
            "8055": 28107184128.0,
            "8060": 28107184128.0,
            "8065": 28107184128.0,
            "8070": 28107184128.0,
            "8075": 28107184128.0,
            "8080": 28107184128.0,
            "8085": 28107184128.0,
            "8090": 28107184128.0,
            "8095": 28107184128.0,
            "8100": 28107184128.0,
            "8105": 28107184128.0,
            "8110": 28107184128.0,
            "8115": 28107184128.0,
            "8120": 28107184128.0,
            "8125": 28107184128.0,
            "8130": 28107184128.0,
            "8135": 28107184128.0,
            "8140": 28107184128.0,
            "8145": 28107184128.0,
            "8150": 28107184128.0,
            "8155": 28107184128.0,
            "8160": 28107184128.0,
            "8165": 28107184128.0,
            "8170": 28107184128.0,
            "8175": 28107184128.0,
            "8180": 28107184128.0,
            "8185": 28107184128.0,
            "8190": 28107184128.0,
            "8195": 28107184128.0,
            "8200": 28107184128.0,
            "8205": 28107184128.0,
            "8210": 28107184128.0,
            "8215": 28107184128.0,
            "8220": 28107184128.0,
            "8225": 28107184128.0,
            "8230": 28107184128.0,
            "8235": 28107184128.0,
            "8240": 28107184128.0,
            "8245": 28107184128.0,
            "8250": 28107184128.0,
            "8255": 28107184128.0,
            "8260": 28107184128.0,
            "8265": 28107184128.0,
            "8270": 28107184128.0,
            "8275": 28107184128.0,
            "8280": 28107184128.0,
            "8285": 28107184128.0,
            "8290": 28107184128.0,
            "8295": 28107184128.0,
            "8300": 28107184128.0,
            "8305": 28107184128.0,
            "8310": 28107184128.0,
            "8315": 28107184128.0,
            "8320": 28107184128.0,
            "8325": 28107184128.0,
            "8330": 28107184128.0,
            "8335": 28107184128.0,
            "8340": 28107184128.0,
            "8345": 28107184128.0,
            "8350": 28107184128.0,
            "8355": 28107184128.0,
            "8360": 28107184128.0,
            "8365": 28107184128.0,
            "8370": 28107184128.0,
            "8375": 28107184128.0,
            "8380": 28107184128.0,
            "8385": 28107184128.0,
            "8390": 28107184128.0,
            "8395": 28107184128.0,
            "8400": 28107184128.0,
            "8405": 28107184128.0,
            "8410": 28107184128.0,
            "8415": 28107184128.0,
            "8420": 28107184128.0,
            "8425": 28107184128.0,
            "8430": 28107184128.0,
            "8435": 28107184128.0,
            "8440": 28107184128.0,
            "8445": 28107184128.0,
            "8450": 28107184128.0,
            "8455": 28107184128.0,
            "8460": 28107184128.0,
            "8465": 28107184128.0,
            "8470": 28107184128.0,
            "8475": 28107184128.0,
            "8480": 28107184128.0,
            "8485": 28107184128.0,
            "8490": 28107184128.0,
            "8495": 28107184128.0,
            "8500": 28107184128.0,
            "8505": 28107184128.0,
            "8510": 28107184128.0,
            "8515": 28107184128.0,
            "8520": 28107184128.0,
            "8525": 28107184128.0,
            "8530": 28107184128.0,
            "8535": 28107184128.0,
            "8540": 28107184128.0,
            "8545": 28107184128.0,
            "8550": 28107184128.0,
            "8555": 28107184128.0,
            "8560": 28107184128.0,
            "8565": 28107184128.0,
            "8570": 28107184128.0,
            "8575": 28107184128.0,
            "8580": 28107184128.0,
            "8585": 28107184128.0,
            "8590": 28107184128.0,
            "8595": 28107184128.0,
            "8600": 28107184128.0,
            "8605": 28107184128.0,
            "8610": 28107184128.0,
            "8615": 28107184128.0,
            "8620": 28107184128.0,
            "8625": 28107184128.0,
            "8630": 28107184128.0,
            "8635": 28107184128.0,
            "8640": 28107184128.0,
            "8645": 28107184128.0,
            "8650": 28107184128.0,
            "8655": 28107184128.0,
            "8660": 28107184128.0,
            "8665": 28107184128.0,
            "8670": 28107184128.0,
            "8675": 28107184128.0,
            "8680": 28107184128.0,
            "8685": 28107184128.0,
            "8690": 28107184128.0,
            "8695": 28107184128.0,
            "8700": 28107184128.0,
            "8705": 28107184128.0,
            "8710": 28107184128.0,
            "8715": 28107184128.0,
            "8720": 28107184128.0,
            "8725": 28107184128.0,
            "8730": 28107184128.0,
            "8735": 28107184128.0,
            "8740": 28107184128.0,
            "8745": 28107184128.0,
            "8750": 28107184128.0,
            "8755": 28107184128.0,
            "8760": 28107184128.0,
            "8765": 28107184128.0,
            "8770": 28107184128.0,
            "8775": 28107184128.0,
            "8780": 28107184128.0,
            "8785": 28107184128.0,
            "8790": 28107184128.0,
            "8795": 28107184128.0,
            "8800": 28107184128.0,
            "8805": 28107184128.0,
            "8810": 28107184128.0,
            "8815": 28107184128.0,
            "8820": 28107184128.0,
            "8825": 28107184128.0,
            "8830": 28107184128.0,
            "8835": 28107184128.0,
            "8840": 28107184128.0,
            "8845": 28107184128.0,
            "8850": 28107184128.0,
            "8855": 28107184128.0,
            "8860": 28107184128.0,
            "8865": 28107184128.0,
            "8870": 28107184128.0,
            "8875": 28107184128.0,
            "8880": 28107184128.0,
            "8885": 28107184128.0,
            "8890": 28107184128.0,
            "8895": 28107184128.0,
            "8900": 28107184128.0,
            "8905": 28107184128.0,
            "8910": 28107184128.0,
            "8915": 28107184128.0,
            "8920": 28107184128.0,
            "8925": 28107184128.0,
            "8930": 28107184128.0,
            "8935": 28107184128.0,
            "8940": 28107184128.0,
            "8945": 28107184128.0,
            "8950": 28107184128.0,
            "8955": 28107184128.0,
            "8960": 28107184128.0,
            "8965": 28107184128.0,
            "8970": 28107184128.0,
            "8975": 28107184128.0,
            "8980": 28107184128.0,
            "8985": 28107184128.0,
            "8990": 28107184128.0,
            "8995": 28107184128.0,
            "9000": 28107184128.0,
            "9005": 28107184128.0,
            "9010": 28107184128.0,
            "9015": 28107184128.0,
            "9020": 28107184128.0,
            "9025": 28107184128.0,
            "9030": 28107184128.0,
            "9035": 28107184128.0,
            "9040": 28107184128.0,
            "9045": 28107184128.0,
            "9050": 28107184128.0,
            "9055": 28107184128.0,
            "9060": 28107184128.0,
            "9065": 28107184128.0,
            "9070": 28107184128.0,
            "9075": 28107184128.0,
            "9080": 28107184128.0,
            "9085": 28107184128.0,
            "9090": 28107184128.0,
            "9095": 28107184128.0,
            "9100": 28107184128.0,
            "9105": 28107184128.0,
            "9110": 28107184128.0,
            "9115": 28107184128.0,
            "9120": 28107184128.0,
            "9125": 28107184128.0,
            "9130": 28107184128.0,
            "9135": 28107184128.0,
            "9140": 28107184128.0,
            "9145": 28107184128.0,
            "9150": 28107184128.0,
            "9155": 28107184128.0,
            "9160": 28107184128.0,
            "9165": 28107184128.0,
            "9170": 28107184128.0,
            "9175": 28107184128.0,
            "9180": 28107184128.0,
            "9185": 28107184128.0,
            "9190": 28107184128.0,
            "9195": 28107184128.0,
            "9200": 28107184128.0,
            "9205": 28107184128.0,
            "9210": 28107184128.0,
            "9215": 28107184128.0,
            "9220": 28107184128.0,
            "9225": 28107184128.0,
            "9230": 28107184128.0,
            "9235": 28107184128.0,
            "9240": 28107184128.0,
            "9245": 28107184128.0,
            "9250": 28107184128.0,
            "9255": 28107184128.0,
            "9260": 28107184128.0,
            "9265": 28107184128.0,
            "9270": 28107184128.0,
            "9275": 28107184128.0,
            "9280": 28107184128.0,
            "9285": 28107184128.0,
            "9290": 28107184128.0,
            "9295": 28107184128.0,
            "9300": 28107184128.0,
            "9305": 28107184128.0,
            "9310": 28107184128.0,
            "9315": 28107184128.0,
            "9320": 28107184128.0,
            "9325": 28107184128.0,
            "9330": 28107184128.0,
            "9335": 28107184128.0,
            "9340": 28107184128.0,
            "9345": 28107184128.0,
            "9350": 28107184128.0,
            "9355": 28107184128.0,
            "9360": 28107184128.0,
            "9365": 28107184128.0,
            "9370": 28107184128.0,
            "9375": 28107184128.0,
            "9380": 28107184128.0,
            "9385": 28107184128.0,
            "9390": 28107184128.0,
            "9395": 28107184128.0,
            "9400": 28107184128.0,
            "9405": 28107184128.0,
            "9410": 28107184128.0,
            "9415": 28107184128.0,
            "9420": 28107184128.0,
            "9425": 28107184128.0,
            "9430": 28107184128.0,
            "9435": 28107184128.0,
            "9440": 28107184128.0,
            "9445": 28107184128.0,
            "9450": 28107184128.0,
            "9455": 28107184128.0,
            "9460": 28107184128.0,
            "9465": 28107184128.0,
            "9470": 28107184128.0,
            "9475": 28107184128.0,
            "9480": 28107184128.0,
            "9485": 28107184128.0,
            "9490": 28107184128.0,
            "9495": 28107184128.0,
            "9500": 28107184128.0,
            "9505": 28107184128.0,
            "9510": 28107184128.0,
            "9515": 28107184128.0,
            "9520": 28107184128.0,
            "9525": 28107184128.0,
            "9530": 28107184128.0,
            "9535": 28107184128.0,
            "9540": 28107184128.0,
            "9545": 28107184128.0,
            "9550": 28107184128.0,
            "9555": 28107184128.0,
            "9560": 28107184128.0,
            "9565": 28107184128.0,
            "9570": 28107184128.0,
            "9575": 28107184128.0,
            "9580": 28107184128.0,
            "9585": 28107184128.0,
            "9590": 28107184128.0,
            "9595": 28107184128.0,
            "9600": 28107184128.0,
            "9605": 28107184128.0,
            "9610": 28107184128.0,
            "9615": 28107184128.0,
            "9620": 28107184128.0,
            "9625": 28107184128.0,
            "9630": 28107184128.0,
            "9635": 28107184128.0,
            "9640": 28107184128.0,
            "9645": 28107184128.0,
            "9650": 28107184128.0,
            "9655": 28107184128.0,
            "9660": 28107184128.0,
            "9665": 28107184128.0,
            "9670": 28107184128.0,
            "9675": 28107184128.0,
            "9680": 28107184128.0,
            "9685": 28107184128.0,
            "9690": 28107184128.0,
            "9695": 28107184128.0,
            "9700": 28107184128.0,
            "9705": 28107184128.0,
            "9710": 28107184128.0,
            "9715": 28107184128.0,
            "9720": 28107184128.0,
            "9725": 28107184128.0,
            "9730": 28107184128.0,
            "9735": 28107184128.0,
            "9740": 28107184128.0,
            "9745": 28107184128.0,
            "9750": 28107184128.0,
            "9755": 28107184128.0,
            "9760": 28107184128.0,
            "9765": 28107184128.0,
            "9770": 28107184128.0,
            "9775": 28107184128.0,
            "9780": 28107184128.0,
            "9785": 28107184128.0,
            "9790": 28107184128.0,
            "9795": 28107184128.0,
            "9800": 28107184128.0,
            "9805": 28107184128.0,
            "9810": 28107184128.0,
            "9815": 28107184128.0,
            "9820": 28107184128.0,
            "9825": 28107184128.0,
            "9830": 28107184128.0,
            "9835": 28107184128.0,
            "9840": 28107184128.0,
            "9845": 28107184128.0,
            "9850": 28107184128.0,
            "9855": 28107184128.0,
            "9860": 28107184128.0,
            "9865": 28107184128.0,
            "9870": 28107184128.0,
            "9875": 28107184128.0,
            "9880": 28107184128.0,
            "9885": 28107184128.0,
            "9890": 28107184128.0,
            "9895": 28107184128.0,
            "9900": 28107184128.0,
            "9905": 28107184128.0,
            "9910": 28107184128.0,
            "9915": 28107184128.0,
            "9920": 28107184128.0,
            "9925": 28107184128.0,
            "9930": 28107184128.0,
            "9935": 28107184128.0,
            "9940": 28107184128.0,
            "9945": 28107184128.0,
            "9950": 28107184128.0,
            "9955": 28107184128.0,
            "9960": 28107184128.0,
            "9965": 28107184128.0,
            "9970": 28107184128.0,
            "9975": 28107184128.0,
            "9980": 28107184128.0,
            "9985": 28107184128.0,
            "9990": 28107184128.0,
            "9995": 28107184128.0,
            "10000": 28107184128.0,
            "10005": 28107184128.0,
            "10010": 28107184128.0,
            "10015": 28107184128.0,
            "10020": 28107184128.0,
            "10025": 28107184128.0,
            "10030": 28107184128.0,
            "10035": 28107184128.0,
            "10040": 28107184128.0,
            "10045": 28107184128.0,
            "10050": 28107184128.0,
            "10055": 28107184128.0,
            "10060": 28107184128.0,
            "10065": 28107184128.0,
            "10070": 28107184128.0,
            "10075": 28107184128.0,
            "10080": 28107184128.0,
            "10085": 28107184128.0,
            "10090": 28107184128.0,
            "10095": 28107184128.0,
            "10100": 28107184128.0,
            "10105": 28107184128.0,
            "10110": 28107184128.0,
            "10115": 28107184128.0,
            "10120": 28107184128.0,
            "10125": 28107184128.0,
            "10130": 28107184128.0,
            "10135": 28107184128.0,
            "10140": 28107184128.0,
            "10145": 28107184128.0,
            "10150": 28107184128.0,
            "10155": 28107184128.0,
            "10160": 28107184128.0,
            "10165": 28107184128.0,
            "10170": 28107184128.0,
            "10175": 28107184128.0,
            "10180": 28107184128.0,
            "10185": 28107184128.0,
            "10190": 28107184128.0,
            "10195": 28107184128.0,
            "10200": 28107184128.0,
            "10205": 28107184128.0,
            "10210": 28107184128.0,
            "10215": 28107184128.0,
            "10220": 28107184128.0,
            "10225": 28107184128.0,
            "10230": 28107184128.0,
            "10235": 28107184128.0,
            "10240": 28107184128.0,
            "10245": 28107184128.0,
            "10250": 28107184128.0,
            "10255": 28107184128.0,
            "10260": 28107184128.0,
            "10265": 28107184128.0,
            "10270": 28107184128.0,
            "10275": 28107184128.0,
            "10280": 28107184128.0,
            "10285": 28107184128.0,
            "10290": 28107184128.0,
            "10295": 28107184128.0,
            "10300": 28107184128.0,
            "10305": 28107184128.0,
            "10310": 28107184128.0,
            "10315": 28107184128.0,
            "10320": 28107184128.0,
            "10325": 28107184128.0,
            "10330": 28107184128.0,
            "10335": 28107184128.0,
            "10340": 28107184128.0,
            "10345": 28107184128.0,
            "10350": 28107184128.0,
            "10355": 28107184128.0,
            "10360": 28107184128.0,
            "10365": 28107184128.0,
            "10370": 28107184128.0,
            "10375": 28107184128.0,
            "10380": 28107184128.0,
            "10385": 28107184128.0,
            "10390": 28107184128.0,
            "10395": 28107184128.0,
            "10400": 28107184128.0,
            "10405": 28107184128.0,
            "10410": 28107184128.0,
            "10415": 28107184128.0,
            "10420": 28107184128.0,
            "10425": 28107184128.0,
            "10430": 28107184128.0,
            "10435": 28107184128.0,
            "10440": 28107184128.0,
            "10445": 28107184128.0,
            "10450": 28107184128.0,
            "10455": 28107184128.0,
            "10460": 28107184128.0,
            "10465": 28107184128.0,
            "10470": 28107184128.0,
            "10475": 28107184128.0,
            "10480": 28107184128.0,
            "10485": 28107184128.0,
            "10490": 28107184128.0,
            "10495": 28107184128.0,
            "10500": 28107184128.0,
            "10505": 28107184128.0,
            "10510": 28107184128.0,
            "10515": 28107184128.0,
            "10520": 28107184128.0,
            "10525": 28107184128.0,
            "10530": 28107184128.0,
            "10535": 28107184128.0,
            "10540": 28107184128.0,
            "10545": 28107184128.0,
            "10550": 28107184128.0,
            "10555": 28107184128.0,
            "10560": 28107184128.0,
            "10565": 28107184128.0,
            "10570": 28107184128.0,
            "10575": 28107184128.0,
            "10580": 28107184128.0,
            "10585": 28107184128.0,
            "10590": 28107184128.0,
            "10595": 28107184128.0,
            "10600": 28107184128.0,
            "10605": 28107184128.0,
            "10610": 28107184128.0,
            "10615": 28107184128.0,
            "10620": 28107184128.0,
            "10625": 28107184128.0,
            "10630": 28107184128.0,
            "10635": 28107184128.0,
            "10640": 28107184128.0,
            "10645": 28107184128.0,
            "10650": 28107184128.0,
            "10655": 28107184128.0,
            "10660": 28107184128.0,
            "10665": 28107184128.0,
            "10670": 28107184128.0,
            "10675": 28107184128.0,
            "10680": 28107184128.0,
            "10685": 28107184128.0,
            "10690": 28107184128.0,
            "10695": 28107184128.0,
            "10700": 28107184128.0,
            "10705": 28107184128.0,
            "10710": 28107184128.0,
            "10715": 28107184128.0,
            "10720": 28107184128.0,
            "10725": 28107184128.0,
            "10730": 28107184128.0,
            "10735": 28107184128.0,
            "10740": 28107184128.0,
            "10745": 28107184128.0,
            "10750": 28107184128.0,
            "10755": 28107184128.0,
            "10760": 28107184128.0,
            "10765": 28107184128.0,
            "10770": 28107184128.0,
            "10775": 28107184128.0,
            "10780": 28107184128.0,
            "10785": 28107184128.0,
            "10790": 28107184128.0,
            "10795": 28107184128.0,
            "10800": 28107184128.0,
            "10805": 28107184128.0,
            "10810": 28107184128.0,
            "10815": 28107184128.0,
            "10820": 28107184128.0,
            "10825": 28107184128.0,
            "10830": 28107184128.0,
            "10835": 28107184128.0,
            "10840": 28107184128.0,
            "10845": 28107184128.0,
            "10850": 28107184128.0,
            "10855": 28107184128.0,
            "10860": 28107184128.0,
            "10865": 28107184128.0,
            "10870": 28107184128.0,
            "10875": 28107184128.0,
            "10880": 28107184128.0,
            "10885": 28107184128.0,
            "10890": 28107184128.0,
            "10895": 28107184128.0,
            "10900": 28107184128.0,
            "10905": 28107184128.0,
            "10910": 28107184128.0,
            "10915": 28107184128.0,
            "10920": 28107184128.0,
            "10925": 28107184128.0,
            "10930": 28107184128.0,
            "10935": 28107184128.0,
            "10940": 28107184128.0,
            "10945": 28107184128.0,
            "10950": 28107184128.0,
            "10955": 28107184128.0,
            "10960": 28107184128.0,
            "10965": 28107184128.0,
            "10970": 28107184128.0,
            "10975": 28107184128.0,
            "10980": 28107184128.0,
            "10985": 28107184128.0,
            "10990": 28107184128.0,
            "10995": 28107184128.0,
            "11000": 28107184128.0,
            "11005": 28107184128.0,
            "11010": 28107184128.0,
            "11015": 28107184128.0,
            "11020": 28107184128.0,
            "11025": 28107184128.0,
            "11030": 28107184128.0,
            "11035": 28107184128.0,
            "11040": 28107184128.0,
            "11045": 28107184128.0,
            "11050": 28107184128.0,
            "11055": 28107184128.0,
            "11060": 28107184128.0,
            "11065": 28107184128.0,
            "11070": 28107184128.0,
            "11075": 28107184128.0,
            "11080": 28107184128.0,
            "11085": 28107184128.0,
            "11090": 28107184128.0,
            "11095": 28107184128.0,
            "11100": 28107184128.0,
            "11105": 28107184128.0,
            "11110": 28107184128.0,
            "11115": 28107184128.0,
            "11120": 28107184128.0,
            "11125": 28107184128.0,
            "11130": 28107184128.0,
            "11135": 28107184128.0,
            "11140": 28107184128.0,
            "11145": 28107184128.0,
            "11150": 28107184128.0,
            "11155": 28107184128.0,
            "11160": 28107184128.0,
            "11165": 28107184128.0,
            "11170": 28107184128.0,
            "11175": 28107184128.0,
            "11180": 28107184128.0,
            "11185": 28107184128.0,
            "11190": 28107184128.0,
            "11195": 28107184128.0,
            "11200": 28107184128.0,
            "11205": 28107184128.0,
            "11210": 28107184128.0,
            "11215": 28107184128.0,
            "11220": 28107184128.0,
            "11225": 28107184128.0,
            "11230": 28107184128.0,
            "11235": 28107184128.0,
            "11240": 28107184128.0,
            "11245": 28107184128.0,
            "11250": 28107184128.0,
            "11255": 28107184128.0,
            "11260": 28107184128.0,
            "11265": 28107184128.0,
            "11270": 28107184128.0,
            "11275": 28107184128.0,
            "11280": 28107184128.0,
            "11285": 28107184128.0,
            "11290": 28107184128.0,
            "11295": 28107184128.0,
            "11300": 28107184128.0,
            "11305": 28107184128.0,
            "11310": 28107184128.0,
            "11315": 28107184128.0,
            "11320": 28107184128.0,
            "11325": 28107184128.0,
            "11330": 28107184128.0,
            "11335": 28107184128.0,
            "11340": 28107184128.0,
            "11345": 28107184128.0,
            "11350": 28107184128.0,
            "11355": 28107184128.0,
            "11360": 28107184128.0,
            "11365": 28107184128.0,
            "11370": 28107184128.0,
            "11375": 28107184128.0,
            "11380": 28107184128.0,
            "11385": 28107184128.0,
            "11390": 28107184128.0,
            "11395": 28107184128.0,
            "11400": 28107184128.0,
            "11405": 28107184128.0,
            "11410": 28107184128.0,
            "11415": 28107184128.0,
            "11420": 28107184128.0,
            "11425": 28107184128.0,
            "11430": 28107184128.0,
            "11435": 28107184128.0,
            "11440": 28107184128.0,
            "11445": 28107184128.0,
            "11450": 28107184128.0,
            "11455": 28107184128.0,
            "11460": 28107184128.0,
            "11465": 28107184128.0,
            "11470": 28107184128.0,
            "11475": 28107184128.0,
            "11480": 28107184128.0,
            "11485": 28107184128.0,
            "11490": 28107184128.0,
            "11495": 28107184128.0,
            "11500": 28107184128.0,
            "11505": 28107184128.0,
            "11510": 28107184128.0,
            "11515": 28107184128.0,
            "11520": 28107184128.0,
            "11525": 28107184128.0,
            "11530": 28107184128.0,
            "11535": 28107184128.0,
            "11540": 28107184128.0,
            "11545": 28107184128.0,
            "11550": 28107184128.0,
            "11555": 28107184128.0,
            "11560": 28107184128.0,
            "11565": 28107184128.0,
            "11570": 28107184128.0,
            "11575": 28107184128.0,
            "11580": 28107184128.0,
            "11585": 28107184128.0,
            "11590": 28107184128.0,
            "11595": 28107184128.0,
            "11600": 28107184128.0,
            "11605": 28107184128.0,
            "11610": 28107184128.0,
            "11615": 28107184128.0,
            "11620": 28107184128.0,
            "11625": 28107184128.0,
            "11630": 28107184128.0,
            "11635": 28107184128.0,
            "11640": 28107184128.0,
            "11645": 28107184128.0,
            "11650": 28107184128.0,
            "11655": 28107184128.0,
            "11660": 28107184128.0,
            "11665": 28107184128.0,
            "11670": 28107184128.0,
            "11675": 28107184128.0,
            "11680": 28107184128.0,
            "11685": 28107184128.0,
            "11690": 28107184128.0,
            "11695": 28107184128.0,
            "11700": 28107184128.0,
            "11705": 28107184128.0,
            "11710": 28107184128.0,
            "11715": 28107184128.0,
            "11720": 28107184128.0,
            "11725": 28107184128.0,
            "11730": 28107184128.0,
            "11735": 28107184128.0,
            "11740": 28107184128.0,
            "11745": 28107184128.0,
            "11750": 28107184128.0,
            "11755": 28107184128.0,
            "11760": 28107184128.0,
            "11765": 28107184128.0,
            "11770": 28107184128.0,
            "11775": 28107184128.0,
            "11780": 28107184128.0,
            "11785": 28107184128.0,
            "11790": 28107184128.0,
            "11795": 28107184128.0,
            "11800": 28107184128.0,
            "11805": 28107184128.0,
            "11810": 28107184128.0,
            "11815": 28107184128.0,
            "11820": 28107184128.0,
            "11825": 28107184128.0,
            "11830": 28107184128.0,
            "11835": 28107184128.0,
            "11840": 28107184128.0,
            "11845": 28107184128.0,
            "11850": 28107184128.0,
            "11855": 28107184128.0,
            "11860": 28107184128.0,
            "11865": 28107184128.0,
            "11870": 28107184128.0,
            "11875": 28107184128.0,
            "11880": 28107184128.0,
            "11885": 28107184128.0,
            "11890": 28107184128.0,
            "11895": 28107184128.0,
            "11900": 28107184128.0,
            "11905": 28107184128.0,
            "11910": 28107184128.0,
            "11915": 28107184128.0,
            "11920": 28107184128.0,
            "11925": 28107184128.0,
            "11930": 28107184128.0,
            "11935": 28107184128.0,
            "11940": 28107184128.0,
            "11945": 28107184128.0,
            "11950": 28107184128.0,
            "11955": 28107184128.0,
            "11960": 28107184128.0,
            "11965": 28107184128.0,
            "11970": 28107184128.0,
            "11975": 28107184128.0,
            "11980": 28107184128.0,
            "11985": 28107184128.0,
            "11990": 28107184128.0,
            "11995": 28107184128.0,
            "12000": 28107184128.0,
            "12005": 28107184128.0,
            "12010": 28107184128.0,
            "12015": 28107184128.0,
            "12020": 28107184128.0,
            "12025": 28107184128.0,
            "12030": 28107184128.0,
            "12035": 28107184128.0,
            "12040": 28107184128.0,
            "12045": 28107184128.0,
            "12050": 28107184128.0,
            "12055": 28107184128.0,
            "12060": 28107184128.0,
            "12065": 28107184128.0,
            "12070": 28107184128.0,
            "12075": 28107184128.0,
            "12080": 28107184128.0,
            "12085": 28107184128.0,
            "12090": 28107184128.0,
            "12095": 28107184128.0,
            "12100": 28107184128.0,
            "12105": 28107184128.0,
            "12110": 28107184128.0,
            "12115": 28107184128.0,
            "12120": 28107184128.0,
            "12125": 28107184128.0,
            "12130": 28107184128.0,
            "12135": 28107184128.0,
            "12140": 28107184128.0,
            "12145": 28107184128.0,
            "12150": 28107184128.0,
            "12155": 28107184128.0,
            "12160": 28107184128.0,
            "12165": 28107184128.0,
            "12170": 28107184128.0,
            "12175": 28107184128.0,
            "12180": 28107184128.0,
            "12185": 28107184128.0,
            "12190": 28107184128.0,
            "12195": 28107184128.0,
            "12200": 28107184128.0,
            "12205": 28107184128.0,
            "12210": 28107184128.0,
            "12215": 28107184128.0,
            "12220": 28107184128.0,
            "12225": 28107184128.0,
            "12230": 28107184128.0,
            "12235": 28107184128.0,
            "12240": 28107184128.0,
            "12245": 28107184128.0,
            "12250": 28107184128.0,
            "12255": 28107184128.0,
            "12260": 28107184128.0,
            "12265": 28107184128.0,
            "12270": 28107184128.0,
            "12275": 28107184128.0,
            "12280": 28107184128.0,
            "12285": 28107184128.0,
            "12290": 28107184128.0,
            "12295": 28107184128.0,
            "12300": 28107184128.0,
            "12305": 28107184128.0,
            "12310": 28107184128.0,
            "12315": 28107184128.0,
            "12320": 28107184128.0,
            "12325": 28107184128.0,
            "12330": 28107184128.0,
            "12335": 28107184128.0,
            "12340": 28107184128.0,
            "12345": 28107184128.0,
            "12350": 28107184128.0,
            "12355": 28107184128.0,
            "12360": 28107184128.0,
            "12365": 28107184128.0,
            "12370": 28107184128.0,
            "12375": 28107184128.0,
            "12380": 28107184128.0,
            "12385": 28107184128.0,
            "12390": 28107184128.0,
            "12395": 28107184128.0,
            "12400": 28107184128.0,
            "12405": 28107184128.0,
            "12410": 28107184128.0,
            "12415": 28107184128.0,
            "12420": 28107184128.0,
            "12425": 28107184128.0,
            "12430": 28107184128.0,
            "12435": 28107184128.0,
            "12440": 28107184128.0,
            "12445": 28107184128.0,
            "12450": 28107184128.0,
            "12455": 28107184128.0,
            "12460": 28107184128.0,
            "12465": 28107184128.0,
            "12470": 28107184128.0,
            "12475": 28107184128.0,
            "12480": 28107184128.0,
            "12485": 28107184128.0,
            "12490": 28107184128.0,
            "12495": 28107184128.0,
            "12500": 28107184128.0,
            "12505": 28107184128.0,
            "12510": 28107184128.0,
            "12515": 28107184128.0,
            "12520": 28107184128.0,
            "12525": 28107184128.0,
            "12530": 28107184128.0,
            "12535": 28107184128.0,
            "12540": 28107184128.0,
            "12545": 28107184128.0,
            "12550": 28107184128.0,
            "12555": 28107184128.0,
            "12560": 28107184128.0,
            "12565": 28107184128.0,
            "12570": 28107184128.0,
            "12575": 28107184128.0,
            "12580": 28107184128.0,
            "12585": 28107184128.0,
            "12590": 28107184128.0,
            "12595": 28107184128.0,
            "12600": 28107184128.0,
            "12605": 28107184128.0,
            "12610": 28107184128.0,
            "12615": 28107184128.0,
            "12620": 28107184128.0,
            "12625": 28107184128.0,
            "12630": 28107184128.0,
            "12635": 28107184128.0,
            "12640": 28107184128.0,
            "12645": 28107184128.0,
            "12650": 28107184128.0,
            "12655": 28107184128.0,
            "12660": 28107184128.0,
            "12665": 28107184128.0,
            "12670": 28107184128.0,
            "12675": 28107184128.0,
            "12680": 28107184128.0,
            "12685": 28107184128.0,
            "12690": 28107184128.0,
            "12695": 28107184128.0,
            "12700": 28107184128.0,
            "12705": 28107184128.0,
            "12710": 28107184128.0,
            "12715": 28107184128.0,
            "12720": "nan",
            "12725": "nan",
            "12730": "nan",
            "12735": "nan",
            "12740": "nan",
            "12745": "nan",
            "12750": "nan",
            "12755": "nan",
            "12760": "nan",
            "12765": "nan",
            "12770": "nan",
            "12775": "nan",
            "12780": "nan",
            "12785": "nan",
            "12790": "nan",
            "12795": "nan",
            "12800": "nan",
            "12805": "nan",
            "12810": "nan",
            "12815": "nan",
            "12820": "nan",
            "12825": "nan",
            "12830": "nan",
            "12835": "nan",
            "12840": "nan",
            "12845": "nan",
            "12850": "nan",
            "12855": "nan",
            "12860": "nan",
            "12865": "nan",
            "12870": "nan",
            "12875": "nan",
            "12880": "nan",
            "12885": "nan",
            "12890": "nan",
            "12895": "nan",
            "12900": "nan",
            "12905": "nan",
            "12910": "nan",
            "12915": "nan",
            "12920": "nan",
            "12925": "nan",
            "12930": "nan",
            "12935": "nan",
            "12940": "nan",
            "12945": "nan",
            "12950": "nan",
            "12955": "nan",
            "12960": "nan",
            "12965": "nan",
            "12970": "nan",
            "12975": "nan",
            "12980": "nan",
            "12985": "nan",
            "12990": "nan",
            "12995": "nan",
            "13000": "nan"
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 13000,
        "step_interval": 5,
        "values": {
            "1": "nan",
            "5": "nan",
            "10": "nan",
            "15": "nan",
            "20": "nan",
            "25": "nan",
            "30": "nan",
            "35": "nan",
            "40": "nan",
            "45": "nan",
            "50": "nan",
            "55": "nan",
            "60": "nan",
            "65": "nan",
            "70": "nan",
            "75": "nan",
            "80": "nan",
            "85": "nan",
            "90": "nan",
            "95": "nan",
            "100": 1.13737,
            "105": "nan",
            "110": "nan",
            "115": "nan",
            "120": "nan",
            "125": "nan",
            "130": "nan",
            "135": "nan",
            "140": "nan",
            "145": "nan",
            "150": "nan",
            "155": "nan",
            "160": "nan",
            "165": "nan",
            "170": "nan",
            "175": "nan",
            "180": "nan",
            "185": "nan",
            "190": "nan",
            "195": "nan",
            "200": 0.92163,
            "205": "nan",
            "210": "nan",
            "215": "nan",
            "220": "nan",
            "225": "nan",
            "230": "nan",
            "235": "nan",
            "240": "nan",
            "245": "nan",
            "250": "nan",
            "255": "nan",
            "260": "nan",
            "265": "nan",
            "270": "nan",
            "275": "nan",
            "280": "nan",
            "285": "nan",
            "290": "nan",
            "295": "nan",
            "300": 0.91017,
            "305": "nan",
            "310": "nan",
            "315": "nan",
            "320": "nan",
            "325": "nan",
            "330": "nan",
            "335": "nan",
            "340": "nan",
            "345": "nan",
            "350": "nan",
            "355": "nan",
            "360": "nan",
            "365": "nan",
            "370": "nan",
            "375": "nan",
            "380": "nan",
            "385": "nan",
            "390": "nan",
            "395": "nan",
            "400": 0.92356,
            "405": "nan",
            "410": "nan",
            "415": "nan",
            "420": "nan",
            "425": "nan",
            "430": "nan",
            "435": "nan",
            "440": "nan",
            "445": "nan",
            "450": "nan",
            "455": "nan",
            "460": "nan",
            "465": "nan",
            "470": "nan",
            "475": "nan",
            "480": "nan",
            "485": "nan",
            "490": "nan",
            "495": "nan",
            "500": 0.92399,
            "505": "nan",
            "510": "nan",
            "515": "nan",
            "520": "nan",
            "525": "nan",
            "530": "nan",
            "535": "nan",
            "540": "nan",
            "545": "nan",
            "550": "nan",
            "555": "nan",
            "560": "nan",
            "565": "nan",
            "570": "nan",
            "575": "nan",
            "580": "nan",
            "585": "nan",
            "590": "nan",
            "595": "nan",
            "600": 0.91892,
            "605": "nan",
            "610": "nan",
            "615": "nan",
            "620": "nan",
            "625": "nan",
            "630": "nan",
            "635": "nan",
            "640": "nan",
            "645": "nan",
            "650": "nan",
            "655": "nan",
            "660": "nan",
            "665": "nan",
            "670": "nan",
            "675": "nan",
            "680": "nan",
            "685": "nan",
            "690": "nan",
            "695": "nan",
            "700": 0.92118,
            "705": "nan",
            "710": "nan",
            "715": "nan",
            "720": "nan",
            "725": "nan",
            "730": "nan",
            "735": "nan",
            "740": "nan",
            "745": "nan",
            "750": "nan",
            "755": "nan",
            "760": "nan",
            "765": "nan",
            "770": "nan",
            "775": "nan",
            "780": "nan",
            "785": "nan",
            "790": "nan",
            "795": "nan",
            "800": 0.91859,
            "805": "nan",
            "810": "nan",
            "815": "nan",
            "820": "nan",
            "825": "nan",
            "830": "nan",
            "835": "nan",
            "840": "nan",
            "845": "nan",
            "850": "nan",
            "855": "nan",
            "860": "nan",
            "865": "nan",
            "870": "nan",
            "875": "nan",
            "880": "nan",
            "885": "nan",
            "890": "nan",
            "895": "nan",
            "900": 0.91514,
            "905": "nan",
            "910": "nan",
            "915": "nan",
            "920": "nan",
            "925": "nan",
            "930": "nan",
            "935": "nan",
            "940": "nan",
            "945": "nan",
            "950": "nan",
            "955": "nan",
            "960": "nan",
            "965": "nan",
            "970": "nan",
            "975": "nan",
            "980": "nan",
            "985": "nan",
            "990": "nan",
            "995": "nan",
            "1000": 0.92291,
            "1005": "nan",
            "1010": "nan",
            "1015": "nan",
            "1020": "nan",
            "1025": "nan",
            "1030": "nan",
            "1035": "nan",
            "1040": "nan",
            "1045": "nan",
            "1050": "nan",
            "1055": "nan",
            "1060": "nan",
            "1065": "nan",
            "1070": "nan",
            "1075": "nan",
            "1080": "nan",
            "1085": "nan",
            "1090": "nan",
            "1095": "nan",
            "1100": 0.92819,
            "1105": "nan",
            "1110": "nan",
            "1115": "nan",
            "1120": "nan",
            "1125": "nan",
            "1130": "nan",
            "1135": "nan",
            "1140": "nan",
            "1145": "nan",
            "1150": "nan",
            "1155": "nan",
            "1160": "nan",
            "1165": "nan",
            "1170": "nan",
            "1175": "nan",
            "1180": "nan",
            "1185": "nan",
            "1190": "nan",
            "1195": "nan",
            "1200": 0.9181,
            "1205": "nan",
            "1210": "nan",
            "1215": "nan",
            "1220": "nan",
            "1225": "nan",
            "1230": "nan",
            "1235": "nan",
            "1240": "nan",
            "1245": "nan",
            "1250": "nan",
            "1255": "nan",
            "1260": "nan",
            "1265": "nan",
            "1270": "nan",
            "1275": "nan",
            "1280": "nan",
            "1285": "nan",
            "1290": "nan",
            "1295": "nan",
            "1300": 0.98022,
            "1305": "nan",
            "1310": "nan",
            "1315": "nan",
            "1320": "nan",
            "1325": "nan",
            "1330": "nan",
            "1335": "nan",
            "1340": "nan",
            "1345": "nan",
            "1350": "nan",
            "1355": "nan",
            "1360": "nan",
            "1365": "nan",
            "1370": "nan",
            "1375": "nan",
            "1380": "nan",
            "1385": "nan",
            "1390": "nan",
            "1395": "nan",
            "1400": 1.01105,
            "1405": "nan",
            "1410": "nan",
            "1415": "nan",
            "1420": "nan",
            "1425": "nan",
            "1430": "nan",
            "1435": "nan",
            "1440": "nan",
            "1445": "nan",
            "1450": "nan",
            "1455": "nan",
            "1460": "nan",
            "1465": "nan",
            "1470": "nan",
            "1475": "nan",
            "1480": "nan",
            "1485": "nan",
            "1490": "nan",
            "1495": "nan",
            "1500": 0.91346,
            "1505": "nan",
            "1510": "nan",
            "1515": "nan",
            "1520": "nan",
            "1525": "nan",
            "1530": "nan",
            "1535": "nan",
            "1540": "nan",
            "1545": "nan",
            "1550": "nan",
            "1555": "nan",
            "1560": "nan",
            "1565": "nan",
            "1570": "nan",
            "1575": "nan",
            "1580": "nan",
            "1585": "nan",
            "1590": "nan",
            "1595": "nan",
            "1600": 0.91787,
            "1605": "nan",
            "1610": "nan",
            "1615": "nan",
            "1620": "nan",
            "1625": "nan",
            "1630": "nan",
            "1635": "nan",
            "1640": "nan",
            "1645": "nan",
            "1650": "nan",
            "1655": "nan",
            "1660": "nan",
            "1665": "nan",
            "1670": "nan",
            "1675": "nan",
            "1680": "nan",
            "1685": "nan",
            "1690": "nan",
            "1695": "nan",
            "1700": 0.91742,
            "1705": "nan",
            "1710": "nan",
            "1715": "nan",
            "1720": "nan",
            "1725": "nan",
            "1730": "nan",
            "1735": "nan",
            "1740": "nan",
            "1745": "nan",
            "1750": "nan",
            "1755": "nan",
            "1760": "nan",
            "1765": "nan",
            "1770": "nan",
            "1775": "nan",
            "1780": "nan",
            "1785": "nan",
            "1790": "nan",
            "1795": "nan",
            "1800": 1.00231,
            "1805": "nan",
            "1810": "nan",
            "1815": "nan",
            "1820": "nan",
            "1825": "nan",
            "1830": "nan",
            "1835": "nan",
            "1840": "nan",
            "1845": "nan",
            "1850": "nan",
            "1855": "nan",
            "1860": "nan",
            "1865": "nan",
            "1870": "nan",
            "1875": "nan",
            "1880": "nan",
            "1885": "nan",
            "1890": "nan",
            "1895": "nan",
            "1900": 0.91484,
            "1905": "nan",
            "1910": "nan",
            "1915": "nan",
            "1920": "nan",
            "1925": "nan",
            "1930": "nan",
            "1935": "nan",
            "1940": "nan",
            "1945": "nan",
            "1950": "nan",
            "1955": "nan",
            "1960": "nan",
            "1965": "nan",
            "1970": "nan",
            "1975": "nan",
            "1980": "nan",
            "1985": "nan",
            "1990": "nan",
            "1995": "nan",
            "2000": 0.92229,
            "2005": "nan",
            "2010": "nan",
            "2015": "nan",
            "2020": "nan",
            "2025": "nan",
            "2030": "nan",
            "2035": "nan",
            "2040": "nan",
            "2045": "nan",
            "2050": "nan",
            "2055": "nan",
            "2060": "nan",
            "2065": "nan",
            "2070": "nan",
            "2075": "nan",
            "2080": "nan",
            "2085": "nan",
            "2090": "nan",
            "2095": "nan",
            "2100": 0.91845,
            "2105": "nan",
            "2110": "nan",
            "2115": "nan",
            "2120": "nan",
            "2125": "nan",
            "2130": "nan",
            "2135": "nan",
            "2140": "nan",
            "2145": "nan",
            "2150": "nan",
            "2155": "nan",
            "2160": "nan",
            "2165": "nan",
            "2170": "nan",
            "2175": "nan",
            "2180": "nan",
            "2185": "nan",
            "2190": "nan",
            "2195": "nan",
            "2200": 0.96226,
            "2205": "nan",
            "2210": "nan",
            "2215": "nan",
            "2220": "nan",
            "2225": "nan",
            "2230": "nan",
            "2235": "nan",
            "2240": "nan",
            "2245": "nan",
            "2250": "nan",
            "2255": "nan",
            "2260": "nan",
            "2265": "nan",
            "2270": "nan",
            "2275": "nan",
            "2280": "nan",
            "2285": "nan",
            "2290": "nan",
            "2295": "nan",
            "2300": 1.01963,
            "2305": "nan",
            "2310": "nan",
            "2315": "nan",
            "2320": "nan",
            "2325": "nan",
            "2330": "nan",
            "2335": "nan",
            "2340": "nan",
            "2345": "nan",
            "2350": "nan",
            "2355": "nan",
            "2360": "nan",
            "2365": "nan",
            "2370": "nan",
            "2375": "nan",
            "2380": "nan",
            "2385": "nan",
            "2390": "nan",
            "2395": "nan",
            "2400": 0.90953,
            "2405": "nan",
            "2410": "nan",
            "2415": "nan",
            "2420": "nan",
            "2425": "nan",
            "2430": "nan",
            "2435": "nan",
            "2440": "nan",
            "2445": "nan",
            "2450": "nan",
            "2455": "nan",
            "2460": "nan",
            "2465": "nan",
            "2470": "nan",
            "2475": "nan",
            "2480": "nan",
            "2485": "nan",
            "2490": "nan",
            "2495": "nan",
            "2500": 0.91481,
            "2505": "nan",
            "2510": "nan",
            "2515": "nan",
            "2520": "nan",
            "2525": "nan",
            "2530": "nan",
            "2535": "nan",
            "2540": "nan",
            "2545": "nan",
            "2550": "nan",
            "2555": "nan",
            "2560": "nan",
            "2565": "nan",
            "2570": "nan",
            "2575": "nan",
            "2580": "nan",
            "2585": "nan",
            "2590": "nan",
            "2595": "nan",
            "2600": 0.91625,
            "2605": "nan",
            "2610": "nan",
            "2615": "nan",
            "2620": "nan",
            "2625": "nan",
            "2630": "nan",
            "2635": "nan",
            "2640": "nan",
            "2645": "nan",
            "2650": "nan",
            "2655": "nan",
            "2660": "nan",
            "2665": "nan",
            "2670": "nan",
            "2675": "nan",
            "2680": "nan",
            "2685": "nan",
            "2690": "nan",
            "2695": "nan",
            "2700": 1.01096,
            "2705": "nan",
            "2710": "nan",
            "2715": "nan",
            "2720": "nan",
            "2725": "nan",
            "2730": "nan",
            "2735": "nan",
            "2740": "nan",
            "2745": "nan",
            "2750": "nan",
            "2755": "nan",
            "2760": "nan",
            "2765": "nan",
            "2770": "nan",
            "2775": "nan",
            "2780": "nan",
            "2785": "nan",
            "2790": "nan",
            "2795": "nan",
            "2800": 0.89726,
            "2805": "nan",
            "2810": "nan",
            "2815": "nan",
            "2820": "nan",
            "2825": "nan",
            "2830": "nan",
            "2835": "nan",
            "2840": "nan",
            "2845": "nan",
            "2850": "nan",
            "2855": "nan",
            "2860": "nan",
            "2865": "nan",
            "2870": "nan",
            "2875": "nan",
            "2880": "nan",
            "2885": "nan",
            "2890": "nan",
            "2895": "nan",
            "2900": 0.89875,
            "2905": "nan",
            "2910": "nan",
            "2915": "nan",
            "2920": "nan",
            "2925": "nan",
            "2930": "nan",
            "2935": "nan",
            "2940": "nan",
            "2945": "nan",
            "2950": "nan",
            "2955": "nan",
            "2960": "nan",
            "2965": "nan",
            "2970": "nan",
            "2975": "nan",
            "2980": "nan",
            "2985": "nan",
            "2990": "nan",
            "2995": "nan",
            "3000": 0.90963,
            "3005": "nan",
            "3010": "nan",
            "3015": "nan",
            "3020": "nan",
            "3025": "nan",
            "3030": "nan",
            "3035": "nan",
            "3040": "nan",
            "3045": "nan",
            "3050": "nan",
            "3055": "nan",
            "3060": "nan",
            "3065": "nan",
            "3070": "nan",
            "3075": "nan",
            "3080": "nan",
            "3085": "nan",
            "3090": "nan",
            "3095": "nan",
            "3100": 0.96015,
            "3105": "nan",
            "3110": "nan",
            "3115": "nan",
            "3120": "nan",
            "3125": "nan",
            "3130": "nan",
            "3135": "nan",
            "3140": "nan",
            "3145": "nan",
            "3150": "nan",
            "3155": "nan",
            "3160": "nan",
            "3165": "nan",
            "3170": "nan",
            "3175": "nan",
            "3180": "nan",
            "3185": "nan",
            "3190": "nan",
            "3195": "nan",
            "3200": 1.01137,
            "3205": "nan",
            "3210": "nan",
            "3215": "nan",
            "3220": "nan",
            "3225": "nan",
            "3230": "nan",
            "3235": "nan",
            "3240": "nan",
            "3245": "nan",
            "3250": "nan",
            "3255": "nan",
            "3260": "nan",
            "3265": "nan",
            "3270": "nan",
            "3275": "nan",
            "3280": "nan",
            "3285": "nan",
            "3290": "nan",
            "3295": "nan",
            "3300": 0.91337,
            "3305": "nan",
            "3310": "nan",
            "3315": "nan",
            "3320": "nan",
            "3325": "nan",
            "3330": "nan",
            "3335": "nan",
            "3340": "nan",
            "3345": "nan",
            "3350": "nan",
            "3355": "nan",
            "3360": "nan",
            "3365": "nan",
            "3370": "nan",
            "3375": "nan",
            "3380": "nan",
            "3385": "nan",
            "3390": "nan",
            "3395": "nan",
            "3400": 0.91177,
            "3405": "nan",
            "3410": "nan",
            "3415": "nan",
            "3420": "nan",
            "3425": "nan",
            "3430": "nan",
            "3435": "nan",
            "3440": "nan",
            "3445": "nan",
            "3450": "nan",
            "3455": "nan",
            "3460": "nan",
            "3465": "nan",
            "3470": "nan",
            "3475": "nan",
            "3480": "nan",
            "3485": "nan",
            "3490": "nan",
            "3495": "nan",
            "3500": 0.91124,
            "3505": "nan",
            "3510": "nan",
            "3515": "nan",
            "3520": "nan",
            "3525": "nan",
            "3530": "nan",
            "3535": "nan",
            "3540": "nan",
            "3545": "nan",
            "3550": "nan",
            "3555": "nan",
            "3560": "nan",
            "3565": "nan",
            "3570": "nan",
            "3575": "nan",
            "3580": "nan",
            "3585": "nan",
            "3590": "nan",
            "3595": "nan",
            "3600": 0.91795,
            "3605": "nan",
            "3610": "nan",
            "3615": "nan",
            "3620": "nan",
            "3625": "nan",
            "3630": "nan",
            "3635": "nan",
            "3640": "nan",
            "3645": "nan",
            "3650": "nan",
            "3655": "nan",
            "3660": "nan",
            "3665": "nan",
            "3670": "nan",
            "3675": "nan",
            "3680": "nan",
            "3685": "nan",
            "3690": "nan",
            "3695": "nan",
            "3700": 0.99608,
            "3705": "nan",
            "3710": "nan",
            "3715": "nan",
            "3720": "nan",
            "3725": "nan",
            "3730": "nan",
            "3735": "nan",
            "3740": "nan",
            "3745": "nan",
            "3750": "nan",
            "3755": "nan",
            "3760": "nan",
            "3765": "nan",
            "3770": "nan",
            "3775": "nan",
            "3780": "nan",
            "3785": "nan",
            "3790": "nan",
            "3795": "nan",
            "3800": 0.89009,
            "3805": "nan",
            "3810": "nan",
            "3815": "nan",
            "3820": "nan",
            "3825": "nan",
            "3830": "nan",
            "3835": "nan",
            "3840": "nan",
            "3845": "nan",
            "3850": "nan",
            "3855": "nan",
            "3860": "nan",
            "3865": "nan",
            "3870": "nan",
            "3875": "nan",
            "3880": "nan",
            "3885": "nan",
            "3890": "nan",
            "3895": "nan",
            "3900": 0.88671,
            "3905": "nan",
            "3910": "nan",
            "3915": "nan",
            "3920": "nan",
            "3925": "nan",
            "3930": "nan",
            "3935": "nan",
            "3940": "nan",
            "3945": "nan",
            "3950": "nan",
            "3955": "nan",
            "3960": "nan",
            "3965": "nan",
            "3970": "nan",
            "3975": "nan",
            "3980": "nan",
            "3985": "nan",
            "3990": "nan",
            "3995": "nan",
            "4000": 0.96428,
            "4005": "nan",
            "4010": "nan",
            "4015": "nan",
            "4020": "nan",
            "4025": "nan",
            "4030": "nan",
            "4035": "nan",
            "4040": "nan",
            "4045": "nan",
            "4050": "nan",
            "4055": "nan",
            "4060": "nan",
            "4065": "nan",
            "4070": "nan",
            "4075": "nan",
            "4080": "nan",
            "4085": "nan",
            "4090": "nan",
            "4095": "nan",
            "4100": 1.01022,
            "4105": "nan",
            "4110": "nan",
            "4115": "nan",
            "4120": "nan",
            "4125": "nan",
            "4130": "nan",
            "4135": "nan",
            "4140": "nan",
            "4145": "nan",
            "4150": "nan",
            "4155": "nan",
            "4160": "nan",
            "4165": "nan",
            "4170": "nan",
            "4175": "nan",
            "4180": "nan",
            "4185": "nan",
            "4190": "nan",
            "4195": "nan",
            "4200": 0.91862,
            "4205": "nan",
            "4210": "nan",
            "4215": "nan",
            "4220": "nan",
            "4225": "nan",
            "4230": "nan",
            "4235": "nan",
            "4240": "nan",
            "4245": "nan",
            "4250": "nan",
            "4255": "nan",
            "4260": "nan",
            "4265": "nan",
            "4270": "nan",
            "4275": "nan",
            "4280": "nan",
            "4285": "nan",
            "4290": "nan",
            "4295": "nan",
            "4300": 0.91817,
            "4305": "nan",
            "4310": "nan",
            "4315": "nan",
            "4320": "nan",
            "4325": "nan",
            "4330": "nan",
            "4335": "nan",
            "4340": "nan",
            "4345": "nan",
            "4350": "nan",
            "4355": "nan",
            "4360": "nan",
            "4365": "nan",
            "4370": "nan",
            "4375": "nan",
            "4380": "nan",
            "4385": "nan",
            "4390": "nan",
            "4395": "nan",
            "4400": 0.91961,
            "4405": "nan",
            "4410": "nan",
            "4415": "nan",
            "4420": "nan",
            "4425": "nan",
            "4430": "nan",
            "4435": "nan",
            "4440": "nan",
            "4445": "nan",
            "4450": "nan",
            "4455": "nan",
            "4460": "nan",
            "4465": "nan",
            "4470": "nan",
            "4475": "nan",
            "4480": "nan",
            "4485": "nan",
            "4490": "nan",
            "4495": "nan",
            "4500": 0.91494,
            "4505": "nan",
            "4510": "nan",
            "4515": "nan",
            "4520": "nan",
            "4525": "nan",
            "4530": "nan",
            "4535": "nan",
            "4540": "nan",
            "4545": "nan",
            "4550": "nan",
            "4555": "nan",
            "4560": "nan",
            "4565": "nan",
            "4570": "nan",
            "4575": "nan",
            "4580": "nan",
            "4585": "nan",
            "4590": "nan",
            "4595": "nan",
            "4600": 0.97166,
            "4605": "nan",
            "4610": "nan",
            "4615": "nan",
            "4620": "nan",
            "4625": "nan",
            "4630": "nan",
            "4635": "nan",
            "4640": "nan",
            "4645": "nan",
            "4650": "nan",
            "4655": "nan",
            "4660": "nan",
            "4665": "nan",
            "4670": "nan",
            "4675": "nan",
            "4680": "nan",
            "4685": "nan",
            "4690": "nan",
            "4695": "nan",
            "4700": 0.89156,
            "4705": "nan",
            "4710": "nan",
            "4715": "nan",
            "4720": "nan",
            "4725": "nan",
            "4730": "nan",
            "4735": "nan",
            "4740": "nan",
            "4745": "nan",
            "4750": "nan",
            "4755": "nan",
            "4760": "nan",
            "4765": "nan",
            "4770": "nan",
            "4775": "nan",
            "4780": "nan",
            "4785": "nan",
            "4790": "nan",
            "4795": "nan",
            "4800": 0.91581,
            "4805": "nan",
            "4810": "nan",
            "4815": "nan",
            "4820": "nan",
            "4825": "nan",
            "4830": "nan",
            "4835": "nan",
            "4840": "nan",
            "4845": "nan",
            "4850": "nan",
            "4855": "nan",
            "4860": "nan",
            "4865": "nan",
            "4870": "nan",
            "4875": "nan",
            "4880": "nan",
            "4885": "nan",
            "4890": "nan",
            "4895": "nan",
            "4900": 0.98462,
            "4905": "nan",
            "4910": "nan",
            "4915": "nan",
            "4920": "nan",
            "4925": "nan",
            "4930": "nan",
            "4935": "nan",
            "4940": "nan",
            "4945": "nan",
            "4950": "nan",
            "4955": "nan",
            "4960": "nan",
            "4965": "nan",
            "4970": "nan",
            "4975": "nan",
            "4980": "nan",
            "4985": "nan",
            "4990": "nan",
            "4995": "nan",
            "5000": 1.0238,
            "5005": "nan",
            "5010": "nan",
            "5015": "nan",
            "5020": "nan",
            "5025": "nan",
            "5030": "nan",
            "5035": "nan",
            "5040": "nan",
            "5045": "nan",
            "5050": "nan",
            "5055": "nan",
            "5060": "nan",
            "5065": "nan",
            "5070": "nan",
            "5075": "nan",
            "5080": "nan",
            "5085": "nan",
            "5090": "nan",
            "5095": "nan",
            "5100": 0.91893,
            "5105": "nan",
            "5110": "nan",
            "5115": "nan",
            "5120": "nan",
            "5125": "nan",
            "5130": "nan",
            "5135": "nan",
            "5140": "nan",
            "5145": "nan",
            "5150": "nan",
            "5155": "nan",
            "5160": "nan",
            "5165": "nan",
            "5170": "nan",
            "5175": "nan",
            "5180": "nan",
            "5185": "nan",
            "5190": "nan",
            "5195": "nan",
            "5200": 0.9139,
            "5205": "nan",
            "5210": "nan",
            "5215": "nan",
            "5220": "nan",
            "5225": "nan",
            "5230": "nan",
            "5235": "nan",
            "5240": "nan",
            "5245": "nan",
            "5250": "nan",
            "5255": "nan",
            "5260": "nan",
            "5265": "nan",
            "5270": "nan",
            "5275": "nan",
            "5280": "nan",
            "5285": "nan",
            "5290": "nan",
            "5295": "nan",
            "5300": 0.9158,
            "5305": "nan",
            "5310": "nan",
            "5315": "nan",
            "5320": "nan",
            "5325": "nan",
            "5330": "nan",
            "5335": "nan",
            "5340": "nan",
            "5345": "nan",
            "5350": "nan",
            "5355": "nan",
            "5360": "nan",
            "5365": "nan",
            "5370": "nan",
            "5375": "nan",
            "5380": "nan",
            "5385": "nan",
            "5390": "nan",
            "5395": "nan",
            "5400": 0.91724,
            "5405": "nan",
            "5410": "nan",
            "5415": "nan",
            "5420": "nan",
            "5425": "nan",
            "5430": "nan",
            "5435": "nan",
            "5440": "nan",
            "5445": "nan",
            "5450": "nan",
            "5455": "nan",
            "5460": "nan",
            "5465": "nan",
            "5470": "nan",
            "5475": "nan",
            "5480": "nan",
            "5485": "nan",
            "5490": "nan",
            "5495": "nan",
            "5500": 1.00127,
            "5505": "nan",
            "5510": "nan",
            "5515": "nan",
            "5520": "nan",
            "5525": "nan",
            "5530": "nan",
            "5535": "nan",
            "5540": "nan",
            "5545": "nan",
            "5550": "nan",
            "5555": "nan",
            "5560": "nan",
            "5565": "nan",
            "5570": "nan",
            "5575": "nan",
            "5580": "nan",
            "5585": "nan",
            "5590": "nan",
            "5595": "nan",
            "5600": 0.91763,
            "5605": "nan",
            "5610": "nan",
            "5615": "nan",
            "5620": "nan",
            "5625": "nan",
            "5630": "nan",
            "5635": "nan",
            "5640": "nan",
            "5645": "nan",
            "5650": "nan",
            "5655": "nan",
            "5660": "nan",
            "5665": "nan",
            "5670": "nan",
            "5675": "nan",
            "5680": "nan",
            "5685": "nan",
            "5690": "nan",
            "5695": "nan",
            "5700": 0.91302,
            "5705": "nan",
            "5710": "nan",
            "5715": "nan",
            "5720": "nan",
            "5725": "nan",
            "5730": "nan",
            "5735": "nan",
            "5740": "nan",
            "5745": "nan",
            "5750": "nan",
            "5755": "nan",
            "5760": "nan",
            "5765": "nan",
            "5770": "nan",
            "5775": "nan",
            "5780": "nan",
            "5785": "nan",
            "5790": "nan",
            "5795": "nan",
            "5800": 0.9736,
            "5805": "nan",
            "5810": "nan",
            "5815": "nan",
            "5820": "nan",
            "5825": "nan",
            "5830": "nan",
            "5835": "nan",
            "5840": "nan",
            "5845": "nan",
            "5850": "nan",
            "5855": "nan",
            "5860": "nan",
            "5865": "nan",
            "5870": "nan",
            "5875": "nan",
            "5880": "nan",
            "5885": "nan",
            "5890": "nan",
            "5895": "nan",
            "5900": 1.0057,
            "5905": "nan",
            "5910": "nan",
            "5915": "nan",
            "5920": "nan",
            "5925": "nan",
            "5930": "nan",
            "5935": "nan",
            "5940": "nan",
            "5945": "nan",
            "5950": "nan",
            "5955": "nan",
            "5960": "nan",
            "5965": "nan",
            "5970": "nan",
            "5975": "nan",
            "5980": "nan",
            "5985": "nan",
            "5990": "nan",
            "5995": "nan",
            "6000": 0.9012,
            "6005": "nan",
            "6010": "nan",
            "6015": "nan",
            "6020": "nan",
            "6025": "nan",
            "6030": "nan",
            "6035": "nan",
            "6040": "nan",
            "6045": "nan",
            "6050": "nan",
            "6055": "nan",
            "6060": "nan",
            "6065": "nan",
            "6070": "nan",
            "6075": "nan",
            "6080": "nan",
            "6085": "nan",
            "6090": "nan",
            "6095": "nan",
            "6100": 0.89269,
            "6105": "nan",
            "6110": "nan",
            "6115": "nan",
            "6120": "nan",
            "6125": "nan",
            "6130": "nan",
            "6135": "nan",
            "6140": "nan",
            "6145": "nan",
            "6150": "nan",
            "6155": "nan",
            "6160": "nan",
            "6165": "nan",
            "6170": "nan",
            "6175": "nan",
            "6180": "nan",
            "6185": "nan",
            "6190": "nan",
            "6195": "nan",
            "6200": 0.89637,
            "6205": "nan",
            "6210": "nan",
            "6215": "nan",
            "6220": "nan",
            "6225": "nan",
            "6230": "nan",
            "6235": "nan",
            "6240": "nan",
            "6245": "nan",
            "6250": "nan",
            "6255": "nan",
            "6260": "nan",
            "6265": "nan",
            "6270": "nan",
            "6275": "nan",
            "6280": "nan",
            "6285": "nan",
            "6290": "nan",
            "6295": "nan",
            "6300": 1.01218,
            "6305": "nan",
            "6310": "nan",
            "6315": "nan",
            "6320": "nan",
            "6325": "nan",
            "6330": "nan",
            "6335": "nan",
            "6340": "nan",
            "6345": "nan",
            "6350": "nan",
            "6355": "nan",
            "6360": "nan",
            "6365": "nan",
            "6370": "nan",
            "6375": "nan",
            "6380": "nan",
            "6385": "nan",
            "6390": "nan",
            "6395": "nan",
            "6400": 0.92171,
            "6405": "nan",
            "6410": "nan",
            "6415": "nan",
            "6420": "nan",
            "6425": "nan",
            "6430": "nan",
            "6435": "nan",
            "6440": "nan",
            "6445": "nan",
            "6450": "nan",
            "6455": "nan",
            "6460": "nan",
            "6465": "nan",
            "6470": "nan",
            "6475": "nan",
            "6480": "nan",
            "6485": "nan",
            "6490": "nan",
            "6495": "nan",
            "6500": 0.9199,
            "6505": "nan",
            "6510": "nan",
            "6515": "nan",
            "6520": "nan",
            "6525": "nan",
            "6530": "nan",
            "6535": "nan",
            "6540": "nan",
            "6545": "nan",
            "6550": "nan",
            "6555": "nan",
            "6560": "nan",
            "6565": "nan",
            "6570": "nan",
            "6575": "nan",
            "6580": "nan",
            "6585": "nan",
            "6590": "nan",
            "6595": "nan",
            "6600": 0.88888,
            "6605": "nan",
            "6610": "nan",
            "6615": "nan",
            "6620": "nan",
            "6625": "nan",
            "6630": "nan",
            "6635": "nan",
            "6640": "nan",
            "6645": "nan",
            "6650": "nan",
            "6655": "nan",
            "6660": "nan",
            "6665": "nan",
            "6670": "nan",
            "6675": "nan",
            "6680": "nan",
            "6685": "nan",
            "6690": "nan",
            "6695": "nan",
            "6700": 0.95756,
            "6705": "nan",
            "6710": "nan",
            "6715": "nan",
            "6720": "nan",
            "6725": "nan",
            "6730": "nan",
            "6735": "nan",
            "6740": "nan",
            "6745": "nan",
            "6750": "nan",
            "6755": "nan",
            "6760": "nan",
            "6765": "nan",
            "6770": "nan",
            "6775": "nan",
            "6780": "nan",
            "6785": "nan",
            "6790": "nan",
            "6795": "nan",
            "6800": 0.98858,
            "6805": "nan",
            "6810": "nan",
            "6815": "nan",
            "6820": "nan",
            "6825": "nan",
            "6830": "nan",
            "6835": "nan",
            "6840": "nan",
            "6845": "nan",
            "6850": "nan",
            "6855": "nan",
            "6860": "nan",
            "6865": "nan",
            "6870": "nan",
            "6875": "nan",
            "6880": "nan",
            "6885": "nan",
            "6890": "nan",
            "6895": "nan",
            "6900": 0.92019,
            "6905": "nan",
            "6910": "nan",
            "6915": "nan",
            "6920": "nan",
            "6925": "nan",
            "6930": "nan",
            "6935": "nan",
            "6940": "nan",
            "6945": "nan",
            "6950": "nan",
            "6955": "nan",
            "6960": "nan",
            "6965": "nan",
            "6970": "nan",
            "6975": "nan",
            "6980": "nan",
            "6985": "nan",
            "6990": "nan",
            "6995": "nan",
            "7000": 0.92142,
            "7005": "nan",
            "7010": "nan",
            "7015": "nan",
            "7020": "nan",
            "7025": "nan",
            "7030": "nan",
            "7035": "nan",
            "7040": "nan",
            "7045": "nan",
            "7050": "nan",
            "7055": "nan",
            "7060": "nan",
            "7065": "nan",
            "7070": "nan",
            "7075": "nan",
            "7080": "nan",
            "7085": "nan",
            "7090": "nan",
            "7095": "nan",
            "7100": 0.91628,
            "7105": "nan",
            "7110": "nan",
            "7115": "nan",
            "7120": "nan",
            "7125": "nan",
            "7130": "nan",
            "7135": "nan",
            "7140": "nan",
            "7145": "nan",
            "7150": "nan",
            "7155": "nan",
            "7160": "nan",
            "7165": "nan",
            "7170": "nan",
            "7175": "nan",
            "7180": "nan",
            "7185": "nan",
            "7190": "nan",
            "7195": "nan",
            "7200": 1.02044,
            "7205": "nan",
            "7210": "nan",
            "7215": "nan",
            "7220": "nan",
            "7225": "nan",
            "7230": "nan",
            "7235": "nan",
            "7240": "nan",
            "7245": "nan",
            "7250": "nan",
            "7255": "nan",
            "7260": "nan",
            "7265": "nan",
            "7270": "nan",
            "7275": "nan",
            "7280": "nan",
            "7285": "nan",
            "7290": "nan",
            "7295": "nan",
            "7300": 0.92189,
            "7305": "nan",
            "7310": "nan",
            "7315": "nan",
            "7320": "nan",
            "7325": "nan",
            "7330": "nan",
            "7335": "nan",
            "7340": "nan",
            "7345": "nan",
            "7350": "nan",
            "7355": "nan",
            "7360": "nan",
            "7365": "nan",
            "7370": "nan",
            "7375": "nan",
            "7380": "nan",
            "7385": "nan",
            "7390": "nan",
            "7395": "nan",
            "7400": 0.92339,
            "7405": "nan",
            "7410": "nan",
            "7415": "nan",
            "7420": "nan",
            "7425": "nan",
            "7430": "nan",
            "7435": "nan",
            "7440": "nan",
            "7445": "nan",
            "7450": "nan",
            "7455": "nan",
            "7460": "nan",
            "7465": "nan",
            "7470": "nan",
            "7475": "nan",
            "7480": "nan",
            "7485": "nan",
            "7490": "nan",
            "7495": "nan",
            "7500": 0.92441,
            "7505": "nan",
            "7510": "nan",
            "7515": "nan",
            "7520": "nan",
            "7525": "nan",
            "7530": "nan",
            "7535": "nan",
            "7540": "nan",
            "7545": "nan",
            "7550": "nan",
            "7555": "nan",
            "7560": "nan",
            "7565": "nan",
            "7570": "nan",
            "7575": "nan",
            "7580": "nan",
            "7585": "nan",
            "7590": "nan",
            "7595": "nan",
            "7600": 0.98985,
            "7605": "nan",
            "7610": "nan",
            "7615": "nan",
            "7620": "nan",
            "7625": "nan",
            "7630": "nan",
            "7635": "nan",
            "7640": "nan",
            "7645": "nan",
            "7650": "nan",
            "7655": "nan",
            "7660": "nan",
            "7665": "nan",
            "7670": "nan",
            "7675": "nan",
            "7680": "nan",
            "7685": "nan",
            "7690": "nan",
            "7695": "nan",
            "7700": 0.9984,
            "7705": "nan",
            "7710": "nan",
            "7715": "nan",
            "7720": "nan",
            "7725": "nan",
            "7730": "nan",
            "7735": "nan",
            "7740": "nan",
            "7745": "nan",
            "7750": "nan",
            "7755": "nan",
            "7760": "nan",
            "7765": "nan",
            "7770": "nan",
            "7775": "nan",
            "7780": "nan",
            "7785": "nan",
            "7790": "nan",
            "7795": "nan",
            "7800": 0.92652,
            "7805": "nan",
            "7810": "nan",
            "7815": "nan",
            "7820": "nan",
            "7825": "nan",
            "7830": "nan",
            "7835": "nan",
            "7840": "nan",
            "7845": "nan",
            "7850": "nan",
            "7855": "nan",
            "7860": "nan",
            "7865": "nan",
            "7870": "nan",
            "7875": "nan",
            "7880": "nan",
            "7885": "nan",
            "7890": "nan",
            "7895": "nan",
            "7900": 0.92853,
            "7905": "nan",
            "7910": "nan",
            "7915": "nan",
            "7920": "nan",
            "7925": "nan",
            "7930": "nan",
            "7935": "nan",
            "7940": "nan",
            "7945": "nan",
            "7950": "nan",
            "7955": "nan",
            "7960": "nan",
            "7965": "nan",
            "7970": "nan",
            "7975": "nan",
            "7980": "nan",
            "7985": "nan",
            "7990": "nan",
            "7995": "nan",
            "8000": 0.92936,
            "8005": "nan",
            "8010": "nan",
            "8015": "nan",
            "8020": "nan",
            "8025": "nan",
            "8030": "nan",
            "8035": "nan",
            "8040": "nan",
            "8045": "nan",
            "8050": "nan",
            "8055": "nan",
            "8060": "nan",
            "8065": "nan",
            "8070": "nan",
            "8075": "nan",
            "8080": "nan",
            "8085": "nan",
            "8090": "nan",
            "8095": "nan",
            "8100": 1.02351,
            "8105": "nan",
            "8110": "nan",
            "8115": "nan",
            "8120": "nan",
            "8125": "nan",
            "8130": "nan",
            "8135": "nan",
            "8140": "nan",
            "8145": "nan",
            "8150": "nan",
            "8155": "nan",
            "8160": "nan",
            "8165": "nan",
            "8170": "nan",
            "8175": "nan",
            "8180": "nan",
            "8185": "nan",
            "8190": "nan",
            "8195": "nan",
            "8200": 0.92463,
            "8205": "nan",
            "8210": "nan",
            "8215": "nan",
            "8220": "nan",
            "8225": "nan",
            "8230": "nan",
            "8235": "nan",
            "8240": "nan",
            "8245": "nan",
            "8250": "nan",
            "8255": "nan",
            "8260": "nan",
            "8265": "nan",
            "8270": "nan",
            "8275": "nan",
            "8280": "nan",
            "8285": "nan",
            "8290": "nan",
            "8295": "nan",
            "8300": 0.92647,
            "8305": "nan",
            "8310": "nan",
            "8315": "nan",
            "8320": "nan",
            "8325": "nan",
            "8330": "nan",
            "8335": "nan",
            "8340": "nan",
            "8345": "nan",
            "8350": "nan",
            "8355": "nan",
            "8360": "nan",
            "8365": "nan",
            "8370": "nan",
            "8375": "nan",
            "8380": "nan",
            "8385": "nan",
            "8390": "nan",
            "8395": "nan",
            "8400": 0.96739,
            "8405": "nan",
            "8410": "nan",
            "8415": "nan",
            "8420": "nan",
            "8425": "nan",
            "8430": "nan",
            "8435": "nan",
            "8440": "nan",
            "8445": "nan",
            "8450": "nan",
            "8455": "nan",
            "8460": "nan",
            "8465": "nan",
            "8470": "nan",
            "8475": "nan",
            "8480": "nan",
            "8485": "nan",
            "8490": "nan",
            "8495": "nan",
            "8500": 0.99499,
            "8505": "nan",
            "8510": "nan",
            "8515": "nan",
            "8520": "nan",
            "8525": "nan",
            "8530": "nan",
            "8535": "nan",
            "8540": "nan",
            "8545": "nan",
            "8550": "nan",
            "8555": "nan",
            "8560": "nan",
            "8565": "nan",
            "8570": "nan",
            "8575": "nan",
            "8580": "nan",
            "8585": "nan",
            "8590": "nan",
            "8595": "nan",
            "8600": 0.90151,
            "8605": "nan",
            "8610": "nan",
            "8615": "nan",
            "8620": "nan",
            "8625": "nan",
            "8630": "nan",
            "8635": "nan",
            "8640": "nan",
            "8645": "nan",
            "8650": "nan",
            "8655": "nan",
            "8660": "nan",
            "8665": "nan",
            "8670": "nan",
            "8675": "nan",
            "8680": "nan",
            "8685": "nan",
            "8690": "nan",
            "8695": "nan",
            "8700": 0.91227,
            "8705": "nan",
            "8710": "nan",
            "8715": "nan",
            "8720": "nan",
            "8725": "nan",
            "8730": "nan",
            "8735": "nan",
            "8740": "nan",
            "8745": "nan",
            "8750": "nan",
            "8755": "nan",
            "8760": "nan",
            "8765": "nan",
            "8770": "nan",
            "8775": "nan",
            "8780": "nan",
            "8785": "nan",
            "8790": "nan",
            "8795": "nan",
            "8800": 0.9268,
            "8805": "nan",
            "8810": "nan",
            "8815": "nan",
            "8820": "nan",
            "8825": "nan",
            "8830": "nan",
            "8835": "nan",
            "8840": "nan",
            "8845": "nan",
            "8850": "nan",
            "8855": "nan",
            "8860": "nan",
            "8865": "nan",
            "8870": "nan",
            "8875": "nan",
            "8880": "nan",
            "8885": "nan",
            "8890": "nan",
            "8895": "nan",
            "8900": 0.92834,
            "8905": "nan",
            "8910": "nan",
            "8915": "nan",
            "8920": "nan",
            "8925": "nan",
            "8930": "nan",
            "8935": "nan",
            "8940": "nan",
            "8945": "nan",
            "8950": "nan",
            "8955": "nan",
            "8960": "nan",
            "8965": "nan",
            "8970": "nan",
            "8975": "nan",
            "8980": "nan",
            "8985": "nan",
            "8990": "nan",
            "8995": "nan",
            "9000": 1.00636,
            "9005": "nan",
            "9010": "nan",
            "9015": "nan",
            "9020": "nan",
            "9025": "nan",
            "9030": "nan",
            "9035": "nan",
            "9040": "nan",
            "9045": "nan",
            "9050": "nan",
            "9055": "nan",
            "9060": "nan",
            "9065": "nan",
            "9070": "nan",
            "9075": "nan",
            "9080": "nan",
            "9085": "nan",
            "9090": "nan",
            "9095": "nan",
            "9100": 0.92408,
            "9105": "nan",
            "9110": "nan",
            "9115": "nan",
            "9120": "nan",
            "9125": "nan",
            "9130": "nan",
            "9135": "nan",
            "9140": "nan",
            "9145": "nan",
            "9150": "nan",
            "9155": "nan",
            "9160": "nan",
            "9165": "nan",
            "9170": "nan",
            "9175": "nan",
            "9180": "nan",
            "9185": "nan",
            "9190": "nan",
            "9195": "nan",
            "9200": 0.8975,
            "9205": "nan",
            "9210": "nan",
            "9215": "nan",
            "9220": "nan",
            "9225": "nan",
            "9230": "nan",
            "9235": "nan",
            "9240": "nan",
            "9245": "nan",
            "9250": "nan",
            "9255": "nan",
            "9260": "nan",
            "9265": "nan",
            "9270": "nan",
            "9275": "nan",
            "9280": "nan",
            "9285": "nan",
            "9290": "nan",
            "9295": "nan",
            "9300": 0.96478,
            "9305": "nan",
            "9310": "nan",
            "9315": "nan",
            "9320": "nan",
            "9325": "nan",
            "9330": "nan",
            "9335": "nan",
            "9340": "nan",
            "9345": "nan",
            "9350": "nan",
            "9355": "nan",
            "9360": "nan",
            "9365": "nan",
            "9370": "nan",
            "9375": "nan",
            "9380": "nan",
            "9385": "nan",
            "9390": "nan",
            "9395": "nan",
            "9400": 0.98208,
            "9405": "nan",
            "9410": "nan",
            "9415": "nan",
            "9420": "nan",
            "9425": "nan",
            "9430": "nan",
            "9435": "nan",
            "9440": "nan",
            "9445": "nan",
            "9450": "nan",
            "9455": "nan",
            "9460": "nan",
            "9465": "nan",
            "9470": "nan",
            "9475": "nan",
            "9480": "nan",
            "9485": "nan",
            "9490": "nan",
            "9495": "nan",
            "9500": 0.92094,
            "9505": "nan",
            "9510": "nan",
            "9515": "nan",
            "9520": "nan",
            "9525": "nan",
            "9530": "nan",
            "9535": "nan",
            "9540": "nan",
            "9545": "nan",
            "9550": "nan",
            "9555": "nan",
            "9560": "nan",
            "9565": "nan",
            "9570": "nan",
            "9575": "nan",
            "9580": "nan",
            "9585": "nan",
            "9590": "nan",
            "9595": "nan",
            "9600": 0.91916,
            "9605": "nan",
            "9610": "nan",
            "9615": "nan",
            "9620": "nan",
            "9625": "nan",
            "9630": "nan",
            "9635": "nan",
            "9640": "nan",
            "9645": "nan",
            "9650": "nan",
            "9655": "nan",
            "9660": "nan",
            "9665": "nan",
            "9670": "nan",
            "9675": "nan",
            "9680": "nan",
            "9685": "nan",
            "9690": "nan",
            "9695": "nan",
            "9700": 0.90531,
            "9705": "nan",
            "9710": "nan",
            "9715": "nan",
            "9720": "nan",
            "9725": "nan",
            "9730": "nan",
            "9735": "nan",
            "9740": "nan",
            "9745": "nan",
            "9750": "nan",
            "9755": "nan",
            "9760": "nan",
            "9765": "nan",
            "9770": "nan",
            "9775": "nan",
            "9780": "nan",
            "9785": "nan",
            "9790": "nan",
            "9795": "nan",
            "9800": 0.99629,
            "9805": "nan",
            "9810": "nan",
            "9815": "nan",
            "9820": "nan",
            "9825": "nan",
            "9830": "nan",
            "9835": "nan",
            "9840": "nan",
            "9845": "nan",
            "9850": "nan",
            "9855": "nan",
            "9860": "nan",
            "9865": "nan",
            "9870": "nan",
            "9875": "nan",
            "9880": "nan",
            "9885": "nan",
            "9890": "nan",
            "9895": "nan",
            "9900": 0.91989,
            "9905": "nan",
            "9910": "nan",
            "9915": "nan",
            "9920": "nan",
            "9925": "nan",
            "9930": "nan",
            "9935": "nan",
            "9940": "nan",
            "9945": "nan",
            "9950": "nan",
            "9955": "nan",
            "9960": "nan",
            "9965": "nan",
            "9970": "nan",
            "9975": "nan",
            "9980": "nan",
            "9985": "nan",
            "9990": "nan",
            "9995": "nan",
            "10000": 0.90473,
            "10005": "nan",
            "10010": "nan",
            "10015": "nan",
            "10020": "nan",
            "10025": "nan",
            "10030": "nan",
            "10035": "nan",
            "10040": "nan",
            "10045": "nan",
            "10050": "nan",
            "10055": "nan",
            "10060": "nan",
            "10065": "nan",
            "10070": "nan",
            "10075": "nan",
            "10080": "nan",
            "10085": "nan",
            "10090": "nan",
            "10095": "nan",
            "10100": 0.92605,
            "10105": "nan",
            "10110": "nan",
            "10115": "nan",
            "10120": "nan",
            "10125": "nan",
            "10130": "nan",
            "10135": "nan",
            "10140": "nan",
            "10145": "nan",
            "10150": "nan",
            "10155": "nan",
            "10160": "nan",
            "10165": "nan",
            "10170": "nan",
            "10175": "nan",
            "10180": "nan",
            "10185": "nan",
            "10190": "nan",
            "10195": "nan",
            "10200": 0.98226,
            "10205": "nan",
            "10210": "nan",
            "10215": "nan",
            "10220": "nan",
            "10225": "nan",
            "10230": "nan",
            "10235": "nan",
            "10240": "nan",
            "10245": "nan",
            "10250": "nan",
            "10255": "nan",
            "10260": "nan",
            "10265": "nan",
            "10270": "nan",
            "10275": "nan",
            "10280": "nan",
            "10285": "nan",
            "10290": "nan",
            "10295": "nan",
            "10300": 1.00905,
            "10305": "nan",
            "10310": "nan",
            "10315": "nan",
            "10320": "nan",
            "10325": "nan",
            "10330": "nan",
            "10335": "nan",
            "10340": "nan",
            "10345": "nan",
            "10350": "nan",
            "10355": "nan",
            "10360": "nan",
            "10365": "nan",
            "10370": "nan",
            "10375": "nan",
            "10380": "nan",
            "10385": "nan",
            "10390": "nan",
            "10395": "nan",
            "10400": 0.92156,
            "10405": "nan",
            "10410": "nan",
            "10415": "nan",
            "10420": "nan",
            "10425": "nan",
            "10430": "nan",
            "10435": "nan",
            "10440": "nan",
            "10445": "nan",
            "10450": "nan",
            "10455": "nan",
            "10460": "nan",
            "10465": "nan",
            "10470": "nan",
            "10475": "nan",
            "10480": "nan",
            "10485": "nan",
            "10490": "nan",
            "10495": "nan",
            "10500": 0.92591,
            "10505": "nan",
            "10510": "nan",
            "10515": "nan",
            "10520": "nan",
            "10525": "nan",
            "10530": "nan",
            "10535": "nan",
            "10540": "nan",
            "10545": "nan",
            "10550": "nan",
            "10555": "nan",
            "10560": "nan",
            "10565": "nan",
            "10570": "nan",
            "10575": "nan",
            "10580": "nan",
            "10585": "nan",
            "10590": "nan",
            "10595": "nan",
            "10600": 0.92578,
            "10605": "nan",
            "10610": "nan",
            "10615": "nan",
            "10620": "nan",
            "10625": "nan",
            "10630": "nan",
            "10635": "nan",
            "10640": "nan",
            "10645": "nan",
            "10650": "nan",
            "10655": "nan",
            "10660": "nan",
            "10665": "nan",
            "10670": "nan",
            "10675": "nan",
            "10680": "nan",
            "10685": "nan",
            "10690": "nan",
            "10695": "nan",
            "10700": 1.00913,
            "10705": "nan",
            "10710": "nan",
            "10715": "nan",
            "10720": "nan",
            "10725": "nan",
            "10730": "nan",
            "10735": "nan",
            "10740": "nan",
            "10745": "nan",
            "10750": "nan",
            "10755": "nan",
            "10760": "nan",
            "10765": "nan",
            "10770": "nan",
            "10775": "nan",
            "10780": "nan",
            "10785": "nan",
            "10790": "nan",
            "10795": "nan",
            "10800": 0.92582,
            "10805": "nan",
            "10810": "nan",
            "10815": "nan",
            "10820": "nan",
            "10825": "nan",
            "10830": "nan",
            "10835": "nan",
            "10840": "nan",
            "10845": "nan",
            "10850": "nan",
            "10855": "nan",
            "10860": "nan",
            "10865": "nan",
            "10870": "nan",
            "10875": "nan",
            "10880": "nan",
            "10885": "nan",
            "10890": "nan",
            "10895": "nan",
            "10900": 0.92772,
            "10905": "nan",
            "10910": "nan",
            "10915": "nan",
            "10920": "nan",
            "10925": "nan",
            "10930": "nan",
            "10935": "nan",
            "10940": "nan",
            "10945": "nan",
            "10950": "nan",
            "10955": "nan",
            "10960": "nan",
            "10965": "nan",
            "10970": "nan",
            "10975": "nan",
            "10980": "nan",
            "10985": "nan",
            "10990": "nan",
            "10995": "nan",
            "11000": 0.91979,
            "11005": "nan",
            "11010": "nan",
            "11015": "nan",
            "11020": "nan",
            "11025": "nan",
            "11030": "nan",
            "11035": "nan",
            "11040": "nan",
            "11045": "nan",
            "11050": "nan",
            "11055": "nan",
            "11060": "nan",
            "11065": "nan",
            "11070": "nan",
            "11075": "nan",
            "11080": "nan",
            "11085": "nan",
            "11090": "nan",
            "11095": "nan",
            "11100": 1.03705,
            "11105": "nan",
            "11110": "nan",
            "11115": "nan",
            "11120": "nan",
            "11125": "nan",
            "11130": "nan",
            "11135": "nan",
            "11140": "nan",
            "11145": "nan",
            "11150": "nan",
            "11155": "nan",
            "11160": "nan",
            "11165": "nan",
            "11170": "nan",
            "11175": "nan",
            "11180": "nan",
            "11185": "nan",
            "11190": "nan",
            "11195": "nan",
            "11200": 0.90885,
            "11205": "nan",
            "11210": "nan",
            "11215": "nan",
            "11220": "nan",
            "11225": "nan",
            "11230": "nan",
            "11235": "nan",
            "11240": "nan",
            "11245": "nan",
            "11250": "nan",
            "11255": "nan",
            "11260": "nan",
            "11265": "nan",
            "11270": "nan",
            "11275": "nan",
            "11280": "nan",
            "11285": "nan",
            "11290": "nan",
            "11295": "nan",
            "11300": 0.91622,
            "11305": "nan",
            "11310": "nan",
            "11315": "nan",
            "11320": "nan",
            "11325": "nan",
            "11330": "nan",
            "11335": "nan",
            "11340": "nan",
            "11345": "nan",
            "11350": "nan",
            "11355": "nan",
            "11360": "nan",
            "11365": "nan",
            "11370": "nan",
            "11375": "nan",
            "11380": "nan",
            "11385": "nan",
            "11390": "nan",
            "11395": "nan",
            "11400": 0.91818,
            "11405": "nan",
            "11410": "nan",
            "11415": "nan",
            "11420": "nan",
            "11425": "nan",
            "11430": "nan",
            "11435": "nan",
            "11440": "nan",
            "11445": "nan",
            "11450": "nan",
            "11455": "nan",
            "11460": "nan",
            "11465": "nan",
            "11470": "nan",
            "11475": "nan",
            "11480": "nan",
            "11485": "nan",
            "11490": "nan",
            "11495": "nan",
            "11500": 0.92193,
            "11505": "nan",
            "11510": "nan",
            "11515": "nan",
            "11520": "nan",
            "11525": "nan",
            "11530": "nan",
            "11535": "nan",
            "11540": "nan",
            "11545": "nan",
            "11550": "nan",
            "11555": "nan",
            "11560": "nan",
            "11565": "nan",
            "11570": "nan",
            "11575": "nan",
            "11580": "nan",
            "11585": "nan",
            "11590": "nan",
            "11595": "nan",
            "11600": 1.00741,
            "11605": "nan",
            "11610": "nan",
            "11615": "nan",
            "11620": "nan",
            "11625": "nan",
            "11630": "nan",
            "11635": "nan",
            "11640": "nan",
            "11645": "nan",
            "11650": "nan",
            "11655": "nan",
            "11660": "nan",
            "11665": "nan",
            "11670": "nan",
            "11675": "nan",
            "11680": "nan",
            "11685": "nan",
            "11690": "nan",
            "11695": "nan",
            "11700": 0.92603,
            "11705": "nan",
            "11710": "nan",
            "11715": "nan",
            "11720": "nan",
            "11725": "nan",
            "11730": "nan",
            "11735": "nan",
            "11740": "nan",
            "11745": "nan",
            "11750": "nan",
            "11755": "nan",
            "11760": "nan",
            "11765": "nan",
            "11770": "nan",
            "11775": "nan",
            "11780": "nan",
            "11785": "nan",
            "11790": "nan",
            "11795": "nan",
            "11800": 0.92723,
            "11805": "nan",
            "11810": "nan",
            "11815": "nan",
            "11820": "nan",
            "11825": "nan",
            "11830": "nan",
            "11835": "nan",
            "11840": "nan",
            "11845": "nan",
            "11850": "nan",
            "11855": "nan",
            "11860": "nan",
            "11865": "nan",
            "11870": "nan",
            "11875": "nan",
            "11880": "nan",
            "11885": "nan",
            "11890": "nan",
            "11895": "nan",
            "11900": 0.92352,
            "11905": "nan",
            "11910": "nan",
            "11915": "nan",
            "11920": "nan",
            "11925": "nan",
            "11930": "nan",
            "11935": "nan",
            "11940": "nan",
            "11945": "nan",
            "11950": "nan",
            "11955": "nan",
            "11960": "nan",
            "11965": "nan",
            "11970": "nan",
            "11975": "nan",
            "11980": "nan",
            "11985": "nan",
            "11990": "nan",
            "11995": "nan",
            "12000": 1.05482,
            "12005": "nan",
            "12010": "nan",
            "12015": "nan",
            "12020": "nan",
            "12025": "nan",
            "12030": "nan",
            "12035": "nan",
            "12040": "nan",
            "12045": "nan",
            "12050": "nan",
            "12055": "nan",
            "12060": "nan",
            "12065": "nan",
            "12070": "nan",
            "12075": "nan",
            "12080": "nan",
            "12085": "nan",
            "12090": "nan",
            "12095": "nan",
            "12100": 0.94597,
            "12105": "nan",
            "12110": "nan",
            "12115": "nan",
            "12120": "nan",
            "12125": "nan",
            "12130": "nan",
            "12135": "nan",
            "12140": "nan",
            "12145": "nan",
            "12150": "nan",
            "12155": "nan",
            "12160": "nan",
            "12165": "nan",
            "12170": "nan",
            "12175": "nan",
            "12180": "nan",
            "12185": "nan",
            "12190": "nan",
            "12195": "nan",
            "12200": 0.91078,
            "12205": "nan",
            "12210": "nan",
            "12215": "nan",
            "12220": "nan",
            "12225": "nan",
            "12230": "nan",
            "12235": "nan",
            "12240": "nan",
            "12245": "nan",
            "12250": "nan",
            "12255": "nan",
            "12260": "nan",
            "12265": "nan",
            "12270": "nan",
            "12275": "nan",
            "12280": "nan",
            "12285": "nan",
            "12290": "nan",
            "12295": "nan",
            "12300": 0.89248,
            "12305": "nan",
            "12310": "nan",
            "12315": "nan",
            "12320": "nan",
            "12325": "nan",
            "12330": "nan",
            "12335": "nan",
            "12340": "nan",
            "12345": "nan",
            "12350": "nan",
            "12355": "nan",
            "12360": "nan",
            "12365": "nan",
            "12370": "nan",
            "12375": "nan",
            "12380": "nan",
            "12385": "nan",
            "12390": "nan",
            "12395": "nan",
            "12400": 0.9801,
            "12405": "nan",
            "12410": "nan",
            "12415": "nan",
            "12420": "nan",
            "12425": "nan",
            "12430": "nan",
            "12435": "nan",
            "12440": "nan",
            "12445": "nan",
            "12450": "nan",
            "12455": "nan",
            "12460": "nan",
            "12465": "nan",
            "12470": "nan",
            "12475": "nan",
            "12480": "nan",
            "12485": "nan",
            "12490": "nan",
            "12495": "nan",
            "12500": 0.90225,
            "12505": "nan",
            "12510": "nan",
            "12515": "nan",
            "12520": "nan",
            "12525": "nan",
            "12530": "nan",
            "12535": "nan",
            "12540": "nan",
            "12545": "nan",
            "12550": "nan",
            "12555": "nan",
            "12560": "nan",
            "12565": "nan",
            "12570": "nan",
            "12575": "nan",
            "12580": "nan",
            "12585": "nan",
            "12590": "nan",
            "12595": "nan",
            "12600": 0.92251,
            "12605": "nan",
            "12610": "nan",
            "12615": "nan",
            "12620": "nan",
            "12625": "nan",
            "12630": "nan",
            "12635": "nan",
            "12640": "nan",
            "12645": "nan",
            "12650": "nan",
            "12655": "nan",
            "12660": "nan",
            "12665": "nan",
            "12670": "nan",
            "12675": "nan",
            "12680": "nan",
            "12685": "nan",
            "12690": "nan",
            "12695": "nan",
            "12700": 0.91943,
            "12705": "nan",
            "12710": "nan",
            "12715": "nan",
            "12720": "nan",
            "12725": "nan",
            "12730": "nan",
            "12735": "nan",
            "12740": "nan",
            "12745": "nan",
            "12750": "nan",
            "12755": "nan",
            "12760": "nan",
            "12765": "nan",
            "12770": "nan",
            "12775": "nan",
            "12780": "nan",
            "12785": "nan",
            "12790": "nan",
            "12795": "nan",
            "12800": "nan",
            "12805": "nan",
            "12810": "nan",
            "12815": "nan",
            "12820": "nan",
            "12825": "nan",
            "12830": "nan",
            "12835": "nan",
            "12840": "nan",
            "12845": "nan",
            "12850": "nan",
            "12855": "nan",
            "12860": "nan",
            "12865": "nan",
            "12870": "nan",
            "12875": "nan",
            "12880": "nan",
            "12885": "nan",
            "12890": "nan",
            "12895": "nan",
            "12900": "nan",
            "12905": "nan",
            "12910": "nan",
            "12915": "nan",
            "12920": "nan",
            "12925": "nan",
            "12930": "nan",
            "12935": "nan",
            "12940": "nan",
            "12945": "nan",
            "12950": "nan",
            "12955": "nan",
            "12960": "nan",
            "12965": "nan",
            "12970": "nan",
            "12975": "nan",
            "12980": "nan",
            "12985": "nan",
            "12990": "nan",
            "12995": "nan",
            "13000": "nan"
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release_sm/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 13000,
        "step_interval": 5,
        "values": {
            "1": 12.95636,
            "5": 12.91493,
            "10": 12.06071,
            "15": 11.399,
            "20": 10.42783,
            "25": 9.97708,
            "30": 9.62862,
            "35": 9.36622,
            "40": 9.17249,
            "45": 9.00253,
            "50": 8.8424,
            "55": 8.63901,
            "60": 8.60961,
            "65": 8.47692,
            "70": 8.46329,
            "75": 8.28991,
            "80": 8.16093,
            "85": 8.10818,
            "90": 7.92141,
            "95": 7.90423,
            "100": 7.79003,
            "105": 7.66332,
            "110": 7.53152,
            "115": 7.44265,
            "120": 7.47216,
            "125": 7.42219,
            "130": 7.27255,
            "135": 7.27337,
            "140": 7.17596,
            "145": 7.06835,
            "150": 7.15145,
            "155": 7.02838,
            "160": 6.92535,
            "165": 6.89994,
            "170": 6.81047,
            "175": 6.86004,
            "180": 6.81466,
            "185": 6.75648,
            "190": 6.68105,
            "195": 6.63303,
            "200": 6.67654,
            "205": 6.69053,
            "210": 6.55683,
            "215": 6.54183,
            "220": 6.50946,
            "225": 6.48416,
            "230": 6.51062,
            "235": 6.43666,
            "240": 6.38442,
            "245": 6.35429,
            "250": 6.32154,
            "255": 6.45338,
            "260": 6.34969,
            "265": 6.26896,
            "270": 6.29578,
            "275": 6.26435,
            "280": 6.21097,
            "285": 6.24029,
            "290": 6.17061,
            "295": 6.11197,
            "300": 6.10341,
            "305": 6.02939,
            "310": 6.08873,
            "315": 6.08978,
            "320": 5.98033,
            "325": 5.94023,
            "330": 6.00151,
            "335": 6.03605,
            "340": 5.95411,
            "345": 5.93918,
            "350": 5.90365,
            "355": 5.86832,
            "360": 5.86866,
            "365": 5.85156,
            "370": 5.83353,
            "375": 5.84167,
            "380": 5.8501,
            "385": 5.78541,
            "390": 5.79439,
            "395": 5.69008,
            "400": 5.67247,
            "405": 5.68516,
            "410": 5.67869,
            "415": 5.72763,
            "420": 5.64672,
            "425": 5.66169,
            "430": 5.63737,
            "435": 5.58968,
            "440": 5.61896,
            "445": 5.5645,
            "450": 5.58508,
            "455": 5.50743,
            "460": 5.50847,
            "465": 5.59451,
            "470": 5.60347,
            "475": 5.51168,
            "480": 5.48287,
            "485": 5.49811,
            "490": 5.46891,
            "495": 5.47163,
            "500": 5.4091,
            "505": 5.38671,
            "510": 5.45239,
            "515": 5.4142,
            "520": 5.43017,
            "525": 5.29804,
            "530": 5.33295,
            "535": 5.32694,
            "540": 5.34564,
            "545": 5.3566,
            "550": 5.372,
            "555": 5.21825,
            "560": 5.35828,
            "565": 5.29055,
            "570": 5.25088,
            "575": 5.28196,
            "580": 5.21238,
            "585": 5.22679,
            "590": 5.23078,
            "595": 5.20361,
            "600": 5.26226,
            "605": 5.20734,
            "610": 5.20433,
            "615": 5.16551,
            "620": 5.1828,
            "625": 5.21942,
            "630": 5.14048,
            "635": 5.10207,
            "640": 5.06829,
            "645": 5.11226,
            "650": 5.13069,
            "655": 5.11083,
            "660": 5.05053,
            "665": 5.07037,
            "670": 5.03239,
            "675": 5.01416,
            "680": 5.00235,
            "685": 4.98481,
            "690": 4.98266,
            "695": 4.94418,
            "700": 4.96036,
            "705": 4.91447,
            "710": 4.94457,
            "715": 4.84337,
            "720": 4.82105,
            "725": 4.76373,
            "730": 4.83281,
            "735": 4.78972,
            "740": 4.78672,
            "745": 4.69179,
            "750": 4.71771,
            "755": 4.77161,
            "760": 4.76703,
            "765": 4.69822,
            "770": 4.67903,
            "775": 4.64475,
            "780": 4.65639,
            "785": 4.72058,
            "790": 4.63092,
            "795": 4.57774,
            "800": 4.56798,
            "805": 4.58802,
            "810": 4.63352,
            "815": 4.57408,
            "820": 4.6076,
            "825": 4.56989,
            "830": 4.561,
            "835": 4.51372,
            "840": 4.4407,
            "845": 4.46506,
            "850": 4.43154,
            "855": 4.47148,
            "860": 4.42231,
            "865": 4.45963,
            "870": 4.43819,
            "875": 4.34074,
            "880": 4.4007,
            "885": 4.35925,
            "890": 4.41003,
            "895": 4.41996,
            "900": 4.36598,
            "905": 4.30337,
            "910": 4.32788,
            "915": 4.3191,
            "920": 4.34354,
            "925": 4.36176,
            "930": 4.27417,
            "935": 4.29383,
            "940": 4.31971,
            "945": 4.29013,
            "950": 4.32454,
            "955": 4.2605,
            "960": 4.16766,
            "965": 4.26398,
            "970": 4.2642,
            "975": 4.21835,
            "980": 4.20542,
            "985": 4.16834,
            "990": 4.13159,
            "995": 4.15896,
            "1000": 4.20986,
            "1005": 4.17818,
            "1010": 4.14706,
            "1015": 4.12779,
            "1020": 4.14585,
            "1025": 4.20083,
            "1030": 4.10606,
            "1035": 4.06602,
            "1040": 4.104,
            "1045": 4.1058,
            "1050": 4.1194,
            "1055": 4.07941,
            "1060": 4.10741,
            "1065": 4.05729,
            "1070": 4.04856,
            "1075": 4.06438,
            "1080": 4.06666,
            "1085": 4.06126,
            "1090": 4.03111,
            "1095": 4.09404,
            "1100": 4.05366,
            "1105": 4.06449,
            "1110": 4.03061,
            "1115": 4.00934,
            "1120": 3.98834,
            "1125": 3.98655,
            "1130": 4.03219,
            "1135": 4.01012,
            "1140": 4.00354,
            "1145": 3.94596,
            "1150": 4.03253,
            "1155": 3.98732,
            "1160": 3.95928,
            "1165": 3.86152,
            "1170": 3.92565,
            "1175": 3.94727,
            "1180": 3.94594,
            "1185": 3.98008,
            "1190": 3.92521,
            "1195": 3.93802,
            "1200": 3.86632,
            "1205": 3.87409,
            "1210": 3.99026,
            "1215": 3.82709,
            "1220": 3.86889,
            "1225": 3.80788,
            "1230": 3.90481,
            "1235": 3.89023,
            "1240": 3.86358,
            "1245": 3.80325,
            "1250": 3.83985,
            "1255": 3.87091,
            "1260": 3.88382,
            "1265": 3.79458,
            "1270": 3.86062,
            "1275": 3.83631,
            "1280": 3.81782,
            "1285": 3.84386,
            "1290": 3.86067,
            "1295": 3.84325,
            "1300": 3.82563,
            "1305": 3.81469,
            "1310": 3.81207,
            "1315": 3.81033,
            "1320": 3.8231,
            "1325": 3.70845,
            "1330": 3.78885,
            "1335": 3.77725,
            "1340": 3.75347,
            "1345": 3.75107,
            "1350": 3.71799,
            "1355": 3.76936,
            "1360": 3.72335,
            "1365": 3.72556,
            "1370": 3.73231,
            "1375": 3.74918,
            "1380": 3.75889,
            "1385": 3.73782,
            "1390": 3.64839,
            "1395": 3.74238,
            "1400": 3.72646,
            "1405": 3.66878,
            "1410": 3.66425,
            "1415": 3.65271,
            "1420": 3.69535,
            "1425": 3.70138,
            "1430": 3.67121,
            "1435": 3.66402,
            "1440": 3.65193,
            "1445": 3.68875,
            "1450": 3.67616,
            "1455": 3.63441,
            "1460": 3.64566,
            "1465": 3.67157,
            "1470": 3.63262,
            "1475": 3.69518,
            "1480": 3.65146,
            "1485": 3.66092,
            "1490": 3.62758,
            "1495": 3.59427,
            "1500": 3.62478,
            "1505": 3.68785,
            "1510": 3.563,
            "1515": 3.60002,
            "1520": 3.62112,
            "1525": 3.60225,
            "1530": 3.58683,
            "1535": 3.57992,
            "1540": 3.60666,
            "1545": 3.59612,
            "1550": 3.55628,
            "1555": 3.56998,
            "1560": 3.60483,
            "1565": 3.60236,
            "1570": 3.60117,
            "1575": 3.54502,
            "1580": 3.58519,
            "1585": 3.57087,
            "1590": 3.45241,
            "1595": 3.50264,
            "1600": 3.49859,
            "1605": 3.54796,
            "1610": 3.57472,
            "1615": 3.49729,
            "1620": 3.51578,
            "1625": 3.46192,
            "1630": 3.49755,
            "1635": 3.53974,
            "1640": 3.5315,
            "1645": 3.53664,
            "1650": 3.49268,
            "1655": 3.4598,
            "1660": 3.51996,
            "1665": 3.44574,
            "1670": 3.50193,
            "1675": 3.48185,
            "1680": 3.4727,
            "1685": 3.47728,
            "1690": 3.47809,
            "1695": 3.49119,
            "1700": 3.44575,
            "1705": 3.39167,
            "1710": 3.49469,
            "1715": 3.49059,
            "1720": 3.42628,
            "1725": 3.42554,
            "1730": 3.4079,
            "1735": 3.45263,
            "1740": 3.45054,
            "1745": 3.43655,
            "1750": 3.39646,
            "1755": 3.39273,
            "1760": 3.38735,
            "1765": 3.4222,
            "1770": 3.44622,
            "1775": 3.387,
            "1780": 3.43414,
            "1785": 3.40568,
            "1790": 3.37632,
            "1795": 3.39987,
            "1800": 3.33185,
            "1805": 3.3833,
            "1810": 3.32286,
            "1815": 3.41563,
            "1820": 3.40441,
            "1825": 3.36911,
            "1830": 3.32658,
            "1835": 3.4159,
            "1840": 3.38999,
            "1845": 3.41729,
            "1850": 3.37087,
            "1855": 3.36447,
            "1860": 3.33,
            "1865": 3.3828,
            "1870": 3.29994,
            "1875": 3.42203,
            "1880": 3.31553,
            "1885": 3.34452,
            "1890": 3.34191,
            "1895": 3.38708,
            "1900": 3.37396,
            "1905": 3.3097,
            "1910": 3.32571,
            "1915": 3.31274,
            "1920": 3.34648,
            "1925": 3.32051,
            "1930": 3.30099,
            "1935": 3.30725,
            "1940": 3.35696,
            "1945": 3.25663,
            "1950": 3.4137,
            "1955": 3.29079,
            "1960": 3.29664,
            "1965": 3.26156,
            "1970": 3.29433,
            "1975": 3.3407,
            "1980": 3.33342,
            "1985": 3.23349,
            "1990": 3.29114,
            "1995": 3.28958,
            "2000": 3.27902,
            "2005": 3.27718,
            "2010": 3.26011,
            "2015": 3.21811,
            "2020": 3.25285,
            "2025": 3.26578,
            "2030": 3.27801,
            "2035": 3.3008,
            "2040": 3.25364,
            "2045": 3.22173,
            "2050": 3.27331,
            "2055": 3.31203,
            "2060": 3.29841,
            "2065": 3.22375,
            "2070": 3.27847,
            "2075": 3.23278,
            "2080": 3.21071,
            "2085": 3.27722,
            "2090": 3.1336,
            "2095": 3.27283,
            "2100": 3.22452,
            "2105": 3.19481,
            "2110": 3.19679,
            "2115": 3.23269,
            "2120": 3.17039,
            "2125": 3.20328,
            "2130": 3.21145,
            "2135": 3.27257,
            "2140": 3.18877,
            "2145": 3.19442,
            "2150": 3.20116,
            "2155": 3.23563,
            "2160": 3.19845,
            "2165": 3.24654,
            "2170": 3.2216,
            "2175": 3.16252,
            "2180": 3.21306,
            "2185": 3.24681,
            "2190": 3.22862,
            "2195": 3.14645,
            "2200": 3.19104,
            "2205": 3.15804,
            "2210": 3.11856,
            "2215": 3.18638,
            "2220": 3.18386,
            "2225": 3.16933,
            "2230": 3.13649,
            "2235": 3.17995,
            "2240": 3.21834,
            "2245": 3.1834,
            "2250": 3.19663,
            "2255": 3.12028,
            "2260": 3.14221,
            "2265": 3.20381,
            "2270": 3.19267,
            "2275": 3.16805,
            "2280": 3.16867,
            "2285": 3.1601,
            "2290": 3.16301,
            "2295": 3.18761,
            "2300": 3.12966,
            "2305": 3.17206,
            "2310": 3.11553,
            "2315": 3.06971,
            "2320": 3.12085,
            "2325": 3.16434,
            "2330": 3.11423,
            "2335": 3.11076,
            "2340": 3.15588,
            "2345": 3.10477,
            "2350": 3.11732,
            "2355": 3.11185,
            "2360": 3.15506,
            "2365": 3.11076,
            "2370": 3.16356,
            "2375": 3.14222,
            "2380": 3.10402,
            "2385": 3.0741,
            "2390": 3.09007,
            "2395": 3.07964,
            "2400": 3.0826,
            "2405": 3.08587,
            "2410": 3.07576,
            "2415": 3.07852,
            "2420": 3.06533,
            "2425": 3.07275,
            "2430": 3.06933,
            "2435": 3.0564,
            "2440": 3.08532,
            "2445": 3.05739,
            "2450": 3.11279,
            "2455": 3.15238,
            "2460": 3.07391,
            "2465": 3.07127,
            "2470": 3.03425,
            "2475": 3.0807,
            "2480": 3.08629,
            "2485": 3.0577,
            "2490": 3.05332,
            "2495": 3.07752,
            "2500": 3.06183,
            "2505": 3.11062,
            "2510": 3.11173,
            "2515": 3.04133,
            "2520": 3.06465,
            "2525": 3.01687,
            "2530": 3.03591,
            "2535": 3.07924,
            "2540": 3.0737,
            "2545": 3.04184,
            "2550": 2.99916,
            "2555": 3.07523,
            "2560": 3.05422,
            "2565": 3.10948,
            "2570": 2.99967,
            "2575": 3.04042,
            "2580": 3.07793,
            "2585": 3.01288,
            "2590": 3.06395,
            "2595": 2.99387,
            "2600": 3.05698,
            "2605": 3.04109,
            "2610": 3.05189,
            "2615": 3.05276,
            "2620": 2.98134,
            "2625": 3.00912,
            "2630": 3.02901,
            "2635": 3.05103,
            "2640": 3.01503,
            "2645": 3.04801,
            "2650": 3.00882,
            "2655": 2.98542,
            "2660": 3.00801,
            "2665": 3.03349,
            "2670": 2.97627,
            "2675": 2.96043,
            "2680": 2.99062,
            "2685": 3.00007,
            "2690": 2.99293,
            "2695": 2.98783,
            "2700": 3.02201,
            "2705": 2.97427,
            "2710": 2.96634,
            "2715": 2.95921,
            "2720": 3.02806,
            "2725": 3.00529,
            "2730": 3.0322,
            "2735": 3.02478,
            "2740": 2.9826,
            "2745": 3.00739,
            "2750": 3.00234,
            "2755": 2.96406,
            "2760": 2.9934,
            "2765": 3.00331,
            "2770": 2.97181,
            "2775": 2.98931,
            "2780": 3.01168,
            "2785": 2.94404,
            "2790": 2.95915,
            "2795": 2.9483,
            "2800": 2.95122,
            "2805": 2.92887,
            "2810": 2.97394,
            "2815": 2.95273,
            "2820": 3.09901,
            "2825": 3.09029,
            "2830": 3.06294,
            "2835": 2.99075,
            "2840": 2.99097,
            "2845": 3.16374,
            "2850": 3.0727,
            "2855": 2.9904,
            "2860": 2.95406,
            "2865": 2.90905,
            "2870": 2.97507,
            "2875": 2.91077,
            "2880": 2.94853,
            "2885": 2.92014,
            "2890": 2.97403,
            "2895": 2.92299,
            "2900": 2.94051,
            "2905": 3.04074,
            "2910": 2.94425,
            "2915": 2.96669,
            "2920": 2.95973,
            "2925": 2.94592,
            "2930": 2.95038,
            "2935": 2.93537,
            "2940": 2.93072,
            "2945": 3.05785,
            "2950": 3.04355,
            "2955": 2.9389,
            "2960": 2.99183,
            "2965": 2.87016,
            "2970": 2.95512,
            "2975": 2.98818,
            "2980": 2.93644,
            "2985": 3.03092,
            "2990": 2.93432,
            "2995": 2.86558,
            "3000": 2.93207,
            "3005": 2.89421,
            "3010": 2.93131,
            "3015": 2.93536,
            "3020": 2.98292,
            "3025": 2.97341,
            "3030": 2.94829,
            "3035": 2.9713,
            "3040": 2.91863,
            "3045": 2.83181,
            "3050": 2.8976,
            "3055": 2.89038,
            "3060": 2.92509,
            "3065": 2.91825,
            "3070": 2.88122,
            "3075": 2.87986,
            "3080": 2.96134,
            "3085": 2.92236,
            "3090": 2.93503,
            "3095": 2.92516,
            "3100": 2.86898,
            "3105": 2.93926,
            "3110": 2.90416,
            "3115": 2.9415,
            "3120": 2.9498,
            "3125": 2.85617,
            "3130": 2.93084,
            "3135": 2.92732,
            "3140": 2.87396,
            "3145": 2.92342,
            "3150": 2.86658,
            "3155": 2.88336,
            "3160": 2.84999,
            "3165": 2.86518,
            "3170": 2.90374,
            "3175": 2.90679,
            "3180": 2.86664,
            "3185": 2.98102,
            "3190": 2.92651,
            "3195": 2.93655,
            "3200": 2.92553,
            "3205": 2.86435,
            "3210": 2.87237,
            "3215": 2.91522,
            "3220": 2.87693,
            "3225": 2.87047,
            "3230": 2.81633,
            "3235": 2.87742,
            "3240": 2.881,
            "3245": 2.90693,
            "3250": 2.86631,
            "3255": 2.85231,
            "3260": 2.87001,
            "3265": 2.87376,
            "3270": 2.84904,
            "3275": 2.86952,
            "3280": 2.81176,
            "3285": 2.81325,
            "3290": 2.87177,
            "3295": 2.90468,
            "3300": 2.87538,
            "3305": 2.86924,
            "3310": 2.86384,
            "3315": 2.81591,
            "3320": 2.83934,
            "3325": 2.83244,
            "3330": 2.8359,
            "3335": 2.84976,
            "3340": 2.83631,
            "3345": 2.85001,
            "3350": 2.89341,
            "3355": 2.88029,
            "3360": 2.80501,
            "3365": 2.85662,
            "3370": 2.85298,
            "3375": 2.84567,
            "3380": 2.85507,
            "3385": 2.87967,
            "3390": 2.8654,
            "3395": 2.81392,
            "3400": 2.7912,
            "3405": 2.83445,
            "3410": 2.84915,
            "3415": 2.86658,
            "3420": 2.82691,
            "3425": 2.81422,
            "3430": 2.83318,
            "3435": 2.89964,
            "3440": 2.82094,
            "3445": 2.87176,
            "3450": 2.82231,
            "3455": 2.79427,
            "3460": 2.82113,
            "3465": 2.85549,
            "3470": 2.84379,
            "3475": 2.7827,
            "3480": 2.84358,
            "3485": 2.82723,
            "3490": 2.89648,
            "3495": 2.85358,
            "3500": 2.83932,
            "3505": 2.82976,
            "3510": 2.81722,
            "3515": 2.83947,
            "3520": 2.7798,
            "3525": 2.8076,
            "3530": 2.86312,
            "3535": 2.7916,
            "3540": 2.84405,
            "3545": 2.81728,
            "3550": 2.80222,
            "3555": 2.82111,
            "3560": 2.82831,
            "3565": 2.83064,
            "3570": 2.80758,
            "3575": 2.8108,
            "3580": 2.82354,
            "3585": 2.83897,
            "3590": 2.83314,
            "3595": 2.79006,
            "3600": 2.75763,
            "3605": 2.79839,
            "3610": 2.85366,
            "3615": 2.75968,
            "3620": 2.81153,
            "3625": 2.89012,
            "3630": 2.78512,
            "3635": 2.78962,
            "3640": 2.79249,
            "3645": 2.77318,
            "3650": 2.80654,
            "3655": 2.82567,
            "3660": 2.77181,
            "3665": 2.78827,
            "3670": 2.76987,
            "3675": 2.77982,
            "3680": 2.81722,
            "3685": 2.807,
            "3690": 2.81012,
            "3695": 2.81602,
            "3700": 2.79246,
            "3705": 2.78977,
            "3710": 2.75621,
            "3715": 2.80769,
            "3720": 2.80295,
            "3725": 2.79689,
            "3730": 2.84284,
            "3735": 2.80302,
            "3740": 2.75403,
            "3745": 2.79871,
            "3750": 2.80835,
            "3755": 2.80365,
            "3760": 2.76297,
            "3765": 2.76114,
            "3770": 2.76425,
            "3775": 2.77515,
            "3780": 2.76611,
            "3785": 2.78273,
            "3790": 2.74585,
            "3795": 2.7943,
            "3800": 2.80919,
            "3805": 2.75606,
            "3810": 2.80348,
            "3815": 2.76783,
            "3820": 2.78665,
            "3825": 2.74368,
            "3830": 2.74754,
            "3835": 2.81858,
            "3840": 2.72874,
            "3845": 2.72322,
            "3850": 2.78,
            "3855": 2.72854,
            "3860": 2.80843,
            "3865": 2.75384,
            "3870": 2.77843,
            "3875": 2.75435,
            "3880": 2.78959,
            "3885": 2.78704,
            "3890": 2.74772,
            "3895": 2.80198,
            "3900": 2.76665,
            "3905": 2.7273,
            "3910": 2.74855,
            "3915": 2.75398,
            "3920": 2.79669,
            "3925": 2.78392,
            "3930": 2.71057,
            "3935": 2.74346,
            "3940": 2.75667,
            "3945": 2.74619,
            "3950": 2.73229,
            "3955": 2.78862,
            "3960": 2.76604,
            "3965": 2.74261,
            "3970": 2.76202,
            "3975": 2.72885,
            "3980": 2.74356,
            "3985": 2.753,
            "3990": 2.69532,
            "3995": 2.78674,
            "4000": 2.73839,
            "4005": 2.77018,
            "4010": 2.71275,
            "4015": 2.72469,
            "4020": 2.75812,
            "4025": 2.73947,
            "4030": 2.66471,
            "4035": 2.70312,
            "4040": 2.7551,
            "4045": 2.75154,
            "4050": 2.79476,
            "4055": 2.72564,
            "4060": 2.71823,
            "4065": 2.65515,
            "4070": 2.81106,
            "4075": 2.7593,
            "4080": 2.7224,
            "4085": 2.7526,
            "4090": 2.68408,
            "4095": 2.69465,
            "4100": 2.71413,
            "4105": 2.74242,
            "4110": 2.73879,
            "4115": 2.70781,
            "4120": 2.72904,
            "4125": 2.706,
            "4130": 2.70256,
            "4135": 2.69364,
            "4140": 2.68685,
            "4145": 2.78573,
            "4150": 2.71248,
            "4155": 2.74287,
            "4160": 2.7636,
            "4165": 2.72423,
            "4170": 2.68299,
            "4175": 2.72506,
            "4180": 2.73013,
            "4185": 2.73505,
            "4190": 2.74113,
            "4195": 2.69908,
            "4200": 2.70931,
            "4205": 2.74563,
            "4210": 2.68107,
            "4215": 2.67252,
            "4220": 2.66454,
            "4225": 2.70808,
            "4230": 2.71983,
            "4235": 2.73343,
            "4240": 2.70888,
            "4245": 2.69989,
            "4250": 2.71677,
            "4255": 2.65608,
            "4260": 2.7308,
            "4265": 2.74762,
            "4270": 2.73075,
            "4275": 2.6981,
            "4280": 2.70824,
            "4285": 2.73731,
            "4290": 2.69096,
            "4295": 2.69773,
            "4300": 2.70521,
            "4305": 2.70287,
            "4310": 2.73612,
            "4315": 2.71699,
            "4320": 2.70088,
            "4325": 2.70647,
            "4330": 2.71085,
            "4335": 2.69263,
            "4340": 2.69943,
            "4345": 2.72678,
            "4350": 2.67936,
            "4355": 2.69716,
            "4360": 2.717,
            "4365": 2.79214,
            "4370": 2.73775,
            "4375": 2.74574,
            "4380": 2.70719,
            "4385": 2.70003,
            "4390": 2.70588,
            "4395": 2.75693,
            "4400": 2.66771,
            "4405": 2.66938,
            "4410": 2.6866,
            "4415": 2.70735,
            "4420": 2.70696,
            "4425": 2.72098,
            "4430": 2.69315,
            "4435": 2.68322,
            "4440": 2.6986,
            "4445": 2.68318,
            "4450": 2.65777,
            "4455": 2.66913,
            "4460": 2.68987,
            "4465": 2.70288,
            "4470": 2.67093,
            "4475": 2.69006,
            "4480": 2.65881,
            "4485": 2.70227,
            "4490": 2.65665,
            "4495": 2.71197,
            "4500": 2.70447,
            "4505": 2.70028,
            "4510": 2.65488,
            "4515": 2.70383,
            "4520": 2.67082,
            "4525": 2.67161,
            "4530": 2.67935,
            "4535": 2.67779,
            "4540": 2.71224,
            "4545": 2.66111,
            "4550": 2.70285,
            "4555": 2.68549,
            "4560": 2.65973,
            "4565": 2.64283,
            "4570": 2.64437,
            "4575": 2.66817,
            "4580": 2.69161,
            "4585": 2.6866,
            "4590": 2.61902,
            "4595": 2.66724,
            "4600": 2.68012,
            "4605": 2.67797,
            "4610": 2.65878,
            "4615": 2.66405,
            "4620": 2.65841,
            "4625": 2.68759,
            "4630": 2.67207,
            "4635": 2.6467,
            "4640": 2.69593,
            "4645": 2.64943,
            "4650": 2.70482,
            "4655": 2.70974,
            "4660": 2.678,
            "4665": 2.68927,
            "4670": 2.67818,
            "4675": 2.69201,
            "4680": 2.66723,
            "4685": 2.66091,
            "4690": 2.70694,
            "4695": 2.65664,
            "4700": 2.67563,
            "4705": 2.6541,
            "4710": 2.67809,
            "4715": 2.65279,
            "4720": 2.72611,
            "4725": 2.62991,
            "4730": 2.65465,
            "4735": 2.68665,
            "4740": 2.64349,
            "4745": 2.65464,
            "4750": 2.6425,
            "4755": 2.65742,
            "4760": 2.66679,
            "4765": 2.64631,
            "4770": 2.62442,
            "4775": 2.65996,
            "4780": 2.65947,
            "4785": 2.69218,
            "4790": 2.65164,
            "4795": 2.67687,
            "4800": 2.6313,
            "4805": 2.6446,
            "4810": 2.66621,
            "4815": 2.64665,
            "4820": 2.6782,
            "4825": 2.6539,
            "4830": 2.61864,
            "4835": 2.65011,
            "4840": 2.65638,
            "4845": 2.64014,
            "4850": 2.62687,
            "4855": 2.60115,
            "4860": 2.65274,
            "4865": 2.62849,
            "4870": 2.6455,
            "4875": 2.62328,
            "4880": 2.6283,
            "4885": 2.62736,
            "4890": 2.6794,
            "4895": 2.65936,
            "4900": 2.61724,
            "4905": 2.61961,
            "4910": 2.63978,
            "4915": 2.61489,
            "4920": 2.65096,
            "4925": 2.6502,
            "4930": 2.57167,
            "4935": 2.65086,
            "4940": 2.63371,
            "4945": 2.63866,
            "4950": 2.63093,
            "4955": 2.61847,
            "4960": 2.62102,
            "4965": 2.65956,
            "4970": 2.60049,
            "4975": 2.65887,
            "4980": 2.62253,
            "4985": 2.63045,
            "4990": 2.65896,
            "4995": 2.58449,
            "5000": 2.66172,
            "5005": 2.66467,
            "5010": 2.68321,
            "5015": 2.63657,
            "5020": 2.64187,
            "5025": 2.68752,
            "5030": 2.64287,
            "5035": 2.61752,
            "5040": 2.62143,
            "5045": 2.6017,
            "5050": 2.62827,
            "5055": 2.6529,
            "5060": 2.64519,
            "5065": 2.68833,
            "5070": 2.60442,
            "5075": 2.61853,
            "5080": 2.61208,
            "5085": 2.604,
            "5090": 2.59066,
            "5095": 2.65156,
            "5100": 2.6482,
            "5105": 2.61142,
            "5110": 2.66463,
            "5115": 2.61773,
            "5120": 2.6851,
            "5125": 2.6328,
            "5130": 2.61414,
            "5135": 2.61072,
            "5140": 2.57728,
            "5145": 2.62747,
            "5150": 2.63552,
            "5155": 2.61666,
            "5160": 2.66422,
            "5165": 2.58419,
            "5170": 2.59264,
            "5175": 2.61755,
            "5180": 2.60594,
            "5185": 2.62077,
            "5190": 2.62566,
            "5195": 2.67043,
            "5200": 2.60106,
            "5205": 2.60577,
            "5210": 2.60577,
            "5215": 2.64683,
            "5220": 2.58739,
            "5225": 2.55124,
            "5230": 2.63497,
            "5235": 2.61953,
            "5240": 2.63299,
            "5245": 2.64007,
            "5250": 2.62651,
            "5255": 2.62996,
            "5260": 2.56167,
            "5265": 2.59636,
            "5270": 2.58715,
            "5275": 2.61558,
            "5280": 2.6101,
            "5285": 2.60177,
            "5290": 2.63161,
            "5295": 2.62124,
            "5300": 2.57873,
            "5305": 2.59839,
            "5310": 2.61352,
            "5315": 2.58879,
            "5320": 2.61649,
            "5325": 2.6449,
            "5330": 2.606,
            "5335": 2.5852,
            "5340": 2.56323,
            "5345": 2.65839,
            "5350": 2.6253,
            "5355": 2.58005,
            "5360": 2.59537,
            "5365": 2.62193,
            "5370": 2.616,
            "5375": 2.62869,
            "5380": 2.57994,
            "5385": 2.56513,
            "5390": 2.58667,
            "5395": 2.61896,
            "5400": 2.60742,
            "5405": 2.54859,
            "5410": 2.61326,
            "5415": 2.5968,
            "5420": 2.61305,
            "5425": 2.62522,
            "5430": 2.62714,
            "5435": 2.57646,
            "5440": 2.58691,
            "5445": 2.62928,
            "5450": 2.64904,
            "5455": 2.61295,
            "5460": 2.59303,
            "5465": 2.60946,
            "5470": 2.60013,
            "5475": 2.62549,
            "5480": 2.59024,
            "5485": 2.59298,
            "5490": 2.57902,
            "5495": 2.57297,
            "5500": 2.56984,
            "5505": 2.6179,
            "5510": 2.62675,
            "5515": 2.58442,
            "5520": 2.55635,
            "5525": 2.58644,
            "5530": 2.66454,
            "5535": 2.62243,
            "5540": 2.57094,
            "5545": 2.59629,
            "5550": 2.55008,
            "5555": 2.57413,
            "5560": 2.56392,
            "5565": 2.60633,
            "5570": 2.65231,
            "5575": 2.62956,
            "5580": 2.57293,
            "5585": 2.59512,
            "5590": 2.56253,
            "5595": 2.58488,
            "5600": 2.55316,
            "5605": 2.60174,
            "5610": 2.58344,
            "5615": 2.58383,
            "5620": 2.5828,
            "5625": 2.55159,
            "5630": 2.57274,
            "5635": 2.63391,
            "5640": 2.59709,
            "5645": 2.57394,
            "5650": 2.58002,
            "5655": 2.54968,
            "5660": 2.55884,
            "5665": 2.58772,
            "5670": 2.56974,
            "5675": 2.60699,
            "5680": 2.52871,
            "5685": 2.57074,
            "5690": 2.60289,
            "5695": 2.56061,
            "5700": 2.59647,
            "5705": 2.59767,
            "5710": 2.57979,
            "5715": 2.58478,
            "5720": 2.53602,
            "5725": 2.60163,
            "5730": 2.57478,
            "5735": 2.61025,
            "5740": 2.59376,
            "5745": 2.55786,
            "5750": 2.54163,
            "5755": 2.56148,
            "5760": 2.61763,
            "5765": 2.55839,
            "5770": 2.54209,
            "5775": 2.5851,
            "5780": 2.57898,
            "5785": 2.54052,
            "5790": 2.56441,
            "5795": 2.6016,
            "5800": 2.54469,
            "5805": 2.53479,
            "5810": 2.55723,
            "5815": 2.52569,
            "5820": 2.59764,
            "5825": 2.50389,
            "5830": 2.49824,
            "5835": 2.59839,
            "5840": 2.54067,
            "5845": 2.5546,
            "5850": 2.61487,
            "5855": 2.51063,
            "5860": 2.56026,
            "5865": 2.51866,
            "5870": 2.57501,
            "5875": 2.61024,
            "5880": 2.58742,
            "5885": 2.56735,
            "5890": 2.58353,
            "5895": 2.55592,
            "5900": 2.61408,
            "5905": 2.55554,
            "5910": 2.59722,
            "5915": 2.60946,
            "5920": 2.58692,
            "5925": 2.54513,
            "5930": 2.58538,
            "5935": 2.55227,
            "5940": 2.57041,
            "5945": 2.51784,
            "5950": 2.55423,
            "5955": 2.58456,
            "5960": 2.56321,
            "5965": 2.61861,
            "5970": 2.54969,
            "5975": 2.57895,
            "5980": 2.55865,
            "5985": 2.56052,
            "5990": 2.55619,
            "5995": 2.55747,
            "6000": 2.55354,
            "6005": 2.52322,
            "6010": 2.56029,
            "6015": 2.524,
            "6020": 2.53447,
            "6025": 2.55788,
            "6030": 2.60493,
            "6035": 2.54194,
            "6040": 2.54995,
            "6045": 2.49165,
            "6050": 2.5951,
            "6055": 2.51904,
            "6060": 2.54539,
            "6065": 2.52515,
            "6070": 2.52835,
            "6075": 2.53731,
            "6080": 2.53486,
            "6085": 2.59864,
            "6090": 2.5708,
            "6095": 2.53577,
            "6100": 2.5429,
            "6105": 2.52445,
            "6110": 2.5555,
            "6115": 2.58453,
            "6120": 2.55777,
            "6125": 2.5404,
            "6130": 2.47406,
            "6135": 2.55687,
            "6140": 2.5547,
            "6145": 2.55742,
            "6150": 2.52523,
            "6155": 2.50878,
            "6160": 2.53933,
            "6165": 2.5709,
            "6170": 2.54328,
            "6175": 2.60016,
            "6180": 2.52416,
            "6185": 2.55328,
            "6190": 2.49156,
            "6195": 2.57673,
            "6200": 2.55024,
            "6205": 2.53605,
            "6210": 2.51872,
            "6215": 2.5131,
            "6220": 2.5647,
            "6225": 2.51278,
            "6230": 2.51147,
            "6235": 2.56204,
            "6240": 2.55176,
            "6245": 2.52293,
            "6250": 2.53194,
            "6255": 2.57179,
            "6260": 2.52397,
            "6265": 2.57464,
            "6270": 2.5253,
            "6275": 2.56546,
            "6280": 2.52265,
            "6285": 2.52082,
            "6290": 2.52178,
            "6295": 2.50489,
            "6300": 2.55651,
            "6305": 2.5255,
            "6310": 2.51251,
            "6315": 2.53866,
            "6320": 2.48965,
            "6325": 2.59676,
            "6330": 2.5548,
            "6335": 2.51247,
            "6340": 2.51081,
            "6345": 2.55459,
            "6350": 2.55485,
            "6355": 2.5242,
            "6360": 2.52125,
            "6365": 2.48319,
            "6370": 2.53517,
            "6375": 2.49198,
            "6380": 2.55811,
            "6385": 2.57549,
            "6390": 2.50397,
            "6395": 2.55156,
            "6400": 2.50904,
            "6405": 2.52939,
            "6410": 2.52069,
            "6415": 2.52703,
            "6420": 2.54256,
            "6425": 2.5347,
            "6430": 2.57884,
            "6435": 2.54441,
            "6440": 2.5365,
            "6445": 2.52925,
            "6450": 2.53175,
            "6455": 2.52274,
            "6460": 2.51748,
            "6465": 2.5608,
            "6470": 2.51903,
            "6475": 2.52435,
            "6480": 2.48637,
            "6485": 2.52979,
            "6490": 2.50961,
            "6495": 2.49958,
            "6500": 2.52332,
            "6505": 2.49435,
            "6510": 2.5431,
            "6515": 2.51011,
            "6520": 2.50992,
            "6525": 2.49432,
            "6530": 2.54454,
            "6535": 2.53285,
            "6540": 2.53196,
            "6545": 2.56057,
            "6550": 2.50472,
            "6555": 2.55543,
            "6560": 2.51041,
            "6565": 2.52139,
            "6570": 2.58448,
            "6575": 2.52272,
            "6580": 2.50045,
            "6585": 2.50883,
            "6590": 2.51066,
            "6595": 2.49931,
            "6600": 2.48752,
            "6605": 2.53809,
            "6610": 2.47784,
            "6615": 2.56592,
            "6620": 2.53317,
            "6625": 2.51215,
            "6630": 2.51366,
            "6635": 2.47267,
            "6640": 2.53835,
            "6645": 2.59583,
            "6650": 2.50952,
            "6655": 2.49788,
            "6660": 2.57415,
            "6665": 2.51974,
            "6670": 2.56822,
            "6675": 2.46804,
            "6680": 2.54779,
            "6685": 2.53524,
            "6690": 2.51328,
            "6695": 2.48745,
            "6700": 2.52449,
            "6705": 2.51812,
            "6710": 2.49315,
            "6715": 2.51956,
            "6720": 2.51086,
            "6725": 2.52243,
            "6730": 2.51941,
            "6735": 2.48358,
            "6740": 2.51226,
            "6745": 2.4971,
            "6750": 2.55699,
            "6755": 2.47508,
            "6760": 2.54106,
            "6765": 2.48797,
            "6770": 2.51814,
            "6775": 2.50699,
            "6780": 2.53925,
            "6785": 2.47179,
            "6790": 2.54293,
            "6795": 2.499,
            "6800": 2.52666,
            "6805": 2.51089,
            "6810": 2.50481,
            "6815": 2.52269,
            "6820": 2.48597,
            "6825": 2.50306,
            "6830": 2.54052,
            "6835": 2.50686,
            "6840": 2.51012,
            "6845": 2.5258,
            "6850": 2.47578,
            "6855": 2.51497,
            "6860": 2.50403,
            "6865": 2.49004,
            "6870": 2.55532,
            "6875": 2.4756,
            "6880": 2.55005,
            "6885": 2.47667,
            "6890": 2.54499,
            "6895": 2.50058,
            "6900": 2.48911,
            "6905": 2.49914,
            "6910": 2.51865,
            "6915": 2.51876,
            "6920": 2.53284,
            "6925": 2.54186,
            "6930": 2.49003,
            "6935": 2.51954,
            "6940": 2.50123,
            "6945": 2.46259,
            "6950": 2.48298,
            "6955": 2.52635,
            "6960": 2.51943,
            "6965": 2.49462,
            "6970": 2.47092,
            "6975": 2.52187,
            "6980": 2.45145,
            "6985": 2.51525,
            "6990": 2.53123,
            "6995": 2.46232,
            "7000": 2.48871,
            "7005": 2.47014,
            "7010": 2.47431,
            "7015": 2.51846,
            "7020": 2.46743,
            "7025": 2.45259,
            "7030": 2.48539,
            "7035": 2.47883,
            "7040": 2.50494,
            "7045": 2.51969,
            "7050": 2.52672,
            "7055": 2.44446,
            "7060": 2.47361,
            "7065": 2.48553,
            "7070": 2.49028,
            "7075": 2.49411,
            "7080": 2.53854,
            "7085": 2.48557,
            "7090": 2.48272,
            "7095": 2.50214,
            "7100": 2.51549,
            "7105": 2.48863,
            "7110": 2.48625,
            "7115": 2.5057,
            "7120": 2.47446,
            "7125": 2.46362,
            "7130": 2.48515,
            "7135": 2.51314,
            "7140": 2.49783,
            "7145": 2.49951,
            "7150": 2.50942,
            "7155": 2.50244,
            "7160": 2.47497,
            "7165": 2.45734,
            "7170": 2.50625,
            "7175": 2.50449,
            "7180": 2.50343,
            "7185": 2.4821,
            "7190": 2.46138,
            "7195": 2.46516,
            "7200": 2.50961,
            "7205": 2.48844,
            "7210": 2.44288,
            "7215": 2.48064,
            "7220": 2.4432,
            "7225": 2.51318,
            "7230": 2.50885,
            "7235": 2.48565,
            "7240": 2.47827,
            "7245": 2.49869,
            "7250": 2.50766,
            "7255": 2.49096,
            "7260": 2.45665,
            "7265": 2.45398,
            "7270": 2.471,
            "7275": 2.50056,
            "7280": 2.49436,
            "7285": 2.42318,
            "7290": 2.48025,
            "7295": 2.48765,
            "7300": 2.41781,
            "7305": 2.44566,
            "7310": 2.44742,
            "7315": 2.49088,
            "7320": 2.48414,
            "7325": 2.45913,
            "7330": 2.48837,
            "7335": 2.47377,
            "7340": 2.46217,
            "7345": 2.49547,
            "7350": 2.50972,
            "7355": 2.49521,
            "7360": 2.47975,
            "7365": 2.46757,
            "7370": 2.47133,
            "7375": 2.44779,
            "7380": 2.49143,
            "7385": 2.48328,
            "7390": 2.47244,
            "7395": 2.47201,
            "7400": 2.47965,
            "7405": 2.44114,
            "7410": 2.4806,
            "7415": 2.46985,
            "7420": 2.4921,
            "7425": 2.45657,
            "7430": 2.5236,
            "7435": 2.49195,
            "7440": 2.51827,
            "7445": 2.50779,
            "7450": 2.47245,
            "7455": 2.45529,
            "7460": 2.46275,
            "7465": 2.47587,
            "7470": 2.45102,
            "7475": 2.45675,
            "7480": 2.51057,
            "7485": 2.44969,
            "7490": 2.47412,
            "7495": 2.48024,
            "7500": 2.49436,
            "7505": 2.43994,
            "7510": 2.43541,
            "7515": 2.41913,
            "7520": 2.49605,
            "7525": 2.49704,
            "7530": 2.47612,
            "7535": 2.46046,
            "7540": 2.47199,
            "7545": 2.47311,
            "7550": 2.4891,
            "7555": 2.45305,
            "7560": 2.42768,
            "7565": 2.50895,
            "7570": 2.48445,
            "7575": 2.43768,
            "7580": 2.45675,
            "7585": 2.48217,
            "7590": 2.48097,
            "7595": 2.46221,
            "7600": 2.46178,
            "7605": 2.44643,
            "7610": 2.45041,
            "7615": 2.42542,
            "7620": 2.54338,
            "7625": 2.48067,
            "7630": 2.42332,
            "7635": 2.42574,
            "7640": 2.45345,
            "7645": 2.47356,
            "7650": 2.46236,
            "7655": 2.48328,
            "7660": 2.45205,
            "7665": 2.43265,
            "7670": 2.43945,
            "7675": 2.45593,
            "7680": 2.48501,
            "7685": 2.43122,
            "7690": 2.47924,
            "7695": 2.45353,
            "7700": 2.48295,
            "7705": 2.49849,
            "7710": 2.49353,
            "7715": 2.4439,
            "7720": 2.46796,
            "7725": 2.47949,
            "7730": 2.45614,
            "7735": 2.46957,
            "7740": 2.43649,
            "7745": 2.44912,
            "7750": 2.43696,
            "7755": 2.46558,
            "7760": 2.4507,
            "7765": 2.45439,
            "7770": 2.46929,
            "7775": 2.45158,
            "7780": 2.41614,
            "7785": 2.44384,
            "7790": 2.48079,
            "7795": 2.4398,
            "7800": 2.46271,
            "7805": 2.48027,
            "7810": 2.50129,
            "7815": 2.48639,
            "7820": 2.44516,
            "7825": 2.51324,
            "7830": 2.45153,
            "7835": 2.46659,
            "7840": 2.47993,
            "7845": 2.45911,
            "7850": 2.41588,
            "7855": 2.4717,
            "7860": 2.49983,
            "7865": 2.42282,
            "7870": 2.46628,
            "7875": 2.44702,
            "7880": 2.45404,
            "7885": 2.46099,
            "7890": 2.47047,
            "7895": 2.44714,
            "7900": 2.43952,
            "7905": 2.43594,
            "7910": 2.42458,
            "7915": 2.48098,
            "7920": 2.47635,
            "7925": 2.42092,
            "7930": 2.47004,
            "7935": 2.44984,
            "7940": 2.4209,
            "7945": 2.4679,
            "7950": 2.44269,
            "7955": 2.41681,
            "7960": 2.48704,
            "7965": 2.5161,
            "7970": 2.52106,
            "7975": 2.44905,
            "7980": 2.44048,
            "7985": 2.46706,
            "7990": 2.4318,
            "7995": 2.46886,
            "8000": 2.43524,
            "8005": 2.41715,
            "8010": 2.45855,
            "8015": 2.47016,
            "8020": 2.4817,
            "8025": 2.47442,
            "8030": 2.45168,
            "8035": 2.46987,
            "8040": 2.41869,
            "8045": 2.4532,
            "8050": 2.44608,
            "8055": 2.42567,
            "8060": 2.44377,
            "8065": 2.46148,
            "8070": 2.45678,
            "8075": 2.45758,
            "8080": 2.44368,
            "8085": 2.44074,
            "8090": 2.425,
            "8095": 2.42411,
            "8100": 2.43875,
            "8105": 2.49528,
            "8110": 2.43881,
            "8115": 2.4442,
            "8120": 2.46682,
            "8125": 2.468,
            "8130": 2.45181,
            "8135": 2.45124,
            "8140": 2.43943,
            "8145": 2.4251,
            "8150": 2.42115,
            "8155": 2.48618,
            "8160": 2.45302,
            "8165": 2.44147,
            "8170": 2.43445,
            "8175": 2.42116,
            "8180": 2.49462,
            "8185": 2.42506,
            "8190": 2.46774,
            "8195": 2.4559,
            "8200": 2.44665,
            "8205": 2.44464,
            "8210": 2.43127,
            "8215": 2.43962,
            "8220": 2.43461,
            "8225": 2.40807,
            "8230": 2.43921,
            "8235": 2.46438,
            "8240": 2.42629,
            "8245": 2.44606,
            "8250": 2.44335,
            "8255": 2.43703,
            "8260": 2.43286,
            "8265": 2.43363,
            "8270": 2.43733,
            "8275": 2.44725,
            "8280": 2.40445,
            "8285": 2.44626,
            "8290": 2.48807,
            "8295": 2.4556,
            "8300": 2.46545,
            "8305": 2.41882,
            "8310": 2.44521,
            "8315": 2.4652,
            "8320": 2.40775,
            "8325": 2.40185,
            "8330": 2.44465,
            "8335": 2.454,
            "8340": 2.4989,
            "8345": 2.45642,
            "8350": 2.45938,
            "8355": 2.41812,
            "8360": 2.41252,
            "8365": 2.46233,
            "8370": 2.45956,
            "8375": 2.43404,
            "8380": 2.42861,
            "8385": 2.43299,
            "8390": 2.4462,
            "8395": 2.44992,
            "8400": 2.44991,
            "8405": 2.49879,
            "8410": 2.44773,
            "8415": 2.44365,
            "8420": 2.42653,
            "8425": 2.44808,
            "8430": 2.47166,
            "8435": 2.4148,
            "8440": 2.46205,
            "8445": 2.4719,
            "8450": 2.41712,
            "8455": 2.47322,
            "8460": 2.46703,
            "8465": 2.45532,
            "8470": 2.44631,
            "8475": 2.50009,
            "8480": 2.41626,
            "8485": 2.42485,
            "8490": 2.47607,
            "8495": 2.44617,
            "8500": 2.45344,
            "8505": 2.41523,
            "8510": 2.41142,
            "8515": 2.43755,
            "8520": 2.43349,
            "8525": 2.50425,
            "8530": 2.38186,
            "8535": 2.41091,
            "8540": 2.49491,
            "8545": 2.39141,
            "8550": 2.44924,
            "8555": 2.46223,
            "8560": 2.48222,
            "8565": 2.43498,
            "8570": 2.44426,
            "8575": 2.45864,
            "8580": 2.45188,
            "8585": 2.43229,
            "8590": 2.41478,
            "8595": 2.43822,
            "8600": 2.42252,
            "8605": 2.50219,
            "8610": 2.43046,
            "8615": 2.39848,
            "8620": 2.46033,
            "8625": 2.43683,
            "8630": 2.46564,
            "8635": 2.46509,
            "8640": 2.44573,
            "8645": 2.48476,
            "8650": 2.43317,
            "8655": 2.46437,
            "8660": 2.46688,
            "8665": 2.39792,
            "8670": 2.4204,
            "8675": 2.44134,
            "8680": 2.45917,
            "8685": 2.44287,
            "8690": 2.42337,
            "8695": 2.4576,
            "8700": 2.44685,
            "8705": 2.43517,
            "8710": 2.43956,
            "8715": 2.45926,
            "8720": 2.48768,
            "8725": 2.42018,
            "8730": 2.40411,
            "8735": 2.4458,
            "8740": 2.44124,
            "8745": 2.40867,
            "8750": 2.44728,
            "8755": 2.43508,
            "8760": 2.41316,
            "8765": 2.44751,
            "8770": 2.41594,
            "8775": 2.45002,
            "8780": 2.43082,
            "8785": 2.4826,
            "8790": 2.43308,
            "8795": 2.43349,
            "8800": 2.4321,
            "8805": 2.41894,
            "8810": 2.42481,
            "8815": 2.487,
            "8820": 2.46651,
            "8825": 2.4393,
            "8830": 2.39711,
            "8835": 2.43377,
            "8840": 2.40495,
            "8845": 2.44022,
            "8850": 2.44733,
            "8855": 2.41504,
            "8860": 2.44057,
            "8865": 2.43912,
            "8870": 2.44762,
            "8875": 2.45022,
            "8880": 2.42391,
            "8885": 2.40582,
            "8890": 2.45757,
            "8895": 2.43989,
            "8900": 2.42516,
            "8905": 2.41312,
            "8910": 2.41171,
            "8915": 2.43051,
            "8920": 2.44575,
            "8925": 2.47902,
            "8930": 2.42743,
            "8935": 2.42201,
            "8940": 2.40161,
            "8945": 2.40426,
            "8950": 2.42938,
            "8955": 2.40734,
            "8960": 2.44555,
            "8965": 2.42863,
            "8970": 2.41756,
            "8975": 2.49059,
            "8980": 2.45369,
            "8985": 2.3862,
            "8990": 2.42051,
            "8995": 2.42916,
            "9000": 2.46813,
            "9005": 2.42341,
            "9010": 2.3862,
            "9015": 2.41938,
            "9020": 2.41034,
            "9025": 2.38041,
            "9030": 2.41146,
            "9035": 2.43664,
            "9040": 2.43479,
            "9045": 2.43227,
            "9050": 2.40905,
            "9055": 2.43064,
            "9060": 2.43608,
            "9065": 2.41987,
            "9070": 2.45671,
            "9075": 2.40626,
            "9080": 2.45053,
            "9085": 2.4361,
            "9090": 2.42635,
            "9095": 2.41044,
            "9100": 2.41349,
            "9105": 2.37061,
            "9110": 2.47783,
            "9115": 2.42768,
            "9120": 2.4153,
            "9125": 2.47081,
            "9130": 2.40666,
            "9135": 2.45957,
            "9140": 2.44595,
            "9145": 2.43789,
            "9150": 2.43689,
            "9155": 2.38451,
            "9160": 2.42955,
            "9165": 2.43724,
            "9170": 2.3856,
            "9175": 2.43,
            "9180": 2.38849,
            "9185": 2.44997,
            "9190": 2.42357,
            "9195": 2.4078,
            "9200": 2.40419,
            "9205": 2.46075,
            "9210": 2.37469,
            "9215": 2.47828,
            "9220": 2.46007,
            "9225": 2.39401,
            "9230": 2.45725,
            "9235": 2.40825,
            "9240": 2.41321,
            "9245": 2.44871,
            "9250": 2.43983,
            "9255": 2.44156,
            "9260": 2.39854,
            "9265": 2.44978,
            "9270": 2.44664,
            "9275": 2.40469,
            "9280": 2.39981,
            "9285": 2.43354,
            "9290": 2.41628,
            "9295": 2.39626,
            "9300": 2.43389,
            "9305": 2.41395,
            "9310": 2.42504,
            "9315": 2.41992,
            "9320": 2.45402,
            "9325": 2.38111,
            "9330": 2.41516,
            "9335": 2.37044,
            "9340": 2.4179,
            "9345": 2.42705,
            "9350": 2.45019,
            "9355": 2.48641,
            "9360": 2.44802,
            "9365": 2.39905,
            "9370": 2.44623,
            "9375": 2.44502,
            "9380": 2.36429,
            "9385": 2.41094,
            "9390": 2.39295,
            "9395": 2.398,
            "9400": 2.45369,
            "9405": 2.42353,
            "9410": 2.40802,
            "9415": 2.44816,
            "9420": 2.45482,
            "9425": 2.44258,
            "9430": 2.45777,
            "9435": 2.42475,
            "9440": 2.48693,
            "9445": 2.38593,
            "9450": 2.4031,
            "9455": 2.41359,
            "9460": 2.39628,
            "9465": 2.38987,
            "9470": 2.39178,
            "9475": 2.37518,
            "9480": 2.44372,
            "9485": 2.39835,
            "9490": 2.43096,
            "9495": 2.39337,
            "9500": 2.37474,
            "9505": 2.43992,
            "9510": 2.40976,
            "9515": 2.44038,
            "9520": 2.42911,
            "9525": 2.39978,
            "9530": 2.46462,
            "9535": 2.41098,
            "9540": 2.42979,
            "9545": 2.38866,
            "9550": 2.43326,
            "9555": 2.40096,
            "9560": 2.43313,
            "9565": 2.41693,
            "9570": 2.38286,
            "9575": 2.4218,
            "9580": 2.40624,
            "9585": 2.43284,
            "9590": 2.4387,
            "9595": 2.45826,
            "9600": 2.40192,
            "9605": 2.39415,
            "9610": 2.43148,
            "9615": 2.42381,
            "9620": 2.42343,
            "9625": 2.45384,
            "9630": 2.40573,
            "9635": 2.4104,
            "9640": 2.45515,
            "9645": 2.41908,
            "9650": 2.40681,
            "9655": 2.38176,
            "9660": 2.43323,
            "9665": 2.39773,
            "9670": 2.39086,
            "9675": 2.36759,
            "9680": 2.40689,
            "9685": 2.40759,
            "9690": 2.47137,
            "9695": 2.39035,
            "9700": 2.38642,
            "9705": 2.39295,
            "9710": 2.37507,
            "9715": 2.39687,
            "9720": 2.44489,
            "9725": 2.45193,
            "9730": 2.44046,
            "9735": 2.39598,
            "9740": 2.39167,
            "9745": 2.43511,
            "9750": 2.40675,
            "9755": 2.41637,
            "9760": 2.42082,
            "9765": 2.37763,
            "9770": 2.4769,
            "9775": 2.40938,
            "9780": 2.37101,
            "9785": 2.41029,
            "9790": 2.41665,
            "9795": 2.36761,
            "9800": 2.40508,
            "9805": 2.41261,
            "9810": 2.41612,
            "9815": 2.38943,
            "9820": 2.38493,
            "9825": 2.41288,
            "9830": 2.42952,
            "9835": 2.39301,
            "9840": 2.42326,
            "9845": 2.37134,
            "9850": 2.40705,
            "9855": 2.40435,
            "9860": 2.39835,
            "9865": 2.39051,
            "9870": 2.39483,
            "9875": 2.38932,
            "9880": 2.45964,
            "9885": 2.40149,
            "9890": 2.3625,
            "9895": 2.32983,
            "9900": 2.40524,
            "9905": 2.44036,
            "9910": 2.36752,
            "9915": 2.37375,
            "9920": 2.42036,
            "9925": 2.40752,
            "9930": 2.39138,
            "9935": 2.35829,
            "9940": 2.39175,
            "9945": 2.38731,
            "9950": 2.41161,
            "9955": 2.4562,
            "9960": 2.44029,
            "9965": 2.36548,
            "9970": 2.41841,
            "9975": 2.39346,
            "9980": 2.34202,
            "9985": 2.41527,
            "9990": 2.40476,
            "9995": 2.40551,
            "10000": 2.3779,
            "10005": 2.38222,
            "10010": 2.39155,
            "10015": 2.45278,
            "10020": 2.37287,
            "10025": 2.39614,
            "10030": 2.39671,
            "10035": 2.41747,
            "10040": 2.41168,
            "10045": 2.391,
            "10050": 2.35587,
            "10055": 2.37631,
            "10060": 2.42816,
            "10065": 2.38202,
            "10070": 2.43219,
            "10075": 2.38058,
            "10080": 2.37107,
            "10085": 2.37994,
            "10090": 2.35547,
            "10095": 2.41061,
            "10100": 2.3218,
            "10105": 2.39037,
            "10110": 2.41856,
            "10115": 2.39594,
            "10120": 2.36629,
            "10125": 2.37945,
            "10130": 2.36734,
            "10135": 2.39077,
            "10140": 2.42128,
            "10145": 2.41628,
            "10150": 2.38441,
            "10155": 2.40315,
            "10160": 2.3701,
            "10165": 2.39226,
            "10170": 2.43082,
            "10175": 2.33481,
            "10180": 2.40418,
            "10185": 2.39205,
            "10190": 2.45277,
            "10195": 2.41277,
            "10200": 2.39905,
            "10205": 2.39664,
            "10210": 2.37582,
            "10215": 2.35082,
            "10220": 2.42833,
            "10225": 2.438,
            "10230": 2.36343,
            "10235": 2.39441,
            "10240": 2.38042,
            "10245": 2.39776,
            "10250": 2.39615,
            "10255": 2.42117,
            "10260": 2.34215,
            "10265": 2.35597,
            "10270": 2.35839,
            "10275": 2.38034,
            "10280": 2.45804,
            "10285": 2.36609,
            "10290": 2.39509,
            "10295": 2.38161,
            "10300": 2.376,
            "10305": 2.42265,
            "10310": 2.39724,
            "10315": 2.36745,
            "10320": 2.37409,
            "10325": 2.36891,
            "10330": 2.41989,
            "10335": 2.36912,
            "10340": 2.42622,
            "10345": 2.37725,
            "10350": 2.36505,
            "10355": 2.40497,
            "10360": 2.38079,
            "10365": 2.36896,
            "10370": 2.35332,
            "10375": 2.36773,
            "10380": 2.42902,
            "10385": 2.41607,
            "10390": 2.38923,
            "10395": 2.36549,
            "10400": 2.38611,
            "10405": 2.36126,
            "10410": 2.35109,
            "10415": 2.42503,
            "10420": 2.3871,
            "10425": 2.33401,
            "10430": 2.36713,
            "10435": 2.37802,
            "10440": 2.37871,
            "10445": 2.36671,
            "10450": 2.36821,
            "10455": 2.38669,
            "10460": 2.38756,
            "10465": 2.30956,
            "10470": 2.36511,
            "10475": 2.3866,
            "10480": 2.37089,
            "10485": 2.36836,
            "10490": 2.42144,
            "10495": 2.37354,
            "10500": 2.37199,
            "10505": 2.37777,
            "10510": 2.39,
            "10515": 2.37919,
            "10520": 2.41012,
            "10525": 2.39687,
            "10530": 2.39908,
            "10535": 2.36141,
            "10540": 2.41268,
            "10545": 2.36489,
            "10550": 2.38305,
            "10555": 2.36463,
            "10560": 2.34744,
            "10565": 2.38069,
            "10570": 2.38206,
            "10575": 2.36644,
            "10580": 2.38522,
            "10585": 2.37539,
            "10590": 2.38616,
            "10595": 2.38394,
            "10600": 2.33869,
            "10605": 2.37852,
            "10610": 2.37214,
            "10615": 2.37005,
            "10620": 2.35367,
            "10625": 2.42649,
            "10630": 2.37613,
            "10635": 2.3312,
            "10640": 2.37069,
            "10645": 2.42878,
            "10650": 2.36823,
            "10655": 2.31441,
            "10660": 2.35337,
            "10665": 2.4063,
            "10670": 2.32304,
            "10675": 2.42238,
            "10680": 2.36118,
            "10685": 2.29504,
            "10690": 2.39175,
            "10695": 2.33674,
            "10700": 2.39022,
            "10705": 2.39174,
            "10710": 2.34831,
            "10715": 2.38812,
            "10720": 2.33134,
            "10725": 2.35906,
            "10730": 2.35495,
            "10735": 2.36061,
            "10740": 2.32528,
            "10745": 2.34563,
            "10750": 2.34057,
            "10755": 2.41066,
            "10760": 2.37111,
            "10765": 2.34177,
            "10770": 2.3764,
            "10775": 2.39158,
            "10780": 2.37592,
            "10785": 2.40194,
            "10790": 2.35381,
            "10795": 2.39226,
            "10800": 2.32822,
            "10805": 2.4021,
            "10810": 2.38039,
            "10815": 2.35987,
            "10820": 2.34926,
            "10825": 2.37741,
            "10830": 2.34371,
            "10835": 2.35353,
            "10840": 2.33485,
            "10845": 2.39224,
            "10850": 2.33793,
            "10855": 2.36948,
            "10860": 2.33964,
            "10865": 2.32843,
            "10870": 2.33078,
            "10875": 2.30989,
            "10880": 2.40035,
            "10885": 2.41176,
            "10890": 2.36855,
            "10895": 2.37788,
            "10900": 2.3384,
            "10905": 2.31887,
            "10910": 2.41223,
            "10915": 2.37677,
            "10920": 2.38028,
            "10925": 2.36814,
            "10930": 2.32494,
            "10935": 2.36648,
            "10940": 2.35886,
            "10945": 2.35128,
            "10950": 2.36889,
            "10955": 2.36934,
            "10960": 2.31708,
            "10965": 2.36916,
            "10970": 2.36263,
            "10975": 2.41456,
            "10980": 2.38086,
            "10985": 2.34865,
            "10990": 2.4034,
            "10995": 2.37075,
            "11000": 2.34255,
            "11005": 2.36778,
            "11010": 2.34924,
            "11015": 2.33315,
            "11020": 2.33903,
            "11025": 2.37143,
            "11030": 2.34476,
            "11035": 2.31967,
            "11040": 2.32289,
            "11045": 2.32388,
            "11050": 2.32252,
            "11055": 2.29585,
            "11060": 2.34538,
            "11065": 2.31536,
            "11070": 2.40051,
            "11075": 2.32509,
            "11080": 2.36017,
            "11085": 2.34352,
            "11090": 2.35209,
            "11095": 2.37641,
            "11100": 2.33449,
            "11105": 2.32154,
            "11110": 2.368,
            "11115": 2.37793,
            "11120": 2.38619,
            "11125": 2.32229,
            "11130": 2.35716,
            "11135": 2.33918,
            "11140": 2.37855,
            "11145": 2.35579,
            "11150": 2.40131,
            "11155": 2.34705,
            "11160": 2.37239,
            "11165": 2.36885,
            "11170": 2.3465,
            "11175": 2.34021,
            "11180": 2.37972,
            "11185": 2.31673,
            "11190": 2.28317,
            "11195": 2.33373,
            "11200": 2.35134,
            "11205": 2.36714,
            "11210": 2.33799,
            "11215": 2.32437,
            "11220": 2.3481,
            "11225": 2.37627,
            "11230": 2.37091,
            "11235": 2.32348,
            "11240": 2.3466,
            "11245": 2.36177,
            "11250": 2.33705,
            "11255": 2.34193,
            "11260": 2.36182,
            "11265": 2.39408,
            "11270": 2.29316,
            "11275": 2.31872,
            "11280": 2.37433,
            "11285": 2.29595,
            "11290": 2.35146,
            "11295": 2.36999,
            "11300": 2.38682,
            "11305": 2.34013,
            "11310": 2.3354,
            "11315": 2.30317,
            "11320": 2.30974,
            "11325": 2.3208,
            "11330": 2.35828,
            "11335": 2.34307,
            "11340": 2.3129,
            "11345": 2.31769,
            "11350": 2.30069,
            "11355": 2.3251,
            "11360": 2.35679,
            "11365": 2.29948,
            "11370": 2.35751,
            "11375": 2.33197,
            "11380": 2.34491,
            "11385": 2.35247,
            "11390": 2.33803,
            "11395": 2.29059,
            "11400": 2.31536,
            "11405": 2.3598,
            "11410": 2.35964,
            "11415": 2.39106,
            "11420": 2.35695,
            "11425": 2.31353,
            "11430": 2.3736,
            "11435": 2.3667,
            "11440": 2.35263,
            "11445": 2.36807,
            "11450": 2.32665,
            "11455": 2.30943,
            "11460": 2.35563,
            "11465": 2.34819,
            "11470": 2.38002,
            "11475": 2.319,
            "11480": 2.33041,
            "11485": 2.31503,
            "11490": 2.35057,
            "11495": 2.41059,
            "11500": 2.34525,
            "11505": 2.35499,
            "11510": 2.36763,
            "11515": 2.32598,
            "11520": 2.30859,
            "11525": 2.36509,
            "11530": 2.31744,
            "11535": 2.32663,
            "11540": 2.35006,
            "11545": 2.34784,
            "11550": 2.36943,
            "11555": 2.33007,
            "11560": 2.35288,
            "11565": 2.34403,
            "11570": 2.35346,
            "11575": 2.30052,
            "11580": 2.33268,
            "11585": 2.35658,
            "11590": 2.36633,
            "11595": 2.33968,
            "11600": 2.36249,
            "11605": 2.32611,
            "11610": 2.3654,
            "11615": 2.36389,
            "11620": 2.30009,
            "11625": 2.28266,
            "11630": 2.33544,
            "11635": 2.34875,
            "11640": 2.30973,
            "11645": 2.31232,
            "11650": 2.33028,
            "11655": 2.35572,
            "11660": 2.34011,
            "11665": 2.33427,
            "11670": 2.3041,
            "11675": 2.30115,
            "11680": 2.32859,
            "11685": 2.34041,
            "11690": 2.34791,
            "11695": 2.32158,
            "11700": 2.32899,
            "11705": 2.30663,
            "11710": 2.34874,
            "11715": 2.31887,
            "11720": 2.30331,
            "11725": 2.34343,
            "11730": 2.30734,
            "11735": 2.33175,
            "11740": 2.27598,
            "11745": 2.32117,
            "11750": 2.33154,
            "11755": 2.35613,
            "11760": 2.31593,
            "11765": 2.34029,
            "11770": 2.28007,
            "11775": 2.32874,
            "11780": 2.25882,
            "11785": 2.29998,
            "11790": 2.31839,
            "11795": 2.32436,
            "11800": 2.33862,
            "11805": 2.30782,
            "11810": 2.30838,
            "11815": 2.33529,
            "11820": 2.32509,
            "11825": 2.36473,
            "11830": 2.32136,
            "11835": 2.34203,
            "11840": 2.34565,
            "11845": 2.32162,
            "11850": 2.30761,
            "11855": 2.31613,
            "11860": 2.34632,
            "11865": 2.36175,
            "11870": 2.3825,
            "11875": 2.28407,
            "11880": 2.29477,
            "11885": 2.34011,
            "11890": 2.29485,
            "11895": 2.29405,
            "11900": 2.33591,
            "11905": 2.32263,
            "11910": 2.28085,
            "11915": 2.31397,
            "11920": 2.33712,
            "11925": 2.30656,
            "11930": 2.31046,
            "11935": 2.3206,
            "11940": 2.32098,
            "11945": 2.34487,
            "11950": 2.3033,
            "11955": 2.31832,
            "11960": 2.34126,
            "11965": 2.29979,
            "11970": 2.28637,
            "11975": 2.33975,
            "11980": 2.31076,
            "11985": 2.28178,
            "11990": 2.30755,
            "11995": 2.33395,
            "12000": 2.32869,
            "12005": 2.32829,
            "12010": 2.29237,
            "12015": 2.31401,
            "12020": 2.33204,
            "12025": 2.33978,
            "12030": 2.31521,
            "12035": 2.33999,
            "12040": 2.31862,
            "12045": 2.31428,
            "12050": 2.31132,
            "12055": 2.33564,
            "12060": 2.30019,
            "12065": 2.33242,
            "12070": 2.30696,
            "12075": 2.28019,
            "12080": 2.35436,
            "12085": 2.34253,
            "12090": 2.33556,
            "12095": 2.2845,
            "12100": 2.31882,
            "12105": 2.31267,
            "12110": 2.3337,
            "12115": 2.30828,
            "12120": 2.30846,
            "12125": 2.29721,
            "12130": 2.30882,
            "12135": 2.33247,
            "12140": 2.29946,
            "12145": 2.25894,
            "12150": 2.2623,
            "12155": 2.34462,
            "12160": 2.36042,
            "12165": 2.3217,
            "12170": 2.33654,
            "12175": 2.34429,
            "12180": 2.33378,
            "12185": 2.34364,
            "12190": 2.33822,
            "12195": 2.3013,
            "12200": 2.30315,
            "12205": 2.32625,
            "12210": 2.35973,
            "12215": 2.30677,
            "12220": 2.30311,
            "12225": 2.25141,
            "12230": 2.337,
            "12235": 2.34191,
            "12240": 2.32761,
            "12245": 2.28907,
            "12250": 2.27562,
            "12255": 2.33792,
            "12260": 2.3162,
            "12265": 2.34477,
            "12270": 2.31496,
            "12275": 2.31706,
            "12280": 2.32245,
            "12285": 2.2891,
            "12290": 2.31391,
            "12295": 2.26998,
            "12300": 2.33169,
            "12305": 2.27166,
            "12310": 2.29137,
            "12315": 2.35763,
            "12320": 2.29959,
            "12325": 2.32374,
            "12330": 2.30301,
            "12335": 2.32281,
            "12340": 2.34365,
            "12345": 2.36988,
            "12350": 2.34598,
            "12355": 2.30802,
            "12360": 2.31641,
            "12365": 2.33362,
            "12370": 2.29476,
            "12375": 2.3034,
            "12380": 2.29451,
            "12385": 2.29368,
            "12390": 2.25148,
            "12395": 2.30792,
            "12400": 2.30262,
            "12405": 2.31508,
            "12410": 2.30746,
            "12415": 2.28713,
            "12420": 2.3211,
            "12425": 2.30387,
            "12430": 2.31915,
            "12435": 2.30225,
            "12440": 2.33631,
            "12445": 2.3229,
            "12450": 2.31029,
            "12455": 2.24284,
            "12460": 2.3393,
            "12465": 2.36818,
            "12470": 2.27992,
            "12475": 2.27607,
            "12480": 2.29448,
            "12485": 2.3091,
            "12490": 2.33361,
            "12495": 2.27164,
            "12500": 2.32378,
            "12505": 2.33909,
            "12510": 2.35961,
            "12515": 2.27361,
            "12520": 2.32348,
            "12525": 2.28983,
            "12530": 2.32402,
            "12535": 2.27622,
            "12540": 2.2884,
            "12545": 2.29316,
            "12550": 2.31887,
            "12555": 2.32698,
            "12560": 2.30354,
            "12565": 2.33766,
            "12570": 2.28173,
            "12575": 2.30324,
            "12580": 2.31441,
            "12585": 2.29584,
            "12590": 2.33735,
            "12595": 2.327,
            "12600": 2.28524,
            "12605": 2.32325,
            "12610": 2.36557,
            "12615": 2.30924,
            "12620": 2.33596,
            "12625": 2.33366,
            "12630": 2.30138,
            "12635": 2.33851,
            "12640": 2.29753,
            "12645": 2.28239,
            "12650": 2.32994,
            "12655": 2.26839,
            "12660": 2.34397,
            "12665": 2.32017,
            "12670": 2.31271,
            "12675": 2.32157,
            "12680": 2.27735,
            "12685": 2.36821,
            "12690": 2.30671,
            "12695": 2.33358,
            "12700": 2.29629,
            "12705": 2.31071,
            "12710": 2.31098,
            "12715": 2.2899,
            "12720": "nan",
            "12725": "nan",
            "12730": "nan",
            "12735": "nan",
            "12740": "nan",
            "12745": "nan",
            "12750": "nan",
            "12755": "nan",
            "12760": "nan",
            "12765": "nan",
            "12770": "nan",
            "12775": "nan",
            "12780": "nan",
            "12785": "nan",
            "12790": "nan",
            "12795": "nan",
            "12800": "nan",
            "12805": "nan",
            "12810": "nan",
            "12815": "nan",
            "12820": "nan",
            "12825": "nan",
            "12830": "nan",
            "12835": "nan",
            "12840": "nan",
            "12845": "nan",
            "12850": "nan",
            "12855": "nan",
            "12860": "nan",
            "12865": "nan",
            "12870": "nan",
            "12875": "nan",
            "12880": "nan",
            "12885": "nan",
            "12890": "nan",
            "12895": "nan",
            "12900": "nan",
            "12905": "nan",
            "12910": "nan",
            "12915": "nan",
            "12920": "nan",
            "12925": "nan",
            "12930": "nan",
            "12935": "nan",
            "12940": "nan",
            "12945": "nan",
            "12950": "nan",
            "12955": "nan",
            "12960": "nan",
            "12965": "nan",
            "12970": "nan",
            "12975": "nan",
            "12980": "nan",
            "12985": "nan",
            "12990": "nan",
            "12995": "nan",
            "13000": "nan"
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 13000,
        "step_interval": 5,
        "values": {
            "1": 956238208.0,
            "5": 967339520.0,
            "10": 971388032.0,
            "15": 946517440.0,
            "20": 957353792.0,
            "25": 1064191488.0,
            "30": 1177591296.0,
            "35": 1231325824.0,
            "40": 1192786176.0,
            "45": 1126079744.0,
            "50": 1114080640.0,
            "55": 1080115072.0,
            "60": 1029360512.0,
            "65": 1011742400.0,
            "70": 989809536.0,
            "75": 988686592.0,
            "80": 1010954752.0,
            "85": 1002030784.0,
            "90": 982093760.0,
            "95": 957666304.0,
            "100": 970864192.0,
            "105": 978298944.0,
            "110": 976436928.0,
            "115": 977307072.0,
            "120": 961413888.0,
            "125": 941839232.0,
            "130": 974677184.0,
            "135": 964893696.0,
            "140": 962928448.0,
            "145": 976848256.0,
            "150": 921837824.0,
            "155": 968182848.0,
            "160": 956372224.0,
            "165": 959838592.0,
            "170": 974363456.0,
            "175": 948986496.0,
            "180": 946722496.0,
            "185": 971990144.0,
            "190": 969058048.0,
            "195": 985124672.0,
            "200": 945767360.0,
            "205": 958344704.0,
            "210": 979438400.0,
            "215": 967482112.0,
            "220": 956425280.0,
            "225": 962397312.0,
            "230": 948180160.0,
            "235": 965227648.0,
            "240": 966065152.0,
            "245": 969164800.0,
            "250": 974440512.0,
            "255": 925058112.0,
            "260": 965630592.0,
            "265": 970669824.0,
            "270": 959129600.0,
            "275": 953994048.0,
            "280": 963424832.0,
            "285": 945779840.0,
            "290": 974125184.0,
            "295": 966700352.0,
            "300": 967154304.0,
            "305": 964506304.0,
            "310": 940357760.0,
            "315": 967397376.0,
            "320": 968995200.0,
            "325": 980551744.0,
            "330": 972091328.0,
            "335": 946866432.0,
            "340": 966592192.0,
            "345": 973017408.0,
            "350": 973919360.0,
            "355": 963260736.0,
            "360": 948351488.0,
            "365": 964818176.0,
            "370": 962952768.0,
            "375": 958446016.0,
            "380": 947153536.0,
            "385": 955985536.0,
            "390": 945399552.0,
            "395": 970421056.0,
            "400": 979772928.0,
            "405": 968348544.0,
            "410": 970068352.0,
            "415": 953155840.0,
            "420": 943570304.0,
            "425": 954774784.0,
            "430": 962661376.0,
            "435": 977075392.0,
            "440": 954810496.0,
            "445": 971890432.0,
            "450": 963508416.0,
            "455": 973133696.0,
            "460": 983712384.0,
            "465": 945280320.0,
            "470": 942051776.0,
            "475": 966991488.0,
            "480": 966099968.0,
            "485": 976414720.0,
            "490": 962540800.0,
            "495": 945464320.0,
            "500": 964455104.0,
            "505": 986008192.0,
            "510": 965685696.0,
            "515": 943408256.0,
            "520": 945017408.0,
            "525": 971264512.0,
            "530": 971888128.0,
            "535": 979137408.0,
            "540": 969533056.0,
            "545": 954128704.0,
            "550": 951265920.0,
            "555": 987223104.0,
            "560": 960427264.0,
            "565": 966615680.0,
            "570": 975726336.0,
            "575": 927225856.0,
            "580": 970697984.0,
            "585": 961173760.0,
            "590": 972966208.0,
            "595": 963684544.0,
            "600": 937076416.0,
            "605": 951474048.0,
            "610": 963361152.0,
            "615": 970013248.0,
            "620": 976473088.0,
            "625": 949583744.0,
            "630": 954443136.0,
            "635": 986045888.0,
            "640": 980977024.0,
            "645": 955010560.0,
            "650": 958551424.0,
            "655": 951653504.0,
            "660": 961039360.0,
            "665": 967552640.0,
            "670": 962513984.0,
            "675": 968332992.0,
            "680": 965619840.0,
            "685": 962863104.0,
            "690": 961919872.0,
            "695": 954768768.0,
            "700": 970332928.0,
            "705": 945515520.0,
            "710": 943882176.0,
            "715": 973357120.0,
            "720": 968366464.0,
            "725": 978490112.0,
            "730": 952193408.0,
            "735": 948811776.0,
            "740": 955636096.0,
            "745": 975864320.0,
            "750": 981233792.0,
            "755": 962160000.0,
            "760": 951964672.0,
            "765": 967346560.0,
            "770": 976147456.0,
            "775": 970547200.0,
            "780": 977539392.0,
            "785": 931527936.0,
            "790": 960440448.0,
            "795": 964584576.0,
            "800": 967023104.0,
            "805": 962318144.0,
            "810": 940972672.0,
            "815": 949034688.0,
            "820": 953181824.0,
            "825": 954501440.0,
            "830": 976444416.0,
            "835": 956077248.0,
            "840": 948405120.0,
            "845": 965154560.0,
            "850": 966026368.0,
            "855": 960905088.0,
            "860": 976024960.0,
            "865": 938161792.0,
            "870": 966415680.0,
            "875": 972314176.0,
            "880": 963123712.0,
            "885": 967742976.0,
            "890": 949969792.0,
            "895": 960017536.0,
            "900": 974231296.0,
            "905": 963968512.0,
            "910": 958436352.0,
            "915": 956353472.0,
            "920": 943975552.0,
            "925": 960830592.0,
            "930": 978849408.0,
            "935": 971072064.0,
            "940": 960906432.0,
            "945": 945062400.0,
            "950": 957425856.0,
            "955": 979037056.0,
            "960": 983589376.0,
            "965": 966165568.0,
            "970": 951229760.0,
            "975": 961578752.0,
            "980": 968072448.0,
            "985": 968988672.0,
            "990": 984390272.0,
            "995": 953292224.0,
            "1000": 934781056.0,
            "1005": 960145408.0,
            "1010": 971541504.0,
            "1015": 985182592.0,
            "1020": 962781376.0,
            "1025": 935010688.0,
            "1030": 974681152.0,
            "1035": 964994816.0,
            "1040": 980464704.0,
            "1045": 960823424.0,
            "1050": 955203136.0,
            "1055": 957780224.0,
            "1060": 967749120.0,
            "1065": 967115840.0,
            "1070": 966599040.0,
            "1075": 950060096.0,
            "1080": 954510784.0,
            "1085": 967251200.0,
            "1090": 977132032.0,
            "1095": 961234688.0,
            "1100": 979610240.0,
            "1105": 953364992.0,
            "1110": 965953472.0,
            "1115": 966983552.0,
            "1120": 970352384.0,
            "1125": 965708416.0,
            "1130": 954942720.0,
            "1135": 965842176.0,
            "1140": 965173632.0,
            "1145": 970989120.0,
            "1150": 955556928.0,
            "1155": 930576768.0,
            "1160": 957775488.0,
            "1165": 978123264.0,
            "1170": 974302976.0,
            "1175": 973057920.0,
            "1180": 973084416.0,
            "1185": 947341952.0,
            "1190": 964793728.0,
            "1195": 953137856.0,
            "1200": 972846528.0,
            "1205": 988477760.0,
            "1210": 931125568.0,
            "1215": 968646656.0,
            "1220": 969161280.0,
            "1225": 975950208.0,
            "1230": 967334912.0,
            "1235": 943444608.0,
            "1240": 955857152.0,
            "1245": 981503872.0,
            "1250": 966110016.0,
            "1255": 973670208.0,
            "1260": 946495936.0,
            "1265": 963997568.0,
            "1270": 960487872.0,
            "1275": 973615360.0,
            "1280": 961114240.0,
            "1285": 957583232.0,
            "1290": 952528512.0,
            "1295": 971613312.0,
            "1300": 968863424.0,
            "1305": 963741504.0,
            "1310": 963336512.0,
            "1315": 943555584.0,
            "1320": 966306176.0,
            "1325": 989786432.0,
            "1330": 969505408.0,
            "1335": 972299840.0,
            "1340": 972270080.0,
            "1345": 960657920.0,
            "1350": 968638208.0,
            "1355": 955854464.0,
            "1360": 971823616.0,
            "1365": 960387584.0,
            "1370": 948791680.0,
            "1375": 973532224.0,
            "1380": 953470464.0,
            "1385": 969146880.0,
            "1390": 975720128.0,
            "1395": 931674240.0,
            "1400": 945856640.0,
            "1405": 976751872.0,
            "1410": 974512768.0,
            "1415": 967571456.0,
            "1420": 966746752.0,
            "1425": 937378368.0,
            "1430": 973914368.0,
            "1435": 978332160.0,
            "1440": 964179456.0,
            "1445": 958055552.0,
            "1450": 946146944.0,
            "1455": 983924288.0,
            "1460": 968651392.0,
            "1465": 948746496.0,
            "1470": 984241856.0,
            "1475": 943900608.0,
            "1480": 963974400.0,
            "1485": 957349376.0,
            "1490": 961258496.0,
            "1495": 980540928.0,
            "1500": 958331584.0,
            "1505": 942866048.0,
            "1510": 984178560.0,
            "1515": 959093376.0,
            "1520": 959104704.0,
            "1525": 952783360.0,
            "1530": 957743040.0,
            "1535": 949428736.0,
            "1540": 971085056.0,
            "1545": 963133696.0,
            "1550": 978667328.0,
            "1555": 952317312.0,
            "1560": 980089472.0,
            "1565": 967314944.0,
            "1570": 973839744.0,
            "1575": 975492992.0,
            "1580": 941861248.0,
            "1585": 970028864.0,
            "1590": 983823488.0,
            "1595": 948633216.0,
            "1600": 967443968.0,
            "1605": 952450496.0,
            "1610": 969617920.0,
            "1615": 983145344.0,
            "1620": 968020096.0,
            "1625": 970717120.0,
            "1630": 962888064.0,
            "1635": 942311232.0,
            "1640": 981610496.0,
            "1645": 973977088.0,
            "1650": 974184000.0,
            "1655": 967265408.0,
            "1660": 940687488.0,
            "1665": 961703552.0,
            "1670": 962901440.0,
            "1675": 971281536.0,
            "1680": 980877568.0,
            "1685": 944417920.0,
            "1690": 964689600.0,
            "1695": 965642112.0,
            "1700": 966343488.0,
            "1705": 985201152.0,
            "1710": 978353792.0,
            "1715": 943211392.0,
            "1720": 977089408.0,
            "1725": 965872128.0,
            "1730": 968971648.0,
            "1735": 965088512.0,
            "1740": 949712064.0,
            "1745": 970011968.0,
            "1750": 959683200.0,
            "1755": 960089920.0,
            "1760": 966377728.0,
            "1765": 951817856.0,
            "1770": 954664448.0,
            "1775": 973750144.0,
            "1780": 970533248.0,
            "1785": 968826368.0,
            "1790": 950233728.0,
            "1795": 945131904.0,
            "1800": 984666240.0,
            "1805": 987162112.0,
            "1810": 977770368.0,
            "1815": 948006400.0,
            "1820": 949209216.0,
            "1825": 978853696.0,
            "1830": 966362240.0,
            "1835": 964132608.0,
            "1840": 972319936.0,
            "1845": 935414656.0,
            "1850": 952499648.0,
            "1855": 980053248.0,
            "1860": 975868864.0,
            "1865": 958964992.0,
            "1870": 958950144.0,
            "1875": 932595392.0,
            "1880": 973575168.0,
            "1885": 978846208.0,
            "1890": 971357888.0,
            "1895": 959212672.0,
            "1900": 947393664.0,
            "1905": 981829888.0,
            "1910": 969124096.0,
            "1915": 970039296.0,
            "1920": 975599104.0,
            "1925": 960495488.0,
            "1930": 977922880.0,
            "1935": 963250944.0,
            "1940": 952460480.0,
            "1945": 981337984.0,
            "1950": 939169664.0,
            "1955": 960603840.0,
            "1960": 970032896.0,
            "1965": 981177088.0,
            "1970": 962046720.0,
            "1975": 952823296.0,
            "1980": 936849600.0,
            "1985": 975939584.0,
            "1990": 965967104.0,
            "1995": 962612416.0,
            "2000": 960554496.0,
            "2005": 954497600.0,
            "2010": 975579648.0,
            "2015": 991803584.0,
            "2020": 975435776.0,
            "2025": 974302912.0,
            "2030": 952084224.0,
            "2035": 967847424.0,
            "2040": 987457792.0,
            "2045": 976480512.0,
            "2050": 984702144.0,
            "2055": 942837120.0,
            "2060": 942592896.0,
            "2065": 966210304.0,
            "2070": 969622720.0,
            "2075": 980553856.0,
            "2080": 977597952.0,
            "2085": 939639680.0,
            "2090": 969873216.0,
            "2095": 961275776.0,
            "2100": 976720384.0,
            "2105": 972537024.0,
            "2110": 959903872.0,
            "2115": 956877504.0,
            "2120": 977480704.0,
            "2125": 962566912.0,
            "2130": 979617600.0,
            "2135": 950536512.0,
            "2140": 946996608.0,
            "2145": 962277696.0,
            "2150": 973402816.0,
            "2155": 972688448.0,
            "2160": 970314624.0,
            "2165": 948642112.0,
            "2170": 961542144.0,
            "2175": 969378304.0,
            "2180": 969329600.0,
            "2185": 947446784.0,
            "2190": 940480256.0,
            "2195": 986087808.0,
            "2200": 961862976.0,
            "2205": 978924800.0,
            "2210": 964102208.0,
            "2215": 963502016.0,
            "2220": 951310976.0,
            "2225": 969315328.0,
            "2230": 976330496.0,
            "2235": 974026368.0,
            "2240": 975492992.0,
            "2245": 960230656.0,
            "2250": 967638976.0,
            "2255": 969131008.0,
            "2260": 975063808.0,
            "2265": 968257920.0,
            "2270": 951742208.0,
            "2275": 962765824.0,
            "2280": 969640000.0,
            "2285": 971691776.0,
            "2290": 962890432.0,
            "2295": 931409920.0,
            "2300": 959904512.0,
            "2305": 970424640.0,
            "2310": 967445248.0,
            "2315": 970905920.0,
            "2320": 975589760.0,
            "2325": 938586752.0,
            "2330": 988438784.0,
            "2335": 977489024.0,
            "2340": 964596224.0,
            "2345": 964166144.0,
            "2350": 947555520.0,
            "2355": 977030400.0,
            "2360": 966899712.0,
            "2365": 977296448.0,
            "2370": 965070912.0,
            "2375": 953965184.0,
            "2380": 962916736.0,
            "2385": 967196672.0,
            "2390": 963075712.0,
            "2395": 974466432.0,
            "2400": 958410624.0,
            "2405": 968119936.0,
            "2410": 951586176.0,
            "2415": 965904768.0,
            "2420": 966517120.0,
            "2425": 959045440.0,
            "2430": 956684800.0,
            "2435": 961388736.0,
            "2440": 959755328.0,
            "2445": 970890624.0,
            "2450": 961997568.0,
            "2455": 922720640.0,
            "2460": 951952192.0,
            "2465": 955730496.0,
            "2470": 972570496.0,
            "2475": 973810432.0,
            "2480": 943894016.0,
            "2485": 944184832.0,
            "2490": 972410240.0,
            "2495": 974449664.0,
            "2500": 973909696.0,
            "2505": 958489024.0,
            "2510": 939508608.0,
            "2515": 979552768.0,
            "2520": 970472000.0,
            "2525": 964389632.0,
            "2530": 955799232.0,
            "2535": 936598464.0,
            "2540": 969026752.0,
            "2545": 970384192.0,
            "2550": 969460160.0,
            "2555": 969439360.0,
            "2560": 964976768.0,
            "2565": 959763200.0,
            "2570": 985174784.0,
            "2575": 957423360.0,
            "2580": 967423488.0,
            "2585": 966023040.0,
            "2590": 956354240.0,
            "2595": 981828864.0,
            "2600": 959531392.0,
            "2605": 962999680.0,
            "2610": 965970240.0,
            "2615": 951924032.0,
            "2620": 971241088.0,
            "2625": 976456064.0,
            "2630": 974410624.0,
            "2635": 948071296.0,
            "2640": 948137344.0,
            "2645": 963038912.0,
            "2650": 953982016.0,
            "2655": 977112640.0,
            "2660": 949622528.0,
            "2665": 953929472.0,
            "2670": 959064064.0,
            "2675": 979275776.0,
            "2680": 961396224.0,
            "2685": 970702208.0,
            "2690": 965220736.0,
            "2695": 943553856.0,
            "2700": 969424960.0,
            "2705": 978961792.0,
            "2710": 971810688.0,
            "2715": 990813824.0,
            "2720": 942648768.0,
            "2725": 967953984.0,
            "2730": 955465408.0,
            "2735": 970673536.0,
            "2740": 977922688.0,
            "2745": 932280064.0,
            "2750": 947857536.0,
            "2755": 956317824.0,
            "2760": 981696768.0,
            "2765": 966111104.0,
            "2770": 948915200.0,
            "2775": 935830528.0,
            "2780": 964777472.0,
            "2785": 969568256.0,
            "2790": 974273920.0,
            "2795": 966884736.0,
            "2800": 944390464.0,
            "2805": 964354176.0,
            "2810": 969611264.0,
            "2815": 975844864.0,
            "2820": 963083968.0,
            "2825": 937626688.0,
            "2830": 956737920.0,
            "2835": 986317312.0,
            "2840": 961753792.0,
            "2845": 967497664.0,
            "2850": 951711232.0,
            "2855": 962089856.0,
            "2860": 954239488.0,
            "2865": 955880832.0,
            "2870": 944660864.0,
            "2875": 974662912.0,
            "2880": 968200320.0,
            "2885": 981080832.0,
            "2890": 953454592.0,
            "2895": 957177728.0,
            "2900": 964988928.0,
            "2905": 931698048.0,
            "2910": 955728512.0,
            "2915": 979473920.0,
            "2920": 970491520.0,
            "2925": 964976000.0,
            "2930": 964046976.0,
            "2935": 940140672.0,
            "2940": 964910592.0,
            "2945": 989140480.0,
            "2950": 965204224.0,
            "2955": 965114496.0,
            "2960": 933158144.0,
            "2965": 968804352.0,
            "2970": 973033920.0,
            "2975": 958091392.0,
            "2980": 964495744.0,
            "2985": 937267456.0,
            "2990": 951252864.0,
            "2995": 978314880.0,
            "3000": 969275520.0,
            "3005": 974685760.0,
            "3010": 950235136.0,
            "3015": 943853312.0,
            "3020": 958437760.0,
            "3025": 975181440.0,
            "3030": 965028672.0,
            "3035": 963452096.0,
            "3040": 952132480.0,
            "3045": 989792000.0,
            "3050": 965544960.0,
            "3055": 982520192.0,
            "3060": 971238464.0,
            "3065": 943916032.0,
            "3070": 978408704.0,
            "3075": 975205632.0,
            "3080": 960989888.0,
            "3085": 962361792.0,
            "3090": 945962176.0,
            "3095": 938115328.0,
            "3100": 972928448.0,
            "3105": 961988096.0,
            "3110": 970655872.0,
            "3115": 963388544.0,
            "3120": 947128256.0,
            "3125": 972719360.0,
            "3130": 952985088.0,
            "3135": 966041088.0,
            "3140": 968488576.0,
            "3145": 937850816.0,
            "3150": 975009664.0,
            "3155": 976812608.0,
            "3160": 969628160.0,
            "3165": 982192576.0,
            "3170": 937971968.0,
            "3175": 953836928.0,
            "3180": 983808256.0,
            "3185": 965174912.0,
            "3190": 968492480.0,
            "3195": 950942784.0,
            "3200": 945113472.0,
            "3205": 959861760.0,
            "3210": 957485632.0,
            "3215": 958032128.0,
            "3220": 968128512.0,
            "3225": 935613824.0,
            "3230": 962601216.0,
            "3235": 975774720.0,
            "3240": 962621184.0,
            "3245": 981274048.0,
            "3250": 943271424.0,
            "3255": 954597632.0,
            "3260": 980361600.0,
            "3265": 963631488.0,
            "3270": 965175424.0,
            "3275": 959744000.0,
            "3280": 967056512.0,
            "3285": 982489216.0,
            "3290": 947700992.0,
            "3295": 966433792.0,
            "3300": 959177984.0,
            "3305": 949142144.0,
            "3310": 979510912.0,
            "3315": 964295424.0,
            "3320": 969218304.0,
            "3325": 956205440.0,
            "3330": 941178752.0,
            "3335": 964985728.0,
            "3340": 956900928.0,
            "3345": 972501248.0,
            "3350": 964575232.0,
            "3355": 943358976.0,
            "3360": 970036352.0,
            "3365": 969466496.0,
            "3370": 954762048.0,
            "3375": 958689664.0,
            "3380": 971475328.0,
            "3385": 947986048.0,
            "3390": 965792896.0,
            "3395": 978402432.0,
            "3400": 978138368.0,
            "3405": 976735744.0,
            "3410": 924216960.0,
            "3415": 955448704.0,
            "3420": 971845184.0,
            "3425": 977180160.0,
            "3430": 973849984.0,
            "3435": 936096256.0,
            "3440": 970528896.0,
            "3445": 957327680.0,
            "3450": 959851968.0,
            "3455": 963876096.0,
            "3460": 967894400.0,
            "3465": 931343616.0,
            "3470": 952360448.0,
            "3475": 973728512.0,
            "3480": 959751104.0,
            "3485": 979959616.0,
            "3490": 944682496.0,
            "3495": 953930240.0,
            "3500": 969341440.0,
            "3505": 964360704.0,
            "3510": 971246336.0,
            "3515": 955971584.0,
            "3520": 958758016.0,
            "3525": 971938944.0,
            "3530": 964149248.0,
            "3535": 983204352.0,
            "3540": 937514688.0,
            "3545": 944754752.0,
            "3550": 984487296.0,
            "3555": 978070784.0,
            "3560": 974397760.0,
            "3565": 968825088.0,
            "3570": 946719488.0,
            "3575": 976129728.0,
            "3580": 977517568.0,
            "3585": 954579904.0,
            "3590": 956450816.0,
            "3595": 951467904.0,
            "3600": 989007872.0,
            "3605": 962031488.0,
            "3610": 965078016.0,
            "3615": 974658560.0,
            "3620": 954911104.0,
            "3625": 939539072.0,
            "3630": 990173440.0,
            "3635": 971459328.0,
            "3640": 976048512.0,
            "3645": 961512832.0,
            "3650": 945826304.0,
            "3655": 965806464.0,
            "3660": 976218624.0,
            "3665": 964045056.0,
            "3670": 977455744.0,
            "3675": 943498112.0,
            "3680": 958208192.0,
            "3685": 964297664.0,
            "3690": 982105408.0,
            "3695": 963148416.0,
            "3700": 950593920.0,
            "3705": 947368832.0,
            "3710": 982379520.0,
            "3715": 972704896.0,
            "3720": 976150400.0,
            "3725": 964061952.0,
            "3730": 948862464.0,
            "3735": 967102976.0,
            "3740": 960972544.0,
            "3745": 969346176.0,
            "3750": 963958912.0,
            "3755": 953444224.0,
            "3760": 976653312.0,
            "3765": 979853376.0,
            "3770": 972371136.0,
            "3775": 972385152.0,
            "3780": 952596608.0,
            "3785": 960248896.0,
            "3790": 985609984.0,
            "3795": 969208064.0,
            "3800": 957888896.0,
            "3805": 972420160.0,
            "3810": 954529792.0,
            "3815": 974579008.0,
            "3820": 963032192.0,
            "3825": 962083584.0,
            "3830": 969407232.0,
            "3835": 934758144.0,
            "3840": 971268928.0,
            "3845": 986861568.0,
            "3850": 968872576.0,
            "3855": 965259136.0,
            "3860": 948069504.0,
            "3865": 975060864.0,
            "3870": 985098240.0,
            "3875": 983041536.0,
            "3880": 963614976.0,
            "3885": 953023296.0,
            "3890": 960283264.0,
            "3895": 960602304.0,
            "3900": 984924288.0,
            "3905": 976209664.0,
            "3910": 987348480.0,
            "3915": 946018304.0,
            "3920": 974866688.0,
            "3925": 961247104.0,
            "3930": 976776832.0,
            "3935": 978912448.0,
            "3940": 950291456.0,
            "3945": 960272512.0,
            "3950": 974197120.0,
            "3955": 972954688.0,
            "3960": 974078400.0,
            "3965": 950873408.0,
            "3970": 980679552.0,
            "3975": 960724736.0,
            "3980": 977530624.0,
            "3985": 962948480.0,
            "3990": 972742784.0,
            "3995": 953701248.0,
            "4000": 974987648.0,
            "4005": 971658240.0,
            "4010": 978408000.0,
            "4015": 971490816.0,
            "4020": 950321984.0,
            "4025": 968433152.0,
            "4030": 997947904.0,
            "4035": 978560256.0,
            "4040": 959811840.0,
            "4045": 939663424.0,
            "4050": 944705664.0,
            "4055": 980972864.0,
            "4060": 977696256.0,
            "4065": 975729856.0,
            "4070": 942163712.0,
            "4075": 945752576.0,
            "4080": 988740096.0,
            "4085": 962092544.0,
            "4090": 983357312.0,
            "4095": 986957760.0,
            "4100": 957199552.0,
            "4105": 954098048.0,
            "4110": 966498048.0,
            "4115": 976029248.0,
            "4120": 983531264.0,
            "4125": 960040704.0,
            "4130": 967276416.0,
            "4135": 971442944.0,
            "4140": 963169920.0,
            "4145": 956187008.0,
            "4150": 960281664.0,
            "4155": 946204096.0,
            "4160": 968426560.0,
            "4165": 970325504.0,
            "4170": 971982400.0,
            "4175": 955860736.0,
            "4180": 940972928.0,
            "4185": 968293120.0,
            "4190": 967991424.0,
            "4195": 989223296.0,
            "4200": 962689152.0,
            "4205": 960581376.0,
            "4210": 971849664.0,
            "4215": 974144832.0,
            "4220": 981168128.0,
            "4225": 975219904.0,
            "4230": 952725888.0,
            "4235": 958493120.0,
            "4240": 966823744.0,
            "4245": 961732352.0,
            "4250": 965845184.0,
            "4255": 958291712.0,
            "4260": 949594240.0,
            "4265": 964124672.0,
            "4270": 978406720.0,
            "4275": 975433024.0,
            "4280": 962683264.0,
            "4285": 951462912.0,
            "4290": 980053632.0,
            "4295": 968853248.0,
            "4300": 958285696.0,
            "4305": 966815232.0,
            "4310": 939637760.0,
            "4315": 949402880.0,
            "4320": 984559808.0,
            "4325": 982559360.0,
            "4330": 974745216.0,
            "4335": 949470976.0,
            "4340": 959514176.0,
            "4345": 956618752.0,
            "4350": 979947008.0,
            "4355": 968881664.0,
            "4360": 966325632.0,
            "4365": 941257088.0,
            "4370": 969506304.0,
            "4375": 972982016.0,
            "4380": 966147840.0,
            "4385": 972036224.0,
            "4390": 954165312.0,
            "4395": 951879808.0,
            "4400": 973676928.0,
            "4405": 972490496.0,
            "4410": 967920192.0,
            "4415": 958996864.0,
            "4420": 960802432.0,
            "4425": 976427904.0,
            "4430": 965954752.0,
            "4435": 975925632.0,
            "4440": 962366848.0,
            "4445": 954836544.0,
            "4450": 978343552.0,
            "4455": 960242112.0,
            "4460": 968629632.0,
            "4465": 968842496.0,
            "4470": 944164096.0,
            "4475": 952008256.0,
            "4480": 978873920.0,
            "4485": 968328192.0,
            "4490": 957074432.0,
            "4495": 938975104.0,
            "4500": 953265152.0,
            "4505": 977166336.0,
            "4510": 978761984.0,
            "4515": 962492608.0,
            "4520": 958836352.0,
            "4525": 958138496.0,
            "4530": 964428224.0,
            "4535": 976629376.0,
            "4540": 976870912.0,
            "4545": 970145152.0,
            "4550": 953234560.0,
            "4555": 959602432.0,
            "4560": 972460416.0,
            "4565": 973453824.0,
            "4570": 978908928.0,
            "4575": 957797248.0,
            "4580": 963180096.0,
            "4585": 957393088.0,
            "4590": 986579968.0,
            "4595": 960189568.0,
            "4600": 952266496.0,
            "4605": 959322112.0,
            "4610": 963734336.0,
            "4615": 957987648.0,
            "4620": 960280832.0,
            "4625": 973823104.0,
            "4630": 944532608.0,
            "4635": 977049728.0,
            "4640": 960298112.0,
            "4645": 982029440.0,
            "4650": 962399104.0,
            "4655": 939445376.0,
            "4660": 964037248.0,
            "4665": 962602240.0,
            "4670": 976670592.0,
            "4675": 963501504.0,
            "4680": 957462656.0,
            "4685": 949722432.0,
            "4690": 956974336.0,
            "4695": 969988224.0,
            "4700": 961213696.0,
            "4705": 970804928.0,
            "4710": 934505216.0,
            "4715": 970426496.0,
            "4720": 966386432.0,
            "4725": 980291648.0,
            "4730": 965899776.0,
            "4735": 937883712.0,
            "4740": 960124416.0,
            "4745": 976034240.0,
            "4750": 967996672.0,
            "4755": 984984640.0,
            "4760": 959160704.0,
            "4765": 955260480.0,
            "4770": 958633280.0,
            "4775": 991091904.0,
            "4780": 976806144.0,
            "4785": 967569792.0,
            "4790": 943731520.0,
            "4795": 955836096.0,
            "4800": 967734144.0,
            "4805": 976504192.0,
            "4810": 965124096.0,
            "4815": 958006016.0,
            "4820": 973992064.0,
            "4825": 961493376.0,
            "4830": 962612992.0,
            "4835": 972519872.0,
            "4840": 948900928.0,
            "4845": 965727232.0,
            "4850": 960317312.0,
            "4855": 964131072.0,
            "4860": 963038528.0,
            "4865": 967482752.0,
            "4870": 957157120.0,
            "4875": 983549312.0,
            "4880": 957072064.0,
            "4885": 977004032.0,
            "4890": 959752640.0,
            "4895": 942237760.0,
            "4900": 973663168.0,
            "4905": 975250944.0,
            "4910": 969207680.0,
            "4915": 970077824.0,
            "4920": 941168832.0,
            "4925": 954797120.0,
            "4930": 977033472.0,
            "4935": 963763264.0,
            "4940": 972636032.0,
            "4945": 960056448.0,
            "4950": 940842816.0,
            "4955": 968036096.0,
            "4960": 976849664.0,
            "4965": 960984576.0,
            "4970": 958615232.0,
            "4975": 933738496.0,
            "4980": 960849216.0,
            "4985": 963012608.0,
            "4990": 963514752.0,
            "4995": 986270592.0,
            "5000": 940714560.0,
            "5005": 968912192.0,
            "5010": 970295808.0,
            "5015": 965196928.0,
            "5020": 966677760.0,
            "5025": 949434880.0,
            "5030": 953527168.0,
            "5035": 967317120.0,
            "5040": 955677312.0,
            "5045": 969128704.0,
            "5050": 953368448.0,
            "5055": 954787584.0,
            "5060": 962992448.0,
            "5065": 952158720.0,
            "5070": 973622016.0,
            "5075": 978544512.0,
            "5080": 942841984.0,
            "5085": 965826688.0,
            "5090": 972869760.0,
            "5095": 964386304.0,
            "5100": 958317952.0,
            "5105": 965304704.0,
            "5110": 950426368.0,
            "5115": 972334464.0,
            "5120": 960412736.0,
            "5125": 969669760.0,
            "5130": 938722560.0,
            "5135": 943648384.0,
            "5140": 969888448.0,
            "5145": 968602496.0,
            "5150": 970576256.0,
            "5155": 972561344.0,
            "5160": 926526656.0,
            "5165": 961605120.0,
            "5170": 966812736.0,
            "5175": 965999104.0,
            "5180": 963667584.0,
            "5185": 930790400.0,
            "5190": 949767296.0,
            "5195": 972459776.0,
            "5200": 973758400.0,
            "5205": 968213504.0,
            "5210": 960515072.0,
            "5215": 928754176.0,
            "5220": 979235328.0,
            "5225": 984792384.0,
            "5230": 975061632.0,
            "5235": 975080128.0,
            "5240": 944338688.0,
            "5245": 970809856.0,
            "5250": 972437376.0,
            "5255": 966945472.0,
            "5260": 976754048.0,
            "5265": 942271616.0,
            "5270": 969238528.0,
            "5275": 970073280.0,
            "5280": 962813184.0,
            "5285": 964085376.0,
            "5290": 932502848.0,
            "5295": 951775104.0,
            "5300": 975637440.0,
            "5305": 951879040.0,
            "5310": 968053120.0,
            "5315": 955768512.0,
            "5320": 950946944.0,
            "5325": 973089792.0,
            "5330": 967782400.0,
            "5335": 967548864.0,
            "5340": 966491200.0,
            "5345": 962986112.0,
            "5350": 978936576.0,
            "5355": 972314176.0,
            "5360": 963844096.0,
            "5365": 965283712.0,
            "5370": 947850112.0,
            "5375": 948870336.0,
            "5380": 967211968.0,
            "5385": 980515840.0,
            "5390": 965325824.0,
            "5395": 955184768.0,
            "5400": 948286848.0,
            "5405": 974268288.0,
            "5410": 967819648.0,
            "5415": 975976960.0,
            "5420": 967506496.0,
            "5425": 937351168.0,
            "5430": 963858112.0,
            "5435": 971875392.0,
            "5440": 969079424.0,
            "5445": 957489408.0,
            "5450": 919444096.0,
            "5455": 952065152.0,
            "5460": 962363968.0,
            "5465": 978856256.0,
            "5470": 980933120.0,
            "5475": 941573760.0,
            "5480": 955804288.0,
            "5485": 964902528.0,
            "5490": 975961920.0,
            "5495": 962769856.0,
            "5500": 971131520.0,
            "5505": 956915200.0,
            "5510": 968537216.0,
            "5515": 945405696.0,
            "5520": 963158016.0,
            "5525": 975882624.0,
            "5530": 936598528.0,
            "5535": 970608704.0,
            "5540": 960245440.0,
            "5545": 971925632.0,
            "5550": 967860992.0,
            "5555": 955919872.0,
            "5560": 954497024.0,
            "5565": 968752896.0,
            "5570": 945162944.0,
            "5575": 960589440.0,
            "5580": 960575872.0,
            "5585": 959483520.0,
            "5590": 977668864.0,
            "5595": 975245568.0,
            "5600": 962985088.0,
            "5605": 964116352.0,
            "5610": 943157952.0,
            "5615": 966602176.0,
            "5620": 963311040.0,
            "5625": 982288320.0,
            "5630": 976050624.0,
            "5635": 957225216.0,
            "5640": 951468544.0,
            "5645": 967830464.0,
            "5650": 979133120.0,
            "5655": 983462400.0,
            "5660": 956445056.0,
            "5665": 953438656.0,
            "5670": 966098048.0,
            "5675": 967653504.0,
            "5680": 978696960.0,
            "5685": 962081664.0,
            "5690": 935846976.0,
            "5695": 963701312.0,
            "5700": 952427904.0,
            "5705": 974415040.0,
            "5710": 971206592.0,
            "5715": 946098688.0,
            "5720": 974992128.0,
            "5725": 967437760.0,
            "5730": 978488448.0,
            "5735": 964857088.0,
            "5740": 943624576.0,
            "5745": 971192576.0,
            "5750": 981903232.0,
            "5755": 956694400.0,
            "5760": 963635840.0,
            "5765": 957695488.0,
            "5770": 955612928.0,
            "5775": 970811008.0,
            "5780": 962763520.0,
            "5785": 970576384.0,
            "5790": 974567808.0,
            "5795": 949716224.0,
            "5800": 965927104.0,
            "5805": 968762112.0,
            "5810": 976023424.0,
            "5815": 970054208.0,
            "5820": 936427008.0,
            "5825": 969278720.0,
            "5830": 977675520.0,
            "5835": 975029120.0,
            "5840": 963087488.0,
            "5845": 968739776.0,
            "5850": 942983296.0,
            "5855": 975974912.0,
            "5860": 979586240.0,
            "5865": 978395392.0,
            "5870": 968875392.0,
            "5875": 942253952.0,
            "5880": 964460160.0,
            "5885": 974721536.0,
            "5890": 972585792.0,
            "5895": 965718720.0,
            "5900": 941322880.0,
            "5905": 961816384.0,
            "5910": 958508224.0,
            "5915": 968174272.0,
            "5920": 977432704.0,
            "5925": 959568128.0,
            "5930": 946687488.0,
            "5935": 952420608.0,
            "5940": 977686528.0,
            "5945": 984781184.0,
            "5950": 980727168.0,
            "5955": 935069312.0,
            "5960": 961687360.0,
            "5965": 966000512.0,
            "5970": 970723136.0,
            "5975": 961933056.0,
            "5980": 958259328.0,
            "5985": 964539008.0,
            "5990": 973566464.0,
            "5995": 955920064.0,
            "6000": 955633408.0,
            "6005": 961454336.0,
            "6010": 952744448.0,
            "6015": 974466048.0,
            "6020": 978364544.0,
            "6025": 972190464.0,
            "6030": 955422784.0,
            "6035": 946979392.0,
            "6040": 962839104.0,
            "6045": 983682560.0,
            "6050": 956576768.0,
            "6055": 963457920.0,
            "6060": 945840704.0,
            "6065": 958443456.0,
            "6070": 978391168.0,
            "6075": 978098944.0,
            "6080": 957542208.0,
            "6085": 947739520.0,
            "6090": 953768064.0,
            "6095": 964672384.0,
            "6100": 979961024.0,
            "6105": 971023872.0,
            "6110": 961767296.0,
            "6115": 943790080.0,
            "6120": 968561856.0,
            "6125": 960789888.0,
            "6130": 983979392.0,
            "6135": 961018880.0,
            "6140": 958671488.0,
            "6145": 971176064.0,
            "6150": 968591040.0,
            "6155": 974967424.0,
            "6160": 977216128.0,
            "6165": 952743744.0,
            "6170": 951068800.0,
            "6175": 963383488.0,
            "6180": 969463616.0,
            "6185": 966344704.0,
            "6190": 963743872.0,
            "6195": 947204864.0,
            "6200": 969329280.0,
            "6205": 967223872.0,
            "6210": 959204928.0,
            "6215": 973012224.0,
            "6220": 936449280.0,
            "6225": 978666880.0,
            "6230": 976060672.0,
            "6235": 971802752.0,
            "6240": 965944192.0,
            "6245": 956029440.0,
            "6250": 956646784.0,
            "6255": 973400000.0,
            "6260": 978902144.0,
            "6265": 974880832.0,
            "6270": 958830848.0,
            "6275": 963811456.0,
            "6280": 973239808.0,
            "6285": 966086208.0,
            "6290": 971004032.0,
            "6295": 987564544.0,
            "6300": 947518144.0,
            "6305": 964881664.0,
            "6310": 979127360.0,
            "6315": 978491904.0,
            "6320": 971846912.0,
            "6325": 923003392.0,
            "6330": 959219840.0,
            "6335": 974940416.0,
            "6340": 984750784.0,
            "6345": 966922112.0,
            "6350": 944686016.0,
            "6355": 958149952.0,
            "6360": 972846848.0,
            "6365": 972207808.0,
            "6370": 959152064.0,
            "6375": 967179008.0,
            "6380": 951498688.0,
            "6385": 973743872.0,
            "6390": 965576576.0,
            "6395": 975377600.0,
            "6400": 984092928.0,
            "6405": 944003712.0,
            "6410": 977387776.0,
            "6415": 971562688.0,
            "6420": 956554560.0,
            "6425": 961257536.0,
            "6430": 957499520.0,
            "6435": 960148992.0,
            "6440": 968996800.0,
            "6445": 973577728.0,
            "6450": 974256256.0,
            "6455": 962215552.0,
            "6460": 941143040.0,
            "6465": 974594240.0,
            "6470": 979986688.0,
            "6475": 960787200.0,
            "6480": 967680768.0,
            "6485": 948695872.0,
            "6490": 970736896.0,
            "6495": 988356416.0,
            "6500": 980444224.0,
            "6505": 972048768.0,
            "6510": 951538560.0,
            "6515": 957553536.0,
            "6520": 978988160.0,
            "6525": 979075968.0,
            "6530": 973432448.0,
            "6535": 967927680.0,
            "6540": 950344576.0,
            "6545": 966282304.0,
            "6550": 979666944.0,
            "6555": 967156480.0,
            "6560": 975378752.0,
            "6565": 949388480.0,
            "6570": 952153856.0,
            "6575": 962491968.0,
            "6580": 975828224.0,
            "6585": 979201600.0,
            "6590": 949150784.0,
            "6595": 961570816.0,
            "6600": 961156800.0,
            "6605": 961387776.0,
            "6610": 985139264.0,
            "6615": 959604992.0,
            "6620": 944527488.0,
            "6625": 970979072.0,
            "6630": 971344128.0,
            "6635": 964018688.0,
            "6640": 959735296.0,
            "6645": 951066496.0,
            "6650": 978983360.0,
            "6655": 965888896.0,
            "6660": 968655296.0,
            "6665": 969051328.0,
            "6670": 932925440.0,
            "6675": 970809600.0,
            "6680": 968930432.0,
            "6685": 958834048.0,
            "6690": 956239360.0,
            "6695": 955414528.0,
            "6700": 961951744.0,
            "6705": 979267840.0,
            "6710": 971185792.0,
            "6715": 966765440.0,
            "6720": 973915840.0,
            "6725": 941977280.0,
            "6730": 979326080.0,
            "6735": 994512256.0,
            "6740": 976712000.0,
            "6745": 974664320.0,
            "6750": 939125120.0,
            "6755": 977587840.0,
            "6760": 969794048.0,
            "6765": 978431104.0,
            "6770": 975390848.0,
            "6775": 943240896.0,
            "6780": 947394304.0,
            "6785": 975457408.0,
            "6790": 960606208.0,
            "6795": 976119744.0,
            "6800": 973160064.0,
            "6805": 946682880.0,
            "6810": 958523648.0,
            "6815": 970806784.0,
            "6820": 977803264.0,
            "6825": 969525248.0,
            "6830": 950521664.0,
            "6835": 981722624.0,
            "6840": 983268352.0,
            "6845": 949033536.0,
            "6850": 965785344.0,
            "6855": 954186880.0,
            "6860": 979099776.0,
            "6865": 984086016.0,
            "6870": 965062656.0,
            "6875": 978762112.0,
            "6880": 950469248.0,
            "6885": 958533248.0,
            "6890": 960226240.0,
            "6895": 965886592.0,
            "6900": 985432576.0,
            "6905": 968809536.0,
            "6910": 950022976.0,
            "6915": 970876928.0,
            "6920": 967301760.0,
            "6925": 965291136.0,
            "6930": 964642368.0,
            "6935": 952024256.0,
            "6940": 963106176.0,
            "6945": 986617216.0,
            "6950": 973548096.0,
            "6955": 964838400.0,
            "6960": 940152704.0,
            "6965": 974565248.0,
            "6970": 978542720.0,
            "6975": 985066496.0,
            "6980": 982483584.0,
            "6985": 959913728.0,
            "6990": 945569024.0,
            "6995": 987496320.0,
            "7000": 963293504.0,
            "7005": 962922496.0,
            "7010": 984873920.0,
            "7015": 945472128.0,
            "7020": 982970688.0,
            "7025": 968730752.0,
            "7030": 953267840.0,
            "7035": 983032576.0,
            "7040": 950545536.0,
            "7045": 955794944.0,
            "7050": 959998336.0,
            "7055": 963843712.0,
            "7060": 976703872.0,
            "7065": 968243456.0,
            "7070": 953732288.0,
            "7075": 956633088.0,
            "7080": 969051392.0,
            "7085": 965646976.0,
            "7090": 969275648.0,
            "7095": 959794944.0,
            "7100": 973258112.0,
            "7105": 973233792.0,
            "7110": 970156224.0,
            "7115": 958695360.0,
            "7120": 949100544.0,
            "7125": 963213760.0,
            "7130": 971458688.0,
            "7135": 964614656.0,
            "7140": 961472768.0,
            "7145": 930478144.0,
            "7150": 946305344.0,
            "7155": 990945728.0,
            "7160": 968504640.0,
            "7165": 957035648.0,
            "7170": 968091136.0,
            "7175": 955889536.0,
            "7180": 958098304.0,
            "7185": 984769280.0,
            "7190": 978724544.0,
            "7195": 973823360.0,
            "7200": 935906432.0,
            "7205": 957574976.0,
            "7210": 967287296.0,
            "7215": 969486848.0,
            "7220": 982282368.0,
            "7225": 928875264.0,
            "7230": 949709248.0,
            "7235": 967103360.0,
            "7240": 966992512.0,
            "7245": 967701120.0,
            "7250": 949504448.0,
            "7255": 957437248.0,
            "7260": 970348672.0,
            "7265": 975011200.0,
            "7270": 959861440.0,
            "7275": 959238720.0,
            "7280": 957073536.0,
            "7285": 977612800.0,
            "7290": 977410304.0,
            "7295": 963012224.0,
            "7300": 975696000.0,
            "7305": 964281216.0,
            "7310": 977590784.0,
            "7315": 966920512.0,
            "7320": 974589952.0,
            "7325": 967030144.0,
            "7330": 959814144.0,
            "7335": 964098432.0,
            "7340": 977865216.0,
            "7345": 967964416.0,
            "7350": 984903808.0,
            "7355": 959835136.0,
            "7360": 949175360.0,
            "7365": 972711808.0,
            "7370": 982594432.0,
            "7375": 963297920.0,
            "7380": 964137600.0,
            "7385": 948664704.0,
            "7390": 963972352.0,
            "7395": 958382528.0,
            "7400": 969862656.0,
            "7405": 987809920.0,
            "7410": 952313344.0,
            "7415": 950431680.0,
            "7420": 967091520.0,
            "7425": 983148160.0,
            "7430": 965896640.0,
            "7435": 972987520.0,
            "7440": 937056448.0,
            "7445": 968792960.0,
            "7450": 980130752.0,
            "7455": 971354240.0,
            "7460": 972147072.0,
            "7465": 939438976.0,
            "7470": 971876800.0,
            "7475": 958106752.0,
            "7480": 969431232.0,
            "7485": 961637632.0,
            "7490": 934427904.0,
            "7495": 957212288.0,
            "7500": 969499392.0,
            "7505": 970369536.0,
            "7510": 972331904.0,
            "7515": 979583680.0,
            "7520": 951895680.0,
            "7525": 970542720.0,
            "7530": 954594944.0,
            "7535": 971456640.0,
            "7540": 979399808.0,
            "7545": 959210880.0,
            "7550": 960539264.0,
            "7555": 960183744.0,
            "7560": 970406144.0,
            "7565": 955121152.0,
            "7570": 942681216.0,
            "7575": 965667328.0,
            "7580": 982016448.0,
            "7585": 978957312.0,
            "7590": 970352896.0,
            "7595": 949944064.0,
            "7600": 946166208.0,
            "7605": 982330368.0,
            "7610": 969105344.0,
            "7615": 988675072.0,
            "7620": 957024576.0,
            "7625": 941271296.0,
            "7630": 971452736.0,
            "7635": 984388096.0,
            "7640": 983484032.0,
            "7645": 968295040.0,
            "7650": 959136256.0,
            "7655": 962572608.0,
            "7660": 969101888.0,
            "7665": 978367872.0,
            "7670": 975598336.0,
            "7675": 975485568.0,
            "7680": 942860288.0,
            "7685": 960278336.0,
            "7690": 975426304.0,
            "7695": 982429888.0,
            "7700": 979395712.0,
            "7705": 940767936.0,
            "7710": 974726144.0,
            "7715": 979320064.0,
            "7720": 967701888.0,
            "7725": 960431040.0,
            "7730": 943571072.0,
            "7735": 968205120.0,
            "7740": 980144320.0,
            "7745": 964028096.0,
            "7750": 963510528.0,
            "7755": 959923712.0,
            "7760": 970409472.0,
            "7765": 970840064.0,
            "7770": 962608256.0,
            "7775": 981822592.0,
            "7780": 964937536.0,
            "7785": 959813888.0,
            "7790": 968418688.0,
            "7795": 968810368.0,
            "7800": 971942400.0,
            "7805": 968393600.0,
            "7810": 945837056.0,
            "7815": 963393664.0,
            "7820": 974554240.0,
            "7825": 963661440.0,
            "7830": 957249792.0,
            "7835": 949943296.0,
            "7840": 957195520.0,
            "7845": 954041216.0,
            "7850": 979637248.0,
            "7855": 986769152.0,
            "7860": 947535296.0,
            "7865": 949192448.0,
            "7870": 965347520.0,
            "7875": 975739648.0,
            "7880": 968699904.0,
            "7885": 969383808.0,
            "7890": 951943040.0,
            "7895": 974806144.0,
            "7900": 963731392.0,
            "7905": 963952640.0,
            "7910": 965562240.0,
            "7915": 943316480.0,
            "7920": 951016704.0,
            "7925": 969537152.0,
            "7930": 964735232.0,
            "7935": 984412288.0,
            "7940": 964486016.0,
            "7945": 950846144.0,
            "7950": 962123008.0,
            "7955": 980081408.0,
            "7960": 963652928.0,
            "7965": 953426176.0,
            "7970": 951790080.0,
            "7975": 969476032.0,
            "7980": 965341184.0,
            "7985": 959473280.0,
            "7990": 968080384.0,
            "7995": 946841344.0,
            "8000": 962742912.0,
            "8005": 980642304.0,
            "8010": 965851136.0,
            "8015": 982918656.0,
            "8020": 961115904.0,
            "8025": 965420992.0,
            "8030": 958410048.0,
            "8035": 975936640.0,
            "8040": 960876928.0,
            "8045": 948345920.0,
            "8050": 959722624.0,
            "8055": 979580480.0,
            "8060": 969802624.0,
            "8065": 958418816.0,
            "8070": 964214784.0,
            "8075": 942438528.0,
            "8080": 966293376.0,
            "8085": 966952576.0,
            "8090": 983725632.0,
            "8095": 988883968.0,
            "8100": 966654784.0,
            "8105": 944475136.0,
            "8110": 969154432.0,
            "8115": 985881472.0,
            "8120": 975193216.0,
            "8125": 964510016.0,
            "8130": 966592896.0,
            "8135": 968277376.0,
            "8140": 964192384.0,
            "8145": 995580288.0,
            "8150": 973731712.0,
            "8155": 938881344.0,
            "8160": 964549120.0,
            "8165": 973467840.0,
            "8170": 968669248.0,
            "8175": 961785344.0,
            "8180": 936312960.0,
            "8185": 963032448.0,
            "8190": 968045440.0,
            "8195": 977582464.0,
            "8200": 956846720.0,
            "8205": 960801152.0,
            "8210": 946900800.0,
            "8215": 982422912.0,
            "8220": 988640640.0,
            "8225": 966550912.0,
            "8230": 962577728.0,
            "8235": 934266880.0,
            "8240": 980121088.0,
            "8245": 976742656.0,
            "8250": 964438272.0,
            "8255": 977690240.0,
            "8260": 957215808.0,
            "8265": 982982016.0,
            "8270": 953045504.0,
            "8275": 974443520.0,
            "8280": 974709696.0,
            "8285": 953580736.0,
            "8290": 940207104.0,
            "8295": 981334400.0,
            "8300": 973592832.0,
            "8305": 978208704.0,
            "8310": 951476736.0,
            "8315": 938155648.0,
            "8320": 977608256.0,
            "8325": 968131584.0,
            "8330": 990118272.0,
            "8335": 975845952.0,
            "8340": 947797120.0,
            "8345": 971108608.0,
            "8350": 970389568.0,
            "8355": 975309696.0,
            "8360": 979959168.0,
            "8365": 933125760.0,
            "8370": 965870144.0,
            "8375": 979941504.0,
            "8380": 965457536.0,
            "8385": 972902272.0,
            "8390": 962932800.0,
            "8395": 951285504.0,
            "8400": 972582528.0,
            "8405": 951932416.0,
            "8410": 960913088.0,
            "8415": 965733120.0,
            "8420": 942024128.0,
            "8425": 968350080.0,
            "8430": 960937984.0,
            "8435": 966275200.0,
            "8440": 969976960.0,
            "8445": 953032704.0,
            "8450": 984572288.0,
            "8455": 990529984.0,
            "8460": 969012352.0,
            "8465": 967267968.0,
            "8470": 963153024.0,
            "8475": 942747136.0,
            "8480": 987456448.0,
            "8485": 979909888.0,
            "8490": 992218368.0,
            "8495": 971653952.0,
            "8500": 951453312.0,
            "8505": 983301760.0,
            "8510": 973866432.0,
            "8515": 968949184.0,
            "8520": 961717248.0,
            "8525": 945032064.0,
            "8530": 984401920.0,
            "8535": 978362240.0,
            "8540": 967715968.0,
            "8545": 968670464.0,
            "8550": 942272768.0,
            "8555": 971942272.0,
            "8560": 958429504.0,
            "8565": 975786240.0,
            "8570": 974826944.0,
            "8575": 971790976.0,
            "8580": 932055168.0,
            "8585": 965917312.0,
            "8590": 978957568.0,
            "8595": 979130816.0,
            "8600": 984002176.0,
            "8605": 958012032.0,
            "8610": 983798656.0,
            "8615": 977747648.0,
            "8620": 963337984.0,
            "8625": 979429248.0,
            "8630": 943333760.0,
            "8635": 962054912.0,
            "8640": 973241024.0,
            "8645": 970513088.0,
            "8650": 969657984.0,
            "8655": 970996480.0,
            "8660": 944544640.0,
            "8665": 986559104.0,
            "8670": 960599424.0,
            "8675": 974340096.0,
            "8680": 962877824.0,
            "8685": 955741440.0,
            "8690": 978440576.0,
            "8695": 968964672.0,
            "8700": 972962240.0,
            "8705": 973851264.0,
            "8710": 946968384.0,
            "8715": 973256064.0,
            "8720": 958203392.0,
            "8725": 978976448.0,
            "8730": 985659904.0,
            "8735": 952336768.0,
            "8740": 940581440.0,
            "8745": 987553920.0,
            "8750": 972231552.0,
            "8755": 971589760.0,
            "8760": 965322944.0,
            "8765": 935034496.0,
            "8770": 986508864.0,
            "8775": 969788416.0,
            "8780": 967404160.0,
            "8785": 961988480.0,
            "8790": 947342144.0,
            "8795": 969540800.0,
            "8800": 970839296.0,
            "8805": 973361792.0,
            "8810": 983391808.0,
            "8815": 951703552.0,
            "8820": 939856512.0,
            "8825": 964356992.0,
            "8830": 981113152.0,
            "8835": 971531776.0,
            "8840": 979490752.0,
            "8845": 951368832.0,
            "8850": 986906880.0,
            "8855": 970990464.0,
            "8860": 962109568.0,
            "8865": 957197952.0,
            "8870": 945626752.0,
            "8875": 968480512.0,
            "8880": 984062848.0,
            "8885": 971587712.0,
            "8890": 970177344.0,
            "8895": 952798592.0,
            "8900": 961704064.0,
            "8905": 976776384.0,
            "8910": 982036032.0,
            "8915": 980878848.0,
            "8920": 967569408.0,
            "8925": 940163840.0,
            "8930": 970577664.0,
            "8935": 964259456.0,
            "8940": 977594496.0,
            "8945": 981714688.0,
            "8950": 945977984.0,
            "8955": 972681344.0,
            "8960": 973207680.0,
            "8965": 973566848.0,
            "8970": 966348032.0,
            "8975": 937087360.0,
            "8980": 952802688.0,
            "8985": 977877056.0,
            "8990": 967091328.0,
            "8995": 980577152.0,
            "9000": 952173120.0,
            "9005": 950807680.0,
            "9010": 975107072.0,
            "9015": 982421376.0,
            "9020": 958921984.0,
            "9025": 979619712.0,
            "9030": 953713792.0,
            "9035": 968417408.0,
            "9040": 978206976.0,
            "9045": 968768384.0,
            "9050": 983084800.0,
            "9055": 947871872.0,
            "9060": 956280064.0,
            "9065": 970011136.0,
            "9070": 967853952.0,
            "9075": 980916864.0,
            "9080": 952196160.0,
            "9085": 971256000.0,
            "9090": 963370752.0,
            "9095": 968275072.0,
            "9100": 974460544.0,
            "9105": 959749504.0,
            "9110": 947873152.0,
            "9115": 956488704.0,
            "9120": 985580608.0,
            "9125": 963025472.0,
            "9130": 958505088.0,
            "9135": 951311040.0,
            "9140": 967061120.0,
            "9145": 976830080.0,
            "9150": 986997120.0,
            "9155": 976930304.0,
            "9160": 957827584.0,
            "9165": 950181760.0,
            "9170": 988529408.0,
            "9175": 971285120.0,
            "9180": 967484544.0,
            "9185": 955065728.0,
            "9190": 956905472.0,
            "9195": 965805952.0,
            "9200": 968609984.0,
            "9205": 967243008.0,
            "9210": 984521600.0,
            "9215": 931692928.0,
            "9220": 949709184.0,
            "9225": 970934144.0,
            "9230": 971047936.0,
            "9235": 971696896.0,
            "9240": 959609856.0,
            "9245": 963866752.0,
            "9250": 961353856.0,
            "9255": 983177472.0,
            "9260": 979159808.0,
            "9265": 952472192.0,
            "9270": 949325440.0,
            "9275": 978180928.0,
            "9280": 977863936.0,
            "9285": 962335808.0,
            "9290": 978959232.0,
            "9295": 958535552.0,
            "9300": 965616128.0,
            "9305": 969044416.0,
            "9310": 972999680.0,
            "9315": 976352576.0,
            "9320": 947984000.0,
            "9325": 979480000.0,
            "9330": 977961408.0,
            "9335": 975289920.0,
            "9340": 960088576.0,
            "9345": 943327488.0,
            "9350": 952514624.0,
            "9355": 963388864.0,
            "9360": 960489728.0,
            "9365": 983742016.0,
            "9370": 982422016.0,
            "9375": 942030720.0,
            "9380": 983055936.0,
            "9385": 985392704.0,
            "9390": 972963648.0,
            "9395": 978623872.0,
            "9400": 938136000.0,
            "9405": 968425920.0,
            "9410": 981618496.0,
            "9415": 991820352.0,
            "9420": 960342272.0,
            "9425": 956876736.0,
            "9430": 938878208.0,
            "9435": 974569728.0,
            "9440": 959602432.0,
            "9445": 974199232.0,
            "9450": 961384512.0,
            "9455": 945848000.0,
            "9460": 978146560.0,
            "9465": 987977856.0,
            "9470": 963087040.0,
            "9475": 983735936.0,
            "9480": 931197056.0,
            "9485": 987662144.0,
            "9490": 963667136.0,
            "9495": 972328000.0,
            "9500": 982451968.0,
            "9505": 969941120.0,
            "9510": 964244992.0,
            "9515": 957158080.0,
            "9520": 948370816.0,
            "9525": 965322688.0,
            "9530": 958603072.0,
            "9535": 951367040.0,
            "9540": 954254144.0,
            "9545": 979345920.0,
            "9550": 955704832.0,
            "9555": 953158784.0,
            "9560": 958148288.0,
            "9565": 969842688.0,
            "9570": 977689152.0,
            "9575": 959007424.0,
            "9580": 962802048.0,
            "9585": 945914624.0,
            "9590": 948319296.0,
            "9595": 966821760.0,
            "9600": 984637504.0,
            "9605": 985018624.0,
            "9610": 943615744.0,
            "9615": 952553984.0,
            "9620": 980919424.0,
            "9625": 978670592.0,
            "9630": 969793152.0,
            "9635": 974880576.0,
            "9640": 940403584.0,
            "9645": 962338560.0,
            "9650": 971139200.0,
            "9655": 987188544.0,
            "9660": 962729856.0,
            "9665": 949970880.0,
            "9670": 966321856.0,
            "9675": 963186560.0,
            "9680": 964929664.0,
            "9685": 986634432.0,
            "9690": 940214400.0,
            "9695": 950507520.0,
            "9700": 975627584.0,
            "9705": 972612928.0,
            "9710": 966856576.0,
            "9715": 971143616.0,
            "9720": 940513856.0,
            "9725": 965912128.0,
            "9730": 973877888.0,
            "9735": 974172608.0,
            "9740": 971110656.0,
            "9745": 950961280.0,
            "9750": 979701376.0,
            "9755": 970057856.0,
            "9760": 967854336.0,
            "9765": 963810240.0,
            "9770": 952441600.0,
            "9775": 956632576.0,
            "9780": 970278784.0,
            "9785": 958686592.0,
            "9790": 961127424.0,
            "9795": 958470784.0,
            "9800": 949352448.0,
            "9805": 962490880.0,
            "9810": 978601856.0,
            "9815": 977788800.0,
            "9820": 982581120.0,
            "9825": 939349376.0,
            "9830": 969404736.0,
            "9835": 972829568.0,
            "9840": 971145920.0,
            "9845": 967146880.0,
            "9850": 946664960.0,
            "9855": 957276608.0,
            "9860": 987617664.0,
            "9865": 970002112.0,
            "9870": 990063104.0,
            "9875": 957023168.0,
            "9880": 930919552.0,
            "9885": 963369984.0,
            "9890": 972256000.0,
            "9895": 983795200.0,
            "9900": 956852096.0,
            "9905": 938860864.0,
            "9910": 978612544.0,
            "9915": 973442944.0,
            "9920": 944400512.0,
            "9925": 962460544.0,
            "9930": 947575808.0,
            "9935": 960768768.0,
            "9940": 965991360.0,
            "9945": 958542464.0,
            "9950": 963943552.0,
            "9955": 943303040.0,
            "9960": 966928320.0,
            "9965": 983459328.0,
            "9970": 966433344.0,
            "9975": 963658880.0,
            "9980": 980524416.0,
            "9985": 942412992.0,
            "9990": 976514688.0,
            "9995": 982421696.0,
            "10000": 971874304.0,
            "10005": 969540544.0,
            "10010": 944219008.0,
            "10015": 983176832.0,
            "10020": 978159744.0,
            "10025": 979523840.0,
            "10030": 971365760.0,
            "10035": 946237248.0,
            "10040": 949943616.0,
            "10045": 977963200.0,
            "10050": 985770112.0,
            "10055": 990172800.0,
            "10060": 958786176.0,
            "10065": 947122944.0,
            "10070": 966979840.0,
            "10075": 979154176.0,
            "10080": 971521536.0,
            "10085": 974608640.0,
            "10090": 943429632.0,
            "10095": 962804608.0,
            "10100": 971998656.0,
            "10105": 975960064.0,
            "10110": 971894144.0,
            "10115": 948832640.0,
            "10120": 962580992.0,
            "10125": 973813568.0,
            "10130": 980220160.0,
            "10135": 972049792.0,
            "10140": 957743104.0,
            "10145": 933921280.0,
            "10150": 973387264.0,
            "10155": 969433024.0,
            "10160": 962035072.0,
            "10165": 974995968.0,
            "10170": 944365376.0,
            "10175": 978781568.0,
            "10180": 983454592.0,
            "10185": 978413888.0,
            "10190": 955347840.0,
            "10195": 936952960.0,
            "10200": 987896064.0,
            "10205": 972773312.0,
            "10210": 966531840.0,
            "10215": 975826432.0,
            "10220": 948246272.0,
            "10225": 950423040.0,
            "10230": 975369984.0,
            "10235": 953940928.0,
            "10240": 969345664.0,
            "10245": 961612224.0,
            "10250": 936153472.0,
            "10255": 979515648.0,
            "10260": 964372224.0,
            "10265": 967377216.0,
            "10270": 968830336.0,
            "10275": 935979648.0,
            "10280": 969560320.0,
            "10285": 995739008.0,
            "10290": 978790400.0,
            "10295": 981477056.0,
            "10300": 951900864.0,
            "10305": 971789952.0,
            "10310": 960075584.0,
            "10315": 971224192.0,
            "10320": 984960512.0,
            "10325": 983215680.0,
            "10330": 934961856.0,
            "10335": 976403328.0,
            "10340": 957454976.0,
            "10345": 973779776.0,
            "10350": 984533888.0,
            "10355": 941692160.0,
            "10360": 961896384.0,
            "10365": 974009280.0,
            "10370": 980271744.0,
            "10375": 969911424.0,
            "10380": 961242688.0,
            "10385": 955119680.0,
            "10390": 990167424.0,
            "10395": 964697344.0,
            "10400": 960800384.0,
            "10405": 949794880.0,
            "10410": 954973696.0,
            "10415": 975369536.0,
            "10420": 967024896.0,
            "10425": 969123584.0,
            "10430": 964412416.0,
            "10435": 962679232.0,
            "10440": 971555456.0,
            "10445": 972121216.0,
            "10450": 974775936.0,
            "10455": 966035712.0,
            "10460": 948282112.0,
            "10465": 971189376.0,
            "10470": 972312448.0,
            "10475": 978973184.0,
            "10480": 997033344.0,
            "10485": 949347968.0,
            "10490": 934452480.0,
            "10495": 968799488.0,
            "10500": 978096896.0,
            "10505": 958678784.0,
            "10510": 950661504.0,
            "10515": 953816064.0,
            "10520": 971619456.0,
            "10525": 969523584.0,
            "10530": 969956416.0,
            "10535": 985959616.0,
            "10540": 946655296.0,
            "10545": 970224256.0,
            "10550": 969050048.0,
            "10555": 959127040.0,
            "10560": 975928192.0,
            "10565": 961004864.0,
            "10570": 968760576.0,
            "10575": 972361920.0,
            "10580": 960450944.0,
            "10585": 973098496.0,
            "10590": 951276864.0,
            "10595": 955899584.0,
            "10600": 967134464.0,
            "10605": 985775616.0,
            "10610": 965887488.0,
            "10615": 976377984.0,
            "10620": 940772224.0,
            "10625": 964654080.0,
            "10630": 967061632.0,
            "10635": 972742464.0,
            "10640": 974269696.0,
            "10645": 948248320.0,
            "10650": 965526656.0,
            "10655": 985549248.0,
            "10660": 976126848.0,
            "10665": 966806592.0,
            "10670": 954781504.0,
            "10675": 934158336.0,
            "10680": 985955456.0,
            "10685": 990879552.0,
            "10690": 963724160.0,
            "10695": 971465600.0,
            "10700": 949488000.0,
            "10705": 977846336.0,
            "10710": 967774464.0,
            "10715": 966883840.0,
            "10720": 965741184.0,
            "10725": 943910976.0,
            "10730": 979763648.0,
            "10735": 960526080.0,
            "10740": 970887680.0,
            "10745": 984161792.0,
            "10750": 981011392.0,
            "10755": 944759616.0,
            "10760": 969277952.0,
            "10765": 972565888.0,
            "10770": 973246528.0,
            "10775": 958365120.0,
            "10780": 949088128.0,
            "10785": 953260736.0,
            "10790": 969640576.0,
            "10795": 959977984.0,
            "10800": 971914112.0,
            "10805": 950906048.0,
            "10810": 973705856.0,
            "10815": 959373952.0,
            "10820": 970980544.0,
            "10825": 966693888.0,
            "10830": 956609088.0,
            "10835": 962903104.0,
            "10840": 970530304.0,
            "10845": 963528000.0,
            "10850": 957743936.0,
            "10855": 967336832.0,
            "10860": 950516224.0,
            "10865": 963973696.0,
            "10870": 983059712.0,
            "10875": 981940800.0,
            "10880": 958105920.0,
            "10885": 954574784.0,
            "10890": 972692032.0,
            "10895": 973026752.0,
            "10900": 970016512.0,
            "10905": 964408192.0,
            "10910": 938543424.0,
            "10915": 960121856.0,
            "10920": 983112512.0,
            "10925": 969525632.0,
            "10930": 968056832.0,
            "10935": 962330112.0,
            "10940": 953695936.0,
            "10945": 964656000.0,
            "10950": 971912704.0,
            "10955": 966274688.0,
            "10960": 971239296.0,
            "10965": 965974272.0,
            "10970": 983152896.0,
            "10975": 965500992.0,
            "10980": 974347648.0,
            "10985": 986153152.0,
            "10990": 950734720.0,
            "10995": 962957632.0,
            "11000": 984645632.0,
            "11005": 978005888.0,
            "11010": 971084736.0,
            "11015": 969540928.0,
            "11020": 947443584.0,
            "11025": 959759232.0,
            "11030": 977565888.0,
            "11035": 975462656.0,
            "11040": 985919232.0,
            "11045": 956191168.0,
            "11050": 973025536.0,
            "11055": 974123904.0,
            "11060": 961535168.0,
            "11065": 984926080.0,
            "11070": 949335168.0,
            "11075": 976048448.0,
            "11080": 971612864.0,
            "11085": 966872192.0,
            "11090": 975660032.0,
            "11095": 946314240.0,
            "11100": 965521216.0,
            "11105": 973891520.0,
            "11110": 980607680.0,
            "11115": 967578496.0,
            "11120": 956988800.0,
            "11125": 956350720.0,
            "11130": 974764480.0,
            "11135": 978995136.0,
            "11140": 964107200.0,
            "11145": 965760512.0,
            "11150": 935356672.0,
            "11155": 975383808.0,
            "11160": 983591616.0,
            "11165": 982103040.0,
            "11170": 977214464.0,
            "11175": 957652544.0,
            "11180": 961923264.0,
            "11185": 971484736.0,
            "11190": 979394880.0,
            "11195": 984898496.0,
            "11200": 982384512.0,
            "11205": 941871616.0,
            "11210": 984210368.0,
            "11215": 966940032.0,
            "11220": 982933440.0,
            "11225": 961345216.0,
            "11230": 952794112.0,
            "11235": 981397952.0,
            "11240": 977288064.0,
            "11245": 965789632.0,
            "11250": 969070912.0,
            "11255": 960030016.0,
            "11260": 979480128.0,
            "11265": 963433024.0,
            "11270": 981152768.0,
            "11275": 968032896.0,
            "11280": 955902592.0,
            "11285": 953195008.0,
            "11290": 956118656.0,
            "11295": 967631296.0,
            "11300": 961921920.0,
            "11305": 958093568.0,
            "11310": 946019264.0,
            "11315": 982887104.0,
            "11320": 963966656.0,
            "11325": 980683776.0,
            "11330": 975045376.0,
            "11335": 951385408.0,
            "11340": 970287360.0,
            "11345": 969715584.0,
            "11350": 981270144.0,
            "11355": 981865920.0,
            "11360": 940830400.0,
            "11365": 970416256.0,
            "11370": 978532352.0,
            "11375": 975066432.0,
            "11380": 968255104.0,
            "11385": 958610240.0,
            "11390": 937860224.0,
            "11395": 976816384.0,
            "11400": 973094784.0,
            "11405": 960792256.0,
            "11410": 965934016.0,
            "11415": 929101632.0,
            "11420": 964656704.0,
            "11425": 980900288.0,
            "11430": 978352256.0,
            "11435": 969930496.0,
            "11440": 944960256.0,
            "11445": 974945024.0,
            "11450": 984193536.0,
            "11455": 970920192.0,
            "11460": 964788032.0,
            "11465": 959779840.0,
            "11470": 954796800.0,
            "11475": 972394944.0,
            "11480": 956111104.0,
            "11485": 976547776.0,
            "11490": 986096896.0,
            "11495": 958885632.0,
            "11500": 970159296.0,
            "11505": 963629376.0,
            "11510": 976196288.0,
            "11515": 977754432.0,
            "11520": 953592832.0,
            "11525": 975656448.0,
            "11530": 976241664.0,
            "11535": 979771648.0,
            "11540": 974225728.0,
            "11545": 953371520.0,
            "11550": 952960320.0,
            "11555": 981597184.0,
            "11560": 984420032.0,
            "11565": 965007424.0,
            "11570": 966005824.0,
            "11575": 951323968.0,
            "11580": 975949760.0,
            "11585": 976971456.0,
            "11590": 969004672.0,
            "11595": 976372352.0,
            "11600": 946117504.0,
            "11605": 972976512.0,
            "11610": 982099648.0,
            "11615": 971848576.0,
            "11620": 968932096.0,
            "11625": 948544000.0,
            "11630": 937082240.0,
            "11635": 973246784.0,
            "11640": 980778496.0,
            "11645": 979550272.0,
            "11650": 971571200.0,
            "11655": 955918720.0,
            "11660": 980377984.0,
            "11665": 957771840.0,
            "11670": 982370944.0,
            "11675": 972031296.0,
            "11680": 956038336.0,
            "11685": 982695680.0,
            "11690": 968206848.0,
            "11695": 967654080.0,
            "11700": 973386624.0,
            "11705": 955828160.0,
            "11710": 964362176.0,
            "11715": 982948608.0,
            "11720": 983559424.0,
            "11725": 965129216.0,
            "11730": 955433728.0,
            "11735": 942620864.0,
            "11740": 973654592.0,
            "11745": 970718144.0,
            "11750": 961559872.0,
            "11755": 962806464.0,
            "11760": 949884096.0,
            "11765": 983433536.0,
            "11770": 984378496.0,
            "11775": 975203712.0,
            "11780": 985170496.0,
            "11785": 946901760.0,
            "11790": 972012608.0,
            "11795": 970299840.0,
            "11800": 972807616.0,
            "11805": 986521728.0,
            "11810": 966950464.0,
            "11815": 955589632.0,
            "11820": 973517952.0,
            "11825": 970349952.0,
            "11830": 974794944.0,
            "11835": 961566464.0,
            "11840": 944378944.0,
            "11845": 980594432.0,
            "11850": 974513472.0,
            "11855": 977893120.0,
            "11860": 971389120.0,
            "11865": 938261120.0,
            "11870": 939964992.0,
            "11875": 989777216.0,
            "11880": 971971968.0,
            "11885": 962679040.0,
            "11890": 970033472.0,
            "11895": 965194944.0,
            "11900": 979072192.0,
            "11905": 961049536.0,
            "11910": 983430528.0,
            "11915": 989806592.0,
            "11920": 944166464.0,
            "11925": 993816768.0,
            "11930": 964211648.0,
            "11935": 963032704.0,
            "11940": 976558272.0,
            "11945": 944637120.0,
            "11950": 977490048.0,
            "11955": 978905216.0,
            "11960": 972040448.0,
            "11965": 975989504.0,
            "11970": 963020480.0,
            "11975": 962954624.0,
            "11980": 977423104.0,
            "11985": 953010752.0,
            "11990": 968248320.0,
            "11995": 964736704.0,
            "12000": 957960448.0,
            "12005": 973929920.0,
            "12010": 978787456.0,
            "12015": 971681344.0,
            "12020": 972846592.0,
            "12025": 934010752.0,
            "12030": 968579776.0,
            "12035": 983657024.0,
            "12040": 977184192.0,
            "12045": 981385152.0,
            "12050": 930805504.0,
            "12055": 938148672.0,
            "12060": 973965760.0,
            "12065": 965572288.0,
            "12070": 967819456.0,
            "12075": 949533120.0,
            "12080": 952959104.0,
            "12085": 972328000.0,
            "12090": 963512384.0,
            "12095": 963268736.0,
            "12100": 976624896.0,
            "12105": 950333184.0,
            "12110": 971577536.0,
            "12115": 967854912.0,
            "12120": 986356032.0,
            "12125": 980610816.0,
            "12130": 941366528.0,
            "12135": 955427904.0,
            "12140": 975803584.0,
            "12145": 979451008.0,
            "12150": 979544768.0,
            "12155": 961569664.0,
            "12160": 946185792.0,
            "12165": 968329536.0,
            "12170": 963709248.0,
            "12175": 967258816.0,
            "12180": 974632384.0,
            "12185": 952415552.0,
            "12190": 987879168.0,
            "12195": 970233024.0,
            "12200": 964549376.0,
            "12205": 968334528.0,
            "12210": 938908800.0,
            "12215": 996464640.0,
            "12220": 969469248.0,
            "12225": 979370624.0,
            "12230": 979961792.0,
            "12235": 950000896.0,
            "12240": 963697088.0,
            "12245": 965775680.0,
            "12250": 976681984.0,
            "12255": 968433280.0,
            "12260": 983664320.0,
            "12265": 931892032.0,
            "12270": 966171776.0,
            "12275": 979552512.0,
            "12280": 977688960.0,
            "12285": 970429632.0,
            "12290": 929315456.0,
            "12295": 976772032.0,
            "12300": 986049088.0,
            "12305": 969584256.0,
            "12310": 986005376.0,
            "12315": 936027456.0,
            "12320": 957642496.0,
            "12325": 966202240.0,
            "12330": 968292608.0,
            "12335": 963687360.0,
            "12340": 957247872.0,
            "12345": 944271424.0,
            "12350": 966649600.0,
            "12355": 975486080.0,
            "12360": 978634688.0,
            "12365": 964201856.0,
            "12370": 948778368.0,
            "12375": 963602944.0,
            "12380": 964688384.0,
            "12385": 972793664.0,
            "12390": 961485760.0,
            "12395": 961271168.0,
            "12400": 975342592.0,
            "12405": 976299904.0,
            "12410": 953299008.0,
            "12415": 962706688.0,
            "12420": 944036736.0,
            "12425": 948974592.0,
            "12430": 972069376.0,
            "12435": 968670592.0,
            "12440": 961658048.0,
            "12445": 951909632.0,
            "12450": 947131008.0,
            "12455": 980976832.0,
            "12460": 973901696.0,
            "12465": 954326144.0,
            "12470": 980931712.0,
            "12475": 958062720.0,
            "12480": 966837376.0,
            "12485": 978056896.0,
            "12490": 973895936.0,
            "12495": 969407936.0,
            "12500": 961141120.0,
            "12505": 943213824.0,
            "12510": 960766080.0,
            "12515": 969638080.0,
            "12520": 973971200.0,
            "12525": 972228096.0,
            "12530": 944434304.0,
            "12535": 976337728.0,
            "12540": 965435392.0,
            "12545": 971699456.0,
            "12550": 969423872.0,
            "12555": 940893248.0,
            "12560": 963999680.0,
            "12565": 947261568.0,
            "12570": 974171520.0,
            "12575": 962966784.0,
            "12580": 957904960.0,
            "12585": 964310592.0,
            "12590": 965663040.0,
            "12595": 978496832.0,
            "12600": 981415744.0,
            "12605": 949112192.0,
            "12610": 937705728.0,
            "12615": 962690944.0,
            "12620": 960966784.0,
            "12625": 966271936.0,
            "12630": 970736512.0,
            "12635": 962160064.0,
            "12640": 978256960.0,
            "12645": 969491328.0,
            "12650": 970048960.0,
            "12655": 964006464.0,
            "12660": 932254144.0,
            "12665": 956728320.0,
            "12670": 986153664.0,
            "12675": 965503808.0,
            "12680": 960963200.0,
            "12685": 951228032.0,
            "12690": 945266304.0,
            "12695": 978347904.0,
            "12700": 985300224.0,
            "12705": 958770112.0,
            "12710": 968391296.0,
            "12715": 956547968.0,
            "12720": "nan",
            "12725": "nan",
            "12730": "nan",
            "12735": "nan",
            "12740": "nan",
            "12745": "nan",
            "12750": "nan",
            "12755": "nan",
            "12760": "nan",
            "12765": "nan",
            "12770": "nan",
            "12775": "nan",
            "12780": "nan",
            "12785": "nan",
            "12790": "nan",
            "12795": "nan",
            "12800": "nan",
            "12805": "nan",
            "12810": "nan",
            "12815": "nan",
            "12820": "nan",
            "12825": "nan",
            "12830": "nan",
            "12835": "nan",
            "12840": "nan",
            "12845": "nan",
            "12850": "nan",
            "12855": "nan",
            "12860": "nan",
            "12865": "nan",
            "12870": "nan",
            "12875": "nan",
            "12880": "nan",
            "12885": "nan",
            "12890": "nan",
            "12895": "nan",
            "12900": "nan",
            "12905": "nan",
            "12910": "nan",
            "12915": "nan",
            "12920": "nan",
            "12925": "nan",
            "12930": "nan",
            "12935": "nan",
            "12940": "nan",
            "12945": "nan",
            "12950": "nan",
            "12955": "nan",
            "12960": "nan",
            "12965": "nan",
            "12970": "nan",
            "12975": "nan",
            "12980": "nan",
            "12985": "nan",
            "12990": "nan",
            "12995": "nan",
            "13000": "nan"
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 13000,
        "step_interval": 5,
        "values": {
            "1": 13268858880.0,
            "5": 13268858880.0,
            "10": 13268858880.0,
            "15": 13268858880.0,
            "20": 13268858880.0,
            "25": 13268858880.0,
            "30": 13268858880.0,
            "35": 13268858880.0,
            "40": 13268858880.0,
            "45": 13268858880.0,
            "50": 13268858880.0,
            "55": 13268858880.0,
            "60": 13268858880.0,
            "65": 13268858880.0,
            "70": 13268858880.0,
            "75": 13268858880.0,
            "80": 13268858880.0,
            "85": 13268858880.0,
            "90": 13268858880.0,
            "95": 13268858880.0,
            "100": 13268858880.0,
            "105": 13268858880.0,
            "110": 13268858880.0,
            "115": 13268858880.0,
            "120": 13268858880.0,
            "125": 13268858880.0,
            "130": 13268858880.0,
            "135": 13268858880.0,
            "140": 13268858880.0,
            "145": 13268858880.0,
            "150": 13268858880.0,
            "155": 13268858880.0,
            "160": 13268858880.0,
            "165": 13268858880.0,
            "170": 13268858880.0,
            "175": 13268858880.0,
            "180": 13268858880.0,
            "185": 13268858880.0,
            "190": 13268858880.0,
            "195": 13268858880.0,
            "200": 13268858880.0,
            "205": 13268858880.0,
            "210": 13268858880.0,
            "215": 13268858880.0,
            "220": 13268858880.0,
            "225": 13268858880.0,
            "230": 13268858880.0,
            "235": 13268858880.0,
            "240": 13268858880.0,
            "245": 13268858880.0,
            "250": 13268858880.0,
            "255": 13268858880.0,
            "260": 13268858880.0,
            "265": 13268858880.0,
            "270": 13268858880.0,
            "275": 13268858880.0,
            "280": 13268858880.0,
            "285": 13268858880.0,
            "290": 13268858880.0,
            "295": 13268858880.0,
            "300": 13268858880.0,
            "305": 13268858880.0,
            "310": 13268858880.0,
            "315": 13268858880.0,
            "320": 13268858880.0,
            "325": 13268858880.0,
            "330": 13268858880.0,
            "335": 13268858880.0,
            "340": 13268858880.0,
            "345": 13268858880.0,
            "350": 13268858880.0,
            "355": 13268858880.0,
            "360": 13268858880.0,
            "365": 13268858880.0,
            "370": 13268858880.0,
            "375": 13268858880.0,
            "380": 13268858880.0,
            "385": 13268858880.0,
            "390": 13268858880.0,
            "395": 13268858880.0,
            "400": 13268858880.0,
            "405": 13268858880.0,
            "410": 13268858880.0,
            "415": 13268858880.0,
            "420": 13268858880.0,
            "425": 13268858880.0,
            "430": 13268858880.0,
            "435": 13268858880.0,
            "440": 13268858880.0,
            "445": 13268858880.0,
            "450": 13268858880.0,
            "455": 13268858880.0,
            "460": 13268858880.0,
            "465": 13268858880.0,
            "470": 13268858880.0,
            "475": 13268858880.0,
            "480": 13268858880.0,
            "485": 13268858880.0,
            "490": 13268858880.0,
            "495": 13268858880.0,
            "500": 13268858880.0,
            "505": 13268858880.0,
            "510": 13268858880.0,
            "515": 13268858880.0,
            "520": 13268858880.0,
            "525": 13268858880.0,
            "530": 13268858880.0,
            "535": 13268858880.0,
            "540": 13268858880.0,
            "545": 13268858880.0,
            "550": 13268858880.0,
            "555": 13268858880.0,
            "560": 13268858880.0,
            "565": 13268858880.0,
            "570": 13268858880.0,
            "575": 13268858880.0,
            "580": 13268858880.0,
            "585": 13268858880.0,
            "590": 13268858880.0,
            "595": 13268858880.0,
            "600": 13268858880.0,
            "605": 13268858880.0,
            "610": 13268858880.0,
            "615": 13268858880.0,
            "620": 13268858880.0,
            "625": 13268858880.0,
            "630": 13268858880.0,
            "635": 13268858880.0,
            "640": 13268858880.0,
            "645": 13268858880.0,
            "650": 13268858880.0,
            "655": 13268858880.0,
            "660": 13268858880.0,
            "665": 13268858880.0,
            "670": 13268858880.0,
            "675": 13268858880.0,
            "680": 13268858880.0,
            "685": 13268858880.0,
            "690": 13268858880.0,
            "695": 13268858880.0,
            "700": 13268858880.0,
            "705": 13268858880.0,
            "710": 13268858880.0,
            "715": 13268858880.0,
            "720": 13268858880.0,
            "725": 13268858880.0,
            "730": 13268858880.0,
            "735": 13268858880.0,
            "740": 13268858880.0,
            "745": 13268858880.0,
            "750": 13268858880.0,
            "755": 13268858880.0,
            "760": 13268858880.0,
            "765": 13268858880.0,
            "770": 13268858880.0,
            "775": 13268858880.0,
            "780": 13268858880.0,
            "785": 13268858880.0,
            "790": 13268858880.0,
            "795": 13268858880.0,
            "800": 13268858880.0,
            "805": 13268858880.0,
            "810": 13268858880.0,
            "815": 13268858880.0,
            "820": 13268858880.0,
            "825": 13268858880.0,
            "830": 13268858880.0,
            "835": 13268858880.0,
            "840": 13268858880.0,
            "845": 13268858880.0,
            "850": 13268858880.0,
            "855": 13268858880.0,
            "860": 13268858880.0,
            "865": 13268858880.0,
            "870": 13268858880.0,
            "875": 13268858880.0,
            "880": 13268858880.0,
            "885": 13268858880.0,
            "890": 13268858880.0,
            "895": 13268858880.0,
            "900": 13268858880.0,
            "905": 13268858880.0,
            "910": 13268858880.0,
            "915": 13268858880.0,
            "920": 13268858880.0,
            "925": 13268858880.0,
            "930": 13268858880.0,
            "935": 13268858880.0,
            "940": 13268858880.0,
            "945": 13268858880.0,
            "950": 13268858880.0,
            "955": 13268858880.0,
            "960": 13268858880.0,
            "965": 13268858880.0,
            "970": 13268858880.0,
            "975": 13268858880.0,
            "980": 13268858880.0,
            "985": 13268858880.0,
            "990": 13268858880.0,
            "995": 13268858880.0,
            "1000": 13268858880.0,
            "1005": 13268858880.0,
            "1010": 13268858880.0,
            "1015": 13268858880.0,
            "1020": 13268858880.0,
            "1025": 13268858880.0,
            "1030": 13268858880.0,
            "1035": 13268858880.0,
            "1040": 13268858880.0,
            "1045": 13268858880.0,
            "1050": 13268858880.0,
            "1055": 13268858880.0,
            "1060": 13268858880.0,
            "1065": 13268858880.0,
            "1070": 13268858880.0,
            "1075": 13268858880.0,
            "1080": 13268858880.0,
            "1085": 13268858880.0,
            "1090": 13268858880.0,
            "1095": 13268858880.0,
            "1100": 13268858880.0,
            "1105": 13268858880.0,
            "1110": 13268858880.0,
            "1115": 13268858880.0,
            "1120": 13268858880.0,
            "1125": 13268858880.0,
            "1130": 13268858880.0,
            "1135": 13268858880.0,
            "1140": 13268858880.0,
            "1145": 13268858880.0,
            "1150": 13268858880.0,
            "1155": 13268858880.0,
            "1160": 13268858880.0,
            "1165": 13268858880.0,
            "1170": 13268858880.0,
            "1175": 13268858880.0,
            "1180": 13268858880.0,
            "1185": 13268858880.0,
            "1190": 13268858880.0,
            "1195": 13268858880.0,
            "1200": 13268858880.0,
            "1205": 13268858880.0,
            "1210": 13268858880.0,
            "1215": 13268858880.0,
            "1220": 13268858880.0,
            "1225": 13268858880.0,
            "1230": 13268858880.0,
            "1235": 13268858880.0,
            "1240": 13268858880.0,
            "1245": 13268858880.0,
            "1250": 13268858880.0,
            "1255": 13268858880.0,
            "1260": 13268858880.0,
            "1265": 13268858880.0,
            "1270": 13268858880.0,
            "1275": 13268858880.0,
            "1280": 13268858880.0,
            "1285": 13268858880.0,
            "1290": 13268858880.0,
            "1295": 13268858880.0,
            "1300": 13268858880.0,
            "1305": 13268858880.0,
            "1310": 13268858880.0,
            "1315": 13268858880.0,
            "1320": 13268858880.0,
            "1325": 13268858880.0,
            "1330": 13268858880.0,
            "1335": 13268858880.0,
            "1340": 13268858880.0,
            "1345": 13268858880.0,
            "1350": 13268858880.0,
            "1355": 13268858880.0,
            "1360": 13268858880.0,
            "1365": 13268858880.0,
            "1370": 13268858880.0,
            "1375": 13268858880.0,
            "1380": 13268858880.0,
            "1385": 13268858880.0,
            "1390": 13268858880.0,
            "1395": 13268858880.0,
            "1400": 13268858880.0,
            "1405": 13268858880.0,
            "1410": 13268858880.0,
            "1415": 13268858880.0,
            "1420": 13268858880.0,
            "1425": 13268858880.0,
            "1430": 13268858880.0,
            "1435": 13268858880.0,
            "1440": 13268858880.0,
            "1445": 13268858880.0,
            "1450": 13268858880.0,
            "1455": 13268858880.0,
            "1460": 13268858880.0,
            "1465": 13268858880.0,
            "1470": 13268858880.0,
            "1475": 13268858880.0,
            "1480": 13268858880.0,
            "1485": 13268858880.0,
            "1490": 13268858880.0,
            "1495": 13268858880.0,
            "1500": 13268858880.0,
            "1505": 13268858880.0,
            "1510": 13268858880.0,
            "1515": 13268858880.0,
            "1520": 13268858880.0,
            "1525": 13268858880.0,
            "1530": 13268858880.0,
            "1535": 13268858880.0,
            "1540": 13268858880.0,
            "1545": 13268858880.0,
            "1550": 13268858880.0,
            "1555": 13268858880.0,
            "1560": 13268858880.0,
            "1565": 13268858880.0,
            "1570": 13268858880.0,
            "1575": 13268858880.0,
            "1580": 13268858880.0,
            "1585": 13268858880.0,
            "1590": 13268858880.0,
            "1595": 13268858880.0,
            "1600": 13268858880.0,
            "1605": 13268858880.0,
            "1610": 13268858880.0,
            "1615": 13268858880.0,
            "1620": 13268858880.0,
            "1625": 13268858880.0,
            "1630": 13268858880.0,
            "1635": 13268858880.0,
            "1640": 13268858880.0,
            "1645": 13268858880.0,
            "1650": 13268858880.0,
            "1655": 13268858880.0,
            "1660": 13268858880.0,
            "1665": 13268858880.0,
            "1670": 13268858880.0,
            "1675": 13268858880.0,
            "1680": 13268858880.0,
            "1685": 13268858880.0,
            "1690": 13268858880.0,
            "1695": 13268858880.0,
            "1700": 13268858880.0,
            "1705": 13268858880.0,
            "1710": 13268858880.0,
            "1715": 13268858880.0,
            "1720": 13268858880.0,
            "1725": 13268858880.0,
            "1730": 13268858880.0,
            "1735": 13268858880.0,
            "1740": 13268858880.0,
            "1745": 13268858880.0,
            "1750": 13268858880.0,
            "1755": 13268858880.0,
            "1760": 13268858880.0,
            "1765": 13268858880.0,
            "1770": 13268858880.0,
            "1775": 13268858880.0,
            "1780": 13268858880.0,
            "1785": 13268858880.0,
            "1790": 13268858880.0,
            "1795": 13268858880.0,
            "1800": 13268858880.0,
            "1805": 13268858880.0,
            "1810": 13268858880.0,
            "1815": 13268858880.0,
            "1820": 13268858880.0,
            "1825": 13268858880.0,
            "1830": 13268858880.0,
            "1835": 13268858880.0,
            "1840": 13268858880.0,
            "1845": 13268858880.0,
            "1850": 13268858880.0,
            "1855": 13268858880.0,
            "1860": 13268858880.0,
            "1865": 13268858880.0,
            "1870": 13268858880.0,
            "1875": 13268858880.0,
            "1880": 13268858880.0,
            "1885": 13268858880.0,
            "1890": 13268858880.0,
            "1895": 13268858880.0,
            "1900": 13268858880.0,
            "1905": 13268858880.0,
            "1910": 13268858880.0,
            "1915": 13268858880.0,
            "1920": 13268858880.0,
            "1925": 13268858880.0,
            "1930": 13268858880.0,
            "1935": 13268858880.0,
            "1940": 13268858880.0,
            "1945": 13268858880.0,
            "1950": 13268858880.0,
            "1955": 13268858880.0,
            "1960": 13268858880.0,
            "1965": 13268858880.0,
            "1970": 13268858880.0,
            "1975": 13268858880.0,
            "1980": 13268858880.0,
            "1985": 13268858880.0,
            "1990": 13268858880.0,
            "1995": 13268858880.0,
            "2000": 13268858880.0,
            "2005": 13268858880.0,
            "2010": 13268858880.0,
            "2015": 13268858880.0,
            "2020": 13268858880.0,
            "2025": 13268858880.0,
            "2030": 13268858880.0,
            "2035": 13268858880.0,
            "2040": 13268858880.0,
            "2045": 13268858880.0,
            "2050": 13268858880.0,
            "2055": 13268858880.0,
            "2060": 13268858880.0,
            "2065": 13268858880.0,
            "2070": 13268858880.0,
            "2075": 13268858880.0,
            "2080": 13268858880.0,
            "2085": 13268858880.0,
            "2090": 13268858880.0,
            "2095": 13268858880.0,
            "2100": 13268858880.0,
            "2105": 13268858880.0,
            "2110": 13268858880.0,
            "2115": 13268858880.0,
            "2120": 13268858880.0,
            "2125": 13268858880.0,
            "2130": 13268858880.0,
            "2135": 13268858880.0,
            "2140": 13268858880.0,
            "2145": 13268858880.0,
            "2150": 13268858880.0,
            "2155": 13268858880.0,
            "2160": 13268858880.0,
            "2165": 13268858880.0,
            "2170": 13268858880.0,
            "2175": 13268858880.0,
            "2180": 13268858880.0,
            "2185": 13268858880.0,
            "2190": 13268858880.0,
            "2195": 13268858880.0,
            "2200": 13268858880.0,
            "2205": 13268858880.0,
            "2210": 13268858880.0,
            "2215": 13268858880.0,
            "2220": 13268858880.0,
            "2225": 13268858880.0,
            "2230": 13268858880.0,
            "2235": 13268858880.0,
            "2240": 13268858880.0,
            "2245": 13268858880.0,
            "2250": 13268858880.0,
            "2255": 13268858880.0,
            "2260": 13268858880.0,
            "2265": 13268858880.0,
            "2270": 13268858880.0,
            "2275": 13268858880.0,
            "2280": 13268858880.0,
            "2285": 13268858880.0,
            "2290": 13268858880.0,
            "2295": 13268858880.0,
            "2300": 13268858880.0,
            "2305": 13268858880.0,
            "2310": 13268858880.0,
            "2315": 13268858880.0,
            "2320": 13268858880.0,
            "2325": 13268858880.0,
            "2330": 13268858880.0,
            "2335": 13268858880.0,
            "2340": 13268858880.0,
            "2345": 13268858880.0,
            "2350": 13268858880.0,
            "2355": 13268858880.0,
            "2360": 13268858880.0,
            "2365": 13268858880.0,
            "2370": 13268858880.0,
            "2375": 13268858880.0,
            "2380": 13268858880.0,
            "2385": 13268858880.0,
            "2390": 13268858880.0,
            "2395": 13268858880.0,
            "2400": 13268858880.0,
            "2405": 13268858880.0,
            "2410": 13268858880.0,
            "2415": 13268858880.0,
            "2420": 13268858880.0,
            "2425": 13268858880.0,
            "2430": 13268858880.0,
            "2435": 13268858880.0,
            "2440": 13268858880.0,
            "2445": 13268858880.0,
            "2450": 13268858880.0,
            "2455": 13268858880.0,
            "2460": 13268858880.0,
            "2465": 13268858880.0,
            "2470": 13268858880.0,
            "2475": 13268858880.0,
            "2480": 13268858880.0,
            "2485": 13268858880.0,
            "2490": 13268858880.0,
            "2495": 13268858880.0,
            "2500": 13268858880.0,
            "2505": 13268858880.0,
            "2510": 13268858880.0,
            "2515": 13268858880.0,
            "2520": 13268858880.0,
            "2525": 13268858880.0,
            "2530": 13268858880.0,
            "2535": 13268858880.0,
            "2540": 13268858880.0,
            "2545": 13268858880.0,
            "2550": 13268858880.0,
            "2555": 13268858880.0,
            "2560": 13268858880.0,
            "2565": 13268858880.0,
            "2570": 13268858880.0,
            "2575": 13268858880.0,
            "2580": 13268858880.0,
            "2585": 13268858880.0,
            "2590": 13268858880.0,
            "2595": 13268858880.0,
            "2600": 13268858880.0,
            "2605": 13268858880.0,
            "2610": 13268858880.0,
            "2615": 13268858880.0,
            "2620": 13268858880.0,
            "2625": 13268858880.0,
            "2630": 13268858880.0,
            "2635": 13268858880.0,
            "2640": 13268858880.0,
            "2645": 13268858880.0,
            "2650": 13268858880.0,
            "2655": 13268858880.0,
            "2660": 13268858880.0,
            "2665": 13268858880.0,
            "2670": 13268858880.0,
            "2675": 13268858880.0,
            "2680": 13268858880.0,
            "2685": 13268858880.0,
            "2690": 13268858880.0,
            "2695": 13268858880.0,
            "2700": 13268858880.0,
            "2705": 13268858880.0,
            "2710": 13268858880.0,
            "2715": 13268858880.0,
            "2720": 13268858880.0,
            "2725": 13268858880.0,
            "2730": 13268858880.0,
            "2735": 13268858880.0,
            "2740": 13268858880.0,
            "2745": 13268858880.0,
            "2750": 13268858880.0,
            "2755": 13268858880.0,
            "2760": 13268858880.0,
            "2765": 13268858880.0,
            "2770": 13268858880.0,
            "2775": 13268858880.0,
            "2780": 13268858880.0,
            "2785": 13268858880.0,
            "2790": 13268858880.0,
            "2795": 13268858880.0,
            "2800": 13268858880.0,
            "2805": 13268858880.0,
            "2810": 13268858880.0,
            "2815": 13268858880.0,
            "2820": 13268858880.0,
            "2825": 13268858880.0,
            "2830": 13268858880.0,
            "2835": 13268858880.0,
            "2840": 13268858880.0,
            "2845": 13268858880.0,
            "2850": 13268858880.0,
            "2855": 13268858880.0,
            "2860": 13268858880.0,
            "2865": 13268858880.0,
            "2870": 13268858880.0,
            "2875": 13268858880.0,
            "2880": 13268858880.0,
            "2885": 13268858880.0,
            "2890": 13268858880.0,
            "2895": 13268858880.0,
            "2900": 13268858880.0,
            "2905": 13268858880.0,
            "2910": 13268858880.0,
            "2915": 13268858880.0,
            "2920": 13268858880.0,
            "2925": 13268858880.0,
            "2930": 13268858880.0,
            "2935": 13268858880.0,
            "2940": 13268858880.0,
            "2945": 13268858880.0,
            "2950": 13268858880.0,
            "2955": 13268858880.0,
            "2960": 13268858880.0,
            "2965": 13268858880.0,
            "2970": 13268858880.0,
            "2975": 13268858880.0,
            "2980": 13268858880.0,
            "2985": 13268858880.0,
            "2990": 13268858880.0,
            "2995": 13268858880.0,
            "3000": 13268858880.0,
            "3005": 13268858880.0,
            "3010": 13268858880.0,
            "3015": 13268858880.0,
            "3020": 13268858880.0,
            "3025": 13268858880.0,
            "3030": 13268858880.0,
            "3035": 13268858880.0,
            "3040": 13268858880.0,
            "3045": 13268858880.0,
            "3050": 13268858880.0,
            "3055": 13268858880.0,
            "3060": 13268858880.0,
            "3065": 13268858880.0,
            "3070": 13268858880.0,
            "3075": 13268858880.0,
            "3080": 13268858880.0,
            "3085": 13268858880.0,
            "3090": 13268858880.0,
            "3095": 13268858880.0,
            "3100": 13268858880.0,
            "3105": 13268858880.0,
            "3110": 13268858880.0,
            "3115": 13268858880.0,
            "3120": 13268858880.0,
            "3125": 13268858880.0,
            "3130": 13268858880.0,
            "3135": 13268858880.0,
            "3140": 13268858880.0,
            "3145": 13268858880.0,
            "3150": 13268858880.0,
            "3155": 13268858880.0,
            "3160": 13268858880.0,
            "3165": 13268858880.0,
            "3170": 13268858880.0,
            "3175": 13268858880.0,
            "3180": 13268858880.0,
            "3185": 13268858880.0,
            "3190": 13268858880.0,
            "3195": 13268858880.0,
            "3200": 13268858880.0,
            "3205": 13268858880.0,
            "3210": 13268858880.0,
            "3215": 13268858880.0,
            "3220": 13268858880.0,
            "3225": 13268858880.0,
            "3230": 13268858880.0,
            "3235": 13268858880.0,
            "3240": 13268858880.0,
            "3245": 13268858880.0,
            "3250": 13268858880.0,
            "3255": 13268858880.0,
            "3260": 13268858880.0,
            "3265": 13268858880.0,
            "3270": 13268858880.0,
            "3275": 13268858880.0,
            "3280": 13268858880.0,
            "3285": 13268858880.0,
            "3290": 13268858880.0,
            "3295": 13268858880.0,
            "3300": 13268858880.0,
            "3305": 13268858880.0,
            "3310": 13268858880.0,
            "3315": 13268858880.0,
            "3320": 13268858880.0,
            "3325": 13268858880.0,
            "3330": 13268858880.0,
            "3335": 13268858880.0,
            "3340": 13268858880.0,
            "3345": 13268858880.0,
            "3350": 13268858880.0,
            "3355": 13268858880.0,
            "3360": 13268858880.0,
            "3365": 13268858880.0,
            "3370": 13268858880.0,
            "3375": 13268858880.0,
            "3380": 13268858880.0,
            "3385": 13268858880.0,
            "3390": 13268858880.0,
            "3395": 13268858880.0,
            "3400": 13268858880.0,
            "3405": 13268858880.0,
            "3410": 13268858880.0,
            "3415": 13268858880.0,
            "3420": 13268858880.0,
            "3425": 13268858880.0,
            "3430": 13268858880.0,
            "3435": 13268858880.0,
            "3440": 13268858880.0,
            "3445": 13268858880.0,
            "3450": 13268858880.0,
            "3455": 13268858880.0,
            "3460": 13268858880.0,
            "3465": 13268858880.0,
            "3470": 13268858880.0,
            "3475": 13268858880.0,
            "3480": 13268858880.0,
            "3485": 13268858880.0,
            "3490": 13268858880.0,
            "3495": 13268858880.0,
            "3500": 13268858880.0,
            "3505": 13268858880.0,
            "3510": 13268858880.0,
            "3515": 13268858880.0,
            "3520": 13268858880.0,
            "3525": 13268858880.0,
            "3530": 13268858880.0,
            "3535": 13268858880.0,
            "3540": 13268858880.0,
            "3545": 13268858880.0,
            "3550": 13268858880.0,
            "3555": 13268858880.0,
            "3560": 13268858880.0,
            "3565": 13268858880.0,
            "3570": 13268858880.0,
            "3575": 13268858880.0,
            "3580": 13268858880.0,
            "3585": 13268858880.0,
            "3590": 13268858880.0,
            "3595": 13268858880.0,
            "3600": 13268858880.0,
            "3605": 13268858880.0,
            "3610": 13268858880.0,
            "3615": 13268858880.0,
            "3620": 13268858880.0,
            "3625": 13268858880.0,
            "3630": 13268858880.0,
            "3635": 13268858880.0,
            "3640": 13268858880.0,
            "3645": 13268858880.0,
            "3650": 13268858880.0,
            "3655": 13268858880.0,
            "3660": 13268858880.0,
            "3665": 13268858880.0,
            "3670": 13268858880.0,
            "3675": 13268858880.0,
            "3680": 13268858880.0,
            "3685": 13268858880.0,
            "3690": 13268858880.0,
            "3695": 13268858880.0,
            "3700": 13268858880.0,
            "3705": 13268858880.0,
            "3710": 13268858880.0,
            "3715": 13268858880.0,
            "3720": 13268858880.0,
            "3725": 13268858880.0,
            "3730": 13268858880.0,
            "3735": 13268858880.0,
            "3740": 13268858880.0,
            "3745": 13268858880.0,
            "3750": 13268858880.0,
            "3755": 13268858880.0,
            "3760": 13268858880.0,
            "3765": 13268858880.0,
            "3770": 13268858880.0,
            "3775": 13268858880.0,
            "3780": 13268858880.0,
            "3785": 13268858880.0,
            "3790": 13268858880.0,
            "3795": 13268858880.0,
            "3800": 13268858880.0,
            "3805": 13268858880.0,
            "3810": 13268858880.0,
            "3815": 13268858880.0,
            "3820": 13268858880.0,
            "3825": 13268858880.0,
            "3830": 13268858880.0,
            "3835": 13268858880.0,
            "3840": 13268858880.0,
            "3845": 13268858880.0,
            "3850": 13268858880.0,
            "3855": 13268858880.0,
            "3860": 13268858880.0,
            "3865": 13268858880.0,
            "3870": 13268858880.0,
            "3875": 13268858880.0,
            "3880": 13268858880.0,
            "3885": 13268858880.0,
            "3890": 13268858880.0,
            "3895": 13268858880.0,
            "3900": 13268858880.0,
            "3905": 13268858880.0,
            "3910": 13268858880.0,
            "3915": 13268858880.0,
            "3920": 13268858880.0,
            "3925": 13268858880.0,
            "3930": 13268858880.0,
            "3935": 13268858880.0,
            "3940": 13268858880.0,
            "3945": 13268858880.0,
            "3950": 13268858880.0,
            "3955": 13268858880.0,
            "3960": 13268858880.0,
            "3965": 13268858880.0,
            "3970": 13268858880.0,
            "3975": 13268858880.0,
            "3980": 13268858880.0,
            "3985": 13268858880.0,
            "3990": 13268858880.0,
            "3995": 13268858880.0,
            "4000": 13268858880.0,
            "4005": 13268858880.0,
            "4010": 13268858880.0,
            "4015": 13268858880.0,
            "4020": 13268858880.0,
            "4025": 13268858880.0,
            "4030": 13268858880.0,
            "4035": 13268858880.0,
            "4040": 13268858880.0,
            "4045": 13268858880.0,
            "4050": 13268858880.0,
            "4055": 13268858880.0,
            "4060": 13268858880.0,
            "4065": 13268858880.0,
            "4070": 13268858880.0,
            "4075": 13268858880.0,
            "4080": 13268858880.0,
            "4085": 13268858880.0,
            "4090": 13268858880.0,
            "4095": 13268858880.0,
            "4100": 13268858880.0,
            "4105": 13268858880.0,
            "4110": 13268858880.0,
            "4115": 13268858880.0,
            "4120": 13268858880.0,
            "4125": 13268858880.0,
            "4130": 13268858880.0,
            "4135": 13268858880.0,
            "4140": 13268858880.0,
            "4145": 13268858880.0,
            "4150": 13268858880.0,
            "4155": 13268858880.0,
            "4160": 13268858880.0,
            "4165": 13268858880.0,
            "4170": 13268858880.0,
            "4175": 13268858880.0,
            "4180": 13268858880.0,
            "4185": 13268858880.0,
            "4190": 13268858880.0,
            "4195": 13268858880.0,
            "4200": 13268858880.0,
            "4205": 13268858880.0,
            "4210": 13268858880.0,
            "4215": 13268858880.0,
            "4220": 13268858880.0,
            "4225": 13268858880.0,
            "4230": 13268858880.0,
            "4235": 13268858880.0,
            "4240": 13268858880.0,
            "4245": 13268858880.0,
            "4250": 13268858880.0,
            "4255": 13268858880.0,
            "4260": 13268858880.0,
            "4265": 13268858880.0,
            "4270": 13268858880.0,
            "4275": 13268858880.0,
            "4280": 13268858880.0,
            "4285": 13268858880.0,
            "4290": 13268858880.0,
            "4295": 13268858880.0,
            "4300": 13268858880.0,
            "4305": 13268858880.0,
            "4310": 13268858880.0,
            "4315": 13268858880.0,
            "4320": 13268858880.0,
            "4325": 13268858880.0,
            "4330": 13268858880.0,
            "4335": 13268858880.0,
            "4340": 13268858880.0,
            "4345": 13268858880.0,
            "4350": 13268858880.0,
            "4355": 13268858880.0,
            "4360": 13268858880.0,
            "4365": 13268858880.0,
            "4370": 13268858880.0,
            "4375": 13268858880.0,
            "4380": 13268858880.0,
            "4385": 13268858880.0,
            "4390": 13268858880.0,
            "4395": 13268858880.0,
            "4400": 13268858880.0,
            "4405": 13268858880.0,
            "4410": 13268858880.0,
            "4415": 13268858880.0,
            "4420": 13268858880.0,
            "4425": 13268858880.0,
            "4430": 13268858880.0,
            "4435": 13268858880.0,
            "4440": 13268858880.0,
            "4445": 13268858880.0,
            "4450": 13268858880.0,
            "4455": 13268858880.0,
            "4460": 13268858880.0,
            "4465": 13268858880.0,
            "4470": 13268858880.0,
            "4475": 13268858880.0,
            "4480": 13268858880.0,
            "4485": 13268858880.0,
            "4490": 13268858880.0,
            "4495": 13268858880.0,
            "4500": 13268858880.0,
            "4505": 13268858880.0,
            "4510": 13268858880.0,
            "4515": 13268858880.0,
            "4520": 13268858880.0,
            "4525": 13268858880.0,
            "4530": 13268858880.0,
            "4535": 13268858880.0,
            "4540": 13268858880.0,
            "4545": 13268858880.0,
            "4550": 13268858880.0,
            "4555": 13268858880.0,
            "4560": 13268858880.0,
            "4565": 13268858880.0,
            "4570": 13268858880.0,
            "4575": 13268858880.0,
            "4580": 13268858880.0,
            "4585": 13268858880.0,
            "4590": 13268858880.0,
            "4595": 13268858880.0,
            "4600": 13268858880.0,
            "4605": 13268858880.0,
            "4610": 13268858880.0,
            "4615": 13268858880.0,
            "4620": 13268858880.0,
            "4625": 13268858880.0,
            "4630": 13268858880.0,
            "4635": 13268858880.0,
            "4640": 13268858880.0,
            "4645": 13268858880.0,
            "4650": 13268858880.0,
            "4655": 13268858880.0,
            "4660": 13268858880.0,
            "4665": 13268858880.0,
            "4670": 13268858880.0,
            "4675": 13268858880.0,
            "4680": 13268858880.0,
            "4685": 13268858880.0,
            "4690": 13268858880.0,
            "4695": 13268858880.0,
            "4700": 13268858880.0,
            "4705": 13268858880.0,
            "4710": 13268858880.0,
            "4715": 13268858880.0,
            "4720": 13268858880.0,
            "4725": 13268858880.0,
            "4730": 13268858880.0,
            "4735": 13268858880.0,
            "4740": 13268858880.0,
            "4745": 13268858880.0,
            "4750": 13268858880.0,
            "4755": 13268858880.0,
            "4760": 13268858880.0,
            "4765": 13268858880.0,
            "4770": 13268858880.0,
            "4775": 13268858880.0,
            "4780": 13268858880.0,
            "4785": 13268858880.0,
            "4790": 13268858880.0,
            "4795": 13268858880.0,
            "4800": 13268858880.0,
            "4805": 13268858880.0,
            "4810": 13268858880.0,
            "4815": 13268858880.0,
            "4820": 13268858880.0,
            "4825": 13268858880.0,
            "4830": 13268858880.0,
            "4835": 13268858880.0,
            "4840": 13268858880.0,
            "4845": 13268858880.0,
            "4850": 13268858880.0,
            "4855": 13268858880.0,
            "4860": 13268858880.0,
            "4865": 13268858880.0,
            "4870": 13268858880.0,
            "4875": 13268858880.0,
            "4880": 13268858880.0,
            "4885": 13268858880.0,
            "4890": 13268858880.0,
            "4895": 13268858880.0,
            "4900": 13268858880.0,
            "4905": 13268858880.0,
            "4910": 13268858880.0,
            "4915": 13268858880.0,
            "4920": 13268858880.0,
            "4925": 13268858880.0,
            "4930": 13268858880.0,
            "4935": 13268858880.0,
            "4940": 13268858880.0,
            "4945": 13268858880.0,
            "4950": 13268858880.0,
            "4955": 13268858880.0,
            "4960": 13268858880.0,
            "4965": 13268858880.0,
            "4970": 13268858880.0,
            "4975": 13268858880.0,
            "4980": 13268858880.0,
            "4985": 13268858880.0,
            "4990": 13268858880.0,
            "4995": 13268858880.0,
            "5000": 13268858880.0,
            "5005": 13268858880.0,
            "5010": 13268858880.0,
            "5015": 13268858880.0,
            "5020": 13268858880.0,
            "5025": 13268858880.0,
            "5030": 13268858880.0,
            "5035": 13268858880.0,
            "5040": 13268858880.0,
            "5045": 13268858880.0,
            "5050": 13268858880.0,
            "5055": 13268858880.0,
            "5060": 13268858880.0,
            "5065": 13268858880.0,
            "5070": 13268858880.0,
            "5075": 13268858880.0,
            "5080": 13268858880.0,
            "5085": 13268858880.0,
            "5090": 13268858880.0,
            "5095": 13268858880.0,
            "5100": 13268858880.0,
            "5105": 13268858880.0,
            "5110": 13268858880.0,
            "5115": 13268858880.0,
            "5120": 13268858880.0,
            "5125": 13268858880.0,
            "5130": 13268858880.0,
            "5135": 13268858880.0,
            "5140": 13268858880.0,
            "5145": 13268858880.0,
            "5150": 13268858880.0,
            "5155": 13268858880.0,
            "5160": 13268858880.0,
            "5165": 13268858880.0,
            "5170": 13268858880.0,
            "5175": 13268858880.0,
            "5180": 13268858880.0,
            "5185": 13268858880.0,
            "5190": 13268858880.0,
            "5195": 13268858880.0,
            "5200": 13268858880.0,
            "5205": 13268858880.0,
            "5210": 13268858880.0,
            "5215": 13268858880.0,
            "5220": 13268858880.0,
            "5225": 13268858880.0,
            "5230": 13268858880.0,
            "5235": 13268858880.0,
            "5240": 13268858880.0,
            "5245": 13268858880.0,
            "5250": 13268858880.0,
            "5255": 13268858880.0,
            "5260": 13268858880.0,
            "5265": 13268858880.0,
            "5270": 13268858880.0,
            "5275": 13268858880.0,
            "5280": 13268858880.0,
            "5285": 13268858880.0,
            "5290": 13268858880.0,
            "5295": 13268858880.0,
            "5300": 13268858880.0,
            "5305": 13268858880.0,
            "5310": 13268858880.0,
            "5315": 13268858880.0,
            "5320": 13268858880.0,
            "5325": 13268858880.0,
            "5330": 13268858880.0,
            "5335": 13268858880.0,
            "5340": 13268858880.0,
            "5345": 13268858880.0,
            "5350": 13268858880.0,
            "5355": 13268858880.0,
            "5360": 13268858880.0,
            "5365": 13268858880.0,
            "5370": 13268858880.0,
            "5375": 13268858880.0,
            "5380": 13268858880.0,
            "5385": 13268858880.0,
            "5390": 13268858880.0,
            "5395": 13268858880.0,
            "5400": 13268858880.0,
            "5405": 13268858880.0,
            "5410": 13268858880.0,
            "5415": 13268858880.0,
            "5420": 13268858880.0,
            "5425": 13268858880.0,
            "5430": 13268858880.0,
            "5435": 13268858880.0,
            "5440": 13268858880.0,
            "5445": 13268858880.0,
            "5450": 13268858880.0,
            "5455": 13268858880.0,
            "5460": 13268858880.0,
            "5465": 13268858880.0,
            "5470": 13268858880.0,
            "5475": 13268858880.0,
            "5480": 13268858880.0,
            "5485": 13268858880.0,
            "5490": 13268858880.0,
            "5495": 13268858880.0,
            "5500": 13268858880.0,
            "5505": 13268858880.0,
            "5510": 13268858880.0,
            "5515": 13268858880.0,
            "5520": 13268858880.0,
            "5525": 13268858880.0,
            "5530": 13268858880.0,
            "5535": 13268858880.0,
            "5540": 13268858880.0,
            "5545": 13268858880.0,
            "5550": 13268858880.0,
            "5555": 13268858880.0,
            "5560": 13268858880.0,
            "5565": 13268858880.0,
            "5570": 13268858880.0,
            "5575": 13268858880.0,
            "5580": 13268858880.0,
            "5585": 13268858880.0,
            "5590": 13268858880.0,
            "5595": 13268858880.0,
            "5600": 13268858880.0,
            "5605": 13268858880.0,
            "5610": 13268858880.0,
            "5615": 13268858880.0,
            "5620": 13268858880.0,
            "5625": 13268858880.0,
            "5630": 13268858880.0,
            "5635": 13268858880.0,
            "5640": 13268858880.0,
            "5645": 13268858880.0,
            "5650": 13268858880.0,
            "5655": 13268858880.0,
            "5660": 13268858880.0,
            "5665": 13268858880.0,
            "5670": 13268858880.0,
            "5675": 13268858880.0,
            "5680": 13268858880.0,
            "5685": 13268858880.0,
            "5690": 13268858880.0,
            "5695": 13268858880.0,
            "5700": 13268858880.0,
            "5705": 13268858880.0,
            "5710": 13268858880.0,
            "5715": 13268858880.0,
            "5720": 13268858880.0,
            "5725": 13268858880.0,
            "5730": 13268858880.0,
            "5735": 13268858880.0,
            "5740": 13268858880.0,
            "5745": 13268858880.0,
            "5750": 13268858880.0,
            "5755": 13268858880.0,
            "5760": 13268858880.0,
            "5765": 13268858880.0,
            "5770": 13268858880.0,
            "5775": 13268858880.0,
            "5780": 13268858880.0,
            "5785": 13268858880.0,
            "5790": 13268858880.0,
            "5795": 13268858880.0,
            "5800": 13268858880.0,
            "5805": 13268858880.0,
            "5810": 13268858880.0,
            "5815": 13268858880.0,
            "5820": 13268858880.0,
            "5825": 13268858880.0,
            "5830": 13268858880.0,
            "5835": 13268858880.0,
            "5840": 13268858880.0,
            "5845": 13268858880.0,
            "5850": 13268858880.0,
            "5855": 13268858880.0,
            "5860": 13268858880.0,
            "5865": 13268858880.0,
            "5870": 13268858880.0,
            "5875": 13268858880.0,
            "5880": 13268858880.0,
            "5885": 13268858880.0,
            "5890": 13268858880.0,
            "5895": 13268858880.0,
            "5900": 13268858880.0,
            "5905": 13268858880.0,
            "5910": 13268858880.0,
            "5915": 13268858880.0,
            "5920": 13268858880.0,
            "5925": 13268858880.0,
            "5930": 13268858880.0,
            "5935": 13268858880.0,
            "5940": 13268858880.0,
            "5945": 13268858880.0,
            "5950": 13268858880.0,
            "5955": 13268858880.0,
            "5960": 13268858880.0,
            "5965": 13268858880.0,
            "5970": 13268858880.0,
            "5975": 13268858880.0,
            "5980": 13268858880.0,
            "5985": 13268858880.0,
            "5990": 13268858880.0,
            "5995": 13268858880.0,
            "6000": 13268858880.0,
            "6005": 13268858880.0,
            "6010": 13268858880.0,
            "6015": 13268858880.0,
            "6020": 13268858880.0,
            "6025": 13268858880.0,
            "6030": 13268858880.0,
            "6035": 13268858880.0,
            "6040": 13268858880.0,
            "6045": 13268858880.0,
            "6050": 13268858880.0,
            "6055": 13268858880.0,
            "6060": 13268858880.0,
            "6065": 13268858880.0,
            "6070": 13268858880.0,
            "6075": 13268858880.0,
            "6080": 13268858880.0,
            "6085": 13268858880.0,
            "6090": 13268858880.0,
            "6095": 13268858880.0,
            "6100": 13268858880.0,
            "6105": 13268858880.0,
            "6110": 13268858880.0,
            "6115": 13268858880.0,
            "6120": 13268858880.0,
            "6125": 13268858880.0,
            "6130": 13268858880.0,
            "6135": 13268858880.0,
            "6140": 13268858880.0,
            "6145": 13268858880.0,
            "6150": 13268858880.0,
            "6155": 13268858880.0,
            "6160": 13268858880.0,
            "6165": 13268858880.0,
            "6170": 13268858880.0,
            "6175": 13268858880.0,
            "6180": 13268858880.0,
            "6185": 13268858880.0,
            "6190": 13268858880.0,
            "6195": 13268858880.0,
            "6200": 13268858880.0,
            "6205": 13268858880.0,
            "6210": 13268858880.0,
            "6215": 13268858880.0,
            "6220": 13268858880.0,
            "6225": 13268858880.0,
            "6230": 13268858880.0,
            "6235": 13268858880.0,
            "6240": 13268858880.0,
            "6245": 13268858880.0,
            "6250": 13268858880.0,
            "6255": 13268858880.0,
            "6260": 13268858880.0,
            "6265": 13268858880.0,
            "6270": 13268858880.0,
            "6275": 13268858880.0,
            "6280": 13268858880.0,
            "6285": 13268858880.0,
            "6290": 13268858880.0,
            "6295": 13268858880.0,
            "6300": 13268858880.0,
            "6305": 13268858880.0,
            "6310": 13268858880.0,
            "6315": 13268858880.0,
            "6320": 13268858880.0,
            "6325": 13268858880.0,
            "6330": 13268858880.0,
            "6335": 13268858880.0,
            "6340": 13268858880.0,
            "6345": 13268858880.0,
            "6350": 13268858880.0,
            "6355": 13268858880.0,
            "6360": 13268858880.0,
            "6365": 13268858880.0,
            "6370": 13268858880.0,
            "6375": 13268858880.0,
            "6380": 13268858880.0,
            "6385": 13268858880.0,
            "6390": 13268858880.0,
            "6395": 13268858880.0,
            "6400": 13268858880.0,
            "6405": 13268858880.0,
            "6410": 13268858880.0,
            "6415": 13268858880.0,
            "6420": 13268858880.0,
            "6425": 13268858880.0,
            "6430": 13268858880.0,
            "6435": 13268858880.0,
            "6440": 13268858880.0,
            "6445": 13268858880.0,
            "6450": 13268858880.0,
            "6455": 13268858880.0,
            "6460": 13268858880.0,
            "6465": 13268858880.0,
            "6470": 13268858880.0,
            "6475": 13268858880.0,
            "6480": 13268858880.0,
            "6485": 13268858880.0,
            "6490": 13268858880.0,
            "6495": 13268858880.0,
            "6500": 13268858880.0,
            "6505": 13268858880.0,
            "6510": 13268858880.0,
            "6515": 13268858880.0,
            "6520": 13268858880.0,
            "6525": 13268858880.0,
            "6530": 13268858880.0,
            "6535": 13268858880.0,
            "6540": 13268858880.0,
            "6545": 13268858880.0,
            "6550": 13268858880.0,
            "6555": 13268858880.0,
            "6560": 13268858880.0,
            "6565": 13268858880.0,
            "6570": 13268858880.0,
            "6575": 13268858880.0,
            "6580": 13268858880.0,
            "6585": 13268858880.0,
            "6590": 13268858880.0,
            "6595": 13268858880.0,
            "6600": 13268858880.0,
            "6605": 13268858880.0,
            "6610": 13268858880.0,
            "6615": 13268858880.0,
            "6620": 13268858880.0,
            "6625": 13268858880.0,
            "6630": 13268858880.0,
            "6635": 13268858880.0,
            "6640": 13268858880.0,
            "6645": 13268858880.0,
            "6650": 13268858880.0,
            "6655": 13268858880.0,
            "6660": 13268858880.0,
            "6665": 13268858880.0,
            "6670": 13268858880.0,
            "6675": 13268858880.0,
            "6680": 13268858880.0,
            "6685": 13268858880.0,
            "6690": 13268858880.0,
            "6695": 13268858880.0,
            "6700": 13268858880.0,
            "6705": 13268858880.0,
            "6710": 13268858880.0,
            "6715": 13268858880.0,
            "6720": 13268858880.0,
            "6725": 13268858880.0,
            "6730": 13268858880.0,
            "6735": 13268858880.0,
            "6740": 13268858880.0,
            "6745": 13268858880.0,
            "6750": 13268858880.0,
            "6755": 13268858880.0,
            "6760": 13268858880.0,
            "6765": 13268858880.0,
            "6770": 13268858880.0,
            "6775": 13268858880.0,
            "6780": 13268858880.0,
            "6785": 13268858880.0,
            "6790": 13268858880.0,
            "6795": 13268858880.0,
            "6800": 13268858880.0,
            "6805": 13268858880.0,
            "6810": 13268858880.0,
            "6815": 13268858880.0,
            "6820": 13268858880.0,
            "6825": 13268858880.0,
            "6830": 13268858880.0,
            "6835": 13268858880.0,
            "6840": 13268858880.0,
            "6845": 13268858880.0,
            "6850": 13268858880.0,
            "6855": 13268858880.0,
            "6860": 13268858880.0,
            "6865": 13268858880.0,
            "6870": 13268858880.0,
            "6875": 13268858880.0,
            "6880": 13268858880.0,
            "6885": 13268858880.0,
            "6890": 13268858880.0,
            "6895": 13268858880.0,
            "6900": 13268858880.0,
            "6905": 13268858880.0,
            "6910": 13268858880.0,
            "6915": 13268858880.0,
            "6920": 13268858880.0,
            "6925": 13268858880.0,
            "6930": 13268858880.0,
            "6935": 13268858880.0,
            "6940": 13268858880.0,
            "6945": 13268858880.0,
            "6950": 13268858880.0,
            "6955": 13268858880.0,
            "6960": 13268858880.0,
            "6965": 13268858880.0,
            "6970": 13268858880.0,
            "6975": 13268858880.0,
            "6980": 13268858880.0,
            "6985": 13268858880.0,
            "6990": 13268858880.0,
            "6995": 13268858880.0,
            "7000": 13268858880.0,
            "7005": 13268858880.0,
            "7010": 13268858880.0,
            "7015": 13268858880.0,
            "7020": 13268858880.0,
            "7025": 13268858880.0,
            "7030": 13268858880.0,
            "7035": 13268858880.0,
            "7040": 13268858880.0,
            "7045": 13268858880.0,
            "7050": 13268858880.0,
            "7055": 13268858880.0,
            "7060": 13268858880.0,
            "7065": 13268858880.0,
            "7070": 13268858880.0,
            "7075": 13268858880.0,
            "7080": 13268858880.0,
            "7085": 13268858880.0,
            "7090": 13268858880.0,
            "7095": 13268858880.0,
            "7100": 13268858880.0,
            "7105": 13268858880.0,
            "7110": 13268858880.0,
            "7115": 13268858880.0,
            "7120": 13268858880.0,
            "7125": 13268858880.0,
            "7130": 13268858880.0,
            "7135": 13268858880.0,
            "7140": 13268858880.0,
            "7145": 13268858880.0,
            "7150": 13268858880.0,
            "7155": 13268858880.0,
            "7160": 13268858880.0,
            "7165": 13268858880.0,
            "7170": 13268858880.0,
            "7175": 13268858880.0,
            "7180": 13268858880.0,
            "7185": 13268858880.0,
            "7190": 13268858880.0,
            "7195": 13268858880.0,
            "7200": 13268858880.0,
            "7205": 13268858880.0,
            "7210": 13268858880.0,
            "7215": 13268858880.0,
            "7220": 13268858880.0,
            "7225": 13268858880.0,
            "7230": 13268858880.0,
            "7235": 13268858880.0,
            "7240": 13268858880.0,
            "7245": 13268858880.0,
            "7250": 13268858880.0,
            "7255": 13268858880.0,
            "7260": 13268858880.0,
            "7265": 13268858880.0,
            "7270": 13268858880.0,
            "7275": 13268858880.0,
            "7280": 13268858880.0,
            "7285": 13268858880.0,
            "7290": 13268858880.0,
            "7295": 13268858880.0,
            "7300": 13268858880.0,
            "7305": 13268858880.0,
            "7310": 13268858880.0,
            "7315": 13268858880.0,
            "7320": 13268858880.0,
            "7325": 13268858880.0,
            "7330": 13268858880.0,
            "7335": 13268858880.0,
            "7340": 13268858880.0,
            "7345": 13268858880.0,
            "7350": 13268858880.0,
            "7355": 13268858880.0,
            "7360": 13268858880.0,
            "7365": 13268858880.0,
            "7370": 13268858880.0,
            "7375": 13268858880.0,
            "7380": 13268858880.0,
            "7385": 13268858880.0,
            "7390": 13268858880.0,
            "7395": 13268858880.0,
            "7400": 13268858880.0,
            "7405": 13268858880.0,
            "7410": 13268858880.0,
            "7415": 13268858880.0,
            "7420": 13268858880.0,
            "7425": 13268858880.0,
            "7430": 13268858880.0,
            "7435": 13268858880.0,
            "7440": 13268858880.0,
            "7445": 13268858880.0,
            "7450": 13268858880.0,
            "7455": 13268858880.0,
            "7460": 13268858880.0,
            "7465": 13268858880.0,
            "7470": 13268858880.0,
            "7475": 13268858880.0,
            "7480": 13268858880.0,
            "7485": 13268858880.0,
            "7490": 13268858880.0,
            "7495": 13268858880.0,
            "7500": 13268858880.0,
            "7505": 13268858880.0,
            "7510": 13268858880.0,
            "7515": 13268858880.0,
            "7520": 13268858880.0,
            "7525": 13268858880.0,
            "7530": 13268858880.0,
            "7535": 13268858880.0,
            "7540": 13268858880.0,
            "7545": 13268858880.0,
            "7550": 13268858880.0,
            "7555": 13268858880.0,
            "7560": 13268858880.0,
            "7565": 13268858880.0,
            "7570": 13268858880.0,
            "7575": 13268858880.0,
            "7580": 13268858880.0,
            "7585": 13268858880.0,
            "7590": 13268858880.0,
            "7595": 13268858880.0,
            "7600": 13268858880.0,
            "7605": 13268858880.0,
            "7610": 13268858880.0,
            "7615": 13268858880.0,
            "7620": 13268858880.0,
            "7625": 13268858880.0,
            "7630": 13268858880.0,
            "7635": 13268858880.0,
            "7640": 13268858880.0,
            "7645": 13268858880.0,
            "7650": 13268858880.0,
            "7655": 13268858880.0,
            "7660": 13268858880.0,
            "7665": 13268858880.0,
            "7670": 13268858880.0,
            "7675": 13268858880.0,
            "7680": 13268858880.0,
            "7685": 13268858880.0,
            "7690": 13268858880.0,
            "7695": 13268858880.0,
            "7700": 13268858880.0,
            "7705": 13268858880.0,
            "7710": 13268858880.0,
            "7715": 13268858880.0,
            "7720": 13268858880.0,
            "7725": 13268858880.0,
            "7730": 13268858880.0,
            "7735": 13268858880.0,
            "7740": 13268858880.0,
            "7745": 13268858880.0,
            "7750": 13268858880.0,
            "7755": 13268858880.0,
            "7760": 13268858880.0,
            "7765": 13268858880.0,
            "7770": 13268858880.0,
            "7775": 13268858880.0,
            "7780": 13268858880.0,
            "7785": 13268858880.0,
            "7790": 13268858880.0,
            "7795": 13268858880.0,
            "7800": 13268858880.0,
            "7805": 13268858880.0,
            "7810": 13268858880.0,
            "7815": 13268858880.0,
            "7820": 13268858880.0,
            "7825": 13268858880.0,
            "7830": 13268858880.0,
            "7835": 13268858880.0,
            "7840": 13268858880.0,
            "7845": 13268858880.0,
            "7850": 13268858880.0,
            "7855": 13268858880.0,
            "7860": 13268858880.0,
            "7865": 13268858880.0,
            "7870": 13268858880.0,
            "7875": 13268858880.0,
            "7880": 13268858880.0,
            "7885": 13268858880.0,
            "7890": 13268858880.0,
            "7895": 13268858880.0,
            "7900": 13268858880.0,
            "7905": 13268858880.0,
            "7910": 13268858880.0,
            "7915": 13268858880.0,
            "7920": 13268858880.0,
            "7925": 13268858880.0,
            "7930": 13268858880.0,
            "7935": 13268858880.0,
            "7940": 13268858880.0,
            "7945": 13268858880.0,
            "7950": 13268858880.0,
            "7955": 13268858880.0,
            "7960": 13268858880.0,
            "7965": 13268858880.0,
            "7970": 13268858880.0,
            "7975": 13268858880.0,
            "7980": 13268858880.0,
            "7985": 13268858880.0,
            "7990": 13268858880.0,
            "7995": 13268858880.0,
            "8000": 13268858880.0,
            "8005": 13268858880.0,
            "8010": 13268858880.0,
            "8015": 13268858880.0,
            "8020": 13268858880.0,
            "8025": 13268858880.0,
            "8030": 13268858880.0,
            "8035": 13268858880.0,
            "8040": 13268858880.0,
            "8045": 13268858880.0,
            "8050": 13268858880.0,
            "8055": 13268858880.0,
            "8060": 13268858880.0,
            "8065": 13268858880.0,
            "8070": 13268858880.0,
            "8075": 13268858880.0,
            "8080": 13268858880.0,
            "8085": 13268858880.0,
            "8090": 13268858880.0,
            "8095": 13268858880.0,
            "8100": 13268858880.0,
            "8105": 13268858880.0,
            "8110": 13268858880.0,
            "8115": 13268858880.0,
            "8120": 13268858880.0,
            "8125": 13268858880.0,
            "8130": 13268858880.0,
            "8135": 13268858880.0,
            "8140": 13268858880.0,
            "8145": 13268858880.0,
            "8150": 13268858880.0,
            "8155": 13268858880.0,
            "8160": 13268858880.0,
            "8165": 13268858880.0,
            "8170": 13268858880.0,
            "8175": 13268858880.0,
            "8180": 13268858880.0,
            "8185": 13268858880.0,
            "8190": 13268858880.0,
            "8195": 13268858880.0,
            "8200": 13268858880.0,
            "8205": 13268858880.0,
            "8210": 13268858880.0,
            "8215": 13268858880.0,
            "8220": 13268858880.0,
            "8225": 13268858880.0,
            "8230": 13268858880.0,
            "8235": 13268858880.0,
            "8240": 13268858880.0,
            "8245": 13268858880.0,
            "8250": 13268858880.0,
            "8255": 13268858880.0,
            "8260": 13269043200.0,
            "8265": 13269043200.0,
            "8270": 13269043200.0,
            "8275": 13269043200.0,
            "8280": 13269043200.0,
            "8285": 13269043200.0,
            "8290": 13269043200.0,
            "8295": 13269043200.0,
            "8300": 13269043200.0,
            "8305": 13269043200.0,
            "8310": 13269043200.0,
            "8315": 13269043200.0,
            "8320": 13269043200.0,
            "8325": 13269043200.0,
            "8330": 13269043200.0,
            "8335": 13269043200.0,
            "8340": 13269043200.0,
            "8345": 13269043200.0,
            "8350": 13269043200.0,
            "8355": 13269043200.0,
            "8360": 13269043200.0,
            "8365": 13269043200.0,
            "8370": 13269043200.0,
            "8375": 13269043200.0,
            "8380": 13269043200.0,
            "8385": 13269043200.0,
            "8390": 13269043200.0,
            "8395": 13269043200.0,
            "8400": 13269043200.0,
            "8405": 13269043200.0,
            "8410": 13269043200.0,
            "8415": 13269043200.0,
            "8420": 13269043200.0,
            "8425": 13269043200.0,
            "8430": 13269043200.0,
            "8435": 13269043200.0,
            "8440": 13269043200.0,
            "8445": 13269043200.0,
            "8450": 13269043200.0,
            "8455": 13269043200.0,
            "8460": 13269043200.0,
            "8465": 13269043200.0,
            "8470": 13269043200.0,
            "8475": 13269043200.0,
            "8480": 13269043200.0,
            "8485": 13269043200.0,
            "8490": 13269043200.0,
            "8495": 13269043200.0,
            "8500": 13269043200.0,
            "8505": 13269043200.0,
            "8510": 13269043200.0,
            "8515": 13269043200.0,
            "8520": 13269043200.0,
            "8525": 13269043200.0,
            "8530": 13269043200.0,
            "8535": 13269043200.0,
            "8540": 13269043200.0,
            "8545": 13269043200.0,
            "8550": 13269043200.0,
            "8555": 13269043200.0,
            "8560": 13269043200.0,
            "8565": 13269043200.0,
            "8570": 13269043200.0,
            "8575": 13269043200.0,
            "8580": 13269043200.0,
            "8585": 13269043200.0,
            "8590": 13269043200.0,
            "8595": 13269043200.0,
            "8600": 13269043200.0,
            "8605": 13269043200.0,
            "8610": 13269043200.0,
            "8615": 13269043200.0,
            "8620": 13269043200.0,
            "8625": 13269043200.0,
            "8630": 13269043200.0,
            "8635": 13269043200.0,
            "8640": 13269043200.0,
            "8645": 13269043200.0,
            "8650": 13269043200.0,
            "8655": 13269043200.0,
            "8660": 13269043200.0,
            "8665": 13269043200.0,
            "8670": 13269043200.0,
            "8675": 13269043200.0,
            "8680": 13269043200.0,
            "8685": 13269043200.0,
            "8690": 13269043200.0,
            "8695": 13269043200.0,
            "8700": 13269043200.0,
            "8705": 13269043200.0,
            "8710": 13269043200.0,
            "8715": 13269043200.0,
            "8720": 13269043200.0,
            "8725": 13269043200.0,
            "8730": 13269043200.0,
            "8735": 13269043200.0,
            "8740": 13269043200.0,
            "8745": 13269043200.0,
            "8750": 13269043200.0,
            "8755": 13269043200.0,
            "8760": 13269043200.0,
            "8765": 13269043200.0,
            "8770": 13269043200.0,
            "8775": 13269043200.0,
            "8780": 13269043200.0,
            "8785": 13269043200.0,
            "8790": 13269043200.0,
            "8795": 13269043200.0,
            "8800": 13269043200.0,
            "8805": 13269043200.0,
            "8810": 13269043200.0,
            "8815": 13269043200.0,
            "8820": 13269043200.0,
            "8825": 13269043200.0,
            "8830": 13269043200.0,
            "8835": 13269043200.0,
            "8840": 13269043200.0,
            "8845": 13269043200.0,
            "8850": 13269043200.0,
            "8855": 13269043200.0,
            "8860": 13269043200.0,
            "8865": 13269043200.0,
            "8870": 13269043200.0,
            "8875": 13269043200.0,
            "8880": 13269043200.0,
            "8885": 13269043200.0,
            "8890": 13269043200.0,
            "8895": 13269043200.0,
            "8900": 13269043200.0,
            "8905": 13269043200.0,
            "8910": 13269043200.0,
            "8915": 13269043200.0,
            "8920": 13269043200.0,
            "8925": 13269043200.0,
            "8930": 13269043200.0,
            "8935": 13269043200.0,
            "8940": 13269043200.0,
            "8945": 13269043200.0,
            "8950": 13269043200.0,
            "8955": 13269043200.0,
            "8960": 13269043200.0,
            "8965": 13269043200.0,
            "8970": 13269043200.0,
            "8975": 13269043200.0,
            "8980": 13269043200.0,
            "8985": 13269043200.0,
            "8990": 13269043200.0,
            "8995": 13269043200.0,
            "9000": 13269043200.0,
            "9005": 13269043200.0,
            "9010": 13269043200.0,
            "9015": 13269043200.0,
            "9020": 13269043200.0,
            "9025": 13269043200.0,
            "9030": 13269043200.0,
            "9035": 13269043200.0,
            "9040": 13269043200.0,
            "9045": 13269043200.0,
            "9050": 13269043200.0,
            "9055": 13269043200.0,
            "9060": 13269043200.0,
            "9065": 13269043200.0,
            "9070": 13269043200.0,
            "9075": 13269043200.0,
            "9080": 13269043200.0,
            "9085": 13269043200.0,
            "9090": 13269043200.0,
            "9095": 13269043200.0,
            "9100": 13269043200.0,
            "9105": 13269043200.0,
            "9110": 13269043200.0,
            "9115": 13269043200.0,
            "9120": 13269043200.0,
            "9125": 13269043200.0,
            "9130": 13269043200.0,
            "9135": 13269043200.0,
            "9140": 13269043200.0,
            "9145": 13269043200.0,
            "9150": 13269043200.0,
            "9155": 13269043200.0,
            "9160": 13269043200.0,
            "9165": 13269043200.0,
            "9170": 13269043200.0,
            "9175": 13269043200.0,
            "9180": 13269043200.0,
            "9185": 13269043200.0,
            "9190": 13269043200.0,
            "9195": 13269043200.0,
            "9200": 13269043200.0,
            "9205": 13269043200.0,
            "9210": 13269043200.0,
            "9215": 13269043200.0,
            "9220": 13269043200.0,
            "9225": 13269043200.0,
            "9230": 13269043200.0,
            "9235": 13269043200.0,
            "9240": 13269043200.0,
            "9245": 13269043200.0,
            "9250": 13269043200.0,
            "9255": 13269043200.0,
            "9260": 13269043200.0,
            "9265": 13269043200.0,
            "9270": 13269043200.0,
            "9275": 13269043200.0,
            "9280": 13269043200.0,
            "9285": 13269043200.0,
            "9290": 13269043200.0,
            "9295": 13269043200.0,
            "9300": 13269043200.0,
            "9305": 13269043200.0,
            "9310": 13269043200.0,
            "9315": 13269043200.0,
            "9320": 13269043200.0,
            "9325": 13269043200.0,
            "9330": 13269043200.0,
            "9335": 13269043200.0,
            "9340": 13269043200.0,
            "9345": 13269043200.0,
            "9350": 13269043200.0,
            "9355": 13269043200.0,
            "9360": 13269043200.0,
            "9365": 13269043200.0,
            "9370": 13269043200.0,
            "9375": 13269043200.0,
            "9380": 13269043200.0,
            "9385": 13269043200.0,
            "9390": 13269043200.0,
            "9395": 13269043200.0,
            "9400": 13269043200.0,
            "9405": 13269043200.0,
            "9410": 13269043200.0,
            "9415": 13269043200.0,
            "9420": 13269043200.0,
            "9425": 13269043200.0,
            "9430": 13269043200.0,
            "9435": 13269043200.0,
            "9440": 13269043200.0,
            "9445": 13269043200.0,
            "9450": 13269043200.0,
            "9455": 13269043200.0,
            "9460": 13269043200.0,
            "9465": 13269043200.0,
            "9470": 13269043200.0,
            "9475": 13269043200.0,
            "9480": 13269043200.0,
            "9485": 13269043200.0,
            "9490": 13269043200.0,
            "9495": 13269043200.0,
            "9500": 13269043200.0,
            "9505": 13269043200.0,
            "9510": 13269043200.0,
            "9515": 13269043200.0,
            "9520": 13269043200.0,
            "9525": 13269043200.0,
            "9530": 13269043200.0,
            "9535": 13269043200.0,
            "9540": 13269043200.0,
            "9545": 13269043200.0,
            "9550": 13269043200.0,
            "9555": 13269043200.0,
            "9560": 13269043200.0,
            "9565": 13269043200.0,
            "9570": 13269043200.0,
            "9575": 13269043200.0,
            "9580": 13269043200.0,
            "9585": 13269043200.0,
            "9590": 13269043200.0,
            "9595": 13269043200.0,
            "9600": 13269043200.0,
            "9605": 13269043200.0,
            "9610": 13269043200.0,
            "9615": 13269043200.0,
            "9620": 13269043200.0,
            "9625": 13269043200.0,
            "9630": 13269043200.0,
            "9635": 13269043200.0,
            "9640": 13269043200.0,
            "9645": 13269043200.0,
            "9650": 13269043200.0,
            "9655": 13269043200.0,
            "9660": 13269043200.0,
            "9665": 13269043200.0,
            "9670": 13269043200.0,
            "9675": 13269043200.0,
            "9680": 13269043200.0,
            "9685": 13269043200.0,
            "9690": 13269043200.0,
            "9695": 13269043200.0,
            "9700": 13269043200.0,
            "9705": 13269043200.0,
            "9710": 13269043200.0,
            "9715": 13269043200.0,
            "9720": 13269043200.0,
            "9725": 13269043200.0,
            "9730": 13269043200.0,
            "9735": 13269043200.0,
            "9740": 13269043200.0,
            "9745": 13269043200.0,
            "9750": 13269043200.0,
            "9755": 13269043200.0,
            "9760": 13269043200.0,
            "9765": 13269043200.0,
            "9770": 13269043200.0,
            "9775": 13269043200.0,
            "9780": 13269043200.0,
            "9785": 13269043200.0,
            "9790": 13269043200.0,
            "9795": 13269043200.0,
            "9800": 13269043200.0,
            "9805": 13269043200.0,
            "9810": 13269043200.0,
            "9815": 13269043200.0,
            "9820": 13269043200.0,
            "9825": 13269043200.0,
            "9830": 13269043200.0,
            "9835": 13269043200.0,
            "9840": 13269043200.0,
            "9845": 13269043200.0,
            "9850": 13269043200.0,
            "9855": 13269043200.0,
            "9860": 13269043200.0,
            "9865": 13269043200.0,
            "9870": 13269043200.0,
            "9875": 13269043200.0,
            "9880": 13269043200.0,
            "9885": 13269043200.0,
            "9890": 13269043200.0,
            "9895": 13269043200.0,
            "9900": 13269043200.0,
            "9905": 13269043200.0,
            "9910": 13269043200.0,
            "9915": 13269043200.0,
            "9920": 13269043200.0,
            "9925": 13269043200.0,
            "9930": 13269043200.0,
            "9935": 13269043200.0,
            "9940": 13269043200.0,
            "9945": 13269043200.0,
            "9950": 13269043200.0,
            "9955": 13269043200.0,
            "9960": 13269043200.0,
            "9965": 13269043200.0,
            "9970": 13269043200.0,
            "9975": 13269043200.0,
            "9980": 13269043200.0,
            "9985": 13269043200.0,
            "9990": 13269043200.0,
            "9995": 13269043200.0,
            "10000": 13269043200.0,
            "10005": 13269043200.0,
            "10010": 13269043200.0,
            "10015": 13269043200.0,
            "10020": 13269043200.0,
            "10025": 13269043200.0,
            "10030": 13269043200.0,
            "10035": 13269043200.0,
            "10040": 13269043200.0,
            "10045": 13269043200.0,
            "10050": 13269043200.0,
            "10055": 13269043200.0,
            "10060": 13269043200.0,
            "10065": 13269043200.0,
            "10070": 13269043200.0,
            "10075": 13269043200.0,
            "10080": 13269043200.0,
            "10085": 13269043200.0,
            "10090": 13269043200.0,
            "10095": 13269043200.0,
            "10100": 13269043200.0,
            "10105": 13269043200.0,
            "10110": 13269043200.0,
            "10115": 13269043200.0,
            "10120": 13269043200.0,
            "10125": 13269043200.0,
            "10130": 13269043200.0,
            "10135": 13269043200.0,
            "10140": 13269043200.0,
            "10145": 13269043200.0,
            "10150": 13269043200.0,
            "10155": 13269043200.0,
            "10160": 13269043200.0,
            "10165": 13269043200.0,
            "10170": 13269043200.0,
            "10175": 13269043200.0,
            "10180": 13269043200.0,
            "10185": 13269043200.0,
            "10190": 13269043200.0,
            "10195": 13269043200.0,
            "10200": 13269043200.0,
            "10205": 13269043200.0,
            "10210": 13269043200.0,
            "10215": 13269043200.0,
            "10220": 13269043200.0,
            "10225": 13269043200.0,
            "10230": 13269043200.0,
            "10235": 13269043200.0,
            "10240": 13269043200.0,
            "10245": 13269043200.0,
            "10250": 13269043200.0,
            "10255": 13269043200.0,
            "10260": 13269043200.0,
            "10265": 13269043200.0,
            "10270": 13269043200.0,
            "10275": 13269043200.0,
            "10280": 13269043200.0,
            "10285": 13269043200.0,
            "10290": 13269043200.0,
            "10295": 13269043200.0,
            "10300": 13269043200.0,
            "10305": 13269043200.0,
            "10310": 13269043200.0,
            "10315": 13269043200.0,
            "10320": 13269043200.0,
            "10325": 13269043200.0,
            "10330": 13269043200.0,
            "10335": 13269043200.0,
            "10340": 13269043200.0,
            "10345": 13269043200.0,
            "10350": 13269043200.0,
            "10355": 13269043200.0,
            "10360": 13269043200.0,
            "10365": 13269043200.0,
            "10370": 13269043200.0,
            "10375": 13269043200.0,
            "10380": 13269043200.0,
            "10385": 13269043200.0,
            "10390": 13269043200.0,
            "10395": 13269043200.0,
            "10400": 13269043200.0,
            "10405": 13269043200.0,
            "10410": 13269043200.0,
            "10415": 13269043200.0,
            "10420": 13269043200.0,
            "10425": 13269043200.0,
            "10430": 13269043200.0,
            "10435": 13269043200.0,
            "10440": 13269043200.0,
            "10445": 13269043200.0,
            "10450": 13269043200.0,
            "10455": 13269043200.0,
            "10460": 13269043200.0,
            "10465": 13269043200.0,
            "10470": 13269043200.0,
            "10475": 13269043200.0,
            "10480": 13269043200.0,
            "10485": 13269043200.0,
            "10490": 13269043200.0,
            "10495": 13269043200.0,
            "10500": 13269043200.0,
            "10505": 13269043200.0,
            "10510": 13269043200.0,
            "10515": 13269043200.0,
            "10520": 13269043200.0,
            "10525": 13269043200.0,
            "10530": 13269043200.0,
            "10535": 13269043200.0,
            "10540": 13269043200.0,
            "10545": 13269043200.0,
            "10550": 13269043200.0,
            "10555": 13269043200.0,
            "10560": 13269043200.0,
            "10565": 13269043200.0,
            "10570": 13269043200.0,
            "10575": 13269043200.0,
            "10580": 13269043200.0,
            "10585": 13269043200.0,
            "10590": 13269043200.0,
            "10595": 13269043200.0,
            "10600": 13269043200.0,
            "10605": 13269043200.0,
            "10610": 13269043200.0,
            "10615": 13269043200.0,
            "10620": 13269043200.0,
            "10625": 13269043200.0,
            "10630": 13269043200.0,
            "10635": 13269043200.0,
            "10640": 13269043200.0,
            "10645": 13269043200.0,
            "10650": 13269043200.0,
            "10655": 13269043200.0,
            "10660": 13269043200.0,
            "10665": 13269043200.0,
            "10670": 13269043200.0,
            "10675": 13269043200.0,
            "10680": 13269043200.0,
            "10685": 13269043200.0,
            "10690": 13269043200.0,
            "10695": 13269043200.0,
            "10700": 13269043200.0,
            "10705": 13269043200.0,
            "10710": 13269043200.0,
            "10715": 13269043200.0,
            "10720": 13269043200.0,
            "10725": 13269043200.0,
            "10730": 13269043200.0,
            "10735": 13269043200.0,
            "10740": 13269043200.0,
            "10745": 13269043200.0,
            "10750": 13269043200.0,
            "10755": 13269043200.0,
            "10760": 13269043200.0,
            "10765": 13269043200.0,
            "10770": 13269043200.0,
            "10775": 13269043200.0,
            "10780": 13269043200.0,
            "10785": 13269043200.0,
            "10790": 13269043200.0,
            "10795": 13269043200.0,
            "10800": 13269043200.0,
            "10805": 13269043200.0,
            "10810": 13269043200.0,
            "10815": 13269043200.0,
            "10820": 13269043200.0,
            "10825": 13269043200.0,
            "10830": 13269043200.0,
            "10835": 13269043200.0,
            "10840": 13269043200.0,
            "10845": 13269043200.0,
            "10850": 13269043200.0,
            "10855": 13269043200.0,
            "10860": 13269043200.0,
            "10865": 13269043200.0,
            "10870": 13269043200.0,
            "10875": 13269043200.0,
            "10880": 13269043200.0,
            "10885": 13269043200.0,
            "10890": 13269043200.0,
            "10895": 13269043200.0,
            "10900": 13269043200.0,
            "10905": 13269043200.0,
            "10910": 13269043200.0,
            "10915": 13269043200.0,
            "10920": 13269043200.0,
            "10925": 13269043200.0,
            "10930": 13269043200.0,
            "10935": 13269043200.0,
            "10940": 13269043200.0,
            "10945": 13269043200.0,
            "10950": 13269043200.0,
            "10955": 13269043200.0,
            "10960": 13269043200.0,
            "10965": 13269043200.0,
            "10970": 13269043200.0,
            "10975": 13269043200.0,
            "10980": 13269043200.0,
            "10985": 13269043200.0,
            "10990": 13269043200.0,
            "10995": 13269043200.0,
            "11000": 13269043200.0,
            "11005": 13269043200.0,
            "11010": 13269043200.0,
            "11015": 13269043200.0,
            "11020": 13269043200.0,
            "11025": 13269043200.0,
            "11030": 13269043200.0,
            "11035": 13269043200.0,
            "11040": 13269043200.0,
            "11045": 13269043200.0,
            "11050": 13269043200.0,
            "11055": 13269043200.0,
            "11060": 13269043200.0,
            "11065": 13269043200.0,
            "11070": 13269043200.0,
            "11075": 13269043200.0,
            "11080": 13269043200.0,
            "11085": 13269043200.0,
            "11090": 13269043200.0,
            "11095": 13269043200.0,
            "11100": 13269043200.0,
            "11105": 13269043200.0,
            "11110": 13269043200.0,
            "11115": 13269043200.0,
            "11120": 13269043200.0,
            "11125": 13269043200.0,
            "11130": 13269043200.0,
            "11135": 13269043200.0,
            "11140": 13269043200.0,
            "11145": 13269043200.0,
            "11150": 13269043200.0,
            "11155": 13269043200.0,
            "11160": 13269043200.0,
            "11165": 13269043200.0,
            "11170": 13269043200.0,
            "11175": 13269043200.0,
            "11180": 13269043200.0,
            "11185": 13269043200.0,
            "11190": 13269043200.0,
            "11195": 13269043200.0,
            "11200": 13269043200.0,
            "11205": 13269043200.0,
            "11210": 13269043200.0,
            "11215": 13269043200.0,
            "11220": 13269043200.0,
            "11225": 13269043200.0,
            "11230": 13269043200.0,
            "11235": 13269043200.0,
            "11240": 13269043200.0,
            "11245": 13269043200.0,
            "11250": 13269043200.0,
            "11255": 13269043200.0,
            "11260": 13269043200.0,
            "11265": 13269043200.0,
            "11270": 13269043200.0,
            "11275": 13269043200.0,
            "11280": 13269043200.0,
            "11285": 13269043200.0,
            "11290": 13269043200.0,
            "11295": 13269043200.0,
            "11300": 13269043200.0,
            "11305": 13269043200.0,
            "11310": 13269043200.0,
            "11315": 13269043200.0,
            "11320": 13269043200.0,
            "11325": 13269043200.0,
            "11330": 13269043200.0,
            "11335": 13269043200.0,
            "11340": 13269043200.0,
            "11345": 13269043200.0,
            "11350": 13269043200.0,
            "11355": 13269043200.0,
            "11360": 13269043200.0,
            "11365": 13269043200.0,
            "11370": 13269043200.0,
            "11375": 13269043200.0,
            "11380": 13269043200.0,
            "11385": 13269043200.0,
            "11390": 13269043200.0,
            "11395": 13269043200.0,
            "11400": 13269043200.0,
            "11405": 13269043200.0,
            "11410": 13269043200.0,
            "11415": 13269043200.0,
            "11420": 13269043200.0,
            "11425": 13269043200.0,
            "11430": 13269043200.0,
            "11435": 13269043200.0,
            "11440": 13269043200.0,
            "11445": 13269043200.0,
            "11450": 13269043200.0,
            "11455": 13269043200.0,
            "11460": 13269043200.0,
            "11465": 13269043200.0,
            "11470": 13269043200.0,
            "11475": 13269043200.0,
            "11480": 13269043200.0,
            "11485": 13269043200.0,
            "11490": 13269043200.0,
            "11495": 13269043200.0,
            "11500": 13269043200.0,
            "11505": 13269043200.0,
            "11510": 13269043200.0,
            "11515": 13269043200.0,
            "11520": 13269043200.0,
            "11525": 13269043200.0,
            "11530": 13269043200.0,
            "11535": 13269043200.0,
            "11540": 13269043200.0,
            "11545": 13269043200.0,
            "11550": 13269043200.0,
            "11555": 13269043200.0,
            "11560": 13269043200.0,
            "11565": 13269043200.0,
            "11570": 13269043200.0,
            "11575": 13269043200.0,
            "11580": 13269043200.0,
            "11585": 13269043200.0,
            "11590": 13269043200.0,
            "11595": 13269043200.0,
            "11600": 13269043200.0,
            "11605": 13269043200.0,
            "11610": 13269043200.0,
            "11615": 13269043200.0,
            "11620": 13269043200.0,
            "11625": 13269043200.0,
            "11630": 13269043200.0,
            "11635": 13269043200.0,
            "11640": 13269043200.0,
            "11645": 13269043200.0,
            "11650": 13269043200.0,
            "11655": 13269043200.0,
            "11660": 13269043200.0,
            "11665": 13269043200.0,
            "11670": 13269043200.0,
            "11675": 13269043200.0,
            "11680": 13269043200.0,
            "11685": 13269043200.0,
            "11690": 13269043200.0,
            "11695": 13269043200.0,
            "11700": 13269043200.0,
            "11705": 13269043200.0,
            "11710": 13269043200.0,
            "11715": 13269043200.0,
            "11720": 13269043200.0,
            "11725": 13269043200.0,
            "11730": 13269043200.0,
            "11735": 13269043200.0,
            "11740": 13269043200.0,
            "11745": 13269043200.0,
            "11750": 13269043200.0,
            "11755": 13269043200.0,
            "11760": 13269043200.0,
            "11765": 13269043200.0,
            "11770": 13269043200.0,
            "11775": 13269043200.0,
            "11780": 13269043200.0,
            "11785": 13269043200.0,
            "11790": 13269043200.0,
            "11795": 13269043200.0,
            "11800": 13269043200.0,
            "11805": 13269043200.0,
            "11810": 13269043200.0,
            "11815": 13269043200.0,
            "11820": 13269043200.0,
            "11825": 13269043200.0,
            "11830": 13269043200.0,
            "11835": 13269043200.0,
            "11840": 13269043200.0,
            "11845": 13269043200.0,
            "11850": 13269043200.0,
            "11855": 13269043200.0,
            "11860": 13269043200.0,
            "11865": 13269043200.0,
            "11870": 13269043200.0,
            "11875": 13269043200.0,
            "11880": 13269043200.0,
            "11885": 13269043200.0,
            "11890": 13269043200.0,
            "11895": 13269043200.0,
            "11900": 13269043200.0,
            "11905": 13269043200.0,
            "11910": 13269043200.0,
            "11915": 13269043200.0,
            "11920": 13269043200.0,
            "11925": 13269043200.0,
            "11930": 13269043200.0,
            "11935": 13269043200.0,
            "11940": 13269043200.0,
            "11945": 13269043200.0,
            "11950": 13269043200.0,
            "11955": 13269043200.0,
            "11960": 13269043200.0,
            "11965": 13269043200.0,
            "11970": 13269043200.0,
            "11975": 13269043200.0,
            "11980": 13269043200.0,
            "11985": 13269043200.0,
            "11990": 13269043200.0,
            "11995": 13269043200.0,
            "12000": 13269043200.0,
            "12005": 13269043200.0,
            "12010": 13269043200.0,
            "12015": 13269043200.0,
            "12020": 13269043200.0,
            "12025": 13269043200.0,
            "12030": 13269043200.0,
            "12035": 13269043200.0,
            "12040": 13269043200.0,
            "12045": 13269043200.0,
            "12050": 13269043200.0,
            "12055": 13269043200.0,
            "12060": 13269043200.0,
            "12065": 13269043200.0,
            "12070": 13269043200.0,
            "12075": 13269043200.0,
            "12080": 13269043200.0,
            "12085": 13269043200.0,
            "12090": 13269043200.0,
            "12095": 13269043200.0,
            "12100": 13269043200.0,
            "12105": 13269043200.0,
            "12110": 13269043200.0,
            "12115": 13269043200.0,
            "12120": 13269043200.0,
            "12125": 13269043200.0,
            "12130": 13269043200.0,
            "12135": 13269043200.0,
            "12140": 13269043200.0,
            "12145": 13269043200.0,
            "12150": 13269043200.0,
            "12155": 13269043200.0,
            "12160": 13269043200.0,
            "12165": 13269043200.0,
            "12170": 13269043200.0,
            "12175": 13269043200.0,
            "12180": 13269043200.0,
            "12185": 13269043200.0,
            "12190": 13269043200.0,
            "12195": 13269043200.0,
            "12200": 13269043200.0,
            "12205": 13269043200.0,
            "12210": 13269043200.0,
            "12215": 13269043200.0,
            "12220": 13269043200.0,
            "12225": 13269043200.0,
            "12230": 13269043200.0,
            "12235": 13269043200.0,
            "12240": 13269043200.0,
            "12245": 13269043200.0,
            "12250": 13269043200.0,
            "12255": 13269043200.0,
            "12260": 13269043200.0,
            "12265": 13269043200.0,
            "12270": 13269043200.0,
            "12275": 13269043200.0,
            "12280": 13269043200.0,
            "12285": 13269043200.0,
            "12290": 13269043200.0,
            "12295": 13269043200.0,
            "12300": 13269043200.0,
            "12305": 13269043200.0,
            "12310": 13269043200.0,
            "12315": 13269043200.0,
            "12320": 13269043200.0,
            "12325": 13269043200.0,
            "12330": 13269043200.0,
            "12335": 13269043200.0,
            "12340": 13269043200.0,
            "12345": 13269043200.0,
            "12350": 13269043200.0,
            "12355": 13269043200.0,
            "12360": 13269043200.0,
            "12365": 13269043200.0,
            "12370": 13269043200.0,
            "12375": 13269043200.0,
            "12380": 13269043200.0,
            "12385": 13269043200.0,
            "12390": 13269043200.0,
            "12395": 13269043200.0,
            "12400": 13269043200.0,
            "12405": 13269043200.0,
            "12410": 13269043200.0,
            "12415": 13269043200.0,
            "12420": 13269043200.0,
            "12425": 13269043200.0,
            "12430": 13269043200.0,
            "12435": 13269043200.0,
            "12440": 13269043200.0,
            "12445": 13269043200.0,
            "12450": 13269043200.0,
            "12455": 13269043200.0,
            "12460": 13269043200.0,
            "12465": 13269043200.0,
            "12470": 13269043200.0,
            "12475": 13269043200.0,
            "12480": 13269043200.0,
            "12485": 13269043200.0,
            "12490": 13269043200.0,
            "12495": 13269043200.0,
            "12500": 13269043200.0,
            "12505": 13269043200.0,
            "12510": 13269043200.0,
            "12515": 13269043200.0,
            "12520": 13269043200.0,
            "12525": 13269043200.0,
            "12530": 13269043200.0,
            "12535": 13269043200.0,
            "12540": 13269043200.0,
            "12545": 13269043200.0,
            "12550": 13269043200.0,
            "12555": 13269043200.0,
            "12560": 13269043200.0,
            "12565": 13269043200.0,
            "12570": 13269043200.0,
            "12575": 13269043200.0,
            "12580": 13269043200.0,
            "12585": 13269043200.0,
            "12590": 13269043200.0,
            "12595": 13269043200.0,
            "12600": 13269043200.0,
            "12605": 13269043200.0,
            "12610": 13269043200.0,
            "12615": 13269043200.0,
            "12620": 13269043200.0,
            "12625": 13269043200.0,
            "12630": 13269043200.0,
            "12635": 13269043200.0,
            "12640": 13269043200.0,
            "12645": 13269043200.0,
            "12650": 13269043200.0,
            "12655": 13269043200.0,
            "12660": 13269043200.0,
            "12665": 13269043200.0,
            "12670": 13269043200.0,
            "12675": 13269043200.0,
            "12680": 13269043200.0,
            "12685": 13269043200.0,
            "12690": 13269043200.0,
            "12695": 13269043200.0,
            "12700": 13269043200.0,
            "12705": 13269043200.0,
            "12710": 13269043200.0,
            "12715": 13269043200.0,
            "12720": "nan",
            "12725": "nan",
            "12730": "nan",
            "12735": "nan",
            "12740": "nan",
            "12745": "nan",
            "12750": "nan",
            "12755": "nan",
            "12760": "nan",
            "12765": "nan",
            "12770": "nan",
            "12775": "nan",
            "12780": "nan",
            "12785": "nan",
            "12790": "nan",
            "12795": "nan",
            "12800": "nan",
            "12805": "nan",
            "12810": "nan",
            "12815": "nan",
            "12820": "nan",
            "12825": "nan",
            "12830": "nan",
            "12835": "nan",
            "12840": "nan",
            "12845": "nan",
            "12850": "nan",
            "12855": "nan",
            "12860": "nan",
            "12865": "nan",
            "12870": "nan",
            "12875": "nan",
            "12880": "nan",
            "12885": "nan",
            "12890": "nan",
            "12895": "nan",
            "12900": "nan",
            "12905": "nan",
            "12910": "nan",
            "12915": "nan",
            "12920": "nan",
            "12925": "nan",
            "12930": "nan",
            "12935": "nan",
            "12940": "nan",
            "12945": "nan",
            "12950": "nan",
            "12955": "nan",
            "12960": "nan",
            "12965": "nan",
            "12970": "nan",
            "12975": "nan",
            "12980": "nan",
            "12985": "nan",
            "12990": "nan",
            "12995": "nan",
            "13000": "nan"
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 13000,
        "step_interval": 5,
        "values": {
            "1": 27658764288.0,
            "5": 28157253632.0,
            "10": 28157253632.0,
            "15": 28157253632.0,
            "20": 28157253632.0,
            "25": 28157253632.0,
            "30": 28157253632.0,
            "35": 28157253632.0,
            "40": 28157253632.0,
            "45": 28157253632.0,
            "50": 28157253632.0,
            "55": 28157253632.0,
            "60": 28157253632.0,
            "65": 28157253632.0,
            "70": 28157253632.0,
            "75": 28157253632.0,
            "80": 28157253632.0,
            "85": 28157253632.0,
            "90": 28157253632.0,
            "95": 28157253632.0,
            "100": 28157253632.0,
            "105": 28157253632.0,
            "110": 28157253632.0,
            "115": 28157253632.0,
            "120": 28157253632.0,
            "125": 28157253632.0,
            "130": 28157253632.0,
            "135": 28157253632.0,
            "140": 28157253632.0,
            "145": 28157253632.0,
            "150": 28157253632.0,
            "155": 28157253632.0,
            "160": 28157253632.0,
            "165": 28157253632.0,
            "170": 28157253632.0,
            "175": 28157253632.0,
            "180": 28157253632.0,
            "185": 28157253632.0,
            "190": 28157253632.0,
            "195": 28157253632.0,
            "200": 28157253632.0,
            "205": 28157253632.0,
            "210": 28157253632.0,
            "215": 28157253632.0,
            "220": 28157253632.0,
            "225": 28157253632.0,
            "230": 28157253632.0,
            "235": 28157253632.0,
            "240": 28157253632.0,
            "245": 28157253632.0,
            "250": 28157253632.0,
            "255": 28157253632.0,
            "260": 28157253632.0,
            "265": 28157253632.0,
            "270": 28157253632.0,
            "275": 28157253632.0,
            "280": 28157253632.0,
            "285": 28157253632.0,
            "290": 28157253632.0,
            "295": 28157253632.0,
            "300": 28157253632.0,
            "305": 28157253632.0,
            "310": 28157253632.0,
            "315": 28157253632.0,
            "320": 28157253632.0,
            "325": 28157253632.0,
            "330": 28157253632.0,
            "335": 28157253632.0,
            "340": 28157253632.0,
            "345": 28157253632.0,
            "350": 28157253632.0,
            "355": 28157253632.0,
            "360": 28157253632.0,
            "365": 28157253632.0,
            "370": 28157253632.0,
            "375": 28157253632.0,
            "380": 28157253632.0,
            "385": 28157253632.0,
            "390": 28157253632.0,
            "395": 28157253632.0,
            "400": 28157253632.0,
            "405": 28157253632.0,
            "410": 28157253632.0,
            "415": 28157253632.0,
            "420": 28157253632.0,
            "425": 28157253632.0,
            "430": 28157253632.0,
            "435": 28157253632.0,
            "440": 28157253632.0,
            "445": 28157253632.0,
            "450": 28157253632.0,
            "455": 28157253632.0,
            "460": 28157253632.0,
            "465": 28157253632.0,
            "470": 28157253632.0,
            "475": 28157253632.0,
            "480": 28157253632.0,
            "485": 28157253632.0,
            "490": 28157253632.0,
            "495": 28157253632.0,
            "500": 28157253632.0,
            "505": 28157253632.0,
            "510": 28157253632.0,
            "515": 28157253632.0,
            "520": 28157253632.0,
            "525": 28157253632.0,
            "530": 28157253632.0,
            "535": 28157253632.0,
            "540": 28157253632.0,
            "545": 28157253632.0,
            "550": 28157253632.0,
            "555": 28157253632.0,
            "560": 28157253632.0,
            "565": 28157253632.0,
            "570": 28157253632.0,
            "575": 28157253632.0,
            "580": 28157253632.0,
            "585": 28157253632.0,
            "590": 28157253632.0,
            "595": 28157253632.0,
            "600": 28157253632.0,
            "605": 28157253632.0,
            "610": 28157253632.0,
            "615": 28157253632.0,
            "620": 28157253632.0,
            "625": 28157253632.0,
            "630": 28157253632.0,
            "635": 28157253632.0,
            "640": 28157253632.0,
            "645": 28157253632.0,
            "650": 28157253632.0,
            "655": 28157253632.0,
            "660": 28157253632.0,
            "665": 28157253632.0,
            "670": 28157253632.0,
            "675": 28157253632.0,
            "680": 28157253632.0,
            "685": 28157253632.0,
            "690": 28157253632.0,
            "695": 28157253632.0,
            "700": 28157253632.0,
            "705": 28157253632.0,
            "710": 28157253632.0,
            "715": 28157253632.0,
            "720": 28157253632.0,
            "725": 28157253632.0,
            "730": 28157253632.0,
            "735": 28157253632.0,
            "740": 28157253632.0,
            "745": 28157253632.0,
            "750": 28157253632.0,
            "755": 28157253632.0,
            "760": 28157253632.0,
            "765": 28157253632.0,
            "770": 28157253632.0,
            "775": 28157253632.0,
            "780": 28157253632.0,
            "785": 28157253632.0,
            "790": 28157253632.0,
            "795": 28157253632.0,
            "800": 28157253632.0,
            "805": 28157253632.0,
            "810": 28157253632.0,
            "815": 28157253632.0,
            "820": 28157253632.0,
            "825": 28157253632.0,
            "830": 28157253632.0,
            "835": 28157253632.0,
            "840": 28157253632.0,
            "845": 28157253632.0,
            "850": 28157253632.0,
            "855": 28157253632.0,
            "860": 28157253632.0,
            "865": 28157253632.0,
            "870": 28157253632.0,
            "875": 28157253632.0,
            "880": 28157253632.0,
            "885": 28157253632.0,
            "890": 28157253632.0,
            "895": 28157253632.0,
            "900": 28157253632.0,
            "905": 28157253632.0,
            "910": 28157253632.0,
            "915": 28157253632.0,
            "920": 28157253632.0,
            "925": 28157253632.0,
            "930": 28157253632.0,
            "935": 28157253632.0,
            "940": 28157253632.0,
            "945": 28157253632.0,
            "950": 28157253632.0,
            "955": 28157253632.0,
            "960": 28157253632.0,
            "965": 28157253632.0,
            "970": 28157253632.0,
            "975": 28157253632.0,
            "980": 28157253632.0,
            "985": 28157253632.0,
            "990": 28157253632.0,
            "995": 28157253632.0,
            "1000": 28157253632.0,
            "1005": 28157253632.0,
            "1010": 28157253632.0,
            "1015": 28157253632.0,
            "1020": 28157253632.0,
            "1025": 28157253632.0,
            "1030": 28157253632.0,
            "1035": 28157253632.0,
            "1040": 28157253632.0,
            "1045": 28157253632.0,
            "1050": 28157253632.0,
            "1055": 28157253632.0,
            "1060": 28157253632.0,
            "1065": 28157253632.0,
            "1070": 28157253632.0,
            "1075": 28157253632.0,
            "1080": 28157253632.0,
            "1085": 28157253632.0,
            "1090": 28157253632.0,
            "1095": 28157253632.0,
            "1100": 28157253632.0,
            "1105": 28157253632.0,
            "1110": 28157253632.0,
            "1115": 28157253632.0,
            "1120": 28157253632.0,
            "1125": 28157253632.0,
            "1130": 28157253632.0,
            "1135": 28157253632.0,
            "1140": 28157253632.0,
            "1145": 28157253632.0,
            "1150": 28157253632.0,
            "1155": 28157253632.0,
            "1160": 28157253632.0,
            "1165": 28157253632.0,
            "1170": 28157253632.0,
            "1175": 28157253632.0,
            "1180": 28157253632.0,
            "1185": 28157253632.0,
            "1190": 28157253632.0,
            "1195": 28157253632.0,
            "1200": 28157253632.0,
            "1205": 28157253632.0,
            "1210": 28157253632.0,
            "1215": 28157253632.0,
            "1220": 28157253632.0,
            "1225": 28157253632.0,
            "1230": 28157253632.0,
            "1235": 28157253632.0,
            "1240": 28157253632.0,
            "1245": 28157253632.0,
            "1250": 28157253632.0,
            "1255": 28157253632.0,
            "1260": 28157253632.0,
            "1265": 28157253632.0,
            "1270": 28157253632.0,
            "1275": 28157253632.0,
            "1280": 28157253632.0,
            "1285": 28157253632.0,
            "1290": 28157253632.0,
            "1295": 28157253632.0,
            "1300": 28157253632.0,
            "1305": 28157253632.0,
            "1310": 28157253632.0,
            "1315": 28157253632.0,
            "1320": 28157253632.0,
            "1325": 28157253632.0,
            "1330": 28157253632.0,
            "1335": 28157253632.0,
            "1340": 28157253632.0,
            "1345": 28157253632.0,
            "1350": 28157253632.0,
            "1355": 28157253632.0,
            "1360": 28157253632.0,
            "1365": 28157253632.0,
            "1370": 28157253632.0,
            "1375": 28157253632.0,
            "1380": 28157253632.0,
            "1385": 28157253632.0,
            "1390": 28157253632.0,
            "1395": 28157253632.0,
            "1400": 28157253632.0,
            "1405": 28157253632.0,
            "1410": 28157253632.0,
            "1415": 28157253632.0,
            "1420": 28157253632.0,
            "1425": 28157253632.0,
            "1430": 28157253632.0,
            "1435": 28157253632.0,
            "1440": 28157253632.0,
            "1445": 28157253632.0,
            "1450": 28157253632.0,
            "1455": 28157253632.0,
            "1460": 28157253632.0,
            "1465": 28157253632.0,
            "1470": 28157253632.0,
            "1475": 28157253632.0,
            "1480": 28157253632.0,
            "1485": 28157253632.0,
            "1490": 28157253632.0,
            "1495": 28157253632.0,
            "1500": 28157253632.0,
            "1505": 28157253632.0,
            "1510": 28157253632.0,
            "1515": 28157253632.0,
            "1520": 28157253632.0,
            "1525": 28157253632.0,
            "1530": 28157253632.0,
            "1535": 28157253632.0,
            "1540": 28157253632.0,
            "1545": 28157253632.0,
            "1550": 28157253632.0,
            "1555": 28157253632.0,
            "1560": 28157253632.0,
            "1565": 28157253632.0,
            "1570": 28157253632.0,
            "1575": 28157253632.0,
            "1580": 28157253632.0,
            "1585": 28157253632.0,
            "1590": 28157253632.0,
            "1595": 28157253632.0,
            "1600": 28157253632.0,
            "1605": 28157253632.0,
            "1610": 28157253632.0,
            "1615": 28157253632.0,
            "1620": 28157253632.0,
            "1625": 28157253632.0,
            "1630": 28157253632.0,
            "1635": 28157253632.0,
            "1640": 28157253632.0,
            "1645": 28157253632.0,
            "1650": 28157253632.0,
            "1655": 28157253632.0,
            "1660": 28157253632.0,
            "1665": 28157253632.0,
            "1670": 28157253632.0,
            "1675": 28157253632.0,
            "1680": 28157253632.0,
            "1685": 28157253632.0,
            "1690": 28157253632.0,
            "1695": 28157253632.0,
            "1700": 28157253632.0,
            "1705": 28157253632.0,
            "1710": 28157253632.0,
            "1715": 28157253632.0,
            "1720": 28157253632.0,
            "1725": 28157253632.0,
            "1730": 28157253632.0,
            "1735": 28157253632.0,
            "1740": 28157253632.0,
            "1745": 28157253632.0,
            "1750": 28157253632.0,
            "1755": 28157253632.0,
            "1760": 28157253632.0,
            "1765": 28157253632.0,
            "1770": 28157253632.0,
            "1775": 28157253632.0,
            "1780": 28157253632.0,
            "1785": 28157253632.0,
            "1790": 28157253632.0,
            "1795": 28157253632.0,
            "1800": 28157253632.0,
            "1805": 28157253632.0,
            "1810": 28157253632.0,
            "1815": 28157253632.0,
            "1820": 28157253632.0,
            "1825": 28157253632.0,
            "1830": 28157253632.0,
            "1835": 28157253632.0,
            "1840": 28157253632.0,
            "1845": 28157253632.0,
            "1850": 28157253632.0,
            "1855": 28157253632.0,
            "1860": 28157253632.0,
            "1865": 28157253632.0,
            "1870": 28157253632.0,
            "1875": 28157253632.0,
            "1880": 28157253632.0,
            "1885": 28157253632.0,
            "1890": 28157253632.0,
            "1895": 28157253632.0,
            "1900": 28157253632.0,
            "1905": 28157253632.0,
            "1910": 28157253632.0,
            "1915": 28157253632.0,
            "1920": 28157253632.0,
            "1925": 28157253632.0,
            "1930": 28157253632.0,
            "1935": 28157253632.0,
            "1940": 28157253632.0,
            "1945": 28157253632.0,
            "1950": 28157253632.0,
            "1955": 28157253632.0,
            "1960": 28157253632.0,
            "1965": 28157253632.0,
            "1970": 28157253632.0,
            "1975": 28157253632.0,
            "1980": 28157253632.0,
            "1985": 28157253632.0,
            "1990": 28157253632.0,
            "1995": 28157253632.0,
            "2000": 28157253632.0,
            "2005": 28157253632.0,
            "2010": 28157253632.0,
            "2015": 28157253632.0,
            "2020": 28157253632.0,
            "2025": 28157253632.0,
            "2030": 28157253632.0,
            "2035": 28157253632.0,
            "2040": 28157253632.0,
            "2045": 28157253632.0,
            "2050": 28157253632.0,
            "2055": 28157253632.0,
            "2060": 28157253632.0,
            "2065": 28157253632.0,
            "2070": 28157253632.0,
            "2075": 28157253632.0,
            "2080": 28157253632.0,
            "2085": 28157253632.0,
            "2090": 28157253632.0,
            "2095": 28157253632.0,
            "2100": 28157253632.0,
            "2105": 28157253632.0,
            "2110": 28157253632.0,
            "2115": 28157253632.0,
            "2120": 28157253632.0,
            "2125": 28157253632.0,
            "2130": 28157253632.0,
            "2135": 28157253632.0,
            "2140": 28157253632.0,
            "2145": 28157253632.0,
            "2150": 28157253632.0,
            "2155": 28157253632.0,
            "2160": 28157253632.0,
            "2165": 28157253632.0,
            "2170": 28157253632.0,
            "2175": 28157253632.0,
            "2180": 28157253632.0,
            "2185": 28157253632.0,
            "2190": 28157253632.0,
            "2195": 28157253632.0,
            "2200": 28157253632.0,
            "2205": 28157253632.0,
            "2210": 28157253632.0,
            "2215": 28157253632.0,
            "2220": 28157253632.0,
            "2225": 28157253632.0,
            "2230": 28157253632.0,
            "2235": 28157253632.0,
            "2240": 28157253632.0,
            "2245": 28157253632.0,
            "2250": 28157253632.0,
            "2255": 28157253632.0,
            "2260": 28157253632.0,
            "2265": 28157253632.0,
            "2270": 28157253632.0,
            "2275": 28157253632.0,
            "2280": 28157253632.0,
            "2285": 28157253632.0,
            "2290": 28157253632.0,
            "2295": 28157253632.0,
            "2300": 28157253632.0,
            "2305": 28157253632.0,
            "2310": 28157253632.0,
            "2315": 28157253632.0,
            "2320": 28157253632.0,
            "2325": 28157253632.0,
            "2330": 28157253632.0,
            "2335": 28157253632.0,
            "2340": 28157253632.0,
            "2345": 28157253632.0,
            "2350": 28157253632.0,
            "2355": 28157253632.0,
            "2360": 28157253632.0,
            "2365": 28157253632.0,
            "2370": 28157253632.0,
            "2375": 28157253632.0,
            "2380": 28157253632.0,
            "2385": 28157253632.0,
            "2390": 28157253632.0,
            "2395": 28157253632.0,
            "2400": 28157253632.0,
            "2405": 28157253632.0,
            "2410": 28157253632.0,
            "2415": 28157253632.0,
            "2420": 28157253632.0,
            "2425": 28157253632.0,
            "2430": 28157253632.0,
            "2435": 28157253632.0,
            "2440": 28157253632.0,
            "2445": 28157253632.0,
            "2450": 28157253632.0,
            "2455": 28157253632.0,
            "2460": 28157253632.0,
            "2465": 28157253632.0,
            "2470": 28157253632.0,
            "2475": 28157253632.0,
            "2480": 28157253632.0,
            "2485": 28157253632.0,
            "2490": 28157253632.0,
            "2495": 28157253632.0,
            "2500": 28157253632.0,
            "2505": 28157253632.0,
            "2510": 28157253632.0,
            "2515": 28157253632.0,
            "2520": 28157253632.0,
            "2525": 28157253632.0,
            "2530": 28157253632.0,
            "2535": 28157253632.0,
            "2540": 28157253632.0,
            "2545": 28157253632.0,
            "2550": 28157253632.0,
            "2555": 28157253632.0,
            "2560": 28157253632.0,
            "2565": 28157253632.0,
            "2570": 28157253632.0,
            "2575": 28157253632.0,
            "2580": 28157253632.0,
            "2585": 28157253632.0,
            "2590": 28157253632.0,
            "2595": 28157253632.0,
            "2600": 28157253632.0,
            "2605": 28157253632.0,
            "2610": 28157253632.0,
            "2615": 28157253632.0,
            "2620": 28157253632.0,
            "2625": 28157253632.0,
            "2630": 28157253632.0,
            "2635": 28157253632.0,
            "2640": 28157253632.0,
            "2645": 28157253632.0,
            "2650": 28157253632.0,
            "2655": 28157253632.0,
            "2660": 28157253632.0,
            "2665": 28157253632.0,
            "2670": 28157253632.0,
            "2675": 28157253632.0,
            "2680": 28157253632.0,
            "2685": 28157253632.0,
            "2690": 28157253632.0,
            "2695": 28157253632.0,
            "2700": 28157253632.0,
            "2705": 28157253632.0,
            "2710": 28157253632.0,
            "2715": 28157253632.0,
            "2720": 28157253632.0,
            "2725": 28157253632.0,
            "2730": 28157253632.0,
            "2735": 28157253632.0,
            "2740": 28157253632.0,
            "2745": 28157253632.0,
            "2750": 28157253632.0,
            "2755": 28157253632.0,
            "2760": 28157253632.0,
            "2765": 28157253632.0,
            "2770": 28157253632.0,
            "2775": 28157253632.0,
            "2780": 28157253632.0,
            "2785": 28157253632.0,
            "2790": 28157253632.0,
            "2795": 28157253632.0,
            "2800": 28157253632.0,
            "2805": 28157253632.0,
            "2810": 28157253632.0,
            "2815": 28157253632.0,
            "2820": 28157253632.0,
            "2825": 28157253632.0,
            "2830": 28157253632.0,
            "2835": 28157253632.0,
            "2840": 28157253632.0,
            "2845": 28157253632.0,
            "2850": 28157253632.0,
            "2855": 28157253632.0,
            "2860": 28157253632.0,
            "2865": 28157253632.0,
            "2870": 28157253632.0,
            "2875": 28157253632.0,
            "2880": 28157253632.0,
            "2885": 28157253632.0,
            "2890": 28157253632.0,
            "2895": 28157253632.0,
            "2900": 28157253632.0,
            "2905": 28157253632.0,
            "2910": 28157253632.0,
            "2915": 28157253632.0,
            "2920": 28157253632.0,
            "2925": 28157253632.0,
            "2930": 28157253632.0,
            "2935": 28157253632.0,
            "2940": 28157253632.0,
            "2945": 28157253632.0,
            "2950": 28157253632.0,
            "2955": 28157253632.0,
            "2960": 28157253632.0,
            "2965": 28157253632.0,
            "2970": 28157253632.0,
            "2975": 28157253632.0,
            "2980": 28157253632.0,
            "2985": 28157253632.0,
            "2990": 28157253632.0,
            "2995": 28157253632.0,
            "3000": 28157253632.0,
            "3005": 28157253632.0,
            "3010": 28157253632.0,
            "3015": 28157253632.0,
            "3020": 28157253632.0,
            "3025": 28157253632.0,
            "3030": 28157253632.0,
            "3035": 28157253632.0,
            "3040": 28157253632.0,
            "3045": 28157253632.0,
            "3050": 28157253632.0,
            "3055": 28157253632.0,
            "3060": 28157253632.0,
            "3065": 28157253632.0,
            "3070": 28157253632.0,
            "3075": 28157253632.0,
            "3080": 28157253632.0,
            "3085": 28157253632.0,
            "3090": 28157253632.0,
            "3095": 28157253632.0,
            "3100": 28157253632.0,
            "3105": 28157253632.0,
            "3110": 28157253632.0,
            "3115": 28157253632.0,
            "3120": 28157253632.0,
            "3125": 28157253632.0,
            "3130": 28157253632.0,
            "3135": 28157253632.0,
            "3140": 28157253632.0,
            "3145": 28157253632.0,
            "3150": 28157253632.0,
            "3155": 28157253632.0,
            "3160": 28157253632.0,
            "3165": 28157253632.0,
            "3170": 28157253632.0,
            "3175": 28157253632.0,
            "3180": 28157253632.0,
            "3185": 28157253632.0,
            "3190": 28157253632.0,
            "3195": 28157253632.0,
            "3200": 28157253632.0,
            "3205": 28157253632.0,
            "3210": 28157253632.0,
            "3215": 28157253632.0,
            "3220": 28157253632.0,
            "3225": 28157253632.0,
            "3230": 28157253632.0,
            "3235": 28157253632.0,
            "3240": 28157253632.0,
            "3245": 28157253632.0,
            "3250": 28157253632.0,
            "3255": 28157253632.0,
            "3260": 28157253632.0,
            "3265": 28157253632.0,
            "3270": 28157253632.0,
            "3275": 28157253632.0,
            "3280": 28157253632.0,
            "3285": 28157253632.0,
            "3290": 28157253632.0,
            "3295": 28157253632.0,
            "3300": 28157253632.0,
            "3305": 28157253632.0,
            "3310": 28157253632.0,
            "3315": 28157253632.0,
            "3320": 28157253632.0,
            "3325": 28157253632.0,
            "3330": 28157253632.0,
            "3335": 28157253632.0,
            "3340": 28157253632.0,
            "3345": 28157253632.0,
            "3350": 28157253632.0,
            "3355": 28157253632.0,
            "3360": 28157253632.0,
            "3365": 28157253632.0,
            "3370": 28157253632.0,
            "3375": 28157253632.0,
            "3380": 28157253632.0,
            "3385": 28157253632.0,
            "3390": 28157253632.0,
            "3395": 28157253632.0,
            "3400": 28157253632.0,
            "3405": 28157253632.0,
            "3410": 28157253632.0,
            "3415": 28157253632.0,
            "3420": 28157253632.0,
            "3425": 28157253632.0,
            "3430": 28157253632.0,
            "3435": 28157253632.0,
            "3440": 28157253632.0,
            "3445": 28157253632.0,
            "3450": 28157253632.0,
            "3455": 28157253632.0,
            "3460": 28157253632.0,
            "3465": 28157253632.0,
            "3470": 28157253632.0,
            "3475": 28157253632.0,
            "3480": 28157253632.0,
            "3485": 28157253632.0,
            "3490": 28157253632.0,
            "3495": 28157253632.0,
            "3500": 28157253632.0,
            "3505": 28157253632.0,
            "3510": 28157253632.0,
            "3515": 28157253632.0,
            "3520": 28157253632.0,
            "3525": 28157253632.0,
            "3530": 28157253632.0,
            "3535": 28157253632.0,
            "3540": 28157253632.0,
            "3545": 28157253632.0,
            "3550": 28157253632.0,
            "3555": 28157253632.0,
            "3560": 28157253632.0,
            "3565": 28157253632.0,
            "3570": 28157253632.0,
            "3575": 28157253632.0,
            "3580": 28157253632.0,
            "3585": 28157253632.0,
            "3590": 28157253632.0,
            "3595": 28157253632.0,
            "3600": 28157253632.0,
            "3605": 28157253632.0,
            "3610": 28157253632.0,
            "3615": 28157253632.0,
            "3620": 28157253632.0,
            "3625": 28157253632.0,
            "3630": 28157253632.0,
            "3635": 28157253632.0,
            "3640": 28157253632.0,
            "3645": 28157253632.0,
            "3650": 28157253632.0,
            "3655": 28157253632.0,
            "3660": 28157253632.0,
            "3665": 28157253632.0,
            "3670": 28157253632.0,
            "3675": 28157253632.0,
            "3680": 28157253632.0,
            "3685": 28157253632.0,
            "3690": 28157253632.0,
            "3695": 28157253632.0,
            "3700": 28157253632.0,
            "3705": 28157253632.0,
            "3710": 28157253632.0,
            "3715": 28157253632.0,
            "3720": 28157253632.0,
            "3725": 28157253632.0,
            "3730": 28157253632.0,
            "3735": 28157253632.0,
            "3740": 28157253632.0,
            "3745": 28157253632.0,
            "3750": 28157253632.0,
            "3755": 28157253632.0,
            "3760": 28157253632.0,
            "3765": 28157253632.0,
            "3770": 28157253632.0,
            "3775": 28157253632.0,
            "3780": 28157253632.0,
            "3785": 28157253632.0,
            "3790": 28157253632.0,
            "3795": 28157253632.0,
            "3800": 28157253632.0,
            "3805": 28157253632.0,
            "3810": 28157253632.0,
            "3815": 28157253632.0,
            "3820": 28157253632.0,
            "3825": 28157253632.0,
            "3830": 28157253632.0,
            "3835": 28157253632.0,
            "3840": 28157253632.0,
            "3845": 28157253632.0,
            "3850": 28157253632.0,
            "3855": 28157253632.0,
            "3860": 28157253632.0,
            "3865": 28157253632.0,
            "3870": 28157253632.0,
            "3875": 28157253632.0,
            "3880": 28157253632.0,
            "3885": 28157253632.0,
            "3890": 28157253632.0,
            "3895": 28157253632.0,
            "3900": 28157253632.0,
            "3905": 28157253632.0,
            "3910": 28157253632.0,
            "3915": 28157253632.0,
            "3920": 28157253632.0,
            "3925": 28157253632.0,
            "3930": 28157253632.0,
            "3935": 28157253632.0,
            "3940": 28157253632.0,
            "3945": 28157253632.0,
            "3950": 28157253632.0,
            "3955": 28157253632.0,
            "3960": 28157253632.0,
            "3965": 28157253632.0,
            "3970": 28157253632.0,
            "3975": 28157253632.0,
            "3980": 28157253632.0,
            "3985": 28157253632.0,
            "3990": 28157253632.0,
            "3995": 28157253632.0,
            "4000": 28157253632.0,
            "4005": 28157253632.0,
            "4010": 28157253632.0,
            "4015": 28157253632.0,
            "4020": 28157253632.0,
            "4025": 28157253632.0,
            "4030": 28157253632.0,
            "4035": 28157253632.0,
            "4040": 28157253632.0,
            "4045": 28157253632.0,
            "4050": 28157253632.0,
            "4055": 28157253632.0,
            "4060": 28157253632.0,
            "4065": 28157253632.0,
            "4070": 28157253632.0,
            "4075": 28157253632.0,
            "4080": 28157253632.0,
            "4085": 28157253632.0,
            "4090": 28157253632.0,
            "4095": 28157253632.0,
            "4100": 28157253632.0,
            "4105": 28157253632.0,
            "4110": 28157253632.0,
            "4115": 28157253632.0,
            "4120": 28157253632.0,
            "4125": 28157253632.0,
            "4130": 28157253632.0,
            "4135": 28157253632.0,
            "4140": 28157253632.0,
            "4145": 28157253632.0,
            "4150": 28157253632.0,
            "4155": 28157253632.0,
            "4160": 28157253632.0,
            "4165": 28157253632.0,
            "4170": 28157253632.0,
            "4175": 28157253632.0,
            "4180": 28157253632.0,
            "4185": 28157253632.0,
            "4190": 28157253632.0,
            "4195": 28157253632.0,
            "4200": 28157253632.0,
            "4205": 28157253632.0,
            "4210": 28157253632.0,
            "4215": 28157253632.0,
            "4220": 28157253632.0,
            "4225": 28157253632.0,
            "4230": 28157253632.0,
            "4235": 28157253632.0,
            "4240": 28157253632.0,
            "4245": 28157253632.0,
            "4250": 28157253632.0,
            "4255": 28157253632.0,
            "4260": 28157253632.0,
            "4265": 28157253632.0,
            "4270": 28157253632.0,
            "4275": 28157253632.0,
            "4280": 28157253632.0,
            "4285": 28157253632.0,
            "4290": 28157253632.0,
            "4295": 28157253632.0,
            "4300": 28157253632.0,
            "4305": 28157253632.0,
            "4310": 28157253632.0,
            "4315": 28157253632.0,
            "4320": 28157253632.0,
            "4325": 28157253632.0,
            "4330": 28157253632.0,
            "4335": 28157253632.0,
            "4340": 28157253632.0,
            "4345": 28157253632.0,
            "4350": 28157253632.0,
            "4355": 28157253632.0,
            "4360": 28157253632.0,
            "4365": 28157253632.0,
            "4370": 28157253632.0,
            "4375": 28157253632.0,
            "4380": 28157253632.0,
            "4385": 28157253632.0,
            "4390": 28157253632.0,
            "4395": 28157253632.0,
            "4400": 28157253632.0,
            "4405": 28157253632.0,
            "4410": 28157253632.0,
            "4415": 28157253632.0,
            "4420": 28157253632.0,
            "4425": 28157253632.0,
            "4430": 28157253632.0,
            "4435": 28157253632.0,
            "4440": 28157253632.0,
            "4445": 28157253632.0,
            "4450": 28157253632.0,
            "4455": 28157253632.0,
            "4460": 28157253632.0,
            "4465": 28157253632.0,
            "4470": 28157253632.0,
            "4475": 28157253632.0,
            "4480": 28157253632.0,
            "4485": 28157253632.0,
            "4490": 28157253632.0,
            "4495": 28157253632.0,
            "4500": 28157253632.0,
            "4505": 28157253632.0,
            "4510": 28157253632.0,
            "4515": 28157253632.0,
            "4520": 28157253632.0,
            "4525": 28157253632.0,
            "4530": 28157253632.0,
            "4535": 28157253632.0,
            "4540": 28157253632.0,
            "4545": 28157253632.0,
            "4550": 28157253632.0,
            "4555": 28157253632.0,
            "4560": 28157253632.0,
            "4565": 28157253632.0,
            "4570": 28157253632.0,
            "4575": 28157253632.0,
            "4580": 28157253632.0,
            "4585": 28157253632.0,
            "4590": 28157253632.0,
            "4595": 28157253632.0,
            "4600": 28157253632.0,
            "4605": 28157253632.0,
            "4610": 28157253632.0,
            "4615": 28157253632.0,
            "4620": 28157253632.0,
            "4625": 28157253632.0,
            "4630": 28157253632.0,
            "4635": 28157253632.0,
            "4640": 28157253632.0,
            "4645": 28157253632.0,
            "4650": 28157253632.0,
            "4655": 28157253632.0,
            "4660": 28157253632.0,
            "4665": 28157253632.0,
            "4670": 28157253632.0,
            "4675": 28157253632.0,
            "4680": 28157253632.0,
            "4685": 28157253632.0,
            "4690": 28157253632.0,
            "4695": 28157253632.0,
            "4700": 28157253632.0,
            "4705": 28157253632.0,
            "4710": 28157253632.0,
            "4715": 28157253632.0,
            "4720": 28157253632.0,
            "4725": 28157253632.0,
            "4730": 28157253632.0,
            "4735": 28157253632.0,
            "4740": 28157253632.0,
            "4745": 28157253632.0,
            "4750": 28157253632.0,
            "4755": 28157253632.0,
            "4760": 28157253632.0,
            "4765": 28157253632.0,
            "4770": 28157253632.0,
            "4775": 28157253632.0,
            "4780": 28157253632.0,
            "4785": 28157253632.0,
            "4790": 28157253632.0,
            "4795": 28157253632.0,
            "4800": 28157253632.0,
            "4805": 28157253632.0,
            "4810": 28157253632.0,
            "4815": 28157253632.0,
            "4820": 28157253632.0,
            "4825": 28157253632.0,
            "4830": 28157253632.0,
            "4835": 28157253632.0,
            "4840": 28157253632.0,
            "4845": 28157253632.0,
            "4850": 28157253632.0,
            "4855": 28157253632.0,
            "4860": 28157253632.0,
            "4865": 28157253632.0,
            "4870": 28157253632.0,
            "4875": 28157253632.0,
            "4880": 28157253632.0,
            "4885": 28157253632.0,
            "4890": 28157253632.0,
            "4895": 28157253632.0,
            "4900": 28157253632.0,
            "4905": 28157253632.0,
            "4910": 28157253632.0,
            "4915": 28157253632.0,
            "4920": 28157253632.0,
            "4925": 28157253632.0,
            "4930": 28157253632.0,
            "4935": 28157253632.0,
            "4940": 28157253632.0,
            "4945": 28157253632.0,
            "4950": 28157253632.0,
            "4955": 28157253632.0,
            "4960": 28157253632.0,
            "4965": 28157253632.0,
            "4970": 28157253632.0,
            "4975": 28157253632.0,
            "4980": 28157253632.0,
            "4985": 28157253632.0,
            "4990": 28157253632.0,
            "4995": 28157253632.0,
            "5000": 28157253632.0,
            "5005": 28157253632.0,
            "5010": 28157253632.0,
            "5015": 28157253632.0,
            "5020": 28157253632.0,
            "5025": 28157253632.0,
            "5030": 28157253632.0,
            "5035": 28157253632.0,
            "5040": 28157253632.0,
            "5045": 28157253632.0,
            "5050": 28157253632.0,
            "5055": 28157253632.0,
            "5060": 28157253632.0,
            "5065": 28157253632.0,
            "5070": 28157253632.0,
            "5075": 28157253632.0,
            "5080": 28157253632.0,
            "5085": 28157253632.0,
            "5090": 28157253632.0,
            "5095": 28157253632.0,
            "5100": 28157253632.0,
            "5105": 28157253632.0,
            "5110": 28157253632.0,
            "5115": 28157253632.0,
            "5120": 28157253632.0,
            "5125": 28157253632.0,
            "5130": 28157253632.0,
            "5135": 28157253632.0,
            "5140": 28157253632.0,
            "5145": 28157253632.0,
            "5150": 28157253632.0,
            "5155": 28157253632.0,
            "5160": 28157253632.0,
            "5165": 28157253632.0,
            "5170": 28157253632.0,
            "5175": 28157253632.0,
            "5180": 28157253632.0,
            "5185": 28157253632.0,
            "5190": 28157253632.0,
            "5195": 28157253632.0,
            "5200": 28157253632.0,
            "5205": 28157253632.0,
            "5210": 28157253632.0,
            "5215": 28157253632.0,
            "5220": 28157253632.0,
            "5225": 28157253632.0,
            "5230": 28157253632.0,
            "5235": 28157253632.0,
            "5240": 28157253632.0,
            "5245": 28157253632.0,
            "5250": 28157253632.0,
            "5255": 28157253632.0,
            "5260": 28157253632.0,
            "5265": 28157253632.0,
            "5270": 28157253632.0,
            "5275": 28157253632.0,
            "5280": 28157253632.0,
            "5285": 28157253632.0,
            "5290": 28157253632.0,
            "5295": 28157253632.0,
            "5300": 28157253632.0,
            "5305": 28157253632.0,
            "5310": 28157253632.0,
            "5315": 28157253632.0,
            "5320": 28157253632.0,
            "5325": 28157253632.0,
            "5330": 28157253632.0,
            "5335": 28157253632.0,
            "5340": 28157253632.0,
            "5345": 28157253632.0,
            "5350": 28157253632.0,
            "5355": 28157253632.0,
            "5360": 28157253632.0,
            "5365": 28157253632.0,
            "5370": 28157253632.0,
            "5375": 28157253632.0,
            "5380": 28157253632.0,
            "5385": 28157253632.0,
            "5390": 28157253632.0,
            "5395": 28157253632.0,
            "5400": 28157253632.0,
            "5405": 28157253632.0,
            "5410": 28157253632.0,
            "5415": 28157253632.0,
            "5420": 28157253632.0,
            "5425": 28157253632.0,
            "5430": 28157253632.0,
            "5435": 28157253632.0,
            "5440": 28157253632.0,
            "5445": 28157253632.0,
            "5450": 28157253632.0,
            "5455": 28157253632.0,
            "5460": 28157253632.0,
            "5465": 28157253632.0,
            "5470": 28157253632.0,
            "5475": 28157253632.0,
            "5480": 28157253632.0,
            "5485": 28157253632.0,
            "5490": 28157253632.0,
            "5495": 28157253632.0,
            "5500": 28157253632.0,
            "5505": 28157253632.0,
            "5510": 28157253632.0,
            "5515": 28157253632.0,
            "5520": 28157253632.0,
            "5525": 28157253632.0,
            "5530": 28157253632.0,
            "5535": 28157253632.0,
            "5540": 28157253632.0,
            "5545": 28157253632.0,
            "5550": 28157253632.0,
            "5555": 28157253632.0,
            "5560": 28157253632.0,
            "5565": 28157253632.0,
            "5570": 28157253632.0,
            "5575": 28157253632.0,
            "5580": 28157253632.0,
            "5585": 28157253632.0,
            "5590": 28157253632.0,
            "5595": 28157253632.0,
            "5600": 28157253632.0,
            "5605": 28157253632.0,
            "5610": 28157253632.0,
            "5615": 28157253632.0,
            "5620": 28157253632.0,
            "5625": 28157253632.0,
            "5630": 28157253632.0,
            "5635": 28157253632.0,
            "5640": 28157253632.0,
            "5645": 28157253632.0,
            "5650": 28157253632.0,
            "5655": 28157253632.0,
            "5660": 28157253632.0,
            "5665": 28157253632.0,
            "5670": 28157253632.0,
            "5675": 28157253632.0,
            "5680": 28157253632.0,
            "5685": 28157253632.0,
            "5690": 28157253632.0,
            "5695": 28157253632.0,
            "5700": 28157253632.0,
            "5705": 28157253632.0,
            "5710": 28157253632.0,
            "5715": 28157253632.0,
            "5720": 28157253632.0,
            "5725": 28157253632.0,
            "5730": 28157253632.0,
            "5735": 28157253632.0,
            "5740": 28157253632.0,
            "5745": 28157253632.0,
            "5750": 28157253632.0,
            "5755": 28157253632.0,
            "5760": 28157253632.0,
            "5765": 28157253632.0,
            "5770": 28157253632.0,
            "5775": 28157253632.0,
            "5780": 28157253632.0,
            "5785": 28157253632.0,
            "5790": 28157253632.0,
            "5795": 28157253632.0,
            "5800": 28157253632.0,
            "5805": 28157253632.0,
            "5810": 28157253632.0,
            "5815": 28157253632.0,
            "5820": 28157253632.0,
            "5825": 28157253632.0,
            "5830": 28157253632.0,
            "5835": 28157253632.0,
            "5840": 28157253632.0,
            "5845": 28157253632.0,
            "5850": 28157253632.0,
            "5855": 28157253632.0,
            "5860": 28157253632.0,
            "5865": 28157253632.0,
            "5870": 28157253632.0,
            "5875": 28157253632.0,
            "5880": 28157253632.0,
            "5885": 28157253632.0,
            "5890": 28157253632.0,
            "5895": 28157253632.0,
            "5900": 28157253632.0,
            "5905": 28157253632.0,
            "5910": 28157253632.0,
            "5915": 28157253632.0,
            "5920": 28157253632.0,
            "5925": 28157253632.0,
            "5930": 28157253632.0,
            "5935": 28157253632.0,
            "5940": 28157253632.0,
            "5945": 28157253632.0,
            "5950": 28157253632.0,
            "5955": 28157253632.0,
            "5960": 28157253632.0,
            "5965": 28157253632.0,
            "5970": 28157253632.0,
            "5975": 28157253632.0,
            "5980": 28157253632.0,
            "5985": 28157253632.0,
            "5990": 28157253632.0,
            "5995": 28157253632.0,
            "6000": 28157253632.0,
            "6005": 28157253632.0,
            "6010": 28157253632.0,
            "6015": 28157253632.0,
            "6020": 28157253632.0,
            "6025": 28157253632.0,
            "6030": 28157253632.0,
            "6035": 28157253632.0,
            "6040": 28157253632.0,
            "6045": 28157253632.0,
            "6050": 28157253632.0,
            "6055": 28157253632.0,
            "6060": 28157253632.0,
            "6065": 28157253632.0,
            "6070": 28157253632.0,
            "6075": 28157253632.0,
            "6080": 28157253632.0,
            "6085": 28157253632.0,
            "6090": 28157253632.0,
            "6095": 28157253632.0,
            "6100": 28157253632.0,
            "6105": 28157253632.0,
            "6110": 28157253632.0,
            "6115": 28157253632.0,
            "6120": 28157253632.0,
            "6125": 28157253632.0,
            "6130": 28157253632.0,
            "6135": 28157253632.0,
            "6140": 28157253632.0,
            "6145": 28157253632.0,
            "6150": 28157253632.0,
            "6155": 28157253632.0,
            "6160": 28157253632.0,
            "6165": 28157253632.0,
            "6170": 28157253632.0,
            "6175": 28157253632.0,
            "6180": 28157253632.0,
            "6185": 28157253632.0,
            "6190": 28157253632.0,
            "6195": 28157253632.0,
            "6200": 28157253632.0,
            "6205": 28157253632.0,
            "6210": 28157253632.0,
            "6215": 28157253632.0,
            "6220": 28157253632.0,
            "6225": 28157253632.0,
            "6230": 28157253632.0,
            "6235": 28157253632.0,
            "6240": 28157253632.0,
            "6245": 28157253632.0,
            "6250": 28157253632.0,
            "6255": 28157253632.0,
            "6260": 28157253632.0,
            "6265": 28157253632.0,
            "6270": 28157253632.0,
            "6275": 28157253632.0,
            "6280": 28157253632.0,
            "6285": 28157253632.0,
            "6290": 28157253632.0,
            "6295": 28157253632.0,
            "6300": 28157253632.0,
            "6305": 28157253632.0,
            "6310": 28157253632.0,
            "6315": 28157253632.0,
            "6320": 28157253632.0,
            "6325": 28157253632.0,
            "6330": 28157253632.0,
            "6335": 28157253632.0,
            "6340": 28157253632.0,
            "6345": 28157253632.0,
            "6350": 28157253632.0,
            "6355": 28157253632.0,
            "6360": 28157253632.0,
            "6365": 28157253632.0,
            "6370": 28157253632.0,
            "6375": 28157253632.0,
            "6380": 28157253632.0,
            "6385": 28157253632.0,
            "6390": 28157253632.0,
            "6395": 28157253632.0,
            "6400": 28157253632.0,
            "6405": 28157253632.0,
            "6410": 28157253632.0,
            "6415": 28157253632.0,
            "6420": 28157253632.0,
            "6425": 28157253632.0,
            "6430": 28157253632.0,
            "6435": 28157253632.0,
            "6440": 28157253632.0,
            "6445": 28157253632.0,
            "6450": 28157253632.0,
            "6455": 28157253632.0,
            "6460": 28157253632.0,
            "6465": 28157253632.0,
            "6470": 28157253632.0,
            "6475": 28157253632.0,
            "6480": 28157253632.0,
            "6485": 28157253632.0,
            "6490": 28157253632.0,
            "6495": 28157253632.0,
            "6500": 28157253632.0,
            "6505": 28157253632.0,
            "6510": 28157253632.0,
            "6515": 28157253632.0,
            "6520": 28157253632.0,
            "6525": 28157253632.0,
            "6530": 28157253632.0,
            "6535": 28157253632.0,
            "6540": 28157253632.0,
            "6545": 28157253632.0,
            "6550": 28157253632.0,
            "6555": 28157253632.0,
            "6560": 28157253632.0,
            "6565": 28157253632.0,
            "6570": 28157253632.0,
            "6575": 28157253632.0,
            "6580": 28157253632.0,
            "6585": 28157253632.0,
            "6590": 28157253632.0,
            "6595": 28157253632.0,
            "6600": 28157253632.0,
            "6605": 28157253632.0,
            "6610": 28157253632.0,
            "6615": 28157253632.0,
            "6620": 28157253632.0,
            "6625": 28157253632.0,
            "6630": 28157253632.0,
            "6635": 28157253632.0,
            "6640": 28157253632.0,
            "6645": 28157253632.0,
            "6650": 28157253632.0,
            "6655": 28157253632.0,
            "6660": 28157253632.0,
            "6665": 28157253632.0,
            "6670": 28157253632.0,
            "6675": 28157253632.0,
            "6680": 28157253632.0,
            "6685": 28157253632.0,
            "6690": 28157253632.0,
            "6695": 28157253632.0,
            "6700": 28157253632.0,
            "6705": 28157253632.0,
            "6710": 28157253632.0,
            "6715": 28157253632.0,
            "6720": 28157253632.0,
            "6725": 28157253632.0,
            "6730": 28157253632.0,
            "6735": 28157253632.0,
            "6740": 28157253632.0,
            "6745": 28157253632.0,
            "6750": 28157253632.0,
            "6755": 28157253632.0,
            "6760": 28157253632.0,
            "6765": 28157253632.0,
            "6770": 28157253632.0,
            "6775": 28157253632.0,
            "6780": 28157253632.0,
            "6785": 28157253632.0,
            "6790": 28157253632.0,
            "6795": 28157253632.0,
            "6800": 28157253632.0,
            "6805": 28157253632.0,
            "6810": 28157253632.0,
            "6815": 28157253632.0,
            "6820": 28157253632.0,
            "6825": 28157253632.0,
            "6830": 28157253632.0,
            "6835": 28157253632.0,
            "6840": 28157253632.0,
            "6845": 28157253632.0,
            "6850": 28157253632.0,
            "6855": 28157253632.0,
            "6860": 28157253632.0,
            "6865": 28157253632.0,
            "6870": 28157253632.0,
            "6875": 28157253632.0,
            "6880": 28157253632.0,
            "6885": 28157253632.0,
            "6890": 28157253632.0,
            "6895": 28157253632.0,
            "6900": 28157253632.0,
            "6905": 28157253632.0,
            "6910": 28157253632.0,
            "6915": 28157253632.0,
            "6920": 28157253632.0,
            "6925": 28157253632.0,
            "6930": 28157253632.0,
            "6935": 28157253632.0,
            "6940": 28157253632.0,
            "6945": 28157253632.0,
            "6950": 28157253632.0,
            "6955": 28157253632.0,
            "6960": 28157253632.0,
            "6965": 28157253632.0,
            "6970": 28157253632.0,
            "6975": 28157253632.0,
            "6980": 28157253632.0,
            "6985": 28157253632.0,
            "6990": 28157253632.0,
            "6995": 28157253632.0,
            "7000": 28157253632.0,
            "7005": 28157253632.0,
            "7010": 28157253632.0,
            "7015": 28157253632.0,
            "7020": 28157253632.0,
            "7025": 28157253632.0,
            "7030": 28157253632.0,
            "7035": 28157253632.0,
            "7040": 28157253632.0,
            "7045": 28157253632.0,
            "7050": 28157253632.0,
            "7055": 28157253632.0,
            "7060": 28157253632.0,
            "7065": 28157253632.0,
            "7070": 28157253632.0,
            "7075": 28157253632.0,
            "7080": 28157253632.0,
            "7085": 28157253632.0,
            "7090": 28157253632.0,
            "7095": 28157253632.0,
            "7100": 28157253632.0,
            "7105": 28157253632.0,
            "7110": 28157253632.0,
            "7115": 28157253632.0,
            "7120": 28157253632.0,
            "7125": 28157253632.0,
            "7130": 28157253632.0,
            "7135": 28157253632.0,
            "7140": 28157253632.0,
            "7145": 28157253632.0,
            "7150": 28157253632.0,
            "7155": 28157253632.0,
            "7160": 28157253632.0,
            "7165": 28157253632.0,
            "7170": 28157253632.0,
            "7175": 28157253632.0,
            "7180": 28157253632.0,
            "7185": 28157253632.0,
            "7190": 28157253632.0,
            "7195": 28157253632.0,
            "7200": 28157253632.0,
            "7205": 28157253632.0,
            "7210": 28157253632.0,
            "7215": 28157253632.0,
            "7220": 28157253632.0,
            "7225": 28157253632.0,
            "7230": 28157253632.0,
            "7235": 28157253632.0,
            "7240": 28157253632.0,
            "7245": 28157253632.0,
            "7250": 28157253632.0,
            "7255": 28157253632.0,
            "7260": 28157253632.0,
            "7265": 28157253632.0,
            "7270": 28157253632.0,
            "7275": 28157253632.0,
            "7280": 28157253632.0,
            "7285": 28157253632.0,
            "7290": 28157253632.0,
            "7295": 28157253632.0,
            "7300": 28157253632.0,
            "7305": 28157253632.0,
            "7310": 28157253632.0,
            "7315": 28157253632.0,
            "7320": 28157253632.0,
            "7325": 28157253632.0,
            "7330": 28157253632.0,
            "7335": 28157253632.0,
            "7340": 28157253632.0,
            "7345": 28157253632.0,
            "7350": 28157253632.0,
            "7355": 28157253632.0,
            "7360": 28157253632.0,
            "7365": 28157253632.0,
            "7370": 28157253632.0,
            "7375": 28157253632.0,
            "7380": 28157253632.0,
            "7385": 28157253632.0,
            "7390": 28157253632.0,
            "7395": 28157253632.0,
            "7400": 28157253632.0,
            "7405": 28157253632.0,
            "7410": 28157253632.0,
            "7415": 28157253632.0,
            "7420": 28157253632.0,
            "7425": 28157253632.0,
            "7430": 28157253632.0,
            "7435": 28157253632.0,
            "7440": 28157253632.0,
            "7445": 28157253632.0,
            "7450": 28157253632.0,
            "7455": 28157253632.0,
            "7460": 28157253632.0,
            "7465": 28157253632.0,
            "7470": 28157253632.0,
            "7475": 28157253632.0,
            "7480": 28157253632.0,
            "7485": 28157253632.0,
            "7490": 28157253632.0,
            "7495": 28157253632.0,
            "7500": 28157253632.0,
            "7505": 28157253632.0,
            "7510": 28157253632.0,
            "7515": 28157253632.0,
            "7520": 28157253632.0,
            "7525": 28157253632.0,
            "7530": 28157253632.0,
            "7535": 28157253632.0,
            "7540": 28157253632.0,
            "7545": 28157253632.0,
            "7550": 28157253632.0,
            "7555": 28157253632.0,
            "7560": 28157253632.0,
            "7565": 28157253632.0,
            "7570": 28157253632.0,
            "7575": 28157253632.0,
            "7580": 28157253632.0,
            "7585": 28157253632.0,
            "7590": 28157253632.0,
            "7595": 28157253632.0,
            "7600": 28157253632.0,
            "7605": 28157253632.0,
            "7610": 28157253632.0,
            "7615": 28157253632.0,
            "7620": 28157253632.0,
            "7625": 28157253632.0,
            "7630": 28157253632.0,
            "7635": 28157253632.0,
            "7640": 28157253632.0,
            "7645": 28157253632.0,
            "7650": 28157253632.0,
            "7655": 28157253632.0,
            "7660": 28157253632.0,
            "7665": 28157253632.0,
            "7670": 28157253632.0,
            "7675": 28157253632.0,
            "7680": 28157253632.0,
            "7685": 28157253632.0,
            "7690": 28157253632.0,
            "7695": 28157253632.0,
            "7700": 28157253632.0,
            "7705": 28157253632.0,
            "7710": 28157253632.0,
            "7715": 28157253632.0,
            "7720": 28157253632.0,
            "7725": 28157253632.0,
            "7730": 28157253632.0,
            "7735": 28157253632.0,
            "7740": 28157253632.0,
            "7745": 28157253632.0,
            "7750": 28157253632.0,
            "7755": 28157253632.0,
            "7760": 28157253632.0,
            "7765": 28157253632.0,
            "7770": 28157253632.0,
            "7775": 28157253632.0,
            "7780": 28157253632.0,
            "7785": 28157253632.0,
            "7790": 28157253632.0,
            "7795": 28157253632.0,
            "7800": 28157253632.0,
            "7805": 28157253632.0,
            "7810": 28157253632.0,
            "7815": 28157253632.0,
            "7820": 28157253632.0,
            "7825": 28157253632.0,
            "7830": 28157253632.0,
            "7835": 28157253632.0,
            "7840": 28157253632.0,
            "7845": 28157253632.0,
            "7850": 28157253632.0,
            "7855": 28157253632.0,
            "7860": 28157253632.0,
            "7865": 28157253632.0,
            "7870": 28157253632.0,
            "7875": 28157253632.0,
            "7880": 28157253632.0,
            "7885": 28157253632.0,
            "7890": 28157253632.0,
            "7895": 28157253632.0,
            "7900": 28157253632.0,
            "7905": 28157253632.0,
            "7910": 28157253632.0,
            "7915": 28157253632.0,
            "7920": 28157253632.0,
            "7925": 28157253632.0,
            "7930": 28157253632.0,
            "7935": 28157253632.0,
            "7940": 28157253632.0,
            "7945": 28157253632.0,
            "7950": 28157253632.0,
            "7955": 28157253632.0,
            "7960": 28157253632.0,
            "7965": 28157253632.0,
            "7970": 28157253632.0,
            "7975": 28157253632.0,
            "7980": 28157253632.0,
            "7985": 28157253632.0,
            "7990": 28157253632.0,
            "7995": 28157253632.0,
            "8000": 28157253632.0,
            "8005": 28157253632.0,
            "8010": 28157253632.0,
            "8015": 28157253632.0,
            "8020": 28157253632.0,
            "8025": 28157253632.0,
            "8030": 28157253632.0,
            "8035": 28157253632.0,
            "8040": 28157253632.0,
            "8045": 28157253632.0,
            "8050": 28157253632.0,
            "8055": 28157253632.0,
            "8060": 28157253632.0,
            "8065": 28157253632.0,
            "8070": 28157253632.0,
            "8075": 28157253632.0,
            "8080": 28157253632.0,
            "8085": 28157253632.0,
            "8090": 28157253632.0,
            "8095": 28157253632.0,
            "8100": 28157253632.0,
            "8105": 28157253632.0,
            "8110": 28157253632.0,
            "8115": 28157253632.0,
            "8120": 28157253632.0,
            "8125": 28157253632.0,
            "8130": 28157253632.0,
            "8135": 28157253632.0,
            "8140": 28157253632.0,
            "8145": 28157253632.0,
            "8150": 28157253632.0,
            "8155": 28157253632.0,
            "8160": 28157253632.0,
            "8165": 28157253632.0,
            "8170": 28157253632.0,
            "8175": 28157253632.0,
            "8180": 28157253632.0,
            "8185": 28157253632.0,
            "8190": 28157253632.0,
            "8195": 28157253632.0,
            "8200": 28157253632.0,
            "8205": 28157253632.0,
            "8210": 28157253632.0,
            "8215": 28157253632.0,
            "8220": 28157253632.0,
            "8225": 28157253632.0,
            "8230": 28157253632.0,
            "8235": 28157253632.0,
            "8240": 28157253632.0,
            "8245": 28157253632.0,
            "8250": 28157253632.0,
            "8255": 28157253632.0,
            "8260": 28159016960.0,
            "8265": 28159016960.0,
            "8270": 28159016960.0,
            "8275": 28159016960.0,
            "8280": 28159016960.0,
            "8285": 28159016960.0,
            "8290": 28159016960.0,
            "8295": 28159016960.0,
            "8300": 28159016960.0,
            "8305": 28159016960.0,
            "8310": 28159016960.0,
            "8315": 28159016960.0,
            "8320": 28159016960.0,
            "8325": 28159016960.0,
            "8330": 28159016960.0,
            "8335": 28159016960.0,
            "8340": 28159016960.0,
            "8345": 28159016960.0,
            "8350": 28159016960.0,
            "8355": 28159016960.0,
            "8360": 28159016960.0,
            "8365": 28159016960.0,
            "8370": 28159016960.0,
            "8375": 28159016960.0,
            "8380": 28159016960.0,
            "8385": 28159016960.0,
            "8390": 28159016960.0,
            "8395": 28159016960.0,
            "8400": 28159016960.0,
            "8405": 28159016960.0,
            "8410": 28159016960.0,
            "8415": 28159016960.0,
            "8420": 28159016960.0,
            "8425": 28159016960.0,
            "8430": 28159016960.0,
            "8435": 28159016960.0,
            "8440": 28159016960.0,
            "8445": 28159016960.0,
            "8450": 28159016960.0,
            "8455": 28159016960.0,
            "8460": 28159016960.0,
            "8465": 28159016960.0,
            "8470": 28159016960.0,
            "8475": 28159016960.0,
            "8480": 28159016960.0,
            "8485": 28159016960.0,
            "8490": 28159016960.0,
            "8495": 28159016960.0,
            "8500": 28159016960.0,
            "8505": 28159016960.0,
            "8510": 28159016960.0,
            "8515": 28159016960.0,
            "8520": 28159016960.0,
            "8525": 28159016960.0,
            "8530": 28159016960.0,
            "8535": 28159016960.0,
            "8540": 28159016960.0,
            "8545": 28159016960.0,
            "8550": 28159016960.0,
            "8555": 28159016960.0,
            "8560": 28159016960.0,
            "8565": 28159016960.0,
            "8570": 28159016960.0,
            "8575": 28159016960.0,
            "8580": 28159016960.0,
            "8585": 28159016960.0,
            "8590": 28159016960.0,
            "8595": 28159016960.0,
            "8600": 28159016960.0,
            "8605": 28159016960.0,
            "8610": 28159016960.0,
            "8615": 28159016960.0,
            "8620": 28159016960.0,
            "8625": 28159016960.0,
            "8630": 28159016960.0,
            "8635": 28159016960.0,
            "8640": 28159016960.0,
            "8645": 28159016960.0,
            "8650": 28159016960.0,
            "8655": 28159016960.0,
            "8660": 28159016960.0,
            "8665": 28159016960.0,
            "8670": 28159016960.0,
            "8675": 28159016960.0,
            "8680": 28159016960.0,
            "8685": 28159016960.0,
            "8690": 28159016960.0,
            "8695": 28159016960.0,
            "8700": 28159016960.0,
            "8705": 28159016960.0,
            "8710": 28159016960.0,
            "8715": 28159016960.0,
            "8720": 28159016960.0,
            "8725": 28159016960.0,
            "8730": 28159016960.0,
            "8735": 28159016960.0,
            "8740": 28159016960.0,
            "8745": 28159016960.0,
            "8750": 28159016960.0,
            "8755": 28159016960.0,
            "8760": 28159016960.0,
            "8765": 28159016960.0,
            "8770": 28159016960.0,
            "8775": 28159016960.0,
            "8780": 28159016960.0,
            "8785": 28159016960.0,
            "8790": 28159016960.0,
            "8795": 28159016960.0,
            "8800": 28159016960.0,
            "8805": 28159016960.0,
            "8810": 28159016960.0,
            "8815": 28159016960.0,
            "8820": 28159016960.0,
            "8825": 28159016960.0,
            "8830": 28159016960.0,
            "8835": 28159016960.0,
            "8840": 28159016960.0,
            "8845": 28159016960.0,
            "8850": 28159016960.0,
            "8855": 28159016960.0,
            "8860": 28159016960.0,
            "8865": 28159016960.0,
            "8870": 28159016960.0,
            "8875": 28159016960.0,
            "8880": 28159016960.0,
            "8885": 28159016960.0,
            "8890": 28159016960.0,
            "8895": 28159016960.0,
            "8900": 28159016960.0,
            "8905": 28159016960.0,
            "8910": 28159016960.0,
            "8915": 28159016960.0,
            "8920": 28159016960.0,
            "8925": 28159016960.0,
            "8930": 28159016960.0,
            "8935": 28159016960.0,
            "8940": 28159016960.0,
            "8945": 28159016960.0,
            "8950": 28159016960.0,
            "8955": 28159016960.0,
            "8960": 28159016960.0,
            "8965": 28159016960.0,
            "8970": 28159016960.0,
            "8975": 28159016960.0,
            "8980": 28159016960.0,
            "8985": 28159016960.0,
            "8990": 28159016960.0,
            "8995": 28159016960.0,
            "9000": 28159016960.0,
            "9005": 28160194560.0,
            "9010": 28160194560.0,
            "9015": 28160194560.0,
            "9020": 28160194560.0,
            "9025": 28160194560.0,
            "9030": 28160194560.0,
            "9035": 28160194560.0,
            "9040": 28160194560.0,
            "9045": 28160194560.0,
            "9050": 28160194560.0,
            "9055": 28160194560.0,
            "9060": 28160194560.0,
            "9065": 28160194560.0,
            "9070": 28160194560.0,
            "9075": 28160194560.0,
            "9080": 28160194560.0,
            "9085": 28160194560.0,
            "9090": 28160194560.0,
            "9095": 28160194560.0,
            "9100": 28160194560.0,
            "9105": 28160194560.0,
            "9110": 28160194560.0,
            "9115": 28160194560.0,
            "9120": 28160194560.0,
            "9125": 28160194560.0,
            "9130": 28160194560.0,
            "9135": 28160194560.0,
            "9140": 28160194560.0,
            "9145": 28160194560.0,
            "9150": 28160194560.0,
            "9155": 28160194560.0,
            "9160": 28160194560.0,
            "9165": 28160194560.0,
            "9170": 28160194560.0,
            "9175": 28160194560.0,
            "9180": 28160194560.0,
            "9185": 28160194560.0,
            "9190": 28160194560.0,
            "9195": 28160194560.0,
            "9200": 28160194560.0,
            "9205": 28160194560.0,
            "9210": 28160194560.0,
            "9215": 28160194560.0,
            "9220": 28160194560.0,
            "9225": 28160194560.0,
            "9230": 28160194560.0,
            "9235": 28160194560.0,
            "9240": 28160194560.0,
            "9245": 28160194560.0,
            "9250": 28160194560.0,
            "9255": 28160194560.0,
            "9260": 28160194560.0,
            "9265": 28160194560.0,
            "9270": 28160194560.0,
            "9275": 28160194560.0,
            "9280": 28160194560.0,
            "9285": 28160194560.0,
            "9290": 28160194560.0,
            "9295": 28160194560.0,
            "9300": 28160194560.0,
            "9305": 28160194560.0,
            "9310": 28160194560.0,
            "9315": 28160194560.0,
            "9320": 28160194560.0,
            "9325": 28160194560.0,
            "9330": 28160194560.0,
            "9335": 28160194560.0,
            "9340": 28160194560.0,
            "9345": 28160194560.0,
            "9350": 28160194560.0,
            "9355": 28160194560.0,
            "9360": 28160194560.0,
            "9365": 28160194560.0,
            "9370": 28160194560.0,
            "9375": 28160194560.0,
            "9380": 28160194560.0,
            "9385": 28160194560.0,
            "9390": 28160194560.0,
            "9395": 28160194560.0,
            "9400": 28160194560.0,
            "9405": 28160194560.0,
            "9410": 28160194560.0,
            "9415": 28160194560.0,
            "9420": 28160194560.0,
            "9425": 28160194560.0,
            "9430": 28160194560.0,
            "9435": 28160194560.0,
            "9440": 28160194560.0,
            "9445": 28160194560.0,
            "9450": 28160194560.0,
            "9455": 28160194560.0,
            "9460": 28160194560.0,
            "9465": 28160194560.0,
            "9470": 28160194560.0,
            "9475": 28160194560.0,
            "9480": 28160194560.0,
            "9485": 28160194560.0,
            "9490": 28160194560.0,
            "9495": 28160194560.0,
            "9500": 28160194560.0,
            "9505": 28160194560.0,
            "9510": 28160194560.0,
            "9515": 28160194560.0,
            "9520": 28160194560.0,
            "9525": 28160194560.0,
            "9530": 28160194560.0,
            "9535": 28160194560.0,
            "9540": 28160194560.0,
            "9545": 28160194560.0,
            "9550": 28160194560.0,
            "9555": 28160194560.0,
            "9560": 28160194560.0,
            "9565": 28160194560.0,
            "9570": 28160194560.0,
            "9575": 28160194560.0,
            "9580": 28160194560.0,
            "9585": 28160194560.0,
            "9590": 28160194560.0,
            "9595": 28160194560.0,
            "9600": 28160194560.0,
            "9605": 28160194560.0,
            "9610": 28160194560.0,
            "9615": 28160194560.0,
            "9620": 28160194560.0,
            "9625": 28160194560.0,
            "9630": 28160194560.0,
            "9635": 28160194560.0,
            "9640": 28160194560.0,
            "9645": 28160194560.0,
            "9650": 28160194560.0,
            "9655": 28160194560.0,
            "9660": 28160194560.0,
            "9665": 28160194560.0,
            "9670": 28160194560.0,
            "9675": 28160194560.0,
            "9680": 28160194560.0,
            "9685": 28160194560.0,
            "9690": 28160194560.0,
            "9695": 28160194560.0,
            "9700": 28160194560.0,
            "9705": 28160194560.0,
            "9710": 28160194560.0,
            "9715": 28160194560.0,
            "9720": 28160194560.0,
            "9725": 28160194560.0,
            "9730": 28160194560.0,
            "9735": 28160194560.0,
            "9740": 28160194560.0,
            "9745": 28160194560.0,
            "9750": 28160194560.0,
            "9755": 28160194560.0,
            "9760": 28160194560.0,
            "9765": 28160194560.0,
            "9770": 28160194560.0,
            "9775": 28160194560.0,
            "9780": 28160194560.0,
            "9785": 28160194560.0,
            "9790": 28160194560.0,
            "9795": 28160194560.0,
            "9800": 28160194560.0,
            "9805": 28160194560.0,
            "9810": 28160194560.0,
            "9815": 28160194560.0,
            "9820": 28160194560.0,
            "9825": 28160194560.0,
            "9830": 28160194560.0,
            "9835": 28160194560.0,
            "9840": 28160194560.0,
            "9845": 28160194560.0,
            "9850": 28160194560.0,
            "9855": 28160194560.0,
            "9860": 28160194560.0,
            "9865": 28160194560.0,
            "9870": 28160194560.0,
            "9875": 28160194560.0,
            "9880": 28160194560.0,
            "9885": 28160194560.0,
            "9890": 28160194560.0,
            "9895": 28160194560.0,
            "9900": 28160194560.0,
            "9905": 28160194560.0,
            "9910": 28160194560.0,
            "9915": 28160194560.0,
            "9920": 28160194560.0,
            "9925": 28160194560.0,
            "9930": 28160194560.0,
            "9935": 28160194560.0,
            "9940": 28160194560.0,
            "9945": 28160194560.0,
            "9950": 28160194560.0,
            "9955": 28160194560.0,
            "9960": 28160194560.0,
            "9965": 28160194560.0,
            "9970": 28160194560.0,
            "9975": 28160194560.0,
            "9980": 28160194560.0,
            "9985": 28160194560.0,
            "9990": 28160194560.0,
            "9995": 28160194560.0,
            "10000": 28160194560.0,
            "10005": 28160194560.0,
            "10010": 28160194560.0,
            "10015": 28160194560.0,
            "10020": 28160194560.0,
            "10025": 28160194560.0,
            "10030": 28160194560.0,
            "10035": 28160194560.0,
            "10040": 28160194560.0,
            "10045": 28160194560.0,
            "10050": 28160194560.0,
            "10055": 28160194560.0,
            "10060": 28160194560.0,
            "10065": 28160194560.0,
            "10070": 28160194560.0,
            "10075": 28160194560.0,
            "10080": 28160194560.0,
            "10085": 28160194560.0,
            "10090": 28160194560.0,
            "10095": 28160194560.0,
            "10100": 28160194560.0,
            "10105": 28160194560.0,
            "10110": 28160194560.0,
            "10115": 28160194560.0,
            "10120": 28160194560.0,
            "10125": 28160194560.0,
            "10130": 28160194560.0,
            "10135": 28160194560.0,
            "10140": 28160194560.0,
            "10145": 28160194560.0,
            "10150": 28160194560.0,
            "10155": 28160194560.0,
            "10160": 28160194560.0,
            "10165": 28160194560.0,
            "10170": 28160194560.0,
            "10175": 28160194560.0,
            "10180": 28160194560.0,
            "10185": 28160194560.0,
            "10190": 28160194560.0,
            "10195": 28160194560.0,
            "10200": 28160194560.0,
            "10205": 28160194560.0,
            "10210": 28160194560.0,
            "10215": 28160194560.0,
            "10220": 28160194560.0,
            "10225": 28160194560.0,
            "10230": 28160194560.0,
            "10235": 28160194560.0,
            "10240": 28160194560.0,
            "10245": 28160194560.0,
            "10250": 28160194560.0,
            "10255": 28160194560.0,
            "10260": 28160194560.0,
            "10265": 28160194560.0,
            "10270": 28160194560.0,
            "10275": 28160194560.0,
            "10280": 28160194560.0,
            "10285": 28160194560.0,
            "10290": 28160194560.0,
            "10295": 28160194560.0,
            "10300": 28160194560.0,
            "10305": 28160194560.0,
            "10310": 28160194560.0,
            "10315": 28160194560.0,
            "10320": 28160194560.0,
            "10325": 28160194560.0,
            "10330": 28160194560.0,
            "10335": 28160194560.0,
            "10340": 28160194560.0,
            "10345": 28160194560.0,
            "10350": 28160194560.0,
            "10355": 28160194560.0,
            "10360": 28160194560.0,
            "10365": 28160194560.0,
            "10370": 28160194560.0,
            "10375": 28160194560.0,
            "10380": 28160194560.0,
            "10385": 28160194560.0,
            "10390": 28160194560.0,
            "10395": 28160194560.0,
            "10400": 28160194560.0,
            "10405": 28160194560.0,
            "10410": 28160194560.0,
            "10415": 28160194560.0,
            "10420": 28160194560.0,
            "10425": 28160194560.0,
            "10430": 28160194560.0,
            "10435": 28160194560.0,
            "10440": 28160194560.0,
            "10445": 28160194560.0,
            "10450": 28160194560.0,
            "10455": 28160194560.0,
            "10460": 28160194560.0,
            "10465": 28160194560.0,
            "10470": 28160194560.0,
            "10475": 28160194560.0,
            "10480": 28160194560.0,
            "10485": 28160194560.0,
            "10490": 28160194560.0,
            "10495": 28160194560.0,
            "10500": 28160194560.0,
            "10505": 28160194560.0,
            "10510": 28160194560.0,
            "10515": 28160194560.0,
            "10520": 28160194560.0,
            "10525": 28160194560.0,
            "10530": 28160194560.0,
            "10535": 28160194560.0,
            "10540": 28160194560.0,
            "10545": 28160194560.0,
            "10550": 28160194560.0,
            "10555": 28160194560.0,
            "10560": 28160194560.0,
            "10565": 28160194560.0,
            "10570": 28160194560.0,
            "10575": 28160194560.0,
            "10580": 28160194560.0,
            "10585": 28160194560.0,
            "10590": 28160194560.0,
            "10595": 28160194560.0,
            "10600": 28160194560.0,
            "10605": 28160194560.0,
            "10610": 28160194560.0,
            "10615": 28160194560.0,
            "10620": 28160194560.0,
            "10625": 28160194560.0,
            "10630": 28160194560.0,
            "10635": 28160194560.0,
            "10640": 28160194560.0,
            "10645": 28160194560.0,
            "10650": 28160194560.0,
            "10655": 28160194560.0,
            "10660": 28160194560.0,
            "10665": 28160194560.0,
            "10670": 28160194560.0,
            "10675": 28160194560.0,
            "10680": 28160194560.0,
            "10685": 28160194560.0,
            "10690": 28160194560.0,
            "10695": 28160194560.0,
            "10700": 28160194560.0,
            "10705": 28160194560.0,
            "10710": 28160194560.0,
            "10715": 28160194560.0,
            "10720": 28160194560.0,
            "10725": 28160194560.0,
            "10730": 28160194560.0,
            "10735": 28160194560.0,
            "10740": 28160194560.0,
            "10745": 28160194560.0,
            "10750": 28160194560.0,
            "10755": 28160194560.0,
            "10760": 28160194560.0,
            "10765": 28160194560.0,
            "10770": 28160194560.0,
            "10775": 28160194560.0,
            "10780": 28160194560.0,
            "10785": 28160194560.0,
            "10790": 28160194560.0,
            "10795": 28160194560.0,
            "10800": 28160194560.0,
            "10805": 28160194560.0,
            "10810": 28160194560.0,
            "10815": 28160194560.0,
            "10820": 28160194560.0,
            "10825": 28160194560.0,
            "10830": 28160194560.0,
            "10835": 28160194560.0,
            "10840": 28160194560.0,
            "10845": 28160194560.0,
            "10850": 28160194560.0,
            "10855": 28160194560.0,
            "10860": 28160194560.0,
            "10865": 28160194560.0,
            "10870": 28160194560.0,
            "10875": 28160194560.0,
            "10880": 28160194560.0,
            "10885": 28160194560.0,
            "10890": 28160194560.0,
            "10895": 28160194560.0,
            "10900": 28160194560.0,
            "10905": 28160194560.0,
            "10910": 28160194560.0,
            "10915": 28160194560.0,
            "10920": 28160194560.0,
            "10925": 28160194560.0,
            "10930": 28160194560.0,
            "10935": 28160194560.0,
            "10940": 28160194560.0,
            "10945": 28160194560.0,
            "10950": 28160194560.0,
            "10955": 28160194560.0,
            "10960": 28160194560.0,
            "10965": 28160194560.0,
            "10970": 28160194560.0,
            "10975": 28160194560.0,
            "10980": 28160194560.0,
            "10985": 28160194560.0,
            "10990": 28160194560.0,
            "10995": 28160194560.0,
            "11000": 28160194560.0,
            "11005": 28160194560.0,
            "11010": 28160194560.0,
            "11015": 28160194560.0,
            "11020": 28160194560.0,
            "11025": 28160194560.0,
            "11030": 28160194560.0,
            "11035": 28160194560.0,
            "11040": 28160194560.0,
            "11045": 28160194560.0,
            "11050": 28160194560.0,
            "11055": 28160194560.0,
            "11060": 28160194560.0,
            "11065": 28160194560.0,
            "11070": 28160194560.0,
            "11075": 28160194560.0,
            "11080": 28160194560.0,
            "11085": 28160194560.0,
            "11090": 28160194560.0,
            "11095": 28160194560.0,
            "11100": 28160194560.0,
            "11105": 28160194560.0,
            "11110": 28160194560.0,
            "11115": 28160194560.0,
            "11120": 28160194560.0,
            "11125": 28160194560.0,
            "11130": 28160194560.0,
            "11135": 28160194560.0,
            "11140": 28160194560.0,
            "11145": 28160194560.0,
            "11150": 28160194560.0,
            "11155": 28160194560.0,
            "11160": 28160194560.0,
            "11165": 28160194560.0,
            "11170": 28160194560.0,
            "11175": 28160194560.0,
            "11180": 28160194560.0,
            "11185": 28160194560.0,
            "11190": 28160194560.0,
            "11195": 28160194560.0,
            "11200": 28160194560.0,
            "11205": 28160194560.0,
            "11210": 28160194560.0,
            "11215": 28160194560.0,
            "11220": 28160194560.0,
            "11225": 28160194560.0,
            "11230": 28160194560.0,
            "11235": 28160194560.0,
            "11240": 28160194560.0,
            "11245": 28160194560.0,
            "11250": 28160194560.0,
            "11255": 28160194560.0,
            "11260": 28160194560.0,
            "11265": 28160194560.0,
            "11270": 28160194560.0,
            "11275": 28160194560.0,
            "11280": 28160194560.0,
            "11285": 28160194560.0,
            "11290": 28160194560.0,
            "11295": 28160194560.0,
            "11300": 28160194560.0,
            "11305": 28160194560.0,
            "11310": 28160194560.0,
            "11315": 28160194560.0,
            "11320": 28160194560.0,
            "11325": 28160194560.0,
            "11330": 28160194560.0,
            "11335": 28160194560.0,
            "11340": 28160194560.0,
            "11345": 28160194560.0,
            "11350": 28160194560.0,
            "11355": 28160194560.0,
            "11360": 28160194560.0,
            "11365": 28160194560.0,
            "11370": 28160194560.0,
            "11375": 28160194560.0,
            "11380": 28160194560.0,
            "11385": 28160194560.0,
            "11390": 28160194560.0,
            "11395": 28160194560.0,
            "11400": 28160194560.0,
            "11405": 28160194560.0,
            "11410": 28160194560.0,
            "11415": 28160194560.0,
            "11420": 28160194560.0,
            "11425": 28160194560.0,
            "11430": 28160194560.0,
            "11435": 28160194560.0,
            "11440": 28160194560.0,
            "11445": 28160194560.0,
            "11450": 28160194560.0,
            "11455": 28160194560.0,
            "11460": 28160194560.0,
            "11465": 28160194560.0,
            "11470": 28160194560.0,
            "11475": 28160194560.0,
            "11480": 28160194560.0,
            "11485": 28160194560.0,
            "11490": 28160194560.0,
            "11495": 28160194560.0,
            "11500": 28160194560.0,
            "11505": 28160194560.0,
            "11510": 28160194560.0,
            "11515": 28160194560.0,
            "11520": 28160194560.0,
            "11525": 28160194560.0,
            "11530": 28160194560.0,
            "11535": 28160194560.0,
            "11540": 28160194560.0,
            "11545": 28160194560.0,
            "11550": 28160194560.0,
            "11555": 28160194560.0,
            "11560": 28160194560.0,
            "11565": 28160194560.0,
            "11570": 28160194560.0,
            "11575": 28160194560.0,
            "11580": 28160194560.0,
            "11585": 28160194560.0,
            "11590": 28160194560.0,
            "11595": 28160194560.0,
            "11600": 28160194560.0,
            "11605": 28160194560.0,
            "11610": 28160194560.0,
            "11615": 28160194560.0,
            "11620": 28160194560.0,
            "11625": 28160194560.0,
            "11630": 28160194560.0,
            "11635": 28160194560.0,
            "11640": 28160194560.0,
            "11645": 28160194560.0,
            "11650": 28160194560.0,
            "11655": 28160194560.0,
            "11660": 28160194560.0,
            "11665": 28160194560.0,
            "11670": 28160194560.0,
            "11675": 28160194560.0,
            "11680": 28160194560.0,
            "11685": 28160194560.0,
            "11690": 28160194560.0,
            "11695": 28160194560.0,
            "11700": 28160194560.0,
            "11705": 28160194560.0,
            "11710": 28160194560.0,
            "11715": 28160194560.0,
            "11720": 28160194560.0,
            "11725": 28160194560.0,
            "11730": 28160194560.0,
            "11735": 28160194560.0,
            "11740": 28160194560.0,
            "11745": 28160194560.0,
            "11750": 28160194560.0,
            "11755": 28160194560.0,
            "11760": 28160194560.0,
            "11765": 28160194560.0,
            "11770": 28160194560.0,
            "11775": 28160194560.0,
            "11780": 28160194560.0,
            "11785": 28160194560.0,
            "11790": 28160194560.0,
            "11795": 28160194560.0,
            "11800": 28160194560.0,
            "11805": 28160194560.0,
            "11810": 28160194560.0,
            "11815": 28160194560.0,
            "11820": 28160194560.0,
            "11825": 28160194560.0,
            "11830": 28160194560.0,
            "11835": 28160194560.0,
            "11840": 28160194560.0,
            "11845": 28160194560.0,
            "11850": 28160194560.0,
            "11855": 28160194560.0,
            "11860": 28160194560.0,
            "11865": 28160194560.0,
            "11870": 28160194560.0,
            "11875": 28160194560.0,
            "11880": 28160194560.0,
            "11885": 28160194560.0,
            "11890": 28160194560.0,
            "11895": 28160194560.0,
            "11900": 28160194560.0,
            "11905": 28160194560.0,
            "11910": 28160194560.0,
            "11915": 28160194560.0,
            "11920": 28160194560.0,
            "11925": 28160194560.0,
            "11930": 28160194560.0,
            "11935": 28160194560.0,
            "11940": 28160194560.0,
            "11945": 28160194560.0,
            "11950": 28160194560.0,
            "11955": 28160194560.0,
            "11960": 28160194560.0,
            "11965": 28160194560.0,
            "11970": 28160194560.0,
            "11975": 28160194560.0,
            "11980": 28160194560.0,
            "11985": 28160194560.0,
            "11990": 28160194560.0,
            "11995": 28160194560.0,
            "12000": 28160194560.0,
            "12005": 28160194560.0,
            "12010": 28160194560.0,
            "12015": 28160194560.0,
            "12020": 28160194560.0,
            "12025": 28160194560.0,
            "12030": 28160194560.0,
            "12035": 28160194560.0,
            "12040": 28160194560.0,
            "12045": 28160194560.0,
            "12050": 28160194560.0,
            "12055": 28160194560.0,
            "12060": 28160194560.0,
            "12065": 28160194560.0,
            "12070": 28160194560.0,
            "12075": 28160194560.0,
            "12080": 28160194560.0,
            "12085": 28160194560.0,
            "12090": 28160194560.0,
            "12095": 28160194560.0,
            "12100": 28160194560.0,
            "12105": 28160194560.0,
            "12110": 28160194560.0,
            "12115": 28160194560.0,
            "12120": 28160194560.0,
            "12125": 28160194560.0,
            "12130": 28160194560.0,
            "12135": 28160194560.0,
            "12140": 28160194560.0,
            "12145": 28160194560.0,
            "12150": 28160194560.0,
            "12155": 28160194560.0,
            "12160": 28160194560.0,
            "12165": 28160194560.0,
            "12170": 28160194560.0,
            "12175": 28160194560.0,
            "12180": 28160194560.0,
            "12185": 28160194560.0,
            "12190": 28160194560.0,
            "12195": 28160194560.0,
            "12200": 28160194560.0,
            "12205": 28160194560.0,
            "12210": 28160194560.0,
            "12215": 28160194560.0,
            "12220": 28160194560.0,
            "12225": 28160194560.0,
            "12230": 28160194560.0,
            "12235": 28160194560.0,
            "12240": 28160194560.0,
            "12245": 28160194560.0,
            "12250": 28160194560.0,
            "12255": 28160194560.0,
            "12260": 28160194560.0,
            "12265": 28160194560.0,
            "12270": 28160194560.0,
            "12275": 28160194560.0,
            "12280": 28160194560.0,
            "12285": 28160194560.0,
            "12290": 28160194560.0,
            "12295": 28160194560.0,
            "12300": 28160194560.0,
            "12305": 28160194560.0,
            "12310": 28160194560.0,
            "12315": 28160194560.0,
            "12320": 28160194560.0,
            "12325": 28160194560.0,
            "12330": 28160194560.0,
            "12335": 28160194560.0,
            "12340": 28160194560.0,
            "12345": 28160194560.0,
            "12350": 28160194560.0,
            "12355": 28160194560.0,
            "12360": 28160194560.0,
            "12365": 28160194560.0,
            "12370": 28160194560.0,
            "12375": 28160194560.0,
            "12380": 28160194560.0,
            "12385": 28160194560.0,
            "12390": 28160194560.0,
            "12395": 28160194560.0,
            "12400": 28160194560.0,
            "12405": 28160194560.0,
            "12410": 28160194560.0,
            "12415": 28160194560.0,
            "12420": 28160194560.0,
            "12425": 28160194560.0,
            "12430": 28160194560.0,
            "12435": 28160194560.0,
            "12440": 28160194560.0,
            "12445": 28160194560.0,
            "12450": 28160194560.0,
            "12455": 28160194560.0,
            "12460": 28160194560.0,
            "12465": 28160194560.0,
            "12470": 28160194560.0,
            "12475": 28160194560.0,
            "12480": 28160194560.0,
            "12485": 28160194560.0,
            "12490": 28160194560.0,
            "12495": 28160194560.0,
            "12500": 28160194560.0,
            "12505": 28160194560.0,
            "12510": 28160194560.0,
            "12515": 28160194560.0,
            "12520": 28160194560.0,
            "12525": 28160194560.0,
            "12530": 28160194560.0,
            "12535": 28160194560.0,
            "12540": 28160194560.0,
            "12545": 28160194560.0,
            "12550": 28160194560.0,
            "12555": 28160194560.0,
            "12560": 28160194560.0,
            "12565": 28160194560.0,
            "12570": 28160194560.0,
            "12575": 28160194560.0,
            "12580": 28160194560.0,
            "12585": 28160194560.0,
            "12590": 28160194560.0,
            "12595": 28160194560.0,
            "12600": 28160194560.0,
            "12605": 28160194560.0,
            "12610": 28160194560.0,
            "12615": 28160194560.0,
            "12620": 28160194560.0,
            "12625": 28160194560.0,
            "12630": 28160194560.0,
            "12635": 28160194560.0,
            "12640": 28160194560.0,
            "12645": 28160194560.0,
            "12650": 28160194560.0,
            "12655": 28160194560.0,
            "12660": 28160194560.0,
            "12665": 28160194560.0,
            "12670": 28160194560.0,
            "12675": 28160194560.0,
            "12680": 28160194560.0,
            "12685": 28160194560.0,
            "12690": 28160194560.0,
            "12695": 28160194560.0,
            "12700": 28160194560.0,
            "12705": 28160194560.0,
            "12710": 28160194560.0,
            "12715": 28160194560.0,
            "12720": "nan",
            "12725": "nan",
            "12730": "nan",
            "12735": "nan",
            "12740": "nan",
            "12745": "nan",
            "12750": "nan",
            "12755": "nan",
            "12760": "nan",
            "12765": "nan",
            "12770": "nan",
            "12775": "nan",
            "12780": "nan",
            "12785": "nan",
            "12790": "nan",
            "12795": "nan",
            "12800": "nan",
            "12805": "nan",
            "12810": "nan",
            "12815": "nan",
            "12820": "nan",
            "12825": "nan",
            "12830": "nan",
            "12835": "nan",
            "12840": "nan",
            "12845": "nan",
            "12850": "nan",
            "12855": "nan",
            "12860": "nan",
            "12865": "nan",
            "12870": "nan",
            "12875": "nan",
            "12880": "nan",
            "12885": "nan",
            "12890": "nan",
            "12895": "nan",
            "12900": "nan",
            "12905": "nan",
            "12910": "nan",
            "12915": "nan",
            "12920": "nan",
            "12925": "nan",
            "12930": "nan",
            "12935": "nan",
            "12940": "nan",
            "12945": "nan",
            "12950": "nan",
            "12955": "nan",
            "12960": "nan",
            "12965": "nan",
            "12970": "nan",
            "12975": "nan",
            "12980": "nan",
            "12985": "nan",
            "12990": "nan",
            "12995": "nan",
            "13000": "nan"
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 13000,
        "step_interval": 5,
        "values": {
            "1": "nan",
            "5": "nan",
            "10": "nan",
            "15": "nan",
            "20": "nan",
            "25": "nan",
            "30": "nan",
            "35": "nan",
            "40": "nan",
            "45": "nan",
            "50": "nan",
            "55": "nan",
            "60": "nan",
            "65": "nan",
            "70": "nan",
            "75": "nan",
            "80": "nan",
            "85": "nan",
            "90": "nan",
            "95": "nan",
            "100": 1.58468,
            "105": "nan",
            "110": "nan",
            "115": "nan",
            "120": "nan",
            "125": "nan",
            "130": "nan",
            "135": "nan",
            "140": "nan",
            "145": "nan",
            "150": "nan",
            "155": "nan",
            "160": "nan",
            "165": "nan",
            "170": "nan",
            "175": "nan",
            "180": "nan",
            "185": "nan",
            "190": "nan",
            "195": "nan",
            "200": 1.46052,
            "205": "nan",
            "210": "nan",
            "215": "nan",
            "220": "nan",
            "225": "nan",
            "230": "nan",
            "235": "nan",
            "240": "nan",
            "245": "nan",
            "250": "nan",
            "255": "nan",
            "260": "nan",
            "265": "nan",
            "270": "nan",
            "275": "nan",
            "280": "nan",
            "285": "nan",
            "290": "nan",
            "295": "nan",
            "300": 1.48144,
            "305": "nan",
            "310": "nan",
            "315": "nan",
            "320": "nan",
            "325": "nan",
            "330": "nan",
            "335": "nan",
            "340": "nan",
            "345": "nan",
            "350": "nan",
            "355": "nan",
            "360": "nan",
            "365": "nan",
            "370": "nan",
            "375": "nan",
            "380": "nan",
            "385": "nan",
            "390": "nan",
            "395": "nan",
            "400": 1.5294,
            "405": "nan",
            "410": "nan",
            "415": "nan",
            "420": "nan",
            "425": "nan",
            "430": "nan",
            "435": "nan",
            "440": "nan",
            "445": "nan",
            "450": "nan",
            "455": "nan",
            "460": "nan",
            "465": "nan",
            "470": "nan",
            "475": "nan",
            "480": "nan",
            "485": "nan",
            "490": "nan",
            "495": "nan",
            "500": 1.52806,
            "505": "nan",
            "510": "nan",
            "515": "nan",
            "520": "nan",
            "525": "nan",
            "530": "nan",
            "535": "nan",
            "540": "nan",
            "545": "nan",
            "550": "nan",
            "555": "nan",
            "560": "nan",
            "565": "nan",
            "570": "nan",
            "575": "nan",
            "580": "nan",
            "585": "nan",
            "590": "nan",
            "595": "nan",
            "600": 1.69372,
            "605": "nan",
            "610": "nan",
            "615": "nan",
            "620": "nan",
            "625": "nan",
            "630": "nan",
            "635": "nan",
            "640": "nan",
            "645": "nan",
            "650": "nan",
            "655": "nan",
            "660": "nan",
            "665": "nan",
            "670": "nan",
            "675": "nan",
            "680": "nan",
            "685": "nan",
            "690": "nan",
            "695": "nan",
            "700": 1.45115,
            "705": "nan",
            "710": "nan",
            "715": "nan",
            "720": "nan",
            "725": "nan",
            "730": "nan",
            "735": "nan",
            "740": "nan",
            "745": "nan",
            "750": "nan",
            "755": "nan",
            "760": "nan",
            "765": "nan",
            "770": "nan",
            "775": "nan",
            "780": "nan",
            "785": "nan",
            "790": "nan",
            "795": "nan",
            "800": 1.56133,
            "805": "nan",
            "810": "nan",
            "815": "nan",
            "820": "nan",
            "825": "nan",
            "830": "nan",
            "835": "nan",
            "840": "nan",
            "845": "nan",
            "850": "nan",
            "855": "nan",
            "860": "nan",
            "865": "nan",
            "870": "nan",
            "875": "nan",
            "880": "nan",
            "885": "nan",
            "890": "nan",
            "895": "nan",
            "900": 1.44623,
            "905": "nan",
            "910": "nan",
            "915": "nan",
            "920": "nan",
            "925": "nan",
            "930": "nan",
            "935": "nan",
            "940": "nan",
            "945": "nan",
            "950": "nan",
            "955": "nan",
            "960": "nan",
            "965": "nan",
            "970": "nan",
            "975": "nan",
            "980": "nan",
            "985": "nan",
            "990": "nan",
            "995": "nan",
            "1000": 1.45115,
            "1005": "nan",
            "1010": "nan",
            "1015": "nan",
            "1020": "nan",
            "1025": "nan",
            "1030": "nan",
            "1035": "nan",
            "1040": "nan",
            "1045": "nan",
            "1050": "nan",
            "1055": "nan",
            "1060": "nan",
            "1065": "nan",
            "1070": "nan",
            "1075": "nan",
            "1080": "nan",
            "1085": "nan",
            "1090": "nan",
            "1095": "nan",
            "1100": 1.44011,
            "1105": "nan",
            "1110": "nan",
            "1115": "nan",
            "1120": "nan",
            "1125": "nan",
            "1130": "nan",
            "1135": "nan",
            "1140": "nan",
            "1145": "nan",
            "1150": "nan",
            "1155": "nan",
            "1160": "nan",
            "1165": "nan",
            "1170": "nan",
            "1175": "nan",
            "1180": "nan",
            "1185": "nan",
            "1190": "nan",
            "1195": "nan",
            "1200": 1.44546,
            "1205": "nan",
            "1210": "nan",
            "1215": "nan",
            "1220": "nan",
            "1225": "nan",
            "1230": "nan",
            "1235": "nan",
            "1240": "nan",
            "1245": "nan",
            "1250": "nan",
            "1255": "nan",
            "1260": "nan",
            "1265": "nan",
            "1270": "nan",
            "1275": "nan",
            "1280": "nan",
            "1285": "nan",
            "1290": "nan",
            "1295": "nan",
            "1300": 1.43704,
            "1305": "nan",
            "1310": "nan",
            "1315": "nan",
            "1320": "nan",
            "1325": "nan",
            "1330": "nan",
            "1335": "nan",
            "1340": "nan",
            "1345": "nan",
            "1350": "nan",
            "1355": "nan",
            "1360": "nan",
            "1365": "nan",
            "1370": "nan",
            "1375": "nan",
            "1380": "nan",
            "1385": "nan",
            "1390": "nan",
            "1395": "nan",
            "1400": 1.47549,
            "1405": "nan",
            "1410": "nan",
            "1415": "nan",
            "1420": "nan",
            "1425": "nan",
            "1430": "nan",
            "1435": "nan",
            "1440": "nan",
            "1445": "nan",
            "1450": "nan",
            "1455": "nan",
            "1460": "nan",
            "1465": "nan",
            "1470": "nan",
            "1475": "nan",
            "1480": "nan",
            "1485": "nan",
            "1490": "nan",
            "1495": "nan",
            "1500": 1.5242,
            "1505": "nan",
            "1510": "nan",
            "1515": "nan",
            "1520": "nan",
            "1525": "nan",
            "1530": "nan",
            "1535": "nan",
            "1540": "nan",
            "1545": "nan",
            "1550": "nan",
            "1555": "nan",
            "1560": "nan",
            "1565": "nan",
            "1570": "nan",
            "1575": "nan",
            "1580": "nan",
            "1585": "nan",
            "1590": "nan",
            "1595": "nan",
            "1600": 1.44191,
            "1605": "nan",
            "1610": "nan",
            "1615": "nan",
            "1620": "nan",
            "1625": "nan",
            "1630": "nan",
            "1635": "nan",
            "1640": "nan",
            "1645": "nan",
            "1650": "nan",
            "1655": "nan",
            "1660": "nan",
            "1665": "nan",
            "1670": "nan",
            "1675": "nan",
            "1680": "nan",
            "1685": "nan",
            "1690": "nan",
            "1695": "nan",
            "1700": 1.43495,
            "1705": "nan",
            "1710": "nan",
            "1715": "nan",
            "1720": "nan",
            "1725": "nan",
            "1730": "nan",
            "1735": "nan",
            "1740": "nan",
            "1745": "nan",
            "1750": "nan",
            "1755": "nan",
            "1760": "nan",
            "1765": "nan",
            "1770": "nan",
            "1775": "nan",
            "1780": "nan",
            "1785": "nan",
            "1790": "nan",
            "1795": "nan",
            "1800": 1.4335,
            "1805": "nan",
            "1810": "nan",
            "1815": "nan",
            "1820": "nan",
            "1825": "nan",
            "1830": "nan",
            "1835": "nan",
            "1840": "nan",
            "1845": "nan",
            "1850": "nan",
            "1855": "nan",
            "1860": "nan",
            "1865": "nan",
            "1870": "nan",
            "1875": "nan",
            "1880": "nan",
            "1885": "nan",
            "1890": "nan",
            "1895": "nan",
            "1900": 1.46027,
            "1905": "nan",
            "1910": "nan",
            "1915": "nan",
            "1920": "nan",
            "1925": "nan",
            "1930": "nan",
            "1935": "nan",
            "1940": "nan",
            "1945": "nan",
            "1950": "nan",
            "1955": "nan",
            "1960": "nan",
            "1965": "nan",
            "1970": "nan",
            "1975": "nan",
            "1980": "nan",
            "1985": "nan",
            "1990": "nan",
            "1995": "nan",
            "2000": 1.43986,
            "2005": "nan",
            "2010": "nan",
            "2015": "nan",
            "2020": "nan",
            "2025": "nan",
            "2030": "nan",
            "2035": "nan",
            "2040": "nan",
            "2045": "nan",
            "2050": "nan",
            "2055": "nan",
            "2060": "nan",
            "2065": "nan",
            "2070": "nan",
            "2075": "nan",
            "2080": "nan",
            "2085": "nan",
            "2090": "nan",
            "2095": "nan",
            "2100": 1.47505,
            "2105": "nan",
            "2110": "nan",
            "2115": "nan",
            "2120": "nan",
            "2125": "nan",
            "2130": "nan",
            "2135": "nan",
            "2140": "nan",
            "2145": "nan",
            "2150": "nan",
            "2155": "nan",
            "2160": "nan",
            "2165": "nan",
            "2170": "nan",
            "2175": "nan",
            "2180": "nan",
            "2185": "nan",
            "2190": "nan",
            "2195": "nan",
            "2200": 1.43441,
            "2205": "nan",
            "2210": "nan",
            "2215": "nan",
            "2220": "nan",
            "2225": "nan",
            "2230": "nan",
            "2235": "nan",
            "2240": "nan",
            "2245": "nan",
            "2250": "nan",
            "2255": "nan",
            "2260": "nan",
            "2265": "nan",
            "2270": "nan",
            "2275": "nan",
            "2280": "nan",
            "2285": "nan",
            "2290": "nan",
            "2295": "nan",
            "2300": 1.54495,
            "2305": "nan",
            "2310": "nan",
            "2315": "nan",
            "2320": "nan",
            "2325": "nan",
            "2330": "nan",
            "2335": "nan",
            "2340": "nan",
            "2345": "nan",
            "2350": "nan",
            "2355": "nan",
            "2360": "nan",
            "2365": "nan",
            "2370": "nan",
            "2375": "nan",
            "2380": "nan",
            "2385": "nan",
            "2390": "nan",
            "2395": "nan",
            "2400": 1.51503,
            "2405": "nan",
            "2410": "nan",
            "2415": "nan",
            "2420": "nan",
            "2425": "nan",
            "2430": "nan",
            "2435": "nan",
            "2440": "nan",
            "2445": "nan",
            "2450": "nan",
            "2455": "nan",
            "2460": "nan",
            "2465": "nan",
            "2470": "nan",
            "2475": "nan",
            "2480": "nan",
            "2485": "nan",
            "2490": "nan",
            "2495": "nan",
            "2500": 1.43246,
            "2505": "nan",
            "2510": "nan",
            "2515": "nan",
            "2520": "nan",
            "2525": "nan",
            "2530": "nan",
            "2535": "nan",
            "2540": "nan",
            "2545": "nan",
            "2550": "nan",
            "2555": "nan",
            "2560": "nan",
            "2565": "nan",
            "2570": "nan",
            "2575": "nan",
            "2580": "nan",
            "2585": "nan",
            "2590": "nan",
            "2595": "nan",
            "2600": 1.47287,
            "2605": "nan",
            "2610": "nan",
            "2615": "nan",
            "2620": "nan",
            "2625": "nan",
            "2630": "nan",
            "2635": "nan",
            "2640": "nan",
            "2645": "nan",
            "2650": "nan",
            "2655": "nan",
            "2660": "nan",
            "2665": "nan",
            "2670": "nan",
            "2675": "nan",
            "2680": "nan",
            "2685": "nan",
            "2690": "nan",
            "2695": "nan",
            "2700": 1.42953,
            "2705": "nan",
            "2710": "nan",
            "2715": "nan",
            "2720": "nan",
            "2725": "nan",
            "2730": "nan",
            "2735": "nan",
            "2740": "nan",
            "2745": "nan",
            "2750": "nan",
            "2755": "nan",
            "2760": "nan",
            "2765": "nan",
            "2770": "nan",
            "2775": "nan",
            "2780": "nan",
            "2785": "nan",
            "2790": "nan",
            "2795": "nan",
            "2800": 1.46691,
            "2805": "nan",
            "2810": "nan",
            "2815": "nan",
            "2820": "nan",
            "2825": "nan",
            "2830": "nan",
            "2835": "nan",
            "2840": "nan",
            "2845": "nan",
            "2850": "nan",
            "2855": "nan",
            "2860": "nan",
            "2865": "nan",
            "2870": "nan",
            "2875": "nan",
            "2880": "nan",
            "2885": "nan",
            "2890": "nan",
            "2895": "nan",
            "2900": 1.43076,
            "2905": "nan",
            "2910": "nan",
            "2915": "nan",
            "2920": "nan",
            "2925": "nan",
            "2930": "nan",
            "2935": "nan",
            "2940": "nan",
            "2945": "nan",
            "2950": "nan",
            "2955": "nan",
            "2960": "nan",
            "2965": "nan",
            "2970": "nan",
            "2975": "nan",
            "2980": "nan",
            "2985": "nan",
            "2990": "nan",
            "2995": "nan",
            "3000": 1.64091,
            "3005": "nan",
            "3010": "nan",
            "3015": "nan",
            "3020": "nan",
            "3025": "nan",
            "3030": "nan",
            "3035": "nan",
            "3040": "nan",
            "3045": "nan",
            "3050": "nan",
            "3055": "nan",
            "3060": "nan",
            "3065": "nan",
            "3070": "nan",
            "3075": "nan",
            "3080": "nan",
            "3085": "nan",
            "3090": "nan",
            "3095": "nan",
            "3100": 1.59233,
            "3105": "nan",
            "3110": "nan",
            "3115": "nan",
            "3120": "nan",
            "3125": "nan",
            "3130": "nan",
            "3135": "nan",
            "3140": "nan",
            "3145": "nan",
            "3150": "nan",
            "3155": "nan",
            "3160": "nan",
            "3165": "nan",
            "3170": "nan",
            "3175": "nan",
            "3180": "nan",
            "3185": "nan",
            "3190": "nan",
            "3195": "nan",
            "3200": 1.43772,
            "3205": "nan",
            "3210": "nan",
            "3215": "nan",
            "3220": "nan",
            "3225": "nan",
            "3230": "nan",
            "3235": "nan",
            "3240": "nan",
            "3245": "nan",
            "3250": "nan",
            "3255": "nan",
            "3260": "nan",
            "3265": "nan",
            "3270": "nan",
            "3275": "nan",
            "3280": "nan",
            "3285": "nan",
            "3290": "nan",
            "3295": "nan",
            "3300": 1.46407,
            "3305": "nan",
            "3310": "nan",
            "3315": "nan",
            "3320": "nan",
            "3325": "nan",
            "3330": "nan",
            "3335": "nan",
            "3340": "nan",
            "3345": "nan",
            "3350": "nan",
            "3355": "nan",
            "3360": "nan",
            "3365": "nan",
            "3370": "nan",
            "3375": "nan",
            "3380": "nan",
            "3385": "nan",
            "3390": "nan",
            "3395": "nan",
            "3400": 1.42524,
            "3405": "nan",
            "3410": "nan",
            "3415": "nan",
            "3420": "nan",
            "3425": "nan",
            "3430": "nan",
            "3435": "nan",
            "3440": "nan",
            "3445": "nan",
            "3450": "nan",
            "3455": "nan",
            "3460": "nan",
            "3465": "nan",
            "3470": "nan",
            "3475": "nan",
            "3480": "nan",
            "3485": "nan",
            "3490": "nan",
            "3495": "nan",
            "3500": 1.4277,
            "3505": "nan",
            "3510": "nan",
            "3515": "nan",
            "3520": "nan",
            "3525": "nan",
            "3530": "nan",
            "3535": "nan",
            "3540": "nan",
            "3545": "nan",
            "3550": "nan",
            "3555": "nan",
            "3560": "nan",
            "3565": "nan",
            "3570": "nan",
            "3575": "nan",
            "3580": "nan",
            "3585": "nan",
            "3590": "nan",
            "3595": "nan",
            "3600": 1.43193,
            "3605": "nan",
            "3610": "nan",
            "3615": "nan",
            "3620": "nan",
            "3625": "nan",
            "3630": "nan",
            "3635": "nan",
            "3640": "nan",
            "3645": "nan",
            "3650": "nan",
            "3655": "nan",
            "3660": "nan",
            "3665": "nan",
            "3670": "nan",
            "3675": "nan",
            "3680": "nan",
            "3685": "nan",
            "3690": "nan",
            "3695": "nan",
            "3700": 1.54239,
            "3705": "nan",
            "3710": "nan",
            "3715": "nan",
            "3720": "nan",
            "3725": "nan",
            "3730": "nan",
            "3735": "nan",
            "3740": "nan",
            "3745": "nan",
            "3750": "nan",
            "3755": "nan",
            "3760": "nan",
            "3765": "nan",
            "3770": "nan",
            "3775": "nan",
            "3780": "nan",
            "3785": "nan",
            "3790": "nan",
            "3795": "nan",
            "3800": 1.51705,
            "3805": "nan",
            "3810": "nan",
            "3815": "nan",
            "3820": "nan",
            "3825": "nan",
            "3830": "nan",
            "3835": "nan",
            "3840": "nan",
            "3845": "nan",
            "3850": "nan",
            "3855": "nan",
            "3860": "nan",
            "3865": "nan",
            "3870": "nan",
            "3875": "nan",
            "3880": "nan",
            "3885": "nan",
            "3890": "nan",
            "3895": "nan",
            "3900": 1.67534,
            "3905": "nan",
            "3910": "nan",
            "3915": "nan",
            "3920": "nan",
            "3925": "nan",
            "3930": "nan",
            "3935": "nan",
            "3940": "nan",
            "3945": "nan",
            "3950": "nan",
            "3955": "nan",
            "3960": "nan",
            "3965": "nan",
            "3970": "nan",
            "3975": "nan",
            "3980": "nan",
            "3985": "nan",
            "3990": "nan",
            "3995": "nan",
            "4000": 1.67907,
            "4005": "nan",
            "4010": "nan",
            "4015": "nan",
            "4020": "nan",
            "4025": "nan",
            "4030": "nan",
            "4035": "nan",
            "4040": "nan",
            "4045": "nan",
            "4050": "nan",
            "4055": "nan",
            "4060": "nan",
            "4065": "nan",
            "4070": "nan",
            "4075": "nan",
            "4080": "nan",
            "4085": "nan",
            "4090": "nan",
            "4095": "nan",
            "4100": 1.4593,
            "4105": "nan",
            "4110": "nan",
            "4115": "nan",
            "4120": "nan",
            "4125": "nan",
            "4130": "nan",
            "4135": "nan",
            "4140": "nan",
            "4145": "nan",
            "4150": "nan",
            "4155": "nan",
            "4160": "nan",
            "4165": "nan",
            "4170": "nan",
            "4175": "nan",
            "4180": "nan",
            "4185": "nan",
            "4190": "nan",
            "4195": "nan",
            "4200": 1.42454,
            "4205": "nan",
            "4210": "nan",
            "4215": "nan",
            "4220": "nan",
            "4225": "nan",
            "4230": "nan",
            "4235": "nan",
            "4240": "nan",
            "4245": "nan",
            "4250": "nan",
            "4255": "nan",
            "4260": "nan",
            "4265": "nan",
            "4270": "nan",
            "4275": "nan",
            "4280": "nan",
            "4285": "nan",
            "4290": "nan",
            "4295": "nan",
            "4300": 1.45808,
            "4305": "nan",
            "4310": "nan",
            "4315": "nan",
            "4320": "nan",
            "4325": "nan",
            "4330": "nan",
            "4335": "nan",
            "4340": "nan",
            "4345": "nan",
            "4350": "nan",
            "4355": "nan",
            "4360": "nan",
            "4365": "nan",
            "4370": "nan",
            "4375": "nan",
            "4380": "nan",
            "4385": "nan",
            "4390": "nan",
            "4395": "nan",
            "4400": 1.4385,
            "4405": "nan",
            "4410": "nan",
            "4415": "nan",
            "4420": "nan",
            "4425": "nan",
            "4430": "nan",
            "4435": "nan",
            "4440": "nan",
            "4445": "nan",
            "4450": "nan",
            "4455": "nan",
            "4460": "nan",
            "4465": "nan",
            "4470": "nan",
            "4475": "nan",
            "4480": "nan",
            "4485": "nan",
            "4490": "nan",
            "4495": "nan",
            "4500": 1.43321,
            "4505": "nan",
            "4510": "nan",
            "4515": "nan",
            "4520": "nan",
            "4525": "nan",
            "4530": "nan",
            "4535": "nan",
            "4540": "nan",
            "4545": "nan",
            "4550": "nan",
            "4555": "nan",
            "4560": "nan",
            "4565": "nan",
            "4570": "nan",
            "4575": "nan",
            "4580": "nan",
            "4585": "nan",
            "4590": "nan",
            "4595": "nan",
            "4600": 3.29055,
            "4605": "nan",
            "4610": "nan",
            "4615": "nan",
            "4620": "nan",
            "4625": "nan",
            "4630": "nan",
            "4635": "nan",
            "4640": "nan",
            "4645": "nan",
            "4650": "nan",
            "4655": "nan",
            "4660": "nan",
            "4665": "nan",
            "4670": "nan",
            "4675": "nan",
            "4680": "nan",
            "4685": "nan",
            "4690": "nan",
            "4695": "nan",
            "4700": 1.42713,
            "4705": "nan",
            "4710": "nan",
            "4715": "nan",
            "4720": "nan",
            "4725": "nan",
            "4730": "nan",
            "4735": "nan",
            "4740": "nan",
            "4745": "nan",
            "4750": "nan",
            "4755": "nan",
            "4760": "nan",
            "4765": "nan",
            "4770": "nan",
            "4775": "nan",
            "4780": "nan",
            "4785": "nan",
            "4790": "nan",
            "4795": "nan",
            "4800": 1.47366,
            "4805": "nan",
            "4810": "nan",
            "4815": "nan",
            "4820": "nan",
            "4825": "nan",
            "4830": "nan",
            "4835": "nan",
            "4840": "nan",
            "4845": "nan",
            "4850": "nan",
            "4855": "nan",
            "4860": "nan",
            "4865": "nan",
            "4870": "nan",
            "4875": "nan",
            "4880": "nan",
            "4885": "nan",
            "4890": "nan",
            "4895": "nan",
            "4900": 1.42583,
            "4905": "nan",
            "4910": "nan",
            "4915": "nan",
            "4920": "nan",
            "4925": "nan",
            "4930": "nan",
            "4935": "nan",
            "4940": "nan",
            "4945": "nan",
            "4950": "nan",
            "4955": "nan",
            "4960": "nan",
            "4965": "nan",
            "4970": "nan",
            "4975": "nan",
            "4980": "nan",
            "4985": "nan",
            "4990": "nan",
            "4995": "nan",
            "5000": 1.51639,
            "5005": "nan",
            "5010": "nan",
            "5015": "nan",
            "5020": "nan",
            "5025": "nan",
            "5030": "nan",
            "5035": "nan",
            "5040": "nan",
            "5045": "nan",
            "5050": "nan",
            "5055": "nan",
            "5060": "nan",
            "5065": "nan",
            "5070": "nan",
            "5075": "nan",
            "5080": "nan",
            "5085": "nan",
            "5090": "nan",
            "5095": "nan",
            "5100": 1.4372,
            "5105": "nan",
            "5110": "nan",
            "5115": "nan",
            "5120": "nan",
            "5125": "nan",
            "5130": "nan",
            "5135": "nan",
            "5140": "nan",
            "5145": "nan",
            "5150": "nan",
            "5155": "nan",
            "5160": "nan",
            "5165": "nan",
            "5170": "nan",
            "5175": "nan",
            "5180": "nan",
            "5185": "nan",
            "5190": "nan",
            "5195": "nan",
            "5200": 1.432,
            "5205": "nan",
            "5210": "nan",
            "5215": "nan",
            "5220": "nan",
            "5225": "nan",
            "5230": "nan",
            "5235": "nan",
            "5240": "nan",
            "5245": "nan",
            "5250": "nan",
            "5255": "nan",
            "5260": "nan",
            "5265": "nan",
            "5270": "nan",
            "5275": "nan",
            "5280": "nan",
            "5285": "nan",
            "5290": "nan",
            "5295": "nan",
            "5300": 1.50922,
            "5305": "nan",
            "5310": "nan",
            "5315": "nan",
            "5320": "nan",
            "5325": "nan",
            "5330": "nan",
            "5335": "nan",
            "5340": "nan",
            "5345": "nan",
            "5350": "nan",
            "5355": "nan",
            "5360": "nan",
            "5365": "nan",
            "5370": "nan",
            "5375": "nan",
            "5380": "nan",
            "5385": "nan",
            "5390": "nan",
            "5395": "nan",
            "5400": 1.50495,
            "5405": "nan",
            "5410": "nan",
            "5415": "nan",
            "5420": "nan",
            "5425": "nan",
            "5430": "nan",
            "5435": "nan",
            "5440": "nan",
            "5445": "nan",
            "5450": "nan",
            "5455": "nan",
            "5460": "nan",
            "5465": "nan",
            "5470": "nan",
            "5475": "nan",
            "5480": "nan",
            "5485": "nan",
            "5490": "nan",
            "5495": "nan",
            "5500": 1.43826,
            "5505": "nan",
            "5510": "nan",
            "5515": "nan",
            "5520": "nan",
            "5525": "nan",
            "5530": "nan",
            "5535": "nan",
            "5540": "nan",
            "5545": "nan",
            "5550": "nan",
            "5555": "nan",
            "5560": "nan",
            "5565": "nan",
            "5570": "nan",
            "5575": "nan",
            "5580": "nan",
            "5585": "nan",
            "5590": "nan",
            "5595": "nan",
            "5600": 1.43314,
            "5605": "nan",
            "5610": "nan",
            "5615": "nan",
            "5620": "nan",
            "5625": "nan",
            "5630": "nan",
            "5635": "nan",
            "5640": "nan",
            "5645": "nan",
            "5650": "nan",
            "5655": "nan",
            "5660": "nan",
            "5665": "nan",
            "5670": "nan",
            "5675": "nan",
            "5680": "nan",
            "5685": "nan",
            "5690": "nan",
            "5695": "nan",
            "5700": 1.42578,
            "5705": "nan",
            "5710": "nan",
            "5715": "nan",
            "5720": "nan",
            "5725": "nan",
            "5730": "nan",
            "5735": "nan",
            "5740": "nan",
            "5745": "nan",
            "5750": "nan",
            "5755": "nan",
            "5760": "nan",
            "5765": "nan",
            "5770": "nan",
            "5775": "nan",
            "5780": "nan",
            "5785": "nan",
            "5790": "nan",
            "5795": "nan",
            "5800": 1.54238,
            "5805": "nan",
            "5810": "nan",
            "5815": "nan",
            "5820": "nan",
            "5825": "nan",
            "5830": "nan",
            "5835": "nan",
            "5840": "nan",
            "5845": "nan",
            "5850": "nan",
            "5855": "nan",
            "5860": "nan",
            "5865": "nan",
            "5870": "nan",
            "5875": "nan",
            "5880": "nan",
            "5885": "nan",
            "5890": "nan",
            "5895": "nan",
            "5900": 1.66057,
            "5905": "nan",
            "5910": "nan",
            "5915": "nan",
            "5920": "nan",
            "5925": "nan",
            "5930": "nan",
            "5935": "nan",
            "5940": "nan",
            "5945": "nan",
            "5950": "nan",
            "5955": "nan",
            "5960": "nan",
            "5965": "nan",
            "5970": "nan",
            "5975": "nan",
            "5980": "nan",
            "5985": "nan",
            "5990": "nan",
            "5995": "nan",
            "6000": 1.43351,
            "6005": "nan",
            "6010": "nan",
            "6015": "nan",
            "6020": "nan",
            "6025": "nan",
            "6030": "nan",
            "6035": "nan",
            "6040": "nan",
            "6045": "nan",
            "6050": "nan",
            "6055": "nan",
            "6060": "nan",
            "6065": "nan",
            "6070": "nan",
            "6075": "nan",
            "6080": "nan",
            "6085": "nan",
            "6090": "nan",
            "6095": "nan",
            "6100": 1.54984,
            "6105": "nan",
            "6110": "nan",
            "6115": "nan",
            "6120": "nan",
            "6125": "nan",
            "6130": "nan",
            "6135": "nan",
            "6140": "nan",
            "6145": "nan",
            "6150": "nan",
            "6155": "nan",
            "6160": "nan",
            "6165": "nan",
            "6170": "nan",
            "6175": "nan",
            "6180": "nan",
            "6185": "nan",
            "6190": "nan",
            "6195": "nan",
            "6200": 1.51837,
            "6205": "nan",
            "6210": "nan",
            "6215": "nan",
            "6220": "nan",
            "6225": "nan",
            "6230": "nan",
            "6235": "nan",
            "6240": "nan",
            "6245": "nan",
            "6250": "nan",
            "6255": "nan",
            "6260": "nan",
            "6265": "nan",
            "6270": "nan",
            "6275": "nan",
            "6280": "nan",
            "6285": "nan",
            "6290": "nan",
            "6295": "nan",
            "6300": 1.43501,
            "6305": "nan",
            "6310": "nan",
            "6315": "nan",
            "6320": "nan",
            "6325": "nan",
            "6330": "nan",
            "6335": "nan",
            "6340": "nan",
            "6345": "nan",
            "6350": "nan",
            "6355": "nan",
            "6360": "nan",
            "6365": "nan",
            "6370": "nan",
            "6375": "nan",
            "6380": "nan",
            "6385": "nan",
            "6390": "nan",
            "6395": "nan",
            "6400": 1.44369,
            "6405": "nan",
            "6410": "nan",
            "6415": "nan",
            "6420": "nan",
            "6425": "nan",
            "6430": "nan",
            "6435": "nan",
            "6440": "nan",
            "6445": "nan",
            "6450": "nan",
            "6455": "nan",
            "6460": "nan",
            "6465": "nan",
            "6470": "nan",
            "6475": "nan",
            "6480": "nan",
            "6485": "nan",
            "6490": "nan",
            "6495": "nan",
            "6500": 1.69151,
            "6505": "nan",
            "6510": "nan",
            "6515": "nan",
            "6520": "nan",
            "6525": "nan",
            "6530": "nan",
            "6535": "nan",
            "6540": "nan",
            "6545": "nan",
            "6550": "nan",
            "6555": "nan",
            "6560": "nan",
            "6565": "nan",
            "6570": "nan",
            "6575": "nan",
            "6580": "nan",
            "6585": "nan",
            "6590": "nan",
            "6595": "nan",
            "6600": 1.73692,
            "6605": "nan",
            "6610": "nan",
            "6615": "nan",
            "6620": "nan",
            "6625": "nan",
            "6630": "nan",
            "6635": "nan",
            "6640": "nan",
            "6645": "nan",
            "6650": "nan",
            "6655": "nan",
            "6660": "nan",
            "6665": "nan",
            "6670": "nan",
            "6675": "nan",
            "6680": "nan",
            "6685": "nan",
            "6690": "nan",
            "6695": "nan",
            "6700": 1.50479,
            "6705": "nan",
            "6710": "nan",
            "6715": "nan",
            "6720": "nan",
            "6725": "nan",
            "6730": "nan",
            "6735": "nan",
            "6740": "nan",
            "6745": "nan",
            "6750": "nan",
            "6755": "nan",
            "6760": "nan",
            "6765": "nan",
            "6770": "nan",
            "6775": "nan",
            "6780": "nan",
            "6785": "nan",
            "6790": "nan",
            "6795": "nan",
            "6800": 1.42689,
            "6805": "nan",
            "6810": "nan",
            "6815": "nan",
            "6820": "nan",
            "6825": "nan",
            "6830": "nan",
            "6835": "nan",
            "6840": "nan",
            "6845": "nan",
            "6850": "nan",
            "6855": "nan",
            "6860": "nan",
            "6865": "nan",
            "6870": "nan",
            "6875": "nan",
            "6880": "nan",
            "6885": "nan",
            "6890": "nan",
            "6895": "nan",
            "6900": 1.50703,
            "6905": "nan",
            "6910": "nan",
            "6915": "nan",
            "6920": "nan",
            "6925": "nan",
            "6930": "nan",
            "6935": "nan",
            "6940": "nan",
            "6945": "nan",
            "6950": "nan",
            "6955": "nan",
            "6960": "nan",
            "6965": "nan",
            "6970": "nan",
            "6975": "nan",
            "6980": "nan",
            "6985": "nan",
            "6990": "nan",
            "6995": "nan",
            "7000": 1.4532,
            "7005": "nan",
            "7010": "nan",
            "7015": "nan",
            "7020": "nan",
            "7025": "nan",
            "7030": "nan",
            "7035": "nan",
            "7040": "nan",
            "7045": "nan",
            "7050": "nan",
            "7055": "nan",
            "7060": "nan",
            "7065": "nan",
            "7070": "nan",
            "7075": "nan",
            "7080": "nan",
            "7085": "nan",
            "7090": "nan",
            "7095": "nan",
            "7100": 1.51593,
            "7105": "nan",
            "7110": "nan",
            "7115": "nan",
            "7120": "nan",
            "7125": "nan",
            "7130": "nan",
            "7135": "nan",
            "7140": "nan",
            "7145": "nan",
            "7150": "nan",
            "7155": "nan",
            "7160": "nan",
            "7165": "nan",
            "7170": "nan",
            "7175": "nan",
            "7180": "nan",
            "7185": "nan",
            "7190": "nan",
            "7195": "nan",
            "7200": 1.45753,
            "7205": "nan",
            "7210": "nan",
            "7215": "nan",
            "7220": "nan",
            "7225": "nan",
            "7230": "nan",
            "7235": "nan",
            "7240": "nan",
            "7245": "nan",
            "7250": "nan",
            "7255": "nan",
            "7260": "nan",
            "7265": "nan",
            "7270": "nan",
            "7275": "nan",
            "7280": "nan",
            "7285": "nan",
            "7290": "nan",
            "7295": "nan",
            "7300": 1.61042,
            "7305": "nan",
            "7310": "nan",
            "7315": "nan",
            "7320": "nan",
            "7325": "nan",
            "7330": "nan",
            "7335": "nan",
            "7340": "nan",
            "7345": "nan",
            "7350": "nan",
            "7355": "nan",
            "7360": "nan",
            "7365": "nan",
            "7370": "nan",
            "7375": "nan",
            "7380": "nan",
            "7385": "nan",
            "7390": "nan",
            "7395": "nan",
            "7400": 1.43351,
            "7405": "nan",
            "7410": "nan",
            "7415": "nan",
            "7420": "nan",
            "7425": "nan",
            "7430": "nan",
            "7435": "nan",
            "7440": "nan",
            "7445": "nan",
            "7450": "nan",
            "7455": "nan",
            "7460": "nan",
            "7465": "nan",
            "7470": "nan",
            "7475": "nan",
            "7480": "nan",
            "7485": "nan",
            "7490": "nan",
            "7495": "nan",
            "7500": 1.45693,
            "7505": "nan",
            "7510": "nan",
            "7515": "nan",
            "7520": "nan",
            "7525": "nan",
            "7530": "nan",
            "7535": "nan",
            "7540": "nan",
            "7545": "nan",
            "7550": "nan",
            "7555": "nan",
            "7560": "nan",
            "7565": "nan",
            "7570": "nan",
            "7575": "nan",
            "7580": "nan",
            "7585": "nan",
            "7590": "nan",
            "7595": "nan",
            "7600": 1.43171,
            "7605": "nan",
            "7610": "nan",
            "7615": "nan",
            "7620": "nan",
            "7625": "nan",
            "7630": "nan",
            "7635": "nan",
            "7640": "nan",
            "7645": "nan",
            "7650": "nan",
            "7655": "nan",
            "7660": "nan",
            "7665": "nan",
            "7670": "nan",
            "7675": "nan",
            "7680": "nan",
            "7685": "nan",
            "7690": "nan",
            "7695": "nan",
            "7700": 1.42897,
            "7705": "nan",
            "7710": "nan",
            "7715": "nan",
            "7720": "nan",
            "7725": "nan",
            "7730": "nan",
            "7735": "nan",
            "7740": "nan",
            "7745": "nan",
            "7750": "nan",
            "7755": "nan",
            "7760": "nan",
            "7765": "nan",
            "7770": "nan",
            "7775": "nan",
            "7780": "nan",
            "7785": "nan",
            "7790": "nan",
            "7795": "nan",
            "7800": 1.44919,
            "7805": "nan",
            "7810": "nan",
            "7815": "nan",
            "7820": "nan",
            "7825": "nan",
            "7830": "nan",
            "7835": "nan",
            "7840": "nan",
            "7845": "nan",
            "7850": "nan",
            "7855": "nan",
            "7860": "nan",
            "7865": "nan",
            "7870": "nan",
            "7875": "nan",
            "7880": "nan",
            "7885": "nan",
            "7890": "nan",
            "7895": "nan",
            "7900": 1.45149,
            "7905": "nan",
            "7910": "nan",
            "7915": "nan",
            "7920": "nan",
            "7925": "nan",
            "7930": "nan",
            "7935": "nan",
            "7940": "nan",
            "7945": "nan",
            "7950": "nan",
            "7955": "nan",
            "7960": "nan",
            "7965": "nan",
            "7970": "nan",
            "7975": "nan",
            "7980": "nan",
            "7985": "nan",
            "7990": "nan",
            "7995": "nan",
            "8000": 1.51543,
            "8005": "nan",
            "8010": "nan",
            "8015": "nan",
            "8020": "nan",
            "8025": "nan",
            "8030": "nan",
            "8035": "nan",
            "8040": "nan",
            "8045": "nan",
            "8050": "nan",
            "8055": "nan",
            "8060": "nan",
            "8065": "nan",
            "8070": "nan",
            "8075": "nan",
            "8080": "nan",
            "8085": "nan",
            "8090": "nan",
            "8095": "nan",
            "8100": 1.46342,
            "8105": "nan",
            "8110": "nan",
            "8115": "nan",
            "8120": "nan",
            "8125": "nan",
            "8130": "nan",
            "8135": "nan",
            "8140": "nan",
            "8145": "nan",
            "8150": "nan",
            "8155": "nan",
            "8160": "nan",
            "8165": "nan",
            "8170": "nan",
            "8175": "nan",
            "8180": "nan",
            "8185": "nan",
            "8190": "nan",
            "8195": "nan",
            "8200": 1.59038,
            "8205": "nan",
            "8210": "nan",
            "8215": "nan",
            "8220": "nan",
            "8225": "nan",
            "8230": "nan",
            "8235": "nan",
            "8240": "nan",
            "8245": "nan",
            "8250": "nan",
            "8255": "nan",
            "8260": "nan",
            "8265": "nan",
            "8270": "nan",
            "8275": "nan",
            "8280": "nan",
            "8285": "nan",
            "8290": "nan",
            "8295": "nan",
            "8300": 1.72439,
            "8305": "nan",
            "8310": "nan",
            "8315": "nan",
            "8320": "nan",
            "8325": "nan",
            "8330": "nan",
            "8335": "nan",
            "8340": "nan",
            "8345": "nan",
            "8350": "nan",
            "8355": "nan",
            "8360": "nan",
            "8365": "nan",
            "8370": "nan",
            "8375": "nan",
            "8380": "nan",
            "8385": "nan",
            "8390": "nan",
            "8395": "nan",
            "8400": 1.43549,
            "8405": "nan",
            "8410": "nan",
            "8415": "nan",
            "8420": "nan",
            "8425": "nan",
            "8430": "nan",
            "8435": "nan",
            "8440": "nan",
            "8445": "nan",
            "8450": "nan",
            "8455": "nan",
            "8460": "nan",
            "8465": "nan",
            "8470": "nan",
            "8475": "nan",
            "8480": "nan",
            "8485": "nan",
            "8490": "nan",
            "8495": "nan",
            "8500": 1.51344,
            "8505": "nan",
            "8510": "nan",
            "8515": "nan",
            "8520": "nan",
            "8525": "nan",
            "8530": "nan",
            "8535": "nan",
            "8540": "nan",
            "8545": "nan",
            "8550": "nan",
            "8555": "nan",
            "8560": "nan",
            "8565": "nan",
            "8570": "nan",
            "8575": "nan",
            "8580": "nan",
            "8585": "nan",
            "8590": "nan",
            "8595": "nan",
            "8600": 1.43389,
            "8605": "nan",
            "8610": "nan",
            "8615": "nan",
            "8620": "nan",
            "8625": "nan",
            "8630": "nan",
            "8635": "nan",
            "8640": "nan",
            "8645": "nan",
            "8650": "nan",
            "8655": "nan",
            "8660": "nan",
            "8665": "nan",
            "8670": "nan",
            "8675": "nan",
            "8680": "nan",
            "8685": "nan",
            "8690": "nan",
            "8695": "nan",
            "8700": 1.42937,
            "8705": "nan",
            "8710": "nan",
            "8715": "nan",
            "8720": "nan",
            "8725": "nan",
            "8730": "nan",
            "8735": "nan",
            "8740": "nan",
            "8745": "nan",
            "8750": "nan",
            "8755": "nan",
            "8760": "nan",
            "8765": "nan",
            "8770": "nan",
            "8775": "nan",
            "8780": "nan",
            "8785": "nan",
            "8790": "nan",
            "8795": "nan",
            "8800": 1.43364,
            "8805": "nan",
            "8810": "nan",
            "8815": "nan",
            "8820": "nan",
            "8825": "nan",
            "8830": "nan",
            "8835": "nan",
            "8840": "nan",
            "8845": "nan",
            "8850": "nan",
            "8855": "nan",
            "8860": "nan",
            "8865": "nan",
            "8870": "nan",
            "8875": "nan",
            "8880": "nan",
            "8885": "nan",
            "8890": "nan",
            "8895": "nan",
            "8900": 1.42823,
            "8905": "nan",
            "8910": "nan",
            "8915": "nan",
            "8920": "nan",
            "8925": "nan",
            "8930": "nan",
            "8935": "nan",
            "8940": "nan",
            "8945": "nan",
            "8950": "nan",
            "8955": "nan",
            "8960": "nan",
            "8965": "nan",
            "8970": "nan",
            "8975": "nan",
            "8980": "nan",
            "8985": "nan",
            "8990": "nan",
            "8995": "nan",
            "9000": 1.42555,
            "9005": "nan",
            "9010": "nan",
            "9015": "nan",
            "9020": "nan",
            "9025": "nan",
            "9030": "nan",
            "9035": "nan",
            "9040": "nan",
            "9045": "nan",
            "9050": "nan",
            "9055": "nan",
            "9060": "nan",
            "9065": "nan",
            "9070": "nan",
            "9075": "nan",
            "9080": "nan",
            "9085": "nan",
            "9090": "nan",
            "9095": "nan",
            "9100": 1.47893,
            "9105": "nan",
            "9110": "nan",
            "9115": "nan",
            "9120": "nan",
            "9125": "nan",
            "9130": "nan",
            "9135": "nan",
            "9140": "nan",
            "9145": "nan",
            "9150": "nan",
            "9155": "nan",
            "9160": "nan",
            "9165": "nan",
            "9170": "nan",
            "9175": "nan",
            "9180": "nan",
            "9185": "nan",
            "9190": "nan",
            "9195": "nan",
            "9200": 1.54458,
            "9205": "nan",
            "9210": "nan",
            "9215": "nan",
            "9220": "nan",
            "9225": "nan",
            "9230": "nan",
            "9235": "nan",
            "9240": "nan",
            "9245": "nan",
            "9250": "nan",
            "9255": "nan",
            "9260": "nan",
            "9265": "nan",
            "9270": "nan",
            "9275": "nan",
            "9280": "nan",
            "9285": "nan",
            "9290": "nan",
            "9295": "nan",
            "9300": 1.51369,
            "9305": "nan",
            "9310": "nan",
            "9315": "nan",
            "9320": "nan",
            "9325": "nan",
            "9330": "nan",
            "9335": "nan",
            "9340": "nan",
            "9345": "nan",
            "9350": "nan",
            "9355": "nan",
            "9360": "nan",
            "9365": "nan",
            "9370": "nan",
            "9375": "nan",
            "9380": "nan",
            "9385": "nan",
            "9390": "nan",
            "9395": "nan",
            "9400": 1.59273,
            "9405": "nan",
            "9410": "nan",
            "9415": "nan",
            "9420": "nan",
            "9425": "nan",
            "9430": "nan",
            "9435": "nan",
            "9440": "nan",
            "9445": "nan",
            "9450": "nan",
            "9455": "nan",
            "9460": "nan",
            "9465": "nan",
            "9470": "nan",
            "9475": "nan",
            "9480": "nan",
            "9485": "nan",
            "9490": "nan",
            "9495": "nan",
            "9500": 1.51633,
            "9505": "nan",
            "9510": "nan",
            "9515": "nan",
            "9520": "nan",
            "9525": "nan",
            "9530": "nan",
            "9535": "nan",
            "9540": "nan",
            "9545": "nan",
            "9550": "nan",
            "9555": "nan",
            "9560": "nan",
            "9565": "nan",
            "9570": "nan",
            "9575": "nan",
            "9580": "nan",
            "9585": "nan",
            "9590": "nan",
            "9595": "nan",
            "9600": 1.42217,
            "9605": "nan",
            "9610": "nan",
            "9615": "nan",
            "9620": "nan",
            "9625": "nan",
            "9630": "nan",
            "9635": "nan",
            "9640": "nan",
            "9645": "nan",
            "9650": "nan",
            "9655": "nan",
            "9660": "nan",
            "9665": "nan",
            "9670": "nan",
            "9675": "nan",
            "9680": "nan",
            "9685": "nan",
            "9690": "nan",
            "9695": "nan",
            "9700": 1.46834,
            "9705": "nan",
            "9710": "nan",
            "9715": "nan",
            "9720": "nan",
            "9725": "nan",
            "9730": "nan",
            "9735": "nan",
            "9740": "nan",
            "9745": "nan",
            "9750": "nan",
            "9755": "nan",
            "9760": "nan",
            "9765": "nan",
            "9770": "nan",
            "9775": "nan",
            "9780": "nan",
            "9785": "nan",
            "9790": "nan",
            "9795": "nan",
            "9800": 2.31936,
            "9805": "nan",
            "9810": "nan",
            "9815": "nan",
            "9820": "nan",
            "9825": "nan",
            "9830": "nan",
            "9835": "nan",
            "9840": "nan",
            "9845": "nan",
            "9850": "nan",
            "9855": "nan",
            "9860": "nan",
            "9865": "nan",
            "9870": "nan",
            "9875": "nan",
            "9880": "nan",
            "9885": "nan",
            "9890": "nan",
            "9895": "nan",
            "9900": 1.5164,
            "9905": "nan",
            "9910": "nan",
            "9915": "nan",
            "9920": "nan",
            "9925": "nan",
            "9930": "nan",
            "9935": "nan",
            "9940": "nan",
            "9945": "nan",
            "9950": "nan",
            "9955": "nan",
            "9960": "nan",
            "9965": "nan",
            "9970": "nan",
            "9975": "nan",
            "9980": "nan",
            "9985": "nan",
            "9990": "nan",
            "9995": "nan",
            "10000": 1.42494,
            "10005": "nan",
            "10010": "nan",
            "10015": "nan",
            "10020": "nan",
            "10025": "nan",
            "10030": "nan",
            "10035": "nan",
            "10040": "nan",
            "10045": "nan",
            "10050": "nan",
            "10055": "nan",
            "10060": "nan",
            "10065": "nan",
            "10070": "nan",
            "10075": "nan",
            "10080": "nan",
            "10085": "nan",
            "10090": "nan",
            "10095": "nan",
            "10100": 1.43004,
            "10105": "nan",
            "10110": "nan",
            "10115": "nan",
            "10120": "nan",
            "10125": "nan",
            "10130": "nan",
            "10135": "nan",
            "10140": "nan",
            "10145": "nan",
            "10150": "nan",
            "10155": "nan",
            "10160": "nan",
            "10165": "nan",
            "10170": "nan",
            "10175": "nan",
            "10180": "nan",
            "10185": "nan",
            "10190": "nan",
            "10195": "nan",
            "10200": 1.4274,
            "10205": "nan",
            "10210": "nan",
            "10215": "nan",
            "10220": "nan",
            "10225": "nan",
            "10230": "nan",
            "10235": "nan",
            "10240": "nan",
            "10245": "nan",
            "10250": "nan",
            "10255": "nan",
            "10260": "nan",
            "10265": "nan",
            "10270": "nan",
            "10275": "nan",
            "10280": "nan",
            "10285": "nan",
            "10290": "nan",
            "10295": "nan",
            "10300": 1.43038,
            "10305": "nan",
            "10310": "nan",
            "10315": "nan",
            "10320": "nan",
            "10325": "nan",
            "10330": "nan",
            "10335": "nan",
            "10340": "nan",
            "10345": "nan",
            "10350": "nan",
            "10355": "nan",
            "10360": "nan",
            "10365": "nan",
            "10370": "nan",
            "10375": "nan",
            "10380": "nan",
            "10385": "nan",
            "10390": "nan",
            "10395": "nan",
            "10400": 1.43012,
            "10405": "nan",
            "10410": "nan",
            "10415": "nan",
            "10420": "nan",
            "10425": "nan",
            "10430": "nan",
            "10435": "nan",
            "10440": "nan",
            "10445": "nan",
            "10450": "nan",
            "10455": "nan",
            "10460": "nan",
            "10465": "nan",
            "10470": "nan",
            "10475": "nan",
            "10480": "nan",
            "10485": "nan",
            "10490": "nan",
            "10495": "nan",
            "10500": 1.43136,
            "10505": "nan",
            "10510": "nan",
            "10515": "nan",
            "10520": "nan",
            "10525": "nan",
            "10530": "nan",
            "10535": "nan",
            "10540": "nan",
            "10545": "nan",
            "10550": "nan",
            "10555": "nan",
            "10560": "nan",
            "10565": "nan",
            "10570": "nan",
            "10575": "nan",
            "10580": "nan",
            "10585": "nan",
            "10590": "nan",
            "10595": "nan",
            "10600": 1.43207,
            "10605": "nan",
            "10610": "nan",
            "10615": "nan",
            "10620": "nan",
            "10625": "nan",
            "10630": "nan",
            "10635": "nan",
            "10640": "nan",
            "10645": "nan",
            "10650": "nan",
            "10655": "nan",
            "10660": "nan",
            "10665": "nan",
            "10670": "nan",
            "10675": "nan",
            "10680": "nan",
            "10685": "nan",
            "10690": "nan",
            "10695": "nan",
            "10700": 1.43277,
            "10705": "nan",
            "10710": "nan",
            "10715": "nan",
            "10720": "nan",
            "10725": "nan",
            "10730": "nan",
            "10735": "nan",
            "10740": "nan",
            "10745": "nan",
            "10750": "nan",
            "10755": "nan",
            "10760": "nan",
            "10765": "nan",
            "10770": "nan",
            "10775": "nan",
            "10780": "nan",
            "10785": "nan",
            "10790": "nan",
            "10795": "nan",
            "10800": 1.59276,
            "10805": "nan",
            "10810": "nan",
            "10815": "nan",
            "10820": "nan",
            "10825": "nan",
            "10830": "nan",
            "10835": "nan",
            "10840": "nan",
            "10845": "nan",
            "10850": "nan",
            "10855": "nan",
            "10860": "nan",
            "10865": "nan",
            "10870": "nan",
            "10875": "nan",
            "10880": "nan",
            "10885": "nan",
            "10890": "nan",
            "10895": "nan",
            "10900": 1.43482,
            "10905": "nan",
            "10910": "nan",
            "10915": "nan",
            "10920": "nan",
            "10925": "nan",
            "10930": "nan",
            "10935": "nan",
            "10940": "nan",
            "10945": "nan",
            "10950": "nan",
            "10955": "nan",
            "10960": "nan",
            "10965": "nan",
            "10970": "nan",
            "10975": "nan",
            "10980": "nan",
            "10985": "nan",
            "10990": "nan",
            "10995": "nan",
            "11000": 1.43307,
            "11005": "nan",
            "11010": "nan",
            "11015": "nan",
            "11020": "nan",
            "11025": "nan",
            "11030": "nan",
            "11035": "nan",
            "11040": "nan",
            "11045": "nan",
            "11050": "nan",
            "11055": "nan",
            "11060": "nan",
            "11065": "nan",
            "11070": "nan",
            "11075": "nan",
            "11080": "nan",
            "11085": "nan",
            "11090": "nan",
            "11095": "nan",
            "11100": 1.45438,
            "11105": "nan",
            "11110": "nan",
            "11115": "nan",
            "11120": "nan",
            "11125": "nan",
            "11130": "nan",
            "11135": "nan",
            "11140": "nan",
            "11145": "nan",
            "11150": "nan",
            "11155": "nan",
            "11160": "nan",
            "11165": "nan",
            "11170": "nan",
            "11175": "nan",
            "11180": "nan",
            "11185": "nan",
            "11190": "nan",
            "11195": "nan",
            "11200": 1.91248,
            "11205": "nan",
            "11210": "nan",
            "11215": "nan",
            "11220": "nan",
            "11225": "nan",
            "11230": "nan",
            "11235": "nan",
            "11240": "nan",
            "11245": "nan",
            "11250": "nan",
            "11255": "nan",
            "11260": "nan",
            "11265": "nan",
            "11270": "nan",
            "11275": "nan",
            "11280": "nan",
            "11285": "nan",
            "11290": "nan",
            "11295": "nan",
            "11300": 1.66415,
            "11305": "nan",
            "11310": "nan",
            "11315": "nan",
            "11320": "nan",
            "11325": "nan",
            "11330": "nan",
            "11335": "nan",
            "11340": "nan",
            "11345": "nan",
            "11350": "nan",
            "11355": "nan",
            "11360": "nan",
            "11365": "nan",
            "11370": "nan",
            "11375": "nan",
            "11380": "nan",
            "11385": "nan",
            "11390": "nan",
            "11395": "nan",
            "11400": 1.42797,
            "11405": "nan",
            "11410": "nan",
            "11415": "nan",
            "11420": "nan",
            "11425": "nan",
            "11430": "nan",
            "11435": "nan",
            "11440": "nan",
            "11445": "nan",
            "11450": "nan",
            "11455": "nan",
            "11460": "nan",
            "11465": "nan",
            "11470": "nan",
            "11475": "nan",
            "11480": "nan",
            "11485": "nan",
            "11490": "nan",
            "11495": "nan",
            "11500": 1.42875,
            "11505": "nan",
            "11510": "nan",
            "11515": "nan",
            "11520": "nan",
            "11525": "nan",
            "11530": "nan",
            "11535": "nan",
            "11540": "nan",
            "11545": "nan",
            "11550": "nan",
            "11555": "nan",
            "11560": "nan",
            "11565": "nan",
            "11570": "nan",
            "11575": "nan",
            "11580": "nan",
            "11585": "nan",
            "11590": "nan",
            "11595": "nan",
            "11600": 1.42955,
            "11605": "nan",
            "11610": "nan",
            "11615": "nan",
            "11620": "nan",
            "11625": "nan",
            "11630": "nan",
            "11635": "nan",
            "11640": "nan",
            "11645": "nan",
            "11650": "nan",
            "11655": "nan",
            "11660": "nan",
            "11665": "nan",
            "11670": "nan",
            "11675": "nan",
            "11680": "nan",
            "11685": "nan",
            "11690": "nan",
            "11695": "nan",
            "11700": 1.45905,
            "11705": "nan",
            "11710": "nan",
            "11715": "nan",
            "11720": "nan",
            "11725": "nan",
            "11730": "nan",
            "11735": "nan",
            "11740": "nan",
            "11745": "nan",
            "11750": "nan",
            "11755": "nan",
            "11760": "nan",
            "11765": "nan",
            "11770": "nan",
            "11775": "nan",
            "11780": "nan",
            "11785": "nan",
            "11790": "nan",
            "11795": "nan",
            "11800": 1.42993,
            "11805": "nan",
            "11810": "nan",
            "11815": "nan",
            "11820": "nan",
            "11825": "nan",
            "11830": "nan",
            "11835": "nan",
            "11840": "nan",
            "11845": "nan",
            "11850": "nan",
            "11855": "nan",
            "11860": "nan",
            "11865": "nan",
            "11870": "nan",
            "11875": "nan",
            "11880": "nan",
            "11885": "nan",
            "11890": "nan",
            "11895": "nan",
            "11900": 1.43196,
            "11905": "nan",
            "11910": "nan",
            "11915": "nan",
            "11920": "nan",
            "11925": "nan",
            "11930": "nan",
            "11935": "nan",
            "11940": "nan",
            "11945": "nan",
            "11950": "nan",
            "11955": "nan",
            "11960": "nan",
            "11965": "nan",
            "11970": "nan",
            "11975": "nan",
            "11980": "nan",
            "11985": "nan",
            "11990": "nan",
            "11995": "nan",
            "12000": 1.45532,
            "12005": "nan",
            "12010": "nan",
            "12015": "nan",
            "12020": "nan",
            "12025": "nan",
            "12030": "nan",
            "12035": "nan",
            "12040": "nan",
            "12045": "nan",
            "12050": "nan",
            "12055": "nan",
            "12060": "nan",
            "12065": "nan",
            "12070": "nan",
            "12075": "nan",
            "12080": "nan",
            "12085": "nan",
            "12090": "nan",
            "12095": "nan",
            "12100": 1.43662,
            "12105": "nan",
            "12110": "nan",
            "12115": "nan",
            "12120": "nan",
            "12125": "nan",
            "12130": "nan",
            "12135": "nan",
            "12140": "nan",
            "12145": "nan",
            "12150": "nan",
            "12155": "nan",
            "12160": "nan",
            "12165": "nan",
            "12170": "nan",
            "12175": "nan",
            "12180": "nan",
            "12185": "nan",
            "12190": "nan",
            "12195": "nan",
            "12200": 1.42854,
            "12205": "nan",
            "12210": "nan",
            "12215": "nan",
            "12220": "nan",
            "12225": "nan",
            "12230": "nan",
            "12235": "nan",
            "12240": "nan",
            "12245": "nan",
            "12250": "nan",
            "12255": "nan",
            "12260": "nan",
            "12265": "nan",
            "12270": "nan",
            "12275": "nan",
            "12280": "nan",
            "12285": "nan",
            "12290": "nan",
            "12295": "nan",
            "12300": 1.46417,
            "12305": "nan",
            "12310": "nan",
            "12315": "nan",
            "12320": "nan",
            "12325": "nan",
            "12330": "nan",
            "12335": "nan",
            "12340": "nan",
            "12345": "nan",
            "12350": "nan",
            "12355": "nan",
            "12360": "nan",
            "12365": "nan",
            "12370": "nan",
            "12375": "nan",
            "12380": "nan",
            "12385": "nan",
            "12390": "nan",
            "12395": "nan",
            "12400": 1.43423,
            "12405": "nan",
            "12410": "nan",
            "12415": "nan",
            "12420": "nan",
            "12425": "nan",
            "12430": "nan",
            "12435": "nan",
            "12440": "nan",
            "12445": "nan",
            "12450": "nan",
            "12455": "nan",
            "12460": "nan",
            "12465": "nan",
            "12470": "nan",
            "12475": "nan",
            "12480": "nan",
            "12485": "nan",
            "12490": "nan",
            "12495": "nan",
            "12500": 1.43958,
            "12505": "nan",
            "12510": "nan",
            "12515": "nan",
            "12520": "nan",
            "12525": "nan",
            "12530": "nan",
            "12535": "nan",
            "12540": "nan",
            "12545": "nan",
            "12550": "nan",
            "12555": "nan",
            "12560": "nan",
            "12565": "nan",
            "12570": "nan",
            "12575": "nan",
            "12580": "nan",
            "12585": "nan",
            "12590": "nan",
            "12595": "nan",
            "12600": 1.46497,
            "12605": "nan",
            "12610": "nan",
            "12615": "nan",
            "12620": "nan",
            "12625": "nan",
            "12630": "nan",
            "12635": "nan",
            "12640": "nan",
            "12645": "nan",
            "12650": "nan",
            "12655": "nan",
            "12660": "nan",
            "12665": "nan",
            "12670": "nan",
            "12675": "nan",
            "12680": "nan",
            "12685": "nan",
            "12690": "nan",
            "12695": "nan",
            "12700": 1.5955,
            "12705": "nan",
            "12710": "nan",
            "12715": "nan",
            "12720": "nan",
            "12725": "nan",
            "12730": "nan",
            "12735": "nan",
            "12740": "nan",
            "12745": "nan",
            "12750": "nan",
            "12755": "nan",
            "12760": "nan",
            "12765": "nan",
            "12770": "nan",
            "12775": "nan",
            "12780": "nan",
            "12785": "nan",
            "12790": "nan",
            "12795": "nan",
            "12800": "nan",
            "12805": "nan",
            "12810": "nan",
            "12815": "nan",
            "12820": "nan",
            "12825": "nan",
            "12830": "nan",
            "12835": "nan",
            "12840": "nan",
            "12845": "nan",
            "12850": "nan",
            "12855": "nan",
            "12860": "nan",
            "12865": "nan",
            "12870": "nan",
            "12875": "nan",
            "12880": "nan",
            "12885": "nan",
            "12890": "nan",
            "12895": "nan",
            "12900": "nan",
            "12905": "nan",
            "12910": "nan",
            "12915": "nan",
            "12920": "nan",
            "12925": "nan",
            "12930": "nan",
            "12935": "nan",
            "12940": "nan",
            "12945": "nan",
            "12950": "nan",
            "12955": "nan",
            "12960": "nan",
            "12965": "nan",
            "12970": "nan",
            "12975": "nan",
            "12980": "nan",
            "12985": "nan",
            "12990": "nan",
            "12995": "nan",
            "13000": "nan"
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release_sm/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 13000, "step_interval": 5, "values": {"1": 12.98419, "5": 12.93854, "10": 12.06404, "15": 11.97879, "20": 10.53584, "25": 10.11956, "30": 9.7286, "35": 9.44176, "40": 9.23735, "45": 9.03751, "50": 8.85055, "55": 8.63984, "60": 8.64552, "65": 8.5235, "70": 8.478, "75": 8.3676, "80": 8.15365, "85": 8.15327, "90": 8.03161, "95": 7.95115, "100": 7.84388, "105": 7.72026, "110": 7.54733, "115": 7.45663, "120": 7.46178, "125": 7.47933, "130": 7.29598, "135": 7.26374, "140": 7.18817, "145": 7.03912, "150": 7.16593, "155": 7.03177, "160": 6.9153, "165": 6.90426, "170": 6.82329, "175": 6.89163, "180": 6.83373, "185": 6.7479, "190": 6.70023, "195": 6.63022, "200": 6.69259, "205": 6.62944, "210": 6.51496, "215": 6.49432, "220": 6.50588, "225": 6.49262, "230": 6.49489, "235": 6.48163, "240": 6.36127, "245": 6.36637, "250": 6.29142, "255": 6.45496, "260": 6.34018, "265": 6.26489, "270": 6.22894, "275": 6.23729, "280": 6.18967, "285": 6.20291, "290": 6.17708, "295": 6.13232, "300": 6.10682, "305": 6.00908, "310": 6.05252, "315": 6.05204, "320": 5.96463, "325": 5.91966, "330": 6.0284, "335": 5.99858, "340": 5.95439, "345": 5.93592, "350": 5.91905, "355": 5.86171, "360": 5.86274, "365": 5.82901, "370": 5.79297, "375": 5.80909, "380": 5.84904, "385": 5.80185, "390": 5.77686, "395": 5.68835, "400": 5.63684, "405": 5.66531, "410": 5.66265, "415": 5.72537, "420": 5.65949, "425": 5.68078, "430": 5.63808, "435": 5.57811, "440": 5.59213, "445": 5.5255, "450": 5.57672, "455": 5.51795, "460": 5.50422, "465": 5.5785, "470": 5.56096, "475": 5.48941, "480": 5.49418, "485": 5.50116, "490": 5.45593, "495": 5.46922, "500": 5.42565, "505": 5.41648, "510": 5.43832, "515": 5.42661, "520": 5.42935, "525": 5.29381, "530": 5.32998, "535": 5.31902, "540": 5.34353, "545": 5.39054, "550": 5.37019, "555": 5.19836, "560": 5.35374, "565": 5.30845, "570": 5.25365, "575": 5.31206, "580": 5.23746, "585": 5.20838, "590": 5.21845, "595": 5.23516, "600": 5.26955, "605": 5.21896, "610": 5.2345, "615": 5.20262, "620": 5.21905, "625": 5.1986, "630": 5.14834, "635": 5.13713, "640": 5.09747, "645": 5.1618, "650": 5.16552, "655": 5.15803, "660": 5.07055, "665": 5.10628, "670": 5.08585, "675": 5.05517, "680": 5.04384, "685": 5.03952, "690": 5.04772, "695": 5.00275, "700": 4.99979, "705": 4.95596, "710": 4.98915, "715": 4.92713, "720": 4.88403, "725": 4.84387, "730": 4.89609, "735": 4.86372, "740": 4.90625, "745": 4.76089, "750": 4.77974, "755": 4.83335, "760": 4.81568, "765": 4.76505, "770": 4.7234, "775": 4.6921, "780": 4.7432, "785": 4.78676, "790": 4.67828, "795": 4.64866, "800": 4.63612, "805": 4.64195, "810": 4.65077, "815": 4.63084, "820": 4.64482, "825": 4.61571, "830": 4.59387, "835": 4.56863, "840": 4.48927, "845": 4.49634, "850": 4.47352, "855": 4.52409, "860": 4.46251, "865": 4.52223, "870": 4.48175, "875": 4.39297, "880": 4.44361, "885": 4.4111, "890": 4.43659, "895": 4.41378, "900": 4.3919, "905": 4.35935, "910": 4.36354, "915": 4.34282, "920": 4.39946, "925": 4.41667, "930": 4.31531, "935": 4.30369, "940": 4.35342, "945": 4.30303, "950": 4.34463, "955": 4.26527, "960": 4.19654, "965": 4.29565, "970": 4.28642, "975": 4.23045, "980": 4.22564, "985": 4.1684, "990": 4.15028, "995": 4.17776, "1000": 4.23154, "1005": 4.18887, "1010": 4.16924, "1015": 4.11841, "1020": 4.15687, "1025": 4.21713, "1030": 4.13768, "1035": 4.10871, "1040": 4.118, "1045": 4.10979, "1050": 4.15517, "1055": 4.09572, "1060": 4.1093, "1065": 4.0737, "1070": 4.06643, "1075": 4.08655, "1080": 4.09213, "1085": 4.07259, "1090": 4.01677, "1095": 4.09659, "1100": 4.04859, "1105": 4.07084, "1110": 4.03263, "1115": 4.01534, "1120": 4.00246, "1125": 4.01029, "1130": 4.06057, "1135": 4.00668, "1140": 4.01528, "1145": 3.944, "1150": 4.02955, "1155": 4.00187, "1160": 3.97146, "1165": 3.86545, "1170": 3.91556, "1175": 3.95241, "1180": 3.9746, "1185": 3.9952, "1190": 3.94499, "1195": 3.96181, "1200": 3.8893, "1205": 3.8679, "1210": 3.98189, "1215": 3.81925, "1220": 3.86366, "1225": 3.81826, "1230": 3.91477, "1235": 3.90783, "1240": 3.89088, "1245": 3.7878, "1250": 3.83462, "1255": 3.86647, "1260": 3.90676, "1265": 3.79799, "1270": 3.87717, "1275": 3.83332, "1280": 3.82518, "1285": 3.84272, "1290": 3.88049, "1295": 3.84309, "1300": 3.82132, "1305": 3.8325, "1310": 3.81275, "1315": 3.80311, "1320": 3.81776, "1325": 3.71836, "1330": 3.7917, "1335": 3.74949, "1340": 3.75432, "1345": 3.75185, "1350": 3.72246, "1355": 3.76982, "1360": 3.73827, "1365": 3.72479, "1370": 3.73938, "1375": 3.73694, "1380": 3.75033, "1385": 3.75324, "1390": 3.66352, "1395": 3.74143, "1400": 3.73229, "1405": 3.66226, "1410": 3.65804, "1415": 3.66281, "1420": 3.69529, "1425": 3.71298, "1430": 3.68032, "1435": 3.66567, "1440": 3.62691, "1445": 3.68062, "1450": 3.66766, "1455": 3.62753, "1460": 3.65479, "1465": 3.66938, "1470": 3.62148, "1475": 3.69775, "1480": 3.65738, "1485": 3.66245, "1490": 3.63265, "1495": 3.61492, "1500": 3.6367, "1505": 3.68281, "1510": 3.54566, "1515": 3.609, "1520": 3.63666, "1525": 3.60047, "1530": 3.58381, "1535": 3.5969, "1540": 3.59479, "1545": 3.61209, "1550": 3.55084, "1555": 3.60011, "1560": 3.61885, "1565": 3.61562, "1570": 3.59161, "1575": 3.54997, "1580": 3.58264, "1585": 3.57028, "1590": 3.46649, "1595": 3.51916, "1600": 3.51438, "1605": 3.56399, "1610": 3.56561, "1615": 3.48914, "1620": 3.51872, "1625": 3.46038, "1630": 3.50057, "1635": 3.54094, "1640": 3.52781, "1645": 3.53869, "1650": 3.49558, "1655": 3.47362, "1660": 3.53058, "1665": 3.4694, "1670": 3.51604, "1675": 3.49294, "1680": 3.47856, "1685": 3.47172, "1690": 3.48567, "1695": 3.48595, "1700": 3.46718, "1705": 3.40405, "1710": 3.4904, "1715": 3.50163, "1720": 3.43217, "1725": 3.43389, "1730": 3.44993, "1735": 3.46203, "1740": 3.45825, "1745": 3.45364, "1750": 3.4018, "1755": 3.41959, "1760": 3.38236, "1765": 3.42143, "1770": 3.42391, "1775": 3.38829, "1780": 3.43133, "1785": 3.42531, "1790": 3.38061, "1795": 3.40413, "1800": 3.34505, "1805": 3.40411, "1810": 3.3443, "1815": 3.41721, "1820": 3.41298, "1825": 3.41394, "1830": 3.34507, "1835": 3.44327, "1840": 3.40026, "1845": 3.42225, "1850": 3.3737, "1855": 3.37379, "1860": 3.34718, "1865": 3.39774, "1870": 3.31941, "1875": 3.44184, "1880": 3.34583, "1885": 3.33834, "1890": 3.33625, "1895": 3.40145, "1900": 3.36496, "1905": 3.32271, "1910": 3.3368, "1915": 3.32331, "1920": 3.36555, "1925": 3.33897, "1930": 3.31467, "1935": 3.31123, "1940": 3.3762, "1945": 3.27313, "1950": 3.41256, "1955": 3.30548, "1960": 3.29785, "1965": 3.27936, "1970": 3.29856, "1975": 3.35566, "1980": 3.33591, "1985": 3.24229, "1990": 3.30028, "1995": 3.28316, "2000": 3.28226, "2005": 3.26535, "2010": 3.27352, "2015": 3.2365, "2020": 3.27501, "2025": 3.27742, "2030": 3.29047, "2035": 3.29225, "2040": 3.23929, "2045": 3.2491, "2050": 3.27456, "2055": 3.33373, "2060": 3.29439, "2065": 3.23928, "2070": 3.29227, "2075": 3.2624, "2080": 3.23888, "2085": 3.27623, "2090": 3.13844, "2095": 3.28702, "2100": 3.23688, "2105": 3.19771, "2110": 3.20277, "2115": 3.2382, "2120": 3.1813, "2125": 3.22025, "2130": 3.22372, "2135": 3.29164, "2140": 3.20319, "2145": 3.20474, "2150": 3.21527, "2155": 3.23198, "2160": 3.19331, "2165": 3.25562, "2170": 3.22466, "2175": 3.20239, "2180": 3.25351, "2185": 3.25349, "2190": 3.24601, "2195": 3.17461, "2200": 3.19592, "2205": 3.16638, "2210": 3.11756, "2215": 3.18822, "2220": 3.19477, "2225": 3.17024, "2230": 3.12954, "2235": 3.17891, "2240": 3.21397, "2245": 3.18137, "2250": 3.19957, "2255": 3.12336, "2260": 3.13509, "2265": 3.22906, "2270": 3.18616, "2275": 3.1529, "2280": 3.18794, "2285": 3.16881, "2290": 3.16738, "2295": 3.20792, "2300": 3.14094, "2305": 3.16694, "2310": 3.12982, "2315": 3.0574, "2320": 3.12693, "2325": 3.18302, "2330": 3.13863, "2335": 3.13173, "2340": 3.16429, "2345": 3.11609, "2350": 3.12193, "2355": 3.12102, "2360": 3.16057, "2365": 3.14069, "2370": 3.1566, "2375": 3.13306, "2380": 3.1089, "2385": 3.07598, "2390": 3.09423, "2395": 3.0923, "2400": 3.09049, "2405": 3.10312, "2410": 3.08738, "2415": 3.0859, "2420": 3.07232, "2425": 3.07515, "2430": 3.08315, "2435": 3.0666, "2440": 3.09293, "2445": 3.05738, "2450": 3.12724, "2455": 3.1668, "2460": 3.08258, "2465": 3.07557, "2470": 3.03871, "2475": 3.07295, "2480": 3.09401, "2485": 3.04631, "2490": 3.05694, "2495": 3.07352, "2500": 3.07374, "2505": 3.09757, "2510": 3.11425, "2515": 3.04708, "2520": 3.0804, "2525": 3.02461, "2530": 3.04704, "2535": 3.08948, "2540": 3.07683, "2545": 3.05223, "2550": 3.0346, "2555": 3.106, "2560": 3.05236, "2565": 3.10772, "2570": 3.01402, "2575": 3.04644, "2580": 3.07514, "2585": 3.02095, "2590": 3.06575, "2595": 2.99715, "2600": 3.06315, "2605": 3.04568, "2610": 3.04704, "2615": 3.05832, "2620": 2.98626, "2625": 3.00956, "2630": 3.03577, "2635": 3.05424, "2640": 3.01527, "2645": 3.05906, "2650": 3.02519, "2655": 3.00005, "2660": 3.00566, "2665": 3.03857, "2670": 2.9869, "2675": 2.96406, "2680": 2.99471, "2685": 3.00999, "2690": 3.00193, "2695": 2.99167, "2700": 3.02505, "2705": 2.98729, "2710": 2.97342, "2715": 2.96605, "2720": 3.02717, "2725": 3.00415, "2730": 3.02744, "2735": 2.98433, "2740": 2.98866, "2745": 3.00764, "2750": 3.01004, "2755": 2.97318, "2760": 3.00963, "2765": 3.01277, "2770": 2.98399, "2775": 2.99786, "2780": 3.0332, "2785": 2.95954, "2790": 2.96222, "2795": 2.97034, "2800": 2.98127, "2805": 2.93987, "2810": 2.983, "2815": 2.9543, "2820": 2.98172, "2825": 3.00651, "2830": 2.98887, "2835": 2.91425, "2840": 2.92476, "2845": 2.95821, "2850": 2.96948, "2855": 2.97557, "2860": 2.96429, "2865": 2.91763, "2870": 2.98977, "2875": 2.9248, "2880": 2.95945, "2885": 2.91822, "2890": 2.97801, "2895": 2.92798, "2900": 2.94851, "2905": 3.01025, "2910": 2.90885, "2915": 2.95254, "2920": 2.97185, "2925": 2.95597, "2930": 2.94982, "2935": 2.93409, "2940": 2.93873, "2945": 2.9082, "2950": 2.97314, "2955": 2.90745, "2960": 2.96696, "2965": 2.86293, "2970": 2.95934, "2975": 2.99626, "2980": 2.93691, "2985": 3.03654, "2990": 2.93999, "2995": 2.87144, "3000": 2.92971, "3005": 2.88913, "3010": 2.93432, "3015": 2.91276, "3020": 2.91232, "3025": 2.92393, "3030": 2.92045, "3035": 2.95576, "3040": 2.91549, "3045": 2.83698, "3050": 2.89779, "3055": 2.90445, "3060": 2.92488, "3065": 2.92719, "3070": 2.87846, "3075": 2.86687, "3080": 2.92258, "3085": 2.89966, "3090": 2.91396, "3095": 2.92199, "3100": 2.87182, "3105": 2.92802, "3110": 2.91863, "3115": 2.9403, "3120": 2.9518, "3125": 2.85751, "3130": 2.93151, "3135": 2.92635, "3140": 2.87276, "3145": 2.92375, "3150": 2.85484, "3155": 2.8485, "3160": 2.83637, "3165": 2.84033, "3170": 2.88849, "3175": 2.90239, "3180": 2.85043, "3185": 2.89154, "3190": 2.90425, "3195": 2.92291, "3200": 2.92063, "3205": 2.86226, "3210": 2.86891, "3215": 2.91036, "3220": 2.87807, "3225": 2.86421, "3230": 2.81586, "3235": 2.87333, "3240": 2.87156, "3245": 2.90126, "3250": 2.85213, "3255": 2.84713, "3260": 2.86187, "3265": 2.86787, "3270": 2.84284, "3275": 2.86909, "3280": 2.79921, "3285": 2.81195, "3290": 2.86578, "3295": 2.89818, "3300": 2.87821, "3305": 2.85947, "3310": 2.85651, "3315": 2.81024, "3320": 2.82854, "3325": 2.8296, "3330": 2.82729, "3335": 2.84636, "3340": 2.83138, "3345": 2.85448, "3350": 2.85382, "3355": 2.86154, "3360": 2.8033, "3365": 2.8523, "3370": 2.84468, "3375": 2.8454, "3380": 2.85596, "3385": 2.87939, "3390": 2.85981, "3395": 2.80661, "3400": 2.78593, "3405": 2.83043, "3410": 2.84374, "3415": 2.85748, "3420": 2.82452, "3425": 2.80324, "3430": 2.8249, "3435": 2.88819, "3440": 2.81499, "3445": 2.87505, "3450": 2.81623, "3455": 2.79143, "3460": 2.81576, "3465": 2.84564, "3470": 2.83662, "3475": 2.78218, "3480": 2.84288, "3485": 2.82039, "3490": 2.89267, "3495": 2.84976, "3500": 2.83815, "3505": 2.82178, "3510": 2.81512, "3515": 2.83844, "3520": 2.77805, "3525": 2.80344, "3530": 2.84921, "3535": 2.78161, "3540": 2.83674, "3545": 2.80973, "3550": 2.79547, "3555": 2.81863, "3560": 2.82078, "3565": 2.82666, "3570": 2.80124, "3575": 2.79967, "3580": 2.82045, "3585": 2.83294, "3590": 2.82697, "3595": 2.78815, "3600": 2.74915, "3605": 2.7873, "3610": 2.84627, "3615": 2.7499, "3620": 2.8026, "3625": 2.88969, "3630": 2.78406, "3635": 2.78796, "3640": 2.7812, "3645": 2.76636, "3650": 2.80296, "3655": 2.82044, "3660": 2.76688, "3665": 2.78201, "3670": 2.76782, "3675": 2.77556, "3680": 2.80739, "3685": 2.80119, "3690": 2.80155, "3695": 2.80812, "3700": 2.78648, "3705": 2.78205, "3710": 2.74897, "3715": 2.80626, "3720": 2.79273, "3725": 2.80205, "3730": 2.84576, "3735": 2.79732, "3740": 2.74582, "3745": 2.78748, "3750": 2.80839, "3755": 2.78946, "3760": 2.75624, "3765": 2.7529, "3770": 2.76053, "3775": 2.77003, "3780": 2.75736, "3785": 2.77603, "3790": 2.74395, "3795": 2.79193, "3800": 2.79977, "3805": 2.74927, "3810": 2.8012, "3815": 2.75973, "3820": 2.78378, "3825": 2.72979, "3830": 2.74412, "3835": 2.81283, "3840": 2.72346, "3845": 2.71207, "3850": 2.77208, "3855": 2.71862, "3860": 2.79999, "3865": 2.74819, "3870": 2.7788, "3875": 2.75589, "3880": 2.78591, "3885": 2.78703, "3890": 2.745, "3895": 2.79656, "3900": 2.76217, "3905": 2.7204, "3910": 2.74251, "3915": 2.75354, "3920": 2.7922, "3925": 2.779, "3930": 2.70818, "3935": 2.73844, "3940": 2.75097, "3945": 2.7403, "3950": 2.71938, "3955": 2.77595, "3960": 2.7594, "3965": 2.74095, "3970": 2.75582, "3975": 2.72197, "3980": 2.73797, "3985": 2.74501, "3990": 2.69182, "3995": 2.77927, "4000": 2.73467, "4005": 2.76845, "4010": 2.70729, "4015": 2.72138, "4020": 2.7543, "4025": 2.73235, "4030": 2.65702, "4035": 2.69604, "4040": 2.7478, "4045": 2.74602, "4050": 2.79108, "4055": 2.72052, "4060": 2.7132, "4065": 2.65055, "4070": 2.80438, "4075": 2.75742, "4080": 2.71911, "4085": 2.74853, "4090": 2.67511, "4095": 2.68776, "4100": 2.71367, "4105": 2.73756, "4110": 2.72971, "4115": 2.70539, "4120": 2.73419, "4125": 2.70252, "4130": 2.69562, "4135": 2.68553, "4140": 2.6792, "4145": 2.77962, "4150": 2.71146, "4155": 2.73921, "4160": 2.75831, "4165": 2.71903, "4170": 2.67374, "4175": 2.71681, "4180": 2.72466, "4185": 2.72363, "4190": 2.74097, "4195": 2.69297, "4200": 2.70506, "4205": 2.74554, "4210": 2.67484, "4215": 2.66292, "4220": 2.65885, "4225": 2.70625, "4230": 2.71947, "4235": 2.73163, "4240": 2.71, "4245": 2.69905, "4250": 2.71229, "4255": 2.6486, "4260": 2.7231, "4265": 2.74057, "4270": 2.72269, "4275": 2.68914, "4280": 2.70013, "4285": 2.73156, "4290": 2.68402, "4295": 2.69061, "4300": 2.69891, "4305": 2.70215, "4310": 2.73018, "4315": 2.70831, "4320": 2.70146, "4325": 2.70427, "4330": 2.71103, "4335": 2.68951, "4340": 2.69679, "4345": 2.72847, "4350": 2.67635, "4355": 2.69369, "4360": 2.71057, "4365": 2.78505, "4370": 2.73161, "4375": 2.7416, "4380": 2.70035, "4385": 2.69729, "4390": 2.6976, "4395": 2.75157, "4400": 2.6658, "4405": 2.66826, "4410": 2.68381, "4415": 2.70238, "4420": 2.70549, "4425": 2.72132, "4430": 2.69374, "4435": 2.68373, "4440": 2.69348, "4445": 2.67973, "4450": 2.65166, "4455": 2.66887, "4460": 2.6881, "4465": 2.69814, "4470": 2.67338, "4475": 2.68749, "4480": 2.65676, "4485": 2.69941, "4490": 2.65189, "4495": 2.70897, "4500": 2.70239, "4505": 2.69521, "4510": 2.64919, "4515": 2.70031, "4520": 2.66829, "4525": 2.66609, "4530": 2.67206, "4535": 2.67259, "4540": 2.70728, "4545": 2.65612, "4550": 2.70083, "4555": 2.67856, "4560": 2.65513, "4565": 2.63823, "4570": 2.64122, "4575": 2.6646, "4580": 2.68762, "4585": 2.68234, "4590": 2.61701, "4595": 2.66208, "4600": 2.6793, "4605": 2.67662, "4610": 2.65261, "4615": 2.66058, "4620": 2.65881, "4625": 2.68537, "4630": 2.67218, "4635": 2.64354, "4640": 2.69408, "4645": 2.64915, "4650": 2.70258, "4655": 2.71207, "4660": 2.67684, "4665": 2.68847, "4670": 2.67711, "4675": 2.68695, "4680": 2.66399, "4685": 2.65574, "4690": 2.70135, "4695": 2.65603, "4700": 2.67474, "4705": 2.64927, "4710": 2.67454, "4715": 2.6445, "4720": 2.72006, "4725": 2.62873, "4730": 2.64853, "4735": 2.68453, "4740": 2.64342, "4745": 2.65059, "4750": 2.63888, "4755": 2.65157, "4760": 2.65996, "4765": 2.64629, "4770": 2.62414, "4775": 2.65418, "4780": 2.65916, "4785": 2.69171, "4790": 2.64922, "4795": 2.67239, "4800": 2.62814, "4805": 2.64031, "4810": 2.66308, "4815": 2.64414, "4820": 2.66856, "4825": 2.64936, "4830": 2.61301, "4835": 2.64828, "4840": 2.65719, "4845": 2.63018, "4850": 2.62249, "4855": 2.59762, "4860": 2.65006, "4865": 2.62728, "4870": 2.63739, "4875": 2.61869, "4880": 2.62597, "4885": 2.62554, "4890": 2.6792, "4895": 2.659, "4900": 2.61682, "4905": 2.62007, "4910": 2.63757, "4915": 2.612, "4920": 2.65399, "4925": 2.64822, "4930": 2.57057, "4935": 2.65116, "4940": 2.63208, "4945": 2.63853, "4950": 2.6253, "4955": 2.61842, "4960": 2.61614, "4965": 2.65867, "4970": 2.59725, "4975": 2.65839, "4980": 2.61913, "4985": 2.63529, "4990": 2.65578, "4995": 2.58058, "5000": 2.66073, "5005": 2.66345, "5010": 2.68227, "5015": 2.63294, "5020": 2.63988, "5025": 2.68591, "5030": 2.64819, "5035": 2.61651, "5040": 2.61892, "5045": 2.60431, "5050": 2.62445, "5055": 2.64904, "5060": 2.64327, "5065": 2.68471, "5070": 2.60524, "5075": 2.61458, "5080": 2.60662, "5085": 2.60264, "5090": 2.59015, "5095": 2.64761, "5100": 2.6457, "5105": 2.60793, "5110": 2.66334, "5115": 2.62502, "5120": 2.67169, "5125": 2.62758, "5130": 2.61382, "5135": 2.61793, "5140": 2.57598, "5145": 2.62766, "5150": 2.63527, "5155": 2.6155, "5160": 2.65966, "5165": 2.58483, "5170": 2.58978, "5175": 2.61727, "5180": 2.60571, "5185": 2.62183, "5190": 2.62827, "5195": 2.67109, "5200": 2.59759, "5205": 2.60702, "5210": 2.60617, "5215": 2.64994, "5220": 2.58854, "5225": 2.55622, "5230": 2.63303, "5235": 2.61375, "5240": 2.63238, "5245": 2.62967, "5250": 2.5932, "5255": 2.61614, "5260": 2.55787, "5265": 2.59927, "5270": 2.59007, "5275": 2.61657, "5280": 2.61119, "5285": 2.60465, "5290": 2.63616, "5295": 2.62334, "5300": 2.58036, "5305": 2.59688, "5310": 2.60926, "5315": 2.58728, "5320": 2.61359, "5325": 2.6452, "5330": 2.6021, "5335": 2.58401, "5340": 2.56341, "5345": 2.65571, "5350": 2.61767, "5355": 2.58122, "5360": 2.59757, "5365": 2.61791, "5370": 2.61383, "5375": 2.62936, "5380": 2.57845, "5385": 2.56235, "5390": 2.58632, "5395": 2.61671, "5400": 2.60674, "5405": 2.54636, "5410": 2.61005, "5415": 2.59464, "5420": 2.61139, "5425": 2.625, "5430": 2.6265, "5435": 2.5765, "5440": 2.58814, "5445": 2.63079, "5450": 2.64534, "5455": 2.6101, "5460": 2.5903, "5465": 2.6052, "5470": 2.59691, "5475": 2.62442, "5480": 2.58886, "5485": 2.5915, "5490": 2.57494, "5495": 2.56951, "5500": 2.56777, "5505": 2.61617, "5510": 2.62567, "5515": 2.58374, "5520": 2.56088, "5525": 2.58583, "5530": 2.66352, "5535": 2.62035, "5540": 2.56974, "5545": 2.59511, "5550": 2.54965, "5555": 2.57459, "5560": 2.56453, "5565": 2.60605, "5570": 2.65634, "5575": 2.63541, "5580": 2.57175, "5585": 2.5945, "5590": 2.55928, "5595": 2.58276, "5600": 2.55517, "5605": 2.60104, "5610": 2.58094, "5615": 2.58249, "5620": 2.58036, "5625": 2.55089, "5630": 2.57187, "5635": 2.6324, "5640": 2.59548, "5645": 2.57131, "5650": 2.57908, "5655": 2.54814, "5660": 2.56269, "5665": 2.58485, "5670": 2.56733, "5675": 2.61086, "5680": 2.5279, "5685": 2.5673, "5690": 2.60169, "5695": 2.56032, "5700": 2.59817, "5705": 2.59773, "5710": 2.57825, "5715": 2.58556, "5720": 2.53545, "5725": 2.60141, "5730": 2.57545, "5735": 2.60941, "5740": 2.59452, "5745": 2.55803, "5750": 2.53903, "5755": 2.55715, "5760": 2.61525, "5765": 2.56038, "5770": 2.5412, "5775": 2.58539, "5780": 2.57794, "5785": 2.53954, "5790": 2.56402, "5795": 2.60037, "5800": 2.54586, "5805": 2.53368, "5810": 2.55739, "5815": 2.52458, "5820": 2.59565, "5825": 2.50396, "5830": 2.49812, "5835": 2.59879, "5840": 2.54027, "5845": 2.55119, "5850": 2.61034, "5855": 2.5078, "5860": 2.56077, "5865": 2.51887, "5870": 2.57531, "5875": 2.60847, "5880": 2.58556, "5885": 2.56695, "5890": 2.58322, "5895": 2.55606, "5900": 2.61399, "5905": 2.55561, "5910": 2.59622, "5915": 2.60833, "5920": 2.58814, "5925": 2.53547, "5930": 2.57614, "5935": 2.55624, "5940": 2.57051, "5945": 2.51758, "5950": 2.5549, "5955": 2.58364, "5960": 2.56774, "5965": 2.61916, "5970": 2.54972, "5975": 2.58058, "5980": 2.55777, "5985": 2.5587, "5990": 2.55442, "5995": 2.55786, "6000": 2.5541, "6005": 2.51959, "6010": 2.5612, "6015": 2.5234, "6020": 2.53535, "6025": 2.55845, "6030": 2.60267, "6035": 2.54217, "6040": 2.54797, "6045": 2.49021, "6050": 2.59325, "6055": 2.51806, "6060": 2.54365, "6065": 2.52512, "6070": 2.52866, "6075": 2.5355, "6080": 2.53264, "6085": 2.59598, "6090": 2.56805, "6095": 2.53269, "6100": 2.53973, "6105": 2.52014, "6110": 2.5548, "6115": 2.5854, "6120": 2.55512, "6125": 2.53857, "6130": 2.47414, "6135": 2.5559, "6140": 2.55561, "6145": 2.55497, "6150": 2.52352, "6155": 2.50641, "6160": 2.54021, "6165": 2.57308, "6170": 2.54839, "6175": 2.60252, "6180": 2.51088, "6185": 2.54973, "6190": 2.49031, "6195": 2.57918, "6200": 2.55088, "6205": 2.53612, "6210": 2.51902, "6215": 2.51336, "6220": 2.56408, "6225": 2.51443, "6230": 2.51059, "6235": 2.5608, "6240": 2.55012, "6245": 2.5247, "6250": 2.53168, "6255": 2.57932, "6260": 2.52354, "6265": 2.57262, "6270": 2.52446, "6275": 2.56295, "6280": 2.52126, "6285": 2.52066, "6290": 2.52096, "6295": 2.50424, "6300": 2.55533, "6305": 2.52427, "6310": 2.51089, "6315": 2.53744, "6320": 2.488, "6325": 2.59779, "6330": 2.55532, "6335": 2.51175, "6340": 2.51285, "6345": 2.55419, "6350": 2.55594, "6355": 2.52344, "6360": 2.52101, "6365": 2.48499, "6370": 2.53525, "6375": 2.49254, "6380": 2.55757, "6385": 2.57399, "6390": 2.50202, "6395": 2.55089, "6400": 2.50993, "6405": 2.5282, "6410": 2.5186, "6415": 2.52701, "6420": 2.54208, "6425": 2.53505, "6430": 2.57601, "6435": 2.54343, "6440": 2.53596, "6445": 2.52707, "6450": 2.53253, "6455": 2.5206, "6460": 2.51748, "6465": 2.56168, "6470": 2.5182, "6475": 2.52367, "6480": 2.48721, "6485": 2.52891, "6490": 2.50941, "6495": 2.49869, "6500": 2.52288, "6505": 2.49655, "6510": 2.5435, "6515": 2.51061, "6520": 2.51072, "6525": 2.4951, "6530": 2.54405, "6535": 2.53179, "6540": 2.52806, "6545": 2.56063, "6550": 2.50377, "6555": 2.5567, "6560": 2.50957, "6565": 2.52066, "6570": 2.5846, "6575": 2.52194, "6580": 2.49953, "6585": 2.50756, "6590": 2.5088, "6595": 2.49707, "6600": 2.48904, "6605": 2.53959, "6610": 2.47704, "6615": 2.56692, "6620": 2.53266, "6625": 2.50914, "6630": 2.51237, "6635": 2.47298, "6640": 2.53895, "6645": 2.59549, "6650": 2.51039, "6655": 2.5003, "6660": 2.57417, "6665": 2.52229, "6670": 2.56842, "6675": 2.46804, "6680": 2.54777, "6685": 2.53423, "6690": 2.51271, "6695": 2.48576, "6700": 2.52379, "6705": 2.51865, "6710": 2.49131, "6715": 2.51508, "6720": 2.51046, "6725": 2.51877, "6730": 2.51764, "6735": 2.48391, "6740": 2.51343, "6745": 2.49511, "6750": 2.55883, "6755": 2.47532, "6760": 2.54102, "6765": 2.48848, "6770": 2.51699, "6775": 2.50924, "6780": 2.53742, "6785": 2.47155, "6790": 2.54353, "6795": 2.49747, "6800": 2.52506, "6805": 2.51084, "6810": 2.50457, "6815": 2.52164, "6820": 2.48634, "6825": 2.5058, "6830": 2.54057, "6835": 2.5071, "6840": 2.50862, "6845": 2.52459, "6850": 2.47366, "6855": 2.5114, "6860": 2.50121, "6865": 2.48697, "6870": 2.55404, "6875": 2.47466, "6880": 2.55074, "6885": 2.47742, "6890": 2.5456, "6895": 2.50044, "6900": 2.48513, "6905": 2.49727, "6910": 2.51927, "6915": 2.51524, "6920": 2.53222, "6925": 2.54381, "6930": 2.49006, "6935": 2.52259, "6940": 2.49956, "6945": 2.46052, "6950": 2.48424, "6955": 2.52708, "6960": 2.51994, "6965": 2.493, "6970": 2.47002, "6975": 2.52124, "6980": 2.45179, "6985": 2.51469, "6990": 2.53168, "6995": 2.46205, "7000": 2.48721, "7005": 2.47085, "7010": 2.47458, "7015": 2.52239, "7020": 2.46871, "7025": 2.455, "7030": 2.48512, "7035": 2.47891, "7040": 2.5055, "7045": 2.52023, "7050": 2.52604, "7055": 2.44159, "7060": 2.47055, "7065": 2.4812, "7070": 2.49094, "7075": 2.49419, "7080": 2.5339, "7085": 2.48474, "7090": 2.47483, "7095": 2.50175, "7100": 2.51611, "7105": 2.48909, "7110": 2.48875, "7115": 2.50654, "7120": 2.47919, "7125": 2.4648, "7130": 2.48826, "7135": 2.51398, "7140": 2.49836, "7145": 2.49968, "7150": 2.51209, "7155": 2.50995, "7160": 2.47585, "7165": 2.45844, "7170": 2.5053, "7175": 2.50179, "7180": 2.50705, "7185": 2.48036, "7190": 2.46179, "7195": 2.46554, "7200": 2.5094, "7205": 2.48845, "7210": 2.44336, "7215": 2.48072, "7220": 2.44332, "7225": 2.51135, "7230": 2.50786, "7235": 2.48259, "7240": 2.47784, "7245": 2.49888, "7250": 2.50767, "7255": 2.49251, "7260": 2.45824, "7265": 2.45066, "7270": 2.47062, "7275": 2.50235, "7280": 2.49435, "7285": 2.42685, "7290": 2.48191, "7295": 2.48837, "7300": 2.41828, "7305": 2.44602, "7310": 2.44836, "7315": 2.49166, "7320": 2.48459, "7325": 2.45973, "7330": 2.49034, "7335": 2.47382, "7340": 2.46396, "7345": 2.4941, "7350": 2.5103, "7355": 2.49639, "7360": 2.48014, "7365": 2.46719, "7370": 2.4737, "7375": 2.44973, "7380": 2.49351, "7385": 2.48455, "7390": 2.47135, "7395": 2.477, "7400": 2.48731, "7405": 2.44334, "7410": 2.48259, "7415": 2.46975, "7420": 2.49233, "7425": 2.45642, "7430": 2.52265, "7435": 2.49165, "7440": 2.52011, "7445": 2.50649, "7450": 2.47346, "7455": 2.46263, "7460": 2.46431, "7465": 2.48535, "7470": 2.45144, "7475": 2.45815, "7480": 2.51387, "7485": 2.45117, "7490": 2.47472, "7495": 2.47889, "7500": 2.49782, "7505": 2.44041, "7510": 2.43588, "7515": 2.42224, "7520": 2.49392, "7525": 2.49662, "7530": 2.47567, "7535": 2.45999, "7540": 2.47286, "7545": 2.47379, "7550": 2.49017, "7555": 2.45369, "7560": 2.42765, "7565": 2.50931, "7570": 2.48419, "7575": 2.43912, "7580": 2.45901, "7585": 2.48319, "7590": 2.48184, "7595": 2.46424, "7600": 2.46287, "7605": 2.44744, "7610": 2.44904, "7615": 2.42801, "7620": 2.54382, "7625": 2.47981, "7630": 2.42404, "7635": 2.42598, "7640": 2.45333, "7645": 2.47382, "7650": 2.46343, "7655": 2.48648, "7660": 2.4512, "7665": 2.43238, "7670": 2.44036, "7675": 2.45618, "7680": 2.48615, "7685": 2.43166, "7690": 2.48187, "7695": 2.4547, "7700": 2.48168, "7705": 2.51497, "7710": 2.50597, "7715": 2.4453, "7720": 2.47122, "7725": 2.48144, "7730": 2.45802, "7735": 2.49851, "7740": 2.44105, "7745": 2.45131, "7750": 2.44125, "7755": 2.47226, "7760": 2.45319, "7765": 2.45587, "7770": 2.47059, "7775": 2.4538, "7780": 2.41754, "7785": 2.44561, "7790": 2.48633, "7795": 2.44163, "7800": 2.4703, "7805": 2.48729, "7810": 2.50584, "7815": 2.49634, "7820": 2.45047, "7825": 2.51645, "7830": 2.45243, "7835": 2.46939, "7840": 2.47968, "7845": 2.4632, "7850": 2.41717, "7855": 2.47372, "7860": 2.5075, "7865": 2.42692, "7870": 2.47598, "7875": 2.44861, "7880": 2.45488, "7885": 2.46205, "7890": 2.47234, "7895": 2.45238, "7900": 2.44179, "7905": 2.43688, "7910": 2.426, "7915": 2.48366, "7920": 2.47574, "7925": 2.4225, "7930": 2.47225, "7935": 2.45119, "7940": 2.424, "7945": 2.47139, "7950": 2.44612, "7955": 2.41981, "7960": 2.49324, "7965": 2.53135, "7970": 2.52904, "7975": 2.45162, "7980": 2.44412, "7985": 2.46923, "7990": 2.43335, "7995": 2.46941, "8000": 2.43681, "8005": 2.41849, "8010": 2.45817, "8015": 2.47029, "8020": 2.4825, "8025": 2.47607, "8030": 2.45105, "8035": 2.47127, "8040": 2.42237, "8045": 2.45296, "8050": 2.4482, "8055": 2.42424, "8060": 2.44611, "8065": 2.46189, "8070": 2.45993, "8075": 2.45852, "8080": 2.4462, "8085": 2.43951, "8090": 2.4255, "8095": 2.42228, "8100": 2.43833, "8105": 2.49489, "8110": 2.43712, "8115": 2.44338, "8120": 2.46793, "8125": 2.46836, "8130": 2.45375, "8135": 2.45377, "8140": 2.43826, "8145": 2.42645, "8150": 2.42267, "8155": 2.4863, "8160": 2.45488, "8165": 2.4436, "8170": 2.43613, "8175": 2.42318, "8180": 2.49704, "8185": 2.4269, "8190": 2.46935, "8195": 2.4585, "8200": 2.44837, "8205": 2.44505, "8210": 2.43355, "8215": 2.44056, "8220": 2.4358, "8225": 2.41015, "8230": 2.44176, "8235": 2.4652, "8240": 2.42683, "8245": 2.44875, "8250": 2.44572, "8255": 2.4414, "8260": 2.43328, "8265": 2.42888, "8270": 2.43309, "8275": 2.44149, "8280": 2.39924, "8285": 2.4396, "8290": 2.48206, "8295": 2.4474, "8300": 2.45867, "8305": 2.40775, "8310": 2.43515, "8315": 2.46058, "8320": 2.40073, "8325": 2.39433, "8330": 2.43694, "8335": 2.44522, "8340": 2.49104, "8345": 2.4478, "8350": 2.44978, "8355": 2.40749, "8360": 2.40123, "8365": 2.45247, "8370": 2.45194, "8375": 2.42273, "8380": 2.41941, "8385": 2.42385, "8390": 2.4346, "8395": 2.43997, "8400": 2.43946, "8405": 2.48993, "8410": 2.437, "8415": 2.43406, "8420": 2.41749, "8425": 2.43899, "8430": 2.46013, "8435": 2.40557, "8440": 2.45224, "8445": 2.45724, "8450": 2.40482, "8455": 2.46, "8460": 2.45352, "8465": 2.4356, "8470": 2.40868, "8475": 2.47827, "8480": 2.40162, "8485": 2.41279, "8490": 2.46472, "8495": 2.43598, "8500": 2.44504, "8505": 2.40324, "8510": 2.40392, "8515": 2.42848, "8520": 2.42568, "8525": 2.4921, "8530": 2.37281, "8535": 2.40013, "8540": 2.48538, "8545": 2.38102, "8550": 2.43805, "8555": 2.44969, "8560": 2.47001, "8565": 2.42051, "8570": 2.43016, "8575": 2.44673, "8580": 2.44107, "8585": 2.4206, "8590": 2.40199, "8595": 2.42668, "8600": 2.41302, "8605": 2.49039, "8610": 2.42012, "8615": 2.38793, "8620": 2.44803, "8625": 2.42382, "8630": 2.45408, "8635": 2.44896, "8640": 2.43494, "8645": 2.4736, "8650": 2.42011, "8655": 2.45238, "8660": 2.45516, "8665": 2.38524, "8670": 2.40887, "8675": 2.4285, "8680": 2.44765, "8685": 2.4292, "8690": 2.41052, "8695": 2.44169, "8700": 2.43279, "8705": 2.41873, "8710": 2.42723, "8715": 2.44835, "8720": 2.47546, "8725": 2.40943, "8730": 2.3906, "8735": 2.43279, "8740": 2.42918, "8745": 2.39996, "8750": 2.43474, "8755": 2.42434, "8760": 2.40027, "8765": 2.43461, "8770": 2.40495, "8775": 2.43724, "8780": 2.42018, "8785": 2.47035, "8790": 2.41985, "8795": 2.41766, "8800": 2.41504, "8805": 2.40682, "8810": 2.41125, "8815": 2.47313, "8820": 2.45249, "8825": 2.42464, "8830": 2.38678, "8835": 2.42251, "8840": 2.39423, "8845": 2.42654, "8850": 2.43398, "8855": 2.40351, "8860": 2.42788, "8865": 2.42683, "8870": 2.43891, "8875": 2.43765, "8880": 2.41181, "8885": 2.39358, "8890": 2.44536, "8895": 2.42582, "8900": 2.41128, "8905": 2.40287, "8910": 2.40178, "8915": 2.41899, "8920": 2.43296, "8925": 2.46577, "8930": 2.4143, "8935": 2.40926, "8940": 2.38869, "8945": 2.3922, "8950": 2.41756, "8955": 2.39479, "8960": 2.43345, "8965": 2.41805, "8970": 2.40426, "8975": 2.47319, "8980": 2.43981, "8985": 2.37288, "8990": 2.4072, "8995": 2.41768, "9000": 2.45601, "9005": 2.41214, "9010": 2.37503, "9015": 2.40997, "9020": 2.3985, "9025": 2.37013, "9030": 2.39819, "9035": 2.4235, "9040": 2.42034, "9045": 2.41944, "9050": 2.39591, "9055": 2.41865, "9060": 2.41835, "9065": 2.40424, "9070": 2.44544, "9075": 2.39609, "9080": 2.43408, "9085": 2.41287, "9090": 2.41241, "9095": 2.39607, "9100": 2.4007, "9105": 2.35839, "9110": 2.46492, "9115": 2.41577, "9120": 2.405, "9125": 2.458, "9130": 2.39476, "9135": 2.44823, "9140": 2.43426, "9145": 2.43038, "9150": 2.42521, "9155": 2.37592, "9160": 2.41896, "9165": 2.42461, "9170": 2.37306, "9175": 2.41974, "9180": 2.37725, "9185": 2.43877, "9190": 2.41309, "9195": 2.38919, "9200": 2.39263, "9205": 2.45158, "9210": 2.36253, "9215": 2.46302, "9220": 2.44836, "9225": 2.38351, "9230": 2.44641, "9235": 2.39743, "9240": 2.40233, "9245": 2.43533, "9250": 2.42643, "9255": 2.42876, "9260": 2.38508, "9265": 2.43838, "9270": 2.43511, "9275": 2.39568, "9280": 2.39071, "9285": 2.42371, "9290": 2.40431, "9295": 2.38539, "9300": 2.42356, "9305": 2.40497, "9310": 2.41467, "9315": 2.40958, "9320": 2.44478, "9325": 2.37057, "9330": 2.40175, "9335": 2.36064, "9340": 2.40709, "9345": 2.41361, "9350": 2.43767, "9355": 2.47543, "9360": 2.4371, "9365": 2.38709, "9370": 2.43617, "9375": 2.43237, "9380": 2.35257, "9385": 2.40086, "9390": 2.38081, "9395": 2.3885, "9400": 2.44434, "9405": 2.41269, "9410": 2.39825, "9415": 2.43724, "9420": 2.44459, "9425": 2.43199, "9430": 2.44775, "9435": 2.41468, "9440": 2.4757, "9445": 2.3748, "9450": 2.39361, "9455": 2.40348, "9460": 2.38595, "9465": 2.37795, "9470": 2.38158, "9475": 2.36591, "9480": 2.43326, "9485": 2.38642, "9490": 2.41941, "9495": 2.38123, "9500": 2.36329, "9505": 2.42967, "9510": 2.39832, "9515": 2.42896, "9520": 2.41821, "9525": 2.39098, "9530": 2.45386, "9535": 2.40282, "9540": 2.41807, "9545": 2.37646, "9550": 2.42227, "9555": 2.38928, "9560": 2.42056, "9565": 2.40473, "9570": 2.37127, "9575": 2.41035, "9580": 2.39592, "9585": 2.42186, "9590": 2.42769, "9595": 2.44835, "9600": 2.39217, "9605": 2.38532, "9610": 2.41965, "9615": 2.41204, "9620": 2.41154, "9625": 2.44483, "9630": 2.395, "9635": 2.40098, "9640": 2.44575, "9645": 2.41172, "9650": 2.39957, "9655": 2.37414, "9660": 2.42292, "9665": 2.38785, "9670": 2.38317, "9675": 2.35775, "9680": 2.39654, "9685": 2.39503, "9690": 2.4613, "9695": 2.38031, "9700": 2.37568, "9705": 2.38353, "9710": 2.36601, "9715": 2.38749, "9720": 2.4332, "9725": 2.44039, "9730": 2.42955, "9735": 2.38794, "9740": 2.37969, "9745": 2.42437, "9750": 2.3981, "9755": 2.4077, "9760": 2.41101, "9765": 2.36724, "9770": 2.4342, "9775": 2.40115, "9780": 2.36209, "9785": 2.40063, "9790": 2.40782, "9795": 2.35982, "9800": 2.39646, "9805": 2.40576, "9810": 2.40871, "9815": 2.37765, "9820": 2.37557, "9825": 2.40604, "9830": 2.42044, "9835": 2.38417, "9840": 2.41278, "9845": 2.36206, "9850": 2.39915, "9855": 2.39447, "9860": 2.38855, "9865": 2.37969, "9870": 2.38592, "9875": 2.38177, "9880": 2.45139, "9885": 2.39245, "9890": 2.35063, "9895": 2.31931, "9900": 2.3952, "9905": 2.42474, "9910": 2.35507, "9915": 2.36324, "9920": 2.41087, "9925": 2.3985, "9930": 2.38397, "9935": 2.35059, "9940": 2.38385, "9945": 2.3797, "9950": 2.40362, "9955": 2.44626, "9960": 2.4258, "9965": 2.35484, "9970": 2.40776, "9975": 2.38445, "9980": 2.3311, "9985": 2.4051, "9990": 2.39694, "9995": 2.39526, "10000": 2.36696, "10005": 2.37191, "10010": 2.38253, "10015": 2.44412, "10020": 2.36322, "10025": 2.38777, "10030": 2.38744, "10035": 2.40886, "10040": 2.40309, "10045": 2.38187, "10050": 2.34835, "10055": 2.36929, "10060": 2.41862, "10065": 2.37479, "10070": 2.423, "10075": 2.37255, "10080": 2.36332, "10085": 2.37108, "10090": 2.34595, "10095": 2.40124, "10100": 2.32131, "10105": 2.38462, "10110": 2.41161, "10115": 2.38754, "10120": 2.35903, "10125": 2.37092, "10130": 2.36022, "10135": 2.38243, "10140": 2.41271, "10145": 2.40607, "10150": 2.37539, "10155": 2.39406, "10160": 2.36042, "10165": 2.3842, "10170": 2.42368, "10175": 2.32305, "10180": 2.39352, "10185": 2.38375, "10190": 2.44188, "10195": 2.40305, "10200": 2.39015, "10205": 2.38822, "10210": 2.36991, "10215": 2.34364, "10220": 2.41795, "10225": 2.43037, "10230": 2.35534, "10235": 2.38619, "10240": 2.37205, "10245": 2.39035, "10250": 2.38646, "10255": 2.41642, "10260": 2.33424, "10265": 2.34821, "10270": 2.35061, "10275": 2.36955, "10280": 2.44822, "10285": 2.35754, "10290": 2.38389, "10295": 2.3723, "10300": 2.36828, "10305": 2.41571, "10310": 2.3877, "10315": 2.36013, "10320": 2.36624, "10325": 2.36267, "10330": 2.41312, "10335": 2.36153, "10340": 2.41836, "10345": 2.36889, "10350": 2.35672, "10355": 2.39532, "10360": 2.37312, "10365": 2.36074, "10370": 2.33915, "10375": 2.35562, "10380": 2.41668, "10385": 2.40583, "10390": 2.38095, "10395": 2.3576, "10400": 2.37548, "10405": 2.34822, "10410": 2.3381, "10415": 2.41431, "10420": 2.38041, "10425": 2.32562, "10430": 2.36014, "10435": 2.36947, "10440": 2.37091, "10445": 2.3601, "10450": 2.36009, "10455": 2.37843, "10460": 2.38055, "10465": 2.30313, "10470": 2.35764, "10475": 2.37919, "10480": 2.36187, "10485": 2.35885, "10490": 2.4119, "10495": 2.36592, "10500": 2.36273, "10505": 2.36961, "10510": 2.3816, "10515": 2.37346, "10520": 2.40183, "10525": 2.389, "10530": 2.39058, "10535": 2.35539, "10540": 2.40525, "10545": 2.35703, "10550": 2.37657, "10555": 2.35803, "10560": 2.3405, "10565": 2.37135, "10570": 2.37477, "10575": 2.35743, "10580": 2.37782, "10585": 2.36809, "10590": 2.37791, "10595": 2.37689, "10600": 2.33089, "10605": 2.3717, "10610": 2.36437, "10615": 2.36241, "10620": 2.34737, "10625": 2.41733, "10630": 2.36755, "10635": 2.32224, "10640": 2.36202, "10645": 2.42137, "10650": 2.3603, "10655": 2.31025, "10660": 2.34676, "10665": 2.39855, "10670": 2.31403, "10675": 2.41662, "10680": 2.35394, "10685": 2.28619, "10690": 2.38317, "10695": 2.32897, "10700": 2.38276, "10705": 2.38404, "10710": 2.34229, "10715": 2.38181, "10720": 2.32585, "10725": 2.35132, "10730": 2.34862, "10735": 2.3518, "10740": 2.31777, "10745": 2.33753, "10750": 2.33397, "10755": 2.40347, "10760": 2.36377, "10765": 2.33563, "10770": 2.36832, "10775": 2.38431, "10780": 2.36873, "10785": 2.39064, "10790": 2.34523, "10795": 2.38497, "10800": 2.32139, "10805": 2.39484, "10810": 2.37409, "10815": 2.35325, "10820": 2.34132, "10825": 2.36976, "10830": 2.33704, "10835": 2.347, "10840": 2.32835, "10845": 2.38518, "10850": 2.33101, "10855": 2.3619, "10860": 2.33071, "10865": 2.32059, "10870": 2.32243, "10875": 2.30352, "10880": 2.39351, "10885": 2.4031, "10890": 2.36036, "10895": 2.37107, "10900": 2.33074, "10905": 2.3132, "10910": 2.40682, "10915": 2.37125, "10920": 2.37395, "10925": 2.36178, "10930": 2.31785, "10935": 2.35984, "10940": 2.35325, "10945": 2.34609, "10950": 2.36031, "10955": 2.36317, "10960": 2.31043, "10965": 2.36158, "10970": 2.35657, "10975": 2.40855, "10980": 2.37438, "10985": 2.34339, "10990": 2.39794, "10995": 2.36358, "11000": 2.3367, "11005": 2.36071, "11010": 2.34173, "11015": 2.32617, "11020": 2.33259, "11025": 2.36576, "11030": 2.33866, "11035": 2.31271, "11040": 2.31651, "11045": 2.31822, "11050": 2.31809, "11055": 2.28913, "11060": 2.33868, "11065": 2.30809, "11070": 2.39307, "11075": 2.31783, "11080": 2.35458, "11085": 2.3363, "11090": 2.34633, "11095": 2.3718, "11100": 2.32898, "11105": 2.31552, "11110": 2.36164, "11115": 2.37242, "11120": 2.38066, "11125": 2.31475, "11130": 2.3499, "11135": 2.33245, "11140": 2.37221, "11145": 2.34935, "11150": 2.39376, "11155": 2.33996, "11160": 2.3647, "11165": 2.36272, "11170": 2.34016, "11175": 2.33391, "11180": 2.3723, "11185": 2.31334, "11190": 2.27753, "11195": 2.32826, "11200": 2.34711, "11205": 2.36211, "11210": 2.33021, "11215": 2.31819, "11220": 2.34183, "11225": 2.37052, "11230": 2.36505, "11235": 2.31953, "11240": 2.34095, "11245": 2.35575, "11250": 2.33117, "11255": 2.33525, "11260": 2.35568, "11265": 2.38813, "11270": 2.28869, "11275": 2.31457, "11280": 2.36943, "11285": 2.28996, "11290": 2.34586, "11295": 2.36394, "11300": 2.37973, "11305": 2.3352, "11310": 2.33011, "11315": 2.29941, "11320": 2.30556, "11325": 2.31524, "11330": 2.35389, "11335": 2.33811, "11340": 2.30715, "11345": 2.31278, "11350": 2.29619, "11355": 2.31948, "11360": 2.35099, "11365": 2.29275, "11370": 2.35148, "11375": 2.32687, "11380": 2.33972, "11385": 2.34686, "11390": 2.33406, "11395": 2.28592, "11400": 2.30948, "11405": 2.35421, "11410": 2.35465, "11415": 2.38449, "11420": 2.35035, "11425": 2.30772, "11430": 2.36681, "11435": 2.36061, "11440": 2.34732, "11445": 2.36229, "11450": 2.32147, "11455": 2.30472, "11460": 2.35122, "11465": 2.34332, "11470": 2.37307, "11475": 2.31161, "11480": 2.32382, "11485": 2.30846, "11490": 2.344, "11495": 2.40648, "11500": 2.33935, "11505": 2.34918, "11510": 2.36196, "11515": 2.32145, "11520": 2.3043, "11525": 2.36115, "11530": 2.31264, "11535": 2.3215, "11540": 2.34504, "11545": 2.34375, "11550": 2.3636, "11555": 2.32437, "11560": 2.34788, "11565": 2.33894, "11570": 2.34963, "11575": 2.29524, "11580": 2.32749, "11585": 2.35175, "11590": 2.3618, "11595": 2.33486, "11600": 2.3573, "11605": 2.32205, "11610": 2.35986, "11615": 2.36006, "11620": 2.29629, "11625": 2.27613, "11630": 2.32588, "11635": 2.34225, "11640": 2.30389, "11645": 2.30695, "11650": 2.32765, "11655": 2.35115, "11660": 2.33559, "11665": 2.32862, "11670": 2.29989, "11675": 2.29611, "11680": 2.3245, "11685": 2.335, "11690": 2.34341, "11695": 2.31618, "11700": 2.32498, "11705": 2.29971, "11710": 2.34418, "11715": 2.31355, "11720": 2.29829, "11725": 2.33913, "11730": 2.30649, "11735": 2.32753, "11740": 2.27272, "11745": 2.31682, "11750": 2.32586, "11755": 2.3504, "11760": 2.31072, "11765": 2.34059, "11770": 2.27741, "11775": 2.32428, "11780": 2.25457, "11785": 2.29632, "11790": 2.32161, "11795": 2.32148, "11800": 2.33441, "11805": 2.30224, "11810": 2.30358, "11815": 2.32855, "11820": 2.31768, "11825": 2.35799, "11830": 2.3157, "11835": 2.33627, "11840": 2.33933, "11845": 2.31625, "11850": 2.30279, "11855": 2.31274, "11860": 2.34249, "11865": 2.35695, "11870": 2.37806, "11875": 2.27972, "11880": 2.29164, "11885": 2.33582, "11890": 2.29146, "11895": 2.28855, "11900": 2.33216, "11905": 2.32131, "11910": 2.27663, "11915": 2.31183, "11920": 2.33333, "11925": 2.30178, "11930": 2.30522, "11935": 2.31612, "11940": 2.31579, "11945": 2.34065, "11950": 2.29853, "11955": 2.31246, "11960": 2.33665, "11965": 2.29384, "11970": 2.28198, "11975": 2.33532, "11980": 2.30525, "11985": 2.27675, "11990": 2.30305, "11995": 2.32936, "12000": 2.3243, "12005": 2.32389, "12010": 2.2876, "12015": 2.30885, "12020": 2.32609, "12025": 2.3339, "12030": 2.31036, "12035": 2.33548, "12040": 2.31504, "12045": 2.30961, "12050": 2.30776, "12055": 2.33205, "12060": 2.29709, "12065": 2.32997, "12070": 2.30215, "12075": 2.27525, "12080": 2.35012, "12085": 2.33805, "12090": 2.33153, "12095": 2.28064, "12100": 2.31437, "12105": 2.3082, "12110": 2.3286, "12115": 2.30272, "12120": 2.30476, "12125": 2.29299, "12130": 2.30209, "12135": 2.32731, "12140": 2.29487, "12145": 2.25587, "12150": 2.25942, "12155": 2.34139, "12160": 2.3564, "12165": 2.31895, "12170": 2.33202, "12175": 2.34176, "12180": 2.32919, "12185": 2.3392, "12190": 2.33408, "12195": 2.29739, "12200": 2.29971, "12205": 2.32199, "12210": 2.35582, "12215": 2.30236, "12220": 2.298, "12225": 2.24251, "12230": 2.33254, "12235": 2.33789, "12240": 2.32363, "12245": 2.28607, "12250": 2.27217, "12255": 2.33627, "12260": 2.31277, "12265": 2.34134, "12270": 2.31196, "12275": 2.31345, "12280": 2.31816, "12285": 2.28538, "12290": 2.30977, "12295": 2.26494, "12300": 2.32846, "12305": 2.26819, "12310": 2.2931, "12315": 2.38537, "12320": 2.3108, "12325": 2.32611, "12330": 2.30252, "12335": 2.32201, "12340": 2.33972, "12345": 2.36736, "12350": 2.34235, "12355": 2.3039, "12360": 2.31292, "12365": 2.32841, "12370": 2.29028, "12375": 2.29855, "12380": 2.29177, "12385": 2.28958, "12390": 2.24749, "12395": 2.30262, "12400": 2.29816, "12405": 2.30947, "12410": 2.30218, "12415": 2.28121, "12420": 2.31576, "12425": 2.29993, "12430": 2.31452, "12435": 2.29916, "12440": 2.33133, "12445": 2.31918, "12450": 2.30543, "12455": 2.24083, "12460": 2.33478, "12465": 2.36325, "12470": 2.27585, "12475": 2.27386, "12480": 2.29137, "12485": 2.30584, "12490": 2.33022, "12495": 2.26913, "12500": 2.3199, "12505": 2.33541, "12510": 2.3559, "12515": 2.27055, "12520": 2.31974, "12525": 2.28748, "12530": 2.32122, "12535": 2.27213, "12540": 2.28628, "12545": 2.29073, "12550": 2.31647, "12555": 2.32346, "12560": 2.30163, "12565": 2.33608, "12570": 2.2774, "12575": 2.30021, "12580": 2.31011, "12585": 2.29127, "12590": 2.33476, "12595": 2.32359, "12600": 2.28129, "12605": 2.31883, "12610": 2.36317, "12615": 2.30591, "12620": 2.33262, "12625": 2.33055, "12630": 2.2973, "12635": 2.33522, "12640": 2.29483, "12645": 2.27955, "12650": 2.32509, "12655": 2.2649, "12660": 2.34164, "12665": 2.31762, "12670": 2.30944, "12675": 2.31773, "12680": 2.27484, "12685": 2.36538, "12690": 2.30258, "12695": 2.3316, "12700": 2.29242, "12705": 2.3066, "12710": 2.30648, "12715": 2.28622, "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": "nan", "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": "nan", "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": "nan"}}, "num-zeros": {"start_step": 1, "end_step": 13000, "step_interval": 5, "values": {"1": 956235968.0, "5": 967337088.0, "10": 971385344.0, "15": 946451584.0, "20": 961454144.0, "25": 1083495040.0, "30": 1210886784.0, "35": 1297400576.0, "40": 1271467008.0, "45": 1175317120.0, "50": 1127123712.0, "55": 1082697856.0, "60": 1044925888.0, "65": 1035741120.0, "70": 1015001472.0, "75": 1009854336.0, "80": 1028703936.0, "85": 1019290368.0, "90": 990809344.0, "95": 964848896.0, "100": 973471680.0, "105": 983119424.0, "110": 977565504.0, "115": 977218048.0, "120": 961220416.0, "125": 943671872.0, "130": 976736960.0, "135": 964989632.0, "140": 963116864.0, "145": 976472832.0, "150": 921307328.0, "155": 968129664.0, "160": 956322560.0, "165": 959833728.0, "170": 974347456.0, "175": 949087808.0, "180": 946701504.0, "185": 972012288.0, "190": 969080384.0, "195": 985148288.0, "200": 945763584.0, "205": 958347008.0, "210": 979438848.0, "215": 967497024.0, "220": 956425216.0, "225": 962393664.0, "230": 948174848.0, "235": 965217152.0, "240": 966068608.0, "245": 969152000.0, "250": 974439936.0, "255": 925058944.0, "260": 965631168.0, "265": 970669376.0, "270": 959136512.0, "275": 953999296.0, "280": 963425664.0, "285": 945783936.0, "290": 974119040.0, "295": 966704000.0, "300": 967153024.0, "305": 964512128.0, "310": 940359680.0, "315": 967399488.0, "320": 969007936.0, "325": 980555456.0, "330": 972089216.0, "335": 946860544.0, "340": 966592384.0, "345": 973019520.0, "350": 973912320.0, "355": 963257280.0, "360": 948348032.0, "365": 964815680.0, "370": 962952960.0, "375": 958450432.0, "380": 947149312.0, "385": 955992704.0, "390": 945401920.0, "395": 970418112.0, "400": 979775872.0, "405": 968349696.0, "410": 970067456.0, "415": 953155072.0, "420": 943562368.0, "425": 954772160.0, "430": 962659264.0, "435": 977076736.0, "440": 954808960.0, "445": 971890816.0, "450": 963511680.0, "455": 973132288.0, "460": 983708288.0, "465": 945281152.0, "470": 942057856.0, "475": 967007104.0, "480": 966098304.0, "485": 976406464.0, "490": 962540544.0, "495": 945459840.0, "500": 964454784.0, "505": 986003136.0, "510": 965679232.0, "515": 943408000.0, "520": 945020992.0, "525": 971262848.0, "530": 971888448.0, "535": 979136128.0, "540": 969531136.0, "545": 954123392.0, "550": 951268096.0, "555": 987224704.0, "560": 960424256.0, "565": 966614016.0, "570": 975728384.0, "575": 927223872.0, "580": 970694784.0, "585": 961176832.0, "590": 972965888.0, "595": 963684352.0, "600": 937077568.0, "605": 951474304.0, "610": 963361024.0, "615": 970007808.0, "620": 976466176.0, "625": 949580224.0, "630": 954443904.0, "635": 986043648.0, "640": 980977216.0, "645": 955008832.0, "650": 958547584.0, "655": 951651520.0, "660": 961042688.0, "665": 967554944.0, "670": 962511744.0, "675": 968338048.0, "680": 965622016.0, "685": 962869120.0, "690": 961921152.0, "695": 954770816.0, "700": 970338624.0, "705": 945517632.0, "710": 943884160.0, "715": 973356416.0, "720": 968367744.0, "725": 978491840.0, "730": 952192896.0, "735": 948810368.0, "740": 955631744.0, "745": 975870976.0, "750": 981237376.0, "755": 962160384.0, "760": 951962496.0, "765": 967347840.0, "770": 976146048.0, "775": 970547840.0, "780": 977538112.0, "785": 931529792.0, "790": 960443264.0, "795": 964581248.0, "800": 967022144.0, "805": 962318720.0, "810": 940976448.0, "815": 949040000.0, "820": 953185856.0, "825": 954501760.0, "830": 976443776.0, "835": 956075520.0, "840": 948406912.0, "845": 965154752.0, "850": 966026688.0, "855": 960904960.0, "860": 976025088.0, "865": 938160704.0, "870": 966414976.0, "875": 972314304.0, "880": 963119744.0, "885": 967742720.0, "890": 949969728.0, "895": 960017792.0, "900": 974230272.0, "905": 963968064.0, "910": 958436608.0, "915": 956353920.0, "920": 943970368.0, "925": 960829184.0, "930": 978849664.0, "935": 971071104.0, "940": 960907136.0, "945": 945064832.0, "950": 957427008.0, "955": 979039232.0, "960": 983589376.0, "965": 966165696.0, "970": 951225600.0, "975": 961578240.0, "980": 968070528.0, "985": 968992256.0, "990": 984392960.0, "995": 953292032.0, "1000": 934783040.0, "1005": 960146944.0, "1010": 971539968.0, "1015": 985186560.0, "1020": 962781312.0, "1025": 935010304.0, "1030": 974676416.0, "1035": 964993152.0, "1040": 980464704.0, "1045": 960830464.0, "1050": 955200064.0, "1055": 957782272.0, "1060": 967746944.0, "1065": 967115264.0, "1070": 966600192.0, "1075": 950056448.0, "1080": 954506240.0, "1085": 967250688.0, "1090": 977131776.0, "1095": 961235584.0, "1100": 979611776.0, "1105": 953366144.0, "1110": 965955584.0, "1115": 966984832.0, "1120": 970350528.0, "1125": 965706496.0, "1130": 954942208.0, "1135": 965843328.0, "1140": 965175360.0, "1145": 970988544.0, "1150": 955558144.0, "1155": 930579520.0, "1160": 957773824.0, "1165": 978125312.0, "1170": 974302848.0, "1175": 973057600.0, "1180": 973081216.0, "1185": 947342912.0, "1190": 964792960.0, "1195": 953134464.0, "1200": 972844672.0, "1205": 988479168.0, "1210": 931123264.0, "1215": 968647744.0, "1220": 969164992.0, "1225": 975950272.0, "1230": 967333824.0, "1235": 943446336.0, "1240": 955852416.0, "1245": 981507264.0, "1250": 966111104.0, "1255": 973672960.0, "1260": 946495744.0, "1265": 963997568.0, "1270": 960487744.0, "1275": 973616640.0, "1280": 961114880.0, "1285": 957581440.0, "1290": 952528640.0, "1295": 971612608.0, "1300": 968863744.0, "1305": 963737728.0, "1310": 963337664.0, "1315": 943555840.0, "1320": 966307008.0, "1325": 989786496.0, "1330": 969506304.0, "1335": 972303232.0, "1340": 972271104.0, "1345": 960659072.0, "1350": 968639552.0, "1355": 955854208.0, "1360": 971819456.0, "1365": 960387456.0, "1370": 948791360.0, "1375": 973533696.0, "1380": 953470208.0, "1385": 969144704.0, "1390": 975719424.0, "1395": 931675264.0, "1400": 945856192.0, "1405": 976752768.0, "1410": 974512064.0, "1415": 967568832.0, "1420": 966747200.0, "1425": 937379200.0, "1430": 973916160.0, "1435": 978333952.0, "1440": 964180352.0, "1445": 958059264.0, "1450": 946148608.0, "1455": 983924608.0, "1460": 968649984.0, "1465": 948747072.0, "1470": 984244864.0, "1475": 943903360.0, "1480": 963976576.0, "1485": 957349120.0, "1490": 961261568.0, "1495": 980539520.0, "1500": 958334144.0, "1505": 942866752.0, "1510": 984180608.0, "1515": 959093888.0, "1520": 959103936.0, "1525": 952784896.0, "1530": 957744896.0, "1535": 949431168.0, "1540": 971087680.0, "1545": 963135040.0, "1550": 978667264.0, "1555": 952316672.0, "1560": 980089536.0, "1565": 967317376.0, "1570": 973843712.0, "1575": 975494784.0, "1580": 941864320.0, "1585": 970030528.0, "1590": 983822592.0, "1595": 948635392.0, "1600": 967446016.0, "1605": 952449280.0, "1610": 969617664.0, "1615": 983150208.0, "1620": 968020608.0, "1625": 970718336.0, "1630": 962888320.0, "1635": 942312576.0, "1640": 981613056.0, "1645": 973977984.0, "1650": 974185728.0, "1655": 967264192.0, "1660": 940689024.0, "1665": 961702848.0, "1670": 962902848.0, "1675": 971280896.0, "1680": 980879872.0, "1685": 944418816.0, "1690": 964687872.0, "1695": 965643264.0, "1700": 966339584.0, "1705": 985202688.0, "1710": 978353024.0, "1715": 943211136.0, "1720": 977088448.0, "1725": 965872448.0, "1730": 968966784.0, "1735": 965089536.0, "1740": 949714624.0, "1745": 970010176.0, "1750": 959685056.0, "1755": 960087040.0, "1760": 966380800.0, "1765": 951817088.0, "1770": 954666944.0, "1775": 973752064.0, "1780": 970536448.0, "1785": 968825728.0, "1790": 950235648.0, "1795": 945133184.0, "1800": 984667712.0, "1805": 987163072.0, "1810": 977768000.0, "1815": 948006400.0, "1820": 949209344.0, "1825": 978852352.0, "1830": 966361856.0, "1835": 964132864.0, "1840": 972318784.0, "1845": 935415808.0, "1850": 952497536.0, "1855": 980050944.0, "1860": 975868288.0, "1865": 958966272.0, "1870": 958948864.0, "1875": 932594944.0, "1880": 973575936.0, "1885": 978847744.0, "1890": 971359168.0, "1895": 959211776.0, "1900": 947394304.0, "1905": 981827712.0, "1910": 969127936.0, "1915": 970041728.0, "1920": 975598080.0, "1925": 960496640.0, "1930": 977926144.0, "1935": 963251456.0, "1940": 952460800.0, "1945": 981337920.0, "1950": 939171584.0, "1955": 960606208.0, "1960": 970031616.0, "1965": 981176768.0, "1970": 962045312.0, "1975": 952824064.0, "1980": 936849920.0, "1985": 975939456.0, "1990": 965967936.0, "1995": 962612608.0, "2000": 960553984.0, "2005": 954497792.0, "2010": 975579392.0, "2015": 991802880.0, "2020": 975435840.0, "2025": 974304768.0, "2030": 952083776.0, "2035": 967850432.0, "2040": 987460096.0, "2045": 976479616.0, "2050": 984702208.0, "2055": 942837248.0, "2060": 942594688.0, "2065": 966210176.0, "2070": 969622912.0, "2075": 980554880.0, "2080": 977600384.0, "2085": 939640128.0, "2090": 969874880.0, "2095": 961277312.0, "2100": 976718976.0, "2105": 972537856.0, "2110": 959904000.0, "2115": 956877184.0, "2120": 977480320.0, "2125": 962566272.0, "2130": 979618624.0, "2135": 950537280.0, "2140": 946995456.0, "2145": 962275584.0, "2150": 973404032.0, "2155": 972689216.0, "2160": 970315008.0, "2165": 948644224.0, "2170": 961545088.0, "2175": 969376640.0, "2180": 969329408.0, "2185": 947447424.0, "2190": 940481024.0, "2195": 986085888.0, "2200": 961860352.0, "2205": 978924032.0, "2210": 964102656.0, "2215": 963501952.0, "2220": 951311104.0, "2225": 969316672.0, "2230": 976331136.0, "2235": 974024704.0, "2240": 975494208.0, "2245": 960232000.0, "2250": 967640384.0, "2255": 969132672.0, "2260": 975065216.0, "2265": 968258304.0, "2270": 951745664.0, "2275": 962768640.0, "2280": 969640064.0, "2285": 971693632.0, "2290": 962890944.0, "2295": 931408256.0, "2300": 959906688.0, "2305": 970426496.0, "2310": 967444800.0, "2315": 970906880.0, "2320": 975589248.0, "2325": 938586496.0, "2330": 988435968.0, "2335": 977489152.0, "2340": 964596224.0, "2345": 964167680.0, "2350": 947555456.0, "2355": 977029504.0, "2360": 966898688.0, "2365": 977293056.0, "2370": 965073024.0, "2375": 953966016.0, "2380": 962917184.0, "2385": 967195264.0, "2390": 963078016.0, "2395": 974465536.0, "2400": 958411584.0, "2405": 968120896.0, "2410": 951585920.0, "2415": 965904960.0, "2420": 966518208.0, "2425": 959045248.0, "2430": 956686080.0, "2435": 961390080.0, "2440": 959755648.0, "2445": 970890176.0, "2450": 961998976.0, "2455": 922721024.0, "2460": 951954560.0, "2465": 955732224.0, "2470": 972571904.0, "2475": 973812736.0, "2480": 943896704.0, "2485": 944186560.0, "2490": 972411968.0, "2495": 974452672.0, "2500": 973908096.0, "2505": 958492352.0, "2510": 939510144.0, "2515": 979553408.0, "2520": 970473600.0, "2525": 964389504.0, "2530": 955798848.0, "2535": 936598656.0, "2540": 969027968.0, "2545": 970385088.0, "2550": 969458560.0, "2555": 969437056.0, "2560": 964977664.0, "2565": 959764544.0, "2570": 985176320.0, "2575": 957425664.0, "2580": 967424768.0, "2585": 966023424.0, "2590": 956355072.0, "2595": 981829760.0, "2600": 959532160.0, "2605": 963000960.0, "2610": 965972608.0, "2615": 951925504.0, "2620": 971242688.0, "2625": 976457408.0, "2630": 974411584.0, "2635": 948070912.0, "2640": 948137728.0, "2645": 963038848.0, "2650": 953984064.0, "2655": 977112064.0, "2660": 949623424.0, "2665": 953930112.0, "2670": 959063552.0, "2675": 979276736.0, "2680": 961397312.0, "2685": 970702592.0, "2690": 965221952.0, "2695": 943553664.0, "2700": 969425280.0, "2705": 978961536.0, "2710": 971812224.0, "2715": 990814976.0, "2720": 942649536.0, "2725": 967955904.0, "2730": 955468160.0, "2735": 970675840.0, "2740": 977921792.0, "2745": 932281024.0, "2750": 947857152.0, "2755": 956317440.0, "2760": 981697344.0, "2765": 966111488.0, "2770": 948915456.0, "2775": 935830656.0, "2780": 964776512.0, "2785": 969569152.0, "2790": 974273664.0, "2795": 966883712.0, "2800": 944388224.0, "2805": 964353152.0, "2810": 969609344.0, "2815": 975844352.0, "2820": 963087232.0, "2825": 937629952.0, "2830": 956742464.0, "2835": 986322752.0, "2840": 961759360.0, "2845": 967508416.0, "2850": 951716480.0, "2855": 962092736.0, "2860": 954241920.0, "2865": 955882368.0, "2870": 944664064.0, "2875": 974664000.0, "2880": 968201984.0, "2885": 981081664.0, "2890": 953455872.0, "2895": 957179008.0, "2900": 964989824.0, "2905": 931709056.0, "2910": 955731584.0, "2915": 979476608.0, "2920": 970491904.0, "2925": 964976000.0, "2930": 964048128.0, "2935": 940141696.0, "2940": 964911104.0, "2945": 989150336.0, "2950": 965209728.0, "2955": 965104768.0, "2960": 933162112.0, "2965": 968794816.0, "2970": 973035264.0, "2975": 958093440.0, "2980": 964497344.0, "2985": 937268864.0, "2990": 951255424.0, "2995": 978316288.0, "3000": 969275776.0, "3005": 974687936.0, "3010": 950235520.0, "3015": 943841152.0, "3020": 958442752.0, "3025": 975186112.0, "3030": 965018240.0, "3035": 963454336.0, "3040": 952134656.0, "3045": 989793408.0, "3050": 965545216.0, "3055": 982521216.0, "3060": 971227520.0, "3065": 943917312.0, "3070": 978408576.0, "3075": 975206208.0, "3080": 960992768.0, "3085": 962353280.0, "3090": 945954304.0, "3095": 938116096.0, "3100": 972929856.0, "3105": 961990400.0, "3110": 970657024.0, "3115": 963390336.0, "3120": 947117312.0, "3125": 972721408.0, "3130": 952974080.0, "3135": 966042624.0, "3140": 968488512.0, "3145": 937851392.0, "3150": 975010176.0, "3155": 976815360.0, "3160": 969628032.0, "3165": 982195584.0, "3170": 937961088.0, "3175": 953825024.0, "3180": 983810048.0, "3185": 965169920.0, "3190": 968483200.0, "3195": 950932864.0, "3200": 945102400.0, "3205": 959863552.0, "3210": 957487488.0, "3215": 958020352.0, "3220": 968129408.0, "3225": 935614592.0, "3230": 962590080.0, "3235": 975776960.0, "3240": 962621952.0, "3245": 981274880.0, "3250": 943260608.0, "3255": 954600064.0, "3260": 980362752.0, "3265": 963620416.0, "3270": 965164928.0, "3275": 959731712.0, "3280": 967046400.0, "3285": 982477824.0, "3290": 947690368.0, "3295": 966423168.0, "3300": 959165952.0, "3305": 949131776.0, "3310": 979510592.0, "3315": 964283520.0, "3320": 969208960.0, "3325": 956193856.0, "3330": 941167872.0, "3335": 964986176.0, "3340": 956913792.0, "3345": 972511872.0, "3350": 964575744.0, "3355": 943359232.0, "3360": 970037504.0, "3365": 969467904.0, "3370": 954763392.0, "3375": 958676096.0, "3380": 971474944.0, "3385": 947985280.0, "3390": 965793152.0, "3395": 978403520.0, "3400": 978138752.0, "3405": 976737152.0, "3410": 924193216.0, "3415": 955437440.0, "3420": 971832960.0, "3425": 977168768.0, "3430": 973837696.0, "3435": 936085504.0, "3440": 970517120.0, "3445": 957314816.0, "3450": 959853440.0, "3455": 963865600.0, "3460": 967895040.0, "3465": 931319360.0, "3470": 952348352.0, "3475": 973716096.0, "3480": 959751168.0, "3485": 979959296.0, "3490": 944684224.0, "3495": 953917952.0, "3500": 969330176.0, "3505": 964360704.0, "3510": 971235456.0, "3515": 955957888.0, "3520": 958747264.0, "3525": 971915392.0, "3530": 964137984.0, "3535": 983204736.0, "3540": 937501952.0, "3545": 944743040.0, "3550": 984463872.0, "3555": 978071424.0, "3560": 974386048.0, "3565": 968813568.0, "3570": 946708224.0, "3575": 976105344.0, "3580": 977506752.0, "3585": 954580608.0, "3590": 956439552.0, "3595": 951455232.0, "3600": 989009856.0, "3605": 962020160.0, "3610": 965066816.0, "3615": 974649280.0, "3620": 954900992.0, "3625": 939529280.0, "3630": 990161536.0, "3635": 971447488.0, "3640": 976038912.0, "3645": 961502208.0, "3650": 945816192.0, "3655": 965782720.0, "3660": 976207936.0, "3665": 964032640.0, "3670": 977444352.0, "3675": 943488640.0, "3680": 958197888.0, "3685": 964298624.0, "3690": 982118400.0, "3695": 963126016.0, "3700": 950571520.0, "3705": 947357440.0, "3710": 982380160.0, "3715": 972682368.0, "3720": 976139008.0, "3725": 964051584.0, "3730": 948861888.0, "3735": 967079296.0, "3740": 960960896.0, "3745": 969322432.0, "3750": 963935872.0, "3755": 953421824.0, "3760": 976654464.0, "3765": 979841536.0, "3770": 972372736.0, "3775": 972386432.0, "3780": 952599040.0, "3785": 960225664.0, "3790": 985586240.0, "3795": 969183360.0, "3800": 957865856.0, "3805": 972396288.0, "3810": 954517824.0, "3815": 974567808.0, "3820": 963020416.0, "3825": 962059712.0, "3830": 969383808.0, "3835": 934745984.0, "3840": 971270528.0, "3845": 986838144.0, "3850": 968850240.0, "3855": 965260864.0, "3860": 948069888.0, "3865": 975062528.0, "3870": 985111936.0, "3875": 983055232.0, "3880": 963640448.0, "3885": 953024000.0, "3890": 960298240.0, "3895": 960592000.0, "3900": 984901760.0, "3905": 976210880.0, "3910": 987348992.0, "3915": 946005888.0, "3920": 974879616.0, "3925": 961236352.0, "3930": 976790336.0, "3935": 978926464.0, "3940": 950304448.0, "3945": 960273664.0, "3950": 974172928.0, "3955": 972980352.0, "3960": 974078592.0, "3965": 950874176.0, "3970": 980656000.0, "3975": 960726272.0, "3980": 977520000.0, "3985": 962949120.0, "3990": 972755776.0, "3995": 953701632.0, "4000": 974988416.0, "4005": 971658112.0, "4010": 978397184.0, "4015": 971468288.0, "4020": 950297600.0, "4025": 968408576.0, "4030": 997923840.0, "4035": 978537152.0, "4040": 959788800.0, "4045": 939639808.0, "4050": 944694400.0, "4055": 980948672.0, "4060": 977659136.0, "4065": 975707200.0, "4070": 942140416.0, "4075": 945753088.0, "4080": 988741376.0, "4085": 962081024.0, "4090": 983332928.0, "4095": 986920832.0, "4100": 957176512.0, "4105": 954074624.0, "4110": 966463104.0, "4115": 976016960.0, "4120": 983506944.0, "4125": 960017024.0, "4130": 967253824.0, "4135": 971444032.0, "4140": 963146944.0, "4145": 956162816.0, "4150": 960282624.0, "4155": 946204480.0, "4160": 968402560.0, "4165": 970302720.0, "4170": 971934848.0, "4175": 955849792.0, "4180": 940962176.0, "4185": 968281088.0, "4190": 967966848.0, "4195": 989187008.0, "4200": 962630144.0, "4205": 960569792.0, "4210": 971813760.0, "4215": 974121088.0, "4220": 981120704.0, "4225": 975209024.0, "4230": 952702400.0, "4235": 958494080.0, "4240": 966811392.0, "4245": 961732736.0, "4250": 965857920.0, "4255": 958267136.0, "4260": 949557632.0, "4265": 964149376.0, "4270": 978395840.0, "4275": 975422464.0, "4280": 962672256.0, "4285": 951463296.0, "4290": 980017408.0, "4295": 968855296.0, "4300": 958286336.0, "4305": 966765632.0, "4310": 939614592.0, "4315": 949429696.0, "4320": 984536512.0, "4325": 982573056.0, "4330": 974772096.0, "4335": 949447040.0, "4340": 959503680.0, "4345": 956644096.0, "4350": 979935744.0, "4355": 968870784.0, "4360": 966338816.0, "4365": 941246208.0, "4370": 969508288.0, "4375": 973008000.0, "4380": 966160768.0, "4385": 972036992.0, "4390": 954165440.0, "4395": 951869056.0, "4400": 973665280.0, "4405": 972466432.0, "4410": 967921280.0, "4415": 958960768.0, "4420": 960791744.0, "4425": 976440576.0, "4430": 965930368.0, "4435": 975914240.0, "4440": 962343424.0, "4445": 954825536.0, "4450": 978332672.0, "4455": 960256384.0, "4460": 968593856.0, "4465": 968818944.0, "4470": 944152448.0, "4475": 952010240.0, "4480": 978837312.0, "4485": 968292032.0, "4490": 957038208.0, "4495": 938951296.0, "4500": 953253376.0, "4505": 977203456.0, "4510": 978762368.0, "4515": 962505600.0, "4520": 958837632.0, "4525": 958139520.0, "4530": 964429824.0, "4535": 976604928.0, "4540": 976823488.0, "4545": 970194560.0, "4550": 953223488.0, "4555": 959566720.0, "4560": 972485376.0, "4565": 973467520.0, "4570": 978886400.0, "4575": 957835072.0, "4580": 963191936.0, "4585": 957370752.0, "4590": 986568064.0, "4595": 960176896.0, "4600": 952267392.0, "4605": 959311296.0, "4610": 963711616.0, "4615": 957987520.0, "4620": 960268672.0, "4625": 973823488.0, "4630": 944495104.0, "4635": 977037952.0, "4640": 960311040.0, "4645": 982017600.0, "4650": 962374528.0, "4655": 939483264.0, "4660": 964001792.0, "4665": 962602752.0, "4670": 976696704.0, "4675": 963491392.0, "4680": 957513600.0, "4685": 949686656.0, "4690": 956986752.0, "4695": 969964032.0, "4700": 961238272.0, "4705": 970782208.0, "4710": 934518272.0, "4715": 970476096.0, "4720": 966337728.0, "4725": 980328128.0, "4730": 965875840.0, "4735": 937908736.0, "4740": 960161536.0, "4745": 976046720.0, "4750": 967984512.0, "4755": 984947328.0, "4760": 959111680.0, "4765": 955199488.0, "4770": 958583872.0, "4775": 991080576.0, "4780": 976794752.0, "4785": 967571200.0, "4790": 943707328.0, "4795": 955813760.0, "4800": 967686272.0, "4805": 976542592.0, "4810": 965124864.0, "4815": 957983104.0, "4820": 973980928.0, "4825": 961481728.0, "4830": 962651264.0, "4835": 972545408.0, "4840": 948877888.0, "4845": 965765760.0, "4850": 960305536.0, "4855": 964131584.0, "4860": 963063808.0, "4865": 967482752.0, "4870": 957158656.0, "4875": 983550592.0, "4880": 957085056.0, "4885": 977004544.0, "4890": 959764928.0, "4895": 942128512.0, "4900": 973663296.0, "4905": 975227264.0, "4910": 969246080.0, "4915": 970115456.0, "4920": 941182656.0, "4925": 954773120.0, "4930": 977023104.0, "4935": 963751424.0, "4940": 972623808.0, "4945": 960032128.0, "4950": 940732480.0, "4955": 967988096.0, "4960": 976728320.0, "4965": 960948608.0, "4970": 958567680.0, "4975": 933676800.0, "4980": 960813184.0, "4985": 962951104.0, "4990": 963516288.0, "4995": 986247936.0, "5000": 940654592.0, "5005": 968864192.0, "5010": 970259840.0, "5015": 965172608.0, "5020": 966617280.0, "5025": 949312896.0, "5030": 953465344.0, "5035": 967256000.0, "5040": 955628928.0, "5045": 969080000.0, "5050": 953309056.0, "5055": 954762624.0, "5060": 962993024.0, "5065": 952085120.0, "5070": 973511552.0, "5075": 978483328.0, "5080": 942842112.0, "5085": 965766400.0, "5090": 972783680.0, "5095": 964338176.0, "5100": 958294016.0, "5105": 965281664.0, "5110": 950365824.0, "5115": 972247808.0, "5120": 960340608.0, "5125": 969731968.0, "5130": 938710656.0, "5135": 943623424.0, "5140": 969840640.0, "5145": 968529344.0, "5150": 970528000.0, "5155": 972525248.0, "5160": 926502016.0, "5165": 961518464.0, "5170": 966824960.0, "5175": 966024192.0, "5180": 963595328.0, "5185": 930779008.0, "5190": 949693952.0, "5195": 972360320.0, "5200": 973649152.0, "5205": 968176128.0, "5210": 960442304.0, "5215": 928693888.0, "5220": 979112384.0, "5225": 984693504.0, "5230": 974975616.0, "5235": 974982464.0, "5240": 944278144.0, "5245": 970774656.0, "5250": 972390464.0, "5255": 966848128.0, "5260": 976729728.0, "5265": 942283776.0, "5270": 969202048.0, "5275": 970061632.0, "5280": 962764928.0, "5285": 964023296.0, "5290": 932465792.0, "5295": 951714688.0, "5300": 975563520.0, "5305": 951904128.0, "5310": 968004736.0, "5315": 955755904.0, "5320": 950885888.0, "5325": 973016320.0, "5330": 967732992.0, "5335": 967475008.0, "5340": 966369536.0, "5345": 962912512.0, "5350": 978913280.0, "5355": 972191104.0, "5360": 963819136.0, "5365": 965161280.0, "5370": 947777152.0, "5375": 948748416.0, "5380": 967102656.0, "5385": 980430272.0, "5390": 965252480.0, "5395": 955025536.0, "5400": 948263040.0, "5405": 974207168.0, "5410": 967794944.0, "5415": 975952640.0, "5420": 967297152.0, "5425": 937265984.0, "5430": 963686528.0, "5435": 971789568.0, "5440": 968956736.0, "5445": 957306432.0, "5450": 919370368.0, "5455": 951930496.0, "5460": 962315008.0, "5465": 978783104.0, "5470": 980883712.0, "5475": 941501568.0, "5480": 955719168.0, "5485": 964816768.0, "5490": 975815424.0, "5495": 962707968.0, "5500": 971107648.0, "5505": 956916224.0, "5510": 968451392.0, "5515": 945405568.0, "5520": 963071872.0, "5525": 975771904.0, "5530": 936475648.0, "5535": 970547968.0, "5540": 960222464.0, "5545": 971888384.0, "5550": 967786624.0, "5555": 955772544.0, "5560": 954374016.0, "5565": 968741248.0, "5570": 945064448.0, "5575": 960441600.0, "5580": 960465280.0, "5585": 959385472.0, "5590": 977594752.0, "5595": 975097728.0, "5600": 962936320.0, "5605": 964017920.0, "5610": 943072576.0, "5615": 966467584.0, "5620": 963213440.0, "5625": 982251392.0, "5630": 975903616.0, "5635": 957188352.0, "5640": 951321920.0, "5645": 967769728.0, "5650": 979010112.0, "5655": 983338496.0, "5660": 956309184.0, "5665": 953303680.0, "5670": 965950464.0, "5675": 967530304.0, "5680": 978475840.0, "5685": 961873792.0, "5690": 935811520.0, "5695": 963629312.0, "5700": 952340480.0, "5705": 974340992.0, "5710": 971034880.0, "5715": 945939328.0, "5720": 974930560.0, "5725": 967241344.0, "5730": 978353792.0, "5735": 964685568.0, "5740": 943451840.0, "5745": 971057664.0, "5750": 981706368.0, "5755": 956582976.0, "5760": 963500032.0, "5765": 957572224.0, "5770": 955539072.0, "5775": 970688512.0, "5780": 962628288.0, "5785": 970453440.0, "5790": 974456832.0, "5795": 949655936.0, "5800": 965840256.0, "5805": 968615424.0, "5810": 975876352.0, "5815": 969980864.0, "5820": 936254464.0, "5825": 969106496.0, "5830": 977602112.0, "5835": 974857920.0, "5840": 962964480.0, "5845": 968640960.0, "5850": 942897792.0, "5855": 976024384.0, "5860": 979439552.0, "5865": 978148352.0, "5870": 968604800.0, "5875": 942081792.0, "5880": 964361984.0, "5885": 974635264.0, "5890": 972499392.0, "5895": 965435904.0, "5900": 941175808.0, "5905": 961730432.0, "5910": 958446336.0, "5915": 967928576.0, "5920": 977235520.0, "5925": 959469824.0, "5930": 946577664.0, "5935": 952298048.0, "5940": 977465600.0, "5945": 984830720.0, "5950": 980530304.0, "5955": 934909760.0, "5960": 961539776.0, "5965": 965803520.0, "5970": 970514560.0, "5975": 961675264.0, "5980": 958086400.0, "5985": 964439936.0, "5990": 973381760.0, "5995": 955833856.0, "6000": 955473024.0, "6005": 961281280.0, "6010": 952609664.0, "6015": 974269184.0, "6020": 978020224.0, "6025": 971871104.0, "6030": 955287680.0, "6035": 946819712.0, "6040": 962494592.0, "6045": 983547136.0, "6050": 956393088.0, "6055": 963359616.0, "6060": 945583360.0, "6065": 958321536.0, "6070": 978182400.0, "6075": 977890560.0, "6080": 957321728.0, "6085": 947495808.0, "6090": 953634816.0, "6095": 964598656.0, "6100": 979702976.0, "6105": 971048960.0, "6110": 961632384.0, "6115": 943507840.0, "6120": 968476288.0, "6125": 960494720.0, "6130": 983671360.0, "6135": 960847104.0, "6140": 958474944.0, "6145": 970942592.0, "6150": 968419072.0, "6155": 974943104.0, "6160": 977314944.0, "6165": 952410816.0, "6170": 951019392.0, "6175": 963261056.0, "6180": 969317440.0, "6185": 966209792.0, "6190": 963584192.0, "6195": 946898560.0, "6200": 969195008.0, "6205": 966867072.0, "6210": 959094080.0, "6215": 972791104.0, "6220": 936153920.0, "6225": 978469696.0, "6230": 975851584.0, "6235": 971556544.0, "6240": 965723904.0, "6245": 955868672.0, "6250": 956253568.0, "6255": 973080192.0, "6260": 978570880.0, "6265": 974795136.0, "6270": 958560384.0, "6275": 963615744.0, "6280": 972920512.0, "6285": 966135296.0, "6290": 970845184.0, "6295": 987233024.0, "6300": 947370624.0, "6305": 964771840.0, "6310": 979152448.0, "6315": 978344704.0, "6320": 971638464.0, "6325": 922708992.0, "6330": 958826368.0, "6335": 974829952.0, "6340": 984530432.0, "6345": 966922880.0, "6350": 944525952.0, "6355": 957990144.0, "6360": 972515456.0, "6365": 972268800.0, "6370": 958820096.0, "6375": 966932544.0, "6380": 951265920.0, "6385": 973684224.0, "6390": 965368832.0, "6395": 974959488.0, "6400": 984229888.0, "6405": 944126656.0, "6410": 977154048.0, "6415": 971514048.0, "6420": 956530048.0, "6425": 961085056.0, "6430": 957341312.0, "6435": 960001984.0, "6440": 968825216.0, "6445": 973343936.0, "6450": 974355456.0, "6455": 962167616.0, "6460": 941044864.0, "6465": 974176192.0, "6470": 979789696.0, "6475": 960836480.0, "6480": 967656576.0, "6485": 948487424.0, "6490": 970896896.0, "6495": 988185216.0, "6500": 980162816.0, "6505": 971974080.0, "6510": 951329088.0, "6515": 957639488.0, "6520": 978534080.0, "6525": 978818432.0, "6530": 973112832.0, "6535": 967681664.0, "6540": 949926464.0, "6545": 966122880.0, "6550": 979186688.0, "6555": 966898560.0, "6560": 975305344.0, "6565": 949327232.0, "6570": 951871552.0, "6575": 962025216.0, "6580": 975459712.0, "6585": 979436096.0, "6590": 948819648.0, "6595": 961398912.0, "6600": 961035520.0, "6605": 961387712.0, "6610": 985261952.0, "6615": 959604864.0, "6620": 944392448.0, "6625": 970623296.0, "6630": 971319040.0, "6635": 963969536.0, "6640": 959549568.0, "6645": 950709504.0, "6650": 978527872.0, "6655": 965593984.0, "6660": 968298752.0, "6665": 968645504.0, "6670": 932542592.0, "6675": 970478976.0, "6680": 968697088.0, "6685": 958428416.0, "6690": 955883648.0, "6695": 955193088.0, "6700": 961816384.0, "6705": 978898816.0, "6710": 970680768.0, "6715": 966518592.0, "6720": 973696320.0, "6725": 941461440.0, "6730": 979068352.0, "6735": 994181056.0, "6740": 976271104.0, "6745": 974234752.0, "6750": 938818176.0, "6755": 977293312.0, "6760": 969438336.0, "6765": 978112192.0, "6770": 975145600.0, "6775": 943069184.0, "6780": 946927488.0, "6785": 975065088.0, "6790": 960090624.0, "6795": 975750784.0, "6800": 972730752.0, "6805": 946265728.0, "6810": 958129664.0, "6815": 970426432.0, "6820": 977582592.0, "6825": 969082048.0, "6830": 950103424.0, "6835": 981242752.0, "6840": 982863232.0, "6845": 948763904.0, "6850": 965380416.0, "6855": 953757312.0, "6860": 978584576.0, "6865": 983644672.0, "6870": 964683136.0, "6875": 978492672.0, "6880": 950138368.0, "6885": 958115968.0, "6890": 959858496.0, "6895": 965740160.0, "6900": 984915840.0, "6905": 968171648.0, "6910": 949790208.0, "6915": 970620288.0, "6920": 966797952.0, "6925": 964972416.0, "6930": 964311040.0, "6935": 951827008.0, "6940": 962700032.0, "6945": 986297920.0, "6950": 973154752.0, "6955": 964482624.0, "6960": 939806464.0, "6965": 974418496.0, "6970": 978198976.0, "6975": 984772032.0, "6980": 982077952.0, "6985": 959446400.0, "6990": 945089984.0, "6995": 986954752.0, "7000": 962949312.0, "7005": 962664640.0, "7010": 984653184.0, "7015": 945128576.0, "7020": 982638336.0, "7025": 968189888.0, "7030": 953218688.0, "7035": 982478784.0, "7040": 950287616.0, "7045": 955659904.0, "7050": 959973824.0, "7055": 963561472.0, "7060": 976520448.0, "7065": 967826304.0, "7070": 953400576.0, "7075": 956437120.0, "7080": 968756864.0, "7085": 965636224.0, "7090": 969043584.0, "7095": 959624448.0, "7100": 973197760.0, "7105": 973048704.0, "7110": 969799936.0, "7115": 958092672.0, "7120": 948744576.0, "7125": 962832896.0, "7130": 971299840.0, "7135": 964393216.0, "7140": 961213824.0, "7145": 930183232.0, "7150": 945960320.0, "7155": 990784256.0, "7160": 968134912.0, "7165": 956493696.0, "7170": 968041856.0, "7175": 955532928.0, "7180": 957827072.0, "7185": 984449472.0, "7190": 978477632.0, "7195": 973381504.0, "7200": 935623360.0, "7205": 957230336.0, "7210": 966881216.0, "7215": 969215616.0, "7220": 981925376.0, "7225": 928530432.0, "7230": 949353088.0, "7235": 966760576.0, "7240": 966575168.0, "7245": 967134912.0, "7250": 949025088.0, "7255": 956946368.0, "7260": 969807872.0, "7265": 974691648.0, "7270": 959566592.0, "7275": 958942848.0, "7280": 956765824.0, "7285": 977538560.0, "7290": 976734080.0, "7295": 962754880.0, "7300": 975082176.0, "7305": 963739776.0, "7310": 976988032.0, "7315": 966514048.0, "7320": 974233408.0, "7325": 966526080.0, "7330": 959260544.0, "7335": 963692864.0, "7340": 977422592.0, "7345": 967423744.0, "7350": 984486528.0, "7355": 959355520.0, "7360": 948462720.0, "7365": 972380288.0, "7370": 982188928.0, "7375": 963174656.0, "7380": 963633280.0, "7385": 948295808.0, "7390": 963566400.0, "7395": 958074624.0, "7400": 969333440.0, "7405": 987281280.0, "7410": 951600128.0, "7415": 950185728.0, "7420": 966599296.0, "7425": 982484800.0, "7430": 965429312.0, "7435": 972606592.0, "7440": 936859200.0, "7445": 968497024.0, "7450": 979957696.0, "7455": 970923776.0, "7460": 971840896.0, "7465": 939178432.0, "7470": 971494592.0, "7475": 957699840.0, "7480": 968804352.0, "7485": 961318528.0, "7490": 933789312.0, "7495": 957100544.0, "7500": 969006592.0, "7505": 969803648.0, "7510": 971704192.0, "7515": 979311680.0, "7520": 951624960.0, "7525": 970088000.0, "7530": 953869440.0, "7535": 971075776.0, "7540": 979153728.0, "7545": 958988480.0, "7550": 960133120.0, "7555": 959912320.0, "7560": 969914368.0, "7565": 954751808.0, "7570": 942337152.0, "7575": 965335296.0, "7580": 982003200.0, "7585": 978710656.0, "7590": 969922368.0, "7595": 949709184.0, "7600": 945771776.0, "7605": 982120576.0, "7610": 968713216.0, "7615": 988171520.0, "7620": 956914048.0, "7625": 940939520.0, "7630": 971317248.0, "7635": 984252672.0, "7640": 983151168.0, "7645": 967816320.0, "7650": 958914176.0, "7655": 962227200.0, "7660": 968720896.0, "7665": 977950912.0, "7670": 975216896.0, "7675": 975006848.0, "7680": 942233088.0, "7685": 959380480.0, "7690": 975130240.0, "7695": 982011904.0, "7700": 978902976.0, "7705": 940606208.0, "7710": 974454784.0, "7715": 979171456.0, "7720": 967490560.0, "7725": 960158592.0, "7730": 942943488.0, "7735": 967944832.0, "7740": 980179072.0, "7745": 963843328.0, "7750": 963459328.0, "7755": 959614848.0, "7760": 970433344.0, "7765": 970703552.0, "7770": 962102848.0, "7775": 981846464.0, "7780": 964457472.0, "7785": 959517056.0, "7790": 967987840.0, "7795": 968687424.0, "7800": 971671040.0, "7805": 968158592.0, "7810": 945835136.0, "7815": 963331200.0, "7820": 974466304.0, "7825": 963474560.0, "7830": 957260288.0, "7835": 949782656.0, "7840": 957269312.0, "7845": 954002944.0, "7850": 979745088.0, "7855": 986607936.0, "7860": 947287040.0, "7865": 949166208.0, "7870": 965038592.0, "7875": 975639808.0, "7880": 968440192.0, "7885": 969100288.0, "7890": 951917056.0, "7895": 974263360.0, "7900": 963606656.0, "7905": 963902464.0, "7910": 965879680.0, "7915": 943390208.0, "7920": 950807040.0, "7925": 969449856.0, "7930": 964844544.0, "7935": 984472576.0, "7940": 964350400.0, "7945": 950747264.0, "7950": 962036352.0, "7955": 979737344.0, "7960": 963687936.0, "7965": 953212928.0, "7970": 951432448.0, "7975": 969081344.0, "7980": 965377920.0, "7985": 959348736.0, "7990": 968055040.0, "7995": 946779136.0, "8000": 962594304.0, "8005": 980605056.0, "8010": 965702976.0, "8015": 982808256.0, "8020": 960389952.0, "8025": 965309888.0, "8030": 958262208.0, "8035": 975592640.0, "8040": 960643584.0, "8045": 948272384.0, "8050": 959290432.0, "8055": 979224768.0, "8060": 969468672.0, "8065": 957939904.0, "8070": 963845696.0, "8075": 941970688.0, "8080": 965985472.0, "8085": 966864320.0, "8090": 983528064.0, "8095": 988650688.0, "8100": 966078208.0, "8105": 944512128.0, "8110": 968908992.0, "8115": 985180608.0, "8120": 974701888.0, "8125": 964153664.0, "8130": 966322432.0, "8135": 967723904.0, "8140": 963798528.0, "8145": 995247616.0, "8150": 973142080.0, "8155": 938476544.0, "8160": 964315008.0, "8165": 972925952.0, "8170": 968275520.0, "8175": 961599808.0, "8180": 936139392.0, "8185": 962736896.0, "8190": 968033088.0, "8195": 977397632.0, "8200": 956563840.0, "8205": 960812480.0, "8210": 946741568.0, "8215": 982398592.0, "8220": 987755072.0, "8225": 966280064.0, "8230": 962282688.0, "8235": 933738944.0, "8240": 980304576.0, "8245": 976189632.0, "8250": 963750400.0, "8255": 977001088.0, "8260": 956452416.0, "8265": 982710592.0, "8270": 952492096.0, "8275": 973853312.0, "8280": 974218048.0, "8285": 953334080.0, "8290": 939714688.0, "8295": 981089472.0, "8300": 973026240.0, "8305": 978061504.0, "8310": 950924096.0, "8315": 937711936.0, "8320": 977374080.0, "8325": 967837376.0, "8330": 990203584.0, "8335": 975599104.0, "8340": 947317568.0, "8345": 970703104.0, "8350": 970132096.0, "8355": 974915968.0, "8360": 979210496.0, "8365": 932621952.0, "8370": 965304512.0, "8375": 979830976.0, "8380": 965015744.0, "8385": 972730560.0, "8390": 962613248.0, "8395": 951077120.0, "8400": 972409664.0, "8405": 951415680.0, "8410": 960789632.0, "8415": 965573312.0, "8420": 941582016.0, "8425": 968081152.0, "8430": 961159616.0, "8435": 966042432.0, "8440": 969658688.0, "8445": 952713344.0, "8450": 984510784.0, "8455": 990051520.0, "8460": 968656960.0, "8465": 967147136.0, "8470": 962985088.0, "8475": 943118272.0, "8480": 986979264.0, "8485": 979689920.0, "8490": 992280000.0, "8495": 971471552.0, "8500": 951317824.0, "8505": 983350528.0, "8510": 974186112.0, "8515": 968862976.0, "8520": 961618560.0, "8525": 945070592.0, "8530": 984119616.0, "8535": 978264000.0, "8540": 967839104.0, "8545": 968719616.0, "8550": 941721408.0, "8555": 971721728.0, "8560": 958210112.0, "8565": 975541440.0, "8570": 974850816.0, "8575": 971165696.0, "8580": 932030912.0, "8585": 965721024.0, "8590": 978774592.0, "8595": 979179264.0, "8600": 983534976.0, "8605": 957472768.0, "8610": 983406848.0, "8615": 977662720.0, "8620": 963278016.0, "8625": 979344320.0, "8630": 943469248.0, "8635": 961638848.0, "8640": 973401088.0, "8645": 970304896.0, "8650": 969363520.0, "8655": 970457216.0, "8660": 943783488.0, "8665": 986105984.0, "8670": 960734720.0, "8675": 973824192.0, "8680": 962276288.0, "8685": 955741760.0, "8690": 978501312.0, "8695": 968820032.0, "8700": 972532608.0, "8705": 973766784.0, "8710": 946858560.0, "8715": 973281984.0, "8720": 958450816.0, "8725": 979051520.0, "8730": 985599936.0, "8735": 952338240.0, "8740": 940889408.0, "8745": 987271872.0, "8750": 972048384.0, "8755": 971578176.0, "8760": 965483200.0, "8765": 934360448.0, "8770": 986362240.0, "8775": 969740736.0, "8780": 966950400.0, "8785": 961987648.0, "8790": 947565056.0, "8795": 969517056.0, "8800": 970803392.0, "8805": 972994112.0, "8810": 983429888.0, "8815": 951016960.0, "8820": 939808704.0, "8825": 964408448.0, "8830": 981249472.0, "8835": 971398336.0, "8840": 979160512.0, "8845": 951234560.0, "8850": 986674560.0, "8855": 970377472.0, "8860": 962000896.0, "8865": 956620736.0, "8870": 945714880.0, "8875": 968444160.0, "8880": 983889920.0, "8885": 971269056.0, "8890": 969427200.0, "8895": 952763456.0, "8900": 961386176.0, "8905": 976606592.0, "8910": 981717440.0, "8915": 980645504.0, "8920": 968074112.0, "8925": 939992256.0, "8930": 970258432.0, "8935": 964040384.0, "8940": 977644800.0, "8945": 981826944.0, "8950": 945562496.0, "8955": 972497152.0, "8960": 973356992.0, "8965": 973678848.0, "8970": 966311424.0, "8975": 936927552.0, "8980": 952829248.0, "8985": 977632320.0, "8990": 967399424.0, "8995": 980307840.0, "9000": 952199232.0, "9005": 950403776.0, "9010": 974875392.0, "9015": 982753728.0, "9020": 958935488.0, "9025": 979374144.0, "9030": 953690688.0, "9035": 968675712.0, "9040": 978208384.0, "9045": 968412032.0, "9050": 983012544.0, "9055": 947799488.0, "9060": 956368192.0, "9065": 969361984.0, "9070": 967990592.0, "9075": 980648000.0, "9080": 952455488.0, "9085": 971552512.0, "9090": 963642816.0, "9095": 968240320.0, "9100": 974375744.0, "9105": 960292096.0, "9110": 947543104.0, "9115": 956514368.0, "9120": 985151168.0, "9125": 963050368.0, "9130": 958395136.0, "9135": 951643648.0, "9140": 967074432.0, "9145": 976978944.0, "9150": 986789952.0, "9155": 976907008.0, "9160": 957621504.0, "9165": 950526656.0, "9170": 988443776.0, "9175": 971262400.0, "9180": 967509504.0, "9185": 955042112.0, "9190": 956366976.0, "9195": 965770368.0, "9200": 968622848.0, "9205": 967231744.0, "9210": 984253184.0, "9215": 931731776.0, "9220": 949083264.0, "9225": 970972416.0, "9230": 970827200.0, "9235": 971550848.0, "9240": 959819392.0, "9245": 963536256.0, "9250": 961576256.0, "9255": 982627072.0, "9260": 979369344.0, "9265": 952769152.0, "9270": 948946560.0, "9275": 978587840.0, "9280": 977583360.0, "9285": 962705472.0, "9290": 978850560.0, "9295": 958658624.0, "9300": 965618304.0, "9305": 968911488.0, "9310": 972879232.0, "9315": 976034368.0, "9320": 947948352.0, "9325": 979431360.0, "9330": 977558272.0, "9335": 975487744.0, "9340": 960151872.0, "9345": 943218176.0, "9350": 952676288.0, "9355": 963291136.0, "9360": 960074112.0, "9365": 983534848.0, "9370": 982557952.0, "9375": 942044672.0, "9380": 982945920.0, "9385": 985381568.0, "9390": 973099712.0, "9395": 978541248.0, "9400": 937756160.0, "9405": 968082816.0, "9410": 981275392.0, "9415": 991699072.0, "9420": 960244800.0, "9425": 956695680.0, "9430": 938879872.0, "9435": 974510464.0, "9440": 959322368.0, "9445": 973635584.0, "9450": 961485184.0, "9455": 945813120.0, "9460": 978135680.0, "9465": 988016576.0, "9470": 962977216.0, "9475": 983614016.0, "9480": 931015168.0, "9485": 986877504.0, "9490": 963520000.0, "9495": 972526464.0, "9500": 982355008.0, "9505": 970113472.0, "9510": 964358720.0, "9515": 956691264.0, "9520": 947893248.0, "9525": 965359680.0, "9530": 958138048.0, "9535": 951097600.0, "9540": 953849536.0, "9545": 979579200.0, "9550": 955691648.0, "9555": 952532928.0, "9560": 958223360.0, "9565": 969425920.0, "9570": 977369856.0, "9575": 959033984.0, "9580": 963097536.0, "9585": 945779776.0, "9590": 948295360.0, "9595": 966492160.0, "9600": 984344000.0, "9605": 984934912.0, "9610": 943517952.0, "9615": 952482496.0, "9620": 980674816.0, "9625": 978574272.0, "9630": 970052544.0, "9635": 974733184.0, "9640": 940245440.0, "9645": 962402112.0, "9650": 971201664.0, "9655": 987483968.0, "9660": 963258752.0, "9665": 949972864.0, "9670": 966334592.0, "9675": 963075968.0, "9680": 965003840.0, "9685": 986401984.0, "9690": 940337664.0, "9695": 950509504.0, "9700": 975088128.0, "9705": 972504064.0, "9710": 967558912.0, "9715": 971305216.0, "9720": 940514752.0, "9725": 966097152.0, "9730": 973731968.0, "9735": 974370176.0, "9740": 971295680.0, "9745": 950816192.0, "9750": 979727232.0, "9755": 970292480.0, "9760": 968175296.0, "9765": 963811392.0, "9770": 952258304.0, "9775": 956670528.0, "9780": 970353408.0, "9785": 958627264.0, "9790": 961264512.0, "9795": 958201472.0, "9800": 948971520.0, "9805": 962036992.0, "9810": 978381568.0, "9815": 977262656.0, "9820": 982496960.0, "9825": 939141376.0, "9830": 969073216.0, "9835": 972339072.0, "9840": 971208320.0, "9845": 966656960.0, "9850": 946518784.0, "9855": 956995328.0, "9860": 986979584.0, "9865": 970027904.0, "9870": 989916864.0, "9875": 956925696.0, "9880": 931276800.0, "9885": 963470400.0, "9890": 972405568.0, "9895": 983489792.0, "9900": 956755968.0, "9905": 938556160.0, "9910": 978602368.0, "9915": 973099200.0, "9920": 944426496.0, "9925": 962916992.0, "9930": 947183744.0, "9935": 960278272.0, "9940": 965697280.0, "9945": 958382720.0, "9950": 963625856.0, "9955": 942997056.0, "9960": 966882304.0, "9965": 983350592.0, "9970": 966386496.0, "9975": 963881344.0, "9980": 980328512.0, "9985": 941824384.0, "9990": 976809984.0, "9995": 982129024.0, "10000": 971851776.0, "10005": 969861376.0, "10010": 943877440.0, "10015": 982577152.0, "10020": 977804032.0, "10025": 979549888.0, "10030": 971244672.0, "10035": 946533312.0, "10040": 950485760.0, "10045": 978173696.0, "10050": 985758592.0, "10055": 990346368.0, "10060": 959069696.0, "10065": 947062784.0, "10070": 966931584.0, "10075": 979401280.0, "10080": 971622080.0, "10085": 974745856.0, "10090": 944008832.0, "10095": 963125376.0, "10100": 972146944.0, "10105": 976034752.0, "10110": 971772800.0, "10115": 948353792.0, "10120": 962507264.0, "10125": 974194752.0, "10130": 980564736.0, "10135": 972567680.0, "10140": 957806016.0, "10145": 933887232.0, "10150": 973855872.0, "10155": 969285952.0, "10160": 961618304.0, "10165": 974885760.0, "10170": 944427776.0, "10175": 978892608.0, "10180": 983664000.0, "10185": 978746752.0, "10190": 955519744.0, "10195": 937150144.0, "10200": 988093952.0, "10205": 972676352.0, "10210": 966740672.0, "10215": 975630720.0, "10220": 948295552.0, "10225": 950289280.0, "10230": 975223168.0, "10235": 953831744.0, "10240": 969616640.0, "10245": 961772032.0, "10250": 936264064.0, "10255": 979504128.0, "10260": 964644352.0, "10265": 967489600.0, "10270": 968425088.0, "10275": 936103808.0, "10280": 969561856.0, "10285": 996083968.0, "10290": 979454656.0, "10295": 981540224.0, "10300": 951828480.0, "10305": 971864512.0, "10310": 960064768.0, "10315": 971187840.0, "10320": 985120896.0, "10325": 983215936.0, "10330": 934926464.0, "10335": 976317696.0, "10340": 957468800.0, "10345": 973596928.0, "10350": 984756096.0, "10355": 941803008.0, "10360": 961872512.0, "10365": 974268416.0, "10370": 980346176.0, "10375": 969875584.0, "10380": 961527616.0, "10385": 955072512.0, "10390": 990463232.0, "10395": 964845440.0, "10400": 960777984.0, "10405": 949747968.0, "10410": 955098176.0, "10415": 976146624.0, "10420": 967212096.0, "10425": 969751808.0, "10430": 964795648.0, "10435": 963036736.0, "10440": 971963200.0, "10445": 972183296.0, "10450": 975122048.0, "10455": 966331904.0, "10460": 948480896.0, "10465": 971374464.0, "10470": 972572672.0, "10475": 979157888.0, "10480": 997170048.0, "10485": 949275904.0, "10490": 934796800.0, "10495": 969120896.0, "10500": 977939456.0, "10505": 958974848.0, "10510": 950564864.0, "10515": 953891648.0, "10520": 972026624.0, "10525": 969414016.0, "10530": 970264896.0, "10535": 986317568.0, "10540": 946952448.0, "10545": 970374144.0, "10550": 968708032.0, "10555": 959557632.0, "10560": 975881664.0, "10565": 961363648.0, "10570": 968627200.0, "10575": 972498368.0, "10580": 960586752.0, "10585": 973333312.0, "10590": 951660288.0, "10595": 956430016.0, "10600": 967505280.0, "10605": 986588288.0, "10610": 966405248.0, "10615": 976526336.0, "10620": 940774144.0, "10625": 964938240.0, "10630": 967592064.0, "10635": 973064768.0, "10640": 974382592.0, "10645": 948458688.0, "10650": 966118656.0, "10655": 985083136.0, "10660": 976596736.0, "10665": 967187456.0, "10670": 954858240.0, "10675": 934123776.0, "10680": 986080640.0, "10685": 990783424.0, "10690": 963935488.0, "10695": 971897600.0, "10700": 949662464.0, "10705": 977799040.0, "10710": 968120576.0, "10715": 966837376.0, "10720": 966149760.0, "10725": 944047616.0, "10730": 980048704.0, "10735": 960773376.0, "10740": 971234048.0, "10745": 984200064.0, "10750": 981234944.0, "10755": 944244992.0, "10760": 969661632.0, "10765": 972555392.0, "10770": 973764864.0, "10775": 958674688.0, "10780": 949434368.0, "10785": 953571456.0, "10790": 970121216.0, "10795": 960127488.0, "10800": 972161280.0, "10805": 950822848.0, "10810": 973682880.0, "10815": 959656832.0, "10820": 971093696.0, "10825": 967113792.0, "10830": 956819456.0, "10835": 963015552.0, "10840": 970409024.0, "10845": 963739136.0, "10850": 957549312.0, "10855": 967510912.0, "10860": 950518400.0, "10865": 964160128.0, "10870": 983381376.0, "10875": 982274560.0, "10880": 958610752.0, "10885": 954466816.0, "10890": 972840192.0, "10895": 973359552.0, "10900": 970362240.0, "10905": 964840448.0, "10910": 938557824.0, "10915": 960615424.0, "10920": 982842368.0, "10925": 969796480.0, "10930": 968551104.0, "10935": 962492544.0, "10940": 953943040.0, "10945": 964767936.0, "10950": 972467008.0, "10955": 966633216.0, "10960": 971941824.0, "10965": 966455552.0, "10970": 983437248.0, "10975": 965796672.0, "10980": 974656832.0, "10985": 986572608.0, "10990": 950958336.0, "10995": 963353408.0, "11000": 985286784.0, "11005": 978548224.0, "11010": 971455360.0, "11015": 969726400.0, "11020": 947938112.0, "11025": 959834624.0, "11030": 977837824.0, "11035": 975095872.0, "11040": 986142784.0, "11045": 956475200.0, "11050": 973064832.0, "11055": 974445952.0, "11060": 962065280.0, "11065": 985161344.0, "11070": 949631488.0, "11075": 976197760.0, "11080": 971823744.0, "11085": 967095040.0, "11090": 976461504.0, "11095": 946438144.0, "11100": 965375424.0, "11105": 973585792.0, "11110": 980633600.0, "11115": 967886656.0, "11120": 956744384.0, "11125": 956475200.0, "11130": 975504896.0, "11135": 979451712.0, "11140": 964612992.0, "11145": 966377088.0, "11150": 935937152.0, "11155": 975939584.0, "11160": 983999424.0, "11165": 982400320.0, "11170": 977339264.0, "11175": 957764608.0, "11180": 962147136.0, "11185": 971844672.0, "11190": 979692288.0, "11195": 985085376.0, "11200": 982817216.0, "11205": 942217408.0, "11210": 984703488.0, "11215": 967396928.0, "11220": 983169280.0, "11225": 961691264.0, "11230": 953189568.0, "11235": 981706240.0, "11240": 977425152.0, "11245": 966073920.0, "11250": 969233408.0, "11255": 960376256.0, "11260": 979703296.0, "11265": 963607232.0, "11270": 981118656.0, "11275": 968047232.0, "11280": 955794176.0, "11285": 953405440.0, "11290": 956623360.0, "11295": 968271680.0, "11300": 962673344.0, "11305": 958624960.0, "11310": 946341184.0, "11315": 982999936.0, "11320": 964742912.0, "11325": 981029568.0, "11330": 975564288.0, "11335": 952309056.0, "11340": 970375616.0, "11345": 969803648.0, "11350": 981444096.0, "11355": 982385024.0, "11360": 941016704.0, "11365": 970431360.0, "11370": 978805312.0, "11375": 975313792.0, "11380": 968478016.0, "11385": 958625152.0, "11390": 938120832.0, "11395": 977384192.0, "11400": 972740288.0, "11405": 961470400.0, "11410": 966144128.0, "11415": 929225856.0, "11420": 964547904.0, "11425": 980999168.0, "11430": 978502144.0, "11435": 970006400.0, "11440": 945134656.0, "11445": 974983936.0, "11450": 984464448.0, "11455": 971217024.0, "11460": 964937088.0, "11465": 959659776.0, "11470": 954982528.0, "11475": 972653632.0, "11480": 956542208.0, "11485": 977052928.0, "11490": 986172224.0, "11495": 959133824.0, "11500": 970309248.0, "11505": 963864960.0, "11510": 976505280.0, "11515": 978026624.0, "11520": 954061696.0, "11525": 975806016.0, "11530": 976686720.0, "11535": 979970048.0, "11540": 974399040.0, "11545": 953188288.0, "11550": 953074304.0, "11555": 981696128.0, "11560": 984556736.0, "11565": 965242048.0, "11570": 966375872.0, "11575": 951523968.0, "11580": 975902976.0, "11585": 977428032.0, "11590": 969376384.0, "11595": 976214208.0, "11600": 946488640.0, "11605": 973334272.0, "11610": 982199616.0, "11615": 971997440.0, "11620": 968577856.0, "11625": 948852096.0, "11630": 937541248.0, "11635": 973606016.0, "11640": 981160768.0, "11645": 980069504.0, "11650": 971782464.0, "11655": 956044224.0, "11660": 980441920.0, "11665": 958240768.0, "11670": 982643776.0, "11675": 972390592.0, "11680": 956322304.0, "11685": 982734016.0, "11690": 968614464.0, "11695": 967558592.0, "11700": 973842752.0, "11705": 956309760.0, "11710": 964708032.0, "11715": 983208768.0, "11720": 983538048.0, "11725": 965265600.0, "11730": 955214208.0, "11735": 942646336.0, "11740": 973792512.0, "11745": 971088320.0, "11750": 961672064.0, "11755": 963374080.0, "11760": 949543168.0, "11765": 983755008.0, "11770": 984528384.0, "11775": 975438144.0, "11780": 984888832.0, "11785": 946940288.0, "11790": 972493440.0, "11795": 970387968.0, "11800": 973227264.0, "11805": 986472960.0, "11810": 967530304.0, "11815": 955616128.0, "11820": 973433920.0, "11825": 970646848.0, "11830": 974846080.0, "11835": 961579392.0, "11840": 944504512.0, "11845": 980497536.0, "11850": 974454528.0, "11855": 977918336.0, "11860": 971612352.0, "11865": 938262400.0, "11870": 940126208.0, "11875": 990048704.0, "11880": 972206784.0, "11885": 962901568.0, "11890": 970318208.0, "11895": 965208192.0, "11900": 979393920.0, "11905": 961566912.0, "11910": 982719680.0, "11915": 989857600.0, "11920": 944868608.0, "11925": 994162944.0, "11930": 964190528.0, "11935": 963465152.0, "11940": 976622080.0, "11945": 944763008.0, "11950": 977159808.0, "11955": 979128640.0, "11960": 972399168.0, "11965": 976260672.0, "11970": 963060544.0, "11975": 963129408.0, "11980": 977634432.0, "11985": 953073920.0, "11990": 968742400.0, "11995": 965156352.0, "12000": 958418112.0, "12005": 973932800.0, "12010": 979537920.0, "12015": 972212736.0, "12020": 973267520.0, "12025": 934456768.0, "12030": 969246336.0, "12035": 984077504.0, "12040": 977777472.0, "12045": 981645632.0, "12050": 931299456.0, "12055": 938852096.0, "12060": 974410240.0, "12065": 965723008.0, "12070": 968326208.0, "12075": 949990592.0, "12080": 953194240.0, "12085": 972673152.0, "12090": 963907776.0, "12095": 963627072.0, "12100": 976849088.0, "12105": 950520256.0, "12110": 971690368.0, "12115": 968300352.0, "12120": 986518208.0, "12125": 980798080.0, "12130": 941565184.0, "12135": 955700864.0, "12140": 976100864.0, "12145": 979711616.0, "12150": 979730944.0, "12155": 962112384.0, "12160": 946594176.0, "12165": 968588032.0, "12170": 964645760.0, "12175": 967863936.0, "12180": 975310656.0, "12185": 953131520.0, "12190": 988619136.0, "12195": 970961088.0, "12200": 965117120.0, "12205": 968926720.0, "12210": 939426496.0, "12215": 997290688.0, "12220": 970357184.0, "12225": 979974272.0, "12230": 980481408.0, "12235": 950285504.0, "12240": 963896832.0, "12245": 966011648.0, "12250": 976881216.0, "12255": 968239744.0, "12260": 983813760.0, "12265": 932017664.0, "12270": 966679040.0, "12275": 979923008.0, "12280": 977409216.0, "12285": 970702528.0, "12290": 929600256.0, "12295": 977118656.0, "12300": 986039360.0, "12305": 970126848.0, "12310": 986129472.0, "12315": 936110464.0, "12320": 957900288.0, "12325": 966485952.0, "12330": 968416512.0, "12335": 963885568.0, "12340": 957481216.0, "12345": 944333504.0, "12350": 966714496.0, "12355": 975720896.0, "12360": 978980352.0, "12365": 964351680.0, "12370": 949002240.0, "12375": 963753920.0, "12380": 964518144.0, "12385": 973139392.0, "12390": 961403136.0, "12395": 961530944.0, "12400": 975517120.0, "12405": 976523520.0, "12410": 953484928.0, "12415": 963238144.0, "12420": 944371136.0, "12425": 949470016.0, "12430": 972612736.0, "12435": 968709440.0, "12440": 962225600.0, "12445": 952293120.0, "12450": 947771904.0, "12455": 981396992.0, "12460": 974272512.0, "12465": 954487232.0, "12470": 981144320.0, "12475": 958704384.0, "12480": 967121920.0, "12485": 978366016.0, "12490": 974278784.0, "12495": 969692096.0, "12500": 961659904.0, "12505": 943534912.0, "12510": 960928768.0, "12515": 969530304.0, "12520": 973764160.0, "12525": 972291392.0, "12530": 944744576.0, "12535": 976634496.0, "12540": 965682880.0, "12545": 972057792.0, "12550": 969770432.0, "12555": 941177664.0, "12560": 964738560.0, "12565": 947584320.0, "12570": 974419712.0, "12575": 962993280.0, "12580": 958078592.0, "12585": 964422976.0, "12590": 965961088.0, "12595": 978854528.0, "12600": 981995776.0, "12605": 949260736.0, "12610": 937854656.0, "12615": 962619712.0, "12620": 961337152.0, "12625": 966494016.0, "12630": 970714112.0, "12635": 962162688.0, "12640": 978283392.0, "12645": 969272128.0, "12650": 970076032.0, "12655": 964130240.0, "12660": 932255680.0, "12665": 956938560.0, "12670": 986216960.0, "12675": 965529856.0, "12680": 961113536.0, "12685": 951218048.0, "12690": 945392256.0, "12695": 978288768.0, "12700": 985215808.0, "12705": 958807744.0, "12710": 968417792.0, "12715": 956500736.0, "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": "nan", "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": "nan", "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": "nan"}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 13000, "step_interval": 5, "values": {"1": 12698293248.0, "5": 12698293248.0, "10": 12698293248.0, "15": 12698293248.0, "20": 12698293248.0, "25": 12698293248.0, "30": 12698293248.0, "35": 12698293248.0, "40": 12698293248.0, "45": 12698293248.0, "50": 12698293248.0, "55": 12698293248.0, "60": 12698293248.0, "65": 12698293248.0, "70": 12698293248.0, "75": 12698293248.0, "80": 12698293248.0, "85": 12698293248.0, "90": 12698293248.0, "95": 12698293248.0, "100": 12698293248.0, "105": 12698293248.0, "110": 12698293248.0, "115": 12698293248.0, "120": 12698293248.0, "125": 12698293248.0, "130": 12698293248.0, "135": 12698293248.0, "140": 12698293248.0, "145": 12698293248.0, "150": 12698293248.0, "155": 12698293248.0, "160": 12698293248.0, "165": 12698293248.0, "170": 12698293248.0, "175": 12698293248.0, "180": 12698293248.0, "185": 12698293248.0, "190": 12698293248.0, "195": 12698293248.0, "200": 12698293248.0, "205": 12698293248.0, "210": 12698293248.0, "215": 12698293248.0, "220": 12698293248.0, "225": 12698293248.0, "230": 12698293248.0, "235": 12698293248.0, "240": 12698293248.0, "245": 12698293248.0, "250": 12698293248.0, "255": 12698293248.0, "260": 12698293248.0, "265": 12698293248.0, "270": 12698293248.0, "275": 12698293248.0, "280": 12698293248.0, "285": 12698293248.0, "290": 12698293248.0, "295": 12698293248.0, "300": 12698293248.0, "305": 12698293248.0, "310": 12698293248.0, "315": 12698293248.0, "320": 12698293248.0, "325": 12698293248.0, "330": 12698293248.0, "335": 12698293248.0, "340": 12698293248.0, "345": 12698293248.0, "350": 12698293248.0, "355": 12698293248.0, "360": 12698293248.0, "365": 12698293248.0, "370": 12698293248.0, "375": 12698293248.0, "380": 12698293248.0, "385": 12698293248.0, "390": 12698293248.0, "395": 12698293248.0, "400": 12698293248.0, "405": 12698293248.0, "410": 12698293248.0, "415": 12698293248.0, "420": 12698293248.0, "425": 12698293248.0, "430": 12698293248.0, "435": 12698293248.0, "440": 12698293248.0, "445": 12698293248.0, "450": 12698293248.0, "455": 12698293248.0, "460": 12698293248.0, "465": 12698293248.0, "470": 12698293248.0, "475": 12698293248.0, "480": 12698293248.0, "485": 12698293248.0, "490": 12698293248.0, "495": 12698293248.0, "500": 12698293248.0, "505": 12698293248.0, "510": 12698293248.0, "515": 12698293248.0, "520": 12698293248.0, "525": 12698293248.0, "530": 12698293248.0, "535": 12698293248.0, "540": 12698293248.0, "545": 12698293248.0, "550": 12698293248.0, "555": 12698293248.0, "560": 12698293248.0, "565": 12698293248.0, "570": 12698293248.0, "575": 12698293248.0, "580": 12698293248.0, "585": 12698293248.0, "590": 12698293248.0, "595": 12698293248.0, "600": 12698293248.0, "605": 12698293248.0, "610": 12698293248.0, "615": 12698293248.0, "620": 12698293248.0, "625": 12698293248.0, "630": 12698293248.0, "635": 12698293248.0, "640": 12698293248.0, "645": 12698293248.0, "650": 12698293248.0, "655": 12698293248.0, "660": 12698293248.0, "665": 12698293248.0, "670": 12698293248.0, "675": 12698293248.0, "680": 12698293248.0, "685": 12698293248.0, "690": 12698293248.0, "695": 12698293248.0, "700": 12698293248.0, "705": 12698293248.0, "710": 12698293248.0, "715": 12698293248.0, "720": 12698293248.0, "725": 12698293248.0, "730": 12698293248.0, "735": 12698293248.0, "740": 12698293248.0, "745": 12698293248.0, "750": 12698293248.0, "755": 12698293248.0, "760": 12698293248.0, "765": 12698293248.0, "770": 12698293248.0, "775": 12698293248.0, "780": 12698293248.0, "785": 12698293248.0, "790": 12698293248.0, "795": 12698293248.0, "800": 12698293248.0, "805": 12698293248.0, "810": 12698293248.0, "815": 12698293248.0, "820": 12698293248.0, "825": 12698293248.0, "830": 12698293248.0, "835": 12698293248.0, "840": 12698293248.0, "845": 12698293248.0, "850": 12698293248.0, "855": 12698293248.0, "860": 12698293248.0, "865": 12698293248.0, "870": 12698293248.0, "875": 12698293248.0, "880": 12698293248.0, "885": 12698293248.0, "890": 12698293248.0, "895": 12698293248.0, "900": 12698293248.0, "905": 12698293248.0, "910": 12698293248.0, "915": 12698293248.0, "920": 12698293248.0, "925": 12698293248.0, "930": 12698293248.0, "935": 12698293248.0, "940": 12698293248.0, "945": 12698293248.0, "950": 12698293248.0, "955": 12698293248.0, "960": 12698293248.0, "965": 12698293248.0, "970": 12698293248.0, "975": 12698293248.0, "980": 12698293248.0, "985": 12698293248.0, "990": 12698293248.0, "995": 12698293248.0, "1000": 12698293248.0, "1005": 12698293248.0, "1010": 12698293248.0, "1015": 12698293248.0, "1020": 12698293248.0, "1025": 12698293248.0, "1030": 12698293248.0, "1035": 12698293248.0, "1040": 12698293248.0, "1045": 12698293248.0, "1050": 12698293248.0, "1055": 12698293248.0, "1060": 12698293248.0, "1065": 12698293248.0, "1070": 12698293248.0, "1075": 12698293248.0, "1080": 12698293248.0, "1085": 12698293248.0, "1090": 12698293248.0, "1095": 12698293248.0, "1100": 12698293248.0, "1105": 12698293248.0, "1110": 12698293248.0, "1115": 12698293248.0, "1120": 12698293248.0, "1125": 12698293248.0, "1130": 12698293248.0, "1135": 12698293248.0, "1140": 12698293248.0, "1145": 12698293248.0, "1150": 12698293248.0, "1155": 12698293248.0, "1160": 12698293248.0, "1165": 12698293248.0, "1170": 12698293248.0, "1175": 12698293248.0, "1180": 12698293248.0, "1185": 12698293248.0, "1190": 12698293248.0, "1195": 12698293248.0, "1200": 12698293248.0, "1205": 12698293248.0, "1210": 12698293248.0, "1215": 12698293248.0, "1220": 12698293248.0, "1225": 12698293248.0, "1230": 12698293248.0, "1235": 12698293248.0, "1240": 12698293248.0, "1245": 12698293248.0, "1250": 12698293248.0, "1255": 12698293248.0, "1260": 12698293248.0, "1265": 12698293248.0, "1270": 12698293248.0, "1275": 12698293248.0, "1280": 12698293248.0, "1285": 12698293248.0, "1290": 12698293248.0, "1295": 12698293248.0, "1300": 12698293248.0, "1305": 12698293248.0, "1310": 12698293248.0, "1315": 12698293248.0, "1320": 12698293248.0, "1325": 12698293248.0, "1330": 12698293248.0, "1335": 12698293248.0, "1340": 12698293248.0, "1345": 12698293248.0, "1350": 12698293248.0, "1355": 12698293248.0, "1360": 12698293248.0, "1365": 12698293248.0, "1370": 12698293248.0, "1375": 12698293248.0, "1380": 12698293248.0, "1385": 12698293248.0, "1390": 12698293248.0, "1395": 12698293248.0, "1400": 12698293248.0, "1405": 12698293248.0, "1410": 12698293248.0, "1415": 12698293248.0, "1420": 12698293248.0, "1425": 12698293248.0, "1430": 12698293248.0, "1435": 12698293248.0, "1440": 12698293248.0, "1445": 12698293248.0, "1450": 12698293248.0, "1455": 12698293248.0, "1460": 12698293248.0, "1465": 12698293248.0, "1470": 12698293248.0, "1475": 12698293248.0, "1480": 12698293248.0, "1485": 12698293248.0, "1490": 12698293248.0, "1495": 12698293248.0, "1500": 12698293248.0, "1505": 12698293248.0, "1510": 12698293248.0, "1515": 12698293248.0, "1520": 12698293248.0, "1525": 12698293248.0, "1530": 12698293248.0, "1535": 12698293248.0, "1540": 12698293248.0, "1545": 12698293248.0, "1550": 12698293248.0, "1555": 12698293248.0, "1560": 12698293248.0, "1565": 12698293248.0, "1570": 12698293248.0, "1575": 12698293248.0, "1580": 12698293248.0, "1585": 12698293248.0, "1590": 12698293248.0, "1595": 12698293248.0, "1600": 12698293248.0, "1605": 12698293248.0, "1610": 12698293248.0, "1615": 12698293248.0, "1620": 12698293248.0, "1625": 12698293248.0, "1630": 12698293248.0, "1635": 12698293248.0, "1640": 12698293248.0, "1645": 12698293248.0, "1650": 12698293248.0, "1655": 12698293248.0, "1660": 12698293248.0, "1665": 12698293248.0, "1670": 12698293248.0, "1675": 12698293248.0, "1680": 12698293248.0, "1685": 12698293248.0, "1690": 12698293248.0, "1695": 12698293248.0, "1700": 12698293248.0, "1705": 12698293248.0, "1710": 12698293248.0, "1715": 12698293248.0, "1720": 12698293248.0, "1725": 12698293248.0, "1730": 12698293248.0, "1735": 12698293248.0, "1740": 12698293248.0, "1745": 12698293248.0, "1750": 12698293248.0, "1755": 12698293248.0, "1760": 12698293248.0, "1765": 12698293248.0, "1770": 12698293248.0, "1775": 12698293248.0, "1780": 12698293248.0, "1785": 12698293248.0, "1790": 12698293248.0, "1795": 12698293248.0, "1800": 12698293248.0, "1805": 12698293248.0, "1810": 12698293248.0, "1815": 12698293248.0, "1820": 12698293248.0, "1825": 12698293248.0, "1830": 12698293248.0, "1835": 12698293248.0, "1840": 12698293248.0, "1845": 12698293248.0, "1850": 12698293248.0, "1855": 12698293248.0, "1860": 12698293248.0, "1865": 12698293248.0, "1870": 12698293248.0, "1875": 12698293248.0, "1880": 12698293248.0, "1885": 12698293248.0, "1890": 12698293248.0, "1895": 12698293248.0, "1900": 12698293248.0, "1905": 12698293248.0, "1910": 12698293248.0, "1915": 12698293248.0, "1920": 12698293248.0, "1925": 12698293248.0, "1930": 12698293248.0, "1935": 12698293248.0, "1940": 12698293248.0, "1945": 12698293248.0, "1950": 12698293248.0, "1955": 12698293248.0, "1960": 12698293248.0, "1965": 12698293248.0, "1970": 12698293248.0, "1975": 12698293248.0, "1980": 12698293248.0, "1985": 12698293248.0, "1990": 12698293248.0, "1995": 12698293248.0, "2000": 12698293248.0, "2005": 12698293248.0, "2010": 12698293248.0, "2015": 12698293248.0, "2020": 12698293248.0, "2025": 12698293248.0, "2030": 12698293248.0, "2035": 12698293248.0, "2040": 12698293248.0, "2045": 12698293248.0, "2050": 12698293248.0, "2055": 12698293248.0, "2060": 12698293248.0, "2065": 12698293248.0, "2070": 12698293248.0, "2075": 12698293248.0, "2080": 12698293248.0, "2085": 12698293248.0, "2090": 12698293248.0, "2095": 12698293248.0, "2100": 12698293248.0, "2105": 12698293248.0, "2110": 12698293248.0, "2115": 12698293248.0, "2120": 12698293248.0, "2125": 12698293248.0, "2130": 12698293248.0, "2135": 12698293248.0, "2140": 12698293248.0, "2145": 12698293248.0, "2150": 12698293248.0, "2155": 12698293248.0, "2160": 12698293248.0, "2165": 12698293248.0, "2170": 12698293248.0, "2175": 12698293248.0, "2180": 12698293248.0, "2185": 12698293248.0, "2190": 12698293248.0, "2195": 12698293248.0, "2200": 12698293248.0, "2205": 12698293248.0, "2210": 12698293248.0, "2215": 12698293248.0, "2220": 12698293248.0, "2225": 12698293248.0, "2230": 12698293248.0, "2235": 12698293248.0, "2240": 12698293248.0, "2245": 12698293248.0, "2250": 12698293248.0, "2255": 12698293248.0, "2260": 12698293248.0, "2265": 12698293248.0, "2270": 12698293248.0, "2275": 12698293248.0, "2280": 12698293248.0, "2285": 12698293248.0, "2290": 12698293248.0, "2295": 12698293248.0, "2300": 12698293248.0, "2305": 12698293248.0, "2310": 12698293248.0, "2315": 12698293248.0, "2320": 12698293248.0, "2325": 12698293248.0, "2330": 12698293248.0, "2335": 12698293248.0, "2340": 12698293248.0, "2345": 12698293248.0, "2350": 12698293248.0, "2355": 12698293248.0, "2360": 12698293248.0, "2365": 12698293248.0, "2370": 12698293248.0, "2375": 12698293248.0, "2380": 12698293248.0, "2385": 12698293248.0, "2390": 12698293248.0, "2395": 12698293248.0, "2400": 12698293248.0, "2405": 12698293248.0, "2410": 12698293248.0, "2415": 12698293248.0, "2420": 12698293248.0, "2425": 12698293248.0, "2430": 12698293248.0, "2435": 12698293248.0, "2440": 12698293248.0, "2445": 12698293248.0, "2450": 12698293248.0, "2455": 12698293248.0, "2460": 12698293248.0, "2465": 12698293248.0, "2470": 12698293248.0, "2475": 12698293248.0, "2480": 12698293248.0, "2485": 12698293248.0, "2490": 12698293248.0, "2495": 12698293248.0, "2500": 12698293248.0, "2505": 12698293248.0, "2510": 12698293248.0, "2515": 12698293248.0, "2520": 12698293248.0, "2525": 12698293248.0, "2530": 12698293248.0, "2535": 12698293248.0, "2540": 12698293248.0, "2545": 12698293248.0, "2550": 12698293248.0, "2555": 12698293248.0, "2560": 12698293248.0, "2565": 12698293248.0, "2570": 12698293248.0, "2575": 12698293248.0, "2580": 12698293248.0, "2585": 12698293248.0, "2590": 12698293248.0, "2595": 12698293248.0, "2600": 12698293248.0, "2605": 12698293248.0, "2610": 12698293248.0, "2615": 12698293248.0, "2620": 12698293248.0, "2625": 12698293248.0, "2630": 12698293248.0, "2635": 12698293248.0, "2640": 12698293248.0, "2645": 12698293248.0, "2650": 12698293248.0, "2655": 12698293248.0, "2660": 12698293248.0, "2665": 12698293248.0, "2670": 12698293248.0, "2675": 12698293248.0, "2680": 12698293248.0, "2685": 12698293248.0, "2690": 12698293248.0, "2695": 12698293248.0, "2700": 12698293248.0, "2705": 12698293248.0, "2710": 12698293248.0, "2715": 12698293248.0, "2720": 12698293248.0, "2725": 12698293248.0, "2730": 12698293248.0, "2735": 12698293248.0, "2740": 12698293248.0, "2745": 12698293248.0, "2750": 12698293248.0, "2755": 12698293248.0, "2760": 12698293248.0, "2765": 12698293248.0, "2770": 12698293248.0, "2775": 12698293248.0, "2780": 12698293248.0, "2785": 12698293248.0, "2790": 12698293248.0, "2795": 12698293248.0, "2800": 12698293248.0, "2805": 12698293248.0, "2810": 12698293248.0, "2815": 12698293248.0, "2820": 12698293248.0, "2825": 12698293248.0, "2830": 12698293248.0, "2835": 12698293248.0, "2840": 12698293248.0, "2845": 12698293248.0, "2850": 12698293248.0, "2855": 12698293248.0, "2860": 12698293248.0, "2865": 12698293248.0, "2870": 12698293248.0, "2875": 12698293248.0, "2880": 12698293248.0, "2885": 12698293248.0, "2890": 12698293248.0, "2895": 12698293248.0, "2900": 12698293248.0, "2905": 12698293248.0, "2910": 12698293248.0, "2915": 12698293248.0, "2920": 12698293248.0, "2925": 12698293248.0, "2930": 12698293248.0, "2935": 12698293248.0, "2940": 12698293248.0, "2945": 12698293248.0, "2950": 12698293248.0, "2955": 12698293248.0, "2960": 12698293248.0, "2965": 12698293248.0, "2970": 12698293248.0, "2975": 12698293248.0, "2980": 12698293248.0, "2985": 12698293248.0, "2990": 12698293248.0, "2995": 12698293248.0, "3000": 12698293248.0, "3005": 12698293248.0, "3010": 12698293248.0, "3015": 12698293248.0, "3020": 12698293248.0, "3025": 12698293248.0, "3030": 12698293248.0, "3035": 12698293248.0, "3040": 12698293248.0, "3045": 12698293248.0, "3050": 12698293248.0, "3055": 12698293248.0, "3060": 12698293248.0, "3065": 12698293248.0, "3070": 12698293248.0, "3075": 12698293248.0, "3080": 12698293248.0, "3085": 12698293248.0, "3090": 12698293248.0, "3095": 12698293248.0, "3100": 12698293248.0, "3105": 12698293248.0, "3110": 12698293248.0, "3115": 12698293248.0, "3120": 12698293248.0, "3125": 12698293248.0, "3130": 12698293248.0, "3135": 12698293248.0, "3140": 12698293248.0, "3145": 12698293248.0, "3150": 12698293248.0, "3155": 12698293248.0, "3160": 12698293248.0, "3165": 12698293248.0, "3170": 12698293248.0, "3175": 12698293248.0, "3180": 12698293248.0, "3185": 12698293248.0, "3190": 12698293248.0, "3195": 12698293248.0, "3200": 12698293248.0, "3205": 12698293248.0, "3210": 12698293248.0, "3215": 12698293248.0, "3220": 12698293248.0, "3225": 12698293248.0, "3230": 12698293248.0, "3235": 12698293248.0, "3240": 12698293248.0, "3245": 12698293248.0, "3250": 12698293248.0, "3255": 12698293248.0, "3260": 12698293248.0, "3265": 12698293248.0, "3270": 12698293248.0, "3275": 12698293248.0, "3280": 12698293248.0, "3285": 12698293248.0, "3290": 12698293248.0, "3295": 12698293248.0, "3300": 12698293248.0, "3305": 12698293248.0, "3310": 12698293248.0, "3315": 12698293248.0, "3320": 12698293248.0, "3325": 12698293248.0, "3330": 12698293248.0, "3335": 12698293248.0, "3340": 12698293248.0, "3345": 12698293248.0, "3350": 12698293248.0, "3355": 12698293248.0, "3360": 12698293248.0, "3365": 12698293248.0, "3370": 12698293248.0, "3375": 12698293248.0, "3380": 12698293248.0, "3385": 12698293248.0, "3390": 12698293248.0, "3395": 12698293248.0, "3400": 12698293248.0, "3405": 12698293248.0, "3410": 12698293248.0, "3415": 12698293248.0, "3420": 12698293248.0, "3425": 12698293248.0, "3430": 12698293248.0, "3435": 12698293248.0, "3440": 12698293248.0, "3445": 12698293248.0, "3450": 12698293248.0, "3455": 12698293248.0, "3460": 12698293248.0, "3465": 12698293248.0, "3470": 12698293248.0, "3475": 12698293248.0, "3480": 12698293248.0, "3485": 12698293248.0, "3490": 12698293248.0, "3495": 12698293248.0, "3500": 12698293248.0, "3505": 12698293248.0, "3510": 12698293248.0, "3515": 12698293248.0, "3520": 12698293248.0, "3525": 12698293248.0, "3530": 12698293248.0, "3535": 12698293248.0, "3540": 12698293248.0, "3545": 12698293248.0, "3550": 12698293248.0, "3555": 12698293248.0, "3560": 12698293248.0, "3565": 12698293248.0, "3570": 12698293248.0, "3575": 12698293248.0, "3580": 12698293248.0, "3585": 12698293248.0, "3590": 12698293248.0, "3595": 12698293248.0, "3600": 12698293248.0, "3605": 12698293248.0, "3610": 12698293248.0, "3615": 12698293248.0, "3620": 12698293248.0, "3625": 12698293248.0, "3630": 12698293248.0, "3635": 12698293248.0, "3640": 12698293248.0, "3645": 12698293248.0, "3650": 12698492928.0, "3655": 12698492928.0, "3660": 12698492928.0, "3665": 12698492928.0, "3670": 12698492928.0, "3675": 12698492928.0, "3680": 12698492928.0, "3685": 12698492928.0, "3690": 12698492928.0, "3695": 12698492928.0, "3700": 12698492928.0, "3705": 12698492928.0, "3710": 12698492928.0, "3715": 12698492928.0, "3720": 12698492928.0, "3725": 12698492928.0, "3730": 12698492928.0, "3735": 12698492928.0, "3740": 12698492928.0, "3745": 12698492928.0, "3750": 12698492928.0, "3755": 12698492928.0, "3760": 12698492928.0, "3765": 12698492928.0, "3770": 12698492928.0, "3775": 12698492928.0, "3780": 12698492928.0, "3785": 12698492928.0, "3790": 12698492928.0, "3795": 12698492928.0, "3800": 12698492928.0, "3805": 12698492928.0, "3810": 12698492928.0, "3815": 12698492928.0, "3820": 12698492928.0, "3825": 12698492928.0, "3830": 12698492928.0, "3835": 12698492928.0, "3840": 12698492928.0, "3845": 12698492928.0, "3850": 12698492928.0, "3855": 12698492928.0, "3860": 12698492928.0, "3865": 12698492928.0, "3870": 12698492928.0, "3875": 12698492928.0, "3880": 12698492928.0, "3885": 12698492928.0, "3890": 12698492928.0, "3895": 12698492928.0, "3900": 12698492928.0, "3905": 12698492928.0, "3910": 12698492928.0, "3915": 12698492928.0, "3920": 12698492928.0, "3925": 12698492928.0, "3930": 12698492928.0, "3935": 12698492928.0, "3940": 12698492928.0, "3945": 12698492928.0, "3950": 12698492928.0, "3955": 12698492928.0, "3960": 12698492928.0, "3965": 12698492928.0, "3970": 12698492928.0, "3975": 12698492928.0, "3980": 12698492928.0, "3985": 12698492928.0, "3990": 12698492928.0, "3995": 12698492928.0, "4000": 12698492928.0, "4005": 12698492928.0, "4010": 12698492928.0, "4015": 12698492928.0, "4020": 12698492928.0, "4025": 12698492928.0, "4030": 12698492928.0, "4035": 12698492928.0, "4040": 12698492928.0, "4045": 12698492928.0, "4050": 12698492928.0, "4055": 12698492928.0, "4060": 12698492928.0, "4065": 12698492928.0, "4070": 12698492928.0, "4075": 12698492928.0, "4080": 12698492928.0, "4085": 12698492928.0, "4090": 12698492928.0, "4095": 12698492928.0, "4100": 12698492928.0, "4105": 12698492928.0, "4110": 12698492928.0, "4115": 12698492928.0, "4120": 12698492928.0, "4125": 12698492928.0, "4130": 12698492928.0, "4135": 12698492928.0, "4140": 12698492928.0, "4145": 12698492928.0, "4150": 12698492928.0, "4155": 12698492928.0, "4160": 12698492928.0, "4165": 12698492928.0, "4170": 12698492928.0, "4175": 12698492928.0, "4180": 12698492928.0, "4185": 12698492928.0, "4190": 12698492928.0, "4195": 12698492928.0, "4200": 12698492928.0, "4205": 12698492928.0, "4210": 12698492928.0, "4215": 12698492928.0, "4220": 12698492928.0, "4225": 12698492928.0, "4230": 12698492928.0, "4235": 12698492928.0, "4240": 12698492928.0, "4245": 12698492928.0, "4250": 12698492928.0, "4255": 12698492928.0, "4260": 12698492928.0, "4265": 12698492928.0, "4270": 12698492928.0, "4275": 12698492928.0, "4280": 12698492928.0, "4285": 12698492928.0, "4290": 12698492928.0, "4295": 12698492928.0, "4300": 12698492928.0, "4305": 12698492928.0, "4310": 12698492928.0, "4315": 12698492928.0, "4320": 12698492928.0, "4325": 12698492928.0, "4330": 12698492928.0, "4335": 12698492928.0, "4340": 12698492928.0, "4345": 12698492928.0, "4350": 12698492928.0, "4355": 12698492928.0, "4360": 12698492928.0, "4365": 12698492928.0, "4370": 12698492928.0, "4375": 12698492928.0, "4380": 12698492928.0, "4385": 12698492928.0, "4390": 12698492928.0, "4395": 12698492928.0, "4400": 12698492928.0, "4405": 12698492928.0, "4410": 12698492928.0, "4415": 12698492928.0, "4420": 12698492928.0, "4425": 12698492928.0, "4430": 12698492928.0, "4435": 12698492928.0, "4440": 12698492928.0, "4445": 12698492928.0, "4450": 12698492928.0, "4455": 12698492928.0, "4460": 12698492928.0, "4465": 12698492928.0, "4470": 12698492928.0, "4475": 12698492928.0, "4480": 12698492928.0, "4485": 12698492928.0, "4490": 12698492928.0, "4495": 12698492928.0, "4500": 12698492928.0, "4505": 12698492928.0, "4510": 12698492928.0, "4515": 12698492928.0, "4520": 12698492928.0, "4525": 12698492928.0, "4530": 12698492928.0, "4535": 12698492928.0, "4540": 12698492928.0, "4545": 12698492928.0, "4550": 12698492928.0, "4555": 12698492928.0, "4560": 12698492928.0, "4565": 12698492928.0, "4570": 12698492928.0, "4575": 12698492928.0, "4580": 12698492928.0, "4585": 12698492928.0, "4590": 12698492928.0, "4595": 12698492928.0, "4600": 12698492928.0, "4605": 12698492928.0, "4610": 12698492928.0, "4615": 12698492928.0, "4620": 12698492928.0, "4625": 12698492928.0, "4630": 12698492928.0, "4635": 12698492928.0, "4640": 12698492928.0, "4645": 12698492928.0, "4650": 12698492928.0, "4655": 12698492928.0, "4660": 12698492928.0, "4665": 12698492928.0, "4670": 12698492928.0, "4675": 12698492928.0, "4680": 12698492928.0, "4685": 12698492928.0, "4690": 12698492928.0, "4695": 12698492928.0, "4700": 12698492928.0, "4705": 12698492928.0, "4710": 12698492928.0, "4715": 12698492928.0, "4720": 12698492928.0, "4725": 12698492928.0, "4730": 12698492928.0, "4735": 12698492928.0, "4740": 12698492928.0, "4745": 12698492928.0, "4750": 12698492928.0, "4755": 12698492928.0, "4760": 12698492928.0, "4765": 12698492928.0, "4770": 12698492928.0, "4775": 12698492928.0, "4780": 12698492928.0, "4785": 12698492928.0, "4790": 12698492928.0, "4795": 12698492928.0, "4800": 12698492928.0, "4805": 12698492928.0, "4810": 12698492928.0, "4815": 12698492928.0, "4820": 12698492928.0, "4825": 12698492928.0, "4830": 12698492928.0, "4835": 12698492928.0, "4840": 12698492928.0, "4845": 12698492928.0, "4850": 12698492928.0, "4855": 12698492928.0, "4860": 12698492928.0, "4865": 12698492928.0, "4870": 12698492928.0, "4875": 12698492928.0, "4880": 12698492928.0, "4885": 12698492928.0, "4890": 12698492928.0, "4895": 12698492928.0, "4900": 12698492928.0, "4905": 12698492928.0, "4910": 12698492928.0, "4915": 12698492928.0, "4920": 12698492928.0, "4925": 12698492928.0, "4930": 12698492928.0, "4935": 12698492928.0, "4940": 12698492928.0, "4945": 12698492928.0, "4950": 12698492928.0, "4955": 12698492928.0, "4960": 12698492928.0, "4965": 12698492928.0, "4970": 12698492928.0, "4975": 12698492928.0, "4980": 12698492928.0, "4985": 12698492928.0, "4990": 12698492928.0, "4995": 12698492928.0, "5000": 12698492928.0, "5005": 12698492928.0, "5010": 12698492928.0, "5015": 12698492928.0, "5020": 12698492928.0, "5025": 12698492928.0, "5030": 12698492928.0, "5035": 12698492928.0, "5040": 12698492928.0, "5045": 12698492928.0, "5050": 12698492928.0, "5055": 12698492928.0, "5060": 12698492928.0, "5065": 12698492928.0, "5070": 12698492928.0, "5075": 12698492928.0, "5080": 12698492928.0, "5085": 12698492928.0, "5090": 12698492928.0, "5095": 12698492928.0, "5100": 12698492928.0, "5105": 12698492928.0, "5110": 12698492928.0, "5115": 12698492928.0, "5120": 12698492928.0, "5125": 12698492928.0, "5130": 12698492928.0, "5135": 12698492928.0, "5140": 12698492928.0, "5145": 12698492928.0, "5150": 12698492928.0, "5155": 12698492928.0, "5160": 12698492928.0, "5165": 12698492928.0, "5170": 12698492928.0, "5175": 12698492928.0, "5180": 12698492928.0, "5185": 12698492928.0, "5190": 12698492928.0, "5195": 12698492928.0, "5200": 12698492928.0, "5205": 12698492928.0, "5210": 12698492928.0, "5215": 12698492928.0, "5220": 12698492928.0, "5225": 12698492928.0, "5230": 12698492928.0, "5235": 12698492928.0, "5240": 12698492928.0, "5245": 12698492928.0, "5250": 12698492928.0, "5255": 12698492928.0, "5260": 12698492928.0, "5265": 12698492928.0, "5270": 12698492928.0, "5275": 12698492928.0, "5280": 12698492928.0, "5285": 12698492928.0, "5290": 12698492928.0, "5295": 12698492928.0, "5300": 12698492928.0, "5305": 12698492928.0, "5310": 12698492928.0, "5315": 12698492928.0, "5320": 12698492928.0, "5325": 12698492928.0, "5330": 12698492928.0, "5335": 12698492928.0, "5340": 12698492928.0, "5345": 12698492928.0, "5350": 12698492928.0, "5355": 12698492928.0, "5360": 12698492928.0, "5365": 12698492928.0, "5370": 12698492928.0, "5375": 12698492928.0, "5380": 12698492928.0, "5385": 12698492928.0, "5390": 12698492928.0, "5395": 12698492928.0, "5400": 12698492928.0, "5405": 12698492928.0, "5410": 12698492928.0, "5415": 12698492928.0, "5420": 12698492928.0, "5425": 12698492928.0, "5430": 12698492928.0, "5435": 12698492928.0, "5440": 12698492928.0, "5445": 12698492928.0, "5450": 12698492928.0, "5455": 12698492928.0, "5460": 12698492928.0, "5465": 12698492928.0, "5470": 12698492928.0, "5475": 12698492928.0, "5480": 12698492928.0, "5485": 12698492928.0, "5490": 12698492928.0, "5495": 12698492928.0, "5500": 12698492928.0, "5505": 12698492928.0, "5510": 12698492928.0, "5515": 12698492928.0, "5520": 12698492928.0, "5525": 12698492928.0, "5530": 12698492928.0, "5535": 12698492928.0, "5540": 12698492928.0, "5545": 12698492928.0, "5550": 12698492928.0, "5555": 12698492928.0, "5560": 12698492928.0, "5565": 12698492928.0, "5570": 12698492928.0, "5575": 12698492928.0, "5580": 12698492928.0, "5585": 12698492928.0, "5590": 12698492928.0, "5595": 12698492928.0, "5600": 12698492928.0, "5605": 12698492928.0, "5610": 12698492928.0, "5615": 12698492928.0, "5620": 12698492928.0, "5625": 12698492928.0, "5630": 12698492928.0, "5635": 12698492928.0, "5640": 12698492928.0, "5645": 12698492928.0, "5650": 12698492928.0, "5655": 12698492928.0, "5660": 12698492928.0, "5665": 12698492928.0, "5670": 12698492928.0, "5675": 12698492928.0, "5680": 12698492928.0, "5685": 12698492928.0, "5690": 12698492928.0, "5695": 12698492928.0, "5700": 12698492928.0, "5705": 12698492928.0, "5710": 12698492928.0, "5715": 12698492928.0, "5720": 12698492928.0, "5725": 12698492928.0, "5730": 12698492928.0, "5735": 12698492928.0, "5740": 12698492928.0, "5745": 12698492928.0, "5750": 12698492928.0, "5755": 12698492928.0, "5760": 12698492928.0, "5765": 12698492928.0, "5770": 12698492928.0, "5775": 12698492928.0, "5780": 12698492928.0, "5785": 12698492928.0, "5790": 12698492928.0, "5795": 12698492928.0, "5800": 12698492928.0, "5805": 12698492928.0, "5810": 12698492928.0, "5815": 12698492928.0, "5820": 12698492928.0, "5825": 12698492928.0, "5830": 12698492928.0, "5835": 12698492928.0, "5840": 12698492928.0, "5845": 12698492928.0, "5850": 12698492928.0, "5855": 12698492928.0, "5860": 12698492928.0, "5865": 12698492928.0, "5870": 12698492928.0, "5875": 12698492928.0, "5880": 12698492928.0, "5885": 12698492928.0, "5890": 12698492928.0, "5895": 12698492928.0, "5900": 12698492928.0, "5905": 12698492928.0, "5910": 12698492928.0, "5915": 12698492928.0, "5920": 12698492928.0, "5925": 12698492928.0, "5930": 12698492928.0, "5935": 12698492928.0, "5940": 12698492928.0, "5945": 12698492928.0, "5950": 12698492928.0, "5955": 12698492928.0, "5960": 12698492928.0, "5965": 12698492928.0, "5970": 12698492928.0, "5975": 12698492928.0, "5980": 12698492928.0, "5985": 12698492928.0, "5990": 12698492928.0, "5995": 12698492928.0, "6000": 12698492928.0, "6005": 12698492928.0, "6010": 12698492928.0, "6015": 12698492928.0, "6020": 12698492928.0, "6025": 12698492928.0, "6030": 12698492928.0, "6035": 12698492928.0, "6040": 12698492928.0, "6045": 12698492928.0, "6050": 12698492928.0, "6055": 12698492928.0, "6060": 12698492928.0, "6065": 12698492928.0, "6070": 12698492928.0, "6075": 12698492928.0, "6080": 12698492928.0, "6085": 12698492928.0, "6090": 12698492928.0, "6095": 12698492928.0, "6100": 12698492928.0, "6105": 12698492928.0, "6110": 12698492928.0, "6115": 12698492928.0, "6120": 12698492928.0, "6125": 12698492928.0, "6130": 12698492928.0, "6135": 12698492928.0, "6140": 12698492928.0, "6145": 12698492928.0, "6150": 12698492928.0, "6155": 12698492928.0, "6160": 12698492928.0, "6165": 12698492928.0, "6170": 12698492928.0, "6175": 12698492928.0, "6180": 12698492928.0, "6185": 12698492928.0, "6190": 12698492928.0, "6195": 12698492928.0, "6200": 12698492928.0, "6205": 12698492928.0, "6210": 12698492928.0, "6215": 12698492928.0, "6220": 12698492928.0, "6225": 12698492928.0, "6230": 12698492928.0, "6235": 12698492928.0, "6240": 12698492928.0, "6245": 12698492928.0, "6250": 12698492928.0, "6255": 12698492928.0, "6260": 12698492928.0, "6265": 12698492928.0, "6270": 12698492928.0, "6275": 12698492928.0, "6280": 12698492928.0, "6285": 12698492928.0, "6290": 12698492928.0, "6295": 12698492928.0, "6300": 12698492928.0, "6305": 12698492928.0, "6310": 12698492928.0, "6315": 12698492928.0, "6320": 12698492928.0, "6325": 12698492928.0, "6330": 12698492928.0, "6335": 12698492928.0, "6340": 12698492928.0, "6345": 12698492928.0, "6350": 12698492928.0, "6355": 12698492928.0, "6360": 12698492928.0, "6365": 12698492928.0, "6370": 12698492928.0, "6375": 12698492928.0, "6380": 12698492928.0, "6385": 12698492928.0, "6390": 12698492928.0, "6395": 12698492928.0, "6400": 12698492928.0, "6405": 12698492928.0, "6410": 12698492928.0, "6415": 12698492928.0, "6420": 12698492928.0, "6425": 12698492928.0, "6430": 12698492928.0, "6435": 12698492928.0, "6440": 12698492928.0, "6445": 12698492928.0, "6450": 12698492928.0, "6455": 12698492928.0, "6460": 12698492928.0, "6465": 12698492928.0, "6470": 12698492928.0, "6475": 12698492928.0, "6480": 12698492928.0, "6485": 12698492928.0, "6490": 12698492928.0, "6495": 12698492928.0, "6500": 12698492928.0, "6505": 12698492928.0, "6510": 12698492928.0, "6515": 12698492928.0, "6520": 12698492928.0, "6525": 12698492928.0, "6530": 12698492928.0, "6535": 12698492928.0, "6540": 12698492928.0, "6545": 12698492928.0, "6550": 12698492928.0, "6555": 12698492928.0, "6560": 12698492928.0, "6565": 12698492928.0, "6570": 12698492928.0, "6575": 12698492928.0, "6580": 12698492928.0, "6585": 12698492928.0, "6590": 12698492928.0, "6595": 12698492928.0, "6600": 12698492928.0, "6605": 12698492928.0, "6610": 12698492928.0, "6615": 12698492928.0, "6620": 12698492928.0, "6625": 12698492928.0, "6630": 12698492928.0, "6635": 12698492928.0, "6640": 12698492928.0, "6645": 12698492928.0, "6650": 12698492928.0, "6655": 12698492928.0, "6660": 12698492928.0, "6665": 12698492928.0, "6670": 12698492928.0, "6675": 12698492928.0, "6680": 12698492928.0, "6685": 12698492928.0, "6690": 12698492928.0, "6695": 12698492928.0, "6700": 12698492928.0, "6705": 12698492928.0, "6710": 12698492928.0, "6715": 12698492928.0, "6720": 12698492928.0, "6725": 12698492928.0, "6730": 12698492928.0, "6735": 12698492928.0, "6740": 12698492928.0, "6745": 12698492928.0, "6750": 12698492928.0, "6755": 12698492928.0, "6760": 12698492928.0, "6765": 12698492928.0, "6770": 12698492928.0, "6775": 12698492928.0, "6780": 12698492928.0, "6785": 12698492928.0, "6790": 12698492928.0, "6795": 12698492928.0, "6800": 12698492928.0, "6805": 12698492928.0, "6810": 12698492928.0, "6815": 12698492928.0, "6820": 12698492928.0, "6825": 12698492928.0, "6830": 12698492928.0, "6835": 12698492928.0, "6840": 12698492928.0, "6845": 12698492928.0, "6850": 12698492928.0, "6855": 12698492928.0, "6860": 12698492928.0, "6865": 12698492928.0, "6870": 12698492928.0, "6875": 12698492928.0, "6880": 12698492928.0, "6885": 12698492928.0, "6890": 12698492928.0, "6895": 12698492928.0, "6900": 12698492928.0, "6905": 12698492928.0, "6910": 12698492928.0, "6915": 12698492928.0, "6920": 12698492928.0, "6925": 12698492928.0, "6930": 12698492928.0, "6935": 12698492928.0, "6940": 12698492928.0, "6945": 12698492928.0, "6950": 12698492928.0, "6955": 12698492928.0, "6960": 12698492928.0, "6965": 12698492928.0, "6970": 12698492928.0, "6975": 12698492928.0, "6980": 12698492928.0, "6985": 12698492928.0, "6990": 12698492928.0, "6995": 12698492928.0, "7000": 12698492928.0, "7005": 12698492928.0, "7010": 12698492928.0, "7015": 12698492928.0, "7020": 12698492928.0, "7025": 12698492928.0, "7030": 12698492928.0, "7035": 12698492928.0, "7040": 12698492928.0, "7045": 12698492928.0, "7050": 12698492928.0, "7055": 12698492928.0, "7060": 12698492928.0, "7065": 12698492928.0, "7070": 12698492928.0, "7075": 12698492928.0, "7080": 12698492928.0, "7085": 12698492928.0, "7090": 12698492928.0, "7095": 12698492928.0, "7100": 12698492928.0, "7105": 12698492928.0, "7110": 12698492928.0, "7115": 12698492928.0, "7120": 12698492928.0, "7125": 12698492928.0, "7130": 12698492928.0, "7135": 12698492928.0, "7140": 12698492928.0, "7145": 12698492928.0, "7150": 12698492928.0, "7155": 12698492928.0, "7160": 12698492928.0, "7165": 12698492928.0, "7170": 12698492928.0, "7175": 12698492928.0, "7180": 12698492928.0, "7185": 12698492928.0, "7190": 12698492928.0, "7195": 12698492928.0, "7200": 12698492928.0, "7205": 12698492928.0, "7210": 12698492928.0, "7215": 12698492928.0, "7220": 12698492928.0, "7225": 12698492928.0, "7230": 12698492928.0, "7235": 12698492928.0, "7240": 12698492928.0, "7245": 12698492928.0, "7250": 12698492928.0, "7255": 12698492928.0, "7260": 12698492928.0, "7265": 12698492928.0, "7270": 12698492928.0, "7275": 12698492928.0, "7280": 12698492928.0, "7285": 12698492928.0, "7290": 12698492928.0, "7295": 12698492928.0, "7300": 12698492928.0, "7305": 12698492928.0, "7310": 12698492928.0, "7315": 12698492928.0, "7320": 12698492928.0, "7325": 12698492928.0, "7330": 12698492928.0, "7335": 12698492928.0, "7340": 12698492928.0, "7345": 12698492928.0, "7350": 12698492928.0, "7355": 12698492928.0, "7360": 12698492928.0, "7365": 12698492928.0, "7370": 12698492928.0, "7375": 12698492928.0, "7380": 12698492928.0, "7385": 12698492928.0, "7390": 12698492928.0, "7395": 12698492928.0, "7400": 12698492928.0, "7405": 12698492928.0, "7410": 12698492928.0, "7415": 12698492928.0, "7420": 12698492928.0, "7425": 12698492928.0, "7430": 12698492928.0, "7435": 12698492928.0, "7440": 12698492928.0, "7445": 12698492928.0, "7450": 12698492928.0, "7455": 12698492928.0, "7460": 12698492928.0, "7465": 12698492928.0, "7470": 12698492928.0, "7475": 12698492928.0, "7480": 12698492928.0, "7485": 12698492928.0, "7490": 12698492928.0, "7495": 12698492928.0, "7500": 12698492928.0, "7505": 12698492928.0, "7510": 12698492928.0, "7515": 12698492928.0, "7520": 12698492928.0, "7525": 12698492928.0, "7530": 12698492928.0, "7535": 12698492928.0, "7540": 12698492928.0, "7545": 12698492928.0, "7550": 12698492928.0, "7555": 12698492928.0, "7560": 12698492928.0, "7565": 12698492928.0, "7570": 12698492928.0, "7575": 12698492928.0, "7580": 12698492928.0, "7585": 12698492928.0, "7590": 12698492928.0, "7595": 12698492928.0, "7600": 12698492928.0, "7605": 12698492928.0, "7610": 12698492928.0, "7615": 12698492928.0, "7620": 12698492928.0, "7625": 12698492928.0, "7630": 12698492928.0, "7635": 12698492928.0, "7640": 12698492928.0, "7645": 12698492928.0, "7650": 12698492928.0, "7655": 12698492928.0, "7660": 12698492928.0, "7665": 12698492928.0, "7670": 12698492928.0, "7675": 12698492928.0, "7680": 12698492928.0, "7685": 12698492928.0, "7690": 12698492928.0, "7695": 12698492928.0, "7700": 12698492928.0, "7705": 12698492928.0, "7710": 12698492928.0, "7715": 12698492928.0, "7720": 12698492928.0, "7725": 12698492928.0, "7730": 12698492928.0, "7735": 12698492928.0, "7740": 12698492928.0, "7745": 12698492928.0, "7750": 12698492928.0, "7755": 12698492928.0, "7760": 12698492928.0, "7765": 12698492928.0, "7770": 12698492928.0, "7775": 12698492928.0, "7780": 12698492928.0, "7785": 12698492928.0, "7790": 12698492928.0, "7795": 12698492928.0, "7800": 12698492928.0, "7805": 12698492928.0, "7810": 12698492928.0, "7815": 12698492928.0, "7820": 12698492928.0, "7825": 12698492928.0, "7830": 12698492928.0, "7835": 12698492928.0, "7840": 12698492928.0, "7845": 12698492928.0, "7850": 12698492928.0, "7855": 12698492928.0, "7860": 12698492928.0, "7865": 12698492928.0, "7870": 12698492928.0, "7875": 12698492928.0, "7880": 12698492928.0, "7885": 12698492928.0, "7890": 12698492928.0, "7895": 12698492928.0, "7900": 12698492928.0, "7905": 12698492928.0, "7910": 12698492928.0, "7915": 12698492928.0, "7920": 12698492928.0, "7925": 12698492928.0, "7930": 12698492928.0, "7935": 12698492928.0, "7940": 12698492928.0, "7945": 12698492928.0, "7950": 12698492928.0, "7955": 12698492928.0, "7960": 12698492928.0, "7965": 12698492928.0, "7970": 12698492928.0, "7975": 12698492928.0, "7980": 12698492928.0, "7985": 12698492928.0, "7990": 12698492928.0, "7995": 12698492928.0, "8000": 12698492928.0, "8005": 12698492928.0, "8010": 12698492928.0, "8015": 12698492928.0, "8020": 12698492928.0, "8025": 12698492928.0, "8030": 12698492928.0, "8035": 12698492928.0, "8040": 12698492928.0, "8045": 12698492928.0, "8050": 12698492928.0, "8055": 12698492928.0, "8060": 12698492928.0, "8065": 12698492928.0, "8070": 12698492928.0, "8075": 12698492928.0, "8080": 12698492928.0, "8085": 12698492928.0, "8090": 12698492928.0, "8095": 12698492928.0, "8100": 12698492928.0, "8105": 12698492928.0, "8110": 12698492928.0, "8115": 12698492928.0, "8120": 12698492928.0, "8125": 12698492928.0, "8130": 12698492928.0, "8135": 12698492928.0, "8140": 12698492928.0, "8145": 12698492928.0, "8150": 12698492928.0, "8155": 12698492928.0, "8160": 12698492928.0, "8165": 12698492928.0, "8170": 12698492928.0, "8175": 12698492928.0, "8180": 12698492928.0, "8185": 12698492928.0, "8190": 12698492928.0, "8195": 12698492928.0, "8200": 12698492928.0, "8205": 12698492928.0, "8210": 12698492928.0, "8215": 12698492928.0, "8220": 12698492928.0, "8225": 12698492928.0, "8230": 12698492928.0, "8235": 12698492928.0, "8240": 12698492928.0, "8245": 12698492928.0, "8250": 12698492928.0, "8255": 12698492928.0, "8260": 12698492928.0, "8265": 12698492928.0, "8270": 12698492928.0, "8275": 12698492928.0, "8280": 12698492928.0, "8285": 12698492928.0, "8290": 12698492928.0, "8295": 12698492928.0, "8300": 12698492928.0, "8305": 12698492928.0, "8310": 12698492928.0, "8315": 12698492928.0, "8320": 12698492928.0, "8325": 12698492928.0, "8330": 12698492928.0, "8335": 12698492928.0, "8340": 12698492928.0, "8345": 12698492928.0, "8350": 12698492928.0, "8355": 12698492928.0, "8360": 12698492928.0, "8365": 12698492928.0, "8370": 12698492928.0, "8375": 12698492928.0, "8380": 12698492928.0, "8385": 12698492928.0, "8390": 12698492928.0, "8395": 12698492928.0, "8400": 12698492928.0, "8405": 12698492928.0, "8410": 12698492928.0, "8415": 12698492928.0, "8420": 12698492928.0, "8425": 12698492928.0, "8430": 12698492928.0, "8435": 12698492928.0, "8440": 12698492928.0, "8445": 12698492928.0, "8450": 12698492928.0, "8455": 12698492928.0, "8460": 12698492928.0, "8465": 12698492928.0, "8470": 12698492928.0, "8475": 12698492928.0, "8480": 12698492928.0, "8485": 12698492928.0, "8490": 12698492928.0, "8495": 12698492928.0, "8500": 12698492928.0, "8505": 12698492928.0, "8510": 12698492928.0, "8515": 12698492928.0, "8520": 12698492928.0, "8525": 12698492928.0, "8530": 12698492928.0, "8535": 12698492928.0, "8540": 12698492928.0, "8545": 12698492928.0, "8550": 12698492928.0, "8555": 12698492928.0, "8560": 12698492928.0, "8565": 12698492928.0, "8570": 12698492928.0, "8575": 12698492928.0, "8580": 12698492928.0, "8585": 12698492928.0, "8590": 12698492928.0, "8595": 12698492928.0, "8600": 12698492928.0, "8605": 12698492928.0, "8610": 12698492928.0, "8615": 12698492928.0, "8620": 12698492928.0, "8625": 12698492928.0, "8630": 12698492928.0, "8635": 12698492928.0, "8640": 12698492928.0, "8645": 12698492928.0, "8650": 12698492928.0, "8655": 12698492928.0, "8660": 12698492928.0, "8665": 12698492928.0, "8670": 12698492928.0, "8675": 12698492928.0, "8680": 12698492928.0, "8685": 12698492928.0, "8690": 12698492928.0, "8695": 12698492928.0, "8700": 12698492928.0, "8705": 12698492928.0, "8710": 12698492928.0, "8715": 12698492928.0, "8720": 12698492928.0, "8725": 12698492928.0, "8730": 12698492928.0, "8735": 12698492928.0, "8740": 12698492928.0, "8745": 12698492928.0, "8750": 12698492928.0, "8755": 12698492928.0, "8760": 12698492928.0, "8765": 12698492928.0, "8770": 12698492928.0, "8775": 12698492928.0, "8780": 12698492928.0, "8785": 12698492928.0, "8790": 12698492928.0, "8795": 12698492928.0, "8800": 12698492928.0, "8805": 12698492928.0, "8810": 12698492928.0, "8815": 12698492928.0, "8820": 12698492928.0, "8825": 12698492928.0, "8830": 12698492928.0, "8835": 12698492928.0, "8840": 12698492928.0, "8845": 12698492928.0, "8850": 12698492928.0, "8855": 12698492928.0, "8860": 12698492928.0, "8865": 12698492928.0, "8870": 12698492928.0, "8875": 12698492928.0, "8880": 12698492928.0, "8885": 12698492928.0, "8890": 12698492928.0, "8895": 12698492928.0, "8900": 12698492928.0, "8905": 12698492928.0, "8910": 12698492928.0, "8915": 12698492928.0, "8920": 12698492928.0, "8925": 12698492928.0, "8930": 12698492928.0, "8935": 12698492928.0, "8940": 12698492928.0, "8945": 12698492928.0, "8950": 12698492928.0, "8955": 12698492928.0, "8960": 12698492928.0, "8965": 12698492928.0, "8970": 12698492928.0, "8975": 12698492928.0, "8980": 12698492928.0, "8985": 12698492928.0, "8990": 12698492928.0, "8995": 12698492928.0, "9000": 12698492928.0, "9005": 12698492928.0, "9010": 12698492928.0, "9015": 12698492928.0, "9020": 12698492928.0, "9025": 12698492928.0, "9030": 12698492928.0, "9035": 12698492928.0, "9040": 12698492928.0, "9045": 12698492928.0, "9050": 12698492928.0, "9055": 12698492928.0, "9060": 12698492928.0, "9065": 12698492928.0, "9070": 12698492928.0, "9075": 12698492928.0, "9080": 12698492928.0, "9085": 12698492928.0, "9090": 12698492928.0, "9095": 12698492928.0, "9100": 12698492928.0, "9105": 12698492928.0, "9110": 12698492928.0, "9115": 12698492928.0, "9120": 12698492928.0, "9125": 12698492928.0, "9130": 12698492928.0, "9135": 12698492928.0, "9140": 12698492928.0, "9145": 12698492928.0, "9150": 12698492928.0, "9155": 12698492928.0, "9160": 12698492928.0, "9165": 12698492928.0, "9170": 12698492928.0, "9175": 12698492928.0, "9180": 12698492928.0, "9185": 12698492928.0, "9190": 12698492928.0, "9195": 12698492928.0, "9200": 12698492928.0, "9205": 12698492928.0, "9210": 12698492928.0, "9215": 12698492928.0, "9220": 12698492928.0, "9225": 12698492928.0, "9230": 12698492928.0, "9235": 12698492928.0, "9240": 12698492928.0, "9245": 12698492928.0, "9250": 12698492928.0, "9255": 12698492928.0, "9260": 12698492928.0, "9265": 12698492928.0, "9270": 12698492928.0, "9275": 12698492928.0, "9280": 12698492928.0, "9285": 12698492928.0, "9290": 12698492928.0, "9295": 12698492928.0, "9300": 12698492928.0, "9305": 12698492928.0, "9310": 12698492928.0, "9315": 12698492928.0, "9320": 12698492928.0, "9325": 12698492928.0, "9330": 12698492928.0, "9335": 12698492928.0, "9340": 12698492928.0, "9345": 12698492928.0, "9350": 12698492928.0, "9355": 12698492928.0, "9360": 12698492928.0, "9365": 12698492928.0, "9370": 12698492928.0, "9375": 12698492928.0, "9380": 12698492928.0, "9385": 12698492928.0, "9390": 12698492928.0, "9395": 12698492928.0, "9400": 12698492928.0, "9405": 12698492928.0, "9410": 12698492928.0, "9415": 12698492928.0, "9420": 12698492928.0, "9425": 12698492928.0, "9430": 12698492928.0, "9435": 12698492928.0, "9440": 12698492928.0, "9445": 12698492928.0, "9450": 12698492928.0, "9455": 12698492928.0, "9460": 12698492928.0, "9465": 12698492928.0, "9470": 12698492928.0, "9475": 12698492928.0, "9480": 12698492928.0, "9485": 12698492928.0, "9490": 12698492928.0, "9495": 12698492928.0, "9500": 12698492928.0, "9505": 12698492928.0, "9510": 12698492928.0, "9515": 12698492928.0, "9520": 12698492928.0, "9525": 12698492928.0, "9530": 12698492928.0, "9535": 12698492928.0, "9540": 12698492928.0, "9545": 12698492928.0, "9550": 12698492928.0, "9555": 12698492928.0, "9560": 12698492928.0, "9565": 12698492928.0, "9570": 12698492928.0, "9575": 12698492928.0, "9580": 12698492928.0, "9585": 12698492928.0, "9590": 12698492928.0, "9595": 12698492928.0, "9600": 12698492928.0, "9605": 12698492928.0, "9610": 12698492928.0, "9615": 12698492928.0, "9620": 12698492928.0, "9625": 12698492928.0, "9630": 12698492928.0, "9635": 12698492928.0, "9640": 12698492928.0, "9645": 12698492928.0, "9650": 12698492928.0, "9655": 12698492928.0, "9660": 12698492928.0, "9665": 12698492928.0, "9670": 12698492928.0, "9675": 12698492928.0, "9680": 12698492928.0, "9685": 12698492928.0, "9690": 12698492928.0, "9695": 12698492928.0, "9700": 12698492928.0, "9705": 12698492928.0, "9710": 12698492928.0, "9715": 12698492928.0, "9720": 12698492928.0, "9725": 12698492928.0, "9730": 12698492928.0, "9735": 12698492928.0, "9740": 12698492928.0, "9745": 12698492928.0, "9750": 12698492928.0, "9755": 12698492928.0, "9760": 12698492928.0, "9765": 12698492928.0, "9770": 12698492928.0, "9775": 12698492928.0, "9780": 12698492928.0, "9785": 12698492928.0, "9790": 12698492928.0, "9795": 12698492928.0, "9800": 12698492928.0, "9805": 12698492928.0, "9810": 12698492928.0, "9815": 12698492928.0, "9820": 12698492928.0, "9825": 12698492928.0, "9830": 12698492928.0, "9835": 12698492928.0, "9840": 12698492928.0, "9845": 12698492928.0, "9850": 12698492928.0, "9855": 12698492928.0, "9860": 12698492928.0, "9865": 12698492928.0, "9870": 12698492928.0, "9875": 12698492928.0, "9880": 12698492928.0, "9885": 12698492928.0, "9890": 12698492928.0, "9895": 12698492928.0, "9900": 12698492928.0, "9905": 12698492928.0, "9910": 12698492928.0, "9915": 12698492928.0, "9920": 12698492928.0, "9925": 12698492928.0, "9930": 12698492928.0, "9935": 12698492928.0, "9940": 12698492928.0, "9945": 12698492928.0, "9950": 12698492928.0, "9955": 12698492928.0, "9960": 12698492928.0, "9965": 12698492928.0, "9970": 12698492928.0, "9975": 12698492928.0, "9980": 12698492928.0, "9985": 12698492928.0, "9990": 12698492928.0, "9995": 12698492928.0, "10000": 12698492928.0, "10005": 12698492928.0, "10010": 12698492928.0, "10015": 12698492928.0, "10020": 12698492928.0, "10025": 12698492928.0, "10030": 12698492928.0, "10035": 12698492928.0, "10040": 12698492928.0, "10045": 12698492928.0, "10050": 12698492928.0, "10055": 12698492928.0, "10060": 12698492928.0, "10065": 12698492928.0, "10070": 12698492928.0, "10075": 12698492928.0, "10080": 12698492928.0, "10085": 12698492928.0, "10090": 12698492928.0, "10095": 12698492928.0, "10100": 12698492928.0, "10105": 12698492928.0, "10110": 12698492928.0, "10115": 12698492928.0, "10120": 12698492928.0, "10125": 12698492928.0, "10130": 12698492928.0, "10135": 12698492928.0, "10140": 12698492928.0, "10145": 12698492928.0, "10150": 12698492928.0, "10155": 12698492928.0, "10160": 12698492928.0, "10165": 12698492928.0, "10170": 12698492928.0, "10175": 12698492928.0, "10180": 12698492928.0, "10185": 12698492928.0, "10190": 12698492928.0, "10195": 12698492928.0, "10200": 12698492928.0, "10205": 12698492928.0, "10210": 12698492928.0, "10215": 12698492928.0, "10220": 12698492928.0, "10225": 12698492928.0, "10230": 12698492928.0, "10235": 12698492928.0, "10240": 12698492928.0, "10245": 12698492928.0, "10250": 12698492928.0, "10255": 12698492928.0, "10260": 12698492928.0, "10265": 12698492928.0, "10270": 12698492928.0, "10275": 12698492928.0, "10280": 12698492928.0, "10285": 12698492928.0, "10290": 12698492928.0, "10295": 12698492928.0, "10300": 12698492928.0, "10305": 12698492928.0, "10310": 12698492928.0, "10315": 12698492928.0, "10320": 12698492928.0, "10325": 12698492928.0, "10330": 12698492928.0, "10335": 12698492928.0, "10340": 12698492928.0, "10345": 12698492928.0, "10350": 12698492928.0, "10355": 12698492928.0, "10360": 12698492928.0, "10365": 12698492928.0, "10370": 12698492928.0, "10375": 12698492928.0, "10380": 12698492928.0, "10385": 12698492928.0, "10390": 12698492928.0, "10395": 12698492928.0, "10400": 12698492928.0, "10405": 12698492928.0, "10410": 12698492928.0, "10415": 12698492928.0, "10420": 12698492928.0, "10425": 12698492928.0, "10430": 12698492928.0, "10435": 12698492928.0, "10440": 12698492928.0, "10445": 12698492928.0, "10450": 12698492928.0, "10455": 12698492928.0, "10460": 12698492928.0, "10465": 12698492928.0, "10470": 12698492928.0, "10475": 12698492928.0, "10480": 12698492928.0, "10485": 12698492928.0, "10490": 12698492928.0, "10495": 12698492928.0, "10500": 12698492928.0, "10505": 12698492928.0, "10510": 12698492928.0, "10515": 12698492928.0, "10520": 12698492928.0, "10525": 12698492928.0, "10530": 12698492928.0, "10535": 12698492928.0, "10540": 12698492928.0, "10545": 12698492928.0, "10550": 12698492928.0, "10555": 12698492928.0, "10560": 12698492928.0, "10565": 12698492928.0, "10570": 12698492928.0, "10575": 12698492928.0, "10580": 12698492928.0, "10585": 12698492928.0, "10590": 12698492928.0, "10595": 12698492928.0, "10600": 12698492928.0, "10605": 12698492928.0, "10610": 12698492928.0, "10615": 12698492928.0, "10620": 12698492928.0, "10625": 12698492928.0, "10630": 12698492928.0, "10635": 12698492928.0, "10640": 12698492928.0, "10645": 12698492928.0, "10650": 12698492928.0, "10655": 12698492928.0, "10660": 12698492928.0, "10665": 12698492928.0, "10670": 12698492928.0, "10675": 12698492928.0, "10680": 12698492928.0, "10685": 12698492928.0, "10690": 12698492928.0, "10695": 12698492928.0, "10700": 12698492928.0, "10705": 12698492928.0, "10710": 12698492928.0, "10715": 12698492928.0, "10720": 12698492928.0, "10725": 12698492928.0, "10730": 12698492928.0, "10735": 12698492928.0, "10740": 12698492928.0, "10745": 12698492928.0, "10750": 12698492928.0, "10755": 12698492928.0, "10760": 12698492928.0, "10765": 12698492928.0, "10770": 12698492928.0, "10775": 12698492928.0, "10780": 12698492928.0, "10785": 12698492928.0, "10790": 12698492928.0, "10795": 12698492928.0, "10800": 12698492928.0, "10805": 12698492928.0, "10810": 12698492928.0, "10815": 12698492928.0, "10820": 12698492928.0, "10825": 12698492928.0, "10830": 12698492928.0, "10835": 12698492928.0, "10840": 12698492928.0, "10845": 12698492928.0, "10850": 12698492928.0, "10855": 12698492928.0, "10860": 12698492928.0, "10865": 12698492928.0, "10870": 12698492928.0, "10875": 12698492928.0, "10880": 12698492928.0, "10885": 12698492928.0, "10890": 12698492928.0, "10895": 12698492928.0, "10900": 12698492928.0, "10905": 12698492928.0, "10910": 12698492928.0, "10915": 12698492928.0, "10920": 12698492928.0, "10925": 12698492928.0, "10930": 12698492928.0, "10935": 12698492928.0, "10940": 12698492928.0, "10945": 12698492928.0, "10950": 12698492928.0, "10955": 12698492928.0, "10960": 12698492928.0, "10965": 12698492928.0, "10970": 12698492928.0, "10975": 12698492928.0, "10980": 12698492928.0, "10985": 12698492928.0, "10990": 12698492928.0, "10995": 12698492928.0, "11000": 12698492928.0, "11005": 12698492928.0, "11010": 12698492928.0, "11015": 12698492928.0, "11020": 12698492928.0, "11025": 12698492928.0, "11030": 12698492928.0, "11035": 12698492928.0, "11040": 12698492928.0, "11045": 12698492928.0, "11050": 12698492928.0, "11055": 12698492928.0, "11060": 12698492928.0, "11065": 12698492928.0, "11070": 12698492928.0, "11075": 12698492928.0, "11080": 12698492928.0, "11085": 12698492928.0, "11090": 12698492928.0, "11095": 12698492928.0, "11100": 12698492928.0, "11105": 12698492928.0, "11110": 12698492928.0, "11115": 12698492928.0, "11120": 12698492928.0, "11125": 12698492928.0, "11130": 12698492928.0, "11135": 12698492928.0, "11140": 12698492928.0, "11145": 12698492928.0, "11150": 12698492928.0, "11155": 12698492928.0, "11160": 12698492928.0, "11165": 12698492928.0, "11170": 12698492928.0, "11175": 12698492928.0, "11180": 12698492928.0, "11185": 12698492928.0, "11190": 12698492928.0, "11195": 12698492928.0, "11200": 12698492928.0, "11205": 12698492928.0, "11210": 12698492928.0, "11215": 12698492928.0, "11220": 12698492928.0, "11225": 12698492928.0, "11230": 12698492928.0, "11235": 12698492928.0, "11240": 12698492928.0, "11245": 12698492928.0, "11250": 12698492928.0, "11255": 12698492928.0, "11260": 12698492928.0, "11265": 12698492928.0, "11270": 12698492928.0, "11275": 12698492928.0, "11280": 12698492928.0, "11285": 12698492928.0, "11290": 12698492928.0, "11295": 12698492928.0, "11300": 12698492928.0, "11305": 12698492928.0, "11310": 12698492928.0, "11315": 12698492928.0, "11320": 12698492928.0, "11325": 12698492928.0, "11330": 12698492928.0, "11335": 12698492928.0, "11340": 12698492928.0, "11345": 12698492928.0, "11350": 12698492928.0, "11355": 12698492928.0, "11360": 12698492928.0, "11365": 12698492928.0, "11370": 12698492928.0, "11375": 12698492928.0, "11380": 12698492928.0, "11385": 12698492928.0, "11390": 12698492928.0, "11395": 12698492928.0, "11400": 12698492928.0, "11405": 12698492928.0, "11410": 12698492928.0, "11415": 12698492928.0, "11420": 12698492928.0, "11425": 12698492928.0, "11430": 12698492928.0, "11435": 12698492928.0, "11440": 12698492928.0, "11445": 12698492928.0, "11450": 12698492928.0, "11455": 12698492928.0, "11460": 12698492928.0, "11465": 12698492928.0, "11470": 12698492928.0, "11475": 12698492928.0, "11480": 12698492928.0, "11485": 12698492928.0, "11490": 12698492928.0, "11495": 12698492928.0, "11500": 12698492928.0, "11505": 12698492928.0, "11510": 12698492928.0, "11515": 12698492928.0, "11520": 12698492928.0, "11525": 12698492928.0, "11530": 12698492928.0, "11535": 12698492928.0, "11540": 12698492928.0, "11545": 12698492928.0, "11550": 12698492928.0, "11555": 12698492928.0, "11560": 12698492928.0, "11565": 12698492928.0, "11570": 12698492928.0, "11575": 12698492928.0, "11580": 12698492928.0, "11585": 12698492928.0, "11590": 12698492928.0, "11595": 12698492928.0, "11600": 12698492928.0, "11605": 12698492928.0, "11610": 12698492928.0, "11615": 12698492928.0, "11620": 12698492928.0, "11625": 12698492928.0, "11630": 12698492928.0, "11635": 12698492928.0, "11640": 12698492928.0, "11645": 12698492928.0, "11650": 12698492928.0, "11655": 12698492928.0, "11660": 12698492928.0, "11665": 12698492928.0, "11670": 12698492928.0, "11675": 12698492928.0, "11680": 12698492928.0, "11685": 12698492928.0, "11690": 12698492928.0, "11695": 12698492928.0, "11700": 12698492928.0, "11705": 12698492928.0, "11710": 12698492928.0, "11715": 12698492928.0, "11720": 12698492928.0, "11725": 12698492928.0, "11730": 12698492928.0, "11735": 12698492928.0, "11740": 12698492928.0, "11745": 12698492928.0, "11750": 12698492928.0, "11755": 12698492928.0, "11760": 12698492928.0, "11765": 12698492928.0, "11770": 12698492928.0, "11775": 12698492928.0, "11780": 12698492928.0, "11785": 12698492928.0, "11790": 12698492928.0, "11795": 12698492928.0, "11800": 12698492928.0, "11805": 12698492928.0, "11810": 12698492928.0, "11815": 12698492928.0, "11820": 12698492928.0, "11825": 12698492928.0, "11830": 12698492928.0, "11835": 12698492928.0, "11840": 12698492928.0, "11845": 12698492928.0, "11850": 12698492928.0, "11855": 12698492928.0, "11860": 12698492928.0, "11865": 12698492928.0, "11870": 12698492928.0, "11875": 12698492928.0, "11880": 12698492928.0, "11885": 12698492928.0, "11890": 12698492928.0, "11895": 12698492928.0, "11900": 12698492928.0, "11905": 12698492928.0, "11910": 12698492928.0, "11915": 12698492928.0, "11920": 12698492928.0, "11925": 12698492928.0, "11930": 12698492928.0, "11935": 12698492928.0, "11940": 12698492928.0, "11945": 12698492928.0, "11950": 12698492928.0, "11955": 12698492928.0, "11960": 12698492928.0, "11965": 12698492928.0, "11970": 12698492928.0, "11975": 12698492928.0, "11980": 12698492928.0, "11985": 12698492928.0, "11990": 12698492928.0, "11995": 12698492928.0, "12000": 12698492928.0, "12005": 12698492928.0, "12010": 12698492928.0, "12015": 12698492928.0, "12020": 12698492928.0, "12025": 12698492928.0, "12030": 12698492928.0, "12035": 12698492928.0, "12040": 12698492928.0, "12045": 12698492928.0, "12050": 12698492928.0, "12055": 12698492928.0, "12060": 12698492928.0, "12065": 12698492928.0, "12070": 12698492928.0, "12075": 12698492928.0, "12080": 12698492928.0, "12085": 12698492928.0, "12090": 12698492928.0, "12095": 12698492928.0, "12100": 12698492928.0, "12105": 12698492928.0, "12110": 12698492928.0, "12115": 12698492928.0, "12120": 12698492928.0, "12125": 12698492928.0, "12130": 12698492928.0, "12135": 12698492928.0, "12140": 12698492928.0, "12145": 12698492928.0, "12150": 12698492928.0, "12155": 12698492928.0, "12160": 12698492928.0, "12165": 12698492928.0, "12170": 12698492928.0, "12175": 12698492928.0, "12180": 12698492928.0, "12185": 12698492928.0, "12190": 12698492928.0, "12195": 12698492928.0, "12200": 12698492928.0, "12205": 12698492928.0, "12210": 12698492928.0, "12215": 12698492928.0, "12220": 12698492928.0, "12225": 12698492928.0, "12230": 12698492928.0, "12235": 12698492928.0, "12240": 12698492928.0, "12245": 12698492928.0, "12250": 12698492928.0, "12255": 12698492928.0, "12260": 12698492928.0, "12265": 12698492928.0, "12270": 12698492928.0, "12275": 12698492928.0, "12280": 12698492928.0, "12285": 12698492928.0, "12290": 12698492928.0, "12295": 12698492928.0, "12300": 12698492928.0, "12305": 12698492928.0, "12310": 12698492928.0, "12315": 12698492928.0, "12320": 12698492928.0, "12325": 12698492928.0, "12330": 12698492928.0, "12335": 12698492928.0, "12340": 12698492928.0, "12345": 12698492928.0, "12350": 12698492928.0, "12355": 12698492928.0, "12360": 12698492928.0, "12365": 12698492928.0, "12370": 12698492928.0, "12375": 12698492928.0, "12380": 12698492928.0, "12385": 12698492928.0, "12390": 12698492928.0, "12395": 12698492928.0, "12400": 12698492928.0, "12405": 12698492928.0, "12410": 12698492928.0, "12415": 12698492928.0, "12420": 12698492928.0, "12425": 12698492928.0, "12430": 12698492928.0, "12435": 12698492928.0, "12440": 12698492928.0, "12445": 12698492928.0, "12450": 12698492928.0, "12455": 12698492928.0, "12460": 12698492928.0, "12465": 12698492928.0, "12470": 12698492928.0, "12475": 12698492928.0, "12480": 12698492928.0, "12485": 12698492928.0, "12490": 12698492928.0, "12495": 12698492928.0, "12500": 12698492928.0, "12505": 12698492928.0, "12510": 12698492928.0, "12515": 12698492928.0, "12520": 12698492928.0, "12525": 12698492928.0, "12530": 12698492928.0, "12535": 12698492928.0, "12540": 12698492928.0, "12545": 12698492928.0, "12550": 12698492928.0, "12555": 12698492928.0, "12560": 12698492928.0, "12565": 12698492928.0, "12570": 12698492928.0, "12575": 12698492928.0, "12580": 12698492928.0, "12585": 12698492928.0, "12590": 12698492928.0, "12595": 12698492928.0, "12600": 12698492928.0, "12605": 12698492928.0, "12610": 12698492928.0, "12615": 12698492928.0, "12620": 12698492928.0, "12625": 12698492928.0, "12630": 12698492928.0, "12635": 12698492928.0, "12640": 12698492928.0, "12645": 12698492928.0, "12650": 12698492928.0, "12655": 12698492928.0, "12660": 12698492928.0, "12665": 12698492928.0, "12670": 12698492928.0, "12675": 12698492928.0, "12680": 12698492928.0, "12685": 12698492928.0, "12690": 12698492928.0, "12695": 12698492928.0, "12700": 12698492928.0, "12705": 12698492928.0, "12710": 12698492928.0, "12715": 12698492928.0, "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": "nan", "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": "nan", "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": "nan"}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 13000, "step_interval": 5, "values": {"1": 27893811200.0, "5": 28391901184.0, "10": 28391901184.0, "15": 28391901184.0, "20": 28391901184.0, "25": 28391901184.0, "30": 28391901184.0, "35": 28391901184.0, "40": 28391901184.0, "45": 28391901184.0, "50": 28391901184.0, "55": 28391901184.0, "60": 28391901184.0, "65": 28391901184.0, "70": 28391901184.0, "75": 28391901184.0, "80": 28391901184.0, "85": 28391901184.0, "90": 28391901184.0, "95": 28391901184.0, "100": 28391901184.0, "105": 28391901184.0, "110": 28391901184.0, "115": 28391901184.0, "120": 28391901184.0, "125": 28391901184.0, "130": 28391901184.0, "135": 28391901184.0, "140": 28391901184.0, "145": 28391901184.0, "150": 28391901184.0, "155": 28391901184.0, "160": 28391901184.0, "165": 28391901184.0, "170": 28391901184.0, "175": 28391901184.0, "180": 28391901184.0, "185": 28391901184.0, "190": 28391901184.0, "195": 28391901184.0, "200": 28391901184.0, "205": 28391901184.0, "210": 28391901184.0, "215": 28391901184.0, "220": 28391901184.0, "225": 28391901184.0, "230": 28391901184.0, "235": 28391901184.0, "240": 28391901184.0, "245": 28391901184.0, "250": 28391901184.0, "255": 28391901184.0, "260": 28391901184.0, "265": 28391901184.0, "270": 28391901184.0, "275": 28391901184.0, "280": 28391901184.0, "285": 28391901184.0, "290": 28391901184.0, "295": 28391901184.0, "300": 28391901184.0, "305": 28391901184.0, "310": 28391901184.0, "315": 28391901184.0, "320": 28391901184.0, "325": 28391901184.0, "330": 28391901184.0, "335": 28391901184.0, "340": 28391901184.0, "345": 28391901184.0, "350": 28391901184.0, "355": 28391901184.0, "360": 28391901184.0, "365": 28391901184.0, "370": 28391901184.0, "375": 28391901184.0, "380": 28391901184.0, "385": 28391901184.0, "390": 28391901184.0, "395": 28391901184.0, "400": 28391901184.0, "405": 28391901184.0, "410": 28391901184.0, "415": 28391901184.0, "420": 28391901184.0, "425": 28391901184.0, "430": 28391901184.0, "435": 28391901184.0, "440": 28391901184.0, "445": 28391901184.0, "450": 28391901184.0, "455": 28391901184.0, "460": 28391901184.0, "465": 28391901184.0, "470": 28391901184.0, "475": 28391901184.0, "480": 28391901184.0, "485": 28391901184.0, "490": 28391901184.0, "495": 28391901184.0, "500": 28391901184.0, "505": 28391901184.0, "510": 28391901184.0, "515": 28391901184.0, "520": 28391901184.0, "525": 28391901184.0, "530": 28391901184.0, "535": 28391901184.0, "540": 28391901184.0, "545": 28391901184.0, "550": 28391901184.0, "555": 28391901184.0, "560": 28391901184.0, "565": 28391901184.0, "570": 28391901184.0, "575": 28391901184.0, "580": 28391901184.0, "585": 28391901184.0, "590": 28391901184.0, "595": 28391901184.0, "600": 28391901184.0, "605": 28391901184.0, "610": 28391901184.0, "615": 28391901184.0, "620": 28391901184.0, "625": 28391901184.0, "630": 28391901184.0, "635": 28391901184.0, "640": 28391901184.0, "645": 28391901184.0, "650": 28391901184.0, "655": 28391901184.0, "660": 28391901184.0, "665": 28391901184.0, "670": 28391901184.0, "675": 28391901184.0, "680": 28391901184.0, "685": 28391901184.0, "690": 28391901184.0, "695": 28391901184.0, "700": 28391901184.0, "705": 28391901184.0, "710": 28391901184.0, "715": 28391901184.0, "720": 28391901184.0, "725": 28391901184.0, "730": 28391901184.0, "735": 28391901184.0, "740": 28391901184.0, "745": 28391901184.0, "750": 28391901184.0, "755": 28391901184.0, "760": 28391901184.0, "765": 28391901184.0, "770": 28391901184.0, "775": 28391901184.0, "780": 28391901184.0, "785": 28391901184.0, "790": 28391901184.0, "795": 28391901184.0, "800": 28391901184.0, "805": 28391901184.0, "810": 28391901184.0, "815": 28391901184.0, "820": 28391901184.0, "825": 28391901184.0, "830": 28391901184.0, "835": 28391901184.0, "840": 28391901184.0, "845": 28391901184.0, "850": 28391901184.0, "855": 28391901184.0, "860": 28391901184.0, "865": 28391901184.0, "870": 28391901184.0, "875": 28391901184.0, "880": 28391901184.0, "885": 28391901184.0, "890": 28391901184.0, "895": 28391901184.0, "900": 28391901184.0, "905": 28391901184.0, "910": 28391901184.0, "915": 28391901184.0, "920": 28391901184.0, "925": 28391901184.0, "930": 28391901184.0, "935": 28391901184.0, "940": 28391901184.0, "945": 28391901184.0, "950": 28391901184.0, "955": 28391901184.0, "960": 28391901184.0, "965": 28391901184.0, "970": 28391901184.0, "975": 28391901184.0, "980": 28391901184.0, "985": 28391901184.0, "990": 28391901184.0, "995": 28391901184.0, "1000": 28391901184.0, "1005": 28391901184.0, "1010": 28391901184.0, "1015": 28391901184.0, "1020": 28391901184.0, "1025": 28391901184.0, "1030": 28391901184.0, "1035": 28391901184.0, "1040": 28391901184.0, "1045": 28391901184.0, "1050": 28391901184.0, "1055": 28391901184.0, "1060": 28391901184.0, "1065": 28391901184.0, "1070": 28391901184.0, "1075": 28391901184.0, "1080": 28391901184.0, "1085": 28391901184.0, "1090": 28391901184.0, "1095": 28391901184.0, "1100": 28391901184.0, "1105": 28391901184.0, "1110": 28391901184.0, "1115": 28391901184.0, "1120": 28391901184.0, "1125": 28391901184.0, "1130": 28391901184.0, "1135": 28391901184.0, "1140": 28391901184.0, "1145": 28391901184.0, "1150": 28391901184.0, "1155": 28391901184.0, "1160": 28391901184.0, "1165": 28391901184.0, "1170": 28391901184.0, "1175": 28391901184.0, "1180": 28391901184.0, "1185": 28391901184.0, "1190": 28391901184.0, "1195": 28391901184.0, "1200": 28391901184.0, "1205": 28391901184.0, "1210": 28391901184.0, "1215": 28391901184.0, "1220": 28391901184.0, "1225": 28391901184.0, "1230": 28391901184.0, "1235": 28391901184.0, "1240": 28391901184.0, "1245": 28391901184.0, "1250": 28391901184.0, "1255": 28391901184.0, "1260": 28391901184.0, "1265": 28391901184.0, "1270": 28391901184.0, "1275": 28391901184.0, "1280": 28391901184.0, "1285": 28391901184.0, "1290": 28391901184.0, "1295": 28391901184.0, "1300": 28391901184.0, "1305": 28391901184.0, "1310": 28391901184.0, "1315": 28391901184.0, "1320": 28391901184.0, "1325": 28391901184.0, "1330": 28391901184.0, "1335": 28391901184.0, "1340": 28391901184.0, "1345": 28391901184.0, "1350": 28391901184.0, "1355": 28391901184.0, "1360": 28391901184.0, "1365": 28391901184.0, "1370": 28391901184.0, "1375": 28391901184.0, "1380": 28391901184.0, "1385": 28391901184.0, "1390": 28391901184.0, "1395": 28391901184.0, "1400": 28391901184.0, "1405": 28391901184.0, "1410": 28391901184.0, "1415": 28391901184.0, "1420": 28391901184.0, "1425": 28391901184.0, "1430": 28391901184.0, "1435": 28391901184.0, "1440": 28391901184.0, "1445": 28391901184.0, "1450": 28391901184.0, "1455": 28391901184.0, "1460": 28391901184.0, "1465": 28391901184.0, "1470": 28391901184.0, "1475": 28391901184.0, "1480": 28391901184.0, "1485": 28391901184.0, "1490": 28391901184.0, "1495": 28391901184.0, "1500": 28391901184.0, "1505": 28391901184.0, "1510": 28391901184.0, "1515": 28391901184.0, "1520": 28391901184.0, "1525": 28391901184.0, "1530": 28391901184.0, "1535": 28391901184.0, "1540": 28391901184.0, "1545": 28391901184.0, "1550": 28391901184.0, "1555": 28391901184.0, "1560": 28391901184.0, "1565": 28391901184.0, "1570": 28391901184.0, "1575": 28391901184.0, "1580": 28391901184.0, "1585": 28391901184.0, "1590": 28391901184.0, "1595": 28391901184.0, "1600": 28391901184.0, "1605": 28391901184.0, "1610": 28391901184.0, "1615": 28391901184.0, "1620": 28391901184.0, "1625": 28391901184.0, "1630": 28391901184.0, "1635": 28391901184.0, "1640": 28391901184.0, "1645": 28391901184.0, "1650": 28391901184.0, "1655": 28391901184.0, "1660": 28391901184.0, "1665": 28391901184.0, "1670": 28391901184.0, "1675": 28391901184.0, "1680": 28391901184.0, "1685": 28391901184.0, "1690": 28391901184.0, "1695": 28391901184.0, "1700": 28391901184.0, "1705": 28391901184.0, "1710": 28391901184.0, "1715": 28391901184.0, "1720": 28391901184.0, "1725": 28391901184.0, "1730": 28391901184.0, "1735": 28391901184.0, "1740": 28391901184.0, "1745": 28391901184.0, "1750": 28391901184.0, "1755": 28391901184.0, "1760": 28391901184.0, "1765": 28391901184.0, "1770": 28391901184.0, "1775": 28391901184.0, "1780": 28391901184.0, "1785": 28391901184.0, "1790": 28391901184.0, "1795": 28391901184.0, "1800": 28391901184.0, "1805": 28391901184.0, "1810": 28391901184.0, "1815": 28391901184.0, "1820": 28391901184.0, "1825": 28391901184.0, "1830": 28391901184.0, "1835": 28391901184.0, "1840": 28391901184.0, "1845": 28391901184.0, "1850": 28391901184.0, "1855": 28391901184.0, "1860": 28391901184.0, "1865": 28391901184.0, "1870": 28391901184.0, "1875": 28391901184.0, "1880": 28391901184.0, "1885": 28391901184.0, "1890": 28391901184.0, "1895": 28391901184.0, "1900": 28391901184.0, "1905": 28391901184.0, "1910": 28391901184.0, "1915": 28391901184.0, "1920": 28391901184.0, "1925": 28391901184.0, "1930": 28391901184.0, "1935": 28391901184.0, "1940": 28391901184.0, "1945": 28391901184.0, "1950": 28391901184.0, "1955": 28391901184.0, "1960": 28391901184.0, "1965": 28391901184.0, "1970": 28391901184.0, "1975": 28391901184.0, "1980": 28391901184.0, "1985": 28391901184.0, "1990": 28391901184.0, "1995": 28391901184.0, "2000": 28391901184.0, "2005": 28391901184.0, "2010": 28391901184.0, "2015": 28391901184.0, "2020": 28391901184.0, "2025": 28391901184.0, "2030": 28391901184.0, "2035": 28391901184.0, "2040": 28391901184.0, "2045": 28391901184.0, "2050": 28391901184.0, "2055": 28391901184.0, "2060": 28391901184.0, "2065": 28391901184.0, "2070": 28391901184.0, "2075": 28391901184.0, "2080": 28391901184.0, "2085": 28391901184.0, "2090": 28391901184.0, "2095": 28391901184.0, "2100": 28391901184.0, "2105": 28391901184.0, "2110": 28391901184.0, "2115": 28391901184.0, "2120": 28391901184.0, "2125": 28391901184.0, "2130": 28391901184.0, "2135": 28391901184.0, "2140": 28391901184.0, "2145": 28391901184.0, "2150": 28391901184.0, "2155": 28391901184.0, "2160": 28391901184.0, "2165": 28391901184.0, "2170": 28391901184.0, "2175": 28391901184.0, "2180": 28391901184.0, "2185": 28391901184.0, "2190": 28391901184.0, "2195": 28391901184.0, "2200": 28391901184.0, "2205": 28391901184.0, "2210": 28391901184.0, "2215": 28391901184.0, "2220": 28391901184.0, "2225": 28391901184.0, "2230": 28391901184.0, "2235": 28391901184.0, "2240": 28391901184.0, "2245": 28391901184.0, "2250": 28391901184.0, "2255": 28391901184.0, "2260": 28391901184.0, "2265": 28391901184.0, "2270": 28391901184.0, "2275": 28391901184.0, "2280": 28391901184.0, "2285": 28391901184.0, "2290": 28391901184.0, "2295": 28391901184.0, "2300": 28391901184.0, "2305": 28391901184.0, "2310": 28391901184.0, "2315": 28391901184.0, "2320": 28391901184.0, "2325": 28391901184.0, "2330": 28391901184.0, "2335": 28391901184.0, "2340": 28391901184.0, "2345": 28391901184.0, "2350": 28391901184.0, "2355": 28391901184.0, "2360": 28391901184.0, "2365": 28391901184.0, "2370": 28391901184.0, "2375": 28391901184.0, "2380": 28391901184.0, "2385": 28391901184.0, "2390": 28391901184.0, "2395": 28391901184.0, "2400": 28391901184.0, "2405": 28391901184.0, "2410": 28391901184.0, "2415": 28391901184.0, "2420": 28391901184.0, "2425": 28391901184.0, "2430": 28391901184.0, "2435": 28391901184.0, "2440": 28391901184.0, "2445": 28391901184.0, "2450": 28391901184.0, "2455": 28391901184.0, "2460": 28391901184.0, "2465": 28391901184.0, "2470": 28391901184.0, "2475": 28391901184.0, "2480": 28391901184.0, "2485": 28391901184.0, "2490": 28391901184.0, "2495": 28391901184.0, "2500": 28391901184.0, "2505": 28391901184.0, "2510": 28391901184.0, "2515": 28391901184.0, "2520": 28391901184.0, "2525": 28391901184.0, "2530": 28391901184.0, "2535": 28391901184.0, "2540": 28391901184.0, "2545": 28391901184.0, "2550": 28391901184.0, "2555": 28391901184.0, "2560": 28391901184.0, "2565": 28391901184.0, "2570": 28391901184.0, "2575": 28391901184.0, "2580": 28391901184.0, "2585": 28391901184.0, "2590": 28391901184.0, "2595": 28391901184.0, "2600": 28391901184.0, "2605": 28391901184.0, "2610": 28391901184.0, "2615": 28391901184.0, "2620": 28391901184.0, "2625": 28391901184.0, "2630": 28391901184.0, "2635": 28391901184.0, "2640": 28391901184.0, "2645": 28391901184.0, "2650": 28391901184.0, "2655": 28391901184.0, "2660": 28391901184.0, "2665": 28391901184.0, "2670": 28391901184.0, "2675": 28391901184.0, "2680": 28391901184.0, "2685": 28391901184.0, "2690": 28391901184.0, "2695": 28391901184.0, "2700": 28391901184.0, "2705": 28391901184.0, "2710": 28391901184.0, "2715": 28391901184.0, "2720": 28391901184.0, "2725": 28391901184.0, "2730": 28391901184.0, "2735": 28391901184.0, "2740": 28391901184.0, "2745": 28391901184.0, "2750": 28391901184.0, "2755": 28391901184.0, "2760": 28391901184.0, "2765": 28391901184.0, "2770": 28391901184.0, "2775": 28391901184.0, "2780": 28391901184.0, "2785": 28391901184.0, "2790": 28391901184.0, "2795": 28391901184.0, "2800": 28391901184.0, "2805": 28391901184.0, "2810": 28391901184.0, "2815": 28391901184.0, "2820": 28391901184.0, "2825": 28391901184.0, "2830": 28391901184.0, "2835": 28391901184.0, "2840": 28391901184.0, "2845": 28391901184.0, "2850": 28391901184.0, "2855": 28391901184.0, "2860": 28391901184.0, "2865": 28391901184.0, "2870": 28391901184.0, "2875": 28391901184.0, "2880": 28391901184.0, "2885": 28391901184.0, "2890": 28391901184.0, "2895": 28391901184.0, "2900": 28391901184.0, "2905": 28391901184.0, "2910": 28391901184.0, "2915": 28391901184.0, "2920": 28391901184.0, "2925": 28391901184.0, "2930": 28391901184.0, "2935": 28391901184.0, "2940": 28391901184.0, "2945": 28391901184.0, "2950": 28391901184.0, "2955": 28391901184.0, "2960": 28391901184.0, "2965": 28391901184.0, "2970": 28391901184.0, "2975": 28391901184.0, "2980": 28391901184.0, "2985": 28391901184.0, "2990": 28391901184.0, "2995": 28391901184.0, "3000": 28391901184.0, "3005": 28391901184.0, "3010": 28391901184.0, "3015": 28391901184.0, "3020": 28391901184.0, "3025": 28391901184.0, "3030": 28391901184.0, "3035": 28391901184.0, "3040": 28391901184.0, "3045": 28391901184.0, "3050": 28391901184.0, "3055": 28391901184.0, "3060": 28391901184.0, "3065": 28391901184.0, "3070": 28391901184.0, "3075": 28391901184.0, "3080": 28391901184.0, "3085": 28391901184.0, "3090": 28391901184.0, "3095": 28391901184.0, "3100": 28391901184.0, "3105": 28391901184.0, "3110": 28391901184.0, "3115": 28391901184.0, "3120": 28391901184.0, "3125": 28391901184.0, "3130": 28391901184.0, "3135": 28391901184.0, "3140": 28391901184.0, "3145": 28391901184.0, "3150": 28391901184.0, "3155": 28391901184.0, "3160": 28391901184.0, "3165": 28391901184.0, "3170": 28391901184.0, "3175": 28391901184.0, "3180": 28391901184.0, "3185": 28391901184.0, "3190": 28391901184.0, "3195": 28391901184.0, "3200": 28391901184.0, "3205": 28391901184.0, "3210": 28391901184.0, "3215": 28391901184.0, "3220": 28391901184.0, "3225": 28391901184.0, "3230": 28391901184.0, "3235": 28391901184.0, "3240": 28391901184.0, "3245": 28391901184.0, "3250": 28391901184.0, "3255": 28391901184.0, "3260": 28391901184.0, "3265": 28391901184.0, "3270": 28391901184.0, "3275": 28391901184.0, "3280": 28391901184.0, "3285": 28391901184.0, "3290": 28391901184.0, "3295": 28391901184.0, "3300": 28391901184.0, "3305": 28391901184.0, "3310": 28391901184.0, "3315": 28391901184.0, "3320": 28391901184.0, "3325": 28391901184.0, "3330": 28391901184.0, "3335": 28391901184.0, "3340": 28391901184.0, "3345": 28391901184.0, "3350": 28391901184.0, "3355": 28391901184.0, "3360": 28391901184.0, "3365": 28391901184.0, "3370": 28391901184.0, "3375": 28391901184.0, "3380": 28391901184.0, "3385": 28391901184.0, "3390": 28391901184.0, "3395": 28391901184.0, "3400": 28391901184.0, "3405": 28391901184.0, "3410": 28391901184.0, "3415": 28391901184.0, "3420": 28391901184.0, "3425": 28391901184.0, "3430": 28391901184.0, "3435": 28391901184.0, "3440": 28391901184.0, "3445": 28391901184.0, "3450": 28391901184.0, "3455": 28391901184.0, "3460": 28391901184.0, "3465": 28391901184.0, "3470": 28391901184.0, "3475": 28391901184.0, "3480": 28391901184.0, "3485": 28391901184.0, "3490": 28391901184.0, "3495": 28391901184.0, "3500": 28391901184.0, "3505": 28391901184.0, "3510": 28391901184.0, "3515": 28391901184.0, "3520": 28391901184.0, "3525": 28391901184.0, "3530": 28391901184.0, "3535": 28391901184.0, "3540": 28391901184.0, "3545": 28391901184.0, "3550": 28391901184.0, "3555": 28391901184.0, "3560": 28391901184.0, "3565": 28391901184.0, "3570": 28391901184.0, "3575": 28391901184.0, "3580": 28391901184.0, "3585": 28391901184.0, "3590": 28391901184.0, "3595": 28391901184.0, "3600": 28391901184.0, "3605": 28391901184.0, "3610": 28391901184.0, "3615": 28391901184.0, "3620": 28391901184.0, "3625": 28391901184.0, "3630": 28391901184.0, "3635": 28391901184.0, "3640": 28391901184.0, "3645": 28391901184.0, "3650": 28391712768.0, "3655": 28391712768.0, "3660": 28391712768.0, "3665": 28391712768.0, "3670": 28391712768.0, "3675": 28391712768.0, "3680": 28391712768.0, "3685": 28391712768.0, "3690": 28391712768.0, "3695": 28391712768.0, "3700": 28391712768.0, "3705": 28391712768.0, "3710": 28391712768.0, "3715": 28391712768.0, "3720": 28391712768.0, "3725": 28391712768.0, "3730": 28391712768.0, "3735": 28391712768.0, "3740": 28391712768.0, "3745": 28391712768.0, "3750": 28391712768.0, "3755": 28391712768.0, "3760": 28391712768.0, "3765": 28391712768.0, "3770": 28391712768.0, "3775": 28391712768.0, "3780": 28391712768.0, "3785": 28391712768.0, "3790": 28391712768.0, "3795": 28391712768.0, "3800": 28391712768.0, "3805": 28391712768.0, "3810": 28391712768.0, "3815": 28391712768.0, "3820": 28391712768.0, "3825": 28391712768.0, "3830": 28391712768.0, "3835": 28391712768.0, "3840": 28391712768.0, "3845": 28391712768.0, "3850": 28391712768.0, "3855": 28391712768.0, "3860": 28391712768.0, "3865": 28391712768.0, "3870": 28391712768.0, "3875": 28391712768.0, "3880": 28391712768.0, "3885": 28391712768.0, "3890": 28391712768.0, "3895": 28391712768.0, "3900": 28391712768.0, "3905": 28391712768.0, "3910": 28391712768.0, "3915": 28391712768.0, "3920": 28391712768.0, "3925": 28391712768.0, "3930": 28391712768.0, "3935": 28391712768.0, "3940": 28391712768.0, "3945": 28391712768.0, "3950": 28391712768.0, "3955": 28391712768.0, "3960": 28391712768.0, "3965": 28391712768.0, "3970": 28391712768.0, "3975": 28391712768.0, "3980": 28391712768.0, "3985": 28391712768.0, "3990": 28391712768.0, "3995": 28391712768.0, "4000": 28391712768.0, "4005": 28391712768.0, "4010": 28391712768.0, "4015": 28391712768.0, "4020": 28391712768.0, "4025": 28391712768.0, "4030": 28391712768.0, "4035": 28391712768.0, "4040": 28391712768.0, "4045": 28391712768.0, "4050": 28391712768.0, "4055": 28391712768.0, "4060": 28391712768.0, "4065": 28391712768.0, "4070": 28391712768.0, "4075": 28391712768.0, "4080": 28391712768.0, "4085": 28391712768.0, "4090": 28391712768.0, "4095": 28391712768.0, "4100": 28391712768.0, "4105": 28391712768.0, "4110": 28391712768.0, "4115": 28391712768.0, "4120": 28391712768.0, "4125": 28391712768.0, "4130": 28391712768.0, "4135": 28391712768.0, "4140": 28391712768.0, "4145": 28391712768.0, "4150": 28391712768.0, "4155": 28391712768.0, "4160": 28391712768.0, "4165": 28391712768.0, "4170": 28391712768.0, "4175": 28391712768.0, "4180": 28391712768.0, "4185": 28391712768.0, "4190": 28391712768.0, "4195": 28391712768.0, "4200": 28391712768.0, "4205": 28391712768.0, "4210": 28391712768.0, "4215": 28391712768.0, "4220": 28391712768.0, "4225": 28391712768.0, "4230": 28391712768.0, "4235": 28391712768.0, "4240": 28391712768.0, "4245": 28391712768.0, "4250": 28391712768.0, "4255": 28391712768.0, "4260": 28391712768.0, "4265": 28391712768.0, "4270": 28391712768.0, "4275": 28391712768.0, "4280": 28391712768.0, "4285": 28391712768.0, "4290": 28391712768.0, "4295": 28391712768.0, "4300": 28391712768.0, "4305": 28391712768.0, "4310": 28391712768.0, "4315": 28391712768.0, "4320": 28391712768.0, "4325": 28391712768.0, "4330": 28391712768.0, "4335": 28391712768.0, "4340": 28391712768.0, "4345": 28391712768.0, "4350": 28391712768.0, "4355": 28391712768.0, "4360": 28391712768.0, "4365": 28391712768.0, "4370": 28391712768.0, "4375": 28391712768.0, "4380": 28391712768.0, "4385": 28391712768.0, "4390": 28391712768.0, "4395": 28391712768.0, "4400": 28391712768.0, "4405": 28391712768.0, "4410": 28391712768.0, "4415": 28391712768.0, "4420": 28391712768.0, "4425": 28391712768.0, "4430": 28391712768.0, "4435": 28391712768.0, "4440": 28391712768.0, "4445": 28391712768.0, "4450": 28391712768.0, "4455": 28391712768.0, "4460": 28391712768.0, "4465": 28391712768.0, "4470": 28391712768.0, "4475": 28391712768.0, "4480": 28391712768.0, "4485": 28391712768.0, "4490": 28391712768.0, "4495": 28391712768.0, "4500": 28391712768.0, "4505": 28391712768.0, "4510": 28391712768.0, "4515": 28391712768.0, "4520": 28391712768.0, "4525": 28391712768.0, "4530": 28391712768.0, "4535": 28391712768.0, "4540": 28391712768.0, "4545": 28391712768.0, "4550": 28391712768.0, "4555": 28391712768.0, "4560": 28391712768.0, "4565": 28391712768.0, "4570": 28391712768.0, "4575": 28391712768.0, "4580": 28391712768.0, "4585": 28391712768.0, "4590": 28391712768.0, "4595": 28391712768.0, "4600": 28391712768.0, "4605": 28391712768.0, "4610": 28391712768.0, "4615": 28391712768.0, "4620": 28391712768.0, "4625": 28391712768.0, "4630": 28391712768.0, "4635": 28391712768.0, "4640": 28391712768.0, "4645": 28391712768.0, "4650": 28391712768.0, "4655": 28391712768.0, "4660": 28391712768.0, "4665": 28391712768.0, "4670": 28391712768.0, "4675": 28391712768.0, "4680": 28391712768.0, "4685": 28391712768.0, "4690": 28391712768.0, "4695": 28391712768.0, "4700": 28391712768.0, "4705": 28391712768.0, "4710": 28391712768.0, "4715": 28391712768.0, "4720": 28391712768.0, "4725": 28391712768.0, "4730": 28391712768.0, "4735": 28391712768.0, "4740": 28391712768.0, "4745": 28391712768.0, "4750": 28391712768.0, "4755": 28391712768.0, "4760": 28391712768.0, "4765": 28391712768.0, "4770": 28391712768.0, "4775": 28391712768.0, "4780": 28391712768.0, "4785": 28391712768.0, "4790": 28391712768.0, "4795": 28391712768.0, "4800": 28391712768.0, "4805": 28391712768.0, "4810": 28391712768.0, "4815": 28391712768.0, "4820": 28391712768.0, "4825": 28391712768.0, "4830": 28391712768.0, "4835": 28391712768.0, "4840": 28391712768.0, "4845": 28391712768.0, "4850": 28391712768.0, "4855": 28391712768.0, "4860": 28391712768.0, "4865": 28391712768.0, "4870": 28391712768.0, "4875": 28391712768.0, "4880": 28391712768.0, "4885": 28391712768.0, "4890": 28391712768.0, "4895": 28391712768.0, "4900": 28391712768.0, "4905": 28391712768.0, "4910": 28391712768.0, "4915": 28391712768.0, "4920": 28391712768.0, "4925": 28391712768.0, "4930": 28391712768.0, "4935": 28391712768.0, "4940": 28391712768.0, "4945": 28391712768.0, "4950": 28391712768.0, "4955": 28391712768.0, "4960": 28391712768.0, "4965": 28391712768.0, "4970": 28391712768.0, "4975": 28391712768.0, "4980": 28391712768.0, "4985": 28391712768.0, "4990": 28391712768.0, "4995": 28391712768.0, "5000": 28391712768.0, "5005": 28391712768.0, "5010": 28391712768.0, "5015": 28391712768.0, "5020": 28391712768.0, "5025": 28391712768.0, "5030": 28391712768.0, "5035": 28391712768.0, "5040": 28391712768.0, "5045": 28391712768.0, "5050": 28391712768.0, "5055": 28391712768.0, "5060": 28391712768.0, "5065": 28391712768.0, "5070": 28391712768.0, "5075": 28391712768.0, "5080": 28391712768.0, "5085": 28391712768.0, "5090": 28391712768.0, "5095": 28391712768.0, "5100": 28391712768.0, "5105": 28391712768.0, "5110": 28391712768.0, "5115": 28391712768.0, "5120": 28391712768.0, "5125": 28391712768.0, "5130": 28391712768.0, "5135": 28391712768.0, "5140": 28391712768.0, "5145": 28391712768.0, "5150": 28391712768.0, "5155": 28391712768.0, "5160": 28391712768.0, "5165": 28391712768.0, "5170": 28391712768.0, "5175": 28391712768.0, "5180": 28391712768.0, "5185": 28391712768.0, "5190": 28391712768.0, "5195": 28391712768.0, "5200": 28391712768.0, "5205": 28391712768.0, "5210": 28391712768.0, "5215": 28391712768.0, "5220": 28391712768.0, "5225": 28391712768.0, "5230": 28391712768.0, "5235": 28391712768.0, "5240": 28391712768.0, "5245": 28391712768.0, "5250": 28391712768.0, "5255": 28391712768.0, "5260": 28391712768.0, "5265": 28391712768.0, "5270": 28391712768.0, "5275": 28391712768.0, "5280": 28391712768.0, "5285": 28391712768.0, "5290": 28391712768.0, "5295": 28391712768.0, "5300": 28391712768.0, "5305": 28391712768.0, "5310": 28391712768.0, "5315": 28391712768.0, "5320": 28391712768.0, "5325": 28391712768.0, "5330": 28391712768.0, "5335": 28391712768.0, "5340": 28391712768.0, "5345": 28391712768.0, "5350": 28391712768.0, "5355": 28391712768.0, "5360": 28391712768.0, "5365": 28391712768.0, "5370": 28391712768.0, "5375": 28391712768.0, "5380": 28391712768.0, "5385": 28391712768.0, "5390": 28391712768.0, "5395": 28391712768.0, "5400": 28391712768.0, "5405": 28391712768.0, "5410": 28391712768.0, "5415": 28391712768.0, "5420": 28391712768.0, "5425": 28391712768.0, "5430": 28391712768.0, "5435": 28391712768.0, "5440": 28391712768.0, "5445": 28391712768.0, "5450": 28391712768.0, "5455": 28391712768.0, "5460": 28391712768.0, "5465": 28391712768.0, "5470": 28391712768.0, "5475": 28391712768.0, "5480": 28391712768.0, "5485": 28391712768.0, "5490": 28391712768.0, "5495": 28391712768.0, "5500": 28391712768.0, "5505": 28391712768.0, "5510": 28391712768.0, "5515": 28391712768.0, "5520": 28391712768.0, "5525": 28391712768.0, "5530": 28391712768.0, "5535": 28391712768.0, "5540": 28391712768.0, "5545": 28391712768.0, "5550": 28391712768.0, "5555": 28391712768.0, "5560": 28391712768.0, "5565": 28391712768.0, "5570": 28391712768.0, "5575": 28391712768.0, "5580": 28391712768.0, "5585": 28391712768.0, "5590": 28391712768.0, "5595": 28391712768.0, "5600": 28391712768.0, "5605": 28391712768.0, "5610": 28391712768.0, "5615": 28391712768.0, "5620": 28391712768.0, "5625": 28391712768.0, "5630": 28391712768.0, "5635": 28391712768.0, "5640": 28391712768.0, "5645": 28391712768.0, "5650": 28391712768.0, "5655": 28391712768.0, "5660": 28391712768.0, "5665": 28391712768.0, "5670": 28391712768.0, "5675": 28391712768.0, "5680": 28391712768.0, "5685": 28391712768.0, "5690": 28391712768.0, "5695": 28391712768.0, "5700": 28391712768.0, "5705": 28391712768.0, "5710": 28391712768.0, "5715": 28391712768.0, "5720": 28391712768.0, "5725": 28391712768.0, "5730": 28391712768.0, "5735": 28391712768.0, "5740": 28391712768.0, "5745": 28391712768.0, "5750": 28391712768.0, "5755": 28391712768.0, "5760": 28391712768.0, "5765": 28391712768.0, "5770": 28391712768.0, "5775": 28391712768.0, "5780": 28391712768.0, "5785": 28391712768.0, "5790": 28391712768.0, "5795": 28391712768.0, "5800": 28391712768.0, "5805": 28391712768.0, "5810": 28391712768.0, "5815": 28391712768.0, "5820": 28391712768.0, "5825": 28391712768.0, "5830": 28391712768.0, "5835": 28391712768.0, "5840": 28391712768.0, "5845": 28391712768.0, "5850": 28391712768.0, "5855": 28391712768.0, "5860": 28391712768.0, "5865": 28391712768.0, "5870": 28391712768.0, "5875": 28391712768.0, "5880": 28391712768.0, "5885": 28391712768.0, "5890": 28391712768.0, "5895": 28391712768.0, "5900": 28391712768.0, "5905": 28391712768.0, "5910": 28391712768.0, "5915": 28391712768.0, "5920": 28391712768.0, "5925": 28391712768.0, "5930": 28391712768.0, "5935": 28391712768.0, "5940": 28391712768.0, "5945": 28391712768.0, "5950": 28391712768.0, "5955": 28391712768.0, "5960": 28391712768.0, "5965": 28391712768.0, "5970": 28391712768.0, "5975": 28391712768.0, "5980": 28391712768.0, "5985": 28391712768.0, "5990": 28391712768.0, "5995": 28391712768.0, "6000": 28391712768.0, "6005": 28391712768.0, "6010": 28391712768.0, "6015": 28391712768.0, "6020": 28391712768.0, "6025": 28391712768.0, "6030": 28391712768.0, "6035": 28391712768.0, "6040": 28391712768.0, "6045": 28391712768.0, "6050": 28391712768.0, "6055": 28391712768.0, "6060": 28391712768.0, "6065": 28391712768.0, "6070": 28391712768.0, "6075": 28391712768.0, "6080": 28391712768.0, "6085": 28391712768.0, "6090": 28391712768.0, "6095": 28391712768.0, "6100": 28391712768.0, "6105": 28391712768.0, "6110": 28391712768.0, "6115": 28391712768.0, "6120": 28391712768.0, "6125": 28391712768.0, "6130": 28391712768.0, "6135": 28391712768.0, "6140": 28391712768.0, "6145": 28391712768.0, "6150": 28391712768.0, "6155": 28391712768.0, "6160": 28391712768.0, "6165": 28391712768.0, "6170": 28391712768.0, "6175": 28391712768.0, "6180": 28391712768.0, "6185": 28391712768.0, "6190": 28391712768.0, "6195": 28391712768.0, "6200": 28391712768.0, "6205": 28391712768.0, "6210": 28391712768.0, "6215": 28391712768.0, "6220": 28391712768.0, "6225": 28391712768.0, "6230": 28391712768.0, "6235": 28391712768.0, "6240": 28391712768.0, "6245": 28391712768.0, "6250": 28391712768.0, "6255": 28391712768.0, "6260": 28391712768.0, "6265": 28391712768.0, "6270": 28391712768.0, "6275": 28391712768.0, "6280": 28391712768.0, "6285": 28391712768.0, "6290": 28391712768.0, "6295": 28391712768.0, "6300": 28391712768.0, "6305": 28391712768.0, "6310": 28391712768.0, "6315": 28391712768.0, "6320": 28391712768.0, "6325": 28391712768.0, "6330": 28391712768.0, "6335": 28391712768.0, "6340": 28391712768.0, "6345": 28391712768.0, "6350": 28391712768.0, "6355": 28391712768.0, "6360": 28391712768.0, "6365": 28391712768.0, "6370": 28391712768.0, "6375": 28391712768.0, "6380": 28391712768.0, "6385": 28391712768.0, "6390": 28391712768.0, "6395": 28391712768.0, "6400": 28391712768.0, "6405": 28391712768.0, "6410": 28391712768.0, "6415": 28391712768.0, "6420": 28391712768.0, "6425": 28391712768.0, "6430": 28391712768.0, "6435": 28391712768.0, "6440": 28391712768.0, "6445": 28391712768.0, "6450": 28391712768.0, "6455": 28391712768.0, "6460": 28391712768.0, "6465": 28391712768.0, "6470": 28391712768.0, "6475": 28391712768.0, "6480": 28391712768.0, "6485": 28391712768.0, "6490": 28391712768.0, "6495": 28391712768.0, "6500": 28391712768.0, "6505": 28391712768.0, "6510": 28391712768.0, "6515": 28391712768.0, "6520": 28391712768.0, "6525": 28391712768.0, "6530": 28391712768.0, "6535": 28391712768.0, "6540": 28391712768.0, "6545": 28391712768.0, "6550": 28391712768.0, "6555": 28391712768.0, "6560": 28391712768.0, "6565": 28391712768.0, "6570": 28391712768.0, "6575": 28391712768.0, "6580": 28391712768.0, "6585": 28391712768.0, "6590": 28391712768.0, "6595": 28391712768.0, "6600": 28391712768.0, "6605": 28391712768.0, "6610": 28391712768.0, "6615": 28391712768.0, "6620": 28391712768.0, "6625": 28391712768.0, "6630": 28391712768.0, "6635": 28391712768.0, "6640": 28391712768.0, "6645": 28391712768.0, "6650": 28391712768.0, "6655": 28391712768.0, "6660": 28391712768.0, "6665": 28391712768.0, "6670": 28391712768.0, "6675": 28391712768.0, "6680": 28391712768.0, "6685": 28391712768.0, "6690": 28391712768.0, "6695": 28391712768.0, "6700": 28391712768.0, "6705": 28391712768.0, "6710": 28391712768.0, "6715": 28391712768.0, "6720": 28391712768.0, "6725": 28391712768.0, "6730": 28391712768.0, "6735": 28391712768.0, "6740": 28391712768.0, "6745": 28391712768.0, "6750": 28391712768.0, "6755": 28391712768.0, "6760": 28391712768.0, "6765": 28391712768.0, "6770": 28391712768.0, "6775": 28391712768.0, "6780": 28391712768.0, "6785": 28391712768.0, "6790": 28391712768.0, "6795": 28391712768.0, "6800": 28391712768.0, "6805": 28391712768.0, "6810": 28391712768.0, "6815": 28391712768.0, "6820": 28391712768.0, "6825": 28391712768.0, "6830": 28391712768.0, "6835": 28391712768.0, "6840": 28391712768.0, "6845": 28391712768.0, "6850": 28391712768.0, "6855": 28391712768.0, "6860": 28391712768.0, "6865": 28391712768.0, "6870": 28391712768.0, "6875": 28391712768.0, "6880": 28391712768.0, "6885": 28391712768.0, "6890": 28391712768.0, "6895": 28391712768.0, "6900": 28391712768.0, "6905": 28391712768.0, "6910": 28391712768.0, "6915": 28391712768.0, "6920": 28391712768.0, "6925": 28391712768.0, "6930": 28391712768.0, "6935": 28391712768.0, "6940": 28391712768.0, "6945": 28391712768.0, "6950": 28391712768.0, "6955": 28391712768.0, "6960": 28391712768.0, "6965": 28391712768.0, "6970": 28391712768.0, "6975": 28391712768.0, "6980": 28391712768.0, "6985": 28391712768.0, "6990": 28391712768.0, "6995": 28391712768.0, "7000": 28391712768.0, "7005": 28391712768.0, "7010": 28391712768.0, "7015": 28391712768.0, "7020": 28391712768.0, "7025": 28391712768.0, "7030": 28391712768.0, "7035": 28391712768.0, "7040": 28391712768.0, "7045": 28391712768.0, "7050": 28391712768.0, "7055": 28391712768.0, "7060": 28391712768.0, "7065": 28391712768.0, "7070": 28391712768.0, "7075": 28391712768.0, "7080": 28391712768.0, "7085": 28391712768.0, "7090": 28391712768.0, "7095": 28391712768.0, "7100": 28391712768.0, "7105": 28391712768.0, "7110": 28391712768.0, "7115": 28391712768.0, "7120": 28391712768.0, "7125": 28391712768.0, "7130": 28391712768.0, "7135": 28391712768.0, "7140": 28391712768.0, "7145": 28391712768.0, "7150": 28391712768.0, "7155": 28391712768.0, "7160": 28391712768.0, "7165": 28391712768.0, "7170": 28391712768.0, "7175": 28391712768.0, "7180": 28391712768.0, "7185": 28391712768.0, "7190": 28391712768.0, "7195": 28391712768.0, "7200": 28391712768.0, "7205": 28391712768.0, "7210": 28391712768.0, "7215": 28391712768.0, "7220": 28391712768.0, "7225": 28391712768.0, "7230": 28391712768.0, "7235": 28391712768.0, "7240": 28391712768.0, "7245": 28391712768.0, "7250": 28391712768.0, "7255": 28391712768.0, "7260": 28391712768.0, "7265": 28391712768.0, "7270": 28391712768.0, "7275": 28391712768.0, "7280": 28391712768.0, "7285": 28391712768.0, "7290": 28391712768.0, "7295": 28391712768.0, "7300": 28391712768.0, "7305": 28391712768.0, "7310": 28391712768.0, "7315": 28391712768.0, "7320": 28391712768.0, "7325": 28391712768.0, "7330": 28391712768.0, "7335": 28391712768.0, "7340": 28391712768.0, "7345": 28391712768.0, "7350": 28391712768.0, "7355": 28391712768.0, "7360": 28391712768.0, "7365": 28391712768.0, "7370": 28391712768.0, "7375": 28391712768.0, "7380": 28391712768.0, "7385": 28391712768.0, "7390": 28391712768.0, "7395": 28391712768.0, "7400": 28391712768.0, "7405": 28391712768.0, "7410": 28391712768.0, "7415": 28391712768.0, "7420": 28391712768.0, "7425": 28391712768.0, "7430": 28391712768.0, "7435": 28391712768.0, "7440": 28391712768.0, "7445": 28391712768.0, "7450": 28391712768.0, "7455": 28391712768.0, "7460": 28391712768.0, "7465": 28391712768.0, "7470": 28391712768.0, "7475": 28391712768.0, "7480": 28391712768.0, "7485": 28391712768.0, "7490": 28391712768.0, "7495": 28391712768.0, "7500": 28391712768.0, "7505": 28391712768.0, "7510": 28391712768.0, "7515": 28391712768.0, "7520": 28391712768.0, "7525": 28391712768.0, "7530": 28391712768.0, "7535": 28391712768.0, "7540": 28391712768.0, "7545": 28391712768.0, "7550": 28391712768.0, "7555": 28391712768.0, "7560": 28391712768.0, "7565": 28391712768.0, "7570": 28391712768.0, "7575": 28391712768.0, "7580": 28391712768.0, "7585": 28391712768.0, "7590": 28391712768.0, "7595": 28391712768.0, "7600": 28391712768.0, "7605": 28391712768.0, "7610": 28391712768.0, "7615": 28391712768.0, "7620": 28391712768.0, "7625": 28391712768.0, "7630": 28391712768.0, "7635": 28391712768.0, "7640": 28391712768.0, "7645": 28391712768.0, "7650": 28391712768.0, "7655": 28391712768.0, "7660": 28391712768.0, "7665": 28391712768.0, "7670": 28391712768.0, "7675": 28391712768.0, "7680": 28391712768.0, "7685": 28391712768.0, "7690": 28391712768.0, "7695": 28391712768.0, "7700": 28391712768.0, "7705": 28391712768.0, "7710": 28391712768.0, "7715": 28391712768.0, "7720": 28391712768.0, "7725": 28391712768.0, "7730": 28391712768.0, "7735": 28391712768.0, "7740": 28391712768.0, "7745": 28391712768.0, "7750": 28391712768.0, "7755": 28391712768.0, "7760": 28391712768.0, "7765": 28391712768.0, "7770": 28391712768.0, "7775": 28391712768.0, "7780": 28391712768.0, "7785": 28391712768.0, "7790": 28391712768.0, "7795": 28391712768.0, "7800": 28391712768.0, "7805": 28391712768.0, "7810": 28391712768.0, "7815": 28391712768.0, "7820": 28391712768.0, "7825": 28391712768.0, "7830": 28391712768.0, "7835": 28391712768.0, "7840": 28391712768.0, "7845": 28391712768.0, "7850": 28391712768.0, "7855": 28391712768.0, "7860": 28391712768.0, "7865": 28391712768.0, "7870": 28391712768.0, "7875": 28391712768.0, "7880": 28391712768.0, "7885": 28391712768.0, "7890": 28391712768.0, "7895": 28391712768.0, "7900": 28391712768.0, "7905": 28391712768.0, "7910": 28391712768.0, "7915": 28391712768.0, "7920": 28391712768.0, "7925": 28391712768.0, "7930": 28391712768.0, "7935": 28391712768.0, "7940": 28391712768.0, "7945": 28391712768.0, "7950": 28391712768.0, "7955": 28391712768.0, "7960": 28391712768.0, "7965": 28391712768.0, "7970": 28391712768.0, "7975": 28391712768.0, "7980": 28391712768.0, "7985": 28391712768.0, "7990": 28391712768.0, "7995": 28391712768.0, "8000": 28391712768.0, "8005": 28391712768.0, "8010": 28391712768.0, "8015": 28391712768.0, "8020": 28391712768.0, "8025": 28391712768.0, "8030": 28391712768.0, "8035": 28391712768.0, "8040": 28391712768.0, "8045": 28391712768.0, "8050": 28391712768.0, "8055": 28391712768.0, "8060": 28391712768.0, "8065": 28391712768.0, "8070": 28391712768.0, "8075": 28391712768.0, "8080": 28391712768.0, "8085": 28391712768.0, "8090": 28391712768.0, "8095": 28391712768.0, "8100": 28391712768.0, "8105": 28391712768.0, "8110": 28391712768.0, "8115": 28391712768.0, "8120": 28391712768.0, "8125": 28391712768.0, "8130": 28391712768.0, "8135": 28391712768.0, "8140": 28391712768.0, "8145": 28391712768.0, "8150": 28391712768.0, "8155": 28391712768.0, "8160": 28391712768.0, "8165": 28391712768.0, "8170": 28391712768.0, "8175": 28391712768.0, "8180": 28391712768.0, "8185": 28391712768.0, "8190": 28391712768.0, "8195": 28391712768.0, "8200": 28391712768.0, "8205": 28391712768.0, "8210": 28391712768.0, "8215": 28391712768.0, "8220": 28391712768.0, "8225": 28391712768.0, "8230": 28391712768.0, "8235": 28391712768.0, "8240": 28391712768.0, "8245": 28391712768.0, "8250": 28391712768.0, "8255": 28391712768.0, "8260": 28391712768.0, "8265": 28391712768.0, "8270": 28391712768.0, "8275": 28391712768.0, "8280": 28391712768.0, "8285": 28391712768.0, "8290": 28391712768.0, "8295": 28391712768.0, "8300": 28391712768.0, "8305": 28391712768.0, "8310": 28391712768.0, "8315": 28391712768.0, "8320": 28391712768.0, "8325": 28391712768.0, "8330": 28391712768.0, "8335": 28391712768.0, "8340": 28391712768.0, "8345": 28391712768.0, "8350": 28391712768.0, "8355": 28391712768.0, "8360": 28391712768.0, "8365": 28391712768.0, "8370": 28391712768.0, "8375": 28391712768.0, "8380": 28391712768.0, "8385": 28391712768.0, "8390": 28391712768.0, "8395": 28391712768.0, "8400": 28391712768.0, "8405": 28391712768.0, "8410": 28391712768.0, "8415": 28391712768.0, "8420": 28391712768.0, "8425": 28391712768.0, "8430": 28391712768.0, "8435": 28391712768.0, "8440": 28391712768.0, "8445": 28391712768.0, "8450": 28391712768.0, "8455": 28391712768.0, "8460": 28391712768.0, "8465": 28391712768.0, "8470": 28391712768.0, "8475": 28391712768.0, "8480": 28391712768.0, "8485": 28391712768.0, "8490": 28391712768.0, "8495": 28391712768.0, "8500": 28391712768.0, "8505": 28391712768.0, "8510": 28391712768.0, "8515": 28391712768.0, "8520": 28391712768.0, "8525": 28391712768.0, "8530": 28391712768.0, "8535": 28391712768.0, "8540": 28391712768.0, "8545": 28391712768.0, "8550": 28391712768.0, "8555": 28391712768.0, "8560": 28391712768.0, "8565": 28391712768.0, "8570": 28391712768.0, "8575": 28391712768.0, "8580": 28391712768.0, "8585": 28391712768.0, "8590": 28391712768.0, "8595": 28391712768.0, "8600": 28391712768.0, "8605": 28391712768.0, "8610": 28391712768.0, "8615": 28391712768.0, "8620": 28391712768.0, "8625": 28391712768.0, "8630": 28391712768.0, "8635": 28391712768.0, "8640": 28391712768.0, "8645": 28391712768.0, "8650": 28391712768.0, "8655": 28391712768.0, "8660": 28391712768.0, "8665": 28391712768.0, "8670": 28391712768.0, "8675": 28391712768.0, "8680": 28391712768.0, "8685": 28391712768.0, "8690": 28391712768.0, "8695": 28391712768.0, "8700": 28391712768.0, "8705": 28391712768.0, "8710": 28391712768.0, "8715": 28391712768.0, "8720": 28391712768.0, "8725": 28391712768.0, "8730": 28391712768.0, "8735": 28391712768.0, "8740": 28391712768.0, "8745": 28391712768.0, "8750": 28391712768.0, "8755": 28391712768.0, "8760": 28391712768.0, "8765": 28391712768.0, "8770": 28391712768.0, "8775": 28391712768.0, "8780": 28391712768.0, "8785": 28391712768.0, "8790": 28391712768.0, "8795": 28391712768.0, "8800": 28391712768.0, "8805": 28391712768.0, "8810": 28391712768.0, "8815": 28391712768.0, "8820": 28391712768.0, "8825": 28391712768.0, "8830": 28391712768.0, "8835": 28391712768.0, "8840": 28391712768.0, "8845": 28391712768.0, "8850": 28391712768.0, "8855": 28391712768.0, "8860": 28391712768.0, "8865": 28391712768.0, "8870": 28391712768.0, "8875": 28391712768.0, "8880": 28391712768.0, "8885": 28391712768.0, "8890": 28391712768.0, "8895": 28391712768.0, "8900": 28391712768.0, "8905": 28391712768.0, "8910": 28391712768.0, "8915": 28391712768.0, "8920": 28391712768.0, "8925": 28391712768.0, "8930": 28391712768.0, "8935": 28391712768.0, "8940": 28391712768.0, "8945": 28391712768.0, "8950": 28391712768.0, "8955": 28391712768.0, "8960": 28391712768.0, "8965": 28391712768.0, "8970": 28391712768.0, "8975": 28391712768.0, "8980": 28391712768.0, "8985": 28391712768.0, "8990": 28391712768.0, "8995": 28391712768.0, "9000": 28391712768.0, "9005": 28391712768.0, "9010": 28391712768.0, "9015": 28391712768.0, "9020": 28391712768.0, "9025": 28391712768.0, "9030": 28391712768.0, "9035": 28391712768.0, "9040": 28391712768.0, "9045": 28391712768.0, "9050": 28391712768.0, "9055": 28391712768.0, "9060": 28391712768.0, "9065": 28391712768.0, "9070": 28391712768.0, "9075": 28391712768.0, "9080": 28391712768.0, "9085": 28391712768.0, "9090": 28391712768.0, "9095": 28391712768.0, "9100": 28391712768.0, "9105": 28391712768.0, "9110": 28391712768.0, "9115": 28391712768.0, "9120": 28391712768.0, "9125": 28391712768.0, "9130": 28391712768.0, "9135": 28391712768.0, "9140": 28391712768.0, "9145": 28391712768.0, "9150": 28391712768.0, "9155": 28391712768.0, "9160": 28391712768.0, "9165": 28391712768.0, "9170": 28391712768.0, "9175": 28391712768.0, "9180": 28391712768.0, "9185": 28391712768.0, "9190": 28391712768.0, "9195": 28391712768.0, "9200": 28391712768.0, "9205": 28391712768.0, "9210": 28391712768.0, "9215": 28391712768.0, "9220": 28391712768.0, "9225": 28391712768.0, "9230": 28391712768.0, "9235": 28391712768.0, "9240": 28391712768.0, "9245": 28391712768.0, "9250": 28391712768.0, "9255": 28391712768.0, "9260": 28391712768.0, "9265": 28391712768.0, "9270": 28391712768.0, "9275": 28391712768.0, "9280": 28391712768.0, "9285": 28391712768.0, "9290": 28391712768.0, "9295": 28391712768.0, "9300": 28391712768.0, "9305": 28391712768.0, "9310": 28391712768.0, "9315": 28391712768.0, "9320": 28391712768.0, "9325": 28391712768.0, "9330": 28391712768.0, "9335": 28391712768.0, "9340": 28391712768.0, "9345": 28391712768.0, "9350": 28391712768.0, "9355": 28391712768.0, "9360": 28391712768.0, "9365": 28391712768.0, "9370": 28391712768.0, "9375": 28391712768.0, "9380": 28391712768.0, "9385": 28391712768.0, "9390": 28391712768.0, "9395": 28391712768.0, "9400": 28391712768.0, "9405": 28391712768.0, "9410": 28391712768.0, "9415": 28391712768.0, "9420": 28391712768.0, "9425": 28391712768.0, "9430": 28391712768.0, "9435": 28391712768.0, "9440": 28391712768.0, "9445": 28391712768.0, "9450": 28391712768.0, "9455": 28391712768.0, "9460": 28391712768.0, "9465": 28391712768.0, "9470": 28391712768.0, "9475": 28391712768.0, "9480": 28391712768.0, "9485": 28391712768.0, "9490": 28391712768.0, "9495": 28391712768.0, "9500": 28391712768.0, "9505": 28391712768.0, "9510": 28391712768.0, "9515": 28391712768.0, "9520": 28391712768.0, "9525": 28391712768.0, "9530": 28391712768.0, "9535": 28391712768.0, "9540": 28391712768.0, "9545": 28391712768.0, "9550": 28391712768.0, "9555": 28391712768.0, "9560": 28391712768.0, "9565": 28391712768.0, "9570": 28391712768.0, "9575": 28391712768.0, "9580": 28391712768.0, "9585": 28391712768.0, "9590": 28391712768.0, "9595": 28391712768.0, "9600": 28391712768.0, "9605": 28391712768.0, "9610": 28391712768.0, "9615": 28391712768.0, "9620": 28391712768.0, "9625": 28391712768.0, "9630": 28391712768.0, "9635": 28391712768.0, "9640": 28391712768.0, "9645": 28391712768.0, "9650": 28391712768.0, "9655": 28391712768.0, "9660": 28391712768.0, "9665": 28391712768.0, "9670": 28391712768.0, "9675": 28391712768.0, "9680": 28391712768.0, "9685": 28391712768.0, "9690": 28391712768.0, "9695": 28391712768.0, "9700": 28391712768.0, "9705": 28391712768.0, "9710": 28391712768.0, "9715": 28391712768.0, "9720": 28391712768.0, "9725": 28391712768.0, "9730": 28391712768.0, "9735": 28391712768.0, "9740": 28391712768.0, "9745": 28391712768.0, "9750": 28391712768.0, "9755": 28391712768.0, "9760": 28391712768.0, "9765": 28391712768.0, "9770": 28391712768.0, "9775": 28391712768.0, "9780": 28391712768.0, "9785": 28391712768.0, "9790": 28391712768.0, "9795": 28391712768.0, "9800": 28391712768.0, "9805": 28391712768.0, "9810": 28391712768.0, "9815": 28391712768.0, "9820": 28391712768.0, "9825": 28391712768.0, "9830": 28391712768.0, "9835": 28391712768.0, "9840": 28391712768.0, "9845": 28391712768.0, "9850": 28391712768.0, "9855": 28391712768.0, "9860": 28391712768.0, "9865": 28391712768.0, "9870": 28391712768.0, "9875": 28391712768.0, "9880": 28391712768.0, "9885": 28391712768.0, "9890": 28391712768.0, "9895": 28391712768.0, "9900": 28391712768.0, "9905": 28391712768.0, "9910": 28391712768.0, "9915": 28391712768.0, "9920": 28391712768.0, "9925": 28391712768.0, "9930": 28391712768.0, "9935": 28391712768.0, "9940": 28391712768.0, "9945": 28391712768.0, "9950": 28391712768.0, "9955": 28391712768.0, "9960": 28391712768.0, "9965": 28391712768.0, "9970": 28391712768.0, "9975": 28391712768.0, "9980": 28391712768.0, "9985": 28391712768.0, "9990": 28391712768.0, "9995": 28391712768.0, "10000": 28391712768.0, "10005": 28391712768.0, "10010": 28391712768.0, "10015": 28391712768.0, "10020": 28391712768.0, "10025": 28391712768.0, "10030": 28391712768.0, "10035": 28391712768.0, "10040": 28391712768.0, "10045": 28391712768.0, "10050": 28391712768.0, "10055": 28391712768.0, "10060": 28391712768.0, "10065": 28391712768.0, "10070": 28391712768.0, "10075": 28391712768.0, "10080": 28391712768.0, "10085": 28391712768.0, "10090": 28391712768.0, "10095": 28391712768.0, "10100": 28391712768.0, "10105": 28391712768.0, "10110": 28391712768.0, "10115": 28391712768.0, "10120": 28391712768.0, "10125": 28391712768.0, "10130": 28391712768.0, "10135": 28391712768.0, "10140": 28391712768.0, "10145": 28391712768.0, "10150": 28391712768.0, "10155": 28391712768.0, "10160": 28391712768.0, "10165": 28391712768.0, "10170": 28391712768.0, "10175": 28391712768.0, "10180": 28391712768.0, "10185": 28391712768.0, "10190": 28391712768.0, "10195": 28391712768.0, "10200": 28391712768.0, "10205": 28391712768.0, "10210": 28391712768.0, "10215": 28391712768.0, "10220": 28391712768.0, "10225": 28391712768.0, "10230": 28391712768.0, "10235": 28391712768.0, "10240": 28391712768.0, "10245": 28391712768.0, "10250": 28391712768.0, "10255": 28391712768.0, "10260": 28391712768.0, "10265": 28391712768.0, "10270": 28391712768.0, "10275": 28391712768.0, "10280": 28391712768.0, "10285": 28391712768.0, "10290": 28391712768.0, "10295": 28391712768.0, "10300": 28391712768.0, "10305": 28391712768.0, "10310": 28391712768.0, "10315": 28391712768.0, "10320": 28391712768.0, "10325": 28391712768.0, "10330": 28391712768.0, "10335": 28391712768.0, "10340": 28391712768.0, "10345": 28391712768.0, "10350": 28391712768.0, "10355": 28391712768.0, "10360": 28391712768.0, "10365": 28391712768.0, "10370": 28391712768.0, "10375": 28391712768.0, "10380": 28391712768.0, "10385": 28391712768.0, "10390": 28391712768.0, "10395": 28391712768.0, "10400": 28391712768.0, "10405": 28391712768.0, "10410": 28391712768.0, "10415": 28391712768.0, "10420": 28391712768.0, "10425": 28391712768.0, "10430": 28391712768.0, "10435": 28391712768.0, "10440": 28391712768.0, "10445": 28391712768.0, "10450": 28391712768.0, "10455": 28391712768.0, "10460": 28391712768.0, "10465": 28391712768.0, "10470": 28391712768.0, "10475": 28391712768.0, "10480": 28391712768.0, "10485": 28391712768.0, "10490": 28391712768.0, "10495": 28391712768.0, "10500": 28391712768.0, "10505": 28391712768.0, "10510": 28391712768.0, "10515": 28391712768.0, "10520": 28391712768.0, "10525": 28391712768.0, "10530": 28391712768.0, "10535": 28391712768.0, "10540": 28391712768.0, "10545": 28391712768.0, "10550": 28391712768.0, "10555": 28391712768.0, "10560": 28391712768.0, "10565": 28391712768.0, "10570": 28391712768.0, "10575": 28391712768.0, "10580": 28391712768.0, "10585": 28391712768.0, "10590": 28391712768.0, "10595": 28391712768.0, "10600": 28391712768.0, "10605": 28391712768.0, "10610": 28391712768.0, "10615": 28391712768.0, "10620": 28391712768.0, "10625": 28391712768.0, "10630": 28391712768.0, "10635": 28391712768.0, "10640": 28391712768.0, "10645": 28391712768.0, "10650": 28391712768.0, "10655": 28391712768.0, "10660": 28391712768.0, "10665": 28391712768.0, "10670": 28391712768.0, "10675": 28391712768.0, "10680": 28391712768.0, "10685": 28391712768.0, "10690": 28391712768.0, "10695": 28391712768.0, "10700": 28391712768.0, "10705": 28391712768.0, "10710": 28391712768.0, "10715": 28391712768.0, "10720": 28391712768.0, "10725": 28391712768.0, "10730": 28391712768.0, "10735": 28391712768.0, "10740": 28391712768.0, "10745": 28391712768.0, "10750": 28391712768.0, "10755": 28391712768.0, "10760": 28391712768.0, "10765": 28391712768.0, "10770": 28391712768.0, "10775": 28391712768.0, "10780": 28391712768.0, "10785": 28391712768.0, "10790": 28391712768.0, "10795": 28391712768.0, "10800": 28391712768.0, "10805": 28391712768.0, "10810": 28391712768.0, "10815": 28391712768.0, "10820": 28391712768.0, "10825": 28391712768.0, "10830": 28391712768.0, "10835": 28391712768.0, "10840": 28391712768.0, "10845": 28391712768.0, "10850": 28391712768.0, "10855": 28391712768.0, "10860": 28391712768.0, "10865": 28391712768.0, "10870": 28391712768.0, "10875": 28391712768.0, "10880": 28391712768.0, "10885": 28391712768.0, "10890": 28391712768.0, "10895": 28391712768.0, "10900": 28391712768.0, "10905": 28391712768.0, "10910": 28391712768.0, "10915": 28391712768.0, "10920": 28391712768.0, "10925": 28391712768.0, "10930": 28391712768.0, "10935": 28391712768.0, "10940": 28391712768.0, "10945": 28391712768.0, "10950": 28391712768.0, "10955": 28391712768.0, "10960": 28391712768.0, "10965": 28391712768.0, "10970": 28391712768.0, "10975": 28391712768.0, "10980": 28391712768.0, "10985": 28391712768.0, "10990": 28391712768.0, "10995": 28391712768.0, "11000": 28391712768.0, "11005": 28391712768.0, "11010": 28391712768.0, "11015": 28391712768.0, "11020": 28391712768.0, "11025": 28391712768.0, "11030": 28391712768.0, "11035": 28391712768.0, "11040": 28391712768.0, "11045": 28391712768.0, "11050": 28391712768.0, "11055": 28391712768.0, "11060": 28391712768.0, "11065": 28391712768.0, "11070": 28391712768.0, "11075": 28391712768.0, "11080": 28391712768.0, "11085": 28391712768.0, "11090": 28391712768.0, "11095": 28391712768.0, "11100": 28391712768.0, "11105": 28391712768.0, "11110": 28391712768.0, "11115": 28391712768.0, "11120": 28391712768.0, "11125": 28391712768.0, "11130": 28391712768.0, "11135": 28391712768.0, "11140": 28391712768.0, "11145": 28391712768.0, "11150": 28391712768.0, "11155": 28391712768.0, "11160": 28391712768.0, "11165": 28391712768.0, "11170": 28391712768.0, "11175": 28391712768.0, "11180": 28391712768.0, "11185": 28391712768.0, "11190": 28391712768.0, "11195": 28391712768.0, "11200": 28391712768.0, "11205": 28391712768.0, "11210": 28391712768.0, "11215": 28391712768.0, "11220": 28391712768.0, "11225": 28391712768.0, "11230": 28391712768.0, "11235": 28391712768.0, "11240": 28391712768.0, "11245": 28391712768.0, "11250": 28391712768.0, "11255": 28391712768.0, "11260": 28391712768.0, "11265": 28391712768.0, "11270": 28391712768.0, "11275": 28391712768.0, "11280": 28391712768.0, "11285": 28391712768.0, "11290": 28391712768.0, "11295": 28391712768.0, "11300": 28391712768.0, "11305": 28391712768.0, "11310": 28391712768.0, "11315": 28391712768.0, "11320": 28391712768.0, "11325": 28391712768.0, "11330": 28391712768.0, "11335": 28391712768.0, "11340": 28391712768.0, "11345": 28391712768.0, "11350": 28391712768.0, "11355": 28391712768.0, "11360": 28391712768.0, "11365": 28391712768.0, "11370": 28391712768.0, "11375": 28391712768.0, "11380": 28391712768.0, "11385": 28391712768.0, "11390": 28391712768.0, "11395": 28391712768.0, "11400": 28391712768.0, "11405": 28391712768.0, "11410": 28391712768.0, "11415": 28391712768.0, "11420": 28391712768.0, "11425": 28391712768.0, "11430": 28391712768.0, "11435": 28391712768.0, "11440": 28391712768.0, "11445": 28391712768.0, "11450": 28391712768.0, "11455": 28391712768.0, "11460": 28391712768.0, "11465": 28391712768.0, "11470": 28391712768.0, "11475": 28391712768.0, "11480": 28391712768.0, "11485": 28391712768.0, "11490": 28391712768.0, "11495": 28391712768.0, "11500": 28391712768.0, "11505": 28391712768.0, "11510": 28391712768.0, "11515": 28391712768.0, "11520": 28391712768.0, "11525": 28391712768.0, "11530": 28391712768.0, "11535": 28391712768.0, "11540": 28391712768.0, "11545": 28391712768.0, "11550": 28391712768.0, "11555": 28391712768.0, "11560": 28391712768.0, "11565": 28391712768.0, "11570": 28391712768.0, "11575": 28391712768.0, "11580": 28391712768.0, "11585": 28391712768.0, "11590": 28391712768.0, "11595": 28391712768.0, "11600": 28391712768.0, "11605": 28391712768.0, "11610": 28391712768.0, "11615": 28391712768.0, "11620": 28391712768.0, "11625": 28391712768.0, "11630": 28391712768.0, "11635": 28391712768.0, "11640": 28391712768.0, "11645": 28391712768.0, "11650": 28391712768.0, "11655": 28391712768.0, "11660": 28391712768.0, "11665": 28391712768.0, "11670": 28391712768.0, "11675": 28391712768.0, "11680": 28391712768.0, "11685": 28391712768.0, "11690": 28391712768.0, "11695": 28391712768.0, "11700": 28391712768.0, "11705": 28391712768.0, "11710": 28391712768.0, "11715": 28391712768.0, "11720": 28391712768.0, "11725": 28391712768.0, "11730": 28391712768.0, "11735": 28391712768.0, "11740": 28391712768.0, "11745": 28391712768.0, "11750": 28391712768.0, "11755": 28391712768.0, "11760": 28391712768.0, "11765": 28391712768.0, "11770": 28391712768.0, "11775": 28391712768.0, "11780": 28391712768.0, "11785": 28391712768.0, "11790": 28391712768.0, "11795": 28391712768.0, "11800": 28391712768.0, "11805": 28391712768.0, "11810": 28391712768.0, "11815": 28391712768.0, "11820": 28391712768.0, "11825": 28391712768.0, "11830": 28391712768.0, "11835": 28391712768.0, "11840": 28391712768.0, "11845": 28391712768.0, "11850": 28391712768.0, "11855": 28391712768.0, "11860": 28391712768.0, "11865": 28391712768.0, "11870": 28391712768.0, "11875": 28391712768.0, "11880": 28391712768.0, "11885": 28391712768.0, "11890": 28391712768.0, "11895": 28391712768.0, "11900": 28391712768.0, "11905": 28391712768.0, "11910": 28391712768.0, "11915": 28391712768.0, "11920": 28391712768.0, "11925": 28391712768.0, "11930": 28391712768.0, "11935": 28391712768.0, "11940": 28391712768.0, "11945": 28391712768.0, "11950": 28391712768.0, "11955": 28391712768.0, "11960": 28391712768.0, "11965": 28391712768.0, "11970": 28391712768.0, "11975": 28391712768.0, "11980": 28391712768.0, "11985": 28391712768.0, "11990": 28391712768.0, "11995": 28391712768.0, "12000": 28391712768.0, "12005": 28391712768.0, "12010": 28391712768.0, "12015": 28391712768.0, "12020": 28391712768.0, "12025": 28391712768.0, "12030": 28391712768.0, "12035": 28391712768.0, "12040": 28391712768.0, "12045": 28391712768.0, "12050": 28391712768.0, "12055": 28391712768.0, "12060": 28391712768.0, "12065": 28391712768.0, "12070": 28391712768.0, "12075": 28391712768.0, "12080": 28391712768.0, "12085": 28391712768.0, "12090": 28391712768.0, "12095": 28391712768.0, "12100": 28391712768.0, "12105": 28391712768.0, "12110": 28391712768.0, "12115": 28391712768.0, "12120": 28391712768.0, "12125": 28391712768.0, "12130": 28391712768.0, "12135": 28391712768.0, "12140": 28391712768.0, "12145": 28391712768.0, "12150": 28391712768.0, "12155": 28391712768.0, "12160": 28391712768.0, "12165": 28391712768.0, "12170": 28391712768.0, "12175": 28391712768.0, "12180": 28391712768.0, "12185": 28391712768.0, "12190": 28391712768.0, "12195": 28391712768.0, "12200": 28391712768.0, "12205": 28391712768.0, "12210": 28391712768.0, "12215": 28391712768.0, "12220": 28391712768.0, "12225": 28391712768.0, "12230": 28391712768.0, "12235": 28391712768.0, "12240": 28391712768.0, "12245": 28391712768.0, "12250": 28391712768.0, "12255": 28391712768.0, "12260": 28391712768.0, "12265": 28391712768.0, "12270": 28391712768.0, "12275": 28391712768.0, "12280": 28391712768.0, "12285": 28391712768.0, "12290": 28391712768.0, "12295": 28391712768.0, "12300": 28391712768.0, "12305": 28391712768.0, "12310": 28391712768.0, "12315": 28391712768.0, "12320": 28391712768.0, "12325": 28391712768.0, "12330": 28391712768.0, "12335": 28391712768.0, "12340": 28391712768.0, "12345": 28391712768.0, "12350": 28391712768.0, "12355": 28391712768.0, "12360": 28391712768.0, "12365": 28391712768.0, "12370": 28391712768.0, "12375": 28391712768.0, "12380": 28391712768.0, "12385": 28391712768.0, "12390": 28391712768.0, "12395": 28391712768.0, "12400": 28391712768.0, "12405": 28391712768.0, "12410": 28391712768.0, "12415": 28391712768.0, "12420": 28391712768.0, "12425": 28391712768.0, "12430": 28391712768.0, "12435": 28391712768.0, "12440": 28391712768.0, "12445": 28391712768.0, "12450": 28391712768.0, "12455": 28391712768.0, "12460": 28391712768.0, "12465": 28391712768.0, "12470": 28391712768.0, "12475": 28391712768.0, "12480": 28391712768.0, "12485": 28391712768.0, "12490": 28391712768.0, "12495": 28391712768.0, "12500": 28391712768.0, "12505": 28391712768.0, "12510": 28391712768.0, "12515": 28391712768.0, "12520": 28391712768.0, "12525": 28391712768.0, "12530": 28391712768.0, "12535": 28391712768.0, "12540": 28391712768.0, "12545": 28391712768.0, "12550": 28391712768.0, "12555": 28391712768.0, "12560": 28391712768.0, "12565": 28391712768.0, "12570": 28391712768.0, "12575": 28391712768.0, "12580": 28391712768.0, "12585": 28391712768.0, "12590": 28391712768.0, "12595": 28391712768.0, "12600": 28391712768.0, "12605": 28391712768.0, "12610": 28391712768.0, "12615": 28391712768.0, "12620": 28391712768.0, "12625": 28391712768.0, "12630": 28391712768.0, "12635": 28391712768.0, "12640": 28391712768.0, "12645": 28391712768.0, "12650": 28391712768.0, "12655": 28391712768.0, "12660": 28391712768.0, "12665": 28391712768.0, "12670": 28391712768.0, "12675": 28391712768.0, "12680": 28391712768.0, "12685": 28391712768.0, "12690": 28391712768.0, "12695": 28391712768.0, "12700": 28391712768.0, "12705": 28391712768.0, "12710": 28391712768.0, "12715": 28391712768.0, "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": "nan", "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": "nan", "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": "nan"}}, "iteration-time": {"start_step": 1, "end_step": 13000, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": "nan", "25": "nan", "30": "nan", "35": "nan", "40": "nan", "45": "nan", "50": "nan", "55": "nan", "60": "nan", "65": "nan", "70": "nan", "75": "nan", "80": "nan", "85": "nan", "90": "nan", "95": "nan", "100": 3.55899, "105": "nan", "110": "nan", "115": "nan", "120": "nan", "125": "nan", "130": "nan", "135": "nan", "140": "nan", "145": "nan", "150": "nan", "155": "nan", "160": "nan", "165": "nan", "170": "nan", "175": "nan", "180": "nan", "185": "nan", "190": "nan", "195": "nan", "200": 3.44029, "205": "nan", "210": "nan", "215": "nan", "220": "nan", "225": "nan", "230": "nan", "235": "nan", "240": "nan", "245": "nan", "250": "nan", "255": "nan", "260": "nan", "265": "nan", "270": "nan", "275": "nan", "280": "nan", "285": "nan", "290": "nan", "295": "nan", "300": 3.44419, "305": "nan", "310": "nan", "315": "nan", "320": "nan", "325": "nan", "330": "nan", "335": "nan", "340": "nan", "345": "nan", "350": "nan", "355": "nan", "360": "nan", "365": "nan", "370": "nan", "375": "nan", "380": "nan", "385": "nan", "390": "nan", "395": "nan", "400": 3.44595, "405": "nan", "410": "nan", "415": "nan", "420": "nan", "425": "nan", "430": "nan", "435": "nan", "440": "nan", "445": "nan", "450": "nan", "455": "nan", "460": "nan", "465": "nan", "470": "nan", "475": "nan", "480": "nan", "485": "nan", "490": "nan", "495": "nan", "500": 3.4477, "505": "nan", "510": "nan", "515": "nan", "520": "nan", "525": "nan", "530": "nan", "535": "nan", "540": "nan", "545": "nan", "550": "nan", "555": "nan", "560": "nan", "565": "nan", "570": "nan", "575": "nan", "580": "nan", "585": "nan", "590": "nan", "595": "nan", "600": 3.44601, "605": "nan", "610": "nan", "615": "nan", "620": "nan", "625": "nan", "630": "nan", "635": "nan", "640": "nan", "645": "nan", "650": "nan", "655": "nan", "660": "nan", "665": "nan", "670": "nan", "675": "nan", "680": "nan", "685": "nan", "690": "nan", "695": "nan", "700": 3.4446, "705": "nan", "710": "nan", "715": "nan", "720": "nan", "725": "nan", "730": "nan", "735": "nan", "740": "nan", "745": "nan", "750": "nan", "755": "nan", "760": "nan", "765": "nan", "770": "nan", "775": "nan", "780": "nan", "785": "nan", "790": "nan", "795": "nan", "800": 3.44149, "805": "nan", "810": "nan", "815": "nan", "820": "nan", "825": "nan", "830": "nan", "835": "nan", "840": "nan", "845": "nan", "850": "nan", "855": "nan", "860": "nan", "865": "nan", "870": "nan", "875": "nan", "880": "nan", "885": "nan", "890": "nan", "895": "nan", "900": 3.43766, "905": "nan", "910": "nan", "915": "nan", "920": "nan", "925": "nan", "930": "nan", "935": "nan", "940": "nan", "945": "nan", "950": "nan", "955": "nan", "960": "nan", "965": "nan", "970": "nan", "975": "nan", "980": "nan", "985": "nan", "990": "nan", "995": "nan", "1000": 3.43127, "1005": "nan", "1010": "nan", "1015": "nan", "1020": "nan", "1025": "nan", "1030": "nan", "1035": "nan", "1040": "nan", "1045": "nan", "1050": "nan", "1055": "nan", "1060": "nan", "1065": "nan", "1070": "nan", "1075": "nan", "1080": "nan", "1085": "nan", "1090": "nan", "1095": "nan", "1100": 3.42513, "1105": "nan", "1110": "nan", "1115": "nan", "1120": "nan", "1125": "nan", "1130": "nan", "1135": "nan", "1140": "nan", "1145": "nan", "1150": "nan", "1155": "nan", "1160": "nan", "1165": "nan", "1170": "nan", "1175": "nan", "1180": "nan", "1185": "nan", "1190": "nan", "1195": "nan", "1200": 3.41859, "1205": "nan", "1210": "nan", "1215": "nan", "1220": "nan", "1225": "nan", "1230": "nan", "1235": "nan", "1240": "nan", "1245": "nan", "1250": "nan", "1255": "nan", "1260": "nan", "1265": "nan", "1270": "nan", "1275": "nan", "1280": "nan", "1285": "nan", "1290": "nan", "1295": "nan", "1300": 3.41493, "1305": "nan", "1310": "nan", "1315": "nan", "1320": "nan", "1325": "nan", "1330": "nan", "1335": "nan", "1340": "nan", "1345": "nan", "1350": "nan", "1355": "nan", "1360": "nan", "1365": "nan", "1370": "nan", "1375": "nan", "1380": "nan", "1385": "nan", "1390": "nan", "1395": "nan", "1400": 3.41144, "1405": "nan", "1410": "nan", "1415": "nan", "1420": "nan", "1425": "nan", "1430": "nan", "1435": "nan", "1440": "nan", "1445": "nan", "1450": "nan", "1455": "nan", "1460": "nan", "1465": "nan", "1470": "nan", "1475": "nan", "1480": "nan", "1485": "nan", "1490": "nan", "1495": "nan", "1500": 3.40875, "1505": "nan", "1510": "nan", "1515": "nan", "1520": "nan", "1525": "nan", "1530": "nan", "1535": "nan", "1540": "nan", "1545": "nan", "1550": "nan", "1555": "nan", "1560": "nan", "1565": "nan", "1570": "nan", "1575": "nan", "1580": "nan", "1585": "nan", "1590": "nan", "1595": "nan", "1600": 3.40598, "1605": "nan", "1610": "nan", "1615": "nan", "1620": "nan", "1625": "nan", "1630": "nan", "1635": "nan", "1640": "nan", "1645": "nan", "1650": "nan", "1655": "nan", "1660": "nan", "1665": "nan", "1670": "nan", "1675": "nan", "1680": "nan", "1685": "nan", "1690": "nan", "1695": "nan", "1700": 3.4038, "1705": "nan", "1710": "nan", "1715": "nan", "1720": "nan", "1725": "nan", "1730": "nan", "1735": "nan", "1740": "nan", "1745": "nan", "1750": "nan", "1755": "nan", "1760": "nan", "1765": "nan", "1770": "nan", "1775": "nan", "1780": "nan", "1785": "nan", "1790": "nan", "1795": "nan", "1800": 3.40301, "1805": "nan", "1810": "nan", "1815": "nan", "1820": "nan", "1825": "nan", "1830": "nan", "1835": "nan", "1840": "nan", "1845": "nan", "1850": "nan", "1855": "nan", "1860": "nan", "1865": "nan", "1870": "nan", "1875": "nan", "1880": "nan", "1885": "nan", "1890": "nan", "1895": "nan", "1900": 3.40033, "1905": "nan", "1910": "nan", "1915": "nan", "1920": "nan", "1925": "nan", "1930": "nan", "1935": "nan", "1940": "nan", "1945": "nan", "1950": "nan", "1955": "nan", "1960": "nan", "1965": "nan", "1970": "nan", "1975": "nan", "1980": "nan", "1985": "nan", "1990": "nan", "1995": "nan", "2000": 3.3983, "2005": "nan", "2010": "nan", "2015": "nan", "2020": "nan", "2025": "nan", "2030": "nan", "2035": "nan", "2040": "nan", "2045": "nan", "2050": "nan", "2055": "nan", "2060": "nan", "2065": "nan", "2070": "nan", "2075": "nan", "2080": "nan", "2085": "nan", "2090": "nan", "2095": "nan", "2100": 3.3967, "2105": "nan", "2110": "nan", "2115": "nan", "2120": "nan", "2125": "nan", "2130": "nan", "2135": "nan", "2140": "nan", "2145": "nan", "2150": "nan", "2155": "nan", "2160": "nan", "2165": "nan", "2170": "nan", "2175": "nan", "2180": "nan", "2185": "nan", "2190": "nan", "2195": "nan", "2200": 3.39751, "2205": "nan", "2210": "nan", "2215": "nan", "2220": "nan", "2225": "nan", "2230": "nan", "2235": "nan", "2240": "nan", "2245": "nan", "2250": "nan", "2255": "nan", "2260": "nan", "2265": "nan", "2270": "nan", "2275": "nan", "2280": "nan", "2285": "nan", "2290": "nan", "2295": "nan", "2300": 3.39609, "2305": "nan", "2310": "nan", "2315": "nan", "2320": "nan", "2325": "nan", "2330": "nan", "2335": "nan", "2340": "nan", "2345": "nan", "2350": "nan", "2355": "nan", "2360": "nan", "2365": "nan", "2370": "nan", "2375": "nan", "2380": "nan", "2385": "nan", "2390": "nan", "2395": "nan", "2400": 3.39469, "2405": "nan", "2410": "nan", "2415": "nan", "2420": "nan", "2425": "nan", "2430": "nan", "2435": "nan", "2440": "nan", "2445": "nan", "2450": "nan", "2455": "nan", "2460": "nan", "2465": "nan", "2470": "nan", "2475": "nan", "2480": "nan", "2485": "nan", "2490": "nan", "2495": "nan", "2500": 3.39297, "2505": "nan", "2510": "nan", "2515": "nan", "2520": "nan", "2525": "nan", "2530": "nan", "2535": "nan", "2540": "nan", "2545": "nan", "2550": "nan", "2555": "nan", "2560": "nan", "2565": "nan", "2570": "nan", "2575": "nan", "2580": "nan", "2585": "nan", "2590": "nan", "2595": "nan", "2600": 3.39215, "2605": "nan", "2610": "nan", "2615": "nan", "2620": "nan", "2625": "nan", "2630": "nan", "2635": "nan", "2640": "nan", "2645": "nan", "2650": "nan", "2655": "nan", "2660": "nan", "2665": "nan", "2670": "nan", "2675": "nan", "2680": "nan", "2685": "nan", "2690": "nan", "2695": "nan", "2700": 3.39252, "2705": "nan", "2710": "nan", "2715": "nan", "2720": "nan", "2725": "nan", "2730": "nan", "2735": "nan", "2740": "nan", "2745": "nan", "2750": "nan", "2755": "nan", "2760": "nan", "2765": "nan", "2770": "nan", "2775": "nan", "2780": "nan", "2785": "nan", "2790": "nan", "2795": "nan", "2800": 3.39244, "2805": "nan", "2810": "nan", "2815": "nan", "2820": "nan", "2825": "nan", "2830": "nan", "2835": "nan", "2840": "nan", "2845": "nan", "2850": "nan", "2855": "nan", "2860": "nan", "2865": "nan", "2870": "nan", "2875": "nan", "2880": "nan", "2885": "nan", "2890": "nan", "2895": "nan", "2900": 3.39099, "2905": "nan", "2910": "nan", "2915": "nan", "2920": "nan", "2925": "nan", "2930": "nan", "2935": "nan", "2940": "nan", "2945": "nan", "2950": "nan", "2955": "nan", "2960": "nan", "2965": "nan", "2970": "nan", "2975": "nan", "2980": "nan", "2985": "nan", "2990": "nan", "2995": "nan", "3000": 3.39125, "3005": "nan", "3010": "nan", "3015": "nan", "3020": "nan", "3025": "nan", "3030": "nan", "3035": "nan", "3040": "nan", "3045": "nan", "3050": "nan", "3055": "nan", "3060": "nan", "3065": "nan", "3070": "nan", "3075": "nan", "3080": "nan", "3085": "nan", "3090": "nan", "3095": "nan", "3100": 3.39087, "3105": "nan", "3110": "nan", "3115": "nan", "3120": "nan", "3125": "nan", "3130": "nan", "3135": "nan", "3140": "nan", "3145": "nan", "3150": "nan", "3155": "nan", "3160": "nan", "3165": "nan", "3170": "nan", "3175": "nan", "3180": "nan", "3185": "nan", "3190": "nan", "3195": "nan", "3200": 3.3894, "3205": "nan", "3210": "nan", "3215": "nan", "3220": "nan", "3225": "nan", "3230": "nan", "3235": "nan", "3240": "nan", "3245": "nan", "3250": "nan", "3255": "nan", "3260": "nan", "3265": "nan", "3270": "nan", "3275": "nan", "3280": "nan", "3285": "nan", "3290": "nan", "3295": "nan", "3300": 3.39035, "3305": "nan", "3310": "nan", "3315": "nan", "3320": "nan", "3325": "nan", "3330": "nan", "3335": "nan", "3340": "nan", "3345": "nan", "3350": "nan", "3355": "nan", "3360": "nan", "3365": "nan", "3370": "nan", "3375": "nan", "3380": "nan", "3385": "nan", "3390": "nan", "3395": "nan", "3400": 3.39004, "3405": "nan", "3410": "nan", "3415": "nan", "3420": "nan", "3425": "nan", "3430": "nan", "3435": "nan", "3440": "nan", "3445": "nan", "3450": "nan", "3455": "nan", "3460": "nan", "3465": "nan", "3470": "nan", "3475": "nan", "3480": "nan", "3485": "nan", "3490": "nan", "3495": "nan", "3500": 3.38966, "3505": "nan", "3510": "nan", "3515": "nan", "3520": "nan", "3525": "nan", "3530": "nan", "3535": "nan", "3540": "nan", "3545": "nan", "3550": "nan", "3555": "nan", "3560": "nan", "3565": "nan", "3570": "nan", "3575": "nan", "3580": "nan", "3585": "nan", "3590": "nan", "3595": "nan", "3600": 3.38952, "3605": "nan", "3610": "nan", "3615": "nan", "3620": "nan", "3625": "nan", "3630": "nan", "3635": "nan", "3640": "nan", "3645": "nan", "3650": "nan", "3655": "nan", "3660": "nan", "3665": "nan", "3670": "nan", "3675": "nan", "3680": "nan", "3685": "nan", "3690": "nan", "3695": "nan", "3700": 3.3871, "3705": "nan", "3710": "nan", "3715": "nan", "3720": "nan", "3725": "nan", "3730": "nan", "3735": "nan", "3740": "nan", "3745": "nan", "3750": "nan", "3755": "nan", "3760": "nan", "3765": "nan", "3770": "nan", "3775": "nan", "3780": "nan", "3785": "nan", "3790": "nan", "3795": "nan", "3800": 3.38626, "3805": "nan", "3810": "nan", "3815": "nan", "3820": "nan", "3825": "nan", "3830": "nan", "3835": "nan", "3840": "nan", "3845": "nan", "3850": "nan", "3855": "nan", "3860": "nan", "3865": "nan", "3870": "nan", "3875": "nan", "3880": "nan", "3885": "nan", "3890": "nan", "3895": "nan", "3900": 3.38708, "3905": "nan", "3910": "nan", "3915": "nan", "3920": "nan", "3925": "nan", "3930": "nan", "3935": "nan", "3940": "nan", "3945": "nan", "3950": "nan", "3955": "nan", "3960": "nan", "3965": "nan", "3970": "nan", "3975": "nan", "3980": "nan", "3985": "nan", "3990": "nan", "3995": "nan", "4000": 3.38632, "4005": "nan", "4010": "nan", "4015": "nan", "4020": "nan", "4025": "nan", "4030": "nan", "4035": "nan", "4040": "nan", "4045": "nan", "4050": "nan", "4055": "nan", "4060": "nan", "4065": "nan", "4070": "nan", "4075": "nan", "4080": "nan", "4085": "nan", "4090": "nan", "4095": "nan", "4100": 3.38441, "4105": "nan", "4110": "nan", "4115": "nan", "4120": "nan", "4125": "nan", "4130": "nan", "4135": "nan", "4140": "nan", "4145": "nan", "4150": "nan", "4155": "nan", "4160": "nan", "4165": "nan", "4170": "nan", "4175": "nan", "4180": "nan", "4185": "nan", "4190": "nan", "4195": "nan", "4200": 3.38457, "4205": "nan", "4210": "nan", "4215": "nan", "4220": "nan", "4225": "nan", "4230": "nan", "4235": "nan", "4240": "nan", "4245": "nan", "4250": "nan", "4255": "nan", "4260": "nan", "4265": "nan", "4270": "nan", "4275": "nan", "4280": "nan", "4285": "nan", "4290": "nan", "4295": "nan", "4300": 3.38457, "4305": "nan", "4310": "nan", "4315": "nan", "4320": "nan", "4325": "nan", "4330": "nan", "4335": "nan", "4340": "nan", "4345": "nan", "4350": "nan", "4355": "nan", "4360": "nan", "4365": "nan", "4370": "nan", "4375": "nan", "4380": "nan", "4385": "nan", "4390": "nan", "4395": "nan", "4400": 3.38389, "4405": "nan", "4410": "nan", "4415": "nan", "4420": "nan", "4425": "nan", "4430": "nan", "4435": "nan", "4440": "nan", "4445": "nan", "4450": "nan", "4455": "nan", "4460": "nan", "4465": "nan", "4470": "nan", "4475": "nan", "4480": "nan", "4485": "nan", "4490": "nan", "4495": "nan", "4500": 3.38314, "4505": "nan", "4510": "nan", "4515": "nan", "4520": "nan", "4525": "nan", "4530": "nan", "4535": "nan", "4540": "nan", "4545": "nan", "4550": "nan", "4555": "nan", "4560": "nan", "4565": "nan", "4570": "nan", "4575": "nan", "4580": "nan", "4585": "nan", "4590": "nan", "4595": "nan", "4600": 3.38093, "4605": "nan", "4610": "nan", "4615": "nan", "4620": "nan", "4625": "nan", "4630": "nan", "4635": "nan", "4640": "nan", "4645": "nan", "4650": "nan", "4655": "nan", "4660": "nan", "4665": "nan", "4670": "nan", "4675": "nan", "4680": "nan", "4685": "nan", "4690": "nan", "4695": "nan", "4700": 3.38075, "4705": "nan", "4710": "nan", "4715": "nan", "4720": "nan", "4725": "nan", "4730": "nan", "4735": "nan", "4740": "nan", "4745": "nan", "4750": "nan", "4755": "nan", "4760": "nan", "4765": "nan", "4770": "nan", "4775": "nan", "4780": "nan", "4785": "nan", "4790": "nan", "4795": "nan", "4800": 3.38137, "4805": "nan", "4810": "nan", "4815": "nan", "4820": "nan", "4825": "nan", "4830": "nan", "4835": "nan", "4840": "nan", "4845": "nan", "4850": "nan", "4855": "nan", "4860": "nan", "4865": "nan", "4870": "nan", "4875": "nan", "4880": "nan", "4885": "nan", "4890": "nan", "4895": "nan", "4900": 3.38055, "4905": "nan", "4910": "nan", "4915": "nan", "4920": "nan", "4925": "nan", "4930": "nan", "4935": "nan", "4940": "nan", "4945": "nan", "4950": "nan", "4955": "nan", "4960": "nan", "4965": "nan", "4970": "nan", "4975": "nan", "4980": "nan", "4985": "nan", "4990": "nan", "4995": "nan", "5000": 3.38053, "5005": "nan", "5010": "nan", "5015": "nan", "5020": "nan", "5025": "nan", "5030": "nan", "5035": "nan", "5040": "nan", "5045": "nan", "5050": "nan", "5055": "nan", "5060": "nan", "5065": "nan", "5070": "nan", "5075": "nan", "5080": "nan", "5085": "nan", "5090": "nan", "5095": "nan", "5100": 3.3822, "5105": "nan", "5110": "nan", "5115": "nan", "5120": "nan", "5125": "nan", "5130": "nan", "5135": "nan", "5140": "nan", "5145": "nan", "5150": "nan", "5155": "nan", "5160": "nan", "5165": "nan", "5170": "nan", "5175": "nan", "5180": "nan", "5185": "nan", "5190": "nan", "5195": "nan", "5200": 3.38192, "5205": "nan", "5210": "nan", "5215": "nan", "5220": "nan", "5225": "nan", "5230": "nan", "5235": "nan", "5240": "nan", "5245": "nan", "5250": "nan", "5255": "nan", "5260": "nan", "5265": "nan", "5270": "nan", "5275": "nan", "5280": "nan", "5285": "nan", "5290": "nan", "5295": "nan", "5300": 3.38156, "5305": "nan", "5310": "nan", "5315": "nan", "5320": "nan", "5325": "nan", "5330": "nan", "5335": "nan", "5340": "nan", "5345": "nan", "5350": "nan", "5355": "nan", "5360": "nan", "5365": "nan", "5370": "nan", "5375": "nan", "5380": "nan", "5385": "nan", "5390": "nan", "5395": "nan", "5400": 3.38069, "5405": "nan", "5410": "nan", "5415": "nan", "5420": "nan", "5425": "nan", "5430": "nan", "5435": "nan", "5440": "nan", "5445": "nan", "5450": "nan", "5455": "nan", "5460": "nan", "5465": "nan", "5470": "nan", "5475": "nan", "5480": "nan", "5485": "nan", "5490": "nan", "5495": "nan", "5500": 3.38347, "5505": "nan", "5510": "nan", "5515": "nan", "5520": "nan", "5525": "nan", "5530": "nan", "5535": "nan", "5540": "nan", "5545": "nan", "5550": "nan", "5555": "nan", "5560": "nan", "5565": "nan", "5570": "nan", "5575": "nan", "5580": "nan", "5585": "nan", "5590": "nan", "5595": "nan", "5600": 3.38222, "5605": "nan", "5610": "nan", "5615": "nan", "5620": "nan", "5625": "nan", "5630": "nan", "5635": "nan", "5640": "nan", "5645": "nan", "5650": "nan", "5655": "nan", "5660": "nan", "5665": "nan", "5670": "nan", "5675": "nan", "5680": "nan", "5685": "nan", "5690": "nan", "5695": "nan", "5700": 3.38243, "5705": "nan", "5710": "nan", "5715": "nan", "5720": "nan", "5725": "nan", "5730": "nan", "5735": "nan", "5740": "nan", "5745": "nan", "5750": "nan", "5755": "nan", "5760": "nan", "5765": "nan", "5770": "nan", "5775": "nan", "5780": "nan", "5785": "nan", "5790": "nan", "5795": "nan", "5800": 3.38216, "5805": "nan", "5810": "nan", "5815": "nan", "5820": "nan", "5825": "nan", "5830": "nan", "5835": "nan", "5840": "nan", "5845": "nan", "5850": "nan", "5855": "nan", "5860": "nan", "5865": "nan", "5870": "nan", "5875": "nan", "5880": "nan", "5885": "nan", "5890": "nan", "5895": "nan", "5900": 3.38049, "5905": "nan", "5910": "nan", "5915": "nan", "5920": "nan", "5925": "nan", "5930": "nan", "5935": "nan", "5940": "nan", "5945": "nan", "5950": "nan", "5955": "nan", "5960": "nan", "5965": "nan", "5970": "nan", "5975": "nan", "5980": "nan", "5985": "nan", "5990": "nan", "5995": "nan", "6000": 3.38023, "6005": "nan", "6010": "nan", "6015": "nan", "6020": "nan", "6025": "nan", "6030": "nan", "6035": "nan", "6040": "nan", "6045": "nan", "6050": "nan", "6055": "nan", "6060": "nan", "6065": "nan", "6070": "nan", "6075": "nan", "6080": "nan", "6085": "nan", "6090": "nan", "6095": "nan", "6100": 3.37904, "6105": "nan", "6110": "nan", "6115": "nan", "6120": "nan", "6125": "nan", "6130": "nan", "6135": "nan", "6140": "nan", "6145": "nan", "6150": "nan", "6155": "nan", "6160": "nan", "6165": "nan", "6170": "nan", "6175": "nan", "6180": "nan", "6185": "nan", "6190": "nan", "6195": "nan", "6200": 3.38026, "6205": "nan", "6210": "nan", "6215": "nan", "6220": "nan", "6225": "nan", "6230": "nan", "6235": "nan", "6240": "nan", "6245": "nan", "6250": "nan", "6255": "nan", "6260": "nan", "6265": "nan", "6270": "nan", "6275": "nan", "6280": "nan", "6285": "nan", "6290": "nan", "6295": "nan", "6300": 3.38003, "6305": "nan", "6310": "nan", "6315": "nan", "6320": "nan", "6325": "nan", "6330": "nan", "6335": "nan", "6340": "nan", "6345": "nan", "6350": "nan", "6355": "nan", "6360": "nan", "6365": "nan", "6370": "nan", "6375": "nan", "6380": "nan", "6385": "nan", "6390": "nan", "6395": "nan", "6400": 3.37878, "6405": "nan", "6410": "nan", "6415": "nan", "6420": "nan", "6425": "nan", "6430": "nan", "6435": "nan", "6440": "nan", "6445": "nan", "6450": "nan", "6455": "nan", "6460": "nan", "6465": "nan", "6470": "nan", "6475": "nan", "6480": "nan", "6485": "nan", "6490": "nan", "6495": "nan", "6500": 3.38006, "6505": "nan", "6510": "nan", "6515": "nan", "6520": "nan", "6525": "nan", "6530": "nan", "6535": "nan", "6540": "nan", "6545": "nan", "6550": "nan", "6555": "nan", "6560": "nan", "6565": "nan", "6570": "nan", "6575": "nan", "6580": "nan", "6585": "nan", "6590": "nan", "6595": "nan", "6600": 3.38036, "6605": "nan", "6610": "nan", "6615": "nan", "6620": "nan", "6625": "nan", "6630": "nan", "6635": "nan", "6640": "nan", "6645": "nan", "6650": "nan", "6655": "nan", "6660": "nan", "6665": "nan", "6670": "nan", "6675": "nan", "6680": "nan", "6685": "nan", "6690": "nan", "6695": "nan", "6700": 3.38116, "6705": "nan", "6710": "nan", "6715": "nan", "6720": "nan", "6725": "nan", "6730": "nan", "6735": "nan", "6740": "nan", "6745": "nan", "6750": "nan", "6755": "nan", "6760": "nan", "6765": "nan", "6770": "nan", "6775": "nan", "6780": "nan", "6785": "nan", "6790": "nan", "6795": "nan", "6800": 3.39788, "6805": "nan", "6810": "nan", "6815": "nan", "6820": "nan", "6825": "nan", "6830": "nan", "6835": "nan", "6840": "nan", "6845": "nan", "6850": "nan", "6855": "nan", "6860": "nan", "6865": "nan", "6870": "nan", "6875": "nan", "6880": "nan", "6885": "nan", "6890": "nan", "6895": "nan", "6900": 3.39841, "6905": "nan", "6910": "nan", "6915": "nan", "6920": "nan", "6925": "nan", "6930": "nan", "6935": "nan", "6940": "nan", "6945": "nan", "6950": "nan", "6955": "nan", "6960": "nan", "6965": "nan", "6970": "nan", "6975": "nan", "6980": "nan", "6985": "nan", "6990": "nan", "6995": "nan", "7000": 3.39845, "7005": "nan", "7010": "nan", "7015": "nan", "7020": "nan", "7025": "nan", "7030": "nan", "7035": "nan", "7040": "nan", "7045": "nan", "7050": "nan", "7055": "nan", "7060": "nan", "7065": "nan", "7070": "nan", "7075": "nan", "7080": "nan", "7085": "nan", "7090": "nan", "7095": "nan", "7100": 3.37973, "7105": "nan", "7110": "nan", "7115": "nan", "7120": "nan", "7125": "nan", "7130": "nan", "7135": "nan", "7140": "nan", "7145": "nan", "7150": "nan", "7155": "nan", "7160": "nan", "7165": "nan", "7170": "nan", "7175": "nan", "7180": "nan", "7185": "nan", "7190": "nan", "7195": "nan", "7200": 3.3813, "7205": "nan", "7210": "nan", "7215": "nan", "7220": "nan", "7225": "nan", "7230": "nan", "7235": "nan", "7240": "nan", "7245": "nan", "7250": "nan", "7255": "nan", "7260": "nan", "7265": "nan", "7270": "nan", "7275": "nan", "7280": "nan", "7285": "nan", "7290": "nan", "7295": "nan", "7300": 3.38135, "7305": "nan", "7310": "nan", "7315": "nan", "7320": "nan", "7325": "nan", "7330": "nan", "7335": "nan", "7340": "nan", "7345": "nan", "7350": "nan", "7355": "nan", "7360": "nan", "7365": "nan", "7370": "nan", "7375": "nan", "7380": "nan", "7385": "nan", "7390": "nan", "7395": "nan", "7400": 3.38258, "7405": "nan", "7410": "nan", "7415": "nan", "7420": "nan", "7425": "nan", "7430": "nan", "7435": "nan", "7440": "nan", "7445": "nan", "7450": "nan", "7455": "nan", "7460": "nan", "7465": "nan", "7470": "nan", "7475": "nan", "7480": "nan", "7485": "nan", "7490": "nan", "7495": "nan", "7500": 3.37993, "7505": "nan", "7510": "nan", "7515": "nan", "7520": "nan", "7525": "nan", "7530": "nan", "7535": "nan", "7540": "nan", "7545": "nan", "7550": "nan", "7555": "nan", "7560": "nan", "7565": "nan", "7570": "nan", "7575": "nan", "7580": "nan", "7585": "nan", "7590": "nan", "7595": "nan", "7600": 3.3791, "7605": "nan", "7610": "nan", "7615": "nan", "7620": "nan", "7625": "nan", "7630": "nan", "7635": "nan", "7640": "nan", "7645": "nan", "7650": "nan", "7655": "nan", "7660": "nan", "7665": "nan", "7670": "nan", "7675": "nan", "7680": "nan", "7685": "nan", "7690": "nan", "7695": "nan", "7700": 3.3801, "7705": "nan", "7710": "nan", "7715": "nan", "7720": "nan", "7725": "nan", "7730": "nan", "7735": "nan", "7740": "nan", "7745": "nan", "7750": "nan", "7755": "nan", "7760": "nan", "7765": "nan", "7770": "nan", "7775": "nan", "7780": "nan", "7785": "nan", "7790": "nan", "7795": "nan", "7800": 3.40629, "7805": "nan", "7810": "nan", "7815": "nan", "7820": "nan", "7825": "nan", "7830": "nan", "7835": "nan", "7840": "nan", "7845": "nan", "7850": "nan", "7855": "nan", "7860": "nan", "7865": "nan", "7870": "nan", "7875": "nan", "7880": "nan", "7885": "nan", "7890": "nan", "7895": "nan", "7900": 3.39663, "7905": "nan", "7910": "nan", "7915": "nan", "7920": "nan", "7925": "nan", "7930": "nan", "7935": "nan", "7940": "nan", "7945": "nan", "7950": "nan", "7955": "nan", "7960": "nan", "7965": "nan", "7970": "nan", "7975": "nan", "7980": "nan", "7985": "nan", "7990": "nan", "7995": "nan", "8000": 3.39421, "8005": "nan", "8010": "nan", "8015": "nan", "8020": "nan", "8025": "nan", "8030": "nan", "8035": "nan", "8040": "nan", "8045": "nan", "8050": "nan", "8055": "nan", "8060": "nan", "8065": "nan", "8070": "nan", "8075": "nan", "8080": "nan", "8085": "nan", "8090": "nan", "8095": "nan", "8100": 3.38433, "8105": "nan", "8110": "nan", "8115": "nan", "8120": "nan", "8125": "nan", "8130": "nan", "8135": "nan", "8140": "nan", "8145": "nan", "8150": "nan", "8155": "nan", "8160": "nan", "8165": "nan", "8170": "nan", "8175": "nan", "8180": "nan", "8185": "nan", "8190": "nan", "8195": "nan", "8200": 3.38564, "8205": "nan", "8210": "nan", "8215": "nan", "8220": "nan", "8225": "nan", "8230": "nan", "8235": "nan", "8240": "nan", "8245": "nan", "8250": "nan", "8255": "nan", "8260": "nan", "8265": "nan", "8270": "nan", "8275": "nan", "8280": "nan", "8285": "nan", "8290": "nan", "8295": "nan", "8300": 3.38548, "8305": "nan", "8310": "nan", "8315": "nan", "8320": "nan", "8325": "nan", "8330": "nan", "8335": "nan", "8340": "nan", "8345": "nan", "8350": "nan", "8355": "nan", "8360": "nan", "8365": "nan", "8370": "nan", "8375": "nan", "8380": "nan", "8385": "nan", "8390": "nan", "8395": "nan", "8400": 3.38652, "8405": "nan", "8410": "nan", "8415": "nan", "8420": "nan", "8425": "nan", "8430": "nan", "8435": "nan", "8440": "nan", "8445": "nan", "8450": "nan", "8455": "nan", "8460": "nan", "8465": "nan", "8470": "nan", "8475": "nan", "8480": "nan", "8485": "nan", "8490": "nan", "8495": "nan", "8500": 3.38537, "8505": "nan", "8510": "nan", "8515": "nan", "8520": "nan", "8525": "nan", "8530": "nan", "8535": "nan", "8540": "nan", "8545": "nan", "8550": "nan", "8555": "nan", "8560": "nan", "8565": "nan", "8570": "nan", "8575": "nan", "8580": "nan", "8585": "nan", "8590": "nan", "8595": "nan", "8600": 3.38441, "8605": "nan", "8610": "nan", "8615": "nan", "8620": "nan", "8625": "nan", "8630": "nan", "8635": "nan", "8640": "nan", "8645": "nan", "8650": "nan", "8655": "nan", "8660": "nan", "8665": "nan", "8670": "nan", "8675": "nan", "8680": "nan", "8685": "nan", "8690": "nan", "8695": "nan", "8700": 3.38569, "8705": "nan", "8710": "nan", "8715": "nan", "8720": "nan", "8725": "nan", "8730": "nan", "8735": "nan", "8740": "nan", "8745": "nan", "8750": "nan", "8755": "nan", "8760": "nan", "8765": "nan", "8770": "nan", "8775": "nan", "8780": "nan", "8785": "nan", "8790": "nan", "8795": "nan", "8800": 3.38471, "8805": "nan", "8810": "nan", "8815": "nan", "8820": "nan", "8825": "nan", "8830": "nan", "8835": "nan", "8840": "nan", "8845": "nan", "8850": "nan", "8855": "nan", "8860": "nan", "8865": "nan", "8870": "nan", "8875": "nan", "8880": "nan", "8885": "nan", "8890": "nan", "8895": "nan", "8900": 3.38583, "8905": "nan", "8910": "nan", "8915": "nan", "8920": "nan", "8925": "nan", "8930": "nan", "8935": "nan", "8940": "nan", "8945": "nan", "8950": "nan", "8955": "nan", "8960": "nan", "8965": "nan", "8970": "nan", "8975": "nan", "8980": "nan", "8985": "nan", "8990": "nan", "8995": "nan", "9000": 3.38381, "9005": "nan", "9010": "nan", "9015": "nan", "9020": "nan", "9025": "nan", "9030": "nan", "9035": "nan", "9040": "nan", "9045": "nan", "9050": "nan", "9055": "nan", "9060": "nan", "9065": "nan", "9070": "nan", "9075": "nan", "9080": "nan", "9085": "nan", "9090": "nan", "9095": "nan", "9100": 3.38317, "9105": "nan", "9110": "nan", "9115": "nan", "9120": "nan", "9125": "nan", "9130": "nan", "9135": "nan", "9140": "nan", "9145": "nan", "9150": "nan", "9155": "nan", "9160": "nan", "9165": "nan", "9170": "nan", "9175": "nan", "9180": "nan", "9185": "nan", "9190": "nan", "9195": "nan", "9200": 3.3843, "9205": "nan", "9210": "nan", "9215": "nan", "9220": "nan", "9225": "nan", "9230": "nan", "9235": "nan", "9240": "nan", "9245": "nan", "9250": "nan", "9255": "nan", "9260": "nan", "9265": "nan", "9270": "nan", "9275": "nan", "9280": "nan", "9285": "nan", "9290": "nan", "9295": "nan", "9300": 3.38399, "9305": "nan", "9310": "nan", "9315": "nan", "9320": "nan", "9325": "nan", "9330": "nan", "9335": "nan", "9340": "nan", "9345": "nan", "9350": "nan", "9355": "nan", "9360": "nan", "9365": "nan", "9370": "nan", "9375": "nan", "9380": "nan", "9385": "nan", "9390": "nan", "9395": "nan", "9400": 3.38416, "9405": "nan", "9410": "nan", "9415": "nan", "9420": "nan", "9425": "nan", "9430": "nan", "9435": "nan", "9440": "nan", "9445": "nan", "9450": "nan", "9455": "nan", "9460": "nan", "9465": "nan", "9470": "nan", "9475": "nan", "9480": "nan", "9485": "nan", "9490": "nan", "9495": "nan", "9500": 3.38376, "9505": "nan", "9510": "nan", "9515": "nan", "9520": "nan", "9525": "nan", "9530": "nan", "9535": "nan", "9540": "nan", "9545": "nan", "9550": "nan", "9555": "nan", "9560": "nan", "9565": "nan", "9570": "nan", "9575": "nan", "9580": "nan", "9585": "nan", "9590": "nan", "9595": "nan", "9600": 3.38327, "9605": "nan", "9610": "nan", "9615": "nan", "9620": "nan", "9625": "nan", "9630": "nan", "9635": "nan", "9640": "nan", "9645": "nan", "9650": "nan", "9655": "nan", "9660": "nan", "9665": "nan", "9670": "nan", "9675": "nan", "9680": "nan", "9685": "nan", "9690": "nan", "9695": "nan", "9700": 3.38432, "9705": "nan", "9710": "nan", "9715": "nan", "9720": "nan", "9725": "nan", "9730": "nan", "9735": "nan", "9740": "nan", "9745": "nan", "9750": "nan", "9755": "nan", "9760": "nan", "9765": "nan", "9770": "nan", "9775": "nan", "9780": "nan", "9785": "nan", "9790": "nan", "9795": "nan", "9800": 3.38427, "9805": "nan", "9810": "nan", "9815": "nan", "9820": "nan", "9825": "nan", "9830": "nan", "9835": "nan", "9840": "nan", "9845": "nan", "9850": "nan", "9855": "nan", "9860": "nan", "9865": "nan", "9870": "nan", "9875": "nan", "9880": "nan", "9885": "nan", "9890": "nan", "9895": "nan", "9900": 3.38415, "9905": "nan", "9910": "nan", "9915": "nan", "9920": "nan", "9925": "nan", "9930": "nan", "9935": "nan", "9940": "nan", "9945": "nan", "9950": "nan", "9955": "nan", "9960": "nan", "9965": "nan", "9970": "nan", "9975": "nan", "9980": "nan", "9985": "nan", "9990": "nan", "9995": "nan", "10000": 3.38426, "10005": "nan", "10010": "nan", "10015": "nan", "10020": "nan", "10025": "nan", "10030": "nan", "10035": "nan", "10040": "nan", "10045": "nan", "10050": "nan", "10055": "nan", "10060": "nan", "10065": "nan", "10070": "nan", "10075": "nan", "10080": "nan", "10085": "nan", "10090": "nan", "10095": "nan", "10100": 3.38377, "10105": "nan", "10110": "nan", "10115": "nan", "10120": "nan", "10125": "nan", "10130": "nan", "10135": "nan", "10140": "nan", "10145": "nan", "10150": "nan", "10155": "nan", "10160": "nan", "10165": "nan", "10170": "nan", "10175": "nan", "10180": "nan", "10185": "nan", "10190": "nan", "10195": "nan", "10200": 3.38456, "10205": "nan", "10210": "nan", "10215": "nan", "10220": "nan", "10225": "nan", "10230": "nan", "10235": "nan", "10240": "nan", "10245": "nan", "10250": "nan", "10255": "nan", "10260": "nan", "10265": "nan", "10270": "nan", "10275": "nan", "10280": "nan", "10285": "nan", "10290": "nan", "10295": "nan", "10300": 3.38487, "10305": "nan", "10310": "nan", "10315": "nan", "10320": "nan", "10325": "nan", "10330": "nan", "10335": "nan", "10340": "nan", "10345": "nan", "10350": "nan", "10355": "nan", "10360": "nan", "10365": "nan", "10370": "nan", "10375": "nan", "10380": "nan", "10385": "nan", "10390": "nan", "10395": "nan", "10400": 3.38413, "10405": "nan", "10410": "nan", "10415": "nan", "10420": "nan", "10425": "nan", "10430": "nan", "10435": "nan", "10440": "nan", "10445": "nan", "10450": "nan", "10455": "nan", "10460": "nan", "10465": "nan", "10470": "nan", "10475": "nan", "10480": "nan", "10485": "nan", "10490": "nan", "10495": "nan", "10500": 3.38411, "10505": "nan", "10510": "nan", "10515": "nan", "10520": "nan", "10525": "nan", "10530": "nan", "10535": "nan", "10540": "nan", "10545": "nan", "10550": "nan", "10555": "nan", "10560": "nan", "10565": "nan", "10570": "nan", "10575": "nan", "10580": "nan", "10585": "nan", "10590": "nan", "10595": "nan", "10600": 3.38408, "10605": "nan", "10610": "nan", "10615": "nan", "10620": "nan", "10625": "nan", "10630": "nan", "10635": "nan", "10640": "nan", "10645": "nan", "10650": "nan", "10655": "nan", "10660": "nan", "10665": "nan", "10670": "nan", "10675": "nan", "10680": "nan", "10685": "nan", "10690": "nan", "10695": "nan", "10700": 3.3839, "10705": "nan", "10710": "nan", "10715": "nan", "10720": "nan", "10725": "nan", "10730": "nan", "10735": "nan", "10740": "nan", "10745": "nan", "10750": "nan", "10755": "nan", "10760": "nan", "10765": "nan", "10770": "nan", "10775": "nan", "10780": "nan", "10785": "nan", "10790": "nan", "10795": "nan", "10800": 3.38381, "10805": "nan", "10810": "nan", "10815": "nan", "10820": "nan", "10825": "nan", "10830": "nan", "10835": "nan", "10840": "nan", "10845": "nan", "10850": "nan", "10855": "nan", "10860": "nan", "10865": "nan", "10870": "nan", "10875": "nan", "10880": "nan", "10885": "nan", "10890": "nan", "10895": "nan", "10900": 3.83861, "10905": "nan", "10910": "nan", "10915": "nan", "10920": "nan", "10925": "nan", "10930": "nan", "10935": "nan", "10940": "nan", "10945": "nan", "10950": "nan", "10955": "nan", "10960": "nan", "10965": "nan", "10970": "nan", "10975": "nan", "10980": "nan", "10985": "nan", "10990": "nan", "10995": "nan", "11000": 3.39347, "11005": "nan", "11010": "nan", "11015": "nan", "11020": "nan", "11025": "nan", "11030": "nan", "11035": "nan", "11040": "nan", "11045": "nan", "11050": "nan", "11055": "nan", "11060": "nan", "11065": "nan", "11070": "nan", "11075": "nan", "11080": "nan", "11085": "nan", "11090": "nan", "11095": "nan", "11100": 3.39189, "11105": "nan", "11110": "nan", "11115": "nan", "11120": "nan", "11125": "nan", "11130": "nan", "11135": "nan", "11140": "nan", "11145": "nan", "11150": "nan", "11155": "nan", "11160": "nan", "11165": "nan", "11170": "nan", "11175": "nan", "11180": "nan", "11185": "nan", "11190": "nan", "11195": "nan", "11200": 3.3918, "11205": "nan", "11210": "nan", "11215": "nan", "11220": "nan", "11225": "nan", "11230": "nan", "11235": "nan", "11240": "nan", "11245": "nan", "11250": "nan", "11255": "nan", "11260": "nan", "11265": "nan", "11270": "nan", "11275": "nan", "11280": "nan", "11285": "nan", "11290": "nan", "11295": "nan", "11300": 3.39238, "11305": "nan", "11310": "nan", "11315": "nan", "11320": "nan", "11325": "nan", "11330": "nan", "11335": "nan", "11340": "nan", "11345": "nan", "11350": "nan", "11355": "nan", "11360": "nan", "11365": "nan", "11370": "nan", "11375": "nan", "11380": "nan", "11385": "nan", "11390": "nan", "11395": "nan", "11400": 3.39244, "11405": "nan", "11410": "nan", "11415": "nan", "11420": "nan", "11425": "nan", "11430": "nan", "11435": "nan", "11440": "nan", "11445": "nan", "11450": "nan", "11455": "nan", "11460": "nan", "11465": "nan", "11470": "nan", "11475": "nan", "11480": "nan", "11485": "nan", "11490": "nan", "11495": "nan", "11500": 3.39253, "11505": "nan", "11510": "nan", "11515": "nan", "11520": "nan", "11525": "nan", "11530": "nan", "11535": "nan", "11540": "nan", "11545": "nan", "11550": "nan", "11555": "nan", "11560": "nan", "11565": "nan", "11570": "nan", "11575": "nan", "11580": "nan", "11585": "nan", "11590": "nan", "11595": "nan", "11600": 3.39379, "11605": "nan", "11610": "nan", "11615": "nan", "11620": "nan", "11625": "nan", "11630": "nan", "11635": "nan", "11640": "nan", "11645": "nan", "11650": "nan", "11655": "nan", "11660": "nan", "11665": "nan", "11670": "nan", "11675": "nan", "11680": "nan", "11685": "nan", "11690": "nan", "11695": "nan", "11700": 3.39314, "11705": "nan", "11710": "nan", "11715": "nan", "11720": "nan", "11725": "nan", "11730": "nan", "11735": "nan", "11740": "nan", "11745": "nan", "11750": "nan", "11755": "nan", "11760": "nan", "11765": "nan", "11770": "nan", "11775": "nan", "11780": "nan", "11785": "nan", "11790": "nan", "11795": "nan", "11800": 3.41793, "11805": "nan", "11810": "nan", "11815": "nan", "11820": "nan", "11825": "nan", "11830": "nan", "11835": "nan", "11840": "nan", "11845": "nan", "11850": "nan", "11855": "nan", "11860": "nan", "11865": "nan", "11870": "nan", "11875": "nan", "11880": "nan", "11885": "nan", "11890": "nan", "11895": "nan", "11900": 3.39269, "11905": "nan", "11910": "nan", "11915": "nan", "11920": "nan", "11925": "nan", "11930": "nan", "11935": "nan", "11940": "nan", "11945": "nan", "11950": "nan", "11955": "nan", "11960": "nan", "11965": "nan", "11970": "nan", "11975": "nan", "11980": "nan", "11985": "nan", "11990": "nan", "11995": "nan", "12000": 3.39317, "12005": "nan", "12010": "nan", "12015": "nan", "12020": "nan", "12025": "nan", "12030": "nan", "12035": "nan", "12040": "nan", "12045": "nan", "12050": "nan", "12055": "nan", "12060": "nan", "12065": "nan", "12070": "nan", "12075": "nan", "12080": "nan", "12085": "nan", "12090": "nan", "12095": "nan", "12100": 3.39129, "12105": "nan", "12110": "nan", "12115": "nan", "12120": "nan", "12125": "nan", "12130": "nan", "12135": "nan", "12140": "nan", "12145": "nan", "12150": "nan", "12155": "nan", "12160": "nan", "12165": "nan", "12170": "nan", "12175": "nan", "12180": "nan", "12185": "nan", "12190": "nan", "12195": "nan", "12200": 3.3923, "12205": "nan", "12210": "nan", "12215": "nan", "12220": "nan", "12225": "nan", "12230": "nan", "12235": "nan", "12240": "nan", "12245": "nan", "12250": "nan", "12255": "nan", "12260": "nan", "12265": "nan", "12270": "nan", "12275": "nan", "12280": "nan", "12285": "nan", "12290": "nan", "12295": "nan", "12300": 3.39197, "12305": "nan", "12310": "nan", "12315": "nan", "12320": "nan", "12325": "nan", "12330": "nan", "12335": "nan", "12340": "nan", "12345": "nan", "12350": "nan", "12355": "nan", "12360": "nan", "12365": "nan", "12370": "nan", "12375": "nan", "12380": "nan", "12385": "nan", "12390": "nan", "12395": "nan", "12400": 3.39164, "12405": "nan", "12410": "nan", "12415": "nan", "12420": "nan", "12425": "nan", "12430": "nan", "12435": "nan", "12440": "nan", "12445": "nan", "12450": "nan", "12455": "nan", "12460": "nan", "12465": "nan", "12470": "nan", "12475": "nan", "12480": "nan", "12485": "nan", "12490": "nan", "12495": "nan", "12500": 3.39073, "12505": "nan", "12510": "nan", "12515": "nan", "12520": "nan", "12525": "nan", "12530": "nan", "12535": "nan", "12540": "nan", "12545": "nan", "12550": "nan", "12555": "nan", "12560": "nan", "12565": "nan", "12570": "nan", "12575": "nan", "12580": "nan", "12585": "nan", "12590": "nan", "12595": "nan", "12600": 3.39126, "12605": "nan", "12610": "nan", "12615": "nan", "12620": "nan", "12625": "nan", "12630": "nan", "12635": "nan", "12640": "nan", "12645": "nan", "12650": "nan", "12655": "nan", "12660": "nan", "12665": "nan", "12670": "nan", "12675": "nan", "12680": "nan", "12685": "nan", "12690": "nan", "12695": "nan", "12700": 3.39109, "12705": "nan", "12710": "nan", "12715": "nan", "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": "nan", "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": "nan", "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": "nan"}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release_sm/model_config.yaml
================================================
ENV_VARS:
  NCCL_IB_SL: 1
  NCCL_IB_TIMEOUT: 19
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_FWD_LAYERNORM_SM_MARGIN: 16
  NVTE_BWD_LAYERNORM_SM_MARGIN: 16
  NCCL_P2P_NET_CHUNKSIZE: 2097152
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
  NON_DETERMINSTIC_RESULTS: 1
TEST_TYPE: "release"
MODEL_ARGS:
  # Distributed args
  --distributed-timeout-minutes: 60
  --tensor-model-parallel-size: 8
  --pipeline-model-parallel-size: 1
  --use-distributed-optimizer: true
  --overlap-grad-reduce: true
  --overlap-param-gather: true
  # Training args
  --use-mcore-models: true
  --sequence-parallel: true
  --disable-bias-linear: true
  --micro-batch-size: 4
  --rampup-batch-size: "[384 384 97656250]"
  --global-batch-size: 1152
  --train-samples: 4882812
  --manual-gc: true
  # Transformer Engine args
  --transformer-impl: transformer_engine
  # Data args
  --data-cache-path: ${DATA_CACHE_PATH}
  --tokenizer-type: GPTSentencePieceTokenizer
  --tokenizer-model: ${DATA_PATH}/utils/nemotron_2_256k.model
  --data-path: $DATA_BLEND
  --split: 99,1,0
  --no-mmap-bin-files: true
  --num-workers: 6
  # Add network size args
  --apply-layernorm-1p: true
  --untie-embeddings-and-output-weights: true
  --position-embedding-type: rope
  --no-rope-fusion: true #TODO: We can remove this once upgrading to the DEV container
  --rotary-percent: 0.5
  --squared-relu: true
  --num-layers: 32
  --hidden-size: 6144
  --num-attention-heads: 48
  --group-query-attention: true
  --num-query-groups: 8
  --seq-length: 4096
  --max-position-embeddings: 4096
  # Add regularization args
  --attention-dropout: 0.0
  --hidden-dropout: 0.0
  --clip-grad: 1.0
  --weight-decay: 0.1
  # Add learning rate args
  --lr-decay-samples: 1949218748
  --lr-warmup-samples: 3906252
  --lr: 4.5e-4
  --min-lr: 4.5e-5
  --decoupled-lr: 5.0e-4
  --decoupled-min-lr: 4.5e-5
  --lr-decay-style: cosine
  --adam-beta1: 0.9
  --adam-beta2: 0.95
  # Add validation args
  --eval-iters: 32
  --eval-interval: 2000
  # Add checkpointing args
  --load: ${CHECKPOINT_LOAD_PATH}
  --save: ${CHECKPOINT_SAVE_PATH}
  --save-interval: 1000
  --save-retain-interval: 5000
  # Add initialization args
  --init-method-std: 0.0134
  # Add logging args
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  --log-num-zeros-in-grad: true
  --log-params-norm: true
  --log-validation-ppl-to-tensorboard: true
  --log-throughput: true
  --log-interval: 100
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --wandb-project: megatron-core-release-runs
  --wandb-entity: adlr
  --wandb-exp-name: ${WANDB_EXPERIMENT}
  # Add mixed precision args
  --bf16: true
  --exit-interval: 13000
  --wandb-save-dir: ${WANDB_SAVE_PATH}
  --async-save: true
  --use-persistent-ckpt-worker: true
METRICS:
  - "iteration-time"
  - "lm loss"
  - "mem-allocated-bytes"
  - "mem-max-allocated-bytes"


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release_sm_gb200/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 13000,
        "step_interval": 5,
        "values": {
            "1": 13.00341,
            "5": 12.95282,
            "10": 12.10778,
            "15": 11.54743,
            "20": 10.38064,
            "25": 9.96943,
            "30": 9.61583,
            "35": 9.35993,
            "40": 9.18388,
            "45": 9.04655,
            "50": 8.88708,
            "55": 8.73169,
            "60": 8.64595,
            "65": 8.56844,
            "70": 8.47431,
            "75": 8.29862,
            "80": 8.15455,
            "85": 8.13037,
            "90": 8.03999,
            "95": 7.94008,
            "100": 7.81135,
            "105": 7.68743,
            "110": 7.59148,
            "115": 7.48955,
            "120": 7.46567,
            "125": 7.41631,
            "130": 7.2757,
            "135": 7.23718,
            "140": 7.1914,
            "145": 7.04633,
            "150": 7.18433,
            "155": 7.03577,
            "160": 6.94043,
            "165": 6.93135,
            "170": 6.81691,
            "175": 6.84664,
            "180": 6.8163,
            "185": 6.7551,
            "190": 6.69768,
            "195": 6.63881,
            "200": 6.65089,
            "205": 6.61819,
            "210": 6.53871,
            "215": 6.50275,
            "220": 6.51297,
            "225": 6.49057,
            "230": 6.528,
            "235": 6.46104,
            "240": 6.3991,
            "245": 6.39504,
            "250": 6.34376,
            "255": 6.45426,
            "260": 6.35163,
            "265": 6.30593,
            "270": 6.2449,
            "275": 6.24094,
            "280": 6.22016,
            "285": 6.22853,
            "290": 6.18891,
            "295": 6.13549,
            "300": 6.12886,
            "305": 6.05304,
            "310": 6.11723,
            "315": 6.08704,
            "320": 5.99557,
            "325": 5.9427,
            "330": 5.9916,
            "335": 6.01094,
            "340": 5.943,
            "345": 5.92627,
            "350": 5.91033,
            "355": 5.8613,
            "360": 5.90035,
            "365": 5.86533,
            "370": 5.83734,
            "375": 5.87837,
            "380": 5.84845,
            "385": 5.78602,
            "390": 5.79911,
            "395": 5.69522,
            "400": 5.66258,
            "405": 5.6747,
            "410": 5.66221,
            "415": 5.69803,
            "420": 5.67149,
            "425": 5.68823,
            "430": 5.63923,
            "435": 5.565,
            "440": 5.61082,
            "445": 5.52916,
            "450": 5.60319,
            "455": 5.51312,
            "460": 5.4984,
            "465": 5.59438,
            "470": 5.56489,
            "475": 5.48861,
            "480": 5.48945,
            "485": 5.50778,
            "490": 5.48381,
            "495": 5.46072,
            "500": 5.40169,
            "505": 5.35826,
            "510": 5.45831,
            "515": 5.42488,
            "520": 5.44311,
            "525": 5.28781,
            "530": 5.32698,
            "535": 5.32999,
            "540": 5.3123,
            "545": 5.37638,
            "550": 5.35248,
            "555": 5.21556,
            "560": 5.32644,
            "565": 5.27412,
            "570": 5.27391,
            "575": 5.30158,
            "580": 5.2278,
            "585": 5.19805,
            "590": 5.20458,
            "595": 5.22492,
            "600": 5.26217,
            "605": 5.22824,
            "610": 5.21735,
            "615": 5.16884,
            "620": 5.17672,
            "625": 5.19402,
            "630": 5.1403,
            "635": 5.13381,
            "640": 5.08111,
            "645": 5.11608,
            "650": 5.12425,
            "655": 5.11953,
            "660": 5.0382,
            "665": 5.08316,
            "670": 5.02529,
            "675": 5.02258,
            "680": 5.01892,
            "685": 4.97919,
            "690": 4.99517,
            "695": 4.96847,
            "700": 4.95202,
            "705": 4.94585,
            "710": 4.95931,
            "715": 4.86635,
            "720": 4.85227,
            "725": 4.81006,
            "730": 4.85128,
            "735": 4.8154,
            "740": 4.83909,
            "745": 4.69365,
            "750": 4.74319,
            "755": 4.77271,
            "760": 4.76402,
            "765": 4.73205,
            "770": 4.67087,
            "775": 4.65419,
            "780": 4.67296,
            "785": 4.76815,
            "790": 4.65704,
            "795": 4.66153,
            "800": 4.61386,
            "805": 4.59823,
            "810": 4.62179,
            "815": 4.57296,
            "820": 4.60773,
            "825": 4.57845,
            "830": 4.56525,
            "835": 4.5459,
            "840": 4.45395,
            "845": 4.46403,
            "850": 4.43813,
            "855": 4.49986,
            "860": 4.43175,
            "865": 4.4862,
            "870": 4.45556,
            "875": 4.34881,
            "880": 4.40215,
            "885": 4.37846,
            "890": 4.41409,
            "895": 4.4217,
            "900": 4.37113,
            "905": 4.32814,
            "910": 4.35962,
            "915": 4.3318,
            "920": 4.37059,
            "925": 4.37232,
            "930": 4.2975,
            "935": 4.28259,
            "940": 4.34895,
            "945": 4.29285,
            "950": 4.35589,
            "955": 4.2374,
            "960": 4.17782,
            "965": 4.28699,
            "970": 4.2605,
            "975": 4.22283,
            "980": 4.21392,
            "985": 4.15862,
            "990": 4.14997,
            "995": 4.17141,
            "1000": 4.24118,
            "1005": 4.17774,
            "1010": 4.1699,
            "1015": 4.12546,
            "1020": 4.14741,
            "1025": 4.21766,
            "1030": 4.1176,
            "1035": 4.09191,
            "1040": 4.09265,
            "1045": 4.09146,
            "1050": 4.13107,
            "1055": 4.08531,
            "1060": 4.09951,
            "1065": 4.06018,
            "1070": 4.05114,
            "1075": 4.05617,
            "1080": 4.05555,
            "1085": 4.08029,
            "1090": 4.0071,
            "1095": 4.0907,
            "1100": 4.05758,
            "1105": 4.07469,
            "1110": 4.02177,
            "1115": 4.01434,
            "1120": 3.99358,
            "1125": 3.98306,
            "1130": 4.05322,
            "1135": 4.0092,
            "1140": 3.99072,
            "1145": 3.92195,
            "1150": 4.03579,
            "1155": 3.99701,
            "1160": 3.97067,
            "1165": 3.86814,
            "1170": 3.92222,
            "1175": 3.92804,
            "1180": 3.95933,
            "1185": 3.96391,
            "1190": 3.92944,
            "1195": 3.93933,
            "1200": 3.8774,
            "1205": 3.85232,
            "1210": 3.98673,
            "1215": 3.83918,
            "1220": 3.84435,
            "1225": 3.80402,
            "1230": 3.90757,
            "1235": 3.88788,
            "1240": 3.86858,
            "1245": 3.79653,
            "1250": 3.8226,
            "1255": 3.86487,
            "1260": 3.88944,
            "1265": 3.8054,
            "1270": 3.8725,
            "1275": 3.84069,
            "1280": 3.8272,
            "1285": 3.84101,
            "1290": 3.86734,
            "1295": 3.84344,
            "1300": 3.81557,
            "1305": 3.82235,
            "1310": 3.81946,
            "1315": 3.81858,
            "1320": 3.82101,
            "1325": 3.71406,
            "1330": 3.78974,
            "1335": 3.75815,
            "1340": 3.74693,
            "1345": 3.74855,
            "1350": 3.72971,
            "1355": 3.76468,
            "1360": 3.72924,
            "1365": 3.72618,
            "1370": 3.745,
            "1375": 3.74466,
            "1380": 3.75533,
            "1385": 3.74749,
            "1390": 3.66162,
            "1395": 3.74044,
            "1400": 3.73268,
            "1405": 3.66171,
            "1410": 3.675,
            "1415": 3.65416,
            "1420": 3.68871,
            "1425": 3.7079,
            "1430": 3.67178,
            "1435": 3.65934,
            "1440": 3.64023,
            "1445": 3.69206,
            "1450": 3.68166,
            "1455": 3.66296,
            "1460": 3.65139,
            "1465": 3.68083,
            "1470": 3.62366,
            "1475": 3.6855,
            "1480": 3.67027,
            "1485": 3.65899,
            "1490": 3.62592,
            "1495": 3.60214,
            "1500": 3.64218,
            "1505": 3.68503,
            "1510": 3.55738,
            "1515": 3.60821,
            "1520": 3.63821,
            "1525": 3.59664,
            "1530": 3.58203,
            "1535": 3.59168,
            "1540": 3.6284,
            "1545": 3.60374,
            "1550": 3.5589,
            "1555": 3.58528,
            "1560": 3.60845,
            "1565": 3.61971,
            "1570": 3.59263,
            "1575": 3.56569,
            "1580": 3.58413,
            "1585": 3.57017,
            "1590": 3.47604,
            "1595": 3.50827,
            "1600": 3.49282,
            "1605": 3.53851,
            "1610": 3.57015,
            "1615": 3.50241,
            "1620": 3.52246,
            "1625": 3.47492,
            "1630": 3.50009,
            "1635": 3.54559,
            "1640": 3.5397,
            "1645": 3.53653,
            "1650": 3.52508,
            "1655": 3.48883,
            "1660": 3.52548,
            "1665": 3.48788,
            "1670": 3.5136,
            "1675": 3.49709,
            "1680": 3.46921,
            "1685": 3.49078,
            "1690": 3.47354,
            "1695": 3.48898,
            "1700": 3.46982,
            "1705": 3.39023,
            "1710": 3.50693,
            "1715": 3.49841,
            "1720": 3.43695,
            "1725": 3.42268,
            "1730": 3.41636,
            "1735": 3.45692,
            "1740": 3.46242,
            "1745": 3.45952,
            "1750": 3.42376,
            "1755": 3.42622,
            "1760": 3.39111,
            "1765": 3.43137,
            "1770": 3.4422,
            "1775": 3.38271,
            "1780": 3.41979,
            "1785": 3.41667,
            "1790": 3.38863,
            "1795": 3.40825,
            "1800": 3.34462,
            "1805": 3.39529,
            "1810": 3.32295,
            "1815": 3.42738,
            "1820": 3.41333,
            "1825": 3.375,
            "1830": 3.33549,
            "1835": 3.43637,
            "1840": 3.40535,
            "1845": 3.42467,
            "1850": 3.38812,
            "1855": 3.36965,
            "1860": 3.33865,
            "1865": 3.38199,
            "1870": 3.30933,
            "1875": 3.43508,
            "1880": 3.3395,
            "1885": 3.35459,
            "1890": 3.33612,
            "1895": 3.39941,
            "1900": 3.37561,
            "1905": 3.30827,
            "1910": 3.32745,
            "1915": 3.30559,
            "1920": 3.35831,
            "1925": 3.33716,
            "1930": 3.32272,
            "1935": 3.32229,
            "1940": 3.38342,
            "1945": 3.27978,
            "1950": 3.40609,
            "1955": 3.28996,
            "1960": 3.28625,
            "1965": 3.27158,
            "1970": 3.29364,
            "1975": 3.3382,
            "1980": 3.33403,
            "1985": 3.24406,
            "1990": 3.30765,
            "1995": 3.27854,
            "2000": 3.27484,
            "2005": 3.26184,
            "2010": 3.26534,
            "2015": 3.21842,
            "2020": 3.27379,
            "2025": 3.26704,
            "2030": 3.28585,
            "2035": 3.30676,
            "2040": 3.25351,
            "2045": 3.24455,
            "2050": 3.27847,
            "2055": 3.32351,
            "2060": 3.28264,
            "2065": 3.24592,
            "2070": 3.28814,
            "2075": 3.23618,
            "2080": 3.23069,
            "2085": 3.27266,
            "2090": 3.14481,
            "2095": 3.27316,
            "2100": 3.239,
            "2105": 3.19514,
            "2110": 3.21434,
            "2115": 3.22623,
            "2120": 3.17749,
            "2125": 3.20847,
            "2130": 3.21248,
            "2135": 3.28745,
            "2140": 3.20611,
            "2145": 3.19872,
            "2150": 3.21887,
            "2155": 3.23068,
            "2160": 3.18815,
            "2165": 3.22776,
            "2170": 3.21354,
            "2175": 3.16799,
            "2180": 3.23529,
            "2185": 3.24865,
            "2190": 3.23197,
            "2195": 3.16199,
            "2200": 3.19947,
            "2205": 3.16674,
            "2210": 3.12196,
            "2215": 3.1938,
            "2220": 3.19794,
            "2225": 3.18644,
            "2230": 3.12773,
            "2235": 3.16594,
            "2240": 3.20496,
            "2245": 3.17049,
            "2250": 3.19927,
            "2255": 3.12229,
            "2260": 3.13331,
            "2265": 3.20933,
            "2270": 3.18357,
            "2275": 3.13915,
            "2280": 3.17223,
            "2285": 3.16447,
            "2290": 3.16657,
            "2295": 3.1927,
            "2300": 3.13208,
            "2305": 3.18964,
            "2310": 3.13932,
            "2315": 3.07633,
            "2320": 3.11821,
            "2325": 3.16623,
            "2330": 3.12536,
            "2335": 3.12162,
            "2340": 3.14297,
            "2345": 3.10963,
            "2350": 3.11784,
            "2355": 3.10625,
            "2360": 3.16104,
            "2365": 3.10584,
            "2370": 3.14549,
            "2375": 3.17332,
            "2380": 3.1178,
            "2385": 3.10236,
            "2390": 3.08947,
            "2395": 3.08137,
            "2400": 3.08073,
            "2405": 3.08587,
            "2410": 3.08314,
            "2415": 3.0791,
            "2420": 3.07113,
            "2425": 3.07964,
            "2430": 3.07021,
            "2435": 3.07029,
            "2440": 3.08337,
            "2445": 3.04968,
            "2450": 3.12731,
            "2455": 3.15086,
            "2460": 3.07874,
            "2465": 3.08421,
            "2470": 3.06043,
            "2475": 3.06934,
            "2480": 3.09621,
            "2485": 3.04953,
            "2490": 3.04445,
            "2495": 3.05866,
            "2500": 3.05497,
            "2505": 3.08808,
            "2510": 3.15669,
            "2515": 3.0582,
            "2520": 3.10236,
            "2525": 3.021,
            "2530": 3.03681,
            "2535": 3.08312,
            "2540": 3.06587,
            "2545": 3.05372,
            "2550": 2.99619,
            "2555": 3.06456,
            "2560": 3.03578,
            "2565": 3.09463,
            "2570": 3.00563,
            "2575": 3.04436,
            "2580": 3.084,
            "2585": 3.01523,
            "2590": 3.06732,
            "2595": 3.00023,
            "2600": 3.08861,
            "2605": 3.14402,
            "2610": 3.08533,
            "2615": 3.07656,
            "2620": 3.00603,
            "2625": 3.01343,
            "2630": 3.03246,
            "2635": 3.0523,
            "2640": 3.00846,
            "2645": 3.05301,
            "2650": 3.01792,
            "2655": 2.98999,
            "2660": 3.01495,
            "2665": 3.04128,
            "2670": 2.9958,
            "2675": 2.9649,
            "2680": 2.99141,
            "2685": 2.99883,
            "2690": 2.99734,
            "2695": 2.98598,
            "2700": 3.03046,
            "2705": 2.9834,
            "2710": 2.96809,
            "2715": 2.96337,
            "2720": 3.0264,
            "2725": 2.98626,
            "2730": 3.03826,
            "2735": 3.02965,
            "2740": 2.99202,
            "2745": 3.02839,
            "2750": 3.00926,
            "2755": 2.97903,
            "2760": 2.99403,
            "2765": 3.00181,
            "2770": 2.97472,
            "2775": 2.9923,
            "2780": 3.0061,
            "2785": 2.95659,
            "2790": 2.95657,
            "2795": 2.94303,
            "2800": 2.95448,
            "2805": 2.93332,
            "2810": 2.98427,
            "2815": 2.96695,
            "2820": 3.00231,
            "2825": 3.0147,
            "2830": 2.99007,
            "2835": 2.90983,
            "2840": 2.92834,
            "2845": 2.99026,
            "2850": 2.97232,
            "2855": 2.96179,
            "2860": 2.94476,
            "2865": 2.91085,
            "2870": 2.9888,
            "2875": 2.91323,
            "2880": 2.94565,
            "2885": 2.91637,
            "2890": 2.97445,
            "2895": 2.92644,
            "2900": 2.94865,
            "2905": 3.03353,
            "2910": 2.91723,
            "2915": 2.93144,
            "2920": 2.94797,
            "2925": 2.93537,
            "2930": 2.95144,
            "2935": 2.9369,
            "2940": 2.96325,
            "2945": 2.91214,
            "2950": 2.98922,
            "2955": 2.90926,
            "2960": 2.96584,
            "2965": 2.88683,
            "2970": 2.95718,
            "2975": 2.98277,
            "2980": 2.9367,
            "2985": 3.03343,
            "2990": 2.93541,
            "2995": 2.86261,
            "3000": 2.92534,
            "3005": 2.88407,
            "3010": 2.93414,
            "3015": 2.92309,
            "3020": 2.977,
            "3025": 2.95352,
            "3030": 2.93532,
            "3035": 2.95868,
            "3040": 2.91521,
            "3045": 2.83891,
            "3050": 2.8977,
            "3055": 2.88904,
            "3060": 2.92244,
            "3065": 2.91471,
            "3070": 2.91449,
            "3075": 2.88932,
            "3080": 2.92122,
            "3085": 2.89336,
            "3090": 2.92476,
            "3095": 2.92179,
            "3100": 2.86189,
            "3105": 2.92443,
            "3110": 2.89386,
            "3115": 2.93361,
            "3120": 2.95895,
            "3125": 2.85462,
            "3130": 2.92521,
            "3135": 2.93995,
            "3140": 2.87634,
            "3145": 2.91792,
            "3150": 2.85868,
            "3155": 2.84568,
            "3160": 2.84763,
            "3165": 2.84155,
            "3170": 2.88845,
            "3175": 2.91094,
            "3180": 2.8661,
            "3185": 2.89866,
            "3190": 2.91812,
            "3195": 2.92615,
            "3200": 2.95465,
            "3205": 2.86572,
            "3210": 2.87823,
            "3215": 2.91074,
            "3220": 2.86578,
            "3225": 2.86915,
            "3230": 2.80969,
            "3235": 2.86948,
            "3240": 2.86986,
            "3245": 2.89771,
            "3250": 2.85186,
            "3255": 2.84589,
            "3260": 2.85824,
            "3265": 2.87471,
            "3270": 2.84718,
            "3275": 2.86791,
            "3280": 2.79556,
            "3285": 2.8117,
            "3290": 2.86902,
            "3295": 2.89815,
            "3300": 2.87745,
            "3305": 2.86306,
            "3310": 2.85809,
            "3315": 2.81091,
            "3320": 2.8372,
            "3325": 2.84637,
            "3330": 2.83284,
            "3335": 2.84257,
            "3340": 2.82204,
            "3345": 2.84111,
            "3350": 2.84457,
            "3355": 2.85755,
            "3360": 2.79436,
            "3365": 2.8589,
            "3370": 2.85077,
            "3375": 2.83647,
            "3380": 2.84805,
            "3385": 2.88398,
            "3390": 2.87033,
            "3395": 2.80786,
            "3400": 2.78177,
            "3405": 2.83488,
            "3410": 2.85095,
            "3415": 2.86165,
            "3420": 2.82119,
            "3425": 2.8102,
            "3430": 2.8363,
            "3435": 2.89596,
            "3440": 2.8135,
            "3445": 2.86467,
            "3450": 2.81786,
            "3455": 2.7874,
            "3460": 2.81131,
            "3465": 2.84613,
            "3470": 2.83749,
            "3475": 2.77325,
            "3480": 2.84036,
            "3485": 2.82948,
            "3490": 2.89206,
            "3495": 2.85003,
            "3500": 2.83297,
            "3505": 2.82069,
            "3510": 2.81196,
            "3515": 2.83997,
            "3520": 2.77942,
            "3525": 2.80249,
            "3530": 2.85547,
            "3535": 2.78481,
            "3540": 2.83765,
            "3545": 2.80967,
            "3550": 2.79672,
            "3555": 2.82248,
            "3560": 2.82367,
            "3565": 2.8292,
            "3570": 2.80511,
            "3575": 2.80236,
            "3580": 2.81895,
            "3585": 2.83346,
            "3590": 2.83136,
            "3595": 2.77898,
            "3600": 2.74846,
            "3605": 2.79314,
            "3610": 2.84596,
            "3615": 2.7513,
            "3620": 2.80426,
            "3625": 2.88432,
            "3630": 2.77648,
            "3635": 2.78896,
            "3640": 2.78215,
            "3645": 2.7715,
            "3650": 2.80647,
            "3655": 2.82016,
            "3660": 2.7692,
            "3665": 2.78341,
            "3670": 2.76989,
            "3675": 2.77234,
            "3680": 2.8071,
            "3685": 2.80012,
            "3690": 2.80812,
            "3695": 2.81329,
            "3700": 2.79421,
            "3705": 2.78765,
            "3710": 2.7547,
            "3715": 2.80455,
            "3720": 2.79133,
            "3725": 2.79596,
            "3730": 2.83694,
            "3735": 2.79754,
            "3740": 2.75406,
            "3745": 2.79016,
            "3750": 2.80417,
            "3755": 2.7959,
            "3760": 2.75894,
            "3765": 2.75271,
            "3770": 2.76679,
            "3775": 2.77229,
            "3780": 2.75917,
            "3785": 2.7853,
            "3790": 2.74166,
            "3795": 2.79163,
            "3800": 2.8017,
            "3805": 2.74854,
            "3810": 2.80189,
            "3815": 2.76902,
            "3820": 2.79012,
            "3825": 2.73474,
            "3830": 2.74634,
            "3835": 2.81817,
            "3840": 2.72688,
            "3845": 2.72236,
            "3850": 2.77266,
            "3855": 2.72027,
            "3860": 2.80478,
            "3865": 2.75292,
            "3870": 2.77339,
            "3875": 2.75733,
            "3880": 2.78558,
            "3885": 2.78699,
            "3890": 2.74303,
            "3895": 2.7996,
            "3900": 2.7629,
            "3905": 2.72091,
            "3910": 2.74733,
            "3915": 2.758,
            "3920": 2.7967,
            "3925": 2.77615,
            "3930": 2.71012,
            "3935": 2.73961,
            "3940": 2.75239,
            "3945": 2.74262,
            "3950": 2.73683,
            "3955": 2.78055,
            "3960": 2.76323,
            "3965": 2.74184,
            "3970": 2.75793,
            "3975": 2.73131,
            "3980": 2.73729,
            "3985": 2.74485,
            "3990": 2.69287,
            "3995": 2.7834,
            "4000": 2.73648,
            "4005": 2.76984,
            "4010": 2.70933,
            "4015": 2.72406,
            "4020": 2.75112,
            "4025": 2.73253,
            "4030": 2.66097,
            "4035": 2.69403,
            "4040": 2.75338,
            "4045": 2.74949,
            "4050": 2.79346,
            "4055": 2.72174,
            "4060": 2.7159,
            "4065": 2.65176,
            "4070": 2.81153,
            "4075": 2.75849,
            "4080": 2.71956,
            "4085": 2.75067,
            "4090": 2.67987,
            "4095": 2.69015,
            "4100": 2.71058,
            "4105": 2.73706,
            "4110": 2.72798,
            "4115": 2.70336,
            "4120": 2.72875,
            "4125": 2.7011,
            "4130": 2.69669,
            "4135": 2.68835,
            "4140": 2.68559,
            "4145": 2.78223,
            "4150": 2.71009,
            "4155": 2.73996,
            "4160": 2.76212,
            "4165": 2.72153,
            "4170": 2.67355,
            "4175": 2.71994,
            "4180": 2.727,
            "4185": 2.72905,
            "4190": 2.73546,
            "4195": 2.69401,
            "4200": 2.70509,
            "4205": 2.74168,
            "4210": 2.67815,
            "4215": 2.66585,
            "4220": 2.65921,
            "4225": 2.70225,
            "4230": 2.72856,
            "4235": 2.73236,
            "4240": 2.70478,
            "4245": 2.69813,
            "4250": 2.70927,
            "4255": 2.65015,
            "4260": 2.72399,
            "4265": 2.73256,
            "4270": 2.72096,
            "4275": 2.68966,
            "4280": 2.70256,
            "4285": 2.7317,
            "4290": 2.68551,
            "4295": 2.69037,
            "4300": 2.69925,
            "4305": 2.69945,
            "4310": 2.72829,
            "4315": 2.71156,
            "4320": 2.69748,
            "4325": 2.7069,
            "4330": 2.70852,
            "4335": 2.69007,
            "4340": 2.69885,
            "4345": 2.72196,
            "4350": 2.67329,
            "4355": 2.6918,
            "4360": 2.71448,
            "4365": 2.78418,
            "4370": 2.73535,
            "4375": 2.74071,
            "4380": 2.70083,
            "4385": 2.69955,
            "4390": 2.7011,
            "4395": 2.75061,
            "4400": 2.66448,
            "4405": 2.66584,
            "4410": 2.68248,
            "4415": 2.70539,
            "4420": 2.70577,
            "4425": 2.72028,
            "4430": 2.69184,
            "4435": 2.68123,
            "4440": 2.69505,
            "4445": 2.67984,
            "4450": 2.65395,
            "4455": 2.66652,
            "4460": 2.68943,
            "4465": 2.69819,
            "4470": 2.67145,
            "4475": 2.68587,
            "4480": 2.65553,
            "4485": 2.69963,
            "4490": 2.65294,
            "4495": 2.71037,
            "4500": 2.70324,
            "4505": 2.69683,
            "4510": 2.64896,
            "4515": 2.69797,
            "4520": 2.66783,
            "4525": 2.67012,
            "4530": 2.67298,
            "4535": 2.66959,
            "4540": 2.70704,
            "4545": 2.65362,
            "4550": 2.70111,
            "4555": 2.68032,
            "4560": 2.65997,
            "4565": 2.6396,
            "4570": 2.63886,
            "4575": 2.66714,
            "4580": 2.68937,
            "4585": 2.68084,
            "4590": 2.61504,
            "4595": 2.66219,
            "4600": 2.67647,
            "4605": 2.68095,
            "4610": 2.66659,
            "4615": 2.66484,
            "4620": 2.65798,
            "4625": 2.71498,
            "4630": 2.67833,
            "4635": 2.64579,
            "4640": 2.69257,
            "4645": 2.64881,
            "4650": 2.69874,
            "4655": 2.70611,
            "4660": 2.67285,
            "4665": 2.68494,
            "4670": 2.67239,
            "4675": 2.68509,
            "4680": 2.66516,
            "4685": 2.6554,
            "4690": 2.70637,
            "4695": 2.65837,
            "4700": 2.67207,
            "4705": 2.6526,
            "4710": 2.67753,
            "4715": 2.65051,
            "4720": 2.72219,
            "4725": 2.62879,
            "4730": 2.65138,
            "4735": 2.6864,
            "4740": 2.64033,
            "4745": 2.64943,
            "4750": 2.63903,
            "4755": 2.6536,
            "4760": 2.66257,
            "4765": 2.64101,
            "4770": 2.62193,
            "4775": 2.65287,
            "4780": 2.6554,
            "4785": 2.69084,
            "4790": 2.64804,
            "4795": 2.66928,
            "4800": 2.62567,
            "4805": 2.63985,
            "4810": 2.661,
            "4815": 2.64809,
            "4820": 2.67082,
            "4825": 2.65124,
            "4830": 2.61626,
            "4835": 2.64733,
            "4840": 2.65426,
            "4845": 2.63739,
            "4850": 2.62545,
            "4855": 2.60194,
            "4860": 2.64972,
            "4865": 2.62682,
            "4870": 2.63799,
            "4875": 2.61811,
            "4880": 2.62652,
            "4885": 2.62543,
            "4890": 2.68013,
            "4895": 2.66042,
            "4900": 2.61535,
            "4905": 2.61912,
            "4910": 2.63653,
            "4915": 2.61524,
            "4920": 2.65467,
            "4925": 2.64999,
            "4930": 2.56915,
            "4935": 2.64924,
            "4940": 2.63292,
            "4945": 2.63792,
            "4950": 2.62476,
            "4955": 2.61732,
            "4960": 2.6167,
            "4965": 2.66171,
            "4970": 2.59848,
            "4975": 2.65287,
            "4980": 2.6187,
            "4985": 2.63335,
            "4990": 2.6586,
            "4995": 2.57974,
            "5000": 2.65898,
            "5005": 2.66578,
            "5010": 2.68301,
            "5015": 2.63322,
            "5020": 2.63924,
            "5025": 2.6866,
            "5030": 2.64456,
            "5035": 2.61559,
            "5040": 2.62024,
            "5045": 2.60287,
            "5050": 2.6231,
            "5055": 2.64947,
            "5060": 2.6452,
            "5065": 2.6881,
            "5070": 2.60595,
            "5075": 2.61447,
            "5080": 2.60665,
            "5085": 2.60357,
            "5090": 2.59045,
            "5095": 2.64908,
            "5100": 2.64606,
            "5105": 2.61042,
            "5110": 2.66253,
            "5115": 2.61674,
            "5120": 2.6721,
            "5125": 2.62882,
            "5130": 2.6133,
            "5135": 2.61257,
            "5140": 2.57323,
            "5145": 2.62739,
            "5150": 2.63655,
            "5155": 2.61744,
            "5160": 2.6636,
            "5165": 2.58271,
            "5170": 2.58972,
            "5175": 2.61655,
            "5180": 2.60404,
            "5185": 2.61939,
            "5190": 2.62404,
            "5195": 2.66753,
            "5200": 2.5989,
            "5205": 2.60446,
            "5210": 2.60475,
            "5215": 2.64605,
            "5220": 2.58627,
            "5225": 2.5519,
            "5230": 2.6321,
            "5235": 2.61744,
            "5240": 2.64291,
            "5245": 2.63621,
            "5250": 2.60218,
            "5255": 2.6186,
            "5260": 2.55727,
            "5265": 2.59614,
            "5270": 2.58806,
            "5275": 2.61658,
            "5280": 2.60897,
            "5285": 2.60361,
            "5290": 2.63324,
            "5295": 2.62086,
            "5300": 2.57816,
            "5305": 2.59805,
            "5310": 2.61289,
            "5315": 2.5858,
            "5320": 2.61592,
            "5325": 2.6441,
            "5330": 2.60238,
            "5335": 2.58083,
            "5340": 2.56386,
            "5345": 2.65745,
            "5350": 2.62017,
            "5355": 2.5774,
            "5360": 2.59964,
            "5365": 2.62005,
            "5370": 2.61244,
            "5375": 2.62598,
            "5380": 2.57586,
            "5385": 2.56169,
            "5390": 2.58533,
            "5395": 2.61918,
            "5400": 2.60517,
            "5405": 2.54618,
            "5410": 2.60967,
            "5415": 2.595,
            "5420": 2.61096,
            "5425": 2.62258,
            "5430": 2.62722,
            "5435": 2.5764,
            "5440": 2.58535,
            "5445": 2.62872,
            "5450": 2.64623,
            "5455": 2.609,
            "5460": 2.59057,
            "5465": 2.60606,
            "5470": 2.59632,
            "5475": 2.62477,
            "5480": 2.58787,
            "5485": 2.58859,
            "5490": 2.57747,
            "5495": 2.57015,
            "5500": 2.56782,
            "5505": 2.61543,
            "5510": 2.62353,
            "5515": 2.58253,
            "5520": 2.55721,
            "5525": 2.58387,
            "5530": 2.66211,
            "5535": 2.62005,
            "5540": 2.57052,
            "5545": 2.59608,
            "5550": 2.54903,
            "5555": 2.571,
            "5560": 2.56272,
            "5565": 2.60562,
            "5570": 2.65097,
            "5575": 2.62977,
            "5580": 2.57224,
            "5585": 2.59618,
            "5590": 2.56443,
            "5595": 2.58267,
            "5600": 2.55357,
            "5605": 2.59978,
            "5610": 2.58046,
            "5615": 2.58067,
            "5620": 2.58075,
            "5625": 2.55005,
            "5630": 2.56908,
            "5635": 2.63034,
            "5640": 2.59338,
            "5645": 2.56893,
            "5650": 2.57608,
            "5655": 2.54821,
            "5660": 2.55792,
            "5665": 2.58591,
            "5670": 2.56547,
            "5675": 2.60454,
            "5680": 2.5262,
            "5685": 2.56681,
            "5690": 2.60176,
            "5695": 2.55937,
            "5700": 2.59684,
            "5705": 2.59636,
            "5710": 2.57638,
            "5715": 2.58318,
            "5720": 2.53262,
            "5725": 2.60064,
            "5730": 2.57304,
            "5735": 2.60717,
            "5740": 2.59194,
            "5745": 2.55793,
            "5750": 2.53836,
            "5755": 2.5567,
            "5760": 2.61443,
            "5765": 2.55588,
            "5770": 2.53851,
            "5775": 2.58782,
            "5780": 2.57719,
            "5785": 2.53776,
            "5790": 2.56308,
            "5795": 2.60024,
            "5800": 2.54175,
            "5805": 2.53318,
            "5810": 2.55626,
            "5815": 2.5244,
            "5820": 2.59728,
            "5825": 2.50527,
            "5830": 2.49522,
            "5835": 2.59468,
            "5840": 2.53947,
            "5845": 2.55127,
            "5850": 2.61142,
            "5855": 2.50793,
            "5860": 2.55927,
            "5865": 2.51711,
            "5870": 2.57272,
            "5875": 2.60591,
            "5880": 2.58271,
            "5885": 2.56594,
            "5890": 2.58158,
            "5895": 2.55213,
            "5900": 2.61216,
            "5905": 2.55585,
            "5910": 2.59736,
            "5915": 2.61237,
            "5920": 2.58654,
            "5925": 2.54725,
            "5930": 2.62155,
            "5935": 2.58179,
            "5940": 2.58196,
            "5945": 2.52214,
            "5950": 2.55771,
            "5955": 2.58453,
            "5960": 2.56145,
            "5965": 2.61505,
            "5970": 2.54895,
            "5975": 2.57663,
            "5980": 2.55629,
            "5985": 2.55842,
            "5990": 2.55443,
            "5995": 2.55641,
            "6000": 2.55159,
            "6005": 2.52054,
            "6010": 2.55823,
            "6015": 2.52099,
            "6020": 2.53292,
            "6025": 2.55713,
            "6030": 2.60153,
            "6035": 2.54241,
            "6040": 2.54726,
            "6045": 2.48874,
            "6050": 2.59262,
            "6055": 2.51567,
            "6060": 2.54341,
            "6065": 2.52354,
            "6070": 2.52746,
            "6075": 2.53647,
            "6080": 2.53381,
            "6085": 2.59542,
            "6090": 2.56894,
            "6095": 2.53307,
            "6100": 2.54002,
            "6105": 2.52343,
            "6110": 2.5534,
            "6115": 2.58294,
            "6120": 2.55434,
            "6125": 2.53926,
            "6130": 2.47204,
            "6135": 2.55321,
            "6140": 2.5535,
            "6145": 2.55533,
            "6150": 2.52311,
            "6155": 2.50907,
            "6160": 2.53915,
            "6165": 2.57049,
            "6170": 2.54242,
            "6175": 2.59879,
            "6180": 2.50969,
            "6185": 2.54989,
            "6190": 2.49232,
            "6195": 2.57901,
            "6200": 2.55006,
            "6205": 2.53567,
            "6210": 2.51847,
            "6215": 2.51389,
            "6220": 2.56254,
            "6225": 2.51182,
            "6230": 2.50777,
            "6235": 2.55912,
            "6240": 2.54898,
            "6245": 2.52163,
            "6250": 2.53375,
            "6255": 2.5742,
            "6260": 2.52313,
            "6265": 2.57224,
            "6270": 2.52333,
            "6275": 2.5617,
            "6280": 2.52013,
            "6285": 2.51858,
            "6290": 2.51624,
            "6295": 2.50566,
            "6300": 2.55453,
            "6305": 2.52441,
            "6310": 2.51112,
            "6315": 2.53522,
            "6320": 2.48781,
            "6325": 2.5957,
            "6330": 2.55401,
            "6335": 2.50915,
            "6340": 2.50909,
            "6345": 2.55416,
            "6350": 2.55378,
            "6355": 2.52109,
            "6360": 2.51957,
            "6365": 2.48297,
            "6370": 2.53494,
            "6375": 2.49306,
            "6380": 2.55558,
            "6385": 2.57647,
            "6390": 2.50421,
            "6395": 2.54995,
            "6400": 2.50619,
            "6405": 2.52515,
            "6410": 2.5145,
            "6415": 2.523,
            "6420": 2.5404,
            "6425": 2.53299,
            "6430": 2.57518,
            "6435": 2.54369,
            "6440": 2.53393,
            "6445": 2.52557,
            "6450": 2.53037,
            "6455": 2.51956,
            "6460": 2.51424,
            "6465": 2.55742,
            "6470": 2.51587,
            "6475": 2.52288,
            "6480": 2.48485,
            "6485": 2.52568,
            "6490": 2.50531,
            "6495": 2.49851,
            "6500": 2.52156,
            "6505": 2.49322,
            "6510": 2.54091,
            "6515": 2.50769,
            "6520": 2.50847,
            "6525": 2.49126,
            "6530": 2.54068,
            "6535": 2.53196,
            "6540": 2.5298,
            "6545": 2.55935,
            "6550": 2.50141,
            "6555": 2.55393,
            "6560": 2.50851,
            "6565": 2.51974,
            "6570": 2.58456,
            "6575": 2.5206,
            "6580": 2.49613,
            "6585": 2.50468,
            "6590": 2.50784,
            "6595": 2.49551,
            "6600": 2.48492,
            "6605": 2.53761,
            "6610": 2.47813,
            "6615": 2.56643,
            "6620": 2.53331,
            "6625": 2.50918,
            "6630": 2.50884,
            "6635": 2.46955,
            "6640": 2.53625,
            "6645": 2.59366,
            "6650": 2.50727,
            "6655": 2.49491,
            "6660": 2.57096,
            "6665": 2.51861,
            "6670": 2.5669,
            "6675": 2.46451,
            "6680": 2.54552,
            "6685": 2.53303,
            "6690": 2.51066,
            "6695": 2.48338,
            "6700": 2.5201,
            "6705": 2.51556,
            "6710": 2.48936,
            "6715": 2.51426,
            "6720": 2.50704,
            "6725": 2.51943,
            "6730": 2.51595,
            "6735": 2.47971,
            "6740": 2.51256,
            "6745": 2.49237,
            "6750": 2.55432,
            "6755": 2.47171,
            "6760": 2.5398,
            "6765": 2.48511,
            "6770": 2.51673,
            "6775": 2.50938,
            "6780": 2.53691,
            "6785": 2.46942,
            "6790": 2.5404,
            "6795": 2.49747,
            "6800": 2.526,
            "6805": 2.50951,
            "6810": 2.50246,
            "6815": 2.51992,
            "6820": 2.48493,
            "6825": 2.50145,
            "6830": 2.53699,
            "6835": 2.50583,
            "6840": 2.5074,
            "6845": 2.52166,
            "6850": 2.47062,
            "6855": 2.50963,
            "6860": 2.50181,
            "6865": 2.48488,
            "6870": 2.54928,
            "6875": 2.47554,
            "6880": 2.55063,
            "6885": 2.47562,
            "6890": 2.54406,
            "6895": 2.49869,
            "6900": 2.48472,
            "6905": 2.49732,
            "6910": 2.51658,
            "6915": 2.51538,
            "6920": 2.52945,
            "6925": 2.53592,
            "6930": 2.48743,
            "6935": 2.51601,
            "6940": 2.49733,
            "6945": 2.45735,
            "6950": 2.48159,
            "6955": 2.52529,
            "6960": 2.51942,
            "6965": 2.49038,
            "6970": 2.46776,
            "6975": 2.51893,
            "6980": 2.45119,
            "6985": 2.51435,
            "6990": 2.52681,
            "6995": 2.46008,
            "7000": 2.4857,
            "7005": 2.4667,
            "7010": 2.47022,
            "7015": 2.51594,
            "7020": 2.46517,
            "7025": 2.44841,
            "7030": 2.48208,
            "7035": 2.47701,
            "7040": 2.50402,
            "7045": 2.51891,
            "7050": 2.52336,
            "7055": 2.4399,
            "7060": 2.47602,
            "7065": 2.48595,
            "7070": 2.49079,
            "7075": 2.49162,
            "7080": 2.53301,
            "7085": 2.48237,
            "7090": 2.473,
            "7095": 2.49968,
            "7100": 2.51235,
            "7105": 2.48475,
            "7110": 2.48398,
            "7115": 2.5008,
            "7120": 2.46802,
            "7125": 2.45993,
            "7130": 2.48174,
            "7135": 2.51009,
            "7140": 2.49524,
            "7145": 2.49547,
            "7150": 2.50838,
            "7155": 2.50549,
            "7160": 2.47267,
            "7165": 2.45441,
            "7170": 2.50323,
            "7175": 2.50138,
            "7180": 2.50287,
            "7185": 2.47888,
            "7190": 2.45754,
            "7195": 2.46282,
            "7200": 2.50582,
            "7205": 2.48673,
            "7210": 2.44143,
            "7215": 2.4774,
            "7220": 2.44056,
            "7225": 2.50992,
            "7230": 2.50819,
            "7235": 2.48137,
            "7240": 2.47521,
            "7245": 2.49699,
            "7250": 2.50435,
            "7255": 2.48907,
            "7260": 2.4559,
            "7265": 2.44754,
            "7270": 2.46985,
            "7275": 2.49711,
            "7280": 2.49068,
            "7285": 2.42028,
            "7290": 2.4765,
            "7295": 2.48576,
            "7300": 2.41537,
            "7305": 2.44172,
            "7310": 2.44612,
            "7315": 2.48688,
            "7320": 2.48061,
            "7325": 2.45471,
            "7330": 2.48741,
            "7335": 2.47103,
            "7340": 2.45999,
            "7345": 2.49065,
            "7350": 2.50759,
            "7355": 2.49204,
            "7360": 2.47672,
            "7365": 2.46675,
            "7370": 2.46718,
            "7375": 2.44736,
            "7380": 2.49247,
            "7385": 2.48245,
            "7390": 2.47143,
            "7395": 2.47076,
            "7400": 2.47826,
            "7405": 2.43636,
            "7410": 2.47594,
            "7415": 2.46762,
            "7420": 2.49047,
            "7425": 2.45309,
            "7430": 2.51923,
            "7435": 2.48835,
            "7440": 2.51712,
            "7445": 2.50649,
            "7450": 2.47023,
            "7455": 2.4505,
            "7460": 2.46232,
            "7465": 2.47224,
            "7470": 2.44654,
            "7475": 2.45263,
            "7480": 2.50696,
            "7485": 2.44731,
            "7490": 2.47151,
            "7495": 2.47897,
            "7500": 2.49176,
            "7505": 2.43714,
            "7510": 2.4327,
            "7515": 2.41782,
            "7520": 2.48958,
            "7525": 2.49304,
            "7530": 2.47239,
            "7535": 2.45795,
            "7540": 2.47005,
            "7545": 2.47142,
            "7550": 2.48676,
            "7555": 2.45126,
            "7560": 2.42516,
            "7565": 2.50493,
            "7570": 2.48155,
            "7575": 2.43534,
            "7580": 2.45593,
            "7585": 2.47794,
            "7590": 2.47723,
            "7595": 2.45983,
            "7600": 2.4619,
            "7605": 2.44428,
            "7610": 2.44666,
            "7615": 2.42334,
            "7620": 2.54396,
            "7625": 2.47914,
            "7630": 2.42239,
            "7635": 2.42463,
            "7640": 2.44931,
            "7645": 2.47013,
            "7650": 2.45978,
            "7655": 2.48267,
            "7660": 2.45011,
            "7665": 2.43052,
            "7670": 2.43917,
            "7675": 2.45127,
            "7680": 2.4822,
            "7685": 2.42789,
            "7690": 2.47748,
            "7695": 2.45207,
            "7700": 2.47852,
            "7705": 2.49472,
            "7710": 2.4906,
            "7715": 2.43857,
            "7720": 2.46528,
            "7725": 2.47882,
            "7730": 2.45384,
            "7735": 2.46895,
            "7740": 2.43524,
            "7745": 2.44409,
            "7750": 2.43493,
            "7755": 2.46349,
            "7760": 2.44734,
            "7765": 2.45062,
            "7770": 2.46527,
            "7775": 2.44968,
            "7780": 2.41367,
            "7785": 2.44115,
            "7790": 2.47809,
            "7795": 2.43666,
            "7800": 2.45672,
            "7805": 2.47745,
            "7810": 2.49856,
            "7815": 2.48425,
            "7820": 2.44372,
            "7825": 2.51041,
            "7830": 2.44874,
            "7835": 2.46354,
            "7840": 2.47479,
            "7845": 2.45731,
            "7850": 2.41361,
            "7855": 2.46795,
            "7860": 2.49498,
            "7865": 2.42043,
            "7870": 2.46323,
            "7875": 2.44232,
            "7880": 2.44989,
            "7885": 2.45571,
            "7890": 2.46629,
            "7895": 2.44233,
            "7900": 2.43437,
            "7905": 2.43129,
            "7910": 2.41956,
            "7915": 2.47833,
            "7920": 2.47132,
            "7925": 2.41801,
            "7930": 2.46833,
            "7935": 2.44775,
            "7940": 2.42007,
            "7945": 2.46791,
            "7950": 2.4404,
            "7955": 2.4146,
            "7960": 2.48514,
            "7965": 2.51375,
            "7970": 2.5174,
            "7975": 2.44637,
            "7980": 2.43831,
            "7985": 2.46457,
            "7990": 2.4281,
            "7995": 2.46689,
            "8000": 2.43426,
            "8005": 2.4158,
            "8010": 2.45426,
            "8015": 2.46634,
            "8020": 2.47876,
            "8025": 2.47029,
            "8030": 2.4497,
            "8035": 2.46716,
            "8040": 2.41566,
            "8045": 2.45052,
            "8050": 2.44515,
            "8055": 2.42304,
            "8060": 2.43892,
            "8065": 2.45749,
            "8070": 2.45333,
            "8075": 2.45444,
            "8080": 2.44209,
            "8085": 2.43781,
            "8090": 2.4222,
            "8095": 2.42035,
            "8100": 2.43701,
            "8105": 2.49226,
            "8110": 2.43554,
            "8115": 2.44107,
            "8120": 2.46446,
            "8125": 2.46397,
            "8130": 2.45005,
            "8135": 2.44859,
            "8140": 2.43719,
            "8145": 2.42285,
            "8150": 2.41929,
            "8155": 2.48306,
            "8160": 2.44961,
            "8165": 2.43799,
            "8170": 2.43144,
            "8175": 2.4191,
            "8180": 2.49169,
            "8185": 2.42127,
            "8190": 2.46436,
            "8195": 2.45449,
            "8200": 2.44219,
            "8205": 2.44226,
            "8210": 2.42747,
            "8215": 2.43706,
            "8220": 2.43181,
            "8225": 2.40713,
            "8230": 2.43716,
            "8235": 2.46065,
            "8240": 2.42535,
            "8245": 2.4454,
            "8250": 2.44079,
            "8255": 2.4331,
            "8260": 2.43059,
            "8265": 2.4271,
            "8270": 2.43176,
            "8275": 2.43875,
            "8280": 2.3943,
            "8285": 2.43559,
            "8290": 2.47695,
            "8295": 2.44448,
            "8300": 2.45474,
            "8305": 2.40521,
            "8310": 2.4311,
            "8315": 2.45391,
            "8320": 2.3955,
            "8325": 2.3912,
            "8330": 2.43328,
            "8335": 2.44265,
            "8340": 2.48794,
            "8345": 2.45065,
            "8350": 2.45145,
            "8355": 2.41099,
            "8360": 2.40381,
            "8365": 2.45603,
            "8370": 2.45802,
            "8375": 2.42948,
            "8380": 2.42394,
            "8385": 2.43031,
            "8390": 2.4431,
            "8395": 2.44939,
            "8400": 2.44584,
            "8405": 2.495,
            "8410": 2.44327,
            "8415": 2.43894,
            "8420": 2.42215,
            "8425": 2.44395,
            "8430": 2.46704,
            "8435": 2.41158,
            "8440": 2.4595,
            "8445": 2.46637,
            "8450": 2.41361,
            "8455": 2.46609,
            "8460": 2.46112,
            "8465": 2.44125,
            "8470": 2.41471,
            "8475": 2.48334,
            "8480": 2.41021,
            "8485": 2.42432,
            "8490": 2.47209,
            "8495": 2.44598,
            "8500": 2.45224,
            "8505": 2.42377,
            "8510": 2.41131,
            "8515": 2.43868,
            "8520": 2.43196,
            "8525": 2.49903,
            "8530": 2.38038,
            "8535": 2.40733,
            "8540": 2.49469,
            "8545": 2.3894,
            "8550": 2.44785,
            "8555": 2.45932,
            "8560": 2.47841,
            "8565": 2.43193,
            "8570": 2.44137,
            "8575": 2.45682,
            "8580": 2.44831,
            "8585": 2.42889,
            "8590": 2.41034,
            "8595": 2.43429,
            "8600": 2.42179,
            "8605": 2.5001,
            "8610": 2.4355,
            "8615": 2.39538,
            "8620": 2.45793,
            "8625": 2.43491,
            "8630": 2.46953,
            "8635": 2.46026,
            "8640": 2.44563,
            "8645": 2.48328,
            "8650": 2.4289,
            "8655": 2.46099,
            "8660": 2.46544,
            "8665": 2.39596,
            "8670": 2.4187,
            "8675": 2.43748,
            "8680": 2.45725,
            "8685": 2.43903,
            "8690": 2.41756,
            "8695": 2.44994,
            "8700": 2.44424,
            "8705": 2.43203,
            "8710": 2.43799,
            "8715": 2.45796,
            "8720": 2.4885,
            "8725": 2.42286,
            "8730": 2.40182,
            "8735": 2.44324,
            "8740": 2.43971,
            "8745": 2.40735,
            "8750": 2.44644,
            "8755": 2.43268,
            "8760": 2.40882,
            "8765": 2.44368,
            "8770": 2.41346,
            "8775": 2.44728,
            "8780": 2.42897,
            "8785": 2.48105,
            "8790": 2.42913,
            "8795": 2.4275,
            "8800": 2.42507,
            "8805": 2.41547,
            "8810": 2.41989,
            "8815": 2.48573,
            "8820": 2.46245,
            "8825": 2.43508,
            "8830": 2.39363,
            "8835": 2.43059,
            "8840": 2.4011,
            "8845": 2.43561,
            "8850": 2.4432,
            "8855": 2.41208,
            "8860": 2.43692,
            "8865": 2.4368,
            "8870": 2.44614,
            "8875": 2.44694,
            "8880": 2.41968,
            "8885": 2.40435,
            "8890": 2.45636,
            "8895": 2.43825,
            "8900": 2.42221,
            "8905": 2.41086,
            "8910": 2.40878,
            "8915": 2.42762,
            "8920": 2.44256,
            "8925": 2.47644,
            "8930": 2.42537,
            "8935": 2.42038,
            "8940": 2.39746,
            "8945": 2.40294,
            "8950": 2.42585,
            "8955": 2.40426,
            "8960": 2.4429,
            "8965": 2.42534,
            "8970": 2.41287,
            "8975": 2.48503,
            "8980": 2.44965,
            "8985": 2.38255,
            "8990": 2.41786,
            "8995": 2.42593,
            "9000": 2.46556,
            "9005": 2.42295,
            "9010": 2.38472,
            "9015": 2.41606,
            "9020": 2.40591,
            "9025": 2.37666,
            "9030": 2.41023,
            "9035": 2.43334,
            "9040": 2.43041,
            "9045": 2.42903,
            "9050": 2.40571,
            "9055": 2.42869,
            "9060": 2.43178,
            "9065": 2.41519,
            "9070": 2.45462,
            "9075": 2.40713,
            "9080": 2.44279,
            "9085": 2.42534,
            "9090": 2.42125,
            "9095": 2.40854,
            "9100": 2.41025,
            "9105": 2.367,
            "9110": 2.47453,
            "9115": 2.42448,
            "9120": 2.41281,
            "9125": 2.46625,
            "9130": 2.40301,
            "9135": 2.45728,
            "9140": 2.44304,
            "9145": 2.43611,
            "9150": 2.43471,
            "9155": 2.38526,
            "9160": 2.42692,
            "9165": 2.43405,
            "9170": 2.38388,
            "9175": 2.42789,
            "9180": 2.38445,
            "9185": 2.4481,
            "9190": 2.42039,
            "9195": 2.39805,
            "9200": 2.40085,
            "9205": 2.45829,
            "9210": 2.371,
            "9215": 2.47166,
            "9220": 2.45642,
            "9225": 2.39429,
            "9230": 2.45657,
            "9235": 2.40768,
            "9240": 2.41027,
            "9245": 2.44666,
            "9250": 2.43485,
            "9255": 2.43757,
            "9260": 2.39426,
            "9265": 2.44798,
            "9270": 2.44482,
            "9275": 2.40359,
            "9280": 2.39705,
            "9285": 2.43007,
            "9290": 2.41426,
            "9295": 2.39299,
            "9300": 2.43289,
            "9305": 2.41279,
            "9310": 2.423,
            "9315": 2.41942,
            "9320": 2.45204,
            "9325": 2.37844,
            "9330": 2.41244,
            "9335": 2.36996,
            "9340": 2.41408,
            "9345": 2.42371,
            "9350": 2.44769,
            "9355": 2.48475,
            "9360": 2.44528,
            "9365": 2.39717,
            "9370": 2.44369,
            "9375": 2.44154,
            "9380": 2.36344,
            "9385": 2.40893,
            "9390": 2.392,
            "9395": 2.39609,
            "9400": 2.45217,
            "9405": 2.42157,
            "9410": 2.40685,
            "9415": 2.4464,
            "9420": 2.45271,
            "9425": 2.44004,
            "9430": 2.45598,
            "9435": 2.42225,
            "9440": 2.48766,
            "9445": 2.38406,
            "9450": 2.403,
            "9455": 2.41116,
            "9460": 2.39528,
            "9465": 2.38808,
            "9470": 2.39304,
            "9475": 2.37421,
            "9480": 2.44231,
            "9485": 2.3953,
            "9490": 2.429,
            "9495": 2.39024,
            "9500": 2.37127,
            "9505": 2.43715,
            "9510": 2.40592,
            "9515": 2.43922,
            "9520": 2.4267,
            "9525": 2.39745,
            "9530": 2.46261,
            "9535": 2.4094,
            "9540": 2.42584,
            "9545": 2.38754,
            "9550": 2.43208,
            "9555": 2.40052,
            "9560": 2.43543,
            "9565": 2.42095,
            "9570": 2.3827,
            "9575": 2.421,
            "9580": 2.40541,
            "9585": 2.43312,
            "9590": 2.43539,
            "9595": 2.45662,
            "9600": 2.39949,
            "9605": 2.39163,
            "9610": 2.42764,
            "9615": 2.42109,
            "9620": 2.42093,
            "9625": 2.45314,
            "9630": 2.40624,
            "9635": 2.41073,
            "9640": 2.45484,
            "9645": 2.41927,
            "9650": 2.40829,
            "9655": 2.38,
            "9660": 2.43229,
            "9665": 2.39547,
            "9670": 2.38951,
            "9675": 2.36541,
            "9680": 2.40451,
            "9685": 2.40399,
            "9690": 2.47182,
            "9695": 2.3897,
            "9700": 2.38473,
            "9705": 2.39081,
            "9710": 2.37373,
            "9715": 2.39466,
            "9720": 2.44223,
            "9725": 2.44947,
            "9730": 2.43603,
            "9735": 2.39314,
            "9740": 2.38934,
            "9745": 2.43447,
            "9750": 2.40659,
            "9755": 2.41677,
            "9760": 2.41903,
            "9765": 2.3765,
            "9770": 2.45819,
            "9775": 2.40841,
            "9780": 2.37191,
            "9785": 2.40974,
            "9790": 2.41715,
            "9795": 2.36864,
            "9800": 2.40489,
            "9805": 2.41293,
            "9810": 2.41636,
            "9815": 2.38765,
            "9820": 2.38452,
            "9825": 2.41458,
            "9830": 2.42939,
            "9835": 2.39324,
            "9840": 2.42203,
            "9845": 2.36981,
            "9850": 2.40762,
            "9855": 2.40309,
            "9860": 2.39844,
            "9865": 2.38777,
            "9870": 2.39425,
            "9875": 2.38784,
            "9880": 2.45814,
            "9885": 2.40039,
            "9890": 2.36126,
            "9895": 2.32977,
            "9900": 2.40372,
            "9905": 2.43103,
            "9910": 2.36341,
            "9915": 2.37042,
            "9920": 2.41821,
            "9925": 2.40625,
            "9930": 2.39008,
            "9935": 2.35818,
            "9940": 2.39734,
            "9945": 2.38754,
            "9950": 2.4102,
            "9955": 2.45576,
            "9960": 2.43796,
            "9965": 2.36266,
            "9970": 2.41282,
            "9975": 2.39226,
            "9980": 2.34002,
            "9985": 2.4136,
            "9990": 2.40407,
            "9995": 2.40251,
            "10000": 2.37503,
            "10005": 2.38016,
            "10010": 2.38897,
            "10015": 2.45152,
            "10020": 2.37135,
            "10025": 2.39485,
            "10030": 2.39555,
            "10035": 2.41697,
            "10040": 2.41008,
            "10045": 2.39015,
            "10050": 2.3608,
            "10055": 2.37497,
            "10060": 2.42662,
            "10065": 2.381,
            "10070": 2.42931,
            "10075": 2.37803,
            "10080": 2.36864,
            "10085": 2.37765,
            "10090": 2.35414,
            "10095": 2.40824,
            "10100": 2.32065,
            "10105": 2.38938,
            "10110": 2.41723,
            "10115": 2.3932,
            "10120": 2.3649,
            "10125": 2.3777,
            "10130": 2.36629,
            "10135": 2.38894,
            "10140": 2.41886,
            "10145": 2.41313,
            "10150": 2.38364,
            "10155": 2.40171,
            "10160": 2.36831,
            "10165": 2.38929,
            "10170": 2.42951,
            "10175": 2.33254,
            "10180": 2.40139,
            "10185": 2.38961,
            "10190": 2.44963,
            "10195": 2.40903,
            "10200": 2.39636,
            "10205": 2.39541,
            "10210": 2.37545,
            "10215": 2.35138,
            "10220": 2.42686,
            "10225": 2.4374,
            "10230": 2.36151,
            "10235": 2.39413,
            "10240": 2.37745,
            "10245": 2.39586,
            "10250": 2.39498,
            "10255": 2.42063,
            "10260": 2.34179,
            "10265": 2.35595,
            "10270": 2.35696,
            "10275": 2.37742,
            "10280": 2.45578,
            "10285": 2.36405,
            "10290": 2.39142,
            "10295": 2.37906,
            "10300": 2.37381,
            "10305": 2.42262,
            "10310": 2.39511,
            "10315": 2.36789,
            "10320": 2.3721,
            "10325": 2.36743,
            "10330": 2.41904,
            "10335": 2.36838,
            "10340": 2.42816,
            "10345": 2.37986,
            "10350": 2.36588,
            "10355": 2.40402,
            "10360": 2.38027,
            "10365": 2.36618,
            "10370": 2.34647,
            "10375": 2.36068,
            "10380": 2.42359,
            "10385": 2.41044,
            "10390": 2.38547,
            "10395": 2.36378,
            "10400": 2.3835,
            "10405": 2.35412,
            "10410": 2.34482,
            "10415": 2.42134,
            "10420": 2.38596,
            "10425": 2.33277,
            "10430": 2.36637,
            "10435": 2.37673,
            "10440": 2.37865,
            "10445": 2.36491,
            "10450": 2.3669,
            "10455": 2.38595,
            "10460": 2.38657,
            "10465": 2.30837,
            "10470": 2.36369,
            "10475": 2.38619,
            "10480": 2.36847,
            "10485": 2.3661,
            "10490": 2.42007,
            "10495": 2.37239,
            "10500": 2.37043,
            "10505": 2.37665,
            "10510": 2.38893,
            "10515": 2.3786,
            "10520": 2.40715,
            "10525": 2.39544,
            "10530": 2.39751,
            "10535": 2.36068,
            "10540": 2.41035,
            "10545": 2.36388,
            "10550": 2.38204,
            "10555": 2.36345,
            "10560": 2.34633,
            "10565": 2.37874,
            "10570": 2.38037,
            "10575": 2.36022,
            "10580": 2.38294,
            "10585": 2.37306,
            "10590": 2.38416,
            "10595": 2.38213,
            "10600": 2.33865,
            "10605": 2.37753,
            "10610": 2.37058,
            "10615": 2.37039,
            "10620": 2.35512,
            "10625": 2.42587,
            "10630": 2.37959,
            "10635": 2.32939,
            "10640": 2.37025,
            "10645": 2.42582,
            "10650": 2.36625,
            "10655": 2.31323,
            "10660": 2.35436,
            "10665": 2.40571,
            "10670": 2.32294,
            "10675": 2.42284,
            "10680": 2.36013,
            "10685": 2.29341,
            "10690": 2.38755,
            "10695": 2.33602,
            "10700": 2.39021,
            "10705": 2.39103,
            "10710": 2.34839,
            "10715": 2.38791,
            "10720": 2.33122,
            "10725": 2.35822,
            "10730": 2.35545,
            "10735": 2.35889,
            "10740": 2.32328,
            "10745": 2.3436,
            "10750": 2.33874,
            "10755": 2.40853,
            "10760": 2.37013,
            "10765": 2.34207,
            "10770": 2.3764,
            "10775": 2.39275,
            "10780": 2.37572,
            "10785": 2.39828,
            "10790": 2.35249,
            "10795": 2.39336,
            "10800": 2.32898,
            "10805": 2.40193,
            "10810": 2.38061,
            "10815": 2.35979,
            "10820": 2.34858,
            "10825": 2.37631,
            "10830": 2.34337,
            "10835": 2.35361,
            "10840": 2.33499,
            "10845": 2.39216,
            "10850": 2.33795,
            "10855": 2.36908,
            "10860": 2.33711,
            "10865": 2.32743,
            "10870": 2.32848,
            "10875": 2.30977,
            "10880": 2.39976,
            "10885": 2.40825,
            "10890": 2.36578,
            "10895": 2.37644,
            "10900": 2.33747,
            "10905": 2.31849,
            "10910": 2.41102,
            "10915": 2.37581,
            "10920": 2.37944,
            "10925": 2.36681,
            "10930": 2.32353,
            "10935": 2.366,
            "10940": 2.35829,
            "10945": 2.35268,
            "10950": 2.36612,
            "10955": 2.36831,
            "10960": 2.31609,
            "10965": 2.36861,
            "10970": 2.36195,
            "10975": 2.41427,
            "10980": 2.37916,
            "10985": 2.34718,
            "10990": 2.40203,
            "10995": 2.36813,
            "11000": 2.3431,
            "11005": 2.36593,
            "11010": 2.34815,
            "11015": 2.33058,
            "11020": 2.33971,
            "11025": 2.37059,
            "11030": 2.34495,
            "11035": 2.32056,
            "11040": 2.32299,
            "11045": 2.32437,
            "11050": 2.32271,
            "11055": 2.29492,
            "11060": 2.34574,
            "11065": 2.31429,
            "11070": 2.3997,
            "11075": 2.32488,
            "11080": 2.35909,
            "11085": 2.34215,
            "11090": 2.35097,
            "11095": 2.37591,
            "11100": 2.33353,
            "11105": 2.32079,
            "11110": 2.36825,
            "11115": 2.37722,
            "11120": 2.38574,
            "11125": 2.32049,
            "11130": 2.35432,
            "11135": 2.33798,
            "11140": 2.3756,
            "11145": 2.35296,
            "11150": 2.39873,
            "11155": 2.34565,
            "11160": 2.36975,
            "11165": 2.36888,
            "11170": 2.34562,
            "11175": 2.33842,
            "11180": 2.377,
            "11185": 2.31587,
            "11190": 2.28274,
            "11195": 2.33321,
            "11200": 2.35058,
            "11205": 2.36546,
            "11210": 2.33584,
            "11215": 2.32266,
            "11220": 2.34696,
            "11225": 2.37525,
            "11230": 2.37025,
            "11235": 2.32278,
            "11240": 2.34703,
            "11245": 2.36117,
            "11250": 2.33601,
            "11255": 2.33861,
            "11260": 2.36043,
            "11265": 2.39195,
            "11270": 2.29246,
            "11275": 2.31756,
            "11280": 2.37339,
            "11285": 2.29474,
            "11290": 2.35128,
            "11295": 2.36883,
            "11300": 2.38525,
            "11305": 2.33833,
            "11310": 2.335,
            "11315": 2.30246,
            "11320": 2.30888,
            "11325": 2.31935,
            "11330": 2.35739,
            "11335": 2.34267,
            "11340": 2.31199,
            "11345": 2.31657,
            "11350": 2.29981,
            "11355": 2.32629,
            "11360": 2.35455,
            "11365": 2.29683,
            "11370": 2.35491,
            "11375": 2.33083,
            "11380": 2.34514,
            "11385": 2.35134,
            "11390": 2.33916,
            "11395": 2.2904,
            "11400": 2.31403,
            "11405": 2.35915,
            "11410": 2.35981,
            "11415": 2.3881,
            "11420": 2.35419,
            "11425": 2.3118,
            "11430": 2.37189,
            "11435": 2.36599,
            "11440": 2.35189,
            "11445": 2.36747,
            "11450": 2.32505,
            "11455": 2.31027,
            "11460": 2.35577,
            "11465": 2.34641,
            "11470": 2.37697,
            "11475": 2.31611,
            "11480": 2.32987,
            "11485": 2.3146,
            "11490": 2.34959,
            "11495": 2.41145,
            "11500": 2.34337,
            "11505": 2.35276,
            "11510": 2.36718,
            "11515": 2.32509,
            "11520": 2.30839,
            "11525": 2.36337,
            "11530": 2.3194,
            "11535": 2.32609,
            "11540": 2.35006,
            "11545": 2.34693,
            "11550": 2.36786,
            "11555": 2.32993,
            "11560": 2.3536,
            "11565": 2.34458,
            "11570": 2.3536,
            "11575": 2.3011,
            "11580": 2.33205,
            "11585": 2.35602,
            "11590": 2.36689,
            "11595": 2.33751,
            "11600": 2.36086,
            "11605": 2.32743,
            "11610": 2.36471,
            "11615": 2.36332,
            "11620": 2.29974,
            "11625": 2.28036,
            "11630": 2.32987,
            "11635": 2.34628,
            "11640": 2.30899,
            "11645": 2.31323,
            "11650": 2.33117,
            "11655": 2.35585,
            "11660": 2.34024,
            "11665": 2.33286,
            "11670": 2.30287,
            "11675": 2.30075,
            "11680": 2.32815,
            "11685": 2.33901,
            "11690": 2.34716,
            "11695": 2.3197,
            "11700": 2.32709,
            "11705": 2.30459,
            "11710": 2.34705,
            "11715": 2.31826,
            "11720": 2.30389,
            "11725": 2.34199,
            "11730": 2.30779,
            "11735": 2.33209,
            "11740": 2.2753,
            "11745": 2.32144,
            "11750": 2.3336,
            "11755": 2.35456,
            "11760": 2.31473,
            "11765": 2.33841,
            "11770": 2.27811,
            "11775": 2.32897,
            "11780": 2.25799,
            "11785": 2.30024,
            "11790": 2.31493,
            "11795": 2.32371,
            "11800": 2.33793,
            "11805": 2.30621,
            "11810": 2.30786,
            "11815": 2.3343,
            "11820": 2.32101,
            "11825": 2.36172,
            "11830": 2.32071,
            "11835": 2.34063,
            "11840": 2.34274,
            "11845": 2.32181,
            "11850": 2.30581,
            "11855": 2.31644,
            "11860": 2.34571,
            "11865": 2.36074,
            "11870": 2.38141,
            "11875": 2.2838,
            "11880": 2.29616,
            "11885": 2.34139,
            "11890": 2.29488,
            "11895": 2.29572,
            "11900": 2.33763,
            "11905": 2.32182,
            "11910": 2.28009,
            "11915": 2.31065,
            "11920": 2.33704,
            "11925": 2.30583,
            "11930": 2.30791,
            "11935": 2.31945,
            "11940": 2.31995,
            "11945": 2.34572,
            "11950": 2.30214,
            "11955": 2.31571,
            "11960": 2.33963,
            "11965": 2.30105,
            "11970": 2.30187,
            "11975": 2.3488,
            "11980": 2.31378,
            "11985": 2.28272,
            "11990": 2.30796,
            "11995": 2.33162,
            "12000": 2.32448,
            "12005": 2.32719,
            "12010": 2.29018,
            "12015": 2.31336,
            "12020": 2.32951,
            "12025": 2.33643,
            "12030": 2.31428,
            "12035": 2.33848,
            "12040": 2.31854,
            "12045": 2.31434,
            "12050": 2.31215,
            "12055": 2.3352,
            "12060": 2.30159,
            "12065": 2.33219,
            "12070": 2.30837,
            "12075": 2.28037,
            "12080": 2.35467,
            "12085": 2.34152,
            "12090": 2.33354,
            "12095": 2.28306,
            "12100": 2.31673,
            "12105": 2.31069,
            "12110": 2.33069,
            "12115": 2.30641,
            "12120": 2.30704,
            "12125": 2.29609,
            "12130": 2.30622,
            "12135": 2.33037,
            "12140": 2.29688,
            "12145": 2.25958,
            "12150": 2.26153,
            "12155": 2.34294,
            "12160": 2.35915,
            "12165": 2.32051,
            "12170": 2.334,
            "12175": 2.34314,
            "12180": 2.3318,
            "12185": 2.34303,
            "12190": 2.33577,
            "12195": 2.30093,
            "12200": 2.30246,
            "12205": 2.32658,
            "12210": 2.35972,
            "12215": 2.30653,
            "12220": 2.30191,
            "12225": 2.24709,
            "12230": 2.33677,
            "12235": 2.34065,
            "12240": 2.32782,
            "12245": 2.2935,
            "12250": 2.27763,
            "12255": 2.3395,
            "12260": 2.31679,
            "12265": 2.34478,
            "12270": 2.31554,
            "12275": 2.31677,
            "12280": 2.32253,
            "12285": 2.28952,
            "12290": 2.31433,
            "12295": 2.26884,
            "12300": 2.33108,
            "12305": 2.27093,
            "12310": 2.28974,
            "12315": 2.35756,
            "12320": 2.29797,
            "12325": 2.32217,
            "12330": 2.30328,
            "12335": 2.32209,
            "12340": 2.34279,
            "12345": 2.37023,
            "12350": 2.34617,
            "12355": 2.30866,
            "12360": 2.31576,
            "12365": 2.33299,
            "12370": 2.29597,
            "12375": 2.30396,
            "12380": 2.30039,
            "12385": 2.29488,
            "12390": 2.25304,
            "12395": 2.30748,
            "12400": 2.30213,
            "12405": 2.31418,
            "12410": 2.30832,
            "12415": 2.2868,
            "12420": 2.32194,
            "12425": 2.30362,
            "12430": 2.31855,
            "12435": 2.30193,
            "12440": 2.33517,
            "12445": 2.3228,
            "12450": 2.30936,
            "12455": 2.24374,
            "12460": 2.33755,
            "12465": 2.36582,
            "12470": 2.27944,
            "12475": 2.27513,
            "12480": 2.29532,
            "12485": 2.30917,
            "12490": 2.33321,
            "12495": 2.27309,
            "12500": 2.32322,
            "12505": 2.33899,
            "12510": 2.35918,
            "12515": 2.27269,
            "12520": 2.32282,
            "12525": 2.28953,
            "12530": 2.32246,
            "12535": 2.27508,
            "12540": 2.28833,
            "12545": 2.29304,
            "12550": 2.31847,
            "12555": 2.32567,
            "12560": 2.30247,
            "12565": 2.33702,
            "12570": 2.27975,
            "12575": 2.30359,
            "12580": 2.31404,
            "12585": 2.29346,
            "12590": 2.33723,
            "12595": 2.32629,
            "12600": 2.28432,
            "12605": 2.32186,
            "12610": 2.36725,
            "12615": 2.30853,
            "12620": 2.33633,
            "12625": 2.33404,
            "12630": 2.30017,
            "12635": 2.33795,
            "12640": 2.29794,
            "12645": 2.28149,
            "12650": 2.32885,
            "12655": 2.26803,
            "12660": 2.34509,
            "12665": 2.32031,
            "12670": 2.31308,
            "12675": 2.32177,
            "12680": 2.2781,
            "12685": 2.36828,
            "12690": 2.30518,
            "12695": 2.33467,
            "12700": 2.29576,
            "12705": 2.30963,
            "12710": 2.31082,
            "12715": 2.2908,
            "12720": 2.3163,
            "12725": 2.27788,
            "12730": 2.34119,
            "12735": 2.3003,
            "12740": 2.34099,
            "12745": 2.28836,
            "12750": 2.27593,
            "12755": 2.28477,
            "12760": 2.27021,
            "12765": 2.34085,
            "12770": 2.32873,
            "12775": 2.26616,
            "12780": 2.32288,
            "12785": 2.30308,
            "12790": 2.3082,
            "12795": 2.32298,
            "12800": 2.29883,
            "12805": 2.31865,
            "12810": 2.28331,
            "12815": 2.30037,
            "12820": 2.32651,
            "12825": 2.32484,
            "12830": 2.29196,
            "12835": 2.2712,
            "12840": 2.27357,
            "12845": 2.31533,
            "12850": 2.27968,
            "12855": 2.2743,
            "12860": 2.27381,
            "12865": 2.31935,
            "12870": 2.2679,
            "12875": 2.34222,
            "12880": 2.32036,
            "12885": 2.28574,
            "12890": 2.31145,
            "12895": 2.24813,
            "12900": 2.32757,
            "12905": 2.31794,
            "12910": 2.29019,
            "12915": 2.28757,
            "12920": 2.30176,
            "12925": 2.30293,
            "12930": 2.27492,
            "12935": 2.24416,
            "12940": 2.26089,
            "12945": 2.31287,
            "12950": 2.28712,
            "12955": 2.32917,
            "12960": 2.31954,
            "12965": 2.29119,
            "12970": 2.27432,
            "12975": 2.27007,
            "12980": 2.33407,
            "12985": 2.28074,
            "12990": 2.2834,
            "12995": 2.27493,
            "13000": 2.25272
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 13000,
        "step_interval": 5,
        "values": {
            "1": 956238080.0,
            "5": 967340032.0,
            "10": 971387136.0,
            "15": 946583872.0,
            "20": 958988544.0,
            "25": 1069888064.0,
            "30": 1195545216.0,
            "35": 1265919104.0,
            "40": 1225385472.0,
            "45": 1143984000.0,
            "50": 1123953792.0,
            "55": 1097520640.0,
            "60": 1062098688.0,
            "65": 1045100544.0,
            "70": 1013369024.0,
            "75": 1002651456.0,
            "80": 1017073408.0,
            "85": 1010896064.0,
            "90": 990290304.0,
            "95": 963963648.0,
            "100": 977244864.0,
            "105": 986322432.0,
            "110": 979685888.0,
            "115": 981374592.0,
            "120": 963850304.0,
            "125": 943482752.0,
            "130": 976245632.0,
            "135": 967085504.0,
            "140": 963599104.0,
            "145": 977430784.0,
            "150": 922111680.0,
            "155": 968683328.0,
            "160": 956706432.0,
            "165": 960015936.0,
            "170": 974511872.0,
            "175": 949081792.0,
            "180": 946759872.0,
            "185": 972029696.0,
            "190": 969055616.0,
            "195": 985147648.0,
            "200": 945774912.0,
            "205": 958352384.0,
            "210": 979447552.0,
            "215": 967492032.0,
            "220": 956426880.0,
            "225": 962403968.0,
            "230": 948176128.0,
            "235": 965226688.0,
            "240": 966071360.0,
            "245": 969163776.0,
            "250": 974432512.0,
            "255": 925067072.0,
            "260": 965635840.0,
            "265": 970669952.0,
            "270": 959135488.0,
            "275": 954000256.0,
            "280": 963429120.0,
            "285": 945788160.0,
            "290": 974126784.0,
            "295": 966698112.0,
            "300": 967156352.0,
            "305": 964511808.0,
            "310": 940348864.0,
            "315": 967400384.0,
            "320": 969008576.0,
            "325": 980561216.0,
            "330": 972092288.0,
            "335": 946870592.0,
            "340": 966600576.0,
            "345": 973021056.0,
            "350": 973923136.0,
            "355": 963257600.0,
            "360": 948344576.0,
            "365": 964819904.0,
            "370": 962946624.0,
            "375": 958442880.0,
            "380": 947151296.0,
            "385": 955998848.0,
            "390": 945404416.0,
            "395": 970418944.0,
            "400": 979779136.0,
            "405": 968349248.0,
            "410": 970067008.0,
            "415": 953161536.0,
            "420": 943564992.0,
            "425": 954775168.0,
            "430": 962666112.0,
            "435": 977082048.0,
            "440": 954810112.0,
            "445": 971891200.0,
            "450": 963511232.0,
            "455": 973135936.0,
            "460": 983716480.0,
            "465": 945283008.0,
            "470": 942053952.0,
            "475": 967004544.0,
            "480": 966101760.0,
            "485": 976409792.0,
            "490": 962541952.0,
            "495": 945466304.0,
            "500": 964461504.0,
            "505": 986015680.0,
            "510": 965678144.0,
            "515": 943414080.0,
            "520": 945023552.0,
            "525": 971265600.0,
            "530": 971890944.0,
            "535": 979139904.0,
            "540": 969535488.0,
            "545": 954128896.0,
            "550": 951268864.0,
            "555": 987222080.0,
            "560": 960432128.0,
            "565": 966618752.0,
            "570": 975727808.0,
            "575": 927225344.0,
            "580": 970699200.0,
            "585": 961180864.0,
            "590": 972971072.0,
            "595": 963684608.0,
            "600": 937080064.0,
            "605": 951475072.0,
            "610": 963360768.0,
            "615": 970011584.0,
            "620": 976474304.0,
            "625": 949584512.0,
            "630": 954446592.0,
            "635": 986046080.0,
            "640": 980983168.0,
            "645": 955012928.0,
            "650": 958553792.0,
            "655": 951656512.0,
            "660": 961046016.0,
            "665": 967560320.0,
            "670": 962522496.0,
            "675": 968339520.0,
            "680": 965623104.0,
            "685": 962871360.0,
            "690": 961923584.0,
            "695": 954770176.0,
            "700": 970341312.0,
            "705": 945518784.0,
            "710": 943886144.0,
            "715": 973361344.0,
            "720": 968367616.0,
            "725": 978494976.0,
            "730": 952192512.0,
            "735": 948816064.0,
            "740": 955635584.0,
            "745": 975871552.0,
            "750": 981237760.0,
            "755": 962162368.0,
            "760": 951962368.0,
            "765": 967347712.0,
            "770": 976154176.0,
            "775": 970551040.0,
            "780": 977544192.0,
            "785": 931529728.0,
            "790": 960442048.0,
            "795": 964582592.0,
            "800": 967021440.0,
            "805": 962323648.0,
            "810": 940978496.0,
            "815": 949040832.0,
            "820": 953188160.0,
            "825": 954505152.0,
            "830": 976443904.0,
            "835": 956075968.0,
            "840": 948409856.0,
            "845": 965160128.0,
            "850": 966029504.0,
            "855": 960906944.0,
            "860": 976028736.0,
            "865": 938162112.0,
            "870": 966417792.0,
            "875": 972318848.0,
            "880": 963125504.0,
            "885": 967747776.0,
            "890": 949973248.0,
            "895": 960018048.0,
            "900": 974236608.0,
            "905": 963970048.0,
            "910": 958435200.0,
            "915": 956354752.0,
            "920": 943976576.0,
            "925": 960833920.0,
            "930": 978850304.0,
            "935": 971075776.0,
            "940": 960910144.0,
            "945": 945065728.0,
            "950": 957428160.0,
            "955": 979041664.0,
            "960": 983593216.0,
            "965": 966167488.0,
            "970": 951231552.0,
            "975": 961577728.0,
            "980": 968070720.0,
            "985": 968992768.0,
            "990": 984393024.0,
            "995": 953293440.0,
            "1000": 934779904.0,
            "1005": 960148352.0,
            "1010": 971540288.0,
            "1015": 985186304.0,
            "1020": 962782912.0,
            "1025": 935013120.0,
            "1030": 974679936.0,
            "1035": 964995008.0,
            "1040": 980467776.0,
            "1045": 960829376.0,
            "1050": 955202368.0,
            "1055": 957782592.0,
            "1060": 967751808.0,
            "1065": 967118208.0,
            "1070": 966603264.0,
            "1075": 950063424.0,
            "1080": 954511424.0,
            "1085": 967251008.0,
            "1090": 977138048.0,
            "1095": 961238464.0,
            "1100": 979612672.0,
            "1105": 953365568.0,
            "1110": 965954240.0,
            "1115": 966987456.0,
            "1120": 970352640.0,
            "1125": 965710784.0,
            "1130": 954943808.0,
            "1135": 965844160.0,
            "1140": 965175104.0,
            "1145": 970989056.0,
            "1150": 955558656.0,
            "1155": 930577344.0,
            "1160": 957775040.0,
            "1165": 978124608.0,
            "1170": 974303296.0,
            "1175": 973060928.0,
            "1180": 973080320.0,
            "1185": 947347520.0,
            "1190": 964794304.0,
            "1195": 953141632.0,
            "1200": 972847232.0,
            "1205": 988483584.0,
            "1210": 931126400.0,
            "1215": 968644928.0,
            "1220": 969165504.0,
            "1225": 975950912.0,
            "1230": 967337728.0,
            "1235": 943447168.0,
            "1240": 955857920.0,
            "1245": 981507520.0,
            "1250": 966114240.0,
            "1255": 973674176.0,
            "1260": 946497152.0,
            "1265": 963998336.0,
            "1270": 960488896.0,
            "1275": 973616576.0,
            "1280": 961115904.0,
            "1285": 957585216.0,
            "1290": 952528384.0,
            "1295": 971612736.0,
            "1300": 968865216.0,
            "1305": 963741120.0,
            "1310": 963336384.0,
            "1315": 943555136.0,
            "1320": 966307456.0,
            "1325": 989785088.0,
            "1330": 969508864.0,
            "1335": 972301312.0,
            "1340": 972270592.0,
            "1345": 960659136.0,
            "1350": 968641536.0,
            "1355": 955854144.0,
            "1360": 971822784.0,
            "1365": 960388032.0,
            "1370": 948792640.0,
            "1375": 973534080.0,
            "1380": 953469888.0,
            "1385": 969149696.0,
            "1390": 975719552.0,
            "1395": 931676416.0,
            "1400": 945855808.0,
            "1405": 976752576.0,
            "1410": 974511040.0,
            "1415": 967572800.0,
            "1420": 966748096.0,
            "1425": 937379392.0,
            "1430": 973914176.0,
            "1435": 978336320.0,
            "1440": 964179712.0,
            "1445": 958056512.0,
            "1450": 946147584.0,
            "1455": 983922368.0,
            "1460": 968652928.0,
            "1465": 948746496.0,
            "1470": 984244032.0,
            "1475": 943905600.0,
            "1480": 963975488.0,
            "1485": 957348928.0,
            "1490": 961262592.0,
            "1495": 980541184.0,
            "1500": 958332800.0,
            "1505": 942868800.0,
            "1510": 984180352.0,
            "1515": 959092928.0,
            "1520": 959104896.0,
            "1525": 952785152.0,
            "1530": 957745792.0,
            "1535": 949430720.0,
            "1540": 971088768.0,
            "1545": 963134464.0,
            "1550": 978667072.0,
            "1555": 952319808.0,
            "1560": 980089344.0,
            "1565": 967318144.0,
            "1570": 973843520.0,
            "1575": 975494400.0,
            "1580": 941863296.0,
            "1585": 970028736.0,
            "1590": 983821504.0,
            "1595": 948633600.0,
            "1600": 967447104.0,
            "1605": 952451840.0,
            "1610": 969619840.0,
            "1615": 983147136.0,
            "1620": 968020800.0,
            "1625": 970717568.0,
            "1630": 962888000.0,
            "1635": 942312064.0,
            "1640": 981610368.0,
            "1645": 973979456.0,
            "1650": 974182080.0,
            "1655": 967266112.0,
            "1660": 940687616.0,
            "1665": 961702720.0,
            "1670": 962901440.0,
            "1675": 971280704.0,
            "1680": 980880064.0,
            "1685": 944417472.0,
            "1690": 964689344.0,
            "1695": 965643264.0,
            "1700": 966343424.0,
            "1705": 985202240.0,
            "1710": 978352384.0,
            "1715": 943212864.0,
            "1720": 977088768.0,
            "1725": 965873088.0,
            "1730": 968971904.0,
            "1735": 965088192.0,
            "1740": 949713280.0,
            "1745": 970010048.0,
            "1750": 959683008.0,
            "1755": 960087424.0,
            "1760": 966380160.0,
            "1765": 951815360.0,
            "1770": 954665536.0,
            "1775": 973750656.0,
            "1780": 970533952.0,
            "1785": 968824832.0,
            "1790": 950235328.0,
            "1795": 945133568.0,
            "1800": 984667200.0,
            "1805": 987163392.0,
            "1810": 977767744.0,
            "1815": 948005376.0,
            "1820": 949208704.0,
            "1825": 978855168.0,
            "1830": 966361984.0,
            "1835": 964133568.0,
            "1840": 972318272.0,
            "1845": 935416064.0,
            "1850": 952499712.0,
            "1855": 980051136.0,
            "1860": 975869696.0,
            "1865": 958966336.0,
            "1870": 958947648.0,
            "1875": 932594688.0,
            "1880": 973575936.0,
            "1885": 978846272.0,
            "1890": 971360448.0,
            "1895": 959212416.0,
            "1900": 947394496.0,
            "1905": 981830720.0,
            "1910": 969124480.0,
            "1915": 970041280.0,
            "1920": 975598592.0,
            "1925": 960495360.0,
            "1930": 977921984.0,
            "1935": 963249792.0,
            "1940": 952459264.0,
            "1945": 981337728.0,
            "1950": 939172864.0,
            "1955": 960605120.0,
            "1960": 970032384.0,
            "1965": 981176640.0,
            "1970": 962044928.0,
            "1975": 952823552.0,
            "1980": 936849344.0,
            "1985": 975937600.0,
            "1990": 965965824.0,
            "1995": 962611456.0,
            "2000": 960554432.0,
            "2005": 954497664.0,
            "2010": 975581632.0,
            "2015": 991804288.0,
            "2020": 975434496.0,
            "2025": 974303168.0,
            "2030": 952084352.0,
            "2035": 967846400.0,
            "2040": 987459776.0,
            "2045": 976479808.0,
            "2050": 984701376.0,
            "2055": 942838080.0,
            "2060": 942594816.0,
            "2065": 966208896.0,
            "2070": 969623552.0,
            "2075": 980553664.0,
            "2080": 977598528.0,
            "2085": 939639040.0,
            "2090": 969873088.0,
            "2095": 961277824.0,
            "2100": 976718912.0,
            "2105": 972536000.0,
            "2110": 959902720.0,
            "2115": 956880896.0,
            "2120": 977481088.0,
            "2125": 962567168.0,
            "2130": 979619200.0,
            "2135": 950537472.0,
            "2140": 946994944.0,
            "2145": 962275776.0,
            "2150": 973404800.0,
            "2155": 972691712.0,
            "2160": 970314560.0,
            "2165": 948642688.0,
            "2170": 961543744.0,
            "2175": 969378496.0,
            "2180": 969328832.0,
            "2185": 947448512.0,
            "2190": 940480576.0,
            "2195": 986085824.0,
            "2200": 961860928.0,
            "2205": 978924032.0,
            "2210": 964101632.0,
            "2215": 963501312.0,
            "2220": 951310976.0,
            "2225": 969316288.0,
            "2230": 976331264.0,
            "2235": 974027648.0,
            "2240": 975494848.0,
            "2245": 960232448.0,
            "2250": 967638912.0,
            "2255": 969133696.0,
            "2260": 975065152.0,
            "2265": 968258688.0,
            "2270": 951744000.0,
            "2275": 962769088.0,
            "2280": 969640896.0,
            "2285": 971692928.0,
            "2290": 962891712.0,
            "2295": 931408128.0,
            "2300": 959906432.0,
            "2305": 970424000.0,
            "2310": 967444800.0,
            "2315": 970905600.0,
            "2320": 975589568.0,
            "2325": 938586112.0,
            "2330": 988437312.0,
            "2335": 977487872.0,
            "2340": 964596544.0,
            "2345": 964166976.0,
            "2350": 947555008.0,
            "2355": 977030720.0,
            "2360": 966899968.0,
            "2365": 977296704.0,
            "2370": 965071488.0,
            "2375": 953964864.0,
            "2380": 962917504.0,
            "2385": 967194624.0,
            "2390": 963075712.0,
            "2395": 974465728.0,
            "2400": 958409856.0,
            "2405": 968120768.0,
            "2410": 951586368.0,
            "2415": 965904640.0,
            "2420": 966518592.0,
            "2425": 959044096.0,
            "2430": 956685632.0,
            "2435": 961387072.0,
            "2440": 959755712.0,
            "2445": 970891200.0,
            "2450": 961997184.0,
            "2455": 922721472.0,
            "2460": 951953600.0,
            "2465": 955730176.0,
            "2470": 972569600.0,
            "2475": 973811968.0,
            "2480": 943893696.0,
            "2485": 944184576.0,
            "2490": 972411904.0,
            "2495": 974451840.0,
            "2500": 973910720.0,
            "2505": 958492032.0,
            "2510": 939506176.0,
            "2515": 979553408.0,
            "2520": 970471232.0,
            "2525": 964389504.0,
            "2530": 955799232.0,
            "2535": 936597056.0,
            "2540": 969026240.0,
            "2545": 970384896.0,
            "2550": 969461824.0,
            "2555": 969440320.0,
            "2560": 964978752.0,
            "2565": 959765632.0,
            "2570": 985176512.0,
            "2575": 957423872.0,
            "2580": 967424448.0,
            "2585": 966024128.0,
            "2590": 956354176.0,
            "2595": 981829504.0,
            "2600": 959530816.0,
            "2605": 963000064.0,
            "2610": 965972672.0,
            "2615": 951924608.0,
            "2620": 971241600.0,
            "2625": 976454720.0,
            "2630": 974410240.0,
            "2635": 948070336.0,
            "2640": 948137152.0,
            "2645": 963037632.0,
            "2650": 953983616.0,
            "2655": 977112000.0,
            "2660": 949621312.0,
            "2665": 953928768.0,
            "2670": 959061312.0,
            "2675": 979275520.0,
            "2680": 961395776.0,
            "2685": 970699392.0,
            "2690": 965219712.0,
            "2695": 943553472.0,
            "2700": 969424704.0,
            "2705": 978960128.0,
            "2710": 971810880.0,
            "2715": 990814208.0,
            "2720": 942648320.0,
            "2725": 967955200.0,
            "2730": 955465088.0,
            "2735": 970675456.0,
            "2740": 977921472.0,
            "2745": 932278016.0,
            "2750": 947856704.0,
            "2755": 956316160.0,
            "2760": 981697344.0,
            "2765": 966111552.0,
            "2770": 948914240.0,
            "2775": 935832064.0,
            "2780": 964779008.0,
            "2785": 969569024.0,
            "2790": 974273344.0,
            "2795": 966886080.0,
            "2800": 944388800.0,
            "2805": 964353280.0,
            "2810": 969610560.0,
            "2815": 975843712.0,
            "2820": 963085120.0,
            "2825": 937628928.0,
            "2830": 956740800.0,
            "2835": 986321856.0,
            "2840": 961758592.0,
            "2845": 967507136.0,
            "2850": 951716928.0,
            "2855": 962093312.0,
            "2860": 954243328.0,
            "2865": 955882624.0,
            "2870": 944663104.0,
            "2875": 974666176.0,
            "2880": 968202880.0,
            "2885": 981082112.0,
            "2890": 953454720.0,
            "2895": 957178432.0,
            "2900": 964989696.0,
            "2905": 931707328.0,
            "2910": 955731136.0,
            "2915": 979476608.0,
            "2920": 970492352.0,
            "2925": 964976512.0,
            "2930": 964048000.0,
            "2935": 940141248.0,
            "2940": 964910592.0,
            "2945": 989150720.0,
            "2950": 965209344.0,
            "2955": 965104128.0,
            "2960": 933161216.0,
            "2965": 968792896.0,
            "2970": 973033792.0,
            "2975": 958093376.0,
            "2980": 964496064.0,
            "2985": 937269504.0,
            "2990": 951254272.0,
            "2995": 978315584.0,
            "3000": 969275840.0,
            "3005": 974686656.0,
            "3010": 950234432.0,
            "3015": 943841920.0,
            "3020": 958440064.0,
            "3025": 975183488.0,
            "3030": 965017472.0,
            "3035": 963453120.0,
            "3040": 952134208.0,
            "3045": 989792448.0,
            "3050": 965544448.0,
            "3055": 982520448.0,
            "3060": 971226688.0,
            "3065": 943915264.0,
            "3070": 978404096.0,
            "3075": 975203264.0,
            "3080": 960992192.0,
            "3085": 962351424.0,
            "3090": 945950912.0,
            "3095": 938114880.0,
            "3100": 972928192.0,
            "3105": 961988992.0,
            "3110": 970655424.0,
            "3115": 963389504.0,
            "3120": 947128768.0,
            "3125": 972720192.0,
            "3130": 952973376.0,
            "3135": 966052480.0,
            "3140": 968489152.0,
            "3145": 937863808.0,
            "3150": 975022016.0,
            "3155": 976827328.0,
            "3160": 969638848.0,
            "3165": 982208128.0,
            "3170": 937973824.0,
            "3175": 953837824.0,
            "3180": 983822016.0,
            "3185": 965180032.0,
            "3190": 968492992.0,
            "3195": 950932608.0,
            "3200": 945101888.0,
            "3205": 959863168.0,
            "3210": 957499136.0,
            "3215": 958020096.0,
            "3220": 968141504.0,
            "3225": 935613824.0,
            "3230": 962601024.0,
            "3235": 975775616.0,
            "3240": 962621248.0,
            "3245": 981274432.0,
            "3250": 943259840.0,
            "3255": 954598016.0,
            "3260": 980361792.0,
            "3265": 963619776.0,
            "3270": 965163968.0,
            "3275": 959731584.0,
            "3280": 967045376.0,
            "3285": 982476544.0,
            "3290": 947688192.0,
            "3295": 966433984.0,
            "3300": 959165312.0,
            "3305": 949143104.0,
            "3310": 979521984.0,
            "3315": 964281984.0,
            "3320": 969219584.0,
            "3325": 956191744.0,
            "3330": 941167296.0,
            "3335": 964973440.0,
            "3340": 956900864.0,
            "3345": 972513408.0,
            "3350": 964588608.0,
            "3355": 943345984.0,
            "3360": 970050432.0,
            "3365": 969466368.0,
            "3370": 954773824.0,
            "3375": 958677248.0,
            "3380": 971463488.0,
            "3385": 947984960.0,
            "3390": 965793216.0,
            "3395": 978403904.0,
            "3400": 978127360.0,
            "3405": 976723392.0,
            "3410": 924192960.0,
            "3415": 955436608.0,
            "3420": 971820160.0,
            "3425": 977168064.0,
            "3430": 973838272.0,
            "3435": 936070976.0,
            "3440": 970516352.0,
            "3445": 957316160.0,
            "3450": 959853696.0,
            "3455": 963864832.0,
            "3460": 967882432.0,
            "3465": 931319424.0,
            "3470": 952348032.0,
            "3475": 973715840.0,
            "3480": 959751232.0,
            "3485": 979958976.0,
            "3490": 944683072.0,
            "3495": 953916800.0,
            "3500": 969328960.0,
            "3505": 964360256.0,
            "3510": 971233984.0,
            "3515": 955946048.0,
            "3520": 958745536.0,
            "3525": 971913920.0,
            "3530": 964149568.0,
            "3535": 983190912.0,
            "3540": 937501696.0,
            "3545": 944730624.0,
            "3550": 984461952.0,
            "3555": 978069632.0,
            "3560": 974385792.0,
            "3565": 968812480.0,
            "3570": 946694976.0,
            "3575": 976105408.0,
            "3580": 977492672.0,
            "3585": 954568320.0,
            "3590": 956426432.0,
            "3595": 951442432.0,
            "3600": 989021440.0,
            "3605": 962032256.0,
            "3610": 965078784.0,
            "3615": 974659840.0,
            "3620": 954900608.0,
            "3625": 939539776.0,
            "3630": 990173952.0,
            "3635": 971447552.0,
            "3640": 976036160.0,
            "3645": 961500352.0,
            "3650": 945814912.0,
            "3655": 965794688.0,
            "3660": 976218944.0,
            "3665": 964044096.0,
            "3670": 977455488.0,
            "3675": 943500288.0,
            "3680": 958197248.0,
            "3685": 964322368.0,
            "3690": 982116352.0,
            "3695": 963135872.0,
            "3700": 950594112.0,
            "3705": 947356928.0,
            "3710": 982367424.0,
            "3715": 972693184.0,
            "3720": 976162880.0,
            "3725": 964049664.0,
            "3730": 948886912.0,
            "3735": 967103040.0,
            "3740": 960971584.0,
            "3745": 969346752.0,
            "3750": 963960384.0,
            "3755": 953457280.0,
            "3760": 976665664.0,
            "3765": 979865984.0,
            "3770": 972408448.0,
            "3775": 972409856.0,
            "3780": 952609792.0,
            "3785": 960271808.0,
            "3790": 985622400.0,
            "3795": 969232512.0,
            "3800": 957901184.0,
            "3805": 972444928.0,
            "3810": 954543040.0,
            "3815": 974602816.0,
            "3820": 963042944.0,
            "3825": 962095936.0,
            "3830": 969406976.0,
            "3835": 934782272.0,
            "3840": 971305280.0,
            "3845": 986897088.0,
            "3850": 968909376.0,
            "3855": 965296576.0,
            "3860": 948093120.0,
            "3865": 975085440.0,
            "3870": 985135488.0,
            "3875": 983066688.0,
            "3880": 963639552.0,
            "3885": 953048768.0,
            "3890": 960333312.0,
            "3895": 960638528.0,
            "3900": 984961664.0,
            "3905": 976260992.0,
            "3910": 987385664.0,
            "3915": 946040960.0,
            "3920": 974902976.0,
            "3925": 961246784.0,
            "3930": 976813696.0,
            "3935": 978937344.0,
            "3940": 950328512.0,
            "3945": 960297792.0,
            "3950": 974209216.0,
            "3955": 973004032.0,
            "3960": 974091392.0,
            "3965": 950911104.0,
            "3970": 980704448.0,
            "3975": 960761728.0,
            "3980": 977567296.0,
            "3985": 962984768.0,
            "3990": 972778688.0,
            "3995": 953726016.0,
            "4000": 975036928.0,
            "4005": 971682560.0,
            "4010": 978420736.0,
            "4015": 971515904.0,
            "4020": 950322624.0,
            "4025": 968457920.0,
            "4030": 997960128.0,
            "4035": 978585536.0,
            "4040": 959836352.0,
            "4045": 939688704.0,
            "4050": 944754048.0,
            "4055": 980983936.0,
            "4060": 977721280.0,
            "4065": 975754688.0,
            "4070": 942176640.0,
            "4075": 945789504.0,
            "4080": 988777984.0,
            "4085": 962116096.0,
            "4090": 983380864.0,
            "4095": 986958144.0,
            "4100": 957224128.0,
            "4105": 954110144.0,
            "4110": 966512064.0,
            "4115": 976089664.0,
            "4120": 983568064.0,
            "4125": 960091328.0,
            "4130": 967314368.0,
            "4135": 971478784.0,
            "4140": 963194432.0,
            "4145": 956223232.0,
            "4150": 960331264.0,
            "4155": 946229376.0,
            "4160": 968462272.0,
            "4165": 970375872.0,
            "4170": 971996608.0,
            "4175": 955898688.0,
            "4180": 940997248.0,
            "4185": 968330880.0,
            "4190": 968065536.0,
            "4195": 989248000.0,
            "4200": 962714368.0,
            "4205": 960631936.0,
            "4210": 971886912.0,
            "4215": 974218432.0,
            "4220": 981169152.0,
            "4225": 975281600.0,
            "4230": 952752000.0,
            "4235": 958494592.0,
            "4240": 966835840.0,
            "4245": 961769280.0,
            "4250": 965881792.0,
            "4255": 958291840.0,
            "4260": 949594432.0,
            "4265": 964174656.0,
            "4270": 978444864.0,
            "4275": 975458496.0,
            "4280": 962708672.0,
            "4285": 951525632.0,
            "4290": 980078464.0,
            "4295": 968878976.0,
            "4300": 958323456.0,
            "4305": 966863424.0,
            "4310": 939626304.0,
            "4315": 949453440.0,
            "4320": 984584704.0,
            "4325": 982595840.0,
            "4330": 974758592.0,
            "4335": 949495936.0,
            "4340": 959551360.0,
            "4345": 956668224.0,
            "4350": 979996544.0,
            "4355": 968881728.0,
            "4360": 966350784.0,
            "4365": 941257536.0,
            "4370": 969507648.0,
            "4375": 973019648.0,
            "4380": 966172160.0,
            "4385": 972059840.0,
            "4390": 954176768.0,
            "4395": 951881344.0,
            "4400": 973664960.0,
            "4405": 972478272.0,
            "4410": 967944960.0,
            "4415": 958997824.0,
            "4420": 960802944.0,
            "4425": 976476608.0,
            "4430": 965991232.0,
            "4435": 975949952.0,
            "4440": 962355520.0,
            "4445": 954861760.0,
            "4450": 978343616.0,
            "4455": 960255360.0,
            "4460": 968592576.0,
            "4465": 968867648.0,
            "4470": 944164544.0,
            "4475": 952033600.0,
            "4480": 978825088.0,
            "4485": 968327936.0,
            "4490": 957061568.0,
            "4495": 938987136.0,
            "4500": 953277568.0,
            "4505": 977216384.0,
            "4510": 978774336.0,
            "4515": 962517312.0,
            "4520": 958886784.0,
            "4525": 958138688.0,
            "4530": 964440064.0,
            "4535": 976629952.0,
            "4540": 976896576.0,
            "4545": 970207040.0,
            "4550": 953247680.0,
            "4555": 959589760.0,
            "4560": 972558784.0,
            "4565": 973528896.0,
            "4570": 978958400.0,
            "4575": 957859072.0,
            "4580": 963252672.0,
            "4585": 957492672.0,
            "4590": 986605120.0,
            "4595": 960249792.0,
            "4600": 952290240.0,
            "4605": 959323264.0,
            "4610": 963772992.0,
            "4615": 958011968.0,
            "4620": 960280000.0,
            "4625": 973847424.0,
            "4630": 944542784.0,
            "4635": 977037120.0,
            "4640": 960322432.0,
            "4645": 982028736.0,
            "4650": 962448384.0,
            "4655": 939470080.0,
            "4660": 964038144.0,
            "4665": 962602048.0,
            "4670": 976684032.0,
            "4675": 963502784.0,
            "4680": 957512448.0,
            "4685": 949685760.0,
            "4690": 957023680.0,
            "4695": 969999936.0,
            "4700": 961262720.0,
            "4705": 970817664.0,
            "4710": 934542016.0,
            "4715": 970512448.0,
            "4720": 966385600.0,
            "4725": 980377472.0,
            "4730": 965886784.0,
            "4735": 937945472.0,
            "4740": 960173696.0,
            "4745": 976083008.0,
            "4750": 968033408.0,
            "4755": 984996800.0,
            "4760": 959208960.0,
            "4765": 955261632.0,
            "4770": 958682304.0,
            "4775": 991141824.0,
            "4780": 976893440.0,
            "4785": 967557376.0,
            "4790": 943744320.0,
            "4795": 955874496.0,
            "4800": 967758592.0,
            "4805": 976603392.0,
            "4810": 965222336.0,
            "4815": 958017856.0,
            "4820": 974004736.0,
            "4825": 961568064.0,
            "4830": 962649344.0,
            "4835": 972532288.0,
            "4840": 948962816.0,
            "4845": 965753600.0,
            "4850": 960304768.0,
            "4855": 964155392.0,
            "4860": 963088448.0,
            "4865": 967507520.0,
            "4870": 957243328.0,
            "4875": 983586560.0,
            "4880": 957120896.0,
            "4885": 977051840.0,
            "4890": 959728640.0,
            "4895": 942213312.0,
            "4900": 973712448.0,
            "4905": 975263296.0,
            "4910": 969245184.0,
            "4915": 970126848.0,
            "4920": 941157120.0,
            "4925": 954796928.0,
            "4930": 977095424.0,
            "4935": 963799872.0,
            "4940": 972672192.0,
            "4945": 960043968.0,
            "4950": 940819328.0,
            "4955": 968135488.0,
            "4960": 976886912.0,
            "4965": 961008576.0,
            "4970": 958641152.0,
            "4975": 933811648.0,
            "4980": 960811648.0,
            "4985": 962987584.0,
            "4990": 963552704.0,
            "4995": 986332416.0,
            "5000": 940726912.0,
            "5005": 968911744.0,
            "5010": 970296000.0,
            "5015": 965281536.0,
            "5020": 966727808.0,
            "5025": 949447936.0,
            "5030": 953528000.0,
            "5035": 967340992.0,
            "5040": 955750528.0,
            "5045": 969178432.0,
            "5050": 953416960.0,
            "5055": 954847552.0,
            "5060": 963016640.0,
            "5065": 952219520.0,
            "5070": 973596928.0,
            "5075": 978580288.0,
            "5080": 942915840.0,
            "5085": 965863168.0,
            "5090": 972954688.0,
            "5095": 964495680.0,
            "5100": 958391744.0,
            "5105": 965378432.0,
            "5110": 950450304.0,
            "5115": 972383744.0,
            "5120": 960498688.0,
            "5125": 969818112.0,
            "5130": 938856960.0,
            "5135": 943769728.0,
            "5140": 969998720.0,
            "5145": 968688320.0,
            "5150": 970638016.0,
            "5155": 972684352.0,
            "5160": 926600576.0,
            "5165": 961653824.0,
            "5170": 966886592.0,
            "5175": 966135424.0,
            "5180": 963778176.0,
            "5185": 930925824.0,
            "5190": 949877312.0,
            "5195": 972509376.0,
            "5200": 973820480.0,
            "5205": 968226240.0,
            "5210": 960588800.0,
            "5215": 928828352.0,
            "5220": 979211648.0,
            "5225": 984853376.0,
            "5230": 975098112.0,
            "5235": 975166272.0,
            "5240": 944424000.0,
            "5245": 970895616.0,
            "5250": 972524992.0,
            "5255": 967043968.0,
            "5260": 976828224.0,
            "5265": 942295872.0,
            "5270": 969262976.0,
            "5275": 970172224.0,
            "5280": 962924480.0,
            "5285": 964121920.0,
            "5290": 932636480.0,
            "5295": 951861696.0,
            "5300": 975661696.0,
            "5305": 951977344.0,
            "5310": 968126208.0,
            "5315": 955854912.0,
            "5320": 951044352.0,
            "5325": 973089472.0,
            "5330": 967843968.0,
            "5335": 967658560.0,
            "5340": 966540608.0,
            "5345": 963034432.0,
            "5350": 978924928.0,
            "5355": 972276800.0,
            "5360": 963953792.0,
            "5365": 965295616.0,
            "5370": 947885760.0,
            "5375": 948857408.0,
            "5380": 967261056.0,
            "5385": 980552896.0,
            "5390": 965374656.0,
            "5395": 955270720.0,
            "5400": 948336320.0,
            "5405": 974415488.0,
            "5410": 967942912.0,
            "5415": 976186048.0,
            "5420": 967567296.0,
            "5425": 937473920.0,
            "5430": 963955776.0,
            "5435": 971935552.0,
            "5440": 969176448.0,
            "5445": 957549888.0,
            "5450": 919479488.0,
            "5455": 952137920.0,
            "5460": 962474048.0,
            "5465": 978991744.0,
            "5470": 981105024.0,
            "5475": 941709440.0,
            "5480": 955890624.0,
            "5485": 964975616.0,
            "5490": 976048192.0,
            "5495": 962842752.0,
            "5500": 971242368.0,
            "5505": 957038208.0,
            "5510": 968672256.0,
            "5515": 945577600.0,
            "5520": 963244224.0,
            "5525": 976030976.0,
            "5530": 936696128.0,
            "5535": 970744320.0,
            "5540": 960467968.0,
            "5545": 972048640.0,
            "5550": 967934400.0,
            "5555": 956030528.0,
            "5560": 954643968.0,
            "5565": 968900736.0,
            "5570": 945284928.0,
            "5575": 960662528.0,
            "5580": 960723584.0,
            "5585": 959679296.0,
            "5590": 977815808.0,
            "5595": 975294272.0,
            "5600": 963034688.0,
            "5605": 964226176.0,
            "5610": 943317056.0,
            "5615": 966761792.0,
            "5620": 963470272.0,
            "5625": 982496320.0,
            "5630": 976173696.0,
            "5635": 957421824.0,
            "5640": 951529728.0,
            "5645": 967940672.0,
            "5650": 979305152.0,
            "5655": 983511104.0,
            "5660": 956505792.0,
            "5665": 953573440.0,
            "5670": 966159360.0,
            "5675": 967726848.0,
            "5680": 978733632.0,
            "5685": 962265728.0,
            "5690": 935982144.0,
            "5695": 963812800.0,
            "5700": 952574784.0,
            "5705": 974573952.0,
            "5710": 971366464.0,
            "5715": 946183872.0,
            "5720": 975249536.0,
            "5725": 967683328.0,
            "5730": 978697088.0,
            "5735": 964979776.0,
            "5740": 943771008.0,
            "5745": 971302656.0,
            "5750": 981938752.0,
            "5755": 956827840.0,
            "5760": 963793216.0,
            "5765": 957817344.0,
            "5770": 955846912.0,
            "5775": 970907584.0,
            "5780": 962910272.0,
            "5785": 970846912.0,
            "5790": 974812096.0,
            "5795": 949986752.0,
            "5800": 966024000.0,
            "5805": 968910016.0,
            "5810": 976257088.0,
            "5815": 970212928.0,
            "5820": 936525120.0,
            "5825": 969462848.0,
            "5830": 977847744.0,
            "5835": 975127104.0,
            "5840": 963246912.0,
            "5845": 968886464.0,
            "5850": 943217536.0,
            "5855": 976109632.0,
            "5860": 979697024.0,
            "5865": 978493312.0,
            "5870": 969035584.0,
            "5875": 942328512.0,
            "5880": 964645184.0,
            "5885": 974978304.0,
            "5890": 972782528.0,
            "5895": 965742272.0,
            "5900": 941323200.0,
            "5905": 961926272.0,
            "5910": 958679232.0,
            "5915": 968394880.0,
            "5920": 977357184.0,
            "5925": 959603648.0,
            "5930": 946782592.0,
            "5935": 952536448.0,
            "5940": 977672320.0,
            "5945": 984805440.0,
            "5950": 980811648.0,
            "5955": 935093120.0,
            "5960": 961796544.0,
            "5965": 965925568.0,
            "5970": 970723584.0,
            "5975": 961994688.0,
            "5980": 958246336.0,
            "5985": 964747136.0,
            "5990": 973615680.0,
            "5995": 955981184.0,
            "6000": 955657408.0,
            "6005": 961391744.0,
            "6010": 952842624.0,
            "6015": 974538304.0,
            "6020": 978425088.0,
            "6025": 972251520.0,
            "6030": 955397632.0,
            "6035": 946990912.0,
            "6040": 962666624.0,
            "6045": 983866368.0,
            "6050": 956613312.0,
            "6055": 963544384.0,
            "6060": 945853376.0,
            "6065": 958468352.0,
            "6070": 978476992.0,
            "6075": 978099392.0,
            "6080": 957580032.0,
            "6085": 947629952.0,
            "6090": 953756864.0,
            "6095": 964684416.0,
            "6100": 979986240.0,
            "6105": 971232960.0,
            "6110": 961901760.0,
            "6115": 943888064.0,
            "6120": 968721600.0,
            "6125": 960715392.0,
            "6130": 984039872.0,
            "6135": 961130112.0,
            "6140": 958892800.0,
            "6145": 971373376.0,
            "6150": 968702208.0,
            "6155": 975126592.0,
            "6160": 977339200.0,
            "6165": 952867264.0,
            "6170": 951228544.0,
            "6175": 963519104.0,
            "6180": 969550528.0,
            "6185": 966491968.0,
            "6190": 963780352.0,
            "6195": 947291328.0,
            "6200": 969574720.0,
            "6205": 967247872.0,
            "6210": 959437568.0,
            "6215": 973270144.0,
            "6220": 936559552.0,
            "6225": 978935936.0,
            "6230": 976292864.0,
            "6235": 971864256.0,
            "6240": 966338176.0,
            "6245": 956215040.0,
            "6250": 956610304.0,
            "6255": 973375424.0,
            "6260": 979036928.0,
            "6265": 975261248.0,
            "6270": 958978432.0,
            "6275": 963970880.0,
            "6280": 973386048.0,
            "6285": 966258304.0,
            "6290": 971150720.0,
            "6295": 987576320.0,
            "6300": 947701696.0,
            "6305": 965041344.0,
            "6310": 979398464.0,
            "6315": 978688448.0,
            "6320": 971932800.0,
            "6325": 923113408.0,
            "6330": 959514880.0,
            "6335": 975051584.0,
            "6340": 985315712.0,
            "6345": 967045312.0,
            "6350": 944820736.0,
            "6355": 958321984.0,
            "6360": 973018880.0,
            "6365": 972404736.0,
            "6370": 959630848.0,
            "6375": 967485888.0,
            "6380": 951362560.0,
            "6385": 973794304.0,
            "6390": 965735168.0,
            "6395": 975303168.0,
            "6400": 984314880.0,
            "6405": 944422144.0,
            "6410": 977732160.0,
            "6415": 971686144.0,
            "6420": 956788672.0,
            "6425": 961306944.0,
            "6430": 957955392.0,
            "6435": 960505792.0,
            "6440": 969020736.0,
            "6445": 973872960.0,
            "6450": 974612672.0,
            "6455": 962486912.0,
            "6460": 941388416.0,
            "6465": 974655232.0,
            "6470": 980206848.0,
            "6475": 961267648.0,
            "6480": 967865536.0,
            "6485": 949004032.0,
            "6490": 971191744.0,
            "6495": 988602816.0,
            "6500": 980802176.0,
            "6505": 972330112.0,
            "6510": 951785472.0,
            "6515": 958069824.0,
            "6520": 979073536.0,
            "6525": 979088768.0,
            "6530": 973617152.0,
            "6535": 968222464.0,
            "6540": 950516544.0,
            "6545": 966441216.0,
            "6550": 979716160.0,
            "6555": 967439040.0,
            "6560": 975563136.0,
            "6565": 949855168.0,
            "6570": 952179392.0,
            "6575": 962750912.0,
            "6580": 976233664.0,
            "6585": 979741568.0,
            "6590": 949495808.0,
            "6595": 961951488.0,
            "6600": 961342144.0,
            "6605": 961682496.0,
            "6610": 985247936.0,
            "6615": 959825984.0,
            "6620": 944747840.0,
            "6625": 971176192.0,
            "6630": 971516352.0,
            "6635": 964240256.0,
            "6640": 959857344.0,
            "6645": 951373696.0,
            "6650": 979180224.0,
            "6655": 966146880.0,
            "6660": 968717120.0,
            "6665": 969174720.0,
            "6670": 933046912.0,
            "6675": 970933056.0,
            "6680": 969175936.0,
            "6685": 958931968.0,
            "6690": 956460480.0,
            "6695": 955560704.0,
            "6700": 962393856.0,
            "6705": 979231488.0,
            "6710": 971147776.0,
            "6715": 967157952.0,
            "6720": 974236864.0,
            "6725": 941929344.0,
            "6730": 979473536.0,
            "6735": 994820544.0,
            "6740": 976934144.0,
            "6745": 974799808.0,
            "6750": 939161920.0,
            "6755": 977895424.0,
            "6760": 969918144.0,
            "6765": 978639552.0,
            "6770": 975587712.0,
            "6775": 943585856.0,
            "6780": 947430528.0,
            "6785": 975530944.0,
            "6790": 960889152.0,
            "6795": 976414464.0,
            "6800": 973516736.0,
            "6805": 946867008.0,
            "6810": 958437888.0,
            "6815": 970782784.0,
            "6820": 977999680.0,
            "6825": 969315520.0,
            "6830": 950730304.0,
            "6835": 981673024.0,
            "6840": 983132672.0,
            "6845": 948936192.0,
            "6850": 965809856.0,
            "6855": 954064128.0,
            "6860": 979357824.0,
            "6865": 984061696.0,
            "6870": 965100352.0,
            "6875": 978933952.0,
            "6880": 950666368.0,
            "6885": 958557376.0,
            "6890": 960656704.0,
            "6895": 966243456.0,
            "6900": 985345984.0,
            "6905": 968969856.0,
            "6910": 950196224.0,
            "6915": 971099264.0,
            "6920": 967596416.0,
            "6925": 965513152.0,
            "6930": 964852416.0,
            "6935": 952245952.0,
            "6940": 963241728.0,
            "6945": 986666560.0,
            "6950": 973768960.0,
            "6955": 964924608.0,
            "6960": 940274688.0,
            "6965": 975019648.0,
            "6970": 978641728.0,
            "6975": 985239040.0,
            "6980": 982655040.0,
            "6985": 960294400.0,
            "6990": 945704448.0,
            "6995": 987766336.0,
            "7000": 963317440.0,
            "7005": 963045760.0,
            "7010": 985353984.0,
            "7015": 945669120.0,
            "7020": 983129344.0,
            "7025": 968988800.0,
            "7030": 953562688.0,
            "7035": 983118720.0,
            "7040": 950766208.0,
            "7045": 956286336.0,
            "7050": 960452992.0,
            "7055": 963769600.0,
            "7060": 976814528.0,
            "7065": 968477184.0,
            "7070": 953817920.0,
            "7075": 957039744.0,
            "7080": 969383040.0,
            "7085": 966052928.0,
            "7090": 969731456.0,
            "7095": 960300288.0,
            "7100": 973872448.0,
            "7105": 973209856.0,
            "7110": 970513728.0,
            "7115": 958916480.0,
            "7120": 949518784.0,
            "7125": 963153856.0,
            "7130": 971680640.0,
            "7135": 964676800.0,
            "7140": 961595904.0,
            "7145": 931069184.0,
            "7150": 946833152.0,
            "7155": 990945280.0,
            "7160": 968663872.0,
            "7165": 956926400.0,
            "7170": 968423552.0,
            "7175": 955864320.0,
            "7180": 958331200.0,
            "7185": 985273280.0,
            "7190": 979117824.0,
            "7195": 974032448.0,
            "7200": 936447232.0,
            "7205": 958005888.0,
            "7210": 967372992.0,
            "7215": 969622400.0,
            "7220": 982491200.0,
            "7225": 929305152.0,
            "7230": 950090048.0,
            "7235": 967215296.0,
            "7240": 967251712.0,
            "7245": 967860032.0,
            "7250": 950094976.0,
            "7255": 958040960.0,
            "7260": 970705600.0,
            "7265": 975243968.0,
            "7270": 960156928.0,
            "7275": 959656320.0,
            "7280": 957553536.0,
            "7285": 978387136.0,
            "7290": 977863936.0,
            "7295": 963392320.0,
            "7300": 975868224.0,
            "7305": 964588352.0,
            "7310": 977836352.0,
            "7315": 967215872.0,
            "7320": 974848256.0,
            "7325": 967496768.0,
            "7330": 960097600.0,
            "7335": 964578688.0,
            "7340": 978332544.0,
            "7345": 968062592.0,
            "7350": 985297856.0,
            "7355": 960130752.0,
            "7360": 949274304.0,
            "7365": 973093760.0,
            "7370": 982913920.0,
            "7375": 963555648.0,
            "7380": 964689792.0,
            "7385": 949157376.0,
            "7390": 964574592.0,
            "7395": 958898240.0,
            "7400": 970427456.0,
            "7405": 988104640.0,
            "7410": 952865920.0,
            "7415": 950934528.0,
            "7420": 967226752.0,
            "7425": 983172800.0,
            "7430": 966265728.0,
            "7435": 973491840.0,
            "7440": 937683712.0,
            "7445": 969259264.0,
            "7450": 980769024.0,
            "7455": 971858368.0,
            "7460": 972529408.0,
            "7465": 940188544.0,
            "7470": 972282496.0,
            "7475": 958389696.0,
            "7480": 969505344.0,
            "7485": 961847040.0,
            "7490": 934846592.0,
            "7495": 957507200.0,
            "7500": 969769152.0,
            "7505": 970161536.0,
            "7510": 972406464.0,
            "7515": 980210048.0,
            "7520": 952473600.0,
            "7525": 970801600.0,
            "7530": 954925952.0,
            "7535": 971567936.0,
            "7540": 979904512.0,
            "7545": 959591808.0,
            "7550": 961140800.0,
            "7555": 960895552.0,
            "7560": 970885632.0,
            "7565": 955157248.0,
            "7570": 943331520.0,
            "7575": 966109824.0,
            "7580": 982409792.0,
            "7585": 979290048.0,
            "7590": 970979008.0,
            "7595": 950336384.0,
            "7600": 946411712.0,
            "7605": 982649152.0,
            "7610": 969955072.0,
            "7615": 989327744.0,
            "7620": 957356672.0,
            "7625": 941577536.0,
            "7630": 971881984.0,
            "7635": 984658624.0,
            "7640": 983926016.0,
            "7645": 968503552.0,
            "7650": 959528896.0,
            "7655": 962977792.0,
            "7660": 969519488.0,
            "7665": 978576896.0,
            "7670": 975844800.0,
            "7675": 976001216.0,
            "7680": 942945920.0,
            "7685": 960549504.0,
            "7690": 975622592.0,
            "7695": 982725568.0,
            "7700": 980058816.0,
            "7705": 941456384.0,
            "7710": 975279744.0,
            "7715": 979628288.0,
            "7720": 968339712.0,
            "7725": 960700608.0,
            "7730": 943977216.0,
            "7735": 968476032.0,
            "7740": 980832192.0,
            "7745": 964593600.0,
            "7750": 964505920.0,
            "7755": 959959936.0,
            "7760": 971035968.0,
            "7765": 971331968.0,
            "7770": 962621184.0,
            "7775": 982228992.0,
            "7780": 965195904.0,
            "7785": 959752704.0,
            "7790": 968591680.0,
            "7795": 968859456.0,
            "7800": 972188736.0,
            "7805": 968762240.0,
            "7810": 946316224.0,
            "7815": 964008448.0,
            "7820": 974652096.0,
            "7825": 963857664.0,
            "7830": 957421888.0,
            "7835": 950374080.0,
            "7840": 957441088.0,
            "7845": 954201344.0,
            "7850": 980128000.0,
            "7855": 986990592.0,
            "7860": 947953088.0,
            "7865": 949598656.0,
            "7870": 965851456.0,
            "7875": 976366912.0,
            "7880": 969056576.0,
            "7885": 969827328.0,
            "7890": 952287488.0,
            "7895": 975297408.0,
            "7900": 963977472.0,
            "7905": 964432576.0,
            "7910": 966139520.0,
            "7915": 943821248.0,
            "7920": 951435520.0,
            "7925": 969931584.0,
            "7930": 965301248.0,
            "7935": 984633472.0,
            "7940": 965187648.0,
            "7945": 950834240.0,
            "7950": 962713728.0,
            "7955": 980597632.0,
            "7960": 964009728.0,
            "7965": 953464256.0,
            "7970": 952121408.0,
            "7975": 970176768.0,
            "7980": 965820288.0,
            "7985": 959510336.0,
            "7990": 968583680.0,
            "7995": 947223296.0,
            "8000": 963087744.0,
            "8005": 981245184.0,
            "8010": 966256320.0,
            "8015": 983078976.0,
            "8020": 961325504.0,
            "8025": 965814784.0,
            "8030": 958815232.0,
            "8035": 976182784.0,
            "8040": 961184576.0,
            "8045": 948370368.0,
            "8050": 960090304.0,
            "8055": 979949632.0,
            "8060": 970317824.0,
            "8065": 958616896.0,
            "8070": 964572224.0,
            "8075": 942979840.0,
            "8080": 966354688.0,
            "8085": 967456384.0,
            "8090": 984068800.0,
            "8095": 989130304.0,
            "8100": 967122880.0,
            "8105": 944880832.0,
            "8110": 969854208.0,
            "8115": 986200512.0,
            "8120": 975353792.0,
            "8125": 964977664.0,
            "8130": 966949824.0,
            "8135": 968265216.0,
            "8140": 964500160.0,
            "8145": 995874752.0,
            "8150": 973817600.0,
            "8155": 939275776.0,
            "8160": 965212608.0,
            "8165": 973688512.0,
            "8170": 969087680.0,
            "8175": 962153344.0,
            "8180": 936964480.0,
            "8185": 963486208.0,
            "8190": 968119488.0,
            "8195": 978061376.0,
            "8200": 956982272.0,
            "8205": 960910912.0,
            "8210": 947123008.0,
            "8215": 982891392.0,
            "8220": 989217856.0,
            "8225": 967091328.0,
            "8230": 962860608.0,
            "8235": 934980480.0,
            "8240": 980883072.0,
            "8245": 976853696.0,
            "8250": 964513536.0,
            "8255": 978243904.0,
            "8260": 957165888.0,
            "8265": 983486016.0,
            "8270": 953168128.0,
            "8275": 974862144.0,
            "8280": 975042112.0,
            "8285": 954023424.0,
            "8290": 940649664.0,
            "8295": 981948480.0,
            "8300": 973790592.0,
            "8305": 978307968.0,
            "8310": 951945216.0,
            "8315": 938364608.0,
            "8320": 977977344.0,
            "8325": 968107648.0,
            "8330": 990904448.0,
            "8335": 976668800.0,
            "8340": 948313088.0,
            "8345": 971268096.0,
            "8350": 970833216.0,
            "8355": 975592512.0,
            "8360": 980242240.0,
            "8365": 933298112.0,
            "8370": 965809280.0,
            "8375": 980285440.0,
            "8380": 965519296.0,
            "8385": 972964096.0,
            "8390": 963276160.0,
            "8395": 951641792.0,
            "8400": 972950208.0,
            "8405": 952006016.0,
            "8410": 961048640.0,
            "8415": 966224960.0,
            "8420": 942073216.0,
            "8425": 968608576.0,
            "8430": 961269632.0,
            "8435": 966016768.0,
            "8440": 969989952.0,
            "8445": 953303168.0,
            "8450": 984719936.0,
            "8455": 990886144.0,
            "8460": 969110848.0,
            "8465": 967810816.0,
            "8470": 963340288.0,
            "8475": 943511616.0,
            "8480": 987741120.0,
            "8485": 980317376.0,
            "8490": 992612032.0,
            "8495": 972245632.0,
            "8500": 951823360.0,
            "8505": 983793408.0,
            "8510": 974002240.0,
            "8515": 969145216.0,
            "8520": 962195840.0,
            "8525": 945413312.0,
            "8530": 984573440.0,
            "8535": 978608128.0,
            "8540": 968331200.0,
            "8545": 969014720.0,
            "8550": 942421056.0,
            "8555": 972137728.0,
            "8560": 958995008.0,
            "8565": 976032000.0,
            "8570": 974998080.0,
            "8575": 971534528.0,
            "8580": 932411712.0,
            "8585": 965795392.0,
            "8590": 979154688.0,
            "8595": 979621440.0,
            "8600": 984124800.0,
            "8605": 958319616.0,
            "8610": 984389056.0,
            "8615": 977724160.0,
            "8620": 963289728.0,
            "8625": 979552576.0,
            "8630": 943285120.0,
            "8635": 962324608.0,
            "8640": 973621184.0,
            "8645": 970942976.0,
            "8650": 969644480.0,
            "8655": 971046080.0,
            "8660": 944863936.0,
            "8665": 986915840.0,
            "8670": 960930944.0,
            "8675": 974807040.0,
            "8680": 962730432.0,
            "8685": 955913472.0,
            "8690": 978796160.0,
            "8695": 969175104.0,
            "8700": 973282112.0,
            "8705": 973926784.0,
            "8710": 947349248.0,
            "8715": 973834112.0,
            "8720": 958732160.0,
            "8725": 979211392.0,
            "8730": 986078336.0,
            "8735": 952288384.0,
            "8740": 940913152.0,
            "8745": 987934528.0,
            "8750": 972318080.0,
            "8755": 971958080.0,
            "8760": 965827136.0,
            "8765": 934961664.0,
            "8770": 986889536.0,
            "8775": 969984320.0,
            "8780": 967712512.0,
            "8785": 962688384.0,
            "8790": 947760128.0,
            "8795": 969675584.0,
            "8800": 970863680.0,
            "8805": 973828480.0,
            "8810": 983933760.0,
            "8815": 951961600.0,
            "8820": 940164672.0,
            "8825": 964555200.0,
            "8830": 981470848.0,
            "8835": 971778240.0,
            "8840": 979651200.0,
            "8845": 951320384.0,
            "8850": 987141504.0,
            "8855": 971372864.0,
            "8860": 962404800.0,
            "8865": 957297472.0,
            "8870": 946156288.0,
            "8875": 969020672.0,
            "8880": 984369856.0,
            "8885": 971441792.0,
            "8890": 970361408.0,
            "8895": 953044608.0,
            "8900": 961975936.0,
            "8905": 977023360.0,
            "8910": 982158720.0,
            "8915": 981124992.0,
            "8920": 968220160.0,
            "8925": 940594688.0,
            "8930": 970846912.0,
            "8935": 964150656.0,
            "8940": 977951488.0,
            "8945": 982379648.0,
            "8950": 946076352.0,
            "8955": 972927552.0,
            "8960": 973859840.0,
            "8965": 974120640.0,
            "8970": 966680000.0,
            "8975": 937468160.0,
            "8980": 953369728.0,
            "8985": 977766912.0,
            "8990": 967620608.0,
            "8995": 980945856.0,
            "9000": 952394752.0,
            "9005": 950709376.0,
            "9010": 975378752.0,
            "9015": 982950080.0,
            "9020": 959475008.0,
            "9025": 979791872.0,
            "9030": 954120320.0,
            "9035": 968614656.0,
            "9040": 978674880.0,
            "9045": 968964160.0,
            "9050": 983280640.0,
            "9055": 948192640.0,
            "9060": 956759744.0,
            "9065": 969974400.0,
            "9070": 967757056.0,
            "9075": 981028672.0,
            "9080": 952994304.0,
            "9085": 971687424.0,
            "9090": 963874496.0,
            "9095": 968373952.0,
            "9100": 974891776.0,
            "9105": 960536704.0,
            "9110": 948230272.0,
            "9115": 956895488.0,
            "9120": 986010176.0,
            "9125": 963197504.0,
            "9130": 958861888.0,
            "9135": 951827776.0,
            "9140": 967368448.0,
            "9145": 977617280.0,
            "9150": 986985472.0,
            "9155": 977385792.0,
            "9160": 958147904.0,
            "9165": 950599232.0,
            "9170": 988430720.0,
            "9175": 971827456.0,
            "9180": 967657152.0,
            "9185": 955335872.0,
            "9190": 957127552.0,
            "9195": 966261376.0,
            "9200": 969175040.0,
            "9205": 967636352.0,
            "9210": 985002240.0,
            "9215": 931816768.0,
            "9220": 949942976.0,
            "9225": 971389696.0,
            "9230": 970999680.0,
            "9235": 971870080.0,
            "9240": 960051776.0,
            "9245": 964111808.0,
            "9250": 961931648.0,
            "9255": 983252672.0,
            "9260": 979394752.0,
            "9265": 952509760.0,
            "9270": 949681856.0,
            "9275": 978722560.0,
            "9280": 978049664.0,
            "9285": 962680960.0,
            "9290": 979242752.0,
            "9295": 958904064.0,
            "9300": 966046592.0,
            "9305": 969462592.0,
            "9310": 973282752.0,
            "9315": 976167552.0,
            "9320": 948389184.0,
            "9325": 979737216.0,
            "9330": 978122432.0,
            "9335": 975671488.0,
            "9340": 960371840.0,
            "9345": 943573440.0,
            "9350": 953153920.0,
            "9355": 963375808.0,
            "9360": 960797696.0,
            "9365": 983508224.0,
            "9370": 982987328.0,
            "9375": 942067008.0,
            "9380": 983326144.0,
            "9385": 985749440.0,
            "9390": 973308096.0,
            "9395": 978894592.0,
            "9400": 938332160.0,
            "9405": 968683456.0,
            "9410": 982171328.0,
            "9415": 992238848.0,
            "9420": 960673344.0,
            "9425": 957097856.0,
            "9430": 939234816.0,
            "9435": 974951872.0,
            "9440": 959689472.0,
            "9445": 974297664.0,
            "9450": 962110144.0,
            "9455": 946217088.0,
            "9460": 978465920.0,
            "9465": 988615680.0,
            "9470": 963602304.0,
            "9475": 983955968.0,
            "9480": 931651008.0,
            "9485": 987649984.0,
            "9490": 963923968.0,
            "9495": 972783104.0,
            "9500": 982760384.0,
            "9505": 970407616.0,
            "9510": 964983552.0,
            "9515": 957084480.0,
            "9520": 948395456.0,
            "9525": 965495424.0,
            "9530": 958616256.0,
            "9535": 951551872.0,
            "9540": 954032704.0,
            "9545": 979505152.0,
            "9550": 956158400.0,
            "9555": 953539968.0,
            "9560": 958480064.0,
            "9565": 969926848.0,
            "9570": 977774464.0,
            "9575": 959007552.0,
            "9580": 963107904.0,
            "9585": 946810048.0,
            "9590": 948810432.0,
            "9595": 967363264.0,
            "9600": 985067136.0,
            "9605": 985239360.0,
            "9610": 943946880.0,
            "9615": 953020480.0,
            "9620": 981447552.0,
            "9625": 978904192.0,
            "9630": 970259840.0,
            "9635": 975088384.0,
            "9640": 940783936.0,
            "9645": 962349824.0,
            "9650": 971519744.0,
            "9655": 987470656.0,
            "9660": 963466368.0,
            "9665": 950093504.0,
            "9670": 966320192.0,
            "9675": 963284160.0,
            "9680": 965297472.0,
            "9685": 986536832.0,
            "9690": 940841216.0,
            "9695": 950778496.0,
            "9700": 976180736.0,
            "9705": 972956480.0,
            "9710": 967815424.0,
            "9715": 971697664.0,
            "9720": 940869888.0,
            "9725": 966477312.0,
            "9730": 974320320.0,
            "9735": 974762368.0,
            "9740": 971725312.0,
            "9745": 951060672.0,
            "9750": 979997248.0,
            "9755": 970451264.0,
            "9760": 968346944.0,
            "9765": 964130816.0,
            "9770": 952710976.0,
            "9775": 956865600.0,
            "9780": 970389120.0,
            "9785": 959043072.0,
            "9790": 961323648.0,
            "9795": 958544896.0,
            "9800": 949609152.0,
            "9805": 962440768.0,
            "9810": 978688000.0,
            "9815": 977997760.0,
            "9820": 982998976.0,
            "9825": 939509760.0,
            "9830": 969416512.0,
            "9835": 973184704.0,
            "9840": 971895552.0,
            "9845": 967552704.0,
            "9850": 947278592.0,
            "9855": 957841536.0,
            "9860": 987531456.0,
            "9865": 970396032.0,
            "9870": 990394624.0,
            "9875": 957255616.0,
            "9880": 931373248.0,
            "9885": 963726400.0,
            "9890": 972710848.0,
            "9895": 984065280.0,
            "9900": 956629824.0,
            "9905": 939315648.0,
            "9910": 979056000.0,
            "9915": 974277760.0,
            "9920": 944916160.0,
            "9925": 962988416.0,
            "9930": 947846144.0,
            "9935": 961026496.0,
            "9940": 965854848.0,
            "9945": 958762688.0,
            "9950": 964496192.0,
            "9955": 943559104.0,
            "9960": 967015296.0,
            "9965": 984074496.0,
            "9970": 966937216.0,
            "9975": 964346240.0,
            "9980": 980855808.0,
            "9985": 942708160.0,
            "9990": 976956480.0,
            "9995": 982753280.0,
            "10000": 971923776.0,
            "10005": 970081216.0,
            "10010": 944760576.0,
            "10015": 983815808.0,
            "10020": 978859200.0,
            "10025": 979745280.0,
            "10030": 971783936.0,
            "10035": 947023168.0,
            "10040": 950827904.0,
            "10045": 978282816.0,
            "10050": 986212160.0,
            "10055": 990541760.0,
            "10060": 958798592.0,
            "10065": 947921472.0,
            "10070": 967496640.0,
            "10075": 979866880.0,
            "10080": 971877184.0,
            "10085": 975039104.0,
            "10090": 944118272.0,
            "10095": 963111040.0,
            "10100": 972613056.0,
            "10105": 976340736.0,
            "10110": 972299136.0,
            "10115": 949200960.0,
            "10120": 962728896.0,
            "10125": 974463808.0,
            "10130": 981079104.0,
            "10135": 972727488.0,
            "10140": 958456576.0,
            "10145": 934757632.0,
            "10150": 974173632.0,
            "10155": 970071040.0,
            "10160": 962587904.0,
            "10165": 975340032.0,
            "10170": 944992576.0,
            "10175": 979764288.0,
            "10180": 984166848.0,
            "10185": 979299584.0,
            "10190": 956011072.0,
            "10195": 937762816.0,
            "10200": 988375040.0,
            "10205": 973486912.0,
            "10210": 967071936.0,
            "10215": 976244288.0,
            "10220": 948392768.0,
            "10225": 950509120.0,
            "10230": 975910400.0,
            "10235": 954518208.0,
            "10240": 969725760.0,
            "10245": 962262720.0,
            "10250": 936595328.0,
            "10255": 979858880.0,
            "10260": 965036480.0,
            "10265": 967979456.0,
            "10270": 969013376.0,
            "10275": 936557568.0,
            "10280": 970089984.0,
            "10285": 996574272.0,
            "10290": 979919936.0,
            "10295": 982067648.0,
            "10300": 952589312.0,
            "10305": 972220096.0,
            "10310": 960615680.0,
            "10315": 971949120.0,
            "10320": 985623808.0,
            "10325": 983756096.0,
            "10330": 935342912.0,
            "10335": 976575872.0,
            "10340": 957602304.0,
            "10345": 974258688.0,
            "10350": 984902144.0,
            "10355": 942600832.0,
            "10360": 962436736.0,
            "10365": 974586944.0,
            "10370": 981020352.0,
            "10375": 970280320.0,
            "10380": 962189312.0,
            "10385": 955427904.0,
            "10390": 991076992.0,
            "10395": 965397312.0,
            "10400": 961206656.0,
            "10405": 950568768.0,
            "10410": 955784576.0,
            "10415": 976330368.0,
            "10420": 967640256.0,
            "10425": 970082752.0,
            "10430": 965444544.0,
            "10435": 963393344.0,
            "10440": 972564096.0,
            "10445": 972711104.0,
            "10450": 975586240.0,
            "10455": 966761280.0,
            "10460": 949044608.0,
            "10465": 972012864.0,
            "10470": 973235136.0,
            "10475": 979907264.0,
            "10480": 997795200.0,
            "10485": 949619008.0,
            "10490": 935300480.0,
            "10495": 969611904.0,
            "10500": 978921280.0,
            "10505": 959660992.0,
            "10510": 951633856.0,
            "10515": 954111680.0,
            "10520": 972589952.0,
            "10525": 969646400.0,
            "10530": 970841536.0,
            "10535": 986954368.0,
            "10540": 947295104.0,
            "10545": 971010944.0,
            "10550": 969935232.0,
            "10555": 959974656.0,
            "10560": 976629184.0,
            "10565": 961878592.0,
            "10570": 969657536.0,
            "10575": 973664384.0,
            "10580": 961126784.0,
            "10585": 973761984.0,
            "10590": 952334656.0,
            "10595": 957006080.0,
            "10600": 967797504.0,
            "10605": 986734464.0,
            "10610": 966871040.0,
            "10615": 977264000.0,
            "10620": 941644608.0,
            "10625": 965452224.0,
            "10630": 968266048.0,
            "10635": 973553152.0,
            "10640": 974994624.0,
            "10645": 949035328.0,
            "10650": 966545152.0,
            "10655": 986163456.0,
            "10660": 977134784.0,
            "10665": 967246080.0,
            "10670": 955432448.0,
            "10675": 934489536.0,
            "10680": 986790528.0,
            "10685": 991420352.0,
            "10690": 964497600.0,
            "10695": 972595136.0,
            "10700": 950187648.0,
            "10705": 978509504.0,
            "10710": 968830912.0,
            "10715": 967620672.0,
            "10720": 966650496.0,
            "10725": 944721472.0,
            "10730": 980659840.0,
            "10735": 961373248.0,
            "10740": 971771328.0,
            "10745": 985008576.0,
            "10750": 981981696.0,
            "10755": 945716672.0,
            "10760": 970052160.0,
            "10765": 973155328.0,
            "10770": 974475456.0,
            "10775": 959065152.0,
            "10780": 949775680.0,
            "10785": 954158976.0,
            "10790": 970536768.0,
            "10795": 960812480.0,
            "10800": 972540160.0,
            "10805": 951692608.0,
            "10810": 974430272.0,
            "10815": 960269632.0,
            "10820": 971717568.0,
            "10825": 967418816.0,
            "10830": 957221696.0,
            "10835": 963639936.0,
            "10840": 971205888.0,
            "10845": 964413312.0,
            "10850": 958640000.0,
            "10855": 968024512.0,
            "10860": 951093888.0,
            "10865": 964574848.0,
            "10870": 983797696.0,
            "10875": 982923584.0,
            "10880": 958486080.0,
            "10885": 955116224.0,
            "10890": 973588480.0,
            "10895": 973971840.0,
            "10900": 970937984.0,
            "10905": 965514560.0,
            "10910": 939441472.0,
            "10915": 961079296.0,
            "10920": 984131648.0,
            "10925": 970323776.0,
            "10930": 969175744.0,
            "10935": 963301760.0,
            "10940": 954394688.0,
            "10945": 965563776.0,
            "10950": 973128640.0,
            "10955": 967246784.0,
            "10960": 972552896.0,
            "10965": 966833984.0,
            "10970": 983963392.0,
            "10975": 966287232.0,
            "10980": 975342720.0,
            "10985": 987025792.0,
            "10990": 951644096.0,
            "10995": 963978240.0,
            "11000": 985935360.0,
            "11005": 979086848.0,
            "11010": 972104448.0,
            "11015": 970536832.0,
            "11020": 948057984.0,
            "11025": 960816512.0,
            "11030": 978449600.0,
            "11035": 976138624.0,
            "11040": 986853184.0,
            "11045": 957369600.0,
            "11050": 973873792.0,
            "11055": 975254464.0,
            "11060": 962811776.0,
            "11065": 985970368.0,
            "11070": 950293056.0,
            "11075": 977031296.0,
            "11080": 972596224.0,
            "11085": 967855104.0,
            "11090": 976950464.0,
            "11095": 947014080.0,
            "11100": 966197568.0,
            "11105": 974603968.0,
            "11110": 981418624.0,
            "11115": 968486848.0,
            "11120": 958033216.0,
            "11125": 957333376.0,
            "11130": 976226496.0,
            "11135": 979965312.0,
            "11140": 965299264.0,
            "11145": 966830400.0,
            "11150": 936524416.0,
            "11155": 976638336.0,
            "11160": 984635776.0,
            "11165": 983012480.0,
            "11170": 978529920.0,
            "11175": 958660352.0,
            "11180": 962894464.0,
            "11185": 972480192.0,
            "11190": 980341504.0,
            "11195": 985512704.0,
            "11200": 983381056.0,
            "11205": 942584384.0,
            "11210": 985304320.0,
            "11215": 967911168.0,
            "11220": 983659648.0,
            "11225": 962193280.0,
            "11230": 953703808.0,
            "11235": 982258240.0,
            "11240": 977915200.0,
            "11245": 966858368.0,
            "11250": 969759808.0,
            "11255": 960890560.0,
            "11260": 980377856.0,
            "11265": 964428800.0,
            "11270": 982000896.0,
            "11275": 968942592.0,
            "11280": 956529920.0,
            "11285": 954079872.0,
            "11290": 957163328.0,
            "11295": 968884800.0,
            "11300": 963188032.0,
            "11305": 959323840.0,
            "11310": 946978176.0,
            "11315": 983673792.0,
            "11320": 965318976.0,
            "11325": 981629568.0,
            "11330": 976249728.0,
            "11335": 952934336.0,
            "11340": 971284032.0,
            "11345": 970465728.0,
            "11350": 982093824.0,
            "11355": 982960896.0,
            "11360": 941900288.0,
            "11365": 971190976.0,
            "11370": 979491968.0,
            "11375": 976025856.0,
            "11380": 968820864.0,
            "11385": 959336256.0,
            "11390": 938646720.0,
            "11395": 977983680.0,
            "11400": 973858112.0,
            "11405": 961935744.0,
            "11410": 966806528.0,
            "11415": 929777408.0,
            "11420": 965160384.0,
            "11425": 981550848.0,
            "11430": 979212288.0,
            "11435": 970729728.0,
            "11440": 945697280.0,
            "11445": 975559552.0,
            "11450": 984857152.0,
            "11455": 971634304.0,
            "11460": 965512128.0,
            "11465": 960628416.0,
            "11470": 955201728.0,
            "11475": 973266304.0,
            "11480": 956934656.0,
            "11485": 977665152.0,
            "11490": 987202432.0,
            "11495": 959512640.0,
            "11500": 970934080.0,
            "11505": 964563584.0,
            "11510": 977252928.0,
            "11515": 978639616.0,
            "11520": 954465344.0,
            "11525": 976774656.0,
            "11530": 977371968.0,
            "11535": 980325376.0,
            "11540": 974974656.0,
            "11545": 954059392.0,
            "11550": 953760768.0,
            "11555": 982529344.0,
            "11560": 985169536.0,
            "11565": 965645568.0,
            "11570": 967013632.0,
            "11575": 952418368.0,
            "11580": 976773568.0,
            "11585": 977635008.0,
            "11590": 969803136.0,
            "11595": 977281984.0,
            "11600": 946867904.0,
            "11605": 973934848.0,
            "11610": 982750144.0,
            "11615": 972768256.0,
            "11620": 969840832.0,
            "11625": 949821696.0,
            "11630": 938129920.0,
            "11635": 974156672.0,
            "11640": 981822656.0,
            "11645": 980619520.0,
            "11650": 972432320.0,
            "11655": 956571072.0,
            "11660": 981361024.0,
            "11665": 958829312.0,
            "11670": 982936768.0,
            "11675": 973014976.0,
            "11680": 956714432.0,
            "11685": 983334272.0,
            "11690": 969116032.0,
            "11695": 968036288.0,
            "11700": 974173376.0,
            "11705": 956800064.0,
            "11710": 965369920.0,
            "11715": 983784000.0,
            "11720": 984323008.0,
            "11725": 965977664.0,
            "11730": 956037824.0,
            "11735": 943149120.0,
            "11740": 974220992.0,
            "11745": 971688640.0,
            "11750": 962298304.0,
            "11755": 964072960.0,
            "11760": 950646400.0,
            "11765": 984367936.0,
            "11770": 985153088.0,
            "11775": 975965056.0,
            "11780": 985772224.0,
            "11785": 947688640.0,
            "11790": 973230528.0,
            "11795": 971111488.0,
            "11800": 973656384.0,
            "11805": 987233472.0,
            "11810": 968240704.0,
            "11815": 956327424.0,
            "11820": 974046080.0,
            "11825": 971406976.0,
            "11830": 975546112.0,
            "11835": 962241792.0,
            "11840": 944970496.0,
            "11845": 981491008.0,
            "11850": 975104640.0,
            "11855": 978532480.0,
            "11860": 972385280.0,
            "11865": 939096384.0,
            "11870": 940701376.0,
            "11875": 990513856.0,
            "11880": 972906624.0,
            "11885": 963429056.0,
            "11890": 970857216.0,
            "11895": 965527488.0,
            "11900": 979883712.0,
            "11905": 962068800.0,
            "11910": 984083072.0,
            "11915": 990557376.0,
            "11920": 945026432.0,
            "11925": 994653632.0,
            "11930": 965404352.0,
            "11935": 963992512.0,
            "11940": 977430528.0,
            "11945": 945362048.0,
            "11950": 978226752.0,
            "11955": 979741056.0,
            "11960": 972926400.0,
            "11965": 976653248.0,
            "11970": 963649344.0,
            "11975": 963654656.0,
            "11980": 978148672.0,
            "11985": 953660672.0,
            "11990": 969205632.0,
            "11995": 965646848.0,
            "12000": 959189376.0,
            "12005": 975245760.0,
            "12010": 980040512.0,
            "12015": 972961472.0,
            "12020": 974015936.0,
            "12025": 935486528.0,
            "12030": 969883456.0,
            "12035": 984936448.0,
            "12040": 978279104.0,
            "12045": 982466944.0,
            "12050": 932071488.0,
            "12055": 939453504.0,
            "12060": 975133888.0,
            "12065": 966175552.0,
            "12070": 968532864.0,
            "12075": 950454848.0,
            "12080": 954126336.0,
            "12085": 973089216.0,
            "12090": 964544896.0,
            "12095": 964301056.0,
            "12100": 977560000.0,
            "12105": 951131328.0,
            "12110": 972670528.0,
            "12115": 968593472.0,
            "12120": 987082368.0,
            "12125": 981458944.0,
            "12130": 942360832.0,
            "12135": 956325184.0,
            "12140": 976590336.0,
            "12145": 980090304.0,
            "12150": 980381248.0,
            "12155": 962713536.0,
            "12160": 947316608.0,
            "12165": 968991616.0,
            "12170": 965062464.0,
            "12175": 968575040.0,
            "12180": 976120320.0,
            "12185": 954075520.0,
            "12190": 989391104.0,
            "12195": 971708800.0,
            "12200": 965827264.0,
            "12205": 969711040.0,
            "12210": 940248576.0,
            "12215": 997989184.0,
            "12220": 970995200.0,
            "12225": 980366208.0,
            "12230": 980909120.0,
            "12235": 950726656.0,
            "12240": 964435264.0,
            "12245": 966266944.0,
            "12250": 977518208.0,
            "12255": 968924928.0,
            "12260": 984033792.0,
            "12265": 932555840.0,
            "12270": 967046272.0,
            "12275": 980228736.0,
            "12280": 978388800.0,
            "12285": 971154624.0,
            "12290": 929978688.0,
            "12295": 977657280.0,
            "12300": 986762304.0,
            "12305": 970652864.0,
            "12310": 986926592.0,
            "12315": 936949504.0,
            "12320": 958171456.0,
            "12325": 967223808.0,
            "12330": 968969344.0,
            "12335": 964536128.0,
            "12340": 958341056.0,
            "12345": 944836288.0,
            "12350": 967498368.0,
            "12355": 976371072.0,
            "12360": 979569024.0,
            "12365": 965419264.0,
            "12370": 949564672.0,
            "12375": 964426688.0,
            "12380": 965351744.0,
            "12385": 973481536.0,
            "12390": 962323136.0,
            "12395": 962068928.0,
            "12400": 975785216.0,
            "12405": 976753664.0,
            "12410": 953826496.0,
            "12415": 963161280.0,
            "12420": 944512320.0,
            "12425": 949133632.0,
            "12430": 972276608.0,
            "12435": 969186816.0,
            "12440": 961963200.0,
            "12445": 952227776.0,
            "12450": 947719424.0,
            "12455": 981553664.0,
            "12460": 973975168.0,
            "12465": 954619008.0,
            "12470": 981484928.0,
            "12475": 958971712.0,
            "12480": 967487808.0,
            "12485": 978829888.0,
            "12490": 974300928.0,
            "12495": 970095616.0,
            "12500": 962149312.0,
            "12505": 944269184.0,
            "12510": 961737600.0,
            "12515": 969895232.0,
            "12520": 974657408.0,
            "12525": 972976704.0,
            "12530": 945367232.0,
            "12535": 977345216.0,
            "12540": 966331136.0,
            "12545": 972619968.0,
            "12550": 970357760.0,
            "12555": 941690496.0,
            "12560": 965253184.0,
            "12565": 948048128.0,
            "12570": 975043136.0,
            "12575": 963776704.0,
            "12580": 958666560.0,
            "12585": 964961536.0,
            "12590": 966634624.0,
            "12595": 979503232.0,
            "12600": 982656704.0,
            "12605": 949885952.0,
            "12610": 938553984.0,
            "12615": 963255936.0,
            "12620": 961752256.0,
            "12625": 967008512.0,
            "12630": 971583232.0,
            "12635": 962909312.0,
            "12640": 978944512.0,
            "12645": 969957952.0,
            "12650": 970774272.0,
            "12655": 964693888.0,
            "12660": 932868224.0,
            "12665": 957452096.0,
            "12670": 986767872.0,
            "12675": 966118336.0,
            "12680": 961677440.0,
            "12685": 951879616.0,
            "12690": 945942400.0,
            "12695": 978999424.0,
            "12700": 985706368.0,
            "12705": 959433984.0,
            "12710": 969079488.0,
            "12715": 957051264.0,
            "12720": 977087616.0,
            "12725": 965928640.0,
            "12730": 969980352.0,
            "12735": 987452160.0,
            "12740": 937968896.0,
            "12745": 971461312.0,
            "12750": 974890176.0,
            "12755": 980713920.0,
            "12760": 970725312.0,
            "12765": 942710592.0,
            "12770": 952649088.0,
            "12775": 951886656.0,
            "12780": 969741312.0,
            "12785": 956598016.0,
            "12790": 963494144.0,
            "12795": 953530688.0,
            "12800": 963390720.0,
            "12805": 973287872.0,
            "12810": 974244416.0,
            "12815": 953176896.0,
            "12820": 941838656.0,
            "12825": 967136384.0,
            "12830": 999296576.0,
            "12835": 977431744.0,
            "12840": 963033920.0,
            "12845": 942053952.0,
            "12850": 959577152.0,
            "12855": 961839488.0,
            "12860": 972850048.0,
            "12865": 978494912.0,
            "12870": 974999104.0,
            "12875": 956488640.0,
            "12880": 968016832.0,
            "12885": 981818176.0,
            "12890": 960462464.0,
            "12895": 970178176.0,
            "12900": 938071232.0,
            "12905": 968847744.0,
            "12910": 982008960.0,
            "12915": 975883520.0,
            "12920": 956247808.0,
            "12925": 949779136.0,
            "12930": 960950592.0,
            "12935": 991529088.0,
            "12940": 968458944.0,
            "12945": 976237952.0,
            "12950": 971959936.0,
            "12955": 956432064.0,
            "12960": 978052416.0,
            "12965": 961617664.0,
            "12970": 962469760.0,
            "12975": 960424000.0,
            "12980": 938356224.0,
            "12985": 963543808.0,
            "12990": 968678016.0,
            "12995": 976098752.0,
            "13000": 980572992.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 13000,
        "step_interval": 5,
        "values": {
            "1": 14650828800.0,
            "5": 14650828800.0,
            "10": 14650828800.0,
            "15": 14650828800.0,
            "20": 14650828800.0,
            "25": 14650828800.0,
            "30": 14650828800.0,
            "35": 14650828800.0,
            "40": 14650828800.0,
            "45": 14650828800.0,
            "50": 14650828800.0,
            "55": 14650828800.0,
            "60": 14650828800.0,
            "65": 14650828800.0,
            "70": 14650828800.0,
            "75": 14650828800.0,
            "80": 14650828800.0,
            "85": 14650828800.0,
            "90": 14650828800.0,
            "95": 14650828800.0,
            "100": 14650828800.0,
            "105": 14650828800.0,
            "110": 14650828800.0,
            "115": 14650828800.0,
            "120": 14650828800.0,
            "125": 14650828800.0,
            "130": 14650828800.0,
            "135": 14650828800.0,
            "140": 14650828800.0,
            "145": 14650828800.0,
            "150": 14650828800.0,
            "155": 14650828800.0,
            "160": 14650828800.0,
            "165": 14650828800.0,
            "170": 14650828800.0,
            "175": 14650828800.0,
            "180": 14650828800.0,
            "185": 14650828800.0,
            "190": 14650828800.0,
            "195": 14650828800.0,
            "200": 14650828800.0,
            "205": 14650828800.0,
            "210": 14650828800.0,
            "215": 14650828800.0,
            "220": 14650828800.0,
            "225": 14650828800.0,
            "230": 14650828800.0,
            "235": 14650828800.0,
            "240": 14650828800.0,
            "245": 14650828800.0,
            "250": 14650828800.0,
            "255": 14650828800.0,
            "260": 14650828800.0,
            "265": 14650828800.0,
            "270": 14650828800.0,
            "275": 14650828800.0,
            "280": 14650828800.0,
            "285": 14650828800.0,
            "290": 14650828800.0,
            "295": 14650828800.0,
            "300": 14650828800.0,
            "305": 14650828800.0,
            "310": 14650828800.0,
            "315": 14650828800.0,
            "320": 14650828800.0,
            "325": 14650828800.0,
            "330": 14650828800.0,
            "335": 14650828800.0,
            "340": 14650828800.0,
            "345": 14650828800.0,
            "350": 14650828800.0,
            "355": 14650828800.0,
            "360": 14650828800.0,
            "365": 14650828800.0,
            "370": 14650828800.0,
            "375": 14650828800.0,
            "380": 14650828800.0,
            "385": 14650828800.0,
            "390": 14650828800.0,
            "395": 14650828800.0,
            "400": 14650828800.0,
            "405": 14650828800.0,
            "410": 14650828800.0,
            "415": 14650828800.0,
            "420": 14650828800.0,
            "425": 14650828800.0,
            "430": 14650828800.0,
            "435": 14650828800.0,
            "440": 14650828800.0,
            "445": 14650828800.0,
            "450": 14650828800.0,
            "455": 14650828800.0,
            "460": 14650828800.0,
            "465": 14650828800.0,
            "470": 14650828800.0,
            "475": 14650828800.0,
            "480": 14650828800.0,
            "485": 14650828800.0,
            "490": 14650828800.0,
            "495": 14650828800.0,
            "500": 14650828800.0,
            "505": 14650828800.0,
            "510": 14650828800.0,
            "515": 14650828800.0,
            "520": 14650828800.0,
            "525": 14650828800.0,
            "530": 14650828800.0,
            "535": 14650828800.0,
            "540": 14650828800.0,
            "545": 14650828800.0,
            "550": 14650828800.0,
            "555": 14650828800.0,
            "560": 14650828800.0,
            "565": 14650828800.0,
            "570": 14650828800.0,
            "575": 14650828800.0,
            "580": 14650828800.0,
            "585": 14650828800.0,
            "590": 14650828800.0,
            "595": 14650828800.0,
            "600": 14650828800.0,
            "605": 14650828800.0,
            "610": 14650828800.0,
            "615": 14650828800.0,
            "620": 14650828800.0,
            "625": 14650828800.0,
            "630": 14650828800.0,
            "635": 14650828800.0,
            "640": 14650828800.0,
            "645": 14650828800.0,
            "650": 14650828800.0,
            "655": 14650828800.0,
            "660": 14650828800.0,
            "665": 14650828800.0,
            "670": 14650828800.0,
            "675": 14650828800.0,
            "680": 14650828800.0,
            "685": 14650828800.0,
            "690": 14650828800.0,
            "695": 14650828800.0,
            "700": 14650828800.0,
            "705": 14650828800.0,
            "710": 14650828800.0,
            "715": 14650828800.0,
            "720": 14650828800.0,
            "725": 14650828800.0,
            "730": 14650828800.0,
            "735": 14650828800.0,
            "740": 14650828800.0,
            "745": 14650828800.0,
            "750": 14650828800.0,
            "755": 14650828800.0,
            "760": 14650828800.0,
            "765": 14650828800.0,
            "770": 14650828800.0,
            "775": 14650828800.0,
            "780": 14650828800.0,
            "785": 14650828800.0,
            "790": 14650828800.0,
            "795": 14650828800.0,
            "800": 14650828800.0,
            "805": 14650828800.0,
            "810": 14650828800.0,
            "815": 14650828800.0,
            "820": 14650828800.0,
            "825": 14650828800.0,
            "830": 14650828800.0,
            "835": 14650828800.0,
            "840": 14650828800.0,
            "845": 14650828800.0,
            "850": 14650828800.0,
            "855": 14650828800.0,
            "860": 14650828800.0,
            "865": 14650828800.0,
            "870": 14650828800.0,
            "875": 14650828800.0,
            "880": 14650828800.0,
            "885": 14650828800.0,
            "890": 14650828800.0,
            "895": 14650828800.0,
            "900": 14650828800.0,
            "905": 14650828800.0,
            "910": 14650828800.0,
            "915": 14650828800.0,
            "920": 14650828800.0,
            "925": 14650828800.0,
            "930": 14650828800.0,
            "935": 14650828800.0,
            "940": 14650828800.0,
            "945": 14650828800.0,
            "950": 14650828800.0,
            "955": 14650828800.0,
            "960": 14650828800.0,
            "965": 14650828800.0,
            "970": 14650828800.0,
            "975": 14650828800.0,
            "980": 14650828800.0,
            "985": 14650828800.0,
            "990": 14650828800.0,
            "995": 14650828800.0,
            "1000": 14650828800.0,
            "1005": 14650828800.0,
            "1010": 14650828800.0,
            "1015": 14650828800.0,
            "1020": 14650828800.0,
            "1025": 14650828800.0,
            "1030": 14650828800.0,
            "1035": 14650828800.0,
            "1040": 14650828800.0,
            "1045": 14650828800.0,
            "1050": 14650828800.0,
            "1055": 14650828800.0,
            "1060": 14650828800.0,
            "1065": 14650828800.0,
            "1070": 14650828800.0,
            "1075": 14650828800.0,
            "1080": 14650828800.0,
            "1085": 14650828800.0,
            "1090": 14650828800.0,
            "1095": 14650828800.0,
            "1100": 14650828800.0,
            "1105": 14650828800.0,
            "1110": 14650828800.0,
            "1115": 14650828800.0,
            "1120": 14650828800.0,
            "1125": 14650828800.0,
            "1130": 14650828800.0,
            "1135": 14650828800.0,
            "1140": 14650828800.0,
            "1145": 14650828800.0,
            "1150": 14650828800.0,
            "1155": 14650828800.0,
            "1160": 14650828800.0,
            "1165": 14650828800.0,
            "1170": 14650828800.0,
            "1175": 14650828800.0,
            "1180": 14650828800.0,
            "1185": 14650828800.0,
            "1190": 14650828800.0,
            "1195": 14650828800.0,
            "1200": 14650828800.0,
            "1205": 14650828800.0,
            "1210": 14650828800.0,
            "1215": 14650828800.0,
            "1220": 14650828800.0,
            "1225": 14650828800.0,
            "1230": 14650828800.0,
            "1235": 14650828800.0,
            "1240": 14650828800.0,
            "1245": 14650828800.0,
            "1250": 14650828800.0,
            "1255": 14650828800.0,
            "1260": 14650828800.0,
            "1265": 14650828800.0,
            "1270": 14650828800.0,
            "1275": 14650828800.0,
            "1280": 14650828800.0,
            "1285": 14650828800.0,
            "1290": 14650828800.0,
            "1295": 14650828800.0,
            "1300": 14650828800.0,
            "1305": 14650828800.0,
            "1310": 14650828800.0,
            "1315": 14650828800.0,
            "1320": 14650828800.0,
            "1325": 14650828800.0,
            "1330": 14650828800.0,
            "1335": 14650828800.0,
            "1340": 14650828800.0,
            "1345": 14650828800.0,
            "1350": 14650828800.0,
            "1355": 14650828800.0,
            "1360": 14650828800.0,
            "1365": 14650828800.0,
            "1370": 14650828800.0,
            "1375": 14650828800.0,
            "1380": 14650828800.0,
            "1385": 14650828800.0,
            "1390": 14650828800.0,
            "1395": 14650828800.0,
            "1400": 14650828800.0,
            "1405": 14650828800.0,
            "1410": 14650828800.0,
            "1415": 14650828800.0,
            "1420": 14650828800.0,
            "1425": 14650828800.0,
            "1430": 14650828800.0,
            "1435": 14650828800.0,
            "1440": 14650828800.0,
            "1445": 14650828800.0,
            "1450": 14650828800.0,
            "1455": 14650828800.0,
            "1460": 14650828800.0,
            "1465": 14650828800.0,
            "1470": 14650828800.0,
            "1475": 14650828800.0,
            "1480": 14650828800.0,
            "1485": 14650828800.0,
            "1490": 14650828800.0,
            "1495": 14650828800.0,
            "1500": 14650828800.0,
            "1505": 14650828800.0,
            "1510": 14650828800.0,
            "1515": 14650828800.0,
            "1520": 14650828800.0,
            "1525": 14650828800.0,
            "1530": 14650828800.0,
            "1535": 14650828800.0,
            "1540": 14650828800.0,
            "1545": 14650828800.0,
            "1550": 14650828800.0,
            "1555": 14650828800.0,
            "1560": 14650828800.0,
            "1565": 14650828800.0,
            "1570": 14650828800.0,
            "1575": 14650828800.0,
            "1580": 14650828800.0,
            "1585": 14650828800.0,
            "1590": 14650828800.0,
            "1595": 14650828800.0,
            "1600": 14650828800.0,
            "1605": 14650828800.0,
            "1610": 14650828800.0,
            "1615": 14650828800.0,
            "1620": 14650828800.0,
            "1625": 14650828800.0,
            "1630": 14650828800.0,
            "1635": 14650828800.0,
            "1640": 14650828800.0,
            "1645": 14650828800.0,
            "1650": 14650828800.0,
            "1655": 14650828800.0,
            "1660": 14650828800.0,
            "1665": 14650828800.0,
            "1670": 14650828800.0,
            "1675": 14650828800.0,
            "1680": 14650828800.0,
            "1685": 14650828800.0,
            "1690": 14650828800.0,
            "1695": 14650828800.0,
            "1700": 14650828800.0,
            "1705": 14650828800.0,
            "1710": 14650828800.0,
            "1715": 14650828800.0,
            "1720": 14650828800.0,
            "1725": 14650828800.0,
            "1730": 14650828800.0,
            "1735": 14650828800.0,
            "1740": 14650828800.0,
            "1745": 14650828800.0,
            "1750": 14650828800.0,
            "1755": 14650828800.0,
            "1760": 14650828800.0,
            "1765": 14650828800.0,
            "1770": 14650828800.0,
            "1775": 14650828800.0,
            "1780": 14650828800.0,
            "1785": 14650828800.0,
            "1790": 14650828800.0,
            "1795": 14650828800.0,
            "1800": 14650828800.0,
            "1805": 14650828800.0,
            "1810": 14650828800.0,
            "1815": 14650828800.0,
            "1820": 14650828800.0,
            "1825": 14650828800.0,
            "1830": 14650828800.0,
            "1835": 14650828800.0,
            "1840": 14650828800.0,
            "1845": 14650828800.0,
            "1850": 14650828800.0,
            "1855": 14650828800.0,
            "1860": 14650828800.0,
            "1865": 14650828800.0,
            "1870": 14650828800.0,
            "1875": 14650828800.0,
            "1880": 14650828800.0,
            "1885": 14650828800.0,
            "1890": 14650828800.0,
            "1895": 14650828800.0,
            "1900": 14650828800.0,
            "1905": 14650828800.0,
            "1910": 14650828800.0,
            "1915": 14650828800.0,
            "1920": 14650828800.0,
            "1925": 14650828800.0,
            "1930": 14650828800.0,
            "1935": 14650828800.0,
            "1940": 14650828800.0,
            "1945": 14650828800.0,
            "1950": 14650828800.0,
            "1955": 14650828800.0,
            "1960": 14650828800.0,
            "1965": 14650828800.0,
            "1970": 14650828800.0,
            "1975": 14650828800.0,
            "1980": 14650828800.0,
            "1985": 14650828800.0,
            "1990": 14650828800.0,
            "1995": 14650828800.0,
            "2000": 14650828800.0,
            "2005": 14650828800.0,
            "2010": 14650828800.0,
            "2015": 14650828800.0,
            "2020": 14650828800.0,
            "2025": 14650828800.0,
            "2030": 14650828800.0,
            "2035": 14650828800.0,
            "2040": 14650828800.0,
            "2045": 14650828800.0,
            "2050": 14650828800.0,
            "2055": 14650828800.0,
            "2060": 14650828800.0,
            "2065": 14650828800.0,
            "2070": 14650828800.0,
            "2075": 14650828800.0,
            "2080": 14650828800.0,
            "2085": 14650828800.0,
            "2090": 14650828800.0,
            "2095": 14650828800.0,
            "2100": 14650828800.0,
            "2105": 14650828800.0,
            "2110": 14650828800.0,
            "2115": 14650828800.0,
            "2120": 14650828800.0,
            "2125": 14650828800.0,
            "2130": 14650828800.0,
            "2135": 14650828800.0,
            "2140": 14650828800.0,
            "2145": 14650828800.0,
            "2150": 14650828800.0,
            "2155": 14650828800.0,
            "2160": 14650828800.0,
            "2165": 14650828800.0,
            "2170": 14650828800.0,
            "2175": 14650828800.0,
            "2180": 14650828800.0,
            "2185": 14650828800.0,
            "2190": 14650828800.0,
            "2195": 14650828800.0,
            "2200": 14650828800.0,
            "2205": 14650828800.0,
            "2210": 14650828800.0,
            "2215": 14650828800.0,
            "2220": 14650828800.0,
            "2225": 14650828800.0,
            "2230": 14650828800.0,
            "2235": 14650828800.0,
            "2240": 14650828800.0,
            "2245": 14650828800.0,
            "2250": 14650828800.0,
            "2255": 14650828800.0,
            "2260": 14650828800.0,
            "2265": 14650828800.0,
            "2270": 14650828800.0,
            "2275": 14650828800.0,
            "2280": 14650828800.0,
            "2285": 14650828800.0,
            "2290": 14650828800.0,
            "2295": 14650828800.0,
            "2300": 14650828800.0,
            "2305": 14650828800.0,
            "2310": 14650828800.0,
            "2315": 14650828800.0,
            "2320": 14650828800.0,
            "2325": 14650828800.0,
            "2330": 14650828800.0,
            "2335": 14650828800.0,
            "2340": 14650828800.0,
            "2345": 14650828800.0,
            "2350": 14650828800.0,
            "2355": 14650828800.0,
            "2360": 14650828800.0,
            "2365": 14650828800.0,
            "2370": 14650828800.0,
            "2375": 14650828800.0,
            "2380": 14650828800.0,
            "2385": 14650828800.0,
            "2390": 14650828800.0,
            "2395": 14650828800.0,
            "2400": 14650828800.0,
            "2405": 14650828800.0,
            "2410": 14650828800.0,
            "2415": 14650828800.0,
            "2420": 14650828800.0,
            "2425": 14650828800.0,
            "2430": 14650828800.0,
            "2435": 14650828800.0,
            "2440": 14650828800.0,
            "2445": 14650828800.0,
            "2450": 14650828800.0,
            "2455": 14650828800.0,
            "2460": 14650828800.0,
            "2465": 14650828800.0,
            "2470": 14650828800.0,
            "2475": 14650828800.0,
            "2480": 14650828800.0,
            "2485": 14650828800.0,
            "2490": 14650828800.0,
            "2495": 14650828800.0,
            "2500": 14650828800.0,
            "2505": 14650828800.0,
            "2510": 14650828800.0,
            "2515": 14650828800.0,
            "2520": 14650828800.0,
            "2525": 14650828800.0,
            "2530": 14650828800.0,
            "2535": 14650828800.0,
            "2540": 14650828800.0,
            "2545": 14650828800.0,
            "2550": 14650828800.0,
            "2555": 14650828800.0,
            "2560": 14650828800.0,
            "2565": 14650828800.0,
            "2570": 14650828800.0,
            "2575": 14650828800.0,
            "2580": 14650828800.0,
            "2585": 14650828800.0,
            "2590": 14650828800.0,
            "2595": 14650828800.0,
            "2600": 14650828800.0,
            "2605": 14650828800.0,
            "2610": 14650828800.0,
            "2615": 14650828800.0,
            "2620": 14650828800.0,
            "2625": 14650828800.0,
            "2630": 14650828800.0,
            "2635": 14650828800.0,
            "2640": 14650828800.0,
            "2645": 14650828800.0,
            "2650": 14650828800.0,
            "2655": 14650828800.0,
            "2660": 14650828800.0,
            "2665": 14650828800.0,
            "2670": 14650828800.0,
            "2675": 14650828800.0,
            "2680": 14650828800.0,
            "2685": 14650828800.0,
            "2690": 14650828800.0,
            "2695": 14650828800.0,
            "2700": 14650828800.0,
            "2705": 14650828800.0,
            "2710": 14650828800.0,
            "2715": 14650828800.0,
            "2720": 14650828800.0,
            "2725": 14650828800.0,
            "2730": 14650828800.0,
            "2735": 14650828800.0,
            "2740": 14650828800.0,
            "2745": 14650828800.0,
            "2750": 14650828800.0,
            "2755": 14650828800.0,
            "2760": 14650828800.0,
            "2765": 14650828800.0,
            "2770": 14650828800.0,
            "2775": 14650828800.0,
            "2780": 14650828800.0,
            "2785": 14650828800.0,
            "2790": 14650828800.0,
            "2795": 14650828800.0,
            "2800": 14650828800.0,
            "2805": 14650828800.0,
            "2810": 14650828800.0,
            "2815": 14650828800.0,
            "2820": 14650828800.0,
            "2825": 14650828800.0,
            "2830": 14650828800.0,
            "2835": 14650828800.0,
            "2840": 14650828800.0,
            "2845": 14650828800.0,
            "2850": 14650828800.0,
            "2855": 14650828800.0,
            "2860": 14650828800.0,
            "2865": 14650828800.0,
            "2870": 14650828800.0,
            "2875": 14650828800.0,
            "2880": 14650828800.0,
            "2885": 14650828800.0,
            "2890": 14650828800.0,
            "2895": 14650828800.0,
            "2900": 14650828800.0,
            "2905": 14650828800.0,
            "2910": 14650828800.0,
            "2915": 14650828800.0,
            "2920": 14650828800.0,
            "2925": 14650828800.0,
            "2930": 14650828800.0,
            "2935": 14650828800.0,
            "2940": 14650828800.0,
            "2945": 14650828800.0,
            "2950": 14650828800.0,
            "2955": 14650828800.0,
            "2960": 14650828800.0,
            "2965": 14650828800.0,
            "2970": 14650828800.0,
            "2975": 14650828800.0,
            "2980": 14650828800.0,
            "2985": 14650828800.0,
            "2990": 14650828800.0,
            "2995": 14650828800.0,
            "3000": 14650828800.0,
            "3005": 14650828800.0,
            "3010": 14650828800.0,
            "3015": 14650828800.0,
            "3020": 14650828800.0,
            "3025": 14650828800.0,
            "3030": 14650828800.0,
            "3035": 14650828800.0,
            "3040": 14650828800.0,
            "3045": 14650828800.0,
            "3050": 14650828800.0,
            "3055": 14650828800.0,
            "3060": 14650828800.0,
            "3065": 14650828800.0,
            "3070": 14650828800.0,
            "3075": 14650828800.0,
            "3080": 14650828800.0,
            "3085": 14650828800.0,
            "3090": 14650828800.0,
            "3095": 14650828800.0,
            "3100": 14650828800.0,
            "3105": 14650828800.0,
            "3110": 14650828800.0,
            "3115": 14650828800.0,
            "3120": 14650828800.0,
            "3125": 14650828800.0,
            "3130": 14650828800.0,
            "3135": 14650828800.0,
            "3140": 14650828800.0,
            "3145": 14650828800.0,
            "3150": 14650828800.0,
            "3155": 14650828800.0,
            "3160": 14650828800.0,
            "3165": 14650828800.0,
            "3170": 14650828800.0,
            "3175": 14650828800.0,
            "3180": 14650828800.0,
            "3185": 14650828800.0,
            "3190": 14650828800.0,
            "3195": 14650828800.0,
            "3200": 14650828800.0,
            "3205": 14650828800.0,
            "3210": 14650828800.0,
            "3215": 14650828800.0,
            "3220": 14650828800.0,
            "3225": 14650828800.0,
            "3230": 14650828800.0,
            "3235": 14650828800.0,
            "3240": 14650828800.0,
            "3245": 14650828800.0,
            "3250": 14650828800.0,
            "3255": 14650828800.0,
            "3260": 14650828800.0,
            "3265": 14650828800.0,
            "3270": 14650828800.0,
            "3275": 14650828800.0,
            "3280": 14650828800.0,
            "3285": 14650828800.0,
            "3290": 14650828800.0,
            "3295": 14650828800.0,
            "3300": 14650828800.0,
            "3305": 14650828800.0,
            "3310": 14650828800.0,
            "3315": 14650828800.0,
            "3320": 14650828800.0,
            "3325": 14650828800.0,
            "3330": 14650828800.0,
            "3335": 14650828800.0,
            "3340": 14650828800.0,
            "3345": 14650828800.0,
            "3350": 14650828800.0,
            "3355": 14650828800.0,
            "3360": 14650828800.0,
            "3365": 14650828800.0,
            "3370": 14650828800.0,
            "3375": 14650828800.0,
            "3380": 14650828800.0,
            "3385": 14650828800.0,
            "3390": 14650828800.0,
            "3395": 14650828800.0,
            "3400": 14650828800.0,
            "3405": 14650828800.0,
            "3410": 14650828800.0,
            "3415": 14650828800.0,
            "3420": 14650828800.0,
            "3425": 14650828800.0,
            "3430": 14650828800.0,
            "3435": 14650828800.0,
            "3440": 14650828800.0,
            "3445": 14650828800.0,
            "3450": 14650828800.0,
            "3455": 14650828800.0,
            "3460": 14650828800.0,
            "3465": 14650828800.0,
            "3470": 14650828800.0,
            "3475": 14650828800.0,
            "3480": 14650828800.0,
            "3485": 14650828800.0,
            "3490": 14650828800.0,
            "3495": 14650828800.0,
            "3500": 14650828800.0,
            "3505": 14650828800.0,
            "3510": 14650828800.0,
            "3515": 14650828800.0,
            "3520": 14650828800.0,
            "3525": 14650828800.0,
            "3530": 14650828800.0,
            "3535": 14650828800.0,
            "3540": 14650828800.0,
            "3545": 14650828800.0,
            "3550": 14650828800.0,
            "3555": 14650828800.0,
            "3560": 14650828800.0,
            "3565": 14650828800.0,
            "3570": 14650828800.0,
            "3575": 14650828800.0,
            "3580": 14650828800.0,
            "3585": 14650828800.0,
            "3590": 14650828800.0,
            "3595": 14650828800.0,
            "3600": 14650828800.0,
            "3605": 14650828800.0,
            "3610": 14650828800.0,
            "3615": 14650828800.0,
            "3620": 14650828800.0,
            "3625": 14650828800.0,
            "3630": 14650828800.0,
            "3635": 14650828800.0,
            "3640": 14650828800.0,
            "3645": 14650828800.0,
            "3650": 14650828800.0,
            "3655": 14650828800.0,
            "3660": 14650828800.0,
            "3665": 14650828800.0,
            "3670": 14650828800.0,
            "3675": 14650828800.0,
            "3680": 14650828800.0,
            "3685": 14650828800.0,
            "3690": 14650828800.0,
            "3695": 14650828800.0,
            "3700": 14650828800.0,
            "3705": 14650828800.0,
            "3710": 14650828800.0,
            "3715": 14650828800.0,
            "3720": 14650828800.0,
            "3725": 14650828800.0,
            "3730": 14650828800.0,
            "3735": 14650828800.0,
            "3740": 14650828800.0,
            "3745": 14650828800.0,
            "3750": 14650828800.0,
            "3755": 14650828800.0,
            "3760": 14650828800.0,
            "3765": 14650828800.0,
            "3770": 14650828800.0,
            "3775": 14650828800.0,
            "3780": 14650828800.0,
            "3785": 14650828800.0,
            "3790": 14650828800.0,
            "3795": 14650828800.0,
            "3800": 14650828800.0,
            "3805": 14650828800.0,
            "3810": 14650828800.0,
            "3815": 14650828800.0,
            "3820": 14650828800.0,
            "3825": 14650828800.0,
            "3830": 14650828800.0,
            "3835": 14650828800.0,
            "3840": 14650828800.0,
            "3845": 14650828800.0,
            "3850": 14650828800.0,
            "3855": 14650828800.0,
            "3860": 14650828800.0,
            "3865": 14650828800.0,
            "3870": 14650828800.0,
            "3875": 14650828800.0,
            "3880": 14650828800.0,
            "3885": 14650828800.0,
            "3890": 14650828800.0,
            "3895": 14650828800.0,
            "3900": 14650828800.0,
            "3905": 14650828800.0,
            "3910": 14650828800.0,
            "3915": 14650828800.0,
            "3920": 14650828800.0,
            "3925": 14650828800.0,
            "3930": 14650828800.0,
            "3935": 14650828800.0,
            "3940": 14650828800.0,
            "3945": 14650828800.0,
            "3950": 14650828800.0,
            "3955": 14650828800.0,
            "3960": 14650828800.0,
            "3965": 14650828800.0,
            "3970": 14650828800.0,
            "3975": 14650828800.0,
            "3980": 14650828800.0,
            "3985": 14650828800.0,
            "3990": 14650828800.0,
            "3995": 14650828800.0,
            "4000": 14650828800.0,
            "4005": 14650828800.0,
            "4010": 14650828800.0,
            "4015": 14650828800.0,
            "4020": 14650828800.0,
            "4025": 14650828800.0,
            "4030": 14650828800.0,
            "4035": 14650828800.0,
            "4040": 14650828800.0,
            "4045": 14650828800.0,
            "4050": 14650828800.0,
            "4055": 14650828800.0,
            "4060": 14650828800.0,
            "4065": 14650828800.0,
            "4070": 14650828800.0,
            "4075": 14650828800.0,
            "4080": 14650828800.0,
            "4085": 14650828800.0,
            "4090": 14650828800.0,
            "4095": 14650828800.0,
            "4100": 14650828800.0,
            "4105": 14650828800.0,
            "4110": 14650828800.0,
            "4115": 14650828800.0,
            "4120": 14650828800.0,
            "4125": 14650828800.0,
            "4130": 14650828800.0,
            "4135": 14650828800.0,
            "4140": 14650828800.0,
            "4145": 14650828800.0,
            "4150": 14650828800.0,
            "4155": 14650828800.0,
            "4160": 14650828800.0,
            "4165": 14650828800.0,
            "4170": 14650828800.0,
            "4175": 14650828800.0,
            "4180": 14650828800.0,
            "4185": 14650828800.0,
            "4190": 14650828800.0,
            "4195": 14650828800.0,
            "4200": 14650828800.0,
            "4205": 14650828800.0,
            "4210": 14650828800.0,
            "4215": 14650828800.0,
            "4220": 14650828800.0,
            "4225": 14650828800.0,
            "4230": 14650828800.0,
            "4235": 14650828800.0,
            "4240": 14650828800.0,
            "4245": 14650828800.0,
            "4250": 14650828800.0,
            "4255": 14650828800.0,
            "4260": 14650828800.0,
            "4265": 14650828800.0,
            "4270": 14650828800.0,
            "4275": 14650828800.0,
            "4280": 14650828800.0,
            "4285": 14650828800.0,
            "4290": 14650828800.0,
            "4295": 14650828800.0,
            "4300": 14650828800.0,
            "4305": 14650828800.0,
            "4310": 14650828800.0,
            "4315": 14650828800.0,
            "4320": 14650828800.0,
            "4325": 14650828800.0,
            "4330": 14650828800.0,
            "4335": 14650828800.0,
            "4340": 14650828800.0,
            "4345": 14650828800.0,
            "4350": 14650828800.0,
            "4355": 14650828800.0,
            "4360": 14650828800.0,
            "4365": 14650828800.0,
            "4370": 14650828800.0,
            "4375": 14650828800.0,
            "4380": 14650828800.0,
            "4385": 14650828800.0,
            "4390": 14650828800.0,
            "4395": 14650828800.0,
            "4400": 14650828800.0,
            "4405": 14650828800.0,
            "4410": 14650828800.0,
            "4415": 14650828800.0,
            "4420": 14650828800.0,
            "4425": 14650828800.0,
            "4430": 14650828800.0,
            "4435": 14650828800.0,
            "4440": 14650828800.0,
            "4445": 14650828800.0,
            "4450": 14650828800.0,
            "4455": 14650828800.0,
            "4460": 14650828800.0,
            "4465": 14650828800.0,
            "4470": 14650828800.0,
            "4475": 14650828800.0,
            "4480": 14650828800.0,
            "4485": 14650828800.0,
            "4490": 14650828800.0,
            "4495": 14650828800.0,
            "4500": 14650828800.0,
            "4505": 14650828800.0,
            "4510": 14650828800.0,
            "4515": 14650828800.0,
            "4520": 14650828800.0,
            "4525": 14650828800.0,
            "4530": 14650828800.0,
            "4535": 14650828800.0,
            "4540": 14650828800.0,
            "4545": 14650828800.0,
            "4550": 14650828800.0,
            "4555": 14650828800.0,
            "4560": 14650828800.0,
            "4565": 14650828800.0,
            "4570": 14650828800.0,
            "4575": 14650828800.0,
            "4580": 14650828800.0,
            "4585": 14650828800.0,
            "4590": 14650828800.0,
            "4595": 14650828800.0,
            "4600": 14650828800.0,
            "4605": 14650828800.0,
            "4610": 14650828800.0,
            "4615": 14650828800.0,
            "4620": 14650828800.0,
            "4625": 14650828800.0,
            "4630": 14650828800.0,
            "4635": 14650828800.0,
            "4640": 14650828800.0,
            "4645": 14650828800.0,
            "4650": 14650828800.0,
            "4655": 14650828800.0,
            "4660": 14650828800.0,
            "4665": 14650828800.0,
            "4670": 14650828800.0,
            "4675": 14650828800.0,
            "4680": 14650828800.0,
            "4685": 14650828800.0,
            "4690": 14650828800.0,
            "4695": 14650828800.0,
            "4700": 14650828800.0,
            "4705": 14650828800.0,
            "4710": 14650828800.0,
            "4715": 14650828800.0,
            "4720": 14650828800.0,
            "4725": 14650828800.0,
            "4730": 14650828800.0,
            "4735": 14650828800.0,
            "4740": 14650828800.0,
            "4745": 14650828800.0,
            "4750": 14650828800.0,
            "4755": 14650828800.0,
            "4760": 14650828800.0,
            "4765": 14650828800.0,
            "4770": 14650828800.0,
            "4775": 14650828800.0,
            "4780": 14650828800.0,
            "4785": 14650828800.0,
            "4790": 14650828800.0,
            "4795": 14650828800.0,
            "4800": 14650828800.0,
            "4805": 14650828800.0,
            "4810": 14650828800.0,
            "4815": 14650828800.0,
            "4820": 14650828800.0,
            "4825": 14650828800.0,
            "4830": 14650828800.0,
            "4835": 14650828800.0,
            "4840": 14650828800.0,
            "4845": 14650828800.0,
            "4850": 14650828800.0,
            "4855": 14650828800.0,
            "4860": 14650828800.0,
            "4865": 14650828800.0,
            "4870": 14650828800.0,
            "4875": 14650828800.0,
            "4880": 14650828800.0,
            "4885": 14650828800.0,
            "4890": 14650828800.0,
            "4895": 14650828800.0,
            "4900": 14650828800.0,
            "4905": 14650828800.0,
            "4910": 14650828800.0,
            "4915": 14650828800.0,
            "4920": 14650828800.0,
            "4925": 14650828800.0,
            "4930": 14650828800.0,
            "4935": 14650828800.0,
            "4940": 14650828800.0,
            "4945": 14650828800.0,
            "4950": 14650828800.0,
            "4955": 14650828800.0,
            "4960": 14650828800.0,
            "4965": 14650828800.0,
            "4970": 14650828800.0,
            "4975": 14650828800.0,
            "4980": 14650828800.0,
            "4985": 14650828800.0,
            "4990": 14650828800.0,
            "4995": 14650828800.0,
            "5000": 14650828800.0,
            "5005": 14650828800.0,
            "5010": 14650828800.0,
            "5015": 14650828800.0,
            "5020": 14650828800.0,
            "5025": 14650828800.0,
            "5030": 14650828800.0,
            "5035": 14650828800.0,
            "5040": 14650828800.0,
            "5045": 14650828800.0,
            "5050": 14650828800.0,
            "5055": 14650828800.0,
            "5060": 14650828800.0,
            "5065": 14650828800.0,
            "5070": 14650828800.0,
            "5075": 14650828800.0,
            "5080": 14650828800.0,
            "5085": 14650828800.0,
            "5090": 14650828800.0,
            "5095": 14650828800.0,
            "5100": 14650828800.0,
            "5105": 14650828800.0,
            "5110": 14650828800.0,
            "5115": 14650828800.0,
            "5120": 14650828800.0,
            "5125": 14650828800.0,
            "5130": 14650828800.0,
            "5135": 14650828800.0,
            "5140": 14650828800.0,
            "5145": 14650828800.0,
            "5150": 14650828800.0,
            "5155": 14650828800.0,
            "5160": 14650828800.0,
            "5165": 14650828800.0,
            "5170": 14650828800.0,
            "5175": 14650828800.0,
            "5180": 14650828800.0,
            "5185": 14650828800.0,
            "5190": 14650828800.0,
            "5195": 14650828800.0,
            "5200": 14650828800.0,
            "5205": 14650828800.0,
            "5210": 14650828800.0,
            "5215": 14650828800.0,
            "5220": 14650828800.0,
            "5225": 14650828800.0,
            "5230": 14650828800.0,
            "5235": 14650828800.0,
            "5240": 14650828800.0,
            "5245": 14650828800.0,
            "5250": 14650828800.0,
            "5255": 14650828800.0,
            "5260": 14650828800.0,
            "5265": 14650828800.0,
            "5270": 14650828800.0,
            "5275": 14650828800.0,
            "5280": 14650828800.0,
            "5285": 14650828800.0,
            "5290": 14650828800.0,
            "5295": 14650828800.0,
            "5300": 14650828800.0,
            "5305": 14650828800.0,
            "5310": 14650828800.0,
            "5315": 14650828800.0,
            "5320": 14650828800.0,
            "5325": 14650828800.0,
            "5330": 14650828800.0,
            "5335": 14650828800.0,
            "5340": 14650828800.0,
            "5345": 14650828800.0,
            "5350": 14650828800.0,
            "5355": 14650828800.0,
            "5360": 14650828800.0,
            "5365": 14650828800.0,
            "5370": 14650828800.0,
            "5375": 14650828800.0,
            "5380": 14650828800.0,
            "5385": 14650828800.0,
            "5390": 14650828800.0,
            "5395": 14650828800.0,
            "5400": 14650828800.0,
            "5405": 14650828800.0,
            "5410": 14650828800.0,
            "5415": 14650828800.0,
            "5420": 14650828800.0,
            "5425": 14650828800.0,
            "5430": 14650828800.0,
            "5435": 14650828800.0,
            "5440": 14650828800.0,
            "5445": 14650828800.0,
            "5450": 14650828800.0,
            "5455": 14650828800.0,
            "5460": 14650828800.0,
            "5465": 14650828800.0,
            "5470": 14650828800.0,
            "5475": 14650828800.0,
            "5480": 14650828800.0,
            "5485": 14650828800.0,
            "5490": 14650828800.0,
            "5495": 14650828800.0,
            "5500": 14650828800.0,
            "5505": 14650828800.0,
            "5510": 14650828800.0,
            "5515": 14650828800.0,
            "5520": 14650828800.0,
            "5525": 14650828800.0,
            "5530": 14650828800.0,
            "5535": 14650828800.0,
            "5540": 14650828800.0,
            "5545": 14650828800.0,
            "5550": 14650828800.0,
            "5555": 14650828800.0,
            "5560": 14650828800.0,
            "5565": 14650828800.0,
            "5570": 14650828800.0,
            "5575": 14650828800.0,
            "5580": 14650828800.0,
            "5585": 14650828800.0,
            "5590": 14650828800.0,
            "5595": 14650828800.0,
            "5600": 14650828800.0,
            "5605": 14650828800.0,
            "5610": 14650828800.0,
            "5615": 14650828800.0,
            "5620": 14650828800.0,
            "5625": 14650828800.0,
            "5630": 14650828800.0,
            "5635": 14650828800.0,
            "5640": 14650828800.0,
            "5645": 14650828800.0,
            "5650": 14650828800.0,
            "5655": 14650828800.0,
            "5660": 14650828800.0,
            "5665": 14650828800.0,
            "5670": 14650828800.0,
            "5675": 14650828800.0,
            "5680": 14650828800.0,
            "5685": 14650828800.0,
            "5690": 14650828800.0,
            "5695": 14650828800.0,
            "5700": 14650828800.0,
            "5705": 14650828800.0,
            "5710": 14650828800.0,
            "5715": 14650828800.0,
            "5720": 14650828800.0,
            "5725": 14650828800.0,
            "5730": 14650828800.0,
            "5735": 14650828800.0,
            "5740": 14650828800.0,
            "5745": 14650828800.0,
            "5750": 14650828800.0,
            "5755": 14650828800.0,
            "5760": 14650828800.0,
            "5765": 14650828800.0,
            "5770": 14650828800.0,
            "5775": 14650828800.0,
            "5780": 14650828800.0,
            "5785": 14650828800.0,
            "5790": 14650828800.0,
            "5795": 14650828800.0,
            "5800": 14650828800.0,
            "5805": 14650828800.0,
            "5810": 14650828800.0,
            "5815": 14650828800.0,
            "5820": 14650828800.0,
            "5825": 14650828800.0,
            "5830": 14650828800.0,
            "5835": 14650828800.0,
            "5840": 14650828800.0,
            "5845": 14650828800.0,
            "5850": 14650828800.0,
            "5855": 14650828800.0,
            "5860": 14650828800.0,
            "5865": 14650828800.0,
            "5870": 14650828800.0,
            "5875": 14650828800.0,
            "5880": 14650828800.0,
            "5885": 14650828800.0,
            "5890": 14650828800.0,
            "5895": 14650828800.0,
            "5900": 14650828800.0,
            "5905": 14650828800.0,
            "5910": 14650828800.0,
            "5915": 14650828800.0,
            "5920": 14650828800.0,
            "5925": 14650828800.0,
            "5930": 14650828800.0,
            "5935": 14650828800.0,
            "5940": 14650828800.0,
            "5945": 14650828800.0,
            "5950": 14650828800.0,
            "5955": 14650828800.0,
            "5960": 14650828800.0,
            "5965": 14650828800.0,
            "5970": 14650828800.0,
            "5975": 14650828800.0,
            "5980": 14650828800.0,
            "5985": 14650828800.0,
            "5990": 14650828800.0,
            "5995": 14650828800.0,
            "6000": 14650828800.0,
            "6005": 14650828800.0,
            "6010": 14650828800.0,
            "6015": 14650828800.0,
            "6020": 14650828800.0,
            "6025": 14650828800.0,
            "6030": 14650828800.0,
            "6035": 14650828800.0,
            "6040": 14650828800.0,
            "6045": 14650828800.0,
            "6050": 14650828800.0,
            "6055": 14650828800.0,
            "6060": 14650828800.0,
            "6065": 14650828800.0,
            "6070": 14650828800.0,
            "6075": 14650828800.0,
            "6080": 14650828800.0,
            "6085": 14650828800.0,
            "6090": 14650828800.0,
            "6095": 14650828800.0,
            "6100": 14650828800.0,
            "6105": 14650828800.0,
            "6110": 14650828800.0,
            "6115": 14650828800.0,
            "6120": 14650828800.0,
            "6125": 14650828800.0,
            "6130": 14650828800.0,
            "6135": 14650828800.0,
            "6140": 14650828800.0,
            "6145": 14650828800.0,
            "6150": 14650828800.0,
            "6155": 14650828800.0,
            "6160": 14650828800.0,
            "6165": 14650828800.0,
            "6170": 14650828800.0,
            "6175": 14650828800.0,
            "6180": 14650828800.0,
            "6185": 14650828800.0,
            "6190": 14650828800.0,
            "6195": 14650828800.0,
            "6200": 14650828800.0,
            "6205": 14650828800.0,
            "6210": 14650828800.0,
            "6215": 14650828800.0,
            "6220": 14650828800.0,
            "6225": 14650828800.0,
            "6230": 14650828800.0,
            "6235": 14650828800.0,
            "6240": 14650828800.0,
            "6245": 14650828800.0,
            "6250": 14650828800.0,
            "6255": 14650828800.0,
            "6260": 14650828800.0,
            "6265": 14650828800.0,
            "6270": 14650828800.0,
            "6275": 14650828800.0,
            "6280": 14650828800.0,
            "6285": 14650828800.0,
            "6290": 14650828800.0,
            "6295": 14650828800.0,
            "6300": 14650828800.0,
            "6305": 14650828800.0,
            "6310": 14650828800.0,
            "6315": 14650828800.0,
            "6320": 14650828800.0,
            "6325": 14650828800.0,
            "6330": 14650828800.0,
            "6335": 14650828800.0,
            "6340": 14650828800.0,
            "6345": 14650828800.0,
            "6350": 14650828800.0,
            "6355": 14650828800.0,
            "6360": 14650828800.0,
            "6365": 14650828800.0,
            "6370": 14650828800.0,
            "6375": 14650828800.0,
            "6380": 14650828800.0,
            "6385": 14650828800.0,
            "6390": 14650828800.0,
            "6395": 14650828800.0,
            "6400": 14650828800.0,
            "6405": 14650828800.0,
            "6410": 14650828800.0,
            "6415": 14650828800.0,
            "6420": 14650828800.0,
            "6425": 14650828800.0,
            "6430": 14650828800.0,
            "6435": 14650828800.0,
            "6440": 14650828800.0,
            "6445": 14650828800.0,
            "6450": 14650828800.0,
            "6455": 14650828800.0,
            "6460": 14650828800.0,
            "6465": 14650828800.0,
            "6470": 14650828800.0,
            "6475": 14650828800.0,
            "6480": 14650828800.0,
            "6485": 14650828800.0,
            "6490": 14650828800.0,
            "6495": 14650828800.0,
            "6500": 14650828800.0,
            "6505": 14650828800.0,
            "6510": 14650828800.0,
            "6515": 14650828800.0,
            "6520": 14650828800.0,
            "6525": 14650828800.0,
            "6530": 14650828800.0,
            "6535": 14650828800.0,
            "6540": 14650828800.0,
            "6545": 14650828800.0,
            "6550": 14650828800.0,
            "6555": 14650828800.0,
            "6560": 14650828800.0,
            "6565": 14650828800.0,
            "6570": 14650828800.0,
            "6575": 14650828800.0,
            "6580": 14650828800.0,
            "6585": 14650828800.0,
            "6590": 14650828800.0,
            "6595": 14650828800.0,
            "6600": 14650828800.0,
            "6605": 14650828800.0,
            "6610": 14650828800.0,
            "6615": 14650828800.0,
            "6620": 14650828800.0,
            "6625": 14650828800.0,
            "6630": 14650828800.0,
            "6635": 14650828800.0,
            "6640": 14650828800.0,
            "6645": 14650828800.0,
            "6650": 14650828800.0,
            "6655": 14650828800.0,
            "6660": 14650828800.0,
            "6665": 14650828800.0,
            "6670": 14650828800.0,
            "6675": 14650828800.0,
            "6680": 14650828800.0,
            "6685": 14650828800.0,
            "6690": 14650828800.0,
            "6695": 14650828800.0,
            "6700": 14650828800.0,
            "6705": 14650828800.0,
            "6710": 14650828800.0,
            "6715": 14650828800.0,
            "6720": 14650828800.0,
            "6725": 14650828800.0,
            "6730": 14650828800.0,
            "6735": 14650828800.0,
            "6740": 14650828800.0,
            "6745": 14650828800.0,
            "6750": 14650828800.0,
            "6755": 14650828800.0,
            "6760": 14650828800.0,
            "6765": 14650828800.0,
            "6770": 14650828800.0,
            "6775": 14650828800.0,
            "6780": 14650828800.0,
            "6785": 14650828800.0,
            "6790": 14650828800.0,
            "6795": 14650828800.0,
            "6800": 14650828800.0,
            "6805": 14650828800.0,
            "6810": 14650828800.0,
            "6815": 14650828800.0,
            "6820": 14650828800.0,
            "6825": 14650828800.0,
            "6830": 14650828800.0,
            "6835": 14650828800.0,
            "6840": 14650828800.0,
            "6845": 14650828800.0,
            "6850": 14650828800.0,
            "6855": 14650828800.0,
            "6860": 14650828800.0,
            "6865": 14650828800.0,
            "6870": 14650828800.0,
            "6875": 14650828800.0,
            "6880": 14650828800.0,
            "6885": 14650828800.0,
            "6890": 14650828800.0,
            "6895": 14650828800.0,
            "6900": 14650828800.0,
            "6905": 14650828800.0,
            "6910": 14650828800.0,
            "6915": 14650828800.0,
            "6920": 14650828800.0,
            "6925": 14650828800.0,
            "6930": 14650828800.0,
            "6935": 14650828800.0,
            "6940": 14650828800.0,
            "6945": 14650828800.0,
            "6950": 14650828800.0,
            "6955": 14650828800.0,
            "6960": 14650828800.0,
            "6965": 14650828800.0,
            "6970": 14650828800.0,
            "6975": 14650828800.0,
            "6980": 14650828800.0,
            "6985": 14650828800.0,
            "6990": 14650828800.0,
            "6995": 14650828800.0,
            "7000": 14650828800.0,
            "7005": 14650828800.0,
            "7010": 14650828800.0,
            "7015": 14650828800.0,
            "7020": 14650828800.0,
            "7025": 14650828800.0,
            "7030": 14650828800.0,
            "7035": 14650828800.0,
            "7040": 14650828800.0,
            "7045": 14650828800.0,
            "7050": 14650828800.0,
            "7055": 14650828800.0,
            "7060": 14650828800.0,
            "7065": 14650828800.0,
            "7070": 14650828800.0,
            "7075": 14650828800.0,
            "7080": 14650828800.0,
            "7085": 14650828800.0,
            "7090": 14650828800.0,
            "7095": 14650828800.0,
            "7100": 14650828800.0,
            "7105": 14650828800.0,
            "7110": 14650828800.0,
            "7115": 14650828800.0,
            "7120": 14650828800.0,
            "7125": 14650828800.0,
            "7130": 14650828800.0,
            "7135": 14650828800.0,
            "7140": 14650828800.0,
            "7145": 14650828800.0,
            "7150": 14650828800.0,
            "7155": 14650828800.0,
            "7160": 14650828800.0,
            "7165": 14650828800.0,
            "7170": 14650828800.0,
            "7175": 14650828800.0,
            "7180": 14650828800.0,
            "7185": 14650828800.0,
            "7190": 14650828800.0,
            "7195": 14650828800.0,
            "7200": 14650828800.0,
            "7205": 14650828800.0,
            "7210": 14650828800.0,
            "7215": 14650828800.0,
            "7220": 14650828800.0,
            "7225": 14650828800.0,
            "7230": 14650828800.0,
            "7235": 14650828800.0,
            "7240": 14650828800.0,
            "7245": 14650828800.0,
            "7250": 14650828800.0,
            "7255": 14650828800.0,
            "7260": 14650828800.0,
            "7265": 14650828800.0,
            "7270": 14650828800.0,
            "7275": 14650828800.0,
            "7280": 14650828800.0,
            "7285": 14650828800.0,
            "7290": 14650828800.0,
            "7295": 14650828800.0,
            "7300": 14650828800.0,
            "7305": 14650828800.0,
            "7310": 14650828800.0,
            "7315": 14650828800.0,
            "7320": 14650828800.0,
            "7325": 14650828800.0,
            "7330": 14650828800.0,
            "7335": 14650828800.0,
            "7340": 14650828800.0,
            "7345": 14650828800.0,
            "7350": 14650828800.0,
            "7355": 14650828800.0,
            "7360": 14650828800.0,
            "7365": 14650828800.0,
            "7370": 14650828800.0,
            "7375": 14650828800.0,
            "7380": 14650828800.0,
            "7385": 14650828800.0,
            "7390": 14650828800.0,
            "7395": 14650828800.0,
            "7400": 14650828800.0,
            "7405": 14650828800.0,
            "7410": 14650828800.0,
            "7415": 14650828800.0,
            "7420": 14650828800.0,
            "7425": 14650828800.0,
            "7430": 14650828800.0,
            "7435": 14650828800.0,
            "7440": 14650828800.0,
            "7445": 14650828800.0,
            "7450": 14650828800.0,
            "7455": 14650828800.0,
            "7460": 14650828800.0,
            "7465": 14650828800.0,
            "7470": 14650828800.0,
            "7475": 14650828800.0,
            "7480": 14650828800.0,
            "7485": 14650828800.0,
            "7490": 14650828800.0,
            "7495": 14650828800.0,
            "7500": 14650828800.0,
            "7505": 14650828800.0,
            "7510": 14650828800.0,
            "7515": 14650828800.0,
            "7520": 14650828800.0,
            "7525": 14650828800.0,
            "7530": 14650828800.0,
            "7535": 14650828800.0,
            "7540": 14650828800.0,
            "7545": 14650828800.0,
            "7550": 14650828800.0,
            "7555": 14650828800.0,
            "7560": 14650828800.0,
            "7565": 14650828800.0,
            "7570": 14650828800.0,
            "7575": 14650828800.0,
            "7580": 14650828800.0,
            "7585": 14650828800.0,
            "7590": 14650828800.0,
            "7595": 14650828800.0,
            "7600": 14650828800.0,
            "7605": 14650828800.0,
            "7610": 14650828800.0,
            "7615": 14650828800.0,
            "7620": 14650828800.0,
            "7625": 14650828800.0,
            "7630": 14650828800.0,
            "7635": 14650828800.0,
            "7640": 14650828800.0,
            "7645": 14650828800.0,
            "7650": 14650828800.0,
            "7655": 14650828800.0,
            "7660": 14650828800.0,
            "7665": 14650828800.0,
            "7670": 14650828800.0,
            "7675": 14650828800.0,
            "7680": 14650828800.0,
            "7685": 14650828800.0,
            "7690": 14650828800.0,
            "7695": 14650828800.0,
            "7700": 14650828800.0,
            "7705": 14650828800.0,
            "7710": 14650828800.0,
            "7715": 14650828800.0,
            "7720": 14650828800.0,
            "7725": 14650828800.0,
            "7730": 14650828800.0,
            "7735": 14650828800.0,
            "7740": 14650828800.0,
            "7745": 14650828800.0,
            "7750": 14650828800.0,
            "7755": 14650828800.0,
            "7760": 14650828800.0,
            "7765": 14650828800.0,
            "7770": 14650828800.0,
            "7775": 14650828800.0,
            "7780": 14650828800.0,
            "7785": 14650828800.0,
            "7790": 14650828800.0,
            "7795": 14650828800.0,
            "7800": 14650828800.0,
            "7805": 14650828800.0,
            "7810": 14650828800.0,
            "7815": 14650828800.0,
            "7820": 14650828800.0,
            "7825": 14650828800.0,
            "7830": 14650828800.0,
            "7835": 14650828800.0,
            "7840": 14650828800.0,
            "7845": 14650828800.0,
            "7850": 14650828800.0,
            "7855": 14650828800.0,
            "7860": 14650828800.0,
            "7865": 14650828800.0,
            "7870": 14650828800.0,
            "7875": 14650828800.0,
            "7880": 14650828800.0,
            "7885": 14650828800.0,
            "7890": 14650828800.0,
            "7895": 14650828800.0,
            "7900": 14650828800.0,
            "7905": 14650828800.0,
            "7910": 14650828800.0,
            "7915": 14650828800.0,
            "7920": 14650828800.0,
            "7925": 14650828800.0,
            "7930": 14650828800.0,
            "7935": 14650828800.0,
            "7940": 14650828800.0,
            "7945": 14650828800.0,
            "7950": 14650828800.0,
            "7955": 14650828800.0,
            "7960": 14650828800.0,
            "7965": 14650828800.0,
            "7970": 14650828800.0,
            "7975": 14650828800.0,
            "7980": 14650828800.0,
            "7985": 14650828800.0,
            "7990": 14650828800.0,
            "7995": 14650828800.0,
            "8000": 14650828800.0,
            "8005": 14650828800.0,
            "8010": 14650828800.0,
            "8015": 14650828800.0,
            "8020": 14650828800.0,
            "8025": 14650828800.0,
            "8030": 14650828800.0,
            "8035": 14650828800.0,
            "8040": 14650828800.0,
            "8045": 14650828800.0,
            "8050": 14650828800.0,
            "8055": 14650828800.0,
            "8060": 14650828800.0,
            "8065": 14650828800.0,
            "8070": 14650828800.0,
            "8075": 14650828800.0,
            "8080": 14650828800.0,
            "8085": 14650828800.0,
            "8090": 14650828800.0,
            "8095": 14650828800.0,
            "8100": 14650828800.0,
            "8105": 14650828800.0,
            "8110": 14650828800.0,
            "8115": 14650828800.0,
            "8120": 14650828800.0,
            "8125": 14650828800.0,
            "8130": 14650828800.0,
            "8135": 14650828800.0,
            "8140": 14650828800.0,
            "8145": 14650828800.0,
            "8150": 14650828800.0,
            "8155": 14650828800.0,
            "8160": 14650828800.0,
            "8165": 14650828800.0,
            "8170": 14650828800.0,
            "8175": 14650828800.0,
            "8180": 14650828800.0,
            "8185": 14650828800.0,
            "8190": 14650828800.0,
            "8195": 14650828800.0,
            "8200": 14650828800.0,
            "8205": 14650828800.0,
            "8210": 14650828800.0,
            "8215": 14650828800.0,
            "8220": 14650828800.0,
            "8225": 14650828800.0,
            "8230": 14650828800.0,
            "8235": 14650828800.0,
            "8240": 14650828800.0,
            "8245": 14650828800.0,
            "8250": 14650828800.0,
            "8255": 14650828800.0,
            "8260": 14650828800.0,
            "8265": 14650828800.0,
            "8270": 14650828800.0,
            "8275": 14650828800.0,
            "8280": 14650828800.0,
            "8285": 14650828800.0,
            "8290": 14650828800.0,
            "8295": 14650828800.0,
            "8300": 14650828800.0,
            "8305": 14650828800.0,
            "8310": 14650828800.0,
            "8315": 14650828800.0,
            "8320": 14650828800.0,
            "8325": 14650828800.0,
            "8330": 14650828800.0,
            "8335": 14650828800.0,
            "8340": 14650828800.0,
            "8345": 14650828800.0,
            "8350": 14650828800.0,
            "8355": 14650828800.0,
            "8360": 14650828800.0,
            "8365": 14650828800.0,
            "8370": 14650828800.0,
            "8375": 14650828800.0,
            "8380": 14650828800.0,
            "8385": 14650828800.0,
            "8390": 14650828800.0,
            "8395": 14650828800.0,
            "8400": 14650828800.0,
            "8405": 14650828800.0,
            "8410": 14650828800.0,
            "8415": 14650828800.0,
            "8420": 14650828800.0,
            "8425": 14650828800.0,
            "8430": 14650828800.0,
            "8435": 14650828800.0,
            "8440": 14650828800.0,
            "8445": 14650828800.0,
            "8450": 14650828800.0,
            "8455": 14650828800.0,
            "8460": 14650828800.0,
            "8465": 14650828800.0,
            "8470": 14650828800.0,
            "8475": 14650828800.0,
            "8480": 14650828800.0,
            "8485": 14650828800.0,
            "8490": 14650828800.0,
            "8495": 14650828800.0,
            "8500": 14650828800.0,
            "8505": 14650828800.0,
            "8510": 14650828800.0,
            "8515": 14650828800.0,
            "8520": 14650828800.0,
            "8525": 14650828800.0,
            "8530": 14650828800.0,
            "8535": 14650828800.0,
            "8540": 14650828800.0,
            "8545": 14650828800.0,
            "8550": 14650828800.0,
            "8555": 14650828800.0,
            "8560": 14650828800.0,
            "8565": 14650828800.0,
            "8570": 14650828800.0,
            "8575": 14650828800.0,
            "8580": 14650828800.0,
            "8585": 14650828800.0,
            "8590": 14650828800.0,
            "8595": 14650828800.0,
            "8600": 14650828800.0,
            "8605": 14650828800.0,
            "8610": 14650828800.0,
            "8615": 14650828800.0,
            "8620": 14650828800.0,
            "8625": 14650828800.0,
            "8630": 14650828800.0,
            "8635": 14650828800.0,
            "8640": 14650828800.0,
            "8645": 14650828800.0,
            "8650": 14650828800.0,
            "8655": 14650828800.0,
            "8660": 14650828800.0,
            "8665": 14650828800.0,
            "8670": 14650828800.0,
            "8675": 14650828800.0,
            "8680": 14650828800.0,
            "8685": 14650828800.0,
            "8690": 14650828800.0,
            "8695": 14650828800.0,
            "8700": 14650828800.0,
            "8705": 14650828800.0,
            "8710": 14650828800.0,
            "8715": 14650828800.0,
            "8720": 14650828800.0,
            "8725": 14650828800.0,
            "8730": 14650828800.0,
            "8735": 14650828800.0,
            "8740": 14650828800.0,
            "8745": 14650828800.0,
            "8750": 14650828800.0,
            "8755": 14650828800.0,
            "8760": 14650828800.0,
            "8765": 14650828800.0,
            "8770": 14650828800.0,
            "8775": 14650828800.0,
            "8780": 14650828800.0,
            "8785": 14650828800.0,
            "8790": 14650828800.0,
            "8795": 14650828800.0,
            "8800": 14650828800.0,
            "8805": 14650828800.0,
            "8810": 14650828800.0,
            "8815": 14650828800.0,
            "8820": 14650828800.0,
            "8825": 14650828800.0,
            "8830": 14650828800.0,
            "8835": 14650828800.0,
            "8840": 14650828800.0,
            "8845": 14650828800.0,
            "8850": 14650828800.0,
            "8855": 14650828800.0,
            "8860": 14650828800.0,
            "8865": 14650828800.0,
            "8870": 14650828800.0,
            "8875": 14650828800.0,
            "8880": 14650828800.0,
            "8885": 14650828800.0,
            "8890": 14650828800.0,
            "8895": 14650828800.0,
            "8900": 14650828800.0,
            "8905": 14650828800.0,
            "8910": 14650828800.0,
            "8915": 14650828800.0,
            "8920": 14650828800.0,
            "8925": 14650828800.0,
            "8930": 14650828800.0,
            "8935": 14650828800.0,
            "8940": 14650828800.0,
            "8945": 14650828800.0,
            "8950": 14650828800.0,
            "8955": 14650828800.0,
            "8960": 14650828800.0,
            "8965": 14650828800.0,
            "8970": 14650828800.0,
            "8975": 14650828800.0,
            "8980": 14650828800.0,
            "8985": 14650828800.0,
            "8990": 14650828800.0,
            "8995": 14650828800.0,
            "9000": 14650828800.0,
            "9005": 14650828800.0,
            "9010": 14650828800.0,
            "9015": 14650828800.0,
            "9020": 14650828800.0,
            "9025": 14650828800.0,
            "9030": 14650828800.0,
            "9035": 14650828800.0,
            "9040": 14650828800.0,
            "9045": 14650828800.0,
            "9050": 14650828800.0,
            "9055": 14650828800.0,
            "9060": 14650828800.0,
            "9065": 14650828800.0,
            "9070": 14650828800.0,
            "9075": 14650828800.0,
            "9080": 14650828800.0,
            "9085": 14650828800.0,
            "9090": 14650828800.0,
            "9095": 14650828800.0,
            "9100": 14650828800.0,
            "9105": 14650828800.0,
            "9110": 14650828800.0,
            "9115": 14650828800.0,
            "9120": 14650828800.0,
            "9125": 14650828800.0,
            "9130": 14650828800.0,
            "9135": 14650828800.0,
            "9140": 14650828800.0,
            "9145": 14650828800.0,
            "9150": 14650828800.0,
            "9155": 14650828800.0,
            "9160": 14650828800.0,
            "9165": 14650828800.0,
            "9170": 14650828800.0,
            "9175": 14650828800.0,
            "9180": 14650828800.0,
            "9185": 14650828800.0,
            "9190": 14650828800.0,
            "9195": 14650828800.0,
            "9200": 14650828800.0,
            "9205": 14650828800.0,
            "9210": 14650828800.0,
            "9215": 14650828800.0,
            "9220": 14650828800.0,
            "9225": 14650828800.0,
            "9230": 14650828800.0,
            "9235": 14650828800.0,
            "9240": 14650828800.0,
            "9245": 14650828800.0,
            "9250": 14650828800.0,
            "9255": 14650828800.0,
            "9260": 14650828800.0,
            "9265": 14650828800.0,
            "9270": 14650828800.0,
            "9275": 14650828800.0,
            "9280": 14650828800.0,
            "9285": 14650828800.0,
            "9290": 14650828800.0,
            "9295": 14650828800.0,
            "9300": 14650828800.0,
            "9305": 14650828800.0,
            "9310": 14650828800.0,
            "9315": 14650828800.0,
            "9320": 14650828800.0,
            "9325": 14650828800.0,
            "9330": 14650828800.0,
            "9335": 14650828800.0,
            "9340": 14650828800.0,
            "9345": 14650828800.0,
            "9350": 14650828800.0,
            "9355": 14650828800.0,
            "9360": 14650828800.0,
            "9365": 14650828800.0,
            "9370": 14650828800.0,
            "9375": 14650828800.0,
            "9380": 14650828800.0,
            "9385": 14650828800.0,
            "9390": 14650828800.0,
            "9395": 14650828800.0,
            "9400": 14650828800.0,
            "9405": 14650828800.0,
            "9410": 14650828800.0,
            "9415": 14650828800.0,
            "9420": 14650828800.0,
            "9425": 14650828800.0,
            "9430": 14650828800.0,
            "9435": 14650828800.0,
            "9440": 14650828800.0,
            "9445": 14650828800.0,
            "9450": 14650828800.0,
            "9455": 14650828800.0,
            "9460": 14650828800.0,
            "9465": 14650828800.0,
            "9470": 14650828800.0,
            "9475": 14650828800.0,
            "9480": 14650828800.0,
            "9485": 14650828800.0,
            "9490": 14650828800.0,
            "9495": 14650828800.0,
            "9500": 14650828800.0,
            "9505": 14650828800.0,
            "9510": 14650828800.0,
            "9515": 14650828800.0,
            "9520": 14650828800.0,
            "9525": 14650828800.0,
            "9530": 14650828800.0,
            "9535": 14650828800.0,
            "9540": 14650828800.0,
            "9545": 14650828800.0,
            "9550": 14650828800.0,
            "9555": 14650828800.0,
            "9560": 14650828800.0,
            "9565": 14650828800.0,
            "9570": 14650828800.0,
            "9575": 14650828800.0,
            "9580": 14650828800.0,
            "9585": 14650828800.0,
            "9590": 14650828800.0,
            "9595": 14650828800.0,
            "9600": 14650828800.0,
            "9605": 14650828800.0,
            "9610": 14650828800.0,
            "9615": 14650828800.0,
            "9620": 14650828800.0,
            "9625": 14650828800.0,
            "9630": 14650828800.0,
            "9635": 14650828800.0,
            "9640": 14650828800.0,
            "9645": 14650828800.0,
            "9650": 14650828800.0,
            "9655": 14650828800.0,
            "9660": 14650828800.0,
            "9665": 14650828800.0,
            "9670": 14650828800.0,
            "9675": 14650828800.0,
            "9680": 14650828800.0,
            "9685": 14650828800.0,
            "9690": 14650828800.0,
            "9695": 14650828800.0,
            "9700": 14650828800.0,
            "9705": 14650828800.0,
            "9710": 14650828800.0,
            "9715": 14650828800.0,
            "9720": 14650828800.0,
            "9725": 14650828800.0,
            "9730": 14650828800.0,
            "9735": 14650828800.0,
            "9740": 14650828800.0,
            "9745": 14650828800.0,
            "9750": 14650828800.0,
            "9755": 14650828800.0,
            "9760": 14650828800.0,
            "9765": 14650828800.0,
            "9770": 14650828800.0,
            "9775": 14650828800.0,
            "9780": 14650828800.0,
            "9785": 14650828800.0,
            "9790": 14650828800.0,
            "9795": 14650828800.0,
            "9800": 14650828800.0,
            "9805": 14650828800.0,
            "9810": 14650828800.0,
            "9815": 14650828800.0,
            "9820": 14650828800.0,
            "9825": 14650828800.0,
            "9830": 14650828800.0,
            "9835": 14650828800.0,
            "9840": 14650828800.0,
            "9845": 14650828800.0,
            "9850": 14650828800.0,
            "9855": 14650828800.0,
            "9860": 14650828800.0,
            "9865": 14650828800.0,
            "9870": 14650828800.0,
            "9875": 14650828800.0,
            "9880": 14650828800.0,
            "9885": 14650828800.0,
            "9890": 14650828800.0,
            "9895": 14650828800.0,
            "9900": 14650828800.0,
            "9905": 14650828800.0,
            "9910": 14650828800.0,
            "9915": 14650828800.0,
            "9920": 14650828800.0,
            "9925": 14650828800.0,
            "9930": 14650828800.0,
            "9935": 14650828800.0,
            "9940": 14650828800.0,
            "9945": 14650828800.0,
            "9950": 14650828800.0,
            "9955": 14650828800.0,
            "9960": 14650828800.0,
            "9965": 14650828800.0,
            "9970": 14650828800.0,
            "9975": 14650828800.0,
            "9980": 14650828800.0,
            "9985": 14650828800.0,
            "9990": 14650828800.0,
            "9995": 14650828800.0,
            "10000": 14650828800.0,
            "10005": 14650828800.0,
            "10010": 14650828800.0,
            "10015": 14650828800.0,
            "10020": 14650828800.0,
            "10025": 14650828800.0,
            "10030": 14650828800.0,
            "10035": 14650828800.0,
            "10040": 14650828800.0,
            "10045": 14650828800.0,
            "10050": 14650828800.0,
            "10055": 14650828800.0,
            "10060": 14650828800.0,
            "10065": 14650828800.0,
            "10070": 14650828800.0,
            "10075": 14650828800.0,
            "10080": 14650828800.0,
            "10085": 14650828800.0,
            "10090": 14650828800.0,
            "10095": 14650828800.0,
            "10100": 14650828800.0,
            "10105": 14650828800.0,
            "10110": 14650828800.0,
            "10115": 14650828800.0,
            "10120": 14650828800.0,
            "10125": 14650828800.0,
            "10130": 14650828800.0,
            "10135": 14650828800.0,
            "10140": 14650828800.0,
            "10145": 14650828800.0,
            "10150": 14650828800.0,
            "10155": 14650828800.0,
            "10160": 14650828800.0,
            "10165": 14650828800.0,
            "10170": 14650828800.0,
            "10175": 14650828800.0,
            "10180": 14650828800.0,
            "10185": 14650828800.0,
            "10190": 14650828800.0,
            "10195": 14650828800.0,
            "10200": 14650828800.0,
            "10205": 14650828800.0,
            "10210": 14650828800.0,
            "10215": 14650828800.0,
            "10220": 14650828800.0,
            "10225": 14650828800.0,
            "10230": 14650828800.0,
            "10235": 14650828800.0,
            "10240": 14650828800.0,
            "10245": 14650828800.0,
            "10250": 14650828800.0,
            "10255": 14650828800.0,
            "10260": 14650828800.0,
            "10265": 14650828800.0,
            "10270": 14650828800.0,
            "10275": 14650828800.0,
            "10280": 14650828800.0,
            "10285": 14650828800.0,
            "10290": 14650828800.0,
            "10295": 14650828800.0,
            "10300": 14650828800.0,
            "10305": 14650828800.0,
            "10310": 14650828800.0,
            "10315": 14650828800.0,
            "10320": 14650828800.0,
            "10325": 14650828800.0,
            "10330": 14650828800.0,
            "10335": 14650828800.0,
            "10340": 14650828800.0,
            "10345": 14650828800.0,
            "10350": 14650828800.0,
            "10355": 14650828800.0,
            "10360": 14650828800.0,
            "10365": 14650828800.0,
            "10370": 14650828800.0,
            "10375": 14650828800.0,
            "10380": 14650828800.0,
            "10385": 14650828800.0,
            "10390": 14650828800.0,
            "10395": 14650828800.0,
            "10400": 14650828800.0,
            "10405": 14650828800.0,
            "10410": 14650828800.0,
            "10415": 14650828800.0,
            "10420": 14650828800.0,
            "10425": 14650828800.0,
            "10430": 14650828800.0,
            "10435": 14650828800.0,
            "10440": 14650828800.0,
            "10445": 14650828800.0,
            "10450": 14650828800.0,
            "10455": 14650828800.0,
            "10460": 14650828800.0,
            "10465": 14650828800.0,
            "10470": 14650828800.0,
            "10475": 14650828800.0,
            "10480": 14650828800.0,
            "10485": 14650828800.0,
            "10490": 14650828800.0,
            "10495": 14650828800.0,
            "10500": 14650828800.0,
            "10505": 14650828800.0,
            "10510": 14650828800.0,
            "10515": 14650828800.0,
            "10520": 14650828800.0,
            "10525": 14650828800.0,
            "10530": 14650828800.0,
            "10535": 14650828800.0,
            "10540": 14650828800.0,
            "10545": 14650828800.0,
            "10550": 14650828800.0,
            "10555": 14650828800.0,
            "10560": 14650828800.0,
            "10565": 14650828800.0,
            "10570": 14650828800.0,
            "10575": 14650828800.0,
            "10580": 14650828800.0,
            "10585": 14650828800.0,
            "10590": 14650828800.0,
            "10595": 14650828800.0,
            "10600": 14650828800.0,
            "10605": 14650828800.0,
            "10610": 14650828800.0,
            "10615": 14650828800.0,
            "10620": 14650828800.0,
            "10625": 14650828800.0,
            "10630": 14650828800.0,
            "10635": 14650828800.0,
            "10640": 14650828800.0,
            "10645": 14650828800.0,
            "10650": 14650828800.0,
            "10655": 14650828800.0,
            "10660": 14650828800.0,
            "10665": 14650828800.0,
            "10670": 14650828800.0,
            "10675": 14650828800.0,
            "10680": 14650828800.0,
            "10685": 14650828800.0,
            "10690": 14650828800.0,
            "10695": 14650828800.0,
            "10700": 14650828800.0,
            "10705": 14650828800.0,
            "10710": 14650828800.0,
            "10715": 14650828800.0,
            "10720": 14650828800.0,
            "10725": 14650828800.0,
            "10730": 14650828800.0,
            "10735": 14650828800.0,
            "10740": 14650828800.0,
            "10745": 14650828800.0,
            "10750": 14650828800.0,
            "10755": 14650828800.0,
            "10760": 14650828800.0,
            "10765": 14650828800.0,
            "10770": 14650828800.0,
            "10775": 14650828800.0,
            "10780": 14650828800.0,
            "10785": 14650828800.0,
            "10790": 14650828800.0,
            "10795": 14650828800.0,
            "10800": 14650828800.0,
            "10805": 14650828800.0,
            "10810": 14650828800.0,
            "10815": 14650828800.0,
            "10820": 14650828800.0,
            "10825": 14650828800.0,
            "10830": 14650828800.0,
            "10835": 14650828800.0,
            "10840": 14650828800.0,
            "10845": 14650828800.0,
            "10850": 14650828800.0,
            "10855": 14650828800.0,
            "10860": 14650828800.0,
            "10865": 14650828800.0,
            "10870": 14650828800.0,
            "10875": 14650828800.0,
            "10880": 14650828800.0,
            "10885": 14650828800.0,
            "10890": 14650828800.0,
            "10895": 14650828800.0,
            "10900": 14650828800.0,
            "10905": 14650828800.0,
            "10910": 14650828800.0,
            "10915": 14650828800.0,
            "10920": 14650828800.0,
            "10925": 14650828800.0,
            "10930": 14650828800.0,
            "10935": 14650828800.0,
            "10940": 14650828800.0,
            "10945": 14650828800.0,
            "10950": 14650828800.0,
            "10955": 14650828800.0,
            "10960": 14650828800.0,
            "10965": 14650828800.0,
            "10970": 14650828800.0,
            "10975": 14650828800.0,
            "10980": 14650828800.0,
            "10985": 14650828800.0,
            "10990": 14650828800.0,
            "10995": 14650828800.0,
            "11000": 14650828800.0,
            "11005": 14650828800.0,
            "11010": 14650828800.0,
            "11015": 14650828800.0,
            "11020": 14650828800.0,
            "11025": 14650828800.0,
            "11030": 14650828800.0,
            "11035": 14650828800.0,
            "11040": 14650828800.0,
            "11045": 14650828800.0,
            "11050": 14650828800.0,
            "11055": 14650828800.0,
            "11060": 14650828800.0,
            "11065": 14650828800.0,
            "11070": 14650828800.0,
            "11075": 14650828800.0,
            "11080": 14650828800.0,
            "11085": 14650828800.0,
            "11090": 14650828800.0,
            "11095": 14650828800.0,
            "11100": 14650828800.0,
            "11105": 14650828800.0,
            "11110": 14650828800.0,
            "11115": 14650828800.0,
            "11120": 14650828800.0,
            "11125": 14650828800.0,
            "11130": 14650828800.0,
            "11135": 14650828800.0,
            "11140": 14650828800.0,
            "11145": 14650828800.0,
            "11150": 14650828800.0,
            "11155": 14650828800.0,
            "11160": 14650828800.0,
            "11165": 14650828800.0,
            "11170": 14650828800.0,
            "11175": 14650828800.0,
            "11180": 14650828800.0,
            "11185": 14650828800.0,
            "11190": 14650828800.0,
            "11195": 14650828800.0,
            "11200": 14650828800.0,
            "11205": 14650828800.0,
            "11210": 14650828800.0,
            "11215": 14650828800.0,
            "11220": 14650828800.0,
            "11225": 14650828800.0,
            "11230": 14650828800.0,
            "11235": 14650828800.0,
            "11240": 14650828800.0,
            "11245": 14650828800.0,
            "11250": 14650828800.0,
            "11255": 14650828800.0,
            "11260": 14650828800.0,
            "11265": 14650828800.0,
            "11270": 14650828800.0,
            "11275": 14650828800.0,
            "11280": 14650828800.0,
            "11285": 14650828800.0,
            "11290": 14650828800.0,
            "11295": 14650828800.0,
            "11300": 14650828800.0,
            "11305": 14650828800.0,
            "11310": 14650828800.0,
            "11315": 14650828800.0,
            "11320": 14650828800.0,
            "11325": 14650828800.0,
            "11330": 14650828800.0,
            "11335": 14650828800.0,
            "11340": 14650828800.0,
            "11345": 14650828800.0,
            "11350": 14650828800.0,
            "11355": 14650828800.0,
            "11360": 14650828800.0,
            "11365": 14650828800.0,
            "11370": 14650828800.0,
            "11375": 14650828800.0,
            "11380": 14650828800.0,
            "11385": 14650828800.0,
            "11390": 14650828800.0,
            "11395": 14650828800.0,
            "11400": 14650828800.0,
            "11405": 14650828800.0,
            "11410": 14650828800.0,
            "11415": 14650828800.0,
            "11420": 14650828800.0,
            "11425": 14650828800.0,
            "11430": 14650828800.0,
            "11435": 14650828800.0,
            "11440": 14650828800.0,
            "11445": 14650828800.0,
            "11450": 14650828800.0,
            "11455": 14650828800.0,
            "11460": 14650828800.0,
            "11465": 14650828800.0,
            "11470": 14650828800.0,
            "11475": 14650828800.0,
            "11480": 14650828800.0,
            "11485": 14650828800.0,
            "11490": 14650828800.0,
            "11495": 14650828800.0,
            "11500": 14650828800.0,
            "11505": 14650828800.0,
            "11510": 14650828800.0,
            "11515": 14650828800.0,
            "11520": 14650828800.0,
            "11525": 14650828800.0,
            "11530": 14650828800.0,
            "11535": 14650828800.0,
            "11540": 14650828800.0,
            "11545": 14650828800.0,
            "11550": 14650828800.0,
            "11555": 14650828800.0,
            "11560": 14650828800.0,
            "11565": 14650828800.0,
            "11570": 14650828800.0,
            "11575": 14650828800.0,
            "11580": 14650828800.0,
            "11585": 14650828800.0,
            "11590": 14650828800.0,
            "11595": 14650828800.0,
            "11600": 14650828800.0,
            "11605": 14650828800.0,
            "11610": 14650828800.0,
            "11615": 14650828800.0,
            "11620": 14650828800.0,
            "11625": 14650828800.0,
            "11630": 14650828800.0,
            "11635": 14650828800.0,
            "11640": 14650828800.0,
            "11645": 14650828800.0,
            "11650": 14650828800.0,
            "11655": 14650828800.0,
            "11660": 14650828800.0,
            "11665": 14650828800.0,
            "11670": 14650828800.0,
            "11675": 14650828800.0,
            "11680": 14650828800.0,
            "11685": 14650828800.0,
            "11690": 14650828800.0,
            "11695": 14650828800.0,
            "11700": 14650828800.0,
            "11705": 14650828800.0,
            "11710": 14650828800.0,
            "11715": 14650828800.0,
            "11720": 14650828800.0,
            "11725": 14650828800.0,
            "11730": 14650828800.0,
            "11735": 14650828800.0,
            "11740": 14650828800.0,
            "11745": 14650828800.0,
            "11750": 14650828800.0,
            "11755": 14650828800.0,
            "11760": 14650828800.0,
            "11765": 14650828800.0,
            "11770": 14650828800.0,
            "11775": 14650828800.0,
            "11780": 14650828800.0,
            "11785": 14650828800.0,
            "11790": 14650828800.0,
            "11795": 14650828800.0,
            "11800": 14650828800.0,
            "11805": 14650828800.0,
            "11810": 14650828800.0,
            "11815": 14650828800.0,
            "11820": 14650828800.0,
            "11825": 14650828800.0,
            "11830": 14650828800.0,
            "11835": 14650828800.0,
            "11840": 14650828800.0,
            "11845": 14650828800.0,
            "11850": 14650828800.0,
            "11855": 14650828800.0,
            "11860": 14650828800.0,
            "11865": 14650828800.0,
            "11870": 14650828800.0,
            "11875": 14650828800.0,
            "11880": 14650828800.0,
            "11885": 14650828800.0,
            "11890": 14650828800.0,
            "11895": 14650828800.0,
            "11900": 14650828800.0,
            "11905": 14650828800.0,
            "11910": 14650828800.0,
            "11915": 14650828800.0,
            "11920": 14650828800.0,
            "11925": 14650828800.0,
            "11930": 14650828800.0,
            "11935": 14650828800.0,
            "11940": 14650828800.0,
            "11945": 14650828800.0,
            "11950": 14650828800.0,
            "11955": 14650828800.0,
            "11960": 14650828800.0,
            "11965": 14650828800.0,
            "11970": 14650828800.0,
            "11975": 14650828800.0,
            "11980": 14650828800.0,
            "11985": 14650828800.0,
            "11990": 14650828800.0,
            "11995": 14650828800.0,
            "12000": 14650828800.0,
            "12005": 14650828800.0,
            "12010": 14650828800.0,
            "12015": 14650828800.0,
            "12020": 14650828800.0,
            "12025": 14650828800.0,
            "12030": 14650828800.0,
            "12035": 14650828800.0,
            "12040": 14650828800.0,
            "12045": 14650828800.0,
            "12050": 14650828800.0,
            "12055": 14650828800.0,
            "12060": 14650828800.0,
            "12065": 14650828800.0,
            "12070": 14650828800.0,
            "12075": 14650828800.0,
            "12080": 14650828800.0,
            "12085": 14650828800.0,
            "12090": 14650828800.0,
            "12095": 14650828800.0,
            "12100": 14650828800.0,
            "12105": 14650828800.0,
            "12110": 14650828800.0,
            "12115": 14650828800.0,
            "12120": 14650828800.0,
            "12125": 14650828800.0,
            "12130": 14650828800.0,
            "12135": 14650828800.0,
            "12140": 14650828800.0,
            "12145": 14650828800.0,
            "12150": 14650828800.0,
            "12155": 14650828800.0,
            "12160": 14650828800.0,
            "12165": 14650828800.0,
            "12170": 14650828800.0,
            "12175": 14650828800.0,
            "12180": 14650828800.0,
            "12185": 14650828800.0,
            "12190": 14650828800.0,
            "12195": 14650828800.0,
            "12200": 14650828800.0,
            "12205": 14650828800.0,
            "12210": 14650828800.0,
            "12215": 14650828800.0,
            "12220": 14650828800.0,
            "12225": 14650828800.0,
            "12230": 14650828800.0,
            "12235": 14650828800.0,
            "12240": 14650828800.0,
            "12245": 14650828800.0,
            "12250": 14650828800.0,
            "12255": 14650828800.0,
            "12260": 14650828800.0,
            "12265": 14650828800.0,
            "12270": 14650828800.0,
            "12275": 14650828800.0,
            "12280": 14650828800.0,
            "12285": 14650828800.0,
            "12290": 14650828800.0,
            "12295": 14650828800.0,
            "12300": 14650828800.0,
            "12305": 14650828800.0,
            "12310": 14650828800.0,
            "12315": 14650828800.0,
            "12320": 14650828800.0,
            "12325": 14650828800.0,
            "12330": 14650828800.0,
            "12335": 14650828800.0,
            "12340": 14650828800.0,
            "12345": 14650828800.0,
            "12350": 14650828800.0,
            "12355": 14650828800.0,
            "12360": 14650828800.0,
            "12365": 14650828800.0,
            "12370": 14650828800.0,
            "12375": 14650828800.0,
            "12380": 14650828800.0,
            "12385": 14650828800.0,
            "12390": 14650828800.0,
            "12395": 14650828800.0,
            "12400": 14650828800.0,
            "12405": 14650828800.0,
            "12410": 14650828800.0,
            "12415": 14650828800.0,
            "12420": 14650828800.0,
            "12425": 14650828800.0,
            "12430": 14650828800.0,
            "12435": 14650828800.0,
            "12440": 14650828800.0,
            "12445": 14650828800.0,
            "12450": 14650828800.0,
            "12455": 14650828800.0,
            "12460": 14650828800.0,
            "12465": 14650828800.0,
            "12470": 14650828800.0,
            "12475": 14650828800.0,
            "12480": 14650828800.0,
            "12485": 14650828800.0,
            "12490": 14650828800.0,
            "12495": 14650828800.0,
            "12500": 14650828800.0,
            "12505": 14650828800.0,
            "12510": 14650828800.0,
            "12515": 14650828800.0,
            "12520": 14650828800.0,
            "12525": 14650828800.0,
            "12530": 14650828800.0,
            "12535": 14650828800.0,
            "12540": 14650828800.0,
            "12545": 14650828800.0,
            "12550": 14650828800.0,
            "12555": 14650828800.0,
            "12560": 14650828800.0,
            "12565": 14650828800.0,
            "12570": 14650828800.0,
            "12575": 14650828800.0,
            "12580": 14650828800.0,
            "12585": 14650828800.0,
            "12590": 14650828800.0,
            "12595": 14650828800.0,
            "12600": 14650828800.0,
            "12605": 14650828800.0,
            "12610": 14650828800.0,
            "12615": 14650828800.0,
            "12620": 14650828800.0,
            "12625": 14650828800.0,
            "12630": 14650828800.0,
            "12635": 14650828800.0,
            "12640": 14650828800.0,
            "12645": 14650828800.0,
            "12650": 14650828800.0,
            "12655": 14650828800.0,
            "12660": 14650828800.0,
            "12665": 14650828800.0,
            "12670": 14650828800.0,
            "12675": 14650828800.0,
            "12680": 14650828800.0,
            "12685": 14650828800.0,
            "12690": 14650828800.0,
            "12695": 14650828800.0,
            "12700": 14650828800.0,
            "12705": 14650828800.0,
            "12710": 14650828800.0,
            "12715": 14650828800.0,
            "12720": 14650828800.0,
            "12725": 14650828800.0,
            "12730": 14650828800.0,
            "12735": 14650828800.0,
            "12740": 14650828800.0,
            "12745": 14650828800.0,
            "12750": 14650828800.0,
            "12755": 14650828800.0,
            "12760": 14650828800.0,
            "12765": 14650828800.0,
            "12770": 14650828800.0,
            "12775": 14650828800.0,
            "12780": 14650828800.0,
            "12785": 14650828800.0,
            "12790": 14650828800.0,
            "12795": 14650828800.0,
            "12800": 14650828800.0,
            "12805": 14650828800.0,
            "12810": 14650828800.0,
            "12815": 14650828800.0,
            "12820": 14650828800.0,
            "12825": 14650828800.0,
            "12830": 14650828800.0,
            "12835": 14650828800.0,
            "12840": 14650828800.0,
            "12845": 14650828800.0,
            "12850": 14650828800.0,
            "12855": 14650828800.0,
            "12860": 14650828800.0,
            "12865": 14650828800.0,
            "12870": 14650828800.0,
            "12875": 14650828800.0,
            "12880": 14650828800.0,
            "12885": 14650828800.0,
            "12890": 14650828800.0,
            "12895": 14650828800.0,
            "12900": 14650828800.0,
            "12905": 14650828800.0,
            "12910": 14650828800.0,
            "12915": 14650828800.0,
            "12920": 14650828800.0,
            "12925": 14650828800.0,
            "12930": 14650828800.0,
            "12935": 14650828800.0,
            "12940": 14650828800.0,
            "12945": 14650828800.0,
            "12950": 14650828800.0,
            "12955": 14650828800.0,
            "12960": 14650828800.0,
            "12965": 14650828800.0,
            "12970": 14650828800.0,
            "12975": 14650828800.0,
            "12980": 14650828800.0,
            "12985": 14650828800.0,
            "12990": 14650828800.0,
            "12995": 14650828800.0,
            "13000": 14650828800.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 13000,
        "step_interval": 5,
        "values": {
            "1": 33457934336.0,
            "5": 34434887680.0,
            "10": 34434887680.0,
            "15": 34434887680.0,
            "20": 34434887680.0,
            "25": 34434887680.0,
            "30": 34434887680.0,
            "35": 34434887680.0,
            "40": 34434887680.0,
            "45": 34434887680.0,
            "50": 34434887680.0,
            "55": 34434887680.0,
            "60": 34434887680.0,
            "65": 34434887680.0,
            "70": 34434887680.0,
            "75": 34434887680.0,
            "80": 34434887680.0,
            "85": 34434887680.0,
            "90": 34434887680.0,
            "95": 34434887680.0,
            "100": 34434887680.0,
            "105": 34434887680.0,
            "110": 34434887680.0,
            "115": 34434887680.0,
            "120": 34434887680.0,
            "125": 34434887680.0,
            "130": 34434887680.0,
            "135": 34434887680.0,
            "140": 34434887680.0,
            "145": 34434887680.0,
            "150": 34434887680.0,
            "155": 34434887680.0,
            "160": 34434887680.0,
            "165": 34434887680.0,
            "170": 34434887680.0,
            "175": 34434887680.0,
            "180": 34434887680.0,
            "185": 34434887680.0,
            "190": 34434887680.0,
            "195": 34434887680.0,
            "200": 34434887680.0,
            "205": 34434887680.0,
            "210": 34434887680.0,
            "215": 34434887680.0,
            "220": 34434887680.0,
            "225": 34434887680.0,
            "230": 34434887680.0,
            "235": 34434887680.0,
            "240": 34434887680.0,
            "245": 34434887680.0,
            "250": 34434887680.0,
            "255": 34434887680.0,
            "260": 34434887680.0,
            "265": 34434887680.0,
            "270": 34434887680.0,
            "275": 34434887680.0,
            "280": 34434887680.0,
            "285": 34434887680.0,
            "290": 34434887680.0,
            "295": 34434887680.0,
            "300": 34434887680.0,
            "305": 34434887680.0,
            "310": 34434887680.0,
            "315": 34434887680.0,
            "320": 34434887680.0,
            "325": 34434887680.0,
            "330": 34434887680.0,
            "335": 34434887680.0,
            "340": 34434887680.0,
            "345": 34434887680.0,
            "350": 34434887680.0,
            "355": 34434887680.0,
            "360": 34434887680.0,
            "365": 34434887680.0,
            "370": 34434887680.0,
            "375": 34434887680.0,
            "380": 34434887680.0,
            "385": 34434887680.0,
            "390": 34434887680.0,
            "395": 34434887680.0,
            "400": 34434887680.0,
            "405": 34434887680.0,
            "410": 34434887680.0,
            "415": 34434887680.0,
            "420": 34434887680.0,
            "425": 34434887680.0,
            "430": 34434887680.0,
            "435": 34434887680.0,
            "440": 34434887680.0,
            "445": 34434887680.0,
            "450": 34434887680.0,
            "455": 34434887680.0,
            "460": 34434887680.0,
            "465": 34434887680.0,
            "470": 34434887680.0,
            "475": 34434887680.0,
            "480": 34434887680.0,
            "485": 34434887680.0,
            "490": 34434887680.0,
            "495": 34434887680.0,
            "500": 34434887680.0,
            "505": 34434887680.0,
            "510": 34434887680.0,
            "515": 34434887680.0,
            "520": 34434887680.0,
            "525": 34434887680.0,
            "530": 34434887680.0,
            "535": 34434887680.0,
            "540": 34434887680.0,
            "545": 34434887680.0,
            "550": 34434887680.0,
            "555": 34434887680.0,
            "560": 34434887680.0,
            "565": 34434887680.0,
            "570": 34434887680.0,
            "575": 34434887680.0,
            "580": 34434887680.0,
            "585": 34434887680.0,
            "590": 34434887680.0,
            "595": 34434887680.0,
            "600": 34434887680.0,
            "605": 34434887680.0,
            "610": 34434887680.0,
            "615": 34434887680.0,
            "620": 34434887680.0,
            "625": 34434887680.0,
            "630": 34434887680.0,
            "635": 34434887680.0,
            "640": 34434887680.0,
            "645": 34434887680.0,
            "650": 34434887680.0,
            "655": 34434887680.0,
            "660": 34434887680.0,
            "665": 34434887680.0,
            "670": 34434887680.0,
            "675": 34434887680.0,
            "680": 34434887680.0,
            "685": 34434887680.0,
            "690": 34434887680.0,
            "695": 34434887680.0,
            "700": 34434887680.0,
            "705": 34434887680.0,
            "710": 34434887680.0,
            "715": 34434887680.0,
            "720": 34434887680.0,
            "725": 34434887680.0,
            "730": 34434887680.0,
            "735": 34434887680.0,
            "740": 34434887680.0,
            "745": 34434887680.0,
            "750": 34434887680.0,
            "755": 34434887680.0,
            "760": 34434887680.0,
            "765": 34434887680.0,
            "770": 34434887680.0,
            "775": 34434887680.0,
            "780": 34434887680.0,
            "785": 34434887680.0,
            "790": 34434887680.0,
            "795": 34434887680.0,
            "800": 34434887680.0,
            "805": 34434887680.0,
            "810": 34434887680.0,
            "815": 34434887680.0,
            "820": 34434887680.0,
            "825": 34434887680.0,
            "830": 34434887680.0,
            "835": 34434887680.0,
            "840": 34434887680.0,
            "845": 34434887680.0,
            "850": 34434887680.0,
            "855": 34434887680.0,
            "860": 34434887680.0,
            "865": 34434887680.0,
            "870": 34434887680.0,
            "875": 34434887680.0,
            "880": 34434887680.0,
            "885": 34434887680.0,
            "890": 34434887680.0,
            "895": 34434887680.0,
            "900": 34434887680.0,
            "905": 34434887680.0,
            "910": 34434887680.0,
            "915": 34434887680.0,
            "920": 34434887680.0,
            "925": 34434887680.0,
            "930": 34434887680.0,
            "935": 34434887680.0,
            "940": 34434887680.0,
            "945": 34434887680.0,
            "950": 34434887680.0,
            "955": 34434887680.0,
            "960": 34434887680.0,
            "965": 34434887680.0,
            "970": 34434887680.0,
            "975": 34434887680.0,
            "980": 34434887680.0,
            "985": 34434887680.0,
            "990": 34434887680.0,
            "995": 34434887680.0,
            "1000": 34434887680.0,
            "1005": 34434887680.0,
            "1010": 34434887680.0,
            "1015": 34434887680.0,
            "1020": 34434887680.0,
            "1025": 34434887680.0,
            "1030": 34434887680.0,
            "1035": 34434887680.0,
            "1040": 34434887680.0,
            "1045": 34434887680.0,
            "1050": 34434887680.0,
            "1055": 34434887680.0,
            "1060": 34434887680.0,
            "1065": 34434887680.0,
            "1070": 34434887680.0,
            "1075": 34434887680.0,
            "1080": 34434887680.0,
            "1085": 34434887680.0,
            "1090": 34434887680.0,
            "1095": 34434887680.0,
            "1100": 34434887680.0,
            "1105": 34434887680.0,
            "1110": 34434887680.0,
            "1115": 34434887680.0,
            "1120": 34434887680.0,
            "1125": 34434887680.0,
            "1130": 34434887680.0,
            "1135": 34434887680.0,
            "1140": 34434887680.0,
            "1145": 34434887680.0,
            "1150": 34434887680.0,
            "1155": 34434887680.0,
            "1160": 34434887680.0,
            "1165": 34434887680.0,
            "1170": 34434887680.0,
            "1175": 34434887680.0,
            "1180": 34434887680.0,
            "1185": 34434887680.0,
            "1190": 34434887680.0,
            "1195": 34434887680.0,
            "1200": 34434887680.0,
            "1205": 34434887680.0,
            "1210": 34434887680.0,
            "1215": 34434887680.0,
            "1220": 34434887680.0,
            "1225": 34434887680.0,
            "1230": 34434887680.0,
            "1235": 34434887680.0,
            "1240": 34434887680.0,
            "1245": 34434887680.0,
            "1250": 34434887680.0,
            "1255": 34434887680.0,
            "1260": 34434887680.0,
            "1265": 34434887680.0,
            "1270": 34434887680.0,
            "1275": 34434887680.0,
            "1280": 34434887680.0,
            "1285": 34434887680.0,
            "1290": 34434887680.0,
            "1295": 34434887680.0,
            "1300": 34434887680.0,
            "1305": 34434887680.0,
            "1310": 34434887680.0,
            "1315": 34434887680.0,
            "1320": 34434887680.0,
            "1325": 34434887680.0,
            "1330": 34434887680.0,
            "1335": 34434887680.0,
            "1340": 34434887680.0,
            "1345": 34434887680.0,
            "1350": 34434887680.0,
            "1355": 34434887680.0,
            "1360": 34434887680.0,
            "1365": 34434887680.0,
            "1370": 34434887680.0,
            "1375": 34434887680.0,
            "1380": 34434887680.0,
            "1385": 34434887680.0,
            "1390": 34434887680.0,
            "1395": 34434887680.0,
            "1400": 34434887680.0,
            "1405": 34434887680.0,
            "1410": 34434887680.0,
            "1415": 34434887680.0,
            "1420": 34434887680.0,
            "1425": 34434887680.0,
            "1430": 34434887680.0,
            "1435": 34434887680.0,
            "1440": 34434887680.0,
            "1445": 34434887680.0,
            "1450": 34434887680.0,
            "1455": 34434887680.0,
            "1460": 34434887680.0,
            "1465": 34434887680.0,
            "1470": 34434887680.0,
            "1475": 34434887680.0,
            "1480": 34434887680.0,
            "1485": 34434887680.0,
            "1490": 34434887680.0,
            "1495": 34434887680.0,
            "1500": 34434887680.0,
            "1505": 34434887680.0,
            "1510": 34434887680.0,
            "1515": 34434887680.0,
            "1520": 34434887680.0,
            "1525": 34434887680.0,
            "1530": 34434887680.0,
            "1535": 34434887680.0,
            "1540": 34434887680.0,
            "1545": 34434887680.0,
            "1550": 34434887680.0,
            "1555": 34434887680.0,
            "1560": 34434887680.0,
            "1565": 34434887680.0,
            "1570": 34434887680.0,
            "1575": 34434887680.0,
            "1580": 34434887680.0,
            "1585": 34434887680.0,
            "1590": 34434887680.0,
            "1595": 34434887680.0,
            "1600": 34434887680.0,
            "1605": 34434887680.0,
            "1610": 34434887680.0,
            "1615": 34434887680.0,
            "1620": 34434887680.0,
            "1625": 34434887680.0,
            "1630": 34434887680.0,
            "1635": 34434887680.0,
            "1640": 34434887680.0,
            "1645": 34434887680.0,
            "1650": 34434887680.0,
            "1655": 34434887680.0,
            "1660": 34434887680.0,
            "1665": 34434887680.0,
            "1670": 34434887680.0,
            "1675": 34434887680.0,
            "1680": 34434887680.0,
            "1685": 34434887680.0,
            "1690": 34434887680.0,
            "1695": 34434887680.0,
            "1700": 34434887680.0,
            "1705": 34434887680.0,
            "1710": 34434887680.0,
            "1715": 34434887680.0,
            "1720": 34434887680.0,
            "1725": 34434887680.0,
            "1730": 34434887680.0,
            "1735": 34434887680.0,
            "1740": 34434887680.0,
            "1745": 34434887680.0,
            "1750": 34434887680.0,
            "1755": 34434887680.0,
            "1760": 34434887680.0,
            "1765": 34434887680.0,
            "1770": 34434887680.0,
            "1775": 34434887680.0,
            "1780": 34434887680.0,
            "1785": 34434887680.0,
            "1790": 34434887680.0,
            "1795": 34434887680.0,
            "1800": 34434887680.0,
            "1805": 34434887680.0,
            "1810": 34434887680.0,
            "1815": 34434887680.0,
            "1820": 34434887680.0,
            "1825": 34434887680.0,
            "1830": 34434887680.0,
            "1835": 34434887680.0,
            "1840": 34434887680.0,
            "1845": 34434887680.0,
            "1850": 34434887680.0,
            "1855": 34434887680.0,
            "1860": 34434887680.0,
            "1865": 34434887680.0,
            "1870": 34434887680.0,
            "1875": 34434887680.0,
            "1880": 34434887680.0,
            "1885": 34434887680.0,
            "1890": 34434887680.0,
            "1895": 34434887680.0,
            "1900": 34434887680.0,
            "1905": 34434887680.0,
            "1910": 34434887680.0,
            "1915": 34434887680.0,
            "1920": 34434887680.0,
            "1925": 34434887680.0,
            "1930": 34434887680.0,
            "1935": 34434887680.0,
            "1940": 34434887680.0,
            "1945": 34434887680.0,
            "1950": 34434887680.0,
            "1955": 34434887680.0,
            "1960": 34434887680.0,
            "1965": 34434887680.0,
            "1970": 34434887680.0,
            "1975": 34434887680.0,
            "1980": 34434887680.0,
            "1985": 34434887680.0,
            "1990": 34434887680.0,
            "1995": 34434887680.0,
            "2000": 34434887680.0,
            "2005": 34434887680.0,
            "2010": 34434887680.0,
            "2015": 34434887680.0,
            "2020": 34434887680.0,
            "2025": 34434887680.0,
            "2030": 34434887680.0,
            "2035": 34434887680.0,
            "2040": 34434887680.0,
            "2045": 34434887680.0,
            "2050": 34434887680.0,
            "2055": 34434887680.0,
            "2060": 34434887680.0,
            "2065": 34434887680.0,
            "2070": 34434887680.0,
            "2075": 34434887680.0,
            "2080": 34434887680.0,
            "2085": 34434887680.0,
            "2090": 34434887680.0,
            "2095": 34434887680.0,
            "2100": 34434887680.0,
            "2105": 34434887680.0,
            "2110": 34434887680.0,
            "2115": 34434887680.0,
            "2120": 34434887680.0,
            "2125": 34434887680.0,
            "2130": 34434887680.0,
            "2135": 34434887680.0,
            "2140": 34434887680.0,
            "2145": 34434887680.0,
            "2150": 34434887680.0,
            "2155": 34434887680.0,
            "2160": 34434887680.0,
            "2165": 34434887680.0,
            "2170": 34434887680.0,
            "2175": 34434887680.0,
            "2180": 34434887680.0,
            "2185": 34434887680.0,
            "2190": 34434887680.0,
            "2195": 34434887680.0,
            "2200": 34434887680.0,
            "2205": 34434887680.0,
            "2210": 34434887680.0,
            "2215": 34434887680.0,
            "2220": 34434887680.0,
            "2225": 34434887680.0,
            "2230": 34434887680.0,
            "2235": 34434887680.0,
            "2240": 34434887680.0,
            "2245": 34434887680.0,
            "2250": 34434887680.0,
            "2255": 34434887680.0,
            "2260": 34434887680.0,
            "2265": 34434887680.0,
            "2270": 34434887680.0,
            "2275": 34434887680.0,
            "2280": 34434887680.0,
            "2285": 34434887680.0,
            "2290": 34434887680.0,
            "2295": 34434887680.0,
            "2300": 34434887680.0,
            "2305": 34434887680.0,
            "2310": 34434887680.0,
            "2315": 34434887680.0,
            "2320": 34434887680.0,
            "2325": 34434887680.0,
            "2330": 34434887680.0,
            "2335": 34434887680.0,
            "2340": 34434887680.0,
            "2345": 34434887680.0,
            "2350": 34434887680.0,
            "2355": 34434887680.0,
            "2360": 34434887680.0,
            "2365": 34434887680.0,
            "2370": 34434887680.0,
            "2375": 34434887680.0,
            "2380": 34434887680.0,
            "2385": 34434887680.0,
            "2390": 34434887680.0,
            "2395": 34434887680.0,
            "2400": 34434887680.0,
            "2405": 34434887680.0,
            "2410": 34434887680.0,
            "2415": 34434887680.0,
            "2420": 34434887680.0,
            "2425": 34434887680.0,
            "2430": 34434887680.0,
            "2435": 34434887680.0,
            "2440": 34434887680.0,
            "2445": 34434887680.0,
            "2450": 34434887680.0,
            "2455": 34434887680.0,
            "2460": 34434887680.0,
            "2465": 34434887680.0,
            "2470": 34434887680.0,
            "2475": 34434887680.0,
            "2480": 34434887680.0,
            "2485": 34434887680.0,
            "2490": 34434887680.0,
            "2495": 34434887680.0,
            "2500": 34434887680.0,
            "2505": 34434887680.0,
            "2510": 34434887680.0,
            "2515": 34434887680.0,
            "2520": 34434887680.0,
            "2525": 34434887680.0,
            "2530": 34434887680.0,
            "2535": 34434887680.0,
            "2540": 34434887680.0,
            "2545": 34434887680.0,
            "2550": 34434887680.0,
            "2555": 34434887680.0,
            "2560": 34434887680.0,
            "2565": 34434887680.0,
            "2570": 34434887680.0,
            "2575": 34434887680.0,
            "2580": 34434887680.0,
            "2585": 34434887680.0,
            "2590": 34434887680.0,
            "2595": 34434887680.0,
            "2600": 34434887680.0,
            "2605": 34434887680.0,
            "2610": 34434887680.0,
            "2615": 34434887680.0,
            "2620": 34434887680.0,
            "2625": 34434887680.0,
            "2630": 34434887680.0,
            "2635": 34434887680.0,
            "2640": 34434887680.0,
            "2645": 34434887680.0,
            "2650": 34434887680.0,
            "2655": 34434887680.0,
            "2660": 34434887680.0,
            "2665": 34434887680.0,
            "2670": 34434887680.0,
            "2675": 34434887680.0,
            "2680": 34434887680.0,
            "2685": 34434887680.0,
            "2690": 34434887680.0,
            "2695": 34434887680.0,
            "2700": 34434887680.0,
            "2705": 34434887680.0,
            "2710": 34434887680.0,
            "2715": 34434887680.0,
            "2720": 34434887680.0,
            "2725": 34434887680.0,
            "2730": 34434887680.0,
            "2735": 34434887680.0,
            "2740": 34434887680.0,
            "2745": 34434887680.0,
            "2750": 34434887680.0,
            "2755": 34434887680.0,
            "2760": 34434887680.0,
            "2765": 34434887680.0,
            "2770": 34434887680.0,
            "2775": 34434887680.0,
            "2780": 34434887680.0,
            "2785": 34434887680.0,
            "2790": 34434887680.0,
            "2795": 34434887680.0,
            "2800": 34434887680.0,
            "2805": 34434887680.0,
            "2810": 34434887680.0,
            "2815": 34434887680.0,
            "2820": 34434887680.0,
            "2825": 34434887680.0,
            "2830": 34434887680.0,
            "2835": 34434887680.0,
            "2840": 34434887680.0,
            "2845": 34434887680.0,
            "2850": 34434887680.0,
            "2855": 34434887680.0,
            "2860": 34434887680.0,
            "2865": 34434887680.0,
            "2870": 34434887680.0,
            "2875": 34434887680.0,
            "2880": 34434887680.0,
            "2885": 34434887680.0,
            "2890": 34434887680.0,
            "2895": 34434887680.0,
            "2900": 34434887680.0,
            "2905": 34434887680.0,
            "2910": 34434887680.0,
            "2915": 34434887680.0,
            "2920": 34434887680.0,
            "2925": 34434887680.0,
            "2930": 34434887680.0,
            "2935": 34434887680.0,
            "2940": 34434887680.0,
            "2945": 34434887680.0,
            "2950": 34434887680.0,
            "2955": 34434887680.0,
            "2960": 34434887680.0,
            "2965": 34434887680.0,
            "2970": 34434887680.0,
            "2975": 34434887680.0,
            "2980": 34434887680.0,
            "2985": 34434887680.0,
            "2990": 34434887680.0,
            "2995": 34434887680.0,
            "3000": 34434887680.0,
            "3005": 34434887680.0,
            "3010": 34434887680.0,
            "3015": 34434887680.0,
            "3020": 34434887680.0,
            "3025": 34434887680.0,
            "3030": 34434887680.0,
            "3035": 34434887680.0,
            "3040": 34434887680.0,
            "3045": 34434887680.0,
            "3050": 34434887680.0,
            "3055": 34434887680.0,
            "3060": 34434887680.0,
            "3065": 34434887680.0,
            "3070": 34434887680.0,
            "3075": 34434887680.0,
            "3080": 34434887680.0,
            "3085": 34434887680.0,
            "3090": 34434887680.0,
            "3095": 34434887680.0,
            "3100": 34434887680.0,
            "3105": 34434887680.0,
            "3110": 34434887680.0,
            "3115": 34434887680.0,
            "3120": 34434887680.0,
            "3125": 34434887680.0,
            "3130": 34434887680.0,
            "3135": 34434887680.0,
            "3140": 34434887680.0,
            "3145": 34434887680.0,
            "3150": 34434887680.0,
            "3155": 34434887680.0,
            "3160": 34434887680.0,
            "3165": 34434887680.0,
            "3170": 34434887680.0,
            "3175": 34434887680.0,
            "3180": 34434887680.0,
            "3185": 34434887680.0,
            "3190": 34434887680.0,
            "3195": 34434887680.0,
            "3200": 34434887680.0,
            "3205": 34434887680.0,
            "3210": 34434887680.0,
            "3215": 34434887680.0,
            "3220": 34434887680.0,
            "3225": 34434887680.0,
            "3230": 34434887680.0,
            "3235": 34434887680.0,
            "3240": 34434887680.0,
            "3245": 34434887680.0,
            "3250": 34434887680.0,
            "3255": 34434887680.0,
            "3260": 34434887680.0,
            "3265": 34434887680.0,
            "3270": 34434887680.0,
            "3275": 34434887680.0,
            "3280": 34434887680.0,
            "3285": 34434887680.0,
            "3290": 34434887680.0,
            "3295": 34434887680.0,
            "3300": 34434887680.0,
            "3305": 34434887680.0,
            "3310": 34434887680.0,
            "3315": 34434887680.0,
            "3320": 34434887680.0,
            "3325": 34434887680.0,
            "3330": 34434887680.0,
            "3335": 34434887680.0,
            "3340": 34434887680.0,
            "3345": 34434887680.0,
            "3350": 34434887680.0,
            "3355": 34434887680.0,
            "3360": 34434887680.0,
            "3365": 34434887680.0,
            "3370": 34434887680.0,
            "3375": 34434887680.0,
            "3380": 34434887680.0,
            "3385": 34434887680.0,
            "3390": 34434887680.0,
            "3395": 34434887680.0,
            "3400": 34434887680.0,
            "3405": 34434887680.0,
            "3410": 34434887680.0,
            "3415": 34434887680.0,
            "3420": 34434887680.0,
            "3425": 34434887680.0,
            "3430": 34434887680.0,
            "3435": 34434887680.0,
            "3440": 34434887680.0,
            "3445": 34434887680.0,
            "3450": 34434887680.0,
            "3455": 34434887680.0,
            "3460": 34434887680.0,
            "3465": 34434887680.0,
            "3470": 34434887680.0,
            "3475": 34434887680.0,
            "3480": 34434887680.0,
            "3485": 34434887680.0,
            "3490": 34434887680.0,
            "3495": 34434887680.0,
            "3500": 34434887680.0,
            "3505": 34434887680.0,
            "3510": 34434887680.0,
            "3515": 34434887680.0,
            "3520": 34434887680.0,
            "3525": 34434887680.0,
            "3530": 34434887680.0,
            "3535": 34434887680.0,
            "3540": 34434887680.0,
            "3545": 34434887680.0,
            "3550": 34434887680.0,
            "3555": 34434887680.0,
            "3560": 34434887680.0,
            "3565": 34434887680.0,
            "3570": 34434887680.0,
            "3575": 34434887680.0,
            "3580": 34434887680.0,
            "3585": 34434887680.0,
            "3590": 34434887680.0,
            "3595": 34434887680.0,
            "3600": 34434887680.0,
            "3605": 34434887680.0,
            "3610": 34434887680.0,
            "3615": 34434887680.0,
            "3620": 34434887680.0,
            "3625": 34434887680.0,
            "3630": 34434887680.0,
            "3635": 34434887680.0,
            "3640": 34434887680.0,
            "3645": 34434887680.0,
            "3650": 34434887680.0,
            "3655": 34434887680.0,
            "3660": 34434887680.0,
            "3665": 34434887680.0,
            "3670": 34434887680.0,
            "3675": 34434887680.0,
            "3680": 34434887680.0,
            "3685": 34434887680.0,
            "3690": 34434887680.0,
            "3695": 34434887680.0,
            "3700": 34434887680.0,
            "3705": 34434887680.0,
            "3710": 34434887680.0,
            "3715": 34434887680.0,
            "3720": 34434887680.0,
            "3725": 34434887680.0,
            "3730": 34434887680.0,
            "3735": 34434887680.0,
            "3740": 34434887680.0,
            "3745": 34434887680.0,
            "3750": 34434887680.0,
            "3755": 34434887680.0,
            "3760": 34434887680.0,
            "3765": 34434887680.0,
            "3770": 34434887680.0,
            "3775": 34434887680.0,
            "3780": 34434887680.0,
            "3785": 34434887680.0,
            "3790": 34434887680.0,
            "3795": 34434887680.0,
            "3800": 34434887680.0,
            "3805": 34434887680.0,
            "3810": 34434887680.0,
            "3815": 34434887680.0,
            "3820": 34434887680.0,
            "3825": 34434887680.0,
            "3830": 34434887680.0,
            "3835": 34434887680.0,
            "3840": 34434887680.0,
            "3845": 34434887680.0,
            "3850": 34434887680.0,
            "3855": 34434887680.0,
            "3860": 34434887680.0,
            "3865": 34434887680.0,
            "3870": 34434887680.0,
            "3875": 34434887680.0,
            "3880": 34434887680.0,
            "3885": 34434887680.0,
            "3890": 34434887680.0,
            "3895": 34434887680.0,
            "3900": 34434887680.0,
            "3905": 34434887680.0,
            "3910": 34434887680.0,
            "3915": 34434887680.0,
            "3920": 34434887680.0,
            "3925": 34434887680.0,
            "3930": 34434887680.0,
            "3935": 34434887680.0,
            "3940": 34434887680.0,
            "3945": 34434887680.0,
            "3950": 34434887680.0,
            "3955": 34434887680.0,
            "3960": 34434887680.0,
            "3965": 34434887680.0,
            "3970": 34434887680.0,
            "3975": 34434887680.0,
            "3980": 34434887680.0,
            "3985": 34434887680.0,
            "3990": 34434887680.0,
            "3995": 34434887680.0,
            "4000": 34434887680.0,
            "4005": 34434887680.0,
            "4010": 34434887680.0,
            "4015": 34434887680.0,
            "4020": 34434887680.0,
            "4025": 34434887680.0,
            "4030": 34434887680.0,
            "4035": 34434887680.0,
            "4040": 34434887680.0,
            "4045": 34434887680.0,
            "4050": 34434887680.0,
            "4055": 34434887680.0,
            "4060": 34434887680.0,
            "4065": 34434887680.0,
            "4070": 34434887680.0,
            "4075": 34434887680.0,
            "4080": 34434887680.0,
            "4085": 34434887680.0,
            "4090": 34434887680.0,
            "4095": 34434887680.0,
            "4100": 34434887680.0,
            "4105": 34434887680.0,
            "4110": 34434887680.0,
            "4115": 34434887680.0,
            "4120": 34434887680.0,
            "4125": 34434887680.0,
            "4130": 34434887680.0,
            "4135": 34434887680.0,
            "4140": 34434887680.0,
            "4145": 34434887680.0,
            "4150": 34434887680.0,
            "4155": 34434887680.0,
            "4160": 34434887680.0,
            "4165": 34434887680.0,
            "4170": 34434887680.0,
            "4175": 34434887680.0,
            "4180": 34434887680.0,
            "4185": 34434887680.0,
            "4190": 34434887680.0,
            "4195": 34434887680.0,
            "4200": 34434887680.0,
            "4205": 34434887680.0,
            "4210": 34434887680.0,
            "4215": 34434887680.0,
            "4220": 34434887680.0,
            "4225": 34434887680.0,
            "4230": 34434887680.0,
            "4235": 34434887680.0,
            "4240": 34434887680.0,
            "4245": 34434887680.0,
            "4250": 34434887680.0,
            "4255": 34434887680.0,
            "4260": 34434887680.0,
            "4265": 34434887680.0,
            "4270": 34434887680.0,
            "4275": 34434887680.0,
            "4280": 34434887680.0,
            "4285": 34434887680.0,
            "4290": 34434887680.0,
            "4295": 34434887680.0,
            "4300": 34434887680.0,
            "4305": 34434887680.0,
            "4310": 34434887680.0,
            "4315": 34434887680.0,
            "4320": 34434887680.0,
            "4325": 34434887680.0,
            "4330": 34434887680.0,
            "4335": 34434887680.0,
            "4340": 34434887680.0,
            "4345": 34434887680.0,
            "4350": 34434887680.0,
            "4355": 34434887680.0,
            "4360": 34434887680.0,
            "4365": 34434887680.0,
            "4370": 34434887680.0,
            "4375": 34434887680.0,
            "4380": 34434887680.0,
            "4385": 34434887680.0,
            "4390": 34434887680.0,
            "4395": 34434887680.0,
            "4400": 34434887680.0,
            "4405": 34434887680.0,
            "4410": 34434887680.0,
            "4415": 34434887680.0,
            "4420": 34434887680.0,
            "4425": 34434887680.0,
            "4430": 34434887680.0,
            "4435": 34434887680.0,
            "4440": 34434887680.0,
            "4445": 34434887680.0,
            "4450": 34434887680.0,
            "4455": 34434887680.0,
            "4460": 34434887680.0,
            "4465": 34434887680.0,
            "4470": 34434887680.0,
            "4475": 34434887680.0,
            "4480": 34434887680.0,
            "4485": 34434887680.0,
            "4490": 34434887680.0,
            "4495": 34434887680.0,
            "4500": 34434887680.0,
            "4505": 34434887680.0,
            "4510": 34434887680.0,
            "4515": 34434887680.0,
            "4520": 34434887680.0,
            "4525": 34434887680.0,
            "4530": 34434887680.0,
            "4535": 34434887680.0,
            "4540": 34434887680.0,
            "4545": 34434887680.0,
            "4550": 34434887680.0,
            "4555": 34434887680.0,
            "4560": 34434887680.0,
            "4565": 34434887680.0,
            "4570": 34434887680.0,
            "4575": 34434887680.0,
            "4580": 34434887680.0,
            "4585": 34434887680.0,
            "4590": 34434887680.0,
            "4595": 34434887680.0,
            "4600": 34434887680.0,
            "4605": 34434887680.0,
            "4610": 34434887680.0,
            "4615": 34434887680.0,
            "4620": 34434887680.0,
            "4625": 34434887680.0,
            "4630": 34434887680.0,
            "4635": 34434887680.0,
            "4640": 34434887680.0,
            "4645": 34434887680.0,
            "4650": 34434887680.0,
            "4655": 34434887680.0,
            "4660": 34434887680.0,
            "4665": 34434887680.0,
            "4670": 34434887680.0,
            "4675": 34434887680.0,
            "4680": 34434887680.0,
            "4685": 34434887680.0,
            "4690": 34434887680.0,
            "4695": 34434887680.0,
            "4700": 34434887680.0,
            "4705": 34434887680.0,
            "4710": 34434887680.0,
            "4715": 34434887680.0,
            "4720": 34434887680.0,
            "4725": 34434887680.0,
            "4730": 34434887680.0,
            "4735": 34434887680.0,
            "4740": 34434887680.0,
            "4745": 34434887680.0,
            "4750": 34434887680.0,
            "4755": 34434887680.0,
            "4760": 34434887680.0,
            "4765": 34434887680.0,
            "4770": 34434887680.0,
            "4775": 34434887680.0,
            "4780": 34434887680.0,
            "4785": 34434887680.0,
            "4790": 34434887680.0,
            "4795": 34434887680.0,
            "4800": 34434887680.0,
            "4805": 34434887680.0,
            "4810": 34434887680.0,
            "4815": 34434887680.0,
            "4820": 34434887680.0,
            "4825": 34434887680.0,
            "4830": 34434887680.0,
            "4835": 34434887680.0,
            "4840": 34434887680.0,
            "4845": 34434887680.0,
            "4850": 34434887680.0,
            "4855": 34434887680.0,
            "4860": 34434887680.0,
            "4865": 34434887680.0,
            "4870": 34434887680.0,
            "4875": 34434887680.0,
            "4880": 34434887680.0,
            "4885": 34434887680.0,
            "4890": 34434887680.0,
            "4895": 34434887680.0,
            "4900": 34434887680.0,
            "4905": 34434887680.0,
            "4910": 34434887680.0,
            "4915": 34434887680.0,
            "4920": 34434887680.0,
            "4925": 34434887680.0,
            "4930": 34434887680.0,
            "4935": 34434887680.0,
            "4940": 34434887680.0,
            "4945": 34434887680.0,
            "4950": 34434887680.0,
            "4955": 34434887680.0,
            "4960": 34434887680.0,
            "4965": 34434887680.0,
            "4970": 34434887680.0,
            "4975": 34434887680.0,
            "4980": 34434887680.0,
            "4985": 34434887680.0,
            "4990": 34434887680.0,
            "4995": 34434887680.0,
            "5000": 34434887680.0,
            "5005": 34434887680.0,
            "5010": 34434887680.0,
            "5015": 34434887680.0,
            "5020": 34434887680.0,
            "5025": 34434887680.0,
            "5030": 34434887680.0,
            "5035": 34434887680.0,
            "5040": 34434887680.0,
            "5045": 34434887680.0,
            "5050": 34434887680.0,
            "5055": 34434887680.0,
            "5060": 34434887680.0,
            "5065": 34434887680.0,
            "5070": 34434887680.0,
            "5075": 34434887680.0,
            "5080": 34434887680.0,
            "5085": 34434887680.0,
            "5090": 34434887680.0,
            "5095": 34434887680.0,
            "5100": 34434887680.0,
            "5105": 34434887680.0,
            "5110": 34434887680.0,
            "5115": 34434887680.0,
            "5120": 34434887680.0,
            "5125": 34434887680.0,
            "5130": 34434887680.0,
            "5135": 34434887680.0,
            "5140": 34434887680.0,
            "5145": 34434887680.0,
            "5150": 34434887680.0,
            "5155": 34434887680.0,
            "5160": 34434887680.0,
            "5165": 34434887680.0,
            "5170": 34434887680.0,
            "5175": 34434887680.0,
            "5180": 34434887680.0,
            "5185": 34434887680.0,
            "5190": 34434887680.0,
            "5195": 34434887680.0,
            "5200": 34434887680.0,
            "5205": 34434887680.0,
            "5210": 34434887680.0,
            "5215": 34434887680.0,
            "5220": 34434887680.0,
            "5225": 34434887680.0,
            "5230": 34434887680.0,
            "5235": 34434887680.0,
            "5240": 34434887680.0,
            "5245": 34434887680.0,
            "5250": 34434887680.0,
            "5255": 34434887680.0,
            "5260": 34434887680.0,
            "5265": 34434887680.0,
            "5270": 34434887680.0,
            "5275": 34434887680.0,
            "5280": 34434887680.0,
            "5285": 34434887680.0,
            "5290": 34434887680.0,
            "5295": 34434887680.0,
            "5300": 34434887680.0,
            "5305": 34434887680.0,
            "5310": 34434887680.0,
            "5315": 34434887680.0,
            "5320": 34434887680.0,
            "5325": 34434887680.0,
            "5330": 34434887680.0,
            "5335": 34434887680.0,
            "5340": 34434887680.0,
            "5345": 34434887680.0,
            "5350": 34434887680.0,
            "5355": 34434887680.0,
            "5360": 34434887680.0,
            "5365": 34434887680.0,
            "5370": 34434887680.0,
            "5375": 34434887680.0,
            "5380": 34434887680.0,
            "5385": 34434887680.0,
            "5390": 34434887680.0,
            "5395": 34434887680.0,
            "5400": 34434887680.0,
            "5405": 34434887680.0,
            "5410": 34434887680.0,
            "5415": 34434887680.0,
            "5420": 34434887680.0,
            "5425": 34434887680.0,
            "5430": 34434887680.0,
            "5435": 34434887680.0,
            "5440": 34434887680.0,
            "5445": 34434887680.0,
            "5450": 34434887680.0,
            "5455": 34434887680.0,
            "5460": 34434887680.0,
            "5465": 34434887680.0,
            "5470": 34434887680.0,
            "5475": 34434887680.0,
            "5480": 34434887680.0,
            "5485": 34434887680.0,
            "5490": 34434887680.0,
            "5495": 34434887680.0,
            "5500": 34434887680.0,
            "5505": 34434887680.0,
            "5510": 34434887680.0,
            "5515": 34434887680.0,
            "5520": 34434887680.0,
            "5525": 34434887680.0,
            "5530": 34434887680.0,
            "5535": 34434887680.0,
            "5540": 34434887680.0,
            "5545": 34434887680.0,
            "5550": 34434887680.0,
            "5555": 34434887680.0,
            "5560": 34434887680.0,
            "5565": 34434887680.0,
            "5570": 34434887680.0,
            "5575": 34434887680.0,
            "5580": 34434887680.0,
            "5585": 34434887680.0,
            "5590": 34434887680.0,
            "5595": 34434887680.0,
            "5600": 34434887680.0,
            "5605": 34434887680.0,
            "5610": 34434887680.0,
            "5615": 34434887680.0,
            "5620": 34434887680.0,
            "5625": 34434887680.0,
            "5630": 34434887680.0,
            "5635": 34434887680.0,
            "5640": 34434887680.0,
            "5645": 34434887680.0,
            "5650": 34434887680.0,
            "5655": 34434887680.0,
            "5660": 34434887680.0,
            "5665": 34434887680.0,
            "5670": 34434887680.0,
            "5675": 34434887680.0,
            "5680": 34434887680.0,
            "5685": 34434887680.0,
            "5690": 34434887680.0,
            "5695": 34434887680.0,
            "5700": 34434887680.0,
            "5705": 34434887680.0,
            "5710": 34434887680.0,
            "5715": 34434887680.0,
            "5720": 34434887680.0,
            "5725": 34434887680.0,
            "5730": 34434887680.0,
            "5735": 34434887680.0,
            "5740": 34434887680.0,
            "5745": 34434887680.0,
            "5750": 34434887680.0,
            "5755": 34434887680.0,
            "5760": 34434887680.0,
            "5765": 34434887680.0,
            "5770": 34434887680.0,
            "5775": 34434887680.0,
            "5780": 34434887680.0,
            "5785": 34434887680.0,
            "5790": 34434887680.0,
            "5795": 34434887680.0,
            "5800": 34434887680.0,
            "5805": 34434887680.0,
            "5810": 34434887680.0,
            "5815": 34434887680.0,
            "5820": 34434887680.0,
            "5825": 34434887680.0,
            "5830": 34434887680.0,
            "5835": 34434887680.0,
            "5840": 34434887680.0,
            "5845": 34434887680.0,
            "5850": 34434887680.0,
            "5855": 34434887680.0,
            "5860": 34434887680.0,
            "5865": 34434887680.0,
            "5870": 34434887680.0,
            "5875": 34434887680.0,
            "5880": 34434887680.0,
            "5885": 34434887680.0,
            "5890": 34434887680.0,
            "5895": 34434887680.0,
            "5900": 34434887680.0,
            "5905": 34434887680.0,
            "5910": 34434887680.0,
            "5915": 34434887680.0,
            "5920": 34434887680.0,
            "5925": 34434887680.0,
            "5930": 34434887680.0,
            "5935": 34434887680.0,
            "5940": 34434887680.0,
            "5945": 34434887680.0,
            "5950": 34434887680.0,
            "5955": 34434887680.0,
            "5960": 34434887680.0,
            "5965": 34434887680.0,
            "5970": 34434887680.0,
            "5975": 34434887680.0,
            "5980": 34434887680.0,
            "5985": 34434887680.0,
            "5990": 34434887680.0,
            "5995": 34434887680.0,
            "6000": 34434887680.0,
            "6005": 34434887680.0,
            "6010": 34434887680.0,
            "6015": 34434887680.0,
            "6020": 34434887680.0,
            "6025": 34434887680.0,
            "6030": 34434887680.0,
            "6035": 34434887680.0,
            "6040": 34434887680.0,
            "6045": 34434887680.0,
            "6050": 34434887680.0,
            "6055": 34434887680.0,
            "6060": 34434887680.0,
            "6065": 34434887680.0,
            "6070": 34434887680.0,
            "6075": 34434887680.0,
            "6080": 34434887680.0,
            "6085": 34434887680.0,
            "6090": 34434887680.0,
            "6095": 34434887680.0,
            "6100": 34434887680.0,
            "6105": 34434887680.0,
            "6110": 34434887680.0,
            "6115": 34434887680.0,
            "6120": 34434887680.0,
            "6125": 34434887680.0,
            "6130": 34434887680.0,
            "6135": 34434887680.0,
            "6140": 34434887680.0,
            "6145": 34434887680.0,
            "6150": 34434887680.0,
            "6155": 34434887680.0,
            "6160": 34434887680.0,
            "6165": 34434887680.0,
            "6170": 34434887680.0,
            "6175": 34434887680.0,
            "6180": 34434887680.0,
            "6185": 34434887680.0,
            "6190": 34434887680.0,
            "6195": 34434887680.0,
            "6200": 34434887680.0,
            "6205": 34434887680.0,
            "6210": 34434887680.0,
            "6215": 34434887680.0,
            "6220": 34434887680.0,
            "6225": 34434887680.0,
            "6230": 34434887680.0,
            "6235": 34434887680.0,
            "6240": 34434887680.0,
            "6245": 34434887680.0,
            "6250": 34434887680.0,
            "6255": 34434887680.0,
            "6260": 34434887680.0,
            "6265": 34434887680.0,
            "6270": 34434887680.0,
            "6275": 34434887680.0,
            "6280": 34434887680.0,
            "6285": 34434887680.0,
            "6290": 34434887680.0,
            "6295": 34434887680.0,
            "6300": 34434887680.0,
            "6305": 34434887680.0,
            "6310": 34434887680.0,
            "6315": 34434887680.0,
            "6320": 34434887680.0,
            "6325": 34434887680.0,
            "6330": 34434887680.0,
            "6335": 34434887680.0,
            "6340": 34434887680.0,
            "6345": 34434887680.0,
            "6350": 34434887680.0,
            "6355": 34434887680.0,
            "6360": 34434887680.0,
            "6365": 34434887680.0,
            "6370": 34434887680.0,
            "6375": 34434887680.0,
            "6380": 34434887680.0,
            "6385": 34434887680.0,
            "6390": 34434887680.0,
            "6395": 34434887680.0,
            "6400": 34434887680.0,
            "6405": 34434887680.0,
            "6410": 34434887680.0,
            "6415": 34434887680.0,
            "6420": 34434887680.0,
            "6425": 34434887680.0,
            "6430": 34434887680.0,
            "6435": 34434887680.0,
            "6440": 34434887680.0,
            "6445": 34434887680.0,
            "6450": 34434887680.0,
            "6455": 34434887680.0,
            "6460": 34434887680.0,
            "6465": 34434887680.0,
            "6470": 34434887680.0,
            "6475": 34434887680.0,
            "6480": 34434887680.0,
            "6485": 34434887680.0,
            "6490": 34434887680.0,
            "6495": 34434887680.0,
            "6500": 34434887680.0,
            "6505": 34434887680.0,
            "6510": 34434887680.0,
            "6515": 34434887680.0,
            "6520": 34434887680.0,
            "6525": 34434887680.0,
            "6530": 34434887680.0,
            "6535": 34434887680.0,
            "6540": 34434887680.0,
            "6545": 34434887680.0,
            "6550": 34434887680.0,
            "6555": 34434887680.0,
            "6560": 34434887680.0,
            "6565": 34434887680.0,
            "6570": 34434887680.0,
            "6575": 34434887680.0,
            "6580": 34434887680.0,
            "6585": 34434887680.0,
            "6590": 34434887680.0,
            "6595": 34434887680.0,
            "6600": 34434887680.0,
            "6605": 34434887680.0,
            "6610": 34434887680.0,
            "6615": 34434887680.0,
            "6620": 34434887680.0,
            "6625": 34434887680.0,
            "6630": 34434887680.0,
            "6635": 34434887680.0,
            "6640": 34434887680.0,
            "6645": 34434887680.0,
            "6650": 34434887680.0,
            "6655": 34434887680.0,
            "6660": 34434887680.0,
            "6665": 34434887680.0,
            "6670": 34434887680.0,
            "6675": 34434887680.0,
            "6680": 34434887680.0,
            "6685": 34434887680.0,
            "6690": 34434887680.0,
            "6695": 34434887680.0,
            "6700": 34434887680.0,
            "6705": 34434887680.0,
            "6710": 34434887680.0,
            "6715": 34434887680.0,
            "6720": 34434887680.0,
            "6725": 34434887680.0,
            "6730": 34434887680.0,
            "6735": 34434887680.0,
            "6740": 34434887680.0,
            "6745": 34434887680.0,
            "6750": 34434887680.0,
            "6755": 34434887680.0,
            "6760": 34434887680.0,
            "6765": 34434887680.0,
            "6770": 34434887680.0,
            "6775": 34434887680.0,
            "6780": 34434887680.0,
            "6785": 34434887680.0,
            "6790": 34434887680.0,
            "6795": 34434887680.0,
            "6800": 34434887680.0,
            "6805": 34434887680.0,
            "6810": 34434887680.0,
            "6815": 34434887680.0,
            "6820": 34434887680.0,
            "6825": 34434887680.0,
            "6830": 34434887680.0,
            "6835": 34434887680.0,
            "6840": 34434887680.0,
            "6845": 34434887680.0,
            "6850": 34434887680.0,
            "6855": 34434887680.0,
            "6860": 34434887680.0,
            "6865": 34434887680.0,
            "6870": 34434887680.0,
            "6875": 34434887680.0,
            "6880": 34434887680.0,
            "6885": 34434887680.0,
            "6890": 34434887680.0,
            "6895": 34434887680.0,
            "6900": 34434887680.0,
            "6905": 34434887680.0,
            "6910": 34434887680.0,
            "6915": 34434887680.0,
            "6920": 34434887680.0,
            "6925": 34434887680.0,
            "6930": 34434887680.0,
            "6935": 34434887680.0,
            "6940": 34434887680.0,
            "6945": 34434887680.0,
            "6950": 34434887680.0,
            "6955": 34434887680.0,
            "6960": 34434887680.0,
            "6965": 34434887680.0,
            "6970": 34434887680.0,
            "6975": 34434887680.0,
            "6980": 34434887680.0,
            "6985": 34434887680.0,
            "6990": 34434887680.0,
            "6995": 34434887680.0,
            "7000": 34434887680.0,
            "7005": 34434887680.0,
            "7010": 34434887680.0,
            "7015": 34434887680.0,
            "7020": 34434887680.0,
            "7025": 34434887680.0,
            "7030": 34434887680.0,
            "7035": 34434887680.0,
            "7040": 34434887680.0,
            "7045": 34434887680.0,
            "7050": 34434887680.0,
            "7055": 34434887680.0,
            "7060": 34434887680.0,
            "7065": 34434887680.0,
            "7070": 34434887680.0,
            "7075": 34434887680.0,
            "7080": 34434887680.0,
            "7085": 34434887680.0,
            "7090": 34434887680.0,
            "7095": 34434887680.0,
            "7100": 34434887680.0,
            "7105": 34434887680.0,
            "7110": 34434887680.0,
            "7115": 34434887680.0,
            "7120": 34434887680.0,
            "7125": 34434887680.0,
            "7130": 34434887680.0,
            "7135": 34434887680.0,
            "7140": 34434887680.0,
            "7145": 34434887680.0,
            "7150": 34434887680.0,
            "7155": 34434887680.0,
            "7160": 34434887680.0,
            "7165": 34434887680.0,
            "7170": 34434887680.0,
            "7175": 34434887680.0,
            "7180": 34434887680.0,
            "7185": 34434887680.0,
            "7190": 34434887680.0,
            "7195": 34434887680.0,
            "7200": 34434887680.0,
            "7205": 34434887680.0,
            "7210": 34434887680.0,
            "7215": 34434887680.0,
            "7220": 34434887680.0,
            "7225": 34434887680.0,
            "7230": 34434887680.0,
            "7235": 34434887680.0,
            "7240": 34434887680.0,
            "7245": 34434887680.0,
            "7250": 34434887680.0,
            "7255": 34434887680.0,
            "7260": 34434887680.0,
            "7265": 34434887680.0,
            "7270": 34434887680.0,
            "7275": 34434887680.0,
            "7280": 34434887680.0,
            "7285": 34434887680.0,
            "7290": 34434887680.0,
            "7295": 34434887680.0,
            "7300": 34434887680.0,
            "7305": 34434887680.0,
            "7310": 34434887680.0,
            "7315": 34434887680.0,
            "7320": 34434887680.0,
            "7325": 34434887680.0,
            "7330": 34434887680.0,
            "7335": 34434887680.0,
            "7340": 34434887680.0,
            "7345": 34434887680.0,
            "7350": 34434887680.0,
            "7355": 34434887680.0,
            "7360": 34434887680.0,
            "7365": 34434887680.0,
            "7370": 34434887680.0,
            "7375": 34434887680.0,
            "7380": 34434887680.0,
            "7385": 34434887680.0,
            "7390": 34434887680.0,
            "7395": 34434887680.0,
            "7400": 34434887680.0,
            "7405": 34434887680.0,
            "7410": 34434887680.0,
            "7415": 34434887680.0,
            "7420": 34434887680.0,
            "7425": 34434887680.0,
            "7430": 34434887680.0,
            "7435": 34434887680.0,
            "7440": 34434887680.0,
            "7445": 34434887680.0,
            "7450": 34434887680.0,
            "7455": 34434887680.0,
            "7460": 34434887680.0,
            "7465": 34434887680.0,
            "7470": 34434887680.0,
            "7475": 34434887680.0,
            "7480": 34434887680.0,
            "7485": 34434887680.0,
            "7490": 34434887680.0,
            "7495": 34434887680.0,
            "7500": 34434887680.0,
            "7505": 34434887680.0,
            "7510": 34434887680.0,
            "7515": 34434887680.0,
            "7520": 34434887680.0,
            "7525": 34434887680.0,
            "7530": 34434887680.0,
            "7535": 34434887680.0,
            "7540": 34434887680.0,
            "7545": 34434887680.0,
            "7550": 34434887680.0,
            "7555": 34434887680.0,
            "7560": 34434887680.0,
            "7565": 34434887680.0,
            "7570": 34434887680.0,
            "7575": 34434887680.0,
            "7580": 34434887680.0,
            "7585": 34434887680.0,
            "7590": 34434887680.0,
            "7595": 34434887680.0,
            "7600": 34434887680.0,
            "7605": 34434887680.0,
            "7610": 34434887680.0,
            "7615": 34434887680.0,
            "7620": 34434887680.0,
            "7625": 34434887680.0,
            "7630": 34434887680.0,
            "7635": 34434887680.0,
            "7640": 34434887680.0,
            "7645": 34434887680.0,
            "7650": 34434887680.0,
            "7655": 34434887680.0,
            "7660": 34434887680.0,
            "7665": 34434887680.0,
            "7670": 34434887680.0,
            "7675": 34434887680.0,
            "7680": 34434887680.0,
            "7685": 34434887680.0,
            "7690": 34434887680.0,
            "7695": 34434887680.0,
            "7700": 34434887680.0,
            "7705": 34434887680.0,
            "7710": 34434887680.0,
            "7715": 34434887680.0,
            "7720": 34434887680.0,
            "7725": 34434887680.0,
            "7730": 34434887680.0,
            "7735": 34434887680.0,
            "7740": 34434887680.0,
            "7745": 34434887680.0,
            "7750": 34434887680.0,
            "7755": 34434887680.0,
            "7760": 34434887680.0,
            "7765": 34434887680.0,
            "7770": 34434887680.0,
            "7775": 34434887680.0,
            "7780": 34434887680.0,
            "7785": 34434887680.0,
            "7790": 34434887680.0,
            "7795": 34434887680.0,
            "7800": 34434887680.0,
            "7805": 34434887680.0,
            "7810": 34434887680.0,
            "7815": 34434887680.0,
            "7820": 34434887680.0,
            "7825": 34434887680.0,
            "7830": 34434887680.0,
            "7835": 34434887680.0,
            "7840": 34434887680.0,
            "7845": 34434887680.0,
            "7850": 34434887680.0,
            "7855": 34434887680.0,
            "7860": 34434887680.0,
            "7865": 34434887680.0,
            "7870": 34434887680.0,
            "7875": 34434887680.0,
            "7880": 34434887680.0,
            "7885": 34434887680.0,
            "7890": 34434887680.0,
            "7895": 34434887680.0,
            "7900": 34434887680.0,
            "7905": 34434887680.0,
            "7910": 34434887680.0,
            "7915": 34434887680.0,
            "7920": 34434887680.0,
            "7925": 34434887680.0,
            "7930": 34434887680.0,
            "7935": 34434887680.0,
            "7940": 34434887680.0,
            "7945": 34434887680.0,
            "7950": 34434887680.0,
            "7955": 34434887680.0,
            "7960": 34434887680.0,
            "7965": 34434887680.0,
            "7970": 34434887680.0,
            "7975": 34434887680.0,
            "7980": 34434887680.0,
            "7985": 34434887680.0,
            "7990": 34434887680.0,
            "7995": 34434887680.0,
            "8000": 34434887680.0,
            "8005": 34434887680.0,
            "8010": 34434887680.0,
            "8015": 34434887680.0,
            "8020": 34434887680.0,
            "8025": 34434887680.0,
            "8030": 34434887680.0,
            "8035": 34434887680.0,
            "8040": 34434887680.0,
            "8045": 34434887680.0,
            "8050": 34434887680.0,
            "8055": 34434887680.0,
            "8060": 34434887680.0,
            "8065": 34434887680.0,
            "8070": 34434887680.0,
            "8075": 34434887680.0,
            "8080": 34434887680.0,
            "8085": 34434887680.0,
            "8090": 34434887680.0,
            "8095": 34434887680.0,
            "8100": 34434887680.0,
            "8105": 34434887680.0,
            "8110": 34434887680.0,
            "8115": 34434887680.0,
            "8120": 34434887680.0,
            "8125": 34434887680.0,
            "8130": 34434887680.0,
            "8135": 34434887680.0,
            "8140": 34434887680.0,
            "8145": 34434887680.0,
            "8150": 34434887680.0,
            "8155": 34434887680.0,
            "8160": 34434887680.0,
            "8165": 34434887680.0,
            "8170": 34434887680.0,
            "8175": 34434887680.0,
            "8180": 34434887680.0,
            "8185": 34434887680.0,
            "8190": 34434887680.0,
            "8195": 34434887680.0,
            "8200": 34434887680.0,
            "8205": 34434887680.0,
            "8210": 34434887680.0,
            "8215": 34434887680.0,
            "8220": 34434887680.0,
            "8225": 34434887680.0,
            "8230": 34434887680.0,
            "8235": 34434887680.0,
            "8240": 34434887680.0,
            "8245": 34434887680.0,
            "8250": 34434887680.0,
            "8255": 34434887680.0,
            "8260": 34434887680.0,
            "8265": 34434887680.0,
            "8270": 34434887680.0,
            "8275": 34434887680.0,
            "8280": 34434887680.0,
            "8285": 34434887680.0,
            "8290": 34434887680.0,
            "8295": 34434887680.0,
            "8300": 34434887680.0,
            "8305": 34434887680.0,
            "8310": 34434887680.0,
            "8315": 34434887680.0,
            "8320": 34434887680.0,
            "8325": 34434887680.0,
            "8330": 34434887680.0,
            "8335": 34434887680.0,
            "8340": 34434887680.0,
            "8345": 34434887680.0,
            "8350": 34434887680.0,
            "8355": 34434887680.0,
            "8360": 34434887680.0,
            "8365": 34434887680.0,
            "8370": 34434887680.0,
            "8375": 34434887680.0,
            "8380": 34434887680.0,
            "8385": 34434887680.0,
            "8390": 34434887680.0,
            "8395": 34434887680.0,
            "8400": 34434887680.0,
            "8405": 34434887680.0,
            "8410": 34434887680.0,
            "8415": 34434887680.0,
            "8420": 34434887680.0,
            "8425": 34434887680.0,
            "8430": 34434887680.0,
            "8435": 34434887680.0,
            "8440": 34434887680.0,
            "8445": 34434887680.0,
            "8450": 34434887680.0,
            "8455": 34434887680.0,
            "8460": 34434887680.0,
            "8465": 34434887680.0,
            "8470": 34434887680.0,
            "8475": 34434887680.0,
            "8480": 34434887680.0,
            "8485": 34434887680.0,
            "8490": 34434887680.0,
            "8495": 34434887680.0,
            "8500": 34434887680.0,
            "8505": 34434887680.0,
            "8510": 34434887680.0,
            "8515": 34434887680.0,
            "8520": 34434887680.0,
            "8525": 34434887680.0,
            "8530": 34434887680.0,
            "8535": 34434887680.0,
            "8540": 34434887680.0,
            "8545": 34434887680.0,
            "8550": 34434887680.0,
            "8555": 34434887680.0,
            "8560": 34434887680.0,
            "8565": 34434887680.0,
            "8570": 34434887680.0,
            "8575": 34434887680.0,
            "8580": 34434887680.0,
            "8585": 34434887680.0,
            "8590": 34434887680.0,
            "8595": 34434887680.0,
            "8600": 34434887680.0,
            "8605": 34434887680.0,
            "8610": 34434887680.0,
            "8615": 34434887680.0,
            "8620": 34434887680.0,
            "8625": 34434887680.0,
            "8630": 34434887680.0,
            "8635": 34434887680.0,
            "8640": 34434887680.0,
            "8645": 34434887680.0,
            "8650": 34434887680.0,
            "8655": 34434887680.0,
            "8660": 34434887680.0,
            "8665": 34434887680.0,
            "8670": 34434887680.0,
            "8675": 34434887680.0,
            "8680": 34434887680.0,
            "8685": 34434887680.0,
            "8690": 34434887680.0,
            "8695": 34434887680.0,
            "8700": 34434887680.0,
            "8705": 34434887680.0,
            "8710": 34434887680.0,
            "8715": 34434887680.0,
            "8720": 34434887680.0,
            "8725": 34434887680.0,
            "8730": 34434887680.0,
            "8735": 34434887680.0,
            "8740": 34434887680.0,
            "8745": 34434887680.0,
            "8750": 34434887680.0,
            "8755": 34434887680.0,
            "8760": 34434887680.0,
            "8765": 34434887680.0,
            "8770": 34434887680.0,
            "8775": 34434887680.0,
            "8780": 34434887680.0,
            "8785": 34434887680.0,
            "8790": 34434887680.0,
            "8795": 34434887680.0,
            "8800": 34434887680.0,
            "8805": 34434887680.0,
            "8810": 34434887680.0,
            "8815": 34434887680.0,
            "8820": 34434887680.0,
            "8825": 34434887680.0,
            "8830": 34434887680.0,
            "8835": 34434887680.0,
            "8840": 34434887680.0,
            "8845": 34434887680.0,
            "8850": 34434887680.0,
            "8855": 34434887680.0,
            "8860": 34434887680.0,
            "8865": 34434887680.0,
            "8870": 34434887680.0,
            "8875": 34434887680.0,
            "8880": 34434887680.0,
            "8885": 34434887680.0,
            "8890": 34434887680.0,
            "8895": 34434887680.0,
            "8900": 34434887680.0,
            "8905": 34434887680.0,
            "8910": 34434887680.0,
            "8915": 34434887680.0,
            "8920": 34434887680.0,
            "8925": 34434887680.0,
            "8930": 34434887680.0,
            "8935": 34434887680.0,
            "8940": 34434887680.0,
            "8945": 34434887680.0,
            "8950": 34434887680.0,
            "8955": 34434887680.0,
            "8960": 34434887680.0,
            "8965": 34434887680.0,
            "8970": 34434887680.0,
            "8975": 34434887680.0,
            "8980": 34434887680.0,
            "8985": 34434887680.0,
            "8990": 34434887680.0,
            "8995": 34434887680.0,
            "9000": 34434887680.0,
            "9005": 34434887680.0,
            "9010": 34434887680.0,
            "9015": 34434887680.0,
            "9020": 34434887680.0,
            "9025": 34434887680.0,
            "9030": 34434887680.0,
            "9035": 34434887680.0,
            "9040": 34434887680.0,
            "9045": 34434887680.0,
            "9050": 34434887680.0,
            "9055": 34434887680.0,
            "9060": 34434887680.0,
            "9065": 34434887680.0,
            "9070": 34434887680.0,
            "9075": 34434887680.0,
            "9080": 34434887680.0,
            "9085": 34434887680.0,
            "9090": 34434887680.0,
            "9095": 34434887680.0,
            "9100": 34434887680.0,
            "9105": 34434887680.0,
            "9110": 34434887680.0,
            "9115": 34434887680.0,
            "9120": 34434887680.0,
            "9125": 34434887680.0,
            "9130": 34434887680.0,
            "9135": 34434887680.0,
            "9140": 34434887680.0,
            "9145": 34434887680.0,
            "9150": 34434887680.0,
            "9155": 34434887680.0,
            "9160": 34434887680.0,
            "9165": 34434887680.0,
            "9170": 34434887680.0,
            "9175": 34434887680.0,
            "9180": 34434887680.0,
            "9185": 34434887680.0,
            "9190": 34434887680.0,
            "9195": 34434887680.0,
            "9200": 34434887680.0,
            "9205": 34434887680.0,
            "9210": 34434887680.0,
            "9215": 34434887680.0,
            "9220": 34434887680.0,
            "9225": 34434887680.0,
            "9230": 34434887680.0,
            "9235": 34434887680.0,
            "9240": 34434887680.0,
            "9245": 34434887680.0,
            "9250": 34434887680.0,
            "9255": 34434887680.0,
            "9260": 34434887680.0,
            "9265": 34434887680.0,
            "9270": 34434887680.0,
            "9275": 34434887680.0,
            "9280": 34434887680.0,
            "9285": 34434887680.0,
            "9290": 34434887680.0,
            "9295": 34434887680.0,
            "9300": 34434887680.0,
            "9305": 34434887680.0,
            "9310": 34434887680.0,
            "9315": 34434887680.0,
            "9320": 34434887680.0,
            "9325": 34434887680.0,
            "9330": 34434887680.0,
            "9335": 34434887680.0,
            "9340": 34434887680.0,
            "9345": 34434887680.0,
            "9350": 34434887680.0,
            "9355": 34434887680.0,
            "9360": 34434887680.0,
            "9365": 34434887680.0,
            "9370": 34434887680.0,
            "9375": 34434887680.0,
            "9380": 34434887680.0,
            "9385": 34434887680.0,
            "9390": 34434887680.0,
            "9395": 34434887680.0,
            "9400": 34434887680.0,
            "9405": 34434887680.0,
            "9410": 34434887680.0,
            "9415": 34434887680.0,
            "9420": 34434887680.0,
            "9425": 34434887680.0,
            "9430": 34434887680.0,
            "9435": 34434887680.0,
            "9440": 34434887680.0,
            "9445": 34434887680.0,
            "9450": 34434887680.0,
            "9455": 34434887680.0,
            "9460": 34434887680.0,
            "9465": 34434887680.0,
            "9470": 34434887680.0,
            "9475": 34434887680.0,
            "9480": 34434887680.0,
            "9485": 34434887680.0,
            "9490": 34434887680.0,
            "9495": 34434887680.0,
            "9500": 34434887680.0,
            "9505": 34434887680.0,
            "9510": 34434887680.0,
            "9515": 34434887680.0,
            "9520": 34434887680.0,
            "9525": 34434887680.0,
            "9530": 34434887680.0,
            "9535": 34434887680.0,
            "9540": 34434887680.0,
            "9545": 34434887680.0,
            "9550": 34434887680.0,
            "9555": 34434887680.0,
            "9560": 34434887680.0,
            "9565": 34434887680.0,
            "9570": 34434887680.0,
            "9575": 34434887680.0,
            "9580": 34434887680.0,
            "9585": 34434887680.0,
            "9590": 34434887680.0,
            "9595": 34434887680.0,
            "9600": 34434887680.0,
            "9605": 34434887680.0,
            "9610": 34434887680.0,
            "9615": 34434887680.0,
            "9620": 34434887680.0,
            "9625": 34434887680.0,
            "9630": 34434887680.0,
            "9635": 34434887680.0,
            "9640": 34434887680.0,
            "9645": 34434887680.0,
            "9650": 34434887680.0,
            "9655": 34434887680.0,
            "9660": 34434887680.0,
            "9665": 34434887680.0,
            "9670": 34434887680.0,
            "9675": 34434887680.0,
            "9680": 34434887680.0,
            "9685": 34434887680.0,
            "9690": 34434887680.0,
            "9695": 34434887680.0,
            "9700": 34434887680.0,
            "9705": 34434887680.0,
            "9710": 34434887680.0,
            "9715": 34434887680.0,
            "9720": 34434887680.0,
            "9725": 34434887680.0,
            "9730": 34434887680.0,
            "9735": 34434887680.0,
            "9740": 34434887680.0,
            "9745": 34434887680.0,
            "9750": 34434887680.0,
            "9755": 34434887680.0,
            "9760": 34434887680.0,
            "9765": 34434887680.0,
            "9770": 34434887680.0,
            "9775": 34434887680.0,
            "9780": 34434887680.0,
            "9785": 34434887680.0,
            "9790": 34434887680.0,
            "9795": 34434887680.0,
            "9800": 34434887680.0,
            "9805": 34434887680.0,
            "9810": 34434887680.0,
            "9815": 34434887680.0,
            "9820": 34434887680.0,
            "9825": 34434887680.0,
            "9830": 34434887680.0,
            "9835": 34434887680.0,
            "9840": 34434887680.0,
            "9845": 34434887680.0,
            "9850": 34434887680.0,
            "9855": 34434887680.0,
            "9860": 34434887680.0,
            "9865": 34434887680.0,
            "9870": 34434887680.0,
            "9875": 34434887680.0,
            "9880": 34434887680.0,
            "9885": 34434887680.0,
            "9890": 34434887680.0,
            "9895": 34434887680.0,
            "9900": 34434887680.0,
            "9905": 34434887680.0,
            "9910": 34434887680.0,
            "9915": 34434887680.0,
            "9920": 34434887680.0,
            "9925": 34434887680.0,
            "9930": 34434887680.0,
            "9935": 34434887680.0,
            "9940": 34434887680.0,
            "9945": 34434887680.0,
            "9950": 34434887680.0,
            "9955": 34434887680.0,
            "9960": 34434887680.0,
            "9965": 34434887680.0,
            "9970": 34434887680.0,
            "9975": 34434887680.0,
            "9980": 34434887680.0,
            "9985": 34434887680.0,
            "9990": 34434887680.0,
            "9995": 34434887680.0,
            "10000": 34434887680.0,
            "10005": 34434887680.0,
            "10010": 34434887680.0,
            "10015": 34434887680.0,
            "10020": 34434887680.0,
            "10025": 34434887680.0,
            "10030": 34434887680.0,
            "10035": 34434887680.0,
            "10040": 34434887680.0,
            "10045": 34434887680.0,
            "10050": 34434887680.0,
            "10055": 34434887680.0,
            "10060": 34434887680.0,
            "10065": 34434887680.0,
            "10070": 34434887680.0,
            "10075": 34434887680.0,
            "10080": 34434887680.0,
            "10085": 34434887680.0,
            "10090": 34434887680.0,
            "10095": 34434887680.0,
            "10100": 34434887680.0,
            "10105": 34434887680.0,
            "10110": 34434887680.0,
            "10115": 34434887680.0,
            "10120": 34434887680.0,
            "10125": 34434887680.0,
            "10130": 34434887680.0,
            "10135": 34434887680.0,
            "10140": 34434887680.0,
            "10145": 34434887680.0,
            "10150": 34434887680.0,
            "10155": 34434887680.0,
            "10160": 34434887680.0,
            "10165": 34434887680.0,
            "10170": 34434887680.0,
            "10175": 34434887680.0,
            "10180": 34434887680.0,
            "10185": 34434887680.0,
            "10190": 34434887680.0,
            "10195": 34434887680.0,
            "10200": 34434887680.0,
            "10205": 34434887680.0,
            "10210": 34434887680.0,
            "10215": 34434887680.0,
            "10220": 34434887680.0,
            "10225": 34434887680.0,
            "10230": 34434887680.0,
            "10235": 34434887680.0,
            "10240": 34434887680.0,
            "10245": 34434887680.0,
            "10250": 34434887680.0,
            "10255": 34434887680.0,
            "10260": 34434887680.0,
            "10265": 34434887680.0,
            "10270": 34434887680.0,
            "10275": 34434887680.0,
            "10280": 34434887680.0,
            "10285": 34434887680.0,
            "10290": 34434887680.0,
            "10295": 34434887680.0,
            "10300": 34434887680.0,
            "10305": 34434887680.0,
            "10310": 34434887680.0,
            "10315": 34434887680.0,
            "10320": 34434887680.0,
            "10325": 34434887680.0,
            "10330": 34434887680.0,
            "10335": 34434887680.0,
            "10340": 34434887680.0,
            "10345": 34434887680.0,
            "10350": 34434887680.0,
            "10355": 34434887680.0,
            "10360": 34434887680.0,
            "10365": 34434887680.0,
            "10370": 34434887680.0,
            "10375": 34434887680.0,
            "10380": 34434887680.0,
            "10385": 34434887680.0,
            "10390": 34434887680.0,
            "10395": 34434887680.0,
            "10400": 34434887680.0,
            "10405": 34434887680.0,
            "10410": 34434887680.0,
            "10415": 34434887680.0,
            "10420": 34434887680.0,
            "10425": 34434887680.0,
            "10430": 34434887680.0,
            "10435": 34434887680.0,
            "10440": 34434887680.0,
            "10445": 34434887680.0,
            "10450": 34434887680.0,
            "10455": 34434887680.0,
            "10460": 34434887680.0,
            "10465": 34434887680.0,
            "10470": 34434887680.0,
            "10475": 34434887680.0,
            "10480": 34434887680.0,
            "10485": 34434887680.0,
            "10490": 34434887680.0,
            "10495": 34434887680.0,
            "10500": 34434887680.0,
            "10505": 34434887680.0,
            "10510": 34434887680.0,
            "10515": 34434887680.0,
            "10520": 34434887680.0,
            "10525": 34434887680.0,
            "10530": 34434887680.0,
            "10535": 34434887680.0,
            "10540": 34434887680.0,
            "10545": 34434887680.0,
            "10550": 34434887680.0,
            "10555": 34434887680.0,
            "10560": 34434887680.0,
            "10565": 34434887680.0,
            "10570": 34434887680.0,
            "10575": 34434887680.0,
            "10580": 34434887680.0,
            "10585": 34434887680.0,
            "10590": 34434887680.0,
            "10595": 34434887680.0,
            "10600": 34434887680.0,
            "10605": 34434887680.0,
            "10610": 34434887680.0,
            "10615": 34434887680.0,
            "10620": 34434887680.0,
            "10625": 34434887680.0,
            "10630": 34434887680.0,
            "10635": 34434887680.0,
            "10640": 34434887680.0,
            "10645": 34434887680.0,
            "10650": 34434887680.0,
            "10655": 34434887680.0,
            "10660": 34434887680.0,
            "10665": 34434887680.0,
            "10670": 34434887680.0,
            "10675": 34434887680.0,
            "10680": 34434887680.0,
            "10685": 34434887680.0,
            "10690": 34434887680.0,
            "10695": 34434887680.0,
            "10700": 34434887680.0,
            "10705": 34434887680.0,
            "10710": 34434887680.0,
            "10715": 34434887680.0,
            "10720": 34434887680.0,
            "10725": 34434887680.0,
            "10730": 34434887680.0,
            "10735": 34434887680.0,
            "10740": 34434887680.0,
            "10745": 34434887680.0,
            "10750": 34434887680.0,
            "10755": 34434887680.0,
            "10760": 34434887680.0,
            "10765": 34434887680.0,
            "10770": 34434887680.0,
            "10775": 34434887680.0,
            "10780": 34434887680.0,
            "10785": 34434887680.0,
            "10790": 34434887680.0,
            "10795": 34434887680.0,
            "10800": 34434887680.0,
            "10805": 34434887680.0,
            "10810": 34434887680.0,
            "10815": 34434887680.0,
            "10820": 34434887680.0,
            "10825": 34434887680.0,
            "10830": 34434887680.0,
            "10835": 34434887680.0,
            "10840": 34434887680.0,
            "10845": 34434887680.0,
            "10850": 34434887680.0,
            "10855": 34434887680.0,
            "10860": 34434887680.0,
            "10865": 34434887680.0,
            "10870": 34434887680.0,
            "10875": 34434887680.0,
            "10880": 34434887680.0,
            "10885": 34434887680.0,
            "10890": 34434887680.0,
            "10895": 34434887680.0,
            "10900": 34434887680.0,
            "10905": 34434887680.0,
            "10910": 34434887680.0,
            "10915": 34434887680.0,
            "10920": 34434887680.0,
            "10925": 34434887680.0,
            "10930": 34434887680.0,
            "10935": 34434887680.0,
            "10940": 34434887680.0,
            "10945": 34434887680.0,
            "10950": 34434887680.0,
            "10955": 34434887680.0,
            "10960": 34434887680.0,
            "10965": 34434887680.0,
            "10970": 34434887680.0,
            "10975": 34434887680.0,
            "10980": 34434887680.0,
            "10985": 34434887680.0,
            "10990": 34434887680.0,
            "10995": 34434887680.0,
            "11000": 34434887680.0,
            "11005": 34434887680.0,
            "11010": 34434887680.0,
            "11015": 34434887680.0,
            "11020": 34434887680.0,
            "11025": 34434887680.0,
            "11030": 34434887680.0,
            "11035": 34434887680.0,
            "11040": 34434887680.0,
            "11045": 34434887680.0,
            "11050": 34434887680.0,
            "11055": 34434887680.0,
            "11060": 34434887680.0,
            "11065": 34434887680.0,
            "11070": 34434887680.0,
            "11075": 34434887680.0,
            "11080": 34434887680.0,
            "11085": 34434887680.0,
            "11090": 34434887680.0,
            "11095": 34434887680.0,
            "11100": 34434887680.0,
            "11105": 34434887680.0,
            "11110": 34434887680.0,
            "11115": 34434887680.0,
            "11120": 34434887680.0,
            "11125": 34434887680.0,
            "11130": 34434887680.0,
            "11135": 34434887680.0,
            "11140": 34434887680.0,
            "11145": 34434887680.0,
            "11150": 34434887680.0,
            "11155": 34434887680.0,
            "11160": 34434887680.0,
            "11165": 34434887680.0,
            "11170": 34434887680.0,
            "11175": 34434887680.0,
            "11180": 34434887680.0,
            "11185": 34434887680.0,
            "11190": 34434887680.0,
            "11195": 34434887680.0,
            "11200": 34434887680.0,
            "11205": 34434887680.0,
            "11210": 34434887680.0,
            "11215": 34434887680.0,
            "11220": 34434887680.0,
            "11225": 34434887680.0,
            "11230": 34434887680.0,
            "11235": 34434887680.0,
            "11240": 34434887680.0,
            "11245": 34434887680.0,
            "11250": 34434887680.0,
            "11255": 34434887680.0,
            "11260": 34434887680.0,
            "11265": 34434887680.0,
            "11270": 34434887680.0,
            "11275": 34434887680.0,
            "11280": 34434887680.0,
            "11285": 34434887680.0,
            "11290": 34434887680.0,
            "11295": 34434887680.0,
            "11300": 34434887680.0,
            "11305": 34434887680.0,
            "11310": 34434887680.0,
            "11315": 34434887680.0,
            "11320": 34434887680.0,
            "11325": 34434887680.0,
            "11330": 34434887680.0,
            "11335": 34434887680.0,
            "11340": 34434887680.0,
            "11345": 34434887680.0,
            "11350": 34434887680.0,
            "11355": 34434887680.0,
            "11360": 34434887680.0,
            "11365": 34434887680.0,
            "11370": 34434887680.0,
            "11375": 34434887680.0,
            "11380": 34434887680.0,
            "11385": 34434887680.0,
            "11390": 34434887680.0,
            "11395": 34434887680.0,
            "11400": 34434887680.0,
            "11405": 34434887680.0,
            "11410": 34434887680.0,
            "11415": 34434887680.0,
            "11420": 34434887680.0,
            "11425": 34434887680.0,
            "11430": 34434887680.0,
            "11435": 34434887680.0,
            "11440": 34434887680.0,
            "11445": 34434887680.0,
            "11450": 34434887680.0,
            "11455": 34434887680.0,
            "11460": 34434887680.0,
            "11465": 34434887680.0,
            "11470": 34434887680.0,
            "11475": 34434887680.0,
            "11480": 34434887680.0,
            "11485": 34434887680.0,
            "11490": 34434887680.0,
            "11495": 34434887680.0,
            "11500": 34434887680.0,
            "11505": 34434887680.0,
            "11510": 34434887680.0,
            "11515": 34434887680.0,
            "11520": 34434887680.0,
            "11525": 34434887680.0,
            "11530": 34434887680.0,
            "11535": 34434887680.0,
            "11540": 34434887680.0,
            "11545": 34434887680.0,
            "11550": 34434887680.0,
            "11555": 34434887680.0,
            "11560": 34434887680.0,
            "11565": 34434887680.0,
            "11570": 34434887680.0,
            "11575": 34434887680.0,
            "11580": 34434887680.0,
            "11585": 34434887680.0,
            "11590": 34434887680.0,
            "11595": 34434887680.0,
            "11600": 34434887680.0,
            "11605": 34434887680.0,
            "11610": 34434887680.0,
            "11615": 34434887680.0,
            "11620": 34434887680.0,
            "11625": 34434887680.0,
            "11630": 34434887680.0,
            "11635": 34434887680.0,
            "11640": 34434887680.0,
            "11645": 34434887680.0,
            "11650": 34434887680.0,
            "11655": 34434887680.0,
            "11660": 34434887680.0,
            "11665": 34434887680.0,
            "11670": 34434887680.0,
            "11675": 34434887680.0,
            "11680": 34434887680.0,
            "11685": 34434887680.0,
            "11690": 34434887680.0,
            "11695": 34434887680.0,
            "11700": 34434887680.0,
            "11705": 34434887680.0,
            "11710": 34434887680.0,
            "11715": 34434887680.0,
            "11720": 34434887680.0,
            "11725": 34434887680.0,
            "11730": 34434887680.0,
            "11735": 34434887680.0,
            "11740": 34434887680.0,
            "11745": 34434887680.0,
            "11750": 34434887680.0,
            "11755": 34434887680.0,
            "11760": 34434887680.0,
            "11765": 34434887680.0,
            "11770": 34434887680.0,
            "11775": 34434887680.0,
            "11780": 34434887680.0,
            "11785": 34434887680.0,
            "11790": 34434887680.0,
            "11795": 34434887680.0,
            "11800": 34434887680.0,
            "11805": 34434887680.0,
            "11810": 34434887680.0,
            "11815": 34434887680.0,
            "11820": 34434887680.0,
            "11825": 34434887680.0,
            "11830": 34434887680.0,
            "11835": 34434887680.0,
            "11840": 34434887680.0,
            "11845": 34434887680.0,
            "11850": 34434887680.0,
            "11855": 34434887680.0,
            "11860": 34434887680.0,
            "11865": 34434887680.0,
            "11870": 34434887680.0,
            "11875": 34434887680.0,
            "11880": 34434887680.0,
            "11885": 34434887680.0,
            "11890": 34434887680.0,
            "11895": 34434887680.0,
            "11900": 34434887680.0,
            "11905": 34434887680.0,
            "11910": 34434887680.0,
            "11915": 34434887680.0,
            "11920": 34434887680.0,
            "11925": 34434887680.0,
            "11930": 34434887680.0,
            "11935": 34434887680.0,
            "11940": 34434887680.0,
            "11945": 34434887680.0,
            "11950": 34434887680.0,
            "11955": 34434887680.0,
            "11960": 34434887680.0,
            "11965": 34434887680.0,
            "11970": 34434887680.0,
            "11975": 34434887680.0,
            "11980": 34434887680.0,
            "11985": 34434887680.0,
            "11990": 34434887680.0,
            "11995": 34434887680.0,
            "12000": 34434887680.0,
            "12005": 34434887680.0,
            "12010": 34434887680.0,
            "12015": 34434887680.0,
            "12020": 34434887680.0,
            "12025": 34434887680.0,
            "12030": 34434887680.0,
            "12035": 34434887680.0,
            "12040": 34434887680.0,
            "12045": 34434887680.0,
            "12050": 34434887680.0,
            "12055": 34434887680.0,
            "12060": 34434887680.0,
            "12065": 34434887680.0,
            "12070": 34434887680.0,
            "12075": 34434887680.0,
            "12080": 34434887680.0,
            "12085": 34434887680.0,
            "12090": 34434887680.0,
            "12095": 34434887680.0,
            "12100": 34434887680.0,
            "12105": 34434887680.0,
            "12110": 34434887680.0,
            "12115": 34434887680.0,
            "12120": 34434887680.0,
            "12125": 34434887680.0,
            "12130": 34434887680.0,
            "12135": 34434887680.0,
            "12140": 34434887680.0,
            "12145": 34434887680.0,
            "12150": 34434887680.0,
            "12155": 34434887680.0,
            "12160": 34434887680.0,
            "12165": 34434887680.0,
            "12170": 34434887680.0,
            "12175": 34434887680.0,
            "12180": 34434887680.0,
            "12185": 34434887680.0,
            "12190": 34434887680.0,
            "12195": 34434887680.0,
            "12200": 34434887680.0,
            "12205": 34434887680.0,
            "12210": 34434887680.0,
            "12215": 34434887680.0,
            "12220": 34434887680.0,
            "12225": 34434887680.0,
            "12230": 34434887680.0,
            "12235": 34434887680.0,
            "12240": 34434887680.0,
            "12245": 34434887680.0,
            "12250": 34434887680.0,
            "12255": 34434887680.0,
            "12260": 34434887680.0,
            "12265": 34434887680.0,
            "12270": 34434887680.0,
            "12275": 34434887680.0,
            "12280": 34434887680.0,
            "12285": 34434887680.0,
            "12290": 34434887680.0,
            "12295": 34434887680.0,
            "12300": 34434887680.0,
            "12305": 34434887680.0,
            "12310": 34434887680.0,
            "12315": 34434887680.0,
            "12320": 34434887680.0,
            "12325": 34434887680.0,
            "12330": 34434887680.0,
            "12335": 34434887680.0,
            "12340": 34434887680.0,
            "12345": 34434887680.0,
            "12350": 34434887680.0,
            "12355": 34434887680.0,
            "12360": 34434887680.0,
            "12365": 34434887680.0,
            "12370": 34434887680.0,
            "12375": 34434887680.0,
            "12380": 34434887680.0,
            "12385": 34434887680.0,
            "12390": 34434887680.0,
            "12395": 34434887680.0,
            "12400": 34434887680.0,
            "12405": 34434887680.0,
            "12410": 34434887680.0,
            "12415": 34434887680.0,
            "12420": 34434887680.0,
            "12425": 34434887680.0,
            "12430": 34434887680.0,
            "12435": 34434887680.0,
            "12440": 34434887680.0,
            "12445": 34434887680.0,
            "12450": 34434887680.0,
            "12455": 34434887680.0,
            "12460": 34434887680.0,
            "12465": 34434887680.0,
            "12470": 34434887680.0,
            "12475": 34434887680.0,
            "12480": 34434887680.0,
            "12485": 34434887680.0,
            "12490": 34434887680.0,
            "12495": 34434887680.0,
            "12500": 34434887680.0,
            "12505": 34434887680.0,
            "12510": 34434887680.0,
            "12515": 34434887680.0,
            "12520": 34434887680.0,
            "12525": 34434887680.0,
            "12530": 34434887680.0,
            "12535": 34434887680.0,
            "12540": 34434887680.0,
            "12545": 34434887680.0,
            "12550": 34434887680.0,
            "12555": 34434887680.0,
            "12560": 34434887680.0,
            "12565": 34434887680.0,
            "12570": 34434887680.0,
            "12575": 34434887680.0,
            "12580": 34434887680.0,
            "12585": 34434887680.0,
            "12590": 34434887680.0,
            "12595": 34434887680.0,
            "12600": 34434887680.0,
            "12605": 34434887680.0,
            "12610": 34434887680.0,
            "12615": 34434887680.0,
            "12620": 34434887680.0,
            "12625": 34434887680.0,
            "12630": 34434887680.0,
            "12635": 34434887680.0,
            "12640": 34434887680.0,
            "12645": 34434887680.0,
            "12650": 34434887680.0,
            "12655": 34434887680.0,
            "12660": 34434887680.0,
            "12665": 34434887680.0,
            "12670": 34434887680.0,
            "12675": 34434887680.0,
            "12680": 34434887680.0,
            "12685": 34434887680.0,
            "12690": 34434887680.0,
            "12695": 34434887680.0,
            "12700": 34434887680.0,
            "12705": 34434887680.0,
            "12710": 34434887680.0,
            "12715": 34434887680.0,
            "12720": 34434887680.0,
            "12725": 34434887680.0,
            "12730": 34434887680.0,
            "12735": 34434887680.0,
            "12740": 34434887680.0,
            "12745": 34434887680.0,
            "12750": 34434887680.0,
            "12755": 34434887680.0,
            "12760": 34434887680.0,
            "12765": 34434887680.0,
            "12770": 34434887680.0,
            "12775": 34434887680.0,
            "12780": 34434887680.0,
            "12785": 34434887680.0,
            "12790": 34434887680.0,
            "12795": 34434887680.0,
            "12800": 34434887680.0,
            "12805": 34434887680.0,
            "12810": 34434887680.0,
            "12815": 34434887680.0,
            "12820": 34434887680.0,
            "12825": 34434887680.0,
            "12830": 34434887680.0,
            "12835": 34434887680.0,
            "12840": 34434887680.0,
            "12845": 34434887680.0,
            "12850": 34434887680.0,
            "12855": 34434887680.0,
            "12860": 34434887680.0,
            "12865": 34434887680.0,
            "12870": 34434887680.0,
            "12875": 34434887680.0,
            "12880": 34434887680.0,
            "12885": 34434887680.0,
            "12890": 34434887680.0,
            "12895": 34434887680.0,
            "12900": 34434887680.0,
            "12905": 34434887680.0,
            "12910": 34434887680.0,
            "12915": 34434887680.0,
            "12920": 34434887680.0,
            "12925": 34434887680.0,
            "12930": 34434887680.0,
            "12935": 34434887680.0,
            "12940": 34434887680.0,
            "12945": 34434887680.0,
            "12950": 34434887680.0,
            "12955": 34434887680.0,
            "12960": 34434887680.0,
            "12965": 34434887680.0,
            "12970": 34434887680.0,
            "12975": 34434887680.0,
            "12980": 34434887680.0,
            "12985": 34434887680.0,
            "12990": 34434887680.0,
            "12995": 34434887680.0,
            "13000": 34434887680.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 13000,
        "step_interval": 5,
        "values": {
            "1": "nan",
            "5": "nan",
            "10": "nan",
            "15": "nan",
            "20": "nan",
            "25": "nan",
            "30": "nan",
            "35": "nan",
            "40": "nan",
            "45": "nan",
            "50": "nan",
            "55": "nan",
            "60": "nan",
            "65": "nan",
            "70": "nan",
            "75": "nan",
            "80": "nan",
            "85": "nan",
            "90": "nan",
            "95": "nan",
            "100": 1.52575,
            "105": "nan",
            "110": "nan",
            "115": "nan",
            "120": "nan",
            "125": "nan",
            "130": "nan",
            "135": "nan",
            "140": "nan",
            "145": "nan",
            "150": "nan",
            "155": "nan",
            "160": "nan",
            "165": "nan",
            "170": "nan",
            "175": "nan",
            "180": "nan",
            "185": "nan",
            "190": "nan",
            "195": "nan",
            "200": 1.22241,
            "205": "nan",
            "210": "nan",
            "215": "nan",
            "220": "nan",
            "225": "nan",
            "230": "nan",
            "235": "nan",
            "240": "nan",
            "245": "nan",
            "250": "nan",
            "255": "nan",
            "260": "nan",
            "265": "nan",
            "270": "nan",
            "275": "nan",
            "280": "nan",
            "285": "nan",
            "290": "nan",
            "295": "nan",
            "300": 1.68719,
            "305": "nan",
            "310": "nan",
            "315": "nan",
            "320": "nan",
            "325": "nan",
            "330": "nan",
            "335": "nan",
            "340": "nan",
            "345": "nan",
            "350": "nan",
            "355": "nan",
            "360": "nan",
            "365": "nan",
            "370": "nan",
            "375": "nan",
            "380": "nan",
            "385": "nan",
            "390": "nan",
            "395": "nan",
            "400": 2.3415,
            "405": "nan",
            "410": "nan",
            "415": "nan",
            "420": "nan",
            "425": "nan",
            "430": "nan",
            "435": "nan",
            "440": "nan",
            "445": "nan",
            "450": "nan",
            "455": "nan",
            "460": "nan",
            "465": "nan",
            "470": "nan",
            "475": "nan",
            "480": "nan",
            "485": "nan",
            "490": "nan",
            "495": "nan",
            "500": 1.22139,
            "505": "nan",
            "510": "nan",
            "515": "nan",
            "520": "nan",
            "525": "nan",
            "530": "nan",
            "535": "nan",
            "540": "nan",
            "545": "nan",
            "550": "nan",
            "555": "nan",
            "560": "nan",
            "565": "nan",
            "570": "nan",
            "575": "nan",
            "580": "nan",
            "585": "nan",
            "590": "nan",
            "595": "nan",
            "600": 1.64801,
            "605": "nan",
            "610": "nan",
            "615": "nan",
            "620": "nan",
            "625": "nan",
            "630": "nan",
            "635": "nan",
            "640": "nan",
            "645": "nan",
            "650": "nan",
            "655": "nan",
            "660": "nan",
            "665": "nan",
            "670": "nan",
            "675": "nan",
            "680": "nan",
            "685": "nan",
            "690": "nan",
            "695": "nan",
            "700": 1.59272,
            "705": "nan",
            "710": "nan",
            "715": "nan",
            "720": "nan",
            "725": "nan",
            "730": "nan",
            "735": "nan",
            "740": "nan",
            "745": "nan",
            "750": "nan",
            "755": "nan",
            "760": "nan",
            "765": "nan",
            "770": "nan",
            "775": "nan",
            "780": "nan",
            "785": "nan",
            "790": "nan",
            "795": "nan",
            "800": 1.2195,
            "805": "nan",
            "810": "nan",
            "815": "nan",
            "820": "nan",
            "825": "nan",
            "830": "nan",
            "835": "nan",
            "840": "nan",
            "845": "nan",
            "850": "nan",
            "855": "nan",
            "860": "nan",
            "865": "nan",
            "870": "nan",
            "875": "nan",
            "880": "nan",
            "885": "nan",
            "890": "nan",
            "895": "nan",
            "900": 1.62873,
            "905": "nan",
            "910": "nan",
            "915": "nan",
            "920": "nan",
            "925": "nan",
            "930": "nan",
            "935": "nan",
            "940": "nan",
            "945": "nan",
            "950": "nan",
            "955": "nan",
            "960": "nan",
            "965": "nan",
            "970": "nan",
            "975": "nan",
            "980": "nan",
            "985": "nan",
            "990": "nan",
            "995": "nan",
            "1000": 1.64961,
            "1005": "nan",
            "1010": "nan",
            "1015": "nan",
            "1020": "nan",
            "1025": "nan",
            "1030": "nan",
            "1035": "nan",
            "1040": "nan",
            "1045": "nan",
            "1050": "nan",
            "1055": "nan",
            "1060": "nan",
            "1065": "nan",
            "1070": "nan",
            "1075": "nan",
            "1080": "nan",
            "1085": "nan",
            "1090": "nan",
            "1095": "nan",
            "1100": 1.62131,
            "1105": "nan",
            "1110": "nan",
            "1115": "nan",
            "1120": "nan",
            "1125": "nan",
            "1130": "nan",
            "1135": "nan",
            "1140": "nan",
            "1145": "nan",
            "1150": "nan",
            "1155": "nan",
            "1160": "nan",
            "1165": "nan",
            "1170": "nan",
            "1175": "nan",
            "1180": "nan",
            "1185": "nan",
            "1190": "nan",
            "1195": "nan",
            "1200": 1.46576,
            "1205": "nan",
            "1210": "nan",
            "1215": "nan",
            "1220": "nan",
            "1225": "nan",
            "1230": "nan",
            "1235": "nan",
            "1240": "nan",
            "1245": "nan",
            "1250": "nan",
            "1255": "nan",
            "1260": "nan",
            "1265": "nan",
            "1270": "nan",
            "1275": "nan",
            "1280": "nan",
            "1285": "nan",
            "1290": "nan",
            "1295": "nan",
            "1300": 1.52481,
            "1305": "nan",
            "1310": "nan",
            "1315": "nan",
            "1320": "nan",
            "1325": "nan",
            "1330": "nan",
            "1335": "nan",
            "1340": "nan",
            "1345": "nan",
            "1350": "nan",
            "1355": "nan",
            "1360": "nan",
            "1365": "nan",
            "1370": "nan",
            "1375": "nan",
            "1380": "nan",
            "1385": "nan",
            "1390": "nan",
            "1395": "nan",
            "1400": 1.18935,
            "1405": "nan",
            "1410": "nan",
            "1415": "nan",
            "1420": "nan",
            "1425": "nan",
            "1430": "nan",
            "1435": "nan",
            "1440": "nan",
            "1445": "nan",
            "1450": "nan",
            "1455": "nan",
            "1460": "nan",
            "1465": "nan",
            "1470": "nan",
            "1475": "nan",
            "1480": "nan",
            "1485": "nan",
            "1490": "nan",
            "1495": "nan",
            "1500": 1.81193,
            "1505": "nan",
            "1510": "nan",
            "1515": "nan",
            "1520": "nan",
            "1525": "nan",
            "1530": "nan",
            "1535": "nan",
            "1540": "nan",
            "1545": "nan",
            "1550": "nan",
            "1555": "nan",
            "1560": "nan",
            "1565": "nan",
            "1570": "nan",
            "1575": "nan",
            "1580": "nan",
            "1585": "nan",
            "1590": "nan",
            "1595": "nan",
            "1600": 1.18361,
            "1605": "nan",
            "1610": "nan",
            "1615": "nan",
            "1620": "nan",
            "1625": "nan",
            "1630": "nan",
            "1635": "nan",
            "1640": "nan",
            "1645": "nan",
            "1650": "nan",
            "1655": "nan",
            "1660": "nan",
            "1665": "nan",
            "1670": "nan",
            "1675": "nan",
            "1680": "nan",
            "1685": "nan",
            "1690": "nan",
            "1695": "nan",
            "1700": 1.8225,
            "1705": "nan",
            "1710": "nan",
            "1715": "nan",
            "1720": "nan",
            "1725": "nan",
            "1730": "nan",
            "1735": "nan",
            "1740": "nan",
            "1745": "nan",
            "1750": "nan",
            "1755": "nan",
            "1760": "nan",
            "1765": "nan",
            "1770": "nan",
            "1775": "nan",
            "1780": "nan",
            "1785": "nan",
            "1790": "nan",
            "1795": "nan",
            "1800": 1.17987,
            "1805": "nan",
            "1810": "nan",
            "1815": "nan",
            "1820": "nan",
            "1825": "nan",
            "1830": "nan",
            "1835": "nan",
            "1840": "nan",
            "1845": "nan",
            "1850": "nan",
            "1855": "nan",
            "1860": "nan",
            "1865": "nan",
            "1870": "nan",
            "1875": "nan",
            "1880": "nan",
            "1885": "nan",
            "1890": "nan",
            "1895": "nan",
            "1900": 1.51305,
            "1905": "nan",
            "1910": "nan",
            "1915": "nan",
            "1920": "nan",
            "1925": "nan",
            "1930": "nan",
            "1935": "nan",
            "1940": "nan",
            "1945": "nan",
            "1950": "nan",
            "1955": "nan",
            "1960": "nan",
            "1965": "nan",
            "1970": "nan",
            "1975": "nan",
            "1980": "nan",
            "1985": "nan",
            "1990": "nan",
            "1995": "nan",
            "2000": 1.50011,
            "2005": "nan",
            "2010": "nan",
            "2015": "nan",
            "2020": "nan",
            "2025": "nan",
            "2030": "nan",
            "2035": "nan",
            "2040": "nan",
            "2045": "nan",
            "2050": "nan",
            "2055": "nan",
            "2060": "nan",
            "2065": "nan",
            "2070": "nan",
            "2075": "nan",
            "2080": "nan",
            "2085": "nan",
            "2090": "nan",
            "2095": "nan",
            "2100": 1.57718,
            "2105": "nan",
            "2110": "nan",
            "2115": "nan",
            "2120": "nan",
            "2125": "nan",
            "2130": "nan",
            "2135": "nan",
            "2140": "nan",
            "2145": "nan",
            "2150": "nan",
            "2155": "nan",
            "2160": "nan",
            "2165": "nan",
            "2170": "nan",
            "2175": "nan",
            "2180": "nan",
            "2185": "nan",
            "2190": "nan",
            "2195": "nan",
            "2200": 1.45196,
            "2205": "nan",
            "2210": "nan",
            "2215": "nan",
            "2220": "nan",
            "2225": "nan",
            "2230": "nan",
            "2235": "nan",
            "2240": "nan",
            "2245": "nan",
            "2250": "nan",
            "2255": "nan",
            "2260": "nan",
            "2265": "nan",
            "2270": "nan",
            "2275": "nan",
            "2280": "nan",
            "2285": "nan",
            "2290": "nan",
            "2295": "nan",
            "2300": 1.26751,
            "2305": "nan",
            "2310": "nan",
            "2315": "nan",
            "2320": "nan",
            "2325": "nan",
            "2330": "nan",
            "2335": "nan",
            "2340": "nan",
            "2345": "nan",
            "2350": "nan",
            "2355": "nan",
            "2360": "nan",
            "2365": "nan",
            "2370": "nan",
            "2375": "nan",
            "2380": "nan",
            "2385": "nan",
            "2390": "nan",
            "2395": "nan",
            "2400": 1.67576,
            "2405": "nan",
            "2410": "nan",
            "2415": "nan",
            "2420": "nan",
            "2425": "nan",
            "2430": "nan",
            "2435": "nan",
            "2440": "nan",
            "2445": "nan",
            "2450": "nan",
            "2455": "nan",
            "2460": "nan",
            "2465": "nan",
            "2470": "nan",
            "2475": "nan",
            "2480": "nan",
            "2485": "nan",
            "2490": "nan",
            "2495": "nan",
            "2500": 1.45152,
            "2505": "nan",
            "2510": "nan",
            "2515": "nan",
            "2520": "nan",
            "2525": "nan",
            "2530": "nan",
            "2535": "nan",
            "2540": "nan",
            "2545": "nan",
            "2550": "nan",
            "2555": "nan",
            "2560": "nan",
            "2565": "nan",
            "2570": "nan",
            "2575": "nan",
            "2580": "nan",
            "2585": "nan",
            "2590": "nan",
            "2595": "nan",
            "2600": 1.17207,
            "2605": "nan",
            "2610": "nan",
            "2615": "nan",
            "2620": "nan",
            "2625": "nan",
            "2630": "nan",
            "2635": "nan",
            "2640": "nan",
            "2645": "nan",
            "2650": "nan",
            "2655": "nan",
            "2660": "nan",
            "2665": "nan",
            "2670": "nan",
            "2675": "nan",
            "2680": "nan",
            "2685": "nan",
            "2690": "nan",
            "2695": "nan",
            "2700": 1.47473,
            "2705": "nan",
            "2710": "nan",
            "2715": "nan",
            "2720": "nan",
            "2725": "nan",
            "2730": "nan",
            "2735": "nan",
            "2740": "nan",
            "2745": "nan",
            "2750": "nan",
            "2755": "nan",
            "2760": "nan",
            "2765": "nan",
            "2770": "nan",
            "2775": "nan",
            "2780": "nan",
            "2785": "nan",
            "2790": "nan",
            "2795": "nan",
            "2800": 1.46554,
            "2805": "nan",
            "2810": "nan",
            "2815": "nan",
            "2820": "nan",
            "2825": "nan",
            "2830": "nan",
            "2835": "nan",
            "2840": "nan",
            "2845": "nan",
            "2850": "nan",
            "2855": "nan",
            "2860": "nan",
            "2865": "nan",
            "2870": "nan",
            "2875": "nan",
            "2880": "nan",
            "2885": "nan",
            "2890": "nan",
            "2895": "nan",
            "2900": 1.47107,
            "2905": "nan",
            "2910": "nan",
            "2915": "nan",
            "2920": "nan",
            "2925": "nan",
            "2930": "nan",
            "2935": "nan",
            "2940": "nan",
            "2945": "nan",
            "2950": "nan",
            "2955": "nan",
            "2960": "nan",
            "2965": "nan",
            "2970": "nan",
            "2975": "nan",
            "2980": "nan",
            "2985": "nan",
            "2990": "nan",
            "2995": "nan",
            "3000": 1.47313,
            "3005": "nan",
            "3010": "nan",
            "3015": "nan",
            "3020": "nan",
            "3025": "nan",
            "3030": "nan",
            "3035": "nan",
            "3040": "nan",
            "3045": "nan",
            "3050": "nan",
            "3055": "nan",
            "3060": "nan",
            "3065": "nan",
            "3070": "nan",
            "3075": "nan",
            "3080": "nan",
            "3085": "nan",
            "3090": "nan",
            "3095": "nan",
            "3100": 1.58134,
            "3105": "nan",
            "3110": "nan",
            "3115": "nan",
            "3120": "nan",
            "3125": "nan",
            "3130": "nan",
            "3135": "nan",
            "3140": "nan",
            "3145": "nan",
            "3150": "nan",
            "3155": "nan",
            "3160": "nan",
            "3165": "nan",
            "3170": "nan",
            "3175": "nan",
            "3180": "nan",
            "3185": "nan",
            "3190": "nan",
            "3195": "nan",
            "3200": 1.47784,
            "3205": "nan",
            "3210": "nan",
            "3215": "nan",
            "3220": "nan",
            "3225": "nan",
            "3230": "nan",
            "3235": "nan",
            "3240": "nan",
            "3245": "nan",
            "3250": "nan",
            "3255": "nan",
            "3260": "nan",
            "3265": "nan",
            "3270": "nan",
            "3275": "nan",
            "3280": "nan",
            "3285": "nan",
            "3290": "nan",
            "3295": "nan",
            "3300": 1.53281,
            "3305": "nan",
            "3310": "nan",
            "3315": "nan",
            "3320": "nan",
            "3325": "nan",
            "3330": "nan",
            "3335": "nan",
            "3340": "nan",
            "3345": "nan",
            "3350": "nan",
            "3355": "nan",
            "3360": "nan",
            "3365": "nan",
            "3370": "nan",
            "3375": "nan",
            "3380": "nan",
            "3385": "nan",
            "3390": "nan",
            "3395": "nan",
            "3400": 1.16771,
            "3405": "nan",
            "3410": "nan",
            "3415": "nan",
            "3420": "nan",
            "3425": "nan",
            "3430": "nan",
            "3435": "nan",
            "3440": "nan",
            "3445": "nan",
            "3450": "nan",
            "3455": "nan",
            "3460": "nan",
            "3465": "nan",
            "3470": "nan",
            "3475": "nan",
            "3480": "nan",
            "3485": "nan",
            "3490": "nan",
            "3495": "nan",
            "3500": 1.7677,
            "3505": "nan",
            "3510": "nan",
            "3515": "nan",
            "3520": "nan",
            "3525": "nan",
            "3530": "nan",
            "3535": "nan",
            "3540": "nan",
            "3545": "nan",
            "3550": "nan",
            "3555": "nan",
            "3560": "nan",
            "3565": "nan",
            "3570": "nan",
            "3575": "nan",
            "3580": "nan",
            "3585": "nan",
            "3590": "nan",
            "3595": "nan",
            "3600": 1.16778,
            "3605": "nan",
            "3610": "nan",
            "3615": "nan",
            "3620": "nan",
            "3625": "nan",
            "3630": "nan",
            "3635": "nan",
            "3640": "nan",
            "3645": "nan",
            "3650": "nan",
            "3655": "nan",
            "3660": "nan",
            "3665": "nan",
            "3670": "nan",
            "3675": "nan",
            "3680": "nan",
            "3685": "nan",
            "3690": "nan",
            "3695": "nan",
            "3700": 1.52902,
            "3705": "nan",
            "3710": "nan",
            "3715": "nan",
            "3720": "nan",
            "3725": "nan",
            "3730": "nan",
            "3735": "nan",
            "3740": "nan",
            "3745": "nan",
            "3750": "nan",
            "3755": "nan",
            "3760": "nan",
            "3765": "nan",
            "3770": "nan",
            "3775": "nan",
            "3780": "nan",
            "3785": "nan",
            "3790": "nan",
            "3795": "nan",
            "3800": 1.45421,
            "3805": "nan",
            "3810": "nan",
            "3815": "nan",
            "3820": "nan",
            "3825": "nan",
            "3830": "nan",
            "3835": "nan",
            "3840": "nan",
            "3845": "nan",
            "3850": "nan",
            "3855": "nan",
            "3860": "nan",
            "3865": "nan",
            "3870": "nan",
            "3875": "nan",
            "3880": "nan",
            "3885": "nan",
            "3890": "nan",
            "3895": "nan",
            "3900": 1.48924,
            "3905": "nan",
            "3910": "nan",
            "3915": "nan",
            "3920": "nan",
            "3925": "nan",
            "3930": "nan",
            "3935": "nan",
            "3940": "nan",
            "3945": "nan",
            "3950": "nan",
            "3955": "nan",
            "3960": "nan",
            "3965": "nan",
            "3970": "nan",
            "3975": "nan",
            "3980": "nan",
            "3985": "nan",
            "3990": "nan",
            "3995": "nan",
            "4000": 1.49248,
            "4005": "nan",
            "4010": "nan",
            "4015": "nan",
            "4020": "nan",
            "4025": "nan",
            "4030": "nan",
            "4035": "nan",
            "4040": "nan",
            "4045": "nan",
            "4050": "nan",
            "4055": "nan",
            "4060": "nan",
            "4065": "nan",
            "4070": "nan",
            "4075": "nan",
            "4080": "nan",
            "4085": "nan",
            "4090": "nan",
            "4095": "nan",
            "4100": 1.24661,
            "4105": "nan",
            "4110": "nan",
            "4115": "nan",
            "4120": "nan",
            "4125": "nan",
            "4130": "nan",
            "4135": "nan",
            "4140": "nan",
            "4145": "nan",
            "4150": "nan",
            "4155": "nan",
            "4160": "nan",
            "4165": "nan",
            "4170": "nan",
            "4175": "nan",
            "4180": "nan",
            "4185": "nan",
            "4190": "nan",
            "4195": "nan",
            "4200": 1.49468,
            "4205": "nan",
            "4210": "nan",
            "4215": "nan",
            "4220": "nan",
            "4225": "nan",
            "4230": "nan",
            "4235": "nan",
            "4240": "nan",
            "4245": "nan",
            "4250": "nan",
            "4255": "nan",
            "4260": "nan",
            "4265": "nan",
            "4270": "nan",
            "4275": "nan",
            "4280": "nan",
            "4285": "nan",
            "4290": "nan",
            "4295": "nan",
            "4300": 1.16555,
            "4305": "nan",
            "4310": "nan",
            "4315": "nan",
            "4320": "nan",
            "4325": "nan",
            "4330": "nan",
            "4335": "nan",
            "4340": "nan",
            "4345": "nan",
            "4350": "nan",
            "4355": "nan",
            "4360": "nan",
            "4365": "nan",
            "4370": "nan",
            "4375": "nan",
            "4380": "nan",
            "4385": "nan",
            "4390": "nan",
            "4395": "nan",
            "4400": 1.47217,
            "4405": "nan",
            "4410": "nan",
            "4415": "nan",
            "4420": "nan",
            "4425": "nan",
            "4430": "nan",
            "4435": "nan",
            "4440": "nan",
            "4445": "nan",
            "4450": "nan",
            "4455": "nan",
            "4460": "nan",
            "4465": "nan",
            "4470": "nan",
            "4475": "nan",
            "4480": "nan",
            "4485": "nan",
            "4490": "nan",
            "4495": "nan",
            "4500": 1.16663,
            "4505": "nan",
            "4510": "nan",
            "4515": "nan",
            "4520": "nan",
            "4525": "nan",
            "4530": "nan",
            "4535": "nan",
            "4540": "nan",
            "4545": "nan",
            "4550": "nan",
            "4555": "nan",
            "4560": "nan",
            "4565": "nan",
            "4570": "nan",
            "4575": "nan",
            "4580": "nan",
            "4585": "nan",
            "4590": "nan",
            "4595": "nan",
            "4600": 1.43693,
            "4605": "nan",
            "4610": "nan",
            "4615": "nan",
            "4620": "nan",
            "4625": "nan",
            "4630": "nan",
            "4635": "nan",
            "4640": "nan",
            "4645": "nan",
            "4650": "nan",
            "4655": "nan",
            "4660": "nan",
            "4665": "nan",
            "4670": "nan",
            "4675": "nan",
            "4680": "nan",
            "4685": "nan",
            "4690": "nan",
            "4695": "nan",
            "4700": 1.16482,
            "4705": "nan",
            "4710": "nan",
            "4715": "nan",
            "4720": "nan",
            "4725": "nan",
            "4730": "nan",
            "4735": "nan",
            "4740": "nan",
            "4745": "nan",
            "4750": "nan",
            "4755": "nan",
            "4760": "nan",
            "4765": "nan",
            "4770": "nan",
            "4775": "nan",
            "4780": "nan",
            "4785": "nan",
            "4790": "nan",
            "4795": "nan",
            "4800": 1.48482,
            "4805": "nan",
            "4810": "nan",
            "4815": "nan",
            "4820": "nan",
            "4825": "nan",
            "4830": "nan",
            "4835": "nan",
            "4840": "nan",
            "4845": "nan",
            "4850": "nan",
            "4855": "nan",
            "4860": "nan",
            "4865": "nan",
            "4870": "nan",
            "4875": "nan",
            "4880": "nan",
            "4885": "nan",
            "4890": "nan",
            "4895": "nan",
            "4900": 1.16433,
            "4905": "nan",
            "4910": "nan",
            "4915": "nan",
            "4920": "nan",
            "4925": "nan",
            "4930": "nan",
            "4935": "nan",
            "4940": "nan",
            "4945": "nan",
            "4950": "nan",
            "4955": "nan",
            "4960": "nan",
            "4965": "nan",
            "4970": "nan",
            "4975": "nan",
            "4980": "nan",
            "4985": "nan",
            "4990": "nan",
            "4995": "nan",
            "5000": 1.46317,
            "5005": "nan",
            "5010": "nan",
            "5015": "nan",
            "5020": "nan",
            "5025": "nan",
            "5030": "nan",
            "5035": "nan",
            "5040": "nan",
            "5045": "nan",
            "5050": "nan",
            "5055": "nan",
            "5060": "nan",
            "5065": "nan",
            "5070": "nan",
            "5075": "nan",
            "5080": "nan",
            "5085": "nan",
            "5090": "nan",
            "5095": "nan",
            "5100": 1.25189,
            "5105": "nan",
            "5110": "nan",
            "5115": "nan",
            "5120": "nan",
            "5125": "nan",
            "5130": "nan",
            "5135": "nan",
            "5140": "nan",
            "5145": "nan",
            "5150": "nan",
            "5155": "nan",
            "5160": "nan",
            "5165": "nan",
            "5170": "nan",
            "5175": "nan",
            "5180": "nan",
            "5185": "nan",
            "5190": "nan",
            "5195": "nan",
            "5200": 1.16439,
            "5205": "nan",
            "5210": "nan",
            "5215": "nan",
            "5220": "nan",
            "5225": "nan",
            "5230": "nan",
            "5235": "nan",
            "5240": "nan",
            "5245": "nan",
            "5250": "nan",
            "5255": "nan",
            "5260": "nan",
            "5265": "nan",
            "5270": "nan",
            "5275": "nan",
            "5280": "nan",
            "5285": "nan",
            "5290": "nan",
            "5295": "nan",
            "5300": 1.50195,
            "5305": "nan",
            "5310": "nan",
            "5315": "nan",
            "5320": "nan",
            "5325": "nan",
            "5330": "nan",
            "5335": "nan",
            "5340": "nan",
            "5345": "nan",
            "5350": "nan",
            "5355": "nan",
            "5360": "nan",
            "5365": "nan",
            "5370": "nan",
            "5375": "nan",
            "5380": "nan",
            "5385": "nan",
            "5390": "nan",
            "5395": "nan",
            "5400": 1.16391,
            "5405": "nan",
            "5410": "nan",
            "5415": "nan",
            "5420": "nan",
            "5425": "nan",
            "5430": "nan",
            "5435": "nan",
            "5440": "nan",
            "5445": "nan",
            "5450": "nan",
            "5455": "nan",
            "5460": "nan",
            "5465": "nan",
            "5470": "nan",
            "5475": "nan",
            "5480": "nan",
            "5485": "nan",
            "5490": "nan",
            "5495": "nan",
            "5500": 1.49666,
            "5505": "nan",
            "5510": "nan",
            "5515": "nan",
            "5520": "nan",
            "5525": "nan",
            "5530": "nan",
            "5535": "nan",
            "5540": "nan",
            "5545": "nan",
            "5550": "nan",
            "5555": "nan",
            "5560": "nan",
            "5565": "nan",
            "5570": "nan",
            "5575": "nan",
            "5580": "nan",
            "5585": "nan",
            "5590": "nan",
            "5595": "nan",
            "5600": 1.16446,
            "5605": "nan",
            "5610": "nan",
            "5615": "nan",
            "5620": "nan",
            "5625": "nan",
            "5630": "nan",
            "5635": "nan",
            "5640": "nan",
            "5645": "nan",
            "5650": "nan",
            "5655": "nan",
            "5660": "nan",
            "5665": "nan",
            "5670": "nan",
            "5675": "nan",
            "5680": "nan",
            "5685": "nan",
            "5690": "nan",
            "5695": "nan",
            "5700": 1.42544,
            "5705": "nan",
            "5710": "nan",
            "5715": "nan",
            "5720": "nan",
            "5725": "nan",
            "5730": "nan",
            "5735": "nan",
            "5740": "nan",
            "5745": "nan",
            "5750": "nan",
            "5755": "nan",
            "5760": "nan",
            "5765": "nan",
            "5770": "nan",
            "5775": "nan",
            "5780": "nan",
            "5785": "nan",
            "5790": "nan",
            "5795": "nan",
            "5800": 1.18903,
            "5805": "nan",
            "5810": "nan",
            "5815": "nan",
            "5820": "nan",
            "5825": "nan",
            "5830": "nan",
            "5835": "nan",
            "5840": "nan",
            "5845": "nan",
            "5850": "nan",
            "5855": "nan",
            "5860": "nan",
            "5865": "nan",
            "5870": "nan",
            "5875": "nan",
            "5880": "nan",
            "5885": "nan",
            "5890": "nan",
            "5895": "nan",
            "5900": 1.16376,
            "5905": "nan",
            "5910": "nan",
            "5915": "nan",
            "5920": "nan",
            "5925": "nan",
            "5930": "nan",
            "5935": "nan",
            "5940": "nan",
            "5945": "nan",
            "5950": "nan",
            "5955": "nan",
            "5960": "nan",
            "5965": "nan",
            "5970": "nan",
            "5975": "nan",
            "5980": "nan",
            "5985": "nan",
            "5990": "nan",
            "5995": "nan",
            "6000": 1.48894,
            "6005": "nan",
            "6010": "nan",
            "6015": "nan",
            "6020": "nan",
            "6025": "nan",
            "6030": "nan",
            "6035": "nan",
            "6040": "nan",
            "6045": "nan",
            "6050": "nan",
            "6055": "nan",
            "6060": "nan",
            "6065": "nan",
            "6070": "nan",
            "6075": "nan",
            "6080": "nan",
            "6085": "nan",
            "6090": "nan",
            "6095": "nan",
            "6100": 1.56874,
            "6105": "nan",
            "6110": "nan",
            "6115": "nan",
            "6120": "nan",
            "6125": "nan",
            "6130": "nan",
            "6135": "nan",
            "6140": "nan",
            "6145": "nan",
            "6150": "nan",
            "6155": "nan",
            "6160": "nan",
            "6165": "nan",
            "6170": "nan",
            "6175": "nan",
            "6180": "nan",
            "6185": "nan",
            "6190": "nan",
            "6195": "nan",
            "6200": 1.16367,
            "6205": "nan",
            "6210": "nan",
            "6215": "nan",
            "6220": "nan",
            "6225": "nan",
            "6230": "nan",
            "6235": "nan",
            "6240": "nan",
            "6245": "nan",
            "6250": "nan",
            "6255": "nan",
            "6260": "nan",
            "6265": "nan",
            "6270": "nan",
            "6275": "nan",
            "6280": "nan",
            "6285": "nan",
            "6290": "nan",
            "6295": "nan",
            "6300": 1.49027,
            "6305": "nan",
            "6310": "nan",
            "6315": "nan",
            "6320": "nan",
            "6325": "nan",
            "6330": "nan",
            "6335": "nan",
            "6340": "nan",
            "6345": "nan",
            "6350": "nan",
            "6355": "nan",
            "6360": "nan",
            "6365": "nan",
            "6370": "nan",
            "6375": "nan",
            "6380": "nan",
            "6385": "nan",
            "6390": "nan",
            "6395": "nan",
            "6400": 1.16275,
            "6405": "nan",
            "6410": "nan",
            "6415": "nan",
            "6420": "nan",
            "6425": "nan",
            "6430": "nan",
            "6435": "nan",
            "6440": "nan",
            "6445": "nan",
            "6450": "nan",
            "6455": "nan",
            "6460": "nan",
            "6465": "nan",
            "6470": "nan",
            "6475": "nan",
            "6480": "nan",
            "6485": "nan",
            "6490": "nan",
            "6495": "nan",
            "6500": 1.78937,
            "6505": "nan",
            "6510": "nan",
            "6515": "nan",
            "6520": "nan",
            "6525": "nan",
            "6530": "nan",
            "6535": "nan",
            "6540": "nan",
            "6545": "nan",
            "6550": "nan",
            "6555": "nan",
            "6560": "nan",
            "6565": "nan",
            "6570": "nan",
            "6575": "nan",
            "6580": "nan",
            "6585": "nan",
            "6590": "nan",
            "6595": "nan",
            "6600": 1.16414,
            "6605": "nan",
            "6610": "nan",
            "6615": "nan",
            "6620": "nan",
            "6625": "nan",
            "6630": "nan",
            "6635": "nan",
            "6640": "nan",
            "6645": "nan",
            "6650": "nan",
            "6655": "nan",
            "6660": "nan",
            "6665": "nan",
            "6670": "nan",
            "6675": "nan",
            "6680": "nan",
            "6685": "nan",
            "6690": "nan",
            "6695": "nan",
            "6700": 1.16369,
            "6705": "nan",
            "6710": "nan",
            "6715": "nan",
            "6720": "nan",
            "6725": "nan",
            "6730": "nan",
            "6735": "nan",
            "6740": "nan",
            "6745": "nan",
            "6750": "nan",
            "6755": "nan",
            "6760": "nan",
            "6765": "nan",
            "6770": "nan",
            "6775": "nan",
            "6780": "nan",
            "6785": "nan",
            "6790": "nan",
            "6795": "nan",
            "6800": 1.87916,
            "6805": "nan",
            "6810": "nan",
            "6815": "nan",
            "6820": "nan",
            "6825": "nan",
            "6830": "nan",
            "6835": "nan",
            "6840": "nan",
            "6845": "nan",
            "6850": "nan",
            "6855": "nan",
            "6860": "nan",
            "6865": "nan",
            "6870": "nan",
            "6875": "nan",
            "6880": "nan",
            "6885": "nan",
            "6890": "nan",
            "6895": "nan",
            "6900": 1.74932,
            "6905": "nan",
            "6910": "nan",
            "6915": "nan",
            "6920": "nan",
            "6925": "nan",
            "6930": "nan",
            "6935": "nan",
            "6940": "nan",
            "6945": "nan",
            "6950": "nan",
            "6955": "nan",
            "6960": "nan",
            "6965": "nan",
            "6970": "nan",
            "6975": "nan",
            "6980": "nan",
            "6985": "nan",
            "6990": "nan",
            "6995": "nan",
            "7000": 1.26768,
            "7005": "nan",
            "7010": "nan",
            "7015": "nan",
            "7020": "nan",
            "7025": "nan",
            "7030": "nan",
            "7035": "nan",
            "7040": "nan",
            "7045": "nan",
            "7050": "nan",
            "7055": "nan",
            "7060": "nan",
            "7065": "nan",
            "7070": "nan",
            "7075": "nan",
            "7080": "nan",
            "7085": "nan",
            "7090": "nan",
            "7095": "nan",
            "7100": 1.56361,
            "7105": "nan",
            "7110": "nan",
            "7115": "nan",
            "7120": "nan",
            "7125": "nan",
            "7130": "nan",
            "7135": "nan",
            "7140": "nan",
            "7145": "nan",
            "7150": "nan",
            "7155": "nan",
            "7160": "nan",
            "7165": "nan",
            "7170": "nan",
            "7175": "nan",
            "7180": "nan",
            "7185": "nan",
            "7190": "nan",
            "7195": "nan",
            "7200": 1.46637,
            "7205": "nan",
            "7210": "nan",
            "7215": "nan",
            "7220": "nan",
            "7225": "nan",
            "7230": "nan",
            "7235": "nan",
            "7240": "nan",
            "7245": "nan",
            "7250": "nan",
            "7255": "nan",
            "7260": "nan",
            "7265": "nan",
            "7270": "nan",
            "7275": "nan",
            "7280": "nan",
            "7285": "nan",
            "7290": "nan",
            "7295": "nan",
            "7300": 1.47082,
            "7305": "nan",
            "7310": "nan",
            "7315": "nan",
            "7320": "nan",
            "7325": "nan",
            "7330": "nan",
            "7335": "nan",
            "7340": "nan",
            "7345": "nan",
            "7350": "nan",
            "7355": "nan",
            "7360": "nan",
            "7365": "nan",
            "7370": "nan",
            "7375": "nan",
            "7380": "nan",
            "7385": "nan",
            "7390": "nan",
            "7395": "nan",
            "7400": 1.46294,
            "7405": "nan",
            "7410": "nan",
            "7415": "nan",
            "7420": "nan",
            "7425": "nan",
            "7430": "nan",
            "7435": "nan",
            "7440": "nan",
            "7445": "nan",
            "7450": "nan",
            "7455": "nan",
            "7460": "nan",
            "7465": "nan",
            "7470": "nan",
            "7475": "nan",
            "7480": "nan",
            "7485": "nan",
            "7490": "nan",
            "7495": "nan",
            "7500": 1.48069,
            "7505": "nan",
            "7510": "nan",
            "7515": "nan",
            "7520": "nan",
            "7525": "nan",
            "7530": "nan",
            "7535": "nan",
            "7540": "nan",
            "7545": "nan",
            "7550": "nan",
            "7555": "nan",
            "7560": "nan",
            "7565": "nan",
            "7570": "nan",
            "7575": "nan",
            "7580": "nan",
            "7585": "nan",
            "7590": "nan",
            "7595": "nan",
            "7600": 1.44277,
            "7605": "nan",
            "7610": "nan",
            "7615": "nan",
            "7620": "nan",
            "7625": "nan",
            "7630": "nan",
            "7635": "nan",
            "7640": "nan",
            "7645": "nan",
            "7650": "nan",
            "7655": "nan",
            "7660": "nan",
            "7665": "nan",
            "7670": "nan",
            "7675": "nan",
            "7680": "nan",
            "7685": "nan",
            "7690": "nan",
            "7695": "nan",
            "7700": 1.44688,
            "7705": "nan",
            "7710": "nan",
            "7715": "nan",
            "7720": "nan",
            "7725": "nan",
            "7730": "nan",
            "7735": "nan",
            "7740": "nan",
            "7745": "nan",
            "7750": "nan",
            "7755": "nan",
            "7760": "nan",
            "7765": "nan",
            "7770": "nan",
            "7775": "nan",
            "7780": "nan",
            "7785": "nan",
            "7790": "nan",
            "7795": "nan",
            "7800": 1.46189,
            "7805": "nan",
            "7810": "nan",
            "7815": "nan",
            "7820": "nan",
            "7825": "nan",
            "7830": "nan",
            "7835": "nan",
            "7840": "nan",
            "7845": "nan",
            "7850": "nan",
            "7855": "nan",
            "7860": "nan",
            "7865": "nan",
            "7870": "nan",
            "7875": "nan",
            "7880": "nan",
            "7885": "nan",
            "7890": "nan",
            "7895": "nan",
            "7900": 1.48119,
            "7905": "nan",
            "7910": "nan",
            "7915": "nan",
            "7920": "nan",
            "7925": "nan",
            "7930": "nan",
            "7935": "nan",
            "7940": "nan",
            "7945": "nan",
            "7950": "nan",
            "7955": "nan",
            "7960": "nan",
            "7965": "nan",
            "7970": "nan",
            "7975": "nan",
            "7980": "nan",
            "7985": "nan",
            "7990": "nan",
            "7995": "nan",
            "8000": 1.16319,
            "8005": "nan",
            "8010": "nan",
            "8015": "nan",
            "8020": "nan",
            "8025": "nan",
            "8030": "nan",
            "8035": "nan",
            "8040": "nan",
            "8045": "nan",
            "8050": "nan",
            "8055": "nan",
            "8060": "nan",
            "8065": "nan",
            "8070": "nan",
            "8075": "nan",
            "8080": "nan",
            "8085": "nan",
            "8090": "nan",
            "8095": "nan",
            "8100": 1.56123,
            "8105": "nan",
            "8110": "nan",
            "8115": "nan",
            "8120": "nan",
            "8125": "nan",
            "8130": "nan",
            "8135": "nan",
            "8140": "nan",
            "8145": "nan",
            "8150": "nan",
            "8155": "nan",
            "8160": "nan",
            "8165": "nan",
            "8170": "nan",
            "8175": "nan",
            "8180": "nan",
            "8185": "nan",
            "8190": "nan",
            "8195": "nan",
            "8200": 1.47947,
            "8205": "nan",
            "8210": "nan",
            "8215": "nan",
            "8220": "nan",
            "8225": "nan",
            "8230": "nan",
            "8235": "nan",
            "8240": "nan",
            "8245": "nan",
            "8250": "nan",
            "8255": "nan",
            "8260": "nan",
            "8265": "nan",
            "8270": "nan",
            "8275": "nan",
            "8280": "nan",
            "8285": "nan",
            "8290": "nan",
            "8295": "nan",
            "8300": 1.16329,
            "8305": "nan",
            "8310": "nan",
            "8315": "nan",
            "8320": "nan",
            "8325": "nan",
            "8330": "nan",
            "8335": "nan",
            "8340": "nan",
            "8345": "nan",
            "8350": "nan",
            "8355": "nan",
            "8360": "nan",
            "8365": "nan",
            "8370": "nan",
            "8375": "nan",
            "8380": "nan",
            "8385": "nan",
            "8390": "nan",
            "8395": "nan",
            "8400": 1.76645,
            "8405": "nan",
            "8410": "nan",
            "8415": "nan",
            "8420": "nan",
            "8425": "nan",
            "8430": "nan",
            "8435": "nan",
            "8440": "nan",
            "8445": "nan",
            "8450": "nan",
            "8455": "nan",
            "8460": "nan",
            "8465": "nan",
            "8470": "nan",
            "8475": "nan",
            "8480": "nan",
            "8485": "nan",
            "8490": "nan",
            "8495": "nan",
            "8500": 1.24745,
            "8505": "nan",
            "8510": "nan",
            "8515": "nan",
            "8520": "nan",
            "8525": "nan",
            "8530": "nan",
            "8535": "nan",
            "8540": "nan",
            "8545": "nan",
            "8550": "nan",
            "8555": "nan",
            "8560": "nan",
            "8565": "nan",
            "8570": "nan",
            "8575": "nan",
            "8580": "nan",
            "8585": "nan",
            "8590": "nan",
            "8595": "nan",
            "8600": 1.50472,
            "8605": "nan",
            "8610": "nan",
            "8615": "nan",
            "8620": "nan",
            "8625": "nan",
            "8630": "nan",
            "8635": "nan",
            "8640": "nan",
            "8645": "nan",
            "8650": "nan",
            "8655": "nan",
            "8660": "nan",
            "8665": "nan",
            "8670": "nan",
            "8675": "nan",
            "8680": "nan",
            "8685": "nan",
            "8690": "nan",
            "8695": "nan",
            "8700": 1.2351,
            "8705": "nan",
            "8710": "nan",
            "8715": "nan",
            "8720": "nan",
            "8725": "nan",
            "8730": "nan",
            "8735": "nan",
            "8740": "nan",
            "8745": "nan",
            "8750": "nan",
            "8755": "nan",
            "8760": "nan",
            "8765": "nan",
            "8770": "nan",
            "8775": "nan",
            "8780": "nan",
            "8785": "nan",
            "8790": "nan",
            "8795": "nan",
            "8800": 1.23483,
            "8805": "nan",
            "8810": "nan",
            "8815": "nan",
            "8820": "nan",
            "8825": "nan",
            "8830": "nan",
            "8835": "nan",
            "8840": "nan",
            "8845": "nan",
            "8850": "nan",
            "8855": "nan",
            "8860": "nan",
            "8865": "nan",
            "8870": "nan",
            "8875": "nan",
            "8880": "nan",
            "8885": "nan",
            "8890": "nan",
            "8895": "nan",
            "8900": 1.44634,
            "8905": "nan",
            "8910": "nan",
            "8915": "nan",
            "8920": "nan",
            "8925": "nan",
            "8930": "nan",
            "8935": "nan",
            "8940": "nan",
            "8945": "nan",
            "8950": "nan",
            "8955": "nan",
            "8960": "nan",
            "8965": "nan",
            "8970": "nan",
            "8975": "nan",
            "8980": "nan",
            "8985": "nan",
            "8990": "nan",
            "8995": "nan",
            "9000": 1.28102,
            "9005": "nan",
            "9010": "nan",
            "9015": "nan",
            "9020": "nan",
            "9025": "nan",
            "9030": "nan",
            "9035": "nan",
            "9040": "nan",
            "9045": "nan",
            "9050": "nan",
            "9055": "nan",
            "9060": "nan",
            "9065": "nan",
            "9070": "nan",
            "9075": "nan",
            "9080": "nan",
            "9085": "nan",
            "9090": "nan",
            "9095": "nan",
            "9100": 1.9791,
            "9105": "nan",
            "9110": "nan",
            "9115": "nan",
            "9120": "nan",
            "9125": "nan",
            "9130": "nan",
            "9135": "nan",
            "9140": "nan",
            "9145": "nan",
            "9150": "nan",
            "9155": "nan",
            "9160": "nan",
            "9165": "nan",
            "9170": "nan",
            "9175": "nan",
            "9180": "nan",
            "9185": "nan",
            "9190": "nan",
            "9195": "nan",
            "9200": 1.23709,
            "9205": "nan",
            "9210": "nan",
            "9215": "nan",
            "9220": "nan",
            "9225": "nan",
            "9230": "nan",
            "9235": "nan",
            "9240": "nan",
            "9245": "nan",
            "9250": "nan",
            "9255": "nan",
            "9260": "nan",
            "9265": "nan",
            "9270": "nan",
            "9275": "nan",
            "9280": "nan",
            "9285": "nan",
            "9290": "nan",
            "9295": "nan",
            "9300": 1.23759,
            "9305": "nan",
            "9310": "nan",
            "9315": "nan",
            "9320": "nan",
            "9325": "nan",
            "9330": "nan",
            "9335": "nan",
            "9340": "nan",
            "9345": "nan",
            "9350": "nan",
            "9355": "nan",
            "9360": "nan",
            "9365": "nan",
            "9370": "nan",
            "9375": "nan",
            "9380": "nan",
            "9385": "nan",
            "9390": "nan",
            "9395": "nan",
            "9400": 1.99173,
            "9405": "nan",
            "9410": "nan",
            "9415": "nan",
            "9420": "nan",
            "9425": "nan",
            "9430": "nan",
            "9435": "nan",
            "9440": "nan",
            "9445": "nan",
            "9450": "nan",
            "9455": "nan",
            "9460": "nan",
            "9465": "nan",
            "9470": "nan",
            "9475": "nan",
            "9480": "nan",
            "9485": "nan",
            "9490": "nan",
            "9495": "nan",
            "9500": 1.53391,
            "9505": "nan",
            "9510": "nan",
            "9515": "nan",
            "9520": "nan",
            "9525": "nan",
            "9530": "nan",
            "9535": "nan",
            "9540": "nan",
            "9545": "nan",
            "9550": "nan",
            "9555": "nan",
            "9560": "nan",
            "9565": "nan",
            "9570": "nan",
            "9575": "nan",
            "9580": "nan",
            "9585": "nan",
            "9590": "nan",
            "9595": "nan",
            "9600": 1.59234,
            "9605": "nan",
            "9610": "nan",
            "9615": "nan",
            "9620": "nan",
            "9625": "nan",
            "9630": "nan",
            "9635": "nan",
            "9640": "nan",
            "9645": "nan",
            "9650": "nan",
            "9655": "nan",
            "9660": "nan",
            "9665": "nan",
            "9670": "nan",
            "9675": "nan",
            "9680": "nan",
            "9685": "nan",
            "9690": "nan",
            "9695": "nan",
            "9700": 1.23829,
            "9705": "nan",
            "9710": "nan",
            "9715": "nan",
            "9720": "nan",
            "9725": "nan",
            "9730": "nan",
            "9735": "nan",
            "9740": "nan",
            "9745": "nan",
            "9750": "nan",
            "9755": "nan",
            "9760": "nan",
            "9765": "nan",
            "9770": "nan",
            "9775": "nan",
            "9780": "nan",
            "9785": "nan",
            "9790": "nan",
            "9795": "nan",
            "9800": 1.94369,
            "9805": "nan",
            "9810": "nan",
            "9815": "nan",
            "9820": "nan",
            "9825": "nan",
            "9830": "nan",
            "9835": "nan",
            "9840": "nan",
            "9845": "nan",
            "9850": "nan",
            "9855": "nan",
            "9860": "nan",
            "9865": "nan",
            "9870": "nan",
            "9875": "nan",
            "9880": "nan",
            "9885": "nan",
            "9890": "nan",
            "9895": "nan",
            "9900": 1.54022,
            "9905": "nan",
            "9910": "nan",
            "9915": "nan",
            "9920": "nan",
            "9925": "nan",
            "9930": "nan",
            "9935": "nan",
            "9940": "nan",
            "9945": "nan",
            "9950": "nan",
            "9955": "nan",
            "9960": "nan",
            "9965": "nan",
            "9970": "nan",
            "9975": "nan",
            "9980": "nan",
            "9985": "nan",
            "9990": "nan",
            "9995": "nan",
            "10000": 1.61649,
            "10005": "nan",
            "10010": "nan",
            "10015": "nan",
            "10020": "nan",
            "10025": "nan",
            "10030": "nan",
            "10035": "nan",
            "10040": "nan",
            "10045": "nan",
            "10050": "nan",
            "10055": "nan",
            "10060": "nan",
            "10065": "nan",
            "10070": "nan",
            "10075": "nan",
            "10080": "nan",
            "10085": "nan",
            "10090": "nan",
            "10095": "nan",
            "10100": 1.85307,
            "10105": "nan",
            "10110": "nan",
            "10115": "nan",
            "10120": "nan",
            "10125": "nan",
            "10130": "nan",
            "10135": "nan",
            "10140": "nan",
            "10145": "nan",
            "10150": "nan",
            "10155": "nan",
            "10160": "nan",
            "10165": "nan",
            "10170": "nan",
            "10175": "nan",
            "10180": "nan",
            "10185": "nan",
            "10190": "nan",
            "10195": "nan",
            "10200": 1.3129,
            "10205": "nan",
            "10210": "nan",
            "10215": "nan",
            "10220": "nan",
            "10225": "nan",
            "10230": "nan",
            "10235": "nan",
            "10240": "nan",
            "10245": "nan",
            "10250": "nan",
            "10255": "nan",
            "10260": "nan",
            "10265": "nan",
            "10270": "nan",
            "10275": "nan",
            "10280": "nan",
            "10285": "nan",
            "10290": "nan",
            "10295": "nan",
            "10300": 1.57995,
            "10305": "nan",
            "10310": "nan",
            "10315": "nan",
            "10320": "nan",
            "10325": "nan",
            "10330": "nan",
            "10335": "nan",
            "10340": "nan",
            "10345": "nan",
            "10350": "nan",
            "10355": "nan",
            "10360": "nan",
            "10365": "nan",
            "10370": "nan",
            "10375": "nan",
            "10380": "nan",
            "10385": "nan",
            "10390": "nan",
            "10395": "nan",
            "10400": 1.55986,
            "10405": "nan",
            "10410": "nan",
            "10415": "nan",
            "10420": "nan",
            "10425": "nan",
            "10430": "nan",
            "10435": "nan",
            "10440": "nan",
            "10445": "nan",
            "10450": "nan",
            "10455": "nan",
            "10460": "nan",
            "10465": "nan",
            "10470": "nan",
            "10475": "nan",
            "10480": "nan",
            "10485": "nan",
            "10490": "nan",
            "10495": "nan",
            "10500": 1.54981,
            "10505": "nan",
            "10510": "nan",
            "10515": "nan",
            "10520": "nan",
            "10525": "nan",
            "10530": "nan",
            "10535": "nan",
            "10540": "nan",
            "10545": "nan",
            "10550": "nan",
            "10555": "nan",
            "10560": "nan",
            "10565": "nan",
            "10570": "nan",
            "10575": "nan",
            "10580": "nan",
            "10585": "nan",
            "10590": "nan",
            "10595": "nan",
            "10600": 1.61207,
            "10605": "nan",
            "10610": "nan",
            "10615": "nan",
            "10620": "nan",
            "10625": "nan",
            "10630": "nan",
            "10635": "nan",
            "10640": "nan",
            "10645": "nan",
            "10650": "nan",
            "10655": "nan",
            "10660": "nan",
            "10665": "nan",
            "10670": "nan",
            "10675": "nan",
            "10680": "nan",
            "10685": "nan",
            "10690": "nan",
            "10695": "nan",
            "10700": 1.56274,
            "10705": "nan",
            "10710": "nan",
            "10715": "nan",
            "10720": "nan",
            "10725": "nan",
            "10730": "nan",
            "10735": "nan",
            "10740": "nan",
            "10745": "nan",
            "10750": "nan",
            "10755": "nan",
            "10760": "nan",
            "10765": "nan",
            "10770": "nan",
            "10775": "nan",
            "10780": "nan",
            "10785": "nan",
            "10790": "nan",
            "10795": "nan",
            "10800": 1.60491,
            "10805": "nan",
            "10810": "nan",
            "10815": "nan",
            "10820": "nan",
            "10825": "nan",
            "10830": "nan",
            "10835": "nan",
            "10840": "nan",
            "10845": "nan",
            "10850": "nan",
            "10855": "nan",
            "10860": "nan",
            "10865": "nan",
            "10870": "nan",
            "10875": "nan",
            "10880": "nan",
            "10885": "nan",
            "10890": "nan",
            "10895": "nan",
            "10900": 1.54037,
            "10905": "nan",
            "10910": "nan",
            "10915": "nan",
            "10920": "nan",
            "10925": "nan",
            "10930": "nan",
            "10935": "nan",
            "10940": "nan",
            "10945": "nan",
            "10950": "nan",
            "10955": "nan",
            "10960": "nan",
            "10965": "nan",
            "10970": "nan",
            "10975": "nan",
            "10980": "nan",
            "10985": "nan",
            "10990": "nan",
            "10995": "nan",
            "11000": 1.53164,
            "11005": "nan",
            "11010": "nan",
            "11015": "nan",
            "11020": "nan",
            "11025": "nan",
            "11030": "nan",
            "11035": "nan",
            "11040": "nan",
            "11045": "nan",
            "11050": "nan",
            "11055": "nan",
            "11060": "nan",
            "11065": "nan",
            "11070": "nan",
            "11075": "nan",
            "11080": "nan",
            "11085": "nan",
            "11090": "nan",
            "11095": "nan",
            "11100": 1.6827,
            "11105": "nan",
            "11110": "nan",
            "11115": "nan",
            "11120": "nan",
            "11125": "nan",
            "11130": "nan",
            "11135": "nan",
            "11140": "nan",
            "11145": "nan",
            "11150": "nan",
            "11155": "nan",
            "11160": "nan",
            "11165": "nan",
            "11170": "nan",
            "11175": "nan",
            "11180": "nan",
            "11185": "nan",
            "11190": "nan",
            "11195": "nan",
            "11200": 1.24755,
            "11205": "nan",
            "11210": "nan",
            "11215": "nan",
            "11220": "nan",
            "11225": "nan",
            "11230": "nan",
            "11235": "nan",
            "11240": "nan",
            "11245": "nan",
            "11250": "nan",
            "11255": "nan",
            "11260": "nan",
            "11265": "nan",
            "11270": "nan",
            "11275": "nan",
            "11280": "nan",
            "11285": "nan",
            "11290": "nan",
            "11295": "nan",
            "11300": 1.58769,
            "11305": "nan",
            "11310": "nan",
            "11315": "nan",
            "11320": "nan",
            "11325": "nan",
            "11330": "nan",
            "11335": "nan",
            "11340": "nan",
            "11345": "nan",
            "11350": "nan",
            "11355": "nan",
            "11360": "nan",
            "11365": "nan",
            "11370": "nan",
            "11375": "nan",
            "11380": "nan",
            "11385": "nan",
            "11390": "nan",
            "11395": "nan",
            "11400": 1.24695,
            "11405": "nan",
            "11410": "nan",
            "11415": "nan",
            "11420": "nan",
            "11425": "nan",
            "11430": "nan",
            "11435": "nan",
            "11440": "nan",
            "11445": "nan",
            "11450": "nan",
            "11455": "nan",
            "11460": "nan",
            "11465": "nan",
            "11470": "nan",
            "11475": "nan",
            "11480": "nan",
            "11485": "nan",
            "11490": "nan",
            "11495": "nan",
            "11500": 1.26924,
            "11505": "nan",
            "11510": "nan",
            "11515": "nan",
            "11520": "nan",
            "11525": "nan",
            "11530": "nan",
            "11535": "nan",
            "11540": "nan",
            "11545": "nan",
            "11550": "nan",
            "11555": "nan",
            "11560": "nan",
            "11565": "nan",
            "11570": "nan",
            "11575": "nan",
            "11580": "nan",
            "11585": "nan",
            "11590": "nan",
            "11595": "nan",
            "11600": 1.58092,
            "11605": "nan",
            "11610": "nan",
            "11615": "nan",
            "11620": "nan",
            "11625": "nan",
            "11630": "nan",
            "11635": "nan",
            "11640": "nan",
            "11645": "nan",
            "11650": "nan",
            "11655": "nan",
            "11660": "nan",
            "11665": "nan",
            "11670": "nan",
            "11675": "nan",
            "11680": "nan",
            "11685": "nan",
            "11690": "nan",
            "11695": "nan",
            "11700": 1.2414,
            "11705": "nan",
            "11710": "nan",
            "11715": "nan",
            "11720": "nan",
            "11725": "nan",
            "11730": "nan",
            "11735": "nan",
            "11740": "nan",
            "11745": "nan",
            "11750": "nan",
            "11755": "nan",
            "11760": "nan",
            "11765": "nan",
            "11770": "nan",
            "11775": "nan",
            "11780": "nan",
            "11785": "nan",
            "11790": "nan",
            "11795": "nan",
            "11800": 1.55653,
            "11805": "nan",
            "11810": "nan",
            "11815": "nan",
            "11820": "nan",
            "11825": "nan",
            "11830": "nan",
            "11835": "nan",
            "11840": "nan",
            "11845": "nan",
            "11850": "nan",
            "11855": "nan",
            "11860": "nan",
            "11865": "nan",
            "11870": "nan",
            "11875": "nan",
            "11880": "nan",
            "11885": "nan",
            "11890": "nan",
            "11895": "nan",
            "11900": 1.24347,
            "11905": "nan",
            "11910": "nan",
            "11915": "nan",
            "11920": "nan",
            "11925": "nan",
            "11930": "nan",
            "11935": "nan",
            "11940": "nan",
            "11945": "nan",
            "11950": "nan",
            "11955": "nan",
            "11960": "nan",
            "11965": "nan",
            "11970": "nan",
            "11975": "nan",
            "11980": "nan",
            "11985": "nan",
            "11990": "nan",
            "11995": "nan",
            "12000": 1.26258,
            "12005": "nan",
            "12010": "nan",
            "12015": "nan",
            "12020": "nan",
            "12025": "nan",
            "12030": "nan",
            "12035": "nan",
            "12040": "nan",
            "12045": "nan",
            "12050": "nan",
            "12055": "nan",
            "12060": "nan",
            "12065": "nan",
            "12070": "nan",
            "12075": "nan",
            "12080": "nan",
            "12085": "nan",
            "12090": "nan",
            "12095": "nan",
            "12100": 1.31137,
            "12105": "nan",
            "12110": "nan",
            "12115": "nan",
            "12120": "nan",
            "12125": "nan",
            "12130": "nan",
            "12135": "nan",
            "12140": "nan",
            "12145": "nan",
            "12150": "nan",
            "12155": "nan",
            "12160": "nan",
            "12165": "nan",
            "12170": "nan",
            "12175": "nan",
            "12180": "nan",
            "12185": "nan",
            "12190": "nan",
            "12195": "nan",
            "12200": 1.2351,
            "12205": "nan",
            "12210": "nan",
            "12215": "nan",
            "12220": "nan",
            "12225": "nan",
            "12230": "nan",
            "12235": "nan",
            "12240": "nan",
            "12245": "nan",
            "12250": "nan",
            "12255": "nan",
            "12260": "nan",
            "12265": "nan",
            "12270": "nan",
            "12275": "nan",
            "12280": "nan",
            "12285": "nan",
            "12290": "nan",
            "12295": "nan",
            "12300": 1.23767,
            "12305": "nan",
            "12310": "nan",
            "12315": "nan",
            "12320": "nan",
            "12325": "nan",
            "12330": "nan",
            "12335": "nan",
            "12340": "nan",
            "12345": "nan",
            "12350": "nan",
            "12355": "nan",
            "12360": "nan",
            "12365": "nan",
            "12370": "nan",
            "12375": "nan",
            "12380": "nan",
            "12385": "nan",
            "12390": "nan",
            "12395": "nan",
            "12400": 1.50314,
            "12405": "nan",
            "12410": "nan",
            "12415": "nan",
            "12420": "nan",
            "12425": "nan",
            "12430": "nan",
            "12435": "nan",
            "12440": "nan",
            "12445": "nan",
            "12450": "nan",
            "12455": "nan",
            "12460": "nan",
            "12465": "nan",
            "12470": "nan",
            "12475": "nan",
            "12480": "nan",
            "12485": "nan",
            "12490": "nan",
            "12495": "nan",
            "12500": 1.44228,
            "12505": "nan",
            "12510": "nan",
            "12515": "nan",
            "12520": "nan",
            "12525": "nan",
            "12530": "nan",
            "12535": "nan",
            "12540": "nan",
            "12545": "nan",
            "12550": "nan",
            "12555": "nan",
            "12560": "nan",
            "12565": "nan",
            "12570": "nan",
            "12575": "nan",
            "12580": "nan",
            "12585": "nan",
            "12590": "nan",
            "12595": "nan",
            "12600": 1.14538,
            "12605": "nan",
            "12610": "nan",
            "12615": "nan",
            "12620": "nan",
            "12625": "nan",
            "12630": "nan",
            "12635": "nan",
            "12640": "nan",
            "12645": "nan",
            "12650": "nan",
            "12655": "nan",
            "12660": "nan",
            "12665": "nan",
            "12670": "nan",
            "12675": "nan",
            "12680": "nan",
            "12685": "nan",
            "12690": "nan",
            "12695": "nan",
            "12700": 1.47378,
            "12705": "nan",
            "12710": "nan",
            "12715": "nan",
            "12720": "nan",
            "12725": "nan",
            "12730": "nan",
            "12735": "nan",
            "12740": "nan",
            "12745": "nan",
            "12750": "nan",
            "12755": "nan",
            "12760": "nan",
            "12765": "nan",
            "12770": "nan",
            "12775": "nan",
            "12780": "nan",
            "12785": "nan",
            "12790": "nan",
            "12795": "nan",
            "12800": 1.14584,
            "12805": "nan",
            "12810": "nan",
            "12815": "nan",
            "12820": "nan",
            "12825": "nan",
            "12830": "nan",
            "12835": "nan",
            "12840": "nan",
            "12845": "nan",
            "12850": "nan",
            "12855": "nan",
            "12860": "nan",
            "12865": "nan",
            "12870": "nan",
            "12875": "nan",
            "12880": "nan",
            "12885": "nan",
            "12890": "nan",
            "12895": "nan",
            "12900": 1.16693,
            "12905": "nan",
            "12910": "nan",
            "12915": "nan",
            "12920": "nan",
            "12925": "nan",
            "12930": "nan",
            "12935": "nan",
            "12940": "nan",
            "12945": "nan",
            "12950": "nan",
            "12955": "nan",
            "12960": "nan",
            "12965": "nan",
            "12970": "nan",
            "12975": "nan",
            "12980": "nan",
            "12985": "nan",
            "12990": "nan",
            "12995": "nan",
            "13000": 1.47183
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release_sm_gb200/golden_values_dev_dgx_h100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 13000, "step_interval": 5, "values": {"1": 12.98419, "5": 12.93858, "10": 12.06404, "15": 11.97882, "20": 10.53588, "25": 10.11952, "30": 9.7286, "35": 9.44173, "40": 9.2373, "45": 9.03763, "50": 8.85277, "55": 8.64259, "60": 8.60098, "65": 8.50179, "70": 8.41326, "75": 8.31346, "80": 8.16921, "85": 8.09253, "90": 7.97894, "95": 7.91859, "100": 7.82704, "105": 7.71191, "110": 7.62418, "115": 7.52685, "120": 7.48107, "125": 7.48004, "130": 7.33364, "135": 7.26758, "140": 7.23146, "145": 7.04647, "150": 7.17621, "155": 7.00383, "160": 6.89968, "165": 6.91293, "170": 6.84228, "175": 6.85916, "180": 6.81429, "185": 6.7203, "190": 6.66124, "195": 6.59364, "200": 6.64046, "205": 6.64305, "210": 6.5179, "215": 6.51519, "220": 6.51027, "225": 6.46653, "230": 6.47574, "235": 6.42409, "240": 6.36976, "245": 6.3778, "250": 6.29868, "255": 6.43438, "260": 6.34377, "265": 6.28803, "270": 6.23364, "275": 6.26123, "280": 6.19076, "285": 6.19886, "290": 6.15022, "295": 6.12619, "300": 6.11141, "305": 6.01886, "310": 6.08556, "315": 6.07169, "320": 5.99243, "325": 5.93189, "330": 5.99792, "335": 6.0145, "340": 5.93453, "345": 5.92339, "350": 5.87179, "355": 5.84258, "360": 5.85866, "365": 5.81752, "370": 5.80407, "375": 5.80516, "380": 5.85848, "385": 5.78993, "390": 5.81141, "395": 5.68051, "400": 5.66121, "405": 5.68906, "410": 5.66202, "415": 5.70461, "420": 5.63851, "425": 5.66062, "430": 5.62802, "435": 5.56913, "440": 5.62147, "445": 5.52803, "450": 5.58428, "455": 5.5123, "460": 5.49325, "465": 5.56828, "470": 5.54845, "475": 5.49678, "480": 5.46247, "485": 5.49185, "490": 5.47566, "495": 5.47856, "500": 5.42533, "505": 5.38883, "510": 5.44319, "515": 5.42148, "520": 5.47608, "525": 5.31477, "530": 5.33216, "535": 5.36, "540": 5.33276, "545": 5.41314, "550": 5.37099, "555": 5.23374, "560": 5.32665, "565": 5.27809, "570": 5.25324, "575": 5.28184, "580": 5.23593, "585": 5.21762, "590": 5.22346, "595": 5.22561, "600": 5.26751, "605": 5.22896, "610": 5.2012, "615": 5.18737, "620": 5.19543, "625": 5.19655, "630": 5.14985, "635": 5.12452, "640": 5.09298, "645": 5.13279, "650": 5.14481, "655": 5.11963, "660": 5.0475, "665": 5.1142, "670": 5.04119, "675": 5.01723, "680": 5.05635, "685": 5.00678, "690": 5.01633, "695": 4.96228, "700": 4.97301, "705": 4.95571, "710": 4.97305, "715": 4.87719, "720": 4.85764, "725": 4.80769, "730": 4.84352, "735": 4.82916, "740": 4.8644, "745": 4.74895, "750": 4.75764, "755": 4.8023, "760": 4.78257, "765": 4.76428, "770": 4.69615, "775": 4.69212, "780": 4.684, "785": 4.7405, "790": 4.67498, "795": 4.64675, "800": 4.61184, "805": 4.61203, "810": 4.65393, "815": 4.60253, "820": 4.62914, "825": 4.58486, "830": 4.57946, "835": 4.56275, "840": 4.48603, "845": 4.50743, "850": 4.45704, "855": 4.51258, "860": 4.43583, "865": 4.52116, "870": 4.47717, "875": 4.38345, "880": 4.41849, "885": 4.38985, "890": 4.43389, "895": 4.42652, "900": 4.39808, "905": 4.32618, "910": 4.35391, "915": 4.34171, "920": 4.38377, "925": 4.38353, "930": 4.30961, "935": 4.29021, "940": 4.36235, "945": 4.31265, "950": 4.35051, "955": 4.28087, "960": 4.19218, "965": 4.27537, "970": 4.26236, "975": 4.24397, "980": 4.22146, "985": 4.17523, "990": 4.13237, "995": 4.18304, "1000": 4.2308, "1005": 4.18908, "1010": 4.17127, "1015": 4.13273, "1020": 4.15962, "1025": 4.22451, "1030": 4.1247, "1035": 4.10028, "1040": 4.13078, "1045": 4.11598, "1050": 4.15985, "1055": 4.0973, "1060": 4.1011, "1065": 4.06848, "1070": 4.05654, "1075": 4.07977, "1080": 4.07117, "1085": 4.06586, "1090": 4.02567, "1095": 4.09923, "1100": 4.06572, "1105": 4.071, "1110": 4.03046, "1115": 4.00398, "1120": 4.00356, "1125": 4.00318, "1130": 4.04694, "1135": 4.01101, "1140": 4.00143, "1145": 3.93633, "1150": 4.03948, "1155": 4.00619, "1160": 3.9851, "1165": 3.86882, "1170": 3.92684, "1175": 3.93038, "1180": 3.95878, "1185": 3.96184, "1190": 3.93034, "1195": 3.94125, "1200": 3.89932, "1205": 3.87266, "1210": 3.97831, "1215": 3.84899, "1220": 3.87263, "1225": 3.81413, "1230": 3.92846, "1235": 3.89312, "1240": 3.87537, "1245": 3.80079, "1250": 3.82885, "1255": 3.8493, "1260": 3.8874, "1265": 3.79336, "1270": 3.88167, "1275": 3.8455, "1280": 3.82143, "1285": 3.85735, "1290": 3.87159, "1295": 3.84912, "1300": 3.81711, "1305": 3.82495, "1310": 3.82551, "1315": 3.81192, "1320": 3.81405, "1325": 3.72789, "1330": 3.79529, "1335": 3.76889, "1340": 3.76123, "1345": 3.74704, "1350": 3.73516, "1355": 3.76572, "1360": 3.73919, "1365": 3.73441, "1370": 3.73284, "1375": 3.74202, "1380": 3.74909, "1385": 3.7541, "1390": 3.66502, "1395": 3.73883, "1400": 3.72777, "1405": 3.66183, "1410": 3.67095, "1415": 3.64909, "1420": 3.7012, "1425": 3.71121, "1430": 3.6707, "1435": 3.65988, "1440": 3.63587, "1445": 3.67497, "1450": 3.67892, "1455": 3.65003, "1460": 3.64827, "1465": 3.69085, "1470": 3.63258, "1475": 3.6929, "1480": 3.65433, "1485": 3.65353, "1490": 3.61514, "1495": 3.60996, "1500": 3.65344, "1505": 3.69268, "1510": 3.55544, "1515": 3.60731, "1520": 3.63573, "1525": 3.59446, "1530": 3.59436, "1535": 3.59439, "1540": 3.59806, "1545": 3.59944, "1550": 3.56476, "1555": 3.56897, "1560": 3.61095, "1565": 3.62486, "1570": 3.58908, "1575": 3.54769, "1580": 3.59493, "1585": 3.58838, "1590": 3.47011, "1595": 3.51425, "1600": 3.50212, "1605": 3.55557, "1610": 3.56864, "1615": 3.50712, "1620": 3.52052, "1625": 3.47381, "1630": 3.50275, "1635": 3.55197, "1640": 3.51791, "1645": 3.54635, "1650": 3.49031, "1655": 3.4704, "1660": 3.52131, "1665": 3.45881, "1670": 3.51236, "1675": 3.50045, "1680": 3.46681, "1685": 3.49078, "1690": 3.48353, "1695": 3.49776, "1700": 3.46185, "1705": 3.40868, "1710": 3.48728, "1715": 3.49675, "1720": 3.43132, "1725": 3.44122, "1730": 3.42961, "1735": 3.4644, "1740": 3.45584, "1745": 3.43534, "1750": 3.41748, "1755": 3.42246, "1760": 3.37607, "1765": 3.42468, "1770": 3.43261, "1775": 3.379, "1780": 3.4355, "1785": 3.42306, "1790": 3.37807, "1795": 3.40594, "1800": 3.34684, "1805": 3.39025, "1810": 3.34654, "1815": 3.4221, "1820": 3.41309, "1825": 3.38102, "1830": 3.32801, "1835": 3.42581, "1840": 3.38756, "1845": 3.42483, "1850": 3.39575, "1855": 3.37642, "1860": 3.35084, "1865": 3.38387, "1870": 3.29854, "1875": 3.45603, "1880": 3.34133, "1885": 3.36396, "1890": 3.34216, "1895": 3.39809, "1900": 3.36794, "1905": 3.30189, "1910": 3.32844, "1915": 3.3151, "1920": 3.36278, "1925": 3.33318, "1930": 3.31578, "1935": 3.31145, "1940": 3.36438, "1945": 3.26186, "1950": 3.40063, "1955": 3.30708, "1960": 3.31486, "1965": 3.28405, "1970": 3.29999, "1975": 3.33744, "1980": 3.34165, "1985": 3.23762, "1990": 3.32593, "1995": 3.28362, "2000": 3.27303, "2005": 3.26618, "2010": 3.28661, "2015": 3.22715, "2020": 3.27479, "2025": 3.27135, "2030": 3.27231, "2035": 3.29709, "2040": 3.26992, "2045": 3.23395, "2050": 3.27144, "2055": 3.32457, "2060": 3.28203, "2065": 3.24424, "2070": 3.30416, "2075": 3.24361, "2080": 3.22085, "2085": 3.30421, "2090": 3.15415, "2095": 3.29493, "2100": 3.23056, "2105": 3.19586, "2110": 3.20308, "2115": 3.24137, "2120": 3.18045, "2125": 3.21484, "2130": 3.22503, "2135": 3.27763, "2140": 3.1948, "2145": 3.21466, "2150": 3.20867, "2155": 3.2365, "2160": 3.20615, "2165": 3.25296, "2170": 3.23909, "2175": 3.17384, "2180": 3.22049, "2185": 3.25755, "2190": 3.24863, "2195": 3.15892, "2200": 3.2045, "2205": 3.17192, "2210": 3.12378, "2215": 3.19067, "2220": 3.19593, "2225": 3.18949, "2230": 3.13791, "2235": 3.19063, "2240": 3.21593, "2245": 3.1871, "2250": 3.21088, "2255": 3.1585, "2260": 3.14105, "2265": 3.23218, "2270": 3.21271, "2275": 3.1566, "2280": 3.17614, "2285": 3.16486, "2290": 3.17692, "2295": 3.20197, "2300": 3.13847, "2305": 3.15963, "2310": 3.12614, "2315": 3.06316, "2320": 3.11512, "2325": 3.17382, "2330": 3.12397, "2335": 3.12649, "2340": 3.17195, "2345": 3.12462, "2350": 3.129, "2355": 3.11726, "2360": 3.16031, "2365": 3.09266, "2370": 3.15197, "2375": 3.13019, "2380": 3.11082, "2385": 3.09359, "2390": 3.09567, "2395": 3.09807, "2400": 3.09966, "2405": 3.10436, "2410": 3.09007, "2415": 3.09491, "2420": 3.08537, "2425": 3.07877, "2430": 3.08079, "2435": 3.06761, "2440": 3.08574, "2445": 3.05747, "2450": 3.12167, "2455": 3.15832, "2460": 3.08596, "2465": 3.07656, "2470": 3.03663, "2475": 3.06421, "2480": 3.10252, "2485": 3.06485, "2490": 3.06573, "2495": 3.08845, "2500": 3.05671, "2505": 3.105, "2510": 3.12399, "2515": 3.0532, "2520": 3.07806, "2525": 3.02426, "2530": 3.04842, "2535": 3.09401, "2540": 3.07984, "2545": 3.05538, "2550": 3.00469, "2555": 3.07001, "2560": 3.04403, "2565": 3.12, "2570": 3.00976, "2575": 3.0601, "2580": 3.08548, "2585": 3.02156, "2590": 3.06606, "2595": 2.99925, "2600": 3.0841, "2605": 3.06879, "2610": 3.05401, "2615": 3.06935, "2620": 2.99191, "2625": 3.01384, "2630": 3.03627, "2635": 3.05041, "2640": 3.01088, "2645": 3.05612, "2650": 3.02233, "2655": 2.98756, "2660": 3.01604, "2665": 3.03817, "2670": 2.98547, "2675": 2.97442, "2680": 3.00378, "2685": 3.00171, "2690": 2.99912, "2695": 2.99265, "2700": 3.03079, "2705": 2.98376, "2710": 2.97975, "2715": 2.96047, "2720": 3.02663, "2725": 2.99565, "2730": 3.05827, "2735": 3.04913, "2740": 3.02027, "2745": 3.02502, "2750": 3.02065, "2755": 2.96792, "2760": 2.99447, "2765": 3.00785, "2770": 2.98958, "2775": 2.99278, "2780": 3.02294, "2785": 2.95383, "2790": 2.96474, "2795": 2.95595, "2800": 2.98985, "2805": 2.94051, "2810": 2.99046, "2815": 2.95976, "2820": 3.0756, "2825": 3.03639, "2830": 3.01855, "2835": 2.92175, "2840": 2.92574, "2845": 2.96102, "2850": 2.96997, "2855": 2.96207, "2860": 2.94977, "2865": 2.91535, "2870": 2.99202, "2875": 2.92084, "2880": 2.96303, "2885": 2.91779, "2890": 2.98572, "2895": 2.93253, "2900": 2.95289, "2905": 3.00499, "2910": 2.92994, "2915": 2.94325, "2920": 2.95516, "2925": 2.94427, "2930": 2.95621, "2935": 2.94005, "2940": 2.94552, "2945": 2.9075, "2950": 2.97913, "2955": 2.91177, "2960": 2.97029, "2965": 2.87292, "2970": 2.96107, "2975": 2.99603, "2980": 2.94257, "2985": 3.04155, "2990": 2.93897, "2995": 2.87114, "3000": 2.9422, "3005": 2.89655, "3010": 2.93538, "3015": 2.91032, "3020": 2.91995, "3025": 2.91883, "3030": 2.92686, "3035": 2.95815, "3040": 2.9312, "3045": 2.83504, "3050": 2.8988, "3055": 2.89613, "3060": 2.92461, "3065": 2.92459, "3070": 2.88159, "3075": 2.86953, "3080": 2.9243, "3085": 2.90325, "3090": 2.91754, "3095": 2.92816, "3100": 2.86703, "3105": 2.92918, "3110": 2.90236, "3115": 2.94681, "3120": 2.95312, "3125": 2.86217, "3130": 2.93048, "3135": 2.92489, "3140": 2.87699, "3145": 2.91715, "3150": 2.85701, "3155": 2.8442, "3160": 2.83887, "3165": 2.84564, "3170": 2.89213, "3175": 2.90452, "3180": 2.85788, "3185": 2.89571, "3190": 2.90627, "3195": 2.92723, "3200": 2.92789, "3205": 2.85912, "3210": 2.86987, "3215": 2.91563, "3220": 2.87374, "3225": 2.86935, "3230": 2.815, "3235": 2.87434, "3240": 2.8734, "3245": 2.90299, "3250": 2.86289, "3255": 2.8503, "3260": 2.85959, "3265": 2.86936, "3270": 2.85223, "3275": 2.86681, "3280": 2.79974, "3285": 2.81123, "3290": 2.86928, "3295": 2.92038, "3300": 2.87938, "3305": 2.86113, "3310": 2.85785, "3315": 2.80615, "3320": 2.8258, "3325": 2.82403, "3330": 2.82839, "3335": 2.8465, "3340": 2.82742, "3345": 2.84707, "3350": 2.84121, "3355": 2.85933, "3360": 2.79899, "3365": 2.85514, "3370": 2.84537, "3375": 2.84431, "3380": 2.84971, "3385": 2.87822, "3390": 2.8682, "3395": 2.81029, "3400": 2.78472, "3405": 2.82816, "3410": 2.84591, "3415": 2.86134, "3420": 2.82742, "3425": 2.81129, "3430": 2.82982, "3435": 2.8906, "3440": 2.81795, "3445": 2.86973, "3450": 2.81507, "3455": 2.7888, "3460": 2.8203, "3465": 2.84691, "3470": 2.83475, "3475": 2.7758, "3480": 2.84173, "3485": 2.82043, "3490": 2.8933, "3495": 2.84901, "3500": 2.84084, "3505": 2.82498, "3510": 2.81285, "3515": 2.83554, "3520": 2.77982, "3525": 2.80208, "3530": 2.84998, "3535": 2.78436, "3540": 2.83771, "3545": 2.81048, "3550": 2.79684, "3555": 2.8181, "3560": 2.82828, "3565": 2.82912, "3570": 2.80393, "3575": 2.80372, "3580": 2.82073, "3585": 2.83541, "3590": 2.8298, "3595": 2.77921, "3600": 2.74897, "3605": 2.79083, "3610": 2.8488, "3615": 2.75492, "3620": 2.80351, "3625": 2.88338, "3630": 2.77314, "3635": 2.78625, "3640": 2.78253, "3645": 2.76926, "3650": 2.80301, "3655": 2.81469, "3660": 2.76524, "3665": 2.7858, "3670": 2.77457, "3675": 2.77711, "3680": 2.80733, "3685": 2.80194, "3690": 2.8055, "3695": 2.81135, "3700": 2.78752, "3705": 2.78408, "3710": 2.75166, "3715": 2.80201, "3720": 2.79409, "3725": 2.78884, "3730": 2.84096, "3735": 2.80547, "3740": 2.74952, "3745": 2.78986, "3750": 2.8022, "3755": 2.79576, "3760": 2.75757, "3765": 2.75586, "3770": 2.75989, "3775": 2.76991, "3780": 2.76383, "3785": 2.7793, "3790": 2.74202, "3795": 2.79511, "3800": 2.80269, "3805": 2.75159, "3810": 2.80354, "3815": 2.76482, "3820": 2.78758, "3825": 2.73331, "3830": 2.74563, "3835": 2.81464, "3840": 2.72812, "3845": 2.71424, "3850": 2.77453, "3855": 2.71774, "3860": 2.80173, "3865": 2.75469, "3870": 2.77531, "3875": 2.75779, "3880": 2.78968, "3885": 2.78424, "3890": 2.74541, "3895": 2.79804, "3900": 2.76127, "3905": 2.72353, "3910": 2.74147, "3915": 2.75183, "3920": 2.79462, "3925": 2.77792, "3930": 2.70759, "3935": 2.73982, "3940": 2.75131, "3945": 2.74267, "3950": 2.725, "3955": 2.77958, "3960": 2.75991, "3965": 2.74216, "3970": 2.75653, "3975": 2.72552, "3980": 2.73817, "3985": 2.75045, "3990": 2.69347, "3995": 2.78059, "4000": 2.73558, "4005": 2.7658, "4010": 2.70885, "4015": 2.72538, "4020": 2.74956, "4025": 2.733, "4030": 2.65924, "4035": 2.69455, "4040": 2.74652, "4045": 2.74857, "4050": 2.78817, "4055": 2.7239, "4060": 2.71419, "4065": 2.6515, "4070": 2.80691, "4075": 2.75748, "4080": 2.71884, "4085": 2.74977, "4090": 2.67836, "4095": 2.69073, "4100": 2.7114, "4105": 2.73822, "4110": 2.72956, "4115": 2.70127, "4120": 2.73267, "4125": 2.70389, "4130": 2.69553, "4135": 2.6893, "4140": 2.68057, "4145": 2.77973, "4150": 2.70801, "4155": 2.73792, "4160": 2.76329, "4165": 2.72099, "4170": 2.67438, "4175": 2.71828, "4180": 2.72666, "4185": 2.72916, "4190": 2.73806, "4195": 2.70222, "4200": 2.71066, "4205": 2.73922, "4210": 2.67309, "4215": 2.66565, "4220": 2.65928, "4225": 2.70242, "4230": 2.71409, "4235": 2.7326, "4240": 2.70413, "4245": 2.69658, "4250": 2.71363, "4255": 2.64837, "4260": 2.7266, "4265": 2.73863, "4270": 2.72157, "4275": 2.68943, "4280": 2.70233, "4285": 2.73114, "4290": 2.68765, "4295": 2.69223, "4300": 2.69956, "4305": 2.70313, "4310": 2.73003, "4315": 2.71191, "4320": 2.69906, "4325": 2.70557, "4330": 2.7106, "4335": 2.69172, "4340": 2.6976, "4345": 2.72675, "4350": 2.67431, "4355": 2.69349, "4360": 2.71041, "4365": 2.78314, "4370": 2.73369, "4375": 2.74431, "4380": 2.71504, "4385": 2.69901, "4390": 2.70191, "4395": 2.75058, "4400": 2.66501, "4405": 2.66954, "4410": 2.68278, "4415": 2.70442, "4420": 2.7077, "4425": 2.72158, "4430": 2.69277, "4435": 2.68035, "4440": 2.69127, "4445": 2.67896, "4450": 2.65272, "4455": 2.69044, "4460": 2.70302, "4465": 2.70631, "4470": 2.6731, "4475": 2.68761, "4480": 2.65626, "4485": 2.69968, "4490": 2.65279, "4495": 2.70894, "4500": 2.70235, "4505": 2.69674, "4510": 2.64893, "4515": 2.70162, "4520": 2.66802, "4525": 2.66816, "4530": 2.6736, "4535": 2.67113, "4540": 2.70729, "4545": 2.65603, "4550": 2.70177, "4555": 2.68009, "4560": 2.65895, "4565": 2.63898, "4570": 2.6402, "4575": 2.66692, "4580": 2.68858, "4585": 2.68157, "4590": 2.61727, "4595": 2.66336, "4600": 2.67638, "4605": 2.68094, "4610": 2.66704, "4615": 2.66234, "4620": 2.65727, "4625": 2.68721, "4630": 2.6742, "4635": 2.64708, "4640": 2.69575, "4645": 2.64774, "4650": 2.7018, "4655": 2.70661, "4660": 2.67838, "4665": 2.68918, "4670": 2.67556, "4675": 2.68422, "4680": 2.66596, "4685": 2.65737, "4690": 2.70912, "4695": 2.65528, "4700": 2.67174, "4705": 2.65146, "4710": 2.68366, "4715": 2.64964, "4720": 2.72485, "4725": 2.62902, "4730": 2.65213, "4735": 2.68926, "4740": 2.64614, "4745": 2.65553, "4750": 2.65737, "4755": 2.65793, "4760": 2.66632, "4765": 2.64489, "4770": 2.62202, "4775": 2.65194, "4780": 2.65519, "4785": 2.68655, "4790": 2.65201, "4795": 2.67305, "4800": 2.62427, "4805": 2.64099, "4810": 2.65942, "4815": 2.65033, "4820": 2.6668, "4825": 2.65019, "4830": 2.6151, "4835": 2.64832, "4840": 2.65513, "4845": 2.6348, "4850": 2.62482, "4855": 2.60337, "4860": 2.65757, "4865": 2.62536, "4870": 2.63972, "4875": 2.61897, "4880": 2.62576, "4885": 2.62632, "4890": 2.67912, "4895": 2.65961, "4900": 2.618, "4905": 2.61823, "4910": 2.63845, "4915": 2.61463, "4920": 2.65397, "4925": 2.64838, "4930": 2.57129, "4935": 2.65193, "4940": 2.63034, "4945": 2.63777, "4950": 2.62825, "4955": 2.61794, "4960": 2.61856, "4965": 2.65951, "4970": 2.6008, "4975": 2.65676, "4980": 2.62049, "4985": 2.63225, "4990": 2.65645, "4995": 2.58184, "5000": 2.6621, "5005": 2.6658, "5010": 2.68112, "5015": 2.63396, "5020": 2.64091, "5025": 2.68726, "5030": 2.64362, "5035": 2.61873, "5040": 2.62248, "5045": 2.60699, "5050": 2.62641, "5055": 2.65014, "5060": 2.64375, "5065": 2.68893, "5070": 2.60617, "5075": 2.61421, "5080": 2.61231, "5085": 2.60499, "5090": 2.59441, "5095": 2.65086, "5100": 2.64984, "5105": 2.61053, "5110": 2.66408, "5115": 2.62171, "5120": 2.67055, "5125": 2.6309, "5130": 2.615, "5135": 2.61462, "5140": 2.57424, "5145": 2.62966, "5150": 2.63646, "5155": 2.61887, "5160": 2.66278, "5165": 2.58409, "5170": 2.59136, "5175": 2.62185, "5180": 2.60659, "5185": 2.62099, "5190": 2.6266, "5195": 2.67047, "5200": 2.5968, "5205": 2.60868, "5210": 2.60701, "5215": 2.64792, "5220": 2.58826, "5225": 2.55166, "5230": 2.6359, "5235": 2.61417, "5240": 2.62802, "5245": 2.64006, "5250": 2.61297, "5255": 2.62612, "5260": 2.5619, "5265": 2.59802, "5270": 2.58865, "5275": 2.61781, "5280": 2.61032, "5285": 2.60442, "5290": 2.63245, "5295": 2.62071, "5300": 2.57979, "5305": 2.59834, "5310": 2.60591, "5315": 2.5881, "5320": 2.61539, "5325": 2.64615, "5330": 2.6015, "5335": 2.58439, "5340": 2.56291, "5345": 2.65819, "5350": 2.62526, "5355": 2.57953, "5360": 2.59528, "5365": 2.62373, "5370": 2.61518, "5375": 2.63002, "5380": 2.58083, "5385": 2.56502, "5390": 2.58666, "5395": 2.61597, "5400": 2.60909, "5405": 2.54774, "5410": 2.61298, "5415": 2.59619, "5420": 2.61443, "5425": 2.62678, "5430": 2.62674, "5435": 2.57707, "5440": 2.58734, "5445": 2.633, "5450": 2.6473, "5455": 2.61252, "5460": 2.59272, "5465": 2.60502, "5470": 2.60189, "5475": 2.62728, "5480": 2.58753, "5485": 2.59002, "5490": 2.57733, "5495": 2.57075, "5500": 2.56937, "5505": 2.61715, "5510": 2.62664, "5515": 2.58137, "5520": 2.55697, "5525": 2.5859, "5530": 2.66433, "5535": 2.62339, "5540": 2.57109, "5545": 2.59633, "5550": 2.54936, "5555": 2.57342, "5560": 2.56447, "5565": 2.60758, "5570": 2.65168, "5575": 2.63138, "5580": 2.57564, "5585": 2.59822, "5590": 2.56185, "5595": 2.58521, "5600": 2.55512, "5605": 2.59879, "5610": 2.58291, "5615": 2.58198, "5620": 2.58123, "5625": 2.55147, "5630": 2.57081, "5635": 2.63484, "5640": 2.59425, "5645": 2.56995, "5650": 2.58004, "5655": 2.54766, "5660": 2.55881, "5665": 2.58604, "5670": 2.56686, "5675": 2.60728, "5680": 2.52861, "5685": 2.56813, "5690": 2.6039, "5695": 2.55782, "5700": 2.59695, "5705": 2.596, "5710": 2.57921, "5715": 2.58424, "5720": 2.53643, "5725": 2.6038, "5730": 2.57366, "5735": 2.61087, "5740": 2.59519, "5745": 2.56, "5750": 2.54216, "5755": 2.55997, "5760": 2.62481, "5765": 2.56328, "5770": 2.5429, "5775": 2.58373, "5780": 2.57701, "5785": 2.53911, "5790": 2.56461, "5795": 2.60179, "5800": 2.54494, "5805": 2.53531, "5810": 2.55658, "5815": 2.52456, "5820": 2.59694, "5825": 2.50599, "5830": 2.49558, "5835": 2.59597, "5840": 2.53979, "5845": 2.5528, "5850": 2.61315, "5855": 2.5102, "5860": 2.56169, "5865": 2.51778, "5870": 2.57574, "5875": 2.60723, "5880": 2.58596, "5885": 2.56757, "5890": 2.58608, "5895": 2.55562, "5900": 2.61651, "5905": 2.55716, "5910": 2.59828, "5915": 2.61008, "5920": 2.58733, "5925": 2.55324, "5930": 2.57568, "5935": 2.55168, "5940": 2.57131, "5945": 2.5204, "5950": 2.55562, "5955": 2.586, "5960": 2.56741, "5965": 2.62046, "5970": 2.55594, "5975": 2.58503, "5980": 2.55843, "5985": 2.56032, "5990": 2.55653, "5995": 2.55873, "6000": 2.55658, "6005": 2.51961, "6010": 2.5612, "6015": 2.52607, "6020": 2.53453, "6025": 2.55768, "6030": 2.6046, "6035": 2.54228, "6040": 2.54868, "6045": 2.49077, "6050": 2.5963, "6055": 2.5204, "6060": 2.54409, "6065": 2.52518, "6070": 2.52918, "6075": 2.5364, "6080": 2.53607, "6085": 2.59714, "6090": 2.57034, "6095": 2.53592, "6100": 2.5428, "6105": 2.52487, "6110": 2.55483, "6115": 2.58495, "6120": 2.55695, "6125": 2.53683, "6130": 2.47322, "6135": 2.5563, "6140": 2.55589, "6145": 2.55739, "6150": 2.52565, "6155": 2.50872, "6160": 2.54299, "6165": 2.57304, "6170": 2.54638, "6175": 2.60079, "6180": 2.51196, "6185": 2.55194, "6190": 2.49345, "6195": 2.57854, "6200": 2.55164, "6205": 2.5377, "6210": 2.52088, "6215": 2.51358, "6220": 2.56539, "6225": 2.51406, "6230": 2.51072, "6235": 2.56268, "6240": 2.55115, "6245": 2.52327, "6250": 2.53069, "6255": 2.57365, "6260": 2.52537, "6265": 2.57441, "6270": 2.52397, "6275": 2.56565, "6280": 2.52297, "6285": 2.5207, "6290": 2.51982, "6295": 2.50722, "6300": 2.55559, "6305": 2.52486, "6310": 2.51259, "6315": 2.53731, "6320": 2.4894, "6325": 2.59818, "6330": 2.555, "6335": 2.51085, "6340": 2.51313, "6345": 2.55702, "6350": 2.556, "6355": 2.52448, "6360": 2.52293, "6365": 2.48409, "6370": 2.53563, "6375": 2.49779, "6380": 2.56282, "6385": 2.58189, "6390": 2.50441, "6395": 2.55121, "6400": 2.5086, "6405": 2.5278, "6410": 2.51466, "6415": 2.52482, "6420": 2.54258, "6425": 2.53509, "6430": 2.57978, "6435": 2.54444, "6440": 2.53907, "6445": 2.53125, "6450": 2.53474, "6455": 2.52399, "6460": 2.51849, "6465": 2.56225, "6470": 2.52104, "6475": 2.52654, "6480": 2.48826, "6485": 2.52861, "6490": 2.50978, "6495": 2.49978, "6500": 2.52402, "6505": 2.49432, "6510": 2.54199, "6515": 2.5101, "6520": 2.51003, "6525": 2.49503, "6530": 2.54392, "6535": 2.53282, "6540": 2.53291, "6545": 2.56194, "6550": 2.50127, "6555": 2.55627, "6560": 2.51016, "6565": 2.52281, "6570": 2.58445, "6575": 2.52324, "6580": 2.49815, "6585": 2.50823, "6590": 2.5097, "6595": 2.49807, "6600": 2.49539, "6605": 2.54253, "6610": 2.4797, "6615": 2.56766, "6620": 2.53402, "6625": 2.51202, "6630": 2.51431, "6635": 2.47464, "6640": 2.54106, "6645": 2.59681, "6650": 2.51024, "6655": 2.4983, "6660": 2.57419, "6665": 2.52156, "6670": 2.5674, "6675": 2.46861, "6680": 2.54697, "6685": 2.53564, "6690": 2.51427, "6695": 2.48573, "6700": 2.52463, "6705": 2.52218, "6710": 2.49347, "6715": 2.51687, "6720": 2.50996, "6725": 2.52089, "6730": 2.52013, "6735": 2.4825, "6740": 2.51535, "6745": 2.49672, "6750": 2.55754, "6755": 2.47484, "6760": 2.54212, "6765": 2.48878, "6770": 2.51847, "6775": 2.50828, "6780": 2.53878, "6785": 2.47177, "6790": 2.54553, "6795": 2.49868, "6800": 2.52671, "6805": 2.51099, "6810": 2.50296, "6815": 2.52064, "6820": 2.48696, "6825": 2.5071, "6830": 2.54063, "6835": 2.50678, "6840": 2.50885, "6845": 2.52492, "6850": 2.47583, "6855": 2.512, "6860": 2.50239, "6865": 2.49001, "6870": 2.55392, "6875": 2.47561, "6880": 2.55072, "6885": 2.47892, "6890": 2.54905, "6895": 2.50384, "6900": 2.49072, "6905": 2.51205, "6910": 2.5215, "6915": 2.51823, "6920": 2.5328, "6925": 2.54741, "6930": 2.49289, "6935": 2.521, "6940": 2.50604, "6945": 2.46237, "6950": 2.48628, "6955": 2.5288, "6960": 2.51952, "6965": 2.49196, "6970": 2.47065, "6975": 2.52409, "6980": 2.45258, "6985": 2.51631, "6990": 2.52932, "6995": 2.46179, "7000": 2.49172, "7005": 2.47011, "7010": 2.47632, "7015": 2.51983, "7020": 2.46705, "7025": 2.45424, "7030": 2.48487, "7035": 2.47988, "7040": 2.50783, "7045": 2.52359, "7050": 2.52831, "7055": 2.44161, "7060": 2.47409, "7065": 2.48138, "7070": 2.48981, "7075": 2.49452, "7080": 2.53479, "7085": 2.48717, "7090": 2.47618, "7095": 2.4999, "7100": 2.51585, "7105": 2.4884, "7110": 2.487, "7115": 2.50558, "7120": 2.47286, "7125": 2.46376, "7130": 2.48693, "7135": 2.51456, "7140": 2.50032, "7145": 2.49769, "7150": 2.51016, "7155": 2.50401, "7160": 2.47274, "7165": 2.45638, "7170": 2.50459, "7175": 2.50355, "7180": 2.50497, "7185": 2.48172, "7190": 2.46296, "7195": 2.46639, "7200": 2.50998, "7205": 2.49029, "7210": 2.44246, "7215": 2.47885, "7220": 2.4456, "7225": 2.51269, "7230": 2.50805, "7235": 2.48249, "7240": 2.47867, "7245": 2.50035, "7250": 2.50922, "7255": 2.49324, "7260": 2.46058, "7265": 2.45308, "7270": 2.47086, "7275": 2.49781, "7280": 2.49343, "7285": 2.42363, "7290": 2.47944, "7295": 2.48626, "7300": 2.41751, "7305": 2.44554, "7310": 2.44899, "7315": 2.48986, "7320": 2.48389, "7325": 2.45917, "7330": 2.4893, "7335": 2.47688, "7340": 2.46486, "7345": 2.49515, "7350": 2.5106, "7355": 2.49669, "7360": 2.48037, "7365": 2.46906, "7370": 2.47138, "7375": 2.4508, "7380": 2.49622, "7385": 2.48448, "7390": 2.47337, "7395": 2.47339, "7400": 2.48169, "7405": 2.43994, "7410": 2.48078, "7415": 2.47113, "7420": 2.49398, "7425": 2.45774, "7430": 2.52358, "7435": 2.49185, "7440": 2.52151, "7445": 2.5101, "7450": 2.4751, "7455": 2.45401, "7460": 2.46474, "7465": 2.47685, "7470": 2.44899, "7475": 2.45681, "7480": 2.51145, "7485": 2.45042, "7490": 2.47478, "7495": 2.48246, "7500": 2.49584, "7505": 2.44104, "7510": 2.43501, "7515": 2.41997, "7520": 2.49389, "7525": 2.49884, "7530": 2.47668, "7535": 2.4601, "7540": 2.47288, "7545": 2.47471, "7550": 2.49181, "7555": 2.45487, "7560": 2.42922, "7565": 2.51106, "7570": 2.4857, "7575": 2.439, "7580": 2.45825, "7585": 2.48256, "7590": 2.48193, "7595": 2.46508, "7600": 2.46362, "7605": 2.44863, "7610": 2.44948, "7615": 2.42526, "7620": 2.54441, "7625": 2.47879, "7630": 2.42526, "7635": 2.42739, "7640": 2.45364, "7645": 2.47151, "7650": 2.46303, "7655": 2.48304, "7660": 2.4532, "7665": 2.4342, "7670": 2.4426, "7675": 2.45588, "7680": 2.48517, "7685": 2.43208, "7690": 2.48, "7695": 2.45485, "7700": 2.48159, "7705": 2.49878, "7710": 2.49483, "7715": 2.44384, "7720": 2.4696, "7725": 2.47981, "7730": 2.45864, "7735": 2.47057, "7740": 2.43882, "7745": 2.45157, "7750": 2.43921, "7755": 2.46722, "7760": 2.45122, "7765": 2.45511, "7770": 2.47144, "7775": 2.45332, "7780": 2.41653, "7785": 2.44516, "7790": 2.48285, "7795": 2.44125, "7800": 2.46355, "7805": 2.48202, "7810": 2.50258, "7815": 2.48733, "7820": 2.44788, "7825": 2.51471, "7830": 2.45477, "7835": 2.4697, "7840": 2.47907, "7845": 2.46064, "7850": 2.41717, "7855": 2.47244, "7860": 2.49887, "7865": 2.42434, "7870": 2.46693, "7875": 2.44544, "7880": 2.45287, "7885": 2.46023, "7890": 2.47026, "7895": 2.44872, "7900": 2.4404, "7905": 2.43773, "7910": 2.42565, "7915": 2.48107, "7920": 2.47699, "7925": 2.4218, "7930": 2.47199, "7935": 2.44975, "7940": 2.42126, "7945": 2.46977, "7950": 2.44424, "7955": 2.4204, "7960": 2.49038, "7965": 2.5188, "7970": 2.52207, "7975": 2.44798, "7980": 2.44076, "7985": 2.46872, "7990": 2.43169, "7995": 2.46954, "8000": 2.43641, "8005": 2.41891, "8010": 2.45749, "8015": 2.46841, "8020": 2.48116, "8025": 2.47363, "8030": 2.45173, "8035": 2.47071, "8040": 2.41983, "8045": 2.45333, "8050": 2.44721, "8055": 2.42302, "8060": 2.44253, "8065": 2.46158, "8070": 2.4567, "8075": 2.46077, "8080": 2.44618, "8085": 2.44085, "8090": 2.42787, "8095": 2.42397, "8100": 2.43904, "8105": 2.49479, "8110": 2.43878, "8115": 2.58899, "8120": 2.49362, "8125": 2.47876, "8130": 2.45879, "8135": 2.4574, "8140": 2.44166, "8145": 2.42774, "8150": 2.42089, "8155": 2.48312, "8160": 2.45131, "8165": 2.43947, "8170": 2.43326, "8175": 2.42092, "8180": 2.4946, "8185": 2.42477, "8190": 2.46908, "8195": 2.45732, "8200": 2.44651, "8205": 2.44406, "8210": 2.43096, "8215": 2.44122, "8220": 2.43556, "8225": 2.41067, "8230": 2.44055, "8235": 2.46438, "8240": 2.42694, "8245": 2.44767, "8250": 2.44524, "8255": 2.43772, "8260": 2.43153, "8265": 2.42903, "8270": 2.4363, "8275": 2.44197, "8280": 2.39831, "8285": 2.4405, "8290": 2.48021, "8295": 2.44762, "8300": 2.45931, "8305": 2.40847, "8310": 2.43461, "8315": 2.45616, "8320": 2.40422, "8325": 2.39725, "8330": 2.43986, "8335": 2.44684, "8340": 2.49212, "8345": 2.44942, "8350": 2.45049, "8355": 2.40704, "8360": 2.40131, "8365": 2.45443, "8370": 2.45427, "8375": 2.42518, "8380": 2.41939, "8385": 2.42541, "8390": 2.4387, "8395": 2.44193, "8400": 2.44114, "8405": 2.49132, "8410": 2.4383, "8415": 2.43519, "8420": 2.41861, "8425": 2.44324, "8430": 2.46253, "8435": 2.40559, "8440": 2.45227, "8445": 2.45999, "8450": 2.40867, "8455": 2.46028, "8460": 2.45495, "8465": 2.43629, "8470": 2.40854, "8475": 2.47887, "8480": 2.40222, "8485": 2.41392, "8490": 2.46612, "8495": 2.43613, "8500": 2.44492, "8505": 2.40329, "8510": 2.40218, "8515": 2.42871, "8520": 2.42574, "8525": 2.49152, "8530": 2.3746, "8535": 2.40109, "8540": 2.48679, "8545": 2.3811, "8550": 2.43875, "8555": 2.4514, "8560": 2.47019, "8565": 2.42055, "8570": 2.43185, "8575": 2.44959, "8580": 2.44124, "8585": 2.42059, "8590": 2.4038, "8595": 2.42895, "8600": 2.41116, "8605": 2.49131, "8610": 2.42052, "8615": 2.38808, "8620": 2.45039, "8625": 2.42523, "8630": 2.45471, "8635": 2.4509, "8640": 2.43534, "8645": 2.47406, "8650": 2.42305, "8655": 2.45293, "8660": 2.45576, "8665": 2.38622, "8670": 2.41139, "8675": 2.42943, "8680": 2.44841, "8685": 2.43079, "8690": 2.41017, "8695": 2.44311, "8700": 2.43428, "8705": 2.42016, "8710": 2.42854, "8715": 2.44862, "8720": 2.47696, "8725": 2.41012, "8730": 2.39278, "8735": 2.43505, "8740": 2.43198, "8745": 2.39801, "8750": 2.43609, "8755": 2.42381, "8760": 2.40031, "8765": 2.43541, "8770": 2.40569, "8775": 2.43812, "8780": 2.42153, "8785": 2.47144, "8790": 2.42041, "8795": 2.41876, "8800": 2.41592, "8805": 2.40548, "8810": 2.41139, "8815": 2.47509, "8820": 2.45362, "8825": 2.4241, "8830": 2.38744, "8835": 2.42258, "8840": 2.39347, "8845": 2.42679, "8850": 2.43485, "8855": 2.4044, "8860": 2.42715, "8865": 2.42631, "8870": 2.43391, "8875": 2.44152, "8880": 2.41099, "8885": 2.39514, "8890": 2.44614, "8895": 2.42902, "8900": 2.41354, "8905": 2.40085, "8910": 2.4019, "8915": 2.4163, "8920": 2.43454, "8925": 2.46713, "8930": 2.41511, "8935": 2.40784, "8940": 2.38869, "8945": 2.39353, "8950": 2.41789, "8955": 2.39534, "8960": 2.43426, "8965": 2.41798, "8970": 2.40536, "8975": 2.47767, "8980": 2.44109, "8985": 2.37482, "8990": 2.41061, "8995": 2.416, "9000": 2.45568, "9005": 2.41279, "9010": 2.37662, "9015": 2.41141, "9020": 2.40089, "9025": 2.3701, "9030": 2.40026, "9035": 2.4243, "9040": 2.42079, "9045": 2.41805, "9050": 2.39505, "9055": 2.41785, "9060": 2.41922, "9065": 2.40527, "9070": 2.44454, "9075": 2.39395, "9080": 2.43398, "9085": 2.4136, "9090": 2.41293, "9095": 2.39793, "9100": 2.40135, "9105": 2.35782, "9110": 2.46451, "9115": 2.41499, "9120": 2.40368, "9125": 2.45804, "9130": 2.39387, "9135": 2.44878, "9140": 2.43562, "9145": 2.42684, "9150": 2.42505, "9155": 2.3752, "9160": 2.41724, "9165": 2.42569, "9170": 2.37359, "9175": 2.41857, "9180": 2.37803, "9185": 2.43942, "9190": 2.41281, "9195": 2.40662, "9200": 2.39186, "9205": 2.44999, "9210": 2.36248, "9215": 2.46363, "9220": 2.44779, "9225": 2.3828, "9230": 2.44575, "9235": 2.39772, "9240": 2.40182, "9245": 2.43796, "9250": 2.43806, "9255": 2.4326, "9260": 2.38813, "9265": 2.43977, "9270": 2.43657, "9275": 2.39535, "9280": 2.39074, "9285": 2.42225, "9290": 2.40437, "9295": 2.38603, "9300": 2.42495, "9305": 2.40579, "9310": 2.41555, "9315": 2.41153, "9320": 2.44493, "9325": 2.37049, "9330": 2.40434, "9335": 2.36191, "9340": 2.40835, "9345": 2.41458, "9350": 2.44039, "9355": 2.47763, "9360": 2.43745, "9365": 2.38821, "9370": 2.43648, "9375": 2.43331, "9380": 2.35346, "9385": 2.39958, "9390": 2.38109, "9395": 2.38731, "9400": 2.44471, "9405": 2.41259, "9410": 2.39756, "9415": 2.43759, "9420": 2.4441, "9425": 2.43656, "9430": 2.45071, "9435": 2.41453, "9440": 2.47761, "9445": 2.37622, "9450": 2.39383, "9455": 2.40249, "9460": 2.38597, "9465": 2.3775, "9470": 2.38205, "9475": 2.36454, "9480": 2.43551, "9485": 2.38642, "9490": 2.4204, "9495": 2.38165, "9500": 2.36325, "9505": 2.4296, "9510": 2.39916, "9515": 2.43096, "9520": 2.41792, "9525": 2.38898, "9530": 2.45385, "9535": 2.40151, "9540": 2.41839, "9545": 2.37813, "9550": 2.42143, "9555": 2.39054, "9560": 2.42191, "9565": 2.40523, "9570": 2.37157, "9575": 2.41109, "9580": 2.39564, "9585": 2.42353, "9590": 2.42924, "9595": 2.44777, "9600": 2.39117, "9605": 2.38431, "9610": 2.42142, "9615": 2.41558, "9620": 2.41413, "9625": 2.44723, "9630": 2.39712, "9635": 2.40396, "9640": 2.44817, "9645": 2.4109, "9650": 2.39894, "9655": 2.37366, "9660": 2.42329, "9665": 2.39029, "9670": 2.38274, "9675": 2.35662, "9680": 2.39869, "9685": 2.40199, "9690": 2.46804, "9695": 2.38133, "9700": 2.37698, "9705": 2.38453, "9710": 2.36554, "9715": 2.38868, "9720": 2.43552, "9725": 2.4413, "9730": 2.42919, "9735": 2.38684, "9740": 2.38077, "9745": 2.42676, "9750": 2.3991, "9755": 2.40788, "9760": 2.41084, "9765": 2.37036, "9770": 2.43675, "9775": 2.40145, "9780": 2.36196, "9785": 2.40085, "9790": 2.40714, "9795": 2.3593, "9800": 2.39629, "9805": 2.40561, "9810": 2.41066, "9815": 2.37884, "9820": 2.37671, "9825": 2.40364, "9830": 2.42194, "9835": 2.3861, "9840": 2.41457, "9845": 2.36502, "9850": 2.39824, "9855": 2.39496, "9860": 2.3972, "9865": 2.38197, "9870": 2.39342, "9875": 2.38398, "9880": 2.45319, "9885": 2.39313, "9890": 2.35399, "9895": 2.32116, "9900": 2.3962, "9905": 2.42494, "9910": 2.35642, "9915": 2.36473, "9920": 2.41154, "9925": 2.39863, "9930": 2.38182, "9935": 2.35063, "9940": 2.38377, "9945": 2.37842, "9950": 2.40342, "9955": 2.44928, "9960": 2.43108, "9965": 2.35851, "9970": 2.41017, "9975": 2.38564, "9980": 2.33084, "9985": 2.40772, "9990": 2.39761, "9995": 2.39543, "10000": 2.36621, "10005": 2.37213, "10010": 2.38256, "10015": 2.44495, "10020": 2.36326, "10025": 2.38851, "10030": 2.38817, "10035": 2.40993, "10040": 2.40515, "10045": 2.3831, "10050": 2.34965, "10055": 2.36805, "10060": 2.42146, "10065": 2.37528, "10070": 2.42235, "10075": 2.37088, "10080": 2.36211, "10085": 2.36918, "10090": 2.34573, "10095": 2.40221, "10100": 2.31408, "10105": 2.38253, "10110": 2.40897, "10115": 2.38736, "10120": 2.35801, "10125": 2.37033, "10130": 2.36037, "10135": 2.38382, "10140": 2.4139, "10145": 2.40714, "10150": 2.37532, "10155": 2.39536, "10160": 2.36205, "10165": 2.38369, "10170": 2.4236, "10175": 2.32447, "10180": 2.39651, "10185": 2.3824, "10190": 2.44396, "10195": 2.40416, "10200": 2.38955, "10205": 2.38797, "10210": 2.36805, "10215": 2.34261, "10220": 2.41843, "10225": 2.43079, "10230": 2.35627, "10235": 2.38764, "10240": 2.37226, "10245": 2.39117, "10250": 2.38838, "10255": 2.41316, "10260": 2.33469, "10265": 2.34846, "10270": 2.34979, "10275": 2.3717, "10280": 2.4513, "10285": 2.35906, "10290": 2.3861, "10295": 2.375, "10300": 2.36936, "10305": 2.41578, "10310": 2.38877, "10315": 2.36095, "10320": 2.36607, "10325": 2.36094, "10330": 2.41247, "10335": 2.36135, "10340": 2.41934, "10345": 2.36966, "10350": 2.35686, "10355": 2.39609, "10360": 2.37338, "10365": 2.36225, "10370": 2.34061, "10375": 2.3585, "10380": 2.41953, "10385": 2.40576, "10390": 2.38058, "10395": 2.35968, "10400": 2.37919, "10405": 2.34877, "10410": 2.3389, "10415": 2.41664, "10420": 2.37924, "10425": 2.32522, "10430": 2.35941, "10435": 2.37129, "10440": 2.3711, "10445": 2.35949, "10450": 2.36154, "10455": 2.38113, "10460": 2.38064, "10465": 2.30273, "10470": 2.3577, "10475": 2.37958, "10480": 2.36276, "10485": 2.36137, "10490": 2.41283, "10495": 2.36502, "10500": 2.36277, "10505": 2.37018, "10510": 2.38172, "10515": 2.37393, "10520": 2.40259, "10525": 2.39024, "10530": 2.39211, "10535": 2.35551, "10540": 2.40461, "10545": 2.35856, "10550": 2.37752, "10555": 2.35793, "10560": 2.34025, "10565": 2.37346, "10570": 2.37536, "10575": 2.3535, "10580": 2.37788, "10585": 2.36682, "10590": 2.37817, "10595": 2.37713, "10600": 2.33146, "10605": 2.3724, "10610": 2.36498, "10615": 2.36379, "10620": 2.34659, "10625": 2.41843, "10630": 2.36855, "10635": 2.32266, "10640": 2.36413, "10645": 2.42158, "10650": 2.36174, "10655": 2.30869, "10660": 2.34689, "10665": 2.39981, "10670": 2.31617, "10675": 2.41612, "10680": 2.35445, "10685": 2.28871, "10690": 2.38456, "10695": 2.33038, "10700": 2.38407, "10705": 2.38432, "10710": 2.34313, "10715": 2.3828, "10720": 2.32518, "10725": 2.35278, "10730": 2.34872, "10735": 2.35338, "10740": 2.31849, "10745": 2.33808, "10750": 2.33362, "10755": 2.4041, "10760": 2.36431, "10765": 2.33591, "10770": 2.36802, "10775": 2.38746, "10780": 2.36985, "10785": 2.39167, "10790": 2.34599, "10795": 2.38556, "10800": 2.32491, "10805": 2.39755, "10810": 2.37536, "10815": 2.35431, "10820": 2.34323, "10825": 2.37192, "10830": 2.33781, "10835": 2.3477, "10840": 2.32993, "10845": 2.38645, "10850": 2.33282, "10855": 2.36654, "10860": 2.33304, "10865": 2.32192, "10870": 2.32311, "10875": 2.30406, "10880": 2.39356, "10885": 2.40455, "10890": 2.36115, "10895": 2.37301, "10900": 2.33176, "10905": 2.31266, "10910": 2.40728, "10915": 2.37119, "10920": 2.37413, "10925": 2.36306, "10930": 2.31881, "10935": 2.36035, "10940": 2.35501, "10945": 2.34689, "10950": 2.36286, "10955": 2.3644, "10960": 2.30987, "10965": 2.3635, "10970": 2.35624, "10975": 2.40775, "10980": 2.37303, "10985": 2.3427, "10990": 2.39729, "10995": 2.36387, "11000": 2.33714, "11005": 2.36117, "11010": 2.34243, "11015": 2.32557, "11020": 2.3346, "11025": 2.36577, "11030": 2.34044, "11035": 2.31307, "11040": 2.31887, "11045": 2.31738, "11050": 2.31805, "11055": 2.28859, "11060": 2.33998, "11065": 2.31013, "11070": 2.39402, "11075": 2.32015, "11080": 2.35427, "11085": 2.33669, "11090": 2.34632, "11095": 2.37084, "11100": 2.32912, "11105": 2.31663, "11110": 2.36288, "11115": 2.37225, "11120": 2.38139, "11125": 2.31341, "11130": 2.34997, "11135": 2.3336, "11140": 2.37217, "11145": 2.35107, "11150": 2.39612, "11155": 2.34114, "11160": 2.3659, "11165": 2.36388, "11170": 2.34098, "11175": 2.33474, "11180": 2.37348, "11185": 2.31203, "11190": 2.27804, "11195": 2.32819, "11200": 2.34726, "11205": 2.36258, "11210": 2.33385, "11215": 2.31927, "11220": 2.34329, "11225": 2.37141, "11230": 2.36569, "11235": 2.32069, "11240": 2.34092, "11245": 2.35748, "11250": 2.3324, "11255": 2.33515, "11260": 2.35577, "11265": 2.38918, "11270": 2.28782, "11275": 2.31519, "11280": 2.36893, "11285": 2.29387, "11290": 2.34639, "11295": 2.3655, "11300": 2.38111, "11305": 2.33495, "11310": 2.32963, "11315": 2.29825, "11320": 2.30482, "11325": 2.31462, "11330": 2.35421, "11335": 2.33831, "11340": 2.30841, "11345": 2.31278, "11350": 2.29588, "11355": 2.3219, "11360": 2.35153, "11365": 2.29378, "11370": 2.35263, "11375": 2.32804, "11380": 2.34006, "11385": 2.34763, "11390": 2.33477, "11395": 2.28732, "11400": 2.30981, "11405": 2.35647, "11410": 2.35502, "11415": 2.38458, "11420": 2.35172, "11425": 2.30761, "11430": 2.36718, "11435": 2.36201, "11440": 2.34796, "11445": 2.36318, "11450": 2.32182, "11455": 2.30476, "11460": 2.35092, "11465": 2.34386, "11470": 2.37434, "11475": 2.31342, "11480": 2.32527, "11485": 2.30987, "11490": 2.34568, "11495": 2.406, "11500": 2.33937, "11505": 2.35014, "11510": 2.36223, "11515": 2.32176, "11520": 2.30507, "11525": 2.36152, "11530": 2.31469, "11535": 2.32196, "11540": 2.34627, "11545": 2.34321, "11550": 2.36438, "11555": 2.32533, "11560": 2.34981, "11565": 2.34125, "11570": 2.34916, "11575": 2.29628, "11580": 2.32931, "11585": 2.35173, "11590": 2.36158, "11595": 2.33454, "11600": 2.35704, "11605": 2.3235, "11610": 2.36089, "11615": 2.35899, "11620": 2.29569, "11625": 2.2757, "11630": 2.32782, "11635": 2.34204, "11640": 2.30488, "11645": 2.30751, "11650": 2.32628, "11655": 2.35114, "11660": 2.33566, "11665": 2.32994, "11670": 2.30002, "11675": 2.29666, "11680": 2.32542, "11685": 2.33637, "11690": 2.34433, "11695": 2.31688, "11700": 2.32535, "11705": 2.3009, "11710": 2.34479, "11715": 2.31575, "11720": 2.29844, "11725": 2.33988, "11730": 2.30403, "11735": 2.32822, "11740": 2.27122, "11745": 2.31714, "11750": 2.32793, "11755": 2.35133, "11760": 2.31357, "11765": 2.3378, "11770": 2.27597, "11775": 2.32591, "11780": 2.25511, "11785": 2.2973, "11790": 2.31403, "11795": 2.32024, "11800": 2.3345, "11805": 2.30403, "11810": 2.30398, "11815": 2.33078, "11820": 2.32015, "11825": 2.36083, "11830": 2.31663, "11835": 2.33741, "11840": 2.34081, "11845": 2.31727, "11850": 2.30496, "11855": 2.31403, "11860": 2.34333, "11865": 2.35836, "11870": 2.37861, "11875": 2.28155, "11880": 2.29163, "11885": 2.33553, "11890": 2.29241, "11895": 2.29059, "11900": 2.33401, "11905": 2.31769, "11910": 2.27783, "11915": 2.31082, "11920": 2.33519, "11925": 2.30272, "11930": 2.30681, "11935": 2.31569, "11940": 2.3175, "11945": 2.34208, "11950": 2.29773, "11955": 2.31327, "11960": 2.33576, "11965": 2.29584, "11970": 2.28204, "11975": 2.33575, "11980": 2.30612, "11985": 2.2776, "11990": 2.30416, "11995": 2.33013, "12000": 2.32323, "12005": 2.32565, "12010": 2.2884, "12015": 2.30861, "12020": 2.32922, "12025": 2.33525, "12030": 2.31246, "12035": 2.33617, "12040": 2.3154, "12045": 2.3126, "12050": 2.30835, "12055": 2.33352, "12060": 2.29764, "12065": 2.32975, "12070": 2.30319, "12075": 2.2775, "12080": 2.35063, "12085": 2.33812, "12090": 2.33359, "12095": 2.28176, "12100": 2.31543, "12105": 2.30903, "12110": 2.33029, "12115": 2.3036, "12120": 2.30606, "12125": 2.29484, "12130": 2.30409, "12135": 2.32842, "12140": 2.29591, "12145": 2.25622, "12150": 2.26125, "12155": 2.34249, "12160": 2.35771, "12165": 2.31914, "12170": 2.3336, "12175": 2.3412, "12180": 2.33054, "12185": 2.34135, "12190": 2.33375, "12195": 2.29767, "12200": 2.30036, "12205": 2.32225, "12210": 2.35697, "12215": 2.30437, "12220": 2.2987, "12225": 2.24241, "12230": 2.33348, "12235": 2.33945, "12240": 2.32345, "12245": 2.28764, "12250": 2.27397, "12255": 2.33706, "12260": 2.31368, "12265": 2.34287, "12270": 2.31292, "12275": 2.31361, "12280": 2.31869, "12285": 2.28631, "12290": 2.31074, "12295": 2.26654, "12300": 2.32931, "12305": 2.26821, "12310": 2.28768, "12315": 2.3543, "12320": 2.2963, "12325": 2.32045, "12330": 2.30113, "12335": 2.3194, "12340": 2.34117, "12345": 2.36885, "12350": 2.34318, "12355": 2.30683, "12360": 2.31344, "12365": 2.32933, "12370": 2.29273, "12375": 2.29957, "12380": 2.29184, "12385": 2.29061, "12390": 2.25018, "12395": 2.30421, "12400": 2.29905, "12405": 2.31088, "12410": 2.30419, "12415": 2.28306, "12420": 2.31729, "12425": 2.30099, "12430": 2.31571, "12435": 2.30048, "12440": 2.33123, "12445": 2.3202, "12450": 2.30745, "12455": 2.24018, "12460": 2.33488, "12465": 2.36363, "12470": 2.27626, "12475": 2.27276, "12480": 2.29139, "12485": 2.30632, "12490": 2.33128, "12495": 2.26961, "12500": 2.32122, "12505": 2.3351, "12510": 2.35582, "12515": 2.27062, "12520": 2.31971, "12525": 2.28653, "12530": 2.32054, "12535": 2.27138, "12540": 2.28491, "12545": 2.29049, "12550": 2.31572, "12555": 2.32333, "12560": 2.30023, "12565": 2.3353, "12570": 2.27829, "12575": 2.29941, "12580": 2.31153, "12585": 2.29201, "12590": 2.33455, "12595": 2.3227, "12600": 2.28167, "12605": 2.31996, "12610": 2.3631, "12615": 2.30567, "12620": 2.33322, "12625": 2.32935, "12630": 2.29885, "12635": 2.33561, "12640": 2.29568, "12645": 2.27902, "12650": 2.32556, "12655": 2.2647, "12660": 2.34199, "12665": 2.31843, "12670": 2.3097, "12675": 2.31886, "12680": 2.27525, "12685": 2.3664, "12690": 2.30452, "12695": 2.33199, "12700": 2.29244, "12705": 2.30628, "12710": 2.30837, "12715": 2.28749, "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": "nan", "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": "nan", "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": "nan"}}, "num-zeros": {"start_step": 1, "end_step": 13000, "step_interval": 5, "values": {"1": 956236544.0, "5": 967337600.0, "10": 971388224.0, "15": 946439424.0, "20": 961330240.0, "25": 1083876480.0, "30": 1211133312.0, "35": 1297707520.0, "40": 1271785728.0, "45": 1175048064.0, "50": 1126729728.0, "55": 1083975424.0, "60": 1045060608.0, "65": 1026047360.0, "70": 995721280.0, "75": 986257152.0, "80": 1010241664.0, "85": 1006739968.0, "90": 988780736.0, "95": 959700032.0, "100": 971861632.0, "105": 980754624.0, "110": 977222528.0, "115": 978430848.0, "120": 961162432.0, "125": 942469184.0, "130": 977095104.0, "135": 966160128.0, "140": 963476928.0, "145": 976512384.0, "150": 921597184.0, "155": 968134336.0, "160": 956383232.0, "165": 959869952.0, "170": 974372224.0, "175": 949013120.0, "180": 946688448.0, "185": 972006784.0, "190": 969055488.0, "195": 985121664.0, "200": 945774592.0, "205": 958353792.0, "210": 979445248.0, "215": 967478208.0, "220": 956423424.0, "225": 962400768.0, "230": 948177792.0, "235": 965221120.0, "240": 966072192.0, "245": 969161216.0, "250": 974435968.0, "255": 925063296.0, "260": 965635968.0, "265": 970660352.0, "270": 959131264.0, "275": 954001216.0, "280": 963427648.0, "285": 945777408.0, "290": 974124544.0, "295": 966704640.0, "300": 967140096.0, "305": 964514048.0, "310": 940354048.0, "315": 967404800.0, "320": 969006080.0, "325": 980552832.0, "330": 972090752.0, "335": 946865984.0, "340": 966598784.0, "345": 973025856.0, "350": 973918720.0, "355": 963261696.0, "360": 948351680.0, "365": 964821248.0, "370": 962952704.0, "375": 958446848.0, "380": 947153280.0, "385": 955988608.0, "390": 945399616.0, "395": 970423552.0, "400": 979770112.0, "405": 968344320.0, "410": 970058752.0, "415": 953158528.0, "420": 943569920.0, "425": 954774144.0, "430": 962663232.0, "435": 977082240.0, "440": 954811392.0, "445": 971894272.0, "450": 963512576.0, "455": 973134720.0, "460": 983714688.0, "465": 945280512.0, "470": 942055616.0, "475": 967007104.0, "480": 966107264.0, "485": 976414528.0, "490": 962538880.0, "495": 945454464.0, "500": 964454656.0, "505": 986005440.0, "510": 965682944.0, "515": 943411584.0, "520": 945017408.0, "525": 971262208.0, "530": 971890688.0, "535": 979140352.0, "540": 969531264.0, "545": 954116608.0, "550": 951267584.0, "555": 987219456.0, "560": 960428288.0, "565": 966616320.0, "570": 975727488.0, "575": 927224960.0, "580": 970694528.0, "585": 961176064.0, "590": 972967040.0, "595": 963682816.0, "600": 937079168.0, "605": 951470208.0, "610": 963360768.0, "615": 970009728.0, "620": 976472192.0, "625": 949579776.0, "630": 954445504.0, "635": 986042816.0, "640": 980980992.0, "645": 955010560.0, "650": 958545664.0, "655": 951656640.0, "660": 961043712.0, "665": 967552000.0, "670": 962514304.0, "675": 968337536.0, "680": 965619200.0, "685": 962871040.0, "690": 961921088.0, "695": 954770368.0, "700": 970340608.0, "705": 945512640.0, "710": 943885440.0, "715": 973357568.0, "720": 968369856.0, "725": 978489984.0, "730": 952195008.0, "735": 948813952.0, "740": 955633408.0, "745": 975866880.0, "750": 981235072.0, "755": 962156608.0, "760": 951964800.0, "765": 967343616.0, "770": 976148096.0, "775": 970544000.0, "780": 977540928.0, "785": 931529024.0, "790": 960441536.0, "795": 964582016.0, "800": 967022848.0, "805": 962321024.0, "810": 940969344.0, "815": 949037568.0, "820": 953181440.0, "825": 954502400.0, "830": 976442240.0, "835": 956073344.0, "840": 948401920.0, "845": 965153024.0, "850": 966029248.0, "855": 960904384.0, "860": 976027200.0, "865": 938157824.0, "870": 966414016.0, "875": 972314880.0, "880": 963120896.0, "885": 967745600.0, "890": 949967872.0, "895": 960019072.0, "900": 974229696.0, "905": 963968256.0, "910": 958435072.0, "915": 956354560.0, "920": 943974592.0, "925": 960833728.0, "930": 978845952.0, "935": 971073664.0, "940": 960905792.0, "945": 945063040.0, "950": 957423360.0, "955": 979035520.0, "960": 983589248.0, "965": 966165824.0, "970": 951228672.0, "975": 961577344.0, "980": 968071040.0, "985": 968991872.0, "990": 984393024.0, "995": 953291264.0, "1000": 934780480.0, "1005": 960147328.0, "1010": 971538624.0, "1015": 985184896.0, "1020": 962780928.0, "1025": 935009408.0, "1030": 974679936.0, "1035": 964992384.0, "1040": 980464256.0, "1045": 960826496.0, "1050": 955197824.0, "1055": 957780352.0, "1060": 967748800.0, "1065": 967116352.0, "1070": 966600064.0, "1075": 950061696.0, "1080": 954508544.0, "1085": 967251712.0, "1090": 977132800.0, "1095": 961237632.0, "1100": 979613568.0, "1105": 953365120.0, "1110": 965954176.0, "1115": 966986944.0, "1120": 970350592.0, "1125": 965707776.0, "1130": 954942400.0, "1135": 965843328.0, "1140": 965176384.0, "1145": 970988224.0, "1150": 955556864.0, "1155": 930578432.0, "1160": 957774208.0, "1165": 978124736.0, "1170": 974299520.0, "1175": 973059648.0, "1180": 973083648.0, "1185": 947344640.0, "1190": 964793216.0, "1195": 953138560.0, "1200": 972843136.0, "1205": 988478656.0, "1210": 931126784.0, "1215": 968647040.0, "1220": 969160960.0, "1225": 975950656.0, "1230": 967331712.0, "1235": 943446912.0, "1240": 955853952.0, "1245": 981503488.0, "1250": 966111808.0, "1255": 973676032.0, "1260": 946497280.0, "1265": 963997824.0, "1270": 960489024.0, "1275": 973615104.0, "1280": 961112576.0, "1285": 957580480.0, "1290": 952528768.0, "1295": 971610240.0, "1300": 968862464.0, "1305": 963739136.0, "1310": 963334656.0, "1315": 943553408.0, "1320": 966307200.0, "1325": 989784960.0, "1330": 969508992.0, "1335": 972302464.0, "1340": 972269440.0, "1345": 960658304.0, "1350": 968639296.0, "1355": 955853312.0, "1360": 971822144.0, "1365": 960387584.0, "1370": 948791872.0, "1375": 973533376.0, "1380": 953470208.0, "1385": 969146880.0, "1390": 975720640.0, "1395": 931673984.0, "1400": 945854848.0, "1405": 976753536.0, "1410": 974510336.0, "1415": 967573760.0, "1420": 966747328.0, "1425": 937378560.0, "1430": 973916608.0, "1435": 978335552.0, "1440": 964178304.0, "1445": 958058240.0, "1450": 946147712.0, "1455": 983922304.0, "1460": 968651136.0, "1465": 948745152.0, "1470": 984243328.0, "1475": 943906048.0, "1480": 963975488.0, "1485": 957349376.0, "1490": 961261888.0, "1495": 980539648.0, "1500": 958332032.0, "1505": 942866816.0, "1510": 984180096.0, "1515": 959094528.0, "1520": 959105408.0, "1525": 952786816.0, "1530": 957741312.0, "1535": 949428928.0, "1540": 971088256.0, "1545": 963132352.0, "1550": 978666752.0, "1555": 952320512.0, "1560": 980089984.0, "1565": 967314048.0, "1570": 973844352.0, "1575": 975494912.0, "1580": 941862656.0, "1585": 970028352.0, "1590": 983822208.0, "1595": 948631616.0, "1600": 967442560.0, "1605": 952451328.0, "1610": 969616512.0, "1615": 983146496.0, "1620": 968019200.0, "1625": 970715776.0, "1630": 962887360.0, "1635": 942311936.0, "1640": 981612224.0, "1645": 973977856.0, "1650": 974188224.0, "1655": 967265024.0, "1660": 940687744.0, "1665": 961704448.0, "1670": 962902016.0, "1675": 971280896.0, "1680": 980879232.0, "1685": 944416192.0, "1690": 964688128.0, "1695": 965644992.0, "1700": 966342336.0, "1705": 985200000.0, "1710": 978354304.0, "1715": 943210880.0, "1720": 977089408.0, "1725": 965870208.0, "1730": 968968960.0, "1735": 965088000.0, "1740": 949713280.0, "1745": 970012352.0, "1750": 959681728.0, "1755": 960085440.0, "1760": 966381376.0, "1765": 951816192.0, "1770": 954665728.0, "1775": 973752064.0, "1780": 970534272.0, "1785": 968824960.0, "1790": 950235520.0, "1795": 945131072.0, "1800": 984666816.0, "1805": 987163520.0, "1810": 977766656.0, "1815": 948004480.0, "1820": 949209216.0, "1825": 978853632.0, "1830": 966362368.0, "1835": 964133632.0, "1840": 972320128.0, "1845": 935415808.0, "1850": 952497792.0, "1855": 980048640.0, "1860": 975866880.0, "1865": 958966528.0, "1870": 958949056.0, "1875": 932593408.0, "1880": 973574016.0, "1885": 978843264.0, "1890": 971358720.0, "1895": 959212288.0, "1900": 947394432.0, "1905": 981829952.0, "1910": 969126912.0, "1915": 970040704.0, "1920": 975597056.0, "1925": 960496512.0, "1930": 977922304.0, "1935": 963250432.0, "1940": 952460928.0, "1945": 981338176.0, "1950": 939172864.0, "1955": 960604416.0, "1960": 970031744.0, "1965": 981176000.0, "1970": 962045120.0, "1975": 952822016.0, "1980": 936847360.0, "1985": 975938432.0, "1990": 965965696.0, "1995": 962609920.0, "2000": 960553984.0, "2005": 954497728.0, "2010": 975579776.0, "2015": 991802112.0, "2020": 975433408.0, "2025": 974303936.0, "2030": 952084736.0, "2035": 967847680.0, "2040": 987457536.0, "2045": 976480064.0, "2050": 984702464.0, "2055": 942839488.0, "2060": 942593920.0, "2065": 966208768.0, "2070": 969622528.0, "2075": 980553536.0, "2080": 977598080.0, "2085": 939635968.0, "2090": 969872256.0, "2095": 961274880.0, "2100": 976719168.0, "2105": 972537920.0, "2110": 959901568.0, "2115": 956875264.0, "2120": 977482304.0, "2125": 962566784.0, "2130": 979618496.0, "2135": 950537408.0, "2140": 946996544.0, "2145": 962273920.0, "2150": 973404416.0, "2155": 972690944.0, "2160": 970314560.0, "2165": 948644160.0, "2170": 961541696.0, "2175": 969377216.0, "2180": 969329920.0, "2185": 947446592.0, "2190": 940480960.0, "2195": 986085952.0, "2200": 961861248.0, "2205": 978924672.0, "2210": 964100864.0, "2215": 963502336.0, "2220": 951311104.0, "2225": 969315776.0, "2230": 976331328.0, "2235": 974025920.0, "2240": 975493888.0, "2245": 960230784.0, "2250": 967640192.0, "2255": 969129984.0, "2260": 975065024.0, "2265": 968258688.0, "2270": 951744768.0, "2275": 962766848.0, "2280": 969640064.0, "2285": 971692992.0, "2290": 962889344.0, "2295": 931409280.0, "2300": 959906048.0, "2305": 970426560.0, "2310": 967444864.0, "2315": 970905792.0, "2320": 975590848.0, "2325": 938587264.0, "2330": 988438528.0, "2335": 977489408.0, "2340": 964596352.0, "2345": 964166528.0, "2350": 947555712.0, "2355": 977029568.0, "2360": 966899072.0, "2365": 977297728.0, "2370": 965072640.0, "2375": 953966272.0, "2380": 962918912.0, "2385": 967194496.0, "2390": 963077248.0, "2395": 974465792.0, "2400": 958410816.0, "2405": 968119552.0, "2410": 951586112.0, "2415": 965904256.0, "2420": 966516160.0, "2425": 959045632.0, "2430": 956685952.0, "2435": 961389184.0, "2440": 959755904.0, "2445": 970891392.0, "2450": 961996736.0, "2455": 922721216.0, "2460": 951953536.0, "2465": 955730432.0, "2470": 972570496.0, "2475": 973812992.0, "2480": 943895296.0, "2485": 944184064.0, "2490": 972411136.0, "2495": 974451712.0, "2500": 973910080.0, "2505": 958492032.0, "2510": 939510912.0, "2515": 979553728.0, "2520": 970473792.0, "2525": 964390784.0, "2530": 955799168.0, "2535": 936598144.0, "2540": 969027648.0, "2545": 970385024.0, "2550": 969462528.0, "2555": 969439040.0, "2560": 964978304.0, "2565": 959763712.0, "2570": 985176704.0, "2575": 957426112.0, "2580": 967424512.0, "2585": 966022400.0, "2590": 956354944.0, "2595": 981830400.0, "2600": 959530880.0, "2605": 962999168.0, "2610": 965972864.0, "2615": 951924992.0, "2620": 971241216.0, "2625": 976456064.0, "2630": 974409984.0, "2635": 948071296.0, "2640": 948137088.0, "2645": 963036736.0, "2650": 953982912.0, "2655": 977112448.0, "2660": 949622976.0, "2665": 953929024.0, "2670": 959064064.0, "2675": 979275904.0, "2680": 961395840.0, "2685": 970701952.0, "2690": 965222016.0, "2695": 943553536.0, "2700": 969425664.0, "2705": 978961792.0, "2710": 971810560.0, "2715": 990813952.0, "2720": 942649152.0, "2725": 967955328.0, "2730": 955466496.0, "2735": 970672704.0, "2740": 977921536.0, "2745": 932280000.0, "2750": 947856384.0, "2755": 956317184.0, "2760": 981697664.0, "2765": 966112192.0, "2770": 948914688.0, "2775": 935830272.0, "2780": 964777088.0, "2785": 969570176.0, "2790": 974273152.0, "2795": 966886144.0, "2800": 944388096.0, "2805": 964353920.0, "2810": 969610752.0, "2815": 975845248.0, "2820": 963081600.0, "2825": 937627392.0, "2830": 956738368.0, "2835": 986321024.0, "2840": 961756672.0, "2845": 967505920.0, "2850": 951714816.0, "2855": 962091520.0, "2860": 954242304.0, "2865": 955881216.0, "2870": 944662848.0, "2875": 974662784.0, "2880": 968199936.0, "2885": 981081984.0, "2890": 953454080.0, "2895": 957178304.0, "2900": 964989440.0, "2905": 931707648.0, "2910": 955730688.0, "2915": 979477120.0, "2920": 970492544.0, "2925": 964975680.0, "2930": 964046592.0, "2935": 940140416.0, "2940": 964912384.0, "2945": 989149952.0, "2950": 965209088.0, "2955": 965104256.0, "2960": 933161472.0, "2965": 968794496.0, "2970": 973034688.0, "2975": 958092288.0, "2980": 964497280.0, "2985": 937267584.0, "2990": 951254720.0, "2995": 978315264.0, "3000": 969275392.0, "3005": 974686080.0, "3010": 950235008.0, "3015": 943841024.0, "3020": 958440960.0, "3025": 975186560.0, "3030": 965018624.0, "3035": 963454464.0, "3040": 952131776.0, "3045": 989793472.0, "3050": 965545728.0, "3055": 982520704.0, "3060": 971227712.0, "3065": 943915648.0, "3070": 978409024.0, "3075": 975204992.0, "3080": 960992896.0, "3085": 962352128.0, "3090": 945953664.0, "3095": 938116032.0, "3100": 972929088.0, "3105": 961989120.0, "3110": 970657152.0, "3115": 963390528.0, "3120": 947116032.0, "3125": 972720640.0, "3130": 952973312.0, "3135": 966041920.0, "3140": 968488896.0, "3145": 937852288.0, "3150": 975009280.0, "3155": 976815488.0, "3160": 969629184.0, "3165": 982194944.0, "3170": 937959936.0, "3175": 953825280.0, "3180": 983810176.0, "3185": 965169536.0, "3190": 968482880.0, "3195": 950933888.0, "3200": 945101440.0, "3205": 959863232.0, "3210": 957486336.0, "3215": 958020096.0, "3220": 968129792.0, "3225": 935614144.0, "3230": 962588672.0, "3235": 975775488.0, "3240": 962621440.0, "3245": 981274368.0, "3250": 943260544.0, "3255": 954599424.0, "3260": 980363648.0, "3265": 963619968.0, "3270": 965162624.0, "3275": 959731072.0, "3280": 967046720.0, "3285": 982478144.0, "3290": 947689408.0, "3295": 966419840.0, "3300": 959165632.0, "3305": 949131008.0, "3310": 979510144.0, "3315": 964283008.0, "3320": 969207296.0, "3325": 956193280.0, "3330": 941167104.0, "3335": 964973184.0, "3340": 956900736.0, "3345": 972500224.0, "3350": 964576192.0, "3355": 943346176.0, "3360": 970037632.0, "3365": 969453952.0, "3370": 954774976.0, "3375": 958676288.0, "3380": 971463168.0, "3385": 947973376.0, "3390": 965793024.0, "3395": 978390016.0, "3400": 978127360.0, "3405": 976724032.0, "3410": 924193664.0, "3415": 955424256.0, "3420": 971820416.0, "3425": 977167488.0, "3430": 973837696.0, "3435": 936071680.0, "3440": 970502464.0, "3445": 957303232.0, "3450": 959840896.0, "3455": 963864256.0, "3460": 967881856.0, "3465": 931318976.0, "3470": 952348224.0, "3475": 973704384.0, "3480": 959738112.0, "3485": 979960640.0, "3490": 944670592.0, "3495": 953904576.0, "3500": 969330432.0, "3505": 964348416.0, "3510": 971222656.0, "3515": 955946112.0, "3520": 958733120.0, "3525": 971914240.0, "3530": 964124672.0, "3535": 983192192.0, "3540": 937489664.0, "3545": 944730496.0, "3550": 984462784.0, "3555": 978057984.0, "3560": 974372992.0, "3565": 968800128.0, "3570": 946694528.0, "3575": 976104640.0, "3580": 977494272.0, "3585": 954568832.0, "3590": 956425856.0, "3595": 951444800.0, "3600": 988996800.0, "3605": 962008448.0, "3610": 965055104.0, "3615": 974635648.0, "3620": 954888384.0, "3625": 939515392.0, "3630": 990148736.0, "3635": 971435712.0, "3640": 976025856.0, "3645": 961489664.0, "3650": 945804160.0, "3655": 965782784.0, "3660": 976208000.0, "3665": 964021248.0, "3670": 977431936.0, "3675": 943476096.0, "3680": 958185152.0, "3685": 964285312.0, "3690": 982093952.0, "3695": 963125248.0, "3700": 950570176.0, "3705": 947345792.0, "3710": 982355328.0, "3715": 972682240.0, "3720": 976138624.0, "3725": 964038272.0, "3730": 948851328.0, "3735": 967079424.0, "3740": 960973568.0, "3745": 969334272.0, "3750": 963949056.0, "3755": 953432832.0, "3760": 976641920.0, "3765": 979841280.0, "3770": 972360320.0, "3775": 972374720.0, "3780": 952585664.0, "3785": 960236800.0, "3790": 985598848.0, "3795": 969195392.0, "3800": 957877504.0, "3805": 972408192.0, "3810": 954517952.0, "3815": 974566528.0, "3820": 963019648.0, "3825": 962070528.0, "3830": 969394304.0, "3835": 934744960.0, "3840": 971255808.0, "3845": 986849792.0, "3850": 968873536.0, "3855": 965248640.0, "3860": 948057600.0, "3865": 975049216.0, "3870": 985098432.0, "3875": 983043072.0, "3880": 963615616.0, "3885": 953012224.0, "3890": 960296960.0, "3895": 960591616.0, "3900": 984926464.0, "3905": 976222592.0, "3910": 987360512.0, "3915": 946017792.0, "3920": 974867328.0, "3925": 961248384.0, "3930": 976790208.0, "3935": 978925824.0, "3940": 950292672.0, "3945": 960260864.0, "3950": 974185152.0, "3955": 972967808.0, "3960": 974078848.0, "3965": 950861696.0, "3970": 980692992.0, "3975": 960750208.0, "3980": 977519808.0, "3985": 962949440.0, "3990": 972755328.0, "3995": 953714176.0, "4000": 975011968.0, "4005": 971658752.0, "4010": 978420992.0, "4015": 971491968.0, "4020": 950311040.0, "4025": 968433024.0, "4030": 997935360.0, "4035": 978548864.0, "4040": 959813248.0, "4045": 939664448.0, "4050": 944718720.0, "4055": 980958720.0, "4060": 977682816.0, "4065": 975706624.0, "4070": 942152832.0, "4075": 945765376.0, "4080": 988765632.0, "4085": 962079872.0, "4090": 983356736.0, "4095": 986958720.0, "4100": 957224448.0, "4105": 954073984.0, "4110": 966488192.0, "4115": 976042432.0, "4120": 983531776.0, "4125": 960054336.0, "4130": 967278976.0, "4135": 971431296.0, "4140": 963171200.0, "4145": 956175616.0, "4150": 960332672.0, "4155": 946216576.0, "4160": 968451328.0, "4165": 970327424.0, "4170": 971946880.0, "4175": 955862272.0, "4180": 940997632.0, "4185": 968318592.0, "4190": 968003712.0, "4195": 989223168.0, "4200": 962678912.0, "4205": 960595072.0, "4210": 971851136.0, "4215": 974145536.0, "4220": 981144704.0, "4225": 975196160.0, "4230": 952703104.0, "4235": 958493312.0, "4240": 966824448.0, "4245": 961756800.0, "4250": 965845632.0, "4255": 958268032.0, "4260": 949545344.0, "4265": 964137856.0, "4270": 978408896.0, "4275": 975397504.0, "4280": 962672064.0, "4285": 951451136.0, "4290": 980029952.0, "4295": 968842816.0, "4300": 958299456.0, "4305": 966802048.0, "4310": 939613376.0, "4315": 949416704.0, "4320": 984522240.0, "4325": 982547712.0, "4330": 974746240.0, "4335": 949459840.0, "4340": 959478400.0, "4345": 956644096.0, "4350": 979960320.0, "4355": 968869888.0, "4360": 966313728.0, "4365": 941208704.0, "4370": 969495552.0, "4375": 972982336.0, "4380": 966147840.0, "4385": 972060544.0, "4390": 954141120.0, "4395": 951856512.0, "4400": 973629184.0, "4405": 972441728.0, "4410": 967908480.0, "4415": 958935232.0, "4420": 960777856.0, "4425": 976416000.0, "4430": 965905664.0, "4435": 975864704.0, "4440": 962282496.0, "4445": 954799872.0, "4450": 978294912.0, "4455": 960203776.0, "4460": 968579008.0, "4465": 968805760.0, "4470": 944127232.0, "4475": 951973056.0, "4480": 978824832.0, "4485": 968265728.0, "4490": 956975616.0, "4495": 938925248.0, "4500": 953215488.0, "4505": 977129344.0, "4510": 978700416.0, "4515": 962493568.0, "4520": 958825024.0, "4525": 958101760.0, "4530": 964428608.0, "4535": 976641024.0, "4540": 976846784.0, "4545": 970169408.0, "4550": 953223680.0, "4555": 959591040.0, "4560": 972497920.0, "4565": 973442560.0, "4570": 978909568.0, "4575": 957760000.0, "4580": 963142016.0, "4585": 957381888.0, "4590": 986530176.0, "4595": 960189824.0, "4600": 952267648.0, "4605": 959297664.0, "4610": 963698304.0, "4615": 957950912.0, "4620": 960255936.0, "4625": 973786624.0, "4630": 944507648.0, "4635": 977024896.0, "4640": 960310272.0, "4645": 981993856.0, "4650": 962400128.0, "4655": 939457792.0, "4660": 964001280.0, "4665": 962540544.0, "4670": 976671872.0, "4675": 963514432.0, "4680": 957488448.0, "4685": 949674432.0, "4690": 956962240.0, "4695": 969926912.0, "4700": 961214016.0, "4705": 970781312.0, "4710": 934493248.0, "4715": 970475008.0, "4720": 966361920.0, "4725": 980303616.0, "4730": 965826240.0, "4735": 937871360.0, "4740": 960124288.0, "4745": 975997376.0, "4750": 967957760.0, "4755": 984959744.0, "4760": 959123648.0, "4765": 955261056.0, "4770": 958621440.0, "4775": 991092608.0, "4780": 976856000.0, "4785": 967582336.0, "4790": 943756160.0, "4795": 955861760.0, "4800": 967745984.0, "4805": 976565888.0, "4810": 965160448.0, "4815": 957969408.0, "4820": 973993216.0, "4825": 961505920.0, "4830": 962638336.0, "4835": 972543936.0, "4840": 948913920.0, "4845": 965716608.0, "4850": 960305024.0, "4855": 964130624.0, "4860": 963051008.0, "4865": 967532352.0, "4870": 957207424.0, "4875": 983574528.0, "4880": 957085120.0, "4885": 977052480.0, "4890": 959740928.0, "4895": 942152192.0, "4900": 973700352.0, "4905": 975213952.0, "4910": 969220608.0, "4915": 970053248.0, "4920": 941169024.0, "4925": 954809344.0, "4930": 977034624.0, "4935": 963750016.0, "4940": 972586496.0, "4945": 960056448.0, "4950": 940793856.0, "4955": 968036480.0, "4960": 976800640.0, "4965": 961022464.0, "4970": 958665472.0, "4975": 933775168.0, "4980": 960824704.0, "4985": 963012160.0, "4990": 963588736.0, "4995": 986307968.0, "5000": 940776000.0, "5005": 968887552.0, "5010": 970307776.0, "5015": 965233408.0, "5020": 966702336.0, "5025": 949471744.0, "5030": 953490560.0, "5035": 967352704.0, "5040": 955689856.0, "5045": 969129856.0, "5050": 953417984.0, "5055": 954811520.0, "5060": 963029248.0, "5065": 952195072.0, "5070": 973597056.0, "5075": 978581632.0, "5080": 942828736.0, "5085": 965863040.0, "5090": 972857088.0, "5095": 964397952.0, "5100": 958318016.0, "5105": 965353728.0, "5110": 950401920.0, "5115": 972347520.0, "5120": 960424960.0, "5125": 969755008.0, "5130": 938795456.0, "5135": 943659008.0, "5140": 969888896.0, "5145": 968688768.0, "5150": 970601216.0, "5155": 972635008.0, "5160": 926551872.0, "5165": 961591552.0, "5170": 966873472.0, "5175": 966086400.0, "5180": 963656192.0, "5185": 930802688.0, "5190": 949852992.0, "5195": 972422016.0, "5200": 973758656.0, "5205": 968249664.0, "5210": 960527936.0, "5215": 928815680.0, "5220": 979174208.0, "5225": 984780416.0, "5230": 975060672.0, "5235": 975055232.0, "5240": 944314112.0, "5245": 970834048.0, "5250": 972426624.0, "5255": 966895296.0, "5260": 976679744.0, "5265": 942270592.0, "5270": 969202176.0, "5275": 970073344.0, "5280": 962825728.0, "5285": 964072064.0, "5290": 932501824.0, "5295": 951762944.0, "5300": 975588288.0, "5305": 951853504.0, "5310": 968040960.0, "5315": 955817472.0, "5320": 950921984.0, "5325": 973040384.0, "5330": 967843264.0, "5335": 967536384.0, "5340": 966503424.0, "5345": 962961344.0, "5350": 978937216.0, "5355": 972202560.0, "5360": 963892288.0, "5365": 965246464.0, "5370": 947824896.0, "5375": 948820864.0, "5380": 967210752.0, "5385": 980540416.0, "5390": 965312832.0, "5395": 955160000.0, "5400": 948286464.0, "5405": 974365952.0, "5410": 967844224.0, "5415": 976074688.0, "5420": 967431680.0, "5425": 937412096.0, "5430": 963919744.0, "5435": 971948544.0, "5440": 969128192.0, "5445": 957501184.0, "5450": 919431040.0, "5455": 952052800.0, "5460": 962315264.0, "5465": 978905088.0, "5470": 981031104.0, "5475": 941610304.0, "5480": 955755072.0, "5485": 964902912.0, "5490": 976036992.0, "5495": 962805120.0, "5500": 971218240.0, "5505": 957001728.0, "5510": 968610432.0, "5515": 945466624.0, "5520": 963181952.0, "5525": 975932480.0, "5530": 936635520.0, "5535": 970693824.0, "5540": 960295936.0, "5545": 972084800.0, "5550": 967898240.0, "5555": 955968960.0, "5560": 954520832.0, "5565": 968862848.0, "5570": 945186112.0, "5575": 960539584.0, "5580": 960563456.0, "5585": 959470720.0, "5590": 977668224.0, "5595": 975194496.0, "5600": 963009472.0, "5605": 964201216.0, "5610": 943157760.0, "5615": 966515904.0, "5620": 963224448.0, "5625": 982398656.0, "5630": 976073984.0, "5635": 957347520.0, "5640": 951455488.0, "5645": 967805568.0, "5650": 979181056.0, "5655": 983510912.0, "5660": 956493952.0, "5665": 953574656.0, "5670": 966097408.0, "5675": 967701184.0, "5680": 978598848.0, "5685": 962008576.0, "5690": 935908928.0, "5695": 963739648.0, "5700": 952464384.0, "5705": 974525376.0, "5710": 971341376.0, "5715": 946147648.0, "5720": 974991360.0, "5725": 967473664.0, "5730": 978561792.0, "5735": 964979712.0, "5740": 943573056.0, "5745": 971142016.0, "5750": 981951168.0, "5755": 956754944.0, "5760": 963695168.0, "5765": 957707648.0, "5770": 955747584.0, "5775": 970847104.0, "5780": 962811840.0, "5785": 970673664.0, "5790": 974652672.0, "5795": 949802368.0, "5800": 965999232.0, "5805": 968774272.0, "5810": 975986176.0, "5815": 970103936.0, "5820": 936377408.0, "5825": 969266816.0, "5830": 977614464.0, "5835": 974980224.0, "5840": 963061120.0, "5845": 968774464.0, "5850": 942897536.0, "5855": 975999104.0, "5860": 979537600.0, "5865": 978369280.0, "5870": 968714112.0, "5875": 942328320.0, "5880": 964508224.0, "5885": 974806656.0, "5890": 972671104.0, "5895": 965681920.0, "5900": 941482880.0, "5905": 961766528.0, "5910": 958568832.0, "5915": 968174464.0, "5920": 977468032.0, "5925": 959468800.0, "5930": 946750080.0, "5935": 952334656.0, "5940": 977662144.0, "5945": 984818560.0, "5950": 980689536.0, "5955": 935094464.0, "5960": 961735296.0, "5965": 965938176.0, "5970": 970612096.0, "5975": 961933888.0, "5980": 958344832.0, "5985": 964562816.0, "5990": 973676288.0, "5995": 955919488.0, "6000": 955633216.0, "6005": 961366784.0, "6010": 952707072.0, "6015": 974539328.0, "6020": 978265920.0, "6025": 972226688.0, "6030": 955311744.0, "6035": 946954368.0, "6040": 962641920.0, "6045": 983743552.0, "6050": 956515200.0, "6055": 963445888.0, "6060": 945767488.0, "6065": 958443776.0, "6070": 978390272.0, "6075": 977988416.0, "6080": 957530304.0, "6085": 947641408.0, "6090": 953645632.0, "6095": 964770560.0, "6100": 979886720.0, "6105": 971011904.0, "6110": 961816128.0, "6115": 943814016.0, "6120": 968573824.0, "6125": 960703232.0, "6130": 984040320.0, "6135": 960994432.0, "6140": 958720704.0, "6145": 971225728.0, "6150": 968493312.0, "6155": 974930688.0, "6160": 977216128.0, "6165": 952742848.0, "6170": 951129728.0, "6175": 963358592.0, "6180": 969549568.0, "6185": 966320320.0, "6190": 963731264.0, "6195": 947266752.0, "6200": 969378240.0, "6205": 967161728.0, "6210": 959339264.0, "6215": 973147776.0, "6220": 936374912.0, "6225": 978715520.0, "6230": 976146816.0, "6235": 971740992.0, "6240": 966006336.0, "6245": 956249728.0, "6250": 956449920.0, "6255": 973363584.0, "6260": 978804800.0, "6265": 974941952.0, "6270": 958855808.0, "6275": 963713152.0, "6280": 973141120.0, "6285": 966123136.0, "6290": 971040256.0, "6295": 987490560.0, "6300": 947640064.0, "6305": 964795456.0, "6310": 979017216.0, "6315": 978443392.0, "6320": 971724672.0, "6325": 923002624.0, "6330": 959366784.0, "6335": 974940864.0, "6340": 984824576.0, "6345": 966835456.0, "6350": 944574016.0, "6355": 957990272.0, "6360": 972711552.0, "6365": 972207744.0, "6370": 958966784.0, "6375": 967142144.0, "6380": 951399104.0, "6385": 973584896.0, "6390": 965514880.0, "6395": 975032064.0, "6400": 983945472.0, "6405": 944065408.0, "6410": 977178496.0, "6415": 971635776.0, "6420": 956726592.0, "6425": 960937728.0, "6430": 957525120.0, "6435": 960333440.0, "6440": 968713088.0, "6445": 973479168.0, "6450": 974637056.0, "6455": 962142208.0, "6460": 940994496.0, "6465": 974482944.0, "6470": 979911936.0, "6475": 960847808.0, "6480": 967532032.0, "6485": 948559616.0, "6490": 970748032.0, "6495": 988369024.0, "6500": 980468864.0, "6505": 972158336.0, "6510": 951648576.0, "6515": 957565440.0, "6520": 979061952.0, "6525": 978903424.0, "6530": 973271744.0, "6535": 967829056.0, "6540": 950159040.0, "6545": 966294144.0, "6550": 979335168.0, "6555": 967119872.0, "6560": 975391104.0, "6565": 949645696.0, "6570": 952068224.0, "6575": 962553728.0, "6580": 975679424.0, "6585": 979544832.0, "6590": 949212544.0, "6595": 961471616.0, "6600": 961353856.0, "6605": 961755520.0, "6610": 985212480.0, "6615": 959518336.0, "6620": 944576256.0, "6625": 971028736.0, "6630": 971564928.0, "6635": 964103936.0, "6640": 959857152.0, "6645": 951077504.0, "6650": 978674944.0, "6655": 965949440.0, "6660": 968814080.0, "6665": 969002112.0, "6670": 932999424.0, "6675": 970736128.0, "6680": 969016064.0, "6685": 958784384.0, "6690": 956215552.0, "6695": 955745920.0, "6700": 962135936.0, "6705": 979365824.0, "6710": 971098240.0, "6715": 966874944.0, "6720": 974162048.0, "6725": 941768192.0, "6730": 979399488.0, "6735": 994709376.0, "6740": 976356224.0, "6745": 974602752.0, "6750": 939272320.0, "6755": 977649344.0, "6760": 969757888.0, "6765": 978454848.0, "6770": 975513728.0, "6775": 943523520.0, "6780": 947283584.0, "6785": 975371712.0, "6790": 960607104.0, "6795": 976217984.0, "6800": 973344640.0, "6805": 946806016.0, "6810": 958265856.0, "6815": 970929792.0, "6820": 978086528.0, "6825": 969192704.0, "6830": 950435072.0, "6835": 981464192.0, "6840": 983022336.0, "6845": 948763840.0, "6850": 965465152.0, "6855": 954199552.0, "6860": 979123968.0, "6865": 983975808.0, "6870": 964842560.0, "6875": 978847808.0, "6880": 950371200.0, "6885": 958582016.0, "6890": 960484032.0, "6895": 965665280.0, "6900": 985370880.0, "6905": 968478592.0, "6910": 950097088.0, "6915": 971060736.0, "6920": 967166720.0, "6925": 965180672.0, "6930": 964715648.0, "6935": 952122112.0, "6940": 962920704.0, "6945": 986470144.0, "6950": 973350272.0, "6955": 964715136.0, "6960": 940248960.0, "6965": 974503680.0, "6970": 978554240.0, "6975": 985114880.0, "6980": 982851072.0, "6985": 959949376.0, "6990": 945298944.0, "6995": 987557120.0, "7000": 963329344.0, "7005": 962922240.0, "7010": 985144320.0, "7015": 945447424.0, "7020": 982884608.0, "7025": 968840640.0, "7030": 953537472.0, "7035": 982810432.0, "7040": 950520320.0, "7045": 956041600.0, "7050": 960403712.0, "7055": 963929728.0, "7060": 976999040.0, "7065": 968391296.0, "7070": 953547264.0, "7075": 956559360.0, "7080": 969124864.0, "7085": 965868800.0, "7090": 969521920.0, "7095": 960078592.0, "7100": 973651200.0, "7105": 973332672.0, "7110": 970254848.0, "7115": 958633088.0, "7120": 948865536.0, "7125": 963163584.0, "7130": 971421376.0, "7135": 964540096.0, "7140": 961681152.0, "7145": 930416448.0, "7150": 946280064.0, "7155": 991092864.0, "7160": 968442496.0, "7165": 956888320.0, "7170": 968275328.0, "7175": 955828224.0, "7180": 958441536.0, "7185": 984880256.0, "7190": 978735936.0, "7195": 973712000.0, "7200": 935905536.0, "7205": 957525760.0, "7210": 967114624.0, "7215": 969707264.0, "7220": 982219584.0, "7225": 928936768.0, "7230": 949560960.0, "7235": 967251712.0, "7240": 966968064.0, "7245": 967454976.0, "7250": 949502336.0, "7255": 957363968.0, "7260": 970114816.0, "7265": 974961664.0, "7270": 959874240.0, "7275": 959286784.0, "7280": 957121920.0, "7285": 977575808.0, "7290": 977249920.0, "7295": 962901120.0, "7300": 975499904.0, "7305": 964022528.0, "7310": 977515520.0, "7315": 966809600.0, "7320": 974552768.0, "7325": 966906752.0, "7330": 959690880.0, "7335": 964049280.0, "7340": 977556864.0, "7345": 967668224.0, "7350": 984804864.0, "7355": 959674816.0, "7360": 948842240.0, "7365": 972772864.0, "7370": 982593664.0, "7375": 963567424.0, "7380": 964284224.0, "7385": 948615488.0, "7390": 964192512.0, "7395": 958787008.0, "7400": 970242816.0, "7405": 988116736.0, "7410": 952423488.0, "7415": 950935744.0, "7420": 967472640.0, "7425": 982705664.0, "7430": 965871552.0, "7435": 973294080.0, "7440": 937228160.0, "7445": 969074752.0, "7450": 980608832.0, "7455": 971587712.0, "7460": 972749056.0, "7465": 939573760.0, "7470": 972011648.0, "7475": 958500480.0, "7480": 969529792.0, "7485": 961637568.0, "7490": 934760704.0, "7495": 957297216.0, "7500": 969548416.0, "7505": 970380928.0, "7510": 972589184.0, "7515": 979619840.0, "7520": 951796224.0, "7525": 970763840.0, "7530": 954655104.0, "7535": 971887616.0, "7540": 979952832.0, "7545": 959493248.0, "7550": 960599936.0, "7555": 960564352.0, "7560": 970209920.0, "7565": 955267200.0, "7570": 942667904.0, "7575": 966060032.0, "7580": 982740480.0, "7585": 979190784.0, "7590": 970155264.0, "7595": 950188416.0, "7600": 946546432.0, "7605": 982722432.0, "7610": 969487360.0, "7615": 988970624.0, "7620": 957136000.0, "7625": 941577856.0, "7630": 971809152.0, "7635": 984756608.0, "7640": 983937792.0, "7645": 968197120.0, "7650": 959357504.0, "7655": 962682368.0, "7660": 969198976.0, "7665": 978073088.0, "7670": 975279104.0, "7675": 975891840.0, "7680": 943167616.0, "7685": 960475136.0, "7690": 975843968.0, "7695": 982086400.0, "7700": 979923648.0, "7705": 940595776.0, "7710": 974811648.0, "7715": 979946496.0, "7720": 968216448.0, "7725": 960576640.0, "7730": 943583104.0, "7735": 968598400.0, "7740": 980697600.0, "7745": 964667008.0, "7750": 963965568.0, "7755": 960341056.0, "7760": 970667072.0, "7765": 971220096.0, "7770": 962730624.0, "7775": 981588800.0, "7780": 965073280.0, "7785": 959776384.0, "7790": 968147968.0, "7795": 969118208.0, "7800": 971586880.0, "7805": 968712128.0, "7810": 946156608.0, "7815": 963601664.0, "7820": 974369664.0, "7825": 963930944.0, "7830": 957420864.0, "7835": 949820864.0, "7840": 957576448.0, "7845": 954299264.0, "7850": 980140416.0, "7855": 987100288.0, "7860": 947203712.0, "7865": 949597632.0, "7870": 965653760.0, "7875": 976341632.0, "7880": 968749184.0, "7885": 969863296.0, "7890": 951979520.0, "7895": 974744576.0, "7900": 964075264.0, "7905": 964628544.0, "7910": 966224768.0, "7915": 943623808.0, "7920": 951251584.0, "7925": 969683840.0, "7930": 965018496.0, "7935": 984522112.0, "7940": 965260992.0, "7945": 950920512.0, "7950": 961778944.0, "7955": 980819072.0, "7960": 964107328.0, "7965": 952800768.0, "7970": 952096960.0, "7975": 969954944.0, "7980": 965058752.0, "7985": 959497728.0, "7990": 968288768.0, "7995": 947074368.0, "8000": 962595712.0, "8005": 980875264.0, "8010": 965703040.0, "8015": 982795648.0, "8020": 960636544.0, "8025": 965519616.0, "8030": 958643200.0, "8035": 975716096.0, "8040": 960827648.0, "8045": 948395264.0, "8050": 959831808.0, "8055": 979617792.0, "8060": 969592128.0, "8065": 958394752.0, "8070": 964066944.0, "8075": 942266240.0, "8080": 966035328.0, "8085": 966815936.0, "8090": 983700160.0, "8095": 988871424.0, "8100": 966531968.0, "8105": 944438272.0, "8110": 969326016.0, "8115": 985228672.0, "8120": 974833408.0, "8125": 964005120.0, "8130": 966272000.0, "8135": 967624576.0, "8140": 963686848.0, "8145": 994976768.0, "8150": 973166016.0, "8155": 938390528.0, "8160": 964462464.0, "8165": 972803200.0, "8170": 968497280.0, "8175": 961587008.0, "8180": 936029440.0, "8185": 962625536.0, "8190": 967799296.0, "8195": 977385088.0, "8200": 956367296.0, "8205": 960566528.0, "8210": 946495424.0, "8215": 982005248.0, "8220": 988443520.0, "8225": 966243584.0, "8230": 962552576.0, "8235": 934131712.0, "8240": 980267904.0, "8245": 976606848.0, "8250": 964327808.0, "8255": 977492864.0, "8260": 956833664.0, "8265": 982957440.0, "8270": 952836608.0, "8275": 974283968.0, "8280": 974906560.0, "8285": 953985664.0, "8290": 940194816.0, "8295": 981360128.0, "8300": 972952832.0, "8305": 978368320.0, "8310": 951095936.0, "8315": 937922048.0, "8320": 977484544.0, "8325": 967872768.0, "8330": 990116800.0, "8335": 975746048.0, "8340": 947366912.0, "8345": 970641408.0, "8350": 970082176.0, "8355": 975014080.0, "8360": 979651456.0, "8365": 932855680.0, "8370": 965537344.0, "8375": 979732736.0, "8380": 965482496.0, "8385": 972889472.0, "8390": 962502912.0, "8395": 951003840.0, "8400": 972739968.0, "8405": 951808384.0, "8410": 960912000.0, "8415": 965867904.0, "8420": 941925888.0, "8425": 968447872.0, "8430": 961416704.0, "8435": 966249344.0, "8440": 969510272.0, "8445": 952921344.0, "8450": 984742912.0, "8455": 990518400.0, "8460": 969086848.0, "8465": 967798656.0, "8470": 963598464.0, "8475": 942921920.0, "8480": 987605888.0, "8485": 979799936.0, "8490": 991849856.0, "8495": 971815552.0, "8500": 951760768.0, "8505": 982982848.0, "8510": 974371200.0, "8515": 969206912.0, "8520": 961827968.0, "8525": 944996096.0, "8530": 984721152.0, "8535": 978411520.0, "8540": 968342592.0, "8545": 969125440.0, "8550": 942408448.0, "8555": 971549056.0, "8560": 958775296.0, "8565": 975676160.0, "8570": 975305216.0, "8575": 971852992.0, "8580": 932583232.0, "8585": 966065856.0, "8590": 978933760.0, "8595": 979387904.0, "8600": 983792768.0, "8605": 958356416.0, "8610": 984069888.0, "8615": 978067776.0, "8620": 963535168.0, "8625": 979909120.0, "8630": 943580032.0, "8635": 961797632.0, "8640": 973745600.0, "8645": 970784128.0, "8650": 969289152.0, "8655": 970653440.0, "8660": 944484096.0, "8665": 986977728.0, "8670": 960353920.0, "8675": 974610176.0, "8680": 962718976.0, "8685": 956147136.0, "8690": 978612864.0, "8695": 969139072.0, "8700": 973135360.0, "8705": 973914176.0, "8710": 947435776.0, "8715": 973736320.0, "8720": 958622976.0, "8725": 978719488.0, "8730": 985894400.0, "8735": 952583040.0, "8740": 940201728.0, "8745": 987763456.0, "8750": 972207744.0, "8755": 971134720.0, "8760": 965569152.0, "8765": 934519872.0, "8770": 986656640.0, "8775": 969789440.0, "8780": 967920512.0, "8785": 962639488.0, "8790": 947921664.0, "8795": 969775296.0, "8800": 971220608.0, "8805": 973559168.0, "8810": 983161280.0, "8815": 951065856.0, "8820": 939478016.0, "8825": 964494336.0, "8830": 981089472.0, "8835": 971889408.0, "8840": 979835520.0, "8845": 951616384.0, "8850": 987153920.0, "8855": 971335296.0, "8860": 962222080.0, "8865": 957359360.0, "8870": 946242816.0, "8875": 968628096.0, "8880": 984173184.0, "8885": 971110144.0, "8890": 970299648.0, "8895": 952971136.0, "8900": 962246528.0, "8905": 977392000.0, "8910": 981876416.0, "8915": 981149952.0, "8920": 968258432.0, "8925": 940189184.0, "8930": 970787456.0, "8935": 963634560.0, "8940": 978025664.0, "8945": 982356352.0, "8950": 946274176.0, "8955": 972928128.0, "8960": 974032128.0, "8965": 973961216.0, "8970": 966361216.0, "8975": 937321600.0, "8980": 953099648.0, "8985": 977878528.0, "8990": 967166592.0, "8995": 980283904.0, "9000": 952421184.0, "9005": 950292544.0, "9010": 974935552.0, "9015": 982668672.0, "9020": 959278656.0, "9025": 979055040.0, "9030": 953936640.0, "9035": 968749312.0, "9040": 978270080.0, "9045": 968843136.0, "9050": 983417600.0, "9055": 947885952.0, "9060": 956699776.0, "9065": 970246528.0, "9070": 968015744.0, "9075": 981225856.0, "9080": 952541632.0, "9085": 971319168.0, "9090": 963789184.0, "9095": 968313984.0, "9100": 974584320.0, "9105": 960032896.0, "9110": 947321664.0, "9115": 956833728.0, "9120": 985899904.0, "9125": 963026176.0, "9130": 958457216.0, "9135": 951989056.0, "9140": 967565824.0, "9145": 977433728.0, "9150": 987305408.0, "9155": 976649408.0, "9160": 958050816.0, "9165": 950957248.0, "9170": 988702272.0, "9175": 971913280.0, "9180": 967854400.0, "9185": 955127680.0, "9190": 957263744.0, "9195": 966003584.0, "9200": 968856960.0, "9205": 967330048.0, "9210": 984179584.0, "9215": 931743808.0, "9220": 949808960.0, "9225": 971440256.0, "9230": 971281792.0, "9235": 971857152.0, "9240": 959917376.0, "9245": 963584128.0, "9250": 961416384.0, "9255": 983241472.0, "9260": 979566336.0, "9265": 953039104.0, "9270": 949474624.0, "9275": 978502016.0, "9280": 978025536.0, "9285": 962828800.0, "9290": 979390080.0, "9295": 958548480.0, "9300": 965876352.0, "9305": 969599232.0, "9310": 973283008.0, "9315": 976451392.0, "9320": 948304512.0, "9325": 979749696.0, "9330": 977926784.0, "9335": 975525504.0, "9340": 960336000.0, "9345": 943464832.0, "9350": 952835072.0, "9355": 962850048.0, "9360": 960675328.0, "9365": 983816320.0, "9370": 983035904.0, "9375": 942080896.0, "9380": 982540928.0, "9385": 985259136.0, "9390": 973406272.0, "9395": 978528128.0, "9400": 938038400.0, "9405": 968500672.0, "9410": 981791488.0, "9415": 991945472.0, "9420": 960625728.0, "9425": 956681216.0, "9430": 938695808.0, "9435": 974362368.0, "9440": 959727872.0, "9445": 973720576.0, "9450": 961877760.0, "9455": 946303872.0, "9460": 978086272.0, "9465": 988617984.0, "9470": 963615872.0, "9475": 983908608.0, "9480": 930854528.0, "9485": 987221248.0, "9490": 963974912.0, "9495": 972857088.0, "9500": 982392960.0, "9505": 970286080.0, "9510": 964873536.0, "9515": 957183296.0, "9520": 948641664.0, "9525": 965336064.0, "9530": 958567296.0, "9535": 950963840.0, "9540": 954501120.0, "9545": 979935296.0, "9550": 955384704.0, "9555": 953296192.0, "9560": 958726208.0, "9565": 969930112.0, "9570": 977751168.0, "9575": 958849792.0, "9580": 963257728.0, "9585": 946197184.0, "9590": 948135936.0, "9595": 967007808.0, "9600": 985117952.0, "9605": 985499648.0, "9610": 943959808.0, "9615": 952912128.0, "9620": 980920192.0, "9625": 978524736.0, "9630": 969671168.0, "9635": 974868544.0, "9640": 940772416.0, "9645": 962475008.0, "9650": 970857536.0, "9655": 987496960.0, "9660": 963394176.0, "9665": 950327872.0, "9670": 965817856.0, "9675": 963579264.0, "9680": 965384064.0, "9685": 986598272.0, "9690": 940596864.0, "9695": 950521728.0, "9700": 975714688.0, "9705": 972896256.0, "9710": 967299968.0, "9715": 971403392.0, "9720": 940613632.0, "9725": 966514816.0, "9730": 974099584.0, "9735": 974345792.0, "9740": 971516928.0, "9745": 951220736.0, "9750": 979370880.0, "9755": 970170432.0, "9760": 968237888.0, "9765": 963835520.0, "9770": 952652160.0, "9775": 956682880.0, "9780": 970721984.0, "9785": 958959232.0, "9790": 961043072.0, "9795": 958779200.0, "9800": 949918656.0, "9805": 962651200.0, "9810": 979093888.0, "9815": 978146816.0, "9820": 982841088.0, "9825": 939730944.0, "9830": 969614208.0, "9835": 973272832.0, "9840": 971945664.0, "9845": 967603328.0, "9850": 947232896.0, "9855": 956896512.0, "9860": 987801728.0, "9865": 970385664.0, "9870": 990310144.0, "9875": 957380096.0, "9880": 931362176.0, "9885": 963678464.0, "9890": 972811648.0, "9895": 984054016.0, "9900": 956595136.0, "9905": 939303808.0, "9910": 979107072.0, "9915": 973996800.0, "9920": 943946432.0, "9925": 963187328.0, "9930": 948020224.0, "9935": 960573120.0, "9940": 965856512.0, "9945": 958998016.0, "9950": 964584192.0, "9955": 943733120.0, "9960": 966844160.0, "9965": 983732096.0, "9970": 966840192.0, "9975": 964040640.0, "9980": 980881024.0, "9985": 942746240.0, "9990": 976134400.0, "9995": 982950848.0, "10000": 972073152.0, "10005": 970193472.0, "10010": 944380480.0, "10015": 983265344.0, "10020": 977865472.0, "10025": 979868544.0, "10030": 971490816.0, "10035": 946263296.0, "10040": 950534016.0, "10045": 977546880.0, "10050": 986017280.0, "10055": 990492800.0, "10060": 958996032.0, "10065": 947517312.0, "10070": 966895616.0, "10075": 979683904.0, "10080": 971953920.0, "10085": 974879744.0, "10090": 944216960.0, "10095": 962977344.0, "10100": 972381952.0, "10105": 976354432.0, "10110": 972128768.0, "10115": 948919680.0, "10120": 962852480.0, "10125": 974293120.0, "10130": 980737472.0, "10135": 972335104.0, "10140": 957843264.0, "10145": 934671872.0, "10150": 973965568.0, "10155": 970306112.0, "10160": 962491456.0, "10165": 975341248.0, "10170": 944624384.0, "10175": 979643712.0, "10180": 984008448.0, "10185": 978870144.0, "10190": 955877376.0, "10195": 937261120.0, "10200": 988253760.0, "10205": 973401856.0, "10210": 966901120.0, "10215": 976049664.0, "10220": 948799872.0, "10225": 950572096.0, "10230": 976120896.0, "10235": 954421632.0, "10240": 969850752.0, "10245": 962265472.0, "10250": 936756480.0, "10255": 979774976.0, "10260": 965000704.0, "10265": 967563712.0, "10270": 969297920.0, "10275": 935944256.0, "10280": 969526272.0, "10285": 996465152.0, "10290": 979762816.0, "10295": 981662912.0, "10300": 952271936.0, "10305": 972024256.0, "10310": 960359872.0, "10315": 971605760.0, "10320": 985354304.0, "10325": 983302336.0, "10330": 935148288.0, "10335": 976392064.0, "10340": 957603840.0, "10345": 973044352.0, "10350": 984707136.0, "10355": 942479296.0, "10360": 962279040.0, "10365": 973641856.0, "10370": 980432768.0, "10375": 970343296.0, "10380": 962080384.0, "10385": 955687296.0, "10390": 990783104.0, "10395": 965164608.0, "10400": 960470208.0, "10405": 950214848.0, "10410": 955491392.0, "10415": 975924736.0, "10420": 967248320.0, "10425": 969875328.0, "10430": 965126272.0, "10435": 962680768.0, "10440": 972024064.0, "10445": 972467456.0, "10450": 974949504.0, "10455": 965864704.0, "10460": 948726272.0, "10465": 971534464.0, "10470": 972756736.0, "10475": 979392128.0, "10480": 997292352.0, "10485": 949631936.0, "10490": 935104896.0, "10495": 969599424.0, "10500": 978688704.0, "10505": 959342784.0, "10510": 951008000.0, "10515": 954223744.0, "10520": 972150016.0, "10525": 969942528.0, "10530": 970425728.0, "10535": 986576256.0, "10540": 946829632.0, "10545": 970484032.0, "10550": 969371968.0, "10555": 959521856.0, "10560": 976274496.0, "10565": 960798208.0, "10570": 968688128.0, "10575": 973272576.0, "10580": 961017472.0, "10585": 973457024.0, "10590": 952053568.0, "10595": 956331776.0, "10600": 967935552.0, "10605": 986576256.0, "10610": 966417408.0, "10615": 976957568.0, "10620": 940933888.0, "10625": 965306432.0, "10630": 968022272.0, "10635": 973333888.0, "10640": 974664448.0, "10645": 948582400.0, "10650": 966388224.0, "10655": 985562624.0, "10660": 976682624.0, "10665": 967088256.0, "10670": 955226368.0, "10675": 934529920.0, "10680": 986153344.0, "10685": 991102656.0, "10690": 963886208.0, "10695": 971933632.0, "10700": 950091520.0, "10705": 978240128.0, "10710": 968317184.0, "10715": 967450432.0, "10720": 966357824.0, "10725": 944490816.0, "10730": 980318592.0, "10735": 961117952.0, "10740": 971283392.0, "10745": 984630528.0, "10750": 981762816.0, "10755": 945191296.0, "10760": 969882304.0, "10765": 972886400.0, "10770": 974268608.0, "10775": 959067392.0, "10780": 949520384.0, "10785": 953706304.0, "10790": 970157568.0, "10795": 960631552.0, "10800": 972050368.0, "10805": 951460864.0, "10810": 974235456.0, "10815": 959804160.0, "10820": 971302656.0, "10825": 967211072.0, "10830": 957016128.0, "10835": 963139136.0, "10840": 971035008.0, "10845": 964268160.0, "10850": 958162432.0, "10855": 967657344.0, "10860": 950849536.0, "10865": 964061696.0, "10870": 983627200.0, "10875": 982016640.0, "10880": 958659648.0, "10885": 954981888.0, "10890": 973122560.0, "10895": 973655744.0, "10900": 970546048.0, "10905": 965184256.0, "10910": 939048192.0, "10915": 960749824.0, "10920": 983653376.0, "10925": 970068160.0, "10930": 968771200.0, "10935": 963228480.0, "10940": 954249408.0, "10945": 964532608.0, "10950": 972466880.0, "10955": 966621248.0, "10960": 972285056.0, "10965": 966333184.0, "10970": 983572160.0, "10975": 965330496.0, "10980": 974669248.0, "10985": 986818496.0, "10990": 950797760.0, "10995": 963598784.0, "11000": 985495104.0, "11005": 978671168.0, "11010": 971614464.0, "11015": 970071232.0, "11020": 948195648.0, "11025": 960105088.0, "11030": 978168768.0, "11035": 976017024.0, "11040": 986523264.0, "11045": 956708480.0, "11050": 973395968.0, "11055": 974051968.0, "11060": 962164544.0, "11065": 985712768.0, "11070": 949791424.0, "11075": 976565888.0, "11080": 972315712.0, "11085": 967328576.0, "11090": 976399296.0, "11095": 946696448.0, "11100": 966199040.0, "11105": 974421504.0, "11110": 981198912.0, "11115": 968108160.0, "11120": 957518656.0, "11125": 956979840.0, "11130": 975786432.0, "11135": 979636544.0, "11140": 964944832.0, "11145": 966499008.0, "11150": 935518400.0, "11155": 976579008.0, "11160": 984367232.0, "11165": 982289792.0, "11170": 978113472.0, "11175": 958084864.0, "11180": 962589888.0, "11185": 972260672.0, "11190": 979666368.0, "11195": 985502784.0, "11200": 983014336.0, "11205": 942426240.0, "11210": 984802368.0, "11215": 967690816.0, "11220": 983476928.0, "11225": 961985728.0, "11230": 953398272.0, "11235": 981841280.0, "11240": 977805568.0, "11245": 966530176.0, "11250": 969466304.0, "11255": 960572544.0, "11260": 980096576.0, "11265": 963926720.0, "11270": 981695936.0, "11275": 968525888.0, "11280": 955905088.0, "11285": 953700224.0, "11290": 956489152.0, "11295": 968197568.0, "11300": 962513216.0, "11305": 958759872.0, "11310": 946304256.0, "11315": 983036096.0, "11320": 964828480.0, "11325": 980906304.0, "11330": 975476608.0, "11335": 952186816.0, "11340": 970596800.0, "11345": 969926080.0, "11350": 981628736.0, "11355": 981905088.0, "11360": 940723328.0, "11365": 970750592.0, "11370": 978978432.0, "11375": 975338432.0, "11380": 968256960.0, "11385": 958096384.0, "11390": 937948288.0, "11395": 977494080.0, "11400": 973515520.0, "11405": 961359424.0, "11410": 966143616.0, "11415": 929202368.0, "11420": 964768960.0, "11425": 981196352.0, "11430": 978636864.0, "11435": 970153280.0, "11440": 945072704.0, "11445": 975241024.0, "11450": 984735296.0, "11455": 971426176.0, "11460": 965182016.0, "11465": 960090176.0, "11470": 955191296.0, "11475": 972691072.0, "11480": 956542272.0, "11485": 977076864.0, "11490": 986332352.0, "11495": 959121344.0, "11500": 969424704.0, "11505": 964024640.0, "11510": 976702848.0, "11515": 977904064.0, "11520": 953963584.0, "11525": 976039360.0, "11530": 976686784.0, "11535": 979809792.0, "11540": 974141760.0, "11545": 953644288.0, "11550": 953295552.0, "11555": 981560640.0, "11560": 984532352.0, "11565": 965181312.0, "11570": 966487424.0, "11575": 950896832.0, "11580": 976062592.0, "11585": 977550784.0, "11590": 969314368.0, "11595": 976719232.0, "11600": 946047104.0, "11605": 973359168.0, "11610": 982457984.0, "11615": 972010048.0, "11620": 969363904.0, "11625": 949111040.0, "11630": 937578176.0, "11635": 973667008.0, "11640": 981259456.0, "11645": 980106048.0, "11650": 971758144.0, "11655": 956204288.0, "11660": 980712192.0, "11665": 958265664.0, "11670": 982618880.0, "11675": 972427200.0, "11680": 956445568.0, "11685": 982955712.0, "11690": 968614528.0, "11695": 968085632.0, "11700": 973819008.0, "11705": 956432640.0, "11710": 964584640.0, "11715": 983208448.0, "11720": 983636224.0, "11725": 965204032.0, "11730": 955695040.0, "11735": 942941376.0, "11740": 973829824.0, "11745": 971260672.0, "11750": 961624256.0, "11755": 963534976.0, "11760": 950291904.0, "11765": 983877632.0, "11770": 984810368.0, "11775": 975671936.0, "11780": 985430336.0, "11785": 947272512.0, "11790": 972444352.0, "11795": 970670464.0, "11800": 973251520.0, "11805": 986780480.0, "11810": 967591808.0, "11815": 955862848.0, "11820": 973765952.0, "11825": 970671296.0, "11830": 974883776.0, "11835": 961826368.0, "11840": 944467904.0, "11845": 980681344.0, "11850": 974601536.0, "11855": 977943744.0, "11860": 971600192.0, "11865": 938631104.0, "11870": 940150208.0, "11875": 990036736.0, "11880": 971605184.0, "11885": 962938432.0, "11890": 970330560.0, "11895": 965356416.0, "11900": 978791360.0, "11905": 961690240.0, "11910": 983653824.0, "11915": 989882688.0, "11920": 944918016.0, "11925": 994335296.0, "11930": 964853504.0, "11935": 963527104.0, "11940": 977089344.0, "11945": 944859968.0, "11950": 977822912.0, "11955": 979190400.0, "11960": 972201664.0, "11965": 976359488.0, "11970": 963048064.0, "11975": 963239936.0, "11980": 977609536.0, "11985": 953135424.0, "11990": 968692864.0, "11995": 965206144.0, "12000": 958650816.0, "12005": 974558720.0, "12010": 979488320.0, "12015": 972212928.0, "12020": 972972992.0, "12025": 934714048.0, "12030": 969111104.0, "12035": 984089664.0, "12040": 977507648.0, "12045": 981645056.0, "12050": 931286400.0, "12055": 938790208.0, "12060": 974349248.0, "12065": 965845696.0, "12070": 968239104.0, "12075": 949940224.0, "12080": 953231552.0, "12085": 972342592.0, "12090": 964042304.0, "12095": 963552832.0, "12100": 976701248.0, "12105": 950225216.0, "12110": 972205568.0, "12115": 968115136.0, "12120": 986359296.0, "12125": 980675264.0, "12130": 941589504.0, "12135": 955591040.0, "12140": 975756032.0, "12145": 979390528.0, "12150": 979435776.0, "12155": 961767936.0, "12160": 946323264.0, "12165": 968612864.0, "12170": 964129152.0, "12175": 967543936.0, "12180": 974942848.0, "12185": 952774592.0, "12190": 988261760.0, "12195": 970261312.0, "12200": 964981312.0, "12205": 968767232.0, "12210": 939144320.0, "12215": 996663488.0, "12220": 970037696.0, "12225": 979618880.0, "12230": 980198144.0, "12235": 950224576.0, "12240": 963441344.0, "12245": 965754240.0, "12250": 976793792.0, "12255": 968140288.0, "12260": 983346688.0, "12265": 931758592.0, "12270": 966457472.0, "12275": 979665408.0, "12280": 977629696.0, "12285": 970432320.0, "12290": 929095296.0, "12295": 976750144.0, "12300": 985683008.0, "12305": 969918016.0, "12310": 986105792.0, "12315": 936237952.0, "12320": 957828032.0, "12325": 966696000.0, "12330": 968270016.0, "12335": 963800896.0, "12340": 957433344.0, "12345": 944247872.0, "12350": 966701760.0, "12355": 975709440.0, "12360": 978832832.0, "12365": 964561280.0, "12370": 948990016.0, "12375": 963593536.0, "12380": 964789056.0, "12385": 973126912.0, "12390": 961501056.0, "12395": 961629824.0, "12400": 975307712.0, "12405": 976388032.0, "12410": 953154688.0, "12415": 962821184.0, "12420": 943854144.0, "12425": 948976640.0, "12430": 972157696.0, "12435": 969004352.0, "12440": 961893696.0, "12445": 951751744.0, "12450": 947021888.0, "12455": 981248448.0, "12460": 973989696.0, "12465": 954462784.0, "12470": 981144320.0, "12475": 958569728.0, "12480": 967084864.0, "12485": 978267776.0, "12490": 974168192.0, "12495": 969692160.0, "12500": 961573632.0, "12505": 943534528.0, "12510": 961013504.0, "12515": 969566080.0, "12520": 974194304.0, "12525": 972107840.0, "12530": 944608640.0, "12535": 976414272.0, "12540": 965917440.0, "12545": 972033856.0, "12550": 969671552.0, "12555": 941300736.0, "12560": 964469312.0, "12565": 947620608.0, "12570": 974443840.0, "12575": 963055232.0, "12580": 958104128.0, "12585": 964300352.0, "12590": 965924288.0, "12595": 978732672.0, "12600": 982093952.0, "12605": 949125824.0, "12610": 937745344.0, "12615": 962779264.0, "12620": 961201664.0, "12625": 966531136.0, "12630": 971046272.0, "12635": 962236416.0, "12640": 978541696.0, "12645": 969369920.0, "12650": 970161664.0, "12655": 964217216.0, "12660": 932390336.0, "12665": 956865664.0, "12670": 986180352.0, "12675": 965566464.0, "12680": 961126528.0, "12685": 951304256.0, "12690": 945491456.0, "12695": 978387648.0, "12700": 985277888.0, "12705": 958784640.0, "12710": 968294144.0, "12715": 956280512.0, "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": "nan", "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": "nan", "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": "nan"}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 13000, "step_interval": 5, "values": {"1": 12795811840.0, "5": 12795811840.0, "10": 12795811840.0, "15": 12795811840.0, "20": 12795811840.0, "25": 12795811840.0, "30": 12795811840.0, "35": 12795811840.0, "40": 12795811840.0, "45": 12795811840.0, "50": 12795811840.0, "55": 12795811840.0, "60": 12795811840.0, "65": 12795811840.0, "70": 12795811840.0, "75": 12795811840.0, "80": 12795811840.0, "85": 12795811840.0, "90": 12795811840.0, "95": 12795811840.0, "100": 12795811840.0, "105": 12795811840.0, "110": 12795811840.0, "115": 12795811840.0, "120": 12795811840.0, "125": 12795811840.0, "130": 12795811840.0, "135": 12795811840.0, "140": 12795811840.0, "145": 12795811840.0, "150": 12795811840.0, "155": 12795811840.0, "160": 12795811840.0, "165": 12795811840.0, "170": 12795811840.0, "175": 12795811840.0, "180": 12795811840.0, "185": 12795811840.0, "190": 12795811840.0, "195": 12795811840.0, "200": 12795811840.0, "205": 12795811840.0, "210": 12795811840.0, "215": 12795811840.0, "220": 12795811840.0, "225": 12795811840.0, "230": 12795811840.0, "235": 12795811840.0, "240": 12795811840.0, "245": 12795811840.0, "250": 12795811840.0, "255": 12795811840.0, "260": 12795811840.0, "265": 12795811840.0, "270": 12795811840.0, "275": 12795811840.0, "280": 12795811840.0, "285": 12795811840.0, "290": 12795811840.0, "295": 12795811840.0, "300": 12795811840.0, "305": 12795811840.0, "310": 12795811840.0, "315": 12795811840.0, "320": 12795811840.0, "325": 12795811840.0, "330": 12795811840.0, "335": 12795811840.0, "340": 12795811840.0, "345": 12795811840.0, "350": 12795811840.0, "355": 12795811840.0, "360": 12795811840.0, "365": 12795811840.0, "370": 12795811840.0, "375": 12795811840.0, "380": 12795811840.0, "385": 12795811840.0, "390": 12795811840.0, "395": 12795811840.0, "400": 12795811840.0, "405": 12795811840.0, "410": 12795811840.0, "415": 12795811840.0, "420": 12795811840.0, "425": 12795811840.0, "430": 12795811840.0, "435": 12795811840.0, "440": 12795811840.0, "445": 12795811840.0, "450": 12795811840.0, "455": 12795811840.0, "460": 12795811840.0, "465": 12795811840.0, "470": 12795811840.0, "475": 12795811840.0, "480": 12795811840.0, "485": 12795811840.0, "490": 12795811840.0, "495": 12795811840.0, "500": 12795811840.0, "505": 12795811840.0, "510": 12795811840.0, "515": 12795811840.0, "520": 12795811840.0, "525": 12795811840.0, "530": 12795811840.0, "535": 12795811840.0, "540": 12795811840.0, "545": 12795811840.0, "550": 12795811840.0, "555": 12795811840.0, "560": 12795811840.0, "565": 12795811840.0, "570": 12795811840.0, "575": 12795811840.0, "580": 12795811840.0, "585": 12795811840.0, "590": 12795811840.0, "595": 12795811840.0, "600": 12795811840.0, "605": 12795811840.0, "610": 12795811840.0, "615": 12795811840.0, "620": 12795811840.0, "625": 12795811840.0, "630": 12795811840.0, "635": 12795811840.0, "640": 12795811840.0, "645": 12795811840.0, "650": 12795811840.0, "655": 12795811840.0, "660": 12795811840.0, "665": 12795811840.0, "670": 12795811840.0, "675": 12795811840.0, "680": 12795811840.0, "685": 12795811840.0, "690": 12795811840.0, "695": 12795811840.0, "700": 12795811840.0, "705": 12795811840.0, "710": 12795811840.0, "715": 12795811840.0, "720": 12795811840.0, "725": 12795811840.0, "730": 12795811840.0, "735": 12795811840.0, "740": 12795811840.0, "745": 12795811840.0, "750": 12795811840.0, "755": 12795811840.0, "760": 12795811840.0, "765": 12795811840.0, "770": 12795811840.0, "775": 12795811840.0, "780": 12795811840.0, "785": 12795811840.0, "790": 12795811840.0, "795": 12795811840.0, "800": 12795811840.0, "805": 12795811840.0, "810": 12795811840.0, "815": 12795811840.0, "820": 12795811840.0, "825": 12795811840.0, "830": 12795811840.0, "835": 12795811840.0, "840": 12795811840.0, "845": 12795811840.0, "850": 12795811840.0, "855": 12795811840.0, "860": 12795811840.0, "865": 12795811840.0, "870": 12795811840.0, "875": 12795811840.0, "880": 12795811840.0, "885": 12795811840.0, "890": 12795811840.0, "895": 12795811840.0, "900": 12795811840.0, "905": 12795811840.0, "910": 12795811840.0, "915": 12795811840.0, "920": 12795811840.0, "925": 12795811840.0, "930": 12795811840.0, "935": 12795811840.0, "940": 12795811840.0, "945": 12795811840.0, "950": 12795811840.0, "955": 12795811840.0, "960": 12795811840.0, "965": 12795811840.0, "970": 12795811840.0, "975": 12795811840.0, "980": 12795811840.0, "985": 12795811840.0, "990": 12795811840.0, "995": 12795811840.0, "1000": 12795811840.0, "1005": 12795811840.0, "1010": 12795811840.0, "1015": 12795811840.0, "1020": 12795811840.0, "1025": 12795811840.0, "1030": 12795811840.0, "1035": 12795811840.0, "1040": 12795811840.0, "1045": 12795811840.0, "1050": 12795811840.0, "1055": 12795811840.0, "1060": 12795811840.0, "1065": 12795811840.0, "1070": 12795811840.0, "1075": 12795811840.0, "1080": 12795811840.0, "1085": 12795811840.0, "1090": 12795811840.0, "1095": 12795811840.0, "1100": 12795811840.0, "1105": 12795811840.0, "1110": 12795811840.0, "1115": 12795811840.0, "1120": 12795811840.0, "1125": 12795811840.0, "1130": 12795811840.0, "1135": 12795811840.0, "1140": 12795811840.0, "1145": 12795811840.0, "1150": 12795811840.0, "1155": 12795811840.0, "1160": 12795811840.0, "1165": 12795811840.0, "1170": 12795811840.0, "1175": 12795811840.0, "1180": 12795811840.0, "1185": 12795811840.0, "1190": 12795811840.0, "1195": 12795811840.0, "1200": 12795811840.0, "1205": 12795811840.0, "1210": 12795811840.0, "1215": 12795811840.0, "1220": 12795811840.0, "1225": 12795811840.0, "1230": 12795811840.0, "1235": 12795811840.0, "1240": 12795811840.0, "1245": 12795811840.0, "1250": 12795811840.0, "1255": 12795811840.0, "1260": 12795811840.0, "1265": 12795811840.0, "1270": 12795811840.0, "1275": 12795811840.0, "1280": 12795811840.0, "1285": 12795811840.0, "1290": 12795811840.0, "1295": 12795811840.0, "1300": 12795811840.0, "1305": 12795811840.0, "1310": 12795811840.0, "1315": 12795811840.0, "1320": 12795811840.0, "1325": 12795811840.0, "1330": 12795811840.0, "1335": 12795811840.0, "1340": 12795811840.0, "1345": 12795811840.0, "1350": 12795811840.0, "1355": 12795811840.0, "1360": 12795811840.0, "1365": 12795811840.0, "1370": 12795811840.0, "1375": 12795811840.0, "1380": 12795811840.0, "1385": 12795811840.0, "1390": 12795811840.0, "1395": 12795811840.0, "1400": 12795811840.0, "1405": 12795811840.0, "1410": 12795811840.0, "1415": 12795811840.0, "1420": 12795811840.0, "1425": 12795811840.0, "1430": 12795811840.0, "1435": 12795811840.0, "1440": 12795811840.0, "1445": 12795811840.0, "1450": 12795811840.0, "1455": 12795811840.0, "1460": 12795811840.0, "1465": 12795811840.0, "1470": 12795811840.0, "1475": 12795811840.0, "1480": 12795811840.0, "1485": 12795811840.0, "1490": 12795811840.0, "1495": 12795811840.0, "1500": 12795811840.0, "1505": 12795811840.0, "1510": 12795811840.0, "1515": 12795811840.0, "1520": 12795811840.0, "1525": 12795811840.0, "1530": 12795811840.0, "1535": 12795811840.0, "1540": 12795811840.0, "1545": 12795811840.0, "1550": 12795811840.0, "1555": 12795811840.0, "1560": 12795811840.0, "1565": 12795811840.0, "1570": 12795811840.0, "1575": 12795811840.0, "1580": 12795811840.0, "1585": 12795811840.0, "1590": 12795811840.0, "1595": 12795811840.0, "1600": 12795811840.0, "1605": 12795811840.0, "1610": 12795811840.0, "1615": 12795811840.0, "1620": 12795811840.0, "1625": 12795811840.0, "1630": 12795811840.0, "1635": 12795811840.0, "1640": 12795811840.0, "1645": 12795811840.0, "1650": 12795811840.0, "1655": 12795811840.0, "1660": 12795811840.0, "1665": 12795811840.0, "1670": 12795811840.0, "1675": 12795811840.0, "1680": 12795811840.0, "1685": 12795811840.0, "1690": 12795811840.0, "1695": 12795811840.0, "1700": 12795811840.0, "1705": 12795811840.0, "1710": 12795811840.0, "1715": 12795811840.0, "1720": 12795811840.0, "1725": 12795811840.0, "1730": 12795811840.0, "1735": 12795811840.0, "1740": 12795811840.0, "1745": 12795811840.0, "1750": 12795811840.0, "1755": 12795811840.0, "1760": 12795811840.0, "1765": 12795811840.0, "1770": 12795811840.0, "1775": 12795811840.0, "1780": 12795811840.0, "1785": 12795811840.0, "1790": 12795811840.0, "1795": 12795811840.0, "1800": 12795811840.0, "1805": 12795811840.0, "1810": 12795811840.0, "1815": 12795811840.0, "1820": 12795811840.0, "1825": 12795811840.0, "1830": 12795811840.0, "1835": 12795811840.0, "1840": 12795811840.0, "1845": 12795811840.0, "1850": 12795811840.0, "1855": 12795811840.0, "1860": 12795811840.0, "1865": 12795811840.0, "1870": 12795811840.0, "1875": 12795811840.0, "1880": 12795811840.0, "1885": 12795811840.0, "1890": 12795811840.0, "1895": 12795811840.0, "1900": 12795811840.0, "1905": 12795811840.0, "1910": 12795811840.0, "1915": 12795811840.0, "1920": 12795811840.0, "1925": 12795811840.0, "1930": 12795811840.0, "1935": 12795811840.0, "1940": 12795811840.0, "1945": 12795811840.0, "1950": 12795811840.0, "1955": 12795811840.0, "1960": 12795811840.0, "1965": 12795811840.0, "1970": 12795811840.0, "1975": 12795811840.0, "1980": 12795811840.0, "1985": 12795811840.0, "1990": 12795811840.0, "1995": 12795811840.0, "2000": 12795811840.0, "2005": 12795811840.0, "2010": 12795811840.0, "2015": 12795811840.0, "2020": 12795811840.0, "2025": 12795811840.0, "2030": 12795811840.0, "2035": 12795811840.0, "2040": 12795811840.0, "2045": 12795811840.0, "2050": 12795811840.0, "2055": 12795811840.0, "2060": 12795811840.0, "2065": 12795811840.0, "2070": 12795811840.0, "2075": 12795811840.0, "2080": 12795811840.0, "2085": 12795811840.0, "2090": 12795811840.0, "2095": 12795811840.0, "2100": 12795811840.0, "2105": 12795811840.0, "2110": 12795811840.0, "2115": 12795811840.0, "2120": 12795811840.0, "2125": 12795811840.0, "2130": 12795811840.0, "2135": 12795811840.0, "2140": 12795811840.0, "2145": 12795811840.0, "2150": 12795811840.0, "2155": 12795811840.0, "2160": 12795811840.0, "2165": 12795811840.0, "2170": 12795811840.0, "2175": 12795811840.0, "2180": 12795811840.0, "2185": 12795811840.0, "2190": 12795811840.0, "2195": 12795811840.0, "2200": 12795811840.0, "2205": 12795811840.0, "2210": 12795811840.0, "2215": 12795811840.0, "2220": 12795811840.0, "2225": 12795811840.0, "2230": 12795811840.0, "2235": 12795811840.0, "2240": 12795811840.0, "2245": 12795811840.0, "2250": 12795811840.0, "2255": 12795811840.0, "2260": 12795811840.0, "2265": 12795811840.0, "2270": 12795811840.0, "2275": 12795811840.0, "2280": 12795811840.0, "2285": 12795811840.0, "2290": 12795811840.0, "2295": 12795811840.0, "2300": 12795811840.0, "2305": 12795811840.0, "2310": 12795811840.0, "2315": 12795811840.0, "2320": 12795811840.0, "2325": 12795811840.0, "2330": 12795811840.0, "2335": 12795811840.0, "2340": 12795811840.0, "2345": 12795811840.0, "2350": 12795811840.0, "2355": 12795811840.0, "2360": 12795811840.0, "2365": 12795811840.0, "2370": 12795811840.0, "2375": 12795811840.0, "2380": 12795811840.0, "2385": 12795811840.0, "2390": 12795811840.0, "2395": 12795811840.0, "2400": 12795811840.0, "2405": 12795811840.0, "2410": 12795811840.0, "2415": 12795811840.0, "2420": 12795811840.0, "2425": 12795811840.0, "2430": 12795811840.0, "2435": 12795811840.0, "2440": 12795811840.0, "2445": 12795811840.0, "2450": 12795811840.0, "2455": 12795811840.0, "2460": 12795811840.0, "2465": 12795811840.0, "2470": 12795811840.0, "2475": 12795811840.0, "2480": 12795811840.0, "2485": 12795811840.0, "2490": 12795811840.0, "2495": 12795811840.0, "2500": 12795811840.0, "2505": 12795811840.0, "2510": 12795811840.0, "2515": 12795811840.0, "2520": 12795811840.0, "2525": 12795811840.0, "2530": 12795811840.0, "2535": 12795811840.0, "2540": 12795811840.0, "2545": 12795811840.0, "2550": 12795811840.0, "2555": 12795811840.0, "2560": 12795811840.0, "2565": 12795811840.0, "2570": 12795811840.0, "2575": 12795811840.0, "2580": 12795811840.0, "2585": 12795811840.0, "2590": 12795811840.0, "2595": 12795811840.0, "2600": 12795811840.0, "2605": 12795811840.0, "2610": 12795811840.0, "2615": 12795811840.0, "2620": 12795811840.0, "2625": 12795811840.0, "2630": 12795811840.0, "2635": 12795811840.0, "2640": 12795811840.0, "2645": 12795811840.0, "2650": 12795811840.0, "2655": 12795811840.0, "2660": 12795811840.0, "2665": 12795811840.0, "2670": 12795811840.0, "2675": 12795811840.0, "2680": 12795811840.0, "2685": 12795811840.0, "2690": 12795811840.0, "2695": 12795811840.0, "2700": 12795811840.0, "2705": 12795811840.0, "2710": 12795811840.0, "2715": 12795811840.0, "2720": 12795811840.0, "2725": 12795811840.0, "2730": 12795811840.0, "2735": 12795811840.0, "2740": 12795811840.0, "2745": 12795811840.0, "2750": 12795811840.0, "2755": 12795811840.0, "2760": 12795811840.0, "2765": 12795811840.0, "2770": 12795811840.0, "2775": 12795811840.0, "2780": 12795811840.0, "2785": 12795811840.0, "2790": 12795811840.0, "2795": 12795811840.0, "2800": 12795811840.0, "2805": 12795811840.0, "2810": 12795811840.0, "2815": 12795811840.0, "2820": 12795811840.0, "2825": 12795811840.0, "2830": 12795811840.0, "2835": 12795811840.0, "2840": 12795811840.0, "2845": 12795811840.0, "2850": 12795811840.0, "2855": 12795811840.0, "2860": 12795811840.0, "2865": 12795811840.0, "2870": 12795811840.0, "2875": 12795811840.0, "2880": 12795811840.0, "2885": 12795811840.0, "2890": 12795811840.0, "2895": 12795811840.0, "2900": 12795811840.0, "2905": 12795811840.0, "2910": 12795811840.0, "2915": 12795811840.0, "2920": 12795811840.0, "2925": 12795811840.0, "2930": 12795811840.0, "2935": 12795811840.0, "2940": 12795811840.0, "2945": 12795811840.0, "2950": 12795811840.0, "2955": 12795811840.0, "2960": 12795811840.0, "2965": 12795811840.0, "2970": 12795811840.0, "2975": 12795811840.0, "2980": 12795811840.0, "2985": 12795811840.0, "2990": 12795811840.0, "2995": 12795811840.0, "3000": 12795811840.0, "3005": 12795811840.0, "3010": 12795811840.0, "3015": 12795811840.0, "3020": 12795811840.0, "3025": 12795811840.0, "3030": 12795811840.0, "3035": 12795811840.0, "3040": 12795811840.0, "3045": 12795811840.0, "3050": 12795811840.0, "3055": 12795811840.0, "3060": 12795811840.0, "3065": 12795811840.0, "3070": 12795811840.0, "3075": 12795811840.0, "3080": 12795811840.0, "3085": 12795811840.0, "3090": 12795811840.0, "3095": 12795811840.0, "3100": 12795811840.0, "3105": 12795811840.0, "3110": 12795811840.0, "3115": 12795811840.0, "3120": 12795811840.0, "3125": 12795811840.0, "3130": 12795811840.0, "3135": 12795811840.0, "3140": 12795811840.0, "3145": 12795811840.0, "3150": 12795811840.0, "3155": 12795811840.0, "3160": 12795811840.0, "3165": 12795811840.0, "3170": 12795811840.0, "3175": 12795811840.0, "3180": 12795811840.0, "3185": 12795811840.0, "3190": 12795811840.0, "3195": 12795811840.0, "3200": 12795811840.0, "3205": 12795811840.0, "3210": 12795811840.0, "3215": 12795811840.0, "3220": 12795811840.0, "3225": 12795811840.0, "3230": 12795811840.0, "3235": 12795811840.0, "3240": 12795811840.0, "3245": 12795811840.0, "3250": 12795811840.0, "3255": 12795811840.0, "3260": 12795811840.0, "3265": 12795811840.0, "3270": 12795811840.0, "3275": 12795811840.0, "3280": 12795811840.0, "3285": 12795811840.0, "3290": 12795811840.0, "3295": 12795811840.0, "3300": 12795811840.0, "3305": 12795811840.0, "3310": 12795811840.0, "3315": 12795811840.0, "3320": 12795811840.0, "3325": 12795811840.0, "3330": 12795811840.0, "3335": 12795811840.0, "3340": 12795811840.0, "3345": 12795811840.0, "3350": 12795811840.0, "3355": 12795811840.0, "3360": 12795811840.0, "3365": 12795811840.0, "3370": 12795811840.0, "3375": 12795811840.0, "3380": 12795811840.0, "3385": 12795811840.0, "3390": 12795811840.0, "3395": 12795811840.0, "3400": 12795811840.0, "3405": 12795811840.0, "3410": 12795811840.0, "3415": 12795811840.0, "3420": 12795811840.0, "3425": 12795811840.0, "3430": 12795811840.0, "3435": 12795811840.0, "3440": 12795811840.0, "3445": 12795811840.0, "3450": 12795811840.0, "3455": 12795811840.0, "3460": 12795811840.0, "3465": 12795811840.0, "3470": 12795811840.0, "3475": 12795811840.0, "3480": 12795811840.0, "3485": 12795811840.0, "3490": 12795811840.0, "3495": 12795811840.0, "3500": 12795811840.0, "3505": 12795811840.0, "3510": 12795811840.0, "3515": 12795811840.0, "3520": 12795811840.0, "3525": 12795811840.0, "3530": 12795811840.0, "3535": 12795811840.0, "3540": 12795811840.0, "3545": 12795811840.0, "3550": 12795811840.0, "3555": 12795811840.0, "3560": 12795811840.0, "3565": 12795811840.0, "3570": 12795789312.0, "3575": 12795789312.0, "3580": 12795789312.0, "3585": 12795789312.0, "3590": 12795789312.0, "3595": 12795789312.0, "3600": 12795789312.0, "3605": 12795789312.0, "3610": 12795789312.0, "3615": 12795789312.0, "3620": 12795789312.0, "3625": 12795789312.0, "3630": 12795789312.0, "3635": 12795789312.0, "3640": 12795789312.0, "3645": 12795789312.0, "3650": 12795789312.0, "3655": 12795789312.0, "3660": 12795789312.0, "3665": 12795789312.0, "3670": 12795789312.0, "3675": 12795789312.0, "3680": 12795789312.0, "3685": 12795789312.0, "3690": 12795789312.0, "3695": 12795789312.0, "3700": 12795789312.0, "3705": 12795789312.0, "3710": 12795789312.0, "3715": 12795789312.0, "3720": 12795789312.0, "3725": 12795789312.0, "3730": 12795789312.0, "3735": 12795789312.0, "3740": 12795789312.0, "3745": 12795789312.0, "3750": 12795789312.0, "3755": 12795789312.0, "3760": 12795789312.0, "3765": 12795789312.0, "3770": 12795789312.0, "3775": 12795789312.0, "3780": 12795789312.0, "3785": 12795789312.0, "3790": 12795789312.0, "3795": 12795789312.0, "3800": 12795789312.0, "3805": 12795789312.0, "3810": 12795789312.0, "3815": 12795789312.0, "3820": 12795789312.0, "3825": 12795789312.0, "3830": 12795789312.0, "3835": 12795789312.0, "3840": 12795789312.0, "3845": 12795789312.0, "3850": 12795789312.0, "3855": 12795789312.0, "3860": 12795789312.0, "3865": 12795789312.0, "3870": 12795789312.0, "3875": 12795789312.0, "3880": 12795789312.0, "3885": 12795789312.0, "3890": 12795789312.0, "3895": 12795789312.0, "3900": 12795789312.0, "3905": 12795789312.0, "3910": 12795789312.0, "3915": 12795789312.0, "3920": 12795789312.0, "3925": 12795789312.0, "3930": 12795789312.0, "3935": 12795789312.0, "3940": 12795789312.0, "3945": 12795789312.0, "3950": 12795789312.0, "3955": 12795789312.0, "3960": 12795789312.0, "3965": 12795789312.0, "3970": 12795789312.0, "3975": 12795789312.0, "3980": 12795789312.0, "3985": 12795789312.0, "3990": 12795789312.0, "3995": 12795789312.0, "4000": 12795789312.0, "4005": 12795789312.0, "4010": 12795789312.0, "4015": 12795789312.0, "4020": 12795789312.0, "4025": 12795789312.0, "4030": 12795789312.0, "4035": 12795789312.0, "4040": 12795789312.0, "4045": 12795789312.0, "4050": 12795789312.0, "4055": 12795789312.0, "4060": 12795789312.0, "4065": 12795789312.0, "4070": 12795789312.0, "4075": 12795789312.0, "4080": 12795789312.0, "4085": 12795789312.0, "4090": 12795789312.0, "4095": 12795789312.0, "4100": 12795789312.0, "4105": 12795789312.0, "4110": 12795789312.0, "4115": 12795789312.0, "4120": 12795789312.0, "4125": 12795789312.0, "4130": 12795789312.0, "4135": 12795789312.0, "4140": 12795789312.0, "4145": 12795789312.0, "4150": 12795789312.0, "4155": 12795789312.0, "4160": 12795789312.0, "4165": 12795789312.0, "4170": 12795789312.0, "4175": 12795789312.0, "4180": 12795789312.0, "4185": 12795789312.0, "4190": 12795789312.0, "4195": 12795789312.0, "4200": 12795789312.0, "4205": 12795789312.0, "4210": 12795789312.0, "4215": 12795789312.0, "4220": 12795789312.0, "4225": 12795789312.0, "4230": 12795789312.0, "4235": 12795789312.0, "4240": 12795789312.0, "4245": 12795789312.0, "4250": 12795789312.0, "4255": 12795789312.0, "4260": 12795789312.0, "4265": 12795789312.0, "4270": 12795789312.0, "4275": 12795789312.0, "4280": 12795789312.0, "4285": 12795789312.0, "4290": 12795789312.0, "4295": 12795789312.0, "4300": 12795789312.0, "4305": 12795789312.0, "4310": 12795789312.0, "4315": 12795789312.0, "4320": 12795789312.0, "4325": 12795789312.0, "4330": 12795789312.0, "4335": 12795789312.0, "4340": 12795789312.0, "4345": 12795789312.0, "4350": 12795789312.0, "4355": 12795789312.0, "4360": 12795789312.0, "4365": 12795789312.0, "4370": 12795789312.0, "4375": 12795789312.0, "4380": 12795789312.0, "4385": 12795789312.0, "4390": 12795789312.0, "4395": 12795789312.0, "4400": 12795789312.0, "4405": 12795789312.0, "4410": 12795789312.0, "4415": 12795789312.0, "4420": 12795789312.0, "4425": 12795789312.0, "4430": 12795789312.0, "4435": 12795789312.0, "4440": 12795789312.0, "4445": 12795789312.0, "4450": 12795789312.0, "4455": 12795789312.0, "4460": 12795789312.0, "4465": 12795789312.0, "4470": 12795789312.0, "4475": 12795789312.0, "4480": 12795789312.0, "4485": 12795789312.0, "4490": 12795789312.0, "4495": 12795789312.0, "4500": 12795789312.0, "4505": 12795789312.0, "4510": 12795789312.0, "4515": 12795789312.0, "4520": 12795789312.0, "4525": 12795789312.0, "4530": 12795789312.0, "4535": 12795789312.0, "4540": 12795789312.0, "4545": 12795789312.0, "4550": 12795789312.0, "4555": 12795789312.0, "4560": 12795789312.0, "4565": 12795789312.0, "4570": 12795789312.0, "4575": 12795789312.0, "4580": 12795789312.0, "4585": 12795789312.0, "4590": 12795789312.0, "4595": 12795789312.0, "4600": 12795789312.0, "4605": 12795789312.0, "4610": 12795789312.0, "4615": 12795789312.0, "4620": 12795789312.0, "4625": 12795789312.0, "4630": 12795789312.0, "4635": 12795789312.0, "4640": 12795789312.0, "4645": 12795789312.0, "4650": 12795789312.0, "4655": 12795789312.0, "4660": 12795789312.0, "4665": 12795789312.0, "4670": 12795789312.0, "4675": 12795789312.0, "4680": 12795789312.0, "4685": 12795789312.0, "4690": 12795789312.0, "4695": 12795789312.0, "4700": 12795789312.0, "4705": 12795789312.0, "4710": 12795789312.0, "4715": 12795789312.0, "4720": 12795789312.0, "4725": 12795789312.0, "4730": 12795789312.0, "4735": 12795789312.0, "4740": 12795789312.0, "4745": 12795789312.0, "4750": 12795789312.0, "4755": 12795789312.0, "4760": 12795789312.0, "4765": 12795789312.0, "4770": 12795789312.0, "4775": 12795789312.0, "4780": 12795789312.0, "4785": 12795789312.0, "4790": 12795789312.0, "4795": 12795789312.0, "4800": 12795789312.0, "4805": 12795789312.0, "4810": 12795789312.0, "4815": 12795789312.0, "4820": 12795789312.0, "4825": 12795789312.0, "4830": 12795789312.0, "4835": 12795789312.0, "4840": 12795789312.0, "4845": 12795789312.0, "4850": 12795789312.0, "4855": 12795789312.0, "4860": 12795789312.0, "4865": 12795789312.0, "4870": 12795789312.0, "4875": 12795789312.0, "4880": 12795789312.0, "4885": 12795789312.0, "4890": 12795789312.0, "4895": 12795789312.0, "4900": 12795789312.0, "4905": 12795789312.0, "4910": 12795789312.0, "4915": 12795789312.0, "4920": 12795789312.0, "4925": 12795789312.0, "4930": 12795789312.0, "4935": 12795789312.0, "4940": 12795789312.0, "4945": 12795789312.0, "4950": 12795789312.0, "4955": 12795789312.0, "4960": 12795789312.0, "4965": 12795789312.0, "4970": 12795789312.0, "4975": 12795789312.0, "4980": 12795789312.0, "4985": 12795789312.0, "4990": 12795789312.0, "4995": 12795789312.0, "5000": 12795789312.0, "5005": 12795789312.0, "5010": 12795789312.0, "5015": 12795789312.0, "5020": 12795789312.0, "5025": 12795789312.0, "5030": 12795789312.0, "5035": 12795789312.0, "5040": 12795789312.0, "5045": 12795789312.0, "5050": 12795789312.0, "5055": 12795789312.0, "5060": 12795789312.0, "5065": 12795789312.0, "5070": 12795789312.0, "5075": 12795789312.0, "5080": 12795789312.0, "5085": 12795789312.0, "5090": 12795789312.0, "5095": 12795789312.0, "5100": 12795789312.0, "5105": 12795789312.0, "5110": 12795789312.0, "5115": 12795789312.0, "5120": 12795789312.0, "5125": 12795789312.0, "5130": 12795789312.0, "5135": 12795789312.0, "5140": 12795789312.0, "5145": 12795789312.0, "5150": 12795789312.0, "5155": 12795789312.0, "5160": 12795789312.0, "5165": 12795789312.0, "5170": 12795789312.0, "5175": 12795789312.0, "5180": 12795789312.0, "5185": 12795789312.0, "5190": 12795789312.0, "5195": 12795789312.0, "5200": 12795789312.0, "5205": 12795789312.0, "5210": 12795789312.0, "5215": 12795789312.0, "5220": 12795789312.0, "5225": 12795789312.0, "5230": 12795789312.0, "5235": 12795789312.0, "5240": 12795789312.0, "5245": 12795789312.0, "5250": 12795789312.0, "5255": 12795789312.0, "5260": 12795789312.0, "5265": 12795789312.0, "5270": 12795789312.0, "5275": 12795789312.0, "5280": 12795789312.0, "5285": 12795789312.0, "5290": 12795789312.0, "5295": 12795789312.0, "5300": 12795789312.0, "5305": 12795789312.0, "5310": 12795789312.0, "5315": 12795789312.0, "5320": 12795789312.0, "5325": 12795789312.0, "5330": 12795789312.0, "5335": 12795789312.0, "5340": 12795789312.0, "5345": 12795789312.0, "5350": 12795789312.0, "5355": 12795789312.0, "5360": 12795789312.0, "5365": 12795789312.0, "5370": 12795789312.0, "5375": 12795789312.0, "5380": 12795789312.0, "5385": 12795789312.0, "5390": 12795789312.0, "5395": 12795789312.0, "5400": 12795789312.0, "5405": 12795789312.0, "5410": 12795789312.0, "5415": 12795789312.0, "5420": 12795789312.0, "5425": 12795789312.0, "5430": 12795789312.0, "5435": 12795789312.0, "5440": 12795789312.0, "5445": 12795789312.0, "5450": 12795789312.0, "5455": 12795789312.0, "5460": 12795789312.0, "5465": 12795789312.0, "5470": 12795789312.0, "5475": 12795789312.0, "5480": 12795789312.0, "5485": 12795789312.0, "5490": 12795789312.0, "5495": 12795789312.0, "5500": 12795789312.0, "5505": 12795789312.0, "5510": 12795789312.0, "5515": 12795789312.0, "5520": 12795789312.0, "5525": 12795789312.0, "5530": 12795789312.0, "5535": 12795789312.0, "5540": 12795789312.0, "5545": 12795789312.0, "5550": 12795789312.0, "5555": 12795789312.0, "5560": 12795789312.0, "5565": 12795789312.0, "5570": 12795789312.0, "5575": 12795789312.0, "5580": 12795789312.0, "5585": 12795789312.0, "5590": 12795789312.0, "5595": 12795789312.0, "5600": 12795789312.0, "5605": 12795789312.0, "5610": 12795789312.0, "5615": 12795789312.0, "5620": 12795789312.0, "5625": 12795789312.0, "5630": 12795789312.0, "5635": 12795789312.0, "5640": 12795789312.0, "5645": 12795789312.0, "5650": 12795789312.0, "5655": 12795789312.0, "5660": 12795789312.0, "5665": 12795789312.0, "5670": 12795789312.0, "5675": 12795789312.0, "5680": 12795789312.0, "5685": 12795789312.0, "5690": 12795789312.0, "5695": 12795789312.0, "5700": 12795789312.0, "5705": 12795789312.0, "5710": 12795789312.0, "5715": 12795789312.0, "5720": 12795789312.0, "5725": 12795789312.0, "5730": 12795789312.0, "5735": 12795789312.0, "5740": 12795789312.0, "5745": 12795789312.0, "5750": 12795789312.0, "5755": 12795789312.0, "5760": 12795789312.0, "5765": 12795789312.0, "5770": 12795789312.0, "5775": 12795789312.0, "5780": 12795789312.0, "5785": 12795789312.0, "5790": 12795789312.0, "5795": 12795789312.0, "5800": 12795789312.0, "5805": 12795789312.0, "5810": 12795789312.0, "5815": 12795789312.0, "5820": 12795789312.0, "5825": 12795789312.0, "5830": 12795789312.0, "5835": 12795789312.0, "5840": 12795789312.0, "5845": 12795789312.0, "5850": 12795789312.0, "5855": 12795789312.0, "5860": 12795789312.0, "5865": 12795789312.0, "5870": 12795789312.0, "5875": 12795789312.0, "5880": 12795789312.0, "5885": 12795789312.0, "5890": 12795789312.0, "5895": 12795789312.0, "5900": 12795789312.0, "5905": 12795789312.0, "5910": 12795789312.0, "5915": 12795789312.0, "5920": 12795789312.0, "5925": 12795789312.0, "5930": 12795789312.0, "5935": 12795789312.0, "5940": 12795789312.0, "5945": 12795789312.0, "5950": 12795789312.0, "5955": 12795789312.0, "5960": 12795789312.0, "5965": 12795789312.0, "5970": 12795789312.0, "5975": 12795789312.0, "5980": 12795789312.0, "5985": 12795789312.0, "5990": 12795789312.0, "5995": 12795789312.0, "6000": 12795789312.0, "6005": 12795789312.0, "6010": 12795789312.0, "6015": 12795789312.0, "6020": 12795789312.0, "6025": 12795789312.0, "6030": 12795789312.0, "6035": 12795789312.0, "6040": 12795789312.0, "6045": 12795789312.0, "6050": 12795789312.0, "6055": 12795789312.0, "6060": 12795789312.0, "6065": 12795789312.0, "6070": 12795789312.0, "6075": 12795789312.0, "6080": 12795789312.0, "6085": 12795789312.0, "6090": 12795789312.0, "6095": 12795789312.0, "6100": 12795789312.0, "6105": 12795789312.0, "6110": 12795789312.0, "6115": 12795789312.0, "6120": 12795789312.0, "6125": 12795789312.0, "6130": 12795789312.0, "6135": 12795789312.0, "6140": 12795789312.0, "6145": 12795789312.0, "6150": 12795789312.0, "6155": 12795789312.0, "6160": 12795789312.0, "6165": 12795789312.0, "6170": 12795789312.0, "6175": 12795789312.0, "6180": 12795789312.0, "6185": 12795789312.0, "6190": 12795789312.0, "6195": 12795789312.0, "6200": 12795789312.0, "6205": 12795789312.0, "6210": 12795789312.0, "6215": 12795789312.0, "6220": 12795789312.0, "6225": 12795789312.0, "6230": 12795789312.0, "6235": 12795789312.0, "6240": 12795789312.0, "6245": 12795789312.0, "6250": 12795789312.0, "6255": 12795789312.0, "6260": 12795789312.0, "6265": 12795789312.0, "6270": 12795789312.0, "6275": 12795789312.0, "6280": 12795789312.0, "6285": 12795789312.0, "6290": 12795789312.0, "6295": 12795789312.0, "6300": 12795789312.0, "6305": 12795789312.0, "6310": 12795789312.0, "6315": 12795789312.0, "6320": 12795789312.0, "6325": 12795789312.0, "6330": 12795789312.0, "6335": 12795789312.0, "6340": 12795789312.0, "6345": 12795789312.0, "6350": 12795789312.0, "6355": 12795789312.0, "6360": 12795789312.0, "6365": 12795789312.0, "6370": 12795789312.0, "6375": 12795789312.0, "6380": 12795789312.0, "6385": 12795789312.0, "6390": 12795789312.0, "6395": 12795789312.0, "6400": 12795789312.0, "6405": 12795789312.0, "6410": 12795789312.0, "6415": 12795789312.0, "6420": 12795789312.0, "6425": 12795789312.0, "6430": 12795789312.0, "6435": 12795789312.0, "6440": 12795789312.0, "6445": 12795789312.0, "6450": 12795789312.0, "6455": 12795789312.0, "6460": 12795789312.0, "6465": 12795789312.0, "6470": 12795789312.0, "6475": 12795789312.0, "6480": 12795789312.0, "6485": 12795789312.0, "6490": 12795789312.0, "6495": 12795789312.0, "6500": 12795789312.0, "6505": 12795789312.0, "6510": 12795789312.0, "6515": 12795789312.0, "6520": 12795789312.0, "6525": 12795789312.0, "6530": 12795789312.0, "6535": 12795789312.0, "6540": 12795789312.0, "6545": 12795789312.0, "6550": 12795789312.0, "6555": 12795789312.0, "6560": 12795789312.0, "6565": 12795789312.0, "6570": 12795789312.0, "6575": 12795789312.0, "6580": 12795789312.0, "6585": 12795789312.0, "6590": 12795789312.0, "6595": 12795789312.0, "6600": 12795789312.0, "6605": 12795789312.0, "6610": 12795789312.0, "6615": 12795789312.0, "6620": 12795789312.0, "6625": 12795789312.0, "6630": 12795789312.0, "6635": 12795789312.0, "6640": 12795789312.0, "6645": 12795789312.0, "6650": 12795789312.0, "6655": 12795789312.0, "6660": 12795789312.0, "6665": 12795789312.0, "6670": 12795789312.0, "6675": 12795789312.0, "6680": 12795789312.0, "6685": 12795789312.0, "6690": 12795789312.0, "6695": 12795789312.0, "6700": 12795789312.0, "6705": 12795789312.0, "6710": 12795789312.0, "6715": 12795789312.0, "6720": 12795789312.0, "6725": 12795789312.0, "6730": 12795789312.0, "6735": 12795789312.0, "6740": 12795789312.0, "6745": 12795789312.0, "6750": 12795789312.0, "6755": 12795789312.0, "6760": 12795789312.0, "6765": 12795789312.0, "6770": 12795789312.0, "6775": 12795789312.0, "6780": 12795789312.0, "6785": 12795789312.0, "6790": 12795789312.0, "6795": 12795789312.0, "6800": 12795789312.0, "6805": 12795789312.0, "6810": 12795789312.0, "6815": 12795789312.0, "6820": 12795789312.0, "6825": 12795789312.0, "6830": 12795789312.0, "6835": 12795789312.0, "6840": 12795789312.0, "6845": 12795789312.0, "6850": 12795789312.0, "6855": 12795789312.0, "6860": 12795789312.0, "6865": 12795789312.0, "6870": 12795789312.0, "6875": 12795789312.0, "6880": 12795789312.0, "6885": 12795789312.0, "6890": 12795789312.0, "6895": 12795789312.0, "6900": 12795789312.0, "6905": 12795789312.0, "6910": 12795789312.0, "6915": 12795789312.0, "6920": 12795789312.0, "6925": 12795789312.0, "6930": 12795789312.0, "6935": 12795789312.0, "6940": 12795789312.0, "6945": 12795789312.0, "6950": 12795789312.0, "6955": 12795789312.0, "6960": 12795789312.0, "6965": 12795789312.0, "6970": 12795789312.0, "6975": 12795789312.0, "6980": 12795789312.0, "6985": 12795789312.0, "6990": 12795789312.0, "6995": 12795789312.0, "7000": 12795789312.0, "7005": 12795789312.0, "7010": 12795789312.0, "7015": 12795789312.0, "7020": 12795789312.0, "7025": 12795789312.0, "7030": 12795789312.0, "7035": 12795789312.0, "7040": 12795789312.0, "7045": 12795789312.0, "7050": 12795789312.0, "7055": 12795789312.0, "7060": 12795789312.0, "7065": 12795789312.0, "7070": 12795789312.0, "7075": 12795789312.0, "7080": 12795789312.0, "7085": 12795789312.0, "7090": 12795789312.0, "7095": 12795789312.0, "7100": 12795789312.0, "7105": 12795789312.0, "7110": 12795789312.0, "7115": 12795789312.0, "7120": 12795789312.0, "7125": 12795789312.0, "7130": 12795789312.0, "7135": 12795789312.0, "7140": 12795789312.0, "7145": 12795789312.0, "7150": 12795789312.0, "7155": 12795789312.0, "7160": 12795789312.0, "7165": 12795789312.0, "7170": 12795789312.0, "7175": 12795789312.0, "7180": 12795789312.0, "7185": 12795789312.0, "7190": 12795789312.0, "7195": 12795789312.0, "7200": 12795789312.0, "7205": 12795789312.0, "7210": 12795789312.0, "7215": 12795789312.0, "7220": 12795789312.0, "7225": 12795789312.0, "7230": 12795789312.0, "7235": 12795789312.0, "7240": 12795789312.0, "7245": 12795789312.0, "7250": 12795789312.0, "7255": 12795789312.0, "7260": 12795789312.0, "7265": 12795789312.0, "7270": 12795789312.0, "7275": 12795789312.0, "7280": 12795789312.0, "7285": 12795789312.0, "7290": 12795789312.0, "7295": 12795789312.0, "7300": 12795789312.0, "7305": 12795789312.0, "7310": 12795789312.0, "7315": 12795789312.0, "7320": 12795789312.0, "7325": 12795789312.0, "7330": 12795789312.0, "7335": 12795789312.0, "7340": 12795789312.0, "7345": 12795789312.0, "7350": 12795789312.0, "7355": 12795789312.0, "7360": 12795789312.0, "7365": 12795789312.0, "7370": 12795789312.0, "7375": 12795789312.0, "7380": 12795789312.0, "7385": 12795789312.0, "7390": 12795789312.0, "7395": 12795789312.0, "7400": 12795789312.0, "7405": 12795789312.0, "7410": 12795789312.0, "7415": 12795789312.0, "7420": 12795789312.0, "7425": 12795789312.0, "7430": 12795789312.0, "7435": 12795789312.0, "7440": 12795789312.0, "7445": 12795789312.0, "7450": 12795789312.0, "7455": 12795789312.0, "7460": 12795789312.0, "7465": 12795789312.0, "7470": 12795789312.0, "7475": 12795789312.0, "7480": 12795789312.0, "7485": 12795789312.0, "7490": 12795789312.0, "7495": 12795789312.0, "7500": 12795789312.0, "7505": 12795789312.0, "7510": 12795789312.0, "7515": 12795789312.0, "7520": 12795789312.0, "7525": 12795789312.0, "7530": 12795789312.0, "7535": 12795789312.0, "7540": 12795789312.0, "7545": 12795789312.0, "7550": 12795789312.0, "7555": 12795789312.0, "7560": 12795789312.0, "7565": 12795789312.0, "7570": 12795789312.0, "7575": 12795789312.0, "7580": 12795789312.0, "7585": 12795789312.0, "7590": 12795789312.0, "7595": 12795789312.0, "7600": 12795789312.0, "7605": 12795789312.0, "7610": 12795789312.0, "7615": 12795789312.0, "7620": 12795789312.0, "7625": 12795789312.0, "7630": 12795789312.0, "7635": 12795789312.0, "7640": 12795789312.0, "7645": 12795789312.0, "7650": 12795789312.0, "7655": 12795789312.0, "7660": 12795789312.0, "7665": 12795789312.0, "7670": 12795789312.0, "7675": 12795789312.0, "7680": 12795789312.0, "7685": 12795789312.0, "7690": 12795789312.0, "7695": 12795789312.0, "7700": 12795789312.0, "7705": 12795789312.0, "7710": 12795789312.0, "7715": 12795789312.0, "7720": 12795789312.0, "7725": 12795789312.0, "7730": 12795789312.0, "7735": 12795789312.0, "7740": 12795789312.0, "7745": 12795789312.0, "7750": 12795789312.0, "7755": 12795789312.0, "7760": 12795789312.0, "7765": 12795789312.0, "7770": 12795789312.0, "7775": 12795789312.0, "7780": 12795789312.0, "7785": 12795789312.0, "7790": 12795789312.0, "7795": 12795789312.0, "7800": 12795789312.0, "7805": 12795789312.0, "7810": 12795789312.0, "7815": 12795789312.0, "7820": 12795789312.0, "7825": 12795789312.0, "7830": 12795789312.0, "7835": 12795789312.0, "7840": 12795789312.0, "7845": 12795789312.0, "7850": 12795789312.0, "7855": 12795789312.0, "7860": 12795789312.0, "7865": 12795789312.0, "7870": 12795789312.0, "7875": 12795789312.0, "7880": 12795789312.0, "7885": 12795789312.0, "7890": 12795789312.0, "7895": 12795789312.0, "7900": 12795789312.0, "7905": 12795789312.0, "7910": 12795789312.0, "7915": 12795789312.0, "7920": 12795789312.0, "7925": 12795789312.0, "7930": 12795789312.0, "7935": 12795789312.0, "7940": 12795789312.0, "7945": 12795789312.0, "7950": 12795789312.0, "7955": 12795789312.0, "7960": 12795789312.0, "7965": 12795789312.0, "7970": 12795789312.0, "7975": 12795789312.0, "7980": 12795789312.0, "7985": 12795789312.0, "7990": 12795789312.0, "7995": 12795789312.0, "8000": 12795789312.0, "8005": 12795789312.0, "8010": 12795789312.0, "8015": 12795789312.0, "8020": 12795789312.0, "8025": 12795789312.0, "8030": 12795789312.0, "8035": 12795789312.0, "8040": 12795789312.0, "8045": 12795789312.0, "8050": 12795789312.0, "8055": 12795789312.0, "8060": 12795789312.0, "8065": 12795789312.0, "8070": 12795789312.0, "8075": 12795789312.0, "8080": 12795789312.0, "8085": 12795789312.0, "8090": 12795789312.0, "8095": 12795789312.0, "8100": 12795789312.0, "8105": 12795789312.0, "8110": 12795789312.0, "8115": 12795789312.0, "8120": 12795789312.0, "8125": 12795789312.0, "8130": 12795789312.0, "8135": 12795789312.0, "8140": 12795789312.0, "8145": 12795789312.0, "8150": 12795789312.0, "8155": 12795789312.0, "8160": 12795789312.0, "8165": 12795789312.0, "8170": 12795789312.0, "8175": 12795789312.0, "8180": 12795789312.0, "8185": 12795789312.0, "8190": 12795789312.0, "8195": 12795789312.0, "8200": 12795789312.0, "8205": 12795789312.0, "8210": 12795789312.0, "8215": 12795789312.0, "8220": 12795789312.0, "8225": 12795789312.0, "8230": 12795789312.0, "8235": 12795789312.0, "8240": 12795789312.0, "8245": 12795789312.0, "8250": 12795789312.0, "8255": 12795789312.0, "8260": 12795789312.0, "8265": 12795789312.0, "8270": 12795789312.0, "8275": 12795789312.0, "8280": 12795789312.0, "8285": 12795789312.0, "8290": 12795789312.0, "8295": 12795789312.0, "8300": 12795789312.0, "8305": 12795789312.0, "8310": 12795789312.0, "8315": 12795789312.0, "8320": 12795789312.0, "8325": 12795789312.0, "8330": 12795789312.0, "8335": 12795789312.0, "8340": 12795789312.0, "8345": 12795789312.0, "8350": 12795789312.0, "8355": 12795789312.0, "8360": 12795789312.0, "8365": 12795789312.0, "8370": 12795789312.0, "8375": 12795789312.0, "8380": 12795789312.0, "8385": 12795789312.0, "8390": 12795789312.0, "8395": 12795789312.0, "8400": 12795789312.0, "8405": 12795789312.0, "8410": 12795789312.0, "8415": 12795789312.0, "8420": 12795789312.0, "8425": 12795789312.0, "8430": 12795789312.0, "8435": 12795789312.0, "8440": 12795789312.0, "8445": 12795789312.0, "8450": 12795789312.0, "8455": 12795789312.0, "8460": 12795789312.0, "8465": 12795789312.0, "8470": 12795789312.0, "8475": 12795789312.0, "8480": 12795789312.0, "8485": 12795789312.0, "8490": 12795789312.0, "8495": 12795789312.0, "8500": 12795789312.0, "8505": 12795789312.0, "8510": 12795789312.0, "8515": 12795789312.0, "8520": 12795789312.0, "8525": 12795789312.0, "8530": 12795789312.0, "8535": 12795789312.0, "8540": 12795789312.0, "8545": 12795789312.0, "8550": 12795789312.0, "8555": 12795789312.0, "8560": 12795789312.0, "8565": 12795789312.0, "8570": 12795789312.0, "8575": 12795789312.0, "8580": 12795789312.0, "8585": 12795789312.0, "8590": 12795789312.0, "8595": 12795789312.0, "8600": 12795789312.0, "8605": 12795789312.0, "8610": 12795789312.0, "8615": 12795789312.0, "8620": 12795789312.0, "8625": 12795789312.0, "8630": 12795789312.0, "8635": 12795789312.0, "8640": 12795789312.0, "8645": 12795789312.0, "8650": 12795789312.0, "8655": 12795789312.0, "8660": 12795789312.0, "8665": 12795789312.0, "8670": 12795789312.0, "8675": 12795789312.0, "8680": 12795789312.0, "8685": 12795789312.0, "8690": 12795789312.0, "8695": 12795789312.0, "8700": 12795789312.0, "8705": 12795789312.0, "8710": 12795789312.0, "8715": 12795789312.0, "8720": 12795789312.0, "8725": 12795789312.0, "8730": 12795789312.0, "8735": 12795789312.0, "8740": 12795789312.0, "8745": 12795789312.0, "8750": 12795789312.0, "8755": 12795789312.0, "8760": 12795789312.0, "8765": 12795789312.0, "8770": 12795789312.0, "8775": 12795789312.0, "8780": 12795789312.0, "8785": 12795789312.0, "8790": 12795789312.0, "8795": 12795789312.0, "8800": 12795789312.0, "8805": 12795789312.0, "8810": 12795789312.0, "8815": 12795789312.0, "8820": 12795789312.0, "8825": 12795789312.0, "8830": 12795789312.0, "8835": 12795789312.0, "8840": 12795789312.0, "8845": 12795789312.0, "8850": 12795789312.0, "8855": 12795789312.0, "8860": 12795789312.0, "8865": 12795789312.0, "8870": 12795789312.0, "8875": 12795789312.0, "8880": 12795789312.0, "8885": 12795789312.0, "8890": 12795789312.0, "8895": 12795789312.0, "8900": 12795789312.0, "8905": 12795789312.0, "8910": 12795789312.0, "8915": 12795789312.0, "8920": 12795789312.0, "8925": 12795789312.0, "8930": 12795789312.0, "8935": 12795789312.0, "8940": 12795789312.0, "8945": 12795789312.0, "8950": 12795789312.0, "8955": 12795789312.0, "8960": 12795789312.0, "8965": 12795789312.0, "8970": 12795789312.0, "8975": 12795789312.0, "8980": 12795789312.0, "8985": 12795789312.0, "8990": 12795789312.0, "8995": 12795789312.0, "9000": 12795789312.0, "9005": 12795789312.0, "9010": 12795789312.0, "9015": 12795789312.0, "9020": 12795789312.0, "9025": 12795789312.0, "9030": 12795789312.0, "9035": 12795789312.0, "9040": 12795789312.0, "9045": 12795789312.0, "9050": 12795789312.0, "9055": 12795789312.0, "9060": 12795789312.0, "9065": 12795789312.0, "9070": 12795789312.0, "9075": 12795789312.0, "9080": 12795789312.0, "9085": 12795789312.0, "9090": 12795789312.0, "9095": 12795789312.0, "9100": 12795789312.0, "9105": 12795789312.0, "9110": 12795789312.0, "9115": 12795789312.0, "9120": 12795789312.0, "9125": 12795789312.0, "9130": 12795789312.0, "9135": 12795789312.0, "9140": 12795789312.0, "9145": 12795789312.0, "9150": 12795789312.0, "9155": 12795789312.0, "9160": 12795789312.0, "9165": 12795789312.0, "9170": 12795789312.0, "9175": 12795789312.0, "9180": 12795789312.0, "9185": 12795789312.0, "9190": 12795789312.0, "9195": 12795789312.0, "9200": 12795789312.0, "9205": 12795789312.0, "9210": 12795789312.0, "9215": 12795789312.0, "9220": 12795789312.0, "9225": 12795789312.0, "9230": 12795789312.0, "9235": 12795789312.0, "9240": 12795789312.0, "9245": 12795789312.0, "9250": 12795789312.0, "9255": 12795789312.0, "9260": 12795789312.0, "9265": 12795789312.0, "9270": 12795789312.0, "9275": 12795789312.0, "9280": 12795789312.0, "9285": 12795789312.0, "9290": 12795789312.0, "9295": 12795789312.0, "9300": 12795789312.0, "9305": 12795789312.0, "9310": 12795789312.0, "9315": 12795789312.0, "9320": 12795789312.0, "9325": 12795789312.0, "9330": 12795789312.0, "9335": 12795789312.0, "9340": 12795789312.0, "9345": 12795789312.0, "9350": 12795789312.0, "9355": 12795789312.0, "9360": 12795789312.0, "9365": 12795789312.0, "9370": 12795789312.0, "9375": 12795789312.0, "9380": 12795789312.0, "9385": 12795789312.0, "9390": 12795789312.0, "9395": 12795789312.0, "9400": 12795789312.0, "9405": 12795789312.0, "9410": 12795789312.0, "9415": 12795789312.0, "9420": 12795789312.0, "9425": 12795789312.0, "9430": 12795789312.0, "9435": 12795789312.0, "9440": 12795789312.0, "9445": 12795789312.0, "9450": 12795789312.0, "9455": 12795789312.0, "9460": 12795789312.0, "9465": 12795789312.0, "9470": 12795789312.0, "9475": 12795789312.0, "9480": 12795789312.0, "9485": 12795789312.0, "9490": 12795789312.0, "9495": 12795789312.0, "9500": 12795789312.0, "9505": 12795789312.0, "9510": 12795789312.0, "9515": 12795789312.0, "9520": 12795789312.0, "9525": 12795789312.0, "9530": 12795789312.0, "9535": 12795789312.0, "9540": 12795789312.0, "9545": 12795789312.0, "9550": 12795789312.0, "9555": 12795789312.0, "9560": 12795789312.0, "9565": 12795789312.0, "9570": 12795789312.0, "9575": 12795789312.0, "9580": 12795789312.0, "9585": 12795789312.0, "9590": 12795789312.0, "9595": 12795789312.0, "9600": 12795789312.0, "9605": 12795789312.0, "9610": 12795789312.0, "9615": 12795789312.0, "9620": 12795789312.0, "9625": 12795789312.0, "9630": 12795789312.0, "9635": 12795789312.0, "9640": 12795789312.0, "9645": 12795789312.0, "9650": 12795789312.0, "9655": 12795789312.0, "9660": 12795789312.0, "9665": 12795789312.0, "9670": 12795789312.0, "9675": 12795789312.0, "9680": 12795789312.0, "9685": 12795789312.0, "9690": 12795789312.0, "9695": 12795789312.0, "9700": 12795789312.0, "9705": 12795789312.0, "9710": 12795789312.0, "9715": 12795789312.0, "9720": 12795789312.0, "9725": 12795789312.0, "9730": 12795789312.0, "9735": 12795789312.0, "9740": 12795789312.0, "9745": 12795789312.0, "9750": 12795789312.0, "9755": 12795789312.0, "9760": 12795789312.0, "9765": 12795789312.0, "9770": 12795789312.0, "9775": 12795789312.0, "9780": 12795789312.0, "9785": 12795789312.0, "9790": 12795789312.0, "9795": 12795789312.0, "9800": 12795789312.0, "9805": 12795789312.0, "9810": 12795789312.0, "9815": 12795789312.0, "9820": 12795789312.0, "9825": 12795789312.0, "9830": 12795789312.0, "9835": 12795789312.0, "9840": 12795789312.0, "9845": 12795789312.0, "9850": 12795789312.0, "9855": 12795789312.0, "9860": 12795789312.0, "9865": 12795789312.0, "9870": 12795789312.0, "9875": 12795789312.0, "9880": 12795789312.0, "9885": 12795789312.0, "9890": 12795789312.0, "9895": 12795789312.0, "9900": 12795789312.0, "9905": 12795789312.0, "9910": 12795789312.0, "9915": 12795789312.0, "9920": 12795789312.0, "9925": 12795789312.0, "9930": 12795789312.0, "9935": 12795789312.0, "9940": 12795789312.0, "9945": 12795789312.0, "9950": 12795789312.0, "9955": 12795789312.0, "9960": 12795789312.0, "9965": 12795789312.0, "9970": 12795789312.0, "9975": 12795789312.0, "9980": 12795789312.0, "9985": 12795789312.0, "9990": 12795789312.0, "9995": 12795789312.0, "10000": 12795789312.0, "10005": 12795789312.0, "10010": 12795789312.0, "10015": 12795789312.0, "10020": 12795789312.0, "10025": 12795789312.0, "10030": 12795789312.0, "10035": 12795789312.0, "10040": 12795789312.0, "10045": 12795789312.0, "10050": 12795789312.0, "10055": 12795789312.0, "10060": 12795789312.0, "10065": 12795789312.0, "10070": 12795789312.0, "10075": 12795789312.0, "10080": 12795789312.0, "10085": 12795789312.0, "10090": 12795789312.0, "10095": 12795789312.0, "10100": 12795789312.0, "10105": 12795789312.0, "10110": 12795789312.0, "10115": 12795789312.0, "10120": 12795789312.0, "10125": 12795789312.0, "10130": 12795789312.0, "10135": 12795789312.0, "10140": 12795789312.0, "10145": 12795789312.0, "10150": 12795789312.0, "10155": 12795789312.0, "10160": 12795789312.0, "10165": 12795789312.0, "10170": 12795789312.0, "10175": 12795789312.0, "10180": 12795789312.0, "10185": 12795789312.0, "10190": 12795789312.0, "10195": 12795789312.0, "10200": 12795789312.0, "10205": 12795789312.0, "10210": 12795789312.0, "10215": 12795789312.0, "10220": 12795789312.0, "10225": 12795789312.0, "10230": 12795789312.0, "10235": 12795789312.0, "10240": 12795789312.0, "10245": 12795789312.0, "10250": 12795789312.0, "10255": 12795789312.0, "10260": 12795789312.0, "10265": 12795789312.0, "10270": 12795789312.0, "10275": 12795789312.0, "10280": 12795789312.0, "10285": 12795789312.0, "10290": 12795789312.0, "10295": 12795789312.0, "10300": 12795789312.0, "10305": 12795789312.0, "10310": 12795789312.0, "10315": 12795789312.0, "10320": 12795789312.0, "10325": 12795789312.0, "10330": 12795789312.0, "10335": 12795789312.0, "10340": 12795789312.0, "10345": 12795789312.0, "10350": 12795789312.0, "10355": 12795789312.0, "10360": 12795789312.0, "10365": 12795789312.0, "10370": 12795789312.0, "10375": 12795789312.0, "10380": 12795789312.0, "10385": 12795789312.0, "10390": 12795789312.0, "10395": 12795789312.0, "10400": 12795789312.0, "10405": 12795789312.0, "10410": 12795789312.0, "10415": 12795789312.0, "10420": 12795789312.0, "10425": 12795789312.0, "10430": 12795789312.0, "10435": 12795789312.0, "10440": 12795789312.0, "10445": 12795789312.0, "10450": 12795789312.0, "10455": 12795789312.0, "10460": 12795789312.0, "10465": 12795789312.0, "10470": 12795789312.0, "10475": 12795789312.0, "10480": 12795789312.0, "10485": 12795789312.0, "10490": 12795789312.0, "10495": 12795789312.0, "10500": 12795789312.0, "10505": 12795789312.0, "10510": 12795789312.0, "10515": 12795789312.0, "10520": 12795789312.0, "10525": 12795789312.0, "10530": 12795789312.0, "10535": 12795789312.0, "10540": 12795789312.0, "10545": 12795789312.0, "10550": 12795789312.0, "10555": 12795789312.0, "10560": 12795789312.0, "10565": 12795789312.0, "10570": 12795789312.0, "10575": 12795789312.0, "10580": 12795789312.0, "10585": 12795789312.0, "10590": 12795789312.0, "10595": 12795789312.0, "10600": 12795789312.0, "10605": 12795789312.0, "10610": 12795789312.0, "10615": 12795789312.0, "10620": 12795789312.0, "10625": 12795789312.0, "10630": 12795789312.0, "10635": 12795789312.0, "10640": 12795789312.0, "10645": 12795789312.0, "10650": 12795789312.0, "10655": 12795789312.0, "10660": 12795789312.0, "10665": 12795789312.0, "10670": 12795789312.0, "10675": 12795789312.0, "10680": 12795789312.0, "10685": 12795789312.0, "10690": 12795789312.0, "10695": 12795789312.0, "10700": 12795789312.0, "10705": 12795789312.0, "10710": 12795789312.0, "10715": 12795789312.0, "10720": 12795789312.0, "10725": 12795789312.0, "10730": 12795789312.0, "10735": 12795789312.0, "10740": 12795789312.0, "10745": 12795789312.0, "10750": 12795789312.0, "10755": 12795789312.0, "10760": 12795789312.0, "10765": 12795789312.0, "10770": 12795789312.0, "10775": 12795789312.0, "10780": 12795789312.0, "10785": 12795789312.0, "10790": 12795789312.0, "10795": 12795789312.0, "10800": 12795789312.0, "10805": 12795789312.0, "10810": 12795789312.0, "10815": 12795789312.0, "10820": 12795789312.0, "10825": 12795789312.0, "10830": 12795789312.0, "10835": 12795789312.0, "10840": 12795789312.0, "10845": 12795789312.0, "10850": 12795789312.0, "10855": 12795789312.0, "10860": 12795789312.0, "10865": 12795789312.0, "10870": 12795789312.0, "10875": 12795789312.0, "10880": 12795789312.0, "10885": 12795789312.0, "10890": 12795789312.0, "10895": 12795789312.0, "10900": 12795789312.0, "10905": 12795789312.0, "10910": 12795789312.0, "10915": 12795789312.0, "10920": 12795789312.0, "10925": 12795789312.0, "10930": 12795789312.0, "10935": 12795789312.0, "10940": 12795789312.0, "10945": 12795789312.0, "10950": 12795789312.0, "10955": 12795789312.0, "10960": 12795789312.0, "10965": 12795789312.0, "10970": 12795789312.0, "10975": 12795789312.0, "10980": 12795789312.0, "10985": 12795789312.0, "10990": 12795789312.0, "10995": 12795789312.0, "11000": 12795789312.0, "11005": 12795789312.0, "11010": 12795789312.0, "11015": 12795789312.0, "11020": 12795789312.0, "11025": 12795789312.0, "11030": 12795789312.0, "11035": 12795789312.0, "11040": 12795789312.0, "11045": 12795789312.0, "11050": 12795789312.0, "11055": 12795789312.0, "11060": 12795789312.0, "11065": 12795789312.0, "11070": 12795789312.0, "11075": 12795789312.0, "11080": 12795789312.0, "11085": 12795789312.0, "11090": 12795789312.0, "11095": 12795789312.0, "11100": 12795789312.0, "11105": 12795789312.0, "11110": 12795789312.0, "11115": 12795789312.0, "11120": 12795789312.0, "11125": 12795789312.0, "11130": 12795789312.0, "11135": 12795789312.0, "11140": 12795789312.0, "11145": 12795789312.0, "11150": 12795789312.0, "11155": 12795789312.0, "11160": 12795789312.0, "11165": 12795789312.0, "11170": 12795789312.0, "11175": 12795789312.0, "11180": 12795789312.0, "11185": 12795789312.0, "11190": 12795789312.0, "11195": 12795789312.0, "11200": 12795789312.0, "11205": 12795789312.0, "11210": 12795789312.0, "11215": 12795789312.0, "11220": 12795789312.0, "11225": 12795789312.0, "11230": 12795789312.0, "11235": 12795789312.0, "11240": 12795789312.0, "11245": 12795789312.0, "11250": 12795789312.0, "11255": 12795789312.0, "11260": 12795789312.0, "11265": 12795789312.0, "11270": 12795789312.0, "11275": 12795789312.0, "11280": 12795789312.0, "11285": 12795789312.0, "11290": 12795789312.0, "11295": 12795789312.0, "11300": 12795789312.0, "11305": 12795789312.0, "11310": 12795789312.0, "11315": 12795789312.0, "11320": 12795789312.0, "11325": 12795789312.0, "11330": 12795789312.0, "11335": 12795789312.0, "11340": 12795789312.0, "11345": 12795789312.0, "11350": 12795789312.0, "11355": 12795789312.0, "11360": 12795789312.0, "11365": 12795789312.0, "11370": 12795789312.0, "11375": 12795789312.0, "11380": 12795789312.0, "11385": 12795789312.0, "11390": 12795789312.0, "11395": 12795789312.0, "11400": 12795789312.0, "11405": 12795789312.0, "11410": 12795789312.0, "11415": 12795789312.0, "11420": 12795789312.0, "11425": 12795789312.0, "11430": 12795789312.0, "11435": 12795789312.0, "11440": 12795789312.0, "11445": 12795789312.0, "11450": 12795789312.0, "11455": 12795789312.0, "11460": 12795789312.0, "11465": 12795789312.0, "11470": 12795789312.0, "11475": 12795789312.0, "11480": 12795789312.0, "11485": 12795789312.0, "11490": 12795789312.0, "11495": 12795789312.0, "11500": 12795789312.0, "11505": 12795789312.0, "11510": 12795789312.0, "11515": 12795789312.0, "11520": 12795789312.0, "11525": 12795789312.0, "11530": 12795789312.0, "11535": 12795789312.0, "11540": 12795789312.0, "11545": 12795789312.0, "11550": 12795789312.0, "11555": 12795789312.0, "11560": 12795789312.0, "11565": 12795789312.0, "11570": 12795789312.0, "11575": 12795789312.0, "11580": 12795789312.0, "11585": 12795789312.0, "11590": 12795789312.0, "11595": 12795789312.0, "11600": 12795789312.0, "11605": 12795789312.0, "11610": 12795789312.0, "11615": 12795789312.0, "11620": 12795789312.0, "11625": 12795789312.0, "11630": 12795789312.0, "11635": 12795789312.0, "11640": 12795789312.0, "11645": 12795789312.0, "11650": 12795789312.0, "11655": 12795789312.0, "11660": 12795789312.0, "11665": 12795789312.0, "11670": 12795789312.0, "11675": 12795789312.0, "11680": 12795789312.0, "11685": 12795789312.0, "11690": 12795789312.0, "11695": 12795789312.0, "11700": 12795789312.0, "11705": 12795789312.0, "11710": 12795789312.0, "11715": 12795789312.0, "11720": 12795789312.0, "11725": 12795789312.0, "11730": 12795789312.0, "11735": 12795789312.0, "11740": 12795789312.0, "11745": 12795789312.0, "11750": 12795789312.0, "11755": 12795789312.0, "11760": 12795789312.0, "11765": 12795789312.0, "11770": 12795789312.0, "11775": 12795789312.0, "11780": 12795789312.0, "11785": 12795789312.0, "11790": 12795789312.0, "11795": 12795789312.0, "11800": 12795789312.0, "11805": 12795789312.0, "11810": 12795789312.0, "11815": 12795789312.0, "11820": 12795789312.0, "11825": 12795789312.0, "11830": 12795789312.0, "11835": 12795789312.0, "11840": 12795789312.0, "11845": 12795789312.0, "11850": 12795789312.0, "11855": 12795789312.0, "11860": 12795789312.0, "11865": 12795789312.0, "11870": 12795789312.0, "11875": 12795789312.0, "11880": 12795789312.0, "11885": 12795789312.0, "11890": 12795789312.0, "11895": 12795789312.0, "11900": 12795789312.0, "11905": 12795789312.0, "11910": 12795789312.0, "11915": 12795789312.0, "11920": 12795789312.0, "11925": 12795789312.0, "11930": 12795789312.0, "11935": 12795789312.0, "11940": 12795789312.0, "11945": 12795789312.0, "11950": 12795789312.0, "11955": 12795789312.0, "11960": 12795789312.0, "11965": 12795789312.0, "11970": 12795789312.0, "11975": 12795789312.0, "11980": 12795789312.0, "11985": 12795789312.0, "11990": 12795789312.0, "11995": 12795789312.0, "12000": 12795789312.0, "12005": 12795789312.0, "12010": 12795789312.0, "12015": 12795789312.0, "12020": 12795789312.0, "12025": 12795789312.0, "12030": 12795789312.0, "12035": 12795789312.0, "12040": 12795789312.0, "12045": 12795789312.0, "12050": 12795789312.0, "12055": 12795789312.0, "12060": 12795789312.0, "12065": 12795789312.0, "12070": 12795789312.0, "12075": 12795789312.0, "12080": 12795789312.0, "12085": 12795789312.0, "12090": 12795789312.0, "12095": 12795789312.0, "12100": 12795789312.0, "12105": 12795789312.0, "12110": 12795789312.0, "12115": 12795789312.0, "12120": 12795789312.0, "12125": 12795789312.0, "12130": 12795789312.0, "12135": 12795789312.0, "12140": 12795789312.0, "12145": 12795789312.0, "12150": 12795789312.0, "12155": 12795789312.0, "12160": 12795789312.0, "12165": 12795789312.0, "12170": 12795789312.0, "12175": 12795789312.0, "12180": 12795789312.0, "12185": 12795789312.0, "12190": 12795789312.0, "12195": 12795789312.0, "12200": 12795789312.0, "12205": 12795789312.0, "12210": 12795789312.0, "12215": 12795789312.0, "12220": 12795789312.0, "12225": 12795789312.0, "12230": 12795789312.0, "12235": 12795789312.0, "12240": 12795789312.0, "12245": 12795789312.0, "12250": 12795789312.0, "12255": 12795789312.0, "12260": 12795789312.0, "12265": 12795789312.0, "12270": 12795789312.0, "12275": 12795789312.0, "12280": 12795789312.0, "12285": 12795789312.0, "12290": 12795789312.0, "12295": 12795789312.0, "12300": 12795789312.0, "12305": 12795789312.0, "12310": 12795789312.0, "12315": 12795789312.0, "12320": 12795789312.0, "12325": 12795789312.0, "12330": 12795789312.0, "12335": 12795789312.0, "12340": 12795789312.0, "12345": 12795789312.0, "12350": 12795789312.0, "12355": 12795789312.0, "12360": 12795789312.0, "12365": 12795789312.0, "12370": 12795789312.0, "12375": 12795789312.0, "12380": 12795789312.0, "12385": 12795789312.0, "12390": 12795789312.0, "12395": 12795789312.0, "12400": 12795789312.0, "12405": 12795789312.0, "12410": 12795789312.0, "12415": 12795789312.0, "12420": 12795789312.0, "12425": 12795789312.0, "12430": 12795789312.0, "12435": 12795789312.0, "12440": 12795789312.0, "12445": 12795789312.0, "12450": 12795789312.0, "12455": 12795789312.0, "12460": 12795789312.0, "12465": 12795789312.0, "12470": 12795789312.0, "12475": 12795789312.0, "12480": 12795789312.0, "12485": 12795789312.0, "12490": 12795789312.0, "12495": 12795789312.0, "12500": 12795789312.0, "12505": 12795789312.0, "12510": 12795789312.0, "12515": 12795789312.0, "12520": 12795789312.0, "12525": 12795789312.0, "12530": 12795789312.0, "12535": 12795789312.0, "12540": 12795789312.0, "12545": 12795789312.0, "12550": 12795789312.0, "12555": 12795789312.0, "12560": 12795789312.0, "12565": 12795789312.0, "12570": 12795789312.0, "12575": 12795789312.0, "12580": 12795789312.0, "12585": 12795789312.0, "12590": 12795789312.0, "12595": 12795789312.0, "12600": 12795789312.0, "12605": 12795789312.0, "12610": 12795789312.0, "12615": 12795789312.0, "12620": 12795789312.0, "12625": 12795789312.0, "12630": 12795789312.0, "12635": 12795789312.0, "12640": 12795789312.0, "12645": 12795789312.0, "12650": 12795789312.0, "12655": 12795789312.0, "12660": 12795789312.0, "12665": 12795789312.0, "12670": 12795789312.0, "12675": 12795789312.0, "12680": 12795789312.0, "12685": 12795789312.0, "12690": 12795789312.0, "12695": 12795789312.0, "12700": 12795789312.0, "12705": 12795789312.0, "12710": 12795789312.0, "12715": 12795789312.0, "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": "nan", "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": "nan", "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": "nan"}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 13000, "step_interval": 5, "values": {"1": 27991298048.0, "5": 28489385984.0, "10": 28489385984.0, "15": 28489385984.0, "20": 28489385984.0, "25": 28489385984.0, "30": 28489385984.0, "35": 28489385984.0, "40": 28489385984.0, "45": 28489385984.0, "50": 28489385984.0, "55": 28489385984.0, "60": 28489385984.0, "65": 28489385984.0, "70": 28489385984.0, "75": 28489385984.0, "80": 28489385984.0, "85": 28489385984.0, "90": 28489385984.0, "95": 28489385984.0, "100": 28489385984.0, "105": 28489385984.0, "110": 28489385984.0, "115": 28489385984.0, "120": 28489385984.0, "125": 28489385984.0, "130": 28489385984.0, "135": 28489385984.0, "140": 28489385984.0, "145": 28489385984.0, "150": 28489385984.0, "155": 28489385984.0, "160": 28489385984.0, "165": 28489385984.0, "170": 28489385984.0, "175": 28489385984.0, "180": 28489385984.0, "185": 28489385984.0, "190": 28489385984.0, "195": 28489385984.0, "200": 28489385984.0, "205": 28489385984.0, "210": 28489385984.0, "215": 28489385984.0, "220": 28489385984.0, "225": 28489385984.0, "230": 28489385984.0, "235": 28489385984.0, "240": 28489385984.0, "245": 28489385984.0, "250": 28489385984.0, "255": 28489385984.0, "260": 28489385984.0, "265": 28489385984.0, "270": 28489385984.0, "275": 28489385984.0, "280": 28489385984.0, "285": 28489385984.0, "290": 28489385984.0, "295": 28489385984.0, "300": 28489385984.0, "305": 28489385984.0, "310": 28489385984.0, "315": 28489385984.0, "320": 28489385984.0, "325": 28489385984.0, "330": 28489385984.0, "335": 28489385984.0, "340": 28489385984.0, "345": 28489385984.0, "350": 28489385984.0, "355": 28489385984.0, "360": 28489385984.0, "365": 28489385984.0, "370": 28489385984.0, "375": 28489385984.0, "380": 28489385984.0, "385": 28489385984.0, "390": 28489385984.0, "395": 28489385984.0, "400": 28489385984.0, "405": 28489385984.0, "410": 28489385984.0, "415": 28489385984.0, "420": 28489385984.0, "425": 28489385984.0, "430": 28489385984.0, "435": 28489385984.0, "440": 28489385984.0, "445": 28489385984.0, "450": 28489385984.0, "455": 28489385984.0, "460": 28489385984.0, "465": 28489385984.0, "470": 28489385984.0, "475": 28489385984.0, "480": 28489385984.0, "485": 28489385984.0, "490": 28489385984.0, "495": 28489385984.0, "500": 28489385984.0, "505": 28489385984.0, "510": 28489385984.0, "515": 28489385984.0, "520": 28489385984.0, "525": 28489385984.0, "530": 28489385984.0, "535": 28489385984.0, "540": 28489385984.0, "545": 28489385984.0, "550": 28489385984.0, "555": 28489385984.0, "560": 28489385984.0, "565": 28489385984.0, "570": 28489385984.0, "575": 28489385984.0, "580": 28489385984.0, "585": 28489385984.0, "590": 28489385984.0, "595": 28489385984.0, "600": 28489385984.0, "605": 28489385984.0, "610": 28489385984.0, "615": 28489385984.0, "620": 28489385984.0, "625": 28489385984.0, "630": 28489385984.0, "635": 28489385984.0, "640": 28489385984.0, "645": 28489385984.0, "650": 28489385984.0, "655": 28489385984.0, "660": 28489385984.0, "665": 28489385984.0, "670": 28489385984.0, "675": 28489385984.0, "680": 28489385984.0, "685": 28489385984.0, "690": 28489385984.0, "695": 28489385984.0, "700": 28489385984.0, "705": 28489385984.0, "710": 28489385984.0, "715": 28489385984.0, "720": 28489385984.0, "725": 28489385984.0, "730": 28489385984.0, "735": 28489385984.0, "740": 28489385984.0, "745": 28489385984.0, "750": 28489385984.0, "755": 28489385984.0, "760": 28489385984.0, "765": 28489385984.0, "770": 28489385984.0, "775": 28489385984.0, "780": 28489385984.0, "785": 28489385984.0, "790": 28489385984.0, "795": 28489385984.0, "800": 28489385984.0, "805": 28489385984.0, "810": 28489385984.0, "815": 28489385984.0, "820": 28489385984.0, "825": 28489385984.0, "830": 28489385984.0, "835": 28489385984.0, "840": 28489385984.0, "845": 28489385984.0, "850": 28489385984.0, "855": 28489385984.0, "860": 28489385984.0, "865": 28489385984.0, "870": 28489385984.0, "875": 28489385984.0, "880": 28489385984.0, "885": 28489385984.0, "890": 28489385984.0, "895": 28489385984.0, "900": 28489385984.0, "905": 28489385984.0, "910": 28489385984.0, "915": 28489385984.0, "920": 28489385984.0, "925": 28489385984.0, "930": 28489385984.0, "935": 28489385984.0, "940": 28489385984.0, "945": 28489385984.0, "950": 28489385984.0, "955": 28489385984.0, "960": 28489385984.0, "965": 28489385984.0, "970": 28489385984.0, "975": 28489385984.0, "980": 28489385984.0, "985": 28489385984.0, "990": 28489385984.0, "995": 28489385984.0, "1000": 28489385984.0, "1005": 28489385984.0, "1010": 28489385984.0, "1015": 28489385984.0, "1020": 28489385984.0, "1025": 28489385984.0, "1030": 28489385984.0, "1035": 28489385984.0, "1040": 28489385984.0, "1045": 28489385984.0, "1050": 28489385984.0, "1055": 28489385984.0, "1060": 28489385984.0, "1065": 28489385984.0, "1070": 28489385984.0, "1075": 28489385984.0, "1080": 28489385984.0, "1085": 28489385984.0, "1090": 28489385984.0, "1095": 28489385984.0, "1100": 28489385984.0, "1105": 28489385984.0, "1110": 28489385984.0, "1115": 28489385984.0, "1120": 28489385984.0, "1125": 28489385984.0, "1130": 28489385984.0, "1135": 28489385984.0, "1140": 28489385984.0, "1145": 28489385984.0, "1150": 28489385984.0, "1155": 28489385984.0, "1160": 28489385984.0, "1165": 28489385984.0, "1170": 28489385984.0, "1175": 28489385984.0, "1180": 28489385984.0, "1185": 28489385984.0, "1190": 28489385984.0, "1195": 28489385984.0, "1200": 28489385984.0, "1205": 28489385984.0, "1210": 28489385984.0, "1215": 28489385984.0, "1220": 28489385984.0, "1225": 28489385984.0, "1230": 28489385984.0, "1235": 28489385984.0, "1240": 28489385984.0, "1245": 28489385984.0, "1250": 28489385984.0, "1255": 28489385984.0, "1260": 28489385984.0, "1265": 28489385984.0, "1270": 28489385984.0, "1275": 28489385984.0, "1280": 28489385984.0, "1285": 28489385984.0, "1290": 28489385984.0, "1295": 28489385984.0, "1300": 28489385984.0, "1305": 28489385984.0, "1310": 28489385984.0, "1315": 28489385984.0, "1320": 28489385984.0, "1325": 28489385984.0, "1330": 28489385984.0, "1335": 28489385984.0, "1340": 28489385984.0, "1345": 28489385984.0, "1350": 28489385984.0, "1355": 28489385984.0, "1360": 28489385984.0, "1365": 28489385984.0, "1370": 28489385984.0, "1375": 28489385984.0, "1380": 28489385984.0, "1385": 28489385984.0, "1390": 28489385984.0, "1395": 28489385984.0, "1400": 28489385984.0, "1405": 28489385984.0, "1410": 28489385984.0, "1415": 28489385984.0, "1420": 28489385984.0, "1425": 28489385984.0, "1430": 28489385984.0, "1435": 28489385984.0, "1440": 28489385984.0, "1445": 28489385984.0, "1450": 28489385984.0, "1455": 28489385984.0, "1460": 28489385984.0, "1465": 28489385984.0, "1470": 28489385984.0, "1475": 28489385984.0, "1480": 28489385984.0, "1485": 28489385984.0, "1490": 28489385984.0, "1495": 28489385984.0, "1500": 28489385984.0, "1505": 28489385984.0, "1510": 28489385984.0, "1515": 28489385984.0, "1520": 28489385984.0, "1525": 28489385984.0, "1530": 28489385984.0, "1535": 28489385984.0, "1540": 28489385984.0, "1545": 28489385984.0, "1550": 28489385984.0, "1555": 28489385984.0, "1560": 28489385984.0, "1565": 28489385984.0, "1570": 28489385984.0, "1575": 28489385984.0, "1580": 28489385984.0, "1585": 28489385984.0, "1590": 28489385984.0, "1595": 28489385984.0, "1600": 28489385984.0, "1605": 28489385984.0, "1610": 28489385984.0, "1615": 28489385984.0, "1620": 28489385984.0, "1625": 28489385984.0, "1630": 28489385984.0, "1635": 28489385984.0, "1640": 28489385984.0, "1645": 28489385984.0, "1650": 28489385984.0, "1655": 28489385984.0, "1660": 28489385984.0, "1665": 28489385984.0, "1670": 28489385984.0, "1675": 28489385984.0, "1680": 28489385984.0, "1685": 28489385984.0, "1690": 28489385984.0, "1695": 28489385984.0, "1700": 28489385984.0, "1705": 28489385984.0, "1710": 28489385984.0, "1715": 28489385984.0, "1720": 28489385984.0, "1725": 28489385984.0, "1730": 28489385984.0, "1735": 28489385984.0, "1740": 28489385984.0, "1745": 28489385984.0, "1750": 28489385984.0, "1755": 28489385984.0, "1760": 28489385984.0, "1765": 28489385984.0, "1770": 28489385984.0, "1775": 28489385984.0, "1780": 28489385984.0, "1785": 28489385984.0, "1790": 28489385984.0, "1795": 28489385984.0, "1800": 28489385984.0, "1805": 28489385984.0, "1810": 28489385984.0, "1815": 28489385984.0, "1820": 28489385984.0, "1825": 28489385984.0, "1830": 28489385984.0, "1835": 28489385984.0, "1840": 28489385984.0, "1845": 28489385984.0, "1850": 28489385984.0, "1855": 28489385984.0, "1860": 28489385984.0, "1865": 28489385984.0, "1870": 28489385984.0, "1875": 28489385984.0, "1880": 28489385984.0, "1885": 28489385984.0, "1890": 28489385984.0, "1895": 28489385984.0, "1900": 28489385984.0, "1905": 28489385984.0, "1910": 28489385984.0, "1915": 28489385984.0, "1920": 28489385984.0, "1925": 28489385984.0, "1930": 28489385984.0, "1935": 28489385984.0, "1940": 28489385984.0, "1945": 28489385984.0, "1950": 28489385984.0, "1955": 28489385984.0, "1960": 28489385984.0, "1965": 28489385984.0, "1970": 28489385984.0, "1975": 28489385984.0, "1980": 28489385984.0, "1985": 28489385984.0, "1990": 28489385984.0, "1995": 28489385984.0, "2000": 28489385984.0, "2005": 28489385984.0, "2010": 28489385984.0, "2015": 28489385984.0, "2020": 28489385984.0, "2025": 28489385984.0, "2030": 28489385984.0, "2035": 28489385984.0, "2040": 28489385984.0, "2045": 28489385984.0, "2050": 28489385984.0, "2055": 28489385984.0, "2060": 28489385984.0, "2065": 28489385984.0, "2070": 28489385984.0, "2075": 28489385984.0, "2080": 28489385984.0, "2085": 28489385984.0, "2090": 28489385984.0, "2095": 28489385984.0, "2100": 28489385984.0, "2105": 28489385984.0, "2110": 28489385984.0, "2115": 28489385984.0, "2120": 28489385984.0, "2125": 28489385984.0, "2130": 28489385984.0, "2135": 28489385984.0, "2140": 28489385984.0, "2145": 28489385984.0, "2150": 28489385984.0, "2155": 28489385984.0, "2160": 28489385984.0, "2165": 28489385984.0, "2170": 28489385984.0, "2175": 28489385984.0, "2180": 28489385984.0, "2185": 28489385984.0, "2190": 28489385984.0, "2195": 28489385984.0, "2200": 28489385984.0, "2205": 28489385984.0, "2210": 28489385984.0, "2215": 28489385984.0, "2220": 28489385984.0, "2225": 28489385984.0, "2230": 28489385984.0, "2235": 28489385984.0, "2240": 28489385984.0, "2245": 28489385984.0, "2250": 28489385984.0, "2255": 28489385984.0, "2260": 28489385984.0, "2265": 28489385984.0, "2270": 28489385984.0, "2275": 28489385984.0, "2280": 28489385984.0, "2285": 28489385984.0, "2290": 28489385984.0, "2295": 28489385984.0, "2300": 28489385984.0, "2305": 28489385984.0, "2310": 28489385984.0, "2315": 28489385984.0, "2320": 28489385984.0, "2325": 28489385984.0, "2330": 28489385984.0, "2335": 28489385984.0, "2340": 28489385984.0, "2345": 28489385984.0, "2350": 28489385984.0, "2355": 28489385984.0, "2360": 28489385984.0, "2365": 28489385984.0, "2370": 28489385984.0, "2375": 28489385984.0, "2380": 28489385984.0, "2385": 28489385984.0, "2390": 28489385984.0, "2395": 28489385984.0, "2400": 28489385984.0, "2405": 28489385984.0, "2410": 28489385984.0, "2415": 28489385984.0, "2420": 28489385984.0, "2425": 28489385984.0, "2430": 28489385984.0, "2435": 28489385984.0, "2440": 28489385984.0, "2445": 28489385984.0, "2450": 28489385984.0, "2455": 28489385984.0, "2460": 28489385984.0, "2465": 28489385984.0, "2470": 28489385984.0, "2475": 28489385984.0, "2480": 28489385984.0, "2485": 28489385984.0, "2490": 28489385984.0, "2495": 28489385984.0, "2500": 28489385984.0, "2505": 28489385984.0, "2510": 28489385984.0, "2515": 28489385984.0, "2520": 28489385984.0, "2525": 28489385984.0, "2530": 28489385984.0, "2535": 28489385984.0, "2540": 28489385984.0, "2545": 28489385984.0, "2550": 28489385984.0, "2555": 28489385984.0, "2560": 28489385984.0, "2565": 28489385984.0, "2570": 28489385984.0, "2575": 28489385984.0, "2580": 28489385984.0, "2585": 28489385984.0, "2590": 28489385984.0, "2595": 28489385984.0, "2600": 28489385984.0, "2605": 28489385984.0, "2610": 28489385984.0, "2615": 28489385984.0, "2620": 28489385984.0, "2625": 28489385984.0, "2630": 28489385984.0, "2635": 28489385984.0, "2640": 28489385984.0, "2645": 28489385984.0, "2650": 28489385984.0, "2655": 28489385984.0, "2660": 28489385984.0, "2665": 28489385984.0, "2670": 28489385984.0, "2675": 28489385984.0, "2680": 28489385984.0, "2685": 28489385984.0, "2690": 28489385984.0, "2695": 28489385984.0, "2700": 28489385984.0, "2705": 28489385984.0, "2710": 28489385984.0, "2715": 28489385984.0, "2720": 28489385984.0, "2725": 28489385984.0, "2730": 28489385984.0, "2735": 28489385984.0, "2740": 28489385984.0, "2745": 28489385984.0, "2750": 28489385984.0, "2755": 28489385984.0, "2760": 28489385984.0, "2765": 28489385984.0, "2770": 28489385984.0, "2775": 28489385984.0, "2780": 28489385984.0, "2785": 28489385984.0, "2790": 28489385984.0, "2795": 28489385984.0, "2800": 28489385984.0, "2805": 28489385984.0, "2810": 28489385984.0, "2815": 28489385984.0, "2820": 28489385984.0, "2825": 28489385984.0, "2830": 28489385984.0, "2835": 28489385984.0, "2840": 28489385984.0, "2845": 28489385984.0, "2850": 28489385984.0, "2855": 28489385984.0, "2860": 28489385984.0, "2865": 28489385984.0, "2870": 28489385984.0, "2875": 28489385984.0, "2880": 28489385984.0, "2885": 28489385984.0, "2890": 28489385984.0, "2895": 28489385984.0, "2900": 28489385984.0, "2905": 28489385984.0, "2910": 28489385984.0, "2915": 28489385984.0, "2920": 28489385984.0, "2925": 28489385984.0, "2930": 28489385984.0, "2935": 28489385984.0, "2940": 28489385984.0, "2945": 28489385984.0, "2950": 28489385984.0, "2955": 28489385984.0, "2960": 28489385984.0, "2965": 28489385984.0, "2970": 28489385984.0, "2975": 28489385984.0, "2980": 28489385984.0, "2985": 28489385984.0, "2990": 28489385984.0, "2995": 28489385984.0, "3000": 28489385984.0, "3005": 28489385984.0, "3010": 28489385984.0, "3015": 28489385984.0, "3020": 28489385984.0, "3025": 28489385984.0, "3030": 28489385984.0, "3035": 28489385984.0, "3040": 28489385984.0, "3045": 28489385984.0, "3050": 28489385984.0, "3055": 28489385984.0, "3060": 28489385984.0, "3065": 28489385984.0, "3070": 28489385984.0, "3075": 28489385984.0, "3080": 28489385984.0, "3085": 28489385984.0, "3090": 28489385984.0, "3095": 28489385984.0, "3100": 28489385984.0, "3105": 28489385984.0, "3110": 28489385984.0, "3115": 28489385984.0, "3120": 28489385984.0, "3125": 28489385984.0, "3130": 28489385984.0, "3135": 28489385984.0, "3140": 28489385984.0, "3145": 28489385984.0, "3150": 28489385984.0, "3155": 28489385984.0, "3160": 28489385984.0, "3165": 28489385984.0, "3170": 28489385984.0, "3175": 28489385984.0, "3180": 28489385984.0, "3185": 28489385984.0, "3190": 28489385984.0, "3195": 28489385984.0, "3200": 28489385984.0, "3205": 28489385984.0, "3210": 28489385984.0, "3215": 28489385984.0, "3220": 28489385984.0, "3225": 28489385984.0, "3230": 28489385984.0, "3235": 28489385984.0, "3240": 28489385984.0, "3245": 28489385984.0, "3250": 28489385984.0, "3255": 28489385984.0, "3260": 28489385984.0, "3265": 28489385984.0, "3270": 28489385984.0, "3275": 28489385984.0, "3280": 28489385984.0, "3285": 28489385984.0, "3290": 28489385984.0, "3295": 28489385984.0, "3300": 28489385984.0, "3305": 28489385984.0, "3310": 28489385984.0, "3315": 28489385984.0, "3320": 28489385984.0, "3325": 28489385984.0, "3330": 28489385984.0, "3335": 28489385984.0, "3340": 28489385984.0, "3345": 28489385984.0, "3350": 28489385984.0, "3355": 28489385984.0, "3360": 28489385984.0, "3365": 28489385984.0, "3370": 28489385984.0, "3375": 28489385984.0, "3380": 28489385984.0, "3385": 28489385984.0, "3390": 28489385984.0, "3395": 28489385984.0, "3400": 28489385984.0, "3405": 28489385984.0, "3410": 28489385984.0, "3415": 28489385984.0, "3420": 28489385984.0, "3425": 28489385984.0, "3430": 28489385984.0, "3435": 28489385984.0, "3440": 28489385984.0, "3445": 28489385984.0, "3450": 28489385984.0, "3455": 28489385984.0, "3460": 28489385984.0, "3465": 28489385984.0, "3470": 28489385984.0, "3475": 28489385984.0, "3480": 28489385984.0, "3485": 28489385984.0, "3490": 28489385984.0, "3495": 28489385984.0, "3500": 28489385984.0, "3505": 28489385984.0, "3510": 28489385984.0, "3515": 28489385984.0, "3520": 28489385984.0, "3525": 28489385984.0, "3530": 28489385984.0, "3535": 28489385984.0, "3540": 28489385984.0, "3545": 28489385984.0, "3550": 28489385984.0, "3555": 28489385984.0, "3560": 28489385984.0, "3565": 28489385984.0, "3570": 28489568256.0, "3575": 28489568256.0, "3580": 28489568256.0, "3585": 28489568256.0, "3590": 28489568256.0, "3595": 28489568256.0, "3600": 28489568256.0, "3605": 28489568256.0, "3610": 28489568256.0, "3615": 28489568256.0, "3620": 28489568256.0, "3625": 28489568256.0, "3630": 28489568256.0, "3635": 28489568256.0, "3640": 28489568256.0, "3645": 28489568256.0, "3650": 28489568256.0, "3655": 28489568256.0, "3660": 28489568256.0, "3665": 28489568256.0, "3670": 28489568256.0, "3675": 28489568256.0, "3680": 28489568256.0, "3685": 28489568256.0, "3690": 28489568256.0, "3695": 28489568256.0, "3700": 28489568256.0, "3705": 28489568256.0, "3710": 28489568256.0, "3715": 28489568256.0, "3720": 28489568256.0, "3725": 28489568256.0, "3730": 28489568256.0, "3735": 28489568256.0, "3740": 28489568256.0, "3745": 28489568256.0, "3750": 28489568256.0, "3755": 28489568256.0, "3760": 28489568256.0, "3765": 28489568256.0, "3770": 28489568256.0, "3775": 28489568256.0, "3780": 28489568256.0, "3785": 28489568256.0, "3790": 28489568256.0, "3795": 28489568256.0, "3800": 28489568256.0, "3805": 28489568256.0, "3810": 28489568256.0, "3815": 28489568256.0, "3820": 28489568256.0, "3825": 28489568256.0, "3830": 28489568256.0, "3835": 28489568256.0, "3840": 28489568256.0, "3845": 28489568256.0, "3850": 28489568256.0, "3855": 28489568256.0, "3860": 28489568256.0, "3865": 28489568256.0, "3870": 28489568256.0, "3875": 28489568256.0, "3880": 28489568256.0, "3885": 28489568256.0, "3890": 28489568256.0, "3895": 28489568256.0, "3900": 28489568256.0, "3905": 28489568256.0, "3910": 28489568256.0, "3915": 28489568256.0, "3920": 28489568256.0, "3925": 28489568256.0, "3930": 28489568256.0, "3935": 28489568256.0, "3940": 28489568256.0, "3945": 28489568256.0, "3950": 28489568256.0, "3955": 28489568256.0, "3960": 28489568256.0, "3965": 28489568256.0, "3970": 28489568256.0, "3975": 28489568256.0, "3980": 28489568256.0, "3985": 28489568256.0, "3990": 28489568256.0, "3995": 28489568256.0, "4000": 28489568256.0, "4005": 28489568256.0, "4010": 28489568256.0, "4015": 28489568256.0, "4020": 28489568256.0, "4025": 28489568256.0, "4030": 28489568256.0, "4035": 28489568256.0, "4040": 28489568256.0, "4045": 28489568256.0, "4050": 28489568256.0, "4055": 28489568256.0, "4060": 28489568256.0, "4065": 28489568256.0, "4070": 28489568256.0, "4075": 28489568256.0, "4080": 28489568256.0, "4085": 28489568256.0, "4090": 28489568256.0, "4095": 28489568256.0, "4100": 28489568256.0, "4105": 28489568256.0, "4110": 28489568256.0, "4115": 28489568256.0, "4120": 28489568256.0, "4125": 28489568256.0, "4130": 28489568256.0, "4135": 28489568256.0, "4140": 28489568256.0, "4145": 28489568256.0, "4150": 28489568256.0, "4155": 28489568256.0, "4160": 28489568256.0, "4165": 28489568256.0, "4170": 28489568256.0, "4175": 28489568256.0, "4180": 28489568256.0, "4185": 28489568256.0, "4190": 28489568256.0, "4195": 28489568256.0, "4200": 28489568256.0, "4205": 28489568256.0, "4210": 28489568256.0, "4215": 28489568256.0, "4220": 28489568256.0, "4225": 28489568256.0, "4230": 28489568256.0, "4235": 28489568256.0, "4240": 28489568256.0, "4245": 28489568256.0, "4250": 28489568256.0, "4255": 28489568256.0, "4260": 28489568256.0, "4265": 28489568256.0, "4270": 28489568256.0, "4275": 28489568256.0, "4280": 28489568256.0, "4285": 28489568256.0, "4290": 28489568256.0, "4295": 28489568256.0, "4300": 28489568256.0, "4305": 28489568256.0, "4310": 28489568256.0, "4315": 28489568256.0, "4320": 28489568256.0, "4325": 28489568256.0, "4330": 28489568256.0, "4335": 28489568256.0, "4340": 28489568256.0, "4345": 28489568256.0, "4350": 28489568256.0, "4355": 28489568256.0, "4360": 28489568256.0, "4365": 28489568256.0, "4370": 28489568256.0, "4375": 28489568256.0, "4380": 28489568256.0, "4385": 28489568256.0, "4390": 28489568256.0, "4395": 28489568256.0, "4400": 28489568256.0, "4405": 28489568256.0, "4410": 28489568256.0, "4415": 28489568256.0, "4420": 28489568256.0, "4425": 28489568256.0, "4430": 28489568256.0, "4435": 28489568256.0, "4440": 28489568256.0, "4445": 28489568256.0, "4450": 28489568256.0, "4455": 28489568256.0, "4460": 28489568256.0, "4465": 28489568256.0, "4470": 28489568256.0, "4475": 28489568256.0, "4480": 28489568256.0, "4485": 28489568256.0, "4490": 28489568256.0, "4495": 28489568256.0, "4500": 28489568256.0, "4505": 28489568256.0, "4510": 28489568256.0, "4515": 28489568256.0, "4520": 28489568256.0, "4525": 28489568256.0, "4530": 28489568256.0, "4535": 28489568256.0, "4540": 28489568256.0, "4545": 28489568256.0, "4550": 28489568256.0, "4555": 28489568256.0, "4560": 28489568256.0, "4565": 28489568256.0, "4570": 28489568256.0, "4575": 28489568256.0, "4580": 28489568256.0, "4585": 28489568256.0, "4590": 28489568256.0, "4595": 28489568256.0, "4600": 28489568256.0, "4605": 28489568256.0, "4610": 28489568256.0, "4615": 28489568256.0, "4620": 28489568256.0, "4625": 28489568256.0, "4630": 28489568256.0, "4635": 28489568256.0, "4640": 28489568256.0, "4645": 28489568256.0, "4650": 28489568256.0, "4655": 28489568256.0, "4660": 28489568256.0, "4665": 28489568256.0, "4670": 28489568256.0, "4675": 28489568256.0, "4680": 28489568256.0, "4685": 28489568256.0, "4690": 28489568256.0, "4695": 28489568256.0, "4700": 28489568256.0, "4705": 28489568256.0, "4710": 28489568256.0, "4715": 28489568256.0, "4720": 28489568256.0, "4725": 28489568256.0, "4730": 28489568256.0, "4735": 28489568256.0, "4740": 28489568256.0, "4745": 28489568256.0, "4750": 28489568256.0, "4755": 28489568256.0, "4760": 28489568256.0, "4765": 28489568256.0, "4770": 28489568256.0, "4775": 28489568256.0, "4780": 28489568256.0, "4785": 28489568256.0, "4790": 28489568256.0, "4795": 28489568256.0, "4800": 28489568256.0, "4805": 28489568256.0, "4810": 28489568256.0, "4815": 28489568256.0, "4820": 28489568256.0, "4825": 28489568256.0, "4830": 28489568256.0, "4835": 28489568256.0, "4840": 28489568256.0, "4845": 28489568256.0, "4850": 28489568256.0, "4855": 28489568256.0, "4860": 28489568256.0, "4865": 28489568256.0, "4870": 28489568256.0, "4875": 28489568256.0, "4880": 28489568256.0, "4885": 28489568256.0, "4890": 28489568256.0, "4895": 28489568256.0, "4900": 28489568256.0, "4905": 28489568256.0, "4910": 28489568256.0, "4915": 28489568256.0, "4920": 28489568256.0, "4925": 28489568256.0, "4930": 28489568256.0, "4935": 28489568256.0, "4940": 28489568256.0, "4945": 28489568256.0, "4950": 28489568256.0, "4955": 28489568256.0, "4960": 28489568256.0, "4965": 28489568256.0, "4970": 28489568256.0, "4975": 28489568256.0, "4980": 28489568256.0, "4985": 28489568256.0, "4990": 28489568256.0, "4995": 28489568256.0, "5000": 28489568256.0, "5005": 28489568256.0, "5010": 28489568256.0, "5015": 28489568256.0, "5020": 28489568256.0, "5025": 28489568256.0, "5030": 28489568256.0, "5035": 28489568256.0, "5040": 28489568256.0, "5045": 28489568256.0, "5050": 28489568256.0, "5055": 28489568256.0, "5060": 28489568256.0, "5065": 28489568256.0, "5070": 28489568256.0, "5075": 28489568256.0, "5080": 28489568256.0, "5085": 28489568256.0, "5090": 28489568256.0, "5095": 28489568256.0, "5100": 28489568256.0, "5105": 28489568256.0, "5110": 28489568256.0, "5115": 28489568256.0, "5120": 28489568256.0, "5125": 28489568256.0, "5130": 28489568256.0, "5135": 28489568256.0, "5140": 28489568256.0, "5145": 28489568256.0, "5150": 28489568256.0, "5155": 28489568256.0, "5160": 28489568256.0, "5165": 28489568256.0, "5170": 28489568256.0, "5175": 28489568256.0, "5180": 28489568256.0, "5185": 28489568256.0, "5190": 28489568256.0, "5195": 28489568256.0, "5200": 28489568256.0, "5205": 28489568256.0, "5210": 28489568256.0, "5215": 28489568256.0, "5220": 28489568256.0, "5225": 28489568256.0, "5230": 28489568256.0, "5235": 28489568256.0, "5240": 28489568256.0, "5245": 28489568256.0, "5250": 28489568256.0, "5255": 28489568256.0, "5260": 28489568256.0, "5265": 28489568256.0, "5270": 28489568256.0, "5275": 28489568256.0, "5280": 28489568256.0, "5285": 28489568256.0, "5290": 28489568256.0, "5295": 28489568256.0, "5300": 28489568256.0, "5305": 28489568256.0, "5310": 28489568256.0, "5315": 28489568256.0, "5320": 28489568256.0, "5325": 28489568256.0, "5330": 28489568256.0, "5335": 28489568256.0, "5340": 28489568256.0, "5345": 28489568256.0, "5350": 28489568256.0, "5355": 28489568256.0, "5360": 28489568256.0, "5365": 28489568256.0, "5370": 28489568256.0, "5375": 28489568256.0, "5380": 28489568256.0, "5385": 28489568256.0, "5390": 28489568256.0, "5395": 28489568256.0, "5400": 28489568256.0, "5405": 28489568256.0, "5410": 28489568256.0, "5415": 28489568256.0, "5420": 28489568256.0, "5425": 28489568256.0, "5430": 28489568256.0, "5435": 28489568256.0, "5440": 28489568256.0, "5445": 28489568256.0, "5450": 28489568256.0, "5455": 28489568256.0, "5460": 28489568256.0, "5465": 28489568256.0, "5470": 28489568256.0, "5475": 28489568256.0, "5480": 28489568256.0, "5485": 28489568256.0, "5490": 28489568256.0, "5495": 28489568256.0, "5500": 28489568256.0, "5505": 28489568256.0, "5510": 28489568256.0, "5515": 28489568256.0, "5520": 28489568256.0, "5525": 28489568256.0, "5530": 28489568256.0, "5535": 28489568256.0, "5540": 28489568256.0, "5545": 28489568256.0, "5550": 28489568256.0, "5555": 28489568256.0, "5560": 28489568256.0, "5565": 28489568256.0, "5570": 28489568256.0, "5575": 28489568256.0, "5580": 28489568256.0, "5585": 28489568256.0, "5590": 28489568256.0, "5595": 28489568256.0, "5600": 28489568256.0, "5605": 28489568256.0, "5610": 28489568256.0, "5615": 28489568256.0, "5620": 28489568256.0, "5625": 28489568256.0, "5630": 28489568256.0, "5635": 28489568256.0, "5640": 28489568256.0, "5645": 28489568256.0, "5650": 28489568256.0, "5655": 28489568256.0, "5660": 28489568256.0, "5665": 28489568256.0, "5670": 28489568256.0, "5675": 28489568256.0, "5680": 28489568256.0, "5685": 28489568256.0, "5690": 28489568256.0, "5695": 28489568256.0, "5700": 28489568256.0, "5705": 28489568256.0, "5710": 28489568256.0, "5715": 28489568256.0, "5720": 28489568256.0, "5725": 28489568256.0, "5730": 28489568256.0, "5735": 28489568256.0, "5740": 28489568256.0, "5745": 28489568256.0, "5750": 28489568256.0, "5755": 28489568256.0, "5760": 28489568256.0, "5765": 28489568256.0, "5770": 28489568256.0, "5775": 28489568256.0, "5780": 28489568256.0, "5785": 28489568256.0, "5790": 28489568256.0, "5795": 28489568256.0, "5800": 28489568256.0, "5805": 28489568256.0, "5810": 28489568256.0, "5815": 28489568256.0, "5820": 28489568256.0, "5825": 28489568256.0, "5830": 28489568256.0, "5835": 28489568256.0, "5840": 28489568256.0, "5845": 28489568256.0, "5850": 28489568256.0, "5855": 28489568256.0, "5860": 28489568256.0, "5865": 28489568256.0, "5870": 28489568256.0, "5875": 28489568256.0, "5880": 28489568256.0, "5885": 28489568256.0, "5890": 28489568256.0, "5895": 28489568256.0, "5900": 28489568256.0, "5905": 28489568256.0, "5910": 28489568256.0, "5915": 28489568256.0, "5920": 28489568256.0, "5925": 28489568256.0, "5930": 28489568256.0, "5935": 28489568256.0, "5940": 28489568256.0, "5945": 28489568256.0, "5950": 28489568256.0, "5955": 28489568256.0, "5960": 28489568256.0, "5965": 28489568256.0, "5970": 28489568256.0, "5975": 28489568256.0, "5980": 28489568256.0, "5985": 28489568256.0, "5990": 28489568256.0, "5995": 28489568256.0, "6000": 28489568256.0, "6005": 28489568256.0, "6010": 28489568256.0, "6015": 28489568256.0, "6020": 28489568256.0, "6025": 28489568256.0, "6030": 28489568256.0, "6035": 28489568256.0, "6040": 28489568256.0, "6045": 28489568256.0, "6050": 28489568256.0, "6055": 28489568256.0, "6060": 28489568256.0, "6065": 28489568256.0, "6070": 28489568256.0, "6075": 28489568256.0, "6080": 28489568256.0, "6085": 28489568256.0, "6090": 28489568256.0, "6095": 28489568256.0, "6100": 28489568256.0, "6105": 28489568256.0, "6110": 28489568256.0, "6115": 28489568256.0, "6120": 28489568256.0, "6125": 28489568256.0, "6130": 28489568256.0, "6135": 28489568256.0, "6140": 28489568256.0, "6145": 28489568256.0, "6150": 28489568256.0, "6155": 28489568256.0, "6160": 28489568256.0, "6165": 28489568256.0, "6170": 28489568256.0, "6175": 28489568256.0, "6180": 28489568256.0, "6185": 28489568256.0, "6190": 28489568256.0, "6195": 28489568256.0, "6200": 28489568256.0, "6205": 28489568256.0, "6210": 28489568256.0, "6215": 28489568256.0, "6220": 28489568256.0, "6225": 28489568256.0, "6230": 28489568256.0, "6235": 28489568256.0, "6240": 28489568256.0, "6245": 28489568256.0, "6250": 28489568256.0, "6255": 28489568256.0, "6260": 28489568256.0, "6265": 28489568256.0, "6270": 28489568256.0, "6275": 28489568256.0, "6280": 28489568256.0, "6285": 28489568256.0, "6290": 28489568256.0, "6295": 28489568256.0, "6300": 28489568256.0, "6305": 28489568256.0, "6310": 28489568256.0, "6315": 28489568256.0, "6320": 28489568256.0, "6325": 28489568256.0, "6330": 28489568256.0, "6335": 28489568256.0, "6340": 28489568256.0, "6345": 28489568256.0, "6350": 28489568256.0, "6355": 28489568256.0, "6360": 28489568256.0, "6365": 28489568256.0, "6370": 28489568256.0, "6375": 28489568256.0, "6380": 28489568256.0, "6385": 28489568256.0, "6390": 28489568256.0, "6395": 28489568256.0, "6400": 28489568256.0, "6405": 28489568256.0, "6410": 28489568256.0, "6415": 28489568256.0, "6420": 28489568256.0, "6425": 28489568256.0, "6430": 28489568256.0, "6435": 28489568256.0, "6440": 28489568256.0, "6445": 28489568256.0, "6450": 28489568256.0, "6455": 28489568256.0, "6460": 28489568256.0, "6465": 28489568256.0, "6470": 28489568256.0, "6475": 28489568256.0, "6480": 28489568256.0, "6485": 28489568256.0, "6490": 28489568256.0, "6495": 28489568256.0, "6500": 28489568256.0, "6505": 28489568256.0, "6510": 28489568256.0, "6515": 28489568256.0, "6520": 28489568256.0, "6525": 28489568256.0, "6530": 28489568256.0, "6535": 28489568256.0, "6540": 28489568256.0, "6545": 28489568256.0, "6550": 28489568256.0, "6555": 28489568256.0, "6560": 28489568256.0, "6565": 28489568256.0, "6570": 28489568256.0, "6575": 28489568256.0, "6580": 28489568256.0, "6585": 28489568256.0, "6590": 28489568256.0, "6595": 28489568256.0, "6600": 28489568256.0, "6605": 28489568256.0, "6610": 28489568256.0, "6615": 28489568256.0, "6620": 28489568256.0, "6625": 28489568256.0, "6630": 28489568256.0, "6635": 28489568256.0, "6640": 28489568256.0, "6645": 28489568256.0, "6650": 28489568256.0, "6655": 28489568256.0, "6660": 28489568256.0, "6665": 28489568256.0, "6670": 28489568256.0, "6675": 28489568256.0, "6680": 28489568256.0, "6685": 28489568256.0, "6690": 28489568256.0, "6695": 28489568256.0, "6700": 28489568256.0, "6705": 28489568256.0, "6710": 28489568256.0, "6715": 28489568256.0, "6720": 28489568256.0, "6725": 28489568256.0, "6730": 28489568256.0, "6735": 28489568256.0, "6740": 28489568256.0, "6745": 28489568256.0, "6750": 28489568256.0, "6755": 28489568256.0, "6760": 28489568256.0, "6765": 28489568256.0, "6770": 28489568256.0, "6775": 28489568256.0, "6780": 28489568256.0, "6785": 28489568256.0, "6790": 28489568256.0, "6795": 28489568256.0, "6800": 28489568256.0, "6805": 28489568256.0, "6810": 28489568256.0, "6815": 28489568256.0, "6820": 28489568256.0, "6825": 28489568256.0, "6830": 28489568256.0, "6835": 28489568256.0, "6840": 28489568256.0, "6845": 28489568256.0, "6850": 28489568256.0, "6855": 28489568256.0, "6860": 28489568256.0, "6865": 28489568256.0, "6870": 28489568256.0, "6875": 28489568256.0, "6880": 28489568256.0, "6885": 28489568256.0, "6890": 28489568256.0, "6895": 28489568256.0, "6900": 28489568256.0, "6905": 28489568256.0, "6910": 28489568256.0, "6915": 28489568256.0, "6920": 28489568256.0, "6925": 28489568256.0, "6930": 28489568256.0, "6935": 28489568256.0, "6940": 28489568256.0, "6945": 28489568256.0, "6950": 28489568256.0, "6955": 28489568256.0, "6960": 28489568256.0, "6965": 28489568256.0, "6970": 28489568256.0, "6975": 28489568256.0, "6980": 28489568256.0, "6985": 28489568256.0, "6990": 28489568256.0, "6995": 28489568256.0, "7000": 28489568256.0, "7005": 28489568256.0, "7010": 28489568256.0, "7015": 28489568256.0, "7020": 28489568256.0, "7025": 28489568256.0, "7030": 28489568256.0, "7035": 28489568256.0, "7040": 28489568256.0, "7045": 28489568256.0, "7050": 28489568256.0, "7055": 28489568256.0, "7060": 28489568256.0, "7065": 28489568256.0, "7070": 28489568256.0, "7075": 28489568256.0, "7080": 28489568256.0, "7085": 28489568256.0, "7090": 28489568256.0, "7095": 28489568256.0, "7100": 28489568256.0, "7105": 28489568256.0, "7110": 28489568256.0, "7115": 28489568256.0, "7120": 28489568256.0, "7125": 28489568256.0, "7130": 28489568256.0, "7135": 28489568256.0, "7140": 28489568256.0, "7145": 28489568256.0, "7150": 28489568256.0, "7155": 28489568256.0, "7160": 28489568256.0, "7165": 28489568256.0, "7170": 28489568256.0, "7175": 28489568256.0, "7180": 28489568256.0, "7185": 28489568256.0, "7190": 28489568256.0, "7195": 28489568256.0, "7200": 28489568256.0, "7205": 28489568256.0, "7210": 28489568256.0, "7215": 28489568256.0, "7220": 28489568256.0, "7225": 28489568256.0, "7230": 28489568256.0, "7235": 28489568256.0, "7240": 28489568256.0, "7245": 28489568256.0, "7250": 28489568256.0, "7255": 28489568256.0, "7260": 28489568256.0, "7265": 28489568256.0, "7270": 28489568256.0, "7275": 28489568256.0, "7280": 28489568256.0, "7285": 28489568256.0, "7290": 28489568256.0, "7295": 28489568256.0, "7300": 28489568256.0, "7305": 28489568256.0, "7310": 28489568256.0, "7315": 28489568256.0, "7320": 28489568256.0, "7325": 28489568256.0, "7330": 28489568256.0, "7335": 28489568256.0, "7340": 28489568256.0, "7345": 28489568256.0, "7350": 28489568256.0, "7355": 28489568256.0, "7360": 28489568256.0, "7365": 28489568256.0, "7370": 28489568256.0, "7375": 28489568256.0, "7380": 28489568256.0, "7385": 28489568256.0, "7390": 28489568256.0, "7395": 28489568256.0, "7400": 28489568256.0, "7405": 28489568256.0, "7410": 28489568256.0, "7415": 28489568256.0, "7420": 28489568256.0, "7425": 28489568256.0, "7430": 28489568256.0, "7435": 28489568256.0, "7440": 28489568256.0, "7445": 28489568256.0, "7450": 28489568256.0, "7455": 28489568256.0, "7460": 28489568256.0, "7465": 28489568256.0, "7470": 28489568256.0, "7475": 28489568256.0, "7480": 28489568256.0, "7485": 28489568256.0, "7490": 28489568256.0, "7495": 28489568256.0, "7500": 28489568256.0, "7505": 28489568256.0, "7510": 28489568256.0, "7515": 28489568256.0, "7520": 28489568256.0, "7525": 28489568256.0, "7530": 28489568256.0, "7535": 28489568256.0, "7540": 28489568256.0, "7545": 28489568256.0, "7550": 28489568256.0, "7555": 28489568256.0, "7560": 28489568256.0, "7565": 28489568256.0, "7570": 28489568256.0, "7575": 28489568256.0, "7580": 28489568256.0, "7585": 28489568256.0, "7590": 28489568256.0, "7595": 28489568256.0, "7600": 28489568256.0, "7605": 28489568256.0, "7610": 28489568256.0, "7615": 28489568256.0, "7620": 28489568256.0, "7625": 28489568256.0, "7630": 28489568256.0, "7635": 28489568256.0, "7640": 28489568256.0, "7645": 28489568256.0, "7650": 28489568256.0, "7655": 28489568256.0, "7660": 28489568256.0, "7665": 28489568256.0, "7670": 28489568256.0, "7675": 28489568256.0, "7680": 28489568256.0, "7685": 28489568256.0, "7690": 28489568256.0, "7695": 28489568256.0, "7700": 28489568256.0, "7705": 28489568256.0, "7710": 28489568256.0, "7715": 28489568256.0, "7720": 28489568256.0, "7725": 28489568256.0, "7730": 28489568256.0, "7735": 28489568256.0, "7740": 28489568256.0, "7745": 28489568256.0, "7750": 28489568256.0, "7755": 28489568256.0, "7760": 28489568256.0, "7765": 28489568256.0, "7770": 28489568256.0, "7775": 28489568256.0, "7780": 28489568256.0, "7785": 28489568256.0, "7790": 28489568256.0, "7795": 28489568256.0, "7800": 28489568256.0, "7805": 28489568256.0, "7810": 28489568256.0, "7815": 28489568256.0, "7820": 28489568256.0, "7825": 28489568256.0, "7830": 28489568256.0, "7835": 28489568256.0, "7840": 28489568256.0, "7845": 28489568256.0, "7850": 28489568256.0, "7855": 28489568256.0, "7860": 28489568256.0, "7865": 28489568256.0, "7870": 28489568256.0, "7875": 28489568256.0, "7880": 28489568256.0, "7885": 28489568256.0, "7890": 28489568256.0, "7895": 28489568256.0, "7900": 28489568256.0, "7905": 28489568256.0, "7910": 28489568256.0, "7915": 28489568256.0, "7920": 28489568256.0, "7925": 28489568256.0, "7930": 28489568256.0, "7935": 28489568256.0, "7940": 28489568256.0, "7945": 28489568256.0, "7950": 28489568256.0, "7955": 28489568256.0, "7960": 28489568256.0, "7965": 28489568256.0, "7970": 28489568256.0, "7975": 28489568256.0, "7980": 28489568256.0, "7985": 28489568256.0, "7990": 28489568256.0, "7995": 28489568256.0, "8000": 28489568256.0, "8005": 28489568256.0, "8010": 28489568256.0, "8015": 28489568256.0, "8020": 28489568256.0, "8025": 28489568256.0, "8030": 28489568256.0, "8035": 28489568256.0, "8040": 28489568256.0, "8045": 28489568256.0, "8050": 28489568256.0, "8055": 28489568256.0, "8060": 28489568256.0, "8065": 28489568256.0, "8070": 28489568256.0, "8075": 28489568256.0, "8080": 28489568256.0, "8085": 28489568256.0, "8090": 28489568256.0, "8095": 28489568256.0, "8100": 28489568256.0, "8105": 28489568256.0, "8110": 28489568256.0, "8115": 28489568256.0, "8120": 28489568256.0, "8125": 28489568256.0, "8130": 28489568256.0, "8135": 28489568256.0, "8140": 28489568256.0, "8145": 28489568256.0, "8150": 28489568256.0, "8155": 28489568256.0, "8160": 28489568256.0, "8165": 28489568256.0, "8170": 28489568256.0, "8175": 28489568256.0, "8180": 28489568256.0, "8185": 28489568256.0, "8190": 28489568256.0, "8195": 28489568256.0, "8200": 28489568256.0, "8205": 28489568256.0, "8210": 28489568256.0, "8215": 28489568256.0, "8220": 28489568256.0, "8225": 28489568256.0, "8230": 28489568256.0, "8235": 28489568256.0, "8240": 28489568256.0, "8245": 28489568256.0, "8250": 28489568256.0, "8255": 28489568256.0, "8260": 28489568256.0, "8265": 28489568256.0, "8270": 28489568256.0, "8275": 28489568256.0, "8280": 28489568256.0, "8285": 28489568256.0, "8290": 28489568256.0, "8295": 28489568256.0, "8300": 28489568256.0, "8305": 28489568256.0, "8310": 28489568256.0, "8315": 28489568256.0, "8320": 28489568256.0, "8325": 28489568256.0, "8330": 28489568256.0, "8335": 28489568256.0, "8340": 28489568256.0, "8345": 28489568256.0, "8350": 28489568256.0, "8355": 28489568256.0, "8360": 28489568256.0, "8365": 28489568256.0, "8370": 28489568256.0, "8375": 28489568256.0, "8380": 28489568256.0, "8385": 28489568256.0, "8390": 28489568256.0, "8395": 28489568256.0, "8400": 28489568256.0, "8405": 28489568256.0, "8410": 28489568256.0, "8415": 28489568256.0, "8420": 28489568256.0, "8425": 28489568256.0, "8430": 28489568256.0, "8435": 28489568256.0, "8440": 28489568256.0, "8445": 28489568256.0, "8450": 28489568256.0, "8455": 28489568256.0, "8460": 28489568256.0, "8465": 28489568256.0, "8470": 28489568256.0, "8475": 28489568256.0, "8480": 28489568256.0, "8485": 28489568256.0, "8490": 28489568256.0, "8495": 28489568256.0, "8500": 28489568256.0, "8505": 28489568256.0, "8510": 28489568256.0, "8515": 28489568256.0, "8520": 28489568256.0, "8525": 28489568256.0, "8530": 28489568256.0, "8535": 28489568256.0, "8540": 28489568256.0, "8545": 28489568256.0, "8550": 28489568256.0, "8555": 28489568256.0, "8560": 28489568256.0, "8565": 28489568256.0, "8570": 28489568256.0, "8575": 28489568256.0, "8580": 28489568256.0, "8585": 28489568256.0, "8590": 28489568256.0, "8595": 28489568256.0, "8600": 28489568256.0, "8605": 28489568256.0, "8610": 28489568256.0, "8615": 28489568256.0, "8620": 28489568256.0, "8625": 28489568256.0, "8630": 28489568256.0, "8635": 28489568256.0, "8640": 28489568256.0, "8645": 28489568256.0, "8650": 28489568256.0, "8655": 28489568256.0, "8660": 28489568256.0, "8665": 28489568256.0, "8670": 28489568256.0, "8675": 28489568256.0, "8680": 28489568256.0, "8685": 28489568256.0, "8690": 28489568256.0, "8695": 28489568256.0, "8700": 28489568256.0, "8705": 28489568256.0, "8710": 28489568256.0, "8715": 28489568256.0, "8720": 28489568256.0, "8725": 28489568256.0, "8730": 28489568256.0, "8735": 28489568256.0, "8740": 28489568256.0, "8745": 28489568256.0, "8750": 28489568256.0, "8755": 28489568256.0, "8760": 28489568256.0, "8765": 28489568256.0, "8770": 28489568256.0, "8775": 28489568256.0, "8780": 28489568256.0, "8785": 28489568256.0, "8790": 28489568256.0, "8795": 28489568256.0, "8800": 28489568256.0, "8805": 28489568256.0, "8810": 28489568256.0, "8815": 28489568256.0, "8820": 28489568256.0, "8825": 28489568256.0, "8830": 28489568256.0, "8835": 28489568256.0, "8840": 28489568256.0, "8845": 28489568256.0, "8850": 28489568256.0, "8855": 28489568256.0, "8860": 28489568256.0, "8865": 28489568256.0, "8870": 28489568256.0, "8875": 28489568256.0, "8880": 28489568256.0, "8885": 28489568256.0, "8890": 28489568256.0, "8895": 28489568256.0, "8900": 28489568256.0, "8905": 28489568256.0, "8910": 28489568256.0, "8915": 28489568256.0, "8920": 28489568256.0, "8925": 28489568256.0, "8930": 28489568256.0, "8935": 28489568256.0, "8940": 28489568256.0, "8945": 28489568256.0, "8950": 28489568256.0, "8955": 28489568256.0, "8960": 28489568256.0, "8965": 28489568256.0, "8970": 28489568256.0, "8975": 28489568256.0, "8980": 28489568256.0, "8985": 28489568256.0, "8990": 28489568256.0, "8995": 28489568256.0, "9000": 28489568256.0, "9005": 28489568256.0, "9010": 28489568256.0, "9015": 28489568256.0, "9020": 28489568256.0, "9025": 28489568256.0, "9030": 28489568256.0, "9035": 28489568256.0, "9040": 28489568256.0, "9045": 28489568256.0, "9050": 28489568256.0, "9055": 28489568256.0, "9060": 28489568256.0, "9065": 28489568256.0, "9070": 28489568256.0, "9075": 28489568256.0, "9080": 28489568256.0, "9085": 28489568256.0, "9090": 28489568256.0, "9095": 28489568256.0, "9100": 28489568256.0, "9105": 28489568256.0, "9110": 28489568256.0, "9115": 28489568256.0, "9120": 28489568256.0, "9125": 28489568256.0, "9130": 28489568256.0, "9135": 28489568256.0, "9140": 28489568256.0, "9145": 28489568256.0, "9150": 28489568256.0, "9155": 28489568256.0, "9160": 28489568256.0, "9165": 28489568256.0, "9170": 28489568256.0, "9175": 28489568256.0, "9180": 28489568256.0, "9185": 28489568256.0, "9190": 28489568256.0, "9195": 28489568256.0, "9200": 28489568256.0, "9205": 28489568256.0, "9210": 28489568256.0, "9215": 28489568256.0, "9220": 28489568256.0, "9225": 28489568256.0, "9230": 28489568256.0, "9235": 28489568256.0, "9240": 28489568256.0, "9245": 28489568256.0, "9250": 28489568256.0, "9255": 28489568256.0, "9260": 28489568256.0, "9265": 28489568256.0, "9270": 28489568256.0, "9275": 28489568256.0, "9280": 28489568256.0, "9285": 28489568256.0, "9290": 28489568256.0, "9295": 28489568256.0, "9300": 28489568256.0, "9305": 28489568256.0, "9310": 28489568256.0, "9315": 28489568256.0, "9320": 28489568256.0, "9325": 28489568256.0, "9330": 28489568256.0, "9335": 28489568256.0, "9340": 28489568256.0, "9345": 28489568256.0, "9350": 28489568256.0, "9355": 28489568256.0, "9360": 28489568256.0, "9365": 28489568256.0, "9370": 28489568256.0, "9375": 28489568256.0, "9380": 28489568256.0, "9385": 28489568256.0, "9390": 28489568256.0, "9395": 28489568256.0, "9400": 28489568256.0, "9405": 28489568256.0, "9410": 28489568256.0, "9415": 28489568256.0, "9420": 28489568256.0, "9425": 28489568256.0, "9430": 28489568256.0, "9435": 28489568256.0, "9440": 28489568256.0, "9445": 28489568256.0, "9450": 28489568256.0, "9455": 28489568256.0, "9460": 28489568256.0, "9465": 28489568256.0, "9470": 28489568256.0, "9475": 28489568256.0, "9480": 28489568256.0, "9485": 28489568256.0, "9490": 28489568256.0, "9495": 28489568256.0, "9500": 28489568256.0, "9505": 28489568256.0, "9510": 28489568256.0, "9515": 28489568256.0, "9520": 28489568256.0, "9525": 28489568256.0, "9530": 28489568256.0, "9535": 28489568256.0, "9540": 28489568256.0, "9545": 28489568256.0, "9550": 28489568256.0, "9555": 28489568256.0, "9560": 28489568256.0, "9565": 28489568256.0, "9570": 28489568256.0, "9575": 28489568256.0, "9580": 28489568256.0, "9585": 28489568256.0, "9590": 28489568256.0, "9595": 28489568256.0, "9600": 28489568256.0, "9605": 28489568256.0, "9610": 28489568256.0, "9615": 28489568256.0, "9620": 28489568256.0, "9625": 28489568256.0, "9630": 28489568256.0, "9635": 28489568256.0, "9640": 28489568256.0, "9645": 28489568256.0, "9650": 28489568256.0, "9655": 28489568256.0, "9660": 28489568256.0, "9665": 28489568256.0, "9670": 28489568256.0, "9675": 28489568256.0, "9680": 28489568256.0, "9685": 28489568256.0, "9690": 28489568256.0, "9695": 28489568256.0, "9700": 28489568256.0, "9705": 28489568256.0, "9710": 28489568256.0, "9715": 28489568256.0, "9720": 28489568256.0, "9725": 28489568256.0, "9730": 28489568256.0, "9735": 28489568256.0, "9740": 28489568256.0, "9745": 28489568256.0, "9750": 28489568256.0, "9755": 28489568256.0, "9760": 28489568256.0, "9765": 28489568256.0, "9770": 28489568256.0, "9775": 28489568256.0, "9780": 28489568256.0, "9785": 28489568256.0, "9790": 28489568256.0, "9795": 28489568256.0, "9800": 28489568256.0, "9805": 28489568256.0, "9810": 28489568256.0, "9815": 28489568256.0, "9820": 28489568256.0, "9825": 28489568256.0, "9830": 28489568256.0, "9835": 28489568256.0, "9840": 28489568256.0, "9845": 28489568256.0, "9850": 28489568256.0, "9855": 28489568256.0, "9860": 28489568256.0, "9865": 28489568256.0, "9870": 28489568256.0, "9875": 28489568256.0, "9880": 28489568256.0, "9885": 28489568256.0, "9890": 28489568256.0, "9895": 28489568256.0, "9900": 28489568256.0, "9905": 28489568256.0, "9910": 28489568256.0, "9915": 28489568256.0, "9920": 28489568256.0, "9925": 28489568256.0, "9930": 28489568256.0, "9935": 28489568256.0, "9940": 28489568256.0, "9945": 28489568256.0, "9950": 28489568256.0, "9955": 28489568256.0, "9960": 28489568256.0, "9965": 28489568256.0, "9970": 28489568256.0, "9975": 28489568256.0, "9980": 28489568256.0, "9985": 28489568256.0, "9990": 28489568256.0, "9995": 28489568256.0, "10000": 28489568256.0, "10005": 28489568256.0, "10010": 28489568256.0, "10015": 28489568256.0, "10020": 28489568256.0, "10025": 28489568256.0, "10030": 28489568256.0, "10035": 28489568256.0, "10040": 28489568256.0, "10045": 28489568256.0, "10050": 28489568256.0, "10055": 28489568256.0, "10060": 28489568256.0, "10065": 28489568256.0, "10070": 28489568256.0, "10075": 28489568256.0, "10080": 28489568256.0, "10085": 28489568256.0, "10090": 28489568256.0, "10095": 28489568256.0, "10100": 28489568256.0, "10105": 28489568256.0, "10110": 28489568256.0, "10115": 28489568256.0, "10120": 28489568256.0, "10125": 28489568256.0, "10130": 28489568256.0, "10135": 28489568256.0, "10140": 28489568256.0, "10145": 28489568256.0, "10150": 28489568256.0, "10155": 28489568256.0, "10160": 28489568256.0, "10165": 28489568256.0, "10170": 28489568256.0, "10175": 28489568256.0, "10180": 28489568256.0, "10185": 28489568256.0, "10190": 28489568256.0, "10195": 28489568256.0, "10200": 28489568256.0, "10205": 28489568256.0, "10210": 28489568256.0, "10215": 28489568256.0, "10220": 28489568256.0, "10225": 28489568256.0, "10230": 28489568256.0, "10235": 28489568256.0, "10240": 28489568256.0, "10245": 28489568256.0, "10250": 28489568256.0, "10255": 28489568256.0, "10260": 28489568256.0, "10265": 28489568256.0, "10270": 28489568256.0, "10275": 28489568256.0, "10280": 28489568256.0, "10285": 28489568256.0, "10290": 28489568256.0, "10295": 28489568256.0, "10300": 28489568256.0, "10305": 28489568256.0, "10310": 28489568256.0, "10315": 28489568256.0, "10320": 28489568256.0, "10325": 28489568256.0, "10330": 28489568256.0, "10335": 28489568256.0, "10340": 28489568256.0, "10345": 28489568256.0, "10350": 28489568256.0, "10355": 28489568256.0, "10360": 28489568256.0, "10365": 28489568256.0, "10370": 28489568256.0, "10375": 28489568256.0, "10380": 28489568256.0, "10385": 28489568256.0, "10390": 28489568256.0, "10395": 28489568256.0, "10400": 28489568256.0, "10405": 28489568256.0, "10410": 28489568256.0, "10415": 28489568256.0, "10420": 28489568256.0, "10425": 28489568256.0, "10430": 28489568256.0, "10435": 28489568256.0, "10440": 28489568256.0, "10445": 28489568256.0, "10450": 28489568256.0, "10455": 28489568256.0, "10460": 28489568256.0, "10465": 28489568256.0, "10470": 28489568256.0, "10475": 28489568256.0, "10480": 28489568256.0, "10485": 28489568256.0, "10490": 28489568256.0, "10495": 28489568256.0, "10500": 28489568256.0, "10505": 28489568256.0, "10510": 28489568256.0, "10515": 28489568256.0, "10520": 28489568256.0, "10525": 28489568256.0, "10530": 28489568256.0, "10535": 28489568256.0, "10540": 28489568256.0, "10545": 28489568256.0, "10550": 28489568256.0, "10555": 28489568256.0, "10560": 28489568256.0, "10565": 28489568256.0, "10570": 28489568256.0, "10575": 28489568256.0, "10580": 28489568256.0, "10585": 28489568256.0, "10590": 28489568256.0, "10595": 28489568256.0, "10600": 28489568256.0, "10605": 28489568256.0, "10610": 28489568256.0, "10615": 28489568256.0, "10620": 28489568256.0, "10625": 28489568256.0, "10630": 28489568256.0, "10635": 28489568256.0, "10640": 28489568256.0, "10645": 28489568256.0, "10650": 28489568256.0, "10655": 28489568256.0, "10660": 28489568256.0, "10665": 28489568256.0, "10670": 28489568256.0, "10675": 28489568256.0, "10680": 28489568256.0, "10685": 28489568256.0, "10690": 28489568256.0, "10695": 28489568256.0, "10700": 28489568256.0, "10705": 28489568256.0, "10710": 28489568256.0, "10715": 28489568256.0, "10720": 28489568256.0, "10725": 28489568256.0, "10730": 28489568256.0, "10735": 28489568256.0, "10740": 28489568256.0, "10745": 28489568256.0, "10750": 28489568256.0, "10755": 28489568256.0, "10760": 28489568256.0, "10765": 28489568256.0, "10770": 28489568256.0, "10775": 28489568256.0, "10780": 28489568256.0, "10785": 28489568256.0, "10790": 28489568256.0, "10795": 28489568256.0, "10800": 28489568256.0, "10805": 28489568256.0, "10810": 28489568256.0, "10815": 28489568256.0, "10820": 28489568256.0, "10825": 28489568256.0, "10830": 28489568256.0, "10835": 28489568256.0, "10840": 28489568256.0, "10845": 28489568256.0, "10850": 28489568256.0, "10855": 28489568256.0, "10860": 28489568256.0, "10865": 28489568256.0, "10870": 28489568256.0, "10875": 28489568256.0, "10880": 28489568256.0, "10885": 28489568256.0, "10890": 28489568256.0, "10895": 28489568256.0, "10900": 28489568256.0, "10905": 28489568256.0, "10910": 28489568256.0, "10915": 28489568256.0, "10920": 28489568256.0, "10925": 28489568256.0, "10930": 28489568256.0, "10935": 28489568256.0, "10940": 28489568256.0, "10945": 28489568256.0, "10950": 28489568256.0, "10955": 28489568256.0, "10960": 28489568256.0, "10965": 28489568256.0, "10970": 28489568256.0, "10975": 28489568256.0, "10980": 28489568256.0, "10985": 28489568256.0, "10990": 28489568256.0, "10995": 28489568256.0, "11000": 28489568256.0, "11005": 28489568256.0, "11010": 28489568256.0, "11015": 28489568256.0, "11020": 28489568256.0, "11025": 28489568256.0, "11030": 28489568256.0, "11035": 28489568256.0, "11040": 28489568256.0, "11045": 28489568256.0, "11050": 28489568256.0, "11055": 28489568256.0, "11060": 28489568256.0, "11065": 28489568256.0, "11070": 28489568256.0, "11075": 28489568256.0, "11080": 28489568256.0, "11085": 28489568256.0, "11090": 28489568256.0, "11095": 28489568256.0, "11100": 28489568256.0, "11105": 28489568256.0, "11110": 28489568256.0, "11115": 28489568256.0, "11120": 28489568256.0, "11125": 28489568256.0, "11130": 28489568256.0, "11135": 28489568256.0, "11140": 28489568256.0, "11145": 28489568256.0, "11150": 28489568256.0, "11155": 28489568256.0, "11160": 28489568256.0, "11165": 28489568256.0, "11170": 28489568256.0, "11175": 28489568256.0, "11180": 28489568256.0, "11185": 28489568256.0, "11190": 28489568256.0, "11195": 28489568256.0, "11200": 28489568256.0, "11205": 28489568256.0, "11210": 28489568256.0, "11215": 28489568256.0, "11220": 28489568256.0, "11225": 28489568256.0, "11230": 28489568256.0, "11235": 28489568256.0, "11240": 28489568256.0, "11245": 28489568256.0, "11250": 28489568256.0, "11255": 28489568256.0, "11260": 28489568256.0, "11265": 28489568256.0, "11270": 28489568256.0, "11275": 28489568256.0, "11280": 28489568256.0, "11285": 28489568256.0, "11290": 28489568256.0, "11295": 28489568256.0, "11300": 28489568256.0, "11305": 28489568256.0, "11310": 28489568256.0, "11315": 28489568256.0, "11320": 28489568256.0, "11325": 28489568256.0, "11330": 28489568256.0, "11335": 28489568256.0, "11340": 28489568256.0, "11345": 28489568256.0, "11350": 28489568256.0, "11355": 28489568256.0, "11360": 28489568256.0, "11365": 28489568256.0, "11370": 28489568256.0, "11375": 28489568256.0, "11380": 28489568256.0, "11385": 28489568256.0, "11390": 28489568256.0, "11395": 28489568256.0, "11400": 28489568256.0, "11405": 28489568256.0, "11410": 28489568256.0, "11415": 28489568256.0, "11420": 28489568256.0, "11425": 28489568256.0, "11430": 28489568256.0, "11435": 28489568256.0, "11440": 28489568256.0, "11445": 28489568256.0, "11450": 28489568256.0, "11455": 28489568256.0, "11460": 28489568256.0, "11465": 28489568256.0, "11470": 28489568256.0, "11475": 28489568256.0, "11480": 28489568256.0, "11485": 28489568256.0, "11490": 28489568256.0, "11495": 28489568256.0, "11500": 28489568256.0, "11505": 28489568256.0, "11510": 28489568256.0, "11515": 28489568256.0, "11520": 28489568256.0, "11525": 28489568256.0, "11530": 28489568256.0, "11535": 28489568256.0, "11540": 28489568256.0, "11545": 28489568256.0, "11550": 28489568256.0, "11555": 28489568256.0, "11560": 28489568256.0, "11565": 28489568256.0, "11570": 28489568256.0, "11575": 28489568256.0, "11580": 28489568256.0, "11585": 28489568256.0, "11590": 28489568256.0, "11595": 28489568256.0, "11600": 28489568256.0, "11605": 28489568256.0, "11610": 28489568256.0, "11615": 28489568256.0, "11620": 28489568256.0, "11625": 28489568256.0, "11630": 28489568256.0, "11635": 28489568256.0, "11640": 28489568256.0, "11645": 28489568256.0, "11650": 28489568256.0, "11655": 28489568256.0, "11660": 28489568256.0, "11665": 28489568256.0, "11670": 28489568256.0, "11675": 28489568256.0, "11680": 28489568256.0, "11685": 28489568256.0, "11690": 28489568256.0, "11695": 28489568256.0, "11700": 28489568256.0, "11705": 28489568256.0, "11710": 28489568256.0, "11715": 28489568256.0, "11720": 28489568256.0, "11725": 28489568256.0, "11730": 28489568256.0, "11735": 28489568256.0, "11740": 28489568256.0, "11745": 28489568256.0, "11750": 28489568256.0, "11755": 28489568256.0, "11760": 28489568256.0, "11765": 28489568256.0, "11770": 28489568256.0, "11775": 28489568256.0, "11780": 28489568256.0, "11785": 28489568256.0, "11790": 28489568256.0, "11795": 28489568256.0, "11800": 28489568256.0, "11805": 28489568256.0, "11810": 28489568256.0, "11815": 28489568256.0, "11820": 28489568256.0, "11825": 28489568256.0, "11830": 28489568256.0, "11835": 28489568256.0, "11840": 28489568256.0, "11845": 28489568256.0, "11850": 28489568256.0, "11855": 28489568256.0, "11860": 28489568256.0, "11865": 28489568256.0, "11870": 28489568256.0, "11875": 28489568256.0, "11880": 28489568256.0, "11885": 28489568256.0, "11890": 28489568256.0, "11895": 28489568256.0, "11900": 28489568256.0, "11905": 28489568256.0, "11910": 28489568256.0, "11915": 28489568256.0, "11920": 28489568256.0, "11925": 28489568256.0, "11930": 28489568256.0, "11935": 28489568256.0, "11940": 28489568256.0, "11945": 28489568256.0, "11950": 28489568256.0, "11955": 28489568256.0, "11960": 28489568256.0, "11965": 28489568256.0, "11970": 28489568256.0, "11975": 28489568256.0, "11980": 28489568256.0, "11985": 28489568256.0, "11990": 28489568256.0, "11995": 28489568256.0, "12000": 28489568256.0, "12005": 28489568256.0, "12010": 28489568256.0, "12015": 28489568256.0, "12020": 28489568256.0, "12025": 28489568256.0, "12030": 28489568256.0, "12035": 28489568256.0, "12040": 28489568256.0, "12045": 28489568256.0, "12050": 28489568256.0, "12055": 28489568256.0, "12060": 28489568256.0, "12065": 28489568256.0, "12070": 28489568256.0, "12075": 28489568256.0, "12080": 28489568256.0, "12085": 28489568256.0, "12090": 28489568256.0, "12095": 28489568256.0, "12100": 28489568256.0, "12105": 28489568256.0, "12110": 28489568256.0, "12115": 28489568256.0, "12120": 28489568256.0, "12125": 28489568256.0, "12130": 28489568256.0, "12135": 28489568256.0, "12140": 28489568256.0, "12145": 28489568256.0, "12150": 28489568256.0, "12155": 28489568256.0, "12160": 28489568256.0, "12165": 28489568256.0, "12170": 28489568256.0, "12175": 28489568256.0, "12180": 28489568256.0, "12185": 28489568256.0, "12190": 28489568256.0, "12195": 28489568256.0, "12200": 28489568256.0, "12205": 28489568256.0, "12210": 28489568256.0, "12215": 28489568256.0, "12220": 28489568256.0, "12225": 28489568256.0, "12230": 28489568256.0, "12235": 28489568256.0, "12240": 28489568256.0, "12245": 28489568256.0, "12250": 28489568256.0, "12255": 28489568256.0, "12260": 28489568256.0, "12265": 28489568256.0, "12270": 28489568256.0, "12275": 28489568256.0, "12280": 28489568256.0, "12285": 28489568256.0, "12290": 28489568256.0, "12295": 28489568256.0, "12300": 28489568256.0, "12305": 28489568256.0, "12310": 28489568256.0, "12315": 28489568256.0, "12320": 28489568256.0, "12325": 28489568256.0, "12330": 28489568256.0, "12335": 28489568256.0, "12340": 28489568256.0, "12345": 28489568256.0, "12350": 28489568256.0, "12355": 28489568256.0, "12360": 28489568256.0, "12365": 28489568256.0, "12370": 28489568256.0, "12375": 28489568256.0, "12380": 28489568256.0, "12385": 28489568256.0, "12390": 28489568256.0, "12395": 28489568256.0, "12400": 28489568256.0, "12405": 28489568256.0, "12410": 28489568256.0, "12415": 28489568256.0, "12420": 28489568256.0, "12425": 28489568256.0, "12430": 28489568256.0, "12435": 28489568256.0, "12440": 28489568256.0, "12445": 28489568256.0, "12450": 28489568256.0, "12455": 28489568256.0, "12460": 28489568256.0, "12465": 28489568256.0, "12470": 28489568256.0, "12475": 28489568256.0, "12480": 28489568256.0, "12485": 28489568256.0, "12490": 28489568256.0, "12495": 28489568256.0, "12500": 28489568256.0, "12505": 28489568256.0, "12510": 28489568256.0, "12515": 28489568256.0, "12520": 28489568256.0, "12525": 28489568256.0, "12530": 28489568256.0, "12535": 28489568256.0, "12540": 28489568256.0, "12545": 28489568256.0, "12550": 28489568256.0, "12555": 28489568256.0, "12560": 28489568256.0, "12565": 28489568256.0, "12570": 28489568256.0, "12575": 28489568256.0, "12580": 28489568256.0, "12585": 28489568256.0, "12590": 28489568256.0, "12595": 28489568256.0, "12600": 28489568256.0, "12605": 28489568256.0, "12610": 28489568256.0, "12615": 28489568256.0, "12620": 28489568256.0, "12625": 28489568256.0, "12630": 28489568256.0, "12635": 28489568256.0, "12640": 28489568256.0, "12645": 28489568256.0, "12650": 28489568256.0, "12655": 28489568256.0, "12660": 28489568256.0, "12665": 28489568256.0, "12670": 28489568256.0, "12675": 28489568256.0, "12680": 28489568256.0, "12685": 28489568256.0, "12690": 28489568256.0, "12695": 28489568256.0, "12700": 28489568256.0, "12705": 28489568256.0, "12710": 28489568256.0, "12715": 28489568256.0, "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": "nan", "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": "nan", "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": "nan"}}, "iteration-time": {"start_step": 1, "end_step": 13000, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": "nan", "25": "nan", "30": "nan", "35": "nan", "40": "nan", "45": "nan", "50": "nan", "55": "nan", "60": "nan", "65": "nan", "70": "nan", "75": "nan", "80": "nan", "85": "nan", "90": "nan", "95": "nan", "100": 3.5554, "105": "nan", "110": "nan", "115": "nan", "120": "nan", "125": "nan", "130": "nan", "135": "nan", "140": "nan", "145": "nan", "150": "nan", "155": "nan", "160": "nan", "165": "nan", "170": "nan", "175": "nan", "180": "nan", "185": "nan", "190": "nan", "195": "nan", "200": 3.47185, "205": "nan", "210": "nan", "215": "nan", "220": "nan", "225": "nan", "230": "nan", "235": "nan", "240": "nan", "245": "nan", "250": "nan", "255": "nan", "260": "nan", "265": "nan", "270": "nan", "275": "nan", "280": "nan", "285": "nan", "290": "nan", "295": "nan", "300": 3.45692, "305": "nan", "310": "nan", "315": "nan", "320": "nan", "325": "nan", "330": "nan", "335": "nan", "340": "nan", "345": "nan", "350": "nan", "355": "nan", "360": "nan", "365": "nan", "370": "nan", "375": "nan", "380": "nan", "385": "nan", "390": "nan", "395": "nan", "400": 3.45471, "405": "nan", "410": "nan", "415": "nan", "420": "nan", "425": "nan", "430": "nan", "435": "nan", "440": "nan", "445": "nan", "450": "nan", "455": "nan", "460": "nan", "465": "nan", "470": "nan", "475": "nan", "480": "nan", "485": "nan", "490": "nan", "495": "nan", "500": 3.45467, "505": "nan", "510": "nan", "515": "nan", "520": "nan", "525": "nan", "530": "nan", "535": "nan", "540": "nan", "545": "nan", "550": "nan", "555": "nan", "560": "nan", "565": "nan", "570": "nan", "575": "nan", "580": "nan", "585": "nan", "590": "nan", "595": "nan", "600": 3.4543, "605": "nan", "610": "nan", "615": "nan", "620": "nan", "625": "nan", "630": "nan", "635": "nan", "640": "nan", "645": "nan", "650": "nan", "655": "nan", "660": "nan", "665": "nan", "670": "nan", "675": "nan", "680": "nan", "685": "nan", "690": "nan", "695": "nan", "700": 3.45264, "705": "nan", "710": "nan", "715": "nan", "720": "nan", "725": "nan", "730": "nan", "735": "nan", "740": "nan", "745": "nan", "750": "nan", "755": "nan", "760": "nan", "765": "nan", "770": "nan", "775": "nan", "780": "nan", "785": "nan", "790": "nan", "795": "nan", "800": 3.45125, "805": "nan", "810": "nan", "815": "nan", "820": "nan", "825": "nan", "830": "nan", "835": "nan", "840": "nan", "845": "nan", "850": "nan", "855": "nan", "860": "nan", "865": "nan", "870": "nan", "875": "nan", "880": "nan", "885": "nan", "890": "nan", "895": "nan", "900": 3.44668, "905": "nan", "910": "nan", "915": "nan", "920": "nan", "925": "nan", "930": "nan", "935": "nan", "940": "nan", "945": "nan", "950": "nan", "955": "nan", "960": "nan", "965": "nan", "970": "nan", "975": "nan", "980": "nan", "985": "nan", "990": "nan", "995": "nan", "1000": 3.44035, "1005": "nan", "1010": "nan", "1015": "nan", "1020": "nan", "1025": "nan", "1030": "nan", "1035": "nan", "1040": "nan", "1045": "nan", "1050": "nan", "1055": "nan", "1060": "nan", "1065": "nan", "1070": "nan", "1075": "nan", "1080": "nan", "1085": "nan", "1090": "nan", "1095": "nan", "1100": 3.43442, "1105": "nan", "1110": "nan", "1115": "nan", "1120": "nan", "1125": "nan", "1130": "nan", "1135": "nan", "1140": "nan", "1145": "nan", "1150": "nan", "1155": "nan", "1160": "nan", "1165": "nan", "1170": "nan", "1175": "nan", "1180": "nan", "1185": "nan", "1190": "nan", "1195": "nan", "1200": 3.4306, "1205": "nan", "1210": "nan", "1215": "nan", "1220": "nan", "1225": "nan", "1230": "nan", "1235": "nan", "1240": "nan", "1245": "nan", "1250": "nan", "1255": "nan", "1260": "nan", "1265": "nan", "1270": "nan", "1275": "nan", "1280": "nan", "1285": "nan", "1290": "nan", "1295": "nan", "1300": 3.42464, "1305": "nan", "1310": "nan", "1315": "nan", "1320": "nan", "1325": "nan", "1330": "nan", "1335": "nan", "1340": "nan", "1345": "nan", "1350": "nan", "1355": "nan", "1360": "nan", "1365": "nan", "1370": "nan", "1375": "nan", "1380": "nan", "1385": "nan", "1390": "nan", "1395": "nan", "1400": 3.42155, "1405": "nan", "1410": "nan", "1415": "nan", "1420": "nan", "1425": "nan", "1430": "nan", "1435": "nan", "1440": "nan", "1445": "nan", "1450": "nan", "1455": "nan", "1460": "nan", "1465": "nan", "1470": "nan", "1475": "nan", "1480": "nan", "1485": "nan", "1490": "nan", "1495": "nan", "1500": 3.4201, "1505": "nan", "1510": "nan", "1515": "nan", "1520": "nan", "1525": "nan", "1530": "nan", "1535": "nan", "1540": "nan", "1545": "nan", "1550": "nan", "1555": "nan", "1560": "nan", "1565": "nan", "1570": "nan", "1575": "nan", "1580": "nan", "1585": "nan", "1590": "nan", "1595": "nan", "1600": 3.41703, "1605": "nan", "1610": "nan", "1615": "nan", "1620": "nan", "1625": "nan", "1630": "nan", "1635": "nan", "1640": "nan", "1645": "nan", "1650": "nan", "1655": "nan", "1660": "nan", "1665": "nan", "1670": "nan", "1675": "nan", "1680": "nan", "1685": "nan", "1690": "nan", "1695": "nan", "1700": 3.41482, "1705": "nan", "1710": "nan", "1715": "nan", "1720": "nan", "1725": "nan", "1730": "nan", "1735": "nan", "1740": "nan", "1745": "nan", "1750": "nan", "1755": "nan", "1760": "nan", "1765": "nan", "1770": "nan", "1775": "nan", "1780": "nan", "1785": "nan", "1790": "nan", "1795": "nan", "1800": 3.41352, "1805": "nan", "1810": "nan", "1815": "nan", "1820": "nan", "1825": "nan", "1830": "nan", "1835": "nan", "1840": "nan", "1845": "nan", "1850": "nan", "1855": "nan", "1860": "nan", "1865": "nan", "1870": "nan", "1875": "nan", "1880": "nan", "1885": "nan", "1890": "nan", "1895": "nan", "1900": 3.4128, "1905": "nan", "1910": "nan", "1915": "nan", "1920": "nan", "1925": "nan", "1930": "nan", "1935": "nan", "1940": "nan", "1945": "nan", "1950": "nan", "1955": "nan", "1960": "nan", "1965": "nan", "1970": "nan", "1975": "nan", "1980": "nan", "1985": "nan", "1990": "nan", "1995": "nan", "2000": 3.40994, "2005": "nan", "2010": "nan", "2015": "nan", "2020": "nan", "2025": "nan", "2030": "nan", "2035": "nan", "2040": "nan", "2045": "nan", "2050": "nan", "2055": "nan", "2060": "nan", "2065": "nan", "2070": "nan", "2075": "nan", "2080": "nan", "2085": "nan", "2090": "nan", "2095": "nan", "2100": 3.40878, "2105": "nan", "2110": "nan", "2115": "nan", "2120": "nan", "2125": "nan", "2130": "nan", "2135": "nan", "2140": "nan", "2145": "nan", "2150": "nan", "2155": "nan", "2160": "nan", "2165": "nan", "2170": "nan", "2175": "nan", "2180": "nan", "2185": "nan", "2190": "nan", "2195": "nan", "2200": 3.40862, "2205": "nan", "2210": "nan", "2215": "nan", "2220": "nan", "2225": "nan", "2230": "nan", "2235": "nan", "2240": "nan", "2245": "nan", "2250": "nan", "2255": "nan", "2260": "nan", "2265": "nan", "2270": "nan", "2275": "nan", "2280": "nan", "2285": "nan", "2290": "nan", "2295": "nan", "2300": 3.41459, "2305": "nan", "2310": "nan", "2315": "nan", "2320": "nan", "2325": "nan", "2330": "nan", "2335": "nan", "2340": "nan", "2345": "nan", "2350": "nan", "2355": "nan", "2360": "nan", "2365": "nan", "2370": "nan", "2375": "nan", "2380": "nan", "2385": "nan", "2390": "nan", "2395": "nan", "2400": 3.40662, "2405": "nan", "2410": "nan", "2415": "nan", "2420": "nan", "2425": "nan", "2430": "nan", "2435": "nan", "2440": "nan", "2445": "nan", "2450": "nan", "2455": "nan", "2460": "nan", "2465": "nan", "2470": "nan", "2475": "nan", "2480": "nan", "2485": "nan", "2490": "nan", "2495": "nan", "2500": 3.40543, "2505": "nan", "2510": "nan", "2515": "nan", "2520": "nan", "2525": "nan", "2530": "nan", "2535": "nan", "2540": "nan", "2545": "nan", "2550": "nan", "2555": "nan", "2560": "nan", "2565": "nan", "2570": "nan", "2575": "nan", "2580": "nan", "2585": "nan", "2590": "nan", "2595": "nan", "2600": 3.40484, "2605": "nan", "2610": "nan", "2615": "nan", "2620": "nan", "2625": "nan", "2630": "nan", "2635": "nan", "2640": "nan", "2645": "nan", "2650": "nan", "2655": "nan", "2660": "nan", "2665": "nan", "2670": "nan", "2675": "nan", "2680": "nan", "2685": "nan", "2690": "nan", "2695": "nan", "2700": 3.40448, "2705": "nan", "2710": "nan", "2715": "nan", "2720": "nan", "2725": "nan", "2730": "nan", "2735": "nan", "2740": "nan", "2745": "nan", "2750": "nan", "2755": "nan", "2760": "nan", "2765": "nan", "2770": "nan", "2775": "nan", "2780": "nan", "2785": "nan", "2790": "nan", "2795": "nan", "2800": 3.403, "2805": "nan", "2810": "nan", "2815": "nan", "2820": "nan", "2825": "nan", "2830": "nan", "2835": "nan", "2840": "nan", "2845": "nan", "2850": "nan", "2855": "nan", "2860": "nan", "2865": "nan", "2870": "nan", "2875": "nan", "2880": "nan", "2885": "nan", "2890": "nan", "2895": "nan", "2900": 3.40346, "2905": "nan", "2910": "nan", "2915": "nan", "2920": "nan", "2925": "nan", "2930": "nan", "2935": "nan", "2940": "nan", "2945": "nan", "2950": "nan", "2955": "nan", "2960": "nan", "2965": "nan", "2970": "nan", "2975": "nan", "2980": "nan", "2985": "nan", "2990": "nan", "2995": "nan", "3000": 3.4023, "3005": "nan", "3010": "nan", "3015": "nan", "3020": "nan", "3025": "nan", "3030": "nan", "3035": "nan", "3040": "nan", "3045": "nan", "3050": "nan", "3055": "nan", "3060": "nan", "3065": "nan", "3070": "nan", "3075": "nan", "3080": "nan", "3085": "nan", "3090": "nan", "3095": "nan", "3100": 3.40069, "3105": "nan", "3110": "nan", "3115": "nan", "3120": "nan", "3125": "nan", "3130": "nan", "3135": "nan", "3140": "nan", "3145": "nan", "3150": "nan", "3155": "nan", "3160": "nan", "3165": "nan", "3170": "nan", "3175": "nan", "3180": "nan", "3185": "nan", "3190": "nan", "3195": "nan", "3200": 3.40162, "3205": "nan", "3210": "nan", "3215": "nan", "3220": "nan", "3225": "nan", "3230": "nan", "3235": "nan", "3240": "nan", "3245": "nan", "3250": "nan", "3255": "nan", "3260": "nan", "3265": "nan", "3270": "nan", "3275": "nan", "3280": "nan", "3285": "nan", "3290": "nan", "3295": "nan", "3300": 3.40071, "3305": "nan", "3310": "nan", "3315": "nan", "3320": "nan", "3325": "nan", "3330": "nan", "3335": "nan", "3340": "nan", "3345": "nan", "3350": "nan", "3355": "nan", "3360": "nan", "3365": "nan", "3370": "nan", "3375": "nan", "3380": "nan", "3385": "nan", "3390": "nan", "3395": "nan", "3400": 3.40058, "3405": "nan", "3410": "nan", "3415": "nan", "3420": "nan", "3425": "nan", "3430": "nan", "3435": "nan", "3440": "nan", "3445": "nan", "3450": "nan", "3455": "nan", "3460": "nan", "3465": "nan", "3470": "nan", "3475": "nan", "3480": "nan", "3485": "nan", "3490": "nan", "3495": "nan", "3500": 3.39993, "3505": "nan", "3510": "nan", "3515": "nan", "3520": "nan", "3525": "nan", "3530": "nan", "3535": "nan", "3540": "nan", "3545": "nan", "3550": "nan", "3555": "nan", "3560": "nan", "3565": "nan", "3570": "nan", "3575": "nan", "3580": "nan", "3585": "nan", "3590": "nan", "3595": "nan", "3600": 3.62689, "3605": "nan", "3610": "nan", "3615": "nan", "3620": "nan", "3625": "nan", "3630": "nan", "3635": "nan", "3640": "nan", "3645": "nan", "3650": "nan", "3655": "nan", "3660": "nan", "3665": "nan", "3670": "nan", "3675": "nan", "3680": "nan", "3685": "nan", "3690": "nan", "3695": "nan", "3700": 3.40474, "3705": "nan", "3710": "nan", "3715": "nan", "3720": "nan", "3725": "nan", "3730": "nan", "3735": "nan", "3740": "nan", "3745": "nan", "3750": "nan", "3755": "nan", "3760": "nan", "3765": "nan", "3770": "nan", "3775": "nan", "3780": "nan", "3785": "nan", "3790": "nan", "3795": "nan", "3800": 3.40515, "3805": "nan", "3810": "nan", "3815": "nan", "3820": "nan", "3825": "nan", "3830": "nan", "3835": "nan", "3840": "nan", "3845": "nan", "3850": "nan", "3855": "nan", "3860": "nan", "3865": "nan", "3870": "nan", "3875": "nan", "3880": "nan", "3885": "nan", "3890": "nan", "3895": "nan", "3900": 3.40436, "3905": "nan", "3910": "nan", "3915": "nan", "3920": "nan", "3925": "nan", "3930": "nan", "3935": "nan", "3940": "nan", "3945": "nan", "3950": "nan", "3955": "nan", "3960": "nan", "3965": "nan", "3970": "nan", "3975": "nan", "3980": "nan", "3985": "nan", "3990": "nan", "3995": "nan", "4000": 3.40304, "4005": "nan", "4010": "nan", "4015": "nan", "4020": "nan", "4025": "nan", "4030": "nan", "4035": "nan", "4040": "nan", "4045": "nan", "4050": "nan", "4055": "nan", "4060": "nan", "4065": "nan", "4070": "nan", "4075": "nan", "4080": "nan", "4085": "nan", "4090": "nan", "4095": "nan", "4100": 3.40076, "4105": "nan", "4110": "nan", "4115": "nan", "4120": "nan", "4125": "nan", "4130": "nan", "4135": "nan", "4140": "nan", "4145": "nan", "4150": "nan", "4155": "nan", "4160": "nan", "4165": "nan", "4170": "nan", "4175": "nan", "4180": "nan", "4185": "nan", "4190": "nan", "4195": "nan", "4200": 3.40222, "4205": "nan", "4210": "nan", "4215": "nan", "4220": "nan", "4225": "nan", "4230": "nan", "4235": "nan", "4240": "nan", "4245": "nan", "4250": "nan", "4255": "nan", "4260": "nan", "4265": "nan", "4270": "nan", "4275": "nan", "4280": "nan", "4285": "nan", "4290": "nan", "4295": "nan", "4300": 3.40241, "4305": "nan", "4310": "nan", "4315": "nan", "4320": "nan", "4325": "nan", "4330": "nan", "4335": "nan", "4340": "nan", "4345": "nan", "4350": "nan", "4355": "nan", "4360": "nan", "4365": "nan", "4370": "nan", "4375": "nan", "4380": "nan", "4385": "nan", "4390": "nan", "4395": "nan", "4400": 3.40228, "4405": "nan", "4410": "nan", "4415": "nan", "4420": "nan", "4425": "nan", "4430": "nan", "4435": "nan", "4440": "nan", "4445": "nan", "4450": "nan", "4455": "nan", "4460": "nan", "4465": "nan", "4470": "nan", "4475": "nan", "4480": "nan", "4485": "nan", "4490": "nan", "4495": "nan", "4500": 3.40282, "4505": "nan", "4510": "nan", "4515": "nan", "4520": "nan", "4525": "nan", "4530": "nan", "4535": "nan", "4540": "nan", "4545": "nan", "4550": "nan", "4555": "nan", "4560": "nan", "4565": "nan", "4570": "nan", "4575": "nan", "4580": "nan", "4585": "nan", "4590": "nan", "4595": "nan", "4600": 3.40214, "4605": "nan", "4610": "nan", "4615": "nan", "4620": "nan", "4625": "nan", "4630": "nan", "4635": "nan", "4640": "nan", "4645": "nan", "4650": "nan", "4655": "nan", "4660": "nan", "4665": "nan", "4670": "nan", "4675": "nan", "4680": "nan", "4685": "nan", "4690": "nan", "4695": "nan", "4700": 3.40155, "4705": "nan", "4710": "nan", "4715": "nan", "4720": "nan", "4725": "nan", "4730": "nan", "4735": "nan", "4740": "nan", "4745": "nan", "4750": "nan", "4755": "nan", "4760": "nan", "4765": "nan", "4770": "nan", "4775": "nan", "4780": "nan", "4785": "nan", "4790": "nan", "4795": "nan", "4800": 3.4016, "4805": "nan", "4810": "nan", "4815": "nan", "4820": "nan", "4825": "nan", "4830": "nan", "4835": "nan", "4840": "nan", "4845": "nan", "4850": "nan", "4855": "nan", "4860": "nan", "4865": "nan", "4870": "nan", "4875": "nan", "4880": "nan", "4885": "nan", "4890": "nan", "4895": "nan", "4900": 3.40208, "4905": "nan", "4910": "nan", "4915": "nan", "4920": "nan", "4925": "nan", "4930": "nan", "4935": "nan", "4940": "nan", "4945": "nan", "4950": "nan", "4955": "nan", "4960": "nan", "4965": "nan", "4970": "nan", "4975": "nan", "4980": "nan", "4985": "nan", "4990": "nan", "4995": "nan", "5000": 3.40265, "5005": "nan", "5010": "nan", "5015": "nan", "5020": "nan", "5025": "nan", "5030": "nan", "5035": "nan", "5040": "nan", "5045": "nan", "5050": "nan", "5055": "nan", "5060": "nan", "5065": "nan", "5070": "nan", "5075": "nan", "5080": "nan", "5085": "nan", "5090": "nan", "5095": "nan", "5100": 3.3986, "5105": "nan", "5110": "nan", "5115": "nan", "5120": "nan", "5125": "nan", "5130": "nan", "5135": "nan", "5140": "nan", "5145": "nan", "5150": "nan", "5155": "nan", "5160": "nan", "5165": "nan", "5170": "nan", "5175": "nan", "5180": "nan", "5185": "nan", "5190": "nan", "5195": "nan", "5200": 3.39887, "5205": "nan", "5210": "nan", "5215": "nan", "5220": "nan", "5225": "nan", "5230": "nan", "5235": "nan", "5240": "nan", "5245": "nan", "5250": "nan", "5255": "nan", "5260": "nan", "5265": "nan", "5270": "nan", "5275": "nan", "5280": "nan", "5285": "nan", "5290": "nan", "5295": "nan", "5300": 3.3991, "5305": "nan", "5310": "nan", "5315": "nan", "5320": "nan", "5325": "nan", "5330": "nan", "5335": "nan", "5340": "nan", "5345": "nan", "5350": "nan", "5355": "nan", "5360": "nan", "5365": "nan", "5370": "nan", "5375": "nan", "5380": "nan", "5385": "nan", "5390": "nan", "5395": "nan", "5400": 3.40087, "5405": "nan", "5410": "nan", "5415": "nan", "5420": "nan", "5425": "nan", "5430": "nan", "5435": "nan", "5440": "nan", "5445": "nan", "5450": "nan", "5455": "nan", "5460": "nan", "5465": "nan", "5470": "nan", "5475": "nan", "5480": "nan", "5485": "nan", "5490": "nan", "5495": "nan", "5500": 3.40055, "5505": "nan", "5510": "nan", "5515": "nan", "5520": "nan", "5525": "nan", "5530": "nan", "5535": "nan", "5540": "nan", "5545": "nan", "5550": "nan", "5555": "nan", "5560": "nan", "5565": "nan", "5570": "nan", "5575": "nan", "5580": "nan", "5585": "nan", "5590": "nan", "5595": "nan", "5600": 3.40101, "5605": "nan", "5610": "nan", "5615": "nan", "5620": "nan", "5625": "nan", "5630": "nan", "5635": "nan", "5640": "nan", "5645": "nan", "5650": "nan", "5655": "nan", "5660": "nan", "5665": "nan", "5670": "nan", "5675": "nan", "5680": "nan", "5685": "nan", "5690": "nan", "5695": "nan", "5700": 3.4007, "5705": "nan", "5710": "nan", "5715": "nan", "5720": "nan", "5725": "nan", "5730": "nan", "5735": "nan", "5740": "nan", "5745": "nan", "5750": "nan", "5755": "nan", "5760": "nan", "5765": "nan", "5770": "nan", "5775": "nan", "5780": "nan", "5785": "nan", "5790": "nan", "5795": "nan", "5800": 3.40177, "5805": "nan", "5810": "nan", "5815": "nan", "5820": "nan", "5825": "nan", "5830": "nan", "5835": "nan", "5840": "nan", "5845": "nan", "5850": "nan", "5855": "nan", "5860": "nan", "5865": "nan", "5870": "nan", "5875": "nan", "5880": "nan", "5885": "nan", "5890": "nan", "5895": "nan", "5900": 3.40093, "5905": "nan", "5910": "nan", "5915": "nan", "5920": "nan", "5925": "nan", "5930": "nan", "5935": "nan", "5940": "nan", "5945": "nan", "5950": "nan", "5955": "nan", "5960": "nan", "5965": "nan", "5970": "nan", "5975": "nan", "5980": "nan", "5985": "nan", "5990": "nan", "5995": "nan", "6000": 3.40207, "6005": "nan", "6010": "nan", "6015": "nan", "6020": "nan", "6025": "nan", "6030": "nan", "6035": "nan", "6040": "nan", "6045": "nan", "6050": "nan", "6055": "nan", "6060": "nan", "6065": "nan", "6070": "nan", "6075": "nan", "6080": "nan", "6085": "nan", "6090": "nan", "6095": "nan", "6100": 3.40047, "6105": "nan", "6110": "nan", "6115": "nan", "6120": "nan", "6125": "nan", "6130": "nan", "6135": "nan", "6140": "nan", "6145": "nan", "6150": "nan", "6155": "nan", "6160": "nan", "6165": "nan", "6170": "nan", "6175": "nan", "6180": "nan", "6185": "nan", "6190": "nan", "6195": "nan", "6200": 3.40254, "6205": "nan", "6210": "nan", "6215": "nan", "6220": "nan", "6225": "nan", "6230": "nan", "6235": "nan", "6240": "nan", "6245": "nan", "6250": "nan", "6255": "nan", "6260": "nan", "6265": "nan", "6270": "nan", "6275": "nan", "6280": "nan", "6285": "nan", "6290": "nan", "6295": "nan", "6300": 3.4024, "6305": "nan", "6310": "nan", "6315": "nan", "6320": "nan", "6325": "nan", "6330": "nan", "6335": "nan", "6340": "nan", "6345": "nan", "6350": "nan", "6355": "nan", "6360": "nan", "6365": "nan", "6370": "nan", "6375": "nan", "6380": "nan", "6385": "nan", "6390": "nan", "6395": "nan", "6400": 3.40281, "6405": "nan", "6410": "nan", "6415": "nan", "6420": "nan", "6425": "nan", "6430": "nan", "6435": "nan", "6440": "nan", "6445": "nan", "6450": "nan", "6455": "nan", "6460": "nan", "6465": "nan", "6470": "nan", "6475": "nan", "6480": "nan", "6485": "nan", "6490": "nan", "6495": "nan", "6500": 3.40268, "6505": "nan", "6510": "nan", "6515": "nan", "6520": "nan", "6525": "nan", "6530": "nan", "6535": "nan", "6540": "nan", "6545": "nan", "6550": "nan", "6555": "nan", "6560": "nan", "6565": "nan", "6570": "nan", "6575": "nan", "6580": "nan", "6585": "nan", "6590": "nan", "6595": "nan", "6600": 3.40166, "6605": "nan", "6610": "nan", "6615": "nan", "6620": "nan", "6625": "nan", "6630": "nan", "6635": "nan", "6640": "nan", "6645": "nan", "6650": "nan", "6655": "nan", "6660": "nan", "6665": "nan", "6670": "nan", "6675": "nan", "6680": "nan", "6685": "nan", "6690": "nan", "6695": "nan", "6700": 3.40129, "6705": "nan", "6710": "nan", "6715": "nan", "6720": "nan", "6725": "nan", "6730": "nan", "6735": "nan", "6740": "nan", "6745": "nan", "6750": "nan", "6755": "nan", "6760": "nan", "6765": "nan", "6770": "nan", "6775": "nan", "6780": "nan", "6785": "nan", "6790": "nan", "6795": "nan", "6800": 3.40047, "6805": "nan", "6810": "nan", "6815": "nan", "6820": "nan", "6825": "nan", "6830": "nan", "6835": "nan", "6840": "nan", "6845": "nan", "6850": "nan", "6855": "nan", "6860": "nan", "6865": "nan", "6870": "nan", "6875": "nan", "6880": "nan", "6885": "nan", "6890": "nan", "6895": "nan", "6900": 3.40079, "6905": "nan", "6910": "nan", "6915": "nan", "6920": "nan", "6925": "nan", "6930": "nan", "6935": "nan", "6940": "nan", "6945": "nan", "6950": "nan", "6955": "nan", "6960": "nan", "6965": "nan", "6970": "nan", "6975": "nan", "6980": "nan", "6985": "nan", "6990": "nan", "6995": "nan", "7000": 3.40034, "7005": "nan", "7010": "nan", "7015": "nan", "7020": "nan", "7025": "nan", "7030": "nan", "7035": "nan", "7040": "nan", "7045": "nan", "7050": "nan", "7055": "nan", "7060": "nan", "7065": "nan", "7070": "nan", "7075": "nan", "7080": "nan", "7085": "nan", "7090": "nan", "7095": "nan", "7100": 3.39893, "7105": "nan", "7110": "nan", "7115": "nan", "7120": "nan", "7125": "nan", "7130": "nan", "7135": "nan", "7140": "nan", "7145": "nan", "7150": "nan", "7155": "nan", "7160": "nan", "7165": "nan", "7170": "nan", "7175": "nan", "7180": "nan", "7185": "nan", "7190": "nan", "7195": "nan", "7200": 3.40035, "7205": "nan", "7210": "nan", "7215": "nan", "7220": "nan", "7225": "nan", "7230": "nan", "7235": "nan", "7240": "nan", "7245": "nan", "7250": "nan", "7255": "nan", "7260": "nan", "7265": "nan", "7270": "nan", "7275": "nan", "7280": "nan", "7285": "nan", "7290": "nan", "7295": "nan", "7300": 3.39965, "7305": "nan", "7310": "nan", "7315": "nan", "7320": "nan", "7325": "nan", "7330": "nan", "7335": "nan", "7340": "nan", "7345": "nan", "7350": "nan", "7355": "nan", "7360": "nan", "7365": "nan", "7370": "nan", "7375": "nan", "7380": "nan", "7385": "nan", "7390": "nan", "7395": "nan", "7400": 3.40073, "7405": "nan", "7410": "nan", "7415": "nan", "7420": "nan", "7425": "nan", "7430": "nan", "7435": "nan", "7440": "nan", "7445": "nan", "7450": "nan", "7455": "nan", "7460": "nan", "7465": "nan", "7470": "nan", "7475": "nan", "7480": "nan", "7485": "nan", "7490": "nan", "7495": "nan", "7500": 3.40077, "7505": "nan", "7510": "nan", "7515": "nan", "7520": "nan", "7525": "nan", "7530": "nan", "7535": "nan", "7540": "nan", "7545": "nan", "7550": "nan", "7555": "nan", "7560": "nan", "7565": "nan", "7570": "nan", "7575": "nan", "7580": "nan", "7585": "nan", "7590": "nan", "7595": "nan", "7600": 3.39989, "7605": "nan", "7610": "nan", "7615": "nan", "7620": "nan", "7625": "nan", "7630": "nan", "7635": "nan", "7640": "nan", "7645": "nan", "7650": "nan", "7655": "nan", "7660": "nan", "7665": "nan", "7670": "nan", "7675": "nan", "7680": "nan", "7685": "nan", "7690": "nan", "7695": "nan", "7700": 3.3988, "7705": "nan", "7710": "nan", "7715": "nan", "7720": "nan", "7725": "nan", "7730": "nan", "7735": "nan", "7740": "nan", "7745": "nan", "7750": "nan", "7755": "nan", "7760": "nan", "7765": "nan", "7770": "nan", "7775": "nan", "7780": "nan", "7785": "nan", "7790": "nan", "7795": "nan", "7800": 3.3995, "7805": "nan", "7810": "nan", "7815": "nan", "7820": "nan", "7825": "nan", "7830": "nan", "7835": "nan", "7840": "nan", "7845": "nan", "7850": "nan", "7855": "nan", "7860": "nan", "7865": "nan", "7870": "nan", "7875": "nan", "7880": "nan", "7885": "nan", "7890": "nan", "7895": "nan", "7900": 3.39928, "7905": "nan", "7910": "nan", "7915": "nan", "7920": "nan", "7925": "nan", "7930": "nan", "7935": "nan", "7940": "nan", "7945": "nan", "7950": "nan", "7955": "nan", "7960": "nan", "7965": "nan", "7970": "nan", "7975": "nan", "7980": "nan", "7985": "nan", "7990": "nan", "7995": "nan", "8000": 3.39834, "8005": "nan", "8010": "nan", "8015": "nan", "8020": "nan", "8025": "nan", "8030": "nan", "8035": "nan", "8040": "nan", "8045": "nan", "8050": "nan", "8055": "nan", "8060": "nan", "8065": "nan", "8070": "nan", "8075": "nan", "8080": "nan", "8085": "nan", "8090": "nan", "8095": "nan", "8100": 3.39716, "8105": "nan", "8110": "nan", "8115": "nan", "8120": "nan", "8125": "nan", "8130": "nan", "8135": "nan", "8140": "nan", "8145": "nan", "8150": "nan", "8155": "nan", "8160": "nan", "8165": "nan", "8170": "nan", "8175": "nan", "8180": "nan", "8185": "nan", "8190": "nan", "8195": "nan", "8200": 3.39809, "8205": "nan", "8210": "nan", "8215": "nan", "8220": "nan", "8225": "nan", "8230": "nan", "8235": "nan", "8240": "nan", "8245": "nan", "8250": "nan", "8255": "nan", "8260": "nan", "8265": "nan", "8270": "nan", "8275": "nan", "8280": "nan", "8285": "nan", "8290": "nan", "8295": "nan", "8300": 3.39856, "8305": "nan", "8310": "nan", "8315": "nan", "8320": "nan", "8325": "nan", "8330": "nan", "8335": "nan", "8340": "nan", "8345": "nan", "8350": "nan", "8355": "nan", "8360": "nan", "8365": "nan", "8370": "nan", "8375": "nan", "8380": "nan", "8385": "nan", "8390": "nan", "8395": "nan", "8400": 3.39822, "8405": "nan", "8410": "nan", "8415": "nan", "8420": "nan", "8425": "nan", "8430": "nan", "8435": "nan", "8440": "nan", "8445": "nan", "8450": "nan", "8455": "nan", "8460": "nan", "8465": "nan", "8470": "nan", "8475": "nan", "8480": "nan", "8485": "nan", "8490": "nan", "8495": "nan", "8500": 3.39927, "8505": "nan", "8510": "nan", "8515": "nan", "8520": "nan", "8525": "nan", "8530": "nan", "8535": "nan", "8540": "nan", "8545": "nan", "8550": "nan", "8555": "nan", "8560": "nan", "8565": "nan", "8570": "nan", "8575": "nan", "8580": "nan", "8585": "nan", "8590": "nan", "8595": "nan", "8600": 3.40049, "8605": "nan", "8610": "nan", "8615": "nan", "8620": "nan", "8625": "nan", "8630": "nan", "8635": "nan", "8640": "nan", "8645": "nan", "8650": "nan", "8655": "nan", "8660": "nan", "8665": "nan", "8670": "nan", "8675": "nan", "8680": "nan", "8685": "nan", "8690": "nan", "8695": "nan", "8700": 3.39943, "8705": "nan", "8710": "nan", "8715": "nan", "8720": "nan", "8725": "nan", "8730": "nan", "8735": "nan", "8740": "nan", "8745": "nan", "8750": "nan", "8755": "nan", "8760": "nan", "8765": "nan", "8770": "nan", "8775": "nan", "8780": "nan", "8785": "nan", "8790": "nan", "8795": "nan", "8800": 3.3999, "8805": "nan", "8810": "nan", "8815": "nan", "8820": "nan", "8825": "nan", "8830": "nan", "8835": "nan", "8840": "nan", "8845": "nan", "8850": "nan", "8855": "nan", "8860": "nan", "8865": "nan", "8870": "nan", "8875": "nan", "8880": "nan", "8885": "nan", "8890": "nan", "8895": "nan", "8900": 3.39978, "8905": "nan", "8910": "nan", "8915": "nan", "8920": "nan", "8925": "nan", "8930": "nan", "8935": "nan", "8940": "nan", "8945": "nan", "8950": "nan", "8955": "nan", "8960": "nan", "8965": "nan", "8970": "nan", "8975": "nan", "8980": "nan", "8985": "nan", "8990": "nan", "8995": "nan", "9000": 3.40051, "9005": "nan", "9010": "nan", "9015": "nan", "9020": "nan", "9025": "nan", "9030": "nan", "9035": "nan", "9040": "nan", "9045": "nan", "9050": "nan", "9055": "nan", "9060": "nan", "9065": "nan", "9070": "nan", "9075": "nan", "9080": "nan", "9085": "nan", "9090": "nan", "9095": "nan", "9100": 3.39875, "9105": "nan", "9110": "nan", "9115": "nan", "9120": "nan", "9125": "nan", "9130": "nan", "9135": "nan", "9140": "nan", "9145": "nan", "9150": "nan", "9155": "nan", "9160": "nan", "9165": "nan", "9170": "nan", "9175": "nan", "9180": "nan", "9185": "nan", "9190": "nan", "9195": "nan", "9200": 3.39972, "9205": "nan", "9210": "nan", "9215": "nan", "9220": "nan", "9225": "nan", "9230": "nan", "9235": "nan", "9240": "nan", "9245": "nan", "9250": "nan", "9255": "nan", "9260": "nan", "9265": "nan", "9270": "nan", "9275": "nan", "9280": "nan", "9285": "nan", "9290": "nan", "9295": "nan", "9300": 3.40071, "9305": "nan", "9310": "nan", "9315": "nan", "9320": "nan", "9325": "nan", "9330": "nan", "9335": "nan", "9340": "nan", "9345": "nan", "9350": "nan", "9355": "nan", "9360": "nan", "9365": "nan", "9370": "nan", "9375": "nan", "9380": "nan", "9385": "nan", "9390": "nan", "9395": "nan", "9400": 3.40005, "9405": "nan", "9410": "nan", "9415": "nan", "9420": "nan", "9425": "nan", "9430": "nan", "9435": "nan", "9440": "nan", "9445": "nan", "9450": "nan", "9455": "nan", "9460": "nan", "9465": "nan", "9470": "nan", "9475": "nan", "9480": "nan", "9485": "nan", "9490": "nan", "9495": "nan", "9500": 3.39896, "9505": "nan", "9510": "nan", "9515": "nan", "9520": "nan", "9525": "nan", "9530": "nan", "9535": "nan", "9540": "nan", "9545": "nan", "9550": "nan", "9555": "nan", "9560": "nan", "9565": "nan", "9570": "nan", "9575": "nan", "9580": "nan", "9585": "nan", "9590": "nan", "9595": "nan", "9600": 3.39953, "9605": "nan", "9610": "nan", "9615": "nan", "9620": "nan", "9625": "nan", "9630": "nan", "9635": "nan", "9640": "nan", "9645": "nan", "9650": "nan", "9655": "nan", "9660": "nan", "9665": "nan", "9670": "nan", "9675": "nan", "9680": "nan", "9685": "nan", "9690": "nan", "9695": "nan", "9700": 3.39866, "9705": "nan", "9710": "nan", "9715": "nan", "9720": "nan", "9725": "nan", "9730": "nan", "9735": "nan", "9740": "nan", "9745": "nan", "9750": "nan", "9755": "nan", "9760": "nan", "9765": "nan", "9770": "nan", "9775": "nan", "9780": "nan", "9785": "nan", "9790": "nan", "9795": "nan", "9800": 3.40009, "9805": "nan", "9810": "nan", "9815": "nan", "9820": "nan", "9825": "nan", "9830": "nan", "9835": "nan", "9840": "nan", "9845": "nan", "9850": "nan", "9855": "nan", "9860": "nan", "9865": "nan", "9870": "nan", "9875": "nan", "9880": "nan", "9885": "nan", "9890": "nan", "9895": "nan", "9900": 3.39979, "9905": "nan", "9910": "nan", "9915": "nan", "9920": "nan", "9925": "nan", "9930": "nan", "9935": "nan", "9940": "nan", "9945": "nan", "9950": "nan", "9955": "nan", "9960": "nan", "9965": "nan", "9970": "nan", "9975": "nan", "9980": "nan", "9985": "nan", "9990": "nan", "9995": "nan", "10000": 3.3996, "10005": "nan", "10010": "nan", "10015": "nan", "10020": "nan", "10025": "nan", "10030": "nan", "10035": "nan", "10040": "nan", "10045": "nan", "10050": "nan", "10055": "nan", "10060": "nan", "10065": "nan", "10070": "nan", "10075": "nan", "10080": "nan", "10085": "nan", "10090": "nan", "10095": "nan", "10100": 3.39815, "10105": "nan", "10110": "nan", "10115": "nan", "10120": "nan", "10125": "nan", "10130": "nan", "10135": "nan", "10140": "nan", "10145": "nan", "10150": "nan", "10155": "nan", "10160": "nan", "10165": "nan", "10170": "nan", "10175": "nan", "10180": "nan", "10185": "nan", "10190": "nan", "10195": "nan", "10200": 3.3996, "10205": "nan", "10210": "nan", "10215": "nan", "10220": "nan", "10225": "nan", "10230": "nan", "10235": "nan", "10240": "nan", "10245": "nan", "10250": "nan", "10255": "nan", "10260": "nan", "10265": "nan", "10270": "nan", "10275": "nan", "10280": "nan", "10285": "nan", "10290": "nan", "10295": "nan", "10300": 3.40067, "10305": "nan", "10310": "nan", "10315": "nan", "10320": "nan", "10325": "nan", "10330": "nan", "10335": "nan", "10340": "nan", "10345": "nan", "10350": "nan", "10355": "nan", "10360": "nan", "10365": "nan", "10370": "nan", "10375": "nan", "10380": "nan", "10385": "nan", "10390": "nan", "10395": "nan", "10400": 3.39985, "10405": "nan", "10410": "nan", "10415": "nan", "10420": "nan", "10425": "nan", "10430": "nan", "10435": "nan", "10440": "nan", "10445": "nan", "10450": "nan", "10455": "nan", "10460": "nan", "10465": "nan", "10470": "nan", "10475": "nan", "10480": "nan", "10485": "nan", "10490": "nan", "10495": "nan", "10500": 3.39923, "10505": "nan", "10510": "nan", "10515": "nan", "10520": "nan", "10525": "nan", "10530": "nan", "10535": "nan", "10540": "nan", "10545": "nan", "10550": "nan", "10555": "nan", "10560": "nan", "10565": "nan", "10570": "nan", "10575": "nan", "10580": "nan", "10585": "nan", "10590": "nan", "10595": "nan", "10600": 3.40087, "10605": "nan", "10610": "nan", "10615": "nan", "10620": "nan", "10625": "nan", "10630": "nan", "10635": "nan", "10640": "nan", "10645": "nan", "10650": "nan", "10655": "nan", "10660": "nan", "10665": "nan", "10670": "nan", "10675": "nan", "10680": "nan", "10685": "nan", "10690": "nan", "10695": "nan", "10700": 3.40098, "10705": "nan", "10710": "nan", "10715": "nan", "10720": "nan", "10725": "nan", "10730": "nan", "10735": "nan", "10740": "nan", "10745": "nan", "10750": "nan", "10755": "nan", "10760": "nan", "10765": "nan", "10770": "nan", "10775": "nan", "10780": "nan", "10785": "nan", "10790": "nan", "10795": "nan", "10800": 3.39973, "10805": "nan", "10810": "nan", "10815": "nan", "10820": "nan", "10825": "nan", "10830": "nan", "10835": "nan", "10840": "nan", "10845": "nan", "10850": "nan", "10855": "nan", "10860": "nan", "10865": "nan", "10870": "nan", "10875": "nan", "10880": "nan", "10885": "nan", "10890": "nan", "10895": "nan", "10900": 3.66029, "10905": "nan", "10910": "nan", "10915": "nan", "10920": "nan", "10925": "nan", "10930": "nan", "10935": "nan", "10940": "nan", "10945": "nan", "10950": "nan", "10955": "nan", "10960": "nan", "10965": "nan", "10970": "nan", "10975": "nan", "10980": "nan", "10985": "nan", "10990": "nan", "10995": "nan", "11000": 3.38756, "11005": "nan", "11010": "nan", "11015": "nan", "11020": "nan", "11025": "nan", "11030": "nan", "11035": "nan", "11040": "nan", "11045": "nan", "11050": "nan", "11055": "nan", "11060": "nan", "11065": "nan", "11070": "nan", "11075": "nan", "11080": "nan", "11085": "nan", "11090": "nan", "11095": "nan", "11100": 3.38656, "11105": "nan", "11110": "nan", "11115": "nan", "11120": "nan", "11125": "nan", "11130": "nan", "11135": "nan", "11140": "nan", "11145": "nan", "11150": "nan", "11155": "nan", "11160": "nan", "11165": "nan", "11170": "nan", "11175": "nan", "11180": "nan", "11185": "nan", "11190": "nan", "11195": "nan", "11200": 3.38765, "11205": "nan", "11210": "nan", "11215": "nan", "11220": "nan", "11225": "nan", "11230": "nan", "11235": "nan", "11240": "nan", "11245": "nan", "11250": "nan", "11255": "nan", "11260": "nan", "11265": "nan", "11270": "nan", "11275": "nan", "11280": "nan", "11285": "nan", "11290": "nan", "11295": "nan", "11300": 3.38835, "11305": "nan", "11310": "nan", "11315": "nan", "11320": "nan", "11325": "nan", "11330": "nan", "11335": "nan", "11340": "nan", "11345": "nan", "11350": "nan", "11355": "nan", "11360": "nan", "11365": "nan", "11370": "nan", "11375": "nan", "11380": "nan", "11385": "nan", "11390": "nan", "11395": "nan", "11400": 3.38844, "11405": "nan", "11410": "nan", "11415": "nan", "11420": "nan", "11425": "nan", "11430": "nan", "11435": "nan", "11440": "nan", "11445": "nan", "11450": "nan", "11455": "nan", "11460": "nan", "11465": "nan", "11470": "nan", "11475": "nan", "11480": "nan", "11485": "nan", "11490": "nan", "11495": "nan", "11500": 3.38793, "11505": "nan", "11510": "nan", "11515": "nan", "11520": "nan", "11525": "nan", "11530": "nan", "11535": "nan", "11540": "nan", "11545": "nan", "11550": "nan", "11555": "nan", "11560": "nan", "11565": "nan", "11570": "nan", "11575": "nan", "11580": "nan", "11585": "nan", "11590": "nan", "11595": "nan", "11600": 3.38725, "11605": "nan", "11610": "nan", "11615": "nan", "11620": "nan", "11625": "nan", "11630": "nan", "11635": "nan", "11640": "nan", "11645": "nan", "11650": "nan", "11655": "nan", "11660": "nan", "11665": "nan", "11670": "nan", "11675": "nan", "11680": "nan", "11685": "nan", "11690": "nan", "11695": "nan", "11700": 3.38738, "11705": "nan", "11710": "nan", "11715": "nan", "11720": "nan", "11725": "nan", "11730": "nan", "11735": "nan", "11740": "nan", "11745": "nan", "11750": "nan", "11755": "nan", "11760": "nan", "11765": "nan", "11770": "nan", "11775": "nan", "11780": "nan", "11785": "nan", "11790": "nan", "11795": "nan", "11800": 3.38748, "11805": "nan", "11810": "nan", "11815": "nan", "11820": "nan", "11825": "nan", "11830": "nan", "11835": "nan", "11840": "nan", "11845": "nan", "11850": "nan", "11855": "nan", "11860": "nan", "11865": "nan", "11870": "nan", "11875": "nan", "11880": "nan", "11885": "nan", "11890": "nan", "11895": "nan", "11900": 3.38839, "11905": "nan", "11910": "nan", "11915": "nan", "11920": "nan", "11925": "nan", "11930": "nan", "11935": "nan", "11940": "nan", "11945": "nan", "11950": "nan", "11955": "nan", "11960": "nan", "11965": "nan", "11970": "nan", "11975": "nan", "11980": "nan", "11985": "nan", "11990": "nan", "11995": "nan", "12000": 3.38814, "12005": "nan", "12010": "nan", "12015": "nan", "12020": "nan", "12025": "nan", "12030": "nan", "12035": "nan", "12040": "nan", "12045": "nan", "12050": "nan", "12055": "nan", "12060": "nan", "12065": "nan", "12070": "nan", "12075": "nan", "12080": "nan", "12085": "nan", "12090": "nan", "12095": "nan", "12100": 3.38677, "12105": "nan", "12110": "nan", "12115": "nan", "12120": "nan", "12125": "nan", "12130": "nan", "12135": "nan", "12140": "nan", "12145": "nan", "12150": "nan", "12155": "nan", "12160": "nan", "12165": "nan", "12170": "nan", "12175": "nan", "12180": "nan", "12185": "nan", "12190": "nan", "12195": "nan", "12200": 3.38679, "12205": "nan", "12210": "nan", "12215": "nan", "12220": "nan", "12225": "nan", "12230": "nan", "12235": "nan", "12240": "nan", "12245": "nan", "12250": "nan", "12255": "nan", "12260": "nan", "12265": "nan", "12270": "nan", "12275": "nan", "12280": "nan", "12285": "nan", "12290": "nan", "12295": "nan", "12300": 3.38609, "12305": "nan", "12310": "nan", "12315": "nan", "12320": "nan", "12325": "nan", "12330": "nan", "12335": "nan", "12340": "nan", "12345": "nan", "12350": "nan", "12355": "nan", "12360": "nan", "12365": "nan", "12370": "nan", "12375": "nan", "12380": "nan", "12385": "nan", "12390": "nan", "12395": "nan", "12400": 3.38665, "12405": "nan", "12410": "nan", "12415": "nan", "12420": "nan", "12425": "nan", "12430": "nan", "12435": "nan", "12440": "nan", "12445": "nan", "12450": "nan", "12455": "nan", "12460": "nan", "12465": "nan", "12470": "nan", "12475": "nan", "12480": "nan", "12485": "nan", "12490": "nan", "12495": "nan", "12500": 3.38727, "12505": "nan", "12510": "nan", "12515": "nan", "12520": "nan", "12525": "nan", "12530": "nan", "12535": "nan", "12540": "nan", "12545": "nan", "12550": "nan", "12555": "nan", "12560": "nan", "12565": "nan", "12570": "nan", "12575": "nan", "12580": "nan", "12585": "nan", "12590": "nan", "12595": "nan", "12600": 3.38752, "12605": "nan", "12610": "nan", "12615": "nan", "12620": "nan", "12625": "nan", "12630": "nan", "12635": "nan", "12640": "nan", "12645": "nan", "12650": "nan", "12655": "nan", "12660": "nan", "12665": "nan", "12670": "nan", "12675": "nan", "12680": "nan", "12685": "nan", "12690": "nan", "12695": "nan", "12700": 3.38807, "12705": "nan", "12710": "nan", "12715": "nan", "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": "nan", "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": "nan", "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": "nan"}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release_sm_gb200/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 13000, "step_interval": 5, "values": {"1": 12.98419, "5": 12.93854, "10": 12.06404, "15": 11.97879, "20": 10.53584, "25": 10.11956, "30": 9.7286, "35": 9.44176, "40": 9.23735, "45": 9.03751, "50": 8.85055, "55": 8.63984, "60": 8.64552, "65": 8.5235, "70": 8.478, "75": 8.3676, "80": 8.15365, "85": 8.15327, "90": 8.03161, "95": 7.95115, "100": 7.84388, "105": 7.72026, "110": 7.54733, "115": 7.45663, "120": 7.46178, "125": 7.47933, "130": 7.29598, "135": 7.26374, "140": 7.18817, "145": 7.03912, "150": 7.16593, "155": 7.03177, "160": 6.9153, "165": 6.90426, "170": 6.82329, "175": 6.89163, "180": 6.83373, "185": 6.7479, "190": 6.70023, "195": 6.63022, "200": 6.69259, "205": 6.62944, "210": 6.51496, "215": 6.49432, "220": 6.50588, "225": 6.49262, "230": 6.49489, "235": 6.48163, "240": 6.36127, "245": 6.36637, "250": 6.29142, "255": 6.45496, "260": 6.34018, "265": 6.26489, "270": 6.22894, "275": 6.23729, "280": 6.18967, "285": 6.20291, "290": 6.17708, "295": 6.13232, "300": 6.10682, "305": 6.00908, "310": 6.05252, "315": 6.05204, "320": 5.96463, "325": 5.91966, "330": 6.0284, "335": 5.99858, "340": 5.95439, "345": 5.93592, "350": 5.91905, "355": 5.86171, "360": 5.86274, "365": 5.82901, "370": 5.79297, "375": 5.80909, "380": 5.84904, "385": 5.80185, "390": 5.77686, "395": 5.68835, "400": 5.63684, "405": 5.66531, "410": 5.66265, "415": 5.72537, "420": 5.65949, "425": 5.68078, "430": 5.63808, "435": 5.57811, "440": 5.59213, "445": 5.5255, "450": 5.57672, "455": 5.51795, "460": 5.50422, "465": 5.5785, "470": 5.56096, "475": 5.48941, "480": 5.49418, "485": 5.50116, "490": 5.45593, "495": 5.46922, "500": 5.42565, "505": 5.41648, "510": 5.43832, "515": 5.42661, "520": 5.42935, "525": 5.29381, "530": 5.32998, "535": 5.31902, "540": 5.34353, "545": 5.39054, "550": 5.37019, "555": 5.19836, "560": 5.35374, "565": 5.30845, "570": 5.25365, "575": 5.31206, "580": 5.23746, "585": 5.20838, "590": 5.21845, "595": 5.23516, "600": 5.26955, "605": 5.21896, "610": 5.2345, "615": 5.20262, "620": 5.21905, "625": 5.1986, "630": 5.14834, "635": 5.13713, "640": 5.09747, "645": 5.1618, "650": 5.16552, "655": 5.15803, "660": 5.07055, "665": 5.10628, "670": 5.08585, "675": 5.05517, "680": 5.04384, "685": 5.03952, "690": 5.04772, "695": 5.00275, "700": 4.99979, "705": 4.95596, "710": 4.98915, "715": 4.92713, "720": 4.88403, "725": 4.84387, "730": 4.89609, "735": 4.86372, "740": 4.90625, "745": 4.76089, "750": 4.77974, "755": 4.83335, "760": 4.81568, "765": 4.76505, "770": 4.7234, "775": 4.6921, "780": 4.7432, "785": 4.78676, "790": 4.67828, "795": 4.64866, "800": 4.63612, "805": 4.64195, "810": 4.65077, "815": 4.63084, "820": 4.64482, "825": 4.61571, "830": 4.59387, "835": 4.56863, "840": 4.48927, "845": 4.49634, "850": 4.47352, "855": 4.52409, "860": 4.46251, "865": 4.52223, "870": 4.48175, "875": 4.39297, "880": 4.44361, "885": 4.4111, "890": 4.43659, "895": 4.41378, "900": 4.3919, "905": 4.35935, "910": 4.36354, "915": 4.34282, "920": 4.39946, "925": 4.41667, "930": 4.31531, "935": 4.30369, "940": 4.35342, "945": 4.30303, "950": 4.34463, "955": 4.26527, "960": 4.19654, "965": 4.29565, "970": 4.28642, "975": 4.23045, "980": 4.22564, "985": 4.1684, "990": 4.15028, "995": 4.17776, "1000": 4.23154, "1005": 4.18887, "1010": 4.16924, "1015": 4.11841, "1020": 4.15687, "1025": 4.21713, "1030": 4.13768, "1035": 4.10871, "1040": 4.118, "1045": 4.10979, "1050": 4.15517, "1055": 4.09572, "1060": 4.1093, "1065": 4.0737, "1070": 4.06643, "1075": 4.08655, "1080": 4.09213, "1085": 4.07259, "1090": 4.01677, "1095": 4.09659, "1100": 4.04859, "1105": 4.07084, "1110": 4.03263, "1115": 4.01534, "1120": 4.00246, "1125": 4.01029, "1130": 4.06057, "1135": 4.00668, "1140": 4.01528, "1145": 3.944, "1150": 4.02955, "1155": 4.00187, "1160": 3.97146, "1165": 3.86545, "1170": 3.91556, "1175": 3.95241, "1180": 3.9746, "1185": 3.9952, "1190": 3.94499, "1195": 3.96181, "1200": 3.8893, "1205": 3.8679, "1210": 3.98189, "1215": 3.81925, "1220": 3.86366, "1225": 3.81826, "1230": 3.91477, "1235": 3.90783, "1240": 3.89088, "1245": 3.7878, "1250": 3.83462, "1255": 3.86647, "1260": 3.90676, "1265": 3.79799, "1270": 3.87717, "1275": 3.83332, "1280": 3.82518, "1285": 3.84272, "1290": 3.88049, "1295": 3.84309, "1300": 3.82132, "1305": 3.8325, "1310": 3.81275, "1315": 3.80311, "1320": 3.81776, "1325": 3.71836, "1330": 3.7917, "1335": 3.74949, "1340": 3.75432, "1345": 3.75185, "1350": 3.72246, "1355": 3.76982, "1360": 3.73827, "1365": 3.72479, "1370": 3.73938, "1375": 3.73694, "1380": 3.75033, "1385": 3.75324, "1390": 3.66352, "1395": 3.74143, "1400": 3.73229, "1405": 3.66226, "1410": 3.65804, "1415": 3.66281, "1420": 3.69529, "1425": 3.71298, "1430": 3.68032, "1435": 3.66567, "1440": 3.62691, "1445": 3.68062, "1450": 3.66766, "1455": 3.62753, "1460": 3.65479, "1465": 3.66938, "1470": 3.62148, "1475": 3.69775, "1480": 3.65738, "1485": 3.66245, "1490": 3.63265, "1495": 3.61492, "1500": 3.6367, "1505": 3.68281, "1510": 3.54566, "1515": 3.609, "1520": 3.63666, "1525": 3.60047, "1530": 3.58381, "1535": 3.5969, "1540": 3.59479, "1545": 3.61209, "1550": 3.55084, "1555": 3.60011, "1560": 3.61885, "1565": 3.61562, "1570": 3.59161, "1575": 3.54997, "1580": 3.58264, "1585": 3.57028, "1590": 3.46649, "1595": 3.51916, "1600": 3.51438, "1605": 3.56399, "1610": 3.56561, "1615": 3.48914, "1620": 3.51872, "1625": 3.46038, "1630": 3.50057, "1635": 3.54094, "1640": 3.52781, "1645": 3.53869, "1650": 3.49558, "1655": 3.47362, "1660": 3.53058, "1665": 3.4694, "1670": 3.51604, "1675": 3.49294, "1680": 3.47856, "1685": 3.47172, "1690": 3.48567, "1695": 3.48595, "1700": 3.46718, "1705": 3.40405, "1710": 3.4904, "1715": 3.50163, "1720": 3.43217, "1725": 3.43389, "1730": 3.44993, "1735": 3.46203, "1740": 3.45825, "1745": 3.45364, "1750": 3.4018, "1755": 3.41959, "1760": 3.38236, "1765": 3.42143, "1770": 3.42391, "1775": 3.38829, "1780": 3.43133, "1785": 3.42531, "1790": 3.38061, "1795": 3.40413, "1800": 3.34505, "1805": 3.40411, "1810": 3.3443, "1815": 3.41721, "1820": 3.41298, "1825": 3.41394, "1830": 3.34507, "1835": 3.44327, "1840": 3.40026, "1845": 3.42225, "1850": 3.3737, "1855": 3.37379, "1860": 3.34718, "1865": 3.39774, "1870": 3.31941, "1875": 3.44184, "1880": 3.34583, "1885": 3.33834, "1890": 3.33625, "1895": 3.40145, "1900": 3.36496, "1905": 3.32271, "1910": 3.3368, "1915": 3.32331, "1920": 3.36555, "1925": 3.33897, "1930": 3.31467, "1935": 3.31123, "1940": 3.3762, "1945": 3.27313, "1950": 3.41256, "1955": 3.30548, "1960": 3.29785, "1965": 3.27936, "1970": 3.29856, "1975": 3.35566, "1980": 3.33591, "1985": 3.24229, "1990": 3.30028, "1995": 3.28316, "2000": 3.28226, "2005": 3.26535, "2010": 3.27352, "2015": 3.2365, "2020": 3.27501, "2025": 3.27742, "2030": 3.29047, "2035": 3.29225, "2040": 3.23929, "2045": 3.2491, "2050": 3.27456, "2055": 3.33373, "2060": 3.29439, "2065": 3.23928, "2070": 3.29227, "2075": 3.2624, "2080": 3.23888, "2085": 3.27623, "2090": 3.13844, "2095": 3.28702, "2100": 3.23688, "2105": 3.19771, "2110": 3.20277, "2115": 3.2382, "2120": 3.1813, "2125": 3.22025, "2130": 3.22372, "2135": 3.29164, "2140": 3.20319, "2145": 3.20474, "2150": 3.21527, "2155": 3.23198, "2160": 3.19331, "2165": 3.25562, "2170": 3.22466, "2175": 3.20239, "2180": 3.25351, "2185": 3.25349, "2190": 3.24601, "2195": 3.17461, "2200": 3.19592, "2205": 3.16638, "2210": 3.11756, "2215": 3.18822, "2220": 3.19477, "2225": 3.17024, "2230": 3.12954, "2235": 3.17891, "2240": 3.21397, "2245": 3.18137, "2250": 3.19957, "2255": 3.12336, "2260": 3.13509, "2265": 3.22906, "2270": 3.18616, "2275": 3.1529, "2280": 3.18794, "2285": 3.16881, "2290": 3.16738, "2295": 3.20792, "2300": 3.14094, "2305": 3.16694, "2310": 3.12982, "2315": 3.0574, "2320": 3.12693, "2325": 3.18302, "2330": 3.13863, "2335": 3.13173, "2340": 3.16429, "2345": 3.11609, "2350": 3.12193, "2355": 3.12102, "2360": 3.16057, "2365": 3.14069, "2370": 3.1566, "2375": 3.13306, "2380": 3.1089, "2385": 3.07598, "2390": 3.09423, "2395": 3.0923, "2400": 3.09049, "2405": 3.10312, "2410": 3.08738, "2415": 3.0859, "2420": 3.07232, "2425": 3.07515, "2430": 3.08315, "2435": 3.0666, "2440": 3.09293, "2445": 3.05738, "2450": 3.12724, "2455": 3.1668, "2460": 3.08258, "2465": 3.07557, "2470": 3.03871, "2475": 3.07295, "2480": 3.09401, "2485": 3.04631, "2490": 3.05694, "2495": 3.07352, "2500": 3.07374, "2505": 3.09757, "2510": 3.11425, "2515": 3.04708, "2520": 3.0804, "2525": 3.02461, "2530": 3.04704, "2535": 3.08948, "2540": 3.07683, "2545": 3.05223, "2550": 3.0346, "2555": 3.106, "2560": 3.05236, "2565": 3.10772, "2570": 3.01402, "2575": 3.04644, "2580": 3.07514, "2585": 3.02095, "2590": 3.06575, "2595": 2.99715, "2600": 3.06315, "2605": 3.04568, "2610": 3.04704, "2615": 3.05832, "2620": 2.98626, "2625": 3.00956, "2630": 3.03577, "2635": 3.05424, "2640": 3.01527, "2645": 3.05906, "2650": 3.02519, "2655": 3.00005, "2660": 3.00566, "2665": 3.03857, "2670": 2.9869, "2675": 2.96406, "2680": 2.99471, "2685": 3.00999, "2690": 3.00193, "2695": 2.99167, "2700": 3.02505, "2705": 2.98729, "2710": 2.97342, "2715": 2.96605, "2720": 3.02717, "2725": 3.00415, "2730": 3.02744, "2735": 2.98433, "2740": 2.98866, "2745": 3.00764, "2750": 3.01004, "2755": 2.97318, "2760": 3.00963, "2765": 3.01277, "2770": 2.98399, "2775": 2.99786, "2780": 3.0332, "2785": 2.95954, "2790": 2.96222, "2795": 2.97034, "2800": 2.98127, "2805": 2.93987, "2810": 2.983, "2815": 2.9543, "2820": 2.98172, "2825": 3.00651, "2830": 2.98887, "2835": 2.91425, "2840": 2.92476, "2845": 2.95821, "2850": 2.96948, "2855": 2.97557, "2860": 2.96429, "2865": 2.91763, "2870": 2.98977, "2875": 2.9248, "2880": 2.95945, "2885": 2.91822, "2890": 2.97801, "2895": 2.92798, "2900": 2.94851, "2905": 3.01025, "2910": 2.90885, "2915": 2.95254, "2920": 2.97185, "2925": 2.95597, "2930": 2.94982, "2935": 2.93409, "2940": 2.93873, "2945": 2.9082, "2950": 2.97314, "2955": 2.90745, "2960": 2.96696, "2965": 2.86293, "2970": 2.95934, "2975": 2.99626, "2980": 2.93691, "2985": 3.03654, "2990": 2.93999, "2995": 2.87144, "3000": 2.92971, "3005": 2.88913, "3010": 2.93432, "3015": 2.91276, "3020": 2.91232, "3025": 2.92393, "3030": 2.92045, "3035": 2.95576, "3040": 2.91549, "3045": 2.83698, "3050": 2.89779, "3055": 2.90445, "3060": 2.92488, "3065": 2.92719, "3070": 2.87846, "3075": 2.86687, "3080": 2.92258, "3085": 2.89966, "3090": 2.91396, "3095": 2.92199, "3100": 2.87182, "3105": 2.92802, "3110": 2.91863, "3115": 2.9403, "3120": 2.9518, "3125": 2.85751, "3130": 2.93151, "3135": 2.92635, "3140": 2.87276, "3145": 2.92375, "3150": 2.85484, "3155": 2.8485, "3160": 2.83637, "3165": 2.84033, "3170": 2.88849, "3175": 2.90239, "3180": 2.85043, "3185": 2.89154, "3190": 2.90425, "3195": 2.92291, "3200": 2.92063, "3205": 2.86226, "3210": 2.86891, "3215": 2.91036, "3220": 2.87807, "3225": 2.86421, "3230": 2.81586, "3235": 2.87333, "3240": 2.87156, "3245": 2.90126, "3250": 2.85213, "3255": 2.84713, "3260": 2.86187, "3265": 2.86787, "3270": 2.84284, "3275": 2.86909, "3280": 2.79921, "3285": 2.81195, "3290": 2.86578, "3295": 2.89818, "3300": 2.87821, "3305": 2.85947, "3310": 2.85651, "3315": 2.81024, "3320": 2.82854, "3325": 2.8296, "3330": 2.82729, "3335": 2.84636, "3340": 2.83138, "3345": 2.85448, "3350": 2.85382, "3355": 2.86154, "3360": 2.8033, "3365": 2.8523, "3370": 2.84468, "3375": 2.8454, "3380": 2.85596, "3385": 2.87939, "3390": 2.85981, "3395": 2.80661, "3400": 2.78593, "3405": 2.83043, "3410": 2.84374, "3415": 2.85748, "3420": 2.82452, "3425": 2.80324, "3430": 2.8249, "3435": 2.88819, "3440": 2.81499, "3445": 2.87505, "3450": 2.81623, "3455": 2.79143, "3460": 2.81576, "3465": 2.84564, "3470": 2.83662, "3475": 2.78218, "3480": 2.84288, "3485": 2.82039, "3490": 2.89267, "3495": 2.84976, "3500": 2.83815, "3505": 2.82178, "3510": 2.81512, "3515": 2.83844, "3520": 2.77805, "3525": 2.80344, "3530": 2.84921, "3535": 2.78161, "3540": 2.83674, "3545": 2.80973, "3550": 2.79547, "3555": 2.81863, "3560": 2.82078, "3565": 2.82666, "3570": 2.80124, "3575": 2.79967, "3580": 2.82045, "3585": 2.83294, "3590": 2.82697, "3595": 2.78815, "3600": 2.74915, "3605": 2.7873, "3610": 2.84627, "3615": 2.7499, "3620": 2.8026, "3625": 2.88969, "3630": 2.78406, "3635": 2.78796, "3640": 2.7812, "3645": 2.76636, "3650": 2.80296, "3655": 2.82044, "3660": 2.76688, "3665": 2.78201, "3670": 2.76782, "3675": 2.77556, "3680": 2.80739, "3685": 2.80119, "3690": 2.80155, "3695": 2.80812, "3700": 2.78648, "3705": 2.78205, "3710": 2.74897, "3715": 2.80626, "3720": 2.79273, "3725": 2.80205, "3730": 2.84576, "3735": 2.79732, "3740": 2.74582, "3745": 2.78748, "3750": 2.80839, "3755": 2.78946, "3760": 2.75624, "3765": 2.7529, "3770": 2.76053, "3775": 2.77003, "3780": 2.75736, "3785": 2.77603, "3790": 2.74395, "3795": 2.79193, "3800": 2.79977, "3805": 2.74927, "3810": 2.8012, "3815": 2.75973, "3820": 2.78378, "3825": 2.72979, "3830": 2.74412, "3835": 2.81283, "3840": 2.72346, "3845": 2.71207, "3850": 2.77208, "3855": 2.71862, "3860": 2.79999, "3865": 2.74819, "3870": 2.7788, "3875": 2.75589, "3880": 2.78591, "3885": 2.78703, "3890": 2.745, "3895": 2.79656, "3900": 2.76217, "3905": 2.7204, "3910": 2.74251, "3915": 2.75354, "3920": 2.7922, "3925": 2.779, "3930": 2.70818, "3935": 2.73844, "3940": 2.75097, "3945": 2.7403, "3950": 2.71938, "3955": 2.77595, "3960": 2.7594, "3965": 2.74095, "3970": 2.75582, "3975": 2.72197, "3980": 2.73797, "3985": 2.74501, "3990": 2.69182, "3995": 2.77927, "4000": 2.73467, "4005": 2.76845, "4010": 2.70729, "4015": 2.72138, "4020": 2.7543, "4025": 2.73235, "4030": 2.65702, "4035": 2.69604, "4040": 2.7478, "4045": 2.74602, "4050": 2.79108, "4055": 2.72052, "4060": 2.7132, "4065": 2.65055, "4070": 2.80438, "4075": 2.75742, "4080": 2.71911, "4085": 2.74853, "4090": 2.67511, "4095": 2.68776, "4100": 2.71367, "4105": 2.73756, "4110": 2.72971, "4115": 2.70539, "4120": 2.73419, "4125": 2.70252, "4130": 2.69562, "4135": 2.68553, "4140": 2.6792, "4145": 2.77962, "4150": 2.71146, "4155": 2.73921, "4160": 2.75831, "4165": 2.71903, "4170": 2.67374, "4175": 2.71681, "4180": 2.72466, "4185": 2.72363, "4190": 2.74097, "4195": 2.69297, "4200": 2.70506, "4205": 2.74554, "4210": 2.67484, "4215": 2.66292, "4220": 2.65885, "4225": 2.70625, "4230": 2.71947, "4235": 2.73163, "4240": 2.71, "4245": 2.69905, "4250": 2.71229, "4255": 2.6486, "4260": 2.7231, "4265": 2.74057, "4270": 2.72269, "4275": 2.68914, "4280": 2.70013, "4285": 2.73156, "4290": 2.68402, "4295": 2.69061, "4300": 2.69891, "4305": 2.70215, "4310": 2.73018, "4315": 2.70831, "4320": 2.70146, "4325": 2.70427, "4330": 2.71103, "4335": 2.68951, "4340": 2.69679, "4345": 2.72847, "4350": 2.67635, "4355": 2.69369, "4360": 2.71057, "4365": 2.78505, "4370": 2.73161, "4375": 2.7416, "4380": 2.70035, "4385": 2.69729, "4390": 2.6976, "4395": 2.75157, "4400": 2.6658, "4405": 2.66826, "4410": 2.68381, "4415": 2.70238, "4420": 2.70549, "4425": 2.72132, "4430": 2.69374, "4435": 2.68373, "4440": 2.69348, "4445": 2.67973, "4450": 2.65166, "4455": 2.66887, "4460": 2.6881, "4465": 2.69814, "4470": 2.67338, "4475": 2.68749, "4480": 2.65676, "4485": 2.69941, "4490": 2.65189, "4495": 2.70897, "4500": 2.70239, "4505": 2.69521, "4510": 2.64919, "4515": 2.70031, "4520": 2.66829, "4525": 2.66609, "4530": 2.67206, "4535": 2.67259, "4540": 2.70728, "4545": 2.65612, "4550": 2.70083, "4555": 2.67856, "4560": 2.65513, "4565": 2.63823, "4570": 2.64122, "4575": 2.6646, "4580": 2.68762, "4585": 2.68234, "4590": 2.61701, "4595": 2.66208, "4600": 2.6793, "4605": 2.67662, "4610": 2.65261, "4615": 2.66058, "4620": 2.65881, "4625": 2.68537, "4630": 2.67218, "4635": 2.64354, "4640": 2.69408, "4645": 2.64915, "4650": 2.70258, "4655": 2.71207, "4660": 2.67684, "4665": 2.68847, "4670": 2.67711, "4675": 2.68695, "4680": 2.66399, "4685": 2.65574, "4690": 2.70135, "4695": 2.65603, "4700": 2.67474, "4705": 2.64927, "4710": 2.67454, "4715": 2.6445, "4720": 2.72006, "4725": 2.62873, "4730": 2.64853, "4735": 2.68453, "4740": 2.64342, "4745": 2.65059, "4750": 2.63888, "4755": 2.65157, "4760": 2.65996, "4765": 2.64629, "4770": 2.62414, "4775": 2.65418, "4780": 2.65916, "4785": 2.69171, "4790": 2.64922, "4795": 2.67239, "4800": 2.62814, "4805": 2.64031, "4810": 2.66308, "4815": 2.64414, "4820": 2.66856, "4825": 2.64936, "4830": 2.61301, "4835": 2.64828, "4840": 2.65719, "4845": 2.63018, "4850": 2.62249, "4855": 2.59762, "4860": 2.65006, "4865": 2.62728, "4870": 2.63739, "4875": 2.61869, "4880": 2.62597, "4885": 2.62554, "4890": 2.6792, "4895": 2.659, "4900": 2.61682, "4905": 2.62007, "4910": 2.63757, "4915": 2.612, "4920": 2.65399, "4925": 2.64822, "4930": 2.57057, "4935": 2.65116, "4940": 2.63208, "4945": 2.63853, "4950": 2.6253, "4955": 2.61842, "4960": 2.61614, "4965": 2.65867, "4970": 2.59725, "4975": 2.65839, "4980": 2.61913, "4985": 2.63529, "4990": 2.65578, "4995": 2.58058, "5000": 2.66073, "5005": 2.66345, "5010": 2.68227, "5015": 2.63294, "5020": 2.63988, "5025": 2.68591, "5030": 2.64819, "5035": 2.61651, "5040": 2.61892, "5045": 2.60431, "5050": 2.62445, "5055": 2.64904, "5060": 2.64327, "5065": 2.68471, "5070": 2.60524, "5075": 2.61458, "5080": 2.60662, "5085": 2.60264, "5090": 2.59015, "5095": 2.64761, "5100": 2.6457, "5105": 2.60793, "5110": 2.66334, "5115": 2.62502, "5120": 2.67169, "5125": 2.62758, "5130": 2.61382, "5135": 2.61793, "5140": 2.57598, "5145": 2.62766, "5150": 2.63527, "5155": 2.6155, "5160": 2.65966, "5165": 2.58483, "5170": 2.58978, "5175": 2.61727, "5180": 2.60571, "5185": 2.62183, "5190": 2.62827, "5195": 2.67109, "5200": 2.59759, "5205": 2.60702, "5210": 2.60617, "5215": 2.64994, "5220": 2.58854, "5225": 2.55622, "5230": 2.63303, "5235": 2.61375, "5240": 2.63238, "5245": 2.62967, "5250": 2.5932, "5255": 2.61614, "5260": 2.55787, "5265": 2.59927, "5270": 2.59007, "5275": 2.61657, "5280": 2.61119, "5285": 2.60465, "5290": 2.63616, "5295": 2.62334, "5300": 2.58036, "5305": 2.59688, "5310": 2.60926, "5315": 2.58728, "5320": 2.61359, "5325": 2.6452, "5330": 2.6021, "5335": 2.58401, "5340": 2.56341, "5345": 2.65571, "5350": 2.61767, "5355": 2.58122, "5360": 2.59757, "5365": 2.61791, "5370": 2.61383, "5375": 2.62936, "5380": 2.57845, "5385": 2.56235, "5390": 2.58632, "5395": 2.61671, "5400": 2.60674, "5405": 2.54636, "5410": 2.61005, "5415": 2.59464, "5420": 2.61139, "5425": 2.625, "5430": 2.6265, "5435": 2.5765, "5440": 2.58814, "5445": 2.63079, "5450": 2.64534, "5455": 2.6101, "5460": 2.5903, "5465": 2.6052, "5470": 2.59691, "5475": 2.62442, "5480": 2.58886, "5485": 2.5915, "5490": 2.57494, "5495": 2.56951, "5500": 2.56777, "5505": 2.61617, "5510": 2.62567, "5515": 2.58374, "5520": 2.56088, "5525": 2.58583, "5530": 2.66352, "5535": 2.62035, "5540": 2.56974, "5545": 2.59511, "5550": 2.54965, "5555": 2.57459, "5560": 2.56453, "5565": 2.60605, "5570": 2.65634, "5575": 2.63541, "5580": 2.57175, "5585": 2.5945, "5590": 2.55928, "5595": 2.58276, "5600": 2.55517, "5605": 2.60104, "5610": 2.58094, "5615": 2.58249, "5620": 2.58036, "5625": 2.55089, "5630": 2.57187, "5635": 2.6324, "5640": 2.59548, "5645": 2.57131, "5650": 2.57908, "5655": 2.54814, "5660": 2.56269, "5665": 2.58485, "5670": 2.56733, "5675": 2.61086, "5680": 2.5279, "5685": 2.5673, "5690": 2.60169, "5695": 2.56032, "5700": 2.59817, "5705": 2.59773, "5710": 2.57825, "5715": 2.58556, "5720": 2.53545, "5725": 2.60141, "5730": 2.57545, "5735": 2.60941, "5740": 2.59452, "5745": 2.55803, "5750": 2.53903, "5755": 2.55715, "5760": 2.61525, "5765": 2.56038, "5770": 2.5412, "5775": 2.58539, "5780": 2.57794, "5785": 2.53954, "5790": 2.56402, "5795": 2.60037, "5800": 2.54586, "5805": 2.53368, "5810": 2.55739, "5815": 2.52458, "5820": 2.59565, "5825": 2.50396, "5830": 2.49812, "5835": 2.59879, "5840": 2.54027, "5845": 2.55119, "5850": 2.61034, "5855": 2.5078, "5860": 2.56077, "5865": 2.51887, "5870": 2.57531, "5875": 2.60847, "5880": 2.58556, "5885": 2.56695, "5890": 2.58322, "5895": 2.55606, "5900": 2.61399, "5905": 2.55561, "5910": 2.59622, "5915": 2.60833, "5920": 2.58814, "5925": 2.53547, "5930": 2.57614, "5935": 2.55624, "5940": 2.57051, "5945": 2.51758, "5950": 2.5549, "5955": 2.58364, "5960": 2.56774, "5965": 2.61916, "5970": 2.54972, "5975": 2.58058, "5980": 2.55777, "5985": 2.5587, "5990": 2.55442, "5995": 2.55786, "6000": 2.5541, "6005": 2.51959, "6010": 2.5612, "6015": 2.5234, "6020": 2.53535, "6025": 2.55845, "6030": 2.60267, "6035": 2.54217, "6040": 2.54797, "6045": 2.49021, "6050": 2.59325, "6055": 2.51806, "6060": 2.54365, "6065": 2.52512, "6070": 2.52866, "6075": 2.5355, "6080": 2.53264, "6085": 2.59598, "6090": 2.56805, "6095": 2.53269, "6100": 2.53973, "6105": 2.52014, "6110": 2.5548, "6115": 2.5854, "6120": 2.55512, "6125": 2.53857, "6130": 2.47414, "6135": 2.5559, "6140": 2.55561, "6145": 2.55497, "6150": 2.52352, "6155": 2.50641, "6160": 2.54021, "6165": 2.57308, "6170": 2.54839, "6175": 2.60252, "6180": 2.51088, "6185": 2.54973, "6190": 2.49031, "6195": 2.57918, "6200": 2.55088, "6205": 2.53612, "6210": 2.51902, "6215": 2.51336, "6220": 2.56408, "6225": 2.51443, "6230": 2.51059, "6235": 2.5608, "6240": 2.55012, "6245": 2.5247, "6250": 2.53168, "6255": 2.57932, "6260": 2.52354, "6265": 2.57262, "6270": 2.52446, "6275": 2.56295, "6280": 2.52126, "6285": 2.52066, "6290": 2.52096, "6295": 2.50424, "6300": 2.55533, "6305": 2.52427, "6310": 2.51089, "6315": 2.53744, "6320": 2.488, "6325": 2.59779, "6330": 2.55532, "6335": 2.51175, "6340": 2.51285, "6345": 2.55419, "6350": 2.55594, "6355": 2.52344, "6360": 2.52101, "6365": 2.48499, "6370": 2.53525, "6375": 2.49254, "6380": 2.55757, "6385": 2.57399, "6390": 2.50202, "6395": 2.55089, "6400": 2.50993, "6405": 2.5282, "6410": 2.5186, "6415": 2.52701, "6420": 2.54208, "6425": 2.53505, "6430": 2.57601, "6435": 2.54343, "6440": 2.53596, "6445": 2.52707, "6450": 2.53253, "6455": 2.5206, "6460": 2.51748, "6465": 2.56168, "6470": 2.5182, "6475": 2.52367, "6480": 2.48721, "6485": 2.52891, "6490": 2.50941, "6495": 2.49869, "6500": 2.52288, "6505": 2.49655, "6510": 2.5435, "6515": 2.51061, "6520": 2.51072, "6525": 2.4951, "6530": 2.54405, "6535": 2.53179, "6540": 2.52806, "6545": 2.56063, "6550": 2.50377, "6555": 2.5567, "6560": 2.50957, "6565": 2.52066, "6570": 2.5846, "6575": 2.52194, "6580": 2.49953, "6585": 2.50756, "6590": 2.5088, "6595": 2.49707, "6600": 2.48904, "6605": 2.53959, "6610": 2.47704, "6615": 2.56692, "6620": 2.53266, "6625": 2.50914, "6630": 2.51237, "6635": 2.47298, "6640": 2.53895, "6645": 2.59549, "6650": 2.51039, "6655": 2.5003, "6660": 2.57417, "6665": 2.52229, "6670": 2.56842, "6675": 2.46804, "6680": 2.54777, "6685": 2.53423, "6690": 2.51271, "6695": 2.48576, "6700": 2.52379, "6705": 2.51865, "6710": 2.49131, "6715": 2.51508, "6720": 2.51046, "6725": 2.51877, "6730": 2.51764, "6735": 2.48391, "6740": 2.51343, "6745": 2.49511, "6750": 2.55883, "6755": 2.47532, "6760": 2.54102, "6765": 2.48848, "6770": 2.51699, "6775": 2.50924, "6780": 2.53742, "6785": 2.47155, "6790": 2.54353, "6795": 2.49747, "6800": 2.52506, "6805": 2.51084, "6810": 2.50457, "6815": 2.52164, "6820": 2.48634, "6825": 2.5058, "6830": 2.54057, "6835": 2.5071, "6840": 2.50862, "6845": 2.52459, "6850": 2.47366, "6855": 2.5114, "6860": 2.50121, "6865": 2.48697, "6870": 2.55404, "6875": 2.47466, "6880": 2.55074, "6885": 2.47742, "6890": 2.5456, "6895": 2.50044, "6900": 2.48513, "6905": 2.49727, "6910": 2.51927, "6915": 2.51524, "6920": 2.53222, "6925": 2.54381, "6930": 2.49006, "6935": 2.52259, "6940": 2.49956, "6945": 2.46052, "6950": 2.48424, "6955": 2.52708, "6960": 2.51994, "6965": 2.493, "6970": 2.47002, "6975": 2.52124, "6980": 2.45179, "6985": 2.51469, "6990": 2.53168, "6995": 2.46205, "7000": 2.48721, "7005": 2.47085, "7010": 2.47458, "7015": 2.52239, "7020": 2.46871, "7025": 2.455, "7030": 2.48512, "7035": 2.47891, "7040": 2.5055, "7045": 2.52023, "7050": 2.52604, "7055": 2.44159, "7060": 2.47055, "7065": 2.4812, "7070": 2.49094, "7075": 2.49419, "7080": 2.5339, "7085": 2.48474, "7090": 2.47483, "7095": 2.50175, "7100": 2.51611, "7105": 2.48909, "7110": 2.48875, "7115": 2.50654, "7120": 2.47919, "7125": 2.4648, "7130": 2.48826, "7135": 2.51398, "7140": 2.49836, "7145": 2.49968, "7150": 2.51209, "7155": 2.50995, "7160": 2.47585, "7165": 2.45844, "7170": 2.5053, "7175": 2.50179, "7180": 2.50705, "7185": 2.48036, "7190": 2.46179, "7195": 2.46554, "7200": 2.5094, "7205": 2.48845, "7210": 2.44336, "7215": 2.48072, "7220": 2.44332, "7225": 2.51135, "7230": 2.50786, "7235": 2.48259, "7240": 2.47784, "7245": 2.49888, "7250": 2.50767, "7255": 2.49251, "7260": 2.45824, "7265": 2.45066, "7270": 2.47062, "7275": 2.50235, "7280": 2.49435, "7285": 2.42685, "7290": 2.48191, "7295": 2.48837, "7300": 2.41828, "7305": 2.44602, "7310": 2.44836, "7315": 2.49166, "7320": 2.48459, "7325": 2.45973, "7330": 2.49034, "7335": 2.47382, "7340": 2.46396, "7345": 2.4941, "7350": 2.5103, "7355": 2.49639, "7360": 2.48014, "7365": 2.46719, "7370": 2.4737, "7375": 2.44973, "7380": 2.49351, "7385": 2.48455, "7390": 2.47135, "7395": 2.477, "7400": 2.48731, "7405": 2.44334, "7410": 2.48259, "7415": 2.46975, "7420": 2.49233, "7425": 2.45642, "7430": 2.52265, "7435": 2.49165, "7440": 2.52011, "7445": 2.50649, "7450": 2.47346, "7455": 2.46263, "7460": 2.46431, "7465": 2.48535, "7470": 2.45144, "7475": 2.45815, "7480": 2.51387, "7485": 2.45117, "7490": 2.47472, "7495": 2.47889, "7500": 2.49782, "7505": 2.44041, "7510": 2.43588, "7515": 2.42224, "7520": 2.49392, "7525": 2.49662, "7530": 2.47567, "7535": 2.45999, "7540": 2.47286, "7545": 2.47379, "7550": 2.49017, "7555": 2.45369, "7560": 2.42765, "7565": 2.50931, "7570": 2.48419, "7575": 2.43912, "7580": 2.45901, "7585": 2.48319, "7590": 2.48184, "7595": 2.46424, "7600": 2.46287, "7605": 2.44744, "7610": 2.44904, "7615": 2.42801, "7620": 2.54382, "7625": 2.47981, "7630": 2.42404, "7635": 2.42598, "7640": 2.45333, "7645": 2.47382, "7650": 2.46343, "7655": 2.48648, "7660": 2.4512, "7665": 2.43238, "7670": 2.44036, "7675": 2.45618, "7680": 2.48615, "7685": 2.43166, "7690": 2.48187, "7695": 2.4547, "7700": 2.48168, "7705": 2.51497, "7710": 2.50597, "7715": 2.4453, "7720": 2.47122, "7725": 2.48144, "7730": 2.45802, "7735": 2.49851, "7740": 2.44105, "7745": 2.45131, "7750": 2.44125, "7755": 2.47226, "7760": 2.45319, "7765": 2.45587, "7770": 2.47059, "7775": 2.4538, "7780": 2.41754, "7785": 2.44561, "7790": 2.48633, "7795": 2.44163, "7800": 2.4703, "7805": 2.48729, "7810": 2.50584, "7815": 2.49634, "7820": 2.45047, "7825": 2.51645, "7830": 2.45243, "7835": 2.46939, "7840": 2.47968, "7845": 2.4632, "7850": 2.41717, "7855": 2.47372, "7860": 2.5075, "7865": 2.42692, "7870": 2.47598, "7875": 2.44861, "7880": 2.45488, "7885": 2.46205, "7890": 2.47234, "7895": 2.45238, "7900": 2.44179, "7905": 2.43688, "7910": 2.426, "7915": 2.48366, "7920": 2.47574, "7925": 2.4225, "7930": 2.47225, "7935": 2.45119, "7940": 2.424, "7945": 2.47139, "7950": 2.44612, "7955": 2.41981, "7960": 2.49324, "7965": 2.53135, "7970": 2.52904, "7975": 2.45162, "7980": 2.44412, "7985": 2.46923, "7990": 2.43335, "7995": 2.46941, "8000": 2.43681, "8005": 2.41849, "8010": 2.45817, "8015": 2.47029, "8020": 2.4825, "8025": 2.47607, "8030": 2.45105, "8035": 2.47127, "8040": 2.42237, "8045": 2.45296, "8050": 2.4482, "8055": 2.42424, "8060": 2.44611, "8065": 2.46189, "8070": 2.45993, "8075": 2.45852, "8080": 2.4462, "8085": 2.43951, "8090": 2.4255, "8095": 2.42228, "8100": 2.43833, "8105": 2.49489, "8110": 2.43712, "8115": 2.44338, "8120": 2.46793, "8125": 2.46836, "8130": 2.45375, "8135": 2.45377, "8140": 2.43826, "8145": 2.42645, "8150": 2.42267, "8155": 2.4863, "8160": 2.45488, "8165": 2.4436, "8170": 2.43613, "8175": 2.42318, "8180": 2.49704, "8185": 2.4269, "8190": 2.46935, "8195": 2.4585, "8200": 2.44837, "8205": 2.44505, "8210": 2.43355, "8215": 2.44056, "8220": 2.4358, "8225": 2.41015, "8230": 2.44176, "8235": 2.4652, "8240": 2.42683, "8245": 2.44875, "8250": 2.44572, "8255": 2.4414, "8260": 2.43328, "8265": 2.42888, "8270": 2.43309, "8275": 2.44149, "8280": 2.39924, "8285": 2.4396, "8290": 2.48206, "8295": 2.4474, "8300": 2.45867, "8305": 2.40775, "8310": 2.43515, "8315": 2.46058, "8320": 2.40073, "8325": 2.39433, "8330": 2.43694, "8335": 2.44522, "8340": 2.49104, "8345": 2.4478, "8350": 2.44978, "8355": 2.40749, "8360": 2.40123, "8365": 2.45247, "8370": 2.45194, "8375": 2.42273, "8380": 2.41941, "8385": 2.42385, "8390": 2.4346, "8395": 2.43997, "8400": 2.43946, "8405": 2.48993, "8410": 2.437, "8415": 2.43406, "8420": 2.41749, "8425": 2.43899, "8430": 2.46013, "8435": 2.40557, "8440": 2.45224, "8445": 2.45724, "8450": 2.40482, "8455": 2.46, "8460": 2.45352, "8465": 2.4356, "8470": 2.40868, "8475": 2.47827, "8480": 2.40162, "8485": 2.41279, "8490": 2.46472, "8495": 2.43598, "8500": 2.44504, "8505": 2.40324, "8510": 2.40392, "8515": 2.42848, "8520": 2.42568, "8525": 2.4921, "8530": 2.37281, "8535": 2.40013, "8540": 2.48538, "8545": 2.38102, "8550": 2.43805, "8555": 2.44969, "8560": 2.47001, "8565": 2.42051, "8570": 2.43016, "8575": 2.44673, "8580": 2.44107, "8585": 2.4206, "8590": 2.40199, "8595": 2.42668, "8600": 2.41302, "8605": 2.49039, "8610": 2.42012, "8615": 2.38793, "8620": 2.44803, "8625": 2.42382, "8630": 2.45408, "8635": 2.44896, "8640": 2.43494, "8645": 2.4736, "8650": 2.42011, "8655": 2.45238, "8660": 2.45516, "8665": 2.38524, "8670": 2.40887, "8675": 2.4285, "8680": 2.44765, "8685": 2.4292, "8690": 2.41052, "8695": 2.44169, "8700": 2.43279, "8705": 2.41873, "8710": 2.42723, "8715": 2.44835, "8720": 2.47546, "8725": 2.40943, "8730": 2.3906, "8735": 2.43279, "8740": 2.42918, "8745": 2.39996, "8750": 2.43474, "8755": 2.42434, "8760": 2.40027, "8765": 2.43461, "8770": 2.40495, "8775": 2.43724, "8780": 2.42018, "8785": 2.47035, "8790": 2.41985, "8795": 2.41766, "8800": 2.41504, "8805": 2.40682, "8810": 2.41125, "8815": 2.47313, "8820": 2.45249, "8825": 2.42464, "8830": 2.38678, "8835": 2.42251, "8840": 2.39423, "8845": 2.42654, "8850": 2.43398, "8855": 2.40351, "8860": 2.42788, "8865": 2.42683, "8870": 2.43891, "8875": 2.43765, "8880": 2.41181, "8885": 2.39358, "8890": 2.44536, "8895": 2.42582, "8900": 2.41128, "8905": 2.40287, "8910": 2.40178, "8915": 2.41899, "8920": 2.43296, "8925": 2.46577, "8930": 2.4143, "8935": 2.40926, "8940": 2.38869, "8945": 2.3922, "8950": 2.41756, "8955": 2.39479, "8960": 2.43345, "8965": 2.41805, "8970": 2.40426, "8975": 2.47319, "8980": 2.43981, "8985": 2.37288, "8990": 2.4072, "8995": 2.41768, "9000": 2.45601, "9005": 2.41214, "9010": 2.37503, "9015": 2.40997, "9020": 2.3985, "9025": 2.37013, "9030": 2.39819, "9035": 2.4235, "9040": 2.42034, "9045": 2.41944, "9050": 2.39591, "9055": 2.41865, "9060": 2.41835, "9065": 2.40424, "9070": 2.44544, "9075": 2.39609, "9080": 2.43408, "9085": 2.41287, "9090": 2.41241, "9095": 2.39607, "9100": 2.4007, "9105": 2.35839, "9110": 2.46492, "9115": 2.41577, "9120": 2.405, "9125": 2.458, "9130": 2.39476, "9135": 2.44823, "9140": 2.43426, "9145": 2.43038, "9150": 2.42521, "9155": 2.37592, "9160": 2.41896, "9165": 2.42461, "9170": 2.37306, "9175": 2.41974, "9180": 2.37725, "9185": 2.43877, "9190": 2.41309, "9195": 2.38919, "9200": 2.39263, "9205": 2.45158, "9210": 2.36253, "9215": 2.46302, "9220": 2.44836, "9225": 2.38351, "9230": 2.44641, "9235": 2.39743, "9240": 2.40233, "9245": 2.43533, "9250": 2.42643, "9255": 2.42876, "9260": 2.38508, "9265": 2.43838, "9270": 2.43511, "9275": 2.39568, "9280": 2.39071, "9285": 2.42371, "9290": 2.40431, "9295": 2.38539, "9300": 2.42356, "9305": 2.40497, "9310": 2.41467, "9315": 2.40958, "9320": 2.44478, "9325": 2.37057, "9330": 2.40175, "9335": 2.36064, "9340": 2.40709, "9345": 2.41361, "9350": 2.43767, "9355": 2.47543, "9360": 2.4371, "9365": 2.38709, "9370": 2.43617, "9375": 2.43237, "9380": 2.35257, "9385": 2.40086, "9390": 2.38081, "9395": 2.3885, "9400": 2.44434, "9405": 2.41269, "9410": 2.39825, "9415": 2.43724, "9420": 2.44459, "9425": 2.43199, "9430": 2.44775, "9435": 2.41468, "9440": 2.4757, "9445": 2.3748, "9450": 2.39361, "9455": 2.40348, "9460": 2.38595, "9465": 2.37795, "9470": 2.38158, "9475": 2.36591, "9480": 2.43326, "9485": 2.38642, "9490": 2.41941, "9495": 2.38123, "9500": 2.36329, "9505": 2.42967, "9510": 2.39832, "9515": 2.42896, "9520": 2.41821, "9525": 2.39098, "9530": 2.45386, "9535": 2.40282, "9540": 2.41807, "9545": 2.37646, "9550": 2.42227, "9555": 2.38928, "9560": 2.42056, "9565": 2.40473, "9570": 2.37127, "9575": 2.41035, "9580": 2.39592, "9585": 2.42186, "9590": 2.42769, "9595": 2.44835, "9600": 2.39217, "9605": 2.38532, "9610": 2.41965, "9615": 2.41204, "9620": 2.41154, "9625": 2.44483, "9630": 2.395, "9635": 2.40098, "9640": 2.44575, "9645": 2.41172, "9650": 2.39957, "9655": 2.37414, "9660": 2.42292, "9665": 2.38785, "9670": 2.38317, "9675": 2.35775, "9680": 2.39654, "9685": 2.39503, "9690": 2.4613, "9695": 2.38031, "9700": 2.37568, "9705": 2.38353, "9710": 2.36601, "9715": 2.38749, "9720": 2.4332, "9725": 2.44039, "9730": 2.42955, "9735": 2.38794, "9740": 2.37969, "9745": 2.42437, "9750": 2.3981, "9755": 2.4077, "9760": 2.41101, "9765": 2.36724, "9770": 2.4342, "9775": 2.40115, "9780": 2.36209, "9785": 2.40063, "9790": 2.40782, "9795": 2.35982, "9800": 2.39646, "9805": 2.40576, "9810": 2.40871, "9815": 2.37765, "9820": 2.37557, "9825": 2.40604, "9830": 2.42044, "9835": 2.38417, "9840": 2.41278, "9845": 2.36206, "9850": 2.39915, "9855": 2.39447, "9860": 2.38855, "9865": 2.37969, "9870": 2.38592, "9875": 2.38177, "9880": 2.45139, "9885": 2.39245, "9890": 2.35063, "9895": 2.31931, "9900": 2.3952, "9905": 2.42474, "9910": 2.35507, "9915": 2.36324, "9920": 2.41087, "9925": 2.3985, "9930": 2.38397, "9935": 2.35059, "9940": 2.38385, "9945": 2.3797, "9950": 2.40362, "9955": 2.44626, "9960": 2.4258, "9965": 2.35484, "9970": 2.40776, "9975": 2.38445, "9980": 2.3311, "9985": 2.4051, "9990": 2.39694, "9995": 2.39526, "10000": 2.36696, "10005": 2.37191, "10010": 2.38253, "10015": 2.44412, "10020": 2.36322, "10025": 2.38777, "10030": 2.38744, "10035": 2.40886, "10040": 2.40309, "10045": 2.38187, "10050": 2.34835, "10055": 2.36929, "10060": 2.41862, "10065": 2.37479, "10070": 2.423, "10075": 2.37255, "10080": 2.36332, "10085": 2.37108, "10090": 2.34595, "10095": 2.40124, "10100": 2.32131, "10105": 2.38462, "10110": 2.41161, "10115": 2.38754, "10120": 2.35903, "10125": 2.37092, "10130": 2.36022, "10135": 2.38243, "10140": 2.41271, "10145": 2.40607, "10150": 2.37539, "10155": 2.39406, "10160": 2.36042, "10165": 2.3842, "10170": 2.42368, "10175": 2.32305, "10180": 2.39352, "10185": 2.38375, "10190": 2.44188, "10195": 2.40305, "10200": 2.39015, "10205": 2.38822, "10210": 2.36991, "10215": 2.34364, "10220": 2.41795, "10225": 2.43037, "10230": 2.35534, "10235": 2.38619, "10240": 2.37205, "10245": 2.39035, "10250": 2.38646, "10255": 2.41642, "10260": 2.33424, "10265": 2.34821, "10270": 2.35061, "10275": 2.36955, "10280": 2.44822, "10285": 2.35754, "10290": 2.38389, "10295": 2.3723, "10300": 2.36828, "10305": 2.41571, "10310": 2.3877, "10315": 2.36013, "10320": 2.36624, "10325": 2.36267, "10330": 2.41312, "10335": 2.36153, "10340": 2.41836, "10345": 2.36889, "10350": 2.35672, "10355": 2.39532, "10360": 2.37312, "10365": 2.36074, "10370": 2.33915, "10375": 2.35562, "10380": 2.41668, "10385": 2.40583, "10390": 2.38095, "10395": 2.3576, "10400": 2.37548, "10405": 2.34822, "10410": 2.3381, "10415": 2.41431, "10420": 2.38041, "10425": 2.32562, "10430": 2.36014, "10435": 2.36947, "10440": 2.37091, "10445": 2.3601, "10450": 2.36009, "10455": 2.37843, "10460": 2.38055, "10465": 2.30313, "10470": 2.35764, "10475": 2.37919, "10480": 2.36187, "10485": 2.35885, "10490": 2.4119, "10495": 2.36592, "10500": 2.36273, "10505": 2.36961, "10510": 2.3816, "10515": 2.37346, "10520": 2.40183, "10525": 2.389, "10530": 2.39058, "10535": 2.35539, "10540": 2.40525, "10545": 2.35703, "10550": 2.37657, "10555": 2.35803, "10560": 2.3405, "10565": 2.37135, "10570": 2.37477, "10575": 2.35743, "10580": 2.37782, "10585": 2.36809, "10590": 2.37791, "10595": 2.37689, "10600": 2.33089, "10605": 2.3717, "10610": 2.36437, "10615": 2.36241, "10620": 2.34737, "10625": 2.41733, "10630": 2.36755, "10635": 2.32224, "10640": 2.36202, "10645": 2.42137, "10650": 2.3603, "10655": 2.31025, "10660": 2.34676, "10665": 2.39855, "10670": 2.31403, "10675": 2.41662, "10680": 2.35394, "10685": 2.28619, "10690": 2.38317, "10695": 2.32897, "10700": 2.38276, "10705": 2.38404, "10710": 2.34229, "10715": 2.38181, "10720": 2.32585, "10725": 2.35132, "10730": 2.34862, "10735": 2.3518, "10740": 2.31777, "10745": 2.33753, "10750": 2.33397, "10755": 2.40347, "10760": 2.36377, "10765": 2.33563, "10770": 2.36832, "10775": 2.38431, "10780": 2.36873, "10785": 2.39064, "10790": 2.34523, "10795": 2.38497, "10800": 2.32139, "10805": 2.39484, "10810": 2.37409, "10815": 2.35325, "10820": 2.34132, "10825": 2.36976, "10830": 2.33704, "10835": 2.347, "10840": 2.32835, "10845": 2.38518, "10850": 2.33101, "10855": 2.3619, "10860": 2.33071, "10865": 2.32059, "10870": 2.32243, "10875": 2.30352, "10880": 2.39351, "10885": 2.4031, "10890": 2.36036, "10895": 2.37107, "10900": 2.33074, "10905": 2.3132, "10910": 2.40682, "10915": 2.37125, "10920": 2.37395, "10925": 2.36178, "10930": 2.31785, "10935": 2.35984, "10940": 2.35325, "10945": 2.34609, "10950": 2.36031, "10955": 2.36317, "10960": 2.31043, "10965": 2.36158, "10970": 2.35657, "10975": 2.40855, "10980": 2.37438, "10985": 2.34339, "10990": 2.39794, "10995": 2.36358, "11000": 2.3367, "11005": 2.36071, "11010": 2.34173, "11015": 2.32617, "11020": 2.33259, "11025": 2.36576, "11030": 2.33866, "11035": 2.31271, "11040": 2.31651, "11045": 2.31822, "11050": 2.31809, "11055": 2.28913, "11060": 2.33868, "11065": 2.30809, "11070": 2.39307, "11075": 2.31783, "11080": 2.35458, "11085": 2.3363, "11090": 2.34633, "11095": 2.3718, "11100": 2.32898, "11105": 2.31552, "11110": 2.36164, "11115": 2.37242, "11120": 2.38066, "11125": 2.31475, "11130": 2.3499, "11135": 2.33245, "11140": 2.37221, "11145": 2.34935, "11150": 2.39376, "11155": 2.33996, "11160": 2.3647, "11165": 2.36272, "11170": 2.34016, "11175": 2.33391, "11180": 2.3723, "11185": 2.31334, "11190": 2.27753, "11195": 2.32826, "11200": 2.34711, "11205": 2.36211, "11210": 2.33021, "11215": 2.31819, "11220": 2.34183, "11225": 2.37052, "11230": 2.36505, "11235": 2.31953, "11240": 2.34095, "11245": 2.35575, "11250": 2.33117, "11255": 2.33525, "11260": 2.35568, "11265": 2.38813, "11270": 2.28869, "11275": 2.31457, "11280": 2.36943, "11285": 2.28996, "11290": 2.34586, "11295": 2.36394, "11300": 2.37973, "11305": 2.3352, "11310": 2.33011, "11315": 2.29941, "11320": 2.30556, "11325": 2.31524, "11330": 2.35389, "11335": 2.33811, "11340": 2.30715, "11345": 2.31278, "11350": 2.29619, "11355": 2.31948, "11360": 2.35099, "11365": 2.29275, "11370": 2.35148, "11375": 2.32687, "11380": 2.33972, "11385": 2.34686, "11390": 2.33406, "11395": 2.28592, "11400": 2.30948, "11405": 2.35421, "11410": 2.35465, "11415": 2.38449, "11420": 2.35035, "11425": 2.30772, "11430": 2.36681, "11435": 2.36061, "11440": 2.34732, "11445": 2.36229, "11450": 2.32147, "11455": 2.30472, "11460": 2.35122, "11465": 2.34332, "11470": 2.37307, "11475": 2.31161, "11480": 2.32382, "11485": 2.30846, "11490": 2.344, "11495": 2.40648, "11500": 2.33935, "11505": 2.34918, "11510": 2.36196, "11515": 2.32145, "11520": 2.3043, "11525": 2.36115, "11530": 2.31264, "11535": 2.3215, "11540": 2.34504, "11545": 2.34375, "11550": 2.3636, "11555": 2.32437, "11560": 2.34788, "11565": 2.33894, "11570": 2.34963, "11575": 2.29524, "11580": 2.32749, "11585": 2.35175, "11590": 2.3618, "11595": 2.33486, "11600": 2.3573, "11605": 2.32205, "11610": 2.35986, "11615": 2.36006, "11620": 2.29629, "11625": 2.27613, "11630": 2.32588, "11635": 2.34225, "11640": 2.30389, "11645": 2.30695, "11650": 2.32765, "11655": 2.35115, "11660": 2.33559, "11665": 2.32862, "11670": 2.29989, "11675": 2.29611, "11680": 2.3245, "11685": 2.335, "11690": 2.34341, "11695": 2.31618, "11700": 2.32498, "11705": 2.29971, "11710": 2.34418, "11715": 2.31355, "11720": 2.29829, "11725": 2.33913, "11730": 2.30649, "11735": 2.32753, "11740": 2.27272, "11745": 2.31682, "11750": 2.32586, "11755": 2.3504, "11760": 2.31072, "11765": 2.34059, "11770": 2.27741, "11775": 2.32428, "11780": 2.25457, "11785": 2.29632, "11790": 2.32161, "11795": 2.32148, "11800": 2.33441, "11805": 2.30224, "11810": 2.30358, "11815": 2.32855, "11820": 2.31768, "11825": 2.35799, "11830": 2.3157, "11835": 2.33627, "11840": 2.33933, "11845": 2.31625, "11850": 2.30279, "11855": 2.31274, "11860": 2.34249, "11865": 2.35695, "11870": 2.37806, "11875": 2.27972, "11880": 2.29164, "11885": 2.33582, "11890": 2.29146, "11895": 2.28855, "11900": 2.33216, "11905": 2.32131, "11910": 2.27663, "11915": 2.31183, "11920": 2.33333, "11925": 2.30178, "11930": 2.30522, "11935": 2.31612, "11940": 2.31579, "11945": 2.34065, "11950": 2.29853, "11955": 2.31246, "11960": 2.33665, "11965": 2.29384, "11970": 2.28198, "11975": 2.33532, "11980": 2.30525, "11985": 2.27675, "11990": 2.30305, "11995": 2.32936, "12000": 2.3243, "12005": 2.32389, "12010": 2.2876, "12015": 2.30885, "12020": 2.32609, "12025": 2.3339, "12030": 2.31036, "12035": 2.33548, "12040": 2.31504, "12045": 2.30961, "12050": 2.30776, "12055": 2.33205, "12060": 2.29709, "12065": 2.32997, "12070": 2.30215, "12075": 2.27525, "12080": 2.35012, "12085": 2.33805, "12090": 2.33153, "12095": 2.28064, "12100": 2.31437, "12105": 2.3082, "12110": 2.3286, "12115": 2.30272, "12120": 2.30476, "12125": 2.29299, "12130": 2.30209, "12135": 2.32731, "12140": 2.29487, "12145": 2.25587, "12150": 2.25942, "12155": 2.34139, "12160": 2.3564, "12165": 2.31895, "12170": 2.33202, "12175": 2.34176, "12180": 2.32919, "12185": 2.3392, "12190": 2.33408, "12195": 2.29739, "12200": 2.29971, "12205": 2.32199, "12210": 2.35582, "12215": 2.30236, "12220": 2.298, "12225": 2.24251, "12230": 2.33254, "12235": 2.33789, "12240": 2.32363, "12245": 2.28607, "12250": 2.27217, "12255": 2.33627, "12260": 2.31277, "12265": 2.34134, "12270": 2.31196, "12275": 2.31345, "12280": 2.31816, "12285": 2.28538, "12290": 2.30977, "12295": 2.26494, "12300": 2.32846, "12305": 2.26819, "12310": 2.2931, "12315": 2.38537, "12320": 2.3108, "12325": 2.32611, "12330": 2.30252, "12335": 2.32201, "12340": 2.33972, "12345": 2.36736, "12350": 2.34235, "12355": 2.3039, "12360": 2.31292, "12365": 2.32841, "12370": 2.29028, "12375": 2.29855, "12380": 2.29177, "12385": 2.28958, "12390": 2.24749, "12395": 2.30262, "12400": 2.29816, "12405": 2.30947, "12410": 2.30218, "12415": 2.28121, "12420": 2.31576, "12425": 2.29993, "12430": 2.31452, "12435": 2.29916, "12440": 2.33133, "12445": 2.31918, "12450": 2.30543, "12455": 2.24083, "12460": 2.33478, "12465": 2.36325, "12470": 2.27585, "12475": 2.27386, "12480": 2.29137, "12485": 2.30584, "12490": 2.33022, "12495": 2.26913, "12500": 2.3199, "12505": 2.33541, "12510": 2.3559, "12515": 2.27055, "12520": 2.31974, "12525": 2.28748, "12530": 2.32122, "12535": 2.27213, "12540": 2.28628, "12545": 2.29073, "12550": 2.31647, "12555": 2.32346, "12560": 2.30163, "12565": 2.33608, "12570": 2.2774, "12575": 2.30021, "12580": 2.31011, "12585": 2.29127, "12590": 2.33476, "12595": 2.32359, "12600": 2.28129, "12605": 2.31883, "12610": 2.36317, "12615": 2.30591, "12620": 2.33262, "12625": 2.33055, "12630": 2.2973, "12635": 2.33522, "12640": 2.29483, "12645": 2.27955, "12650": 2.32509, "12655": 2.2649, "12660": 2.34164, "12665": 2.31762, "12670": 2.30944, "12675": 2.31773, "12680": 2.27484, "12685": 2.36538, "12690": 2.30258, "12695": 2.3316, "12700": 2.29242, "12705": 2.3066, "12710": 2.30648, "12715": 2.28622, "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": "nan", "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": "nan", "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": "nan"}}, "num-zeros": {"start_step": 1, "end_step": 13000, "step_interval": 5, "values": {"1": 956235968.0, "5": 967337088.0, "10": 971385344.0, "15": 946451584.0, "20": 961454144.0, "25": 1083495040.0, "30": 1210886784.0, "35": 1297400576.0, "40": 1271467008.0, "45": 1175317120.0, "50": 1127123712.0, "55": 1082697856.0, "60": 1044925888.0, "65": 1035741120.0, "70": 1015001472.0, "75": 1009854336.0, "80": 1028703936.0, "85": 1019290368.0, "90": 990809344.0, "95": 964848896.0, "100": 973471680.0, "105": 983119424.0, "110": 977565504.0, "115": 977218048.0, "120": 961220416.0, "125": 943671872.0, "130": 976736960.0, "135": 964989632.0, "140": 963116864.0, "145": 976472832.0, "150": 921307328.0, "155": 968129664.0, "160": 956322560.0, "165": 959833728.0, "170": 974347456.0, "175": 949087808.0, "180": 946701504.0, "185": 972012288.0, "190": 969080384.0, "195": 985148288.0, "200": 945763584.0, "205": 958347008.0, "210": 979438848.0, "215": 967497024.0, "220": 956425216.0, "225": 962393664.0, "230": 948174848.0, "235": 965217152.0, "240": 966068608.0, "245": 969152000.0, "250": 974439936.0, "255": 925058944.0, "260": 965631168.0, "265": 970669376.0, "270": 959136512.0, "275": 953999296.0, "280": 963425664.0, "285": 945783936.0, "290": 974119040.0, "295": 966704000.0, "300": 967153024.0, "305": 964512128.0, "310": 940359680.0, "315": 967399488.0, "320": 969007936.0, "325": 980555456.0, "330": 972089216.0, "335": 946860544.0, "340": 966592384.0, "345": 973019520.0, "350": 973912320.0, "355": 963257280.0, "360": 948348032.0, "365": 964815680.0, "370": 962952960.0, "375": 958450432.0, "380": 947149312.0, "385": 955992704.0, "390": 945401920.0, "395": 970418112.0, "400": 979775872.0, "405": 968349696.0, "410": 970067456.0, "415": 953155072.0, "420": 943562368.0, "425": 954772160.0, "430": 962659264.0, "435": 977076736.0, "440": 954808960.0, "445": 971890816.0, "450": 963511680.0, "455": 973132288.0, "460": 983708288.0, "465": 945281152.0, "470": 942057856.0, "475": 967007104.0, "480": 966098304.0, "485": 976406464.0, "490": 962540544.0, "495": 945459840.0, "500": 964454784.0, "505": 986003136.0, "510": 965679232.0, "515": 943408000.0, "520": 945020992.0, "525": 971262848.0, "530": 971888448.0, "535": 979136128.0, "540": 969531136.0, "545": 954123392.0, "550": 951268096.0, "555": 987224704.0, "560": 960424256.0, "565": 966614016.0, "570": 975728384.0, "575": 927223872.0, "580": 970694784.0, "585": 961176832.0, "590": 972965888.0, "595": 963684352.0, "600": 937077568.0, "605": 951474304.0, "610": 963361024.0, "615": 970007808.0, "620": 976466176.0, "625": 949580224.0, "630": 954443904.0, "635": 986043648.0, "640": 980977216.0, "645": 955008832.0, "650": 958547584.0, "655": 951651520.0, "660": 961042688.0, "665": 967554944.0, "670": 962511744.0, "675": 968338048.0, "680": 965622016.0, "685": 962869120.0, "690": 961921152.0, "695": 954770816.0, "700": 970338624.0, "705": 945517632.0, "710": 943884160.0, "715": 973356416.0, "720": 968367744.0, "725": 978491840.0, "730": 952192896.0, "735": 948810368.0, "740": 955631744.0, "745": 975870976.0, "750": 981237376.0, "755": 962160384.0, "760": 951962496.0, "765": 967347840.0, "770": 976146048.0, "775": 970547840.0, "780": 977538112.0, "785": 931529792.0, "790": 960443264.0, "795": 964581248.0, "800": 967022144.0, "805": 962318720.0, "810": 940976448.0, "815": 949040000.0, "820": 953185856.0, "825": 954501760.0, "830": 976443776.0, "835": 956075520.0, "840": 948406912.0, "845": 965154752.0, "850": 966026688.0, "855": 960904960.0, "860": 976025088.0, "865": 938160704.0, "870": 966414976.0, "875": 972314304.0, "880": 963119744.0, "885": 967742720.0, "890": 949969728.0, "895": 960017792.0, "900": 974230272.0, "905": 963968064.0, "910": 958436608.0, "915": 956353920.0, "920": 943970368.0, "925": 960829184.0, "930": 978849664.0, "935": 971071104.0, "940": 960907136.0, "945": 945064832.0, "950": 957427008.0, "955": 979039232.0, "960": 983589376.0, "965": 966165696.0, "970": 951225600.0, "975": 961578240.0, "980": 968070528.0, "985": 968992256.0, "990": 984392960.0, "995": 953292032.0, "1000": 934783040.0, "1005": 960146944.0, "1010": 971539968.0, "1015": 985186560.0, "1020": 962781312.0, "1025": 935010304.0, "1030": 974676416.0, "1035": 964993152.0, "1040": 980464704.0, "1045": 960830464.0, "1050": 955200064.0, "1055": 957782272.0, "1060": 967746944.0, "1065": 967115264.0, "1070": 966600192.0, "1075": 950056448.0, "1080": 954506240.0, "1085": 967250688.0, "1090": 977131776.0, "1095": 961235584.0, "1100": 979611776.0, "1105": 953366144.0, "1110": 965955584.0, "1115": 966984832.0, "1120": 970350528.0, "1125": 965706496.0, "1130": 954942208.0, "1135": 965843328.0, "1140": 965175360.0, "1145": 970988544.0, "1150": 955558144.0, "1155": 930579520.0, "1160": 957773824.0, "1165": 978125312.0, "1170": 974302848.0, "1175": 973057600.0, "1180": 973081216.0, "1185": 947342912.0, "1190": 964792960.0, "1195": 953134464.0, "1200": 972844672.0, "1205": 988479168.0, "1210": 931123264.0, "1215": 968647744.0, "1220": 969164992.0, "1225": 975950272.0, "1230": 967333824.0, "1235": 943446336.0, "1240": 955852416.0, "1245": 981507264.0, "1250": 966111104.0, "1255": 973672960.0, "1260": 946495744.0, "1265": 963997568.0, "1270": 960487744.0, "1275": 973616640.0, "1280": 961114880.0, "1285": 957581440.0, "1290": 952528640.0, "1295": 971612608.0, "1300": 968863744.0, "1305": 963737728.0, "1310": 963337664.0, "1315": 943555840.0, "1320": 966307008.0, "1325": 989786496.0, "1330": 969506304.0, "1335": 972303232.0, "1340": 972271104.0, "1345": 960659072.0, "1350": 968639552.0, "1355": 955854208.0, "1360": 971819456.0, "1365": 960387456.0, "1370": 948791360.0, "1375": 973533696.0, "1380": 953470208.0, "1385": 969144704.0, "1390": 975719424.0, "1395": 931675264.0, "1400": 945856192.0, "1405": 976752768.0, "1410": 974512064.0, "1415": 967568832.0, "1420": 966747200.0, "1425": 937379200.0, "1430": 973916160.0, "1435": 978333952.0, "1440": 964180352.0, "1445": 958059264.0, "1450": 946148608.0, "1455": 983924608.0, "1460": 968649984.0, "1465": 948747072.0, "1470": 984244864.0, "1475": 943903360.0, "1480": 963976576.0, "1485": 957349120.0, "1490": 961261568.0, "1495": 980539520.0, "1500": 958334144.0, "1505": 942866752.0, "1510": 984180608.0, "1515": 959093888.0, "1520": 959103936.0, "1525": 952784896.0, "1530": 957744896.0, "1535": 949431168.0, "1540": 971087680.0, "1545": 963135040.0, "1550": 978667264.0, "1555": 952316672.0, "1560": 980089536.0, "1565": 967317376.0, "1570": 973843712.0, "1575": 975494784.0, "1580": 941864320.0, "1585": 970030528.0, "1590": 983822592.0, "1595": 948635392.0, "1600": 967446016.0, "1605": 952449280.0, "1610": 969617664.0, "1615": 983150208.0, "1620": 968020608.0, "1625": 970718336.0, "1630": 962888320.0, "1635": 942312576.0, "1640": 981613056.0, "1645": 973977984.0, "1650": 974185728.0, "1655": 967264192.0, "1660": 940689024.0, "1665": 961702848.0, "1670": 962902848.0, "1675": 971280896.0, "1680": 980879872.0, "1685": 944418816.0, "1690": 964687872.0, "1695": 965643264.0, "1700": 966339584.0, "1705": 985202688.0, "1710": 978353024.0, "1715": 943211136.0, "1720": 977088448.0, "1725": 965872448.0, "1730": 968966784.0, "1735": 965089536.0, "1740": 949714624.0, "1745": 970010176.0, "1750": 959685056.0, "1755": 960087040.0, "1760": 966380800.0, "1765": 951817088.0, "1770": 954666944.0, "1775": 973752064.0, "1780": 970536448.0, "1785": 968825728.0, "1790": 950235648.0, "1795": 945133184.0, "1800": 984667712.0, "1805": 987163072.0, "1810": 977768000.0, "1815": 948006400.0, "1820": 949209344.0, "1825": 978852352.0, "1830": 966361856.0, "1835": 964132864.0, "1840": 972318784.0, "1845": 935415808.0, "1850": 952497536.0, "1855": 980050944.0, "1860": 975868288.0, "1865": 958966272.0, "1870": 958948864.0, "1875": 932594944.0, "1880": 973575936.0, "1885": 978847744.0, "1890": 971359168.0, "1895": 959211776.0, "1900": 947394304.0, "1905": 981827712.0, "1910": 969127936.0, "1915": 970041728.0, "1920": 975598080.0, "1925": 960496640.0, "1930": 977926144.0, "1935": 963251456.0, "1940": 952460800.0, "1945": 981337920.0, "1950": 939171584.0, "1955": 960606208.0, "1960": 970031616.0, "1965": 981176768.0, "1970": 962045312.0, "1975": 952824064.0, "1980": 936849920.0, "1985": 975939456.0, "1990": 965967936.0, "1995": 962612608.0, "2000": 960553984.0, "2005": 954497792.0, "2010": 975579392.0, "2015": 991802880.0, "2020": 975435840.0, "2025": 974304768.0, "2030": 952083776.0, "2035": 967850432.0, "2040": 987460096.0, "2045": 976479616.0, "2050": 984702208.0, "2055": 942837248.0, "2060": 942594688.0, "2065": 966210176.0, "2070": 969622912.0, "2075": 980554880.0, "2080": 977600384.0, "2085": 939640128.0, "2090": 969874880.0, "2095": 961277312.0, "2100": 976718976.0, "2105": 972537856.0, "2110": 959904000.0, "2115": 956877184.0, "2120": 977480320.0, "2125": 962566272.0, "2130": 979618624.0, "2135": 950537280.0, "2140": 946995456.0, "2145": 962275584.0, "2150": 973404032.0, "2155": 972689216.0, "2160": 970315008.0, "2165": 948644224.0, "2170": 961545088.0, "2175": 969376640.0, "2180": 969329408.0, "2185": 947447424.0, "2190": 940481024.0, "2195": 986085888.0, "2200": 961860352.0, "2205": 978924032.0, "2210": 964102656.0, "2215": 963501952.0, "2220": 951311104.0, "2225": 969316672.0, "2230": 976331136.0, "2235": 974024704.0, "2240": 975494208.0, "2245": 960232000.0, "2250": 967640384.0, "2255": 969132672.0, "2260": 975065216.0, "2265": 968258304.0, "2270": 951745664.0, "2275": 962768640.0, "2280": 969640064.0, "2285": 971693632.0, "2290": 962890944.0, "2295": 931408256.0, "2300": 959906688.0, "2305": 970426496.0, "2310": 967444800.0, "2315": 970906880.0, "2320": 975589248.0, "2325": 938586496.0, "2330": 988435968.0, "2335": 977489152.0, "2340": 964596224.0, "2345": 964167680.0, "2350": 947555456.0, "2355": 977029504.0, "2360": 966898688.0, "2365": 977293056.0, "2370": 965073024.0, "2375": 953966016.0, "2380": 962917184.0, "2385": 967195264.0, "2390": 963078016.0, "2395": 974465536.0, "2400": 958411584.0, "2405": 968120896.0, "2410": 951585920.0, "2415": 965904960.0, "2420": 966518208.0, "2425": 959045248.0, "2430": 956686080.0, "2435": 961390080.0, "2440": 959755648.0, "2445": 970890176.0, "2450": 961998976.0, "2455": 922721024.0, "2460": 951954560.0, "2465": 955732224.0, "2470": 972571904.0, "2475": 973812736.0, "2480": 943896704.0, "2485": 944186560.0, "2490": 972411968.0, "2495": 974452672.0, "2500": 973908096.0, "2505": 958492352.0, "2510": 939510144.0, "2515": 979553408.0, "2520": 970473600.0, "2525": 964389504.0, "2530": 955798848.0, "2535": 936598656.0, "2540": 969027968.0, "2545": 970385088.0, "2550": 969458560.0, "2555": 969437056.0, "2560": 964977664.0, "2565": 959764544.0, "2570": 985176320.0, "2575": 957425664.0, "2580": 967424768.0, "2585": 966023424.0, "2590": 956355072.0, "2595": 981829760.0, "2600": 959532160.0, "2605": 963000960.0, "2610": 965972608.0, "2615": 951925504.0, "2620": 971242688.0, "2625": 976457408.0, "2630": 974411584.0, "2635": 948070912.0, "2640": 948137728.0, "2645": 963038848.0, "2650": 953984064.0, "2655": 977112064.0, "2660": 949623424.0, "2665": 953930112.0, "2670": 959063552.0, "2675": 979276736.0, "2680": 961397312.0, "2685": 970702592.0, "2690": 965221952.0, "2695": 943553664.0, "2700": 969425280.0, "2705": 978961536.0, "2710": 971812224.0, "2715": 990814976.0, "2720": 942649536.0, "2725": 967955904.0, "2730": 955468160.0, "2735": 970675840.0, "2740": 977921792.0, "2745": 932281024.0, "2750": 947857152.0, "2755": 956317440.0, "2760": 981697344.0, "2765": 966111488.0, "2770": 948915456.0, "2775": 935830656.0, "2780": 964776512.0, "2785": 969569152.0, "2790": 974273664.0, "2795": 966883712.0, "2800": 944388224.0, "2805": 964353152.0, "2810": 969609344.0, "2815": 975844352.0, "2820": 963087232.0, "2825": 937629952.0, "2830": 956742464.0, "2835": 986322752.0, "2840": 961759360.0, "2845": 967508416.0, "2850": 951716480.0, "2855": 962092736.0, "2860": 954241920.0, "2865": 955882368.0, "2870": 944664064.0, "2875": 974664000.0, "2880": 968201984.0, "2885": 981081664.0, "2890": 953455872.0, "2895": 957179008.0, "2900": 964989824.0, "2905": 931709056.0, "2910": 955731584.0, "2915": 979476608.0, "2920": 970491904.0, "2925": 964976000.0, "2930": 964048128.0, "2935": 940141696.0, "2940": 964911104.0, "2945": 989150336.0, "2950": 965209728.0, "2955": 965104768.0, "2960": 933162112.0, "2965": 968794816.0, "2970": 973035264.0, "2975": 958093440.0, "2980": 964497344.0, "2985": 937268864.0, "2990": 951255424.0, "2995": 978316288.0, "3000": 969275776.0, "3005": 974687936.0, "3010": 950235520.0, "3015": 943841152.0, "3020": 958442752.0, "3025": 975186112.0, "3030": 965018240.0, "3035": 963454336.0, "3040": 952134656.0, "3045": 989793408.0, "3050": 965545216.0, "3055": 982521216.0, "3060": 971227520.0, "3065": 943917312.0, "3070": 978408576.0, "3075": 975206208.0, "3080": 960992768.0, "3085": 962353280.0, "3090": 945954304.0, "3095": 938116096.0, "3100": 972929856.0, "3105": 961990400.0, "3110": 970657024.0, "3115": 963390336.0, "3120": 947117312.0, "3125": 972721408.0, "3130": 952974080.0, "3135": 966042624.0, "3140": 968488512.0, "3145": 937851392.0, "3150": 975010176.0, "3155": 976815360.0, "3160": 969628032.0, "3165": 982195584.0, "3170": 937961088.0, "3175": 953825024.0, "3180": 983810048.0, "3185": 965169920.0, "3190": 968483200.0, "3195": 950932864.0, "3200": 945102400.0, "3205": 959863552.0, "3210": 957487488.0, "3215": 958020352.0, "3220": 968129408.0, "3225": 935614592.0, "3230": 962590080.0, "3235": 975776960.0, "3240": 962621952.0, "3245": 981274880.0, "3250": 943260608.0, "3255": 954600064.0, "3260": 980362752.0, "3265": 963620416.0, "3270": 965164928.0, "3275": 959731712.0, "3280": 967046400.0, "3285": 982477824.0, "3290": 947690368.0, "3295": 966423168.0, "3300": 959165952.0, "3305": 949131776.0, "3310": 979510592.0, "3315": 964283520.0, "3320": 969208960.0, "3325": 956193856.0, "3330": 941167872.0, "3335": 964986176.0, "3340": 956913792.0, "3345": 972511872.0, "3350": 964575744.0, "3355": 943359232.0, "3360": 970037504.0, "3365": 969467904.0, "3370": 954763392.0, "3375": 958676096.0, "3380": 971474944.0, "3385": 947985280.0, "3390": 965793152.0, "3395": 978403520.0, "3400": 978138752.0, "3405": 976737152.0, "3410": 924193216.0, "3415": 955437440.0, "3420": 971832960.0, "3425": 977168768.0, "3430": 973837696.0, "3435": 936085504.0, "3440": 970517120.0, "3445": 957314816.0, "3450": 959853440.0, "3455": 963865600.0, "3460": 967895040.0, "3465": 931319360.0, "3470": 952348352.0, "3475": 973716096.0, "3480": 959751168.0, "3485": 979959296.0, "3490": 944684224.0, "3495": 953917952.0, "3500": 969330176.0, "3505": 964360704.0, "3510": 971235456.0, "3515": 955957888.0, "3520": 958747264.0, "3525": 971915392.0, "3530": 964137984.0, "3535": 983204736.0, "3540": 937501952.0, "3545": 944743040.0, "3550": 984463872.0, "3555": 978071424.0, "3560": 974386048.0, "3565": 968813568.0, "3570": 946708224.0, "3575": 976105344.0, "3580": 977506752.0, "3585": 954580608.0, "3590": 956439552.0, "3595": 951455232.0, "3600": 989009856.0, "3605": 962020160.0, "3610": 965066816.0, "3615": 974649280.0, "3620": 954900992.0, "3625": 939529280.0, "3630": 990161536.0, "3635": 971447488.0, "3640": 976038912.0, "3645": 961502208.0, "3650": 945816192.0, "3655": 965782720.0, "3660": 976207936.0, "3665": 964032640.0, "3670": 977444352.0, "3675": 943488640.0, "3680": 958197888.0, "3685": 964298624.0, "3690": 982118400.0, "3695": 963126016.0, "3700": 950571520.0, "3705": 947357440.0, "3710": 982380160.0, "3715": 972682368.0, "3720": 976139008.0, "3725": 964051584.0, "3730": 948861888.0, "3735": 967079296.0, "3740": 960960896.0, "3745": 969322432.0, "3750": 963935872.0, "3755": 953421824.0, "3760": 976654464.0, "3765": 979841536.0, "3770": 972372736.0, "3775": 972386432.0, "3780": 952599040.0, "3785": 960225664.0, "3790": 985586240.0, "3795": 969183360.0, "3800": 957865856.0, "3805": 972396288.0, "3810": 954517824.0, "3815": 974567808.0, "3820": 963020416.0, "3825": 962059712.0, "3830": 969383808.0, "3835": 934745984.0, "3840": 971270528.0, "3845": 986838144.0, "3850": 968850240.0, "3855": 965260864.0, "3860": 948069888.0, "3865": 975062528.0, "3870": 985111936.0, "3875": 983055232.0, "3880": 963640448.0, "3885": 953024000.0, "3890": 960298240.0, "3895": 960592000.0, "3900": 984901760.0, "3905": 976210880.0, "3910": 987348992.0, "3915": 946005888.0, "3920": 974879616.0, "3925": 961236352.0, "3930": 976790336.0, "3935": 978926464.0, "3940": 950304448.0, "3945": 960273664.0, "3950": 974172928.0, "3955": 972980352.0, "3960": 974078592.0, "3965": 950874176.0, "3970": 980656000.0, "3975": 960726272.0, "3980": 977520000.0, "3985": 962949120.0, "3990": 972755776.0, "3995": 953701632.0, "4000": 974988416.0, "4005": 971658112.0, "4010": 978397184.0, "4015": 971468288.0, "4020": 950297600.0, "4025": 968408576.0, "4030": 997923840.0, "4035": 978537152.0, "4040": 959788800.0, "4045": 939639808.0, "4050": 944694400.0, "4055": 980948672.0, "4060": 977659136.0, "4065": 975707200.0, "4070": 942140416.0, "4075": 945753088.0, "4080": 988741376.0, "4085": 962081024.0, "4090": 983332928.0, "4095": 986920832.0, "4100": 957176512.0, "4105": 954074624.0, "4110": 966463104.0, "4115": 976016960.0, "4120": 983506944.0, "4125": 960017024.0, "4130": 967253824.0, "4135": 971444032.0, "4140": 963146944.0, "4145": 956162816.0, "4150": 960282624.0, "4155": 946204480.0, "4160": 968402560.0, "4165": 970302720.0, "4170": 971934848.0, "4175": 955849792.0, "4180": 940962176.0, "4185": 968281088.0, "4190": 967966848.0, "4195": 989187008.0, "4200": 962630144.0, "4205": 960569792.0, "4210": 971813760.0, "4215": 974121088.0, "4220": 981120704.0, "4225": 975209024.0, "4230": 952702400.0, "4235": 958494080.0, "4240": 966811392.0, "4245": 961732736.0, "4250": 965857920.0, "4255": 958267136.0, "4260": 949557632.0, "4265": 964149376.0, "4270": 978395840.0, "4275": 975422464.0, "4280": 962672256.0, "4285": 951463296.0, "4290": 980017408.0, "4295": 968855296.0, "4300": 958286336.0, "4305": 966765632.0, "4310": 939614592.0, "4315": 949429696.0, "4320": 984536512.0, "4325": 982573056.0, "4330": 974772096.0, "4335": 949447040.0, "4340": 959503680.0, "4345": 956644096.0, "4350": 979935744.0, "4355": 968870784.0, "4360": 966338816.0, "4365": 941246208.0, "4370": 969508288.0, "4375": 973008000.0, "4380": 966160768.0, "4385": 972036992.0, "4390": 954165440.0, "4395": 951869056.0, "4400": 973665280.0, "4405": 972466432.0, "4410": 967921280.0, "4415": 958960768.0, "4420": 960791744.0, "4425": 976440576.0, "4430": 965930368.0, "4435": 975914240.0, "4440": 962343424.0, "4445": 954825536.0, "4450": 978332672.0, "4455": 960256384.0, "4460": 968593856.0, "4465": 968818944.0, "4470": 944152448.0, "4475": 952010240.0, "4480": 978837312.0, "4485": 968292032.0, "4490": 957038208.0, "4495": 938951296.0, "4500": 953253376.0, "4505": 977203456.0, "4510": 978762368.0, "4515": 962505600.0, "4520": 958837632.0, "4525": 958139520.0, "4530": 964429824.0, "4535": 976604928.0, "4540": 976823488.0, "4545": 970194560.0, "4550": 953223488.0, "4555": 959566720.0, "4560": 972485376.0, "4565": 973467520.0, "4570": 978886400.0, "4575": 957835072.0, "4580": 963191936.0, "4585": 957370752.0, "4590": 986568064.0, "4595": 960176896.0, "4600": 952267392.0, "4605": 959311296.0, "4610": 963711616.0, "4615": 957987520.0, "4620": 960268672.0, "4625": 973823488.0, "4630": 944495104.0, "4635": 977037952.0, "4640": 960311040.0, "4645": 982017600.0, "4650": 962374528.0, "4655": 939483264.0, "4660": 964001792.0, "4665": 962602752.0, "4670": 976696704.0, "4675": 963491392.0, "4680": 957513600.0, "4685": 949686656.0, "4690": 956986752.0, "4695": 969964032.0, "4700": 961238272.0, "4705": 970782208.0, "4710": 934518272.0, "4715": 970476096.0, "4720": 966337728.0, "4725": 980328128.0, "4730": 965875840.0, "4735": 937908736.0, "4740": 960161536.0, "4745": 976046720.0, "4750": 967984512.0, "4755": 984947328.0, "4760": 959111680.0, "4765": 955199488.0, "4770": 958583872.0, "4775": 991080576.0, "4780": 976794752.0, "4785": 967571200.0, "4790": 943707328.0, "4795": 955813760.0, "4800": 967686272.0, "4805": 976542592.0, "4810": 965124864.0, "4815": 957983104.0, "4820": 973980928.0, "4825": 961481728.0, "4830": 962651264.0, "4835": 972545408.0, "4840": 948877888.0, "4845": 965765760.0, "4850": 960305536.0, "4855": 964131584.0, "4860": 963063808.0, "4865": 967482752.0, "4870": 957158656.0, "4875": 983550592.0, "4880": 957085056.0, "4885": 977004544.0, "4890": 959764928.0, "4895": 942128512.0, "4900": 973663296.0, "4905": 975227264.0, "4910": 969246080.0, "4915": 970115456.0, "4920": 941182656.0, "4925": 954773120.0, "4930": 977023104.0, "4935": 963751424.0, "4940": 972623808.0, "4945": 960032128.0, "4950": 940732480.0, "4955": 967988096.0, "4960": 976728320.0, "4965": 960948608.0, "4970": 958567680.0, "4975": 933676800.0, "4980": 960813184.0, "4985": 962951104.0, "4990": 963516288.0, "4995": 986247936.0, "5000": 940654592.0, "5005": 968864192.0, "5010": 970259840.0, "5015": 965172608.0, "5020": 966617280.0, "5025": 949312896.0, "5030": 953465344.0, "5035": 967256000.0, "5040": 955628928.0, "5045": 969080000.0, "5050": 953309056.0, "5055": 954762624.0, "5060": 962993024.0, "5065": 952085120.0, "5070": 973511552.0, "5075": 978483328.0, "5080": 942842112.0, "5085": 965766400.0, "5090": 972783680.0, "5095": 964338176.0, "5100": 958294016.0, "5105": 965281664.0, "5110": 950365824.0, "5115": 972247808.0, "5120": 960340608.0, "5125": 969731968.0, "5130": 938710656.0, "5135": 943623424.0, "5140": 969840640.0, "5145": 968529344.0, "5150": 970528000.0, "5155": 972525248.0, "5160": 926502016.0, "5165": 961518464.0, "5170": 966824960.0, "5175": 966024192.0, "5180": 963595328.0, "5185": 930779008.0, "5190": 949693952.0, "5195": 972360320.0, "5200": 973649152.0, "5205": 968176128.0, "5210": 960442304.0, "5215": 928693888.0, "5220": 979112384.0, "5225": 984693504.0, "5230": 974975616.0, "5235": 974982464.0, "5240": 944278144.0, "5245": 970774656.0, "5250": 972390464.0, "5255": 966848128.0, "5260": 976729728.0, "5265": 942283776.0, "5270": 969202048.0, "5275": 970061632.0, "5280": 962764928.0, "5285": 964023296.0, "5290": 932465792.0, "5295": 951714688.0, "5300": 975563520.0, "5305": 951904128.0, "5310": 968004736.0, "5315": 955755904.0, "5320": 950885888.0, "5325": 973016320.0, "5330": 967732992.0, "5335": 967475008.0, "5340": 966369536.0, "5345": 962912512.0, "5350": 978913280.0, "5355": 972191104.0, "5360": 963819136.0, "5365": 965161280.0, "5370": 947777152.0, "5375": 948748416.0, "5380": 967102656.0, "5385": 980430272.0, "5390": 965252480.0, "5395": 955025536.0, "5400": 948263040.0, "5405": 974207168.0, "5410": 967794944.0, "5415": 975952640.0, "5420": 967297152.0, "5425": 937265984.0, "5430": 963686528.0, "5435": 971789568.0, "5440": 968956736.0, "5445": 957306432.0, "5450": 919370368.0, "5455": 951930496.0, "5460": 962315008.0, "5465": 978783104.0, "5470": 980883712.0, "5475": 941501568.0, "5480": 955719168.0, "5485": 964816768.0, "5490": 975815424.0, "5495": 962707968.0, "5500": 971107648.0, "5505": 956916224.0, "5510": 968451392.0, "5515": 945405568.0, "5520": 963071872.0, "5525": 975771904.0, "5530": 936475648.0, "5535": 970547968.0, "5540": 960222464.0, "5545": 971888384.0, "5550": 967786624.0, "5555": 955772544.0, "5560": 954374016.0, "5565": 968741248.0, "5570": 945064448.0, "5575": 960441600.0, "5580": 960465280.0, "5585": 959385472.0, "5590": 977594752.0, "5595": 975097728.0, "5600": 962936320.0, "5605": 964017920.0, "5610": 943072576.0, "5615": 966467584.0, "5620": 963213440.0, "5625": 982251392.0, "5630": 975903616.0, "5635": 957188352.0, "5640": 951321920.0, "5645": 967769728.0, "5650": 979010112.0, "5655": 983338496.0, "5660": 956309184.0, "5665": 953303680.0, "5670": 965950464.0, "5675": 967530304.0, "5680": 978475840.0, "5685": 961873792.0, "5690": 935811520.0, "5695": 963629312.0, "5700": 952340480.0, "5705": 974340992.0, "5710": 971034880.0, "5715": 945939328.0, "5720": 974930560.0, "5725": 967241344.0, "5730": 978353792.0, "5735": 964685568.0, "5740": 943451840.0, "5745": 971057664.0, "5750": 981706368.0, "5755": 956582976.0, "5760": 963500032.0, "5765": 957572224.0, "5770": 955539072.0, "5775": 970688512.0, "5780": 962628288.0, "5785": 970453440.0, "5790": 974456832.0, "5795": 949655936.0, "5800": 965840256.0, "5805": 968615424.0, "5810": 975876352.0, "5815": 969980864.0, "5820": 936254464.0, "5825": 969106496.0, "5830": 977602112.0, "5835": 974857920.0, "5840": 962964480.0, "5845": 968640960.0, "5850": 942897792.0, "5855": 976024384.0, "5860": 979439552.0, "5865": 978148352.0, "5870": 968604800.0, "5875": 942081792.0, "5880": 964361984.0, "5885": 974635264.0, "5890": 972499392.0, "5895": 965435904.0, "5900": 941175808.0, "5905": 961730432.0, "5910": 958446336.0, "5915": 967928576.0, "5920": 977235520.0, "5925": 959469824.0, "5930": 946577664.0, "5935": 952298048.0, "5940": 977465600.0, "5945": 984830720.0, "5950": 980530304.0, "5955": 934909760.0, "5960": 961539776.0, "5965": 965803520.0, "5970": 970514560.0, "5975": 961675264.0, "5980": 958086400.0, "5985": 964439936.0, "5990": 973381760.0, "5995": 955833856.0, "6000": 955473024.0, "6005": 961281280.0, "6010": 952609664.0, "6015": 974269184.0, "6020": 978020224.0, "6025": 971871104.0, "6030": 955287680.0, "6035": 946819712.0, "6040": 962494592.0, "6045": 983547136.0, "6050": 956393088.0, "6055": 963359616.0, "6060": 945583360.0, "6065": 958321536.0, "6070": 978182400.0, "6075": 977890560.0, "6080": 957321728.0, "6085": 947495808.0, "6090": 953634816.0, "6095": 964598656.0, "6100": 979702976.0, "6105": 971048960.0, "6110": 961632384.0, "6115": 943507840.0, "6120": 968476288.0, "6125": 960494720.0, "6130": 983671360.0, "6135": 960847104.0, "6140": 958474944.0, "6145": 970942592.0, "6150": 968419072.0, "6155": 974943104.0, "6160": 977314944.0, "6165": 952410816.0, "6170": 951019392.0, "6175": 963261056.0, "6180": 969317440.0, "6185": 966209792.0, "6190": 963584192.0, "6195": 946898560.0, "6200": 969195008.0, "6205": 966867072.0, "6210": 959094080.0, "6215": 972791104.0, "6220": 936153920.0, "6225": 978469696.0, "6230": 975851584.0, "6235": 971556544.0, "6240": 965723904.0, "6245": 955868672.0, "6250": 956253568.0, "6255": 973080192.0, "6260": 978570880.0, "6265": 974795136.0, "6270": 958560384.0, "6275": 963615744.0, "6280": 972920512.0, "6285": 966135296.0, "6290": 970845184.0, "6295": 987233024.0, "6300": 947370624.0, "6305": 964771840.0, "6310": 979152448.0, "6315": 978344704.0, "6320": 971638464.0, "6325": 922708992.0, "6330": 958826368.0, "6335": 974829952.0, "6340": 984530432.0, "6345": 966922880.0, "6350": 944525952.0, "6355": 957990144.0, "6360": 972515456.0, "6365": 972268800.0, "6370": 958820096.0, "6375": 966932544.0, "6380": 951265920.0, "6385": 973684224.0, "6390": 965368832.0, "6395": 974959488.0, "6400": 984229888.0, "6405": 944126656.0, "6410": 977154048.0, "6415": 971514048.0, "6420": 956530048.0, "6425": 961085056.0, "6430": 957341312.0, "6435": 960001984.0, "6440": 968825216.0, "6445": 973343936.0, "6450": 974355456.0, "6455": 962167616.0, "6460": 941044864.0, "6465": 974176192.0, "6470": 979789696.0, "6475": 960836480.0, "6480": 967656576.0, "6485": 948487424.0, "6490": 970896896.0, "6495": 988185216.0, "6500": 980162816.0, "6505": 971974080.0, "6510": 951329088.0, "6515": 957639488.0, "6520": 978534080.0, "6525": 978818432.0, "6530": 973112832.0, "6535": 967681664.0, "6540": 949926464.0, "6545": 966122880.0, "6550": 979186688.0, "6555": 966898560.0, "6560": 975305344.0, "6565": 949327232.0, "6570": 951871552.0, "6575": 962025216.0, "6580": 975459712.0, "6585": 979436096.0, "6590": 948819648.0, "6595": 961398912.0, "6600": 961035520.0, "6605": 961387712.0, "6610": 985261952.0, "6615": 959604864.0, "6620": 944392448.0, "6625": 970623296.0, "6630": 971319040.0, "6635": 963969536.0, "6640": 959549568.0, "6645": 950709504.0, "6650": 978527872.0, "6655": 965593984.0, "6660": 968298752.0, "6665": 968645504.0, "6670": 932542592.0, "6675": 970478976.0, "6680": 968697088.0, "6685": 958428416.0, "6690": 955883648.0, "6695": 955193088.0, "6700": 961816384.0, "6705": 978898816.0, "6710": 970680768.0, "6715": 966518592.0, "6720": 973696320.0, "6725": 941461440.0, "6730": 979068352.0, "6735": 994181056.0, "6740": 976271104.0, "6745": 974234752.0, "6750": 938818176.0, "6755": 977293312.0, "6760": 969438336.0, "6765": 978112192.0, "6770": 975145600.0, "6775": 943069184.0, "6780": 946927488.0, "6785": 975065088.0, "6790": 960090624.0, "6795": 975750784.0, "6800": 972730752.0, "6805": 946265728.0, "6810": 958129664.0, "6815": 970426432.0, "6820": 977582592.0, "6825": 969082048.0, "6830": 950103424.0, "6835": 981242752.0, "6840": 982863232.0, "6845": 948763904.0, "6850": 965380416.0, "6855": 953757312.0, "6860": 978584576.0, "6865": 983644672.0, "6870": 964683136.0, "6875": 978492672.0, "6880": 950138368.0, "6885": 958115968.0, "6890": 959858496.0, "6895": 965740160.0, "6900": 984915840.0, "6905": 968171648.0, "6910": 949790208.0, "6915": 970620288.0, "6920": 966797952.0, "6925": 964972416.0, "6930": 964311040.0, "6935": 951827008.0, "6940": 962700032.0, "6945": 986297920.0, "6950": 973154752.0, "6955": 964482624.0, "6960": 939806464.0, "6965": 974418496.0, "6970": 978198976.0, "6975": 984772032.0, "6980": 982077952.0, "6985": 959446400.0, "6990": 945089984.0, "6995": 986954752.0, "7000": 962949312.0, "7005": 962664640.0, "7010": 984653184.0, "7015": 945128576.0, "7020": 982638336.0, "7025": 968189888.0, "7030": 953218688.0, "7035": 982478784.0, "7040": 950287616.0, "7045": 955659904.0, "7050": 959973824.0, "7055": 963561472.0, "7060": 976520448.0, "7065": 967826304.0, "7070": 953400576.0, "7075": 956437120.0, "7080": 968756864.0, "7085": 965636224.0, "7090": 969043584.0, "7095": 959624448.0, "7100": 973197760.0, "7105": 973048704.0, "7110": 969799936.0, "7115": 958092672.0, "7120": 948744576.0, "7125": 962832896.0, "7130": 971299840.0, "7135": 964393216.0, "7140": 961213824.0, "7145": 930183232.0, "7150": 945960320.0, "7155": 990784256.0, "7160": 968134912.0, "7165": 956493696.0, "7170": 968041856.0, "7175": 955532928.0, "7180": 957827072.0, "7185": 984449472.0, "7190": 978477632.0, "7195": 973381504.0, "7200": 935623360.0, "7205": 957230336.0, "7210": 966881216.0, "7215": 969215616.0, "7220": 981925376.0, "7225": 928530432.0, "7230": 949353088.0, "7235": 966760576.0, "7240": 966575168.0, "7245": 967134912.0, "7250": 949025088.0, "7255": 956946368.0, "7260": 969807872.0, "7265": 974691648.0, "7270": 959566592.0, "7275": 958942848.0, "7280": 956765824.0, "7285": 977538560.0, "7290": 976734080.0, "7295": 962754880.0, "7300": 975082176.0, "7305": 963739776.0, "7310": 976988032.0, "7315": 966514048.0, "7320": 974233408.0, "7325": 966526080.0, "7330": 959260544.0, "7335": 963692864.0, "7340": 977422592.0, "7345": 967423744.0, "7350": 984486528.0, "7355": 959355520.0, "7360": 948462720.0, "7365": 972380288.0, "7370": 982188928.0, "7375": 963174656.0, "7380": 963633280.0, "7385": 948295808.0, "7390": 963566400.0, "7395": 958074624.0, "7400": 969333440.0, "7405": 987281280.0, "7410": 951600128.0, "7415": 950185728.0, "7420": 966599296.0, "7425": 982484800.0, "7430": 965429312.0, "7435": 972606592.0, "7440": 936859200.0, "7445": 968497024.0, "7450": 979957696.0, "7455": 970923776.0, "7460": 971840896.0, "7465": 939178432.0, "7470": 971494592.0, "7475": 957699840.0, "7480": 968804352.0, "7485": 961318528.0, "7490": 933789312.0, "7495": 957100544.0, "7500": 969006592.0, "7505": 969803648.0, "7510": 971704192.0, "7515": 979311680.0, "7520": 951624960.0, "7525": 970088000.0, "7530": 953869440.0, "7535": 971075776.0, "7540": 979153728.0, "7545": 958988480.0, "7550": 960133120.0, "7555": 959912320.0, "7560": 969914368.0, "7565": 954751808.0, "7570": 942337152.0, "7575": 965335296.0, "7580": 982003200.0, "7585": 978710656.0, "7590": 969922368.0, "7595": 949709184.0, "7600": 945771776.0, "7605": 982120576.0, "7610": 968713216.0, "7615": 988171520.0, "7620": 956914048.0, "7625": 940939520.0, "7630": 971317248.0, "7635": 984252672.0, "7640": 983151168.0, "7645": 967816320.0, "7650": 958914176.0, "7655": 962227200.0, "7660": 968720896.0, "7665": 977950912.0, "7670": 975216896.0, "7675": 975006848.0, "7680": 942233088.0, "7685": 959380480.0, "7690": 975130240.0, "7695": 982011904.0, "7700": 978902976.0, "7705": 940606208.0, "7710": 974454784.0, "7715": 979171456.0, "7720": 967490560.0, "7725": 960158592.0, "7730": 942943488.0, "7735": 967944832.0, "7740": 980179072.0, "7745": 963843328.0, "7750": 963459328.0, "7755": 959614848.0, "7760": 970433344.0, "7765": 970703552.0, "7770": 962102848.0, "7775": 981846464.0, "7780": 964457472.0, "7785": 959517056.0, "7790": 967987840.0, "7795": 968687424.0, "7800": 971671040.0, "7805": 968158592.0, "7810": 945835136.0, "7815": 963331200.0, "7820": 974466304.0, "7825": 963474560.0, "7830": 957260288.0, "7835": 949782656.0, "7840": 957269312.0, "7845": 954002944.0, "7850": 979745088.0, "7855": 986607936.0, "7860": 947287040.0, "7865": 949166208.0, "7870": 965038592.0, "7875": 975639808.0, "7880": 968440192.0, "7885": 969100288.0, "7890": 951917056.0, "7895": 974263360.0, "7900": 963606656.0, "7905": 963902464.0, "7910": 965879680.0, "7915": 943390208.0, "7920": 950807040.0, "7925": 969449856.0, "7930": 964844544.0, "7935": 984472576.0, "7940": 964350400.0, "7945": 950747264.0, "7950": 962036352.0, "7955": 979737344.0, "7960": 963687936.0, "7965": 953212928.0, "7970": 951432448.0, "7975": 969081344.0, "7980": 965377920.0, "7985": 959348736.0, "7990": 968055040.0, "7995": 946779136.0, "8000": 962594304.0, "8005": 980605056.0, "8010": 965702976.0, "8015": 982808256.0, "8020": 960389952.0, "8025": 965309888.0, "8030": 958262208.0, "8035": 975592640.0, "8040": 960643584.0, "8045": 948272384.0, "8050": 959290432.0, "8055": 979224768.0, "8060": 969468672.0, "8065": 957939904.0, "8070": 963845696.0, "8075": 941970688.0, "8080": 965985472.0, "8085": 966864320.0, "8090": 983528064.0, "8095": 988650688.0, "8100": 966078208.0, "8105": 944512128.0, "8110": 968908992.0, "8115": 985180608.0, "8120": 974701888.0, "8125": 964153664.0, "8130": 966322432.0, "8135": 967723904.0, "8140": 963798528.0, "8145": 995247616.0, "8150": 973142080.0, "8155": 938476544.0, "8160": 964315008.0, "8165": 972925952.0, "8170": 968275520.0, "8175": 961599808.0, "8180": 936139392.0, "8185": 962736896.0, "8190": 968033088.0, "8195": 977397632.0, "8200": 956563840.0, "8205": 960812480.0, "8210": 946741568.0, "8215": 982398592.0, "8220": 987755072.0, "8225": 966280064.0, "8230": 962282688.0, "8235": 933738944.0, "8240": 980304576.0, "8245": 976189632.0, "8250": 963750400.0, "8255": 977001088.0, "8260": 956452416.0, "8265": 982710592.0, "8270": 952492096.0, "8275": 973853312.0, "8280": 974218048.0, "8285": 953334080.0, "8290": 939714688.0, "8295": 981089472.0, "8300": 973026240.0, "8305": 978061504.0, "8310": 950924096.0, "8315": 937711936.0, "8320": 977374080.0, "8325": 967837376.0, "8330": 990203584.0, "8335": 975599104.0, "8340": 947317568.0, "8345": 970703104.0, "8350": 970132096.0, "8355": 974915968.0, "8360": 979210496.0, "8365": 932621952.0, "8370": 965304512.0, "8375": 979830976.0, "8380": 965015744.0, "8385": 972730560.0, "8390": 962613248.0, "8395": 951077120.0, "8400": 972409664.0, "8405": 951415680.0, "8410": 960789632.0, "8415": 965573312.0, "8420": 941582016.0, "8425": 968081152.0, "8430": 961159616.0, "8435": 966042432.0, "8440": 969658688.0, "8445": 952713344.0, "8450": 984510784.0, "8455": 990051520.0, "8460": 968656960.0, "8465": 967147136.0, "8470": 962985088.0, "8475": 943118272.0, "8480": 986979264.0, "8485": 979689920.0, "8490": 992280000.0, "8495": 971471552.0, "8500": 951317824.0, "8505": 983350528.0, "8510": 974186112.0, "8515": 968862976.0, "8520": 961618560.0, "8525": 945070592.0, "8530": 984119616.0, "8535": 978264000.0, "8540": 967839104.0, "8545": 968719616.0, "8550": 941721408.0, "8555": 971721728.0, "8560": 958210112.0, "8565": 975541440.0, "8570": 974850816.0, "8575": 971165696.0, "8580": 932030912.0, "8585": 965721024.0, "8590": 978774592.0, "8595": 979179264.0, "8600": 983534976.0, "8605": 957472768.0, "8610": 983406848.0, "8615": 977662720.0, "8620": 963278016.0, "8625": 979344320.0, "8630": 943469248.0, "8635": 961638848.0, "8640": 973401088.0, "8645": 970304896.0, "8650": 969363520.0, "8655": 970457216.0, "8660": 943783488.0, "8665": 986105984.0, "8670": 960734720.0, "8675": 973824192.0, "8680": 962276288.0, "8685": 955741760.0, "8690": 978501312.0, "8695": 968820032.0, "8700": 972532608.0, "8705": 973766784.0, "8710": 946858560.0, "8715": 973281984.0, "8720": 958450816.0, "8725": 979051520.0, "8730": 985599936.0, "8735": 952338240.0, "8740": 940889408.0, "8745": 987271872.0, "8750": 972048384.0, "8755": 971578176.0, "8760": 965483200.0, "8765": 934360448.0, "8770": 986362240.0, "8775": 969740736.0, "8780": 966950400.0, "8785": 961987648.0, "8790": 947565056.0, "8795": 969517056.0, "8800": 970803392.0, "8805": 972994112.0, "8810": 983429888.0, "8815": 951016960.0, "8820": 939808704.0, "8825": 964408448.0, "8830": 981249472.0, "8835": 971398336.0, "8840": 979160512.0, "8845": 951234560.0, "8850": 986674560.0, "8855": 970377472.0, "8860": 962000896.0, "8865": 956620736.0, "8870": 945714880.0, "8875": 968444160.0, "8880": 983889920.0, "8885": 971269056.0, "8890": 969427200.0, "8895": 952763456.0, "8900": 961386176.0, "8905": 976606592.0, "8910": 981717440.0, "8915": 980645504.0, "8920": 968074112.0, "8925": 939992256.0, "8930": 970258432.0, "8935": 964040384.0, "8940": 977644800.0, "8945": 981826944.0, "8950": 945562496.0, "8955": 972497152.0, "8960": 973356992.0, "8965": 973678848.0, "8970": 966311424.0, "8975": 936927552.0, "8980": 952829248.0, "8985": 977632320.0, "8990": 967399424.0, "8995": 980307840.0, "9000": 952199232.0, "9005": 950403776.0, "9010": 974875392.0, "9015": 982753728.0, "9020": 958935488.0, "9025": 979374144.0, "9030": 953690688.0, "9035": 968675712.0, "9040": 978208384.0, "9045": 968412032.0, "9050": 983012544.0, "9055": 947799488.0, "9060": 956368192.0, "9065": 969361984.0, "9070": 967990592.0, "9075": 980648000.0, "9080": 952455488.0, "9085": 971552512.0, "9090": 963642816.0, "9095": 968240320.0, "9100": 974375744.0, "9105": 960292096.0, "9110": 947543104.0, "9115": 956514368.0, "9120": 985151168.0, "9125": 963050368.0, "9130": 958395136.0, "9135": 951643648.0, "9140": 967074432.0, "9145": 976978944.0, "9150": 986789952.0, "9155": 976907008.0, "9160": 957621504.0, "9165": 950526656.0, "9170": 988443776.0, "9175": 971262400.0, "9180": 967509504.0, "9185": 955042112.0, "9190": 956366976.0, "9195": 965770368.0, "9200": 968622848.0, "9205": 967231744.0, "9210": 984253184.0, "9215": 931731776.0, "9220": 949083264.0, "9225": 970972416.0, "9230": 970827200.0, "9235": 971550848.0, "9240": 959819392.0, "9245": 963536256.0, "9250": 961576256.0, "9255": 982627072.0, "9260": 979369344.0, "9265": 952769152.0, "9270": 948946560.0, "9275": 978587840.0, "9280": 977583360.0, "9285": 962705472.0, "9290": 978850560.0, "9295": 958658624.0, "9300": 965618304.0, "9305": 968911488.0, "9310": 972879232.0, "9315": 976034368.0, "9320": 947948352.0, "9325": 979431360.0, "9330": 977558272.0, "9335": 975487744.0, "9340": 960151872.0, "9345": 943218176.0, "9350": 952676288.0, "9355": 963291136.0, "9360": 960074112.0, "9365": 983534848.0, "9370": 982557952.0, "9375": 942044672.0, "9380": 982945920.0, "9385": 985381568.0, "9390": 973099712.0, "9395": 978541248.0, "9400": 937756160.0, "9405": 968082816.0, "9410": 981275392.0, "9415": 991699072.0, "9420": 960244800.0, "9425": 956695680.0, "9430": 938879872.0, "9435": 974510464.0, "9440": 959322368.0, "9445": 973635584.0, "9450": 961485184.0, "9455": 945813120.0, "9460": 978135680.0, "9465": 988016576.0, "9470": 962977216.0, "9475": 983614016.0, "9480": 931015168.0, "9485": 986877504.0, "9490": 963520000.0, "9495": 972526464.0, "9500": 982355008.0, "9505": 970113472.0, "9510": 964358720.0, "9515": 956691264.0, "9520": 947893248.0, "9525": 965359680.0, "9530": 958138048.0, "9535": 951097600.0, "9540": 953849536.0, "9545": 979579200.0, "9550": 955691648.0, "9555": 952532928.0, "9560": 958223360.0, "9565": 969425920.0, "9570": 977369856.0, "9575": 959033984.0, "9580": 963097536.0, "9585": 945779776.0, "9590": 948295360.0, "9595": 966492160.0, "9600": 984344000.0, "9605": 984934912.0, "9610": 943517952.0, "9615": 952482496.0, "9620": 980674816.0, "9625": 978574272.0, "9630": 970052544.0, "9635": 974733184.0, "9640": 940245440.0, "9645": 962402112.0, "9650": 971201664.0, "9655": 987483968.0, "9660": 963258752.0, "9665": 949972864.0, "9670": 966334592.0, "9675": 963075968.0, "9680": 965003840.0, "9685": 986401984.0, "9690": 940337664.0, "9695": 950509504.0, "9700": 975088128.0, "9705": 972504064.0, "9710": 967558912.0, "9715": 971305216.0, "9720": 940514752.0, "9725": 966097152.0, "9730": 973731968.0, "9735": 974370176.0, "9740": 971295680.0, "9745": 950816192.0, "9750": 979727232.0, "9755": 970292480.0, "9760": 968175296.0, "9765": 963811392.0, "9770": 952258304.0, "9775": 956670528.0, "9780": 970353408.0, "9785": 958627264.0, "9790": 961264512.0, "9795": 958201472.0, "9800": 948971520.0, "9805": 962036992.0, "9810": 978381568.0, "9815": 977262656.0, "9820": 982496960.0, "9825": 939141376.0, "9830": 969073216.0, "9835": 972339072.0, "9840": 971208320.0, "9845": 966656960.0, "9850": 946518784.0, "9855": 956995328.0, "9860": 986979584.0, "9865": 970027904.0, "9870": 989916864.0, "9875": 956925696.0, "9880": 931276800.0, "9885": 963470400.0, "9890": 972405568.0, "9895": 983489792.0, "9900": 956755968.0, "9905": 938556160.0, "9910": 978602368.0, "9915": 973099200.0, "9920": 944426496.0, "9925": 962916992.0, "9930": 947183744.0, "9935": 960278272.0, "9940": 965697280.0, "9945": 958382720.0, "9950": 963625856.0, "9955": 942997056.0, "9960": 966882304.0, "9965": 983350592.0, "9970": 966386496.0, "9975": 963881344.0, "9980": 980328512.0, "9985": 941824384.0, "9990": 976809984.0, "9995": 982129024.0, "10000": 971851776.0, "10005": 969861376.0, "10010": 943877440.0, "10015": 982577152.0, "10020": 977804032.0, "10025": 979549888.0, "10030": 971244672.0, "10035": 946533312.0, "10040": 950485760.0, "10045": 978173696.0, "10050": 985758592.0, "10055": 990346368.0, "10060": 959069696.0, "10065": 947062784.0, "10070": 966931584.0, "10075": 979401280.0, "10080": 971622080.0, "10085": 974745856.0, "10090": 944008832.0, "10095": 963125376.0, "10100": 972146944.0, "10105": 976034752.0, "10110": 971772800.0, "10115": 948353792.0, "10120": 962507264.0, "10125": 974194752.0, "10130": 980564736.0, "10135": 972567680.0, "10140": 957806016.0, "10145": 933887232.0, "10150": 973855872.0, "10155": 969285952.0, "10160": 961618304.0, "10165": 974885760.0, "10170": 944427776.0, "10175": 978892608.0, "10180": 983664000.0, "10185": 978746752.0, "10190": 955519744.0, "10195": 937150144.0, "10200": 988093952.0, "10205": 972676352.0, "10210": 966740672.0, "10215": 975630720.0, "10220": 948295552.0, "10225": 950289280.0, "10230": 975223168.0, "10235": 953831744.0, "10240": 969616640.0, "10245": 961772032.0, "10250": 936264064.0, "10255": 979504128.0, "10260": 964644352.0, "10265": 967489600.0, "10270": 968425088.0, "10275": 936103808.0, "10280": 969561856.0, "10285": 996083968.0, "10290": 979454656.0, "10295": 981540224.0, "10300": 951828480.0, "10305": 971864512.0, "10310": 960064768.0, "10315": 971187840.0, "10320": 985120896.0, "10325": 983215936.0, "10330": 934926464.0, "10335": 976317696.0, "10340": 957468800.0, "10345": 973596928.0, "10350": 984756096.0, "10355": 941803008.0, "10360": 961872512.0, "10365": 974268416.0, "10370": 980346176.0, "10375": 969875584.0, "10380": 961527616.0, "10385": 955072512.0, "10390": 990463232.0, "10395": 964845440.0, "10400": 960777984.0, "10405": 949747968.0, "10410": 955098176.0, "10415": 976146624.0, "10420": 967212096.0, "10425": 969751808.0, "10430": 964795648.0, "10435": 963036736.0, "10440": 971963200.0, "10445": 972183296.0, "10450": 975122048.0, "10455": 966331904.0, "10460": 948480896.0, "10465": 971374464.0, "10470": 972572672.0, "10475": 979157888.0, "10480": 997170048.0, "10485": 949275904.0, "10490": 934796800.0, "10495": 969120896.0, "10500": 977939456.0, "10505": 958974848.0, "10510": 950564864.0, "10515": 953891648.0, "10520": 972026624.0, "10525": 969414016.0, "10530": 970264896.0, "10535": 986317568.0, "10540": 946952448.0, "10545": 970374144.0, "10550": 968708032.0, "10555": 959557632.0, "10560": 975881664.0, "10565": 961363648.0, "10570": 968627200.0, "10575": 972498368.0, "10580": 960586752.0, "10585": 973333312.0, "10590": 951660288.0, "10595": 956430016.0, "10600": 967505280.0, "10605": 986588288.0, "10610": 966405248.0, "10615": 976526336.0, "10620": 940774144.0, "10625": 964938240.0, "10630": 967592064.0, "10635": 973064768.0, "10640": 974382592.0, "10645": 948458688.0, "10650": 966118656.0, "10655": 985083136.0, "10660": 976596736.0, "10665": 967187456.0, "10670": 954858240.0, "10675": 934123776.0, "10680": 986080640.0, "10685": 990783424.0, "10690": 963935488.0, "10695": 971897600.0, "10700": 949662464.0, "10705": 977799040.0, "10710": 968120576.0, "10715": 966837376.0, "10720": 966149760.0, "10725": 944047616.0, "10730": 980048704.0, "10735": 960773376.0, "10740": 971234048.0, "10745": 984200064.0, "10750": 981234944.0, "10755": 944244992.0, "10760": 969661632.0, "10765": 972555392.0, "10770": 973764864.0, "10775": 958674688.0, "10780": 949434368.0, "10785": 953571456.0, "10790": 970121216.0, "10795": 960127488.0, "10800": 972161280.0, "10805": 950822848.0, "10810": 973682880.0, "10815": 959656832.0, "10820": 971093696.0, "10825": 967113792.0, "10830": 956819456.0, "10835": 963015552.0, "10840": 970409024.0, "10845": 963739136.0, "10850": 957549312.0, "10855": 967510912.0, "10860": 950518400.0, "10865": 964160128.0, "10870": 983381376.0, "10875": 982274560.0, "10880": 958610752.0, "10885": 954466816.0, "10890": 972840192.0, "10895": 973359552.0, "10900": 970362240.0, "10905": 964840448.0, "10910": 938557824.0, "10915": 960615424.0, "10920": 982842368.0, "10925": 969796480.0, "10930": 968551104.0, "10935": 962492544.0, "10940": 953943040.0, "10945": 964767936.0, "10950": 972467008.0, "10955": 966633216.0, "10960": 971941824.0, "10965": 966455552.0, "10970": 983437248.0, "10975": 965796672.0, "10980": 974656832.0, "10985": 986572608.0, "10990": 950958336.0, "10995": 963353408.0, "11000": 985286784.0, "11005": 978548224.0, "11010": 971455360.0, "11015": 969726400.0, "11020": 947938112.0, "11025": 959834624.0, "11030": 977837824.0, "11035": 975095872.0, "11040": 986142784.0, "11045": 956475200.0, "11050": 973064832.0, "11055": 974445952.0, "11060": 962065280.0, "11065": 985161344.0, "11070": 949631488.0, "11075": 976197760.0, "11080": 971823744.0, "11085": 967095040.0, "11090": 976461504.0, "11095": 946438144.0, "11100": 965375424.0, "11105": 973585792.0, "11110": 980633600.0, "11115": 967886656.0, "11120": 956744384.0, "11125": 956475200.0, "11130": 975504896.0, "11135": 979451712.0, "11140": 964612992.0, "11145": 966377088.0, "11150": 935937152.0, "11155": 975939584.0, "11160": 983999424.0, "11165": 982400320.0, "11170": 977339264.0, "11175": 957764608.0, "11180": 962147136.0, "11185": 971844672.0, "11190": 979692288.0, "11195": 985085376.0, "11200": 982817216.0, "11205": 942217408.0, "11210": 984703488.0, "11215": 967396928.0, "11220": 983169280.0, "11225": 961691264.0, "11230": 953189568.0, "11235": 981706240.0, "11240": 977425152.0, "11245": 966073920.0, "11250": 969233408.0, "11255": 960376256.0, "11260": 979703296.0, "11265": 963607232.0, "11270": 981118656.0, "11275": 968047232.0, "11280": 955794176.0, "11285": 953405440.0, "11290": 956623360.0, "11295": 968271680.0, "11300": 962673344.0, "11305": 958624960.0, "11310": 946341184.0, "11315": 982999936.0, "11320": 964742912.0, "11325": 981029568.0, "11330": 975564288.0, "11335": 952309056.0, "11340": 970375616.0, "11345": 969803648.0, "11350": 981444096.0, "11355": 982385024.0, "11360": 941016704.0, "11365": 970431360.0, "11370": 978805312.0, "11375": 975313792.0, "11380": 968478016.0, "11385": 958625152.0, "11390": 938120832.0, "11395": 977384192.0, "11400": 972740288.0, "11405": 961470400.0, "11410": 966144128.0, "11415": 929225856.0, "11420": 964547904.0, "11425": 980999168.0, "11430": 978502144.0, "11435": 970006400.0, "11440": 945134656.0, "11445": 974983936.0, "11450": 984464448.0, "11455": 971217024.0, "11460": 964937088.0, "11465": 959659776.0, "11470": 954982528.0, "11475": 972653632.0, "11480": 956542208.0, "11485": 977052928.0, "11490": 986172224.0, "11495": 959133824.0, "11500": 970309248.0, "11505": 963864960.0, "11510": 976505280.0, "11515": 978026624.0, "11520": 954061696.0, "11525": 975806016.0, "11530": 976686720.0, "11535": 979970048.0, "11540": 974399040.0, "11545": 953188288.0, "11550": 953074304.0, "11555": 981696128.0, "11560": 984556736.0, "11565": 965242048.0, "11570": 966375872.0, "11575": 951523968.0, "11580": 975902976.0, "11585": 977428032.0, "11590": 969376384.0, "11595": 976214208.0, "11600": 946488640.0, "11605": 973334272.0, "11610": 982199616.0, "11615": 971997440.0, "11620": 968577856.0, "11625": 948852096.0, "11630": 937541248.0, "11635": 973606016.0, "11640": 981160768.0, "11645": 980069504.0, "11650": 971782464.0, "11655": 956044224.0, "11660": 980441920.0, "11665": 958240768.0, "11670": 982643776.0, "11675": 972390592.0, "11680": 956322304.0, "11685": 982734016.0, "11690": 968614464.0, "11695": 967558592.0, "11700": 973842752.0, "11705": 956309760.0, "11710": 964708032.0, "11715": 983208768.0, "11720": 983538048.0, "11725": 965265600.0, "11730": 955214208.0, "11735": 942646336.0, "11740": 973792512.0, "11745": 971088320.0, "11750": 961672064.0, "11755": 963374080.0, "11760": 949543168.0, "11765": 983755008.0, "11770": 984528384.0, "11775": 975438144.0, "11780": 984888832.0, "11785": 946940288.0, "11790": 972493440.0, "11795": 970387968.0, "11800": 973227264.0, "11805": 986472960.0, "11810": 967530304.0, "11815": 955616128.0, "11820": 973433920.0, "11825": 970646848.0, "11830": 974846080.0, "11835": 961579392.0, "11840": 944504512.0, "11845": 980497536.0, "11850": 974454528.0, "11855": 977918336.0, "11860": 971612352.0, "11865": 938262400.0, "11870": 940126208.0, "11875": 990048704.0, "11880": 972206784.0, "11885": 962901568.0, "11890": 970318208.0, "11895": 965208192.0, "11900": 979393920.0, "11905": 961566912.0, "11910": 982719680.0, "11915": 989857600.0, "11920": 944868608.0, "11925": 994162944.0, "11930": 964190528.0, "11935": 963465152.0, "11940": 976622080.0, "11945": 944763008.0, "11950": 977159808.0, "11955": 979128640.0, "11960": 972399168.0, "11965": 976260672.0, "11970": 963060544.0, "11975": 963129408.0, "11980": 977634432.0, "11985": 953073920.0, "11990": 968742400.0, "11995": 965156352.0, "12000": 958418112.0, "12005": 973932800.0, "12010": 979537920.0, "12015": 972212736.0, "12020": 973267520.0, "12025": 934456768.0, "12030": 969246336.0, "12035": 984077504.0, "12040": 977777472.0, "12045": 981645632.0, "12050": 931299456.0, "12055": 938852096.0, "12060": 974410240.0, "12065": 965723008.0, "12070": 968326208.0, "12075": 949990592.0, "12080": 953194240.0, "12085": 972673152.0, "12090": 963907776.0, "12095": 963627072.0, "12100": 976849088.0, "12105": 950520256.0, "12110": 971690368.0, "12115": 968300352.0, "12120": 986518208.0, "12125": 980798080.0, "12130": 941565184.0, "12135": 955700864.0, "12140": 976100864.0, "12145": 979711616.0, "12150": 979730944.0, "12155": 962112384.0, "12160": 946594176.0, "12165": 968588032.0, "12170": 964645760.0, "12175": 967863936.0, "12180": 975310656.0, "12185": 953131520.0, "12190": 988619136.0, "12195": 970961088.0, "12200": 965117120.0, "12205": 968926720.0, "12210": 939426496.0, "12215": 997290688.0, "12220": 970357184.0, "12225": 979974272.0, "12230": 980481408.0, "12235": 950285504.0, "12240": 963896832.0, "12245": 966011648.0, "12250": 976881216.0, "12255": 968239744.0, "12260": 983813760.0, "12265": 932017664.0, "12270": 966679040.0, "12275": 979923008.0, "12280": 977409216.0, "12285": 970702528.0, "12290": 929600256.0, "12295": 977118656.0, "12300": 986039360.0, "12305": 970126848.0, "12310": 986129472.0, "12315": 936110464.0, "12320": 957900288.0, "12325": 966485952.0, "12330": 968416512.0, "12335": 963885568.0, "12340": 957481216.0, "12345": 944333504.0, "12350": 966714496.0, "12355": 975720896.0, "12360": 978980352.0, "12365": 964351680.0, "12370": 949002240.0, "12375": 963753920.0, "12380": 964518144.0, "12385": 973139392.0, "12390": 961403136.0, "12395": 961530944.0, "12400": 975517120.0, "12405": 976523520.0, "12410": 953484928.0, "12415": 963238144.0, "12420": 944371136.0, "12425": 949470016.0, "12430": 972612736.0, "12435": 968709440.0, "12440": 962225600.0, "12445": 952293120.0, "12450": 947771904.0, "12455": 981396992.0, "12460": 974272512.0, "12465": 954487232.0, "12470": 981144320.0, "12475": 958704384.0, "12480": 967121920.0, "12485": 978366016.0, "12490": 974278784.0, "12495": 969692096.0, "12500": 961659904.0, "12505": 943534912.0, "12510": 960928768.0, "12515": 969530304.0, "12520": 973764160.0, "12525": 972291392.0, "12530": 944744576.0, "12535": 976634496.0, "12540": 965682880.0, "12545": 972057792.0, "12550": 969770432.0, "12555": 941177664.0, "12560": 964738560.0, "12565": 947584320.0, "12570": 974419712.0, "12575": 962993280.0, "12580": 958078592.0, "12585": 964422976.0, "12590": 965961088.0, "12595": 978854528.0, "12600": 981995776.0, "12605": 949260736.0, "12610": 937854656.0, "12615": 962619712.0, "12620": 961337152.0, "12625": 966494016.0, "12630": 970714112.0, "12635": 962162688.0, "12640": 978283392.0, "12645": 969272128.0, "12650": 970076032.0, "12655": 964130240.0, "12660": 932255680.0, "12665": 956938560.0, "12670": 986216960.0, "12675": 965529856.0, "12680": 961113536.0, "12685": 951218048.0, "12690": 945392256.0, "12695": 978288768.0, "12700": 985215808.0, "12705": 958807744.0, "12710": 968417792.0, "12715": 956500736.0, "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": "nan", "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": "nan", "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": "nan"}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 13000, "step_interval": 5, "values": {"1": 12698293248.0, "5": 12698293248.0, "10": 12698293248.0, "15": 12698293248.0, "20": 12698293248.0, "25": 12698293248.0, "30": 12698293248.0, "35": 12698293248.0, "40": 12698293248.0, "45": 12698293248.0, "50": 12698293248.0, "55": 12698293248.0, "60": 12698293248.0, "65": 12698293248.0, "70": 12698293248.0, "75": 12698293248.0, "80": 12698293248.0, "85": 12698293248.0, "90": 12698293248.0, "95": 12698293248.0, "100": 12698293248.0, "105": 12698293248.0, "110": 12698293248.0, "115": 12698293248.0, "120": 12698293248.0, "125": 12698293248.0, "130": 12698293248.0, "135": 12698293248.0, "140": 12698293248.0, "145": 12698293248.0, "150": 12698293248.0, "155": 12698293248.0, "160": 12698293248.0, "165": 12698293248.0, "170": 12698293248.0, "175": 12698293248.0, "180": 12698293248.0, "185": 12698293248.0, "190": 12698293248.0, "195": 12698293248.0, "200": 12698293248.0, "205": 12698293248.0, "210": 12698293248.0, "215": 12698293248.0, "220": 12698293248.0, "225": 12698293248.0, "230": 12698293248.0, "235": 12698293248.0, "240": 12698293248.0, "245": 12698293248.0, "250": 12698293248.0, "255": 12698293248.0, "260": 12698293248.0, "265": 12698293248.0, "270": 12698293248.0, "275": 12698293248.0, "280": 12698293248.0, "285": 12698293248.0, "290": 12698293248.0, "295": 12698293248.0, "300": 12698293248.0, "305": 12698293248.0, "310": 12698293248.0, "315": 12698293248.0, "320": 12698293248.0, "325": 12698293248.0, "330": 12698293248.0, "335": 12698293248.0, "340": 12698293248.0, "345": 12698293248.0, "350": 12698293248.0, "355": 12698293248.0, "360": 12698293248.0, "365": 12698293248.0, "370": 12698293248.0, "375": 12698293248.0, "380": 12698293248.0, "385": 12698293248.0, "390": 12698293248.0, "395": 12698293248.0, "400": 12698293248.0, "405": 12698293248.0, "410": 12698293248.0, "415": 12698293248.0, "420": 12698293248.0, "425": 12698293248.0, "430": 12698293248.0, "435": 12698293248.0, "440": 12698293248.0, "445": 12698293248.0, "450": 12698293248.0, "455": 12698293248.0, "460": 12698293248.0, "465": 12698293248.0, "470": 12698293248.0, "475": 12698293248.0, "480": 12698293248.0, "485": 12698293248.0, "490": 12698293248.0, "495": 12698293248.0, "500": 12698293248.0, "505": 12698293248.0, "510": 12698293248.0, "515": 12698293248.0, "520": 12698293248.0, "525": 12698293248.0, "530": 12698293248.0, "535": 12698293248.0, "540": 12698293248.0, "545": 12698293248.0, "550": 12698293248.0, "555": 12698293248.0, "560": 12698293248.0, "565": 12698293248.0, "570": 12698293248.0, "575": 12698293248.0, "580": 12698293248.0, "585": 12698293248.0, "590": 12698293248.0, "595": 12698293248.0, "600": 12698293248.0, "605": 12698293248.0, "610": 12698293248.0, "615": 12698293248.0, "620": 12698293248.0, "625": 12698293248.0, "630": 12698293248.0, "635": 12698293248.0, "640": 12698293248.0, "645": 12698293248.0, "650": 12698293248.0, "655": 12698293248.0, "660": 12698293248.0, "665": 12698293248.0, "670": 12698293248.0, "675": 12698293248.0, "680": 12698293248.0, "685": 12698293248.0, "690": 12698293248.0, "695": 12698293248.0, "700": 12698293248.0, "705": 12698293248.0, "710": 12698293248.0, "715": 12698293248.0, "720": 12698293248.0, "725": 12698293248.0, "730": 12698293248.0, "735": 12698293248.0, "740": 12698293248.0, "745": 12698293248.0, "750": 12698293248.0, "755": 12698293248.0, "760": 12698293248.0, "765": 12698293248.0, "770": 12698293248.0, "775": 12698293248.0, "780": 12698293248.0, "785": 12698293248.0, "790": 12698293248.0, "795": 12698293248.0, "800": 12698293248.0, "805": 12698293248.0, "810": 12698293248.0, "815": 12698293248.0, "820": 12698293248.0, "825": 12698293248.0, "830": 12698293248.0, "835": 12698293248.0, "840": 12698293248.0, "845": 12698293248.0, "850": 12698293248.0, "855": 12698293248.0, "860": 12698293248.0, "865": 12698293248.0, "870": 12698293248.0, "875": 12698293248.0, "880": 12698293248.0, "885": 12698293248.0, "890": 12698293248.0, "895": 12698293248.0, "900": 12698293248.0, "905": 12698293248.0, "910": 12698293248.0, "915": 12698293248.0, "920": 12698293248.0, "925": 12698293248.0, "930": 12698293248.0, "935": 12698293248.0, "940": 12698293248.0, "945": 12698293248.0, "950": 12698293248.0, "955": 12698293248.0, "960": 12698293248.0, "965": 12698293248.0, "970": 12698293248.0, "975": 12698293248.0, "980": 12698293248.0, "985": 12698293248.0, "990": 12698293248.0, "995": 12698293248.0, "1000": 12698293248.0, "1005": 12698293248.0, "1010": 12698293248.0, "1015": 12698293248.0, "1020": 12698293248.0, "1025": 12698293248.0, "1030": 12698293248.0, "1035": 12698293248.0, "1040": 12698293248.0, "1045": 12698293248.0, "1050": 12698293248.0, "1055": 12698293248.0, "1060": 12698293248.0, "1065": 12698293248.0, "1070": 12698293248.0, "1075": 12698293248.0, "1080": 12698293248.0, "1085": 12698293248.0, "1090": 12698293248.0, "1095": 12698293248.0, "1100": 12698293248.0, "1105": 12698293248.0, "1110": 12698293248.0, "1115": 12698293248.0, "1120": 12698293248.0, "1125": 12698293248.0, "1130": 12698293248.0, "1135": 12698293248.0, "1140": 12698293248.0, "1145": 12698293248.0, "1150": 12698293248.0, "1155": 12698293248.0, "1160": 12698293248.0, "1165": 12698293248.0, "1170": 12698293248.0, "1175": 12698293248.0, "1180": 12698293248.0, "1185": 12698293248.0, "1190": 12698293248.0, "1195": 12698293248.0, "1200": 12698293248.0, "1205": 12698293248.0, "1210": 12698293248.0, "1215": 12698293248.0, "1220": 12698293248.0, "1225": 12698293248.0, "1230": 12698293248.0, "1235": 12698293248.0, "1240": 12698293248.0, "1245": 12698293248.0, "1250": 12698293248.0, "1255": 12698293248.0, "1260": 12698293248.0, "1265": 12698293248.0, "1270": 12698293248.0, "1275": 12698293248.0, "1280": 12698293248.0, "1285": 12698293248.0, "1290": 12698293248.0, "1295": 12698293248.0, "1300": 12698293248.0, "1305": 12698293248.0, "1310": 12698293248.0, "1315": 12698293248.0, "1320": 12698293248.0, "1325": 12698293248.0, "1330": 12698293248.0, "1335": 12698293248.0, "1340": 12698293248.0, "1345": 12698293248.0, "1350": 12698293248.0, "1355": 12698293248.0, "1360": 12698293248.0, "1365": 12698293248.0, "1370": 12698293248.0, "1375": 12698293248.0, "1380": 12698293248.0, "1385": 12698293248.0, "1390": 12698293248.0, "1395": 12698293248.0, "1400": 12698293248.0, "1405": 12698293248.0, "1410": 12698293248.0, "1415": 12698293248.0, "1420": 12698293248.0, "1425": 12698293248.0, "1430": 12698293248.0, "1435": 12698293248.0, "1440": 12698293248.0, "1445": 12698293248.0, "1450": 12698293248.0, "1455": 12698293248.0, "1460": 12698293248.0, "1465": 12698293248.0, "1470": 12698293248.0, "1475": 12698293248.0, "1480": 12698293248.0, "1485": 12698293248.0, "1490": 12698293248.0, "1495": 12698293248.0, "1500": 12698293248.0, "1505": 12698293248.0, "1510": 12698293248.0, "1515": 12698293248.0, "1520": 12698293248.0, "1525": 12698293248.0, "1530": 12698293248.0, "1535": 12698293248.0, "1540": 12698293248.0, "1545": 12698293248.0, "1550": 12698293248.0, "1555": 12698293248.0, "1560": 12698293248.0, "1565": 12698293248.0, "1570": 12698293248.0, "1575": 12698293248.0, "1580": 12698293248.0, "1585": 12698293248.0, "1590": 12698293248.0, "1595": 12698293248.0, "1600": 12698293248.0, "1605": 12698293248.0, "1610": 12698293248.0, "1615": 12698293248.0, "1620": 12698293248.0, "1625": 12698293248.0, "1630": 12698293248.0, "1635": 12698293248.0, "1640": 12698293248.0, "1645": 12698293248.0, "1650": 12698293248.0, "1655": 12698293248.0, "1660": 12698293248.0, "1665": 12698293248.0, "1670": 12698293248.0, "1675": 12698293248.0, "1680": 12698293248.0, "1685": 12698293248.0, "1690": 12698293248.0, "1695": 12698293248.0, "1700": 12698293248.0, "1705": 12698293248.0, "1710": 12698293248.0, "1715": 12698293248.0, "1720": 12698293248.0, "1725": 12698293248.0, "1730": 12698293248.0, "1735": 12698293248.0, "1740": 12698293248.0, "1745": 12698293248.0, "1750": 12698293248.0, "1755": 12698293248.0, "1760": 12698293248.0, "1765": 12698293248.0, "1770": 12698293248.0, "1775": 12698293248.0, "1780": 12698293248.0, "1785": 12698293248.0, "1790": 12698293248.0, "1795": 12698293248.0, "1800": 12698293248.0, "1805": 12698293248.0, "1810": 12698293248.0, "1815": 12698293248.0, "1820": 12698293248.0, "1825": 12698293248.0, "1830": 12698293248.0, "1835": 12698293248.0, "1840": 12698293248.0, "1845": 12698293248.0, "1850": 12698293248.0, "1855": 12698293248.0, "1860": 12698293248.0, "1865": 12698293248.0, "1870": 12698293248.0, "1875": 12698293248.0, "1880": 12698293248.0, "1885": 12698293248.0, "1890": 12698293248.0, "1895": 12698293248.0, "1900": 12698293248.0, "1905": 12698293248.0, "1910": 12698293248.0, "1915": 12698293248.0, "1920": 12698293248.0, "1925": 12698293248.0, "1930": 12698293248.0, "1935": 12698293248.0, "1940": 12698293248.0, "1945": 12698293248.0, "1950": 12698293248.0, "1955": 12698293248.0, "1960": 12698293248.0, "1965": 12698293248.0, "1970": 12698293248.0, "1975": 12698293248.0, "1980": 12698293248.0, "1985": 12698293248.0, "1990": 12698293248.0, "1995": 12698293248.0, "2000": 12698293248.0, "2005": 12698293248.0, "2010": 12698293248.0, "2015": 12698293248.0, "2020": 12698293248.0, "2025": 12698293248.0, "2030": 12698293248.0, "2035": 12698293248.0, "2040": 12698293248.0, "2045": 12698293248.0, "2050": 12698293248.0, "2055": 12698293248.0, "2060": 12698293248.0, "2065": 12698293248.0, "2070": 12698293248.0, "2075": 12698293248.0, "2080": 12698293248.0, "2085": 12698293248.0, "2090": 12698293248.0, "2095": 12698293248.0, "2100": 12698293248.0, "2105": 12698293248.0, "2110": 12698293248.0, "2115": 12698293248.0, "2120": 12698293248.0, "2125": 12698293248.0, "2130": 12698293248.0, "2135": 12698293248.0, "2140": 12698293248.0, "2145": 12698293248.0, "2150": 12698293248.0, "2155": 12698293248.0, "2160": 12698293248.0, "2165": 12698293248.0, "2170": 12698293248.0, "2175": 12698293248.0, "2180": 12698293248.0, "2185": 12698293248.0, "2190": 12698293248.0, "2195": 12698293248.0, "2200": 12698293248.0, "2205": 12698293248.0, "2210": 12698293248.0, "2215": 12698293248.0, "2220": 12698293248.0, "2225": 12698293248.0, "2230": 12698293248.0, "2235": 12698293248.0, "2240": 12698293248.0, "2245": 12698293248.0, "2250": 12698293248.0, "2255": 12698293248.0, "2260": 12698293248.0, "2265": 12698293248.0, "2270": 12698293248.0, "2275": 12698293248.0, "2280": 12698293248.0, "2285": 12698293248.0, "2290": 12698293248.0, "2295": 12698293248.0, "2300": 12698293248.0, "2305": 12698293248.0, "2310": 12698293248.0, "2315": 12698293248.0, "2320": 12698293248.0, "2325": 12698293248.0, "2330": 12698293248.0, "2335": 12698293248.0, "2340": 12698293248.0, "2345": 12698293248.0, "2350": 12698293248.0, "2355": 12698293248.0, "2360": 12698293248.0, "2365": 12698293248.0, "2370": 12698293248.0, "2375": 12698293248.0, "2380": 12698293248.0, "2385": 12698293248.0, "2390": 12698293248.0, "2395": 12698293248.0, "2400": 12698293248.0, "2405": 12698293248.0, "2410": 12698293248.0, "2415": 12698293248.0, "2420": 12698293248.0, "2425": 12698293248.0, "2430": 12698293248.0, "2435": 12698293248.0, "2440": 12698293248.0, "2445": 12698293248.0, "2450": 12698293248.0, "2455": 12698293248.0, "2460": 12698293248.0, "2465": 12698293248.0, "2470": 12698293248.0, "2475": 12698293248.0, "2480": 12698293248.0, "2485": 12698293248.0, "2490": 12698293248.0, "2495": 12698293248.0, "2500": 12698293248.0, "2505": 12698293248.0, "2510": 12698293248.0, "2515": 12698293248.0, "2520": 12698293248.0, "2525": 12698293248.0, "2530": 12698293248.0, "2535": 12698293248.0, "2540": 12698293248.0, "2545": 12698293248.0, "2550": 12698293248.0, "2555": 12698293248.0, "2560": 12698293248.0, "2565": 12698293248.0, "2570": 12698293248.0, "2575": 12698293248.0, "2580": 12698293248.0, "2585": 12698293248.0, "2590": 12698293248.0, "2595": 12698293248.0, "2600": 12698293248.0, "2605": 12698293248.0, "2610": 12698293248.0, "2615": 12698293248.0, "2620": 12698293248.0, "2625": 12698293248.0, "2630": 12698293248.0, "2635": 12698293248.0, "2640": 12698293248.0, "2645": 12698293248.0, "2650": 12698293248.0, "2655": 12698293248.0, "2660": 12698293248.0, "2665": 12698293248.0, "2670": 12698293248.0, "2675": 12698293248.0, "2680": 12698293248.0, "2685": 12698293248.0, "2690": 12698293248.0, "2695": 12698293248.0, "2700": 12698293248.0, "2705": 12698293248.0, "2710": 12698293248.0, "2715": 12698293248.0, "2720": 12698293248.0, "2725": 12698293248.0, "2730": 12698293248.0, "2735": 12698293248.0, "2740": 12698293248.0, "2745": 12698293248.0, "2750": 12698293248.0, "2755": 12698293248.0, "2760": 12698293248.0, "2765": 12698293248.0, "2770": 12698293248.0, "2775": 12698293248.0, "2780": 12698293248.0, "2785": 12698293248.0, "2790": 12698293248.0, "2795": 12698293248.0, "2800": 12698293248.0, "2805": 12698293248.0, "2810": 12698293248.0, "2815": 12698293248.0, "2820": 12698293248.0, "2825": 12698293248.0, "2830": 12698293248.0, "2835": 12698293248.0, "2840": 12698293248.0, "2845": 12698293248.0, "2850": 12698293248.0, "2855": 12698293248.0, "2860": 12698293248.0, "2865": 12698293248.0, "2870": 12698293248.0, "2875": 12698293248.0, "2880": 12698293248.0, "2885": 12698293248.0, "2890": 12698293248.0, "2895": 12698293248.0, "2900": 12698293248.0, "2905": 12698293248.0, "2910": 12698293248.0, "2915": 12698293248.0, "2920": 12698293248.0, "2925": 12698293248.0, "2930": 12698293248.0, "2935": 12698293248.0, "2940": 12698293248.0, "2945": 12698293248.0, "2950": 12698293248.0, "2955": 12698293248.0, "2960": 12698293248.0, "2965": 12698293248.0, "2970": 12698293248.0, "2975": 12698293248.0, "2980": 12698293248.0, "2985": 12698293248.0, "2990": 12698293248.0, "2995": 12698293248.0, "3000": 12698293248.0, "3005": 12698293248.0, "3010": 12698293248.0, "3015": 12698293248.0, "3020": 12698293248.0, "3025": 12698293248.0, "3030": 12698293248.0, "3035": 12698293248.0, "3040": 12698293248.0, "3045": 12698293248.0, "3050": 12698293248.0, "3055": 12698293248.0, "3060": 12698293248.0, "3065": 12698293248.0, "3070": 12698293248.0, "3075": 12698293248.0, "3080": 12698293248.0, "3085": 12698293248.0, "3090": 12698293248.0, "3095": 12698293248.0, "3100": 12698293248.0, "3105": 12698293248.0, "3110": 12698293248.0, "3115": 12698293248.0, "3120": 12698293248.0, "3125": 12698293248.0, "3130": 12698293248.0, "3135": 12698293248.0, "3140": 12698293248.0, "3145": 12698293248.0, "3150": 12698293248.0, "3155": 12698293248.0, "3160": 12698293248.0, "3165": 12698293248.0, "3170": 12698293248.0, "3175": 12698293248.0, "3180": 12698293248.0, "3185": 12698293248.0, "3190": 12698293248.0, "3195": 12698293248.0, "3200": 12698293248.0, "3205": 12698293248.0, "3210": 12698293248.0, "3215": 12698293248.0, "3220": 12698293248.0, "3225": 12698293248.0, "3230": 12698293248.0, "3235": 12698293248.0, "3240": 12698293248.0, "3245": 12698293248.0, "3250": 12698293248.0, "3255": 12698293248.0, "3260": 12698293248.0, "3265": 12698293248.0, "3270": 12698293248.0, "3275": 12698293248.0, "3280": 12698293248.0, "3285": 12698293248.0, "3290": 12698293248.0, "3295": 12698293248.0, "3300": 12698293248.0, "3305": 12698293248.0, "3310": 12698293248.0, "3315": 12698293248.0, "3320": 12698293248.0, "3325": 12698293248.0, "3330": 12698293248.0, "3335": 12698293248.0, "3340": 12698293248.0, "3345": 12698293248.0, "3350": 12698293248.0, "3355": 12698293248.0, "3360": 12698293248.0, "3365": 12698293248.0, "3370": 12698293248.0, "3375": 12698293248.0, "3380": 12698293248.0, "3385": 12698293248.0, "3390": 12698293248.0, "3395": 12698293248.0, "3400": 12698293248.0, "3405": 12698293248.0, "3410": 12698293248.0, "3415": 12698293248.0, "3420": 12698293248.0, "3425": 12698293248.0, "3430": 12698293248.0, "3435": 12698293248.0, "3440": 12698293248.0, "3445": 12698293248.0, "3450": 12698293248.0, "3455": 12698293248.0, "3460": 12698293248.0, "3465": 12698293248.0, "3470": 12698293248.0, "3475": 12698293248.0, "3480": 12698293248.0, "3485": 12698293248.0, "3490": 12698293248.0, "3495": 12698293248.0, "3500": 12698293248.0, "3505": 12698293248.0, "3510": 12698293248.0, "3515": 12698293248.0, "3520": 12698293248.0, "3525": 12698293248.0, "3530": 12698293248.0, "3535": 12698293248.0, "3540": 12698293248.0, "3545": 12698293248.0, "3550": 12698293248.0, "3555": 12698293248.0, "3560": 12698293248.0, "3565": 12698293248.0, "3570": 12698293248.0, "3575": 12698293248.0, "3580": 12698293248.0, "3585": 12698293248.0, "3590": 12698293248.0, "3595": 12698293248.0, "3600": 12698293248.0, "3605": 12698293248.0, "3610": 12698293248.0, "3615": 12698293248.0, "3620": 12698293248.0, "3625": 12698293248.0, "3630": 12698293248.0, "3635": 12698293248.0, "3640": 12698293248.0, "3645": 12698293248.0, "3650": 12698492928.0, "3655": 12698492928.0, "3660": 12698492928.0, "3665": 12698492928.0, "3670": 12698492928.0, "3675": 12698492928.0, "3680": 12698492928.0, "3685": 12698492928.0, "3690": 12698492928.0, "3695": 12698492928.0, "3700": 12698492928.0, "3705": 12698492928.0, "3710": 12698492928.0, "3715": 12698492928.0, "3720": 12698492928.0, "3725": 12698492928.0, "3730": 12698492928.0, "3735": 12698492928.0, "3740": 12698492928.0, "3745": 12698492928.0, "3750": 12698492928.0, "3755": 12698492928.0, "3760": 12698492928.0, "3765": 12698492928.0, "3770": 12698492928.0, "3775": 12698492928.0, "3780": 12698492928.0, "3785": 12698492928.0, "3790": 12698492928.0, "3795": 12698492928.0, "3800": 12698492928.0, "3805": 12698492928.0, "3810": 12698492928.0, "3815": 12698492928.0, "3820": 12698492928.0, "3825": 12698492928.0, "3830": 12698492928.0, "3835": 12698492928.0, "3840": 12698492928.0, "3845": 12698492928.0, "3850": 12698492928.0, "3855": 12698492928.0, "3860": 12698492928.0, "3865": 12698492928.0, "3870": 12698492928.0, "3875": 12698492928.0, "3880": 12698492928.0, "3885": 12698492928.0, "3890": 12698492928.0, "3895": 12698492928.0, "3900": 12698492928.0, "3905": 12698492928.0, "3910": 12698492928.0, "3915": 12698492928.0, "3920": 12698492928.0, "3925": 12698492928.0, "3930": 12698492928.0, "3935": 12698492928.0, "3940": 12698492928.0, "3945": 12698492928.0, "3950": 12698492928.0, "3955": 12698492928.0, "3960": 12698492928.0, "3965": 12698492928.0, "3970": 12698492928.0, "3975": 12698492928.0, "3980": 12698492928.0, "3985": 12698492928.0, "3990": 12698492928.0, "3995": 12698492928.0, "4000": 12698492928.0, "4005": 12698492928.0, "4010": 12698492928.0, "4015": 12698492928.0, "4020": 12698492928.0, "4025": 12698492928.0, "4030": 12698492928.0, "4035": 12698492928.0, "4040": 12698492928.0, "4045": 12698492928.0, "4050": 12698492928.0, "4055": 12698492928.0, "4060": 12698492928.0, "4065": 12698492928.0, "4070": 12698492928.0, "4075": 12698492928.0, "4080": 12698492928.0, "4085": 12698492928.0, "4090": 12698492928.0, "4095": 12698492928.0, "4100": 12698492928.0, "4105": 12698492928.0, "4110": 12698492928.0, "4115": 12698492928.0, "4120": 12698492928.0, "4125": 12698492928.0, "4130": 12698492928.0, "4135": 12698492928.0, "4140": 12698492928.0, "4145": 12698492928.0, "4150": 12698492928.0, "4155": 12698492928.0, "4160": 12698492928.0, "4165": 12698492928.0, "4170": 12698492928.0, "4175": 12698492928.0, "4180": 12698492928.0, "4185": 12698492928.0, "4190": 12698492928.0, "4195": 12698492928.0, "4200": 12698492928.0, "4205": 12698492928.0, "4210": 12698492928.0, "4215": 12698492928.0, "4220": 12698492928.0, "4225": 12698492928.0, "4230": 12698492928.0, "4235": 12698492928.0, "4240": 12698492928.0, "4245": 12698492928.0, "4250": 12698492928.0, "4255": 12698492928.0, "4260": 12698492928.0, "4265": 12698492928.0, "4270": 12698492928.0, "4275": 12698492928.0, "4280": 12698492928.0, "4285": 12698492928.0, "4290": 12698492928.0, "4295": 12698492928.0, "4300": 12698492928.0, "4305": 12698492928.0, "4310": 12698492928.0, "4315": 12698492928.0, "4320": 12698492928.0, "4325": 12698492928.0, "4330": 12698492928.0, "4335": 12698492928.0, "4340": 12698492928.0, "4345": 12698492928.0, "4350": 12698492928.0, "4355": 12698492928.0, "4360": 12698492928.0, "4365": 12698492928.0, "4370": 12698492928.0, "4375": 12698492928.0, "4380": 12698492928.0, "4385": 12698492928.0, "4390": 12698492928.0, "4395": 12698492928.0, "4400": 12698492928.0, "4405": 12698492928.0, "4410": 12698492928.0, "4415": 12698492928.0, "4420": 12698492928.0, "4425": 12698492928.0, "4430": 12698492928.0, "4435": 12698492928.0, "4440": 12698492928.0, "4445": 12698492928.0, "4450": 12698492928.0, "4455": 12698492928.0, "4460": 12698492928.0, "4465": 12698492928.0, "4470": 12698492928.0, "4475": 12698492928.0, "4480": 12698492928.0, "4485": 12698492928.0, "4490": 12698492928.0, "4495": 12698492928.0, "4500": 12698492928.0, "4505": 12698492928.0, "4510": 12698492928.0, "4515": 12698492928.0, "4520": 12698492928.0, "4525": 12698492928.0, "4530": 12698492928.0, "4535": 12698492928.0, "4540": 12698492928.0, "4545": 12698492928.0, "4550": 12698492928.0, "4555": 12698492928.0, "4560": 12698492928.0, "4565": 12698492928.0, "4570": 12698492928.0, "4575": 12698492928.0, "4580": 12698492928.0, "4585": 12698492928.0, "4590": 12698492928.0, "4595": 12698492928.0, "4600": 12698492928.0, "4605": 12698492928.0, "4610": 12698492928.0, "4615": 12698492928.0, "4620": 12698492928.0, "4625": 12698492928.0, "4630": 12698492928.0, "4635": 12698492928.0, "4640": 12698492928.0, "4645": 12698492928.0, "4650": 12698492928.0, "4655": 12698492928.0, "4660": 12698492928.0, "4665": 12698492928.0, "4670": 12698492928.0, "4675": 12698492928.0, "4680": 12698492928.0, "4685": 12698492928.0, "4690": 12698492928.0, "4695": 12698492928.0, "4700": 12698492928.0, "4705": 12698492928.0, "4710": 12698492928.0, "4715": 12698492928.0, "4720": 12698492928.0, "4725": 12698492928.0, "4730": 12698492928.0, "4735": 12698492928.0, "4740": 12698492928.0, "4745": 12698492928.0, "4750": 12698492928.0, "4755": 12698492928.0, "4760": 12698492928.0, "4765": 12698492928.0, "4770": 12698492928.0, "4775": 12698492928.0, "4780": 12698492928.0, "4785": 12698492928.0, "4790": 12698492928.0, "4795": 12698492928.0, "4800": 12698492928.0, "4805": 12698492928.0, "4810": 12698492928.0, "4815": 12698492928.0, "4820": 12698492928.0, "4825": 12698492928.0, "4830": 12698492928.0, "4835": 12698492928.0, "4840": 12698492928.0, "4845": 12698492928.0, "4850": 12698492928.0, "4855": 12698492928.0, "4860": 12698492928.0, "4865": 12698492928.0, "4870": 12698492928.0, "4875": 12698492928.0, "4880": 12698492928.0, "4885": 12698492928.0, "4890": 12698492928.0, "4895": 12698492928.0, "4900": 12698492928.0, "4905": 12698492928.0, "4910": 12698492928.0, "4915": 12698492928.0, "4920": 12698492928.0, "4925": 12698492928.0, "4930": 12698492928.0, "4935": 12698492928.0, "4940": 12698492928.0, "4945": 12698492928.0, "4950": 12698492928.0, "4955": 12698492928.0, "4960": 12698492928.0, "4965": 12698492928.0, "4970": 12698492928.0, "4975": 12698492928.0, "4980": 12698492928.0, "4985": 12698492928.0, "4990": 12698492928.0, "4995": 12698492928.0, "5000": 12698492928.0, "5005": 12698492928.0, "5010": 12698492928.0, "5015": 12698492928.0, "5020": 12698492928.0, "5025": 12698492928.0, "5030": 12698492928.0, "5035": 12698492928.0, "5040": 12698492928.0, "5045": 12698492928.0, "5050": 12698492928.0, "5055": 12698492928.0, "5060": 12698492928.0, "5065": 12698492928.0, "5070": 12698492928.0, "5075": 12698492928.0, "5080": 12698492928.0, "5085": 12698492928.0, "5090": 12698492928.0, "5095": 12698492928.0, "5100": 12698492928.0, "5105": 12698492928.0, "5110": 12698492928.0, "5115": 12698492928.0, "5120": 12698492928.0, "5125": 12698492928.0, "5130": 12698492928.0, "5135": 12698492928.0, "5140": 12698492928.0, "5145": 12698492928.0, "5150": 12698492928.0, "5155": 12698492928.0, "5160": 12698492928.0, "5165": 12698492928.0, "5170": 12698492928.0, "5175": 12698492928.0, "5180": 12698492928.0, "5185": 12698492928.0, "5190": 12698492928.0, "5195": 12698492928.0, "5200": 12698492928.0, "5205": 12698492928.0, "5210": 12698492928.0, "5215": 12698492928.0, "5220": 12698492928.0, "5225": 12698492928.0, "5230": 12698492928.0, "5235": 12698492928.0, "5240": 12698492928.0, "5245": 12698492928.0, "5250": 12698492928.0, "5255": 12698492928.0, "5260": 12698492928.0, "5265": 12698492928.0, "5270": 12698492928.0, "5275": 12698492928.0, "5280": 12698492928.0, "5285": 12698492928.0, "5290": 12698492928.0, "5295": 12698492928.0, "5300": 12698492928.0, "5305": 12698492928.0, "5310": 12698492928.0, "5315": 12698492928.0, "5320": 12698492928.0, "5325": 12698492928.0, "5330": 12698492928.0, "5335": 12698492928.0, "5340": 12698492928.0, "5345": 12698492928.0, "5350": 12698492928.0, "5355": 12698492928.0, "5360": 12698492928.0, "5365": 12698492928.0, "5370": 12698492928.0, "5375": 12698492928.0, "5380": 12698492928.0, "5385": 12698492928.0, "5390": 12698492928.0, "5395": 12698492928.0, "5400": 12698492928.0, "5405": 12698492928.0, "5410": 12698492928.0, "5415": 12698492928.0, "5420": 12698492928.0, "5425": 12698492928.0, "5430": 12698492928.0, "5435": 12698492928.0, "5440": 12698492928.0, "5445": 12698492928.0, "5450": 12698492928.0, "5455": 12698492928.0, "5460": 12698492928.0, "5465": 12698492928.0, "5470": 12698492928.0, "5475": 12698492928.0, "5480": 12698492928.0, "5485": 12698492928.0, "5490": 12698492928.0, "5495": 12698492928.0, "5500": 12698492928.0, "5505": 12698492928.0, "5510": 12698492928.0, "5515": 12698492928.0, "5520": 12698492928.0, "5525": 12698492928.0, "5530": 12698492928.0, "5535": 12698492928.0, "5540": 12698492928.0, "5545": 12698492928.0, "5550": 12698492928.0, "5555": 12698492928.0, "5560": 12698492928.0, "5565": 12698492928.0, "5570": 12698492928.0, "5575": 12698492928.0, "5580": 12698492928.0, "5585": 12698492928.0, "5590": 12698492928.0, "5595": 12698492928.0, "5600": 12698492928.0, "5605": 12698492928.0, "5610": 12698492928.0, "5615": 12698492928.0, "5620": 12698492928.0, "5625": 12698492928.0, "5630": 12698492928.0, "5635": 12698492928.0, "5640": 12698492928.0, "5645": 12698492928.0, "5650": 12698492928.0, "5655": 12698492928.0, "5660": 12698492928.0, "5665": 12698492928.0, "5670": 12698492928.0, "5675": 12698492928.0, "5680": 12698492928.0, "5685": 12698492928.0, "5690": 12698492928.0, "5695": 12698492928.0, "5700": 12698492928.0, "5705": 12698492928.0, "5710": 12698492928.0, "5715": 12698492928.0, "5720": 12698492928.0, "5725": 12698492928.0, "5730": 12698492928.0, "5735": 12698492928.0, "5740": 12698492928.0, "5745": 12698492928.0, "5750": 12698492928.0, "5755": 12698492928.0, "5760": 12698492928.0, "5765": 12698492928.0, "5770": 12698492928.0, "5775": 12698492928.0, "5780": 12698492928.0, "5785": 12698492928.0, "5790": 12698492928.0, "5795": 12698492928.0, "5800": 12698492928.0, "5805": 12698492928.0, "5810": 12698492928.0, "5815": 12698492928.0, "5820": 12698492928.0, "5825": 12698492928.0, "5830": 12698492928.0, "5835": 12698492928.0, "5840": 12698492928.0, "5845": 12698492928.0, "5850": 12698492928.0, "5855": 12698492928.0, "5860": 12698492928.0, "5865": 12698492928.0, "5870": 12698492928.0, "5875": 12698492928.0, "5880": 12698492928.0, "5885": 12698492928.0, "5890": 12698492928.0, "5895": 12698492928.0, "5900": 12698492928.0, "5905": 12698492928.0, "5910": 12698492928.0, "5915": 12698492928.0, "5920": 12698492928.0, "5925": 12698492928.0, "5930": 12698492928.0, "5935": 12698492928.0, "5940": 12698492928.0, "5945": 12698492928.0, "5950": 12698492928.0, "5955": 12698492928.0, "5960": 12698492928.0, "5965": 12698492928.0, "5970": 12698492928.0, "5975": 12698492928.0, "5980": 12698492928.0, "5985": 12698492928.0, "5990": 12698492928.0, "5995": 12698492928.0, "6000": 12698492928.0, "6005": 12698492928.0, "6010": 12698492928.0, "6015": 12698492928.0, "6020": 12698492928.0, "6025": 12698492928.0, "6030": 12698492928.0, "6035": 12698492928.0, "6040": 12698492928.0, "6045": 12698492928.0, "6050": 12698492928.0, "6055": 12698492928.0, "6060": 12698492928.0, "6065": 12698492928.0, "6070": 12698492928.0, "6075": 12698492928.0, "6080": 12698492928.0, "6085": 12698492928.0, "6090": 12698492928.0, "6095": 12698492928.0, "6100": 12698492928.0, "6105": 12698492928.0, "6110": 12698492928.0, "6115": 12698492928.0, "6120": 12698492928.0, "6125": 12698492928.0, "6130": 12698492928.0, "6135": 12698492928.0, "6140": 12698492928.0, "6145": 12698492928.0, "6150": 12698492928.0, "6155": 12698492928.0, "6160": 12698492928.0, "6165": 12698492928.0, "6170": 12698492928.0, "6175": 12698492928.0, "6180": 12698492928.0, "6185": 12698492928.0, "6190": 12698492928.0, "6195": 12698492928.0, "6200": 12698492928.0, "6205": 12698492928.0, "6210": 12698492928.0, "6215": 12698492928.0, "6220": 12698492928.0, "6225": 12698492928.0, "6230": 12698492928.0, "6235": 12698492928.0, "6240": 12698492928.0, "6245": 12698492928.0, "6250": 12698492928.0, "6255": 12698492928.0, "6260": 12698492928.0, "6265": 12698492928.0, "6270": 12698492928.0, "6275": 12698492928.0, "6280": 12698492928.0, "6285": 12698492928.0, "6290": 12698492928.0, "6295": 12698492928.0, "6300": 12698492928.0, "6305": 12698492928.0, "6310": 12698492928.0, "6315": 12698492928.0, "6320": 12698492928.0, "6325": 12698492928.0, "6330": 12698492928.0, "6335": 12698492928.0, "6340": 12698492928.0, "6345": 12698492928.0, "6350": 12698492928.0, "6355": 12698492928.0, "6360": 12698492928.0, "6365": 12698492928.0, "6370": 12698492928.0, "6375": 12698492928.0, "6380": 12698492928.0, "6385": 12698492928.0, "6390": 12698492928.0, "6395": 12698492928.0, "6400": 12698492928.0, "6405": 12698492928.0, "6410": 12698492928.0, "6415": 12698492928.0, "6420": 12698492928.0, "6425": 12698492928.0, "6430": 12698492928.0, "6435": 12698492928.0, "6440": 12698492928.0, "6445": 12698492928.0, "6450": 12698492928.0, "6455": 12698492928.0, "6460": 12698492928.0, "6465": 12698492928.0, "6470": 12698492928.0, "6475": 12698492928.0, "6480": 12698492928.0, "6485": 12698492928.0, "6490": 12698492928.0, "6495": 12698492928.0, "6500": 12698492928.0, "6505": 12698492928.0, "6510": 12698492928.0, "6515": 12698492928.0, "6520": 12698492928.0, "6525": 12698492928.0, "6530": 12698492928.0, "6535": 12698492928.0, "6540": 12698492928.0, "6545": 12698492928.0, "6550": 12698492928.0, "6555": 12698492928.0, "6560": 12698492928.0, "6565": 12698492928.0, "6570": 12698492928.0, "6575": 12698492928.0, "6580": 12698492928.0, "6585": 12698492928.0, "6590": 12698492928.0, "6595": 12698492928.0, "6600": 12698492928.0, "6605": 12698492928.0, "6610": 12698492928.0, "6615": 12698492928.0, "6620": 12698492928.0, "6625": 12698492928.0, "6630": 12698492928.0, "6635": 12698492928.0, "6640": 12698492928.0, "6645": 12698492928.0, "6650": 12698492928.0, "6655": 12698492928.0, "6660": 12698492928.0, "6665": 12698492928.0, "6670": 12698492928.0, "6675": 12698492928.0, "6680": 12698492928.0, "6685": 12698492928.0, "6690": 12698492928.0, "6695": 12698492928.0, "6700": 12698492928.0, "6705": 12698492928.0, "6710": 12698492928.0, "6715": 12698492928.0, "6720": 12698492928.0, "6725": 12698492928.0, "6730": 12698492928.0, "6735": 12698492928.0, "6740": 12698492928.0, "6745": 12698492928.0, "6750": 12698492928.0, "6755": 12698492928.0, "6760": 12698492928.0, "6765": 12698492928.0, "6770": 12698492928.0, "6775": 12698492928.0, "6780": 12698492928.0, "6785": 12698492928.0, "6790": 12698492928.0, "6795": 12698492928.0, "6800": 12698492928.0, "6805": 12698492928.0, "6810": 12698492928.0, "6815": 12698492928.0, "6820": 12698492928.0, "6825": 12698492928.0, "6830": 12698492928.0, "6835": 12698492928.0, "6840": 12698492928.0, "6845": 12698492928.0, "6850": 12698492928.0, "6855": 12698492928.0, "6860": 12698492928.0, "6865": 12698492928.0, "6870": 12698492928.0, "6875": 12698492928.0, "6880": 12698492928.0, "6885": 12698492928.0, "6890": 12698492928.0, "6895": 12698492928.0, "6900": 12698492928.0, "6905": 12698492928.0, "6910": 12698492928.0, "6915": 12698492928.0, "6920": 12698492928.0, "6925": 12698492928.0, "6930": 12698492928.0, "6935": 12698492928.0, "6940": 12698492928.0, "6945": 12698492928.0, "6950": 12698492928.0, "6955": 12698492928.0, "6960": 12698492928.0, "6965": 12698492928.0, "6970": 12698492928.0, "6975": 12698492928.0, "6980": 12698492928.0, "6985": 12698492928.0, "6990": 12698492928.0, "6995": 12698492928.0, "7000": 12698492928.0, "7005": 12698492928.0, "7010": 12698492928.0, "7015": 12698492928.0, "7020": 12698492928.0, "7025": 12698492928.0, "7030": 12698492928.0, "7035": 12698492928.0, "7040": 12698492928.0, "7045": 12698492928.0, "7050": 12698492928.0, "7055": 12698492928.0, "7060": 12698492928.0, "7065": 12698492928.0, "7070": 12698492928.0, "7075": 12698492928.0, "7080": 12698492928.0, "7085": 12698492928.0, "7090": 12698492928.0, "7095": 12698492928.0, "7100": 12698492928.0, "7105": 12698492928.0, "7110": 12698492928.0, "7115": 12698492928.0, "7120": 12698492928.0, "7125": 12698492928.0, "7130": 12698492928.0, "7135": 12698492928.0, "7140": 12698492928.0, "7145": 12698492928.0, "7150": 12698492928.0, "7155": 12698492928.0, "7160": 12698492928.0, "7165": 12698492928.0, "7170": 12698492928.0, "7175": 12698492928.0, "7180": 12698492928.0, "7185": 12698492928.0, "7190": 12698492928.0, "7195": 12698492928.0, "7200": 12698492928.0, "7205": 12698492928.0, "7210": 12698492928.0, "7215": 12698492928.0, "7220": 12698492928.0, "7225": 12698492928.0, "7230": 12698492928.0, "7235": 12698492928.0, "7240": 12698492928.0, "7245": 12698492928.0, "7250": 12698492928.0, "7255": 12698492928.0, "7260": 12698492928.0, "7265": 12698492928.0, "7270": 12698492928.0, "7275": 12698492928.0, "7280": 12698492928.0, "7285": 12698492928.0, "7290": 12698492928.0, "7295": 12698492928.0, "7300": 12698492928.0, "7305": 12698492928.0, "7310": 12698492928.0, "7315": 12698492928.0, "7320": 12698492928.0, "7325": 12698492928.0, "7330": 12698492928.0, "7335": 12698492928.0, "7340": 12698492928.0, "7345": 12698492928.0, "7350": 12698492928.0, "7355": 12698492928.0, "7360": 12698492928.0, "7365": 12698492928.0, "7370": 12698492928.0, "7375": 12698492928.0, "7380": 12698492928.0, "7385": 12698492928.0, "7390": 12698492928.0, "7395": 12698492928.0, "7400": 12698492928.0, "7405": 12698492928.0, "7410": 12698492928.0, "7415": 12698492928.0, "7420": 12698492928.0, "7425": 12698492928.0, "7430": 12698492928.0, "7435": 12698492928.0, "7440": 12698492928.0, "7445": 12698492928.0, "7450": 12698492928.0, "7455": 12698492928.0, "7460": 12698492928.0, "7465": 12698492928.0, "7470": 12698492928.0, "7475": 12698492928.0, "7480": 12698492928.0, "7485": 12698492928.0, "7490": 12698492928.0, "7495": 12698492928.0, "7500": 12698492928.0, "7505": 12698492928.0, "7510": 12698492928.0, "7515": 12698492928.0, "7520": 12698492928.0, "7525": 12698492928.0, "7530": 12698492928.0, "7535": 12698492928.0, "7540": 12698492928.0, "7545": 12698492928.0, "7550": 12698492928.0, "7555": 12698492928.0, "7560": 12698492928.0, "7565": 12698492928.0, "7570": 12698492928.0, "7575": 12698492928.0, "7580": 12698492928.0, "7585": 12698492928.0, "7590": 12698492928.0, "7595": 12698492928.0, "7600": 12698492928.0, "7605": 12698492928.0, "7610": 12698492928.0, "7615": 12698492928.0, "7620": 12698492928.0, "7625": 12698492928.0, "7630": 12698492928.0, "7635": 12698492928.0, "7640": 12698492928.0, "7645": 12698492928.0, "7650": 12698492928.0, "7655": 12698492928.0, "7660": 12698492928.0, "7665": 12698492928.0, "7670": 12698492928.0, "7675": 12698492928.0, "7680": 12698492928.0, "7685": 12698492928.0, "7690": 12698492928.0, "7695": 12698492928.0, "7700": 12698492928.0, "7705": 12698492928.0, "7710": 12698492928.0, "7715": 12698492928.0, "7720": 12698492928.0, "7725": 12698492928.0, "7730": 12698492928.0, "7735": 12698492928.0, "7740": 12698492928.0, "7745": 12698492928.0, "7750": 12698492928.0, "7755": 12698492928.0, "7760": 12698492928.0, "7765": 12698492928.0, "7770": 12698492928.0, "7775": 12698492928.0, "7780": 12698492928.0, "7785": 12698492928.0, "7790": 12698492928.0, "7795": 12698492928.0, "7800": 12698492928.0, "7805": 12698492928.0, "7810": 12698492928.0, "7815": 12698492928.0, "7820": 12698492928.0, "7825": 12698492928.0, "7830": 12698492928.0, "7835": 12698492928.0, "7840": 12698492928.0, "7845": 12698492928.0, "7850": 12698492928.0, "7855": 12698492928.0, "7860": 12698492928.0, "7865": 12698492928.0, "7870": 12698492928.0, "7875": 12698492928.0, "7880": 12698492928.0, "7885": 12698492928.0, "7890": 12698492928.0, "7895": 12698492928.0, "7900": 12698492928.0, "7905": 12698492928.0, "7910": 12698492928.0, "7915": 12698492928.0, "7920": 12698492928.0, "7925": 12698492928.0, "7930": 12698492928.0, "7935": 12698492928.0, "7940": 12698492928.0, "7945": 12698492928.0, "7950": 12698492928.0, "7955": 12698492928.0, "7960": 12698492928.0, "7965": 12698492928.0, "7970": 12698492928.0, "7975": 12698492928.0, "7980": 12698492928.0, "7985": 12698492928.0, "7990": 12698492928.0, "7995": 12698492928.0, "8000": 12698492928.0, "8005": 12698492928.0, "8010": 12698492928.0, "8015": 12698492928.0, "8020": 12698492928.0, "8025": 12698492928.0, "8030": 12698492928.0, "8035": 12698492928.0, "8040": 12698492928.0, "8045": 12698492928.0, "8050": 12698492928.0, "8055": 12698492928.0, "8060": 12698492928.0, "8065": 12698492928.0, "8070": 12698492928.0, "8075": 12698492928.0, "8080": 12698492928.0, "8085": 12698492928.0, "8090": 12698492928.0, "8095": 12698492928.0, "8100": 12698492928.0, "8105": 12698492928.0, "8110": 12698492928.0, "8115": 12698492928.0, "8120": 12698492928.0, "8125": 12698492928.0, "8130": 12698492928.0, "8135": 12698492928.0, "8140": 12698492928.0, "8145": 12698492928.0, "8150": 12698492928.0, "8155": 12698492928.0, "8160": 12698492928.0, "8165": 12698492928.0, "8170": 12698492928.0, "8175": 12698492928.0, "8180": 12698492928.0, "8185": 12698492928.0, "8190": 12698492928.0, "8195": 12698492928.0, "8200": 12698492928.0, "8205": 12698492928.0, "8210": 12698492928.0, "8215": 12698492928.0, "8220": 12698492928.0, "8225": 12698492928.0, "8230": 12698492928.0, "8235": 12698492928.0, "8240": 12698492928.0, "8245": 12698492928.0, "8250": 12698492928.0, "8255": 12698492928.0, "8260": 12698492928.0, "8265": 12698492928.0, "8270": 12698492928.0, "8275": 12698492928.0, "8280": 12698492928.0, "8285": 12698492928.0, "8290": 12698492928.0, "8295": 12698492928.0, "8300": 12698492928.0, "8305": 12698492928.0, "8310": 12698492928.0, "8315": 12698492928.0, "8320": 12698492928.0, "8325": 12698492928.0, "8330": 12698492928.0, "8335": 12698492928.0, "8340": 12698492928.0, "8345": 12698492928.0, "8350": 12698492928.0, "8355": 12698492928.0, "8360": 12698492928.0, "8365": 12698492928.0, "8370": 12698492928.0, "8375": 12698492928.0, "8380": 12698492928.0, "8385": 12698492928.0, "8390": 12698492928.0, "8395": 12698492928.0, "8400": 12698492928.0, "8405": 12698492928.0, "8410": 12698492928.0, "8415": 12698492928.0, "8420": 12698492928.0, "8425": 12698492928.0, "8430": 12698492928.0, "8435": 12698492928.0, "8440": 12698492928.0, "8445": 12698492928.0, "8450": 12698492928.0, "8455": 12698492928.0, "8460": 12698492928.0, "8465": 12698492928.0, "8470": 12698492928.0, "8475": 12698492928.0, "8480": 12698492928.0, "8485": 12698492928.0, "8490": 12698492928.0, "8495": 12698492928.0, "8500": 12698492928.0, "8505": 12698492928.0, "8510": 12698492928.0, "8515": 12698492928.0, "8520": 12698492928.0, "8525": 12698492928.0, "8530": 12698492928.0, "8535": 12698492928.0, "8540": 12698492928.0, "8545": 12698492928.0, "8550": 12698492928.0, "8555": 12698492928.0, "8560": 12698492928.0, "8565": 12698492928.0, "8570": 12698492928.0, "8575": 12698492928.0, "8580": 12698492928.0, "8585": 12698492928.0, "8590": 12698492928.0, "8595": 12698492928.0, "8600": 12698492928.0, "8605": 12698492928.0, "8610": 12698492928.0, "8615": 12698492928.0, "8620": 12698492928.0, "8625": 12698492928.0, "8630": 12698492928.0, "8635": 12698492928.0, "8640": 12698492928.0, "8645": 12698492928.0, "8650": 12698492928.0, "8655": 12698492928.0, "8660": 12698492928.0, "8665": 12698492928.0, "8670": 12698492928.0, "8675": 12698492928.0, "8680": 12698492928.0, "8685": 12698492928.0, "8690": 12698492928.0, "8695": 12698492928.0, "8700": 12698492928.0, "8705": 12698492928.0, "8710": 12698492928.0, "8715": 12698492928.0, "8720": 12698492928.0, "8725": 12698492928.0, "8730": 12698492928.0, "8735": 12698492928.0, "8740": 12698492928.0, "8745": 12698492928.0, "8750": 12698492928.0, "8755": 12698492928.0, "8760": 12698492928.0, "8765": 12698492928.0, "8770": 12698492928.0, "8775": 12698492928.0, "8780": 12698492928.0, "8785": 12698492928.0, "8790": 12698492928.0, "8795": 12698492928.0, "8800": 12698492928.0, "8805": 12698492928.0, "8810": 12698492928.0, "8815": 12698492928.0, "8820": 12698492928.0, "8825": 12698492928.0, "8830": 12698492928.0, "8835": 12698492928.0, "8840": 12698492928.0, "8845": 12698492928.0, "8850": 12698492928.0, "8855": 12698492928.0, "8860": 12698492928.0, "8865": 12698492928.0, "8870": 12698492928.0, "8875": 12698492928.0, "8880": 12698492928.0, "8885": 12698492928.0, "8890": 12698492928.0, "8895": 12698492928.0, "8900": 12698492928.0, "8905": 12698492928.0, "8910": 12698492928.0, "8915": 12698492928.0, "8920": 12698492928.0, "8925": 12698492928.0, "8930": 12698492928.0, "8935": 12698492928.0, "8940": 12698492928.0, "8945": 12698492928.0, "8950": 12698492928.0, "8955": 12698492928.0, "8960": 12698492928.0, "8965": 12698492928.0, "8970": 12698492928.0, "8975": 12698492928.0, "8980": 12698492928.0, "8985": 12698492928.0, "8990": 12698492928.0, "8995": 12698492928.0, "9000": 12698492928.0, "9005": 12698492928.0, "9010": 12698492928.0, "9015": 12698492928.0, "9020": 12698492928.0, "9025": 12698492928.0, "9030": 12698492928.0, "9035": 12698492928.0, "9040": 12698492928.0, "9045": 12698492928.0, "9050": 12698492928.0, "9055": 12698492928.0, "9060": 12698492928.0, "9065": 12698492928.0, "9070": 12698492928.0, "9075": 12698492928.0, "9080": 12698492928.0, "9085": 12698492928.0, "9090": 12698492928.0, "9095": 12698492928.0, "9100": 12698492928.0, "9105": 12698492928.0, "9110": 12698492928.0, "9115": 12698492928.0, "9120": 12698492928.0, "9125": 12698492928.0, "9130": 12698492928.0, "9135": 12698492928.0, "9140": 12698492928.0, "9145": 12698492928.0, "9150": 12698492928.0, "9155": 12698492928.0, "9160": 12698492928.0, "9165": 12698492928.0, "9170": 12698492928.0, "9175": 12698492928.0, "9180": 12698492928.0, "9185": 12698492928.0, "9190": 12698492928.0, "9195": 12698492928.0, "9200": 12698492928.0, "9205": 12698492928.0, "9210": 12698492928.0, "9215": 12698492928.0, "9220": 12698492928.0, "9225": 12698492928.0, "9230": 12698492928.0, "9235": 12698492928.0, "9240": 12698492928.0, "9245": 12698492928.0, "9250": 12698492928.0, "9255": 12698492928.0, "9260": 12698492928.0, "9265": 12698492928.0, "9270": 12698492928.0, "9275": 12698492928.0, "9280": 12698492928.0, "9285": 12698492928.0, "9290": 12698492928.0, "9295": 12698492928.0, "9300": 12698492928.0, "9305": 12698492928.0, "9310": 12698492928.0, "9315": 12698492928.0, "9320": 12698492928.0, "9325": 12698492928.0, "9330": 12698492928.0, "9335": 12698492928.0, "9340": 12698492928.0, "9345": 12698492928.0, "9350": 12698492928.0, "9355": 12698492928.0, "9360": 12698492928.0, "9365": 12698492928.0, "9370": 12698492928.0, "9375": 12698492928.0, "9380": 12698492928.0, "9385": 12698492928.0, "9390": 12698492928.0, "9395": 12698492928.0, "9400": 12698492928.0, "9405": 12698492928.0, "9410": 12698492928.0, "9415": 12698492928.0, "9420": 12698492928.0, "9425": 12698492928.0, "9430": 12698492928.0, "9435": 12698492928.0, "9440": 12698492928.0, "9445": 12698492928.0, "9450": 12698492928.0, "9455": 12698492928.0, "9460": 12698492928.0, "9465": 12698492928.0, "9470": 12698492928.0, "9475": 12698492928.0, "9480": 12698492928.0, "9485": 12698492928.0, "9490": 12698492928.0, "9495": 12698492928.0, "9500": 12698492928.0, "9505": 12698492928.0, "9510": 12698492928.0, "9515": 12698492928.0, "9520": 12698492928.0, "9525": 12698492928.0, "9530": 12698492928.0, "9535": 12698492928.0, "9540": 12698492928.0, "9545": 12698492928.0, "9550": 12698492928.0, "9555": 12698492928.0, "9560": 12698492928.0, "9565": 12698492928.0, "9570": 12698492928.0, "9575": 12698492928.0, "9580": 12698492928.0, "9585": 12698492928.0, "9590": 12698492928.0, "9595": 12698492928.0, "9600": 12698492928.0, "9605": 12698492928.0, "9610": 12698492928.0, "9615": 12698492928.0, "9620": 12698492928.0, "9625": 12698492928.0, "9630": 12698492928.0, "9635": 12698492928.0, "9640": 12698492928.0, "9645": 12698492928.0, "9650": 12698492928.0, "9655": 12698492928.0, "9660": 12698492928.0, "9665": 12698492928.0, "9670": 12698492928.0, "9675": 12698492928.0, "9680": 12698492928.0, "9685": 12698492928.0, "9690": 12698492928.0, "9695": 12698492928.0, "9700": 12698492928.0, "9705": 12698492928.0, "9710": 12698492928.0, "9715": 12698492928.0, "9720": 12698492928.0, "9725": 12698492928.0, "9730": 12698492928.0, "9735": 12698492928.0, "9740": 12698492928.0, "9745": 12698492928.0, "9750": 12698492928.0, "9755": 12698492928.0, "9760": 12698492928.0, "9765": 12698492928.0, "9770": 12698492928.0, "9775": 12698492928.0, "9780": 12698492928.0, "9785": 12698492928.0, "9790": 12698492928.0, "9795": 12698492928.0, "9800": 12698492928.0, "9805": 12698492928.0, "9810": 12698492928.0, "9815": 12698492928.0, "9820": 12698492928.0, "9825": 12698492928.0, "9830": 12698492928.0, "9835": 12698492928.0, "9840": 12698492928.0, "9845": 12698492928.0, "9850": 12698492928.0, "9855": 12698492928.0, "9860": 12698492928.0, "9865": 12698492928.0, "9870": 12698492928.0, "9875": 12698492928.0, "9880": 12698492928.0, "9885": 12698492928.0, "9890": 12698492928.0, "9895": 12698492928.0, "9900": 12698492928.0, "9905": 12698492928.0, "9910": 12698492928.0, "9915": 12698492928.0, "9920": 12698492928.0, "9925": 12698492928.0, "9930": 12698492928.0, "9935": 12698492928.0, "9940": 12698492928.0, "9945": 12698492928.0, "9950": 12698492928.0, "9955": 12698492928.0, "9960": 12698492928.0, "9965": 12698492928.0, "9970": 12698492928.0, "9975": 12698492928.0, "9980": 12698492928.0, "9985": 12698492928.0, "9990": 12698492928.0, "9995": 12698492928.0, "10000": 12698492928.0, "10005": 12698492928.0, "10010": 12698492928.0, "10015": 12698492928.0, "10020": 12698492928.0, "10025": 12698492928.0, "10030": 12698492928.0, "10035": 12698492928.0, "10040": 12698492928.0, "10045": 12698492928.0, "10050": 12698492928.0, "10055": 12698492928.0, "10060": 12698492928.0, "10065": 12698492928.0, "10070": 12698492928.0, "10075": 12698492928.0, "10080": 12698492928.0, "10085": 12698492928.0, "10090": 12698492928.0, "10095": 12698492928.0, "10100": 12698492928.0, "10105": 12698492928.0, "10110": 12698492928.0, "10115": 12698492928.0, "10120": 12698492928.0, "10125": 12698492928.0, "10130": 12698492928.0, "10135": 12698492928.0, "10140": 12698492928.0, "10145": 12698492928.0, "10150": 12698492928.0, "10155": 12698492928.0, "10160": 12698492928.0, "10165": 12698492928.0, "10170": 12698492928.0, "10175": 12698492928.0, "10180": 12698492928.0, "10185": 12698492928.0, "10190": 12698492928.0, "10195": 12698492928.0, "10200": 12698492928.0, "10205": 12698492928.0, "10210": 12698492928.0, "10215": 12698492928.0, "10220": 12698492928.0, "10225": 12698492928.0, "10230": 12698492928.0, "10235": 12698492928.0, "10240": 12698492928.0, "10245": 12698492928.0, "10250": 12698492928.0, "10255": 12698492928.0, "10260": 12698492928.0, "10265": 12698492928.0, "10270": 12698492928.0, "10275": 12698492928.0, "10280": 12698492928.0, "10285": 12698492928.0, "10290": 12698492928.0, "10295": 12698492928.0, "10300": 12698492928.0, "10305": 12698492928.0, "10310": 12698492928.0, "10315": 12698492928.0, "10320": 12698492928.0, "10325": 12698492928.0, "10330": 12698492928.0, "10335": 12698492928.0, "10340": 12698492928.0, "10345": 12698492928.0, "10350": 12698492928.0, "10355": 12698492928.0, "10360": 12698492928.0, "10365": 12698492928.0, "10370": 12698492928.0, "10375": 12698492928.0, "10380": 12698492928.0, "10385": 12698492928.0, "10390": 12698492928.0, "10395": 12698492928.0, "10400": 12698492928.0, "10405": 12698492928.0, "10410": 12698492928.0, "10415": 12698492928.0, "10420": 12698492928.0, "10425": 12698492928.0, "10430": 12698492928.0, "10435": 12698492928.0, "10440": 12698492928.0, "10445": 12698492928.0, "10450": 12698492928.0, "10455": 12698492928.0, "10460": 12698492928.0, "10465": 12698492928.0, "10470": 12698492928.0, "10475": 12698492928.0, "10480": 12698492928.0, "10485": 12698492928.0, "10490": 12698492928.0, "10495": 12698492928.0, "10500": 12698492928.0, "10505": 12698492928.0, "10510": 12698492928.0, "10515": 12698492928.0, "10520": 12698492928.0, "10525": 12698492928.0, "10530": 12698492928.0, "10535": 12698492928.0, "10540": 12698492928.0, "10545": 12698492928.0, "10550": 12698492928.0, "10555": 12698492928.0, "10560": 12698492928.0, "10565": 12698492928.0, "10570": 12698492928.0, "10575": 12698492928.0, "10580": 12698492928.0, "10585": 12698492928.0, "10590": 12698492928.0, "10595": 12698492928.0, "10600": 12698492928.0, "10605": 12698492928.0, "10610": 12698492928.0, "10615": 12698492928.0, "10620": 12698492928.0, "10625": 12698492928.0, "10630": 12698492928.0, "10635": 12698492928.0, "10640": 12698492928.0, "10645": 12698492928.0, "10650": 12698492928.0, "10655": 12698492928.0, "10660": 12698492928.0, "10665": 12698492928.0, "10670": 12698492928.0, "10675": 12698492928.0, "10680": 12698492928.0, "10685": 12698492928.0, "10690": 12698492928.0, "10695": 12698492928.0, "10700": 12698492928.0, "10705": 12698492928.0, "10710": 12698492928.0, "10715": 12698492928.0, "10720": 12698492928.0, "10725": 12698492928.0, "10730": 12698492928.0, "10735": 12698492928.0, "10740": 12698492928.0, "10745": 12698492928.0, "10750": 12698492928.0, "10755": 12698492928.0, "10760": 12698492928.0, "10765": 12698492928.0, "10770": 12698492928.0, "10775": 12698492928.0, "10780": 12698492928.0, "10785": 12698492928.0, "10790": 12698492928.0, "10795": 12698492928.0, "10800": 12698492928.0, "10805": 12698492928.0, "10810": 12698492928.0, "10815": 12698492928.0, "10820": 12698492928.0, "10825": 12698492928.0, "10830": 12698492928.0, "10835": 12698492928.0, "10840": 12698492928.0, "10845": 12698492928.0, "10850": 12698492928.0, "10855": 12698492928.0, "10860": 12698492928.0, "10865": 12698492928.0, "10870": 12698492928.0, "10875": 12698492928.0, "10880": 12698492928.0, "10885": 12698492928.0, "10890": 12698492928.0, "10895": 12698492928.0, "10900": 12698492928.0, "10905": 12698492928.0, "10910": 12698492928.0, "10915": 12698492928.0, "10920": 12698492928.0, "10925": 12698492928.0, "10930": 12698492928.0, "10935": 12698492928.0, "10940": 12698492928.0, "10945": 12698492928.0, "10950": 12698492928.0, "10955": 12698492928.0, "10960": 12698492928.0, "10965": 12698492928.0, "10970": 12698492928.0, "10975": 12698492928.0, "10980": 12698492928.0, "10985": 12698492928.0, "10990": 12698492928.0, "10995": 12698492928.0, "11000": 12698492928.0, "11005": 12698492928.0, "11010": 12698492928.0, "11015": 12698492928.0, "11020": 12698492928.0, "11025": 12698492928.0, "11030": 12698492928.0, "11035": 12698492928.0, "11040": 12698492928.0, "11045": 12698492928.0, "11050": 12698492928.0, "11055": 12698492928.0, "11060": 12698492928.0, "11065": 12698492928.0, "11070": 12698492928.0, "11075": 12698492928.0, "11080": 12698492928.0, "11085": 12698492928.0, "11090": 12698492928.0, "11095": 12698492928.0, "11100": 12698492928.0, "11105": 12698492928.0, "11110": 12698492928.0, "11115": 12698492928.0, "11120": 12698492928.0, "11125": 12698492928.0, "11130": 12698492928.0, "11135": 12698492928.0, "11140": 12698492928.0, "11145": 12698492928.0, "11150": 12698492928.0, "11155": 12698492928.0, "11160": 12698492928.0, "11165": 12698492928.0, "11170": 12698492928.0, "11175": 12698492928.0, "11180": 12698492928.0, "11185": 12698492928.0, "11190": 12698492928.0, "11195": 12698492928.0, "11200": 12698492928.0, "11205": 12698492928.0, "11210": 12698492928.0, "11215": 12698492928.0, "11220": 12698492928.0, "11225": 12698492928.0, "11230": 12698492928.0, "11235": 12698492928.0, "11240": 12698492928.0, "11245": 12698492928.0, "11250": 12698492928.0, "11255": 12698492928.0, "11260": 12698492928.0, "11265": 12698492928.0, "11270": 12698492928.0, "11275": 12698492928.0, "11280": 12698492928.0, "11285": 12698492928.0, "11290": 12698492928.0, "11295": 12698492928.0, "11300": 12698492928.0, "11305": 12698492928.0, "11310": 12698492928.0, "11315": 12698492928.0, "11320": 12698492928.0, "11325": 12698492928.0, "11330": 12698492928.0, "11335": 12698492928.0, "11340": 12698492928.0, "11345": 12698492928.0, "11350": 12698492928.0, "11355": 12698492928.0, "11360": 12698492928.0, "11365": 12698492928.0, "11370": 12698492928.0, "11375": 12698492928.0, "11380": 12698492928.0, "11385": 12698492928.0, "11390": 12698492928.0, "11395": 12698492928.0, "11400": 12698492928.0, "11405": 12698492928.0, "11410": 12698492928.0, "11415": 12698492928.0, "11420": 12698492928.0, "11425": 12698492928.0, "11430": 12698492928.0, "11435": 12698492928.0, "11440": 12698492928.0, "11445": 12698492928.0, "11450": 12698492928.0, "11455": 12698492928.0, "11460": 12698492928.0, "11465": 12698492928.0, "11470": 12698492928.0, "11475": 12698492928.0, "11480": 12698492928.0, "11485": 12698492928.0, "11490": 12698492928.0, "11495": 12698492928.0, "11500": 12698492928.0, "11505": 12698492928.0, "11510": 12698492928.0, "11515": 12698492928.0, "11520": 12698492928.0, "11525": 12698492928.0, "11530": 12698492928.0, "11535": 12698492928.0, "11540": 12698492928.0, "11545": 12698492928.0, "11550": 12698492928.0, "11555": 12698492928.0, "11560": 12698492928.0, "11565": 12698492928.0, "11570": 12698492928.0, "11575": 12698492928.0, "11580": 12698492928.0, "11585": 12698492928.0, "11590": 12698492928.0, "11595": 12698492928.0, "11600": 12698492928.0, "11605": 12698492928.0, "11610": 12698492928.0, "11615": 12698492928.0, "11620": 12698492928.0, "11625": 12698492928.0, "11630": 12698492928.0, "11635": 12698492928.0, "11640": 12698492928.0, "11645": 12698492928.0, "11650": 12698492928.0, "11655": 12698492928.0, "11660": 12698492928.0, "11665": 12698492928.0, "11670": 12698492928.0, "11675": 12698492928.0, "11680": 12698492928.0, "11685": 12698492928.0, "11690": 12698492928.0, "11695": 12698492928.0, "11700": 12698492928.0, "11705": 12698492928.0, "11710": 12698492928.0, "11715": 12698492928.0, "11720": 12698492928.0, "11725": 12698492928.0, "11730": 12698492928.0, "11735": 12698492928.0, "11740": 12698492928.0, "11745": 12698492928.0, "11750": 12698492928.0, "11755": 12698492928.0, "11760": 12698492928.0, "11765": 12698492928.0, "11770": 12698492928.0, "11775": 12698492928.0, "11780": 12698492928.0, "11785": 12698492928.0, "11790": 12698492928.0, "11795": 12698492928.0, "11800": 12698492928.0, "11805": 12698492928.0, "11810": 12698492928.0, "11815": 12698492928.0, "11820": 12698492928.0, "11825": 12698492928.0, "11830": 12698492928.0, "11835": 12698492928.0, "11840": 12698492928.0, "11845": 12698492928.0, "11850": 12698492928.0, "11855": 12698492928.0, "11860": 12698492928.0, "11865": 12698492928.0, "11870": 12698492928.0, "11875": 12698492928.0, "11880": 12698492928.0, "11885": 12698492928.0, "11890": 12698492928.0, "11895": 12698492928.0, "11900": 12698492928.0, "11905": 12698492928.0, "11910": 12698492928.0, "11915": 12698492928.0, "11920": 12698492928.0, "11925": 12698492928.0, "11930": 12698492928.0, "11935": 12698492928.0, "11940": 12698492928.0, "11945": 12698492928.0, "11950": 12698492928.0, "11955": 12698492928.0, "11960": 12698492928.0, "11965": 12698492928.0, "11970": 12698492928.0, "11975": 12698492928.0, "11980": 12698492928.0, "11985": 12698492928.0, "11990": 12698492928.0, "11995": 12698492928.0, "12000": 12698492928.0, "12005": 12698492928.0, "12010": 12698492928.0, "12015": 12698492928.0, "12020": 12698492928.0, "12025": 12698492928.0, "12030": 12698492928.0, "12035": 12698492928.0, "12040": 12698492928.0, "12045": 12698492928.0, "12050": 12698492928.0, "12055": 12698492928.0, "12060": 12698492928.0, "12065": 12698492928.0, "12070": 12698492928.0, "12075": 12698492928.0, "12080": 12698492928.0, "12085": 12698492928.0, "12090": 12698492928.0, "12095": 12698492928.0, "12100": 12698492928.0, "12105": 12698492928.0, "12110": 12698492928.0, "12115": 12698492928.0, "12120": 12698492928.0, "12125": 12698492928.0, "12130": 12698492928.0, "12135": 12698492928.0, "12140": 12698492928.0, "12145": 12698492928.0, "12150": 12698492928.0, "12155": 12698492928.0, "12160": 12698492928.0, "12165": 12698492928.0, "12170": 12698492928.0, "12175": 12698492928.0, "12180": 12698492928.0, "12185": 12698492928.0, "12190": 12698492928.0, "12195": 12698492928.0, "12200": 12698492928.0, "12205": 12698492928.0, "12210": 12698492928.0, "12215": 12698492928.0, "12220": 12698492928.0, "12225": 12698492928.0, "12230": 12698492928.0, "12235": 12698492928.0, "12240": 12698492928.0, "12245": 12698492928.0, "12250": 12698492928.0, "12255": 12698492928.0, "12260": 12698492928.0, "12265": 12698492928.0, "12270": 12698492928.0, "12275": 12698492928.0, "12280": 12698492928.0, "12285": 12698492928.0, "12290": 12698492928.0, "12295": 12698492928.0, "12300": 12698492928.0, "12305": 12698492928.0, "12310": 12698492928.0, "12315": 12698492928.0, "12320": 12698492928.0, "12325": 12698492928.0, "12330": 12698492928.0, "12335": 12698492928.0, "12340": 12698492928.0, "12345": 12698492928.0, "12350": 12698492928.0, "12355": 12698492928.0, "12360": 12698492928.0, "12365": 12698492928.0, "12370": 12698492928.0, "12375": 12698492928.0, "12380": 12698492928.0, "12385": 12698492928.0, "12390": 12698492928.0, "12395": 12698492928.0, "12400": 12698492928.0, "12405": 12698492928.0, "12410": 12698492928.0, "12415": 12698492928.0, "12420": 12698492928.0, "12425": 12698492928.0, "12430": 12698492928.0, "12435": 12698492928.0, "12440": 12698492928.0, "12445": 12698492928.0, "12450": 12698492928.0, "12455": 12698492928.0, "12460": 12698492928.0, "12465": 12698492928.0, "12470": 12698492928.0, "12475": 12698492928.0, "12480": 12698492928.0, "12485": 12698492928.0, "12490": 12698492928.0, "12495": 12698492928.0, "12500": 12698492928.0, "12505": 12698492928.0, "12510": 12698492928.0, "12515": 12698492928.0, "12520": 12698492928.0, "12525": 12698492928.0, "12530": 12698492928.0, "12535": 12698492928.0, "12540": 12698492928.0, "12545": 12698492928.0, "12550": 12698492928.0, "12555": 12698492928.0, "12560": 12698492928.0, "12565": 12698492928.0, "12570": 12698492928.0, "12575": 12698492928.0, "12580": 12698492928.0, "12585": 12698492928.0, "12590": 12698492928.0, "12595": 12698492928.0, "12600": 12698492928.0, "12605": 12698492928.0, "12610": 12698492928.0, "12615": 12698492928.0, "12620": 12698492928.0, "12625": 12698492928.0, "12630": 12698492928.0, "12635": 12698492928.0, "12640": 12698492928.0, "12645": 12698492928.0, "12650": 12698492928.0, "12655": 12698492928.0, "12660": 12698492928.0, "12665": 12698492928.0, "12670": 12698492928.0, "12675": 12698492928.0, "12680": 12698492928.0, "12685": 12698492928.0, "12690": 12698492928.0, "12695": 12698492928.0, "12700": 12698492928.0, "12705": 12698492928.0, "12710": 12698492928.0, "12715": 12698492928.0, "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": "nan", "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": "nan", "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": "nan"}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 13000, "step_interval": 5, "values": {"1": 27893811200.0, "5": 28391901184.0, "10": 28391901184.0, "15": 28391901184.0, "20": 28391901184.0, "25": 28391901184.0, "30": 28391901184.0, "35": 28391901184.0, "40": 28391901184.0, "45": 28391901184.0, "50": 28391901184.0, "55": 28391901184.0, "60": 28391901184.0, "65": 28391901184.0, "70": 28391901184.0, "75": 28391901184.0, "80": 28391901184.0, "85": 28391901184.0, "90": 28391901184.0, "95": 28391901184.0, "100": 28391901184.0, "105": 28391901184.0, "110": 28391901184.0, "115": 28391901184.0, "120": 28391901184.0, "125": 28391901184.0, "130": 28391901184.0, "135": 28391901184.0, "140": 28391901184.0, "145": 28391901184.0, "150": 28391901184.0, "155": 28391901184.0, "160": 28391901184.0, "165": 28391901184.0, "170": 28391901184.0, "175": 28391901184.0, "180": 28391901184.0, "185": 28391901184.0, "190": 28391901184.0, "195": 28391901184.0, "200": 28391901184.0, "205": 28391901184.0, "210": 28391901184.0, "215": 28391901184.0, "220": 28391901184.0, "225": 28391901184.0, "230": 28391901184.0, "235": 28391901184.0, "240": 28391901184.0, "245": 28391901184.0, "250": 28391901184.0, "255": 28391901184.0, "260": 28391901184.0, "265": 28391901184.0, "270": 28391901184.0, "275": 28391901184.0, "280": 28391901184.0, "285": 28391901184.0, "290": 28391901184.0, "295": 28391901184.0, "300": 28391901184.0, "305": 28391901184.0, "310": 28391901184.0, "315": 28391901184.0, "320": 28391901184.0, "325": 28391901184.0, "330": 28391901184.0, "335": 28391901184.0, "340": 28391901184.0, "345": 28391901184.0, "350": 28391901184.0, "355": 28391901184.0, "360": 28391901184.0, "365": 28391901184.0, "370": 28391901184.0, "375": 28391901184.0, "380": 28391901184.0, "385": 28391901184.0, "390": 28391901184.0, "395": 28391901184.0, "400": 28391901184.0, "405": 28391901184.0, "410": 28391901184.0, "415": 28391901184.0, "420": 28391901184.0, "425": 28391901184.0, "430": 28391901184.0, "435": 28391901184.0, "440": 28391901184.0, "445": 28391901184.0, "450": 28391901184.0, "455": 28391901184.0, "460": 28391901184.0, "465": 28391901184.0, "470": 28391901184.0, "475": 28391901184.0, "480": 28391901184.0, "485": 28391901184.0, "490": 28391901184.0, "495": 28391901184.0, "500": 28391901184.0, "505": 28391901184.0, "510": 28391901184.0, "515": 28391901184.0, "520": 28391901184.0, "525": 28391901184.0, "530": 28391901184.0, "535": 28391901184.0, "540": 28391901184.0, "545": 28391901184.0, "550": 28391901184.0, "555": 28391901184.0, "560": 28391901184.0, "565": 28391901184.0, "570": 28391901184.0, "575": 28391901184.0, "580": 28391901184.0, "585": 28391901184.0, "590": 28391901184.0, "595": 28391901184.0, "600": 28391901184.0, "605": 28391901184.0, "610": 28391901184.0, "615": 28391901184.0, "620": 28391901184.0, "625": 28391901184.0, "630": 28391901184.0, "635": 28391901184.0, "640": 28391901184.0, "645": 28391901184.0, "650": 28391901184.0, "655": 28391901184.0, "660": 28391901184.0, "665": 28391901184.0, "670": 28391901184.0, "675": 28391901184.0, "680": 28391901184.0, "685": 28391901184.0, "690": 28391901184.0, "695": 28391901184.0, "700": 28391901184.0, "705": 28391901184.0, "710": 28391901184.0, "715": 28391901184.0, "720": 28391901184.0, "725": 28391901184.0, "730": 28391901184.0, "735": 28391901184.0, "740": 28391901184.0, "745": 28391901184.0, "750": 28391901184.0, "755": 28391901184.0, "760": 28391901184.0, "765": 28391901184.0, "770": 28391901184.0, "775": 28391901184.0, "780": 28391901184.0, "785": 28391901184.0, "790": 28391901184.0, "795": 28391901184.0, "800": 28391901184.0, "805": 28391901184.0, "810": 28391901184.0, "815": 28391901184.0, "820": 28391901184.0, "825": 28391901184.0, "830": 28391901184.0, "835": 28391901184.0, "840": 28391901184.0, "845": 28391901184.0, "850": 28391901184.0, "855": 28391901184.0, "860": 28391901184.0, "865": 28391901184.0, "870": 28391901184.0, "875": 28391901184.0, "880": 28391901184.0, "885": 28391901184.0, "890": 28391901184.0, "895": 28391901184.0, "900": 28391901184.0, "905": 28391901184.0, "910": 28391901184.0, "915": 28391901184.0, "920": 28391901184.0, "925": 28391901184.0, "930": 28391901184.0, "935": 28391901184.0, "940": 28391901184.0, "945": 28391901184.0, "950": 28391901184.0, "955": 28391901184.0, "960": 28391901184.0, "965": 28391901184.0, "970": 28391901184.0, "975": 28391901184.0, "980": 28391901184.0, "985": 28391901184.0, "990": 28391901184.0, "995": 28391901184.0, "1000": 28391901184.0, "1005": 28391901184.0, "1010": 28391901184.0, "1015": 28391901184.0, "1020": 28391901184.0, "1025": 28391901184.0, "1030": 28391901184.0, "1035": 28391901184.0, "1040": 28391901184.0, "1045": 28391901184.0, "1050": 28391901184.0, "1055": 28391901184.0, "1060": 28391901184.0, "1065": 28391901184.0, "1070": 28391901184.0, "1075": 28391901184.0, "1080": 28391901184.0, "1085": 28391901184.0, "1090": 28391901184.0, "1095": 28391901184.0, "1100": 28391901184.0, "1105": 28391901184.0, "1110": 28391901184.0, "1115": 28391901184.0, "1120": 28391901184.0, "1125": 28391901184.0, "1130": 28391901184.0, "1135": 28391901184.0, "1140": 28391901184.0, "1145": 28391901184.0, "1150": 28391901184.0, "1155": 28391901184.0, "1160": 28391901184.0, "1165": 28391901184.0, "1170": 28391901184.0, "1175": 28391901184.0, "1180": 28391901184.0, "1185": 28391901184.0, "1190": 28391901184.0, "1195": 28391901184.0, "1200": 28391901184.0, "1205": 28391901184.0, "1210": 28391901184.0, "1215": 28391901184.0, "1220": 28391901184.0, "1225": 28391901184.0, "1230": 28391901184.0, "1235": 28391901184.0, "1240": 28391901184.0, "1245": 28391901184.0, "1250": 28391901184.0, "1255": 28391901184.0, "1260": 28391901184.0, "1265": 28391901184.0, "1270": 28391901184.0, "1275": 28391901184.0, "1280": 28391901184.0, "1285": 28391901184.0, "1290": 28391901184.0, "1295": 28391901184.0, "1300": 28391901184.0, "1305": 28391901184.0, "1310": 28391901184.0, "1315": 28391901184.0, "1320": 28391901184.0, "1325": 28391901184.0, "1330": 28391901184.0, "1335": 28391901184.0, "1340": 28391901184.0, "1345": 28391901184.0, "1350": 28391901184.0, "1355": 28391901184.0, "1360": 28391901184.0, "1365": 28391901184.0, "1370": 28391901184.0, "1375": 28391901184.0, "1380": 28391901184.0, "1385": 28391901184.0, "1390": 28391901184.0, "1395": 28391901184.0, "1400": 28391901184.0, "1405": 28391901184.0, "1410": 28391901184.0, "1415": 28391901184.0, "1420": 28391901184.0, "1425": 28391901184.0, "1430": 28391901184.0, "1435": 28391901184.0, "1440": 28391901184.0, "1445": 28391901184.0, "1450": 28391901184.0, "1455": 28391901184.0, "1460": 28391901184.0, "1465": 28391901184.0, "1470": 28391901184.0, "1475": 28391901184.0, "1480": 28391901184.0, "1485": 28391901184.0, "1490": 28391901184.0, "1495": 28391901184.0, "1500": 28391901184.0, "1505": 28391901184.0, "1510": 28391901184.0, "1515": 28391901184.0, "1520": 28391901184.0, "1525": 28391901184.0, "1530": 28391901184.0, "1535": 28391901184.0, "1540": 28391901184.0, "1545": 28391901184.0, "1550": 28391901184.0, "1555": 28391901184.0, "1560": 28391901184.0, "1565": 28391901184.0, "1570": 28391901184.0, "1575": 28391901184.0, "1580": 28391901184.0, "1585": 28391901184.0, "1590": 28391901184.0, "1595": 28391901184.0, "1600": 28391901184.0, "1605": 28391901184.0, "1610": 28391901184.0, "1615": 28391901184.0, "1620": 28391901184.0, "1625": 28391901184.0, "1630": 28391901184.0, "1635": 28391901184.0, "1640": 28391901184.0, "1645": 28391901184.0, "1650": 28391901184.0, "1655": 28391901184.0, "1660": 28391901184.0, "1665": 28391901184.0, "1670": 28391901184.0, "1675": 28391901184.0, "1680": 28391901184.0, "1685": 28391901184.0, "1690": 28391901184.0, "1695": 28391901184.0, "1700": 28391901184.0, "1705": 28391901184.0, "1710": 28391901184.0, "1715": 28391901184.0, "1720": 28391901184.0, "1725": 28391901184.0, "1730": 28391901184.0, "1735": 28391901184.0, "1740": 28391901184.0, "1745": 28391901184.0, "1750": 28391901184.0, "1755": 28391901184.0, "1760": 28391901184.0, "1765": 28391901184.0, "1770": 28391901184.0, "1775": 28391901184.0, "1780": 28391901184.0, "1785": 28391901184.0, "1790": 28391901184.0, "1795": 28391901184.0, "1800": 28391901184.0, "1805": 28391901184.0, "1810": 28391901184.0, "1815": 28391901184.0, "1820": 28391901184.0, "1825": 28391901184.0, "1830": 28391901184.0, "1835": 28391901184.0, "1840": 28391901184.0, "1845": 28391901184.0, "1850": 28391901184.0, "1855": 28391901184.0, "1860": 28391901184.0, "1865": 28391901184.0, "1870": 28391901184.0, "1875": 28391901184.0, "1880": 28391901184.0, "1885": 28391901184.0, "1890": 28391901184.0, "1895": 28391901184.0, "1900": 28391901184.0, "1905": 28391901184.0, "1910": 28391901184.0, "1915": 28391901184.0, "1920": 28391901184.0, "1925": 28391901184.0, "1930": 28391901184.0, "1935": 28391901184.0, "1940": 28391901184.0, "1945": 28391901184.0, "1950": 28391901184.0, "1955": 28391901184.0, "1960": 28391901184.0, "1965": 28391901184.0, "1970": 28391901184.0, "1975": 28391901184.0, "1980": 28391901184.0, "1985": 28391901184.0, "1990": 28391901184.0, "1995": 28391901184.0, "2000": 28391901184.0, "2005": 28391901184.0, "2010": 28391901184.0, "2015": 28391901184.0, "2020": 28391901184.0, "2025": 28391901184.0, "2030": 28391901184.0, "2035": 28391901184.0, "2040": 28391901184.0, "2045": 28391901184.0, "2050": 28391901184.0, "2055": 28391901184.0, "2060": 28391901184.0, "2065": 28391901184.0, "2070": 28391901184.0, "2075": 28391901184.0, "2080": 28391901184.0, "2085": 28391901184.0, "2090": 28391901184.0, "2095": 28391901184.0, "2100": 28391901184.0, "2105": 28391901184.0, "2110": 28391901184.0, "2115": 28391901184.0, "2120": 28391901184.0, "2125": 28391901184.0, "2130": 28391901184.0, "2135": 28391901184.0, "2140": 28391901184.0, "2145": 28391901184.0, "2150": 28391901184.0, "2155": 28391901184.0, "2160": 28391901184.0, "2165": 28391901184.0, "2170": 28391901184.0, "2175": 28391901184.0, "2180": 28391901184.0, "2185": 28391901184.0, "2190": 28391901184.0, "2195": 28391901184.0, "2200": 28391901184.0, "2205": 28391901184.0, "2210": 28391901184.0, "2215": 28391901184.0, "2220": 28391901184.0, "2225": 28391901184.0, "2230": 28391901184.0, "2235": 28391901184.0, "2240": 28391901184.0, "2245": 28391901184.0, "2250": 28391901184.0, "2255": 28391901184.0, "2260": 28391901184.0, "2265": 28391901184.0, "2270": 28391901184.0, "2275": 28391901184.0, "2280": 28391901184.0, "2285": 28391901184.0, "2290": 28391901184.0, "2295": 28391901184.0, "2300": 28391901184.0, "2305": 28391901184.0, "2310": 28391901184.0, "2315": 28391901184.0, "2320": 28391901184.0, "2325": 28391901184.0, "2330": 28391901184.0, "2335": 28391901184.0, "2340": 28391901184.0, "2345": 28391901184.0, "2350": 28391901184.0, "2355": 28391901184.0, "2360": 28391901184.0, "2365": 28391901184.0, "2370": 28391901184.0, "2375": 28391901184.0, "2380": 28391901184.0, "2385": 28391901184.0, "2390": 28391901184.0, "2395": 28391901184.0, "2400": 28391901184.0, "2405": 28391901184.0, "2410": 28391901184.0, "2415": 28391901184.0, "2420": 28391901184.0, "2425": 28391901184.0, "2430": 28391901184.0, "2435": 28391901184.0, "2440": 28391901184.0, "2445": 28391901184.0, "2450": 28391901184.0, "2455": 28391901184.0, "2460": 28391901184.0, "2465": 28391901184.0, "2470": 28391901184.0, "2475": 28391901184.0, "2480": 28391901184.0, "2485": 28391901184.0, "2490": 28391901184.0, "2495": 28391901184.0, "2500": 28391901184.0, "2505": 28391901184.0, "2510": 28391901184.0, "2515": 28391901184.0, "2520": 28391901184.0, "2525": 28391901184.0, "2530": 28391901184.0, "2535": 28391901184.0, "2540": 28391901184.0, "2545": 28391901184.0, "2550": 28391901184.0, "2555": 28391901184.0, "2560": 28391901184.0, "2565": 28391901184.0, "2570": 28391901184.0, "2575": 28391901184.0, "2580": 28391901184.0, "2585": 28391901184.0, "2590": 28391901184.0, "2595": 28391901184.0, "2600": 28391901184.0, "2605": 28391901184.0, "2610": 28391901184.0, "2615": 28391901184.0, "2620": 28391901184.0, "2625": 28391901184.0, "2630": 28391901184.0, "2635": 28391901184.0, "2640": 28391901184.0, "2645": 28391901184.0, "2650": 28391901184.0, "2655": 28391901184.0, "2660": 28391901184.0, "2665": 28391901184.0, "2670": 28391901184.0, "2675": 28391901184.0, "2680": 28391901184.0, "2685": 28391901184.0, "2690": 28391901184.0, "2695": 28391901184.0, "2700": 28391901184.0, "2705": 28391901184.0, "2710": 28391901184.0, "2715": 28391901184.0, "2720": 28391901184.0, "2725": 28391901184.0, "2730": 28391901184.0, "2735": 28391901184.0, "2740": 28391901184.0, "2745": 28391901184.0, "2750": 28391901184.0, "2755": 28391901184.0, "2760": 28391901184.0, "2765": 28391901184.0, "2770": 28391901184.0, "2775": 28391901184.0, "2780": 28391901184.0, "2785": 28391901184.0, "2790": 28391901184.0, "2795": 28391901184.0, "2800": 28391901184.0, "2805": 28391901184.0, "2810": 28391901184.0, "2815": 28391901184.0, "2820": 28391901184.0, "2825": 28391901184.0, "2830": 28391901184.0, "2835": 28391901184.0, "2840": 28391901184.0, "2845": 28391901184.0, "2850": 28391901184.0, "2855": 28391901184.0, "2860": 28391901184.0, "2865": 28391901184.0, "2870": 28391901184.0, "2875": 28391901184.0, "2880": 28391901184.0, "2885": 28391901184.0, "2890": 28391901184.0, "2895": 28391901184.0, "2900": 28391901184.0, "2905": 28391901184.0, "2910": 28391901184.0, "2915": 28391901184.0, "2920": 28391901184.0, "2925": 28391901184.0, "2930": 28391901184.0, "2935": 28391901184.0, "2940": 28391901184.0, "2945": 28391901184.0, "2950": 28391901184.0, "2955": 28391901184.0, "2960": 28391901184.0, "2965": 28391901184.0, "2970": 28391901184.0, "2975": 28391901184.0, "2980": 28391901184.0, "2985": 28391901184.0, "2990": 28391901184.0, "2995": 28391901184.0, "3000": 28391901184.0, "3005": 28391901184.0, "3010": 28391901184.0, "3015": 28391901184.0, "3020": 28391901184.0, "3025": 28391901184.0, "3030": 28391901184.0, "3035": 28391901184.0, "3040": 28391901184.0, "3045": 28391901184.0, "3050": 28391901184.0, "3055": 28391901184.0, "3060": 28391901184.0, "3065": 28391901184.0, "3070": 28391901184.0, "3075": 28391901184.0, "3080": 28391901184.0, "3085": 28391901184.0, "3090": 28391901184.0, "3095": 28391901184.0, "3100": 28391901184.0, "3105": 28391901184.0, "3110": 28391901184.0, "3115": 28391901184.0, "3120": 28391901184.0, "3125": 28391901184.0, "3130": 28391901184.0, "3135": 28391901184.0, "3140": 28391901184.0, "3145": 28391901184.0, "3150": 28391901184.0, "3155": 28391901184.0, "3160": 28391901184.0, "3165": 28391901184.0, "3170": 28391901184.0, "3175": 28391901184.0, "3180": 28391901184.0, "3185": 28391901184.0, "3190": 28391901184.0, "3195": 28391901184.0, "3200": 28391901184.0, "3205": 28391901184.0, "3210": 28391901184.0, "3215": 28391901184.0, "3220": 28391901184.0, "3225": 28391901184.0, "3230": 28391901184.0, "3235": 28391901184.0, "3240": 28391901184.0, "3245": 28391901184.0, "3250": 28391901184.0, "3255": 28391901184.0, "3260": 28391901184.0, "3265": 28391901184.0, "3270": 28391901184.0, "3275": 28391901184.0, "3280": 28391901184.0, "3285": 28391901184.0, "3290": 28391901184.0, "3295": 28391901184.0, "3300": 28391901184.0, "3305": 28391901184.0, "3310": 28391901184.0, "3315": 28391901184.0, "3320": 28391901184.0, "3325": 28391901184.0, "3330": 28391901184.0, "3335": 28391901184.0, "3340": 28391901184.0, "3345": 28391901184.0, "3350": 28391901184.0, "3355": 28391901184.0, "3360": 28391901184.0, "3365": 28391901184.0, "3370": 28391901184.0, "3375": 28391901184.0, "3380": 28391901184.0, "3385": 28391901184.0, "3390": 28391901184.0, "3395": 28391901184.0, "3400": 28391901184.0, "3405": 28391901184.0, "3410": 28391901184.0, "3415": 28391901184.0, "3420": 28391901184.0, "3425": 28391901184.0, "3430": 28391901184.0, "3435": 28391901184.0, "3440": 28391901184.0, "3445": 28391901184.0, "3450": 28391901184.0, "3455": 28391901184.0, "3460": 28391901184.0, "3465": 28391901184.0, "3470": 28391901184.0, "3475": 28391901184.0, "3480": 28391901184.0, "3485": 28391901184.0, "3490": 28391901184.0, "3495": 28391901184.0, "3500": 28391901184.0, "3505": 28391901184.0, "3510": 28391901184.0, "3515": 28391901184.0, "3520": 28391901184.0, "3525": 28391901184.0, "3530": 28391901184.0, "3535": 28391901184.0, "3540": 28391901184.0, "3545": 28391901184.0, "3550": 28391901184.0, "3555": 28391901184.0, "3560": 28391901184.0, "3565": 28391901184.0, "3570": 28391901184.0, "3575": 28391901184.0, "3580": 28391901184.0, "3585": 28391901184.0, "3590": 28391901184.0, "3595": 28391901184.0, "3600": 28391901184.0, "3605": 28391901184.0, "3610": 28391901184.0, "3615": 28391901184.0, "3620": 28391901184.0, "3625": 28391901184.0, "3630": 28391901184.0, "3635": 28391901184.0, "3640": 28391901184.0, "3645": 28391901184.0, "3650": 28391712768.0, "3655": 28391712768.0, "3660": 28391712768.0, "3665": 28391712768.0, "3670": 28391712768.0, "3675": 28391712768.0, "3680": 28391712768.0, "3685": 28391712768.0, "3690": 28391712768.0, "3695": 28391712768.0, "3700": 28391712768.0, "3705": 28391712768.0, "3710": 28391712768.0, "3715": 28391712768.0, "3720": 28391712768.0, "3725": 28391712768.0, "3730": 28391712768.0, "3735": 28391712768.0, "3740": 28391712768.0, "3745": 28391712768.0, "3750": 28391712768.0, "3755": 28391712768.0, "3760": 28391712768.0, "3765": 28391712768.0, "3770": 28391712768.0, "3775": 28391712768.0, "3780": 28391712768.0, "3785": 28391712768.0, "3790": 28391712768.0, "3795": 28391712768.0, "3800": 28391712768.0, "3805": 28391712768.0, "3810": 28391712768.0, "3815": 28391712768.0, "3820": 28391712768.0, "3825": 28391712768.0, "3830": 28391712768.0, "3835": 28391712768.0, "3840": 28391712768.0, "3845": 28391712768.0, "3850": 28391712768.0, "3855": 28391712768.0, "3860": 28391712768.0, "3865": 28391712768.0, "3870": 28391712768.0, "3875": 28391712768.0, "3880": 28391712768.0, "3885": 28391712768.0, "3890": 28391712768.0, "3895": 28391712768.0, "3900": 28391712768.0, "3905": 28391712768.0, "3910": 28391712768.0, "3915": 28391712768.0, "3920": 28391712768.0, "3925": 28391712768.0, "3930": 28391712768.0, "3935": 28391712768.0, "3940": 28391712768.0, "3945": 28391712768.0, "3950": 28391712768.0, "3955": 28391712768.0, "3960": 28391712768.0, "3965": 28391712768.0, "3970": 28391712768.0, "3975": 28391712768.0, "3980": 28391712768.0, "3985": 28391712768.0, "3990": 28391712768.0, "3995": 28391712768.0, "4000": 28391712768.0, "4005": 28391712768.0, "4010": 28391712768.0, "4015": 28391712768.0, "4020": 28391712768.0, "4025": 28391712768.0, "4030": 28391712768.0, "4035": 28391712768.0, "4040": 28391712768.0, "4045": 28391712768.0, "4050": 28391712768.0, "4055": 28391712768.0, "4060": 28391712768.0, "4065": 28391712768.0, "4070": 28391712768.0, "4075": 28391712768.0, "4080": 28391712768.0, "4085": 28391712768.0, "4090": 28391712768.0, "4095": 28391712768.0, "4100": 28391712768.0, "4105": 28391712768.0, "4110": 28391712768.0, "4115": 28391712768.0, "4120": 28391712768.0, "4125": 28391712768.0, "4130": 28391712768.0, "4135": 28391712768.0, "4140": 28391712768.0, "4145": 28391712768.0, "4150": 28391712768.0, "4155": 28391712768.0, "4160": 28391712768.0, "4165": 28391712768.0, "4170": 28391712768.0, "4175": 28391712768.0, "4180": 28391712768.0, "4185": 28391712768.0, "4190": 28391712768.0, "4195": 28391712768.0, "4200": 28391712768.0, "4205": 28391712768.0, "4210": 28391712768.0, "4215": 28391712768.0, "4220": 28391712768.0, "4225": 28391712768.0, "4230": 28391712768.0, "4235": 28391712768.0, "4240": 28391712768.0, "4245": 28391712768.0, "4250": 28391712768.0, "4255": 28391712768.0, "4260": 28391712768.0, "4265": 28391712768.0, "4270": 28391712768.0, "4275": 28391712768.0, "4280": 28391712768.0, "4285": 28391712768.0, "4290": 28391712768.0, "4295": 28391712768.0, "4300": 28391712768.0, "4305": 28391712768.0, "4310": 28391712768.0, "4315": 28391712768.0, "4320": 28391712768.0, "4325": 28391712768.0, "4330": 28391712768.0, "4335": 28391712768.0, "4340": 28391712768.0, "4345": 28391712768.0, "4350": 28391712768.0, "4355": 28391712768.0, "4360": 28391712768.0, "4365": 28391712768.0, "4370": 28391712768.0, "4375": 28391712768.0, "4380": 28391712768.0, "4385": 28391712768.0, "4390": 28391712768.0, "4395": 28391712768.0, "4400": 28391712768.0, "4405": 28391712768.0, "4410": 28391712768.0, "4415": 28391712768.0, "4420": 28391712768.0, "4425": 28391712768.0, "4430": 28391712768.0, "4435": 28391712768.0, "4440": 28391712768.0, "4445": 28391712768.0, "4450": 28391712768.0, "4455": 28391712768.0, "4460": 28391712768.0, "4465": 28391712768.0, "4470": 28391712768.0, "4475": 28391712768.0, "4480": 28391712768.0, "4485": 28391712768.0, "4490": 28391712768.0, "4495": 28391712768.0, "4500": 28391712768.0, "4505": 28391712768.0, "4510": 28391712768.0, "4515": 28391712768.0, "4520": 28391712768.0, "4525": 28391712768.0, "4530": 28391712768.0, "4535": 28391712768.0, "4540": 28391712768.0, "4545": 28391712768.0, "4550": 28391712768.0, "4555": 28391712768.0, "4560": 28391712768.0, "4565": 28391712768.0, "4570": 28391712768.0, "4575": 28391712768.0, "4580": 28391712768.0, "4585": 28391712768.0, "4590": 28391712768.0, "4595": 28391712768.0, "4600": 28391712768.0, "4605": 28391712768.0, "4610": 28391712768.0, "4615": 28391712768.0, "4620": 28391712768.0, "4625": 28391712768.0, "4630": 28391712768.0, "4635": 28391712768.0, "4640": 28391712768.0, "4645": 28391712768.0, "4650": 28391712768.0, "4655": 28391712768.0, "4660": 28391712768.0, "4665": 28391712768.0, "4670": 28391712768.0, "4675": 28391712768.0, "4680": 28391712768.0, "4685": 28391712768.0, "4690": 28391712768.0, "4695": 28391712768.0, "4700": 28391712768.0, "4705": 28391712768.0, "4710": 28391712768.0, "4715": 28391712768.0, "4720": 28391712768.0, "4725": 28391712768.0, "4730": 28391712768.0, "4735": 28391712768.0, "4740": 28391712768.0, "4745": 28391712768.0, "4750": 28391712768.0, "4755": 28391712768.0, "4760": 28391712768.0, "4765": 28391712768.0, "4770": 28391712768.0, "4775": 28391712768.0, "4780": 28391712768.0, "4785": 28391712768.0, "4790": 28391712768.0, "4795": 28391712768.0, "4800": 28391712768.0, "4805": 28391712768.0, "4810": 28391712768.0, "4815": 28391712768.0, "4820": 28391712768.0, "4825": 28391712768.0, "4830": 28391712768.0, "4835": 28391712768.0, "4840": 28391712768.0, "4845": 28391712768.0, "4850": 28391712768.0, "4855": 28391712768.0, "4860": 28391712768.0, "4865": 28391712768.0, "4870": 28391712768.0, "4875": 28391712768.0, "4880": 28391712768.0, "4885": 28391712768.0, "4890": 28391712768.0, "4895": 28391712768.0, "4900": 28391712768.0, "4905": 28391712768.0, "4910": 28391712768.0, "4915": 28391712768.0, "4920": 28391712768.0, "4925": 28391712768.0, "4930": 28391712768.0, "4935": 28391712768.0, "4940": 28391712768.0, "4945": 28391712768.0, "4950": 28391712768.0, "4955": 28391712768.0, "4960": 28391712768.0, "4965": 28391712768.0, "4970": 28391712768.0, "4975": 28391712768.0, "4980": 28391712768.0, "4985": 28391712768.0, "4990": 28391712768.0, "4995": 28391712768.0, "5000": 28391712768.0, "5005": 28391712768.0, "5010": 28391712768.0, "5015": 28391712768.0, "5020": 28391712768.0, "5025": 28391712768.0, "5030": 28391712768.0, "5035": 28391712768.0, "5040": 28391712768.0, "5045": 28391712768.0, "5050": 28391712768.0, "5055": 28391712768.0, "5060": 28391712768.0, "5065": 28391712768.0, "5070": 28391712768.0, "5075": 28391712768.0, "5080": 28391712768.0, "5085": 28391712768.0, "5090": 28391712768.0, "5095": 28391712768.0, "5100": 28391712768.0, "5105": 28391712768.0, "5110": 28391712768.0, "5115": 28391712768.0, "5120": 28391712768.0, "5125": 28391712768.0, "5130": 28391712768.0, "5135": 28391712768.0, "5140": 28391712768.0, "5145": 28391712768.0, "5150": 28391712768.0, "5155": 28391712768.0, "5160": 28391712768.0, "5165": 28391712768.0, "5170": 28391712768.0, "5175": 28391712768.0, "5180": 28391712768.0, "5185": 28391712768.0, "5190": 28391712768.0, "5195": 28391712768.0, "5200": 28391712768.0, "5205": 28391712768.0, "5210": 28391712768.0, "5215": 28391712768.0, "5220": 28391712768.0, "5225": 28391712768.0, "5230": 28391712768.0, "5235": 28391712768.0, "5240": 28391712768.0, "5245": 28391712768.0, "5250": 28391712768.0, "5255": 28391712768.0, "5260": 28391712768.0, "5265": 28391712768.0, "5270": 28391712768.0, "5275": 28391712768.0, "5280": 28391712768.0, "5285": 28391712768.0, "5290": 28391712768.0, "5295": 28391712768.0, "5300": 28391712768.0, "5305": 28391712768.0, "5310": 28391712768.0, "5315": 28391712768.0, "5320": 28391712768.0, "5325": 28391712768.0, "5330": 28391712768.0, "5335": 28391712768.0, "5340": 28391712768.0, "5345": 28391712768.0, "5350": 28391712768.0, "5355": 28391712768.0, "5360": 28391712768.0, "5365": 28391712768.0, "5370": 28391712768.0, "5375": 28391712768.0, "5380": 28391712768.0, "5385": 28391712768.0, "5390": 28391712768.0, "5395": 28391712768.0, "5400": 28391712768.0, "5405": 28391712768.0, "5410": 28391712768.0, "5415": 28391712768.0, "5420": 28391712768.0, "5425": 28391712768.0, "5430": 28391712768.0, "5435": 28391712768.0, "5440": 28391712768.0, "5445": 28391712768.0, "5450": 28391712768.0, "5455": 28391712768.0, "5460": 28391712768.0, "5465": 28391712768.0, "5470": 28391712768.0, "5475": 28391712768.0, "5480": 28391712768.0, "5485": 28391712768.0, "5490": 28391712768.0, "5495": 28391712768.0, "5500": 28391712768.0, "5505": 28391712768.0, "5510": 28391712768.0, "5515": 28391712768.0, "5520": 28391712768.0, "5525": 28391712768.0, "5530": 28391712768.0, "5535": 28391712768.0, "5540": 28391712768.0, "5545": 28391712768.0, "5550": 28391712768.0, "5555": 28391712768.0, "5560": 28391712768.0, "5565": 28391712768.0, "5570": 28391712768.0, "5575": 28391712768.0, "5580": 28391712768.0, "5585": 28391712768.0, "5590": 28391712768.0, "5595": 28391712768.0, "5600": 28391712768.0, "5605": 28391712768.0, "5610": 28391712768.0, "5615": 28391712768.0, "5620": 28391712768.0, "5625": 28391712768.0, "5630": 28391712768.0, "5635": 28391712768.0, "5640": 28391712768.0, "5645": 28391712768.0, "5650": 28391712768.0, "5655": 28391712768.0, "5660": 28391712768.0, "5665": 28391712768.0, "5670": 28391712768.0, "5675": 28391712768.0, "5680": 28391712768.0, "5685": 28391712768.0, "5690": 28391712768.0, "5695": 28391712768.0, "5700": 28391712768.0, "5705": 28391712768.0, "5710": 28391712768.0, "5715": 28391712768.0, "5720": 28391712768.0, "5725": 28391712768.0, "5730": 28391712768.0, "5735": 28391712768.0, "5740": 28391712768.0, "5745": 28391712768.0, "5750": 28391712768.0, "5755": 28391712768.0, "5760": 28391712768.0, "5765": 28391712768.0, "5770": 28391712768.0, "5775": 28391712768.0, "5780": 28391712768.0, "5785": 28391712768.0, "5790": 28391712768.0, "5795": 28391712768.0, "5800": 28391712768.0, "5805": 28391712768.0, "5810": 28391712768.0, "5815": 28391712768.0, "5820": 28391712768.0, "5825": 28391712768.0, "5830": 28391712768.0, "5835": 28391712768.0, "5840": 28391712768.0, "5845": 28391712768.0, "5850": 28391712768.0, "5855": 28391712768.0, "5860": 28391712768.0, "5865": 28391712768.0, "5870": 28391712768.0, "5875": 28391712768.0, "5880": 28391712768.0, "5885": 28391712768.0, "5890": 28391712768.0, "5895": 28391712768.0, "5900": 28391712768.0, "5905": 28391712768.0, "5910": 28391712768.0, "5915": 28391712768.0, "5920": 28391712768.0, "5925": 28391712768.0, "5930": 28391712768.0, "5935": 28391712768.0, "5940": 28391712768.0, "5945": 28391712768.0, "5950": 28391712768.0, "5955": 28391712768.0, "5960": 28391712768.0, "5965": 28391712768.0, "5970": 28391712768.0, "5975": 28391712768.0, "5980": 28391712768.0, "5985": 28391712768.0, "5990": 28391712768.0, "5995": 28391712768.0, "6000": 28391712768.0, "6005": 28391712768.0, "6010": 28391712768.0, "6015": 28391712768.0, "6020": 28391712768.0, "6025": 28391712768.0, "6030": 28391712768.0, "6035": 28391712768.0, "6040": 28391712768.0, "6045": 28391712768.0, "6050": 28391712768.0, "6055": 28391712768.0, "6060": 28391712768.0, "6065": 28391712768.0, "6070": 28391712768.0, "6075": 28391712768.0, "6080": 28391712768.0, "6085": 28391712768.0, "6090": 28391712768.0, "6095": 28391712768.0, "6100": 28391712768.0, "6105": 28391712768.0, "6110": 28391712768.0, "6115": 28391712768.0, "6120": 28391712768.0, "6125": 28391712768.0, "6130": 28391712768.0, "6135": 28391712768.0, "6140": 28391712768.0, "6145": 28391712768.0, "6150": 28391712768.0, "6155": 28391712768.0, "6160": 28391712768.0, "6165": 28391712768.0, "6170": 28391712768.0, "6175": 28391712768.0, "6180": 28391712768.0, "6185": 28391712768.0, "6190": 28391712768.0, "6195": 28391712768.0, "6200": 28391712768.0, "6205": 28391712768.0, "6210": 28391712768.0, "6215": 28391712768.0, "6220": 28391712768.0, "6225": 28391712768.0, "6230": 28391712768.0, "6235": 28391712768.0, "6240": 28391712768.0, "6245": 28391712768.0, "6250": 28391712768.0, "6255": 28391712768.0, "6260": 28391712768.0, "6265": 28391712768.0, "6270": 28391712768.0, "6275": 28391712768.0, "6280": 28391712768.0, "6285": 28391712768.0, "6290": 28391712768.0, "6295": 28391712768.0, "6300": 28391712768.0, "6305": 28391712768.0, "6310": 28391712768.0, "6315": 28391712768.0, "6320": 28391712768.0, "6325": 28391712768.0, "6330": 28391712768.0, "6335": 28391712768.0, "6340": 28391712768.0, "6345": 28391712768.0, "6350": 28391712768.0, "6355": 28391712768.0, "6360": 28391712768.0, "6365": 28391712768.0, "6370": 28391712768.0, "6375": 28391712768.0, "6380": 28391712768.0, "6385": 28391712768.0, "6390": 28391712768.0, "6395": 28391712768.0, "6400": 28391712768.0, "6405": 28391712768.0, "6410": 28391712768.0, "6415": 28391712768.0, "6420": 28391712768.0, "6425": 28391712768.0, "6430": 28391712768.0, "6435": 28391712768.0, "6440": 28391712768.0, "6445": 28391712768.0, "6450": 28391712768.0, "6455": 28391712768.0, "6460": 28391712768.0, "6465": 28391712768.0, "6470": 28391712768.0, "6475": 28391712768.0, "6480": 28391712768.0, "6485": 28391712768.0, "6490": 28391712768.0, "6495": 28391712768.0, "6500": 28391712768.0, "6505": 28391712768.0, "6510": 28391712768.0, "6515": 28391712768.0, "6520": 28391712768.0, "6525": 28391712768.0, "6530": 28391712768.0, "6535": 28391712768.0, "6540": 28391712768.0, "6545": 28391712768.0, "6550": 28391712768.0, "6555": 28391712768.0, "6560": 28391712768.0, "6565": 28391712768.0, "6570": 28391712768.0, "6575": 28391712768.0, "6580": 28391712768.0, "6585": 28391712768.0, "6590": 28391712768.0, "6595": 28391712768.0, "6600": 28391712768.0, "6605": 28391712768.0, "6610": 28391712768.0, "6615": 28391712768.0, "6620": 28391712768.0, "6625": 28391712768.0, "6630": 28391712768.0, "6635": 28391712768.0, "6640": 28391712768.0, "6645": 28391712768.0, "6650": 28391712768.0, "6655": 28391712768.0, "6660": 28391712768.0, "6665": 28391712768.0, "6670": 28391712768.0, "6675": 28391712768.0, "6680": 28391712768.0, "6685": 28391712768.0, "6690": 28391712768.0, "6695": 28391712768.0, "6700": 28391712768.0, "6705": 28391712768.0, "6710": 28391712768.0, "6715": 28391712768.0, "6720": 28391712768.0, "6725": 28391712768.0, "6730": 28391712768.0, "6735": 28391712768.0, "6740": 28391712768.0, "6745": 28391712768.0, "6750": 28391712768.0, "6755": 28391712768.0, "6760": 28391712768.0, "6765": 28391712768.0, "6770": 28391712768.0, "6775": 28391712768.0, "6780": 28391712768.0, "6785": 28391712768.0, "6790": 28391712768.0, "6795": 28391712768.0, "6800": 28391712768.0, "6805": 28391712768.0, "6810": 28391712768.0, "6815": 28391712768.0, "6820": 28391712768.0, "6825": 28391712768.0, "6830": 28391712768.0, "6835": 28391712768.0, "6840": 28391712768.0, "6845": 28391712768.0, "6850": 28391712768.0, "6855": 28391712768.0, "6860": 28391712768.0, "6865": 28391712768.0, "6870": 28391712768.0, "6875": 28391712768.0, "6880": 28391712768.0, "6885": 28391712768.0, "6890": 28391712768.0, "6895": 28391712768.0, "6900": 28391712768.0, "6905": 28391712768.0, "6910": 28391712768.0, "6915": 28391712768.0, "6920": 28391712768.0, "6925": 28391712768.0, "6930": 28391712768.0, "6935": 28391712768.0, "6940": 28391712768.0, "6945": 28391712768.0, "6950": 28391712768.0, "6955": 28391712768.0, "6960": 28391712768.0, "6965": 28391712768.0, "6970": 28391712768.0, "6975": 28391712768.0, "6980": 28391712768.0, "6985": 28391712768.0, "6990": 28391712768.0, "6995": 28391712768.0, "7000": 28391712768.0, "7005": 28391712768.0, "7010": 28391712768.0, "7015": 28391712768.0, "7020": 28391712768.0, "7025": 28391712768.0, "7030": 28391712768.0, "7035": 28391712768.0, "7040": 28391712768.0, "7045": 28391712768.0, "7050": 28391712768.0, "7055": 28391712768.0, "7060": 28391712768.0, "7065": 28391712768.0, "7070": 28391712768.0, "7075": 28391712768.0, "7080": 28391712768.0, "7085": 28391712768.0, "7090": 28391712768.0, "7095": 28391712768.0, "7100": 28391712768.0, "7105": 28391712768.0, "7110": 28391712768.0, "7115": 28391712768.0, "7120": 28391712768.0, "7125": 28391712768.0, "7130": 28391712768.0, "7135": 28391712768.0, "7140": 28391712768.0, "7145": 28391712768.0, "7150": 28391712768.0, "7155": 28391712768.0, "7160": 28391712768.0, "7165": 28391712768.0, "7170": 28391712768.0, "7175": 28391712768.0, "7180": 28391712768.0, "7185": 28391712768.0, "7190": 28391712768.0, "7195": 28391712768.0, "7200": 28391712768.0, "7205": 28391712768.0, "7210": 28391712768.0, "7215": 28391712768.0, "7220": 28391712768.0, "7225": 28391712768.0, "7230": 28391712768.0, "7235": 28391712768.0, "7240": 28391712768.0, "7245": 28391712768.0, "7250": 28391712768.0, "7255": 28391712768.0, "7260": 28391712768.0, "7265": 28391712768.0, "7270": 28391712768.0, "7275": 28391712768.0, "7280": 28391712768.0, "7285": 28391712768.0, "7290": 28391712768.0, "7295": 28391712768.0, "7300": 28391712768.0, "7305": 28391712768.0, "7310": 28391712768.0, "7315": 28391712768.0, "7320": 28391712768.0, "7325": 28391712768.0, "7330": 28391712768.0, "7335": 28391712768.0, "7340": 28391712768.0, "7345": 28391712768.0, "7350": 28391712768.0, "7355": 28391712768.0, "7360": 28391712768.0, "7365": 28391712768.0, "7370": 28391712768.0, "7375": 28391712768.0, "7380": 28391712768.0, "7385": 28391712768.0, "7390": 28391712768.0, "7395": 28391712768.0, "7400": 28391712768.0, "7405": 28391712768.0, "7410": 28391712768.0, "7415": 28391712768.0, "7420": 28391712768.0, "7425": 28391712768.0, "7430": 28391712768.0, "7435": 28391712768.0, "7440": 28391712768.0, "7445": 28391712768.0, "7450": 28391712768.0, "7455": 28391712768.0, "7460": 28391712768.0, "7465": 28391712768.0, "7470": 28391712768.0, "7475": 28391712768.0, "7480": 28391712768.0, "7485": 28391712768.0, "7490": 28391712768.0, "7495": 28391712768.0, "7500": 28391712768.0, "7505": 28391712768.0, "7510": 28391712768.0, "7515": 28391712768.0, "7520": 28391712768.0, "7525": 28391712768.0, "7530": 28391712768.0, "7535": 28391712768.0, "7540": 28391712768.0, "7545": 28391712768.0, "7550": 28391712768.0, "7555": 28391712768.0, "7560": 28391712768.0, "7565": 28391712768.0, "7570": 28391712768.0, "7575": 28391712768.0, "7580": 28391712768.0, "7585": 28391712768.0, "7590": 28391712768.0, "7595": 28391712768.0, "7600": 28391712768.0, "7605": 28391712768.0, "7610": 28391712768.0, "7615": 28391712768.0, "7620": 28391712768.0, "7625": 28391712768.0, "7630": 28391712768.0, "7635": 28391712768.0, "7640": 28391712768.0, "7645": 28391712768.0, "7650": 28391712768.0, "7655": 28391712768.0, "7660": 28391712768.0, "7665": 28391712768.0, "7670": 28391712768.0, "7675": 28391712768.0, "7680": 28391712768.0, "7685": 28391712768.0, "7690": 28391712768.0, "7695": 28391712768.0, "7700": 28391712768.0, "7705": 28391712768.0, "7710": 28391712768.0, "7715": 28391712768.0, "7720": 28391712768.0, "7725": 28391712768.0, "7730": 28391712768.0, "7735": 28391712768.0, "7740": 28391712768.0, "7745": 28391712768.0, "7750": 28391712768.0, "7755": 28391712768.0, "7760": 28391712768.0, "7765": 28391712768.0, "7770": 28391712768.0, "7775": 28391712768.0, "7780": 28391712768.0, "7785": 28391712768.0, "7790": 28391712768.0, "7795": 28391712768.0, "7800": 28391712768.0, "7805": 28391712768.0, "7810": 28391712768.0, "7815": 28391712768.0, "7820": 28391712768.0, "7825": 28391712768.0, "7830": 28391712768.0, "7835": 28391712768.0, "7840": 28391712768.0, "7845": 28391712768.0, "7850": 28391712768.0, "7855": 28391712768.0, "7860": 28391712768.0, "7865": 28391712768.0, "7870": 28391712768.0, "7875": 28391712768.0, "7880": 28391712768.0, "7885": 28391712768.0, "7890": 28391712768.0, "7895": 28391712768.0, "7900": 28391712768.0, "7905": 28391712768.0, "7910": 28391712768.0, "7915": 28391712768.0, "7920": 28391712768.0, "7925": 28391712768.0, "7930": 28391712768.0, "7935": 28391712768.0, "7940": 28391712768.0, "7945": 28391712768.0, "7950": 28391712768.0, "7955": 28391712768.0, "7960": 28391712768.0, "7965": 28391712768.0, "7970": 28391712768.0, "7975": 28391712768.0, "7980": 28391712768.0, "7985": 28391712768.0, "7990": 28391712768.0, "7995": 28391712768.0, "8000": 28391712768.0, "8005": 28391712768.0, "8010": 28391712768.0, "8015": 28391712768.0, "8020": 28391712768.0, "8025": 28391712768.0, "8030": 28391712768.0, "8035": 28391712768.0, "8040": 28391712768.0, "8045": 28391712768.0, "8050": 28391712768.0, "8055": 28391712768.0, "8060": 28391712768.0, "8065": 28391712768.0, "8070": 28391712768.0, "8075": 28391712768.0, "8080": 28391712768.0, "8085": 28391712768.0, "8090": 28391712768.0, "8095": 28391712768.0, "8100": 28391712768.0, "8105": 28391712768.0, "8110": 28391712768.0, "8115": 28391712768.0, "8120": 28391712768.0, "8125": 28391712768.0, "8130": 28391712768.0, "8135": 28391712768.0, "8140": 28391712768.0, "8145": 28391712768.0, "8150": 28391712768.0, "8155": 28391712768.0, "8160": 28391712768.0, "8165": 28391712768.0, "8170": 28391712768.0, "8175": 28391712768.0, "8180": 28391712768.0, "8185": 28391712768.0, "8190": 28391712768.0, "8195": 28391712768.0, "8200": 28391712768.0, "8205": 28391712768.0, "8210": 28391712768.0, "8215": 28391712768.0, "8220": 28391712768.0, "8225": 28391712768.0, "8230": 28391712768.0, "8235": 28391712768.0, "8240": 28391712768.0, "8245": 28391712768.0, "8250": 28391712768.0, "8255": 28391712768.0, "8260": 28391712768.0, "8265": 28391712768.0, "8270": 28391712768.0, "8275": 28391712768.0, "8280": 28391712768.0, "8285": 28391712768.0, "8290": 28391712768.0, "8295": 28391712768.0, "8300": 28391712768.0, "8305": 28391712768.0, "8310": 28391712768.0, "8315": 28391712768.0, "8320": 28391712768.0, "8325": 28391712768.0, "8330": 28391712768.0, "8335": 28391712768.0, "8340": 28391712768.0, "8345": 28391712768.0, "8350": 28391712768.0, "8355": 28391712768.0, "8360": 28391712768.0, "8365": 28391712768.0, "8370": 28391712768.0, "8375": 28391712768.0, "8380": 28391712768.0, "8385": 28391712768.0, "8390": 28391712768.0, "8395": 28391712768.0, "8400": 28391712768.0, "8405": 28391712768.0, "8410": 28391712768.0, "8415": 28391712768.0, "8420": 28391712768.0, "8425": 28391712768.0, "8430": 28391712768.0, "8435": 28391712768.0, "8440": 28391712768.0, "8445": 28391712768.0, "8450": 28391712768.0, "8455": 28391712768.0, "8460": 28391712768.0, "8465": 28391712768.0, "8470": 28391712768.0, "8475": 28391712768.0, "8480": 28391712768.0, "8485": 28391712768.0, "8490": 28391712768.0, "8495": 28391712768.0, "8500": 28391712768.0, "8505": 28391712768.0, "8510": 28391712768.0, "8515": 28391712768.0, "8520": 28391712768.0, "8525": 28391712768.0, "8530": 28391712768.0, "8535": 28391712768.0, "8540": 28391712768.0, "8545": 28391712768.0, "8550": 28391712768.0, "8555": 28391712768.0, "8560": 28391712768.0, "8565": 28391712768.0, "8570": 28391712768.0, "8575": 28391712768.0, "8580": 28391712768.0, "8585": 28391712768.0, "8590": 28391712768.0, "8595": 28391712768.0, "8600": 28391712768.0, "8605": 28391712768.0, "8610": 28391712768.0, "8615": 28391712768.0, "8620": 28391712768.0, "8625": 28391712768.0, "8630": 28391712768.0, "8635": 28391712768.0, "8640": 28391712768.0, "8645": 28391712768.0, "8650": 28391712768.0, "8655": 28391712768.0, "8660": 28391712768.0, "8665": 28391712768.0, "8670": 28391712768.0, "8675": 28391712768.0, "8680": 28391712768.0, "8685": 28391712768.0, "8690": 28391712768.0, "8695": 28391712768.0, "8700": 28391712768.0, "8705": 28391712768.0, "8710": 28391712768.0, "8715": 28391712768.0, "8720": 28391712768.0, "8725": 28391712768.0, "8730": 28391712768.0, "8735": 28391712768.0, "8740": 28391712768.0, "8745": 28391712768.0, "8750": 28391712768.0, "8755": 28391712768.0, "8760": 28391712768.0, "8765": 28391712768.0, "8770": 28391712768.0, "8775": 28391712768.0, "8780": 28391712768.0, "8785": 28391712768.0, "8790": 28391712768.0, "8795": 28391712768.0, "8800": 28391712768.0, "8805": 28391712768.0, "8810": 28391712768.0, "8815": 28391712768.0, "8820": 28391712768.0, "8825": 28391712768.0, "8830": 28391712768.0, "8835": 28391712768.0, "8840": 28391712768.0, "8845": 28391712768.0, "8850": 28391712768.0, "8855": 28391712768.0, "8860": 28391712768.0, "8865": 28391712768.0, "8870": 28391712768.0, "8875": 28391712768.0, "8880": 28391712768.0, "8885": 28391712768.0, "8890": 28391712768.0, "8895": 28391712768.0, "8900": 28391712768.0, "8905": 28391712768.0, "8910": 28391712768.0, "8915": 28391712768.0, "8920": 28391712768.0, "8925": 28391712768.0, "8930": 28391712768.0, "8935": 28391712768.0, "8940": 28391712768.0, "8945": 28391712768.0, "8950": 28391712768.0, "8955": 28391712768.0, "8960": 28391712768.0, "8965": 28391712768.0, "8970": 28391712768.0, "8975": 28391712768.0, "8980": 28391712768.0, "8985": 28391712768.0, "8990": 28391712768.0, "8995": 28391712768.0, "9000": 28391712768.0, "9005": 28391712768.0, "9010": 28391712768.0, "9015": 28391712768.0, "9020": 28391712768.0, "9025": 28391712768.0, "9030": 28391712768.0, "9035": 28391712768.0, "9040": 28391712768.0, "9045": 28391712768.0, "9050": 28391712768.0, "9055": 28391712768.0, "9060": 28391712768.0, "9065": 28391712768.0, "9070": 28391712768.0, "9075": 28391712768.0, "9080": 28391712768.0, "9085": 28391712768.0, "9090": 28391712768.0, "9095": 28391712768.0, "9100": 28391712768.0, "9105": 28391712768.0, "9110": 28391712768.0, "9115": 28391712768.0, "9120": 28391712768.0, "9125": 28391712768.0, "9130": 28391712768.0, "9135": 28391712768.0, "9140": 28391712768.0, "9145": 28391712768.0, "9150": 28391712768.0, "9155": 28391712768.0, "9160": 28391712768.0, "9165": 28391712768.0, "9170": 28391712768.0, "9175": 28391712768.0, "9180": 28391712768.0, "9185": 28391712768.0, "9190": 28391712768.0, "9195": 28391712768.0, "9200": 28391712768.0, "9205": 28391712768.0, "9210": 28391712768.0, "9215": 28391712768.0, "9220": 28391712768.0, "9225": 28391712768.0, "9230": 28391712768.0, "9235": 28391712768.0, "9240": 28391712768.0, "9245": 28391712768.0, "9250": 28391712768.0, "9255": 28391712768.0, "9260": 28391712768.0, "9265": 28391712768.0, "9270": 28391712768.0, "9275": 28391712768.0, "9280": 28391712768.0, "9285": 28391712768.0, "9290": 28391712768.0, "9295": 28391712768.0, "9300": 28391712768.0, "9305": 28391712768.0, "9310": 28391712768.0, "9315": 28391712768.0, "9320": 28391712768.0, "9325": 28391712768.0, "9330": 28391712768.0, "9335": 28391712768.0, "9340": 28391712768.0, "9345": 28391712768.0, "9350": 28391712768.0, "9355": 28391712768.0, "9360": 28391712768.0, "9365": 28391712768.0, "9370": 28391712768.0, "9375": 28391712768.0, "9380": 28391712768.0, "9385": 28391712768.0, "9390": 28391712768.0, "9395": 28391712768.0, "9400": 28391712768.0, "9405": 28391712768.0, "9410": 28391712768.0, "9415": 28391712768.0, "9420": 28391712768.0, "9425": 28391712768.0, "9430": 28391712768.0, "9435": 28391712768.0, "9440": 28391712768.0, "9445": 28391712768.0, "9450": 28391712768.0, "9455": 28391712768.0, "9460": 28391712768.0, "9465": 28391712768.0, "9470": 28391712768.0, "9475": 28391712768.0, "9480": 28391712768.0, "9485": 28391712768.0, "9490": 28391712768.0, "9495": 28391712768.0, "9500": 28391712768.0, "9505": 28391712768.0, "9510": 28391712768.0, "9515": 28391712768.0, "9520": 28391712768.0, "9525": 28391712768.0, "9530": 28391712768.0, "9535": 28391712768.0, "9540": 28391712768.0, "9545": 28391712768.0, "9550": 28391712768.0, "9555": 28391712768.0, "9560": 28391712768.0, "9565": 28391712768.0, "9570": 28391712768.0, "9575": 28391712768.0, "9580": 28391712768.0, "9585": 28391712768.0, "9590": 28391712768.0, "9595": 28391712768.0, "9600": 28391712768.0, "9605": 28391712768.0, "9610": 28391712768.0, "9615": 28391712768.0, "9620": 28391712768.0, "9625": 28391712768.0, "9630": 28391712768.0, "9635": 28391712768.0, "9640": 28391712768.0, "9645": 28391712768.0, "9650": 28391712768.0, "9655": 28391712768.0, "9660": 28391712768.0, "9665": 28391712768.0, "9670": 28391712768.0, "9675": 28391712768.0, "9680": 28391712768.0, "9685": 28391712768.0, "9690": 28391712768.0, "9695": 28391712768.0, "9700": 28391712768.0, "9705": 28391712768.0, "9710": 28391712768.0, "9715": 28391712768.0, "9720": 28391712768.0, "9725": 28391712768.0, "9730": 28391712768.0, "9735": 28391712768.0, "9740": 28391712768.0, "9745": 28391712768.0, "9750": 28391712768.0, "9755": 28391712768.0, "9760": 28391712768.0, "9765": 28391712768.0, "9770": 28391712768.0, "9775": 28391712768.0, "9780": 28391712768.0, "9785": 28391712768.0, "9790": 28391712768.0, "9795": 28391712768.0, "9800": 28391712768.0, "9805": 28391712768.0, "9810": 28391712768.0, "9815": 28391712768.0, "9820": 28391712768.0, "9825": 28391712768.0, "9830": 28391712768.0, "9835": 28391712768.0, "9840": 28391712768.0, "9845": 28391712768.0, "9850": 28391712768.0, "9855": 28391712768.0, "9860": 28391712768.0, "9865": 28391712768.0, "9870": 28391712768.0, "9875": 28391712768.0, "9880": 28391712768.0, "9885": 28391712768.0, "9890": 28391712768.0, "9895": 28391712768.0, "9900": 28391712768.0, "9905": 28391712768.0, "9910": 28391712768.0, "9915": 28391712768.0, "9920": 28391712768.0, "9925": 28391712768.0, "9930": 28391712768.0, "9935": 28391712768.0, "9940": 28391712768.0, "9945": 28391712768.0, "9950": 28391712768.0, "9955": 28391712768.0, "9960": 28391712768.0, "9965": 28391712768.0, "9970": 28391712768.0, "9975": 28391712768.0, "9980": 28391712768.0, "9985": 28391712768.0, "9990": 28391712768.0, "9995": 28391712768.0, "10000": 28391712768.0, "10005": 28391712768.0, "10010": 28391712768.0, "10015": 28391712768.0, "10020": 28391712768.0, "10025": 28391712768.0, "10030": 28391712768.0, "10035": 28391712768.0, "10040": 28391712768.0, "10045": 28391712768.0, "10050": 28391712768.0, "10055": 28391712768.0, "10060": 28391712768.0, "10065": 28391712768.0, "10070": 28391712768.0, "10075": 28391712768.0, "10080": 28391712768.0, "10085": 28391712768.0, "10090": 28391712768.0, "10095": 28391712768.0, "10100": 28391712768.0, "10105": 28391712768.0, "10110": 28391712768.0, "10115": 28391712768.0, "10120": 28391712768.0, "10125": 28391712768.0, "10130": 28391712768.0, "10135": 28391712768.0, "10140": 28391712768.0, "10145": 28391712768.0, "10150": 28391712768.0, "10155": 28391712768.0, "10160": 28391712768.0, "10165": 28391712768.0, "10170": 28391712768.0, "10175": 28391712768.0, "10180": 28391712768.0, "10185": 28391712768.0, "10190": 28391712768.0, "10195": 28391712768.0, "10200": 28391712768.0, "10205": 28391712768.0, "10210": 28391712768.0, "10215": 28391712768.0, "10220": 28391712768.0, "10225": 28391712768.0, "10230": 28391712768.0, "10235": 28391712768.0, "10240": 28391712768.0, "10245": 28391712768.0, "10250": 28391712768.0, "10255": 28391712768.0, "10260": 28391712768.0, "10265": 28391712768.0, "10270": 28391712768.0, "10275": 28391712768.0, "10280": 28391712768.0, "10285": 28391712768.0, "10290": 28391712768.0, "10295": 28391712768.0, "10300": 28391712768.0, "10305": 28391712768.0, "10310": 28391712768.0, "10315": 28391712768.0, "10320": 28391712768.0, "10325": 28391712768.0, "10330": 28391712768.0, "10335": 28391712768.0, "10340": 28391712768.0, "10345": 28391712768.0, "10350": 28391712768.0, "10355": 28391712768.0, "10360": 28391712768.0, "10365": 28391712768.0, "10370": 28391712768.0, "10375": 28391712768.0, "10380": 28391712768.0, "10385": 28391712768.0, "10390": 28391712768.0, "10395": 28391712768.0, "10400": 28391712768.0, "10405": 28391712768.0, "10410": 28391712768.0, "10415": 28391712768.0, "10420": 28391712768.0, "10425": 28391712768.0, "10430": 28391712768.0, "10435": 28391712768.0, "10440": 28391712768.0, "10445": 28391712768.0, "10450": 28391712768.0, "10455": 28391712768.0, "10460": 28391712768.0, "10465": 28391712768.0, "10470": 28391712768.0, "10475": 28391712768.0, "10480": 28391712768.0, "10485": 28391712768.0, "10490": 28391712768.0, "10495": 28391712768.0, "10500": 28391712768.0, "10505": 28391712768.0, "10510": 28391712768.0, "10515": 28391712768.0, "10520": 28391712768.0, "10525": 28391712768.0, "10530": 28391712768.0, "10535": 28391712768.0, "10540": 28391712768.0, "10545": 28391712768.0, "10550": 28391712768.0, "10555": 28391712768.0, "10560": 28391712768.0, "10565": 28391712768.0, "10570": 28391712768.0, "10575": 28391712768.0, "10580": 28391712768.0, "10585": 28391712768.0, "10590": 28391712768.0, "10595": 28391712768.0, "10600": 28391712768.0, "10605": 28391712768.0, "10610": 28391712768.0, "10615": 28391712768.0, "10620": 28391712768.0, "10625": 28391712768.0, "10630": 28391712768.0, "10635": 28391712768.0, "10640": 28391712768.0, "10645": 28391712768.0, "10650": 28391712768.0, "10655": 28391712768.0, "10660": 28391712768.0, "10665": 28391712768.0, "10670": 28391712768.0, "10675": 28391712768.0, "10680": 28391712768.0, "10685": 28391712768.0, "10690": 28391712768.0, "10695": 28391712768.0, "10700": 28391712768.0, "10705": 28391712768.0, "10710": 28391712768.0, "10715": 28391712768.0, "10720": 28391712768.0, "10725": 28391712768.0, "10730": 28391712768.0, "10735": 28391712768.0, "10740": 28391712768.0, "10745": 28391712768.0, "10750": 28391712768.0, "10755": 28391712768.0, "10760": 28391712768.0, "10765": 28391712768.0, "10770": 28391712768.0, "10775": 28391712768.0, "10780": 28391712768.0, "10785": 28391712768.0, "10790": 28391712768.0, "10795": 28391712768.0, "10800": 28391712768.0, "10805": 28391712768.0, "10810": 28391712768.0, "10815": 28391712768.0, "10820": 28391712768.0, "10825": 28391712768.0, "10830": 28391712768.0, "10835": 28391712768.0, "10840": 28391712768.0, "10845": 28391712768.0, "10850": 28391712768.0, "10855": 28391712768.0, "10860": 28391712768.0, "10865": 28391712768.0, "10870": 28391712768.0, "10875": 28391712768.0, "10880": 28391712768.0, "10885": 28391712768.0, "10890": 28391712768.0, "10895": 28391712768.0, "10900": 28391712768.0, "10905": 28391712768.0, "10910": 28391712768.0, "10915": 28391712768.0, "10920": 28391712768.0, "10925": 28391712768.0, "10930": 28391712768.0, "10935": 28391712768.0, "10940": 28391712768.0, "10945": 28391712768.0, "10950": 28391712768.0, "10955": 28391712768.0, "10960": 28391712768.0, "10965": 28391712768.0, "10970": 28391712768.0, "10975": 28391712768.0, "10980": 28391712768.0, "10985": 28391712768.0, "10990": 28391712768.0, "10995": 28391712768.0, "11000": 28391712768.0, "11005": 28391712768.0, "11010": 28391712768.0, "11015": 28391712768.0, "11020": 28391712768.0, "11025": 28391712768.0, "11030": 28391712768.0, "11035": 28391712768.0, "11040": 28391712768.0, "11045": 28391712768.0, "11050": 28391712768.0, "11055": 28391712768.0, "11060": 28391712768.0, "11065": 28391712768.0, "11070": 28391712768.0, "11075": 28391712768.0, "11080": 28391712768.0, "11085": 28391712768.0, "11090": 28391712768.0, "11095": 28391712768.0, "11100": 28391712768.0, "11105": 28391712768.0, "11110": 28391712768.0, "11115": 28391712768.0, "11120": 28391712768.0, "11125": 28391712768.0, "11130": 28391712768.0, "11135": 28391712768.0, "11140": 28391712768.0, "11145": 28391712768.0, "11150": 28391712768.0, "11155": 28391712768.0, "11160": 28391712768.0, "11165": 28391712768.0, "11170": 28391712768.0, "11175": 28391712768.0, "11180": 28391712768.0, "11185": 28391712768.0, "11190": 28391712768.0, "11195": 28391712768.0, "11200": 28391712768.0, "11205": 28391712768.0, "11210": 28391712768.0, "11215": 28391712768.0, "11220": 28391712768.0, "11225": 28391712768.0, "11230": 28391712768.0, "11235": 28391712768.0, "11240": 28391712768.0, "11245": 28391712768.0, "11250": 28391712768.0, "11255": 28391712768.0, "11260": 28391712768.0, "11265": 28391712768.0, "11270": 28391712768.0, "11275": 28391712768.0, "11280": 28391712768.0, "11285": 28391712768.0, "11290": 28391712768.0, "11295": 28391712768.0, "11300": 28391712768.0, "11305": 28391712768.0, "11310": 28391712768.0, "11315": 28391712768.0, "11320": 28391712768.0, "11325": 28391712768.0, "11330": 28391712768.0, "11335": 28391712768.0, "11340": 28391712768.0, "11345": 28391712768.0, "11350": 28391712768.0, "11355": 28391712768.0, "11360": 28391712768.0, "11365": 28391712768.0, "11370": 28391712768.0, "11375": 28391712768.0, "11380": 28391712768.0, "11385": 28391712768.0, "11390": 28391712768.0, "11395": 28391712768.0, "11400": 28391712768.0, "11405": 28391712768.0, "11410": 28391712768.0, "11415": 28391712768.0, "11420": 28391712768.0, "11425": 28391712768.0, "11430": 28391712768.0, "11435": 28391712768.0, "11440": 28391712768.0, "11445": 28391712768.0, "11450": 28391712768.0, "11455": 28391712768.0, "11460": 28391712768.0, "11465": 28391712768.0, "11470": 28391712768.0, "11475": 28391712768.0, "11480": 28391712768.0, "11485": 28391712768.0, "11490": 28391712768.0, "11495": 28391712768.0, "11500": 28391712768.0, "11505": 28391712768.0, "11510": 28391712768.0, "11515": 28391712768.0, "11520": 28391712768.0, "11525": 28391712768.0, "11530": 28391712768.0, "11535": 28391712768.0, "11540": 28391712768.0, "11545": 28391712768.0, "11550": 28391712768.0, "11555": 28391712768.0, "11560": 28391712768.0, "11565": 28391712768.0, "11570": 28391712768.0, "11575": 28391712768.0, "11580": 28391712768.0, "11585": 28391712768.0, "11590": 28391712768.0, "11595": 28391712768.0, "11600": 28391712768.0, "11605": 28391712768.0, "11610": 28391712768.0, "11615": 28391712768.0, "11620": 28391712768.0, "11625": 28391712768.0, "11630": 28391712768.0, "11635": 28391712768.0, "11640": 28391712768.0, "11645": 28391712768.0, "11650": 28391712768.0, "11655": 28391712768.0, "11660": 28391712768.0, "11665": 28391712768.0, "11670": 28391712768.0, "11675": 28391712768.0, "11680": 28391712768.0, "11685": 28391712768.0, "11690": 28391712768.0, "11695": 28391712768.0, "11700": 28391712768.0, "11705": 28391712768.0, "11710": 28391712768.0, "11715": 28391712768.0, "11720": 28391712768.0, "11725": 28391712768.0, "11730": 28391712768.0, "11735": 28391712768.0, "11740": 28391712768.0, "11745": 28391712768.0, "11750": 28391712768.0, "11755": 28391712768.0, "11760": 28391712768.0, "11765": 28391712768.0, "11770": 28391712768.0, "11775": 28391712768.0, "11780": 28391712768.0, "11785": 28391712768.0, "11790": 28391712768.0, "11795": 28391712768.0, "11800": 28391712768.0, "11805": 28391712768.0, "11810": 28391712768.0, "11815": 28391712768.0, "11820": 28391712768.0, "11825": 28391712768.0, "11830": 28391712768.0, "11835": 28391712768.0, "11840": 28391712768.0, "11845": 28391712768.0, "11850": 28391712768.0, "11855": 28391712768.0, "11860": 28391712768.0, "11865": 28391712768.0, "11870": 28391712768.0, "11875": 28391712768.0, "11880": 28391712768.0, "11885": 28391712768.0, "11890": 28391712768.0, "11895": 28391712768.0, "11900": 28391712768.0, "11905": 28391712768.0, "11910": 28391712768.0, "11915": 28391712768.0, "11920": 28391712768.0, "11925": 28391712768.0, "11930": 28391712768.0, "11935": 28391712768.0, "11940": 28391712768.0, "11945": 28391712768.0, "11950": 28391712768.0, "11955": 28391712768.0, "11960": 28391712768.0, "11965": 28391712768.0, "11970": 28391712768.0, "11975": 28391712768.0, "11980": 28391712768.0, "11985": 28391712768.0, "11990": 28391712768.0, "11995": 28391712768.0, "12000": 28391712768.0, "12005": 28391712768.0, "12010": 28391712768.0, "12015": 28391712768.0, "12020": 28391712768.0, "12025": 28391712768.0, "12030": 28391712768.0, "12035": 28391712768.0, "12040": 28391712768.0, "12045": 28391712768.0, "12050": 28391712768.0, "12055": 28391712768.0, "12060": 28391712768.0, "12065": 28391712768.0, "12070": 28391712768.0, "12075": 28391712768.0, "12080": 28391712768.0, "12085": 28391712768.0, "12090": 28391712768.0, "12095": 28391712768.0, "12100": 28391712768.0, "12105": 28391712768.0, "12110": 28391712768.0, "12115": 28391712768.0, "12120": 28391712768.0, "12125": 28391712768.0, "12130": 28391712768.0, "12135": 28391712768.0, "12140": 28391712768.0, "12145": 28391712768.0, "12150": 28391712768.0, "12155": 28391712768.0, "12160": 28391712768.0, "12165": 28391712768.0, "12170": 28391712768.0, "12175": 28391712768.0, "12180": 28391712768.0, "12185": 28391712768.0, "12190": 28391712768.0, "12195": 28391712768.0, "12200": 28391712768.0, "12205": 28391712768.0, "12210": 28391712768.0, "12215": 28391712768.0, "12220": 28391712768.0, "12225": 28391712768.0, "12230": 28391712768.0, "12235": 28391712768.0, "12240": 28391712768.0, "12245": 28391712768.0, "12250": 28391712768.0, "12255": 28391712768.0, "12260": 28391712768.0, "12265": 28391712768.0, "12270": 28391712768.0, "12275": 28391712768.0, "12280": 28391712768.0, "12285": 28391712768.0, "12290": 28391712768.0, "12295": 28391712768.0, "12300": 28391712768.0, "12305": 28391712768.0, "12310": 28391712768.0, "12315": 28391712768.0, "12320": 28391712768.0, "12325": 28391712768.0, "12330": 28391712768.0, "12335": 28391712768.0, "12340": 28391712768.0, "12345": 28391712768.0, "12350": 28391712768.0, "12355": 28391712768.0, "12360": 28391712768.0, "12365": 28391712768.0, "12370": 28391712768.0, "12375": 28391712768.0, "12380": 28391712768.0, "12385": 28391712768.0, "12390": 28391712768.0, "12395": 28391712768.0, "12400": 28391712768.0, "12405": 28391712768.0, "12410": 28391712768.0, "12415": 28391712768.0, "12420": 28391712768.0, "12425": 28391712768.0, "12430": 28391712768.0, "12435": 28391712768.0, "12440": 28391712768.0, "12445": 28391712768.0, "12450": 28391712768.0, "12455": 28391712768.0, "12460": 28391712768.0, "12465": 28391712768.0, "12470": 28391712768.0, "12475": 28391712768.0, "12480": 28391712768.0, "12485": 28391712768.0, "12490": 28391712768.0, "12495": 28391712768.0, "12500": 28391712768.0, "12505": 28391712768.0, "12510": 28391712768.0, "12515": 28391712768.0, "12520": 28391712768.0, "12525": 28391712768.0, "12530": 28391712768.0, "12535": 28391712768.0, "12540": 28391712768.0, "12545": 28391712768.0, "12550": 28391712768.0, "12555": 28391712768.0, "12560": 28391712768.0, "12565": 28391712768.0, "12570": 28391712768.0, "12575": 28391712768.0, "12580": 28391712768.0, "12585": 28391712768.0, "12590": 28391712768.0, "12595": 28391712768.0, "12600": 28391712768.0, "12605": 28391712768.0, "12610": 28391712768.0, "12615": 28391712768.0, "12620": 28391712768.0, "12625": 28391712768.0, "12630": 28391712768.0, "12635": 28391712768.0, "12640": 28391712768.0, "12645": 28391712768.0, "12650": 28391712768.0, "12655": 28391712768.0, "12660": 28391712768.0, "12665": 28391712768.0, "12670": 28391712768.0, "12675": 28391712768.0, "12680": 28391712768.0, "12685": 28391712768.0, "12690": 28391712768.0, "12695": 28391712768.0, "12700": 28391712768.0, "12705": 28391712768.0, "12710": 28391712768.0, "12715": 28391712768.0, "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": "nan", "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": "nan", "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": "nan"}}, "iteration-time": {"start_step": 1, "end_step": 13000, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": "nan", "15": "nan", "20": "nan", "25": "nan", "30": "nan", "35": "nan", "40": "nan", "45": "nan", "50": "nan", "55": "nan", "60": "nan", "65": "nan", "70": "nan", "75": "nan", "80": "nan", "85": "nan", "90": "nan", "95": "nan", "100": 3.55899, "105": "nan", "110": "nan", "115": "nan", "120": "nan", "125": "nan", "130": "nan", "135": "nan", "140": "nan", "145": "nan", "150": "nan", "155": "nan", "160": "nan", "165": "nan", "170": "nan", "175": "nan", "180": "nan", "185": "nan", "190": "nan", "195": "nan", "200": 3.44029, "205": "nan", "210": "nan", "215": "nan", "220": "nan", "225": "nan", "230": "nan", "235": "nan", "240": "nan", "245": "nan", "250": "nan", "255": "nan", "260": "nan", "265": "nan", "270": "nan", "275": "nan", "280": "nan", "285": "nan", "290": "nan", "295": "nan", "300": 3.44419, "305": "nan", "310": "nan", "315": "nan", "320": "nan", "325": "nan", "330": "nan", "335": "nan", "340": "nan", "345": "nan", "350": "nan", "355": "nan", "360": "nan", "365": "nan", "370": "nan", "375": "nan", "380": "nan", "385": "nan", "390": "nan", "395": "nan", "400": 3.44595, "405": "nan", "410": "nan", "415": "nan", "420": "nan", "425": "nan", "430": "nan", "435": "nan", "440": "nan", "445": "nan", "450": "nan", "455": "nan", "460": "nan", "465": "nan", "470": "nan", "475": "nan", "480": "nan", "485": "nan", "490": "nan", "495": "nan", "500": 3.4477, "505": "nan", "510": "nan", "515": "nan", "520": "nan", "525": "nan", "530": "nan", "535": "nan", "540": "nan", "545": "nan", "550": "nan", "555": "nan", "560": "nan", "565": "nan", "570": "nan", "575": "nan", "580": "nan", "585": "nan", "590": "nan", "595": "nan", "600": 3.44601, "605": "nan", "610": "nan", "615": "nan", "620": "nan", "625": "nan", "630": "nan", "635": "nan", "640": "nan", "645": "nan", "650": "nan", "655": "nan", "660": "nan", "665": "nan", "670": "nan", "675": "nan", "680": "nan", "685": "nan", "690": "nan", "695": "nan", "700": 3.4446, "705": "nan", "710": "nan", "715": "nan", "720": "nan", "725": "nan", "730": "nan", "735": "nan", "740": "nan", "745": "nan", "750": "nan", "755": "nan", "760": "nan", "765": "nan", "770": "nan", "775": "nan", "780": "nan", "785": "nan", "790": "nan", "795": "nan", "800": 3.44149, "805": "nan", "810": "nan", "815": "nan", "820": "nan", "825": "nan", "830": "nan", "835": "nan", "840": "nan", "845": "nan", "850": "nan", "855": "nan", "860": "nan", "865": "nan", "870": "nan", "875": "nan", "880": "nan", "885": "nan", "890": "nan", "895": "nan", "900": 3.43766, "905": "nan", "910": "nan", "915": "nan", "920": "nan", "925": "nan", "930": "nan", "935": "nan", "940": "nan", "945": "nan", "950": "nan", "955": "nan", "960": "nan", "965": "nan", "970": "nan", "975": "nan", "980": "nan", "985": "nan", "990": "nan", "995": "nan", "1000": 3.43127, "1005": "nan", "1010": "nan", "1015": "nan", "1020": "nan", "1025": "nan", "1030": "nan", "1035": "nan", "1040": "nan", "1045": "nan", "1050": "nan", "1055": "nan", "1060": "nan", "1065": "nan", "1070": "nan", "1075": "nan", "1080": "nan", "1085": "nan", "1090": "nan", "1095": "nan", "1100": 3.42513, "1105": "nan", "1110": "nan", "1115": "nan", "1120": "nan", "1125": "nan", "1130": "nan", "1135": "nan", "1140": "nan", "1145": "nan", "1150": "nan", "1155": "nan", "1160": "nan", "1165": "nan", "1170": "nan", "1175": "nan", "1180": "nan", "1185": "nan", "1190": "nan", "1195": "nan", "1200": 3.41859, "1205": "nan", "1210": "nan", "1215": "nan", "1220": "nan", "1225": "nan", "1230": "nan", "1235": "nan", "1240": "nan", "1245": "nan", "1250": "nan", "1255": "nan", "1260": "nan", "1265": "nan", "1270": "nan", "1275": "nan", "1280": "nan", "1285": "nan", "1290": "nan", "1295": "nan", "1300": 3.41493, "1305": "nan", "1310": "nan", "1315": "nan", "1320": "nan", "1325": "nan", "1330": "nan", "1335": "nan", "1340": "nan", "1345": "nan", "1350": "nan", "1355": "nan", "1360": "nan", "1365": "nan", "1370": "nan", "1375": "nan", "1380": "nan", "1385": "nan", "1390": "nan", "1395": "nan", "1400": 3.41144, "1405": "nan", "1410": "nan", "1415": "nan", "1420": "nan", "1425": "nan", "1430": "nan", "1435": "nan", "1440": "nan", "1445": "nan", "1450": "nan", "1455": "nan", "1460": "nan", "1465": "nan", "1470": "nan", "1475": "nan", "1480": "nan", "1485": "nan", "1490": "nan", "1495": "nan", "1500": 3.40875, "1505": "nan", "1510": "nan", "1515": "nan", "1520": "nan", "1525": "nan", "1530": "nan", "1535": "nan", "1540": "nan", "1545": "nan", "1550": "nan", "1555": "nan", "1560": "nan", "1565": "nan", "1570": "nan", "1575": "nan", "1580": "nan", "1585": "nan", "1590": "nan", "1595": "nan", "1600": 3.40598, "1605": "nan", "1610": "nan", "1615": "nan", "1620": "nan", "1625": "nan", "1630": "nan", "1635": "nan", "1640": "nan", "1645": "nan", "1650": "nan", "1655": "nan", "1660": "nan", "1665": "nan", "1670": "nan", "1675": "nan", "1680": "nan", "1685": "nan", "1690": "nan", "1695": "nan", "1700": 3.4038, "1705": "nan", "1710": "nan", "1715": "nan", "1720": "nan", "1725": "nan", "1730": "nan", "1735": "nan", "1740": "nan", "1745": "nan", "1750": "nan", "1755": "nan", "1760": "nan", "1765": "nan", "1770": "nan", "1775": "nan", "1780": "nan", "1785": "nan", "1790": "nan", "1795": "nan", "1800": 3.40301, "1805": "nan", "1810": "nan", "1815": "nan", "1820": "nan", "1825": "nan", "1830": "nan", "1835": "nan", "1840": "nan", "1845": "nan", "1850": "nan", "1855": "nan", "1860": "nan", "1865": "nan", "1870": "nan", "1875": "nan", "1880": "nan", "1885": "nan", "1890": "nan", "1895": "nan", "1900": 3.40033, "1905": "nan", "1910": "nan", "1915": "nan", "1920": "nan", "1925": "nan", "1930": "nan", "1935": "nan", "1940": "nan", "1945": "nan", "1950": "nan", "1955": "nan", "1960": "nan", "1965": "nan", "1970": "nan", "1975": "nan", "1980": "nan", "1985": "nan", "1990": "nan", "1995": "nan", "2000": 3.3983, "2005": "nan", "2010": "nan", "2015": "nan", "2020": "nan", "2025": "nan", "2030": "nan", "2035": "nan", "2040": "nan", "2045": "nan", "2050": "nan", "2055": "nan", "2060": "nan", "2065": "nan", "2070": "nan", "2075": "nan", "2080": "nan", "2085": "nan", "2090": "nan", "2095": "nan", "2100": 3.3967, "2105": "nan", "2110": "nan", "2115": "nan", "2120": "nan", "2125": "nan", "2130": "nan", "2135": "nan", "2140": "nan", "2145": "nan", "2150": "nan", "2155": "nan", "2160": "nan", "2165": "nan", "2170": "nan", "2175": "nan", "2180": "nan", "2185": "nan", "2190": "nan", "2195": "nan", "2200": 3.39751, "2205": "nan", "2210": "nan", "2215": "nan", "2220": "nan", "2225": "nan", "2230": "nan", "2235": "nan", "2240": "nan", "2245": "nan", "2250": "nan", "2255": "nan", "2260": "nan", "2265": "nan", "2270": "nan", "2275": "nan", "2280": "nan", "2285": "nan", "2290": "nan", "2295": "nan", "2300": 3.39609, "2305": "nan", "2310": "nan", "2315": "nan", "2320": "nan", "2325": "nan", "2330": "nan", "2335": "nan", "2340": "nan", "2345": "nan", "2350": "nan", "2355": "nan", "2360": "nan", "2365": "nan", "2370": "nan", "2375": "nan", "2380": "nan", "2385": "nan", "2390": "nan", "2395": "nan", "2400": 3.39469, "2405": "nan", "2410": "nan", "2415": "nan", "2420": "nan", "2425": "nan", "2430": "nan", "2435": "nan", "2440": "nan", "2445": "nan", "2450": "nan", "2455": "nan", "2460": "nan", "2465": "nan", "2470": "nan", "2475": "nan", "2480": "nan", "2485": "nan", "2490": "nan", "2495": "nan", "2500": 3.39297, "2505": "nan", "2510": "nan", "2515": "nan", "2520": "nan", "2525": "nan", "2530": "nan", "2535": "nan", "2540": "nan", "2545": "nan", "2550": "nan", "2555": "nan", "2560": "nan", "2565": "nan", "2570": "nan", "2575": "nan", "2580": "nan", "2585": "nan", "2590": "nan", "2595": "nan", "2600": 3.39215, "2605": "nan", "2610": "nan", "2615": "nan", "2620": "nan", "2625": "nan", "2630": "nan", "2635": "nan", "2640": "nan", "2645": "nan", "2650": "nan", "2655": "nan", "2660": "nan", "2665": "nan", "2670": "nan", "2675": "nan", "2680": "nan", "2685": "nan", "2690": "nan", "2695": "nan", "2700": 3.39252, "2705": "nan", "2710": "nan", "2715": "nan", "2720": "nan", "2725": "nan", "2730": "nan", "2735": "nan", "2740": "nan", "2745": "nan", "2750": "nan", "2755": "nan", "2760": "nan", "2765": "nan", "2770": "nan", "2775": "nan", "2780": "nan", "2785": "nan", "2790": "nan", "2795": "nan", "2800": 3.39244, "2805": "nan", "2810": "nan", "2815": "nan", "2820": "nan", "2825": "nan", "2830": "nan", "2835": "nan", "2840": "nan", "2845": "nan", "2850": "nan", "2855": "nan", "2860": "nan", "2865": "nan", "2870": "nan", "2875": "nan", "2880": "nan", "2885": "nan", "2890": "nan", "2895": "nan", "2900": 3.39099, "2905": "nan", "2910": "nan", "2915": "nan", "2920": "nan", "2925": "nan", "2930": "nan", "2935": "nan", "2940": "nan", "2945": "nan", "2950": "nan", "2955": "nan", "2960": "nan", "2965": "nan", "2970": "nan", "2975": "nan", "2980": "nan", "2985": "nan", "2990": "nan", "2995": "nan", "3000": 3.39125, "3005": "nan", "3010": "nan", "3015": "nan", "3020": "nan", "3025": "nan", "3030": "nan", "3035": "nan", "3040": "nan", "3045": "nan", "3050": "nan", "3055": "nan", "3060": "nan", "3065": "nan", "3070": "nan", "3075": "nan", "3080": "nan", "3085": "nan", "3090": "nan", "3095": "nan", "3100": 3.39087, "3105": "nan", "3110": "nan", "3115": "nan", "3120": "nan", "3125": "nan", "3130": "nan", "3135": "nan", "3140": "nan", "3145": "nan", "3150": "nan", "3155": "nan", "3160": "nan", "3165": "nan", "3170": "nan", "3175": "nan", "3180": "nan", "3185": "nan", "3190": "nan", "3195": "nan", "3200": 3.3894, "3205": "nan", "3210": "nan", "3215": "nan", "3220": "nan", "3225": "nan", "3230": "nan", "3235": "nan", "3240": "nan", "3245": "nan", "3250": "nan", "3255": "nan", "3260": "nan", "3265": "nan", "3270": "nan", "3275": "nan", "3280": "nan", "3285": "nan", "3290": "nan", "3295": "nan", "3300": 3.39035, "3305": "nan", "3310": "nan", "3315": "nan", "3320": "nan", "3325": "nan", "3330": "nan", "3335": "nan", "3340": "nan", "3345": "nan", "3350": "nan", "3355": "nan", "3360": "nan", "3365": "nan", "3370": "nan", "3375": "nan", "3380": "nan", "3385": "nan", "3390": "nan", "3395": "nan", "3400": 3.39004, "3405": "nan", "3410": "nan", "3415": "nan", "3420": "nan", "3425": "nan", "3430": "nan", "3435": "nan", "3440": "nan", "3445": "nan", "3450": "nan", "3455": "nan", "3460": "nan", "3465": "nan", "3470": "nan", "3475": "nan", "3480": "nan", "3485": "nan", "3490": "nan", "3495": "nan", "3500": 3.38966, "3505": "nan", "3510": "nan", "3515": "nan", "3520": "nan", "3525": "nan", "3530": "nan", "3535": "nan", "3540": "nan", "3545": "nan", "3550": "nan", "3555": "nan", "3560": "nan", "3565": "nan", "3570": "nan", "3575": "nan", "3580": "nan", "3585": "nan", "3590": "nan", "3595": "nan", "3600": 3.38952, "3605": "nan", "3610": "nan", "3615": "nan", "3620": "nan", "3625": "nan", "3630": "nan", "3635": "nan", "3640": "nan", "3645": "nan", "3650": "nan", "3655": "nan", "3660": "nan", "3665": "nan", "3670": "nan", "3675": "nan", "3680": "nan", "3685": "nan", "3690": "nan", "3695": "nan", "3700": 3.3871, "3705": "nan", "3710": "nan", "3715": "nan", "3720": "nan", "3725": "nan", "3730": "nan", "3735": "nan", "3740": "nan", "3745": "nan", "3750": "nan", "3755": "nan", "3760": "nan", "3765": "nan", "3770": "nan", "3775": "nan", "3780": "nan", "3785": "nan", "3790": "nan", "3795": "nan", "3800": 3.38626, "3805": "nan", "3810": "nan", "3815": "nan", "3820": "nan", "3825": "nan", "3830": "nan", "3835": "nan", "3840": "nan", "3845": "nan", "3850": "nan", "3855": "nan", "3860": "nan", "3865": "nan", "3870": "nan", "3875": "nan", "3880": "nan", "3885": "nan", "3890": "nan", "3895": "nan", "3900": 3.38708, "3905": "nan", "3910": "nan", "3915": "nan", "3920": "nan", "3925": "nan", "3930": "nan", "3935": "nan", "3940": "nan", "3945": "nan", "3950": "nan", "3955": "nan", "3960": "nan", "3965": "nan", "3970": "nan", "3975": "nan", "3980": "nan", "3985": "nan", "3990": "nan", "3995": "nan", "4000": 3.38632, "4005": "nan", "4010": "nan", "4015": "nan", "4020": "nan", "4025": "nan", "4030": "nan", "4035": "nan", "4040": "nan", "4045": "nan", "4050": "nan", "4055": "nan", "4060": "nan", "4065": "nan", "4070": "nan", "4075": "nan", "4080": "nan", "4085": "nan", "4090": "nan", "4095": "nan", "4100": 3.38441, "4105": "nan", "4110": "nan", "4115": "nan", "4120": "nan", "4125": "nan", "4130": "nan", "4135": "nan", "4140": "nan", "4145": "nan", "4150": "nan", "4155": "nan", "4160": "nan", "4165": "nan", "4170": "nan", "4175": "nan", "4180": "nan", "4185": "nan", "4190": "nan", "4195": "nan", "4200": 3.38457, "4205": "nan", "4210": "nan", "4215": "nan", "4220": "nan", "4225": "nan", "4230": "nan", "4235": "nan", "4240": "nan", "4245": "nan", "4250": "nan", "4255": "nan", "4260": "nan", "4265": "nan", "4270": "nan", "4275": "nan", "4280": "nan", "4285": "nan", "4290": "nan", "4295": "nan", "4300": 3.38457, "4305": "nan", "4310": "nan", "4315": "nan", "4320": "nan", "4325": "nan", "4330": "nan", "4335": "nan", "4340": "nan", "4345": "nan", "4350": "nan", "4355": "nan", "4360": "nan", "4365": "nan", "4370": "nan", "4375": "nan", "4380": "nan", "4385": "nan", "4390": "nan", "4395": "nan", "4400": 3.38389, "4405": "nan", "4410": "nan", "4415": "nan", "4420": "nan", "4425": "nan", "4430": "nan", "4435": "nan", "4440": "nan", "4445": "nan", "4450": "nan", "4455": "nan", "4460": "nan", "4465": "nan", "4470": "nan", "4475": "nan", "4480": "nan", "4485": "nan", "4490": "nan", "4495": "nan", "4500": 3.38314, "4505": "nan", "4510": "nan", "4515": "nan", "4520": "nan", "4525": "nan", "4530": "nan", "4535": "nan", "4540": "nan", "4545": "nan", "4550": "nan", "4555": "nan", "4560": "nan", "4565": "nan", "4570": "nan", "4575": "nan", "4580": "nan", "4585": "nan", "4590": "nan", "4595": "nan", "4600": 3.38093, "4605": "nan", "4610": "nan", "4615": "nan", "4620": "nan", "4625": "nan", "4630": "nan", "4635": "nan", "4640": "nan", "4645": "nan", "4650": "nan", "4655": "nan", "4660": "nan", "4665": "nan", "4670": "nan", "4675": "nan", "4680": "nan", "4685": "nan", "4690": "nan", "4695": "nan", "4700": 3.38075, "4705": "nan", "4710": "nan", "4715": "nan", "4720": "nan", "4725": "nan", "4730": "nan", "4735": "nan", "4740": "nan", "4745": "nan", "4750": "nan", "4755": "nan", "4760": "nan", "4765": "nan", "4770": "nan", "4775": "nan", "4780": "nan", "4785": "nan", "4790": "nan", "4795": "nan", "4800": 3.38137, "4805": "nan", "4810": "nan", "4815": "nan", "4820": "nan", "4825": "nan", "4830": "nan", "4835": "nan", "4840": "nan", "4845": "nan", "4850": "nan", "4855": "nan", "4860": "nan", "4865": "nan", "4870": "nan", "4875": "nan", "4880": "nan", "4885": "nan", "4890": "nan", "4895": "nan", "4900": 3.38055, "4905": "nan", "4910": "nan", "4915": "nan", "4920": "nan", "4925": "nan", "4930": "nan", "4935": "nan", "4940": "nan", "4945": "nan", "4950": "nan", "4955": "nan", "4960": "nan", "4965": "nan", "4970": "nan", "4975": "nan", "4980": "nan", "4985": "nan", "4990": "nan", "4995": "nan", "5000": 3.38053, "5005": "nan", "5010": "nan", "5015": "nan", "5020": "nan", "5025": "nan", "5030": "nan", "5035": "nan", "5040": "nan", "5045": "nan", "5050": "nan", "5055": "nan", "5060": "nan", "5065": "nan", "5070": "nan", "5075": "nan", "5080": "nan", "5085": "nan", "5090": "nan", "5095": "nan", "5100": 3.3822, "5105": "nan", "5110": "nan", "5115": "nan", "5120": "nan", "5125": "nan", "5130": "nan", "5135": "nan", "5140": "nan", "5145": "nan", "5150": "nan", "5155": "nan", "5160": "nan", "5165": "nan", "5170": "nan", "5175": "nan", "5180": "nan", "5185": "nan", "5190": "nan", "5195": "nan", "5200": 3.38192, "5205": "nan", "5210": "nan", "5215": "nan", "5220": "nan", "5225": "nan", "5230": "nan", "5235": "nan", "5240": "nan", "5245": "nan", "5250": "nan", "5255": "nan", "5260": "nan", "5265": "nan", "5270": "nan", "5275": "nan", "5280": "nan", "5285": "nan", "5290": "nan", "5295": "nan", "5300": 3.38156, "5305": "nan", "5310": "nan", "5315": "nan", "5320": "nan", "5325": "nan", "5330": "nan", "5335": "nan", "5340": "nan", "5345": "nan", "5350": "nan", "5355": "nan", "5360": "nan", "5365": "nan", "5370": "nan", "5375": "nan", "5380": "nan", "5385": "nan", "5390": "nan", "5395": "nan", "5400": 3.38069, "5405": "nan", "5410": "nan", "5415": "nan", "5420": "nan", "5425": "nan", "5430": "nan", "5435": "nan", "5440": "nan", "5445": "nan", "5450": "nan", "5455": "nan", "5460": "nan", "5465": "nan", "5470": "nan", "5475": "nan", "5480": "nan", "5485": "nan", "5490": "nan", "5495": "nan", "5500": 3.38347, "5505": "nan", "5510": "nan", "5515": "nan", "5520": "nan", "5525": "nan", "5530": "nan", "5535": "nan", "5540": "nan", "5545": "nan", "5550": "nan", "5555": "nan", "5560": "nan", "5565": "nan", "5570": "nan", "5575": "nan", "5580": "nan", "5585": "nan", "5590": "nan", "5595": "nan", "5600": 3.38222, "5605": "nan", "5610": "nan", "5615": "nan", "5620": "nan", "5625": "nan", "5630": "nan", "5635": "nan", "5640": "nan", "5645": "nan", "5650": "nan", "5655": "nan", "5660": "nan", "5665": "nan", "5670": "nan", "5675": "nan", "5680": "nan", "5685": "nan", "5690": "nan", "5695": "nan", "5700": 3.38243, "5705": "nan", "5710": "nan", "5715": "nan", "5720": "nan", "5725": "nan", "5730": "nan", "5735": "nan", "5740": "nan", "5745": "nan", "5750": "nan", "5755": "nan", "5760": "nan", "5765": "nan", "5770": "nan", "5775": "nan", "5780": "nan", "5785": "nan", "5790": "nan", "5795": "nan", "5800": 3.38216, "5805": "nan", "5810": "nan", "5815": "nan", "5820": "nan", "5825": "nan", "5830": "nan", "5835": "nan", "5840": "nan", "5845": "nan", "5850": "nan", "5855": "nan", "5860": "nan", "5865": "nan", "5870": "nan", "5875": "nan", "5880": "nan", "5885": "nan", "5890": "nan", "5895": "nan", "5900": 3.38049, "5905": "nan", "5910": "nan", "5915": "nan", "5920": "nan", "5925": "nan", "5930": "nan", "5935": "nan", "5940": "nan", "5945": "nan", "5950": "nan", "5955": "nan", "5960": "nan", "5965": "nan", "5970": "nan", "5975": "nan", "5980": "nan", "5985": "nan", "5990": "nan", "5995": "nan", "6000": 3.38023, "6005": "nan", "6010": "nan", "6015": "nan", "6020": "nan", "6025": "nan", "6030": "nan", "6035": "nan", "6040": "nan", "6045": "nan", "6050": "nan", "6055": "nan", "6060": "nan", "6065": "nan", "6070": "nan", "6075": "nan", "6080": "nan", "6085": "nan", "6090": "nan", "6095": "nan", "6100": 3.37904, "6105": "nan", "6110": "nan", "6115": "nan", "6120": "nan", "6125": "nan", "6130": "nan", "6135": "nan", "6140": "nan", "6145": "nan", "6150": "nan", "6155": "nan", "6160": "nan", "6165": "nan", "6170": "nan", "6175": "nan", "6180": "nan", "6185": "nan", "6190": "nan", "6195": "nan", "6200": 3.38026, "6205": "nan", "6210": "nan", "6215": "nan", "6220": "nan", "6225": "nan", "6230": "nan", "6235": "nan", "6240": "nan", "6245": "nan", "6250": "nan", "6255": "nan", "6260": "nan", "6265": "nan", "6270": "nan", "6275": "nan", "6280": "nan", "6285": "nan", "6290": "nan", "6295": "nan", "6300": 3.38003, "6305": "nan", "6310": "nan", "6315": "nan", "6320": "nan", "6325": "nan", "6330": "nan", "6335": "nan", "6340": "nan", "6345": "nan", "6350": "nan", "6355": "nan", "6360": "nan", "6365": "nan", "6370": "nan", "6375": "nan", "6380": "nan", "6385": "nan", "6390": "nan", "6395": "nan", "6400": 3.37878, "6405": "nan", "6410": "nan", "6415": "nan", "6420": "nan", "6425": "nan", "6430": "nan", "6435": "nan", "6440": "nan", "6445": "nan", "6450": "nan", "6455": "nan", "6460": "nan", "6465": "nan", "6470": "nan", "6475": "nan", "6480": "nan", "6485": "nan", "6490": "nan", "6495": "nan", "6500": 3.38006, "6505": "nan", "6510": "nan", "6515": "nan", "6520": "nan", "6525": "nan", "6530": "nan", "6535": "nan", "6540": "nan", "6545": "nan", "6550": "nan", "6555": "nan", "6560": "nan", "6565": "nan", "6570": "nan", "6575": "nan", "6580": "nan", "6585": "nan", "6590": "nan", "6595": "nan", "6600": 3.38036, "6605": "nan", "6610": "nan", "6615": "nan", "6620": "nan", "6625": "nan", "6630": "nan", "6635": "nan", "6640": "nan", "6645": "nan", "6650": "nan", "6655": "nan", "6660": "nan", "6665": "nan", "6670": "nan", "6675": "nan", "6680": "nan", "6685": "nan", "6690": "nan", "6695": "nan", "6700": 3.38116, "6705": "nan", "6710": "nan", "6715": "nan", "6720": "nan", "6725": "nan", "6730": "nan", "6735": "nan", "6740": "nan", "6745": "nan", "6750": "nan", "6755": "nan", "6760": "nan", "6765": "nan", "6770": "nan", "6775": "nan", "6780": "nan", "6785": "nan", "6790": "nan", "6795": "nan", "6800": 3.39788, "6805": "nan", "6810": "nan", "6815": "nan", "6820": "nan", "6825": "nan", "6830": "nan", "6835": "nan", "6840": "nan", "6845": "nan", "6850": "nan", "6855": "nan", "6860": "nan", "6865": "nan", "6870": "nan", "6875": "nan", "6880": "nan", "6885": "nan", "6890": "nan", "6895": "nan", "6900": 3.39841, "6905": "nan", "6910": "nan", "6915": "nan", "6920": "nan", "6925": "nan", "6930": "nan", "6935": "nan", "6940": "nan", "6945": "nan", "6950": "nan", "6955": "nan", "6960": "nan", "6965": "nan", "6970": "nan", "6975": "nan", "6980": "nan", "6985": "nan", "6990": "nan", "6995": "nan", "7000": 3.39845, "7005": "nan", "7010": "nan", "7015": "nan", "7020": "nan", "7025": "nan", "7030": "nan", "7035": "nan", "7040": "nan", "7045": "nan", "7050": "nan", "7055": "nan", "7060": "nan", "7065": "nan", "7070": "nan", "7075": "nan", "7080": "nan", "7085": "nan", "7090": "nan", "7095": "nan", "7100": 3.37973, "7105": "nan", "7110": "nan", "7115": "nan", "7120": "nan", "7125": "nan", "7130": "nan", "7135": "nan", "7140": "nan", "7145": "nan", "7150": "nan", "7155": "nan", "7160": "nan", "7165": "nan", "7170": "nan", "7175": "nan", "7180": "nan", "7185": "nan", "7190": "nan", "7195": "nan", "7200": 3.3813, "7205": "nan", "7210": "nan", "7215": "nan", "7220": "nan", "7225": "nan", "7230": "nan", "7235": "nan", "7240": "nan", "7245": "nan", "7250": "nan", "7255": "nan", "7260": "nan", "7265": "nan", "7270": "nan", "7275": "nan", "7280": "nan", "7285": "nan", "7290": "nan", "7295": "nan", "7300": 3.38135, "7305": "nan", "7310": "nan", "7315": "nan", "7320": "nan", "7325": "nan", "7330": "nan", "7335": "nan", "7340": "nan", "7345": "nan", "7350": "nan", "7355": "nan", "7360": "nan", "7365": "nan", "7370": "nan", "7375": "nan", "7380": "nan", "7385": "nan", "7390": "nan", "7395": "nan", "7400": 3.38258, "7405": "nan", "7410": "nan", "7415": "nan", "7420": "nan", "7425": "nan", "7430": "nan", "7435": "nan", "7440": "nan", "7445": "nan", "7450": "nan", "7455": "nan", "7460": "nan", "7465": "nan", "7470": "nan", "7475": "nan", "7480": "nan", "7485": "nan", "7490": "nan", "7495": "nan", "7500": 3.37993, "7505": "nan", "7510": "nan", "7515": "nan", "7520": "nan", "7525": "nan", "7530": "nan", "7535": "nan", "7540": "nan", "7545": "nan", "7550": "nan", "7555": "nan", "7560": "nan", "7565": "nan", "7570": "nan", "7575": "nan", "7580": "nan", "7585": "nan", "7590": "nan", "7595": "nan", "7600": 3.3791, "7605": "nan", "7610": "nan", "7615": "nan", "7620": "nan", "7625": "nan", "7630": "nan", "7635": "nan", "7640": "nan", "7645": "nan", "7650": "nan", "7655": "nan", "7660": "nan", "7665": "nan", "7670": "nan", "7675": "nan", "7680": "nan", "7685": "nan", "7690": "nan", "7695": "nan", "7700": 3.3801, "7705": "nan", "7710": "nan", "7715": "nan", "7720": "nan", "7725": "nan", "7730": "nan", "7735": "nan", "7740": "nan", "7745": "nan", "7750": "nan", "7755": "nan", "7760": "nan", "7765": "nan", "7770": "nan", "7775": "nan", "7780": "nan", "7785": "nan", "7790": "nan", "7795": "nan", "7800": 3.40629, "7805": "nan", "7810": "nan", "7815": "nan", "7820": "nan", "7825": "nan", "7830": "nan", "7835": "nan", "7840": "nan", "7845": "nan", "7850": "nan", "7855": "nan", "7860": "nan", "7865": "nan", "7870": "nan", "7875": "nan", "7880": "nan", "7885": "nan", "7890": "nan", "7895": "nan", "7900": 3.39663, "7905": "nan", "7910": "nan", "7915": "nan", "7920": "nan", "7925": "nan", "7930": "nan", "7935": "nan", "7940": "nan", "7945": "nan", "7950": "nan", "7955": "nan", "7960": "nan", "7965": "nan", "7970": "nan", "7975": "nan", "7980": "nan", "7985": "nan", "7990": "nan", "7995": "nan", "8000": 3.39421, "8005": "nan", "8010": "nan", "8015": "nan", "8020": "nan", "8025": "nan", "8030": "nan", "8035": "nan", "8040": "nan", "8045": "nan", "8050": "nan", "8055": "nan", "8060": "nan", "8065": "nan", "8070": "nan", "8075": "nan", "8080": "nan", "8085": "nan", "8090": "nan", "8095": "nan", "8100": 3.38433, "8105": "nan", "8110": "nan", "8115": "nan", "8120": "nan", "8125": "nan", "8130": "nan", "8135": "nan", "8140": "nan", "8145": "nan", "8150": "nan", "8155": "nan", "8160": "nan", "8165": "nan", "8170": "nan", "8175": "nan", "8180": "nan", "8185": "nan", "8190": "nan", "8195": "nan", "8200": 3.38564, "8205": "nan", "8210": "nan", "8215": "nan", "8220": "nan", "8225": "nan", "8230": "nan", "8235": "nan", "8240": "nan", "8245": "nan", "8250": "nan", "8255": "nan", "8260": "nan", "8265": "nan", "8270": "nan", "8275": "nan", "8280": "nan", "8285": "nan", "8290": "nan", "8295": "nan", "8300": 3.38548, "8305": "nan", "8310": "nan", "8315": "nan", "8320": "nan", "8325": "nan", "8330": "nan", "8335": "nan", "8340": "nan", "8345": "nan", "8350": "nan", "8355": "nan", "8360": "nan", "8365": "nan", "8370": "nan", "8375": "nan", "8380": "nan", "8385": "nan", "8390": "nan", "8395": "nan", "8400": 3.38652, "8405": "nan", "8410": "nan", "8415": "nan", "8420": "nan", "8425": "nan", "8430": "nan", "8435": "nan", "8440": "nan", "8445": "nan", "8450": "nan", "8455": "nan", "8460": "nan", "8465": "nan", "8470": "nan", "8475": "nan", "8480": "nan", "8485": "nan", "8490": "nan", "8495": "nan", "8500": 3.38537, "8505": "nan", "8510": "nan", "8515": "nan", "8520": "nan", "8525": "nan", "8530": "nan", "8535": "nan", "8540": "nan", "8545": "nan", "8550": "nan", "8555": "nan", "8560": "nan", "8565": "nan", "8570": "nan", "8575": "nan", "8580": "nan", "8585": "nan", "8590": "nan", "8595": "nan", "8600": 3.38441, "8605": "nan", "8610": "nan", "8615": "nan", "8620": "nan", "8625": "nan", "8630": "nan", "8635": "nan", "8640": "nan", "8645": "nan", "8650": "nan", "8655": "nan", "8660": "nan", "8665": "nan", "8670": "nan", "8675": "nan", "8680": "nan", "8685": "nan", "8690": "nan", "8695": "nan", "8700": 3.38569, "8705": "nan", "8710": "nan", "8715": "nan", "8720": "nan", "8725": "nan", "8730": "nan", "8735": "nan", "8740": "nan", "8745": "nan", "8750": "nan", "8755": "nan", "8760": "nan", "8765": "nan", "8770": "nan", "8775": "nan", "8780": "nan", "8785": "nan", "8790": "nan", "8795": "nan", "8800": 3.38471, "8805": "nan", "8810": "nan", "8815": "nan", "8820": "nan", "8825": "nan", "8830": "nan", "8835": "nan", "8840": "nan", "8845": "nan", "8850": "nan", "8855": "nan", "8860": "nan", "8865": "nan", "8870": "nan", "8875": "nan", "8880": "nan", "8885": "nan", "8890": "nan", "8895": "nan", "8900": 3.38583, "8905": "nan", "8910": "nan", "8915": "nan", "8920": "nan", "8925": "nan", "8930": "nan", "8935": "nan", "8940": "nan", "8945": "nan", "8950": "nan", "8955": "nan", "8960": "nan", "8965": "nan", "8970": "nan", "8975": "nan", "8980": "nan", "8985": "nan", "8990": "nan", "8995": "nan", "9000": 3.38381, "9005": "nan", "9010": "nan", "9015": "nan", "9020": "nan", "9025": "nan", "9030": "nan", "9035": "nan", "9040": "nan", "9045": "nan", "9050": "nan", "9055": "nan", "9060": "nan", "9065": "nan", "9070": "nan", "9075": "nan", "9080": "nan", "9085": "nan", "9090": "nan", "9095": "nan", "9100": 3.38317, "9105": "nan", "9110": "nan", "9115": "nan", "9120": "nan", "9125": "nan", "9130": "nan", "9135": "nan", "9140": "nan", "9145": "nan", "9150": "nan", "9155": "nan", "9160": "nan", "9165": "nan", "9170": "nan", "9175": "nan", "9180": "nan", "9185": "nan", "9190": "nan", "9195": "nan", "9200": 3.3843, "9205": "nan", "9210": "nan", "9215": "nan", "9220": "nan", "9225": "nan", "9230": "nan", "9235": "nan", "9240": "nan", "9245": "nan", "9250": "nan", "9255": "nan", "9260": "nan", "9265": "nan", "9270": "nan", "9275": "nan", "9280": "nan", "9285": "nan", "9290": "nan", "9295": "nan", "9300": 3.38399, "9305": "nan", "9310": "nan", "9315": "nan", "9320": "nan", "9325": "nan", "9330": "nan", "9335": "nan", "9340": "nan", "9345": "nan", "9350": "nan", "9355": "nan", "9360": "nan", "9365": "nan", "9370": "nan", "9375": "nan", "9380": "nan", "9385": "nan", "9390": "nan", "9395": "nan", "9400": 3.38416, "9405": "nan", "9410": "nan", "9415": "nan", "9420": "nan", "9425": "nan", "9430": "nan", "9435": "nan", "9440": "nan", "9445": "nan", "9450": "nan", "9455": "nan", "9460": "nan", "9465": "nan", "9470": "nan", "9475": "nan", "9480": "nan", "9485": "nan", "9490": "nan", "9495": "nan", "9500": 3.38376, "9505": "nan", "9510": "nan", "9515": "nan", "9520": "nan", "9525": "nan", "9530": "nan", "9535": "nan", "9540": "nan", "9545": "nan", "9550": "nan", "9555": "nan", "9560": "nan", "9565": "nan", "9570": "nan", "9575": "nan", "9580": "nan", "9585": "nan", "9590": "nan", "9595": "nan", "9600": 3.38327, "9605": "nan", "9610": "nan", "9615": "nan", "9620": "nan", "9625": "nan", "9630": "nan", "9635": "nan", "9640": "nan", "9645": "nan", "9650": "nan", "9655": "nan", "9660": "nan", "9665": "nan", "9670": "nan", "9675": "nan", "9680": "nan", "9685": "nan", "9690": "nan", "9695": "nan", "9700": 3.38432, "9705": "nan", "9710": "nan", "9715": "nan", "9720": "nan", "9725": "nan", "9730": "nan", "9735": "nan", "9740": "nan", "9745": "nan", "9750": "nan", "9755": "nan", "9760": "nan", "9765": "nan", "9770": "nan", "9775": "nan", "9780": "nan", "9785": "nan", "9790": "nan", "9795": "nan", "9800": 3.38427, "9805": "nan", "9810": "nan", "9815": "nan", "9820": "nan", "9825": "nan", "9830": "nan", "9835": "nan", "9840": "nan", "9845": "nan", "9850": "nan", "9855": "nan", "9860": "nan", "9865": "nan", "9870": "nan", "9875": "nan", "9880": "nan", "9885": "nan", "9890": "nan", "9895": "nan", "9900": 3.38415, "9905": "nan", "9910": "nan", "9915": "nan", "9920": "nan", "9925": "nan", "9930": "nan", "9935": "nan", "9940": "nan", "9945": "nan", "9950": "nan", "9955": "nan", "9960": "nan", "9965": "nan", "9970": "nan", "9975": "nan", "9980": "nan", "9985": "nan", "9990": "nan", "9995": "nan", "10000": 3.38426, "10005": "nan", "10010": "nan", "10015": "nan", "10020": "nan", "10025": "nan", "10030": "nan", "10035": "nan", "10040": "nan", "10045": "nan", "10050": "nan", "10055": "nan", "10060": "nan", "10065": "nan", "10070": "nan", "10075": "nan", "10080": "nan", "10085": "nan", "10090": "nan", "10095": "nan", "10100": 3.38377, "10105": "nan", "10110": "nan", "10115": "nan", "10120": "nan", "10125": "nan", "10130": "nan", "10135": "nan", "10140": "nan", "10145": "nan", "10150": "nan", "10155": "nan", "10160": "nan", "10165": "nan", "10170": "nan", "10175": "nan", "10180": "nan", "10185": "nan", "10190": "nan", "10195": "nan", "10200": 3.38456, "10205": "nan", "10210": "nan", "10215": "nan", "10220": "nan", "10225": "nan", "10230": "nan", "10235": "nan", "10240": "nan", "10245": "nan", "10250": "nan", "10255": "nan", "10260": "nan", "10265": "nan", "10270": "nan", "10275": "nan", "10280": "nan", "10285": "nan", "10290": "nan", "10295": "nan", "10300": 3.38487, "10305": "nan", "10310": "nan", "10315": "nan", "10320": "nan", "10325": "nan", "10330": "nan", "10335": "nan", "10340": "nan", "10345": "nan", "10350": "nan", "10355": "nan", "10360": "nan", "10365": "nan", "10370": "nan", "10375": "nan", "10380": "nan", "10385": "nan", "10390": "nan", "10395": "nan", "10400": 3.38413, "10405": "nan", "10410": "nan", "10415": "nan", "10420": "nan", "10425": "nan", "10430": "nan", "10435": "nan", "10440": "nan", "10445": "nan", "10450": "nan", "10455": "nan", "10460": "nan", "10465": "nan", "10470": "nan", "10475": "nan", "10480": "nan", "10485": "nan", "10490": "nan", "10495": "nan", "10500": 3.38411, "10505": "nan", "10510": "nan", "10515": "nan", "10520": "nan", "10525": "nan", "10530": "nan", "10535": "nan", "10540": "nan", "10545": "nan", "10550": "nan", "10555": "nan", "10560": "nan", "10565": "nan", "10570": "nan", "10575": "nan", "10580": "nan", "10585": "nan", "10590": "nan", "10595": "nan", "10600": 3.38408, "10605": "nan", "10610": "nan", "10615": "nan", "10620": "nan", "10625": "nan", "10630": "nan", "10635": "nan", "10640": "nan", "10645": "nan", "10650": "nan", "10655": "nan", "10660": "nan", "10665": "nan", "10670": "nan", "10675": "nan", "10680": "nan", "10685": "nan", "10690": "nan", "10695": "nan", "10700": 3.3839, "10705": "nan", "10710": "nan", "10715": "nan", "10720": "nan", "10725": "nan", "10730": "nan", "10735": "nan", "10740": "nan", "10745": "nan", "10750": "nan", "10755": "nan", "10760": "nan", "10765": "nan", "10770": "nan", "10775": "nan", "10780": "nan", "10785": "nan", "10790": "nan", "10795": "nan", "10800": 3.38381, "10805": "nan", "10810": "nan", "10815": "nan", "10820": "nan", "10825": "nan", "10830": "nan", "10835": "nan", "10840": "nan", "10845": "nan", "10850": "nan", "10855": "nan", "10860": "nan", "10865": "nan", "10870": "nan", "10875": "nan", "10880": "nan", "10885": "nan", "10890": "nan", "10895": "nan", "10900": 3.83861, "10905": "nan", "10910": "nan", "10915": "nan", "10920": "nan", "10925": "nan", "10930": "nan", "10935": "nan", "10940": "nan", "10945": "nan", "10950": "nan", "10955": "nan", "10960": "nan", "10965": "nan", "10970": "nan", "10975": "nan", "10980": "nan", "10985": "nan", "10990": "nan", "10995": "nan", "11000": 3.39347, "11005": "nan", "11010": "nan", "11015": "nan", "11020": "nan", "11025": "nan", "11030": "nan", "11035": "nan", "11040": "nan", "11045": "nan", "11050": "nan", "11055": "nan", "11060": "nan", "11065": "nan", "11070": "nan", "11075": "nan", "11080": "nan", "11085": "nan", "11090": "nan", "11095": "nan", "11100": 3.39189, "11105": "nan", "11110": "nan", "11115": "nan", "11120": "nan", "11125": "nan", "11130": "nan", "11135": "nan", "11140": "nan", "11145": "nan", "11150": "nan", "11155": "nan", "11160": "nan", "11165": "nan", "11170": "nan", "11175": "nan", "11180": "nan", "11185": "nan", "11190": "nan", "11195": "nan", "11200": 3.3918, "11205": "nan", "11210": "nan", "11215": "nan", "11220": "nan", "11225": "nan", "11230": "nan", "11235": "nan", "11240": "nan", "11245": "nan", "11250": "nan", "11255": "nan", "11260": "nan", "11265": "nan", "11270": "nan", "11275": "nan", "11280": "nan", "11285": "nan", "11290": "nan", "11295": "nan", "11300": 3.39238, "11305": "nan", "11310": "nan", "11315": "nan", "11320": "nan", "11325": "nan", "11330": "nan", "11335": "nan", "11340": "nan", "11345": "nan", "11350": "nan", "11355": "nan", "11360": "nan", "11365": "nan", "11370": "nan", "11375": "nan", "11380": "nan", "11385": "nan", "11390": "nan", "11395": "nan", "11400": 3.39244, "11405": "nan", "11410": "nan", "11415": "nan", "11420": "nan", "11425": "nan", "11430": "nan", "11435": "nan", "11440": "nan", "11445": "nan", "11450": "nan", "11455": "nan", "11460": "nan", "11465": "nan", "11470": "nan", "11475": "nan", "11480": "nan", "11485": "nan", "11490": "nan", "11495": "nan", "11500": 3.39253, "11505": "nan", "11510": "nan", "11515": "nan", "11520": "nan", "11525": "nan", "11530": "nan", "11535": "nan", "11540": "nan", "11545": "nan", "11550": "nan", "11555": "nan", "11560": "nan", "11565": "nan", "11570": "nan", "11575": "nan", "11580": "nan", "11585": "nan", "11590": "nan", "11595": "nan", "11600": 3.39379, "11605": "nan", "11610": "nan", "11615": "nan", "11620": "nan", "11625": "nan", "11630": "nan", "11635": "nan", "11640": "nan", "11645": "nan", "11650": "nan", "11655": "nan", "11660": "nan", "11665": "nan", "11670": "nan", "11675": "nan", "11680": "nan", "11685": "nan", "11690": "nan", "11695": "nan", "11700": 3.39314, "11705": "nan", "11710": "nan", "11715": "nan", "11720": "nan", "11725": "nan", "11730": "nan", "11735": "nan", "11740": "nan", "11745": "nan", "11750": "nan", "11755": "nan", "11760": "nan", "11765": "nan", "11770": "nan", "11775": "nan", "11780": "nan", "11785": "nan", "11790": "nan", "11795": "nan", "11800": 3.41793, "11805": "nan", "11810": "nan", "11815": "nan", "11820": "nan", "11825": "nan", "11830": "nan", "11835": "nan", "11840": "nan", "11845": "nan", "11850": "nan", "11855": "nan", "11860": "nan", "11865": "nan", "11870": "nan", "11875": "nan", "11880": "nan", "11885": "nan", "11890": "nan", "11895": "nan", "11900": 3.39269, "11905": "nan", "11910": "nan", "11915": "nan", "11920": "nan", "11925": "nan", "11930": "nan", "11935": "nan", "11940": "nan", "11945": "nan", "11950": "nan", "11955": "nan", "11960": "nan", "11965": "nan", "11970": "nan", "11975": "nan", "11980": "nan", "11985": "nan", "11990": "nan", "11995": "nan", "12000": 3.39317, "12005": "nan", "12010": "nan", "12015": "nan", "12020": "nan", "12025": "nan", "12030": "nan", "12035": "nan", "12040": "nan", "12045": "nan", "12050": "nan", "12055": "nan", "12060": "nan", "12065": "nan", "12070": "nan", "12075": "nan", "12080": "nan", "12085": "nan", "12090": "nan", "12095": "nan", "12100": 3.39129, "12105": "nan", "12110": "nan", "12115": "nan", "12120": "nan", "12125": "nan", "12130": "nan", "12135": "nan", "12140": "nan", "12145": "nan", "12150": "nan", "12155": "nan", "12160": "nan", "12165": "nan", "12170": "nan", "12175": "nan", "12180": "nan", "12185": "nan", "12190": "nan", "12195": "nan", "12200": 3.3923, "12205": "nan", "12210": "nan", "12215": "nan", "12220": "nan", "12225": "nan", "12230": "nan", "12235": "nan", "12240": "nan", "12245": "nan", "12250": "nan", "12255": "nan", "12260": "nan", "12265": "nan", "12270": "nan", "12275": "nan", "12280": "nan", "12285": "nan", "12290": "nan", "12295": "nan", "12300": 3.39197, "12305": "nan", "12310": "nan", "12315": "nan", "12320": "nan", "12325": "nan", "12330": "nan", "12335": "nan", "12340": "nan", "12345": "nan", "12350": "nan", "12355": "nan", "12360": "nan", "12365": "nan", "12370": "nan", "12375": "nan", "12380": "nan", "12385": "nan", "12390": "nan", "12395": "nan", "12400": 3.39164, "12405": "nan", "12410": "nan", "12415": "nan", "12420": "nan", "12425": "nan", "12430": "nan", "12435": "nan", "12440": "nan", "12445": "nan", "12450": "nan", "12455": "nan", "12460": "nan", "12465": "nan", "12470": "nan", "12475": "nan", "12480": "nan", "12485": "nan", "12490": "nan", "12495": "nan", "12500": 3.39073, "12505": "nan", "12510": "nan", "12515": "nan", "12520": "nan", "12525": "nan", "12530": "nan", "12535": "nan", "12540": "nan", "12545": "nan", "12550": "nan", "12555": "nan", "12560": "nan", "12565": "nan", "12570": "nan", "12575": "nan", "12580": "nan", "12585": "nan", "12590": "nan", "12595": "nan", "12600": 3.39126, "12605": "nan", "12610": "nan", "12615": "nan", "12620": "nan", "12625": "nan", "12630": "nan", "12635": "nan", "12640": "nan", "12645": "nan", "12650": "nan", "12655": "nan", "12660": "nan", "12665": "nan", "12670": "nan", "12675": "nan", "12680": "nan", "12685": "nan", "12690": "nan", "12695": "nan", "12700": 3.39109, "12705": "nan", "12710": "nan", "12715": "nan", "12720": "nan", "12725": "nan", "12730": "nan", "12735": "nan", "12740": "nan", "12745": "nan", "12750": "nan", "12755": "nan", "12760": "nan", "12765": "nan", "12770": "nan", "12775": "nan", "12780": "nan", "12785": "nan", "12790": "nan", "12795": "nan", "12800": "nan", "12805": "nan", "12810": "nan", "12815": "nan", "12820": "nan", "12825": "nan", "12830": "nan", "12835": "nan", "12840": "nan", "12845": "nan", "12850": "nan", "12855": "nan", "12860": "nan", "12865": "nan", "12870": "nan", "12875": "nan", "12880": "nan", "12885": "nan", "12890": "nan", "12895": "nan", "12900": "nan", "12905": "nan", "12910": "nan", "12915": "nan", "12920": "nan", "12925": "nan", "12930": "nan", "12935": "nan", "12940": "nan", "12945": "nan", "12950": "nan", "12955": "nan", "12960": "nan", "12965": "nan", "12970": "nan", "12975": "nan", "12980": "nan", "12985": "nan", "12990": "nan", "12995": "nan", "13000": "nan"}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_15b_8t_release_sm_gb200/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
  NON_DETERMINSTIC_RESULTS: 1
  NVTE_NORM_FWD_USE_CUDNN: 1
  NVTE_NORM_BWD_USE_CUDNN: 1
  NVTE_FUSED_ATTN: 1
  PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True
  USE_MNNVL: 1
TEST_TYPE: "release"
MODEL_ARGS:
  # Distributed args
  --distributed-timeout-minutes: 60
  --tensor-model-parallel-size: 4
  --pipeline-model-parallel-size: 2
  --num-layers-per-virtual-pipeline-stage: 8
  --use-distributed-optimizer: true
  --overlap-grad-reduce: true
  --overlap-param-gather: true
  --tp-comm-overlap: true
  # Training args
  --use-mcore-models: true
  --sequence-parallel: true
  --disable-bias-linear: true
  --micro-batch-size: 4
  --rampup-batch-size: "[384 384 97656250]"
  --global-batch-size: 1152
  --train-samples: 19531250
  --manual-gc: true
  --cross-entropy-loss-fusion: true
  --cross-entropy-fusion-impl: te
  # Transformer Engine args
  --transformer-impl: transformer_engine
  # Data args
  --data-cache-path: ${DATA_CACHE_PATH}
  --tokenizer-type: GPTSentencePieceTokenizer
  --tokenizer-model: ${DATA_PATH}/utils/nemotron_2_256k.model
  --data-path: $DATA_BLEND
  --split: 99,1,0
  --no-mmap-bin-files: true
  --num-workers: 6
  # Add network size args
  --apply-layernorm-1p: true
  --untie-embeddings-and-output-weights: true
  --position-embedding-type: rope
  --rotary-percent: 0.5
  --squared-relu: true
  --num-layers: 32
  --hidden-size: 6144
  --num-attention-heads: 48
  --group-query-attention: true
  --num-query-groups: 8
  --seq-length: 4096
  --max-position-embeddings: 4096
  # Add regularization args
  --attention-dropout: 0.0
  --hidden-dropout: 0.0
  --clip-grad: 1.0
  --weight-decay: 0.1
  # Add learning rate args
  --lr-decay-samples: 1949218748
  --lr-warmup-samples: 3906252
  --lr: 4.5e-4
  --min-lr: 4.5e-5
  --decoupled-lr: 5.0e-4
  --decoupled-min-lr: 4.5e-5
  --lr-decay-style: cosine
  --adam-beta1: 0.9
  --adam-beta2: 0.95
  # Add validation args
  --eval-iters: 32
  --eval-interval: 2000
  # Add checkpointing args
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --save-interval: 1000
  --save-retain-interval: 5000
  # Add initialization args
  --init-method-std: 0.0134
  # Add logging args
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  --log-num-zeros-in-grad: true
  --log-params-norm: true
  --log-validation-ppl-to-tensorboard: true
  --log-throughput: true
  --log-interval: 100
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --wandb-project: megatron-core-release-runs
  --wandb-entity: adlr
  --wandb-exp-name: ${WANDB_EXPERIMENT}
  # Add mixed precision args
  --bf16: true
  --exit-interval: 13000
  --wandb-save-dir: ${WANDB_SAVE_PATH}
  --async-save: true
  --use-persistent-ckpt-worker: true
METRICS:
  - "iteration-time"
  - "lm loss"
  - "mem-allocated-bytes"
  - "mem-max-allocated-bytes"


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 25, "step_interval": 5, "values": {"1": 12.57354, "5": 12.58059, "10": 12.47392, "15": 11.8061, "20": 11.49685, "25": 10.98449}}, "num-zeros": {"start_step": 1, "end_step": 25, "step_interval": 5, "values": {"1": 521040608.0, "5": 520997440.0, "10": 521179744.0, "15": 521593504.0, "20": 521132800.0, "25": 523543712.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 25, "step_interval": 5, "values": {"1": 24510808064.0, "5": 24510808064.0, "10": 24510808064.0, "15": 24510808064.0, "20": 24511029248.0, "25": 24510808064.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 25, "step_interval": 5, "values": {"1": 52700401664.0, "5": 60489064448.0, "10": 60489064448.0, "15": 60489064448.0, "20": 60489064448.0, "25": 60489064448.0}}, "iteration-time": {"start_step": 1, "end_step": 25, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": 2.9368, "15": "nan", "20": 2.94791, "25": "nan"}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": 12.59654,
            "2": 12.60484,
            "3": 12.59797,
            "4": 12.5969,
            "5": 12.59289,
            "6": 12.59265,
            "7": 12.58015,
            "8": 12.54318,
            "9": 12.5105,
            "10": 12.49672,
            "11": 12.32881,
            "12": 12.29939,
            "13": 12.23473,
            "14": 12.23325,
            "15": 11.817,
            "16": 11.80134,
            "17": 11.76435,
            "18": 11.73993,
            "19": 11.60889,
            "20": 11.50648,
            "21": 11.26946,
            "22": 11.37969,
            "23": 11.28801,
            "24": 11.16331,
            "25": 10.9989
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": 521038208.0,
            "2": 521665504.0,
            "3": 520934784.0,
            "4": 521226912.0,
            "5": 520995584.0,
            "6": 521371136.0,
            "7": 521420160.0,
            "8": 521056672.0,
            "9": 521461088.0,
            "10": 521178048.0,
            "11": 522280576.0,
            "12": 521439168.0,
            "13": 521475200.0,
            "14": 522446240.0,
            "15": 521590592.0,
            "16": 521416064.0,
            "17": 521027200.0,
            "18": 521279360.0,
            "19": 521153088.0,
            "20": 521134144.0,
            "21": 522908352.0,
            "22": 521591200.0,
            "23": 521351488.0,
            "24": 521424000.0,
            "25": 523543424.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": 24540168192.0,
            "2": 24540168192.0,
            "3": 24540168192.0,
            "4": 24540168192.0,
            "5": 24540168192.0,
            "6": 24540168192.0,
            "7": 24540168192.0,
            "8": 24540168192.0,
            "9": 24540168192.0,
            "10": 24540168192.0,
            "11": 24540168192.0,
            "12": 24540168192.0,
            "13": 24540168192.0,
            "14": 24540168192.0,
            "15": 24540168192.0,
            "16": 24540168192.0,
            "17": 24540168192.0,
            "18": 24540168192.0,
            "19": 24540168192.0,
            "20": 24540168192.0,
            "21": 24540168192.0,
            "22": 24540168192.0,
            "23": 24540168192.0,
            "24": 24540168192.0,
            "25": 24540168192.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": 52729765888.0,
            "2": 60518424576.0,
            "3": 60518424576.0,
            "4": 60518424576.0,
            "5": 60518424576.0,
            "6": 60518424576.0,
            "7": 60518424576.0,
            "8": 60518424576.0,
            "9": 60518424576.0,
            "10": 60518424576.0,
            "11": 60518424576.0,
            "12": 60518424576.0,
            "13": 60518424576.0,
            "14": 60518424576.0,
            "15": 60518424576.0,
            "16": 60518424576.0,
            "17": 60518424576.0,
            "18": 60518424576.0,
            "19": 60518424576.0,
            "20": 60518424576.0,
            "21": 60518424576.0,
            "22": 60518424576.0,
            "23": 60518424576.0,
            "24": 60518424576.0,
            "25": 60518424576.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 10.37368,
            "3": "nan",
            "4": 0.82471,
            "5": "nan",
            "6": 0.81418,
            "7": "nan",
            "8": 0.81547,
            "9": "nan",
            "10": 0.82718,
            "11": "nan",
            "12": 0.82851,
            "13": "nan",
            "14": 0.81363,
            "15": "nan",
            "16": 0.81374,
            "17": "nan",
            "18": 0.81396,
            "19": "nan",
            "20": 0.81346,
            "21": "nan",
            "22": 0.81255,
            "23": "nan",
            "24": 0.81353,
            "25": "nan"
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": 12.58569,
            "2": 12.58406,
            "3": 12.58488,
            "4": 12.58632,
            "5": 12.5828,
            "6": 12.57908,
            "7": 12.56192,
            "8": 12.52305,
            "9": 12.49668,
            "10": 12.48245,
            "11": 12.31444,
            "12": 12.27191,
            "13": 12.20938,
            "14": 12.20089,
            "15": 11.79651,
            "16": 11.78043,
            "17": 11.74179,
            "18": 11.71657,
            "19": 11.59068,
            "20": 11.47674,
            "21": 11.2379,
            "22": 11.35857,
            "23": 11.25769,
            "24": 11.14081,
            "25": 10.97993
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": 521034848.0,
            "2": 521662912.0,
            "3": 520932320.0,
            "4": 521225216.0,
            "5": 520992768.0,
            "6": 521369920.0,
            "7": 521417152.0,
            "8": 521055744.0,
            "9": 521459008.0,
            "10": 521175872.0,
            "11": 522276992.0,
            "12": 521435104.0,
            "13": 521472960.0,
            "14": 522443232.0,
            "15": 521589504.0,
            "16": 521413760.0,
            "17": 521026112.0,
            "18": 521279232.0,
            "19": 521152640.0,
            "20": 521132288.0,
            "21": 522908864.0,
            "22": 521591872.0,
            "23": 521353024.0,
            "24": 521427040.0,
            "25": 523546112.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": 24540168192.0,
            "2": 24540168192.0,
            "3": 24540168192.0,
            "4": 24540168192.0,
            "5": 24540168192.0,
            "6": 24540168192.0,
            "7": 24540168192.0,
            "8": 24540168192.0,
            "9": 24540168192.0,
            "10": 24540168192.0,
            "11": 24540168192.0,
            "12": 24540168192.0,
            "13": 24540168192.0,
            "14": 24540168192.0,
            "15": 24540168192.0,
            "16": 24540168192.0,
            "17": 24540168192.0,
            "18": 24540168192.0,
            "19": 24540168192.0,
            "20": 24540168192.0,
            "21": 24540168192.0,
            "22": 24540168192.0,
            "23": 24540168192.0,
            "24": 24540168192.0,
            "25": 24540168192.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": 52730814464.0,
            "2": 60518313984.0,
            "3": 60518313984.0,
            "4": 60518313984.0,
            "5": 60518313984.0,
            "6": 60518313984.0,
            "7": 60518313984.0,
            "8": 60518313984.0,
            "9": 60518313984.0,
            "10": 60518313984.0,
            "11": 60518313984.0,
            "12": 60518313984.0,
            "13": 60518313984.0,
            "14": 60518313984.0,
            "15": 60518313984.0,
            "16": 60518313984.0,
            "17": 60518313984.0,
            "18": 60518313984.0,
            "19": 60518313984.0,
            "20": 60518313984.0,
            "21": 60518313984.0,
            "22": 60518313984.0,
            "23": 60518313984.0,
            "24": 60518313984.0,
            "25": 60518313984.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 9.40588,
            "3": "nan",
            "4": 1.14216,
            "5": "nan",
            "6": 1.13426,
            "7": "nan",
            "8": 1.13417,
            "9": "nan",
            "10": 1.13556,
            "11": "nan",
            "12": 1.13935,
            "13": "nan",
            "14": 1.13873,
            "15": "nan",
            "16": 1.13957,
            "17": "nan",
            "18": 1.14035,
            "19": "nan",
            "20": 1.13973,
            "21": "nan",
            "22": 1.13936,
            "23": "nan",
            "24": 1.13705,
            "25": "nan"
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
  NVTE_FWD_LAYERNORM_SM_MARGIN: 16
  NVTE_BWD_LAYERNORM_SM_MARGIN: 16
MODEL_ARGS:
  --num-layers: 32
  --hidden-size: 4096
  --num-attention-heads: 32
  --group-query-attention: true
  --num-query-groups: 8
  --untie-embeddings-and-output-weights: true
  --log-throughput: true
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 1
  --global-batch-size: 8
  --seq-length: 8192
  --max-position-embeddings: 8192
  --train-iters: 25
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --tokenizer-type: NullTokenizer
  --vocab-size: 131072
  --mock-data: true
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 2
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 5
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --num-layers-per-virtual-pipeline-stage: 1
  --use-distributed-optimizer: true
  --overlap-grad-reduce: true
  --overlap-param-gather: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --log-memory-to-tensorboard: true
  --exit-interval: 25
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular
METRICS:
  - "iteration-time"
  - "lm loss"
  - "num-zeros"
  - "mem-allocated-bytes"
  - "mem-max-allocated-bytes"


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 25, "step_interval": 5, "values": {"1": 12.61228, "5": 12.60405, "10": 12.49835, "15": 11.81786, "20": 11.50309, "25": 10.99209}}, "num-zeros": {"start_step": 1, "end_step": 25, "step_interval": 5, "values": {"1": 523041344.0, "5": 523011584.0, "10": 523188896.0, "15": 523626176.0, "20": 523224608.0, "25": 525636160.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 25, "step_interval": 5, "values": {"1": 20634103808.0, "5": 20634103808.0, "10": 20634103808.0, "15": 20634103808.0, "20": 20634103808.0, "25": 20634103808.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 25, "step_interval": 5, "values": {"1": 51333926912.0, "5": 58188115968.0, "10": 58188115968.0, "15": 58188115968.0, "20": 58188115968.0, "25": 58188115968.0}}, "iteration-time": {"start_step": 1, "end_step": 25, "step_interval": 5, "values": {"1": "nan", "5": "nan", "10": 2.57769, "15": "nan", "20": 2.596, "25": "nan"}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": 12.61164,
            "2": 12.60596,
            "3": 12.60284,
            "4": 12.59692,
            "5": 12.59563,
            "6": 12.59765,
            "7": 12.58048,
            "8": 12.53848,
            "9": 12.51216,
            "10": 12.4986,
            "11": 12.32362,
            "12": 12.29423,
            "13": 12.23125,
            "14": 12.22834,
            "15": 11.82216,
            "16": 11.80406,
            "17": 11.76114,
            "18": 11.7371,
            "19": 11.61304,
            "20": 11.50147,
            "21": 11.26477,
            "22": 11.37633,
            "23": 11.28391,
            "24": 11.15655,
            "25": 10.99866
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": 523050144.0,
            "2": 523678816.0,
            "3": 522945600.0,
            "4": 523240640.0,
            "5": 523021472.0,
            "6": 523373120.0,
            "7": 523437344.0,
            "8": 523085504.0,
            "9": 523469120.0,
            "10": 523195520.0,
            "11": 524297440.0,
            "12": 523455616.0,
            "13": 523497152.0,
            "14": 524479520.0,
            "15": 523635392.0,
            "16": 523462432.0,
            "17": 523079680.0,
            "18": 523362816.0,
            "19": 523208736.0,
            "20": 523229056.0,
            "21": 524937344.0,
            "22": 523659200.0,
            "23": 523415552.0,
            "24": 523485568.0,
            "25": 525640512.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": 20663463936.0,
            "2": 20663463936.0,
            "3": 20663463936.0,
            "4": 20663463936.0,
            "5": 20663463936.0,
            "6": 20663463936.0,
            "7": 20663463936.0,
            "8": 20663463936.0,
            "9": 20663463936.0,
            "10": 20663463936.0,
            "11": 20663463936.0,
            "12": 20663463936.0,
            "13": 20663463936.0,
            "14": 20663463936.0,
            "15": 20663463936.0,
            "16": 20663463936.0,
            "17": 20663463936.0,
            "18": 20663463936.0,
            "19": 20663463936.0,
            "20": 20663463936.0,
            "21": 20663463936.0,
            "22": 20663463936.0,
            "23": 20663463936.0,
            "24": 20663463936.0,
            "25": 20663463936.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": 51363229696.0,
            "2": 58217480192.0,
            "3": 58217480192.0,
            "4": 58217480192.0,
            "5": 58217480192.0,
            "6": 58217480192.0,
            "7": 58217480192.0,
            "8": 58217480192.0,
            "9": 58217480192.0,
            "10": 58217480192.0,
            "11": 58217480192.0,
            "12": 58217480192.0,
            "13": 58217480192.0,
            "14": 58217480192.0,
            "15": 58217480192.0,
            "16": 58217480192.0,
            "17": 58217480192.0,
            "18": 58217480192.0,
            "19": 58217480192.0,
            "20": 58217480192.0,
            "21": 58217480192.0,
            "22": 58217480192.0,
            "23": 58217480192.0,
            "24": 58217480192.0,
            "25": 58217480192.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 5.70576,
            "3": "nan",
            "4": 0.89304,
            "5": "nan",
            "6": 0.89085,
            "7": "nan",
            "8": 0.89054,
            "9": "nan",
            "10": 0.88818,
            "11": "nan",
            "12": 0.88741,
            "13": "nan",
            "14": 0.88829,
            "15": "nan",
            "16": 0.89204,
            "17": "nan",
            "18": 0.8886,
            "19": "nan",
            "20": 0.88626,
            "21": "nan",
            "22": 0.8871,
            "23": "nan",
            "24": 0.88991,
            "25": "nan"
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": 12.59715,
            "2": 12.60067,
            "3": 12.59727,
            "4": 12.60021,
            "5": 12.59013,
            "6": 12.58834,
            "7": 12.57605,
            "8": 12.5362,
            "9": 12.50745,
            "10": 12.49091,
            "11": 12.32614,
            "12": 12.29366,
            "13": 12.22589,
            "14": 12.23023,
            "15": 11.82108,
            "16": 11.80586,
            "17": 11.77001,
            "18": 11.74946,
            "19": 11.62189,
            "20": 11.51704,
            "21": 11.27121,
            "22": 11.38966,
            "23": 11.29559,
            "24": 11.16591,
            "25": 11.00354
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": 523037536.0,
            "2": 523668064.0,
            "3": 522933056.0,
            "4": 523228480.0,
            "5": 523009792.0,
            "6": 523364320.0,
            "7": 523427840.0,
            "8": 523074688.0,
            "9": 523459232.0,
            "10": 523184992.0,
            "11": 524288736.0,
            "12": 523447712.0,
            "13": 523490112.0,
            "14": 524476096.0,
            "15": 523630496.0,
            "16": 523459232.0,
            "17": 523075936.0,
            "18": 523360192.0,
            "19": 523206816.0,
            "20": 523230848.0,
            "21": 524941248.0,
            "22": 523654464.0,
            "23": 523420576.0,
            "24": 523494720.0,
            "25": 525638016.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": 20663519232.0,
            "2": 20663519232.0,
            "3": 20663519232.0,
            "4": 20663519232.0,
            "5": 20663519232.0,
            "6": 20663519232.0,
            "7": 20663519232.0,
            "8": 20663519232.0,
            "9": 20663519232.0,
            "10": 20663519232.0,
            "11": 20663519232.0,
            "12": 20663519232.0,
            "13": 20663519232.0,
            "14": 20663519232.0,
            "15": 20663519232.0,
            "16": 20663519232.0,
            "17": 20663519232.0,
            "18": 20663519232.0,
            "19": 20663519232.0,
            "20": 20663519232.0,
            "21": 20663519232.0,
            "22": 20663519232.0,
            "23": 20663519232.0,
            "24": 20663519232.0,
            "25": 20663519232.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": 50289487872.0,
            "2": 57143791616.0,
            "3": 57143791616.0,
            "4": 57143791616.0,
            "5": 57143791616.0,
            "6": 57143791616.0,
            "7": 57143791616.0,
            "8": 57143791616.0,
            "9": 57143791616.0,
            "10": 57143791616.0,
            "11": 57143791616.0,
            "12": 57143791616.0,
            "13": 57143791616.0,
            "14": 57143791616.0,
            "15": 57143791616.0,
            "16": 57143791616.0,
            "17": 57143791616.0,
            "18": 57143791616.0,
            "19": 57143791616.0,
            "20": 57143791616.0,
            "21": 57143791616.0,
            "22": 57143791616.0,
            "23": 57143791616.0,
            "24": 57143791616.0,
            "25": 57143791616.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 25,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 5.99154,
            "3": "nan",
            "4": 1.10664,
            "5": "nan",
            "6": 1.10108,
            "7": "nan",
            "8": 1.09852,
            "9": "nan",
            "10": 1.10395,
            "11": "nan",
            "12": 1.13133,
            "13": "nan",
            "14": 1.1009,
            "15": "nan",
            "16": 1.10173,
            "17": "nan",
            "18": 1.10058,
            "19": "nan",
            "20": 1.10006,
            "21": "nan",
            "22": 1.10081,
            "23": "nan",
            "24": 1.09852,
            "25": "nan"
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_lts_dgx_a100.json
================================================
{}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
  NVTE_FWD_LAYERNORM_SM_MARGIN: 16
  NVTE_BWD_LAYERNORM_SM_MARGIN: 16
MODEL_ARGS:
  --num-layers: 32
  --hidden-size: 4096
  --num-attention-heads: 32
  --group-query-attention: true
  --num-query-groups: 8
  --untie-embeddings-and-output-weights: true
  --log-throughput: true
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 2
  --global-batch-size: 8
  --seq-length: 8192
  --max-position-embeddings: 8192
  --train-iters: 25
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --tokenizer-type: NullTokenizer
  --vocab-size: 131072
  --mock-data: true
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 2
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 5
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 4
  --pipeline-model-parallel-size: 1
  --use-distributed-optimizer: true
  --overlap-grad-reduce: true
  --overlap-param-gather: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular
METRICS:
  - "iteration-time"
  - "lm loss"
  - "num-zeros"
  - "mem-allocated-bytes"
  - "mem-max-allocated-bytes"


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_disable/model_config.yaml
================================================
ENV_VARS:
  SKIP_PYTEST: 1
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  # generic training settings
  --deterministic-mode: true
  --bf16: true
  --train-iters: 50
  --eval-iters: 0
  --manual-gc: true
  --use-mcore-models: true
  --distributed-backend: nccl
  # parallelism settings
  --sequence-parallel: true
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 2
  --micro-batch-size: 1
  --global-batch-size: 128
  # embedding settings
  --untie-embeddings-and-output-weights: true
  --position-embedding-type: rope
  --rotary-percent: 1.0
  --max-position-embeddings: 4096
  # transformer settings
  --num-layers: 32
  --hidden-size: 3072
  --ffn-hidden-size: 8192
  --num-attention-heads: 32
  --num-query-groups: 8
  --seq-length: 4096
  --kv-channels: 128
  --ffn-hidden-size: 8192
  --group-query-attention: true
  --normalization: RMSNorm
  --swiglu: true
  --attention-dropout: 0.0
  --hidden-dropout: 0.0
  --no-create-attention-mask-in-dataloader: true
  --transformer-impl: transformer_engine
  --disable-bias-linear: true
  # gradient & optimizer settings
  --clip-grad: 1.0
  --overlap-grad-reduce: true
  --overlap-param-gather: true
  --lr: 3e-4
  --lr-warmup-samples: 0
  --adam-beta1: 0.9
  --adam-beta2: 0.95
  --adam-eps: 1e-8
  --use-distributed-optimizer: true
  --split: 949,50,1
  --no-gradient-accumulation-fusion: true
  # checkpoint settings
  --save-interval: 10000
  --eval-interval: 1000
  --ckpt-format: torch_dist
  --dist-ckpt-optim-fully-reshardable: true
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  # data settings
  --data-cache-path: ${DATA_CACHE_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  # logging settings
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --timing-log-level: 0
  --log-interval: 1
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  # rerun settings
  --rerun-mode: disabled
  --async-save: true
  --use-persistent-ckpt-worker: true
AFTER_SCRIPT: |
  check_log() { if [[ -z $(grep -r $1 "$2" $LOG_DIR) ]]; then exit 1; else echo OK; fi }
  check_log_not() { if [[ -z $(grep -r $1 "$2" $LOG_DIR) ]]; then echo OK; else exit 1; fi }
  check_log_not -F "WARNING:megatron.core.rerun_state_machine:Result validation enabled"
  check_log -F "Setting rerun_state_machine.current_iteration to 0..."
  EXIT_CODE=0
TEST_TYPE: regular

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_enable/model_config.yaml
================================================
ENV_VARS:
  SKIP_PYTEST: 1
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  # generic training settings
  --deterministic-mode: true
  --bf16: true
  --train-iters: 50
  --eval-iters: 0
  --manual-gc: true
  --use-mcore-models: true
  --distributed-backend: nccl
  # parallelism settings
  --sequence-parallel: true
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 2
  --micro-batch-size: 1
  --global-batch-size: 128
  # embedding settings
  --untie-embeddings-and-output-weights: true
  --position-embedding-type: rope
  --rotary-percent: 1.0
  --max-position-embeddings: 4096
  # transformer settings
  --num-layers: 32
  --hidden-size: 3072
  --ffn-hidden-size: 8192
  --num-attention-heads: 32
  --num-query-groups: 8
  --seq-length: 4096
  --kv-channels: 128
  --ffn-hidden-size: 8192
  --group-query-attention: true
  --normalization: RMSNorm
  --swiglu: true
  --attention-dropout: 0.0
  --hidden-dropout: 0.0
  --no-create-attention-mask-in-dataloader: true
  --transformer-impl: transformer_engine
  --disable-bias-linear: true
  # gradient & optimizer settings
  --clip-grad: 1.0
  --overlap-grad-reduce: true
  --overlap-param-gather: true
  --lr: 3e-4
  --lr-warmup-samples: 0
  --adam-beta1: 0.9
  --adam-beta2: 0.95
  --adam-eps: 1e-8
  --use-distributed-optimizer: true
  --split: 949,50,1
  --no-gradient-accumulation-fusion: true
  # checkpoint settings
  --save-interval: 10000
  --eval-interval: 1000
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  # data settings
  --data-cache-path: ${DATA_CACHE_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  # logging settings
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --timing-log-level: 0
  --log-interval: 1
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  # rerun settings
  --rerun-mode: validate_results
  --async-save: true
  --use-persistent-ckpt-worker: true
AFTER_SCRIPT: |
  check_log() { if [[ -z $(grep -r $1 "$2" $LOG_DIR) ]]; then exit 1; else echo OK; fi }
  check_log -F "WARNING:megatron.core.rerun_state_machine:Result validation enabled"
  EXIT_CODE=0
TEST_TYPE: regular

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_1/model_config.yaml
================================================
ENV_VARS:
  SKIP_PYTEST: 1
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  # generic training settings
  --deterministic-mode: true
  --bf16: true
  --train-iters: 50
  --eval-iters: 0
  --manual-gc: true
  --use-mcore-models: true
  --distributed-backend: nccl
  # parallelism settings
  --sequence-parallel: true
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 2
  --micro-batch-size: 1
  --global-batch-size: 128
  # embedding settings
  --untie-embeddings-and-output-weights: true
  --position-embedding-type: rope
  --rotary-percent: 1.0
  --max-position-embeddings: 4096
  # transformer settings
  --num-layers: 32
  --hidden-size: 3072
  --ffn-hidden-size: 8192
  --num-attention-heads: 32
  --num-query-groups: 8
  --seq-length: 4096
  --kv-channels: 128
  --ffn-hidden-size: 8192
  --group-query-attention: true
  --normalization: RMSNorm
  --swiglu: true
  --attention-dropout: 0.0
  --hidden-dropout: 0.0
  --no-create-attention-mask-in-dataloader: true
  --transformer-impl: transformer_engine
  --disable-bias-linear: true
  # gradient & optimizer settings
  --clip-grad: 1.0
  --overlap-grad-reduce: true
  --overlap-param-gather: true
  --lr: 3e-4
  --lr-warmup-samples: 0
  --adam-beta1: 0.9
  --adam-beta2: 0.95
  --adam-eps: 1e-8
  --use-distributed-optimizer: true
  --split: 949,50,1
  --no-gradient-accumulation-fusion: true
  # checkpoint settings
  --save-interval: 10000
  --eval-interval: 1000
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  # data settings
  --data-cache-path: ${DATA_CACHE_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  # logging settings
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --timing-log-level: 0
  --log-interval: 1
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  # rerun settings
  --rerun-mode: validate_results
  --error-injection-rate: 100
  --error-injection-type: persistent_error
  --async-save: true
  --use-persistent-ckpt-worker: true
AFTER_SCRIPT: |
  check_log() { if [[ -z $(grep -r $1 "$2" $LOG_DIR) ]]; then exit 1; else echo OK; fi }
  check_log -F "Result validation enabled"
  check_log -F "Injecting error type Persistent error"
  check_log -F "First rerun: unexpected result is reproducible within the tolerance"
  check_log -F "Saving a checkpoint and exiting now. Please resume the job from the checkpoint to rerun the last iteration and establish a diagnostic"
  EXIT_CODE=0
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_persistent_2/model_config.yaml
================================================
ENV_VARS:
  SKIP_PYTEST: 1
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  # generic training settings
  --deterministic-mode: true
  --bf16: true
  --train-iters: 50
  --eval-iters: 0
  --manual-gc: true
  --use-mcore-models: true
  --distributed-backend: nccl
  # parallelism settings
  --sequence-parallel: true
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 2
  --micro-batch-size: 1
  --global-batch-size: 128
  # embedding settings
  --untie-embeddings-and-output-weights: true
  --position-embedding-type: rope
  --rotary-percent: 1.0
  --max-position-embeddings: 4096
  # transformer settings
  --num-layers: 32
  --hidden-size: 3072
  --ffn-hidden-size: 8192
  --num-attention-heads: 32
  --num-query-groups: 8
  --seq-length: 4096
  --kv-channels: 128
  --ffn-hidden-size: 8192
  --group-query-attention: true
  --normalization: RMSNorm
  --swiglu: true
  --attention-dropout: 0.0
  --hidden-dropout: 0.0
  --no-create-attention-mask-in-dataloader: true
  --transformer-impl: transformer_engine
  --disable-bias-linear: true
  # gradient & optimizer settings
  --clip-grad: 1.0
  --overlap-grad-reduce: true
  --overlap-param-gather: true
  --lr: 3e-4
  --lr-warmup-samples: 0
  --adam-beta1: 0.9
  --adam-beta2: 0.95
  --adam-eps: 1e-8
  --use-distributed-optimizer: true
  --split: 949,50,1
  --no-gradient-accumulation-fusion: true
  # checkpoint settings
  --save-interval: 10000
  --eval-interval: 1000
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  # data settings
  --data-cache-path: ${DATA_CACHE_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  # logging settings
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --timing-log-level: 0
  --log-interval: 1
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  # rerun settings
  --rerun-mode: validate_results
  --async-save: true
  --use-persistent-ckpt-worker: true
AFTER_SCRIPT: |
  check_log() { if [[ -z $(grep -r $1 "$2" $LOG_DIR) ]]; then exit 1; else echo OK; fi }
  check_log -F "WARNING:megatron.core.rerun_state_machine:Result validation enabled"
  check_log -E "ERROR:megatron\.core\.rerun_state_machine:Rank [0-9]+, node ([0-9a-z]|\-)+, device [0-9]+: Possible persistent error!!"
  EXIT_CODE=0
TEST_TYPE: frozen-start

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_reshard/model_config.yaml
================================================
ENV_VARS:
  SKIP_PYTEST: 1
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  # generic training settings
  --deterministic-mode: true
  --bf16: true
  --train-iters: 50
  --eval-iters: 0
  --manual-gc: true
  --use-mcore-models: true
  --distributed-backend: nccl
  # parallelism settings
  --sequence-parallel: true
  --tensor-model-parallel-size: 2
  --pipeline-model-parallel-size: 2
  --micro-batch-size: 1
  --global-batch-size: 128
  # embedding settings
  --untie-embeddings-and-output-weights: true
  --position-embedding-type: rope
  --rotary-percent: 1.0
  --max-position-embeddings: 4096
  # transformer settings
  --num-layers: 32
  --hidden-size: 3072
  --ffn-hidden-size: 8192
  --num-attention-heads: 32
  --num-query-groups: 8
  --seq-length: 4096
  --kv-channels: 128
  --ffn-hidden-size: 8192
  --group-query-attention: true
  --normalization: RMSNorm
  --swiglu: true
  --attention-dropout: 0.0
  --hidden-dropout: 0.0
  --no-create-attention-mask-in-dataloader: true
  --transformer-impl: transformer_engine
  --disable-bias-linear: true
  # gradient & optimizer settings
  --clip-grad: 1.0
  --overlap-grad-reduce: true
  --overlap-param-gather: true
  --lr: 3e-4
  --lr-warmup-samples: 0
  --adam-beta1: 0.9
  --adam-beta2: 0.95
  --adam-eps: 1e-8
  --use-distributed-optimizer: true
  --split: 949,50,1
  --no-gradient-accumulation-fusion: true
  # checkpoint settings
  --save-interval: 10000
  --eval-interval: 1000
  --ckpt-format: torch_dist
  --dist-ckpt-optim-fully-reshardable: true
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  # data settings
  --data-cache-path: ${DATA_CACHE_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  # logging settings
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --timing-log-level: 0
  --log-interval: 1
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  # rerun settings
  --rerun-mode: validate_results
  --async-save: true
  --use-persistent-ckpt-worker: true
AFTER_SCRIPT: |
  check_log() { if [[ -z $(grep -r $1 "$2" $LOG_DIR) ]]; then exit 1; else echo OK; fi }
  check_log -F "WARNING:megatron.core.rerun_state_machine:Result validation enabled"
  check_log -F "Job sharding has changed: Rerun state will be ignored"
  EXIT_CODE=0
TEST_TYPE: frozen-start

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume/model_config.yaml
================================================
ENV_VARS:
  SKIP_PYTEST: 1
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  # generic training settings
  --deterministic-mode: true
  --bf16: true
  --train-iters: 50
  --eval-iters: 0
  --manual-gc: true
  --use-mcore-models: true
  --distributed-backend: nccl
  # parallelism settings
  --sequence-parallel: true
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 2
  --micro-batch-size: 1
  --global-batch-size: 128
  # embedding settings
  --untie-embeddings-and-output-weights: true
  --position-embedding-type: rope
  --rotary-percent: 1.0
  --max-position-embeddings: 4096
  # transformer settings
  --num-layers: 32
  --hidden-size: 3072
  --ffn-hidden-size: 8192
  --num-attention-heads: 32
  --num-query-groups: 8
  --seq-length: 4096
  --kv-channels: 128
  --ffn-hidden-size: 8192
  --group-query-attention: true
  --normalization: RMSNorm
  --swiglu: true
  --attention-dropout: 0.0
  --hidden-dropout: 0.0
  --no-create-attention-mask-in-dataloader: true
  --transformer-impl: transformer_engine
  --disable-bias-linear: true
  # gradient & optimizer settings
  --clip-grad: 1.0
  --overlap-grad-reduce: true
  --overlap-param-gather: true
  --lr: 3e-4
  --lr-warmup-samples: 0
  --adam-beta1: 0.9
  --adam-beta2: 0.95
  --adam-eps: 1e-8
  --use-distributed-optimizer: true
  --split: 949,50,1
  --no-gradient-accumulation-fusion: true
  # checkpoint settings
  --save-interval: 10000
  --eval-interval: 1000
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  # data settings
  --data-cache-path: ${DATA_CACHE_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  # logging settings
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --timing-log-level: 0
  --log-interval: 1
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  # rerun settings
  --rerun-mode: validate_results
  --async-save: true
  --use-persistent-ckpt-worker: true
AFTER_SCRIPT: |
  check_log() { if [[ -z $(grep -r $1 "$2" $LOG_DIR) ]]; then exit 1; else echo OK; fi }
  check_log -F "successfully loaded checkpoint"
  check_log -F "WARNING:megatron.core.rerun_state_machine:Result validation enabled"
  EXIT_CODE=0
TEST_TYPE: frozen-start

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/README.md
================================================
## Gradient tests
### Overview of the components of this test
* Trains for one step to collect gradients
* Gradients are extracted from the optimizer state in the checkpoint. You can get
this behavior by setting adam's b1, b2 to 0, so each step will override the previous adam 
state. The gradients for the previous step are stored in adam's first momentum 
state. Make sure to include `--adam-beta1: 0.0` and `--adam-beta2: 0.0`.
* script for comparing grads is currently (2025/09/16) tuned to some degree for
GPT models. The key thing that has to change with other models is the logic for 
determining which layers are row parallel linear. Those layers are sharded 
differently and need a different reshaping function to compare between the
non-model-parallel base case and the model parallel case. The script is located
currently in `tests/functional_tests/python_test_utils/test_optimizer_grads_match.py`.
* The test script currently relies on the older optimizer 
checkpoint format. For now make sure to add 
`--dist-ckpt-save-pre-mcore-014: true` to your test runs.
* You should disable randomization such as dropout which would have different
patterns with a single global batch and/or model/data parallel shards of the 
features.
* You can use this approach to test different configurations that are expected 
to result in the same gradients, not just model parallel configurations.
* To add a new test that follows this pattern, you can copy/modify this directory and
register the test in a similar way into `tests/functional_tests/test_utils/recipes/gpt-grads.yaml`
or something similar.

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 1,
        "step_interval": 1,
        "values": {
            "1": 10.86791
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 1,
        "step_interval": 1,
        "values": {
            "1": 152866448.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 1,
        "step_interval": 1,
        "values": {
            "1": 67277201408.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 1,
        "step_interval": 1,
        "values": {
            "1": 67277205504.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 1,
        "step_interval": 1,
        "values": {
            "1": 14.45281
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_resume_check_grads/model_config.yaml
================================================
ENV_VARS:
  SKIP_PYTEST: 0
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ENV_VARS:
  # Model specific environmental variables below. Currently only GPUS_PER_NODE is supported.
  MODEL_ARGS:
    # DP=1, TP=1, CP=1, PP=1
    GPUS_PER_NODE: 1
  MODEL_ARGS_10:
    # DP=1, TP=1, CP=1, PP=1
    GPUS_PER_NODE: 2
  MODEL_ARGS_2:
    # DP=2, TP=1, CP=1, PP=1
    GPUS_PER_NODE: 2
  MODEL_ARGS_3:
    # DP=1, TP=2, CP=1, PP=1
    GPUS_PER_NODE: 2
  MODEL_ARGS_4:
    # DP=1, TP=1, CP=2, PP=1
    GPUS_PER_NODE: 2
  MODEL_ARGS_5:
    # DP=1, TP=1, CP=1, PP=2
    GPUS_PER_NODE: 2
TEST_TYPE: checkpoint-consistency
MODE: pretraining
# TODO figure out how to use yq merge syntax to put reused arguments in a single place.
BASE_MODEL_ARGS: &BASE_MODEL_ARGS
  # generic training settings
  #  Use a an odd ROPE base to make sure the gradients are farther from 0
  --rotary-base: 500
  # Vocab size divisible by 1 to keep dimensions in line under TP. TODO figure out vocab size.
  --finetune: true
  --seed: ${REPEAT}
  --override-opt_param-scheduler: true
  --no-load-optim: true
  --recompute-num-layers: 1
  --deterministic-mode: true
  --bf16: true
  --train-iters: 1
  --eval-iters: 0
  --manual-gc: true
  --use-mcore-models: true
  --distributed-backend: nccl
  # parallelism settings
  --sequence-parallel: true
  # embedding settings
  --untie-embeddings-and-output-weights: true
  --position-embedding-type: rope
  --rotary-percent: 1.0
  --max-position-embeddings: 4096
  # transformer settings
  --num-layers: 32
  --hidden-size: 3072
  --ffn-hidden-size: 8192
  --num-attention-heads: 32
  --num-query-groups: 8
  --seq-length: 512
  --kv-channels: 128
  --group-query-attention: true
  --normalization: RMSNorm
  --swiglu: true
  --attention-dropout: 0.0
  --hidden-dropout: 0.0
  --no-create-attention-mask-in-dataloader: true
  --transformer-impl: transformer_engine
  --disable-bias-linear: true
  # gradient & optimizer settings
  --clip-grad: 1.0
  --overlap-grad-reduce: true
  --overlap-param-gather: true
  --lr: 3e-4
  --lr-warmup-samples: 0
  # Set adam beta1 and beta2 to 0 so that we can easily check the gradients
  --adam-beta1: 0.0
  --adam-beta2: 0.0
  --adam-eps: 1e-8
  --use-distributed-optimizer: true
  --split: 949,50,1
  --no-gradient-accumulation-fusion: true
  # checkpoint settings
  --save-interval: 1
  --eval-interval: 1000
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --dist-ckpt-optim-fully-reshardable: true
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}/model/mcore_gpt/gpt3_4b_pyt/25.03.05_bf16_rerun-enabled_v2
  --async-save: true
  --use-persistent-ckpt-worker: true
  # data settings
  --data-cache-path: ${DATA_CACHE_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  # logging settings
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --timing-log-level: 0
  --log-interval: 1
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  # rerun settings
  --rerun-mode: validate_results
MODEL_ARGS:
  <<: *BASE_MODEL_ARGS
  #########################################################
  # DP=1, TP=1, CP=1, PP=1
  --micro-batch-size: 2
  --global-batch-size: 2
  --tensor-model-parallel-size: 1
  --context-parallel-size: 1
  --pipeline-model-parallel-size: 1
MODEL_ARGS_2:
  <<: *BASE_MODEL_ARGS
  #########################################################
  # DP=2, TP=1, CP=1, PP=1
  --micro-batch-size: 1
  --global-batch-size: 2
  --tensor-model-parallel-size: 1
  --context-parallel-size: 1
  --pipeline-model-parallel-size: 1
MODEL_ARGS_3:
  <<: *BASE_MODEL_ARGS
  #########################################################
  # DP=1, TP=2, CP=1, PP=1
  --micro-batch-size: 2
  --global-batch-size: 2
  --tensor-model-parallel-size: 2
  --context-parallel-size: 1
  --pipeline-model-parallel-size: 1
MODEL_ARGS_4:
  <<: *BASE_MODEL_ARGS
  #########################################################
  # DP=1, TP=1, CP=2, PP=1
  --micro-batch-size: 2
  --global-batch-size: 2
  --tensor-model-parallel-size: 1
  --context-parallel-size: 2
  --pipeline-model-parallel-size: 1
MODEL_ARGS_5:
  <<: *BASE_MODEL_ARGS
  #########################################################
  # DP=1, TP=1, CP=1, PP=2
  --micro-batch-size: 2
  --global-batch-size: 2
  --tensor-model-parallel-size: 1
  --context-parallel-size: 1
  --pipeline-model-parallel-size: 2


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_reruns_transient/model_config.yaml
================================================
ENV_VARS:
  SKIP_PYTEST: 1
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  # generic training settings
  --deterministic-mode: true
  --bf16: true
  --train-iters: 50
  --eval-iters: 0
  --manual-gc: true
  --use-mcore-models: true
  --distributed-backend: nccl
  # parallelism settings
  --sequence-parallel: true
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 2
  --micro-batch-size: 1
  --global-batch-size: 128
  # embedding settings
  --untie-embeddings-and-output-weights: true
  --position-embedding-type: rope
  --rotary-percent: 1.0
  --max-position-embeddings: 4096
  # transformer settings
  --num-layers: 32
  --hidden-size: 3072
  --ffn-hidden-size: 8192
  --num-attention-heads: 32
  --num-query-groups: 8
  --seq-length: 4096
  --kv-channels: 128
  --ffn-hidden-size: 8192
  --group-query-attention: true
  --normalization: RMSNorm
  --swiglu: true
  --attention-dropout: 0.0
  --hidden-dropout: 0.0
  --no-create-attention-mask-in-dataloader: true
  --transformer-impl: transformer_engine
  --disable-bias-linear: true
  # gradient & optimizer settings
  --clip-grad: 1.0
  --overlap-grad-reduce: true
  --overlap-param-gather: true
  --lr: 3e-4
  --lr-warmup-samples: 0
  --adam-beta1: 0.9
  --adam-beta2: 0.95
  --adam-eps: 1e-8
  --use-distributed-optimizer: true
  --split: 949,50,1
  --no-gradient-accumulation-fusion: true
  # checkpoint settings
  --save-interval: 10000
  --eval-interval: 1000
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --exit-interval: 4
  # data settings
  --data-cache-path: ${DATA_CACHE_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  # logging settings
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --timing-log-level: 0
  --log-interval: 1
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --log-memory-to-tensorboard: true
  # rerun settings
  --rerun-mode: validate_results
  --error-injection-rate: 100
  --error-injection-type: transient_error
  --async-save: true
  --use-persistent-ckpt-worker: true
AFTER_SCRIPT: |
  check_log() { if [[ -z $(grep -r $1 "$2" $LOG_DIR) ]]; then exit 1; else echo OK; fi }
  check_log -F "WARNING:megatron.core.rerun_state_machine:Result validation enabled"
  check_log -F "WARNING:megatron.core.rerun_state_machine:Injecting error type Transient error"
  check_log -E "ERROR:megatron\.core\.rerun_state_machine:Rank [0-9]+, node ([0-9a-z]|\-)+, device [0-9]+: Possible transient error!!"
  EXIT_CODE=0
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.77535,
            "2": 10.78444,
            "3": 10.78594,
            "4": 10.74844,
            "5": 10.81549,
            "6": 10.82694,
            "7": 10.78465,
            "8": 10.77766,
            "9": 10.78351,
            "10": 10.74241,
            "11": 10.8303,
            "12": 10.80334,
            "13": 10.81651,
            "14": 10.82185,
            "15": 10.7422,
            "16": 10.75086,
            "17": 10.71886,
            "18": 10.74306,
            "19": 10.74073,
            "20": 10.63717,
            "21": 10.62764,
            "22": 10.48433,
            "23": 10.657,
            "24": 10.52681,
            "25": 10.47547,
            "26": 10.54093,
            "27": 10.55549,
            "28": 10.52151,
            "29": 10.53465,
            "30": 10.30894,
            "31": 10.06666,
            "32": 10.41746,
            "33": 10.42488,
            "34": 10.17386,
            "35": 10.2248,
            "36": 10.18284,
            "37": 10.29686,
            "38": 10.14801,
            "39": 10.36934,
            "40": 10.04006,
            "41": 10.10749,
            "42": 10.18199,
            "43": 9.79649,
            "44": 9.91069,
            "45": 9.79712,
            "46": 9.79411,
            "47": 10.11362,
            "48": 9.82518,
            "49": 9.50417,
            "50": 9.887
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1597.0,
            "2": 1627.0,
            "3": 1645.0,
            "4": 1760.0,
            "5": 1863.0,
            "6": 1829.0,
            "7": 1908.0,
            "8": 1642.0,
            "9": 1885.0,
            "10": 1444.0,
            "11": 1844.0,
            "12": 1833.0,
            "13": 1903.0,
            "14": 1906.0,
            "15": 1984.0,
            "16": 2015.0,
            "17": 1820.0,
            "18": 1763.0,
            "19": 1723.0,
            "20": 1681.0,
            "21": 1875.0,
            "22": 1677.0,
            "23": 1981.0,
            "24": 1571.0,
            "25": 1588.0,
            "26": 1659.0,
            "27": 1732.0,
            "28": 2035.0,
            "29": 1977.0,
            "30": 1932.0,
            "31": 1579.0,
            "32": 1890.0,
            "33": 2186.0,
            "34": 1984.0,
            "35": 2000.0,
            "36": 1941.0,
            "37": 2334.0,
            "38": 2132.0,
            "39": 2533.0,
            "40": 2156.0,
            "41": 2318.0,
            "42": 2339.0,
            "43": 1998.0,
            "44": 2097.0,
            "45": 2178.0,
            "46": 2287.0,
            "47": 2409.0,
            "48": 2319.0,
            "49": 2104.0,
            "50": 2433.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 581489664.0,
            "2": 581489664.0,
            "3": 581489664.0,
            "4": 581489664.0,
            "5": 581489664.0,
            "6": 581489664.0,
            "7": 581489664.0,
            "8": 581489664.0,
            "9": 581489664.0,
            "10": 581489664.0,
            "11": 581489664.0,
            "12": 581489664.0,
            "13": 581489664.0,
            "14": 581489664.0,
            "15": 581489664.0,
            "16": 581489664.0,
            "17": 581489664.0,
            "18": 581489664.0,
            "19": 581489664.0,
            "20": 581489664.0,
            "21": 581489664.0,
            "22": 581489664.0,
            "23": 581489664.0,
            "24": 581489664.0,
            "25": 581489664.0,
            "26": 581489664.0,
            "27": 581489664.0,
            "28": 581489664.0,
            "29": 581489664.0,
            "30": 581489664.0,
            "31": 581489664.0,
            "32": 581489664.0,
            "33": 581489664.0,
            "34": 581489664.0,
            "35": 581489664.0,
            "36": 581489664.0,
            "37": 581489664.0,
            "38": 581489664.0,
            "39": 581489664.0,
            "40": 581489664.0,
            "41": 581489664.0,
            "42": 581489664.0,
            "43": 581489664.0,
            "44": 581489664.0,
            "45": 581489664.0,
            "46": 581489664.0,
            "47": 581489664.0,
            "48": 581489664.0,
            "49": 581489664.0,
            "50": 581489664.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 4605814272.0,
            "2": 4702430720.0,
            "3": 4702430720.0,
            "4": 4702430720.0,
            "5": 4702430720.0,
            "6": 4702430720.0,
            "7": 4702430720.0,
            "8": 4702430720.0,
            "9": 4702430720.0,
            "10": 4702430720.0,
            "11": 4702430720.0,
            "12": 4702430720.0,
            "13": 4702430720.0,
            "14": 4702430720.0,
            "15": 4702430720.0,
            "16": 4702430720.0,
            "17": 4702430720.0,
            "18": 4702430720.0,
            "19": 4702430720.0,
            "20": 4702430720.0,
            "21": 4702430720.0,
            "22": 4702430720.0,
            "23": 4702430720.0,
            "24": 4702430720.0,
            "25": 4702430720.0,
            "26": 4702430720.0,
            "27": 4702430720.0,
            "28": 4702430720.0,
            "29": 4702430720.0,
            "30": 4702430720.0,
            "31": 4702430720.0,
            "32": 4702430720.0,
            "33": 4702430720.0,
            "34": 4702430720.0,
            "35": 4702430720.0,
            "36": 4702430720.0,
            "37": 4702430720.0,
            "38": 4702430720.0,
            "39": 4702430720.0,
            "40": 4702430720.0,
            "41": 4702430720.0,
            "42": 4702430720.0,
            "43": 4702430720.0,
            "44": 4702430720.0,
            "45": 4702430720.0,
            "46": 4702430720.0,
            "47": 4702430720.0,
            "48": 4702430720.0,
            "49": 4702430720.0,
            "50": 4702430720.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 3.85031,
            "3": 0.10579,
            "4": 0.08811,
            "5": 0.08828,
            "6": 0.09023,
            "7": 0.09011,
            "8": 0.09009,
            "9": 0.08883,
            "10": 0.09032,
            "11": 0.08909,
            "12": 0.09028,
            "13": 0.08981,
            "14": 0.0908,
            "15": 0.09035,
            "16": 0.08825,
            "17": 0.09092,
            "18": 0.09041,
            "19": 0.09012,
            "20": 0.09006,
            "21": 0.08995,
            "22": 0.09051,
            "23": 0.09078,
            "24": 0.09133,
            "25": 0.0906,
            "26": 0.09043,
            "27": 0.08991,
            "28": 0.08972,
            "29": 0.09046,
            "30": 0.08921,
            "31": 0.09085,
            "32": 0.09076,
            "33": 0.0898,
            "34": 0.08988,
            "35": 0.09085,
            "36": 0.08951,
            "37": 0.09036,
            "38": 0.08966,
            "39": 0.08995,
            "40": 0.0898,
            "41": 0.09082,
            "42": 0.09019,
            "43": 0.09295,
            "44": 0.09078,
            "45": 0.0912,
            "46": 0.09208,
            "47": 0.09077,
            "48": 0.09093,
            "49": 0.09052,
            "50": 0.08959
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.89074,
            "2": 10.89234,
            "3": 10.8903,
            "4": 10.8922,
            "5": 10.89414,
            "6": 10.90232,
            "7": 10.88841,
            "8": 10.9021,
            "9": 10.90201,
            "10": 10.88511,
            "11": 10.87636,
            "12": 10.895,
            "13": 10.89838,
            "14": 10.89179,
            "15": 10.85121,
            "16": 10.85339,
            "17": 10.82863,
            "18": 10.83657,
            "19": 10.82846,
            "20": 10.74587,
            "21": 10.73112,
            "22": 10.6126,
            "23": 10.72614,
            "24": 10.62933,
            "25": 10.59397,
            "26": 10.63359,
            "27": 10.63131,
            "28": 10.58202,
            "29": 10.58668,
            "30": 10.40936,
            "31": 10.15875,
            "32": 10.48318,
            "33": 10.46978,
            "34": 10.23975,
            "35": 10.28146,
            "36": 10.23893,
            "37": 10.35197,
            "38": 10.20568,
            "39": 10.40494,
            "40": 10.09274,
            "41": 10.16148,
            "42": 10.22306,
            "43": 9.84153,
            "44": 9.97326,
            "45": 9.84547,
            "46": 9.82101,
            "47": 10.14262,
            "48": 9.86552,
            "49": 9.54033,
            "50": 9.91688
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1601.0,
            "2": 1708.0,
            "3": 1739.0,
            "4": 1740.0,
            "5": 1963.0,
            "6": 1802.0,
            "7": 1896.0,
            "8": 1618.0,
            "9": 1935.0,
            "10": 1449.0,
            "11": 1960.0,
            "12": 1860.0,
            "13": 1950.0,
            "14": 1907.0,
            "15": 1864.0,
            "16": 1972.0,
            "17": 1719.0,
            "18": 1561.0,
            "19": 1764.0,
            "20": 1668.0,
            "21": 1922.0,
            "22": 1811.0,
            "23": 2038.0,
            "24": 1655.0,
            "25": 1664.0,
            "26": 1757.0,
            "27": 1860.0,
            "28": 2051.0,
            "29": 1995.0,
            "30": 1976.0,
            "31": 1530.0,
            "32": 1961.0,
            "33": 2077.0,
            "34": 1941.0,
            "35": 1965.0,
            "36": 1916.0,
            "37": 2344.0,
            "38": 2201.0,
            "39": 2388.0,
            "40": 2246.0,
            "41": 2411.0,
            "42": 2366.0,
            "43": 2054.0,
            "44": 2154.0,
            "45": 2135.0,
            "46": 2347.0,
            "47": 2301.0,
            "48": 2354.0,
            "49": 2230.0,
            "50": 2385.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 581489664.0,
            "2": 581489664.0,
            "3": 581489664.0,
            "4": 581489664.0,
            "5": 581489664.0,
            "6": 581489664.0,
            "7": 581489664.0,
            "8": 581489664.0,
            "9": 581489664.0,
            "10": 581489664.0,
            "11": 581489664.0,
            "12": 581489664.0,
            "13": 581489664.0,
            "14": 581489664.0,
            "15": 581489664.0,
            "16": 581489664.0,
            "17": 581489664.0,
            "18": 581489664.0,
            "19": 581489664.0,
            "20": 581489664.0,
            "21": 581489664.0,
            "22": 581489664.0,
            "23": 581489664.0,
            "24": 581489664.0,
            "25": 581489664.0,
            "26": 581489664.0,
            "27": 581489664.0,
            "28": 581489664.0,
            "29": 581489664.0,
            "30": 581489664.0,
            "31": 581489664.0,
            "32": 581489664.0,
            "33": 581489664.0,
            "34": 581489664.0,
            "35": 581489664.0,
            "36": 581489664.0,
            "37": 581489664.0,
            "38": 581489664.0,
            "39": 581489664.0,
            "40": 581489664.0,
            "41": 581489664.0,
            "42": 581489664.0,
            "43": 581489664.0,
            "44": 581489664.0,
            "45": 581489664.0,
            "46": 581489664.0,
            "47": 581489664.0,
            "48": 581489664.0,
            "49": 581489664.0,
            "50": 581489664.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 4605814272.0,
            "2": 4702430720.0,
            "3": 4702430720.0,
            "4": 4702430720.0,
            "5": 4702430720.0,
            "6": 4702430720.0,
            "7": 4702430720.0,
            "8": 4702430720.0,
            "9": 4702430720.0,
            "10": 4702430720.0,
            "11": 4702430720.0,
            "12": 4702430720.0,
            "13": 4702430720.0,
            "14": 4702430720.0,
            "15": 4702430720.0,
            "16": 4702430720.0,
            "17": 4702430720.0,
            "18": 4702430720.0,
            "19": 4702430720.0,
            "20": 4702430720.0,
            "21": 4702430720.0,
            "22": 4702430720.0,
            "23": 4702430720.0,
            "24": 4702430720.0,
            "25": 4702430720.0,
            "26": 4702430720.0,
            "27": 4702430720.0,
            "28": 4702430720.0,
            "29": 4702430720.0,
            "30": 4702430720.0,
            "31": 4702430720.0,
            "32": 4702430720.0,
            "33": 4702430720.0,
            "34": 4702430720.0,
            "35": 4702430720.0,
            "36": 4702430720.0,
            "37": 4702430720.0,
            "38": 4702430720.0,
            "39": 4702430720.0,
            "40": 4702430720.0,
            "41": 4702430720.0,
            "42": 4702430720.0,
            "43": 4702430720.0,
            "44": 4702430720.0,
            "45": 4702430720.0,
            "46": 4702430720.0,
            "47": 4702430720.0,
            "48": 4702430720.0,
            "49": 4702430720.0,
            "50": 4702430720.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 3.36008,
            "3": 0.07706,
            "4": 0.05754,
            "5": 0.06556,
            "6": 0.05767,
            "7": 0.06278,
            "8": 0.05622,
            "9": 0.05975,
            "10": 0.05635,
            "11": 0.06095,
            "12": 0.05668,
            "13": 0.06242,
            "14": 0.05756,
            "15": 0.06014,
            "16": 0.05763,
            "17": 0.06282,
            "18": 0.05672,
            "19": 0.06248,
            "20": 0.05666,
            "21": 0.05943,
            "22": 0.05746,
            "23": 0.06248,
            "24": 0.05782,
            "25": 0.0626,
            "26": 0.05697,
            "27": 0.06234,
            "28": 0.05681,
            "29": 0.06175,
            "30": 0.0575,
            "31": 0.06289,
            "32": 0.05752,
            "33": 0.06247,
            "34": 0.05817,
            "35": 0.06189,
            "36": 0.05843,
            "37": 0.06228,
            "38": 0.05885,
            "39": 0.0623,
            "40": 0.05846,
            "41": 0.06215,
            "42": 0.05671,
            "43": 0.06175,
            "44": 0.05738,
            "45": 0.06472,
            "46": 0.05749,
            "47": 0.06223,
            "48": 0.05703,
            "49": 0.06163,
            "50": 0.05739
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 1
  --use-distributed-optimizer: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
  --fim-data: true
  --fim-rate: 0.5
  --fim-spm-rate: 0.5
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.84081, "5": 10.84098, "10": 10.80913, "15": 10.80028, "20": 10.70502, "25": 10.5355, "30": 10.35506, "35": 10.27096, "40": 10.07811, "45": 9.82336, "50": 9.90123}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1539.0, "5": 1932.0, "10": 1388.0, "15": 1945.0, "20": 1744.0, "25": 1644.0, "30": 2019.0, "35": 1955.0, "40": 2263.0, "45": 2218.0, "50": 2374.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 552128512.0, "5": 552128512.0, "10": 552128512.0, "15": 552128512.0, "20": 552128512.0, "25": 552128512.0, "30": 552128512.0, "35": 552128512.0, "40": 552128512.0, "45": 552128512.0, "50": 552128512.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4576453120.0, "5": 4673069568.0, "10": 4673069568.0, "15": 4673069568.0, "20": 4673069568.0, "25": 4673069568.0, "30": 4673069568.0, "35": 4673069568.0, "40": 4673069568.0, "45": 4673069568.0, "50": 4673069568.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 6.0031, "5": 0.11502, "10": 0.11635, "15": 0.12006, "20": 0.11874, "25": 0.11281, "30": 0.11121, "35": 0.112, "40": 0.11204, "45": 0.1119, "50": 0.11108}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.77472,
            "2": 10.7834,
            "3": 10.783,
            "4": 10.74953,
            "5": 10.82069,
            "6": 10.82339,
            "7": 10.79075,
            "8": 10.78,
            "9": 10.78617,
            "10": 10.74367,
            "11": 10.8322,
            "12": 10.80441,
            "13": 10.82131,
            "14": 10.82576,
            "15": 10.74148,
            "16": 10.75034,
            "17": 10.72538,
            "18": 10.74232,
            "19": 10.74454,
            "20": 10.63704,
            "21": 10.63099,
            "22": 10.48029,
            "23": 10.65995,
            "24": 10.52537,
            "25": 10.47538,
            "26": 10.54137,
            "27": 10.55474,
            "28": 10.52102,
            "29": 10.53614,
            "30": 10.30518,
            "31": 10.06489,
            "32": 10.41554,
            "33": 10.42245,
            "34": 10.17407,
            "35": 10.22339,
            "36": 10.18526,
            "37": 10.30398,
            "38": 10.14971,
            "39": 10.37031,
            "40": 10.04014,
            "41": 10.10916,
            "42": 10.17947,
            "43": 9.79735,
            "44": 9.90801,
            "45": 9.79833,
            "46": 9.79662,
            "47": 10.1206,
            "48": 9.82074,
            "49": 9.50511,
            "50": 9.88047
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1542.0,
            "2": 1772.0,
            "3": 1677.0,
            "4": 1763.0,
            "5": 1987.0,
            "6": 1880.0,
            "7": 1948.0,
            "8": 1686.0,
            "9": 1930.0,
            "10": 1437.0,
            "11": 1928.0,
            "12": 1829.0,
            "13": 1980.0,
            "14": 1810.0,
            "15": 2006.0,
            "16": 1885.0,
            "17": 1765.0,
            "18": 1742.0,
            "19": 1788.0,
            "20": 1717.0,
            "21": 1880.0,
            "22": 1707.0,
            "23": 2116.0,
            "24": 1644.0,
            "25": 1581.0,
            "26": 1664.0,
            "27": 1810.0,
            "28": 2052.0,
            "29": 1954.0,
            "30": 1943.0,
            "31": 1590.0,
            "32": 1906.0,
            "33": 2122.0,
            "34": 1865.0,
            "35": 1994.0,
            "36": 1902.0,
            "37": 2380.0,
            "38": 2161.0,
            "39": 2414.0,
            "40": 2260.0,
            "41": 2308.0,
            "42": 2275.0,
            "43": 2109.0,
            "44": 2189.0,
            "45": 2236.0,
            "46": 2437.0,
            "47": 2581.0,
            "48": 2351.0,
            "49": 2345.0,
            "50": 2524.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 581489664.0,
            "2": 581489664.0,
            "3": 581489664.0,
            "4": 581489664.0,
            "5": 581489664.0,
            "6": 581489664.0,
            "7": 581489664.0,
            "8": 581489664.0,
            "9": 581489664.0,
            "10": 581489664.0,
            "11": 581489664.0,
            "12": 581489664.0,
            "13": 581489664.0,
            "14": 581489664.0,
            "15": 581489664.0,
            "16": 581489664.0,
            "17": 581489664.0,
            "18": 581489664.0,
            "19": 581489664.0,
            "20": 581489664.0,
            "21": 581489664.0,
            "22": 581489664.0,
            "23": 581489664.0,
            "24": 581489664.0,
            "25": 581489664.0,
            "26": 581489664.0,
            "27": 581489664.0,
            "28": 581489664.0,
            "29": 581489664.0,
            "30": 581489664.0,
            "31": 581489664.0,
            "32": 581489664.0,
            "33": 581489664.0,
            "34": 581489664.0,
            "35": 581489664.0,
            "36": 581489664.0,
            "37": 581489664.0,
            "38": 581489664.0,
            "39": 581489664.0,
            "40": 581489664.0,
            "41": 581489664.0,
            "42": 581489664.0,
            "43": 581489664.0,
            "44": 581489664.0,
            "45": 581489664.0,
            "46": 581489664.0,
            "47": 581489664.0,
            "48": 581489664.0,
            "49": 581489664.0,
            "50": 581489664.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 4605814272.0,
            "2": 4702430720.0,
            "3": 4702430720.0,
            "4": 4702430720.0,
            "5": 4702430720.0,
            "6": 4702430720.0,
            "7": 4702430720.0,
            "8": 4702430720.0,
            "9": 4702430720.0,
            "10": 4702430720.0,
            "11": 4702430720.0,
            "12": 4702430720.0,
            "13": 4702430720.0,
            "14": 4702430720.0,
            "15": 4702430720.0,
            "16": 4702430720.0,
            "17": 4702430720.0,
            "18": 4702430720.0,
            "19": 4702430720.0,
            "20": 4702430720.0,
            "21": 4702430720.0,
            "22": 4702430720.0,
            "23": 4702430720.0,
            "24": 4702430720.0,
            "25": 4702430720.0,
            "26": 4702430720.0,
            "27": 4702430720.0,
            "28": 4702430720.0,
            "29": 4702430720.0,
            "30": 4702430720.0,
            "31": 4702430720.0,
            "32": 4702430720.0,
            "33": 4702430720.0,
            "34": 4702430720.0,
            "35": 4702430720.0,
            "36": 4702430720.0,
            "37": 4702430720.0,
            "38": 4702430720.0,
            "39": 4702430720.0,
            "40": 4702430720.0,
            "41": 4702430720.0,
            "42": 4702430720.0,
            "43": 4702430720.0,
            "44": 4702430720.0,
            "45": 4702430720.0,
            "46": 4702430720.0,
            "47": 4702430720.0,
            "48": 4702430720.0,
            "49": 4702430720.0,
            "50": 4702430720.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 4.06847,
            "3": 0.10126,
            "4": 0.08879,
            "5": 0.0895,
            "6": 0.09253,
            "7": 0.09257,
            "8": 0.09092,
            "9": 0.0912,
            "10": 0.09343,
            "11": 0.09132,
            "12": 0.09098,
            "13": 0.08924,
            "14": 0.08868,
            "15": 0.0917,
            "16": 0.09022,
            "17": 0.09175,
            "18": 0.08931,
            "19": 0.0903,
            "20": 0.08975,
            "21": 0.08914,
            "22": 0.09136,
            "23": 0.09031,
            "24": 0.08986,
            "25": 0.08928,
            "26": 0.08905,
            "27": 0.0893,
            "28": 0.08978,
            "29": 0.08991,
            "30": 0.08929,
            "31": 0.09073,
            "32": 0.08895,
            "33": 0.08888,
            "34": 0.0889,
            "35": 0.08867,
            "36": 0.08814,
            "37": 0.08834,
            "38": 0.08834,
            "39": 0.08804,
            "40": 0.08849,
            "41": 0.08911,
            "42": 0.08844,
            "43": 0.0897,
            "44": 0.08853,
            "45": 0.09005,
            "46": 0.09453,
            "47": 0.09155,
            "48": 0.08894,
            "49": 0.09025,
            "50": 0.08973
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.89592,
            "2": 10.89514,
            "3": 10.88761,
            "4": 10.88903,
            "5": 10.89131,
            "6": 10.90004,
            "7": 10.89143,
            "8": 10.89938,
            "9": 10.90231,
            "10": 10.88299,
            "11": 10.87827,
            "12": 10.89318,
            "13": 10.89818,
            "14": 10.89188,
            "15": 10.84786,
            "16": 10.85369,
            "17": 10.831,
            "18": 10.83994,
            "19": 10.82779,
            "20": 10.74925,
            "21": 10.73558,
            "22": 10.61567,
            "23": 10.72599,
            "24": 10.63027,
            "25": 10.59226,
            "26": 10.63312,
            "27": 10.63277,
            "28": 10.58231,
            "29": 10.58547,
            "30": 10.41136,
            "31": 10.15833,
            "32": 10.48326,
            "33": 10.46651,
            "34": 10.23801,
            "35": 10.28136,
            "36": 10.24029,
            "37": 10.3617,
            "38": 10.20342,
            "39": 10.404,
            "40": 10.09306,
            "41": 10.15805,
            "42": 10.21903,
            "43": 9.84274,
            "44": 9.97219,
            "45": 9.84149,
            "46": 9.82007,
            "47": 10.14934,
            "48": 9.85997,
            "49": 9.54155,
            "50": 9.91285
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1581.0,
            "2": 1674.0,
            "3": 1724.0,
            "4": 1803.0,
            "5": 1962.0,
            "6": 1846.0,
            "7": 1864.0,
            "8": 1792.0,
            "9": 1848.0,
            "10": 1435.0,
            "11": 1868.0,
            "12": 1782.0,
            "13": 1874.0,
            "14": 1783.0,
            "15": 1944.0,
            "16": 1933.0,
            "17": 1807.0,
            "18": 1737.0,
            "19": 1822.0,
            "20": 1679.0,
            "21": 1808.0,
            "22": 1806.0,
            "23": 2077.0,
            "24": 1663.0,
            "25": 1645.0,
            "26": 1719.0,
            "27": 1925.0,
            "28": 2030.0,
            "29": 2042.0,
            "30": 1912.0,
            "31": 1603.0,
            "32": 1938.0,
            "33": 2158.0,
            "34": 1896.0,
            "35": 2023.0,
            "36": 1910.0,
            "37": 2330.0,
            "38": 2298.0,
            "39": 2498.0,
            "40": 2270.0,
            "41": 2464.0,
            "42": 2296.0,
            "43": 2042.0,
            "44": 2138.0,
            "45": 2152.0,
            "46": 2282.0,
            "47": 2529.0,
            "48": 2454.0,
            "49": 2358.0,
            "50": 2580.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 581489664.0,
            "2": 581489664.0,
            "3": 581489664.0,
            "4": 581489664.0,
            "5": 581489664.0,
            "6": 581489664.0,
            "7": 581489664.0,
            "8": 581489664.0,
            "9": 581489664.0,
            "10": 581489664.0,
            "11": 581489664.0,
            "12": 581489664.0,
            "13": 581489664.0,
            "14": 581489664.0,
            "15": 581489664.0,
            "16": 581489664.0,
            "17": 581489664.0,
            "18": 581489664.0,
            "19": 581489664.0,
            "20": 581489664.0,
            "21": 581489664.0,
            "22": 581489664.0,
            "23": 581489664.0,
            "24": 581489664.0,
            "25": 581489664.0,
            "26": 581489664.0,
            "27": 581489664.0,
            "28": 581489664.0,
            "29": 581489664.0,
            "30": 581489664.0,
            "31": 581489664.0,
            "32": 581489664.0,
            "33": 581489664.0,
            "34": 581489664.0,
            "35": 581489664.0,
            "36": 581489664.0,
            "37": 581489664.0,
            "38": 581489664.0,
            "39": 581489664.0,
            "40": 581489664.0,
            "41": 581489664.0,
            "42": 581489664.0,
            "43": 581489664.0,
            "44": 581489664.0,
            "45": 581489664.0,
            "46": 581489664.0,
            "47": 581489664.0,
            "48": 581489664.0,
            "49": 581489664.0,
            "50": 581489664.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 4605814272.0,
            "2": 4702430720.0,
            "3": 4702430720.0,
            "4": 4702430720.0,
            "5": 4702430720.0,
            "6": 4702430720.0,
            "7": 4702430720.0,
            "8": 4702430720.0,
            "9": 4702430720.0,
            "10": 4702430720.0,
            "11": 4702430720.0,
            "12": 4702430720.0,
            "13": 4702430720.0,
            "14": 4702430720.0,
            "15": 4702430720.0,
            "16": 4702430720.0,
            "17": 4702430720.0,
            "18": 4702430720.0,
            "19": 4702430720.0,
            "20": 4702430720.0,
            "21": 4702430720.0,
            "22": 4702430720.0,
            "23": 4702430720.0,
            "24": 4702430720.0,
            "25": 4702430720.0,
            "26": 4702430720.0,
            "27": 4702430720.0,
            "28": 4702430720.0,
            "29": 4702430720.0,
            "30": 4702430720.0,
            "31": 4702430720.0,
            "32": 4702430720.0,
            "33": 4702430720.0,
            "34": 4702430720.0,
            "35": 4702430720.0,
            "36": 4702430720.0,
            "37": 4702430720.0,
            "38": 4702430720.0,
            "39": 4702430720.0,
            "40": 4702430720.0,
            "41": 4702430720.0,
            "42": 4702430720.0,
            "43": 4702430720.0,
            "44": 4702430720.0,
            "45": 4702430720.0,
            "46": 4702430720.0,
            "47": 4702430720.0,
            "48": 4702430720.0,
            "49": 4702430720.0,
            "50": 4702430720.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 8.63401,
            "2": 0.09023,
            "3": 0.07348,
            "4": 0.05746,
            "5": 0.05663,
            "6": 0.05755,
            "7": 0.0574,
            "8": 0.05838,
            "9": 0.05585,
            "10": 0.05739,
            "11": 0.05576,
            "12": 0.0561,
            "13": 0.05582,
            "14": 0.05815,
            "15": 0.05615,
            "16": 0.05649,
            "17": 0.05732,
            "18": 0.05614,
            "19": 0.05614,
            "20": 0.0565,
            "21": 0.05624,
            "22": 0.05712,
            "23": 0.05601,
            "24": 0.05772,
            "25": 0.05612,
            "26": 0.05714,
            "27": 0.05571,
            "28": 0.05803,
            "29": 0.0562,
            "30": 0.05628,
            "31": 0.05602,
            "32": 0.05667,
            "33": 0.05631,
            "34": 0.05631,
            "35": 0.05623,
            "36": 0.0565,
            "37": 0.05737,
            "38": 0.05733,
            "39": 0.05988,
            "40": 0.05739,
            "41": 0.05719,
            "42": 0.05699,
            "43": 0.05608,
            "44": 0.05867,
            "45": 0.05838,
            "46": 0.05842,
            "47": 0.05635,
            "48": 0.05732,
            "49": 0.0569,
            "50": 0.05736
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.84081,
            "2": 10.8354,
            "3": 10.83237,
            "4": 10.81882,
            "5": 10.84098,
            "6": 10.87094,
            "7": 10.83285,
            "8": 10.8395,
            "9": 10.84275,
            "10": 10.80913,
            "11": 10.85185,
            "12": 10.84426,
            "13": 10.86366,
            "14": 10.86332,
            "15": 10.80028,
            "16": 10.79303,
            "17": 10.7753,
            "18": 10.80133,
            "19": 10.79138,
            "20": 10.70502,
            "21": 10.68161,
            "22": 10.56472,
            "23": 10.70185,
            "24": 10.58,
            "25": 10.5355,
            "26": 10.607,
            "27": 10.59378,
            "28": 10.56083,
            "29": 10.57494,
            "30": 10.35506,
            "31": 10.12664,
            "32": 10.46551,
            "33": 10.45216,
            "34": 10.22453,
            "35": 10.27096,
            "36": 10.22158,
            "37": 10.33994,
            "38": 10.18651,
            "39": 10.39397,
            "40": 10.07811,
            "41": 10.13811,
            "42": 10.20175,
            "43": 9.83808,
            "44": 9.94297,
            "45": 9.82336,
            "46": 9.82165,
            "47": 10.13421,
            "48": 9.84058,
            "49": 9.52119,
            "50": 9.90123
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1539.0,
            "2": 1717.0,
            "3": 1773.0,
            "4": 1809.0,
            "5": 1932.0,
            "6": 1836.0,
            "7": 1808.0,
            "8": 1638.0,
            "9": 1924.0,
            "10": 1388.0,
            "11": 1978.0,
            "12": 1875.0,
            "13": 1894.0,
            "14": 1832.0,
            "15": 1945.0,
            "16": 1966.0,
            "17": 1779.0,
            "18": 1731.0,
            "19": 1812.0,
            "20": 1744.0,
            "21": 1910.0,
            "22": 1717.0,
            "23": 2079.0,
            "24": 1636.0,
            "25": 1644.0,
            "26": 1812.0,
            "27": 1939.0,
            "28": 1904.0,
            "29": 2001.0,
            "30": 2019.0,
            "31": 1661.0,
            "32": 1904.0,
            "33": 2040.0,
            "34": 1944.0,
            "35": 1955.0,
            "36": 1968.0,
            "37": 2344.0,
            "38": 2300.0,
            "39": 2418.0,
            "40": 2263.0,
            "41": 2357.0,
            "42": 2285.0,
            "43": 1988.0,
            "44": 2123.0,
            "45": 2218.0,
            "46": 2349.0,
            "47": 2594.0,
            "48": 2506.0,
            "49": 2331.0,
            "50": 2374.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 552128512.0,
            "2": 552128512.0,
            "3": 552128512.0,
            "4": 552128512.0,
            "5": 552128512.0,
            "6": 552128512.0,
            "7": 552128512.0,
            "8": 552128512.0,
            "9": 552128512.0,
            "10": 552128512.0,
            "11": 552128512.0,
            "12": 552128512.0,
            "13": 552128512.0,
            "14": 552128512.0,
            "15": 552128512.0,
            "16": 552128512.0,
            "17": 552128512.0,
            "18": 552128512.0,
            "19": 552128512.0,
            "20": 552128512.0,
            "21": 552128512.0,
            "22": 552128512.0,
            "23": 552128512.0,
            "24": 552128512.0,
            "25": 552128512.0,
            "26": 552128512.0,
            "27": 552128512.0,
            "28": 552128512.0,
            "29": 552128512.0,
            "30": 552128512.0,
            "31": 552128512.0,
            "32": 552128512.0,
            "33": 552128512.0,
            "34": 552128512.0,
            "35": 552128512.0,
            "36": 552128512.0,
            "37": 552128512.0,
            "38": 552128512.0,
            "39": 552128512.0,
            "40": 552128512.0,
            "41": 552128512.0,
            "42": 552128512.0,
            "43": 552128512.0,
            "44": 552128512.0,
            "45": 552128512.0,
            "46": 552128512.0,
            "47": 552128512.0,
            "48": 552128512.0,
            "49": 552128512.0,
            "50": 552128512.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 4576453120.0,
            "2": 4673069568.0,
            "3": 4673069568.0,
            "4": 4673069568.0,
            "5": 4673069568.0,
            "6": 4673069568.0,
            "7": 4673069568.0,
            "8": 4673069568.0,
            "9": 4673069568.0,
            "10": 4673069568.0,
            "11": 4673069568.0,
            "12": 4673069568.0,
            "13": 4673069568.0,
            "14": 4673069568.0,
            "15": 4673069568.0,
            "16": 4673069568.0,
            "17": 4673069568.0,
            "18": 4673069568.0,
            "19": 4673069568.0,
            "20": 4673069568.0,
            "21": 4673069568.0,
            "22": 4673069568.0,
            "23": 4673069568.0,
            "24": 4673069568.0,
            "25": 4673069568.0,
            "26": 4673069568.0,
            "27": 4673069568.0,
            "28": 4673069568.0,
            "29": 4673069568.0,
            "30": 4673069568.0,
            "31": 4673069568.0,
            "32": 4673069568.0,
            "33": 4673069568.0,
            "34": 4673069568.0,
            "35": 4673069568.0,
            "36": 4673069568.0,
            "37": 4673069568.0,
            "38": 4673069568.0,
            "39": 4673069568.0,
            "40": 4673069568.0,
            "41": 4673069568.0,
            "42": 4673069568.0,
            "43": 4673069568.0,
            "44": 4673069568.0,
            "45": 4673069568.0,
            "46": 4673069568.0,
            "47": 4673069568.0,
            "48": 4673069568.0,
            "49": 4673069568.0,
            "50": 4673069568.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 4.18387,
            "2": 0.12504,
            "3": 0.09705,
            "4": 0.09652,
            "5": 0.09748,
            "6": 0.09683,
            "7": 0.09565,
            "8": 0.09466,
            "9": 0.09428,
            "10": 0.09486,
            "11": 0.09436,
            "12": 0.09386,
            "13": 0.09434,
            "14": 0.09599,
            "15": 0.09464,
            "16": 0.0943,
            "17": 0.09447,
            "18": 0.09424,
            "19": 0.0942,
            "20": 0.09425,
            "21": 0.09401,
            "22": 0.09476,
            "23": 0.09408,
            "24": 0.09462,
            "25": 0.09414,
            "26": 0.09442,
            "27": 0.0939,
            "28": 0.09352,
            "29": 0.09364,
            "30": 0.09376,
            "31": 0.09494,
            "32": 0.09358,
            "33": 0.09378,
            "34": 0.09361,
            "35": 0.09442,
            "36": 0.09437,
            "37": 0.09367,
            "38": 0.0934,
            "39": 0.09328,
            "40": 0.09295,
            "41": 0.09331,
            "42": 0.09302,
            "43": 0.09373,
            "44": 0.09287,
            "45": 0.09264,
            "46": 0.10047,
            "47": 0.09374,
            "48": 0.09248,
            "49": 0.09248,
            "50": 0.09148
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgx_a100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.84081,
            "2": 10.8354,
            "3": 10.83237,
            "4": 10.81882,
            "5": 10.84098,
            "6": 10.87094,
            "7": 10.83285,
            "8": 10.8395,
            "9": 10.84275,
            "10": 10.80913,
            "11": 10.85185,
            "12": 10.84426,
            "13": 10.86366,
            "14": 10.86332,
            "15": 10.80028,
            "16": 10.79303,
            "17": 10.7753,
            "18": 10.80133,
            "19": 10.79138,
            "20": 10.70502,
            "21": 10.68161,
            "22": 10.56472,
            "23": 10.70185,
            "24": 10.58,
            "25": 10.5355,
            "26": 10.607,
            "27": 10.59378,
            "28": 10.56083,
            "29": 10.57494,
            "30": 10.35506,
            "31": 10.12664,
            "32": 10.46551,
            "33": 10.45216,
            "34": 10.22453,
            "35": 10.27096,
            "36": 10.22158,
            "37": 10.33994,
            "38": 10.18651,
            "39": 10.39397,
            "40": 10.07811,
            "41": 10.13811,
            "42": 10.20175,
            "43": 9.83808,
            "44": 9.94297,
            "45": 9.82336,
            "46": 9.82165,
            "47": 10.13421,
            "48": 9.84058,
            "49": 9.52119,
            "50": 9.90123
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1539.0,
            "2": 1717.0,
            "3": 1773.0,
            "4": 1809.0,
            "5": 1932.0,
            "6": 1836.0,
            "7": 1808.0,
            "8": 1638.0,
            "9": 1924.0,
            "10": 1388.0,
            "11": 1978.0,
            "12": 1875.0,
            "13": 1894.0,
            "14": 1832.0,
            "15": 1945.0,
            "16": 1966.0,
            "17": 1779.0,
            "18": 1731.0,
            "19": 1812.0,
            "20": 1744.0,
            "21": 1910.0,
            "22": 1717.0,
            "23": 2079.0,
            "24": 1636.0,
            "25": 1644.0,
            "26": 1812.0,
            "27": 1939.0,
            "28": 1904.0,
            "29": 2001.0,
            "30": 2019.0,
            "31": 1661.0,
            "32": 1904.0,
            "33": 2040.0,
            "34": 1944.0,
            "35": 1955.0,
            "36": 1968.0,
            "37": 2344.0,
            "38": 2300.0,
            "39": 2418.0,
            "40": 2263.0,
            "41": 2357.0,
            "42": 2285.0,
            "43": 1988.0,
            "44": 2123.0,
            "45": 2218.0,
            "46": 2349.0,
            "47": 2594.0,
            "48": 2506.0,
            "49": 2331.0,
            "50": 2374.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 552128512.0,
            "2": 552128512.0,
            "3": 552128512.0,
            "4": 552128512.0,
            "5": 552128512.0,
            "6": 552128512.0,
            "7": 552128512.0,
            "8": 552128512.0,
            "9": 552128512.0,
            "10": 552128512.0,
            "11": 552128512.0,
            "12": 552128512.0,
            "13": 552128512.0,
            "14": 552128512.0,
            "15": 552128512.0,
            "16": 552128512.0,
            "17": 552128512.0,
            "18": 552128512.0,
            "19": 552128512.0,
            "20": 552128512.0,
            "21": 552128512.0,
            "22": 552128512.0,
            "23": 552128512.0,
            "24": 552128512.0,
            "25": 552128512.0,
            "26": 552128512.0,
            "27": 552128512.0,
            "28": 552128512.0,
            "29": 552128512.0,
            "30": 552128512.0,
            "31": 552128512.0,
            "32": 552128512.0,
            "33": 552128512.0,
            "34": 552128512.0,
            "35": 552128512.0,
            "36": 552128512.0,
            "37": 552128512.0,
            "38": 552128512.0,
            "39": 552128512.0,
            "40": 552128512.0,
            "41": 552128512.0,
            "42": 552128512.0,
            "43": 552128512.0,
            "44": 552128512.0,
            "45": 552128512.0,
            "46": 552128512.0,
            "47": 552128512.0,
            "48": 552128512.0,
            "49": 552128512.0,
            "50": 552128512.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 4576453120.0,
            "2": 4673069568.0,
            "3": 4673069568.0,
            "4": 4673069568.0,
            "5": 4673069568.0,
            "6": 4673069568.0,
            "7": 4673069568.0,
            "8": 4673069568.0,
            "9": 4673069568.0,
            "10": 4673069568.0,
            "11": 4673069568.0,
            "12": 4673069568.0,
            "13": 4673069568.0,
            "14": 4673069568.0,
            "15": 4673069568.0,
            "16": 4673069568.0,
            "17": 4673069568.0,
            "18": 4673069568.0,
            "19": 4673069568.0,
            "20": 4673069568.0,
            "21": 4673069568.0,
            "22": 4673069568.0,
            "23": 4673069568.0,
            "24": 4673069568.0,
            "25": 4673069568.0,
            "26": 4673069568.0,
            "27": 4673069568.0,
            "28": 4673069568.0,
            "29": 4673069568.0,
            "30": 4673069568.0,
            "31": 4673069568.0,
            "32": 4673069568.0,
            "33": 4673069568.0,
            "34": 4673069568.0,
            "35": 4673069568.0,
            "36": 4673069568.0,
            "37": 4673069568.0,
            "38": 4673069568.0,
            "39": 4673069568.0,
            "40": 4673069568.0,
            "41": 4673069568.0,
            "42": 4673069568.0,
            "43": 4673069568.0,
            "44": 4673069568.0,
            "45": 4673069568.0,
            "46": 4673069568.0,
            "47": 4673069568.0,
            "48": 4673069568.0,
            "49": 4673069568.0,
            "50": 4673069568.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 3.16333,
            "2": 0.12429,
            "3": 0.10327,
            "4": 0.09373,
            "5": 0.09355,
            "6": 0.0921,
            "7": 0.09247,
            "8": 0.09175,
            "9": 0.08988,
            "10": 0.09206,
            "11": 0.0907,
            "12": 0.09062,
            "13": 0.09067,
            "14": 0.09178,
            "15": 0.09006,
            "16": 0.09058,
            "17": 0.09113,
            "18": 0.08975,
            "19": 0.08958,
            "20": 0.08974,
            "21": 0.0895,
            "22": 0.08967,
            "23": 0.08965,
            "24": 0.08985,
            "25": 0.08964,
            "26": 0.09069,
            "27": 0.08964,
            "28": 0.08972,
            "29": 0.08977,
            "30": 0.08994,
            "31": 0.0898,
            "32": 0.08953,
            "33": 0.09044,
            "34": 0.09062,
            "35": 0.09102,
            "36": 0.09102,
            "37": 0.09125,
            "38": 0.09035,
            "39": 0.09141,
            "40": 0.09069,
            "41": 0.0916,
            "42": 0.09094,
            "43": 0.09103,
            "44": 0.09176,
            "45": 0.09169,
            "46": 0.09186,
            "47": 0.09119,
            "48": 0.09112,
            "49": 0.09072,
            "50": 0.09246
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgxa100_dracooci-ord.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.8401,
            "2": 10.83566,
            "3": 10.82993,
            "4": 10.8173,
            "5": 10.84032,
            "6": 10.87262,
            "7": 10.83467,
            "8": 10.8403,
            "9": 10.84359,
            "10": 10.8134,
            "11": 10.85025,
            "12": 10.84316,
            "13": 10.86605,
            "14": 10.86315,
            "15": 10.80276,
            "16": 10.79643,
            "17": 10.7763,
            "18": 10.8015,
            "19": 10.7939,
            "20": 10.705,
            "21": 10.68148,
            "22": 10.56313,
            "23": 10.70136,
            "24": 10.57939,
            "25": 10.53849,
            "26": 10.60617,
            "27": 10.59211,
            "28": 10.56156,
            "29": 10.57666,
            "30": 10.35521,
            "31": 10.12773,
            "32": 10.46367,
            "33": 10.45444,
            "34": 10.22451,
            "35": 10.27148,
            "36": 10.22184,
            "37": 10.33945,
            "38": 10.18637,
            "39": 10.39329,
            "40": 10.08049,
            "41": 10.13789,
            "42": 10.20012,
            "43": 9.83791,
            "44": 9.94327,
            "45": 9.8229,
            "46": 9.82313,
            "47": 10.13353,
            "48": 9.8415,
            "49": 9.52102,
            "50": 9.90118
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1670.0,
            "2": 1691.0,
            "3": 1630.0,
            "4": 1805.0,
            "5": 1970.0,
            "6": 1901.0,
            "7": 1816.0,
            "8": 1587.0,
            "9": 1905.0,
            "10": 1397.0,
            "11": 1954.0,
            "12": 1859.0,
            "13": 1873.0,
            "14": 1875.0,
            "15": 1936.0,
            "16": 1972.0,
            "17": 1816.0,
            "18": 1773.0,
            "19": 1833.0,
            "20": 1715.0,
            "21": 1923.0,
            "22": 1681.0,
            "23": 2055.0,
            "24": 1727.0,
            "25": 1703.0,
            "26": 1761.0,
            "27": 1917.0,
            "28": 1962.0,
            "29": 2010.0,
            "30": 1957.0,
            "31": 1723.0,
            "32": 1898.0,
            "33": 2153.0,
            "34": 1828.0,
            "35": 1991.0,
            "36": 1937.0,
            "37": 2347.0,
            "38": 2365.0,
            "39": 2349.0,
            "40": 2239.0,
            "41": 2217.0,
            "42": 2222.0,
            "43": 2121.0,
            "44": 2059.0,
            "45": 2144.0,
            "46": 2296.0,
            "47": 2487.0,
            "48": 2376.0,
            "49": 2330.0,
            "50": 2377.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 552238592.0,
            "2": 552238592.0,
            "3": 552238592.0,
            "4": 552238592.0,
            "5": 552238592.0,
            "6": 552238592.0,
            "7": 552238592.0,
            "8": 552238592.0,
            "9": 552238592.0,
            "10": 552238592.0,
            "11": 552238592.0,
            "12": 552238592.0,
            "13": 552238592.0,
            "14": 552238592.0,
            "15": 552238592.0,
            "16": 552238592.0,
            "17": 552238592.0,
            "18": 552238592.0,
            "19": 552238592.0,
            "20": 552238592.0,
            "21": 552238592.0,
            "22": 552238592.0,
            "23": 552238592.0,
            "24": 552238592.0,
            "25": 552238592.0,
            "26": 552238592.0,
            "27": 552238592.0,
            "28": 552238592.0,
            "29": 552238592.0,
            "30": 552238592.0,
            "31": 552238592.0,
            "32": 552238592.0,
            "33": 552238592.0,
            "34": 552238592.0,
            "35": 552238592.0,
            "36": 552238592.0,
            "37": 552238592.0,
            "38": 552238592.0,
            "39": 552238592.0,
            "40": 552238592.0,
            "41": 552238592.0,
            "42": 552238592.0,
            "43": 552238592.0,
            "44": 552238592.0,
            "45": 552238592.0,
            "46": 552238592.0,
            "47": 552238592.0,
            "48": 552238592.0,
            "49": 552238592.0,
            "50": 552238592.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 4576563200.0,
            "2": 4673179648.0,
            "3": 4673179648.0,
            "4": 4673179648.0,
            "5": 4673179648.0,
            "6": 4673179648.0,
            "7": 4673179648.0,
            "8": 4673179648.0,
            "9": 4673179648.0,
            "10": 4673179648.0,
            "11": 4673179648.0,
            "12": 4673179648.0,
            "13": 4673179648.0,
            "14": 4673179648.0,
            "15": 4673179648.0,
            "16": 4673179648.0,
            "17": 4673179648.0,
            "18": 4673179648.0,
            "19": 4673179648.0,
            "20": 4673179648.0,
            "21": 4673179648.0,
            "22": 4673179648.0,
            "23": 4673179648.0,
            "24": 4673179648.0,
            "25": 4673179648.0,
            "26": 4673179648.0,
            "27": 4673179648.0,
            "28": 4673179648.0,
            "29": 4673179648.0,
            "30": 4673179648.0,
            "31": 4673179648.0,
            "32": 4673179648.0,
            "33": 4673179648.0,
            "34": 4673179648.0,
            "35": 4673179648.0,
            "36": 4673179648.0,
            "37": 4673179648.0,
            "38": 4673179648.0,
            "39": 4673179648.0,
            "40": 4673179648.0,
            "41": 4673179648.0,
            "42": 4673179648.0,
            "43": 4673179648.0,
            "44": 4673179648.0,
            "45": 4673179648.0,
            "46": 4673179648.0,
            "47": 4673179648.0,
            "48": 4673179648.0,
            "49": 4673179648.0,
            "50": 4673179648.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 8.45713,
            "2": 0.13161,
            "3": 0.11061,
            "4": 0.12579,
            "5": 0.13121,
            "6": 0.13773,
            "7": 0.13653,
            "8": 0.46789,
            "9": 0.12385,
            "10": 0.12166,
            "11": 0.1263,
            "12": 0.13396,
            "13": 0.12492,
            "14": 0.12502,
            "15": 0.11723,
            "16": 0.15631,
            "17": 0.3771,
            "18": 0.12361,
            "19": 0.11397,
            "20": 0.11135,
            "21": 0.10366,
            "22": 0.10396,
            "23": 0.10431,
            "24": 0.10481,
            "25": 0.10339,
            "26": 0.1068,
            "27": 0.10511,
            "28": 0.36221,
            "29": 0.1036,
            "30": 0.10364,
            "31": 0.10951,
            "32": 0.11609,
            "33": 0.11339,
            "34": 0.1139,
            "35": 0.11975,
            "36": 0.11809,
            "37": 0.10984,
            "38": 0.10706,
            "39": 0.10797,
            "40": 0.11217,
            "41": 0.11266,
            "42": 0.10821,
            "43": 0.1114,
            "44": 0.10779,
            "45": 0.1071,
            "46": 0.11272,
            "47": 0.1145,
            "48": 0.10778,
            "49": 0.10649,
            "50": 0.10728
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgxa100_dracooci.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.8401,
            "2": 10.83566,
            "3": 10.82993,
            "4": 10.8173,
            "5": 10.84032,
            "6": 10.87262,
            "7": 10.83467,
            "8": 10.8403,
            "9": 10.84359,
            "10": 10.8134,
            "11": 10.85025,
            "12": 10.84316,
            "13": 10.86605,
            "14": 10.86315,
            "15": 10.80276,
            "16": 10.79643,
            "17": 10.7763,
            "18": 10.8015,
            "19": 10.7939,
            "20": 10.705,
            "21": 10.68148,
            "22": 10.56313,
            "23": 10.70136,
            "24": 10.57939,
            "25": 10.53849,
            "26": 10.60617,
            "27": 10.59211,
            "28": 10.56156,
            "29": 10.57666,
            "30": 10.35521,
            "31": 10.12773,
            "32": 10.46367,
            "33": 10.45444,
            "34": 10.22451,
            "35": 10.27148,
            "36": 10.22184,
            "37": 10.33945,
            "38": 10.18637,
            "39": 10.39329,
            "40": 10.08049,
            "41": 10.13789,
            "42": 10.20012,
            "43": 9.83791,
            "44": 9.94327,
            "45": 9.8229,
            "46": 9.82313,
            "47": 10.13353,
            "48": 9.8415,
            "49": 9.52102,
            "50": 9.90118
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1670.0,
            "2": 1691.0,
            "3": 1630.0,
            "4": 1805.0,
            "5": 1970.0,
            "6": 1901.0,
            "7": 1816.0,
            "8": 1587.0,
            "9": 1905.0,
            "10": 1397.0,
            "11": 1954.0,
            "12": 1859.0,
            "13": 1873.0,
            "14": 1875.0,
            "15": 1936.0,
            "16": 1972.0,
            "17": 1816.0,
            "18": 1773.0,
            "19": 1833.0,
            "20": 1715.0,
            "21": 1923.0,
            "22": 1681.0,
            "23": 2055.0,
            "24": 1727.0,
            "25": 1703.0,
            "26": 1761.0,
            "27": 1917.0,
            "28": 1962.0,
            "29": 2010.0,
            "30": 1957.0,
            "31": 1723.0,
            "32": 1898.0,
            "33": 2153.0,
            "34": 1828.0,
            "35": 1991.0,
            "36": 1937.0,
            "37": 2347.0,
            "38": 2365.0,
            "39": 2349.0,
            "40": 2239.0,
            "41": 2217.0,
            "42": 2222.0,
            "43": 2121.0,
            "44": 2059.0,
            "45": 2144.0,
            "46": 2296.0,
            "47": 2487.0,
            "48": 2376.0,
            "49": 2330.0,
            "50": 2377.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 552238592.0,
            "2": 552238592.0,
            "3": 552238592.0,
            "4": 552238592.0,
            "5": 552238592.0,
            "6": 552238592.0,
            "7": 552238592.0,
            "8": 552238592.0,
            "9": 552238592.0,
            "10": 552238592.0,
            "11": 552238592.0,
            "12": 552238592.0,
            "13": 552238592.0,
            "14": 552238592.0,
            "15": 552238592.0,
            "16": 552238592.0,
            "17": 552238592.0,
            "18": 552238592.0,
            "19": 552238592.0,
            "20": 552238592.0,
            "21": 552238592.0,
            "22": 552238592.0,
            "23": 552238592.0,
            "24": 552238592.0,
            "25": 552238592.0,
            "26": 552238592.0,
            "27": 552238592.0,
            "28": 552238592.0,
            "29": 552238592.0,
            "30": 552238592.0,
            "31": 552238592.0,
            "32": 552238592.0,
            "33": 552238592.0,
            "34": 552238592.0,
            "35": 552238592.0,
            "36": 552238592.0,
            "37": 552238592.0,
            "38": 552238592.0,
            "39": 552238592.0,
            "40": 552238592.0,
            "41": 552238592.0,
            "42": 552238592.0,
            "43": 552238592.0,
            "44": 552238592.0,
            "45": 552238592.0,
            "46": 552238592.0,
            "47": 552238592.0,
            "48": 552238592.0,
            "49": 552238592.0,
            "50": 552238592.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 4576563200.0,
            "2": 4673179648.0,
            "3": 4673179648.0,
            "4": 4673179648.0,
            "5": 4673179648.0,
            "6": 4673179648.0,
            "7": 4673179648.0,
            "8": 4673179648.0,
            "9": 4673179648.0,
            "10": 4673179648.0,
            "11": 4673179648.0,
            "12": 4673179648.0,
            "13": 4673179648.0,
            "14": 4673179648.0,
            "15": 4673179648.0,
            "16": 4673179648.0,
            "17": 4673179648.0,
            "18": 4673179648.0,
            "19": 4673179648.0,
            "20": 4673179648.0,
            "21": 4673179648.0,
            "22": 4673179648.0,
            "23": 4673179648.0,
            "24": 4673179648.0,
            "25": 4673179648.0,
            "26": 4673179648.0,
            "27": 4673179648.0,
            "28": 4673179648.0,
            "29": 4673179648.0,
            "30": 4673179648.0,
            "31": 4673179648.0,
            "32": 4673179648.0,
            "33": 4673179648.0,
            "34": 4673179648.0,
            "35": 4673179648.0,
            "36": 4673179648.0,
            "37": 4673179648.0,
            "38": 4673179648.0,
            "39": 4673179648.0,
            "40": 4673179648.0,
            "41": 4673179648.0,
            "42": 4673179648.0,
            "43": 4673179648.0,
            "44": 4673179648.0,
            "45": 4673179648.0,
            "46": 4673179648.0,
            "47": 4673179648.0,
            "48": 4673179648.0,
            "49": 4673179648.0,
            "50": 4673179648.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 13.01978,
            "2": 0.13386,
            "3": 0.10421,
            "4": 0.10575,
            "5": 0.10347,
            "6": 0.10366,
            "7": 0.10198,
            "8": 0.10204,
            "9": 0.10153,
            "10": 0.10361,
            "11": 0.10226,
            "12": 0.31034,
            "13": 0.36244,
            "14": 0.32183,
            "15": 0.09858,
            "16": 0.10098,
            "17": 0.10218,
            "18": 0.09859,
            "19": 0.09858,
            "20": 0.0985,
            "21": 0.09758,
            "22": 0.0984,
            "23": 0.09686,
            "24": 0.09763,
            "25": 0.09689,
            "26": 0.0979,
            "27": 0.09858,
            "28": 0.09763,
            "29": 0.09678,
            "30": 0.09714,
            "31": 0.10001,
            "32": 0.09705,
            "33": 0.09776,
            "34": 0.09662,
            "35": 0.09763,
            "36": 0.10137,
            "37": 0.10113,
            "38": 0.09825,
            "39": 0.09976,
            "40": 0.09925,
            "41": 0.09738,
            "42": 0.09904,
            "43": 0.10108,
            "44": 0.09921,
            "45": 0.09873,
            "46": 0.10018,
            "47": 0.09927,
            "48": 0.09914,
            "49": 0.09907,
            "50": 0.09879
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 1
  --use-distributed-optimizer: true
  --no-mmap-bin-files: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 5,
        "values": {
            "1": 10.84092,
            "5": 10.8406,
            "10": 10.80966,
            "15": 10.79977,
            "20": 10.705,
            "25": 10.53578,
            "30": 10.35453,
            "35": 10.27067,
            "40": 10.0777,
            "45": 9.82283,
            "50": 9.90094,
            "55": 9.86286,
            "60": 9.48141,
            "65": 8.93755,
            "70": 9.7101,
            "75": 9.40718,
            "80": 9.38973,
            "85": 9.5973,
            "90": 9.8035,
            "95": 9.5057,
            "100": 9.38833
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 5,
        "values": {
            "1": 1629.0,
            "5": 1944.0,
            "10": 1346.0,
            "15": 1926.0,
            "20": 1656.0,
            "25": 1631.0,
            "30": 1980.0,
            "35": 1957.0,
            "40": 2272.0,
            "45": 2175.0,
            "50": 2352.0,
            "55": 2457.0,
            "60": 2531.0,
            "65": 2658.0,
            "70": 3403.0,
            "75": 2689.0,
            "80": 3367.0,
            "85": 3361.0,
            "90": 3070.0,
            "95": 3344.0,
            "100": 3359.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 5,
        "values": {
            "1": 552128512.0,
            "5": 552128512.0,
            "10": 552128512.0,
            "15": 552128512.0,
            "20": 552128512.0,
            "25": 552128512.0,
            "30": 552128512.0,
            "35": 552128512.0,
            "40": 552128512.0,
            "45": 552128512.0,
            "50": 552128512.0,
            "55": 552128512.0,
            "60": 552128512.0,
            "65": 552128512.0,
            "70": 552128512.0,
            "75": 552128512.0,
            "80": 552128512.0,
            "85": 552128512.0,
            "90": 552128512.0,
            "95": 552128512.0,
            "100": 552128512.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 5,
        "values": {
            "1": 2615097856.0,
            "5": 2711714304.0,
            "10": 2711714304.0,
            "15": 2711714304.0,
            "20": 2711714304.0,
            "25": 2711714304.0,
            "30": 2711714304.0,
            "35": 2711714304.0,
            "40": 2711714304.0,
            "45": 2711714304.0,
            "50": 2711714304.0,
            "55": 2711714304.0,
            "60": 2711714304.0,
            "65": 2711714304.0,
            "70": 2711714304.0,
            "75": 2711714304.0,
            "80": 2711714304.0,
            "85": 2711714304.0,
            "90": 2711714304.0,
            "95": 2711714304.0,
            "100": 2711714304.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 5,
        "values": {
            "1": 3.0127,
            "5": 0.09342,
            "10": 0.08689,
            "15": 0.08674,
            "20": 0.08662,
            "25": 0.08586,
            "30": 0.0867,
            "35": 0.0866,
            "40": 0.08627,
            "45": 0.08714,
            "50": 0.08386,
            "55": 0.0862,
            "60": 0.08375,
            "65": 0.08427,
            "70": 0.08439,
            "75": 0.08449,
            "80": 0.0843,
            "85": 0.08404,
            "90": 0.08808,
            "95": 0.08627,
            "100": 0.086
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.89631, "5": 10.89154, "10": 10.88361, "15": 10.84798, "20": 10.74822, "25": 10.5931, "30": 10.41201, "35": 10.28193, "40": 10.09271, "45": 9.84189, "50": 9.91347, "55": 9.88574, "60": 9.50245, "65": 8.94515, "70": 9.74454, "75": 9.42524, "80": 9.40453, "85": 9.61293, "90": 9.81672, "95": 9.5184, "100": 9.39924}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1483.0, "5": 1903.0, "10": 1427.0, "15": 1901.0, "20": 1639.0, "25": 1672.0, "30": 1938.0, "35": 1982.0, "40": 2407.0, "45": 2210.0, "50": 2492.0, "55": 2372.0, "60": 2430.0, "65": 2683.0, "70": 3152.0, "75": 2581.0, "80": 3246.0, "85": 3425.0, "90": 3023.0, "95": 3403.0, "100": 3199.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 581488640.0, "5": 581488640.0, "10": 581488640.0, "15": 581488640.0, "20": 581488640.0, "25": 581488640.0, "30": 581488640.0, "35": 581488640.0, "40": 581488640.0, "45": 581488640.0, "50": 581488640.0, "55": 581488640.0, "60": 581488640.0, "65": 581488640.0, "70": 581488640.0, "75": 581488640.0, "80": 581488640.0, "85": 581488640.0, "90": 581488640.0, "95": 581488640.0, "100": 581488640.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2594126336.0, "5": 2690742784.0, "10": 2690742784.0, "15": 2690742784.0, "20": 2690742784.0, "25": 2690742784.0, "30": 2690742784.0, "35": 2690742784.0, "40": 2690742784.0, "45": 2690742784.0, "50": 2690742784.0, "55": 2690742784.0, "60": 2690742784.0, "65": 2690742784.0, "70": 2690742784.0, "75": 2690742784.0, "80": 2690742784.0, "85": 2690742784.0, "90": 2690742784.0, "95": 2690742784.0, "100": 2690742784.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 8.17366, "5": 0.04869, "10": 0.04921, "15": 0.04903, "20": 0.0487, "25": 0.04905, "30": 0.04875, "35": 0.04924, "40": 0.04916, "45": 0.07006, "50": 0.04877, "55": 0.05067, "60": 0.04939, "65": 0.05135, "70": 0.04954, "75": 0.05027, "80": 0.0502, "85": 0.05036, "90": 0.04936, "95": 0.04886, "100": 0.05002}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.8401, "5": 10.84032, "10": 10.8134, "15": 10.80276, "20": 10.705, "25": 10.53849, "30": 10.35521, "35": 10.27148, "40": 10.08049, "45": 9.8229, "50": 9.90118, "55": 9.86424, "60": 9.4803, "65": 8.93743, "70": 9.71026, "75": 9.4088, "80": 9.39078, "85": 9.59741, "90": 9.80389, "95": 9.50562, "100": 9.38808}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1670.0, "5": 1970.0, "10": 1397.0, "15": 1936.0, "20": 1715.0, "25": 1703.0, "30": 1957.0, "35": 1991.0, "40": 2239.0, "45": 2144.0, "50": 2377.0, "55": 2436.0, "60": 2445.0, "65": 2645.0, "70": 3337.0, "75": 2726.0, "80": 3356.0, "85": 3336.0, "90": 3044.0, "95": 3484.0, "100": 3467.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 552238592.0, "5": 552238592.0, "10": 552238592.0, "15": 552238592.0, "20": 552238592.0, "25": 552238592.0, "30": 552238592.0, "35": 552238592.0, "40": 552238592.0, "45": 552238592.0, "50": 552238592.0, "55": 552238592.0, "60": 552238592.0, "65": 552238592.0, "70": 552238592.0, "75": 552238592.0, "80": 552238592.0, "85": 552238592.0, "90": 552238592.0, "95": 552238592.0, "100": 552238592.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 4576563200.0, "5": 4673179648.0, "10": 4673179648.0, "15": 4673179648.0, "20": 4673179648.0, "25": 4673179648.0, "30": 4673179648.0, "35": 4673179648.0, "40": 4673179648.0, "45": 4673179648.0, "50": 4673179648.0, "55": 4673179648.0, "60": 4673179648.0, "65": 4673179648.0, "70": 4673179648.0, "75": 4673179648.0, "80": 4673179648.0, "85": 4673179648.0, "90": 4673179648.0, "95": 4673179648.0, "100": 4673179648.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.31015, "5": 0.09357, "10": 0.0921, "15": 0.09207, "20": 0.09029, "25": 0.08982, "30": 0.09091, "35": 0.0896, "40": 0.09037, "45": 0.0897, "50": 0.09035, "55": 0.09151, "60": 0.09116, "65": 0.09006, "70": 0.08969, "75": 0.0902, "80": 0.08999, "85": 0.08996, "90": 0.08888, "95": 0.09033, "100": 0.09004}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_frozen_resume_torch_dist_dist_optimizer/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 1
  --use-distributed-optimizer: true
  --no-ckpt-fully-parallel-save: true
  --async-save: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-checkpoint-opt_param-scheduler: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --log-memory-to-tensorboard: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: frozen-resume


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_mup/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 1
  --use-distributed-optimizer: true
  --no-mmap-bin-files: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
  # MuP (Maximal Update Parameterization) args
  --use-mup: true
  --mup-base-hidden-size: 256
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.84092, "5": 10.84059, "10": 10.80969, "15": 10.79985, "20": 10.70512, "25": 10.53597, "30": 10.35474, "35": 10.27088, "40": 10.07794, "45": 9.82299, "50": 9.90107, "55": 9.86297, "60": 9.48153, "65": 8.93769, "70": 9.71012, "75": 9.40721, "80": 9.38978, "85": 9.59734, "90": 9.80354, "95": 9.50572, "100": 9.38837}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1628.0, "5": 1937.0, "10": 1325.0, "15": 1948.0, "20": 1610.0, "25": 1634.0, "30": 1929.0, "35": 2007.0, "40": 2254.0, "45": 2153.0, "50": 2335.0, "55": 2408.0, "60": 2509.0, "65": 2687.0, "70": 3286.0, "75": 2763.0, "80": 3357.0, "85": 3245.0, "90": 3086.0, "95": 3424.0, "100": 3401.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 552128512.0, "5": 552128512.0, "10": 552128512.0, "15": 552128512.0, "20": 552128512.0, "25": 552128512.0, "30": 552128512.0, "35": 552128512.0, "40": 552128512.0, "45": 552128512.0, "50": 552128512.0, "55": 552128512.0, "60": 552128512.0, "65": 552128512.0, "70": 552128512.0, "75": 552128512.0, "80": 552128512.0, "85": 552128512.0, "90": 552128512.0, "95": 552128512.0, "100": 552128512.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2615097856.0, "5": 2711714304.0, "10": 2711714304.0, "15": 2711714304.0, "20": 2711714304.0, "25": 2711714304.0, "30": 2711714304.0, "35": 2711714304.0, "40": 2711714304.0, "45": 2711714304.0, "50": 2711714304.0, "55": 2711714304.0, "60": 2711714304.0, "65": 2711714304.0, "70": 2711714304.0, "75": 2711714304.0, "80": 2711714304.0, "85": 2711714304.0, "90": 2711714304.0, "95": 2711714304.0, "100": 2711714304.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 5.22785, "5": 0.10915, "10": 0.1088, "15": 0.11097, "20": 0.1066, "25": 0.10661, "30": 0.10875, "35": 0.10698, "40": 0.10675, "45": 0.10779, "50": 0.10869, "55": 0.10963, "60": 0.10676, "65": 0.10842, "70": 0.10681, "75": 0.10678, "80": 0.10683, "85": 0.1072, "90": 0.10704, "95": 0.10665, "100": 0.10819}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.77447,
            "2": 10.78365,
            "3": 10.78344,
            "4": 10.74824,
            "5": 10.81984,
            "6": 10.82302,
            "7": 10.79059,
            "8": 10.77958,
            "9": 10.78598,
            "10": 10.74454,
            "11": 10.83266,
            "12": 10.80426,
            "13": 10.82087,
            "14": 10.8257,
            "15": 10.74203,
            "16": 10.74906,
            "17": 10.72516,
            "18": 10.74181,
            "19": 10.74413,
            "20": 10.63677,
            "21": 10.6305,
            "22": 10.47963,
            "23": 10.65976,
            "24": 10.52481,
            "25": 10.47557,
            "26": 10.54117,
            "27": 10.55492,
            "28": 10.52139,
            "29": 10.53597,
            "30": 10.30531,
            "31": 10.06441,
            "32": 10.41574,
            "33": 10.42201,
            "34": 10.17392,
            "35": 10.22404,
            "36": 10.18502,
            "37": 10.30415,
            "38": 10.14997,
            "39": 10.37042,
            "40": 10.03995,
            "41": 10.10953,
            "42": 10.17942,
            "43": 9.79746,
            "44": 9.90813,
            "45": 9.79805,
            "46": 9.79659,
            "47": 10.12109,
            "48": 9.82082,
            "49": 9.50495,
            "50": 9.88028,
            "51": 9.83616,
            "52": 9.72316,
            "53": 10.05321,
            "54": 9.93747,
            "55": 9.87386,
            "56": 9.6045,
            "57": 9.45231,
            "58": 9.81878,
            "59": 9.57719,
            "60": 9.48532,
            "61": 9.68547,
            "62": 9.97908,
            "63": 9.36418,
            "64": 9.76205,
            "65": 8.94098,
            "66": 9.69474,
            "67": 9.36662,
            "68": 9.77744,
            "69": 9.79003,
            "70": 9.72374,
            "71": 9.62037,
            "72": 9.5742,
            "73": 9.48575,
            "74": 8.9273,
            "75": 9.41656,
            "76": 9.07746,
            "77": 10.05445,
            "78": 9.71916,
            "79": 9.37306,
            "80": 9.40002,
            "81": 9.47843,
            "82": 9.69866,
            "83": 9.31154,
            "84": 9.41458,
            "85": 9.61163,
            "86": 9.07421,
            "87": 9.5939,
            "88": 9.74929,
            "89": 9.59848,
            "90": 9.82763,
            "91": 9.33629,
            "92": 9.35805,
            "93": 9.08555,
            "94": 8.8279,
            "95": 9.53034,
            "96": 9.5266,
            "97": 9.30484,
            "98": 9.67005,
            "99": 8.89605,
            "100": 9.40698
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1587.0,
            "2": 1684.0,
            "3": 1639.0,
            "4": 1845.0,
            "5": 2013.0,
            "6": 1866.0,
            "7": 1966.0,
            "8": 1660.0,
            "9": 1910.0,
            "10": 1448.0,
            "11": 2024.0,
            "12": 1767.0,
            "13": 1863.0,
            "14": 1833.0,
            "15": 2020.0,
            "16": 1957.0,
            "17": 1799.0,
            "18": 1751.0,
            "19": 1745.0,
            "20": 1673.0,
            "21": 1901.0,
            "22": 1689.0,
            "23": 2062.0,
            "24": 1658.0,
            "25": 1594.0,
            "26": 1728.0,
            "27": 1913.0,
            "28": 1933.0,
            "29": 1983.0,
            "30": 1950.0,
            "31": 1616.0,
            "32": 1879.0,
            "33": 2091.0,
            "34": 1833.0,
            "35": 1993.0,
            "36": 2025.0,
            "37": 2385.0,
            "38": 2102.0,
            "39": 2431.0,
            "40": 2238.0,
            "41": 2359.0,
            "42": 2225.0,
            "43": 2138.0,
            "44": 2090.0,
            "45": 2178.0,
            "46": 2287.0,
            "47": 2655.0,
            "48": 2336.0,
            "49": 2214.0,
            "50": 2526.0,
            "51": 2690.0,
            "52": 2683.0,
            "53": 2975.0,
            "54": 2691.0,
            "55": 2449.0,
            "56": 2802.0,
            "57": 2291.0,
            "58": 2876.0,
            "59": 2795.0,
            "60": 2457.0,
            "61": 2992.0,
            "62": 2686.0,
            "63": 2532.0,
            "64": 2965.0,
            "65": 2585.0,
            "66": 3084.0,
            "67": 2858.0,
            "68": 2869.0,
            "69": 2976.0,
            "70": 3125.0,
            "71": 3013.0,
            "72": 2619.0,
            "73": 3083.0,
            "74": 2056.0,
            "75": 2613.0,
            "76": 2984.0,
            "77": 3289.0,
            "78": 3241.0,
            "79": 3185.0,
            "80": 3326.0,
            "81": 3485.0,
            "82": 3309.0,
            "83": 2835.0,
            "84": 3288.0,
            "85": 3314.0,
            "86": 2900.0,
            "87": 3890.0,
            "88": 3216.0,
            "89": 3337.0,
            "90": 3102.0,
            "91": 2785.0,
            "92": 3063.0,
            "93": 2919.0,
            "94": 3367.0,
            "95": 3351.0,
            "96": 3415.0,
            "97": 3221.0,
            "98": 3763.0,
            "99": 3123.0,
            "100": 3091.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 581489664.0,
            "2": 581489664.0,
            "3": 581489664.0,
            "4": 581489664.0,
            "5": 581489664.0,
            "6": 581489664.0,
            "7": 581489664.0,
            "8": 581489664.0,
            "9": 581489664.0,
            "10": 581489664.0,
            "11": 581489664.0,
            "12": 581489664.0,
            "13": 581489664.0,
            "14": 581489664.0,
            "15": 581489664.0,
            "16": 581489664.0,
            "17": 581489664.0,
            "18": 581489664.0,
            "19": 581489664.0,
            "20": 581489664.0,
            "21": 581489664.0,
            "22": 581489664.0,
            "23": 581489664.0,
            "24": 581489664.0,
            "25": 581489664.0,
            "26": 581489664.0,
            "27": 581489664.0,
            "28": 581489664.0,
            "29": 581489664.0,
            "30": 581489664.0,
            "31": 581489664.0,
            "32": 581489664.0,
            "33": 581489664.0,
            "34": 581489664.0,
            "35": 581489664.0,
            "36": 581489664.0,
            "37": 581489664.0,
            "38": 581489664.0,
            "39": 581489664.0,
            "40": 581489664.0,
            "41": 581489664.0,
            "42": 581489664.0,
            "43": 581489664.0,
            "44": 581489664.0,
            "45": 581489664.0,
            "46": 581489664.0,
            "47": 581489664.0,
            "48": 581489664.0,
            "49": 581489664.0,
            "50": 581489664.0,
            "51": 581489664.0,
            "52": 581489664.0,
            "53": 581489664.0,
            "54": 581489664.0,
            "55": 581489664.0,
            "56": 581489664.0,
            "57": 581489664.0,
            "58": 581489664.0,
            "59": 581489664.0,
            "60": 581489664.0,
            "61": 581489664.0,
            "62": 581489664.0,
            "63": 581489664.0,
            "64": 581489664.0,
            "65": 581489664.0,
            "66": 581489664.0,
            "67": 581489664.0,
            "68": 581489664.0,
            "69": 581489664.0,
            "70": 581489664.0,
            "71": 581489664.0,
            "72": 581489664.0,
            "73": 581489664.0,
            "74": 581489664.0,
            "75": 581489664.0,
            "76": 581489664.0,
            "77": 581489664.0,
            "78": 581489664.0,
            "79": 581489664.0,
            "80": 581489664.0,
            "81": 581489664.0,
            "82": 581489664.0,
            "83": 581489664.0,
            "84": 581489664.0,
            "85": 581489664.0,
            "86": 581489664.0,
            "87": 581489664.0,
            "88": 581489664.0,
            "89": 581489664.0,
            "90": 581489664.0,
            "91": 581489664.0,
            "92": 581489664.0,
            "93": 581489664.0,
            "94": 581489664.0,
            "95": 581489664.0,
            "96": 581489664.0,
            "97": 581489664.0,
            "98": 581489664.0,
            "99": 581489664.0,
            "100": 581489664.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2644459008.0,
            "2": 2741977600.0,
            "3": 2741977600.0,
            "4": 2741977600.0,
            "5": 2741977600.0,
            "6": 2741977600.0,
            "7": 2741977600.0,
            "8": 2741977600.0,
            "9": 2741977600.0,
            "10": 2741977600.0,
            "11": 2741977600.0,
            "12": 2741977600.0,
            "13": 2741977600.0,
            "14": 2741977600.0,
            "15": 2741977600.0,
            "16": 2741977600.0,
            "17": 2741977600.0,
            "18": 2741977600.0,
            "19": 2741977600.0,
            "20": 2741977600.0,
            "21": 2741977600.0,
            "22": 2741977600.0,
            "23": 2741977600.0,
            "24": 2741977600.0,
            "25": 2741977600.0,
            "26": 2741977600.0,
            "27": 2741977600.0,
            "28": 2741977600.0,
            "29": 2741977600.0,
            "30": 2741977600.0,
            "31": 2741977600.0,
            "32": 2741977600.0,
            "33": 2741977600.0,
            "34": 2741977600.0,
            "35": 2741977600.0,
            "36": 2741977600.0,
            "37": 2741977600.0,
            "38": 2741977600.0,
            "39": 2741977600.0,
            "40": 2741977600.0,
            "41": 2741977600.0,
            "42": 2741977600.0,
            "43": 2741977600.0,
            "44": 2741977600.0,
            "45": 2741977600.0,
            "46": 2741977600.0,
            "47": 2741977600.0,
            "48": 2741977600.0,
            "49": 2741977600.0,
            "50": 2741977600.0,
            "51": 2741977600.0,
            "52": 2741977600.0,
            "53": 2741977600.0,
            "54": 2741977600.0,
            "55": 2741977600.0,
            "56": 2741977600.0,
            "57": 2741977600.0,
            "58": 2741977600.0,
            "59": 2741977600.0,
            "60": 2741977600.0,
            "61": 2741977600.0,
            "62": 2741977600.0,
            "63": 2741977600.0,
            "64": 2741977600.0,
            "65": 2741977600.0,
            "66": 2741977600.0,
            "67": 2741977600.0,
            "68": 2741977600.0,
            "69": 2741977600.0,
            "70": 2741977600.0,
            "71": 2741977600.0,
            "72": 2741977600.0,
            "73": 2741977600.0,
            "74": 2741977600.0,
            "75": 2741977600.0,
            "76": 2741977600.0,
            "77": 2741977600.0,
            "78": 2741977600.0,
            "79": 2741977600.0,
            "80": 2741977600.0,
            "81": 2741977600.0,
            "82": 2741977600.0,
            "83": 2741977600.0,
            "84": 2741977600.0,
            "85": 2741977600.0,
            "86": 2741977600.0,
            "87": 2741977600.0,
            "88": 2741977600.0,
            "89": 2741977600.0,
            "90": 2741977600.0,
            "91": 2741977600.0,
            "92": 2741977600.0,
            "93": 2741977600.0,
            "94": 2741977600.0,
            "95": 2741977600.0,
            "96": 2741977600.0,
            "97": 2741977600.0,
            "98": 2741977600.0,
            "99": 2741977600.0,
            "100": 2741977600.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 3.91,
            "3": 0.09176,
            "4": 0.07867,
            "5": 0.07784,
            "6": 0.07723,
            "7": 0.07698,
            "8": 0.0758,
            "9": 0.07589,
            "10": 0.07634,
            "11": 0.07626,
            "12": 0.0768,
            "13": 0.07681,
            "14": 0.07682,
            "15": 0.0759,
            "16": 0.07596,
            "17": 0.07651,
            "18": 0.07603,
            "19": 0.07618,
            "20": 0.07636,
            "21": 0.07747,
            "22": 0.07846,
            "23": 0.07695,
            "24": 0.0769,
            "25": 0.07901,
            "26": 0.07721,
            "27": 0.07758,
            "28": 0.07805,
            "29": 0.07566,
            "30": 0.07566,
            "31": 0.07601,
            "32": 0.07665,
            "33": 0.07669,
            "34": 0.07738,
            "35": 0.07656,
            "36": 0.07717,
            "37": 0.07666,
            "38": 0.07714,
            "39": 0.07666,
            "40": 0.07677,
            "41": 0.07635,
            "42": 0.07656,
            "43": 0.07591,
            "44": 0.07577,
            "45": 0.07726,
            "46": 0.07728,
            "47": 0.07659,
            "48": 0.07695,
            "49": 0.07762,
            "50": 0.07653,
            "51": 0.09458,
            "52": 0.07903,
            "53": 0.07694,
            "54": 0.0755,
            "55": 0.07647,
            "56": 0.07565,
            "57": 0.07705,
            "58": 0.07632,
            "59": 0.07796,
            "60": 0.07569,
            "61": 0.07613,
            "62": 0.07503,
            "63": 0.0764,
            "64": 0.07603,
            "65": 0.07611,
            "66": 0.07585,
            "67": 0.07858,
            "68": 0.07626,
            "69": 0.07694,
            "70": 0.07697,
            "71": 0.07593,
            "72": 0.07697,
            "73": 0.07657,
            "74": 0.07722,
            "75": 0.07641,
            "76": 0.07669,
            "77": 0.07681,
            "78": 0.07631,
            "79": 0.07651,
            "80": 0.07632,
            "81": 0.07737,
            "82": 0.07659,
            "83": 0.07741,
            "84": 0.0764,
            "85": 0.07666,
            "86": 0.07693,
            "87": 0.07693,
            "88": 0.07656,
            "89": 0.07649,
            "90": 0.07761,
            "91": 0.07685,
            "92": 0.07639,
            "93": 0.07647,
            "94": 0.07726,
            "95": 0.07663,
            "96": 0.07721,
            "97": 0.0767,
            "98": 0.07736,
            "99": 0.07719,
            "100": 0.07713
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_gb200_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.83614,
            "52": 9.72315,
            "53": 10.05318,
            "54": 9.93747,
            "55": 9.87384,
            "56": 9.60449,
            "57": 9.4523,
            "58": 9.8188,
            "59": 9.5772,
            "60": 9.48534,
            "61": 9.68548,
            "62": 9.97906,
            "63": 9.36419,
            "64": 9.76203,
            "65": 8.94097,
            "66": 9.69475,
            "67": 9.36656,
            "68": 9.77745,
            "69": 9.79001,
            "70": 9.72374,
            "71": 9.62037,
            "72": 9.57423,
            "73": 9.48575,
            "74": 8.92729,
            "75": 9.41651,
            "76": 9.07747,
            "77": 10.05444,
            "78": 9.71914,
            "79": 9.37306,
            "80": 9.40003,
            "81": 9.47844,
            "82": 9.69867,
            "83": 9.31155,
            "84": 9.41457,
            "85": 9.61163,
            "86": 9.07418,
            "87": 9.5939,
            "88": 9.74928,
            "89": 9.5985,
            "90": 9.82761,
            "91": 9.33631,
            "92": 9.35805,
            "93": 9.08552,
            "94": 8.82786,
            "95": 9.5303,
            "96": 9.52663,
            "97": 9.30483,
            "98": 9.67007,
            "99": 8.89606,
            "100": 9.40702
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2650.0,
            "52": 2700.0,
            "53": 2863.0,
            "54": 2676.0,
            "55": 2390.0,
            "56": 2753.0,
            "57": 2430.0,
            "58": 2919.0,
            "59": 2831.0,
            "60": 2428.0,
            "61": 2932.0,
            "62": 2724.0,
            "63": 2579.0,
            "64": 2987.0,
            "65": 2506.0,
            "66": 2886.0,
            "67": 2871.0,
            "68": 2870.0,
            "69": 3001.0,
            "70": 3294.0,
            "71": 3043.0,
            "72": 2614.0,
            "73": 3054.0,
            "74": 2024.0,
            "75": 2507.0,
            "76": 3020.0,
            "77": 3253.0,
            "78": 3230.0,
            "79": 3210.0,
            "80": 3252.0,
            "81": 3614.0,
            "82": 3395.0,
            "83": 2919.0,
            "84": 3296.0,
            "85": 3320.0,
            "86": 2865.0,
            "87": 3931.0,
            "88": 3240.0,
            "89": 3428.0,
            "90": 3127.0,
            "91": 2815.0,
            "92": 3098.0,
            "93": 2796.0,
            "94": 3324.0,
            "95": 3428.0,
            "96": 3541.0,
            "97": 3216.0,
            "98": 3705.0,
            "99": 3184.0,
            "100": 3073.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 581489664.0,
            "52": 581489664.0,
            "53": 581489664.0,
            "54": 581489664.0,
            "55": 581489664.0,
            "56": 581489664.0,
            "57": 581489664.0,
            "58": 581489664.0,
            "59": 581489664.0,
            "60": 581489664.0,
            "61": 581489664.0,
            "62": 581489664.0,
            "63": 581489664.0,
            "64": 581489664.0,
            "65": 581489664.0,
            "66": 581489664.0,
            "67": 581489664.0,
            "68": 581489664.0,
            "69": 581489664.0,
            "70": 581489664.0,
            "71": 581489664.0,
            "72": 581489664.0,
            "73": 581489664.0,
            "74": 581489664.0,
            "75": 581489664.0,
            "76": 581489664.0,
            "77": 581489664.0,
            "78": 581489664.0,
            "79": 581489664.0,
            "80": 581489664.0,
            "81": 581489664.0,
            "82": 581489664.0,
            "83": 581489664.0,
            "84": 581489664.0,
            "85": 581489664.0,
            "86": 581489664.0,
            "87": 581489664.0,
            "88": 581489664.0,
            "89": 581489664.0,
            "90": 581489664.0,
            "91": 581489664.0,
            "92": 581489664.0,
            "93": 581489664.0,
            "94": 581489664.0,
            "95": 581489664.0,
            "96": 581489664.0,
            "97": 581489664.0,
            "98": 581489664.0,
            "99": 581489664.0,
            "100": 581489664.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2708568576.0,
            "52": 2742124032.0,
            "53": 2742124032.0,
            "54": 2742124032.0,
            "55": 2742124032.0,
            "56": 2742124032.0,
            "57": 2742124032.0,
            "58": 2742124032.0,
            "59": 2742124032.0,
            "60": 2742124032.0,
            "61": 2742124032.0,
            "62": 2742124032.0,
            "63": 2742124032.0,
            "64": 2742124032.0,
            "65": 2742124032.0,
            "66": 2742124032.0,
            "67": 2742124032.0,
            "68": 2742124032.0,
            "69": 2742124032.0,
            "70": 2742124032.0,
            "71": 2742124032.0,
            "72": 2742124032.0,
            "73": 2742124032.0,
            "74": 2742124032.0,
            "75": 2742124032.0,
            "76": 2742124032.0,
            "77": 2742124032.0,
            "78": 2742124032.0,
            "79": 2742124032.0,
            "80": 2742124032.0,
            "81": 2742124032.0,
            "82": 2742124032.0,
            "83": 2742124032.0,
            "84": 2742124032.0,
            "85": 2742124032.0,
            "86": 2742124032.0,
            "87": 2742124032.0,
            "88": 2742124032.0,
            "89": 2742124032.0,
            "90": 2742124032.0,
            "91": 2742124032.0,
            "92": 2742124032.0,
            "93": 2742124032.0,
            "94": 2742124032.0,
            "95": 2742124032.0,
            "96": 2742124032.0,
            "97": 2742124032.0,
            "98": 2742124032.0,
            "99": 2742124032.0,
            "100": 2742124032.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": "nan",
            "52": 2.1716,
            "53": 0.09643,
            "54": 0.08435,
            "55": 0.08492,
            "56": 0.08409,
            "57": 0.08624,
            "58": 0.08522,
            "59": 0.08521,
            "60": 0.08445,
            "61": 0.08447,
            "62": 0.08412,
            "63": 0.08534,
            "64": 0.08529,
            "65": 0.08566,
            "66": 0.08409,
            "67": 0.08468,
            "68": 0.08268,
            "69": 0.08161,
            "70": 0.08416,
            "71": 0.08383,
            "72": 0.08425,
            "73": 0.08363,
            "74": 0.08451,
            "75": 0.08423,
            "76": 0.08453,
            "77": 0.08475,
            "78": 0.08435,
            "79": 0.0844,
            "80": 0.08466,
            "81": 0.08777,
            "82": 0.08524,
            "83": 0.08559,
            "84": 0.08524,
            "85": 0.08501,
            "86": 0.08518,
            "87": 0.08503,
            "88": 0.08555,
            "89": 0.0855,
            "90": 0.08584,
            "91": 0.08419,
            "92": 0.08467,
            "93": 0.08514,
            "94": 0.08518,
            "95": 0.08444,
            "96": 0.08484,
            "97": 0.08521,
            "98": 0.08697,
            "99": 0.08772,
            "100": 0.08544
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.89631,
            "2": 10.89416,
            "3": 10.88785,
            "4": 10.89141,
            "5": 10.89153,
            "6": 10.90002,
            "7": 10.89187,
            "8": 10.89886,
            "9": 10.90211,
            "10": 10.88366,
            "11": 10.87818,
            "12": 10.89332,
            "13": 10.89817,
            "14": 10.89242,
            "15": 10.84801,
            "16": 10.85399,
            "17": 10.83093,
            "18": 10.83988,
            "19": 10.82804,
            "20": 10.74824,
            "21": 10.73491,
            "22": 10.61715,
            "23": 10.72617,
            "24": 10.63178,
            "25": 10.59314,
            "26": 10.63368,
            "27": 10.63299,
            "28": 10.5826,
            "29": 10.58594,
            "30": 10.41207,
            "31": 10.15904,
            "32": 10.48361,
            "33": 10.46707,
            "34": 10.23815,
            "35": 10.28191,
            "36": 10.24054,
            "37": 10.36221,
            "38": 10.2031,
            "39": 10.40457,
            "40": 10.0927,
            "41": 10.15833,
            "42": 10.21932,
            "43": 9.8436,
            "44": 9.97302,
            "45": 9.84192,
            "46": 9.82018,
            "47": 10.14968,
            "48": 9.86019,
            "49": 9.54235,
            "50": 9.91348,
            "51": 9.85448,
            "52": 9.73931,
            "53": 10.0743,
            "54": 9.96913,
            "55": 9.8857,
            "56": 9.62437,
            "57": 9.48228,
            "58": 9.83485,
            "59": 9.5873,
            "60": 9.50243,
            "61": 9.69341,
            "62": 9.98806,
            "63": 9.39103,
            "64": 9.78025,
            "65": 8.94515,
            "66": 9.70491,
            "67": 9.37249,
            "68": 9.78331,
            "69": 9.7906,
            "70": 9.74449,
            "71": 9.62299,
            "72": 9.58459,
            "73": 9.5051,
            "74": 8.94308,
            "75": 9.42526,
            "76": 9.07602,
            "77": 10.06351,
            "78": 9.72309,
            "79": 9.37504,
            "80": 9.40451,
            "81": 9.47792,
            "82": 9.69668,
            "83": 9.30716,
            "84": 9.41528,
            "85": 9.61293,
            "86": 9.07193,
            "87": 9.58838,
            "88": 9.74763,
            "89": 9.59984,
            "90": 9.8167,
            "91": 9.33789,
            "92": 9.35602,
            "93": 9.07424,
            "94": 8.8351,
            "95": 9.51839,
            "96": 9.52393,
            "97": 9.30921,
            "98": 9.66745,
            "99": 8.88417,
            "100": 9.39922
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1524.0,
            "2": 1653.0,
            "3": 1732.0,
            "4": 1794.0,
            "5": 1835.0,
            "6": 1904.0,
            "7": 1919.0,
            "8": 1747.0,
            "9": 1860.0,
            "10": 1363.0,
            "11": 1886.0,
            "12": 1808.0,
            "13": 1956.0,
            "14": 1754.0,
            "15": 1833.0,
            "16": 1855.0,
            "17": 1780.0,
            "18": 1729.0,
            "19": 1777.0,
            "20": 1697.0,
            "21": 1884.0,
            "22": 1765.0,
            "23": 2080.0,
            "24": 1675.0,
            "25": 1705.0,
            "26": 1767.0,
            "27": 1858.0,
            "28": 2041.0,
            "29": 1983.0,
            "30": 1959.0,
            "31": 1555.0,
            "32": 1953.0,
            "33": 2118.0,
            "34": 1862.0,
            "35": 1973.0,
            "36": 1880.0,
            "37": 2313.0,
            "38": 2319.0,
            "39": 2419.0,
            "40": 2393.0,
            "41": 2485.0,
            "42": 2393.0,
            "43": 2025.0,
            "44": 2129.0,
            "45": 2131.0,
            "46": 2281.0,
            "47": 2440.0,
            "48": 2405.0,
            "49": 2336.0,
            "50": 2472.0,
            "51": 2591.0,
            "52": 2526.0,
            "53": 2940.0,
            "54": 2660.0,
            "55": 2391.0,
            "56": 2665.0,
            "57": 2437.0,
            "58": 2987.0,
            "59": 2660.0,
            "60": 2398.0,
            "61": 2806.0,
            "62": 2734.0,
            "63": 2411.0,
            "64": 2967.0,
            "65": 2627.0,
            "66": 2869.0,
            "67": 2810.0,
            "68": 2718.0,
            "69": 2774.0,
            "70": 3156.0,
            "71": 3066.0,
            "72": 2566.0,
            "73": 3156.0,
            "74": 1966.0,
            "75": 2690.0,
            "76": 2965.0,
            "77": 3234.0,
            "78": 3138.0,
            "79": 2977.0,
            "80": 3317.0,
            "81": 3629.0,
            "82": 3248.0,
            "83": 2756.0,
            "84": 3268.0,
            "85": 3380.0,
            "86": 2755.0,
            "87": 3685.0,
            "88": 3079.0,
            "89": 3284.0,
            "90": 3041.0,
            "91": 2674.0,
            "92": 3078.0,
            "93": 2682.0,
            "94": 3367.0,
            "95": 3456.0,
            "96": 3380.0,
            "97": 3163.0,
            "98": 3686.0,
            "99": 3131.0,
            "100": 3129.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 581489664.0,
            "2": 581489664.0,
            "3": 581489664.0,
            "4": 581489664.0,
            "5": 581489664.0,
            "6": 581489664.0,
            "7": 581489664.0,
            "8": 581489664.0,
            "9": 581489664.0,
            "10": 581489664.0,
            "11": 581489664.0,
            "12": 581489664.0,
            "13": 581489664.0,
            "14": 581489664.0,
            "15": 581489664.0,
            "16": 581489664.0,
            "17": 581489664.0,
            "18": 581489664.0,
            "19": 581489664.0,
            "20": 581489664.0,
            "21": 581489664.0,
            "22": 581489664.0,
            "23": 581489664.0,
            "24": 581489664.0,
            "25": 581489664.0,
            "26": 581489664.0,
            "27": 581489664.0,
            "28": 581489664.0,
            "29": 581489664.0,
            "30": 581489664.0,
            "31": 581489664.0,
            "32": 581489664.0,
            "33": 581489664.0,
            "34": 581489664.0,
            "35": 581489664.0,
            "36": 581489664.0,
            "37": 581489664.0,
            "38": 581489664.0,
            "39": 581489664.0,
            "40": 581489664.0,
            "41": 581489664.0,
            "42": 581489664.0,
            "43": 581489664.0,
            "44": 581489664.0,
            "45": 581489664.0,
            "46": 581489664.0,
            "47": 581489664.0,
            "48": 581489664.0,
            "49": 581489664.0,
            "50": 581489664.0,
            "51": 581489664.0,
            "52": 581489664.0,
            "53": 581489664.0,
            "54": 581489664.0,
            "55": 581489664.0,
            "56": 581489664.0,
            "57": 581489664.0,
            "58": 581489664.0,
            "59": 581489664.0,
            "60": 581489664.0,
            "61": 581489664.0,
            "62": 581489664.0,
            "63": 581489664.0,
            "64": 581489664.0,
            "65": 581489664.0,
            "66": 581489664.0,
            "67": 581489664.0,
            "68": 581489664.0,
            "69": 581489664.0,
            "70": 581489664.0,
            "71": 581489664.0,
            "72": 581489664.0,
            "73": 581489664.0,
            "74": 581489664.0,
            "75": 581489664.0,
            "76": 581489664.0,
            "77": 581489664.0,
            "78": 581489664.0,
            "79": 581489664.0,
            "80": 581489664.0,
            "81": 581489664.0,
            "82": 581489664.0,
            "83": 581489664.0,
            "84": 581489664.0,
            "85": 581489664.0,
            "86": 581489664.0,
            "87": 581489664.0,
            "88": 581489664.0,
            "89": 581489664.0,
            "90": 581489664.0,
            "91": 581489664.0,
            "92": 581489664.0,
            "93": 581489664.0,
            "94": 581489664.0,
            "95": 581489664.0,
            "96": 581489664.0,
            "97": 581489664.0,
            "98": 581489664.0,
            "99": 581489664.0,
            "100": 581489664.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2594127360.0,
            "2": 2690743808.0,
            "3": 2690743808.0,
            "4": 2690743808.0,
            "5": 2690743808.0,
            "6": 2690743808.0,
            "7": 2690743808.0,
            "8": 2690743808.0,
            "9": 2690743808.0,
            "10": 2690743808.0,
            "11": 2690743808.0,
            "12": 2690743808.0,
            "13": 2690743808.0,
            "14": 2690743808.0,
            "15": 2690743808.0,
            "16": 2690743808.0,
            "17": 2690743808.0,
            "18": 2690743808.0,
            "19": 2690743808.0,
            "20": 2690743808.0,
            "21": 2690743808.0,
            "22": 2690743808.0,
            "23": 2690743808.0,
            "24": 2690743808.0,
            "25": 2690743808.0,
            "26": 2690743808.0,
            "27": 2690743808.0,
            "28": 2690743808.0,
            "29": 2690743808.0,
            "30": 2690743808.0,
            "31": 2690743808.0,
            "32": 2690743808.0,
            "33": 2690743808.0,
            "34": 2690743808.0,
            "35": 2690743808.0,
            "36": 2690743808.0,
            "37": 2690743808.0,
            "38": 2690743808.0,
            "39": 2690743808.0,
            "40": 2690743808.0,
            "41": 2690743808.0,
            "42": 2690743808.0,
            "43": 2690743808.0,
            "44": 2690743808.0,
            "45": 2690743808.0,
            "46": 2690743808.0,
            "47": 2690743808.0,
            "48": 2690743808.0,
            "49": 2690743808.0,
            "50": 2690743808.0,
            "51": 2690743808.0,
            "52": 2690743808.0,
            "53": 2690743808.0,
            "54": 2690743808.0,
            "55": 2690743808.0,
            "56": 2690743808.0,
            "57": 2690743808.0,
            "58": 2690743808.0,
            "59": 2690743808.0,
            "60": 2690743808.0,
            "61": 2690743808.0,
            "62": 2690743808.0,
            "63": 2690743808.0,
            "64": 2690743808.0,
            "65": 2690743808.0,
            "66": 2690743808.0,
            "67": 2690743808.0,
            "68": 2690743808.0,
            "69": 2690743808.0,
            "70": 2690743808.0,
            "71": 2690743808.0,
            "72": 2690743808.0,
            "73": 2690743808.0,
            "74": 2690743808.0,
            "75": 2690743808.0,
            "76": 2690743808.0,
            "77": 2690743808.0,
            "78": 2690743808.0,
            "79": 2690743808.0,
            "80": 2690743808.0,
            "81": 2690743808.0,
            "82": 2690743808.0,
            "83": 2690743808.0,
            "84": 2690743808.0,
            "85": 2690743808.0,
            "86": 2690743808.0,
            "87": 2690743808.0,
            "88": 2690743808.0,
            "89": 2690743808.0,
            "90": 2690743808.0,
            "91": 2690743808.0,
            "92": 2690743808.0,
            "93": 2690743808.0,
            "94": 2690743808.0,
            "95": 2690743808.0,
            "96": 2690743808.0,
            "97": 2690743808.0,
            "98": 2690743808.0,
            "99": 2690743808.0,
            "100": 2690743808.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 4.10968,
            "3": 0.0647,
            "4": 0.05011,
            "5": 0.04999,
            "6": 0.05,
            "7": 0.04977,
            "8": 0.04985,
            "9": 0.05058,
            "10": 0.04994,
            "11": 0.05101,
            "12": 0.05004,
            "13": 0.05386,
            "14": 0.0495,
            "15": 0.05196,
            "16": 0.04977,
            "17": 0.05163,
            "18": 0.04987,
            "19": 0.052,
            "20": 0.05008,
            "21": 0.05173,
            "22": 0.04974,
            "23": 0.05131,
            "24": 0.04947,
            "25": 0.05107,
            "26": 0.05122,
            "27": 0.05254,
            "28": 0.04977,
            "29": 0.05091,
            "30": 0.04997,
            "31": 0.05132,
            "32": 0.04976,
            "33": 0.05152,
            "34": 0.04986,
            "35": 0.05126,
            "36": 0.05009,
            "37": 0.05096,
            "38": 0.04998,
            "39": 0.05212,
            "40": 0.0502,
            "41": 0.05106,
            "42": 0.04979,
            "43": 0.05269,
            "44": 0.05006,
            "45": 0.0516,
            "46": 0.05082,
            "47": 0.05109,
            "48": 0.04999,
            "49": 0.05119,
            "50": 0.05038,
            "51": 0.05535,
            "52": 0.05046,
            "53": 0.05138,
            "54": 0.05027,
            "55": 0.05075,
            "56": 0.04981,
            "57": 0.05159,
            "58": 0.05058,
            "59": 0.05217,
            "60": 0.05149,
            "61": 0.04997,
            "62": 0.05194,
            "63": 0.04978,
            "64": 0.05149,
            "65": 0.05046,
            "66": 0.05122,
            "67": 0.05044,
            "68": 0.05143,
            "69": 0.05001,
            "70": 0.05119,
            "71": 0.04992,
            "72": 0.05165,
            "73": 0.04982,
            "74": 0.05177,
            "75": 0.05017,
            "76": 0.05181,
            "77": 0.04976,
            "78": 0.05205,
            "79": 0.04988,
            "80": 0.05179,
            "81": 0.04998,
            "82": 0.05171,
            "83": 0.04955,
            "84": 0.05129,
            "85": 0.04997,
            "86": 0.05196,
            "87": 0.04994,
            "88": 0.05222,
            "89": 0.04959,
            "90": 0.05149,
            "91": 0.05016,
            "92": 0.04991,
            "93": 0.05106,
            "94": 0.05058,
            "95": 0.05081,
            "96": 0.05036,
            "97": 0.05139,
            "98": 0.05245,
            "99": 0.05188,
            "100": 0.05016
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.85447,
            "52": 9.73936,
            "53": 10.07426,
            "54": 9.96915,
            "55": 9.88574,
            "56": 9.62437,
            "57": 9.4823,
            "58": 9.83483,
            "59": 9.58732,
            "60": 9.50245,
            "61": 9.69343,
            "62": 9.98806,
            "63": 9.39103,
            "64": 9.78021,
            "65": 8.94515,
            "66": 9.70494,
            "67": 9.37251,
            "68": 9.78329,
            "69": 9.79058,
            "70": 9.74454,
            "71": 9.62301,
            "72": 9.58458,
            "73": 9.50513,
            "74": 8.94312,
            "75": 9.42524,
            "76": 9.07601,
            "77": 10.06353,
            "78": 9.72308,
            "79": 9.37502,
            "80": 9.40453,
            "81": 9.47794,
            "82": 9.69667,
            "83": 9.3072,
            "84": 9.41526,
            "85": 9.61293,
            "86": 9.07195,
            "87": 9.5884,
            "88": 9.74762,
            "89": 9.59982,
            "90": 9.81672,
            "91": 9.3379,
            "92": 9.35605,
            "93": 9.07425,
            "94": 8.8351,
            "95": 9.5184,
            "96": 9.52391,
            "97": 9.30923,
            "98": 9.66743,
            "99": 8.88419,
            "100": 9.39924
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2598.0,
            "52": 2547.0,
            "53": 2957.0,
            "54": 2750.0,
            "55": 2372.0,
            "56": 2569.0,
            "57": 2395.0,
            "58": 2901.0,
            "59": 2741.0,
            "60": 2430.0,
            "61": 2868.0,
            "62": 2651.0,
            "63": 2507.0,
            "64": 3014.0,
            "65": 2683.0,
            "66": 2935.0,
            "67": 2783.0,
            "68": 2725.0,
            "69": 2788.0,
            "70": 3152.0,
            "71": 3026.0,
            "72": 2415.0,
            "73": 3122.0,
            "74": 1967.0,
            "75": 2581.0,
            "76": 3010.0,
            "77": 3294.0,
            "78": 3166.0,
            "79": 3150.0,
            "80": 3246.0,
            "81": 3566.0,
            "82": 3285.0,
            "83": 2817.0,
            "84": 3269.0,
            "85": 3425.0,
            "86": 2819.0,
            "87": 3577.0,
            "88": 3004.0,
            "89": 3323.0,
            "90": 3023.0,
            "91": 2661.0,
            "92": 3066.0,
            "93": 2691.0,
            "94": 3305.0,
            "95": 3403.0,
            "96": 3377.0,
            "97": 3242.0,
            "98": 3697.0,
            "99": 3112.0,
            "100": 3199.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 581489664.0,
            "52": 581489664.0,
            "53": 581489664.0,
            "54": 581489664.0,
            "55": 581489664.0,
            "56": 581489664.0,
            "57": 581489664.0,
            "58": 581489664.0,
            "59": 581489664.0,
            "60": 581489664.0,
            "61": 581489664.0,
            "62": 581489664.0,
            "63": 581489664.0,
            "64": 581489664.0,
            "65": 581489664.0,
            "66": 581489664.0,
            "67": 581489664.0,
            "68": 581489664.0,
            "69": 581489664.0,
            "70": 581489664.0,
            "71": 581489664.0,
            "72": 581489664.0,
            "73": 581489664.0,
            "74": 581489664.0,
            "75": 581489664.0,
            "76": 581489664.0,
            "77": 581489664.0,
            "78": 581489664.0,
            "79": 581489664.0,
            "80": 581489664.0,
            "81": 581489664.0,
            "82": 581489664.0,
            "83": 581489664.0,
            "84": 581489664.0,
            "85": 581489664.0,
            "86": 581489664.0,
            "87": 581489664.0,
            "88": 581489664.0,
            "89": 581489664.0,
            "90": 581489664.0,
            "91": 581489664.0,
            "92": 581489664.0,
            "93": 581489664.0,
            "94": 581489664.0,
            "95": 581489664.0,
            "96": 581489664.0,
            "97": 581489664.0,
            "98": 581489664.0,
            "99": 581489664.0,
            "100": 581489664.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2658236928.0,
            "52": 2691792384.0,
            "53": 2691792384.0,
            "54": 2691792384.0,
            "55": 2691792384.0,
            "56": 2691792384.0,
            "57": 2691792384.0,
            "58": 2691792384.0,
            "59": 2691792384.0,
            "60": 2691792384.0,
            "61": 2691792384.0,
            "62": 2691792384.0,
            "63": 2691792384.0,
            "64": 2691792384.0,
            "65": 2691792384.0,
            "66": 2691792384.0,
            "67": 2691792384.0,
            "68": 2691792384.0,
            "69": 2691792384.0,
            "70": 2691792384.0,
            "71": 2691792384.0,
            "72": 2691792384.0,
            "73": 2691792384.0,
            "74": 2691792384.0,
            "75": 2691792384.0,
            "76": 2691792384.0,
            "77": 2691792384.0,
            "78": 2691792384.0,
            "79": 2691792384.0,
            "80": 2691792384.0,
            "81": 2691792384.0,
            "82": 2691792384.0,
            "83": 2691792384.0,
            "84": 2691792384.0,
            "85": 2691792384.0,
            "86": 2691792384.0,
            "87": 2691792384.0,
            "88": 2691792384.0,
            "89": 2691792384.0,
            "90": 2691792384.0,
            "91": 2691792384.0,
            "92": 2691792384.0,
            "93": 2691792384.0,
            "94": 2691792384.0,
            "95": 2691792384.0,
            "96": 2691792384.0,
            "97": 2691792384.0,
            "98": 2691792384.0,
            "99": 2691792384.0,
            "100": 2691792384.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 6.24535,
            "52": 0.08446,
            "53": 0.05106,
            "54": 0.05053,
            "55": 0.05025,
            "56": 0.06328,
            "57": 0.05006,
            "58": 0.04939,
            "59": 0.04895,
            "60": 0.05032,
            "61": 0.05024,
            "62": 0.04926,
            "63": 0.051,
            "64": 0.04994,
            "65": 0.0516,
            "66": 0.05582,
            "67": 0.05024,
            "68": 0.04967,
            "69": 0.04945,
            "70": 0.05103,
            "71": 0.04971,
            "72": 0.0494,
            "73": 0.05144,
            "74": 0.0497,
            "75": 0.05084,
            "76": 0.05125,
            "77": 0.05002,
            "78": 0.04992,
            "79": 0.05192,
            "80": 0.05131,
            "81": 0.05007,
            "82": 0.05145,
            "83": 0.05065,
            "84": 0.05098,
            "85": 0.05005,
            "86": 0.05133,
            "87": 0.05031,
            "88": 0.05145,
            "89": 0.05038,
            "90": 0.49172,
            "91": 0.05261,
            "92": 0.05313,
            "93": 0.05042,
            "94": 0.05061,
            "95": 0.05207,
            "96": 0.04992,
            "97": 0.04998,
            "98": 0.05103,
            "99": 0.05004,
            "100": 0.05054
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.84092,
            "2": 10.83661,
            "3": 10.83233,
            "4": 10.81819,
            "5": 10.84059,
            "6": 10.86985,
            "7": 10.83324,
            "8": 10.83877,
            "9": 10.84355,
            "10": 10.80969,
            "11": 10.85186,
            "12": 10.84454,
            "13": 10.8632,
            "14": 10.86353,
            "15": 10.79985,
            "16": 10.79265,
            "17": 10.77473,
            "18": 10.80156,
            "19": 10.79155,
            "20": 10.70512,
            "21": 10.68174,
            "22": 10.56547,
            "23": 10.70145,
            "24": 10.5789,
            "25": 10.53597,
            "26": 10.60745,
            "27": 10.59418,
            "28": 10.56116,
            "29": 10.57573,
            "30": 10.35474,
            "31": 10.12618,
            "32": 10.46569,
            "33": 10.45235,
            "34": 10.22491,
            "35": 10.27088,
            "36": 10.22167,
            "37": 10.33935,
            "38": 10.18639,
            "39": 10.39432,
            "40": 10.07794,
            "41": 10.13875,
            "42": 10.20184,
            "43": 9.83819,
            "44": 9.94273,
            "45": 9.82299,
            "46": 9.82187,
            "47": 10.13444,
            "48": 9.84097,
            "49": 9.52094,
            "50": 9.90107,
            "51": 9.83459,
            "52": 9.73231,
            "53": 10.04881,
            "54": 9.93895,
            "55": 9.86297,
            "56": 9.613,
            "57": 9.46964,
            "58": 9.81136,
            "59": 9.57107,
            "60": 9.48153,
            "61": 9.67881,
            "62": 9.96579,
            "63": 9.35276,
            "64": 9.75644,
            "65": 8.93769,
            "66": 9.68152,
            "67": 9.35669,
            "68": 9.76806,
            "69": 9.7739,
            "70": 9.71012,
            "71": 9.60009,
            "72": 9.56796,
            "73": 9.47739,
            "74": 8.93177,
            "75": 9.40721,
            "76": 9.06847,
            "77": 10.0464,
            "78": 9.70984,
            "79": 9.35731,
            "80": 9.38978,
            "81": 9.4662,
            "82": 9.68056,
            "83": 9.29144,
            "84": 9.40194,
            "85": 9.59734,
            "86": 9.06207,
            "87": 9.57921,
            "88": 9.73262,
            "89": 9.58838,
            "90": 9.80354,
            "91": 9.31991,
            "92": 9.35013,
            "93": 9.06378,
            "94": 8.81909,
            "95": 9.50572,
            "96": 9.51068,
            "97": 9.29244,
            "98": 9.65579,
            "99": 8.87401,
            "100": 9.38837
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1628.0,
            "2": 1744.0,
            "3": 1662.0,
            "4": 1801.0,
            "5": 1937.0,
            "6": 1866.0,
            "7": 1855.0,
            "8": 1584.0,
            "9": 1901.0,
            "10": 1325.0,
            "11": 1963.0,
            "12": 1833.0,
            "13": 1863.0,
            "14": 1928.0,
            "15": 1948.0,
            "16": 1999.0,
            "17": 1838.0,
            "18": 1703.0,
            "19": 1898.0,
            "20": 1610.0,
            "21": 1944.0,
            "22": 1728.0,
            "23": 2116.0,
            "24": 1613.0,
            "25": 1634.0,
            "26": 1788.0,
            "27": 2059.0,
            "28": 2067.0,
            "29": 1978.0,
            "30": 1929.0,
            "31": 1782.0,
            "32": 1850.0,
            "33": 2169.0,
            "34": 1815.0,
            "35": 2007.0,
            "36": 2010.0,
            "37": 2385.0,
            "38": 2413.0,
            "39": 2474.0,
            "40": 2254.0,
            "41": 2373.0,
            "42": 2253.0,
            "43": 1900.0,
            "44": 2058.0,
            "45": 2153.0,
            "46": 2385.0,
            "47": 2514.0,
            "48": 2475.0,
            "49": 2362.0,
            "50": 2335.0,
            "51": 2452.0,
            "52": 2576.0,
            "53": 2914.0,
            "54": 2741.0,
            "55": 2408.0,
            "56": 2650.0,
            "57": 2264.0,
            "58": 2853.0,
            "59": 2757.0,
            "60": 2509.0,
            "61": 3076.0,
            "62": 2709.0,
            "63": 2563.0,
            "64": 3041.0,
            "65": 2687.0,
            "66": 3089.0,
            "67": 2767.0,
            "68": 2930.0,
            "69": 2911.0,
            "70": 3286.0,
            "71": 3105.0,
            "72": 2507.0,
            "73": 3063.0,
            "74": 2022.0,
            "75": 2763.0,
            "76": 3002.0,
            "77": 3382.0,
            "78": 3470.0,
            "79": 3109.0,
            "80": 3357.0,
            "81": 3798.0,
            "82": 3348.0,
            "83": 2763.0,
            "84": 3271.0,
            "85": 3245.0,
            "86": 2587.0,
            "87": 3650.0,
            "88": 3103.0,
            "89": 3471.0,
            "90": 3086.0,
            "91": 3050.0,
            "92": 3368.0,
            "93": 2828.0,
            "94": 3495.0,
            "95": 3424.0,
            "96": 3559.0,
            "97": 3289.0,
            "98": 3727.0,
            "99": 3275.0,
            "100": 3401.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 552128512.0,
            "2": 552128512.0,
            "3": 552128512.0,
            "4": 552128512.0,
            "5": 552128512.0,
            "6": 552128512.0,
            "7": 552128512.0,
            "8": 552128512.0,
            "9": 552128512.0,
            "10": 552128512.0,
            "11": 552128512.0,
            "12": 552128512.0,
            "13": 552128512.0,
            "14": 552128512.0,
            "15": 552128512.0,
            "16": 552128512.0,
            "17": 552128512.0,
            "18": 552128512.0,
            "19": 552128512.0,
            "20": 552128512.0,
            "21": 552128512.0,
            "22": 552128512.0,
            "23": 552128512.0,
            "24": 552128512.0,
            "25": 552128512.0,
            "26": 552128512.0,
            "27": 552128512.0,
            "28": 552128512.0,
            "29": 552128512.0,
            "30": 552128512.0,
            "31": 552128512.0,
            "32": 552128512.0,
            "33": 552128512.0,
            "34": 552128512.0,
            "35": 552128512.0,
            "36": 552128512.0,
            "37": 552128512.0,
            "38": 552128512.0,
            "39": 552128512.0,
            "40": 552128512.0,
            "41": 552128512.0,
            "42": 552128512.0,
            "43": 552128512.0,
            "44": 552128512.0,
            "45": 552128512.0,
            "46": 552128512.0,
            "47": 552128512.0,
            "48": 552128512.0,
            "49": 552128512.0,
            "50": 552128512.0,
            "51": 552128512.0,
            "52": 552128512.0,
            "53": 552128512.0,
            "54": 552128512.0,
            "55": 552128512.0,
            "56": 552128512.0,
            "57": 552128512.0,
            "58": 552128512.0,
            "59": 552128512.0,
            "60": 552128512.0,
            "61": 552128512.0,
            "62": 552128512.0,
            "63": 552128512.0,
            "64": 552128512.0,
            "65": 552128512.0,
            "66": 552128512.0,
            "67": 552128512.0,
            "68": 552128512.0,
            "69": 552128512.0,
            "70": 552128512.0,
            "71": 552128512.0,
            "72": 552128512.0,
            "73": 552128512.0,
            "74": 552128512.0,
            "75": 552128512.0,
            "76": 552128512.0,
            "77": 552128512.0,
            "78": 552128512.0,
            "79": 552128512.0,
            "80": 552128512.0,
            "81": 552128512.0,
            "82": 552128512.0,
            "83": 552128512.0,
            "84": 552128512.0,
            "85": 552128512.0,
            "86": 552128512.0,
            "87": 552128512.0,
            "88": 552128512.0,
            "89": 552128512.0,
            "90": 552128512.0,
            "91": 552128512.0,
            "92": 552128512.0,
            "93": 552128512.0,
            "94": 552128512.0,
            "95": 552128512.0,
            "96": 552128512.0,
            "97": 552128512.0,
            "98": 552128512.0,
            "99": 552128512.0,
            "100": 552128512.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2615097856.0,
            "2": 2711714304.0,
            "3": 2711714304.0,
            "4": 2711714304.0,
            "5": 2711714304.0,
            "6": 2711714304.0,
            "7": 2711714304.0,
            "8": 2711714304.0,
            "9": 2711714304.0,
            "10": 2711714304.0,
            "11": 2711714304.0,
            "12": 2711714304.0,
            "13": 2711714304.0,
            "14": 2711714304.0,
            "15": 2711714304.0,
            "16": 2711714304.0,
            "17": 2711714304.0,
            "18": 2711714304.0,
            "19": 2711714304.0,
            "20": 2711714304.0,
            "21": 2711714304.0,
            "22": 2711714304.0,
            "23": 2711714304.0,
            "24": 2711714304.0,
            "25": 2711714304.0,
            "26": 2711714304.0,
            "27": 2711714304.0,
            "28": 2711714304.0,
            "29": 2711714304.0,
            "30": 2711714304.0,
            "31": 2711714304.0,
            "32": 2711714304.0,
            "33": 2711714304.0,
            "34": 2711714304.0,
            "35": 2711714304.0,
            "36": 2711714304.0,
            "37": 2711714304.0,
            "38": 2711714304.0,
            "39": 2711714304.0,
            "40": 2711714304.0,
            "41": 2711714304.0,
            "42": 2711714304.0,
            "43": 2711714304.0,
            "44": 2711714304.0,
            "45": 2711714304.0,
            "46": 2711714304.0,
            "47": 2711714304.0,
            "48": 2711714304.0,
            "49": 2711714304.0,
            "50": 2711714304.0,
            "51": 2711714304.0,
            "52": 2711714304.0,
            "53": 2711714304.0,
            "54": 2711714304.0,
            "55": 2711714304.0,
            "56": 2711714304.0,
            "57": 2711714304.0,
            "58": 2711714304.0,
            "59": 2711714304.0,
            "60": 2711714304.0,
            "61": 2711714304.0,
            "62": 2711714304.0,
            "63": 2711714304.0,
            "64": 2711714304.0,
            "65": 2711714304.0,
            "66": 2711714304.0,
            "67": 2711714304.0,
            "68": 2711714304.0,
            "69": 2711714304.0,
            "70": 2711714304.0,
            "71": 2711714304.0,
            "72": 2711714304.0,
            "73": 2711714304.0,
            "74": 2711714304.0,
            "75": 2711714304.0,
            "76": 2711714304.0,
            "77": 2711714304.0,
            "78": 2711714304.0,
            "79": 2711714304.0,
            "80": 2711714304.0,
            "81": 2711714304.0,
            "82": 2711714304.0,
            "83": 2711714304.0,
            "84": 2711714304.0,
            "85": 2711714304.0,
            "86": 2711714304.0,
            "87": 2711714304.0,
            "88": 2711714304.0,
            "89": 2711714304.0,
            "90": 2711714304.0,
            "91": 2711714304.0,
            "92": 2711714304.0,
            "93": 2711714304.0,
            "94": 2711714304.0,
            "95": 2711714304.0,
            "96": 2711714304.0,
            "97": 2711714304.0,
            "98": 2711714304.0,
            "99": 2711714304.0,
            "100": 2711714304.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 4.22026,
            "2": 0.11508,
            "3": 0.08784,
            "4": 0.08853,
            "5": 0.08761,
            "6": 0.08872,
            "7": 0.08738,
            "8": 0.08924,
            "9": 0.08542,
            "10": 0.08527,
            "11": 0.08496,
            "12": 0.08437,
            "13": 0.08471,
            "14": 0.08501,
            "15": 0.08468,
            "16": 0.08444,
            "17": 0.08421,
            "18": 0.08402,
            "19": 0.08444,
            "20": 0.0841,
            "21": 0.08395,
            "22": 0.08442,
            "23": 0.0846,
            "24": 0.08394,
            "25": 0.08371,
            "26": 0.08416,
            "27": 0.08354,
            "28": 0.08445,
            "29": 0.08405,
            "30": 0.08414,
            "31": 0.08444,
            "32": 0.08369,
            "33": 0.08356,
            "34": 0.08435,
            "35": 0.08405,
            "36": 0.08358,
            "37": 0.08349,
            "38": 0.08439,
            "39": 0.0837,
            "40": 0.08444,
            "41": 0.08399,
            "42": 0.0835,
            "43": 0.0843,
            "44": 0.08389,
            "45": 0.084,
            "46": 0.08426,
            "47": 0.0842,
            "48": 0.08434,
            "49": 0.08385,
            "50": 0.08424,
            "51": 0.09846,
            "52": 0.08909,
            "53": 0.08511,
            "54": 0.0849,
            "55": 0.1206,
            "56": 0.0845,
            "57": 0.08446,
            "58": 0.08474,
            "59": 0.08505,
            "60": 0.08422,
            "61": 0.08413,
            "62": 0.0845,
            "63": 0.08441,
            "64": 0.08486,
            "65": 0.08527,
            "66": 0.08442,
            "67": 0.08533,
            "68": 0.08468,
            "69": 0.08469,
            "70": 0.08503,
            "71": 0.08424,
            "72": 0.085,
            "73": 0.08469,
            "74": 0.08484,
            "75": 0.08396,
            "76": 0.08437,
            "77": 0.08458,
            "78": 0.08553,
            "79": 0.08492,
            "80": 0.08459,
            "81": 0.08431,
            "82": 0.08515,
            "83": 0.08407,
            "84": 0.08429,
            "85": 0.08413,
            "86": 0.08499,
            "87": 0.08442,
            "88": 0.08461,
            "89": 0.08478,
            "90": 0.08469,
            "91": 0.08469,
            "92": 0.08478,
            "93": 0.08453,
            "94": 0.0842,
            "95": 0.08391,
            "96": 0.08383,
            "97": 0.08459,
            "98": 0.08469,
            "99": 0.085,
            "100": 0.08518
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.84092,
            "2": 10.83661,
            "3": 10.83233,
            "4": 10.81819,
            "5": 10.84059,
            "6": 10.86985,
            "7": 10.83324,
            "8": 10.83877,
            "9": 10.84355,
            "10": 10.80969,
            "11": 10.85186,
            "12": 10.84454,
            "13": 10.8632,
            "14": 10.86353,
            "15": 10.79985,
            "16": 10.79265,
            "17": 10.77473,
            "18": 10.80156,
            "19": 10.79155,
            "20": 10.70512,
            "21": 10.68174,
            "22": 10.56547,
            "23": 10.70145,
            "24": 10.5789,
            "25": 10.53597,
            "26": 10.60745,
            "27": 10.59418,
            "28": 10.56116,
            "29": 10.57573,
            "30": 10.35474,
            "31": 10.12618,
            "32": 10.46569,
            "33": 10.45235,
            "34": 10.22491,
            "35": 10.27088,
            "36": 10.22167,
            "37": 10.33935,
            "38": 10.18639,
            "39": 10.39432,
            "40": 10.07794,
            "41": 10.13875,
            "42": 10.20184,
            "43": 9.83819,
            "44": 9.94273,
            "45": 9.82299,
            "46": 9.82187,
            "47": 10.13444,
            "48": 9.84097,
            "49": 9.52094,
            "50": 9.90107,
            "51": 9.83459,
            "52": 9.73231,
            "53": 10.04881,
            "54": 9.93895,
            "55": 9.86297,
            "56": 9.613,
            "57": 9.46964,
            "58": 9.81136,
            "59": 9.57107,
            "60": 9.48153,
            "61": 9.67881,
            "62": 9.96579,
            "63": 9.35276,
            "64": 9.75644,
            "65": 8.93769,
            "66": 9.68152,
            "67": 9.35669,
            "68": 9.76806,
            "69": 9.7739,
            "70": 9.71012,
            "71": 9.60009,
            "72": 9.56796,
            "73": 9.47739,
            "74": 8.93177,
            "75": 9.40721,
            "76": 9.06847,
            "77": 10.0464,
            "78": 9.70984,
            "79": 9.35731,
            "80": 9.38978,
            "81": 9.4662,
            "82": 9.68056,
            "83": 9.29144,
            "84": 9.40194,
            "85": 9.59734,
            "86": 9.06207,
            "87": 9.57921,
            "88": 9.73262,
            "89": 9.58838,
            "90": 9.80354,
            "91": 9.31991,
            "92": 9.35013,
            "93": 9.06378,
            "94": 8.81909,
            "95": 9.50572,
            "96": 9.51068,
            "97": 9.29244,
            "98": 9.65579,
            "99": 8.87401,
            "100": 9.38837
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1628.0,
            "2": 1744.0,
            "3": 1662.0,
            "4": 1801.0,
            "5": 1937.0,
            "6": 1866.0,
            "7": 1855.0,
            "8": 1584.0,
            "9": 1901.0,
            "10": 1325.0,
            "11": 1963.0,
            "12": 1833.0,
            "13": 1863.0,
            "14": 1928.0,
            "15": 1948.0,
            "16": 1999.0,
            "17": 1838.0,
            "18": 1703.0,
            "19": 1898.0,
            "20": 1610.0,
            "21": 1944.0,
            "22": 1728.0,
            "23": 2116.0,
            "24": 1613.0,
            "25": 1634.0,
            "26": 1788.0,
            "27": 2059.0,
            "28": 2067.0,
            "29": 1978.0,
            "30": 1929.0,
            "31": 1782.0,
            "32": 1850.0,
            "33": 2169.0,
            "34": 1815.0,
            "35": 2007.0,
            "36": 2010.0,
            "37": 2385.0,
            "38": 2413.0,
            "39": 2474.0,
            "40": 2254.0,
            "41": 2373.0,
            "42": 2253.0,
            "43": 1900.0,
            "44": 2058.0,
            "45": 2153.0,
            "46": 2385.0,
            "47": 2514.0,
            "48": 2475.0,
            "49": 2362.0,
            "50": 2335.0,
            "51": 2452.0,
            "52": 2576.0,
            "53": 2914.0,
            "54": 2741.0,
            "55": 2408.0,
            "56": 2650.0,
            "57": 2264.0,
            "58": 2853.0,
            "59": 2757.0,
            "60": 2509.0,
            "61": 3076.0,
            "62": 2709.0,
            "63": 2563.0,
            "64": 3041.0,
            "65": 2687.0,
            "66": 3089.0,
            "67": 2767.0,
            "68": 2930.0,
            "69": 2911.0,
            "70": 3286.0,
            "71": 3105.0,
            "72": 2507.0,
            "73": 3063.0,
            "74": 2022.0,
            "75": 2763.0,
            "76": 3002.0,
            "77": 3382.0,
            "78": 3470.0,
            "79": 3109.0,
            "80": 3357.0,
            "81": 3798.0,
            "82": 3348.0,
            "83": 2763.0,
            "84": 3271.0,
            "85": 3245.0,
            "86": 2587.0,
            "87": 3650.0,
            "88": 3103.0,
            "89": 3471.0,
            "90": 3086.0,
            "91": 3050.0,
            "92": 3368.0,
            "93": 2828.0,
            "94": 3495.0,
            "95": 3424.0,
            "96": 3559.0,
            "97": 3289.0,
            "98": 3727.0,
            "99": 3275.0,
            "100": 3401.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 552128512.0,
            "2": 552128512.0,
            "3": 552128512.0,
            "4": 552128512.0,
            "5": 552128512.0,
            "6": 552128512.0,
            "7": 552128512.0,
            "8": 552128512.0,
            "9": 552128512.0,
            "10": 552128512.0,
            "11": 552128512.0,
            "12": 552128512.0,
            "13": 552128512.0,
            "14": 552128512.0,
            "15": 552128512.0,
            "16": 552128512.0,
            "17": 552128512.0,
            "18": 552128512.0,
            "19": 552128512.0,
            "20": 552128512.0,
            "21": 552128512.0,
            "22": 552128512.0,
            "23": 552128512.0,
            "24": 552128512.0,
            "25": 552128512.0,
            "26": 552128512.0,
            "27": 552128512.0,
            "28": 552128512.0,
            "29": 552128512.0,
            "30": 552128512.0,
            "31": 552128512.0,
            "32": 552128512.0,
            "33": 552128512.0,
            "34": 552128512.0,
            "35": 552128512.0,
            "36": 552128512.0,
            "37": 552128512.0,
            "38": 552128512.0,
            "39": 552128512.0,
            "40": 552128512.0,
            "41": 552128512.0,
            "42": 552128512.0,
            "43": 552128512.0,
            "44": 552128512.0,
            "45": 552128512.0,
            "46": 552128512.0,
            "47": 552128512.0,
            "48": 552128512.0,
            "49": 552128512.0,
            "50": 552128512.0,
            "51": 552128512.0,
            "52": 552128512.0,
            "53": 552128512.0,
            "54": 552128512.0,
            "55": 552128512.0,
            "56": 552128512.0,
            "57": 552128512.0,
            "58": 552128512.0,
            "59": 552128512.0,
            "60": 552128512.0,
            "61": 552128512.0,
            "62": 552128512.0,
            "63": 552128512.0,
            "64": 552128512.0,
            "65": 552128512.0,
            "66": 552128512.0,
            "67": 552128512.0,
            "68": 552128512.0,
            "69": 552128512.0,
            "70": 552128512.0,
            "71": 552128512.0,
            "72": 552128512.0,
            "73": 552128512.0,
            "74": 552128512.0,
            "75": 552128512.0,
            "76": 552128512.0,
            "77": 552128512.0,
            "78": 552128512.0,
            "79": 552128512.0,
            "80": 552128512.0,
            "81": 552128512.0,
            "82": 552128512.0,
            "83": 552128512.0,
            "84": 552128512.0,
            "85": 552128512.0,
            "86": 552128512.0,
            "87": 552128512.0,
            "88": 552128512.0,
            "89": 552128512.0,
            "90": 552128512.0,
            "91": 552128512.0,
            "92": 552128512.0,
            "93": 552128512.0,
            "94": 552128512.0,
            "95": 552128512.0,
            "96": 552128512.0,
            "97": 552128512.0,
            "98": 552128512.0,
            "99": 552128512.0,
            "100": 552128512.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2615097856.0,
            "2": 2711714304.0,
            "3": 2711714304.0,
            "4": 2711714304.0,
            "5": 2711714304.0,
            "6": 2711714304.0,
            "7": 2711714304.0,
            "8": 2711714304.0,
            "9": 2711714304.0,
            "10": 2711714304.0,
            "11": 2711714304.0,
            "12": 2711714304.0,
            "13": 2711714304.0,
            "14": 2711714304.0,
            "15": 2711714304.0,
            "16": 2711714304.0,
            "17": 2711714304.0,
            "18": 2711714304.0,
            "19": 2711714304.0,
            "20": 2711714304.0,
            "21": 2711714304.0,
            "22": 2711714304.0,
            "23": 2711714304.0,
            "24": 2711714304.0,
            "25": 2711714304.0,
            "26": 2711714304.0,
            "27": 2711714304.0,
            "28": 2711714304.0,
            "29": 2711714304.0,
            "30": 2711714304.0,
            "31": 2711714304.0,
            "32": 2711714304.0,
            "33": 2711714304.0,
            "34": 2711714304.0,
            "35": 2711714304.0,
            "36": 2711714304.0,
            "37": 2711714304.0,
            "38": 2711714304.0,
            "39": 2711714304.0,
            "40": 2711714304.0,
            "41": 2711714304.0,
            "42": 2711714304.0,
            "43": 2711714304.0,
            "44": 2711714304.0,
            "45": 2711714304.0,
            "46": 2711714304.0,
            "47": 2711714304.0,
            "48": 2711714304.0,
            "49": 2711714304.0,
            "50": 2711714304.0,
            "51": 2711714304.0,
            "52": 2711714304.0,
            "53": 2711714304.0,
            "54": 2711714304.0,
            "55": 2711714304.0,
            "56": 2711714304.0,
            "57": 2711714304.0,
            "58": 2711714304.0,
            "59": 2711714304.0,
            "60": 2711714304.0,
            "61": 2711714304.0,
            "62": 2711714304.0,
            "63": 2711714304.0,
            "64": 2711714304.0,
            "65": 2711714304.0,
            "66": 2711714304.0,
            "67": 2711714304.0,
            "68": 2711714304.0,
            "69": 2711714304.0,
            "70": 2711714304.0,
            "71": 2711714304.0,
            "72": 2711714304.0,
            "73": 2711714304.0,
            "74": 2711714304.0,
            "75": 2711714304.0,
            "76": 2711714304.0,
            "77": 2711714304.0,
            "78": 2711714304.0,
            "79": 2711714304.0,
            "80": 2711714304.0,
            "81": 2711714304.0,
            "82": 2711714304.0,
            "83": 2711714304.0,
            "84": 2711714304.0,
            "85": 2711714304.0,
            "86": 2711714304.0,
            "87": 2711714304.0,
            "88": 2711714304.0,
            "89": 2711714304.0,
            "90": 2711714304.0,
            "91": 2711714304.0,
            "92": 2711714304.0,
            "93": 2711714304.0,
            "94": 2711714304.0,
            "95": 2711714304.0,
            "96": 2711714304.0,
            "97": 2711714304.0,
            "98": 2711714304.0,
            "99": 2711714304.0,
            "100": 2711714304.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 3.22526,
            "2": 0.19893,
            "3": 0.09313,
            "4": 0.08045,
            "5": 0.08171,
            "6": 0.08058,
            "7": 0.08022,
            "8": 0.07981,
            "9": 0.0808,
            "10": 0.08068,
            "11": 0.08073,
            "12": 0.08318,
            "13": 0.08514,
            "14": 0.08404,
            "15": 0.08382,
            "16": 0.08982,
            "17": 0.08387,
            "18": 0.08342,
            "19": 0.08359,
            "20": 0.07926,
            "21": 0.08037,
            "22": 0.08041,
            "23": 0.08187,
            "24": 0.08232,
            "25": 0.08012,
            "26": 0.08081,
            "27": 0.08072,
            "28": 0.08454,
            "29": 0.08003,
            "30": 0.07895,
            "31": 0.08312,
            "32": 0.08109,
            "33": 0.08106,
            "34": 0.07905,
            "35": 0.08145,
            "36": 0.08345,
            "37": 0.07972,
            "38": 0.07895,
            "39": 0.0795,
            "40": 0.07971,
            "41": 0.08032,
            "42": 0.07938,
            "43": 0.0806,
            "44": 0.07956,
            "45": 0.07918,
            "46": 0.07961,
            "47": 0.07937,
            "48": 0.08049,
            "49": 0.07875,
            "50": 0.07866,
            "51": 0.08212,
            "52": 0.07853,
            "53": 0.07869,
            "54": 0.07753,
            "55": 0.0774,
            "56": 0.07699,
            "57": 0.07754,
            "58": 0.07721,
            "59": 0.07784,
            "60": 0.07727,
            "61": 0.07709,
            "62": 0.07721,
            "63": 0.07751,
            "64": 0.07763,
            "65": 0.07813,
            "66": 0.07898,
            "67": 0.07875,
            "68": 0.07868,
            "69": 0.0789,
            "70": 0.07834,
            "71": 0.07782,
            "72": 0.07816,
            "73": 0.0785,
            "74": 0.0787,
            "75": 0.07812,
            "76": 0.07812,
            "77": 0.07845,
            "78": 0.07888,
            "79": 0.07811,
            "80": 0.07836,
            "81": 0.07854,
            "82": 0.07902,
            "83": 0.07769,
            "84": 0.07776,
            "85": 0.07749,
            "86": 0.07824,
            "87": 0.07761,
            "88": 0.07812,
            "89": 0.07814,
            "90": 0.07827,
            "91": 0.07825,
            "92": 0.07856,
            "93": 0.07779,
            "94": 0.07786,
            "95": 0.07734,
            "96": 0.07776,
            "97": 0.07809,
            "98": 0.07855,
            "99": 0.07768,
            "100": 0.08111
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.83459,
            "52": 9.73231,
            "53": 10.04881,
            "54": 9.93895,
            "55": 9.86297,
            "56": 9.613,
            "57": 9.46964,
            "58": 9.81136,
            "59": 9.57107,
            "60": 9.48153,
            "61": 9.67881,
            "62": 9.96579,
            "63": 9.35276,
            "64": 9.75644,
            "65": 8.93769,
            "66": 9.68152,
            "67": 9.35669,
            "68": 9.76806,
            "69": 9.7739,
            "70": 9.71012,
            "71": 9.60009,
            "72": 9.56796,
            "73": 9.47739,
            "74": 8.93177,
            "75": 9.40721,
            "76": 9.06847,
            "77": 10.0464,
            "78": 9.70984,
            "79": 9.35731,
            "80": 9.38978,
            "81": 9.4662,
            "82": 9.68056,
            "83": 9.29144,
            "84": 9.40194,
            "85": 9.59734,
            "86": 9.06207,
            "87": 9.57921,
            "88": 9.73262,
            "89": 9.58838,
            "90": 9.80354,
            "91": 9.31991,
            "92": 9.35013,
            "93": 9.06378,
            "94": 8.81909,
            "95": 9.50572,
            "96": 9.51068,
            "97": 9.29244,
            "98": 9.65579,
            "99": 8.87401,
            "100": 9.38837
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2452.0,
            "52": 2576.0,
            "53": 2914.0,
            "54": 2741.0,
            "55": 2408.0,
            "56": 2650.0,
            "57": 2264.0,
            "58": 2853.0,
            "59": 2757.0,
            "60": 2509.0,
            "61": 3076.0,
            "62": 2709.0,
            "63": 2563.0,
            "64": 3041.0,
            "65": 2687.0,
            "66": 3089.0,
            "67": 2767.0,
            "68": 2930.0,
            "69": 2911.0,
            "70": 3286.0,
            "71": 3105.0,
            "72": 2507.0,
            "73": 3063.0,
            "74": 2022.0,
            "75": 2763.0,
            "76": 3002.0,
            "77": 3382.0,
            "78": 3470.0,
            "79": 3109.0,
            "80": 3357.0,
            "81": 3798.0,
            "82": 3348.0,
            "83": 2763.0,
            "84": 3271.0,
            "85": 3245.0,
            "86": 2587.0,
            "87": 3650.0,
            "88": 3103.0,
            "89": 3471.0,
            "90": 3086.0,
            "91": 3050.0,
            "92": 3368.0,
            "93": 2828.0,
            "94": 3495.0,
            "95": 3424.0,
            "96": 3559.0,
            "97": 3289.0,
            "98": 3727.0,
            "99": 3275.0,
            "100": 3401.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 552128512.0,
            "52": 552128512.0,
            "53": 552128512.0,
            "54": 552128512.0,
            "55": 552128512.0,
            "56": 552128512.0,
            "57": 552128512.0,
            "58": 552128512.0,
            "59": 552128512.0,
            "60": 552128512.0,
            "61": 552128512.0,
            "62": 552128512.0,
            "63": 552128512.0,
            "64": 552128512.0,
            "65": 552128512.0,
            "66": 552128512.0,
            "67": 552128512.0,
            "68": 552128512.0,
            "69": 552128512.0,
            "70": 552128512.0,
            "71": 552128512.0,
            "72": 552128512.0,
            "73": 552128512.0,
            "74": 552128512.0,
            "75": 552128512.0,
            "76": 552128512.0,
            "77": 552128512.0,
            "78": 552128512.0,
            "79": 552128512.0,
            "80": 552128512.0,
            "81": 552128512.0,
            "82": 552128512.0,
            "83": 552128512.0,
            "84": 552128512.0,
            "85": 552128512.0,
            "86": 552128512.0,
            "87": 552128512.0,
            "88": 552128512.0,
            "89": 552128512.0,
            "90": 552128512.0,
            "91": 552128512.0,
            "92": 552128512.0,
            "93": 552128512.0,
            "94": 552128512.0,
            "95": 552128512.0,
            "96": 552128512.0,
            "97": 552128512.0,
            "98": 552128512.0,
            "99": 552128512.0,
            "100": 552128512.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2679207424.0,
            "52": 2712762880.0,
            "53": 2712762880.0,
            "54": 2712762880.0,
            "55": 2712762880.0,
            "56": 2712762880.0,
            "57": 2712762880.0,
            "58": 2712762880.0,
            "59": 2712762880.0,
            "60": 2712762880.0,
            "61": 2712762880.0,
            "62": 2712762880.0,
            "63": 2712762880.0,
            "64": 2712762880.0,
            "65": 2712762880.0,
            "66": 2712762880.0,
            "67": 2712762880.0,
            "68": 2712762880.0,
            "69": 2712762880.0,
            "70": 2712762880.0,
            "71": 2712762880.0,
            "72": 2712762880.0,
            "73": 2712762880.0,
            "74": 2712762880.0,
            "75": 2712762880.0,
            "76": 2712762880.0,
            "77": 2712762880.0,
            "78": 2712762880.0,
            "79": 2712762880.0,
            "80": 2712762880.0,
            "81": 2712762880.0,
            "82": 2712762880.0,
            "83": 2712762880.0,
            "84": 2712762880.0,
            "85": 2712762880.0,
            "86": 2712762880.0,
            "87": 2712762880.0,
            "88": 2712762880.0,
            "89": 2712762880.0,
            "90": 2712762880.0,
            "91": 2712762880.0,
            "92": 2712762880.0,
            "93": 2712762880.0,
            "94": 2712762880.0,
            "95": 2712762880.0,
            "96": 2712762880.0,
            "97": 2712762880.0,
            "98": 2712762880.0,
            "99": 2712762880.0,
            "100": 2712762880.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 3.18495,
            "52": 0.6276,
            "53": 0.08049,
            "54": 0.07972,
            "55": 0.08135,
            "56": 0.07856,
            "57": 0.08351,
            "58": 0.07967,
            "59": 0.08019,
            "60": 0.0792,
            "61": 0.07924,
            "62": 0.07905,
            "63": 0.08021,
            "64": 0.07964,
            "65": 0.07981,
            "66": 0.07892,
            "67": 0.07984,
            "68": 0.07904,
            "69": 0.07969,
            "70": 0.07923,
            "71": 0.07928,
            "72": 0.07969,
            "73": 0.07956,
            "74": 0.08002,
            "75": 0.07918,
            "76": 0.07955,
            "77": 0.07938,
            "78": 0.08006,
            "79": 0.07935,
            "80": 0.07959,
            "81": 0.08018,
            "82": 0.07963,
            "83": 0.07952,
            "84": 0.07938,
            "85": 0.07915,
            "86": 0.07965,
            "87": 0.07999,
            "88": 0.07951,
            "89": 0.08006,
            "90": 0.0794,
            "91": 0.07948,
            "92": 0.07896,
            "93": 0.07977,
            "94": 0.07916,
            "95": 0.07921,
            "96": 0.07884,
            "97": 0.0796,
            "98": 0.07923,
            "99": 0.07955,
            "100": 0.07931
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgxa100_dracooci-ord.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.8401,
            "2": 10.83566,
            "3": 10.82993,
            "4": 10.8173,
            "5": 10.84032,
            "6": 10.87262,
            "7": 10.83467,
            "8": 10.8403,
            "9": 10.84359,
            "10": 10.8134,
            "11": 10.85025,
            "12": 10.84316,
            "13": 10.86605,
            "14": 10.86315,
            "15": 10.80276,
            "16": 10.79643,
            "17": 10.7763,
            "18": 10.8015,
            "19": 10.7939,
            "20": 10.705,
            "21": 10.68148,
            "22": 10.56313,
            "23": 10.70136,
            "24": 10.57939,
            "25": 10.53849,
            "26": 10.60617,
            "27": 10.59211,
            "28": 10.56156,
            "29": 10.57666,
            "30": 10.35521,
            "31": 10.12773,
            "32": 10.46367,
            "33": 10.45444,
            "34": 10.22451,
            "35": 10.27148,
            "36": 10.22184,
            "37": 10.33945,
            "38": 10.18637,
            "39": 10.39329,
            "40": 10.08049,
            "41": 10.13789,
            "42": 10.20012,
            "43": 9.83791,
            "44": 9.94327,
            "45": 9.8229,
            "46": 9.82313,
            "47": 10.13353,
            "48": 9.8415,
            "49": 9.52102,
            "50": 9.90118,
            "51": 9.83467,
            "52": 9.73176,
            "53": 10.04773,
            "54": 9.93856,
            "55": 9.86424,
            "56": 9.61259,
            "57": 9.46819,
            "58": 9.81223,
            "59": 9.57172,
            "60": 9.4803,
            "61": 9.67964,
            "62": 9.96738,
            "63": 9.35351,
            "64": 9.7573,
            "65": 8.93743,
            "66": 9.68132,
            "67": 9.35694,
            "68": 9.7681,
            "69": 9.77289,
            "70": 9.71026,
            "71": 9.60024,
            "72": 9.56674,
            "73": 9.47644,
            "74": 8.93189,
            "75": 9.4088,
            "76": 9.06887,
            "77": 10.04696,
            "78": 9.70975,
            "79": 9.35669,
            "80": 9.39078,
            "81": 9.46574,
            "82": 9.68028,
            "83": 9.29218,
            "84": 9.40234,
            "85": 9.59741,
            "86": 9.06109,
            "87": 9.57951,
            "88": 9.73247,
            "89": 9.58838,
            "90": 9.80389,
            "91": 9.32105,
            "92": 9.35011,
            "93": 9.06313,
            "94": 8.82006,
            "95": 9.50562,
            "96": 9.51103,
            "97": 9.29305,
            "98": 9.65571,
            "99": 8.87502,
            "100": 9.38808
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1670.0,
            "2": 1691.0,
            "3": 1630.0,
            "4": 1805.0,
            "5": 1970.0,
            "6": 1901.0,
            "7": 1816.0,
            "8": 1587.0,
            "9": 1905.0,
            "10": 1397.0,
            "11": 1954.0,
            "12": 1859.0,
            "13": 1873.0,
            "14": 1875.0,
            "15": 1936.0,
            "16": 1972.0,
            "17": 1816.0,
            "18": 1773.0,
            "19": 1833.0,
            "20": 1715.0,
            "21": 1923.0,
            "22": 1681.0,
            "23": 2055.0,
            "24": 1727.0,
            "25": 1703.0,
            "26": 1761.0,
            "27": 1917.0,
            "28": 1962.0,
            "29": 2010.0,
            "30": 1957.0,
            "31": 1723.0,
            "32": 1898.0,
            "33": 2153.0,
            "34": 1828.0,
            "35": 1991.0,
            "36": 1937.0,
            "37": 2347.0,
            "38": 2365.0,
            "39": 2349.0,
            "40": 2239.0,
            "41": 2217.0,
            "42": 2222.0,
            "43": 2121.0,
            "44": 2059.0,
            "45": 2144.0,
            "46": 2296.0,
            "47": 2487.0,
            "48": 2376.0,
            "49": 2330.0,
            "50": 2377.0,
            "51": 2540.0,
            "52": 2598.0,
            "53": 2917.0,
            "54": 2715.0,
            "55": 2436.0,
            "56": 2691.0,
            "57": 2196.0,
            "58": 2875.0,
            "59": 2726.0,
            "60": 2445.0,
            "61": 3031.0,
            "62": 2618.0,
            "63": 2551.0,
            "64": 2939.0,
            "65": 2645.0,
            "66": 3160.0,
            "67": 2729.0,
            "68": 2852.0,
            "69": 2938.0,
            "70": 3337.0,
            "71": 3044.0,
            "72": 2531.0,
            "73": 2918.0,
            "74": 1976.0,
            "75": 2726.0,
            "76": 3036.0,
            "77": 3435.0,
            "78": 3375.0,
            "79": 3221.0,
            "80": 3356.0,
            "81": 3820.0,
            "82": 3203.0,
            "83": 2699.0,
            "84": 3073.0,
            "85": 3336.0,
            "86": 2729.0,
            "87": 3962.0,
            "88": 3062.0,
            "89": 3512.0,
            "90": 3044.0,
            "91": 2957.0,
            "92": 3276.0,
            "93": 2757.0,
            "94": 3568.0,
            "95": 3484.0,
            "96": 3627.0,
            "97": 3229.0,
            "98": 3722.0,
            "99": 3219.0,
            "100": 3467.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 552238592.0,
            "2": 552238592.0,
            "3": 552238592.0,
            "4": 552238592.0,
            "5": 552238592.0,
            "6": 552238592.0,
            "7": 552238592.0,
            "8": 552238592.0,
            "9": 552238592.0,
            "10": 552238592.0,
            "11": 552238592.0,
            "12": 552238592.0,
            "13": 552238592.0,
            "14": 552238592.0,
            "15": 552238592.0,
            "16": 552238592.0,
            "17": 552238592.0,
            "18": 552238592.0,
            "19": 552238592.0,
            "20": 552238592.0,
            "21": 552238592.0,
            "22": 552238592.0,
            "23": 552238592.0,
            "24": 552238592.0,
            "25": 552238592.0,
            "26": 552238592.0,
            "27": 552238592.0,
            "28": 552238592.0,
            "29": 552238592.0,
            "30": 552238592.0,
            "31": 552238592.0,
            "32": 552238592.0,
            "33": 552238592.0,
            "34": 552238592.0,
            "35": 552238592.0,
            "36": 552238592.0,
            "37": 552238592.0,
            "38": 552238592.0,
            "39": 552238592.0,
            "40": 552238592.0,
            "41": 552238592.0,
            "42": 552238592.0,
            "43": 552238592.0,
            "44": 552238592.0,
            "45": 552238592.0,
            "46": 552238592.0,
            "47": 552238592.0,
            "48": 552238592.0,
            "49": 552238592.0,
            "50": 552238592.0,
            "51": 552238592.0,
            "52": 552238592.0,
            "53": 552238592.0,
            "54": 552238592.0,
            "55": 552238592.0,
            "56": 552238592.0,
            "57": 552238592.0,
            "58": 552238592.0,
            "59": 552238592.0,
            "60": 552238592.0,
            "61": 552238592.0,
            "62": 552238592.0,
            "63": 552238592.0,
            "64": 552238592.0,
            "65": 552238592.0,
            "66": 552238592.0,
            "67": 552238592.0,
            "68": 552238592.0,
            "69": 552238592.0,
            "70": 552238592.0,
            "71": 552238592.0,
            "72": 552238592.0,
            "73": 552238592.0,
            "74": 552238592.0,
            "75": 552238592.0,
            "76": 552238592.0,
            "77": 552238592.0,
            "78": 552238592.0,
            "79": 552238592.0,
            "80": 552238592.0,
            "81": 552238592.0,
            "82": 552238592.0,
            "83": 552238592.0,
            "84": 552238592.0,
            "85": 552238592.0,
            "86": 552238592.0,
            "87": 552238592.0,
            "88": 552238592.0,
            "89": 552238592.0,
            "90": 552238592.0,
            "91": 552238592.0,
            "92": 552238592.0,
            "93": 552238592.0,
            "94": 552238592.0,
            "95": 552238592.0,
            "96": 552238592.0,
            "97": 552238592.0,
            "98": 552238592.0,
            "99": 552238592.0,
            "100": 552238592.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 4576563200.0,
            "2": 4673179648.0,
            "3": 4673179648.0,
            "4": 4673179648.0,
            "5": 4673179648.0,
            "6": 4673179648.0,
            "7": 4673179648.0,
            "8": 4673179648.0,
            "9": 4673179648.0,
            "10": 4673179648.0,
            "11": 4673179648.0,
            "12": 4673179648.0,
            "13": 4673179648.0,
            "14": 4673179648.0,
            "15": 4673179648.0,
            "16": 4673179648.0,
            "17": 4673179648.0,
            "18": 4673179648.0,
            "19": 4673179648.0,
            "20": 4673179648.0,
            "21": 4673179648.0,
            "22": 4673179648.0,
            "23": 4673179648.0,
            "24": 4673179648.0,
            "25": 4673179648.0,
            "26": 4673179648.0,
            "27": 4673179648.0,
            "28": 4673179648.0,
            "29": 4673179648.0,
            "30": 4673179648.0,
            "31": 4673179648.0,
            "32": 4673179648.0,
            "33": 4673179648.0,
            "34": 4673179648.0,
            "35": 4673179648.0,
            "36": 4673179648.0,
            "37": 4673179648.0,
            "38": 4673179648.0,
            "39": 4673179648.0,
            "40": 4673179648.0,
            "41": 4673179648.0,
            "42": 4673179648.0,
            "43": 4673179648.0,
            "44": 4673179648.0,
            "45": 4673179648.0,
            "46": 4673179648.0,
            "47": 4673179648.0,
            "48": 4673179648.0,
            "49": 4673179648.0,
            "50": 4673179648.0,
            "51": 4673179648.0,
            "52": 4673179648.0,
            "53": 4673179648.0,
            "54": 4673179648.0,
            "55": 4673179648.0,
            "56": 4673179648.0,
            "57": 4673179648.0,
            "58": 4673179648.0,
            "59": 4673179648.0,
            "60": 4673179648.0,
            "61": 4673179648.0,
            "62": 4673179648.0,
            "63": 4673179648.0,
            "64": 4673179648.0,
            "65": 4673179648.0,
            "66": 4673179648.0,
            "67": 4673179648.0,
            "68": 4673179648.0,
            "69": 4673179648.0,
            "70": 4673179648.0,
            "71": 4673179648.0,
            "72": 4673179648.0,
            "73": 4673179648.0,
            "74": 4673179648.0,
            "75": 4673179648.0,
            "76": 4673179648.0,
            "77": 4673179648.0,
            "78": 4673179648.0,
            "79": 4673179648.0,
            "80": 4673179648.0,
            "81": 4673179648.0,
            "82": 4673179648.0,
            "83": 4673179648.0,
            "84": 4673179648.0,
            "85": 4673179648.0,
            "86": 4673179648.0,
            "87": 4673179648.0,
            "88": 4673179648.0,
            "89": 4673179648.0,
            "90": 4673179648.0,
            "91": 4673179648.0,
            "92": 4673179648.0,
            "93": 4673179648.0,
            "94": 4673179648.0,
            "95": 4673179648.0,
            "96": 4673179648.0,
            "97": 4673179648.0,
            "98": 4673179648.0,
            "99": 4673179648.0,
            "100": 4673179648.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 12.14508,
            "2": 0.13504,
            "3": 0.10484,
            "4": 0.10489,
            "5": 0.10473,
            "6": 0.10497,
            "7": 0.10413,
            "8": 0.10536,
            "9": 0.32726,
            "10": 0.10707,
            "11": 0.1004,
            "12": 0.10131,
            "13": 0.10126,
            "14": 0.10152,
            "15": 0.10011,
            "16": 0.10055,
            "17": 0.10006,
            "18": 0.10008,
            "19": 0.09902,
            "20": 0.10043,
            "21": 0.09943,
            "22": 0.10108,
            "23": 0.10016,
            "24": 0.10055,
            "25": 0.10767,
            "26": 0.10062,
            "27": 0.09965,
            "28": 0.09956,
            "29": 0.09902,
            "30": 0.09994,
            "31": 0.10043,
            "32": 0.09913,
            "33": 0.09934,
            "34": 0.10116,
            "35": 0.09881,
            "36": 0.09921,
            "37": 0.09882,
            "38": 0.09871,
            "39": 0.09864,
            "40": 0.09965,
            "41": 0.09923,
            "42": 0.09939,
            "43": 0.10071,
            "44": 0.09983,
            "45": 0.35882,
            "46": 0.10188,
            "47": 0.09992,
            "48": 0.09983,
            "49": 0.09848,
            "50": 0.10049,
            "51": 0.11806,
            "52": 0.10549,
            "53": 0.10158,
            "54": 0.10548,
            "55": 0.10224,
            "56": 0.10244,
            "57": 0.10391,
            "58": 0.10383,
            "59": 0.10417,
            "60": 0.10737,
            "61": 0.1029,
            "62": 0.10202,
            "63": 0.10011,
            "64": 0.10594,
            "65": 0.10093,
            "66": 0.10168,
            "67": 0.1008,
            "68": 0.14562,
            "69": 0.09913,
            "70": 0.10262,
            "71": 0.09958,
            "72": 0.10173,
            "73": 0.09928,
            "74": 0.10376,
            "75": 0.09944,
            "76": 0.10143,
            "77": 0.10005,
            "78": 0.1033,
            "79": 0.09996,
            "80": 0.10114,
            "81": 0.09988,
            "82": 0.10093,
            "83": 0.09908,
            "84": 0.1014,
            "85": 0.09925,
            "86": 0.10175,
            "87": 0.09965,
            "88": 0.10189,
            "89": 0.10015,
            "90": 0.10099,
            "91": 0.09925,
            "92": 0.10123,
            "93": 0.09879,
            "94": 0.10599,
            "95": 0.0991,
            "96": 0.10147,
            "97": 0.09941,
            "98": 0.10245,
            "99": 0.09902,
            "100": 0.10071
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_lts_dgxa100_dracooci.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.8401,
            "2": 10.83566,
            "3": 10.82993,
            "4": 10.8173,
            "5": 10.84032,
            "6": 10.87262,
            "7": 10.83467,
            "8": 10.8403,
            "9": 10.84359,
            "10": 10.8134,
            "11": 10.85025,
            "12": 10.84316,
            "13": 10.86605,
            "14": 10.86315,
            "15": 10.80276,
            "16": 10.79643,
            "17": 10.7763,
            "18": 10.8015,
            "19": 10.7939,
            "20": 10.705,
            "21": 10.68148,
            "22": 10.56313,
            "23": 10.70136,
            "24": 10.57939,
            "25": 10.53849,
            "26": 10.60617,
            "27": 10.59211,
            "28": 10.56156,
            "29": 10.57666,
            "30": 10.35521,
            "31": 10.12773,
            "32": 10.46367,
            "33": 10.45444,
            "34": 10.22451,
            "35": 10.27148,
            "36": 10.22184,
            "37": 10.33945,
            "38": 10.18637,
            "39": 10.39329,
            "40": 10.08049,
            "41": 10.13789,
            "42": 10.20012,
            "43": 9.83791,
            "44": 9.94327,
            "45": 9.8229,
            "46": 9.82313,
            "47": 10.13353,
            "48": 9.8415,
            "49": 9.52102,
            "50": 9.90118,
            "51": 9.83467,
            "52": 9.73176,
            "53": 10.04773,
            "54": 9.93856,
            "55": 9.86424,
            "56": 9.61259,
            "57": 9.46819,
            "58": 9.81223,
            "59": 9.57172,
            "60": 9.4803,
            "61": 9.67964,
            "62": 9.96738,
            "63": 9.35351,
            "64": 9.7573,
            "65": 8.93743,
            "66": 9.68132,
            "67": 9.35694,
            "68": 9.7681,
            "69": 9.77289,
            "70": 9.71026,
            "71": 9.60024,
            "72": 9.56674,
            "73": 9.47644,
            "74": 8.93189,
            "75": 9.4088,
            "76": 9.06887,
            "77": 10.04696,
            "78": 9.70975,
            "79": 9.35669,
            "80": 9.39078,
            "81": 9.46574,
            "82": 9.68028,
            "83": 9.29218,
            "84": 9.40234,
            "85": 9.59741,
            "86": 9.06109,
            "87": 9.57951,
            "88": 9.73247,
            "89": 9.58838,
            "90": 9.80389,
            "91": 9.32105,
            "92": 9.35011,
            "93": 9.06313,
            "94": 8.82006,
            "95": 9.50562,
            "96": 9.51103,
            "97": 9.29305,
            "98": 9.65571,
            "99": 8.87502,
            "100": 9.38808
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1670.0,
            "2": 1691.0,
            "3": 1630.0,
            "4": 1805.0,
            "5": 1970.0,
            "6": 1901.0,
            "7": 1816.0,
            "8": 1587.0,
            "9": 1905.0,
            "10": 1397.0,
            "11": 1954.0,
            "12": 1859.0,
            "13": 1873.0,
            "14": 1875.0,
            "15": 1936.0,
            "16": 1972.0,
            "17": 1816.0,
            "18": 1773.0,
            "19": 1833.0,
            "20": 1715.0,
            "21": 1923.0,
            "22": 1681.0,
            "23": 2055.0,
            "24": 1727.0,
            "25": 1703.0,
            "26": 1761.0,
            "27": 1917.0,
            "28": 1962.0,
            "29": 2010.0,
            "30": 1957.0,
            "31": 1723.0,
            "32": 1898.0,
            "33": 2153.0,
            "34": 1828.0,
            "35": 1991.0,
            "36": 1937.0,
            "37": 2347.0,
            "38": 2365.0,
            "39": 2349.0,
            "40": 2239.0,
            "41": 2217.0,
            "42": 2222.0,
            "43": 2121.0,
            "44": 2059.0,
            "45": 2144.0,
            "46": 2296.0,
            "47": 2487.0,
            "48": 2376.0,
            "49": 2330.0,
            "50": 2377.0,
            "51": 2540.0,
            "52": 2598.0,
            "53": 2917.0,
            "54": 2715.0,
            "55": 2436.0,
            "56": 2691.0,
            "57": 2196.0,
            "58": 2875.0,
            "59": 2726.0,
            "60": 2445.0,
            "61": 3031.0,
            "62": 2618.0,
            "63": 2551.0,
            "64": 2939.0,
            "65": 2645.0,
            "66": 3160.0,
            "67": 2729.0,
            "68": 2852.0,
            "69": 2938.0,
            "70": 3337.0,
            "71": 3044.0,
            "72": 2531.0,
            "73": 2918.0,
            "74": 1976.0,
            "75": 2726.0,
            "76": 3036.0,
            "77": 3435.0,
            "78": 3375.0,
            "79": 3221.0,
            "80": 3356.0,
            "81": 3820.0,
            "82": 3203.0,
            "83": 2699.0,
            "84": 3073.0,
            "85": 3336.0,
            "86": 2729.0,
            "87": 3962.0,
            "88": 3062.0,
            "89": 3512.0,
            "90": 3044.0,
            "91": 2957.0,
            "92": 3276.0,
            "93": 2757.0,
            "94": 3568.0,
            "95": 3484.0,
            "96": 3627.0,
            "97": 3229.0,
            "98": 3722.0,
            "99": 3219.0,
            "100": 3467.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 552238592.0,
            "2": 552238592.0,
            "3": 552238592.0,
            "4": 552238592.0,
            "5": 552238592.0,
            "6": 552238592.0,
            "7": 552238592.0,
            "8": 552238592.0,
            "9": 552238592.0,
            "10": 552238592.0,
            "11": 552238592.0,
            "12": 552238592.0,
            "13": 552238592.0,
            "14": 552238592.0,
            "15": 552238592.0,
            "16": 552238592.0,
            "17": 552238592.0,
            "18": 552238592.0,
            "19": 552238592.0,
            "20": 552238592.0,
            "21": 552238592.0,
            "22": 552238592.0,
            "23": 552238592.0,
            "24": 552238592.0,
            "25": 552238592.0,
            "26": 552238592.0,
            "27": 552238592.0,
            "28": 552238592.0,
            "29": 552238592.0,
            "30": 552238592.0,
            "31": 552238592.0,
            "32": 552238592.0,
            "33": 552238592.0,
            "34": 552238592.0,
            "35": 552238592.0,
            "36": 552238592.0,
            "37": 552238592.0,
            "38": 552238592.0,
            "39": 552238592.0,
            "40": 552238592.0,
            "41": 552238592.0,
            "42": 552238592.0,
            "43": 552238592.0,
            "44": 552238592.0,
            "45": 552238592.0,
            "46": 552238592.0,
            "47": 552238592.0,
            "48": 552238592.0,
            "49": 552238592.0,
            "50": 552238592.0,
            "51": 552238592.0,
            "52": 552238592.0,
            "53": 552238592.0,
            "54": 552238592.0,
            "55": 552238592.0,
            "56": 552238592.0,
            "57": 552238592.0,
            "58": 552238592.0,
            "59": 552238592.0,
            "60": 552238592.0,
            "61": 552238592.0,
            "62": 552238592.0,
            "63": 552238592.0,
            "64": 552238592.0,
            "65": 552238592.0,
            "66": 552238592.0,
            "67": 552238592.0,
            "68": 552238592.0,
            "69": 552238592.0,
            "70": 552238592.0,
            "71": 552238592.0,
            "72": 552238592.0,
            "73": 552238592.0,
            "74": 552238592.0,
            "75": 552238592.0,
            "76": 552238592.0,
            "77": 552238592.0,
            "78": 552238592.0,
            "79": 552238592.0,
            "80": 552238592.0,
            "81": 552238592.0,
            "82": 552238592.0,
            "83": 552238592.0,
            "84": 552238592.0,
            "85": 552238592.0,
            "86": 552238592.0,
            "87": 552238592.0,
            "88": 552238592.0,
            "89": 552238592.0,
            "90": 552238592.0,
            "91": 552238592.0,
            "92": 552238592.0,
            "93": 552238592.0,
            "94": 552238592.0,
            "95": 552238592.0,
            "96": 552238592.0,
            "97": 552238592.0,
            "98": 552238592.0,
            "99": 552238592.0,
            "100": 552238592.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 4576563200.0,
            "2": 4673179648.0,
            "3": 4673179648.0,
            "4": 4673179648.0,
            "5": 4673179648.0,
            "6": 4673179648.0,
            "7": 4673179648.0,
            "8": 4673179648.0,
            "9": 4673179648.0,
            "10": 4673179648.0,
            "11": 4673179648.0,
            "12": 4673179648.0,
            "13": 4673179648.0,
            "14": 4673179648.0,
            "15": 4673179648.0,
            "16": 4673179648.0,
            "17": 4673179648.0,
            "18": 4673179648.0,
            "19": 4673179648.0,
            "20": 4673179648.0,
            "21": 4673179648.0,
            "22": 4673179648.0,
            "23": 4673179648.0,
            "24": 4673179648.0,
            "25": 4673179648.0,
            "26": 4673179648.0,
            "27": 4673179648.0,
            "28": 4673179648.0,
            "29": 4673179648.0,
            "30": 4673179648.0,
            "31": 4673179648.0,
            "32": 4673179648.0,
            "33": 4673179648.0,
            "34": 4673179648.0,
            "35": 4673179648.0,
            "36": 4673179648.0,
            "37": 4673179648.0,
            "38": 4673179648.0,
            "39": 4673179648.0,
            "40": 4673179648.0,
            "41": 4673179648.0,
            "42": 4673179648.0,
            "43": 4673179648.0,
            "44": 4673179648.0,
            "45": 4673179648.0,
            "46": 4673179648.0,
            "47": 4673179648.0,
            "48": 4673179648.0,
            "49": 4673179648.0,
            "50": 4673179648.0,
            "51": 4673179648.0,
            "52": 4673179648.0,
            "53": 4673179648.0,
            "54": 4673179648.0,
            "55": 4673179648.0,
            "56": 4673179648.0,
            "57": 4673179648.0,
            "58": 4673179648.0,
            "59": 4673179648.0,
            "60": 4673179648.0,
            "61": 4673179648.0,
            "62": 4673179648.0,
            "63": 4673179648.0,
            "64": 4673179648.0,
            "65": 4673179648.0,
            "66": 4673179648.0,
            "67": 4673179648.0,
            "68": 4673179648.0,
            "69": 4673179648.0,
            "70": 4673179648.0,
            "71": 4673179648.0,
            "72": 4673179648.0,
            "73": 4673179648.0,
            "74": 4673179648.0,
            "75": 4673179648.0,
            "76": 4673179648.0,
            "77": 4673179648.0,
            "78": 4673179648.0,
            "79": 4673179648.0,
            "80": 4673179648.0,
            "81": 4673179648.0,
            "82": 4673179648.0,
            "83": 4673179648.0,
            "84": 4673179648.0,
            "85": 4673179648.0,
            "86": 4673179648.0,
            "87": 4673179648.0,
            "88": 4673179648.0,
            "89": 4673179648.0,
            "90": 4673179648.0,
            "91": 4673179648.0,
            "92": 4673179648.0,
            "93": 4673179648.0,
            "94": 4673179648.0,
            "95": 4673179648.0,
            "96": 4673179648.0,
            "97": 4673179648.0,
            "98": 4673179648.0,
            "99": 4673179648.0,
            "100": 4673179648.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 8.84608,
            "2": 0.1383,
            "3": 0.11074,
            "4": 0.09988,
            "5": 0.09832,
            "6": 0.09852,
            "7": 0.09942,
            "8": 0.09887,
            "9": 0.09982,
            "10": 0.0999,
            "11": 0.32507,
            "12": 0.0997,
            "13": 0.10073,
            "14": 0.09862,
            "15": 0.09903,
            "16": 0.09813,
            "17": 0.09854,
            "18": 0.09827,
            "19": 0.09818,
            "20": 0.09782,
            "21": 0.0976,
            "22": 0.09763,
            "23": 0.09742,
            "24": 0.10007,
            "25": 0.09709,
            "26": 0.10028,
            "27": 0.09967,
            "28": 0.10005,
            "29": 0.09819,
            "30": 0.09782,
            "31": 0.09728,
            "32": 0.09707,
            "33": 0.09712,
            "34": 0.09768,
            "35": 0.09779,
            "36": 0.09761,
            "37": 0.09958,
            "38": 0.09866,
            "39": 0.09784,
            "40": 0.09877,
            "41": 0.09772,
            "42": 0.09833,
            "43": 0.09811,
            "44": 0.09781,
            "45": 0.09781,
            "46": 0.09827,
            "47": 0.09771,
            "48": 0.09763,
            "49": 0.09768,
            "50": 0.09899,
            "51": 0.10947,
            "52": 0.09886,
            "53": 0.09597,
            "54": 0.09838,
            "55": 0.09729,
            "56": 0.09695,
            "57": 0.09961,
            "58": 0.09847,
            "59": 0.09888,
            "60": 0.09635,
            "61": 0.09692,
            "62": 0.0979,
            "63": 0.09738,
            "64": 0.09561,
            "65": 0.0984,
            "66": 0.0969,
            "67": 0.13611,
            "68": 0.09631,
            "69": 0.09564,
            "70": 0.09538,
            "71": 0.09557,
            "72": 0.09548,
            "73": 0.09581,
            "74": 0.09593,
            "75": 0.09489,
            "76": 0.0959,
            "77": 0.09486,
            "78": 0.09568,
            "79": 0.09634,
            "80": 0.09468,
            "81": 0.09589,
            "82": 0.09598,
            "83": 0.09489,
            "84": 0.0954,
            "85": 0.09413,
            "86": 0.09499,
            "87": 0.09424,
            "88": 0.09411,
            "89": 0.09598,
            "90": 0.09549,
            "91": 0.09452,
            "92": 0.09467,
            "93": 0.09619,
            "94": 0.09523,
            "95": 0.09445,
            "96": 0.09426,
            "97": 0.09435,
            "98": 0.09523,
            "99": 0.09534,
            "100": 0.09547
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 1
  --use-distributed-optimizer: true
  --no-ckpt-fully-parallel-save: true
  --async-save: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-checkpoint-opt_param-scheduler: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --log-memory-to-tensorboard: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: ckpt-resume


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.84092, "5": 10.8406, "10": 10.80966, "15": 10.79977, "20": 10.705, "25": 10.53578, "30": 10.35453, "35": 10.27067, "40": 10.0777, "45": 9.82283, "50": 9.90094, "55": 9.86286, "60": 9.48141, "65": 8.93755, "70": 9.7101, "75": 9.40718, "80": 9.38973, "85": 9.5973, "90": 9.8035, "95": 9.5057, "100": 9.38833}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1629.0, "5": 1944.0, "10": 1346.0, "15": 1926.0, "20": 1656.0, "25": 1631.0, "30": 1980.0, "35": 1957.0, "40": 2272.0, "45": 2175.0, "50": 2352.0, "55": 2457.0, "60": 2531.0, "65": 2658.0, "70": 3403.0, "75": 2689.0, "80": 3367.0, "85": 3361.0, "90": 3070.0, "95": 3344.0, "100": 3359.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 552128512.0, "5": 552128512.0, "10": 552128512.0, "15": 552128512.0, "20": 552128512.0, "25": 552128512.0, "30": 552128512.0, "35": 552128512.0, "40": 552128512.0, "45": 552128512.0, "50": 552128512.0, "55": 552128512.0, "60": 552128512.0, "65": 552128512.0, "70": 552128512.0, "75": 552128512.0, "80": 552128512.0, "85": 552128512.0, "90": 552128512.0, "95": 552128512.0, "100": 552128512.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2615097856.0, "5": 2711714304.0, "10": 2711714304.0, "15": 2711714304.0, "20": 2711714304.0, "25": 2711714304.0, "30": 2711714304.0, "35": 2711714304.0, "40": 2711714304.0, "45": 2711714304.0, "50": 2711714304.0, "55": 2711714304.0, "60": 2711714304.0, "65": 2711714304.0, "70": 2711714304.0, "75": 2711714304.0, "80": 2711714304.0, "85": 2711714304.0, "90": 2711714304.0, "95": 2711714304.0, "100": 2711714304.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 4.51458, "5": 0.08834, "10": 0.08908, "15": 0.08862, "20": 0.0883, "25": 0.08398, "30": 0.0842, "35": 0.08382, "40": 0.08246, "45": 0.08273, "50": 0.08218, "55": 0.08388, "60": 0.0838, "65": 0.08378, "70": 0.08313, "75": 0.08326, "80": 0.08337, "85": 0.08292, "90": 0.08315, "95": 0.08304, "100": 0.08314}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.8401, "5": 10.84032, "10": 10.8134, "15": 10.80276, "20": 10.70493, "25": 10.53847, "30": 10.35518, "35": 10.27143, "40": 10.08046, "45": 9.82288, "50": 9.90114, "55": 9.86426, "60": 9.48028, "65": 8.93744, "70": 9.71023, "75": 9.40882, "80": 9.39078, "85": 9.59744, "90": 9.8039, "95": 9.50564, "100": 9.38814}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1670.0, "5": 1970.0, "10": 1397.0, "15": 1886.0, "20": 1785.0, "25": 1695.0, "30": 2086.0, "35": 1976.0, "40": 2349.0, "45": 2240.0, "50": 2338.0, "55": 2364.0, "60": 2474.0, "65": 2762.0, "70": 3207.0, "75": 2625.0, "80": 3502.0, "85": 3356.0, "90": 3142.0, "95": 3385.0, "100": 3449.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 552238592.0, "5": 552238592.0, "10": 552238592.0, "15": 552238592.0, "20": 552238592.0, "25": 552238592.0, "30": 552238592.0, "35": 552238592.0, "40": 552238592.0, "45": 552238592.0, "50": 552238592.0, "55": 552238592.0, "60": 552238592.0, "65": 552238592.0, "70": 552238592.0, "75": 552238592.0, "80": 552238592.0, "85": 552238592.0, "90": 552238592.0, "95": 552238592.0, "100": 552238592.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 4576563200.0, "5": 4673179648.0, "10": 4673179648.0, "15": 4673179648.0, "20": 4673179648.0, "25": 4673179648.0, "30": 4673179648.0, "35": 4673179648.0, "40": 4673179648.0, "45": 4673179648.0, "50": 4673179648.0, "55": 4673179648.0, "60": 4673179648.0, "65": 4673179648.0, "70": 4673179648.0, "75": 4673179648.0, "80": 4673179648.0, "85": 4673179648.0, "90": 4673179648.0, "95": 4673179648.0, "100": 4673179648.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 7.91605, "5": 0.09541, "10": 0.09763, "15": 0.0946, "20": 0.09797, "25": 0.09462, "30": 0.09739, "35": 0.09346, "40": 0.09524, "45": 0.09203, "50": 0.09536, "55": 0.09056, "60": 0.09061, "65": 0.0914, "70": 0.08981, "75": 0.09028, "80": 0.0901, "85": 0.09021, "90": 0.08972, "95": 0.08975, "100": 0.08952}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer_no_mmap_bin_files/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 1
  --use-distributed-optimizer: true
  --no-mmap-bin-files: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-checkpoint-opt_param-scheduler: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-optim-fully-reshardable: true
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: ckpt-resume


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.84092, "5": 10.84059, "10": 10.80969, "15": 10.79981, "20": 10.70508, "25": 10.53597, "30": 10.35474, "35": 10.27091, "40": 10.07792, "45": 9.82303, "50": 9.90109, "55": 9.863, "60": 9.48155, "65": 8.93771, "70": 9.71016, "75": 9.4072, "80": 9.38975, "85": 9.59735, "90": 9.80354, "95": 9.50568, "100": 9.38833}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1628.0, "5": 1937.0, "10": 1325.0, "15": 1928.0, "20": 1690.0, "25": 1720.0, "30": 1981.0, "35": 2007.0, "40": 2370.0, "45": 2150.0, "50": 2270.0, "55": 2482.0, "60": 2561.0, "65": 2622.0, "70": 3347.0, "75": 2722.0, "80": 3404.0, "85": 3237.0, "90": 3184.0, "95": 3387.0, "100": 3350.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1234585088.0, "5": 1234585088.0, "10": 1234585088.0, "15": 1234585088.0, "20": 1234585088.0, "25": 1234585088.0, "30": 1234585088.0, "35": 1234585088.0, "40": 1234585088.0, "45": 1234585088.0, "50": 1234585088.0, "55": 1234585088.0, "60": 1234585088.0, "65": 1234585088.0, "70": 1234585088.0, "75": 1234585088.0, "80": 1234585088.0, "85": 1234585088.0, "90": 1234585088.0, "95": 1234585088.0, "100": 1234585088.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1984492544.0, "5": 2536167424.0, "10": 2536167424.0, "15": 2536167424.0, "20": 2536167424.0, "25": 2536167424.0, "30": 2536167424.0, "35": 2536167424.0, "40": 2536167424.0, "45": 2536167424.0, "50": 2536167424.0, "55": 2536167424.0, "60": 2536167424.0, "65": 2536167424.0, "70": 2536167424.0, "75": 2536167424.0, "80": 2536167424.0, "85": 2536167424.0, "90": 2536167424.0, "95": 2536167424.0, "100": 2536167424.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 6.96415, "5": 0.17071, "10": 0.16774, "15": 0.16914, "20": 0.1621, "25": 0.16314, "30": 0.16135, "35": 0.16212, "40": 0.16087, "45": 0.16224, "50": 0.16232, "55": 0.16188, "60": 0.16245, "65": 0.16278, "70": 0.16186, "75": 0.16193, "80": 0.16131, "85": 0.16191, "90": 0.16103, "95": 0.16207, "100": 0.16103}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.77447,
            "2": 10.78365,
            "3": 10.78346,
            "4": 10.74822,
            "5": 10.81983,
            "6": 10.82303,
            "7": 10.79055,
            "8": 10.77956,
            "9": 10.78595,
            "10": 10.74453,
            "11": 10.83267,
            "12": 10.80426,
            "13": 10.82082,
            "14": 10.82568,
            "15": 10.74205,
            "16": 10.74901,
            "17": 10.72521,
            "18": 10.74178,
            "19": 10.74415,
            "20": 10.63672,
            "21": 10.63053,
            "22": 10.47964,
            "23": 10.65979,
            "24": 10.52478,
            "25": 10.47552,
            "26": 10.54115,
            "27": 10.55498,
            "28": 10.52138,
            "29": 10.53601,
            "30": 10.3053,
            "31": 10.06443,
            "32": 10.41576,
            "33": 10.42199,
            "34": 10.17396,
            "35": 10.22407,
            "36": 10.18503,
            "37": 10.30413,
            "38": 10.14998,
            "39": 10.37038,
            "40": 10.03991,
            "41": 10.1095,
            "42": 10.17936,
            "43": 9.79751,
            "44": 9.90816,
            "45": 9.79806,
            "46": 9.79659,
            "47": 10.1211,
            "48": 9.82086,
            "49": 9.50494,
            "50": 9.88025,
            "51": 9.83617,
            "52": 9.72317,
            "53": 10.05321,
            "54": 9.93744,
            "55": 9.87386,
            "56": 9.60451,
            "57": 9.45231,
            "58": 9.81883,
            "59": 9.57722,
            "60": 9.48536,
            "61": 9.68547,
            "62": 9.97907,
            "63": 9.36417,
            "64": 9.76205,
            "65": 8.94102,
            "66": 9.69479,
            "67": 9.36657,
            "68": 9.77743,
            "69": 9.78996,
            "70": 9.72377,
            "71": 9.62042,
            "72": 9.57421,
            "73": 9.48574,
            "74": 8.92728,
            "75": 9.41652,
            "76": 9.07749,
            "77": 10.05445,
            "78": 9.71913,
            "79": 9.37304,
            "80": 9.40003,
            "81": 9.47846,
            "82": 9.69869,
            "83": 9.31156,
            "84": 9.41458,
            "85": 9.61162,
            "86": 9.07419,
            "87": 9.59392,
            "88": 9.74925,
            "89": 9.59851,
            "90": 9.82763,
            "91": 9.33629,
            "92": 9.35804,
            "93": 9.08549,
            "94": 8.8279,
            "95": 9.53033,
            "96": 9.52662,
            "97": 9.30484,
            "98": 9.67007,
            "99": 8.89604,
            "100": 9.407
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1531.0,
            "2": 1722.0,
            "3": 1589.0,
            "4": 1870.0,
            "5": 1992.0,
            "6": 1894.0,
            "7": 1954.0,
            "8": 1697.0,
            "9": 1855.0,
            "10": 1477.0,
            "11": 1889.0,
            "12": 1848.0,
            "13": 1885.0,
            "14": 1934.0,
            "15": 1984.0,
            "16": 1934.0,
            "17": 1820.0,
            "18": 1643.0,
            "19": 1735.0,
            "20": 1682.0,
            "21": 1974.0,
            "22": 1733.0,
            "23": 1932.0,
            "24": 1650.0,
            "25": 1603.0,
            "26": 1762.0,
            "27": 1846.0,
            "28": 1899.0,
            "29": 2020.0,
            "30": 1941.0,
            "31": 1620.0,
            "32": 1902.0,
            "33": 2053.0,
            "34": 1891.0,
            "35": 1988.0,
            "36": 1990.0,
            "37": 2382.0,
            "38": 2143.0,
            "39": 2445.0,
            "40": 2284.0,
            "41": 2265.0,
            "42": 2272.0,
            "43": 2112.0,
            "44": 2088.0,
            "45": 2332.0,
            "46": 2345.0,
            "47": 2550.0,
            "48": 2419.0,
            "49": 2250.0,
            "50": 2509.0,
            "51": 2708.0,
            "52": 2707.0,
            "53": 2812.0,
            "54": 2620.0,
            "55": 2399.0,
            "56": 2790.0,
            "57": 2301.0,
            "58": 3008.0,
            "59": 2863.0,
            "60": 2465.0,
            "61": 2808.0,
            "62": 2607.0,
            "63": 2442.0,
            "64": 2977.0,
            "65": 2646.0,
            "66": 3061.0,
            "67": 2818.0,
            "68": 2891.0,
            "69": 3036.0,
            "70": 3160.0,
            "71": 3064.0,
            "72": 2618.0,
            "73": 2978.0,
            "74": 2000.0,
            "75": 2580.0,
            "76": 2967.0,
            "77": 3281.0,
            "78": 3131.0,
            "79": 3108.0,
            "80": 3217.0,
            "81": 3614.0,
            "82": 3411.0,
            "83": 2834.0,
            "84": 3191.0,
            "85": 3306.0,
            "86": 2806.0,
            "87": 3808.0,
            "88": 3237.0,
            "89": 3425.0,
            "90": 3202.0,
            "91": 2829.0,
            "92": 3105.0,
            "93": 2882.0,
            "94": 3303.0,
            "95": 3310.0,
            "96": 3499.0,
            "97": 3211.0,
            "98": 3741.0,
            "99": 3167.0,
            "100": 3049.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1260800512.0,
            "2": 1260800512.0,
            "3": 1260800512.0,
            "4": 1260800512.0,
            "5": 1260800512.0,
            "6": 1260800512.0,
            "7": 1260800512.0,
            "8": 1260800512.0,
            "9": 1260800512.0,
            "10": 1260800512.0,
            "11": 1260800512.0,
            "12": 1260800512.0,
            "13": 1260800512.0,
            "14": 1260800512.0,
            "15": 1260800512.0,
            "16": 1260800512.0,
            "17": 1260800512.0,
            "18": 1260800512.0,
            "19": 1260800512.0,
            "20": 1260800512.0,
            "21": 1260800512.0,
            "22": 1260800512.0,
            "23": 1260800512.0,
            "24": 1260800512.0,
            "25": 1260800512.0,
            "26": 1260800512.0,
            "27": 1260800512.0,
            "28": 1260800512.0,
            "29": 1260800512.0,
            "30": 1260800512.0,
            "31": 1260800512.0,
            "32": 1260800512.0,
            "33": 1260800512.0,
            "34": 1260800512.0,
            "35": 1260800512.0,
            "36": 1260800512.0,
            "37": 1260800512.0,
            "38": 1260800512.0,
            "39": 1260800512.0,
            "40": 1260800512.0,
            "41": 1260800512.0,
            "42": 1260800512.0,
            "43": 1260800512.0,
            "44": 1260800512.0,
            "45": 1260800512.0,
            "46": 1260800512.0,
            "47": 1260800512.0,
            "48": 1260800512.0,
            "49": 1260800512.0,
            "50": 1260800512.0,
            "51": 1260800512.0,
            "52": 1260800512.0,
            "53": 1260800512.0,
            "54": 1260800512.0,
            "55": 1260800512.0,
            "56": 1260800512.0,
            "57": 1260800512.0,
            "58": 1260800512.0,
            "59": 1260800512.0,
            "60": 1260800512.0,
            "61": 1260800512.0,
            "62": 1260800512.0,
            "63": 1260800512.0,
            "64": 1260800512.0,
            "65": 1260800512.0,
            "66": 1260800512.0,
            "67": 1260800512.0,
            "68": 1260800512.0,
            "69": 1260800512.0,
            "70": 1260800512.0,
            "71": 1260800512.0,
            "72": 1260800512.0,
            "73": 1260800512.0,
            "74": 1260800512.0,
            "75": 1260800512.0,
            "76": 1260800512.0,
            "77": 1260800512.0,
            "78": 1260800512.0,
            "79": 1260800512.0,
            "80": 1260800512.0,
            "81": 1260800512.0,
            "82": 1260800512.0,
            "83": 1260800512.0,
            "84": 1260800512.0,
            "85": 1260800512.0,
            "86": 1260800512.0,
            "87": 1260800512.0,
            "88": 1260800512.0,
            "89": 1260800512.0,
            "90": 1260800512.0,
            "91": 1260800512.0,
            "92": 1260800512.0,
            "93": 1260800512.0,
            "94": 1260800512.0,
            "95": 1260800512.0,
            "96": 1260800512.0,
            "97": 1260800512.0,
            "98": 1260800512.0,
            "99": 1260800512.0,
            "100": 1260800512.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2013853696.0,
            "2": 2562382848.0,
            "3": 2562382848.0,
            "4": 2562382848.0,
            "5": 2562382848.0,
            "6": 2562382848.0,
            "7": 2562382848.0,
            "8": 2562382848.0,
            "9": 2562382848.0,
            "10": 2562382848.0,
            "11": 2562382848.0,
            "12": 2562382848.0,
            "13": 2562382848.0,
            "14": 2562382848.0,
            "15": 2562382848.0,
            "16": 2562382848.0,
            "17": 2562382848.0,
            "18": 2562382848.0,
            "19": 2562382848.0,
            "20": 2562382848.0,
            "21": 2562382848.0,
            "22": 2562382848.0,
            "23": 2562382848.0,
            "24": 2562382848.0,
            "25": 2562382848.0,
            "26": 2562382848.0,
            "27": 2562382848.0,
            "28": 2562382848.0,
            "29": 2562382848.0,
            "30": 2562382848.0,
            "31": 2562382848.0,
            "32": 2562382848.0,
            "33": 2562382848.0,
            "34": 2562382848.0,
            "35": 2562382848.0,
            "36": 2562382848.0,
            "37": 2562382848.0,
            "38": 2562382848.0,
            "39": 2562382848.0,
            "40": 2562382848.0,
            "41": 2562382848.0,
            "42": 2562382848.0,
            "43": 2562382848.0,
            "44": 2562382848.0,
            "45": 2562382848.0,
            "46": 2562382848.0,
            "47": 2562382848.0,
            "48": 2562382848.0,
            "49": 2562382848.0,
            "50": 2562382848.0,
            "51": 2562382848.0,
            "52": 2562382848.0,
            "53": 2562382848.0,
            "54": 2562382848.0,
            "55": 2562382848.0,
            "56": 2562382848.0,
            "57": 2562382848.0,
            "58": 2562382848.0,
            "59": 2562382848.0,
            "60": 2562382848.0,
            "61": 2562382848.0,
            "62": 2562382848.0,
            "63": 2562382848.0,
            "64": 2562382848.0,
            "65": 2562382848.0,
            "66": 2562382848.0,
            "67": 2562382848.0,
            "68": 2562382848.0,
            "69": 2562382848.0,
            "70": 2562382848.0,
            "71": 2562382848.0,
            "72": 2562382848.0,
            "73": 2562382848.0,
            "74": 2562382848.0,
            "75": 2562382848.0,
            "76": 2562382848.0,
            "77": 2562382848.0,
            "78": 2562382848.0,
            "79": 2562382848.0,
            "80": 2562382848.0,
            "81": 2562382848.0,
            "82": 2562382848.0,
            "83": 2562382848.0,
            "84": 2562382848.0,
            "85": 2562382848.0,
            "86": 2562382848.0,
            "87": 2562382848.0,
            "88": 2562382848.0,
            "89": 2562382848.0,
            "90": 2562382848.0,
            "91": 2562382848.0,
            "92": 2562382848.0,
            "93": 2562382848.0,
            "94": 2562382848.0,
            "95": 2562382848.0,
            "96": 2562382848.0,
            "97": 2562382848.0,
            "98": 2562382848.0,
            "99": 2562382848.0,
            "100": 2562382848.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 2.02256,
            "3": 0.13455,
            "4": 0.12293,
            "5": 0.12302,
            "6": 0.1233,
            "7": 0.12328,
            "8": 0.12248,
            "9": 0.12446,
            "10": 0.12285,
            "11": 0.12255,
            "12": 0.12296,
            "13": 0.12411,
            "14": 0.12369,
            "15": 0.12438,
            "16": 0.12387,
            "17": 0.12481,
            "18": 0.12591,
            "19": 0.12445,
            "20": 0.12257,
            "21": 0.12141,
            "22": 0.12289,
            "23": 0.12296,
            "24": 0.12246,
            "25": 0.12246,
            "26": 0.12219,
            "27": 0.12283,
            "28": 0.12209,
            "29": 0.12164,
            "30": 0.12236,
            "31": 0.1236,
            "32": 0.12251,
            "33": 0.12372,
            "34": 0.12054,
            "35": 0.12166,
            "36": 0.12052,
            "37": 0.12268,
            "38": 0.12181,
            "39": 0.12231,
            "40": 0.1195,
            "41": 0.12001,
            "42": 0.12145,
            "43": 0.12238,
            "44": 0.12054,
            "45": 0.11842,
            "46": 0.11812,
            "47": 0.11785,
            "48": 0.11631,
            "49": 0.11798,
            "50": 0.11707,
            "51": 0.12234,
            "52": 0.11424,
            "53": 0.11577,
            "54": 0.11058,
            "55": 0.11651,
            "56": 0.12356,
            "57": 0.12837,
            "58": 0.1238,
            "59": 0.13093,
            "60": 0.14556,
            "61": 0.1747,
            "62": 0.14486,
            "63": 0.15679,
            "64": 0.14116,
            "65": 0.13574,
            "66": 0.16023,
            "67": 0.14862,
            "68": 0.14163,
            "69": 0.14244,
            "70": 0.13512,
            "71": 0.1407,
            "72": 0.14689,
            "73": 0.13238,
            "74": 0.1279,
            "75": 0.12535,
            "76": 0.12172,
            "77": 0.12314,
            "78": 0.12089,
            "79": 0.11925,
            "80": 0.11854,
            "81": 0.11618,
            "82": 0.11706,
            "83": 0.11632,
            "84": 0.11839,
            "85": 0.11763,
            "86": 0.11977,
            "87": 0.12013,
            "88": 0.11954,
            "89": 0.11859,
            "90": 0.11546,
            "91": 0.11494,
            "92": 0.1154,
            "93": 0.11866,
            "94": 0.25826,
            "95": 0.11359,
            "96": 0.11427,
            "97": 0.11526,
            "98": 0.11269,
            "99": 0.11239,
            "100": 0.11374
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_gb200_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.83617,
            "52": 9.72317,
            "53": 10.05321,
            "54": 9.93744,
            "55": 9.87386,
            "56": 9.60451,
            "57": 9.45231,
            "58": 9.81883,
            "59": 9.57722,
            "60": 9.48536,
            "61": 9.68547,
            "62": 9.97907,
            "63": 9.36417,
            "64": 9.76205,
            "65": 8.94102,
            "66": 9.69479,
            "67": 9.36657,
            "68": 9.77743,
            "69": 9.78996,
            "70": 9.72377,
            "71": 9.62042,
            "72": 9.57421,
            "73": 9.48574,
            "74": 8.92728,
            "75": 9.41652,
            "76": 9.07749,
            "77": 10.05445,
            "78": 9.71913,
            "79": 9.37304,
            "80": 9.40003,
            "81": 9.47846,
            "82": 9.69869,
            "83": 9.31156,
            "84": 9.41458,
            "85": 9.61162,
            "86": 9.07419,
            "87": 9.59392,
            "88": 9.74925,
            "89": 9.59851,
            "90": 9.82763,
            "91": 9.33629,
            "92": 9.35804,
            "93": 9.08549,
            "94": 8.8279,
            "95": 9.53033,
            "96": 9.52662,
            "97": 9.30484,
            "98": 9.67007,
            "99": 8.89604,
            "100": 9.407
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2708.0,
            "52": 2707.0,
            "53": 2812.0,
            "54": 2620.0,
            "55": 2399.0,
            "56": 2790.0,
            "57": 2301.0,
            "58": 3008.0,
            "59": 2863.0,
            "60": 2465.0,
            "61": 2808.0,
            "62": 2607.0,
            "63": 2442.0,
            "64": 2977.0,
            "65": 2646.0,
            "66": 3061.0,
            "67": 2818.0,
            "68": 2891.0,
            "69": 3036.0,
            "70": 3160.0,
            "71": 3064.0,
            "72": 2618.0,
            "73": 2978.0,
            "74": 2000.0,
            "75": 2580.0,
            "76": 2967.0,
            "77": 3281.0,
            "78": 3131.0,
            "79": 3108.0,
            "80": 3217.0,
            "81": 3614.0,
            "82": 3411.0,
            "83": 2834.0,
            "84": 3191.0,
            "85": 3306.0,
            "86": 2806.0,
            "87": 3808.0,
            "88": 3237.0,
            "89": 3425.0,
            "90": 3202.0,
            "91": 2829.0,
            "92": 3105.0,
            "93": 2882.0,
            "94": 3303.0,
            "95": 3310.0,
            "96": 3499.0,
            "97": 3211.0,
            "98": 3741.0,
            "99": 3167.0,
            "100": 3049.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 1261849088.0,
            "52": 1261849088.0,
            "53": 1261849088.0,
            "54": 1261849088.0,
            "55": 1261849088.0,
            "56": 1261849088.0,
            "57": 1261849088.0,
            "58": 1261849088.0,
            "59": 1261849088.0,
            "60": 1261849088.0,
            "61": 1261849088.0,
            "62": 1261849088.0,
            "63": 1261849088.0,
            "64": 1261849088.0,
            "65": 1261849088.0,
            "66": 1261849088.0,
            "67": 1261849088.0,
            "68": 1261849088.0,
            "69": 1261849088.0,
            "70": 1261849088.0,
            "71": 1261849088.0,
            "72": 1261849088.0,
            "73": 1261849088.0,
            "74": 1261849088.0,
            "75": 1261849088.0,
            "76": 1261849088.0,
            "77": 1261849088.0,
            "78": 1261849088.0,
            "79": 1261849088.0,
            "80": 1261849088.0,
            "81": 1261849088.0,
            "82": 1261849088.0,
            "83": 1261849088.0,
            "84": 1261849088.0,
            "85": 1261849088.0,
            "86": 1261849088.0,
            "87": 1261849088.0,
            "88": 1261849088.0,
            "89": 1261849088.0,
            "90": 1261849088.0,
            "91": 1261849088.0,
            "92": 1261849088.0,
            "93": 1261849088.0,
            "94": 1261849088.0,
            "95": 1261849088.0,
            "96": 1261849088.0,
            "97": 1261849088.0,
            "98": 1261849088.0,
            "99": 1261849088.0,
            "100": 1261849088.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2530924544.0,
            "52": 2564480000.0,
            "53": 2564480000.0,
            "54": 2564480000.0,
            "55": 2564480000.0,
            "56": 2564480000.0,
            "57": 2564480000.0,
            "58": 2564480000.0,
            "59": 2564480000.0,
            "60": 2564480000.0,
            "61": 2564480000.0,
            "62": 2564480000.0,
            "63": 2564480000.0,
            "64": 2564480000.0,
            "65": 2564480000.0,
            "66": 2564480000.0,
            "67": 2564480000.0,
            "68": 2564480000.0,
            "69": 2564480000.0,
            "70": 2564480000.0,
            "71": 2564480000.0,
            "72": 2564480000.0,
            "73": 2564480000.0,
            "74": 2564480000.0,
            "75": 2564480000.0,
            "76": 2564480000.0,
            "77": 2564480000.0,
            "78": 2564480000.0,
            "79": 2564480000.0,
            "80": 2564480000.0,
            "81": 2564480000.0,
            "82": 2564480000.0,
            "83": 2564480000.0,
            "84": 2564480000.0,
            "85": 2564480000.0,
            "86": 2564480000.0,
            "87": 2564480000.0,
            "88": 2564480000.0,
            "89": 2564480000.0,
            "90": 2564480000.0,
            "91": 2564480000.0,
            "92": 2564480000.0,
            "93": 2564480000.0,
            "94": 2564480000.0,
            "95": 2564480000.0,
            "96": 2564480000.0,
            "97": 2564480000.0,
            "98": 2564480000.0,
            "99": 2564480000.0,
            "100": 2564480000.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": "nan",
            "52": 1.73474,
            "53": 0.13329,
            "54": 0.12193,
            "55": 0.12308,
            "56": 0.12634,
            "57": 0.12745,
            "58": 0.12425,
            "59": 0.12199,
            "60": 0.12359,
            "61": 0.11982,
            "62": 0.12161,
            "63": 0.11993,
            "64": 0.12221,
            "65": 0.12364,
            "66": 0.12245,
            "67": 0.12126,
            "68": 0.12211,
            "69": 0.11961,
            "70": 0.12166,
            "71": 0.11999,
            "72": 0.12512,
            "73": 0.12157,
            "74": 0.11996,
            "75": 0.12183,
            "76": 0.11982,
            "77": 0.1205,
            "78": 0.12225,
            "79": 0.12245,
            "80": 0.12222,
            "81": 0.12087,
            "82": 0.11834,
            "83": 0.11849,
            "84": 0.11754,
            "85": 0.1168,
            "86": 0.11739,
            "87": 0.11786,
            "88": 0.1178,
            "89": 0.11801,
            "90": 0.11757,
            "91": 0.11668,
            "92": 0.11659,
            "93": 0.11656,
            "94": 0.11741,
            "95": 0.11613,
            "96": 0.11801,
            "97": 0.11735,
            "98": 0.1168,
            "99": 0.11724,
            "100": 0.1175
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.89631,
            "2": 10.89416,
            "3": 10.88785,
            "4": 10.89141,
            "5": 10.89153,
            "6": 10.90002,
            "7": 10.89187,
            "8": 10.89886,
            "9": 10.90211,
            "10": 10.88366,
            "11": 10.87818,
            "12": 10.89332,
            "13": 10.89815,
            "14": 10.89244,
            "15": 10.84802,
            "16": 10.85399,
            "17": 10.83096,
            "18": 10.83988,
            "19": 10.82801,
            "20": 10.74826,
            "21": 10.73495,
            "22": 10.61721,
            "23": 10.7262,
            "24": 10.6318,
            "25": 10.59315,
            "26": 10.63366,
            "27": 10.63302,
            "28": 10.58261,
            "29": 10.58594,
            "30": 10.41204,
            "31": 10.15907,
            "32": 10.48364,
            "33": 10.46706,
            "34": 10.23815,
            "35": 10.28192,
            "36": 10.24053,
            "37": 10.36222,
            "38": 10.20308,
            "39": 10.40454,
            "40": 10.09271,
            "41": 10.15834,
            "42": 10.21931,
            "43": 9.84356,
            "44": 9.97301,
            "45": 9.84195,
            "46": 9.82013,
            "47": 10.14966,
            "48": 9.86021,
            "49": 9.54237,
            "50": 9.91349,
            "51": 9.85446,
            "52": 9.73932,
            "53": 10.07424,
            "54": 9.96915,
            "55": 9.8857,
            "56": 9.62439,
            "57": 9.48231,
            "58": 9.83487,
            "59": 9.58731,
            "60": 9.50244,
            "61": 9.69338,
            "62": 9.98802,
            "63": 9.39104,
            "64": 9.78023,
            "65": 8.94515,
            "66": 9.70492,
            "67": 9.37249,
            "68": 9.78334,
            "69": 9.79056,
            "70": 9.74448,
            "71": 9.62302,
            "72": 9.58456,
            "73": 9.50509,
            "74": 8.94304,
            "75": 9.42523,
            "76": 9.07599,
            "77": 10.06351,
            "78": 9.72308,
            "79": 9.37501,
            "80": 9.40453,
            "81": 9.47789,
            "82": 9.69668,
            "83": 9.30714,
            "84": 9.41527,
            "85": 9.61294,
            "86": 9.07193,
            "87": 9.58839,
            "88": 9.7476,
            "89": 9.59981,
            "90": 9.81672,
            "91": 9.33789,
            "92": 9.35604,
            "93": 9.07423,
            "94": 8.83512,
            "95": 9.51842,
            "96": 9.52392,
            "97": 9.30921,
            "98": 9.66748,
            "99": 8.88417,
            "100": 9.39923
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1524.0,
            "2": 1653.0,
            "3": 1732.0,
            "4": 1794.0,
            "5": 1835.0,
            "6": 1904.0,
            "7": 1919.0,
            "8": 1747.0,
            "9": 1860.0,
            "10": 1363.0,
            "11": 1886.0,
            "12": 1814.0,
            "13": 2010.0,
            "14": 1805.0,
            "15": 1895.0,
            "16": 1925.0,
            "17": 1797.0,
            "18": 1687.0,
            "19": 1794.0,
            "20": 1640.0,
            "21": 1870.0,
            "22": 1691.0,
            "23": 2048.0,
            "24": 1689.0,
            "25": 1674.0,
            "26": 1851.0,
            "27": 1918.0,
            "28": 2004.0,
            "29": 2002.0,
            "30": 1974.0,
            "31": 1552.0,
            "32": 1919.0,
            "33": 2072.0,
            "34": 1877.0,
            "35": 2014.0,
            "36": 1917.0,
            "37": 2380.0,
            "38": 2217.0,
            "39": 2340.0,
            "40": 2334.0,
            "41": 2432.0,
            "42": 2292.0,
            "43": 2056.0,
            "44": 2175.0,
            "45": 2177.0,
            "46": 2317.0,
            "47": 2577.0,
            "48": 2421.0,
            "49": 2154.0,
            "50": 2463.0,
            "51": 2625.0,
            "52": 2529.0,
            "53": 2937.0,
            "54": 2770.0,
            "55": 2449.0,
            "56": 2668.0,
            "57": 2348.0,
            "58": 3012.0,
            "59": 2766.0,
            "60": 2323.0,
            "61": 2868.0,
            "62": 2665.0,
            "63": 2375.0,
            "64": 2910.0,
            "65": 2634.0,
            "66": 2945.0,
            "67": 2770.0,
            "68": 2758.0,
            "69": 2839.0,
            "70": 3090.0,
            "71": 2928.0,
            "72": 2473.0,
            "73": 2991.0,
            "74": 2012.0,
            "75": 2569.0,
            "76": 2953.0,
            "77": 3333.0,
            "78": 3200.0,
            "79": 3040.0,
            "80": 3305.0,
            "81": 3551.0,
            "82": 3269.0,
            "83": 2808.0,
            "84": 3261.0,
            "85": 3369.0,
            "86": 2708.0,
            "87": 3551.0,
            "88": 3135.0,
            "89": 3206.0,
            "90": 3087.0,
            "91": 2653.0,
            "92": 3131.0,
            "93": 2676.0,
            "94": 3310.0,
            "95": 3393.0,
            "96": 3418.0,
            "97": 3229.0,
            "98": 3725.0,
            "99": 3049.0,
            "100": 3170.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1259751936.0,
            "2": 1259751936.0,
            "3": 1259751936.0,
            "4": 1259751936.0,
            "5": 1259751936.0,
            "6": 1259751936.0,
            "7": 1259751936.0,
            "8": 1259751936.0,
            "9": 1259751936.0,
            "10": 1259751936.0,
            "11": 1259751936.0,
            "12": 1259751936.0,
            "13": 1259751936.0,
            "14": 1259751936.0,
            "15": 1259751936.0,
            "16": 1259751936.0,
            "17": 1259751936.0,
            "18": 1259751936.0,
            "19": 1259751936.0,
            "20": 1259751936.0,
            "21": 1259751936.0,
            "22": 1259751936.0,
            "23": 1259751936.0,
            "24": 1259751936.0,
            "25": 1259751936.0,
            "26": 1259751936.0,
            "27": 1259751936.0,
            "28": 1259751936.0,
            "29": 1259751936.0,
            "30": 1259751936.0,
            "31": 1259751936.0,
            "32": 1259751936.0,
            "33": 1259751936.0,
            "34": 1259751936.0,
            "35": 1259751936.0,
            "36": 1259751936.0,
            "37": 1259751936.0,
            "38": 1259751936.0,
            "39": 1259751936.0,
            "40": 1259751936.0,
            "41": 1259751936.0,
            "42": 1259751936.0,
            "43": 1259751936.0,
            "44": 1259751936.0,
            "45": 1259751936.0,
            "46": 1259751936.0,
            "47": 1259751936.0,
            "48": 1259751936.0,
            "49": 1259751936.0,
            "50": 1259751936.0,
            "51": 1259751936.0,
            "52": 1259751936.0,
            "53": 1259751936.0,
            "54": 1259751936.0,
            "55": 1259751936.0,
            "56": 1259751936.0,
            "57": 1259751936.0,
            "58": 1259751936.0,
            "59": 1259751936.0,
            "60": 1259751936.0,
            "61": 1259751936.0,
            "62": 1259751936.0,
            "63": 1259751936.0,
            "64": 1259751936.0,
            "65": 1259751936.0,
            "66": 1259751936.0,
            "67": 1259751936.0,
            "68": 1259751936.0,
            "69": 1259751936.0,
            "70": 1259751936.0,
            "71": 1259751936.0,
            "72": 1259751936.0,
            "73": 1259751936.0,
            "74": 1259751936.0,
            "75": 1259751936.0,
            "76": 1259751936.0,
            "77": 1259751936.0,
            "78": 1259751936.0,
            "79": 1259751936.0,
            "80": 1259751936.0,
            "81": 1259751936.0,
            "82": 1259751936.0,
            "83": 1259751936.0,
            "84": 1259751936.0,
            "85": 1259751936.0,
            "86": 1259751936.0,
            "87": 1259751936.0,
            "88": 1259751936.0,
            "89": 1259751936.0,
            "90": 1259751936.0,
            "91": 1259751936.0,
            "92": 1259751936.0,
            "93": 1259751936.0,
            "94": 1259751936.0,
            "95": 1259751936.0,
            "96": 1259751936.0,
            "97": 1259751936.0,
            "98": 1259751936.0,
            "99": 1259751936.0,
            "100": 1259751936.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2013853696.0,
            "2": 2561334272.0,
            "3": 2561334272.0,
            "4": 2561334272.0,
            "5": 2561334272.0,
            "6": 2561334272.0,
            "7": 2561334272.0,
            "8": 2561334272.0,
            "9": 2561334272.0,
            "10": 2561334272.0,
            "11": 2561334272.0,
            "12": 2561334272.0,
            "13": 2561334272.0,
            "14": 2561334272.0,
            "15": 2561334272.0,
            "16": 2561334272.0,
            "17": 2561334272.0,
            "18": 2561334272.0,
            "19": 2561334272.0,
            "20": 2561334272.0,
            "21": 2561334272.0,
            "22": 2561334272.0,
            "23": 2561334272.0,
            "24": 2561334272.0,
            "25": 2561334272.0,
            "26": 2561334272.0,
            "27": 2561334272.0,
            "28": 2561334272.0,
            "29": 2561334272.0,
            "30": 2561334272.0,
            "31": 2561334272.0,
            "32": 2561334272.0,
            "33": 2561334272.0,
            "34": 2561334272.0,
            "35": 2561334272.0,
            "36": 2561334272.0,
            "37": 2561334272.0,
            "38": 2561334272.0,
            "39": 2561334272.0,
            "40": 2561334272.0,
            "41": 2561334272.0,
            "42": 2561334272.0,
            "43": 2561334272.0,
            "44": 2561334272.0,
            "45": 2561334272.0,
            "46": 2561334272.0,
            "47": 2561334272.0,
            "48": 2561334272.0,
            "49": 2561334272.0,
            "50": 2561334272.0,
            "51": 2561334272.0,
            "52": 2561334272.0,
            "53": 2561334272.0,
            "54": 2561334272.0,
            "55": 2561334272.0,
            "56": 2561334272.0,
            "57": 2561334272.0,
            "58": 2561334272.0,
            "59": 2561334272.0,
            "60": 2561334272.0,
            "61": 2561334272.0,
            "62": 2561334272.0,
            "63": 2561334272.0,
            "64": 2561334272.0,
            "65": 2561334272.0,
            "66": 2561334272.0,
            "67": 2561334272.0,
            "68": 2561334272.0,
            "69": 2561334272.0,
            "70": 2561334272.0,
            "71": 2561334272.0,
            "72": 2561334272.0,
            "73": 2561334272.0,
            "74": 2561334272.0,
            "75": 2561334272.0,
            "76": 2561334272.0,
            "77": 2561334272.0,
            "78": 2561334272.0,
            "79": 2561334272.0,
            "80": 2561334272.0,
            "81": 2561334272.0,
            "82": 2561334272.0,
            "83": 2561334272.0,
            "84": 2561334272.0,
            "85": 2561334272.0,
            "86": 2561334272.0,
            "87": 2561334272.0,
            "88": 2561334272.0,
            "89": 2561334272.0,
            "90": 2561334272.0,
            "91": 2561334272.0,
            "92": 2561334272.0,
            "93": 2561334272.0,
            "94": 2561334272.0,
            "95": 2561334272.0,
            "96": 2561334272.0,
            "97": 2561334272.0,
            "98": 2561334272.0,
            "99": 2561334272.0,
            "100": 2561334272.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 2.86901,
            "3": 0.0951,
            "4": 0.07664,
            "5": 0.07727,
            "6": 0.07707,
            "7": 0.07716,
            "8": 0.07667,
            "9": 0.07695,
            "10": 0.0768,
            "11": 0.07613,
            "12": 0.07687,
            "13": 0.07608,
            "14": 0.07715,
            "15": 0.07689,
            "16": 0.07617,
            "17": 0.07691,
            "18": 0.07603,
            "19": 0.07642,
            "20": 0.07617,
            "21": 0.0765,
            "22": 0.07638,
            "23": 0.07673,
            "24": 0.07615,
            "25": 0.07629,
            "26": 0.07589,
            "27": 0.07678,
            "28": 0.07588,
            "29": 0.07579,
            "30": 0.07618,
            "31": 0.07686,
            "32": 0.0755,
            "33": 0.07624,
            "34": 0.07587,
            "35": 0.07656,
            "36": 0.07558,
            "37": 0.07674,
            "38": 0.07619,
            "39": 0.07638,
            "40": 0.07629,
            "41": 0.07916,
            "42": 0.07708,
            "43": 0.07736,
            "44": 0.07658,
            "45": 0.07694,
            "46": 0.07639,
            "47": 0.0787,
            "48": 0.0777,
            "49": 0.07677,
            "50": 0.07678,
            "51": 0.08142,
            "52": 0.07793,
            "53": 0.07688,
            "54": 0.0771,
            "55": 0.07682,
            "56": 0.07717,
            "57": 0.07712,
            "58": 0.07637,
            "59": 0.07821,
            "60": 0.07756,
            "61": 0.07793,
            "62": 0.07875,
            "63": 0.07625,
            "64": 0.0775,
            "65": 0.07689,
            "66": 0.07666,
            "67": 0.07699,
            "68": 0.0763,
            "69": 0.0766,
            "70": 0.07706,
            "71": 0.07676,
            "72": 0.07816,
            "73": 0.07645,
            "74": 0.07713,
            "75": 0.07675,
            "76": 0.07795,
            "77": 0.07688,
            "78": 0.07657,
            "79": 0.07815,
            "80": 0.07702,
            "81": 0.07682,
            "82": 0.07647,
            "83": 0.07736,
            "84": 0.07681,
            "85": 0.07718,
            "86": 0.07666,
            "87": 0.07732,
            "88": 0.07639,
            "89": 0.07691,
            "90": 0.07624,
            "91": 0.07703,
            "92": 0.0762,
            "93": 0.0767,
            "94": 0.07746,
            "95": 0.07723,
            "96": 0.07629,
            "97": 0.07689,
            "98": 0.07615,
            "99": 0.0774,
            "100": 0.07615
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.8545,
            "52": 9.7393,
            "53": 10.07426,
            "54": 9.96913,
            "55": 9.88574,
            "56": 9.62438,
            "57": 9.48229,
            "58": 9.83484,
            "59": 9.58731,
            "60": 9.50243,
            "61": 9.6934,
            "62": 9.988,
            "63": 9.39105,
            "64": 9.78022,
            "65": 8.94516,
            "66": 9.70492,
            "67": 9.37249,
            "68": 9.78328,
            "69": 9.79057,
            "70": 9.74451,
            "71": 9.62298,
            "72": 9.58457,
            "73": 9.50511,
            "74": 8.94308,
            "75": 9.42524,
            "76": 9.07602,
            "77": 10.06352,
            "78": 9.72307,
            "79": 9.37497,
            "80": 9.40454,
            "81": 9.4779,
            "82": 9.69669,
            "83": 9.30714,
            "84": 9.41525,
            "85": 9.61295,
            "86": 9.07198,
            "87": 9.58834,
            "88": 9.7476,
            "89": 9.59984,
            "90": 9.81672,
            "91": 9.33791,
            "92": 9.35608,
            "93": 9.07423,
            "94": 8.83511,
            "95": 9.51841,
            "96": 9.52391,
            "97": 9.30922,
            "98": 9.66746,
            "99": 8.88421,
            "100": 9.39923
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2543.0,
            "52": 2613.0,
            "53": 2945.0,
            "54": 2713.0,
            "55": 2503.0,
            "56": 2692.0,
            "57": 2338.0,
            "58": 2961.0,
            "59": 2620.0,
            "60": 2367.0,
            "61": 2909.0,
            "62": 2728.0,
            "63": 2399.0,
            "64": 2909.0,
            "65": 2605.0,
            "66": 2983.0,
            "67": 2793.0,
            "68": 2663.0,
            "69": 2833.0,
            "70": 3135.0,
            "71": 2997.0,
            "72": 2464.0,
            "73": 3088.0,
            "74": 1970.0,
            "75": 2556.0,
            "76": 3064.0,
            "77": 3231.0,
            "78": 3097.0,
            "79": 3035.0,
            "80": 3301.0,
            "81": 3599.0,
            "82": 3215.0,
            "83": 2757.0,
            "84": 3130.0,
            "85": 3380.0,
            "86": 2742.0,
            "87": 3723.0,
            "88": 3066.0,
            "89": 3264.0,
            "90": 3198.0,
            "91": 2718.0,
            "92": 3070.0,
            "93": 2624.0,
            "94": 3301.0,
            "95": 3431.0,
            "96": 3358.0,
            "97": 3142.0,
            "98": 3704.0,
            "99": 3107.0,
            "100": 3089.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 1261849088.0,
            "52": 1261849088.0,
            "53": 1261849088.0,
            "54": 1261849088.0,
            "55": 1261849088.0,
            "56": 1261849088.0,
            "57": 1261849088.0,
            "58": 1261849088.0,
            "59": 1261849088.0,
            "60": 1261849088.0,
            "61": 1261849088.0,
            "62": 1261849088.0,
            "63": 1261849088.0,
            "64": 1261849088.0,
            "65": 1261849088.0,
            "66": 1261849088.0,
            "67": 1261849088.0,
            "68": 1261849088.0,
            "69": 1261849088.0,
            "70": 1261849088.0,
            "71": 1261849088.0,
            "72": 1261849088.0,
            "73": 1261849088.0,
            "74": 1261849088.0,
            "75": 1261849088.0,
            "76": 1261849088.0,
            "77": 1261849088.0,
            "78": 1261849088.0,
            "79": 1261849088.0,
            "80": 1261849088.0,
            "81": 1261849088.0,
            "82": 1261849088.0,
            "83": 1261849088.0,
            "84": 1261849088.0,
            "85": 1261849088.0,
            "86": 1261849088.0,
            "87": 1261849088.0,
            "88": 1261849088.0,
            "89": 1261849088.0,
            "90": 1261849088.0,
            "91": 1261849088.0,
            "92": 1261849088.0,
            "93": 1261849088.0,
            "94": 1261849088.0,
            "95": 1261849088.0,
            "96": 1261849088.0,
            "97": 1261849088.0,
            "98": 1261849088.0,
            "99": 1261849088.0,
            "100": 1261849088.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2530924544.0,
            "52": 2564480000.0,
            "53": 2564480000.0,
            "54": 2564480000.0,
            "55": 2564480000.0,
            "56": 2564480000.0,
            "57": 2564480000.0,
            "58": 2564480000.0,
            "59": 2564480000.0,
            "60": 2564480000.0,
            "61": 2564480000.0,
            "62": 2564480000.0,
            "63": 2564480000.0,
            "64": 2564480000.0,
            "65": 2564480000.0,
            "66": 2564480000.0,
            "67": 2564480000.0,
            "68": 2564480000.0,
            "69": 2564480000.0,
            "70": 2564480000.0,
            "71": 2564480000.0,
            "72": 2564480000.0,
            "73": 2564480000.0,
            "74": 2564480000.0,
            "75": 2564480000.0,
            "76": 2564480000.0,
            "77": 2564480000.0,
            "78": 2564480000.0,
            "79": 2564480000.0,
            "80": 2564480000.0,
            "81": 2564480000.0,
            "82": 2564480000.0,
            "83": 2564480000.0,
            "84": 2564480000.0,
            "85": 2564480000.0,
            "86": 2564480000.0,
            "87": 2564480000.0,
            "88": 2564480000.0,
            "89": 2564480000.0,
            "90": 2564480000.0,
            "91": 2564480000.0,
            "92": 2564480000.0,
            "93": 2564480000.0,
            "94": 2564480000.0,
            "95": 2564480000.0,
            "96": 2564480000.0,
            "97": 2564480000.0,
            "98": 2564480000.0,
            "99": 2564480000.0,
            "100": 2564480000.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 3.87745,
            "52": 0.09791,
            "53": 0.07996,
            "54": 0.07698,
            "55": 0.07921,
            "56": 0.07768,
            "57": 0.07938,
            "58": 0.077,
            "59": 0.0799,
            "60": 0.07696,
            "61": 0.07996,
            "62": 0.07691,
            "63": 0.08005,
            "64": 0.0814,
            "65": 0.07853,
            "66": 0.07696,
            "67": 0.07866,
            "68": 0.07694,
            "69": 0.07801,
            "70": 0.07717,
            "71": 0.07878,
            "72": 0.07724,
            "73": 0.18173,
            "74": 0.09573,
            "75": 0.07905,
            "76": 0.0777,
            "77": 0.07736,
            "78": 0.08065,
            "79": 0.07839,
            "80": 0.08069,
            "81": 0.0787,
            "82": 0.07798,
            "83": 0.08482,
            "84": 0.07927,
            "85": 0.08138,
            "86": 0.08293,
            "87": 0.08143,
            "88": 0.07796,
            "89": 0.07668,
            "90": 0.07901,
            "91": 0.07807,
            "92": 0.0798,
            "93": 0.0768,
            "94": 0.07634,
            "95": 0.07708,
            "96": 0.07653,
            "97": 0.0783,
            "98": 0.07633,
            "99": 0.07617,
            "100": 0.07786
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.84092,
            "2": 10.83661,
            "3": 10.83233,
            "4": 10.81819,
            "5": 10.84059,
            "6": 10.86985,
            "7": 10.83324,
            "8": 10.83877,
            "9": 10.84355,
            "10": 10.80969,
            "11": 10.85186,
            "12": 10.84449,
            "13": 10.86322,
            "14": 10.86353,
            "15": 10.79981,
            "16": 10.79262,
            "17": 10.77477,
            "18": 10.80157,
            "19": 10.79148,
            "20": 10.70508,
            "21": 10.68176,
            "22": 10.56548,
            "23": 10.70147,
            "24": 10.57889,
            "25": 10.53597,
            "26": 10.60742,
            "27": 10.59423,
            "28": 10.56119,
            "29": 10.57569,
            "30": 10.35474,
            "31": 10.12616,
            "32": 10.46566,
            "33": 10.45233,
            "34": 10.22493,
            "35": 10.27091,
            "36": 10.22168,
            "37": 10.33936,
            "38": 10.18641,
            "39": 10.39431,
            "40": 10.07792,
            "41": 10.13872,
            "42": 10.20182,
            "43": 9.83818,
            "44": 9.94274,
            "45": 9.82303,
            "46": 9.82185,
            "47": 10.13443,
            "48": 9.84098,
            "49": 9.52095,
            "50": 9.90109,
            "51": 9.83457,
            "52": 9.73232,
            "53": 10.0488,
            "54": 9.93895,
            "55": 9.863,
            "56": 9.613,
            "57": 9.46966,
            "58": 9.81135,
            "59": 9.57107,
            "60": 9.48155,
            "61": 9.6788,
            "62": 9.96581,
            "63": 9.35273,
            "64": 9.75648,
            "65": 8.93771,
            "66": 9.68153,
            "67": 9.35671,
            "68": 9.76807,
            "69": 9.7739,
            "70": 9.71016,
            "71": 9.60009,
            "72": 9.56793,
            "73": 9.4774,
            "74": 8.93177,
            "75": 9.4072,
            "76": 9.06849,
            "77": 10.0464,
            "78": 9.70988,
            "79": 9.35733,
            "80": 9.38975,
            "81": 9.4662,
            "82": 9.68058,
            "83": 9.2914,
            "84": 9.40191,
            "85": 9.59735,
            "86": 9.06209,
            "87": 9.57922,
            "88": 9.73259,
            "89": 9.58836,
            "90": 9.80354,
            "91": 9.31991,
            "92": 9.35011,
            "93": 9.06382,
            "94": 8.81909,
            "95": 9.50568,
            "96": 9.51071,
            "97": 9.29241,
            "98": 9.65578,
            "99": 8.87401,
            "100": 9.38833
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1628.0,
            "2": 1744.0,
            "3": 1662.0,
            "4": 1801.0,
            "5": 1937.0,
            "6": 1866.0,
            "7": 1855.0,
            "8": 1584.0,
            "9": 1901.0,
            "10": 1325.0,
            "11": 1966.0,
            "12": 1767.0,
            "13": 1876.0,
            "14": 1881.0,
            "15": 1928.0,
            "16": 1875.0,
            "17": 1790.0,
            "18": 1736.0,
            "19": 1804.0,
            "20": 1690.0,
            "21": 2008.0,
            "22": 1765.0,
            "23": 2073.0,
            "24": 1618.0,
            "25": 1720.0,
            "26": 1807.0,
            "27": 1861.0,
            "28": 2026.0,
            "29": 1982.0,
            "30": 1981.0,
            "31": 1688.0,
            "32": 1913.0,
            "33": 2123.0,
            "34": 1893.0,
            "35": 2007.0,
            "36": 1987.0,
            "37": 2334.0,
            "38": 2223.0,
            "39": 2417.0,
            "40": 2370.0,
            "41": 2352.0,
            "42": 2269.0,
            "43": 1967.0,
            "44": 2183.0,
            "45": 2150.0,
            "46": 2350.0,
            "47": 2555.0,
            "48": 2463.0,
            "49": 2326.0,
            "50": 2270.0,
            "51": 2508.0,
            "52": 2495.0,
            "53": 2856.0,
            "54": 2692.0,
            "55": 2482.0,
            "56": 2614.0,
            "57": 2283.0,
            "58": 2894.0,
            "59": 2659.0,
            "60": 2561.0,
            "61": 3006.0,
            "62": 2671.0,
            "63": 2488.0,
            "64": 3092.0,
            "65": 2622.0,
            "66": 3108.0,
            "67": 2741.0,
            "68": 2942.0,
            "69": 2983.0,
            "70": 3347.0,
            "71": 3034.0,
            "72": 2438.0,
            "73": 3075.0,
            "74": 1931.0,
            "75": 2722.0,
            "76": 2960.0,
            "77": 3387.0,
            "78": 3268.0,
            "79": 3079.0,
            "80": 3404.0,
            "81": 3674.0,
            "82": 3192.0,
            "83": 2791.0,
            "84": 3224.0,
            "85": 3237.0,
            "86": 2646.0,
            "87": 3840.0,
            "88": 3114.0,
            "89": 3410.0,
            "90": 3184.0,
            "91": 3073.0,
            "92": 3396.0,
            "93": 2711.0,
            "94": 3530.0,
            "95": 3387.0,
            "96": 3530.0,
            "97": 3277.0,
            "98": 3775.0,
            "99": 3421.0,
            "100": 3350.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1234585088.0,
            "2": 1234585088.0,
            "3": 1234585088.0,
            "4": 1234585088.0,
            "5": 1234585088.0,
            "6": 1234585088.0,
            "7": 1234585088.0,
            "8": 1234585088.0,
            "9": 1234585088.0,
            "10": 1234585088.0,
            "11": 1234585088.0,
            "12": 1234585088.0,
            "13": 1234585088.0,
            "14": 1234585088.0,
            "15": 1234585088.0,
            "16": 1234585088.0,
            "17": 1234585088.0,
            "18": 1234585088.0,
            "19": 1234585088.0,
            "20": 1234585088.0,
            "21": 1234585088.0,
            "22": 1234585088.0,
            "23": 1234585088.0,
            "24": 1234585088.0,
            "25": 1234585088.0,
            "26": 1234585088.0,
            "27": 1234585088.0,
            "28": 1234585088.0,
            "29": 1234585088.0,
            "30": 1234585088.0,
            "31": 1234585088.0,
            "32": 1234585088.0,
            "33": 1234585088.0,
            "34": 1234585088.0,
            "35": 1234585088.0,
            "36": 1234585088.0,
            "37": 1234585088.0,
            "38": 1234585088.0,
            "39": 1234585088.0,
            "40": 1234585088.0,
            "41": 1234585088.0,
            "42": 1234585088.0,
            "43": 1234585088.0,
            "44": 1234585088.0,
            "45": 1234585088.0,
            "46": 1234585088.0,
            "47": 1234585088.0,
            "48": 1234585088.0,
            "49": 1234585088.0,
            "50": 1234585088.0,
            "51": 1234585088.0,
            "52": 1234585088.0,
            "53": 1234585088.0,
            "54": 1234585088.0,
            "55": 1234585088.0,
            "56": 1234585088.0,
            "57": 1234585088.0,
            "58": 1234585088.0,
            "59": 1234585088.0,
            "60": 1234585088.0,
            "61": 1234585088.0,
            "62": 1234585088.0,
            "63": 1234585088.0,
            "64": 1234585088.0,
            "65": 1234585088.0,
            "66": 1234585088.0,
            "67": 1234585088.0,
            "68": 1234585088.0,
            "69": 1234585088.0,
            "70": 1234585088.0,
            "71": 1234585088.0,
            "72": 1234585088.0,
            "73": 1234585088.0,
            "74": 1234585088.0,
            "75": 1234585088.0,
            "76": 1234585088.0,
            "77": 1234585088.0,
            "78": 1234585088.0,
            "79": 1234585088.0,
            "80": 1234585088.0,
            "81": 1234585088.0,
            "82": 1234585088.0,
            "83": 1234585088.0,
            "84": 1234585088.0,
            "85": 1234585088.0,
            "86": 1234585088.0,
            "87": 1234585088.0,
            "88": 1234585088.0,
            "89": 1234585088.0,
            "90": 1234585088.0,
            "91": 1234585088.0,
            "92": 1234585088.0,
            "93": 1234585088.0,
            "94": 1234585088.0,
            "95": 1234585088.0,
            "96": 1234585088.0,
            "97": 1234585088.0,
            "98": 1234585088.0,
            "99": 1234585088.0,
            "100": 1234585088.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1984492544.0,
            "2": 2536167424.0,
            "3": 2536167424.0,
            "4": 2536167424.0,
            "5": 2536167424.0,
            "6": 2536167424.0,
            "7": 2536167424.0,
            "8": 2536167424.0,
            "9": 2536167424.0,
            "10": 2536167424.0,
            "11": 2536167424.0,
            "12": 2536167424.0,
            "13": 2536167424.0,
            "14": 2536167424.0,
            "15": 2536167424.0,
            "16": 2536167424.0,
            "17": 2536167424.0,
            "18": 2536167424.0,
            "19": 2536167424.0,
            "20": 2536167424.0,
            "21": 2536167424.0,
            "22": 2536167424.0,
            "23": 2536167424.0,
            "24": 2536167424.0,
            "25": 2536167424.0,
            "26": 2536167424.0,
            "27": 2536167424.0,
            "28": 2536167424.0,
            "29": 2536167424.0,
            "30": 2536167424.0,
            "31": 2536167424.0,
            "32": 2536167424.0,
            "33": 2536167424.0,
            "34": 2536167424.0,
            "35": 2536167424.0,
            "36": 2536167424.0,
            "37": 2536167424.0,
            "38": 2536167424.0,
            "39": 2536167424.0,
            "40": 2536167424.0,
            "41": 2536167424.0,
            "42": 2536167424.0,
            "43": 2536167424.0,
            "44": 2536167424.0,
            "45": 2536167424.0,
            "46": 2536167424.0,
            "47": 2536167424.0,
            "48": 2536167424.0,
            "49": 2536167424.0,
            "50": 2536167424.0,
            "51": 2536167424.0,
            "52": 2536167424.0,
            "53": 2536167424.0,
            "54": 2536167424.0,
            "55": 2536167424.0,
            "56": 2536167424.0,
            "57": 2536167424.0,
            "58": 2536167424.0,
            "59": 2536167424.0,
            "60": 2536167424.0,
            "61": 2536167424.0,
            "62": 2536167424.0,
            "63": 2536167424.0,
            "64": 2536167424.0,
            "65": 2536167424.0,
            "66": 2536167424.0,
            "67": 2536167424.0,
            "68": 2536167424.0,
            "69": 2536167424.0,
            "70": 2536167424.0,
            "71": 2536167424.0,
            "72": 2536167424.0,
            "73": 2536167424.0,
            "74": 2536167424.0,
            "75": 2536167424.0,
            "76": 2536167424.0,
            "77": 2536167424.0,
            "78": 2536167424.0,
            "79": 2536167424.0,
            "80": 2536167424.0,
            "81": 2536167424.0,
            "82": 2536167424.0,
            "83": 2536167424.0,
            "84": 2536167424.0,
            "85": 2536167424.0,
            "86": 2536167424.0,
            "87": 2536167424.0,
            "88": 2536167424.0,
            "89": 2536167424.0,
            "90": 2536167424.0,
            "91": 2536167424.0,
            "92": 2536167424.0,
            "93": 2536167424.0,
            "94": 2536167424.0,
            "95": 2536167424.0,
            "96": 2536167424.0,
            "97": 2536167424.0,
            "98": 2536167424.0,
            "99": 2536167424.0,
            "100": 2536167424.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 3.55988,
            "2": 0.14641,
            "3": 0.12458,
            "4": 0.12359,
            "5": 0.12323,
            "6": 0.12296,
            "7": 0.12273,
            "8": 0.12322,
            "9": 0.12253,
            "10": 0.12257,
            "11": 0.12186,
            "12": 0.12242,
            "13": 0.12302,
            "14": 0.12233,
            "15": 0.12201,
            "16": 0.12138,
            "17": 0.12155,
            "18": 0.12113,
            "19": 0.12159,
            "20": 0.12132,
            "21": 0.12202,
            "22": 0.12206,
            "23": 0.12186,
            "24": 0.12276,
            "25": 0.12238,
            "26": 0.122,
            "27": 0.12177,
            "28": 0.12203,
            "29": 0.12255,
            "30": 0.12285,
            "31": 0.1224,
            "32": 0.12276,
            "33": 0.12359,
            "34": 0.12194,
            "35": 0.12249,
            "36": 0.12276,
            "37": 0.12249,
            "38": 0.12249,
            "39": 0.12333,
            "40": 0.12327,
            "41": 0.12316,
            "42": 0.12307,
            "43": 0.12249,
            "44": 0.12267,
            "45": 0.12282,
            "46": 0.12405,
            "47": 0.12264,
            "48": 0.12412,
            "49": 0.12277,
            "50": 0.12365,
            "51": 0.1271,
            "52": 0.12708,
            "53": 0.12522,
            "54": 0.1263,
            "55": 0.12587,
            "56": 0.12762,
            "57": 0.12527,
            "58": 0.12651,
            "59": 0.12671,
            "60": 0.12654,
            "61": 0.12604,
            "62": 0.12577,
            "63": 0.12494,
            "64": 0.12609,
            "65": 0.12576,
            "66": 0.12652,
            "67": 0.12628,
            "68": 0.12655,
            "69": 0.12565,
            "70": 0.12576,
            "71": 0.12521,
            "72": 0.12593,
            "73": 0.12578,
            "74": 0.12645,
            "75": 0.12537,
            "76": 0.12616,
            "77": 0.12525,
            "78": 0.12803,
            "79": 0.1252,
            "80": 0.12678,
            "81": 0.12525,
            "82": 0.12597,
            "83": 0.12596,
            "84": 0.12603,
            "85": 0.1257,
            "86": 0.12623,
            "87": 0.12511,
            "88": 0.12609,
            "89": 0.12568,
            "90": 0.12585,
            "91": 0.12495,
            "92": 0.12654,
            "93": 0.12549,
            "94": 0.12609,
            "95": 0.12518,
            "96": 0.12593,
            "97": 0.12598,
            "98": 0.12611,
            "99": 0.12441,
            "100": 0.12715
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgx_a100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.84092,
            "2": 10.83661,
            "3": 10.83233,
            "4": 10.81819,
            "5": 10.84059,
            "6": 10.86985,
            "7": 10.83324,
            "8": 10.83877,
            "9": 10.84355,
            "10": 10.80969,
            "11": 10.85186,
            "12": 10.84449,
            "13": 10.86322,
            "14": 10.86353,
            "15": 10.79981,
            "16": 10.79262,
            "17": 10.77477,
            "18": 10.80157,
            "19": 10.79148,
            "20": 10.70508,
            "21": 10.68176,
            "22": 10.56548,
            "23": 10.70147,
            "24": 10.57889,
            "25": 10.53597,
            "26": 10.60742,
            "27": 10.59423,
            "28": 10.56119,
            "29": 10.57569,
            "30": 10.35474,
            "31": 10.12616,
            "32": 10.46566,
            "33": 10.45233,
            "34": 10.22493,
            "35": 10.27091,
            "36": 10.22168,
            "37": 10.33936,
            "38": 10.18641,
            "39": 10.39431,
            "40": 10.07792,
            "41": 10.13872,
            "42": 10.20182,
            "43": 9.83818,
            "44": 9.94274,
            "45": 9.82303,
            "46": 9.82185,
            "47": 10.13443,
            "48": 9.84098,
            "49": 9.52095,
            "50": 9.90109,
            "51": 9.83457,
            "52": 9.73232,
            "53": 10.0488,
            "54": 9.93895,
            "55": 9.863,
            "56": 9.613,
            "57": 9.46966,
            "58": 9.81135,
            "59": 9.57107,
            "60": 9.48155,
            "61": 9.6788,
            "62": 9.96581,
            "63": 9.35273,
            "64": 9.75648,
            "65": 8.93771,
            "66": 9.68153,
            "67": 9.35671,
            "68": 9.76807,
            "69": 9.7739,
            "70": 9.71016,
            "71": 9.60009,
            "72": 9.56793,
            "73": 9.4774,
            "74": 8.93177,
            "75": 9.4072,
            "76": 9.06849,
            "77": 10.0464,
            "78": 9.70988,
            "79": 9.35733,
            "80": 9.38975,
            "81": 9.4662,
            "82": 9.68058,
            "83": 9.2914,
            "84": 9.40191,
            "85": 9.59735,
            "86": 9.06209,
            "87": 9.57922,
            "88": 9.73259,
            "89": 9.58836,
            "90": 9.80354,
            "91": 9.31991,
            "92": 9.35011,
            "93": 9.06382,
            "94": 8.81909,
            "95": 9.50568,
            "96": 9.51071,
            "97": 9.29241,
            "98": 9.65578,
            "99": 8.87401,
            "100": 9.38833
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1628.0,
            "2": 1744.0,
            "3": 1662.0,
            "4": 1801.0,
            "5": 1937.0,
            "6": 1866.0,
            "7": 1855.0,
            "8": 1584.0,
            "9": 1901.0,
            "10": 1325.0,
            "11": 1966.0,
            "12": 1767.0,
            "13": 1876.0,
            "14": 1881.0,
            "15": 1928.0,
            "16": 1875.0,
            "17": 1790.0,
            "18": 1736.0,
            "19": 1804.0,
            "20": 1690.0,
            "21": 2008.0,
            "22": 1765.0,
            "23": 2073.0,
            "24": 1618.0,
            "25": 1720.0,
            "26": 1807.0,
            "27": 1861.0,
            "28": 2026.0,
            "29": 1982.0,
            "30": 1981.0,
            "31": 1688.0,
            "32": 1913.0,
            "33": 2123.0,
            "34": 1893.0,
            "35": 2007.0,
            "36": 1987.0,
            "37": 2334.0,
            "38": 2223.0,
            "39": 2417.0,
            "40": 2370.0,
            "41": 2352.0,
            "42": 2269.0,
            "43": 1967.0,
            "44": 2183.0,
            "45": 2150.0,
            "46": 2350.0,
            "47": 2555.0,
            "48": 2463.0,
            "49": 2326.0,
            "50": 2270.0,
            "51": 2508.0,
            "52": 2495.0,
            "53": 2856.0,
            "54": 2692.0,
            "55": 2482.0,
            "56": 2614.0,
            "57": 2283.0,
            "58": 2894.0,
            "59": 2659.0,
            "60": 2561.0,
            "61": 3006.0,
            "62": 2671.0,
            "63": 2488.0,
            "64": 3092.0,
            "65": 2622.0,
            "66": 3108.0,
            "67": 2741.0,
            "68": 2942.0,
            "69": 2983.0,
            "70": 3347.0,
            "71": 3034.0,
            "72": 2438.0,
            "73": 3075.0,
            "74": 1931.0,
            "75": 2722.0,
            "76": 2960.0,
            "77": 3387.0,
            "78": 3268.0,
            "79": 3079.0,
            "80": 3404.0,
            "81": 3674.0,
            "82": 3192.0,
            "83": 2791.0,
            "84": 3224.0,
            "85": 3237.0,
            "86": 2646.0,
            "87": 3840.0,
            "88": 3114.0,
            "89": 3410.0,
            "90": 3184.0,
            "91": 3073.0,
            "92": 3396.0,
            "93": 2711.0,
            "94": 3530.0,
            "95": 3387.0,
            "96": 3530.0,
            "97": 3277.0,
            "98": 3775.0,
            "99": 3421.0,
            "100": 3350.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1234585088.0,
            "2": 1234585088.0,
            "3": 1234585088.0,
            "4": 1234585088.0,
            "5": 1234585088.0,
            "6": 1234585088.0,
            "7": 1234585088.0,
            "8": 1234585088.0,
            "9": 1234585088.0,
            "10": 1234585088.0,
            "11": 1234585088.0,
            "12": 1234585088.0,
            "13": 1234585088.0,
            "14": 1234585088.0,
            "15": 1234585088.0,
            "16": 1234585088.0,
            "17": 1234585088.0,
            "18": 1234585088.0,
            "19": 1234585088.0,
            "20": 1234585088.0,
            "21": 1234585088.0,
            "22": 1234585088.0,
            "23": 1234585088.0,
            "24": 1234585088.0,
            "25": 1234585088.0,
            "26": 1234585088.0,
            "27": 1234585088.0,
            "28": 1234585088.0,
            "29": 1234585088.0,
            "30": 1234585088.0,
            "31": 1234585088.0,
            "32": 1234585088.0,
            "33": 1234585088.0,
            "34": 1234585088.0,
            "35": 1234585088.0,
            "36": 1234585088.0,
            "37": 1234585088.0,
            "38": 1234585088.0,
            "39": 1234585088.0,
            "40": 1234585088.0,
            "41": 1234585088.0,
            "42": 1234585088.0,
            "43": 1234585088.0,
            "44": 1234585088.0,
            "45": 1234585088.0,
            "46": 1234585088.0,
            "47": 1234585088.0,
            "48": 1234585088.0,
            "49": 1234585088.0,
            "50": 1234585088.0,
            "51": 1234585088.0,
            "52": 1234585088.0,
            "53": 1234585088.0,
            "54": 1234585088.0,
            "55": 1234585088.0,
            "56": 1234585088.0,
            "57": 1234585088.0,
            "58": 1234585088.0,
            "59": 1234585088.0,
            "60": 1234585088.0,
            "61": 1234585088.0,
            "62": 1234585088.0,
            "63": 1234585088.0,
            "64": 1234585088.0,
            "65": 1234585088.0,
            "66": 1234585088.0,
            "67": 1234585088.0,
            "68": 1234585088.0,
            "69": 1234585088.0,
            "70": 1234585088.0,
            "71": 1234585088.0,
            "72": 1234585088.0,
            "73": 1234585088.0,
            "74": 1234585088.0,
            "75": 1234585088.0,
            "76": 1234585088.0,
            "77": 1234585088.0,
            "78": 1234585088.0,
            "79": 1234585088.0,
            "80": 1234585088.0,
            "81": 1234585088.0,
            "82": 1234585088.0,
            "83": 1234585088.0,
            "84": 1234585088.0,
            "85": 1234585088.0,
            "86": 1234585088.0,
            "87": 1234585088.0,
            "88": 1234585088.0,
            "89": 1234585088.0,
            "90": 1234585088.0,
            "91": 1234585088.0,
            "92": 1234585088.0,
            "93": 1234585088.0,
            "94": 1234585088.0,
            "95": 1234585088.0,
            "96": 1234585088.0,
            "97": 1234585088.0,
            "98": 1234585088.0,
            "99": 1234585088.0,
            "100": 1234585088.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1984492544.0,
            "2": 2536167424.0,
            "3": 2536167424.0,
            "4": 2536167424.0,
            "5": 2536167424.0,
            "6": 2536167424.0,
            "7": 2536167424.0,
            "8": 2536167424.0,
            "9": 2536167424.0,
            "10": 2536167424.0,
            "11": 2536167424.0,
            "12": 2536167424.0,
            "13": 2536167424.0,
            "14": 2536167424.0,
            "15": 2536167424.0,
            "16": 2536167424.0,
            "17": 2536167424.0,
            "18": 2536167424.0,
            "19": 2536167424.0,
            "20": 2536167424.0,
            "21": 2536167424.0,
            "22": 2536167424.0,
            "23": 2536167424.0,
            "24": 2536167424.0,
            "25": 2536167424.0,
            "26": 2536167424.0,
            "27": 2536167424.0,
            "28": 2536167424.0,
            "29": 2536167424.0,
            "30": 2536167424.0,
            "31": 2536167424.0,
            "32": 2536167424.0,
            "33": 2536167424.0,
            "34": 2536167424.0,
            "35": 2536167424.0,
            "36": 2536167424.0,
            "37": 2536167424.0,
            "38": 2536167424.0,
            "39": 2536167424.0,
            "40": 2536167424.0,
            "41": 2536167424.0,
            "42": 2536167424.0,
            "43": 2536167424.0,
            "44": 2536167424.0,
            "45": 2536167424.0,
            "46": 2536167424.0,
            "47": 2536167424.0,
            "48": 2536167424.0,
            "49": 2536167424.0,
            "50": 2536167424.0,
            "51": 2536167424.0,
            "52": 2536167424.0,
            "53": 2536167424.0,
            "54": 2536167424.0,
            "55": 2536167424.0,
            "56": 2536167424.0,
            "57": 2536167424.0,
            "58": 2536167424.0,
            "59": 2536167424.0,
            "60": 2536167424.0,
            "61": 2536167424.0,
            "62": 2536167424.0,
            "63": 2536167424.0,
            "64": 2536167424.0,
            "65": 2536167424.0,
            "66": 2536167424.0,
            "67": 2536167424.0,
            "68": 2536167424.0,
            "69": 2536167424.0,
            "70": 2536167424.0,
            "71": 2536167424.0,
            "72": 2536167424.0,
            "73": 2536167424.0,
            "74": 2536167424.0,
            "75": 2536167424.0,
            "76": 2536167424.0,
            "77": 2536167424.0,
            "78": 2536167424.0,
            "79": 2536167424.0,
            "80": 2536167424.0,
            "81": 2536167424.0,
            "82": 2536167424.0,
            "83": 2536167424.0,
            "84": 2536167424.0,
            "85": 2536167424.0,
            "86": 2536167424.0,
            "87": 2536167424.0,
            "88": 2536167424.0,
            "89": 2536167424.0,
            "90": 2536167424.0,
            "91": 2536167424.0,
            "92": 2536167424.0,
            "93": 2536167424.0,
            "94": 2536167424.0,
            "95": 2536167424.0,
            "96": 2536167424.0,
            "97": 2536167424.0,
            "98": 2536167424.0,
            "99": 2536167424.0,
            "100": 2536167424.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2.58038,
            "2": 0.24481,
            "3": 0.14335,
            "4": 0.12008,
            "5": 0.11519,
            "6": 0.11576,
            "7": 0.11592,
            "8": 0.11621,
            "9": 0.11509,
            "10": 0.11622,
            "11": 0.11438,
            "12": 0.12519,
            "13": 0.11661,
            "14": 0.11675,
            "15": 0.11585,
            "16": 0.11602,
            "17": 0.11511,
            "18": 0.11563,
            "19": 0.1151,
            "20": 0.11612,
            "21": 0.11576,
            "22": 0.11985,
            "23": 0.11629,
            "24": 0.11712,
            "25": 0.11544,
            "26": 0.11643,
            "27": 0.1158,
            "28": 0.1159,
            "29": 0.11547,
            "30": 0.11692,
            "31": 0.11579,
            "32": 0.11621,
            "33": 0.11916,
            "34": 0.11636,
            "35": 0.11562,
            "36": 0.11659,
            "37": 0.11547,
            "38": 0.11647,
            "39": 0.1158,
            "40": 0.11627,
            "41": 0.11596,
            "42": 0.11632,
            "43": 0.11615,
            "44": 0.11641,
            "45": 0.11517,
            "46": 0.117,
            "47": 0.11569,
            "48": 0.11641,
            "49": 0.1153,
            "50": 0.11761,
            "51": 0.12112,
            "52": 0.11688,
            "53": 0.11745,
            "54": 0.11527,
            "55": 0.1155,
            "56": 0.11515,
            "57": 0.1278,
            "58": 0.11901,
            "59": 0.11522,
            "60": 0.11514,
            "61": 0.11577,
            "62": 0.1152,
            "63": 0.11508,
            "64": 0.11441,
            "65": 0.11536,
            "66": 0.11387,
            "67": 0.11491,
            "68": 0.11494,
            "69": 0.11516,
            "70": 0.11427,
            "71": 0.11457,
            "72": 0.11443,
            "73": 0.11522,
            "74": 0.1147,
            "75": 0.11473,
            "76": 0.11408,
            "77": 0.11464,
            "78": 0.11499,
            "79": 0.11494,
            "80": 0.11435,
            "81": 0.11479,
            "82": 0.11427,
            "83": 0.11504,
            "84": 0.11412,
            "85": 0.11455,
            "86": 0.11473,
            "87": 0.11484,
            "88": 0.1137,
            "89": 0.11543,
            "90": 0.11349,
            "91": 0.11471,
            "92": 0.114,
            "93": 0.11498,
            "94": 0.11434,
            "95": 0.11497,
            "96": 0.11416,
            "97": 0.11454,
            "98": 0.1143,
            "99": 0.1145,
            "100": 0.11459
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgx_a100_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.83457,
            "52": 9.73232,
            "53": 10.0488,
            "54": 9.93895,
            "55": 9.863,
            "56": 9.613,
            "57": 9.46966,
            "58": 9.81135,
            "59": 9.57107,
            "60": 9.48155,
            "61": 9.6788,
            "62": 9.96581,
            "63": 9.35273,
            "64": 9.75648,
            "65": 8.93771,
            "66": 9.68153,
            "67": 9.35671,
            "68": 9.76807,
            "69": 9.7739,
            "70": 9.71016,
            "71": 9.60009,
            "72": 9.56793,
            "73": 9.4774,
            "74": 8.93177,
            "75": 9.4072,
            "76": 9.06849,
            "77": 10.0464,
            "78": 9.70988,
            "79": 9.35733,
            "80": 9.38975,
            "81": 9.4662,
            "82": 9.68058,
            "83": 9.2914,
            "84": 9.40191,
            "85": 9.59735,
            "86": 9.06209,
            "87": 9.57922,
            "88": 9.73259,
            "89": 9.58836,
            "90": 9.80354,
            "91": 9.31991,
            "92": 9.35011,
            "93": 9.06382,
            "94": 8.81909,
            "95": 9.50568,
            "96": 9.51071,
            "97": 9.29241,
            "98": 9.65578,
            "99": 8.87401,
            "100": 9.38833
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2508.0,
            "52": 2495.0,
            "53": 2856.0,
            "54": 2692.0,
            "55": 2482.0,
            "56": 2614.0,
            "57": 2283.0,
            "58": 2894.0,
            "59": 2659.0,
            "60": 2561.0,
            "61": 3006.0,
            "62": 2671.0,
            "63": 2488.0,
            "64": 3092.0,
            "65": 2622.0,
            "66": 3108.0,
            "67": 2741.0,
            "68": 2942.0,
            "69": 2983.0,
            "70": 3347.0,
            "71": 3034.0,
            "72": 2438.0,
            "73": 3075.0,
            "74": 1931.0,
            "75": 2722.0,
            "76": 2960.0,
            "77": 3387.0,
            "78": 3268.0,
            "79": 3079.0,
            "80": 3404.0,
            "81": 3674.0,
            "82": 3192.0,
            "83": 2791.0,
            "84": 3224.0,
            "85": 3237.0,
            "86": 2646.0,
            "87": 3840.0,
            "88": 3114.0,
            "89": 3410.0,
            "90": 3184.0,
            "91": 3073.0,
            "92": 3396.0,
            "93": 2711.0,
            "94": 3530.0,
            "95": 3387.0,
            "96": 3530.0,
            "97": 3277.0,
            "98": 3775.0,
            "99": 3421.0,
            "100": 3350.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 1232487936.0,
            "52": 1232487936.0,
            "53": 1232487936.0,
            "54": 1232487936.0,
            "55": 1232487936.0,
            "56": 1232487936.0,
            "57": 1232487936.0,
            "58": 1232487936.0,
            "59": 1232487936.0,
            "60": 1232487936.0,
            "61": 1232487936.0,
            "62": 1232487936.0,
            "63": 1232487936.0,
            "64": 1232487936.0,
            "65": 1232487936.0,
            "66": 1232487936.0,
            "67": 1232487936.0,
            "68": 1232487936.0,
            "69": 1232487936.0,
            "70": 1232487936.0,
            "71": 1232487936.0,
            "72": 1232487936.0,
            "73": 1232487936.0,
            "74": 1232487936.0,
            "75": 1232487936.0,
            "76": 1232487936.0,
            "77": 1232487936.0,
            "78": 1232487936.0,
            "79": 1232487936.0,
            "80": 1232487936.0,
            "81": 1232487936.0,
            "82": 1232487936.0,
            "83": 1232487936.0,
            "84": 1232487936.0,
            "85": 1232487936.0,
            "86": 1232487936.0,
            "87": 1232487936.0,
            "88": 1232487936.0,
            "89": 1232487936.0,
            "90": 1232487936.0,
            "91": 1232487936.0,
            "92": 1232487936.0,
            "93": 1232487936.0,
            "94": 1232487936.0,
            "95": 1232487936.0,
            "96": 1232487936.0,
            "97": 1232487936.0,
            "98": 1232487936.0,
            "99": 1232487936.0,
            "100": 1232487936.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2501563392.0,
            "52": 2535118848.0,
            "53": 2535118848.0,
            "54": 2535118848.0,
            "55": 2535118848.0,
            "56": 2535118848.0,
            "57": 2535118848.0,
            "58": 2535118848.0,
            "59": 2535118848.0,
            "60": 2535118848.0,
            "61": 2535118848.0,
            "62": 2535118848.0,
            "63": 2535118848.0,
            "64": 2535118848.0,
            "65": 2535118848.0,
            "66": 2535118848.0,
            "67": 2535118848.0,
            "68": 2535118848.0,
            "69": 2535118848.0,
            "70": 2535118848.0,
            "71": 2535118848.0,
            "72": 2535118848.0,
            "73": 2535118848.0,
            "74": 2535118848.0,
            "75": 2535118848.0,
            "76": 2535118848.0,
            "77": 2535118848.0,
            "78": 2535118848.0,
            "79": 2535118848.0,
            "80": 2535118848.0,
            "81": 2535118848.0,
            "82": 2535118848.0,
            "83": 2535118848.0,
            "84": 2535118848.0,
            "85": 2535118848.0,
            "86": 2535118848.0,
            "87": 2535118848.0,
            "88": 2535118848.0,
            "89": 2535118848.0,
            "90": 2535118848.0,
            "91": 2535118848.0,
            "92": 2535118848.0,
            "93": 2535118848.0,
            "94": 2535118848.0,
            "95": 2535118848.0,
            "96": 2535118848.0,
            "97": 2535118848.0,
            "98": 2535118848.0,
            "99": 2535118848.0,
            "100": 2535118848.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 4.25367,
            "52": 0.13205,
            "53": 0.11484,
            "54": 0.11811,
            "55": 0.11596,
            "56": 0.11581,
            "57": 0.11498,
            "58": 0.11563,
            "59": 0.11477,
            "60": 0.11575,
            "61": 0.11498,
            "62": 0.11551,
            "63": 0.11663,
            "64": 0.11428,
            "65": 0.11448,
            "66": 0.11417,
            "67": 0.11362,
            "68": 0.11442,
            "69": 0.11406,
            "70": 0.11487,
            "71": 0.11375,
            "72": 0.11459,
            "73": 0.11365,
            "74": 0.11414,
            "75": 0.11435,
            "76": 0.11545,
            "77": 0.11362,
            "78": 0.11443,
            "79": 0.11286,
            "80": 0.11385,
            "81": 0.11272,
            "82": 0.11354,
            "83": 0.11294,
            "84": 0.11396,
            "85": 0.11272,
            "86": 0.11396,
            "87": 0.11339,
            "88": 0.11475,
            "89": 0.11779,
            "90": 0.11386,
            "91": 0.11507,
            "92": 0.11404,
            "93": 0.11335,
            "94": 0.11449,
            "95": 0.11323,
            "96": 0.11451,
            "97": 0.11365,
            "98": 0.11398,
            "99": 0.11453,
            "100": 0.11417
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgxa100_dracooci-ord.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.8401,
            "2": 10.83566,
            "3": 10.82993,
            "4": 10.8173,
            "5": 10.84032,
            "6": 10.87262,
            "7": 10.83467,
            "8": 10.84031,
            "9": 10.84361,
            "10": 10.81341,
            "11": 10.85023,
            "12": 10.84316,
            "13": 10.86604,
            "14": 10.86311,
            "15": 10.80278,
            "16": 10.79645,
            "17": 10.77627,
            "18": 10.80147,
            "19": 10.79392,
            "20": 10.70496,
            "21": 10.68149,
            "22": 10.56314,
            "23": 10.70138,
            "24": 10.57935,
            "25": 10.53846,
            "26": 10.60617,
            "27": 10.5921,
            "28": 10.56154,
            "29": 10.57665,
            "30": 10.35517,
            "31": 10.1277,
            "32": 10.46372,
            "33": 10.45444,
            "34": 10.22446,
            "35": 10.27147,
            "36": 10.22183,
            "37": 10.33944,
            "38": 10.18637,
            "39": 10.39327,
            "40": 10.08044,
            "41": 10.13794,
            "42": 10.20012,
            "43": 9.8379,
            "44": 9.9433,
            "45": 9.82292,
            "46": 9.8231,
            "47": 10.13356,
            "48": 9.84151,
            "49": 9.52105,
            "50": 9.90113,
            "51": 9.83465,
            "52": 9.73175,
            "53": 10.04772,
            "54": 9.93858,
            "55": 9.86422,
            "56": 9.61259,
            "57": 9.46816,
            "58": 9.81221,
            "59": 9.57171,
            "60": 9.48029,
            "61": 9.67964,
            "62": 9.96739,
            "63": 9.35353,
            "64": 9.75732,
            "65": 8.93749,
            "66": 9.68132,
            "67": 9.357,
            "68": 9.76807,
            "69": 9.77288,
            "70": 9.71025,
            "71": 9.60021,
            "72": 9.56674,
            "73": 9.47644,
            "74": 8.93192,
            "75": 9.40879,
            "76": 9.06885,
            "77": 10.04691,
            "78": 9.70976,
            "79": 9.35666,
            "80": 9.39077,
            "81": 9.46573,
            "82": 9.6803,
            "83": 9.29215,
            "84": 9.40239,
            "85": 9.59743,
            "86": 9.06112,
            "87": 9.57954,
            "88": 9.73247,
            "89": 9.58838,
            "90": 9.80386,
            "91": 9.32104,
            "92": 9.35012,
            "93": 9.06314,
            "94": 8.82007,
            "95": 9.50565,
            "96": 9.51099,
            "97": 9.29311,
            "98": 9.65573,
            "99": 8.87504,
            "100": 9.38812
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1670.0,
            "2": 1691.0,
            "3": 1630.0,
            "4": 1805.0,
            "5": 1970.0,
            "6": 1901.0,
            "7": 1815.0,
            "8": 1592.0,
            "9": 1968.0,
            "10": 1436.0,
            "11": 1923.0,
            "12": 1867.0,
            "13": 1888.0,
            "14": 1807.0,
            "15": 1918.0,
            "16": 1922.0,
            "17": 1774.0,
            "18": 1735.0,
            "19": 1886.0,
            "20": 1786.0,
            "21": 2020.0,
            "22": 1685.0,
            "23": 2112.0,
            "24": 1657.0,
            "25": 1610.0,
            "26": 1815.0,
            "27": 1880.0,
            "28": 2025.0,
            "29": 1975.0,
            "30": 2039.0,
            "31": 1713.0,
            "32": 1926.0,
            "33": 2163.0,
            "34": 1894.0,
            "35": 2001.0,
            "36": 1963.0,
            "37": 2401.0,
            "38": 2324.0,
            "39": 2351.0,
            "40": 2321.0,
            "41": 2266.0,
            "42": 2317.0,
            "43": 1999.0,
            "44": 2133.0,
            "45": 2205.0,
            "46": 2324.0,
            "47": 2463.0,
            "48": 2447.0,
            "49": 2237.0,
            "50": 2365.0,
            "51": 2534.0,
            "52": 2604.0,
            "53": 2995.0,
            "54": 2699.0,
            "55": 2489.0,
            "56": 2680.0,
            "57": 2285.0,
            "58": 2976.0,
            "59": 2816.0,
            "60": 2508.0,
            "61": 3075.0,
            "62": 2710.0,
            "63": 2574.0,
            "64": 3027.0,
            "65": 2719.0,
            "66": 3182.0,
            "67": 2770.0,
            "68": 2875.0,
            "69": 2961.0,
            "70": 3241.0,
            "71": 2859.0,
            "72": 2495.0,
            "73": 2972.0,
            "74": 1989.0,
            "75": 2643.0,
            "76": 3012.0,
            "77": 3398.0,
            "78": 3413.0,
            "79": 3272.0,
            "80": 3368.0,
            "81": 3656.0,
            "82": 3228.0,
            "83": 2772.0,
            "84": 3146.0,
            "85": 3336.0,
            "86": 2738.0,
            "87": 3886.0,
            "88": 3044.0,
            "89": 3429.0,
            "90": 2961.0,
            "91": 2952.0,
            "92": 3239.0,
            "93": 2791.0,
            "94": 3583.0,
            "95": 3533.0,
            "96": 3530.0,
            "97": 3241.0,
            "98": 3680.0,
            "99": 3320.0,
            "100": 3432.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1230390272.0,
            "2": 1230390272.0,
            "3": 1230390272.0,
            "4": 1230390272.0,
            "5": 1230390272.0,
            "6": 1230390272.0,
            "7": 1230390272.0,
            "8": 1230390272.0,
            "9": 1230390272.0,
            "10": 1230390272.0,
            "11": 1230390272.0,
            "12": 1230390272.0,
            "13": 1230390272.0,
            "14": 1230390272.0,
            "15": 1230390272.0,
            "16": 1230390272.0,
            "17": 1230390272.0,
            "18": 1230390272.0,
            "19": 1230390272.0,
            "20": 1230390272.0,
            "21": 1230390272.0,
            "22": 1230390272.0,
            "23": 1230390272.0,
            "24": 1230390272.0,
            "25": 1230390272.0,
            "26": 1230390272.0,
            "27": 1230390272.0,
            "28": 1230390272.0,
            "29": 1230390272.0,
            "30": 1230390272.0,
            "31": 1230390272.0,
            "32": 1230390272.0,
            "33": 1230390272.0,
            "34": 1230390272.0,
            "35": 1230390272.0,
            "36": 1230390272.0,
            "37": 1230390272.0,
            "38": 1230390272.0,
            "39": 1230390272.0,
            "40": 1230390272.0,
            "41": 1230390272.0,
            "42": 1230390272.0,
            "43": 1230390272.0,
            "44": 1230390272.0,
            "45": 1230390272.0,
            "46": 1230390272.0,
            "47": 1230390272.0,
            "48": 1230390272.0,
            "49": 1230390272.0,
            "50": 1230390272.0,
            "51": 1230390272.0,
            "52": 1230390272.0,
            "53": 1230390272.0,
            "54": 1230390272.0,
            "55": 1230390272.0,
            "56": 1230390272.0,
            "57": 1230390272.0,
            "58": 1230390272.0,
            "59": 1230390272.0,
            "60": 1230390272.0,
            "61": 1230390272.0,
            "62": 1230390272.0,
            "63": 1230390272.0,
            "64": 1230390272.0,
            "65": 1230390272.0,
            "66": 1230390272.0,
            "67": 1230390272.0,
            "68": 1230390272.0,
            "69": 1230390272.0,
            "70": 1230390272.0,
            "71": 1230390272.0,
            "72": 1230390272.0,
            "73": 1230390272.0,
            "74": 1230390272.0,
            "75": 1230390272.0,
            "76": 1230390272.0,
            "77": 1230390272.0,
            "78": 1230390272.0,
            "79": 1230390272.0,
            "80": 1230390272.0,
            "81": 1230390272.0,
            "82": 1230390272.0,
            "83": 1230390272.0,
            "84": 1230390272.0,
            "85": 1230390272.0,
            "86": 1230390272.0,
            "87": 1230390272.0,
            "88": 1230390272.0,
            "89": 1230390272.0,
            "90": 1230390272.0,
            "91": 1230390272.0,
            "92": 1230390272.0,
            "93": 1230390272.0,
            "94": 1230390272.0,
            "95": 1230390272.0,
            "96": 1230390272.0,
            "97": 1230390272.0,
            "98": 1230390272.0,
            "99": 1230390272.0,
            "100": 1230390272.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1984492032.0,
            "2": 2531972608.0,
            "3": 2531972608.0,
            "4": 2531972608.0,
            "5": 2531972608.0,
            "6": 2531972608.0,
            "7": 2531972608.0,
            "8": 2531972608.0,
            "9": 2531972608.0,
            "10": 2531972608.0,
            "11": 2531972608.0,
            "12": 2531972608.0,
            "13": 2531972608.0,
            "14": 2531972608.0,
            "15": 2531972608.0,
            "16": 2531972608.0,
            "17": 2531972608.0,
            "18": 2531972608.0,
            "19": 2531972608.0,
            "20": 2531972608.0,
            "21": 2531972608.0,
            "22": 2531972608.0,
            "23": 2531972608.0,
            "24": 2531972608.0,
            "25": 2531972608.0,
            "26": 2531972608.0,
            "27": 2531972608.0,
            "28": 2531972608.0,
            "29": 2531972608.0,
            "30": 2531972608.0,
            "31": 2531972608.0,
            "32": 2531972608.0,
            "33": 2531972608.0,
            "34": 2531972608.0,
            "35": 2531972608.0,
            "36": 2531972608.0,
            "37": 2531972608.0,
            "38": 2531972608.0,
            "39": 2531972608.0,
            "40": 2531972608.0,
            "41": 2531972608.0,
            "42": 2531972608.0,
            "43": 2531972608.0,
            "44": 2531972608.0,
            "45": 2531972608.0,
            "46": 2531972608.0,
            "47": 2531972608.0,
            "48": 2531972608.0,
            "49": 2531972608.0,
            "50": 2531972608.0,
            "51": 2531972608.0,
            "52": 2531972608.0,
            "53": 2531972608.0,
            "54": 2531972608.0,
            "55": 2531972608.0,
            "56": 2531972608.0,
            "57": 2531972608.0,
            "58": 2531972608.0,
            "59": 2531972608.0,
            "60": 2531972608.0,
            "61": 2531972608.0,
            "62": 2531972608.0,
            "63": 2531972608.0,
            "64": 2531972608.0,
            "65": 2531972608.0,
            "66": 2531972608.0,
            "67": 2531972608.0,
            "68": 2531972608.0,
            "69": 2531972608.0,
            "70": 2531972608.0,
            "71": 2531972608.0,
            "72": 2531972608.0,
            "73": 2531972608.0,
            "74": 2531972608.0,
            "75": 2531972608.0,
            "76": 2531972608.0,
            "77": 2531972608.0,
            "78": 2531972608.0,
            "79": 2531972608.0,
            "80": 2531972608.0,
            "81": 2531972608.0,
            "82": 2531972608.0,
            "83": 2531972608.0,
            "84": 2531972608.0,
            "85": 2531972608.0,
            "86": 2531972608.0,
            "87": 2531972608.0,
            "88": 2531972608.0,
            "89": 2531972608.0,
            "90": 2531972608.0,
            "91": 2531972608.0,
            "92": 2531972608.0,
            "93": 2531972608.0,
            "94": 2531972608.0,
            "95": 2531972608.0,
            "96": 2531972608.0,
            "97": 2531972608.0,
            "98": 2531972608.0,
            "99": 2531972608.0,
            "100": 2531972608.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 6.66979,
            "2": 0.15375,
            "3": 0.13471,
            "4": 0.1451,
            "5": 0.13243,
            "6": 0.13226,
            "7": 0.14437,
            "8": 0.13751,
            "9": 0.1427,
            "10": 0.14549,
            "11": 0.14547,
            "12": 0.14682,
            "13": 0.40877,
            "14": 0.1477,
            "15": 0.15085,
            "16": 0.14383,
            "17": 0.15106,
            "18": 0.14683,
            "19": 0.14809,
            "20": 0.1535,
            "21": 0.14869,
            "22": 0.14139,
            "23": 0.16201,
            "24": 0.15437,
            "25": 0.14424,
            "26": 0.15046,
            "27": 0.14191,
            "28": 0.14273,
            "29": 0.14227,
            "30": 0.14587,
            "31": 0.14729,
            "32": 0.14529,
            "33": 0.14194,
            "34": 0.14753,
            "35": 0.14364,
            "36": 0.15173,
            "37": 0.15588,
            "38": 0.17947,
            "39": 0.16014,
            "40": 0.16333,
            "41": 0.15457,
            "42": 0.17017,
            "43": 0.13231,
            "44": 0.13057,
            "45": 0.13024,
            "46": 0.1296,
            "47": 0.13068,
            "48": 0.12962,
            "49": 0.13029,
            "50": 0.13004,
            "51": 0.13664,
            "52": 0.1321,
            "53": 0.13024,
            "54": 0.16102,
            "55": 0.15998,
            "56": 0.16599,
            "57": 0.1739,
            "58": 0.1617,
            "59": 0.16149,
            "60": 0.15536,
            "61": 0.19483,
            "62": 0.18185,
            "63": 0.17713,
            "64": 0.20241,
            "65": 0.2339,
            "66": 0.19396,
            "67": 0.18469,
            "68": 0.13408,
            "69": 0.13102,
            "70": 0.13245,
            "71": 0.1302,
            "72": 0.13294,
            "73": 0.13181,
            "74": 0.13273,
            "75": 0.13082,
            "76": 0.13319,
            "77": 0.13089,
            "78": 0.13266,
            "79": 0.13146,
            "80": 0.13271,
            "81": 0.13064,
            "82": 0.133,
            "83": 0.1325,
            "84": 0.13269,
            "85": 0.13105,
            "86": 0.13314,
            "87": 0.13059,
            "88": 0.13244,
            "89": 0.13183,
            "90": 0.13294,
            "91": 0.13281,
            "92": 0.13352,
            "93": 0.13201,
            "94": 0.1343,
            "95": 0.13224,
            "96": 0.13339,
            "97": 0.13189,
            "98": 0.1351,
            "99": 0.13191,
            "100": 0.13277
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_lts_dgxa100_dracooci.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.8401,
            "2": 10.83566,
            "3": 10.82993,
            "4": 10.8173,
            "5": 10.84032,
            "6": 10.87262,
            "7": 10.83467,
            "8": 10.84031,
            "9": 10.84361,
            "10": 10.81341,
            "11": 10.85023,
            "12": 10.84316,
            "13": 10.86604,
            "14": 10.86311,
            "15": 10.80278,
            "16": 10.79645,
            "17": 10.77627,
            "18": 10.80147,
            "19": 10.79392,
            "20": 10.70496,
            "21": 10.68149,
            "22": 10.56314,
            "23": 10.70138,
            "24": 10.57935,
            "25": 10.53846,
            "26": 10.60617,
            "27": 10.5921,
            "28": 10.56154,
            "29": 10.57665,
            "30": 10.35517,
            "31": 10.1277,
            "32": 10.46372,
            "33": 10.45444,
            "34": 10.22446,
            "35": 10.27147,
            "36": 10.22183,
            "37": 10.33944,
            "38": 10.18637,
            "39": 10.39327,
            "40": 10.08044,
            "41": 10.13794,
            "42": 10.20012,
            "43": 9.8379,
            "44": 9.9433,
            "45": 9.82292,
            "46": 9.8231,
            "47": 10.13356,
            "48": 9.84151,
            "49": 9.52105,
            "50": 9.90113,
            "51": 9.83465,
            "52": 9.73175,
            "53": 10.04772,
            "54": 9.93858,
            "55": 9.86422,
            "56": 9.61259,
            "57": 9.46816,
            "58": 9.81221,
            "59": 9.57171,
            "60": 9.48029,
            "61": 9.67964,
            "62": 9.96739,
            "63": 9.35353,
            "64": 9.75732,
            "65": 8.93749,
            "66": 9.68132,
            "67": 9.357,
            "68": 9.76807,
            "69": 9.77288,
            "70": 9.71025,
            "71": 9.60021,
            "72": 9.56674,
            "73": 9.47644,
            "74": 8.93192,
            "75": 9.40879,
            "76": 9.06885,
            "77": 10.04691,
            "78": 9.70976,
            "79": 9.35666,
            "80": 9.39077,
            "81": 9.46573,
            "82": 9.6803,
            "83": 9.29215,
            "84": 9.40239,
            "85": 9.59743,
            "86": 9.06112,
            "87": 9.57954,
            "88": 9.73247,
            "89": 9.58838,
            "90": 9.80386,
            "91": 9.32104,
            "92": 9.35012,
            "93": 9.06314,
            "94": 8.82007,
            "95": 9.50565,
            "96": 9.51099,
            "97": 9.29311,
            "98": 9.65573,
            "99": 8.87504,
            "100": 9.38812
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1670.0,
            "2": 1691.0,
            "3": 1630.0,
            "4": 1805.0,
            "5": 1970.0,
            "6": 1901.0,
            "7": 1815.0,
            "8": 1592.0,
            "9": 1968.0,
            "10": 1436.0,
            "11": 1923.0,
            "12": 1867.0,
            "13": 1888.0,
            "14": 1807.0,
            "15": 1918.0,
            "16": 1922.0,
            "17": 1774.0,
            "18": 1735.0,
            "19": 1886.0,
            "20": 1786.0,
            "21": 2020.0,
            "22": 1685.0,
            "23": 2112.0,
            "24": 1657.0,
            "25": 1610.0,
            "26": 1815.0,
            "27": 1880.0,
            "28": 2025.0,
            "29": 1975.0,
            "30": 2039.0,
            "31": 1713.0,
            "32": 1926.0,
            "33": 2163.0,
            "34": 1894.0,
            "35": 2001.0,
            "36": 1963.0,
            "37": 2401.0,
            "38": 2324.0,
            "39": 2351.0,
            "40": 2321.0,
            "41": 2266.0,
            "42": 2317.0,
            "43": 1999.0,
            "44": 2133.0,
            "45": 2205.0,
            "46": 2324.0,
            "47": 2463.0,
            "48": 2447.0,
            "49": 2237.0,
            "50": 2365.0,
            "51": 2534.0,
            "52": 2604.0,
            "53": 2995.0,
            "54": 2699.0,
            "55": 2489.0,
            "56": 2680.0,
            "57": 2285.0,
            "58": 2976.0,
            "59": 2816.0,
            "60": 2508.0,
            "61": 3075.0,
            "62": 2710.0,
            "63": 2574.0,
            "64": 3027.0,
            "65": 2719.0,
            "66": 3182.0,
            "67": 2770.0,
            "68": 2875.0,
            "69": 2961.0,
            "70": 3241.0,
            "71": 2859.0,
            "72": 2495.0,
            "73": 2972.0,
            "74": 1989.0,
            "75": 2643.0,
            "76": 3012.0,
            "77": 3398.0,
            "78": 3413.0,
            "79": 3272.0,
            "80": 3368.0,
            "81": 3656.0,
            "82": 3228.0,
            "83": 2772.0,
            "84": 3146.0,
            "85": 3336.0,
            "86": 2738.0,
            "87": 3886.0,
            "88": 3044.0,
            "89": 3429.0,
            "90": 2961.0,
            "91": 2952.0,
            "92": 3239.0,
            "93": 2791.0,
            "94": 3583.0,
            "95": 3533.0,
            "96": 3530.0,
            "97": 3241.0,
            "98": 3680.0,
            "99": 3320.0,
            "100": 3432.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1230390272.0,
            "2": 1230390272.0,
            "3": 1230390272.0,
            "4": 1230390272.0,
            "5": 1230390272.0,
            "6": 1230390272.0,
            "7": 1230390272.0,
            "8": 1230390272.0,
            "9": 1230390272.0,
            "10": 1230390272.0,
            "11": 1230390272.0,
            "12": 1230390272.0,
            "13": 1230390272.0,
            "14": 1230390272.0,
            "15": 1230390272.0,
            "16": 1230390272.0,
            "17": 1230390272.0,
            "18": 1230390272.0,
            "19": 1230390272.0,
            "20": 1230390272.0,
            "21": 1230390272.0,
            "22": 1230390272.0,
            "23": 1230390272.0,
            "24": 1230390272.0,
            "25": 1230390272.0,
            "26": 1230390272.0,
            "27": 1230390272.0,
            "28": 1230390272.0,
            "29": 1230390272.0,
            "30": 1230390272.0,
            "31": 1230390272.0,
            "32": 1230390272.0,
            "33": 1230390272.0,
            "34": 1230390272.0,
            "35": 1230390272.0,
            "36": 1230390272.0,
            "37": 1230390272.0,
            "38": 1230390272.0,
            "39": 1230390272.0,
            "40": 1230390272.0,
            "41": 1230390272.0,
            "42": 1230390272.0,
            "43": 1230390272.0,
            "44": 1230390272.0,
            "45": 1230390272.0,
            "46": 1230390272.0,
            "47": 1230390272.0,
            "48": 1230390272.0,
            "49": 1230390272.0,
            "50": 1230390272.0,
            "51": 1230390272.0,
            "52": 1230390272.0,
            "53": 1230390272.0,
            "54": 1230390272.0,
            "55": 1230390272.0,
            "56": 1230390272.0,
            "57": 1230390272.0,
            "58": 1230390272.0,
            "59": 1230390272.0,
            "60": 1230390272.0,
            "61": 1230390272.0,
            "62": 1230390272.0,
            "63": 1230390272.0,
            "64": 1230390272.0,
            "65": 1230390272.0,
            "66": 1230390272.0,
            "67": 1230390272.0,
            "68": 1230390272.0,
            "69": 1230390272.0,
            "70": 1230390272.0,
            "71": 1230390272.0,
            "72": 1230390272.0,
            "73": 1230390272.0,
            "74": 1230390272.0,
            "75": 1230390272.0,
            "76": 1230390272.0,
            "77": 1230390272.0,
            "78": 1230390272.0,
            "79": 1230390272.0,
            "80": 1230390272.0,
            "81": 1230390272.0,
            "82": 1230390272.0,
            "83": 1230390272.0,
            "84": 1230390272.0,
            "85": 1230390272.0,
            "86": 1230390272.0,
            "87": 1230390272.0,
            "88": 1230390272.0,
            "89": 1230390272.0,
            "90": 1230390272.0,
            "91": 1230390272.0,
            "92": 1230390272.0,
            "93": 1230390272.0,
            "94": 1230390272.0,
            "95": 1230390272.0,
            "96": 1230390272.0,
            "97": 1230390272.0,
            "98": 1230390272.0,
            "99": 1230390272.0,
            "100": 1230390272.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1984492032.0,
            "2": 2531972608.0,
            "3": 2531972608.0,
            "4": 2531972608.0,
            "5": 2531972608.0,
            "6": 2531972608.0,
            "7": 2531972608.0,
            "8": 2531972608.0,
            "9": 2531972608.0,
            "10": 2531972608.0,
            "11": 2531972608.0,
            "12": 2531972608.0,
            "13": 2531972608.0,
            "14": 2531972608.0,
            "15": 2531972608.0,
            "16": 2531972608.0,
            "17": 2531972608.0,
            "18": 2531972608.0,
            "19": 2531972608.0,
            "20": 2531972608.0,
            "21": 2531972608.0,
            "22": 2531972608.0,
            "23": 2531972608.0,
            "24": 2531972608.0,
            "25": 2531972608.0,
            "26": 2531972608.0,
            "27": 2531972608.0,
            "28": 2531972608.0,
            "29": 2531972608.0,
            "30": 2531972608.0,
            "31": 2531972608.0,
            "32": 2531972608.0,
            "33": 2531972608.0,
            "34": 2531972608.0,
            "35": 2531972608.0,
            "36": 2531972608.0,
            "37": 2531972608.0,
            "38": 2531972608.0,
            "39": 2531972608.0,
            "40": 2531972608.0,
            "41": 2531972608.0,
            "42": 2531972608.0,
            "43": 2531972608.0,
            "44": 2531972608.0,
            "45": 2531972608.0,
            "46": 2531972608.0,
            "47": 2531972608.0,
            "48": 2531972608.0,
            "49": 2531972608.0,
            "50": 2531972608.0,
            "51": 2531972608.0,
            "52": 2531972608.0,
            "53": 2531972608.0,
            "54": 2531972608.0,
            "55": 2531972608.0,
            "56": 2531972608.0,
            "57": 2531972608.0,
            "58": 2531972608.0,
            "59": 2531972608.0,
            "60": 2531972608.0,
            "61": 2531972608.0,
            "62": 2531972608.0,
            "63": 2531972608.0,
            "64": 2531972608.0,
            "65": 2531972608.0,
            "66": 2531972608.0,
            "67": 2531972608.0,
            "68": 2531972608.0,
            "69": 2531972608.0,
            "70": 2531972608.0,
            "71": 2531972608.0,
            "72": 2531972608.0,
            "73": 2531972608.0,
            "74": 2531972608.0,
            "75": 2531972608.0,
            "76": 2531972608.0,
            "77": 2531972608.0,
            "78": 2531972608.0,
            "79": 2531972608.0,
            "80": 2531972608.0,
            "81": 2531972608.0,
            "82": 2531972608.0,
            "83": 2531972608.0,
            "84": 2531972608.0,
            "85": 2531972608.0,
            "86": 2531972608.0,
            "87": 2531972608.0,
            "88": 2531972608.0,
            "89": 2531972608.0,
            "90": 2531972608.0,
            "91": 2531972608.0,
            "92": 2531972608.0,
            "93": 2531972608.0,
            "94": 2531972608.0,
            "95": 2531972608.0,
            "96": 2531972608.0,
            "97": 2531972608.0,
            "98": 2531972608.0,
            "99": 2531972608.0,
            "100": 2531972608.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 6.69156,
            "2": 0.15851,
            "3": 0.15939,
            "4": 0.14587,
            "5": 0.13996,
            "6": 0.14246,
            "7": 0.14168,
            "8": 0.13947,
            "9": 0.1406,
            "10": 0.13629,
            "11": 0.38438,
            "12": 0.13502,
            "13": 0.13606,
            "14": 0.14033,
            "15": 0.13443,
            "16": 0.13179,
            "17": 0.13378,
            "18": 0.13167,
            "19": 0.13416,
            "20": 0.134,
            "21": 0.13338,
            "22": 0.13341,
            "23": 0.13463,
            "24": 0.13194,
            "25": 0.13343,
            "26": 0.13151,
            "27": 0.13224,
            "28": 0.13211,
            "29": 0.13154,
            "30": 0.13114,
            "31": 0.13127,
            "32": 0.13156,
            "33": 0.13112,
            "34": 0.13133,
            "35": 0.13254,
            "36": 0.1314,
            "37": 0.13112,
            "38": 0.13159,
            "39": 0.13294,
            "40": 0.1325,
            "41": 0.1311,
            "42": 0.13177,
            "43": 0.13171,
            "44": 0.13171,
            "45": 0.1308,
            "46": 0.13012,
            "47": 0.13104,
            "48": 0.13108,
            "49": 0.13129,
            "50": 0.13155,
            "51": 0.15273,
            "52": 0.1324,
            "53": 0.13236,
            "54": 0.13244,
            "55": 0.13198,
            "56": 0.1336,
            "57": 0.13148,
            "58": 0.13225,
            "59": 0.13123,
            "60": 0.13225,
            "61": 0.13307,
            "62": 0.13259,
            "63": 0.13191,
            "64": 0.13297,
            "65": 0.13243,
            "66": 0.13236,
            "67": 0.1309,
            "68": 0.13226,
            "69": 0.13072,
            "70": 0.13171,
            "71": 0.13137,
            "72": 0.13229,
            "73": 0.13521,
            "74": 0.13296,
            "75": 0.13526,
            "76": 0.13228,
            "77": 0.13205,
            "78": 0.13248,
            "79": 0.13355,
            "80": 0.13311,
            "81": 0.13269,
            "82": 0.13199,
            "83": 0.13576,
            "84": 0.13205,
            "85": 0.13411,
            "86": 0.13176,
            "87": 0.13273,
            "88": 0.13166,
            "89": 0.13262,
            "90": 0.13138,
            "91": 0.13261,
            "92": 0.13197,
            "93": 0.13258,
            "94": 0.13132,
            "95": 0.13295,
            "96": 0.1307,
            "97": 0.13291,
            "98": 0.13163,
            "99": 0.13281,
            "100": 0.13201
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 1
  --recompute-granularity: full
  --recompute-method: uniform
  --recompute-num-layers: 1
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-checkpoint-opt_param-scheduler: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: ckpt-resume


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.84081, "5": 10.84102, "10": 10.80909, "15": 10.80019, "20": 10.70488, "25": 10.53531, "30": 10.35491, "35": 10.27076, "40": 10.07793, "45": 9.82312, "50": 9.90105}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1546.0, "5": 1912.0, "10": 1432.0, "15": 1927.0, "20": 1652.0, "25": 1693.0, "30": 2019.0, "35": 1969.0, "40": 2266.0, "45": 2113.0, "50": 2411.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1232487936.0, "5": 1232487936.0, "10": 1232487936.0, "15": 1232487936.0, "20": 1232487936.0, "25": 1232487936.0, "30": 1232487936.0, "35": 1232487936.0, "40": 1232487936.0, "45": 1232487936.0, "50": 1232487936.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1984492544.0, "5": 2534070272.0, "10": 2534070272.0, "15": 2534070272.0, "20": 2534070272.0, "25": 2534070272.0, "30": 2534070272.0, "35": 2534070272.0, "40": 2534070272.0, "45": 2534070272.0, "50": 2534070272.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 6.23927, "5": 0.13215, "10": 0.13225, "15": 0.13147, "20": 0.13068, "25": 0.13215, "30": 0.13079, "35": 0.13166, "40": 0.12935, "45": 0.13027, "50": 0.13027}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.8401, "5": 10.84032, "10": 10.81341, "15": 10.80278, "20": 10.70496, "25": 10.53846, "30": 10.35517, "35": 10.27147, "40": 10.08044, "45": 9.82292, "50": 9.90113}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1670.0, "5": 1970.0, "10": 1436.0, "15": 1918.0, "20": 1786.0, "25": 1610.0, "30": 2039.0, "35": 2001.0, "40": 2321.0, "45": 2205.0, "50": 2365.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1230390272.0, "5": 1230390272.0, "10": 1230390272.0, "15": 1230390272.0, "20": 1230390272.0, "25": 1230390272.0, "30": 1230390272.0, "35": 1230390272.0, "40": 1230390272.0, "45": 1230390272.0, "50": 1230390272.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1984492032.0, "5": 2531972608.0, "10": 2531972608.0, "15": 2531972608.0, "20": 2531972608.0, "25": 2531972608.0, "30": 2531972608.0, "35": 2531972608.0, "40": 2531972608.0, "45": 2531972608.0, "50": 2531972608.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 8.47553, "5": 0.12815, "10": 0.12892, "15": 0.1292, "20": 0.12844, "25": 0.12762, "30": 0.1277, "35": 0.12725, "40": 0.12759, "45": 0.12625, "50": 0.12714}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_uniform_full_recompute/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 1
  --recompute-granularity: full
  --recompute-method: uniform
  --recompute-num-layers: 1
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-optim-fully-reshardable: true
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82974, "5": 10.84387, "10": 10.79336, "15": 10.77992, "20": 10.67707, "25": 10.48581, "30": 10.28464, "35": 10.18863, "40": 9.99275, "45": 9.72154, "50": 9.82122}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 214.0, "5": 270.0, "10": 224.0, "15": 235.0, "20": 242.0, "25": 260.0, "30": 280.0, "35": 300.0, "40": 334.0, "45": 324.0, "50": 298.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 829378048.0, "5": 829378048.0, "10": 829378048.0, "15": 829378048.0, "20": 829378048.0, "25": 829378048.0, "30": 829378048.0, "35": 829378048.0, "40": 829378048.0, "45": 829378048.0, "50": 829378048.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 891564544.0, "5": 1248933376.0, "10": 1250505728.0, "15": 1250505728.0, "20": 1250505728.0, "25": 1250505728.0, "30": 1250505728.0, "35": 1250505728.0, "40": 1250505728.0, "45": 1250505728.0, "50": 1250505728.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 20.58657, "5": 0.44565, "10": 0.45716, "15": 0.50953, "20": 0.44872, "25": 0.44791, "30": 0.44871, "35": 0.44188, "40": 0.44233, "45": 0.44161, "50": 0.44069}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82975, "5": 10.8439, "10": 10.79337, "15": 10.77994, "20": 10.67712, "25": 10.48584, "30": 10.28468, "35": 10.18859, "40": 9.99279, "45": 9.72153, "50": 9.82127}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 226.0, "5": 275.0, "10": 181.0, "15": 253.0, "20": 248.0, "25": 207.0, "30": 265.0, "35": 281.0, "40": 315.0, "45": 282.0, "50": 336.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 831212544.0, "5": 831212544.0, "10": 831212544.0, "15": 831212544.0, "20": 831212544.0, "25": 831212544.0, "30": 831212544.0, "35": 831212544.0, "40": 831212544.0, "45": 831212544.0, "50": 831212544.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 891582464.0, "5": 1250786304.0, "10": 1250786304.0, "15": 1250786304.0, "20": 1250786304.0, "25": 1250786304.0, "30": 1250786304.0, "35": 1251833856.0, "40": 1251833856.0, "45": 1251833856.0, "50": 1251833856.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 20.1181, "5": 0.47795, "10": 0.47291, "15": 0.48167, "20": 0.412, "25": 0.41115, "30": 0.41145, "35": 0.41136, "40": 0.41095, "45": 0.40816, "50": 0.42667}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_cp4_a2a_p2p_nondeterministic/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 2
  --context-parallel-size: 4
  --cp-comm-type: a2a+p2p
  --hierarchical-context-parallel-sizes: 2 2
  --sequence-parallel: true
  --hidden-dropout: 0.0
  --attention-dropout: 0.0
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: flash
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.82974, "5": 10.84387, "10": 10.79336, "15": 10.77992, "20": 10.67707, "25": 10.48581, "30": 10.28464, "35": 10.18863, "40": 9.99275, "45": 9.72154, "50": 9.82122, "55": 9.79605, "60": 9.41615, "65": 8.85917, "70": 9.67001, "75": 9.3564, "80": 9.34748, "85": 9.55946, "90": 9.77362, "95": 9.47863, "100": 9.35146}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 214.0, "5": 270.0, "10": 224.0, "15": 235.0, "20": 242.0, "25": 260.0, "30": 280.0, "35": 300.0, "40": 334.0, "45": 324.0, "50": 298.0, "55": 390.0, "60": 342.0, "65": 394.0, "70": 411.0, "75": 319.0, "80": 414.0, "85": 441.0, "90": 381.0, "95": 398.0, "100": 431.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 829378048.0, "5": 829378048.0, "10": 829378048.0, "15": 829378048.0, "20": 829378048.0, "25": 829378048.0, "30": 829378048.0, "35": 829378048.0, "40": 829378048.0, "45": 829378048.0, "50": 829378048.0, "55": 829378048.0, "60": 829378048.0, "65": 829378048.0, "70": 829378048.0, "75": 829378048.0, "80": 829378048.0, "85": 829378048.0, "90": 829378048.0, "95": 829378048.0, "100": 829378048.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 892610560.0, "5": 1248933376.0, "10": 1248933376.0, "15": 1248933376.0, "20": 1248933376.0, "25": 1248933376.0, "30": 1248933376.0, "35": 1249456128.0, "40": 1249456128.0, "45": 1249456128.0, "50": 1249980928.0, "55": 1249980928.0, "60": 1249980928.0, "65": 1249980928.0, "70": 1249980928.0, "75": 1250504192.0, "80": 1250504192.0, "85": 1250504192.0, "90": 1250505728.0, "95": 1250505728.0, "100": 1250505728.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 24.66296, "5": 0.45069, "10": 0.44192, "15": 0.44436, "20": 0.442, "25": 0.44288, "30": 0.44618, "35": 0.44139, "40": 0.44072, "45": 0.44429, "50": 0.43893, "55": 0.43569, "60": 0.43551, "65": 0.43912, "70": 0.44568, "75": 0.44023, "80": 0.43745, "85": 0.43617, "90": 0.43925, "95": 0.43653, "100": 0.43561}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.82974, "5": 10.8439, "10": 10.79337, "15": 10.77994, "20": 10.67712, "25": 10.48584, "30": 10.28468, "35": 10.18859, "40": 9.99279, "45": 9.72153, "50": 9.82127, "55": 9.79611, "60": 9.41616, "65": 8.85917, "70": 9.67002, "75": 9.35641, "80": 9.34751, "85": 9.55947, "90": 9.77367, "95": 9.47865, "100": 9.35145}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 226.0, "5": 275.0, "10": 181.0, "15": 253.0, "20": 248.0, "25": 207.0, "30": 265.0, "35": 281.0, "40": 315.0, "45": 282.0, "50": 336.0, "55": 373.0, "60": 343.0, "65": 389.0, "70": 436.0, "75": 337.0, "80": 395.0, "85": 419.0, "90": 412.0, "95": 405.0, "100": 394.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 831212544.0, "5": 831212544.0, "10": 831212544.0, "15": 831212544.0, "20": 831212544.0, "25": 831212544.0, "30": 831212544.0, "35": 831212544.0, "40": 831212544.0, "45": 831212544.0, "50": 831212544.0, "55": 831212544.0, "60": 831212544.0, "65": 831212544.0, "70": 831212544.0, "75": 831212544.0, "80": 831212544.0, "85": 831212544.0, "90": 831212544.0, "95": 831212544.0, "100": 831212544.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 891582464.0, "5": 1250786304.0, "10": 1250786304.0, "15": 1250786304.0, "20": 1251832832.0, "25": 1251832832.0, "30": 1251832832.0, "35": 1251832832.0, "40": 1251833344.0, "45": 1251833344.0, "50": 1251833344.0, "55": 1251834880.0, "60": 1251834880.0, "65": 1251834880.0, "70": 1251834880.0, "75": 1251834880.0, "80": 1251834880.0, "85": 1251834880.0, "90": 1251834880.0, "95": 1251834880.0, "100": 1251834880.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 13.35774, "5": 0.41339, "10": 0.40364, "15": 0.4021, "20": 0.40412, "25": 0.40129, "30": 0.40039, "35": 0.4007, "40": 0.39722, "45": 0.40898, "50": 0.40589, "55": 0.46769, "60": 0.46488, "65": 0.45451, "70": 0.41438, "75": 0.41255, "80": 0.41126, "85": 0.41046, "90": 0.4189, "95": 0.4149, "100": 0.41491}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_cp4_a2a_p2p_nondeterministic/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 1
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 2
  --context-parallel-size: 4
  --cp-comm-type: a2a+p2p
  --hierarchical-context-parallel-sizes: "[2 2]"
  --sequence-parallel: true
  --hidden-dropout: 0.0
  --attention-dropout: 0.0
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-checkpoint-opt_param-scheduler: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: flash
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: ckpt-resume


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.85052, "5": 10.86418, "10": 10.82554, "15": 10.81471, "20": 10.72092, "25": 10.53117, "30": 10.33839, "35": 10.24217, "40": 10.05215, "45": 9.76678, "50": 9.85504, "55": 9.82383, "60": 9.44305, "65": 8.89174, "70": 9.67964, "75": 9.36791, "80": 9.3579, "85": 9.56115, "90": 9.77138, "95": 9.48122, "100": 9.35039}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1659.0, "5": 1923.0, "10": 1613.0, "15": 2007.0, "20": 1746.0, "25": 1725.0, "30": 2034.0, "35": 2106.0, "40": 2118.0, "45": 2283.0, "50": 2224.0, "55": 2439.0, "60": 2468.0, "65": 2681.0, "70": 3129.0, "75": 2953.0, "80": 3095.0, "85": 3411.0, "90": 3097.0, "95": 3137.0, "100": 3354.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 892293120.0, "5": 892293120.0, "10": 892293120.0, "15": 892293120.0, "20": 892293120.0, "25": 892293120.0, "30": 892293120.0, "35": 892293120.0, "40": 892293120.0, "45": 892293120.0, "50": 892293120.0, "55": 892293120.0, "60": 892293120.0, "65": 892293120.0, "70": 892293120.0, "75": 892293120.0, "80": 892293120.0, "85": 892293120.0, "90": 892293120.0, "95": 892293120.0, "100": 892293120.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2233004032.0, "5": 2595615744.0, "10": 2595615744.0, "15": 2595615744.0, "20": 2595615744.0, "25": 2595615744.0, "30": 2595615744.0, "35": 2595615744.0, "40": 2595615744.0, "45": 2595615744.0, "50": 2595615744.0, "55": 2595615744.0, "60": 2595615744.0, "65": 2595615744.0, "70": 2595615744.0, "75": 2595615744.0, "80": 2595615744.0, "85": 2595615744.0, "90": 2595615744.0, "95": 2595615744.0, "100": 2595615744.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 5.85207, "5": 0.16442, "10": 0.16111, "15": 0.16151, "20": 0.16372, "25": 0.16149, "30": 0.16045, "35": 0.15853, "40": 0.15793, "45": 0.15955, "50": 0.1585, "55": 0.15945, "60": 0.1592, "65": 0.1599, "70": 0.15924, "75": 0.15727, "80": 0.15944, "85": 0.15926, "90": 0.15846, "95": 0.15851, "100": 0.15885}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.77012,
            "2": 10.78244,
            "3": 10.77833,
            "4": 10.75145,
            "5": 10.80955,
            "6": 10.8223,
            "7": 10.80193,
            "8": 10.78868,
            "9": 10.79503,
            "10": 10.71341,
            "11": 10.85003,
            "12": 10.80071,
            "13": 10.8263,
            "14": 10.84293,
            "15": 10.7559,
            "16": 10.75248,
            "17": 10.70854,
            "18": 10.74761,
            "19": 10.74709,
            "20": 10.64388,
            "21": 10.60456,
            "22": 10.43295,
            "23": 10.66573,
            "24": 10.50049,
            "25": 10.43605,
            "26": 10.51463,
            "27": 10.54136,
            "28": 10.51359,
            "29": 10.53716,
            "30": 10.25964,
            "31": 9.97634,
            "32": 10.39958,
            "33": 10.38607,
            "34": 10.11016,
            "35": 10.1741,
            "36": 10.11553,
            "37": 10.26008,
            "38": 10.07462,
            "39": 10.32873,
            "40": 9.96852,
            "41": 10.05099,
            "42": 10.12726,
            "43": 9.70798,
            "44": 9.83287,
            "45": 9.70538,
            "46": 9.7134,
            "47": 10.05872,
            "48": 9.74565,
            "49": 9.40522,
            "50": 9.80891,
            "51": 9.76757,
            "52": 9.64732,
            "53": 9.995,
            "54": 9.88603,
            "55": 9.81763,
            "56": 9.53914,
            "57": 9.38192,
            "58": 9.75896,
            "59": 9.52106,
            "60": 9.42443,
            "61": 9.63665,
            "62": 9.92974,
            "63": 9.29595,
            "64": 9.70631,
            "65": 8.88066,
            "66": 9.64072,
            "67": 9.32146,
            "68": 9.73692,
            "69": 9.75346,
            "70": 9.68289,
            "71": 9.58117,
            "72": 9.52491,
            "73": 9.44094,
            "74": 8.86077,
            "75": 9.36671,
            "76": 9.01691,
            "77": 10.02224,
            "78": 9.68354,
            "79": 9.33325,
            "80": 9.3582,
            "81": 9.43786,
            "82": 9.66102,
            "83": 9.26223,
            "84": 9.37189,
            "85": 9.56652,
            "86": 9.04493,
            "87": 9.5575,
            "88": 9.70541,
            "89": 9.55092,
            "90": 9.79196,
            "91": 9.29173,
            "92": 9.31225,
            "93": 9.0433,
            "94": 8.78683,
            "95": 9.49525,
            "96": 9.48391,
            "97": 9.25966,
            "98": 9.62611,
            "99": 8.85031,
            "100": 9.36043
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1660.0,
            "2": 1892.0,
            "3": 1805.0,
            "4": 1861.0,
            "5": 2134.0,
            "6": 1964.0,
            "7": 2077.0,
            "8": 1755.0,
            "9": 1942.0,
            "10": 1516.0,
            "11": 1981.0,
            "12": 1962.0,
            "13": 2092.0,
            "14": 1940.0,
            "15": 2030.0,
            "16": 1975.0,
            "17": 2081.0,
            "18": 1925.0,
            "19": 1890.0,
            "20": 1806.0,
            "21": 1992.0,
            "22": 1833.0,
            "23": 2082.0,
            "24": 1806.0,
            "25": 1834.0,
            "26": 1935.0,
            "27": 1987.0,
            "28": 2157.0,
            "29": 2045.0,
            "30": 1959.0,
            "31": 1733.0,
            "32": 2011.0,
            "33": 2149.0,
            "34": 2014.0,
            "35": 2131.0,
            "36": 2027.0,
            "37": 2337.0,
            "38": 2210.0,
            "39": 2454.0,
            "40": 2335.0,
            "41": 2379.0,
            "42": 2359.0,
            "43": 2101.0,
            "44": 2280.0,
            "45": 2138.0,
            "46": 2297.0,
            "47": 2454.0,
            "48": 2586.0,
            "49": 2213.0,
            "50": 2414.0,
            "51": 2613.0,
            "52": 2647.0,
            "53": 2908.0,
            "54": 2580.0,
            "55": 2486.0,
            "56": 2687.0,
            "57": 2577.0,
            "58": 2824.0,
            "59": 2720.0,
            "60": 2410.0,
            "61": 2744.0,
            "62": 2536.0,
            "63": 2652.0,
            "64": 2918.0,
            "65": 2742.0,
            "66": 2927.0,
            "67": 2920.0,
            "68": 2652.0,
            "69": 3019.0,
            "70": 2996.0,
            "71": 2835.0,
            "72": 2664.0,
            "73": 3211.0,
            "74": 2311.0,
            "75": 2658.0,
            "76": 3155.0,
            "77": 3051.0,
            "78": 3073.0,
            "79": 3116.0,
            "80": 3191.0,
            "81": 3237.0,
            "82": 3218.0,
            "83": 2689.0,
            "84": 3294.0,
            "85": 3209.0,
            "86": 2558.0,
            "87": 3462.0,
            "88": 3287.0,
            "89": 3201.0,
            "90": 3331.0,
            "91": 3183.0,
            "92": 3201.0,
            "93": 2942.0,
            "94": 3274.0,
            "95": 3132.0,
            "96": 3200.0,
            "97": 3054.0,
            "98": 3544.0,
            "99": 3387.0,
            "100": 3192.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 917459968.0,
            "2": 917459968.0,
            "3": 917459968.0,
            "4": 917459968.0,
            "5": 917459968.0,
            "6": 917459968.0,
            "7": 917459968.0,
            "8": 917459968.0,
            "9": 917459968.0,
            "10": 917459968.0,
            "11": 917459968.0,
            "12": 917459968.0,
            "13": 917459968.0,
            "14": 917459968.0,
            "15": 917459968.0,
            "16": 917459968.0,
            "17": 917459968.0,
            "18": 917459968.0,
            "19": 917459968.0,
            "20": 917459968.0,
            "21": 917459968.0,
            "22": 917459968.0,
            "23": 917459968.0,
            "24": 917459968.0,
            "25": 917459968.0,
            "26": 917459968.0,
            "27": 917459968.0,
            "28": 917459968.0,
            "29": 917459968.0,
            "30": 917459968.0,
            "31": 917459968.0,
            "32": 917459968.0,
            "33": 917459968.0,
            "34": 917459968.0,
            "35": 917459968.0,
            "36": 917459968.0,
            "37": 917459968.0,
            "38": 917459968.0,
            "39": 917459968.0,
            "40": 917459968.0,
            "41": 917459968.0,
            "42": 917459968.0,
            "43": 917459968.0,
            "44": 917459968.0,
            "45": 917459968.0,
            "46": 917459968.0,
            "47": 917459968.0,
            "48": 917459968.0,
            "49": 917459968.0,
            "50": 917459968.0,
            "51": 917459968.0,
            "52": 917459968.0,
            "53": 917459968.0,
            "54": 917459968.0,
            "55": 917459968.0,
            "56": 917459968.0,
            "57": 917459968.0,
            "58": 917459968.0,
            "59": 917459968.0,
            "60": 917459968.0,
            "61": 917459968.0,
            "62": 917459968.0,
            "63": 917459968.0,
            "64": 917459968.0,
            "65": 917459968.0,
            "66": 917459968.0,
            "67": 917459968.0,
            "68": 917459968.0,
            "69": 917459968.0,
            "70": 917459968.0,
            "71": 917459968.0,
            "72": 917459968.0,
            "73": 917459968.0,
            "74": 917459968.0,
            "75": 917459968.0,
            "76": 917459968.0,
            "77": 917459968.0,
            "78": 917459968.0,
            "79": 917459968.0,
            "80": 917459968.0,
            "81": 917459968.0,
            "82": 917459968.0,
            "83": 917459968.0,
            "84": 917459968.0,
            "85": 917459968.0,
            "86": 917459968.0,
            "87": 917459968.0,
            "88": 917459968.0,
            "89": 917459968.0,
            "90": 917459968.0,
            "91": 917459968.0,
            "92": 917459968.0,
            "93": 917459968.0,
            "94": 917459968.0,
            "95": 917459968.0,
            "96": 917459968.0,
            "97": 917459968.0,
            "98": 917459968.0,
            "99": 917459968.0,
            "100": 917459968.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2262889472.0,
            "2": 2621306880.0,
            "3": 2621306880.0,
            "4": 2621306880.0,
            "5": 2621306880.0,
            "6": 2621306880.0,
            "7": 2621306880.0,
            "8": 2621306880.0,
            "9": 2621306880.0,
            "10": 2621306880.0,
            "11": 2621306880.0,
            "12": 2621306880.0,
            "13": 2621306880.0,
            "14": 2621306880.0,
            "15": 2621306880.0,
            "16": 2621306880.0,
            "17": 2621306880.0,
            "18": 2621306880.0,
            "19": 2621306880.0,
            "20": 2621306880.0,
            "21": 2621306880.0,
            "22": 2621306880.0,
            "23": 2621306880.0,
            "24": 2621306880.0,
            "25": 2621306880.0,
            "26": 2621306880.0,
            "27": 2621306880.0,
            "28": 2621306880.0,
            "29": 2621306880.0,
            "30": 2621306880.0,
            "31": 2621306880.0,
            "32": 2621306880.0,
            "33": 2621306880.0,
            "34": 2621306880.0,
            "35": 2621306880.0,
            "36": 2621306880.0,
            "37": 2621306880.0,
            "38": 2621306880.0,
            "39": 2621306880.0,
            "40": 2621306880.0,
            "41": 2621306880.0,
            "42": 2621306880.0,
            "43": 2621306880.0,
            "44": 2621306880.0,
            "45": 2621306880.0,
            "46": 2621306880.0,
            "47": 2621306880.0,
            "48": 2621306880.0,
            "49": 2621306880.0,
            "50": 2621306880.0,
            "51": 2621306880.0,
            "52": 2621306880.0,
            "53": 2621306880.0,
            "54": 2621306880.0,
            "55": 2621306880.0,
            "56": 2621306880.0,
            "57": 2621306880.0,
            "58": 2621306880.0,
            "59": 2621306880.0,
            "60": 2621306880.0,
            "61": 2621306880.0,
            "62": 2621306880.0,
            "63": 2621306880.0,
            "64": 2621306880.0,
            "65": 2621306880.0,
            "66": 2621306880.0,
            "67": 2621306880.0,
            "68": 2621306880.0,
            "69": 2621306880.0,
            "70": 2621306880.0,
            "71": 2621306880.0,
            "72": 2621306880.0,
            "73": 2621306880.0,
            "74": 2621306880.0,
            "75": 2621306880.0,
            "76": 2621306880.0,
            "77": 2621306880.0,
            "78": 2621306880.0,
            "79": 2621306880.0,
            "80": 2621306880.0,
            "81": 2621306880.0,
            "82": 2621306880.0,
            "83": 2621306880.0,
            "84": 2621306880.0,
            "85": 2621306880.0,
            "86": 2621306880.0,
            "87": 2621306880.0,
            "88": 2621306880.0,
            "89": 2621306880.0,
            "90": 2621306880.0,
            "91": 2621306880.0,
            "92": 2621306880.0,
            "93": 2621306880.0,
            "94": 2621306880.0,
            "95": 2621306880.0,
            "96": 2621306880.0,
            "97": 2621306880.0,
            "98": 2621306880.0,
            "99": 2621306880.0,
            "100": 2621306880.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 2.22064,
            "3": 0.13024,
            "4": 0.11768,
            "5": 0.11875,
            "6": 0.11742,
            "7": 0.11821,
            "8": 0.11878,
            "9": 0.11922,
            "10": 0.11834,
            "11": 0.11707,
            "12": 0.1171,
            "13": 0.11874,
            "14": 0.12245,
            "15": 0.11821,
            "16": 0.1177,
            "17": 0.11857,
            "18": 0.11778,
            "19": 0.1187,
            "20": 0.11835,
            "21": 0.12351,
            "22": 0.11771,
            "23": 0.11773,
            "24": 0.11819,
            "25": 0.11705,
            "26": 0.12602,
            "27": 0.12585,
            "28": 0.12677,
            "29": 0.12752,
            "30": 0.12847,
            "31": 0.12883,
            "32": 0.12586,
            "33": 0.12437,
            "34": 0.12277,
            "35": 0.12212,
            "36": 0.12255,
            "37": 0.12084,
            "38": 0.12104,
            "39": 0.12124,
            "40": 0.12086,
            "41": 0.12101,
            "42": 0.11969,
            "43": 0.1197,
            "44": 0.11956,
            "45": 0.11977,
            "46": 0.11865,
            "47": 0.11795,
            "48": 0.11928,
            "49": 0.11794,
            "50": 0.11851,
            "51": 0.12726,
            "52": 0.11929,
            "53": 0.11813,
            "54": 0.11818,
            "55": 0.11748,
            "56": 0.11707,
            "57": 0.11896,
            "58": 0.11832,
            "59": 0.11799,
            "60": 0.11784,
            "61": 0.11888,
            "62": 0.11879,
            "63": 0.11819,
            "64": 0.1185,
            "65": 0.11926,
            "66": 0.11924,
            "67": 0.11982,
            "68": 0.11873,
            "69": 0.11986,
            "70": 0.11895,
            "71": 0.11964,
            "72": 0.11906,
            "73": 0.12017,
            "74": 0.11976,
            "75": 0.11759,
            "76": 0.11921,
            "77": 0.11907,
            "78": 0.11823,
            "79": 0.11867,
            "80": 0.11934,
            "81": 0.11888,
            "82": 0.11988,
            "83": 0.1213,
            "84": 0.11913,
            "85": 0.12002,
            "86": 0.12046,
            "87": 0.11952,
            "88": 0.11819,
            "89": 0.11901,
            "90": 0.11918,
            "91": 0.11919,
            "92": 0.11824,
            "93": 0.12018,
            "94": 0.11929,
            "95": 0.11974,
            "96": 0.11767,
            "97": 0.11845,
            "98": 0.11695,
            "99": 0.11892,
            "100": 0.11948
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_gb200_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.76757,
            "52": 9.64732,
            "53": 9.995,
            "54": 9.88603,
            "55": 9.81763,
            "56": 9.53914,
            "57": 9.38192,
            "58": 9.75896,
            "59": 9.52106,
            "60": 9.42443,
            "61": 9.63665,
            "62": 9.92974,
            "63": 9.29595,
            "64": 9.70631,
            "65": 8.88066,
            "66": 9.64072,
            "67": 9.32146,
            "68": 9.73692,
            "69": 9.75346,
            "70": 9.68289,
            "71": 9.58117,
            "72": 9.52491,
            "73": 9.44094,
            "74": 8.86077,
            "75": 9.36671,
            "76": 9.01691,
            "77": 10.02224,
            "78": 9.68354,
            "79": 9.33325,
            "80": 9.3582,
            "81": 9.43786,
            "82": 9.66102,
            "83": 9.26223,
            "84": 9.37189,
            "85": 9.56652,
            "86": 9.04493,
            "87": 9.5575,
            "88": 9.70541,
            "89": 9.55092,
            "90": 9.79196,
            "91": 9.29173,
            "92": 9.31225,
            "93": 9.0433,
            "94": 8.78683,
            "95": 9.49525,
            "96": 9.48391,
            "97": 9.25966,
            "98": 9.62611,
            "99": 8.85031,
            "100": 9.36043
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2613.0,
            "52": 2647.0,
            "53": 2908.0,
            "54": 2580.0,
            "55": 2486.0,
            "56": 2687.0,
            "57": 2577.0,
            "58": 2824.0,
            "59": 2720.0,
            "60": 2410.0,
            "61": 2744.0,
            "62": 2536.0,
            "63": 2652.0,
            "64": 2918.0,
            "65": 2742.0,
            "66": 2927.0,
            "67": 2920.0,
            "68": 2652.0,
            "69": 3019.0,
            "70": 2996.0,
            "71": 2835.0,
            "72": 2664.0,
            "73": 3211.0,
            "74": 2311.0,
            "75": 2658.0,
            "76": 3155.0,
            "77": 3051.0,
            "78": 3073.0,
            "79": 3116.0,
            "80": 3191.0,
            "81": 3237.0,
            "82": 3218.0,
            "83": 2689.0,
            "84": 3294.0,
            "85": 3209.0,
            "86": 2558.0,
            "87": 3462.0,
            "88": 3287.0,
            "89": 3201.0,
            "90": 3331.0,
            "91": 3183.0,
            "92": 3201.0,
            "93": 2942.0,
            "94": 3274.0,
            "95": 3132.0,
            "96": 3200.0,
            "97": 3054.0,
            "98": 3544.0,
            "99": 3387.0,
            "100": 3192.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 917459968.0,
            "52": 917459968.0,
            "53": 917459968.0,
            "54": 917459968.0,
            "55": 917459968.0,
            "56": 917459968.0,
            "57": 917459968.0,
            "58": 917459968.0,
            "59": 917459968.0,
            "60": 917459968.0,
            "61": 917459968.0,
            "62": 917459968.0,
            "63": 917459968.0,
            "64": 917459968.0,
            "65": 917459968.0,
            "66": 917459968.0,
            "67": 917459968.0,
            "68": 917459968.0,
            "69": 917459968.0,
            "70": 917459968.0,
            "71": 917459968.0,
            "72": 917459968.0,
            "73": 917459968.0,
            "74": 917459968.0,
            "75": 917459968.0,
            "76": 917459968.0,
            "77": 917459968.0,
            "78": 917459968.0,
            "79": 917459968.0,
            "80": 917459968.0,
            "81": 917459968.0,
            "82": 917459968.0,
            "83": 917459968.0,
            "84": 917459968.0,
            "85": 917459968.0,
            "86": 917459968.0,
            "87": 917459968.0,
            "88": 917459968.0,
            "89": 917459968.0,
            "90": 917459968.0,
            "91": 917459968.0,
            "92": 917459968.0,
            "93": 917459968.0,
            "94": 917459968.0,
            "95": 917459968.0,
            "96": 917459968.0,
            "97": 917459968.0,
            "98": 917459968.0,
            "99": 917459968.0,
            "100": 917459968.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2622354432.0,
            "52": 2622355456.0,
            "53": 2622355456.0,
            "54": 2622355456.0,
            "55": 2622355456.0,
            "56": 2622355456.0,
            "57": 2622355456.0,
            "58": 2622355456.0,
            "59": 2622355456.0,
            "60": 2622355456.0,
            "61": 2622355456.0,
            "62": 2622355456.0,
            "63": 2622355456.0,
            "64": 2622355456.0,
            "65": 2622355456.0,
            "66": 2622355456.0,
            "67": 2622355456.0,
            "68": 2622355456.0,
            "69": 2622355456.0,
            "70": 2622355456.0,
            "71": 2622355456.0,
            "72": 2622355456.0,
            "73": 2622355456.0,
            "74": 2622355456.0,
            "75": 2622355456.0,
            "76": 2622355456.0,
            "77": 2622355456.0,
            "78": 2622355456.0,
            "79": 2622355456.0,
            "80": 2622355456.0,
            "81": 2622355456.0,
            "82": 2622355456.0,
            "83": 2622355456.0,
            "84": 2622355456.0,
            "85": 2622355456.0,
            "86": 2622355456.0,
            "87": 2622355456.0,
            "88": 2622355456.0,
            "89": 2622355456.0,
            "90": 2622355456.0,
            "91": 2622355456.0,
            "92": 2622355456.0,
            "93": 2622355456.0,
            "94": 2622355456.0,
            "95": 2622355456.0,
            "96": 2622355456.0,
            "97": 2622355456.0,
            "98": 2622355456.0,
            "99": 2622355456.0,
            "100": 2622355456.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": "nan",
            "52": 2.13245,
            "53": 0.1297,
            "54": 0.11767,
            "55": 0.11927,
            "56": 0.12061,
            "57": 0.12305,
            "58": 0.12043,
            "59": 0.11822,
            "60": 0.11725,
            "61": 0.11813,
            "62": 0.11746,
            "63": 0.11736,
            "64": 0.11897,
            "65": 0.12036,
            "66": 0.11746,
            "67": 0.11937,
            "68": 0.11862,
            "69": 0.11914,
            "70": 0.11949,
            "71": 0.11638,
            "72": 0.11794,
            "73": 0.11866,
            "74": 0.11751,
            "75": 0.11637,
            "76": 0.11834,
            "77": 0.11768,
            "78": 0.11854,
            "79": 0.11727,
            "80": 0.11732,
            "81": 0.11811,
            "82": 0.11878,
            "83": 0.11805,
            "84": 0.11921,
            "85": 0.11932,
            "86": 0.11908,
            "87": 0.12476,
            "88": 0.12628,
            "89": 0.12876,
            "90": 0.12617,
            "91": 0.12743,
            "92": 0.12783,
            "93": 0.12812,
            "94": 0.12493,
            "95": 0.12584,
            "96": 0.12791,
            "97": 0.12455,
            "98": 0.1269,
            "99": 0.12715,
            "100": 0.12714
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.85787,
            "2": 10.87336,
            "3": 10.86822,
            "4": 10.87256,
            "5": 10.87397,
            "6": 10.89632,
            "7": 10.86383,
            "8": 10.87835,
            "9": 10.87398,
            "10": 10.83714,
            "11": 10.86989,
            "12": 10.85948,
            "13": 10.87777,
            "14": 10.87922,
            "15": 10.81888,
            "16": 10.83062,
            "17": 10.78686,
            "18": 10.80152,
            "19": 10.79779,
            "20": 10.7116,
            "21": 10.68645,
            "22": 10.5528,
            "23": 10.70142,
            "24": 10.58526,
            "25": 10.52658,
            "26": 10.58297,
            "27": 10.59488,
            "28": 10.54788,
            "29": 10.55927,
            "30": 10.32815,
            "31": 10.08271,
            "32": 10.44699,
            "33": 10.42754,
            "34": 10.17927,
            "35": 10.24095,
            "36": 10.18089,
            "37": 10.32812,
            "38": 10.16729,
            "39": 10.37344,
            "40": 10.05077,
            "41": 10.10731,
            "42": 10.17806,
            "43": 9.77848,
            "44": 9.91208,
            "45": 9.77394,
            "46": 9.7543,
            "47": 10.09494,
            "48": 9.79522,
            "49": 9.4639,
            "50": 9.86732,
            "51": 9.80376,
            "52": 9.68203,
            "53": 10.02345,
            "54": 9.91631,
            "55": 9.82458,
            "56": 9.56976,
            "57": 9.42674,
            "58": 9.78082,
            "59": 9.53247,
            "60": 9.44592,
            "61": 9.64255,
            "62": 9.94291,
            "63": 9.31767,
            "64": 9.7255,
            "65": 8.88739,
            "66": 9.65691,
            "67": 9.31749,
            "68": 9.73494,
            "69": 9.74868,
            "70": 9.69627,
            "71": 9.57684,
            "72": 9.52424,
            "73": 9.45598,
            "74": 8.88269,
            "75": 9.37587,
            "76": 9.01137,
            "77": 10.0229,
            "78": 9.67961,
            "79": 9.33169,
            "80": 9.35831,
            "81": 9.43394,
            "82": 9.65057,
            "83": 9.25502,
            "84": 9.37136,
            "85": 9.56232,
            "86": 9.03489,
            "87": 9.54613,
            "88": 9.69784,
            "89": 9.54653,
            "90": 9.77622,
            "91": 9.28841,
            "92": 9.30664,
            "93": 9.02649,
            "94": 8.78837,
            "95": 9.48026,
            "96": 9.47969,
            "97": 9.25611,
            "98": 9.6195,
            "99": 8.83827,
            "100": 9.35136
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1848.0,
            "2": 1849.0,
            "3": 1746.0,
            "4": 1950.0,
            "5": 2031.0,
            "6": 1975.0,
            "7": 1943.0,
            "8": 1883.0,
            "9": 1996.0,
            "10": 1630.0,
            "11": 2060.0,
            "12": 1912.0,
            "13": 2031.0,
            "14": 1956.0,
            "15": 1992.0,
            "16": 1950.0,
            "17": 1903.0,
            "18": 1924.0,
            "19": 1905.0,
            "20": 1757.0,
            "21": 1983.0,
            "22": 1985.0,
            "23": 2111.0,
            "24": 1849.0,
            "25": 1871.0,
            "26": 1789.0,
            "27": 1887.0,
            "28": 1973.0,
            "29": 2061.0,
            "30": 2091.0,
            "31": 1643.0,
            "32": 2165.0,
            "33": 2209.0,
            "34": 2005.0,
            "35": 2027.0,
            "36": 2063.0,
            "37": 2374.0,
            "38": 2253.0,
            "39": 2367.0,
            "40": 2182.0,
            "41": 2373.0,
            "42": 2272.0,
            "43": 2154.0,
            "44": 2274.0,
            "45": 2085.0,
            "46": 2228.0,
            "47": 2346.0,
            "48": 2391.0,
            "49": 2150.0,
            "50": 2221.0,
            "51": 2454.0,
            "52": 2520.0,
            "53": 2859.0,
            "54": 2631.0,
            "55": 2425.0,
            "56": 2453.0,
            "57": 2570.0,
            "58": 2639.0,
            "59": 2694.0,
            "60": 2549.0,
            "61": 2746.0,
            "62": 2590.0,
            "63": 2543.0,
            "64": 3028.0,
            "65": 2591.0,
            "66": 2852.0,
            "67": 3044.0,
            "68": 2822.0,
            "69": 2829.0,
            "70": 2980.0,
            "71": 2878.0,
            "72": 2674.0,
            "73": 2920.0,
            "74": 2280.0,
            "75": 2702.0,
            "76": 3061.0,
            "77": 3096.0,
            "78": 3149.0,
            "79": 3172.0,
            "80": 3033.0,
            "81": 3366.0,
            "82": 3265.0,
            "83": 2807.0,
            "84": 3281.0,
            "85": 3266.0,
            "86": 2661.0,
            "87": 3453.0,
            "88": 3202.0,
            "89": 3009.0,
            "90": 3259.0,
            "91": 3051.0,
            "92": 3160.0,
            "93": 2954.0,
            "94": 3471.0,
            "95": 3123.0,
            "96": 3225.0,
            "97": 3116.0,
            "98": 3551.0,
            "99": 3291.0,
            "100": 3140.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 917459968.0,
            "2": 917459968.0,
            "3": 917459968.0,
            "4": 917459968.0,
            "5": 917459968.0,
            "6": 917459968.0,
            "7": 917459968.0,
            "8": 917459968.0,
            "9": 917459968.0,
            "10": 917459968.0,
            "11": 917459968.0,
            "12": 917459968.0,
            "13": 917459968.0,
            "14": 917459968.0,
            "15": 917459968.0,
            "16": 917459968.0,
            "17": 917459968.0,
            "18": 917459968.0,
            "19": 917459968.0,
            "20": 917459968.0,
            "21": 917459968.0,
            "22": 917459968.0,
            "23": 917459968.0,
            "24": 917459968.0,
            "25": 917459968.0,
            "26": 917459968.0,
            "27": 917459968.0,
            "28": 917459968.0,
            "29": 917459968.0,
            "30": 917459968.0,
            "31": 917459968.0,
            "32": 917459968.0,
            "33": 917459968.0,
            "34": 917459968.0,
            "35": 917459968.0,
            "36": 917459968.0,
            "37": 917459968.0,
            "38": 917459968.0,
            "39": 917459968.0,
            "40": 917459968.0,
            "41": 917459968.0,
            "42": 917459968.0,
            "43": 917459968.0,
            "44": 917459968.0,
            "45": 917459968.0,
            "46": 917459968.0,
            "47": 917459968.0,
            "48": 917459968.0,
            "49": 917459968.0,
            "50": 917459968.0,
            "51": 917459968.0,
            "52": 917459968.0,
            "53": 917459968.0,
            "54": 917459968.0,
            "55": 917459968.0,
            "56": 917459968.0,
            "57": 917459968.0,
            "58": 917459968.0,
            "59": 917459968.0,
            "60": 917459968.0,
            "61": 917459968.0,
            "62": 917459968.0,
            "63": 917459968.0,
            "64": 917459968.0,
            "65": 917459968.0,
            "66": 917459968.0,
            "67": 917459968.0,
            "68": 917459968.0,
            "69": 917459968.0,
            "70": 917459968.0,
            "71": 917459968.0,
            "72": 917459968.0,
            "73": 917459968.0,
            "74": 917459968.0,
            "75": 917459968.0,
            "76": 917459968.0,
            "77": 917459968.0,
            "78": 917459968.0,
            "79": 917459968.0,
            "80": 917459968.0,
            "81": 917459968.0,
            "82": 917459968.0,
            "83": 917459968.0,
            "84": 917459968.0,
            "85": 917459968.0,
            "86": 917459968.0,
            "87": 917459968.0,
            "88": 917459968.0,
            "89": 917459968.0,
            "90": 917459968.0,
            "91": 917459968.0,
            "92": 917459968.0,
            "93": 917459968.0,
            "94": 917459968.0,
            "95": 917459968.0,
            "96": 917459968.0,
            "97": 917459968.0,
            "98": 917459968.0,
            "99": 917459968.0,
            "100": 917459968.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2237723648.0,
            "2": 2596141056.0,
            "3": 2596141056.0,
            "4": 2596141056.0,
            "5": 2596141056.0,
            "6": 2596141056.0,
            "7": 2596141056.0,
            "8": 2596141056.0,
            "9": 2596141056.0,
            "10": 2596141056.0,
            "11": 2596141056.0,
            "12": 2596141056.0,
            "13": 2596141056.0,
            "14": 2596141056.0,
            "15": 2596141056.0,
            "16": 2596141056.0,
            "17": 2596141056.0,
            "18": 2596141056.0,
            "19": 2596141056.0,
            "20": 2596141056.0,
            "21": 2596141056.0,
            "22": 2596141056.0,
            "23": 2596141056.0,
            "24": 2596141056.0,
            "25": 2596141056.0,
            "26": 2596141056.0,
            "27": 2596141056.0,
            "28": 2596141056.0,
            "29": 2596141056.0,
            "30": 2596141056.0,
            "31": 2596141056.0,
            "32": 2596141056.0,
            "33": 2596141056.0,
            "34": 2596141056.0,
            "35": 2596141056.0,
            "36": 2596141056.0,
            "37": 2596141056.0,
            "38": 2596141056.0,
            "39": 2596141056.0,
            "40": 2596141056.0,
            "41": 2596141056.0,
            "42": 2596141056.0,
            "43": 2596141056.0,
            "44": 2596141056.0,
            "45": 2596141056.0,
            "46": 2596141056.0,
            "47": 2596141056.0,
            "48": 2596141056.0,
            "49": 2596141056.0,
            "50": 2596141056.0,
            "51": 2596141056.0,
            "52": 2596141056.0,
            "53": 2596141056.0,
            "54": 2596141056.0,
            "55": 2596141056.0,
            "56": 2596141056.0,
            "57": 2596141056.0,
            "58": 2596141056.0,
            "59": 2596141056.0,
            "60": 2596141056.0,
            "61": 2596141056.0,
            "62": 2596141056.0,
            "63": 2596141056.0,
            "64": 2596141056.0,
            "65": 2596141056.0,
            "66": 2596141056.0,
            "67": 2596141056.0,
            "68": 2596141056.0,
            "69": 2596141056.0,
            "70": 2596141056.0,
            "71": 2596141056.0,
            "72": 2596141056.0,
            "73": 2596141056.0,
            "74": 2596141056.0,
            "75": 2596141056.0,
            "76": 2596141056.0,
            "77": 2596141056.0,
            "78": 2596141056.0,
            "79": 2596141056.0,
            "80": 2596141056.0,
            "81": 2596141056.0,
            "82": 2596141056.0,
            "83": 2596141056.0,
            "84": 2596141056.0,
            "85": 2596141056.0,
            "86": 2596141056.0,
            "87": 2596141056.0,
            "88": 2596141056.0,
            "89": 2596141056.0,
            "90": 2596141056.0,
            "91": 2596141056.0,
            "92": 2596141056.0,
            "93": 2596141056.0,
            "94": 2596141056.0,
            "95": 2596141056.0,
            "96": 2596141056.0,
            "97": 2596141056.0,
            "98": 2596141056.0,
            "99": 2596141056.0,
            "100": 2596141056.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 4.9552,
            "3": 0.10033,
            "4": 0.08196,
            "5": 0.08147,
            "6": 0.08085,
            "7": 0.08113,
            "8": 0.08219,
            "9": 0.08231,
            "10": 0.08108,
            "11": 0.08057,
            "12": 0.08093,
            "13": 0.08127,
            "14": 0.08128,
            "15": 0.08213,
            "16": 0.08119,
            "17": 0.08122,
            "18": 0.08074,
            "19": 0.08212,
            "20": 0.08339,
            "21": 0.08221,
            "22": 0.08215,
            "23": 0.0811,
            "24": 0.08084,
            "25": 0.08167,
            "26": 0.08129,
            "27": 0.08149,
            "28": 0.08067,
            "29": 0.08054,
            "30": 0.08096,
            "31": 0.08069,
            "32": 0.08014,
            "33": 0.07984,
            "34": 0.07984,
            "35": 0.0805,
            "36": 0.08039,
            "37": 0.07994,
            "38": 0.08113,
            "39": 0.08006,
            "40": 0.07969,
            "41": 0.08039,
            "42": 0.08012,
            "43": 0.08077,
            "44": 0.08006,
            "45": 0.08062,
            "46": 0.08095,
            "47": 0.0803,
            "48": 0.08011,
            "49": 0.08053,
            "50": 0.08008,
            "51": 0.09167,
            "52": 0.08906,
            "53": 0.08856,
            "54": 0.08817,
            "55": 0.08179,
            "56": 0.08141,
            "57": 0.08275,
            "58": 0.08331,
            "59": 0.08156,
            "60": 0.08245,
            "61": 0.08401,
            "62": 0.08406,
            "63": 0.08119,
            "64": 0.08192,
            "65": 0.08124,
            "66": 0.08077,
            "67": 0.08064,
            "68": 0.08048,
            "69": 0.08077,
            "70": 0.0805,
            "71": 0.08053,
            "72": 0.08112,
            "73": 0.08447,
            "74": 0.08094,
            "75": 0.0807,
            "76": 0.08106,
            "77": 0.08073,
            "78": 0.0819,
            "79": 0.0826,
            "80": 0.08236,
            "81": 0.08103,
            "82": 0.08046,
            "83": 0.081,
            "84": 0.08119,
            "85": 0.08089,
            "86": 0.08193,
            "87": 0.08152,
            "88": 0.08194,
            "89": 0.0812,
            "90": 0.08063,
            "91": 0.08092,
            "92": 0.08068,
            "93": 0.08076,
            "94": 0.08091,
            "95": 0.08087,
            "96": 0.08077,
            "97": 0.0807,
            "98": 0.08029,
            "99": 0.08152,
            "100": 0.08047
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.80381,
            "52": 9.68202,
            "53": 10.02345,
            "54": 9.91634,
            "55": 9.82456,
            "56": 9.56974,
            "57": 9.42672,
            "58": 9.78081,
            "59": 9.53243,
            "60": 9.44593,
            "61": 9.64254,
            "62": 9.94293,
            "63": 9.31764,
            "64": 9.72548,
            "65": 8.88739,
            "66": 9.65691,
            "67": 9.31749,
            "68": 9.73495,
            "69": 9.74866,
            "70": 9.69625,
            "71": 9.57689,
            "72": 9.52422,
            "73": 9.45595,
            "74": 8.88269,
            "75": 9.37584,
            "76": 9.01136,
            "77": 10.02287,
            "78": 9.67963,
            "79": 9.33172,
            "80": 9.35826,
            "81": 9.43394,
            "82": 9.65054,
            "83": 9.25503,
            "84": 9.3714,
            "85": 9.5623,
            "86": 9.03489,
            "87": 9.54614,
            "88": 9.69785,
            "89": 9.54656,
            "90": 9.77624,
            "91": 9.2884,
            "92": 9.30662,
            "93": 9.02647,
            "94": 8.78837,
            "95": 9.48027,
            "96": 9.47974,
            "97": 9.25611,
            "98": 9.61949,
            "99": 8.83824,
            "100": 9.35135
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2514.0,
            "52": 2513.0,
            "53": 2894.0,
            "54": 2656.0,
            "55": 2348.0,
            "56": 2506.0,
            "57": 2501.0,
            "58": 2770.0,
            "59": 2681.0,
            "60": 2434.0,
            "61": 2776.0,
            "62": 2596.0,
            "63": 2617.0,
            "64": 3012.0,
            "65": 2657.0,
            "66": 2947.0,
            "67": 3089.0,
            "68": 2818.0,
            "69": 2909.0,
            "70": 3025.0,
            "71": 2924.0,
            "72": 2702.0,
            "73": 2947.0,
            "74": 2306.0,
            "75": 2791.0,
            "76": 3093.0,
            "77": 3107.0,
            "78": 3134.0,
            "79": 3205.0,
            "80": 3123.0,
            "81": 3290.0,
            "82": 3172.0,
            "83": 2719.0,
            "84": 3328.0,
            "85": 3255.0,
            "86": 2546.0,
            "87": 3472.0,
            "88": 3068.0,
            "89": 2953.0,
            "90": 3300.0,
            "91": 3154.0,
            "92": 3061.0,
            "93": 2889.0,
            "94": 3535.0,
            "95": 3078.0,
            "96": 3181.0,
            "97": 3135.0,
            "98": 3569.0,
            "99": 3319.0,
            "100": 3223.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 917459968.0,
            "52": 917459968.0,
            "53": 917459968.0,
            "54": 917459968.0,
            "55": 917459968.0,
            "56": 917459968.0,
            "57": 917459968.0,
            "58": 917459968.0,
            "59": 917459968.0,
            "60": 917459968.0,
            "61": 917459968.0,
            "62": 917459968.0,
            "63": 917459968.0,
            "64": 917459968.0,
            "65": 917459968.0,
            "66": 917459968.0,
            "67": 917459968.0,
            "68": 917459968.0,
            "69": 917459968.0,
            "70": 917459968.0,
            "71": 917459968.0,
            "72": 917459968.0,
            "73": 917459968.0,
            "74": 917459968.0,
            "75": 917459968.0,
            "76": 917459968.0,
            "77": 917459968.0,
            "78": 917459968.0,
            "79": 917459968.0,
            "80": 917459968.0,
            "81": 917459968.0,
            "82": 917459968.0,
            "83": 917459968.0,
            "84": 917459968.0,
            "85": 917459968.0,
            "86": 917459968.0,
            "87": 917459968.0,
            "88": 917459968.0,
            "89": 917459968.0,
            "90": 917459968.0,
            "91": 917459968.0,
            "92": 917459968.0,
            "93": 917459968.0,
            "94": 917459968.0,
            "95": 917459968.0,
            "96": 917459968.0,
            "97": 917459968.0,
            "98": 917459968.0,
            "99": 917459968.0,
            "100": 917459968.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2596140032.0,
            "52": 2596141056.0,
            "53": 2596141056.0,
            "54": 2596141056.0,
            "55": 2596141056.0,
            "56": 2596141056.0,
            "57": 2596141056.0,
            "58": 2596141056.0,
            "59": 2596141056.0,
            "60": 2596141056.0,
            "61": 2596141056.0,
            "62": 2596141056.0,
            "63": 2596141056.0,
            "64": 2596141056.0,
            "65": 2596141056.0,
            "66": 2596141056.0,
            "67": 2596141056.0,
            "68": 2596141056.0,
            "69": 2596141056.0,
            "70": 2596141056.0,
            "71": 2596141056.0,
            "72": 2596141056.0,
            "73": 2596141056.0,
            "74": 2596141056.0,
            "75": 2596141056.0,
            "76": 2596141056.0,
            "77": 2596141056.0,
            "78": 2596141056.0,
            "79": 2596141056.0,
            "80": 2596141056.0,
            "81": 2596141056.0,
            "82": 2596141056.0,
            "83": 2596141056.0,
            "84": 2596141056.0,
            "85": 2596141056.0,
            "86": 2596141056.0,
            "87": 2596141056.0,
            "88": 2596141056.0,
            "89": 2596141056.0,
            "90": 2596141056.0,
            "91": 2596141056.0,
            "92": 2596141056.0,
            "93": 2596141056.0,
            "94": 2596141056.0,
            "95": 2596141056.0,
            "96": 2596141056.0,
            "97": 2596141056.0,
            "98": 2596141056.0,
            "99": 2596141056.0,
            "100": 2596141056.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 7.16514,
            "52": 0.11315,
            "53": 0.08114,
            "54": 0.08317,
            "55": 0.08019,
            "56": 0.08314,
            "57": 0.08621,
            "58": 0.08016,
            "59": 0.07921,
            "60": 0.08005,
            "61": 0.08103,
            "62": 0.10234,
            "63": 0.1001,
            "64": 0.4876,
            "65": 0.08127,
            "66": 0.079,
            "67": 0.07859,
            "68": 0.08085,
            "69": 0.07943,
            "70": 0.07842,
            "71": 0.07899,
            "72": 0.07958,
            "73": 0.07925,
            "74": 0.08017,
            "75": 0.07902,
            "76": 0.08039,
            "77": 0.07802,
            "78": 0.07857,
            "79": 0.07907,
            "80": 0.07806,
            "81": 0.07858,
            "82": 0.08046,
            "83": 0.07775,
            "84": 0.07777,
            "85": 0.07752,
            "86": 0.07844,
            "87": 0.07834,
            "88": 0.07837,
            "89": 0.07893,
            "90": 0.07826,
            "91": 0.07839,
            "92": 0.07815,
            "93": 0.07767,
            "94": 0.0784,
            "95": 0.07785,
            "96": 0.07909,
            "97": 0.07789,
            "98": 0.0771,
            "99": 0.07799,
            "100": 0.08104
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_lts_dgx_a100.json
================================================
{}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 2
  --position-embedding-type: rope
  --no-ckpt-fully-parallel-save: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-checkpoint-opt_param-scheduler: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: ckpt-resume


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.84865, "5": 10.86283, "10": 10.82287, "15": 10.81406, "20": 10.71958, "25": 10.52997, "30": 10.33749, "35": 10.24173, "40": 10.05124, "45": 9.7664, "50": 9.8548, "55": 9.82389, "60": 9.44284, "65": 8.8916, "70": 9.67942, "75": 9.3677, "80": 9.3577, "85": 9.56091, "90": 9.77126, "95": 9.48115, "100": 9.35043}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1658.0, "5": 1895.0, "10": 1583.0, "15": 1981.0, "20": 1769.0, "25": 1861.0, "30": 2118.0, "35": 2014.0, "40": 2161.0, "45": 2127.0, "50": 2345.0, "55": 2450.0, "60": 2464.0, "65": 2648.0, "70": 2993.0, "75": 2882.0, "80": 3280.0, "85": 3288.0, "90": 3230.0, "95": 3175.0, "100": 3227.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 890195968.0, "5": 890195968.0, "10": 890195968.0, "15": 890195968.0, "20": 890195968.0, "25": 890195968.0, "30": 890195968.0, "35": 890195968.0, "40": 890195968.0, "45": 890195968.0, "50": 890195968.0, "55": 890195968.0, "60": 890195968.0, "65": 890195968.0, "70": 890195968.0, "75": 890195968.0, "80": 890195968.0, "85": 890195968.0, "90": 890195968.0, "95": 890195968.0, "100": 890195968.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2236149760.0, "5": 2596664320.0, "10": 2596664320.0, "15": 2596664320.0, "20": 2596664320.0, "25": 2596664320.0, "30": 2596664320.0, "35": 2596664320.0, "40": 2596664320.0, "45": 2596664320.0, "50": 2596664320.0, "55": 2596664320.0, "60": 2596664320.0, "65": 2596664320.0, "70": 2596664320.0, "75": 2596664320.0, "80": 2596664320.0, "85": 2596664320.0, "90": 2596664320.0, "95": 2596664320.0, "100": 2596664320.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 5.6407, "5": 0.18186, "10": 0.18299, "15": 0.18421, "20": 0.18363, "25": 0.18346, "30": 0.18368, "35": 0.18314, "40": 0.18235, "45": 0.18352, "50": 0.18251, "55": 0.18504, "60": 0.1864, "65": 0.18653, "70": 0.18691, "75": 0.18659, "80": 0.18573, "85": 0.18577, "90": 0.18632, "95": 0.18639, "100": 0.18585}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.7692,
            "2": 10.78173,
            "3": 10.77785,
            "4": 10.75155,
            "5": 10.80909,
            "6": 10.8218,
            "7": 10.80242,
            "8": 10.78781,
            "9": 10.7948,
            "10": 10.713,
            "11": 10.85088,
            "12": 10.80067,
            "13": 10.82614,
            "14": 10.84338,
            "15": 10.75514,
            "16": 10.75194,
            "17": 10.70801,
            "18": 10.74736,
            "19": 10.74723,
            "20": 10.64347,
            "21": 10.60434,
            "22": 10.43223,
            "23": 10.66534,
            "24": 10.50025,
            "25": 10.43523,
            "26": 10.51418,
            "27": 10.5415,
            "28": 10.51383,
            "29": 10.53731,
            "30": 10.25937,
            "31": 9.97666,
            "32": 10.39972,
            "33": 10.38587,
            "34": 10.11012,
            "35": 10.17419,
            "36": 10.11601,
            "37": 10.26042,
            "38": 10.0751,
            "39": 10.32912,
            "40": 9.9687,
            "41": 10.05131,
            "42": 10.12745,
            "43": 9.70822,
            "44": 9.83332,
            "45": 9.70556,
            "46": 9.7136,
            "47": 10.05915,
            "48": 9.7456,
            "49": 9.40552,
            "50": 9.80892,
            "51": 9.76773,
            "52": 9.64757,
            "53": 9.99521,
            "54": 9.88624,
            "55": 9.81783,
            "56": 9.53944,
            "57": 9.38198,
            "58": 9.75913,
            "59": 9.52125,
            "60": 9.42463,
            "61": 9.63669,
            "62": 9.93001,
            "63": 9.29627,
            "64": 9.70638,
            "65": 8.88076,
            "66": 9.64079,
            "67": 9.32154,
            "68": 9.737,
            "69": 9.75369,
            "70": 9.68294,
            "71": 9.58129,
            "72": 9.52492,
            "73": 9.44113,
            "74": 8.86077,
            "75": 9.3667,
            "76": 9.01682,
            "77": 10.0224,
            "78": 9.68369,
            "79": 9.33323,
            "80": 9.35819,
            "81": 9.43805,
            "82": 9.66108,
            "83": 9.26227,
            "84": 9.37195,
            "85": 9.56661,
            "86": 9.04515,
            "87": 9.55767,
            "88": 9.70545,
            "89": 9.55104,
            "90": 9.79186,
            "91": 9.29174,
            "92": 9.31247,
            "93": 9.04313,
            "94": 8.7869,
            "95": 9.49543,
            "96": 9.48418,
            "97": 9.25973,
            "98": 9.62635,
            "99": 8.85054,
            "100": 9.36076
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1750.0,
            "2": 1874.0,
            "3": 1769.0,
            "4": 1936.0,
            "5": 2122.0,
            "6": 2095.0,
            "7": 2027.0,
            "8": 1845.0,
            "9": 2127.0,
            "10": 1456.0,
            "11": 1996.0,
            "12": 1715.0,
            "13": 2108.0,
            "14": 1919.0,
            "15": 2047.0,
            "16": 1932.0,
            "17": 2016.0,
            "18": 1872.0,
            "19": 1921.0,
            "20": 1768.0,
            "21": 1953.0,
            "22": 1836.0,
            "23": 2100.0,
            "24": 1817.0,
            "25": 1809.0,
            "26": 1841.0,
            "27": 2005.0,
            "28": 2109.0,
            "29": 2055.0,
            "30": 1949.0,
            "31": 1736.0,
            "32": 2070.0,
            "33": 2162.0,
            "34": 1964.0,
            "35": 2007.0,
            "36": 2021.0,
            "37": 2425.0,
            "38": 2329.0,
            "39": 2430.0,
            "40": 2340.0,
            "41": 2324.0,
            "42": 2289.0,
            "43": 2097.0,
            "44": 2349.0,
            "45": 2282.0,
            "46": 2442.0,
            "47": 2459.0,
            "48": 2414.0,
            "49": 2282.0,
            "50": 2385.0,
            "51": 2647.0,
            "52": 2648.0,
            "53": 2878.0,
            "54": 2654.0,
            "55": 2580.0,
            "56": 2658.0,
            "57": 2547.0,
            "58": 2739.0,
            "59": 2779.0,
            "60": 2349.0,
            "61": 2741.0,
            "62": 2617.0,
            "63": 2512.0,
            "64": 2800.0,
            "65": 2697.0,
            "66": 2966.0,
            "67": 2952.0,
            "68": 2833.0,
            "69": 3029.0,
            "70": 2977.0,
            "71": 2813.0,
            "72": 2664.0,
            "73": 3085.0,
            "74": 2292.0,
            "75": 2810.0,
            "76": 3025.0,
            "77": 3025.0,
            "78": 3037.0,
            "79": 3181.0,
            "80": 3234.0,
            "81": 3273.0,
            "82": 3294.0,
            "83": 2707.0,
            "84": 3332.0,
            "85": 3336.0,
            "86": 2585.0,
            "87": 3448.0,
            "88": 3239.0,
            "89": 3137.0,
            "90": 3341.0,
            "91": 3188.0,
            "92": 3246.0,
            "93": 2823.0,
            "94": 3358.0,
            "95": 3202.0,
            "96": 3118.0,
            "97": 3163.0,
            "98": 3645.0,
            "99": 3345.0,
            "100": 3201.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 917459968.0,
            "2": 917459968.0,
            "3": 917459968.0,
            "4": 917459968.0,
            "5": 917459968.0,
            "6": 917459968.0,
            "7": 917459968.0,
            "8": 917459968.0,
            "9": 917459968.0,
            "10": 917459968.0,
            "11": 917459968.0,
            "12": 917459968.0,
            "13": 917459968.0,
            "14": 917459968.0,
            "15": 917459968.0,
            "16": 917459968.0,
            "17": 917459968.0,
            "18": 917459968.0,
            "19": 917459968.0,
            "20": 917459968.0,
            "21": 917459968.0,
            "22": 917459968.0,
            "23": 917459968.0,
            "24": 917459968.0,
            "25": 917459968.0,
            "26": 917459968.0,
            "27": 917459968.0,
            "28": 917459968.0,
            "29": 917459968.0,
            "30": 917459968.0,
            "31": 917459968.0,
            "32": 917459968.0,
            "33": 917459968.0,
            "34": 917459968.0,
            "35": 917459968.0,
            "36": 917459968.0,
            "37": 917459968.0,
            "38": 917459968.0,
            "39": 917459968.0,
            "40": 917459968.0,
            "41": 917459968.0,
            "42": 917459968.0,
            "43": 917459968.0,
            "44": 917459968.0,
            "45": 917459968.0,
            "46": 917459968.0,
            "47": 917459968.0,
            "48": 917459968.0,
            "49": 917459968.0,
            "50": 917459968.0,
            "51": 917459968.0,
            "52": 917459968.0,
            "53": 917459968.0,
            "54": 917459968.0,
            "55": 917459968.0,
            "56": 917459968.0,
            "57": 917459968.0,
            "58": 917459968.0,
            "59": 917459968.0,
            "60": 917459968.0,
            "61": 917459968.0,
            "62": 917459968.0,
            "63": 917459968.0,
            "64": 917459968.0,
            "65": 917459968.0,
            "66": 917459968.0,
            "67": 917459968.0,
            "68": 917459968.0,
            "69": 917459968.0,
            "70": 917459968.0,
            "71": 917459968.0,
            "72": 917459968.0,
            "73": 917459968.0,
            "74": 917459968.0,
            "75": 917459968.0,
            "76": 917459968.0,
            "77": 917459968.0,
            "78": 917459968.0,
            "79": 917459968.0,
            "80": 917459968.0,
            "81": 917459968.0,
            "82": 917459968.0,
            "83": 917459968.0,
            "84": 917459968.0,
            "85": 917459968.0,
            "86": 917459968.0,
            "87": 917459968.0,
            "88": 917459968.0,
            "89": 917459968.0,
            "90": 917459968.0,
            "91": 917459968.0,
            "92": 917459968.0,
            "93": 917459968.0,
            "94": 917459968.0,
            "95": 917459968.0,
            "96": 917459968.0,
            "97": 917459968.0,
            "98": 917459968.0,
            "99": 917459968.0,
            "100": 917459968.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2266035200.0,
            "2": 2624452608.0,
            "3": 2624452608.0,
            "4": 2624452608.0,
            "5": 2624452608.0,
            "6": 2624452608.0,
            "7": 2624452608.0,
            "8": 2624452608.0,
            "9": 2624452608.0,
            "10": 2624452608.0,
            "11": 2624452608.0,
            "12": 2624452608.0,
            "13": 2624452608.0,
            "14": 2624452608.0,
            "15": 2624452608.0,
            "16": 2624452608.0,
            "17": 2624452608.0,
            "18": 2624452608.0,
            "19": 2624452608.0,
            "20": 2624452608.0,
            "21": 2624452608.0,
            "22": 2624452608.0,
            "23": 2624452608.0,
            "24": 2624452608.0,
            "25": 2624452608.0,
            "26": 2624452608.0,
            "27": 2624452608.0,
            "28": 2624452608.0,
            "29": 2624452608.0,
            "30": 2624452608.0,
            "31": 2624452608.0,
            "32": 2624452608.0,
            "33": 2624452608.0,
            "34": 2624452608.0,
            "35": 2624452608.0,
            "36": 2624452608.0,
            "37": 2624452608.0,
            "38": 2624452608.0,
            "39": 2624452608.0,
            "40": 2624452608.0,
            "41": 2624452608.0,
            "42": 2624452608.0,
            "43": 2624452608.0,
            "44": 2624452608.0,
            "45": 2624452608.0,
            "46": 2624452608.0,
            "47": 2624452608.0,
            "48": 2624452608.0,
            "49": 2624452608.0,
            "50": 2624452608.0,
            "51": 2624452608.0,
            "52": 2624452608.0,
            "53": 2624452608.0,
            "54": 2624452608.0,
            "55": 2624452608.0,
            "56": 2624452608.0,
            "57": 2624452608.0,
            "58": 2624452608.0,
            "59": 2624452608.0,
            "60": 2624452608.0,
            "61": 2624452608.0,
            "62": 2624452608.0,
            "63": 2624452608.0,
            "64": 2624452608.0,
            "65": 2624452608.0,
            "66": 2624452608.0,
            "67": 2624452608.0,
            "68": 2624452608.0,
            "69": 2624452608.0,
            "70": 2624452608.0,
            "71": 2624452608.0,
            "72": 2624452608.0,
            "73": 2624452608.0,
            "74": 2624452608.0,
            "75": 2624452608.0,
            "76": 2624452608.0,
            "77": 2624452608.0,
            "78": 2624452608.0,
            "79": 2624452608.0,
            "80": 2624452608.0,
            "81": 2624452608.0,
            "82": 2624452608.0,
            "83": 2624452608.0,
            "84": 2624452608.0,
            "85": 2624452608.0,
            "86": 2624452608.0,
            "87": 2624452608.0,
            "88": 2624452608.0,
            "89": 2624452608.0,
            "90": 2624452608.0,
            "91": 2624452608.0,
            "92": 2624452608.0,
            "93": 2624452608.0,
            "94": 2624452608.0,
            "95": 2624452608.0,
            "96": 2624452608.0,
            "97": 2624452608.0,
            "98": 2624452608.0,
            "99": 2624452608.0,
            "100": 2624452608.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 2.44626,
            "3": 0.14544,
            "4": 0.14069,
            "5": 0.13132,
            "6": 0.13447,
            "7": 0.13519,
            "8": 0.13562,
            "9": 0.13513,
            "10": 0.13387,
            "11": 0.13378,
            "12": 0.13575,
            "13": 0.13462,
            "14": 0.13384,
            "15": 0.13412,
            "16": 0.13347,
            "17": 0.13555,
            "18": 0.13515,
            "19": 0.13443,
            "20": 0.14433,
            "21": 0.14638,
            "22": 0.14561,
            "23": 0.13968,
            "24": 0.13694,
            "25": 0.14479,
            "26": 0.14038,
            "27": 0.1473,
            "28": 0.14099,
            "29": 0.13829,
            "30": 0.13782,
            "31": 0.13746,
            "32": 0.13573,
            "33": 0.13325,
            "34": 0.14385,
            "35": 0.14367,
            "36": 0.14113,
            "37": 0.1394,
            "38": 0.136,
            "39": 0.13678,
            "40": 0.13539,
            "41": 0.1364,
            "42": 0.13593,
            "43": 0.13738,
            "44": 0.13238,
            "45": 0.13667,
            "46": 0.14472,
            "47": 0.1358,
            "48": 0.13697,
            "49": 0.13391,
            "50": 0.13536,
            "51": 0.16637,
            "52": 0.15213,
            "53": 0.14685,
            "54": 0.14134,
            "55": 0.14007,
            "56": 0.13524,
            "57": 0.13779,
            "58": 0.13841,
            "59": 0.13821,
            "60": 0.13687,
            "61": 0.13663,
            "62": 0.13401,
            "63": 0.13389,
            "64": 0.13289,
            "65": 0.13362,
            "66": 0.13754,
            "67": 0.13473,
            "68": 0.13402,
            "69": 0.13491,
            "70": 0.13536,
            "71": 0.13258,
            "72": 0.13482,
            "73": 0.13371,
            "74": 0.13507,
            "75": 0.13595,
            "76": 0.13613,
            "77": 0.13395,
            "78": 0.13252,
            "79": 0.13394,
            "80": 0.13329,
            "81": 0.13388,
            "82": 0.13407,
            "83": 0.13522,
            "84": 0.13579,
            "85": 0.13452,
            "86": 0.13422,
            "87": 0.13388,
            "88": 0.1343,
            "89": 0.13546,
            "90": 0.13522,
            "91": 0.13458,
            "92": 0.1341,
            "93": 0.13519,
            "94": 0.13534,
            "95": 0.13521,
            "96": 0.13886,
            "97": 0.13832,
            "98": 0.14048,
            "99": 0.14022,
            "100": 0.13732
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_gb200_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.76773,
            "52": 9.64757,
            "53": 9.99521,
            "54": 9.88624,
            "55": 9.81783,
            "56": 9.53944,
            "57": 9.38198,
            "58": 9.75913,
            "59": 9.52125,
            "60": 9.42463,
            "61": 9.63669,
            "62": 9.93001,
            "63": 9.29627,
            "64": 9.70638,
            "65": 8.88076,
            "66": 9.64079,
            "67": 9.32154,
            "68": 9.737,
            "69": 9.75369,
            "70": 9.68294,
            "71": 9.58129,
            "72": 9.52492,
            "73": 9.44113,
            "74": 8.86077,
            "75": 9.3667,
            "76": 9.01682,
            "77": 10.0224,
            "78": 9.68369,
            "79": 9.33323,
            "80": 9.35819,
            "81": 9.43805,
            "82": 9.66108,
            "83": 9.26227,
            "84": 9.37195,
            "85": 9.56661,
            "86": 9.04515,
            "87": 9.55767,
            "88": 9.70545,
            "89": 9.55104,
            "90": 9.79186,
            "91": 9.29174,
            "92": 9.31247,
            "93": 9.04313,
            "94": 8.7869,
            "95": 9.49543,
            "96": 9.48418,
            "97": 9.25973,
            "98": 9.62635,
            "99": 8.85054,
            "100": 9.36076
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2647.0,
            "52": 2648.0,
            "53": 2878.0,
            "54": 2654.0,
            "55": 2580.0,
            "56": 2658.0,
            "57": 2547.0,
            "58": 2739.0,
            "59": 2779.0,
            "60": 2349.0,
            "61": 2741.0,
            "62": 2617.0,
            "63": 2512.0,
            "64": 2800.0,
            "65": 2697.0,
            "66": 2966.0,
            "67": 2952.0,
            "68": 2833.0,
            "69": 3029.0,
            "70": 2977.0,
            "71": 2813.0,
            "72": 2664.0,
            "73": 3085.0,
            "74": 2292.0,
            "75": 2810.0,
            "76": 3025.0,
            "77": 3025.0,
            "78": 3037.0,
            "79": 3181.0,
            "80": 3234.0,
            "81": 3273.0,
            "82": 3294.0,
            "83": 2707.0,
            "84": 3332.0,
            "85": 3336.0,
            "86": 2585.0,
            "87": 3448.0,
            "88": 3239.0,
            "89": 3137.0,
            "90": 3341.0,
            "91": 3188.0,
            "92": 3246.0,
            "93": 2823.0,
            "94": 3358.0,
            "95": 3202.0,
            "96": 3118.0,
            "97": 3163.0,
            "98": 3645.0,
            "99": 3345.0,
            "100": 3201.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 917459968.0,
            "52": 917459968.0,
            "53": 917459968.0,
            "54": 917459968.0,
            "55": 917459968.0,
            "56": 917459968.0,
            "57": 917459968.0,
            "58": 917459968.0,
            "59": 917459968.0,
            "60": 917459968.0,
            "61": 917459968.0,
            "62": 917459968.0,
            "63": 917459968.0,
            "64": 917459968.0,
            "65": 917459968.0,
            "66": 917459968.0,
            "67": 917459968.0,
            "68": 917459968.0,
            "69": 917459968.0,
            "70": 917459968.0,
            "71": 917459968.0,
            "72": 917459968.0,
            "73": 917459968.0,
            "74": 917459968.0,
            "75": 917459968.0,
            "76": 917459968.0,
            "77": 917459968.0,
            "78": 917459968.0,
            "79": 917459968.0,
            "80": 917459968.0,
            "81": 917459968.0,
            "82": 917459968.0,
            "83": 917459968.0,
            "84": 917459968.0,
            "85": 917459968.0,
            "86": 917459968.0,
            "87": 917459968.0,
            "88": 917459968.0,
            "89": 917459968.0,
            "90": 917459968.0,
            "91": 917459968.0,
            "92": 917459968.0,
            "93": 917459968.0,
            "94": 917459968.0,
            "95": 917459968.0,
            "96": 917459968.0,
            "97": 917459968.0,
            "98": 917459968.0,
            "99": 917459968.0,
            "100": 917459968.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2625500160.0,
            "52": 2625501184.0,
            "53": 2625501184.0,
            "54": 2625501184.0,
            "55": 2625501184.0,
            "56": 2625501184.0,
            "57": 2625501184.0,
            "58": 2625501184.0,
            "59": 2625501184.0,
            "60": 2625501184.0,
            "61": 2625501184.0,
            "62": 2625501184.0,
            "63": 2625501184.0,
            "64": 2625501184.0,
            "65": 2625501184.0,
            "66": 2625501184.0,
            "67": 2625501184.0,
            "68": 2625501184.0,
            "69": 2625501184.0,
            "70": 2625501184.0,
            "71": 2625501184.0,
            "72": 2625501184.0,
            "73": 2625501184.0,
            "74": 2625501184.0,
            "75": 2625501184.0,
            "76": 2625501184.0,
            "77": 2625501184.0,
            "78": 2625501184.0,
            "79": 2625501184.0,
            "80": 2625501184.0,
            "81": 2625501184.0,
            "82": 2625501184.0,
            "83": 2625501184.0,
            "84": 2625501184.0,
            "85": 2625501184.0,
            "86": 2625501184.0,
            "87": 2625501184.0,
            "88": 2625501184.0,
            "89": 2625501184.0,
            "90": 2625501184.0,
            "91": 2625501184.0,
            "92": 2625501184.0,
            "93": 2625501184.0,
            "94": 2625501184.0,
            "95": 2625501184.0,
            "96": 2625501184.0,
            "97": 2625501184.0,
            "98": 2625501184.0,
            "99": 2625501184.0,
            "100": 2625501184.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": "nan",
            "52": 2.60445,
            "53": 0.14686,
            "54": 0.13325,
            "55": 0.13174,
            "56": 0.13234,
            "57": 0.13268,
            "58": 0.13337,
            "59": 0.13324,
            "60": 0.13107,
            "61": 0.13206,
            "62": 0.1329,
            "63": 0.13379,
            "64": 0.1348,
            "65": 0.13602,
            "66": 0.13298,
            "67": 0.13465,
            "68": 0.13495,
            "69": 0.13454,
            "70": 0.13536,
            "71": 0.13494,
            "72": 0.13541,
            "73": 0.13997,
            "74": 0.1423,
            "75": 0.13785,
            "76": 0.14664,
            "77": 0.16548,
            "78": 0.17704,
            "79": 0.15011,
            "80": 0.14471,
            "81": 0.13952,
            "82": 0.13892,
            "83": 0.13568,
            "84": 0.13463,
            "85": 0.13878,
            "86": 0.13867,
            "87": 0.13899,
            "88": 0.13819,
            "89": 0.13945,
            "90": 0.13964,
            "91": 0.13862,
            "92": 0.13655,
            "93": 0.13587,
            "94": 0.13572,
            "95": 0.1357,
            "96": 0.13598,
            "97": 0.13642,
            "98": 0.13742,
            "99": 0.13474,
            "100": 0.13647
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.85902,
            "2": 10.87345,
            "3": 10.86919,
            "4": 10.87273,
            "5": 10.87389,
            "6": 10.89658,
            "7": 10.86387,
            "8": 10.87869,
            "9": 10.87439,
            "10": 10.83846,
            "11": 10.87012,
            "12": 10.86011,
            "13": 10.87824,
            "14": 10.87935,
            "15": 10.8191,
            "16": 10.83109,
            "17": 10.78722,
            "18": 10.80215,
            "19": 10.7983,
            "20": 10.71224,
            "21": 10.68683,
            "22": 10.55402,
            "23": 10.70111,
            "24": 10.58621,
            "25": 10.52673,
            "26": 10.5837,
            "27": 10.59499,
            "28": 10.54816,
            "29": 10.55965,
            "30": 10.32899,
            "31": 10.08331,
            "32": 10.44752,
            "33": 10.4278,
            "34": 10.1796,
            "35": 10.24121,
            "36": 10.18155,
            "37": 10.32827,
            "38": 10.16792,
            "39": 10.37357,
            "40": 10.05111,
            "41": 10.10708,
            "42": 10.17823,
            "43": 9.77867,
            "44": 9.91197,
            "45": 9.77404,
            "46": 9.75415,
            "47": 10.09501,
            "48": 9.79531,
            "49": 9.46422,
            "50": 9.86729,
            "51": 9.80375,
            "52": 9.68218,
            "53": 10.02348,
            "54": 9.91595,
            "55": 9.82442,
            "56": 9.56994,
            "57": 9.42628,
            "58": 9.78075,
            "59": 9.53254,
            "60": 9.44561,
            "61": 9.64249,
            "62": 9.94298,
            "63": 9.31745,
            "64": 9.7256,
            "65": 8.88735,
            "66": 9.65711,
            "67": 9.31747,
            "68": 9.73506,
            "69": 9.74863,
            "70": 9.69601,
            "71": 9.57682,
            "72": 9.52425,
            "73": 9.4558,
            "74": 8.8826,
            "75": 9.37563,
            "76": 9.01106,
            "77": 10.02278,
            "78": 9.6796,
            "79": 9.33171,
            "80": 9.35836,
            "81": 9.43399,
            "82": 9.65055,
            "83": 9.2551,
            "84": 9.37131,
            "85": 9.56237,
            "86": 9.0351,
            "87": 9.54617,
            "88": 9.69806,
            "89": 9.54657,
            "90": 9.77627,
            "91": 9.28858,
            "92": 9.30652,
            "93": 9.02646,
            "94": 8.7883,
            "95": 9.48041,
            "96": 9.47962,
            "97": 9.25545,
            "98": 9.61947,
            "99": 8.83854,
            "100": 9.35116
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1862.0,
            "2": 1874.0,
            "3": 1748.0,
            "4": 1955.0,
            "5": 2050.0,
            "6": 1997.0,
            "7": 1967.0,
            "8": 1853.0,
            "9": 1965.0,
            "10": 1652.0,
            "11": 2042.0,
            "12": 1877.0,
            "13": 2076.0,
            "14": 1956.0,
            "15": 1953.0,
            "16": 1915.0,
            "17": 2045.0,
            "18": 1965.0,
            "19": 1988.0,
            "20": 1785.0,
            "21": 1941.0,
            "22": 1928.0,
            "23": 2112.0,
            "24": 1802.0,
            "25": 1933.0,
            "26": 1786.0,
            "27": 1945.0,
            "28": 2037.0,
            "29": 2119.0,
            "30": 2022.0,
            "31": 1699.0,
            "32": 2130.0,
            "33": 2187.0,
            "34": 1929.0,
            "35": 2092.0,
            "36": 2109.0,
            "37": 2362.0,
            "38": 2211.0,
            "39": 2383.0,
            "40": 2203.0,
            "41": 2288.0,
            "42": 2224.0,
            "43": 2150.0,
            "44": 2206.0,
            "45": 2187.0,
            "46": 2181.0,
            "47": 2260.0,
            "48": 2341.0,
            "49": 2210.0,
            "50": 2219.0,
            "51": 2508.0,
            "52": 2483.0,
            "53": 2959.0,
            "54": 2554.0,
            "55": 2408.0,
            "56": 2452.0,
            "57": 2528.0,
            "58": 2594.0,
            "59": 2750.0,
            "60": 2563.0,
            "61": 2794.0,
            "62": 2495.0,
            "63": 2493.0,
            "64": 2965.0,
            "65": 2569.0,
            "66": 2877.0,
            "67": 2969.0,
            "68": 2803.0,
            "69": 2944.0,
            "70": 3001.0,
            "71": 2867.0,
            "72": 2714.0,
            "73": 3017.0,
            "74": 2281.0,
            "75": 2774.0,
            "76": 2983.0,
            "77": 2955.0,
            "78": 3148.0,
            "79": 3076.0,
            "80": 2992.0,
            "81": 3255.0,
            "82": 3212.0,
            "83": 2809.0,
            "84": 3266.0,
            "85": 3188.0,
            "86": 2616.0,
            "87": 3492.0,
            "88": 3130.0,
            "89": 3020.0,
            "90": 3238.0,
            "91": 3106.0,
            "92": 3183.0,
            "93": 2960.0,
            "94": 3492.0,
            "95": 3112.0,
            "96": 3256.0,
            "97": 3055.0,
            "98": 3558.0,
            "99": 3196.0,
            "100": 3109.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 917459968.0,
            "2": 917459968.0,
            "3": 917459968.0,
            "4": 917459968.0,
            "5": 917459968.0,
            "6": 917459968.0,
            "7": 917459968.0,
            "8": 917459968.0,
            "9": 917459968.0,
            "10": 917459968.0,
            "11": 917459968.0,
            "12": 917459968.0,
            "13": 917459968.0,
            "14": 917459968.0,
            "15": 917459968.0,
            "16": 917459968.0,
            "17": 917459968.0,
            "18": 917459968.0,
            "19": 917459968.0,
            "20": 917459968.0,
            "21": 917459968.0,
            "22": 917459968.0,
            "23": 917459968.0,
            "24": 917459968.0,
            "25": 917459968.0,
            "26": 917459968.0,
            "27": 917459968.0,
            "28": 917459968.0,
            "29": 917459968.0,
            "30": 917459968.0,
            "31": 917459968.0,
            "32": 917459968.0,
            "33": 917459968.0,
            "34": 917459968.0,
            "35": 917459968.0,
            "36": 917459968.0,
            "37": 917459968.0,
            "38": 917459968.0,
            "39": 917459968.0,
            "40": 917459968.0,
            "41": 917459968.0,
            "42": 917459968.0,
            "43": 917459968.0,
            "44": 917459968.0,
            "45": 917459968.0,
            "46": 917459968.0,
            "47": 917459968.0,
            "48": 917459968.0,
            "49": 917459968.0,
            "50": 917459968.0,
            "51": 917459968.0,
            "52": 917459968.0,
            "53": 917459968.0,
            "54": 917459968.0,
            "55": 917459968.0,
            "56": 917459968.0,
            "57": 917459968.0,
            "58": 917459968.0,
            "59": 917459968.0,
            "60": 917459968.0,
            "61": 917459968.0,
            "62": 917459968.0,
            "63": 917459968.0,
            "64": 917459968.0,
            "65": 917459968.0,
            "66": 917459968.0,
            "67": 917459968.0,
            "68": 917459968.0,
            "69": 917459968.0,
            "70": 917459968.0,
            "71": 917459968.0,
            "72": 917459968.0,
            "73": 917459968.0,
            "74": 917459968.0,
            "75": 917459968.0,
            "76": 917459968.0,
            "77": 917459968.0,
            "78": 917459968.0,
            "79": 917459968.0,
            "80": 917459968.0,
            "81": 917459968.0,
            "82": 917459968.0,
            "83": 917459968.0,
            "84": 917459968.0,
            "85": 917459968.0,
            "86": 917459968.0,
            "87": 917459968.0,
            "88": 917459968.0,
            "89": 917459968.0,
            "90": 917459968.0,
            "91": 917459968.0,
            "92": 917459968.0,
            "93": 917459968.0,
            "94": 917459968.0,
            "95": 917459968.0,
            "96": 917459968.0,
            "97": 917459968.0,
            "98": 917459968.0,
            "99": 917459968.0,
            "100": 917459968.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2239820800.0,
            "2": 2599286784.0,
            "3": 2599286784.0,
            "4": 2599286784.0,
            "5": 2599286784.0,
            "6": 2599286784.0,
            "7": 2599286784.0,
            "8": 2599286784.0,
            "9": 2599286784.0,
            "10": 2599286784.0,
            "11": 2599286784.0,
            "12": 2599286784.0,
            "13": 2599286784.0,
            "14": 2599286784.0,
            "15": 2599286784.0,
            "16": 2599286784.0,
            "17": 2599286784.0,
            "18": 2599286784.0,
            "19": 2599286784.0,
            "20": 2599286784.0,
            "21": 2599286784.0,
            "22": 2599286784.0,
            "23": 2599286784.0,
            "24": 2599286784.0,
            "25": 2599286784.0,
            "26": 2599286784.0,
            "27": 2599286784.0,
            "28": 2599286784.0,
            "29": 2599286784.0,
            "30": 2599286784.0,
            "31": 2599286784.0,
            "32": 2599286784.0,
            "33": 2599286784.0,
            "34": 2599286784.0,
            "35": 2599286784.0,
            "36": 2599286784.0,
            "37": 2599286784.0,
            "38": 2599286784.0,
            "39": 2599286784.0,
            "40": 2599286784.0,
            "41": 2599286784.0,
            "42": 2599286784.0,
            "43": 2599286784.0,
            "44": 2599286784.0,
            "45": 2599286784.0,
            "46": 2599286784.0,
            "47": 2599286784.0,
            "48": 2599286784.0,
            "49": 2599286784.0,
            "50": 2599286784.0,
            "51": 2599286784.0,
            "52": 2599286784.0,
            "53": 2599286784.0,
            "54": 2599286784.0,
            "55": 2599286784.0,
            "56": 2599286784.0,
            "57": 2599286784.0,
            "58": 2599286784.0,
            "59": 2599286784.0,
            "60": 2599286784.0,
            "61": 2599286784.0,
            "62": 2599286784.0,
            "63": 2599286784.0,
            "64": 2599286784.0,
            "65": 2599286784.0,
            "66": 2599286784.0,
            "67": 2599286784.0,
            "68": 2599286784.0,
            "69": 2599286784.0,
            "70": 2599286784.0,
            "71": 2599286784.0,
            "72": 2599286784.0,
            "73": 2599286784.0,
            "74": 2599286784.0,
            "75": 2599286784.0,
            "76": 2599286784.0,
            "77": 2599286784.0,
            "78": 2599286784.0,
            "79": 2599286784.0,
            "80": 2599286784.0,
            "81": 2599286784.0,
            "82": 2599286784.0,
            "83": 2599286784.0,
            "84": 2599286784.0,
            "85": 2599286784.0,
            "86": 2599286784.0,
            "87": 2599286784.0,
            "88": 2599286784.0,
            "89": 2599286784.0,
            "90": 2599286784.0,
            "91": 2599286784.0,
            "92": 2599286784.0,
            "93": 2599286784.0,
            "94": 2599286784.0,
            "95": 2599286784.0,
            "96": 2599286784.0,
            "97": 2599286784.0,
            "98": 2599286784.0,
            "99": 2599286784.0,
            "100": 2599286784.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 7.15273,
            "2": 0.12761,
            "3": 0.108,
            "4": 0.08804,
            "5": 0.08914,
            "6": 0.0872,
            "7": 0.0865,
            "8": 0.09025,
            "9": 0.09224,
            "10": 0.08785,
            "11": 0.08842,
            "12": 0.08678,
            "13": 0.08768,
            "14": 0.08732,
            "15": 0.08754,
            "16": 0.08689,
            "17": 0.08745,
            "18": 0.08749,
            "19": 0.08681,
            "20": 0.08755,
            "21": 0.08798,
            "22": 0.08687,
            "23": 0.0869,
            "24": 0.08666,
            "25": 0.08694,
            "26": 0.08728,
            "27": 0.08672,
            "28": 0.09131,
            "29": 0.09876,
            "30": 0.09345,
            "31": 0.0871,
            "32": 0.08745,
            "33": 0.0868,
            "34": 0.08664,
            "35": 0.08688,
            "36": 0.08685,
            "37": 0.08807,
            "38": 0.08807,
            "39": 0.09095,
            "40": 0.08728,
            "41": 0.08918,
            "42": 0.0874,
            "43": 0.08812,
            "44": 0.08765,
            "45": 0.08765,
            "46": 0.08695,
            "47": 0.08967,
            "48": 0.08734,
            "49": 0.08707,
            "50": 0.08818,
            "51": 0.09801,
            "52": 0.09366,
            "53": 0.09478,
            "54": 0.09027,
            "55": 0.08632,
            "56": 0.0857,
            "57": 0.08636,
            "58": 0.08585,
            "59": 0.08632,
            "60": 0.08559,
            "61": 0.08575,
            "62": 0.08716,
            "63": 0.08612,
            "64": 0.08569,
            "65": 0.0876,
            "66": 0.08587,
            "67": 0.0862,
            "68": 0.08594,
            "69": 0.0858,
            "70": 0.08668,
            "71": 0.08553,
            "72": 0.08961,
            "73": 0.09562,
            "74": 0.09156,
            "75": 0.0901,
            "76": 0.08615,
            "77": 0.08562,
            "78": 0.08664,
            "79": 0.08569,
            "80": 0.08621,
            "81": 0.08562,
            "82": 0.08601,
            "83": 0.08551,
            "84": 0.08569,
            "85": 0.08622,
            "86": 0.08639,
            "87": 0.08581,
            "88": 0.08569,
            "89": 0.08624,
            "90": 0.086,
            "91": 0.08602,
            "92": 0.08575,
            "93": 0.08626,
            "94": 0.0869,
            "95": 0.0867,
            "96": 0.0872,
            "97": 0.08727,
            "98": 0.08652,
            "99": 0.0867,
            "100": 0.08593
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_h100_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.80375,
            "52": 9.68218,
            "53": 10.02348,
            "54": 9.91595,
            "55": 9.82442,
            "56": 9.56994,
            "57": 9.42628,
            "58": 9.78075,
            "59": 9.53254,
            "60": 9.44561,
            "61": 9.64249,
            "62": 9.94298,
            "63": 9.31745,
            "64": 9.7256,
            "65": 8.88735,
            "66": 9.65711,
            "67": 9.31747,
            "68": 9.73506,
            "69": 9.74863,
            "70": 9.69601,
            "71": 9.57682,
            "72": 9.52425,
            "73": 9.4558,
            "74": 8.8826,
            "75": 9.37563,
            "76": 9.01106,
            "77": 10.02278,
            "78": 9.6796,
            "79": 9.33171,
            "80": 9.35836,
            "81": 9.43399,
            "82": 9.65055,
            "83": 9.2551,
            "84": 9.37131,
            "85": 9.56237,
            "86": 9.0351,
            "87": 9.54617,
            "88": 9.69806,
            "89": 9.54657,
            "90": 9.77627,
            "91": 9.28858,
            "92": 9.30652,
            "93": 9.02646,
            "94": 8.7883,
            "95": 9.48041,
            "96": 9.47962,
            "97": 9.25545,
            "98": 9.61947,
            "99": 8.83854,
            "100": 9.35116
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2508.0,
            "52": 2483.0,
            "53": 2959.0,
            "54": 2554.0,
            "55": 2408.0,
            "56": 2452.0,
            "57": 2528.0,
            "58": 2594.0,
            "59": 2750.0,
            "60": 2563.0,
            "61": 2794.0,
            "62": 2495.0,
            "63": 2493.0,
            "64": 2965.0,
            "65": 2569.0,
            "66": 2877.0,
            "67": 2969.0,
            "68": 2803.0,
            "69": 2944.0,
            "70": 3001.0,
            "71": 2867.0,
            "72": 2714.0,
            "73": 3017.0,
            "74": 2281.0,
            "75": 2774.0,
            "76": 2983.0,
            "77": 2955.0,
            "78": 3148.0,
            "79": 3076.0,
            "80": 2992.0,
            "81": 3255.0,
            "82": 3212.0,
            "83": 2809.0,
            "84": 3266.0,
            "85": 3188.0,
            "86": 2616.0,
            "87": 3492.0,
            "88": 3130.0,
            "89": 3020.0,
            "90": 3238.0,
            "91": 3106.0,
            "92": 3183.0,
            "93": 2960.0,
            "94": 3492.0,
            "95": 3112.0,
            "96": 3256.0,
            "97": 3055.0,
            "98": 3558.0,
            "99": 3196.0,
            "100": 3109.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 917459968.0,
            "52": 917459968.0,
            "53": 917459968.0,
            "54": 917459968.0,
            "55": 917459968.0,
            "56": 917459968.0,
            "57": 917459968.0,
            "58": 917459968.0,
            "59": 917459968.0,
            "60": 917459968.0,
            "61": 917459968.0,
            "62": 917459968.0,
            "63": 917459968.0,
            "64": 917459968.0,
            "65": 917459968.0,
            "66": 917459968.0,
            "67": 917459968.0,
            "68": 917459968.0,
            "69": 917459968.0,
            "70": 917459968.0,
            "71": 917459968.0,
            "72": 917459968.0,
            "73": 917459968.0,
            "74": 917459968.0,
            "75": 917459968.0,
            "76": 917459968.0,
            "77": 917459968.0,
            "78": 917459968.0,
            "79": 917459968.0,
            "80": 917459968.0,
            "81": 917459968.0,
            "82": 917459968.0,
            "83": 917459968.0,
            "84": 917459968.0,
            "85": 917459968.0,
            "86": 917459968.0,
            "87": 917459968.0,
            "88": 917459968.0,
            "89": 917459968.0,
            "90": 917459968.0,
            "91": 917459968.0,
            "92": 917459968.0,
            "93": 917459968.0,
            "94": 917459968.0,
            "95": 917459968.0,
            "96": 917459968.0,
            "97": 917459968.0,
            "98": 917459968.0,
            "99": 917459968.0,
            "100": 917459968.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2599285760.0,
            "52": 2599286784.0,
            "53": 2599286784.0,
            "54": 2599286784.0,
            "55": 2599286784.0,
            "56": 2599286784.0,
            "57": 2599286784.0,
            "58": 2599286784.0,
            "59": 2599286784.0,
            "60": 2599286784.0,
            "61": 2599286784.0,
            "62": 2599286784.0,
            "63": 2599286784.0,
            "64": 2599286784.0,
            "65": 2599286784.0,
            "66": 2599286784.0,
            "67": 2599286784.0,
            "68": 2599286784.0,
            "69": 2599286784.0,
            "70": 2599286784.0,
            "71": 2599286784.0,
            "72": 2599286784.0,
            "73": 2599286784.0,
            "74": 2599286784.0,
            "75": 2599286784.0,
            "76": 2599286784.0,
            "77": 2599286784.0,
            "78": 2599286784.0,
            "79": 2599286784.0,
            "80": 2599286784.0,
            "81": 2599286784.0,
            "82": 2599286784.0,
            "83": 2599286784.0,
            "84": 2599286784.0,
            "85": 2599286784.0,
            "86": 2599286784.0,
            "87": 2599286784.0,
            "88": 2599286784.0,
            "89": 2599286784.0,
            "90": 2599286784.0,
            "91": 2599286784.0,
            "92": 2599286784.0,
            "93": 2599286784.0,
            "94": 2599286784.0,
            "95": 2599286784.0,
            "96": 2599286784.0,
            "97": 2599286784.0,
            "98": 2599286784.0,
            "99": 2599286784.0,
            "100": 2599286784.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 7.45,
            "52": 0.1176,
            "53": 0.08802,
            "54": 0.08699,
            "55": 0.08722,
            "56": 0.08722,
            "57": 0.09047,
            "58": 0.08702,
            "59": 0.08774,
            "60": 0.08696,
            "61": 0.08697,
            "62": 0.08669,
            "63": 0.08744,
            "64": 0.08973,
            "65": 0.08942,
            "66": 0.08847,
            "67": 0.0878,
            "68": 0.0868,
            "69": 0.08686,
            "70": 0.08743,
            "71": 0.08699,
            "72": 0.08754,
            "73": 0.08641,
            "74": 0.08819,
            "75": 0.08738,
            "76": 0.50165,
            "77": 0.08865,
            "78": 0.08729,
            "79": 0.0866,
            "80": 0.08763,
            "81": 0.08755,
            "82": 0.08768,
            "83": 0.0877,
            "84": 0.08704,
            "85": 0.08686,
            "86": 0.0893,
            "87": 0.08757,
            "88": 0.08695,
            "89": 0.08918,
            "90": 0.08715,
            "91": 0.08682,
            "92": 0.08819,
            "93": 0.08755,
            "94": 0.08919,
            "95": 0.08702,
            "96": 0.0863,
            "97": 0.08852,
            "98": 0.08865,
            "99": 0.08679,
            "100": 0.08757
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.84865,
            "2": 10.84835,
            "3": 10.84054,
            "4": 10.82381,
            "5": 10.86283,
            "6": 10.87554,
            "7": 10.87089,
            "8": 10.85554,
            "9": 10.86811,
            "10": 10.82287,
            "11": 10.90334,
            "12": 10.87985,
            "13": 10.8831,
            "14": 10.89653,
            "15": 10.81406,
            "16": 10.81973,
            "17": 10.79918,
            "18": 10.82428,
            "19": 10.8195,
            "20": 10.71958,
            "21": 10.67864,
            "22": 10.53625,
            "23": 10.72125,
            "24": 10.57435,
            "25": 10.52997,
            "26": 10.60096,
            "27": 10.61502,
            "28": 10.57231,
            "29": 10.58614,
            "30": 10.33749,
            "31": 10.06516,
            "32": 10.46436,
            "33": 10.43612,
            "34": 10.17278,
            "35": 10.24173,
            "36": 10.19042,
            "37": 10.32282,
            "38": 10.14881,
            "39": 10.37709,
            "40": 10.05124,
            "41": 10.11355,
            "42": 10.17253,
            "43": 9.76298,
            "44": 9.89293,
            "45": 9.7664,
            "46": 9.7601,
            "47": 10.09424,
            "48": 9.78753,
            "49": 9.454,
            "50": 9.8548,
            "51": 9.79157,
            "52": 9.68731,
            "53": 10.02181,
            "54": 9.90398,
            "55": 9.82389,
            "56": 9.57081,
            "57": 9.40818,
            "58": 9.77678,
            "59": 9.52729,
            "60": 9.44284,
            "61": 9.64071,
            "62": 9.94046,
            "63": 9.31099,
            "64": 9.72506,
            "65": 8.8916,
            "66": 9.6525,
            "67": 9.31718,
            "68": 9.73957,
            "69": 9.74304,
            "70": 9.67942,
            "71": 9.56228,
            "72": 9.53149,
            "73": 9.44531,
            "74": 8.88431,
            "75": 9.3677,
            "76": 9.02482,
            "77": 10.01647,
            "78": 9.6813,
            "79": 9.32719,
            "80": 9.3577,
            "81": 9.43335,
            "82": 9.64804,
            "83": 9.25573,
            "84": 9.36738,
            "85": 9.56091,
            "86": 9.03567,
            "87": 9.54622,
            "88": 9.70041,
            "89": 9.54992,
            "90": 9.77126,
            "91": 9.28801,
            "92": 9.31055,
            "93": 9.03195,
            "94": 8.78121,
            "95": 9.48115,
            "96": 9.4759,
            "97": 9.2489,
            "98": 9.61705,
            "99": 8.8368,
            "100": 9.35043
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1658.0,
            "2": 1892.0,
            "3": 1844.0,
            "4": 1851.0,
            "5": 1895.0,
            "6": 2058.0,
            "7": 1902.0,
            "8": 1808.0,
            "9": 1894.0,
            "10": 1583.0,
            "11": 2044.0,
            "12": 1889.0,
            "13": 2101.0,
            "14": 2062.0,
            "15": 1981.0,
            "16": 1922.0,
            "17": 1845.0,
            "18": 1842.0,
            "19": 1890.0,
            "20": 1769.0,
            "21": 2048.0,
            "22": 1800.0,
            "23": 2134.0,
            "24": 1822.0,
            "25": 1861.0,
            "26": 1864.0,
            "27": 1886.0,
            "28": 2050.0,
            "29": 2009.0,
            "30": 2118.0,
            "31": 1688.0,
            "32": 2112.0,
            "33": 2153.0,
            "34": 1963.0,
            "35": 2014.0,
            "36": 2023.0,
            "37": 2222.0,
            "38": 2239.0,
            "39": 2367.0,
            "40": 2161.0,
            "41": 2422.0,
            "42": 2240.0,
            "43": 2116.0,
            "44": 2343.0,
            "45": 2127.0,
            "46": 2189.0,
            "47": 2411.0,
            "48": 2347.0,
            "49": 2271.0,
            "50": 2345.0,
            "51": 2482.0,
            "52": 2570.0,
            "53": 2835.0,
            "54": 2589.0,
            "55": 2450.0,
            "56": 2744.0,
            "57": 2429.0,
            "58": 2684.0,
            "59": 2748.0,
            "60": 2464.0,
            "61": 2995.0,
            "62": 2518.0,
            "63": 2570.0,
            "64": 2843.0,
            "65": 2648.0,
            "66": 2842.0,
            "67": 2954.0,
            "68": 2833.0,
            "69": 3027.0,
            "70": 2993.0,
            "71": 3010.0,
            "72": 2597.0,
            "73": 3002.0,
            "74": 2325.0,
            "75": 2882.0,
            "76": 3143.0,
            "77": 3062.0,
            "78": 3272.0,
            "79": 3303.0,
            "80": 3280.0,
            "81": 3517.0,
            "82": 3283.0,
            "83": 2834.0,
            "84": 3365.0,
            "85": 3288.0,
            "86": 2562.0,
            "87": 3493.0,
            "88": 3388.0,
            "89": 3102.0,
            "90": 3230.0,
            "91": 3154.0,
            "92": 3263.0,
            "93": 2967.0,
            "94": 3520.0,
            "95": 3175.0,
            "96": 3317.0,
            "97": 2999.0,
            "98": 3549.0,
            "99": 3248.0,
            "100": 3227.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 892293120.0,
            "2": 892293120.0,
            "3": 892293120.0,
            "4": 892293120.0,
            "5": 892293120.0,
            "6": 892293120.0,
            "7": 892293120.0,
            "8": 892293120.0,
            "9": 892293120.0,
            "10": 892293120.0,
            "11": 892293120.0,
            "12": 892293120.0,
            "13": 892293120.0,
            "14": 892293120.0,
            "15": 892293120.0,
            "16": 892293120.0,
            "17": 892293120.0,
            "18": 892293120.0,
            "19": 892293120.0,
            "20": 892293120.0,
            "21": 892293120.0,
            "22": 892293120.0,
            "23": 892293120.0,
            "24": 892293120.0,
            "25": 892293120.0,
            "26": 892293120.0,
            "27": 892293120.0,
            "28": 892293120.0,
            "29": 892293120.0,
            "30": 892293120.0,
            "31": 892293120.0,
            "32": 892293120.0,
            "33": 892293120.0,
            "34": 892293120.0,
            "35": 892293120.0,
            "36": 892293120.0,
            "37": 892293120.0,
            "38": 892293120.0,
            "39": 892293120.0,
            "40": 892293120.0,
            "41": 892293120.0,
            "42": 892293120.0,
            "43": 892293120.0,
            "44": 892293120.0,
            "45": 892293120.0,
            "46": 892293120.0,
            "47": 892293120.0,
            "48": 892293120.0,
            "49": 892293120.0,
            "50": 892293120.0,
            "51": 892293120.0,
            "52": 892293120.0,
            "53": 892293120.0,
            "54": 892293120.0,
            "55": 892293120.0,
            "56": 892293120.0,
            "57": 892293120.0,
            "58": 892293120.0,
            "59": 892293120.0,
            "60": 892293120.0,
            "61": 892293120.0,
            "62": 892293120.0,
            "63": 892293120.0,
            "64": 892293120.0,
            "65": 892293120.0,
            "66": 892293120.0,
            "67": 892293120.0,
            "68": 892293120.0,
            "69": 892293120.0,
            "70": 892293120.0,
            "71": 892293120.0,
            "72": 892293120.0,
            "73": 892293120.0,
            "74": 892293120.0,
            "75": 892293120.0,
            "76": 892293120.0,
            "77": 892293120.0,
            "78": 892293120.0,
            "79": 892293120.0,
            "80": 892293120.0,
            "81": 892293120.0,
            "82": 892293120.0,
            "83": 892293120.0,
            "84": 892293120.0,
            "85": 892293120.0,
            "86": 892293120.0,
            "87": 892293120.0,
            "88": 892293120.0,
            "89": 892293120.0,
            "90": 892293120.0,
            "91": 892293120.0,
            "92": 892293120.0,
            "93": 892293120.0,
            "94": 892293120.0,
            "95": 892293120.0,
            "96": 892293120.0,
            "97": 892293120.0,
            "98": 892293120.0,
            "99": 892293120.0,
            "100": 892293120.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2236674048.0,
            "2": 2599285760.0,
            "3": 2599285760.0,
            "4": 2599285760.0,
            "5": 2599285760.0,
            "6": 2599285760.0,
            "7": 2599285760.0,
            "8": 2599285760.0,
            "9": 2599285760.0,
            "10": 2599285760.0,
            "11": 2599285760.0,
            "12": 2599285760.0,
            "13": 2599285760.0,
            "14": 2599285760.0,
            "15": 2599285760.0,
            "16": 2599285760.0,
            "17": 2599285760.0,
            "18": 2599285760.0,
            "19": 2599285760.0,
            "20": 2599285760.0,
            "21": 2599285760.0,
            "22": 2599285760.0,
            "23": 2599285760.0,
            "24": 2599285760.0,
            "25": 2599285760.0,
            "26": 2599285760.0,
            "27": 2599285760.0,
            "28": 2599285760.0,
            "29": 2599285760.0,
            "30": 2599285760.0,
            "31": 2599285760.0,
            "32": 2599285760.0,
            "33": 2599285760.0,
            "34": 2599285760.0,
            "35": 2599285760.0,
            "36": 2599285760.0,
            "37": 2599285760.0,
            "38": 2599285760.0,
            "39": 2599285760.0,
            "40": 2599285760.0,
            "41": 2599285760.0,
            "42": 2599285760.0,
            "43": 2599285760.0,
            "44": 2599285760.0,
            "45": 2599285760.0,
            "46": 2599285760.0,
            "47": 2599285760.0,
            "48": 2599285760.0,
            "49": 2599285760.0,
            "50": 2599285760.0,
            "51": 2599285760.0,
            "52": 2599285760.0,
            "53": 2599285760.0,
            "54": 2599285760.0,
            "55": 2599285760.0,
            "56": 2599285760.0,
            "57": 2599285760.0,
            "58": 2599285760.0,
            "59": 2599285760.0,
            "60": 2599285760.0,
            "61": 2599285760.0,
            "62": 2599285760.0,
            "63": 2599285760.0,
            "64": 2599285760.0,
            "65": 2599285760.0,
            "66": 2599285760.0,
            "67": 2599285760.0,
            "68": 2599285760.0,
            "69": 2599285760.0,
            "70": 2599285760.0,
            "71": 2599285760.0,
            "72": 2599285760.0,
            "73": 2599285760.0,
            "74": 2599285760.0,
            "75": 2599285760.0,
            "76": 2599285760.0,
            "77": 2599285760.0,
            "78": 2599285760.0,
            "79": 2599285760.0,
            "80": 2599285760.0,
            "81": 2599285760.0,
            "82": 2599285760.0,
            "83": 2599285760.0,
            "84": 2599285760.0,
            "85": 2599285760.0,
            "86": 2599285760.0,
            "87": 2599285760.0,
            "88": 2599285760.0,
            "89": 2599285760.0,
            "90": 2599285760.0,
            "91": 2599285760.0,
            "92": 2599285760.0,
            "93": 2599285760.0,
            "94": 2599285760.0,
            "95": 2599285760.0,
            "96": 2599285760.0,
            "97": 2599285760.0,
            "98": 2599285760.0,
            "99": 2599285760.0,
            "100": 2599285760.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 5.33769,
            "2": 0.18532,
            "3": 0.14816,
            "4": 0.14635,
            "5": 0.14643,
            "6": 0.14673,
            "7": 0.1465,
            "8": 0.14731,
            "9": 0.14559,
            "10": 0.1459,
            "11": 0.14535,
            "12": 0.14613,
            "13": 0.14618,
            "14": 0.14642,
            "15": 0.1462,
            "16": 0.14548,
            "17": 0.14502,
            "18": 0.1449,
            "19": 0.145,
            "20": 0.14696,
            "21": 0.14496,
            "22": 0.14536,
            "23": 0.14489,
            "24": 0.14464,
            "25": 0.14477,
            "26": 0.14552,
            "27": 0.14534,
            "28": 0.14506,
            "29": 0.14487,
            "30": 0.14509,
            "31": 0.14642,
            "32": 0.14499,
            "33": 0.14538,
            "34": 0.14461,
            "35": 0.14541,
            "36": 0.14464,
            "37": 0.14539,
            "38": 0.14569,
            "39": 0.14515,
            "40": 0.14583,
            "41": 0.14836,
            "42": 0.14589,
            "43": 0.14625,
            "44": 0.14559,
            "45": 0.14588,
            "46": 0.14644,
            "47": 0.146,
            "48": 0.1459,
            "49": 0.14609,
            "50": 0.14597,
            "51": 0.15206,
            "52": 0.1459,
            "53": 0.1452,
            "54": 0.14745,
            "55": 0.14578,
            "56": 0.14459,
            "57": 0.14524,
            "58": 0.14545,
            "59": 0.14527,
            "60": 0.14448,
            "61": 0.14539,
            "62": 0.14462,
            "63": 0.14474,
            "64": 0.1447,
            "65": 0.14564,
            "66": 0.14463,
            "67": 0.14466,
            "68": 0.14483,
            "69": 0.14562,
            "70": 0.1456,
            "71": 0.14516,
            "72": 0.14481,
            "73": 0.14539,
            "74": 0.14568,
            "75": 0.14464,
            "76": 0.14465,
            "77": 0.14427,
            "78": 0.14541,
            "79": 0.1445,
            "80": 0.14535,
            "81": 0.14526,
            "82": 0.14617,
            "83": 0.14445,
            "84": 0.14483,
            "85": 0.14457,
            "86": 0.14459,
            "87": 0.14462,
            "88": 0.14433,
            "89": 0.14514,
            "90": 0.14416,
            "91": 0.14667,
            "92": 0.14432,
            "93": 0.14551,
            "94": 0.14453,
            "95": 0.14488,
            "96": 0.14441,
            "97": 0.14545,
            "98": 0.14459,
            "99": 0.14481,
            "100": 0.14918
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.84865,
            "2": 10.84835,
            "3": 10.84054,
            "4": 10.82381,
            "5": 10.86283,
            "6": 10.87554,
            "7": 10.87089,
            "8": 10.85554,
            "9": 10.86811,
            "10": 10.82287,
            "11": 10.90334,
            "12": 10.87985,
            "13": 10.8831,
            "14": 10.89653,
            "15": 10.81406,
            "16": 10.81973,
            "17": 10.79918,
            "18": 10.82428,
            "19": 10.8195,
            "20": 10.71958,
            "21": 10.67864,
            "22": 10.53625,
            "23": 10.72125,
            "24": 10.57435,
            "25": 10.52997,
            "26": 10.60096,
            "27": 10.61502,
            "28": 10.57231,
            "29": 10.58614,
            "30": 10.33749,
            "31": 10.06516,
            "32": 10.46436,
            "33": 10.43612,
            "34": 10.17278,
            "35": 10.24173,
            "36": 10.19042,
            "37": 10.32282,
            "38": 10.14881,
            "39": 10.37709,
            "40": 10.05124,
            "41": 10.11355,
            "42": 10.17253,
            "43": 9.76298,
            "44": 9.89293,
            "45": 9.7664,
            "46": 9.7601,
            "47": 10.09424,
            "48": 9.78753,
            "49": 9.454,
            "50": 9.8548,
            "51": 9.79157,
            "52": 9.68731,
            "53": 10.02181,
            "54": 9.90398,
            "55": 9.82389,
            "56": 9.57081,
            "57": 9.40818,
            "58": 9.77678,
            "59": 9.52729,
            "60": 9.44284,
            "61": 9.64071,
            "62": 9.94046,
            "63": 9.31099,
            "64": 9.72506,
            "65": 8.8916,
            "66": 9.6525,
            "67": 9.31718,
            "68": 9.73957,
            "69": 9.74304,
            "70": 9.67942,
            "71": 9.56228,
            "72": 9.53149,
            "73": 9.44531,
            "74": 8.88431,
            "75": 9.3677,
            "76": 9.02482,
            "77": 10.01647,
            "78": 9.6813,
            "79": 9.32719,
            "80": 9.3577,
            "81": 9.43335,
            "82": 9.64804,
            "83": 9.25573,
            "84": 9.36738,
            "85": 9.56091,
            "86": 9.03567,
            "87": 9.54622,
            "88": 9.70041,
            "89": 9.54992,
            "90": 9.77126,
            "91": 9.28801,
            "92": 9.31055,
            "93": 9.03195,
            "94": 8.78121,
            "95": 9.48115,
            "96": 9.4759,
            "97": 9.2489,
            "98": 9.61705,
            "99": 8.8368,
            "100": 9.35043
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1658.0,
            "2": 1892.0,
            "3": 1844.0,
            "4": 1851.0,
            "5": 1895.0,
            "6": 2058.0,
            "7": 1902.0,
            "8": 1808.0,
            "9": 1894.0,
            "10": 1583.0,
            "11": 2044.0,
            "12": 1889.0,
            "13": 2101.0,
            "14": 2062.0,
            "15": 1981.0,
            "16": 1922.0,
            "17": 1845.0,
            "18": 1842.0,
            "19": 1890.0,
            "20": 1769.0,
            "21": 2048.0,
            "22": 1800.0,
            "23": 2134.0,
            "24": 1822.0,
            "25": 1861.0,
            "26": 1864.0,
            "27": 1886.0,
            "28": 2050.0,
            "29": 2009.0,
            "30": 2118.0,
            "31": 1688.0,
            "32": 2112.0,
            "33": 2153.0,
            "34": 1963.0,
            "35": 2014.0,
            "36": 2023.0,
            "37": 2222.0,
            "38": 2239.0,
            "39": 2367.0,
            "40": 2161.0,
            "41": 2422.0,
            "42": 2240.0,
            "43": 2116.0,
            "44": 2343.0,
            "45": 2127.0,
            "46": 2189.0,
            "47": 2411.0,
            "48": 2347.0,
            "49": 2271.0,
            "50": 2345.0,
            "51": 2482.0,
            "52": 2570.0,
            "53": 2835.0,
            "54": 2589.0,
            "55": 2450.0,
            "56": 2744.0,
            "57": 2429.0,
            "58": 2684.0,
            "59": 2748.0,
            "60": 2464.0,
            "61": 2995.0,
            "62": 2518.0,
            "63": 2570.0,
            "64": 2843.0,
            "65": 2648.0,
            "66": 2842.0,
            "67": 2954.0,
            "68": 2833.0,
            "69": 3027.0,
            "70": 2993.0,
            "71": 3010.0,
            "72": 2597.0,
            "73": 3002.0,
            "74": 2325.0,
            "75": 2882.0,
            "76": 3143.0,
            "77": 3062.0,
            "78": 3272.0,
            "79": 3303.0,
            "80": 3280.0,
            "81": 3517.0,
            "82": 3283.0,
            "83": 2834.0,
            "84": 3365.0,
            "85": 3288.0,
            "86": 2562.0,
            "87": 3493.0,
            "88": 3388.0,
            "89": 3102.0,
            "90": 3230.0,
            "91": 3154.0,
            "92": 3263.0,
            "93": 2967.0,
            "94": 3520.0,
            "95": 3175.0,
            "96": 3317.0,
            "97": 2999.0,
            "98": 3549.0,
            "99": 3248.0,
            "100": 3227.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 892293120.0,
            "2": 892293120.0,
            "3": 892293120.0,
            "4": 892293120.0,
            "5": 892293120.0,
            "6": 892293120.0,
            "7": 892293120.0,
            "8": 892293120.0,
            "9": 892293120.0,
            "10": 892293120.0,
            "11": 892293120.0,
            "12": 892293120.0,
            "13": 892293120.0,
            "14": 892293120.0,
            "15": 892293120.0,
            "16": 892293120.0,
            "17": 892293120.0,
            "18": 892293120.0,
            "19": 892293120.0,
            "20": 892293120.0,
            "21": 892293120.0,
            "22": 892293120.0,
            "23": 892293120.0,
            "24": 892293120.0,
            "25": 892293120.0,
            "26": 892293120.0,
            "27": 892293120.0,
            "28": 892293120.0,
            "29": 892293120.0,
            "30": 892293120.0,
            "31": 892293120.0,
            "32": 892293120.0,
            "33": 892293120.0,
            "34": 892293120.0,
            "35": 892293120.0,
            "36": 892293120.0,
            "37": 892293120.0,
            "38": 892293120.0,
            "39": 892293120.0,
            "40": 892293120.0,
            "41": 892293120.0,
            "42": 892293120.0,
            "43": 892293120.0,
            "44": 892293120.0,
            "45": 892293120.0,
            "46": 892293120.0,
            "47": 892293120.0,
            "48": 892293120.0,
            "49": 892293120.0,
            "50": 892293120.0,
            "51": 892293120.0,
            "52": 892293120.0,
            "53": 892293120.0,
            "54": 892293120.0,
            "55": 892293120.0,
            "56": 892293120.0,
            "57": 892293120.0,
            "58": 892293120.0,
            "59": 892293120.0,
            "60": 892293120.0,
            "61": 892293120.0,
            "62": 892293120.0,
            "63": 892293120.0,
            "64": 892293120.0,
            "65": 892293120.0,
            "66": 892293120.0,
            "67": 892293120.0,
            "68": 892293120.0,
            "69": 892293120.0,
            "70": 892293120.0,
            "71": 892293120.0,
            "72": 892293120.0,
            "73": 892293120.0,
            "74": 892293120.0,
            "75": 892293120.0,
            "76": 892293120.0,
            "77": 892293120.0,
            "78": 892293120.0,
            "79": 892293120.0,
            "80": 892293120.0,
            "81": 892293120.0,
            "82": 892293120.0,
            "83": 892293120.0,
            "84": 892293120.0,
            "85": 892293120.0,
            "86": 892293120.0,
            "87": 892293120.0,
            "88": 892293120.0,
            "89": 892293120.0,
            "90": 892293120.0,
            "91": 892293120.0,
            "92": 892293120.0,
            "93": 892293120.0,
            "94": 892293120.0,
            "95": 892293120.0,
            "96": 892293120.0,
            "97": 892293120.0,
            "98": 892293120.0,
            "99": 892293120.0,
            "100": 892293120.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2236674048.0,
            "2": 2599285760.0,
            "3": 2599285760.0,
            "4": 2599285760.0,
            "5": 2599285760.0,
            "6": 2599285760.0,
            "7": 2599285760.0,
            "8": 2599285760.0,
            "9": 2599285760.0,
            "10": 2599285760.0,
            "11": 2599285760.0,
            "12": 2599285760.0,
            "13": 2599285760.0,
            "14": 2599285760.0,
            "15": 2599285760.0,
            "16": 2599285760.0,
            "17": 2599285760.0,
            "18": 2599285760.0,
            "19": 2599285760.0,
            "20": 2599285760.0,
            "21": 2599285760.0,
            "22": 2599285760.0,
            "23": 2599285760.0,
            "24": 2599285760.0,
            "25": 2599285760.0,
            "26": 2599285760.0,
            "27": 2599285760.0,
            "28": 2599285760.0,
            "29": 2599285760.0,
            "30": 2599285760.0,
            "31": 2599285760.0,
            "32": 2599285760.0,
            "33": 2599285760.0,
            "34": 2599285760.0,
            "35": 2599285760.0,
            "36": 2599285760.0,
            "37": 2599285760.0,
            "38": 2599285760.0,
            "39": 2599285760.0,
            "40": 2599285760.0,
            "41": 2599285760.0,
            "42": 2599285760.0,
            "43": 2599285760.0,
            "44": 2599285760.0,
            "45": 2599285760.0,
            "46": 2599285760.0,
            "47": 2599285760.0,
            "48": 2599285760.0,
            "49": 2599285760.0,
            "50": 2599285760.0,
            "51": 2599285760.0,
            "52": 2599285760.0,
            "53": 2599285760.0,
            "54": 2599285760.0,
            "55": 2599285760.0,
            "56": 2599285760.0,
            "57": 2599285760.0,
            "58": 2599285760.0,
            "59": 2599285760.0,
            "60": 2599285760.0,
            "61": 2599285760.0,
            "62": 2599285760.0,
            "63": 2599285760.0,
            "64": 2599285760.0,
            "65": 2599285760.0,
            "66": 2599285760.0,
            "67": 2599285760.0,
            "68": 2599285760.0,
            "69": 2599285760.0,
            "70": 2599285760.0,
            "71": 2599285760.0,
            "72": 2599285760.0,
            "73": 2599285760.0,
            "74": 2599285760.0,
            "75": 2599285760.0,
            "76": 2599285760.0,
            "77": 2599285760.0,
            "78": 2599285760.0,
            "79": 2599285760.0,
            "80": 2599285760.0,
            "81": 2599285760.0,
            "82": 2599285760.0,
            "83": 2599285760.0,
            "84": 2599285760.0,
            "85": 2599285760.0,
            "86": 2599285760.0,
            "87": 2599285760.0,
            "88": 2599285760.0,
            "89": 2599285760.0,
            "90": 2599285760.0,
            "91": 2599285760.0,
            "92": 2599285760.0,
            "93": 2599285760.0,
            "94": 2599285760.0,
            "95": 2599285760.0,
            "96": 2599285760.0,
            "97": 2599285760.0,
            "98": 2599285760.0,
            "99": 2599285760.0,
            "100": 2599285760.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 6.65648,
            "2": 0.19179,
            "3": 0.15416,
            "4": 0.14165,
            "5": 0.14069,
            "6": 0.14005,
            "7": 0.14441,
            "8": 0.14847,
            "9": 0.14867,
            "10": 0.15034,
            "11": 0.14788,
            "12": 0.14812,
            "13": 0.14762,
            "14": 0.14827,
            "15": 0.14673,
            "16": 0.14725,
            "17": 0.14727,
            "18": 0.14703,
            "19": 0.14722,
            "20": 0.14733,
            "21": 0.14692,
            "22": 0.14653,
            "23": 0.14777,
            "24": 0.14694,
            "25": 0.14763,
            "26": 0.1471,
            "27": 0.14674,
            "28": 0.14635,
            "29": 0.14703,
            "30": 0.14621,
            "31": 0.14691,
            "32": 0.14767,
            "33": 0.14672,
            "34": 0.14669,
            "35": 0.14593,
            "36": 0.14589,
            "37": 0.14687,
            "38": 0.14638,
            "39": 0.14701,
            "40": 0.14657,
            "41": 0.14668,
            "42": 0.14663,
            "43": 0.14455,
            "44": 0.13873,
            "45": 0.13973,
            "46": 0.13942,
            "47": 0.13835,
            "48": 0.13884,
            "49": 0.13842,
            "50": 0.13788,
            "51": 0.14634,
            "52": 0.14143,
            "53": 0.13935,
            "54": 0.14449,
            "55": 0.13995,
            "56": 0.14005,
            "57": 0.13884,
            "58": 0.13823,
            "59": 0.13958,
            "60": 0.13806,
            "61": 0.13998,
            "62": 0.1391,
            "63": 0.13808,
            "64": 0.1378,
            "65": 0.13831,
            "66": 0.13766,
            "67": 0.13871,
            "68": 0.13842,
            "69": 0.13825,
            "70": 0.14322,
            "71": 0.13773,
            "72": 0.13739,
            "73": 0.1379,
            "74": 0.13895,
            "75": 0.14238,
            "76": 0.14002,
            "77": 0.13711,
            "78": 0.13768,
            "79": 0.13786,
            "80": 0.13681,
            "81": 0.13744,
            "82": 0.13817,
            "83": 0.13649,
            "84": 0.13687,
            "85": 0.13779,
            "86": 0.14075,
            "87": 0.13645,
            "88": 0.1389,
            "89": 0.13781,
            "90": 0.13671,
            "91": 0.13682,
            "92": 0.13637,
            "93": 0.13642,
            "94": 0.13696,
            "95": 0.13741,
            "96": 0.1363,
            "97": 0.13656,
            "98": 0.13634,
            "99": 0.13708,
            "100": 0.14224
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.79157,
            "52": 9.68731,
            "53": 10.02181,
            "54": 9.90398,
            "55": 9.82389,
            "56": 9.57081,
            "57": 9.40818,
            "58": 9.77678,
            "59": 9.52729,
            "60": 9.44284,
            "61": 9.64071,
            "62": 9.94046,
            "63": 9.31099,
            "64": 9.72506,
            "65": 8.8916,
            "66": 9.6525,
            "67": 9.31718,
            "68": 9.73957,
            "69": 9.74304,
            "70": 9.67942,
            "71": 9.56228,
            "72": 9.53149,
            "73": 9.44531,
            "74": 8.88431,
            "75": 9.3677,
            "76": 9.02482,
            "77": 10.01647,
            "78": 9.6813,
            "79": 9.32719,
            "80": 9.3577,
            "81": 9.43335,
            "82": 9.64804,
            "83": 9.25573,
            "84": 9.36738,
            "85": 9.56091,
            "86": 9.03567,
            "87": 9.54622,
            "88": 9.70041,
            "89": 9.54992,
            "90": 9.77126,
            "91": 9.28801,
            "92": 9.31055,
            "93": 9.03195,
            "94": 8.78121,
            "95": 9.48115,
            "96": 9.4759,
            "97": 9.2489,
            "98": 9.61705,
            "99": 8.8368,
            "100": 9.35043
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2482.0,
            "52": 2570.0,
            "53": 2835.0,
            "54": 2589.0,
            "55": 2450.0,
            "56": 2744.0,
            "57": 2429.0,
            "58": 2684.0,
            "59": 2748.0,
            "60": 2464.0,
            "61": 2995.0,
            "62": 2518.0,
            "63": 2570.0,
            "64": 2843.0,
            "65": 2648.0,
            "66": 2842.0,
            "67": 2954.0,
            "68": 2833.0,
            "69": 3027.0,
            "70": 2993.0,
            "71": 3010.0,
            "72": 2597.0,
            "73": 3002.0,
            "74": 2325.0,
            "75": 2882.0,
            "76": 3143.0,
            "77": 3062.0,
            "78": 3272.0,
            "79": 3303.0,
            "80": 3280.0,
            "81": 3517.0,
            "82": 3283.0,
            "83": 2834.0,
            "84": 3365.0,
            "85": 3288.0,
            "86": 2562.0,
            "87": 3493.0,
            "88": 3388.0,
            "89": 3102.0,
            "90": 3230.0,
            "91": 3154.0,
            "92": 3263.0,
            "93": 2967.0,
            "94": 3520.0,
            "95": 3175.0,
            "96": 3317.0,
            "97": 2999.0,
            "98": 3549.0,
            "99": 3248.0,
            "100": 3227.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 888098816.0,
            "52": 888098816.0,
            "53": 888098816.0,
            "54": 888098816.0,
            "55": 888098816.0,
            "56": 888098816.0,
            "57": 888098816.0,
            "58": 888098816.0,
            "59": 888098816.0,
            "60": 888098816.0,
            "61": 888098816.0,
            "62": 888098816.0,
            "63": 888098816.0,
            "64": 888098816.0,
            "65": 888098816.0,
            "66": 888098816.0,
            "67": 888098816.0,
            "68": 888098816.0,
            "69": 888098816.0,
            "70": 888098816.0,
            "71": 888098816.0,
            "72": 888098816.0,
            "73": 888098816.0,
            "74": 888098816.0,
            "75": 888098816.0,
            "76": 888098816.0,
            "77": 888098816.0,
            "78": 888098816.0,
            "79": 888098816.0,
            "80": 888098816.0,
            "81": 888098816.0,
            "82": 888098816.0,
            "83": 888098816.0,
            "84": 888098816.0,
            "85": 888098816.0,
            "86": 888098816.0,
            "87": 888098816.0,
            "88": 888098816.0,
            "89": 888098816.0,
            "90": 888098816.0,
            "91": 888098816.0,
            "92": 888098816.0,
            "93": 888098816.0,
            "94": 888098816.0,
            "95": 888098816.0,
            "96": 888098816.0,
            "97": 888098816.0,
            "98": 888098816.0,
            "99": 888098816.0,
            "100": 888098816.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2595090432.0,
            "52": 2595091456.0,
            "53": 2595091456.0,
            "54": 2595091456.0,
            "55": 2595091456.0,
            "56": 2595091456.0,
            "57": 2595091456.0,
            "58": 2595091456.0,
            "59": 2595091456.0,
            "60": 2595091456.0,
            "61": 2595091456.0,
            "62": 2595091456.0,
            "63": 2595091456.0,
            "64": 2595091456.0,
            "65": 2595091456.0,
            "66": 2595091456.0,
            "67": 2595091456.0,
            "68": 2595091456.0,
            "69": 2595091456.0,
            "70": 2595091456.0,
            "71": 2595091456.0,
            "72": 2595091456.0,
            "73": 2595091456.0,
            "74": 2595091456.0,
            "75": 2595091456.0,
            "76": 2595091456.0,
            "77": 2595091456.0,
            "78": 2595091456.0,
            "79": 2595091456.0,
            "80": 2595091456.0,
            "81": 2595091456.0,
            "82": 2595091456.0,
            "83": 2595091456.0,
            "84": 2595091456.0,
            "85": 2595091456.0,
            "86": 2595091456.0,
            "87": 2595091456.0,
            "88": 2595091456.0,
            "89": 2595091456.0,
            "90": 2595091456.0,
            "91": 2595091456.0,
            "92": 2595091456.0,
            "93": 2595091456.0,
            "94": 2595091456.0,
            "95": 2595091456.0,
            "96": 2595091456.0,
            "97": 2595091456.0,
            "98": 2595091456.0,
            "99": 2595091456.0,
            "100": 2595091456.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 3.7416,
            "52": 0.17157,
            "53": 0.14456,
            "54": 0.14361,
            "55": 0.14299,
            "56": 0.14258,
            "57": 0.14257,
            "58": 0.14319,
            "59": 0.14348,
            "60": 0.1429,
            "61": 0.14295,
            "62": 0.1431,
            "63": 0.1419,
            "64": 0.14379,
            "65": 0.59005,
            "66": 0.15082,
            "67": 0.14226,
            "68": 0.14098,
            "69": 0.14096,
            "70": 0.1413,
            "71": 0.14073,
            "72": 0.14094,
            "73": 0.14097,
            "74": 0.14117,
            "75": 0.14054,
            "76": 0.14081,
            "77": 0.14153,
            "78": 0.59387,
            "79": 0.14301,
            "80": 0.14139,
            "81": 0.14173,
            "82": 0.1418,
            "83": 0.14133,
            "84": 0.14096,
            "85": 0.14024,
            "86": 0.14063,
            "87": 0.14049,
            "88": 0.14117,
            "89": 0.14144,
            "90": 0.14055,
            "91": 0.14175,
            "92": 0.14246,
            "93": 0.14114,
            "94": 0.14391,
            "95": 0.14119,
            "96": 0.14114,
            "97": 0.14158,
            "98": 0.1408,
            "99": 0.14214,
            "100": 0.14462
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgxa100_dracooci-ord.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.84474,
            "2": 10.84714,
            "3": 10.84155,
            "4": 10.82474,
            "5": 10.86418,
            "6": 10.87687,
            "7": 10.86881,
            "8": 10.85782,
            "9": 10.86927,
            "10": 10.82155,
            "11": 10.90254,
            "12": 10.87935,
            "13": 10.88455,
            "14": 10.89946,
            "15": 10.81195,
            "16": 10.81872,
            "17": 10.8008,
            "18": 10.82581,
            "19": 10.82045,
            "20": 10.71872,
            "21": 10.67848,
            "22": 10.5397,
            "23": 10.71982,
            "24": 10.57533,
            "25": 10.53036,
            "26": 10.60075,
            "27": 10.61432,
            "28": 10.57308,
            "29": 10.58758,
            "30": 10.3358,
            "31": 10.06363,
            "32": 10.46475,
            "33": 10.43552,
            "34": 10.17388,
            "35": 10.24081,
            "36": 10.19268,
            "37": 10.3222,
            "38": 10.15004,
            "39": 10.37797,
            "40": 10.05008,
            "41": 10.11342,
            "42": 10.17323,
            "43": 9.76225,
            "44": 9.89234,
            "45": 9.76762,
            "46": 9.75986,
            "47": 10.09534,
            "48": 9.78722,
            "49": 9.45529,
            "50": 9.85505,
            "51": 9.79116,
            "52": 9.68704,
            "53": 10.02199,
            "54": 9.90262,
            "55": 9.82465,
            "56": 9.56989,
            "57": 9.40892,
            "58": 9.77732,
            "59": 9.52733,
            "60": 9.44306,
            "61": 9.64215,
            "62": 9.94224,
            "63": 9.31031,
            "64": 9.72428,
            "65": 8.89104,
            "66": 9.65351,
            "67": 9.31775,
            "68": 9.73884,
            "69": 9.7436,
            "70": 9.67902,
            "71": 9.56185,
            "72": 9.53074,
            "73": 9.44621,
            "74": 8.88449,
            "75": 9.36836,
            "76": 9.02423,
            "77": 10.0162,
            "78": 9.68193,
            "79": 9.327,
            "80": 9.35799,
            "81": 9.43376,
            "82": 9.64749,
            "83": 9.25646,
            "84": 9.3666,
            "85": 9.56032,
            "86": 9.0356,
            "87": 9.54626,
            "88": 9.70003,
            "89": 9.54986,
            "90": 9.77055,
            "91": 9.28744,
            "92": 9.31156,
            "93": 9.03212,
            "94": 8.78135,
            "95": 9.48101,
            "96": 9.47679,
            "97": 9.24913,
            "98": 9.61711,
            "99": 8.83684,
            "100": 9.34997
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1776.0,
            "2": 1837.0,
            "3": 1749.0,
            "4": 1902.0,
            "5": 2128.0,
            "6": 2161.0,
            "7": 1990.0,
            "8": 1860.0,
            "9": 1953.0,
            "10": 1615.0,
            "11": 2052.0,
            "12": 1809.0,
            "13": 2136.0,
            "14": 1966.0,
            "15": 2021.0,
            "16": 1892.0,
            "17": 1945.0,
            "18": 1826.0,
            "19": 1858.0,
            "20": 1775.0,
            "21": 1971.0,
            "22": 1818.0,
            "23": 2137.0,
            "24": 1842.0,
            "25": 1916.0,
            "26": 1946.0,
            "27": 1940.0,
            "28": 2046.0,
            "29": 2000.0,
            "30": 2029.0,
            "31": 1701.0,
            "32": 2056.0,
            "33": 2208.0,
            "34": 2024.0,
            "35": 2107.0,
            "36": 1985.0,
            "37": 2243.0,
            "38": 2228.0,
            "39": 2433.0,
            "40": 2174.0,
            "41": 2295.0,
            "42": 2262.0,
            "43": 2097.0,
            "44": 2291.0,
            "45": 2110.0,
            "46": 2293.0,
            "47": 2553.0,
            "48": 2368.0,
            "49": 2280.0,
            "50": 2363.0,
            "51": 2596.0,
            "52": 2582.0,
            "53": 2816.0,
            "54": 2729.0,
            "55": 2460.0,
            "56": 2735.0,
            "57": 2451.0,
            "58": 2746.0,
            "59": 2848.0,
            "60": 2462.0,
            "61": 2890.0,
            "62": 2565.0,
            "63": 2520.0,
            "64": 2932.0,
            "65": 2724.0,
            "66": 3014.0,
            "67": 2958.0,
            "68": 2847.0,
            "69": 2937.0,
            "70": 2952.0,
            "71": 2954.0,
            "72": 2617.0,
            "73": 3068.0,
            "74": 2239.0,
            "75": 2823.0,
            "76": 3073.0,
            "77": 3109.0,
            "78": 3263.0,
            "79": 3254.0,
            "80": 3222.0,
            "81": 3475.0,
            "82": 3277.0,
            "83": 2732.0,
            "84": 3393.0,
            "85": 3314.0,
            "86": 2674.0,
            "87": 3433.0,
            "88": 3250.0,
            "89": 3089.0,
            "90": 3087.0,
            "91": 3070.0,
            "92": 3358.0,
            "93": 2823.0,
            "94": 3442.0,
            "95": 3146.0,
            "96": 3256.0,
            "97": 3086.0,
            "98": 3563.0,
            "99": 3247.0,
            "100": 3331.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 888098304.0,
            "2": 888098304.0,
            "3": 888098304.0,
            "4": 888098304.0,
            "5": 888098304.0,
            "6": 888098304.0,
            "7": 888098304.0,
            "8": 888098304.0,
            "9": 888098304.0,
            "10": 888098304.0,
            "11": 888098304.0,
            "12": 888098304.0,
            "13": 888098304.0,
            "14": 888098304.0,
            "15": 888098304.0,
            "16": 888098304.0,
            "17": 888098304.0,
            "18": 888098304.0,
            "19": 888098304.0,
            "20": 888098304.0,
            "21": 888098304.0,
            "22": 888098304.0,
            "23": 888098304.0,
            "24": 888098304.0,
            "25": 888098304.0,
            "26": 888098304.0,
            "27": 888098304.0,
            "28": 888098304.0,
            "29": 888098304.0,
            "30": 888098304.0,
            "31": 888098304.0,
            "32": 888098304.0,
            "33": 888098304.0,
            "34": 888098304.0,
            "35": 888098304.0,
            "36": 888098304.0,
            "37": 888098304.0,
            "38": 888098304.0,
            "39": 888098304.0,
            "40": 888098304.0,
            "41": 888098304.0,
            "42": 888098304.0,
            "43": 888098304.0,
            "44": 888098304.0,
            "45": 888098304.0,
            "46": 888098304.0,
            "47": 888098304.0,
            "48": 888098304.0,
            "49": 888098304.0,
            "50": 888098304.0,
            "51": 888098304.0,
            "52": 888098304.0,
            "53": 888098304.0,
            "54": 888098304.0,
            "55": 888098304.0,
            "56": 888098304.0,
            "57": 888098304.0,
            "58": 888098304.0,
            "59": 888098304.0,
            "60": 888098304.0,
            "61": 888098304.0,
            "62": 888098304.0,
            "63": 888098304.0,
            "64": 888098304.0,
            "65": 888098304.0,
            "66": 888098304.0,
            "67": 888098304.0,
            "68": 888098304.0,
            "69": 888098304.0,
            "70": 888098304.0,
            "71": 888098304.0,
            "72": 888098304.0,
            "73": 888098304.0,
            "74": 888098304.0,
            "75": 888098304.0,
            "76": 888098304.0,
            "77": 888098304.0,
            "78": 888098304.0,
            "79": 888098304.0,
            "80": 888098304.0,
            "81": 888098304.0,
            "82": 888098304.0,
            "83": 888098304.0,
            "84": 888098304.0,
            "85": 888098304.0,
            "86": 888098304.0,
            "87": 888098304.0,
            "88": 888098304.0,
            "89": 888098304.0,
            "90": 888098304.0,
            "91": 888098304.0,
            "92": 888098304.0,
            "93": 888098304.0,
            "94": 888098304.0,
            "95": 888098304.0,
            "96": 888098304.0,
            "97": 888098304.0,
            "98": 888098304.0,
            "99": 888098304.0,
            "100": 888098304.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 3216302592.0,
            "2": 3575768576.0,
            "3": 3575768576.0,
            "4": 3575768576.0,
            "5": 3575768576.0,
            "6": 3575768576.0,
            "7": 3575768576.0,
            "8": 3575768576.0,
            "9": 3575768576.0,
            "10": 3575768576.0,
            "11": 3575768576.0,
            "12": 3575768576.0,
            "13": 3575768576.0,
            "14": 3575768576.0,
            "15": 3575768576.0,
            "16": 3575768576.0,
            "17": 3575768576.0,
            "18": 3575768576.0,
            "19": 3575768576.0,
            "20": 3575768576.0,
            "21": 3575768576.0,
            "22": 3575768576.0,
            "23": 3575768576.0,
            "24": 3575768576.0,
            "25": 3575768576.0,
            "26": 3575768576.0,
            "27": 3575768576.0,
            "28": 3575768576.0,
            "29": 3575768576.0,
            "30": 3575768576.0,
            "31": 3575768576.0,
            "32": 3575768576.0,
            "33": 3575768576.0,
            "34": 3575768576.0,
            "35": 3575768576.0,
            "36": 3575768576.0,
            "37": 3575768576.0,
            "38": 3575768576.0,
            "39": 3575768576.0,
            "40": 3575768576.0,
            "41": 3575768576.0,
            "42": 3575768576.0,
            "43": 3575768576.0,
            "44": 3575768576.0,
            "45": 3575768576.0,
            "46": 3575768576.0,
            "47": 3575768576.0,
            "48": 3575768576.0,
            "49": 3575768576.0,
            "50": 3575768576.0,
            "51": 3575768576.0,
            "52": 3575768576.0,
            "53": 3575768576.0,
            "54": 3575768576.0,
            "55": 3575768576.0,
            "56": 3575768576.0,
            "57": 3575768576.0,
            "58": 3575768576.0,
            "59": 3575768576.0,
            "60": 3575768576.0,
            "61": 3575768576.0,
            "62": 3575768576.0,
            "63": 3575768576.0,
            "64": 3575768576.0,
            "65": 3575768576.0,
            "66": 3575768576.0,
            "67": 3575768576.0,
            "68": 3575768576.0,
            "69": 3575768576.0,
            "70": 3575768576.0,
            "71": 3575768576.0,
            "72": 3575768576.0,
            "73": 3575768576.0,
            "74": 3575768576.0,
            "75": 3575768576.0,
            "76": 3575768576.0,
            "77": 3575768576.0,
            "78": 3575768576.0,
            "79": 3575768576.0,
            "80": 3575768576.0,
            "81": 3575768576.0,
            "82": 3575768576.0,
            "83": 3575768576.0,
            "84": 3575768576.0,
            "85": 3575768576.0,
            "86": 3575768576.0,
            "87": 3575768576.0,
            "88": 3575768576.0,
            "89": 3575768576.0,
            "90": 3575768576.0,
            "91": 3575768576.0,
            "92": 3575768576.0,
            "93": 3575768576.0,
            "94": 3575768576.0,
            "95": 3575768576.0,
            "96": 3575768576.0,
            "97": 3575768576.0,
            "98": 3575768576.0,
            "99": 3575768576.0,
            "100": 3575768576.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 11.22961,
            "2": 0.22748,
            "3": 0.18391,
            "4": 0.18331,
            "5": 0.1874,
            "6": 0.18206,
            "7": 0.18807,
            "8": 0.18736,
            "9": 0.17626,
            "10": 0.18332,
            "11": 0.18368,
            "12": 0.42125,
            "13": 0.18444,
            "14": 0.18305,
            "15": 0.1848,
            "16": 0.18368,
            "17": 0.18426,
            "18": 0.18316,
            "19": 0.18444,
            "20": 0.18426,
            "21": 0.18455,
            "22": 0.18314,
            "23": 0.18337,
            "24": 0.18472,
            "25": 0.18337,
            "26": 0.18358,
            "27": 0.18264,
            "28": 0.18257,
            "29": 0.18324,
            "30": 0.18335,
            "31": 0.18284,
            "32": 0.18259,
            "33": 0.18301,
            "34": 0.18387,
            "35": 0.1854,
            "36": 0.18356,
            "37": 0.18347,
            "38": 0.18279,
            "39": 0.18388,
            "40": 0.18293,
            "41": 0.1825,
            "42": 0.17397,
            "43": 0.17567,
            "44": 0.17489,
            "45": 0.17541,
            "46": 0.17602,
            "47": 0.38172,
            "48": 0.1751,
            "49": 0.1743,
            "50": 0.17335,
            "51": 0.17566,
            "52": 0.1679,
            "53": 0.16794,
            "54": 0.16866,
            "55": 0.16905,
            "56": 0.16842,
            "57": 0.16848,
            "58": 0.16761,
            "59": 0.16753,
            "60": 0.16801,
            "61": 0.16865,
            "62": 0.16798,
            "63": 0.16843,
            "64": 0.16707,
            "65": 0.16694,
            "66": 0.16951,
            "67": 0.16784,
            "68": 0.16521,
            "69": 0.16496,
            "70": 0.16411,
            "71": 0.16368,
            "72": 0.16388,
            "73": 0.16443,
            "74": 0.16404,
            "75": 0.16491,
            "76": 0.16453,
            "77": 0.16357,
            "78": 0.1639,
            "79": 0.16482,
            "80": 0.1642,
            "81": 0.17333,
            "82": 0.17353,
            "83": 0.17251,
            "84": 0.17307,
            "85": 0.17382,
            "86": 0.17698,
            "87": 0.18538,
            "88": 0.18078,
            "89": 0.17207,
            "90": 0.17225,
            "91": 0.17489,
            "92": 0.17401,
            "93": 0.17299,
            "94": 0.17352,
            "95": 0.17399,
            "96": 0.1736,
            "97": 0.17413,
            "98": 0.17369,
            "99": 0.17278,
            "100": 0.17242
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgxa100_dracooci.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.84474,
            "2": 10.84714,
            "3": 10.84155,
            "4": 10.82474,
            "5": 10.86418,
            "6": 10.87687,
            "7": 10.86881,
            "8": 10.85782,
            "9": 10.86927,
            "10": 10.82155,
            "11": 10.90254,
            "12": 10.87935,
            "13": 10.88455,
            "14": 10.89946,
            "15": 10.81195,
            "16": 10.81872,
            "17": 10.8008,
            "18": 10.82581,
            "19": 10.82045,
            "20": 10.71872,
            "21": 10.67848,
            "22": 10.5397,
            "23": 10.71982,
            "24": 10.57533,
            "25": 10.53036,
            "26": 10.60075,
            "27": 10.61432,
            "28": 10.57308,
            "29": 10.58758,
            "30": 10.3358,
            "31": 10.06363,
            "32": 10.46475,
            "33": 10.43552,
            "34": 10.17388,
            "35": 10.24081,
            "36": 10.19268,
            "37": 10.3222,
            "38": 10.15004,
            "39": 10.37797,
            "40": 10.05008,
            "41": 10.11342,
            "42": 10.17323,
            "43": 9.76225,
            "44": 9.89234,
            "45": 9.76762,
            "46": 9.75986,
            "47": 10.09534,
            "48": 9.78722,
            "49": 9.45529,
            "50": 9.85505,
            "51": 9.79116,
            "52": 9.68704,
            "53": 10.02199,
            "54": 9.90262,
            "55": 9.82465,
            "56": 9.56989,
            "57": 9.40892,
            "58": 9.77732,
            "59": 9.52733,
            "60": 9.44306,
            "61": 9.64215,
            "62": 9.94224,
            "63": 9.31031,
            "64": 9.72428,
            "65": 8.89104,
            "66": 9.65351,
            "67": 9.31775,
            "68": 9.73884,
            "69": 9.7436,
            "70": 9.67902,
            "71": 9.56185,
            "72": 9.53074,
            "73": 9.44621,
            "74": 8.88449,
            "75": 9.36836,
            "76": 9.02423,
            "77": 10.0162,
            "78": 9.68193,
            "79": 9.327,
            "80": 9.35799,
            "81": 9.43376,
            "82": 9.64749,
            "83": 9.25646,
            "84": 9.3666,
            "85": 9.56032,
            "86": 9.0356,
            "87": 9.54626,
            "88": 9.70003,
            "89": 9.54986,
            "90": 9.77055,
            "91": 9.28744,
            "92": 9.31156,
            "93": 9.03212,
            "94": 8.78135,
            "95": 9.48101,
            "96": 9.47679,
            "97": 9.24913,
            "98": 9.61711,
            "99": 8.83684,
            "100": 9.34997
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1776.0,
            "2": 1837.0,
            "3": 1749.0,
            "4": 1902.0,
            "5": 2128.0,
            "6": 2161.0,
            "7": 1990.0,
            "8": 1860.0,
            "9": 1953.0,
            "10": 1615.0,
            "11": 2052.0,
            "12": 1809.0,
            "13": 2136.0,
            "14": 1966.0,
            "15": 2021.0,
            "16": 1892.0,
            "17": 1945.0,
            "18": 1826.0,
            "19": 1858.0,
            "20": 1775.0,
            "21": 1971.0,
            "22": 1818.0,
            "23": 2137.0,
            "24": 1842.0,
            "25": 1916.0,
            "26": 1946.0,
            "27": 1940.0,
            "28": 2046.0,
            "29": 2000.0,
            "30": 2029.0,
            "31": 1701.0,
            "32": 2056.0,
            "33": 2208.0,
            "34": 2024.0,
            "35": 2107.0,
            "36": 1985.0,
            "37": 2243.0,
            "38": 2228.0,
            "39": 2433.0,
            "40": 2174.0,
            "41": 2295.0,
            "42": 2262.0,
            "43": 2097.0,
            "44": 2291.0,
            "45": 2110.0,
            "46": 2293.0,
            "47": 2553.0,
            "48": 2368.0,
            "49": 2280.0,
            "50": 2363.0,
            "51": 2596.0,
            "52": 2582.0,
            "53": 2816.0,
            "54": 2729.0,
            "55": 2460.0,
            "56": 2735.0,
            "57": 2451.0,
            "58": 2746.0,
            "59": 2848.0,
            "60": 2462.0,
            "61": 2890.0,
            "62": 2565.0,
            "63": 2520.0,
            "64": 2932.0,
            "65": 2724.0,
            "66": 3014.0,
            "67": 2958.0,
            "68": 2847.0,
            "69": 2937.0,
            "70": 2952.0,
            "71": 2954.0,
            "72": 2617.0,
            "73": 3068.0,
            "74": 2239.0,
            "75": 2823.0,
            "76": 3073.0,
            "77": 3109.0,
            "78": 3263.0,
            "79": 3254.0,
            "80": 3222.0,
            "81": 3475.0,
            "82": 3277.0,
            "83": 2732.0,
            "84": 3393.0,
            "85": 3314.0,
            "86": 2674.0,
            "87": 3433.0,
            "88": 3250.0,
            "89": 3089.0,
            "90": 3087.0,
            "91": 3070.0,
            "92": 3358.0,
            "93": 2823.0,
            "94": 3442.0,
            "95": 3146.0,
            "96": 3256.0,
            "97": 3086.0,
            "98": 3563.0,
            "99": 3247.0,
            "100": 3331.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 888098304.0,
            "2": 888098304.0,
            "3": 888098304.0,
            "4": 888098304.0,
            "5": 888098304.0,
            "6": 888098304.0,
            "7": 888098304.0,
            "8": 888098304.0,
            "9": 888098304.0,
            "10": 888098304.0,
            "11": 888098304.0,
            "12": 888098304.0,
            "13": 888098304.0,
            "14": 888098304.0,
            "15": 888098304.0,
            "16": 888098304.0,
            "17": 888098304.0,
            "18": 888098304.0,
            "19": 888098304.0,
            "20": 888098304.0,
            "21": 888098304.0,
            "22": 888098304.0,
            "23": 888098304.0,
            "24": 888098304.0,
            "25": 888098304.0,
            "26": 888098304.0,
            "27": 888098304.0,
            "28": 888098304.0,
            "29": 888098304.0,
            "30": 888098304.0,
            "31": 888098304.0,
            "32": 888098304.0,
            "33": 888098304.0,
            "34": 888098304.0,
            "35": 888098304.0,
            "36": 888098304.0,
            "37": 888098304.0,
            "38": 888098304.0,
            "39": 888098304.0,
            "40": 888098304.0,
            "41": 888098304.0,
            "42": 888098304.0,
            "43": 888098304.0,
            "44": 888098304.0,
            "45": 888098304.0,
            "46": 888098304.0,
            "47": 888098304.0,
            "48": 888098304.0,
            "49": 888098304.0,
            "50": 888098304.0,
            "51": 888098304.0,
            "52": 888098304.0,
            "53": 888098304.0,
            "54": 888098304.0,
            "55": 888098304.0,
            "56": 888098304.0,
            "57": 888098304.0,
            "58": 888098304.0,
            "59": 888098304.0,
            "60": 888098304.0,
            "61": 888098304.0,
            "62": 888098304.0,
            "63": 888098304.0,
            "64": 888098304.0,
            "65": 888098304.0,
            "66": 888098304.0,
            "67": 888098304.0,
            "68": 888098304.0,
            "69": 888098304.0,
            "70": 888098304.0,
            "71": 888098304.0,
            "72": 888098304.0,
            "73": 888098304.0,
            "74": 888098304.0,
            "75": 888098304.0,
            "76": 888098304.0,
            "77": 888098304.0,
            "78": 888098304.0,
            "79": 888098304.0,
            "80": 888098304.0,
            "81": 888098304.0,
            "82": 888098304.0,
            "83": 888098304.0,
            "84": 888098304.0,
            "85": 888098304.0,
            "86": 888098304.0,
            "87": 888098304.0,
            "88": 888098304.0,
            "89": 888098304.0,
            "90": 888098304.0,
            "91": 888098304.0,
            "92": 888098304.0,
            "93": 888098304.0,
            "94": 888098304.0,
            "95": 888098304.0,
            "96": 888098304.0,
            "97": 888098304.0,
            "98": 888098304.0,
            "99": 888098304.0,
            "100": 888098304.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 3216302592.0,
            "2": 3575768576.0,
            "3": 3575768576.0,
            "4": 3575768576.0,
            "5": 3575768576.0,
            "6": 3575768576.0,
            "7": 3575768576.0,
            "8": 3575768576.0,
            "9": 3575768576.0,
            "10": 3575768576.0,
            "11": 3575768576.0,
            "12": 3575768576.0,
            "13": 3575768576.0,
            "14": 3575768576.0,
            "15": 3575768576.0,
            "16": 3575768576.0,
            "17": 3575768576.0,
            "18": 3575768576.0,
            "19": 3575768576.0,
            "20": 3575768576.0,
            "21": 3575768576.0,
            "22": 3575768576.0,
            "23": 3575768576.0,
            "24": 3575768576.0,
            "25": 3575768576.0,
            "26": 3575768576.0,
            "27": 3575768576.0,
            "28": 3575768576.0,
            "29": 3575768576.0,
            "30": 3575768576.0,
            "31": 3575768576.0,
            "32": 3575768576.0,
            "33": 3575768576.0,
            "34": 3575768576.0,
            "35": 3575768576.0,
            "36": 3575768576.0,
            "37": 3575768576.0,
            "38": 3575768576.0,
            "39": 3575768576.0,
            "40": 3575768576.0,
            "41": 3575768576.0,
            "42": 3575768576.0,
            "43": 3575768576.0,
            "44": 3575768576.0,
            "45": 3575768576.0,
            "46": 3575768576.0,
            "47": 3575768576.0,
            "48": 3575768576.0,
            "49": 3575768576.0,
            "50": 3575768576.0,
            "51": 3575768576.0,
            "52": 3575768576.0,
            "53": 3575768576.0,
            "54": 3575768576.0,
            "55": 3575768576.0,
            "56": 3575768576.0,
            "57": 3575768576.0,
            "58": 3575768576.0,
            "59": 3575768576.0,
            "60": 3575768576.0,
            "61": 3575768576.0,
            "62": 3575768576.0,
            "63": 3575768576.0,
            "64": 3575768576.0,
            "65": 3575768576.0,
            "66": 3575768576.0,
            "67": 3575768576.0,
            "68": 3575768576.0,
            "69": 3575768576.0,
            "70": 3575768576.0,
            "71": 3575768576.0,
            "72": 3575768576.0,
            "73": 3575768576.0,
            "74": 3575768576.0,
            "75": 3575768576.0,
            "76": 3575768576.0,
            "77": 3575768576.0,
            "78": 3575768576.0,
            "79": 3575768576.0,
            "80": 3575768576.0,
            "81": 3575768576.0,
            "82": 3575768576.0,
            "83": 3575768576.0,
            "84": 3575768576.0,
            "85": 3575768576.0,
            "86": 3575768576.0,
            "87": 3575768576.0,
            "88": 3575768576.0,
            "89": 3575768576.0,
            "90": 3575768576.0,
            "91": 3575768576.0,
            "92": 3575768576.0,
            "93": 3575768576.0,
            "94": 3575768576.0,
            "95": 3575768576.0,
            "96": 3575768576.0,
            "97": 3575768576.0,
            "98": 3575768576.0,
            "99": 3575768576.0,
            "100": 3575768576.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.66914,
            "2": 0.21684,
            "3": 0.17892,
            "4": 0.17346,
            "5": 0.17105,
            "6": 0.17127,
            "7": 0.17098,
            "8": 0.17217,
            "9": 0.17182,
            "10": 0.17103,
            "11": 0.17137,
            "12": 0.17055,
            "13": 0.17065,
            "14": 0.17142,
            "15": 0.17038,
            "16": 0.16903,
            "17": 0.16848,
            "18": 0.16975,
            "19": 0.16977,
            "20": 0.17019,
            "21": 0.16985,
            "22": 0.16955,
            "23": 0.16804,
            "24": 0.16891,
            "25": 0.16902,
            "26": 0.16957,
            "27": 0.16863,
            "28": 0.16926,
            "29": 0.16921,
            "30": 0.168,
            "31": 0.16922,
            "32": 0.16856,
            "33": 0.17245,
            "34": 0.16964,
            "35": 0.16929,
            "36": 0.16825,
            "37": 0.16872,
            "38": 0.16843,
            "39": 0.16954,
            "40": 0.16969,
            "41": 0.16937,
            "42": 0.1686,
            "43": 0.34614,
            "44": 0.16943,
            "45": 0.16912,
            "46": 0.16957,
            "47": 0.16789,
            "48": 0.16768,
            "49": 0.16897,
            "50": 0.16779,
            "51": 0.3373,
            "52": 0.17048,
            "53": 0.16638,
            "54": 0.16813,
            "55": 0.16767,
            "56": 0.16807,
            "57": 0.16799,
            "58": 0.16657,
            "59": 0.16804,
            "60": 0.16874,
            "61": 0.1679,
            "62": 0.16609,
            "63": 0.16577,
            "64": 0.16659,
            "65": 0.16778,
            "66": 0.16673,
            "67": 0.16832,
            "68": 0.16874,
            "69": 0.16895,
            "70": 0.16685,
            "71": 0.16724,
            "72": 0.1677,
            "73": 0.16716,
            "74": 0.16899,
            "75": 0.1687,
            "76": 0.16719,
            "77": 0.16812,
            "78": 0.1671,
            "79": 0.1671,
            "80": 0.16726,
            "81": 0.16712,
            "82": 0.16866,
            "83": 0.16717,
            "84": 0.16749,
            "85": 0.16759,
            "86": 0.16853,
            "87": 0.16786,
            "88": 0.16717,
            "89": 0.16661,
            "90": 0.16719,
            "91": 0.17397,
            "92": 0.17387,
            "93": 0.17474,
            "94": 0.17341,
            "95": 0.17473,
            "96": 0.17386,
            "97": 0.17453,
            "98": 0.17503,
            "99": 0.17293,
            "100": 0.17243
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 2
  --position-embedding-type: rope
  --rotary-interleaved: true
  --no-rope-fusion: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-checkpoint-opt_param-scheduler: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: ckpt-resume


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.85004, "5": 10.86413, "10": 10.82533, "15": 10.81501, "20": 10.72113, "25": 10.53088, "30": 10.33843, "35": 10.24208, "40": 10.05219, "45": 9.76638, "50": 9.85497}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1683.0, "5": 1927.0, "10": 1648.0, "15": 2007.0, "20": 1833.0, "25": 1805.0, "30": 2032.0, "35": 2136.0, "40": 2234.0, "45": 2271.0, "50": 2398.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 886001664.0, "5": 886001664.0, "10": 886001664.0, "15": 886001664.0, "20": 886001664.0, "25": 886001664.0, "30": 886001664.0, "35": 886001664.0, "40": 886001664.0, "45": 886001664.0, "50": 886001664.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3212633088.0, "5": 3570001920.0, "10": 3570001920.0, "15": 3570001920.0, "20": 3570001920.0, "25": 3570001920.0, "30": 3570001920.0, "35": 3570001920.0, "40": 3570001920.0, "45": 3570001920.0, "50": 3570001920.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4.76404, "5": 0.14426, "10": 0.14503, "15": 0.14512, "20": 0.14395, "25": 0.14807, "30": 0.14833, "35": 0.1429, "40": 0.14205, "45": 0.14208, "50": 0.14172}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.8468, "5": 10.8657, "10": 10.82411, "15": 10.8128, "20": 10.72008, "25": 10.53151, "30": 10.33655, "35": 10.24133, "40": 10.05096, "45": 9.76804, "50": 9.85531}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1707.0, "5": 2121.0, "10": 1606.0, "15": 1959.0, "20": 1756.0, "25": 1848.0, "30": 2091.0, "35": 2089.0, "40": 2156.0, "45": 2137.0, "50": 2317.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 888098304.0, "5": 888098304.0, "10": 888098304.0, "15": 888098304.0, "20": 888098304.0, "25": 888098304.0, "30": 888098304.0, "35": 888098304.0, "40": 888098304.0, "45": 888098304.0, "50": 888098304.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3212632576.0, "5": 3572098560.0, "10": 3572098560.0, "15": 3572098560.0, "20": 3572098560.0, "25": 3572098560.0, "30": 3572098560.0, "35": 3572098560.0, "40": 3572098560.0, "45": 3572098560.0, "50": 3572098560.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 12.77598, "5": 0.14261, "10": 0.14233, "15": 0.14134, "20": 0.14113, "25": 0.141, "30": 0.1403, "35": 0.1406, "40": 0.1401, "45": 0.13985, "50": 0.14004}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 2
  --position-embedding-type: rope
  --no-ckpt-fully-parallel-save: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.84823, "5": 10.86283, "10": 10.82271, "15": 10.81437, "20": 10.7198, "25": 10.52962, "30": 10.33756, "35": 10.24165, "40": 10.05137, "45": 9.76609, "50": 9.85463}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1624.0, "5": 1937.0, "10": 1548.0, "15": 2014.0, "20": 1696.0, "25": 1735.0, "30": 2058.0, "35": 2025.0, "40": 2155.0, "45": 2172.0, "50": 2330.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 890195968.0, "5": 890195968.0, "10": 890195968.0, "15": 890195968.0, "20": 890195968.0, "25": 890195968.0, "30": 890195968.0, "35": 890195968.0, "40": 890195968.0, "45": 890195968.0, "50": 890195968.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3215778816.0, "5": 3577341952.0, "10": 3577341952.0, "15": 3577341952.0, "20": 3577341952.0, "25": 3577341952.0, "30": 3577341952.0, "35": 3577341952.0, "40": 3577341952.0, "45": 3577341952.0, "50": 3577341952.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4.46736, "5": 0.16328, "10": 0.16563, "15": 0.16396, "20": 0.16536, "25": 0.16256, "30": 0.16286, "35": 0.1628, "40": 0.16326, "45": 0.16381, "50": 0.16538}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.84474, "5": 10.86418, "10": 10.82155, "15": 10.81195, "20": 10.71872, "25": 10.53036, "30": 10.3358, "35": 10.24082, "40": 10.05008, "45": 9.76762, "50": 9.85505}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1776.0, "5": 2128.0, "10": 1615.0, "15": 2021.0, "20": 1775.0, "25": 1916.0, "30": 2029.0, "35": 2107.0, "40": 2174.0, "45": 2110.0, "50": 2363.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 888098304.0, "5": 888098304.0, "10": 888098304.0, "15": 888098304.0, "20": 888098304.0, "25": 888098304.0, "30": 888098304.0, "35": 888098304.0, "40": 888098304.0, "45": 888098304.0, "50": 888098304.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3215778304.0, "5": 3575244288.0, "10": 3575244288.0, "15": 3575244288.0, "20": 3575244288.0, "25": 3575244288.0, "30": 3575244288.0, "35": 3575244288.0, "40": 3575244288.0, "45": 3575244288.0, "50": 3575244288.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.5247, "5": 0.16258, "10": 0.15981, "15": 0.15978, "20": 0.15979, "25": 0.15916, "30": 0.15921, "35": 0.15937, "40": 0.15879, "45": 0.15964, "50": 0.15865}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_rope_embeddings_interleaved_no_fusion/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 2
  --position-embedding-type: rope
  --rotary-interleaved: true
  --no-rope-fusion: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-optim-fully-reshardable: true
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.79436, "5": 10.84798, "10": 10.7703, "15": 10.78948, "20": 10.68039, "25": 10.506, "30": 10.33228, "35": 10.2547, "40": 10.05593, "45": 9.80637, "50": 9.89113}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1637.0, "5": 1785.0, "10": 1384.0, "15": 1933.0, "20": 1624.0, "25": 1589.0, "30": 1959.0, "35": 1973.0, "40": 2248.0, "45": 2173.0, "50": 2448.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 718931456.0, "5": 718931456.0, "10": 718931456.0, "15": 718931456.0, "20": 718931456.0, "25": 718931456.0, "30": 718931456.0, "35": 718931456.0, "40": 718931456.0, "45": 718931456.0, "50": 718931456.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2399714816.0, "5": 2685510144.0, "10": 2685510144.0, "15": 2685510144.0, "20": 2685510144.0, "25": 2685510144.0, "30": 2685510144.0, "35": 2685510144.0, "40": 2685510144.0, "45": 2685510144.0, "50": 2685510144.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3.76573, "5": 0.16293, "10": 0.16166, "15": 0.1618, "20": 0.16139, "25": 0.16605, "30": 0.162, "35": 0.16243, "40": 0.16141, "45": 0.16279, "50": 0.16404}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.79205, "5": 10.84695, "10": 10.77106, "15": 10.79093, "20": 10.68042, "25": 10.50715, "30": 10.33325, "35": 10.25545, "40": 10.05544, "45": 9.80575, "50": 9.89082}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1580.0, "5": 1901.0, "10": 1346.0, "15": 1926.0, "20": 1643.0, "25": 1683.0, "30": 1867.0, "35": 2020.0, "40": 2252.0, "45": 2243.0, "50": 2459.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 714736640.0, "5": 714736640.0, "10": 714736640.0, "15": 714736640.0, "20": 714736640.0, "25": 714736640.0, "30": 714736640.0, "35": 714736640.0, "40": 714736640.0, "45": 714736640.0, "50": 714736640.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2399714304.0, "5": 2681315328.0, "10": 2681315328.0, "15": 2681315328.0, "20": 2681315328.0, "25": 2681315328.0, "30": 2681315328.0, "35": 2681315328.0, "40": 2681315328.0, "45": 2681315328.0, "50": 2681315328.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 9.98161, "5": 0.16315, "10": 0.16378, "15": 0.16394, "20": 0.16401, "25": 0.16413, "30": 0.16367, "35": 0.16218, "40": 0.16233, "45": 0.1616, "50": 0.16157}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_disable_bias_linear/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --disable-bias-linear: true
  --async-save: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.73373, "5": 10.79214, "10": 10.70712, "15": 10.75766, "20": 10.68748, "25": 10.54799, "30": 10.45547, "35": 10.384, "40": 10.24123, "45": 9.98104, "50": 10.06474, "55": 9.98947, "60": 9.65874, "65": 9.07201, "70": 9.82077, "75": 9.54875, "80": 9.50983, "85": 9.70733, "90": 9.87781, "95": 9.60038, "100": 9.49194}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2484.0, "5": 2817.0, "10": 2416.0, "15": 2627.0, "20": 2532.0, "25": 2575.0, "30": 2636.0, "35": 2660.0, "40": 2535.0, "45": 2405.0, "50": 2544.0, "55": 2585.0, "60": 2384.0, "65": 2706.0, "70": 3124.0, "75": 2679.0, "80": 3012.0, "85": 3229.0, "90": 3433.0, "95": 3282.0, "100": 2843.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 715191808.0, "5": 715191808.0, "10": 715191808.0, "15": 715191808.0, "20": 715191808.0, "25": 715191808.0, "30": 715191808.0, "35": 715191808.0, "40": 715191808.0, "45": 715191808.0, "50": 715191808.0, "55": 715191808.0, "60": 715191808.0, "65": 715191808.0, "70": 715191808.0, "75": 715191808.0, "80": 715191808.0, "85": 715191808.0, "90": 715191808.0, "95": 715191808.0, "100": 715191808.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1910556672.0, "5": 2193004032.0, "10": 2193004032.0, "15": 2193004032.0, "20": 2193004032.0, "25": 2193004032.0, "30": 2193004032.0, "35": 2193004032.0, "40": 2193004032.0, "45": 2193004032.0, "50": 2193004032.0, "55": 2193004032.0, "60": 2193004032.0, "65": 2193004032.0, "70": 2193004032.0, "75": 2193004032.0, "80": 2193004032.0, "85": 2193004032.0, "90": 2193004032.0, "95": 2193004032.0, "100": 2193004032.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.4316, "5": 0.15744, "10": 0.15854, "15": 0.1591, "20": 0.15777, "25": 0.15679, "30": 0.1548, "35": 0.15535, "40": 0.15523, "45": 0.1551, "50": 0.15456, "55": 0.15439, "60": 0.15403, "65": 0.15425, "70": 0.15654, "75": 0.15557, "80": 0.1558, "85": 0.1559, "90": 0.15599, "95": 0.15655, "100": 0.15604}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.76985, "5": 10.81927, "10": 10.73562, "15": 10.8218, "20": 10.74747, "25": 10.58002, "30": 10.46947, "35": 10.39558, "40": 10.23019, "45": 10.00787, "50": 10.04737, "55": 10.00567, "60": 9.6777, "65": 9.09492, "70": 9.85294, "75": 9.55335, "80": 9.52073, "85": 9.72189, "90": 9.88102, "95": 9.60554, "100": 9.49925}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2680.0, "5": 2971.0, "10": 2508.0, "15": 2679.0, "20": 2461.0, "25": 2640.0, "30": 2590.0, "35": 2480.0, "40": 2570.0, "45": 2397.0, "50": 2740.0, "55": 2559.0, "60": 2289.0, "65": 2687.0, "70": 3004.0, "75": 2647.0, "80": 3054.0, "85": 3350.0, "90": 3368.0, "95": 3138.0, "100": 2490.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 745731584.0, "5": 745731584.0, "10": 745731584.0, "15": 745731584.0, "20": 745731584.0, "25": 745731584.0, "30": 745731584.0, "35": 745731584.0, "40": 745731584.0, "45": 745731584.0, "50": 745731584.0, "55": 745731584.0, "60": 745731584.0, "65": 745731584.0, "70": 745731584.0, "75": 745731584.0, "80": 745731584.0, "85": 745731584.0, "90": 745731584.0, "95": 745731584.0, "100": 745731584.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1928906752.0, "5": 2210568192.0, "10": 2210568192.0, "15": 2210568192.0, "20": 2210568192.0, "25": 2210568192.0, "30": 2210568192.0, "35": 2210568192.0, "40": 2210568192.0, "45": 2210568192.0, "50": 2210568192.0, "55": 2210568192.0, "60": 2210568192.0, "65": 2210568192.0, "70": 2210568192.0, "75": 2210568192.0, "80": 2210568192.0, "85": 2210568192.0, "90": 2210568192.0, "95": 2210568192.0, "100": 2210568192.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 13.95823, "5": 0.09453, "10": 0.09582, "15": 0.09396, "20": 0.09576, "25": 0.09433, "30": 0.09506, "35": 0.09683, "40": 0.09399, "45": 0.09218, "50": 0.09312, "55": 0.09545, "60": 0.09548, "65": 0.09342, "70": 0.09527, "75": 0.09211, "80": 0.09723, "85": 0.09778, "90": 0.09299, "95": 0.09226, "100": 0.09188}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.74049, "5": 10.79201, "10": 10.71088, "15": 10.76031, "20": 10.6891, "25": 10.54338, "30": 10.4542, "35": 10.38324, "40": 10.24296, "45": 9.9834, "50": 10.06865, "55": 9.98923, "60": 9.66705, "65": 9.07241, "70": 9.81879, "75": 9.55274, "80": 9.51057, "85": 9.70756, "90": 9.87997, "95": 9.60068, "100": 9.49262}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2527.0, "5": 2875.0, "10": 2475.0, "15": 2508.0, "20": 2650.0, "25": 2392.0, "30": 2484.0, "35": 2573.0, "40": 2559.0, "45": 2519.0, "50": 2500.0, "55": 2430.0, "60": 2191.0, "65": 2646.0, "70": 3203.0, "75": 2515.0, "80": 3140.0, "85": 3195.0, "90": 3365.0, "95": 3240.0, "100": 2507.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 715322368.0, "5": 715322368.0, "10": 715322368.0, "15": 715322368.0, "20": 715322368.0, "25": 715322368.0, "30": 715322368.0, "35": 715322368.0, "40": 715322368.0, "45": 715322368.0, "50": 715322368.0, "55": 715322368.0, "60": 715322368.0, "65": 715322368.0, "70": 715322368.0, "75": 715322368.0, "80": 715322368.0, "85": 715322368.0, "90": 715322368.0, "95": 715322368.0, "100": 715322368.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2402991104.0, "5": 2683341824.0, "10": 2683341824.0, "15": 2683341824.0, "20": 2683341824.0, "25": 2683341824.0, "30": 2683341824.0, "35": 2683341824.0, "40": 2683341824.0, "45": 2683341824.0, "50": 2683341824.0, "55": 2683341824.0, "60": 2683341824.0, "65": 2683341824.0, "70": 2683341824.0, "75": 2683341824.0, "80": 2683341824.0, "85": 2683341824.0, "90": 2683341824.0, "95": 2683341824.0, "100": 2683341824.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 17.72945, "5": 0.17021, "10": 0.17171, "15": 0.16943, "20": 0.16974, "25": 0.16893, "30": 0.16891, "35": 0.16944, "40": 0.16851, "45": 0.16916, "50": 0.16874, "55": 0.17286, "60": 0.17328, "65": 0.17679, "70": 0.17323, "75": 0.1706, "80": 0.17149, "85": 0.17726, "90": 0.171, "95": 0.17074, "100": 0.17122}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_frozen_resume_torch_dist_swiglu/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --swiglu: true
  --ckpt-fully-parallel-load: true
  --async-save: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-checkpoint-opt_param-scheduler: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-optim-fully-reshardable: true
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --log-memory-to-tensorboard: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: frozen-resume


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.79436, "5": 10.84798, "10": 10.7703, "15": 10.78948, "20": 10.68039, "25": 10.506, "30": 10.33228, "35": 10.2547, "40": 10.05593, "45": 9.80637, "50": 9.89113}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1637.0, "5": 1785.0, "10": 1384.0, "15": 1933.0, "20": 1624.0, "25": 1589.0, "30": 1959.0, "35": 1973.0, "40": 2248.0, "45": 2173.0, "50": 2448.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 718931456.0, "5": 718931456.0, "10": 718931456.0, "15": 718931456.0, "20": 718931456.0, "25": 718931456.0, "30": 718931456.0, "35": 718931456.0, "40": 718931456.0, "45": 718931456.0, "50": 718931456.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2399714816.0, "5": 2685510144.0, "10": 2685510144.0, "15": 2685510144.0, "20": 2685510144.0, "25": 2685510144.0, "30": 2685510144.0, "35": 2685510144.0, "40": 2685510144.0, "45": 2685510144.0, "50": 2685510144.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3.8439, "5": 0.16742, "10": 0.16726, "15": 0.1664, "20": 0.16549, "25": 0.16566, "30": 0.16578, "35": 0.1663, "40": 0.16678, "45": 0.16695, "50": 0.16615}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.79205, "5": 10.84695, "10": 10.77106, "15": 10.79093, "20": 10.68042, "25": 10.50715, "30": 10.33325, "35": 10.25545, "40": 10.05544, "45": 9.80575, "50": 9.89082}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1580.0, "5": 1901.0, "10": 1346.0, "15": 1926.0, "20": 1643.0, "25": 1683.0, "30": 1867.0, "35": 2020.0, "40": 2252.0, "45": 2243.0, "50": 2459.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 714736640.0, "5": 714736640.0, "10": 714736640.0, "15": 714736640.0, "20": 714736640.0, "25": 714736640.0, "30": 714736640.0, "35": 714736640.0, "40": 714736640.0, "45": 714736640.0, "50": 714736640.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2399714304.0, "5": 2681315328.0, "10": 2681315328.0, "15": 2681315328.0, "20": 2681315328.0, "25": 2681315328.0, "30": 2681315328.0, "35": 2681315328.0, "40": 2681315328.0, "45": 2681315328.0, "50": 2681315328.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 9.96829, "5": 0.16393, "10": 0.16735, "15": 0.16307, "20": 0.1644, "25": 0.16303, "30": 0.16306, "35": 0.16242, "40": 0.163, "45": 0.16274, "50": 0.16128}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_persistent_ckpt_disable_bias_linear/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --disable-bias-linear: true
  --async-save: true
  --use-persistent-ckpt-worker: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.79332, "5": 10.84763, "10": 10.77007, "15": 10.79054, "20": 10.68058, "25": 10.50697, "30": 10.33257, "35": 10.25465, "40": 10.05603, "45": 9.80619, "50": 9.89126, "55": 9.87124, "60": 9.48507, "65": 8.93913, "70": 9.72196, "75": 9.40867, "80": 9.39746, "85": 9.60793, "90": 9.81041, "95": 9.51149, "100": 9.39727}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1624.0, "5": 1824.0, "10": 1414.0, "15": 1902.0, "20": 1642.0, "25": 1593.0, "30": 2001.0, "35": 1904.0, "40": 2335.0, "45": 2140.0, "50": 2374.0, "55": 2220.0, "60": 2397.0, "65": 2591.0, "70": 3112.0, "75": 2649.0, "80": 3262.0, "85": 3309.0, "90": 3031.0, "95": 3321.0, "100": 3318.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 718931456.0, "5": 718931456.0, "10": 718931456.0, "15": 718931456.0, "20": 718931456.0, "25": 718931456.0, "30": 718931456.0, "35": 718931456.0, "40": 718931456.0, "45": 718931456.0, "50": 718931456.0, "55": 718931456.0, "60": 718931456.0, "65": 718931456.0, "70": 718931456.0, "75": 718931456.0, "80": 718931456.0, "85": 718931456.0, "90": 718931456.0, "95": 718931456.0, "100": 718931456.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1910424576.0, "5": 2195171328.0, "10": 2195171328.0, "15": 2195171328.0, "20": 2195171328.0, "25": 2195171328.0, "30": 2195171328.0, "35": 2195171328.0, "40": 2195171328.0, "45": 2195171328.0, "50": 2195171328.0, "55": 2195171328.0, "60": 2195171328.0, "65": 2195171328.0, "70": 2195171328.0, "75": 2195171328.0, "80": 2195171328.0, "85": 2195171328.0, "90": 2195171328.0, "95": 2195171328.0, "100": 2195171328.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 5.43812, "5": 0.18449, "10": 0.18478, "15": 0.18439, "20": 0.18341, "25": 0.18671, "30": 0.18719, "35": 0.18795, "40": 0.18744, "45": 0.1864, "50": 0.18732, "55": 0.18543, "60": 0.18657, "65": 0.18643, "70": 0.18691, "75": 0.18609, "80": 0.18656, "85": 0.18641, "90": 0.1851, "95": 0.18604, "100": 0.18559}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.83582,
            "2": 10.83571,
            "3": 10.83524,
            "4": 10.79949,
            "5": 10.84909,
            "6": 10.86567,
            "7": 10.82786,
            "8": 10.8363,
            "9": 10.83997,
            "10": 10.79867,
            "11": 10.86763,
            "12": 10.84992,
            "13": 10.85912,
            "14": 10.8687,
            "15": 10.80171,
            "16": 10.79189,
            "17": 10.77351,
            "18": 10.78742,
            "19": 10.78986,
            "20": 10.68447,
            "21": 10.67839,
            "22": 10.52572,
            "23": 10.70729,
            "24": 10.56549,
            "25": 10.51603,
            "26": 10.58018,
            "27": 10.58977,
            "28": 10.54551,
            "29": 10.57724,
            "30": 10.34049,
            "31": 10.07056,
            "32": 10.44508,
            "33": 10.44289,
            "34": 10.19394,
            "35": 10.2426,
            "36": 10.19235,
            "37": 10.32972,
            "38": 10.16551,
            "39": 10.38729,
            "40": 10.05174,
            "41": 10.12185,
            "42": 10.19258,
            "43": 9.80694,
            "44": 9.92474,
            "45": 9.80636,
            "46": 9.80144,
            "47": 10.12106,
            "48": 9.83126,
            "49": 9.50406,
            "50": 9.87955,
            "51": 9.83807,
            "52": 9.72057,
            "53": 10.05682,
            "54": 9.95031,
            "55": 9.88332,
            "56": 9.60428,
            "57": 9.45518,
            "58": 9.81923,
            "59": 9.58266,
            "60": 9.48844,
            "61": 9.68574,
            "62": 9.9778,
            "63": 9.36765,
            "64": 9.75912,
            "65": 8.93762,
            "66": 9.6926,
            "67": 9.36619,
            "68": 9.78309,
            "69": 9.79315,
            "70": 9.72695,
            "71": 9.62875,
            "72": 9.58006,
            "73": 9.487,
            "74": 8.92045,
            "75": 9.41127,
            "76": 9.0757,
            "77": 10.05849,
            "78": 9.72185,
            "79": 9.37321,
            "80": 9.40082,
            "81": 9.47926,
            "82": 9.69753,
            "83": 9.31033,
            "84": 9.41773,
            "85": 9.61195,
            "86": 9.07158,
            "87": 9.59659,
            "88": 9.74711,
            "89": 9.59669,
            "90": 9.82914,
            "91": 9.33728,
            "92": 9.35642,
            "93": 9.08554,
            "94": 8.82803,
            "95": 9.52843,
            "96": 9.52607,
            "97": 9.30634,
            "98": 9.66809,
            "99": 8.89459,
            "100": 9.40668
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1501.0,
            "2": 1576.0,
            "3": 1604.0,
            "4": 1778.0,
            "5": 1880.0,
            "6": 1803.0,
            "7": 1789.0,
            "8": 1669.0,
            "9": 1811.0,
            "10": 1392.0,
            "11": 1835.0,
            "12": 1663.0,
            "13": 1855.0,
            "14": 1841.0,
            "15": 1874.0,
            "16": 1844.0,
            "17": 1738.0,
            "18": 1724.0,
            "19": 1742.0,
            "20": 1612.0,
            "21": 1751.0,
            "22": 1713.0,
            "23": 1964.0,
            "24": 1632.0,
            "25": 1570.0,
            "26": 1675.0,
            "27": 1752.0,
            "28": 2026.0,
            "29": 1938.0,
            "30": 1848.0,
            "31": 1557.0,
            "32": 1926.0,
            "33": 2052.0,
            "34": 1880.0,
            "35": 2022.0,
            "36": 1926.0,
            "37": 2344.0,
            "38": 2202.0,
            "39": 2285.0,
            "40": 2225.0,
            "41": 2328.0,
            "42": 2200.0,
            "43": 1984.0,
            "44": 2142.0,
            "45": 2173.0,
            "46": 2308.0,
            "47": 2592.0,
            "48": 2460.0,
            "49": 2242.0,
            "50": 2383.0,
            "51": 2489.0,
            "52": 2497.0,
            "53": 2875.0,
            "54": 2654.0,
            "55": 2317.0,
            "56": 2599.0,
            "57": 2299.0,
            "58": 2830.0,
            "59": 2784.0,
            "60": 2437.0,
            "61": 2916.0,
            "62": 2599.0,
            "63": 2388.0,
            "64": 2785.0,
            "65": 2677.0,
            "66": 2972.0,
            "67": 2797.0,
            "68": 2752.0,
            "69": 3049.0,
            "70": 3087.0,
            "71": 2952.0,
            "72": 2411.0,
            "73": 3099.0,
            "74": 1975.0,
            "75": 2614.0,
            "76": 2941.0,
            "77": 3166.0,
            "78": 3123.0,
            "79": 3085.0,
            "80": 3198.0,
            "81": 3350.0,
            "82": 3322.0,
            "83": 2858.0,
            "84": 3125.0,
            "85": 3194.0,
            "86": 2777.0,
            "87": 3602.0,
            "88": 3006.0,
            "89": 3267.0,
            "90": 3133.0,
            "91": 2753.0,
            "92": 3113.0,
            "93": 2714.0,
            "94": 3364.0,
            "95": 3273.0,
            "96": 3202.0,
            "97": 3124.0,
            "98": 3716.0,
            "99": 3121.0,
            "100": 3131.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 745146880.0,
            "2": 745146880.0,
            "3": 745146880.0,
            "4": 745146880.0,
            "5": 745146880.0,
            "6": 745146880.0,
            "7": 745146880.0,
            "8": 745146880.0,
            "9": 745146880.0,
            "10": 745146880.0,
            "11": 745146880.0,
            "12": 745146880.0,
            "13": 745146880.0,
            "14": 745146880.0,
            "15": 745146880.0,
            "16": 745146880.0,
            "17": 745146880.0,
            "18": 745146880.0,
            "19": 745146880.0,
            "20": 745146880.0,
            "21": 745146880.0,
            "22": 745146880.0,
            "23": 745146880.0,
            "24": 745146880.0,
            "25": 745146880.0,
            "26": 745146880.0,
            "27": 745146880.0,
            "28": 745146880.0,
            "29": 745146880.0,
            "30": 745146880.0,
            "31": 745146880.0,
            "32": 745146880.0,
            "33": 745146880.0,
            "34": 745146880.0,
            "35": 745146880.0,
            "36": 745146880.0,
            "37": 745146880.0,
            "38": 745146880.0,
            "39": 745146880.0,
            "40": 745146880.0,
            "41": 745146880.0,
            "42": 745146880.0,
            "43": 745146880.0,
            "44": 745146880.0,
            "45": 745146880.0,
            "46": 745146880.0,
            "47": 745146880.0,
            "48": 745146880.0,
            "49": 745146880.0,
            "50": 745146880.0,
            "51": 745146880.0,
            "52": 745146880.0,
            "53": 745146880.0,
            "54": 745146880.0,
            "55": 745146880.0,
            "56": 745146880.0,
            "57": 745146880.0,
            "58": 745146880.0,
            "59": 745146880.0,
            "60": 745146880.0,
            "61": 745146880.0,
            "62": 745146880.0,
            "63": 745146880.0,
            "64": 745146880.0,
            "65": 745146880.0,
            "66": 745146880.0,
            "67": 745146880.0,
            "68": 745146880.0,
            "69": 745146880.0,
            "70": 745146880.0,
            "71": 745146880.0,
            "72": 745146880.0,
            "73": 745146880.0,
            "74": 745146880.0,
            "75": 745146880.0,
            "76": 745146880.0,
            "77": 745146880.0,
            "78": 745146880.0,
            "79": 745146880.0,
            "80": 745146880.0,
            "81": 745146880.0,
            "82": 745146880.0,
            "83": 745146880.0,
            "84": 745146880.0,
            "85": 745146880.0,
            "86": 745146880.0,
            "87": 745146880.0,
            "88": 745146880.0,
            "89": 745146880.0,
            "90": 745146880.0,
            "91": 745146880.0,
            "92": 745146880.0,
            "93": 745146880.0,
            "94": 745146880.0,
            "95": 745146880.0,
            "96": 745146880.0,
            "97": 745146880.0,
            "98": 745146880.0,
            "99": 745146880.0,
            "100": 745146880.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1938737152.0,
            "2": 2222434304.0,
            "3": 2222434304.0,
            "4": 2222434304.0,
            "5": 2222434304.0,
            "6": 2222434304.0,
            "7": 2222434304.0,
            "8": 2222434304.0,
            "9": 2222434304.0,
            "10": 2222434304.0,
            "11": 2222434304.0,
            "12": 2222434304.0,
            "13": 2222434304.0,
            "14": 2222434304.0,
            "15": 2222434304.0,
            "16": 2222434304.0,
            "17": 2222434304.0,
            "18": 2222434304.0,
            "19": 2222434304.0,
            "20": 2222434304.0,
            "21": 2222434304.0,
            "22": 2222434304.0,
            "23": 2222434304.0,
            "24": 2222434304.0,
            "25": 2222434304.0,
            "26": 2222434304.0,
            "27": 2222434304.0,
            "28": 2222434304.0,
            "29": 2222434304.0,
            "30": 2222434304.0,
            "31": 2222434304.0,
            "32": 2222434304.0,
            "33": 2222434304.0,
            "34": 2222434304.0,
            "35": 2222434304.0,
            "36": 2222434304.0,
            "37": 2222434304.0,
            "38": 2222434304.0,
            "39": 2222434304.0,
            "40": 2222434304.0,
            "41": 2222434304.0,
            "42": 2222434304.0,
            "43": 2222434304.0,
            "44": 2222434304.0,
            "45": 2222434304.0,
            "46": 2222434304.0,
            "47": 2222434304.0,
            "48": 2222434304.0,
            "49": 2222434304.0,
            "50": 2222434304.0,
            "51": 2222434304.0,
            "52": 2222434304.0,
            "53": 2222434304.0,
            "54": 2222434304.0,
            "55": 2222434304.0,
            "56": 2222434304.0,
            "57": 2222434304.0,
            "58": 2222434304.0,
            "59": 2222434304.0,
            "60": 2222434304.0,
            "61": 2222434304.0,
            "62": 2222434304.0,
            "63": 2222434304.0,
            "64": 2222434304.0,
            "65": 2222434304.0,
            "66": 2222434304.0,
            "67": 2222434304.0,
            "68": 2222434304.0,
            "69": 2222434304.0,
            "70": 2222434304.0,
            "71": 2222434304.0,
            "72": 2222434304.0,
            "73": 2222434304.0,
            "74": 2222434304.0,
            "75": 2222434304.0,
            "76": 2222434304.0,
            "77": 2222434304.0,
            "78": 2222434304.0,
            "79": 2222434304.0,
            "80": 2222434304.0,
            "81": 2222434304.0,
            "82": 2222434304.0,
            "83": 2222434304.0,
            "84": 2222434304.0,
            "85": 2222434304.0,
            "86": 2222434304.0,
            "87": 2222434304.0,
            "88": 2222434304.0,
            "89": 2222434304.0,
            "90": 2222434304.0,
            "91": 2222434304.0,
            "92": 2222434304.0,
            "93": 2222434304.0,
            "94": 2222434304.0,
            "95": 2222434304.0,
            "96": 2222434304.0,
            "97": 2222434304.0,
            "98": 2222434304.0,
            "99": 2222434304.0,
            "100": 2222434304.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 5.86605,
            "3": 0.13527,
            "4": 0.12296,
            "5": 0.12443,
            "6": 0.12222,
            "7": 0.12431,
            "8": 0.12301,
            "9": 0.12262,
            "10": 0.12615,
            "11": 0.12479,
            "12": 0.12612,
            "13": 0.12396,
            "14": 0.12727,
            "15": 0.1273,
            "16": 0.12644,
            "17": 0.1273,
            "18": 0.12789,
            "19": 0.12832,
            "20": 0.12576,
            "21": 0.12724,
            "22": 0.12793,
            "23": 0.1258,
            "24": 0.12564,
            "25": 0.12595,
            "26": 0.12455,
            "27": 0.12657,
            "28": 0.12588,
            "29": 0.12712,
            "30": 0.12691,
            "31": 0.126,
            "32": 0.12515,
            "33": 0.12441,
            "34": 0.12458,
            "35": 0.12577,
            "36": 0.12386,
            "37": 0.12673,
            "38": 0.1247,
            "39": 0.12614,
            "40": 0.12388,
            "41": 0.12934,
            "42": 0.12674,
            "43": 0.12687,
            "44": 0.1272,
            "45": 0.1238,
            "46": 0.12514,
            "47": 0.12467,
            "48": 0.12579,
            "49": 0.12624,
            "50": 0.12487,
            "51": 0.16156,
            "52": 0.1308,
            "53": 0.12721,
            "54": 0.12451,
            "55": 0.12517,
            "56": 0.12436,
            "57": 0.12767,
            "58": 0.12444,
            "59": 0.12475,
            "60": 0.12331,
            "61": 0.12518,
            "62": 0.12457,
            "63": 0.12132,
            "64": 0.12553,
            "65": 0.12416,
            "66": 0.12219,
            "67": 0.12402,
            "68": 0.12407,
            "69": 0.12423,
            "70": 0.12433,
            "71": 0.12449,
            "72": 0.12308,
            "73": 0.12596,
            "74": 0.12432,
            "75": 0.12395,
            "76": 0.12485,
            "77": 0.12332,
            "78": 0.12531,
            "79": 0.1263,
            "80": 0.12438,
            "81": 0.1277,
            "82": 0.12699,
            "83": 0.12503,
            "84": 0.12566,
            "85": 0.12342,
            "86": 0.12385,
            "87": 0.12328,
            "88": 0.12366,
            "89": 0.12501,
            "90": 0.1245,
            "91": 0.12538,
            "92": 0.12418,
            "93": 0.12242,
            "94": 0.12316,
            "95": 0.12244,
            "96": 0.12316,
            "97": 0.12448,
            "98": 0.12205,
            "99": 0.12459,
            "100": 0.12444
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_gb200_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.83807,
            "52": 9.72058,
            "53": 10.0568,
            "54": 9.95032,
            "55": 9.88328,
            "56": 9.60431,
            "57": 9.45518,
            "58": 9.81927,
            "59": 9.58262,
            "60": 9.48844,
            "61": 9.68577,
            "62": 9.97779,
            "63": 9.36765,
            "64": 9.75913,
            "65": 8.9376,
            "66": 9.69257,
            "67": 9.36621,
            "68": 9.78303,
            "69": 9.79318,
            "70": 9.72699,
            "71": 9.62875,
            "72": 9.58004,
            "73": 9.487,
            "74": 8.92041,
            "75": 9.41128,
            "76": 9.07564,
            "77": 10.05848,
            "78": 9.72184,
            "79": 9.3732,
            "80": 9.40079,
            "81": 9.4792,
            "82": 9.69754,
            "83": 9.31037,
            "84": 9.41777,
            "85": 9.61194,
            "86": 9.07155,
            "87": 9.59661,
            "88": 9.74709,
            "89": 9.59667,
            "90": 9.82915,
            "91": 9.33725,
            "92": 9.3564,
            "93": 9.08552,
            "94": 8.82807,
            "95": 9.52842,
            "96": 9.52611,
            "97": 9.30632,
            "98": 9.66808,
            "99": 8.89461,
            "100": 9.40666
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2546.0,
            "52": 2590.0,
            "53": 2879.0,
            "54": 2697.0,
            "55": 2316.0,
            "56": 2549.0,
            "57": 2261.0,
            "58": 2904.0,
            "59": 2740.0,
            "60": 2434.0,
            "61": 2801.0,
            "62": 2663.0,
            "63": 2502.0,
            "64": 2948.0,
            "65": 2644.0,
            "66": 2961.0,
            "67": 2813.0,
            "68": 2686.0,
            "69": 2912.0,
            "70": 3096.0,
            "71": 2854.0,
            "72": 2454.0,
            "73": 3081.0,
            "74": 1933.0,
            "75": 2465.0,
            "76": 3012.0,
            "77": 3163.0,
            "78": 2997.0,
            "79": 3089.0,
            "80": 3187.0,
            "81": 3500.0,
            "82": 3339.0,
            "83": 2705.0,
            "84": 3205.0,
            "85": 3033.0,
            "86": 2818.0,
            "87": 3671.0,
            "88": 3190.0,
            "89": 3336.0,
            "90": 3320.0,
            "91": 2698.0,
            "92": 3072.0,
            "93": 2750.0,
            "94": 3397.0,
            "95": 3317.0,
            "96": 3290.0,
            "97": 3116.0,
            "98": 3732.0,
            "99": 3049.0,
            "100": 2974.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 746195456.0,
            "52": 746195456.0,
            "53": 746195456.0,
            "54": 746195456.0,
            "55": 746195456.0,
            "56": 746195456.0,
            "57": 746195456.0,
            "58": 746195456.0,
            "59": 746195456.0,
            "60": 746195456.0,
            "61": 746195456.0,
            "62": 746195456.0,
            "63": 746195456.0,
            "64": 746195456.0,
            "65": 746195456.0,
            "66": 746195456.0,
            "67": 746195456.0,
            "68": 746195456.0,
            "69": 746195456.0,
            "70": 746195456.0,
            "71": 746195456.0,
            "72": 746195456.0,
            "73": 746195456.0,
            "74": 746195456.0,
            "75": 746195456.0,
            "76": 746195456.0,
            "77": 746195456.0,
            "78": 746195456.0,
            "79": 746195456.0,
            "80": 746195456.0,
            "81": 746195456.0,
            "82": 746195456.0,
            "83": 746195456.0,
            "84": 746195456.0,
            "85": 746195456.0,
            "86": 746195456.0,
            "87": 746195456.0,
            "88": 746195456.0,
            "89": 746195456.0,
            "90": 746195456.0,
            "91": 746195456.0,
            "92": 746195456.0,
            "93": 746195456.0,
            "94": 746195456.0,
            "95": 746195456.0,
            "96": 746195456.0,
            "97": 746195456.0,
            "98": 746195456.0,
            "99": 746195456.0,
            "100": 746195456.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2223482880.0,
            "52": 2223483904.0,
            "53": 2223483904.0,
            "54": 2223483904.0,
            "55": 2223483904.0,
            "56": 2223483904.0,
            "57": 2223483904.0,
            "58": 2223483904.0,
            "59": 2223483904.0,
            "60": 2223483904.0,
            "61": 2223483904.0,
            "62": 2223483904.0,
            "63": 2223483904.0,
            "64": 2223483904.0,
            "65": 2223483904.0,
            "66": 2223483904.0,
            "67": 2223483904.0,
            "68": 2223483904.0,
            "69": 2223483904.0,
            "70": 2223483904.0,
            "71": 2223483904.0,
            "72": 2223483904.0,
            "73": 2223483904.0,
            "74": 2223483904.0,
            "75": 2223483904.0,
            "76": 2223483904.0,
            "77": 2223483904.0,
            "78": 2223483904.0,
            "79": 2223483904.0,
            "80": 2223483904.0,
            "81": 2223483904.0,
            "82": 2223483904.0,
            "83": 2223483904.0,
            "84": 2223483904.0,
            "85": 2223483904.0,
            "86": 2223483904.0,
            "87": 2223483904.0,
            "88": 2223483904.0,
            "89": 2223483904.0,
            "90": 2223483904.0,
            "91": 2223483904.0,
            "92": 2223483904.0,
            "93": 2223483904.0,
            "94": 2223483904.0,
            "95": 2223483904.0,
            "96": 2223483904.0,
            "97": 2223483904.0,
            "98": 2223483904.0,
            "99": 2223483904.0,
            "100": 2223483904.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": "nan",
            "52": 1.94458,
            "53": 0.13487,
            "54": 0.12133,
            "55": 0.12128,
            "56": 0.12059,
            "57": 0.11937,
            "58": 0.11813,
            "59": 0.11931,
            "60": 0.12225,
            "61": 0.1198,
            "62": 0.1197,
            "63": 0.12083,
            "64": 0.12132,
            "65": 0.12067,
            "66": 0.12047,
            "67": 0.12065,
            "68": 0.12005,
            "69": 0.12047,
            "70": 0.11977,
            "71": 0.1205,
            "72": 0.11909,
            "73": 0.11956,
            "74": 0.12277,
            "75": 0.11982,
            "76": 0.12087,
            "77": 0.12003,
            "78": 0.12188,
            "79": 0.12094,
            "80": 0.12076,
            "81": 0.12072,
            "82": 0.12053,
            "83": 0.11961,
            "84": 0.12306,
            "85": 0.12275,
            "86": 0.11989,
            "87": 0.11996,
            "88": 0.1294,
            "89": 0.12077,
            "90": 0.1204,
            "91": 0.12138,
            "92": 0.11998,
            "93": 0.12202,
            "94": 0.12092,
            "95": 0.11985,
            "96": 0.11995,
            "97": 0.12124,
            "98": 0.12243,
            "99": 0.12016,
            "100": 0.12049
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.85599,
            "2": 10.8648,
            "3": 10.87042,
            "4": 10.85288,
            "5": 10.88397,
            "6": 10.89184,
            "7": 10.86732,
            "8": 10.87057,
            "9": 10.87432,
            "10": 10.84185,
            "11": 10.87989,
            "12": 10.87417,
            "13": 10.87884,
            "14": 10.89184,
            "15": 10.82659,
            "16": 10.83027,
            "17": 10.80933,
            "18": 10.81431,
            "19": 10.8167,
            "20": 10.72165,
            "21": 10.70557,
            "22": 10.56881,
            "23": 10.72025,
            "24": 10.61194,
            "25": 10.55765,
            "26": 10.61149,
            "27": 10.62635,
            "28": 10.57155,
            "29": 10.58212,
            "30": 10.36267,
            "31": 10.11682,
            "32": 10.4682,
            "33": 10.45411,
            "34": 10.21121,
            "35": 10.27207,
            "36": 10.22246,
            "37": 10.34079,
            "38": 10.18964,
            "39": 10.40228,
            "40": 10.08758,
            "41": 10.13714,
            "42": 10.21175,
            "43": 9.82878,
            "44": 9.96255,
            "45": 9.82846,
            "46": 9.80952,
            "47": 10.13734,
            "48": 9.84349,
            "49": 9.52888,
            "50": 9.91046,
            "51": 9.85075,
            "52": 9.73181,
            "53": 10.06388,
            "54": 9.95432,
            "55": 9.87204,
            "56": 9.61823,
            "57": 9.47467,
            "58": 9.82802,
            "59": 9.57962,
            "60": 9.49074,
            "61": 9.68473,
            "62": 9.99245,
            "63": 9.38364,
            "64": 9.77766,
            "65": 8.94008,
            "66": 9.70099,
            "67": 9.3605,
            "68": 9.77766,
            "69": 9.78865,
            "70": 9.73813,
            "71": 9.61811,
            "72": 9.58068,
            "73": 9.4964,
            "74": 8.93812,
            "75": 9.42081,
            "76": 9.07416,
            "77": 10.06077,
            "78": 9.71952,
            "79": 9.37088,
            "80": 9.39874,
            "81": 9.47802,
            "82": 9.69299,
            "83": 9.30276,
            "84": 9.41548,
            "85": 9.60883,
            "86": 9.07461,
            "87": 9.58826,
            "88": 9.74392,
            "89": 9.5951,
            "90": 9.81217,
            "91": 9.33796,
            "92": 9.3534,
            "93": 9.07315,
            "94": 8.83127,
            "95": 9.51524,
            "96": 9.52183,
            "97": 9.31012,
            "98": 9.66532,
            "99": 8.88179,
            "100": 9.39375
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1640.0,
            "2": 1738.0,
            "3": 1638.0,
            "4": 1810.0,
            "5": 1755.0,
            "6": 1681.0,
            "7": 1781.0,
            "8": 1502.0,
            "9": 1817.0,
            "10": 1394.0,
            "11": 1927.0,
            "12": 1691.0,
            "13": 1901.0,
            "14": 1631.0,
            "15": 1765.0,
            "16": 1864.0,
            "17": 1704.0,
            "18": 1771.0,
            "19": 1817.0,
            "20": 1831.0,
            "21": 1813.0,
            "22": 1673.0,
            "23": 2005.0,
            "24": 1553.0,
            "25": 1577.0,
            "26": 1656.0,
            "27": 1734.0,
            "28": 1896.0,
            "29": 2051.0,
            "30": 1897.0,
            "31": 1452.0,
            "32": 1785.0,
            "33": 2061.0,
            "34": 1857.0,
            "35": 1920.0,
            "36": 1990.0,
            "37": 2191.0,
            "38": 2142.0,
            "39": 2215.0,
            "40": 2166.0,
            "41": 2154.0,
            "42": 2148.0,
            "43": 1881.0,
            "44": 2066.0,
            "45": 1952.0,
            "46": 2217.0,
            "47": 2513.0,
            "48": 2356.0,
            "49": 2294.0,
            "50": 2140.0,
            "51": 2509.0,
            "52": 2528.0,
            "53": 2851.0,
            "54": 2747.0,
            "55": 2333.0,
            "56": 2724.0,
            "57": 2315.0,
            "58": 2754.0,
            "59": 2774.0,
            "60": 2336.0,
            "61": 2912.0,
            "62": 2415.0,
            "63": 2341.0,
            "64": 2837.0,
            "65": 2661.0,
            "66": 3000.0,
            "67": 2779.0,
            "68": 2691.0,
            "69": 2793.0,
            "70": 3183.0,
            "71": 2962.0,
            "72": 2393.0,
            "73": 2997.0,
            "74": 1935.0,
            "75": 2463.0,
            "76": 3065.0,
            "77": 3184.0,
            "78": 3154.0,
            "79": 3127.0,
            "80": 3286.0,
            "81": 3386.0,
            "82": 3128.0,
            "83": 2608.0,
            "84": 3079.0,
            "85": 3260.0,
            "86": 2687.0,
            "87": 3591.0,
            "88": 3035.0,
            "89": 3165.0,
            "90": 3166.0,
            "91": 2690.0,
            "92": 2897.0,
            "93": 2630.0,
            "94": 3348.0,
            "95": 3349.0,
            "96": 3288.0,
            "97": 3055.0,
            "98": 3516.0,
            "99": 3035.0,
            "100": 3109.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 747244032.0,
            "2": 747244032.0,
            "3": 747244032.0,
            "4": 747244032.0,
            "5": 747244032.0,
            "6": 747244032.0,
            "7": 747244032.0,
            "8": 747244032.0,
            "9": 747244032.0,
            "10": 747244032.0,
            "11": 747244032.0,
            "12": 747244032.0,
            "13": 747244032.0,
            "14": 747244032.0,
            "15": 747244032.0,
            "16": 747244032.0,
            "17": 747244032.0,
            "18": 747244032.0,
            "19": 747244032.0,
            "20": 747244032.0,
            "21": 747244032.0,
            "22": 747244032.0,
            "23": 747244032.0,
            "24": 747244032.0,
            "25": 747244032.0,
            "26": 747244032.0,
            "27": 747244032.0,
            "28": 747244032.0,
            "29": 747244032.0,
            "30": 747244032.0,
            "31": 747244032.0,
            "32": 747244032.0,
            "33": 747244032.0,
            "34": 747244032.0,
            "35": 747244032.0,
            "36": 747244032.0,
            "37": 747244032.0,
            "38": 747244032.0,
            "39": 747244032.0,
            "40": 747244032.0,
            "41": 747244032.0,
            "42": 747244032.0,
            "43": 747244032.0,
            "44": 747244032.0,
            "45": 747244032.0,
            "46": 747244032.0,
            "47": 747244032.0,
            "48": 747244032.0,
            "49": 747244032.0,
            "50": 747244032.0,
            "51": 747244032.0,
            "52": 747244032.0,
            "53": 747244032.0,
            "54": 747244032.0,
            "55": 747244032.0,
            "56": 747244032.0,
            "57": 747244032.0,
            "58": 747244032.0,
            "59": 747244032.0,
            "60": 747244032.0,
            "61": 747244032.0,
            "62": 747244032.0,
            "63": 747244032.0,
            "64": 747244032.0,
            "65": 747244032.0,
            "66": 747244032.0,
            "67": 747244032.0,
            "68": 747244032.0,
            "69": 747244032.0,
            "70": 747244032.0,
            "71": 747244032.0,
            "72": 747244032.0,
            "73": 747244032.0,
            "74": 747244032.0,
            "75": 747244032.0,
            "76": 747244032.0,
            "77": 747244032.0,
            "78": 747244032.0,
            "79": 747244032.0,
            "80": 747244032.0,
            "81": 747244032.0,
            "82": 747244032.0,
            "83": 747244032.0,
            "84": 747244032.0,
            "85": 747244032.0,
            "86": 747244032.0,
            "87": 747244032.0,
            "88": 747244032.0,
            "89": 747244032.0,
            "90": 747244032.0,
            "91": 747244032.0,
            "92": 747244032.0,
            "93": 747244032.0,
            "94": 747244032.0,
            "95": 747244032.0,
            "96": 747244032.0,
            "97": 747244032.0,
            "98": 747244032.0,
            "99": 747244032.0,
            "100": 747244032.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1927202816.0,
            "2": 2211948544.0,
            "3": 2211948544.0,
            "4": 2211948544.0,
            "5": 2211948544.0,
            "6": 2211948544.0,
            "7": 2211948544.0,
            "8": 2211948544.0,
            "9": 2211948544.0,
            "10": 2211948544.0,
            "11": 2211948544.0,
            "12": 2211948544.0,
            "13": 2211948544.0,
            "14": 2211948544.0,
            "15": 2211948544.0,
            "16": 2211948544.0,
            "17": 2211948544.0,
            "18": 2211948544.0,
            "19": 2211948544.0,
            "20": 2211948544.0,
            "21": 2211948544.0,
            "22": 2211948544.0,
            "23": 2211948544.0,
            "24": 2211948544.0,
            "25": 2211948544.0,
            "26": 2211948544.0,
            "27": 2211948544.0,
            "28": 2211948544.0,
            "29": 2211948544.0,
            "30": 2211948544.0,
            "31": 2211948544.0,
            "32": 2211948544.0,
            "33": 2211948544.0,
            "34": 2211948544.0,
            "35": 2211948544.0,
            "36": 2211948544.0,
            "37": 2211948544.0,
            "38": 2211948544.0,
            "39": 2211948544.0,
            "40": 2211948544.0,
            "41": 2211948544.0,
            "42": 2211948544.0,
            "43": 2211948544.0,
            "44": 2211948544.0,
            "45": 2211948544.0,
            "46": 2211948544.0,
            "47": 2211948544.0,
            "48": 2211948544.0,
            "49": 2211948544.0,
            "50": 2211948544.0,
            "51": 2211948544.0,
            "52": 2211948544.0,
            "53": 2211948544.0,
            "54": 2211948544.0,
            "55": 2211948544.0,
            "56": 2211948544.0,
            "57": 2211948544.0,
            "58": 2211948544.0,
            "59": 2211948544.0,
            "60": 2211948544.0,
            "61": 2211948544.0,
            "62": 2211948544.0,
            "63": 2211948544.0,
            "64": 2211948544.0,
            "65": 2211948544.0,
            "66": 2211948544.0,
            "67": 2211948544.0,
            "68": 2211948544.0,
            "69": 2211948544.0,
            "70": 2211948544.0,
            "71": 2211948544.0,
            "72": 2211948544.0,
            "73": 2211948544.0,
            "74": 2211948544.0,
            "75": 2211948544.0,
            "76": 2211948544.0,
            "77": 2211948544.0,
            "78": 2211948544.0,
            "79": 2211948544.0,
            "80": 2211948544.0,
            "81": 2211948544.0,
            "82": 2211948544.0,
            "83": 2211948544.0,
            "84": 2211948544.0,
            "85": 2211948544.0,
            "86": 2211948544.0,
            "87": 2211948544.0,
            "88": 2211948544.0,
            "89": 2211948544.0,
            "90": 2211948544.0,
            "91": 2211948544.0,
            "92": 2211948544.0,
            "93": 2211948544.0,
            "94": 2211948544.0,
            "95": 2211948544.0,
            "96": 2211948544.0,
            "97": 2211948544.0,
            "98": 2211948544.0,
            "99": 2211948544.0,
            "100": 2211948544.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 8.42141,
            "2": 0.12821,
            "3": 0.10969,
            "4": 0.08528,
            "5": 0.08609,
            "6": 0.08514,
            "7": 0.08511,
            "8": 0.08614,
            "9": 0.0853,
            "10": 0.08556,
            "11": 0.08506,
            "12": 0.08648,
            "13": 0.08513,
            "14": 0.08524,
            "15": 0.08502,
            "16": 0.08679,
            "17": 0.08617,
            "18": 0.08799,
            "19": 0.08587,
            "20": 0.08552,
            "21": 0.08665,
            "22": 0.08551,
            "23": 0.08517,
            "24": 0.08535,
            "25": 0.08579,
            "26": 0.08526,
            "27": 0.08602,
            "28": 0.08519,
            "29": 0.08544,
            "30": 0.08512,
            "31": 0.0856,
            "32": 0.08591,
            "33": 0.08561,
            "34": 0.08518,
            "35": 0.08492,
            "36": 0.08517,
            "37": 0.08548,
            "38": 0.08494,
            "39": 0.08594,
            "40": 0.08522,
            "41": 0.08599,
            "42": 0.0854,
            "43": 0.08536,
            "44": 0.0855,
            "45": 0.08648,
            "46": 0.088,
            "47": 0.08639,
            "48": 0.08682,
            "49": 0.08646,
            "50": 0.08529,
            "51": 0.09801,
            "52": 0.08949,
            "53": 0.08726,
            "54": 0.08702,
            "55": 0.08687,
            "56": 0.08692,
            "57": 0.08726,
            "58": 0.0871,
            "59": 0.08762,
            "60": 0.08729,
            "61": 0.08712,
            "62": 0.0868,
            "63": 0.08725,
            "64": 0.08676,
            "65": 0.08718,
            "66": 0.08682,
            "67": 0.08754,
            "68": 0.08695,
            "69": 0.08788,
            "70": 0.08724,
            "71": 0.08705,
            "72": 0.08759,
            "73": 0.08826,
            "74": 0.0871,
            "75": 0.08684,
            "76": 0.08689,
            "77": 0.08656,
            "78": 0.08667,
            "79": 0.08705,
            "80": 0.08727,
            "81": 0.0879,
            "82": 0.08956,
            "83": 0.08661,
            "84": 0.08671,
            "85": 0.08761,
            "86": 0.08652,
            "87": 0.08663,
            "88": 0.08663,
            "89": 0.08687,
            "90": 0.08718,
            "91": 0.0868,
            "92": 0.08665,
            "93": 0.08695,
            "94": 0.08685,
            "95": 0.08671,
            "96": 0.08669,
            "97": 0.08742,
            "98": 0.08628,
            "99": 0.08628,
            "100": 0.08651
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_h100_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.85075,
            "52": 9.73181,
            "53": 10.06388,
            "54": 9.95432,
            "55": 9.87204,
            "56": 9.61823,
            "57": 9.47467,
            "58": 9.82802,
            "59": 9.57962,
            "60": 9.49074,
            "61": 9.68473,
            "62": 9.99245,
            "63": 9.38364,
            "64": 9.77766,
            "65": 8.94008,
            "66": 9.70099,
            "67": 9.3605,
            "68": 9.77766,
            "69": 9.78865,
            "70": 9.73813,
            "71": 9.61811,
            "72": 9.58068,
            "73": 9.4964,
            "74": 8.93812,
            "75": 9.42081,
            "76": 9.07416,
            "77": 10.06077,
            "78": 9.71952,
            "79": 9.37088,
            "80": 9.39874,
            "81": 9.47802,
            "82": 9.69299,
            "83": 9.30276,
            "84": 9.41548,
            "85": 9.60883,
            "86": 9.07461,
            "87": 9.58826,
            "88": 9.74392,
            "89": 9.5951,
            "90": 9.81217,
            "91": 9.33796,
            "92": 9.3534,
            "93": 9.07315,
            "94": 8.83127,
            "95": 9.51524,
            "96": 9.52183,
            "97": 9.31012,
            "98": 9.66532,
            "99": 8.88179,
            "100": 9.39375
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2509.0,
            "52": 2528.0,
            "53": 2851.0,
            "54": 2747.0,
            "55": 2333.0,
            "56": 2724.0,
            "57": 2315.0,
            "58": 2754.0,
            "59": 2774.0,
            "60": 2336.0,
            "61": 2912.0,
            "62": 2415.0,
            "63": 2341.0,
            "64": 2837.0,
            "65": 2661.0,
            "66": 3000.0,
            "67": 2779.0,
            "68": 2691.0,
            "69": 2793.0,
            "70": 3183.0,
            "71": 2962.0,
            "72": 2393.0,
            "73": 2997.0,
            "74": 1935.0,
            "75": 2463.0,
            "76": 3065.0,
            "77": 3184.0,
            "78": 3154.0,
            "79": 3127.0,
            "80": 3286.0,
            "81": 3386.0,
            "82": 3128.0,
            "83": 2608.0,
            "84": 3079.0,
            "85": 3260.0,
            "86": 2687.0,
            "87": 3591.0,
            "88": 3035.0,
            "89": 3165.0,
            "90": 3166.0,
            "91": 2690.0,
            "92": 2897.0,
            "93": 2630.0,
            "94": 3348.0,
            "95": 3349.0,
            "96": 3288.0,
            "97": 3055.0,
            "98": 3516.0,
            "99": 3035.0,
            "100": 3109.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 746195456.0,
            "52": 746195456.0,
            "53": 746195456.0,
            "54": 746195456.0,
            "55": 746195456.0,
            "56": 746195456.0,
            "57": 746195456.0,
            "58": 746195456.0,
            "59": 746195456.0,
            "60": 746195456.0,
            "61": 746195456.0,
            "62": 746195456.0,
            "63": 746195456.0,
            "64": 746195456.0,
            "65": 746195456.0,
            "66": 746195456.0,
            "67": 746195456.0,
            "68": 746195456.0,
            "69": 746195456.0,
            "70": 746195456.0,
            "71": 746195456.0,
            "72": 746195456.0,
            "73": 746195456.0,
            "74": 746195456.0,
            "75": 746195456.0,
            "76": 746195456.0,
            "77": 746195456.0,
            "78": 746195456.0,
            "79": 746195456.0,
            "80": 746195456.0,
            "81": 746195456.0,
            "82": 746195456.0,
            "83": 746195456.0,
            "84": 746195456.0,
            "85": 746195456.0,
            "86": 746195456.0,
            "87": 746195456.0,
            "88": 746195456.0,
            "89": 746195456.0,
            "90": 746195456.0,
            "91": 746195456.0,
            "92": 746195456.0,
            "93": 746195456.0,
            "94": 746195456.0,
            "95": 746195456.0,
            "96": 746195456.0,
            "97": 746195456.0,
            "98": 746195456.0,
            "99": 746195456.0,
            "100": 746195456.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2210899968.0,
            "52": 2210900992.0,
            "53": 2210900992.0,
            "54": 2210900992.0,
            "55": 2210900992.0,
            "56": 2210900992.0,
            "57": 2210900992.0,
            "58": 2210900992.0,
            "59": 2210900992.0,
            "60": 2210900992.0,
            "61": 2210900992.0,
            "62": 2210900992.0,
            "63": 2210900992.0,
            "64": 2210900992.0,
            "65": 2210900992.0,
            "66": 2210900992.0,
            "67": 2210900992.0,
            "68": 2210900992.0,
            "69": 2210900992.0,
            "70": 2210900992.0,
            "71": 2210900992.0,
            "72": 2210900992.0,
            "73": 2210900992.0,
            "74": 2210900992.0,
            "75": 2210900992.0,
            "76": 2210900992.0,
            "77": 2210900992.0,
            "78": 2210900992.0,
            "79": 2210900992.0,
            "80": 2210900992.0,
            "81": 2210900992.0,
            "82": 2210900992.0,
            "83": 2210900992.0,
            "84": 2210900992.0,
            "85": 2210900992.0,
            "86": 2210900992.0,
            "87": 2210900992.0,
            "88": 2210900992.0,
            "89": 2210900992.0,
            "90": 2210900992.0,
            "91": 2210900992.0,
            "92": 2210900992.0,
            "93": 2210900992.0,
            "94": 2210900992.0,
            "95": 2210900992.0,
            "96": 2210900992.0,
            "97": 2210900992.0,
            "98": 2210900992.0,
            "99": 2210900992.0,
            "100": 2210900992.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 8.15802,
            "52": 0.13009,
            "53": 0.08915,
            "54": 0.089,
            "55": 0.08861,
            "56": 0.08871,
            "57": 0.08895,
            "58": 0.08939,
            "59": 0.08862,
            "60": 0.08875,
            "61": 0.08835,
            "62": 0.09029,
            "63": 0.09034,
            "64": 0.08922,
            "65": 0.08953,
            "66": 0.09166,
            "67": 0.08868,
            "68": 0.08954,
            "69": 0.08916,
            "70": 0.08982,
            "71": 0.08837,
            "72": 0.0903,
            "73": 0.08971,
            "74": 0.09129,
            "75": 0.09221,
            "76": 0.08837,
            "77": 0.0912,
            "78": 0.08894,
            "79": 0.08857,
            "80": 0.089,
            "81": 0.0893,
            "82": 0.08924,
            "83": 0.08842,
            "84": 0.08918,
            "85": 0.08897,
            "86": 0.08832,
            "87": 0.08827,
            "88": 0.08998,
            "89": 0.08959,
            "90": 0.08882,
            "91": 0.08911,
            "92": 0.08926,
            "93": 0.08845,
            "94": 0.08884,
            "95": 0.08981,
            "96": 0.08858,
            "97": 0.09088,
            "98": 0.09007,
            "99": 0.08931,
            "100": 0.09003
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.79205, "5": 10.84695, "10": 10.77106, "15": 10.79093, "20": 10.68042, "25": 10.50715, "30": 10.33325, "35": 10.25545, "40": 10.05544, "45": 9.80575, "50": 9.89082, "55": 9.87063, "60": 9.48478, "65": 8.94022, "70": 9.72243, "75": 9.40907, "80": 9.3976, "85": 9.60746, "90": 9.81041, "95": 9.5116, "100": 9.39722}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1580.0, "5": 1901.0, "10": 1346.0, "15": 1926.0, "20": 1643.0, "25": 1683.0, "30": 1867.0, "35": 2020.0, "40": 2252.0, "45": 2243.0, "50": 2459.0, "55": 2291.0, "60": 2404.0, "65": 2474.0, "70": 3102.0, "75": 2603.0, "80": 3420.0, "85": 3388.0, "90": 2904.0, "95": 3333.0, "100": 3347.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 714736640.0, "5": 714736640.0, "10": 714736640.0, "15": 714736640.0, "20": 714736640.0, "25": 714736640.0, "30": 714736640.0, "35": 714736640.0, "40": 714736640.0, "45": 714736640.0, "50": 714736640.0, "55": 714736640.0, "60": 714736640.0, "65": 714736640.0, "70": 714736640.0, "75": 714736640.0, "80": 714736640.0, "85": 714736640.0, "90": 714736640.0, "95": 714736640.0, "100": 714736640.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2399714304.0, "5": 2681315328.0, "10": 2681315328.0, "15": 2681315328.0, "20": 2681315328.0, "25": 2681315328.0, "30": 2681315328.0, "35": 2681315328.0, "40": 2681315328.0, "45": 2681315328.0, "50": 2681315328.0, "55": 2681315328.0, "60": 2681315328.0, "65": 2681315328.0, "70": 2681315328.0, "75": 2681315328.0, "80": 2681315328.0, "85": 2681315328.0, "90": 2681315328.0, "95": 2681315328.0, "100": 2681315328.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 14.35326, "5": 0.16389, "10": 0.16293, "15": 0.16127, "20": 0.16112, "25": 0.15978, "30": 0.16078, "35": 0.16071, "40": 0.16073, "45": 0.15927, "50": 0.15913, "55": 0.15923, "60": 0.15838, "65": 0.15923, "70": 0.15803, "75": 0.15796, "80": 0.15856, "85": 0.15928, "90": 0.15765, "95": 0.15866, "100": 0.15837}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --disable-bias-linear: true
  --async-save: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-checkpoint-opt_param-scheduler: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --log-memory-to-tensorboard: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: ckpt-resume


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.79332, "5": 10.84763, "10": 10.77007, "15": 10.79054, "20": 10.68058, "25": 10.50697, "30": 10.33257, "35": 10.25465, "40": 10.05603, "45": 9.80619, "50": 9.89126, "55": 9.87124, "60": 9.48507, "65": 8.93913, "70": 9.72196, "75": 9.40867, "80": 9.39746, "85": 9.60793, "90": 9.81041, "95": 9.51149, "100": 9.39727}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1624.0, "5": 1824.0, "10": 1414.0, "15": 1902.0, "20": 1642.0, "25": 1593.0, "30": 2001.0, "35": 1904.0, "40": 2335.0, "45": 2140.0, "50": 2374.0, "55": 2220.0, "60": 2397.0, "65": 2591.0, "70": 3112.0, "75": 2649.0, "80": 3262.0, "85": 3309.0, "90": 3031.0, "95": 3321.0, "100": 3318.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 718931456.0, "5": 718931456.0, "10": 718931456.0, "15": 718931456.0, "20": 718931456.0, "25": 718931456.0, "30": 718931456.0, "35": 718931456.0, "40": 718931456.0, "45": 718931456.0, "50": 718931456.0, "55": 718931456.0, "60": 718931456.0, "65": 718931456.0, "70": 718931456.0, "75": 718931456.0, "80": 718931456.0, "85": 718931456.0, "90": 718931456.0, "95": 718931456.0, "100": 718931456.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1910424576.0, "5": 2195171328.0, "10": 2195171328.0, "15": 2195171328.0, "20": 2195171328.0, "25": 2195171328.0, "30": 2195171328.0, "35": 2195171328.0, "40": 2195171328.0, "45": 2195171328.0, "50": 2195171328.0, "55": 2195171328.0, "60": 2195171328.0, "65": 2195171328.0, "70": 2195171328.0, "75": 2195171328.0, "80": 2195171328.0, "85": 2195171328.0, "90": 2195171328.0, "95": 2195171328.0, "100": 2195171328.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 5.68434, "5": 0.1861, "10": 0.19084, "15": 0.18664, "20": 0.18651, "25": 0.18648, "30": 0.18441, "35": 0.1853, "40": 0.18402, "45": 0.18422, "50": 0.1846, "55": 0.18785, "60": 0.18543, "65": 0.1862, "70": 0.18792, "75": 0.18668, "80": 0.18515, "85": 0.18425, "90": 0.18526, "95": 0.18502, "100": 0.18422}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.83582,
            "2": 10.83571,
            "3": 10.83524,
            "4": 10.79949,
            "5": 10.84909,
            "6": 10.86567,
            "7": 10.82786,
            "8": 10.8363,
            "9": 10.83997,
            "10": 10.79867,
            "11": 10.86763,
            "12": 10.84992,
            "13": 10.85912,
            "14": 10.8687,
            "15": 10.80171,
            "16": 10.79189,
            "17": 10.77351,
            "18": 10.78742,
            "19": 10.78986,
            "20": 10.68447,
            "21": 10.67839,
            "22": 10.52572,
            "23": 10.70729,
            "24": 10.56549,
            "25": 10.51603,
            "26": 10.58018,
            "27": 10.58977,
            "28": 10.54551,
            "29": 10.57724,
            "30": 10.34049,
            "31": 10.07056,
            "32": 10.44508,
            "33": 10.44289,
            "34": 10.19394,
            "35": 10.2426,
            "36": 10.19235,
            "37": 10.32972,
            "38": 10.16551,
            "39": 10.38729,
            "40": 10.05174,
            "41": 10.12185,
            "42": 10.19258,
            "43": 9.80694,
            "44": 9.92474,
            "45": 9.80636,
            "46": 9.80144,
            "47": 10.12106,
            "48": 9.83126,
            "49": 9.50406,
            "50": 9.87955,
            "51": 9.83807,
            "52": 9.72057,
            "53": 10.05682,
            "54": 9.95031,
            "55": 9.88332,
            "56": 9.60428,
            "57": 9.45518,
            "58": 9.81923,
            "59": 9.58266,
            "60": 9.48844,
            "61": 9.68574,
            "62": 9.9778,
            "63": 9.36765,
            "64": 9.75912,
            "65": 8.93762,
            "66": 9.6926,
            "67": 9.36619,
            "68": 9.78309,
            "69": 9.79315,
            "70": 9.72695,
            "71": 9.62875,
            "72": 9.58006,
            "73": 9.487,
            "74": 8.92045,
            "75": 9.41127,
            "76": 9.0757,
            "77": 10.05849,
            "78": 9.72185,
            "79": 9.37321,
            "80": 9.40082,
            "81": 9.47926,
            "82": 9.69753,
            "83": 9.31033,
            "84": 9.41773,
            "85": 9.61195,
            "86": 9.07158,
            "87": 9.59659,
            "88": 9.74711,
            "89": 9.59669,
            "90": 9.82914,
            "91": 9.33728,
            "92": 9.35642,
            "93": 9.08554,
            "94": 8.82803,
            "95": 9.52843,
            "96": 9.52607,
            "97": 9.30634,
            "98": 9.66809,
            "99": 8.89459,
            "100": 9.40668
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1501.0,
            "2": 1576.0,
            "3": 1604.0,
            "4": 1778.0,
            "5": 1880.0,
            "6": 1803.0,
            "7": 1789.0,
            "8": 1669.0,
            "9": 1811.0,
            "10": 1392.0,
            "11": 1835.0,
            "12": 1663.0,
            "13": 1855.0,
            "14": 1841.0,
            "15": 1874.0,
            "16": 1844.0,
            "17": 1738.0,
            "18": 1724.0,
            "19": 1742.0,
            "20": 1612.0,
            "21": 1751.0,
            "22": 1713.0,
            "23": 1964.0,
            "24": 1632.0,
            "25": 1570.0,
            "26": 1675.0,
            "27": 1752.0,
            "28": 2026.0,
            "29": 1938.0,
            "30": 1848.0,
            "31": 1557.0,
            "32": 1926.0,
            "33": 2052.0,
            "34": 1880.0,
            "35": 2022.0,
            "36": 1926.0,
            "37": 2344.0,
            "38": 2202.0,
            "39": 2285.0,
            "40": 2225.0,
            "41": 2328.0,
            "42": 2200.0,
            "43": 1984.0,
            "44": 2142.0,
            "45": 2173.0,
            "46": 2308.0,
            "47": 2592.0,
            "48": 2460.0,
            "49": 2242.0,
            "50": 2383.0,
            "51": 2489.0,
            "52": 2497.0,
            "53": 2875.0,
            "54": 2654.0,
            "55": 2317.0,
            "56": 2599.0,
            "57": 2299.0,
            "58": 2830.0,
            "59": 2784.0,
            "60": 2437.0,
            "61": 2916.0,
            "62": 2599.0,
            "63": 2388.0,
            "64": 2785.0,
            "65": 2677.0,
            "66": 2972.0,
            "67": 2797.0,
            "68": 2752.0,
            "69": 3049.0,
            "70": 3087.0,
            "71": 2952.0,
            "72": 2411.0,
            "73": 3099.0,
            "74": 1975.0,
            "75": 2614.0,
            "76": 2941.0,
            "77": 3166.0,
            "78": 3123.0,
            "79": 3085.0,
            "80": 3198.0,
            "81": 3350.0,
            "82": 3322.0,
            "83": 2858.0,
            "84": 3125.0,
            "85": 3194.0,
            "86": 2777.0,
            "87": 3602.0,
            "88": 3006.0,
            "89": 3267.0,
            "90": 3133.0,
            "91": 2753.0,
            "92": 3113.0,
            "93": 2714.0,
            "94": 3364.0,
            "95": 3273.0,
            "96": 3202.0,
            "97": 3124.0,
            "98": 3716.0,
            "99": 3121.0,
            "100": 3131.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 745146880.0,
            "2": 745146880.0,
            "3": 745146880.0,
            "4": 745146880.0,
            "5": 745146880.0,
            "6": 745146880.0,
            "7": 745146880.0,
            "8": 745146880.0,
            "9": 745146880.0,
            "10": 745146880.0,
            "11": 745146880.0,
            "12": 745146880.0,
            "13": 745146880.0,
            "14": 745146880.0,
            "15": 745146880.0,
            "16": 745146880.0,
            "17": 745146880.0,
            "18": 745146880.0,
            "19": 745146880.0,
            "20": 745146880.0,
            "21": 745146880.0,
            "22": 745146880.0,
            "23": 745146880.0,
            "24": 745146880.0,
            "25": 745146880.0,
            "26": 745146880.0,
            "27": 745146880.0,
            "28": 745146880.0,
            "29": 745146880.0,
            "30": 745146880.0,
            "31": 745146880.0,
            "32": 745146880.0,
            "33": 745146880.0,
            "34": 745146880.0,
            "35": 745146880.0,
            "36": 745146880.0,
            "37": 745146880.0,
            "38": 745146880.0,
            "39": 745146880.0,
            "40": 745146880.0,
            "41": 745146880.0,
            "42": 745146880.0,
            "43": 745146880.0,
            "44": 745146880.0,
            "45": 745146880.0,
            "46": 745146880.0,
            "47": 745146880.0,
            "48": 745146880.0,
            "49": 745146880.0,
            "50": 745146880.0,
            "51": 745146880.0,
            "52": 745146880.0,
            "53": 745146880.0,
            "54": 745146880.0,
            "55": 745146880.0,
            "56": 745146880.0,
            "57": 745146880.0,
            "58": 745146880.0,
            "59": 745146880.0,
            "60": 745146880.0,
            "61": 745146880.0,
            "62": 745146880.0,
            "63": 745146880.0,
            "64": 745146880.0,
            "65": 745146880.0,
            "66": 745146880.0,
            "67": 745146880.0,
            "68": 745146880.0,
            "69": 745146880.0,
            "70": 745146880.0,
            "71": 745146880.0,
            "72": 745146880.0,
            "73": 745146880.0,
            "74": 745146880.0,
            "75": 745146880.0,
            "76": 745146880.0,
            "77": 745146880.0,
            "78": 745146880.0,
            "79": 745146880.0,
            "80": 745146880.0,
            "81": 745146880.0,
            "82": 745146880.0,
            "83": 745146880.0,
            "84": 745146880.0,
            "85": 745146880.0,
            "86": 745146880.0,
            "87": 745146880.0,
            "88": 745146880.0,
            "89": 745146880.0,
            "90": 745146880.0,
            "91": 745146880.0,
            "92": 745146880.0,
            "93": 745146880.0,
            "94": 745146880.0,
            "95": 745146880.0,
            "96": 745146880.0,
            "97": 745146880.0,
            "98": 745146880.0,
            "99": 745146880.0,
            "100": 745146880.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1938737152.0,
            "2": 2222434304.0,
            "3": 2222434304.0,
            "4": 2222434304.0,
            "5": 2222434304.0,
            "6": 2222434304.0,
            "7": 2222434304.0,
            "8": 2222434304.0,
            "9": 2222434304.0,
            "10": 2222434304.0,
            "11": 2222434304.0,
            "12": 2222434304.0,
            "13": 2222434304.0,
            "14": 2222434304.0,
            "15": 2222434304.0,
            "16": 2222434304.0,
            "17": 2222434304.0,
            "18": 2222434304.0,
            "19": 2222434304.0,
            "20": 2222434304.0,
            "21": 2222434304.0,
            "22": 2222434304.0,
            "23": 2222434304.0,
            "24": 2222434304.0,
            "25": 2222434304.0,
            "26": 2222434304.0,
            "27": 2222434304.0,
            "28": 2222434304.0,
            "29": 2222434304.0,
            "30": 2222434304.0,
            "31": 2222434304.0,
            "32": 2222434304.0,
            "33": 2222434304.0,
            "34": 2222434304.0,
            "35": 2222434304.0,
            "36": 2222434304.0,
            "37": 2222434304.0,
            "38": 2222434304.0,
            "39": 2222434304.0,
            "40": 2222434304.0,
            "41": 2222434304.0,
            "42": 2222434304.0,
            "43": 2222434304.0,
            "44": 2222434304.0,
            "45": 2222434304.0,
            "46": 2222434304.0,
            "47": 2222434304.0,
            "48": 2222434304.0,
            "49": 2222434304.0,
            "50": 2222434304.0,
            "51": 2222434304.0,
            "52": 2222434304.0,
            "53": 2222434304.0,
            "54": 2222434304.0,
            "55": 2222434304.0,
            "56": 2222434304.0,
            "57": 2222434304.0,
            "58": 2222434304.0,
            "59": 2222434304.0,
            "60": 2222434304.0,
            "61": 2222434304.0,
            "62": 2222434304.0,
            "63": 2222434304.0,
            "64": 2222434304.0,
            "65": 2222434304.0,
            "66": 2222434304.0,
            "67": 2222434304.0,
            "68": 2222434304.0,
            "69": 2222434304.0,
            "70": 2222434304.0,
            "71": 2222434304.0,
            "72": 2222434304.0,
            "73": 2222434304.0,
            "74": 2222434304.0,
            "75": 2222434304.0,
            "76": 2222434304.0,
            "77": 2222434304.0,
            "78": 2222434304.0,
            "79": 2222434304.0,
            "80": 2222434304.0,
            "81": 2222434304.0,
            "82": 2222434304.0,
            "83": 2222434304.0,
            "84": 2222434304.0,
            "85": 2222434304.0,
            "86": 2222434304.0,
            "87": 2222434304.0,
            "88": 2222434304.0,
            "89": 2222434304.0,
            "90": 2222434304.0,
            "91": 2222434304.0,
            "92": 2222434304.0,
            "93": 2222434304.0,
            "94": 2222434304.0,
            "95": 2222434304.0,
            "96": 2222434304.0,
            "97": 2222434304.0,
            "98": 2222434304.0,
            "99": 2222434304.0,
            "100": 2222434304.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 5.94954,
            "3": 1.50291,
            "4": 0.53566,
            "5": 0.40704,
            "6": 0.34894,
            "7": 0.22372,
            "8": 0.53898,
            "9": 0.12291,
            "10": 0.16074,
            "11": 0.35135,
            "12": 0.12732,
            "13": 0.12357,
            "14": 0.12383,
            "15": 0.12737,
            "16": 0.12362,
            "17": 0.12343,
            "18": 0.12473,
            "19": 0.12595,
            "20": 0.12604,
            "21": 0.12429,
            "22": 0.12556,
            "23": 0.1253,
            "24": 0.1263,
            "25": 0.12594,
            "26": 0.12525,
            "27": 0.127,
            "28": 0.12526,
            "29": 0.12711,
            "30": 0.12734,
            "31": 0.12949,
            "32": 0.12815,
            "33": 0.12674,
            "34": 0.12825,
            "35": 0.12995,
            "36": 0.12713,
            "37": 0.12917,
            "38": 0.12617,
            "39": 0.12706,
            "40": 0.1252,
            "41": 0.12715,
            "42": 0.12676,
            "43": 0.12603,
            "44": 0.1271,
            "45": 0.12586,
            "46": 0.1272,
            "47": 0.1267,
            "48": 0.12718,
            "49": 0.12784,
            "50": 0.1242,
            "51": 0.1345,
            "52": 0.1299,
            "53": 0.14634,
            "54": 0.12463,
            "55": 0.12725,
            "56": 0.12725,
            "57": 0.12837,
            "58": 0.12517,
            "59": 0.12774,
            "60": 0.1254,
            "61": 0.12644,
            "62": 0.12626,
            "63": 0.12406,
            "64": 0.12571,
            "65": 0.12665,
            "66": 0.12645,
            "67": 0.12761,
            "68": 0.12692,
            "69": 0.12838,
            "70": 0.12887,
            "71": 0.1276,
            "72": 0.12853,
            "73": 0.12876,
            "74": 0.12752,
            "75": 0.12802,
            "76": 0.12753,
            "77": 0.12582,
            "78": 0.12904,
            "79": 0.12901,
            "80": 0.12789,
            "81": 0.12964,
            "82": 0.1301,
            "83": 0.1264,
            "84": 0.12524,
            "85": 0.1274,
            "86": 0.12805,
            "87": 0.12637,
            "88": 0.12675,
            "89": 0.12674,
            "90": 0.12851,
            "91": 0.12781,
            "92": 0.12833,
            "93": 0.1262,
            "94": 0.1255,
            "95": 0.12578,
            "96": 0.12506,
            "97": 0.12696,
            "98": 0.12507,
            "99": 0.12893,
            "100": 0.12679
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_gb200_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.83807,
            "52": 9.72058,
            "53": 10.0568,
            "54": 9.95032,
            "55": 9.88328,
            "56": 9.60431,
            "57": 9.45518,
            "58": 9.81927,
            "59": 9.58262,
            "60": 9.48844,
            "61": 9.68577,
            "62": 9.97779,
            "63": 9.36765,
            "64": 9.75913,
            "65": 8.9376,
            "66": 9.69257,
            "67": 9.36621,
            "68": 9.78303,
            "69": 9.79318,
            "70": 9.72699,
            "71": 9.62875,
            "72": 9.58004,
            "73": 9.487,
            "74": 8.92041,
            "75": 9.41128,
            "76": 9.07564,
            "77": 10.05848,
            "78": 9.72184,
            "79": 9.3732,
            "80": 9.40079,
            "81": 9.4792,
            "82": 9.69754,
            "83": 9.31037,
            "84": 9.41777,
            "85": 9.61194,
            "86": 9.07155,
            "87": 9.59661,
            "88": 9.74709,
            "89": 9.59667,
            "90": 9.82915,
            "91": 9.33725,
            "92": 9.3564,
            "93": 9.08552,
            "94": 8.82807,
            "95": 9.52842,
            "96": 9.52611,
            "97": 9.30632,
            "98": 9.66808,
            "99": 8.89461,
            "100": 9.40666
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2546.0,
            "52": 2590.0,
            "53": 2879.0,
            "54": 2697.0,
            "55": 2316.0,
            "56": 2549.0,
            "57": 2261.0,
            "58": 2904.0,
            "59": 2740.0,
            "60": 2434.0,
            "61": 2801.0,
            "62": 2663.0,
            "63": 2502.0,
            "64": 2948.0,
            "65": 2644.0,
            "66": 2961.0,
            "67": 2813.0,
            "68": 2686.0,
            "69": 2912.0,
            "70": 3096.0,
            "71": 2854.0,
            "72": 2454.0,
            "73": 3081.0,
            "74": 1933.0,
            "75": 2465.0,
            "76": 3012.0,
            "77": 3163.0,
            "78": 2997.0,
            "79": 3089.0,
            "80": 3187.0,
            "81": 3500.0,
            "82": 3339.0,
            "83": 2705.0,
            "84": 3205.0,
            "85": 3033.0,
            "86": 2818.0,
            "87": 3671.0,
            "88": 3190.0,
            "89": 3336.0,
            "90": 3320.0,
            "91": 2698.0,
            "92": 3072.0,
            "93": 2750.0,
            "94": 3397.0,
            "95": 3317.0,
            "96": 3290.0,
            "97": 3116.0,
            "98": 3732.0,
            "99": 3049.0,
            "100": 2974.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 746195456.0,
            "52": 746195456.0,
            "53": 746195456.0,
            "54": 746195456.0,
            "55": 746195456.0,
            "56": 746195456.0,
            "57": 746195456.0,
            "58": 746195456.0,
            "59": 746195456.0,
            "60": 746195456.0,
            "61": 746195456.0,
            "62": 746195456.0,
            "63": 746195456.0,
            "64": 746195456.0,
            "65": 746195456.0,
            "66": 746195456.0,
            "67": 746195456.0,
            "68": 746195456.0,
            "69": 746195456.0,
            "70": 746195456.0,
            "71": 746195456.0,
            "72": 746195456.0,
            "73": 746195456.0,
            "74": 746195456.0,
            "75": 746195456.0,
            "76": 746195456.0,
            "77": 746195456.0,
            "78": 746195456.0,
            "79": 746195456.0,
            "80": 746195456.0,
            "81": 746195456.0,
            "82": 746195456.0,
            "83": 746195456.0,
            "84": 746195456.0,
            "85": 746195456.0,
            "86": 746195456.0,
            "87": 746195456.0,
            "88": 746195456.0,
            "89": 746195456.0,
            "90": 746195456.0,
            "91": 746195456.0,
            "92": 746195456.0,
            "93": 746195456.0,
            "94": 746195456.0,
            "95": 746195456.0,
            "96": 746195456.0,
            "97": 746195456.0,
            "98": 746195456.0,
            "99": 746195456.0,
            "100": 746195456.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2223482880.0,
            "52": 2223483904.0,
            "53": 2223483904.0,
            "54": 2223483904.0,
            "55": 2223483904.0,
            "56": 2223483904.0,
            "57": 2223483904.0,
            "58": 2223483904.0,
            "59": 2223483904.0,
            "60": 2223483904.0,
            "61": 2223483904.0,
            "62": 2223483904.0,
            "63": 2223483904.0,
            "64": 2223483904.0,
            "65": 2223483904.0,
            "66": 2223483904.0,
            "67": 2223483904.0,
            "68": 2223483904.0,
            "69": 2223483904.0,
            "70": 2223483904.0,
            "71": 2223483904.0,
            "72": 2223483904.0,
            "73": 2223483904.0,
            "74": 2223483904.0,
            "75": 2223483904.0,
            "76": 2223483904.0,
            "77": 2223483904.0,
            "78": 2223483904.0,
            "79": 2223483904.0,
            "80": 2223483904.0,
            "81": 2223483904.0,
            "82": 2223483904.0,
            "83": 2223483904.0,
            "84": 2223483904.0,
            "85": 2223483904.0,
            "86": 2223483904.0,
            "87": 2223483904.0,
            "88": 2223483904.0,
            "89": 2223483904.0,
            "90": 2223483904.0,
            "91": 2223483904.0,
            "92": 2223483904.0,
            "93": 2223483904.0,
            "94": 2223483904.0,
            "95": 2223483904.0,
            "96": 2223483904.0,
            "97": 2223483904.0,
            "98": 2223483904.0,
            "99": 2223483904.0,
            "100": 2223483904.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": "nan",
            "52": 2.08357,
            "53": 0.13321,
            "54": 0.11949,
            "55": 0.11861,
            "56": 0.11817,
            "57": 0.12088,
            "58": 0.11937,
            "59": 0.11893,
            "60": 0.11961,
            "61": 0.11894,
            "62": 0.11953,
            "63": 0.11978,
            "64": 0.11983,
            "65": 0.12255,
            "66": 0.12188,
            "67": 0.12135,
            "68": 0.11972,
            "69": 0.11963,
            "70": 0.11929,
            "71": 0.11924,
            "72": 0.12023,
            "73": 0.12093,
            "74": 0.12082,
            "75": 0.11862,
            "76": 0.11797,
            "77": 0.11862,
            "78": 0.12219,
            "79": 0.12137,
            "80": 0.11873,
            "81": 0.11752,
            "82": 0.1208,
            "83": 0.11974,
            "84": 0.1182,
            "85": 0.11721,
            "86": 0.11748,
            "87": 0.11944,
            "88": 0.11934,
            "89": 0.11847,
            "90": 0.11837,
            "91": 0.11938,
            "92": 0.11761,
            "93": 0.11737,
            "94": 0.12142,
            "95": 0.12574,
            "96": 0.12197,
            "97": 0.12384,
            "98": 0.12251,
            "99": 0.13032,
            "100": 0.12305
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.85599,
            "2": 10.8648,
            "3": 10.87042,
            "4": 10.85288,
            "5": 10.88397,
            "6": 10.89184,
            "7": 10.86732,
            "8": 10.87057,
            "9": 10.87432,
            "10": 10.84185,
            "11": 10.87989,
            "12": 10.87417,
            "13": 10.87884,
            "14": 10.89184,
            "15": 10.82659,
            "16": 10.83027,
            "17": 10.80933,
            "18": 10.81431,
            "19": 10.8167,
            "20": 10.72165,
            "21": 10.70557,
            "22": 10.56881,
            "23": 10.72025,
            "24": 10.61194,
            "25": 10.55765,
            "26": 10.61149,
            "27": 10.62635,
            "28": 10.57155,
            "29": 10.58212,
            "30": 10.36267,
            "31": 10.11682,
            "32": 10.4682,
            "33": 10.45411,
            "34": 10.21121,
            "35": 10.27207,
            "36": 10.22246,
            "37": 10.34079,
            "38": 10.18964,
            "39": 10.40228,
            "40": 10.08758,
            "41": 10.13714,
            "42": 10.21175,
            "43": 9.82878,
            "44": 9.96255,
            "45": 9.82846,
            "46": 9.80952,
            "47": 10.13734,
            "48": 9.84349,
            "49": 9.52888,
            "50": 9.91046,
            "51": 9.85075,
            "52": 9.73181,
            "53": 10.06388,
            "54": 9.95432,
            "55": 9.87204,
            "56": 9.61823,
            "57": 9.47467,
            "58": 9.82802,
            "59": 9.57962,
            "60": 9.49074,
            "61": 9.68473,
            "62": 9.99245,
            "63": 9.38364,
            "64": 9.77766,
            "65": 8.94008,
            "66": 9.70099,
            "67": 9.3605,
            "68": 9.77766,
            "69": 9.78865,
            "70": 9.73813,
            "71": 9.61811,
            "72": 9.58068,
            "73": 9.4964,
            "74": 8.93812,
            "75": 9.42081,
            "76": 9.07416,
            "77": 10.06077,
            "78": 9.71952,
            "79": 9.37088,
            "80": 9.39874,
            "81": 9.47802,
            "82": 9.69299,
            "83": 9.30276,
            "84": 9.41548,
            "85": 9.60883,
            "86": 9.07461,
            "87": 9.58826,
            "88": 9.74392,
            "89": 9.5951,
            "90": 9.81217,
            "91": 9.33796,
            "92": 9.3534,
            "93": 9.07315,
            "94": 8.83127,
            "95": 9.51524,
            "96": 9.52183,
            "97": 9.31012,
            "98": 9.66532,
            "99": 8.88179,
            "100": 9.39375
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1640.0,
            "2": 1738.0,
            "3": 1638.0,
            "4": 1810.0,
            "5": 1755.0,
            "6": 1681.0,
            "7": 1781.0,
            "8": 1502.0,
            "9": 1817.0,
            "10": 1394.0,
            "11": 1927.0,
            "12": 1691.0,
            "13": 1901.0,
            "14": 1631.0,
            "15": 1765.0,
            "16": 1864.0,
            "17": 1704.0,
            "18": 1771.0,
            "19": 1817.0,
            "20": 1831.0,
            "21": 1813.0,
            "22": 1673.0,
            "23": 2005.0,
            "24": 1553.0,
            "25": 1577.0,
            "26": 1656.0,
            "27": 1734.0,
            "28": 1896.0,
            "29": 2051.0,
            "30": 1897.0,
            "31": 1452.0,
            "32": 1785.0,
            "33": 2061.0,
            "34": 1857.0,
            "35": 1920.0,
            "36": 1990.0,
            "37": 2191.0,
            "38": 2142.0,
            "39": 2215.0,
            "40": 2166.0,
            "41": 2154.0,
            "42": 2148.0,
            "43": 1881.0,
            "44": 2066.0,
            "45": 1952.0,
            "46": 2217.0,
            "47": 2513.0,
            "48": 2356.0,
            "49": 2294.0,
            "50": 2140.0,
            "51": 2509.0,
            "52": 2528.0,
            "53": 2851.0,
            "54": 2747.0,
            "55": 2333.0,
            "56": 2724.0,
            "57": 2315.0,
            "58": 2754.0,
            "59": 2774.0,
            "60": 2336.0,
            "61": 2912.0,
            "62": 2415.0,
            "63": 2341.0,
            "64": 2837.0,
            "65": 2661.0,
            "66": 3000.0,
            "67": 2779.0,
            "68": 2691.0,
            "69": 2793.0,
            "70": 3183.0,
            "71": 2962.0,
            "72": 2393.0,
            "73": 2997.0,
            "74": 1935.0,
            "75": 2463.0,
            "76": 3065.0,
            "77": 3184.0,
            "78": 3154.0,
            "79": 3127.0,
            "80": 3286.0,
            "81": 3386.0,
            "82": 3128.0,
            "83": 2608.0,
            "84": 3079.0,
            "85": 3260.0,
            "86": 2687.0,
            "87": 3591.0,
            "88": 3035.0,
            "89": 3165.0,
            "90": 3166.0,
            "91": 2690.0,
            "92": 2897.0,
            "93": 2630.0,
            "94": 3348.0,
            "95": 3349.0,
            "96": 3288.0,
            "97": 3055.0,
            "98": 3516.0,
            "99": 3035.0,
            "100": 3109.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 747244032.0,
            "2": 747244032.0,
            "3": 747244032.0,
            "4": 747244032.0,
            "5": 747244032.0,
            "6": 747244032.0,
            "7": 747244032.0,
            "8": 747244032.0,
            "9": 747244032.0,
            "10": 747244032.0,
            "11": 747244032.0,
            "12": 747244032.0,
            "13": 747244032.0,
            "14": 747244032.0,
            "15": 747244032.0,
            "16": 747244032.0,
            "17": 747244032.0,
            "18": 747244032.0,
            "19": 747244032.0,
            "20": 747244032.0,
            "21": 747244032.0,
            "22": 747244032.0,
            "23": 747244032.0,
            "24": 747244032.0,
            "25": 747244032.0,
            "26": 747244032.0,
            "27": 747244032.0,
            "28": 747244032.0,
            "29": 747244032.0,
            "30": 747244032.0,
            "31": 747244032.0,
            "32": 747244032.0,
            "33": 747244032.0,
            "34": 747244032.0,
            "35": 747244032.0,
            "36": 747244032.0,
            "37": 747244032.0,
            "38": 747244032.0,
            "39": 747244032.0,
            "40": 747244032.0,
            "41": 747244032.0,
            "42": 747244032.0,
            "43": 747244032.0,
            "44": 747244032.0,
            "45": 747244032.0,
            "46": 747244032.0,
            "47": 747244032.0,
            "48": 747244032.0,
            "49": 747244032.0,
            "50": 747244032.0,
            "51": 747244032.0,
            "52": 747244032.0,
            "53": 747244032.0,
            "54": 747244032.0,
            "55": 747244032.0,
            "56": 747244032.0,
            "57": 747244032.0,
            "58": 747244032.0,
            "59": 747244032.0,
            "60": 747244032.0,
            "61": 747244032.0,
            "62": 747244032.0,
            "63": 747244032.0,
            "64": 747244032.0,
            "65": 747244032.0,
            "66": 747244032.0,
            "67": 747244032.0,
            "68": 747244032.0,
            "69": 747244032.0,
            "70": 747244032.0,
            "71": 747244032.0,
            "72": 747244032.0,
            "73": 747244032.0,
            "74": 747244032.0,
            "75": 747244032.0,
            "76": 747244032.0,
            "77": 747244032.0,
            "78": 747244032.0,
            "79": 747244032.0,
            "80": 747244032.0,
            "81": 747244032.0,
            "82": 747244032.0,
            "83": 747244032.0,
            "84": 747244032.0,
            "85": 747244032.0,
            "86": 747244032.0,
            "87": 747244032.0,
            "88": 747244032.0,
            "89": 747244032.0,
            "90": 747244032.0,
            "91": 747244032.0,
            "92": 747244032.0,
            "93": 747244032.0,
            "94": 747244032.0,
            "95": 747244032.0,
            "96": 747244032.0,
            "97": 747244032.0,
            "98": 747244032.0,
            "99": 747244032.0,
            "100": 747244032.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1927202816.0,
            "2": 2211948544.0,
            "3": 2211948544.0,
            "4": 2211948544.0,
            "5": 2211948544.0,
            "6": 2211948544.0,
            "7": 2211948544.0,
            "8": 2211948544.0,
            "9": 2211948544.0,
            "10": 2211948544.0,
            "11": 2211948544.0,
            "12": 2211948544.0,
            "13": 2211948544.0,
            "14": 2211948544.0,
            "15": 2211948544.0,
            "16": 2211948544.0,
            "17": 2211948544.0,
            "18": 2211948544.0,
            "19": 2211948544.0,
            "20": 2211948544.0,
            "21": 2211948544.0,
            "22": 2211948544.0,
            "23": 2211948544.0,
            "24": 2211948544.0,
            "25": 2211948544.0,
            "26": 2211948544.0,
            "27": 2211948544.0,
            "28": 2211948544.0,
            "29": 2211948544.0,
            "30": 2211948544.0,
            "31": 2211948544.0,
            "32": 2211948544.0,
            "33": 2211948544.0,
            "34": 2211948544.0,
            "35": 2211948544.0,
            "36": 2211948544.0,
            "37": 2211948544.0,
            "38": 2211948544.0,
            "39": 2211948544.0,
            "40": 2211948544.0,
            "41": 2211948544.0,
            "42": 2211948544.0,
            "43": 2211948544.0,
            "44": 2211948544.0,
            "45": 2211948544.0,
            "46": 2211948544.0,
            "47": 2211948544.0,
            "48": 2211948544.0,
            "49": 2211948544.0,
            "50": 2211948544.0,
            "51": 2211948544.0,
            "52": 2211948544.0,
            "53": 2211948544.0,
            "54": 2211948544.0,
            "55": 2211948544.0,
            "56": 2211948544.0,
            "57": 2211948544.0,
            "58": 2211948544.0,
            "59": 2211948544.0,
            "60": 2211948544.0,
            "61": 2211948544.0,
            "62": 2211948544.0,
            "63": 2211948544.0,
            "64": 2211948544.0,
            "65": 2211948544.0,
            "66": 2211948544.0,
            "67": 2211948544.0,
            "68": 2211948544.0,
            "69": 2211948544.0,
            "70": 2211948544.0,
            "71": 2211948544.0,
            "72": 2211948544.0,
            "73": 2211948544.0,
            "74": 2211948544.0,
            "75": 2211948544.0,
            "76": 2211948544.0,
            "77": 2211948544.0,
            "78": 2211948544.0,
            "79": 2211948544.0,
            "80": 2211948544.0,
            "81": 2211948544.0,
            "82": 2211948544.0,
            "83": 2211948544.0,
            "84": 2211948544.0,
            "85": 2211948544.0,
            "86": 2211948544.0,
            "87": 2211948544.0,
            "88": 2211948544.0,
            "89": 2211948544.0,
            "90": 2211948544.0,
            "91": 2211948544.0,
            "92": 2211948544.0,
            "93": 2211948544.0,
            "94": 2211948544.0,
            "95": 2211948544.0,
            "96": 2211948544.0,
            "97": 2211948544.0,
            "98": 2211948544.0,
            "99": 2211948544.0,
            "100": 2211948544.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 8.07511,
            "2": 0.14681,
            "3": 0.10596,
            "4": 0.08711,
            "5": 0.0876,
            "6": 0.08568,
            "7": 0.08664,
            "8": 0.08587,
            "9": 0.08577,
            "10": 0.08621,
            "11": 0.08632,
            "12": 0.08547,
            "13": 0.08657,
            "14": 0.086,
            "15": 0.08713,
            "16": 0.08626,
            "17": 0.0867,
            "18": 0.08636,
            "19": 0.08698,
            "20": 0.08625,
            "21": 0.08785,
            "22": 0.08871,
            "23": 0.08659,
            "24": 0.08847,
            "25": 0.09629,
            "26": 0.09476,
            "27": 0.08553,
            "28": 0.08477,
            "29": 0.08431,
            "30": 0.08434,
            "31": 0.08557,
            "32": 0.08544,
            "33": 0.08488,
            "34": 0.08582,
            "35": 0.08395,
            "36": 0.08398,
            "37": 0.08559,
            "38": 0.08441,
            "39": 0.08418,
            "40": 0.08528,
            "41": 0.0861,
            "42": 0.08685,
            "43": 0.08626,
            "44": 0.08751,
            "45": 0.08791,
            "46": 0.087,
            "47": 0.08684,
            "48": 0.08803,
            "49": 0.08859,
            "50": 0.09019,
            "51": 0.10254,
            "52": 0.09302,
            "53": 0.10544,
            "54": 0.08758,
            "55": 0.0856,
            "56": 0.08575,
            "57": 0.08685,
            "58": 0.08631,
            "59": 0.08389,
            "60": 0.08441,
            "61": 0.08423,
            "62": 0.08509,
            "63": 0.08726,
            "64": 0.08594,
            "65": 0.08568,
            "66": 0.08392,
            "67": 0.08415,
            "68": 0.0849,
            "69": 0.08418,
            "70": 0.08396,
            "71": 0.08448,
            "72": 0.08498,
            "73": 0.08408,
            "74": 0.08475,
            "75": 0.08328,
            "76": 0.08384,
            "77": 0.08424,
            "78": 0.08463,
            "79": 0.0841,
            "80": 0.08431,
            "81": 0.08441,
            "82": 0.0848,
            "83": 0.08442,
            "84": 0.08437,
            "85": 0.08486,
            "86": 0.08464,
            "87": 0.0837,
            "88": 0.0844,
            "89": 0.08503,
            "90": 0.08351,
            "91": 0.0839,
            "92": 0.08423,
            "93": 0.08472,
            "94": 0.08463,
            "95": 0.08455,
            "96": 0.08373,
            "97": 0.08396,
            "98": 0.08358,
            "99": 0.08466,
            "100": 0.08402
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_h100_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.85075,
            "52": 9.73181,
            "53": 10.06388,
            "54": 9.95432,
            "55": 9.87204,
            "56": 9.61823,
            "57": 9.47467,
            "58": 9.82802,
            "59": 9.57962,
            "60": 9.49074,
            "61": 9.68473,
            "62": 9.99245,
            "63": 9.38364,
            "64": 9.77766,
            "65": 8.94008,
            "66": 9.70099,
            "67": 9.3605,
            "68": 9.77766,
            "69": 9.78865,
            "70": 9.73813,
            "71": 9.61811,
            "72": 9.58068,
            "73": 9.4964,
            "74": 8.93812,
            "75": 9.42081,
            "76": 9.07416,
            "77": 10.06077,
            "78": 9.71952,
            "79": 9.37088,
            "80": 9.39874,
            "81": 9.47802,
            "82": 9.69299,
            "83": 9.30276,
            "84": 9.41548,
            "85": 9.60883,
            "86": 9.07461,
            "87": 9.58826,
            "88": 9.74392,
            "89": 9.5951,
            "90": 9.81217,
            "91": 9.33796,
            "92": 9.3534,
            "93": 9.07315,
            "94": 8.83127,
            "95": 9.51524,
            "96": 9.52183,
            "97": 9.31012,
            "98": 9.66532,
            "99": 8.88179,
            "100": 9.39375
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2509.0,
            "52": 2528.0,
            "53": 2851.0,
            "54": 2747.0,
            "55": 2333.0,
            "56": 2724.0,
            "57": 2315.0,
            "58": 2754.0,
            "59": 2774.0,
            "60": 2336.0,
            "61": 2912.0,
            "62": 2415.0,
            "63": 2341.0,
            "64": 2837.0,
            "65": 2661.0,
            "66": 3000.0,
            "67": 2779.0,
            "68": 2691.0,
            "69": 2793.0,
            "70": 3183.0,
            "71": 2962.0,
            "72": 2393.0,
            "73": 2997.0,
            "74": 1935.0,
            "75": 2463.0,
            "76": 3065.0,
            "77": 3184.0,
            "78": 3154.0,
            "79": 3127.0,
            "80": 3286.0,
            "81": 3386.0,
            "82": 3128.0,
            "83": 2608.0,
            "84": 3079.0,
            "85": 3260.0,
            "86": 2687.0,
            "87": 3591.0,
            "88": 3035.0,
            "89": 3165.0,
            "90": 3166.0,
            "91": 2690.0,
            "92": 2897.0,
            "93": 2630.0,
            "94": 3348.0,
            "95": 3349.0,
            "96": 3288.0,
            "97": 3055.0,
            "98": 3516.0,
            "99": 3035.0,
            "100": 3109.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 746195456.0,
            "52": 746195456.0,
            "53": 746195456.0,
            "54": 746195456.0,
            "55": 746195456.0,
            "56": 746195456.0,
            "57": 746195456.0,
            "58": 746195456.0,
            "59": 746195456.0,
            "60": 746195456.0,
            "61": 746195456.0,
            "62": 746195456.0,
            "63": 746195456.0,
            "64": 746195456.0,
            "65": 746195456.0,
            "66": 746195456.0,
            "67": 746195456.0,
            "68": 746195456.0,
            "69": 746195456.0,
            "70": 746195456.0,
            "71": 746195456.0,
            "72": 746195456.0,
            "73": 746195456.0,
            "74": 746195456.0,
            "75": 746195456.0,
            "76": 746195456.0,
            "77": 746195456.0,
            "78": 746195456.0,
            "79": 746195456.0,
            "80": 746195456.0,
            "81": 746195456.0,
            "82": 746195456.0,
            "83": 746195456.0,
            "84": 746195456.0,
            "85": 746195456.0,
            "86": 746195456.0,
            "87": 746195456.0,
            "88": 746195456.0,
            "89": 746195456.0,
            "90": 746195456.0,
            "91": 746195456.0,
            "92": 746195456.0,
            "93": 746195456.0,
            "94": 746195456.0,
            "95": 746195456.0,
            "96": 746195456.0,
            "97": 746195456.0,
            "98": 746195456.0,
            "99": 746195456.0,
            "100": 746195456.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2210899968.0,
            "52": 2210900992.0,
            "53": 2210900992.0,
            "54": 2210900992.0,
            "55": 2210900992.0,
            "56": 2210900992.0,
            "57": 2210900992.0,
            "58": 2210900992.0,
            "59": 2210900992.0,
            "60": 2210900992.0,
            "61": 2210900992.0,
            "62": 2210900992.0,
            "63": 2210900992.0,
            "64": 2210900992.0,
            "65": 2210900992.0,
            "66": 2210900992.0,
            "67": 2210900992.0,
            "68": 2210900992.0,
            "69": 2210900992.0,
            "70": 2210900992.0,
            "71": 2210900992.0,
            "72": 2210900992.0,
            "73": 2210900992.0,
            "74": 2210900992.0,
            "75": 2210900992.0,
            "76": 2210900992.0,
            "77": 2210900992.0,
            "78": 2210900992.0,
            "79": 2210900992.0,
            "80": 2210900992.0,
            "81": 2210900992.0,
            "82": 2210900992.0,
            "83": 2210900992.0,
            "84": 2210900992.0,
            "85": 2210900992.0,
            "86": 2210900992.0,
            "87": 2210900992.0,
            "88": 2210900992.0,
            "89": 2210900992.0,
            "90": 2210900992.0,
            "91": 2210900992.0,
            "92": 2210900992.0,
            "93": 2210900992.0,
            "94": 2210900992.0,
            "95": 2210900992.0,
            "96": 2210900992.0,
            "97": 2210900992.0,
            "98": 2210900992.0,
            "99": 2210900992.0,
            "100": 2210900992.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 8.5499,
            "52": 0.12372,
            "53": 0.09645,
            "54": 0.09114,
            "55": 0.08966,
            "56": 0.09034,
            "57": 0.08956,
            "58": 0.09056,
            "59": 0.09042,
            "60": 0.0897,
            "61": 0.09016,
            "62": 0.09046,
            "63": 0.08857,
            "64": 0.08779,
            "65": 0.08907,
            "66": 0.08837,
            "67": 0.08806,
            "68": 0.08776,
            "69": 0.08756,
            "70": 0.08787,
            "71": 0.08828,
            "72": 0.08894,
            "73": 0.08812,
            "74": 0.08757,
            "75": 0.08963,
            "76": 0.09209,
            "77": 0.0916,
            "78": 0.09224,
            "79": 0.09091,
            "80": 0.08695,
            "81": 0.0874,
            "82": 0.08839,
            "83": 0.08746,
            "84": 0.09295,
            "85": 0.09,
            "86": 0.09021,
            "87": 0.09075,
            "88": 0.08904,
            "89": 0.08839,
            "90": 0.08875,
            "91": 0.08852,
            "92": 0.08796,
            "93": 0.08905,
            "94": 0.08832,
            "95": 0.08897,
            "96": 0.08836,
            "97": 0.08869,
            "98": 0.08858,
            "99": 0.08878,
            "100": 0.08832
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.79332,
            "2": 10.80674,
            "3": 10.8069,
            "4": 10.77377,
            "5": 10.84763,
            "6": 10.86617,
            "7": 10.8277,
            "8": 10.81226,
            "9": 10.83507,
            "10": 10.77007,
            "11": 10.89243,
            "12": 10.84575,
            "13": 10.85895,
            "14": 10.883,
            "15": 10.79054,
            "16": 10.78048,
            "17": 10.75666,
            "18": 10.79491,
            "19": 10.79656,
            "20": 10.68058,
            "21": 10.66001,
            "22": 10.50257,
            "23": 10.7118,
            "24": 10.55327,
            "25": 10.50697,
            "26": 10.58286,
            "27": 10.58643,
            "28": 10.55758,
            "29": 10.56227,
            "30": 10.33257,
            "31": 10.08654,
            "32": 10.44724,
            "33": 10.44499,
            "34": 10.19999,
            "35": 10.25465,
            "36": 10.19443,
            "37": 10.32044,
            "38": 10.16641,
            "39": 10.3774,
            "40": 10.05603,
            "41": 10.13739,
            "42": 10.19161,
            "43": 9.80954,
            "44": 9.93054,
            "45": 9.80619,
            "46": 9.81395,
            "47": 10.12881,
            "48": 9.82729,
            "49": 9.51291,
            "50": 9.89126,
            "51": 9.84055,
            "52": 9.73438,
            "53": 10.05482,
            "54": 9.94058,
            "55": 9.87124,
            "56": 9.61045,
            "57": 9.46116,
            "58": 9.81654,
            "59": 9.57887,
            "60": 9.48507,
            "61": 9.68515,
            "62": 9.97438,
            "63": 9.36298,
            "64": 9.76793,
            "65": 8.93913,
            "66": 9.68918,
            "67": 9.36638,
            "68": 9.77507,
            "69": 9.78344,
            "70": 9.72196,
            "71": 9.60806,
            "72": 9.57714,
            "73": 9.48934,
            "74": 8.94008,
            "75": 9.40867,
            "76": 9.08075,
            "77": 10.05717,
            "78": 9.72281,
            "79": 9.36465,
            "80": 9.39746,
            "81": 9.47553,
            "82": 9.6886,
            "83": 9.30263,
            "84": 9.41008,
            "85": 9.60793,
            "86": 9.07115,
            "87": 9.58676,
            "88": 9.74129,
            "89": 9.5986,
            "90": 9.81041,
            "91": 9.33113,
            "92": 9.35502,
            "93": 9.07481,
            "94": 8.82745,
            "95": 9.51149,
            "96": 9.51876,
            "97": 9.30173,
            "98": 9.66726,
            "99": 8.88087,
            "100": 9.39727
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1624.0,
            "2": 1758.0,
            "3": 1678.0,
            "4": 1680.0,
            "5": 1824.0,
            "6": 1899.0,
            "7": 1871.0,
            "8": 1677.0,
            "9": 1802.0,
            "10": 1414.0,
            "11": 1904.0,
            "12": 1630.0,
            "13": 1956.0,
            "14": 1777.0,
            "15": 1902.0,
            "16": 1801.0,
            "17": 1830.0,
            "18": 1630.0,
            "19": 1835.0,
            "20": 1642.0,
            "21": 1752.0,
            "22": 1696.0,
            "23": 2085.0,
            "24": 1618.0,
            "25": 1593.0,
            "26": 1722.0,
            "27": 1778.0,
            "28": 2042.0,
            "29": 1900.0,
            "30": 2001.0,
            "31": 1592.0,
            "32": 1757.0,
            "33": 2116.0,
            "34": 1924.0,
            "35": 1904.0,
            "36": 1852.0,
            "37": 2382.0,
            "38": 2195.0,
            "39": 2267.0,
            "40": 2335.0,
            "41": 2223.0,
            "42": 2317.0,
            "43": 2069.0,
            "44": 2060.0,
            "45": 2140.0,
            "46": 2397.0,
            "47": 2464.0,
            "48": 2455.0,
            "49": 2276.0,
            "50": 2374.0,
            "51": 2574.0,
            "52": 2457.0,
            "53": 2905.0,
            "54": 2609.0,
            "55": 2220.0,
            "56": 2663.0,
            "57": 2258.0,
            "58": 2898.0,
            "59": 2676.0,
            "60": 2397.0,
            "61": 3048.0,
            "62": 2533.0,
            "63": 2370.0,
            "64": 2975.0,
            "65": 2591.0,
            "66": 3065.0,
            "67": 2732.0,
            "68": 2870.0,
            "69": 2955.0,
            "70": 3112.0,
            "71": 2989.0,
            "72": 2451.0,
            "73": 2881.0,
            "74": 1859.0,
            "75": 2649.0,
            "76": 3026.0,
            "77": 3316.0,
            "78": 3212.0,
            "79": 3183.0,
            "80": 3262.0,
            "81": 3669.0,
            "82": 3187.0,
            "83": 2798.0,
            "84": 3209.0,
            "85": 3309.0,
            "86": 2738.0,
            "87": 3804.0,
            "88": 2989.0,
            "89": 3327.0,
            "90": 3031.0,
            "91": 2720.0,
            "92": 2972.0,
            "93": 2719.0,
            "94": 3387.0,
            "95": 3321.0,
            "96": 3342.0,
            "97": 3191.0,
            "98": 3533.0,
            "99": 3214.0,
            "100": 3318.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 716834304.0,
            "2": 716834304.0,
            "3": 716834304.0,
            "4": 716834304.0,
            "5": 716834304.0,
            "6": 716834304.0,
            "7": 716834304.0,
            "8": 716834304.0,
            "9": 716834304.0,
            "10": 716834304.0,
            "11": 716834304.0,
            "12": 716834304.0,
            "13": 716834304.0,
            "14": 716834304.0,
            "15": 716834304.0,
            "16": 716834304.0,
            "17": 716834304.0,
            "18": 716834304.0,
            "19": 716834304.0,
            "20": 716834304.0,
            "21": 716834304.0,
            "22": 716834304.0,
            "23": 716834304.0,
            "24": 716834304.0,
            "25": 716834304.0,
            "26": 716834304.0,
            "27": 716834304.0,
            "28": 716834304.0,
            "29": 716834304.0,
            "30": 716834304.0,
            "31": 716834304.0,
            "32": 716834304.0,
            "33": 716834304.0,
            "34": 716834304.0,
            "35": 716834304.0,
            "36": 716834304.0,
            "37": 716834304.0,
            "38": 716834304.0,
            "39": 716834304.0,
            "40": 716834304.0,
            "41": 716834304.0,
            "42": 716834304.0,
            "43": 716834304.0,
            "44": 716834304.0,
            "45": 716834304.0,
            "46": 716834304.0,
            "47": 716834304.0,
            "48": 716834304.0,
            "49": 716834304.0,
            "50": 716834304.0,
            "51": 716834304.0,
            "52": 716834304.0,
            "53": 716834304.0,
            "54": 716834304.0,
            "55": 716834304.0,
            "56": 716834304.0,
            "57": 716834304.0,
            "58": 716834304.0,
            "59": 716834304.0,
            "60": 716834304.0,
            "61": 716834304.0,
            "62": 716834304.0,
            "63": 716834304.0,
            "64": 716834304.0,
            "65": 716834304.0,
            "66": 716834304.0,
            "67": 716834304.0,
            "68": 716834304.0,
            "69": 716834304.0,
            "70": 716834304.0,
            "71": 716834304.0,
            "72": 716834304.0,
            "73": 716834304.0,
            "74": 716834304.0,
            "75": 716834304.0,
            "76": 716834304.0,
            "77": 716834304.0,
            "78": 716834304.0,
            "79": 716834304.0,
            "80": 716834304.0,
            "81": 716834304.0,
            "82": 716834304.0,
            "83": 716834304.0,
            "84": 716834304.0,
            "85": 716834304.0,
            "86": 716834304.0,
            "87": 716834304.0,
            "88": 716834304.0,
            "89": 716834304.0,
            "90": 716834304.0,
            "91": 716834304.0,
            "92": 716834304.0,
            "93": 716834304.0,
            "94": 716834304.0,
            "95": 716834304.0,
            "96": 716834304.0,
            "97": 716834304.0,
            "98": 716834304.0,
            "99": 716834304.0,
            "100": 716834304.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1910424576.0,
            "2": 2193074176.0,
            "3": 2193074176.0,
            "4": 2193074176.0,
            "5": 2193074176.0,
            "6": 2193074176.0,
            "7": 2193074176.0,
            "8": 2193074176.0,
            "9": 2193074176.0,
            "10": 2193074176.0,
            "11": 2193074176.0,
            "12": 2193074176.0,
            "13": 2193074176.0,
            "14": 2193074176.0,
            "15": 2193074176.0,
            "16": 2193074176.0,
            "17": 2193074176.0,
            "18": 2193074176.0,
            "19": 2193074176.0,
            "20": 2193074176.0,
            "21": 2193074176.0,
            "22": 2193074176.0,
            "23": 2193074176.0,
            "24": 2193074176.0,
            "25": 2193074176.0,
            "26": 2193074176.0,
            "27": 2193074176.0,
            "28": 2193074176.0,
            "29": 2193074176.0,
            "30": 2193074176.0,
            "31": 2193074176.0,
            "32": 2193074176.0,
            "33": 2193074176.0,
            "34": 2193074176.0,
            "35": 2193074176.0,
            "36": 2193074176.0,
            "37": 2193074176.0,
            "38": 2193074176.0,
            "39": 2193074176.0,
            "40": 2193074176.0,
            "41": 2193074176.0,
            "42": 2193074176.0,
            "43": 2193074176.0,
            "44": 2193074176.0,
            "45": 2193074176.0,
            "46": 2193074176.0,
            "47": 2193074176.0,
            "48": 2193074176.0,
            "49": 2193074176.0,
            "50": 2193074176.0,
            "51": 2193074176.0,
            "52": 2193074176.0,
            "53": 2193074176.0,
            "54": 2193074176.0,
            "55": 2193074176.0,
            "56": 2193074176.0,
            "57": 2193074176.0,
            "58": 2193074176.0,
            "59": 2193074176.0,
            "60": 2193074176.0,
            "61": 2193074176.0,
            "62": 2193074176.0,
            "63": 2193074176.0,
            "64": 2193074176.0,
            "65": 2193074176.0,
            "66": 2193074176.0,
            "67": 2193074176.0,
            "68": 2193074176.0,
            "69": 2193074176.0,
            "70": 2193074176.0,
            "71": 2193074176.0,
            "72": 2193074176.0,
            "73": 2193074176.0,
            "74": 2193074176.0,
            "75": 2193074176.0,
            "76": 2193074176.0,
            "77": 2193074176.0,
            "78": 2193074176.0,
            "79": 2193074176.0,
            "80": 2193074176.0,
            "81": 2193074176.0,
            "82": 2193074176.0,
            "83": 2193074176.0,
            "84": 2193074176.0,
            "85": 2193074176.0,
            "86": 2193074176.0,
            "87": 2193074176.0,
            "88": 2193074176.0,
            "89": 2193074176.0,
            "90": 2193074176.0,
            "91": 2193074176.0,
            "92": 2193074176.0,
            "93": 2193074176.0,
            "94": 2193074176.0,
            "95": 2193074176.0,
            "96": 2193074176.0,
            "97": 2193074176.0,
            "98": 2193074176.0,
            "99": 2193074176.0,
            "100": 2193074176.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 9.17541,
            "2": 0.17628,
            "3": 0.14823,
            "4": 0.14828,
            "5": 0.14489,
            "6": 0.14681,
            "7": 0.14589,
            "8": 0.14567,
            "9": 0.14899,
            "10": 0.14748,
            "11": 0.1469,
            "12": 0.14571,
            "13": 0.14519,
            "14": 0.14594,
            "15": 0.14553,
            "16": 0.1461,
            "17": 0.14672,
            "18": 0.14616,
            "19": 0.1455,
            "20": 0.14588,
            "21": 0.14801,
            "22": 0.14714,
            "23": 0.14721,
            "24": 0.14624,
            "25": 0.1462,
            "26": 0.14548,
            "27": 0.14684,
            "28": 0.14765,
            "29": 0.14671,
            "30": 0.14515,
            "31": 0.14617,
            "32": 0.14666,
            "33": 0.14596,
            "34": 0.14868,
            "35": 0.14573,
            "36": 0.14694,
            "37": 0.14585,
            "38": 0.14605,
            "39": 0.14599,
            "40": 0.14558,
            "41": 0.14673,
            "42": 0.14745,
            "43": 0.1456,
            "44": 0.14744,
            "45": 0.14524,
            "46": 0.14572,
            "47": 0.14533,
            "48": 0.14632,
            "49": 0.14734,
            "50": 0.1453,
            "51": 0.16371,
            "52": 0.14839,
            "53": 0.17852,
            "54": 0.14579,
            "55": 0.14651,
            "56": 0.14872,
            "57": 0.14723,
            "58": 0.14775,
            "59": 0.14896,
            "60": 0.14649,
            "61": 0.14672,
            "62": 0.14696,
            "63": 0.14572,
            "64": 0.14639,
            "65": 0.14739,
            "66": 0.14722,
            "67": 0.14732,
            "68": 0.14566,
            "69": 0.14664,
            "70": 0.14693,
            "71": 0.14641,
            "72": 0.14742,
            "73": 0.14691,
            "74": 0.1482,
            "75": 0.15006,
            "76": 0.146,
            "77": 0.14585,
            "78": 0.14677,
            "79": 0.14716,
            "80": 0.14605,
            "81": 0.14678,
            "82": 0.14648,
            "83": 0.14624,
            "84": 0.14639,
            "85": 0.14622,
            "86": 0.14829,
            "87": 0.14591,
            "88": 0.14541,
            "89": 0.14865,
            "90": 0.14587,
            "91": 0.14618,
            "92": 0.14625,
            "93": 0.14624,
            "94": 0.14583,
            "95": 0.14675,
            "96": 0.14876,
            "97": 0.14645,
            "98": 0.14588,
            "99": 0.14617,
            "100": 0.14618
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgx_a100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.79332,
            "2": 10.80674,
            "3": 10.8069,
            "4": 10.77377,
            "5": 10.84763,
            "6": 10.86617,
            "7": 10.8277,
            "8": 10.81226,
            "9": 10.83507,
            "10": 10.77007,
            "11": 10.89243,
            "12": 10.84575,
            "13": 10.85895,
            "14": 10.883,
            "15": 10.79054,
            "16": 10.78048,
            "17": 10.75666,
            "18": 10.79491,
            "19": 10.79656,
            "20": 10.68058,
            "21": 10.66001,
            "22": 10.50257,
            "23": 10.7118,
            "24": 10.55327,
            "25": 10.50697,
            "26": 10.58286,
            "27": 10.58643,
            "28": 10.55758,
            "29": 10.56227,
            "30": 10.33257,
            "31": 10.08654,
            "32": 10.44724,
            "33": 10.44499,
            "34": 10.19999,
            "35": 10.25465,
            "36": 10.19443,
            "37": 10.32044,
            "38": 10.16641,
            "39": 10.3774,
            "40": 10.05603,
            "41": 10.13739,
            "42": 10.19161,
            "43": 9.80954,
            "44": 9.93054,
            "45": 9.80619,
            "46": 9.81395,
            "47": 10.12881,
            "48": 9.82729,
            "49": 9.51291,
            "50": 9.89126,
            "51": 9.84055,
            "52": 9.73438,
            "53": 10.05482,
            "54": 9.94058,
            "55": 9.87124,
            "56": 9.61045,
            "57": 9.46116,
            "58": 9.81654,
            "59": 9.57887,
            "60": 9.48507,
            "61": 9.68515,
            "62": 9.97438,
            "63": 9.36298,
            "64": 9.76793,
            "65": 8.93913,
            "66": 9.68918,
            "67": 9.36638,
            "68": 9.77507,
            "69": 9.78344,
            "70": 9.72196,
            "71": 9.60806,
            "72": 9.57714,
            "73": 9.48934,
            "74": 8.94008,
            "75": 9.40867,
            "76": 9.08075,
            "77": 10.05717,
            "78": 9.72281,
            "79": 9.36465,
            "80": 9.39746,
            "81": 9.47553,
            "82": 9.6886,
            "83": 9.30263,
            "84": 9.41008,
            "85": 9.60793,
            "86": 9.07115,
            "87": 9.58676,
            "88": 9.74129,
            "89": 9.5986,
            "90": 9.81041,
            "91": 9.33113,
            "92": 9.35502,
            "93": 9.07481,
            "94": 8.82745,
            "95": 9.51149,
            "96": 9.51876,
            "97": 9.30173,
            "98": 9.66726,
            "99": 8.88087,
            "100": 9.39727
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1624.0,
            "2": 1758.0,
            "3": 1678.0,
            "4": 1680.0,
            "5": 1824.0,
            "6": 1899.0,
            "7": 1871.0,
            "8": 1677.0,
            "9": 1802.0,
            "10": 1414.0,
            "11": 1904.0,
            "12": 1630.0,
            "13": 1956.0,
            "14": 1777.0,
            "15": 1902.0,
            "16": 1801.0,
            "17": 1830.0,
            "18": 1630.0,
            "19": 1835.0,
            "20": 1642.0,
            "21": 1752.0,
            "22": 1696.0,
            "23": 2085.0,
            "24": 1618.0,
            "25": 1593.0,
            "26": 1722.0,
            "27": 1778.0,
            "28": 2042.0,
            "29": 1900.0,
            "30": 2001.0,
            "31": 1592.0,
            "32": 1757.0,
            "33": 2116.0,
            "34": 1924.0,
            "35": 1904.0,
            "36": 1852.0,
            "37": 2382.0,
            "38": 2195.0,
            "39": 2267.0,
            "40": 2335.0,
            "41": 2223.0,
            "42": 2317.0,
            "43": 2069.0,
            "44": 2060.0,
            "45": 2140.0,
            "46": 2397.0,
            "47": 2464.0,
            "48": 2455.0,
            "49": 2276.0,
            "50": 2374.0,
            "51": 2574.0,
            "52": 2457.0,
            "53": 2905.0,
            "54": 2609.0,
            "55": 2220.0,
            "56": 2663.0,
            "57": 2258.0,
            "58": 2898.0,
            "59": 2676.0,
            "60": 2397.0,
            "61": 3048.0,
            "62": 2533.0,
            "63": 2370.0,
            "64": 2975.0,
            "65": 2591.0,
            "66": 3065.0,
            "67": 2732.0,
            "68": 2870.0,
            "69": 2955.0,
            "70": 3112.0,
            "71": 2989.0,
            "72": 2451.0,
            "73": 2881.0,
            "74": 1859.0,
            "75": 2649.0,
            "76": 3026.0,
            "77": 3316.0,
            "78": 3212.0,
            "79": 3183.0,
            "80": 3262.0,
            "81": 3669.0,
            "82": 3187.0,
            "83": 2798.0,
            "84": 3209.0,
            "85": 3309.0,
            "86": 2738.0,
            "87": 3804.0,
            "88": 2989.0,
            "89": 3327.0,
            "90": 3031.0,
            "91": 2720.0,
            "92": 2972.0,
            "93": 2719.0,
            "94": 3387.0,
            "95": 3321.0,
            "96": 3342.0,
            "97": 3191.0,
            "98": 3533.0,
            "99": 3214.0,
            "100": 3318.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 716834304.0,
            "2": 716834304.0,
            "3": 716834304.0,
            "4": 716834304.0,
            "5": 716834304.0,
            "6": 716834304.0,
            "7": 716834304.0,
            "8": 716834304.0,
            "9": 716834304.0,
            "10": 716834304.0,
            "11": 716834304.0,
            "12": 716834304.0,
            "13": 716834304.0,
            "14": 716834304.0,
            "15": 716834304.0,
            "16": 716834304.0,
            "17": 716834304.0,
            "18": 716834304.0,
            "19": 716834304.0,
            "20": 716834304.0,
            "21": 716834304.0,
            "22": 716834304.0,
            "23": 716834304.0,
            "24": 716834304.0,
            "25": 716834304.0,
            "26": 716834304.0,
            "27": 716834304.0,
            "28": 716834304.0,
            "29": 716834304.0,
            "30": 716834304.0,
            "31": 716834304.0,
            "32": 716834304.0,
            "33": 716834304.0,
            "34": 716834304.0,
            "35": 716834304.0,
            "36": 716834304.0,
            "37": 716834304.0,
            "38": 716834304.0,
            "39": 716834304.0,
            "40": 716834304.0,
            "41": 716834304.0,
            "42": 716834304.0,
            "43": 716834304.0,
            "44": 716834304.0,
            "45": 716834304.0,
            "46": 716834304.0,
            "47": 716834304.0,
            "48": 716834304.0,
            "49": 716834304.0,
            "50": 716834304.0,
            "51": 716834304.0,
            "52": 716834304.0,
            "53": 716834304.0,
            "54": 716834304.0,
            "55": 716834304.0,
            "56": 716834304.0,
            "57": 716834304.0,
            "58": 716834304.0,
            "59": 716834304.0,
            "60": 716834304.0,
            "61": 716834304.0,
            "62": 716834304.0,
            "63": 716834304.0,
            "64": 716834304.0,
            "65": 716834304.0,
            "66": 716834304.0,
            "67": 716834304.0,
            "68": 716834304.0,
            "69": 716834304.0,
            "70": 716834304.0,
            "71": 716834304.0,
            "72": 716834304.0,
            "73": 716834304.0,
            "74": 716834304.0,
            "75": 716834304.0,
            "76": 716834304.0,
            "77": 716834304.0,
            "78": 716834304.0,
            "79": 716834304.0,
            "80": 716834304.0,
            "81": 716834304.0,
            "82": 716834304.0,
            "83": 716834304.0,
            "84": 716834304.0,
            "85": 716834304.0,
            "86": 716834304.0,
            "87": 716834304.0,
            "88": 716834304.0,
            "89": 716834304.0,
            "90": 716834304.0,
            "91": 716834304.0,
            "92": 716834304.0,
            "93": 716834304.0,
            "94": 716834304.0,
            "95": 716834304.0,
            "96": 716834304.0,
            "97": 716834304.0,
            "98": 716834304.0,
            "99": 716834304.0,
            "100": 716834304.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1910424576.0,
            "2": 2193074176.0,
            "3": 2193074176.0,
            "4": 2193074176.0,
            "5": 2193074176.0,
            "6": 2193074176.0,
            "7": 2193074176.0,
            "8": 2193074176.0,
            "9": 2193074176.0,
            "10": 2193074176.0,
            "11": 2193074176.0,
            "12": 2193074176.0,
            "13": 2193074176.0,
            "14": 2193074176.0,
            "15": 2193074176.0,
            "16": 2193074176.0,
            "17": 2193074176.0,
            "18": 2193074176.0,
            "19": 2193074176.0,
            "20": 2193074176.0,
            "21": 2193074176.0,
            "22": 2193074176.0,
            "23": 2193074176.0,
            "24": 2193074176.0,
            "25": 2193074176.0,
            "26": 2193074176.0,
            "27": 2193074176.0,
            "28": 2193074176.0,
            "29": 2193074176.0,
            "30": 2193074176.0,
            "31": 2193074176.0,
            "32": 2193074176.0,
            "33": 2193074176.0,
            "34": 2193074176.0,
            "35": 2193074176.0,
            "36": 2193074176.0,
            "37": 2193074176.0,
            "38": 2193074176.0,
            "39": 2193074176.0,
            "40": 2193074176.0,
            "41": 2193074176.0,
            "42": 2193074176.0,
            "43": 2193074176.0,
            "44": 2193074176.0,
            "45": 2193074176.0,
            "46": 2193074176.0,
            "47": 2193074176.0,
            "48": 2193074176.0,
            "49": 2193074176.0,
            "50": 2193074176.0,
            "51": 2193074176.0,
            "52": 2193074176.0,
            "53": 2193074176.0,
            "54": 2193074176.0,
            "55": 2193074176.0,
            "56": 2193074176.0,
            "57": 2193074176.0,
            "58": 2193074176.0,
            "59": 2193074176.0,
            "60": 2193074176.0,
            "61": 2193074176.0,
            "62": 2193074176.0,
            "63": 2193074176.0,
            "64": 2193074176.0,
            "65": 2193074176.0,
            "66": 2193074176.0,
            "67": 2193074176.0,
            "68": 2193074176.0,
            "69": 2193074176.0,
            "70": 2193074176.0,
            "71": 2193074176.0,
            "72": 2193074176.0,
            "73": 2193074176.0,
            "74": 2193074176.0,
            "75": 2193074176.0,
            "76": 2193074176.0,
            "77": 2193074176.0,
            "78": 2193074176.0,
            "79": 2193074176.0,
            "80": 2193074176.0,
            "81": 2193074176.0,
            "82": 2193074176.0,
            "83": 2193074176.0,
            "84": 2193074176.0,
            "85": 2193074176.0,
            "86": 2193074176.0,
            "87": 2193074176.0,
            "88": 2193074176.0,
            "89": 2193074176.0,
            "90": 2193074176.0,
            "91": 2193074176.0,
            "92": 2193074176.0,
            "93": 2193074176.0,
            "94": 2193074176.0,
            "95": 2193074176.0,
            "96": 2193074176.0,
            "97": 2193074176.0,
            "98": 2193074176.0,
            "99": 2193074176.0,
            "100": 2193074176.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 3.59409,
            "2": 0.17465,
            "3": 0.16266,
            "4": 0.1495,
            "5": 0.14527,
            "6": 0.14428,
            "7": 0.14381,
            "8": 0.14313,
            "9": 0.14427,
            "10": 0.14389,
            "11": 0.1443,
            "12": 0.14275,
            "13": 0.1429,
            "14": 0.14279,
            "15": 0.14378,
            "16": 0.14358,
            "17": 0.14299,
            "18": 0.14217,
            "19": 0.14256,
            "20": 0.14345,
            "21": 0.14367,
            "22": 0.14305,
            "23": 0.14257,
            "24": 0.14186,
            "25": 0.1423,
            "26": 0.14156,
            "27": 0.14279,
            "28": 0.14152,
            "29": 0.14248,
            "30": 0.14222,
            "31": 0.14276,
            "32": 0.14268,
            "33": 0.14313,
            "34": 0.14133,
            "35": 0.14312,
            "36": 0.14147,
            "37": 0.14217,
            "38": 0.14071,
            "39": 0.14226,
            "40": 0.14163,
            "41": 0.14393,
            "42": 0.14189,
            "43": 0.14266,
            "44": 0.14185,
            "45": 0.1438,
            "46": 0.14173,
            "47": 0.14272,
            "48": 0.14379,
            "49": 0.14245,
            "50": 0.1422,
            "51": 0.1491,
            "52": 0.16902,
            "53": 0.14276,
            "54": 0.14121,
            "55": 0.14203,
            "56": 0.14111,
            "57": 0.14215,
            "58": 0.14121,
            "59": 0.14274,
            "60": 0.14079,
            "61": 0.14212,
            "62": 0.14078,
            "63": 0.14277,
            "64": 0.14264,
            "65": 0.14256,
            "66": 0.14207,
            "67": 0.14426,
            "68": 0.14138,
            "69": 0.14293,
            "70": 0.1423,
            "71": 0.14265,
            "72": 0.14181,
            "73": 0.14253,
            "74": 0.14239,
            "75": 0.1436,
            "76": 0.14184,
            "77": 0.14185,
            "78": 0.14261,
            "79": 0.14322,
            "80": 0.14295,
            "81": 0.14304,
            "82": 0.14307,
            "83": 0.14253,
            "84": 0.14179,
            "85": 0.14257,
            "86": 0.14198,
            "87": 0.15027,
            "88": 0.14143,
            "89": 0.14408,
            "90": 0.14207,
            "91": 0.14351,
            "92": 0.14216,
            "93": 0.14223,
            "94": 0.14137,
            "95": 0.14285,
            "96": 0.14202,
            "97": 0.14246,
            "98": 0.1411,
            "99": 0.14199,
            "100": 0.14181
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgx_a100_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.84055,
            "52": 9.73438,
            "53": 10.05482,
            "54": 9.94058,
            "55": 9.87124,
            "56": 9.61045,
            "57": 9.46116,
            "58": 9.81654,
            "59": 9.57887,
            "60": 9.48507,
            "61": 9.68515,
            "62": 9.97438,
            "63": 9.36298,
            "64": 9.76793,
            "65": 8.93913,
            "66": 9.68918,
            "67": 9.36638,
            "68": 9.77507,
            "69": 9.78344,
            "70": 9.72196,
            "71": 9.60806,
            "72": 9.57714,
            "73": 9.48934,
            "74": 8.94008,
            "75": 9.40867,
            "76": 9.08075,
            "77": 10.05717,
            "78": 9.72281,
            "79": 9.36465,
            "80": 9.39746,
            "81": 9.47553,
            "82": 9.6886,
            "83": 9.30263,
            "84": 9.41008,
            "85": 9.60793,
            "86": 9.07115,
            "87": 9.58676,
            "88": 9.74129,
            "89": 9.5986,
            "90": 9.81041,
            "91": 9.33113,
            "92": 9.35502,
            "93": 9.07481,
            "94": 8.82745,
            "95": 9.51149,
            "96": 9.51876,
            "97": 9.30173,
            "98": 9.66726,
            "99": 8.88087,
            "100": 9.39727
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2574.0,
            "52": 2457.0,
            "53": 2905.0,
            "54": 2609.0,
            "55": 2220.0,
            "56": 2663.0,
            "57": 2258.0,
            "58": 2898.0,
            "59": 2676.0,
            "60": 2397.0,
            "61": 3048.0,
            "62": 2533.0,
            "63": 2370.0,
            "64": 2975.0,
            "65": 2591.0,
            "66": 3065.0,
            "67": 2732.0,
            "68": 2870.0,
            "69": 2955.0,
            "70": 3112.0,
            "71": 2989.0,
            "72": 2451.0,
            "73": 2881.0,
            "74": 1859.0,
            "75": 2649.0,
            "76": 3026.0,
            "77": 3316.0,
            "78": 3212.0,
            "79": 3183.0,
            "80": 3262.0,
            "81": 3669.0,
            "82": 3187.0,
            "83": 2798.0,
            "84": 3209.0,
            "85": 3309.0,
            "86": 2738.0,
            "87": 3804.0,
            "88": 2989.0,
            "89": 3327.0,
            "90": 3031.0,
            "91": 2720.0,
            "92": 2972.0,
            "93": 2719.0,
            "94": 3387.0,
            "95": 3321.0,
            "96": 3342.0,
            "97": 3191.0,
            "98": 3533.0,
            "99": 3214.0,
            "100": 3318.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 716834304.0,
            "52": 716834304.0,
            "53": 716834304.0,
            "54": 716834304.0,
            "55": 716834304.0,
            "56": 716834304.0,
            "57": 716834304.0,
            "58": 716834304.0,
            "59": 716834304.0,
            "60": 716834304.0,
            "61": 716834304.0,
            "62": 716834304.0,
            "63": 716834304.0,
            "64": 716834304.0,
            "65": 716834304.0,
            "66": 716834304.0,
            "67": 716834304.0,
            "68": 716834304.0,
            "69": 716834304.0,
            "70": 716834304.0,
            "71": 716834304.0,
            "72": 716834304.0,
            "73": 716834304.0,
            "74": 716834304.0,
            "75": 716834304.0,
            "76": 716834304.0,
            "77": 716834304.0,
            "78": 716834304.0,
            "79": 716834304.0,
            "80": 716834304.0,
            "81": 716834304.0,
            "82": 716834304.0,
            "83": 716834304.0,
            "84": 716834304.0,
            "85": 716834304.0,
            "86": 716834304.0,
            "87": 716834304.0,
            "88": 716834304.0,
            "89": 716834304.0,
            "90": 716834304.0,
            "91": 716834304.0,
            "92": 716834304.0,
            "93": 716834304.0,
            "94": 716834304.0,
            "95": 716834304.0,
            "96": 716834304.0,
            "97": 716834304.0,
            "98": 716834304.0,
            "99": 716834304.0,
            "100": 716834304.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2194121728.0,
            "52": 2194122752.0,
            "53": 2194122752.0,
            "54": 2194122752.0,
            "55": 2194122752.0,
            "56": 2194122752.0,
            "57": 2194122752.0,
            "58": 2194122752.0,
            "59": 2194122752.0,
            "60": 2194122752.0,
            "61": 2194122752.0,
            "62": 2194122752.0,
            "63": 2194122752.0,
            "64": 2194122752.0,
            "65": 2194122752.0,
            "66": 2194122752.0,
            "67": 2194122752.0,
            "68": 2194122752.0,
            "69": 2194122752.0,
            "70": 2194122752.0,
            "71": 2194122752.0,
            "72": 2194122752.0,
            "73": 2194122752.0,
            "74": 2194122752.0,
            "75": 2194122752.0,
            "76": 2194122752.0,
            "77": 2194122752.0,
            "78": 2194122752.0,
            "79": 2194122752.0,
            "80": 2194122752.0,
            "81": 2194122752.0,
            "82": 2194122752.0,
            "83": 2194122752.0,
            "84": 2194122752.0,
            "85": 2194122752.0,
            "86": 2194122752.0,
            "87": 2194122752.0,
            "88": 2194122752.0,
            "89": 2194122752.0,
            "90": 2194122752.0,
            "91": 2194122752.0,
            "92": 2194122752.0,
            "93": 2194122752.0,
            "94": 2194122752.0,
            "95": 2194122752.0,
            "96": 2194122752.0,
            "97": 2194122752.0,
            "98": 2194122752.0,
            "99": 2194122752.0,
            "100": 2194122752.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 8.8238,
            "52": 0.63078,
            "53": 0.15101,
            "54": 0.14953,
            "55": 0.15024,
            "56": 0.14932,
            "57": 0.15011,
            "58": 0.15001,
            "59": 0.15206,
            "60": 0.14938,
            "61": 0.1487,
            "62": 0.14818,
            "63": 0.14803,
            "64": 0.15056,
            "65": 0.14975,
            "66": 0.14796,
            "67": 0.14853,
            "68": 0.14679,
            "69": 0.14809,
            "70": 0.14665,
            "71": 0.14693,
            "72": 0.1481,
            "73": 0.14536,
            "74": 0.14342,
            "75": 0.14313,
            "76": 0.14287,
            "77": 0.14085,
            "78": 0.14168,
            "79": 0.14286,
            "80": 0.14201,
            "81": 0.14225,
            "82": 0.14262,
            "83": 0.14349,
            "84": 0.14179,
            "85": 0.14222,
            "86": 0.14195,
            "87": 0.14171,
            "88": 0.14105,
            "89": 0.14252,
            "90": 0.14411,
            "91": 0.1446,
            "92": 0.14295,
            "93": 0.14308,
            "94": 0.14176,
            "95": 0.14267,
            "96": 0.14302,
            "97": 0.14305,
            "98": 0.14273,
            "99": 0.14183,
            "100": 0.14202
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgxa100_dracooci-ord.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.79205,
            "2": 10.80272,
            "3": 10.80707,
            "4": 10.77315,
            "5": 10.84695,
            "6": 10.86789,
            "7": 10.82655,
            "8": 10.81333,
            "9": 10.83441,
            "10": 10.77106,
            "11": 10.89149,
            "12": 10.84617,
            "13": 10.85969,
            "14": 10.8812,
            "15": 10.79093,
            "16": 10.78328,
            "17": 10.75926,
            "18": 10.79337,
            "19": 10.797,
            "20": 10.68042,
            "21": 10.66126,
            "22": 10.50248,
            "23": 10.71375,
            "24": 10.55253,
            "25": 10.50715,
            "26": 10.58275,
            "27": 10.58672,
            "28": 10.55873,
            "29": 10.56101,
            "30": 10.33325,
            "31": 10.08467,
            "32": 10.44744,
            "33": 10.44372,
            "34": 10.2003,
            "35": 10.25545,
            "36": 10.19448,
            "37": 10.32113,
            "38": 10.1659,
            "39": 10.37726,
            "40": 10.05544,
            "41": 10.13785,
            "42": 10.19159,
            "43": 9.80956,
            "44": 9.92967,
            "45": 9.80575,
            "46": 9.81454,
            "47": 10.12933,
            "48": 9.82644,
            "49": 9.51395,
            "50": 9.89082,
            "51": 9.8397,
            "52": 9.73412,
            "53": 10.05515,
            "54": 9.94093,
            "55": 9.87063,
            "56": 9.61009,
            "57": 9.46055,
            "58": 9.81541,
            "59": 9.57905,
            "60": 9.48478,
            "61": 9.68485,
            "62": 9.97574,
            "63": 9.36483,
            "64": 9.76838,
            "65": 8.94022,
            "66": 9.68864,
            "67": 9.36647,
            "68": 9.77611,
            "69": 9.78404,
            "70": 9.72243,
            "71": 9.6082,
            "72": 9.57758,
            "73": 9.48936,
            "74": 8.9399,
            "75": 9.40907,
            "76": 9.08135,
            "77": 10.05639,
            "78": 9.72293,
            "79": 9.36509,
            "80": 9.3976,
            "81": 9.47445,
            "82": 9.68843,
            "83": 9.30263,
            "84": 9.4102,
            "85": 9.60746,
            "86": 9.07122,
            "87": 9.58742,
            "88": 9.74129,
            "89": 9.59922,
            "90": 9.81041,
            "91": 9.33141,
            "92": 9.35529,
            "93": 9.07461,
            "94": 8.82759,
            "95": 9.5116,
            "96": 9.51899,
            "97": 9.30162,
            "98": 9.66741,
            "99": 8.88218,
            "100": 9.39722
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1580.0,
            "2": 1686.0,
            "3": 1726.0,
            "4": 1795.0,
            "5": 1901.0,
            "6": 1778.0,
            "7": 1963.0,
            "8": 1704.0,
            "9": 1811.0,
            "10": 1346.0,
            "11": 1849.0,
            "12": 1683.0,
            "13": 1888.0,
            "14": 1711.0,
            "15": 1926.0,
            "16": 1841.0,
            "17": 1931.0,
            "18": 1716.0,
            "19": 1765.0,
            "20": 1643.0,
            "21": 1884.0,
            "22": 1626.0,
            "23": 1954.0,
            "24": 1715.0,
            "25": 1683.0,
            "26": 1679.0,
            "27": 1817.0,
            "28": 2019.0,
            "29": 1946.0,
            "30": 1867.0,
            "31": 1544.0,
            "32": 1832.0,
            "33": 2119.0,
            "34": 1921.0,
            "35": 2020.0,
            "36": 1953.0,
            "37": 2350.0,
            "38": 2210.0,
            "39": 2319.0,
            "40": 2252.0,
            "41": 2449.0,
            "42": 2364.0,
            "43": 2089.0,
            "44": 2094.0,
            "45": 2243.0,
            "46": 2335.0,
            "47": 2406.0,
            "48": 2410.0,
            "49": 2341.0,
            "50": 2459.0,
            "51": 2611.0,
            "52": 2427.0,
            "53": 2838.0,
            "54": 2632.0,
            "55": 2291.0,
            "56": 2663.0,
            "57": 2276.0,
            "58": 2777.0,
            "59": 2601.0,
            "60": 2404.0,
            "61": 2985.0,
            "62": 2595.0,
            "63": 2454.0,
            "64": 3101.0,
            "65": 2474.0,
            "66": 3006.0,
            "67": 2671.0,
            "68": 2874.0,
            "69": 2956.0,
            "70": 3102.0,
            "71": 2891.0,
            "72": 2543.0,
            "73": 2860.0,
            "74": 1888.0,
            "75": 2603.0,
            "76": 2813.0,
            "77": 3361.0,
            "78": 3252.0,
            "79": 3007.0,
            "80": 3420.0,
            "81": 3624.0,
            "82": 3184.0,
            "83": 2708.0,
            "84": 3138.0,
            "85": 3388.0,
            "86": 2619.0,
            "87": 3682.0,
            "88": 3074.0,
            "89": 3260.0,
            "90": 2904.0,
            "91": 2634.0,
            "92": 3097.0,
            "93": 2745.0,
            "94": 3484.0,
            "95": 3333.0,
            "96": 3292.0,
            "97": 3141.0,
            "98": 3550.0,
            "99": 3170.0,
            "100": 3347.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 714736640.0,
            "2": 714736640.0,
            "3": 714736640.0,
            "4": 714736640.0,
            "5": 714736640.0,
            "6": 714736640.0,
            "7": 714736640.0,
            "8": 714736640.0,
            "9": 714736640.0,
            "10": 714736640.0,
            "11": 714736640.0,
            "12": 714736640.0,
            "13": 714736640.0,
            "14": 714736640.0,
            "15": 714736640.0,
            "16": 714736640.0,
            "17": 714736640.0,
            "18": 714736640.0,
            "19": 714736640.0,
            "20": 714736640.0,
            "21": 714736640.0,
            "22": 714736640.0,
            "23": 714736640.0,
            "24": 714736640.0,
            "25": 714736640.0,
            "26": 714736640.0,
            "27": 714736640.0,
            "28": 714736640.0,
            "29": 714736640.0,
            "30": 714736640.0,
            "31": 714736640.0,
            "32": 714736640.0,
            "33": 714736640.0,
            "34": 714736640.0,
            "35": 714736640.0,
            "36": 714736640.0,
            "37": 714736640.0,
            "38": 714736640.0,
            "39": 714736640.0,
            "40": 714736640.0,
            "41": 714736640.0,
            "42": 714736640.0,
            "43": 714736640.0,
            "44": 714736640.0,
            "45": 714736640.0,
            "46": 714736640.0,
            "47": 714736640.0,
            "48": 714736640.0,
            "49": 714736640.0,
            "50": 714736640.0,
            "51": 714736640.0,
            "52": 714736640.0,
            "53": 714736640.0,
            "54": 714736640.0,
            "55": 714736640.0,
            "56": 714736640.0,
            "57": 714736640.0,
            "58": 714736640.0,
            "59": 714736640.0,
            "60": 714736640.0,
            "61": 714736640.0,
            "62": 714736640.0,
            "63": 714736640.0,
            "64": 714736640.0,
            "65": 714736640.0,
            "66": 714736640.0,
            "67": 714736640.0,
            "68": 714736640.0,
            "69": 714736640.0,
            "70": 714736640.0,
            "71": 714736640.0,
            "72": 714736640.0,
            "73": 714736640.0,
            "74": 714736640.0,
            "75": 714736640.0,
            "76": 714736640.0,
            "77": 714736640.0,
            "78": 714736640.0,
            "79": 714736640.0,
            "80": 714736640.0,
            "81": 714736640.0,
            "82": 714736640.0,
            "83": 714736640.0,
            "84": 714736640.0,
            "85": 714736640.0,
            "86": 714736640.0,
            "87": 714736640.0,
            "88": 714736640.0,
            "89": 714736640.0,
            "90": 714736640.0,
            "91": 714736640.0,
            "92": 714736640.0,
            "93": 714736640.0,
            "94": 714736640.0,
            "95": 714736640.0,
            "96": 714736640.0,
            "97": 714736640.0,
            "98": 714736640.0,
            "99": 714736640.0,
            "100": 714736640.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2399714304.0,
            "2": 2681315328.0,
            "3": 2681315328.0,
            "4": 2681315328.0,
            "5": 2681315328.0,
            "6": 2681315328.0,
            "7": 2681315328.0,
            "8": 2681315328.0,
            "9": 2681315328.0,
            "10": 2681315328.0,
            "11": 2681315328.0,
            "12": 2681315328.0,
            "13": 2681315328.0,
            "14": 2681315328.0,
            "15": 2681315328.0,
            "16": 2681315328.0,
            "17": 2681315328.0,
            "18": 2681315328.0,
            "19": 2681315328.0,
            "20": 2681315328.0,
            "21": 2681315328.0,
            "22": 2681315328.0,
            "23": 2681315328.0,
            "24": 2681315328.0,
            "25": 2681315328.0,
            "26": 2681315328.0,
            "27": 2681315328.0,
            "28": 2681315328.0,
            "29": 2681315328.0,
            "30": 2681315328.0,
            "31": 2681315328.0,
            "32": 2681315328.0,
            "33": 2681315328.0,
            "34": 2681315328.0,
            "35": 2681315328.0,
            "36": 2681315328.0,
            "37": 2681315328.0,
            "38": 2681315328.0,
            "39": 2681315328.0,
            "40": 2681315328.0,
            "41": 2681315328.0,
            "42": 2681315328.0,
            "43": 2681315328.0,
            "44": 2681315328.0,
            "45": 2681315328.0,
            "46": 2681315328.0,
            "47": 2681315328.0,
            "48": 2681315328.0,
            "49": 2681315328.0,
            "50": 2681315328.0,
            "51": 2681315328.0,
            "52": 2681315328.0,
            "53": 2681315328.0,
            "54": 2681315328.0,
            "55": 2681315328.0,
            "56": 2681315328.0,
            "57": 2681315328.0,
            "58": 2681315328.0,
            "59": 2681315328.0,
            "60": 2681315328.0,
            "61": 2681315328.0,
            "62": 2681315328.0,
            "63": 2681315328.0,
            "64": 2681315328.0,
            "65": 2681315328.0,
            "66": 2681315328.0,
            "67": 2681315328.0,
            "68": 2681315328.0,
            "69": 2681315328.0,
            "70": 2681315328.0,
            "71": 2681315328.0,
            "72": 2681315328.0,
            "73": 2681315328.0,
            "74": 2681315328.0,
            "75": 2681315328.0,
            "76": 2681315328.0,
            "77": 2681315328.0,
            "78": 2681315328.0,
            "79": 2681315328.0,
            "80": 2681315328.0,
            "81": 2681315328.0,
            "82": 2681315328.0,
            "83": 2681315328.0,
            "84": 2681315328.0,
            "85": 2681315328.0,
            "86": 2681315328.0,
            "87": 2681315328.0,
            "88": 2681315328.0,
            "89": 2681315328.0,
            "90": 2681315328.0,
            "91": 2681315328.0,
            "92": 2681315328.0,
            "93": 2681315328.0,
            "94": 2681315328.0,
            "95": 2681315328.0,
            "96": 2681315328.0,
            "97": 2681315328.0,
            "98": 2681315328.0,
            "99": 2681315328.0,
            "100": 2681315328.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 12.214,
            "2": 0.2986,
            "3": 0.17295,
            "4": 0.16821,
            "5": 0.16854,
            "6": 0.16781,
            "7": 0.16849,
            "8": 0.16759,
            "9": 0.16821,
            "10": 0.16905,
            "11": 0.16939,
            "12": 0.16739,
            "13": 0.16719,
            "14": 0.16712,
            "15": 0.16829,
            "16": 0.1725,
            "17": 0.16696,
            "18": 0.16586,
            "19": 0.16737,
            "20": 0.16711,
            "21": 0.16776,
            "22": 0.16801,
            "23": 0.16812,
            "24": 0.16559,
            "25": 0.16732,
            "26": 0.16954,
            "27": 0.16886,
            "28": 0.1669,
            "29": 0.16695,
            "30": 0.16775,
            "31": 0.16795,
            "32": 0.16696,
            "33": 0.16584,
            "34": 0.16695,
            "35": 0.16714,
            "36": 0.16747,
            "37": 0.16686,
            "38": 0.16675,
            "39": 0.16654,
            "40": 0.18817,
            "41": 0.16797,
            "42": 0.16692,
            "43": 0.16746,
            "44": 0.16567,
            "45": 0.1672,
            "46": 0.1681,
            "47": 0.16794,
            "48": 0.17384,
            "49": 0.17344,
            "50": 0.17178,
            "51": 0.17498,
            "52": 0.16896,
            "53": 0.2031,
            "54": 0.16689,
            "55": 0.16738,
            "56": 0.1658,
            "57": 0.16757,
            "58": 0.16947,
            "59": 0.16981,
            "60": 0.16658,
            "61": 0.16728,
            "62": 0.16586,
            "63": 0.16601,
            "64": 0.16674,
            "65": 0.16826,
            "66": 0.16662,
            "67": 0.16681,
            "68": 0.1673,
            "69": 0.16747,
            "70": 0.16723,
            "71": 0.16746,
            "72": 0.16639,
            "73": 0.16738,
            "74": 0.16734,
            "75": 0.16723,
            "76": 0.16734,
            "77": 0.16644,
            "78": 0.16664,
            "79": 0.16693,
            "80": 0.16638,
            "81": 0.16693,
            "82": 0.16667,
            "83": 0.1665,
            "84": 0.16715,
            "85": 0.16683,
            "86": 0.16633,
            "87": 0.16713,
            "88": 0.16671,
            "89": 0.16706,
            "90": 0.16702,
            "91": 0.16739,
            "92": 0.16596,
            "93": 0.1665,
            "94": 0.16701,
            "95": 0.16634,
            "96": 0.16704,
            "97": 0.16737,
            "98": 0.16691,
            "99": 0.16712,
            "100": 0.16653
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_lts_dgxa100_dracooci.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.79205,
            "2": 10.80272,
            "3": 10.80707,
            "4": 10.77315,
            "5": 10.84695,
            "6": 10.86789,
            "7": 10.82655,
            "8": 10.81333,
            "9": 10.83441,
            "10": 10.77106,
            "11": 10.89149,
            "12": 10.84617,
            "13": 10.85969,
            "14": 10.8812,
            "15": 10.79093,
            "16": 10.78328,
            "17": 10.75926,
            "18": 10.79337,
            "19": 10.797,
            "20": 10.68042,
            "21": 10.66126,
            "22": 10.50248,
            "23": 10.71375,
            "24": 10.55253,
            "25": 10.50715,
            "26": 10.58275,
            "27": 10.58672,
            "28": 10.55873,
            "29": 10.56101,
            "30": 10.33325,
            "31": 10.08467,
            "32": 10.44744,
            "33": 10.44372,
            "34": 10.2003,
            "35": 10.25545,
            "36": 10.19448,
            "37": 10.32113,
            "38": 10.1659,
            "39": 10.37726,
            "40": 10.05544,
            "41": 10.13785,
            "42": 10.19159,
            "43": 9.80956,
            "44": 9.92967,
            "45": 9.80575,
            "46": 9.81454,
            "47": 10.12933,
            "48": 9.82644,
            "49": 9.51395,
            "50": 9.89082,
            "51": 9.8397,
            "52": 9.73412,
            "53": 10.05515,
            "54": 9.94093,
            "55": 9.87063,
            "56": 9.61009,
            "57": 9.46055,
            "58": 9.81541,
            "59": 9.57905,
            "60": 9.48478,
            "61": 9.68485,
            "62": 9.97574,
            "63": 9.36483,
            "64": 9.76838,
            "65": 8.94022,
            "66": 9.68864,
            "67": 9.36647,
            "68": 9.77611,
            "69": 9.78404,
            "70": 9.72243,
            "71": 9.6082,
            "72": 9.57758,
            "73": 9.48936,
            "74": 8.9399,
            "75": 9.40907,
            "76": 9.08135,
            "77": 10.05639,
            "78": 9.72293,
            "79": 9.36509,
            "80": 9.3976,
            "81": 9.47445,
            "82": 9.68843,
            "83": 9.30263,
            "84": 9.4102,
            "85": 9.60746,
            "86": 9.07122,
            "87": 9.58742,
            "88": 9.74129,
            "89": 9.59922,
            "90": 9.81041,
            "91": 9.33141,
            "92": 9.35529,
            "93": 9.07461,
            "94": 8.82759,
            "95": 9.5116,
            "96": 9.51899,
            "97": 9.30162,
            "98": 9.66741,
            "99": 8.88218,
            "100": 9.39722
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1580.0,
            "2": 1686.0,
            "3": 1726.0,
            "4": 1795.0,
            "5": 1901.0,
            "6": 1778.0,
            "7": 1963.0,
            "8": 1704.0,
            "9": 1811.0,
            "10": 1346.0,
            "11": 1849.0,
            "12": 1683.0,
            "13": 1888.0,
            "14": 1711.0,
            "15": 1926.0,
            "16": 1841.0,
            "17": 1931.0,
            "18": 1716.0,
            "19": 1765.0,
            "20": 1643.0,
            "21": 1884.0,
            "22": 1626.0,
            "23": 1954.0,
            "24": 1715.0,
            "25": 1683.0,
            "26": 1679.0,
            "27": 1817.0,
            "28": 2019.0,
            "29": 1946.0,
            "30": 1867.0,
            "31": 1544.0,
            "32": 1832.0,
            "33": 2119.0,
            "34": 1921.0,
            "35": 2020.0,
            "36": 1953.0,
            "37": 2350.0,
            "38": 2210.0,
            "39": 2319.0,
            "40": 2252.0,
            "41": 2449.0,
            "42": 2364.0,
            "43": 2089.0,
            "44": 2094.0,
            "45": 2243.0,
            "46": 2335.0,
            "47": 2406.0,
            "48": 2410.0,
            "49": 2341.0,
            "50": 2459.0,
            "51": 2611.0,
            "52": 2427.0,
            "53": 2838.0,
            "54": 2632.0,
            "55": 2291.0,
            "56": 2663.0,
            "57": 2276.0,
            "58": 2777.0,
            "59": 2601.0,
            "60": 2404.0,
            "61": 2985.0,
            "62": 2595.0,
            "63": 2454.0,
            "64": 3101.0,
            "65": 2474.0,
            "66": 3006.0,
            "67": 2671.0,
            "68": 2874.0,
            "69": 2956.0,
            "70": 3102.0,
            "71": 2891.0,
            "72": 2543.0,
            "73": 2860.0,
            "74": 1888.0,
            "75": 2603.0,
            "76": 2813.0,
            "77": 3361.0,
            "78": 3252.0,
            "79": 3007.0,
            "80": 3420.0,
            "81": 3624.0,
            "82": 3184.0,
            "83": 2708.0,
            "84": 3138.0,
            "85": 3388.0,
            "86": 2619.0,
            "87": 3682.0,
            "88": 3074.0,
            "89": 3260.0,
            "90": 2904.0,
            "91": 2634.0,
            "92": 3097.0,
            "93": 2745.0,
            "94": 3484.0,
            "95": 3333.0,
            "96": 3292.0,
            "97": 3141.0,
            "98": 3550.0,
            "99": 3170.0,
            "100": 3347.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 714736640.0,
            "2": 714736640.0,
            "3": 714736640.0,
            "4": 714736640.0,
            "5": 714736640.0,
            "6": 714736640.0,
            "7": 714736640.0,
            "8": 714736640.0,
            "9": 714736640.0,
            "10": 714736640.0,
            "11": 714736640.0,
            "12": 714736640.0,
            "13": 714736640.0,
            "14": 714736640.0,
            "15": 714736640.0,
            "16": 714736640.0,
            "17": 714736640.0,
            "18": 714736640.0,
            "19": 714736640.0,
            "20": 714736640.0,
            "21": 714736640.0,
            "22": 714736640.0,
            "23": 714736640.0,
            "24": 714736640.0,
            "25": 714736640.0,
            "26": 714736640.0,
            "27": 714736640.0,
            "28": 714736640.0,
            "29": 714736640.0,
            "30": 714736640.0,
            "31": 714736640.0,
            "32": 714736640.0,
            "33": 714736640.0,
            "34": 714736640.0,
            "35": 714736640.0,
            "36": 714736640.0,
            "37": 714736640.0,
            "38": 714736640.0,
            "39": 714736640.0,
            "40": 714736640.0,
            "41": 714736640.0,
            "42": 714736640.0,
            "43": 714736640.0,
            "44": 714736640.0,
            "45": 714736640.0,
            "46": 714736640.0,
            "47": 714736640.0,
            "48": 714736640.0,
            "49": 714736640.0,
            "50": 714736640.0,
            "51": 714736640.0,
            "52": 714736640.0,
            "53": 714736640.0,
            "54": 714736640.0,
            "55": 714736640.0,
            "56": 714736640.0,
            "57": 714736640.0,
            "58": 714736640.0,
            "59": 714736640.0,
            "60": 714736640.0,
            "61": 714736640.0,
            "62": 714736640.0,
            "63": 714736640.0,
            "64": 714736640.0,
            "65": 714736640.0,
            "66": 714736640.0,
            "67": 714736640.0,
            "68": 714736640.0,
            "69": 714736640.0,
            "70": 714736640.0,
            "71": 714736640.0,
            "72": 714736640.0,
            "73": 714736640.0,
            "74": 714736640.0,
            "75": 714736640.0,
            "76": 714736640.0,
            "77": 714736640.0,
            "78": 714736640.0,
            "79": 714736640.0,
            "80": 714736640.0,
            "81": 714736640.0,
            "82": 714736640.0,
            "83": 714736640.0,
            "84": 714736640.0,
            "85": 714736640.0,
            "86": 714736640.0,
            "87": 714736640.0,
            "88": 714736640.0,
            "89": 714736640.0,
            "90": 714736640.0,
            "91": 714736640.0,
            "92": 714736640.0,
            "93": 714736640.0,
            "94": 714736640.0,
            "95": 714736640.0,
            "96": 714736640.0,
            "97": 714736640.0,
            "98": 714736640.0,
            "99": 714736640.0,
            "100": 714736640.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2399714304.0,
            "2": 2681315328.0,
            "3": 2681315328.0,
            "4": 2681315328.0,
            "5": 2681315328.0,
            "6": 2681315328.0,
            "7": 2681315328.0,
            "8": 2681315328.0,
            "9": 2681315328.0,
            "10": 2681315328.0,
            "11": 2681315328.0,
            "12": 2681315328.0,
            "13": 2681315328.0,
            "14": 2681315328.0,
            "15": 2681315328.0,
            "16": 2681315328.0,
            "17": 2681315328.0,
            "18": 2681315328.0,
            "19": 2681315328.0,
            "20": 2681315328.0,
            "21": 2681315328.0,
            "22": 2681315328.0,
            "23": 2681315328.0,
            "24": 2681315328.0,
            "25": 2681315328.0,
            "26": 2681315328.0,
            "27": 2681315328.0,
            "28": 2681315328.0,
            "29": 2681315328.0,
            "30": 2681315328.0,
            "31": 2681315328.0,
            "32": 2681315328.0,
            "33": 2681315328.0,
            "34": 2681315328.0,
            "35": 2681315328.0,
            "36": 2681315328.0,
            "37": 2681315328.0,
            "38": 2681315328.0,
            "39": 2681315328.0,
            "40": 2681315328.0,
            "41": 2681315328.0,
            "42": 2681315328.0,
            "43": 2681315328.0,
            "44": 2681315328.0,
            "45": 2681315328.0,
            "46": 2681315328.0,
            "47": 2681315328.0,
            "48": 2681315328.0,
            "49": 2681315328.0,
            "50": 2681315328.0,
            "51": 2681315328.0,
            "52": 2681315328.0,
            "53": 2681315328.0,
            "54": 2681315328.0,
            "55": 2681315328.0,
            "56": 2681315328.0,
            "57": 2681315328.0,
            "58": 2681315328.0,
            "59": 2681315328.0,
            "60": 2681315328.0,
            "61": 2681315328.0,
            "62": 2681315328.0,
            "63": 2681315328.0,
            "64": 2681315328.0,
            "65": 2681315328.0,
            "66": 2681315328.0,
            "67": 2681315328.0,
            "68": 2681315328.0,
            "69": 2681315328.0,
            "70": 2681315328.0,
            "71": 2681315328.0,
            "72": 2681315328.0,
            "73": 2681315328.0,
            "74": 2681315328.0,
            "75": 2681315328.0,
            "76": 2681315328.0,
            "77": 2681315328.0,
            "78": 2681315328.0,
            "79": 2681315328.0,
            "80": 2681315328.0,
            "81": 2681315328.0,
            "82": 2681315328.0,
            "83": 2681315328.0,
            "84": 2681315328.0,
            "85": 2681315328.0,
            "86": 2681315328.0,
            "87": 2681315328.0,
            "88": 2681315328.0,
            "89": 2681315328.0,
            "90": 2681315328.0,
            "91": 2681315328.0,
            "92": 2681315328.0,
            "93": 2681315328.0,
            "94": 2681315328.0,
            "95": 2681315328.0,
            "96": 2681315328.0,
            "97": 2681315328.0,
            "98": 2681315328.0,
            "99": 2681315328.0,
            "100": 2681315328.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 12.16871,
            "2": 0.19825,
            "3": 0.17764,
            "4": 0.17796,
            "5": 0.17192,
            "6": 0.17224,
            "7": 0.17188,
            "8": 0.17172,
            "9": 0.17327,
            "10": 0.17337,
            "11": 0.17262,
            "12": 0.17206,
            "13": 0.17211,
            "14": 0.17318,
            "15": 0.17218,
            "16": 0.17375,
            "17": 0.17267,
            "18": 0.1736,
            "19": 0.17211,
            "20": 0.16903,
            "21": 0.16941,
            "22": 0.17049,
            "23": 0.17119,
            "24": 0.173,
            "25": 0.16874,
            "26": 0.16822,
            "27": 0.16694,
            "28": 0.16671,
            "29": 0.16762,
            "30": 0.16932,
            "31": 0.17431,
            "32": 0.16784,
            "33": 0.16633,
            "34": 0.16587,
            "35": 0.16729,
            "36": 0.16658,
            "37": 0.16788,
            "38": 0.1666,
            "39": 0.16597,
            "40": 0.16589,
            "41": 0.16706,
            "42": 0.16633,
            "43": 0.16631,
            "44": 0.16797,
            "45": 0.16699,
            "46": 0.16824,
            "47": 0.167,
            "48": 0.16653,
            "49": 0.16587,
            "50": 0.16635,
            "51": 0.18233,
            "52": 0.21141,
            "53": 0.16986,
            "54": 0.1702,
            "55": 0.16952,
            "56": 0.16978,
            "57": 0.16872,
            "58": 0.16891,
            "59": 0.17005,
            "60": 0.16948,
            "61": 0.16922,
            "62": 0.16913,
            "63": 0.1694,
            "64": 0.16954,
            "65": 0.16972,
            "66": 0.16677,
            "67": 0.16621,
            "68": 0.16658,
            "69": 0.16617,
            "70": 0.1656,
            "71": 0.16718,
            "72": 0.16666,
            "73": 0.16987,
            "74": 0.17045,
            "75": 0.16726,
            "76": 0.1671,
            "77": 0.16753,
            "78": 0.17072,
            "79": 0.16826,
            "80": 0.16784,
            "81": 0.16717,
            "82": 0.16591,
            "83": 0.16729,
            "84": 0.16631,
            "85": 0.16697,
            "86": 0.1677,
            "87": 0.16577,
            "88": 0.1676,
            "89": 0.16708,
            "90": 0.16577,
            "91": 0.16637,
            "92": 0.16659,
            "93": 0.16604,
            "94": 0.16681,
            "95": 0.16705,
            "96": 0.16588,
            "97": 0.16674,
            "98": 0.16703,
            "99": 0.16605,
            "100": 0.16691
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --disable-bias-linear: true
  --async-save: true
  --use-persistent-ckpt-worker: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-checkpoint-opt_param-scheduler: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-optim-fully-reshardable: true
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --log-memory-to-tensorboard: true
TEST_TYPE: ckpt-resume


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.79331, "5": 10.84659, "10": 10.76845, "15": 10.7892, "20": 10.67878, "25": 10.50376, "30": 10.32979, "35": 10.25153, "40": 10.05257, "45": 9.80273, "50": 9.88817, "55": 9.86852, "60": 9.48258, "65": 8.93676, "70": 9.72037, "75": 9.40741, "80": 9.39621, "85": 9.60695, "90": 9.80953, "95": 9.51064, "100": 9.39623}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1654.0, "5": 1853.0, "10": 1393.0, "15": 1945.0, "20": 1614.0, "25": 1726.0, "30": 1908.0, "35": 1963.0, "40": 2284.0, "45": 2208.0, "50": 2437.0, "55": 2308.0, "60": 2312.0, "65": 2608.0, "70": 3127.0, "75": 2674.0, "80": 3221.0, "85": 3381.0, "90": 3100.0, "95": 3342.0, "100": 3240.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 719180288.0, "5": 719180288.0, "10": 719180288.0, "15": 719180288.0, "20": 719180288.0, "25": 719180288.0, "30": 719180288.0, "35": 719180288.0, "40": 719180288.0, "45": 719180288.0, "50": 719180288.0, "55": 719180288.0, "60": 719180288.0, "65": 719180288.0, "70": 719180288.0, "75": 719180288.0, "80": 719180288.0, "85": 719180288.0, "90": 719180288.0, "95": 719180288.0, "100": 719180288.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1910562816.0, "5": 2195420160.0, "10": 2195420160.0, "15": 2195420160.0, "20": 2195420160.0, "25": 2195420160.0, "30": 2195420160.0, "35": 2195420160.0, "40": 2195420160.0, "45": 2195420160.0, "50": 2195420160.0, "55": 2195420160.0, "60": 2195420160.0, "65": 2195420160.0, "70": 2195420160.0, "75": 2195420160.0, "80": 2195420160.0, "85": 2195420160.0, "90": 2195420160.0, "95": 2195420160.0, "100": 2195420160.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 5.88516, "5": 0.15891, "10": 0.15747, "15": 0.15701, "20": 0.1572, "25": 0.15726, "30": 0.15548, "35": 0.15626, "40": 0.15494, "45": 0.15525, "50": 0.15556, "55": 0.15847, "60": 0.15606, "65": 0.15643, "70": 0.15566, "75": 0.15539, "80": 0.15573, "85": 0.15618, "90": 0.15594, "95": 0.15563, "100": 0.15458}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.79208, "5": 10.8459, "10": 10.76945, "15": 10.78965, "20": 10.67868, "25": 10.50409, "30": 10.33063, "35": 10.25254, "40": 10.05221, "45": 9.80242, "50": 9.88789, "55": 9.86802, "60": 9.48251, "65": 8.93794, "70": 9.72092, "75": 9.40787, "80": 9.3965, "85": 9.6064, "90": 9.80962, "95": 9.51076, "100": 9.39625}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1633.0, "5": 1952.0, "10": 1432.0, "15": 1852.0, "20": 1592.0, "25": 1743.0, "30": 1896.0, "35": 1976.0, "40": 2182.0, "45": 2119.0, "50": 2511.0, "55": 2268.0, "60": 2425.0, "65": 2527.0, "70": 3208.0, "75": 2653.0, "80": 3362.0, "85": 3398.0, "90": 3062.0, "95": 3386.0, "100": 3279.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 714985472.0, "5": 714985472.0, "10": 714985472.0, "15": 714985472.0, "20": 714985472.0, "25": 714985472.0, "30": 714985472.0, "35": 714985472.0, "40": 714985472.0, "45": 714985472.0, "50": 714985472.0, "55": 714985472.0, "60": 714985472.0, "65": 714985472.0, "70": 714985472.0, "75": 714985472.0, "80": 714985472.0, "85": 714985472.0, "90": 714985472.0, "95": 714985472.0, "100": 714985472.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2399852544.0, "5": 2681564160.0, "10": 2681564160.0, "15": 2681564160.0, "20": 2681564160.0, "25": 2681564160.0, "30": 2681564160.0, "35": 2681564160.0, "40": 2681564160.0, "45": 2681564160.0, "50": 2681564160.0, "55": 2681564160.0, "60": 2681564160.0, "65": 2681564160.0, "70": 2681564160.0, "75": 2681564160.0, "80": 2681564160.0, "85": 2681564160.0, "90": 2681564160.0, "95": 2681564160.0, "100": 2681564160.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 16.57309, "5": 0.17809, "10": 0.17938, "15": 0.17972, "20": 0.17842, "25": 0.17835, "30": 0.17768, "35": 0.17896, "40": 0.17635, "45": 0.17582, "50": 0.1757, "55": 0.1761, "60": 0.17646, "65": 0.17665, "70": 0.1751, "75": 0.17524, "80": 0.17592, "85": 0.17681, "90": 0.18038, "95": 0.17575, "100": 0.17497}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_sequence_parallel/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --sequence-parallel: true
  --ckpt-fully-parallel-load: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-checkpoint-opt_param-scheduler: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: ckpt-resume


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.73373, "5": 10.79216, "10": 10.70711, "15": 10.75767, "20": 10.68744, "25": 10.54813, "30": 10.45558, "35": 10.38415, "40": 10.24136, "45": 9.98124, "50": 10.06498, "55": 9.98958, "60": 9.65891, "65": 9.07213, "70": 9.82086, "75": 9.54885, "80": 9.50995, "85": 9.70743, "90": 9.87787, "95": 9.60043, "100": 9.49201}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2487.0, "5": 2754.0, "10": 2448.0, "15": 2661.0, "20": 2648.0, "25": 2503.0, "30": 2629.0, "35": 2566.0, "40": 2494.0, "45": 2352.0, "50": 2526.0, "55": 2460.0, "60": 2452.0, "65": 2642.0, "70": 3156.0, "75": 2615.0, "80": 3075.0, "85": 3155.0, "90": 3402.0, "95": 3151.0, "100": 2794.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 717288960.0, "5": 717288960.0, "10": 717288960.0, "15": 717288960.0, "20": 717288960.0, "25": 717288960.0, "30": 717288960.0, "35": 717288960.0, "40": 717288960.0, "45": 717288960.0, "50": 717288960.0, "55": 717288960.0, "60": 717288960.0, "65": 717288960.0, "70": 717288960.0, "75": 717288960.0, "80": 717288960.0, "85": 717288960.0, "90": 717288960.0, "95": 717288960.0, "100": 717288960.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1910556672.0, "5": 2194839040.0, "10": 2194839040.0, "15": 2194839040.0, "20": 2194839040.0, "25": 2194839040.0, "30": 2194839040.0, "35": 2194839040.0, "40": 2194839040.0, "45": 2194839040.0, "50": 2194839040.0, "55": 2194839040.0, "60": 2194839040.0, "65": 2194839040.0, "70": 2194839040.0, "75": 2194839040.0, "80": 2194839040.0, "85": 2194839040.0, "90": 2194839040.0, "95": 2194839040.0, "100": 2194839040.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 8.49216, "5": 0.19489, "10": 0.19553, "15": 0.19491, "20": 0.19575, "25": 0.19564, "30": 0.19488, "35": 0.19479, "40": 0.19455, "45": 0.19499, "50": 0.19483, "55": 0.19654, "60": 0.19525, "65": 0.19501, "70": 0.19768, "75": 0.19567, "80": 0.1973, "85": 0.19618, "90": 0.19538, "95": 0.19713, "100": 0.19738}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.74992,
            "2": 10.77613,
            "3": 10.75715,
            "4": 10.72305,
            "5": 10.80038,
            "6": 10.82103,
            "7": 10.77176,
            "8": 10.79877,
            "9": 10.77445,
            "10": 10.70642,
            "11": 10.83286,
            "12": 10.81872,
            "13": 10.83075,
            "14": 10.8338,
            "15": 10.76397,
            "16": 10.76575,
            "17": 10.71928,
            "18": 10.768,
            "19": 10.75318,
            "20": 10.70918,
            "21": 10.69214,
            "22": 10.56531,
            "23": 10.7091,
            "24": 10.61591,
            "25": 10.55061,
            "26": 10.6259,
            "27": 10.64706,
            "28": 10.6362,
            "29": 10.65644,
            "30": 10.43679,
            "31": 10.21909,
            "32": 10.55114,
            "33": 10.5338,
            "34": 10.31817,
            "35": 10.36833,
            "36": 10.35618,
            "37": 10.46296,
            "38": 10.33836,
            "39": 10.50307,
            "40": 10.23444,
            "41": 10.2734,
            "42": 10.32945,
            "43": 9.97415,
            "44": 10.10754,
            "45": 9.9885,
            "46": 9.95475,
            "47": 10.25141,
            "48": 10.01227,
            "49": 9.70793,
            "50": 10.05501,
            "51": 9.9812,
            "52": 9.89199,
            "53": 10.19201,
            "54": 10.09574,
            "55": 10.00502,
            "56": 9.78719,
            "57": 9.6461,
            "58": 9.98626,
            "59": 9.72683,
            "60": 9.67172,
            "61": 9.80986,
            "62": 10.11126,
            "63": 9.54873,
            "64": 9.90931,
            "65": 9.08736,
            "66": 9.84658,
            "67": 9.48259,
            "68": 9.89433,
            "69": 9.87692,
            "70": 9.82465,
            "71": 9.72749,
            "72": 9.7291,
            "73": 9.62049,
            "74": 9.11605,
            "75": 9.55059,
            "76": 9.21504,
            "77": 10.14893,
            "78": 9.8138,
            "79": 9.4751,
            "80": 9.51583,
            "81": 9.58687,
            "82": 9.79025,
            "83": 9.45586,
            "84": 9.50503,
            "85": 9.71387,
            "86": 9.17462,
            "87": 9.666,
            "88": 9.84355,
            "89": 9.70736,
            "90": 9.89548,
            "91": 9.48655,
            "92": 9.47022,
            "93": 9.2148,
            "94": 8.94328,
            "95": 9.61538,
            "96": 9.63633,
            "97": 9.37646,
            "98": 9.74974,
            "99": 9.01759,
            "100": 9.50514
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2677.0,
            "2": 2700.0,
            "3": 2731.0,
            "4": 2503.0,
            "5": 2843.0,
            "6": 2937.0,
            "7": 2619.0,
            "8": 2649.0,
            "9": 2579.0,
            "10": 2466.0,
            "11": 2864.0,
            "12": 2732.0,
            "13": 2935.0,
            "14": 2829.0,
            "15": 2919.0,
            "16": 2924.0,
            "17": 2683.0,
            "18": 2796.0,
            "19": 2828.0,
            "20": 2631.0,
            "21": 2797.0,
            "22": 2631.0,
            "23": 2797.0,
            "24": 2668.0,
            "25": 2526.0,
            "26": 2856.0,
            "27": 2658.0,
            "28": 2939.0,
            "29": 3084.0,
            "30": 2744.0,
            "31": 2420.0,
            "32": 2634.0,
            "33": 2750.0,
            "34": 2458.0,
            "35": 2614.0,
            "36": 2570.0,
            "37": 2879.0,
            "38": 2662.0,
            "39": 2815.0,
            "40": 2558.0,
            "41": 2587.0,
            "42": 2691.0,
            "43": 2442.0,
            "44": 2537.0,
            "45": 2368.0,
            "46": 2456.0,
            "47": 2525.0,
            "48": 2378.0,
            "49": 2264.0,
            "50": 2670.0,
            "51": 2668.0,
            "52": 2560.0,
            "53": 2793.0,
            "54": 2927.0,
            "55": 2495.0,
            "56": 2665.0,
            "57": 2574.0,
            "58": 2851.0,
            "59": 2766.0,
            "60": 2219.0,
            "61": 2640.0,
            "62": 2855.0,
            "63": 2733.0,
            "64": 3001.0,
            "65": 2651.0,
            "66": 2794.0,
            "67": 2786.0,
            "68": 2802.0,
            "69": 2823.0,
            "70": 2942.0,
            "71": 2946.0,
            "72": 2538.0,
            "73": 2930.0,
            "74": 2132.0,
            "75": 2613.0,
            "76": 2961.0,
            "77": 2992.0,
            "78": 3034.0,
            "79": 3106.0,
            "80": 3002.0,
            "81": 3244.0,
            "82": 3292.0,
            "83": 2665.0,
            "84": 3380.0,
            "85": 3218.0,
            "86": 2747.0,
            "87": 3363.0,
            "88": 3272.0,
            "89": 3369.0,
            "90": 3343.0,
            "91": 2487.0,
            "92": 2967.0,
            "93": 2844.0,
            "94": 2936.0,
            "95": 3080.0,
            "96": 3070.0,
            "97": 3045.0,
            "98": 3285.0,
            "99": 2834.0,
            "100": 2448.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 745077248.0,
            "2": 745077248.0,
            "3": 745077248.0,
            "4": 745077248.0,
            "5": 745077248.0,
            "6": 745077248.0,
            "7": 745077248.0,
            "8": 745077248.0,
            "9": 745077248.0,
            "10": 745077248.0,
            "11": 745077248.0,
            "12": 745077248.0,
            "13": 745077248.0,
            "14": 745077248.0,
            "15": 745077248.0,
            "16": 745077248.0,
            "17": 745077248.0,
            "18": 745077248.0,
            "19": 745077248.0,
            "20": 745077248.0,
            "21": 745077248.0,
            "22": 745077248.0,
            "23": 745077248.0,
            "24": 745077248.0,
            "25": 745077248.0,
            "26": 745077248.0,
            "27": 745077248.0,
            "28": 745077248.0,
            "29": 745077248.0,
            "30": 745077248.0,
            "31": 745077248.0,
            "32": 745077248.0,
            "33": 745077248.0,
            "34": 745077248.0,
            "35": 745077248.0,
            "36": 745077248.0,
            "37": 745077248.0,
            "38": 745077248.0,
            "39": 745077248.0,
            "40": 745077248.0,
            "41": 745077248.0,
            "42": 745077248.0,
            "43": 745077248.0,
            "44": 745077248.0,
            "45": 745077248.0,
            "46": 745077248.0,
            "47": 745077248.0,
            "48": 745077248.0,
            "49": 745077248.0,
            "50": 745077248.0,
            "51": 745077248.0,
            "52": 745077248.0,
            "53": 745077248.0,
            "54": 745077248.0,
            "55": 745077248.0,
            "56": 745077248.0,
            "57": 745077248.0,
            "58": 745077248.0,
            "59": 745077248.0,
            "60": 745077248.0,
            "61": 745077248.0,
            "62": 745077248.0,
            "63": 745077248.0,
            "64": 745077248.0,
            "65": 745077248.0,
            "66": 745077248.0,
            "67": 745077248.0,
            "68": 745077248.0,
            "69": 745077248.0,
            "70": 745077248.0,
            "71": 745077248.0,
            "72": 745077248.0,
            "73": 745077248.0,
            "74": 745077248.0,
            "75": 745077248.0,
            "76": 745077248.0,
            "77": 745077248.0,
            "78": 745077248.0,
            "79": 745077248.0,
            "80": 745077248.0,
            "81": 745077248.0,
            "82": 745077248.0,
            "83": 745077248.0,
            "84": 745077248.0,
            "85": 745077248.0,
            "86": 745077248.0,
            "87": 745077248.0,
            "88": 745077248.0,
            "89": 745077248.0,
            "90": 745077248.0,
            "91": 745077248.0,
            "92": 745077248.0,
            "93": 745077248.0,
            "94": 745077248.0,
            "95": 745077248.0,
            "96": 745077248.0,
            "97": 745077248.0,
            "98": 745077248.0,
            "99": 745077248.0,
            "100": 745077248.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1939393536.0,
            "2": 2220398592.0,
            "3": 2220398592.0,
            "4": 2220398592.0,
            "5": 2220398592.0,
            "6": 2220398592.0,
            "7": 2220398592.0,
            "8": 2220398592.0,
            "9": 2220398592.0,
            "10": 2220398592.0,
            "11": 2220398592.0,
            "12": 2220398592.0,
            "13": 2220398592.0,
            "14": 2220398592.0,
            "15": 2220398592.0,
            "16": 2220398592.0,
            "17": 2220398592.0,
            "18": 2220398592.0,
            "19": 2220398592.0,
            "20": 2220398592.0,
            "21": 2220398592.0,
            "22": 2220398592.0,
            "23": 2220398592.0,
            "24": 2220398592.0,
            "25": 2220398592.0,
            "26": 2220398592.0,
            "27": 2220398592.0,
            "28": 2220398592.0,
            "29": 2220398592.0,
            "30": 2220398592.0,
            "31": 2220398592.0,
            "32": 2220398592.0,
            "33": 2220398592.0,
            "34": 2220398592.0,
            "35": 2220398592.0,
            "36": 2220398592.0,
            "37": 2220398592.0,
            "38": 2220398592.0,
            "39": 2220398592.0,
            "40": 2220398592.0,
            "41": 2220398592.0,
            "42": 2220398592.0,
            "43": 2220398592.0,
            "44": 2220398592.0,
            "45": 2220398592.0,
            "46": 2220398592.0,
            "47": 2220398592.0,
            "48": 2220398592.0,
            "49": 2220398592.0,
            "50": 2220398592.0,
            "51": 2220398592.0,
            "52": 2220398592.0,
            "53": 2220398592.0,
            "54": 2220398592.0,
            "55": 2220398592.0,
            "56": 2220398592.0,
            "57": 2220398592.0,
            "58": 2220398592.0,
            "59": 2220398592.0,
            "60": 2220398592.0,
            "61": 2220398592.0,
            "62": 2220398592.0,
            "63": 2220398592.0,
            "64": 2220398592.0,
            "65": 2220398592.0,
            "66": 2220398592.0,
            "67": 2220398592.0,
            "68": 2220398592.0,
            "69": 2220398592.0,
            "70": 2220398592.0,
            "71": 2220398592.0,
            "72": 2220398592.0,
            "73": 2220398592.0,
            "74": 2220398592.0,
            "75": 2220398592.0,
            "76": 2220398592.0,
            "77": 2220398592.0,
            "78": 2220398592.0,
            "79": 2220398592.0,
            "80": 2220398592.0,
            "81": 2220398592.0,
            "82": 2220398592.0,
            "83": 2220398592.0,
            "84": 2220398592.0,
            "85": 2220398592.0,
            "86": 2220398592.0,
            "87": 2220398592.0,
            "88": 2220398592.0,
            "89": 2220398592.0,
            "90": 2220398592.0,
            "91": 2220398592.0,
            "92": 2220398592.0,
            "93": 2220398592.0,
            "94": 2220398592.0,
            "95": 2220398592.0,
            "96": 2220398592.0,
            "97": 2220398592.0,
            "98": 2220398592.0,
            "99": 2220398592.0,
            "100": 2220398592.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 6.8425,
            "3": 0.15357,
            "4": 0.1409,
            "5": 0.13808,
            "6": 0.1382,
            "7": 0.24407,
            "8": 0.13904,
            "9": 0.13868,
            "10": 0.13899,
            "11": 0.13745,
            "12": 0.13793,
            "13": 0.13808,
            "14": 0.1368,
            "15": 0.13736,
            "16": 0.13801,
            "17": 0.13947,
            "18": 0.13945,
            "19": 0.13791,
            "20": 0.13947,
            "21": 0.13849,
            "22": 0.13877,
            "23": 0.13852,
            "24": 0.13794,
            "25": 0.13904,
            "26": 0.14025,
            "27": 0.13916,
            "28": 0.13997,
            "29": 0.1407,
            "30": 0.13911,
            "31": 0.13955,
            "32": 0.1446,
            "33": 0.24847,
            "34": 0.14784,
            "35": 0.14131,
            "36": 0.13933,
            "37": 0.13988,
            "38": 0.19634,
            "39": 0.14058,
            "40": 0.14008,
            "41": 0.14147,
            "42": 0.14265,
            "43": 0.1426,
            "44": 0.14006,
            "45": 0.14114,
            "46": 0.14113,
            "47": 0.1398,
            "48": 0.14109,
            "49": 0.14027,
            "50": 0.13929,
            "51": 0.16842,
            "52": 0.14006,
            "53": 0.13988,
            "54": 0.13768,
            "55": 0.13634,
            "56": 0.13659,
            "57": 0.13814,
            "58": 0.13574,
            "59": 0.13686,
            "60": 0.1366,
            "61": 0.13869,
            "62": 0.13965,
            "63": 0.13601,
            "64": 0.13824,
            "65": 0.13849,
            "66": 0.13984,
            "67": 0.13968,
            "68": 0.13808,
            "69": 0.13877,
            "70": 0.14016,
            "71": 0.13855,
            "72": 0.13617,
            "73": 0.13908,
            "74": 0.13686,
            "75": 0.13824,
            "76": 0.13807,
            "77": 0.13594,
            "78": 0.13948,
            "79": 0.13994,
            "80": 0.13781,
            "81": 0.13907,
            "82": 0.14148,
            "83": 0.13897,
            "84": 0.13796,
            "85": 0.13672,
            "86": 0.13869,
            "87": 0.13816,
            "88": 0.13546,
            "89": 0.13874,
            "90": 0.13865,
            "91": 0.21702,
            "92": 0.13637,
            "93": 0.13856,
            "94": 0.14121,
            "95": 0.1418,
            "96": 0.16074,
            "97": 0.14631,
            "98": 0.13758,
            "99": 0.13713,
            "100": 0.13749
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_gb200_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.9812,
            "52": 9.89198,
            "53": 10.19208,
            "54": 10.09574,
            "55": 10.00506,
            "56": 9.78714,
            "57": 9.64607,
            "58": 9.9862,
            "59": 9.72684,
            "60": 9.67172,
            "61": 9.80984,
            "62": 10.11126,
            "63": 9.54877,
            "64": 9.90929,
            "65": 9.08735,
            "66": 9.84659,
            "67": 9.48264,
            "68": 9.89439,
            "69": 9.87695,
            "70": 9.82469,
            "71": 9.72751,
            "72": 9.72911,
            "73": 9.62051,
            "74": 9.11601,
            "75": 9.55057,
            "76": 9.21504,
            "77": 10.14893,
            "78": 9.8138,
            "79": 9.47515,
            "80": 9.51582,
            "81": 9.58685,
            "82": 9.79026,
            "83": 9.45587,
            "84": 9.50503,
            "85": 9.71387,
            "86": 9.17463,
            "87": 9.66601,
            "88": 9.84354,
            "89": 9.70734,
            "90": 9.8955,
            "91": 9.48652,
            "92": 9.47023,
            "93": 9.21481,
            "94": 8.94327,
            "95": 9.6154,
            "96": 9.63634,
            "97": 9.37644,
            "98": 9.74975,
            "99": 9.01753,
            "100": 9.50515
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2676.0,
            "52": 2581.0,
            "53": 2898.0,
            "54": 2849.0,
            "55": 2548.0,
            "56": 2661.0,
            "57": 2510.0,
            "58": 2758.0,
            "59": 2650.0,
            "60": 2242.0,
            "61": 2628.0,
            "62": 2899.0,
            "63": 2605.0,
            "64": 2939.0,
            "65": 2572.0,
            "66": 2896.0,
            "67": 2640.0,
            "68": 2709.0,
            "69": 2889.0,
            "70": 3012.0,
            "71": 2978.0,
            "72": 2536.0,
            "73": 2964.0,
            "74": 2163.0,
            "75": 2603.0,
            "76": 2974.0,
            "77": 3007.0,
            "78": 3138.0,
            "79": 3197.0,
            "80": 2984.0,
            "81": 3280.0,
            "82": 3341.0,
            "83": 2757.0,
            "84": 3399.0,
            "85": 3320.0,
            "86": 2882.0,
            "87": 3407.0,
            "88": 3278.0,
            "89": 3336.0,
            "90": 3322.0,
            "91": 2472.0,
            "92": 3061.0,
            "93": 2911.0,
            "94": 3005.0,
            "95": 2984.0,
            "96": 2991.0,
            "97": 3178.0,
            "98": 3343.0,
            "99": 2929.0,
            "100": 2588.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 744880640.0,
            "52": 744880640.0,
            "53": 744880640.0,
            "54": 744880640.0,
            "55": 744880640.0,
            "56": 744880640.0,
            "57": 744880640.0,
            "58": 744880640.0,
            "59": 744880640.0,
            "60": 744880640.0,
            "61": 744880640.0,
            "62": 744880640.0,
            "63": 744880640.0,
            "64": 744880640.0,
            "65": 744880640.0,
            "66": 744880640.0,
            "67": 744880640.0,
            "68": 744880640.0,
            "69": 744880640.0,
            "70": 744880640.0,
            "71": 744880640.0,
            "72": 744880640.0,
            "73": 744880640.0,
            "74": 744880640.0,
            "75": 744880640.0,
            "76": 744880640.0,
            "77": 744880640.0,
            "78": 744880640.0,
            "79": 744880640.0,
            "80": 744880640.0,
            "81": 744880640.0,
            "82": 744880640.0,
            "83": 744880640.0,
            "84": 744880640.0,
            "85": 744880640.0,
            "86": 744880640.0,
            "87": 744880640.0,
            "88": 744880640.0,
            "89": 744880640.0,
            "90": 744880640.0,
            "91": 744880640.0,
            "92": 744880640.0,
            "93": 744880640.0,
            "94": 744880640.0,
            "95": 744880640.0,
            "96": 744880640.0,
            "97": 744880640.0,
            "98": 744880640.0,
            "99": 744880640.0,
            "100": 744880640.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2222430208.0,
            "52": 2222431232.0,
            "53": 2222431232.0,
            "54": 2222431232.0,
            "55": 2222431232.0,
            "56": 2222431232.0,
            "57": 2222431232.0,
            "58": 2222431232.0,
            "59": 2222431232.0,
            "60": 2222431232.0,
            "61": 2222431232.0,
            "62": 2222431232.0,
            "63": 2222431232.0,
            "64": 2222431232.0,
            "65": 2222431232.0,
            "66": 2222431232.0,
            "67": 2222431232.0,
            "68": 2222431232.0,
            "69": 2222431232.0,
            "70": 2222431232.0,
            "71": 2222431232.0,
            "72": 2222431232.0,
            "73": 2222431232.0,
            "74": 2222431232.0,
            "75": 2222431232.0,
            "76": 2222431232.0,
            "77": 2222431232.0,
            "78": 2222431232.0,
            "79": 2222431232.0,
            "80": 2222431232.0,
            "81": 2222431232.0,
            "82": 2222431232.0,
            "83": 2222431232.0,
            "84": 2222431232.0,
            "85": 2222431232.0,
            "86": 2222431232.0,
            "87": 2222431232.0,
            "88": 2222431232.0,
            "89": 2222431232.0,
            "90": 2222431232.0,
            "91": 2222431232.0,
            "92": 2222431232.0,
            "93": 2222431232.0,
            "94": 2222431232.0,
            "95": 2222431232.0,
            "96": 2222431232.0,
            "97": 2222431232.0,
            "98": 2222431232.0,
            "99": 2222431232.0,
            "100": 2222431232.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": "nan",
            "52": 2.21538,
            "53": 0.14615,
            "54": 0.13599,
            "55": 0.13518,
            "56": 0.13401,
            "57": 0.13944,
            "58": 0.13509,
            "59": 0.1377,
            "60": 0.13698,
            "61": 0.137,
            "62": 0.13756,
            "63": 0.14119,
            "64": 0.13937,
            "65": 0.13725,
            "66": 0.13667,
            "67": 0.13894,
            "68": 0.13705,
            "69": 0.1375,
            "70": 0.13655,
            "71": 0.13624,
            "72": 0.13743,
            "73": 0.13786,
            "74": 0.13678,
            "75": 0.13803,
            "76": 0.13591,
            "77": 0.13654,
            "78": 0.13783,
            "79": 0.13724,
            "80": 0.13943,
            "81": 0.13808,
            "82": 0.13899,
            "83": 0.13956,
            "84": 0.14004,
            "85": 0.14504,
            "86": 0.14078,
            "87": 0.14075,
            "88": 0.14222,
            "89": 0.14283,
            "90": 0.14178,
            "91": 0.14143,
            "92": 0.14178,
            "93": 0.14108,
            "94": 0.14248,
            "95": 0.14123,
            "96": 0.14274,
            "97": 0.14429,
            "98": 0.14312,
            "99": 0.14121,
            "100": 0.14248
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.76985,
            "2": 10.81791,
            "3": 10.78402,
            "4": 10.78796,
            "5": 10.81924,
            "6": 10.84305,
            "7": 10.83461,
            "8": 10.80657,
            "9": 10.83362,
            "10": 10.73563,
            "11": 10.86814,
            "12": 10.85077,
            "13": 10.84503,
            "14": 10.87135,
            "15": 10.82179,
            "16": 10.80434,
            "17": 10.76124,
            "18": 10.80358,
            "19": 10.80593,
            "20": 10.74748,
            "21": 10.72537,
            "22": 10.60596,
            "23": 10.74384,
            "24": 10.65548,
            "25": 10.58002,
            "26": 10.64493,
            "27": 10.67189,
            "28": 10.66906,
            "29": 10.6666,
            "30": 10.46943,
            "31": 10.26264,
            "32": 10.56935,
            "33": 10.54231,
            "34": 10.36113,
            "35": 10.39552,
            "36": 10.36868,
            "37": 10.47523,
            "38": 10.33713,
            "39": 10.49939,
            "40": 10.23017,
            "41": 10.30906,
            "42": 10.33123,
            "43": 9.99093,
            "44": 10.09605,
            "45": 10.00785,
            "46": 9.96712,
            "47": 10.27069,
            "48": 10.0104,
            "49": 9.73436,
            "50": 10.04737,
            "51": 10.00084,
            "52": 9.89675,
            "53": 10.1988,
            "54": 10.09063,
            "55": 10.00569,
            "56": 9.77206,
            "57": 9.6453,
            "58": 9.98586,
            "59": 9.72612,
            "60": 9.67771,
            "61": 9.81568,
            "62": 10.09202,
            "63": 9.54764,
            "64": 9.90442,
            "65": 9.09488,
            "66": 9.84066,
            "67": 9.48471,
            "68": 9.88998,
            "69": 9.87694,
            "70": 9.85293,
            "71": 9.73276,
            "72": 9.72559,
            "73": 9.63702,
            "74": 9.12336,
            "75": 9.55337,
            "76": 9.21763,
            "77": 10.15204,
            "78": 9.81466,
            "79": 9.47559,
            "80": 9.52071,
            "81": 9.58719,
            "82": 9.79124,
            "83": 9.4485,
            "84": 9.49586,
            "85": 9.7219,
            "86": 9.18037,
            "87": 9.66127,
            "88": 9.84359,
            "89": 9.7165,
            "90": 9.88101,
            "91": 9.48434,
            "92": 9.47046,
            "93": 9.20912,
            "94": 8.95381,
            "95": 9.60555,
            "96": 9.63976,
            "97": 9.38764,
            "98": 9.75733,
            "99": 9.01588,
            "100": 9.49924
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2805.0,
            "2": 2569.0,
            "3": 2669.0,
            "4": 2452.0,
            "5": 2817.0,
            "6": 2939.0,
            "7": 2784.0,
            "8": 2559.0,
            "9": 2824.0,
            "10": 2575.0,
            "11": 2911.0,
            "12": 2689.0,
            "13": 2988.0,
            "14": 2921.0,
            "15": 2693.0,
            "16": 3037.0,
            "17": 2638.0,
            "18": 2916.0,
            "19": 2863.0,
            "20": 2451.0,
            "21": 2687.0,
            "22": 2462.0,
            "23": 2777.0,
            "24": 2644.0,
            "25": 2496.0,
            "26": 2722.0,
            "27": 2758.0,
            "28": 2832.0,
            "29": 3013.0,
            "30": 2558.0,
            "31": 2303.0,
            "32": 2665.0,
            "33": 2780.0,
            "34": 2366.0,
            "35": 2532.0,
            "36": 2537.0,
            "37": 2777.0,
            "38": 2690.0,
            "39": 2843.0,
            "40": 2605.0,
            "41": 2697.0,
            "42": 2686.0,
            "43": 2317.0,
            "44": 2484.0,
            "45": 2297.0,
            "46": 2343.0,
            "47": 2613.0,
            "48": 2414.0,
            "49": 2381.0,
            "50": 2751.0,
            "51": 2584.0,
            "52": 2575.0,
            "53": 2857.0,
            "54": 2756.0,
            "55": 2531.0,
            "56": 2544.0,
            "57": 2484.0,
            "58": 2815.0,
            "59": 2564.0,
            "60": 2263.0,
            "61": 2645.0,
            "62": 2824.0,
            "63": 2589.0,
            "64": 3010.0,
            "65": 2664.0,
            "66": 2928.0,
            "67": 2609.0,
            "68": 2747.0,
            "69": 2832.0,
            "70": 3003.0,
            "71": 2741.0,
            "72": 2483.0,
            "73": 2809.0,
            "74": 2131.0,
            "75": 2673.0,
            "76": 3018.0,
            "77": 3023.0,
            "78": 3182.0,
            "79": 3251.0,
            "80": 3075.0,
            "81": 3372.0,
            "82": 3352.0,
            "83": 2607.0,
            "84": 3299.0,
            "85": 3175.0,
            "86": 3061.0,
            "87": 3470.0,
            "88": 3331.0,
            "89": 3464.0,
            "90": 3438.0,
            "91": 2435.0,
            "92": 2979.0,
            "93": 2983.0,
            "94": 2953.0,
            "95": 3119.0,
            "96": 2879.0,
            "97": 3018.0,
            "98": 3305.0,
            "99": 2999.0,
            "100": 2710.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 745732608.0,
            "2": 745732608.0,
            "3": 745732608.0,
            "4": 745732608.0,
            "5": 745732608.0,
            "6": 745732608.0,
            "7": 745732608.0,
            "8": 745732608.0,
            "9": 745732608.0,
            "10": 745732608.0,
            "11": 745732608.0,
            "12": 745732608.0,
            "13": 745732608.0,
            "14": 745732608.0,
            "15": 745732608.0,
            "16": 745732608.0,
            "17": 745732608.0,
            "18": 745732608.0,
            "19": 745732608.0,
            "20": 745732608.0,
            "21": 745732608.0,
            "22": 745732608.0,
            "23": 745732608.0,
            "24": 745732608.0,
            "25": 745732608.0,
            "26": 745732608.0,
            "27": 745732608.0,
            "28": 745732608.0,
            "29": 745732608.0,
            "30": 745732608.0,
            "31": 745732608.0,
            "32": 745732608.0,
            "33": 745732608.0,
            "34": 745732608.0,
            "35": 745732608.0,
            "36": 745732608.0,
            "37": 745732608.0,
            "38": 745732608.0,
            "39": 745732608.0,
            "40": 745732608.0,
            "41": 745732608.0,
            "42": 745732608.0,
            "43": 745732608.0,
            "44": 745732608.0,
            "45": 745732608.0,
            "46": 745732608.0,
            "47": 745732608.0,
            "48": 745732608.0,
            "49": 745732608.0,
            "50": 745732608.0,
            "51": 745732608.0,
            "52": 745732608.0,
            "53": 745732608.0,
            "54": 745732608.0,
            "55": 745732608.0,
            "56": 745732608.0,
            "57": 745732608.0,
            "58": 745732608.0,
            "59": 745732608.0,
            "60": 745732608.0,
            "61": 745732608.0,
            "62": 745732608.0,
            "63": 745732608.0,
            "64": 745732608.0,
            "65": 745732608.0,
            "66": 745732608.0,
            "67": 745732608.0,
            "68": 745732608.0,
            "69": 745732608.0,
            "70": 745732608.0,
            "71": 745732608.0,
            "72": 745732608.0,
            "73": 745732608.0,
            "74": 745732608.0,
            "75": 745732608.0,
            "76": 745732608.0,
            "77": 745732608.0,
            "78": 745732608.0,
            "79": 745732608.0,
            "80": 745732608.0,
            "81": 745732608.0,
            "82": 745732608.0,
            "83": 745732608.0,
            "84": 745732608.0,
            "85": 745732608.0,
            "86": 745732608.0,
            "87": 745732608.0,
            "88": 745732608.0,
            "89": 745732608.0,
            "90": 745732608.0,
            "91": 745732608.0,
            "92": 745732608.0,
            "93": 745732608.0,
            "94": 745732608.0,
            "95": 745732608.0,
            "96": 745732608.0,
            "97": 745732608.0,
            "98": 745732608.0,
            "99": 745732608.0,
            "100": 745732608.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1927859200.0,
            "2": 2211485184.0,
            "3": 2211485184.0,
            "4": 2211485184.0,
            "5": 2211485184.0,
            "6": 2211485184.0,
            "7": 2211485184.0,
            "8": 2211485184.0,
            "9": 2211485184.0,
            "10": 2211485184.0,
            "11": 2211485184.0,
            "12": 2211485184.0,
            "13": 2211485184.0,
            "14": 2211485184.0,
            "15": 2211485184.0,
            "16": 2211485184.0,
            "17": 2211485184.0,
            "18": 2211485184.0,
            "19": 2211485184.0,
            "20": 2211485184.0,
            "21": 2211485184.0,
            "22": 2211485184.0,
            "23": 2211485184.0,
            "24": 2211485184.0,
            "25": 2211485184.0,
            "26": 2211485184.0,
            "27": 2211485184.0,
            "28": 2211485184.0,
            "29": 2211485184.0,
            "30": 2211485184.0,
            "31": 2211485184.0,
            "32": 2211485184.0,
            "33": 2211485184.0,
            "34": 2211485184.0,
            "35": 2211485184.0,
            "36": 2211485184.0,
            "37": 2211485184.0,
            "38": 2211485184.0,
            "39": 2211485184.0,
            "40": 2211485184.0,
            "41": 2211485184.0,
            "42": 2211485184.0,
            "43": 2211485184.0,
            "44": 2211485184.0,
            "45": 2211485184.0,
            "46": 2211485184.0,
            "47": 2211485184.0,
            "48": 2211485184.0,
            "49": 2211485184.0,
            "50": 2211485184.0,
            "51": 2211485184.0,
            "52": 2211485184.0,
            "53": 2211485184.0,
            "54": 2211485184.0,
            "55": 2211485184.0,
            "56": 2211485184.0,
            "57": 2211485184.0,
            "58": 2211485184.0,
            "59": 2211485184.0,
            "60": 2211485184.0,
            "61": 2211485184.0,
            "62": 2211485184.0,
            "63": 2211485184.0,
            "64": 2211485184.0,
            "65": 2211485184.0,
            "66": 2211485184.0,
            "67": 2211485184.0,
            "68": 2211485184.0,
            "69": 2211485184.0,
            "70": 2211485184.0,
            "71": 2211485184.0,
            "72": 2211485184.0,
            "73": 2211485184.0,
            "74": 2211485184.0,
            "75": 2211485184.0,
            "76": 2211485184.0,
            "77": 2211485184.0,
            "78": 2211485184.0,
            "79": 2211485184.0,
            "80": 2211485184.0,
            "81": 2211485184.0,
            "82": 2211485184.0,
            "83": 2211485184.0,
            "84": 2211485184.0,
            "85": 2211485184.0,
            "86": 2211485184.0,
            "87": 2211485184.0,
            "88": 2211485184.0,
            "89": 2211485184.0,
            "90": 2211485184.0,
            "91": 2211485184.0,
            "92": 2211485184.0,
            "93": 2211485184.0,
            "94": 2211485184.0,
            "95": 2211485184.0,
            "96": 2211485184.0,
            "97": 2211485184.0,
            "98": 2211485184.0,
            "99": 2211485184.0,
            "100": 2211485184.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 6.67128,
            "3": 0.11715,
            "4": 0.10264,
            "5": 0.10169,
            "6": 0.10189,
            "7": 0.10092,
            "8": 0.09811,
            "9": 0.09543,
            "10": 0.09613,
            "11": 0.09543,
            "12": 0.09592,
            "13": 0.09622,
            "14": 0.09609,
            "15": 0.09586,
            "16": 0.09588,
            "17": 0.09644,
            "18": 0.09588,
            "19": 0.09609,
            "20": 0.09593,
            "21": 0.09624,
            "22": 0.09621,
            "23": 0.09548,
            "24": 0.09544,
            "25": 0.09527,
            "26": 0.09554,
            "27": 0.09553,
            "28": 0.09493,
            "29": 0.09537,
            "30": 0.09579,
            "31": 0.09786,
            "32": 0.09613,
            "33": 0.09742,
            "34": 0.09746,
            "35": 0.09803,
            "36": 0.09784,
            "37": 0.09789,
            "38": 0.09536,
            "39": 0.09891,
            "40": 0.0955,
            "41": 0.09548,
            "42": 0.09562,
            "43": 0.09684,
            "44": 0.09596,
            "45": 0.09549,
            "46": 0.09634,
            "47": 0.09586,
            "48": 0.09609,
            "49": 0.09592,
            "50": 0.09588,
            "51": 0.10923,
            "52": 0.09944,
            "53": 0.09884,
            "54": 0.09808,
            "55": 0.09806,
            "56": 0.09755,
            "57": 0.09795,
            "58": 0.09523,
            "59": 0.0943,
            "60": 0.0944,
            "61": 0.09522,
            "62": 0.09461,
            "63": 0.09584,
            "64": 0.09662,
            "65": 0.10008,
            "66": 0.09542,
            "67": 0.0954,
            "68": 0.09384,
            "69": 0.09458,
            "70": 0.09508,
            "71": 0.09433,
            "72": 0.0945,
            "73": 0.09505,
            "74": 0.09509,
            "75": 0.09513,
            "76": 0.09549,
            "77": 0.1005,
            "78": 0.1006,
            "79": 0.10067,
            "80": 0.10033,
            "81": 0.10038,
            "82": 0.10023,
            "83": 0.09673,
            "84": 0.0947,
            "85": 0.09569,
            "86": 0.09555,
            "87": 0.09437,
            "88": 0.09428,
            "89": 0.09456,
            "90": 0.09449,
            "91": 0.09443,
            "92": 0.09552,
            "93": 0.09456,
            "94": 0.09497,
            "95": 0.09414,
            "96": 0.09502,
            "97": 0.09534,
            "98": 0.09476,
            "99": 0.09577,
            "100": 0.09533
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 10.00084,
            "52": 9.89672,
            "53": 10.19876,
            "54": 10.09066,
            "55": 10.00567,
            "56": 9.77199,
            "57": 9.64533,
            "58": 9.98587,
            "59": 9.72608,
            "60": 9.6777,
            "61": 9.8157,
            "62": 10.092,
            "63": 9.54758,
            "64": 9.90438,
            "65": 9.09492,
            "66": 9.84068,
            "67": 9.48471,
            "68": 9.88996,
            "69": 9.87691,
            "70": 9.85294,
            "71": 9.73278,
            "72": 9.72558,
            "73": 9.63706,
            "74": 9.12334,
            "75": 9.55335,
            "76": 9.21765,
            "77": 10.15202,
            "78": 9.81465,
            "79": 9.47558,
            "80": 9.52073,
            "81": 9.5872,
            "82": 9.79125,
            "83": 9.44848,
            "84": 9.49585,
            "85": 9.72189,
            "86": 9.18037,
            "87": 9.66127,
            "88": 9.84359,
            "89": 9.71651,
            "90": 9.88102,
            "91": 9.48434,
            "92": 9.4705,
            "93": 9.20911,
            "94": 8.95382,
            "95": 9.60554,
            "96": 9.63976,
            "97": 9.38762,
            "98": 9.7573,
            "99": 9.0159,
            "100": 9.49925
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2575.0,
            "52": 2621.0,
            "53": 2891.0,
            "54": 2655.0,
            "55": 2559.0,
            "56": 2566.0,
            "57": 2471.0,
            "58": 2767.0,
            "59": 2529.0,
            "60": 2289.0,
            "61": 2642.0,
            "62": 2820.0,
            "63": 2654.0,
            "64": 3020.0,
            "65": 2687.0,
            "66": 2884.0,
            "67": 2666.0,
            "68": 2720.0,
            "69": 2738.0,
            "70": 3004.0,
            "71": 2816.0,
            "72": 2537.0,
            "73": 2826.0,
            "74": 2192.0,
            "75": 2647.0,
            "76": 3048.0,
            "77": 3019.0,
            "78": 3134.0,
            "79": 3092.0,
            "80": 3054.0,
            "81": 3298.0,
            "82": 3350.0,
            "83": 2597.0,
            "84": 3436.0,
            "85": 3350.0,
            "86": 2993.0,
            "87": 3509.0,
            "88": 3403.0,
            "89": 3490.0,
            "90": 3368.0,
            "91": 2461.0,
            "92": 2803.0,
            "93": 2933.0,
            "94": 2888.0,
            "95": 3138.0,
            "96": 3047.0,
            "97": 3016.0,
            "98": 3382.0,
            "99": 2995.0,
            "100": 2490.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 745929216.0,
            "52": 745929216.0,
            "53": 745929216.0,
            "54": 745929216.0,
            "55": 745929216.0,
            "56": 745929216.0,
            "57": 745929216.0,
            "58": 745929216.0,
            "59": 745929216.0,
            "60": 745929216.0,
            "61": 745929216.0,
            "62": 745929216.0,
            "63": 745929216.0,
            "64": 745929216.0,
            "65": 745929216.0,
            "66": 745929216.0,
            "67": 745929216.0,
            "68": 745929216.0,
            "69": 745929216.0,
            "70": 745929216.0,
            "71": 745929216.0,
            "72": 745929216.0,
            "73": 745929216.0,
            "74": 745929216.0,
            "75": 745929216.0,
            "76": 745929216.0,
            "77": 745929216.0,
            "78": 745929216.0,
            "79": 745929216.0,
            "80": 745929216.0,
            "81": 745929216.0,
            "82": 745929216.0,
            "83": 745929216.0,
            "84": 745929216.0,
            "85": 745929216.0,
            "86": 745929216.0,
            "87": 745929216.0,
            "88": 745929216.0,
            "89": 745929216.0,
            "90": 745929216.0,
            "91": 745929216.0,
            "92": 745929216.0,
            "93": 745929216.0,
            "94": 745929216.0,
            "95": 745929216.0,
            "96": 745929216.0,
            "97": 745929216.0,
            "98": 745929216.0,
            "99": 745929216.0,
            "100": 745929216.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2209847296.0,
            "52": 2209848320.0,
            "53": 2209848320.0,
            "54": 2209848320.0,
            "55": 2209848320.0,
            "56": 2209848320.0,
            "57": 2209848320.0,
            "58": 2209848320.0,
            "59": 2209848320.0,
            "60": 2209848320.0,
            "61": 2209848320.0,
            "62": 2209848320.0,
            "63": 2209848320.0,
            "64": 2209848320.0,
            "65": 2209848320.0,
            "66": 2209848320.0,
            "67": 2209848320.0,
            "68": 2209848320.0,
            "69": 2209848320.0,
            "70": 2209848320.0,
            "71": 2209848320.0,
            "72": 2209848320.0,
            "73": 2209848320.0,
            "74": 2209848320.0,
            "75": 2209848320.0,
            "76": 2209848320.0,
            "77": 2209848320.0,
            "78": 2209848320.0,
            "79": 2209848320.0,
            "80": 2209848320.0,
            "81": 2209848320.0,
            "82": 2209848320.0,
            "83": 2209848320.0,
            "84": 2209848320.0,
            "85": 2209848320.0,
            "86": 2209848320.0,
            "87": 2209848320.0,
            "88": 2209848320.0,
            "89": 2209848320.0,
            "90": 2209848320.0,
            "91": 2209848320.0,
            "92": 2209848320.0,
            "93": 2209848320.0,
            "94": 2209848320.0,
            "95": 2209848320.0,
            "96": 2209848320.0,
            "97": 2209848320.0,
            "98": 2209848320.0,
            "99": 2209848320.0,
            "100": 2209848320.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 37.2947,
            "52": 0.14072,
            "53": 0.09482,
            "54": 0.09404,
            "55": 0.09449,
            "56": 0.09381,
            "57": 0.09346,
            "58": 0.09378,
            "59": 0.095,
            "60": 0.09392,
            "61": 0.09499,
            "62": 0.09499,
            "63": 0.09735,
            "64": 0.10206,
            "65": 0.09653,
            "66": 0.09566,
            "67": 0.09553,
            "68": 0.09405,
            "69": 0.09463,
            "70": 0.09396,
            "71": 0.09424,
            "72": 0.0967,
            "73": 0.09895,
            "74": 0.09633,
            "75": 0.0965,
            "76": 0.09665,
            "77": 0.10127,
            "78": 0.10066,
            "79": 0.10529,
            "80": 0.10669,
            "81": 0.10018,
            "82": 0.09658,
            "83": 0.09504,
            "84": 0.0941,
            "85": 0.09377,
            "86": 0.09642,
            "87": 0.09327,
            "88": 0.09416,
            "89": 0.09453,
            "90": 0.09434,
            "91": 0.09472,
            "92": 0.09416,
            "93": 0.09427,
            "94": 0.09459,
            "95": 0.09437,
            "96": 0.09352,
            "97": 0.09986,
            "98": 0.09365,
            "99": 0.09441,
            "100": 0.094
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.74049, "5": 10.79201, "10": 10.71088, "15": 10.76031, "20": 10.6891, "25": 10.54338, "30": 10.4542, "35": 10.38324, "40": 10.24296, "45": 9.9834, "50": 10.06865, "55": 9.98923, "60": 9.66705, "65": 9.07241, "70": 9.81879, "75": 9.55274, "80": 9.51057, "85": 9.70756, "90": 9.87997, "95": 9.60068, "100": 9.49262}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2527.0, "5": 2875.0, "10": 2475.0, "15": 2508.0, "20": 2650.0, "25": 2392.0, "30": 2484.0, "35": 2573.0, "40": 2559.0, "45": 2519.0, "50": 2500.0, "55": 2430.0, "60": 2191.0, "65": 2646.0, "70": 3203.0, "75": 2515.0, "80": 3140.0, "85": 3195.0, "90": 3365.0, "95": 3240.0, "100": 2507.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 715322368.0, "5": 715322368.0, "10": 715322368.0, "15": 715322368.0, "20": 715322368.0, "25": 715322368.0, "30": 715322368.0, "35": 715322368.0, "40": 715322368.0, "45": 715322368.0, "50": 715322368.0, "55": 715322368.0, "60": 715322368.0, "65": 715322368.0, "70": 715322368.0, "75": 715322368.0, "80": 715322368.0, "85": 715322368.0, "90": 715322368.0, "95": 715322368.0, "100": 715322368.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2402991104.0, "5": 2683341824.0, "10": 2683341824.0, "15": 2683341824.0, "20": 2683341824.0, "25": 2683341824.0, "30": 2683341824.0, "35": 2683341824.0, "40": 2683341824.0, "45": 2683341824.0, "50": 2683341824.0, "55": 2683341824.0, "60": 2683341824.0, "65": 2683341824.0, "70": 2683341824.0, "75": 2683341824.0, "80": 2683341824.0, "85": 2683341824.0, "90": 2683341824.0, "95": 2683341824.0, "100": 2683341824.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 17.72945, "5": 0.17021, "10": 0.17171, "15": 0.16943, "20": 0.16974, "25": 0.16893, "30": 0.16891, "35": 0.16944, "40": 0.16851, "45": 0.16916, "50": 0.16874, "55": 0.17286, "60": 0.17328, "65": 0.17679, "70": 0.17323, "75": 0.1706, "80": 0.17149, "85": 0.17726, "90": 0.171, "95": 0.17074, "100": 0.17122}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --swiglu: true
  --ckpt-fully-parallel-load: true
  --async-save: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-checkpoint-opt_param-scheduler: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --log-memory-to-tensorboard: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: ckpt-resume


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.90314, "5": 10.89447, "10": 10.8849, "15": 10.83878, "20": 10.74074, "25": 10.57467, "30": 10.37766, "35": 10.29035, "40": 10.11492, "45": 9.85681, "50": 9.93469, "55": 9.8756, "60": 9.52968, "65": 8.95119, "70": 9.7662, "75": 9.43087, "80": 9.40849, "85": 9.63373, "90": 9.85164, "95": 9.51809, "100": 9.43285}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 22727000.0, "5": 22714814.0, "10": 22918360.0, "15": 22821260.0, "20": 22694352.0, "25": 22818970.0, "30": 22631432.0, "35": 22788032.0, "40": 22658064.0, "45": 22675272.0, "50": 22905284.0, "55": 22518416.0, "60": 22743058.0, "65": 23060212.0, "70": 22829348.0, "75": 23053922.0, "80": 22706236.0, "85": 22712004.0, "90": 22972420.0, "95": 23048514.0, "100": 23016268.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 719180288.0, "5": 719180288.0, "10": 719180288.0, "15": 719180288.0, "20": 719180288.0, "25": 719180288.0, "30": 719180288.0, "35": 719180288.0, "40": 719180288.0, "45": 719180288.0, "50": 719180288.0, "55": 719180288.0, "60": 719180288.0, "65": 719180288.0, "70": 719180288.0, "75": 719180288.0, "80": 719180288.0, "85": 719180288.0, "90": 719180288.0, "95": 719180288.0, "100": 719180288.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1910562816.0, "5": 2195420160.0, "10": 2195420160.0, "15": 2195420160.0, "20": 2195420160.0, "25": 2195420160.0, "30": 2195420160.0, "35": 2195420160.0, "40": 2195420160.0, "45": 2195420160.0, "50": 2195420160.0, "55": 2195420160.0, "60": 2195420160.0, "65": 2195420160.0, "70": 2195420160.0, "75": 2195420160.0, "80": 2195420160.0, "85": 2195420160.0, "90": 2195420160.0, "95": 2195420160.0, "100": 2195420160.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 7.4709, "5": 0.19918, "10": 0.1995, "15": 0.19813, "20": 0.19759, "25": 0.1991, "30": 0.1979, "35": 0.19749, "40": 0.1961, "45": 0.19564, "50": 0.19594, "55": 0.19603, "60": 0.19543, "65": 0.19622, "70": 0.19626, "75": 0.19476, "80": 0.19489, "85": 0.19641, "90": 0.19715, "95": 0.1969, "100": 0.19594}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.97513,
            "2": 10.97995,
            "3": 10.98066,
            "4": 10.99791,
            "5": 10.96412,
            "6": 10.95966,
            "7": 10.97622,
            "8": 10.97531,
            "9": 10.97506,
            "10": 10.97665,
            "11": 10.92846,
            "12": 10.9494,
            "13": 10.94009,
            "14": 10.93747,
            "15": 10.92917,
            "16": 10.91904,
            "17": 10.90495,
            "18": 10.89425,
            "19": 10.89215,
            "20": 10.81808,
            "21": 10.7816,
            "22": 10.70813,
            "23": 10.7819,
            "24": 10.69774,
            "25": 10.66245,
            "26": 10.69992,
            "27": 10.68419,
            "28": 10.62061,
            "29": 10.62277,
            "30": 10.45367,
            "31": 10.24899,
            "32": 10.52222,
            "33": 10.51211,
            "34": 10.30154,
            "35": 10.34384,
            "36": 10.30677,
            "37": 10.38891,
            "38": 10.24857,
            "39": 10.44177,
            "40": 10.16246,
            "41": 10.20434,
            "42": 10.26319,
            "43": 9.9082,
            "44": 10.01995,
            "45": 9.91152,
            "46": 9.886,
            "47": 10.18408,
            "48": 9.9033,
            "49": 9.59959,
            "50": 9.96198,
            "51": 9.90259,
            "52": 9.79281,
            "53": 10.11536,
            "54": 9.99216,
            "55": 9.91665,
            "56": 9.66015,
            "57": 9.52038,
            "58": 9.87094,
            "59": 9.6209,
            "60": 9.54952,
            "61": 9.70012,
            "62": 10.00629,
            "63": 9.42168,
            "64": 9.79893,
            "65": 8.97548,
            "66": 9.73165,
            "67": 9.38933,
            "68": 9.80066,
            "69": 9.81152,
            "70": 9.76761,
            "71": 9.63356,
            "72": 9.59892,
            "73": 9.51708,
            "74": 8.96512,
            "75": 9.43589,
            "76": 9.11207,
            "77": 10.06881,
            "78": 9.72515,
            "79": 9.39985,
            "80": 9.41154,
            "81": 9.50094,
            "82": 9.69861,
            "83": 9.33578,
            "84": 9.4341,
            "85": 9.63907,
            "86": 9.06166,
            "87": 9.60563,
            "88": 9.77626,
            "89": 9.6243,
            "90": 9.82766,
            "91": 9.35869,
            "92": 9.38066,
            "93": 9.09681,
            "94": 8.83995,
            "95": 9.52751,
            "96": 9.53562,
            "97": 9.32689,
            "98": 9.69354,
            "99": 8.88933,
            "100": 9.42104
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 22726972.0,
            "2": 22924386.0,
            "3": 22597036.0,
            "4": 23219218.0,
            "5": 22714492.0,
            "6": 23021698.0,
            "7": 22771376.0,
            "8": 22926820.0,
            "9": 22841276.0,
            "10": 22918392.0,
            "11": 22500620.0,
            "12": 22459672.0,
            "13": 22917468.0,
            "14": 22388398.0,
            "15": 22822252.0,
            "16": 22830612.0,
            "17": 22820228.0,
            "18": 22582844.0,
            "19": 22618412.0,
            "20": 22693594.0,
            "21": 22739320.0,
            "22": 22800076.0,
            "23": 22539112.0,
            "24": 22770966.0,
            "25": 22819404.0,
            "26": 22548188.0,
            "27": 22468652.0,
            "28": 22453560.0,
            "29": 22530344.0,
            "30": 22630776.0,
            "31": 22955664.0,
            "32": 22585020.0,
            "33": 22558760.0,
            "34": 22835536.0,
            "35": 22787790.0,
            "36": 22589526.0,
            "37": 22497640.0,
            "38": 22896056.0,
            "39": 22802282.0,
            "40": 22657698.0,
            "41": 22659592.0,
            "42": 22666980.0,
            "43": 22976392.0,
            "44": 22747128.0,
            "45": 22674364.0,
            "46": 22883920.0,
            "47": 22634300.0,
            "48": 22928164.0,
            "49": 22728710.0,
            "50": 22904340.0,
            "51": 22791436.0,
            "52": 22748292.0,
            "53": 22924772.0,
            "54": 22840284.0,
            "55": 22517880.0,
            "56": 22877730.0,
            "57": 23113080.0,
            "58": 22845568.0,
            "59": 22716022.0,
            "60": 22743056.0,
            "61": 22724434.0,
            "62": 22672316.0,
            "63": 22846416.0,
            "64": 22823178.0,
            "65": 23061654.0,
            "66": 22729712.0,
            "67": 22908434.0,
            "68": 22610444.0,
            "69": 22584604.0,
            "70": 22828526.0,
            "71": 22748442.0,
            "72": 22655052.0,
            "73": 22740588.0,
            "74": 23048316.0,
            "75": 23054664.0,
            "76": 22901072.0,
            "77": 22272198.0,
            "78": 22789244.0,
            "79": 22743700.0,
            "80": 22706576.0,
            "81": 22890704.0,
            "82": 22778282.0,
            "83": 22840256.0,
            "84": 23010368.0,
            "85": 22711796.0,
            "86": 23103236.0,
            "87": 22735120.0,
            "88": 22636998.0,
            "89": 22498612.0,
            "90": 22972652.0,
            "91": 22767776.0,
            "92": 22809424.0,
            "93": 22658980.0,
            "94": 22911920.0,
            "95": 23047890.0,
            "96": 22828804.0,
            "97": 22608196.0,
            "98": 22762820.0,
            "99": 22906714.0,
            "100": 23016048.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 748541440.0,
            "2": 748541440.0,
            "3": 748541440.0,
            "4": 748541440.0,
            "5": 748541440.0,
            "6": 748541440.0,
            "7": 748541440.0,
            "8": 748541440.0,
            "9": 748541440.0,
            "10": 748541440.0,
            "11": 748541440.0,
            "12": 748541440.0,
            "13": 748541440.0,
            "14": 748541440.0,
            "15": 748541440.0,
            "16": 748541440.0,
            "17": 748541440.0,
            "18": 748541440.0,
            "19": 748541440.0,
            "20": 748541440.0,
            "21": 748541440.0,
            "22": 748541440.0,
            "23": 748541440.0,
            "24": 748541440.0,
            "25": 748541440.0,
            "26": 748541440.0,
            "27": 748541440.0,
            "28": 748541440.0,
            "29": 748541440.0,
            "30": 748541440.0,
            "31": 748541440.0,
            "32": 748541440.0,
            "33": 748541440.0,
            "34": 748541440.0,
            "35": 748541440.0,
            "36": 748541440.0,
            "37": 748541440.0,
            "38": 748541440.0,
            "39": 748541440.0,
            "40": 748541440.0,
            "41": 748541440.0,
            "42": 748541440.0,
            "43": 748541440.0,
            "44": 748541440.0,
            "45": 748541440.0,
            "46": 748541440.0,
            "47": 748541440.0,
            "48": 748541440.0,
            "49": 748541440.0,
            "50": 748541440.0,
            "51": 748541440.0,
            "52": 748541440.0,
            "53": 748541440.0,
            "54": 748541440.0,
            "55": 748541440.0,
            "56": 748541440.0,
            "57": 748541440.0,
            "58": 748541440.0,
            "59": 748541440.0,
            "60": 748541440.0,
            "61": 748541440.0,
            "62": 748541440.0,
            "63": 748541440.0,
            "64": 748541440.0,
            "65": 748541440.0,
            "66": 748541440.0,
            "67": 748541440.0,
            "68": 748541440.0,
            "69": 748541440.0,
            "70": 748541440.0,
            "71": 748541440.0,
            "72": 748541440.0,
            "73": 748541440.0,
            "74": 748541440.0,
            "75": 748541440.0,
            "76": 748541440.0,
            "77": 748541440.0,
            "78": 748541440.0,
            "79": 748541440.0,
            "80": 748541440.0,
            "81": 748541440.0,
            "82": 748541440.0,
            "83": 748541440.0,
            "84": 748541440.0,
            "85": 748541440.0,
            "86": 748541440.0,
            "87": 748541440.0,
            "88": 748541440.0,
            "89": 748541440.0,
            "90": 748541440.0,
            "91": 748541440.0,
            "92": 748541440.0,
            "93": 748541440.0,
            "94": 748541440.0,
            "95": 748541440.0,
            "96": 748541440.0,
            "97": 748541440.0,
            "98": 748541440.0,
            "99": 748541440.0,
            "100": 748541440.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1939923968.0,
            "2": 2224781312.0,
            "3": 2224781312.0,
            "4": 2224781312.0,
            "5": 2224781312.0,
            "6": 2224781312.0,
            "7": 2224781312.0,
            "8": 2224781312.0,
            "9": 2224781312.0,
            "10": 2224781312.0,
            "11": 2224781312.0,
            "12": 2224781312.0,
            "13": 2224781312.0,
            "14": 2224781312.0,
            "15": 2224781312.0,
            "16": 2224781312.0,
            "17": 2224781312.0,
            "18": 2224781312.0,
            "19": 2224781312.0,
            "20": 2224781312.0,
            "21": 2224781312.0,
            "22": 2224781312.0,
            "23": 2224781312.0,
            "24": 2224781312.0,
            "25": 2224781312.0,
            "26": 2224781312.0,
            "27": 2224781312.0,
            "28": 2224781312.0,
            "29": 2224781312.0,
            "30": 2224781312.0,
            "31": 2224781312.0,
            "32": 2224781312.0,
            "33": 2224781312.0,
            "34": 2224781312.0,
            "35": 2224781312.0,
            "36": 2224781312.0,
            "37": 2224781312.0,
            "38": 2224781312.0,
            "39": 2224781312.0,
            "40": 2224781312.0,
            "41": 2224781312.0,
            "42": 2224781312.0,
            "43": 2224781312.0,
            "44": 2224781312.0,
            "45": 2224781312.0,
            "46": 2224781312.0,
            "47": 2224781312.0,
            "48": 2224781312.0,
            "49": 2224781312.0,
            "50": 2224781312.0,
            "51": 2224781312.0,
            "52": 2224781312.0,
            "53": 2224781312.0,
            "54": 2224781312.0,
            "55": 2224781312.0,
            "56": 2224781312.0,
            "57": 2224781312.0,
            "58": 2224781312.0,
            "59": 2224781312.0,
            "60": 2224781312.0,
            "61": 2224781312.0,
            "62": 2224781312.0,
            "63": 2224781312.0,
            "64": 2224781312.0,
            "65": 2224781312.0,
            "66": 2224781312.0,
            "67": 2224781312.0,
            "68": 2224781312.0,
            "69": 2224781312.0,
            "70": 2224781312.0,
            "71": 2224781312.0,
            "72": 2224781312.0,
            "73": 2224781312.0,
            "74": 2224781312.0,
            "75": 2224781312.0,
            "76": 2224781312.0,
            "77": 2224781312.0,
            "78": 2224781312.0,
            "79": 2224781312.0,
            "80": 2224781312.0,
            "81": 2224781312.0,
            "82": 2224781312.0,
            "83": 2224781312.0,
            "84": 2224781312.0,
            "85": 2224781312.0,
            "86": 2224781312.0,
            "87": 2224781312.0,
            "88": 2224781312.0,
            "89": 2224781312.0,
            "90": 2224781312.0,
            "91": 2224781312.0,
            "92": 2224781312.0,
            "93": 2224781312.0,
            "94": 2224781312.0,
            "95": 2224781312.0,
            "96": 2224781312.0,
            "97": 2224781312.0,
            "98": 2224781312.0,
            "99": 2224781312.0,
            "100": 2224781312.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 2.33774,
            "3": 0.14705,
            "4": 0.13192,
            "5": 0.13017,
            "6": 0.1292,
            "7": 0.13024,
            "8": 0.13032,
            "9": 0.12858,
            "10": 0.12948,
            "11": 0.12888,
            "12": 0.12888,
            "13": 0.12811,
            "14": 0.12943,
            "15": 0.12948,
            "16": 0.1295,
            "17": 0.13022,
            "18": 0.12847,
            "19": 0.12992,
            "20": 0.1308,
            "21": 0.12844,
            "22": 0.13063,
            "23": 0.13033,
            "24": 0.13003,
            "25": 0.12935,
            "26": 0.13016,
            "27": 0.12989,
            "28": 0.12947,
            "29": 0.12857,
            "30": 0.12949,
            "31": 0.12997,
            "32": 0.12843,
            "33": 0.1291,
            "34": 0.12894,
            "35": 0.13061,
            "36": 0.12974,
            "37": 0.12939,
            "38": 0.13039,
            "39": 0.13034,
            "40": 0.13069,
            "41": 0.13259,
            "42": 0.13109,
            "43": 0.13211,
            "44": 0.1299,
            "45": 0.1295,
            "46": 0.13001,
            "47": 0.13037,
            "48": 0.13043,
            "49": 0.13012,
            "50": 0.12915,
            "51": 0.14665,
            "52": 0.12869,
            "53": 0.12717,
            "54": 0.12709,
            "55": 0.12611,
            "56": 0.12645,
            "57": 0.12711,
            "58": 0.12728,
            "59": 0.1269,
            "60": 0.12701,
            "61": 0.1281,
            "62": 0.12781,
            "63": 0.12842,
            "64": 0.12745,
            "65": 0.12897,
            "66": 0.12786,
            "67": 0.12983,
            "68": 0.13068,
            "69": 0.1284,
            "70": 0.12896,
            "71": 0.1288,
            "72": 0.13026,
            "73": 0.13011,
            "74": 0.12891,
            "75": 0.12798,
            "76": 0.12866,
            "77": 0.12994,
            "78": 0.12957,
            "79": 0.12765,
            "80": 0.12884,
            "81": 0.12898,
            "82": 0.12927,
            "83": 0.12848,
            "84": 0.12845,
            "85": 0.12849,
            "86": 0.12983,
            "87": 0.1303,
            "88": 0.12961,
            "89": 0.13093,
            "90": 0.12951,
            "91": 0.12818,
            "92": 0.12902,
            "93": 0.12967,
            "94": 0.13419,
            "95": 0.14029,
            "96": 0.1429,
            "97": 0.14018,
            "98": 0.13632,
            "99": 0.14128,
            "100": 0.14034
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_gb200_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.90259,
            "52": 9.79281,
            "53": 10.11536,
            "54": 9.99216,
            "55": 9.91665,
            "56": 9.66015,
            "57": 9.52038,
            "58": 9.87094,
            "59": 9.6209,
            "60": 9.54952,
            "61": 9.70012,
            "62": 10.00629,
            "63": 9.42168,
            "64": 9.79893,
            "65": 8.97548,
            "66": 9.73165,
            "67": 9.38933,
            "68": 9.80066,
            "69": 9.81152,
            "70": 9.76761,
            "71": 9.63356,
            "72": 9.59892,
            "73": 9.51708,
            "74": 8.96512,
            "75": 9.43589,
            "76": 9.11207,
            "77": 10.06881,
            "78": 9.72515,
            "79": 9.39985,
            "80": 9.41154,
            "81": 9.50094,
            "82": 9.69861,
            "83": 9.33578,
            "84": 9.4341,
            "85": 9.63907,
            "86": 9.06166,
            "87": 9.60563,
            "88": 9.77626,
            "89": 9.6243,
            "90": 9.82766,
            "91": 9.35869,
            "92": 9.38066,
            "93": 9.09681,
            "94": 8.83995,
            "95": 9.52751,
            "96": 9.53562,
            "97": 9.32689,
            "98": 9.69354,
            "99": 8.88933,
            "100": 9.42104
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 22791436.0,
            "52": 22748292.0,
            "53": 22924772.0,
            "54": 22840284.0,
            "55": 22517880.0,
            "56": 22877730.0,
            "57": 23113080.0,
            "58": 22845568.0,
            "59": 22716022.0,
            "60": 22743056.0,
            "61": 22724434.0,
            "62": 22672316.0,
            "63": 22846416.0,
            "64": 22823178.0,
            "65": 23061654.0,
            "66": 22729712.0,
            "67": 22908434.0,
            "68": 22610444.0,
            "69": 22584604.0,
            "70": 22828526.0,
            "71": 22748442.0,
            "72": 22655052.0,
            "73": 22740588.0,
            "74": 23048316.0,
            "75": 23054664.0,
            "76": 22901072.0,
            "77": 22272198.0,
            "78": 22789244.0,
            "79": 22743700.0,
            "80": 22706576.0,
            "81": 22890704.0,
            "82": 22778282.0,
            "83": 22840256.0,
            "84": 23010368.0,
            "85": 22711796.0,
            "86": 23103236.0,
            "87": 22735120.0,
            "88": 22636998.0,
            "89": 22498612.0,
            "90": 22972652.0,
            "91": 22767776.0,
            "92": 22809424.0,
            "93": 22658980.0,
            "94": 22911920.0,
            "95": 23047890.0,
            "96": 22828804.0,
            "97": 22608196.0,
            "98": 22762820.0,
            "99": 22906714.0,
            "100": 23016048.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 746444288.0,
            "52": 746444288.0,
            "53": 746444288.0,
            "54": 746444288.0,
            "55": 746444288.0,
            "56": 746444288.0,
            "57": 746444288.0,
            "58": 746444288.0,
            "59": 746444288.0,
            "60": 746444288.0,
            "61": 746444288.0,
            "62": 746444288.0,
            "63": 746444288.0,
            "64": 746444288.0,
            "65": 746444288.0,
            "66": 746444288.0,
            "67": 746444288.0,
            "68": 746444288.0,
            "69": 746444288.0,
            "70": 746444288.0,
            "71": 746444288.0,
            "72": 746444288.0,
            "73": 746444288.0,
            "74": 746444288.0,
            "75": 746444288.0,
            "76": 746444288.0,
            "77": 746444288.0,
            "78": 746444288.0,
            "79": 746444288.0,
            "80": 746444288.0,
            "81": 746444288.0,
            "82": 746444288.0,
            "83": 746444288.0,
            "84": 746444288.0,
            "85": 746444288.0,
            "86": 746444288.0,
            "87": 746444288.0,
            "88": 746444288.0,
            "89": 746444288.0,
            "90": 746444288.0,
            "91": 746444288.0,
            "92": 746444288.0,
            "93": 746444288.0,
            "94": 746444288.0,
            "95": 746444288.0,
            "96": 746444288.0,
            "97": 746444288.0,
            "98": 746444288.0,
            "99": 746444288.0,
            "100": 746444288.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2223731712.0,
            "52": 2223732736.0,
            "53": 2223732736.0,
            "54": 2223732736.0,
            "55": 2223732736.0,
            "56": 2223732736.0,
            "57": 2223732736.0,
            "58": 2223732736.0,
            "59": 2223732736.0,
            "60": 2223732736.0,
            "61": 2223732736.0,
            "62": 2223732736.0,
            "63": 2223732736.0,
            "64": 2223732736.0,
            "65": 2223732736.0,
            "66": 2223732736.0,
            "67": 2223732736.0,
            "68": 2223732736.0,
            "69": 2223732736.0,
            "70": 2223732736.0,
            "71": 2223732736.0,
            "72": 2223732736.0,
            "73": 2223732736.0,
            "74": 2223732736.0,
            "75": 2223732736.0,
            "76": 2223732736.0,
            "77": 2223732736.0,
            "78": 2223732736.0,
            "79": 2223732736.0,
            "80": 2223732736.0,
            "81": 2223732736.0,
            "82": 2223732736.0,
            "83": 2223732736.0,
            "84": 2223732736.0,
            "85": 2223732736.0,
            "86": 2223732736.0,
            "87": 2223732736.0,
            "88": 2223732736.0,
            "89": 2223732736.0,
            "90": 2223732736.0,
            "91": 2223732736.0,
            "92": 2223732736.0,
            "93": 2223732736.0,
            "94": 2223732736.0,
            "95": 2223732736.0,
            "96": 2223732736.0,
            "97": 2223732736.0,
            "98": 2223732736.0,
            "99": 2223732736.0,
            "100": 2223732736.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": "nan",
            "52": 2.28424,
            "53": 0.15724,
            "54": 0.14436,
            "55": 0.14133,
            "56": 0.14939,
            "57": 0.15152,
            "58": 0.16555,
            "59": 0.19478,
            "60": 0.13288,
            "61": 0.13086,
            "62": 0.13088,
            "63": 0.13074,
            "64": 0.1303,
            "65": 0.13189,
            "66": 0.13138,
            "67": 0.12968,
            "68": 0.13118,
            "69": 0.13064,
            "70": 0.12931,
            "71": 0.12915,
            "72": 0.12915,
            "73": 0.13375,
            "74": 0.13641,
            "75": 0.13586,
            "76": 0.13551,
            "77": 0.13604,
            "78": 0.13931,
            "79": 0.13798,
            "80": 0.13724,
            "81": 0.13702,
            "82": 0.13663,
            "83": 0.1357,
            "84": 0.13618,
            "85": 0.13577,
            "86": 0.13569,
            "87": 0.13635,
            "88": 0.13659,
            "89": 0.13724,
            "90": 0.13599,
            "91": 0.13637,
            "92": 0.13565,
            "93": 0.13693,
            "94": 0.13576,
            "95": 0.13566,
            "96": 0.13579,
            "97": 0.13592,
            "98": 0.13631,
            "99": 0.13476,
            "100": 0.13606
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.91349,
            "2": 10.90719,
            "3": 10.91328,
            "4": 10.87838,
            "5": 10.91769,
            "6": 10.93821,
            "7": 10.90469,
            "8": 10.90393,
            "9": 10.90876,
            "10": 10.89645,
            "11": 10.92562,
            "12": 10.91891,
            "13": 10.91537,
            "14": 10.93343,
            "15": 10.86115,
            "16": 10.85374,
            "17": 10.82717,
            "18": 10.86544,
            "19": 10.86225,
            "20": 10.76737,
            "21": 10.74634,
            "22": 10.62228,
            "23": 10.76122,
            "24": 10.64732,
            "25": 10.59597,
            "26": 10.66352,
            "27": 10.6542,
            "28": 10.6077,
            "29": 10.62581,
            "30": 10.41591,
            "31": 10.16855,
            "32": 10.50267,
            "33": 10.50304,
            "34": 10.25481,
            "35": 10.31879,
            "36": 10.27167,
            "37": 10.37751,
            "38": 10.22122,
            "39": 10.44798,
            "40": 10.14166,
            "41": 10.1771,
            "42": 10.2426,
            "43": 9.87148,
            "44": 9.99875,
            "45": 9.88702,
            "46": 9.86139,
            "47": 10.18144,
            "48": 9.87873,
            "49": 9.58706,
            "50": 9.9542,
            "51": 9.8866,
            "52": 9.78429,
            "53": 10.10842,
            "54": 9.97368,
            "55": 9.89803,
            "56": 9.65427,
            "57": 9.52013,
            "58": 9.87297,
            "59": 9.6132,
            "60": 9.54967,
            "61": 9.70681,
            "62": 9.98533,
            "63": 9.41357,
            "64": 9.80966,
            "65": 8.97052,
            "66": 9.72773,
            "67": 9.39183,
            "68": 9.8084,
            "69": 9.82052,
            "70": 9.76655,
            "71": 9.63414,
            "72": 9.60485,
            "73": 9.52299,
            "74": 8.9718,
            "75": 9.42321,
            "76": 9.10113,
            "77": 10.0716,
            "78": 9.74266,
            "79": 9.40343,
            "80": 9.41333,
            "81": 9.49931,
            "82": 9.70236,
            "83": 9.33436,
            "84": 9.43774,
            "85": 9.63924,
            "86": 9.07931,
            "87": 9.60447,
            "88": 9.7824,
            "89": 9.62386,
            "90": 9.84241,
            "91": 9.35506,
            "92": 9.38398,
            "93": 9.09747,
            "94": 8.8471,
            "95": 9.5314,
            "96": 9.54263,
            "97": 9.32886,
            "98": 9.6926,
            "99": 8.89976,
            "100": 9.43124
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 22727424.0,
            "2": 22925204.0,
            "3": 22596900.0,
            "4": 23219556.0,
            "5": 22714624.0,
            "6": 23021776.0,
            "7": 22771632.0,
            "8": 22926560.0,
            "9": 22842156.0,
            "10": 22918168.0,
            "11": 22500688.0,
            "12": 22459470.0,
            "13": 22917228.0,
            "14": 22387988.0,
            "15": 22821732.0,
            "16": 22830306.0,
            "17": 22819520.0,
            "18": 22582628.0,
            "19": 22618028.0,
            "20": 22693852.0,
            "21": 22739344.0,
            "22": 22799596.0,
            "23": 22539016.0,
            "24": 22770946.0,
            "25": 22819324.0,
            "26": 22547928.0,
            "27": 22468716.0,
            "28": 22453820.0,
            "29": 22529898.0,
            "30": 22631220.0,
            "31": 22955420.0,
            "32": 22585276.0,
            "33": 22558602.0,
            "34": 22835792.0,
            "35": 22788208.0,
            "36": 22589796.0,
            "37": 22496928.0,
            "38": 22896192.0,
            "39": 22801858.0,
            "40": 22657640.0,
            "41": 22658982.0,
            "42": 22667052.0,
            "43": 22975816.0,
            "44": 22747688.0,
            "45": 22674846.0,
            "46": 22884684.0,
            "47": 22633708.0,
            "48": 22928466.0,
            "49": 22728092.0,
            "50": 22905080.0,
            "51": 22791108.0,
            "52": 22748190.0,
            "53": 22924900.0,
            "54": 22840164.0,
            "55": 22518344.0,
            "56": 22877680.0,
            "57": 23113944.0,
            "58": 22846268.0,
            "59": 22716084.0,
            "60": 22742984.0,
            "61": 22724584.0,
            "62": 22672944.0,
            "63": 22846388.0,
            "64": 22823650.0,
            "65": 23061058.0,
            "66": 22729266.0,
            "67": 22908888.0,
            "68": 22610020.0,
            "69": 22583826.0,
            "70": 22829374.0,
            "71": 22748240.0,
            "72": 22654480.0,
            "73": 22741180.0,
            "74": 23047914.0,
            "75": 23054396.0,
            "76": 22900788.0,
            "77": 22271588.0,
            "78": 22789024.0,
            "79": 22743632.0,
            "80": 22706696.0,
            "81": 22891372.0,
            "82": 22777860.0,
            "83": 22840532.0,
            "84": 23010386.0,
            "85": 22711212.0,
            "86": 23103006.0,
            "87": 22734564.0,
            "88": 22637848.0,
            "89": 22497850.0,
            "90": 22972712.0,
            "91": 22767188.0,
            "92": 22808834.0,
            "93": 22659304.0,
            "94": 22911552.0,
            "95": 23047794.0,
            "96": 22829386.0,
            "97": 22608168.0,
            "98": 22762756.0,
            "99": 22905900.0,
            "100": 23015488.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 747492864.0,
            "2": 747492864.0,
            "3": 747492864.0,
            "4": 747492864.0,
            "5": 747492864.0,
            "6": 747492864.0,
            "7": 747492864.0,
            "8": 747492864.0,
            "9": 747492864.0,
            "10": 747492864.0,
            "11": 747492864.0,
            "12": 747492864.0,
            "13": 747492864.0,
            "14": 747492864.0,
            "15": 747492864.0,
            "16": 747492864.0,
            "17": 747492864.0,
            "18": 747492864.0,
            "19": 747492864.0,
            "20": 747492864.0,
            "21": 747492864.0,
            "22": 747492864.0,
            "23": 747492864.0,
            "24": 747492864.0,
            "25": 747492864.0,
            "26": 747492864.0,
            "27": 747492864.0,
            "28": 747492864.0,
            "29": 747492864.0,
            "30": 747492864.0,
            "31": 747492864.0,
            "32": 747492864.0,
            "33": 747492864.0,
            "34": 747492864.0,
            "35": 747492864.0,
            "36": 747492864.0,
            "37": 747492864.0,
            "38": 747492864.0,
            "39": 747492864.0,
            "40": 747492864.0,
            "41": 747492864.0,
            "42": 747492864.0,
            "43": 747492864.0,
            "44": 747492864.0,
            "45": 747492864.0,
            "46": 747492864.0,
            "47": 747492864.0,
            "48": 747492864.0,
            "49": 747492864.0,
            "50": 747492864.0,
            "51": 747492864.0,
            "52": 747492864.0,
            "53": 747492864.0,
            "54": 747492864.0,
            "55": 747492864.0,
            "56": 747492864.0,
            "57": 747492864.0,
            "58": 747492864.0,
            "59": 747492864.0,
            "60": 747492864.0,
            "61": 747492864.0,
            "62": 747492864.0,
            "63": 747492864.0,
            "64": 747492864.0,
            "65": 747492864.0,
            "66": 747492864.0,
            "67": 747492864.0,
            "68": 747492864.0,
            "69": 747492864.0,
            "70": 747492864.0,
            "71": 747492864.0,
            "72": 747492864.0,
            "73": 747492864.0,
            "74": 747492864.0,
            "75": 747492864.0,
            "76": 747492864.0,
            "77": 747492864.0,
            "78": 747492864.0,
            "79": 747492864.0,
            "80": 747492864.0,
            "81": 747492864.0,
            "82": 747492864.0,
            "83": 747492864.0,
            "84": 747492864.0,
            "85": 747492864.0,
            "86": 747492864.0,
            "87": 747492864.0,
            "88": 747492864.0,
            "89": 747492864.0,
            "90": 747492864.0,
            "91": 747492864.0,
            "92": 747492864.0,
            "93": 747492864.0,
            "94": 747492864.0,
            "95": 747492864.0,
            "96": 747492864.0,
            "97": 747492864.0,
            "98": 747492864.0,
            "99": 747492864.0,
            "100": 747492864.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1927341056.0,
            "2": 2212197376.0,
            "3": 2212197376.0,
            "4": 2212197376.0,
            "5": 2212197376.0,
            "6": 2212197376.0,
            "7": 2212197376.0,
            "8": 2212197376.0,
            "9": 2212197376.0,
            "10": 2212197376.0,
            "11": 2212197376.0,
            "12": 2212197376.0,
            "13": 2212197376.0,
            "14": 2212197376.0,
            "15": 2212197376.0,
            "16": 2212197376.0,
            "17": 2212197376.0,
            "18": 2212197376.0,
            "19": 2212197376.0,
            "20": 2212197376.0,
            "21": 2212197376.0,
            "22": 2212197376.0,
            "23": 2212197376.0,
            "24": 2212197376.0,
            "25": 2212197376.0,
            "26": 2212197376.0,
            "27": 2212197376.0,
            "28": 2212197376.0,
            "29": 2212197376.0,
            "30": 2212197376.0,
            "31": 2212197376.0,
            "32": 2212197376.0,
            "33": 2212197376.0,
            "34": 2212197376.0,
            "35": 2212197376.0,
            "36": 2212197376.0,
            "37": 2212197376.0,
            "38": 2212197376.0,
            "39": 2212197376.0,
            "40": 2212197376.0,
            "41": 2212197376.0,
            "42": 2212197376.0,
            "43": 2212197376.0,
            "44": 2212197376.0,
            "45": 2212197376.0,
            "46": 2212197376.0,
            "47": 2212197376.0,
            "48": 2212197376.0,
            "49": 2212197376.0,
            "50": 2212197376.0,
            "51": 2212197376.0,
            "52": 2212197376.0,
            "53": 2212197376.0,
            "54": 2212197376.0,
            "55": 2212197376.0,
            "56": 2212197376.0,
            "57": 2212197376.0,
            "58": 2212197376.0,
            "59": 2212197376.0,
            "60": 2212197376.0,
            "61": 2212197376.0,
            "62": 2212197376.0,
            "63": 2212197376.0,
            "64": 2212197376.0,
            "65": 2212197376.0,
            "66": 2212197376.0,
            "67": 2212197376.0,
            "68": 2212197376.0,
            "69": 2212197376.0,
            "70": 2212197376.0,
            "71": 2212197376.0,
            "72": 2212197376.0,
            "73": 2212197376.0,
            "74": 2212197376.0,
            "75": 2212197376.0,
            "76": 2212197376.0,
            "77": 2212197376.0,
            "78": 2212197376.0,
            "79": 2212197376.0,
            "80": 2212197376.0,
            "81": 2212197376.0,
            "82": 2212197376.0,
            "83": 2212197376.0,
            "84": 2212197376.0,
            "85": 2212197376.0,
            "86": 2212197376.0,
            "87": 2212197376.0,
            "88": 2212197376.0,
            "89": 2212197376.0,
            "90": 2212197376.0,
            "91": 2212197376.0,
            "92": 2212197376.0,
            "93": 2212197376.0,
            "94": 2212197376.0,
            "95": 2212197376.0,
            "96": 2212197376.0,
            "97": 2212197376.0,
            "98": 2212197376.0,
            "99": 2212197376.0,
            "100": 2212197376.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 9.78643,
            "2": 0.13398,
            "3": 0.11557,
            "4": 0.09095,
            "5": 0.09137,
            "6": 0.09276,
            "7": 0.09034,
            "8": 0.09082,
            "9": 0.09002,
            "10": 0.09121,
            "11": 0.08989,
            "12": 0.0895,
            "13": 0.09015,
            "14": 0.09012,
            "15": 0.0903,
            "16": 0.09019,
            "17": 0.0907,
            "18": 0.09055,
            "19": 0.08988,
            "20": 0.08984,
            "21": 0.08951,
            "22": 0.0913,
            "23": 0.08972,
            "24": 0.08995,
            "25": 0.09008,
            "26": 0.08931,
            "27": 0.09055,
            "28": 0.08926,
            "29": 0.09028,
            "30": 0.09142,
            "31": 0.09085,
            "32": 0.09027,
            "33": 0.09061,
            "34": 0.08998,
            "35": 0.09113,
            "36": 0.09039,
            "37": 0.08973,
            "38": 0.09065,
            "39": 0.08993,
            "40": 0.09112,
            "41": 0.10695,
            "42": 0.11371,
            "43": 0.09964,
            "44": 0.09076,
            "45": 0.0899,
            "46": 0.09204,
            "47": 0.0904,
            "48": 0.08998,
            "49": 0.09097,
            "50": 0.08971,
            "51": 0.10825,
            "52": 0.097,
            "53": 0.09456,
            "54": 0.09109,
            "55": 0.09071,
            "56": 0.09099,
            "57": 0.09129,
            "58": 0.09159,
            "59": 0.09138,
            "60": 0.09089,
            "61": 0.09092,
            "62": 0.09153,
            "63": 0.09208,
            "64": 0.09107,
            "65": 0.0918,
            "66": 0.09116,
            "67": 0.09075,
            "68": 0.09166,
            "69": 0.0948,
            "70": 0.09166,
            "71": 0.09195,
            "72": 0.09271,
            "73": 0.09226,
            "74": 0.09271,
            "75": 0.09216,
            "76": 0.09129,
            "77": 0.09221,
            "78": 0.09252,
            "79": 0.09161,
            "80": 0.09144,
            "81": 0.09112,
            "82": 0.09152,
            "83": 0.09106,
            "84": 0.09137,
            "85": 0.09127,
            "86": 0.09136,
            "87": 0.09077,
            "88": 0.09362,
            "89": 0.09244,
            "90": 0.09162,
            "91": 0.09114,
            "92": 0.09065,
            "93": 0.0913,
            "94": 0.09071,
            "95": 0.09096,
            "96": 0.09066,
            "97": 0.09585,
            "98": 0.09148,
            "99": 0.09232,
            "100": 0.09229
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_dev_dgx_h100_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.8866,
            "52": 9.78429,
            "53": 10.10842,
            "54": 9.97368,
            "55": 9.89803,
            "56": 9.65427,
            "57": 9.52013,
            "58": 9.87297,
            "59": 9.6132,
            "60": 9.54967,
            "61": 9.70681,
            "62": 9.98533,
            "63": 9.41357,
            "64": 9.80966,
            "65": 8.97052,
            "66": 9.72773,
            "67": 9.39183,
            "68": 9.8084,
            "69": 9.82052,
            "70": 9.76655,
            "71": 9.63414,
            "72": 9.60485,
            "73": 9.52299,
            "74": 8.9718,
            "75": 9.42321,
            "76": 9.10113,
            "77": 10.0716,
            "78": 9.74266,
            "79": 9.40343,
            "80": 9.41333,
            "81": 9.49931,
            "82": 9.70236,
            "83": 9.33436,
            "84": 9.43774,
            "85": 9.63924,
            "86": 9.07931,
            "87": 9.60447,
            "88": 9.7824,
            "89": 9.62386,
            "90": 9.84241,
            "91": 9.35506,
            "92": 9.38398,
            "93": 9.09747,
            "94": 8.8471,
            "95": 9.5314,
            "96": 9.54263,
            "97": 9.32886,
            "98": 9.6926,
            "99": 8.89976,
            "100": 9.43124
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 22791108.0,
            "52": 22748190.0,
            "53": 22924900.0,
            "54": 22840164.0,
            "55": 22518344.0,
            "56": 22877680.0,
            "57": 23113944.0,
            "58": 22846268.0,
            "59": 22716084.0,
            "60": 22742984.0,
            "61": 22724584.0,
            "62": 22672944.0,
            "63": 22846388.0,
            "64": 22823650.0,
            "65": 23061058.0,
            "66": 22729266.0,
            "67": 22908888.0,
            "68": 22610020.0,
            "69": 22583826.0,
            "70": 22829374.0,
            "71": 22748240.0,
            "72": 22654480.0,
            "73": 22741180.0,
            "74": 23047914.0,
            "75": 23054396.0,
            "76": 22900788.0,
            "77": 22271588.0,
            "78": 22789024.0,
            "79": 22743632.0,
            "80": 22706696.0,
            "81": 22891372.0,
            "82": 22777860.0,
            "83": 22840532.0,
            "84": 23010386.0,
            "85": 22711212.0,
            "86": 23103006.0,
            "87": 22734564.0,
            "88": 22637848.0,
            "89": 22497850.0,
            "90": 22972712.0,
            "91": 22767188.0,
            "92": 22808834.0,
            "93": 22659304.0,
            "94": 22911552.0,
            "95": 23047794.0,
            "96": 22829386.0,
            "97": 22608168.0,
            "98": 22762756.0,
            "99": 22905900.0,
            "100": 23015488.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 746444288.0,
            "52": 746444288.0,
            "53": 746444288.0,
            "54": 746444288.0,
            "55": 746444288.0,
            "56": 746444288.0,
            "57": 746444288.0,
            "58": 746444288.0,
            "59": 746444288.0,
            "60": 746444288.0,
            "61": 746444288.0,
            "62": 746444288.0,
            "63": 746444288.0,
            "64": 746444288.0,
            "65": 746444288.0,
            "66": 746444288.0,
            "67": 746444288.0,
            "68": 746444288.0,
            "69": 746444288.0,
            "70": 746444288.0,
            "71": 746444288.0,
            "72": 746444288.0,
            "73": 746444288.0,
            "74": 746444288.0,
            "75": 746444288.0,
            "76": 746444288.0,
            "77": 746444288.0,
            "78": 746444288.0,
            "79": 746444288.0,
            "80": 746444288.0,
            "81": 746444288.0,
            "82": 746444288.0,
            "83": 746444288.0,
            "84": 746444288.0,
            "85": 746444288.0,
            "86": 746444288.0,
            "87": 746444288.0,
            "88": 746444288.0,
            "89": 746444288.0,
            "90": 746444288.0,
            "91": 746444288.0,
            "92": 746444288.0,
            "93": 746444288.0,
            "94": 746444288.0,
            "95": 746444288.0,
            "96": 746444288.0,
            "97": 746444288.0,
            "98": 746444288.0,
            "99": 746444288.0,
            "100": 746444288.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2211148800.0,
            "52": 2211149824.0,
            "53": 2211149824.0,
            "54": 2211149824.0,
            "55": 2211149824.0,
            "56": 2211149824.0,
            "57": 2211149824.0,
            "58": 2211149824.0,
            "59": 2211149824.0,
            "60": 2211149824.0,
            "61": 2211149824.0,
            "62": 2211149824.0,
            "63": 2211149824.0,
            "64": 2211149824.0,
            "65": 2211149824.0,
            "66": 2211149824.0,
            "67": 2211149824.0,
            "68": 2211149824.0,
            "69": 2211149824.0,
            "70": 2211149824.0,
            "71": 2211149824.0,
            "72": 2211149824.0,
            "73": 2211149824.0,
            "74": 2211149824.0,
            "75": 2211149824.0,
            "76": 2211149824.0,
            "77": 2211149824.0,
            "78": 2211149824.0,
            "79": 2211149824.0,
            "80": 2211149824.0,
            "81": 2211149824.0,
            "82": 2211149824.0,
            "83": 2211149824.0,
            "84": 2211149824.0,
            "85": 2211149824.0,
            "86": 2211149824.0,
            "87": 2211149824.0,
            "88": 2211149824.0,
            "89": 2211149824.0,
            "90": 2211149824.0,
            "91": 2211149824.0,
            "92": 2211149824.0,
            "93": 2211149824.0,
            "94": 2211149824.0,
            "95": 2211149824.0,
            "96": 2211149824.0,
            "97": 2211149824.0,
            "98": 2211149824.0,
            "99": 2211149824.0,
            "100": 2211149824.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 8.06828,
            "52": 0.13754,
            "53": 0.09299,
            "54": 0.0937,
            "55": 0.09396,
            "56": 0.09244,
            "57": 0.09247,
            "58": 0.09209,
            "59": 0.09263,
            "60": 0.09275,
            "61": 0.09238,
            "62": 0.09116,
            "63": 0.0965,
            "64": 0.09261,
            "65": 0.09256,
            "66": 0.09274,
            "67": 0.09252,
            "68": 0.09299,
            "69": 0.09249,
            "70": 0.09223,
            "71": 0.09259,
            "72": 0.09409,
            "73": 0.09265,
            "74": 0.09487,
            "75": 0.0923,
            "76": 0.09244,
            "77": 0.09219,
            "78": 0.0922,
            "79": 0.09407,
            "80": 0.09255,
            "81": 0.09438,
            "82": 0.09241,
            "83": 0.09253,
            "84": 0.09203,
            "85": 0.09473,
            "86": 0.09291,
            "87": 0.0919,
            "88": 0.0924,
            "89": 0.09178,
            "90": 0.09274,
            "91": 0.09205,
            "92": 0.09276,
            "93": 0.09224,
            "94": 0.09252,
            "95": 0.09076,
            "96": 0.09167,
            "97": 0.09167,
            "98": 0.0936,
            "99": 0.09222,
            "100": 0.09183
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.90314,
            "2": 10.88897,
            "3": 10.89725,
            "4": 10.88248,
            "5": 10.89447,
            "6": 10.91226,
            "7": 10.89775,
            "8": 10.88643,
            "9": 10.89227,
            "10": 10.8849,
            "11": 10.91413,
            "12": 10.88666,
            "13": 10.89283,
            "14": 10.90486,
            "15": 10.83878,
            "16": 10.84927,
            "17": 10.83248,
            "18": 10.83371,
            "19": 10.83359,
            "20": 10.74074,
            "21": 10.7043,
            "22": 10.59893,
            "23": 10.72222,
            "24": 10.60745,
            "25": 10.57467,
            "26": 10.62999,
            "27": 10.62364,
            "28": 10.57229,
            "29": 10.6073,
            "30": 10.37766,
            "31": 10.15362,
            "32": 10.47609,
            "33": 10.48062,
            "34": 10.24216,
            "35": 10.29035,
            "36": 10.25955,
            "37": 10.36145,
            "38": 10.21396,
            "39": 10.44502,
            "40": 10.11492,
            "41": 10.1605,
            "42": 10.23468,
            "43": 9.85032,
            "44": 9.97764,
            "45": 9.85681,
            "46": 9.8307,
            "47": 10.17976,
            "48": 9.85811,
            "49": 9.54378,
            "50": 9.93469,
            "51": 9.86793,
            "52": 9.76274,
            "53": 10.10895,
            "54": 9.95538,
            "55": 9.8756,
            "56": 9.64751,
            "57": 9.48989,
            "58": 9.85502,
            "59": 9.59457,
            "60": 9.52968,
            "61": 9.69589,
            "62": 10.01676,
            "63": 9.38778,
            "64": 9.80211,
            "65": 8.95119,
            "66": 9.72857,
            "67": 9.37577,
            "68": 9.80463,
            "69": 9.81,
            "70": 9.7662,
            "71": 9.63135,
            "72": 9.5784,
            "73": 9.52148,
            "74": 8.94976,
            "75": 9.43087,
            "76": 9.08489,
            "77": 10.089,
            "78": 9.72754,
            "79": 9.37612,
            "80": 9.40849,
            "81": 9.49766,
            "82": 9.71298,
            "83": 9.33332,
            "84": 9.43928,
            "85": 9.63373,
            "86": 9.07038,
            "87": 9.61245,
            "88": 9.78304,
            "89": 9.60878,
            "90": 9.85164,
            "91": 9.34542,
            "92": 9.38281,
            "93": 9.07319,
            "94": 8.81684,
            "95": 9.51809,
            "96": 9.54033,
            "97": 9.34061,
            "98": 9.70134,
            "99": 8.88786,
            "100": 9.43285
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 22727000.0,
            "2": 22925616.0,
            "3": 22596924.0,
            "4": 23219540.0,
            "5": 22714814.0,
            "6": 23021786.0,
            "7": 22771458.0,
            "8": 22926012.0,
            "9": 22842856.0,
            "10": 22918360.0,
            "11": 22500702.0,
            "12": 22459866.0,
            "13": 22916820.0,
            "14": 22389026.0,
            "15": 22821260.0,
            "16": 22830812.0,
            "17": 22818944.0,
            "18": 22582240.0,
            "19": 22618380.0,
            "20": 22694352.0,
            "21": 22740180.0,
            "22": 22800024.0,
            "23": 22540132.0,
            "24": 22771492.0,
            "25": 22818970.0,
            "26": 22546852.0,
            "27": 22468504.0,
            "28": 22453052.0,
            "29": 22529222.0,
            "30": 22631432.0,
            "31": 22955696.0,
            "32": 22585238.0,
            "33": 22557676.0,
            "34": 22835412.0,
            "35": 22788032.0,
            "36": 22589678.0,
            "37": 22497140.0,
            "38": 22896132.0,
            "39": 22801314.0,
            "40": 22658064.0,
            "41": 22659700.0,
            "42": 22667816.0,
            "43": 22976356.0,
            "44": 22746708.0,
            "45": 22675272.0,
            "46": 22884382.0,
            "47": 22634556.0,
            "48": 22928080.0,
            "49": 22727538.0,
            "50": 22905284.0,
            "51": 22791326.0,
            "52": 22749392.0,
            "53": 22925970.0,
            "54": 22839434.0,
            "55": 22518416.0,
            "56": 22877660.0,
            "57": 23113304.0,
            "58": 22845008.0,
            "59": 22715512.0,
            "60": 22743058.0,
            "61": 22723950.0,
            "62": 22673248.0,
            "63": 22846074.0,
            "64": 22823228.0,
            "65": 23060212.0,
            "66": 22729902.0,
            "67": 22907278.0,
            "68": 22610092.0,
            "69": 22584360.0,
            "70": 22829348.0,
            "71": 22749420.0,
            "72": 22655446.0,
            "73": 22740974.0,
            "74": 23048296.0,
            "75": 23053922.0,
            "76": 22901008.0,
            "77": 22272806.0,
            "78": 22789370.0,
            "79": 22743288.0,
            "80": 22706236.0,
            "81": 22890976.0,
            "82": 22777092.0,
            "83": 22839240.0,
            "84": 23010352.0,
            "85": 22712004.0,
            "86": 23103740.0,
            "87": 22734788.0,
            "88": 22637620.0,
            "89": 22499200.0,
            "90": 22972420.0,
            "91": 22766428.0,
            "92": 22808890.0,
            "93": 22659888.0,
            "94": 22910970.0,
            "95": 23048514.0,
            "96": 22829470.0,
            "97": 22608826.0,
            "98": 22763528.0,
            "99": 22905754.0,
            "100": 23016268.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 719180288.0,
            "2": 719180288.0,
            "3": 719180288.0,
            "4": 719180288.0,
            "5": 719180288.0,
            "6": 719180288.0,
            "7": 719180288.0,
            "8": 719180288.0,
            "9": 719180288.0,
            "10": 719180288.0,
            "11": 719180288.0,
            "12": 719180288.0,
            "13": 719180288.0,
            "14": 719180288.0,
            "15": 719180288.0,
            "16": 719180288.0,
            "17": 719180288.0,
            "18": 719180288.0,
            "19": 719180288.0,
            "20": 719180288.0,
            "21": 719180288.0,
            "22": 719180288.0,
            "23": 719180288.0,
            "24": 719180288.0,
            "25": 719180288.0,
            "26": 719180288.0,
            "27": 719180288.0,
            "28": 719180288.0,
            "29": 719180288.0,
            "30": 719180288.0,
            "31": 719180288.0,
            "32": 719180288.0,
            "33": 719180288.0,
            "34": 719180288.0,
            "35": 719180288.0,
            "36": 719180288.0,
            "37": 719180288.0,
            "38": 719180288.0,
            "39": 719180288.0,
            "40": 719180288.0,
            "41": 719180288.0,
            "42": 719180288.0,
            "43": 719180288.0,
            "44": 719180288.0,
            "45": 719180288.0,
            "46": 719180288.0,
            "47": 719180288.0,
            "48": 719180288.0,
            "49": 719180288.0,
            "50": 719180288.0,
            "51": 719180288.0,
            "52": 719180288.0,
            "53": 719180288.0,
            "54": 719180288.0,
            "55": 719180288.0,
            "56": 719180288.0,
            "57": 719180288.0,
            "58": 719180288.0,
            "59": 719180288.0,
            "60": 719180288.0,
            "61": 719180288.0,
            "62": 719180288.0,
            "63": 719180288.0,
            "64": 719180288.0,
            "65": 719180288.0,
            "66": 719180288.0,
            "67": 719180288.0,
            "68": 719180288.0,
            "69": 719180288.0,
            "70": 719180288.0,
            "71": 719180288.0,
            "72": 719180288.0,
            "73": 719180288.0,
            "74": 719180288.0,
            "75": 719180288.0,
            "76": 719180288.0,
            "77": 719180288.0,
            "78": 719180288.0,
            "79": 719180288.0,
            "80": 719180288.0,
            "81": 719180288.0,
            "82": 719180288.0,
            "83": 719180288.0,
            "84": 719180288.0,
            "85": 719180288.0,
            "86": 719180288.0,
            "87": 719180288.0,
            "88": 719180288.0,
            "89": 719180288.0,
            "90": 719180288.0,
            "91": 719180288.0,
            "92": 719180288.0,
            "93": 719180288.0,
            "94": 719180288.0,
            "95": 719180288.0,
            "96": 719180288.0,
            "97": 719180288.0,
            "98": 719180288.0,
            "99": 719180288.0,
            "100": 719180288.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1910562816.0,
            "2": 2195420160.0,
            "3": 2195420160.0,
            "4": 2195420160.0,
            "5": 2195420160.0,
            "6": 2195420160.0,
            "7": 2195420160.0,
            "8": 2195420160.0,
            "9": 2195420160.0,
            "10": 2195420160.0,
            "11": 2195420160.0,
            "12": 2195420160.0,
            "13": 2195420160.0,
            "14": 2195420160.0,
            "15": 2195420160.0,
            "16": 2195420160.0,
            "17": 2195420160.0,
            "18": 2195420160.0,
            "19": 2195420160.0,
            "20": 2195420160.0,
            "21": 2195420160.0,
            "22": 2195420160.0,
            "23": 2195420160.0,
            "24": 2195420160.0,
            "25": 2195420160.0,
            "26": 2195420160.0,
            "27": 2195420160.0,
            "28": 2195420160.0,
            "29": 2195420160.0,
            "30": 2195420160.0,
            "31": 2195420160.0,
            "32": 2195420160.0,
            "33": 2195420160.0,
            "34": 2195420160.0,
            "35": 2195420160.0,
            "36": 2195420160.0,
            "37": 2195420160.0,
            "38": 2195420160.0,
            "39": 2195420160.0,
            "40": 2195420160.0,
            "41": 2195420160.0,
            "42": 2195420160.0,
            "43": 2195420160.0,
            "44": 2195420160.0,
            "45": 2195420160.0,
            "46": 2195420160.0,
            "47": 2195420160.0,
            "48": 2195420160.0,
            "49": 2195420160.0,
            "50": 2195420160.0,
            "51": 2195420160.0,
            "52": 2195420160.0,
            "53": 2195420160.0,
            "54": 2195420160.0,
            "55": 2195420160.0,
            "56": 2195420160.0,
            "57": 2195420160.0,
            "58": 2195420160.0,
            "59": 2195420160.0,
            "60": 2195420160.0,
            "61": 2195420160.0,
            "62": 2195420160.0,
            "63": 2195420160.0,
            "64": 2195420160.0,
            "65": 2195420160.0,
            "66": 2195420160.0,
            "67": 2195420160.0,
            "68": 2195420160.0,
            "69": 2195420160.0,
            "70": 2195420160.0,
            "71": 2195420160.0,
            "72": 2195420160.0,
            "73": 2195420160.0,
            "74": 2195420160.0,
            "75": 2195420160.0,
            "76": 2195420160.0,
            "77": 2195420160.0,
            "78": 2195420160.0,
            "79": 2195420160.0,
            "80": 2195420160.0,
            "81": 2195420160.0,
            "82": 2195420160.0,
            "83": 2195420160.0,
            "84": 2195420160.0,
            "85": 2195420160.0,
            "86": 2195420160.0,
            "87": 2195420160.0,
            "88": 2195420160.0,
            "89": 2195420160.0,
            "90": 2195420160.0,
            "91": 2195420160.0,
            "92": 2195420160.0,
            "93": 2195420160.0,
            "94": 2195420160.0,
            "95": 2195420160.0,
            "96": 2195420160.0,
            "97": 2195420160.0,
            "98": 2195420160.0,
            "99": 2195420160.0,
            "100": 2195420160.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 8.03051,
            "2": 0.1884,
            "3": 0.27987,
            "4": 0.15687,
            "5": 0.15614,
            "6": 0.1563,
            "7": 0.15622,
            "8": 0.1563,
            "9": 0.15473,
            "10": 0.15615,
            "11": 0.15528,
            "12": 0.15754,
            "13": 0.15661,
            "14": 0.15677,
            "15": 0.15572,
            "16": 0.15582,
            "17": 0.1571,
            "18": 0.15664,
            "19": 0.1556,
            "20": 0.15592,
            "21": 0.15572,
            "22": 0.15642,
            "23": 0.15643,
            "24": 0.15743,
            "25": 0.15666,
            "26": 0.15552,
            "27": 0.15475,
            "28": 0.15586,
            "29": 0.15603,
            "30": 0.15463,
            "31": 0.15712,
            "32": 0.15442,
            "33": 0.15543,
            "34": 0.1557,
            "35": 0.15682,
            "36": 0.15539,
            "37": 0.1553,
            "38": 0.15578,
            "39": 0.15667,
            "40": 0.15715,
            "41": 0.15704,
            "42": 0.1566,
            "43": 0.15655,
            "44": 0.15629,
            "45": 0.15584,
            "46": 0.15734,
            "47": 0.15735,
            "48": 0.15572,
            "49": 0.15706,
            "50": 0.15561,
            "51": 0.16957,
            "52": 0.1587,
            "53": 0.16014,
            "54": 0.15805,
            "55": 0.1578,
            "56": 0.15801,
            "57": 0.15813,
            "58": 0.1574,
            "59": 0.15781,
            "60": 0.15923,
            "61": 0.15655,
            "62": 0.15633,
            "63": 0.15583,
            "64": 0.15734,
            "65": 0.15761,
            "66": 0.15822,
            "67": 0.15755,
            "68": 0.15815,
            "69": 0.15816,
            "70": 0.15813,
            "71": 0.15747,
            "72": 0.1574,
            "73": 0.15783,
            "74": 0.15766,
            "75": 0.15527,
            "76": 0.15579,
            "77": 0.15483,
            "78": 0.15482,
            "79": 0.15404,
            "80": 0.15431,
            "81": 0.154,
            "82": 0.15512,
            "83": 0.15513,
            "84": 0.15371,
            "85": 0.15488,
            "86": 0.15465,
            "87": 0.15412,
            "88": 0.15403,
            "89": 0.15487,
            "90": 0.15518,
            "91": 0.15549,
            "92": 0.154,
            "93": 0.15405,
            "94": 0.15438,
            "95": 0.15444,
            "96": 0.1534,
            "97": 0.15487,
            "98": 0.15398,
            "99": 0.15434,
            "100": 0.15391
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgx_a100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.90314,
            "2": 10.88897,
            "3": 10.89725,
            "4": 10.88248,
            "5": 10.89447,
            "6": 10.91226,
            "7": 10.89775,
            "8": 10.88643,
            "9": 10.89227,
            "10": 10.8849,
            "11": 10.91413,
            "12": 10.88666,
            "13": 10.89283,
            "14": 10.90486,
            "15": 10.83878,
            "16": 10.84927,
            "17": 10.83248,
            "18": 10.83371,
            "19": 10.83359,
            "20": 10.74074,
            "21": 10.7043,
            "22": 10.59893,
            "23": 10.72222,
            "24": 10.60745,
            "25": 10.57467,
            "26": 10.62999,
            "27": 10.62364,
            "28": 10.57229,
            "29": 10.6073,
            "30": 10.37766,
            "31": 10.15362,
            "32": 10.47609,
            "33": 10.48062,
            "34": 10.24216,
            "35": 10.29035,
            "36": 10.25955,
            "37": 10.36145,
            "38": 10.21396,
            "39": 10.44502,
            "40": 10.11492,
            "41": 10.1605,
            "42": 10.23468,
            "43": 9.85032,
            "44": 9.97764,
            "45": 9.85681,
            "46": 9.8307,
            "47": 10.17976,
            "48": 9.85811,
            "49": 9.54378,
            "50": 9.93469,
            "51": 9.86793,
            "52": 9.76274,
            "53": 10.10895,
            "54": 9.95538,
            "55": 9.8756,
            "56": 9.64751,
            "57": 9.48989,
            "58": 9.85502,
            "59": 9.59457,
            "60": 9.52968,
            "61": 9.69589,
            "62": 10.01676,
            "63": 9.38778,
            "64": 9.80211,
            "65": 8.95119,
            "66": 9.72857,
            "67": 9.37577,
            "68": 9.80463,
            "69": 9.81,
            "70": 9.7662,
            "71": 9.63135,
            "72": 9.5784,
            "73": 9.52148,
            "74": 8.94976,
            "75": 9.43087,
            "76": 9.08489,
            "77": 10.089,
            "78": 9.72754,
            "79": 9.37612,
            "80": 9.40849,
            "81": 9.49766,
            "82": 9.71298,
            "83": 9.33332,
            "84": 9.43928,
            "85": 9.63373,
            "86": 9.07038,
            "87": 9.61245,
            "88": 9.78304,
            "89": 9.60878,
            "90": 9.85164,
            "91": 9.34542,
            "92": 9.38281,
            "93": 9.07319,
            "94": 8.81684,
            "95": 9.51809,
            "96": 9.54033,
            "97": 9.34061,
            "98": 9.70134,
            "99": 8.88786,
            "100": 9.43285
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 22727000.0,
            "2": 22925616.0,
            "3": 22596924.0,
            "4": 23219540.0,
            "5": 22714814.0,
            "6": 23021786.0,
            "7": 22771458.0,
            "8": 22926012.0,
            "9": 22842856.0,
            "10": 22918360.0,
            "11": 22500702.0,
            "12": 22459866.0,
            "13": 22916820.0,
            "14": 22389026.0,
            "15": 22821260.0,
            "16": 22830812.0,
            "17": 22818944.0,
            "18": 22582240.0,
            "19": 22618380.0,
            "20": 22694352.0,
            "21": 22740180.0,
            "22": 22800024.0,
            "23": 22540132.0,
            "24": 22771492.0,
            "25": 22818970.0,
            "26": 22546852.0,
            "27": 22468504.0,
            "28": 22453052.0,
            "29": 22529222.0,
            "30": 22631432.0,
            "31": 22955696.0,
            "32": 22585238.0,
            "33": 22557676.0,
            "34": 22835412.0,
            "35": 22788032.0,
            "36": 22589678.0,
            "37": 22497140.0,
            "38": 22896132.0,
            "39": 22801314.0,
            "40": 22658064.0,
            "41": 22659700.0,
            "42": 22667816.0,
            "43": 22976356.0,
            "44": 22746708.0,
            "45": 22675272.0,
            "46": 22884382.0,
            "47": 22634556.0,
            "48": 22928080.0,
            "49": 22727538.0,
            "50": 22905284.0,
            "51": 22791326.0,
            "52": 22749392.0,
            "53": 22925970.0,
            "54": 22839434.0,
            "55": 22518416.0,
            "56": 22877660.0,
            "57": 23113304.0,
            "58": 22845008.0,
            "59": 22715512.0,
            "60": 22743058.0,
            "61": 22723950.0,
            "62": 22673248.0,
            "63": 22846074.0,
            "64": 22823228.0,
            "65": 23060212.0,
            "66": 22729902.0,
            "67": 22907278.0,
            "68": 22610092.0,
            "69": 22584360.0,
            "70": 22829348.0,
            "71": 22749420.0,
            "72": 22655446.0,
            "73": 22740974.0,
            "74": 23048296.0,
            "75": 23053922.0,
            "76": 22901008.0,
            "77": 22272806.0,
            "78": 22789370.0,
            "79": 22743288.0,
            "80": 22706236.0,
            "81": 22890976.0,
            "82": 22777092.0,
            "83": 22839240.0,
            "84": 23010352.0,
            "85": 22712004.0,
            "86": 23103740.0,
            "87": 22734788.0,
            "88": 22637620.0,
            "89": 22499200.0,
            "90": 22972420.0,
            "91": 22766428.0,
            "92": 22808890.0,
            "93": 22659888.0,
            "94": 22910970.0,
            "95": 23048514.0,
            "96": 22829470.0,
            "97": 22608826.0,
            "98": 22763528.0,
            "99": 22905754.0,
            "100": 23016268.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 719180288.0,
            "2": 719180288.0,
            "3": 719180288.0,
            "4": 719180288.0,
            "5": 719180288.0,
            "6": 719180288.0,
            "7": 719180288.0,
            "8": 719180288.0,
            "9": 719180288.0,
            "10": 719180288.0,
            "11": 719180288.0,
            "12": 719180288.0,
            "13": 719180288.0,
            "14": 719180288.0,
            "15": 719180288.0,
            "16": 719180288.0,
            "17": 719180288.0,
            "18": 719180288.0,
            "19": 719180288.0,
            "20": 719180288.0,
            "21": 719180288.0,
            "22": 719180288.0,
            "23": 719180288.0,
            "24": 719180288.0,
            "25": 719180288.0,
            "26": 719180288.0,
            "27": 719180288.0,
            "28": 719180288.0,
            "29": 719180288.0,
            "30": 719180288.0,
            "31": 719180288.0,
            "32": 719180288.0,
            "33": 719180288.0,
            "34": 719180288.0,
            "35": 719180288.0,
            "36": 719180288.0,
            "37": 719180288.0,
            "38": 719180288.0,
            "39": 719180288.0,
            "40": 719180288.0,
            "41": 719180288.0,
            "42": 719180288.0,
            "43": 719180288.0,
            "44": 719180288.0,
            "45": 719180288.0,
            "46": 719180288.0,
            "47": 719180288.0,
            "48": 719180288.0,
            "49": 719180288.0,
            "50": 719180288.0,
            "51": 719180288.0,
            "52": 719180288.0,
            "53": 719180288.0,
            "54": 719180288.0,
            "55": 719180288.0,
            "56": 719180288.0,
            "57": 719180288.0,
            "58": 719180288.0,
            "59": 719180288.0,
            "60": 719180288.0,
            "61": 719180288.0,
            "62": 719180288.0,
            "63": 719180288.0,
            "64": 719180288.0,
            "65": 719180288.0,
            "66": 719180288.0,
            "67": 719180288.0,
            "68": 719180288.0,
            "69": 719180288.0,
            "70": 719180288.0,
            "71": 719180288.0,
            "72": 719180288.0,
            "73": 719180288.0,
            "74": 719180288.0,
            "75": 719180288.0,
            "76": 719180288.0,
            "77": 719180288.0,
            "78": 719180288.0,
            "79": 719180288.0,
            "80": 719180288.0,
            "81": 719180288.0,
            "82": 719180288.0,
            "83": 719180288.0,
            "84": 719180288.0,
            "85": 719180288.0,
            "86": 719180288.0,
            "87": 719180288.0,
            "88": 719180288.0,
            "89": 719180288.0,
            "90": 719180288.0,
            "91": 719180288.0,
            "92": 719180288.0,
            "93": 719180288.0,
            "94": 719180288.0,
            "95": 719180288.0,
            "96": 719180288.0,
            "97": 719180288.0,
            "98": 719180288.0,
            "99": 719180288.0,
            "100": 719180288.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1910562816.0,
            "2": 2195420160.0,
            "3": 2195420160.0,
            "4": 2195420160.0,
            "5": 2195420160.0,
            "6": 2195420160.0,
            "7": 2195420160.0,
            "8": 2195420160.0,
            "9": 2195420160.0,
            "10": 2195420160.0,
            "11": 2195420160.0,
            "12": 2195420160.0,
            "13": 2195420160.0,
            "14": 2195420160.0,
            "15": 2195420160.0,
            "16": 2195420160.0,
            "17": 2195420160.0,
            "18": 2195420160.0,
            "19": 2195420160.0,
            "20": 2195420160.0,
            "21": 2195420160.0,
            "22": 2195420160.0,
            "23": 2195420160.0,
            "24": 2195420160.0,
            "25": 2195420160.0,
            "26": 2195420160.0,
            "27": 2195420160.0,
            "28": 2195420160.0,
            "29": 2195420160.0,
            "30": 2195420160.0,
            "31": 2195420160.0,
            "32": 2195420160.0,
            "33": 2195420160.0,
            "34": 2195420160.0,
            "35": 2195420160.0,
            "36": 2195420160.0,
            "37": 2195420160.0,
            "38": 2195420160.0,
            "39": 2195420160.0,
            "40": 2195420160.0,
            "41": 2195420160.0,
            "42": 2195420160.0,
            "43": 2195420160.0,
            "44": 2195420160.0,
            "45": 2195420160.0,
            "46": 2195420160.0,
            "47": 2195420160.0,
            "48": 2195420160.0,
            "49": 2195420160.0,
            "50": 2195420160.0,
            "51": 2195420160.0,
            "52": 2195420160.0,
            "53": 2195420160.0,
            "54": 2195420160.0,
            "55": 2195420160.0,
            "56": 2195420160.0,
            "57": 2195420160.0,
            "58": 2195420160.0,
            "59": 2195420160.0,
            "60": 2195420160.0,
            "61": 2195420160.0,
            "62": 2195420160.0,
            "63": 2195420160.0,
            "64": 2195420160.0,
            "65": 2195420160.0,
            "66": 2195420160.0,
            "67": 2195420160.0,
            "68": 2195420160.0,
            "69": 2195420160.0,
            "70": 2195420160.0,
            "71": 2195420160.0,
            "72": 2195420160.0,
            "73": 2195420160.0,
            "74": 2195420160.0,
            "75": 2195420160.0,
            "76": 2195420160.0,
            "77": 2195420160.0,
            "78": 2195420160.0,
            "79": 2195420160.0,
            "80": 2195420160.0,
            "81": 2195420160.0,
            "82": 2195420160.0,
            "83": 2195420160.0,
            "84": 2195420160.0,
            "85": 2195420160.0,
            "86": 2195420160.0,
            "87": 2195420160.0,
            "88": 2195420160.0,
            "89": 2195420160.0,
            "90": 2195420160.0,
            "91": 2195420160.0,
            "92": 2195420160.0,
            "93": 2195420160.0,
            "94": 2195420160.0,
            "95": 2195420160.0,
            "96": 2195420160.0,
            "97": 2195420160.0,
            "98": 2195420160.0,
            "99": 2195420160.0,
            "100": 2195420160.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 5.03932,
            "2": 0.18621,
            "3": 0.17196,
            "4": 0.15545,
            "5": 0.1504,
            "6": 0.15031,
            "7": 0.14857,
            "8": 0.14917,
            "9": 0.1495,
            "10": 0.14924,
            "11": 0.14939,
            "12": 0.14861,
            "13": 0.14915,
            "14": 0.14919,
            "15": 0.14909,
            "16": 0.14904,
            "17": 0.14933,
            "18": 0.14874,
            "19": 0.14902,
            "20": 0.14813,
            "21": 0.14885,
            "22": 0.14872,
            "23": 0.14993,
            "24": 0.14895,
            "25": 0.14768,
            "26": 0.14781,
            "27": 0.14754,
            "28": 0.14775,
            "29": 0.15216,
            "30": 0.15461,
            "31": 0.1541,
            "32": 0.14739,
            "33": 0.14626,
            "34": 0.14619,
            "35": 0.14604,
            "36": 0.14567,
            "37": 0.14566,
            "38": 0.14678,
            "39": 0.14625,
            "40": 0.14515,
            "41": 0.1459,
            "42": 0.14526,
            "43": 0.14647,
            "44": 0.14562,
            "45": 0.14545,
            "46": 0.14621,
            "47": 0.14567,
            "48": 0.14603,
            "49": 0.14558,
            "50": 0.14505,
            "51": 0.16204,
            "52": 0.15073,
            "53": 0.15152,
            "54": 0.15093,
            "55": 0.15055,
            "56": 0.15091,
            "57": 0.15302,
            "58": 0.15142,
            "59": 0.15079,
            "60": 0.15185,
            "61": 0.14979,
            "62": 0.15038,
            "63": 0.15098,
            "64": 0.1503,
            "65": 0.15057,
            "66": 0.15088,
            "67": 0.15024,
            "68": 0.15134,
            "69": 0.15072,
            "70": 0.15092,
            "71": 0.15108,
            "72": 0.15129,
            "73": 0.15025,
            "74": 0.15185,
            "75": 0.15148,
            "76": 0.15102,
            "77": 0.15066,
            "78": 0.15069,
            "79": 0.1514,
            "80": 0.15055,
            "81": 0.15068,
            "82": 0.15079,
            "83": 0.15141,
            "84": 0.15081,
            "85": 0.15116,
            "86": 0.15171,
            "87": 0.15012,
            "88": 0.15018,
            "89": 0.1509,
            "90": 0.15033,
            "91": 0.15134,
            "92": 0.15061,
            "93": 0.1505,
            "94": 0.15109,
            "95": 0.1506,
            "96": 0.15188,
            "97": 0.15182,
            "98": 0.15154,
            "99": 0.15201,
            "100": 0.15117
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgx_a100_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.86793,
            "52": 9.76274,
            "53": 10.10895,
            "54": 9.95538,
            "55": 9.8756,
            "56": 9.64751,
            "57": 9.48989,
            "58": 9.85502,
            "59": 9.59457,
            "60": 9.52968,
            "61": 9.69589,
            "62": 10.01676,
            "63": 9.38778,
            "64": 9.80211,
            "65": 8.95119,
            "66": 9.72857,
            "67": 9.37577,
            "68": 9.80463,
            "69": 9.81,
            "70": 9.7662,
            "71": 9.63135,
            "72": 9.5784,
            "73": 9.52148,
            "74": 8.94976,
            "75": 9.43087,
            "76": 9.08489,
            "77": 10.089,
            "78": 9.72754,
            "79": 9.37612,
            "80": 9.40849,
            "81": 9.49766,
            "82": 9.71298,
            "83": 9.33332,
            "84": 9.43928,
            "85": 9.63373,
            "86": 9.07038,
            "87": 9.61245,
            "88": 9.78304,
            "89": 9.60878,
            "90": 9.85164,
            "91": 9.34542,
            "92": 9.38281,
            "93": 9.07319,
            "94": 8.81684,
            "95": 9.51809,
            "96": 9.54033,
            "97": 9.34061,
            "98": 9.70134,
            "99": 8.88786,
            "100": 9.43285
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 22791326.0,
            "52": 22749392.0,
            "53": 22925970.0,
            "54": 22839434.0,
            "55": 22518416.0,
            "56": 22877660.0,
            "57": 23113304.0,
            "58": 22845008.0,
            "59": 22715512.0,
            "60": 22743058.0,
            "61": 22723950.0,
            "62": 22673248.0,
            "63": 22846074.0,
            "64": 22823228.0,
            "65": 23060212.0,
            "66": 22729902.0,
            "67": 22907278.0,
            "68": 22610092.0,
            "69": 22584360.0,
            "70": 22829348.0,
            "71": 22749420.0,
            "72": 22655446.0,
            "73": 22740974.0,
            "74": 23048296.0,
            "75": 23053922.0,
            "76": 22901008.0,
            "77": 22272806.0,
            "78": 22789370.0,
            "79": 22743288.0,
            "80": 22706236.0,
            "81": 22890976.0,
            "82": 22777092.0,
            "83": 22839240.0,
            "84": 23010352.0,
            "85": 22712004.0,
            "86": 23103740.0,
            "87": 22734788.0,
            "88": 22637620.0,
            "89": 22499200.0,
            "90": 22972420.0,
            "91": 22766428.0,
            "92": 22808890.0,
            "93": 22659888.0,
            "94": 22910970.0,
            "95": 23048514.0,
            "96": 22829470.0,
            "97": 22608826.0,
            "98": 22763528.0,
            "99": 22905754.0,
            "100": 23016268.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 717083136.0,
            "52": 717083136.0,
            "53": 717083136.0,
            "54": 717083136.0,
            "55": 717083136.0,
            "56": 717083136.0,
            "57": 717083136.0,
            "58": 717083136.0,
            "59": 717083136.0,
            "60": 717083136.0,
            "61": 717083136.0,
            "62": 717083136.0,
            "63": 717083136.0,
            "64": 717083136.0,
            "65": 717083136.0,
            "66": 717083136.0,
            "67": 717083136.0,
            "68": 717083136.0,
            "69": 717083136.0,
            "70": 717083136.0,
            "71": 717083136.0,
            "72": 717083136.0,
            "73": 717083136.0,
            "74": 717083136.0,
            "75": 717083136.0,
            "76": 717083136.0,
            "77": 717083136.0,
            "78": 717083136.0,
            "79": 717083136.0,
            "80": 717083136.0,
            "81": 717083136.0,
            "82": 717083136.0,
            "83": 717083136.0,
            "84": 717083136.0,
            "85": 717083136.0,
            "86": 717083136.0,
            "87": 717083136.0,
            "88": 717083136.0,
            "89": 717083136.0,
            "90": 717083136.0,
            "91": 717083136.0,
            "92": 717083136.0,
            "93": 717083136.0,
            "94": 717083136.0,
            "95": 717083136.0,
            "96": 717083136.0,
            "97": 717083136.0,
            "98": 717083136.0,
            "99": 717083136.0,
            "100": 717083136.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2194370560.0,
            "52": 2194371584.0,
            "53": 2194371584.0,
            "54": 2194371584.0,
            "55": 2194371584.0,
            "56": 2194371584.0,
            "57": 2194371584.0,
            "58": 2194371584.0,
            "59": 2194371584.0,
            "60": 2194371584.0,
            "61": 2194371584.0,
            "62": 2194371584.0,
            "63": 2194371584.0,
            "64": 2194371584.0,
            "65": 2194371584.0,
            "66": 2194371584.0,
            "67": 2194371584.0,
            "68": 2194371584.0,
            "69": 2194371584.0,
            "70": 2194371584.0,
            "71": 2194371584.0,
            "72": 2194371584.0,
            "73": 2194371584.0,
            "74": 2194371584.0,
            "75": 2194371584.0,
            "76": 2194371584.0,
            "77": 2194371584.0,
            "78": 2194371584.0,
            "79": 2194371584.0,
            "80": 2194371584.0,
            "81": 2194371584.0,
            "82": 2194371584.0,
            "83": 2194371584.0,
            "84": 2194371584.0,
            "85": 2194371584.0,
            "86": 2194371584.0,
            "87": 2194371584.0,
            "88": 2194371584.0,
            "89": 2194371584.0,
            "90": 2194371584.0,
            "91": 2194371584.0,
            "92": 2194371584.0,
            "93": 2194371584.0,
            "94": 2194371584.0,
            "95": 2194371584.0,
            "96": 2194371584.0,
            "97": 2194371584.0,
            "98": 2194371584.0,
            "99": 2194371584.0,
            "100": 2194371584.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 3.88691,
            "52": 0.18475,
            "53": 0.15645,
            "54": 0.15149,
            "55": 0.15178,
            "56": 0.15436,
            "57": 0.15089,
            "58": 0.15055,
            "59": 0.15075,
            "60": 0.1517,
            "61": 0.15028,
            "62": 0.14804,
            "63": 0.14921,
            "64": 0.15,
            "65": 0.14973,
            "66": 0.15168,
            "67": 0.15493,
            "68": 0.15271,
            "69": 0.15341,
            "70": 0.15423,
            "71": 0.15432,
            "72": 0.15491,
            "73": 0.1552,
            "74": 0.15454,
            "75": 0.15427,
            "76": 0.15393,
            "77": 0.15383,
            "78": 0.15459,
            "79": 0.15484,
            "80": 0.1534,
            "81": 0.15504,
            "82": 0.15286,
            "83": 0.15444,
            "84": 0.15427,
            "85": 0.15522,
            "86": 0.15438,
            "87": 0.15378,
            "88": 0.15395,
            "89": 0.15338,
            "90": 0.1542,
            "91": 0.15415,
            "92": 0.15382,
            "93": 0.15529,
            "94": 0.15411,
            "95": 0.15301,
            "96": 0.15392,
            "97": 0.15398,
            "98": 0.15485,
            "99": 0.15384,
            "100": 0.15373
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgxa100_dracooci-ord.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.90105,
            "2": 10.89262,
            "3": 10.90042,
            "4": 10.88139,
            "5": 10.89686,
            "6": 10.91104,
            "7": 10.90071,
            "8": 10.88372,
            "9": 10.89705,
            "10": 10.88269,
            "11": 10.91638,
            "12": 10.88862,
            "13": 10.89506,
            "14": 10.90397,
            "15": 10.83975,
            "16": 10.84821,
            "17": 10.83519,
            "18": 10.83782,
            "19": 10.83204,
            "20": 10.74037,
            "21": 10.70726,
            "22": 10.5989,
            "23": 10.72135,
            "24": 10.60586,
            "25": 10.57931,
            "26": 10.63021,
            "27": 10.62207,
            "28": 10.57267,
            "29": 10.60724,
            "30": 10.37738,
            "31": 10.15237,
            "32": 10.47733,
            "33": 10.48045,
            "34": 10.24256,
            "35": 10.29033,
            "36": 10.26052,
            "37": 10.36236,
            "38": 10.2143,
            "39": 10.44546,
            "40": 10.1156,
            "41": 10.15998,
            "42": 10.23373,
            "43": 9.85188,
            "44": 9.97725,
            "45": 9.85639,
            "46": 9.83161,
            "47": 10.17999,
            "48": 9.85771,
            "49": 9.54486,
            "50": 9.93378,
            "51": 9.86811,
            "52": 9.76315,
            "53": 10.10886,
            "54": 9.95631,
            "55": 9.87553,
            "56": 9.64641,
            "57": 9.49014,
            "58": 9.85454,
            "59": 9.59336,
            "60": 9.528,
            "61": 9.69542,
            "62": 10.01688,
            "63": 9.38936,
            "64": 9.80315,
            "65": 8.95041,
            "66": 9.72761,
            "67": 9.37481,
            "68": 9.80513,
            "69": 9.81015,
            "70": 9.76634,
            "71": 9.63164,
            "72": 9.57894,
            "73": 9.52071,
            "74": 8.94946,
            "75": 9.4304,
            "76": 9.0845,
            "77": 10.08945,
            "78": 9.72783,
            "79": 9.37638,
            "80": 9.40916,
            "81": 9.4973,
            "82": 9.71293,
            "83": 9.33328,
            "84": 9.44016,
            "85": 9.63365,
            "86": 9.07079,
            "87": 9.61271,
            "88": 9.78341,
            "89": 9.60939,
            "90": 9.8516,
            "91": 9.34566,
            "92": 9.38259,
            "93": 9.07364,
            "94": 8.81745,
            "95": 9.51874,
            "96": 9.54064,
            "97": 9.3403,
            "98": 9.7014,
            "99": 8.88889,
            "100": 9.43257
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 22727086.0,
            "2": 22925536.0,
            "3": 22597166.0,
            "4": 23219856.0,
            "5": 22714736.0,
            "6": 23021732.0,
            "7": 22770914.0,
            "8": 22927056.0,
            "9": 22842296.0,
            "10": 22918912.0,
            "11": 22500920.0,
            "12": 22460280.0,
            "13": 22917408.0,
            "14": 22388720.0,
            "15": 22821334.0,
            "16": 22830758.0,
            "17": 22818604.0,
            "18": 22581868.0,
            "19": 22618000.0,
            "20": 22694008.0,
            "21": 22739396.0,
            "22": 22800094.0,
            "23": 22540104.0,
            "24": 22771496.0,
            "25": 22818912.0,
            "26": 22547352.0,
            "27": 22469568.0,
            "28": 22453522.0,
            "29": 22530096.0,
            "30": 22631266.0,
            "31": 22955564.0,
            "32": 22585980.0,
            "33": 22558174.0,
            "34": 22835734.0,
            "35": 22787944.0,
            "36": 22590020.0,
            "37": 22497168.0,
            "38": 22896692.0,
            "39": 22801708.0,
            "40": 22658196.0,
            "41": 22659512.0,
            "42": 22667920.0,
            "43": 22975524.0,
            "44": 22746310.0,
            "45": 22675296.0,
            "46": 22884630.0,
            "47": 22633552.0,
            "48": 22929508.0,
            "49": 22727314.0,
            "50": 22904808.0,
            "51": 22791580.0,
            "52": 22748196.0,
            "53": 22926080.0,
            "54": 22839468.0,
            "55": 22518754.0,
            "56": 22877424.0,
            "57": 23112764.0,
            "58": 22845208.0,
            "59": 22716140.0,
            "60": 22743504.0,
            "61": 22724840.0,
            "62": 22672332.0,
            "63": 22846080.0,
            "64": 22823362.0,
            "65": 23060460.0,
            "66": 22729572.0,
            "67": 22907836.0,
            "68": 22610520.0,
            "69": 22584436.0,
            "70": 22829772.0,
            "71": 22749364.0,
            "72": 22653792.0,
            "73": 22740804.0,
            "74": 23047852.0,
            "75": 23054048.0,
            "76": 22901336.0,
            "77": 22271880.0,
            "78": 22789702.0,
            "79": 22743626.0,
            "80": 22706308.0,
            "81": 22891444.0,
            "82": 22776950.0,
            "83": 22839442.0,
            "84": 23010112.0,
            "85": 22712054.0,
            "86": 23103248.0,
            "87": 22735596.0,
            "88": 22636964.0,
            "89": 22499088.0,
            "90": 22972128.0,
            "91": 22767228.0,
            "92": 22810212.0,
            "93": 22659490.0,
            "94": 22911654.0,
            "95": 23048144.0,
            "96": 22828752.0,
            "97": 22608416.0,
            "98": 22762932.0,
            "99": 22906240.0,
            "100": 23015824.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 717082624.0,
            "2": 717082624.0,
            "3": 717082624.0,
            "4": 717082624.0,
            "5": 717082624.0,
            "6": 717082624.0,
            "7": 717082624.0,
            "8": 717082624.0,
            "9": 717082624.0,
            "10": 717082624.0,
            "11": 717082624.0,
            "12": 717082624.0,
            "13": 717082624.0,
            "14": 717082624.0,
            "15": 717082624.0,
            "16": 717082624.0,
            "17": 717082624.0,
            "18": 717082624.0,
            "19": 717082624.0,
            "20": 717082624.0,
            "21": 717082624.0,
            "22": 717082624.0,
            "23": 717082624.0,
            "24": 717082624.0,
            "25": 717082624.0,
            "26": 717082624.0,
            "27": 717082624.0,
            "28": 717082624.0,
            "29": 717082624.0,
            "30": 717082624.0,
            "31": 717082624.0,
            "32": 717082624.0,
            "33": 717082624.0,
            "34": 717082624.0,
            "35": 717082624.0,
            "36": 717082624.0,
            "37": 717082624.0,
            "38": 717082624.0,
            "39": 717082624.0,
            "40": 717082624.0,
            "41": 717082624.0,
            "42": 717082624.0,
            "43": 717082624.0,
            "44": 717082624.0,
            "45": 717082624.0,
            "46": 717082624.0,
            "47": 717082624.0,
            "48": 717082624.0,
            "49": 717082624.0,
            "50": 717082624.0,
            "51": 717082624.0,
            "52": 717082624.0,
            "53": 717082624.0,
            "54": 717082624.0,
            "55": 717082624.0,
            "56": 717082624.0,
            "57": 717082624.0,
            "58": 717082624.0,
            "59": 717082624.0,
            "60": 717082624.0,
            "61": 717082624.0,
            "62": 717082624.0,
            "63": 717082624.0,
            "64": 717082624.0,
            "65": 717082624.0,
            "66": 717082624.0,
            "67": 717082624.0,
            "68": 717082624.0,
            "69": 717082624.0,
            "70": 717082624.0,
            "71": 717082624.0,
            "72": 717082624.0,
            "73": 717082624.0,
            "74": 717082624.0,
            "75": 717082624.0,
            "76": 717082624.0,
            "77": 717082624.0,
            "78": 717082624.0,
            "79": 717082624.0,
            "80": 717082624.0,
            "81": 717082624.0,
            "82": 717082624.0,
            "83": 717082624.0,
            "84": 717082624.0,
            "85": 717082624.0,
            "86": 717082624.0,
            "87": 717082624.0,
            "88": 717082624.0,
            "89": 717082624.0,
            "90": 717082624.0,
            "91": 717082624.0,
            "92": 717082624.0,
            "93": 717082624.0,
            "94": 717082624.0,
            "95": 717082624.0,
            "96": 717082624.0,
            "97": 717082624.0,
            "98": 717082624.0,
            "99": 717082624.0,
            "100": 717082624.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2399852544.0,
            "2": 2683661312.0,
            "3": 2683661312.0,
            "4": 2683661312.0,
            "5": 2683661312.0,
            "6": 2683661312.0,
            "7": 2683661312.0,
            "8": 2683661312.0,
            "9": 2683661312.0,
            "10": 2683661312.0,
            "11": 2683661312.0,
            "12": 2683661312.0,
            "13": 2683661312.0,
            "14": 2683661312.0,
            "15": 2683661312.0,
            "16": 2683661312.0,
            "17": 2683661312.0,
            "18": 2683661312.0,
            "19": 2683661312.0,
            "20": 2683661312.0,
            "21": 2683661312.0,
            "22": 2683661312.0,
            "23": 2683661312.0,
            "24": 2683661312.0,
            "25": 2683661312.0,
            "26": 2683661312.0,
            "27": 2683661312.0,
            "28": 2683661312.0,
            "29": 2683661312.0,
            "30": 2683661312.0,
            "31": 2683661312.0,
            "32": 2683661312.0,
            "33": 2683661312.0,
            "34": 2683661312.0,
            "35": 2683661312.0,
            "36": 2683661312.0,
            "37": 2683661312.0,
            "38": 2683661312.0,
            "39": 2683661312.0,
            "40": 2683661312.0,
            "41": 2683661312.0,
            "42": 2683661312.0,
            "43": 2683661312.0,
            "44": 2683661312.0,
            "45": 2683661312.0,
            "46": 2683661312.0,
            "47": 2683661312.0,
            "48": 2683661312.0,
            "49": 2683661312.0,
            "50": 2683661312.0,
            "51": 2683661312.0,
            "52": 2683661312.0,
            "53": 2683661312.0,
            "54": 2683661312.0,
            "55": 2683661312.0,
            "56": 2683661312.0,
            "57": 2683661312.0,
            "58": 2683661312.0,
            "59": 2683661312.0,
            "60": 2683661312.0,
            "61": 2683661312.0,
            "62": 2683661312.0,
            "63": 2683661312.0,
            "64": 2683661312.0,
            "65": 2683661312.0,
            "66": 2683661312.0,
            "67": 2683661312.0,
            "68": 2683661312.0,
            "69": 2683661312.0,
            "70": 2683661312.0,
            "71": 2683661312.0,
            "72": 2683661312.0,
            "73": 2683661312.0,
            "74": 2683661312.0,
            "75": 2683661312.0,
            "76": 2683661312.0,
            "77": 2683661312.0,
            "78": 2683661312.0,
            "79": 2683661312.0,
            "80": 2683661312.0,
            "81": 2683661312.0,
            "82": 2683661312.0,
            "83": 2683661312.0,
            "84": 2683661312.0,
            "85": 2683661312.0,
            "86": 2683661312.0,
            "87": 2683661312.0,
            "88": 2683661312.0,
            "89": 2683661312.0,
            "90": 2683661312.0,
            "91": 2683661312.0,
            "92": 2683661312.0,
            "93": 2683661312.0,
            "94": 2683661312.0,
            "95": 2683661312.0,
            "96": 2683661312.0,
            "97": 2683661312.0,
            "98": 2683661312.0,
            "99": 2683661312.0,
            "100": 2683661312.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 16.63764,
            "2": 0.21125,
            "3": 0.18805,
            "4": 0.18329,
            "5": 0.1823,
            "6": 0.18232,
            "7": 0.18144,
            "8": 0.18027,
            "9": 0.17969,
            "10": 0.18238,
            "11": 0.18028,
            "12": 0.36174,
            "13": 0.18167,
            "14": 0.1837,
            "15": 0.18267,
            "16": 0.18257,
            "17": 0.18024,
            "18": 0.18275,
            "19": 0.1832,
            "20": 0.17831,
            "21": 0.18017,
            "22": 0.18109,
            "23": 0.17885,
            "24": 0.18267,
            "25": 0.18058,
            "26": 0.1773,
            "27": 0.1794,
            "28": 0.17907,
            "29": 0.18081,
            "30": 0.17905,
            "31": 0.17854,
            "32": 0.17894,
            "33": 0.17849,
            "34": 0.17658,
            "35": 0.17776,
            "36": 0.17727,
            "37": 0.17642,
            "38": 0.17777,
            "39": 0.17803,
            "40": 0.17642,
            "41": 0.17693,
            "42": 0.17625,
            "43": 0.17866,
            "44": 0.17762,
            "45": 0.17754,
            "46": 0.17702,
            "47": 0.17711,
            "48": 0.17758,
            "49": 0.17715,
            "50": 0.17757,
            "51": 0.18445,
            "52": 0.1799,
            "53": 0.18208,
            "54": 0.17612,
            "55": 0.17944,
            "56": 0.17873,
            "57": 0.18258,
            "58": 0.17483,
            "59": 0.17477,
            "60": 0.17433,
            "61": 0.17366,
            "62": 0.44447,
            "63": 0.17665,
            "64": 0.17466,
            "65": 0.17524,
            "66": 0.17467,
            "67": 0.17584,
            "68": 0.17461,
            "69": 0.17423,
            "70": 0.1742,
            "71": 0.1735,
            "72": 0.17461,
            "73": 0.17526,
            "74": 0.17447,
            "75": 0.17297,
            "76": 0.17355,
            "77": 0.17305,
            "78": 0.17366,
            "79": 0.17341,
            "80": 0.17382,
            "81": 0.17396,
            "82": 0.17489,
            "83": 0.17464,
            "84": 0.17401,
            "85": 0.17498,
            "86": 0.17379,
            "87": 0.1725,
            "88": 0.17312,
            "89": 0.17427,
            "90": 0.17333,
            "91": 0.1738,
            "92": 0.1743,
            "93": 0.1732,
            "94": 0.1739,
            "95": 0.17949,
            "96": 0.17499,
            "97": 0.17375,
            "98": 0.17377,
            "99": 0.17343,
            "100": 0.17383
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/golden_values_lts_dgxa100_dracooci.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.90105,
            "2": 10.89262,
            "3": 10.90042,
            "4": 10.88139,
            "5": 10.89686,
            "6": 10.91104,
            "7": 10.90071,
            "8": 10.88372,
            "9": 10.89705,
            "10": 10.88269,
            "11": 10.91638,
            "12": 10.88862,
            "13": 10.89506,
            "14": 10.90397,
            "15": 10.83975,
            "16": 10.84821,
            "17": 10.83519,
            "18": 10.83782,
            "19": 10.83204,
            "20": 10.74037,
            "21": 10.70726,
            "22": 10.5989,
            "23": 10.72135,
            "24": 10.60586,
            "25": 10.57931,
            "26": 10.63021,
            "27": 10.62207,
            "28": 10.57267,
            "29": 10.60724,
            "30": 10.37738,
            "31": 10.15237,
            "32": 10.47733,
            "33": 10.48045,
            "34": 10.24256,
            "35": 10.29033,
            "36": 10.26052,
            "37": 10.36236,
            "38": 10.2143,
            "39": 10.44546,
            "40": 10.1156,
            "41": 10.15998,
            "42": 10.23373,
            "43": 9.85188,
            "44": 9.97725,
            "45": 9.85639,
            "46": 9.83161,
            "47": 10.17999,
            "48": 9.85771,
            "49": 9.54486,
            "50": 9.93378,
            "51": 9.86811,
            "52": 9.76315,
            "53": 10.10886,
            "54": 9.95631,
            "55": 9.87553,
            "56": 9.64641,
            "57": 9.49014,
            "58": 9.85454,
            "59": 9.59336,
            "60": 9.528,
            "61": 9.69542,
            "62": 10.01688,
            "63": 9.38936,
            "64": 9.80315,
            "65": 8.95041,
            "66": 9.72761,
            "67": 9.37481,
            "68": 9.80513,
            "69": 9.81015,
            "70": 9.76634,
            "71": 9.63164,
            "72": 9.57894,
            "73": 9.52071,
            "74": 8.94946,
            "75": 9.4304,
            "76": 9.0845,
            "77": 10.08945,
            "78": 9.72783,
            "79": 9.37638,
            "80": 9.40916,
            "81": 9.4973,
            "82": 9.71293,
            "83": 9.33328,
            "84": 9.44016,
            "85": 9.63365,
            "86": 9.07079,
            "87": 9.61271,
            "88": 9.78341,
            "89": 9.60939,
            "90": 9.8516,
            "91": 9.34566,
            "92": 9.38259,
            "93": 9.07364,
            "94": 8.81745,
            "95": 9.51874,
            "96": 9.54064,
            "97": 9.3403,
            "98": 9.7014,
            "99": 8.88889,
            "100": 9.43257
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 22727086.0,
            "2": 22925536.0,
            "3": 22597166.0,
            "4": 23219856.0,
            "5": 22714736.0,
            "6": 23021732.0,
            "7": 22770914.0,
            "8": 22927056.0,
            "9": 22842296.0,
            "10": 22918912.0,
            "11": 22500920.0,
            "12": 22460280.0,
            "13": 22917408.0,
            "14": 22388720.0,
            "15": 22821334.0,
            "16": 22830758.0,
            "17": 22818604.0,
            "18": 22581868.0,
            "19": 22618000.0,
            "20": 22694008.0,
            "21": 22739396.0,
            "22": 22800094.0,
            "23": 22540104.0,
            "24": 22771496.0,
            "25": 22818912.0,
            "26": 22547352.0,
            "27": 22469568.0,
            "28": 22453522.0,
            "29": 22530096.0,
            "30": 22631266.0,
            "31": 22955564.0,
            "32": 22585980.0,
            "33": 22558174.0,
            "34": 22835734.0,
            "35": 22787944.0,
            "36": 22590020.0,
            "37": 22497168.0,
            "38": 22896692.0,
            "39": 22801708.0,
            "40": 22658196.0,
            "41": 22659512.0,
            "42": 22667920.0,
            "43": 22975524.0,
            "44": 22746310.0,
            "45": 22675296.0,
            "46": 22884630.0,
            "47": 22633552.0,
            "48": 22929508.0,
            "49": 22727314.0,
            "50": 22904808.0,
            "51": 22791580.0,
            "52": 22748196.0,
            "53": 22926080.0,
            "54": 22839468.0,
            "55": 22518754.0,
            "56": 22877424.0,
            "57": 23112764.0,
            "58": 22845208.0,
            "59": 22716140.0,
            "60": 22743504.0,
            "61": 22724840.0,
            "62": 22672332.0,
            "63": 22846080.0,
            "64": 22823362.0,
            "65": 23060460.0,
            "66": 22729572.0,
            "67": 22907836.0,
            "68": 22610520.0,
            "69": 22584436.0,
            "70": 22829772.0,
            "71": 22749364.0,
            "72": 22653792.0,
            "73": 22740804.0,
            "74": 23047852.0,
            "75": 23054048.0,
            "76": 22901336.0,
            "77": 22271880.0,
            "78": 22789702.0,
            "79": 22743626.0,
            "80": 22706308.0,
            "81": 22891444.0,
            "82": 22776950.0,
            "83": 22839442.0,
            "84": 23010112.0,
            "85": 22712054.0,
            "86": 23103248.0,
            "87": 22735596.0,
            "88": 22636964.0,
            "89": 22499088.0,
            "90": 22972128.0,
            "91": 22767228.0,
            "92": 22810212.0,
            "93": 22659490.0,
            "94": 22911654.0,
            "95": 23048144.0,
            "96": 22828752.0,
            "97": 22608416.0,
            "98": 22762932.0,
            "99": 22906240.0,
            "100": 23015824.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 717082624.0,
            "2": 717082624.0,
            "3": 717082624.0,
            "4": 717082624.0,
            "5": 717082624.0,
            "6": 717082624.0,
            "7": 717082624.0,
            "8": 717082624.0,
            "9": 717082624.0,
            "10": 717082624.0,
            "11": 717082624.0,
            "12": 717082624.0,
            "13": 717082624.0,
            "14": 717082624.0,
            "15": 717082624.0,
            "16": 717082624.0,
            "17": 717082624.0,
            "18": 717082624.0,
            "19": 717082624.0,
            "20": 717082624.0,
            "21": 717082624.0,
            "22": 717082624.0,
            "23": 717082624.0,
            "24": 717082624.0,
            "25": 717082624.0,
            "26": 717082624.0,
            "27": 717082624.0,
            "28": 717082624.0,
            "29": 717082624.0,
            "30": 717082624.0,
            "31": 717082624.0,
            "32": 717082624.0,
            "33": 717082624.0,
            "34": 717082624.0,
            "35": 717082624.0,
            "36": 717082624.0,
            "37": 717082624.0,
            "38": 717082624.0,
            "39": 717082624.0,
            "40": 717082624.0,
            "41": 717082624.0,
            "42": 717082624.0,
            "43": 717082624.0,
            "44": 717082624.0,
            "45": 717082624.0,
            "46": 717082624.0,
            "47": 717082624.0,
            "48": 717082624.0,
            "49": 717082624.0,
            "50": 717082624.0,
            "51": 717082624.0,
            "52": 717082624.0,
            "53": 717082624.0,
            "54": 717082624.0,
            "55": 717082624.0,
            "56": 717082624.0,
            "57": 717082624.0,
            "58": 717082624.0,
            "59": 717082624.0,
            "60": 717082624.0,
            "61": 717082624.0,
            "62": 717082624.0,
            "63": 717082624.0,
            "64": 717082624.0,
            "65": 717082624.0,
            "66": 717082624.0,
            "67": 717082624.0,
            "68": 717082624.0,
            "69": 717082624.0,
            "70": 717082624.0,
            "71": 717082624.0,
            "72": 717082624.0,
            "73": 717082624.0,
            "74": 717082624.0,
            "75": 717082624.0,
            "76": 717082624.0,
            "77": 717082624.0,
            "78": 717082624.0,
            "79": 717082624.0,
            "80": 717082624.0,
            "81": 717082624.0,
            "82": 717082624.0,
            "83": 717082624.0,
            "84": 717082624.0,
            "85": 717082624.0,
            "86": 717082624.0,
            "87": 717082624.0,
            "88": 717082624.0,
            "89": 717082624.0,
            "90": 717082624.0,
            "91": 717082624.0,
            "92": 717082624.0,
            "93": 717082624.0,
            "94": 717082624.0,
            "95": 717082624.0,
            "96": 717082624.0,
            "97": 717082624.0,
            "98": 717082624.0,
            "99": 717082624.0,
            "100": 717082624.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2399852544.0,
            "2": 2683661312.0,
            "3": 2683661312.0,
            "4": 2683661312.0,
            "5": 2683661312.0,
            "6": 2683661312.0,
            "7": 2683661312.0,
            "8": 2683661312.0,
            "9": 2683661312.0,
            "10": 2683661312.0,
            "11": 2683661312.0,
            "12": 2683661312.0,
            "13": 2683661312.0,
            "14": 2683661312.0,
            "15": 2683661312.0,
            "16": 2683661312.0,
            "17": 2683661312.0,
            "18": 2683661312.0,
            "19": 2683661312.0,
            "20": 2683661312.0,
            "21": 2683661312.0,
            "22": 2683661312.0,
            "23": 2683661312.0,
            "24": 2683661312.0,
            "25": 2683661312.0,
            "26": 2683661312.0,
            "27": 2683661312.0,
            "28": 2683661312.0,
            "29": 2683661312.0,
            "30": 2683661312.0,
            "31": 2683661312.0,
            "32": 2683661312.0,
            "33": 2683661312.0,
            "34": 2683661312.0,
            "35": 2683661312.0,
            "36": 2683661312.0,
            "37": 2683661312.0,
            "38": 2683661312.0,
            "39": 2683661312.0,
            "40": 2683661312.0,
            "41": 2683661312.0,
            "42": 2683661312.0,
            "43": 2683661312.0,
            "44": 2683661312.0,
            "45": 2683661312.0,
            "46": 2683661312.0,
            "47": 2683661312.0,
            "48": 2683661312.0,
            "49": 2683661312.0,
            "50": 2683661312.0,
            "51": 2683661312.0,
            "52": 2683661312.0,
            "53": 2683661312.0,
            "54": 2683661312.0,
            "55": 2683661312.0,
            "56": 2683661312.0,
            "57": 2683661312.0,
            "58": 2683661312.0,
            "59": 2683661312.0,
            "60": 2683661312.0,
            "61": 2683661312.0,
            "62": 2683661312.0,
            "63": 2683661312.0,
            "64": 2683661312.0,
            "65": 2683661312.0,
            "66": 2683661312.0,
            "67": 2683661312.0,
            "68": 2683661312.0,
            "69": 2683661312.0,
            "70": 2683661312.0,
            "71": 2683661312.0,
            "72": 2683661312.0,
            "73": 2683661312.0,
            "74": 2683661312.0,
            "75": 2683661312.0,
            "76": 2683661312.0,
            "77": 2683661312.0,
            "78": 2683661312.0,
            "79": 2683661312.0,
            "80": 2683661312.0,
            "81": 2683661312.0,
            "82": 2683661312.0,
            "83": 2683661312.0,
            "84": 2683661312.0,
            "85": 2683661312.0,
            "86": 2683661312.0,
            "87": 2683661312.0,
            "88": 2683661312.0,
            "89": 2683661312.0,
            "90": 2683661312.0,
            "91": 2683661312.0,
            "92": 2683661312.0,
            "93": 2683661312.0,
            "94": 2683661312.0,
            "95": 2683661312.0,
            "96": 2683661312.0,
            "97": 2683661312.0,
            "98": 2683661312.0,
            "99": 2683661312.0,
            "100": 2683661312.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 19.93377,
            "2": 0.214,
            "3": 0.18589,
            "4": 0.17894,
            "5": 0.1785,
            "6": 0.17891,
            "7": 0.18156,
            "8": 0.18079,
            "9": 0.17824,
            "10": 0.17989,
            "11": 0.17805,
            "12": 0.17716,
            "13": 0.17836,
            "14": 0.17787,
            "15": 0.17769,
            "16": 0.17666,
            "17": 0.17653,
            "18": 0.1758,
            "19": 0.17562,
            "20": 0.1768,
            "21": 0.1768,
            "22": 0.17624,
            "23": 0.17472,
            "24": 0.17432,
            "25": 0.1736,
            "26": 0.1746,
            "27": 0.17474,
            "28": 0.17601,
            "29": 0.17807,
            "30": 0.17493,
            "31": 0.17335,
            "32": 0.17319,
            "33": 0.17268,
            "34": 0.17305,
            "35": 0.17412,
            "36": 0.17335,
            "37": 0.17266,
            "38": 0.17413,
            "39": 0.17304,
            "40": 0.17432,
            "41": 0.17519,
            "42": 0.17337,
            "43": 0.17392,
            "44": 0.17265,
            "45": 0.17279,
            "46": 0.17548,
            "47": 0.17651,
            "48": 0.17389,
            "49": 0.17631,
            "50": 0.17232,
            "51": 0.18407,
            "52": 0.17581,
            "53": 0.37263,
            "54": 0.17452,
            "55": 0.17442,
            "56": 0.1745,
            "57": 0.17483,
            "58": 0.17583,
            "59": 0.17494,
            "60": 0.17407,
            "61": 0.17423,
            "62": 0.17441,
            "63": 0.17659,
            "64": 0.17537,
            "65": 0.17556,
            "66": 0.3524,
            "67": 0.17531,
            "68": 0.17588,
            "69": 0.17592,
            "70": 0.17431,
            "71": 0.17395,
            "72": 0.17604,
            "73": 0.17728,
            "74": 0.17752,
            "75": 0.1758,
            "76": 0.17612,
            "77": 0.17411,
            "78": 0.17662,
            "79": 0.17605,
            "80": 0.17671,
            "81": 0.17596,
            "82": 0.1766,
            "83": 0.17666,
            "84": 0.17679,
            "85": 0.17653,
            "86": 0.17635,
            "87": 0.17598,
            "88": 0.17546,
            "89": 0.17602,
            "90": 0.17567,
            "91": 0.17695,
            "92": 0.17831,
            "93": 0.17683,
            "94": 0.17578,
            "95": 0.17724,
            "96": 0.17805,
            "97": 0.17524,
            "98": 0.17706,
            "99": 0.1768,
            "100": 0.17633
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --untie-embeddings-and-output-weights: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-checkpoint-opt_param-scheduler: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: ckpt-resume


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.79436, "5": 10.84693, "10": 10.76869, "15": 10.78817, "20": 10.67854, "25": 10.50279, "30": 10.32951, "35": 10.25161, "40": 10.05245, "45": 9.80295, "50": 9.88802}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1697.0, "5": 1823.0, "10": 1373.0, "15": 1888.0, "20": 1616.0, "25": 1525.0, "30": 1966.0, "35": 1963.0, "40": 2280.0, "45": 2261.0, "50": 2386.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 719180288.0, "5": 719180288.0, "10": 719180288.0, "15": 719180288.0, "20": 719180288.0, "25": 719180288.0, "30": 719180288.0, "35": 719180288.0, "40": 719180288.0, "45": 719180288.0, "50": 719180288.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2399853056.0, "5": 2685758976.0, "10": 2685758976.0, "15": 2685758976.0, "20": 2685758976.0, "25": 2685758976.0, "30": 2685758976.0, "35": 2685758976.0, "40": 2685758976.0, "45": 2685758976.0, "50": 2685758976.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 5.22987, "5": 0.17379, "10": 0.17474, "15": 0.17312, "20": 0.17454, "25": 0.17286, "30": 0.17321, "35": 0.17348, "40": 0.17289, "45": 0.17399, "50": 0.17385}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.79208, "5": 10.8459, "10": 10.76945, "15": 10.78965, "20": 10.67868, "25": 10.50409, "30": 10.33063, "35": 10.25254, "40": 10.05221, "45": 9.80242, "50": 9.88789}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1633.0, "5": 1952.0, "10": 1432.0, "15": 1852.0, "20": 1592.0, "25": 1743.0, "30": 1896.0, "35": 1976.0, "40": 2182.0, "45": 2119.0, "50": 2511.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 714985472.0, "5": 714985472.0, "10": 714985472.0, "15": 714985472.0, "20": 714985472.0, "25": 714985472.0, "30": 714985472.0, "35": 714985472.0, "40": 714985472.0, "45": 714985472.0, "50": 714985472.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2399852544.0, "5": 2681564160.0, "10": 2681564160.0, "15": 2681564160.0, "20": 2681564160.0, "25": 2681564160.0, "30": 2681564160.0, "35": 2681564160.0, "40": 2681564160.0, "45": 2681564160.0, "50": 2681564160.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 17.94225, "5": 0.17343, "10": 0.17088, "15": 0.16811, "20": 0.16863, "25": 0.16961, "30": 0.16912, "35": 0.17084, "40": 0.16914, "45": 0.16729, "50": 0.16794}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_sequence_parallel/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --sequence-parallel: true
  --ckpt-fully-parallel-load: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-optim-fully-reshardable: true
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.73394, "5": 10.79243, "10": 10.70607, "15": 10.76012, "20": 10.68686, "25": 10.54768, "30": 10.45359, "35": 10.38572, "40": 10.24216, "45": 9.98159, "50": 10.06417}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2514.0, "5": 2818.0, "10": 2519.0, "15": 2543.0, "20": 2560.0, "25": 2574.0, "30": 2629.0, "35": 2568.0, "40": 2561.0, "45": 2508.0, "50": 2619.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 717420032.0, "5": 717420032.0, "10": 717420032.0, "15": 717420032.0, "20": 717420032.0, "25": 717420032.0, "30": 717420032.0, "35": 717420032.0, "40": 717420032.0, "45": 717420032.0, "50": 717420032.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2401419776.0, "5": 2684785152.0, "10": 2684785152.0, "15": 2684785152.0, "20": 2684785152.0, "25": 2684785152.0, "30": 2684785152.0, "35": 2684785152.0, "40": 2684785152.0, "45": 2684785152.0, "50": 2684785152.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 4.92787, "5": 0.17447, "10": 0.17372, "15": 0.17578, "20": 0.17588, "25": 0.17513, "30": 0.1731, "35": 0.1734, "40": 0.17385, "45": 0.17319, "50": 0.17333}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.74049, "5": 10.79201, "10": 10.71088, "15": 10.76031, "20": 10.6891, "25": 10.54338, "30": 10.4542, "35": 10.38324, "40": 10.24296, "45": 9.9834, "50": 10.06865}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2527.0, "5": 2875.0, "10": 2475.0, "15": 2508.0, "20": 2650.0, "25": 2392.0, "30": 2484.0, "35": 2573.0, "40": 2559.0, "45": 2519.0, "50": 2500.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 715322368.0, "5": 715322368.0, "10": 715322368.0, "15": 715322368.0, "20": 715322368.0, "25": 715322368.0, "30": 715322368.0, "35": 715322368.0, "40": 715322368.0, "45": 715322368.0, "50": 715322368.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2402991104.0, "5": 2683341824.0, "10": 2683341824.0, "15": 2683341824.0, "20": 2683341824.0, "25": 2683341824.0, "30": 2683341824.0, "35": 2683341824.0, "40": 2683341824.0, "45": 2683341824.0, "50": 2683341824.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 14.81379, "5": 0.17159, "10": 0.17073, "15": 0.16785, "20": 0.17251, "25": 0.17348, "30": 0.17312, "35": 0.17159, "40": 0.16987, "45": 0.17054, "50": 0.16978}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_swiglu/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --swiglu: true
  --ckpt-fully-parallel-load: true
  --async-save: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.90334, "5": 10.89492, "10": 10.8842, "15": 10.83898, "20": 10.74044, "25": 10.57419, "30": 10.37753, "35": 10.29014, "40": 10.11428, "45": 9.85668, "50": 9.93436}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 22727036.0, "5": 22714776.0, "10": 22918296.0, "15": 22821260.0, "20": 22694368.0, "25": 22819084.0, "30": 22631352.0, "35": 22787996.0, "40": 22658092.0, "45": 22675220.0, "50": 22905380.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 719180288.0, "5": 719180288.0, "10": 719180288.0, "15": 719180288.0, "20": 719180288.0, "25": 719180288.0, "30": 719180288.0, "35": 719180288.0, "40": 719180288.0, "45": 719180288.0, "50": 719180288.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2399853056.0, "5": 2685758976.0, "10": 2685758976.0, "15": 2685758976.0, "20": 2685758976.0, "25": 2685758976.0, "30": 2685758976.0, "35": 2685758976.0, "40": 2685758976.0, "45": 2685758976.0, "50": 2685758976.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 5.00246, "5": 0.1722, "10": 0.17432, "15": 0.17324, "20": 0.16927, "25": 0.17003, "30": 0.17839, "35": 0.16979, "40": 0.17076, "45": 0.17066, "50": 0.17209}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.90105, "5": 10.89686, "10": 10.88269, "15": 10.83975, "20": 10.74037, "25": 10.57931, "30": 10.37738, "35": 10.29033, "40": 10.1156, "45": 9.8564, "50": 9.93378}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 22727086.0, "5": 22714736.0, "10": 22918912.0, "15": 22821334.0, "20": 22694008.0, "25": 22818912.0, "30": 22631266.0, "35": 22787944.0, "40": 22658196.0, "45": 22675296.0, "50": 22904808.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 717082624.0, "5": 717082624.0, "10": 717082624.0, "15": 717082624.0, "20": 717082624.0, "25": 717082624.0, "30": 717082624.0, "35": 717082624.0, "40": 717082624.0, "45": 717082624.0, "50": 717082624.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 2399852544.0, "5": 2683661312.0, "10": 2683661312.0, "15": 2683661312.0, "20": 2683661312.0, "25": 2683661312.0, "30": 2683661312.0, "35": 2683661312.0, "40": 2683661312.0, "45": 2683661312.0, "50": 2683661312.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 14.84011, "5": 0.17173, "10": 0.17051, "15": 0.16971, "20": 0.16976, "25": 0.16873, "30": 0.16935, "35": 0.17073, "40": 0.169, "45": 0.16775, "50": 0.16768}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_untie_embeddings_and_outputs/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --untie-embeddings-and-output-weights: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.81478, "5": 10.85168, "10": 10.78749, "15": 10.79513, "20": 10.69131, "25": 10.52316, "30": 10.34623, "35": 10.26185, "40": 10.07219, "45": 9.80998, "50": 9.88351}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1544.0, "5": 1795.0, "10": 1340.0, "15": 1894.0, "20": 1620.0, "25": 1728.0, "30": 1825.0, "35": 1908.0, "40": 2074.0, "45": 1962.0, "50": 2349.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 733860352.0, "5": 733860352.0, "10": 733860352.0, "15": 733860352.0, "20": 733860352.0, "25": 733860352.0, "30": 733860352.0, "35": 733860352.0, "40": 733860352.0, "45": 733860352.0, "50": 733860352.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3838895616.0, "5": 4122704384.0, "10": 4122704384.0, "15": 4122704384.0, "20": 4122704384.0, "25": 4122704384.0, "30": 4122704384.0, "35": 4122704384.0, "40": 4122704384.0, "45": 4122704384.0, "50": 4122704384.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 9.54908, "5": 0.20389, "10": 0.20703, "15": 0.20571, "20": 0.20704, "25": 0.20674, "30": 0.20774, "35": 0.20559, "40": 0.20467, "45": 0.20539, "50": 0.20473}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.82558,
            "2": 10.83322,
            "3": 10.82737,
            "4": 10.79588,
            "5": 10.85705,
            "6": 10.8639,
            "7": 10.82692,
            "8": 10.82592,
            "9": 10.83704,
            "10": 10.79717,
            "11": 10.87851,
            "12": 10.85796,
            "13": 10.85375,
            "14": 10.8755,
            "15": 10.79176,
            "16": 10.80297,
            "17": 10.77451,
            "18": 10.80401,
            "19": 10.79366,
            "20": 10.69587,
            "21": 10.68551,
            "22": 10.53152,
            "23": 10.70657,
            "24": 10.57319,
            "25": 10.51544,
            "26": 10.59074,
            "27": 10.60737,
            "28": 10.57024,
            "29": 10.58907,
            "30": 10.34675,
            "31": 10.07735,
            "32": 10.46316,
            "33": 10.45702,
            "34": 10.19922,
            "35": 10.25588,
            "36": 10.21246,
            "37": 10.34692,
            "38": 10.18008,
            "39": 10.408,
            "40": 10.07601,
            "41": 10.12933,
            "42": 10.21132,
            "43": 9.81692,
            "44": 9.94031,
            "45": 9.81697,
            "46": 9.80607,
            "47": 10.12474,
            "48": 9.84052,
            "49": 9.50972,
            "50": 9.88931
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1692.0,
            "2": 1562.0,
            "3": 1659.0,
            "4": 1661.0,
            "5": 1890.0,
            "6": 1885.0,
            "7": 1867.0,
            "8": 1651.0,
            "9": 1883.0,
            "10": 1424.0,
            "11": 1819.0,
            "12": 1770.0,
            "13": 1985.0,
            "14": 1835.0,
            "15": 1964.0,
            "16": 1829.0,
            "17": 1826.0,
            "18": 1644.0,
            "19": 1752.0,
            "20": 1709.0,
            "21": 1984.0,
            "22": 1705.0,
            "23": 1997.0,
            "24": 1643.0,
            "25": 1621.0,
            "26": 1650.0,
            "27": 1750.0,
            "28": 1867.0,
            "29": 1959.0,
            "30": 2054.0,
            "31": 1547.0,
            "32": 1858.0,
            "33": 2093.0,
            "34": 1860.0,
            "35": 1961.0,
            "36": 1983.0,
            "37": 2362.0,
            "38": 2143.0,
            "39": 2286.0,
            "40": 2090.0,
            "41": 2199.0,
            "42": 2313.0,
            "43": 1992.0,
            "44": 2026.0,
            "45": 2082.0,
            "46": 2197.0,
            "47": 2446.0,
            "48": 2296.0,
            "49": 2232.0,
            "50": 2425.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 759682560.0,
            "2": 759682560.0,
            "3": 759682560.0,
            "4": 759682560.0,
            "5": 759682560.0,
            "6": 759682560.0,
            "7": 759682560.0,
            "8": 759682560.0,
            "9": 759682560.0,
            "10": 759682560.0,
            "11": 759682560.0,
            "12": 759682560.0,
            "13": 759682560.0,
            "14": 759682560.0,
            "15": 759682560.0,
            "16": 759682560.0,
            "17": 759682560.0,
            "18": 759682560.0,
            "19": 759682560.0,
            "20": 759682560.0,
            "21": 759682560.0,
            "22": 759682560.0,
            "23": 759682560.0,
            "24": 759682560.0,
            "25": 759682560.0,
            "26": 759682560.0,
            "27": 759682560.0,
            "28": 759682560.0,
            "29": 759682560.0,
            "30": 759682560.0,
            "31": 759682560.0,
            "32": 759682560.0,
            "33": 759682560.0,
            "34": 759682560.0,
            "35": 759682560.0,
            "36": 759682560.0,
            "37": 759682560.0,
            "38": 759682560.0,
            "39": 759682560.0,
            "40": 759682560.0,
            "41": 759682560.0,
            "42": 759682560.0,
            "43": 759682560.0,
            "44": 759682560.0,
            "45": 759682560.0,
            "46": 759682560.0,
            "47": 759682560.0,
            "48": 759682560.0,
            "49": 759682560.0,
            "50": 759682560.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 3866814976.0,
            "2": 4148526592.0,
            "3": 4148526592.0,
            "4": 4148526592.0,
            "5": 4148526592.0,
            "6": 4148526592.0,
            "7": 4148526592.0,
            "8": 4148526592.0,
            "9": 4148526592.0,
            "10": 4148526592.0,
            "11": 4148526592.0,
            "12": 4148526592.0,
            "13": 4148526592.0,
            "14": 4148526592.0,
            "15": 4148526592.0,
            "16": 4148526592.0,
            "17": 4148526592.0,
            "18": 4148526592.0,
            "19": 4148526592.0,
            "20": 4148526592.0,
            "21": 4148526592.0,
            "22": 4148526592.0,
            "23": 4148526592.0,
            "24": 4148526592.0,
            "25": 4148526592.0,
            "26": 4148526592.0,
            "27": 4148526592.0,
            "28": 4148526592.0,
            "29": 4148526592.0,
            "30": 4148526592.0,
            "31": 4148526592.0,
            "32": 4148526592.0,
            "33": 4148526592.0,
            "34": 4148526592.0,
            "35": 4148526592.0,
            "36": 4148526592.0,
            "37": 4148526592.0,
            "38": 4148526592.0,
            "39": 4148526592.0,
            "40": 4148526592.0,
            "41": 4148526592.0,
            "42": 4148526592.0,
            "43": 4148526592.0,
            "44": 4148526592.0,
            "45": 4148526592.0,
            "46": 4148526592.0,
            "47": 4148526592.0,
            "48": 4148526592.0,
            "49": 4148526592.0,
            "50": 4148526592.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 9.03804,
            "3": 0.13858,
            "4": 0.12507,
            "5": 0.12463,
            "6": 0.12425,
            "7": 0.12393,
            "8": 0.12365,
            "9": 0.12427,
            "10": 0.12648,
            "11": 0.12263,
            "12": 0.12575,
            "13": 0.12379,
            "14": 0.12295,
            "15": 0.12869,
            "16": 0.12461,
            "17": 0.12438,
            "18": 0.12268,
            "19": 0.12324,
            "20": 0.12324,
            "21": 0.12291,
            "22": 0.12582,
            "23": 0.12767,
            "24": 0.12691,
            "25": 0.12504,
            "26": 0.12483,
            "27": 0.12358,
            "28": 0.1246,
            "29": 0.12998,
            "30": 0.1346,
            "31": 0.12439,
            "32": 0.12524,
            "33": 0.12436,
            "34": 0.12347,
            "35": 0.12901,
            "36": 0.12928,
            "37": 0.13039,
            "38": 0.12726,
            "39": 0.1253,
            "40": 0.12465,
            "41": 0.12644,
            "42": 0.12361,
            "43": 0.12563,
            "44": 0.12445,
            "45": 0.12536,
            "46": 0.12648,
            "47": 0.12433,
            "48": 0.12535,
            "49": 0.12492,
            "50": 0.12369
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.84523,
            "2": 10.85412,
            "3": 10.85365,
            "4": 10.83867,
            "5": 10.87428,
            "6": 10.89334,
            "7": 10.8541,
            "8": 10.86232,
            "9": 10.86355,
            "10": 10.82858,
            "11": 10.88772,
            "12": 10.87148,
            "13": 10.87939,
            "14": 10.89122,
            "15": 10.81926,
            "16": 10.83064,
            "17": 10.79873,
            "18": 10.81769,
            "19": 10.8196,
            "20": 10.72749,
            "21": 10.70555,
            "22": 10.56395,
            "23": 10.7282,
            "24": 10.60841,
            "25": 10.55195,
            "26": 10.60869,
            "27": 10.62878,
            "28": 10.5827,
            "29": 10.59984,
            "30": 10.36504,
            "31": 10.12095,
            "32": 10.47626,
            "33": 10.46908,
            "34": 10.22325,
            "35": 10.27845,
            "36": 10.22879,
            "37": 10.35946,
            "38": 10.19333,
            "39": 10.41585,
            "40": 10.09773,
            "41": 10.15714,
            "42": 10.22441,
            "43": 9.8328,
            "44": 9.96934,
            "45": 9.84203,
            "46": 9.83023,
            "47": 10.15603,
            "48": 9.85506,
            "49": 9.54051,
            "50": 9.91254
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1725.0,
            "2": 1664.0,
            "3": 1710.0,
            "4": 1712.0,
            "5": 1834.0,
            "6": 1743.0,
            "7": 1803.0,
            "8": 1744.0,
            "9": 1770.0,
            "10": 1478.0,
            "11": 1879.0,
            "12": 1696.0,
            "13": 1952.0,
            "14": 1732.0,
            "15": 1891.0,
            "16": 1872.0,
            "17": 1737.0,
            "18": 1744.0,
            "19": 1843.0,
            "20": 1639.0,
            "21": 1817.0,
            "22": 1615.0,
            "23": 1960.0,
            "24": 1646.0,
            "25": 1623.0,
            "26": 1671.0,
            "27": 1841.0,
            "28": 2009.0,
            "29": 1956.0,
            "30": 1882.0,
            "31": 1597.0,
            "32": 1921.0,
            "33": 2114.0,
            "34": 1828.0,
            "35": 2043.0,
            "36": 1947.0,
            "37": 2338.0,
            "38": 2227.0,
            "39": 2346.0,
            "40": 2168.0,
            "41": 2204.0,
            "42": 2247.0,
            "43": 2078.0,
            "44": 2064.0,
            "45": 2159.0,
            "46": 2489.0,
            "47": 2497.0,
            "48": 2305.0,
            "49": 2272.0,
            "50": 2319.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 759682560.0,
            "2": 759682560.0,
            "3": 759682560.0,
            "4": 759682560.0,
            "5": 759682560.0,
            "6": 759682560.0,
            "7": 759682560.0,
            "8": 759682560.0,
            "9": 759682560.0,
            "10": 759682560.0,
            "11": 759682560.0,
            "12": 759682560.0,
            "13": 759682560.0,
            "14": 759682560.0,
            "15": 759682560.0,
            "16": 759682560.0,
            "17": 759682560.0,
            "18": 759682560.0,
            "19": 759682560.0,
            "20": 759682560.0,
            "21": 759682560.0,
            "22": 759682560.0,
            "23": 759682560.0,
            "24": 759682560.0,
            "25": 759682560.0,
            "26": 759682560.0,
            "27": 759682560.0,
            "28": 759682560.0,
            "29": 759682560.0,
            "30": 759682560.0,
            "31": 759682560.0,
            "32": 759682560.0,
            "33": 759682560.0,
            "34": 759682560.0,
            "35": 759682560.0,
            "36": 759682560.0,
            "37": 759682560.0,
            "38": 759682560.0,
            "39": 759682560.0,
            "40": 759682560.0,
            "41": 759682560.0,
            "42": 759682560.0,
            "43": 759682560.0,
            "44": 759682560.0,
            "45": 759682560.0,
            "46": 759682560.0,
            "47": 759682560.0,
            "48": 759682560.0,
            "49": 759682560.0,
            "50": 759682560.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 3866814976.0,
            "2": 4148526592.0,
            "3": 4148526592.0,
            "4": 4148526592.0,
            "5": 4148526592.0,
            "6": 4148526592.0,
            "7": 4148526592.0,
            "8": 4148526592.0,
            "9": 4148526592.0,
            "10": 4148526592.0,
            "11": 4148526592.0,
            "12": 4148526592.0,
            "13": 4148526592.0,
            "14": 4148526592.0,
            "15": 4148526592.0,
            "16": 4148526592.0,
            "17": 4148526592.0,
            "18": 4148526592.0,
            "19": 4148526592.0,
            "20": 4148526592.0,
            "21": 4148526592.0,
            "22": 4148526592.0,
            "23": 4148526592.0,
            "24": 4148526592.0,
            "25": 4148526592.0,
            "26": 4148526592.0,
            "27": 4148526592.0,
            "28": 4148526592.0,
            "29": 4148526592.0,
            "30": 4148526592.0,
            "31": 4148526592.0,
            "32": 4148526592.0,
            "33": 4148526592.0,
            "34": 4148526592.0,
            "35": 4148526592.0,
            "36": 4148526592.0,
            "37": 4148526592.0,
            "38": 4148526592.0,
            "39": 4148526592.0,
            "40": 4148526592.0,
            "41": 4148526592.0,
            "42": 4148526592.0,
            "43": 4148526592.0,
            "44": 4148526592.0,
            "45": 4148526592.0,
            "46": 4148526592.0,
            "47": 4148526592.0,
            "48": 4148526592.0,
            "49": 4148526592.0,
            "50": 4148526592.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 9.85525,
            "2": 0.11909,
            "3": 0.10687,
            "4": 0.08766,
            "5": 0.08696,
            "6": 0.08852,
            "7": 0.08705,
            "8": 0.0866,
            "9": 0.08968,
            "10": 0.09051,
            "11": 0.08988,
            "12": 0.08985,
            "13": 0.09145,
            "14": 0.09034,
            "15": 0.09081,
            "16": 0.09029,
            "17": 0.09013,
            "18": 0.09023,
            "19": 0.09004,
            "20": 0.09017,
            "21": 0.08987,
            "22": 0.09048,
            "23": 0.09047,
            "24": 0.08991,
            "25": 0.09343,
            "26": 0.0901,
            "27": 0.08989,
            "28": 0.09443,
            "29": 0.09097,
            "30": 0.09106,
            "31": 0.0927,
            "32": 0.08602,
            "33": 0.08691,
            "34": 0.08755,
            "35": 0.08733,
            "36": 0.08692,
            "37": 0.08659,
            "38": 0.08868,
            "39": 0.08692,
            "40": 0.08731,
            "41": 0.08817,
            "42": 0.08696,
            "43": 0.08838,
            "44": 0.08859,
            "45": 0.08767,
            "46": 0.0873,
            "47": 0.08882,
            "48": 0.08631,
            "49": 0.08619,
            "50": 0.0861
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.81478,
            "2": 10.82042,
            "3": 10.81232,
            "4": 10.78901,
            "5": 10.85168,
            "6": 10.87098,
            "7": 10.83086,
            "8": 10.83472,
            "9": 10.83886,
            "10": 10.78749,
            "11": 10.87935,
            "12": 10.86017,
            "13": 10.86578,
            "14": 10.8781,
            "15": 10.79513,
            "16": 10.7958,
            "17": 10.76832,
            "18": 10.8109,
            "19": 10.79887,
            "20": 10.69131,
            "21": 10.6801,
            "22": 10.52152,
            "23": 10.7071,
            "24": 10.57678,
            "25": 10.52316,
            "26": 10.59563,
            "27": 10.58607,
            "28": 10.56175,
            "29": 10.56945,
            "30": 10.34623,
            "31": 10.10035,
            "32": 10.45432,
            "33": 10.44591,
            "34": 10.2072,
            "35": 10.26185,
            "36": 10.21228,
            "37": 10.32449,
            "38": 10.16803,
            "39": 10.38353,
            "40": 10.07219,
            "41": 10.13754,
            "42": 10.19755,
            "43": 9.81134,
            "44": 9.93287,
            "45": 9.80998,
            "46": 9.80859,
            "47": 10.12583,
            "48": 9.82132,
            "49": 9.50738,
            "50": 9.88351
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1544.0,
            "2": 1712.0,
            "3": 1660.0,
            "4": 1767.0,
            "5": 1795.0,
            "6": 1799.0,
            "7": 1805.0,
            "8": 1664.0,
            "9": 1769.0,
            "10": 1340.0,
            "11": 1830.0,
            "12": 1812.0,
            "13": 1837.0,
            "14": 1710.0,
            "15": 1894.0,
            "16": 1731.0,
            "17": 1848.0,
            "18": 1705.0,
            "19": 1686.0,
            "20": 1620.0,
            "21": 1840.0,
            "22": 1764.0,
            "23": 1937.0,
            "24": 1620.0,
            "25": 1728.0,
            "26": 1727.0,
            "27": 1821.0,
            "28": 2042.0,
            "29": 2029.0,
            "30": 1825.0,
            "31": 1594.0,
            "32": 1903.0,
            "33": 2041.0,
            "34": 1895.0,
            "35": 1908.0,
            "36": 1906.0,
            "37": 2224.0,
            "38": 2150.0,
            "39": 2327.0,
            "40": 2074.0,
            "41": 2314.0,
            "42": 2230.0,
            "43": 1920.0,
            "44": 2115.0,
            "45": 1962.0,
            "46": 2287.0,
            "47": 2481.0,
            "48": 2407.0,
            "49": 2270.0,
            "50": 2349.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 730321408.0,
            "2": 730321408.0,
            "3": 730321408.0,
            "4": 730321408.0,
            "5": 730321408.0,
            "6": 730321408.0,
            "7": 730321408.0,
            "8": 730321408.0,
            "9": 730321408.0,
            "10": 730321408.0,
            "11": 730321408.0,
            "12": 730321408.0,
            "13": 730321408.0,
            "14": 730321408.0,
            "15": 730321408.0,
            "16": 730321408.0,
            "17": 730321408.0,
            "18": 730321408.0,
            "19": 730321408.0,
            "20": 730321408.0,
            "21": 730321408.0,
            "22": 730321408.0,
            "23": 730321408.0,
            "24": 730321408.0,
            "25": 730321408.0,
            "26": 730321408.0,
            "27": 730321408.0,
            "28": 730321408.0,
            "29": 730321408.0,
            "30": 730321408.0,
            "31": 730321408.0,
            "32": 730321408.0,
            "33": 730321408.0,
            "34": 730321408.0,
            "35": 730321408.0,
            "36": 730321408.0,
            "37": 730321408.0,
            "38": 730321408.0,
            "39": 730321408.0,
            "40": 730321408.0,
            "41": 730321408.0,
            "42": 730321408.0,
            "43": 730321408.0,
            "44": 730321408.0,
            "45": 730321408.0,
            "46": 730321408.0,
            "47": 730321408.0,
            "48": 730321408.0,
            "49": 730321408.0,
            "50": 730321408.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 3837453824.0,
            "2": 4119165440.0,
            "3": 4119165440.0,
            "4": 4119165440.0,
            "5": 4119165440.0,
            "6": 4119165440.0,
            "7": 4119165440.0,
            "8": 4119165440.0,
            "9": 4119165440.0,
            "10": 4119165440.0,
            "11": 4119165440.0,
            "12": 4119165440.0,
            "13": 4119165440.0,
            "14": 4119165440.0,
            "15": 4119165440.0,
            "16": 4119165440.0,
            "17": 4119165440.0,
            "18": 4119165440.0,
            "19": 4119165440.0,
            "20": 4119165440.0,
            "21": 4119165440.0,
            "22": 4119165440.0,
            "23": 4119165440.0,
            "24": 4119165440.0,
            "25": 4119165440.0,
            "26": 4119165440.0,
            "27": 4119165440.0,
            "28": 4119165440.0,
            "29": 4119165440.0,
            "30": 4119165440.0,
            "31": 4119165440.0,
            "32": 4119165440.0,
            "33": 4119165440.0,
            "34": 4119165440.0,
            "35": 4119165440.0,
            "36": 4119165440.0,
            "37": 4119165440.0,
            "38": 4119165440.0,
            "39": 4119165440.0,
            "40": 4119165440.0,
            "41": 4119165440.0,
            "42": 4119165440.0,
            "43": 4119165440.0,
            "44": 4119165440.0,
            "45": 4119165440.0,
            "46": 4119165440.0,
            "47": 4119165440.0,
            "48": 4119165440.0,
            "49": 4119165440.0,
            "50": 4119165440.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 12.00614,
            "2": 0.18804,
            "3": 0.28065,
            "4": 0.14064,
            "5": 0.1388,
            "6": 0.13824,
            "7": 0.13862,
            "8": 0.13843,
            "9": 0.14064,
            "10": 0.13568,
            "11": 0.13353,
            "12": 0.13327,
            "13": 0.13418,
            "14": 0.13368,
            "15": 0.13399,
            "16": 0.13326,
            "17": 0.13409,
            "18": 0.13281,
            "19": 0.13303,
            "20": 0.13395,
            "21": 0.13357,
            "22": 0.13388,
            "23": 0.13403,
            "24": 0.1333,
            "25": 0.13242,
            "26": 0.13302,
            "27": 0.134,
            "28": 0.13304,
            "29": 0.13302,
            "30": 0.13398,
            "31": 0.13424,
            "32": 0.13315,
            "33": 0.13365,
            "34": 0.13391,
            "35": 0.13392,
            "36": 0.13316,
            "37": 0.13254,
            "38": 0.13292,
            "39": 0.1333,
            "40": 0.13401,
            "41": 0.13408,
            "42": 0.13349,
            "43": 0.13469,
            "44": 0.13282,
            "45": 0.1344,
            "46": 0.13536,
            "47": 0.13291,
            "48": 0.13374,
            "49": 0.13338,
            "50": 0.13336
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgx_a100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.81478,
            "2": 10.82042,
            "3": 10.81232,
            "4": 10.78901,
            "5": 10.85168,
            "6": 10.87098,
            "7": 10.83086,
            "8": 10.83472,
            "9": 10.83886,
            "10": 10.78749,
            "11": 10.87935,
            "12": 10.86017,
            "13": 10.86578,
            "14": 10.8781,
            "15": 10.79513,
            "16": 10.7958,
            "17": 10.76832,
            "18": 10.8109,
            "19": 10.79887,
            "20": 10.69131,
            "21": 10.6801,
            "22": 10.52152,
            "23": 10.7071,
            "24": 10.57678,
            "25": 10.52316,
            "26": 10.59563,
            "27": 10.58607,
            "28": 10.56175,
            "29": 10.56945,
            "30": 10.34623,
            "31": 10.10035,
            "32": 10.45432,
            "33": 10.44591,
            "34": 10.2072,
            "35": 10.26185,
            "36": 10.21228,
            "37": 10.32449,
            "38": 10.16803,
            "39": 10.38353,
            "40": 10.07219,
            "41": 10.13754,
            "42": 10.19755,
            "43": 9.81134,
            "44": 9.93287,
            "45": 9.80998,
            "46": 9.80859,
            "47": 10.12583,
            "48": 9.82132,
            "49": 9.50738,
            "50": 9.88351
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1544.0,
            "2": 1712.0,
            "3": 1660.0,
            "4": 1767.0,
            "5": 1795.0,
            "6": 1799.0,
            "7": 1805.0,
            "8": 1664.0,
            "9": 1769.0,
            "10": 1340.0,
            "11": 1830.0,
            "12": 1812.0,
            "13": 1837.0,
            "14": 1710.0,
            "15": 1894.0,
            "16": 1731.0,
            "17": 1848.0,
            "18": 1705.0,
            "19": 1686.0,
            "20": 1620.0,
            "21": 1840.0,
            "22": 1764.0,
            "23": 1937.0,
            "24": 1620.0,
            "25": 1728.0,
            "26": 1727.0,
            "27": 1821.0,
            "28": 2042.0,
            "29": 2029.0,
            "30": 1825.0,
            "31": 1594.0,
            "32": 1903.0,
            "33": 2041.0,
            "34": 1895.0,
            "35": 1908.0,
            "36": 1906.0,
            "37": 2224.0,
            "38": 2150.0,
            "39": 2327.0,
            "40": 2074.0,
            "41": 2314.0,
            "42": 2230.0,
            "43": 1920.0,
            "44": 2115.0,
            "45": 1962.0,
            "46": 2287.0,
            "47": 2481.0,
            "48": 2407.0,
            "49": 2270.0,
            "50": 2349.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 730321408.0,
            "2": 730321408.0,
            "3": 730321408.0,
            "4": 730321408.0,
            "5": 730321408.0,
            "6": 730321408.0,
            "7": 730321408.0,
            "8": 730321408.0,
            "9": 730321408.0,
            "10": 730321408.0,
            "11": 730321408.0,
            "12": 730321408.0,
            "13": 730321408.0,
            "14": 730321408.0,
            "15": 730321408.0,
            "16": 730321408.0,
            "17": 730321408.0,
            "18": 730321408.0,
            "19": 730321408.0,
            "20": 730321408.0,
            "21": 730321408.0,
            "22": 730321408.0,
            "23": 730321408.0,
            "24": 730321408.0,
            "25": 730321408.0,
            "26": 730321408.0,
            "27": 730321408.0,
            "28": 730321408.0,
            "29": 730321408.0,
            "30": 730321408.0,
            "31": 730321408.0,
            "32": 730321408.0,
            "33": 730321408.0,
            "34": 730321408.0,
            "35": 730321408.0,
            "36": 730321408.0,
            "37": 730321408.0,
            "38": 730321408.0,
            "39": 730321408.0,
            "40": 730321408.0,
            "41": 730321408.0,
            "42": 730321408.0,
            "43": 730321408.0,
            "44": 730321408.0,
            "45": 730321408.0,
            "46": 730321408.0,
            "47": 730321408.0,
            "48": 730321408.0,
            "49": 730321408.0,
            "50": 730321408.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 3837453824.0,
            "2": 4119165440.0,
            "3": 4119165440.0,
            "4": 4119165440.0,
            "5": 4119165440.0,
            "6": 4119165440.0,
            "7": 4119165440.0,
            "8": 4119165440.0,
            "9": 4119165440.0,
            "10": 4119165440.0,
            "11": 4119165440.0,
            "12": 4119165440.0,
            "13": 4119165440.0,
            "14": 4119165440.0,
            "15": 4119165440.0,
            "16": 4119165440.0,
            "17": 4119165440.0,
            "18": 4119165440.0,
            "19": 4119165440.0,
            "20": 4119165440.0,
            "21": 4119165440.0,
            "22": 4119165440.0,
            "23": 4119165440.0,
            "24": 4119165440.0,
            "25": 4119165440.0,
            "26": 4119165440.0,
            "27": 4119165440.0,
            "28": 4119165440.0,
            "29": 4119165440.0,
            "30": 4119165440.0,
            "31": 4119165440.0,
            "32": 4119165440.0,
            "33": 4119165440.0,
            "34": 4119165440.0,
            "35": 4119165440.0,
            "36": 4119165440.0,
            "37": 4119165440.0,
            "38": 4119165440.0,
            "39": 4119165440.0,
            "40": 4119165440.0,
            "41": 4119165440.0,
            "42": 4119165440.0,
            "43": 4119165440.0,
            "44": 4119165440.0,
            "45": 4119165440.0,
            "46": 4119165440.0,
            "47": 4119165440.0,
            "48": 4119165440.0,
            "49": 4119165440.0,
            "50": 4119165440.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 5.08022,
            "2": 0.18501,
            "3": 0.16189,
            "4": 0.1446,
            "5": 0.14506,
            "6": 0.1419,
            "7": 0.14224,
            "8": 0.14228,
            "9": 0.14173,
            "10": 0.14459,
            "11": 0.14301,
            "12": 0.14363,
            "13": 0.14381,
            "14": 0.143,
            "15": 0.14252,
            "16": 0.14227,
            "17": 0.14143,
            "18": 0.1425,
            "19": 0.14097,
            "20": 0.14109,
            "21": 0.1415,
            "22": 0.14165,
            "23": 0.142,
            "24": 0.14241,
            "25": 0.1412,
            "26": 0.14126,
            "27": 0.14207,
            "28": 0.14045,
            "29": 0.14206,
            "30": 0.14192,
            "31": 0.14255,
            "32": 0.14132,
            "33": 0.14178,
            "34": 0.14151,
            "35": 0.14117,
            "36": 0.14088,
            "37": 0.14137,
            "38": 0.14111,
            "39": 0.13997,
            "40": 0.14118,
            "41": 0.14179,
            "42": 0.14063,
            "43": 0.14381,
            "44": 0.14122,
            "45": 0.14142,
            "46": 0.14112,
            "47": 0.14094,
            "48": 0.14134,
            "49": 0.14094,
            "50": 0.14002
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgxa100_dracooci-ord.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.82005,
            "2": 10.81907,
            "3": 10.81396,
            "4": 10.78497,
            "5": 10.85284,
            "6": 10.87449,
            "7": 10.83201,
            "8": 10.83297,
            "9": 10.83935,
            "10": 10.78455,
            "11": 10.87798,
            "12": 10.86112,
            "13": 10.86444,
            "14": 10.87605,
            "15": 10.79229,
            "16": 10.79509,
            "17": 10.76768,
            "18": 10.81005,
            "19": 10.79719,
            "20": 10.69211,
            "21": 10.68164,
            "22": 10.52085,
            "23": 10.70893,
            "24": 10.57599,
            "25": 10.52412,
            "26": 10.59517,
            "27": 10.58426,
            "28": 10.56233,
            "29": 10.57013,
            "30": 10.34552,
            "31": 10.10049,
            "32": 10.45378,
            "33": 10.44627,
            "34": 10.20606,
            "35": 10.26239,
            "36": 10.21239,
            "37": 10.32522,
            "38": 10.16777,
            "39": 10.38334,
            "40": 10.07241,
            "41": 10.13863,
            "42": 10.19814,
            "43": 9.81073,
            "44": 9.93244,
            "45": 9.81101,
            "46": 9.80877,
            "47": 10.12608,
            "48": 9.82108,
            "49": 9.50625,
            "50": 9.88422
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1559.0,
            "2": 1591.0,
            "3": 1727.0,
            "4": 1835.0,
            "5": 1840.0,
            "6": 1719.0,
            "7": 1740.0,
            "8": 1591.0,
            "9": 1839.0,
            "10": 1380.0,
            "11": 1856.0,
            "12": 1693.0,
            "13": 1906.0,
            "14": 1757.0,
            "15": 1848.0,
            "16": 1791.0,
            "17": 1752.0,
            "18": 1669.0,
            "19": 1722.0,
            "20": 1601.0,
            "21": 1900.0,
            "22": 1662.0,
            "23": 2006.0,
            "24": 1597.0,
            "25": 1635.0,
            "26": 1709.0,
            "27": 1931.0,
            "28": 2043.0,
            "29": 1888.0,
            "30": 1936.0,
            "31": 1550.0,
            "32": 1913.0,
            "33": 2135.0,
            "34": 1703.0,
            "35": 1908.0,
            "36": 1953.0,
            "37": 2291.0,
            "38": 2210.0,
            "39": 2334.0,
            "40": 2100.0,
            "41": 2300.0,
            "42": 2236.0,
            "43": 1897.0,
            "44": 1993.0,
            "45": 2098.0,
            "46": 2298.0,
            "47": 2504.0,
            "48": 2356.0,
            "49": 2268.0,
            "50": 2333.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 730320896.0,
            "2": 730320896.0,
            "3": 730320896.0,
            "4": 730320896.0,
            "5": 730320896.0,
            "6": 730320896.0,
            "7": 730320896.0,
            "8": 730320896.0,
            "9": 730320896.0,
            "10": 730320896.0,
            "11": 730320896.0,
            "12": 730320896.0,
            "13": 730320896.0,
            "14": 730320896.0,
            "15": 730320896.0,
            "16": 730320896.0,
            "17": 730320896.0,
            "18": 730320896.0,
            "19": 730320896.0,
            "20": 730320896.0,
            "21": 730320896.0,
            "22": 730320896.0,
            "23": 730320896.0,
            "24": 730320896.0,
            "25": 730320896.0,
            "26": 730320896.0,
            "27": 730320896.0,
            "28": 730320896.0,
            "29": 730320896.0,
            "30": 730320896.0,
            "31": 730320896.0,
            "32": 730320896.0,
            "33": 730320896.0,
            "34": 730320896.0,
            "35": 730320896.0,
            "36": 730320896.0,
            "37": 730320896.0,
            "38": 730320896.0,
            "39": 730320896.0,
            "40": 730320896.0,
            "41": 730320896.0,
            "42": 730320896.0,
            "43": 730320896.0,
            "44": 730320896.0,
            "45": 730320896.0,
            "46": 730320896.0,
            "47": 730320896.0,
            "48": 730320896.0,
            "49": 730320896.0,
            "50": 730320896.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 3837453312.0,
            "2": 4119164928.0,
            "3": 4119164928.0,
            "4": 4119164928.0,
            "5": 4119164928.0,
            "6": 4119164928.0,
            "7": 4119164928.0,
            "8": 4119164928.0,
            "9": 4119164928.0,
            "10": 4119164928.0,
            "11": 4119164928.0,
            "12": 4119164928.0,
            "13": 4119164928.0,
            "14": 4119164928.0,
            "15": 4119164928.0,
            "16": 4119164928.0,
            "17": 4119164928.0,
            "18": 4119164928.0,
            "19": 4119164928.0,
            "20": 4119164928.0,
            "21": 4119164928.0,
            "22": 4119164928.0,
            "23": 4119164928.0,
            "24": 4119164928.0,
            "25": 4119164928.0,
            "26": 4119164928.0,
            "27": 4119164928.0,
            "28": 4119164928.0,
            "29": 4119164928.0,
            "30": 4119164928.0,
            "31": 4119164928.0,
            "32": 4119164928.0,
            "33": 4119164928.0,
            "34": 4119164928.0,
            "35": 4119164928.0,
            "36": 4119164928.0,
            "37": 4119164928.0,
            "38": 4119164928.0,
            "39": 4119164928.0,
            "40": 4119164928.0,
            "41": 4119164928.0,
            "42": 4119164928.0,
            "43": 4119164928.0,
            "44": 4119164928.0,
            "45": 4119164928.0,
            "46": 4119164928.0,
            "47": 4119164928.0,
            "48": 4119164928.0,
            "49": 4119164928.0,
            "50": 4119164928.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 21.82644,
            "2": 0.19908,
            "3": 0.17208,
            "4": 0.17348,
            "5": 0.40692,
            "6": 0.17348,
            "7": 0.17221,
            "8": 0.17282,
            "9": 0.17343,
            "10": 0.17259,
            "11": 0.44574,
            "12": 0.17197,
            "13": 0.17235,
            "14": 0.17135,
            "15": 0.17217,
            "16": 0.17214,
            "17": 0.17346,
            "18": 0.17055,
            "19": 0.17076,
            "20": 0.17071,
            "21": 0.17349,
            "22": 0.17417,
            "23": 0.16998,
            "24": 0.17303,
            "25": 0.17019,
            "26": 0.16905,
            "27": 0.16967,
            "28": 0.17087,
            "29": 0.16779,
            "30": 0.16786,
            "31": 0.1689,
            "32": 0.16672,
            "33": 0.1672,
            "34": 0.16926,
            "35": 0.16914,
            "36": 0.16747,
            "37": 0.16765,
            "38": 0.16682,
            "39": 0.1667,
            "40": 0.16914,
            "41": 0.16662,
            "42": 0.16688,
            "43": 0.16639,
            "44": 0.16515,
            "45": 0.16517,
            "46": 0.16701,
            "47": 0.16705,
            "48": 0.16627,
            "49": 0.16652,
            "50": 0.16472
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_lts_dgxa100_dracooci.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.82005,
            "2": 10.81907,
            "3": 10.81396,
            "4": 10.78497,
            "5": 10.85284,
            "6": 10.87449,
            "7": 10.83201,
            "8": 10.83297,
            "9": 10.83935,
            "10": 10.78455,
            "11": 10.87798,
            "12": 10.86112,
            "13": 10.86444,
            "14": 10.87605,
            "15": 10.79229,
            "16": 10.79509,
            "17": 10.76768,
            "18": 10.81005,
            "19": 10.79719,
            "20": 10.69211,
            "21": 10.68164,
            "22": 10.52085,
            "23": 10.70893,
            "24": 10.57599,
            "25": 10.52412,
            "26": 10.59517,
            "27": 10.58426,
            "28": 10.56233,
            "29": 10.57013,
            "30": 10.34552,
            "31": 10.10049,
            "32": 10.45378,
            "33": 10.44627,
            "34": 10.20606,
            "35": 10.26239,
            "36": 10.21239,
            "37": 10.32522,
            "38": 10.16777,
            "39": 10.38334,
            "40": 10.07241,
            "41": 10.13863,
            "42": 10.19814,
            "43": 9.81073,
            "44": 9.93244,
            "45": 9.81101,
            "46": 9.80877,
            "47": 10.12608,
            "48": 9.82108,
            "49": 9.50625,
            "50": 9.88422
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1559.0,
            "2": 1591.0,
            "3": 1727.0,
            "4": 1835.0,
            "5": 1840.0,
            "6": 1719.0,
            "7": 1740.0,
            "8": 1591.0,
            "9": 1839.0,
            "10": 1380.0,
            "11": 1856.0,
            "12": 1693.0,
            "13": 1906.0,
            "14": 1757.0,
            "15": 1848.0,
            "16": 1791.0,
            "17": 1752.0,
            "18": 1669.0,
            "19": 1722.0,
            "20": 1601.0,
            "21": 1900.0,
            "22": 1662.0,
            "23": 2006.0,
            "24": 1597.0,
            "25": 1635.0,
            "26": 1709.0,
            "27": 1931.0,
            "28": 2043.0,
            "29": 1888.0,
            "30": 1936.0,
            "31": 1550.0,
            "32": 1913.0,
            "33": 2135.0,
            "34": 1703.0,
            "35": 1908.0,
            "36": 1953.0,
            "37": 2291.0,
            "38": 2210.0,
            "39": 2334.0,
            "40": 2100.0,
            "41": 2300.0,
            "42": 2236.0,
            "43": 1897.0,
            "44": 1993.0,
            "45": 2098.0,
            "46": 2298.0,
            "47": 2504.0,
            "48": 2356.0,
            "49": 2268.0,
            "50": 2333.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 730320896.0,
            "2": 730320896.0,
            "3": 730320896.0,
            "4": 730320896.0,
            "5": 730320896.0,
            "6": 730320896.0,
            "7": 730320896.0,
            "8": 730320896.0,
            "9": 730320896.0,
            "10": 730320896.0,
            "11": 730320896.0,
            "12": 730320896.0,
            "13": 730320896.0,
            "14": 730320896.0,
            "15": 730320896.0,
            "16": 730320896.0,
            "17": 730320896.0,
            "18": 730320896.0,
            "19": 730320896.0,
            "20": 730320896.0,
            "21": 730320896.0,
            "22": 730320896.0,
            "23": 730320896.0,
            "24": 730320896.0,
            "25": 730320896.0,
            "26": 730320896.0,
            "27": 730320896.0,
            "28": 730320896.0,
            "29": 730320896.0,
            "30": 730320896.0,
            "31": 730320896.0,
            "32": 730320896.0,
            "33": 730320896.0,
            "34": 730320896.0,
            "35": 730320896.0,
            "36": 730320896.0,
            "37": 730320896.0,
            "38": 730320896.0,
            "39": 730320896.0,
            "40": 730320896.0,
            "41": 730320896.0,
            "42": 730320896.0,
            "43": 730320896.0,
            "44": 730320896.0,
            "45": 730320896.0,
            "46": 730320896.0,
            "47": 730320896.0,
            "48": 730320896.0,
            "49": 730320896.0,
            "50": 730320896.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 3837453312.0,
            "2": 4119164928.0,
            "3": 4119164928.0,
            "4": 4119164928.0,
            "5": 4119164928.0,
            "6": 4119164928.0,
            "7": 4119164928.0,
            "8": 4119164928.0,
            "9": 4119164928.0,
            "10": 4119164928.0,
            "11": 4119164928.0,
            "12": 4119164928.0,
            "13": 4119164928.0,
            "14": 4119164928.0,
            "15": 4119164928.0,
            "16": 4119164928.0,
            "17": 4119164928.0,
            "18": 4119164928.0,
            "19": 4119164928.0,
            "20": 4119164928.0,
            "21": 4119164928.0,
            "22": 4119164928.0,
            "23": 4119164928.0,
            "24": 4119164928.0,
            "25": 4119164928.0,
            "26": 4119164928.0,
            "27": 4119164928.0,
            "28": 4119164928.0,
            "29": 4119164928.0,
            "30": 4119164928.0,
            "31": 4119164928.0,
            "32": 4119164928.0,
            "33": 4119164928.0,
            "34": 4119164928.0,
            "35": 4119164928.0,
            "36": 4119164928.0,
            "37": 4119164928.0,
            "38": 4119164928.0,
            "39": 4119164928.0,
            "40": 4119164928.0,
            "41": 4119164928.0,
            "42": 4119164928.0,
            "43": 4119164928.0,
            "44": 4119164928.0,
            "45": 4119164928.0,
            "46": 4119164928.0,
            "47": 4119164928.0,
            "48": 4119164928.0,
            "49": 4119164928.0,
            "50": 4119164928.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 19.01426,
            "2": 0.19331,
            "3": 0.17686,
            "4": 0.17351,
            "5": 0.17409,
            "6": 0.39233,
            "7": 0.17062,
            "8": 0.17244,
            "9": 0.1721,
            "10": 0.1728,
            "11": 0.16853,
            "12": 0.16766,
            "13": 0.45674,
            "14": 0.17028,
            "15": 0.16973,
            "16": 0.16893,
            "17": 0.16884,
            "18": 0.17013,
            "19": 0.16961,
            "20": 0.17167,
            "21": 0.1673,
            "22": 0.16984,
            "23": 0.17183,
            "24": 0.17023,
            "25": 0.16914,
            "26": 0.16981,
            "27": 0.1674,
            "28": 0.16751,
            "29": 0.16693,
            "30": 0.16857,
            "31": 0.16737,
            "32": 0.16785,
            "33": 0.16718,
            "34": 0.16686,
            "35": 0.16592,
            "36": 0.16924,
            "37": 0.16753,
            "38": 0.16813,
            "39": 0.16663,
            "40": 0.22514,
            "41": 0.16853,
            "42": 0.17036,
            "43": 0.16917,
            "44": 0.167,
            "45": 0.16766,
            "46": 0.167,
            "47": 0.16654,
            "48": 0.16869,
            "49": 0.16681,
            "50": 0.16794
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
  PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --num-layers-per-virtual-pipeline-stage: 1
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-optim-fully-reshardable: true
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.81478, "5": 10.8517, "10": 10.78749, "15": 10.79505, "20": 10.69119, "25": 10.52294, "30": 10.34604, "35": 10.26165, "40": 10.072, "45": 9.80976, "50": 9.88336}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1549.0, "5": 1915.0, "10": 1391.0, "15": 1873.0, "20": 1698.0, "25": 1701.0, "30": 1980.0, "35": 1893.0, "40": 2037.0, "45": 1968.0, "50": 2391.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 731763200.0, "5": 731763200.0, "10": 731763200.0, "15": 731763200.0, "20": 731763200.0, "25": 731763200.0, "30": 731763200.0, "35": 731763200.0, "40": 731763200.0, "45": 731763200.0, "50": 731763200.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3838895616.0, "5": 4120607232.0, "10": 4120607232.0, "15": 4120607232.0, "20": 4120607232.0, "25": 4120607232.0, "30": 4120607232.0, "35": 4120607232.0, "40": 4120607232.0, "45": 4120607232.0, "50": 4120607232.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 6.90183, "5": 0.16201, "10": 0.16065, "15": 0.16057, "20": 0.16003, "25": 0.15943, "30": 0.16236, "35": 0.16281, "40": 0.16251, "45": 0.16312, "50": 0.16235}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82005, "5": 10.85284, "10": 10.78455, "15": 10.79229, "20": 10.69211, "25": 10.52412, "30": 10.34552, "35": 10.26239, "40": 10.07241, "45": 9.81101, "50": 9.88422}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1559.0, "5": 1840.0, "10": 1380.0, "15": 1848.0, "20": 1601.0, "25": 1635.0, "30": 1936.0, "35": 1908.0, "40": 2100.0, "45": 2098.0, "50": 2333.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 733859840.0, "5": 733859840.0, "10": 733859840.0, "15": 733859840.0, "20": 733859840.0, "25": 733859840.0, "30": 733859840.0, "35": 733859840.0, "40": 733859840.0, "45": 733859840.0, "50": 733859840.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3838895104.0, "5": 4122703872.0, "10": 4122703872.0, "15": 4122703872.0, "20": 4122703872.0, "25": 4122703872.0, "30": 4122703872.0, "35": 4122703872.0, "40": 4122703872.0, "45": 4122703872.0, "50": 4122703872.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 24.4831, "5": 0.16017, "10": 0.16204, "15": 0.15957, "20": 0.16044, "25": 0.16155, "30": 0.16319, "35": 0.16208, "40": 0.16158, "45": 0.16289, "50": 0.16266}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_calculate_per_token_loss/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --num-layers-per-virtual-pipeline-stage: 1
  --calculate-per-token-loss: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.81478, "5": 10.85169, "10": 10.78745, "15": 10.79503, "20": 10.69101, "25": 10.52199, "30": 10.34557, "35": 10.25813, "40": 10.06995, "45": 9.80182, "50": 9.8759}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1549.0, "5": 1939.0, "10": 1348.0, "15": 1913.0, "20": 1684.0, "25": 1625.0, "30": 1929.0, "35": 1956.0, "40": 2108.0, "45": 2034.0, "50": 2458.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 731763200.0, "5": 731763200.0, "10": 731763200.0, "15": 731763200.0, "20": 731763200.0, "25": 731763200.0, "30": 731763200.0, "35": 731763200.0, "40": 731763200.0, "45": 731763200.0, "50": 731763200.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3838895616.0, "5": 4120607232.0, "10": 4120607232.0, "15": 4120607232.0, "20": 4120607232.0, "25": 4120607232.0, "30": 4120607232.0, "35": 4120607232.0, "40": 4120607232.0, "45": 4120607232.0, "50": 4120607232.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.92138, "5": 0.1642, "10": 0.16403, "15": 0.16127, "20": 0.16115, "25": 0.16151, "30": 0.16082, "35": 0.16141, "40": 0.1612, "45": 0.16203, "50": 0.16105}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82005, "5": 10.85285, "10": 10.78449, "15": 10.79226, "20": 10.69196, "25": 10.52317, "30": 10.34507, "35": 10.25889, "40": 10.07027, "45": 9.80301, "50": 9.87673}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1559.0, "5": 1915.0, "10": 1361.0, "15": 1831.0, "20": 1695.0, "25": 1596.0, "30": 1821.0, "35": 1872.0, "40": 2121.0, "45": 2090.0, "50": 2395.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 733859840.0, "5": 733859840.0, "10": 733859840.0, "15": 733859840.0, "20": 733859840.0, "25": 733859840.0, "30": 733859840.0, "35": 733859840.0, "40": 733859840.0, "45": 733859840.0, "50": 733859840.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3838895104.0, "5": 4122703872.0, "10": 4122703872.0, "15": 4122703872.0, "20": 4122703872.0, "25": 4122703872.0, "30": 4122703872.0, "35": 4122703872.0, "40": 4122703872.0, "45": 4122703872.0, "50": 4122703872.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 16.37934, "5": 0.166, "10": 0.16217, "15": 0.1635, "20": 0.16167, "25": 0.15901, "30": 0.15975, "35": 0.15935, "40": 0.15876, "45": 0.16028, "50": 0.15898}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_decoupled_lr/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --num-layers-per-virtual-pipeline-stage: 1
  --decoupled-lr: 0.0002
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --ckpt-format: torch
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.81478, "5": 10.8517, "10": 10.78749, "15": 10.79509, "20": 10.69117, "25": 10.52295, "30": 10.34604, "35": 10.26165, "40": 10.07202, "45": 9.8098, "50": 9.88338}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1549.0, "5": 1915.0, "10": 1391.0, "15": 1847.0, "20": 1680.0, "25": 1704.0, "30": 1891.0, "35": 1937.0, "40": 2016.0, "45": 1985.0, "50": 2387.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 522845696.0, "5": 522845696.0, "10": 522845696.0, "15": 522845696.0, "20": 522845696.0, "25": 522845696.0, "30": 522845696.0, "35": 522845696.0, "40": 522845696.0, "45": 522845696.0, "50": 522845696.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3768845824.0, "5": 3912607232.0, "10": 3912607232.0, "15": 3912607232.0, "20": 3912607232.0, "25": 3912607232.0, "30": 3912607232.0, "35": 3912607232.0, "40": 3912607232.0, "45": 3912607232.0, "50": 3912607232.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 6.39253, "5": 0.16449, "10": 0.16506, "15": 0.16316, "20": 0.16505, "25": 0.16455, "30": 0.16427, "35": 0.16495, "40": 0.16379, "45": 0.1664, "50": 0.16636}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82005, "5": 10.85284, "10": 10.78455, "15": 10.7923, "20": 10.69211, "25": 10.52414, "30": 10.34555, "35": 10.2624, "40": 10.07237, "45": 9.81103, "50": 9.88417}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1559.0, "5": 1840.0, "10": 1380.0, "15": 1850.0, "20": 1668.0, "25": 1607.0, "30": 1945.0, "35": 1860.0, "40": 2022.0, "45": 2042.0, "50": 2292.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 523003904.0, "5": 523003904.0, "10": 523003904.0, "15": 523003904.0, "20": 523003904.0, "25": 523003904.0, "30": 523003904.0, "35": 523003904.0, "40": 523003904.0, "45": 523003904.0, "50": 523003904.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3768872960.0, "5": 3912765440.0, "10": 3912765440.0, "15": 3912765440.0, "20": 3912765440.0, "25": 3912765440.0, "30": 3912765440.0, "35": 3912765440.0, "40": 3912765440.0, "45": 3912765440.0, "50": 3912765440.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 15.68312, "5": 0.16619, "10": 0.1665, "15": 0.16491, "20": 0.16632, "25": 0.1654, "30": 0.1643, "35": 0.16468, "40": 0.1661, "45": 0.16588, "50": 0.16736}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --num-layers-per-virtual-pipeline-stage: 1
  --use-distributed-optimizer: true
  --overlap-grad-reduce: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.81478, "5": 10.8517, "10": 10.78749, "15": 10.79509, "20": 10.69117, "25": 10.52295, "30": 10.34604, "35": 10.26165, "40": 10.07202, "45": 9.8098, "50": 9.88338}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1549.0, "5": 1915.0, "10": 1391.0, "15": 1847.0, "20": 1680.0, "25": 1704.0, "30": 1891.0, "35": 1937.0, "40": 2016.0, "45": 1985.0, "50": 2387.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 522845696.0, "5": 522845696.0, "10": 522845696.0, "15": 522845696.0, "20": 522845696.0, "25": 522845696.0, "30": 522845696.0, "35": 522845696.0, "40": 522845696.0, "45": 522845696.0, "50": 522845696.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3768845824.0, "5": 3912607232.0, "10": 3912607232.0, "15": 3912607232.0, "20": 3912607232.0, "25": 3912607232.0, "30": 3912607232.0, "35": 3912607232.0, "40": 3912607232.0, "45": 3912607232.0, "50": 3912607232.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 6.25021, "5": 0.17415, "10": 0.16779, "15": 0.16862, "20": 0.16864, "25": 0.1694, "30": 0.16833, "35": 0.16882, "40": 0.1687, "45": 0.17105, "50": 0.16725}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.82005, "5": 10.85284, "10": 10.78455, "15": 10.7923, "20": 10.69211, "25": 10.52414, "30": 10.34555, "35": 10.2624, "40": 10.07237, "45": 9.81103, "50": 9.88417}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1559.0, "5": 1840.0, "10": 1380.0, "15": 1850.0, "20": 1668.0, "25": 1607.0, "30": 1945.0, "35": 1860.0, "40": 2022.0, "45": 2042.0, "50": 2292.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 523003904.0, "5": 523003904.0, "10": 523003904.0, "15": 523003904.0, "20": 523003904.0, "25": 523003904.0, "30": 523003904.0, "35": 523003904.0, "40": 523003904.0, "45": 523003904.0, "50": 523003904.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3768872960.0, "5": 3912765440.0, "10": 3912765440.0, "15": 3912765440.0, "20": 3912765440.0, "25": 3912765440.0, "30": 3912765440.0, "35": 3912765440.0, "40": 3912765440.0, "45": 3912765440.0, "50": 3912765440.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 15.64962, "5": 0.17522, "10": 0.16399, "15": 0.16232, "20": 0.16408, "25": 0.16729, "30": 0.16476, "35": 0.16919, "40": 0.16164, "45": 0.16235, "50": 0.16104}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --num-layers-per-virtual-pipeline-stage: 1
  --use-distributed-optimizer: true
  --overlap-grad-reduce: true
  --overlap-param-gather: true
  --check-weight-hash-across-dp-replicas-interval: 10
  --disable-gloo-process-groups: true
  --ckpt-fully-parallel-load: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-optim-fully-reshardable: true
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.81478, "5": 10.85168, "10": 10.78749, "15": 10.79514, "20": 10.69131, "25": 10.52315, "30": 10.34624, "35": 10.26186, "40": 10.07222, "45": 9.81001, "50": 9.88351}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 1544.0, "5": 1795.0, "10": 1340.0, "15": 1839.0, "20": 1649.0, "25": 1654.0, "30": 1905.0, "35": 1915.0, "40": 2069.0, "45": 2103.0, "50": 2321.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 522977792.0, "5": 522977792.0, "10": 522977792.0, "15": 522977792.0, "20": 522977792.0, "25": 522977792.0, "30": 522977792.0, "35": 522977792.0, "40": 522977792.0, "45": 522977792.0, "50": 522977792.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3768846848.0, "5": 3912739328.0, "10": 3912739328.0, "15": 3912739328.0, "20": 3912739328.0, "25": 3912739328.0, "30": 3912739328.0, "35": 3912739328.0, "40": 3912739328.0, "45": 3912739328.0, "50": 3912739328.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.08577, "5": 0.21022, "10": 0.21068, "15": 0.21012, "20": 0.21233, "25": 0.21049, "30": 0.21043, "35": 0.20774, "40": 0.20847, "45": 0.21664, "50": 0.20762}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.82558,
            "2": 10.83322,
            "3": 10.82737,
            "4": 10.79588,
            "5": 10.85705,
            "6": 10.8639,
            "7": 10.82692,
            "8": 10.82592,
            "9": 10.83704,
            "10": 10.7972,
            "11": 10.87853,
            "12": 10.85795,
            "13": 10.85374,
            "14": 10.8755,
            "15": 10.79182,
            "16": 10.80298,
            "17": 10.7745,
            "18": 10.80403,
            "19": 10.79365,
            "20": 10.69587,
            "21": 10.6855,
            "22": 10.5315,
            "23": 10.7066,
            "24": 10.57324,
            "25": 10.51545,
            "26": 10.59072,
            "27": 10.60738,
            "28": 10.57026,
            "29": 10.58903,
            "30": 10.34678,
            "31": 10.07736,
            "32": 10.46319,
            "33": 10.45702,
            "34": 10.19923,
            "35": 10.25594,
            "36": 10.2125,
            "37": 10.3469,
            "38": 10.1801,
            "39": 10.40797,
            "40": 10.07599,
            "41": 10.12931,
            "42": 10.21136,
            "43": 9.81697,
            "44": 9.94027,
            "45": 9.81697,
            "46": 9.80603,
            "47": 10.12473,
            "48": 9.84051,
            "49": 9.50971,
            "50": 9.88935
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1692.0,
            "2": 1562.0,
            "3": 1659.0,
            "4": 1661.0,
            "5": 1890.0,
            "6": 1885.0,
            "7": 1867.0,
            "8": 1651.0,
            "9": 1897.0,
            "10": 1425.0,
            "11": 1904.0,
            "12": 1768.0,
            "13": 1970.0,
            "14": 1771.0,
            "15": 1880.0,
            "16": 1857.0,
            "17": 1803.0,
            "18": 1721.0,
            "19": 1761.0,
            "20": 1752.0,
            "21": 1981.0,
            "22": 1699.0,
            "23": 2007.0,
            "24": 1696.0,
            "25": 1607.0,
            "26": 1733.0,
            "27": 1771.0,
            "28": 1882.0,
            "29": 1867.0,
            "30": 1994.0,
            "31": 1541.0,
            "32": 1906.0,
            "33": 2052.0,
            "34": 1850.0,
            "35": 1995.0,
            "36": 1956.0,
            "37": 2351.0,
            "38": 2181.0,
            "39": 2298.0,
            "40": 2103.0,
            "41": 2115.0,
            "42": 2326.0,
            "43": 1958.0,
            "44": 2145.0,
            "45": 2066.0,
            "46": 2223.0,
            "47": 2478.0,
            "48": 2352.0,
            "49": 2254.0,
            "50": 2356.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 551137792.0,
            "2": 551137792.0,
            "3": 551137792.0,
            "4": 551137792.0,
            "5": 551137792.0,
            "6": 551137792.0,
            "7": 551137792.0,
            "8": 551137792.0,
            "9": 551137792.0,
            "10": 551137792.0,
            "11": 551137792.0,
            "12": 551137792.0,
            "13": 551137792.0,
            "14": 551137792.0,
            "15": 551137792.0,
            "16": 551137792.0,
            "17": 551137792.0,
            "18": 551137792.0,
            "19": 551137792.0,
            "20": 551137792.0,
            "21": 551137792.0,
            "22": 551137792.0,
            "23": 551137792.0,
            "24": 551137792.0,
            "25": 551137792.0,
            "26": 551137792.0,
            "27": 551137792.0,
            "28": 551137792.0,
            "29": 551137792.0,
            "30": 551137792.0,
            "31": 551137792.0,
            "32": 551137792.0,
            "33": 551137792.0,
            "34": 551137792.0,
            "35": 551137792.0,
            "36": 551137792.0,
            "37": 551137792.0,
            "38": 551137792.0,
            "39": 551137792.0,
            "40": 551137792.0,
            "41": 551137792.0,
            "42": 551137792.0,
            "43": 551137792.0,
            "44": 551137792.0,
            "45": 551137792.0,
            "46": 551137792.0,
            "47": 551137792.0,
            "48": 551137792.0,
            "49": 551137792.0,
            "50": 551137792.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 3798208000.0,
            "2": 3940900352.0,
            "3": 3940900352.0,
            "4": 3940900352.0,
            "5": 3940900352.0,
            "6": 3940900352.0,
            "7": 3940900352.0,
            "8": 3940900352.0,
            "9": 3940900352.0,
            "10": 3940900352.0,
            "11": 3940900352.0,
            "12": 3940900352.0,
            "13": 3940900352.0,
            "14": 3940900352.0,
            "15": 3940900352.0,
            "16": 3940900352.0,
            "17": 3940900352.0,
            "18": 3940900352.0,
            "19": 3940900352.0,
            "20": 3940900352.0,
            "21": 3940900352.0,
            "22": 3940900352.0,
            "23": 3940900352.0,
            "24": 3940900352.0,
            "25": 3940900352.0,
            "26": 3940900352.0,
            "27": 3940900352.0,
            "28": 3940900352.0,
            "29": 3940900352.0,
            "30": 3940900352.0,
            "31": 3940900352.0,
            "32": 3940900352.0,
            "33": 3940900352.0,
            "34": 3940900352.0,
            "35": 3940900352.0,
            "36": 3940900352.0,
            "37": 3940900352.0,
            "38": 3940900352.0,
            "39": 3940900352.0,
            "40": 3940900352.0,
            "41": 3940900352.0,
            "42": 3940900352.0,
            "43": 3940900352.0,
            "44": 3940900352.0,
            "45": 3940900352.0,
            "46": 3940900352.0,
            "47": 3940900352.0,
            "48": 3940900352.0,
            "49": 3940900352.0,
            "50": 3940900352.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 10.17412,
            "3": 0.14904,
            "4": 0.12759,
            "5": 0.1274,
            "6": 0.12621,
            "7": 0.12496,
            "8": 0.44001,
            "9": 0.12775,
            "10": 0.12927,
            "11": 0.23731,
            "12": 0.12987,
            "13": 0.12779,
            "14": 0.12929,
            "15": 0.12998,
            "16": 0.12594,
            "17": 0.12925,
            "18": 0.12899,
            "19": 0.1293,
            "20": 0.1284,
            "21": 0.22948,
            "22": 0.13102,
            "23": 0.12998,
            "24": 0.13382,
            "25": 0.13352,
            "26": 0.13255,
            "27": 0.13108,
            "28": 0.13178,
            "29": 0.13341,
            "30": 0.13286,
            "31": 0.24189,
            "32": 0.12944,
            "33": 0.1295,
            "34": 0.12994,
            "35": 0.12844,
            "36": 0.12999,
            "37": 0.13026,
            "38": 0.13104,
            "39": 0.13047,
            "40": 0.13073,
            "41": 0.23481,
            "42": 0.1338,
            "43": 0.1343,
            "44": 0.1326,
            "45": 0.13438,
            "46": 0.13215,
            "47": 0.13101,
            "48": 0.1292,
            "49": 0.13175,
            "50": 0.13038
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.84523,
            "2": 10.85412,
            "3": 10.85365,
            "4": 10.8387,
            "5": 10.87429,
            "6": 10.89335,
            "7": 10.8541,
            "8": 10.86234,
            "9": 10.86352,
            "10": 10.82861,
            "11": 10.88774,
            "12": 10.87154,
            "13": 10.8794,
            "14": 10.89126,
            "15": 10.81919,
            "16": 10.83062,
            "17": 10.79878,
            "18": 10.81768,
            "19": 10.81959,
            "20": 10.72749,
            "21": 10.70552,
            "22": 10.56397,
            "23": 10.72819,
            "24": 10.60839,
            "25": 10.55196,
            "26": 10.6087,
            "27": 10.62882,
            "28": 10.58272,
            "29": 10.59986,
            "30": 10.36507,
            "31": 10.12097,
            "32": 10.47625,
            "33": 10.46906,
            "34": 10.22325,
            "35": 10.27849,
            "36": 10.22879,
            "37": 10.35946,
            "38": 10.19333,
            "39": 10.41588,
            "40": 10.09771,
            "41": 10.15713,
            "42": 10.22445,
            "43": 9.83283,
            "44": 9.96941,
            "45": 9.84201,
            "46": 9.83021,
            "47": 10.15605,
            "48": 9.85509,
            "49": 9.54052,
            "50": 9.91255
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1716.0,
            "2": 1716.0,
            "3": 1713.0,
            "4": 1771.0,
            "5": 1870.0,
            "6": 1787.0,
            "7": 1797.0,
            "8": 1662.0,
            "9": 1857.0,
            "10": 1418.0,
            "11": 1931.0,
            "12": 1675.0,
            "13": 1893.0,
            "14": 1787.0,
            "15": 1973.0,
            "16": 1896.0,
            "17": 1851.0,
            "18": 1766.0,
            "19": 1792.0,
            "20": 1589.0,
            "21": 1815.0,
            "22": 1678.0,
            "23": 1939.0,
            "24": 1611.0,
            "25": 1600.0,
            "26": 1760.0,
            "27": 1842.0,
            "28": 1948.0,
            "29": 1973.0,
            "30": 2003.0,
            "31": 1557.0,
            "32": 1832.0,
            "33": 2126.0,
            "34": 1890.0,
            "35": 2066.0,
            "36": 1915.0,
            "37": 2356.0,
            "38": 2219.0,
            "39": 2394.0,
            "40": 2140.0,
            "41": 2207.0,
            "42": 2136.0,
            "43": 1991.0,
            "44": 2123.0,
            "45": 2102.0,
            "46": 2379.0,
            "47": 2497.0,
            "48": 2457.0,
            "49": 2239.0,
            "50": 2251.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 552193536.0,
            "2": 552193536.0,
            "3": 552193536.0,
            "4": 552193536.0,
            "5": 552193536.0,
            "6": 552193536.0,
            "7": 552193536.0,
            "8": 552193536.0,
            "9": 552193536.0,
            "10": 552193536.0,
            "11": 552193536.0,
            "12": 552193536.0,
            "13": 552193536.0,
            "14": 552193536.0,
            "15": 552193536.0,
            "16": 552193536.0,
            "17": 552193536.0,
            "18": 552193536.0,
            "19": 552193536.0,
            "20": 552193536.0,
            "21": 552193536.0,
            "22": 552193536.0,
            "23": 552193536.0,
            "24": 552193536.0,
            "25": 552193536.0,
            "26": 552193536.0,
            "27": 552193536.0,
            "28": 552193536.0,
            "29": 552193536.0,
            "30": 552193536.0,
            "31": 552193536.0,
            "32": 552193536.0,
            "33": 552193536.0,
            "34": 552193536.0,
            "35": 552193536.0,
            "36": 552193536.0,
            "37": 552193536.0,
            "38": 552193536.0,
            "39": 552193536.0,
            "40": 552193536.0,
            "41": 552193536.0,
            "42": 552193536.0,
            "43": 552193536.0,
            "44": 552193536.0,
            "45": 552193536.0,
            "46": 552193536.0,
            "47": 552193536.0,
            "48": 552193536.0,
            "49": 552193536.0,
            "50": 552193536.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 3799125504.0,
            "2": 3941955072.0,
            "3": 3941955072.0,
            "4": 3941955072.0,
            "5": 3941955072.0,
            "6": 3941955072.0,
            "7": 3941955072.0,
            "8": 3941955072.0,
            "9": 3941955072.0,
            "10": 3941955072.0,
            "11": 3941955072.0,
            "12": 3941955072.0,
            "13": 3941955072.0,
            "14": 3941955072.0,
            "15": 3941955072.0,
            "16": 3941955072.0,
            "17": 3941955072.0,
            "18": 3941955072.0,
            "19": 3941955072.0,
            "20": 3941955072.0,
            "21": 3941955072.0,
            "22": 3941955072.0,
            "23": 3941955072.0,
            "24": 3941955072.0,
            "25": 3941955072.0,
            "26": 3941955072.0,
            "27": 3941955072.0,
            "28": 3941955072.0,
            "29": 3941955072.0,
            "30": 3941955072.0,
            "31": 3941955072.0,
            "32": 3941955072.0,
            "33": 3941955072.0,
            "34": 3941955072.0,
            "35": 3941955072.0,
            "36": 3941955072.0,
            "37": 3941955072.0,
            "38": 3941955072.0,
            "39": 3941955072.0,
            "40": 3941955072.0,
            "41": 3941955072.0,
            "42": 3941955072.0,
            "43": 3941955072.0,
            "44": 3941955072.0,
            "45": 3941955072.0,
            "46": 3941955072.0,
            "47": 3941955072.0,
            "48": 3941955072.0,
            "49": 3941955072.0,
            "50": 3941955072.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 8.82316,
            "3": 0.11598,
            "4": 0.09224,
            "5": 0.09137,
            "6": 0.09137,
            "7": 0.09193,
            "8": 0.09275,
            "9": 0.09168,
            "10": 0.09114,
            "11": 0.21733,
            "12": 0.09151,
            "13": 0.09081,
            "14": 0.09911,
            "15": 0.10239,
            "16": 0.09986,
            "17": 0.09258,
            "18": 0.09121,
            "19": 0.09129,
            "20": 0.09092,
            "21": 0.21644,
            "22": 0.09099,
            "23": 0.09097,
            "24": 0.09123,
            "25": 0.0916,
            "26": 0.09084,
            "27": 0.09187,
            "28": 0.09127,
            "29": 0.09167,
            "30": 0.09155,
            "31": 0.21721,
            "32": 0.09166,
            "33": 0.09144,
            "34": 0.0919,
            "35": 0.09076,
            "36": 0.09136,
            "37": 0.09169,
            "38": 0.09204,
            "39": 0.09238,
            "40": 0.09176,
            "41": 0.21641,
            "42": 0.09152,
            "43": 0.09202,
            "44": 0.09101,
            "45": 0.09119,
            "46": 0.09194,
            "47": 0.09123,
            "48": 0.09155,
            "49": 0.09109,
            "50": 0.09125
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgxh100_dgxc.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.84523,
            "2": 10.85412,
            "3": 10.85365,
            "4": 10.83867,
            "5": 10.87428,
            "6": 10.89334,
            "7": 10.8541,
            "8": 10.86235,
            "9": 10.86352,
            "10": 10.82859,
            "11": 10.88772,
            "12": 10.87148,
            "13": 10.87938,
            "14": 10.89123,
            "15": 10.81927,
            "16": 10.83063,
            "17": 10.79878,
            "18": 10.81771,
            "19": 10.81957,
            "20": 10.72749,
            "21": 10.70552,
            "22": 10.56396,
            "23": 10.72823,
            "24": 10.60839,
            "25": 10.55198,
            "26": 10.60868,
            "27": 10.62879,
            "28": 10.58271,
            "29": 10.59982,
            "30": 10.36511,
            "31": 10.12096,
            "32": 10.47628,
            "33": 10.46906,
            "34": 10.22326,
            "35": 10.27848,
            "36": 10.22883,
            "37": 10.35947,
            "38": 10.19331,
            "39": 10.41586,
            "40": 10.09773,
            "41": 10.15718,
            "42": 10.22441,
            "43": 9.83281,
            "44": 9.96935,
            "45": 9.84205,
            "46": 9.83017,
            "47": 10.15602,
            "48": 9.85503,
            "49": 9.54049,
            "50": 9.91258
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1725.0,
            "2": 1664.0,
            "3": 1710.0,
            "4": 1712.0,
            "5": 1834.0,
            "6": 1743.0,
            "7": 1803.0,
            "8": 1737.0,
            "9": 1779.0,
            "10": 1459.0,
            "11": 1898.0,
            "12": 1661.0,
            "13": 1860.0,
            "14": 1764.0,
            "15": 1886.0,
            "16": 1916.0,
            "17": 1773.0,
            "18": 1702.0,
            "19": 1742.0,
            "20": 1649.0,
            "21": 1899.0,
            "22": 1631.0,
            "23": 1960.0,
            "24": 1570.0,
            "25": 1647.0,
            "26": 1649.0,
            "27": 1811.0,
            "28": 1930.0,
            "29": 1910.0,
            "30": 1964.0,
            "31": 1536.0,
            "32": 1873.0,
            "33": 2191.0,
            "34": 1838.0,
            "35": 2017.0,
            "36": 1916.0,
            "37": 2345.0,
            "38": 2247.0,
            "39": 2374.0,
            "40": 2207.0,
            "41": 2246.0,
            "42": 2291.0,
            "43": 2027.0,
            "44": 2147.0,
            "45": 2164.0,
            "46": 2300.0,
            "47": 2418.0,
            "48": 2467.0,
            "49": 2255.0,
            "50": 2224.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 552328704.0,
            "2": 552328704.0,
            "3": 552328704.0,
            "4": 552328704.0,
            "5": 552328704.0,
            "6": 552328704.0,
            "7": 552328704.0,
            "8": 552328704.0,
            "9": 552328704.0,
            "10": 552328704.0,
            "11": 552328704.0,
            "12": 552328704.0,
            "13": 552328704.0,
            "14": 552328704.0,
            "15": 552328704.0,
            "16": 552328704.0,
            "17": 552328704.0,
            "18": 552328704.0,
            "19": 552328704.0,
            "20": 552328704.0,
            "21": 552328704.0,
            "22": 552328704.0,
            "23": 552328704.0,
            "24": 552328704.0,
            "25": 552328704.0,
            "26": 552328704.0,
            "27": 552328704.0,
            "28": 552328704.0,
            "29": 552328704.0,
            "30": 552328704.0,
            "31": 552328704.0,
            "32": 552328704.0,
            "33": 552328704.0,
            "34": 552328704.0,
            "35": 552328704.0,
            "36": 552328704.0,
            "37": 552328704.0,
            "38": 552328704.0,
            "39": 552328704.0,
            "40": 552328704.0,
            "41": 552328704.0,
            "42": 552328704.0,
            "43": 552328704.0,
            "44": 552328704.0,
            "45": 552328704.0,
            "46": 552328704.0,
            "47": 552328704.0,
            "48": 552328704.0,
            "49": 552328704.0,
            "50": 552328704.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 3798208000.0,
            "2": 3943007744.0,
            "3": 3943007744.0,
            "4": 3943007744.0,
            "5": 3943007744.0,
            "6": 3943007744.0,
            "7": 3943007744.0,
            "8": 3943007744.0,
            "9": 3943007744.0,
            "10": 3943007744.0,
            "11": 3943007744.0,
            "12": 3943007744.0,
            "13": 3943007744.0,
            "14": 3943007744.0,
            "15": 3943007744.0,
            "16": 3943007744.0,
            "17": 3943007744.0,
            "18": 3943007744.0,
            "19": 3943007744.0,
            "20": 3943007744.0,
            "21": 3943007744.0,
            "22": 3943007744.0,
            "23": 3943007744.0,
            "24": 3943007744.0,
            "25": 3943007744.0,
            "26": 3943007744.0,
            "27": 3943007744.0,
            "28": 3943007744.0,
            "29": 3943007744.0,
            "30": 3943007744.0,
            "31": 3943007744.0,
            "32": 3943007744.0,
            "33": 3943007744.0,
            "34": 3943007744.0,
            "35": 3943007744.0,
            "36": 3943007744.0,
            "37": 3943007744.0,
            "38": 3943007744.0,
            "39": 3943007744.0,
            "40": 3943007744.0,
            "41": 3943007744.0,
            "42": 3943007744.0,
            "43": 3943007744.0,
            "44": 3943007744.0,
            "45": 3943007744.0,
            "46": 3943007744.0,
            "47": 3943007744.0,
            "48": 3943007744.0,
            "49": 3943007744.0,
            "50": 3943007744.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 13.33022,
            "2": 0.14078,
            "3": 0.13198,
            "4": 0.12852,
            "5": 0.13083,
            "6": 0.13237,
            "7": 0.13228,
            "8": 0.1313,
            "9": 0.12811,
            "10": 0.1288,
            "11": 0.33424,
            "12": 0.13269,
            "13": 0.12918,
            "14": 0.12679,
            "15": 0.12826,
            "16": 0.12904,
            "17": 0.12886,
            "18": 0.12955,
            "19": 0.1304,
            "20": 0.13345,
            "21": 0.33748,
            "22": 0.12668,
            "23": 0.13016,
            "24": 0.13048,
            "25": 0.13063,
            "26": 0.12607,
            "27": 0.12969,
            "28": 0.12911,
            "29": 0.12982,
            "30": 0.12875,
            "31": 0.33159,
            "32": 0.13001,
            "33": 0.12965,
            "34": 0.12637,
            "35": 0.12796,
            "36": 0.12613,
            "37": 0.13026,
            "38": 0.1296,
            "39": 0.12924,
            "40": 0.12739,
            "41": 0.33311,
            "42": 0.12916,
            "43": 0.12923,
            "44": 0.12827,
            "45": 0.12448,
            "46": 0.12337,
            "47": 0.12316,
            "48": 0.12962,
            "49": 0.12832,
            "50": 0.12865
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.81478,
            "2": 10.82042,
            "3": 10.81232,
            "4": 10.78901,
            "5": 10.85168,
            "6": 10.87098,
            "7": 10.83086,
            "8": 10.83472,
            "9": 10.83886,
            "10": 10.78749,
            "11": 10.87935,
            "12": 10.86017,
            "13": 10.86578,
            "14": 10.8781,
            "15": 10.79514,
            "16": 10.79576,
            "17": 10.76832,
            "18": 10.81092,
            "19": 10.79889,
            "20": 10.69131,
            "21": 10.68008,
            "22": 10.52146,
            "23": 10.70713,
            "24": 10.57677,
            "25": 10.52315,
            "26": 10.59564,
            "27": 10.5861,
            "28": 10.56176,
            "29": 10.56942,
            "30": 10.34624,
            "31": 10.10032,
            "32": 10.45433,
            "33": 10.44592,
            "34": 10.20725,
            "35": 10.26186,
            "36": 10.21223,
            "37": 10.32453,
            "38": 10.16801,
            "39": 10.38354,
            "40": 10.07222,
            "41": 10.13752,
            "42": 10.19756,
            "43": 9.81134,
            "44": 9.93285,
            "45": 9.81001,
            "46": 9.80858,
            "47": 10.12582,
            "48": 9.82129,
            "49": 9.50739,
            "50": 9.88351
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1544.0,
            "2": 1712.0,
            "3": 1660.0,
            "4": 1767.0,
            "5": 1795.0,
            "6": 1799.0,
            "7": 1805.0,
            "8": 1664.0,
            "9": 1769.0,
            "10": 1340.0,
            "11": 1830.0,
            "12": 1812.0,
            "13": 1837.0,
            "14": 1710.0,
            "15": 1839.0,
            "16": 1776.0,
            "17": 1750.0,
            "18": 1612.0,
            "19": 1764.0,
            "20": 1649.0,
            "21": 1854.0,
            "22": 1750.0,
            "23": 1909.0,
            "24": 1616.0,
            "25": 1654.0,
            "26": 1755.0,
            "27": 1860.0,
            "28": 2042.0,
            "29": 1953.0,
            "30": 1905.0,
            "31": 1684.0,
            "32": 1831.0,
            "33": 2101.0,
            "34": 1769.0,
            "35": 1915.0,
            "36": 1885.0,
            "37": 2324.0,
            "38": 2169.0,
            "39": 2300.0,
            "40": 2069.0,
            "41": 2353.0,
            "42": 2236.0,
            "43": 1978.0,
            "44": 2022.0,
            "45": 2103.0,
            "46": 2292.0,
            "47": 2413.0,
            "48": 2305.0,
            "49": 2218.0,
            "50": 2321.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 523371008.0,
            "2": 523371008.0,
            "3": 523371008.0,
            "4": 523371008.0,
            "5": 523371008.0,
            "6": 523371008.0,
            "7": 523371008.0,
            "8": 523371008.0,
            "9": 523371008.0,
            "10": 523371008.0,
            "11": 523371008.0,
            "12": 523371008.0,
            "13": 523371008.0,
            "14": 523371008.0,
            "15": 523371008.0,
            "16": 523371008.0,
            "17": 523371008.0,
            "18": 523371008.0,
            "19": 523371008.0,
            "20": 523371008.0,
            "21": 523371008.0,
            "22": 523371008.0,
            "23": 523371008.0,
            "24": 523371008.0,
            "25": 523371008.0,
            "26": 523371008.0,
            "27": 523371008.0,
            "28": 523371008.0,
            "29": 523371008.0,
            "30": 523371008.0,
            "31": 523371008.0,
            "32": 523371008.0,
            "33": 523371008.0,
            "34": 523371008.0,
            "35": 523371008.0,
            "36": 523371008.0,
            "37": 523371008.0,
            "38": 523371008.0,
            "39": 523371008.0,
            "40": 523371008.0,
            "41": 523371008.0,
            "42": 523371008.0,
            "43": 523371008.0,
            "44": 523371008.0,
            "45": 523371008.0,
            "46": 523371008.0,
            "47": 523371008.0,
            "48": 523371008.0,
            "49": 523371008.0,
            "50": 523371008.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 3768846848.0,
            "2": 3913263616.0,
            "3": 3913263616.0,
            "4": 3913263616.0,
            "5": 3913263616.0,
            "6": 3913263616.0,
            "7": 3913263616.0,
            "8": 3913263616.0,
            "9": 3913263616.0,
            "10": 3913263616.0,
            "11": 3913263616.0,
            "12": 3913263616.0,
            "13": 3913263616.0,
            "14": 3913263616.0,
            "15": 3913263616.0,
            "16": 3913263616.0,
            "17": 3913263616.0,
            "18": 3913263616.0,
            "19": 3913263616.0,
            "20": 3913263616.0,
            "21": 3913263616.0,
            "22": 3913263616.0,
            "23": 3913263616.0,
            "24": 3913263616.0,
            "25": 3913263616.0,
            "26": 3913263616.0,
            "27": 3913263616.0,
            "28": 3913263616.0,
            "29": 3913263616.0,
            "30": 3913263616.0,
            "31": 3913263616.0,
            "32": 3913263616.0,
            "33": 3913263616.0,
            "34": 3913263616.0,
            "35": 3913263616.0,
            "36": 3913263616.0,
            "37": 3913263616.0,
            "38": 3913263616.0,
            "39": 3913263616.0,
            "40": 3913263616.0,
            "41": 3913263616.0,
            "42": 3913263616.0,
            "43": 3913263616.0,
            "44": 3913263616.0,
            "45": 3913263616.0,
            "46": 3913263616.0,
            "47": 3913263616.0,
            "48": 3913263616.0,
            "49": 3913263616.0,
            "50": 3913263616.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 11.01582,
            "2": 0.17662,
            "3": 0.15135,
            "4": 0.14999,
            "5": 0.14829,
            "6": 0.14621,
            "7": 0.14816,
            "8": 0.14731,
            "9": 0.14766,
            "10": 0.14515,
            "11": 0.30054,
            "12": 0.14534,
            "13": 0.14429,
            "14": 0.14592,
            "15": 0.14632,
            "16": 0.14618,
            "17": 0.14537,
            "18": 0.14666,
            "19": 0.14384,
            "20": 0.14453,
            "21": 0.30388,
            "22": 0.14466,
            "23": 0.14511,
            "24": 0.14435,
            "25": 0.14401,
            "26": 0.14328,
            "27": 0.14376,
            "28": 0.14434,
            "29": 0.14386,
            "30": 0.14418,
            "31": 0.30313,
            "32": 0.14394,
            "33": 0.14406,
            "34": 0.14377,
            "35": 0.14417,
            "36": 0.14415,
            "37": 0.14393,
            "38": 0.14577,
            "39": 0.14494,
            "40": 0.14489,
            "41": 0.30235,
            "42": 0.14494,
            "43": 0.1472,
            "44": 0.14577,
            "45": 0.14497,
            "46": 0.14619,
            "47": 0.14474,
            "48": 0.14551,
            "49": 0.14554,
            "50": 0.14507
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgx_a100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.81478,
            "2": 10.82042,
            "3": 10.81232,
            "4": 10.78901,
            "5": 10.85168,
            "6": 10.87098,
            "7": 10.83086,
            "8": 10.83472,
            "9": 10.83886,
            "10": 10.78749,
            "11": 10.87935,
            "12": 10.86017,
            "13": 10.86578,
            "14": 10.8781,
            "15": 10.79514,
            "16": 10.79576,
            "17": 10.76832,
            "18": 10.81092,
            "19": 10.79889,
            "20": 10.69131,
            "21": 10.68008,
            "22": 10.52146,
            "23": 10.70713,
            "24": 10.57677,
            "25": 10.52315,
            "26": 10.59564,
            "27": 10.5861,
            "28": 10.56176,
            "29": 10.56942,
            "30": 10.34624,
            "31": 10.10032,
            "32": 10.45433,
            "33": 10.44592,
            "34": 10.20725,
            "35": 10.26186,
            "36": 10.21223,
            "37": 10.32453,
            "38": 10.16801,
            "39": 10.38354,
            "40": 10.07222,
            "41": 10.13752,
            "42": 10.19756,
            "43": 9.81134,
            "44": 9.93285,
            "45": 9.81001,
            "46": 9.80858,
            "47": 10.12582,
            "48": 9.82129,
            "49": 9.50739,
            "50": 9.88351
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1544.0,
            "2": 1712.0,
            "3": 1660.0,
            "4": 1767.0,
            "5": 1795.0,
            "6": 1799.0,
            "7": 1805.0,
            "8": 1664.0,
            "9": 1769.0,
            "10": 1340.0,
            "11": 1830.0,
            "12": 1812.0,
            "13": 1837.0,
            "14": 1710.0,
            "15": 1839.0,
            "16": 1776.0,
            "17": 1750.0,
            "18": 1612.0,
            "19": 1764.0,
            "20": 1649.0,
            "21": 1854.0,
            "22": 1750.0,
            "23": 1909.0,
            "24": 1616.0,
            "25": 1654.0,
            "26": 1755.0,
            "27": 1860.0,
            "28": 2042.0,
            "29": 1953.0,
            "30": 1905.0,
            "31": 1684.0,
            "32": 1831.0,
            "33": 2101.0,
            "34": 1769.0,
            "35": 1915.0,
            "36": 1885.0,
            "37": 2324.0,
            "38": 2169.0,
            "39": 2300.0,
            "40": 2069.0,
            "41": 2353.0,
            "42": 2236.0,
            "43": 1978.0,
            "44": 2022.0,
            "45": 2103.0,
            "46": 2292.0,
            "47": 2413.0,
            "48": 2305.0,
            "49": 2218.0,
            "50": 2321.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 522966528.0,
            "2": 522966528.0,
            "3": 522966528.0,
            "4": 522966528.0,
            "5": 522966528.0,
            "6": 522966528.0,
            "7": 522966528.0,
            "8": 522966528.0,
            "9": 522966528.0,
            "10": 522966528.0,
            "11": 522966528.0,
            "12": 522966528.0,
            "13": 522966528.0,
            "14": 522966528.0,
            "15": 522966528.0,
            "16": 522966528.0,
            "17": 522966528.0,
            "18": 522966528.0,
            "19": 522966528.0,
            "20": 522966528.0,
            "21": 522966528.0,
            "22": 522966528.0,
            "23": 522966528.0,
            "24": 522966528.0,
            "25": 522966528.0,
            "26": 522966528.0,
            "27": 522966528.0,
            "28": 522966528.0,
            "29": 522966528.0,
            "30": 522966528.0,
            "31": 522966528.0,
            "32": 522966528.0,
            "33": 522966528.0,
            "34": 522966528.0,
            "35": 522966528.0,
            "36": 522966528.0,
            "37": 522966528.0,
            "38": 522966528.0,
            "39": 522966528.0,
            "40": 522966528.0,
            "41": 522966528.0,
            "42": 522966528.0,
            "43": 522966528.0,
            "44": 522966528.0,
            "45": 522966528.0,
            "46": 522966528.0,
            "47": 522966528.0,
            "48": 522966528.0,
            "49": 522966528.0,
            "50": 522966528.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 3768846848.0,
            "2": 3913646592.0,
            "3": 3913646592.0,
            "4": 3913646592.0,
            "5": 3913646592.0,
            "6": 3913646592.0,
            "7": 3913646592.0,
            "8": 3913646592.0,
            "9": 3913646592.0,
            "10": 3913646592.0,
            "11": 3913646592.0,
            "12": 3913646592.0,
            "13": 3913646592.0,
            "14": 3913646592.0,
            "15": 3913646592.0,
            "16": 3913646592.0,
            "17": 3913646592.0,
            "18": 3913646592.0,
            "19": 3913646592.0,
            "20": 3913646592.0,
            "21": 3913646592.0,
            "22": 3913646592.0,
            "23": 3913646592.0,
            "24": 3913646592.0,
            "25": 3913646592.0,
            "26": 3913646592.0,
            "27": 3913646592.0,
            "28": 3913646592.0,
            "29": 3913646592.0,
            "30": 3913646592.0,
            "31": 3913646592.0,
            "32": 3913646592.0,
            "33": 3913646592.0,
            "34": 3913646592.0,
            "35": 3913646592.0,
            "36": 3913646592.0,
            "37": 3913646592.0,
            "38": 3913646592.0,
            "39": 3913646592.0,
            "40": 3913646592.0,
            "41": 3913646592.0,
            "42": 3913646592.0,
            "43": 3913646592.0,
            "44": 3913646592.0,
            "45": 3913646592.0,
            "46": 3913646592.0,
            "47": 3913646592.0,
            "48": 3913646592.0,
            "49": 3913646592.0,
            "50": 3913646592.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 7.26942,
            "2": 0.17361,
            "3": 0.16661,
            "4": 0.15374,
            "5": 0.1539,
            "6": 0.15237,
            "7": 0.15491,
            "8": 0.16016,
            "9": 0.1524,
            "10": 0.14907,
            "11": 0.28249,
            "12": 0.14867,
            "13": 0.14835,
            "14": 0.14748,
            "15": 0.14906,
            "16": 0.14768,
            "17": 0.15182,
            "18": 0.14947,
            "19": 0.15009,
            "20": 0.14968,
            "21": 0.28262,
            "22": 0.14991,
            "23": 0.14955,
            "24": 0.14949,
            "25": 0.14929,
            "26": 0.14942,
            "27": 0.14898,
            "28": 0.15187,
            "29": 0.14918,
            "30": 0.14827,
            "31": 0.2861,
            "32": 0.14873,
            "33": 0.14777,
            "34": 0.14736,
            "35": 0.14865,
            "36": 0.14795,
            "37": 0.148,
            "38": 0.14799,
            "39": 0.14777,
            "40": 0.14776,
            "41": 0.28572,
            "42": 0.14812,
            "43": 0.14967,
            "44": 0.14785,
            "45": 0.14785,
            "46": 0.14867,
            "47": 0.14775,
            "48": 0.14841,
            "49": 0.14786,
            "50": 0.14872
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgxa100_dracooci-ord.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.82005,
            "2": 10.81907,
            "3": 10.81396,
            "4": 10.78497,
            "5": 10.85284,
            "6": 10.87449,
            "7": 10.83201,
            "8": 10.83297,
            "9": 10.83935,
            "10": 10.78455,
            "11": 10.87798,
            "12": 10.86112,
            "13": 10.86444,
            "14": 10.87605,
            "15": 10.7923,
            "16": 10.7951,
            "17": 10.76773,
            "18": 10.81002,
            "19": 10.79715,
            "20": 10.69213,
            "21": 10.68165,
            "22": 10.52083,
            "23": 10.70895,
            "24": 10.57597,
            "25": 10.5241,
            "26": 10.59512,
            "27": 10.58424,
            "28": 10.56231,
            "29": 10.57009,
            "30": 10.34556,
            "31": 10.10048,
            "32": 10.45377,
            "33": 10.44632,
            "34": 10.20606,
            "35": 10.26241,
            "36": 10.21241,
            "37": 10.32522,
            "38": 10.16779,
            "39": 10.38327,
            "40": 10.07237,
            "41": 10.13863,
            "42": 10.19814,
            "43": 9.81079,
            "44": 9.93246,
            "45": 9.811,
            "46": 9.8088,
            "47": 10.12607,
            "48": 9.82111,
            "49": 9.50627,
            "50": 9.88419
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1559.0,
            "2": 1591.0,
            "3": 1727.0,
            "4": 1835.0,
            "5": 1840.0,
            "6": 1719.0,
            "7": 1740.0,
            "8": 1591.0,
            "9": 1839.0,
            "10": 1380.0,
            "11": 1856.0,
            "12": 1693.0,
            "13": 1906.0,
            "14": 1757.0,
            "15": 1850.0,
            "16": 1754.0,
            "17": 1768.0,
            "18": 1671.0,
            "19": 1715.0,
            "20": 1699.0,
            "21": 1891.0,
            "22": 1794.0,
            "23": 1970.0,
            "24": 1751.0,
            "25": 1614.0,
            "26": 1805.0,
            "27": 1821.0,
            "28": 2042.0,
            "29": 2014.0,
            "30": 1905.0,
            "31": 1658.0,
            "32": 1848.0,
            "33": 2113.0,
            "34": 1678.0,
            "35": 1933.0,
            "36": 1922.0,
            "37": 2309.0,
            "38": 2120.0,
            "39": 2469.0,
            "40": 2169.0,
            "41": 2241.0,
            "42": 2276.0,
            "43": 1937.0,
            "44": 2090.0,
            "45": 2101.0,
            "46": 2282.0,
            "47": 2493.0,
            "48": 2309.0,
            "49": 2250.0,
            "50": 2421.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 522346496.0,
            "2": 522346496.0,
            "3": 522346496.0,
            "4": 522346496.0,
            "5": 522346496.0,
            "6": 522346496.0,
            "7": 522346496.0,
            "8": 522346496.0,
            "9": 522346496.0,
            "10": 522346496.0,
            "11": 522346496.0,
            "12": 522346496.0,
            "13": 522346496.0,
            "14": 522346496.0,
            "15": 522346496.0,
            "16": 522346496.0,
            "17": 522346496.0,
            "18": 522346496.0,
            "19": 522346496.0,
            "20": 522346496.0,
            "21": 522346496.0,
            "22": 522346496.0,
            "23": 522346496.0,
            "24": 522346496.0,
            "25": 522346496.0,
            "26": 522346496.0,
            "27": 522346496.0,
            "28": 522346496.0,
            "29": 522346496.0,
            "30": 522346496.0,
            "31": 522346496.0,
            "32": 522346496.0,
            "33": 522346496.0,
            "34": 522346496.0,
            "35": 522346496.0,
            "36": 522346496.0,
            "37": 522346496.0,
            "38": 522346496.0,
            "39": 522346496.0,
            "40": 522346496.0,
            "41": 522346496.0,
            "42": 522346496.0,
            "43": 522346496.0,
            "44": 522346496.0,
            "45": 522346496.0,
            "46": 522346496.0,
            "47": 522346496.0,
            "48": 522346496.0,
            "49": 522346496.0,
            "50": 522346496.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 3769791488.0,
            "2": 3912108032.0,
            "3": 3912108032.0,
            "4": 3912108032.0,
            "5": 3912108032.0,
            "6": 3912108032.0,
            "7": 3912108032.0,
            "8": 3912108032.0,
            "9": 3912108032.0,
            "10": 3912108032.0,
            "11": 3912108032.0,
            "12": 3912108032.0,
            "13": 3912108032.0,
            "14": 3912108032.0,
            "15": 3912108032.0,
            "16": 3912108032.0,
            "17": 3912108032.0,
            "18": 3912108032.0,
            "19": 3912108032.0,
            "20": 3912108032.0,
            "21": 3912108032.0,
            "22": 3912108032.0,
            "23": 3912108032.0,
            "24": 3912108032.0,
            "25": 3912108032.0,
            "26": 3912108032.0,
            "27": 3912108032.0,
            "28": 3912108032.0,
            "29": 3912108032.0,
            "30": 3912108032.0,
            "31": 3912108032.0,
            "32": 3912108032.0,
            "33": 3912108032.0,
            "34": 3912108032.0,
            "35": 3912108032.0,
            "36": 3912108032.0,
            "37": 3912108032.0,
            "38": 3912108032.0,
            "39": 3912108032.0,
            "40": 3912108032.0,
            "41": 3912108032.0,
            "42": 3912108032.0,
            "43": 3912108032.0,
            "44": 3912108032.0,
            "45": 3912108032.0,
            "46": 3912108032.0,
            "47": 3912108032.0,
            "48": 3912108032.0,
            "49": 3912108032.0,
            "50": 3912108032.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 22.86952,
            "2": 0.20661,
            "3": 0.18026,
            "4": 0.17656,
            "5": 0.17996,
            "6": 0.17701,
            "7": 0.17871,
            "8": 0.17528,
            "9": 0.17563,
            "10": 0.17569,
            "11": 0.74111,
            "12": 0.17396,
            "13": 0.17377,
            "14": 0.1738,
            "15": 0.17271,
            "16": 0.17324,
            "17": 0.17404,
            "18": 0.17229,
            "19": 0.17205,
            "20": 0.17274,
            "21": 0.30088,
            "22": 0.17329,
            "23": 0.17535,
            "24": 0.17212,
            "25": 0.17389,
            "26": 0.19974,
            "27": 0.19407,
            "28": 0.17531,
            "29": 0.17514,
            "30": 0.17299,
            "31": 0.30323,
            "32": 0.17369,
            "33": 0.17341,
            "34": 0.1737,
            "35": 0.17388,
            "36": 0.17546,
            "37": 0.17373,
            "38": 0.17505,
            "39": 0.17758,
            "40": 0.17506,
            "41": 0.3082,
            "42": 0.17306,
            "43": 0.17922,
            "44": 0.17678,
            "45": 0.17538,
            "46": 0.17386,
            "47": 0.17387,
            "48": 0.17425,
            "49": 0.1761,
            "50": 0.17415
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_lts_dgxa100_dracooci.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 10.82005,
            "2": 10.81907,
            "3": 10.81396,
            "4": 10.78497,
            "5": 10.85284,
            "6": 10.87449,
            "7": 10.83201,
            "8": 10.83297,
            "9": 10.83935,
            "10": 10.78455,
            "11": 10.87798,
            "12": 10.86112,
            "13": 10.86444,
            "14": 10.87605,
            "15": 10.7923,
            "16": 10.7951,
            "17": 10.76773,
            "18": 10.81002,
            "19": 10.79715,
            "20": 10.69213,
            "21": 10.68165,
            "22": 10.52083,
            "23": 10.70895,
            "24": 10.57597,
            "25": 10.5241,
            "26": 10.59512,
            "27": 10.58424,
            "28": 10.56231,
            "29": 10.57009,
            "30": 10.34556,
            "31": 10.10048,
            "32": 10.45377,
            "33": 10.44632,
            "34": 10.20606,
            "35": 10.26241,
            "36": 10.21241,
            "37": 10.32522,
            "38": 10.16779,
            "39": 10.38327,
            "40": 10.07237,
            "41": 10.13863,
            "42": 10.19814,
            "43": 9.81079,
            "44": 9.93246,
            "45": 9.811,
            "46": 9.8088,
            "47": 10.12607,
            "48": 9.82111,
            "49": 9.50627,
            "50": 9.88419
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 1559.0,
            "2": 1591.0,
            "3": 1727.0,
            "4": 1835.0,
            "5": 1840.0,
            "6": 1719.0,
            "7": 1740.0,
            "8": 1591.0,
            "9": 1839.0,
            "10": 1380.0,
            "11": 1856.0,
            "12": 1693.0,
            "13": 1906.0,
            "14": 1757.0,
            "15": 1850.0,
            "16": 1754.0,
            "17": 1768.0,
            "18": 1671.0,
            "19": 1715.0,
            "20": 1699.0,
            "21": 1891.0,
            "22": 1794.0,
            "23": 1970.0,
            "24": 1751.0,
            "25": 1614.0,
            "26": 1805.0,
            "27": 1821.0,
            "28": 2042.0,
            "29": 2014.0,
            "30": 1905.0,
            "31": 1658.0,
            "32": 1848.0,
            "33": 2113.0,
            "34": 1678.0,
            "35": 1933.0,
            "36": 1922.0,
            "37": 2309.0,
            "38": 2120.0,
            "39": 2469.0,
            "40": 2169.0,
            "41": 2241.0,
            "42": 2276.0,
            "43": 1937.0,
            "44": 2090.0,
            "45": 2101.0,
            "46": 2282.0,
            "47": 2493.0,
            "48": 2309.0,
            "49": 2250.0,
            "50": 2421.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 522346496.0,
            "2": 522346496.0,
            "3": 522346496.0,
            "4": 522346496.0,
            "5": 522346496.0,
            "6": 522346496.0,
            "7": 522346496.0,
            "8": 522346496.0,
            "9": 522346496.0,
            "10": 522346496.0,
            "11": 522346496.0,
            "12": 522346496.0,
            "13": 522346496.0,
            "14": 522346496.0,
            "15": 522346496.0,
            "16": 522346496.0,
            "17": 522346496.0,
            "18": 522346496.0,
            "19": 522346496.0,
            "20": 522346496.0,
            "21": 522346496.0,
            "22": 522346496.0,
            "23": 522346496.0,
            "24": 522346496.0,
            "25": 522346496.0,
            "26": 522346496.0,
            "27": 522346496.0,
            "28": 522346496.0,
            "29": 522346496.0,
            "30": 522346496.0,
            "31": 522346496.0,
            "32": 522346496.0,
            "33": 522346496.0,
            "34": 522346496.0,
            "35": 522346496.0,
            "36": 522346496.0,
            "37": 522346496.0,
            "38": 522346496.0,
            "39": 522346496.0,
            "40": 522346496.0,
            "41": 522346496.0,
            "42": 522346496.0,
            "43": 522346496.0,
            "44": 522346496.0,
            "45": 522346496.0,
            "46": 522346496.0,
            "47": 522346496.0,
            "48": 522346496.0,
            "49": 522346496.0,
            "50": 522346496.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 3769791488.0,
            "2": 3912108032.0,
            "3": 3912108032.0,
            "4": 3912108032.0,
            "5": 3912108032.0,
            "6": 3912108032.0,
            "7": 3912108032.0,
            "8": 3912108032.0,
            "9": 3912108032.0,
            "10": 3912108032.0,
            "11": 3912108032.0,
            "12": 3912108032.0,
            "13": 3912108032.0,
            "14": 3912108032.0,
            "15": 3912108032.0,
            "16": 3912108032.0,
            "17": 3912108032.0,
            "18": 3912108032.0,
            "19": 3912108032.0,
            "20": 3912108032.0,
            "21": 3912108032.0,
            "22": 3912108032.0,
            "23": 3912108032.0,
            "24": 3912108032.0,
            "25": 3912108032.0,
            "26": 3912108032.0,
            "27": 3912108032.0,
            "28": 3912108032.0,
            "29": 3912108032.0,
            "30": 3912108032.0,
            "31": 3912108032.0,
            "32": 3912108032.0,
            "33": 3912108032.0,
            "34": 3912108032.0,
            "35": 3912108032.0,
            "36": 3912108032.0,
            "37": 3912108032.0,
            "38": 3912108032.0,
            "39": 3912108032.0,
            "40": 3912108032.0,
            "41": 3912108032.0,
            "42": 3912108032.0,
            "43": 3912108032.0,
            "44": 3912108032.0,
            "45": 3912108032.0,
            "46": 3912108032.0,
            "47": 3912108032.0,
            "48": 3912108032.0,
            "49": 3912108032.0,
            "50": 3912108032.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 50,
        "step_interval": 1,
        "values": {
            "1": 26.03973,
            "2": 0.20991,
            "3": 0.18001,
            "4": 0.17535,
            "5": 0.37487,
            "6": 0.17569,
            "7": 0.17538,
            "8": 0.17644,
            "9": 0.17601,
            "10": 0.17454,
            "11": 0.32086,
            "12": 0.17452,
            "13": 0.17725,
            "14": 0.17806,
            "15": 0.17968,
            "16": 0.17731,
            "17": 0.18214,
            "18": 0.17979,
            "19": 0.18197,
            "20": 0.18282,
            "21": 0.31872,
            "22": 0.17621,
            "23": 0.18154,
            "24": 0.17536,
            "25": 0.17248,
            "26": 0.3922,
            "27": 0.17401,
            "28": 0.17258,
            "29": 0.17486,
            "30": 0.17468,
            "31": 0.31294,
            "32": 0.17218,
            "33": 0.17311,
            "34": 0.17553,
            "35": 0.17239,
            "36": 0.17742,
            "37": 0.17354,
            "38": 0.17694,
            "39": 0.17551,
            "40": 0.38673,
            "41": 0.31702,
            "42": 0.17359,
            "43": 0.17781,
            "44": 0.17499,
            "45": 0.17326,
            "46": 0.17496,
            "47": 0.17486,
            "48": 0.17727,
            "49": 0.17954,
            "50": 0.17661
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --num-layers-per-virtual-pipeline-stage: 1
  --use-distributed-optimizer: true
  --overlap-grad-reduce: true
  --overlap-param-gather: true
  --overlap-param-gather-with-optimizer-step: true
  --check-weight-hash-across-dp-replicas-interval: 10
  --ckpt-fully-parallel-load: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.93716, "5": 10.93089, "10": 10.91334, "15": 10.86325, "20": 10.77318, "25": 10.60315, "30": 10.40411, "35": 10.3138, "40": 10.12329, "45": 9.87486, "50": 9.94506}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 22727702.0, "5": 22715428.0, "10": 22918152.0, "15": 22820670.0, "20": 22693568.0, "25": 22818988.0, "30": 22631572.0, "35": 22789012.0, "40": 22657512.0, "45": 22675112.0, "50": 22905278.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 522870272.0, "5": 522870272.0, "10": 522870272.0, "15": 522870272.0, "20": 522870272.0, "25": 522870272.0, "30": 522870272.0, "35": 522870272.0, "40": 522870272.0, "45": 522870272.0, "50": 522870272.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3769915904.0, "5": 3913695744.0, "10": 3913695744.0, "15": 3913695744.0, "20": 3913695744.0, "25": 3913695744.0, "30": 3913695744.0, "35": 3913695744.0, "40": 3913695744.0, "45": 3913695744.0, "50": 3913695744.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 7.24858, "5": 0.16342, "10": 0.16417, "15": 0.16301, "20": 0.16387, "25": 0.16335, "30": 0.1683, "35": 0.16391, "40": 0.16288, "45": 0.16369, "50": 0.16166}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 10.9359, "5": 10.93225, "10": 10.91081, "15": 10.85723, "20": 10.77091, "25": 10.60558, "30": 10.40544, "35": 10.31364, "40": 10.12333, "45": 9.8756, "50": 9.94451}}, "num-zeros": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 22727686.0, "5": 22715312.0, "10": 22919004.0, "15": 22821282.0, "20": 22693812.0, "25": 22819580.0, "30": 22631132.0, "35": 22787906.0, "40": 22658304.0, "45": 22674764.0, "50": 22904438.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 523016192.0, "5": 523016192.0, "10": 523016192.0, "15": 523016192.0, "20": 523016192.0, "25": 523016192.0, "30": 523016192.0, "35": 523016192.0, "40": 523016192.0, "45": 523016192.0, "50": 523016192.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 3769943040.0, "5": 3914774528.0, "10": 3914774528.0, "15": 3914774528.0, "20": 3914774528.0, "25": 3914774528.0, "30": 3914774528.0, "35": 3914774528.0, "40": 3914774528.0, "45": 3914774528.0, "50": 3914774528.0}}, "iteration-time": {"start_step": 1, "end_step": 50, "step_interval": 5, "values": {"1": 20.57472, "5": 0.16226, "10": 0.16062, "15": 0.16146, "20": 0.16029, "25": 0.16236, "30": 0.15846, "35": 0.15883, "40": 0.15927, "45": 0.15925, "50": 0.15874}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 50
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 10000
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --num-layers-per-virtual-pipeline-stage: 1
  --use-distributed-optimizer: true
  --overlap-grad-reduce: true
  --untie-embeddings-and-output-weights: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --attention-backend: unfused
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.81548, "5": 10.85169, "10": 10.78684, "15": 10.79463, "20": 10.69045, "25": 10.52275, "30": 10.34527, "35": 10.25844, "40": 10.07025, "45": 9.8024, "50": 9.87631, "55": 9.85515, "60": 9.4664, "65": 8.9178, "70": 9.69242, "75": 9.37793, "80": 9.3667, "85": 9.57618, "90": 9.77295, "95": 9.46938, "100": 9.34557}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1518.0, "5": 1897.0, "10": 1358.0, "15": 1883.0, "20": 1663.0, "25": 1658.0, "30": 1821.0, "35": 1834.0, "40": 2126.0, "45": 2001.0, "50": 2342.0, "55": 2264.0, "60": 2399.0, "65": 2597.0, "70": 3202.0, "75": 2609.0, "80": 3258.0, "85": 3368.0, "90": 2954.0, "95": 3225.0, "100": 3428.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 733860352.0, "5": 733860352.0, "10": 733860352.0, "15": 733860352.0, "20": 733860352.0, "25": 733860352.0, "30": 733860352.0, "35": 733860352.0, "40": 733860352.0, "45": 733860352.0, "50": 733860352.0, "55": 733860352.0, "60": 733860352.0, "65": 733860352.0, "70": 733860352.0, "75": 733860352.0, "80": 733860352.0, "85": 733860352.0, "90": 733860352.0, "95": 733860352.0, "100": 733860352.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2368927744.0, "5": 2651687936.0, "10": 2651687936.0, "15": 2651687936.0, "20": 2651687936.0, "25": 2651687936.0, "30": 2651687936.0, "35": 2651687936.0, "40": 2651687936.0, "45": 2651687936.0, "50": 2651687936.0, "55": 2651687936.0, "60": 2651687936.0, "65": 2651687936.0, "70": 2651687936.0, "75": 2651687936.0, "80": 2651687936.0, "85": 2651687936.0, "90": 2651687936.0, "95": 2651687936.0, "100": 2651687936.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 8.22153, "5": 0.18949, "10": 0.19059, "15": 0.1919, "20": 0.19458, "25": 0.19448, "30": 0.19727, "35": 0.19298, "40": 0.19378, "45": 0.19233, "50": 0.19133, "55": 0.19285, "60": 0.19531, "65": 0.19211, "70": 0.18897, "75": 0.19058, "80": 0.18926, "85": 0.19119, "90": 0.18846, "95": 0.19006, "100": 0.19055}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.82555,
            "2": 10.83286,
            "3": 10.82762,
            "4": 10.7957,
            "5": 10.85697,
            "6": 10.86388,
            "7": 10.82617,
            "8": 10.82543,
            "9": 10.83586,
            "10": 10.7963,
            "11": 10.87822,
            "12": 10.85823,
            "13": 10.85418,
            "14": 10.87517,
            "15": 10.79204,
            "16": 10.80305,
            "17": 10.77433,
            "18": 10.80462,
            "19": 10.79337,
            "20": 10.69556,
            "21": 10.68641,
            "22": 10.53147,
            "23": 10.70631,
            "24": 10.57272,
            "25": 10.51439,
            "26": 10.58989,
            "27": 10.60708,
            "28": 10.57003,
            "29": 10.5893,
            "30": 10.34669,
            "31": 10.07712,
            "32": 10.46192,
            "33": 10.4548,
            "34": 10.1966,
            "35": 10.2529,
            "36": 10.20971,
            "37": 10.34489,
            "38": 10.1779,
            "39": 10.40615,
            "40": 10.07413,
            "41": 10.12733,
            "42": 10.2082,
            "43": 9.81191,
            "44": 9.93355,
            "45": 9.80953,
            "46": 9.79775,
            "47": 10.11572,
            "48": 9.83237,
            "49": 9.50279,
            "50": 9.8818,
            "51": 9.8346,
            "52": 9.71755,
            "53": 10.05121,
            "54": 9.94375,
            "55": 9.87452,
            "56": 9.60291,
            "57": 9.45086,
            "58": 9.81098,
            "59": 9.56395,
            "60": 9.47154,
            "61": 9.66555,
            "62": 9.96351,
            "63": 9.34708,
            "64": 9.74296,
            "65": 8.92132,
            "66": 9.67854,
            "67": 9.3522,
            "68": 9.76559,
            "69": 9.77742,
            "70": 9.70406,
            "71": 9.601,
            "72": 9.54984,
            "73": 9.46046,
            "74": 8.89067,
            "75": 9.38738,
            "76": 9.04468,
            "77": 10.03651,
            "78": 9.69957,
            "79": 9.34723,
            "80": 9.37822,
            "81": 9.4542,
            "82": 9.67532,
            "83": 9.28445,
            "84": 9.39112,
            "85": 9.58662,
            "86": 9.04692,
            "87": 9.5697,
            "88": 9.72082,
            "89": 9.56729,
            "90": 9.79474,
            "91": 9.30452,
            "92": 9.32188,
            "93": 9.05169,
            "94": 8.79001,
            "95": 9.49179,
            "96": 9.48712,
            "97": 9.2659,
            "98": 9.62594,
            "99": 8.85252,
            "100": 9.35905
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1622.0,
            "2": 1729.0,
            "3": 1639.0,
            "4": 1678.0,
            "5": 1914.0,
            "6": 1900.0,
            "7": 1843.0,
            "8": 1671.0,
            "9": 1818.0,
            "10": 1434.0,
            "11": 1893.0,
            "12": 1772.0,
            "13": 1950.0,
            "14": 1863.0,
            "15": 1885.0,
            "16": 1738.0,
            "17": 1743.0,
            "18": 1679.0,
            "19": 1702.0,
            "20": 1729.0,
            "21": 1914.0,
            "22": 1696.0,
            "23": 1958.0,
            "24": 1574.0,
            "25": 1531.0,
            "26": 1707.0,
            "27": 1804.0,
            "28": 1939.0,
            "29": 1973.0,
            "30": 2024.0,
            "31": 1494.0,
            "32": 1960.0,
            "33": 1971.0,
            "34": 1813.0,
            "35": 1950.0,
            "36": 2051.0,
            "37": 2382.0,
            "38": 2098.0,
            "39": 2262.0,
            "40": 2137.0,
            "41": 2191.0,
            "42": 2258.0,
            "43": 2023.0,
            "44": 2104.0,
            "45": 2062.0,
            "46": 2219.0,
            "47": 2490.0,
            "48": 2393.0,
            "49": 2210.0,
            "50": 2478.0,
            "51": 2565.0,
            "52": 2533.0,
            "53": 2848.0,
            "54": 2623.0,
            "55": 2487.0,
            "56": 2760.0,
            "57": 2384.0,
            "58": 2929.0,
            "59": 2814.0,
            "60": 2418.0,
            "61": 2943.0,
            "62": 2620.0,
            "63": 2470.0,
            "64": 2875.0,
            "65": 2652.0,
            "66": 3070.0,
            "67": 2805.0,
            "68": 2612.0,
            "69": 3021.0,
            "70": 3054.0,
            "71": 2976.0,
            "72": 2577.0,
            "73": 2989.0,
            "74": 2019.0,
            "75": 2649.0,
            "76": 3096.0,
            "77": 3057.0,
            "78": 2960.0,
            "79": 3045.0,
            "80": 3030.0,
            "81": 3357.0,
            "82": 3310.0,
            "83": 2751.0,
            "84": 3176.0,
            "85": 3342.0,
            "86": 2659.0,
            "87": 3504.0,
            "88": 3082.0,
            "89": 3296.0,
            "90": 3304.0,
            "91": 2910.0,
            "92": 3195.0,
            "93": 2812.0,
            "94": 3284.0,
            "95": 3090.0,
            "96": 3330.0,
            "97": 3095.0,
            "98": 3486.0,
            "99": 3148.0,
            "100": 3169.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 759682560.0,
            "2": 759682560.0,
            "3": 759682560.0,
            "4": 759682560.0,
            "5": 759682560.0,
            "6": 759682560.0,
            "7": 759682560.0,
            "8": 759682560.0,
            "9": 759682560.0,
            "10": 759682560.0,
            "11": 759682560.0,
            "12": 759682560.0,
            "13": 759682560.0,
            "14": 759682560.0,
            "15": 759682560.0,
            "16": 759682560.0,
            "17": 759682560.0,
            "18": 759682560.0,
            "19": 759682560.0,
            "20": 759682560.0,
            "21": 759682560.0,
            "22": 759682560.0,
            "23": 759682560.0,
            "24": 759682560.0,
            "25": 759682560.0,
            "26": 759682560.0,
            "27": 759682560.0,
            "28": 759682560.0,
            "29": 759682560.0,
            "30": 759682560.0,
            "31": 759682560.0,
            "32": 759682560.0,
            "33": 759682560.0,
            "34": 759682560.0,
            "35": 759682560.0,
            "36": 759682560.0,
            "37": 759682560.0,
            "38": 759682560.0,
            "39": 759682560.0,
            "40": 759682560.0,
            "41": 759682560.0,
            "42": 759682560.0,
            "43": 759682560.0,
            "44": 759682560.0,
            "45": 759682560.0,
            "46": 759682560.0,
            "47": 759682560.0,
            "48": 759682560.0,
            "49": 759682560.0,
            "50": 759682560.0,
            "51": 759682560.0,
            "52": 759682560.0,
            "53": 759682560.0,
            "54": 759682560.0,
            "55": 759682560.0,
            "56": 759682560.0,
            "57": 759682560.0,
            "58": 759682560.0,
            "59": 759682560.0,
            "60": 759682560.0,
            "61": 759682560.0,
            "62": 759682560.0,
            "63": 759682560.0,
            "64": 759682560.0,
            "65": 759682560.0,
            "66": 759682560.0,
            "67": 759682560.0,
            "68": 759682560.0,
            "69": 759682560.0,
            "70": 759682560.0,
            "71": 759682560.0,
            "72": 759682560.0,
            "73": 759682560.0,
            "74": 759682560.0,
            "75": 759682560.0,
            "76": 759682560.0,
            "77": 759682560.0,
            "78": 759682560.0,
            "79": 759682560.0,
            "80": 759682560.0,
            "81": 759682560.0,
            "82": 759682560.0,
            "83": 759682560.0,
            "84": 759682560.0,
            "85": 759682560.0,
            "86": 759682560.0,
            "87": 759682560.0,
            "88": 759682560.0,
            "89": 759682560.0,
            "90": 759682560.0,
            "91": 759682560.0,
            "92": 759682560.0,
            "93": 759682560.0,
            "94": 759682560.0,
            "95": 759682560.0,
            "96": 759682560.0,
            "97": 759682560.0,
            "98": 759682560.0,
            "99": 759682560.0,
            "100": 759682560.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2395798528.0,
            "2": 2677510144.0,
            "3": 2677510144.0,
            "4": 2677510144.0,
            "5": 2677510144.0,
            "6": 2677510144.0,
            "7": 2677510144.0,
            "8": 2677510144.0,
            "9": 2677510144.0,
            "10": 2677510144.0,
            "11": 2677510144.0,
            "12": 2677510144.0,
            "13": 2677510144.0,
            "14": 2677510144.0,
            "15": 2677510144.0,
            "16": 2677510144.0,
            "17": 2677510144.0,
            "18": 2677510144.0,
            "19": 2677510144.0,
            "20": 2677510144.0,
            "21": 2677510144.0,
            "22": 2677510144.0,
            "23": 2677510144.0,
            "24": 2677510144.0,
            "25": 2677510144.0,
            "26": 2677510144.0,
            "27": 2677510144.0,
            "28": 2677510144.0,
            "29": 2677510144.0,
            "30": 2677510144.0,
            "31": 2677510144.0,
            "32": 2677510144.0,
            "33": 2677510144.0,
            "34": 2677510144.0,
            "35": 2677510144.0,
            "36": 2677510144.0,
            "37": 2677510144.0,
            "38": 2677510144.0,
            "39": 2677510144.0,
            "40": 2677510144.0,
            "41": 2677510144.0,
            "42": 2677510144.0,
            "43": 2677510144.0,
            "44": 2677510144.0,
            "45": 2677510144.0,
            "46": 2677510144.0,
            "47": 2677510144.0,
            "48": 2677510144.0,
            "49": 2677510144.0,
            "50": 2677510144.0,
            "51": 2677510144.0,
            "52": 2677510144.0,
            "53": 2677510144.0,
            "54": 2677510144.0,
            "55": 2677510144.0,
            "56": 2677510144.0,
            "57": 2677510144.0,
            "58": 2677510144.0,
            "59": 2677510144.0,
            "60": 2677510144.0,
            "61": 2677510144.0,
            "62": 2677510144.0,
            "63": 2677510144.0,
            "64": 2677510144.0,
            "65": 2677510144.0,
            "66": 2677510144.0,
            "67": 2677510144.0,
            "68": 2677510144.0,
            "69": 2677510144.0,
            "70": 2677510144.0,
            "71": 2677510144.0,
            "72": 2677510144.0,
            "73": 2677510144.0,
            "74": 2677510144.0,
            "75": 2677510144.0,
            "76": 2677510144.0,
            "77": 2677510144.0,
            "78": 2677510144.0,
            "79": 2677510144.0,
            "80": 2677510144.0,
            "81": 2677510144.0,
            "82": 2677510144.0,
            "83": 2677510144.0,
            "84": 2677510144.0,
            "85": 2677510144.0,
            "86": 2677510144.0,
            "87": 2677510144.0,
            "88": 2677510144.0,
            "89": 2677510144.0,
            "90": 2677510144.0,
            "91": 2677510144.0,
            "92": 2677510144.0,
            "93": 2677510144.0,
            "94": 2677510144.0,
            "95": 2677510144.0,
            "96": 2677510144.0,
            "97": 2677510144.0,
            "98": 2677510144.0,
            "99": 2677510144.0,
            "100": 2677510144.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 8.98992,
            "3": 0.1294,
            "4": 0.11595,
            "5": 0.11241,
            "6": 0.11556,
            "7": 0.11737,
            "8": 0.11364,
            "9": 0.11507,
            "10": 0.117,
            "11": 0.11605,
            "12": 0.11662,
            "13": 0.11519,
            "14": 0.11521,
            "15": 0.11573,
            "16": 0.11538,
            "17": 0.11465,
            "18": 0.1166,
            "19": 0.11515,
            "20": 0.11437,
            "21": 0.11558,
            "22": 0.11522,
            "23": 0.1153,
            "24": 0.11438,
            "25": 0.11808,
            "26": 0.11687,
            "27": 0.11525,
            "28": 0.11383,
            "29": 0.11673,
            "30": 0.11524,
            "31": 0.1166,
            "32": 0.11702,
            "33": 0.11405,
            "34": 0.11492,
            "35": 0.11579,
            "36": 0.11598,
            "37": 0.11831,
            "38": 0.117,
            "39": 0.11673,
            "40": 0.11174,
            "41": 0.11645,
            "42": 0.11308,
            "43": 0.11563,
            "44": 0.11397,
            "45": 0.11626,
            "46": 0.11355,
            "47": 0.11499,
            "48": 0.11524,
            "49": 0.11557,
            "50": 0.11265,
            "51": 0.11887,
            "52": 0.11543,
            "53": 0.1134,
            "54": 0.11629,
            "55": 0.11697,
            "56": 0.11712,
            "57": 0.11885,
            "58": 0.11734,
            "59": 0.11534,
            "60": 0.11888,
            "61": 0.11756,
            "62": 0.11757,
            "63": 0.11525,
            "64": 0.11676,
            "65": 0.1176,
            "66": 0.11477,
            "67": 0.11557,
            "68": 0.116,
            "69": 0.11786,
            "70": 0.11593,
            "71": 0.11704,
            "72": 0.11671,
            "73": 0.11593,
            "74": 0.11743,
            "75": 0.11579,
            "76": 0.11805,
            "77": 0.11433,
            "78": 0.11717,
            "79": 0.11771,
            "80": 0.11637,
            "81": 0.11676,
            "82": 0.11803,
            "83": 0.11703,
            "84": 0.11777,
            "85": 0.11644,
            "86": 0.11704,
            "87": 0.11621,
            "88": 0.11725,
            "89": 0.11643,
            "90": 0.1164,
            "91": 0.11606,
            "92": 0.1162,
            "93": 0.11808,
            "94": 0.11939,
            "95": 0.11748,
            "96": 0.11697,
            "97": 0.11629,
            "98": 0.11519,
            "99": 0.11719,
            "100": 0.11973
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.84517,
            "2": 10.85349,
            "3": 10.8539,
            "4": 10.83826,
            "5": 10.87422,
            "6": 10.89306,
            "7": 10.85452,
            "8": 10.8626,
            "9": 10.86463,
            "10": 10.82907,
            "11": 10.88787,
            "12": 10.87098,
            "13": 10.87914,
            "14": 10.89069,
            "15": 10.81973,
            "16": 10.83156,
            "17": 10.79863,
            "18": 10.8165,
            "19": 10.81889,
            "20": 10.72685,
            "21": 10.7058,
            "22": 10.5635,
            "23": 10.7279,
            "24": 10.6076,
            "25": 10.55128,
            "26": 10.60747,
            "27": 10.62771,
            "28": 10.5826,
            "29": 10.59962,
            "30": 10.36565,
            "31": 10.1199,
            "32": 10.47544,
            "33": 10.46636,
            "34": 10.22008,
            "35": 10.27436,
            "36": 10.2259,
            "37": 10.3573,
            "38": 10.19161,
            "39": 10.41342,
            "40": 10.09564,
            "41": 10.15513,
            "42": 10.22085,
            "43": 9.82792,
            "44": 9.96282,
            "45": 9.83422,
            "46": 9.8221,
            "47": 10.14764,
            "48": 9.84684,
            "49": 9.53373,
            "50": 9.90531,
            "51": 9.85118,
            "52": 9.73512,
            "53": 10.05864,
            "54": 9.94367,
            "55": 9.87297,
            "56": 9.61699,
            "57": 9.46751,
            "58": 9.82221,
            "59": 9.57334,
            "60": 9.48862,
            "61": 9.67922,
            "62": 9.97512,
            "63": 9.37044,
            "64": 9.76642,
            "65": 8.9343,
            "66": 9.69461,
            "67": 9.35362,
            "68": 9.76826,
            "69": 9.77678,
            "70": 9.72363,
            "71": 9.59894,
            "72": 9.56455,
            "73": 9.48329,
            "74": 8.92064,
            "75": 9.40392,
            "76": 9.05297,
            "77": 10.04178,
            "78": 9.69879,
            "79": 9.35126,
            "80": 9.38212,
            "81": 9.45864,
            "82": 9.67516,
            "83": 9.2841,
            "84": 9.39311,
            "85": 9.58936,
            "86": 9.05178,
            "87": 9.56418,
            "88": 9.71755,
            "89": 9.57129,
            "90": 9.78202,
            "91": 9.30611,
            "92": 9.32046,
            "93": 9.03939,
            "94": 8.7952,
            "95": 9.47908,
            "96": 9.48453,
            "97": 9.26989,
            "98": 9.62564,
            "99": 8.84254,
            "100": 9.3498
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1655.0,
            "2": 1697.0,
            "3": 1773.0,
            "4": 1782.0,
            "5": 1897.0,
            "6": 1802.0,
            "7": 1874.0,
            "8": 1653.0,
            "9": 1814.0,
            "10": 1441.0,
            "11": 1909.0,
            "12": 1645.0,
            "13": 1931.0,
            "14": 1678.0,
            "15": 1918.0,
            "16": 1945.0,
            "17": 1707.0,
            "18": 1635.0,
            "19": 1720.0,
            "20": 1609.0,
            "21": 1813.0,
            "22": 1682.0,
            "23": 1908.0,
            "24": 1620.0,
            "25": 1563.0,
            "26": 1640.0,
            "27": 1775.0,
            "28": 1873.0,
            "29": 1969.0,
            "30": 1896.0,
            "31": 1588.0,
            "32": 1907.0,
            "33": 2180.0,
            "34": 1850.0,
            "35": 1987.0,
            "36": 1901.0,
            "37": 2358.0,
            "38": 2253.0,
            "39": 2364.0,
            "40": 2173.0,
            "41": 2234.0,
            "42": 2281.0,
            "43": 2027.0,
            "44": 2127.0,
            "45": 2170.0,
            "46": 2317.0,
            "47": 2438.0,
            "48": 2391.0,
            "49": 2276.0,
            "50": 2205.0,
            "51": 2647.0,
            "52": 2533.0,
            "53": 2935.0,
            "54": 2623.0,
            "55": 2386.0,
            "56": 2664.0,
            "57": 2391.0,
            "58": 2863.0,
            "59": 2758.0,
            "60": 2456.0,
            "61": 2865.0,
            "62": 2559.0,
            "63": 2463.0,
            "64": 3014.0,
            "65": 2526.0,
            "66": 3010.0,
            "67": 2723.0,
            "68": 2616.0,
            "69": 2739.0,
            "70": 3188.0,
            "71": 2919.0,
            "72": 2355.0,
            "73": 2921.0,
            "74": 1944.0,
            "75": 2454.0,
            "76": 3005.0,
            "77": 3204.0,
            "78": 3244.0,
            "79": 3047.0,
            "80": 3220.0,
            "81": 3492.0,
            "82": 3205.0,
            "83": 2692.0,
            "84": 3149.0,
            "85": 3256.0,
            "86": 2562.0,
            "87": 3753.0,
            "88": 2921.0,
            "89": 3239.0,
            "90": 3001.0,
            "91": 2656.0,
            "92": 3146.0,
            "93": 2642.0,
            "94": 3289.0,
            "95": 3324.0,
            "96": 3350.0,
            "97": 3079.0,
            "98": 3564.0,
            "99": 3215.0,
            "100": 3238.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 759682560.0,
            "2": 759682560.0,
            "3": 759682560.0,
            "4": 759682560.0,
            "5": 759682560.0,
            "6": 759682560.0,
            "7": 759682560.0,
            "8": 759682560.0,
            "9": 759682560.0,
            "10": 759682560.0,
            "11": 759682560.0,
            "12": 759682560.0,
            "13": 759682560.0,
            "14": 759682560.0,
            "15": 759682560.0,
            "16": 759682560.0,
            "17": 759682560.0,
            "18": 759682560.0,
            "19": 759682560.0,
            "20": 759682560.0,
            "21": 759682560.0,
            "22": 759682560.0,
            "23": 759682560.0,
            "24": 759682560.0,
            "25": 759682560.0,
            "26": 759682560.0,
            "27": 759682560.0,
            "28": 759682560.0,
            "29": 759682560.0,
            "30": 759682560.0,
            "31": 759682560.0,
            "32": 759682560.0,
            "33": 759682560.0,
            "34": 759682560.0,
            "35": 759682560.0,
            "36": 759682560.0,
            "37": 759682560.0,
            "38": 759682560.0,
            "39": 759682560.0,
            "40": 759682560.0,
            "41": 759682560.0,
            "42": 759682560.0,
            "43": 759682560.0,
            "44": 759682560.0,
            "45": 759682560.0,
            "46": 759682560.0,
            "47": 759682560.0,
            "48": 759682560.0,
            "49": 759682560.0,
            "50": 759682560.0,
            "51": 759682560.0,
            "52": 759682560.0,
            "53": 759682560.0,
            "54": 759682560.0,
            "55": 759682560.0,
            "56": 759682560.0,
            "57": 759682560.0,
            "58": 759682560.0,
            "59": 759682560.0,
            "60": 759682560.0,
            "61": 759682560.0,
            "62": 759682560.0,
            "63": 759682560.0,
            "64": 759682560.0,
            "65": 759682560.0,
            "66": 759682560.0,
            "67": 759682560.0,
            "68": 759682560.0,
            "69": 759682560.0,
            "70": 759682560.0,
            "71": 759682560.0,
            "72": 759682560.0,
            "73": 759682560.0,
            "74": 759682560.0,
            "75": 759682560.0,
            "76": 759682560.0,
            "77": 759682560.0,
            "78": 759682560.0,
            "79": 759682560.0,
            "80": 759682560.0,
            "81": 759682560.0,
            "82": 759682560.0,
            "83": 759682560.0,
            "84": 759682560.0,
            "85": 759682560.0,
            "86": 759682560.0,
            "87": 759682560.0,
            "88": 759682560.0,
            "89": 759682560.0,
            "90": 759682560.0,
            "91": 759682560.0,
            "92": 759682560.0,
            "93": 759682560.0,
            "94": 759682560.0,
            "95": 759682560.0,
            "96": 759682560.0,
            "97": 759682560.0,
            "98": 759682560.0,
            "99": 759682560.0,
            "100": 759682560.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2358049792.0,
            "2": 2639761408.0,
            "3": 2639761408.0,
            "4": 2639761408.0,
            "5": 2639761408.0,
            "6": 2639761408.0,
            "7": 2639761408.0,
            "8": 2639761408.0,
            "9": 2639761408.0,
            "10": 2639761408.0,
            "11": 2639761408.0,
            "12": 2639761408.0,
            "13": 2639761408.0,
            "14": 2639761408.0,
            "15": 2639761408.0,
            "16": 2639761408.0,
            "17": 2639761408.0,
            "18": 2639761408.0,
            "19": 2639761408.0,
            "20": 2639761408.0,
            "21": 2639761408.0,
            "22": 2639761408.0,
            "23": 2639761408.0,
            "24": 2639761408.0,
            "25": 2639761408.0,
            "26": 2639761408.0,
            "27": 2639761408.0,
            "28": 2639761408.0,
            "29": 2639761408.0,
            "30": 2639761408.0,
            "31": 2639761408.0,
            "32": 2639761408.0,
            "33": 2639761408.0,
            "34": 2639761408.0,
            "35": 2639761408.0,
            "36": 2639761408.0,
            "37": 2639761408.0,
            "38": 2639761408.0,
            "39": 2639761408.0,
            "40": 2639761408.0,
            "41": 2639761408.0,
            "42": 2639761408.0,
            "43": 2639761408.0,
            "44": 2639761408.0,
            "45": 2639761408.0,
            "46": 2639761408.0,
            "47": 2639761408.0,
            "48": 2639761408.0,
            "49": 2639761408.0,
            "50": 2639761408.0,
            "51": 2639761408.0,
            "52": 2639761408.0,
            "53": 2639761408.0,
            "54": 2639761408.0,
            "55": 2639761408.0,
            "56": 2639761408.0,
            "57": 2639761408.0,
            "58": 2639761408.0,
            "59": 2639761408.0,
            "60": 2639761408.0,
            "61": 2639761408.0,
            "62": 2639761408.0,
            "63": 2639761408.0,
            "64": 2639761408.0,
            "65": 2639761408.0,
            "66": 2639761408.0,
            "67": 2639761408.0,
            "68": 2639761408.0,
            "69": 2639761408.0,
            "70": 2639761408.0,
            "71": 2639761408.0,
            "72": 2639761408.0,
            "73": 2639761408.0,
            "74": 2639761408.0,
            "75": 2639761408.0,
            "76": 2639761408.0,
            "77": 2639761408.0,
            "78": 2639761408.0,
            "79": 2639761408.0,
            "80": 2639761408.0,
            "81": 2639761408.0,
            "82": 2639761408.0,
            "83": 2639761408.0,
            "84": 2639761408.0,
            "85": 2639761408.0,
            "86": 2639761408.0,
            "87": 2639761408.0,
            "88": 2639761408.0,
            "89": 2639761408.0,
            "90": 2639761408.0,
            "91": 2639761408.0,
            "92": 2639761408.0,
            "93": 2639761408.0,
            "94": 2639761408.0,
            "95": 2639761408.0,
            "96": 2639761408.0,
            "97": 2639761408.0,
            "98": 2639761408.0,
            "99": 2639761408.0,
            "100": 2639761408.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 9.86816,
            "2": 0.1216,
            "3": 0.1166,
            "4": 0.08589,
            "5": 0.08587,
            "6": 0.08491,
            "7": 0.0844,
            "8": 0.08084,
            "9": 0.07931,
            "10": 0.0798,
            "11": 0.07849,
            "12": 0.07832,
            "13": 0.0803,
            "14": 0.08035,
            "15": 0.07881,
            "16": 0.07881,
            "17": 0.08069,
            "18": 0.0794,
            "19": 0.07935,
            "20": 0.07915,
            "21": 0.07896,
            "22": 0.08062,
            "23": 0.08009,
            "24": 0.07923,
            "25": 0.07839,
            "26": 0.08166,
            "27": 0.07977,
            "28": 0.08005,
            "29": 0.08017,
            "30": 0.08118,
            "31": 0.0811,
            "32": 0.07964,
            "33": 0.08086,
            "34": 0.08069,
            "35": 0.07986,
            "36": 0.08098,
            "37": 0.07939,
            "38": 0.07947,
            "39": 0.07943,
            "40": 0.08028,
            "41": 0.07981,
            "42": 0.08016,
            "43": 0.08245,
            "44": 0.0799,
            "45": 0.08077,
            "46": 0.08028,
            "47": 0.07892,
            "48": 0.07997,
            "49": 0.08314,
            "50": 0.08027,
            "51": 0.08246,
            "52": 0.07991,
            "53": 0.08005,
            "54": 0.07954,
            "55": 0.07969,
            "56": 0.07938,
            "57": 0.07891,
            "58": 0.07987,
            "59": 0.0798,
            "60": 0.08057,
            "61": 0.07888,
            "62": 0.07914,
            "63": 0.07997,
            "64": 0.07986,
            "65": 0.07977,
            "66": 0.07953,
            "67": 0.07927,
            "68": 0.08003,
            "69": 0.08005,
            "70": 0.07926,
            "71": 0.07923,
            "72": 0.07966,
            "73": 0.08033,
            "74": 0.08038,
            "75": 0.07956,
            "76": 0.07935,
            "77": 0.07891,
            "78": 0.08007,
            "79": 0.08135,
            "80": 0.08025,
            "81": 0.07919,
            "82": 0.07932,
            "83": 0.07953,
            "84": 0.07937,
            "85": 0.0797,
            "86": 0.08168,
            "87": 0.08023,
            "88": 0.07957,
            "89": 0.08011,
            "90": 0.07975,
            "91": 0.08043,
            "92": 0.08179,
            "93": 0.08049,
            "94": 0.07951,
            "95": 0.08026,
            "96": 0.08,
            "97": 0.07948,
            "98": 0.0805,
            "99": 0.07879,
            "100": 0.07954
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.81548,
            "2": 10.8208,
            "3": 10.81271,
            "4": 10.78877,
            "5": 10.85169,
            "6": 10.8704,
            "7": 10.831,
            "8": 10.83427,
            "9": 10.83995,
            "10": 10.78684,
            "11": 10.88021,
            "12": 10.85971,
            "13": 10.86589,
            "14": 10.87818,
            "15": 10.79463,
            "16": 10.79607,
            "17": 10.7688,
            "18": 10.81045,
            "19": 10.79836,
            "20": 10.69045,
            "21": 10.67932,
            "22": 10.52101,
            "23": 10.70743,
            "24": 10.57665,
            "25": 10.52275,
            "26": 10.595,
            "27": 10.5855,
            "28": 10.56131,
            "29": 10.56894,
            "30": 10.34527,
            "31": 10.10019,
            "32": 10.45229,
            "33": 10.44356,
            "34": 10.20397,
            "35": 10.25844,
            "36": 10.2103,
            "37": 10.32252,
            "38": 10.1661,
            "39": 10.38156,
            "40": 10.07025,
            "41": 10.13542,
            "42": 10.19416,
            "43": 9.80626,
            "44": 9.92627,
            "45": 9.8024,
            "46": 9.79983,
            "47": 10.11662,
            "48": 9.81307,
            "49": 9.50044,
            "50": 9.87631,
            "51": 9.82781,
            "52": 9.71723,
            "53": 10.03979,
            "54": 9.92177,
            "55": 9.85515,
            "56": 9.59253,
            "57": 9.44144,
            "58": 9.79602,
            "59": 9.55567,
            "60": 9.4664,
            "61": 9.6666,
            "62": 9.95363,
            "63": 9.33626,
            "64": 9.74152,
            "65": 8.9178,
            "66": 9.66632,
            "67": 9.34424,
            "68": 9.75273,
            "69": 9.75727,
            "70": 9.69242,
            "71": 9.5868,
            "72": 9.55099,
            "73": 9.46289,
            "74": 8.90671,
            "75": 9.37793,
            "76": 9.04952,
            "77": 10.0301,
            "78": 9.69192,
            "79": 9.33464,
            "80": 9.3667,
            "81": 9.44418,
            "82": 9.66164,
            "83": 9.27209,
            "84": 9.38066,
            "85": 9.57618,
            "86": 9.0424,
            "87": 9.55703,
            "88": 9.70385,
            "89": 9.56619,
            "90": 9.77295,
            "91": 9.29396,
            "92": 9.31912,
            "93": 9.03406,
            "94": 8.78526,
            "95": 9.46938,
            "96": 9.47497,
            "97": 9.25688,
            "98": 9.61835,
            "99": 8.83233,
            "100": 9.34557
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1518.0,
            "2": 1697.0,
            "3": 1668.0,
            "4": 1721.0,
            "5": 1897.0,
            "6": 1823.0,
            "7": 1719.0,
            "8": 1637.0,
            "9": 1742.0,
            "10": 1358.0,
            "11": 1882.0,
            "12": 1781.0,
            "13": 1847.0,
            "14": 1753.0,
            "15": 1883.0,
            "16": 1755.0,
            "17": 1752.0,
            "18": 1683.0,
            "19": 1817.0,
            "20": 1663.0,
            "21": 1795.0,
            "22": 1698.0,
            "23": 1996.0,
            "24": 1620.0,
            "25": 1658.0,
            "26": 1727.0,
            "27": 1781.0,
            "28": 2085.0,
            "29": 1952.0,
            "30": 1821.0,
            "31": 1646.0,
            "32": 1879.0,
            "33": 2034.0,
            "34": 1861.0,
            "35": 1834.0,
            "36": 1913.0,
            "37": 2333.0,
            "38": 2070.0,
            "39": 2245.0,
            "40": 2126.0,
            "41": 2311.0,
            "42": 2213.0,
            "43": 1907.0,
            "44": 1951.0,
            "45": 2001.0,
            "46": 2218.0,
            "47": 2533.0,
            "48": 2436.0,
            "49": 2188.0,
            "50": 2342.0,
            "51": 2562.0,
            "52": 2529.0,
            "53": 3031.0,
            "54": 2744.0,
            "55": 2264.0,
            "56": 2794.0,
            "57": 2183.0,
            "58": 2882.0,
            "59": 2769.0,
            "60": 2399.0,
            "61": 3031.0,
            "62": 2706.0,
            "63": 2388.0,
            "64": 3046.0,
            "65": 2597.0,
            "66": 3092.0,
            "67": 2730.0,
            "68": 2858.0,
            "69": 2982.0,
            "70": 3202.0,
            "71": 2964.0,
            "72": 2450.0,
            "73": 2817.0,
            "74": 1834.0,
            "75": 2609.0,
            "76": 3000.0,
            "77": 3180.0,
            "78": 3113.0,
            "79": 3145.0,
            "80": 3258.0,
            "81": 3645.0,
            "82": 3075.0,
            "83": 2812.0,
            "84": 3295.0,
            "85": 3368.0,
            "86": 2730.0,
            "87": 3717.0,
            "88": 3056.0,
            "89": 3252.0,
            "90": 2954.0,
            "91": 2798.0,
            "92": 3089.0,
            "93": 2742.0,
            "94": 3420.0,
            "95": 3225.0,
            "96": 3362.0,
            "97": 3118.0,
            "98": 3671.0,
            "99": 3341.0,
            "100": 3428.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 730321408.0,
            "2": 730321408.0,
            "3": 730321408.0,
            "4": 730321408.0,
            "5": 730321408.0,
            "6": 730321408.0,
            "7": 730321408.0,
            "8": 730321408.0,
            "9": 730321408.0,
            "10": 730321408.0,
            "11": 730321408.0,
            "12": 730321408.0,
            "13": 730321408.0,
            "14": 730321408.0,
            "15": 730321408.0,
            "16": 730321408.0,
            "17": 730321408.0,
            "18": 730321408.0,
            "19": 730321408.0,
            "20": 730321408.0,
            "21": 730321408.0,
            "22": 730321408.0,
            "23": 730321408.0,
            "24": 730321408.0,
            "25": 730321408.0,
            "26": 730321408.0,
            "27": 730321408.0,
            "28": 730321408.0,
            "29": 730321408.0,
            "30": 730321408.0,
            "31": 730321408.0,
            "32": 730321408.0,
            "33": 730321408.0,
            "34": 730321408.0,
            "35": 730321408.0,
            "36": 730321408.0,
            "37": 730321408.0,
            "38": 730321408.0,
            "39": 730321408.0,
            "40": 730321408.0,
            "41": 730321408.0,
            "42": 730321408.0,
            "43": 730321408.0,
            "44": 730321408.0,
            "45": 730321408.0,
            "46": 730321408.0,
            "47": 730321408.0,
            "48": 730321408.0,
            "49": 730321408.0,
            "50": 730321408.0,
            "51": 730321408.0,
            "52": 730321408.0,
            "53": 730321408.0,
            "54": 730321408.0,
            "55": 730321408.0,
            "56": 730321408.0,
            "57": 730321408.0,
            "58": 730321408.0,
            "59": 730321408.0,
            "60": 730321408.0,
            "61": 730321408.0,
            "62": 730321408.0,
            "63": 730321408.0,
            "64": 730321408.0,
            "65": 730321408.0,
            "66": 730321408.0,
            "67": 730321408.0,
            "68": 730321408.0,
            "69": 730321408.0,
            "70": 730321408.0,
            "71": 730321408.0,
            "72": 730321408.0,
            "73": 730321408.0,
            "74": 730321408.0,
            "75": 730321408.0,
            "76": 730321408.0,
            "77": 730321408.0,
            "78": 730321408.0,
            "79": 730321408.0,
            "80": 730321408.0,
            "81": 730321408.0,
            "82": 730321408.0,
            "83": 730321408.0,
            "84": 730321408.0,
            "85": 730321408.0,
            "86": 730321408.0,
            "87": 730321408.0,
            "88": 730321408.0,
            "89": 730321408.0,
            "90": 730321408.0,
            "91": 730321408.0,
            "92": 730321408.0,
            "93": 730321408.0,
            "94": 730321408.0,
            "95": 730321408.0,
            "96": 730321408.0,
            "97": 730321408.0,
            "98": 730321408.0,
            "99": 730321408.0,
            "100": 730321408.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2366437376.0,
            "2": 2648148992.0,
            "3": 2648148992.0,
            "4": 2648148992.0,
            "5": 2648148992.0,
            "6": 2648148992.0,
            "7": 2648148992.0,
            "8": 2648148992.0,
            "9": 2648148992.0,
            "10": 2648148992.0,
            "11": 2648148992.0,
            "12": 2648148992.0,
            "13": 2648148992.0,
            "14": 2648148992.0,
            "15": 2648148992.0,
            "16": 2648148992.0,
            "17": 2648148992.0,
            "18": 2648148992.0,
            "19": 2648148992.0,
            "20": 2648148992.0,
            "21": 2648148992.0,
            "22": 2648148992.0,
            "23": 2648148992.0,
            "24": 2648148992.0,
            "25": 2648148992.0,
            "26": 2648148992.0,
            "27": 2648148992.0,
            "28": 2648148992.0,
            "29": 2648148992.0,
            "30": 2648148992.0,
            "31": 2648148992.0,
            "32": 2648148992.0,
            "33": 2648148992.0,
            "34": 2648148992.0,
            "35": 2648148992.0,
            "36": 2648148992.0,
            "37": 2648148992.0,
            "38": 2648148992.0,
            "39": 2648148992.0,
            "40": 2648148992.0,
            "41": 2648148992.0,
            "42": 2648148992.0,
            "43": 2648148992.0,
            "44": 2648148992.0,
            "45": 2648148992.0,
            "46": 2648148992.0,
            "47": 2648148992.0,
            "48": 2648148992.0,
            "49": 2648148992.0,
            "50": 2648148992.0,
            "51": 2648148992.0,
            "52": 2648148992.0,
            "53": 2648148992.0,
            "54": 2648148992.0,
            "55": 2648148992.0,
            "56": 2648148992.0,
            "57": 2648148992.0,
            "58": 2648148992.0,
            "59": 2648148992.0,
            "60": 2648148992.0,
            "61": 2648148992.0,
            "62": 2648148992.0,
            "63": 2648148992.0,
            "64": 2648148992.0,
            "65": 2648148992.0,
            "66": 2648148992.0,
            "67": 2648148992.0,
            "68": 2648148992.0,
            "69": 2648148992.0,
            "70": 2648148992.0,
            "71": 2648148992.0,
            "72": 2648148992.0,
            "73": 2648148992.0,
            "74": 2648148992.0,
            "75": 2648148992.0,
            "76": 2648148992.0,
            "77": 2648148992.0,
            "78": 2648148992.0,
            "79": 2648148992.0,
            "80": 2648148992.0,
            "81": 2648148992.0,
            "82": 2648148992.0,
            "83": 2648148992.0,
            "84": 2648148992.0,
            "85": 2648148992.0,
            "86": 2648148992.0,
            "87": 2648148992.0,
            "88": 2648148992.0,
            "89": 2648148992.0,
            "90": 2648148992.0,
            "91": 2648148992.0,
            "92": 2648148992.0,
            "93": 2648148992.0,
            "94": 2648148992.0,
            "95": 2648148992.0,
            "96": 2648148992.0,
            "97": 2648148992.0,
            "98": 2648148992.0,
            "99": 2648148992.0,
            "100": 2648148992.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 11.77273,
            "2": 0.15704,
            "3": 0.19484,
            "4": 0.13176,
            "5": 0.13019,
            "6": 0.12976,
            "7": 0.1302,
            "8": 0.12925,
            "9": 0.12988,
            "10": 0.13099,
            "11": 0.13015,
            "12": 0.1297,
            "13": 0.12988,
            "14": 0.13024,
            "15": 0.12985,
            "16": 0.12971,
            "17": 0.12961,
            "18": 0.1302,
            "19": 0.12963,
            "20": 0.12994,
            "21": 0.1299,
            "22": 0.13037,
            "23": 0.13043,
            "24": 0.12989,
            "25": 0.13018,
            "26": 0.13019,
            "27": 0.12985,
            "28": 0.13014,
            "29": 0.13068,
            "30": 0.13099,
            "31": 0.13197,
            "32": 0.13151,
            "33": 0.13168,
            "34": 0.1303,
            "35": 0.13073,
            "36": 0.13088,
            "37": 0.1307,
            "38": 0.13091,
            "39": 0.13292,
            "40": 0.13172,
            "41": 0.134,
            "42": 0.13157,
            "43": 0.13272,
            "44": 0.13144,
            "45": 0.13142,
            "46": 0.133,
            "47": 0.13069,
            "48": 0.13192,
            "49": 0.13124,
            "50": 0.13106,
            "51": 0.13227,
            "52": 0.13218,
            "53": 0.13063,
            "54": 0.13182,
            "55": 0.13138,
            "56": 0.13226,
            "57": 0.13156,
            "58": 0.13127,
            "59": 0.13198,
            "60": 0.13133,
            "61": 0.13107,
            "62": 0.13121,
            "63": 0.13141,
            "64": 0.13149,
            "65": 0.13192,
            "66": 0.13188,
            "67": 0.13167,
            "68": 0.1319,
            "69": 0.1318,
            "70": 0.13204,
            "71": 0.13077,
            "72": 0.13205,
            "73": 0.13251,
            "74": 0.13212,
            "75": 0.13195,
            "76": 0.13016,
            "77": 0.12994,
            "78": 0.13184,
            "79": 0.13131,
            "80": 0.13153,
            "81": 0.13219,
            "82": 0.13088,
            "83": 0.13074,
            "84": 0.13035,
            "85": 0.12939,
            "86": 0.12953,
            "87": 0.1294,
            "88": 0.12951,
            "89": 0.13007,
            "90": 0.12893,
            "91": 0.13881,
            "92": 0.1299,
            "93": 0.12993,
            "94": 0.13022,
            "95": 0.1304,
            "96": 0.12965,
            "97": 0.13013,
            "98": 0.1306,
            "99": 0.12958,
            "100": 0.13011
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgxa100_dracooci-ord.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.82005,
            "2": 10.81907,
            "3": 10.81397,
            "4": 10.78498,
            "5": 10.85285,
            "6": 10.87448,
            "7": 10.83201,
            "8": 10.83296,
            "9": 10.83936,
            "10": 10.78449,
            "11": 10.87794,
            "12": 10.86113,
            "13": 10.86438,
            "14": 10.87595,
            "15": 10.79226,
            "16": 10.79507,
            "17": 10.76764,
            "18": 10.80977,
            "19": 10.79693,
            "20": 10.69196,
            "21": 10.68154,
            "22": 10.52072,
            "23": 10.70881,
            "24": 10.5753,
            "25": 10.52318,
            "26": 10.59411,
            "27": 10.58357,
            "28": 10.56188,
            "29": 10.5696,
            "30": 10.34505,
            "31": 10.09986,
            "32": 10.45209,
            "33": 10.44378,
            "34": 10.20285,
            "35": 10.25888,
            "36": 10.20951,
            "37": 10.32305,
            "38": 10.1656,
            "39": 10.38115,
            "40": 10.07032,
            "41": 10.1364,
            "42": 10.19467,
            "43": 9.80541,
            "44": 9.92556,
            "45": 9.803,
            "46": 9.80008,
            "47": 10.11716,
            "48": 9.81309,
            "49": 9.49911,
            "50": 9.87675,
            "51": 9.82883,
            "52": 9.71745,
            "53": 10.03867,
            "54": 9.92195,
            "55": 9.85523,
            "56": 9.5922,
            "57": 9.44053,
            "58": 9.79679,
            "59": 9.5545,
            "60": 9.46634,
            "61": 9.66578,
            "62": 9.95346,
            "63": 9.33681,
            "64": 9.74137,
            "65": 8.91657,
            "66": 9.66586,
            "67": 9.34349,
            "68": 9.75312,
            "69": 9.75728,
            "70": 9.69276,
            "71": 9.58799,
            "72": 9.55054,
            "73": 9.46306,
            "74": 8.90575,
            "75": 9.37813,
            "76": 9.04954,
            "77": 10.02987,
            "78": 9.69223,
            "79": 9.33487,
            "80": 9.368,
            "81": 9.44383,
            "82": 9.66162,
            "83": 9.27183,
            "84": 9.38074,
            "85": 9.57598,
            "86": 9.0429,
            "87": 9.55787,
            "88": 9.70459,
            "89": 9.56609,
            "90": 9.77247,
            "91": 9.29341,
            "92": 9.31916,
            "93": 9.03465,
            "94": 8.78492,
            "95": 9.46912,
            "96": 9.47453,
            "97": 9.25689,
            "98": 9.61859,
            "99": 8.83266,
            "100": 9.34574
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1559.0,
            "2": 1591.0,
            "3": 1686.0,
            "4": 1707.0,
            "5": 1915.0,
            "6": 1734.0,
            "7": 1735.0,
            "8": 1584.0,
            "9": 1810.0,
            "10": 1361.0,
            "11": 1884.0,
            "12": 1714.0,
            "13": 1923.0,
            "14": 1736.0,
            "15": 1831.0,
            "16": 1684.0,
            "17": 1787.0,
            "18": 1707.0,
            "19": 1680.0,
            "20": 1695.0,
            "21": 1815.0,
            "22": 1711.0,
            "23": 2079.0,
            "24": 1677.0,
            "25": 1650.0,
            "26": 1714.0,
            "27": 1813.0,
            "28": 1998.0,
            "29": 1931.0,
            "30": 1861.0,
            "31": 1573.0,
            "32": 1934.0,
            "33": 2063.0,
            "34": 1891.0,
            "35": 1916.0,
            "36": 1939.0,
            "37": 2299.0,
            "38": 2235.0,
            "39": 2352.0,
            "40": 2109.0,
            "41": 2286.0,
            "42": 2232.0,
            "43": 1919.0,
            "44": 2032.0,
            "45": 2098.0,
            "46": 2287.0,
            "47": 2513.0,
            "48": 2360.0,
            "49": 2126.0,
            "50": 2424.0,
            "51": 2433.0,
            "52": 2566.0,
            "53": 2902.0,
            "54": 2589.0,
            "55": 2309.0,
            "56": 2761.0,
            "57": 2265.0,
            "58": 2876.0,
            "59": 2821.0,
            "60": 2432.0,
            "61": 3073.0,
            "62": 2638.0,
            "63": 2426.0,
            "64": 2913.0,
            "65": 2660.0,
            "66": 2985.0,
            "67": 2723.0,
            "68": 2790.0,
            "69": 2997.0,
            "70": 3132.0,
            "71": 2837.0,
            "72": 2291.0,
            "73": 2780.0,
            "74": 1936.0,
            "75": 2555.0,
            "76": 3028.0,
            "77": 3175.0,
            "78": 3109.0,
            "79": 2994.0,
            "80": 3370.0,
            "81": 3552.0,
            "82": 3308.0,
            "83": 2898.0,
            "84": 3285.0,
            "85": 3434.0,
            "86": 2573.0,
            "87": 3858.0,
            "88": 2920.0,
            "89": 3217.0,
            "90": 2868.0,
            "91": 2784.0,
            "92": 3011.0,
            "93": 2700.0,
            "94": 3372.0,
            "95": 3273.0,
            "96": 3557.0,
            "97": 3145.0,
            "98": 3635.0,
            "99": 3308.0,
            "100": 3359.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 730320896.0,
            "2": 730320896.0,
            "3": 730320896.0,
            "4": 730320896.0,
            "5": 730320896.0,
            "6": 730320896.0,
            "7": 730320896.0,
            "8": 730320896.0,
            "9": 730320896.0,
            "10": 730320896.0,
            "11": 730320896.0,
            "12": 730320896.0,
            "13": 730320896.0,
            "14": 730320896.0,
            "15": 730320896.0,
            "16": 730320896.0,
            "17": 730320896.0,
            "18": 730320896.0,
            "19": 730320896.0,
            "20": 730320896.0,
            "21": 730320896.0,
            "22": 730320896.0,
            "23": 730320896.0,
            "24": 730320896.0,
            "25": 730320896.0,
            "26": 730320896.0,
            "27": 730320896.0,
            "28": 730320896.0,
            "29": 730320896.0,
            "30": 730320896.0,
            "31": 730320896.0,
            "32": 730320896.0,
            "33": 730320896.0,
            "34": 730320896.0,
            "35": 730320896.0,
            "36": 730320896.0,
            "37": 730320896.0,
            "38": 730320896.0,
            "39": 730320896.0,
            "40": 730320896.0,
            "41": 730320896.0,
            "42": 730320896.0,
            "43": 730320896.0,
            "44": 730320896.0,
            "45": 730320896.0,
            "46": 730320896.0,
            "47": 730320896.0,
            "48": 730320896.0,
            "49": 730320896.0,
            "50": 730320896.0,
            "51": 730320896.0,
            "52": 730320896.0,
            "53": 730320896.0,
            "54": 730320896.0,
            "55": 730320896.0,
            "56": 730320896.0,
            "57": 730320896.0,
            "58": 730320896.0,
            "59": 730320896.0,
            "60": 730320896.0,
            "61": 730320896.0,
            "62": 730320896.0,
            "63": 730320896.0,
            "64": 730320896.0,
            "65": 730320896.0,
            "66": 730320896.0,
            "67": 730320896.0,
            "68": 730320896.0,
            "69": 730320896.0,
            "70": 730320896.0,
            "71": 730320896.0,
            "72": 730320896.0,
            "73": 730320896.0,
            "74": 730320896.0,
            "75": 730320896.0,
            "76": 730320896.0,
            "77": 730320896.0,
            "78": 730320896.0,
            "79": 730320896.0,
            "80": 730320896.0,
            "81": 730320896.0,
            "82": 730320896.0,
            "83": 730320896.0,
            "84": 730320896.0,
            "85": 730320896.0,
            "86": 730320896.0,
            "87": 730320896.0,
            "88": 730320896.0,
            "89": 730320896.0,
            "90": 730320896.0,
            "91": 730320896.0,
            "92": 730320896.0,
            "93": 730320896.0,
            "94": 730320896.0,
            "95": 730320896.0,
            "96": 730320896.0,
            "97": 730320896.0,
            "98": 730320896.0,
            "99": 730320896.0,
            "100": 730320896.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 3837453312.0,
            "2": 4119164928.0,
            "3": 4119164928.0,
            "4": 4119164928.0,
            "5": 4119164928.0,
            "6": 4119164928.0,
            "7": 4119164928.0,
            "8": 4119164928.0,
            "9": 4119164928.0,
            "10": 4119164928.0,
            "11": 4119164928.0,
            "12": 4119164928.0,
            "13": 4119164928.0,
            "14": 4119164928.0,
            "15": 4119164928.0,
            "16": 4119164928.0,
            "17": 4119164928.0,
            "18": 4119164928.0,
            "19": 4119164928.0,
            "20": 4119164928.0,
            "21": 4119164928.0,
            "22": 4119164928.0,
            "23": 4119164928.0,
            "24": 4119164928.0,
            "25": 4119164928.0,
            "26": 4119164928.0,
            "27": 4119164928.0,
            "28": 4119164928.0,
            "29": 4119164928.0,
            "30": 4119164928.0,
            "31": 4119164928.0,
            "32": 4119164928.0,
            "33": 4119164928.0,
            "34": 4119164928.0,
            "35": 4119164928.0,
            "36": 4119164928.0,
            "37": 4119164928.0,
            "38": 4119164928.0,
            "39": 4119164928.0,
            "40": 4119164928.0,
            "41": 4119164928.0,
            "42": 4119164928.0,
            "43": 4119164928.0,
            "44": 4119164928.0,
            "45": 4119164928.0,
            "46": 4119164928.0,
            "47": 4119164928.0,
            "48": 4119164928.0,
            "49": 4119164928.0,
            "50": 4119164928.0,
            "51": 4119164928.0,
            "52": 4119164928.0,
            "53": 4119164928.0,
            "54": 4119164928.0,
            "55": 4119164928.0,
            "56": 4119164928.0,
            "57": 4119164928.0,
            "58": 4119164928.0,
            "59": 4119164928.0,
            "60": 4119164928.0,
            "61": 4119164928.0,
            "62": 4119164928.0,
            "63": 4119164928.0,
            "64": 4119164928.0,
            "65": 4119164928.0,
            "66": 4119164928.0,
            "67": 4119164928.0,
            "68": 4119164928.0,
            "69": 4119164928.0,
            "70": 4119164928.0,
            "71": 4119164928.0,
            "72": 4119164928.0,
            "73": 4119164928.0,
            "74": 4119164928.0,
            "75": 4119164928.0,
            "76": 4119164928.0,
            "77": 4119164928.0,
            "78": 4119164928.0,
            "79": 4119164928.0,
            "80": 4119164928.0,
            "81": 4119164928.0,
            "82": 4119164928.0,
            "83": 4119164928.0,
            "84": 4119164928.0,
            "85": 4119164928.0,
            "86": 4119164928.0,
            "87": 4119164928.0,
            "88": 4119164928.0,
            "89": 4119164928.0,
            "90": 4119164928.0,
            "91": 4119164928.0,
            "92": 4119164928.0,
            "93": 4119164928.0,
            "94": 4119164928.0,
            "95": 4119164928.0,
            "96": 4119164928.0,
            "97": 4119164928.0,
            "98": 4119164928.0,
            "99": 4119164928.0,
            "100": 4119164928.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 20.0062,
            "2": 0.22515,
            "3": 0.1977,
            "4": 0.18911,
            "5": 0.18615,
            "6": 0.17034,
            "7": 0.16978,
            "8": 0.172,
            "9": 0.17258,
            "10": 0.17365,
            "11": 0.17197,
            "12": 0.17127,
            "13": 0.16991,
            "14": 0.16997,
            "15": 0.16994,
            "16": 0.17143,
            "17": 0.17095,
            "18": 0.17098,
            "19": 0.16956,
            "20": 0.1705,
            "21": 0.17016,
            "22": 0.1709,
            "23": 0.18003,
            "24": 0.1728,
            "25": 0.17179,
            "26": 0.17099,
            "27": 0.1721,
            "28": 0.17027,
            "29": 0.17076,
            "30": 0.17085,
            "31": 0.17145,
            "32": 0.17023,
            "33": 0.17166,
            "34": 0.17042,
            "35": 0.17306,
            "36": 0.17083,
            "37": 0.17109,
            "38": 0.17096,
            "39": 0.17162,
            "40": 0.1709,
            "41": 0.17007,
            "42": 0.17021,
            "43": 0.1703,
            "44": 0.1709,
            "45": 0.17091,
            "46": 0.1708,
            "47": 0.17037,
            "48": 0.17053,
            "49": 0.17145,
            "50": 0.17057,
            "51": 0.17728,
            "52": 0.17072,
            "53": 0.17004,
            "54": 0.17259,
            "55": 0.17417,
            "56": 0.17223,
            "57": 0.1731,
            "58": 0.172,
            "59": 0.17128,
            "60": 0.17384,
            "61": 0.17393,
            "62": 0.17367,
            "63": 0.17427,
            "64": 0.17235,
            "65": 0.17484,
            "66": 0.1728,
            "67": 0.17351,
            "68": 0.17401,
            "69": 0.17395,
            "70": 0.1725,
            "71": 0.17219,
            "72": 0.17187,
            "73": 0.17393,
            "74": 0.17345,
            "75": 0.17421,
            "76": 0.17406,
            "77": 0.17155,
            "78": 0.1728,
            "79": 0.17462,
            "80": 0.17582,
            "81": 0.17113,
            "82": 0.17105,
            "83": 0.17061,
            "84": 0.17127,
            "85": 0.17361,
            "86": 0.17294,
            "87": 0.17183,
            "88": 0.17162,
            "89": 0.17105,
            "90": 0.17179,
            "91": 0.17278,
            "92": 0.17216,
            "93": 0.17178,
            "94": 0.17267,
            "95": 0.1706,
            "96": 0.17363,
            "97": 0.17455,
            "98": 0.17149,
            "99": 0.17187,
            "100": 0.1711
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_lts_dgxa100_dracooci.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.82005,
            "2": 10.81907,
            "3": 10.81397,
            "4": 10.78498,
            "5": 10.85285,
            "6": 10.87448,
            "7": 10.83201,
            "8": 10.83296,
            "9": 10.83936,
            "10": 10.78449,
            "11": 10.87794,
            "12": 10.86113,
            "13": 10.86438,
            "14": 10.87595,
            "15": 10.79226,
            "16": 10.79507,
            "17": 10.76764,
            "18": 10.80977,
            "19": 10.79693,
            "20": 10.69196,
            "21": 10.68154,
            "22": 10.52072,
            "23": 10.70881,
            "24": 10.5753,
            "25": 10.52318,
            "26": 10.59411,
            "27": 10.58357,
            "28": 10.56188,
            "29": 10.5696,
            "30": 10.34505,
            "31": 10.09986,
            "32": 10.45209,
            "33": 10.44378,
            "34": 10.20285,
            "35": 10.25888,
            "36": 10.20951,
            "37": 10.32305,
            "38": 10.1656,
            "39": 10.38115,
            "40": 10.07032,
            "41": 10.1364,
            "42": 10.19467,
            "43": 9.80541,
            "44": 9.92556,
            "45": 9.803,
            "46": 9.80008,
            "47": 10.11716,
            "48": 9.81309,
            "49": 9.49911,
            "50": 9.87675,
            "51": 9.82883,
            "52": 9.71745,
            "53": 10.03867,
            "54": 9.92195,
            "55": 9.85523,
            "56": 9.5922,
            "57": 9.44053,
            "58": 9.79679,
            "59": 9.5545,
            "60": 9.46634,
            "61": 9.66578,
            "62": 9.95346,
            "63": 9.33681,
            "64": 9.74137,
            "65": 8.91657,
            "66": 9.66586,
            "67": 9.34349,
            "68": 9.75312,
            "69": 9.75728,
            "70": 9.69276,
            "71": 9.58799,
            "72": 9.55054,
            "73": 9.46306,
            "74": 8.90575,
            "75": 9.37813,
            "76": 9.04954,
            "77": 10.02987,
            "78": 9.69223,
            "79": 9.33487,
            "80": 9.368,
            "81": 9.44383,
            "82": 9.66162,
            "83": 9.27183,
            "84": 9.38074,
            "85": 9.57598,
            "86": 9.0429,
            "87": 9.55787,
            "88": 9.70459,
            "89": 9.56609,
            "90": 9.77247,
            "91": 9.29341,
            "92": 9.31916,
            "93": 9.03465,
            "94": 8.78492,
            "95": 9.46912,
            "96": 9.47453,
            "97": 9.25689,
            "98": 9.61859,
            "99": 8.83266,
            "100": 9.34574
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1559.0,
            "2": 1591.0,
            "3": 1686.0,
            "4": 1707.0,
            "5": 1915.0,
            "6": 1734.0,
            "7": 1735.0,
            "8": 1584.0,
            "9": 1810.0,
            "10": 1361.0,
            "11": 1884.0,
            "12": 1714.0,
            "13": 1923.0,
            "14": 1736.0,
            "15": 1831.0,
            "16": 1684.0,
            "17": 1787.0,
            "18": 1707.0,
            "19": 1680.0,
            "20": 1695.0,
            "21": 1815.0,
            "22": 1711.0,
            "23": 2079.0,
            "24": 1677.0,
            "25": 1650.0,
            "26": 1714.0,
            "27": 1813.0,
            "28": 1998.0,
            "29": 1931.0,
            "30": 1861.0,
            "31": 1573.0,
            "32": 1934.0,
            "33": 2063.0,
            "34": 1891.0,
            "35": 1916.0,
            "36": 1939.0,
            "37": 2299.0,
            "38": 2235.0,
            "39": 2352.0,
            "40": 2109.0,
            "41": 2286.0,
            "42": 2232.0,
            "43": 1919.0,
            "44": 2032.0,
            "45": 2098.0,
            "46": 2287.0,
            "47": 2513.0,
            "48": 2360.0,
            "49": 2126.0,
            "50": 2424.0,
            "51": 2433.0,
            "52": 2566.0,
            "53": 2902.0,
            "54": 2589.0,
            "55": 2309.0,
            "56": 2761.0,
            "57": 2265.0,
            "58": 2876.0,
            "59": 2821.0,
            "60": 2432.0,
            "61": 3073.0,
            "62": 2638.0,
            "63": 2426.0,
            "64": 2913.0,
            "65": 2660.0,
            "66": 2985.0,
            "67": 2723.0,
            "68": 2790.0,
            "69": 2997.0,
            "70": 3132.0,
            "71": 2837.0,
            "72": 2291.0,
            "73": 2780.0,
            "74": 1936.0,
            "75": 2555.0,
            "76": 3028.0,
            "77": 3175.0,
            "78": 3109.0,
            "79": 2994.0,
            "80": 3370.0,
            "81": 3552.0,
            "82": 3308.0,
            "83": 2898.0,
            "84": 3285.0,
            "85": 3434.0,
            "86": 2573.0,
            "87": 3858.0,
            "88": 2920.0,
            "89": 3217.0,
            "90": 2868.0,
            "91": 2784.0,
            "92": 3011.0,
            "93": 2700.0,
            "94": 3372.0,
            "95": 3273.0,
            "96": 3557.0,
            "97": 3145.0,
            "98": 3635.0,
            "99": 3308.0,
            "100": 3359.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 730320896.0,
            "2": 730320896.0,
            "3": 730320896.0,
            "4": 730320896.0,
            "5": 730320896.0,
            "6": 730320896.0,
            "7": 730320896.0,
            "8": 730320896.0,
            "9": 730320896.0,
            "10": 730320896.0,
            "11": 730320896.0,
            "12": 730320896.0,
            "13": 730320896.0,
            "14": 730320896.0,
            "15": 730320896.0,
            "16": 730320896.0,
            "17": 730320896.0,
            "18": 730320896.0,
            "19": 730320896.0,
            "20": 730320896.0,
            "21": 730320896.0,
            "22": 730320896.0,
            "23": 730320896.0,
            "24": 730320896.0,
            "25": 730320896.0,
            "26": 730320896.0,
            "27": 730320896.0,
            "28": 730320896.0,
            "29": 730320896.0,
            "30": 730320896.0,
            "31": 730320896.0,
            "32": 730320896.0,
            "33": 730320896.0,
            "34": 730320896.0,
            "35": 730320896.0,
            "36": 730320896.0,
            "37": 730320896.0,
            "38": 730320896.0,
            "39": 730320896.0,
            "40": 730320896.0,
            "41": 730320896.0,
            "42": 730320896.0,
            "43": 730320896.0,
            "44": 730320896.0,
            "45": 730320896.0,
            "46": 730320896.0,
            "47": 730320896.0,
            "48": 730320896.0,
            "49": 730320896.0,
            "50": 730320896.0,
            "51": 730320896.0,
            "52": 730320896.0,
            "53": 730320896.0,
            "54": 730320896.0,
            "55": 730320896.0,
            "56": 730320896.0,
            "57": 730320896.0,
            "58": 730320896.0,
            "59": 730320896.0,
            "60": 730320896.0,
            "61": 730320896.0,
            "62": 730320896.0,
            "63": 730320896.0,
            "64": 730320896.0,
            "65": 730320896.0,
            "66": 730320896.0,
            "67": 730320896.0,
            "68": 730320896.0,
            "69": 730320896.0,
            "70": 730320896.0,
            "71": 730320896.0,
            "72": 730320896.0,
            "73": 730320896.0,
            "74": 730320896.0,
            "75": 730320896.0,
            "76": 730320896.0,
            "77": 730320896.0,
            "78": 730320896.0,
            "79": 730320896.0,
            "80": 730320896.0,
            "81": 730320896.0,
            "82": 730320896.0,
            "83": 730320896.0,
            "84": 730320896.0,
            "85": 730320896.0,
            "86": 730320896.0,
            "87": 730320896.0,
            "88": 730320896.0,
            "89": 730320896.0,
            "90": 730320896.0,
            "91": 730320896.0,
            "92": 730320896.0,
            "93": 730320896.0,
            "94": 730320896.0,
            "95": 730320896.0,
            "96": 730320896.0,
            "97": 730320896.0,
            "98": 730320896.0,
            "99": 730320896.0,
            "100": 730320896.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 3837453312.0,
            "2": 4119164928.0,
            "3": 4119164928.0,
            "4": 4119164928.0,
            "5": 4119164928.0,
            "6": 4119164928.0,
            "7": 4119164928.0,
            "8": 4119164928.0,
            "9": 4119164928.0,
            "10": 4119164928.0,
            "11": 4119164928.0,
            "12": 4119164928.0,
            "13": 4119164928.0,
            "14": 4119164928.0,
            "15": 4119164928.0,
            "16": 4119164928.0,
            "17": 4119164928.0,
            "18": 4119164928.0,
            "19": 4119164928.0,
            "20": 4119164928.0,
            "21": 4119164928.0,
            "22": 4119164928.0,
            "23": 4119164928.0,
            "24": 4119164928.0,
            "25": 4119164928.0,
            "26": 4119164928.0,
            "27": 4119164928.0,
            "28": 4119164928.0,
            "29": 4119164928.0,
            "30": 4119164928.0,
            "31": 4119164928.0,
            "32": 4119164928.0,
            "33": 4119164928.0,
            "34": 4119164928.0,
            "35": 4119164928.0,
            "36": 4119164928.0,
            "37": 4119164928.0,
            "38": 4119164928.0,
            "39": 4119164928.0,
            "40": 4119164928.0,
            "41": 4119164928.0,
            "42": 4119164928.0,
            "43": 4119164928.0,
            "44": 4119164928.0,
            "45": 4119164928.0,
            "46": 4119164928.0,
            "47": 4119164928.0,
            "48": 4119164928.0,
            "49": 4119164928.0,
            "50": 4119164928.0,
            "51": 4119164928.0,
            "52": 4119164928.0,
            "53": 4119164928.0,
            "54": 4119164928.0,
            "55": 4119164928.0,
            "56": 4119164928.0,
            "57": 4119164928.0,
            "58": 4119164928.0,
            "59": 4119164928.0,
            "60": 4119164928.0,
            "61": 4119164928.0,
            "62": 4119164928.0,
            "63": 4119164928.0,
            "64": 4119164928.0,
            "65": 4119164928.0,
            "66": 4119164928.0,
            "67": 4119164928.0,
            "68": 4119164928.0,
            "69": 4119164928.0,
            "70": 4119164928.0,
            "71": 4119164928.0,
            "72": 4119164928.0,
            "73": 4119164928.0,
            "74": 4119164928.0,
            "75": 4119164928.0,
            "76": 4119164928.0,
            "77": 4119164928.0,
            "78": 4119164928.0,
            "79": 4119164928.0,
            "80": 4119164928.0,
            "81": 4119164928.0,
            "82": 4119164928.0,
            "83": 4119164928.0,
            "84": 4119164928.0,
            "85": 4119164928.0,
            "86": 4119164928.0,
            "87": 4119164928.0,
            "88": 4119164928.0,
            "89": 4119164928.0,
            "90": 4119164928.0,
            "91": 4119164928.0,
            "92": 4119164928.0,
            "93": 4119164928.0,
            "94": 4119164928.0,
            "95": 4119164928.0,
            "96": 4119164928.0,
            "97": 4119164928.0,
            "98": 4119164928.0,
            "99": 4119164928.0,
            "100": 4119164928.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 20.54847,
            "2": 0.20654,
            "3": 0.17899,
            "4": 0.17609,
            "5": 0.17607,
            "6": 0.17545,
            "7": 0.17582,
            "8": 0.3981,
            "9": 0.17427,
            "10": 0.17111,
            "11": 0.1706,
            "12": 0.17427,
            "13": 0.17652,
            "14": 0.17107,
            "15": 0.17191,
            "16": 0.1696,
            "17": 0.17104,
            "18": 0.16925,
            "19": 0.16894,
            "20": 0.17181,
            "21": 0.1703,
            "22": 0.1722,
            "23": 0.16959,
            "24": 0.18369,
            "25": 0.17058,
            "26": 0.17105,
            "27": 0.16942,
            "28": 0.1691,
            "29": 0.16894,
            "30": 0.17,
            "31": 0.17083,
            "32": 0.17034,
            "33": 0.16855,
            "34": 0.16981,
            "35": 0.1699,
            "36": 0.16909,
            "37": 0.16901,
            "38": 0.16998,
            "39": 0.16957,
            "40": 0.17038,
            "41": 0.16846,
            "42": 0.16847,
            "43": 0.16956,
            "44": 0.16964,
            "45": 0.16919,
            "46": 0.16891,
            "47": 0.16901,
            "48": 0.16904,
            "49": 0.16981,
            "50": 0.17034,
            "51": 0.17135,
            "52": 0.16786,
            "53": 0.1668,
            "54": 0.1671,
            "55": 0.16695,
            "56": 0.16737,
            "57": 0.1668,
            "58": 0.16761,
            "59": 0.16755,
            "60": 0.16907,
            "61": 0.16638,
            "62": 0.16819,
            "63": 0.16827,
            "64": 0.17031,
            "65": 0.167,
            "66": 0.39277,
            "67": 0.16989,
            "68": 0.16709,
            "69": 0.16761,
            "70": 0.16602,
            "71": 0.168,
            "72": 0.16646,
            "73": 0.16976,
            "74": 0.16686,
            "75": 0.16959,
            "76": 0.16956,
            "77": 0.1686,
            "78": 0.16588,
            "79": 0.16726,
            "80": 0.16802,
            "81": 0.16806,
            "82": 0.1664,
            "83": 0.16817,
            "84": 0.16729,
            "85": 0.1687,
            "86": 0.16736,
            "87": 0.1677,
            "88": 0.16777,
            "89": 0.16794,
            "90": 0.16675,
            "91": 0.1685,
            "92": 0.1679,
            "93": 0.16927,
            "94": 0.16945,
            "95": 0.171,
            "96": 0.1671,
            "97": 0.38537,
            "98": 0.16869,
            "99": 0.1704,
            "100": 0.16709
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
  PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --num-layers-per-virtual-pipeline-stage: 1
  --decoupled-lr: 0.0002
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-checkpoint-opt_param-scheduler: true
  --ckpt-format: torch
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --log-memory-to-tensorboard: true
TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.81548, "5": 10.85169, "10": 10.78686, "15": 10.79466, "20": 10.69061, "25": 10.52373, "30": 10.34573, "35": 10.262, "40": 10.07231, "45": 9.8104, "50": 9.88382, "55": 9.8636, "60": 9.47966, "65": 8.9318, "70": 9.71182, "75": 9.40028, "80": 9.38946, "85": 9.60163, "90": 9.80555, "95": 9.50858, "100": 9.39222}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1518.0, "5": 1873.0, "10": 1417.0, "15": 1728.0, "20": 1627.0, "25": 1696.0, "30": 1902.0, "35": 1913.0, "40": 2010.0, "45": 2082.0, "50": 2352.0, "55": 2347.0, "60": 2384.0, "65": 2648.0, "70": 3098.0, "75": 2426.0, "80": 3221.0, "85": 3410.0, "90": 2940.0, "95": 3344.0, "100": 3476.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 733860352.0, "5": 733860352.0, "10": 733860352.0, "15": 733860352.0, "20": 733860352.0, "25": 733860352.0, "30": 733860352.0, "35": 733860352.0, "40": 733860352.0, "45": 733860352.0, "50": 733860352.0, "55": 733860352.0, "60": 733860352.0, "65": 733860352.0, "70": 733860352.0, "75": 733860352.0, "80": 733860352.0, "85": 733860352.0, "90": 733860352.0, "95": 733860352.0, "100": 733860352.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2368927744.0, "5": 2651687936.0, "10": 2651687936.0, "15": 2651687936.0, "20": 2651687936.0, "25": 2651687936.0, "30": 2651687936.0, "35": 2651687936.0, "40": 2651687936.0, "45": 2651687936.0, "50": 2651687936.0, "55": 2651687936.0, "60": 2651687936.0, "65": 2651687936.0, "70": 2651687936.0, "75": 2651687936.0, "80": 2651687936.0, "85": 2651687936.0, "90": 2651687936.0, "95": 2651687936.0, "100": 2651687936.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 7.96586, "5": 0.20451, "10": 0.19525, "15": 0.19479, "20": 0.19488, "25": 0.19491, "30": 0.19582, "35": 0.19503, "40": 0.19323, "45": 0.1938, "50": 0.19231, "55": 0.19329, "60": 0.19361, "65": 0.19387, "70": 0.19354, "75": 0.19243, "80": 0.19581, "85": 0.19518, "90": 0.19202, "95": 0.19394, "100": 0.19153}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/golden_values_dev_dgx_h100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.84517, "5": 10.87427, "10": 10.82907, "15": 10.81974, "20": 10.727, "25": 10.55217, "30": 10.36614, "35": 10.2778, "40": 10.0976, "45": 9.84196, "50": 9.9125, "55": 9.88096, "60": 9.50125, "65": 8.94761, "70": 9.7424, "75": 9.42532, "80": 9.40396, "85": 9.61405, "90": 9.81418, "95": 9.5173, "100": 9.39541}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1655.0, "5": 1803.0, "10": 1448.0, "15": 1879.0, "20": 1657.0, "25": 1625.0, "30": 1882.0, "35": 1954.0, "40": 2191.0, "45": 2091.0, "50": 2189.0, "55": 2325.0, "60": 2361.0, "65": 2673.0, "70": 3139.0, "75": 2519.0, "80": 3205.0, "85": 3209.0, "90": 3168.0, "95": 3261.0, "100": 3135.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 763220480.0, "5": 763220480.0, "10": 763220480.0, "15": 763220480.0, "20": 763220480.0, "25": 763220480.0, "30": 763220480.0, "35": 763220480.0, "40": 763220480.0, "45": 763220480.0, "50": 763220480.0, "55": 763220480.0, "60": 763220480.0, "65": 763220480.0, "70": 763220480.0, "75": 763220480.0, "80": 763220480.0, "85": 763220480.0, "90": 763220480.0, "95": 763220480.0, "100": 763220480.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2359490560.0, "5": 2643299328.0, "10": 2643299328.0, "15": 2643299328.0, "20": 2643299328.0, "25": 2643299328.0, "30": 2643299328.0, "35": 2643299328.0, "40": 2643299328.0, "45": 2643299328.0, "50": 2643299328.0, "55": 2643299328.0, "60": 2643299328.0, "65": 2643299328.0, "70": 2643299328.0, "75": 2643299328.0, "80": 2643299328.0, "85": 2643299328.0, "90": 2643299328.0, "95": 2643299328.0, "100": 2643299328.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 12.8201, "5": 0.09841, "10": 0.0973, "15": 0.09747, "20": 0.10122, "25": 0.09733, "30": 0.09549, "35": 0.09419, "40": 0.09444, "45": 0.0942, "50": 0.09393, "55": 0.09442, "60": 0.09625, "65": 0.09616, "70": 0.0935, "75": 0.09533, "80": 0.09411, "85": 0.09429, "90": 0.09394, "95": 0.09358, "100": 0.09368}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/golden_values_lts_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.82005, "5": 10.85284, "10": 10.78455, "15": 10.79229, "20": 10.69211, "25": 10.52412, "30": 10.34552, "35": 10.26239, "40": 10.07241, "45": 9.81101, "50": 9.88422, "55": 9.86374, "60": 9.47965, "65": 8.93063, "70": 9.71215, "75": 9.40048, "80": 9.39077, "85": 9.60141, "90": 9.80501, "95": 9.50816, "100": 9.3924}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1559.0, "5": 1840.0, "10": 1380.0, "15": 1848.0, "20": 1601.0, "25": 1635.0, "30": 1936.0, "35": 1908.0, "40": 2100.0, "45": 2098.0, "50": 2333.0, "55": 2260.0, "60": 2399.0, "65": 2656.0, "70": 3077.0, "75": 2547.0, "80": 3315.0, "85": 3371.0, "90": 2943.0, "95": 3457.0, "100": 3292.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 732811264.0, "5": 732811264.0, "10": 732811264.0, "15": 732811264.0, "20": 732811264.0, "25": 732811264.0, "30": 732811264.0, "35": 732811264.0, "40": 732811264.0, "45": 732811264.0, "50": 732811264.0, "55": 732811264.0, "60": 732811264.0, "65": 732811264.0, "70": 732811264.0, "75": 732811264.0, "80": 732811264.0, "85": 732811264.0, "90": 732811264.0, "95": 732811264.0, "100": 732811264.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 3838895104.0, "5": 4122703872.0, "10": 4122703872.0, "15": 4122703872.0, "20": 4122703872.0, "25": 4122703872.0, "30": 4122703872.0, "35": 4122703872.0, "40": 4122703872.0, "45": 4122703872.0, "50": 4122703872.0, "55": 4122703872.0, "60": 4122703872.0, "65": 4122703872.0, "70": 4122703872.0, "75": 4122703872.0, "80": 4122703872.0, "85": 4122703872.0, "90": 4122703872.0, "95": 4122703872.0, "100": 4122703872.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 18.03419, "5": 0.17939, "10": 0.17912, "15": 0.18403, "20": 0.17941, "25": 0.17738, "30": 0.17654, "35": 0.17535, "40": 0.17586, "45": 0.17588, "50": 0.17262, "55": 0.17573, "60": 0.17244, "65": 0.17339, "70": 0.17159, "75": 0.17252, "80": 0.17266, "85": 0.17259, "90": 0.17221, "95": 0.17017, "100": 0.17083}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
  PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --num-layers-per-virtual-pipeline-stage: 1
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-checkpoint-opt_param-scheduler: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --ckpt-assume-constant-structure: true
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: regular # Usually ckpt-resume, but as a WAR to #513 set to regular


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.81548, "5": 10.85169, "10": 10.78686, "15": 10.79466, "20": 10.69061, "25": 10.52373, "30": 10.34573, "35": 10.262, "40": 10.07231, "45": 9.8104, "50": 9.88382, "55": 9.8636, "60": 9.47966, "65": 8.9318, "70": 9.71182, "75": 9.40028, "80": 9.38946, "85": 9.60163, "90": 9.80555, "95": 9.50858, "100": 9.39222}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1518.0, "5": 1873.0, "10": 1417.0, "15": 1728.0, "20": 1627.0, "25": 1696.0, "30": 1902.0, "35": 1913.0, "40": 2010.0, "45": 2082.0, "50": 2352.0, "55": 2347.0, "60": 2384.0, "65": 2648.0, "70": 3098.0, "75": 2426.0, "80": 3221.0, "85": 3410.0, "90": 2940.0, "95": 3344.0, "100": 3476.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 733860352.0, "5": 733860352.0, "10": 733860352.0, "15": 733860352.0, "20": 733860352.0, "25": 733860352.0, "30": 733860352.0, "35": 733860352.0, "40": 733860352.0, "45": 733860352.0, "50": 733860352.0, "55": 733860352.0, "60": 733860352.0, "65": 733860352.0, "70": 733860352.0, "75": 733860352.0, "80": 733860352.0, "85": 733860352.0, "90": 733860352.0, "95": 733860352.0, "100": 733860352.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2368927744.0, "5": 2651687936.0, "10": 2651687936.0, "15": 2651687936.0, "20": 2651687936.0, "25": 2651687936.0, "30": 2651687936.0, "35": 2651687936.0, "40": 2651687936.0, "45": 2651687936.0, "50": 2651687936.0, "55": 2651687936.0, "60": 2651687936.0, "65": 2651687936.0, "70": 2651687936.0, "75": 2651687936.0, "80": 2651687936.0, "85": 2651687936.0, "90": 2651687936.0, "95": 2651687936.0, "100": 2651687936.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 13.14264, "5": 0.19546, "10": 0.19535, "15": 0.19566, "20": 0.19222, "25": 0.19642, "30": 0.1946, "35": 0.19376, "40": 0.19572, "45": 0.19676, "50": 0.19596, "55": 0.19944, "60": 0.19887, "65": 0.19822, "70": 0.19685, "75": 0.19578, "80": 0.19659, "85": 0.19479, "90": 0.1948, "95": 0.19348, "100": 0.19614}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_gb200_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.8426,
            "52": 9.72578,
            "53": 10.05977,
            "54": 9.95226,
            "55": 9.88321,
            "56": 9.61276,
            "57": 9.46222,
            "58": 9.82313,
            "59": 9.57665,
            "60": 9.48518,
            "61": 9.6788,
            "62": 9.97777,
            "63": 9.36212,
            "64": 9.75714,
            "65": 8.93499,
            "66": 9.69281,
            "67": 9.36709,
            "68": 9.78179,
            "69": 9.79451,
            "70": 9.72295,
            "71": 9.62027,
            "72": 9.56974,
            "73": 9.481,
            "74": 8.91241,
            "75": 9.40906,
            "76": 9.06623,
            "77": 10.05808,
            "78": 9.72188,
            "79": 9.36927,
            "80": 9.40027,
            "81": 9.47702,
            "82": 9.69788,
            "83": 9.30742,
            "84": 9.41496,
            "85": 9.61115,
            "86": 9.07104,
            "87": 9.59609,
            "88": 9.74908,
            "89": 9.5961,
            "90": 9.82722,
            "91": 9.3366,
            "92": 9.3558,
            "93": 9.08695,
            "94": 8.82752,
            "95": 9.53066,
            "96": 9.52759,
            "97": 9.30671,
            "98": 9.66909,
            "99": 8.89637,
            "100": 9.4052
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2587.0,
            "52": 2574.0,
            "53": 2831.0,
            "54": 2602.0,
            "55": 2403.0,
            "56": 2822.0,
            "57": 2223.0,
            "58": 2954.0,
            "59": 2871.0,
            "60": 2518.0,
            "61": 2922.0,
            "62": 2677.0,
            "63": 2533.0,
            "64": 3023.0,
            "65": 2609.0,
            "66": 2960.0,
            "67": 2867.0,
            "68": 2652.0,
            "69": 3053.0,
            "70": 3011.0,
            "71": 2870.0,
            "72": 2460.0,
            "73": 3114.0,
            "74": 2017.0,
            "75": 2527.0,
            "76": 2954.0,
            "77": 2955.0,
            "78": 3055.0,
            "79": 3098.0,
            "80": 3047.0,
            "81": 3362.0,
            "82": 3296.0,
            "83": 2825.0,
            "84": 3113.0,
            "85": 3196.0,
            "86": 2666.0,
            "87": 3583.0,
            "88": 2985.0,
            "89": 3259.0,
            "90": 3220.0,
            "91": 2781.0,
            "92": 3090.0,
            "93": 2686.0,
            "94": 3474.0,
            "95": 3147.0,
            "96": 3418.0,
            "97": 3036.0,
            "98": 3411.0,
            "99": 3152.0,
            "100": 3098.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 763221504.0,
            "52": 763221504.0,
            "53": 763221504.0,
            "54": 763221504.0,
            "55": 763221504.0,
            "56": 763221504.0,
            "57": 763221504.0,
            "58": 763221504.0,
            "59": 763221504.0,
            "60": 763221504.0,
            "61": 763221504.0,
            "62": 763221504.0,
            "63": 763221504.0,
            "64": 763221504.0,
            "65": 763221504.0,
            "66": 763221504.0,
            "67": 763221504.0,
            "68": 763221504.0,
            "69": 763221504.0,
            "70": 763221504.0,
            "71": 763221504.0,
            "72": 763221504.0,
            "73": 763221504.0,
            "74": 763221504.0,
            "75": 763221504.0,
            "76": 763221504.0,
            "77": 763221504.0,
            "78": 763221504.0,
            "79": 763221504.0,
            "80": 763221504.0,
            "81": 763221504.0,
            "82": 763221504.0,
            "83": 763221504.0,
            "84": 763221504.0,
            "85": 763221504.0,
            "86": 763221504.0,
            "87": 763221504.0,
            "88": 763221504.0,
            "89": 763221504.0,
            "90": 763221504.0,
            "91": 763221504.0,
            "92": 763221504.0,
            "93": 763221504.0,
            "94": 763221504.0,
            "95": 763221504.0,
            "96": 763221504.0,
            "97": 763221504.0,
            "98": 763221504.0,
            "99": 763221504.0,
            "100": 763221504.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2682096640.0,
            "52": 2682097664.0,
            "53": 2682097664.0,
            "54": 2682097664.0,
            "55": 2682097664.0,
            "56": 2682097664.0,
            "57": 2682097664.0,
            "58": 2682097664.0,
            "59": 2682097664.0,
            "60": 2682097664.0,
            "61": 2682097664.0,
            "62": 2682097664.0,
            "63": 2682097664.0,
            "64": 2682097664.0,
            "65": 2682097664.0,
            "66": 2682097664.0,
            "67": 2682097664.0,
            "68": 2682097664.0,
            "69": 2682097664.0,
            "70": 2682097664.0,
            "71": 2682097664.0,
            "72": 2682097664.0,
            "73": 2682097664.0,
            "74": 2682097664.0,
            "75": 2682097664.0,
            "76": 2682097664.0,
            "77": 2682097664.0,
            "78": 2682097664.0,
            "79": 2682097664.0,
            "80": 2682097664.0,
            "81": 2682097664.0,
            "82": 2682097664.0,
            "83": 2682097664.0,
            "84": 2682097664.0,
            "85": 2682097664.0,
            "86": 2682097664.0,
            "87": 2682097664.0,
            "88": 2682097664.0,
            "89": 2682097664.0,
            "90": 2682097664.0,
            "91": 2682097664.0,
            "92": 2682097664.0,
            "93": 2682097664.0,
            "94": 2682097664.0,
            "95": 2682097664.0,
            "96": 2682097664.0,
            "97": 2682097664.0,
            "98": 2682097664.0,
            "99": 2682097664.0,
            "100": 2682097664.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": "nan",
            "52": 3.06236,
            "53": 0.13884,
            "54": 0.12077,
            "55": 0.12587,
            "56": 0.12115,
            "57": 0.12166,
            "58": 0.12353,
            "59": 0.1247,
            "60": 0.12221,
            "61": 0.12159,
            "62": 0.12136,
            "63": 0.13043,
            "64": 0.12973,
            "65": 0.13067,
            "66": 0.14918,
            "67": 0.11954,
            "68": 0.11631,
            "69": 0.11511,
            "70": 0.11621,
            "71": 0.11553,
            "72": 0.11537,
            "73": 0.11691,
            "74": 0.11875,
            "75": 0.11769,
            "76": 0.11586,
            "77": 0.11847,
            "78": 0.11896,
            "79": 0.11697,
            "80": 0.11854,
            "81": 0.11758,
            "82": 0.11531,
            "83": 0.11776,
            "84": 0.11613,
            "85": 0.11822,
            "86": 0.11858,
            "87": 0.11763,
            "88": 0.11691,
            "89": 0.11931,
            "90": 0.11678,
            "91": 0.11601,
            "92": 0.11377,
            "93": 0.11692,
            "94": 0.11741,
            "95": 0.11634,
            "96": 0.1145,
            "97": 0.12011,
            "98": 0.11722,
            "99": 0.11609,
            "100": 0.11641
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_h100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.84517,
            "2": 10.85349,
            "3": 10.8539,
            "4": 10.83825,
            "5": 10.87427,
            "6": 10.89307,
            "7": 10.85454,
            "8": 10.8626,
            "9": 10.86468,
            "10": 10.82907,
            "11": 10.88789,
            "12": 10.87095,
            "13": 10.87916,
            "14": 10.89079,
            "15": 10.81974,
            "16": 10.83162,
            "17": 10.79863,
            "18": 10.81667,
            "19": 10.81919,
            "20": 10.727,
            "21": 10.70594,
            "22": 10.56364,
            "23": 10.72802,
            "24": 10.60832,
            "25": 10.55217,
            "26": 10.60845,
            "27": 10.62847,
            "28": 10.5831,
            "29": 10.60012,
            "30": 10.36614,
            "31": 10.12044,
            "32": 10.47684,
            "33": 10.46873,
            "34": 10.22319,
            "35": 10.2778,
            "36": 10.22892,
            "37": 10.35949,
            "38": 10.19371,
            "39": 10.4155,
            "40": 10.0976,
            "41": 10.15737,
            "42": 10.22396,
            "43": 9.83286,
            "44": 9.96916,
            "45": 9.84196,
            "46": 9.83045,
            "47": 10.15628,
            "48": 9.85484,
            "49": 9.54086,
            "50": 9.9125,
            "51": 9.8587,
            "52": 9.74287,
            "53": 10.06647,
            "54": 9.95168,
            "55": 9.88096,
            "56": 9.62625,
            "57": 9.47766,
            "58": 9.8335,
            "59": 9.58522,
            "60": 9.50125,
            "61": 9.69186,
            "62": 9.98858,
            "63": 9.38478,
            "64": 9.78027,
            "65": 8.94761,
            "66": 9.70857,
            "67": 9.36847,
            "68": 9.78438,
            "69": 9.79407,
            "70": 9.7424,
            "71": 9.61808,
            "72": 9.58427,
            "73": 9.50347,
            "74": 8.9422,
            "75": 9.42532,
            "76": 9.07407,
            "77": 10.06351,
            "78": 9.7208,
            "79": 9.37296,
            "80": 9.40396,
            "81": 9.48168,
            "82": 9.69778,
            "83": 9.30711,
            "84": 9.41712,
            "85": 9.61405,
            "86": 9.07618,
            "87": 9.59088,
            "88": 9.7464,
            "89": 9.59987,
            "90": 9.81418,
            "91": 9.33775,
            "92": 9.35372,
            "93": 9.07397,
            "94": 8.8317,
            "95": 9.5173,
            "96": 9.52412,
            "97": 9.30995,
            "98": 9.66807,
            "99": 8.8859,
            "100": 9.39541
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1655.0,
            "2": 1697.0,
            "3": 1724.0,
            "4": 1720.0,
            "5": 1803.0,
            "6": 1772.0,
            "7": 1811.0,
            "8": 1678.0,
            "9": 1828.0,
            "10": 1448.0,
            "11": 1890.0,
            "12": 1657.0,
            "13": 1852.0,
            "14": 1717.0,
            "15": 1879.0,
            "16": 1921.0,
            "17": 1666.0,
            "18": 1729.0,
            "19": 1767.0,
            "20": 1657.0,
            "21": 1827.0,
            "22": 1594.0,
            "23": 1918.0,
            "24": 1622.0,
            "25": 1625.0,
            "26": 1649.0,
            "27": 1788.0,
            "28": 2030.0,
            "29": 1980.0,
            "30": 1882.0,
            "31": 1564.0,
            "32": 1918.0,
            "33": 2045.0,
            "34": 1884.0,
            "35": 1954.0,
            "36": 1910.0,
            "37": 2267.0,
            "38": 2195.0,
            "39": 2346.0,
            "40": 2191.0,
            "41": 2171.0,
            "42": 2246.0,
            "43": 1997.0,
            "44": 2156.0,
            "45": 2091.0,
            "46": 2439.0,
            "47": 2539.0,
            "48": 2418.0,
            "49": 2207.0,
            "50": 2189.0,
            "51": 2608.0,
            "52": 2444.0,
            "53": 2898.0,
            "54": 2664.0,
            "55": 2325.0,
            "56": 2614.0,
            "57": 2394.0,
            "58": 2812.0,
            "59": 2771.0,
            "60": 2361.0,
            "61": 2855.0,
            "62": 2675.0,
            "63": 2393.0,
            "64": 3014.0,
            "65": 2673.0,
            "66": 3051.0,
            "67": 2657.0,
            "68": 2662.0,
            "69": 2736.0,
            "70": 3139.0,
            "71": 2943.0,
            "72": 2293.0,
            "73": 2908.0,
            "74": 1887.0,
            "75": 2519.0,
            "76": 3060.0,
            "77": 3191.0,
            "78": 3211.0,
            "79": 3081.0,
            "80": 3205.0,
            "81": 3563.0,
            "82": 3201.0,
            "83": 2614.0,
            "84": 3162.0,
            "85": 3209.0,
            "86": 2660.0,
            "87": 3729.0,
            "88": 3002.0,
            "89": 3160.0,
            "90": 3168.0,
            "91": 2753.0,
            "92": 3258.0,
            "93": 2617.0,
            "94": 3341.0,
            "95": 3261.0,
            "96": 3370.0,
            "97": 3163.0,
            "98": 3566.0,
            "99": 3179.0,
            "100": 3135.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 765318656.0,
            "2": 765318656.0,
            "3": 765318656.0,
            "4": 765318656.0,
            "5": 765318656.0,
            "6": 765318656.0,
            "7": 765318656.0,
            "8": 765318656.0,
            "9": 765318656.0,
            "10": 765318656.0,
            "11": 765318656.0,
            "12": 765318656.0,
            "13": 765318656.0,
            "14": 765318656.0,
            "15": 765318656.0,
            "16": 765318656.0,
            "17": 765318656.0,
            "18": 765318656.0,
            "19": 765318656.0,
            "20": 765318656.0,
            "21": 765318656.0,
            "22": 765318656.0,
            "23": 765318656.0,
            "24": 765318656.0,
            "25": 765318656.0,
            "26": 765318656.0,
            "27": 765318656.0,
            "28": 765318656.0,
            "29": 765318656.0,
            "30": 765318656.0,
            "31": 765318656.0,
            "32": 765318656.0,
            "33": 765318656.0,
            "34": 765318656.0,
            "35": 765318656.0,
            "36": 765318656.0,
            "37": 765318656.0,
            "38": 765318656.0,
            "39": 765318656.0,
            "40": 765318656.0,
            "41": 765318656.0,
            "42": 765318656.0,
            "43": 765318656.0,
            "44": 765318656.0,
            "45": 765318656.0,
            "46": 765318656.0,
            "47": 765318656.0,
            "48": 765318656.0,
            "49": 765318656.0,
            "50": 765318656.0,
            "51": 765318656.0,
            "52": 765318656.0,
            "53": 765318656.0,
            "54": 765318656.0,
            "55": 765318656.0,
            "56": 765318656.0,
            "57": 765318656.0,
            "58": 765318656.0,
            "59": 765318656.0,
            "60": 765318656.0,
            "61": 765318656.0,
            "62": 765318656.0,
            "63": 765318656.0,
            "64": 765318656.0,
            "65": 765318656.0,
            "66": 765318656.0,
            "67": 765318656.0,
            "68": 765318656.0,
            "69": 765318656.0,
            "70": 765318656.0,
            "71": 765318656.0,
            "72": 765318656.0,
            "73": 765318656.0,
            "74": 765318656.0,
            "75": 765318656.0,
            "76": 765318656.0,
            "77": 765318656.0,
            "78": 765318656.0,
            "79": 765318656.0,
            "80": 765318656.0,
            "81": 765318656.0,
            "82": 765318656.0,
            "83": 765318656.0,
            "84": 765318656.0,
            "85": 765318656.0,
            "86": 765318656.0,
            "87": 765318656.0,
            "88": 765318656.0,
            "89": 765318656.0,
            "90": 765318656.0,
            "91": 765318656.0,
            "92": 765318656.0,
            "93": 765318656.0,
            "94": 765318656.0,
            "95": 765318656.0,
            "96": 765318656.0,
            "97": 765318656.0,
            "98": 765318656.0,
            "99": 765318656.0,
            "100": 765318656.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2360539648.0,
            "2": 2645397504.0,
            "3": 2645397504.0,
            "4": 2645397504.0,
            "5": 2645397504.0,
            "6": 2645397504.0,
            "7": 2645397504.0,
            "8": 2645397504.0,
            "9": 2645397504.0,
            "10": 2645397504.0,
            "11": 2645397504.0,
            "12": 2645397504.0,
            "13": 2645397504.0,
            "14": 2645397504.0,
            "15": 2645397504.0,
            "16": 2645397504.0,
            "17": 2645397504.0,
            "18": 2645397504.0,
            "19": 2645397504.0,
            "20": 2645397504.0,
            "21": 2645397504.0,
            "22": 2645397504.0,
            "23": 2645397504.0,
            "24": 2645397504.0,
            "25": 2645397504.0,
            "26": 2645397504.0,
            "27": 2645397504.0,
            "28": 2645397504.0,
            "29": 2645397504.0,
            "30": 2645397504.0,
            "31": 2645397504.0,
            "32": 2645397504.0,
            "33": 2645397504.0,
            "34": 2645397504.0,
            "35": 2645397504.0,
            "36": 2645397504.0,
            "37": 2645397504.0,
            "38": 2645397504.0,
            "39": 2645397504.0,
            "40": 2645397504.0,
            "41": 2645397504.0,
            "42": 2645397504.0,
            "43": 2645397504.0,
            "44": 2645397504.0,
            "45": 2645397504.0,
            "46": 2645397504.0,
            "47": 2645397504.0,
            "48": 2645397504.0,
            "49": 2645397504.0,
            "50": 2645397504.0,
            "51": 2645397504.0,
            "52": 2645397504.0,
            "53": 2645397504.0,
            "54": 2645397504.0,
            "55": 2645397504.0,
            "56": 2645397504.0,
            "57": 2645397504.0,
            "58": 2645397504.0,
            "59": 2645397504.0,
            "60": 2645397504.0,
            "61": 2645397504.0,
            "62": 2645397504.0,
            "63": 2645397504.0,
            "64": 2645397504.0,
            "65": 2645397504.0,
            "66": 2645397504.0,
            "67": 2645397504.0,
            "68": 2645397504.0,
            "69": 2645397504.0,
            "70": 2645397504.0,
            "71": 2645397504.0,
            "72": 2645397504.0,
            "73": 2645397504.0,
            "74": 2645397504.0,
            "75": 2645397504.0,
            "76": 2645397504.0,
            "77": 2645397504.0,
            "78": 2645397504.0,
            "79": 2645397504.0,
            "80": 2645397504.0,
            "81": 2645397504.0,
            "82": 2645397504.0,
            "83": 2645397504.0,
            "84": 2645397504.0,
            "85": 2645397504.0,
            "86": 2645397504.0,
            "87": 2645397504.0,
            "88": 2645397504.0,
            "89": 2645397504.0,
            "90": 2645397504.0,
            "91": 2645397504.0,
            "92": 2645397504.0,
            "93": 2645397504.0,
            "94": 2645397504.0,
            "95": 2645397504.0,
            "96": 2645397504.0,
            "97": 2645397504.0,
            "98": 2645397504.0,
            "99": 2645397504.0,
            "100": 2645397504.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 11.89927,
            "2": 0.1153,
            "3": 0.10368,
            "4": 0.08198,
            "5": 0.0823,
            "6": 0.0813,
            "7": 0.08053,
            "8": 0.08097,
            "9": 0.08083,
            "10": 0.08105,
            "11": 0.08193,
            "12": 0.08083,
            "13": 0.08063,
            "14": 0.08095,
            "15": 0.08115,
            "16": 0.08099,
            "17": 0.08128,
            "18": 0.08134,
            "19": 0.08147,
            "20": 0.08174,
            "21": 0.08185,
            "22": 0.08175,
            "23": 0.08109,
            "24": 0.08065,
            "25": 0.08488,
            "26": 0.08433,
            "27": 0.08446,
            "28": 0.08482,
            "29": 0.08645,
            "30": 0.08469,
            "31": 0.08623,
            "32": 0.08474,
            "33": 0.08443,
            "34": 0.08442,
            "35": 0.08287,
            "36": 0.08188,
            "37": 0.08068,
            "38": 0.0808,
            "39": 0.08041,
            "40": 0.08119,
            "41": 0.08373,
            "42": 0.08116,
            "43": 0.08394,
            "44": 0.08252,
            "45": 0.08182,
            "46": 0.08217,
            "47": 0.08115,
            "48": 0.08122,
            "49": 0.08084,
            "50": 0.08062,
            "51": 0.09006,
            "52": 0.08529,
            "53": 0.08552,
            "54": 0.08335,
            "55": 0.08266,
            "56": 0.08016,
            "57": 0.08221,
            "58": 0.08,
            "59": 0.08121,
            "60": 0.08027,
            "61": 0.08342,
            "62": 0.08237,
            "63": 0.08269,
            "64": 0.0825,
            "65": 0.08238,
            "66": 0.08275,
            "67": 0.08276,
            "68": 0.08526,
            "69": 0.0814,
            "70": 0.08183,
            "71": 0.08214,
            "72": 0.08252,
            "73": 0.0824,
            "74": 0.08248,
            "75": 0.08211,
            "76": 0.0822,
            "77": 0.08148,
            "78": 0.08193,
            "79": 0.08271,
            "80": 0.082,
            "81": 0.08216,
            "82": 0.08205,
            "83": 0.0823,
            "84": 0.08236,
            "85": 0.08239,
            "86": 0.0805,
            "87": 0.07901,
            "88": 0.07985,
            "89": 0.07962,
            "90": 0.07883,
            "91": 0.07962,
            "92": 0.07909,
            "93": 0.07986,
            "94": 0.08107,
            "95": 0.08014,
            "96": 0.07993,
            "97": 0.08061,
            "98": 0.0808,
            "99": 0.07879,
            "100": 0.07901
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.81548,
            "2": 10.8208,
            "3": 10.81272,
            "4": 10.78885,
            "5": 10.85169,
            "6": 10.87038,
            "7": 10.83098,
            "8": 10.8343,
            "9": 10.83996,
            "10": 10.78686,
            "11": 10.88025,
            "12": 10.85974,
            "13": 10.86596,
            "14": 10.8782,
            "15": 10.79466,
            "16": 10.79613,
            "17": 10.76887,
            "18": 10.81076,
            "19": 10.7986,
            "20": 10.69061,
            "21": 10.67945,
            "22": 10.52114,
            "23": 10.70755,
            "24": 10.57741,
            "25": 10.52373,
            "26": 10.59597,
            "27": 10.58619,
            "28": 10.56176,
            "29": 10.56945,
            "30": 10.34573,
            "31": 10.10078,
            "32": 10.45392,
            "33": 10.4461,
            "34": 10.20721,
            "35": 10.262,
            "36": 10.21312,
            "37": 10.32468,
            "38": 10.16831,
            "39": 10.38374,
            "40": 10.07231,
            "41": 10.13763,
            "42": 10.19765,
            "43": 9.81155,
            "44": 9.93311,
            "45": 9.8104,
            "46": 9.80854,
            "47": 10.12558,
            "48": 9.82105,
            "49": 9.50764,
            "50": 9.88382,
            "51": 9.83549,
            "52": 9.72516,
            "53": 10.04799,
            "54": 9.93011,
            "55": 9.8636,
            "56": 9.60217,
            "57": 9.45187,
            "58": 9.8078,
            "59": 9.56783,
            "60": 9.47966,
            "61": 9.67984,
            "62": 9.96754,
            "63": 9.35113,
            "64": 9.75623,
            "65": 8.9318,
            "66": 9.68107,
            "67": 9.35956,
            "68": 9.76948,
            "69": 9.77492,
            "70": 9.71182,
            "71": 9.60632,
            "72": 9.57129,
            "73": 9.48392,
            "74": 8.92911,
            "75": 9.40028,
            "76": 9.07194,
            "77": 10.05252,
            "78": 9.71494,
            "79": 9.35747,
            "80": 9.38946,
            "81": 9.46791,
            "82": 9.68508,
            "83": 9.29588,
            "84": 9.40522,
            "85": 9.60163,
            "86": 9.06713,
            "87": 9.58402,
            "88": 9.73304,
            "89": 9.59526,
            "90": 9.80555,
            "91": 9.32604,
            "92": 9.35323,
            "93": 9.06915,
            "94": 8.82268,
            "95": 9.50858,
            "96": 9.51584,
            "97": 9.2976,
            "98": 9.66184,
            "99": 8.87662,
            "100": 9.39222
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1518.0,
            "2": 1697.0,
            "3": 1678.0,
            "4": 1709.0,
            "5": 1873.0,
            "6": 1786.0,
            "7": 1813.0,
            "8": 1519.0,
            "9": 1684.0,
            "10": 1417.0,
            "11": 1943.0,
            "12": 1723.0,
            "13": 1939.0,
            "14": 1759.0,
            "15": 1728.0,
            "16": 1773.0,
            "17": 1805.0,
            "18": 1702.0,
            "19": 1803.0,
            "20": 1627.0,
            "21": 1822.0,
            "22": 1748.0,
            "23": 1938.0,
            "24": 1642.0,
            "25": 1696.0,
            "26": 1760.0,
            "27": 1809.0,
            "28": 2025.0,
            "29": 1900.0,
            "30": 1902.0,
            "31": 1645.0,
            "32": 1876.0,
            "33": 2105.0,
            "34": 1881.0,
            "35": 1913.0,
            "36": 1864.0,
            "37": 2322.0,
            "38": 2194.0,
            "39": 2318.0,
            "40": 2010.0,
            "41": 2358.0,
            "42": 2155.0,
            "43": 1980.0,
            "44": 2105.0,
            "45": 2082.0,
            "46": 2221.0,
            "47": 2537.0,
            "48": 2367.0,
            "49": 2190.0,
            "50": 2352.0,
            "51": 2441.0,
            "52": 2482.0,
            "53": 2916.0,
            "54": 2550.0,
            "55": 2347.0,
            "56": 2765.0,
            "57": 2116.0,
            "58": 2968.0,
            "59": 2810.0,
            "60": 2384.0,
            "61": 2912.0,
            "62": 2554.0,
            "63": 2364.0,
            "64": 3035.0,
            "65": 2648.0,
            "66": 2979.0,
            "67": 2741.0,
            "68": 2799.0,
            "69": 3071.0,
            "70": 3098.0,
            "71": 2950.0,
            "72": 2342.0,
            "73": 2829.0,
            "74": 1840.0,
            "75": 2426.0,
            "76": 2941.0,
            "77": 3245.0,
            "78": 3272.0,
            "79": 3066.0,
            "80": 3221.0,
            "81": 3565.0,
            "82": 3162.0,
            "83": 2876.0,
            "84": 3180.0,
            "85": 3410.0,
            "86": 2778.0,
            "87": 3752.0,
            "88": 2995.0,
            "89": 3264.0,
            "90": 2940.0,
            "91": 2791.0,
            "92": 3118.0,
            "93": 2634.0,
            "94": 3464.0,
            "95": 3344.0,
            "96": 3499.0,
            "97": 3122.0,
            "98": 3568.0,
            "99": 3272.0,
            "100": 3476.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 733860352.0,
            "2": 733860352.0,
            "3": 733860352.0,
            "4": 733860352.0,
            "5": 733860352.0,
            "6": 733860352.0,
            "7": 733860352.0,
            "8": 733860352.0,
            "9": 733860352.0,
            "10": 733860352.0,
            "11": 733860352.0,
            "12": 733860352.0,
            "13": 733860352.0,
            "14": 733860352.0,
            "15": 733860352.0,
            "16": 733860352.0,
            "17": 733860352.0,
            "18": 733860352.0,
            "19": 733860352.0,
            "20": 733860352.0,
            "21": 733860352.0,
            "22": 733860352.0,
            "23": 733860352.0,
            "24": 733860352.0,
            "25": 733860352.0,
            "26": 733860352.0,
            "27": 733860352.0,
            "28": 733860352.0,
            "29": 733860352.0,
            "30": 733860352.0,
            "31": 733860352.0,
            "32": 733860352.0,
            "33": 733860352.0,
            "34": 733860352.0,
            "35": 733860352.0,
            "36": 733860352.0,
            "37": 733860352.0,
            "38": 733860352.0,
            "39": 733860352.0,
            "40": 733860352.0,
            "41": 733860352.0,
            "42": 733860352.0,
            "43": 733860352.0,
            "44": 733860352.0,
            "45": 733860352.0,
            "46": 733860352.0,
            "47": 733860352.0,
            "48": 733860352.0,
            "49": 733860352.0,
            "50": 733860352.0,
            "51": 733860352.0,
            "52": 733860352.0,
            "53": 733860352.0,
            "54": 733860352.0,
            "55": 733860352.0,
            "56": 733860352.0,
            "57": 733860352.0,
            "58": 733860352.0,
            "59": 733860352.0,
            "60": 733860352.0,
            "61": 733860352.0,
            "62": 733860352.0,
            "63": 733860352.0,
            "64": 733860352.0,
            "65": 733860352.0,
            "66": 733860352.0,
            "67": 733860352.0,
            "68": 733860352.0,
            "69": 733860352.0,
            "70": 733860352.0,
            "71": 733860352.0,
            "72": 733860352.0,
            "73": 733860352.0,
            "74": 733860352.0,
            "75": 733860352.0,
            "76": 733860352.0,
            "77": 733860352.0,
            "78": 733860352.0,
            "79": 733860352.0,
            "80": 733860352.0,
            "81": 733860352.0,
            "82": 733860352.0,
            "83": 733860352.0,
            "84": 733860352.0,
            "85": 733860352.0,
            "86": 733860352.0,
            "87": 733860352.0,
            "88": 733860352.0,
            "89": 733860352.0,
            "90": 733860352.0,
            "91": 733860352.0,
            "92": 733860352.0,
            "93": 733860352.0,
            "94": 733860352.0,
            "95": 733860352.0,
            "96": 733860352.0,
            "97": 733860352.0,
            "98": 733860352.0,
            "99": 733860352.0,
            "100": 733860352.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2367879168.0,
            "2": 2651687936.0,
            "3": 2651687936.0,
            "4": 2651687936.0,
            "5": 2651687936.0,
            "6": 2651687936.0,
            "7": 2651687936.0,
            "8": 2651687936.0,
            "9": 2651687936.0,
            "10": 2651687936.0,
            "11": 2651687936.0,
            "12": 2651687936.0,
            "13": 2651687936.0,
            "14": 2651687936.0,
            "15": 2651687936.0,
            "16": 2651687936.0,
            "17": 2651687936.0,
            "18": 2651687936.0,
            "19": 2651687936.0,
            "20": 2651687936.0,
            "21": 2651687936.0,
            "22": 2651687936.0,
            "23": 2651687936.0,
            "24": 2651687936.0,
            "25": 2651687936.0,
            "26": 2651687936.0,
            "27": 2651687936.0,
            "28": 2651687936.0,
            "29": 2651687936.0,
            "30": 2651687936.0,
            "31": 2651687936.0,
            "32": 2651687936.0,
            "33": 2651687936.0,
            "34": 2651687936.0,
            "35": 2651687936.0,
            "36": 2651687936.0,
            "37": 2651687936.0,
            "38": 2651687936.0,
            "39": 2651687936.0,
            "40": 2651687936.0,
            "41": 2651687936.0,
            "42": 2651687936.0,
            "43": 2651687936.0,
            "44": 2651687936.0,
            "45": 2651687936.0,
            "46": 2651687936.0,
            "47": 2651687936.0,
            "48": 2651687936.0,
            "49": 2651687936.0,
            "50": 2651687936.0,
            "51": 2651687936.0,
            "52": 2651687936.0,
            "53": 2651687936.0,
            "54": 2651687936.0,
            "55": 2651687936.0,
            "56": 2651687936.0,
            "57": 2651687936.0,
            "58": 2651687936.0,
            "59": 2651687936.0,
            "60": 2651687936.0,
            "61": 2651687936.0,
            "62": 2651687936.0,
            "63": 2651687936.0,
            "64": 2651687936.0,
            "65": 2651687936.0,
            "66": 2651687936.0,
            "67": 2651687936.0,
            "68": 2651687936.0,
            "69": 2651687936.0,
            "70": 2651687936.0,
            "71": 2651687936.0,
            "72": 2651687936.0,
            "73": 2651687936.0,
            "74": 2651687936.0,
            "75": 2651687936.0,
            "76": 2651687936.0,
            "77": 2651687936.0,
            "78": 2651687936.0,
            "79": 2651687936.0,
            "80": 2651687936.0,
            "81": 2651687936.0,
            "82": 2651687936.0,
            "83": 2651687936.0,
            "84": 2651687936.0,
            "85": 2651687936.0,
            "86": 2651687936.0,
            "87": 2651687936.0,
            "88": 2651687936.0,
            "89": 2651687936.0,
            "90": 2651687936.0,
            "91": 2651687936.0,
            "92": 2651687936.0,
            "93": 2651687936.0,
            "94": 2651687936.0,
            "95": 2651687936.0,
            "96": 2651687936.0,
            "97": 2651687936.0,
            "98": 2651687936.0,
            "99": 2651687936.0,
            "100": 2651687936.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 12.1008,
            "2": 0.16051,
            "3": 0.12978,
            "4": 0.12855,
            "5": 0.12836,
            "6": 0.12718,
            "7": 0.12817,
            "8": 0.12827,
            "9": 0.12773,
            "10": 0.12934,
            "11": 0.1284,
            "12": 0.1278,
            "13": 0.12824,
            "14": 0.12897,
            "15": 0.12788,
            "16": 0.12662,
            "17": 0.12751,
            "18": 0.12678,
            "19": 0.12784,
            "20": 0.12756,
            "21": 0.12782,
            "22": 0.12765,
            "23": 0.12695,
            "24": 0.12621,
            "25": 0.12639,
            "26": 0.12652,
            "27": 0.1261,
            "28": 0.12599,
            "29": 0.12679,
            "30": 0.12648,
            "31": 0.12791,
            "32": 0.1267,
            "33": 0.12736,
            "34": 0.1275,
            "35": 0.12674,
            "36": 0.12623,
            "37": 0.12561,
            "38": 0.12629,
            "39": 0.12735,
            "40": 0.12739,
            "41": 0.12784,
            "42": 0.12763,
            "43": 0.12841,
            "44": 0.12666,
            "45": 0.12797,
            "46": 0.12722,
            "47": 0.12583,
            "48": 0.1271,
            "49": 0.12675,
            "50": 0.12769,
            "51": 0.13852,
            "52": 0.1338,
            "53": 0.1334,
            "54": 0.13466,
            "55": 0.13471,
            "56": 0.13266,
            "57": 0.13116,
            "58": 0.13405,
            "59": 0.13356,
            "60": 0.13304,
            "61": 0.13328,
            "62": 0.13243,
            "63": 0.13255,
            "64": 0.13344,
            "65": 0.13433,
            "66": 0.13486,
            "67": 0.13338,
            "68": 0.13313,
            "69": 0.13327,
            "70": 0.1324,
            "71": 0.13325,
            "72": 0.13418,
            "73": 0.1341,
            "74": 0.1334,
            "75": 0.13238,
            "76": 0.13198,
            "77": 0.13412,
            "78": 0.1335,
            "79": 0.13208,
            "80": 0.13334,
            "81": 0.13338,
            "82": 0.13187,
            "83": 0.13324,
            "84": 0.13268,
            "85": 0.13362,
            "86": 0.13282,
            "87": 0.13325,
            "88": 0.13348,
            "89": 0.13361,
            "90": 0.13267,
            "91": 0.13322,
            "92": 0.13404,
            "93": 0.13424,
            "94": 0.13249,
            "95": 0.1323,
            "96": 0.13217,
            "97": 0.16026,
            "98": 0.13491,
            "99": 0.13704,
            "100": 0.13716
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgx_a100.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.81548,
            "2": 10.8208,
            "3": 10.81272,
            "4": 10.78885,
            "5": 10.85169,
            "6": 10.87038,
            "7": 10.83098,
            "8": 10.8343,
            "9": 10.83996,
            "10": 10.78686,
            "11": 10.88025,
            "12": 10.85974,
            "13": 10.86596,
            "14": 10.8782,
            "15": 10.79466,
            "16": 10.79613,
            "17": 10.76887,
            "18": 10.81076,
            "19": 10.7986,
            "20": 10.69061,
            "21": 10.67945,
            "22": 10.52114,
            "23": 10.70755,
            "24": 10.57741,
            "25": 10.52373,
            "26": 10.59597,
            "27": 10.58619,
            "28": 10.56176,
            "29": 10.56945,
            "30": 10.34573,
            "31": 10.10078,
            "32": 10.45392,
            "33": 10.4461,
            "34": 10.20721,
            "35": 10.262,
            "36": 10.21312,
            "37": 10.32468,
            "38": 10.16831,
            "39": 10.38374,
            "40": 10.07231,
            "41": 10.13763,
            "42": 10.19765,
            "43": 9.81155,
            "44": 9.93311,
            "45": 9.8104,
            "46": 9.80854,
            "47": 10.12558,
            "48": 9.82105,
            "49": 9.50764,
            "50": 9.88382,
            "51": 9.83549,
            "52": 9.72516,
            "53": 10.04799,
            "54": 9.93011,
            "55": 9.8636,
            "56": 9.60217,
            "57": 9.45187,
            "58": 9.8078,
            "59": 9.56783,
            "60": 9.47966,
            "61": 9.67984,
            "62": 9.96754,
            "63": 9.35113,
            "64": 9.75623,
            "65": 8.9318,
            "66": 9.68107,
            "67": 9.35956,
            "68": 9.76948,
            "69": 9.77492,
            "70": 9.71182,
            "71": 9.60632,
            "72": 9.57129,
            "73": 9.48392,
            "74": 8.92911,
            "75": 9.40028,
            "76": 9.07194,
            "77": 10.05252,
            "78": 9.71494,
            "79": 9.35747,
            "80": 9.38946,
            "81": 9.46791,
            "82": 9.68508,
            "83": 9.29588,
            "84": 9.40522,
            "85": 9.60163,
            "86": 9.06713,
            "87": 9.58402,
            "88": 9.73304,
            "89": 9.59526,
            "90": 9.80555,
            "91": 9.32604,
            "92": 9.35323,
            "93": 9.06915,
            "94": 8.82268,
            "95": 9.50858,
            "96": 9.51584,
            "97": 9.2976,
            "98": 9.66184,
            "99": 8.87662,
            "100": 9.39222
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1518.0,
            "2": 1697.0,
            "3": 1678.0,
            "4": 1709.0,
            "5": 1873.0,
            "6": 1786.0,
            "7": 1813.0,
            "8": 1519.0,
            "9": 1684.0,
            "10": 1417.0,
            "11": 1943.0,
            "12": 1723.0,
            "13": 1939.0,
            "14": 1759.0,
            "15": 1728.0,
            "16": 1773.0,
            "17": 1805.0,
            "18": 1702.0,
            "19": 1803.0,
            "20": 1627.0,
            "21": 1822.0,
            "22": 1748.0,
            "23": 1938.0,
            "24": 1642.0,
            "25": 1696.0,
            "26": 1760.0,
            "27": 1809.0,
            "28": 2025.0,
            "29": 1900.0,
            "30": 1902.0,
            "31": 1645.0,
            "32": 1876.0,
            "33": 2105.0,
            "34": 1881.0,
            "35": 1913.0,
            "36": 1864.0,
            "37": 2322.0,
            "38": 2194.0,
            "39": 2318.0,
            "40": 2010.0,
            "41": 2358.0,
            "42": 2155.0,
            "43": 1980.0,
            "44": 2105.0,
            "45": 2082.0,
            "46": 2221.0,
            "47": 2537.0,
            "48": 2367.0,
            "49": 2190.0,
            "50": 2352.0,
            "51": 2441.0,
            "52": 2482.0,
            "53": 2916.0,
            "54": 2550.0,
            "55": 2347.0,
            "56": 2765.0,
            "57": 2116.0,
            "58": 2968.0,
            "59": 2810.0,
            "60": 2384.0,
            "61": 2912.0,
            "62": 2554.0,
            "63": 2364.0,
            "64": 3035.0,
            "65": 2648.0,
            "66": 2979.0,
            "67": 2741.0,
            "68": 2799.0,
            "69": 3071.0,
            "70": 3098.0,
            "71": 2950.0,
            "72": 2342.0,
            "73": 2829.0,
            "74": 1840.0,
            "75": 2426.0,
            "76": 2941.0,
            "77": 3245.0,
            "78": 3272.0,
            "79": 3066.0,
            "80": 3221.0,
            "81": 3565.0,
            "82": 3162.0,
            "83": 2876.0,
            "84": 3180.0,
            "85": 3410.0,
            "86": 2778.0,
            "87": 3752.0,
            "88": 2995.0,
            "89": 3264.0,
            "90": 2940.0,
            "91": 2791.0,
            "92": 3118.0,
            "93": 2634.0,
            "94": 3464.0,
            "95": 3344.0,
            "96": 3499.0,
            "97": 3122.0,
            "98": 3568.0,
            "99": 3272.0,
            "100": 3476.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 733860352.0,
            "2": 733860352.0,
            "3": 733860352.0,
            "4": 733860352.0,
            "5": 733860352.0,
            "6": 733860352.0,
            "7": 733860352.0,
            "8": 733860352.0,
            "9": 733860352.0,
            "10": 733860352.0,
            "11": 733860352.0,
            "12": 733860352.0,
            "13": 733860352.0,
            "14": 733860352.0,
            "15": 733860352.0,
            "16": 733860352.0,
            "17": 733860352.0,
            "18": 733860352.0,
            "19": 733860352.0,
            "20": 733860352.0,
            "21": 733860352.0,
            "22": 733860352.0,
            "23": 733860352.0,
            "24": 733860352.0,
            "25": 733860352.0,
            "26": 733860352.0,
            "27": 733860352.0,
            "28": 733860352.0,
            "29": 733860352.0,
            "30": 733860352.0,
            "31": 733860352.0,
            "32": 733860352.0,
            "33": 733860352.0,
            "34": 733860352.0,
            "35": 733860352.0,
            "36": 733860352.0,
            "37": 733860352.0,
            "38": 733860352.0,
            "39": 733860352.0,
            "40": 733860352.0,
            "41": 733860352.0,
            "42": 733860352.0,
            "43": 733860352.0,
            "44": 733860352.0,
            "45": 733860352.0,
            "46": 733860352.0,
            "47": 733860352.0,
            "48": 733860352.0,
            "49": 733860352.0,
            "50": 733860352.0,
            "51": 733860352.0,
            "52": 733860352.0,
            "53": 733860352.0,
            "54": 733860352.0,
            "55": 733860352.0,
            "56": 733860352.0,
            "57": 733860352.0,
            "58": 733860352.0,
            "59": 733860352.0,
            "60": 733860352.0,
            "61": 733860352.0,
            "62": 733860352.0,
            "63": 733860352.0,
            "64": 733860352.0,
            "65": 733860352.0,
            "66": 733860352.0,
            "67": 733860352.0,
            "68": 733860352.0,
            "69": 733860352.0,
            "70": 733860352.0,
            "71": 733860352.0,
            "72": 733860352.0,
            "73": 733860352.0,
            "74": 733860352.0,
            "75": 733860352.0,
            "76": 733860352.0,
            "77": 733860352.0,
            "78": 733860352.0,
            "79": 733860352.0,
            "80": 733860352.0,
            "81": 733860352.0,
            "82": 733860352.0,
            "83": 733860352.0,
            "84": 733860352.0,
            "85": 733860352.0,
            "86": 733860352.0,
            "87": 733860352.0,
            "88": 733860352.0,
            "89": 733860352.0,
            "90": 733860352.0,
            "91": 733860352.0,
            "92": 733860352.0,
            "93": 733860352.0,
            "94": 733860352.0,
            "95": 733860352.0,
            "96": 733860352.0,
            "97": 733860352.0,
            "98": 733860352.0,
            "99": 733860352.0,
            "100": 733860352.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2368927744.0,
            "2": 2651687936.0,
            "3": 2651687936.0,
            "4": 2651687936.0,
            "5": 2651687936.0,
            "6": 2651687936.0,
            "7": 2651687936.0,
            "8": 2651687936.0,
            "9": 2651687936.0,
            "10": 2651687936.0,
            "11": 2651687936.0,
            "12": 2651687936.0,
            "13": 2651687936.0,
            "14": 2651687936.0,
            "15": 2651687936.0,
            "16": 2651687936.0,
            "17": 2651687936.0,
            "18": 2651687936.0,
            "19": 2651687936.0,
            "20": 2651687936.0,
            "21": 2651687936.0,
            "22": 2651687936.0,
            "23": 2651687936.0,
            "24": 2651687936.0,
            "25": 2651687936.0,
            "26": 2651687936.0,
            "27": 2651687936.0,
            "28": 2651687936.0,
            "29": 2651687936.0,
            "30": 2651687936.0,
            "31": 2651687936.0,
            "32": 2651687936.0,
            "33": 2651687936.0,
            "34": 2651687936.0,
            "35": 2651687936.0,
            "36": 2651687936.0,
            "37": 2651687936.0,
            "38": 2651687936.0,
            "39": 2651687936.0,
            "40": 2651687936.0,
            "41": 2651687936.0,
            "42": 2651687936.0,
            "43": 2651687936.0,
            "44": 2651687936.0,
            "45": 2651687936.0,
            "46": 2651687936.0,
            "47": 2651687936.0,
            "48": 2651687936.0,
            "49": 2651687936.0,
            "50": 2651687936.0,
            "51": 2651687936.0,
            "52": 2651687936.0,
            "53": 2651687936.0,
            "54": 2651687936.0,
            "55": 2651687936.0,
            "56": 2651687936.0,
            "57": 2651687936.0,
            "58": 2651687936.0,
            "59": 2651687936.0,
            "60": 2651687936.0,
            "61": 2651687936.0,
            "62": 2651687936.0,
            "63": 2651687936.0,
            "64": 2651687936.0,
            "65": 2651687936.0,
            "66": 2651687936.0,
            "67": 2651687936.0,
            "68": 2651687936.0,
            "69": 2651687936.0,
            "70": 2651687936.0,
            "71": 2651687936.0,
            "72": 2651687936.0,
            "73": 2651687936.0,
            "74": 2651687936.0,
            "75": 2651687936.0,
            "76": 2651687936.0,
            "77": 2651687936.0,
            "78": 2651687936.0,
            "79": 2651687936.0,
            "80": 2651687936.0,
            "81": 2651687936.0,
            "82": 2651687936.0,
            "83": 2651687936.0,
            "84": 2651687936.0,
            "85": 2651687936.0,
            "86": 2651687936.0,
            "87": 2651687936.0,
            "88": 2651687936.0,
            "89": 2651687936.0,
            "90": 2651687936.0,
            "91": 2651687936.0,
            "92": 2651687936.0,
            "93": 2651687936.0,
            "94": 2651687936.0,
            "95": 2651687936.0,
            "96": 2651687936.0,
            "97": 2651687936.0,
            "98": 2651687936.0,
            "99": 2651687936.0,
            "100": 2651687936.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 5.40788,
            "2": 0.15608,
            "3": 0.1477,
            "4": 0.13403,
            "5": 0.13382,
            "6": 0.13308,
            "7": 0.1344,
            "8": 0.13063,
            "9": 0.12991,
            "10": 0.13084,
            "11": 0.13107,
            "12": 0.13009,
            "13": 0.13035,
            "14": 0.13027,
            "15": 0.13037,
            "16": 0.1302,
            "17": 0.12981,
            "18": 0.12893,
            "19": 0.12914,
            "20": 0.12893,
            "21": 0.12912,
            "22": 0.1334,
            "23": 0.13093,
            "24": 0.13133,
            "25": 0.13036,
            "26": 0.13026,
            "27": 0.13063,
            "28": 0.13046,
            "29": 0.13311,
            "30": 0.13167,
            "31": 0.13145,
            "32": 0.13051,
            "33": 0.13072,
            "34": 0.1308,
            "35": 0.13145,
            "36": 0.13046,
            "37": 0.13066,
            "38": 0.13075,
            "39": 0.13108,
            "40": 0.1305,
            "41": 0.13132,
            "42": 0.1308,
            "43": 0.13149,
            "44": 0.13097,
            "45": 0.13099,
            "46": 0.13204,
            "47": 0.13136,
            "48": 0.13051,
            "49": 0.13073,
            "50": 0.13055,
            "51": 0.1389,
            "52": 0.13184,
            "53": 0.13181,
            "54": 0.13087,
            "55": 0.13152,
            "56": 0.13181,
            "57": 0.13138,
            "58": 0.13134,
            "59": 0.13133,
            "60": 0.13251,
            "61": 0.13157,
            "62": 0.13187,
            "63": 0.13183,
            "64": 0.13133,
            "65": 0.13157,
            "66": 0.13239,
            "67": 0.13213,
            "68": 0.13166,
            "69": 0.13128,
            "70": 0.13118,
            "71": 0.13129,
            "72": 0.1319,
            "73": 0.13204,
            "74": 0.13343,
            "75": 0.13119,
            "76": 0.13129,
            "77": 0.13116,
            "78": 0.13092,
            "79": 0.13228,
            "80": 0.13183,
            "81": 0.13133,
            "82": 0.13205,
            "83": 0.13189,
            "84": 0.13312,
            "85": 0.13289,
            "86": 0.13578,
            "87": 0.13422,
            "88": 0.1347,
            "89": 0.13466,
            "90": 0.13428,
            "91": 0.13512,
            "92": 0.13241,
            "93": 0.12996,
            "94": 0.1315,
            "95": 0.12919,
            "96": 0.12806,
            "97": 0.12848,
            "98": 0.12922,
            "99": 0.12714,
            "100": 0.12757
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgx_a100_2nd.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 9.83549,
            "52": 9.72516,
            "53": 10.04799,
            "54": 9.93011,
            "55": 9.8636,
            "56": 9.60217,
            "57": 9.45187,
            "58": 9.8078,
            "59": 9.56783,
            "60": 9.47966,
            "61": 9.67984,
            "62": 9.96754,
            "63": 9.35113,
            "64": 9.75623,
            "65": 8.9318,
            "66": 9.68107,
            "67": 9.35956,
            "68": 9.76948,
            "69": 9.77492,
            "70": 9.71182,
            "71": 9.60632,
            "72": 9.57129,
            "73": 9.48392,
            "74": 8.92911,
            "75": 9.40028,
            "76": 9.07194,
            "77": 10.05252,
            "78": 9.71494,
            "79": 9.35747,
            "80": 9.38946,
            "81": 9.46791,
            "82": 9.68508,
            "83": 9.29588,
            "84": 9.40522,
            "85": 9.60163,
            "86": 9.06713,
            "87": 9.58402,
            "88": 9.73304,
            "89": 9.59526,
            "90": 9.80555,
            "91": 9.32604,
            "92": 9.35323,
            "93": 9.06915,
            "94": 8.82268,
            "95": 9.50858,
            "96": 9.51584,
            "97": 9.2976,
            "98": 9.66184,
            "99": 8.87662,
            "100": 9.39222
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2441.0,
            "52": 2482.0,
            "53": 2916.0,
            "54": 2550.0,
            "55": 2347.0,
            "56": 2765.0,
            "57": 2116.0,
            "58": 2968.0,
            "59": 2810.0,
            "60": 2384.0,
            "61": 2912.0,
            "62": 2554.0,
            "63": 2364.0,
            "64": 3035.0,
            "65": 2648.0,
            "66": 2979.0,
            "67": 2741.0,
            "68": 2799.0,
            "69": 3071.0,
            "70": 3098.0,
            "71": 2950.0,
            "72": 2342.0,
            "73": 2829.0,
            "74": 1840.0,
            "75": 2426.0,
            "76": 2941.0,
            "77": 3245.0,
            "78": 3272.0,
            "79": 3066.0,
            "80": 3221.0,
            "81": 3565.0,
            "82": 3162.0,
            "83": 2876.0,
            "84": 3180.0,
            "85": 3410.0,
            "86": 2778.0,
            "87": 3752.0,
            "88": 2995.0,
            "89": 3264.0,
            "90": 2940.0,
            "91": 2791.0,
            "92": 3118.0,
            "93": 2634.0,
            "94": 3464.0,
            "95": 3344.0,
            "96": 3499.0,
            "97": 3122.0,
            "98": 3568.0,
            "99": 3272.0,
            "100": 3476.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 733860352.0,
            "52": 733860352.0,
            "53": 733860352.0,
            "54": 733860352.0,
            "55": 733860352.0,
            "56": 733860352.0,
            "57": 733860352.0,
            "58": 733860352.0,
            "59": 733860352.0,
            "60": 733860352.0,
            "61": 733860352.0,
            "62": 733860352.0,
            "63": 733860352.0,
            "64": 733860352.0,
            "65": 733860352.0,
            "66": 733860352.0,
            "67": 733860352.0,
            "68": 733860352.0,
            "69": 733860352.0,
            "70": 733860352.0,
            "71": 733860352.0,
            "72": 733860352.0,
            "73": 733860352.0,
            "74": 733860352.0,
            "75": 733860352.0,
            "76": 733860352.0,
            "77": 733860352.0,
            "78": 733860352.0,
            "79": 733860352.0,
            "80": 733860352.0,
            "81": 733860352.0,
            "82": 733860352.0,
            "83": 733860352.0,
            "84": 733860352.0,
            "85": 733860352.0,
            "86": 733860352.0,
            "87": 733860352.0,
            "88": 733860352.0,
            "89": 733860352.0,
            "90": 733860352.0,
            "91": 733860352.0,
            "92": 733860352.0,
            "93": 733860352.0,
            "94": 733860352.0,
            "95": 733860352.0,
            "96": 733860352.0,
            "97": 733860352.0,
            "98": 733860352.0,
            "99": 733860352.0,
            "100": 733860352.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 2652735488.0,
            "52": 2652736512.0,
            "53": 2652736512.0,
            "54": 2652736512.0,
            "55": 2652736512.0,
            "56": 2652736512.0,
            "57": 2652736512.0,
            "58": 2652736512.0,
            "59": 2652736512.0,
            "60": 2652736512.0,
            "61": 2652736512.0,
            "62": 2652736512.0,
            "63": 2652736512.0,
            "64": 2652736512.0,
            "65": 2652736512.0,
            "66": 2652736512.0,
            "67": 2652736512.0,
            "68": 2652736512.0,
            "69": 2652736512.0,
            "70": 2652736512.0,
            "71": 2652736512.0,
            "72": 2652736512.0,
            "73": 2652736512.0,
            "74": 2652736512.0,
            "75": 2652736512.0,
            "76": 2652736512.0,
            "77": 2652736512.0,
            "78": 2652736512.0,
            "79": 2652736512.0,
            "80": 2652736512.0,
            "81": 2652736512.0,
            "82": 2652736512.0,
            "83": 2652736512.0,
            "84": 2652736512.0,
            "85": 2652736512.0,
            "86": 2652736512.0,
            "87": 2652736512.0,
            "88": 2652736512.0,
            "89": 2652736512.0,
            "90": 2652736512.0,
            "91": 2652736512.0,
            "92": 2652736512.0,
            "93": 2652736512.0,
            "94": 2652736512.0,
            "95": 2652736512.0,
            "96": 2652736512.0,
            "97": 2652736512.0,
            "98": 2652736512.0,
            "99": 2652736512.0,
            "100": 2652736512.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": "nan",
            "3": "nan",
            "4": "nan",
            "5": "nan",
            "6": "nan",
            "7": "nan",
            "8": "nan",
            "9": "nan",
            "10": "nan",
            "11": "nan",
            "12": "nan",
            "13": "nan",
            "14": "nan",
            "15": "nan",
            "16": "nan",
            "17": "nan",
            "18": "nan",
            "19": "nan",
            "20": "nan",
            "21": "nan",
            "22": "nan",
            "23": "nan",
            "24": "nan",
            "25": "nan",
            "26": "nan",
            "27": "nan",
            "28": "nan",
            "29": "nan",
            "30": "nan",
            "31": "nan",
            "32": "nan",
            "33": "nan",
            "34": "nan",
            "35": "nan",
            "36": "nan",
            "37": "nan",
            "38": "nan",
            "39": "nan",
            "40": "nan",
            "41": "nan",
            "42": "nan",
            "43": "nan",
            "44": "nan",
            "45": "nan",
            "46": "nan",
            "47": "nan",
            "48": "nan",
            "49": "nan",
            "50": "nan",
            "51": 6.95149,
            "52": 0.16138,
            "53": 0.14143,
            "54": 0.14147,
            "55": 0.14039,
            "56": 0.14065,
            "57": 0.14197,
            "58": 0.14092,
            "59": 0.13304,
            "60": 0.1316,
            "61": 0.13067,
            "62": 0.13101,
            "63": 0.13087,
            "64": 0.13347,
            "65": 0.13501,
            "66": 0.13486,
            "67": 0.13415,
            "68": 0.13402,
            "69": 0.1339,
            "70": 0.1332,
            "71": 0.13414,
            "72": 0.13291,
            "73": 0.1334,
            "74": 0.13397,
            "75": 0.13253,
            "76": 0.13314,
            "77": 0.13317,
            "78": 0.13335,
            "79": 0.13316,
            "80": 0.13312,
            "81": 0.13302,
            "82": 0.13404,
            "83": 0.13393,
            "84": 0.13355,
            "85": 0.13237,
            "86": 0.13361,
            "87": 0.13268,
            "88": 0.13156,
            "89": 0.13245,
            "90": 0.13179,
            "91": 0.13173,
            "92": 0.13158,
            "93": 0.13204,
            "94": 0.1318,
            "95": 0.13972,
            "96": 0.13128,
            "97": 0.12988,
            "98": 0.13091,
            "99": 0.13155,
            "100": 0.1314
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgxa100_dracooci-ord.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.82005,
            "2": 10.81907,
            "3": 10.81396,
            "4": 10.78497,
            "5": 10.85284,
            "6": 10.87449,
            "7": 10.83201,
            "8": 10.83297,
            "9": 10.83935,
            "10": 10.78455,
            "11": 10.87798,
            "12": 10.86112,
            "13": 10.86444,
            "14": 10.87605,
            "15": 10.79229,
            "16": 10.79509,
            "17": 10.76768,
            "18": 10.81005,
            "19": 10.79719,
            "20": 10.69211,
            "21": 10.68164,
            "22": 10.52085,
            "23": 10.70893,
            "24": 10.57599,
            "25": 10.52412,
            "26": 10.59517,
            "27": 10.58426,
            "28": 10.56233,
            "29": 10.57013,
            "30": 10.34552,
            "31": 10.10049,
            "32": 10.45378,
            "33": 10.44627,
            "34": 10.20606,
            "35": 10.26239,
            "36": 10.21239,
            "37": 10.32522,
            "38": 10.16777,
            "39": 10.38334,
            "40": 10.07241,
            "41": 10.13863,
            "42": 10.19814,
            "43": 9.81073,
            "44": 9.93244,
            "45": 9.81101,
            "46": 9.80877,
            "47": 10.12608,
            "48": 9.82108,
            "49": 9.50625,
            "50": 9.88422,
            "51": 9.83655,
            "52": 9.72542,
            "53": 10.04681,
            "54": 9.93029,
            "55": 9.86374,
            "56": 9.60187,
            "57": 9.4509,
            "58": 9.80848,
            "59": 9.56669,
            "60": 9.47965,
            "61": 9.67901,
            "62": 9.96739,
            "63": 9.35162,
            "64": 9.75606,
            "65": 8.93063,
            "66": 9.68053,
            "67": 9.35888,
            "68": 9.76985,
            "69": 9.77496,
            "70": 9.71215,
            "71": 9.60754,
            "72": 9.57085,
            "73": 9.48404,
            "74": 8.92823,
            "75": 9.40048,
            "76": 9.07196,
            "77": 10.05227,
            "78": 9.71519,
            "79": 9.35769,
            "80": 9.39077,
            "81": 9.46749,
            "82": 9.68504,
            "83": 9.29553,
            "84": 9.40532,
            "85": 9.60141,
            "86": 9.06774,
            "87": 9.585,
            "88": 9.73363,
            "89": 9.59519,
            "90": 9.80501,
            "91": 9.3255,
            "92": 9.35331,
            "93": 9.06981,
            "94": 8.82231,
            "95": 9.50816,
            "96": 9.51534,
            "97": 9.29772,
            "98": 9.66202,
            "99": 8.87692,
            "100": 9.3924
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1559.0,
            "2": 1591.0,
            "3": 1727.0,
            "4": 1835.0,
            "5": 1840.0,
            "6": 1719.0,
            "7": 1740.0,
            "8": 1591.0,
            "9": 1839.0,
            "10": 1380.0,
            "11": 1856.0,
            "12": 1693.0,
            "13": 1906.0,
            "14": 1757.0,
            "15": 1848.0,
            "16": 1791.0,
            "17": 1752.0,
            "18": 1669.0,
            "19": 1722.0,
            "20": 1601.0,
            "21": 1900.0,
            "22": 1662.0,
            "23": 2006.0,
            "24": 1597.0,
            "25": 1635.0,
            "26": 1709.0,
            "27": 1931.0,
            "28": 2043.0,
            "29": 1888.0,
            "30": 1936.0,
            "31": 1550.0,
            "32": 1913.0,
            "33": 2135.0,
            "34": 1703.0,
            "35": 1908.0,
            "36": 1953.0,
            "37": 2291.0,
            "38": 2210.0,
            "39": 2334.0,
            "40": 2100.0,
            "41": 2300.0,
            "42": 2236.0,
            "43": 1897.0,
            "44": 1993.0,
            "45": 2098.0,
            "46": 2298.0,
            "47": 2504.0,
            "48": 2356.0,
            "49": 2268.0,
            "50": 2333.0,
            "51": 2487.0,
            "52": 2422.0,
            "53": 2969.0,
            "54": 2698.0,
            "55": 2260.0,
            "56": 2773.0,
            "57": 2153.0,
            "58": 2903.0,
            "59": 2750.0,
            "60": 2399.0,
            "61": 2943.0,
            "62": 2646.0,
            "63": 2470.0,
            "64": 2952.0,
            "65": 2656.0,
            "66": 3077.0,
            "67": 2683.0,
            "68": 2841.0,
            "69": 3047.0,
            "70": 3077.0,
            "71": 2947.0,
            "72": 2446.0,
            "73": 2719.0,
            "74": 1886.0,
            "75": 2547.0,
            "76": 2983.0,
            "77": 3150.0,
            "78": 3223.0,
            "79": 3085.0,
            "80": 3315.0,
            "81": 3695.0,
            "82": 3285.0,
            "83": 2818.0,
            "84": 3328.0,
            "85": 3371.0,
            "86": 2574.0,
            "87": 3733.0,
            "88": 3046.0,
            "89": 3195.0,
            "90": 2943.0,
            "91": 2825.0,
            "92": 3086.0,
            "93": 2711.0,
            "94": 3416.0,
            "95": 3457.0,
            "96": 3408.0,
            "97": 3161.0,
            "98": 3616.0,
            "99": 3374.0,
            "100": 3292.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 733859840.0,
            "2": 733859840.0,
            "3": 733859840.0,
            "4": 733859840.0,
            "5": 733859840.0,
            "6": 733859840.0,
            "7": 733859840.0,
            "8": 733859840.0,
            "9": 733859840.0,
            "10": 733859840.0,
            "11": 733859840.0,
            "12": 733859840.0,
            "13": 733859840.0,
            "14": 733859840.0,
            "15": 733859840.0,
            "16": 733859840.0,
            "17": 733859840.0,
            "18": 733859840.0,
            "19": 733859840.0,
            "20": 733859840.0,
            "21": 733859840.0,
            "22": 733859840.0,
            "23": 733859840.0,
            "24": 733859840.0,
            "25": 733859840.0,
            "26": 733859840.0,
            "27": 733859840.0,
            "28": 733859840.0,
            "29": 733859840.0,
            "30": 733859840.0,
            "31": 733859840.0,
            "32": 733859840.0,
            "33": 733859840.0,
            "34": 733859840.0,
            "35": 733859840.0,
            "36": 733859840.0,
            "37": 733859840.0,
            "38": 733859840.0,
            "39": 733859840.0,
            "40": 733859840.0,
            "41": 733859840.0,
            "42": 733859840.0,
            "43": 733859840.0,
            "44": 733859840.0,
            "45": 733859840.0,
            "46": 733859840.0,
            "47": 733859840.0,
            "48": 733859840.0,
            "49": 733859840.0,
            "50": 733859840.0,
            "51": 733859840.0,
            "52": 733859840.0,
            "53": 733859840.0,
            "54": 733859840.0,
            "55": 733859840.0,
            "56": 733859840.0,
            "57": 733859840.0,
            "58": 733859840.0,
            "59": 733859840.0,
            "60": 733859840.0,
            "61": 733859840.0,
            "62": 733859840.0,
            "63": 733859840.0,
            "64": 733859840.0,
            "65": 733859840.0,
            "66": 733859840.0,
            "67": 733859840.0,
            "68": 733859840.0,
            "69": 733859840.0,
            "70": 733859840.0,
            "71": 733859840.0,
            "72": 733859840.0,
            "73": 733859840.0,
            "74": 733859840.0,
            "75": 733859840.0,
            "76": 733859840.0,
            "77": 733859840.0,
            "78": 733859840.0,
            "79": 733859840.0,
            "80": 733859840.0,
            "81": 733859840.0,
            "82": 733859840.0,
            "83": 733859840.0,
            "84": 733859840.0,
            "85": 733859840.0,
            "86": 733859840.0,
            "87": 733859840.0,
            "88": 733859840.0,
            "89": 733859840.0,
            "90": 733859840.0,
            "91": 733859840.0,
            "92": 733859840.0,
            "93": 733859840.0,
            "94": 733859840.0,
            "95": 733859840.0,
            "96": 733859840.0,
            "97": 733859840.0,
            "98": 733859840.0,
            "99": 733859840.0,
            "100": 733859840.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 3838895104.0,
            "2": 4122703872.0,
            "3": 4122703872.0,
            "4": 4122703872.0,
            "5": 4122703872.0,
            "6": 4122703872.0,
            "7": 4122703872.0,
            "8": 4122703872.0,
            "9": 4122703872.0,
            "10": 4122703872.0,
            "11": 4122703872.0,
            "12": 4122703872.0,
            "13": 4122703872.0,
            "14": 4122703872.0,
            "15": 4122703872.0,
            "16": 4122703872.0,
            "17": 4122703872.0,
            "18": 4122703872.0,
            "19": 4122703872.0,
            "20": 4122703872.0,
            "21": 4122703872.0,
            "22": 4122703872.0,
            "23": 4122703872.0,
            "24": 4122703872.0,
            "25": 4122703872.0,
            "26": 4122703872.0,
            "27": 4122703872.0,
            "28": 4122703872.0,
            "29": 4122703872.0,
            "30": 4122703872.0,
            "31": 4122703872.0,
            "32": 4122703872.0,
            "33": 4122703872.0,
            "34": 4122703872.0,
            "35": 4122703872.0,
            "36": 4122703872.0,
            "37": 4122703872.0,
            "38": 4122703872.0,
            "39": 4122703872.0,
            "40": 4122703872.0,
            "41": 4122703872.0,
            "42": 4122703872.0,
            "43": 4122703872.0,
            "44": 4122703872.0,
            "45": 4122703872.0,
            "46": 4122703872.0,
            "47": 4122703872.0,
            "48": 4122703872.0,
            "49": 4122703872.0,
            "50": 4122703872.0,
            "51": 4122703872.0,
            "52": 4122703872.0,
            "53": 4122703872.0,
            "54": 4122703872.0,
            "55": 4122703872.0,
            "56": 4122703872.0,
            "57": 4122703872.0,
            "58": 4122703872.0,
            "59": 4122703872.0,
            "60": 4122703872.0,
            "61": 4122703872.0,
            "62": 4122703872.0,
            "63": 4122703872.0,
            "64": 4122703872.0,
            "65": 4122703872.0,
            "66": 4122703872.0,
            "67": 4122703872.0,
            "68": 4122703872.0,
            "69": 4122703872.0,
            "70": 4122703872.0,
            "71": 4122703872.0,
            "72": 4122703872.0,
            "73": 4122703872.0,
            "74": 4122703872.0,
            "75": 4122703872.0,
            "76": 4122703872.0,
            "77": 4122703872.0,
            "78": 4122703872.0,
            "79": 4122703872.0,
            "80": 4122703872.0,
            "81": 4122703872.0,
            "82": 4122703872.0,
            "83": 4122703872.0,
            "84": 4122703872.0,
            "85": 4122703872.0,
            "86": 4122703872.0,
            "87": 4122703872.0,
            "88": 4122703872.0,
            "89": 4122703872.0,
            "90": 4122703872.0,
            "91": 4122703872.0,
            "92": 4122703872.0,
            "93": 4122703872.0,
            "94": 4122703872.0,
            "95": 4122703872.0,
            "96": 4122703872.0,
            "97": 4122703872.0,
            "98": 4122703872.0,
            "99": 4122703872.0,
            "100": 4122703872.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 20.74392,
            "2": 0.20458,
            "3": 0.17337,
            "4": 0.17372,
            "5": 0.17406,
            "6": 0.17407,
            "7": 0.1701,
            "8": 0.1709,
            "9": 0.17096,
            "10": 0.17284,
            "11": 0.17356,
            "12": 0.17143,
            "13": 0.17133,
            "14": 0.17078,
            "15": 0.17163,
            "16": 0.17206,
            "17": 0.17227,
            "18": 0.1714,
            "19": 0.17121,
            "20": 0.17143,
            "21": 0.17086,
            "22": 0.17241,
            "23": 0.17251,
            "24": 0.17165,
            "25": 0.17082,
            "26": 0.17042,
            "27": 0.1695,
            "28": 0.17064,
            "29": 0.17259,
            "30": 0.17056,
            "31": 0.17093,
            "32": 0.16764,
            "33": 0.1668,
            "34": 0.16801,
            "35": 0.1684,
            "36": 0.1676,
            "37": 0.16666,
            "38": 0.16729,
            "39": 0.16578,
            "40": 0.16707,
            "41": 0.16873,
            "42": 0.16705,
            "43": 0.16817,
            "44": 0.16766,
            "45": 0.16793,
            "46": 0.16745,
            "47": 0.16825,
            "48": 0.16561,
            "49": 0.16693,
            "50": 0.167,
            "51": 0.17408,
            "52": 0.17381,
            "53": 0.17359,
            "54": 0.17167,
            "55": 0.17219,
            "56": 0.17329,
            "57": 0.17468,
            "58": 0.17336,
            "59": 0.17436,
            "60": 0.17289,
            "61": 0.17216,
            "62": 0.17277,
            "63": 0.17306,
            "64": 0.17382,
            "65": 0.17362,
            "66": 0.1721,
            "67": 0.17256,
            "68": 0.17189,
            "69": 0.17201,
            "70": 0.17356,
            "71": 0.1728,
            "72": 0.17241,
            "73": 0.17349,
            "74": 0.17357,
            "75": 0.17454,
            "76": 0.17395,
            "77": 0.17253,
            "78": 0.17295,
            "79": 0.17219,
            "80": 0.1746,
            "81": 0.17297,
            "82": 0.1742,
            "83": 0.17306,
            "84": 0.17236,
            "85": 0.17328,
            "86": 0.17434,
            "87": 0.17285,
            "88": 0.17502,
            "89": 0.17257,
            "90": 0.1726,
            "91": 0.17295,
            "92": 0.17284,
            "93": 0.17452,
            "94": 0.17398,
            "95": 0.17312,
            "96": 0.1727,
            "97": 0.17207,
            "98": 0.17436,
            "99": 0.17586,
            "100": 0.17341
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_lts_dgxa100_dracooci.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.82005,
            "2": 10.81907,
            "3": 10.81396,
            "4": 10.78497,
            "5": 10.85284,
            "6": 10.87449,
            "7": 10.83201,
            "8": 10.83297,
            "9": 10.83935,
            "10": 10.78455,
            "11": 10.87798,
            "12": 10.86112,
            "13": 10.86444,
            "14": 10.87605,
            "15": 10.79229,
            "16": 10.79509,
            "17": 10.76768,
            "18": 10.81005,
            "19": 10.79719,
            "20": 10.69211,
            "21": 10.68164,
            "22": 10.52085,
            "23": 10.70893,
            "24": 10.57599,
            "25": 10.52412,
            "26": 10.59517,
            "27": 10.58426,
            "28": 10.56233,
            "29": 10.57013,
            "30": 10.34552,
            "31": 10.10049,
            "32": 10.45378,
            "33": 10.44627,
            "34": 10.20606,
            "35": 10.26239,
            "36": 10.21239,
            "37": 10.32522,
            "38": 10.16777,
            "39": 10.38334,
            "40": 10.07241,
            "41": 10.13863,
            "42": 10.19814,
            "43": 9.81073,
            "44": 9.93244,
            "45": 9.81101,
            "46": 9.80877,
            "47": 10.12608,
            "48": 9.82108,
            "49": 9.50625,
            "50": 9.88422,
            "51": 9.83655,
            "52": 9.72542,
            "53": 10.04681,
            "54": 9.93029,
            "55": 9.86374,
            "56": 9.60187,
            "57": 9.4509,
            "58": 9.80848,
            "59": 9.56669,
            "60": 9.47965,
            "61": 9.67901,
            "62": 9.96739,
            "63": 9.35162,
            "64": 9.75606,
            "65": 8.93063,
            "66": 9.68053,
            "67": 9.35888,
            "68": 9.76985,
            "69": 9.77496,
            "70": 9.71215,
            "71": 9.60754,
            "72": 9.57085,
            "73": 9.48404,
            "74": 8.92823,
            "75": 9.40048,
            "76": 9.07196,
            "77": 10.05227,
            "78": 9.71519,
            "79": 9.35769,
            "80": 9.39077,
            "81": 9.46749,
            "82": 9.68504,
            "83": 9.29553,
            "84": 9.40532,
            "85": 9.60141,
            "86": 9.06774,
            "87": 9.585,
            "88": 9.73363,
            "89": 9.59519,
            "90": 9.80501,
            "91": 9.3255,
            "92": 9.35331,
            "93": 9.06981,
            "94": 8.82231,
            "95": 9.50816,
            "96": 9.51534,
            "97": 9.29772,
            "98": 9.66202,
            "99": 8.87692,
            "100": 9.3924
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1559.0,
            "2": 1591.0,
            "3": 1727.0,
            "4": 1835.0,
            "5": 1840.0,
            "6": 1719.0,
            "7": 1740.0,
            "8": 1591.0,
            "9": 1839.0,
            "10": 1380.0,
            "11": 1856.0,
            "12": 1693.0,
            "13": 1906.0,
            "14": 1757.0,
            "15": 1848.0,
            "16": 1791.0,
            "17": 1752.0,
            "18": 1669.0,
            "19": 1722.0,
            "20": 1601.0,
            "21": 1900.0,
            "22": 1662.0,
            "23": 2006.0,
            "24": 1597.0,
            "25": 1635.0,
            "26": 1709.0,
            "27": 1931.0,
            "28": 2043.0,
            "29": 1888.0,
            "30": 1936.0,
            "31": 1550.0,
            "32": 1913.0,
            "33": 2135.0,
            "34": 1703.0,
            "35": 1908.0,
            "36": 1953.0,
            "37": 2291.0,
            "38": 2210.0,
            "39": 2334.0,
            "40": 2100.0,
            "41": 2300.0,
            "42": 2236.0,
            "43": 1897.0,
            "44": 1993.0,
            "45": 2098.0,
            "46": 2298.0,
            "47": 2504.0,
            "48": 2356.0,
            "49": 2268.0,
            "50": 2333.0,
            "51": 2487.0,
            "52": 2422.0,
            "53": 2969.0,
            "54": 2698.0,
            "55": 2260.0,
            "56": 2773.0,
            "57": 2153.0,
            "58": 2903.0,
            "59": 2750.0,
            "60": 2399.0,
            "61": 2943.0,
            "62": 2646.0,
            "63": 2470.0,
            "64": 2952.0,
            "65": 2656.0,
            "66": 3077.0,
            "67": 2683.0,
            "68": 2841.0,
            "69": 3047.0,
            "70": 3077.0,
            "71": 2947.0,
            "72": 2446.0,
            "73": 2719.0,
            "74": 1886.0,
            "75": 2547.0,
            "76": 2983.0,
            "77": 3150.0,
            "78": 3223.0,
            "79": 3085.0,
            "80": 3315.0,
            "81": 3695.0,
            "82": 3285.0,
            "83": 2818.0,
            "84": 3328.0,
            "85": 3371.0,
            "86": 2574.0,
            "87": 3733.0,
            "88": 3046.0,
            "89": 3195.0,
            "90": 2943.0,
            "91": 2825.0,
            "92": 3086.0,
            "93": 2711.0,
            "94": 3416.0,
            "95": 3457.0,
            "96": 3408.0,
            "97": 3161.0,
            "98": 3616.0,
            "99": 3374.0,
            "100": 3292.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 733859840.0,
            "2": 733859840.0,
            "3": 733859840.0,
            "4": 733859840.0,
            "5": 733859840.0,
            "6": 733859840.0,
            "7": 733859840.0,
            "8": 733859840.0,
            "9": 733859840.0,
            "10": 733859840.0,
            "11": 733859840.0,
            "12": 733859840.0,
            "13": 733859840.0,
            "14": 733859840.0,
            "15": 733859840.0,
            "16": 733859840.0,
            "17": 733859840.0,
            "18": 733859840.0,
            "19": 733859840.0,
            "20": 733859840.0,
            "21": 733859840.0,
            "22": 733859840.0,
            "23": 733859840.0,
            "24": 733859840.0,
            "25": 733859840.0,
            "26": 733859840.0,
            "27": 733859840.0,
            "28": 733859840.0,
            "29": 733859840.0,
            "30": 733859840.0,
            "31": 733859840.0,
            "32": 733859840.0,
            "33": 733859840.0,
            "34": 733859840.0,
            "35": 733859840.0,
            "36": 733859840.0,
            "37": 733859840.0,
            "38": 733859840.0,
            "39": 733859840.0,
            "40": 733859840.0,
            "41": 733859840.0,
            "42": 733859840.0,
            "43": 733859840.0,
            "44": 733859840.0,
            "45": 733859840.0,
            "46": 733859840.0,
            "47": 733859840.0,
            "48": 733859840.0,
            "49": 733859840.0,
            "50": 733859840.0,
            "51": 733859840.0,
            "52": 733859840.0,
            "53": 733859840.0,
            "54": 733859840.0,
            "55": 733859840.0,
            "56": 733859840.0,
            "57": 733859840.0,
            "58": 733859840.0,
            "59": 733859840.0,
            "60": 733859840.0,
            "61": 733859840.0,
            "62": 733859840.0,
            "63": 733859840.0,
            "64": 733859840.0,
            "65": 733859840.0,
            "66": 733859840.0,
            "67": 733859840.0,
            "68": 733859840.0,
            "69": 733859840.0,
            "70": 733859840.0,
            "71": 733859840.0,
            "72": 733859840.0,
            "73": 733859840.0,
            "74": 733859840.0,
            "75": 733859840.0,
            "76": 733859840.0,
            "77": 733859840.0,
            "78": 733859840.0,
            "79": 733859840.0,
            "80": 733859840.0,
            "81": 733859840.0,
            "82": 733859840.0,
            "83": 733859840.0,
            "84": 733859840.0,
            "85": 733859840.0,
            "86": 733859840.0,
            "87": 733859840.0,
            "88": 733859840.0,
            "89": 733859840.0,
            "90": 733859840.0,
            "91": 733859840.0,
            "92": 733859840.0,
            "93": 733859840.0,
            "94": 733859840.0,
            "95": 733859840.0,
            "96": 733859840.0,
            "97": 733859840.0,
            "98": 733859840.0,
            "99": 733859840.0,
            "100": 733859840.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 3838895104.0,
            "2": 4122703872.0,
            "3": 4122703872.0,
            "4": 4122703872.0,
            "5": 4122703872.0,
            "6": 4122703872.0,
            "7": 4122703872.0,
            "8": 4122703872.0,
            "9": 4122703872.0,
            "10": 4122703872.0,
            "11": 4122703872.0,
            "12": 4122703872.0,
            "13": 4122703872.0,
            "14": 4122703872.0,
            "15": 4122703872.0,
            "16": 4122703872.0,
            "17": 4122703872.0,
            "18": 4122703872.0,
            "19": 4122703872.0,
            "20": 4122703872.0,
            "21": 4122703872.0,
            "22": 4122703872.0,
            "23": 4122703872.0,
            "24": 4122703872.0,
            "25": 4122703872.0,
            "26": 4122703872.0,
            "27": 4122703872.0,
            "28": 4122703872.0,
            "29": 4122703872.0,
            "30": 4122703872.0,
            "31": 4122703872.0,
            "32": 4122703872.0,
            "33": 4122703872.0,
            "34": 4122703872.0,
            "35": 4122703872.0,
            "36": 4122703872.0,
            "37": 4122703872.0,
            "38": 4122703872.0,
            "39": 4122703872.0,
            "40": 4122703872.0,
            "41": 4122703872.0,
            "42": 4122703872.0,
            "43": 4122703872.0,
            "44": 4122703872.0,
            "45": 4122703872.0,
            "46": 4122703872.0,
            "47": 4122703872.0,
            "48": 4122703872.0,
            "49": 4122703872.0,
            "50": 4122703872.0,
            "51": 4122703872.0,
            "52": 4122703872.0,
            "53": 4122703872.0,
            "54": 4122703872.0,
            "55": 4122703872.0,
            "56": 4122703872.0,
            "57": 4122703872.0,
            "58": 4122703872.0,
            "59": 4122703872.0,
            "60": 4122703872.0,
            "61": 4122703872.0,
            "62": 4122703872.0,
            "63": 4122703872.0,
            "64": 4122703872.0,
            "65": 4122703872.0,
            "66": 4122703872.0,
            "67": 4122703872.0,
            "68": 4122703872.0,
            "69": 4122703872.0,
            "70": 4122703872.0,
            "71": 4122703872.0,
            "72": 4122703872.0,
            "73": 4122703872.0,
            "74": 4122703872.0,
            "75": 4122703872.0,
            "76": 4122703872.0,
            "77": 4122703872.0,
            "78": 4122703872.0,
            "79": 4122703872.0,
            "80": 4122703872.0,
            "81": 4122703872.0,
            "82": 4122703872.0,
            "83": 4122703872.0,
            "84": 4122703872.0,
            "85": 4122703872.0,
            "86": 4122703872.0,
            "87": 4122703872.0,
            "88": 4122703872.0,
            "89": 4122703872.0,
            "90": 4122703872.0,
            "91": 4122703872.0,
            "92": 4122703872.0,
            "93": 4122703872.0,
            "94": 4122703872.0,
            "95": 4122703872.0,
            "96": 4122703872.0,
            "97": 4122703872.0,
            "98": 4122703872.0,
            "99": 4122703872.0,
            "100": 4122703872.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 21.63875,
            "2": 0.20787,
            "3": 0.17721,
            "4": 0.17658,
            "5": 0.17528,
            "6": 0.17173,
            "7": 0.17222,
            "8": 0.17098,
            "9": 0.16832,
            "10": 0.16824,
            "11": 0.16991,
            "12": 0.16843,
            "13": 0.42886,
            "14": 0.16771,
            "15": 0.16923,
            "16": 0.16925,
            "17": 0.16721,
            "18": 0.16835,
            "19": 0.16585,
            "20": 0.16956,
            "21": 0.16767,
            "22": 0.16714,
            "23": 0.16974,
            "24": 0.16792,
            "25": 0.16824,
            "26": 0.16516,
            "27": 0.16767,
            "28": 0.16689,
            "29": 0.16698,
            "30": 0.16729,
            "31": 0.16513,
            "32": 0.1676,
            "33": 0.16825,
            "34": 0.16806,
            "35": 0.16705,
            "36": 0.16629,
            "37": 0.16592,
            "38": 0.16499,
            "39": 0.16482,
            "40": 0.1659,
            "41": 0.167,
            "42": 0.16751,
            "43": 0.16596,
            "44": 0.16515,
            "45": 0.1666,
            "46": 0.17084,
            "47": 0.16836,
            "48": 0.16826,
            "49": 0.16977,
            "50": 0.16743,
            "51": 0.17999,
            "52": 0.17241,
            "53": 0.17103,
            "54": 0.17085,
            "55": 0.17395,
            "56": 0.17509,
            "57": 0.17396,
            "58": 0.1719,
            "59": 0.171,
            "60": 0.17345,
            "61": 0.16946,
            "62": 0.17066,
            "63": 0.17284,
            "64": 0.17167,
            "65": 0.17007,
            "66": 0.17279,
            "67": 0.17225,
            "68": 0.17054,
            "69": 0.17013,
            "70": 0.16853,
            "71": 0.17021,
            "72": 0.17001,
            "73": 0.17136,
            "74": 0.17139,
            "75": 0.17396,
            "76": 0.17179,
            "77": 0.1705,
            "78": 0.17116,
            "79": 0.17303,
            "80": 0.17196,
            "81": 0.17269,
            "82": 0.16795,
            "83": 0.16966,
            "84": 0.17044,
            "85": 0.17085,
            "86": 0.17338,
            "87": 0.1704,
            "88": 0.17066,
            "89": 0.16954,
            "90": 0.16994,
            "91": 0.17172,
            "92": 0.17222,
            "93": 0.17163,
            "94": 0.17173,
            "95": 0.17012,
            "96": 0.16985,
            "97": 0.17078,
            "98": 0.17262,
            "99": 0.17354,
            "100": 0.1683
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/model_config.yaml
================================================
ENV_VARS:
  CUDA_DEVICE_MAX_CONNECTIONS: 1
  NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
  NCCL_ALGO: Ring
  CUBLAS_WORKSPACE_CONFIG: :4096:8
MODEL_ARGS:
  --num-layers: 12
  --hidden-size: 512
  --num-attention-heads: 8
  --log-params-norm: true
  --log-num-zeros-in-grad: true
  --log-validation-ppl-to-tensorboard: true
  --log-timers-to-tensorboard: true
  --tensorboard-dir: ${TENSORBOARD_PATH}
  --micro-batch-size: 4
  --global-batch-size: 32
  --seq-length: 1024
  --max-position-embeddings: 1024
  --train-iters: 100
  --timing-log-level: 0
  --lr-decay-iters: 320000
  --save: ${CHECKPOINT_SAVE_PATH}
  --load: ${CHECKPOINT_LOAD_PATH}
  --data-path: ${DATA_PATH}/text/the_pile/shard00/my-gpt3_00_text_document
  --vocab-file: ${DATA_PATH}/text/the_pile/shard00/bpe/vocab.json
  --merge-file: ${DATA_PATH}/text/the_pile/shard00/bpe/merges.txt
  --split: 949,50,1
  --distributed-backend: nccl
  --lr: 0.00015
  --lr-decay-style: cosine
  --min-lr: 1.0e-5
  --weight-decay: 1e-2
  --clip-grad: 1.0
  --lr-warmup-fraction: .01
  --log-interval: 1
  --save-interval: 50
  --eval-interval: 1000
  --eval-iters: 10
  --transformer-impl: transformer_engine
  --tensor-model-parallel-size: 1
  --pipeline-model-parallel-size: 4
  --num-layers-per-virtual-pipeline-stage: 1
  --calculate-per-token-loss: true
  --deterministic-mode: true
  --no-gradient-accumulation-fusion: true
  --attention-softmax-in-fp32: true
  --use-checkpoint-opt_param-scheduler: true
  --use-mcore-models: true
  --ckpt-format: torch_dist
  --dist-ckpt-optim-fully-reshardable: true
  --dist-ckpt-strictness: log_all # backward compatibility for TE changes
  --data-cache-path: ${DATA_CACHE_PATH}
  --bf16: true
  --log-memory-to-tensorboard: true
  --async-save: true
  --use-persistent-ckpt-worker: true
TEST_TYPE: ckpt-resume


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_gb200.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.82555,
            "2": 10.83286,
            "3": 10.82762,
            "4": 10.79569,
            "5": 10.85695,
            "6": 10.86388,
            "7": 10.82612,
            "8": 10.82543,
            "9": 10.83589,
            "10": 10.79632,
            "11": 10.8782,
            "12": 10.85826,
            "13": 10.85425,
            "14": 10.87525,
            "15": 10.79207,
            "16": 10.80307,
            "17": 10.7744,
            "18": 10.80487,
            "19": 10.7937,
            "20": 10.69579,
            "21": 10.68655,
            "22": 10.53162,
            "23": 10.70645,
            "24": 10.5734,
            "25": 10.51531,
            "26": 10.5909,
            "27": 10.60778,
            "28": 10.57052,
            "29": 10.58974,
            "30": 10.34723,
            "31": 10.07765,
            "32": 10.4635,
            "33": 10.45722,
            "34": 10.19977,
            "35": 10.25639,
            "36": 10.21261,
            "37": 10.34715,
            "38": 10.18009,
            "39": 10.40837,
            "40": 10.07626,
            "41": 10.12969,
            "42": 10.21172,
            "43": 9.81709,
            "44": 9.94035,
            "45": 9.81749,
            "46": 9.80632,
            "47": 10.12471,
            "48": 9.84046,
            "49": 9.51013,
            "50": 9.88941,
            "51": 9.84258,
            "52": 9.72577,
            "53": 10.05974,
            "54": 9.95228,
            "55": 9.88317,
            "56": 9.61277,
            "57": 9.46223,
            "58": 9.82312,
            "59": 9.57667,
            "60": 9.48518,
            "61": 9.67881,
            "62": 9.97778,
            "63": 9.36213,
            "64": 9.75717,
            "65": 8.93497,
            "66": 9.69283,
            "67": 9.36709,
            "68": 9.78178,
            "69": 9.79453,
            "70": 9.72298,
            "71": 9.62028,
            "72": 9.56979,
            "73": 9.48099,
            "74": 8.91237,
            "75": 9.40908,
            "76": 9.06623,
            "77": 10.05809,
            "78": 9.72192,
            "79": 9.36926,
            "80": 9.40026,
            "81": 9.477,
            "82": 9.69791,
            "83": 9.30743,
            "84": 9.41493,
            "85": 9.61113,
            "86": 9.07104,
            "87": 9.59611,
            "88": 9.74908,
            "89": 9.5961,
            "90": 9.82722,
            "91": 9.33658,
            "92": 9.3558,
            "93": 9.08695,
            "94": 8.82753,
            "95": 9.53065,
            "96": 9.52762,
            "97": 9.30668,
            "98": 9.66908,
            "99": 8.89636,
            "100": 9.40522
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1622.0,
            "2": 1729.0,
            "3": 1764.0,
            "4": 1727.0,
            "5": 1879.0,
            "6": 1863.0,
            "7": 1896.0,
            "8": 1661.0,
            "9": 1798.0,
            "10": 1465.0,
            "11": 1851.0,
            "12": 1790.0,
            "13": 1974.0,
            "14": 1776.0,
            "15": 1923.0,
            "16": 1869.0,
            "17": 1814.0,
            "18": 1673.0,
            "19": 1720.0,
            "20": 1699.0,
            "21": 1893.0,
            "22": 1735.0,
            "23": 1986.0,
            "24": 1634.0,
            "25": 1621.0,
            "26": 1685.0,
            "27": 1759.0,
            "28": 1882.0,
            "29": 1990.0,
            "30": 2038.0,
            "31": 1536.0,
            "32": 1900.0,
            "33": 2086.0,
            "34": 1761.0,
            "35": 1979.0,
            "36": 1971.0,
            "37": 2325.0,
            "38": 2104.0,
            "39": 2266.0,
            "40": 2078.0,
            "41": 2184.0,
            "42": 2202.0,
            "43": 2015.0,
            "44": 2078.0,
            "45": 2015.0,
            "46": 2066.0,
            "47": 2458.0,
            "48": 2363.0,
            "49": 2175.0,
            "50": 2409.0,
            "51": 2558.0,
            "52": 2588.0,
            "53": 2829.0,
            "54": 2591.0,
            "55": 2369.0,
            "56": 2739.0,
            "57": 2309.0,
            "58": 2883.0,
            "59": 2848.0,
            "60": 2493.0,
            "61": 2888.0,
            "62": 2602.0,
            "63": 2545.0,
            "64": 2899.0,
            "65": 2710.0,
            "66": 3002.0,
            "67": 2804.0,
            "68": 2632.0,
            "69": 2956.0,
            "70": 2966.0,
            "71": 2906.0,
            "72": 2514.0,
            "73": 3054.0,
            "74": 2000.0,
            "75": 2549.0,
            "76": 2973.0,
            "77": 3043.0,
            "78": 3007.0,
            "79": 3087.0,
            "80": 2994.0,
            "81": 3419.0,
            "82": 3217.0,
            "83": 2800.0,
            "84": 3251.0,
            "85": 3154.0,
            "86": 2553.0,
            "87": 3551.0,
            "88": 3114.0,
            "89": 3200.0,
            "90": 3219.0,
            "91": 2907.0,
            "92": 3034.0,
            "93": 2797.0,
            "94": 3431.0,
            "95": 3018.0,
            "96": 3330.0,
            "97": 3017.0,
            "98": 3495.0,
            "99": 3308.0,
            "100": 3089.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 551273984.0,
            "2": 551273984.0,
            "3": 551273984.0,
            "4": 551273984.0,
            "5": 551273984.0,
            "6": 551273984.0,
            "7": 551273984.0,
            "8": 551273984.0,
            "9": 551273984.0,
            "10": 551273984.0,
            "11": 551273984.0,
            "12": 551273984.0,
            "13": 551273984.0,
            "14": 551273984.0,
            "15": 551273984.0,
            "16": 551273984.0,
            "17": 551273984.0,
            "18": 551273984.0,
            "19": 551273984.0,
            "20": 551273984.0,
            "21": 551273984.0,
            "22": 551273984.0,
            "23": 551273984.0,
            "24": 551273984.0,
            "25": 551273984.0,
            "26": 551273984.0,
            "27": 551273984.0,
            "28": 551273984.0,
            "29": 551273984.0,
            "30": 551273984.0,
            "31": 551273984.0,
            "32": 551273984.0,
            "33": 551273984.0,
            "34": 551273984.0,
            "35": 551273984.0,
            "36": 551273984.0,
            "37": 551273984.0,
            "38": 551273984.0,
            "39": 551273984.0,
            "40": 551273984.0,
            "41": 551273984.0,
            "42": 551273984.0,
            "43": 551273984.0,
            "44": 551273984.0,
            "45": 551273984.0,
            "46": 551273984.0,
            "47": 551273984.0,
            "48": 551273984.0,
            "49": 551273984.0,
            "50": 551273984.0,
            "51": 551273984.0,
            "52": 551273984.0,
            "53": 551273984.0,
            "54": 551273984.0,
            "55": 551273984.0,
            "56": 551273984.0,
            "57": 551273984.0,
            "58": 551273984.0,
            "59": 551273984.0,
            "60": 551273984.0,
            "61": 551273984.0,
            "62": 551273984.0,
            "63": 551273984.0,
            "64": 551273984.0,
            "65": 551273984.0,
            "66": 551273984.0,
            "67": 551273984.0,
            "68": 551273984.0,
            "69": 551273984.0,
            "70": 551273984.0,
            "71": 551273984.0,
            "72": 551273984.0,
            "73": 551273984.0,
            "74": 551273984.0,
            "75": 551273984.0,
            "76": 551273984.0,
            "77": 551273984.0,
            "78": 551273984.0,
            "79": 551273984.0,
            "80": 551273984.0,
            "81": 551273984.0,
            "82": 551273984.0,
            "83": 551273984.0,
            "84": 551273984.0,
            "85": 551273984.0,
            "86": 551273984.0,
            "87": 551273984.0,
            "88": 551273984.0,
            "89": 551273984.0,
            "90": 551273984.0,
            "91": 551273984.0,
            "92": 551273984.0,
            "93": 551273984.0,
            "94": 551273984.0,
            "95": 551273984.0,
            "96": 551273984.0,
            "97": 551273984.0,
            "98": 551273984.0,
            "99": 551273984.0,
            "100": 551273984.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 2328238592.0,
            "2": 2470149120.0,
            "3": 2470149120.0,
            "4": 2470149120.0,
            "5": 2470149120.0,
            "6": 2470149120.0,
            "7": 2470149120.0,
            "8": 2470149120.0,
            "9": 2470149120.0,
            "10": 2470149120.0,
            "11": 2470149120.0,
            "12": 2470149120.0,
            "13": 2470149120.0,
            "14": 2470149120.0,
            "15": 2470149120.0,
            "16": 2470149120.0,
            "17": 2470149120.0,
            "18": 2470149120.0,
            "19": 2470149120.0,
            "20": 2470149120.0,
            "21": 2470149120.0,
            "22": 2470149120.0,
            "23": 2470149120.0,
            "24": 2470149120.0,
            "25": 2470149120.0,
            "26": 2470149120.0,
            "27": 2470149120.0,
            "28": 2470149120.0,
            "29": 2470149120.0,
            "30": 2470149120.0,
            "31": 2470149120.0,
            "32": 2470149120.0,
            "33": 2470149120.0,
            "34": 2470149120.0,
            "35": 2470149120.0,
            "36": 2470149120.0,
            "37": 2470149120.0,
            "38": 2470149120.0,
            "39": 2470149120.0,
            "40": 2470149120.0,
            "41": 2470149120.0,
            "42": 2470149120.0,
            "43": 2470149120.0,
            "44": 2470149120.0,
            "45": 2470149120.0,
            "46": 2470149120.0,
            "47": 2470149120.0,
            "48": 2470149120.0,
            "49": 2470149120.0,
            "50": 2470149120.0,
            "51": 2470149120.0,
            "52": 2470149120.0,
            "53": 2470149120.0,
            "54": 2470149120.0,
            "55": 2470149120.0,
            "56": 2470149120.0,
            "57": 2470149120.0,
            "58": 2470149120.0,
            "59": 2470149120.0,
            "60": 2470149120.0,
            "61": 2470149120.0,
            "62": 2470149120.0,
            "63": 2470149120.0,
            "64": 2470149120.0,
            "65": 2470149120.0,
            "66": 2470149120.0,
            "67": 2470149120.0,
            "68": 2470149120.0,
            "69": 2470149120.0,
            "70": 2470149120.0,
            "71": 2470149120.0,
            "72": 2470149120.0,
            "73": 2470149120.0,
            "74": 2470149120.0,
            "75": 2470149120.0,
            "76": 2470149120.0,
            "77": 2470149120.0,
            "78": 2470149120.0,
            "79": 2470149120.0,
            "80": 2470149120.0,
            "81": 2470149120.0,
            "82": 2470149120.0,
            "83": 2470149120.0,
            "84": 2470149120.0,
            "85": 2470149120.0,
            "86": 2470149120.0,
            "87": 2470149120.0,
            "88": 2470149120.0,
            "89": 2470149120.0,
            "90": 2470149120.0,
            "91": 2470149120.0,
            "92": 2470149120.0,
            "93": 2470149120.0,
            "94": 2470149120.0,
            "95": 2470149120.0,
            "96": 2470149120.0,
            "97": 2470149120.0,
            "98": 2470149120.0,
            "99": 2470149120.0,
            "100": 2470149120.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": "nan",
            "2": 9.34621,
            "3": 0.13335,
            "4": 0.11754,
            "5": 0.11536,
            "6": 0.11367,
            "7": 0.11663,
            "8": 0.11385,
            "9": 0.11574,
            "10": 0.11631,
            "11": 0.11616,
            "12": 0.11786,
            "13": 0.11675,
            "14": 0.12155,
            "15": 0.11663,
            "16": 0.11781,
            "17": 0.11932,
            "18": 0.11766,
            "19": 0.11708,
            "20": 0.11635,
            "21": 0.11736,
            "22": 0.11806,
            "23": 0.11804,
            "24": 0.1169,
            "25": 0.11774,
            "26": 0.11641,
            "27": 0.11674,
            "28": 0.1177,
            "29": 0.11804,
            "30": 0.11805,
            "31": 0.11916,
            "32": 0.11895,
            "33": 0.11909,
            "34": 0.1191,
            "35": 0.11894,
            "36": 0.11897,
            "37": 0.11622,
            "38": 0.11982,
            "39": 0.1177,
            "40": 0.1197,
            "41": 0.11987,
            "42": 0.11911,
            "43": 0.11866,
            "44": 0.11971,
            "45": 0.11825,
            "46": 0.1203,
            "47": 0.11863,
            "48": 0.1192,
            "49": 0.11951,
            "50": 0.11918,
            "51": 0.13867,
            "52": 0.12177,
            "53": 0.12036,
            "54": 0.1191,
            "55": 0.11745,
            "56": 0.11631,
            "57": 0.11887,
            "58": 0.11974,
            "59": 0.12067,
            "60": 0.1174,
            "61": 0.12254,
            "62": 0.11811,
            "63": 0.11737,
            "64": 0.11568,
            "65": 0.11917,
            "66": 0.1185,
            "67": 0.1199,
            "68": 0.11894,
            "69": 0.11777,
            "70": 0.11816,
            "71": 0.11833,
            "72": 0.11801,
            "73": 0.11849,
            "74": 0.11806,
            "75": 0.11903,
            "76": 0.11812,
            "77": 0.11852,
            "78": 0.11938,
            "79": 0.1176,
            "80": 0.11841,
            "81": 0.11882,
            "82": 0.11877,
            "83": 0.11747,
            "84": 0.11992,
            "85": 0.12148,
            "86": 0.11854,
            "87": 0.1177,
            "88": 0.1201,
            "89": 0.11888,
            "90": 0.1191,
            "91": 0.11815,
            "92": 0.1186,
            "93": 0.1183,
            "94": 0.11918,
            "95": 0.13193,
            "96": 0.13346,
            "97": 0.12725,
            "98": 0.13104,
            "99": 0.12509,
            "100": 0.12079
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci.json
================================================
{
    "lm loss": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 10.82005,
            "2": 10.81907,
            "3": 10.81396,
            "4": 10.78497,
            "5": 10.85284,
            "6": 10.87449,
            "7": 10.83201,
            "8": 10.83297,
            "9": 10.83935,
            "10": 10.78455,
            "11": 10.87798,
            "12": 10.86112,
            "13": 10.86444,
            "14": 10.87605,
            "15": 10.79229,
            "16": 10.79509,
            "17": 10.76768,
            "18": 10.81006,
            "19": 10.79716,
            "20": 10.69212,
            "21": 10.68168,
            "22": 10.52085,
            "23": 10.70898,
            "24": 10.576,
            "25": 10.52413,
            "26": 10.59515,
            "27": 10.58426,
            "28": 10.56233,
            "29": 10.57012,
            "30": 10.34552,
            "31": 10.10047,
            "32": 10.45375,
            "33": 10.44623,
            "34": 10.20608,
            "35": 10.26241,
            "36": 10.2124,
            "37": 10.3252,
            "38": 10.16775,
            "39": 10.38332,
            "40": 10.07236,
            "41": 10.13863,
            "42": 10.19811,
            "43": 9.81071,
            "44": 9.93244,
            "45": 9.81098,
            "46": 9.80879,
            "47": 10.1261,
            "48": 9.82105,
            "49": 9.50626,
            "50": 9.88418,
            "51": 9.8366,
            "52": 9.7254,
            "53": 10.04687,
            "54": 9.93029,
            "55": 9.86374,
            "56": 9.60183,
            "57": 9.4509,
            "58": 9.80845,
            "59": 9.56672,
            "60": 9.47963,
            "61": 9.67901,
            "62": 9.96737,
            "63": 9.3516,
            "64": 9.75605,
            "65": 8.93065,
            "66": 9.68055,
            "67": 9.3589,
            "68": 9.76988,
            "69": 9.77495,
            "70": 9.71218,
            "71": 9.60756,
            "72": 9.57084,
            "73": 9.48407,
            "74": 8.92824,
            "75": 9.4005,
            "76": 9.07193,
            "77": 10.05226,
            "78": 9.71515,
            "79": 9.35771,
            "80": 9.39078,
            "81": 9.46751,
            "82": 9.68504,
            "83": 9.29556,
            "84": 9.4053,
            "85": 9.60138,
            "86": 9.06772,
            "87": 9.58501,
            "88": 9.73362,
            "89": 9.59515,
            "90": 9.80502,
            "91": 9.3255,
            "92": 9.35334,
            "93": 9.06984,
            "94": 8.8223,
            "95": 9.50821,
            "96": 9.51534,
            "97": 9.29768,
            "98": 9.66205,
            "99": 8.87695,
            "100": 9.3924
        }
    },
    "num-zeros": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 1559.0,
            "2": 1591.0,
            "3": 1727.0,
            "4": 1835.0,
            "5": 1840.0,
            "6": 1719.0,
            "7": 1740.0,
            "8": 1591.0,
            "9": 1839.0,
            "10": 1380.0,
            "11": 1856.0,
            "12": 1693.0,
            "13": 1906.0,
            "14": 1757.0,
            "15": 1848.0,
            "16": 1791.0,
            "17": 1729.0,
            "18": 1672.0,
            "19": 1718.0,
            "20": 1621.0,
            "21": 1931.0,
            "22": 1738.0,
            "23": 1992.0,
            "24": 1676.0,
            "25": 1689.0,
            "26": 1748.0,
            "27": 1801.0,
            "28": 1986.0,
            "29": 2043.0,
            "30": 1907.0,
            "31": 1627.0,
            "32": 1918.0,
            "33": 2003.0,
            "34": 1779.0,
            "35": 1922.0,
            "36": 1942.0,
            "37": 2294.0,
            "38": 2145.0,
            "39": 2395.0,
            "40": 2045.0,
            "41": 2415.0,
            "42": 2277.0,
            "43": 1863.0,
            "44": 2087.0,
            "45": 2097.0,
            "46": 2265.0,
            "47": 2436.0,
            "48": 2460.0,
            "49": 2217.0,
            "50": 2368.0,
            "51": 2552.0,
            "52": 2541.0,
            "53": 2907.0,
            "54": 2604.0,
            "55": 2383.0,
            "56": 2762.0,
            "57": 2128.0,
            "58": 3040.0,
            "59": 2797.0,
            "60": 2509.0,
            "61": 3041.0,
            "62": 2642.0,
            "63": 2401.0,
            "64": 2913.0,
            "65": 2628.0,
            "66": 2934.0,
            "67": 2791.0,
            "68": 2718.0,
            "69": 3050.0,
            "70": 3129.0,
            "71": 3014.0,
            "72": 2263.0,
            "73": 2761.0,
            "74": 1887.0,
            "75": 2552.0,
            "76": 3111.0,
            "77": 3240.0,
            "78": 3150.0,
            "79": 3139.0,
            "80": 3279.0,
            "81": 3595.0,
            "82": 3194.0,
            "83": 2797.0,
            "84": 3272.0,
            "85": 3344.0,
            "86": 2611.0,
            "87": 3802.0,
            "88": 3054.0,
            "89": 3205.0,
            "90": 2980.0,
            "91": 2726.0,
            "92": 3043.0,
            "93": 2751.0,
            "94": 3247.0,
            "95": 3324.0,
            "96": 3503.0,
            "97": 3057.0,
            "98": 3465.0,
            "99": 3320.0,
            "100": 3467.0
        }
    },
    "mem-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 522345472.0,
            "2": 522345472.0,
            "3": 522345472.0,
            "4": 522345472.0,
            "5": 522345472.0,
            "6": 522345472.0,
            "7": 522345472.0,
            "8": 522345472.0,
            "9": 522345472.0,
            "10": 522345472.0,
            "11": 522345472.0,
            "12": 522345472.0,
            "13": 522345472.0,
            "14": 522345472.0,
            "15": 522345472.0,
            "16": 522345472.0,
            "17": 522345472.0,
            "18": 522345472.0,
            "19": 522345472.0,
            "20": 522345472.0,
            "21": 522345472.0,
            "22": 522345472.0,
            "23": 522345472.0,
            "24": 522345472.0,
            "25": 522345472.0,
            "26": 522345472.0,
            "27": 522345472.0,
            "28": 522345472.0,
            "29": 522345472.0,
            "30": 522345472.0,
            "31": 522345472.0,
            "32": 522345472.0,
            "33": 522345472.0,
            "34": 522345472.0,
            "35": 522345472.0,
            "36": 522345472.0,
            "37": 522345472.0,
            "38": 522345472.0,
            "39": 522345472.0,
            "40": 522345472.0,
            "41": 522345472.0,
            "42": 522345472.0,
            "43": 522345472.0,
            "44": 522345472.0,
            "45": 522345472.0,
            "46": 522345472.0,
            "47": 522345472.0,
            "48": 522345472.0,
            "49": 522345472.0,
            "50": 522345472.0,
            "51": 522345472.0,
            "52": 522345472.0,
            "53": 522345472.0,
            "54": 522345472.0,
            "55": 522345472.0,
            "56": 522345472.0,
            "57": 522345472.0,
            "58": 522345472.0,
            "59": 522345472.0,
            "60": 522345472.0,
            "61": 522345472.0,
            "62": 522345472.0,
            "63": 522345472.0,
            "64": 522345472.0,
            "65": 522345472.0,
            "66": 522345472.0,
            "67": 522345472.0,
            "68": 522345472.0,
            "69": 522345472.0,
            "70": 522345472.0,
            "71": 522345472.0,
            "72": 522345472.0,
            "73": 522345472.0,
            "74": 522345472.0,
            "75": 522345472.0,
            "76": 522345472.0,
            "77": 522345472.0,
            "78": 522345472.0,
            "79": 522345472.0,
            "80": 522345472.0,
            "81": 522345472.0,
            "82": 522345472.0,
            "83": 522345472.0,
            "84": 522345472.0,
            "85": 522345472.0,
            "86": 522345472.0,
            "87": 522345472.0,
            "88": 522345472.0,
            "89": 522345472.0,
            "90": 522345472.0,
            "91": 522345472.0,
            "92": 522345472.0,
            "93": 522345472.0,
            "94": 522345472.0,
            "95": 522345472.0,
            "96": 522345472.0,
            "97": 522345472.0,
            "98": 522345472.0,
            "99": 522345472.0,
            "100": 522345472.0
        }
    },
    "mem-max-allocated-bytes": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 3769790464.0,
            "2": 3912107008.0,
            "3": 3912107008.0,
            "4": 3912107008.0,
            "5": 3912107008.0,
            "6": 3912107008.0,
            "7": 3912107008.0,
            "8": 3912107008.0,
            "9": 3912107008.0,
            "10": 3912107008.0,
            "11": 3912107008.0,
            "12": 3912107008.0,
            "13": 3912107008.0,
            "14": 3912107008.0,
            "15": 3912107008.0,
            "16": 3912107008.0,
            "17": 3912107008.0,
            "18": 3912107008.0,
            "19": 3912107008.0,
            "20": 3912107008.0,
            "21": 3912107008.0,
            "22": 3912107008.0,
            "23": 3912107008.0,
            "24": 3912107008.0,
            "25": 3912107008.0,
            "26": 3912107008.0,
            "27": 3912107008.0,
            "28": 3912107008.0,
            "29": 3912107008.0,
            "30": 3912107008.0,
            "31": 3912107008.0,
            "32": 3912107008.0,
            "33": 3912107008.0,
            "34": 3912107008.0,
            "35": 3912107008.0,
            "36": 3912107008.0,
            "37": 3912107008.0,
            "38": 3912107008.0,
            "39": 3912107008.0,
            "40": 3912107008.0,
            "41": 3912107008.0,
            "42": 3912107008.0,
            "43": 3912107008.0,
            "44": 3912107008.0,
            "45": 3912107008.0,
            "46": 3912107008.0,
            "47": 3912107008.0,
            "48": 3912107008.0,
            "49": 3912107008.0,
            "50": 3912107008.0,
            "51": 3912107008.0,
            "52": 3912107008.0,
            "53": 3912107008.0,
            "54": 3912107008.0,
            "55": 3912107008.0,
            "56": 3912107008.0,
            "57": 3912107008.0,
            "58": 3912107008.0,
            "59": 3912107008.0,
            "60": 3912107008.0,
            "61": 3912107008.0,
            "62": 3912107008.0,
            "63": 3912107008.0,
            "64": 3912107008.0,
            "65": 3912107008.0,
            "66": 3912107008.0,
            "67": 3912107008.0,
            "68": 3912107008.0,
            "69": 3912107008.0,
            "70": 3912107008.0,
            "71": 3912107008.0,
            "72": 3912107008.0,
            "73": 3912107008.0,
            "74": 3912107008.0,
            "75": 3912107008.0,
            "76": 3912107008.0,
            "77": 3912107008.0,
            "78": 3912107008.0,
            "79": 3912107008.0,
            "80": 3912107008.0,
            "81": 3912107008.0,
            "82": 3912107008.0,
            "83": 3912107008.0,
            "84": 3912107008.0,
            "85": 3912107008.0,
            "86": 3912107008.0,
            "87": 3912107008.0,
            "88": 3912107008.0,
            "89": 3912107008.0,
            "90": 3912107008.0,
            "91": 3912107008.0,
            "92": 3912107008.0,
            "93": 3912107008.0,
            "94": 3912107008.0,
            "95": 3912107008.0,
            "96": 3912107008.0,
            "97": 3912107008.0,
            "98": 3912107008.0,
            "99": 3912107008.0,
            "100": 3912107008.0
        }
    },
    "iteration-time": {
        "start_step": 1,
        "end_step": 100,
        "step_interval": 1,
        "values": {
            "1": 22.61328,
            "2": 0.20632,
            "3": 0.1825,
            "4": 0.17425,
            "5": 0.17426,
            "6": 0.17288,
            "7": 0.17611,
            "8": 0.17588,
            "9": 0.17544,
            "10": 0.17232,
            "11": 0.17362,
            "12": 0.17368,
            "13": 0.17578,
            "14": 0.17305,
            "15": 0.17514,
            "16": 0.17367,
            "17": 0.17474,
            "18": 0.17196,
            "19": 0.1737,
            "20": 0.17359,
            "21": 0.17277,
            "22": 0.17502,
            "23": 0.17321,
            "24": 0.172,
            "25": 0.17239,
            "26": 0.17041,
            "27": 0.17172,
            "28": 0.17178,
            "29": 0.17225,
            "30": 0.17082,
            "31": 0.17234,
            "32": 0.17192,
            "33": 0.17201,
            "34": 0.17283,
            "35": 0.17212,
            "36": 0.17393,
            "37": 0.17078,
            "38": 0.17394,
            "39": 0.17341,
            "40": 0.17259,
            "41": 0.17595,
            "42": 0.17237,
            "43": 0.17334,
            "44": 0.17079,
            "45": 0.17254,
            "46": 0.17378,
            "47": 0.17228,
            "48": 0.17193,
            "49": 0.17207,
            "50": 0.17337,
            "51": 0.18317,
            "52": 0.44439,
            "53": 0.17445,
            "54": 0.1761,
            "55": 0.17625,
            "56": 0.17729,
            "57": 0.17831,
            "58": 0.17704,
            "59": 0.17623,
            "60": 0.17946,
            "61": 0.17712,
            "62": 0.17274,
            "63": 0.17809,
            "64": 0.17585,
            "65": 0.179,
            "66": 0.17777,
            "67": 0.17718,
            "68": 0.17654,
            "69": 0.17491,
            "70": 0.17913,
            "71": 0.17578,
            "72": 0.17669,
            "73": 0.17735,
            "74": 0.17979,
            "75": 0.17759,
            "76": 0.17852,
            "77": 0.1802,
            "78": 0.17531,
            "79": 0.17834,
            "80": 0.17782,
            "81": 0.17526,
            "82": 0.17347,
            "83": 0.17511,
            "84": 0.17403,
            "85": 0.17634,
            "86": 0.1725,
            "87": 0.17606,
            "88": 0.17534,
            "89": 0.17477,
            "90": 0.17578,
            "91": 0.1753,
            "92": 0.17582,
            "93": 0.17671,
            "94": 0.17621,
            "95": 0.17573,
            "96": 0.17511,
            "97": 0.17469,
            "98": 0.17498,
            "99": 0.41864,
            "100": 0.17148
        }
    }
}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json
================================================
{"lm loss": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 10.81548, "5": 10.85169, "10": 10.78686, "15": 10.79466, "20": 10.69058, "25": 10.52375, "30": 10.34575, "35": 10.26198, "40": 10.07233, "45": 9.81043, "50": 9.8838, "55": 9.86362, "60": 9.47966, "65": 8.93178, "70": 9.71179, "75": 9.4003, "80": 9.38946, "85": 9.60161, "90": 9.80559, "95": 9.50858, "100": 9.39222}}, "num-zeros": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 1518.0, "5": 1873.0, "10": 1417.0, "15": 1772.0, "20": 1655.0, "25": 1708.0, "30": 1934.0, "35": 1894.0, "40": 2091.0, "45": 2020.0, "50": 2363.0, "55": 2406.0, "60": 2307.0, "65": 2578.0, "70": 3157.0, "75": 2523.0, "80": 3213.0, "85": 3275.0, "90": 3085.0, "95": 3236.0, "100": 3458.0}}, "mem-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 521926144.0, "5": 521926144.0, "10": 521926144.0, "15": 521926144.0, "20": 521926144.0, "25": 521926144.0, "30": 521926144.0, "35": 521926144.0, "40": 521926144.0, "45": 521926144.0, "50": 521926144.0, "55": 521926144.0, "60": 521926144.0, "65": 521926144.0, "70": 521926144.0, "75": 521926144.0, "80": 521926144.0, "85": 521926144.0, "90": 521926144.0, "95": 521926144.0, "100": 521926144.0}}, "mem-max-allocated-bytes": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 2298877952.0, "5": 2440802304.0, "10": 2440802304.0, "15": 2440802304.0, "20": 2440802304.0, "25": 2440802304.0, "30": 2440802304.0, "35": 2440802304.0, "40": 2440802304.0, "45": 2440802304.0, "50": 2440802304.0, "55": 2440802304.0, "60": 2440802304.0, "65": 2440802304.0, "70": 2440802304.0, "75": 2440802304.0, "80": 2440802304.0, "85": 2440802304.0, "90": 2440802304.0, "95": 2440802304.0, "100": 2440802304.0}}, "iteration-time": {"start_step": 1, "end_step": 100, "step_interval": 5, "values": {"1": 14.68047, "5": 0.20509, "10": 0.19886, "15": 0.20047, "20": 0.20067, "25": 0.20043, "30": 0.19944, "35": 0.19968, "40": 0.19863, "45": 0.19927, "50": 0.19674, "55": 0.20218, "60": 0.20258, "65": 0.19974, "70": 0.19785, "75": 0.19872, "80": 0.19816, "85": 0.19906, "90": 0.19842, "95": 0.19999, "100": 0.20212}}}

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 15.0 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts.json
================================================
[File too large to display: 15.0 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 15.0 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 15.0 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 15.0 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied/model_config.yaml
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap/model_config.yaml
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/model_config.yaml
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_cp2_nondeterministic/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_cp2_nondeterministic/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_fsdp2_resume_torch_dist/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_gdn/model_config.yaml
================================================
[File too large to display: 2.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 3.0 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2 B]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_modelopt_distill_resume/model_config.yaml
================================================
[File too large to display: 2.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_multi_dist_optimizer_instances/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_nondeterministic/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion/model_config.yaml
================================================
[File too large to display: 1.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ddp_average_in_collective/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_defer_embedding_wgrad_compute/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_dgxc.json
================================================


================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.4 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 7.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 7.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_create_attention_mask_in_dataloader/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_no_mmap_bin_files/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.4 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.4 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts.json
================================================
[File too large to display: 14.4 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.4 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.4 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.4 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_mmap_bin_files/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2 B]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_qk_layernorm_test_mode/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgxh100_dgxc.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_frozen_resume_torch_dist_reshard_8x1xNone/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_fsdp2_resume_torch_dist_te/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 357 B]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 357 B]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_fp16/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 14.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_a100_2nd.json
================================================
[File too large to display: 13.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json
================================================
[File too large to display: 14.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts.json
================================================
[File too large to display: 14.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2_resume_torch_dist/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_a100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_dev_dgxa100_dracooci.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4_resume_torch_dist/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_resume_torch_dist_uninstall_te/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp2_pp2_uninstall_te/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch/model_config.yaml
================================================
[File too large to display: 1.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_gb200_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_mcore_tp4_pp1_resume_torch_dist/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_weekly_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 289.4 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_weekly_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 289.4 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_weekly_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/model_config.yaml
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_weekly_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap/tp_comm_overlap_cfg.yaml
================================================
[File too large to display: 566 B]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_weekly_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 280.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_weekly_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 287.9 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt3_weekly_mcore_tp4_cp2_current_scaling_native_fp8_tp_sp_cp_tp_overlap/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 6.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_fp8_logitsmatch/model_config.yaml
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 6.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_logitsmatch_decode_graphs_only/model_config.yaml
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_validation/cuda_graphs.py
================================================
[File too large to display: 8.2 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_validation/cuda_graphs.sh
================================================
[File too large to display: 3.0 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_cuda_graphs_validation/model_config.yaml
================================================
[File too large to display: 165 B]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 86.2 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_583m_logitsmatch/model_config.yaml
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 3.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_logitsmatch_zmq/model_config.yaml
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 27.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp1_dp8_583m_throughputtest_zmq/model_config.yaml
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 3.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp1_pp8_dp1_583m_logitsmatch_zmq/model_config.yaml
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 4.9 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp2_pp2_dp2_583m_logitsmatch_zmq/model_config.yaml
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 5.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_583m_logitsmatch/model_config.yaml
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 4.9 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_dynamic_inference_tp8_pp1_dp1_583m_logitsmatch_zmq/model_config.yaml
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_grpo_basic_function/env_config.yaml
================================================
[File too large to display: 211 B]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_grpo_basic_function/model_config.yaml
================================================
[File too large to display: 2.9 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest/env_config.yaml
================================================
[File too large to display: 211 B]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 4.9 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest/model_config.yaml
================================================
[File too large to display: 2.5 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github/env_config.yaml
================================================
[File too large to display: 211 B]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github/model_config.yaml
================================================
[File too large to display: 2.6 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_grpo_tp4_pp1_dp2_8b_cudagraphs_throughput/env_config.yaml
================================================
[File too large to display: 211 B]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_grpo_tp4_pp1_dp2_8b_cudagraphs_throughput/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_grpo_tp4_pp1_dp2_8b_cudagraphs_throughput/model_config.yaml
================================================
[File too large to display: 3.0 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_grpo_tp4_pp1_dp2_8b_throughput/env_config.yaml
================================================
[File too large to display: 211 B]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_grpo_tp4_pp1_dp2_8b_throughput/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_grpo_tp4_pp1_dp2_8b_throughput/model_config.yaml
================================================
[File too large to display: 2.9 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_grpo_tp4_pp1_dp2_8b_throughput_github/env_config.yaml
================================================
[File too large to display: 211 B]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_grpo_tp4_pp1_dp2_8b_throughput_github/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_grpo_tp4_pp1_dp2_8b_throughput_github/model_config.yaml
================================================
[File too large to display: 2.9 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/README.md
================================================
[File too large to display: 879 B]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 990 B]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 11.7 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/model_config.yaml
================================================
[File too large to display: 2.9 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_16b_multiprompt_tokensmatch/test_prompts.jsonl
================================================
[File too large to display: 481 B]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 3.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_cudagraphs/model_config.yaml
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 3.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_fp8_cudagraphs/model_config.yaml
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 4.0 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 3.8 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt/gpt_static_inference_tp1_pp1_583m_logitsmatch/model_config.yaml
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt-nemo/bert-nemo_340m_mr_mbs2_gbs32_mcore_te_tp2_pp2_1N8G/model_config.yaml
================================================
[File too large to display: 485 B]

================================================
FILE: tests/functional_tests/test_cases/gpt-nemo/gemma2-nemo_2b_mr_mbs1_gbs8_mcore_te_tp4_pp1_cp1_1N8G/model_config.yaml
================================================
[File too large to display: 490 B]

================================================
FILE: tests/functional_tests/test_cases/gpt-nemo/llama3-nemo_8b_mr_mbs1_gbs8_mcore_te_8experts_tp2_ep2_pp2_dgx_a100_1N8G/model_config.yaml
================================================
[File too large to display: 1.1 KB]

================================================
FILE: tests/functional_tests/test_cases/gpt-nemo/llama3-nemo_8b_mr_mbs4_gbs64_mcore_te_tp1_pp1_cp2_dgx_a100_1N8G/model_config.yaml
================================================
[File too large to display: 668 B]

================================================
FILE: tests/functional_tests/test_cases/gpt-nemo/mixtral-nemo_8x7b_mr_mbs1_gbs8_mcore_te_tp2_pp1_ep2_1N8G/model_config.yaml
================================================
[File too large to display: 633 B]

================================================
FILE: tests/functional_tests/test_cases/gpt-nemo/t5-nemo_220m_mr_mbs4_gbs64_te_tp1_pp1_1N8G/model_config.yaml
================================================
[File too large to display: 403 B]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 6.6 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m/model_config.yaml
================================================
[File too large to display: 2.7 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m_chunked_prefill/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 148.3 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_dynamic_inference_tp1_pp1_dp8_583m_chunked_prefill/model_config.yaml
================================================
[File too large to display: 9.8 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.2 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G/model_config.yaml
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp2_vpp2_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp2_vpp2_cp1_dgx_a100_1N8G/model_config.yaml
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp1_pp4_cp1_dgx_a100_1N8G/model_config.yaml
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G/model_config.yaml
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G/model_config.yaml
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 3.0 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_cudagraphs/model_config.yaml
================================================
[File too large to display: 2.7 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 3.0 KB]

================================================
FILE: tests/functional_tests/test_cases/hybrid/hybrid_static_inference_tp1_pp1_2B_logitsmatch/model_config.yaml
================================================
[File too large to display: 2.6 KB]

================================================
FILE: tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8/golden_values_dev.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8/model_config.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8_seq_packing/golden_values_dev.json
================================================
[File too large to display: 3.6 KB]

================================================
FILE: tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8_seq_packing/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp1_dp8_seq_packing/model_config.yaml
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp2_dp8/golden_values_dev.json
================================================
[File too large to display: 3.6 KB]

================================================
FILE: tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp2_dp8/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/mimo/mimo_vlm_pretrain_convergence_tp1_pp1_cp2_dp8/model_config.yaml
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_release/model_config.yaml
================================================
[File too large to display: 4.4 KB]

================================================
FILE: tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_release_sm/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 356.7 KB]

================================================
FILE: tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp1pp4emp16etp1cp1_release_sm/model_config.yaml
================================================
[File too large to display: 4.4 KB]

================================================
FILE: tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp2pp2emp16etp1cp1_gb_200_release/model_config.yaml
================================================
[File too large to display: 4.4 KB]

================================================
FILE: tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp2pp2emp16etp1cp1_gb_200_release_sm/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 356.5 KB]

================================================
FILE: tests/functional_tests/test_cases/mixtral/deepseekv3_proxy_flex_tp2pp2emp16etp1cp1_gb_200_release_sm/model_config.yaml
================================================
[File too large to display: 4.5 KB]

================================================
FILE: tests/functional_tests/test_cases/mixtral/mixtral_8x22b_tp2pp8ep8vpp1_release/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 5.2 KB]

================================================
FILE: tests/functional_tests/test_cases/mixtral/mixtral_8x22b_tp2pp8ep8vpp1_release/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 381.0 KB]

================================================
FILE: tests/functional_tests/test_cases/mixtral/mixtral_8x22b_tp2pp8ep8vpp1_release/model_config.yaml
================================================
[File too large to display: 3.0 KB]

================================================
FILE: tests/functional_tests/test_cases/mixtral/mixtral_8x7b_alltoall_tp2pp4ep4_release/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 478.9 KB]

================================================
FILE: tests/functional_tests/test_cases/mixtral/mixtral_8x7b_alltoall_tp2pp4ep4_release/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 445.6 KB]

================================================
FILE: tests/functional_tests/test_cases/mixtral/mixtral_8x7b_alltoall_tp2pp4ep4_release/model_config.yaml
================================================
[File too large to display: 2.9 KB]

================================================
FILE: tests/functional_tests/test_cases/mixtral/mixtral_8x7b_alltoall_tp2pp4ep4_release_sm/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 93.4 KB]

================================================
FILE: tests/functional_tests/test_cases/mixtral/mixtral_8x7b_alltoall_tp2pp4ep4_release_sm/model_config.yaml
================================================
[File too large to display: 2.9 KB]

================================================
FILE: tests/functional_tests/test_cases/mixtral/mixtral_8x7b_tp1pp4ep8vpp8_release/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 6.8 KB]

================================================
FILE: tests/functional_tests/test_cases/mixtral/mixtral_8x7b_tp1pp4ep8vpp8_release/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 4.1 KB]

================================================
FILE: tests/functional_tests/test_cases/mixtral/mixtral_8x7b_tp1pp4ep8vpp8_release/model_config.yaml
================================================
[File too large to display: 3.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/deepseek_proxy_fsdp_ep2_fsdp2/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 6.2 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/deepseek_proxy_fsdp_ep2_fsdp2/model_config.yaml
================================================
[File too large to display: 3.9 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/deepseek_proxy_fsdp_ep2_fsdp2_1node/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/deepseek_proxy_fsdp_ep2_fsdp2_1node/model_config.yaml
================================================
[File too large to display: 3.9 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_dev_dgxa100_dracooci.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_dev_dgxa100_dracooci.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last/model_config.yaml
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer/model_config.yaml
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 3.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_frozen_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 3.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/model_config.yaml
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_groupedGEMM/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 3.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/model_config.yaml
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/model_config.yaml
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_dist_optimizer/model_config.yaml
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/model_config.yaml
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_top2router/model_config.yaml
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4/model_config.yaml
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/model_config.yaml
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 9.4 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/golden_values_dev_dgxh100_dgxc.json
================================================
[File too large to display: 9.4 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed/model_config.yaml
================================================
[File too large to display: 3.9 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 2.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_mtp_resume_torch_dist_fp8/model_config.yaml
================================================
[File too large to display: 4.0 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.9 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph/model_config.yaml
================================================
[File too large to display: 4.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 2.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp2_ep4_etp1_selective_recompute_experimental/model_config.yaml
================================================
[File too large to display: 4.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 3.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.9 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 2 B]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/model_config.yaml
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/model_config.yaml
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgxa100_dracooci.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_resume_torch_dist_te_4experts2parallel/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgxa100_dracooci.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci-ord.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_lts_dgxa100_dracooci.json
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_dev.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/golden_values_lts.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_resume_torch_dist_te_2experts/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon/model_config.yaml
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/golden_values_lts_dgx_a100.json
================================================


================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer/model_config.yaml
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/model_config.yaml
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 8.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 9.3 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/model_config.yaml
================================================
[File too large to display: 4.3 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 7.3 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 8.0 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/model_config.yaml
================================================
[File too large to display: 4.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgxh100_dgxc.json
================================================
[File too large to display: 14.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/model_config.yaml
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 17.9 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 17.9 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 16.6 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/model_config.yaml
================================================
[File too large to display: 2.9 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 6.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml
================================================
[File too large to display: 3.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_etp1_pp1_ep8_16B_logitsmatch_cudagraph_zmq/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 560.3 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_etp1_pp1_ep8_16B_logitsmatch_cudagraph_zmq/model_config.yaml
================================================
[File too large to display: 3.2 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_etp1_pp1_ep8_16B_logitsmatch_zmq/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 560.3 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_etp1_pp1_ep8_16B_logitsmatch_zmq/model_config.yaml
================================================
[File too large to display: 3.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_etp1_pp1_ep8_16B_logitsmatch_zmq_suspend_resume/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 4.9 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_etp1_pp1_ep8_16B_logitsmatch_zmq_suspend_resume/model_config.yaml
================================================
[File too large to display: 3.0 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_etp1_pp1_ep8_16B_logitsmatch_zmq_suspend_resume/prompts.json
================================================
[File too large to display: 7.1 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 3.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml
================================================
[File too large to display: 2.9 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest/env_config.yaml
================================================
[File too large to display: 211 B]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 1.4 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest/model_config.yaml
================================================
[File too large to display: 4.0 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 3.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml
================================================
[File too large to display: 3.0 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 3.7 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/model_config.yaml
================================================
[File too large to display: 2.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 3.8 KB]

================================================
FILE: tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/model_config.yaml
================================================
[File too large to display: 2.8 KB]

================================================
FILE: tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.4 KB]

================================================
FILE: tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.4 KB]

================================================
FILE: tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/model_config.yaml
================================================
[File too large to display: 1.4 KB]

================================================
FILE: tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/model_config.yaml
================================================
[File too large to display: 1.5 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 948 B]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 4.2 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100_2nd.json
================================================
[File too large to display: 13.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp1_pp1_vp1_resume_torch/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_dev_dgxa100_dracooci.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp2_pp1_vp1_sequence_parallel/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_te_tp4_pp1_resume_torch_dist/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_dev_dgxa100_dracooci.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1/model_config.yaml
================================================
[File too large to display: 1.5 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_a100_2nd.json
================================================
[File too large to display: 13.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_dev_dgxa100_dracooci.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp1_pp1_vp1_resume_torch/model_config.yaml
================================================
[File too large to display: 1.5 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci-ord.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_dev_dgxa100_dracooci.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp2_pp1_vp1/model_config.yaml
================================================
[File too large to display: 1.5 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json
================================================
[File too large to display: 13.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_mcore_tp4_pp1_resume_torch_dist/model_config.yaml
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_release/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 3.0 MB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_release/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 844.5 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_release/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_release_sm/golden_values_dev_dgx_gb200.json
================================================
[File too large to display: 298.5 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_release_sm/golden_values_dev_dgx_h100.json
================================================
[File too large to display: 298.6 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_release_sm/model_config.yaml
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_weekly_mcore_te_tp2_pp1_vp1/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/functional_tests/test_cases/t5/t5_weekly_mcore_te_tp2_pp1_vp1_sequence_parallel/golden_values_lts_dgx_a100.json
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/test_utils/python_scripts/approve_merge_gate.py
================================================
[File too large to display: 3.7 KB]

================================================
FILE: tests/test_utils/python_scripts/auto_reminder.py
================================================
[File too large to display: 11.3 KB]

================================================
FILE: tests/test_utils/python_scripts/auto_reminder_github.py
================================================
[File too large to display: 14.9 KB]

================================================
FILE: tests/test_utils/python_scripts/check_status_of_main.py
================================================
[File too large to display: 2.7 KB]

================================================
FILE: tests/test_utils/python_scripts/dashboard.py
================================================
[File too large to display: 6.7 KB]

================================================
FILE: tests/test_utils/python_scripts/download_coverage_results.py
================================================
[File too large to display: 3.4 KB]

================================================
FILE: tests/test_utils/python_scripts/download_golden_values.py
================================================
[File too large to display: 17.5 KB]

================================================
FILE: tests/test_utils/python_scripts/download_unit_tests_dataset.py
================================================
[File too large to display: 3.1 KB]

================================================
FILE: tests/test_utils/python_scripts/generate_jet_trigger_job.py
================================================
[File too large to display: 7.9 KB]

================================================
FILE: tests/test_utils/python_scripts/generate_local_jobs.py
================================================
[File too large to display: 3.4 KB]

================================================
FILE: tests/test_utils/python_scripts/launch_jet_workload.py
================================================
[File too large to display: 19.4 KB]

================================================
FILE: tests/test_utils/python_scripts/launch_nemo_run_workload.py
================================================
[File too large to display: 6.4 KB]

================================================
FILE: tests/test_utils/python_scripts/notify.py
================================================
[File too large to display: 3.5 KB]

================================================
FILE: tests/test_utils/python_scripts/recipe_parser.py
================================================
[File too large to display: 10.4 KB]

================================================
FILE: tests/test_utils/python_scripts/swap_pr_labels.py
================================================
[File too large to display: 12.4 KB]

================================================
FILE: tests/test_utils/python_scripts/wait_for_resources.py
================================================
[File too large to display: 2.5 KB]

================================================
FILE: tests/test_utils/recipes/_build-mcore-dev.yaml
================================================
[File too large to display: 306 B]

================================================
FILE: tests/test_utils/recipes/_build-mcore-lts.yaml
================================================
[File too large to display: 306 B]

================================================
FILE: tests/test_utils/recipes/_build-nemo.yaml
================================================
[File too large to display: 281 B]

================================================
FILE: tests/test_utils/recipes/gb200/gpt.yaml
================================================
[File too large to display: 12.5 KB]

================================================
FILE: tests/test_utils/recipes/gb200/moe-1node.yaml
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/test_utils/recipes/gb200/moe.yaml
================================================
[File too large to display: 8.3 KB]

================================================
FILE: tests/test_utils/recipes/gb200/unit-tests.yaml
================================================
[File too large to display: 4.1 KB]

================================================
FILE: tests/test_utils/recipes/h100/bert.yaml
================================================
[File too large to display: 2.9 KB]

================================================
FILE: tests/test_utils/recipes/h100/ckpt_converter.yaml
================================================
[File too large to display: 1.5 KB]

================================================
FILE: tests/test_utils/recipes/h100/gpt-dynamic-inference-cuda-graphs.yaml
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/test_utils/recipes/h100/gpt-dynamic-inference-with-coordinator.yaml
================================================
[File too large to display: 2.8 KB]

================================================
FILE: tests/test_utils/recipes/h100/gpt-dynamic-inference.yaml
================================================
[File too large to display: 2.7 KB]

================================================
FILE: tests/test_utils/recipes/h100/gpt-grads.yaml
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/test_utils/recipes/h100/gpt-grpo.yaml
================================================
[File too large to display: 2.8 KB]

================================================
FILE: tests/test_utils/recipes/h100/gpt-nemo.yaml
================================================
[File too large to display: 3.0 KB]

================================================
FILE: tests/test_utils/recipes/h100/gpt-static-inference.yaml
================================================
[File too large to display: 2.6 KB]

================================================
FILE: tests/test_utils/recipes/h100/gpt.yaml
================================================
[File too large to display: 16.7 KB]

================================================
FILE: tests/test_utils/recipes/h100/mamba-dynamic-inference.yaml
================================================
[File too large to display: 2.3 KB]

================================================
FILE: tests/test_utils/recipes/h100/mamba-static-inference.yaml
================================================
[File too large to display: 2.3 KB]

================================================
FILE: tests/test_utils/recipes/h100/mamba.yaml
================================================
[File too large to display: 3.1 KB]

================================================
FILE: tests/test_utils/recipes/h100/mimo.yaml
================================================
[File too large to display: 2.4 KB]

================================================
FILE: tests/test_utils/recipes/h100/module_performance.yaml
================================================
[File too large to display: 1.4 KB]

================================================
FILE: tests/test_utils/recipes/h100/moe-dynamic-inference-with-coordinator.yaml
================================================
[File too large to display: 2.6 KB]

================================================
FILE: tests/test_utils/recipes/h100/moe-dynamic-inference.yaml
================================================
[File too large to display: 2.3 KB]

================================================
FILE: tests/test_utils/recipes/h100/moe-grpo.yaml
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/test_utils/recipes/h100/moe-static-inference.yaml
================================================
[File too large to display: 2.5 KB]

================================================
FILE: tests/test_utils/recipes/h100/moe.yaml
================================================
[File too large to display: 8.9 KB]

================================================
FILE: tests/test_utils/recipes/h100/multimodal-llava.yaml
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/test_utils/recipes/h100/t5.yaml
================================================
[File too large to display: 3.5 KB]

================================================
FILE: tests/test_utils/recipes/h100/unit-tests.yaml
================================================
[File too large to display: 4.2 KB]

================================================
FILE: tests/unit_tests/__init__.py
================================================
[File too large to display: 66 B]

================================================
FILE: tests/unit_tests/a2a_overlap/test_cuda_graphed_schedule_chunk_1f1b.py
================================================
[File too large to display: 14.3 KB]

================================================
FILE: tests/unit_tests/a2a_overlap/test_schedule_chunk_1f1b.py
================================================
[File too large to display: 11.8 KB]

================================================
FILE: tests/unit_tests/a2a_overlap/test_schedule_layer_1f1b.py
================================================
[File too large to display: 19.9 KB]

================================================
FILE: tests/unit_tests/a2a_overlap/utils.py
================================================
[File too large to display: 8.1 KB]

================================================
FILE: tests/unit_tests/conftest.py
================================================
[File too large to display: 3.2 KB]

================================================
FILE: tests/unit_tests/data/__init__.py
================================================


================================================
FILE: tests/unit_tests/data/test_bin_reader.py
================================================
[File too large to display: 6.9 KB]

================================================
FILE: tests/unit_tests/data/test_builder.py
================================================
[File too large to display: 20.6 KB]

================================================
FILE: tests/unit_tests/data/test_fim_dataset.py
================================================
[File too large to display: 2.9 KB]

================================================
FILE: tests/unit_tests/data/test_gpt_dataset.py
================================================
[File too large to display: 3.6 KB]

================================================
FILE: tests/unit_tests/data/test_multimodal_dataset.py
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tests/unit_tests/data/test_preprocess_data.py
================================================
[File too large to display: 8.5 KB]

================================================
FILE: tests/unit_tests/data/test_preprocess_mmdata.py
================================================
[File too large to display: 7.3 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/__init__.py
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/conftest.py
================================================
[File too large to display: 911 B]

================================================
FILE: tests/unit_tests/dist_checkpointing/models/__init__.py
================================================


================================================
FILE: tests/unit_tests/dist_checkpointing/models/common.py
================================================
[File too large to display: 9.6 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/models/test_bert_model.py
================================================
[File too large to display: 5.2 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/models/test_gpt_model.py
================================================
[File too large to display: 7.8 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/models/test_mamba.py
================================================
[File too large to display: 7.4 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/models/test_mlp_glu.py
================================================
[File too large to display: 5.7 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/models/test_moe_experts.py
================================================
[File too large to display: 18.9 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/models/test_t5_model.py
================================================
[File too large to display: 5.7 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/test_async_save.py
================================================
[File too large to display: 4.4 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/test_checkpointable.py
================================================
[File too large to display: 3.4 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/test_fp8.py
================================================
[File too large to display: 4.5 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/test_fully_parallel.py
================================================
[File too large to display: 25.9 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/test_global_metadata_reuse.py
================================================
[File too large to display: 6.8 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/test_layer_wise_optimizer.py
================================================
[File too large to display: 22.6 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/test_local.py
================================================
[File too large to display: 14.0 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/test_mapping.py
================================================
[File too large to display: 4.5 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/test_msc.py
================================================
[File too large to display: 2.5 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/test_nonpersistent.py
================================================
[File too large to display: 5.7 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/test_optimizer.py
================================================
[File too large to display: 45.7 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/test_pipeline_parallel_layout.py
================================================
[File too large to display: 13.1 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/test_replication.py
================================================
[File too large to display: 6.2 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/test_safe_globals.py
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/test_serialization.py
================================================
[File too large to display: 43.2 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/test_strict.py
================================================
[File too large to display: 13.0 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/test_torch_dist.py
================================================
[File too large to display: 4.7 KB]

================================================
FILE: tests/unit_tests/dist_checkpointing/utils.py
================================================
[File too large to display: 11.0 KB]

================================================
FILE: tests/unit_tests/distributed/megatron_fsdp/test_mcore_fully_sharded_data_parallel.py
================================================
[File too large to display: 32.1 KB]

================================================
FILE: tests/unit_tests/distributed/megatron_fsdp/test_mfsdp_fully_shard.py
================================================
[File too large to display: 36.8 KB]

================================================
FILE: tests/unit_tests/distributed/megatron_fsdp/utils.py
================================================
[File too large to display: 6.5 KB]

================================================
FILE: tests/unit_tests/distributed/test_distributed_data_parallel.py
================================================
[File too large to display: 4.3 KB]

================================================
FILE: tests/unit_tests/distributed/test_finalize_model_grads.py
================================================
[File too large to display: 3.6 KB]

================================================
FILE: tests/unit_tests/distributed/test_grad_reduce_for_replicated_embedder.py
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/unit_tests/distributed/test_grad_sync_with_expert_parallel.py
================================================
[File too large to display: 10.4 KB]

================================================
FILE: tests/unit_tests/distributed/test_param_and_grad_buffer.py
================================================
[File too large to display: 18.2 KB]

================================================
FILE: tests/unit_tests/distributed/test_reduce_scatter_with_fp32_accumulation.py
================================================
[File too large to display: 2.9 KB]

================================================
FILE: tests/unit_tests/distributed/test_torch_fully_sharded_parallel.py
================================================
[File too large to display: 4.3 KB]

================================================
FILE: tests/unit_tests/export/trtllm/__init__.py
================================================


================================================
FILE: tests/unit_tests/export/trtllm/test_distributed_fp8.py
================================================
[File too large to display: 10.7 KB]

================================================
FILE: tests/unit_tests/export/trtllm/test_single_device_fp8.py
================================================
[File too large to display: 10.6 KB]

================================================
FILE: tests/unit_tests/export/trtllm/test_trtllm_distributed_gpu_converter.py
================================================
[File too large to display: 4.5 KB]

================================================
FILE: tests/unit_tests/export/trtllm/test_trtllm_helper.py
================================================
[File too large to display: 2.4 KB]

================================================
FILE: tests/unit_tests/export/trtllm/test_trtllm_layers.py
================================================
[File too large to display: 4.4 KB]

================================================
FILE: tests/unit_tests/export/trtllm/test_trtllm_single_device_converter.py
================================================
[File too large to display: 28.8 KB]

================================================
FILE: tests/unit_tests/extension/test_kitchen_sdpa.py
================================================
[File too large to display: 15.9 KB]

================================================
FILE: tests/unit_tests/find_test_cases.py
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/unit_tests/fusions/test_bias_dropout_fusion.py
================================================
[File too large to display: 14.4 KB]

================================================
FILE: tests/unit_tests/fusions/test_mla_yarn_rope_apply.py
================================================
[File too large to display: 8.8 KB]

================================================
FILE: tests/unit_tests/fusions/test_rmsnorm_residual_fusion.py
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/unit_tests/fusions/test_swiglu_fusion.py
================================================
[File too large to display: 1.4 KB]

================================================
FILE: tests/unit_tests/fusions/test_torch_softmax.py
================================================
[File too large to display: 9.7 KB]

================================================
FILE: tests/unit_tests/fusions/test_weighted_squared_relu_fusion.py
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/unit_tests/inference/__init__.py
================================================


================================================
FILE: tests/unit_tests/inference/contexts/attention_metadata/test_mamba_metadata.py
================================================
[File too large to display: 21.3 KB]

================================================
FILE: tests/unit_tests/inference/contexts/attention_metadata/test_tensor_ops.py
================================================
[File too large to display: 10.6 KB]

================================================
FILE: tests/unit_tests/inference/contexts/test_dynamic_context.py
================================================
[File too large to display: 115.5 KB]

================================================
FILE: tests/unit_tests/inference/contexts/test_dynamic_prefix_caching.py
================================================
[File too large to display: 37.9 KB]

================================================
FILE: tests/unit_tests/inference/engines/__init__.py
================================================


================================================
FILE: tests/unit_tests/inference/engines/test_dynamic_engine.py
================================================
[File too large to display: 123.5 KB]

================================================
FILE: tests/unit_tests/inference/engines/test_dynamic_events.py
================================================
[File too large to display: 26.6 KB]

================================================
FILE: tests/unit_tests/inference/engines/test_mamba_prefix_caching_e2e.py
================================================
[File too large to display: 27.6 KB]

================================================
FILE: tests/unit_tests/inference/engines/test_static_engine.py
================================================
[File too large to display: 15.7 KB]

================================================
FILE: tests/unit_tests/inference/model_inference_wrappers/__init__.py
================================================


================================================
FILE: tests/unit_tests/inference/model_inference_wrappers/gpt/test_gpt_inference_wrapper.py
================================================
[File too large to display: 4.7 KB]

================================================
FILE: tests/unit_tests/inference/model_inference_wrappers/t5/test_t5_inference_wrapper.py
================================================
[File too large to display: 4.6 KB]

================================================
FILE: tests/unit_tests/inference/test_batch_dimension_utils.py
================================================
[File too large to display: 21.2 KB]

================================================
FILE: tests/unit_tests/inference/test_common_inference_params.py
================================================
[File too large to display: 375 B]

================================================
FILE: tests/unit_tests/inference/test_communication_utils.py
================================================
[File too large to display: 4.9 KB]

================================================
FILE: tests/unit_tests/inference/test_data_parallel_inference_coordinator.py
================================================
[File too large to display: 26.5 KB]

================================================
FILE: tests/unit_tests/inference/test_dynamic_prefix_caching_coordinator.py
================================================
[File too large to display: 22.4 KB]

================================================
FILE: tests/unit_tests/inference/test_flash_decode.py
================================================
[File too large to display: 1.2 KB]

================================================
FILE: tests/unit_tests/inference/test_inference_config.py
================================================
[File too large to display: 708 B]

================================================
FILE: tests/unit_tests/inference/test_inference_utils.py
================================================
[File too large to display: 434 B]

================================================
FILE: tests/unit_tests/inference/test_moe_inference.py
================================================
[File too large to display: 15.3 KB]

================================================
FILE: tests/unit_tests/inference/test_moe_permute.py
================================================
[File too large to display: 19.5 KB]

================================================
FILE: tests/unit_tests/inference/test_mxfp8_utils.py
================================================
[File too large to display: 25.9 KB]

================================================
FILE: tests/unit_tests/inference/test_scheduler.py
================================================
[File too large to display: 4.3 KB]

================================================
FILE: tests/unit_tests/inference/test_stop_words.py
================================================
[File too large to display: 18.0 KB]

================================================
FILE: tests/unit_tests/inference/test_wandb_logging.py
================================================
[File too large to display: 11.3 KB]

================================================
FILE: tests/unit_tests/inference/text_generation_controllers/__init__.py
================================================


================================================
FILE: tests/unit_tests/inference/text_generation_controllers/test_encoder_decoder_text_generation_controller.py
================================================
[File too large to display: 5.4 KB]

================================================
FILE: tests/unit_tests/inference/text_generation_controllers/test_text_generation_controller.py
================================================
[File too large to display: 62.6 KB]

================================================
FILE: tests/unit_tests/inference/text_generation_controllers/test_vlm_text_generation_controller.py
================================================
[File too large to display: 6.7 KB]

================================================
FILE: tests/unit_tests/models/__init__.py
================================================


================================================
FILE: tests/unit_tests/models/test_base_embedding.py
================================================
[File too large to display: 2.4 KB]

================================================
FILE: tests/unit_tests/models/test_bert_model.py
================================================
[File too large to display: 9.8 KB]

================================================
FILE: tests/unit_tests/models/test_clip_vit_model.py
================================================
[File too large to display: 2.5 KB]

================================================
FILE: tests/unit_tests/models/test_gpt_model.py
================================================
[File too large to display: 18.4 KB]

================================================
FILE: tests/unit_tests/models/test_gpt_model_batch_invariant.py
================================================
[File too large to display: 12.7 KB]

================================================
FILE: tests/unit_tests/models/test_gpt_model_quantization.py
================================================
[File too large to display: 30.8 KB]

================================================
FILE: tests/unit_tests/models/test_heterogeneous_gpt_model.py
================================================
[File too large to display: 5.6 KB]

================================================
FILE: tests/unit_tests/models/test_llava_model.py
================================================
[File too large to display: 31.4 KB]

================================================
FILE: tests/unit_tests/models/test_mamba_model.py
================================================
[File too large to display: 15.3 KB]

================================================
FILE: tests/unit_tests/models/test_mamba_moe_model.py
================================================
[File too large to display: 20.9 KB]

================================================
FILE: tests/unit_tests/models/test_mimo_audio_submodules.py
================================================
[File too large to display: 15.5 KB]

================================================
FILE: tests/unit_tests/models/test_mimo_embedding_alignment.py
================================================
[File too large to display: 18.6 KB]

================================================
FILE: tests/unit_tests/models/test_mimo_model.py
================================================
[File too large to display: 23.4 KB]

================================================
FILE: tests/unit_tests/models/test_mimo_partition.py
================================================
[File too large to display: 17.3 KB]

================================================
FILE: tests/unit_tests/models/test_mimo_submodules.py
================================================
[File too large to display: 11.1 KB]

================================================
FILE: tests/unit_tests/models/test_multimodal_projector.py
================================================
[File too large to display: 2.5 KB]

================================================
FILE: tests/unit_tests/models/test_radio_model.py
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/unit_tests/models/test_t5_model.py
================================================
[File too large to display: 8.7 KB]

================================================
FILE: tests/unit_tests/optimizer/__init__.py
================================================
[File too large to display: 75 B]

================================================
FILE: tests/unit_tests/optimizer/test_optimizer_config.py
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/unit_tests/pipeline_parallel/__init__.py
================================================


================================================
FILE: tests/unit_tests/pipeline_parallel/test_bridge_communicator.py
================================================
[File too large to display: 19.1 KB]

================================================
FILE: tests/unit_tests/pipeline_parallel/test_fine_grained_activation_offloading.py
================================================
[File too large to display: 22.1 KB]

================================================
FILE: tests/unit_tests/pipeline_parallel/test_helpers.py
================================================
[File too large to display: 5.7 KB]

================================================
FILE: tests/unit_tests/pipeline_parallel/test_multimodule_communicator.py
================================================
[File too large to display: 35.0 KB]

================================================
FILE: tests/unit_tests/pipeline_parallel/test_multimodule_schedules.py
================================================
[File too large to display: 19.4 KB]

================================================
FILE: tests/unit_tests/pipeline_parallel/test_pipeline_layout.py
================================================
[File too large to display: 12.6 KB]

================================================
FILE: tests/unit_tests/pipeline_parallel/test_schedules.py
================================================
[File too large to display: 25.0 KB]

================================================
FILE: tests/unit_tests/post_training/__init__.py
================================================


================================================
FILE: tests/unit_tests/post_training/test_modelopt_model_builder.py
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/unit_tests/post_training/test_modelopt_module_spec.py
================================================
[File too large to display: 11.5 KB]

================================================
FILE: tests/unit_tests/resharding/test_communication_scheduler.py
================================================
[File too large to display: 8.3 KB]

================================================
FILE: tests/unit_tests/resharding/test_dp_balancing.py
================================================
[File too large to display: 16.7 KB]

================================================
FILE: tests/unit_tests/resharding/test_model_swap.py
================================================
[File too large to display: 20.5 KB]

================================================
FILE: tests/unit_tests/resharding/test_mxfp8_refit.py
================================================
[File too large to display: 10.3 KB]

================================================
FILE: tests/unit_tests/resharding/test_task_segmenter.py
================================================
[File too large to display: 7.0 KB]

================================================
FILE: tests/unit_tests/resharding/test_workload_packer.py
================================================
[File too large to display: 5.1 KB]

================================================
FILE: tests/unit_tests/rl/test_grouped_rollouts.py
================================================
[File too large to display: 4.0 KB]

================================================
FILE: tests/unit_tests/rl/test_rl_batch_invariant.py
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/unit_tests/rl/test_rl_utils.py
================================================
[File too large to display: 37.6 KB]

================================================
FILE: tests/unit_tests/rl/test_sequence_packing_utils.py
================================================
[File too large to display: 16.8 KB]

================================================
FILE: tests/unit_tests/run_ci_test.sh
================================================
[File too large to display: 3.8 KB]

================================================
FILE: tests/unit_tests/ssm/ops/test_causal_conv1d_varlen.py
================================================
[File too large to display: 7.4 KB]

================================================
FILE: tests/unit_tests/ssm/ops/test_ops_init.py
================================================
[File too large to display: 1.1 KB]

================================================
FILE: tests/unit_tests/ssm/ops/test_ssd_bmm.py
================================================
[File too large to display: 3.6 KB]

================================================
FILE: tests/unit_tests/ssm/ops/test_ssd_chunk_scan.py
================================================
[File too large to display: 5.6 KB]

================================================
FILE: tests/unit_tests/ssm/ops/test_ssd_chunk_state.py
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/unit_tests/ssm/ops/test_ssd_combined.py
================================================
[File too large to display: 14.6 KB]

================================================
FILE: tests/unit_tests/ssm/ops/test_ssd_state_passing.py
================================================
[File too large to display: 3.3 KB]

================================================
FILE: tests/unit_tests/ssm/ops/test_ssm_kernel.py
================================================
[File too large to display: 6.5 KB]

================================================
FILE: tests/unit_tests/ssm/test_causal_conv1d_triton.py
================================================
[File too large to display: 10.2 KB]

================================================
FILE: tests/unit_tests/ssm/test_gated_delta_net.py
================================================
[File too large to display: 12.2 KB]

================================================
FILE: tests/unit_tests/ssm/test_mamba_block.py
================================================
[File too large to display: 3.8 KB]

================================================
FILE: tests/unit_tests/ssm/test_mamba_context_parallel.py
================================================
[File too large to display: 5.5 KB]

================================================
FILE: tests/unit_tests/ssm/test_mamba_hybrid_layer_allocation.py
================================================
[File too large to display: 24.6 KB]

================================================
FILE: tests/unit_tests/ssm/test_mamba_layer.py
================================================
[File too large to display: 2.3 KB]

================================================
FILE: tests/unit_tests/ssm/test_mamba_mixer.py
================================================
[File too large to display: 5.9 KB]

================================================
FILE: tests/unit_tests/tensor_parallel/__init__.py
================================================


================================================
FILE: tests/unit_tests/tensor_parallel/test_cross_entropy.py
================================================
[File too large to display: 938 B]

================================================
FILE: tests/unit_tests/tensor_parallel/test_data.py
================================================
[File too large to display: 809 B]

================================================
FILE: tests/unit_tests/tensor_parallel/test_initialization.py
================================================
[File too large to display: 6.5 KB]

================================================
FILE: tests/unit_tests/tensor_parallel/test_layers.py
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/unit_tests/tensor_parallel/test_mappings.py
================================================
[File too large to display: 7.3 KB]

================================================
FILE: tests/unit_tests/tensor_parallel/test_random.py
================================================
[File too large to display: 11.0 KB]

================================================
FILE: tests/unit_tests/tensor_parallel/test_tensor_parallel_utils.py
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/unit_tests/test_api_backwards_compat_setup.py
================================================
[File too large to display: 5.0 KB]

================================================
FILE: tests/unit_tests/test_argument_utils.py
================================================
[File too large to display: 22.0 KB]

================================================
FILE: tests/unit_tests/test_basic.py
================================================
[File too large to display: 39 B]

================================================
FILE: tests/unit_tests/test_checkpointing.py
================================================
[File too large to display: 13.7 KB]

================================================
FILE: tests/unit_tests/test_fp8_param.py
================================================
[File too large to display: 19.8 KB]

================================================
FILE: tests/unit_tests/test_fp8_utils.py
================================================
[File too large to display: 4.8 KB]

================================================
FILE: tests/unit_tests/test_hyper_comm_grid.py
================================================
[File too large to display: 20.0 KB]

================================================
FILE: tests/unit_tests/test_imports.py
================================================
[File too large to display: 4.1 KB]

================================================
FILE: tests/unit_tests/test_inference.py
================================================
[File too large to display: 4.4 KB]

================================================
FILE: tests/unit_tests/test_layer_wise_optimizer.py
================================================
[File too large to display: 49.7 KB]

================================================
FILE: tests/unit_tests/test_lion_optimizer.py
================================================
[File too large to display: 9.5 KB]

================================================
FILE: tests/unit_tests/test_local_multi_tensor_fns.py
================================================
[File too large to display: 3.0 KB]

================================================
FILE: tests/unit_tests/test_model_configs.py
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/unit_tests/test_muon_optimizer.py
================================================
[File too large to display: 22.2 KB]

================================================
FILE: tests/unit_tests/test_nccl_allocator.py
================================================
[File too large to display: 3.3 KB]

================================================
FILE: tests/unit_tests/test_num_microbatches_calculator.py
================================================
[File too large to display: 6.2 KB]

================================================
FILE: tests/unit_tests/test_optimizer.py
================================================
[File too large to display: 39.2 KB]

================================================
FILE: tests/unit_tests/test_optimizer_cpu_offloading.py
================================================
[File too large to display: 9.0 KB]

================================================
FILE: tests/unit_tests/test_optimizer_param_scheduler.py
================================================
[File too large to display: 11.2 KB]

================================================
FILE: tests/unit_tests/test_parallel_state.py
================================================
[File too large to display: 22.6 KB]

================================================
FILE: tests/unit_tests/test_process_groups_config.py
================================================
[File too large to display: 4.2 KB]

================================================
FILE: tests/unit_tests/test_training.py
================================================
[File too large to display: 4.9 KB]

================================================
FILE: tests/unit_tests/test_typed_torch.py
================================================
[File too large to display: 7.4 KB]

================================================
FILE: tests/unit_tests/test_utilities.py
================================================
[File too large to display: 4.8 KB]

================================================
FILE: tests/unit_tests/test_utils.py
================================================
[File too large to display: 15.3 KB]

================================================
FILE: tests/unit_tests/tokenizers/test_tokenizer.py
================================================
[File too large to display: 14.2 KB]

================================================
FILE: tests/unit_tests/transformer/__init__.py
================================================


================================================
FILE: tests/unit_tests/transformer/experimental_attention_variant/test_absorbed_mla.py
================================================
[File too large to display: 15.4 KB]

================================================
FILE: tests/unit_tests/transformer/experimental_attention_variant/test_attention_variant_dsa.py
================================================
[File too large to display: 62.7 KB]

================================================
FILE: tests/unit_tests/transformer/moe/__init__.py
================================================


================================================
FILE: tests/unit_tests/transformer/moe/conftest.py
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/unit_tests/transformer/moe/test_a2a_token_dispatcher.py
================================================
[File too large to display: 5.1 KB]

================================================
FILE: tests/unit_tests/transformer/moe/test_aux_loss.py
================================================
[File too large to display: 30.7 KB]

================================================
FILE: tests/unit_tests/transformer/moe/test_grouped_mlp.py
================================================
[File too large to display: 8.0 KB]

================================================
FILE: tests/unit_tests/transformer/moe/test_latent_moe_layer.py
================================================
[File too large to display: 4.5 KB]

================================================
FILE: tests/unit_tests/transformer/moe/test_moe_layer.py
================================================
[File too large to display: 14.8 KB]

================================================
FILE: tests/unit_tests/transformer/moe/test_moe_layer_discrepancy.py
================================================
[File too large to display: 10.3 KB]

================================================
FILE: tests/unit_tests/transformer/moe/test_multihot_indices_converter.py
================================================
[File too large to display: 3.5 KB]

================================================
FILE: tests/unit_tests/transformer/moe/test_router_replay.py
================================================
[File too large to display: 3.6 KB]

================================================
FILE: tests/unit_tests/transformer/moe/test_routers.py
================================================
[File too large to display: 23.8 KB]

================================================
FILE: tests/unit_tests/transformer/moe/test_sequential_mlp.py
================================================
[File too large to display: 8.7 KB]

================================================
FILE: tests/unit_tests/transformer/moe/test_shared_experts.py
================================================
[File too large to display: 4.8 KB]

================================================
FILE: tests/unit_tests/transformer/moe/test_token_dispatcher.py
================================================
[File too large to display: 21.8 KB]

================================================
FILE: tests/unit_tests/transformer/moe/test_upcycling.py
================================================
[File too large to display: 11.3 KB]

================================================
FILE: tests/unit_tests/transformer/test_attention.py
================================================
[File too large to display: 26.5 KB]

================================================
FILE: tests/unit_tests/transformer/test_attention_no_rope.py
================================================
[File too large to display: 8.6 KB]

================================================
FILE: tests/unit_tests/transformer/test_attention_packed_seq.py
================================================
[File too large to display: 7.3 KB]

================================================
FILE: tests/unit_tests/transformer/test_core_attention.py
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/unit_tests/transformer/test_cuda_graphs.py
================================================
[File too large to display: 44.1 KB]

================================================
FILE: tests/unit_tests/transformer/test_full_cuda_graph.py
================================================
[File too large to display: 3.0 KB]

================================================
FILE: tests/unit_tests/transformer/test_mlp.py
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/unit_tests/transformer/test_module.py
================================================
[File too large to display: 3.7 KB]

================================================
FILE: tests/unit_tests/transformer/test_multi_latent_attention.py
================================================
[File too large to display: 70.8 KB]

================================================
FILE: tests/unit_tests/transformer/test_multi_token_prediction.py
================================================
[File too large to display: 37.4 KB]

================================================
FILE: tests/unit_tests/transformer/test_mup.py
================================================
[File too large to display: 23.0 KB]

================================================
FILE: tests/unit_tests/transformer/test_quantization_config.py
================================================
[File too large to display: 7.7 KB]

================================================
FILE: tests/unit_tests/transformer/test_relative_attention.py
================================================
[File too large to display: 1.4 KB]

================================================
FILE: tests/unit_tests/transformer/test_rope.py
================================================
[File too large to display: 6.1 KB]

================================================
FILE: tests/unit_tests/transformer/test_spec_customization.py
================================================
[File too large to display: 10.8 KB]

================================================
FILE: tests/unit_tests/transformer/test_submodule_callables.py
================================================
[File too large to display: 5.5 KB]

================================================
FILE: tests/unit_tests/transformer/test_te_layers_batch_invariant.py
================================================
[File too large to display: 24.6 KB]

================================================
FILE: tests/unit_tests/transformer/test_thd_correctness.py
================================================
[File too large to display: 24.4 KB]

================================================
FILE: tests/unit_tests/transformer/test_transformer_block.py
================================================
[File too large to display: 34.7 KB]

================================================
FILE: tests/unit_tests/transformer/test_transformer_block_custom_pgs.py
================================================
[File too large to display: 30.2 KB]

================================================
FILE: tests/unit_tests/transformer/test_transformer_layer.py
================================================
[File too large to display: 13.2 KB]

================================================
FILE: tests/unit_tests/transformer/test_utils.py
================================================
[File too large to display: 15.3 KB]

================================================
FILE: tests/unit_tests/transformer/test_vision_cuda_graphs.py
================================================
[File too large to display: 26.2 KB]

================================================
FILE: tests/unit_tests/utils/test_experimental_log_once.py
================================================
[File too large to display: 3.2 KB]

================================================
FILE: tools/__init__.py
================================================


================================================
FILE: tools/autoformat.sh
================================================
[File too large to display: 1.5 KB]

================================================
FILE: tools/bert_embedding/__init__.py
================================================
[File too large to display: 127 B]

================================================
FILE: tools/bert_embedding/dataset.py
================================================
[File too large to display: 1.7 KB]

================================================
FILE: tools/bert_embedding/embed.py
================================================
[File too large to display: 16.8 KB]

================================================
FILE: tools/bert_embedding/external_libs.py
================================================
[File too large to display: 405 B]

================================================
FILE: tools/bert_embedding/huggingface.py
================================================
[File too large to display: 3.7 KB]

================================================
FILE: tools/build_sequences_per_dataset.py
================================================
[File too large to display: 5.4 KB]

================================================
FILE: tools/check_copyright.py
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tools/checkpoint/checkpoint_inspector.py
================================================
[File too large to display: 36.9 KB]

================================================
FILE: tools/checkpoint/convert.py
================================================
[File too large to display: 5.2 KB]

================================================
FILE: tools/checkpoint/hybrid_conversion.py
================================================
[File too large to display: 16.5 KB]

================================================
FILE: tools/checkpoint/loader_base.py
================================================
[File too large to display: 20.7 KB]

================================================
FILE: tools/checkpoint/loader_core.py
================================================
[File too large to display: 3.3 KB]

================================================
FILE: tools/checkpoint/loader_legacy.py
================================================
[File too large to display: 16.2 KB]

================================================
FILE: tools/checkpoint/loader_llama_mistral.py
================================================
[File too large to display: 27.0 KB]

================================================
FILE: tools/checkpoint/loader_llava.py
================================================
[File too large to display: 18.2 KB]

================================================
FILE: tools/checkpoint/loader_mixtral_hf.py
================================================
[File too large to display: 13.2 KB]

================================================
FILE: tools/checkpoint/saver_base.py
================================================
[File too large to display: 30.3 KB]

================================================
FILE: tools/checkpoint/saver_core.py
================================================
[File too large to display: 2.8 KB]

================================================
FILE: tools/checkpoint/saver_hf_llava.py
================================================
[File too large to display: 17.2 KB]

================================================
FILE: tools/checkpoint/saver_legacy.py
================================================
[File too large to display: 20.2 KB]

================================================
FILE: tools/checkpoint/saver_llava.py
================================================
[File too large to display: 22.1 KB]

================================================
FILE: tools/checkpoint/schema_base.py
================================================
[File too large to display: 2.6 KB]

================================================
FILE: tools/checkpoint/schema_core.py
================================================
[File too large to display: 5.7 KB]

================================================
FILE: tools/checkpoint/schema_hf.py
================================================
[File too large to display: 8.1 KB]

================================================
FILE: tools/checkpoint/utils.py
================================================
[File too large to display: 2.5 KB]

================================================
FILE: tools/copyright.sh
================================================
[File too large to display: 863 B]

================================================
FILE: tools/linter.py
================================================
[File too large to display: 1.2 KB]

================================================
FILE: tools/merge_datasets.py
================================================
[File too large to display: 2.3 KB]

================================================
FILE: tools/preprocess_data.py
================================================
[File too large to display: 18.9 KB]

================================================
FILE: tools/preprocess_data_nmt.py
================================================
[File too large to display: 3.9 KB]

================================================
FILE: tools/preprocess_mmdata.py
================================================
[File too large to display: 7.4 KB]

================================================
FILE: tools/report_theoretical_memory.py
================================================
[File too large to display: 557 B]

================================================
FILE: tools/run_dynamic_text_generation_server.py
================================================
[File too large to display: 3.6 KB]

================================================
FILE: tools/run_inference_performance_test.py
================================================
[File too large to display: 9.5 KB]

================================================
FILE: tools/run_mamba_text_generation_server.py
================================================
[File too large to display: 278 B]

================================================
FILE: tools/run_mamba_text_generation_server_completions.py
================================================
[File too large to display: 278 B]

================================================
FILE: tools/run_text_generation_server.py
================================================
[File too large to display: 6.3 KB]

================================================
FILE: tools/run_vlm_text_generation.py
================================================
[File too large to display: 7.4 KB]

================================================
FILE: tools/text_generation_cli.py
================================================
[File too large to display: 785 B]

================================================
FILE: tools/trigger_internal_ci.md
================================================
[File too large to display: 2.5 KB]

================================================
FILE: tools/trigger_internal_ci.py
================================================
[File too large to display: 5.5 KB]

================================================
FILE: tools/upgrade_dependencies.sh
================================================
[File too large to display: 460 B]

================================================
FILE: tools/wait_daemon.sh
================================================
[File too large to display: 552 B]

================================================
FILE: train_rl.py
================================================
[File too large to display: 15.1 KB]